4A Server -  2.0
 All Classes Namespaces Files Functions Variables Enumerator
DocumentClone.java
Go to the documentation of this file.
1 /*
2  * Project: Server for annotations sharing
3  * Author: Ing. Jaroslav Dytrych idytrych@fit.vutbr.cz
4  * File: DocumentClone.java
5  * Description: Class provides document cloning with new content, initiated from NLP.
6  */
7 
8 /**
9  * @file DocumentClone.java
10  *
11  * @brief Class provides document cloning with new content, initiated from NLP.
12  */
13 
14 package cz.vutbr.fit.knot.annotations.documentCloner;
15 
36 import java.io.BufferedReader;
37 import java.io.StringWriter;
38 import java.util.ArrayList;
39 import java.util.Date;
40 import java.util.Iterator;
41 import java.util.List;
42 import java.util.logging.Level;
43 import java.util.logging.Logger;
44 import javax.servlet.http.HttpServletRequest;
45 import javax.xml.xpath.XPathExpressionException;
46 import org.json.simple.JSONObject;
47 import org.json.simple.JSONValue;
48 import org.w3c.dom.Document;
49 
50 /**
51  * Class provides document cloning with new content, initiated from NLP.
52  *
53  * @brief Class provides document cloning with new content, initiated from NLP
54  * @author Martin Petr (xpetrm05)
55  */
56 public class DocumentClone {
57  /** Name of old document uri parameter in JSON string */
58  public static final String OLD_DOCUMENT_URI_PARAM = "oldDocumentUri";
59 
60  /** Name of new document uri parameter in JSON string */
61  public static final String NEW_DOCUMENT_URI_PARAM = "newDocumentUri";
62 
63  /** Name of new document content parameter in JSON string */
64  public static final String NEW_DOCUMENT_CONTENT_PARAM = "content";
65 
66  /** Provider of matchers for finding fragments (exact matches only) */
68 
69  /** Provider of matchers for finding fragments (nearest matches only) */
71 
72  /** Provider of matchers for finding fragments (sequence iterator) */
74 
75  /**
76  * Constructor. Registers the matchers used for locating annotated fragments
  * with the nearest, sequence and exact matcher providers.
  *
  * NOTE(review): this listing does not show where the three providers are
  * created; the second argument of add() looks like an ordering index of the
  * matcher within its provider - confirm against MatcherProvider.
77  */
78  public DocumentClone() {
82 
83  // create nearest matchers: exact comparison first, then two Levenshtein
  // comparisons with threshold 0.8
84  nearestMatcherProvider.add(new Matcher(new BidirectionallyUnNestNodeIterator(), new Comparator(new ExactMethod(), Comparator.TraversingMethod.CHARACTER_BIDIRECTIONALLY)), 0);
85  nearestMatcherProvider.add(new Matcher(new BidirectionallyUnNestNodeIterator(), new Comparator(new LevenshteinMethod(0.8), Comparator.TraversingMethod.WORD)), 1);
86  nearestMatcherProvider.add(new Matcher(new BidirectionallyUnNestNodeIterator(), new Comparator(new LevenshteinMethod(0.8), Comparator.TraversingMethod.FIRST_AND_LAST_WORD_THEN_LETTER, 2.0, 2.0)), 2);
87 
88  // create sequence matchers: exact comparison, Levenshtein with thresholds
  // 0.8 and 0.5, and finally a Metaphone comparison
89  sequenceMatcherProvider.add(new Matcher(new SequenceNodeIterator(), new Comparator(new ExactMethod(), Comparator.TraversingMethod.CHARACTER_BIDIRECTIONALLY)), 0);
90  sequenceMatcherProvider.add(new Matcher(new SequenceNodeIterator(), new Comparator(new LevenshteinMethod(0.8), Comparator.TraversingMethod.FIRST_AND_LAST_WORD_THEN_LETTER, 2.0, 2.0)), 1);
91  sequenceMatcherProvider.add(new Matcher(new SequenceNodeIterator(), new Comparator(new LevenshteinMethod(0.5), Comparator.TraversingMethod.WORD)), 2);
92  sequenceMatcherProvider.add(new Matcher(new SequenceNodeIterator(), new Comparator(new MetaphoneMethod(), Comparator.TraversingMethod.CHARACTER_BIDIRECTIONALLY)), 3);
93 
94  // create exact matcher: a single XPath based matcher with exact comparison
95  exactMatcherProvider.add(new Matcher(new XPathNodeIterator(), new Comparator(new ExactMethod(), Comparator.TraversingMethod.STATIC)), 0);
96  }
97 
98  /**
99  * Clones the old document into a new document with new content. The method
100  * first checks whether the requesting server is allowed to ask for a clone,
101  * then parses the request, stores the new document and clones the annotations
  * of the old document onto it.
102  *
  * Errors are only logged; no exception leaves this method.
  *
103  * @param request request from servlet
104  * @return message for server: "ok" when the old document has no annotations,
  *         an empty string on every other path (including the path on which
  *         annotations were cloned and the error paths)
105  */
106  public String cloneDocument(HttpServletRequest request){
107  CloneParams actualParams = new CloneParams(request);
108  if(isAuthorized(actualParams)){
109  //request from authorized server
110  try{
111  parseJSON(actualParams);
112  }catch(Exception e){
113  //error in parsed JSON string
  // NOTE(review): the line opening the block closed below is missing from
  // this listing. No return is visible here, so after a JSON error the
  // method appears to continue with document parsing - confirm.
  // NOTE(review): Level.ALL is a filter level; a record logged at it is
  // only published by loggers configured at ALL - confirm the level.
115  String msg = "Parse JSON in document clone error: " + e.getMessage();
116  Logger.getLogger(DocumentClone.class.getName()).log(Level.ALL, msg);
117  }
118  }
119  }else{
120  //error unauthorized request
  // NOTE(review): the line opening the block closed below is missing from
  // this listing.
122  String msg = "Unauthorized document clone request from IP: " + request.getRemoteAddr();
123  Logger.getLogger(DocumentClone.class.getName()).log(Level.ALL, msg);
124  }
125  return "";
126  }
127 
128  // request is authorized and its parameters have been parsed
129  List<Annotation> oldAnnotList = null;
130  try{
131  //make object with parsed new document
132  parseDocument(actualParams);
133  //make and store new document to DB
134  createNewDocument(actualParams);
135 
136  oldAnnotList = getAllAnnotations(actualParams);
137  if(oldAnnotList != null && !oldAnnotList.isEmpty()){
138  //there are some annotations in document
139  makeNewAnnotations(oldAnnotList,actualParams);
140  }else{
141  //no annotations in document, clone is complete
142  return "ok";
143  }
144  }catch(Exception e){
  // NOTE(review): the line opening the block closed below is missing from
  // this listing.
146  String msg = "Error while creating new document: " + e.getMessage();
147  System.out.println(msg);
148  Logger.getLogger(DocumentClone.class.getName()).log(Level.ALL, msg);
149  }
150  }
151 
152  // inform StoryScope - tripleStore
  // This also runs when the try block above failed after the annotation list
  // was loaded, because only the list itself is tested here.
153  if(oldAnnotList != null && !oldAnnotList.isEmpty()){
154  StoryscopeInterfaceSender sender = new StoryscopeInterfaceSender("StoryscopeInterfaceSender for DocumentClonner");
  // the sender gets its own copy of the list
155  ArrayList<Annotation> tmpArray = new ArrayList<Annotation>(oldAnnotList.size());
156  tmpArray.addAll(oldAnnotList);
157  sender.setAddedAnnotations(tmpArray);
158  sender.start();
159  }
160  return "";
161  }
162 
163  /**
164  * Method checks if request come from authorized server.
165  *
166  * @param params object with parameters of request
167  * @return true value indicate thet server is authorized, false indicates unauthorized request
168  */
169  private boolean isAuthorized(CloneParams params){
170  @SuppressWarnings("unchecked")
171  List<DocClonerServer> results = AppBean.getPersistenceManager().getEntitiesByName("DocClonerServer");
172  if(results == null || results.isEmpty()){
173  return true;
174  }
175 
176  String address = params.getRequest().getRemoteAddr();
177  Iterator<DocClonerServer> resultsIt = results.iterator();
178  if(resultsIt.hasNext()){
179  if(resultsIt.next().getIpAddress().equals(address)){
180  return true;
181  }
182  }
183 
184  return false;
185  }
186 
187 
188  /**
189  * Method parse JSON string to parameters and save them to params object. If
190  * problem occurs during parsing, method throws exception with message.
191  *
192  * @param params object with parameters of request
193  * @return method returns true if parsing ends with succes otherwise returns false
194  * @throws RuntimeException if it fails to find the request parameters
195  *
196  * Simple example of JSON string:
197  *
198  * {
199  * "oldDocumentUri": "oneValue",
200  * "newDocumentUri": "oneValue",
201  * "content": "abcd content cdef"
202  * }
203  */
204  private boolean parseJSON(CloneParams params)
205  throws RuntimeException{
206  String requestMessage = readMessage(params);
207  if(requestMessage != null && !requestMessage.isEmpty()){
208  JSONObject jsonResponse = (JSONObject)JSONValue.parse(requestMessage);
209  if(jsonResponse != null){
210 
211  try{
212  // get uri of old document
213  params.setOldDocumentUri(getJsonParameter(jsonResponse,OLD_DOCUMENT_URI_PARAM));
214  // get uri of new document
215  params.setNewDocumentUri(getJsonParameter(jsonResponse,NEW_DOCUMENT_URI_PARAM));
216  // get conetent of new document
217  params.setNewDocumentContent(getJsonParameter(jsonResponse,NEW_DOCUMENT_CONTENT_PARAM));
218  }catch(Exception e){
219  //re-throw occured exception
220  throw new RuntimeException(e.getMessage());
221  }
222  }else{
223  //error can't parse recived message
224  throw new RuntimeException("Can't parse JSON string.");
225  }
226  }else{
227  //can't read request string
228  throw new RuntimeException("Can't read JSON request string.");
229  }
230  return false;
231  }
232 
233 
234  /**
235  * Method creates a new document by the source document.
236  *
237  * @param params object with parameters of request
238  * @return true if creating of new document ends with success
239  * @throws RuntimeException if creating new document fails
240  */
241  private boolean createNewDocument(CloneParams params)
242  throws RuntimeException{
243 
244  AnnotDocument checkDoc = findDocumentInDB(params.getNewDocumentUri());
245  if(checkDoc != null) {
246  throw new RuntimeException("Document with new document URI: " + params.getNewDocumentUri() + " is already in DB.");
247  }
248  // at first is important to fetch old document from DB
249  AnnotDocument oldDocument = findDocumentInDB(params.getOldDocumentUri());
250  params.setOldDocument(oldDocument);
251  if(oldDocument != null){
252  // make new document
253  AnnotDocument newDocument = new AnnotDocument();
254  newDocument.setUri(params.getNewDocumentUri());
255  newDocument.setContent(params.getNewDocumentContent());
256  newDocument.setModified(new Date());
257  newDocument.setAdded(oldDocument.getAdded());
258 
259  // store new document to DB
260  if (AppBean.getPersistenceManager().persistEntity(newDocument)) {
261  throw new RuntimeException("Persisting of new document failed.");
262  }
263 
264  params.setNewDocument(newDocument);
265 
266  }else{
267  //error can't find document in DB
268  throw new RuntimeException("Can't find document with uri: " + params.getOldDocumentUri() + " in DB.");
269  }
270 
271  return false;
272  }
273 
274  /**
275  * The method creates a new annotation according to the source document.
276  *
277  * @param oldAnnotations List of old annotations to clone
278  * @param params object with parameters of request
279  * @return true if copying of new annotations ends with success
280  * @throws RuntimeException if creating new annotations fails
281  */
282  private boolean makeNewAnnotations(List<Annotation> oldAnnotations, CloneParams params)
283  throws RuntimeException {
284  //At first clone annotations
285  Iterator<Annotation> oldAnnotIt = oldAnnotations.iterator();
286  //clone and create list of clonned annotations
287  while(oldAnnotIt.hasNext()){
288  Annotation currentAnnot = oldAnnotIt.next();
289  AnnotCloneItem newItem = new AnnotCloneItem(currentAnnot.getId(),currentAnnot);
290  newItem.setNewAttributesOfAnnotation(getAllAttributes(currentAnnot.getId()));
291 
292  //change old annotation to new
293  currentAnnot.setId(null);
294  currentAnnot.setAttributes(null);
295  currentAnnot.setSourceDocument(params.getNewDocument());
296  currentAnnot.setSourceDocumentId(params.getNewDocument().getId());
297 
298  Iterator<Fragment> fragmentsIt = currentAnnot.getFragments().iterator();
299  while(fragmentsIt.hasNext()){
300  fragmentsIt.next().setRefAnnotation(currentAnnot);
301  }
302  //update annotation
303  updateAnnotation(currentAnnot, params);
304 
305  //persist annotation to DB
306  if(AppBean.getPersistenceManager().persistEntity(currentAnnot)){
307  //error can't persist parameter of annotation to DB
308  throw new RuntimeException("Can't persist annotation to DB.");
309  }
310 
311  //add information about cloned annotation to list
312  params.addCloneAnnotation(newItem);
313  }
314 
315  //As second add attributes to clonned annotations
316  Iterator<AnnotCloneItem> clonedAnnotIt = params.getCloneAnnotations().iterator();
317  while(clonedAnnotIt.hasNext()){
318  AnnotCloneItem currentItem = clonedAnnotIt.next();
319  //check if old annotation had any attributes
320 
321  if(currentItem.getNewAttributesOfAnnotation() != null && !currentItem.getNewAttributesOfAnnotation().isEmpty()){
322  Iterator<BaseAttribute> attributeIt = currentItem.getNewAttributesOfAnnotation().iterator();
323  //Go trough all attributes
324  while(attributeIt.hasNext()){
325  BaseAttribute currentAttribute = attributeIt.next();
326  //Test if current attribute is annotation link
327  if(currentAttribute instanceof LinkedAnnotationAttribute){
328  //Linked attribute
329  LinkedAnnotationAttribute tmpAttribute = (LinkedAnnotationAttribute)currentAttribute;
330  Annotation linkedAnnotation = (Annotation)tmpAttribute.getValue();
331  if(linkedAnnotation != null){
332  AnnotCloneItem foundedAnnotation = params.findCloneAnnotation(linkedAnnotation.getId());
333  tmpAttribute.setValue(foundedAnnotation.getNewAnnotation());
334  if(foundedAnnotation.getNewAnnotation() != null){
335  tmpAttribute.setAttributeType(foundedAnnotation.getNewAnnotation().getAnnotType());
336  }
337  }
338  }else if(currentAttribute instanceof NestedAnnotationAttribute){
339  //Nested attribute
340  NestedAnnotationAttribute tmpAttribute = (NestedAnnotationAttribute)currentAttribute;
341  Annotation nestedAnnotation = (Annotation)tmpAttribute.getValue();
342  if(nestedAnnotation != null){
343  AnnotCloneItem foundedAnnotation = params.findCloneAnnotation(nestedAnnotation.getId());
344  tmpAttribute.setValue(foundedAnnotation.getNewAnnotation());
345  if(foundedAnnotation.getNewAnnotation() != null){
346  tmpAttribute.setAttributeType(foundedAnnotation.getNewAnnotation().getAnnotType());
347  }
348  }
349  }
350  //Normal attribute
351  currentAttribute.setAnnotation(currentItem.getNewAnnotation().getId());
352  //Add attribute into DB
353  if(AppBean.getPersistenceManager().persistEntity(currentAttribute)){
354  //error can't persist parameter of annotation to DB
355  throw new RuntimeException("Can't persist parameter of annotation to DB.");
356  }
357  }
358  }
359  }
360 
361  return true;
362  } // makeNewAnnotations()
363 
364  /**
365  * Method reads content of POST message.
366  *
367  * @param params object with parameters of request
368  * @return string with content of message
369  */
370  private String readMessage(CloneParams params){
371  StringWriter writer = new StringWriter();
372  try{
373  BufferedReader bufReader = params.getRequest().getReader();
374  int nextChar;
375  while ((nextChar = bufReader.read()) != -1) {
376  writer.write(nextChar);
377  }
378  }catch(Exception e){
379  return null;
380  }
381  return writer.toString();
382  }
383 
384  /**
385  * Method parse specific request parameter in JSON string. If there is more
386  * then one parameter with given name, is returned first found.
387  *
388  * @param jsonResponse JSON object with parameters
389  * @param parameterName Name of specific parameter
390  * @return value of parameter
391  * @throws RuntimeException if parsing of JSON parameters fails
392  */
393  private String getJsonParameter(JSONObject jsonResponse, String parameterName)
394  throws RuntimeException{
395  String result = null;
396  if(jsonResponse.containsKey(parameterName)){
397  //get first found
398  result = (String)jsonResponse.get(parameterName);
399  if(result == null || result.isEmpty()){
400  //error old document parameter id empty
401  throw new RuntimeException("Parameter " + parameterName + " is empty.");
402  }
403  }else{
404  //error can't find old document uri
405  throw new RuntimeException("Can't find " + parameterName + " parameter in JSON string.");
406  }
407 
408  return result;
409  }
410 
411  /**
412  * Method parse given document and save it to CloneParams object. Method throws
413  * exception if error occurs.
414  *
415  * @param params object with parameters of request
416  */
417  private void parseDocument(CloneParams params){
418  // method builds old document
419  try {
420  MatcherProvider mp = new MatcherProvider();
421  Document parsedSyncDocument = mp.getDocumentFromString(params.getNewDocumentContent(), false, true);
422  params.setParsedDoc(parsedSyncDocument);
423  } catch (Exception ex) {
424  //error can't build document
425  throw new RuntimeException("Can't parse document:" + ex.getMessage() + ".");
426  }
427  }
428 
429  /**
430  * Updates fragments in annotation. (Method copied from CoreFuncMOdule)
  *
  * Every fragment of the annotation is searched for in the parsed new
  * document: first with the exact matcher, then with the nearest matcher and
  * finally with the sequence matcher. A fragment that is found is updated and
  * marked as good, a fragment that is not found is marked as bad.
431  *
432  * @param annotation Annotation to update
433  * @param params Parameters from request
434  * @return true if at least one fragment was updated or newly marked as bad;
435  * false if the annotation has no fragments, no parsed document is available
  * or nothing changed
436  */
437  private boolean updateAnnotation(Annotation annotation, CloneParams params) {
438  if (annotation.getFragments().size() < 1) {
439  // an annotation without fragments can't be updated
440  return false;
441  }
442 
443  // get synchronized document
  // NOTE(review): annotDoc is not used anywhere below in this method
444  AnnotDocument annotDoc = params.getNewDocument();
445  Document parsedDoc = params.getParsedDoc();
446  if (parsedDoc == null) {
447  //make message about error and throw it
448  //"Bad document for update of the annotation."
449  return false; // updating is not possible
450  }
451 
452  int badFragCnt = 0; // total count of bad fragments
453  int nowOrphFragCnt = 0; // count of just orphaned fragments
454  int updatedFragCnt = 0; // count of just updated fragments
455  Iterator<Fragment> fragIt = annotation.getFragments().iterator();
456  while (fragIt.hasNext()) { // for each annotated fragment
457  Fragment fragment = fragIt.next();
458  try {
459  // find fragment with the exact matcher
460  UpdatableFragment uf = exactMatcherProvider.match(parsedDoc, fragment.toUpdatableFragment());
461  if (uf != null) { // if fragment was found
  // the fragment itself is left unchanged, only its state can change
462  if (fragment.getIsGood() == false) { // if fragment was marked as bad
463  fragment.setIsGood(true); // mark as good
464  updatedFragCnt++; // fragment was now updated
465  }
466  } else { // if fragment was not found
467  // find fragment in document with full matching capability
468  //ArrayList <UpdatableFragment> ufl = matcherProvider.matchAllIncrementally(parsedDoc, fragment.toUpdatableFragment());
469  ArrayList<UpdatableFragment> ufl = new ArrayList<UpdatableFragment>();
470  // find nearest fragment
471  UpdatableFragment ufTemp = nearestMatcherProvider.matchInClosestNode(parsedDoc, fragment.toUpdatableFragment());
472  if (ufTemp != null) {
473  ufl.add(ufTemp);
474  } else {
475  // find fragment with sequence iterator (may produce several candidates)
476  ufl = sequenceMatcherProvider.matchAllIncrementally(parsedDoc, fragment.toUpdatableFragment());
477  }
478  if (ufl.size() == 1) { // if one fragment was found
479  // get fragment (index unknown)
480  Iterator<UpdatableFragment> ufIt = ufl.iterator();
481  uf = ufIt.next();
482  fragment.updateWithUpdatableFragment(uf); // update fragment in annotation
483  updatedFragCnt++;
484  if (fragment.getIsGood() == false) { // if fragment was marked as bad
485  fragment.setIsGood(true); // mark as good
486  }
487  } else if (ufl.isEmpty()) { // if no fragment found
488  if (fragment.getIsGood() == true) { // if fragment was not bad
489  fragment.setIsGood(false); // mark as bad
490  nowOrphFragCnt++; // fragment was now orphaned
491  }
492  badFragCnt++;
493  } else { // if searching is ambiguous
494  // select the best fragment: the candidate with the lowest badness wins,
  // on equal badness the first such candidate is kept
495  int minBadness = Integer.MAX_VALUE; // minimal badness
496  UpdatableFragment minBFR = null; // fragment with minimal badness
497  String origPath = fragment.getPath(); // original XPath
498  String origText = fragment.getAnnotatedText(); // original annotated text
499  int origOffset = fragment.getOffset(); // original offset
500  for (Iterator<UpdatableFragment> uflIt = ufl.iterator(); uflIt.hasNext();) {
501  UpdatableFragment ufr = uflIt.next();
502  int lD = Util.levenshtein(origPath, ufr.getXPathString());
503  int lDT = Util.levenshtein(origText, ufr.getText());
504  int offDistance = Math.abs(origOffset - ufr.getOffset());
  // path distance weighs most, then text distance, then offset distance
505  int badness = 10000 * lD + 1000 * lDT + offDistance;
506  if (badness < minBadness) {
507  minBadness = badness;
508  minBFR = ufr;
509  }
510  }
511  uf = minBFR;
512  fragment.updateWithUpdatableFragment(uf); // update fragment in annotation
513  updatedFragCnt++;
514  if (fragment.getIsGood() == false) { // if fragment was marked as bad
515  fragment.setIsGood(true); // mark as good
516  }
517  } // if searching is ambiguous
518  } // if fragment was not found
519  } catch (XPathExpressionException ex) { // bad XPath in fragment
520  if (fragment.getIsGood() == true) { // if fragment was not bad
521  fragment.setIsGood(false); // mark as bad
522  nowOrphFragCnt++; // fragment was now orphaned
523  }
524  badFragCnt++;
525  } catch (Exception e) {
526  //make and throw error
527  //"Exception during the update of the annotation."
  // the exception is not reported; processing stops here, so the remaining
  // fragments of this annotation are not examined
528  badFragCnt++;
529  break;
530  }
531  } // for each annotated fragment
532 
  // the two branches below are empty in this copy of the method
533  if (badFragCnt == annotation.getFragments().size() && nowOrphFragCnt > 0) {
534  // if annotation was just orphaned
535  } else if (nowOrphFragCnt > 0) {
536  // if annotation was just partially orphaned (bad fragments found)
537  }
538  if (nowOrphFragCnt > 0 || updatedFragCnt > 0) {
539  // if new bad fragments found or some fragments were updated
540  return true;
541  }
542  return false; // nothing updated (no update needed)
543  } // updateAnnotation()
544 
545  /**
546  * Method finds document in database by given uri. If don't find returns null.
547  *
548  * @param uri Uri of document
549  * @return document that is specified by given uri or null
550  */
551  private AnnotDocument findDocumentInDB(String uri){
552  Object[] params = new Object[2];
553  params[0] = "uri";
554  params[1] = uri;
555 
556  @SuppressWarnings("unchecked")
557  List<AnnotDocument> results = (List<AnnotDocument>) AppBean.getPersistenceManager().queryDB("AnnotDocument.findByUri", params);
558  if(results != null && !results.isEmpty()){
559  return results.get(0);
560  }
561 
562  return null;
563  }
564 
565  /**
566  * Method finds all annotation belongs to old document. If don't find returns null.
567  *
568  * @param params object with parameters of request
569  * @return list of annotations that belongs to old document
570  */
571  private List<Annotation> getAllAnnotations(CloneParams params){
572  Object[] paramsDB = new Object[2];
573  paramsDB[0] = "sourceDocumentId";
574  paramsDB[1] = params.getOldDocument().getId();
575 
576  @SuppressWarnings("unchecked")
577  List<Annotation> retList = AppBean.getPersistenceManager().queryDB("Annotation.findBySourceDocumentID", paramsDB);
578 
579  return retList;
580  }
581 
582  /**
583  * Method finds all attributes of annotation. If don't find returns null.
584  *
585  * @param id Id of annotation
586  * @return Returns list of attributes that belongs to given annotation
587  */
588  private List<BaseAttribute> getAllAttributes(Integer id){
589  Object[] paramsDB = new Object[2];
590  paramsDB[0] = "annotation";
591  paramsDB[1] = id;
592 
593  @SuppressWarnings("unchecked")
594  List<BaseAttribute> retList = AppBean.getPersistenceManager().queryDB("Attribute.findByAnnotation", paramsDB);
595 
596  return retList;
597  }
598 } // public class DocumentClone
String getJsonParameter(JSONObject jsonResponse, String parameterName)
Class representing annotations for clone list item.
Class represent thread of Storyscope Interface that sends messages to StoryScopes.
List< Annotation > getAllAnnotations(CloneParams params)
Class representing annotated copy of document.
Singleton for storing global variables.
Definition: AppBean.java:47
Class providing access to available matchers.
Compare class using Metaphone approximate string matching method.
Class provides document cloning with new content, initiated from NLP.
Class representing parameters for document cloning.
Base class representing attribute of annotation.
Compare class using Levenshtein approximate string matching method.
Class consisting of traversing method and compare method.
Definition: Comparator.java:37
Class for matcher consisting of comparator and node iterator.
Definition: Matcher.java:32
Utility class (manipulates RFC 3339 dates)
Definition: Util.java:29
boolean makeNewAnnotations(List< Annotation > oldAnnotations, CloneParams params)
Class representing annotated fragment.
Definition: Fragment.java:48
boolean updateAnnotation(Annotation annotation, CloneParams params)