4A Server -  2.0
 All Classes Namespaces Files Functions Variables Enumerator
Comparator.java
Go to the documentation of this file.
1 /*
2  * Project: Server for annotations sharing
3  * Subproject: Position of fragment of text in XML document
4  * Author: Michael Angelov
5  * Edited by: Ing. Jaroslav Dytrych idytrych@fit.vutbr.cz,
6  * Bc. Lukas Kubik xkubik22@stud.fit.vutbr.cz
7  * File: Comparator.java
8  * Description: Class consisting of traversing method and compare method
9  */
10 
11 /**
12  * @file Comparator.java
13  *
14  * @brief Class consisting of traversing method and compare method
15  */
16 
17 /**
18  * @package cz.vutbr.fit.knot.annotations.fragmentUpdater
19  *
20  * @brief Library for updating fragments of documents
21  */
22 package cz.vutbr.fit.knot.annotations.fragmentUpdater;
23 
26 import java.util.ArrayList;
27 import java.util.Iterator;
28 import javax.xml.xpath.XPathExpressionException;
29 import org.w3c.dom.Node;
30 
31 /**
32  * Comparator class consisting of traversing method and compare method
33  *
34  * @brief Class consisting of traversing method and compare method
35  * @author Michael Angelov
36  */
37 public class Comparator {
38 
39  /**
40  * Names of traversing methods
41  *
42  * @brief Names of traversing methods
43  */
44  public enum TraversingMethod {
45  STATIC, CHARACTER, CHARACTER_BIDIRECTIONALLY, WORD, FIRST_AND_LAST_WORD_THEN_LETTER
46  };
47 
48  /** Traversing method */
49  private TraversingMethod traversingMethod;
50  /** Compare method */
52  /** Maximal enlargement of fragment length */
53  private double maxLenghtEnlargement = 1.5;
54  /** Maximal reduction of fragment length */
55  private double maxLenghtReduction = 0.5;
56 
57  /**
58  * Getter for traversing method used in comparator
59  *
60  * @return traversing method used in comparator
61  */
62  public TraversingMethod getTraversingMethod() {
63  return traversingMethod;
64  }
65 
66  /**
67  * Getter for compare method used in comparator
68  *
69  * @return compare method used in comparator
70  */
72  return compareMethod;
73  }
74 
75  /**
76  * Setter for maximum annotation length enlargement used in
77  * FIRST_AND_LAST_WORD method
78  *
79  * @param maxLenghtEnlargement maximum annotation length enlargement
80  */
82  this.maxLenghtEnlargement = maxLenghtEnlargement;
83  }
84 
85  /**
86  * Constructor
87  *
88  * @param compareMethod compare method used to determine if strings match
89  * @param traversingMethod traversing method in strings
90  */
92  this.compareMethod = compareMethod;
93  this.traversingMethod = traversingMethod;
94  }
95 
96  /**
97  * Constructor
98  *
99  * @param compareMethod compare method used to determine if strings match
100  * @param traversingMethod traversing method in strings
101  * @param maxLenghtEnlargement maximum annotation length enlargement
102  * @param maxLenghtReduction maximum annotation length reduction
103  */
105  double maxLenghtEnlargement, double maxLenghtReduction) {
106  this.compareMethod = compareMethod;
107  this.traversingMethod = traversingMethod;
108  this.maxLenghtEnlargement = maxLenghtEnlargement;
109  this.maxLenghtReduction = maxLenghtReduction;
110  }
111 
112  /**
113  * Overriden toString() method
114  *
115  * @return string representation of comparator
116  */
117  @Override
118  public String toString() {
119  String tm = "";
120  switch (traversingMethod) {
121  case CHARACTER:
122  tm = "Character";
123  break;
124  case CHARACTER_BIDIRECTIONALLY:
125  tm = "Character_bidirectionally";
126  break;
127  case WORD:
128  tm = "Word";
129  break;
130  case FIRST_AND_LAST_WORD_THEN_LETTER:
131  tm = "First_and_last_word_then_letter";
132  break;
133  default:
134  tm = "Static";
135  break;
136  }
137  return compareMethod + " (" + tm + ")";
138  }
139 
140 
141  /**
142  * Fragment compare method
143  *
144  * @param queryFragment fragment to be matched in node
145  * @param node node in which string content of queryFrament should be matched
146  * @return result fragment, null if there was no match
147  */
148  public UpdatableFragment compare(UpdatableFragment queryFragment, Node node) throws XPathExpressionException {
149 
150  int beginIndex, endIndex;
151  String nodeSubstring = "", XPathString;
152 
153  if (getNodeContent(node) == null || (getNodeContent(node).length() < queryFragment.getLength())) {
154  return null;
155  }
156 
157  // determining which traversing method should be used
158  switch (traversingMethod) {
159  // offset and length of queryFragment are strictly used
160  case STATIC:
161  beginIndex = queryFragment.getOffset();
162  endIndex = beginIndex + queryFragment.getLength();
163 
164  // if node text too short, skip
165  if (endIndex > getNodeContent(node).length()) {
166  return null;
167  }
168 
169  nodeSubstring = getNodeContent(node).substring(beginIndex, endIndex);
170 
171  // if comparison is successful, return XPath position of node in which match occured
172  if (compareMethod.compare(queryFragment.getText(), nodeSubstring) == true) {
173  XPathString = XPathHelper.XPathStringOfNode(node);
174 
175  // return brand new result fragment
176  return new UpdatableFragment(XPathString, queryFragment.getOffset(),
177  queryFragment.getLength(), nodeSubstring);
178  }
179 
180  break;
181 
182  // string traversing method with "character bidirectional step"
183  case CHARACTER_BIDIRECTIONALLY:
184  // indexes for bidirectional iteration
185  int beginIndexL, beginIndexR, endIndexL, endIndexR;
186  beginIndexL = beginIndexR = queryFragment.getOffset();
187  endIndexL = endIndexR = beginIndexL + queryFragment.getLength();
188 
189  // check fragment position and shift it to the end of element if the offset is bad.
190  if(endIndexL > getNodeContent(node).length()){
191  endIndexL = endIndexR = getNodeContent(node).length();
192  beginIndexL = beginIndexR = endIndexL - queryFragment.getLength();
193  }
194 
195  // loop end condition (Stops the loop when both iteration ends.)
196  boolean endIteration = false;
197  // stepping through string
198  while(!endIteration){
199  endIteration = true;
200 
201  // Iterating to right from the original offset
202  if(endIndexR <= getNodeContent(node).length()){
203  nodeSubstring = getNodeContent(node).substring(beginIndexR, endIndexR);
204 
205  // if comparison is successful, return XPath position of node in which match occured
206  if (compareMethod.compare(queryFragment.getText(), nodeSubstring) == true) {
207  XPathString = XPathHelper.XPathStringOfNode(node);
208 
209  // return brand new result fragment
210  return new UpdatableFragment(XPathString, beginIndexR,
211  queryFragment.getLength(), nodeSubstring);
212  }
213  ++beginIndexR;
214  ++endIndexR;
215  endIteration = false;
216  }
217 
218  // Iterating to left from the original offset
219  if(beginIndexL >= 0){
220  nodeSubstring = getNodeContent(node).substring(beginIndexL, endIndexL);
221 
222  // if comparison is successful, return XPath position of node in which match occured
223  if (compareMethod.compare(queryFragment.getText(), nodeSubstring) == true) {
224  XPathString = XPathHelper.XPathStringOfNode(node);
225 
226  // return brand new result fragment
227  return new UpdatableFragment(XPathString, beginIndexL,
228  queryFragment.getLength(), nodeSubstring);
229  }
230  --beginIndexL;
231  --endIndexL;
232  endIteration = false;
233  }
234  }
235  break;
236 
237  // string traversing method with "character step"
238  case CHARACTER:
239  beginIndex = 0;
240  endIndex = beginIndex + queryFragment.getLength();
241 
242  // stepping through string
243  while (endIndex <= getNodeContent(node).length()) {
244  nodeSubstring = getNodeContent(node).substring(beginIndex, endIndex);
245 
246  // if comparison is successful, return XPath position of node in which match occured
247  if (compareMethod.compare(queryFragment.getText(), nodeSubstring) == true) {
248  XPathString = XPathHelper.XPathStringOfNode(node);
249 
250  // return brand new result fragment
251  return new UpdatableFragment(XPathString, beginIndex,
252  queryFragment.getLength(), nodeSubstring);
253  }
254 
255  beginIndex++;
256  endIndex++;
257  }
258 
259  break;
260 
261  // string traversing method with "word step" comparing first and last word of the fragment
262  case FIRST_AND_LAST_WORD_THEN_LETTER:
263  ArrayList<IndexAndlength> firstWordsHit = new ArrayList<IndexAndlength>();
264  ArrayList<IndexAndlength> lastWordsHit = new ArrayList<IndexAndlength>();
265 
266  UpdatableFragment fr = firstAndLastWordMatchMethod(node, queryFragment, firstWordsHit, lastWordsHit);
267  if(fr != null){
268  return fr;
269  }
270 
271  return firstAndLastLetterMatchMethod(node, queryFragment, firstWordsHit, lastWordsHit);
272 
273  // string traversing method with "word step"
274  case WORD:
275  return wordMatchMethod(node, queryFragment);
276 
277  } // switch (traversingMethod)
278 
279  // there was no success in matching, return null
280  return null;
281  } // compare()
282 
283  /**
284  * Gets content of XML node
285  *
286  * @param node Node with content
287  * @return If it's text node or CDATA, returns textual content of XML node, null otherwise
288  */
289  public static String getNodeContent(Node node) {
290  if (node == null) {
291  return null;
292  }
293  if (node.getNodeType() == Node.TEXT_NODE || node.getNodeType() == Node.CDATA_SECTION_NODE) {
294  return node.getNodeValue();
295  }
296 
297  return null;
298  } // getNodeContent()
299 
300  /**
301  * Selects the best substrings of the new matched fragment.
302  * Best == least different from the original fragment
303  *
304  * @param origFragment Original fragment
305  * @param newFragment Matched fragment
306  * @return least different part of the matched fragment
307  * @throws XPathExpressionException
308  */
310  throws XPathExpressionException {
311  String words[] = newFragment.getText().split(" "); // new fragment words
312  int minBadnessBeg, minBadnessEnd, badness; // tresholds
313  minBadnessBeg = badness = Integer.MAX_VALUE;
314 
315  String origText = origFragment.getText(); // original annotated text
316  int origOffset = origFragment.getOffset(); // original offset
317 
318  String origNewText = newFragment.getText(); // new fragment annotated text
319  int origNewOffset = newFragment.getOffset(); // new fragment offset
320  String origNewXpath = newFragment.getXPathString();
321 
322  int maxIterations = words.length;
323  int index = 0;
324  int index2 = 0;
325  String newPart = ""; // currently matched substring
326  String minPart = ""; // lest different substring in iteration
327  int minOffset = 0; // lest different substring offset in iteration
328 
329  // lest different subfragments in all iterations
330  ArrayList<UpdatableFragment> BestSubParts = new ArrayList<UpdatableFragment>();
331 
332  // removing from the beginning of the fragment
333  while(index < maxIterations){
334  int newOffset = countWordsLengthBeg(index, words); // offset of the beginning od the next matched substring
335  String removedFromBeg = origNewText.substring(newOffset); // currently matched substring cut from the beginning
336 
337  // result for the beginning cut
338  int LD = Util.levenshtein(origText, removedFromBeg);
339  double LPerc = (LD / (double)origText.length()) * 100;
340 
341  badness = 5* (int)LPerc + Math.abs(origOffset - newOffset);
342 
343  // if the previous result was better then end the iteration
344  if(badness > minBadnessBeg){
345  break;
346  }
347  else{ // else lower the treshold
348  minBadnessBeg = badness;
349  }
350 
351  // reset the treshold for the end cut
352  minBadnessEnd = Integer.MAX_VALUE;
353 
354  // removing from the end of the fragment
355  while(index2 < (maxIterations-index)){
356  // currently matched substring cut from the end
357  newPart = removedFromBeg.substring(0, countWordsLengthEnd(index2, words, removedFromBeg.length()));
358  // result for the end cut
359  int LD2 = Util.levenshtein(origText, newPart);
360  double LPerc2 = (LD2 / (double)origText.length()) * 100;
361 
362  badness = 5 * (int)LPerc2 + Math.abs(origOffset - newOffset);
363 
364  // if the previous result was better then end the iteration
365  if(badness > minBadnessEnd){
366  break;
367  }
368  // else lower the treshold and remember the better substring
369  else{
370  minBadnessEnd = badness;
371  minPart = newPart;
372  minOffset = newOffset;
373  }
374  ++index2;
375  }
376 
377  // add the best subfragment
378  BestSubParts.add(new UpdatableFragment(origNewXpath, origNewOffset+minOffset, minPart.length(), minPart));
379 
380  // reset the inner index (end cut)
381  index2 = 0;
382 
383  // raise the outer index (beggining cut)
384  ++index;
385  }
386 
387  // Select the least different subfragment
388  if(BestSubParts.size() > 1){
389  return selectBestSubFragment(origFragment, BestSubParts);
390  }
391  // If there was only one result then return it
392  else if(BestSubParts.size() == 1){
393  return BestSubParts.get(0);
394  }
395 
396  // This should never happen
397  return null;
398  }
399 
400  /**
401  * What is the offset of the n-th word from the beginning in the string with gaps?
402  *
403  * @param count n
404  * @param words All words
405  * @return offset of the n-th word from the beginning od the string
406  */
407  private int countWordsLengthBeg(int count, String words[]){
408  int offset = 0;
409  for(int i = 0; i < count; i++){
410  offset += words[i].length()+1;
411  }
412  return offset;
413  }
414 
415  /**
416  * What is the offset of the n-th word from the end in the string with gaps?
417  *
418  * @param count n
419  * @param words All words
420  * @param wholeLength Length of the whole string with gaps
421  * @return offset of the n-th word from the end od the string
422  */
423  private int countWordsLengthEnd(int count, String words[], int wholeLength){
424  int offset = wholeLength;
425  for(int i = words.length-1; i >= words.length - count; i--){
426  offset -= words[i].length()+1;
427  }
428  return offset;
429  }
430 
431  /**
432  * Selects the best fragment from the best substrings
433  *
434  * @param origFragment Original fragment
435  * @param BestSubParts Best substrings
436  * @return New selected fragment
437  * @throws XPathExpressionException
438  */
440  ArrayList<UpdatableFragment> BestSubParts)
441  throws XPathExpressionException{
442  if(BestSubParts.isEmpty()){
443  return null;
444  }
445  else if(BestSubParts.size() == 1){
446  return BestSubParts.get(0);
447  }
448 
449  int minBadness = Integer.MAX_VALUE; //treshold
450 
451  String originalText = origFragment.getText(); // original fragment text
452  int orignalOffset = origFragment.getOffset(); // original fragment offset
453 
454  Iterator<UpdatableFragment> itFrg = BestSubParts.iterator(); // best subfragments iterator
455 
456  UpdatableFragment comparedfragment; // currently compared fragment
457 
458  int index = 0; // current index
459  int bestIndex = 0; // index of the best fragment text
460 
461  // finds the best substring
462  while(itFrg.hasNext()){
463  comparedfragment = itFrg.next();
464 
465  int LD = Util.levenshtein(originalText, comparedfragment.getText());
466  double LPerc = (LD / (double)originalText.length()) * 100;
467 
468  int badness = 5 * (int)LPerc +
469  Math.abs(orignalOffset - comparedfragment.getOffset());
470  if(badness < minBadness){
471  minBadness = badness;
472  bestIndex = index;
473  }
474  ++index;
475  }
476 
477  return BestSubParts.get(bestIndex);
478  }
479 
480  /**
481  * Method which is matching over all words of the fragment
482  *
483  * @param node node in which string content of queryFrament should be matched
484  * @param queryFragment fragment to be matched in node
485  * @return result fragment, null if there was no match
486  * @throws XPathExpressionException
487  */
488  private UpdatableFragment wordMatchMethod(Node node, UpdatableFragment queryFragment)
489  throws XPathExpressionException
490  {
491  String[] words = getNodeContent(node).split(" ");
492  int wordOffset = 0;
493  StringBuilder nodeSb;
494  // was any match in this substring?
495  boolean match = false;
496  StringBuilder newFragment = new StringBuilder();
497  int newFragmentOffset = 0;
498 
499  // all the best matched subfragments
500  ArrayList<UpdatableFragment> bestFragments = new ArrayList<UpdatableFragment>();
501 
502  // iterating through all of the words in node
503  for (int i = 0; i <= words.length - queryFragment.getWordCount(); i++) {
504  nodeSb = new StringBuilder();
505  if(i > 0){
506  wordOffset += words[i-1].length()+1;
507  }
508 
509  // comparing the same number of words in node that is in queryFragment
510  for (int j = 0; j < queryFragment.getWordCount(); j++) {
511  nodeSb.append(words[i + j]);
512  if (j != queryFragment.getWordCount() - 1) {
513  nodeSb.append(' ');
514  }
515  }
516 
517  // if comparison is successful, add the substring to the compared string
518  if (compareMethod.compare(queryFragment.getText(), nodeSb.toString()) == true) {
519  if(match == false){ // new match
520  newFragment = new StringBuilder(nodeSb.toString());
521  newFragmentOffset = wordOffset;
522  match = true;
523  }
524  else{ // at least one match was before this match
525  newFragment.append(' ');
526  newFragment.append(words[i + queryFragment.getWordCount()-1]);
527  }
528  }
529  // no match and substring is filled then select the best part of it
530  else if(match == true){
531  match = false;
532 
533  UpdatableFragment fr = new UpdatableFragment(XPathHelper.XPathStringOfNode(node), newFragmentOffset,
534  newFragment.length(), newFragment.toString());
535 
536  bestFragments.add(selectBestPart(queryFragment, fr));
537  }
538  }
539 
540  // Select the least different matched subfragment
541  if(bestFragments.size() > 1){
542  return selectBestSubFragment(queryFragment, bestFragments);
543  }
544  // If there was only one result then return it
545  else if(bestFragments.size() == 1){
546  return bestFragments.get(0);
547  }
548  else{
549  return null;
550  }
551  }
552 
553  /**
554  * Method which is matching first and last word of the original fragment
555  *
556  * @param node node in which string content of queryFrament should be matched
557  * @param queryFragment fragment to be matched in node
558  * @param firstWordsHit Initialized arraylist for storing first word of the fragment
559  * match hit in the node.
560  * @param lastWordsHit Initialized arraylist for storing last word of the fragment
561  * match hit in the node.
562  * @return result fragment, null if there was no match
563  * @throws XPathExpressionException
564  */
566  ArrayList<IndexAndlength> firstWordsHit, ArrayList<IndexAndlength> lastWordsHit)
567  throws XPathExpressionException
568  {
569  if(queryFragment.getWordCount() <= 1){
570  return null;
571  }
572 
573  // Content of the node
574  String[] words = getNodeContent(node).split(" ");
575  // Fragment words
576  String[] fragmentWords = queryFragment.getText().split(" ");
577  // Original fragment XPath
578  String XPath = XPathHelper.XPathStringOfNode(node);
579 
580  // First word of the original fragment
581  String firstWord = fragmentWords[0];
582  // Last word of the original fragment
583  String lastWord = fragmentWords[fragmentWords.length-1];
584 
585  // Variable with the starting index and the starting offset
586  IndexAndlength IaL = new IndexAndlength();
587  // Maximum length of the new fragment
588  int maxLength = (int) (queryFragment.getLength() * this.maxLenghtEnlargement);
589  // Minimum length of the new fragment
590  int minLength = (int) (queryFragment.getLength() * this.maxLenghtReduction);
591 
592  // all the best matched subfragments
593  ArrayList<UpdatableFragment> bestFragments = new ArrayList<UpdatableFragment>();
594 
595  // iterating through all of the words in node
596  while(IaL.index < words.length){
597  IaL.index = matchFirstWord(words, firstWord, lastWord, IaL, firstWordsHit, lastWordsHit);
598  // if the first word is same as the original first word
599  if(IaL.index != null){
600  ++IaL.index;
601  UpdatableFragment fr = matchLastWord(words, lastWord, IaL,
602  maxLength, minLength, XPath);
603  // if the last word is same as the original last word then we add the fragment to the best fragments
604  if(fr != null){
605  bestFragments.add(fr);
606  }
607  IaL.offset += words[IaL.index-1].length()+1;
608  }
609  else{
610  break;
611  }
612  }
613 
614  // Selects the best fragment from subfragments
615  return selectBestSubFragment(queryFragment, bestFragments);
616  }
617 
618  /**
619  * Finds the first occurrence of the word in the String array nodeWords
620  *
621  * @param nodeWords Words of the node
622  * @param firstWord First word which we are looking for
623  * @param lastWord Last word which we are looking for
624  * @param IaL Variable with the starting index and the starting offset
625  * @param firstWordsHit Array to which found positions of first word will be stored
626  * @param lastWordsHit Array to which found positions of last word will be stored
627  * @return new offset
628  */
629  private Integer matchFirstWord(String[] nodeWords, String firstWord, String lastWord,
630  IndexAndlength IaL, ArrayList<IndexAndlength> firstWordsHit, ArrayList<IndexAndlength> lastWordsHit)
631  {
632  for (int i = IaL.index; i <= nodeWords.length-1; i++) {
633  if(i > 0){
634  IaL.offset += nodeWords[i-1].length()+1;
635  }
636  if (compareMethod.compare(firstWord, nodeWords[i]) == true) {
637  IaL.comparedWordLength = nodeWords[i].length();
638  IndexAndlength IaLHit = new IndexAndlength(i, IaL.comparedWordLength, IaL.offset);
639  firstWordsHit.add(IaLHit);
640  return i;
641  }
642  else if(compareMethod.compare(lastWord, nodeWords[i]) == true){
643  IndexAndlength IaLHit = new IndexAndlength(i, nodeWords[i].length(), IaL.offset);
644  lastWordsHit.add(IaLHit);
645  }
646  }
647 
648  return null;
649  }
650 
651  /**
652  * Finds the last occurrence of the word in the String array nodeWords
653  *
654  * @param nodeWords Words of the node
655  * @param word Word which we are looking for
656  * @param IaL Variable with the starting index and the starting offset
657  * @param maxLength Maximum length of the new fragment
658  * @param minLength Minimal length of the new fragment
659  * @param XPath New fragment XPath
660  * @return New fragment or null if no fragment was found.
661  * @throws XPathExpressionException
662  */
663  private UpdatableFragment matchLastWord(String[] nodeWords, String word,
664  IndexAndlength IaL, int maxLength, int minLength, String XPath) throws XPathExpressionException
665  {
666  // maximum length of the fragment - first word
667  int maxSize = maxLength - (IaL.comparedWordLength);
668  int actualSize = 0;
669 
670  for (int i = IaL.index; i <= nodeWords.length-1; i++) {
671  actualSize += nodeWords[i].length()+1;
672  if(actualSize > maxSize){
673  return null;
674  }
675  if (compareMethod.compare(word, nodeWords[i]) == true) {
676  String fragmentString = createFragmentStringFromWords(nodeWords, IaL.index-1, i);
677  if(fragmentString.length() < minLength){
678  return null;
679  }
680  else{
681  return new UpdatableFragment(XPath, IaL.offset, fragmentString.length(), fragmentString);
682  }
683  }
684  }
685 
686  return null;
687  }
688 
689  /**
690  * Creates a new fragment string from the words in array words.
691  * Inserts spaces between the words.
692  *
693  * @param words Words array used for the creation of the fragment string
694  * @param firstWordIndex Index of the first word.
695  * @param lastWordIndex Index of the last word.
696  * @return String with the new fragment.
697  */
698  private String createFragmentStringFromWords(String[] words, int firstWordIndex, int lastWordIndex){
699  StringBuilder sb = new StringBuilder();
700  for(int i = firstWordIndex; i <= lastWordIndex; i++){
701  sb.append(words[i]);
702  if(i < lastWordIndex){
703  sb.append(' ');
704  }
705  }
706  return sb.toString();
707  }
708 
709  /**
710  * Method which is matching first or last word and last or first letter of the original fragment
711  * It's based in similiar idea as firstAndLastWordMatchMethod
712  *
713  * @param node node in which string content of queryFrament should be matched
714  * @param queryFragment fragment to be matched in node
715  * @param firstWordsHit Initialized arraylist with stored first words of the fragment
716  * match hit in the node.
717  * @param lastWordsHit Initialized arraylist with stored last words of the fragment
718  * match hit in the node.
719  * @return result fragment, null if there was no match
720  * @throws XPathExpressionException
721  */
723  ArrayList<IndexAndlength> firstWordsHit, ArrayList<IndexAndlength> lastWordsHit)
724  throws XPathExpressionException
725  {
726  if(firstWordsHit.isEmpty() && lastWordsHit.isEmpty()){
727  return null;
728  }
729 
730  String[] words = getNodeContent(node).split(" ");
731  String[] fragmentWords = queryFragment.getText().split(" ");
732  String XPath = XPathHelper.XPathStringOfNode(node);
733 
734  String firstWord = fragmentWords[0];
735  String lastWord = fragmentWords[fragmentWords.length-1];
736 
737  ArrayList<UpdatableFragment> matchedFragments = new ArrayList<UpdatableFragment>();
738 
739  int maxLength = (int) (queryFragment.getLength() * this.maxLenghtEnlargement);
740  int minLength = (int) (queryFragment.getLength() * this.maxLenghtReduction);
741 
742  Iterator<IndexAndlength> it = firstWordsHit.iterator();
743 
744  IndexAndlength IaL;
745  while(it.hasNext()){
746  IaL = it.next();
748  fr = matchSameLastLetter(words, lastWord, IaL, maxLength, minLength, XPath);
749  if(fr != null){
750  matchedFragments.add(fr);
751  }
752  }
753 
754  it = lastWordsHit.iterator();
755 
756  while(it.hasNext()){
757  IaL = it.next();
758  UpdatableFragment fr = matchSameFirstLetter(words, firstWord, IaL, maxLength, minLength, XPath);
759  if(fr != null){
760  matchedFragments.add(fr);
761  }
762  }
763 
764  // If we haven't matched any fragment then we try to find the most similiar fragment
765  if(matchedFragments.isEmpty()){
766  it = firstWordsHit.iterator();
767 
768  while(it.hasNext()){
769  IaL = it.next();
771  fr = matchMostSimiliar(words, IaL, maxLength, minLength, XPath, queryFragment.getText());
772  if(fr != null){
773  matchedFragments.add(fr);
774  }
775  }
776  }
777 
778  return selectBestSubFragment(queryFragment, matchedFragments);
779  }
780 
781  /**
782  * Method searches for the first word with the same last letter as the word in parameter word.
783  *
784  * @param nodeWords Words of the node
785  * @param word Word which we are looking for
786  * @param IaL Variable with the starting index and the starting offset
787  * @param maxLength Maximum length of the new fragment
788  * @param minLength Minimal length of the new fragment
789  * @param XPath New fragment XPath
790  * @return New fragment or null if no fragment was found.
791  * @throws XPathExpressionException
792  */
793  private UpdatableFragment matchSameLastLetter(String[] nodeWords, String word,
794  IndexAndlength IaL, int maxLength, int minLength, String XPath)
795  throws XPathExpressionException
796  {
797 
798  int maxSize = maxLength - (IaL.comparedWordLength);
799  int actualSize = 0;
800 
801  char lastLetter = word.charAt(word.length()-1);
802 
803  for(int i = IaL.index+1; i < nodeWords.length; i++){
804  actualSize += nodeWords[i].length()+1;
805  if(actualSize > maxSize){
806  return null;
807  }
808  if(nodeWords[i].charAt(nodeWords[i].length()-1) == lastLetter) {
809  String fragmentString = createFragmentStringFromWords(nodeWords, IaL.index, i);
810  if(fragmentString.length() < minLength){
811  return null;
812  }
813  else{
814  return new UpdatableFragment(XPath, IaL.offset, fragmentString.length(), fragmentString);
815  }
816  }
817  }
818  return null;
819  }
820 
821  /**
822  * Method searches backwards for the first word with the same first letter
823  * as the word in parameter word.
824  *
825  * @param nodeWords Words of the node
826  * @param word Word which we are looking for
827  * @param IaL Variable with the starting index and the starting offset
828  * @param maxLength Maximum length of the new fragment
829  * @param minLength Minimal length of the new fragment
830  * @param XPath New fragment XPath
831  * @return New fragment or null if no fragment was found.
832  * @throws XPathExpressionException
833  */
834  private UpdatableFragment matchSameFirstLetter(String[] nodeWords, String word,
835  IndexAndlength IaL, int maxLength, int minLength, String XPath)
836  throws XPathExpressionException
837  {
838 
839  int maxSize = maxLength - (IaL.comparedWordLength);
840  int actualSize = 0;
841 
842  char firstLetter = word.charAt(0);
843 
844  for(int i = IaL.index-1; i >= 0; i--){
845  actualSize += nodeWords[i].length()+1;
846  if(actualSize > maxSize){
847  return null;
848  }
849  IaL.offset -= nodeWords[i].length()+1;
850  if(nodeWords[i].charAt(0) == firstLetter) {
851  String fragmentString = createFragmentStringFromWords(nodeWords, i, IaL.index);
852  if(fragmentString.length() < minLength){
853  return null;
854  }
855  else{
856  return new UpdatableFragment(XPath, IaL.offset, fragmentString.length(), fragmentString);
857  }
858  }
859  }
860  return null;
861  }
862 
863  /**
864  * Method creates a fragment with most compatible words and the least
865  * levenstein distance from original fragment.
866  *
867  * @param nodeWords Words of the node
868  * @param IaL Variable with the starting index and the starting offset
869  * @param maxLength Maximum length of the new fragment
870  * @param minLength Minimum length of the new fragment
871  * @param XPath New fragment XPath
872  * @param originalFragment Text of the original fragment
873  * @return New fragment or null if no fragment was found.
874  * @throws XPathExpressionException
875  */
876  private UpdatableFragment matchMostSimiliar(String[] nodeWords,
877  IndexAndlength IaL, int maxLength, int minLength, String XPath, String originalFragment)
878  throws XPathExpressionException
879  {
880  int distance = Integer.MAX_VALUE;
881 
882  int maxSize = maxLength - (IaL.comparedWordLength);
883  int actualSize = 0;
884  String bestFragment = null;
885 
886  for(int i = IaL.index+1; i < nodeWords.length; i++){
887  actualSize += nodeWords[i].length()+1;
888  if(actualSize > maxSize){
889  return null;
890  }
891  String fragmentString = createFragmentStringFromWords(nodeWords, IaL.index, i);
892  int newDistance;
893  if((newDistance = Util.levenshtein(fragmentString, originalFragment)) <= distance){
894  distance = newDistance;
895  bestFragment = fragmentString;
896  }
897  // It will never be null because of the initial value of the distance.
898  else if(bestFragment.length() >= minLength){
899  return new UpdatableFragment(XPath, IaL.offset, bestFragment.length(), bestFragment);
900  }
901  }
902  if(bestFragment == null || bestFragment.length() < minLength){
903  return null;
904  }
905  else{
906  return new UpdatableFragment(XPath, IaL.offset, bestFragment.length(), bestFragment);
907  }
908 
909  } // matchMostSimiliar()
910 
911  /**
912  * Auxiliary class with index, offset and word length.
913  *
914  * @brief Auxiliary class with index, offset and word length.
915  */
916  private class IndexAndlength {
917  /** Index */
918  public Integer index = 0;
919  /** Compared word length */
920  public int comparedWordLength = 0;
921  /** Offset */
922  public int offset = 0;
923 
924  /**
925  * Default Constructor
926  */
927  public IndexAndlength(){
928 
929  }
930 
931  /**
932  * Constructor
933  *
934  * @param index Index
935  * @param comparedWordLength Compared word length
936  * @param offset Offset
937  */
938  public IndexAndlength(Integer index, int comparedWordLength, int offset){
939  this.index = index;
940  this.comparedWordLength = comparedWordLength;
941  this.offset = offset;
942  }
943  } // private class IndexAndlength
944 } // class Comparator
IndexAndlength(Integer index, int comparedWordLength, int offset)
Integer matchFirstWord(String[] nodeWords, String firstWord, String lastWord, IndexAndlength IaL, ArrayList< IndexAndlength > firstWordsHit, ArrayList< IndexAndlength > lastWordsHit)
static int levenshtein(String s, String t)
Definition: Util.java:307
Auxiliary class with index, offset and word length.
int countWordsLengthEnd(int count, String words[], int wholeLength)
UpdatableFragment selectBestPart(UpdatableFragment origFragment, UpdatableFragment newFragment)
UpdatableFragment matchMostSimiliar(String[] nodeWords, IndexAndlength IaL, int maxLength, int minLength, String XPath, String originalFragment)
Comparator(CompareMethod compareMethod, TraversingMethod traversingMethod, double maxLenghtEnlargement, double maxLenghtReduction)
UpdatableFragment wordMatchMethod(Node node, UpdatableFragment queryFragment)
UpdatableFragment matchLastWord(String[] nodeWords, String word, IndexAndlength IaL, int maxLength, int minLength, String XPath)
Comparator(CompareMethod compareMethod, TraversingMethod traversingMethod)
Definition: Comparator.java:91
Class consisting of traversing method and compare method.
Definition: Comparator.java:37
UpdatableFragment firstAndLastLetterMatchMethod(Node node, UpdatableFragment queryFragment, ArrayList< IndexAndlength > firstWordsHit, ArrayList< IndexAndlength > lastWordsHit)
UpdatableFragment compare(UpdatableFragment queryFragment, Node node)
UpdatableFragment matchSameFirstLetter(String[] nodeWords, String word, IndexAndlength IaL, int maxLength, int minLength, String XPath)
Utility class (manipulates RFC 3339 dates)
Definition: Util.java:29
String createFragmentStringFromWords(String[] words, int firstWordIndex, int lastWordIndex)
UpdatableFragment matchSameLastLetter(String[] nodeWords, String word, IndexAndlength IaL, int maxLength, int minLength, String XPath)
UpdatableFragment firstAndLastWordMatchMethod(Node node, UpdatableFragment queryFragment, ArrayList< IndexAndlength > firstWordsHit, ArrayList< IndexAndlength > lastWordsHit)
UpdatableFragment selectBestSubFragment(UpdatableFragment origFragment, ArrayList< UpdatableFragment > BestSubParts)
void setmaxLenghtEnlargement(double maxLenghtEnlargement)
Definition: Comparator.java:81