13 package cz.vutbr.fit.knot.annotations.comet;
 
   21 import java.io.IOException;
 
   22 import java.util.ArrayList;
 
   23 import java.util.Iterator;
 
   24 import javax.xml.parsers.ParserConfigurationException;
 
   25 import org.w3c.dom.Document;
 
   26 import org.w3c.dom.DocumentFragment;
 
   27 import org.w3c.dom.Node;
 
   28 import org.w3c.dom.NodeList;
 
   29 import org.w3c.dom.traversal.DocumentTraversal;
 
   30 import org.w3c.dom.traversal.NodeFilter;
 
   31 import org.w3c.dom.traversal.NodeIterator;
 
   32 import org.xml.sax.SAXException;
 
   59                                                                        ArrayList<ArrayList<Fragment>> badFragments) {
 
   60     ArrayList<ArrayList<Fragment>> retFragments = 
new ArrayList<ArrayList<Fragment>>();
 
   61     ArrayList<Integer> processedLengths = 
new ArrayList<Integer>();
 
   62     int linLength = linFragments.size();  
 
   63     for (
int i = 0; i < linLength; i++) {  
 
   64       retFragments.add(
new ArrayList<Fragment>());
 
   65       badFragments.add(
new ArrayList<Fragment>());
 
   66       processedLengths.add(0);
 
   71     if (doc.getDocumentElement() == null) {
 
   74       NodeIterator nIter = ((DocumentTraversal) doc).createNodeIterator(doc.getDocumentElement(),
 
   75                             NodeFilter.SHOW_TEXT + NodeFilter.SHOW_CDATA_SECTION, null, 
true);
 
   76       Integer nStartOffset = 0;  
 
   77       Integer nEndOffset = 0;  
 
   80       Node currentNode = nIter.nextNode();
 
   81       int newlineOffsetCompensation = 0; 
 
   83       while (currentNode != null && convertedLin < linLength) {
 
   85         nContent = currentNode.getNodeValue();
 
   86         nEndOffset = nStartOffset + nContent.length();
 
   90           lFr = linFragments.get(i);
 
   92           lFr.setOffset(lFr.getOffset() - newlineOffsetCompensation);
 
   93           int lFrEnd = lFr.getOffset() + lFr.
getLength();
 
   94           if (lFr.
getOffset() < nStartOffset && lFrEnd < nStartOffset) {
 
   96             if (i == convertedLin) {  
 
   99           } 
else if (lFr.
getOffset() >= nStartOffset && lFrEnd <= nEndOffset) {
 
  101             String path = XPathHelper.XPathStringOfNode(currentNode);
 
  102             Integer offset = lFr.getOffset() - nStartOffset;
 
  103             String content = nContent.substring(offset, offset + lFr.getLength());
 
  107               retFragments.get(i).add(f);  
 
  109               badFragments.get(i).add(f);  
 
  111             processedLengths.set(i, lFr.getLength());
 
  112             if (i == convertedLin) {  
 
  115           } 
else if (lFr.
getOffset() >= nStartOffset && lFr.getOffset() < nEndOffset
 
  116                      && lFrEnd > nEndOffset) {  
 
  117             String path = XPathHelper.XPathStringOfNode(currentNode);
 
  118             Integer offset = lFr.getOffset() - nStartOffset;
 
  119             Integer length = nContent.length() - offset;
 
  120             String content = nContent.substring(offset, nContent.length());
 
  122             int lFTL = lFr.getAnnotatedText().length();
 
  123             if (length <= lFTL && lFr.
getAnnotatedText().substring(0, length).equals(content)) {
 
  124               retFragments.get(i).add(f);  
 
  126               badFragments.get(i).add(f);  
 
  128             processedLengths.set(i, length);
 
  129           } 
else if (lFr.
getOffset() < nStartOffset && lFrEnd > nStartOffset
 
  130                      && lFrEnd <= nEndOffset) {  
 
  131             String path = XPathHelper.XPathStringOfNode(currentNode);
 
  133             Integer length = lFrEnd - nStartOffset;
 
  134             String content = nContent.substring(0, lFrEnd - nStartOffset);
 
  135             int lFTL = lFr.getAnnotatedText().length();
 
  137             if (processedLengths.get(i) < lFTL
 
  138                 && lFr.getAnnotatedText().substring(processedLengths.get(i), lFTL).equals(content)) {
 
  139               retFragments.get(i).add(f);  
 
  141               badFragments.get(i).add(f);  
 
  143             processedLengths.set(i, processedLengths.get(i) + length);
 
  144             if (i == convertedLin) {  
 
  147           } 
else if (lFr.
getOffset() < nStartOffset && lFrEnd > nEndOffset) {
 
  149             String path = XPathHelper.XPathStringOfNode(currentNode);
 
  151             Integer length = nContent.length();
 
  152             int lFTL = lFr.getAnnotatedText().length();
 
  153             int lFTE = processedLengths.get(i) + length;
 
  155             if (processedLengths.get(i) < lFTL && lFTE <= lFTL &&
 
  156                 lFr.getAnnotatedText().substring(processedLengths.get(i), lFTE).equals(nContent)) {
 
  157               retFragments.get(i).add(f);  
 
  159               badFragments.get(i).add(f);  
 
  161             processedLengths.set(i, processedLengths.get(i) + length);
 
  166           lFr.setOffset(lFr.getOffset() + newlineOffsetCompensation);
 
  169         } 
while (i < linLength && lFr.
getOffset() < nEndOffset);
 
  171         nStartOffset += nContent.length();  
 
  172         currentNode = nIter.nextNode();  
 
  173         newlineOffsetCompensation += 1;  
 
  192                                                                        ArrayList<ArrayList<SuggestionFragment>> badFragments) {
 
  193     ArrayList<ArrayList<SuggestionFragment>> retFragments = 
new ArrayList<ArrayList<SuggestionFragment>>();
 
  194     ArrayList<Integer> processedLengths = 
new ArrayList<Integer>();
 
  195     int linLength = linFragments.size();  
 
  196     for (
int i = 0; i < linLength; i++) {  
 
  197       retFragments.add(
new ArrayList<SuggestionFragment>());
 
  198       badFragments.add(
new ArrayList<SuggestionFragment>());
 
  199       processedLengths.add(0);
 
  204     if (doc.getDocumentElement() == null) {
 
  207       NodeIterator nIter = ((DocumentTraversal) doc).createNodeIterator(doc.getDocumentElement(),
 
  208                             NodeFilter.SHOW_TEXT + NodeFilter.SHOW_CDATA_SECTION, null, 
true);
 
  209       Integer nStartOffset = 0;  
 
  210       Integer nEndOffset = 0;  
 
  211       String nContent = 
"";  
 
  212       int convertedLin = 0;  
 
  213       Node currentNode = nIter.nextNode();
 
  214       int newlineOffsetCompensation = 0; 
 
  216       while (currentNode != null && convertedLin < linLength) {
 
  218         nContent = currentNode.getNodeValue();
 
  222         if (nContent.replaceAll(
"[\\s\\u00A0]+$", 
"").contentEquals(
"")) {
 
  223           currentNode = nIter.nextNode();  
 
  227         nEndOffset = nStartOffset + nContent.length();
 
  228         int i = convertedLin;
 
  231           lFr = linFragments.get(i);
 
  233           lFr.setOffset(lFr.getOffset() - newlineOffsetCompensation);
 
  234           int lFrEnd = lFr.getOffset() + lFr.
getLength();
 
  235           if (lFr.
getOffset() < nStartOffset && lFrEnd < nStartOffset) {
 
  237             if (i == convertedLin) {  
 
  240           } 
else if (lFr.
getOffset() >= nStartOffset && lFrEnd <= nEndOffset) {
 
  242             String path = XPathHelper.XPathStringOfNode(currentNode);
 
  243             Integer offset = lFr.getOffset() - nStartOffset;
 
  244             String content = nContent.substring(offset, offset + lFr.getLength());
 
  248               retFragments.get(i).add(f);  
 
  250               badFragments.get(i).add(f);  
 
  252             processedLengths.set(i, lFr.getLength());
 
  253             if (i == convertedLin) {  
 
  256           } 
else if (lFr.
getOffset() >= nStartOffset && lFr.getOffset() < nEndOffset
 
  257                      && lFrEnd > nEndOffset) {  
 
  258             String path = XPathHelper.XPathStringOfNode(currentNode);
 
  259             Integer offset = lFr.getOffset() - nStartOffset;
 
  260             Integer length = nContent.length() - offset;
 
  261             String content = nContent.substring(offset, nContent.length());
 
  263             int lFTL = lFr.getAnnotatedText().length();
 
  264             if (length <= lFTL && lFr.
getAnnotatedText().substring(0, length).equals(content)) {
 
  265               retFragments.get(i).add(f);  
 
  267               badFragments.get(i).add(f);  
 
  269             processedLengths.set(i, length);
 
  270           } 
else if (lFr.
getOffset() < nStartOffset && lFrEnd > nStartOffset
 
  271                      && lFrEnd <= nEndOffset) {  
 
  272             String path = XPathHelper.XPathStringOfNode(currentNode);
 
  274             Integer length = lFrEnd - nStartOffset;
 
  275             String content = nContent.substring(0, lFrEnd - nStartOffset);
 
  276             int lFTL = lFr.getAnnotatedText().length();
 
  278             if (processedLengths.get(i) < lFTL
 
  279                 && lFr.getAnnotatedText().substring(processedLengths.get(i), lFTL).equals(content)) {
 
  280               retFragments.get(i).add(f);  
 
  282               badFragments.get(i).add(f);  
 
  284             processedLengths.set(i, processedLengths.get(i) + length);
 
  285             if (i == convertedLin) {  
 
  288           } 
else if (lFr.
getOffset() < nStartOffset && lFrEnd > nEndOffset) {
 
  290             String path = XPathHelper.XPathStringOfNode(currentNode);
 
  292             Integer length = nContent.length();
 
  293             int lFTL = lFr.getAnnotatedText().length();
 
  294             int lFTE = processedLengths.get(i) + length;
 
  296             if (processedLengths.get(i) < lFTL && lFTE <= lFTL &&
 
  297                 lFr.getAnnotatedText().substring(processedLengths.get(i), lFTE).equals(nContent)) {
 
  298               retFragments.get(i).add(f);  
 
  300               badFragments.get(i).add(f);  
 
  302             processedLengths.set(i, processedLengths.get(i) + length);
 
  307           lFr.setOffset(lFr.getOffset() + newlineOffsetCompensation);
 
  310         } 
while (i < linLength && lFr.
getOffset() < nEndOffset);
 
  312         nStartOffset += nContent.length();  
 
  313         currentNode = nIter.nextNode();  
 
  314         newlineOffsetCompensation += 1;  
 
  333                                                                        ArrayList<ArrayList<AlternativeFragment>> badFragments) {
 
  334     ArrayList<ArrayList<AlternativeFragment>> retFragments = 
new ArrayList<ArrayList<AlternativeFragment>>();
 
  335     ArrayList<Integer> processedLengths = 
new ArrayList<Integer>();
 
  336     int linLength = linFragments.size();  
 
  337     for (
int i = 0; i < linLength; i++) {  
 
  338       retFragments.add(
new ArrayList<AlternativeFragment>());
 
  339       badFragments.add(
new ArrayList<AlternativeFragment>());
 
  340       processedLengths.add(0);
 
  345     if (doc.getDocumentElement() == null) {
 
  348       NodeIterator nIter = ((DocumentTraversal) doc).createNodeIterator(doc.getDocumentElement(),
 
  349                             NodeFilter.SHOW_TEXT + NodeFilter.SHOW_CDATA_SECTION, null, 
true);
 
  350       Integer nStartOffset = 0;  
 
  351       Integer nEndOffset = 0;  
 
  352       String nContent = 
"";  
 
  353       int convertedLin = 0;  
 
  354       Node currentNode = nIter.nextNode();
 
  355       int newlineOffsetCompensation = 0; 
 
  357       while (currentNode != null && convertedLin < linLength) {
 
  359         nContent = currentNode.getNodeValue();
 
  363         if (nContent.replaceAll(
"[\\s\\u00A0]+$", 
"").contentEquals(
"")) {
 
  364           currentNode = nIter.nextNode();  
 
  368         nEndOffset = nStartOffset + nContent.length();
 
  369         int i = convertedLin;
 
  372           lFr = linFragments.get(i);
 
  374           lFr.setOffset(lFr.getOffset() - newlineOffsetCompensation);
 
  375           int lFrEnd = lFr.getOffset() + lFr.
getLength();
 
  376           if (lFr.
getOffset() < nStartOffset && lFrEnd < nStartOffset) {
 
  378             if (i == convertedLin) {  
 
  381           } 
else if (lFr.
getOffset() >= nStartOffset && lFrEnd <= nEndOffset) {
 
  383             String path = XPathHelper.XPathStringOfNode(currentNode);
 
  384             Integer offset = lFr.getOffset() - nStartOffset;
 
  385             String content = nContent.substring(offset, offset + lFr.getLength());
 
  389               retFragments.get(i).add(f);  
 
  391               badFragments.get(i).add(f);  
 
  393             processedLengths.set(i, lFr.getLength());
 
  394             if (i == convertedLin) {  
 
  397           } 
else if (lFr.
getOffset() >= nStartOffset && lFr.getOffset() < nEndOffset
 
  398                      && lFrEnd > nEndOffset) {  
 
  399             String path = XPathHelper.XPathStringOfNode(currentNode);
 
  400             Integer offset = lFr.getOffset() - nStartOffset;
 
  401             Integer length = nContent.length() - offset;
 
  402             String content = nContent.substring(offset, nContent.length());
 
  404             int lFTL = lFr.getAnnotatedText().length();
 
  405             if (length <= lFTL && lFr.
getAnnotatedText().substring(0, length).equals(content)) {
 
  406               retFragments.get(i).add(f);  
 
  408               badFragments.get(i).add(f);  
 
  410             processedLengths.set(i, length);
 
  411           } 
else if (lFr.
getOffset() < nStartOffset && lFrEnd > nStartOffset
 
  412                      && lFrEnd <= nEndOffset) {  
 
  413             String path = XPathHelper.XPathStringOfNode(currentNode);
 
  415             Integer length = lFrEnd - nStartOffset;
 
  416             String content = nContent.substring(0, lFrEnd - nStartOffset);
 
  417             int lFTL = lFr.getAnnotatedText().length();
 
  419             if (processedLengths.get(i) < lFTL
 
  420                 && lFr.getAnnotatedText().substring(processedLengths.get(i), lFTL).equals(content)) {
 
  421               retFragments.get(i).add(f);  
 
  423               badFragments.get(i).add(f);  
 
  425             processedLengths.set(i, processedLengths.get(i) + length);
 
  426             if (i == convertedLin) {  
 
  429           } 
else if (lFr.
getOffset() < nStartOffset && lFrEnd > nEndOffset) {
 
  431             String path = XPathHelper.XPathStringOfNode(currentNode);
 
  433             Integer length = nContent.length();
 
  434             int lFTL = lFr.getAnnotatedText().length();
 
  435             int lFTE = processedLengths.get(i) + length;
 
  437             if (processedLengths.get(i) < lFTL && lFTE <= lFTL &&
 
  438                 lFr.getAnnotatedText().substring(processedLengths.get(i), lFTE).equals(nContent)) {
 
  439               retFragments.get(i).add(f);  
 
  441               badFragments.get(i).add(f);  
 
  443             processedLengths.set(i, processedLengths.get(i) + length);
 
  448           lFr.setOffset(lFr.getOffset() + newlineOffsetCompensation);
 
  451         } 
while (i < linLength && lFr.
getOffset() < nEndOffset);
 
  453         nStartOffset += nContent.length();  
 
  454         currentNode = nIter.nextNode();  
 
  455         newlineOffsetCompensation += 1;  
 
  469     StringBuilder linDoc = 
new StringBuilder();
 
  470     if (doc.getDocumentElement() == null) {
 
  473       NodeIterator nIter = ((DocumentTraversal) doc).createNodeIterator(doc.getDocumentElement(),
 
  474                             NodeFilter.SHOW_TEXT + NodeFilter.SHOW_CDATA_SECTION, null, 
true);
 
  475       Node currentNode = nIter.nextNode();
 
  477       boolean compensateNewline = 
false;
 
  478       while (currentNode != null) {
 
  482         if (currentNode.getNodeValue() == null || currentNode.getNodeValue().replaceAll(
"[\\s\\u00A0]+$", 
"").contentEquals(
"")) {
 
  483           currentNode = nIter.nextNode();  
 
  488         if (compensateNewline) {
 
  493         linDoc.append(currentNode.getNodeValue());
 
  494         currentNode = nIter.nextNode();  
 
  495         compensateNewline = 
true;
 
  498     return linDoc.toString();
 
  512                                                             Document doc, ArrayList<ArrayList<SuggestionFragment>> notConverted,
 
  517     int numOfAnnots = comFragments.size();
 
  519     ArrayList<ArrayList<SuggestionFragment>> comFrCopy = 
new ArrayList<ArrayList<SuggestionFragment>>();
 
  521     ArrayList<SuggestionFragment> retFragments = 
new ArrayList<SuggestionFragment>(numOfAnnots);
 
  523     ArrayList<SuggestionFragment> partialFragments = 
new ArrayList<SuggestionFragment>(numOfAnnots);
 
  525     ArrayList<Integer> numsOfSpacesSO = 
new ArrayList<Integer>(numOfAnnots);
 
  527     ArrayList<Integer> numsOfSpacesL = 
new ArrayList<Integer>(numOfAnnots);
 
  529     for (
int i = 0; i < numOfAnnots; i++) {
 
  530       ArrayList<SuggestionFragment> cFr = comFragments.get(i);
 
  531       ArrayList<SuggestionFragment> aFragments = 
new ArrayList<SuggestionFragment>();
 
  532       notConverted.add(
new ArrayList<SuggestionFragment>());
 
  533       comFrCopy.add(aFragments);
 
  534       for (Iterator<SuggestionFragment> aFrIt = cFr.iterator(); aFrIt.hasNext();) {
 
  541       retFragments.add(null);
 
  542       partialFragments.add(null);
 
  543       numsOfSpacesSO.add(null);
 
  544       numsOfSpacesL.add(null);
 
  550     if (doc.getDocumentElement() == null) {
 
  553       NodeIterator nIter = ((DocumentTraversal) doc).createNodeIterator(doc.getDocumentElement(),
 
  554                             NodeFilter.SHOW_TEXT + NodeFilter.SHOW_CDATA_SECTION, null, 
true);
 
  555       Integer nStartOffset = 0;  
 
  556       Integer nEndOffset = 0;  
 
  557       String nContent = 
"";  
 
  558       Node currentNode = nIter.nextNode();
 
  561       while (currentNode != null && fragCount > 0) {
 
  563         nContent = currentNode.getNodeValue();
 
  564         int nContentL = nContent.length();
 
  565         nEndOffset = nStartOffset + nContentL;
 
  566         path = XPathHelper.XPathStringOfNode(currentNode);
 
  568         for (
int i = 0; i < numOfAnnots; i++) {
 
  570           ArrayList<SuggestionFragment> aFragments = comFrCopy.get(i);
 
  571           for (Iterator<SuggestionFragment> frIt = aFragments.iterator(); frIt.hasNext();) {
 
  575               notConverted.get(i).add(fr);  
 
  580             if (fr.
getPath().equals(path)) {  
 
  584                 frEndOffset = fr.getOffset() + fr.
getLength();
 
  587                 frEndOffset = fr.getLength();
 
  589               if (frEndOffset > nContentL) {  
 
  590                 notConverted.get(i).add(fr);
 
  594               }   
else if (fr.
getAnnotatedText() != null && !nContent.substring(fr.getOffset(), frEndOffset).equals(fr.getAnnotatedText())) {
 
  597                 notConverted.get(i).add(fr);
 
  602               if (partFrag == null && frEndOffset < nContentL) {
 
  608                 numsOfSpacesSO.set(i,nodeCounter);  
 
  609                 numsOfSpacesL.set(i,0);  
 
  612                                                     fr.getRefSuggestion());
 
  615                 retFragments.set(i, linFragment);  
 
  616               } 
else if (partFrag == null && frEndOffset == nContentL) {
 
  622                 numsOfSpacesSO.set(i,nodeCounter);  
 
  623                 numsOfSpacesL.set(i,0);  
 
  626                                                     fr.getRefSuggestion());
 
  628                 partialFragments.set(i, linFragment);
 
  631               } 
else if (partFrag != null && frEndOffset < nContentL) {
 
  635                   linFragment.setAnnotatedText(linFragment.getAnnotatedText() + fr.
getAnnotatedText());
 
  637                 linFragment.setLength(linFragment.getLength() + fr.
getLength());
 
  638                 retFragments.set(i, linFragment);
 
  639                 partialFragments.set(i, null);
 
  640                 if (numsOfSpacesL.get(i) != null) {  
 
  641                   numsOfSpacesL.set(i,numsOfSpacesL.get(i) + 1);
 
  645               } 
else if (partFrag != null && frEndOffset == nContentL) {
 
  649                   linFragment.setAnnotatedText(linFragment.getAnnotatedText() + fr.
getAnnotatedText());
 
  651                 linFragment.setLength(linFragment.getLength() + fr.
getLength());
 
  652                 if (numsOfSpacesL.get(i) != null) {  
 
  653                   numsOfSpacesL.set(i,numsOfSpacesL.get(i) + 1);
 
  659                 notConverted.get(i).add(fr);  
 
  669         nStartOffset += nContent.length();  
 
  670         currentNode = nIter.nextNode();  
 
  671         while (currentNode != null && (currentNode.getNodeValue() == null || currentNode.getNodeValue().replaceAll(
"[\\s\\u00A0]+$", 
"").contentEquals(
""))) {
 
  672           currentNode = nIter.nextNode();  
 
  678     for (
int i = 0; i < numOfAnnots; i++) {
 
  680       ArrayList<SuggestionFragment> aFragments = comFrCopy.get(i);
 
  682       for (Iterator<SuggestionFragment> aFrIt = aFragments.iterator(); aFrIt.hasNext();) {
 
  684         notConverted.get(i).add(fr);
 
  690       if (partialFragments.get(i) != null) {
 
  691         retFragments.set(i, partialFragments.get(i));  
 
  696       for (
int i = 0; i < numOfAnnots; i++) {  
 
  702         Integer spaces = numsOfSpacesSO.get(i);
 
  703         if (spaces == null) {
 
  706         fr.setOffset(fr.getOffset() + spaces);
 
  708         spaces = numsOfSpacesL.get(i);
 
  709         if (spaces == null) {
 
  712         fr.setLength(fr.getLength() + spaces);
 
  730                                                             Document doc, ArrayList<ArrayList<Fragment>> notConverted,
 
  735     int numOfAnnots = comFragments.size();
 
  737     ArrayList<ArrayList<Fragment>> comFrCopy = 
new ArrayList<ArrayList<Fragment>>();
 
  739     ArrayList<Fragment> retFragments = 
new ArrayList<Fragment>(numOfAnnots);
 
  741     ArrayList<Fragment> partialFragments = 
new ArrayList<Fragment>(numOfAnnots);
 
  743     ArrayList<Integer> numsOfSpacesSO = 
new ArrayList<Integer>(numOfAnnots);
 
  745     ArrayList<Integer> numsOfSpacesL = 
new ArrayList<Integer>(numOfAnnots);
 
  747     for (
int i = 0; i < numOfAnnots; i++) {
 
  748       ArrayList<Fragment> cFr = comFragments.get(i);
 
  749       ArrayList<Fragment> aFragments = 
new ArrayList<Fragment>();
 
  750       notConverted.add(
new ArrayList<Fragment>());
 
  751       comFrCopy.add(aFragments);
 
  752       for (Iterator<Fragment> aFrIt = cFr.iterator(); aFrIt.hasNext();) {
 
  759       retFragments.add(null);
 
  760       partialFragments.add(null);
 
  761       numsOfSpacesSO.add(null);
 
  762       numsOfSpacesL.add(null);
 
  768     if (doc.getDocumentElement() == null) {
 
  771       NodeIterator nIter = ((DocumentTraversal) doc).createNodeIterator(doc.getDocumentElement(),
 
  772                             NodeFilter.SHOW_TEXT + NodeFilter.SHOW_CDATA_SECTION, null, 
true);
 
  773       Integer nStartOffset = 0;  
 
  774       Integer nEndOffset = 0;  
 
  775       String nContent = 
"";  
 
  776       Node currentNode = nIter.nextNode();
 
  779       while (currentNode != null && fragCount > 0) {
 
  781         nContent = currentNode.getNodeValue();
 
  782         int nContentL = nContent.length();
 
  783         nEndOffset = nStartOffset + nContentL;
 
  784         path = XPathHelper.XPathStringOfNode(currentNode);
 
  786         for (
int i = 0; i < numOfAnnots; i++) {
 
  788           ArrayList<Fragment> aFragments = comFrCopy.get(i);
 
  789           for (Iterator<Fragment> frIt = aFragments.iterator(); frIt.hasNext();) {
 
  793               notConverted.get(i).add(fr);  
 
  798             if (fr.
getPath().equals(path)) {  
 
  799               Fragment partFrag = partialFragments.get(i);
 
  802                 frEndOffset = fr.getOffset() + fr.
getLength();
 
  805                 frEndOffset = fr.getLength();
 
  807               if (frEndOffset > nContentL) {  
 
  808                 notConverted.get(i).add(fr);
 
  812               }   
else if (fr.
getAnnotatedText() != null && !nContent.substring(fr.getOffset(), frEndOffset).equals(fr.getAnnotatedText())) {
 
  815                 notConverted.get(i).add(fr);
 
  820               if (partFrag == null && frEndOffset < nContentL) {
 
  826                 numsOfSpacesSO.set(i,nodeCounter);  
 
  827                 numsOfSpacesL.set(i,0);  
 
  830                                                     fr.getRefAnnotation());
 
  833                 retFragments.set(i, linFragment);  
 
  834               } 
else if (partFrag == null && frEndOffset == nContentL) {
 
  840                 numsOfSpacesSO.set(i,nodeCounter);  
 
  841                 numsOfSpacesL.set(i,0);  
 
  844                                                     fr.getRefAnnotation());
 
  846                 partialFragments.set(i, linFragment);
 
  849               } 
else if (partFrag != null && frEndOffset < nContentL) {
 
  851                 Fragment linFragment = partialFragments.get(i);
 
  853                   linFragment.setAnnotatedText(linFragment.getAnnotatedText() + fr.
getAnnotatedText());
 
  855                 linFragment.setLength(linFragment.getLength() + fr.
getLength());
 
  856                 retFragments.set(i, linFragment);
 
  857                 partialFragments.set(i, null);
 
  858                 if (numsOfSpacesL.get(i) != null) {  
 
  859                   numsOfSpacesL.set(i,numsOfSpacesL.get(i) + 1);
 
  863               } 
else if (partFrag != null && frEndOffset == nContentL) {
 
  865                 Fragment linFragment = partialFragments.get(i);
 
  867                   linFragment.setAnnotatedText(linFragment.getAnnotatedText() + fr.
getAnnotatedText());
 
  869                 linFragment.setLength(linFragment.getLength() + fr.
getLength());
 
  870                 if (numsOfSpacesL.get(i) != null) {  
 
  871                   numsOfSpacesL.set(i,numsOfSpacesL.get(i) + 1);
 
  877                 notConverted.get(i).add(fr);  
 
  887         nStartOffset += nContent.length();  
 
  888         currentNode = nIter.nextNode();  
 
  889         while (currentNode != null && (currentNode.getNodeValue() == null || currentNode.getNodeValue().replaceAll(
"[\\s\\u00A0]+$", 
"").contentEquals(
""))) {
 
  890           currentNode = nIter.nextNode();  
 
  896     for (
int i = 0; i < numOfAnnots; i++) {
 
  898       ArrayList<Fragment> aFragments = comFrCopy.get(i);
 
  900       for (Iterator<Fragment> aFrIt = aFragments.iterator(); aFrIt.hasNext();) {
 
  902         notConverted.get(i).add(fr);
 
  908       if (partialFragments.get(i) != null) {
 
  909         retFragments.set(i, partialFragments.get(i));  
 
  914       for (
int i = 0; i < numOfAnnots; i++) {  
 
  920         Integer spaces = numsOfSpacesSO.get(i);
 
  921         if (spaces == null) {
 
  924         fr.setOffset(fr.getOffset() + spaces);
 
  926         spaces = numsOfSpacesL.get(i);
 
  927         if (spaces == null) {
 
  930         fr.setLength(fr.getLength() + spaces);
 
  948                                                             Document doc, ArrayList<ArrayList<AlternativeFragment>> notConverted,
 
  953     int numOfAnnots = comFragments.size();
 
  955     ArrayList<ArrayList<AlternativeFragment>> comFrCopy = 
new ArrayList<ArrayList<AlternativeFragment>>();
 
  957     ArrayList<AlternativeFragment> retFragments = 
new ArrayList<AlternativeFragment>(numOfAnnots);
 
  959     ArrayList<AlternativeFragment> partialFragments = 
new ArrayList<AlternativeFragment>(numOfAnnots);
 
  961     ArrayList<Integer> numsOfSpacesSO = 
new ArrayList<Integer>(numOfAnnots);
 
  963     ArrayList<Integer> numsOfSpacesL = 
new ArrayList<Integer>(numOfAnnots);
 
  965     for (
int i = 0; i < numOfAnnots; i++) {
 
  966       ArrayList<AlternativeFragment> cFr = comFragments.get(i);
 
  967       ArrayList<AlternativeFragment> aFragments = 
new ArrayList<AlternativeFragment>();
 
  968       notConverted.add(
new ArrayList<AlternativeFragment>());
 
  969       comFrCopy.add(aFragments);
 
  970       for (Iterator<AlternativeFragment> aFrIt = cFr.iterator(); aFrIt.hasNext();) {
 
  977       retFragments.add(null);
 
  978       partialFragments.add(null);
 
  979       numsOfSpacesSO.add(null);
 
  980       numsOfSpacesL.add(null);
 
  986     if (doc.getDocumentElement() == null) {
 
  989       NodeIterator nIter = ((DocumentTraversal) doc).createNodeIterator(doc.getDocumentElement(),
 
  990                             NodeFilter.SHOW_TEXT + NodeFilter.SHOW_CDATA_SECTION, null, 
true);
 
  991       Integer nStartOffset = 0;  
 
  992       Integer nEndOffset = 0;  
 
  993       String nContent = 
"";  
 
  994       Node currentNode = nIter.nextNode();
 
  997       while (currentNode != null && fragCount > 0) {
 
  999         nContent = currentNode.getNodeValue();
 
 1000         int nContentL = nContent.length();
 
 1001         nEndOffset = nStartOffset + nContentL;
 
 1002         path = XPathHelper.XPathStringOfNode(currentNode);
 
 1004         for (
int i = 0; i < numOfAnnots; i++) {
 
 1006           ArrayList<AlternativeFragment> aFragments = comFrCopy.get(i);
 
 1007           for (Iterator<AlternativeFragment> frIt = aFragments.iterator(); frIt.hasNext();) {
 
 1011               notConverted.get(i).add(fr);  
 
 1016             if (fr.
getPath().equals(path)) {  
 
 1020                 frEndOffset = fr.getOffset() + fr.
getLength();
 
 1023                 frEndOffset = fr.getLength();
 
 1025               if (frEndOffset > nContentL) {  
 
 1026                 notConverted.get(i).add(fr);
 
 1030               }   
else if (fr.
getAnnotatedText() != null && !nContent.substring(fr.getOffset(), frEndOffset).equals(fr.getAnnotatedText())) {
 
 1033                 notConverted.get(i).add(fr);
 
 1038               if (partFrag == null && frEndOffset < nContentL) {
 
 1044                 numsOfSpacesSO.set(i,nodeCounter);  
 
 1045                 numsOfSpacesL.set(i,0);  
 
 1048                                                     fr.getRefAlternative());
 
 1051                 retFragments.set(i, linFragment);  
 
 1052               } 
else if (partFrag == null && frEndOffset == nContentL) {
 
 1058                 numsOfSpacesSO.set(i,nodeCounter);  
 
 1059                 numsOfSpacesL.set(i,0);  
 
 1062                                                     fr.getRefAlternative());
 
 1064                 partialFragments.set(i, linFragment);
 
 1067               } 
else if (partFrag != null && frEndOffset < nContentL) {
 
 1071                   linFragment.setAnnotatedText(linFragment.getAnnotatedText() + fr.
getAnnotatedText());
 
 1073                 linFragment.setLength(linFragment.getLength() + fr.
getLength());
 
 1074                 retFragments.set(i, linFragment);
 
 1075                 partialFragments.set(i, null);
 
 1076                 if (numsOfSpacesL.get(i) != null) {  
 
 1077                   numsOfSpacesL.set(i,numsOfSpacesL.get(i) + 1);
 
 1081               } 
else if (partFrag != null && frEndOffset == nContentL) {
 
 1085                   linFragment.setAnnotatedText(linFragment.getAnnotatedText() + fr.
getAnnotatedText());
 
 1087                 linFragment.setLength(linFragment.getLength() + fr.
getLength());
 
 1088                 if (numsOfSpacesL.get(i) != null) {  
 
 1089                   numsOfSpacesL.set(i,numsOfSpacesL.get(i) + 1);
 
 1095                 notConverted.get(i).add(fr);  
 
 1105         nStartOffset += nContent.length();  
 
 1106         currentNode = nIter.nextNode();  
 
 1107         while (currentNode != null && (currentNode.getNodeValue() == null || currentNode.getNodeValue().replaceAll(
"[\\s\\u00A0]+$", 
"").contentEquals(
""))) {
 
 1108           currentNode = nIter.nextNode();  
 
 1114     for (
int i = 0; i < numOfAnnots; i++) {
 
 1116       ArrayList<AlternativeFragment> aFragments = comFrCopy.get(i);
 
 1118       for (Iterator<AlternativeFragment> aFrIt = aFragments.iterator(); aFrIt.hasNext();) {
 
 1120         notConverted.get(i).add(fr);
 
 1126       if (partialFragments.get(i) != null) {
 
 1127         retFragments.set(i, partialFragments.get(i));  
 
 1132       for (
int i = 0; i < numOfAnnots; i++) {  
 
 1138         Integer spaces = numsOfSpacesSO.get(i);
 
 1139         if (spaces == null) {
 
 1142         fr.setOffset(fr.getOffset() + spaces);
 
 1144         spaces = numsOfSpacesL.get(i);
 
 1145         if (spaces == null) {
 
 1148         fr.setLength(fr.getLength() + spaces);
 
 1152     return retFragments;
 
 1164     StringBuilder linFr = 
new StringBuilder();
 
 1165     NodeList nodeL = docFr.getChildNodes();
 
 1166     int nodeCount = nodeL.getLength();
 
 1167     for (
int i = 0; i < nodeCount; i++) {  
 
 1168       Node curRootNode = nodeL.item(i);
 
 1169       NodeIterator nIter = ((DocumentTraversal) doc).createNodeIterator(curRootNode,
 
 1170                             NodeFilter.SHOW_TEXT + NodeFilter.SHOW_CDATA_SECTION, null, 
true);
 
 1171       Node currentNode = nIter.nextNode();
 
 1172       boolean compensateNewline = 
false;
 
 1173       while (currentNode != null) {
 
 1174         short nodeType = currentNode.getNodeType();
 
 1175         if (currentNode.getNodeValue() == null) {
 
 1176           currentNode = nIter.nextNode();  
 
 1179         if (compensateNewline) {
 
 1182         linFr.append(currentNode.getNodeValue());
 
 1183         if (!compensateNewline) {
 
 1184           compensateNewline = 
true;
 
 1186         currentNode = nIter.nextNode();  
 
 1189     return linFr.toString();
 
 1202                                                         Document doc, ArrayList<TextModification> notConverted)
 
 1203                                                         throws ParserConfigurationException, SAXException,
 
 1210     int numOfMods = comModifications.size();
 
 1211     ArrayList<TextModification> comModCopy = 
new ArrayList<TextModification>(numOfMods);
 
 1212     ArrayList<TextModification> notConvertedFlag = 
new ArrayList<TextModification>(numOfMods);
 
 1213     notConverted = 
new ArrayList<TextModification>();
 
 1215     for (
int i = 0; i < numOfMods; i++) {
 
 1218       notConvertedFlag.add(cM);
 
 1221     ArrayList<TextModification> retModifications = 
new ArrayList<TextModification>();
 
 1226     if (doc.getDocumentElement() == null) {
 
 1229       NodeIterator nIter = ((DocumentTraversal) doc).createNodeIterator(doc.getDocumentElement(),
 
 1230                             NodeFilter.SHOW_TEXT + NodeFilter.SHOW_CDATA_SECTION, null, 
true);
 
 1231       Integer nStartOffset = 0;  
 
 1232       Integer nEndOffset = 0;  
 
 1233       String nContent = 
"";  
 
 1234       Node currentNode = nIter.nextNode();
 
 1237       if (currentNode == null) {
 
 1238         int nodeCounter = 0;
 
 1240         for (Iterator<TextModification> tmIt = comModCopy.iterator(); tmIt.hasNext();) {
 
 1245           tm.setNewContent(tm.getNewContent().replace(
"<br>",
" "));
 
 1247           tm.setNewContent(tm.getNewContent().replaceAll(
"<[^>\\s]*>",
""));
 
 1248           tm.setLength(tm.getNewContent().length());
 
 1251                                                             modifStart + tm.
getLength(), tm.getNewContent());
 
 1252           modifStart += tm.getLength();
 
 1253           retModifications.add(lTM);
 
 1254           notConvertedFlag.set(nodeCounter, null);  
 
 1264         boolean compensateNewline = 
false;
 
 1265         while (currentNode != null && numOfMods > 0) {
 
 1267           if ( compensateNewline ) {
 
 1269             nContent = currentNode.getNodeValue().concat(
" ");
 
 1272             nContent = currentNode.getNodeValue();
 
 1274           int nContentL = nContent.length();
 
 1275           nEndOffset = nStartOffset + nContentL;
 
 1276           path = XPathHelper.XPathStringOfNode(currentNode);
 
 1279           for (Iterator<TextModification> tmIt = comModCopy.iterator(); tmIt.hasNext();) {
 
 1283             if (tm.
getPath() == null || tm.getPath().isEmpty()) {
 
 1285                 retModifications.add(tm);
 
 1286                 notConvertedFlag.set(modifCounter, null);  
 
 1291             if (tm.
getOffset() == null && tm.getLength() == null && tm.
getPath().equals(
"/HTML[1]/BODY[1]")) {
 
 1294               NodeIterator wholeDocEraseIt = ((DocumentTraversal) doc).createNodeIterator(doc.getDocumentElement(),
 
 1295                             NodeFilter.SHOW_TEXT + NodeFilter.SHOW_CDATA_SECTION, null, 
true);
 
 1296               Node n = wholeDocEraseIt.nextNode();
 
 1297               boolean newlineCompens = 
false;
 
 1299               String nodeCont = null;
 
 1303                 nodeCont = n.getNodeValue();
 
 1304                 if (nodeCont == null) {
 
 1305                   n = wholeDocEraseIt.nextNode();
 
 1308                 docLength += nodeCont.length();
 
 1309                 if (newlineCompens) {
 
 1312                 if (!newlineCompens) {
 
 1313                   newlineCompens = 
true;
 
 1315                 n = wholeDocEraseIt.nextNode();
 
 1318               String newContent = 
"";
 
 1321               if (tm.
getNewContent() != null && !tm.getNewContent().equals(
"<body></body>")) {
 
 1322                 newContent = tm.getNewContent();
 
 1324                 newContent = newContent.replace(
"<br>",
" ");
 
 1326                 newContent = newContent.replaceAll(
"<[^>\\s]*>",
"");
 
 1331               retModifications.add(lTM);
 
 1332               notConvertedFlag.set(modifCounter, null);  
 
 1337             if (path.startsWith(tm.
getPath())) {  
 
 1340                 String linContent = 
"";
 
 1350                                                             nEndOffset - nStartOffset, linContent);
 
 1354                                                             nEndOffset - nStartOffset, linContent);
 
 1357                 retModifications.add(lTM);
 
 1358                 notConvertedFlag.set(modifCounter, null);  
 
 1364               if (tm.
getOffset() != null && tm.getLength() != null) {
 
 1365                 int tmEndOffset = tm.getOffset() + tm.
getLength();
 
 1367                 if (tmEndOffset > nContentL) {  
 
 1368                   notConverted.add(tm);
 
 1369                   notConvertedFlag.set(modifCounter, null);;
 
 1377           nStartOffset += nContent.length() + 1;  
 
 1379           currentNode = nIter.nextNode();  
 
 1385     for (Iterator<TextModification> tmIt = notConvertedFlag.iterator(); tmIt.hasNext();) {
 
 1388         notConverted.add(tm);
 
 1392     return retModifications;
 
Utility functions for document linearization. 
static ArrayList< TextModification > modificationsToLinMod(ArrayList< TextModification > comModifications, Document doc, ArrayList< TextModification > notConverted)
static ArrayList< ArrayList< AlternativeFragment > > linAltFragmentsToFragments(ArrayList< AlternativeFragment > linFragments, Document doc, ArrayList< ArrayList< AlternativeFragment >> badFragments)
String getAnnotatedText()
static String linearizeDocumentFragment(DocumentFragment docFr, Document doc)
Class providing access to available matchers. 
DocumentFragment getFragmentFromString(String text)
Class representing suggested annotation fragment. 
String getAnnotatedText()
static String linearizeDocument(Document doc)
Class representing fragment for suggestion alternative. 
String getAnnotatedText()
Class representing modification of annotated document text. 
static ArrayList< ArrayList< SuggestionFragment > > linSugFragmentsToFragments(ArrayList< SuggestionFragment > linFragments, Document doc, ArrayList< ArrayList< SuggestionFragment >> badFragments)
static ArrayList< Fragment > fragmentsToLinFragments(ArrayList< ArrayList< Fragment >> comFragments, Document doc, ArrayList< ArrayList< Fragment >> notConverted, boolean addSpaces)
Helper class with util XPath methods. 
static ArrayList< ArrayList< Fragment > > linFragmentsToFragments(ArrayList< Fragment > linFragments, Document doc, ArrayList< ArrayList< Fragment >> badFragments)
static MatcherProvider matcherProvider
Class representing annotated fragment. 
static ArrayList< SuggestionFragment > fragmentsToLinSugFragments(ArrayList< ArrayList< SuggestionFragment >> comFragments, Document doc, ArrayList< ArrayList< SuggestionFragment >> notConverted, boolean addSpaces)
static ArrayList< AlternativeFragment > fragmentsToLinAltFragments(ArrayList< ArrayList< AlternativeFragment >> comFragments, Document doc, ArrayList< ArrayList< AlternativeFragment >> notConverted, boolean addSpaces)