13 package cz.vutbr.fit.knot.annotations.comet;
21 import java.io.IOException;
22 import java.util.ArrayList;
23 import java.util.Iterator;
24 import javax.xml.parsers.ParserConfigurationException;
25 import org.w3c.dom.Document;
26 import org.w3c.dom.DocumentFragment;
27 import org.w3c.dom.Node;
28 import org.w3c.dom.NodeList;
29 import org.w3c.dom.traversal.DocumentTraversal;
30 import org.w3c.dom.traversal.NodeFilter;
31 import org.w3c.dom.traversal.NodeIterator;
32 import org.xml.sax.SAXException;
59 ArrayList<ArrayList<Fragment>> badFragments) {
60 ArrayList<ArrayList<Fragment>> retFragments =
new ArrayList<ArrayList<Fragment>>();
61 ArrayList<Integer> processedLengths =
new ArrayList<Integer>();
62 int linLength = linFragments.size();
63 for (
int i = 0; i < linLength; i++) {
64 retFragments.add(
new ArrayList<Fragment>());
65 badFragments.add(
new ArrayList<Fragment>());
66 processedLengths.add(0);
71 if (doc.getDocumentElement() == null) {
74 NodeIterator nIter = ((DocumentTraversal) doc).createNodeIterator(doc.getDocumentElement(),
75 NodeFilter.SHOW_TEXT + NodeFilter.SHOW_CDATA_SECTION, null,
true);
76 Integer nStartOffset = 0;
77 Integer nEndOffset = 0;
80 Node currentNode = nIter.nextNode();
81 int newlineOffsetCompensation = 0;
83 while (currentNode != null && convertedLin < linLength) {
85 nContent = currentNode.getNodeValue();
86 nEndOffset = nStartOffset + nContent.length();
90 lFr = linFragments.get(i);
92 lFr.setOffset(lFr.getOffset() - newlineOffsetCompensation);
93 int lFrEnd = lFr.getOffset() + lFr.
getLength();
94 if (lFr.
getOffset() < nStartOffset && lFrEnd < nStartOffset) {
96 if (i == convertedLin) {
99 }
else if (lFr.
getOffset() >= nStartOffset && lFrEnd <= nEndOffset) {
101 String path = XPathHelper.XPathStringOfNode(currentNode);
102 Integer offset = lFr.getOffset() - nStartOffset;
103 String content = nContent.substring(offset, offset + lFr.getLength());
107 retFragments.get(i).add(f);
109 badFragments.get(i).add(f);
111 processedLengths.set(i, lFr.getLength());
112 if (i == convertedLin) {
115 }
else if (lFr.
getOffset() >= nStartOffset && lFr.getOffset() < nEndOffset
116 && lFrEnd > nEndOffset) {
117 String path = XPathHelper.XPathStringOfNode(currentNode);
118 Integer offset = lFr.getOffset() - nStartOffset;
119 Integer length = nContent.length() - offset;
120 String content = nContent.substring(offset, nContent.length());
122 int lFTL = lFr.getAnnotatedText().length();
123 if (length <= lFTL && lFr.
getAnnotatedText().substring(0, length).equals(content)) {
124 retFragments.get(i).add(f);
126 badFragments.get(i).add(f);
128 processedLengths.set(i, length);
129 }
else if (lFr.
getOffset() < nStartOffset && lFrEnd > nStartOffset
130 && lFrEnd <= nEndOffset) {
131 String path = XPathHelper.XPathStringOfNode(currentNode);
133 Integer length = lFrEnd - nStartOffset;
134 String content = nContent.substring(0, lFrEnd - nStartOffset);
135 int lFTL = lFr.getAnnotatedText().length();
137 if (processedLengths.get(i) < lFTL
138 && lFr.getAnnotatedText().substring(processedLengths.get(i), lFTL).equals(content)) {
139 retFragments.get(i).add(f);
141 badFragments.get(i).add(f);
143 processedLengths.set(i, processedLengths.get(i) + length);
144 if (i == convertedLin) {
147 }
else if (lFr.
getOffset() < nStartOffset && lFrEnd > nEndOffset) {
149 String path = XPathHelper.XPathStringOfNode(currentNode);
151 Integer length = nContent.length();
152 int lFTL = lFr.getAnnotatedText().length();
153 int lFTE = processedLengths.get(i) + length;
155 if (processedLengths.get(i) < lFTL && lFTE <= lFTL &&
156 lFr.getAnnotatedText().substring(processedLengths.get(i), lFTE).equals(nContent)) {
157 retFragments.get(i).add(f);
159 badFragments.get(i).add(f);
161 processedLengths.set(i, processedLengths.get(i) + length);
166 lFr.setOffset(lFr.getOffset() + newlineOffsetCompensation);
169 }
while (i < linLength && lFr.
getOffset() < nEndOffset);
171 nStartOffset += nContent.length();
172 currentNode = nIter.nextNode();
173 newlineOffsetCompensation += 1;
192 ArrayList<ArrayList<SuggestionFragment>> badFragments) {
193 ArrayList<ArrayList<SuggestionFragment>> retFragments =
new ArrayList<ArrayList<SuggestionFragment>>();
194 ArrayList<Integer> processedLengths =
new ArrayList<Integer>();
195 int linLength = linFragments.size();
196 for (
int i = 0; i < linLength; i++) {
197 retFragments.add(
new ArrayList<SuggestionFragment>());
198 badFragments.add(
new ArrayList<SuggestionFragment>());
199 processedLengths.add(0);
204 if (doc.getDocumentElement() == null) {
207 NodeIterator nIter = ((DocumentTraversal) doc).createNodeIterator(doc.getDocumentElement(),
208 NodeFilter.SHOW_TEXT + NodeFilter.SHOW_CDATA_SECTION, null,
true);
209 Integer nStartOffset = 0;
210 Integer nEndOffset = 0;
211 String nContent =
"";
212 int convertedLin = 0;
213 Node currentNode = nIter.nextNode();
214 int newlineOffsetCompensation = 0;
216 while (currentNode != null && convertedLin < linLength) {
218 nContent = currentNode.getNodeValue();
222 if (nContent.replaceAll(
"[\\s\\u00A0]+$",
"").contentEquals(
"")) {
223 currentNode = nIter.nextNode();
227 nEndOffset = nStartOffset + nContent.length();
228 int i = convertedLin;
231 lFr = linFragments.get(i);
233 lFr.setOffset(lFr.getOffset() - newlineOffsetCompensation);
234 int lFrEnd = lFr.getOffset() + lFr.
getLength();
235 if (lFr.
getOffset() < nStartOffset && lFrEnd < nStartOffset) {
237 if (i == convertedLin) {
240 }
else if (lFr.
getOffset() >= nStartOffset && lFrEnd <= nEndOffset) {
242 String path = XPathHelper.XPathStringOfNode(currentNode);
243 Integer offset = lFr.getOffset() - nStartOffset;
244 String content = nContent.substring(offset, offset + lFr.getLength());
248 retFragments.get(i).add(f);
250 badFragments.get(i).add(f);
252 processedLengths.set(i, lFr.getLength());
253 if (i == convertedLin) {
256 }
else if (lFr.
getOffset() >= nStartOffset && lFr.getOffset() < nEndOffset
257 && lFrEnd > nEndOffset) {
258 String path = XPathHelper.XPathStringOfNode(currentNode);
259 Integer offset = lFr.getOffset() - nStartOffset;
260 Integer length = nContent.length() - offset;
261 String content = nContent.substring(offset, nContent.length());
263 int lFTL = lFr.getAnnotatedText().length();
264 if (length <= lFTL && lFr.
getAnnotatedText().substring(0, length).equals(content)) {
265 retFragments.get(i).add(f);
267 badFragments.get(i).add(f);
269 processedLengths.set(i, length);
270 }
else if (lFr.
getOffset() < nStartOffset && lFrEnd > nStartOffset
271 && lFrEnd <= nEndOffset) {
272 String path = XPathHelper.XPathStringOfNode(currentNode);
274 Integer length = lFrEnd - nStartOffset;
275 String content = nContent.substring(0, lFrEnd - nStartOffset);
276 int lFTL = lFr.getAnnotatedText().length();
278 if (processedLengths.get(i) < lFTL
279 && lFr.getAnnotatedText().substring(processedLengths.get(i), lFTL).equals(content)) {
280 retFragments.get(i).add(f);
282 badFragments.get(i).add(f);
284 processedLengths.set(i, processedLengths.get(i) + length);
285 if (i == convertedLin) {
288 }
else if (lFr.
getOffset() < nStartOffset && lFrEnd > nEndOffset) {
290 String path = XPathHelper.XPathStringOfNode(currentNode);
292 Integer length = nContent.length();
293 int lFTL = lFr.getAnnotatedText().length();
294 int lFTE = processedLengths.get(i) + length;
296 if (processedLengths.get(i) < lFTL && lFTE <= lFTL &&
297 lFr.getAnnotatedText().substring(processedLengths.get(i), lFTE).equals(nContent)) {
298 retFragments.get(i).add(f);
300 badFragments.get(i).add(f);
302 processedLengths.set(i, processedLengths.get(i) + length);
307 lFr.setOffset(lFr.getOffset() + newlineOffsetCompensation);
310 }
while (i < linLength && lFr.
getOffset() < nEndOffset);
312 nStartOffset += nContent.length();
313 currentNode = nIter.nextNode();
314 newlineOffsetCompensation += 1;
333 ArrayList<ArrayList<AlternativeFragment>> badFragments) {
334 ArrayList<ArrayList<AlternativeFragment>> retFragments =
new ArrayList<ArrayList<AlternativeFragment>>();
335 ArrayList<Integer> processedLengths =
new ArrayList<Integer>();
336 int linLength = linFragments.size();
337 for (
int i = 0; i < linLength; i++) {
338 retFragments.add(
new ArrayList<AlternativeFragment>());
339 badFragments.add(
new ArrayList<AlternativeFragment>());
340 processedLengths.add(0);
345 if (doc.getDocumentElement() == null) {
348 NodeIterator nIter = ((DocumentTraversal) doc).createNodeIterator(doc.getDocumentElement(),
349 NodeFilter.SHOW_TEXT + NodeFilter.SHOW_CDATA_SECTION, null,
true);
350 Integer nStartOffset = 0;
351 Integer nEndOffset = 0;
352 String nContent =
"";
353 int convertedLin = 0;
354 Node currentNode = nIter.nextNode();
355 int newlineOffsetCompensation = 0;
357 while (currentNode != null && convertedLin < linLength) {
359 nContent = currentNode.getNodeValue();
363 if (nContent.replaceAll(
"[\\s\\u00A0]+$",
"").contentEquals(
"")) {
364 currentNode = nIter.nextNode();
368 nEndOffset = nStartOffset + nContent.length();
369 int i = convertedLin;
372 lFr = linFragments.get(i);
374 lFr.setOffset(lFr.getOffset() - newlineOffsetCompensation);
375 int lFrEnd = lFr.getOffset() + lFr.
getLength();
376 if (lFr.
getOffset() < nStartOffset && lFrEnd < nStartOffset) {
378 if (i == convertedLin) {
381 }
else if (lFr.
getOffset() >= nStartOffset && lFrEnd <= nEndOffset) {
383 String path = XPathHelper.XPathStringOfNode(currentNode);
384 Integer offset = lFr.getOffset() - nStartOffset;
385 String content = nContent.substring(offset, offset + lFr.getLength());
389 retFragments.get(i).add(f);
391 badFragments.get(i).add(f);
393 processedLengths.set(i, lFr.getLength());
394 if (i == convertedLin) {
397 }
else if (lFr.
getOffset() >= nStartOffset && lFr.getOffset() < nEndOffset
398 && lFrEnd > nEndOffset) {
399 String path = XPathHelper.XPathStringOfNode(currentNode);
400 Integer offset = lFr.getOffset() - nStartOffset;
401 Integer length = nContent.length() - offset;
402 String content = nContent.substring(offset, nContent.length());
404 int lFTL = lFr.getAnnotatedText().length();
405 if (length <= lFTL && lFr.
getAnnotatedText().substring(0, length).equals(content)) {
406 retFragments.get(i).add(f);
408 badFragments.get(i).add(f);
410 processedLengths.set(i, length);
411 }
else if (lFr.
getOffset() < nStartOffset && lFrEnd > nStartOffset
412 && lFrEnd <= nEndOffset) {
413 String path = XPathHelper.XPathStringOfNode(currentNode);
415 Integer length = lFrEnd - nStartOffset;
416 String content = nContent.substring(0, lFrEnd - nStartOffset);
417 int lFTL = lFr.getAnnotatedText().length();
419 if (processedLengths.get(i) < lFTL
420 && lFr.getAnnotatedText().substring(processedLengths.get(i), lFTL).equals(content)) {
421 retFragments.get(i).add(f);
423 badFragments.get(i).add(f);
425 processedLengths.set(i, processedLengths.get(i) + length);
426 if (i == convertedLin) {
429 }
else if (lFr.
getOffset() < nStartOffset && lFrEnd > nEndOffset) {
431 String path = XPathHelper.XPathStringOfNode(currentNode);
433 Integer length = nContent.length();
434 int lFTL = lFr.getAnnotatedText().length();
435 int lFTE = processedLengths.get(i) + length;
437 if (processedLengths.get(i) < lFTL && lFTE <= lFTL &&
438 lFr.getAnnotatedText().substring(processedLengths.get(i), lFTE).equals(nContent)) {
439 retFragments.get(i).add(f);
441 badFragments.get(i).add(f);
443 processedLengths.set(i, processedLengths.get(i) + length);
448 lFr.setOffset(lFr.getOffset() + newlineOffsetCompensation);
451 }
while (i < linLength && lFr.
getOffset() < nEndOffset);
453 nStartOffset += nContent.length();
454 currentNode = nIter.nextNode();
455 newlineOffsetCompensation += 1;
469 StringBuilder linDoc =
new StringBuilder();
470 if (doc.getDocumentElement() == null) {
473 NodeIterator nIter = ((DocumentTraversal) doc).createNodeIterator(doc.getDocumentElement(),
474 NodeFilter.SHOW_TEXT + NodeFilter.SHOW_CDATA_SECTION, null,
true);
475 Node currentNode = nIter.nextNode();
477 boolean compensateNewline =
false;
478 while (currentNode != null) {
482 if (currentNode.getNodeValue() == null || currentNode.getNodeValue().replaceAll(
"[\\s\\u00A0]+$",
"").contentEquals(
"")) {
483 currentNode = nIter.nextNode();
488 if (compensateNewline) {
493 linDoc.append(currentNode.getNodeValue());
494 currentNode = nIter.nextNode();
495 compensateNewline =
true;
498 return linDoc.toString();
512 Document doc, ArrayList<ArrayList<SuggestionFragment>> notConverted,
517 int numOfAnnots = comFragments.size();
519 ArrayList<ArrayList<SuggestionFragment>> comFrCopy =
new ArrayList<ArrayList<SuggestionFragment>>();
521 ArrayList<SuggestionFragment> retFragments =
new ArrayList<SuggestionFragment>(numOfAnnots);
523 ArrayList<SuggestionFragment> partialFragments =
new ArrayList<SuggestionFragment>(numOfAnnots);
525 ArrayList<Integer> numsOfSpacesSO =
new ArrayList<Integer>(numOfAnnots);
527 ArrayList<Integer> numsOfSpacesL =
new ArrayList<Integer>(numOfAnnots);
529 for (
int i = 0; i < numOfAnnots; i++) {
530 ArrayList<SuggestionFragment> cFr = comFragments.get(i);
531 ArrayList<SuggestionFragment> aFragments =
new ArrayList<SuggestionFragment>();
532 notConverted.add(
new ArrayList<SuggestionFragment>());
533 comFrCopy.add(aFragments);
534 for (Iterator<SuggestionFragment> aFrIt = cFr.iterator(); aFrIt.hasNext();) {
541 retFragments.add(null);
542 partialFragments.add(null);
543 numsOfSpacesSO.add(null);
544 numsOfSpacesL.add(null);
550 if (doc.getDocumentElement() == null) {
553 NodeIterator nIter = ((DocumentTraversal) doc).createNodeIterator(doc.getDocumentElement(),
554 NodeFilter.SHOW_TEXT + NodeFilter.SHOW_CDATA_SECTION, null,
true);
555 Integer nStartOffset = 0;
556 Integer nEndOffset = 0;
557 String nContent =
"";
558 Node currentNode = nIter.nextNode();
561 while (currentNode != null && fragCount > 0) {
563 nContent = currentNode.getNodeValue();
564 int nContentL = nContent.length();
565 nEndOffset = nStartOffset + nContentL;
566 path = XPathHelper.XPathStringOfNode(currentNode);
568 for (
int i = 0; i < numOfAnnots; i++) {
570 ArrayList<SuggestionFragment> aFragments = comFrCopy.get(i);
571 for (Iterator<SuggestionFragment> frIt = aFragments.iterator(); frIt.hasNext();) {
575 notConverted.get(i).add(fr);
580 if (fr.
getPath().equals(path)) {
584 frEndOffset = fr.getOffset() + fr.
getLength();
587 frEndOffset = fr.getLength();
589 if (frEndOffset > nContentL) {
590 notConverted.get(i).add(fr);
594 }
else if (fr.
getAnnotatedText() != null && !nContent.substring(fr.getOffset(), frEndOffset).equals(fr.getAnnotatedText())) {
597 notConverted.get(i).add(fr);
602 if (partFrag == null && frEndOffset < nContentL) {
608 numsOfSpacesSO.set(i,nodeCounter);
609 numsOfSpacesL.set(i,0);
612 fr.getRefSuggestion());
615 retFragments.set(i, linFragment);
616 }
else if (partFrag == null && frEndOffset == nContentL) {
622 numsOfSpacesSO.set(i,nodeCounter);
623 numsOfSpacesL.set(i,0);
626 fr.getRefSuggestion());
628 partialFragments.set(i, linFragment);
631 }
else if (partFrag != null && frEndOffset < nContentL) {
635 linFragment.setAnnotatedText(linFragment.getAnnotatedText() + fr.
getAnnotatedText());
637 linFragment.setLength(linFragment.getLength() + fr.
getLength());
638 retFragments.set(i, linFragment);
639 partialFragments.set(i, null);
640 if (numsOfSpacesL.get(i) != null) {
641 numsOfSpacesL.set(i,numsOfSpacesL.get(i) + 1);
645 }
else if (partFrag != null && frEndOffset == nContentL) {
649 linFragment.setAnnotatedText(linFragment.getAnnotatedText() + fr.
getAnnotatedText());
651 linFragment.setLength(linFragment.getLength() + fr.
getLength());
652 if (numsOfSpacesL.get(i) != null) {
653 numsOfSpacesL.set(i,numsOfSpacesL.get(i) + 1);
659 notConverted.get(i).add(fr);
669 nStartOffset += nContent.length();
670 currentNode = nIter.nextNode();
671 while (currentNode != null && (currentNode.getNodeValue() == null || currentNode.getNodeValue().replaceAll(
"[\\s\\u00A0]+$",
"").contentEquals(
""))) {
672 currentNode = nIter.nextNode();
678 for (
int i = 0; i < numOfAnnots; i++) {
680 ArrayList<SuggestionFragment> aFragments = comFrCopy.get(i);
682 for (Iterator<SuggestionFragment> aFrIt = aFragments.iterator(); aFrIt.hasNext();) {
684 notConverted.get(i).add(fr);
690 if (partialFragments.get(i) != null) {
691 retFragments.set(i, partialFragments.get(i));
696 for (
int i = 0; i < numOfAnnots; i++) {
702 Integer spaces = numsOfSpacesSO.get(i);
703 if (spaces == null) {
706 fr.setOffset(fr.getOffset() + spaces);
708 spaces = numsOfSpacesL.get(i);
709 if (spaces == null) {
712 fr.setLength(fr.getLength() + spaces);
730 Document doc, ArrayList<ArrayList<Fragment>> notConverted,
735 int numOfAnnots = comFragments.size();
737 ArrayList<ArrayList<Fragment>> comFrCopy =
new ArrayList<ArrayList<Fragment>>();
739 ArrayList<Fragment> retFragments =
new ArrayList<Fragment>(numOfAnnots);
741 ArrayList<Fragment> partialFragments =
new ArrayList<Fragment>(numOfAnnots);
743 ArrayList<Integer> numsOfSpacesSO =
new ArrayList<Integer>(numOfAnnots);
745 ArrayList<Integer> numsOfSpacesL =
new ArrayList<Integer>(numOfAnnots);
747 for (
int i = 0; i < numOfAnnots; i++) {
748 ArrayList<Fragment> cFr = comFragments.get(i);
749 ArrayList<Fragment> aFragments =
new ArrayList<Fragment>();
750 notConverted.add(
new ArrayList<Fragment>());
751 comFrCopy.add(aFragments);
752 for (Iterator<Fragment> aFrIt = cFr.iterator(); aFrIt.hasNext();) {
759 retFragments.add(null);
760 partialFragments.add(null);
761 numsOfSpacesSO.add(null);
762 numsOfSpacesL.add(null);
768 if (doc.getDocumentElement() == null) {
771 NodeIterator nIter = ((DocumentTraversal) doc).createNodeIterator(doc.getDocumentElement(),
772 NodeFilter.SHOW_TEXT + NodeFilter.SHOW_CDATA_SECTION, null,
true);
773 Integer nStartOffset = 0;
774 Integer nEndOffset = 0;
775 String nContent =
"";
776 Node currentNode = nIter.nextNode();
779 while (currentNode != null && fragCount > 0) {
781 nContent = currentNode.getNodeValue();
782 int nContentL = nContent.length();
783 nEndOffset = nStartOffset + nContentL;
784 path = XPathHelper.XPathStringOfNode(currentNode);
786 for (
int i = 0; i < numOfAnnots; i++) {
788 ArrayList<Fragment> aFragments = comFrCopy.get(i);
789 for (Iterator<Fragment> frIt = aFragments.iterator(); frIt.hasNext();) {
793 notConverted.get(i).add(fr);
798 if (fr.
getPath().equals(path)) {
799 Fragment partFrag = partialFragments.get(i);
802 frEndOffset = fr.getOffset() + fr.
getLength();
805 frEndOffset = fr.getLength();
807 if (frEndOffset > nContentL) {
808 notConverted.get(i).add(fr);
812 }
else if (fr.
getAnnotatedText() != null && !nContent.substring(fr.getOffset(), frEndOffset).equals(fr.getAnnotatedText())) {
815 notConverted.get(i).add(fr);
820 if (partFrag == null && frEndOffset < nContentL) {
826 numsOfSpacesSO.set(i,nodeCounter);
827 numsOfSpacesL.set(i,0);
830 fr.getRefAnnotation());
833 retFragments.set(i, linFragment);
834 }
else if (partFrag == null && frEndOffset == nContentL) {
840 numsOfSpacesSO.set(i,nodeCounter);
841 numsOfSpacesL.set(i,0);
844 fr.getRefAnnotation());
846 partialFragments.set(i, linFragment);
849 }
else if (partFrag != null && frEndOffset < nContentL) {
851 Fragment linFragment = partialFragments.get(i);
853 linFragment.setAnnotatedText(linFragment.getAnnotatedText() + fr.
getAnnotatedText());
855 linFragment.setLength(linFragment.getLength() + fr.
getLength());
856 retFragments.set(i, linFragment);
857 partialFragments.set(i, null);
858 if (numsOfSpacesL.get(i) != null) {
859 numsOfSpacesL.set(i,numsOfSpacesL.get(i) + 1);
863 }
else if (partFrag != null && frEndOffset == nContentL) {
865 Fragment linFragment = partialFragments.get(i);
867 linFragment.setAnnotatedText(linFragment.getAnnotatedText() + fr.
getAnnotatedText());
869 linFragment.setLength(linFragment.getLength() + fr.
getLength());
870 if (numsOfSpacesL.get(i) != null) {
871 numsOfSpacesL.set(i,numsOfSpacesL.get(i) + 1);
877 notConverted.get(i).add(fr);
887 nStartOffset += nContent.length();
888 currentNode = nIter.nextNode();
889 while (currentNode != null && (currentNode.getNodeValue() == null || currentNode.getNodeValue().replaceAll(
"[\\s\\u00A0]+$",
"").contentEquals(
""))) {
890 currentNode = nIter.nextNode();
896 for (
int i = 0; i < numOfAnnots; i++) {
898 ArrayList<Fragment> aFragments = comFrCopy.get(i);
900 for (Iterator<Fragment> aFrIt = aFragments.iterator(); aFrIt.hasNext();) {
902 notConverted.get(i).add(fr);
908 if (partialFragments.get(i) != null) {
909 retFragments.set(i, partialFragments.get(i));
914 for (
int i = 0; i < numOfAnnots; i++) {
920 Integer spaces = numsOfSpacesSO.get(i);
921 if (spaces == null) {
924 fr.setOffset(fr.getOffset() + spaces);
926 spaces = numsOfSpacesL.get(i);
927 if (spaces == null) {
930 fr.setLength(fr.getLength() + spaces);
948 Document doc, ArrayList<ArrayList<AlternativeFragment>> notConverted,
953 int numOfAnnots = comFragments.size();
955 ArrayList<ArrayList<AlternativeFragment>> comFrCopy =
new ArrayList<ArrayList<AlternativeFragment>>();
957 ArrayList<AlternativeFragment> retFragments =
new ArrayList<AlternativeFragment>(numOfAnnots);
959 ArrayList<AlternativeFragment> partialFragments =
new ArrayList<AlternativeFragment>(numOfAnnots);
961 ArrayList<Integer> numsOfSpacesSO =
new ArrayList<Integer>(numOfAnnots);
963 ArrayList<Integer> numsOfSpacesL =
new ArrayList<Integer>(numOfAnnots);
965 for (
int i = 0; i < numOfAnnots; i++) {
966 ArrayList<AlternativeFragment> cFr = comFragments.get(i);
967 ArrayList<AlternativeFragment> aFragments =
new ArrayList<AlternativeFragment>();
968 notConverted.add(
new ArrayList<AlternativeFragment>());
969 comFrCopy.add(aFragments);
970 for (Iterator<AlternativeFragment> aFrIt = cFr.iterator(); aFrIt.hasNext();) {
977 retFragments.add(null);
978 partialFragments.add(null);
979 numsOfSpacesSO.add(null);
980 numsOfSpacesL.add(null);
986 if (doc.getDocumentElement() == null) {
989 NodeIterator nIter = ((DocumentTraversal) doc).createNodeIterator(doc.getDocumentElement(),
990 NodeFilter.SHOW_TEXT + NodeFilter.SHOW_CDATA_SECTION, null,
true);
991 Integer nStartOffset = 0;
992 Integer nEndOffset = 0;
993 String nContent =
"";
994 Node currentNode = nIter.nextNode();
997 while (currentNode != null && fragCount > 0) {
999 nContent = currentNode.getNodeValue();
1000 int nContentL = nContent.length();
1001 nEndOffset = nStartOffset + nContentL;
1002 path = XPathHelper.XPathStringOfNode(currentNode);
1004 for (
int i = 0; i < numOfAnnots; i++) {
1006 ArrayList<AlternativeFragment> aFragments = comFrCopy.get(i);
1007 for (Iterator<AlternativeFragment> frIt = aFragments.iterator(); frIt.hasNext();) {
1011 notConverted.get(i).add(fr);
1016 if (fr.
getPath().equals(path)) {
1020 frEndOffset = fr.getOffset() + fr.
getLength();
1023 frEndOffset = fr.getLength();
1025 if (frEndOffset > nContentL) {
1026 notConverted.get(i).add(fr);
1030 }
else if (fr.
getAnnotatedText() != null && !nContent.substring(fr.getOffset(), frEndOffset).equals(fr.getAnnotatedText())) {
1033 notConverted.get(i).add(fr);
1038 if (partFrag == null && frEndOffset < nContentL) {
1044 numsOfSpacesSO.set(i,nodeCounter);
1045 numsOfSpacesL.set(i,0);
1048 fr.getRefAlternative());
1051 retFragments.set(i, linFragment);
1052 }
else if (partFrag == null && frEndOffset == nContentL) {
1058 numsOfSpacesSO.set(i,nodeCounter);
1059 numsOfSpacesL.set(i,0);
1062 fr.getRefAlternative());
1064 partialFragments.set(i, linFragment);
1067 }
else if (partFrag != null && frEndOffset < nContentL) {
1071 linFragment.setAnnotatedText(linFragment.getAnnotatedText() + fr.
getAnnotatedText());
1073 linFragment.setLength(linFragment.getLength() + fr.
getLength());
1074 retFragments.set(i, linFragment);
1075 partialFragments.set(i, null);
1076 if (numsOfSpacesL.get(i) != null) {
1077 numsOfSpacesL.set(i,numsOfSpacesL.get(i) + 1);
1081 }
else if (partFrag != null && frEndOffset == nContentL) {
1085 linFragment.setAnnotatedText(linFragment.getAnnotatedText() + fr.
getAnnotatedText());
1087 linFragment.setLength(linFragment.getLength() + fr.
getLength());
1088 if (numsOfSpacesL.get(i) != null) {
1089 numsOfSpacesL.set(i,numsOfSpacesL.get(i) + 1);
1095 notConverted.get(i).add(fr);
1105 nStartOffset += nContent.length();
1106 currentNode = nIter.nextNode();
1107 while (currentNode != null && (currentNode.getNodeValue() == null || currentNode.getNodeValue().replaceAll(
"[\\s\\u00A0]+$",
"").contentEquals(
""))) {
1108 currentNode = nIter.nextNode();
1114 for (
int i = 0; i < numOfAnnots; i++) {
1116 ArrayList<AlternativeFragment> aFragments = comFrCopy.get(i);
1118 for (Iterator<AlternativeFragment> aFrIt = aFragments.iterator(); aFrIt.hasNext();) {
1120 notConverted.get(i).add(fr);
1126 if (partialFragments.get(i) != null) {
1127 retFragments.set(i, partialFragments.get(i));
1132 for (
int i = 0; i < numOfAnnots; i++) {
1138 Integer spaces = numsOfSpacesSO.get(i);
1139 if (spaces == null) {
1142 fr.setOffset(fr.getOffset() + spaces);
1144 spaces = numsOfSpacesL.get(i);
1145 if (spaces == null) {
1148 fr.setLength(fr.getLength() + spaces);
1152 return retFragments;
1164 StringBuilder linFr =
new StringBuilder();
1165 NodeList nodeL = docFr.getChildNodes();
1166 int nodeCount = nodeL.getLength();
1167 for (
int i = 0; i < nodeCount; i++) {
1168 Node curRootNode = nodeL.item(i);
1169 NodeIterator nIter = ((DocumentTraversal) doc).createNodeIterator(curRootNode,
1170 NodeFilter.SHOW_TEXT + NodeFilter.SHOW_CDATA_SECTION, null,
true);
1171 Node currentNode = nIter.nextNode();
1172 boolean compensateNewline =
false;
1173 while (currentNode != null) {
1174 short nodeType = currentNode.getNodeType();
1175 if (currentNode.getNodeValue() == null) {
1176 currentNode = nIter.nextNode();
1179 if (compensateNewline) {
1182 linFr.append(currentNode.getNodeValue());
1183 if (!compensateNewline) {
1184 compensateNewline =
true;
1186 currentNode = nIter.nextNode();
1189 return linFr.toString();
1202 Document doc, ArrayList<TextModification> notConverted)
1203 throws ParserConfigurationException, SAXException,
1210 int numOfMods = comModifications.size();
1211 ArrayList<TextModification> comModCopy =
new ArrayList<TextModification>(numOfMods);
1212 ArrayList<TextModification> notConvertedFlag =
new ArrayList<TextModification>(numOfMods);
1213 notConverted =
new ArrayList<TextModification>();
1215 for (
int i = 0; i < numOfMods; i++) {
1218 notConvertedFlag.add(cM);
1221 ArrayList<TextModification> retModifications =
new ArrayList<TextModification>();
1226 if (doc.getDocumentElement() == null) {
1229 NodeIterator nIter = ((DocumentTraversal) doc).createNodeIterator(doc.getDocumentElement(),
1230 NodeFilter.SHOW_TEXT + NodeFilter.SHOW_CDATA_SECTION, null,
true);
1231 Integer nStartOffset = 0;
1232 Integer nEndOffset = 0;
1233 String nContent =
"";
1234 Node currentNode = nIter.nextNode();
1237 if (currentNode == null) {
1238 int nodeCounter = 0;
1240 for (Iterator<TextModification> tmIt = comModCopy.iterator(); tmIt.hasNext();) {
1245 tm.setNewContent(tm.getNewContent().replace(
"<br>",
" "));
1247 tm.setNewContent(tm.getNewContent().replaceAll(
"<[^>\\s]*>",
""));
1248 tm.setLength(tm.getNewContent().length());
1251 modifStart + tm.
getLength(), tm.getNewContent());
1252 modifStart += tm.getLength();
1253 retModifications.add(lTM);
1254 notConvertedFlag.set(nodeCounter, null);
1264 boolean compensateNewline =
false;
1265 while (currentNode != null && numOfMods > 0) {
1267 if ( compensateNewline ) {
1269 nContent = currentNode.getNodeValue().concat(
" ");
1272 nContent = currentNode.getNodeValue();
1274 int nContentL = nContent.length();
1275 nEndOffset = nStartOffset + nContentL;
1276 path = XPathHelper.XPathStringOfNode(currentNode);
1279 for (Iterator<TextModification> tmIt = comModCopy.iterator(); tmIt.hasNext();) {
1283 if (tm.
getPath() == null || tm.getPath().isEmpty()) {
1285 retModifications.add(tm);
1286 notConvertedFlag.set(modifCounter, null);
1291 if (tm.
getOffset() == null && tm.getLength() == null && tm.
getPath().equals(
"/HTML[1]/BODY[1]")) {
1294 NodeIterator wholeDocEraseIt = ((DocumentTraversal) doc).createNodeIterator(doc.getDocumentElement(),
1295 NodeFilter.SHOW_TEXT + NodeFilter.SHOW_CDATA_SECTION, null,
true);
1296 Node n = wholeDocEraseIt.nextNode();
1297 boolean newlineCompens =
false;
1299 String nodeCont = null;
1303 nodeCont = n.getNodeValue();
1304 if (nodeCont == null) {
1305 n = wholeDocEraseIt.nextNode();
1308 docLength += nodeCont.length();
1309 if (newlineCompens) {
1312 if (!newlineCompens) {
1313 newlineCompens =
true;
1315 n = wholeDocEraseIt.nextNode();
1318 String newContent =
"";
1321 if (tm.
getNewContent() != null && !tm.getNewContent().equals(
"<body></body>")) {
1322 newContent = tm.getNewContent();
1324 newContent = newContent.replace(
"<br>",
" ");
1326 newContent = newContent.replaceAll(
"<[^>\\s]*>",
"");
1331 retModifications.add(lTM);
1332 notConvertedFlag.set(modifCounter, null);
1337 if (path.startsWith(tm.
getPath())) {
1340 String linContent =
"";
1350 nEndOffset - nStartOffset, linContent);
1354 nEndOffset - nStartOffset, linContent);
1357 retModifications.add(lTM);
1358 notConvertedFlag.set(modifCounter, null);
1364 if (tm.
getOffset() != null && tm.getLength() != null) {
1365 int tmEndOffset = tm.getOffset() + tm.
getLength();
1367 if (tmEndOffset > nContentL) {
1368 notConverted.add(tm);
1369 notConvertedFlag.set(modifCounter, null);;
1377 nStartOffset += nContent.length() + 1;
1379 currentNode = nIter.nextNode();
1385 for (Iterator<TextModification> tmIt = notConvertedFlag.iterator(); tmIt.hasNext();) {
1388 notConverted.add(tm);
1392 return retModifications;
Utility functions for document linearization.
static ArrayList< TextModification > modificationsToLinMod(ArrayList< TextModification > comModifications, Document doc, ArrayList< TextModification > notConverted)
static ArrayList< ArrayList< AlternativeFragment > > linAltFragmentsToFragments(ArrayList< AlternativeFragment > linFragments, Document doc, ArrayList< ArrayList< AlternativeFragment >> badFragments)
String getAnnotatedText()
static String linearizeDocumentFragment(DocumentFragment docFr, Document doc)
Class providing access to available matchers.
DocumentFragment getFragmentFromString(String text)
Class representing a suggested annotation fragment.
String getAnnotatedText()
static String linearizeDocument(Document doc)
Class representing a fragment for a suggestion alternative.
String getAnnotatedText()
Class representing a modification of the annotated document text.
static ArrayList< ArrayList< SuggestionFragment > > linSugFragmentsToFragments(ArrayList< SuggestionFragment > linFragments, Document doc, ArrayList< ArrayList< SuggestionFragment >> badFragments)
static ArrayList< Fragment > fragmentsToLinFragments(ArrayList< ArrayList< Fragment >> comFragments, Document doc, ArrayList< ArrayList< Fragment >> notConverted, boolean addSpaces)
Helper class with utility XPath methods.
static ArrayList< ArrayList< Fragment > > linFragmentsToFragments(ArrayList< Fragment > linFragments, Document doc, ArrayList< ArrayList< Fragment >> badFragments)
static MatcherProvider matcherProvider
Class representing an annotated fragment.
static ArrayList< SuggestionFragment > fragmentsToLinSugFragments(ArrayList< ArrayList< SuggestionFragment >> comFragments, Document doc, ArrayList< ArrayList< SuggestionFragment >> notConverted, boolean addSpaces)
static ArrayList< AlternativeFragment > fragmentsToLinAltFragments(ArrayList< ArrayList< AlternativeFragment >> comFragments, Document doc, ArrayList< ArrayList< AlternativeFragment >> notConverted, boolean addSpaces)