19 package cz.vutbr.fit.knot.annotations.textModifications;
24 import java.io.IOException;
25 import java.io.StringReader;
26 import java.util.ArrayList;
27 import java.util.Iterator;
28 import java.util.LinkedHashMap;
29 import java.util.List;
30 import java.util.Map.Entry;
32 import javax.xml.parsers.ParserConfigurationException;
33 import javax.xml.xpath.XPathExpressionException;
34 import nu.validator.htmlparser.dom.HtmlDocumentBuilder;
35 import org.custommonkey.xmlunit.DetailedDiff;
36 import org.custommonkey.xmlunit.Diff;
37 import org.custommonkey.xmlunit.Difference;
38 import org.custommonkey.xmlunit.NodeDetail;
39 import org.custommonkey.xmlunit.XMLUnit;
40 import org.w3c.dom.Document;
41 import org.w3c.dom.NamedNodeMap;
42 import org.w3c.dom.Node;
43 import org.w3c.dom.NodeList;
44 import org.xml.sax.InputSource;
45 import org.xml.sax.SAXException;
// Tail of a getTextModifications signature whose opening line is not visible in
// this listing. Below, oldVersion and newVersion are read through StringReader,
// i.e. they are handled as text here.
68 int highestProtocol)
throws ParserConfigurationException, SAXException, IOException{
// A single HtmlDocumentBuilder instance is used to parse both versions.
70 HtmlDocumentBuilder htmlDocumentBuilder =
new HtmlDocumentBuilder();
// Wrap the text of each version in an InputSource backed by a StringReader.
72 InputSource inputSourceOld =
new InputSource(
new StringReader(oldVersion));
73 InputSource inputSourceNew =
new InputSource(
new StringReader(newVersion));
// Parse both versions into DOM documents. What is done with oldDom/newDom next
// is not visible here - presumably they are handed to the Document-based
// overload (TODO confirm).
75 Document oldDom = htmlDocumentBuilder.parse(inputSourceOld);
76 Document newDom = htmlDocumentBuilder.parse(inputSourceNew);
// Tail of a second getTextModifications signature (opening line not visible).
// Here oldVersion and newVersion are passed straight to XMLUnit's Diff and to
// XPathHelper.getNode, so they are already parsed documents.
93 int highestProtocol)
throws SAXException, IOException{
// Static XMLUnit setting: attribute order is NOT ignored when comparing.
95 XMLUnit.setIgnoreAttributeOrder(
false);
// oldVersion is the first ("control") argument and newVersion the second
// ("test") argument of the diff; DetailedDiff then lists every difference.
97 Diff diff =
new Diff(oldVersion, newVersion);
98 DetailedDiff myDiff =
new DetailedDiff(diff);
99 List allDifferences = myDiff.getAllDifferences();
// Collected modifications keyed by a path string; LinkedHashMap keeps them in
// insertion order. removeNode/addNode below receive this map and fill it.
101 LinkedHashMap<String, TextModification> pathModMap =
new LinkedHashMap<String, TextModification>();
// Starts false; the lines that set or read this flag are not visible here.
103 boolean unknownDif =
false;
// Walk every reported difference and dispatch on its description text
// (compared case-insensitively).
105 for (
int i=0; i<allDifferences.size(); i++) {
106 Difference difference = (Difference) allDifferences.get(i);
108 NodeDetail controlNode = difference.getControlNodeDetail();
110 String desc = difference.getDescription();
112 NodeDetail testNode = difference.getTestNodeDetail();
// Case "text value": checks whether the control and test node share the same
// non-null XPath location; the action taken is on lines not shown.
114 if(desc.equalsIgnoreCase(
"text value")){
116 if(controlNode.getXpathLocation() != null && controlNode.getXpathLocation().equals(testNode.getXpathLocation())){
// Case "presence of child node": no XPath on the test side leads to removeNode
// for the control node.
123 else if(desc.equalsIgnoreCase(
"presence of child node")){
124 if(testNode.getXpathLocation() == null){
125 removeNode(pathModMap, controlNode, highestProtocol, newVersion);
// Otherwise (presumably the else branch - TODO confirm) the test node's XPath
// is looked up in the OLD document; addNode is attempted only if nothing was
// found there. The catch body and the handling of a false result from addNode
// are not visible.
128 Node failSafeNode = null;
130 failSafeNode = XPathHelper.getNode(oldVersion, testNode.getXpathLocation());
131 }
catch (XPathExpressionException ex) {
136 if(failSafeNode == null){
137 if(!
addNode(highestProtocol, pathModMap, testNode, oldVersion)){
// Case "sequence of child nodes": same-XPath check first (action not shown),
// then removeNode when the control side has an XPath and addNode when the test
// side has one. How these checks nest is not visible in this listing.
144 else if(desc.equalsIgnoreCase(
"sequence of child nodes")){
145 if(controlNode.getXpathLocation() != null && controlNode.getXpathLocation().equals(testNode.getXpathLocation())){
149 if(controlNode.getXpathLocation() != null){
151 removeNode(pathModMap, controlNode, highestProtocol, newVersion);
158 if(testNode.getXpathLocation() != null){
160 if(!
addNode(highestProtocol, pathModMap, testNode, oldVersion)){
// Case "number of child nodes": body not visible in this listing.
172 else if(desc.equalsIgnoreCase(
"number of child nodes")){
// Attribute-, tag-name- and node-type-level differences are all handled by one
// shared branch.
176 else if(desc.equalsIgnoreCase(
"number of element attributes") ||
177 desc.equalsIgnoreCase(
"attribute name") ||
178 desc.equalsIgnoreCase(
"attribute value") ||
179 desc.equalsIgnoreCase(
"attribute value explicitly specified") ||
180 desc.equalsIgnoreCase(
"sequence of attributes") ||
181 desc.equalsIgnoreCase(
"element tag name") ||
182 desc.equalsIgnoreCase(
"node type") ||
183 desc.equalsIgnoreCase(
"presence of child nodes to be"))
// Same visible pattern as the "sequence of child nodes" case: same-XPath check,
// then removeNode for the control side and addNode for the test side.
185 if(controlNode.getXpathLocation() != null && controlNode.getXpathLocation().equals(testNode.getXpathLocation())){
189 if(controlNode.getXpathLocation() != null){
191 removeNode(pathModMap, controlNode, highestProtocol, newVersion);
197 if(testNode.getXpathLocation() != null){
199 if(!
addNode(highestProtocol, pathModMap, testNode, oldVersion)){
// Two declarations of tmsUnit are visible, so they must live in different
// scopes (the surrounding condition is not shown): one list starts empty, the
// other is a copy of all collected modifications in map order.
218 ArrayList<TextModification> tmsUnit =
new ArrayList<TextModification>();
222 ArrayList<TextModification> tmsUnit =
new ArrayList<TextModification>(pathModMap.values());
// Fragment; the enclosing method header is not visible (presumably
// exchangeWholeBody(Document newVersion) - TODO confirm).
// Starts at the first child of newVersion and keeps looping while the current
// node is null or is not named "html" (case-insensitive).
// NOTE(review): the condition also holds for node == null, so the unseen loop
// body has to cope with a null node - confirm it cannot dereference it.
233 Node node = newVersion.getFirstChild();
234 while (node == null || !node.getNodeName().equalsIgnoreCase(
"html")) {
// Fragment of a routine that turns the node `root` into a markup string
// (method header not visible in this listing).
// The tag name used in the output is the lower-cased node name.
251 String rootName = root.getNodeName().toLowerCase();
252 StringBuilder sb =
new StringBuilder();
// Non-text nodes: "br" and "hr" each get their own branch (bodies not shown).
254 if (root.getNodeType() != Node.TEXT_NODE) {
255 if (root.getNodeName().equalsIgnoreCase(
"br")) {
257 }
else if (root.getNodeName().equalsIgnoreCase(
"hr")) {
// Start of the opening tag; whatever follows the name (attributes, closing
// bracket) is on lines not shown.
261 sb.append(
"<").append(rootName);
// Walk the direct children. Text children contribute their node value exactly
// as stored. `n` is declared on a line not shown - presumably nl.item(i).
// Handling of non-text children is not visible.
265 NodeList nl = root.getChildNodes();
266 for (
int i = 0; i < nl.getLength(); i++) {
268 if (n.getNodeType() == Node.TEXT_NODE) {
269 sb.append(n.getNodeValue());
// Closing tag.
274 sb.append(
"</").append(rootName).append(
">");
// Presumably the text-node branch of the type check above: the node value is
// appended unchanged.
276 sb.append(root.getNodeValue());
279 return sb.toString();
// Fragment of a routine that renders the attribute map `attributes` as text
// (method header not visible in this listing).
289 int attrLength = attributes.getLength();
// The result starts with a single space.
292 StringBuilder sb =
new StringBuilder(
" ");
// For each attribute the name and then the value are appended; what is written
// between and after them is on lines not shown.
293 for(
int i = 0; i < attrLength; i++){
294 Node attr = attributes.item(i);
295 sb.append(attr.getNodeName());
297 sb.append(attr.getNodeValue());
// Extra output for every attribute except the last one (body not shown -
// presumably a separator).
298 if(i != attrLength-1){
305 return sb.toString();
// Fragment; method header and the initialisation of `sibling` are not visible.
// Advances `sibling` along the next-sibling chain until it has no next sibling,
// i.e. until it is the last node of that chain.
319 while(sibling.getNextSibling() != null){
320 sibling = sibling.getNextSibling();
/**
 * Checks the keys of {@code pathModMap} against {@code XPath}.
 * The visible test is whether {@code XPath} contains a stored key as a
 * substring; the loop header and the return statements are on lines not shown
 * in this listing.
 *
 * @param pathModMap modifications keyed by path string
 * @param XPath path that is searched for occurrences of the stored keys
 */
333 private static boolean existsParentMod(LinkedHashMap<String, TextModification> pathModMap, String XPath){
334 Set<String> paths = pathModMap.keySet();
335 Iterator<String> it = paths.iterator();
337 String path = it.next();
// NOTE(review): String.contains matches anywhere in XPath, not only at its
// start - confirm a match in the middle of the path is intended.
338 if(XPath.contains(path)){
/**
 * Iterates over all entries of {@code pathModMap} and selects those whose key
 * contains {@code XPath} as a substring. What is done with a selected entry is
 * on lines not shown (presumably it is removed through the iterator - TODO
 * confirm).
 *
 * @param pathModMap modifications keyed by path string
 * @param XPath path fragment that selected keys must contain
 */
351 private static void removeAllChildMods(LinkedHashMap<String, TextModification> pathModMap, String XPath){
// An explicit entry iterator is used rather than a for-each loop.
352 Iterator<Entry<String, TextModification>> setIt = pathModMap.entrySet().iterator();
353 while(setIt.hasNext()){
354 Entry<String, TextModification> entry = setIt.next();
355 if(entry.getKey().contains(XPath)){
// Fragment (method header not visible). Walks backwards through the previous
// siblings of testedNode and looks each one up in `version` by its XPath
// string; the loop ends when a lookup yields a node or the siblings run out.
369 Node prevNode = testedNode.getPreviousSibling();
370 Node evaluatedNode = null;
371 while(prevNode != null && evaluatedNode == null){
373 evaluatedNode = XPathHelper.getNode(version, XPathHelper.XPathStringOfNode(prevNode));
374 }
catch (XPathExpressionException ex) {
// A failed XPath evaluation is not reported; the search moves one sibling back.
375 prevNode = prevNode.getPreviousSibling();
// Second step backwards; the condition guarding it is on a line not shown.
// NOTE(review): if this also runs right after the catch branch above, one
// sibling is skipped and prevNode may already be null here - confirm.
378 prevNode = prevNode.getPreviousSibling();
// Null when no previous sibling could be found in `version`.
380 return evaluatedNode;
// Fragment (method header and branch bodies not visible). The condition holds
// when testedNode is not a text node and is named neither "br" nor "hr"
// (names compared case-insensitively).
390 if(testedNode.getNodeType() != Node.TEXT_NODE &&
391 !testedNode.getNodeName().equalsIgnoreCase(
"br") &&
392 !testedNode.getNodeName().equalsIgnoreCase(
"hr")){
// Fragment (method header not visible). Takes the parent of `child`, builds
// its XPath string and evaluates that path against oldVersion.
406 Node parent = child.getParentNode();
409 String parentPath = XPathHelper.XPathStringOfNode(parent);
410 Node evaluatedNode = null;
412 evaluatedNode = XPathHelper.getNode(oldVersion, parentPath);
413 }
catch (XPathExpressionException ex) {
// Stays null if the lookup assigned nothing; the catch body is not visible.
416 return evaluatedNode;
// Fragment (method header not visible). Builds the XPath string of `node`
// itself - despite the variable name parentPath - and evaluates it against
// `version`.
431 String parentPath = XPathHelper.XPathStringOfNode(node);
432 Node evaluatedNode = null;
434 evaluatedNode = XPathHelper.getNode(version, parentPath);
435 }
catch (XPathExpressionException ex) {
// Stays null if the lookup assigned nothing; the catch body is not visible.
438 return evaluatedNode;
// Fragment (method header not visible). Evaluates the given XPath string
// against `version`.
449 Node evaluatedNode = null;
451 evaluatedNode = XPathHelper.getNode(version, XPath);
452 }
catch (XPathExpressionException ex) {
// Stays null if the lookup assigned nothing; the catch body is not visible.
455 return evaluatedNode;
/**
 * Records a modification in {@code pathModMap} for the node carried by
 * {@code addNodeDetail}. Only part of the body is visible in this listing:
 * the lines that build the modification objects, read {@code highestProtocol}
 * and return the result are not shown.
 *
 * @param highestProtocol protocol level (its use is on lines not shown)
 * @param pathModMap modifications keyed by path string; updated by this method
 * @param addNodeDetail XMLUnit detail (node and XPath location) of the node
 * @param oldVersion document in which neighbouring nodes are looked up
 */
467 private static boolean addNode(
int highestProtocol,
468 LinkedHashMap<String,TextModification> pathModMap,
469 NodeDetail addNodeDetail, Document oldVersion)
472 Node
addNode = addNodeDetail.getNode();
// Flags start as false; the lines that set or read them are not visible.
473 boolean v2Success =
false;
475 boolean skipFirst =
false;
// Attempt 1: anchor on the previous sibling. It qualifies when a modification
// is already stored under its path or when it can be found in oldVersion.
// newMod is built on lines not shown and stored under the node's own XPath.
478 Node previousNode = addNode.getPreviousSibling();
479 boolean tryNextNode =
false;
481 if(previousNode != null){
482 String path = XPathHelper.XPathStringOfNode(previousNode);
483 if(pathModMap.containsKey(path) ||
getNodeIfExists(oldVersion, previousNode) != null){
487 pathModMap.put(addNodeDetail.getXpathLocation(), newMod);
// Attempt 2: the same test applied to the next sibling (the condition under
// which this attempt runs is not visible).
498 Node nextNode = addNode.getNextSibling();
500 if(nextNode != null){
501 String path = XPathHelper.XPathStringOfNode(nextNode);
502 if(pathModMap.containsKey(path) ||
getNodeIfExists(oldVersion, nextNode) != null){
506 pathModMap.put(addNodeDetail.getXpathLocation(), newMod);
// Fallback: loop until either a previous node or a parent node has been
// chosen. The lines that assign them are not shown; the visible part climbs to
// the parent of evaluatedNode and tests for running out of ancestors.
519 Node firstPreviousNode = null;
520 Node firstParentNode = null;
522 while(firstPreviousNode == null && firstParentNode == null){
529 firstPreviousNode = null;
530 if(firstParentNode == null){
531 evaluatedNode = evaluatedNode.getParentNode();
532 if(evaluatedNode == null){
// A previous node was chosen: a modification goes under that node's path.
// When modForUpdate (assigned on a line not shown) is non-null, its new
// content is read, changed on a line not shown, and written back.
538 if(firstPreviousNode != null){
539 String firstPreviousNodePath = XPathHelper.XPathStringOfNode(firstPreviousNode);
543 if(modForUpdate != null){
544 String content = modForUpdate.getNewContent();
546 modForUpdate.setNewContent(content);
554 pathModMap.put(firstPreviousNodePath, newMod);
// A parent node was chosen instead: when there is no modForUpdate, the content
// is the serialized parent of evaluatedNode and the modification is stored
// under the parent's path.
557 else if(firstParentNode != null){
558 String firstParentNodePath = XPathHelper.XPathStringOfNode(firstParentNode);
560 if(modForUpdate == null){
561 String content =
nodeToString(evaluatedNode.getParentNode());
564 pathModMap.put(firstParentNodePath, newMod);
/**
 * Stores a modification for the node described by {@code removeNode} under
 * that node's XPath location. Most of the body, including how the
 * modification {@code tm} is built and how {@code highestProtocol} and
 * {@code newVersion} are used, is on lines not shown in this listing.
 *
 * @param pathModMap modifications keyed by path string; updated by this method
 * @param removeNode XMLUnit detail (node and XPath location) of the node
 * @param highestProtocol protocol level (its use is on lines not shown)
 * @param newVersion document (its use is on lines not shown)
 */
580 public static void removeNode(LinkedHashMap<String,TextModification> pathModMap,
581 NodeDetail
removeNode,
int highestProtocol, Document newVersion){
// Tail of a condition whose beginning is not shown: the node is named "br" or
// "hr" (case-insensitive).
584 (removeNode.getNode().getNodeName().equalsIgnoreCase(
"br") ||
585 removeNode.getNode().getNodeName().equalsIgnoreCase(
"hr")))
// Two separate places store tm under the same key - presumably alternative
// branches (TODO confirm).
591 pathModMap.put(removeNode.getXpathLocation(), tm);
597 pathModMap.put(removeNode.getXpathLocation(), tm);
// Tail of a method signature whose opening line is not visible; pathModMap,
// used below, is presumably its first parameter.
615 NodeDetail replaceNodeDetail, Document version,
boolean replaceByDocumentNode){
// Work is done only when existsParentMod reports nothing for this node's XPath.
616 if(!
existsParentMod(pathModMap, replaceNodeDetail.getXpathLocation())){
// Starting from the node itself, loop until firstParentNode is set (the
// assignment is on a line not shown). The visible part climbs to the parent of
// evaluatedNode and tests for running out of ancestors.
617 Node firstParentNode = null;
618 Node evaluatedNode = replaceNodeDetail.getNode();
620 while(firstParentNode == null){
622 if(firstParentNode == null){
623 evaluatedNode = evaluatedNode.getParentNode();
624 if(evaluatedNode == null){
// The modification is stored under the XPath string of the parent that was
// found. modForUpdate and newMod are assigned on lines not shown; the content
// is chosen depending on replaceByDocumentNode (both branches not shown).
630 String firstParentNodePath = XPathHelper.XPathStringOfNode(firstParentNode);
632 if(modForUpdate == null){
633 String content = null;
634 if(replaceByDocumentNode){
642 pathModMap.put(firstParentNodePath, newMod);
/**
 * Stores a modification for the node described by {@code replaceNodeDetail}
 * under that node's XPath location. Only part of the body is visible in this
 * listing; the construction of {@code tm} and {@code newMod} is not shown.
 *
 * @param pathModMap modifications keyed by path string; updated by this method
 * @param replaceNodeDetail XMLUnit detail (node and XPath location) of the node
 */
655 public static void replaceNode(LinkedHashMap<String,TextModification> pathModMap,
656 NodeDetail replaceNodeDetail)
658 String replacePath = replaceNodeDetail.getXpathLocation();
// "br" and "hr" nodes (case-insensitive) are treated separately; the body of
// this branch is not shown.
661 if(replaceNodeDetail.getNode().getNodeName().equalsIgnoreCase(
"br") ||
662 replaceNodeDetail.getNode().getNodeName().equalsIgnoreCase(
"hr"))
// Text nodes: tm (built on a line not shown) is stored under the node's path.
667 if(replaceNodeDetail.getNode().getNodeType() == Node.TEXT_NODE){
669 pathModMap.put(replacePath, tm);
// Presumably the non-text branch: the node is serialized with nodeToString and
// newMod (built on a line not shown) is stored under the node's path.
673 String content =
nodeToString(replaceNodeDetail.getNode());
676 pathModMap.put(replacePath, newMod);
static void replaceNode(LinkedHashMap< String, TextModification > pathModMap, NodeDetail replaceNodeDetail)
static void removeAllChildMods(LinkedHashMap< String, TextModification > pathModMap, String XPath)
static Node lastSiblingNode(Node node)
static String nodeToString(Node root)
static Node getNodeIfExists(Document version, Node node)
static boolean isNodeNotTextAndNotBRHR(Node testedNode)
static Node getFirstPreviousNodeExistingInDoc(Node testedNode, Document version)
static Node getParentIfExists(Document oldVersion, Node child)
static String attributeToString(NamedNodeMap attributes)
static ArrayList< TextModification > getTextModifications(Document oldVersion, Document newVersion, int highestProtocol)
static TextModification exchangeWholeBody(Document newVersion)
TextModification provider.
static final int TEXT_MOD_INS_AFTER
static boolean addNode(int highestProtocol, LinkedHashMap< String, TextModification > pathModMap, NodeDetail addNodeDetail, Document oldVersion)
static Node getNodeIfExists(Document version, String XPath)
Class representing modification of annotated document text.
static ArrayList< TextModification > getTextModifications(String oldVersion, String newVersion, int highestProtocol)
static final int PROTOCOL_LOD_V2
Helper class with XPath utility methods.
static boolean existsParentMod(LinkedHashMap< String, TextModification > pathModMap, String XPath)
static boolean replaceFirstParentNode(LinkedHashMap< String, TextModification > pathModMap, NodeDetail replaceNodeDetail, Document version, boolean replaceByDocumentNode)
static final int TEXT_MOD_INS_BEFORE
static void removeNode(LinkedHashMap< String, TextModification > pathModMap, NodeDetail removeNode, int highestProtocol, Document newVersion)