19 package cz.vutbr.fit.knot.annotations.textModifications;
 
   24 import java.io.IOException;
 
   25 import java.io.StringReader;
 
   26 import java.util.ArrayList;
 
   27 import java.util.Iterator;
 
   28 import java.util.LinkedHashMap;
 
   29 import java.util.List;
 
   30 import java.util.Map.Entry;
 
   32 import javax.xml.parsers.ParserConfigurationException;
 
   33 import javax.xml.xpath.XPathExpressionException;
 
   34 import nu.validator.htmlparser.dom.HtmlDocumentBuilder;
 
   35 import org.custommonkey.xmlunit.DetailedDiff;
 
   36 import org.custommonkey.xmlunit.Diff;
 
   37 import org.custommonkey.xmlunit.Difference;
 
   38 import org.custommonkey.xmlunit.NodeDetail;
 
   39 import org.custommonkey.xmlunit.XMLUnit;
 
   40 import org.w3c.dom.Document;
 
   41 import org.w3c.dom.NamedNodeMap;
 
   42 import org.w3c.dom.Node;
 
   43 import org.w3c.dom.NodeList;
 
   44 import org.xml.sax.InputSource;
 
   45 import org.xml.sax.SAXException;
 
   68           int highestProtocol) 
throws ParserConfigurationException, SAXException, IOException{
 
   70     HtmlDocumentBuilder htmlDocumentBuilder = 
new HtmlDocumentBuilder();
 
   72     InputSource inputSourceOld = 
new InputSource(
new StringReader(oldVersion));
 
   73     InputSource inputSourceNew = 
new InputSource(
new StringReader(newVersion));
 
   75     Document oldDom = htmlDocumentBuilder.parse(inputSourceOld);
 
   76     Document newDom = htmlDocumentBuilder.parse(inputSourceNew);
 
   93           int highestProtocol) 
throws SAXException, IOException{
 
   95     XMLUnit.setIgnoreAttributeOrder(
false);
 
   97     Diff diff = 
new Diff(oldVersion, newVersion);
 
   98     DetailedDiff myDiff = 
new DetailedDiff(diff);
 
   99     List allDifferences = myDiff.getAllDifferences();
 
  101     LinkedHashMap<String, TextModification> pathModMap = 
new LinkedHashMap<String, TextModification>();
 
  103     boolean unknownDif = 
false;
 
  105     for (
int i=0; i<allDifferences.size(); i++) {
 
  106       Difference difference = (Difference) allDifferences.get(i);
 
  108       NodeDetail controlNode = difference.getControlNodeDetail();
 
  110       String desc = difference.getDescription();
 
  112       NodeDetail testNode = difference.getTestNodeDetail();
 
  114       if(desc.equalsIgnoreCase(
"text value")){
 
  116         if(controlNode.getXpathLocation() != null && controlNode.getXpathLocation().equals(testNode.getXpathLocation())){
 
  123       else if(desc.equalsIgnoreCase(
"presence of child node")){
 
  124         if(testNode.getXpathLocation() == null){
 
  125           removeNode(pathModMap, controlNode, highestProtocol, newVersion);
 
  128         Node failSafeNode = null;
 
  130           failSafeNode = XPathHelper.getNode(oldVersion, testNode.getXpathLocation());
 
  131         } 
catch (XPathExpressionException ex) {
 
  136         if(failSafeNode == null){          
 
  137           if(!
addNode(highestProtocol, pathModMap, testNode, oldVersion)){
 
  144       else if(desc.equalsIgnoreCase(
"sequence of child nodes")){
 
  145         if(controlNode.getXpathLocation() != null && controlNode.getXpathLocation().equals(testNode.getXpathLocation())){
 
  149           if(controlNode.getXpathLocation() != null){
 
  151               removeNode(pathModMap, controlNode, highestProtocol, newVersion);
 
  158           if(testNode.getXpathLocation() != null){
 
  160               if(!
addNode(highestProtocol, pathModMap, testNode, oldVersion)){
 
  172       else if(desc.equalsIgnoreCase(
"number of child nodes")){
 
  176       else if(desc.equalsIgnoreCase(
"number of element attributes") || 
 
  177               desc.equalsIgnoreCase(
"attribute name") ||
 
  178               desc.equalsIgnoreCase(
"attribute value") ||
 
  179               desc.equalsIgnoreCase(
"attribute value explicitly specified") ||
 
  180               desc.equalsIgnoreCase(
"sequence of attributes") ||
 
  181               desc.equalsIgnoreCase(
"element tag name") ||
 
  182               desc.equalsIgnoreCase(
"node type") ||
 
  183               desc.equalsIgnoreCase(
"presence of child nodes to be"))
 
  185         if(controlNode.getXpathLocation() != null && controlNode.getXpathLocation().equals(testNode.getXpathLocation())){
 
  189           if(controlNode.getXpathLocation() != null){
 
  191               removeNode(pathModMap, controlNode, highestProtocol, newVersion); 
 
  197           if(testNode.getXpathLocation() != null){
 
  199               if(!
addNode(highestProtocol, pathModMap, testNode, oldVersion)){
 
  218       ArrayList<TextModification> tmsUnit = 
new ArrayList<TextModification>();
 
  222     ArrayList<TextModification> tmsUnit = 
new ArrayList<TextModification>(pathModMap.values());
 
  233     Node node = newVersion.getFirstChild();
 
  234     while (node == null || !node.getNodeName().equalsIgnoreCase(
"html")) {
 
  251     String rootName = root.getNodeName().toLowerCase();
 
  252     StringBuilder sb = 
new StringBuilder();
 
  254     if (root.getNodeType() != Node.TEXT_NODE) {
 
  255       if (root.getNodeName().equalsIgnoreCase(
"br")) {
 
  257       } 
else if (root.getNodeName().equalsIgnoreCase(
"hr")) {
 
  261       sb.append(
"<").append(rootName);
 
  265       NodeList nl = root.getChildNodes();
 
  266       for (
int i = 0; i < nl.getLength(); i++) {
 
  268         if (n.getNodeType() == Node.TEXT_NODE) {
 
  269           sb.append(n.getNodeValue());
 
  274       sb.append(
"</").append(rootName).append(
">");
 
  276       sb.append(root.getNodeValue());
 
  279     return sb.toString();
 
  289     int attrLength = attributes.getLength();
 
  292       StringBuilder sb = 
new StringBuilder(
" ");
 
  293       for(
int i = 0; i < attrLength; i++){
 
  294         Node attr = attributes.item(i);
 
  295         sb.append(attr.getNodeName());
 
  297         sb.append(attr.getNodeValue());
 
  298         if(i != attrLength-1){
 
  305       return sb.toString();
 
  319     while(sibling.getNextSibling() != null){
 
  320       sibling = sibling.getNextSibling();
 
  333   private static boolean existsParentMod(LinkedHashMap<String, TextModification> pathModMap, String XPath){  
 
  334     Set<String> paths = pathModMap.keySet();
 
  335     Iterator<String> it = paths.iterator();
 
  337       String path = it.next();
 
  338       if(XPath.contains(path)){
 
  351   private static void removeAllChildMods(LinkedHashMap<String, TextModification> pathModMap, String XPath){
 
  352     Iterator<Entry<String, TextModification>> setIt = pathModMap.entrySet().iterator();
 
  353     while(setIt.hasNext()){
 
  354       Entry<String, TextModification> entry = setIt.next();
 
  355       if(entry.getKey().contains(XPath)){
 
  369     Node prevNode = testedNode.getPreviousSibling();
 
  370     Node evaluatedNode = null;
 
  371     while(prevNode != null && evaluatedNode == null){
 
  373         evaluatedNode = XPathHelper.getNode(version, XPathHelper.XPathStringOfNode(prevNode));
 
  374       } 
catch (XPathExpressionException ex) {
 
  375         prevNode = prevNode.getPreviousSibling();
 
  378       prevNode = prevNode.getPreviousSibling();
 
  380     return evaluatedNode;
 
  390     if(testedNode.getNodeType() != Node.TEXT_NODE &&
 
  391             !testedNode.getNodeName().equalsIgnoreCase(
"br") &&
 
  392             !testedNode.getNodeName().equalsIgnoreCase(
"hr")){
 
  406     Node parent = child.getParentNode();
 
  409       String parentPath = XPathHelper.XPathStringOfNode(parent);
 
  410       Node evaluatedNode = null;
 
  412         evaluatedNode = XPathHelper.getNode(oldVersion, parentPath);
 
  413       } 
catch (XPathExpressionException ex) {
 
  416       return evaluatedNode;
 
  431     String parentPath = XPathHelper.XPathStringOfNode(node);
 
  432     Node evaluatedNode = null;
 
  434       evaluatedNode = XPathHelper.getNode(version, parentPath);
 
  435     } 
catch (XPathExpressionException ex) {
 
  438     return evaluatedNode;
 
  449     Node evaluatedNode = null;
 
  451       evaluatedNode = XPathHelper.getNode(version, XPath);
 
  452     } 
catch (XPathExpressionException ex) {
 
  455     return evaluatedNode;
 
  467   private static boolean addNode(
int highestProtocol, 
 
  468           LinkedHashMap<String,TextModification> pathModMap,
 
  469           NodeDetail addNodeDetail, Document oldVersion)
 
  472       Node 
addNode = addNodeDetail.getNode();
 
  473       boolean v2Success = 
false;
 
  475       boolean skipFirst = 
false;
 
  478         Node previousNode = addNode.getPreviousSibling();
 
  479         boolean tryNextNode = 
false;
 
  481         if(previousNode != null){
 
  482           String path = XPathHelper.XPathStringOfNode(previousNode);
 
  483           if(pathModMap.containsKey(path) || 
getNodeIfExists(oldVersion, previousNode) != null){
 
  487             pathModMap.put(addNodeDetail.getXpathLocation(), newMod);
 
  498           Node nextNode = addNode.getNextSibling();
 
  500           if(nextNode != null){
 
  501             String path = XPathHelper.XPathStringOfNode(nextNode);        
 
  502             if(pathModMap.containsKey(path) || 
getNodeIfExists(oldVersion, nextNode) != null){
 
  506               pathModMap.put(addNodeDetail.getXpathLocation(), newMod);
 
  519         Node firstPreviousNode = null;
 
  520         Node firstParentNode = null;   
 
  522         while(firstPreviousNode == null && firstParentNode == null){
 
  529             firstPreviousNode = null;
 
  530             if(firstParentNode == null){
 
  531               evaluatedNode = evaluatedNode.getParentNode();
 
  532               if(evaluatedNode == null){
 
  538         if(firstPreviousNode != null){
 
  539           String firstPreviousNodePath = XPathHelper.XPathStringOfNode(firstPreviousNode);
 
  543           if(modForUpdate != null){
 
  544             String content = modForUpdate.getNewContent();
 
  546             modForUpdate.setNewContent(content);
 
  554             pathModMap.put(firstPreviousNodePath, newMod);
 
  557         else if(firstParentNode != null){
 
  558           String firstParentNodePath = XPathHelper.XPathStringOfNode(firstParentNode);
 
  560           if(modForUpdate == null){
 
  561             String content = 
nodeToString(evaluatedNode.getParentNode());
 
  564             pathModMap.put(firstParentNodePath, newMod);
 
  580   public static void removeNode(LinkedHashMap<String,TextModification> pathModMap, 
 
  581           NodeDetail 
removeNode, 
int highestProtocol, Document newVersion){
 
  584               (removeNode.getNode().getNodeName().equalsIgnoreCase(
"br") ||
 
  585               removeNode.getNode().getNodeName().equalsIgnoreCase(
"hr")))
 
  591           pathModMap.put(removeNode.getXpathLocation(), tm);
 
  597         pathModMap.put(removeNode.getXpathLocation(), tm);
 
  615           NodeDetail replaceNodeDetail, Document version, 
boolean replaceByDocumentNode){
 
  616     if(!
existsParentMod(pathModMap, replaceNodeDetail.getXpathLocation())){
 
  617       Node firstParentNode = null;
 
  618       Node evaluatedNode = replaceNodeDetail.getNode();
 
  620       while(firstParentNode == null){
 
  622         if(firstParentNode == null){
 
  623           evaluatedNode = evaluatedNode.getParentNode();
 
  624           if(evaluatedNode == null){
 
  630       String firstParentNodePath = XPathHelper.XPathStringOfNode(firstParentNode);
 
  632       if(modForUpdate == null){
 
  633         String content = null;
 
  634         if(replaceByDocumentNode){
 
  642         pathModMap.put(firstParentNodePath, newMod);
 
  655   public static void replaceNode(LinkedHashMap<String,TextModification> pathModMap,
 
  656           NodeDetail replaceNodeDetail)
 
  658     String replacePath = replaceNodeDetail.getXpathLocation();
 
  661       if(replaceNodeDetail.getNode().getNodeName().equalsIgnoreCase(
"br") ||
 
  662             replaceNodeDetail.getNode().getNodeName().equalsIgnoreCase(
"hr"))
 
  667       if(replaceNodeDetail.getNode().getNodeType() == Node.TEXT_NODE){
 
  669         pathModMap.put(replacePath, tm);
 
  673         String content = 
nodeToString(replaceNodeDetail.getNode());
 
  676         pathModMap.put(replacePath, newMod);
 
static void replaceNode(LinkedHashMap< String, TextModification > pathModMap, NodeDetail replaceNodeDetail)
static void removeAllChildMods(LinkedHashMap< String, TextModification > pathModMap, String XPath)
static Node lastSiblingNode(Node node)
static String nodeToString(Node root)
static Node getNodeIfExists(Document version, Node node)
static boolean isNodeNotTextAndNotBRHR(Node testedNode)
static Node getFirstPreviousNodeExistingInDoc(Node testedNode, Document version)
static Node getParentIfExists(Document oldVersion, Node child)
static String attributeToString(NamedNodeMap attributes)
static ArrayList< TextModification > getTextModifications(Document oldVersion, Document newVersion, int highestProtocol)
static TextModification exchangeWholeBody(Document newVersion)
TextModification provider. 
static final int TEXT_MOD_INS_AFTER
static boolean addNode(int highestProtocol, LinkedHashMap< String, TextModification > pathModMap, NodeDetail addNodeDetail, Document oldVersion)
static Node getNodeIfExists(Document version, String XPath)
Class representing modification of annotated document text. 
static ArrayList< TextModification > getTextModifications(String oldVersion, String newVersion, int highestProtocol)
static final int PROTOCOL_LOD_V2
Helper class with util XPath methods. 
static boolean existsParentMod(LinkedHashMap< String, TextModification > pathModMap, String XPath)
static boolean replaceFirstParentNode(LinkedHashMap< String, TextModification > pathModMap, NodeDetail replaceNodeDetail, Document version, boolean replaceByDocumentNode)
static final int TEXT_MOD_INS_BEFORE
static void removeNode(LinkedHashMap< String, TextModification > pathModMap, NodeDetail removeNode, int highestProtocol, Document newVersion)