This project has retired. For details please refer to its Attic page.
DomUtilsTest xref
View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *  http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  
18  package org.apache.any23.extractor.html;
19  
20  import org.apache.any23.AbstractAny23TestBase;
21  import org.junit.Assert;
22  import org.junit.Test;
23  import org.w3c.dom.DOMImplementation;
24  import org.w3c.dom.Document;
25  import org.w3c.dom.Node;
26  import org.w3c.dom.NodeList;
27  
28  import javax.xml.parsers.DocumentBuilder;
29  import javax.xml.parsers.DocumentBuilderFactory;
30  import javax.xml.parsers.ParserConfigurationException;
31  import javax.xml.transform.TransformerException;
32  import javax.xml.xpath.XPath;
33  import javax.xml.xpath.XPathConstants;
34  import javax.xml.xpath.XPathExpressionException;
35  import javax.xml.xpath.XPathFactory;
36  
37  import java.io.FileNotFoundException;
38  import java.io.IOException;
39  import java.util.List;
40  
41  /**
42   * Reference test class for the {@link DomUtils} class.
43   * 
44   * @author Davide Palmisano (dpalmisano@gmail.com)
45   */
46  public class DomUtilsTest extends AbstractAny23TestBase {
47  
48      private final static XPath xPathEngine = XPathFactory.newInstance().newXPath();
49  
50      @Test
51      public void testGetXPathForNode() throws Exception {
52          check("/microformats/hcard/01-tantek-basic.html", "//DIV[@class='vcard']", "/HTML[1]/BODY[1]/DIV[1]");
53          check("/microformats/hcard/02-multiple-class-names-on-vcard.html", "//SPAN[@class='fn n']",
54                  "/HTML[1]/BODY[1]/DIV[1]/SPAN[1]");
55          check("/microformats/hcard/02-multiple-class-names-on-vcard.html", "//SPAN/SPAN[@class='fn n']",
56                  "/HTML[1]/BODY[1]/P[1]/SPAN[1]/SPAN[1]");
57          check("/microformats/hcard/02-multiple-class-names-on-vcard.html", "//SPAN/SPAN/*[@class='given-name']",
58                  "/HTML[1]/BODY[1]/P[1]/SPAN[1]/SPAN[1]/SPAN[1]");
59          check("/microformats/hcard/02-multiple-class-names-on-vcard.html", "//SPAN/SPAN/*[@class='family-name']",
60                  "/HTML[1]/BODY[1]/P[1]/SPAN[1]/SPAN[1]/SPAN[2]");
61      }
62  
63      @Test
64      public void testFindAllByClassName() throws Exception {
65          Node dom = new HTMLFixture(copyResourceToTempFile("/microformats/hcard/02-multiple-class-names-on-vcard.html"))
66                  .getDOM();
67          Assert.assertNotNull(dom);
68          List<Node> nodes = DomUtils.findAllByClassName(dom, "vcard");
69          NodeList nodeList = null;
70          try {
71              nodeList = (NodeList) xPathEngine.evaluate("//*[contains(@class, 'vcard')]", dom, XPathConstants.NODESET);
72          } catch (XPathExpressionException e) {
73              Assert.fail(e.getMessage());
74          }
75          Assert.assertNotNull(nodeList);
76          Assert.assertEquals("vcard elements number does not match", nodes.size(), nodeList.getLength());
77          for (int i = 0; i < nodeList.getLength(); i++) {
78              Assert.assertTrue(nodes.contains(nodeList.item(i)));
79          }
80      }
81  
82      @Test
83      public void testFindAllByTag() throws Exception {
84          Node dom = new HTMLFixture(copyResourceToTempFile("/microformats/hcard/02-multiple-class-names-on-vcard.html"))
85                  .getDOM();
86          Assert.assertNotNull(dom);
87          List<Node> nodes = DomUtils.findAllByTag(dom, "SPAN");
88          NodeList nodeList = null;
89          try {
90              nodeList = (NodeList) xPathEngine.evaluate("./descendant-or-self::SPAN", dom, XPathConstants.NODESET);
91          } catch (XPathExpressionException e) {
92              Assert.fail(e.getMessage());
93          }
94  
95          Assert.assertEquals("number of elements does not match", nodes.size(), nodeList.getLength());
96  
97          for (int i = 0; i < nodeList.getLength(); i++) {
98              Assert.assertTrue(nodes.contains(nodeList.item(i)));
99          }
100     }
101 
102     @Test
103     public void testFindAllByTagAndClassName() throws Exception {
104         Node dom = new HTMLFixture(copyResourceToTempFile("/microformats/hcard/02-multiple-class-names-on-vcard.html"))
105                 .getDOM();
106         Assert.assertNotNull(dom);
107         List<Node> nodes = DomUtils.findAllByTagAndClassName(dom, "SPAN", "family-name");
108         NodeList nodeList = null;
109         try {
110             nodeList = (NodeList) xPathEngine.evaluate("./descendant-or-self::SPAN[contains(@class,'family-name')]",
111                     dom, XPathConstants.NODESET);
112         } catch (XPathExpressionException e) {
113             Assert.fail(e.getMessage());
114         }
115 
116         Assert.assertEquals("number of elements does not match", nodes.size(), nodeList.getLength());
117 
118         for (int i = 0; i < nodeList.getLength(); i++) {
119             Assert.assertTrue(nodes.contains(nodeList.item(i)));
120         }
121 
122     }
123 
124     @Test
125     public void testHasClassName() throws Exception {
126         Node dom = new HTMLFixture(copyResourceToTempFile("/microformats/hcard/02-multiple-class-names-on-vcard.html"))
127                 .getDOM();
128         Assert.assertNotNull(dom);
129         List<Node> nodes = DomUtils.findAllByClassName(dom, "vcard");
130         for (Node node : nodes) {
131             Assert.assertTrue(DomUtils.hasClassName(node, "vcard"));
132         }
133     }
134 
135     @Test
136     public void testReadAttribute() throws Exception {
137         Node dom = new HTMLFixture(copyResourceToTempFile("/microformats/hcard/02-multiple-class-names-on-vcard.html"))
138                 .getDOM();
139         Assert.assertNotNull(dom);
140         List<Node> nodes = DomUtils.findAllByClassName(dom, "vcard");
141         for (Node node : nodes) {
142             // every node in nodes should have a class attribute containing vcard.
143             Assert.assertTrue(DomUtils.readAttribute(node, "class").contains("vcard"));
144         }
145 
146     }
147 
148     @Test
149     public void testSerializeToXML() throws ParserConfigurationException, TransformerException, IOException {
150         DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
151         DocumentBuilder builder = factory.newDocumentBuilder();
152         DOMImplementation impl = builder.getDOMImplementation();
153         Document doc = impl.createDocument(null, null, null);
154         Node n1 = doc.createElement("DIV");
155         Node n2 = doc.createElement("SPAN");
156         Node n3 = doc.createElement("P");
157         n1.setTextContent("Content 1");
158         n2.setTextContent("Content 2");
159         n3.setTextContent("Content 3");
160         n1.appendChild(n2);
161         n2.appendChild(n3);
162         doc.appendChild(n1);
163 
164         Assert.assertEquals("<DIV>Content 1<SPAN>Content 2<P>Content 3</P></SPAN></DIV>",
165                 DomUtils.serializeToXML(doc, false));
166     }
167 
168     private void check(String file, String xpath, String reverseXPath) throws FileNotFoundException, IOException {
169         Node dom = new HTMLFixture(copyResourceToTempFile(file)).getDOM();
170         Assert.assertNotNull(dom);
171         Node node;
172         try {
173             node = (Node) xPathEngine.evaluate(xpath, dom, XPathConstants.NODE);
174             Assert.assertNotNull(node);
175             Assert.assertEquals(Node.ELEMENT_NODE, node.getNodeType());
176             String newPath = DomUtils.getXPathForNode(node);
177             Assert.assertEquals(reverseXPath, newPath);
178             Node newNode = (Node) xPathEngine.evaluate(newPath, dom, XPathConstants.NODE);
179             Assert.assertEquals(node, newNode);
180 
181         } catch (XPathExpressionException ex) {
182             Assert.fail(ex.getMessage());
183         }
184     }
185 
186 }