This project has retired. For details please refer to its
Attic page.
DomUtilsTest xref
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18 package org.apache.any23.extractor.html;
19
20 import org.apache.any23.AbstractAny23TestBase;
21 import org.junit.Assert;
22 import org.junit.Test;
23 import org.w3c.dom.DOMImplementation;
24 import org.w3c.dom.Document;
25 import org.w3c.dom.Node;
26 import org.w3c.dom.NodeList;
27
28 import javax.xml.parsers.DocumentBuilder;
29 import javax.xml.parsers.DocumentBuilderFactory;
30 import javax.xml.parsers.ParserConfigurationException;
31 import javax.xml.transform.TransformerException;
32 import javax.xml.xpath.XPath;
33 import javax.xml.xpath.XPathConstants;
34 import javax.xml.xpath.XPathExpressionException;
35 import javax.xml.xpath.XPathFactory;
36
37 import java.io.FileNotFoundException;
38 import java.io.IOException;
39 import java.util.List;
40
41
42
43
44
45
46 public class DomUtilsTest extends AbstractAny23TestBase {
47
48 private final static XPath xPathEngine = XPathFactory.newInstance().newXPath();
49
50 @Test
51 public void testGetXPathForNode() throws Exception {
52 check("/microformats/hcard/01-tantek-basic.html", "//DIV[@class='vcard']", "/HTML[1]/BODY[1]/DIV[1]");
53 check("/microformats/hcard/02-multiple-class-names-on-vcard.html", "//SPAN[@class='fn n']",
54 "/HTML[1]/BODY[1]/DIV[1]/SPAN[1]");
55 check("/microformats/hcard/02-multiple-class-names-on-vcard.html", "//SPAN/SPAN[@class='fn n']",
56 "/HTML[1]/BODY[1]/P[1]/SPAN[1]/SPAN[1]");
57 check("/microformats/hcard/02-multiple-class-names-on-vcard.html", "//SPAN/SPAN/*[@class='given-name']",
58 "/HTML[1]/BODY[1]/P[1]/SPAN[1]/SPAN[1]/SPAN[1]");
59 check("/microformats/hcard/02-multiple-class-names-on-vcard.html", "//SPAN/SPAN/*[@class='family-name']",
60 "/HTML[1]/BODY[1]/P[1]/SPAN[1]/SPAN[1]/SPAN[2]");
61 }
62
63 @Test
64 public void testFindAllByClassName() throws Exception {
65 Node dom = new HTMLFixture(copyResourceToTempFile("/microformats/hcard/02-multiple-class-names-on-vcard.html"))
66 .getDOM();
67 Assert.assertNotNull(dom);
68 List<Node> nodes = DomUtils.findAllByClassName(dom, "vcard");
69 NodeList nodeList = null;
70 try {
71 nodeList = (NodeList) xPathEngine.evaluate("//*[contains(@class, 'vcard')]", dom, XPathConstants.NODESET);
72 } catch (XPathExpressionException e) {
73 Assert.fail(e.getMessage());
74 }
75 Assert.assertNotNull(nodeList);
76 Assert.assertEquals("vcard elements number does not match", nodes.size(), nodeList.getLength());
77 for (int i = 0; i < nodeList.getLength(); i++) {
78 Assert.assertTrue(nodes.contains(nodeList.item(i)));
79 }
80 }
81
82 @Test
83 public void testFindAllByTag() throws Exception {
84 Node dom = new HTMLFixture(copyResourceToTempFile("/microformats/hcard/02-multiple-class-names-on-vcard.html"))
85 .getDOM();
86 Assert.assertNotNull(dom);
87 List<Node> nodes = DomUtils.findAllByTag(dom, "SPAN");
88 NodeList nodeList = null;
89 try {
90 nodeList = (NodeList) xPathEngine.evaluate("./descendant-or-self::SPAN", dom, XPathConstants.NODESET);
91 } catch (XPathExpressionException e) {
92 Assert.fail(e.getMessage());
93 }
94
95 Assert.assertEquals("number of elements does not match", nodes.size(), nodeList.getLength());
96
97 for (int i = 0; i < nodeList.getLength(); i++) {
98 Assert.assertTrue(nodes.contains(nodeList.item(i)));
99 }
100 }
101
102 @Test
103 public void testFindAllByTagAndClassName() throws Exception {
104 Node dom = new HTMLFixture(copyResourceToTempFile("/microformats/hcard/02-multiple-class-names-on-vcard.html"))
105 .getDOM();
106 Assert.assertNotNull(dom);
107 List<Node> nodes = DomUtils.findAllByTagAndClassName(dom, "SPAN", "family-name");
108 NodeList nodeList = null;
109 try {
110 nodeList = (NodeList) xPathEngine.evaluate("./descendant-or-self::SPAN[contains(@class,'family-name')]",
111 dom, XPathConstants.NODESET);
112 } catch (XPathExpressionException e) {
113 Assert.fail(e.getMessage());
114 }
115
116 Assert.assertEquals("number of elements does not match", nodes.size(), nodeList.getLength());
117
118 for (int i = 0; i < nodeList.getLength(); i++) {
119 Assert.assertTrue(nodes.contains(nodeList.item(i)));
120 }
121
122 }
123
124 @Test
125 public void testHasClassName() throws Exception {
126 Node dom = new HTMLFixture(copyResourceToTempFile("/microformats/hcard/02-multiple-class-names-on-vcard.html"))
127 .getDOM();
128 Assert.assertNotNull(dom);
129 List<Node> nodes = DomUtils.findAllByClassName(dom, "vcard");
130 for (Node node : nodes) {
131 Assert.assertTrue(DomUtils.hasClassName(node, "vcard"));
132 }
133 }
134
135 @Test
136 public void testReadAttribute() throws Exception {
137 Node dom = new HTMLFixture(copyResourceToTempFile("/microformats/hcard/02-multiple-class-names-on-vcard.html"))
138 .getDOM();
139 Assert.assertNotNull(dom);
140 List<Node> nodes = DomUtils.findAllByClassName(dom, "vcard");
141 for (Node node : nodes) {
142
143 Assert.assertTrue(DomUtils.readAttribute(node, "class").contains("vcard"));
144 }
145
146 }
147
148 @Test
149 public void testSerializeToXML() throws ParserConfigurationException, TransformerException, IOException {
150 DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
151 DocumentBuilder builder = factory.newDocumentBuilder();
152 DOMImplementation impl = builder.getDOMImplementation();
153 Document doc = impl.createDocument(null, null, null);
154 Node n1 = doc.createElement("DIV");
155 Node n2 = doc.createElement("SPAN");
156 Node n3 = doc.createElement("P");
157 n1.setTextContent("Content 1");
158 n2.setTextContent("Content 2");
159 n3.setTextContent("Content 3");
160 n1.appendChild(n2);
161 n2.appendChild(n3);
162 doc.appendChild(n1);
163
164 Assert.assertEquals("<DIV>Content 1<SPAN>Content 2<P>Content 3</P></SPAN></DIV>",
165 DomUtils.serializeToXML(doc, false));
166 }
167
168 private void check(String file, String xpath, String reverseXPath) throws FileNotFoundException, IOException {
169 Node dom = new HTMLFixture(copyResourceToTempFile(file)).getDOM();
170 Assert.assertNotNull(dom);
171 Node node;
172 try {
173 node = (Node) xPathEngine.evaluate(xpath, dom, XPathConstants.NODE);
174 Assert.assertNotNull(node);
175 Assert.assertEquals(Node.ELEMENT_NODE, node.getNodeType());
176 String newPath = DomUtils.getXPathForNode(node);
177 Assert.assertEquals(reverseXPath, newPath);
178 Node newNode = (Node) xPathEngine.evaluate(newPath, dom, XPathConstants.NODE);
179 Assert.assertEquals(node, newNode);
180
181 } catch (XPathExpressionException ex) {
182 Assert.fail(ex.getMessage());
183 }
184 }
185
186 }