This project has retired. For details please refer to its
Attic page.
TemplateXPathExtractorRuleImplTest xref
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18 package org.apache.any23.extractor.xpath;
19
20 import org.apache.any23.extractor.ExtractionResult;
21 import org.apache.any23.extractor.html.TagSoupParser;
22 import org.junit.After;
23 import org.junit.Assert;
24 import org.junit.Before;
25 import org.junit.Test;
26 import org.eclipse.rdf4j.model.impl.SimpleValueFactory;
27
28 import java.io.IOException;
29 import java.io.InputStream;
30
31 import static org.mockito.Mockito.mock;
32 import static org.mockito.Mockito.verify;
33
34
35
36
37
38
39 public class TemplateXPathExtractorRuleImplTest {
40
41 private TemplateXPathExtractionRule xPathExtractionRule;
42
43 @Before
44 public void setUp() {
45 xPathExtractionRule = new TemplateXPathExtractionRuleImpl("test-name", "http://test/pattern/*");
46 }
47
48 @After
49 public void tearDown() {
50 xPathExtractionRule = null;
51 }
52
53 @Test
54 public void testAddRemoveVariables() {
55 final Variable v1 = new Variable("v1", "/a/b/c1");
56 final Variable v2 = new Variable("v2", "/a/b/c2");
57 final Variable v3 = new Variable("v3", "/a/b/c3");
58
59 xPathExtractionRule.add(v1);
60 xPathExtractionRule.add(v2);
61 xPathExtractionRule.add(v3);
62
63 Assert.assertTrue(xPathExtractionRule.remove(v1));
64 Assert.assertTrue(xPathExtractionRule.remove(v2));
65 Assert.assertTrue(xPathExtractionRule.remove(v3));
66 Assert.assertFalse(xPathExtractionRule.remove(v3));
67 }
68
69 @Test(expected = IllegalArgumentException.class)
70 public void testAddVariableSameNameCheck() {
71 xPathExtractionRule.add(new Variable("v1", "/a"));
72 xPathExtractionRule.add(new Variable("v1", "/b"));
73 }
74
75 @Test
76 public void testAddRemoveTemplates() {
77 final QuadTemplate template1 = new QuadTemplate(
78 new TemplateSubject(TemplateSubject.Type.URI, "http://sub1", false),
79 new TemplatePredicate("http://pred1", false),
80 new TemplateObject(TemplateObject.Type.URI, "http://obj1", false),
81 new TemplateGraph("http://graph1", false));
82 final QuadTemplate template2 = new QuadTemplate(
83 new TemplateSubject(TemplateSubject.Type.URI, "http://sub2", false),
84 new TemplatePredicate("http://pred2", false),
85 new TemplateObject(TemplateObject.Type.URI, "http://obj2", false),
86 new TemplateGraph("http://graph2", false));
87
88 xPathExtractionRule.add(template1);
89 xPathExtractionRule.add(template2);
90 Assert.assertTrue(xPathExtractionRule.remove(template1));
91 Assert.assertTrue(xPathExtractionRule.remove(template2));
92
93 xPathExtractionRule.add(new Variable("v1", "//"));
94 final QuadTemplate template3 = new QuadTemplate(
95 new TemplateSubject(TemplateSubject.Type.URI, "http://sub2", false),
96 new TemplatePredicate("http://pred2", false), new TemplateObject(TemplateObject.Type.URI, "v1", true),
97 new TemplateGraph("http://graph2", false));
98 xPathExtractionRule.add(template3);
99 }
100
101 @Test(expected = IllegalArgumentException.class)
102 public void testAddTemplateWithNoDeclaredVarCheck() {
103 xPathExtractionRule.add(new QuadTemplate(new TemplateSubject(TemplateSubject.Type.URI, "http://sub2", false),
104 new TemplatePredicate("http://pred2", false), new TemplateObject(TemplateObject.Type.URI, "v1", true),
105 new TemplateGraph("http://graph2", false)));
106 }
107
108 @Test
109 public void testAcceptIRI() {
110 Assert.assertTrue(
111 xPathExtractionRule.acceptIRI(SimpleValueFactory.getInstance().createIRI("http://test/pattern/page")));
112 Assert.assertFalse(
113 xPathExtractionRule.acceptIRI(SimpleValueFactory.getInstance().createIRI("http://test/wrong/page")));
114 }
115
116 @Test
117 public void testProcess() throws IOException {
118 final QuadTemplate template1 = new QuadTemplate(
119 new TemplateSubject(TemplateSubject.Type.URI, "http://sub1", false),
120 new TemplatePredicate("http://pred1", false),
121 new TemplateObject(TemplateObject.Type.LITERAL, "v1", true), new TemplateGraph("http://graph1", false));
122 final QuadTemplate template2 = new QuadTemplate(
123 new TemplateSubject(TemplateSubject.Type.URI, "http://sub2", false), new TemplatePredicate("v2", true),
124 new TemplateObject(TemplateObject.Type.URI, "http://obj2", false),
125 new TemplateGraph("http://graph2", false));
126
127 xPathExtractionRule.add(new Variable("v1", "/html/body/div[1]"));
128 xPathExtractionRule.add(new Variable("v2", "/html/body/div[2]"));
129 xPathExtractionRule.add(template1);
130 xPathExtractionRule.add(template2);
131
132 final String documentIRI = "http://www.page.com/test-uri";
133 final InputStream testData = this.getClass().getResourceAsStream("xpathextractor-test.html");
134 final TagSoupParser tagSoupParser = new TagSoupParser(testData, documentIRI);
135 final ExtractionResult extractionResult = mock(ExtractionResult.class);
136 xPathExtractionRule.process(tagSoupParser.getDOM(), extractionResult);
137
138 verify(extractionResult).writeTriple(SimpleValueFactory.getInstance().createIRI("http://sub1"),
139 SimpleValueFactory.getInstance().createIRI("http://pred1"),
140 SimpleValueFactory.getInstance().createLiteral("value1"),
141 SimpleValueFactory.getInstance().createIRI("http://graph1"));
142
143 verify(extractionResult).writeTriple(SimpleValueFactory.getInstance().createIRI("http://sub2"),
144 SimpleValueFactory.getInstance().createIRI("http://test.dom/uri"),
145 SimpleValueFactory.getInstance().createIRI("http://obj2"),
146 SimpleValueFactory.getInstance().createIRI("http://graph2"));
147 }
148
149 }