This project has retired. For details please refer to its Attic page.
TemplateXPathExtractorRuleImplTest xref
View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *  http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  
18  package org.apache.any23.extractor.xpath;
19  
20  import org.apache.any23.extractor.ExtractionResult;
21  import org.apache.any23.extractor.html.TagSoupParser;
22  import org.junit.After;
23  import org.junit.Assert;
24  import org.junit.Before;
25  import org.junit.Test;
26  import org.eclipse.rdf4j.model.impl.SimpleValueFactory;
27  
28  import java.io.IOException;
29  import java.io.InputStream;
30  
31  import static org.mockito.Mockito.mock;
32  import static org.mockito.Mockito.verify;
33  
34  /**
35   * Test case for {@link TemplateXPathExtractionRuleImpl}.
36   *
37   * @author Michele Mostarda (mostarda@fbk.eu)
38   */
39  public class TemplateXPathExtractorRuleImplTest {
40  
41      private TemplateXPathExtractionRule xPathExtractionRule;
42  
43      @Before
44      public void setUp() {
45          xPathExtractionRule = new TemplateXPathExtractionRuleImpl("test-name", "http://test/pattern/*");
46      }
47  
48      @After
49      public void tearDown() {
50          xPathExtractionRule = null;
51      }
52  
53      @Test
54      public void testAddRemoveVariables() {
55          final Variable v1 = new Variable("v1", "/a/b/c1");
56          final Variable v2 = new Variable("v2", "/a/b/c2");
57          final Variable v3 = new Variable("v3", "/a/b/c3");
58  
59          xPathExtractionRule.add(v1);
60          xPathExtractionRule.add(v2);
61          xPathExtractionRule.add(v3);
62  
63          Assert.assertTrue(xPathExtractionRule.remove(v1));
64          Assert.assertTrue(xPathExtractionRule.remove(v2));
65          Assert.assertTrue(xPathExtractionRule.remove(v3));
66          Assert.assertFalse(xPathExtractionRule.remove(v3));
67      }
68  
69      @Test(expected = IllegalArgumentException.class)
70      public void testAddVariableSameNameCheck() {
71          xPathExtractionRule.add(new Variable("v1", "/a"));
72          xPathExtractionRule.add(new Variable("v1", "/b"));
73      }
74  
75      @Test
76      public void testAddRemoveTemplates() {
77          final QuadTemplate template1 = new QuadTemplate(
78                  new TemplateSubject(TemplateSubject.Type.URI, "http://sub1", false),
79                  new TemplatePredicate("http://pred1", false),
80                  new TemplateObject(TemplateObject.Type.URI, "http://obj1", false),
81                  new TemplateGraph("http://graph1", false));
82          final QuadTemplate template2 = new QuadTemplate(
83                  new TemplateSubject(TemplateSubject.Type.URI, "http://sub2", false),
84                  new TemplatePredicate("http://pred2", false),
85                  new TemplateObject(TemplateObject.Type.URI, "http://obj2", false),
86                  new TemplateGraph("http://graph2", false));
87  
88          xPathExtractionRule.add(template1);
89          xPathExtractionRule.add(template2);
90          Assert.assertTrue(xPathExtractionRule.remove(template1));
91          Assert.assertTrue(xPathExtractionRule.remove(template2));
92  
93          xPathExtractionRule.add(new Variable("v1", "//"));
94          final QuadTemplate template3 = new QuadTemplate(
95                  new TemplateSubject(TemplateSubject.Type.URI, "http://sub2", false),
96                  new TemplatePredicate("http://pred2", false), new TemplateObject(TemplateObject.Type.URI, "v1", true),
97                  new TemplateGraph("http://graph2", false));
98          xPathExtractionRule.add(template3);
99      }
100 
101     @Test(expected = IllegalArgumentException.class)
102     public void testAddTemplateWithNoDeclaredVarCheck() {
103         xPathExtractionRule.add(new QuadTemplate(new TemplateSubject(TemplateSubject.Type.URI, "http://sub2", false),
104                 new TemplatePredicate("http://pred2", false), new TemplateObject(TemplateObject.Type.URI, "v1", true),
105                 new TemplateGraph("http://graph2", false)));
106     }
107 
108     @Test
109     public void testAcceptIRI() {
110         Assert.assertTrue(
111                 xPathExtractionRule.acceptIRI(SimpleValueFactory.getInstance().createIRI("http://test/pattern/page")));
112         Assert.assertFalse(
113                 xPathExtractionRule.acceptIRI(SimpleValueFactory.getInstance().createIRI("http://test/wrong/page")));
114     }
115 
116     @Test
117     public void testProcess() throws IOException {
118         final QuadTemplate template1 = new QuadTemplate(
119                 new TemplateSubject(TemplateSubject.Type.URI, "http://sub1", false),
120                 new TemplatePredicate("http://pred1", false),
121                 new TemplateObject(TemplateObject.Type.LITERAL, "v1", true), new TemplateGraph("http://graph1", false));
122         final QuadTemplate template2 = new QuadTemplate(
123                 new TemplateSubject(TemplateSubject.Type.URI, "http://sub2", false), new TemplatePredicate("v2", true),
124                 new TemplateObject(TemplateObject.Type.URI, "http://obj2", false),
125                 new TemplateGraph("http://graph2", false));
126 
127         xPathExtractionRule.add(new Variable("v1", "/html/body/div[1]"));
128         xPathExtractionRule.add(new Variable("v2", "/html/body/div[2]"));
129         xPathExtractionRule.add(template1);
130         xPathExtractionRule.add(template2);
131 
132         final String documentIRI = "http://www.page.com/test-uri";
133         final InputStream testData = this.getClass().getResourceAsStream("xpathextractor-test.html");
134         final TagSoupParser tagSoupParser = new TagSoupParser(testData, documentIRI);
135         final ExtractionResult extractionResult = mock(ExtractionResult.class);
136         xPathExtractionRule.process(tagSoupParser.getDOM(), extractionResult);
137 
138         verify(extractionResult).writeTriple(SimpleValueFactory.getInstance().createIRI("http://sub1"),
139                 SimpleValueFactory.getInstance().createIRI("http://pred1"),
140                 SimpleValueFactory.getInstance().createLiteral("value1"),
141                 SimpleValueFactory.getInstance().createIRI("http://graph1"));
142 
143         verify(extractionResult).writeTriple(SimpleValueFactory.getInstance().createIRI("http://sub2"),
144                 SimpleValueFactory.getInstance().createIRI("http://test.dom/uri"),
145                 SimpleValueFactory.getInstance().createIRI("http://obj2"),
146                 SimpleValueFactory.getInstance().createIRI("http://graph2"));
147     }
148 
149 }