View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *  http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  
18  package org.apache.any23.extractor.xpath;
19  
20  import org.apache.any23.extractor.ExtractionContext;
21  import org.apache.any23.extractor.ExtractionException;
22  import org.apache.any23.extractor.ExtractionParameters;
23  import org.apache.any23.extractor.ExtractionResult;
24  import org.apache.any23.extractor.Extractor;
25  import org.apache.any23.extractor.ExtractorDescription;
26  import org.eclipse.rdf4j.model.IRI;
27  import org.w3c.dom.Document;
28  
29  import java.io.IOException;
30  import java.util.ArrayList;
31  import java.util.List;
32  
33  /**
34   * Implementation of an {@link org.apache.any23.extractor.Extractor.TagSoupDOMExtractor} able to apply
35   * {@link XPathExtractionRule}s and generate <i>quads</i>.
36   *
37   * @see XPathExtractionRule
38   * 
39   * @author Michele Mostarda (mostarda@fbk.eu)
40   */
41  public class XPathExtractor implements Extractor.TagSoupDOMExtractor {
42  
43      private final List<XPathExtractionRule> xPathExtractionRules = new ArrayList<>();
44  
45      public XPathExtractor() {
46          // default constructor
47      }
48  
49      public XPathExtractor(List<XPathExtractionRule> rules) {
50          xPathExtractionRules.addAll(rules);
51      }
52  
53      public void add(XPathExtractionRule rule) {
54          xPathExtractionRules.add(rule);
55      }
56  
57      public void remove(XPathExtractionRule rule) {
58          xPathExtractionRules.remove(rule);
59      }
60  
61      public boolean contains(XPathExtractionRule rule) {
62          return xPathExtractionRules.contains(rule);
63      }
64  
65      @Override
66      public void run(ExtractionParameters extractionParameters, ExtractionContext extractionContext, Document in,
67              ExtractionResult out) throws IOException, ExtractionException {
68          final IRI documentIRI = extractionContext.getDocumentIRI();
69          for (XPathExtractionRule rule : xPathExtractionRules) {
70              if (rule.acceptIRI(documentIRI)) {
71                  rule.process(in, out);
72              }
73          }
74      }
75  
76      @Override
77      public ExtractorDescription getDescription() {
78          return XPathExtractorFactory.getDescriptionInstance();
79      }
80  
81  }