View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *  http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  
18  package org.apache.any23.extractor.xpath;
19  
20  import org.apache.any23.extractor.ExtractionContext;
21  import org.apache.any23.extractor.ExtractionException;
22  import org.apache.any23.extractor.ExtractionParameters;
23  import org.apache.any23.extractor.ExtractionResult;
24  import org.apache.any23.extractor.Extractor;
25  import org.apache.any23.extractor.ExtractorDescription;
26  import org.eclipse.rdf4j.model.IRI;
27  import org.w3c.dom.Document;
28  
29  import java.io.IOException;
30  import java.util.ArrayList;
31  import java.util.List;
32  
33  /**
34   * Implementation of an {@link org.apache.any23.extractor.Extractor.TagSoupDOMExtractor} able to
35   * apply {@link XPathExtractionRule}s and generate <i>quads</i>.
36   *
37   * @see XPathExtractionRule
38   * @author Michele Mostarda (mostarda@fbk.eu)
39   */
40  public class XPathExtractor implements Extractor.TagSoupDOMExtractor {
41  
42      private final List<XPathExtractionRule> xPathExtractionRules = new ArrayList<>();
43  
44      public XPathExtractor() {
45          //default constructor
46      }
47      
48      public XPathExtractor(List<XPathExtractionRule> rules) {
49          xPathExtractionRules.addAll(rules);
50      }
51  
52      public void add(XPathExtractionRule rule) {
53          xPathExtractionRules.add(rule);
54      }
55  
56      public void remove(XPathExtractionRule rule) {
57          xPathExtractionRules.remove(rule);
58      }
59  
60      public boolean contains(XPathExtractionRule rule) {
61          return xPathExtractionRules.contains(rule);
62      }
63  
64      @Override
65      public void run(
66              ExtractionParameters extractionParameters,
67              ExtractionContext extractionContext,
68              Document in,
69              ExtractionResult out
70      )
71      throws IOException, ExtractionException {
72          final IRI documentIRI = extractionContext.getDocumentIRI();
73          for(XPathExtractionRule rule : xPathExtractionRules) {
74              if(rule.acceptIRI(documentIRI)) {
75                  rule.process(in, out);
76              }
77          }
78      }
79  
80      @Override
81      public ExtractorDescription getDescription() {
82          return XPathExtractorFactory.getDescriptionInstance();
83      }
84  
85  }