View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *  http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  
18  package org.apache.any23.writer;
19  
20  import org.apache.any23.extractor.ExtractionContext;
21  import org.eclipse.rdf4j.model.Resource;
22  import org.eclipse.rdf4j.model.IRI;
23  import org.eclipse.rdf4j.model.Value;
24  
25  import java.util.Collection;
26  import java.util.HashSet;
27  import java.util.concurrent.atomic.AtomicInteger;
28  
29  /**
30   * A {@link TripleHandler} that collects
31   * various information about the extraction process, such as
32   * the extractors used and the total number of triples.
33   *
34   * @author Richard Cyganiak (richard@cyganiak.de)
35   */
36  public class ReportingTripleHandler implements TripleHandler {
37  
38      private final TripleHandler wrapped;
39  
40      private final Collection<String> extractorNames = new HashSet<>();
41      private AtomicInteger totalTriples   = new AtomicInteger(0);
42      private AtomicInteger totalDocuments = new AtomicInteger(0);
43  
44      public ReportingTripleHandler(TripleHandler wrapped) {
45          if(wrapped == null) {
46              throw new NullPointerException("wrapped cannot be null.");
47          }
48          this.wrapped = wrapped;
49      }
50  
51      public Collection<String> getExtractorNames() {
52          return extractorNames;
53      }
54  
55      public int getTotalTriples() {
56          return totalTriples.get();
57      }
58  
59      public int getTotalDocuments() {
60          return totalDocuments.get();
61      }
62  
63      /**
64       * @return a human readable report.
65       */
66      public String printReport() {
67          return String.format("Total Documents: %d, Total Triples: %d", getTotalDocuments(), getTotalTriples());
68      }
69  
70      public void startDocument(IRI documentIRI) throws TripleHandlerException {
71          totalDocuments.incrementAndGet();
72          wrapped.startDocument(documentIRI);
73      }
74  
75      public void openContext(ExtractionContext context) throws TripleHandlerException {
76          wrapped.openContext(context);
77      }
78  
79      public void receiveNamespace(
80              String prefix,
81              String uri,
82              ExtractionContext context
83      ) throws TripleHandlerException {
84          wrapped.receiveNamespace(prefix, uri, context);
85      }
86  
87      public void receiveTriple(
88              Resource s,
89              IRI p,
90              Value o,
91              IRI g,
92              ExtractionContext context
93      ) throws TripleHandlerException {
94          extractorNames.add(context.getExtractorName());
95          totalTriples.incrementAndGet();
96          wrapped.receiveTriple(s, p, o, g, context);
97      }
98  
99      public void setContentLength(long contentLength) {
100         wrapped.setContentLength(contentLength);
101     }
102 
103     public void closeContext(ExtractionContext context) throws TripleHandlerException {
104         wrapped.closeContext(context);
105     }
106 
107     public void endDocument(IRI documentIRI) throws TripleHandlerException {
108         wrapped.endDocument(documentIRI);
109     }
110 
111     public void close() throws TripleHandlerException {
112         wrapped.close();
113     }
114 
115 }