View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *  http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  
18  package org.apache.any23.writer;
19  
20  import org.apache.any23.extractor.ExtractionContext;
21  import org.eclipse.rdf4j.model.Resource;
22  import org.eclipse.rdf4j.model.IRI;
23  import org.eclipse.rdf4j.model.Value;
24  
25  import java.io.PrintWriter;
26  import java.util.HashMap;
27  import java.util.Locale;
28  import java.util.Map;
29  import java.util.concurrent.atomic.AtomicBoolean;
30  import java.util.stream.Collectors;
31  import org.apache.any23.util.StringUtils;
32  
33  /**
34   * Triple handler decorator useful for logging purposes.
35   */
36  public class LoggingTripleHandler implements TripleHandler {
37  
38      /**
39       * Decorated.
40       */
41      private final TripleHandler underlyingHandler;
42  
43      private final Map<String, Integer> contextTripleMap = new HashMap<String, Integer>();
44      private long startTime = 0;
45      private long contentLength = 0;
46      private final PrintWriter destination;
47  
48      public LoggingTripleHandler(TripleHandler tripleHandler, PrintWriter destination) {
49          if (tripleHandler == null) {
50              throw new NullPointerException("tripleHandler cannot be null.");
51          }
52          if (destination == null) {
53              throw new NullPointerException("destination cannot be null.");
54          }
55          underlyingHandler = tripleHandler;
56          this.destination = destination;
57  
58          printHeader(destination);
59      }
60  
61      public void startDocument(IRI documentIRI) throws TripleHandlerException {
62          underlyingHandler.startDocument(documentIRI);
63          startTime = System.currentTimeMillis();
64      }
65  
66      public void close() throws TripleHandlerException {
67          underlyingHandler.close();
68          destination.flush();
69          destination.close();
70      }
71  
72      public void closeContext(ExtractionContext context) throws TripleHandlerException {
73          underlyingHandler.closeContext(context);
74      }
75  
76      public void openContext(ExtractionContext context) throws TripleHandlerException {
77          underlyingHandler.openContext(context);
78      }
79  
80      public void receiveTriple(Resource s, IRI p, Value o, IRI g, ExtractionContext context)
81              throws TripleHandlerException {
82          underlyingHandler.receiveTriple(s, p, o, g, context);
83          Integer i = contextTripleMap.get(context.getExtractorName());
84          if (i == null)
85              i = 0;
86          contextTripleMap.put(context.getExtractorName(), (i + 1));
87      }
88  
89      public void receiveNamespace(String prefix, String uri, ExtractionContext context) throws TripleHandlerException {
90          underlyingHandler.receiveNamespace(prefix, uri, context);
91      }
92  
93      @Override
94      public void endDocument(IRI documentIRI) throws TripleHandlerException {
95          underlyingHandler.endDocument(documentIRI);
96          long elapsedTime = System.currentTimeMillis() - startTime;
97          final AtomicBoolean success = new AtomicBoolean(true);
98  
99          StringBuilder sb = new StringBuilder("[ ");
100         String[] parsers = contextTripleMap.entrySet().stream().map(e -> {
101             if (e.getValue() > 0) {
102                 success.set(true);
103             }
104             return String.format(Locale.ROOT, "%s:%d", e.getKey(), e.getValue());
105         }).collect(Collectors.toList()).toArray(new String[] {});
106         sb.append(StringUtils.join(", ", parsers));
107         sb.append(" ]");
108         destination.println(
109                 documentIRI + "\t" + contentLength + "\t" + elapsedTime + "\t" + success.get() + "\t" + sb.toString());
110         contextTripleMap.clear();
111     }
112 
113     public void setContentLength(long contentLength) {
114         underlyingHandler.setContentLength(contentLength);
115         this.contentLength = contentLength;
116     }
117 
118     private void printHeader(PrintWriter writer) {
119         writer.println("# Document-IRI\tContent-Length\tElapsed-Time\tSuccess\tExtractors:Triples");
120     }
121 }