View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *  http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  
18  package org.apache.any23.writer;
19  
20  import org.apache.any23.extractor.ExtractionContext;
21  import org.eclipse.rdf4j.model.Resource;
22  import org.eclipse.rdf4j.model.IRI;
23  import org.eclipse.rdf4j.model.Value;
24  
25  import java.io.PrintWriter;
26  import java.util.HashMap;
27  import java.util.Map;
28  import java.util.concurrent.atomic.AtomicBoolean;
29  import java.util.stream.Collectors;
30  import org.apache.any23.util.StringUtils;
31  
32  /**
33   * Triple handler decorator useful for logging purposes.
34   */
35  public class LoggingTripleHandler implements TripleHandler {
36  
37      /**
38       * Decorated.
39       */
40      private final TripleHandler underlyingHandler;
41  
42      private final Map<String, Integer> contextTripleMap = new HashMap<String, Integer>();
43      private long startTime     = 0;
44      private long contentLength = 0;
45      private final PrintWriter destination;
46  
47      public LoggingTripleHandler(TripleHandler tripleHandler, PrintWriter destination) {
48          if(tripleHandler == null) {
49              throw new NullPointerException("tripleHandler cannot be null.");
50          }
51          if(destination == null) {
52              throw new NullPointerException("destination cannot be null.");
53          }
54          underlyingHandler = tripleHandler;
55          this.destination = destination;
56  
57          printHeader(destination);
58      }
59  
60      public void startDocument(IRI documentIRI) throws TripleHandlerException {
61          underlyingHandler.startDocument(documentIRI);
62          startTime = System.currentTimeMillis();
63      }
64  
65      public void close() throws TripleHandlerException {
66          underlyingHandler.close();
67          destination.flush();
68          destination.close();
69      }
70  
71      public void closeContext(ExtractionContext context) throws TripleHandlerException {
72          underlyingHandler.closeContext(context);
73      }
74  
75      public void openContext(ExtractionContext context) throws TripleHandlerException {
76          underlyingHandler.openContext(context);
77      }
78  
79      public void receiveTriple(Resource s, IRI p, Value o, IRI g, ExtractionContext context)
80      throws TripleHandlerException {
81          underlyingHandler.receiveTriple(s, p, o, g, context);
82          Integer i = contextTripleMap.get(context.getExtractorName());
83          if (i == null) i = 0;
84          contextTripleMap.put(context.getExtractorName(), (i + 1));
85      }
86  
87      public void receiveNamespace(String prefix, String uri, ExtractionContext context)
88      throws TripleHandlerException {
89          underlyingHandler.receiveNamespace(prefix, uri, context);
90      }
91  
92      @Override
93      public void endDocument(IRI documentIRI) throws TripleHandlerException {
94          underlyingHandler.endDocument(documentIRI);
95          long elapsedTime = System.currentTimeMillis() - startTime;
96          final AtomicBoolean success = new AtomicBoolean(true);
97          
98          StringBuilder sb = new StringBuilder("[ ");
99          String[] parsers = contextTripleMap.entrySet().stream().map(e -> {
100                     if (e.getValue() > 0) {
101                         success.set(true);
102                     }
103                     return String.format("%s:%d", e.getKey(), e.getValue()); }
104                 ).collect(Collectors.toList()).toArray(new String[] {});
105         sb.append(StringUtils.join(", ", parsers));
106         sb.append(" ]");
107         destination.println(
108                 documentIRI + "\t" + contentLength + "\t" + elapsedTime + "\t" + success.get() + "\t" + sb.toString()
109         );
110         contextTripleMap.clear();
111     }
112 
113     public void setContentLength(long contentLength) {
114         underlyingHandler.setContentLength(contentLength);
115         this.contentLength = contentLength;
116     }
117 
118     private void printHeader(PrintWriter writer) {
119         writer.println("# Document-IRI\tContent-Length\tElapsed-Time\tSuccess\tExtractors:Triples");
120     }
121 }