View Javadoc

1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *  http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  
18  package org.apache.any23.extractor;
19  
20  import org.apache.any23.source.MemCopyFactory;
21  import org.apache.any23.source.StringDocumentSource;
22  import org.apache.any23.writer.TripleHandler;
23  import org.apache.any23.writer.TripleHandlerException;
24  import org.apache.any23.writer.TurtleWriter;
25  
26  import java.io.ByteArrayOutputStream;
27  import java.io.IOException;
28  import java.io.InputStream;
29  
30  /**
31   * A reporter for example input and output of an extractor. Example
32   * input is part of every extractor's metadata; example output is
33   * obtained by running the extractor on its own example input. This
34   * is useful as a documentation device.
35   *
36   * @author Richard Cyganiak (richard@cyganiak.de)
37   */
38  public class ExampleInputOutput {
39  
40      private final ExtractorFactory<?> factory;
41  
42      public ExampleInputOutput(String extractorName) {
43          this(ExtractorRegistryImpl.getInstance().getFactory(extractorName));
44      }
45  
46      public ExampleInputOutput(ExtractorFactory<?> factory) {
47          this.factory = factory;
48      }
49  
50      public String getExampleInput() throws IOException {
51          if (factory.getExampleInput() == null) {
52              return null;
53          }
54          if (isBlindExtractor()) {
55              return null;
56          }
57          InputStream in = factory.createExtractor().getClass().getResourceAsStream(
58                  factory.getExampleInput());
59          if (in == null) {
60              throw new IllegalArgumentException(
61                      "Example input resource not found for extractor " +
62                              factory.getExtractorName() + ": " +
63                              factory.getExampleInput());
64          }
65          return new String(MemCopyFactory.toByteArray(in), "utf-8");
66      }
67  
68      public String getExampleURI() {
69          if (factory.getExampleInput() == null) {
70              return null;
71          }
72          if (isBlindExtractor()) {
73              return factory.getExampleInput();    // Should be a URI.
74          }
75          return "http://example.com/";
76      }
77  
78      public String getExampleOutput() throws IOException, ExtractionException {
79          if (factory.getExampleInput() == null) {
80              return null;
81          }
82          ByteArrayOutputStream out = new ByteArrayOutputStream();
83          TripleHandler writer = new TurtleWriter(out);
84          new SingleDocumentExtraction(
85                  new StringDocumentSource(getExampleInput(), getExampleURI()),
86                  factory,
87                  writer).run();
88          try {
89              writer.close();
90          } catch (TripleHandlerException e) {
91              throw new ExtractionException("Error while closing the triple handler", e);
92          }
93          return out.toString("utf-8");
94      }
95  
96      private boolean isBlindExtractor() {
97          return factory.createExtractor() instanceof Extractor.BlindExtractor;
98      }
99  
100 }