1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18 package org.apache.any23.extractor;
19
20 import org.apache.any23.source.MemCopyFactory;
21 import org.apache.any23.source.StringDocumentSource;
22 import org.apache.any23.writer.TripleHandler;
23 import org.apache.any23.writer.TripleHandlerException;
24 import org.apache.any23.writer.TurtleWriter;
25
26 import java.io.ByteArrayOutputStream;
27 import java.io.IOException;
28 import java.io.InputStream;
29
30
31
32
33
34
35
36
37
38 public class ExampleInputOutput {
39
40 private final ExtractorFactory<?> factory;
41
42 public ExampleInputOutput(String extractorName) {
43 this(ExtractorRegistryImpl.getInstance().getFactory(extractorName));
44 }
45
46 public ExampleInputOutput(ExtractorFactory<?> factory) {
47 this.factory = factory;
48 }
49
50 public String getExampleInput() throws IOException {
51 if (factory.getExampleInput() == null) {
52 return null;
53 }
54 if (isBlindExtractor()) {
55 return null;
56 }
57 InputStream in = factory.createExtractor().getClass().getResourceAsStream(
58 factory.getExampleInput());
59 if (in == null) {
60 throw new IllegalArgumentException(
61 "Example input resource not found for extractor " +
62 factory.getExtractorName() + ": " +
63 factory.getExampleInput());
64 }
65 return new String(MemCopyFactory.toByteArray(in), "utf-8");
66 }
67
68 public String getExampleURI() {
69 if (factory.getExampleInput() == null) {
70 return null;
71 }
72 if (isBlindExtractor()) {
73 return factory.getExampleInput();
74 }
75 return "http://example.com/";
76 }
77
78 public String getExampleOutput() throws IOException, ExtractionException {
79 if (factory.getExampleInput() == null) {
80 return null;
81 }
82 ByteArrayOutputStream out = new ByteArrayOutputStream();
83 TripleHandler writer = new TurtleWriter(out);
84 new SingleDocumentExtraction(
85 new StringDocumentSource(getExampleInput(), getExampleURI()),
86 factory,
87 writer).run();
88 try {
89 writer.close();
90 } catch (TripleHandlerException e) {
91 throw new ExtractionException("Error while closing the triple handler", e);
92 }
93 return out.toString("utf-8");
94 }
95
96 private boolean isBlindExtractor() {
97 return factory.createExtractor() instanceof Extractor.BlindExtractor;
98 }
99
100 }