View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *  http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  
18  package org.apache.any23.cli;
19  
20  import com.beust.jcommander.Parameter;
21  import com.beust.jcommander.Parameters;
22  import org.apache.any23.extractor.ExampleInputOutput;
23  import org.apache.any23.extractor.ExtractionException;
24  import org.apache.any23.extractor.Extractor;
25  import org.apache.any23.extractor.ExtractorRegistryImpl;
26  import org.apache.any23.extractor.Extractor.BlindExtractor;
27  import org.apache.any23.extractor.Extractor.ContentExtractor;
28  import org.apache.any23.extractor.Extractor.TagSoupDOMExtractor;
29  import org.apache.any23.extractor.ExtractorFactory;
30  import org.apache.any23.extractor.ExtractorRegistry;
31  
32  import java.io.IOException;
33  import java.io.PrintStream;
34  
35  /**
36   * This class provides some command-line documentation
37   * about available extractors and their usage.
38   */
39  @Parameters( commandNames = { "extractor" }, commandDescription= "Utility for obtaining documentation about metadata extractors.")
40  public class ExtractorDocumentation extends BaseTool {
41  
42      @Parameter( names = { "-l", "--list" }, description = "shows the names of all available extractors" )
43      private boolean showList;
44  
45      @Parameter( names = { "-i", "--input" }, description = "shows example input for the given extractor" )
46      private String input;
47  
48      @Parameter( names = { "-o", "--output" }, description = "shows example output for the given extractor" )
49      private String output;
50  
51      @Parameter( names = { "-a", "--all" }, description = "shows a report about all available extractors" )
52      private boolean showAll;
53  
54      private PrintStream out = System.out;
55  
56      @Override
57      PrintStream getOut() {
58          return out;
59      }
60  
61      @Override
62      void setOut(PrintStream out) {
63          this.out = out;
64      }
65  
66      @Override
67      public void run() throws Exception {
68          if (showList) {
69              printExtractorList(ExtractorRegistryImpl.getInstance());
70          } else if (input != null) {
71              printExampleInput(input, ExtractorRegistryImpl.getInstance());
72          } else if (output != null) {
73              printExampleOutput(output, ExtractorRegistryImpl.getInstance());
74          } else if (showAll) {
75              printReport(ExtractorRegistryImpl.getInstance());
76          }
77      }
78  
79      /**
80       * Print an error message.
81       *
82       * @param msg the error message to be printed
83       */
84      public void printError(String msg) {
85          System.err.println(msg);
86      }
87  
88      /**
89       * Prints the list of all the available extractors.
90       * @param registry the {@link org.apache.any23.extractor.ExtractorRegistry}
91       * containing all extractors
92       */
93      public void printExtractorList(ExtractorRegistry registry) {
94          for (ExtractorFactory factory : registry.getExtractorGroup()) {
95              out.println(String.format("%25s [%15s]", factory.getExtractorName(), factory.getExtractorLabel()));
96          }
97      }
98  
99      /**
100      * Prints an example of input for the provided extractor.
101      *
102      * @param extractorName the name of the extractor
103      * @param registry the {@link org.apache.any23.extractor.ExtractorRegistry}
104      * containing all extractors
105      * @throws IOException raised if no extractor is found with that name
106      */
107     public void printExampleInput(String extractorName, ExtractorRegistry registry) throws IOException {
108         ExtractorFactory<?> factory = getFactory(registry, extractorName);
109         ExampleInputOutputtput.html#ExampleInputOutput">ExampleInputOutput example = new ExampleInputOutput(factory);
110         String input = example.getExampleInput();
111         if (input == null) {
112             throw new IllegalArgumentException("Extractor " + extractorName + " provides no example input");
113         }
114         out.println(input);
115     }
116 
117     /**
118      * Prints an output example for the given extractor.
119      *
120      * @param extractorName the extractor name
121      * @param registry the {@link org.apache.any23.extractor.ExtractorRegistry}
122      * containing all extractors
123      * @throws IOException raised if no extractor is found with that name
124      * @throws ExtractionException if there is an error duing extraction
125      */
126     public void printExampleOutput(String extractorName, ExtractorRegistry registry) throws IOException, ExtractionException {
127         ExtractorFactory<?> factory = getFactory(registry, extractorName);
128         ExampleInputOutputtput.html#ExampleInputOutput">ExampleInputOutput example = new ExampleInputOutput(factory);
129         String output = example.getExampleOutput();
130         if (output == null) {
131             throw new IllegalArgumentException("Extractor " + extractorName + " provides no example output");
132         }
133         out.println(output);
134     }
135 
136     /**
137      * Prints a complete report on all the available extractors.
138      *
139      * @param registry the {@link org.apache.any23.extractor.ExtractorRegistry}
140      * containing all extractors
141      * @throws IOException raised if no extractor is found with that name
142      * @throws ExtractionException if there is an error duing extraction
143      */
144     public void printReport(ExtractorRegistry registry) throws IOException, ExtractionException {
145         for (String extractorName : registry.getAllNames()) {
146             ExtractorFactory<?> factory = registry.getFactory(extractorName);
147             ExampleInputOutputtput.html#ExampleInputOutput">ExampleInputOutput example = new ExampleInputOutput(factory);
148             out.println("Extractor: " + extractorName);
149             out.println("\ttype: " + getType(factory));
150             out.println();
151             final String exampleInput = example.getExampleInput();
152             if (exampleInput == null) {
153                 out.println("(No Example Available)");
154             } else {
155                 out.println("-------- Example Input  --------");
156                 out.println(exampleInput);
157                 out.println("-------- Example Output --------");
158                 String output = example.getExampleOutput();
159                 out.println(output == null || output.trim().length() == 0 ? "(No Output Generated)" : output);
160             }
161             out.println("================================");
162             out.println();
163         }
164     }
165 
166     private ExtractorFactory<?> getFactory(ExtractorRegistry registry, String name) {
167         if (!registry.isRegisteredName(name)) {
168             throw new IllegalArgumentException("Unknown extractor name: " + name);
169         }
170         return registry.getFactory(name);
171     }
172 
173     private String getType(ExtractorFactory<?> factory) {
174         Extractor<?> extractor = factory.createExtractor();
175         if (extractor instanceof BlindExtractor) {
176             return BlindExtractor.class.getSimpleName();
177         }
178         if (extractor instanceof TagSoupDOMExtractor) {
179             return TagSoupDOMExtractor.class.getSimpleName();
180         }
181         if (extractor instanceof ContentExtractor) {
182             return ContentExtractor.class.getSimpleName();
183         }
184         return "?";
185     }
186 
187 }