View Javadoc

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *  http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
17  
18  package org.apache.any23.extractor;
19  
20  import org.apache.any23.mime.MIMEType;
21  
22  import java.util.Collection;
23  
24  /**
25   * Interface defining a factory for {@link Extractor}.
26   *
27   * @param <T> the type of the {@link Extractor} to be created by this factory.
28   */
29  public interface ExtractorFactory<T extends Extractor<?>> extends ExtractorDescription {
30  
31      /**
32       * Returns the extractor type.
33       *
34       * @return the not <code>null</code> extractor class.
35       */
36      Class<T> getExtractorType();
37  
38      /**
39       * Creates an extractor instance.
40       *
41       * @return an instance of the extractor associated to this factory.
42       */
43      T createExtractor();
44  
45      /**
46       * Supports wildcards, e.g. <code>"*&#47;*"</code> for blind extractors that merely call a web service.
47       */
48      Collection<MIMEType> getSupportedMIMETypes();
49  
50      /**
51       * An example input file for the extractor, to be used in auto-generated
52       * documentation. For the {@link Extractor.BlindExtractor},
53       * this is an arbitrary URI.
54       * For extractors that require content, it is the name of a file, relative
55       * to the factory's class file's location, it will be opened using
56       * factory.getClass().getResourceAsStream(filename). The example should be
57       * a short file that produces characteristic output if sent through the
58       * extractor. The file will be read as UTF-8, so it should either use that
59       * encoding or avoid characters outside of the US-ASCII range.
60       */
61      String getExampleInput();
62  }