1 /*
2 * Licensed to the Apache Software Foundation (ASF) under one or more
3 * contributor license agreements. See the NOTICE file distributed with
4 * this work for additional information regarding copyright ownership.
5 * The ASF licenses this file to You under the Apache License, Version 2.0
6 * (the "License"); you may not use this file except in compliance with
7 * the License. You may obtain a copy of the License at
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17
18 package org.apache.any23.extractor;
19
20 import org.apache.any23.mime.MIMEType;
21
22 import java.util.Collection;
23
24 /**
25 * Interface defining a factory for {@link Extractor}.
26 *
27 * @param <T> the type of the {@link Extractor} to be created by this factory.
28 */
29 public interface ExtractorFactory<T extends Extractor<?>> extends ExtractorDescription {
30
31 /**
32 * Returns the extractor type.
33 *
34 * @return the not <code>null</code> extractor class.
35 */
36 Class<T> getExtractorType();
37
38 /**
39 * Creates an extractor instance.
40 *
41 * @return an instance of the extractor associated to this factory.
42 */
43 T createExtractor();
44
45 /**
46 * Supports wildcards, e.g. <code>"*/*"</code> for blind extractors that merely call a web service.
47 */
48 Collection<MIMEType> getSupportedMIMETypes();
49
50 /**
51 * An example input file for the extractor, to be used in auto-generated
52 * documentation. For the {@link Extractor.BlindExtractor},
53 * this is an arbitrary URI.
54 * For extractors that require content, it is the name of a file, relative
55 * to the factory's class file's location, it will be opened using
56 * factory.getClass().getResourceAsStream(filename). The example should be
57 * a short file that produces characteristic output if sent through the
58 * extractor. The file will be read as UTF-8, so it should either use that
59 * encoding or avoid characters outside of the US-ASCII range.
60 */
61 String getExampleInput();
62 }