View Javadoc

1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *  http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  
18  package org.apache.any23.extractor;
19  
20  import org.apache.any23.configuration.DefaultConfiguration;
21  import org.apache.any23.extractor.csv.CSVExtractor;
22  import org.apache.any23.extractor.html.AdrExtractor;
23  import org.apache.any23.extractor.html.GeoExtractor;
24  import org.apache.any23.extractor.html.HCalendarExtractor;
25  import org.apache.any23.extractor.html.HCardExtractor;
26  import org.apache.any23.extractor.html.HListingExtractor;
27  import org.apache.any23.extractor.html.HRecipeExtractor;
28  import org.apache.any23.extractor.html.HResumeExtractor;
29  import org.apache.any23.extractor.html.HReviewExtractor;
30  import org.apache.any23.extractor.html.HTMLMetaExtractor;
31  import org.apache.any23.extractor.html.HeadLinkExtractor;
32  import org.apache.any23.extractor.html.ICBMExtractor;
33  import org.apache.any23.extractor.html.LicenseExtractor;
34  import org.apache.any23.extractor.html.SpeciesExtractor;
35  import org.apache.any23.extractor.html.TitleExtractor;
36  import org.apache.any23.extractor.html.TurtleHTMLExtractor;
37  import org.apache.any23.extractor.html.XFNExtractor;
38  import org.apache.any23.extractor.microdata.MicrodataExtractor;
39  import org.apache.any23.extractor.rdf.NQuadsExtractor;
40  import org.apache.any23.extractor.rdf.NTriplesExtractor;
41  import org.apache.any23.extractor.rdf.RDFXMLExtractor;
42  import org.apache.any23.extractor.rdf.TriXExtractor;
43  import org.apache.any23.extractor.rdf.TurtleExtractor;
44  import org.apache.any23.extractor.rdfa.RDFa11Extractor;
45  import org.apache.any23.extractor.rdfa.RDFaExtractor;
46  
47  import java.util.ArrayList;
48  import java.util.Collections;
49  import java.util.HashMap;
50  import java.util.List;
51  import java.util.Map;
52  
53  /**
54   *  Singleton class acting as a register for all the various
55   *  {@link Extractor}.
56   */
57  public class ExtractorRegistryImpl implements ExtractorRegistry {
58  
59      /**
60       * The instance.
61       */
62      private static ExtractorRegistry instance = null;
63  
64      /**
65       * maps containing the related {@link ExtractorFactory} for each
66       * registered {@link Extractor}.
67       */
68      private Map<String, ExtractorFactory<?>> factories = new HashMap<String, ExtractorFactory<?>>();
69  
70      /**
71       * @return returns the {@link ExtractorRegistry} instance.
72       */
73      public static ExtractorRegistry getInstance() {
74          // Thread-safe
75          synchronized (ExtractorRegistry.class) {
76              final DefaultConfiguration conf = DefaultConfiguration.singleton();
77              if (instance == null) {
78                  instance = new ExtractorRegistryImpl();
79                  // FIXME: Remove these hardcoded links to the extractor factories by turning them into SPI interfaces
80                  instance.register(RDFXMLExtractor.factory);
81                  instance.register(TurtleExtractor.factory);
82                  instance.register(NTriplesExtractor.factory);
83                  instance.register(NQuadsExtractor.factory);
84                  instance.register(TriXExtractor.factory);
85                  if(conf.getFlagProperty("any23.extraction.rdfa.programmatic")) {
86                      instance.register(RDFa11Extractor.factory);
87                  } else {
88                      instance.register(RDFaExtractor.factory);
89                  }
90                  instance.register(HeadLinkExtractor.factory);
91                  instance.register(LicenseExtractor.factory);
92                  instance.register(TitleExtractor.factory);
93                  instance.register(XFNExtractor.factory);
94                  instance.register(ICBMExtractor.factory);
95                  instance.register(AdrExtractor.factory);
96                  instance.register(GeoExtractor.factory);
97                  instance.register(HCalendarExtractor.factory);
98                  instance.register(HCardExtractor.factory);
99                  instance.register(HListingExtractor.factory);
100                 instance.register(HResumeExtractor.factory);
101                 instance.register(HReviewExtractor.factory);
102                 instance.register(HRecipeExtractor.factory);
103                 instance.register(SpeciesExtractor.factory);
104                 instance.register(TurtleHTMLExtractor.factory);
105                 instance.register(MicrodataExtractor.factory);
106                 instance.register(CSVExtractor.factory);
107                 if(conf.getFlagProperty("any23.extraction.head.meta")) {
108                     instance.register(HTMLMetaExtractor.factory);
109                 }
110             }
111         }
112         return instance;
113     }
114 
115     /**
116      * Registers an {@link ExtractorFactory}.
117      *
118      * @param factory
119      * @throws IllegalArgumentException if trying to register a {@link ExtractorFactory}
120      *         with a that already exists in the registry.
121      */
122     public void register(ExtractorFactory<?> factory) {
123         if (factories.containsKey(factory.getExtractorName())) {
124             throw new IllegalArgumentException(String.format("Extractor name clash: %s",
125                     factory.getExtractorName()));
126         }
127         factories.put(factory.getExtractorName(), factory);
128     }
129 
130     /**
131      *
132      * Retrieves a {@link ExtractorFactory} given its name
133      *
134      * @param name of the desired factory
135      * @return the {@link ExtractorFactory} associated to the provided name
136      * @throws IllegalArgumentException if there is not a
137      * {@link ExtractorFactory} associated to the provided name.
138      */
139     public ExtractorFactory<?> getFactory(String name) {
140         if (!factories.containsKey(name)) {
141             throw new IllegalArgumentException("Unregistered extractor name: " + name);
142         }
143         return factories.get(name);
144     }
145 
146     /**
147      * @return an {@link ExtractorGroup} with all the registered
148      * {@link Extractor}.
149      */
150     public ExtractorGroup getExtractorGroup() {
151         return getExtractorGroup(getAllNames());
152     }
153 
154     /**
155      * Returns an {@link ExtractorGroup} containing the
156      * {@link ExtractorFactory} mathing the names provided as input.
157      * @param names a {@link java.util.List} containing the names of the desired {@link ExtractorFactory}.
158      * @return the extraction group.
159      */
160     public ExtractorGroup getExtractorGroup(List<String> names) {
161         List<ExtractorFactory<?>> members = new ArrayList<ExtractorFactory<?>>(names.size());
162         for (String name : names) {
163             members.add(getFactory(name));
164         }
165         return new ExtractorGroup(members);
166     }
167 
168     /**
169      * 
170      * @param name of the {@link ExtractorFactory}
171      * @return <code>true</code> if is there a {@link ExtractorFactory}
172      * associated to the provided name.
173      */
174     public boolean isRegisteredName(String name) {
175         return factories.containsKey(name);
176     }
177 
178     /**
179      * Returns the names of all registered extractors, sorted alphabetically.
180      */
181     public List<String> getAllNames() {
182         List<String> result = new ArrayList<String>(factories.keySet());
183         Collections.sort(result);
184         return result;
185     }
186 
187 }