/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *  http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
17  
18  package org.apache.any23.extractor;
19  
20  import org.apache.any23.configuration.DefaultConfiguration;
21  import org.apache.any23.extractor.html.HTMLMetaExtractorFactory;
22  import org.apache.any23.extractor.rdfa.RDFa11ExtractorFactory;
23  import org.apache.any23.extractor.rdfa.RDFaExtractorFactory;
24  
25  import java.util.ArrayList;
26  import java.util.Collections;
27  import java.util.List;
28  
29  /**
30   *  Singleton class acting as a register for all the various
31   *  {@link Extractor}.
32   */
33  @SuppressWarnings("rawtypes")
34  public class ExtractorRegistryImpl extends org.eclipse.rdf4j.common.lang.service.ServiceRegistry<String, ExtractorFactory> implements ExtractorRegistry {
35  
36      /**
37       * The instance.
38       */
39      private static ExtractorRegistry instance = null;
40  
41      /**
42       * Public constructor for ExtractorRegistryImpl. Should normally call getInstance.
43       */
44      public ExtractorRegistryImpl() {
45          super(ExtractorFactory.class);
46      }
47  
48      /**
49       * @return returns the {@link ExtractorRegistry} instance.
50       */
51      public static ExtractorRegistry getInstance() {
52          // Thread-safe
53          synchronized (ExtractorRegistry.class) {
54              final DefaultConfiguration conf = DefaultConfiguration.singleton();
55              if (instance == null) {
56                  instance = new ExtractorRegistryImpl();
57                  
58                  if(conf.getFlagProperty("any23.extraction.rdfa.programmatic")) {
59                      instance.unregister(RDFaExtractorFactory.NAME);
60                      // FIXME: Unregister RDFaExtractor if flag is not set
61                      //instance.register(RDFa11Extractor.factory);
62                  } else {
63                      instance.unregister(RDFa11ExtractorFactory.NAME);
64                      // FIXME: Unregister RDFaExtractor if flag is set
65                      //instance.register(RDFaExtractor.factory);
66                  }
67                  if(!conf.getFlagProperty("any23.extraction.head.meta")) {
68                      instance.unregister(HTMLMetaExtractorFactory.NAME);
69                      // FIXME: Unregister HTMLMetaExtractor if this flag is not set
70                      //instance.register(HTMLMetaExtractor.factory);
71                  }
72              }
73          }
74          return instance;
75      }
76  
77      /**
78       * Registers an {@link ExtractorFactory}.
79       *
80       * @param factory the {@link org.apache.any23.extractor.ExtractorFactory} to register
81       * @throws IllegalArgumentException if trying to register a {@link ExtractorFactory}
82       *         with a that already exists in the registry.
83       */
84      @Override
85      public void register(ExtractorFactory<?> factory) {
86          this.add(factory);
87      }
88      
89      /**
90       * Unregisters the {@link ExtractorFactory} with the given name.
91       * 
92       * @param name The name of the ExtractorFactory to unregister.
93       */
94      @Override
95      public void unregister(String name) {
96          if(this.has(name)) {
97              this.remove(this.get(name).get());
98          }
99      }
100     
101     /**
102      *
103      * Retrieves a {@link ExtractorFactory} given its name
104      *
105      * @param name of the desired factory
106      * @return the {@link ExtractorFactory} associated to the provided name
107      * @throws IllegalArgumentException if there is not a
108      * {@link ExtractorFactory} associated to the provided name.
109      */
110     @Override
111     public ExtractorFactory<?> getFactory(final String name) {
112         return this.get(name).orElseThrow(() -> new IllegalArgumentException("Unregistered extractor name: " + name));
113     }
114 
115     /**
116      * @return an {@link ExtractorGroup} with all the registered
117      * {@link Extractor}.
118      */
119     @Override
120     public ExtractorGroup getExtractorGroup() {
121         return getExtractorGroup(getAllNames());
122     }
123 
124     /**
125      * Returns an {@link ExtractorGroup} containing the
126      * {@link ExtractorFactory} mathing the names provided as input.
127      * @param names a {@link java.util.List} containing the names of the desired {@link ExtractorFactory}.
128      * @return the extraction group.
129      */
130     @Override
131     public ExtractorGroup getExtractorGroup(List<String> names) {
132         List<ExtractorFactory<?>> members = new ArrayList<>(names.size());
133         for (String name : names) {
134             members.add(getFactory(name));
135         }
136         return new ExtractorGroup(members);
137     }
138 
139     /**
140      * 
141      * @param name of the {@link ExtractorFactory}
142      * @return <code>true</code> if is there a {@link ExtractorFactory}
143      * associated to the provided name.
144      */
145     @Override
146     public boolean isRegisteredName(String name) {
147         return this.has(name);
148     }
149 
150     /**
151      * Returns the names of all registered extractors, sorted alphabetically.
152      */
153     @Override
154     public List<String> getAllNames() {
155         List<String> result = new ArrayList<>(this.getKeys());
156         Collections.sort(result);
157         return result;
158     }
159 
160     @Override
161     protected String getKey(ExtractorFactory service) {
162         return service.getExtractorName();
163     }
164 
165 }