View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *  http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  
18  package org.apache.any23.extractor;
19  
20  import org.apache.any23.configuration.DefaultConfiguration;
21  import org.apache.any23.extractor.html.HTMLMetaExtractorFactory;
22  import org.apache.any23.extractor.rdfa.RDFa11ExtractorFactory;
23  import org.apache.any23.extractor.rdfa.RDFaExtractorFactory;
24  
25  import java.util.ArrayList;
26  import java.util.Collections;
27  import java.util.List;
28  
29  /**
30   * Singleton class acting as a register for all the various {@link Extractor}.
31   */
32  @SuppressWarnings("rawtypes")
33  public class ExtractorRegistryImpl extends
34          org.eclipse.rdf4j.common.lang.service.ServiceRegistry<String, ExtractorFactory> implements ExtractorRegistry {
35  
36      /**
37       * The instance.
38       */
39      private static ExtractorRegistry instance = null;
40  
41      /**
42       * Public constructor for ExtractorRegistryImpl. Should normally call getInstance.
43       */
44      public ExtractorRegistryImpl() {
45          super(ExtractorFactory.class);
46      }
47  
48      /**
49       * @return returns the {@link ExtractorRegistry} instance.
50       */
51      public static ExtractorRegistry getInstance() {
52          // Thread-safe
53          synchronized (ExtractorRegistry.class) {
54              final DefaultConfiguration conf = DefaultConfiguration.singleton();
55              if (instance == null) {
56                  instance = new ExtractorRegistryImpl();
57  
58                  if (conf.getFlagProperty("any23.extraction.rdfa.programmatic")) {
59                      instance.unregister(RDFaExtractorFactory.NAME);
60                      // FIXME: Unregister RDFaExtractor if flag is not set
61                      // instance.register(RDFa11Extractor.factory);
62                  } else {
63                      instance.unregister(RDFa11ExtractorFactory.NAME);
64                      // FIXME: Unregister RDFaExtractor if flag is set
65                      // instance.register(RDFaExtractor.factory);
66                  }
67                  if (!conf.getFlagProperty("any23.extraction.head.meta")) {
68                      instance.unregister(HTMLMetaExtractorFactory.NAME);
69                      // FIXME: Unregister HTMLMetaExtractor if this flag is not set
70                      // instance.register(HTMLMetaExtractor.factory);
71                  }
72              }
73          }
74          return instance;
75      }
76  
77      /**
78       * Registers an {@link ExtractorFactory}.
79       *
80       * @param factory
81       *            the {@link org.apache.any23.extractor.ExtractorFactory} to register
82       * 
83       * @throws IllegalArgumentException
84       *             if trying to register a {@link ExtractorFactory} with a that already exists in the registry.
85       */
86      @Override
87      public void register(ExtractorFactory<?> factory) {
88          this.add(factory);
89      }
90  
91      /**
92       * Unregisters the {@link ExtractorFactory} with the given name.
93       * 
94       * @param name
95       *            The name of the ExtractorFactory to unregister.
96       */
97      @Override
98      public void unregister(String name) {
99          if (this.has(name)) {
100             this.remove(this.get(name).get());
101         }
102     }
103 
104     /**
105      *
106      * Retrieves a {@link ExtractorFactory} given its name
107      *
108      * @param name
109      *            of the desired factory
110      * 
111      * @return the {@link ExtractorFactory} associated to the provided name
112      * 
113      * @throws IllegalArgumentException
114      *             if there is not a {@link ExtractorFactory} associated to the provided name.
115      */
116     @Override
117     public ExtractorFactory<?> getFactory(final String name) {
118         return this.get(name).orElseThrow(() -> new IllegalArgumentException("Unregistered extractor name: " + name));
119     }
120 
121     /**
122      * @return an {@link ExtractorGroup} with all the registered {@link Extractor}.
123      */
124     @Override
125     public ExtractorGroup getExtractorGroup() {
126         return getExtractorGroup(getAllNames());
127     }
128 
129     /**
130      * Returns an {@link ExtractorGroup} containing the {@link ExtractorFactory} mathing the names provided as input.
131      * 
132      * @param names
133      *            a {@link java.util.List} containing the names of the desired {@link ExtractorFactory}.
134      * 
135      * @return the extraction group.
136      */
137     @Override
138     public ExtractorGroup getExtractorGroup(List<String> names) {
139         List<ExtractorFactory<?>> members = new ArrayList<>(names.size());
140         for (String name : names) {
141             members.add(getFactory(name));
142         }
143         return new ExtractorGroup(members);
144     }
145 
146     /**
147      * 
148      * @param name
149      *            of the {@link ExtractorFactory}
150      * 
151      * @return <code>true</code> if is there a {@link ExtractorFactory} associated to the provided name.
152      */
153     @Override
154     public boolean isRegisteredName(String name) {
155         return this.has(name);
156     }
157 
158     /**
159      * Returns the names of all registered extractors, sorted alphabetically.
160      */
161     @Override
162     public List<String> getAllNames() {
163         List<String> result = new ArrayList<>(this.getKeys());
164         Collections.sort(result);
165         return result;
166     }
167 
168     @Override
169     protected String getKey(ExtractorFactory service) {
170         return service.getExtractorName();
171     }
172 
173 }