1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18 package org.apache.any23.extractor;
19
20 import org.apache.any23.configuration.DefaultConfiguration;
21 import org.apache.any23.extractor.csv.CSVExtractor;
22 import org.apache.any23.extractor.html.AdrExtractor;
23 import org.apache.any23.extractor.html.GeoExtractor;
24 import org.apache.any23.extractor.html.HCalendarExtractor;
25 import org.apache.any23.extractor.html.HCardExtractor;
26 import org.apache.any23.extractor.html.HListingExtractor;
27 import org.apache.any23.extractor.html.HRecipeExtractor;
28 import org.apache.any23.extractor.html.HResumeExtractor;
29 import org.apache.any23.extractor.html.HReviewExtractor;
30 import org.apache.any23.extractor.html.HTMLMetaExtractor;
31 import org.apache.any23.extractor.html.HeadLinkExtractor;
32 import org.apache.any23.extractor.html.ICBMExtractor;
33 import org.apache.any23.extractor.html.LicenseExtractor;
34 import org.apache.any23.extractor.html.SpeciesExtractor;
35 import org.apache.any23.extractor.html.TitleExtractor;
36 import org.apache.any23.extractor.html.TurtleHTMLExtractor;
37 import org.apache.any23.extractor.html.XFNExtractor;
38 import org.apache.any23.extractor.microdata.MicrodataExtractor;
39 import org.apache.any23.extractor.rdf.NQuadsExtractor;
40 import org.apache.any23.extractor.rdf.NTriplesExtractor;
41 import org.apache.any23.extractor.rdf.RDFXMLExtractor;
42 import org.apache.any23.extractor.rdf.TriXExtractor;
43 import org.apache.any23.extractor.rdf.TurtleExtractor;
44 import org.apache.any23.extractor.rdfa.RDFa11Extractor;
45 import org.apache.any23.extractor.rdfa.RDFaExtractor;
46
47 import java.util.ArrayList;
48 import java.util.Collections;
49 import java.util.HashMap;
50 import java.util.List;
51 import java.util.Map;
52
53
54
55
56
57 public class ExtractorRegistryImpl implements ExtractorRegistry {
58
59
60
61
62 private static ExtractorRegistry instance = null;
63
64
65
66
67
68 private Map<String, ExtractorFactory<?>> factories = new HashMap<String, ExtractorFactory<?>>();
69
70
71
72
73 public static ExtractorRegistry getInstance() {
74
75 synchronized (ExtractorRegistry.class) {
76 final DefaultConfiguration conf = DefaultConfiguration.singleton();
77 if (instance == null) {
78 instance = new ExtractorRegistryImpl();
79
80 instance.register(RDFXMLExtractor.factory);
81 instance.register(TurtleExtractor.factory);
82 instance.register(NTriplesExtractor.factory);
83 instance.register(NQuadsExtractor.factory);
84 instance.register(TriXExtractor.factory);
85 if(conf.getFlagProperty("any23.extraction.rdfa.programmatic")) {
86 instance.register(RDFa11Extractor.factory);
87 } else {
88 instance.register(RDFaExtractor.factory);
89 }
90 instance.register(HeadLinkExtractor.factory);
91 instance.register(LicenseExtractor.factory);
92 instance.register(TitleExtractor.factory);
93 instance.register(XFNExtractor.factory);
94 instance.register(ICBMExtractor.factory);
95 instance.register(AdrExtractor.factory);
96 instance.register(GeoExtractor.factory);
97 instance.register(HCalendarExtractor.factory);
98 instance.register(HCardExtractor.factory);
99 instance.register(HListingExtractor.factory);
100 instance.register(HResumeExtractor.factory);
101 instance.register(HReviewExtractor.factory);
102 instance.register(HRecipeExtractor.factory);
103 instance.register(SpeciesExtractor.factory);
104 instance.register(TurtleHTMLExtractor.factory);
105 instance.register(MicrodataExtractor.factory);
106 instance.register(CSVExtractor.factory);
107 if(conf.getFlagProperty("any23.extraction.head.meta")) {
108 instance.register(HTMLMetaExtractor.factory);
109 }
110 }
111 }
112 return instance;
113 }
114
115
116
117
118
119
120
121
122 public void register(ExtractorFactory<?> factory) {
123 if (factories.containsKey(factory.getExtractorName())) {
124 throw new IllegalArgumentException(String.format("Extractor name clash: %s",
125 factory.getExtractorName()));
126 }
127 factories.put(factory.getExtractorName(), factory);
128 }
129
130
131
132
133
134
135
136
137
138
139 public ExtractorFactory<?> getFactory(String name) {
140 if (!factories.containsKey(name)) {
141 throw new IllegalArgumentException("Unregistered extractor name: " + name);
142 }
143 return factories.get(name);
144 }
145
146
147
148
149
150 public ExtractorGroup getExtractorGroup() {
151 return getExtractorGroup(getAllNames());
152 }
153
154
155
156
157
158
159
160 public ExtractorGroup getExtractorGroup(List<String> names) {
161 List<ExtractorFactory<?>> members = new ArrayList<ExtractorFactory<?>>(names.size());
162 for (String name : names) {
163 members.add(getFactory(name));
164 }
165 return new ExtractorGroup(members);
166 }
167
168
169
170
171
172
173
174 public boolean isRegisteredName(String name) {
175 return factories.containsKey(name);
176 }
177
178
179
180
181 public List<String> getAllNames() {
182 List<String> result = new ArrayList<String>(factories.keySet());
183 Collections.sort(result);
184 return result;
185 }
186
187 }