1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18 package org.apache.any23.extractor;
19
20 import org.apache.any23.extractor.html.MicroformatExtractor;
21 import org.apache.any23.rdf.Prefixes;
22 import org.apache.any23.writer.TripleHandler;
23 import org.apache.any23.writer.TripleHandlerException;
24 import org.openrdf.model.BNode;
25 import org.openrdf.model.Resource;
26 import org.openrdf.model.URI;
27 import org.openrdf.model.Value;
28
29 import java.io.PrintStream;
30 import java.util.ArrayList;
31 import java.util.Collection;
32 import java.util.Collections;
33 import java.util.HashSet;
34 import java.util.List;
35 import java.util.Set;
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57 public class ExtractionResultImpl implements TagSoupExtractionResult {
58
59 private final ExtractionContext context;
60
61 private final Extractor<?> extractor;
62
63 private final TripleHandler tripleHandler;
64
65 private final Collection<ExtractionResult> subResults = new ArrayList<ExtractionResult>();
66
67 private final Set<Object> knownContextIDs = new HashSet<Object>();
68
69 private boolean isClosed = false;
70
71 private boolean isInitialized = false;
72
73 private List<Issue> issues;
74
75 private List<ResourceRoot> resourceRoots;
76
77 private List<PropertyPath> propertyPaths;
78
79 public ExtractionResultImpl(
80 ExtractionContext context,
81 Extractor<?> extractor,
82 TripleHandler tripleHandler
83 ) {
84 this(context, extractor, tripleHandler, new ArrayList<Issue>());
85 }
86
87 private ExtractionResultImpl(
88 ExtractionContext context,
89 Extractor<?> extractor,
90 TripleHandler tripleHandler,
91 List<Issue> issues
92 ) {
93 if(context == null) {
94 throw new NullPointerException("context cannot be null.");
95 }
96 if(extractor == null) {
97 throw new NullPointerException("extractor cannot be null.");
98 }
99 if(tripleHandler == null) {
100 throw new NullPointerException("triple handler cannot be null.");
101 }
102
103 this.extractor = extractor;
104 this.tripleHandler = tripleHandler;
105 this.context = context;
106 this.issues = issues;
107
108 knownContextIDs.add( context.getUniqueID() );
109 }
110
111 public boolean hasIssues() {
112 return ! issues.isEmpty();
113 }
114
115 public int getIssuesCount() {
116 return issues.size();
117 }
118
119 public void printReport(PrintStream ps) {
120 ps.print(String.format("Context: %s [errors: %d] {\n", context, getIssuesCount()));
121 for (Issue issue : issues) {
122 ps.print(issue.toString());
123 ps.print("\n");
124 }
125
126 for (ExtractionResult er : subResults) {
127 er.printReport(ps);
128 }
129 ps.print("}\n");
130 }
131
132 public Collection<Issue> getIssues() {
133 return issues.isEmpty() ? Collections.<Issue>emptyList() : Collections.unmodifiableList(issues);
134 }
135
136 public ExtractionResult openSubResult(ExtractionContext context) {
137 final String contextID = context.getUniqueID();
138 if (knownContextIDs.contains(contextID)) {
139 throw new IllegalArgumentException("Duplicate contextID: " + contextID);
140 }
141 knownContextIDs.add(contextID);
142
143 checkOpen();
144 ExtractionResult result = new ExtractionResultImpl(context, extractor, tripleHandler, this.issues);
145 subResults.add(result);
146 return result;
147 }
148
149 public ExtractionContext getExtractionContext() {
150 return context;
151 }
152
153 public void writeTriple(Resource s, URI p, Value o, URI g) {
154 if (s == null || p == null || o == null) return;
155
156 if (s.stringValue() == null || p.stringValue() == null || o.stringValue() == null) {
157 throw new IllegalArgumentException("The statement arguments must be not null.");
158 }
159 checkOpen();
160 try {
161 tripleHandler.receiveTriple(s, p, o, g, context);
162 } catch (TripleHandlerException e) {
163 throw new RuntimeException(
164 String.format("Error while receiving triple %s %s %s", s, p, o ),
165 e
166 );
167 }
168 }
169
170 public void writeTriple(Resource s, URI p, Value o) {
171 writeTriple(s, p, o, null);
172 }
173
174 public void writeNamespace(String prefix, String uri) {
175 checkOpen();
176 try {
177 tripleHandler.receiveNamespace(prefix, uri, context);
178 } catch (TripleHandlerException e) {
179 throw new RuntimeException(
180 String.format("Error while writing namespace %s:%s", prefix, uri),
181 e
182 );
183 }
184 }
185
186 public void notifyIssue(IssueLevel level, String msg, int row, int col) {
187 issues.add(new Issue(level, msg, row, col));
188 }
189
190 public void close() {
191 if (isClosed) return;
192 isClosed = true;
193 for (ExtractionResult subResult : subResults) {
194 subResult.close();
195 }
196 if (isInitialized) {
197 try {
198 tripleHandler.closeContext(context);
199 } catch (TripleHandlerException e) {
200 throw new RuntimeException("Error while opening context", e);
201 }
202 }
203 }
204
205 private void checkOpen() {
206 if (!isInitialized) {
207 isInitialized = true;
208 try {
209 tripleHandler.openContext(context);
210 } catch (TripleHandlerException e) {
211 throw new RuntimeException("Error while opening context", e);
212 }
213 Prefixes prefixes = extractor.getDescription().getPrefixes();
214 for (String prefix : prefixes.allPrefixes()) {
215 try {
216 tripleHandler.receiveNamespace(prefix, prefixes.getNamespaceURIFor(prefix), context);
217 } catch (TripleHandlerException e) {
218 throw new RuntimeException(String.format("Error while writing namespace %s", prefix),
219 e
220 );
221 }
222 }
223 }
224 if (isClosed) {
225 throw new IllegalStateException("Not open: " + context);
226 }
227 }
228
229 public void addResourceRoot(String[] path, Resource root, Class<? extends MicroformatExtractor> extractor) {
230 if(resourceRoots == null) {
231 resourceRoots = new ArrayList<ResourceRoot>();
232 }
233 resourceRoots.add( new ResourceRoot(path, root, extractor) );
234 }
235
236 public List<ResourceRoot> getResourceRoots() {
237 List<ResourceRoot> allRoots = new ArrayList<ResourceRoot>();
238 if(resourceRoots != null) {
239 allRoots.addAll( resourceRoots );
240 }
241 for(ExtractionResult er : subResults) {
242 ExtractionResultImpl eri = (ExtractionResultImpl) er;
243 if( eri.resourceRoots != null ) {
244 allRoots.addAll( eri.resourceRoots );
245 }
246 }
247 return allRoots;
248 }
249
250 public void addPropertyPath(
251 Class<? extends MicroformatExtractor> extractor,
252 Resource propertySubject,
253 Resource property,
254 BNode object,
255 String[] path
256 ) {
257 if(propertyPaths == null) {
258 propertyPaths = new ArrayList<PropertyPath>();
259 }
260 propertyPaths.add( new PropertyPath(path, propertySubject, property, object, extractor) );
261 }
262
263 public List<PropertyPath> getPropertyPaths() {
264 List<PropertyPath> allPaths = new ArrayList<PropertyPath>();
265 if(propertyPaths != null) {
266 allPaths.addAll( propertyPaths );
267 }
268 for(ExtractionResult er : subResults) {
269 ExtractionResultImpl eri = (ExtractionResultImpl) er;
270 if( eri.propertyPaths != null ) {
271 allPaths.addAll( eri.propertyPaths );
272 }
273 }
274 return allPaths;
275 }
276
277 @Override
278 public String toString() {
279 final StringBuilder sb = new StringBuilder();
280 sb.append(context.toString());
281 sb.append('\n');
282 if (issues != null) {
283 sb.append("Errors {\n");
284 for (Issue issue : issues) {
285 sb.append('\t');
286 sb.append(issue.toString());
287 sb.append('\n');
288 }
289 }
290 sb.append("}\n");
291 return sb.toString();
292 }
293
294 }