1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18 package org.apache.any23.extractor.html;
19
20 import org.apache.any23.extractor.ExtractionException;
21 import org.apache.any23.extractor.ExtractionResult;
22 import org.apache.any23.extractor.ExtractorDescription;
23 import org.apache.any23.extractor.ExtractorFactory;
24 import org.apache.any23.extractor.SimpleExtractorFactory;
25 import org.apache.any23.rdf.PopularPrefixes;
26 import org.apache.any23.vocab.HRECIPE;
27 import org.openrdf.model.BNode;
28 import org.openrdf.model.URI;
29 import org.openrdf.model.vocabulary.RDF;
30 import org.w3c.dom.Node;
31
32 import java.util.Arrays;
33
34
35
36
37
38
39
40 public class HRecipeExtractor extends EntityBasedMicroformatExtractor {
41
42 private static final HRECIPE vHRECIPE = HRECIPE.getInstance();
43
44 public final static ExtractorFactory<HRecipeExtractor> factory =
45 SimpleExtractorFactory.create(
46 "html-mf-hrecipe",
47 PopularPrefixes.createSubset("rdf", "hrecipe"),
48 Arrays.asList("text/html;q=0.1", "application/xhtml+xml;q=0.1"),
49 "example-mf-hrecipe.html",
50 HRecipeExtractor.class
51 );
52
53
54 @Override
55 public ExtractorDescription getDescription() {
56 return factory;
57 }
58
59 @Override
60 protected String getBaseClassName() {
61 return "hrecipe";
62 }
63
64 @Override
65 protected void resetExtractor() {
66
67 }
68
69 @Override
70 protected boolean extractEntity(Node node, ExtractionResult out) throws ExtractionException {
71 final BNode recipe = getBlankNodeFor(node);
72 conditionallyAddResourceProperty(recipe, RDF.TYPE, vHRECIPE.Recipe);
73 final HTMLDocument fragment = new HTMLDocument(node);
74 addFN(fragment, recipe);
75 addIngredients(fragment, recipe);
76 addYield(fragment, recipe);
77 addInstructions(fragment, recipe);
78 addDurations(fragment, recipe);
79 addPhoto(fragment, recipe);
80 addSummary(fragment, recipe);
81 addAuthors(fragment, recipe);
82 addPublished(fragment, recipe);
83 addNutritions(fragment, recipe);
84 addTags(fragment, recipe);
85 return true;
86 }
87
88
89
90
91
92
93
94
95
96 private void mapFieldWithProperty(HTMLDocument fragment, BNode recipe, String fieldClass, URI property) {
97 HTMLDocument.TextField title = fragment.getSingularTextField(fieldClass);
98 conditionallyAddStringProperty(
99 title.source(), recipe, property, title.value()
100 );
101 }
102
103
104
105
106
107
108
109 private void addFN(HTMLDocument fragment, BNode recipe) {
110 mapFieldWithProperty(fragment, recipe, "fn", vHRECIPE.fn);
111 }
112
113
114
115
116
117
118
119
120 private BNode addIngredient(HTMLDocument fragment, HTMLDocument.TextField ingredient) {
121 final BNode ingredientBnode = getBlankNodeFor(ingredient.source());
122 addURIProperty(ingredientBnode, RDF.TYPE, vHRECIPE.Ingredient);
123 conditionallyAddStringProperty(
124 ingredient.source(),
125 ingredientBnode,
126 vHRECIPE.ingredientName,
127 HTMLDocument.readNodeContent(ingredient.source(), true)
128 );
129 mapFieldWithProperty(fragment, ingredientBnode, "value", vHRECIPE.ingredientQuantity);
130 mapFieldWithProperty(fragment, ingredientBnode, "type" , vHRECIPE.ingredientQuantityType);
131 return ingredientBnode;
132 }
133
134
135
136
137
138
139
140
141 private void addIngredients(HTMLDocument fragment, BNode recipe) {
142 final HTMLDocument.TextField[] ingredients = fragment.getPluralTextField("ingredient");
143 for(HTMLDocument.TextField ingredient : ingredients) {
144 addBNodeProperty(recipe, vHRECIPE.ingredient, addIngredient(fragment, ingredient));
145 }
146 }
147
148
149
150
151
152
153
154 private void addInstructions(HTMLDocument fragment, BNode recipe) {
155 mapFieldWithProperty(fragment, recipe, "instructions", vHRECIPE.instructions);
156
157 }
158
159
160
161
162
163
164
165 private void addYield(HTMLDocument fragment, BNode recipe) {
166 mapFieldWithProperty(fragment, recipe, "yield", vHRECIPE.yield);
167 }
168
169
170
171
172
173
174
175
176
177 private BNode addDuration(HTMLDocument fragment, HTMLDocument.TextField duration) {
178 final BNode durationBnode = getBlankNodeFor(duration.source());
179 addURIProperty(durationBnode, RDF.TYPE, vHRECIPE.Duration);
180 conditionallyAddStringProperty(
181 duration.source(),
182 durationBnode, vHRECIPE.durationTime, duration.value()
183 );
184 mapFieldWithProperty(fragment, durationBnode, "value-title", vHRECIPE.durationTitle);
185 return durationBnode;
186 }
187
188
189
190
191
192
193
194 private void addDurations(HTMLDocument fragment, BNode recipe) {
195 final HTMLDocument.TextField[] durations = fragment.getPluralTextField("duration");
196 for(HTMLDocument.TextField duration : durations) {
197 addBNodeProperty(recipe, vHRECIPE.duration, addDuration(fragment, duration));
198 }
199 }
200
201
202
203
204
205
206
207
208 private void addPhoto(HTMLDocument fragment, BNode recipe) throws ExtractionException {
209 final HTMLDocument.TextField[] photos = fragment.getPluralUrlField("photo");
210 for(HTMLDocument.TextField photo : photos) {
211 addURIProperty(recipe, vHRECIPE.photo, fragment.resolveURI(photo.value()));
212 }
213 }
214
215
216
217
218
219
220
221 private void addSummary(HTMLDocument fragment, BNode recipe) {
222 mapFieldWithProperty(fragment, recipe, "summary", vHRECIPE.summary);
223 }
224
225
226
227
228
229
230
231 private void addAuthors(HTMLDocument fragment, BNode recipe) {
232 final HTMLDocument.TextField[] authors = fragment.getPluralTextField("author");
233 for(HTMLDocument.TextField author : authors) {
234 conditionallyAddStringProperty(
235 author.source(),
236 recipe, vHRECIPE.author, author.value()
237 );
238 }
239 }
240
241
242
243
244
245
246
247
248 private void addPublished(HTMLDocument fragment, BNode recipe) {
249 mapFieldWithProperty(fragment, recipe, "published", vHRECIPE.published);
250 }
251
252
253
254
255
256
257
258
259 private BNode addNutrition(HTMLDocument fragment, HTMLDocument.TextField nutrition) {
260 final BNode nutritionBnode = getBlankNodeFor(nutrition.source());
261 addURIProperty(nutritionBnode, RDF.TYPE, vHRECIPE.Nutrition);
262 conditionallyAddStringProperty(
263 nutrition.source(),
264 nutritionBnode, vHRECIPE.nutritionValue, nutrition.value()
265 );
266 mapFieldWithProperty(fragment, nutritionBnode, "value", vHRECIPE.nutritionValue);
267 mapFieldWithProperty(fragment, nutritionBnode, "type" , vHRECIPE.nutritionValueType);
268 return nutritionBnode;
269 }
270
271
272
273
274
275
276
277 private void addNutritions(HTMLDocument fragment, BNode recipe) {
278 HTMLDocument.TextField[] nutritions = fragment.getPluralTextField("nutrition");
279 for (HTMLDocument.TextField nutrition : nutritions) {
280 addBNodeProperty(recipe, vHRECIPE.nutrition, addNutrition(fragment, nutrition));
281 }
282 }
283
284
285
286
287
288
289
290 private void addTags(HTMLDocument fragment, BNode recipe) {
291 HTMLDocument.TextField[] tags = fragment.extractRelTagNodes();
292 for(HTMLDocument.TextField tag : tags) {
293 conditionallyAddStringProperty(
294 tag.source(),
295 recipe, vHRECIPE.tag, tag.value()
296 );
297 }
298 }
299
300 }