View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *  http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  
18  package org.apache.any23.extractor.html.microformats2;
19  
20  import org.apache.any23.extractor.ExtractionException;
21  import org.apache.any23.extractor.ExtractionResult;
22  import org.apache.any23.extractor.ExtractorDescription;
23  import org.apache.any23.vocab.HRecipe;
24  import org.eclipse.rdf4j.model.BNode;
25  import org.eclipse.rdf4j.model.IRI;
26  import org.eclipse.rdf4j.model.vocabulary.RDF;
27  import org.w3c.dom.Node;
28  import org.apache.any23.extractor.html.EntityBasedMicroformatExtractor;
29  import org.apache.any23.extractor.html.HTMLDocument;
30  
31  /**
32   * Extractor for the <a href="http://microformats.org/wiki/hrecipe">hRecipe</a>
33   * microformat.
34   *
35   * @author Nisala Nirmana
36   */
37  public class HRecipeExtractor extends EntityBasedMicroformatExtractor {
38  
39      private static final HRecipe vHRECIPE = HRecipe.getInstance();
40  
41      private static final String[] recipeFields = {
42              "name",
43              "ingredient",
44              "yield",
45              "instructions",
46              "duration",
47              "photo",
48              "summary",
49              "author",
50              "published",
51              "nutrition"
52      };
53  
54      @Override
55      public ExtractorDescription getDescription() {
56          return HRecipeExtractorFactory.getDescriptionInstance();
57      }
58  
59      @Override
60      protected String getBaseClassName() {
61          return Microformats2Prefixes.CLASS_PREFIX+"recipe";
62      }
63  
64      @Override
65      protected void resetExtractor() {
66          // Empty.
67      }
68  
69      @Override
70      protected boolean extractEntity(Node node, ExtractionResult out) throws ExtractionException {
71          final BNode recipe = getBlankNodeFor(node);
72          conditionallyAddResourceProperty(recipe, RDF.TYPE, vHRECIPE.Recipe);
73          final HTMLDocumentl/HTMLDocument.html#HTMLDocument">HTMLDocument fragment = new HTMLDocument(node);
74          addName(fragment, recipe);
75          addIngredients(fragment, recipe);
76          addYield(fragment, recipe);
77          addInstructions(fragment, recipe);
78          addDurations(fragment, recipe);
79          addPhoto(fragment, recipe);
80          addSummary(fragment, recipe);
81          addAuthors(fragment, recipe);
82          addPublished(fragment, recipe);
83          addNutritions(fragment, recipe);
84          return true;
85      }
86  
87      private void mapFieldWithProperty(HTMLDocument fragment, BNode recipe, String fieldClass, IRI property) {
88          HTMLDocument.TextField title = fragment.getSingularTextField(fieldClass);
89          conditionallyAddStringProperty(
90                  title.source(), recipe, property, title.value()
91          );
92      }
93  
94      private void addName(HTMLDocument fragment, BNode recipe) {
95          mapFieldWithProperty(fragment, recipe, Microformats2Prefixes.PROPERTY_PREFIX + recipeFields[0], vHRECIPE.fn);
96      }
97  
98      private void addIngredients(HTMLDocument fragment, BNode recipe) {
99          final HTMLDocument.TextField[] ingredients = fragment.getPluralTextField
100                 (Microformats2Prefixes.PROPERTY_PREFIX+recipeFields[1]);
101         for(HTMLDocument.TextField ingredient : ingredients) {
102             conditionallyAddStringProperty(
103                     ingredient.source(), recipe, vHRECIPE.ingredient, ingredient.value()
104             );
105         }
106     }
107 
108     private void addInstructions(HTMLDocument fragment, BNode recipe) {
109         mapFieldWithProperty(fragment, recipe, Microformats2Prefixes.EMBEDDED_PROPERTY_PREFIX+recipeFields[2],
110                                                                                                vHRECIPE.instructions);
111     }
112 
113     private void addYield(HTMLDocument fragment, BNode recipe) {
114         mapFieldWithProperty(fragment, recipe, Microformats2Prefixes.PROPERTY_PREFIX+recipeFields[3], vHRECIPE.yield);
115     }
116 
117     private void addDurations(HTMLDocument fragment, BNode recipe) {
118         final HTMLDocument.TextField[] durations = fragment.getPluralTextField(
119                 Microformats2Prefixes.TIME_PROPERTY_PREFIX + recipeFields[4]);
120         for(HTMLDocument.TextField duration : durations) {
121             Node attribute=duration.source().getAttributes().getNamedItem("datetime");
122             if (attribute==null){
123                 conditionallyAddStringProperty(
124                     duration.source(),
125                     recipe, vHRECIPE.duration, duration.value()
126                 );
127             }else{
128                 conditionallyAddStringProperty(
129                         duration.source(),
130                         recipe, vHRECIPE.duration, attribute.getNodeValue()
131                 );
132 
133             }
134 
135         }
136     }
137 
138     private void addPhoto(HTMLDocument fragment, BNode recipe) throws ExtractionException {
139         final HTMLDocument.TextField[] photos = fragment.getPluralUrlField
140                 (Microformats2Prefixes.URL_PROPERTY_PREFIX+recipeFields[5]);
141         for(HTMLDocument.TextField photo : photos) {
142             addIRIProperty(recipe, vHRECIPE.photo, fragment.resolveIRI(photo.value()));
143         }
144     }
145 
146     private void addSummary(HTMLDocument fragment, BNode recipe) {
147         mapFieldWithProperty(fragment, recipe, Microformats2Prefixes.PROPERTY_PREFIX+recipeFields[6], vHRECIPE.summary);
148     }
149 
150     private void addAuthors(HTMLDocument fragment, BNode recipe) {
151         final HTMLDocument.TextField[] authors = fragment.
152                 getPluralTextField(Microformats2Prefixes.PROPERTY_PREFIX + recipeFields[7]);
153          for(HTMLDocument.TextField author : authors) {
154              conditionallyAddStringProperty(
155                     author.source(),
156                     recipe, vHRECIPE.author, author.value()
157               );
158         }
159     }
160 
161     private void addPublished(HTMLDocument fragment, BNode recipe) {
162         final HTMLDocument.TextField[] durations = fragment.getPluralTextField(
163                 Microformats2Prefixes.TIME_PROPERTY_PREFIX + recipeFields[8]);
164         for(HTMLDocument.TextField duration : durations) {
165             Node attribute=duration.source().getAttributes().getNamedItem("datetime");
166             if (attribute==null){
167                 conditionallyAddStringProperty(
168                         duration.source(),
169                         recipe, vHRECIPE.published, duration.value()
170                 );
171             }else{
172                 conditionallyAddStringProperty(
173                         duration.source(),
174                         recipe, vHRECIPE.published, attribute.getNodeValue()
175                 );
176             }
177         }
178     }
179 
180     private void addNutritions(HTMLDocument fragment, BNode recipe) {
181         final HTMLDocument.TextField[] nutritions = fragment.getPluralTextField
182                 (Microformats2Prefixes.PROPERTY_PREFIX+recipeFields[9]);
183         for(HTMLDocument.TextField nutrition : nutritions) {
184             conditionallyAddStringProperty(
185                     nutrition.source(), recipe, vHRECIPE.nutrition, nutrition.value()
186             );
187         }
188     }
189 }