/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *  http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.any23.extractor.html.microformats2;

import org.apache.any23.extractor.ExtractionException;
import org.apache.any23.extractor.ExtractionResult;
import org.apache.any23.extractor.ExtractorDescription;
import org.apache.any23.extractor.html.EntityBasedMicroformatExtractor;
import org.apache.any23.extractor.html.HTMLDocument;
import org.apache.any23.vocab.HEvent;
import org.apache.any23.vocab.VCard;
import org.eclipse.rdf4j.model.BNode;
import org.eclipse.rdf4j.model.IRI;
import org.eclipse.rdf4j.model.Resource;
import org.eclipse.rdf4j.model.vocabulary.RDF;
import org.w3c.dom.NamedNodeMap;
import org.w3c.dom.Node;

import java.util.List;

import static org.apache.any23.extractor.html.HTMLDocument.TextField;


38  /**
39   * Extractor for the <a href="http://microformats.org/wiki/h-event">h-event</a>
40   * microformat.
41   *
42   * @author Nisala Nirmana
43   */
44  public class HEventExtractor extends EntityBasedMicroformatExtractor {
45  
46      private static final HEvent vEvent = HEvent.getInstance();
47      private static final VCard vVCARD = VCard.getInstance();
48  
49      private String[] eventFields = {
50              "name",
51              "summary",
52              "start",
53              "end",
54              "duration",
55              "description",
56              "url",
57              "category",
58              "location",
59              "attendee"
60      };
61  
62      private static final String[] geoFields = {
63              "latitude",
64              "longitude",
65              "altitude"
66      };
67  
68  
69      @Override
70      public ExtractorDescription getDescription() {
71          return HEventExtractorFactory.getDescriptionInstance();
72      }
73  
74      @Override
75      protected String getBaseClassName() {
76          return Microformats2Prefixes.CLASS_PREFIX+"event";
77      }
78  
79      @Override
80      protected void resetExtractor() {
81          // Empty.
82      }
83  
84      @Override
85      protected boolean extractEntity(Node node, ExtractionResult out) throws ExtractionException {
86          final BNode event = getBlankNodeFor(node);
87          conditionallyAddResourceProperty(event, RDF.TYPE, vEvent.event);
88          final HTMLDocumentl/HTMLDocument.html#HTMLDocument">HTMLDocument fragment = new HTMLDocument(node);
89          addName(fragment, event);
90          addSummary(fragment, event);
91          addStart(fragment, event);
92          addEnd(fragment, event);
93          addDuration(fragment, event);
94          addDescription(fragment, event);
95          addURLs(fragment, event);
96          addCategories(fragment, event);
97          addLocations(fragment, event);
98          addAttendees(fragment,event);
99          return true;
100     }
101 
102     public Resource extractEntityAsEmbeddedProperty(HTMLDocument fragment, BNode event,
103                                                     ExtractionResult out)
104             throws ExtractionException {
105         this.setCurrentExtractionResult(out);
106         addName(fragment, event);
107         addSummary(fragment, event);
108         addStart(fragment, event);
109         addEnd(fragment, event);
110         addDuration(fragment, event);
111         addDescription(fragment, event);
112         addURLs(fragment, event);
113         addCategories(fragment, event);
114         addLocations(fragment, event);
115         addAttendees(fragment,event);
116         return event;
117     }
118 
119     private void addAttendees(HTMLDocument doc, Resource entry) throws ExtractionException {
120         List<Node> nodes = doc.findAllByClassName(Microformats2Prefixes.PROPERTY_PREFIX + eventFields[9] +
121                 Microformats2Prefixes.SPACE_SEPARATOR + Microformats2Prefixes.CLASS_PREFIX + "card");
122         if (nodes.isEmpty())
123             return;
124         HCardExtractorFactoryormats2/HCardExtractorFactory.html#HCardExtractorFactory">HCardExtractorFactory factory = new HCardExtractorFactory();
125         HCardExtractor extractor = factory.createExtractor();
126         for (Node node : nodes) {
127             BNode attendee = valueFactory.createBNode();
128             addIRIProperty(attendee, RDF.TYPE, vEvent.attendee);
129             extractor.extractEntityAsEmbeddedProperty(new HTMLDocument(node), attendee,
130                     getCurrentExtractionResult());
131         }
132     }
133 
134     private void mapFieldWithProperty(HTMLDocument fragment, BNode recipe, String fieldClass,
135                                       IRI property) {
136         HTMLDocument.TextField title = fragment.getSingularTextField(fieldClass);
137         conditionallyAddStringProperty(
138                 title.source(), recipe, property, title.value()
139         );
140     }
141 
142     private void addName(HTMLDocument fragment, BNode event) {
143         mapFieldWithProperty(fragment, event, Microformats2Prefixes.PROPERTY_PREFIX +
144                 eventFields[0], vEvent.name);
145     }
146 
147     private void addSummary(HTMLDocument fragment, BNode event) {
148         mapFieldWithProperty(fragment, event, Microformats2Prefixes.PROPERTY_PREFIX +
149                 eventFields[1], vEvent.summary);
150     }
151 
152     private void addStart(HTMLDocument fragment, BNode event) {
153         final TextField start = fragment.getSingularTextField(
154                 Microformats2Prefixes.TIME_PROPERTY_PREFIX + eventFields[2]);
155         if(start.source()==null)
156             return;
157         Node attribute = start.source().getAttributes().getNamedItem("datetime");
158         if (attribute == null) {
159             conditionallyAddStringProperty(
160                     start.source(),
161                     event, vEvent.start, start.value()
162             );
163         } else {
164             conditionallyAddStringProperty(
165                     start.source(),
166                     event, vEvent.start, attribute.getNodeValue()
167             );
168         }
169     }
170 
171     private void addEnd(HTMLDocument fragment, BNode event) {
172         final TextField end = fragment.getSingularTextField(
173                 Microformats2Prefixes.TIME_PROPERTY_PREFIX + eventFields[3]);
174         if(end.source()==null)
175             return;
176         Node attribute = end.source().getAttributes().getNamedItem("datetime");
177         if (attribute == null) {
178             conditionallyAddStringProperty(
179                     end.source(),
180                     event, vEvent.end, end.value()
181             );
182         } else {
183             conditionallyAddStringProperty(
184                     end.source(),
185                     event, vEvent.end, attribute.getNodeValue()
186             );
187         }
188     }
189 
190     private void addDuration(HTMLDocument fragment, BNode event) {
191         final TextField duration = fragment.getSingularTextField(
192                 Microformats2Prefixes.TIME_PROPERTY_PREFIX + eventFields[4]);
193         if(duration.source()==null)
194             return;
195         Node attribute = duration.source().getAttributes().getNamedItem("datetime");
196         if (attribute == null) {
197             conditionallyAddStringProperty(
198                     duration.source(),
199                     event, vEvent.duration, duration.value()
200             );
201         } else {
202             conditionallyAddStringProperty(
203                     duration.source(),
204                     event, vEvent.duration, attribute.getNodeValue()
205             );
206         }
207     }
208 
209     private void addDescription(HTMLDocument fragment, BNode event) {
210         mapFieldWithProperty(fragment, event, Microformats2Prefixes.PROPERTY_PREFIX +
211                 eventFields[5], vEvent.description);
212     }
213 
214     private void addURLs(HTMLDocument fragment, BNode event) throws ExtractionException {
215         final HTMLDocument.TextField[] urls = fragment.getPluralUrlField
216                 (Microformats2Prefixes.URL_PROPERTY_PREFIX + eventFields[6]);
217         for(HTMLDocument.TextField url : urls) {
218             addIRIProperty(event, vEvent.url, fragment.resolveIRI(url.value()));
219         }
220     }
221 
222     private void addCategories(HTMLDocument fragment, BNode event) {
223         final HTMLDocument.TextField[] categories = fragment.getPluralTextField
224                 (Microformats2Prefixes.PROPERTY_PREFIX + eventFields[7]);
225         for(HTMLDocument.TextField category : categories) {
226             conditionallyAddStringProperty(
227                     category.source(), event, vEvent.category, category.value()
228             );
229         }
230     }
231 
232     private void addLocations(HTMLDocument doc, Resource entry) throws ExtractionException {
233         List<Node> nodes = doc.findAllByClassName(Microformats2Prefixes.PROPERTY_PREFIX + eventFields[8] +
234                 Microformats2Prefixes.SPACE_SEPARATOR + Microformats2Prefixes.CLASS_PREFIX + "geo");
235         if (nodes.isEmpty())
236             return;
237         for (Node node : nodes) {
238             BNode location = valueFactory.createBNode();
239             addIRIProperty(location, RDF.TYPE, vEvent.location);
240             HTMLDocumentl/HTMLDocument.html#HTMLDocument">HTMLDocument fragment = new HTMLDocument(node);
241             for (String field : geoFields) {
242                 HTMLDocument.TextField[] values = fragment.getPluralTextField(Microformats2Prefixes.PROPERTY_PREFIX+field);
243                 for (HTMLDocument.TextField val : values) {
244                     Node attribute=val.source().getAttributes().getNamedItem("title");
245                     if (attribute==null){
246                         conditionallyAddStringProperty(
247                                 val.source(),
248                                 location, vVCARD.getProperty(field), val.value()
249                         );
250                     }else{
251                         conditionallyAddStringProperty(
252                                 val.source(),
253                                 location, vVCARD.getProperty(field), attribute.getNodeValue()
254                         );
255                     }
256                 }
257             }
258         }
259     }
260 
261 }