This project has retired. For details please refer to its Attic page.
HCalendarExtractorTest xref
View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *  http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  
18  package org.apache.any23.extractor.html;
19  
20  import org.apache.any23.extractor.ExtractionException;
21  import org.apache.any23.extractor.ExtractorFactory;
22  import org.apache.any23.rdf.RDFUtils;
23  import org.apache.any23.vocab.ICAL;
24  import org.apache.any23.vocab.SINDICE;
25  import org.junit.Assert;
26  import org.junit.Test;
27  import org.eclipse.rdf4j.model.Resource;
28  import org.eclipse.rdf4j.model.Statement;
29  import org.eclipse.rdf4j.model.IRI;
30  import org.eclipse.rdf4j.model.Value;
31  import org.eclipse.rdf4j.model.vocabulary.RDF;
32  import org.eclipse.rdf4j.repository.RepositoryResult;
33  
34  import java.io.IOException;
35  
36  /**
37   * Test case for {@link HCalendarExtractor}class.
38   *
39   * @author Davide Palmisano ( dpalmisano@gmail.com )
40   * @author Michele Mostarda ( michele.mostarda@gmail.com )
41   * 
42   * @version $Id$
43   */
44  public class HCalendarExtractorTest extends AbstractExtractorTestCase {
45  
46      private static final ICAL vICAL = ICAL.getInstance();
47      private static final SINDICE vSINDICE = SINDICE.getInstance();
48  
49      private final static IRI vcal = vICAL.Vcalendar;
50      private final static IRI vevent = vICAL.Vevent;
51      private final static IRI vjournal = vICAL.Vjournal;
52      private final static IRI vtodo = vICAL.Vtodo;
53  
54      protected ExtractorFactory<?> getExtractorFactory() {
55          return new HCalendarExtractorFactory();
56      }
57  
58      @Test
59      public void testOneVEvent() throws Exception {
60          assertExtract("/microformats/hcalendar/example1.html");
61          assertModelNotEmpty();
62          assertContains(baseIRI, RDF.TYPE, vcal);
63          assertContains(null, RDF.TYPE, vevent);
64          RepositoryResult<Statement> result = getStatements(null, RDF.TYPE, vevent);
65          try {
66              while (result.hasNext()) {
67                  Statement statement = result.next();
68                  final Resource subject = statement.getSubject();
69                  assertContains(null, vICAL.component, subject);
70                  assertContains(subject, RDF.TYPE, vevent);
71                  assertContains(subject, vICAL.dtstart, "1997-09-05T18:00:00.000Z");
72                  assertContains(subject, vICAL.dtstamp, "1997-09-01T13:00:00.000Z");
73                  assertContains(subject, vICAL.dtend, "1997-09-03T19:00:00.000Z");
74                  assertContains(subject, vICAL.uid, "19970901T130000Z-123401@host.com");
75                  assertContains(subject, vICAL.summary, "Annual Employee Review");
76                  assertContains(subject, vICAL.class_, "private");
77                  assertContains(subject, vICAL.categories, "Business");
78                  assertContains(subject, vICAL.categories, "Human Resources");
79              }
80          } finally {
81              result.close();
82          }
83      }
84  
85      @Test
86      public void testTransparentEvent() throws Exception {
87          assertExtract("/microformats/hcalendar/example2.html");
88          assertModelNotEmpty();
89          assertContains(baseIRI, RDF.TYPE, vcal);
90          assertContains(null, RDF.TYPE, vevent);
91          RepositoryResult<Statement> result = getStatements(null, RDF.TYPE, vevent);
92          try {
93              while (result.hasNext()) {
94                  Statement statement = result.next();
95                  final Resource subject = statement.getSubject();
96                  assertContains(null, vICAL.component, subject);
97                  assertContains(subject, RDF.TYPE, vevent);
98                  assertContains(subject, vICAL.dtstart, "1997-04-03T18:00:00.000Z");
99                  assertContains(subject, vICAL.dtstamp, "1997-09-01T13:00:00.000Z");
100                 assertContains(subject, vICAL.dtend, "1997-04-02T01:00:00.000Z");
101                 assertContains(subject, vICAL.uid, "19970901T130000Z-123402@host.com");
102                 assertContains(subject, vICAL.summary, "Laurel is in sensitivity awareness class.");
103                 assertContains(subject, vICAL.class_, "public");
104                 assertContains(subject, vICAL.transp, "transparent");
105                 assertContains(subject, vICAL.categories, "Business");
106                 assertContains(subject, vICAL.categories, "Human Resources");
107             }
108         } finally {
109             result.close();
110         }
111     }
112 
113     @Test
114     public void testRepetitiveEvent() throws Exception {
115         assertExtract("/microformats/hcalendar/example3.html");
116         assertModelNotEmpty();
117         assertContains(baseIRI, RDF.TYPE, vcal);
118         assertContains(null, RDF.TYPE, vevent);
119         RepositoryResult<Statement> result = getStatements(null, RDF.TYPE, vevent);
120         try {
121             while (result.hasNext()) {
122                 Statement statement = result.next();
123                 final Resource subject = statement.getSubject();
124                 assertContains(null, vICAL.component, subject);
125                 assertContains(subject, RDF.TYPE, vevent);
126                 assertContains(subject, vICAL.dtstart, "19971102");
127                 assertContains(subject, vICAL.dtstamp, "1997-09-01T13:00:00.000Z");
128                 assertContains(subject, vICAL.uid, "19970901T130000Z-123403@host.com");
129                 assertContains(subject, vICAL.summary, "Our Blissful Anniversary");
130                 assertContains(subject, vICAL.class_, "confidential");
131                 assertContains(subject, vICAL.categories, "Anniversary");
132                 assertContains(subject, vICAL.categories, "Personal");
133                 assertContains(subject, vICAL.categories, "Special Occassion");
134                 assertContains(subject, vICAL.rrule, (Value) null);
135             }
136         } finally {
137             result.close();
138         }
139     }
140 
141     @Test
142     public void testThreeDayEvent() throws Exception {
143         assertExtract("/microformats/hcalendar/example5.html");
144         assertModelNotEmpty();
145         assertContains(baseIRI, RDF.TYPE, vcal);
146         assertContains(null, RDF.TYPE, vevent);
147         RepositoryResult<Statement> result = getStatements(null, RDF.TYPE, vevent);
148         try {
149             while (result.hasNext()) {
150                 Statement statement = result.next();
151                 final Resource subject = statement.getSubject();
152                 assertContains(null, vICAL.component, subject);
153                 assertContains(subject, RDF.TYPE, vevent);
154                 assertContains(subject, vICAL.dtstart, "1996-09-20T16:00:00.000Z");
155                 assertContains(subject, vICAL.dtstamp, "1996-07-04T12:00:00.000Z");
156                 assertContains(subject, vICAL.dtend, "1996-09-20T22:00:00.000Z");
157                 assertContains(subject, vICAL.uid, "uid1@host.com");
158                 assertContains(subject, vICAL.summary, "Networld+Interop Conference");
159                 assertContains(subject, vICAL.description,
160                         "Networld+Interop Conference and Exhibit Atlanta World Congress\n"
161                                 + "  Center Atlanta, Georgia");
162                 assertContains(subject, vICAL.categories, "Conference");
163                 assertContains(subject, vICAL.status, "CONFIRMED");
164                 assertContains(subject, vICAL.organizer, (Value) null);
165             }
166         } finally {
167             result.close();
168         }
169     }
170 
171     @Test
172     public void testHCalendarWithBudyInfo() throws Exception {
173         assertExtract("/microformats/hcalendar/example5.5.html");
174         assertModelNotEmpty();
175         assertContains(baseIRI, RDF.TYPE, vcal);
176         assertContains(null, RDF.TYPE, vjournal);
177     }
178 
179     @Test
180     public void test01() throws Exception {
181         assertDefault("/microformats/hcalendar/01-component-vevent-dtstart-date.html");
182         Resource event = getExactlyOneComponent(vevent);
183         assertContains(event, vICAL.dtstart, "19970903");
184     }
185 
186     @Test
187     public void test02() throws Exception {
188         assertDefault("/microformats/hcalendar/02-component-vevent-dtstart-datetime.html");
189         Resource event = getExactlyOneComponent(vevent);
190         assertContains(event, vICAL.dtstart, "1997-09-05T18:00:00.000Z");
191     }
192 
193     @Test
194     public void test03() throws Exception {
195         assertDefault("/microformats/hcalendar/03-component-vevent-dtend-date.html");
196         Resource event = getExactlyOneComponent(vevent);
197         assertContains(event, vICAL.dtstart, "19970903");
198         assertContains(event, vICAL.dtend, "19970904");
199     }
200 
201     @Test
202     public void test04() throws Exception {
203         assertDefault("/microformats/hcalendar/04-component-vevent-dtend-datetime.html");
204         Resource event = getExactlyOneComponent(vevent);
205         assertContains(event, vICAL.dtstart, "1997-09-03T16:00:00.000Z");
206         assertContains(event, vICAL.dtend, "1997-09-03T18:00:00.000Z");
207     }
208 
209     @Test
210     public void test05() throws Exception {
211         assertDefault("/microformats/hcalendar/05-calendar-simple.html");
212         Resource event = getExactlyOneComponent(vevent);
213         assertContains(event, vICAL.dtstart, "2005-10-05");
214         assertContains(event, vICAL.dtend, "2005-10-08");
215         assertContains(event, vICAL.summary, "Web 2.0 Conference");
216         assertContains(event, vICAL.url, RDFUtils.iri("http://www.web2con.com/"));
217         assertContains(event, vICAL.location, "Argent Hotel, San Francisco, CA");
218     }
219 
220     @Test
221     public void test06() throws Exception {
222         assertDefault("/microformats/hcalendar/06-component-vevent-uri-relative.html");
223         Resource event = getExactlyOneComponent(vevent);
224         assertContains(event, vICAL.dtstart, "20060115T000000");
225         assertContains(event, vICAL.summary, "Bad Movie Night - Gigli (blame mike spiegelman)");
226         assertContains(event, vICAL.url, RDFUtils.iri(baseIRI + "squidlist/calendar/12279/2006/1/15"));
227     }
228 
229     @Test
230     public void test07() throws Exception {
231         assertDefault("/microformats/hcalendar/07-component-vevent-description-simple.html");
232         Resource event = getExactlyOneComponent(vevent);
233         assertContains(event, vICAL.description, "Project xyz Review Meeting Minutes");
234         assertNotContains(event, vICAL.url, (Resource) null);
235     }
236 
237     @Test
238     public void test08() throws Exception {
239         assertDefault("/microformats/hcalendar/08-component-vevent-multiple-classes.html");
240         Resource event = getExactlyOneComponent(vevent);
241         assertContains(event, vICAL.dtstart, "2005-10-05");
242         assertContains(event, vICAL.dtend, "2005-10-08");
243         assertContains(event, vICAL.summary, "Web 2.0 Conference");
244         assertContains(event, vICAL.url, RDFUtils.iri("http://www.web2con.com/"));
245         assertContains(event, vICAL.location, "Argent Hotel, San Francisco, CA");
246     }
247 
248     @Test
249     public void test09() throws Exception {
250         assertDefault("/microformats/hcalendar/09-component-vevent-summary-in-img-alt.html");
251         Resource event = getExactlyOneComponent(vevent);
252         assertContains(event, vICAL.dtend, "20060310");
253         assertContains(event, vICAL.dtstart, "20060306");
254         assertContains(event, vICAL.summary, "O'Reilly Emerging Technology Conference");
255         assertContains(event, vICAL.url, RDFUtils.iri("http://conferences.oreillynet.com/et2006/"));
256         assertContains(event, vICAL.location, "Manchester Grand Hyatt in San Diego, CA");
257     }
258 
259     @Test
260     public void test10() throws Exception {
261         assertDefault("/microformats/hcalendar/10-component-vevent-entity.html");
262         Resource event = getExactlyOneComponent(vevent);
263         assertContains(event, vICAL.summary, "Cricket & Tennis Centre");
264         assertContains(event, vICAL.description, "Melbourne's Cricket & Tennis Centres are in the heart of the city");
265     }
266 
267     @Test
268     public void test11() throws Exception {
269         assertDefault("/microformats/hcalendar/11-component-vevent-summary-in-subelements.html");
270         Resource event = getExactlyOneComponent(vevent);
271 
272         assertContains(event, vICAL.dtstart, "20051005T1630-0700");
273         assertContains(event, vICAL.dtend, "20051005T1645-0700");
274         assertContains(event, vICAL.summary, "Welcome!\n      John Battelle,\n      Tim O'Reilly");
275     }
276 
277     @Test
278     public void test12() throws Exception {
279         assertDefault("/microformats/hcalendar/12-component-vevent-summary-url-in-same-class.html");
280         Resource event = getExactlyOneComponent(vevent);
281         assertContains(event, vICAL.dtstart, "20060125T000000");
282         assertContains(event, vICAL.url,
283                 RDFUtils.iri("http://www.laughingsquid.com/squidlist/calendar/12377/2006/1/25"));
284         assertContains(event, vICAL.summary, "Art Reception for Tom Schultz and Felix Macnee");
285     }
286 
287     @Test
288     public void test13() throws Exception {
289         assertDefault("/microformats/hcalendar/13-component-vevent-summary-url-property.html");
290         Resource event = getExactlyOneComponent(vevent);
291         assertContains(event, vICAL.url, RDFUtils
292                 .iri("http://dps1.travelocity.com/dparcobrand.ctl?smls=Y&Service=YHOE&.intl=us&aln_name=AA&flt_num="
293                         + "1655&dep_arp_name=&arr_arp_name=&dep_dt_dy_1=23&dep_dt_mn_1=Jan&dep_dt_yr_1=2006&dep_tm_1=9:00am"));
294         assertContains(event, vICAL.summary, "ORD-SFO/AA 1655");
295     }
296 
297     @Test
298     public void test15() throws Exception {
299         assertDefault("/microformats/hcalendar/15-calendar-xml-lang.html");
300         Resource event = getExactlyOneComponent(vevent);
301         assertContains(event, vICAL.dtstart, "2005-10-05");
302         assertContains(event, vICAL.dtend, "2005-10-08");
303         assertContains(event, vICAL.summary, "Web 2.0 Conference");
304         assertContains(event, vICAL.url, RDFUtils.iri("http://www.web2con.com/"));
305         assertContains(event, vICAL.location, "Argent Hotel, San Francisco, CA");
306     }
307 
308     @Test
309     public void test16() throws Exception {
310         assertDefault("/microformats/hcalendar/16-calendar-force-outlook.html");
311         Resource event = getExactlyOneComponent(vevent);
312         assertContains(event, vICAL.dtstart, "2005-10-05");
313         assertContains(event, vICAL.dtend, "2005-10-08");
314         assertContains(event, vICAL.location, "Argent Hotel, San Francisco, CA");
315     }
316 
317     @Test
318     public void test17() throws Exception {
319         assertDefault("/microformats/hcalendar/17-component-vevent-description-value-in-subelements.html");
320         Resource event = getExactlyOneComponent(vevent);
321         assertContains(event, vICAL.dtstart, "2006-01-18");
322         assertContains(event, vICAL.dtend, "2006-01-20");
323         assertContains(event, vICAL.location, "Maryland");
324         assertContains(event, vICAL.summary, "3rd PAW ftf meeting");
325         assertContains(event, vICAL.description, "RESOLUTION: to have a\n      3rd PAW ftf meeting \n"
326                 + "      18-19 Jan in \n      Maryland; location contingent" + " on confirmation from timbl");
327     }
328 
329     @Test
330     public void test18() throws Exception {
331         assertDefault("/microformats/hcalendar/18-component-vevent-uid.html");
332         assertStatementsSize(RDF.TYPE, vevent, 5);
333         assertStatementsSize(vICAL.uid, RDFUtils.literal("http://example.com/foo.html"), 5);
334     }
335 
336     @Test
337     public void testNoMicroformats() throws Exception, IOException, ExtractionException {
338         extract("/html/html-without-uf.html");
339         assertModelEmpty();
340     }
341 
342     @Test
343     public void testNoMicroformatsInStatCvsPage() throws Exception, IOException, ExtractionException {
344         extract("/microformats/hcalendar/empty-statcvs.html");
345         assertModelEmpty();
346     }
347 
348     @Test
349     public void testFullHCalendarClass() throws Exception {
350         assertExtract("/microformats/hcalendar/example5.3.html");
351         assertModelNotEmpty();
352         assertContains(baseIRI, RDF.TYPE, vcal);
353         assertContains(null, RDF.TYPE, vevent);
354     }
355 
356     @Test
357     public void testHCalendarClassWithTodo() throws Exception {
358         assertExtract("/microformats/hcalendar/example5.4.html");
359         assertModelNotEmpty();
360         assertContains(baseIRI, RDF.TYPE, vcal);
361         assertContains(null, RDF.TYPE, vtodo);
362     }
363 
364     @Test
365     public void testHCalendarClassWithJournal() throws Exception {
366         assertExtract("/microformats/hcalendar/example5.5.html");
367         assertModelNotEmpty();
368         assertContains(baseIRI, RDF.TYPE, vcal);
369         assertContains(null, RDF.TYPE, vjournal);
370     }
371 
372     private Resource getExactlyOneComponent(Resource r) throws Exception {
373         RepositoryResult<Statement> result = getStatements(null, RDF.TYPE, r);
374         try {
375             Assert.assertTrue(result.hasNext());
376             Resource sub = result.next().getSubject();
377             Assert.assertFalse(result.hasNext());
378             return sub;
379         } finally {
380             result.close();
381         }
382     }
383 
384     private void assertDefault(String name) throws Exception {
385         assertExtract(name);
386         assertModelNotEmpty();
387         assertContains(baseIRI, RDF.TYPE, vcal);
388         assertStatementsSize(RDF.TYPE, vcal, 1);
389     }
390 
391 }