This project has retired. For details please refer to its
Attic page.
HCalendarExtractorTest xref
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18 package org.apache.any23.extractor.html;
19
20 import org.apache.any23.extractor.ExtractionException;
21 import org.apache.any23.extractor.ExtractorFactory;
22 import org.apache.any23.rdf.RDFUtils;
23 import org.apache.any23.vocab.ICAL;
24 import org.apache.any23.vocab.SINDICE;
25 import org.junit.Assert;
26 import org.junit.Test;
27 import org.eclipse.rdf4j.model.Resource;
28 import org.eclipse.rdf4j.model.Statement;
29 import org.eclipse.rdf4j.model.IRI;
30 import org.eclipse.rdf4j.model.Value;
31 import org.eclipse.rdf4j.model.vocabulary.RDF;
32 import org.eclipse.rdf4j.repository.RepositoryResult;
33
34 import java.io.IOException;
35
36
37
38
39
40
41
42
43
44 public class HCalendarExtractorTest extends AbstractExtractorTestCase {
45
46 private static final ICAL vICAL = ICAL.getInstance();
47 private static final SINDICE vSINDICE = SINDICE.getInstance();
48
49 private final static IRI vcal = vICAL.Vcalendar;
50 private final static IRI vevent = vICAL.Vevent;
51 private final static IRI vjournal = vICAL.Vjournal;
52 private final static IRI vtodo = vICAL.Vtodo;
53
54 protected ExtractorFactory<?> getExtractorFactory() {
55 return new HCalendarExtractorFactory();
56 }
57
58 @Test
59 public void testOneVEvent() throws Exception {
60 assertExtract("/microformats/hcalendar/example1.html");
61 assertModelNotEmpty();
62 assertContains(baseIRI, RDF.TYPE, vcal);
63 assertContains(null, RDF.TYPE, vevent);
64 RepositoryResult<Statement> result = getStatements(null, RDF.TYPE, vevent);
65 try {
66 while (result.hasNext()) {
67 Statement statement = result.next();
68 final Resource subject = statement.getSubject();
69 assertContains(null, vICAL.component, subject);
70 assertContains(subject, RDF.TYPE, vevent);
71 assertContains(subject, vICAL.dtstart, "1997-09-05T18:00:00.000Z");
72 assertContains(subject, vICAL.dtstamp, "1997-09-01T13:00:00.000Z");
73 assertContains(subject, vICAL.dtend, "1997-09-03T19:00:00.000Z");
74 assertContains(subject, vICAL.uid, "19970901T130000Z-123401@host.com");
75 assertContains(subject, vICAL.summary, "Annual Employee Review");
76 assertContains(subject, vICAL.class_, "private");
77 assertContains(subject, vICAL.categories, "Business");
78 assertContains(subject, vICAL.categories, "Human Resources");
79 }
80 } finally {
81 result.close();
82 }
83 }
84
85 @Test
86 public void testTransparentEvent() throws Exception {
87 assertExtract("/microformats/hcalendar/example2.html");
88 assertModelNotEmpty();
89 assertContains(baseIRI, RDF.TYPE, vcal);
90 assertContains(null, RDF.TYPE, vevent);
91 RepositoryResult<Statement> result = getStatements(null, RDF.TYPE, vevent);
92 try {
93 while (result.hasNext()) {
94 Statement statement = result.next();
95 final Resource subject = statement.getSubject();
96 assertContains(null, vICAL.component, subject);
97 assertContains(subject, RDF.TYPE, vevent);
98 assertContains(subject, vICAL.dtstart, "1997-04-03T18:00:00.000Z");
99 assertContains(subject, vICAL.dtstamp, "1997-09-01T13:00:00.000Z");
100 assertContains(subject, vICAL.dtend, "1997-04-02T01:00:00.000Z");
101 assertContains(subject, vICAL.uid, "19970901T130000Z-123402@host.com");
102 assertContains(subject, vICAL.summary, "Laurel is in sensitivity awareness class.");
103 assertContains(subject, vICAL.class_, "public");
104 assertContains(subject, vICAL.transp, "transparent");
105 assertContains(subject, vICAL.categories, "Business");
106 assertContains(subject, vICAL.categories, "Human Resources");
107 }
108 } finally {
109 result.close();
110 }
111 }
112
113 @Test
114 public void testRepetitiveEvent() throws Exception {
115 assertExtract("/microformats/hcalendar/example3.html");
116 assertModelNotEmpty();
117 assertContains(baseIRI, RDF.TYPE, vcal);
118 assertContains(null, RDF.TYPE, vevent);
119 RepositoryResult<Statement> result = getStatements(null, RDF.TYPE, vevent);
120 try {
121 while (result.hasNext()) {
122 Statement statement = result.next();
123 final Resource subject = statement.getSubject();
124 assertContains(null, vICAL.component, subject);
125 assertContains(subject, RDF.TYPE, vevent);
126 assertContains(subject, vICAL.dtstart, "19971102");
127 assertContains(subject, vICAL.dtstamp, "1997-09-01T13:00:00.000Z");
128 assertContains(subject, vICAL.uid, "19970901T130000Z-123403@host.com");
129 assertContains(subject, vICAL.summary, "Our Blissful Anniversary");
130 assertContains(subject, vICAL.class_, "confidential");
131 assertContains(subject, vICAL.categories, "Anniversary");
132 assertContains(subject, vICAL.categories, "Personal");
133 assertContains(subject, vICAL.categories, "Special Occassion");
134 assertContains(subject, vICAL.rrule, (Value) null);
135 }
136 } finally {
137 result.close();
138 }
139 }
140
141 @Test
142 public void testThreeDayEvent() throws Exception {
143 assertExtract("/microformats/hcalendar/example5.html");
144 assertModelNotEmpty();
145 assertContains(baseIRI, RDF.TYPE, vcal);
146 assertContains(null, RDF.TYPE, vevent);
147 RepositoryResult<Statement> result = getStatements(null, RDF.TYPE, vevent);
148 try {
149 while (result.hasNext()) {
150 Statement statement = result.next();
151 final Resource subject = statement.getSubject();
152 assertContains(null, vICAL.component, subject);
153 assertContains(subject, RDF.TYPE, vevent);
154 assertContains(subject, vICAL.dtstart, "1996-09-20T16:00:00.000Z");
155 assertContains(subject, vICAL.dtstamp, "1996-07-04T12:00:00.000Z");
156 assertContains(subject, vICAL.dtend, "1996-09-20T22:00:00.000Z");
157 assertContains(subject, vICAL.uid, "uid1@host.com");
158 assertContains(subject, vICAL.summary, "Networld+Interop Conference");
159 assertContains(subject, vICAL.description,
160 "Networld+Interop Conference and Exhibit Atlanta World Congress\n"
161 + " Center Atlanta, Georgia");
162 assertContains(subject, vICAL.categories, "Conference");
163 assertContains(subject, vICAL.status, "CONFIRMED");
164 assertContains(subject, vICAL.organizer, (Value) null);
165 }
166 } finally {
167 result.close();
168 }
169 }
170
171 @Test
172 public void testHCalendarWithBudyInfo() throws Exception {
173 assertExtract("/microformats/hcalendar/example5.5.html");
174 assertModelNotEmpty();
175 assertContains(baseIRI, RDF.TYPE, vcal);
176 assertContains(null, RDF.TYPE, vjournal);
177 }
178
179 @Test
180 public void test01() throws Exception {
181 assertDefault("/microformats/hcalendar/01-component-vevent-dtstart-date.html");
182 Resource event = getExactlyOneComponent(vevent);
183 assertContains(event, vICAL.dtstart, "19970903");
184 }
185
186 @Test
187 public void test02() throws Exception {
188 assertDefault("/microformats/hcalendar/02-component-vevent-dtstart-datetime.html");
189 Resource event = getExactlyOneComponent(vevent);
190 assertContains(event, vICAL.dtstart, "1997-09-05T18:00:00.000Z");
191 }
192
193 @Test
194 public void test03() throws Exception {
195 assertDefault("/microformats/hcalendar/03-component-vevent-dtend-date.html");
196 Resource event = getExactlyOneComponent(vevent);
197 assertContains(event, vICAL.dtstart, "19970903");
198 assertContains(event, vICAL.dtend, "19970904");
199 }
200
201 @Test
202 public void test04() throws Exception {
203 assertDefault("/microformats/hcalendar/04-component-vevent-dtend-datetime.html");
204 Resource event = getExactlyOneComponent(vevent);
205 assertContains(event, vICAL.dtstart, "1997-09-03T16:00:00.000Z");
206 assertContains(event, vICAL.dtend, "1997-09-03T18:00:00.000Z");
207 }
208
209 @Test
210 public void test05() throws Exception {
211 assertDefault("/microformats/hcalendar/05-calendar-simple.html");
212 Resource event = getExactlyOneComponent(vevent);
213 assertContains(event, vICAL.dtstart, "2005-10-05");
214 assertContains(event, vICAL.dtend, "2005-10-08");
215 assertContains(event, vICAL.summary, "Web 2.0 Conference");
216 assertContains(event, vICAL.url, RDFUtils.iri("http://www.web2con.com/"));
217 assertContains(event, vICAL.location, "Argent Hotel, San Francisco, CA");
218 }
219
220 @Test
221 public void test06() throws Exception {
222 assertDefault("/microformats/hcalendar/06-component-vevent-uri-relative.html");
223 Resource event = getExactlyOneComponent(vevent);
224 assertContains(event, vICAL.dtstart, "20060115T000000");
225 assertContains(event, vICAL.summary, "Bad Movie Night - Gigli (blame mike spiegelman)");
226 assertContains(event, vICAL.url, RDFUtils.iri(baseIRI + "squidlist/calendar/12279/2006/1/15"));
227 }
228
229 @Test
230 public void test07() throws Exception {
231 assertDefault("/microformats/hcalendar/07-component-vevent-description-simple.html");
232 Resource event = getExactlyOneComponent(vevent);
233 assertContains(event, vICAL.description, "Project xyz Review Meeting Minutes");
234 assertNotContains(event, vICAL.url, (Resource) null);
235 }
236
237 @Test
238 public void test08() throws Exception {
239 assertDefault("/microformats/hcalendar/08-component-vevent-multiple-classes.html");
240 Resource event = getExactlyOneComponent(vevent);
241 assertContains(event, vICAL.dtstart, "2005-10-05");
242 assertContains(event, vICAL.dtend, "2005-10-08");
243 assertContains(event, vICAL.summary, "Web 2.0 Conference");
244 assertContains(event, vICAL.url, RDFUtils.iri("http://www.web2con.com/"));
245 assertContains(event, vICAL.location, "Argent Hotel, San Francisco, CA");
246 }
247
248 @Test
249 public void test09() throws Exception {
250 assertDefault("/microformats/hcalendar/09-component-vevent-summary-in-img-alt.html");
251 Resource event = getExactlyOneComponent(vevent);
252 assertContains(event, vICAL.dtend, "20060310");
253 assertContains(event, vICAL.dtstart, "20060306");
254 assertContains(event, vICAL.summary, "O'Reilly Emerging Technology Conference");
255 assertContains(event, vICAL.url, RDFUtils.iri("http://conferences.oreillynet.com/et2006/"));
256 assertContains(event, vICAL.location, "Manchester Grand Hyatt in San Diego, CA");
257 }
258
259 @Test
260 public void test10() throws Exception {
261 assertDefault("/microformats/hcalendar/10-component-vevent-entity.html");
262 Resource event = getExactlyOneComponent(vevent);
263 assertContains(event, vICAL.summary, "Cricket & Tennis Centre");
264 assertContains(event, vICAL.description, "Melbourne's Cricket & Tennis Centres are in the heart of the city");
265 }
266
267 @Test
268 public void test11() throws Exception {
269 assertDefault("/microformats/hcalendar/11-component-vevent-summary-in-subelements.html");
270 Resource event = getExactlyOneComponent(vevent);
271
272 assertContains(event, vICAL.dtstart, "20051005T1630-0700");
273 assertContains(event, vICAL.dtend, "20051005T1645-0700");
274 assertContains(event, vICAL.summary, "Welcome!\n John Battelle,\n Tim O'Reilly");
275 }
276
277 @Test
278 public void test12() throws Exception {
279 assertDefault("/microformats/hcalendar/12-component-vevent-summary-url-in-same-class.html");
280 Resource event = getExactlyOneComponent(vevent);
281 assertContains(event, vICAL.dtstart, "20060125T000000");
282 assertContains(event, vICAL.url,
283 RDFUtils.iri("http://www.laughingsquid.com/squidlist/calendar/12377/2006/1/25"));
284 assertContains(event, vICAL.summary, "Art Reception for Tom Schultz and Felix Macnee");
285 }
286
287 @Test
288 public void test13() throws Exception {
289 assertDefault("/microformats/hcalendar/13-component-vevent-summary-url-property.html");
290 Resource event = getExactlyOneComponent(vevent);
291 assertContains(event, vICAL.url, RDFUtils
292 .iri("http://dps1.travelocity.com/dparcobrand.ctl?smls=Y&Service=YHOE&.intl=us&aln_name=AA&flt_num="
293 + "1655&dep_arp_name=&arr_arp_name=&dep_dt_dy_1=23&dep_dt_mn_1=Jan&dep_dt_yr_1=2006&dep_tm_1=9:00am"));
294 assertContains(event, vICAL.summary, "ORD-SFO/AA 1655");
295 }
296
297 @Test
298 public void test15() throws Exception {
299 assertDefault("/microformats/hcalendar/15-calendar-xml-lang.html");
300 Resource event = getExactlyOneComponent(vevent);
301 assertContains(event, vICAL.dtstart, "2005-10-05");
302 assertContains(event, vICAL.dtend, "2005-10-08");
303 assertContains(event, vICAL.summary, "Web 2.0 Conference");
304 assertContains(event, vICAL.url, RDFUtils.iri("http://www.web2con.com/"));
305 assertContains(event, vICAL.location, "Argent Hotel, San Francisco, CA");
306 }
307
308 @Test
309 public void test16() throws Exception {
310 assertDefault("/microformats/hcalendar/16-calendar-force-outlook.html");
311 Resource event = getExactlyOneComponent(vevent);
312 assertContains(event, vICAL.dtstart, "2005-10-05");
313 assertContains(event, vICAL.dtend, "2005-10-08");
314 assertContains(event, vICAL.location, "Argent Hotel, San Francisco, CA");
315 }
316
317 @Test
318 public void test17() throws Exception {
319 assertDefault("/microformats/hcalendar/17-component-vevent-description-value-in-subelements.html");
320 Resource event = getExactlyOneComponent(vevent);
321 assertContains(event, vICAL.dtstart, "2006-01-18");
322 assertContains(event, vICAL.dtend, "2006-01-20");
323 assertContains(event, vICAL.location, "Maryland");
324 assertContains(event, vICAL.summary, "3rd PAW ftf meeting");
325 assertContains(event, vICAL.description, "RESOLUTION: to have a\n 3rd PAW ftf meeting \n"
326 + " 18-19 Jan in \n Maryland; location contingent" + " on confirmation from timbl");
327 }
328
329 @Test
330 public void test18() throws Exception {
331 assertDefault("/microformats/hcalendar/18-component-vevent-uid.html");
332 assertStatementsSize(RDF.TYPE, vevent, 5);
333 assertStatementsSize(vICAL.uid, RDFUtils.literal("http://example.com/foo.html"), 5);
334 }
335
336 @Test
337 public void testNoMicroformats() throws Exception, IOException, ExtractionException {
338 extract("/html/html-without-uf.html");
339 assertModelEmpty();
340 }
341
342 @Test
343 public void testNoMicroformatsInStatCvsPage() throws Exception, IOException, ExtractionException {
344 extract("/microformats/hcalendar/empty-statcvs.html");
345 assertModelEmpty();
346 }
347
348 @Test
349 public void testFullHCalendarClass() throws Exception {
350 assertExtract("/microformats/hcalendar/example5.3.html");
351 assertModelNotEmpty();
352 assertContains(baseIRI, RDF.TYPE, vcal);
353 assertContains(null, RDF.TYPE, vevent);
354 }
355
356 @Test
357 public void testHCalendarClassWithTodo() throws Exception {
358 assertExtract("/microformats/hcalendar/example5.4.html");
359 assertModelNotEmpty();
360 assertContains(baseIRI, RDF.TYPE, vcal);
361 assertContains(null, RDF.TYPE, vtodo);
362 }
363
364 @Test
365 public void testHCalendarClassWithJournal() throws Exception {
366 assertExtract("/microformats/hcalendar/example5.5.html");
367 assertModelNotEmpty();
368 assertContains(baseIRI, RDF.TYPE, vcal);
369 assertContains(null, RDF.TYPE, vjournal);
370 }
371
372 private Resource getExactlyOneComponent(Resource r) throws Exception {
373 RepositoryResult<Statement> result = getStatements(null, RDF.TYPE, r);
374 try {
375 Assert.assertTrue(result.hasNext());
376 Resource sub = result.next().getSubject();
377 Assert.assertFalse(result.hasNext());
378 return sub;
379 } finally {
380 result.close();
381 }
382 }
383
384 private void assertDefault(String name) throws Exception {
385 assertExtract(name);
386 assertModelNotEmpty();
387 assertContains(baseIRI, RDF.TYPE, vcal);
388 assertStatementsSize(RDF.TYPE, vcal, 1);
389 }
390
391 }