This project has retired. For details please refer to its
Attic page.
HCardExtractorTest xref
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18 package org.apache.any23.extractor.html;
19
20 import static org.junit.Assert.assertEquals;
21 import static org.junit.Assert.assertFalse;
22 import static org.junit.Assert.assertNotNull;
23 import static org.junit.Assert.assertTrue;
24 import org.apache.any23.extractor.ExtractionException;
25 import org.apache.any23.extractor.ExtractorFactory;
26 import org.apache.any23.extractor.IssueReport;
27 import org.apache.any23.rdf.RDFUtils;
28 import org.apache.any23.vocab.VCard;
29 import org.junit.Test;
30 import org.eclipse.rdf4j.model.Resource;
31 import org.eclipse.rdf4j.model.Statement;
32 import org.eclipse.rdf4j.model.Value;
33 import org.eclipse.rdf4j.model.vocabulary.RDF;
34 import org.eclipse.rdf4j.repository.RepositoryResult;
35
36 import java.io.IOException;
37 import java.util.Arrays;
38 import java.util.List;
39 import java.util.Locale;
40
41
42
43
44 public class HCardExtractorTest extends AbstractExtractorTestCase {
45
46 private static final VCard vVCARD = VCard.getInstance();
47
48 protected ExtractorFactory<?> getExtractorFactory() {
49 return new HCardExtractorFactory();
50 }
51
52 @Test
53 public void testNoNullPointers() {
54
55 assertExtract("/microformats/hcard/null-pointer.html");
56 assertContains(vVCARD.logo, RDFUtils.iri(
57 "http://cambridgewi.com/wp-content/uploads/connections-images/dean-bluhm/VillagePharmacy-e04951b21968ae4d9fd04cb14ce08ade.jpg"));
58 assertContains(vVCARD.email, RDFUtils.iri("mailto:bluhmrph@yahoo.com"));
59 }
60
61 @Test
62 public void testEMailNotUriReal() throws Exception {
63 assertExtract("/microformats/hcard/17-email-not-uri.html");
64 assertDefaultVCard();
65 assertJohn();
66 assertContains(vVCARD.email, RDFUtils.iri("mailto:john@example.com"));
67 }
68
69 @Test
70 public void testTel() throws Exception {
71 assertExtract("/microformats/hcard/21-tel.html");
72 assertDefaultVCard();
73 String[] tels = { "+1.415.555.1231", "+1.415.555.1235", "+1.415.555.1236", "+1.415.555.1237", "+1.415.555.1238",
74 "+1.415.555.1239", "+1.415.555.1240", "+1.415.555.1241", "+1.415.555.1242", "+1.415.555.1243" };
75 for (String tel : tels) {
76 assertContains(vVCARD.tel, RDFUtils.iri("tel:" + tel));
77 }
78 Resource telResource = RDFUtils.iri("tel:+14155551233");
79 assertContains(vVCARD.fax, telResource);
80 assertContains(vVCARD.workTel, telResource);
81 assertContains(vVCARD.homeTel, telResource);
82 assertJohn();
83 }
84
85 @Test
86 public void testAbbrTitleEverything() throws Exception {
87 assertExtract("/microformats/hcard/23-abbr-title-everything.html");
88 assertDefaultVCard();
89
90 assertContains(vVCARD.fn, "John Doe");
91 assertContains(vVCARD.nickname, "JJ");
92
93 assertContains(vVCARD.given_name, "Jonathan");
94 assertContains(vVCARD.additional_name, "John");
95 assertContains(vVCARD.family_name, "Doe-Smith");
96 assertContains(vVCARD.honorific_suffix, "Medical Doctor");
97
98 assertContains(vVCARD.title, "President");
99 assertContains(vVCARD.role, "Chief");
100 assertContains(vVCARD.tz, "-0700");
101 assertContains(vVCARD.bday, "2006-04-04");
102 assertContains(vVCARD.tel, RDFUtils.iri("tel:415.555.1234"));
103 assertContains(vVCARD.uid, "abcdefghijklmnopqrstuvwxyz");
104 assertContains(vVCARD.class_, "public");
105 assertContains(vVCARD.note, "this is a note");
106 assertContains(vVCARD.organization_name, "Intellicorp");
107 assertContains(vVCARD.organization_unit, "Intelligence");
108
109
110 assertContains(vVCARD.geo, (Resource) null);
111
112
113 assertNotContains(RDF.TYPE, vVCARD.Location);
114 assertNotContains(null, vVCARD.latitude, "37.77");
115 assertNotContains(null, vVCARD.longitude, "-122.41");
116
117
118 assertContains(vVCARD.adr, (Resource) null);
119 assertNotContains(RDF.TYPE, vVCARD.Address);
120 assertNotContains(null, vVCARD.post_office_box, "Box 1234");
121 assertNotContains(null, vVCARD.extended_address, "Suite 100");
122 assertNotContains(null, vVCARD.street_address, "123 Fake Street");
123 assertNotContains(null, vVCARD.locality, "San Francisco");
124 assertNotContains(null, vVCARD.region, "California");
125 assertNotContains(null, vVCARD.postal_code, "12345-6789");
126 assertNotContains(null, vVCARD.country_name, "United States of America");
127 assertNotContains(null, vVCARD.addressType, "work");
128 }
129
130 @Test
131 public void testGeoAbbr() throws Exception {
132 assertExtract("/microformats/hcard/25-geo-abbr.html");
133 assertModelNotEmpty();
134 assertContains(vVCARD.fn, "Paradise");
135 assertContains(RDF.TYPE, vVCARD.Organization);
136 assertContains(vVCARD.organization_name, "Paradise");
137
138 assertContains(vVCARD.geo, (Resource) null);
139 assertNotContains(RDF.TYPE, vVCARD.Location);
140 assertNotContains(null, vVCARD.latitude, "30.267991");
141 assertNotContains(null, vVCARD.longitude, "-97.739568");
142 }
143
144 @Test
145 public void testAncestors() throws Exception {
146 assertExtract("/microformats/hcard/26-ancestors.html");
147 assertModelNotEmpty();
148
149 assertContains(vVCARD.fn, "John Doe");
150 assertNotContains(null, vVCARD.fn, "Mister Jonathan John Doe-Smith Medical Doctor");
151 assertContains(vVCARD.nickname, "JJ");
152 assertNotContains(RDF.TYPE, vVCARD.Address);
153 assertContains(vVCARD.tz, "-0700");
154 assertContains(vVCARD.title, "President");
155 assertContains(vVCARD.role, "Chief");
156 assertContains(vVCARD.organization_name, "Intellicorp");
157 assertContains(vVCARD.organization_unit, "Intelligence");
158
159 assertContains(vVCARD.tel, RDFUtils.iri("tel:415.555.1234"));
160 assertContains(vVCARD.uid, "abcdefghijklmnopqrstuvwxyz");
161 assertContains(vVCARD.note, "this is a note");
162 assertContains(vVCARD.class_, "public");
163
164 assertNotContains(RDF.TYPE, vVCARD.Location);
165 assertContains(vVCARD.geo, (Resource) null);
166 assertNotContains(null, vVCARD.latitude, "37.77");
167 assertNotContains(null, vVCARD.longitude, "-122.41");
168
169 assertContains(RDF.TYPE, vVCARD.Name);
170 assertContains(vVCARD.additional_name, "John");
171 assertContains(vVCARD.given_name, "Jonathan");
172 assertContains(vVCARD.family_name, "Doe-Smith");
173 assertContains(vVCARD.honorific_prefix, "Mister");
174 assertContains(vVCARD.honorific_suffix, "Medical Doctor");
175
176 assertNotContains(null, vVCARD.post_office_box, "Box 1234");
177 assertNotContains(null, vVCARD.extended_address, "Suite 100");
178 assertNotContains(null, vVCARD.street_address, "123 Fake Street");
179 assertNotContains(null, vVCARD.locality, "San Francisco");
180 assertNotContains(null, vVCARD.region, "California");
181 assertNotContains(null, vVCARD.postal_code, "12345-6789");
182 assertNotContains(null, vVCARD.country_name, "United States of America");
183 assertNotContains(null, vVCARD.addressType, "work");
184 }
185
186 @Test
187 public void testfnOrg() throws Exception {
188 assertExtract("/microformats/hcard/30-fn-org.html");
189 assertModelNotEmpty();
190 assertStatementsSize(RDF.TYPE, vVCARD.VCard, 4);
191 RepositoryResult<Statement> repositoryResult = getStatements(null, RDF.TYPE, vVCARD.VCard);
192 try {
193 while (repositoryResult.hasNext()) {
194 Resource card = repositoryResult.next().getSubject();
195 assertNotNull(findObject(card, vVCARD.fn));
196 String name = findObjectAsLiteral(card, vVCARD.fn);
197
198 assertNotNull(findObject(card, vVCARD.org));
199 Resource org = findObjectAsResource(card, vVCARD.org);
200 assertNotNull(findObject(org, vVCARD.organization_name));
201
202 if (name.equals("Dan Connolly")) {
203 assertNotNull(findObject(card, vVCARD.n));
204 assertFalse(name.equals(org.stringValue()));
205 }
206 }
207 } finally {
208 repositoryResult.close();
209 }
210 }
211
212 @Test
213 public void testInclude() throws Exception {
214 assertExtract("/microformats/hcard/31-include.html");
215 assertModelNotEmpty();
216 assertStatementsSize(RDF.TYPE, vVCARD.VCard, 3);
217 assertStatementsSize(vVCARD.email, (Value) null, 3);
218
219 RepositoryResult<Statement> statements = getStatements(null, RDF.TYPE, vVCARD.VCard);
220 try {
221 while (statements.hasNext()) {
222 Resource vcard = statements.next().getSubject();
223
224 assertNotNull(findObject(vcard, vVCARD.fn));
225 assertEquals("Brian Suda", findObjectAsLiteral(vcard, vVCARD.fn));
226
227 assertNotNull(findObject(vcard, vVCARD.url));
228 String url = findObjectAsResource(vcard, vVCARD.url).stringValue();
229 assertEquals("http://suda.co.uk/", url);
230
231 Resource name = findObjectAsResource(vcard, vVCARD.n);
232 assertEquals("Brian", findObjectAsLiteral(name, vVCARD.given_name));
233 assertEquals("Suda", findObjectAsLiteral(name, vVCARD.family_name));
234
235
236 assertNotNull(findObject(vcard, vVCARD.email));
237 String mail = findObjectAsLiteral(vcard, vVCARD.email);
238 assertEquals("mailto:correct@example.com", mail);
239 }
240 } finally {
241 statements.close();
242 }
243 }
244
245 @Test
246 public void testHeader() throws Exception {
247 assertExtract("/microformats/hcard/32-header.html");
248 assertModelNotEmpty();
249
250 assertJohn();
251
252 RepositoryResult<Statement> statements = getStatements(null, RDF.TYPE, vVCARD.VCard);
253 try {
254 Resource example = RDFUtils.iri("http://example.org/");
255 while (statements.hasNext()) {
256 Resource card = statements.next().getSubject();
257 assertNotNull(findObject(card, vVCARD.fn));
258
259 String fn = findObjectAsLiteral(card, vVCARD.fn);
260 if ("Jane Doe".equals(fn)) {
261 assertNotFound(card, vVCARD.org);
262 } else {
263 assertTrue("John Doe".equals(fn) || "Brian Suda".equals(fn));
264
265 assertNotNull(findObject(card, vVCARD.url));
266 assertEquals(example, findObjectAsResource(card, vVCARD.url));
267
268 assertNotNull(findObject(card, vVCARD.org));
269 Resource org = findObjectAsResource(card, vVCARD.org);
270 assertContains(org, RDF.TYPE, vVCARD.Organization);
271 assertNotNull(org);
272 assertNotNull(findObject(card, vVCARD.org));
273 assertNotNull(findObject(org, vVCARD.organization_name));
274 assertEquals("example.org", findObjectAsLiteral(org, vVCARD.organization_name));
275 }
276 }
277
278
279 assertStatementsSize(vVCARD.url, example, 2);
280 } finally {
281 statements.close();
282 }
283 }
284
285 @Test
286 public void testAreaFull() throws Exception {
287 assertExtract("/microformats/hcard/33-area.html");
288 assertModelNotEmpty();
289 assertStatementsSize(RDF.TYPE, vVCARD.VCard, 5);
290
291 RepositoryResult<Statement> statements = getStatements(null, RDF.TYPE, vVCARD.VCard);
292 while (statements.hasNext()) {
293 Resource vcard = statements.next().getSubject();
294 final Value fnValue = findObject(vcard, vVCARD.fn);
295 assertNotNull(fnValue);
296 String fn = fnValue.stringValue();
297 final Value vcardValue = findObject(vcard, vVCARD.url);
298 assertNotNull(vcardValue);
299 String url = vcardValue.stringValue();
300 final Value emailValue = findObject(vcard, vVCARD.email);
301 assertNotNull(emailValue);
302 String mail = emailValue.stringValue();
303 assertEquals("Joe Public", fn);
304 assertEquals("http://example.com/", url);
305 assertEquals("mailto:joe@example.com", mail);
306 }
307 }
308
309 @Test
310 public void testCategories() throws Exception {
311 assertExtract("/microformats/hcard/36-categories.html");
312 assertModelNotEmpty();
313 assertContains(vVCARD.given_name, "Joe");
314 assertContains(vVCARD.given_name, "john");
315 assertContains(vVCARD.family_name, "doe");
316 assertContains(vVCARD.family_name, "User");
317 assertContains(vVCARD.fn, "john doe");
318 assertContains(vVCARD.fn, "Joe User");
319
320 assertContains(vVCARD.category, "C1");
321 assertContains(vVCARD.category, "C2a");
322 assertContains(vVCARD.category, "C4");
323 assertContains(vVCARD.category, "User");
324 String[] cats = { "C3", "C5", "C6", "C7", "C9", "luser", "D1", "D2", "D3" };
325 for (String cat : cats)
326 assertContains(vVCARD.category, "http://example.com/tag/" + cat);
327
328 assertNotContains(null, vVCARD.category, "D4");
329 }
330
331 @Test
332 public void testSingleton() throws Exception {
333
334 assertExtract("/microformats/hcard/37-singleton.html");
335 assertModelNotEmpty();
336 assertStatementsSize(vVCARD.fn, (Value) null, 1);
337 assertContains(vVCARD.fn, "john doe 1");
338
339 assertStatementsSize(RDF.TYPE, vVCARD.Name, 1);
340 assertStatementsSize(vVCARD.given_name, (Value) null, 1);
341 assertContains(vVCARD.given_name, "john");
342 assertStatementsSize(vVCARD.family_name, (Value) null, 1);
343 assertContains(vVCARD.family_name, "doe");
344 assertStatementsSize(vVCARD.sort_string, (Value) null, 1);
345 assertContains(vVCARD.sort_string, "d");
346
347 assertStatementsSize(vVCARD.bday, (Value) null, 1);
348 assertContains(vVCARD.bday, "20060707");
349 assertStatementsSize(vVCARD.rev, (Value) null, 1);
350 assertContains(vVCARD.rev, "20060707");
351 assertStatementsSize(vVCARD.class_, (Value) null, 1);
352 assertContains(vVCARD.class_, "public");
353 assertStatementsSize(vVCARD.tz, (Value) null, 1);
354 assertContains(vVCARD.tz, "+0600");
355
356
357 assertStatementsSize(RDF.TYPE, vVCARD.Location, 0);
358 assertStatementsSize(vVCARD.geo, (Value) null, 2);
359
360 assertNotContains(null, vVCARD.latitude, "123.45");
361 assertNotContains(null, vVCARD.longitude, "67.89");
362
363 assertStatementsSize(vVCARD.uid, (Value) null, 1);
364 assertContains(vVCARD.uid, "unique-id-1");
365 }
366
367 @Test
368 public void testUidFull() throws Exception {
369 assertExtract("/microformats/hcard/38-uid.html");
370 assertModelNotEmpty();
371 assertStatementsSize(RDF.TYPE, vVCARD.VCard, 4);
372 RepositoryResult<Statement> statements = getStatements(null, RDF.TYPE, vVCARD.VCard);
373
374 try {
375 while (statements.hasNext()) {
376 Resource vcard = statements.next().getSubject();
377 assertNotNull(findObject(vcard, vVCARD.fn));
378 String fn = findObjectAsLiteral(vcard, vVCARD.fn);
379 assertEquals("Ryan King", fn);
380
381 assertNotNull(findObject(vcard, vVCARD.n));
382 Resource n = findObjectAsResource(vcard, vVCARD.n);
383 assertNotNull(n);
384 assertNotNull(findObject(n, vVCARD.given_name));
385 assertEquals("Ryan", findObjectAsLiteral(n, vVCARD.given_name));
386 assertNotNull(findObject(n, vVCARD.family_name));
387 assertEquals("King", findObjectAsLiteral(n, vVCARD.family_name));
388
389 assertNotNull(findObject(vcard, vVCARD.url));
390 Resource url = findObjectAsResource(vcard, vVCARD.url);
391
392 assertNotNull(findObject(vcard, vVCARD.uid));
393 String uid = findObjectAsLiteral(vcard, vVCARD.uid);
394
395 assertEquals("http://theryanking.com/contact/", url.stringValue());
396 assertEquals("http://theryanking.com/contact/", uid);
397 }
398 } finally {
399 statements.close();
400 }
401 }
402
403 @Test
404 public void testRomanianWikipedia() throws Exception {
405 assertExtract("/microformats/hcard/40-fn-inside-adr.html");
406 assertModelNotEmpty();
407 assertStatementsSize(RDF.TYPE, vVCARD.VCard, 1);
408 RepositoryResult<Statement> statements = getStatements(null, RDF.TYPE, vVCARD.VCard);
409
410 try {
411 while (statements.hasNext()) {
412 Resource card = statements.next().getSubject();
413 assertNotNull(findObject(card, vVCARD.fn));
414 String fn = findObjectAsLiteral(card, vVCARD.fn);
415 assertEquals("Berlin", fn);
416
417 assertNotNull(findObject(card, vVCARD.org));
418 Resource org = findObjectAsResource(card, vVCARD.org);
419 assertContains(org, RDF.TYPE, vVCARD.Organization);
420 assertNotNull(org);
421 assertNotNull(findObject(card, vVCARD.org));
422 assertNotNull(findObject(org, vVCARD.organization_name));
423 assertEquals("Berlin", findObjectAsLiteral(org, vVCARD.organization_name));
424
425 }
426 } finally {
427 statements.close();
428 }
429 }
430
431 @Test
432 public void testNoMicroformats() throws Exception, IOException, ExtractionException {
433 extract("/html/html-without-uf.html");
434 assertModelEmpty();
435 }
436
437 @Test
438 public void testBasic() throws Exception {
439 assertExtract("/microformats/hcard/01-tantek-basic.html");
440 assertModelNotEmpty();
441 assertContains(RDF.TYPE, vVCARD.VCard);
442
443 assertContains(RDF.TYPE, vVCARD.Name);
444
445 Resource person = findExactlyOneBlankSubject(vVCARD.fn, RDFUtils.literal("Tantek Celik"));
446 assertNotNull(person);
447 Resource org = findExactlyOneBlankSubject(vVCARD.organization_name, RDFUtils.literal("Technorati"));
448 assertNotNull(org);
449 assertContains(person, vVCARD.url, RDFUtils.iri("http://tantek.com/"));
450 assertContains(person, vVCARD.n, (Resource) null);
451 assertContains(person, vVCARD.org, (Resource) null);
452 }
453
454 @Test
455 public void testMultipleclassNamesOnVCard() throws Exception {
456 assertExtract("/microformats/hcard/02-multiple-class-names-on-vcard.html");
457 assertModelNotEmpty();
458 assertStatementsSize(RDF.TYPE, vVCARD.VCard, 4);
459 Resource name;
460 RepositoryResult<Statement> statements = getStatements(null, RDF.TYPE, vVCARD.VCard);
461 while (statements.hasNext()) {
462 name = statements.next().getSubject();
463 assertContains(name, vVCARD.fn, "Ryan King");
464 }
465 }
466
467 @Test
468 public void testImpliedNames() throws Exception {
469 String[] ns = { "Ryan King", "King", "Ryan",
470
471 "Ryan King", "King", "Ryan",
472
473 "Ryan King", "King", "Ryan",
474
475 "Brian Suda", "Suda", "Brian",
476
477 "King, Ryan", "King", "Ryan",
478
479 "King, R", "King", "R",
480
481 "King R", "R", "King",
482
483 "R King", "King", "R",
484
485 "King R.", "R.", "King",
486
487 "Jesse James Garrett", "Garrett", "Jesse",
488
489 "Thomas Vander Wall", "Wall", "Thomas" };
490 List<String> NAMES = Arrays.asList(ns);
491 assertExtract("/microformats/hcard/03-implied-n.html");
492 assertModelNotEmpty();
493
494 RepositoryResult<Statement> statements = getStatements(null, vVCARD.fn, null);
495 Resource vcard;
496 int count = 0;
497 try {
498 while (statements.hasNext()) {
499 vcard = statements.next().getSubject();
500 assertContains(vcard, RDF.TYPE, vVCARD.VCard);
501 Resource name = findObjectAsResource(vcard, vVCARD.n);
502
503 final String objLiteral = findObjectAsLiteral(vcard, vVCARD.fn);
504 int idx = NAMES.indexOf(objLiteral);
505 assertTrue(String.format(Locale.ROOT, "not in names: '%s'", objLiteral), idx >= 0);
506 assertEquals(NAMES.get(idx + 1), findObjectAsLiteral(name, vVCARD.family_name));
507 assertEquals(NAMES.get(idx + 2), findObjectAsLiteral(name, vVCARD.given_name));
508 count++;
509 }
510 } finally {
511 statements.close();
512 }
513 assertEquals(10, count);
514 }
515
516 @Test
517 public void testIgnoreUnknowns() throws Exception {
518 assertExtract("/microformats/hcard/04-ignore-unknowns.html");
519 assertDefaultVCard();
520 assertContains(vVCARD.fn, "Ryan King");
521 assertContains(vVCARD.n, (Resource) null);
522 assertContains(null, "Ryan");
523 assertContains(vVCARD.given_name, "Ryan");
524 assertContains(vVCARD.family_name, "King");
525 }
526
527 @Test
528 public void testMailto1() throws Exception {
529 assertExtract("/microformats/hcard/05-mailto-1.html");
530 assertDefaultVCard();
531 assertContains(vVCARD.fn, "Ryan King");
532 assertContains(RDF.TYPE, vVCARD.Name);
533
534 assertContains(vVCARD.email, RDFUtils.iri("mailto:ryan@technorati.com"));
535
536 assertContains(vVCARD.given_name, "Ryan");
537 assertContains(vVCARD.family_name, "King");
538 }
539
540 @Test
541 public void testMailto2() throws Exception {
542 assertExtract("/microformats/hcard/06-mailto-2.html");
543 assertDefaultVCard();
544 assertContains(vVCARD.fn, "Brian Suda");
545
546 assertContains(vVCARD.email, RDFUtils.iri("mailto:brian@example.com"));
547 assertContains(vVCARD.given_name, "Brian");
548 assertContains(vVCARD.family_name, "Suda");
549 }
550
551 @Test
552 public void testRelativeUrl() throws Exception {
553 assertExtract("/microformats/hcard/07-relative-url.html");
554 assertDefaultVCard();
555 assertJohn();
556 assertContains(vVCARD.url, RDFUtils.iri(baseIRI + "home/blah"));
557 }
558
559 @Test
560 public void testRelativeUrlBase() throws Exception {
561 assertExtract("/microformats/hcard/08-relative-url-base.html");
562 assertDefaultVCard();
563 assertContains(vVCARD.url, RDFUtils.iri(baseIRI + "home/blah"));
564 assertJohn();
565 }
566
567 @Test
568 public void testRelativeUrlXmlBase1() throws Exception {
569 assertExtract("/microformats/hcard/09-relative-url-xmlbase-1.html");
570 assertDefaultVCard();
571 assertContains(vVCARD.url, RDFUtils.iri((baseIRI + "home/blah")));
572 assertJohn();
573 }
574
575 @Test
576 public void testRelativeUrlXmlBase2() throws Exception {
577 assertExtract("/microformats/hcard/10-relative-url-xmlbase-2.html");
578 assertDefaultVCard();
579 assertContains(vVCARD.url, RDFUtils.iri((baseIRI + "home/blah")));
580 assertJohn();
581 }
582
583 @Test
584 public void testMultipleUrls() throws Exception {
585 assertExtract("/microformats/hcard/11-multiple-urls.html");
586 assertDefaultVCard();
587 assertContains(vVCARD.url, RDFUtils.iri(("http://example.com/foo")));
588 assertContains(vVCARD.url, RDFUtils.iri(("http://example.com/bar")));
589
590 assertJohn();
591 }
592
593 @Test
594 public void testImageSrc() throws Exception {
595 assertExtract("/microformats/hcard/12-img-src-url.html");
596 assertDefaultVCard();
597 assertJohn();
598 }
599
600 @Test
601 public void testPhotoLogo() throws Exception {
602 assertExtract("/microformats/hcard/13-photo-logo.html");
603 assertDefaultVCard();
604 assertContains(vVCARD.photo, RDFUtils.iri(("http://example.org/picture1.png")));
605 assertContains(vVCARD.photo, RDFUtils.iri(("http://example.org/picture2.png")));
606 assertContains(vVCARD.logo, RDFUtils.iri(("http://example.org/picture1.png")));
607 assertContains(vVCARD.logo, RDFUtils.iri(("http://example.org/picture2.png")));
608 assertJohn();
609 }
610
611 @Test
612 public void testImgSrcDataUrl() throws Exception {
613 assertExtract("/microformats/hcard/14-img-src-data-url.html");
614 assertDefaultVCard();
615 Resource data = RDFUtils.iri("data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAADIAAAAyCAMAAAAp4XiDAAAABGdBTUEAAK/"
616 + "INwWK6QAAABl0RVh0U29mdHdhcmUAQWRvYmUgSW1hZ2VSZWFkeXHJZTwAAAASUExURf///8zMzJmZmWZmZ"
617 + "jMzMwAAAPOPemkAAAM1SURBVHjaYmBgYGBkYQUBFkYWFiCPCchixQAMCCZAACF0MAMVM4K4TFh0IGsBCC"
618 + "AkOxhYmBnAAKaHhZkZmxaAAGJgYIbpYGBihGgBWsTMzMwE4jIhaWGAYoAAYmCECDExYAcwGxkg5oNIgAB"
619 + "igDqLARdgZmGB2wICrKwAAcSA3xKgIxlZ0PwCEEAMBCxhgHoWSQtAADFAAxgfYEJ1GEAAQbQw4tUCsocB"
620 + "YQVAADEgu4uRkREeUCwszEwwLhOKLQABhNDCBA4aSDgwwhIAJKqYUPwCEEAMUK/AUwnc9aywJMCI7DAgA"
621 + "AggBohZ8JTBhGIJzCoWZL8ABBCYidAB8RUjWppkYUG2BSCAGMDqEMZiswUtXgACiAHsFYixTMywGGLGpgU"
622 + "WYgABxAA2mQkWCMyMqFoYmdD8ACQAAogBHJHMrCxg1cyIiICmCkYWDFsAAgiihYmZCewFFpR0BfI3LLch+"
623 + "QUggBiQ0iQjEyMDmh54qCBlUIAAYsCRJsElADQvgWKTlRGeKwECiAF3XgGmMEYQYADZzcoA9z5AAMG9RQC"
624 + "AtEC9DxBADFiyFyMjVi0wABBAWLQwQdIiuhYGWJIACCBg+KKUJ9BoBRdS2LQALQMIIGDQIEmwAO1kYcVWH"
625 + "CDZAhBAqFqYmOAxj2YNtAwDAYAAYmDEiBYWzHKKkRERYiwAAYSphZEZwxZGZiZQVEJTJkAAMTCyokc7M5o"
626 + "ORlC5wcoEjxeAAAJqQXU0UB6W5WFmABMtEzMi1wEEEFAbE0YyAUuzMMEsYQalMkQSBQggUDmNPU3C9IA4L"
627 + "CxI+QUggEBiKOU8yExgqccCL3chnkPKlQABhGo6ejHBDKmdUHMlQAAhhQvQaGZGkBIkjcAMywLmI+VKgAB"
628 + "CSowsTJhZkhlWXiBpAQggYBqBZl9GVOdBcz0LZqEEEEAMqLULMBLg1THWog9IAwQQA0qiZcRW5aPbAhBAD"
629 + "Cg1El4tMAAQQAxoiZYZXnTh1AIQQAzo2QlYpDDjcBgrxGEAAcSAJTthswmiBUwDBBC2GpkZJTaRvQ+mAQK"
630 + "IAUuuxdZWQvILQABBmSxMjBj5EpcWgACCMoFOYYSpZyHQHgMIMACt2hmoVEikCQAAAABJRU5ErkJggg==");
631
632 assertContains(vVCARD.photo, data);
633 assertContains(vVCARD.logo, data);
634 assertJohn();
635 }
636
637 @Test
638 public void testHonorificAdditionalSingle() throws Exception {
639 assertExtract("/microformats/hcard/15-honorific-additional-single.html");
640 assertDefaultVCard();
641 assertContains(vVCARD.fn, "Mr. John Maurice Doe, Ph.D.");
642
643 assertContains(vVCARD.honorific_prefix, "Mr.");
644 assertContains(vVCARD.honorific_suffix, "Ph.D.");
645
646 assertContains(vVCARD.given_name, "John");
647 assertContains(vVCARD.additional_name, "Maurice");
648 assertContains(vVCARD.family_name, "Doe");
649 }
650
651 @Test
652 public void testHonorificAdditionalMultiple() throws Exception {
653 assertExtract("/microformats/hcard/16-honorific-additional-multiple.html");
654 assertDefaultVCard();
655 assertContains(vVCARD.honorific_prefix, "Mr.");
656 assertContains(vVCARD.honorific_prefix, "Dr.");
657
658 assertContains(vVCARD.honorific_suffix, "Ph.D.");
659 assertContains(vVCARD.honorific_suffix, "J.D.");
660
661 assertContains(vVCARD.given_name, "John");
662 assertContains(vVCARD.additional_name, "Maurice");
663 assertContains(vVCARD.additional_name, "Benjamin");
664 assertContains(vVCARD.family_name, "Doe");
665
666 assertContains(vVCARD.fn, "Mr. Dr. John Maurice Benjamin Doe Ph.D., J.D.");
667 }
668
669 @Test
670 public void testEMailNotUri() throws Exception {
671 assertExtract("/microformats/hcard/17-email-not-uri.html");
672 assertDefaultVCard();
673 assertJohn();
674 assertContains(vVCARD.email, RDFUtils.iri("mailto:john@example.com"));
675 }
676
677 @Test
678 public void testObjectDataHttpUri() throws Exception {
679 assertExtract("/microformats/hcard/18-object-data-http-uri.html");
680 assertDefaultVCard();
681 assertJohn();
682 }
683
684 @Test
685 public void testObjectDataDataUri() throws Exception {
686 assertExtract("/microformats/hcard/19-object-data-data-uri.html");
687 assertDefaultVCard();
688 assertJohn();
689
690 assertContains(vVCARD.photo, (Resource) null);
691 assertContains(vVCARD.logo, (Resource) null);
692 }
693
694 @Test
695 public void testImgAlt() throws Exception {
696 assertExtract("/microformats/hcard/20-image-alt.html");
697 assertDefaultVCard();
698 Resource uri = RDFUtils.iri("http://example.com/foo.png");
699 assertContains(vVCARD.photo, uri);
700 assertContains(vVCARD.logo, uri);
701 assertJohn();
702 }
703
704 @Test
705 public void testAdr() throws Exception {
706 assertExtract("/microformats/hcard/22-adr.html");
707 assertDefaultVCard();
708 assertJohn();
709 assertStatementsSize(RDF.TYPE, vVCARD.Address, 0);
710 }
711
712 @Test
713 public void testBirthDayDate() throws Exception {
714 assertExtract("/microformats/hcard/27-bday-date.html");
715 assertModelNotEmpty();
716 assertContains(vVCARD.fn, "john doe");
717 assertContains(vVCARD.given_name, "john");
718 assertContains(vVCARD.family_name, "doe");
719 assertContains(vVCARD.bday, "2000-01-01");
720 }
721
722 @Test
723 public void testBirthDayDateTime() throws Exception {
724 assertExtract("/microformats/hcard/28-bday-datetime.html");
725 assertModelNotEmpty();
726 assertContains(vVCARD.fn, "john doe");
727 assertContains(vVCARD.given_name, "john");
728 assertContains(vVCARD.family_name, "doe");
729 assertContains(vVCARD.bday, "2000-01-01T00:00:00");
730 }
731
732 @Test
733 public void testBirthDayDateTimeTimeZone() throws Exception {
734 assertExtract("/microformats/hcard/29-bday-datetime-timezone.html");
735 assertModelNotEmpty();
736 assertContains(vVCARD.fn, "john doe");
737 assertContains(vVCARD.given_name, "john");
738 assertContains(vVCARD.family_name, "doe");
739 assertContains(vVCARD.bday, "2000-01-01T00:00:00-0800");
740 }
741
742 @Test
743 public void testArea() throws Exception {
744 assertExtract("/microformats/hcard/33-area.html");
745 assertModelNotEmpty();
746 assertStatementsSize(RDF.TYPE, vVCARD.VCard, 5);
747 RepositoryResult<Statement> statements = getStatements(null, RDF.TYPE, vVCARD.VCard);
748 try {
749 while (statements.hasNext()) {
750 Resource vcard = statements.next().getSubject();
751
752 assertNotNull(findObject(vcard, vVCARD.fn));
753 assertEquals("Joe Public", findObjectAsLiteral(vcard, vVCARD.fn));
754 assertNotNull(findObject(vcard, vVCARD.url));
755 String url = findObjectAsLiteral(vcard, vVCARD.url);
756 assertNotNull(findObject(vcard, vVCARD.email));
757 String mail = findObjectAsLiteral(vcard, vVCARD.email);
758 assertEquals("http://example.com/", url);
759 assertEquals("mailto:joe@example.com", mail);
760 }
761 } finally {
762 statements.close();
763 }
764
765
766 assertStatementsSize(RDF.TYPE, vVCARD.Organization, 4);
767 statements = getStatements(null, RDF.TYPE, vVCARD.Organization);
768 try {
769 while (statements.hasNext()) {
770 Resource org = statements.next().getSubject();
771 assertContains(null, vVCARD.org, org);
772 assertNotNull(findObject(org, vVCARD.organization_name));
773 assertEquals("Joe Public", findObjectAsLiteral(org, vVCARD.organization_name));
774 }
775 } finally {
776 statements.close();
777 }
778 }
779
780 @Test
781 public void testNotes() throws Exception {
782 final String[] NOTES = { "Note 1", "Note 3", "Note 4 with a ; and a , to be escaped" };
783
784 assertExtract("/microformats/hcard/34-notes.html");
785 assertModelNotEmpty();
786 RepositoryResult<Statement> statements = getStatements(null, RDF.TYPE, vVCARD.VCard);
787 try {
788 while (statements.hasNext()) {
789 Resource vcard = statements.next().getSubject();
790 String fn = findObjectAsLiteral(vcard, vVCARD.fn);
791 String mail = findObjectAsLiteral(vcard, vVCARD.email);
792 assertEquals("Joe Public", fn);
793 assertEquals("mailto:joe@example.com", mail);
794 }
795 } finally {
796 statements.close();
797 }
798 for (String note : NOTES) {
799 assertContains(vVCARD.note, note);
800 }
801 }
802
803 @Test
804 public void testIncludePattern() throws Exception {
805 assertExtract("/microformats/hcard/35-include-pattern.html");
806 assertModelNotEmpty();
807 assertStatementsSize(RDF.TYPE, vVCARD.VCard, 3);
808
809 RepositoryResult<Statement> statements = getStatements(null, RDF.TYPE, vVCARD.Name);
810 try {
811 while (statements.hasNext()) {
812 Resource name = statements.next().getSubject();
813 assertNotNull(findObject(name, vVCARD.given_name));
814 String gn = findObjectAsLiteral(name, vVCARD.given_name);
815 assertEquals("James", gn);
816 assertNotNull(findObject(name, vVCARD.family_name));
817 String fn = findObjectAsLiteral(name, vVCARD.family_name);
818 assertEquals("Levine", fn);
819 }
820 } finally {
821 statements.close();
822 }
823
824 assertStatementsSize(RDF.TYPE, vVCARD.Organization, 2);
825 statements = getStatements(null, RDF.TYPE, vVCARD.Organization);
826 try {
827 while (statements.hasNext()) {
828 Resource org = statements.next().getSubject();
829 assertNotNull(findObject(org, vVCARD.organization_name));
830 assertEquals("SimplyHired", findObjectAsLiteral(org, vVCARD.organization_name));
831
832 RepositoryResult<Statement> statements2 = getStatements(null, vVCARD.org, org);
833 try {
834 while (statements2.hasNext()) {
835 Resource vcard = statements2.next().getSubject();
836 assertNotNull(findObject(vcard, vVCARD.title));
837 assertEquals("Microformat Brainstormer", findObjectAsLiteral(vcard, vVCARD.title));
838 }
839 } finally {
840 statements2.close();
841 }
842 }
843 } finally {
844 statements.close();
845 }
846 }
847
848 @Test
849 public void testUid() throws Exception {
850 assertExtract("/microformats/hcard/38-uid.html");
851 assertModelNotEmpty();
852 assertStatementsSize(RDF.TYPE, vVCARD.VCard, 4);
853 RepositoryResult<Statement> iter = getStatements(null, RDF.TYPE, vVCARD.VCard);
854 while (iter.hasNext()) {
855 Resource vcard = iter.next().getSubject();
856 assertNotNull(findObject(vcard, vVCARD.fn));
857 String fn = findObjectAsLiteral(vcard, vVCARD.fn);
858 assertNotNull(findObject(vcard, vVCARD.url));
859 String url = findObjectAsLiteral(vcard, vVCARD.url);
860 assertNotNull(findObject(vcard, vVCARD.uid));
861 String uid = findObjectAsLiteral(vcard, vVCARD.uid);
862 assertEquals("Ryan King", fn);
863 assertEquals("http://theryanking.com/contact/", url);
864 assertEquals("http://theryanking.com/contact/", uid);
865
866 }
867 }
868
869 @Test
870 public void testIgnoreChildren() throws Exception {
871 assertExtract("/microformats/hcard/41-ignore-children.html");
872 assertModelNotEmpty();
873 assertStatementsSize(RDF.TYPE, vVCARD.VCard, 1);
874 assertContains(vVCARD.fn, "Melanie Kl\u00f6\u00df");
875 assertContains(vVCARD.email, RDFUtils.iri("mailto:mkloes@gmail.com"));
876 assertContains(vVCARD.adr, (Resource) null);
877 assertNotContains(null, vVCARD.postal_code, "53127");
878 assertNotContains(null, vVCARD.locality, "Bonn");
879 assertNotContains(null, vVCARD.street_address, "Ippendorfer Weg. 24");
880 assertNotContains(null, vVCARD.country_name, "Germany");
881 }
882
883
884
885
886
887
888
889 @Test
890 public void testCumulativeHNames() throws Exception {
891 assertExtract("/microformats/hcard/linkedin-michelemostarda.html");
892 assertModelNotEmpty();
893 assertStatementsSize(vVCARD.given_name, "Michele", 7);
894 assertStatementsSize(vVCARD.family_name, "Mostarda", 7);
895 }
896
897
898
899
900
901
902
903
904
905
906 @Test
907 public void testInfiniteLoop() throws IOException, ExtractionException {
908 assertExtract("/microformats/hcard/infinite-loop.html", false);
909 assertIssue(IssueReport.IssueLevel.WARNING, ".*Current node tries to include an ancestor node.*");
910 }
911
912
913
914
915
916 @Test(timeout = 30 * 1000)
917 public void testExtractionPerformance() {
918 assertExtract("/microformats/hcard/performance.html");
919 }
920
921 private void assertDefaultVCard() throws Exception {
922 assertModelNotEmpty();
923 assertStatementsSize(RDF.TYPE, vVCARD.VCard, 1);
924 }
925
926 private void assertJohn() throws Exception {
927 assertContains(vVCARD.fn, "John Doe");
928 assertContains(vVCARD.given_name, "John");
929 assertContains(vVCARD.family_name, "Doe");
930 }
931
932 }