This project has retired. For details please refer to its Attic page.
HCardExtractorTest xref
View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *  http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  
18  package org.apache.any23.extractor.html;
19  
20  import static org.junit.Assert.assertEquals;
21  import static org.junit.Assert.assertFalse;
22  import static org.junit.Assert.assertNotNull;
23  import static org.junit.Assert.assertTrue;
24  import org.apache.any23.extractor.ExtractionException;
25  import org.apache.any23.extractor.ExtractorFactory;
26  import org.apache.any23.extractor.IssueReport;
27  import org.apache.any23.rdf.RDFUtils;
28  import org.apache.any23.vocab.VCard;
29  import org.junit.Test;
30  import org.eclipse.rdf4j.model.Resource;
31  import org.eclipse.rdf4j.model.Statement;
32  import org.eclipse.rdf4j.model.Value;
33  import org.eclipse.rdf4j.model.vocabulary.RDF;
34  import org.eclipse.rdf4j.repository.RepositoryResult;
35  
36  import java.io.IOException;
37  import java.util.Arrays;
38  import java.util.List;
39  import java.util.Locale;
40  
41  /**
42   * {@link HCardExtractor} test case.
43   */
44  public class HCardExtractorTest extends AbstractExtractorTestCase {
45  
46      private static final VCard vVCARD = VCard.getInstance();
47  
48      protected ExtractorFactory<?> getExtractorFactory() {
49          return new HCardExtractorFactory();
50      }
51  
52      @Test
53      public void testNoNullPointers() {
54          // see https://issues.apache.org/jira/browse/ANY23-351
55          assertExtract("/microformats/hcard/null-pointer.html");
56          assertContains(vVCARD.logo, RDFUtils.iri(
57                  "http://cambridgewi.com/wp-content/uploads/connections-images/dean-bluhm/VillagePharmacy-e04951b21968ae4d9fd04cb14ce08ade.jpg"));
58          assertContains(vVCARD.email, RDFUtils.iri("mailto:bluhmrph@yahoo.com"));
59      }
60  
61      @Test
62      public void testEMailNotUriReal() throws Exception {
63          assertExtract("/microformats/hcard/17-email-not-uri.html");
64          assertDefaultVCard();
65          assertJohn();
66          assertContains(vVCARD.email, RDFUtils.iri("mailto:john@example.com"));
67      }
68  
69      @Test
70      public void testTel() throws Exception {
71          assertExtract("/microformats/hcard/21-tel.html");
72          assertDefaultVCard();
73          String[] tels = { "+1.415.555.1231", "+1.415.555.1235", "+1.415.555.1236", "+1.415.555.1237", "+1.415.555.1238",
74                  "+1.415.555.1239", "+1.415.555.1240", "+1.415.555.1241", "+1.415.555.1242", "+1.415.555.1243" };
75          for (String tel : tels) {
76              assertContains(vVCARD.tel, RDFUtils.iri("tel:" + tel));
77          }
78          Resource telResource = RDFUtils.iri("tel:+14155551233");
79          assertContains(vVCARD.fax, telResource);
80          assertContains(vVCARD.workTel, telResource);
81          assertContains(vVCARD.homeTel, telResource);
82          assertJohn();
83      }
84  
85      @Test
86      public void testAbbrTitleEverything() throws Exception {
87          assertExtract("/microformats/hcard/23-abbr-title-everything.html");
88          assertDefaultVCard();
89  
90          assertContains(vVCARD.fn, "John Doe");
91          assertContains(vVCARD.nickname, "JJ");
92  
93          assertContains(vVCARD.given_name, "Jonathan");
94          assertContains(vVCARD.additional_name, "John");
95          assertContains(vVCARD.family_name, "Doe-Smith");
96          assertContains(vVCARD.honorific_suffix, "Medical Doctor");
97  
98          assertContains(vVCARD.title, "President");
99          assertContains(vVCARD.role, "Chief");
100         assertContains(vVCARD.tz, "-0700");
101         assertContains(vVCARD.bday, "2006-04-04");
102         assertContains(vVCARD.tel, RDFUtils.iri("tel:415.555.1234"));
103         assertContains(vVCARD.uid, "abcdefghijklmnopqrstuvwxyz");
104         assertContains(vVCARD.class_, "public");
105         assertContains(vVCARD.note, "this is a note");
106         assertContains(vVCARD.organization_name, "Intellicorp");
107         assertContains(vVCARD.organization_unit, "Intelligence");
108 
109         // We define the property in this extractor _but_ we do not parse it.
110         assertContains(vVCARD.geo, (Resource) null);
111         // Thus we do not cointain these.
112         // The interaction is in @link RDFMergerTest.java
113         assertNotContains(RDF.TYPE, vVCARD.Location);
114         assertNotContains(null, vVCARD.latitude, "37.77");
115         assertNotContains(null, vVCARD.longitude, "-122.41");
116 
117         // see above
118         assertContains(vVCARD.adr, (Resource) null);
119         assertNotContains(RDF.TYPE, vVCARD.Address);
120         assertNotContains(null, vVCARD.post_office_box, "Box 1234");
121         assertNotContains(null, vVCARD.extended_address, "Suite 100");
122         assertNotContains(null, vVCARD.street_address, "123 Fake Street");
123         assertNotContains(null, vVCARD.locality, "San Francisco");
124         assertNotContains(null, vVCARD.region, "California");
125         assertNotContains(null, vVCARD.postal_code, "12345-6789");
126         assertNotContains(null, vVCARD.country_name, "United States of America");
127         assertNotContains(null, vVCARD.addressType, "work");
128     }
129 
130     @Test
131     public void testGeoAbbr() throws Exception {
132         assertExtract("/microformats/hcard/25-geo-abbr.html");
133         assertModelNotEmpty();
134         assertContains(vVCARD.fn, "Paradise");
135         assertContains(RDF.TYPE, vVCARD.Organization);
136         assertContains(vVCARD.organization_name, "Paradise");
137         // See above: geo property yes, gteo blank node no.
138         assertContains(vVCARD.geo, (Resource) null);
139         assertNotContains(RDF.TYPE, vVCARD.Location);
140         assertNotContains(null, vVCARD.latitude, "30.267991");
141         assertNotContains(null, vVCARD.longitude, "-97.739568");
142     }
143 
144     @Test
145     public void testAncestors() throws Exception {
146         assertExtract("/microformats/hcard/26-ancestors.html");
147         assertModelNotEmpty();
148 
149         assertContains(vVCARD.fn, "John Doe");
150         assertNotContains(null, vVCARD.fn, "Mister Jonathan John Doe-Smith Medical Doctor");
151         assertContains(vVCARD.nickname, "JJ");
152         assertNotContains(RDF.TYPE, vVCARD.Address);
153         assertContains(vVCARD.tz, "-0700");
154         assertContains(vVCARD.title, "President");
155         assertContains(vVCARD.role, "Chief");
156         assertContains(vVCARD.organization_name, "Intellicorp");
157         assertContains(vVCARD.organization_unit, "Intelligence");
158 
159         assertContains(vVCARD.tel, RDFUtils.iri("tel:415.555.1234"));
160         assertContains(vVCARD.uid, "abcdefghijklmnopqrstuvwxyz");
161         assertContains(vVCARD.note, "this is a note");
162         assertContains(vVCARD.class_, "public");
163 
164         assertNotContains(RDF.TYPE, vVCARD.Location);
165         assertContains(vVCARD.geo, (Resource) null);
166         assertNotContains(null, vVCARD.latitude, "37.77");
167         assertNotContains(null, vVCARD.longitude, "-122.41");
168 
169         assertContains(RDF.TYPE, vVCARD.Name);
170         assertContains(vVCARD.additional_name, "John");
171         assertContains(vVCARD.given_name, "Jonathan");
172         assertContains(vVCARD.family_name, "Doe-Smith");
173         assertContains(vVCARD.honorific_prefix, "Mister");
174         assertContains(vVCARD.honorific_suffix, "Medical Doctor");
175 
176         assertNotContains(null, vVCARD.post_office_box, "Box 1234");
177         assertNotContains(null, vVCARD.extended_address, "Suite 100");
178         assertNotContains(null, vVCARD.street_address, "123 Fake Street");
179         assertNotContains(null, vVCARD.locality, "San Francisco");
180         assertNotContains(null, vVCARD.region, "California");
181         assertNotContains(null, vVCARD.postal_code, "12345-6789");
182         assertNotContains(null, vVCARD.country_name, "United States of America");
183         assertNotContains(null, vVCARD.addressType, "work");
184     }
185 
186     @Test
187     public void testfnOrg() throws Exception {
188         assertExtract("/microformats/hcard/30-fn-org.html");
189         assertModelNotEmpty();
190         assertStatementsSize(RDF.TYPE, vVCARD.VCard, 4);
191         RepositoryResult<Statement> repositoryResult = getStatements(null, RDF.TYPE, vVCARD.VCard);
192         try {
193             while (repositoryResult.hasNext()) {
194                 Resource card = repositoryResult.next().getSubject();
195                 assertNotNull(findObject(card, vVCARD.fn));
196                 String name = findObjectAsLiteral(card, vVCARD.fn);
197 
198                 assertNotNull(findObject(card, vVCARD.org));
199                 Resource org = findObjectAsResource(card, vVCARD.org);
200                 assertNotNull(findObject(org, vVCARD.organization_name));
201 
202                 if (name.equals("Dan Connolly")) {
203                     assertNotNull(findObject(card, vVCARD.n));
204                     assertFalse(name.equals(org.stringValue()));
205                 }
206             }
207         } finally {
208             repositoryResult.close();
209         }
210     }
211 
212     @Test
213     public void testInclude() throws Exception {
214         assertExtract("/microformats/hcard/31-include.html");
215         assertModelNotEmpty();
216         assertStatementsSize(RDF.TYPE, vVCARD.VCard, 3);
217         assertStatementsSize(vVCARD.email, (Value) null, 3);
218 
219         RepositoryResult<Statement> statements = getStatements(null, RDF.TYPE, vVCARD.VCard);
220         try {
221             while (statements.hasNext()) {
222                 Resource vcard = statements.next().getSubject();
223 
224                 assertNotNull(findObject(vcard, vVCARD.fn));
225                 assertEquals("Brian Suda", findObjectAsLiteral(vcard, vVCARD.fn));
226 
227                 assertNotNull(findObject(vcard, vVCARD.url));
228                 String url = findObjectAsResource(vcard, vVCARD.url).stringValue();
229                 assertEquals("http://suda.co.uk/", url);
230 
231                 Resource name = findObjectAsResource(vcard, vVCARD.n);
232                 assertEquals("Brian", findObjectAsLiteral(name, vVCARD.given_name));
233                 assertEquals("Suda", findObjectAsLiteral(name, vVCARD.family_name));
234 
235                 // Included data.
236                 assertNotNull(findObject(vcard, vVCARD.email));
237                 String mail = findObjectAsLiteral(vcard, vVCARD.email);
238                 assertEquals("mailto:correct@example.com", mail);
239             }
240         } finally {
241             statements.close();
242         }
243     }
244 
245     @Test
246     public void testHeader() throws Exception {
247         assertExtract("/microformats/hcard/32-header.html");
248         assertModelNotEmpty();
249         // check fn, name, family, nick.
250         assertJohn();
251 
252         RepositoryResult<Statement> statements = getStatements(null, RDF.TYPE, vVCARD.VCard);
253         try {
254             Resource example = RDFUtils.iri("http://example.org/");
255             while (statements.hasNext()) {
256                 Resource card = statements.next().getSubject();
257                 assertNotNull(findObject(card, vVCARD.fn));
258 
259                 String fn = findObjectAsLiteral(card, vVCARD.fn);
260                 if ("Jane Doe".equals(fn)) {
261                     assertNotFound(card, vVCARD.org);
262                 } else {
263                     assertTrue("John Doe".equals(fn) || "Brian Suda".equals(fn));
264 
265                     assertNotNull(findObject(card, vVCARD.url));
266                     assertEquals(example, findObjectAsResource(card, vVCARD.url));
267 
268                     assertNotNull(findObject(card, vVCARD.org));
269                     Resource org = findObjectAsResource(card, vVCARD.org);
270                     assertContains(org, RDF.TYPE, vVCARD.Organization);
271                     assertNotNull(org);
272                     assertNotNull(findObject(card, vVCARD.org));
273                     assertNotNull(findObject(org, vVCARD.organization_name));
274                     assertEquals("example.org", findObjectAsLiteral(org, vVCARD.organization_name));
275                 }
276             }
277             // Just to be sure there are no spurious statements.
278             // assertStatementsSize(VCARD.org, null, 2);
279             assertStatementsSize(vVCARD.url, example, 2);
280         } finally {
281             statements.close();
282         }
283     }
284 
285     @Test
286     public void testAreaFull() throws Exception {
287         assertExtract("/microformats/hcard/33-area.html");
288         assertModelNotEmpty();
289         assertStatementsSize(RDF.TYPE, vVCARD.VCard, 5);
290 
291         RepositoryResult<Statement> statements = getStatements(null, RDF.TYPE, vVCARD.VCard);
292         while (statements.hasNext()) {
293             Resource vcard = statements.next().getSubject();
294             final Value fnValue = findObject(vcard, vVCARD.fn);
295             assertNotNull(fnValue);
296             String fn = fnValue.stringValue();
297             final Value vcardValue = findObject(vcard, vVCARD.url);
298             assertNotNull(vcardValue);
299             String url = vcardValue.stringValue();
300             final Value emailValue = findObject(vcard, vVCARD.email);
301             assertNotNull(emailValue);
302             String mail = emailValue.stringValue();
303             assertEquals("Joe Public", fn);
304             assertEquals("http://example.com/", url);
305             assertEquals("mailto:joe@example.com", mail);
306         }
307     }
308 
309     @Test
310     public void testCategories() throws Exception {
311         assertExtract("/microformats/hcard/36-categories.html");
312         assertModelNotEmpty();
313         assertContains(vVCARD.given_name, "Joe");
314         assertContains(vVCARD.given_name, "john");
315         assertContains(vVCARD.family_name, "doe");
316         assertContains(vVCARD.family_name, "User");
317         assertContains(vVCARD.fn, "john doe");
318         assertContains(vVCARD.fn, "Joe User");
319 
320         assertContains(vVCARD.category, "C1");
321         assertContains(vVCARD.category, "C2a");
322         assertContains(vVCARD.category, "C4");
323         assertContains(vVCARD.category, "User");
324         String[] cats = { "C3", "C5", "C6", "C7", "C9", "luser", "D1", "D2", "D3" };
325         for (String cat : cats)
326             assertContains(vVCARD.category, "http://example.com/tag/" + cat);
327 
328         assertNotContains(null, vVCARD.category, "D4");
329     }
330 
331     @Test
332     public void testSingleton() throws Exception {
333         // this tests probably tests that e just get the first fn and so on
334         assertExtract("/microformats/hcard/37-singleton.html");
335         assertModelNotEmpty();
336         assertStatementsSize(vVCARD.fn, (Value) null, 1);
337         assertContains(vVCARD.fn, "john doe 1");
338 
339         assertStatementsSize(RDF.TYPE, vVCARD.Name, 1);
340         assertStatementsSize(vVCARD.given_name, (Value) null, 1);
341         assertContains(vVCARD.given_name, "john");
342         assertStatementsSize(vVCARD.family_name, (Value) null, 1);
343         assertContains(vVCARD.family_name, "doe");
344         assertStatementsSize(vVCARD.sort_string, (Value) null, 1);
345         assertContains(vVCARD.sort_string, "d");
346 
347         assertStatementsSize(vVCARD.bday, (Value) null, 1);
348         assertContains(vVCARD.bday, "20060707");
349         assertStatementsSize(vVCARD.rev, (Value) null, 1);
350         assertContains(vVCARD.rev, "20060707");
351         assertStatementsSize(vVCARD.class_, (Value) null, 1);
352         assertContains(vVCARD.class_, "public");
353         assertStatementsSize(vVCARD.tz, (Value) null, 1);
354         assertContains(vVCARD.tz, "+0600");
355 
356         // Why 0? because the extractor does not look at geo uF!
357         assertStatementsSize(RDF.TYPE, vVCARD.Location, 0);
358         assertStatementsSize(vVCARD.geo, (Value) null, 2);
359 
360         assertNotContains(null, vVCARD.latitude, "123.45");
361         assertNotContains(null, vVCARD.longitude, "67.89");
362 
363         assertStatementsSize(vVCARD.uid, (Value) null, 1);
364         assertContains(vVCARD.uid, "unique-id-1");
365     }
366 
367     @Test
368     public void testUidFull() throws Exception {
369         assertExtract("/microformats/hcard/38-uid.html");
370         assertModelNotEmpty();
371         assertStatementsSize(RDF.TYPE, vVCARD.VCard, 4);
372         RepositoryResult<Statement> statements = getStatements(null, RDF.TYPE, vVCARD.VCard);
373 
374         try {
375             while (statements.hasNext()) {
376                 Resource vcard = statements.next().getSubject();
377                 assertNotNull(findObject(vcard, vVCARD.fn));
378                 String fn = findObjectAsLiteral(vcard, vVCARD.fn);
379                 assertEquals("Ryan King", fn);
380 
381                 assertNotNull(findObject(vcard, vVCARD.n));
382                 Resource n = findObjectAsResource(vcard, vVCARD.n);
383                 assertNotNull(n);
384                 assertNotNull(findObject(n, vVCARD.given_name));
385                 assertEquals("Ryan", findObjectAsLiteral(n, vVCARD.given_name));
386                 assertNotNull(findObject(n, vVCARD.family_name));
387                 assertEquals("King", findObjectAsLiteral(n, vVCARD.family_name));
388 
389                 assertNotNull(findObject(vcard, vVCARD.url));
390                 Resource url = findObjectAsResource(vcard, vVCARD.url);
391 
392                 assertNotNull(findObject(vcard, vVCARD.uid));
393                 String uid = findObjectAsLiteral(vcard, vVCARD.uid);
394 
395                 assertEquals("http://theryanking.com/contact/", url.stringValue());
396                 assertEquals("http://theryanking.com/contact/", uid);
397             }
398         } finally {
399             statements.close();
400         }
401     }
402 
403     @Test
404     public void testRomanianWikipedia() throws Exception {
405         assertExtract("/microformats/hcard/40-fn-inside-adr.html");
406         assertModelNotEmpty();
407         assertStatementsSize(RDF.TYPE, vVCARD.VCard, 1);
408         RepositoryResult<Statement> statements = getStatements(null, RDF.TYPE, vVCARD.VCard);
409 
410         try {
411             while (statements.hasNext()) {
412                 Resource card = statements.next().getSubject();
413                 assertNotNull(findObject(card, vVCARD.fn));
414                 String fn = findObjectAsLiteral(card, vVCARD.fn);
415                 assertEquals("Berlin", fn);
416 
417                 assertNotNull(findObject(card, vVCARD.org));
418                 Resource org = findObjectAsResource(card, vVCARD.org);
419                 assertContains(org, RDF.TYPE, vVCARD.Organization);
420                 assertNotNull(org);
421                 assertNotNull(findObject(card, vVCARD.org));
422                 assertNotNull(findObject(org, vVCARD.organization_name));
423                 assertEquals("Berlin", findObjectAsLiteral(org, vVCARD.organization_name));
424 
425             }
426         } finally {
427             statements.close();
428         }
429     }
430 
431     @Test
432     public void testNoMicroformats() throws Exception, IOException, ExtractionException {
433         extract("/html/html-without-uf.html");
434         assertModelEmpty();
435     }
436 
437     @Test
438     public void testBasic() throws Exception {
439         assertExtract("/microformats/hcard/01-tantek-basic.html");
440         assertModelNotEmpty();
441         assertContains(RDF.TYPE, vVCARD.VCard);
442         // assertContains(RDF.TYPE, vVCARD.Organization);
443         assertContains(RDF.TYPE, vVCARD.Name);
444         // assertContains(vVCARD.organization_name, "Technorati");
445         Resource person = findExactlyOneBlankSubject(vVCARD.fn, RDFUtils.literal("Tantek Celik"));
446         assertNotNull(person);
447         Resource org = findExactlyOneBlankSubject(vVCARD.organization_name, RDFUtils.literal("Technorati"));
448         assertNotNull(org);
449         assertContains(person, vVCARD.url, RDFUtils.iri("http://tantek.com/"));
450         assertContains(person, vVCARD.n, (Resource) null);
451         assertContains(person, vVCARD.org, (Resource) null);
452     }
453 
454     @Test
455     public void testMultipleclassNamesOnVCard() throws Exception {
456         assertExtract("/microformats/hcard/02-multiple-class-names-on-vcard.html");
457         assertModelNotEmpty();
458         assertStatementsSize(RDF.TYPE, vVCARD.VCard, 4);
459         Resource name;
460         RepositoryResult<Statement> statements = getStatements(null, RDF.TYPE, vVCARD.VCard);
461         while (statements.hasNext()) {
462             name = statements.next().getSubject();
463             assertContains(name, vVCARD.fn, "Ryan King");
464         }
465     }
466 
467     @Test
468     public void testImpliedNames() throws Exception {
469         String[] ns = { "Ryan King", "King", "Ryan",
470 
471                 "Ryan King", "King", "Ryan",
472 
473                 "Ryan King", "King", "Ryan",
474 
475                 "Brian Suda", "Suda", "Brian",
476 
477                 "King, Ryan", "King", "Ryan",
478 
479                 "King, R", "King", "R",
480 
481                 "King R", "R", "King",
482 
483                 "R King", "King", "R",
484 
485                 "King R.", "R.", "King",
486 
487                 "Jesse James Garrett", "Garrett", "Jesse",
488 
489                 "Thomas Vander Wall", "Wall", "Thomas" };
490         List<String> NAMES = Arrays.asList(ns);
491         assertExtract("/microformats/hcard/03-implied-n.html");
492         assertModelNotEmpty();
493 
494         RepositoryResult<Statement> statements = getStatements(null, vVCARD.fn, null);
495         Resource vcard;
496         int count = 0;
497         try {
498             while (statements.hasNext()) {
499                 vcard = statements.next().getSubject();
500                 assertContains(vcard, RDF.TYPE, vVCARD.VCard);
501                 Resource name = findObjectAsResource(vcard, vVCARD.n);
502 
503                 final String objLiteral = findObjectAsLiteral(vcard, vVCARD.fn);
504                 int idx = NAMES.indexOf(objLiteral);
505                 assertTrue(String.format(Locale.ROOT, "not in names: '%s'", objLiteral), idx >= 0);
506                 assertEquals(NAMES.get(idx + 1), findObjectAsLiteral(name, vVCARD.family_name));
507                 assertEquals(NAMES.get(idx + 2), findObjectAsLiteral(name, vVCARD.given_name));
508                 count++;
509             }
510         } finally {
511             statements.close();
512         }
513         assertEquals(10, count);
514     }
515 
516     @Test
517     public void testIgnoreUnknowns() throws Exception {
518         assertExtract("/microformats/hcard/04-ignore-unknowns.html");
519         assertDefaultVCard();
520         assertContains(vVCARD.fn, "Ryan King");
521         assertContains(vVCARD.n, (Resource) null);
522         assertContains(null, "Ryan");
523         assertContains(vVCARD.given_name, "Ryan");
524         assertContains(vVCARD.family_name, "King");
525     }
526 
527     @Test
528     public void testMailto1() throws Exception {
529         assertExtract("/microformats/hcard/05-mailto-1.html");
530         assertDefaultVCard();
531         assertContains(vVCARD.fn, "Ryan King");
532         assertContains(RDF.TYPE, vVCARD.Name);
533 
534         assertContains(vVCARD.email, RDFUtils.iri("mailto:ryan@technorati.com"));
535 
536         assertContains(vVCARD.given_name, "Ryan");
537         assertContains(vVCARD.family_name, "King");
538     }
539 
540     @Test
541     public void testMailto2() throws Exception {
542         assertExtract("/microformats/hcard/06-mailto-2.html");
543         assertDefaultVCard();
544         assertContains(vVCARD.fn, "Brian Suda");
545 
546         assertContains(vVCARD.email, RDFUtils.iri("mailto:brian@example.com"));
547         assertContains(vVCARD.given_name, "Brian");
548         assertContains(vVCARD.family_name, "Suda");
549     }
550 
551     @Test
552     public void testRelativeUrl() throws Exception {
553         assertExtract("/microformats/hcard/07-relative-url.html");
554         assertDefaultVCard();
555         assertJohn();
556         assertContains(vVCARD.url, RDFUtils.iri(baseIRI + "home/blah"));
557     }
558 
559     @Test
560     public void testRelativeUrlBase() throws Exception {
561         assertExtract("/microformats/hcard/08-relative-url-base.html");
562         assertDefaultVCard();
563         assertContains(vVCARD.url, RDFUtils.iri(baseIRI + "home/blah"));
564         assertJohn();
565     }
566 
567     @Test
568     public void testRelativeUrlXmlBase1() throws Exception {
569         assertExtract("/microformats/hcard/09-relative-url-xmlbase-1.html");
570         assertDefaultVCard();
571         assertContains(vVCARD.url, RDFUtils.iri((baseIRI + "home/blah")));
572         assertJohn();
573     }
574 
575     @Test
576     public void testRelativeUrlXmlBase2() throws Exception {
577         assertExtract("/microformats/hcard/10-relative-url-xmlbase-2.html");
578         assertDefaultVCard();
579         assertContains(vVCARD.url, RDFUtils.iri((baseIRI + "home/blah")));
580         assertJohn();
581     }
582 
583     @Test
584     public void testMultipleUrls() throws Exception {
585         assertExtract("/microformats/hcard/11-multiple-urls.html");
586         assertDefaultVCard();
587         assertContains(vVCARD.url, RDFUtils.iri(("http://example.com/foo")));
588         assertContains(vVCARD.url, RDFUtils.iri(("http://example.com/bar")));
589 
590         assertJohn();
591     }
592 
593     @Test
594     public void testImageSrc() throws Exception {
595         assertExtract("/microformats/hcard/12-img-src-url.html");
596         assertDefaultVCard();
597         assertJohn();
598     }
599 
600     @Test
601     public void testPhotoLogo() throws Exception {
602         assertExtract("/microformats/hcard/13-photo-logo.html");
603         assertDefaultVCard();
604         assertContains(vVCARD.photo, RDFUtils.iri(("http://example.org/picture1.png")));
605         assertContains(vVCARD.photo, RDFUtils.iri(("http://example.org/picture2.png")));
606         assertContains(vVCARD.logo, RDFUtils.iri(("http://example.org/picture1.png")));
607         assertContains(vVCARD.logo, RDFUtils.iri(("http://example.org/picture2.png")));
608         assertJohn();
609     }
610 
611     @Test
612     public void testImgSrcDataUrl() throws Exception {
613         assertExtract("/microformats/hcard/14-img-src-data-url.html");
614         assertDefaultVCard();
615         Resource data = RDFUtils.iri("data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAADIAAAAyCAMAAAAp4XiDAAAABGdBTUEAAK/"
616                 + "INwWK6QAAABl0RVh0U29mdHdhcmUAQWRvYmUgSW1hZ2VSZWFkeXHJZTwAAAASUExURf///8zMzJmZmWZmZ"
617                 + "jMzMwAAAPOPemkAAAM1SURBVHjaYmBgYGBkYQUBFkYWFiCPCchixQAMCCZAACF0MAMVM4K4TFh0IGsBCC"
618                 + "AkOxhYmBnAAKaHhZkZmxaAAGJgYIbpYGBihGgBWsTMzMwE4jIhaWGAYoAAYmCECDExYAcwGxkg5oNIgAB"
619                 + "igDqLARdgZmGB2wICrKwAAcSA3xKgIxlZ0PwCEEAMBCxhgHoWSQtAADFAAxgfYEJ1GEAAQbQw4tUCsocB"
620                 + "YQVAADEgu4uRkREeUCwszEwwLhOKLQABhNDCBA4aSDgwwhIAJKqYUPwCEEAMUK/AUwnc9aywJMCI7DAgA"
621                 + "AggBohZ8JTBhGIJzCoWZL8ABBCYidAB8RUjWppkYUG2BSCAGMDqEMZiswUtXgACiAHsFYixTMywGGLGpgU"
622                 + "WYgABxAA2mQkWCMyMqFoYmdD8ACQAAogBHJHMrCxg1cyIiICmCkYWDFsAAgiihYmZCewFFpR0BfI3LLch+"
623                 + "QUggBiQ0iQjEyMDmh54qCBlUIAAYsCRJsElADQvgWKTlRGeKwECiAF3XgGmMEYQYADZzcoA9z5AAMG9RQC"
624                 + "AtEC9DxBADFiyFyMjVi0wABBAWLQwQdIiuhYGWJIACCBg+KKUJ9BoBRdS2LQALQMIIGDQIEmwAO1kYcVWH"
625                 + "CDZAhBAqFqYmOAxj2YNtAwDAYAAYmDEiBYWzHKKkRERYiwAAYSphZEZwxZGZiZQVEJTJkAAMTCyokc7M5o"
626                 + "ORlC5wcoEjxeAAAJqQXU0UB6W5WFmABMtEzMi1wEEEFAbE0YyAUuzMMEsYQalMkQSBQggUDmNPU3C9IA4L"
627                 + "CxI+QUggEBiKOU8yExgqccCL3chnkPKlQABhGo6ejHBDKmdUHMlQAAhhQvQaGZGkBIkjcAMywLmI+VKgAB"
628                 + "CSowsTJhZkhlWXiBpAQggYBqBZl9GVOdBcz0LZqEEEEAMqLULMBLg1THWog9IAwQQA0qiZcRW5aPbAhBAD"
629                 + "Cg1El4tMAAQQAxoiZYZXnTh1AIQQAzo2QlYpDDjcBgrxGEAAcSAJTthswmiBUwDBBC2GpkZJTaRvQ+mAQK"
630                 + "IAUuuxdZWQvILQABBmSxMjBj5EpcWgACCMoFOYYSpZyHQHgMIMACt2hmoVEikCQAAAABJRU5ErkJggg==");
631 
632         assertContains(vVCARD.photo, data);
633         assertContains(vVCARD.logo, data);
634         assertJohn();
635     }
636 
637     @Test
638     public void testHonorificAdditionalSingle() throws Exception {
639         assertExtract("/microformats/hcard/15-honorific-additional-single.html");
640         assertDefaultVCard();
641         assertContains(vVCARD.fn, "Mr. John Maurice Doe, Ph.D.");
642 
643         assertContains(vVCARD.honorific_prefix, "Mr.");
644         assertContains(vVCARD.honorific_suffix, "Ph.D.");
645 
646         assertContains(vVCARD.given_name, "John");
647         assertContains(vVCARD.additional_name, "Maurice");
648         assertContains(vVCARD.family_name, "Doe");
649     }
650 
651     @Test
652     public void testHonorificAdditionalMultiple() throws Exception {
653         assertExtract("/microformats/hcard/16-honorific-additional-multiple.html");
654         assertDefaultVCard();
655         assertContains(vVCARD.honorific_prefix, "Mr.");
656         assertContains(vVCARD.honorific_prefix, "Dr.");
657 
658         assertContains(vVCARD.honorific_suffix, "Ph.D.");
659         assertContains(vVCARD.honorific_suffix, "J.D.");
660 
661         assertContains(vVCARD.given_name, "John");
662         assertContains(vVCARD.additional_name, "Maurice");
663         assertContains(vVCARD.additional_name, "Benjamin");
664         assertContains(vVCARD.family_name, "Doe");
665 
666         assertContains(vVCARD.fn, "Mr. Dr. John Maurice Benjamin Doe Ph.D., J.D.");
667     }
668 
669     @Test
670     public void testEMailNotUri() throws Exception {
671         assertExtract("/microformats/hcard/17-email-not-uri.html");
672         assertDefaultVCard();
673         assertJohn();
674         assertContains(vVCARD.email, RDFUtils.iri("mailto:john@example.com"));
675     }
676 
677     @Test
678     public void testObjectDataHttpUri() throws Exception {
679         assertExtract("/microformats/hcard/18-object-data-http-uri.html");
680         assertDefaultVCard();
681         assertJohn();
682     }
683 
684     @Test
685     public void testObjectDataDataUri() throws Exception {
686         assertExtract("/microformats/hcard/19-object-data-data-uri.html");
687         assertDefaultVCard();
688         assertJohn();
689 
690         assertContains(vVCARD.photo, (Resource) null);
691         assertContains(vVCARD.logo, (Resource) null);
692     }
693 
694     @Test
695     public void testImgAlt() throws Exception {
696         assertExtract("/microformats/hcard/20-image-alt.html");
697         assertDefaultVCard();
698         Resource uri = RDFUtils.iri("http://example.com/foo.png");
699         assertContains(vVCARD.photo, uri);
700         assertContains(vVCARD.logo, uri);
701         assertJohn();
702     }
703 
704     @Test
705     public void testAdr() throws Exception {
706         assertExtract("/microformats/hcard/22-adr.html");
707         assertDefaultVCard();
708         assertJohn();
709         assertStatementsSize(RDF.TYPE, vVCARD.Address, 0);
710     }
711 
712     @Test
713     public void testBirthDayDate() throws Exception {
714         assertExtract("/microformats/hcard/27-bday-date.html");
715         assertModelNotEmpty();
716         assertContains(vVCARD.fn, "john doe");
717         assertContains(vVCARD.given_name, "john");
718         assertContains(vVCARD.family_name, "doe");
719         assertContains(vVCARD.bday, "2000-01-01");
720     }
721 
722     @Test
723     public void testBirthDayDateTime() throws Exception {
724         assertExtract("/microformats/hcard/28-bday-datetime.html");
725         assertModelNotEmpty();
726         assertContains(vVCARD.fn, "john doe");
727         assertContains(vVCARD.given_name, "john");
728         assertContains(vVCARD.family_name, "doe");
729         assertContains(vVCARD.bday, "2000-01-01T00:00:00");
730     }
731 
732     @Test
733     public void testBirthDayDateTimeTimeZone() throws Exception {
734         assertExtract("/microformats/hcard/29-bday-datetime-timezone.html");
735         assertModelNotEmpty();
736         assertContains(vVCARD.fn, "john doe");
737         assertContains(vVCARD.given_name, "john");
738         assertContains(vVCARD.family_name, "doe");
739         assertContains(vVCARD.bday, "2000-01-01T00:00:00-0800");
740     }
741 
742     @Test
743     public void testArea() throws Exception {
744         assertExtract("/microformats/hcard/33-area.html");
745         assertModelNotEmpty();
746         assertStatementsSize(RDF.TYPE, vVCARD.VCard, 5);
747         RepositoryResult<Statement> statements = getStatements(null, RDF.TYPE, vVCARD.VCard);
748         try {
749             while (statements.hasNext()) {
750                 Resource vcard = statements.next().getSubject();
751 
752                 assertNotNull(findObject(vcard, vVCARD.fn));
753                 assertEquals("Joe Public", findObjectAsLiteral(vcard, vVCARD.fn));
754                 assertNotNull(findObject(vcard, vVCARD.url));
755                 String url = findObjectAsLiteral(vcard, vVCARD.url);
756                 assertNotNull(findObject(vcard, vVCARD.email));
757                 String mail = findObjectAsLiteral(vcard, vVCARD.email);
758                 assertEquals("http://example.com/", url);
759                 assertEquals("mailto:joe@example.com", mail);
760             }
761         } finally {
762             statements.close();
763         }
764 
765         // Check that there are 4 organizations.
766         assertStatementsSize(RDF.TYPE, vVCARD.Organization, 4);
767         statements = getStatements(null, RDF.TYPE, vVCARD.Organization);
768         try {
769             while (statements.hasNext()) {
770                 Resource org = statements.next().getSubject();
771                 assertContains(null, vVCARD.org, org);
772                 assertNotNull(findObject(org, vVCARD.organization_name));
773                 assertEquals("Joe Public", findObjectAsLiteral(org, vVCARD.organization_name));
774             }
775         } finally {
776             statements.close();
777         }
778     }
779 
780     @Test
781     public void testNotes() throws Exception {
782         final String[] NOTES = { "Note 1", "Note 3", "Note 4 with a ; and a , to be escaped" };
783 
784         assertExtract("/microformats/hcard/34-notes.html");
785         assertModelNotEmpty();
786         RepositoryResult<Statement> statements = getStatements(null, RDF.TYPE, vVCARD.VCard);
787         try {
788             while (statements.hasNext()) {
789                 Resource vcard = statements.next().getSubject();
790                 String fn = findObjectAsLiteral(vcard, vVCARD.fn);
791                 String mail = findObjectAsLiteral(vcard, vVCARD.email);
792                 assertEquals("Joe Public", fn);
793                 assertEquals("mailto:joe@example.com", mail);
794             }
795         } finally {
796             statements.close();
797         }
798         for (String note : NOTES) {
799             assertContains(vVCARD.note, note);
800         }
801     }
802 
803     @Test
804     public void testIncludePattern() throws Exception {
805         assertExtract("/microformats/hcard/35-include-pattern.html");
806         assertModelNotEmpty();
807         assertStatementsSize(RDF.TYPE, vVCARD.VCard, 3);
808 
809         RepositoryResult<Statement> statements = getStatements(null, RDF.TYPE, vVCARD.Name);
810         try {
811             while (statements.hasNext()) {
812                 Resource name = statements.next().getSubject();
813                 assertNotNull(findObject(name, vVCARD.given_name));
814                 String gn = findObjectAsLiteral(name, vVCARD.given_name);
815                 assertEquals("James", gn);
816                 assertNotNull(findObject(name, vVCARD.family_name));
817                 String fn = findObjectAsLiteral(name, vVCARD.family_name);
818                 assertEquals("Levine", fn);
819             }
820         } finally {
821             statements.close();
822         }
823 
824         assertStatementsSize(RDF.TYPE, vVCARD.Organization, 2);
825         statements = getStatements(null, RDF.TYPE, vVCARD.Organization);
826         try {
827             while (statements.hasNext()) {
828                 Resource org = statements.next().getSubject();
829                 assertNotNull(findObject(org, vVCARD.organization_name));
830                 assertEquals("SimplyHired", findObjectAsLiteral(org, vVCARD.organization_name));
831 
832                 RepositoryResult<Statement> statements2 = getStatements(null, vVCARD.org, org);
833                 try {
834                     while (statements2.hasNext()) {
835                         Resource vcard = statements2.next().getSubject();
836                         assertNotNull(findObject(vcard, vVCARD.title));
837                         assertEquals("Microformat Brainstormer", findObjectAsLiteral(vcard, vVCARD.title));
838                     }
839                 } finally {
840                     statements2.close();
841                 }
842             }
843         } finally {
844             statements.close();
845         }
846     }
847 
848     @Test
849     public void testUid() throws Exception {
850         assertExtract("/microformats/hcard/38-uid.html");
851         assertModelNotEmpty();
852         assertStatementsSize(RDF.TYPE, vVCARD.VCard, 4);
853         RepositoryResult<Statement> iter = getStatements(null, RDF.TYPE, vVCARD.VCard);
854         while (iter.hasNext()) {
855             Resource vcard = iter.next().getSubject();
856             assertNotNull(findObject(vcard, vVCARD.fn));
857             String fn = findObjectAsLiteral(vcard, vVCARD.fn);
858             assertNotNull(findObject(vcard, vVCARD.url));
859             String url = findObjectAsLiteral(vcard, vVCARD.url);
860             assertNotNull(findObject(vcard, vVCARD.uid));
861             String uid = findObjectAsLiteral(vcard, vVCARD.uid);
862             assertEquals("Ryan King", fn);
863             assertEquals("http://theryanking.com/contact/", url);
864             assertEquals("http://theryanking.com/contact/", uid);
865 
866         }
867     }
868 
869     @Test
870     public void testIgnoreChildren() throws Exception {
871         assertExtract("/microformats/hcard/41-ignore-children.html");
872         assertModelNotEmpty();
873         assertStatementsSize(RDF.TYPE, vVCARD.VCard, 1);
874         assertContains(vVCARD.fn, "Melanie Kl\u00f6\u00df");
875         assertContains(vVCARD.email, RDFUtils.iri("mailto:mkloes@gmail.com"));
876         assertContains(vVCARD.adr, (Resource) null);
877         assertNotContains(null, vVCARD.postal_code, "53127");
878         assertNotContains(null, vVCARD.locality, "Bonn");
879         assertNotContains(null, vVCARD.street_address, "Ippendorfer Weg. 24");
880         assertNotContains(null, vVCARD.country_name, "Germany");
881     }
882 
883     /**
884      * Tests that the HCardName data is not cumulative and is cleaned up at each extraction.
885      *
886      * @throws Exception
887      *             if there is an error asserting the test data.
888      */
889     @Test
890     public void testCumulativeHNames() throws Exception {
891         assertExtract("/microformats/hcard/linkedin-michelemostarda.html");
892         assertModelNotEmpty();
893         assertStatementsSize(vVCARD.given_name, "Michele", 7);
894         assertStatementsSize(vVCARD.family_name, "Mostarda", 7);
895     }
896 
897     /**
898      * Tests the detection and prevention of the inclusion of an ancestor by a sibling node. This test is related to
899      * issue <a href="https://issues.apache.org/jira/browse/ANY23-58">ANY23-58</a>.
900      *
901      * @throws IOException
902      *             if there is an error interpreting the input data
903      * @throws ExtractionException
904      *             if there is an exception during extraction
905      */
906     @Test
907     public void testInfiniteLoop() throws IOException, ExtractionException {
908         assertExtract("/microformats/hcard/infinite-loop.html", false);
909         assertIssue(IssueReport.IssueLevel.WARNING, ".*Current node tries to include an ancestor node.*");
910     }
911 
912     /**
913      * Tests extractor performances. This test is related to issue
914      * <a href="https://issues.apache.org/jira/browse/ANY23-76">ANY23-76</a>.
915      */
916     @Test(timeout = 30 * 1000)
917     public void testExtractionPerformance() {
918         assertExtract("/microformats/hcard/performance.html");
919     }
920 
921     private void assertDefaultVCard() throws Exception {
922         assertModelNotEmpty();
923         assertStatementsSize(RDF.TYPE, vVCARD.VCard, 1);
924     }
925 
926     private void assertJohn() throws Exception {
927         assertContains(vVCARD.fn, "John Doe");
928         assertContains(vVCARD.given_name, "John");
929         assertContains(vVCARD.family_name, "Doe");
930     }
931 
932 }