This project has retired. For details please refer to its
Attic page.
HResumeExtractorTest xref
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18 package org.apache.any23.extractor.html;
19
20 import org.apache.any23.extractor.ExtractorFactory;
21 import org.apache.any23.vocab.DOAC;
22 import org.apache.any23.vocab.FOAF;
23 import org.apache.any23.vocab.SINDICE;
24 import org.apache.any23.vocab.VCard;
25 import org.junit.Assert;
26 import org.junit.Test;
27 import org.eclipse.rdf4j.model.Resource;
28 import org.eclipse.rdf4j.model.Statement;
29 import org.eclipse.rdf4j.model.Value;
30 import org.eclipse.rdf4j.model.vocabulary.RDF;
31 import org.eclipse.rdf4j.repository.RepositoryResult;
32 import org.slf4j.Logger;
33 import org.slf4j.LoggerFactory;
34
35 import java.util.HashSet;
36 import java.util.Set;
37
38
39
40
41
42
43 public class HResumeExtractorTest extends AbstractExtractorTestCase {
44
45 private static final SINDICE vSINDICE = SINDICE.getInstance();
46 private static final FOAF vFOAF = FOAF.getInstance();
47 private static final DOAC vDOAC = DOAC.getInstance();
48 private static final VCard vVCARD = VCard.getInstance();
49
50 private static final Logger logger = LoggerFactory.getLogger(HReviewExtractorTest.class);
51
52 protected ExtractorFactory<?> getExtractorFactory() {
53 return new HResumeExtractorFactory();
54 }
55
56 @Test
57 public void testNoMicroformats() throws Exception {
58 assertExtract("/html/html-without-uf.html");
59 assertModelEmpty();
60 }
61
62 @Test
63 public void testLinkedIn() throws Exception {
64 assertExtract("/microformats/hresume/steveganz.html");
65 assertModelNotEmpty();
66 assertStatementsSize(RDF.TYPE, vFOAF.Person, 1);
67
68 Resource person = findExactlyOneBlankSubject(RDF.TYPE, vFOAF.Person);
69
70 assertContains(person, vDOAC.summary, (Resource) null);
71
72 assertContains(person, vDOAC.summary,
73 "Steve Ganz is passionate about connecting people,\n"
74 + "semantic markup, sushi, and disc golf - not necessarily in that order.\n"
75 + "Currently obsessed with developing the user experience at LinkedIn,\n"
76 + "Steve is a second generation Silicon Valley geek and a veteran web\n"
77 + "professional who has been building human-computer interfaces since 1994.");
78
79 assertContains(person, vFOAF.isPrimaryTopicOf, (Resource) null);
80
81 assertStatementsSize(RDF.TYPE, vVCARD.VCard, 0);
82
83 assertStatementsSize(vDOAC.experience, (Value) null, 7);
84 assertStatementsSize(vDOAC.education, (Value) null, 2);
85 assertStatementsSize(vDOAC.affiliation, (Value) null, 8);
86 }
87
88 @Test
89 public void testLinkedInComplete() throws Exception {
90
91 assertExtract("/microformats/hresume/steveganz.html");
92 assertModelNotEmpty();
93
94 assertStatementsSize(RDF.TYPE, vFOAF.Person, 1);
95
96 assertStatementsSize(vDOAC.experience, (Value) null, 7);
97 assertStatementsSize(vDOAC.education, (Value) null, 2);
98 assertStatementsSize(vDOAC.affiliation, (Value) null, 8);
99 assertStatementsSize(vDOAC.skill, (Value) null, 17);
100
101 RepositoryResult<Statement> statements = getStatements(null, vDOAC.organization, null);
102
103 Set<String> checkSet = new HashSet<String>();
104
105 try {
106 while (statements.hasNext()) {
107 Statement statement = statements.next();
108 checkSet.add(statement.getObject().stringValue());
109 logger.debug(statement.getObject().stringValue());
110 }
111
112 } finally {
113 statements.close();
114 }
115
116 String[] names = new String[] { "LinkedIn Corporation", "PayPal, an eBay Company", "McAfee, Inc.",
117 "Printable Technologies", "Collabria, Inc.", "Self-employed", "3G Productions",
118 "Lee Strasberg Theatre and Film\n" + "\tInstitute", "Leland High School" };
119
120 for (String name : names)
121 Assert.assertTrue(checkSet.contains(name));
122
123 Resource person = findExactlyOneBlankSubject(RDF.TYPE, vFOAF.Person);
124 assertContains(person, vFOAF.isPrimaryTopicOf, (Value) null);
125 findExactlyOneObject(person, vFOAF.isPrimaryTopicOf);
126 }
127
128 @Test
129 public void testAnt() throws Exception {
130 assertExtract("/microformats/hresume/ant.html");
131 assertModelNotEmpty();
132
133 assertStatementsSize(RDF.TYPE, vFOAF.Person, 1);
134
135 Resource person = findExactlyOneBlankSubject(RDF.TYPE, vFOAF.Person);
136 assertContains(person, vDOAC.summary, (Resource) null);
137
138 assertContains(person, vDOAC.summary,
139 "Senior Systems\n Analyst/Developer.\n "
140 + "Experienced in the analysis, design and\n "
141 + "implementation of distributed, multi-tier\n "
142 + "applications using Microsoft\n technologies.\n"
143 + " Specialising in data capture applications on the\n" + " Web.");
144
145 assertContains(person, vFOAF.isPrimaryTopicOf, (Resource) null);
146
147 assertStatementsSize(RDF.TYPE, vVCARD.VCard, 0);
148
149 assertStatementsSize(vDOAC.experience, (Value) null, 16);
150 assertStatementsSize(vDOAC.education, (Value) null, 2);
151 assertStatementsSize(vDOAC.affiliation, (Value) null, 0);
152 assertStatementsSize(vDOAC.skill, (Value) null, 4);
153 }
154
155 }