1 /*
2 * Licensed to the Apache Software Foundation (ASF) under one or more
3 * contributor license agreements. See the NOTICE file distributed with
4 * this work for additional information regarding copyright ownership.
5 * The ASF licenses this file to You under the Apache License, Version 2.0
6 * (the "License"); you may not use this file except in compliance with
7 * the License. You may obtain a copy of the License at
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17
18 package org.apache.any23.extractor.html;
19
20 import org.apache.any23.AbstractAny23TestBase;
21 import org.apache.any23.extractor.IssueReport;
22 import org.apache.any23.extractor.IssueReport.Issue;
23 import org.apache.any23.extractor.IssueReport.IssueLevel;
24 import org.apache.any23.extractor.ExtractionException;
25 import org.apache.any23.extractor.ExtractorFactory;
26 import org.apache.any23.extractor.SingleDocumentExtraction;
27 import org.apache.any23.extractor.SingleDocumentExtractionReport;
28 import org.apache.any23.rdf.RDFUtils;
29 import org.apache.any23.vocab.SINDICE;
30 import org.apache.any23.writer.RepositoryWriter;
31 import org.junit.After;
32 import org.junit.Assert;
33 import org.junit.Before;
34 import org.eclipse.rdf4j.common.iteration.Iterations;
35 import org.eclipse.rdf4j.model.BNode;
36 import org.eclipse.rdf4j.model.Literal;
37 import org.eclipse.rdf4j.model.Resource;
38 import org.eclipse.rdf4j.model.Statement;
39 import org.eclipse.rdf4j.model.IRI;
40 import org.eclipse.rdf4j.model.Value;
41 import org.eclipse.rdf4j.repository.RepositoryConnection;
42 import org.eclipse.rdf4j.repository.RepositoryException;
43 import org.eclipse.rdf4j.repository.RepositoryResult;
44 import org.eclipse.rdf4j.repository.sail.SailRepository;
45 import org.eclipse.rdf4j.rio.RDFFormat;
46 import org.eclipse.rdf4j.rio.RDFHandlerException;
47 import org.eclipse.rdf4j.rio.RDFParseException;
48 import org.eclipse.rdf4j.rio.Rio;
49 import org.eclipse.rdf4j.sail.Sail;
50 import org.eclipse.rdf4j.sail.memory.MemoryStore;
51 import org.slf4j.Logger;
52 import org.slf4j.LoggerFactory;
53
54 import java.io.ByteArrayOutputStream;
55 import java.io.IOException;
56 import java.io.PrintStream;
57 import java.io.StringWriter;
58 import java.lang.invoke.MethodHandles;
59 import java.nio.charset.StandardCharsets;
60 import java.util.ArrayList;
61 import java.util.Collection;
62 import java.util.Collections;
63 import java.util.List;
64 import java.util.Locale;
65 import java.util.Map;
66
67 /**
68 * Abstract class used to write {@link org.apache.any23.extractor.Extractor} specific test cases.
69 */
70 public abstract class AbstractExtractorTestCase extends AbstractAny23TestBase {
71
private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());

/**
 * Base IRI handed to the document opener for every resource processed by {@link #extract(String)}.
 */
// TODO: change base IRI string.
protected static IRI baseIRI = RDFUtils.iri("http://bob.example.com/");

/**
 * Internal connection used to collect extraction results. Opened in {@link #setUp()} and closed in
 * {@link #tearDown()}.
 */
protected RepositoryConnection conn;

/**
 * The latest generated report; assigned by {@link #extract(String)}.
 */
private SingleDocumentExtractionReport report;

// In-memory store created in setUp() and wrapped by the repository below.
private Sail store;

// Repository built on top of the store; the connection is obtained from it.
private SailRepository repository;
93
94 /**
95 * Constructor.
96 */
97 public AbstractExtractorTestCase() {
98 super();
99 }
100
/**
 * Supplies the factory of the extractor under test. The factory is passed to the extraction run in
 * {@link #extract(String)} and its extractor name is used to look up the issues raised.
 *
 * @return the factory of the extractor to be tested.
 */
protected abstract ExtractorFactory<?> getExtractorFactory();
105
106 /**
107 * Test case initialization.
108 *
109 * @throws Exception
110 * if there is an error constructing input objects
111 */
112 @Before
113 public void setUp() throws Exception {
114 super.setUp();
115 store = new MemoryStore();
116 repository = new SailRepository(store);
117 repository.init();
118 conn = repository.getConnection();
119 }
120
121 /**
122 * Test case resources release.
123 *
124 * @throws RepositoryException
125 * if an error is encountered whilst loading content from a storage connection
126 *
127 */
128 @After
129 public void tearDown() throws RepositoryException {
130 try {
131 conn.close();
132 } finally {
133 repository.shutDown();
134 }
135 conn = null;
136 report = null;
137 store = null;
138 repository = null;
139 }
140
141 /**
142 * @return the connection to the memory repository.
143 */
144 protected RepositoryConnection getConnection() {
145 return conn;
146 }
147
148 /**
149 * @return the last generated report.
150 */
151 protected SingleDocumentExtractionReport getReport() {
152 return report;
153 }
154
155 /**
156 * Returns the list of issues raised by a given extractor.
157 *
158 * @param extractorName
159 * name of the extractor.
160 *
161 * @return collection of issues.
162 */
163 protected Collection<IssueReport.Issue> getIssues(String extractorName) {
164 for (Map.Entry<String, Collection<IssueReport.Issue>> issueEntry : report.getExtractorToIssues().entrySet()) {
165 if (issueEntry.getKey().equals(extractorName)) {
166 return issueEntry.getValue();
167 }
168 }
169 return Collections.emptyList();
170 }
171
172 /**
173 * Returns the list of issues raised by the extractor under testing.
174 *
175 * @return collection of issues.
176 */
177 protected Collection<IssueReport.Issue> getIssues() {
178 return getIssues(getExtractorFactory().getExtractorName());
179 }
180
181 /**
182 * Applies the extractor provided by the {@link #getExtractorFactory()} to the specified resource.
183 *
184 * @param resource
185 * resource name.
186 *
187 * @throws org.apache.any23.extractor.ExtractionException
188 * if there is an exception during extraction
189 * @throws IOException
190 * if there is an error processing the input data
191 */
192 // TODO: MimeType detector to null forces the execution of all extractors,
193 // but extraction
194 // tests should be based on mimetype detection.
195 protected void extract(String resource) throws ExtractionException, IOException {
196 SingleDocumentExtraction ex = new SingleDocumentExtraction(
197 new HTMLFixture(copyResourceToTempFile(resource)).getOpener(baseIRI.toString()), getExtractorFactory(),
198 new RepositoryWriter(conn));
199 ex.setMIMETypeDetector(null);
200 report = ex.run();
201 }
202
203 /**
204 * Performs data extraction over the content of a resource and assert that the extraction was fine.
205 *
206 * @param resource
207 * resource name.
208 * @param assertNoIssues
209 * if <code>true</code>invokes {@link #assertNoIssues()} after the extraction.
210 */
211 protected void assertExtract(String resource, boolean assertNoIssues) {
212 try {
213 extract(resource);
214 if (assertNoIssues)
215 assertNoIssues();
216 } catch (ExtractionException ex) {
217 throw new RuntimeException(ex);
218 } catch (IOException ex) {
219 throw new RuntimeException(ex);
220 }
221 }
222
223 /**
224 * Performs data extraction over the content of a resource and assert that the extraction was fine and raised no
225 * issues.
226 *
227 * @param resource
228 * input resource to test extraction on.
229 */
230 protected void assertExtract(String resource) {
231 assertExtract(resource, true);
232 }
233
234 /**
235 * Asserts that the extracted triples contain the pattern <code>(_ p o)</code>.
236 *
237 * @param p
238 * predicate
239 * @param o
240 * object.
241 *
242 * @throws RepositoryException
243 * if an error is encountered whilst loading content from a storage connection
244 *
245 */
246 protected void assertContains(IRI p, Resource o) throws RepositoryException {
247 assertContains(null, p, o);
248 }
249
250 /**
251 * Asserts that the extracted triples contain the pattern <code>(_ p o)</code>.
252 *
253 * @param p
254 * predicate
255 * @param o
256 * object.
257 *
258 * @throws RepositoryException
259 * if an error is encountered whilst loading content from a storage connection
260 *
261 */
262 protected void assertContains(IRI p, String o) throws RepositoryException {
263 assertContains(null, p, RDFUtils.literal(o));
264 }
265
266 /**
267 * Asserts that the extracted triples contain the pattern <code>(_ p o)</code>.
268 *
269 * @param p
270 * predicate
271 * @param o
272 * object.
273 *
274 * @throws RepositoryException
275 * if an error is encountered whilst loading content from a storage connection
276 *
277 */
278 protected void assertNotContains(IRI p, Resource o) throws RepositoryException {
279 assertNotContains(null, p, o);
280 }
281
282 /**
283 * Asserts that the extracted triples contain the pattern <code>(s p o)</code>.
284 *
285 * @param s
286 * subject.
287 * @param p
288 * predicate.
289 * @param o
290 * object.
291 *
292 * @throws RepositoryException
293 * if an error is encountered whilst loading content from a storage connection
294 *
295 */
296 protected void assertContains(Resource s, IRI p, Value o) throws RepositoryException {
297 Assert.assertTrue(
298 getFailedExtractionMessage() + String.format(Locale.ROOT, "Cannot find triple (%s %s %s)", s, p, o),
299 conn.hasStatement(s, p, o, false));
300 }
301
302 /**
303 * Asserts that the extracted triples contain the pattern <code>(s p o)</code>.
304 *
305 * @param s
306 * subject.
307 * @param p
308 * predicate.
309 * @param o
310 * object.
311 *
312 * @throws RepositoryException
313 * if an error is encountered whilst loading content from a storage connection
314 *
315 */
316 protected void assertNotContains(Resource s, IRI p, String o) throws RepositoryException {
317 Assert.assertFalse(getFailedExtractionMessage(), conn.hasStatement(s, p, RDFUtils.literal(o), false));
318 }
319
320 /**
321 * Asserts that the extracted triples contain the pattern <code>(s p o)</code>.
322 *
323 * @param s
324 * subject.
325 * @param p
326 * predicate.
327 * @param o
328 * object.
329 *
330 * @throws RepositoryException
331 * if an error is encountered whilst loading content from a storage connection
332 *
333 */
334 protected void assertNotContains(Resource s, IRI p, Resource o) throws RepositoryException {
335 Assert.assertFalse(getFailedExtractionMessage(), conn.hasStatement(s, p, o, false));
336 }
337
338 /**
339 * Asserts that the model contains at least a statement.
340 *
341 * @throws RepositoryException
342 * if an error is encountered whilst loading content from a storage connection
343 *
344 */
345 protected void assertModelNotEmpty() throws RepositoryException {
346 Assert.assertFalse("The model is expected to not be empty." + getFailedExtractionMessage(), conn.isEmpty());
347 }
348
349 /**
350 * Asserts that the model doesn't contain the pattern <code>(s p o)</code>
351 *
352 * @param s
353 * subject.
354 * @param p
355 * predicate.
356 * @param o
357 * object.
358 *
359 * @throws RepositoryException
360 * if an error is encountered whilst loading content from a storage connection
361 *
362 */
363 protected void assertNotContains(Resource s, IRI p, Literal o) throws RepositoryException {
364 Assert.assertFalse(getFailedExtractionMessage(), conn.hasStatement(s, p, o, false));
365 }
366
367 /**
368 * Asserts that the model is expected to contains no statements.
369 *
370 * @throws RepositoryException
371 * if an error is encountered whilst loading content from a storage connection
372 *
373 */
374 protected void assertModelEmpty() throws RepositoryException {
375 Assert.assertTrue(getFailedExtractionMessage(), conn.isEmpty());
376 }
377
378 /**
379 * Asserts that the extraction generated no issues.
380 */
381 protected void assertNoIssues() {
382 for (Map.Entry<String, Collection<IssueReport.Issue>> entry : report.getExtractorToIssues().entrySet()) {
383 if (entry.getValue().size() > 0) {
384 log.debug("Unexpected issue for extractor " + entry.getKey() + " : " + entry.getValue());
385 }
386 for (Issue nextIssue : entry.getValue()) {
387 if (nextIssue.getLevel() == IssueLevel.ERROR || nextIssue.getLevel() == IssueLevel.FATAL) {
388 Assert.fail("Unexpected issue for extractor " + entry.getKey() + " : " + entry.getValue());
389 }
390 }
391 }
392 }
393
394 /**
395 * Asserts that an issue has been produced by the processed {@link org.apache.any23.extractor.Extractor}.
396 *
397 * @param level
398 * expected issue level
399 * @param issueRegex
400 * regex matching the expected human readable issue message.
401 */
402 protected void assertIssue(IssueReport.IssueLevel level, String issueRegex) {
403 final Collection<IssueReport.Issue> issues = getIssues(getExtractorFactory().getExtractorName());
404 boolean found = false;
405 for (IssueReport.Issue issue : issues) {
406 if (issue.getLevel() == level && issue.getMessage().matches(issueRegex)) {
407 found = true;
408 break;
409 }
410 }
411 Assert.assertTrue(String.format(Locale.ROOT, "Cannot find issue with level %s matching expression '%s'", level,
412 issueRegex), found);
413 }
414
415 /**
416 * Verifies that the current model contains all the given statements.
417 *
418 * @param statements
419 * list of statements to be verified.
420 *
421 * @throws RepositoryException
422 * if an error is encountered whilst loading content from a storage connection
423 *
424 */
425 public void assertContainsModel(Statement[] statements) throws RepositoryException {
426 for (Statement statement : statements) {
427 assertContains(statement);
428 }
429 }
430
431 /**
432 * Verifies that the current model contains all the statements declared in the specified <code>modelFile</code>.
433 *
434 * @param modelResource
435 * the resource containing the model.
436 *
437 * @throws RDFHandlerException
438 * if there is an error within the {@link org.eclipse.rdf4j.rio.RDFHandler}
439 * @throws IOException
440 * if there is an error processing the input data
441 * @throws RDFParseException
442 * if there is an exception parsing the RDF stream
443 * @throws RepositoryException
444 * if an error is encountered whilst loading content from a storage connection
445 *
446 */
447 public void assertContainsModel(String modelResource)
448 throws RDFHandlerException, IOException, RDFParseException, RepositoryException {
449 getConnection().remove(null, SINDICE.getInstance().date, (Value) null, (Resource) null);
450 getConnection().remove(null, SINDICE.getInstance().size, (Value) null, (Resource) null);
451 assertContainsModel(RDFUtils.parseRDF(modelResource));
452 }
453
454 /**
455 * Asserts that the given pattern <code>(s p o)</code> satisfies the expected number of statements.
456 *
457 * @param s
458 * subject.
459 * @param p
460 * predicate.
461 * @param o
462 * object.
463 * @param expected
464 * expected matches.
465 *
466 * @throws RepositoryException
467 * if an error is encountered whilst loading content from a storage connection
468 *
469 */
470 protected void assertStatementsSize(Resource s, IRI p, Value o, int expected)
471 throws RDFHandlerException, RepositoryException {
472 int statementsSize = getStatementsSize(s, p, o);
473 if (statementsSize != expected) {
474 final ByteArrayOutputStream baos = new ByteArrayOutputStream();
475 PrintStream ps = new PrintStream(baos, true, StandardCharsets.UTF_8);
476 getConnection().exportStatements(s, p, o, true, Rio.createWriter(RDFFormat.NQUADS, ps));
477 }
478
479 Assert.assertEquals("Unexpected number of matching statements.", expected, statementsSize);
480 }
481
482 /**
483 * Asserts that the given pattern <code>(_ p o)</code> satisfies the expected number of statements.
484 *
485 * @param p
486 * predicate.
487 * @param o
488 * object.
489 * @param expected
490 * expected matches.
491 *
492 * @throws RepositoryException
493 * if an error is encountered whilst loading content from a storage connection
494 *
495 */
496 protected void assertStatementsSize(IRI p, Value o, int expected) throws RDFHandlerException, RepositoryException {
497 assertStatementsSize(null, p, o, expected);
498 }
499
500 /**
501 * Asserts that the given pattern <code>(_ p o)</code> satisfies the expected number of statements.
502 *
503 * @param p
504 * predicate.
505 * @param o
506 * object.
507 * @param expected
508 * expected matches.
509 *
510 * @throws RepositoryException
511 * if an error is encountered whilst loading content from a storage connection
512 *
513 */
514 protected void assertStatementsSize(IRI p, String o, int expected) throws RDFHandlerException, RepositoryException {
515 assertStatementsSize(p, o == null ? null : RDFUtils.literal(o), expected);
516 }
517
518 /**
519 * Asserts that the given pattern <code>(s p _)</code> is not present.
520 *
521 * @param s
522 * subject.
523 * @param p
524 * predicate.
525 *
526 * @throws RepositoryException
527 * if an error is encountered whilst loading content from a storage connection
528 *
529 */
530 protected void assertNotFound(Resource s, IRI p) throws RepositoryException {
531 RepositoryResult<Statement> statements = conn.getStatements(s, p, null, true);
532 try {
533 Assert.assertFalse("Expected no statements.", statements.hasNext());
534 } finally {
535 statements.close();
536 }
537 }
538
539 /**
540 * Returns the blank subject matching the pattern <code>(_:b p o)</code>, it is expected to exists and be just one.
541 *
542 * @param p
543 * predicate.
544 * @param o
545 * object.
546 *
547 * @return the matching blank subject.
548 *
549 * @throws RepositoryException
550 * if an error is encountered whilst loading content from a storage connection
551 *
552 */
553 protected Resource findExactlyOneBlankSubject(IRI p, Value o) throws RepositoryException {
554 RepositoryResult<Statement> it = conn.getStatements(null, p, o, false);
555 try {
556 Assert.assertTrue(getFailedExtractionMessage(), it.hasNext());
557 Statement stmt = it.next();
558 Resource result = stmt.getSubject();
559 Assert.assertTrue(getFailedExtractionMessage(), result instanceof BNode);
560 Assert.assertFalse(getFailedExtractionMessage(), it.hasNext());
561 return result;
562 } finally {
563 it.close();
564 }
565 }
566
567 /**
568 * Returns the object matching the pattern <code>(s p o)</code>, it is expected to exists and be just one.
569 *
570 * @param s
571 * subject.
572 * @param p
573 * predicate.
574 *
575 * @return the matching object.
576 *
577 * @throws RepositoryException
578 * if an error is encountered whilst loading content from a storage connection
579 *
580 */
581 protected Value findExactlyOneObject(Resource s, IRI p) throws RepositoryException {
582 RepositoryResult<Statement> it = conn.getStatements(s, p, null, false);
583 try {
584 Assert.assertTrue(getFailedExtractionMessage(), it.hasNext());
585 return it.next().getObject();
586 } finally {
587 it.close();
588 }
589 }
590
591 /**
592 * Returns all the subjects matching the pattern <code>(s? p o)</code>.
593 *
594 * @param p
595 * predicate.
596 * @param o
597 * object.
598 *
599 * @return list of matching subjects.
600 *
601 * @throws RepositoryException
602 * if an error is encountered whilst loading content from a storage connection
603 *
604 */
605 protected List<Resource> findSubjects(IRI p, Value o) throws RepositoryException {
606 RepositoryResult<Statement> it = conn.getStatements(null, p, o, false);
607 List<Resource> subjects = new ArrayList<Resource>();
608 try {
609 Statement statement;
610 while (it.hasNext()) {
611 statement = it.next();
612 subjects.add(statement.getSubject());
613 }
614 } finally {
615 it.close();
616 }
617 return subjects;
618 }
619
620 /**
621 * Returns all the objects matching the pattern <code>(s p _)</code>.
622 *
623 * @param s
624 * predicate.
625 * @param p
626 * predicate.
627 *
628 * @return list of matching objects.
629 *
630 * @throws RepositoryException
631 * if an error is encountered whilst loading content from a storage connection
632 *
633 */
634 protected List<Value> findObjects(Resource s, IRI p) throws RepositoryException {
635 RepositoryResult<Statement> it = conn.getStatements(s, p, null, false);
636 List<Value> objects = new ArrayList<Value>();
637 try {
638 Statement statement;
639 while (it.hasNext()) {
640 statement = it.next();
641 objects.add(statement.getObject());
642 }
643 } finally {
644 it.close();
645 }
646 return objects;
647 }
648
649 /**
650 * Finds the object matching the pattern <code>(s p _)</code>, asserts to find exactly one result.
651 *
652 * @param s
653 * subject.
654 * @param p
655 * predicate
656 *
657 * @return matching object.
658 *
659 * @throws org.eclipse.rdf4j.repository.RepositoryException
660 * if an error is encountered whilst loading content from a storage connection
661 */
662 protected Value findObject(Resource s, IRI p) throws RepositoryException {
663 RepositoryResult<Statement> statements = conn.getStatements(s, p, null, true);
664 try {
665 Assert.assertTrue("Expected at least a statement.", statements.hasNext());
666 return (statements.next().getObject());
667 } finally {
668 statements.close();
669 }
670 }
671
672 /**
673 * Finds the resource object matching the pattern <code>(s p _)</code>, asserts to find exactly one result.
674 *
675 * @param s
676 * subject.
677 * @param p
678 * predicate.
679 *
680 * @return matching object.
681 *
682 * @throws RepositoryException
683 * if an error is encountered whilst loading content from a storage connection
684 *
685 */
686 protected Resource findObjectAsResource(Resource s, IRI p) throws RepositoryException {
687 final Value v = findObject(s, p);
688 try {
689 return (Resource) v;
690 } catch (ClassCastException cce) {
691 Assert.fail("Expected resource object, found: " + v.getClass().getSimpleName());
692 throw new IllegalStateException();
693 }
694 }
695
696 /**
697 * Finds the literal object matching the pattern <code>(s p _)</code>, asserts to find exactly one result.
698 *
699 * @param s
700 * subject.
701 * @param p
702 * predicate.
703 *
704 * @return matching object.
705 *
706 * @throws RepositoryException
707 * if an error is encountered whilst loading content from a storage connection
708 *
709 */
710 protected String findObjectAsLiteral(Resource s, IRI p) throws RepositoryException {
711 return findObject(s, p).stringValue();
712 }
713
714 /**
715 * Dumps the extracted model in <i>Turtle</i> format.
716 *
717 * @return a string containing the model in Turtle.
718 *
719 * @throws RepositoryException
720 * if an error is encountered whilst loading content from a storage connection
721 *
722 */
723 protected String dumpModelToTurtle() throws RepositoryException {
724 StringWriter w = new StringWriter();
725 try {
726 conn.export(Rio.createWriter(RDFFormat.TURTLE, w));
727 return w.toString();
728 } catch (RDFHandlerException ex) {
729 throw new RuntimeException(ex);
730 }
731 }
732
733 /**
734 * Dumps the extracted model in <i>NQuads</i> format.
735 *
736 * @return a string containing the model in NQuads.
737 *
738 * @throws RepositoryException
739 * if an error is encountered whilst loading content from a storage connection
740 *
741 */
742 protected String dumpModelToNQuads() throws RepositoryException {
743 StringWriter w = new StringWriter();
744 try {
745 conn.export(Rio.createWriter(RDFFormat.NQUADS, w));
746 return w.toString();
747 } catch (RDFHandlerException ex) {
748 throw new RuntimeException(ex);
749 }
750 }
751
752 /**
753 * Dumps the extracted model in <i>RDFXML</i> format.
754 *
755 * @return a string containing the model in RDFXML.
756 *
757 * @throws RepositoryException
758 * if an error is encountered whilst loading content from a storage connection
759 *
760 */
761 protected String dumpModelToRDFXML() throws RepositoryException {
762 StringWriter w = new StringWriter();
763 try {
764 conn.export(Rio.createWriter(RDFFormat.RDFXML, w));
765 return w.toString();
766 } catch (RDFHandlerException ex) {
767 throw new RuntimeException(ex);
768 }
769 }
770
771 /**
772 * Dumps the list of statements contained in the extracted model.
773 *
774 * @return list of extracted statements.
775 *
776 * @throws RepositoryException
777 * if an error is encountered whilst loading content from a storage connection
778 *
779 */
780 protected List<Statement> dumpAsListOfStatements() throws RepositoryException {
781 return Iterations.asList(conn.getStatements(null, null, null, false));
782 }
783
784 /**
785 * @return string containing human readable statements.
786 *
787 * @throws RepositoryException
788 * if an error is encountered whilst loading content from a storage connection
789 *
790 */
791 protected String dumpHumanReadableTriples() throws RepositoryException {
792 StringBuilder sb = new StringBuilder();
793 RepositoryResult<Statement> result = conn.getStatements(null, null, null, false);
794 while (result.hasNext()) {
795 Statement statement = result.next();
796 sb.append(String.format(Locale.ROOT, "%s %s %s %s\n", statement.getSubject(), statement.getPredicate(),
797 statement.getObject(), statement.getContext()));
798
799 }
800 return sb.toString();
801 }
802
803 /**
804 * Checks that a statement is contained in the extracted model. If the statement declares bnodes, they are replaced
805 * with <code>_</code> patterns.
806 *
807 * @param statement
808 * an RDF {@link org.eclipse.rdf4j.model.Statement} implementation
809 *
810 * @throws RepositoryException
811 * if an error is encountered whilst loading content from a storage connection
812 *
813 */
814 // TODO: bnode check is too weak, introduce graph omomorphism check.
815 protected void assertContains(Statement statement) throws RepositoryException {
816 Assert.assertTrue("Cannot find statement " + statement + " in model.",
817 conn.hasStatement(statement.getSubject() instanceof BNode ? null : statement.getSubject(),
818 statement.getPredicate(), statement.getObject() instanceof BNode ? null : statement.getObject(),
819 false));
820 }
821
822 /**
823 * Assert that the model contains the statement <code>(s p l)</code> where <code>l</code> is a literal.
824 *
825 * @param s
826 * subject.
827 * @param p
828 * predicate.
829 * @param l
830 * literal content.
831 *
832 * @throws RepositoryException
833 * if an error is encountered whilst loading content from a storage connection
834 *
835 */
836 protected void assertContains(Resource s, IRI p, String l) throws RepositoryException {
837 assertContains(s, p, RDFUtils.literal(l));
838 }
839
840 /**
841 * Assert that the model contains the statement <code>(s p l)</code> where <code>l</code> is a language literal.
842 *
843 * @param s
844 * subject.
845 * @param p
846 * predicate.
847 * @param l
848 * literal content.
849 * @param lang
850 * literal language.
851 *
852 * @throws RepositoryException
853 * if an error is encountered whilst loading content from a storage connection
854 *
855 */
856 protected void assertContains(Resource s, IRI p, String l, String lang) throws RepositoryException {
857 assertContains(s, p, RDFUtils.literal(l, lang));
858 }
859
860 /**
861 * Returns all statements matching the pattern <code>(s p o)</code>.
862 *
863 * @param s
864 * subject.
865 * @param p
866 * predicate.
867 * @param o
868 * object.
869 *
870 * @return list of statements.
871 *
872 * @throws RepositoryException
873 * if an error is encountered whilst loading content from a storage connection
874 *
875 */
876 protected RepositoryResult<Statement> getStatements(Resource s, IRI p, Value o) throws RepositoryException {
877 return conn.getStatements(s, p, o, false);
878 }
879
880 /**
881 * Counts all statements matching the pattern <code>(s p o)</code>.
882 *
883 * @param s
884 * subject.
885 * @param p
886 * predicate.
887 * @param o
888 * object.
889 *
890 * @return number of matches.
891 *
892 * @throws RepositoryException
893 * if an error is encountered whilst loading content from a storage connection
894 *
895 */
896 protected int getStatementsSize(Resource s, IRI p, Value o) throws RepositoryException {
897 RepositoryResult<Statement> result = getStatements(s, p, o);
898 int count = 0;
899 try {
900 while (result.hasNext()) {
901 result.next();
902 count++;
903 }
904 } finally {
905 result.close();
906 }
907 return count;
908 }
909
910 private String getFailedExtractionMessage() throws RepositoryException {
911 return "Assertion failed! Extracted triples:\n" + dumpModelToNQuads();
912 }
913
914 }