This project has retired. For details please refer to its Attic page.
AbstractExtractorTestCase xref
View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *  http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  
18  package org.apache.any23.extractor.html;
19  
20  import org.apache.any23.AbstractAny23TestBase;
21  import org.apache.any23.extractor.IssueReport;
22  import org.apache.any23.extractor.IssueReport.Issue;
23  import org.apache.any23.extractor.IssueReport.IssueLevel;
24  import org.apache.any23.extractor.ExtractionException;
25  import org.apache.any23.extractor.ExtractorFactory;
26  import org.apache.any23.extractor.SingleDocumentExtraction;
27  import org.apache.any23.extractor.SingleDocumentExtractionReport;
28  import org.apache.any23.rdf.RDFUtils;
29  import org.apache.any23.vocab.SINDICE;
30  import org.apache.any23.writer.RepositoryWriter;
31  import org.junit.After;
32  import org.junit.Assert;
33  import org.junit.Before;
34  import org.eclipse.rdf4j.common.iteration.Iterations;
35  import org.eclipse.rdf4j.model.BNode;
36  import org.eclipse.rdf4j.model.Literal;
37  import org.eclipse.rdf4j.model.Resource;
38  import org.eclipse.rdf4j.model.Statement;
39  import org.eclipse.rdf4j.model.IRI;
40  import org.eclipse.rdf4j.model.Value;
41  import org.eclipse.rdf4j.repository.RepositoryConnection;
42  import org.eclipse.rdf4j.repository.RepositoryException;
43  import org.eclipse.rdf4j.repository.RepositoryResult;
44  import org.eclipse.rdf4j.repository.sail.SailRepository;
45  import org.eclipse.rdf4j.rio.RDFFormat;
46  import org.eclipse.rdf4j.rio.RDFHandlerException;
47  import org.eclipse.rdf4j.rio.RDFParseException;
48  import org.eclipse.rdf4j.rio.Rio;
49  import org.eclipse.rdf4j.sail.Sail;
50  import org.eclipse.rdf4j.sail.memory.MemoryStore;
51  import org.slf4j.Logger;
52  import org.slf4j.LoggerFactory;
53  
54  import java.io.ByteArrayOutputStream;
55  import java.io.IOException;
56  import java.io.PrintStream;
57  import java.io.StringWriter;
58  import java.lang.invoke.MethodHandles;
59  import java.nio.charset.StandardCharsets;
60  import java.util.ArrayList;
61  import java.util.Collection;
62  import java.util.Collections;
63  import java.util.List;
64  import java.util.Locale;
65  import java.util.Map;
66  
67  /**
68   * Abstract class used to write {@link org.apache.any23.extractor.Extractor} specific test cases.
69   */
70  public abstract class AbstractExtractorTestCase extends AbstractAny23TestBase {
71  
    /**
     * Logger for this class, resolved through {@link MethodHandles#lookup()}.
     */
    private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());

    /**
     * Base test document. Its string form is passed as the document IRI when a resource is opened for extraction.
     */
    // TODO: change base IRI string.
    protected static IRI baseIRI = RDFUtils.iri("http://bob.example.com/");

    /**
     * Internal connection used to collect extraction results. It is opened by {@link #setUp()} and closed by
     * {@link #tearDown()}.
     */
    protected RepositoryConnection conn;

    /**
     * The latest generated report, i.e. the result of the most recent extraction run.
     */
    private SingleDocumentExtractionReport report;

    /**
     * In-memory store created by {@link #setUp()}; it backs {@link #repository}.
     */
    private Sail store;

    /**
     * Repository wrapping {@link #store}; {@link #conn} is obtained from it.
     */
    private SailRepository repository;
93  
94      /**
95       * Constructor.
96       */
97      public AbstractExtractorTestCase() {
98          super();
99      }
100 
    /**
     * Supplies the factory of the extractor exercised by the concrete test case. The factory is handed to the
     * extraction started by {@link #extract(String)}, and its extractor name is used to look up the reported issues.
     *
     * @return the factory of the extractor to be tested.
     */
    protected abstract ExtractorFactory<?> getExtractorFactory();
105 
106     /**
107      * Test case initialization.
108      * 
109      * @throws Exception
110      *             if there is an error constructing input objects
111      */
112     @Before
113     public void setUp() throws Exception {
114         super.setUp();
115         store = new MemoryStore();
116         repository = new SailRepository(store);
117         repository.init();
118         conn = repository.getConnection();
119     }
120 
121     /**
122      * Test case resources release.
123      *
124      * @throws RepositoryException
125      *             if an error is encountered whilst loading content from a storage connection
126      * 
127      */
128     @After
129     public void tearDown() throws RepositoryException {
130         try {
131             conn.close();
132         } finally {
133             repository.shutDown();
134         }
135         conn = null;
136         report = null;
137         store = null;
138         repository = null;
139     }
140 
141     /**
142      * @return the connection to the memory repository.
143      */
144     protected RepositoryConnection getConnection() {
145         return conn;
146     }
147 
148     /**
149      * @return the last generated report.
150      */
151     protected SingleDocumentExtractionReport getReport() {
152         return report;
153     }
154 
155     /**
156      * Returns the list of issues raised by a given extractor.
157      *
158      * @param extractorName
159      *            name of the extractor.
160      * 
161      * @return collection of issues.
162      */
163     protected Collection<IssueReport.Issue> getIssues(String extractorName) {
164         for (Map.Entry<String, Collection<IssueReport.Issue>> issueEntry : report.getExtractorToIssues().entrySet()) {
165             if (issueEntry.getKey().equals(extractorName)) {
166                 return issueEntry.getValue();
167             }
168         }
169         return Collections.emptyList();
170     }
171 
172     /**
173      * Returns the list of issues raised by the extractor under testing.
174      *
175      * @return collection of issues.
176      */
177     protected Collection<IssueReport.Issue> getIssues() {
178         return getIssues(getExtractorFactory().getExtractorName());
179     }
180 
181     /**
182      * Applies the extractor provided by the {@link #getExtractorFactory()} to the specified resource.
183      *
184      * @param resource
185      *            resource name.
186      * 
187      * @throws org.apache.any23.extractor.ExtractionException
188      *             if there is an exception during extraction
189      * @throws IOException
190      *             if there is an error processing the input data
191      */
192     // TODO: MimeType detector to null forces the execution of all extractors,
193     // but extraction
194     // tests should be based on mimetype detection.
195     protected void extract(String resource) throws ExtractionException, IOException {
196         SingleDocumentExtraction ex = new SingleDocumentExtraction(
197                 new HTMLFixture(copyResourceToTempFile(resource)).getOpener(baseIRI.toString()), getExtractorFactory(),
198                 new RepositoryWriter(conn));
199         ex.setMIMETypeDetector(null);
200         report = ex.run();
201     }
202 
203     /**
204      * Performs data extraction over the content of a resource and assert that the extraction was fine.
205      *
206      * @param resource
207      *            resource name.
208      * @param assertNoIssues
209      *            if <code>true</code>invokes {@link #assertNoIssues()} after the extraction.
210      */
211     protected void assertExtract(String resource, boolean assertNoIssues) {
212         try {
213             extract(resource);
214             if (assertNoIssues)
215                 assertNoIssues();
216         } catch (ExtractionException ex) {
217             throw new RuntimeException(ex);
218         } catch (IOException ex) {
219             throw new RuntimeException(ex);
220         }
221     }
222 
223     /**
224      * Performs data extraction over the content of a resource and assert that the extraction was fine and raised no
225      * issues.
226      *
227      * @param resource
228      *            input resource to test extraction on.
229      */
230     protected void assertExtract(String resource) {
231         assertExtract(resource, true);
232     }
233 
234     /**
235      * Asserts that the extracted triples contain the pattern <code>(_ p o)</code>.
236      *
237      * @param p
238      *            predicate
239      * @param o
240      *            object.
241      * 
242      * @throws RepositoryException
243      *             if an error is encountered whilst loading content from a storage connection
244      * 
245      */
246     protected void assertContains(IRI p, Resource o) throws RepositoryException {
247         assertContains(null, p, o);
248     }
249 
250     /**
251      * Asserts that the extracted triples contain the pattern <code>(_ p o)</code>.
252      *
253      * @param p
254      *            predicate
255      * @param o
256      *            object.
257      * 
258      * @throws RepositoryException
259      *             if an error is encountered whilst loading content from a storage connection
260      * 
261      */
262     protected void assertContains(IRI p, String o) throws RepositoryException {
263         assertContains(null, p, RDFUtils.literal(o));
264     }
265 
266     /**
267      * Asserts that the extracted triples contain the pattern <code>(_ p o)</code>.
268      *
269      * @param p
270      *            predicate
271      * @param o
272      *            object.
273      * 
274      * @throws RepositoryException
275      *             if an error is encountered whilst loading content from a storage connection
276      * 
277      */
278     protected void assertNotContains(IRI p, Resource o) throws RepositoryException {
279         assertNotContains(null, p, o);
280     }
281 
282     /**
283      * Asserts that the extracted triples contain the pattern <code>(s p o)</code>.
284      *
285      * @param s
286      *            subject.
287      * @param p
288      *            predicate.
289      * @param o
290      *            object.
291      * 
292      * @throws RepositoryException
293      *             if an error is encountered whilst loading content from a storage connection
294      * 
295      */
296     protected void assertContains(Resource s, IRI p, Value o) throws RepositoryException {
297         Assert.assertTrue(
298                 getFailedExtractionMessage() + String.format(Locale.ROOT, "Cannot find triple (%s %s %s)", s, p, o),
299                 conn.hasStatement(s, p, o, false));
300     }
301 
302     /**
303      * Asserts that the extracted triples contain the pattern <code>(s p o)</code>.
304      *
305      * @param s
306      *            subject.
307      * @param p
308      *            predicate.
309      * @param o
310      *            object.
311      * 
312      * @throws RepositoryException
313      *             if an error is encountered whilst loading content from a storage connection
314      * 
315      */
316     protected void assertNotContains(Resource s, IRI p, String o) throws RepositoryException {
317         Assert.assertFalse(getFailedExtractionMessage(), conn.hasStatement(s, p, RDFUtils.literal(o), false));
318     }
319 
320     /**
321      * Asserts that the extracted triples contain the pattern <code>(s p o)</code>.
322      *
323      * @param s
324      *            subject.
325      * @param p
326      *            predicate.
327      * @param o
328      *            object.
329      * 
330      * @throws RepositoryException
331      *             if an error is encountered whilst loading content from a storage connection
332      * 
333      */
334     protected void assertNotContains(Resource s, IRI p, Resource o) throws RepositoryException {
335         Assert.assertFalse(getFailedExtractionMessage(), conn.hasStatement(s, p, o, false));
336     }
337 
338     /**
339      * Asserts that the model contains at least a statement.
340      *
341      * @throws RepositoryException
342      *             if an error is encountered whilst loading content from a storage connection
343      * 
344      */
345     protected void assertModelNotEmpty() throws RepositoryException {
346         Assert.assertFalse("The model is expected to not be empty." + getFailedExtractionMessage(), conn.isEmpty());
347     }
348 
349     /**
350      * Asserts that the model doesn't contain the pattern <code>(s p o)</code>
351      *
352      * @param s
353      *            subject.
354      * @param p
355      *            predicate.
356      * @param o
357      *            object.
358      * 
359      * @throws RepositoryException
360      *             if an error is encountered whilst loading content from a storage connection
361      * 
362      */
363     protected void assertNotContains(Resource s, IRI p, Literal o) throws RepositoryException {
364         Assert.assertFalse(getFailedExtractionMessage(), conn.hasStatement(s, p, o, false));
365     }
366 
367     /**
368      * Asserts that the model is expected to contains no statements.
369      *
370      * @throws RepositoryException
371      *             if an error is encountered whilst loading content from a storage connection
372      * 
373      */
374     protected void assertModelEmpty() throws RepositoryException {
375         Assert.assertTrue(getFailedExtractionMessage(), conn.isEmpty());
376     }
377 
378     /**
379      * Asserts that the extraction generated no issues.
380      */
381     protected void assertNoIssues() {
382         for (Map.Entry<String, Collection<IssueReport.Issue>> entry : report.getExtractorToIssues().entrySet()) {
383             if (entry.getValue().size() > 0) {
384                 log.debug("Unexpected issue for extractor " + entry.getKey() + " : " + entry.getValue());
385             }
386             for (Issue nextIssue : entry.getValue()) {
387                 if (nextIssue.getLevel() == IssueLevel.ERROR || nextIssue.getLevel() == IssueLevel.FATAL) {
388                     Assert.fail("Unexpected issue for extractor " + entry.getKey() + " : " + entry.getValue());
389                 }
390             }
391         }
392     }
393 
394     /**
395      * Asserts that an issue has been produced by the processed {@link org.apache.any23.extractor.Extractor}.
396      *
397      * @param level
398      *            expected issue level
399      * @param issueRegex
400      *            regex matching the expected human readable issue message.
401      */
402     protected void assertIssue(IssueReport.IssueLevel level, String issueRegex) {
403         final Collection<IssueReport.Issue> issues = getIssues(getExtractorFactory().getExtractorName());
404         boolean found = false;
405         for (IssueReport.Issue issue : issues) {
406             if (issue.getLevel() == level && issue.getMessage().matches(issueRegex)) {
407                 found = true;
408                 break;
409             }
410         }
411         Assert.assertTrue(String.format(Locale.ROOT, "Cannot find issue with level %s matching expression '%s'", level,
412                 issueRegex), found);
413     }
414 
415     /**
416      * Verifies that the current model contains all the given statements.
417      *
418      * @param statements
419      *            list of statements to be verified.
420      * 
421      * @throws RepositoryException
422      *             if an error is encountered whilst loading content from a storage connection
423      * 
424      */
425     public void assertContainsModel(Statement[] statements) throws RepositoryException {
426         for (Statement statement : statements) {
427             assertContains(statement);
428         }
429     }
430 
431     /**
432      * Verifies that the current model contains all the statements declared in the specified <code>modelFile</code>.
433      *
434      * @param modelResource
435      *            the resource containing the model.
436      * 
437      * @throws RDFHandlerException
438      *             if there is an error within the {@link org.eclipse.rdf4j.rio.RDFHandler}
439      * @throws IOException
440      *             if there is an error processing the input data
441      * @throws RDFParseException
442      *             if there is an exception parsing the RDF stream
443      * @throws RepositoryException
444      *             if an error is encountered whilst loading content from a storage connection
445      * 
446      */
447     public void assertContainsModel(String modelResource)
448             throws RDFHandlerException, IOException, RDFParseException, RepositoryException {
449         getConnection().remove(null, SINDICE.getInstance().date, (Value) null, (Resource) null);
450         getConnection().remove(null, SINDICE.getInstance().size, (Value) null, (Resource) null);
451         assertContainsModel(RDFUtils.parseRDF(modelResource));
452     }
453 
454     /**
455      * Asserts that the given pattern <code>(s p o)</code> satisfies the expected number of statements.
456      *
457      * @param s
458      *            subject.
459      * @param p
460      *            predicate.
461      * @param o
462      *            object.
463      * @param expected
464      *            expected matches.
465      * 
466      * @throws RepositoryException
467      *             if an error is encountered whilst loading content from a storage connection
468      * 
469      */
470     protected void assertStatementsSize(Resource s, IRI p, Value o, int expected)
471             throws RDFHandlerException, RepositoryException {
472         int statementsSize = getStatementsSize(s, p, o);
473         if (statementsSize != expected) {
474             final ByteArrayOutputStream baos = new ByteArrayOutputStream();
475             PrintStream ps = new PrintStream(baos, true, StandardCharsets.UTF_8);
476             getConnection().exportStatements(s, p, o, true, Rio.createWriter(RDFFormat.NQUADS, ps));
477         }
478 
479         Assert.assertEquals("Unexpected number of matching statements.", expected, statementsSize);
480     }
481 
482     /**
483      * Asserts that the given pattern <code>(_ p o)</code> satisfies the expected number of statements.
484      *
485      * @param p
486      *            predicate.
487      * @param o
488      *            object.
489      * @param expected
490      *            expected matches.
491      * 
492      * @throws RepositoryException
493      *             if an error is encountered whilst loading content from a storage connection
494      * 
495      */
496     protected void assertStatementsSize(IRI p, Value o, int expected) throws RDFHandlerException, RepositoryException {
497         assertStatementsSize(null, p, o, expected);
498     }
499 
500     /**
501      * Asserts that the given pattern <code>(_ p o)</code> satisfies the expected number of statements.
502      *
503      * @param p
504      *            predicate.
505      * @param o
506      *            object.
507      * @param expected
508      *            expected matches.
509      * 
510      * @throws RepositoryException
511      *             if an error is encountered whilst loading content from a storage connection
512      * 
513      */
514     protected void assertStatementsSize(IRI p, String o, int expected) throws RDFHandlerException, RepositoryException {
515         assertStatementsSize(p, o == null ? null : RDFUtils.literal(o), expected);
516     }
517 
518     /**
519      * Asserts that the given pattern <code>(s p _)</code> is not present.
520      *
521      * @param s
522      *            subject.
523      * @param p
524      *            predicate.
525      * 
526      * @throws RepositoryException
527      *             if an error is encountered whilst loading content from a storage connection
528      * 
529      */
530     protected void assertNotFound(Resource s, IRI p) throws RepositoryException {
531         RepositoryResult<Statement> statements = conn.getStatements(s, p, null, true);
532         try {
533             Assert.assertFalse("Expected no statements.", statements.hasNext());
534         } finally {
535             statements.close();
536         }
537     }
538 
539     /**
540      * Returns the blank subject matching the pattern <code>(_:b p o)</code>, it is expected to exists and be just one.
541      *
542      * @param p
543      *            predicate.
544      * @param o
545      *            object.
546      * 
547      * @return the matching blank subject.
548      * 
549      * @throws RepositoryException
550      *             if an error is encountered whilst loading content from a storage connection
551      * 
552      */
553     protected Resource findExactlyOneBlankSubject(IRI p, Value o) throws RepositoryException {
554         RepositoryResult<Statement> it = conn.getStatements(null, p, o, false);
555         try {
556             Assert.assertTrue(getFailedExtractionMessage(), it.hasNext());
557             Statement stmt = it.next();
558             Resource result = stmt.getSubject();
559             Assert.assertTrue(getFailedExtractionMessage(), result instanceof BNode);
560             Assert.assertFalse(getFailedExtractionMessage(), it.hasNext());
561             return result;
562         } finally {
563             it.close();
564         }
565     }
566 
567     /**
568      * Returns the object matching the pattern <code>(s p o)</code>, it is expected to exists and be just one.
569      *
570      * @param s
571      *            subject.
572      * @param p
573      *            predicate.
574      * 
575      * @return the matching object.
576      * 
577      * @throws RepositoryException
578      *             if an error is encountered whilst loading content from a storage connection
579      * 
580      */
581     protected Value findExactlyOneObject(Resource s, IRI p) throws RepositoryException {
582         RepositoryResult<Statement> it = conn.getStatements(s, p, null, false);
583         try {
584             Assert.assertTrue(getFailedExtractionMessage(), it.hasNext());
585             return it.next().getObject();
586         } finally {
587             it.close();
588         }
589     }
590 
591     /**
592      * Returns all the subjects matching the pattern <code>(s? p o)</code>.
593      *
594      * @param p
595      *            predicate.
596      * @param o
597      *            object.
598      * 
599      * @return list of matching subjects.
600      * 
601      * @throws RepositoryException
602      *             if an error is encountered whilst loading content from a storage connection
603      * 
604      */
605     protected List<Resource> findSubjects(IRI p, Value o) throws RepositoryException {
606         RepositoryResult<Statement> it = conn.getStatements(null, p, o, false);
607         List<Resource> subjects = new ArrayList<Resource>();
608         try {
609             Statement statement;
610             while (it.hasNext()) {
611                 statement = it.next();
612                 subjects.add(statement.getSubject());
613             }
614         } finally {
615             it.close();
616         }
617         return subjects;
618     }
619 
620     /**
621      * Returns all the objects matching the pattern <code>(s p _)</code>.
622      *
623      * @param s
624      *            predicate.
625      * @param p
626      *            predicate.
627      * 
628      * @return list of matching objects.
629      * 
630      * @throws RepositoryException
631      *             if an error is encountered whilst loading content from a storage connection
632      * 
633      */
634     protected List<Value> findObjects(Resource s, IRI p) throws RepositoryException {
635         RepositoryResult<Statement> it = conn.getStatements(s, p, null, false);
636         List<Value> objects = new ArrayList<Value>();
637         try {
638             Statement statement;
639             while (it.hasNext()) {
640                 statement = it.next();
641                 objects.add(statement.getObject());
642             }
643         } finally {
644             it.close();
645         }
646         return objects;
647     }
648 
649     /**
650      * Finds the object matching the pattern <code>(s p _)</code>, asserts to find exactly one result.
651      *
652      * @param s
653      *            subject.
654      * @param p
655      *            predicate
656      * 
657      * @return matching object.
658      * 
659      * @throws org.eclipse.rdf4j.repository.RepositoryException
660      *             if an error is encountered whilst loading content from a storage connection
661      */
662     protected Value findObject(Resource s, IRI p) throws RepositoryException {
663         RepositoryResult<Statement> statements = conn.getStatements(s, p, null, true);
664         try {
665             Assert.assertTrue("Expected at least a statement.", statements.hasNext());
666             return (statements.next().getObject());
667         } finally {
668             statements.close();
669         }
670     }
671 
672     /**
673      * Finds the resource object matching the pattern <code>(s p _)</code>, asserts to find exactly one result.
674      *
675      * @param s
676      *            subject.
677      * @param p
678      *            predicate.
679      * 
680      * @return matching object.
681      * 
682      * @throws RepositoryException
683      *             if an error is encountered whilst loading content from a storage connection
684      * 
685      */
686     protected Resource findObjectAsResource(Resource s, IRI p) throws RepositoryException {
687         final Value v = findObject(s, p);
688         try {
689             return (Resource) v;
690         } catch (ClassCastException cce) {
691             Assert.fail("Expected resource object, found: " + v.getClass().getSimpleName());
692             throw new IllegalStateException();
693         }
694     }
695 
696     /**
697      * Finds the literal object matching the pattern <code>(s p _)</code>, asserts to find exactly one result.
698      *
699      * @param s
700      *            subject.
701      * @param p
702      *            predicate.
703      * 
704      * @return matching object.
705      * 
706      * @throws RepositoryException
707      *             if an error is encountered whilst loading content from a storage connection
708      * 
709      */
710     protected String findObjectAsLiteral(Resource s, IRI p) throws RepositoryException {
711         return findObject(s, p).stringValue();
712     }
713 
714     /**
715      * Dumps the extracted model in <i>Turtle</i> format.
716      *
717      * @return a string containing the model in Turtle.
718      * 
719      * @throws RepositoryException
720      *             if an error is encountered whilst loading content from a storage connection
721      * 
722      */
723     protected String dumpModelToTurtle() throws RepositoryException {
724         StringWriter w = new StringWriter();
725         try {
726             conn.export(Rio.createWriter(RDFFormat.TURTLE, w));
727             return w.toString();
728         } catch (RDFHandlerException ex) {
729             throw new RuntimeException(ex);
730         }
731     }
732 
733     /**
734      * Dumps the extracted model in <i>NQuads</i> format.
735      *
736      * @return a string containing the model in NQuads.
737      * 
738      * @throws RepositoryException
739      *             if an error is encountered whilst loading content from a storage connection
740      * 
741      */
742     protected String dumpModelToNQuads() throws RepositoryException {
743         StringWriter w = new StringWriter();
744         try {
745             conn.export(Rio.createWriter(RDFFormat.NQUADS, w));
746             return w.toString();
747         } catch (RDFHandlerException ex) {
748             throw new RuntimeException(ex);
749         }
750     }
751 
752     /**
753      * Dumps the extracted model in <i>RDFXML</i> format.
754      *
755      * @return a string containing the model in RDFXML.
756      * 
757      * @throws RepositoryException
758      *             if an error is encountered whilst loading content from a storage connection
759      * 
760      */
761     protected String dumpModelToRDFXML() throws RepositoryException {
762         StringWriter w = new StringWriter();
763         try {
764             conn.export(Rio.createWriter(RDFFormat.RDFXML, w));
765             return w.toString();
766         } catch (RDFHandlerException ex) {
767             throw new RuntimeException(ex);
768         }
769     }
770 
771     /**
772      * Dumps the list of statements contained in the extracted model.
773      *
774      * @return list of extracted statements.
775      * 
776      * @throws RepositoryException
777      *             if an error is encountered whilst loading content from a storage connection
778      * 
779      */
780     protected List<Statement> dumpAsListOfStatements() throws RepositoryException {
781         return Iterations.asList(conn.getStatements(null, null, null, false));
782     }
783 
784     /**
785      * @return string containing human readable statements.
786      * 
787      * @throws RepositoryException
788      *             if an error is encountered whilst loading content from a storage connection
789      * 
790      */
791     protected String dumpHumanReadableTriples() throws RepositoryException {
792         StringBuilder sb = new StringBuilder();
793         RepositoryResult<Statement> result = conn.getStatements(null, null, null, false);
794         while (result.hasNext()) {
795             Statement statement = result.next();
796             sb.append(String.format(Locale.ROOT, "%s %s %s %s\n", statement.getSubject(), statement.getPredicate(),
797                     statement.getObject(), statement.getContext()));
798 
799         }
800         return sb.toString();
801     }
802 
803     /**
804      * Checks that a statement is contained in the extracted model. If the statement declares bnodes, they are replaced
805      * with <code>_</code> patterns.
806      *
807      * @param statement
808      *            an RDF {@link org.eclipse.rdf4j.model.Statement} implementation
809      * 
810      * @throws RepositoryException
811      *             if an error is encountered whilst loading content from a storage connection
812      * 
813      */
814     // TODO: bnode check is too weak, introduce graph omomorphism check.
815     protected void assertContains(Statement statement) throws RepositoryException {
816         Assert.assertTrue("Cannot find statement " + statement + " in model.",
817                 conn.hasStatement(statement.getSubject() instanceof BNode ? null : statement.getSubject(),
818                         statement.getPredicate(), statement.getObject() instanceof BNode ? null : statement.getObject(),
819                         false));
820     }
821 
822     /**
823      * Assert that the model contains the statement <code>(s p l)</code> where <code>l</code> is a literal.
824      *
825      * @param s
826      *            subject.
827      * @param p
828      *            predicate.
829      * @param l
830      *            literal content.
831      * 
832      * @throws RepositoryException
833      *             if an error is encountered whilst loading content from a storage connection
834      * 
835      */
836     protected void assertContains(Resource s, IRI p, String l) throws RepositoryException {
837         assertContains(s, p, RDFUtils.literal(l));
838     }
839 
840     /**
841      * Assert that the model contains the statement <code>(s p l)</code> where <code>l</code> is a language literal.
842      *
843      * @param s
844      *            subject.
845      * @param p
846      *            predicate.
847      * @param l
848      *            literal content.
849      * @param lang
850      *            literal language.
851      * 
852      * @throws RepositoryException
853      *             if an error is encountered whilst loading content from a storage connection
854      * 
855      */
856     protected void assertContains(Resource s, IRI p, String l, String lang) throws RepositoryException {
857         assertContains(s, p, RDFUtils.literal(l, lang));
858     }
859 
860     /**
861      * Returns all statements matching the pattern <code>(s p o)</code>.
862      *
863      * @param s
864      *            subject.
865      * @param p
866      *            predicate.
867      * @param o
868      *            object.
869      * 
870      * @return list of statements.
871      * 
872      * @throws RepositoryException
873      *             if an error is encountered whilst loading content from a storage connection
874      * 
875      */
876     protected RepositoryResult<Statement> getStatements(Resource s, IRI p, Value o) throws RepositoryException {
877         return conn.getStatements(s, p, o, false);
878     }
879 
880     /**
881      * Counts all statements matching the pattern <code>(s p o)</code>.
882      *
883      * @param s
884      *            subject.
885      * @param p
886      *            predicate.
887      * @param o
888      *            object.
889      * 
890      * @return number of matches.
891      * 
892      * @throws RepositoryException
893      *             if an error is encountered whilst loading content from a storage connection
894      * 
895      */
896     protected int getStatementsSize(Resource s, IRI p, Value o) throws RepositoryException {
897         RepositoryResult<Statement> result = getStatements(s, p, o);
898         int count = 0;
899         try {
900             while (result.hasNext()) {
901                 result.next();
902                 count++;
903             }
904         } finally {
905             result.close();
906         }
907         return count;
908     }
909 
910     private String getFailedExtractionMessage() throws RepositoryException {
911         return "Assertion failed! Extracted triples:\n" + dumpModelToNQuads();
912     }
913 
914 }