This project has been retired. For details, please refer to its
Attic page.
CrawlerTest xref
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18 package org.apache.any23.cli;
19
20 import org.apache.any23.Any23OnlineTestBase;
21 import org.apache.any23.rdf.RDFUtils;
22 import org.apache.any23.util.FileUtils;
23 import org.junit.Test;
24 import org.eclipse.rdf4j.model.Statement;
25 import org.eclipse.rdf4j.rio.RDFFormat;
26 import org.eclipse.rdf4j.rio.RDFHandlerException;
27 import org.eclipse.rdf4j.rio.RDFParseException;
28 import org.slf4j.Logger;
29 import org.slf4j.LoggerFactory;
30
31 import java.io.File;
32 import java.io.IOException;
33 import java.util.concurrent.Executors;
34 import java.util.concurrent.Future;
35 import java.util.concurrent.TimeUnit;
36 import java.util.concurrent.TimeoutException;
37
38 import static org.junit.Assert.assertTrue;
39
40
41
42
43
44
45 public class CrawlerTest extends Any23OnlineTestBase {
46
47 public static final Logger logger = LoggerFactory.getLogger(CrawlerTest.class);
48
49 @Test
50 public void testCLI() throws IOException, RDFHandlerException, RDFParseException {
51 assumeOnlineAllowed();
52
53 final File outFile = File.createTempFile("crawler-test", ".nq", tempDirectory);
54 outFile.delete();
55 logger.info( "Outfile: " + outFile.getAbsolutePath() );
56
57 final Future<?> future = Executors.newSingleThreadExecutor().submit(
58 new Runnable() {
59 @Override
60 public void run() {
61 try {
62 ToolRunner.main(
63 String.format(
64 "crawler -f nquads --maxpages 50 --maxdepth 1 --politenessdelay 500 -o %s " +
65 "http://any23.apache.org",
66 outFile.getAbsolutePath()
67 ).split(" ")
68 );
69 } catch (Exception e) {
70 e.printStackTrace();
71 }
72 }
73 }
74 );
75
76 try {
77 future.get(10, TimeUnit.SECONDS);
78 } catch (Exception e) {
79
80 if( ! (e instanceof TimeoutException) ) {
81 e.printStackTrace();
82 }
83 }
84 assertTrue("The output file has not been created.", outFile.exists());
85
86 final String[] lines = FileUtils.readFileLines(outFile);
87 final StringBuilder allLinesExceptLast = new StringBuilder();
88 for (int i = 0; i < lines.length - 1; i++) {
89 allLinesExceptLast.append(lines[i]);
90 allLinesExceptLast.append("\n");
91 }
92
93 final Statement[] statements = RDFUtils.parseRDF(RDFFormat.NQUADS, allLinesExceptLast.toString());
94 assertTrue(statements.length > 0);
95 }
96
97 }