This project has retired. For details please refer to its Attic page.
CrawlerTest xref
View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *  http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  
18  package org.apache.any23.cli;
19  
20  import org.apache.any23.Any23OnlineTestBase;
21  import org.apache.any23.rdf.RDFUtils;
22  import org.apache.any23.util.FileUtils;
23  import org.junit.Test;
24  import org.eclipse.rdf4j.model.Statement;
25  import org.eclipse.rdf4j.rio.RDFFormat;
26  import org.eclipse.rdf4j.rio.RDFHandlerException;
27  import org.eclipse.rdf4j.rio.RDFParseException;
28  import org.slf4j.Logger;
29  import org.slf4j.LoggerFactory;
30  
31  import java.io.File;
32  import java.io.IOException;
33  import java.util.concurrent.Executors;
34  import java.util.concurrent.Future;
35  import java.util.concurrent.TimeUnit;
36  import java.util.concurrent.TimeoutException;
37  
38  import static org.junit.Assert.assertTrue;
39  
40  /**
41   * Test case for {@link Crawler} CLI.
42   *
43   * @author Michele Mostarda (mostarda@fbk.eu)
44   */
45  public class CrawlerTest extends Any23OnlineTestBase {
46  
47      public static final Logger logger = LoggerFactory.getLogger(CrawlerTest.class);
48  
49      @Test
50      public void testCLI() throws IOException, RDFHandlerException, RDFParseException {
51          assumeOnlineAllowed();
52  
53          final File outFile = File.createTempFile("crawler-test", ".nq", tempDirectory);
54          outFile.delete();
55          logger.info( "Outfile: " + outFile.getAbsolutePath() );
56  
57          final Future<?> future = Executors.newSingleThreadExecutor().submit(
58              new Runnable() {
59                  @Override
60                  public void run() {
61                      try {
62                          ToolRunner.main(
63                                  String.format(
64                                          "crawler -f nquads --maxpages 50 --maxdepth 1 --politenessdelay 500 -o %s " +
65                                          "http://any23.apache.org",
66                                          outFile.getAbsolutePath()
67                                  ).split(" ")
68                          );
69                      } catch (Exception e) {
70                          e.printStackTrace();
71                      }
72                  }
73              }
74          );
75  
76          try {
77              future.get(10, TimeUnit.SECONDS);
78          } catch (Exception e) {
79              // OK.
80              if( ! (e instanceof TimeoutException) ) {
81                  e.printStackTrace();
82              }
83          }
84          assertTrue("The output file has not been created.", outFile.exists());
85  
86          final String[] lines = FileUtils.readFileLines(outFile);
87          final StringBuilder allLinesExceptLast = new StringBuilder();
88          for (int i = 0; i < lines.length - 1; i++) {
89              allLinesExceptLast.append(lines[i]);
90              allLinesExceptLast.append("\n");
91          }
92  
93          final Statement[] statements = RDFUtils.parseRDF(RDFFormat.NQUADS, allLinesExceptLast.toString());
94          assertTrue(statements.length > 0);
95      }
96  
97  }