This project has retired. For details please refer to its Attic page.
YAMLTikaParserTest xref
View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *  http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  package org.apache.any23.extractor.yaml;
18  
19  import java.io.BufferedInputStream;
20  import java.io.InputStream;
21  import org.apache.any23.mime.MIMEType;
22  import org.apache.any23.mime.TikaMIMETypeDetector;
23  import org.apache.any23.mime.purifier.WhiteSpacesPurifier;
24  import org.junit.Assert;
25  import org.junit.Before;
26  import org.junit.Test;
27  import org.slf4j.Logger;
28  import org.slf4j.LoggerFactory;
29  
30  /**
31   * @author jacek
32   */
33  public class YAMLTikaParserTest {
34  
35      private final String file1 = "/org/apache/any23/extractor/yaml/simple-load.yml";
36  
37      private final Logger log = LoggerFactory.getLogger(getClass());
38  
39      private TikaMIMETypeDetector detector;
40  
41      @Before
42      public void prepareDetector() throws Exception {
43          detector = new TikaMIMETypeDetector(new WhiteSpacesPurifier());
44      }
45  
46      /**
47       * Yaml type is detected by file name only so detector returns octet type.
48       * 
49       * @throws Exception
50       *             if there is an error asserting the test data.
51       */
52      @Test
53      public void tikaStreamDetect() throws Exception {
54          InputStream is = new BufferedInputStream(this.getClass().getResourceAsStream(file1));
55          Assert.assertNotNull("Could not find test file: " + file1, is);
56          MIMEType type = detector.guessMIMEType(null, is, null);
57  
58          // Not currently doing stream detection for YAML, so it returns the default, octet-stream
59          Assert.assertEquals("application/octet-stream", type.toString());
60      }
61  
62      @Test
63      public void tikaNameDetect() throws Exception {
64          String fileName = java.net.URI.create(file1).getPath();
65  
66          log.debug("normatised file name: {}", fileName);
67          MIMEType type = detector.guessMIMEType(fileName, null, null);
68  
69          Assert.assertEquals("text/x-yaml", type.toString());
70      }
71  
72  }