View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *  http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  
18  package org.apache.any23.mime;
19  
20  import org.eclipse.rdf4j.rio.RDFFormat;
21  import org.eclipse.rdf4j.rio.Rio;
22  
23  import java.io.InputStream;
24  import java.util.HashMap;
25  import java.util.Map;
26  import java.util.Optional;
27  import java.util.regex.Matcher;
28  import java.util.regex.Pattern;
29  
30  /**
31   * Basic implementation of {@link MIMETypeDetector} based on file extensions.
32   */
33  public class NaiveMIMETypeDetector implements MIMETypeDetector {
34  
35      private final static Map<String, String> extensions = new HashMap<String, String>() {
36          /**
37           * 
38           */
39          private static final long serialVersionUID = 1L;
40  
41          {
42              // extension -> mime type
43              put("html", "text/html");
44              put("htm", "text/html");
45              put("xhtml", "application/xhtml+xml");
46              put("xht", "application/xhtml+xml");
47              put("xrdf", "application/rdf+xml");
48              put("rdfx", "application/rdf+xml");
49              put("owl", "application/rdf+xml");
50              put("txt", "text/plain");
51          }
52      };
53  
54      private final static Pattern extensionRegex = Pattern.compile(".*\\.([a-z0-9]+)");
55  
56      public MIMEType guessMIMEType(String fileName, InputStream input,
57  
58              MIMEType mimeTypeFromMetadata) {
59          if (mimeTypeFromMetadata != null) {
60              return mimeTypeFromMetadata;
61          }
62  
63          final Optional<RDFFormat> parserFormatForFileName = Rio.getParserFormatForFileName(fileName);
64          if (parserFormatForFileName.isPresent()) {
65              return MIMEType.parse(parserFormatForFileName.get().getDefaultMIMEType());
66          }
67  
68          String extension = getExtension(fileName);
69          if (extension == null) {
70              // Assume index file on web server.
71              extension = "html";
72          }
73          if (extensions.containsKey(extension)) {
74              return MIMEType.parse(extensions.get(extension));
75          }
76          return null;
77      }
78  
79      private String getExtension(String filename) {
80          Matcher m = extensionRegex.matcher(filename);
81          if (!m.matches())
82              return null;
83          return m.group(1);
84      }
85  
86  }