View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *  http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  
18  package org.apache.any23.mime;
19  
20  import org.eclipse.rdf4j.rio.RDFFormat;
21  import org.eclipse.rdf4j.rio.Rio;
22  
23  import java.io.InputStream;
24  import java.util.HashMap;
25  import java.util.Map;
26  import java.util.Optional;
27  import java.util.regex.Matcher;
28  import java.util.regex.Pattern;
29  
30  /**
31   * Basic implementation of {@link MIMETypeDetector} based
32   * on file extensions.
33   */
34  public class NaiveMIMETypeDetector implements MIMETypeDetector {
35  
36      private final static Map<String, String> extensions = new HashMap<String, String>() {
37        /**
38         * 
39         */
40        private static final long serialVersionUID = 1L;
41  
42          {
43              // extension -> mime type
44              put("html" , "text/html"            );
45              put("htm"  , "text/html"            );
46              put("xhtml", "application/xhtml+xml");
47              put("xht"  , "application/xhtml+xml");
48              put("xrdf" , "application/rdf+xml"  );
49              put("rdfx" , "application/rdf+xml"  );
50              put("owl"  , "application/rdf+xml"  );
51              put("txt"  , "text/plain"           );
52          }
53      };
54  
55      private final static Pattern extensionRegex = Pattern.compile(".*\\.([a-z0-9]+)");
56  
57      public MIMEType guessMIMEType(
58              String fileName,
59              InputStream input,
60  
61              MIMEType mimeTypeFromMetadata
62      ) {
63          if (mimeTypeFromMetadata != null) {
64              return mimeTypeFromMetadata;
65          }
66  
67          final Optional<RDFFormat> parserFormatForFileName = Rio.getParserFormatForFileName(fileName);
68          if (parserFormatForFileName.isPresent()) {
69              return MIMEType.parse(parserFormatForFileName.get().getDefaultMIMEType());
70          }
71  
72          String extension = getExtension(fileName);
73          if (extension == null) {
74              // Assume index file on web server.
75              extension = "html";
76          }
77          if (extensions.containsKey(extension)) {
78              return MIMEType.parse(extensions.get(extension));
79          }
80          return null;
81      }
82  
83      private String getExtension(String filename) {
84          Matcher m = extensionRegex.matcher(filename);
85          if (!m.matches()) return null;
86          return m.group(1);
87      }
88  
89  }