View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *  http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  
18  package org.apache.any23.writer;
19  
20  import org.eclipse.rdf4j.model.IRI;
21  import org.eclipse.rdf4j.model.ValueFactory;
22  import org.eclipse.rdf4j.model.impl.SimpleValueFactory;
23  import org.eclipse.rdf4j.rio.RDFFormat;
24  
25  import java.nio.charset.Charset;
26  import java.util.Collection;
27  import java.util.Collections;
28  import java.util.List;
29  import java.util.Locale;
30  import java.util.Optional;
31  import java.util.stream.Collectors;
32  
33  /**
34   * @author Hans Brende (hansbrende@apache.org)
35   */
36  public class TripleFormat {
37      private final String name;
38      private final IRI standardIRI;
39      private final List<String> mimeTypes;
40      private final Charset charset;
41      private final List<String> fileExtensions;
42      private final Capabilities capabilities;
43      RDFFormat rdfFormat;
44  
45      private static final ValueFactory vf = SimpleValueFactory.getInstance();
46  
47      private static final int WRITES_TRIPLES = 1;
48      private static final int WRITES_GRAPHS = 1 << 1;
49      private static final int WRITES_NAMESPACES = 1 << 2;
50  
51      public static final Capabilities NONSTANDARD = new Capabilities(0);
52      public static final Capabilities TRIPLES = new Capabilities(WRITES_TRIPLES);
53      public static final Capabilities QUADS = new Capabilities(WRITES_TRIPLES | WRITES_GRAPHS);
54      public static final Capabilities TRIPLES_AND_NAMESPACES = TRIPLES.withNamespaces();
55      public static final Capabilities QUADS_AND_NAMESPACES = QUADS.withNamespaces();
56  
57      public static class Capabilities {
58          private final int raw;
59  
60          private Capabilities(int raw) {
61              this.raw = raw;
62          }
63  
64          public boolean has(Capabilities other) {
65              int oraw = other.raw;
66              return (raw & oraw) == oraw;
67          }
68  
69          private Capabilities withNamespaces() {
70              return new Capabilities(raw | WRITES_NAMESPACES);
71          }
72  
73          //TODO: add "supportsComments()"
74      }
75  
76      private static IllegalArgumentException mimeTypeErr(String mt) {
77          return new IllegalArgumentException(mt + " is not a valid mimetype");
78      }
79  
80      private static IllegalArgumentException extensionErr(String ext) {
81          return new IllegalArgumentException(ext + " is not a valid extension");
82      }
83  
84      private static <E> E checkNonNull(E object, String name) {
85          if (object == null) {
86              throw new IllegalArgumentException(name + " must not be null");
87          }
88          return object;
89      }
90  
91      //see https://tools.ietf.org/html/rfc2045#section-5.1
92      private static void checkMimeTypes(List<String> mts) {
93          if (checkNonNull(mts, "mimetypes").isEmpty()) {
94              throw new IllegalArgumentException("mimetypes must not be empty");
95          }
96          for (String mt : mts) {
97              boolean slash = false;
98              for (int i = 0, len = checkNonNull(mt, "mimetype").length(); i < len; i++) {
99                  char ch = mt.charAt(i);
100                 if (ch <= ' ' || ch >= 127 || ch == '(' || ch == ')' ||
101                         ch == '<' || ch == '>' || ch == '@' || ch == ',' ||
102                         ch == ';' || ch == ':' || ch == '\\' || ch == '"' ||
103                         ch == '[' || ch == ']' || ch == '?' || ch == '='
104                         //also disallow wildcards:
105                         || ch == '*') {
106                     throw mimeTypeErr(mt);
107                 } else if (ch == '/') {
108                     if (slash || i == 0 || i + 1 == len) {
109                         throw mimeTypeErr(mt);
110                     }
111                     slash = true;
112                 }
113             }
114             if (!slash) {
115                 throw mimeTypeErr(mt);
116             }
117         }
118     }
119 
120     private static void checkExtensions(List<String> exts) {
121         for (String ext : checkNonNull(exts, "extensions")) {
122             int illegalDot = 0;
123             for (int i = 0, len = checkNonNull(ext, "extension").length(); i < len; i++) {
124                 char ch = ext.charAt(i);
125                 if (ch <= ' ' || ch >= 127 || ch == '<' || ch == '>' ||
126                         ch == ':' || ch == '"' || ch == '/' || ch == '\\' ||
127                         ch == '|' || ch == '?' || ch == '*') {
128                     throw extensionErr(ext);
129                 } else if (ch == '.') {
130                     int next = i + 1;
131                     if (i == illegalDot || next == len) {
132                         throw extensionErr(ext);
133                     }
134                     illegalDot = next;
135                 }
136             }
137         }
138     }
139 
140     private static String normalizeMimeType(String mt) {
141         return mt.toLowerCase(Locale.ENGLISH);
142     }
143 
144     private static String normalizeExtension(String ext) {
145         return ext.toLowerCase(Locale.ENGLISH);
146     }
147 
148     private TripleFormat(String name, Collection<String> mimeTypes, Charset charset,
149                      Collection<String> fileExtensions, String standardIRI, Capabilities capabilities) {
150         this.name = checkNonNull(name, "display name");
151         checkMimeTypes(this.mimeTypes = Collections.unmodifiableList(mimeTypes.stream()
152                 .map(TripleFormat::normalizeMimeType).distinct().collect(Collectors.toList())));
153         if ((this.charset = charset) != null && !charset.canEncode()) {
154             throw new IllegalArgumentException(charset + " does not allow encoding");
155         }
156         checkExtensions(this.fileExtensions = Collections.unmodifiableList(fileExtensions.stream()
157                 .map(TripleFormat::normalizeExtension).distinct().collect(Collectors.toList())));
158         this.standardIRI = standardIRI == null ? null : vf.createIRI(standardIRI);
159         this.capabilities = checkNonNull(capabilities, "capabilities");
160     }
161 
162     public static TripleFormat of(String displayName, Collection<String> mimeTypes, Charset defaultCharset,
163                                   Collection<String> fileExtensions, String standardIRI, Capabilities capabilities) {
164         return new TripleFormat(displayName, mimeTypes, defaultCharset, fileExtensions, standardIRI, capabilities);
165     }
166 
167     public Optional<Charset> getCharset() {
168         return Optional.ofNullable(charset);
169     }
170 
171     static Capabilities capabilities(RDFFormat format) {
172         if (format.supportsContexts()) {
173             return format.supportsNamespaces() ? QUADS_AND_NAMESPACES : QUADS;
174         } else {
175             return format.supportsNamespaces() ? TRIPLES_AND_NAMESPACES : TRIPLES;
176         }
177     }
178 
179     private static String iri(IRI iri) {
180         return iri == null ? null : iri.stringValue();
181     }
182 
183     static TripleFormat of(RDFFormat format) {
184         TripleFormat f = of(format.getName(), format.getMIMETypes(),
185                 format.getCharset(), format.getFileExtensions(), iri(format.getStandardURI()),
186                 capabilities(format));
187         f.rdfFormat = format;
188         return f;
189     }
190 
191     RDFFormat toRDFFormat() {
192         RDFFormat fmt = rdfFormat;
193         if (fmt != null) {
194             return fmt;
195         }
196         Capabilities capabilities = this.capabilities;
197         if (!capabilities.has(TRIPLES)) {
198             throw new UnsupportedOperationException("This format does not print RDF triples");
199         }
200         return rdfFormat = new RDFFormat(name, mimeTypes, charset, fileExtensions, standardIRI,
201                 capabilities.has(TRIPLES_AND_NAMESPACES), capabilities.has(QUADS));
202     }
203 
204     public Optional<IRI> getStandardIRI() {
205         return Optional.ofNullable(standardIRI);
206     }
207 
208     public List<String> getMimeTypes() {
209         return mimeTypes;
210     }
211 
212     public String getMimeType() {
213         return mimeTypes.get(0);
214     }
215 
216     public List<String> getExtensions() {
217         return fileExtensions;
218     }
219 
220     public Optional<String> getExtension() {
221         return fileExtensions.isEmpty() ? Optional.empty() : Optional.of(fileExtensions.get(0));
222     }
223 
224     public Capabilities getCapabilities() {
225         return capabilities;
226     }
227 
228     public String getDisplayName() {
229         return name;
230     }
231 
232     public String toString() {
233         return name + mimeTypes.stream().collect(
234                 Collectors.joining(", ", " (mimeTypes=", "; "))
235                 + fileExtensions.stream().collect(
236                         Collectors.joining(", ", "ext=", ")"));
237     }
238 
239 }