/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *  http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
17  
18  package org.apache.any23.writer;
19  
20  import org.eclipse.rdf4j.model.IRI;
21  import org.eclipse.rdf4j.model.ValueFactory;
22  import org.eclipse.rdf4j.model.impl.SimpleValueFactory;
23  import org.eclipse.rdf4j.rio.RDFFormat;
24  
25  import java.nio.charset.Charset;
26  import java.util.Collection;
27  import java.util.Collections;
28  import java.util.List;
29  import java.util.Locale;
30  import java.util.Optional;
31  import java.util.stream.Collectors;
32  
33  /**
34   * @author Hans Brende (hansbrende@apache.org)
35   */
36  public class TripleFormat {
37      private final String name;
38      private final IRI standardIRI;
39      private final List<String> mimeTypes;
40      private final Charset charset;
41      private final List<String> fileExtensions;
42      private final Capabilities capabilities;
43      RDFFormat rdfFormat;
44  
45      private static final ValueFactory vf = SimpleValueFactory.getInstance();
46  
47      private static final int WRITES_TRIPLES = 1;
48      private static final int WRITES_GRAPHS = 1 << 1;
49      private static final int WRITES_NAMESPACES = 1 << 2;
50  
51      public static final Capabilities NONSTANDARD = new Capabilities(0);
52      public static final Capabilities TRIPLES = new Capabilities(WRITES_TRIPLES);
53      public static final Capabilities QUADS = new Capabilities(WRITES_TRIPLES | WRITES_GRAPHS);
54      public static final Capabilities TRIPLES_AND_NAMESPACES = TRIPLES.withNamespaces();
55      public static final Capabilities QUADS_AND_NAMESPACES = QUADS.withNamespaces();
56  
57      public static class Capabilities {
58          private final int raw;
59  
60          private Capabilities(int raw) {
61              this.raw = raw;
62          }
63  
64          public boolean has(Capabilities other) {
65              int oraw = other.raw;
66              return (raw & oraw) == oraw;
67          }
68  
69          private Capabilities withNamespaces() {
70              return new Capabilities(raw | WRITES_NAMESPACES);
71          }
72  
73          // TODO: add "supportsComments()"
74      }
75  
76      private static IllegalArgumentException mimeTypeErr(String mt) {
77          return new IllegalArgumentException(mt + " is not a valid mimetype");
78      }
79  
80      private static IllegalArgumentException extensionErr(String ext) {
81          return new IllegalArgumentException(ext + " is not a valid extension");
82      }
83  
84      private static <E> E checkNonNull(E object, String name) {
85          if (object == null) {
86              throw new IllegalArgumentException(name + " must not be null");
87          }
88          return object;
89      }
90  
91      // see https://tools.ietf.org/html/rfc2045#section-5.1
92      private static void checkMimeTypes(List<String> mts) {
93          if (checkNonNull(mts, "mimetypes").isEmpty()) {
94              throw new IllegalArgumentException("mimetypes must not be empty");
95          }
96          for (String mt : mts) {
97              boolean slash = false;
98              for (int i = 0, len = checkNonNull(mt, "mimetype").length(); i < len; i++) {
99                  char ch = mt.charAt(i);
100                 if (ch <= ' ' || ch >= 127 || ch == '(' || ch == ')' || ch == '<' || ch == '>' || ch == '@' || ch == ','
101                         || ch == ';' || ch == ':' || ch == '\\' || ch == '"' || ch == '[' || ch == ']' || ch == '?'
102                         || ch == '='
103                         // also disallow wildcards:
104                         || ch == '*') {
105                     throw mimeTypeErr(mt);
106                 } else if (ch == '/') {
107                     if (slash || i == 0 || i + 1 == len) {
108                         throw mimeTypeErr(mt);
109                     }
110                     slash = true;
111                 }
112             }
113             if (!slash) {
114                 throw mimeTypeErr(mt);
115             }
116         }
117     }
118 
119     private static void checkExtensions(List<String> exts) {
120         for (String ext : checkNonNull(exts, "extensions")) {
121             int illegalDot = 0;
122             for (int i = 0, len = checkNonNull(ext, "extension").length(); i < len; i++) {
123                 char ch = ext.charAt(i);
124                 if (ch <= ' ' || ch >= 127 || ch == '<' || ch == '>' || ch == ':' || ch == '"' || ch == '/'
125                         || ch == '\\' || ch == '|' || ch == '?' || ch == '*') {
126                     throw extensionErr(ext);
127                 } else if (ch == '.') {
128                     int next = i + 1;
129                     if (i == illegalDot || next == len) {
130                         throw extensionErr(ext);
131                     }
132                     illegalDot = next;
133                 }
134             }
135         }
136     }
137 
138     private static String normalizeMimeType(String mt) {
139         return mt.toLowerCase(Locale.ENGLISH);
140     }
141 
142     private static String normalizeExtension(String ext) {
143         return ext.toLowerCase(Locale.ENGLISH);
144     }
145 
146     private TripleFormat(String name, Collection<String> mimeTypes, Charset charset, Collection<String> fileExtensions,
147             String standardIRI, Capabilities capabilities) {
148         this.name = checkNonNull(name, "display name");
149         checkMimeTypes(this.mimeTypes = Collections.unmodifiableList(
150                 mimeTypes.stream().map(TripleFormat::normalizeMimeType).distinct().collect(Collectors.toList())));
151         if ((this.charset = charset) != null && !charset.canEncode()) {
152             throw new IllegalArgumentException(charset + " does not allow encoding");
153         }
154         checkExtensions(this.fileExtensions = Collections.unmodifiableList(
155                 fileExtensions.stream().map(TripleFormat::normalizeExtension).distinct().collect(Collectors.toList())));
156         this.standardIRI = standardIRI == null ? null : vf.createIRI(standardIRI);
157         this.capabilities = checkNonNull(capabilities, "capabilities");
158     }
159 
160     public static TripleFormat of(String displayName, Collection<String> mimeTypes, Charset defaultCharset,
161             Collection<String> fileExtensions, String standardIRI, Capabilities capabilities) {
162         return new TripleFormat(displayName, mimeTypes, defaultCharset, fileExtensions, standardIRI, capabilities);
163     }
164 
165     public Optional<Charset> getCharset() {
166         return Optional.ofNullable(charset);
167     }
168 
169     static Capabilities capabilities(RDFFormat format) {
170         if (format.supportsContexts()) {
171             return format.supportsNamespaces() ? QUADS_AND_NAMESPACES : QUADS;
172         } else {
173             return format.supportsNamespaces() ? TRIPLES_AND_NAMESPACES : TRIPLES;
174         }
175     }
176 
177     private static String iri(IRI iri) {
178         return iri == null ? null : iri.stringValue();
179     }
180 
181     static TripleFormat of(RDFFormat format) {
182         TripleFormat f = of(format.getName(), format.getMIMETypes(), format.getCharset(), format.getFileExtensions(),
183                 iri(format.getStandardURI()), capabilities(format));
184         f.rdfFormat = format;
185         return f;
186     }
187 
188     RDFFormat toRDFFormat() {
189         RDFFormat fmt = rdfFormat;
190         if (fmt != null) {
191             return fmt;
192         }
193         Capabilities capabilities = this.capabilities;
194         if (!capabilities.has(TRIPLES)) {
195             throw new UnsupportedOperationException("This format does not print RDF triples");
196         }
197         return rdfFormat = new RDFFormat(name, mimeTypes, charset, fileExtensions, standardIRI,
198                 capabilities.has(TRIPLES_AND_NAMESPACES), capabilities.has(QUADS));
199     }
200 
201     public Optional<IRI> getStandardIRI() {
202         return Optional.ofNullable(standardIRI);
203     }
204 
205     public List<String> getMimeTypes() {
206         return mimeTypes;
207     }
208 
209     public String getMimeType() {
210         return mimeTypes.get(0);
211     }
212 
213     public List<String> getExtensions() {
214         return fileExtensions;
215     }
216 
217     public Optional<String> getExtension() {
218         return fileExtensions.isEmpty() ? Optional.empty() : Optional.of(fileExtensions.get(0));
219     }
220 
221     public Capabilities getCapabilities() {
222         return capabilities;
223     }
224 
225     public String getDisplayName() {
226         return name;
227     }
228 
229     public String toString() {
230         return name + mimeTypes.stream().collect(Collectors.joining(", ", " (mimeTypes=", "; "))
231                 + fileExtensions.stream().collect(Collectors.joining(", ", "ext=", ")"));
232     }
233 
234 }