MATSIM
MatsimFileTypeGuesser.java
Go to the documentation of this file.
1 /* *********************************************************************** *
2  * project: org.matsim.*
3  * MatsimFileTypeGuesser.java
4  * *
5  * *********************************************************************** *
6  * *
7  * copyright : (C) 2009 by the members listed in the COPYING, *
8  * LICENSE and WARRANTY file. *
9  * email : info at matsim dot org *
10  * *
11  * *********************************************************************** *
12  * *
13  * This program is free software; you can redistribute it and/or modify *
14  * it under the terms of the GNU General Public License as published by *
15  * the Free Software Foundation; either version 2 of the License, or *
16  * (at your option) any later version. *
17  * See also COPYING, LICENSE and WARRANTY file *
18  * *
19  * *********************************************************************** */
20 
21 package org.matsim.core.utils.io;
22 
23 import java.io.IOException;
24 import java.io.UncheckedIOException;
25 import java.util.Locale;
26 
27 import javax.xml.parsers.ParserConfigurationException;
28 import javax.xml.parsers.SAXParser;
29 import javax.xml.parsers.SAXParserFactory;
30 
31 import org.apache.logging.log4j.LogManager;
32 import org.apache.logging.log4j.Logger;
33 import org.xml.sax.Attributes;
34 import org.xml.sax.InputSource;
35 import org.xml.sax.SAXException;
36 import org.xml.sax.XMLReader;
37 import org.xml.sax.helpers.DefaultHandler;
38 
44 public class MatsimFileTypeGuesser extends DefaultHandler {
45 
46  private static final Logger log = LogManager.getLogger(MatsimFileTypeGuesser.class);
/**
 * The kinds of MATSim files this guesser can report.
 *
 * <p>The declaration order is kept stable so that {@code ordinal()} values do not change.
 */
public enum FileType {
    Config,
    Network,
    Facilities,
    Population,
    World,
    Counts,
    Events,
    Households,
    TransimsVehicle,
    OTFVis,
    SignalSystems,
    LaneDefinitions,
    SignalGroups,
    SignalControl,
    AmberTimes,
    TransitSchedule,
    Vehicles,
    ObjectAttributes
}
53 
54  public static final String SYSTEMIDNOTFOUNDMESSAGE = "System Id of xml document couldn't be detected. " +
55  "Make sure that you try to read a xml document with a valid header. " +
56  "If your header seems to be ok, make shure that there is no / at the " +
57  "end of the first part of the tuple used as value for xsi:schemaLocation.";
58 
59 
60  private FileType fileType = null;
61  private String xmlPublicId = null;
62  private String xmlSystemId = null;
63 
64  public MatsimFileTypeGuesser(final String fileName) throws UncheckedIOException {
65  String name = fileName.toLowerCase(Locale.ROOT);
66  if (name.endsWith(".xml.gz") || name.endsWith(".xml")) {
67  guessFileTypeXml(fileName);
68  // I think the following would also be useful for the API, but with which name?
69  String shortSystemId = null;
70  if (this.xmlSystemId != null) {
71  shortSystemId = this.xmlSystemId.substring(this.xmlSystemId.replace('\\', '/').lastIndexOf('/') + 1);
72  }
73  if (shortSystemId != null) {
74  if (shortSystemId.startsWith("network_")) {
75  this.fileType = FileType.Network;
76  } else if (shortSystemId.startsWith("world_")) {
77  this.fileType = FileType.World;
78  } else if (shortSystemId.startsWith("plans_")) {
79  this.fileType = FileType.Population;
80  } else if (shortSystemId.startsWith("population_")) {
81  this.fileType = FileType.Population;
82  } else if (shortSystemId.startsWith("facilities_")) {
83  this.fileType = FileType.Facilities;
84  } else if (shortSystemId.startsWith("config_")) {
85  this.fileType = FileType.Config;
86  } else if (shortSystemId.startsWith("counts_")) {
87  this.fileType = FileType.Counts;
88  } else if (shortSystemId.startsWith("vehicleDefinitions_")) {
89  this.fileType = FileType.Vehicles;
90  } else if (shortSystemId.startsWith("transitSchedule_")) {
91  this.fileType = FileType.TransitSchedule;
92  } else if (shortSystemId.startsWith("objectattributes_")) {
93  this.fileType = FileType.ObjectAttributes;
94  }
95  }
96 
97  } else if (name.endsWith(".txt.gz") || name.endsWith(".txt")) {
98  this.fileType = FileType.Events;
99  } else if (name.endsWith(".mvi.gz") || name.endsWith(".mvi")) {
100  this.fileType = FileType.OTFVis;
101  } else if (name.endsWith(".veh.gz") || name.endsWith(".veh")) {
102  this.fileType = FileType.TransimsVehicle;
103  }
104  }
105 
107  return this.fileType;
108  }
109 
114  public String getPublicId() {
115  return this.xmlPublicId;
116  }
117 
122  public String getSystemId() {
123  return this.xmlSystemId;
124  }
125 
126  private void guessFileTypeXml(final String fileName) throws UncheckedIOException {
127  SAXParserFactory factory = SAXParserFactory.newInstance();
128  factory.setValidating(false);
129  factory.setNamespaceAware(true);
130  try {
131  XmlHandler handler = new XmlHandler();
132  InputSource input = new InputSource(IOUtils.getBufferedReader(fileName));
133  factory.setFeature("http://apache.org/xml/features/validation/schema", true);
134  SAXParser parser = factory.newSAXParser();
135  XMLReader reader = parser.getXMLReader();
136  reader.setContentHandler(handler);
137  reader.setErrorHandler(handler);
138  reader.setEntityResolver(handler);
139  reader.parse(input);
140  } catch (IOException e) {
141  throw new UncheckedIOException(e);
142  } catch (SAXException e) {
143  throw new UncheckedIOException(new IOException(e));
144  } catch (ParserConfigurationException e) {
145  throw new UncheckedIOException(new IOException (e));
146  } catch (XMLTypeDetectionException e) {
147  this.xmlPublicId = e.publicId;
148  this.xmlSystemId = e.systemId;
149  log.debug("Detected public id: " + this.xmlPublicId);
150  log.debug("Detected system Id: " + this.xmlSystemId);
151  if (e.rootTag != null) {
152  log.debug("Detected root tag: " + e.rootTag);
153  if ("events".equals(e.rootTag)) {
154  this.fileType = FileType.Events;
155  } else if ("signalGroups".equals(e.rootTag)) {
156  this.fileType = FileType.SignalGroups;
157  } else if ("signalSystems".equals(e.rootTag)) {
158  this.fileType = FileType.SignalSystems;
159  } else if ("signalControl".equals(e.rootTag)) {
160  this.fileType = FileType.SignalControl;
161  } else if ("laneDefinitions".equals(e.rootTag)) {
162  this.fileType = FileType.LaneDefinitions;
163  } else if ("counts".equals(e.rootTag)) {
164  this.fileType = FileType.Counts;
165  } else if ("transitSchedule".equals(e.rootTag)) {
166  this.fileType = FileType.TransitSchedule;
167  } else if ("objectAttributes".equals(e.rootTag)) {
168  this.fileType = FileType.ObjectAttributes;
169  } else {
170  log.warn("got unexpected rootTag: " + e.rootTag);
171  }
172  }
173  }
174  }
175 
176  private final static class XmlHandler extends DefaultHandler {
177 
179 
180  private boolean detectedFirstEntity = false;
181 
/** Creates a handler that has not seen any entity or element yet. */
public XmlHandler() {
    super(); // nothing else to initialise; the fields carry their own defaults
}
185 
186  @Override
187  public InputSource resolveEntity(final String publicId, final String systemId) {
188  /*
189  * As the xml schema of interest may be derived from other schema instances we
190  * are only interested in the first entity resolved.
191  */
192  if (! this.detectedFirstEntity){
193  this.exception = new XMLTypeDetectionException(publicId, systemId);
194  this.detectedFirstEntity = true;
195  }
196  if (systemId.endsWith(".dtd")){
197  throw this.exception;
198  }
199  return null;
200  }
201 
202  @Override
203  public void startElement(final String uri, final String localName, final String qName, final Attributes atts) throws SAXException {
204  String tag = (uri.length() == 0) ? qName : localName;
205  if (this.exception == null) {
206  this.exception = new XMLTypeDetectionException(null, null);
207  }
208  this.exception.rootTag = tag;
209  throw this.exception;
210  }
211  }
212 
219  private final static class XMLTypeDetectionException extends RuntimeException {
220 
221  private static final long serialVersionUID = 1L;
222 
223  public final String publicId;
224  public final String systemId;
225  public String rootTag;
226 
227  public XMLTypeDetectionException(final String publicId, final String systemId){
228  this.publicId = publicId;
229  this.systemId = systemId;
230  }
231 
232  @Override
233  public synchronized Throwable fillInStackTrace() {
234  return this; // optimization, as we're never interested in that stack trace
235  }
236 
237  }
238 }
void startElement(final String uri, final String localName, final String qName, final Attributes atts)
static BufferedReader getBufferedReader(URL url, Charset charset)
Definition: IOUtils.java:321
InputSource resolveEntity(final String publicId, final String systemId)