MATSIM
MatsimXmlParser.java
Go to the documentation of this file.
1 /* *********************************************************************** *
2  * project: org.matsim.*
3  * MatsimXmlParser.java
4  * *
5  * *********************************************************************** *
6  * *
7  * copyright : (C) 2007 by the members listed in the COPYING, *
8  * LICENSE and WARRANTY file. *
9  * email : info at matsim dot org *
10  * *
11  * *********************************************************************** *
12  * *
13  * This program is free software; you can redistribute it and/or modify *
14  * it under the terms of the GNU General Public License as published by *
15  * the Free Software Foundation; either version 2 of the License, or *
16  * (at your option) any later version. *
17  * See also COPYING, LICENSE and WARRANTY file *
18  * *
19  * *********************************************************************** */
20 
21 package org.matsim.core.utils.io;
22 
23 import com.ctc.wstx.sax.WstxSAXParserFactory;
24 import org.apache.logging.log4j.LogManager;
25 import org.apache.logging.log4j.Logger;
27 import org.matsim.core.gbl.Gbl;
29 import org.xml.sax.*;
30 import org.xml.sax.helpers.DefaultHandler;
31 
32 import javax.xml.parsers.ParserConfigurationException;
33 import javax.xml.parsers.SAXParser;
34 import javax.xml.parsers.SAXParserFactory;
35 import java.io.File;
36 import java.io.IOException;
37 import java.io.InputStream;
38 import java.io.UncheckedIOException;
39 import java.net.URL;
40 import java.net.URLConnection;
41 import java.util.Stack;
42 
55 public abstract class MatsimXmlParser extends DefaultHandler implements MatsimReader {
56 
/**
 * Validation modes a parser can be constructed with. The mode is evaluated in
 * {@code parse(InputSource)} to choose the parser library and the validation features.
 */
public enum ValidationType {
    /** No validation is performed; the Woodstox parser is used. */
    NO_VALIDATION,
    /** DTD validation; the Woodstox parser is used with its validation feature enabled. */
    DTD_ONLY,
    /** Handled by the default JAXP parser with the XML-Schema validation feature enabled. */
    XSD_ONLY,
    /** Handled by the default JAXP parser with the XML-Schema validation feature enabled. */
    DTD_OR_XSD
}
58 
// Logger shared by all parser instances.
59  private static final Logger log = LogManager.getLogger(MatsimXmlParser.class);
60 
// One text buffer per currently open element: pushed in startElement(), filled by characters(),
// popped in endElement() and handed to endTag() as the element's content.
61  private final Stack<StringBuffer> buffers = new Stack<>();
// Names of the elements enclosing the one currently being processed; passed to startTag()/endTag().
62  private final Stack<String> theContext = new Stack<>();
63 
// Validation is only performed when this flag is true AND the validation type is not NO_VALIDATION.
64  private boolean isValidating = true;
// Forwarded to the SAX parser factory in parse(InputSource).
65  private boolean isNamespaceAware = true;
67 
// Directory prefix used by findDtdInLocalFilesystem(); when null that lookup step is skipped.
68  private String localDtdBase = null;
69  // yy this is NOT working for me with "dtd", but it IS working with null.
70  // Note that I am typically NOT running java from the root of the classpath. kai, mar'15
71 
// Taken from FeatureFlags.preferLocalDTDs() at construction time; when true, resolveEntity()
// tries the local lookups before the remote location, otherwise the remote location first.
72  private final boolean preferLocalDtds;
73 
// Last path segment of the first system id seen by resolveEntity(); null until then.
74  private String doctype = null;
// Description of what is being parsed (file name, URL or "stream"); used as a fallback
// location in the error reports when the exception carries neither system id nor public id.
79  private String theSource;
80 
/**
 * Creates a parser using the given validation mode.
 * The preference for local DTDs is read once from {@code FeatureFlags.preferLocalDTDs()}.
 *
 * @param validationType the validation mode evaluated later in {@code parse(InputSource)}
 */
public MatsimXmlParser(ValidationType validationType) {
    this.preferLocalDtds = FeatureFlags.preferLocalDTDs();
    this.validationType = validationType;
}
90 
/**
 * Callback invoked from {@code startElement} for every opening tag.
 *
 * @param name the element name: the qualified name when the namespace URI is empty, otherwise the local name
 * @param atts the attributes reported by the SAX parser for this element
 * @param context the names of the enclosing elements; the element being opened is pushed only after this call returns
 */
98  public abstract void startTag(String name, Attributes atts, Stack<String> context);
99 
/**
 * Callback invoked from {@code endElement} for every closing tag.
 *
 * @param name the element name: the qualified name when the namespace URI is empty, otherwise the local name
 * @param content the character data collected for this element via {@code characters}
 * @param context the names of the enclosing elements; the element being closed has already been popped
 */
108  public abstract void endTag(String name, String content, Stack<String> context);
109 
117  public final void setValidating(final boolean validateXml) {
118  this.isValidating = validateXml;
119  }
120 
128  public final void setNamespaceAware(final boolean awareness) {
129  this.isNamespaceAware = awareness;
130  }
131 
138  public final void setLocalDtdDirectory(final String localDtdDirectory) {
139  this.localDtdBase = localDtdDirectory;
140  }
141 
151  @Override
152  public final void readFile(final String filename) throws UncheckedIOException {
153  log.info("starting to parse xml from file " + filename + " ...");
154  this.theSource = filename;
155  parse(new InputSource(IOUtils.getBufferedReader(filename)));
156  }
157 
158  @Override
159  public final void readURL( final URL url ) throws UncheckedIOException {
160  parse( url ) ;
161  }
162 
163  public final void readStream(InputStream stream) throws UncheckedIOException {
164  parse(stream);
165  }
166 
167  public final void parse(final URL url) throws UncheckedIOException {
168  Gbl.assertNotNull(url);
169  this.theSource = url.toString();
170  log.info("starting to parse xml from url " + this.theSource + " ...");
171  System.out.flush();
172  parse(new InputSource(IOUtils.getBufferedReader(this.theSource)));
173  }
174 
175  public final void parse(final InputStream stream) throws UncheckedIOException {
176  this.theSource = "stream";
177  parse(new InputSource(stream));
178  }
179 
180  public final void parse(final InputSource input) throws UncheckedIOException {
181  try {
182  boolean validating = this.isValidating && this.validationType != ValidationType.NO_VALIDATION;
183  boolean useWstxParser = !validating || this.validationType == ValidationType.DTD_ONLY;
184 
185  if (useWstxParser) {
186  // use Woodstox-library as XML parser when no validation or only DTD-validation is required, as it is much faster than the default (xerces)
187 
188  WstxSAXParserFactory factory = new WstxSAXParserFactory();
189  factory.setValidating(validating);
190  factory.setNamespaceAware(this.isNamespaceAware);
191  factory.setFeature("http://xml.org/sax/features/external-general-entities", false); // prevent XEE attack: https://en.wikipedia.org/wiki/XML_external_entity_attack
192 
193  if (validating) {
194  factory.setFeature("http://xml.org/sax/features/validation", true); // required to enable DTD validation in Woodstox
195  SAXParser parser = factory.newSAXParser();
196  XMLReader reader = parser.getXMLReader();
197  reader.setContentHandler(this);
198  reader.setErrorHandler(this);
199  reader.setEntityResolver(this);
200  reader.parse(input);
201  } else {
202  SAXParser parser = factory.newSAXParser();
203  parser.parse(input, this);
204  }
205 
206  } else {
207  // use the default (Xerces) SAX parser, it is slower than Woodstox, but supports XSD validation
208 
209  SAXParserFactory factory = SAXParserFactory.newInstance();
210  factory.setValidating(validating);
211  factory.setNamespaceAware(this.isNamespaceAware);
212  factory.setFeature("http://xml.org/sax/features/external-general-entities", false); // prevent XEE attack: https://en.wikipedia.org/wiki/XML_external_entity_attack
213 
214  if (validating) {
215  // enable optional support for XML Schemas
216  factory.setFeature("http://apache.org/xml/features/validation/schema", true);
217  SAXParser parser = factory.newSAXParser();
218  XMLReader reader = parser.getXMLReader();
219  reader.setContentHandler(this);
220  reader.setErrorHandler(this);
221  reader.setEntityResolver(this);
222  reader.parse(input);
223  } else {
224  SAXParser parser = factory.newSAXParser();
225  parser.parse(input, this);
226  }
227  }
228  } catch (IOException e) {
229  throw new UncheckedIOException(e);
230  } catch (SAXException | ParserConfigurationException e) {
231  throw new UncheckedIOException(new IOException(e));
232  }
233  }
234 
235  public final String getDoctype() {
236  return this.doctype;
237  }
238 
239  protected void setDoctype(final String doctype) {
240  // implementation of this method is what reacts to the different version of the file formats, so we cannot make it final. kai, jul'16
241 
242  this.doctype = doctype;
243  }
244 
245  /* implement EntityResolver */
246 
247  @Override
248  public final InputSource resolveEntity(final String publicId, final String systemId) {
249  // ConfigReader* did override this. Not sure if it did that for good reasons. kai, jul'16
250 
251  // extract the last part of the systemId
252  int index = systemId.replace('\\', '/').lastIndexOf('/');
253  String shortSystemId = systemId.substring(index + 1);
254 
255  if (this.doctype == null) {
256  // this is the first systemId we have to resolve, assume it's the doctype
257  // I haven't found any other way to determine the doctype of the currently read file
258  setDoctype(shortSystemId);
259  }
260 
261  InputSource source;
262  if (this.preferLocalDtds) {
263  source = findDtdInLocalFilesystem(shortSystemId);
264  if (source == null) {
265  source = findDtdInClasspath(shortSystemId);
266  }
267  if (source == null) {
268  source = findDtdInDefaultLocation(shortSystemId);
269  }
270  if (source == null) {
271  source = findDtdInRemoteLocation(systemId);
272  }
273  } else {
274  source = findDtdInRemoteLocation(systemId);
275  if (source == null) {
276  source = findDtdInLocalFilesystem(shortSystemId);
277  }
278  if (source == null) {
279  source = findDtdInClasspath(shortSystemId);
280  }
281  if (source == null) {
282  source = findDtdInDefaultLocation(shortSystemId);
283  }
284  }
285 
286  if (source == null) {
287  // We could neither get the remote nor the local version of the dtd, show a warning
288  log.warn("Could neither get the DTD from the web nor a local one. " + systemId);
289  } else {
290  source.setSystemId(systemId);
291  }
292  return source;
293  }
294 
295  private static InputSource findDtdInRemoteLocation(final String fullSystemId) {
296  log.info("Trying to load " + fullSystemId + ". In some cases (e.g. network interface up but no connection), this may take a bit.");
297  try {
298  URL url = new URL(fullSystemId);
299  URLConnection urlConn = url.openConnection();
300  urlConn.setConnectTimeout(5000);
301  urlConn.setReadTimeout(5000);
302  urlConn.setAllowUserInteraction(false);
303 
304  InputStream is = urlConn.getInputStream();
305  /* If there was no exception until here, then the path is valid.
306  * Return the opened stream as a source. If we would return null, then the SAX-Parser
307  * would have to fetch the same file again, requiring two accesses to the webserver */
308  return new InputSource(is);
309  } catch (IOException e) {
310  // There was a problem getting the (remote) file, just show the error as information for the user
311  log.info(e.toString() + ". May not be fatal, will try to load it locally.");
312  }
313  return null;
314  }
315 
316  private InputSource findDtdInLocalFilesystem(final String shortSystemId) {
317  if (this.localDtdBase != null) {
318  String localFileName = this.localDtdBase + "/" + shortSystemId;
319  File dtdFile = new File(localFileName);
320 // log.debug("dtdfile: " + dtdFile.getAbsolutePath());
321  if (dtdFile.exists() && dtdFile.isFile() && dtdFile.canRead()) {
322  log.info("Using the local DTD " + localFileName + " with absolute path " + dtdFile.getAbsolutePath() );
323  return new InputSource(dtdFile.getAbsolutePath());
324  }
325  }
326  return null;
327  }
328 
329  private InputSource findDtdInClasspath(final String shortSystemId) {
330  // still no success, try to load it with the ClassLoader, in case we're stuck in a jar...
331  InputStream stream = this.getClass().getResourceAsStream("/dtd/" + shortSystemId);
332  if (stream != null) {
333  log.info("Using local DTD from classpath:dtd/" + shortSystemId);
334  return new InputSource(stream);
335  }
336  return null;
337  }
338 
339  private static InputSource findDtdInDefaultLocation(final String shortSystemId) {
340  log.info("Trying to access local dtd folder at standard location ./dtd...");
341  File dtdFile = new File("./dtd/" + shortSystemId);
342  if (dtdFile.exists() && dtdFile.isFile() && dtdFile.canRead()) {
343  log.info("Using the local DTD " + dtdFile.getAbsolutePath());
344  return new InputSource(dtdFile.getAbsolutePath());
345  }
346  return null;
347  }
348 
349  /* implement ContentHandler */
350 
351  @Override
352  public void characters(final char[] ch, final int start, final int length) throws SAXException {
353  // has to be non-final since otherwise the events parser does not work. Probably ok (this here is just a default implementation). kai, jul'16
354 
355  StringBuffer buffer = this.buffers.peek();
356  if (buffer != null) {
357  buffer.append(ch, start, length);
358  }
359  }
360 
361  @Override
362  public final void startElement(final String uri, final String localName, final String qName, Attributes atts) {
363  // I have not good intuition if making this one non-final might be ok. kai, jul'16
364 
365  String tag = (uri.length() == 0) ? qName : localName;
366  this.buffers.push(new StringBuffer());
367  this.startTag(tag, atts, this.theContext);
368  this.theContext.push(tag);
369  }
370 
371  @Override
372  public final void endElement(final String uri, final String localName, final String qName) throws SAXException {
373  // I have not good intuition if making this one non-final might be ok. kai, jul'16
374 
375  String tag = (uri.length() == 0) ? qName : localName;
376  this.theContext.pop();
377  StringBuffer buffer = this.buffers.pop();
378  this.endTag(tag, buffer.toString(), this.theContext);
379  }
380 
381  /* implement ErrorHandler */
382 
383  @Override
384  public final void error(final SAXParseException ex) throws SAXException {
385  if (this.theContext.isEmpty()) {
386  System.err.println("Missing DOCTYPE.");
387  }
388  System.err.println("XML-ERROR: " + getInputSource(ex) + ", line " + ex.getLineNumber() + ", column " + ex.getColumnNumber() + ":");
389  System.err.println(ex.toString());
390  throw ex;
391  }
392 
393  @Override
394  public final void fatalError(final SAXParseException ex) throws SAXException {
395  System.err.println("XML-FATAL: " + getInputSource(ex) + ", line " + ex.getLineNumber() + ", column " + ex.getColumnNumber() + ":");
396  System.err.println(ex.toString());
397  throw ex;
398  }
399 
400  @Override
401  public final void warning(final SAXParseException ex) throws SAXException {
402  System.err.println("XML-WARNING: " + getInputSource(ex) + ", line " + ex.getLineNumber() + ", column " + ex.getColumnNumber() + ":");
403  System.err.println(ex.getMessage());
404  }
405 
406  private String getInputSource(final SAXParseException ex) {
407  System.out.println(ex.getPublicId());
408  System.out.println(ex.getSystemId());
409  if (ex.getCause() != null) {
410  System.out.println(ex.getCause().getMessage());
411  }
412  System.out.println(ex.getLocalizedMessage());
413  System.out.println(ex.getMessage());
414  if (ex.getSystemId() != null) {
415  return ex.getSystemId();
416  }
417  else if (ex.getPublicId() != null) {
418  return ex.getPublicId();
419  }
420  //try to use the locally stored inputSource
421  return this.theSource;
422  }
423 
426  public static double parseDouble(String value) throws NumberFormatException {
427  if ("INF".equals(value)) {
428  return Double.POSITIVE_INFINITY;
429  }
430  if ("-INF".equals(value)) {
431  return Double.NEGATIVE_INFINITY;
432  }
433  if ("NaN".equals(value)) {
434  return Double.NaN;
435  }
436  return Double.parseDouble(value);
437  }
438 
439 }
void characters(final char[] ch, final int start, final int length)
static InputSource findDtdInDefaultLocation(final String shortSystemId)
final void setValidating(final boolean validateXml)
String getInputSource(final SAXParseException ex)
static InputSource findDtdInRemoteLocation(final String fullSystemId)
InputSource findDtdInLocalFilesystem(final String shortSystemId)
abstract void startTag(String name, Attributes atts, Stack< String > context)
final void parse(final InputSource input)
final void setLocalDtdDirectory(final String localDtdDirectory)
static BufferedReader getBufferedReader(URL url, Charset charset)
Definition: IOUtils.java:321
final void setNamespaceAware(final boolean awareness)
final void error(final SAXParseException ex)
final InputSource resolveEntity(final String publicId, final String systemId)
final void parse(final InputStream stream)
static boolean preferLocalDTDs()
InputSource findDtdInClasspath(final String shortSystemId)
final void warning(final SAXParseException ex)
final void startElement(final String uri, final String localName, final String qName, Attributes atts)
static void assertNotNull(Object obj)
Definition: Gbl.java:212
final void readStream(InputStream stream)
abstract void endTag(String name, String content, Stack< String > context)
final void readFile(final String filename)
MatsimXmlParser(ValidationType validationType)
final void endElement(final String uri, final String localName, final String qName)
static double parseDouble(String value)
final void fatalError(final SAXParseException ex)