From 9b2d4bcb111f67df7a4ae7ca3d223fa1c3daf5fc Mon Sep 17 00:00:00 2001 From: Ben Fry Date: Sat, 28 Sep 2013 15:53:46 -0400 Subject: [PATCH] workaround for unicode issues with NLF in XML files (#2100) --- core/src/processing/core/PApplet.java | 1 + core/src/processing/data/XML.java | 63 +++++++++++++++++++-------- core/todo.txt | 1 + todo.txt | 13 ++++-- 4 files changed, 56 insertions(+), 22 deletions(-) diff --git a/core/src/processing/core/PApplet.java b/core/src/processing/core/PApplet.java index 023917444..71c85020b 100755 --- a/core/src/processing/core/PApplet.java +++ b/core/src/processing/core/PApplet.java @@ -6104,6 +6104,7 @@ public class PApplet extends Applet public XML loadXML(String filename, String options) { try { return new XML(createReader(filename), options); +// return new XML(createInput(filename), options); } catch (Exception e) { e.printStackTrace(); return null; diff --git a/core/src/processing/data/XML.java b/core/src/processing/data/XML.java index 28023b768..5860eafc2 100644 --- a/core/src/processing/data/XML.java +++ b/core/src/processing/data/XML.java @@ -82,7 +82,9 @@ public class XML implements Serializable { /** - * Advanced users only; see loadXML() in PApplet. + * Advanced users only; use loadXML() in PApplet. This is not a supported + * function and is subject to change. It is available simply for users that + * would like to handle the exceptions in a particular way. * * @nowebref */ @@ -92,7 +94,7 @@ public class XML implements Serializable { /** - * Advanced users only; see loadXML() in PApplet. + * Advanced users only; use loadXML() in PApplet. * * @nowebref */ @@ -109,19 +111,31 @@ public class XML implements Serializable { /** - * Shouldn't be part of main p5 reference, this is for advanced users. - * Note that while it doesn't accept anything but UTF-8, this is preserved - * so that we have some chance of implementing that in the future. + * Unlike the loadXML() method in PApplet, this version works with files + * that are not in UTF-8 format. * * @nowebref */ public XML(InputStream input, String options) throws IOException, ParserConfigurationException, SAXException { - this(PApplet.createReader(input), options); + //this(PApplet.createReader(input), options); // won't handle non-UTF8 + DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance(); + + try { + // Prevent 503 errors from www.w3.org + factory.setAttribute("http://apache.org/xml/features/nonvalidating/load-external-dtd", false); + } catch (IllegalArgumentException e) { + // ignore this; Android doesn't like it + } + + factory.setExpandEntityReferences(false); + DocumentBuilder builder = factory.newDocumentBuilder(); + Document document = builder.parse(new InputSource(input)); + node = document.getDocumentElement(); } /** - * Advanced users only; see loadXML() in PApplet. + * Advanced users only; use loadXML() in PApplet. * * @nowebref */ @@ -131,11 +145,17 @@ public class XML implements Serializable { /** - * Advanced users only; see loadXML() in PApplet. + * Advanced users only; use loadXML() in PApplet. + * + * Added extra code to handle \u2028 (Unicode NLF), which is sometimes + * inserted by web browsers (Safari?) and not distinguishable from a "real" + * LF (or CRLF) in some text editors (i.e. TextEdit on OS X). Only doing + * this for XML (and not all Reader objects) because LFs are essential. + * https://github.com/processing/processing/issues/2100 * * @nowebref */ - public XML(Reader reader, String options) throws IOException, ParserConfigurationException, SAXException { + public XML(final Reader reader, String options) throws IOException, ParserConfigurationException, SAXException { DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance(); // Prevent 503 errors from www.w3.org @@ -164,17 +184,24 @@ public class XML implements Serializable { // builder = new SAXBuilder(); // builder.setValidation(validating); -// print(dataPath("1broke.html"), System.out); + Document document = builder.parse(new InputSource(new Reader() { + @Override + public int read(char[] cbuf, int off, int len) throws IOException { + int count = reader.read(cbuf, off, len); + for (int i = 0; i < count; i++) { + if (cbuf[off+i] == '\u2028') { + cbuf[off+i] = '\n'; + } + } + return count; + } -// Document document = builder.parse(dataPath("1_alt.html")); - Document document = builder.parse(new InputSource(reader)); + @Override + public void close() throws IOException { + reader.close(); + } + })); node = document.getDocumentElement(); -// name = node.getNodeName(); - -// NodeList nodeList = document.getDocumentElement().getChildNodes(); -// for (int i = 0; i < nodeList.getLength(); i++) { -// } -// print(createWriter("data/1_alt_reparse.html"), document.getDocumentElement(), 0); } diff --git a/core/todo.txt b/core/todo.txt index a13ccb15e..f5891d7f2 100644 --- a/core/todo.txt +++ b/core/todo.txt @@ -7,6 +7,7 @@ X screen stops updating sometimes with retina X https://github.com/processing/processing/issues/1699 _ Unicode NLF causing problems in XML files +_ https://github.com/processing/processing/issues/2100 _ not handled by BufferedReader (or XML parser) _ http://stackoverflow.com/questions/10556875/list-of-unicode-characters-that-should-be-filtered-in-output _ http://stackoverflow.com/questions/3072152/what-is-unicode-character-2028-ls-line-separator-used-for diff --git a/todo.txt b/todo.txt index d41afc57f..b5b17f66d 100644 --- a/todo.txt +++ b/todo.txt @@ -22,6 +22,13 @@ also need to have 10.8 version of the SDK (old Xcode won't work) _ fix console font on Windows and Linux with 7u40 _ the message area text also looks ugly.. can we fix? +_ add pref to select PDE font (so that CJKV languages work better) +_ https://github.com/processing/processing/issues/2078 +_ should we embed the PDE font into the JRE? +_ this would allow it to show up in the menus, etc +_ but might be a problem on Linux +_ where the JRE is often replaced +_ and where the font is needed most build X remove video library for other platforms in download @@ -93,8 +100,6 @@ _ "String index out of range" error _ https://github.com/processing/processing/issues/1940 _ freeze after splash screen on OS X (looks like core.jar in the path) _ https://github.com/processing/processing/issues/1872 -_ add pref to select PDE font (so that CJKV languages work better) -_ https://github.com/processing/processing/issues/2078 medium _ use platformDelete() to remove untitled sketches? @@ -511,6 +516,8 @@ _ mark examples as untitled (rather than read-only) _ maybe even pull these directly from the zip file? _ load examples from zip files _ http://code.google.com/p/processing/issues/detail?id=143 +_ don't make examples read-only +_ just do them from psk files _ disallow add file to sketch, export, export application _ pretty much anything inside the sketch? _ but don't do this with untitled, cuz it kinda stinks @@ -519,8 +526,6 @@ _ mark example files as untitled _ though will that require the sketch to be saved before export? _ examples window sketches should load in proper environment _ write build.xml file to automatically update the examples -_ don't make examples read-only -_ just do them from psk files _ sketch.isReadOnly returns false for examples coming from multiple modes _ http://code.google.com/p/processing/issues/detail?id=734 _ see how library installation goes, then possibly do same w/ examples