workaround for Unicode issues with NLF in XML files (#2100)

This commit is contained in:
Ben Fry
2013-09-28 15:53:46 -04:00
parent 08e93b02d3
commit 9b2d4bcb11
4 changed files with 56 additions and 22 deletions

View File

@@ -6104,6 +6104,7 @@ public class PApplet extends Applet
public XML loadXML(String filename, String options) {
try {
return new XML(createReader(filename), options);
// return new XML(createInput(filename), options);
} catch (Exception e) {
e.printStackTrace();
return null;

View File

@@ -82,7 +82,9 @@ public class XML implements Serializable {
/**
* Advanced users only; see loadXML() in PApplet.
* Advanced users only; use loadXML() in PApplet. This is not a supported
* function and is subject to change. It is available simply for users that
* would like to handle the exceptions in a particular way.
*
* @nowebref
*/
@@ -92,7 +94,7 @@ public class XML implements Serializable {
/**
* Advanced users only; see loadXML() in PApplet.
* Advanced users only; use loadXML() in PApplet.
*
* @nowebref
*/
@@ -109,19 +111,31 @@ public class XML implements Serializable {
/**
* Shouldn't be part of main p5 reference, this is for advanced users.
* Note that while it doesn't accept anything but UTF-8, this is preserved
* so that we have some chance of implementing that in the future.
* Unlike the loadXML() method in PApplet, this version works with files
* that are not in UTF-8 format.
*
* @nowebref
*/
public XML(InputStream input, String options) throws IOException, ParserConfigurationException, SAXException {
this(PApplet.createReader(input), options);
//this(PApplet.createReader(input), options); // won't handle non-UTF8
DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
try {
// Prevent 503 errors from www.w3.org
factory.setAttribute("http://apache.org/xml/features/nonvalidating/load-external-dtd", false);
} catch (IllegalArgumentException e) {
// ignore this; Android doesn't like it
}
factory.setExpandEntityReferences(false);
DocumentBuilder builder = factory.newDocumentBuilder();
Document document = builder.parse(new InputSource(input));
node = document.getDocumentElement();
}
/**
* Advanced users only; see loadXML() in PApplet.
* Advanced users only; use loadXML() in PApplet.
*
* @nowebref
*/
@@ -131,11 +145,17 @@ public class XML implements Serializable {
/**
* Advanced users only; see loadXML() in PApplet.
* Advanced users only; use loadXML() in PApplet.
*
* Added extra code to handle \u2028 (Unicode NLF), which is sometimes
* inserted by web browsers (Safari?) and not distinguishable from a "real"
* LF (or CRLF) in some text editors (e.g. TextEdit on OS X). Only doing
* this for XML (and not all Reader objects) because LFs are essential.
* https://github.com/processing/processing/issues/2100
*
* @nowebref
*/
public XML(Reader reader, String options) throws IOException, ParserConfigurationException, SAXException {
public XML(final Reader reader, String options) throws IOException, ParserConfigurationException, SAXException {
DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
// Prevent 503 errors from www.w3.org
@@ -164,17 +184,24 @@ public class XML implements Serializable {
// builder = new SAXBuilder();
// builder.setValidation(validating);
// print(dataPath("1broke.html"), System.out);
Document document = builder.parse(new InputSource(new Reader() {
@Override
public int read(char[] cbuf, int off, int len) throws IOException {
int count = reader.read(cbuf, off, len);
for (int i = 0; i < count; i++) {
if (cbuf[off+i] == '\u2028') {
cbuf[off+i] = '\n';
}
}
return count;
}
// Document document = builder.parse(dataPath("1_alt.html"));
Document document = builder.parse(new InputSource(reader));
@Override
public void close() throws IOException {
reader.close();
}
}));
node = document.getDocumentElement();
// name = node.getNodeName();
// NodeList nodeList = document.getDocumentElement().getChildNodes();
// for (int i = 0; i < nodeList.getLength(); i++) {
// }
// print(createWriter("data/1_alt_reparse.html"), document.getDocumentElement(), 0);
}

View File

@@ -7,6 +7,7 @@ X screen stops updating sometimes with retina
X https://github.com/processing/processing/issues/1699
_ Unicode NLF causing problems in XML files
_ https://github.com/processing/processing/issues/2100
_ not handled by BufferedReader (or XML parser)
_ http://stackoverflow.com/questions/10556875/list-of-unicode-characters-that-should-be-filtered-in-output
_ http://stackoverflow.com/questions/3072152/what-is-unicode-character-2028-ls-line-separator-used-for

View File

@@ -22,6 +22,13 @@ also need to have 10.8 version of the SDK (old Xcode won't work)
_ fix console font on Windows and Linux with 7u40
_ the message area text also looks ugly.. can we fix?
_ add pref to select PDE font (so that CJKV languages work better)
_ https://github.com/processing/processing/issues/2078
_ should we embed the PDE font into the JRE?
_ this would allow it to show up in the menus, etc
_ but might be a problem on Linux
_ where the JRE is often replaced
_ and where the font is needed most
build
X remove video library for other platforms in download
@@ -93,8 +100,6 @@ _ "String index out of range" error
_ https://github.com/processing/processing/issues/1940
_ freeze after splash screen on OS X (looks like core.jar in the path)
_ https://github.com/processing/processing/issues/1872
_ add pref to select PDE font (so that CJKV languages work better)
_ https://github.com/processing/processing/issues/2078
medium
_ use platformDelete() to remove untitled sketches?
@@ -511,6 +516,8 @@ _ mark examples as untitled (rather than read-only)
_ maybe even pull these directly from the zip file?
_ load examples from zip files
_ http://code.google.com/p/processing/issues/detail?id=143
_ don't make examples read-only
_ just do them from psk files
_ disallow add file to sketch, export, export application
_ pretty much anything inside the sketch?
_ but don't do this with untitled, cuz it kinda stinks
@@ -519,8 +526,6 @@ _ mark example files as untitled
_ though will that require the sketch to be saved before export?
_ examples window sketches should load in proper environment
_ write build.xml file to automatically update the examples
_ don't make examples read-only
_ just do them from psk files
_ sketch.isReadOnly returns false for examples coming from multiple modes
_ http://code.google.com/p/processing/issues/detail?id=734
_ see how library installation goes, then possibly do same w/ examples