workaround for unicode issues with NLF in XML files (#2100)

This commit is contained in:
Ben Fry
2013-09-28 15:53:46 -04:00
parent 08e93b02d3
commit 9b2d4bcb11
4 changed files with 56 additions and 22 deletions

View File

@@ -82,7 +82,9 @@ public class XML implements Serializable {
/**
* Advanced users only; see loadXML() in PApplet.
* Advanced users only; use loadXML() in PApplet. This is not a supported
* function and is subject to change. It is available simply for users that
* would like to handle the exceptions in a particular way.
*
* @nowebref
*/
@@ -92,7 +94,7 @@ public class XML implements Serializable {
/**
* Advanced users only; see loadXML() in PApplet.
* Advanced users only; use loadXML() in PApplet.
*
* @nowebref
*/
@@ -109,19 +111,31 @@ public class XML implements Serializable {
/**
* Shouldn't be part of main p5 reference, this is for advanced users.
* Note that while it doesn't accept anything but UTF-8, this is preserved
* so that we have some chance of implementing that in the future.
* Unlike the loadXML() method in PApplet, this version works with files
* that are not in UTF-8 format.
*
* @nowebref
*/
public XML(InputStream input, String options) throws IOException, ParserConfigurationException, SAXException {
this(PApplet.createReader(input), options);
//this(PApplet.createReader(input), options); // won't handle non-UTF8
DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
try {
// Prevent 503 errors from www.w3.org
// (the feature below tells the parser not to load external DTDs)
factory.setAttribute("http://apache.org/xml/features/nonvalidating/load-external-dtd", false);
} catch (IllegalArgumentException e) {
// ignore this; Android doesn't like it
}
// Ask the factory for parsers that do not expand entity references
factory.setExpandEntityReferences(false);
DocumentBuilder builder = factory.newDocumentBuilder();
// Pass the InputStream itself to the parser instead of wrapping it in a
// Reader first (the Reader route is the one noted above as not handling
// non-UTF8 input)
Document document = builder.parse(new InputSource(input));
// Keep the root element of the parsed document as this object's node
node = document.getDocumentElement();
}
/**
* Advanced users only; see loadXML() in PApplet.
* Advanced users only; use loadXML() in PApplet.
*
* @nowebref
*/
@@ -131,11 +145,17 @@ public class XML implements Serializable {
/**
* Advanced users only; see loadXML() in PApplet.
* Advanced users only; use loadXML() in PApplet.
*
* Added extra code to handle \u2028 (Unicode NLF), which is sometimes
* inserted by web browsers (Safari?) and not distinguishable from a "real"
* LF (or CRLF) in some text editors (e.g. TextEdit on OS X). Only doing
* this for XML (and not all Reader objects) because LFs are essential.
* https://github.com/processing/processing/issues/2100
*
* @nowebref
*/
public XML(Reader reader, String options) throws IOException, ParserConfigurationException, SAXException {
public XML(final Reader reader, String options) throws IOException, ParserConfigurationException, SAXException {
DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
// Prevent 503 errors from www.w3.org
@@ -164,17 +184,24 @@ public class XML implements Serializable {
// builder = new SAXBuilder();
// builder.setValidation(validating);
// print(dataPath("1broke.html"), System.out);
Document document = builder.parse(new InputSource(new Reader() {
@Override
public int read(char[] cbuf, int off, int len) throws IOException {
int count = reader.read(cbuf, off, len);
for (int i = 0; i < count; i++) {
if (cbuf[off+i] == '\u2028') {
cbuf[off+i] = '\n';
}
}
return count;
}
// Document document = builder.parse(dataPath("1_alt.html"));
Document document = builder.parse(new InputSource(reader));
@Override
public void close() throws IOException {
reader.close();
}
}));
node = document.getDocumentElement();
// name = node.getNodeName();
// NodeList nodeList = document.getDocumentElement().getChildNodes();
// for (int i = 0; i < nodeList.getLength(); i++) {
// }
// print(createWriter("data/1_alt_reparse.html"), document.getDocumentElement(), 0);
}