mirror of
https://github.com/processing/processing4.git
synced 2026-02-04 06:09:17 +01:00
workaround for unicode issues with NLF in XML files (#2100)
This commit is contained in:
@@ -82,7 +82,9 @@ public class XML implements Serializable {
|
||||
|
||||
|
||||
/**
|
||||
* Advanced users only; see loadXML() in PApplet.
|
||||
* Advanced users only; use loadXML() in PApplet. This is not a supported
|
||||
* function and is subject to change. It is available simply for users that
|
||||
* would like to handle the exceptions in a particular way.
|
||||
*
|
||||
* @nowebref
|
||||
*/
|
||||
@@ -92,7 +94,7 @@ public class XML implements Serializable {
|
||||
|
||||
|
||||
/**
|
||||
* Advanced users only; see loadXML() in PApplet.
|
||||
* Advanced users only; use loadXML() in PApplet.
|
||||
*
|
||||
* @nowebref
|
||||
*/
|
||||
@@ -109,19 +111,31 @@ public class XML implements Serializable {
|
||||
|
||||
|
||||
/**
|
||||
* Shouldn't be part of main p5 reference, this is for advanced users.
|
||||
* Note that while it doesn't accept anything but UTF-8, this is preserved
|
||||
* so that we have some chance of implementing that in the future.
|
||||
* Unlike the loadXML() method in PApplet, this version works with files
|
||||
* that are not in UTF-8 format.
|
||||
*
|
||||
* @nowebref
|
||||
*/
|
||||
public XML(InputStream input, String options) throws IOException, ParserConfigurationException, SAXException {
|
||||
this(PApplet.createReader(input), options);
|
||||
//this(PApplet.createReader(input), options); // won't handle non-UTF8
|
||||
DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
|
||||
|
||||
try {
|
||||
// Prevent 503 errors from www.w3.org
|
||||
factory.setAttribute("http://apache.org/xml/features/nonvalidating/load-external-dtd", false);
|
||||
} catch (IllegalArgumentException e) {
|
||||
// ignore this; Android doesn't like it
|
||||
}
|
||||
|
||||
factory.setExpandEntityReferences(false);
|
||||
DocumentBuilder builder = factory.newDocumentBuilder();
|
||||
Document document = builder.parse(new InputSource(input));
|
||||
node = document.getDocumentElement();
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Advanced users only; see loadXML() in PApplet.
|
||||
* Advanced users only; use loadXML() in PApplet.
|
||||
*
|
||||
* @nowebref
|
||||
*/
|
||||
@@ -131,11 +145,17 @@ public class XML implements Serializable {
|
||||
|
||||
|
||||
/**
|
||||
* Advanced users only; see loadXML() in PApplet.
|
||||
* Advanced users only; use loadXML() in PApplet.
|
||||
*
|
||||
* Added extra code to handle \u2028 (Unicode NLF), which is sometimes
|
||||
* inserted by web browsers (Safari?) and not distinguishable from a "real"
|
||||
* LF (or CRLF) in some text editors (i.e. TextEdit on OS X). Only doing
|
||||
* this for XML (and not all Reader objects) because LFs are essential.
|
||||
* https://github.com/processing/processing/issues/2100
|
||||
*
|
||||
* @nowebref
|
||||
*/
|
||||
public XML(Reader reader, String options) throws IOException, ParserConfigurationException, SAXException {
|
||||
public XML(final Reader reader, String options) throws IOException, ParserConfigurationException, SAXException {
|
||||
DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
|
||||
|
||||
// Prevent 503 errors from www.w3.org
|
||||
@@ -164,17 +184,24 @@ public class XML implements Serializable {
|
||||
// builder = new SAXBuilder();
|
||||
// builder.setValidation(validating);
|
||||
|
||||
// print(dataPath("1broke.html"), System.out);
|
||||
Document document = builder.parse(new InputSource(new Reader() {
|
||||
@Override
|
||||
public int read(char[] cbuf, int off, int len) throws IOException {
|
||||
int count = reader.read(cbuf, off, len);
|
||||
for (int i = 0; i < count; i++) {
|
||||
if (cbuf[off+i] == '\u2028') {
|
||||
cbuf[off+i] = '\n';
|
||||
}
|
||||
}
|
||||
return count;
|
||||
}
|
||||
|
||||
// Document document = builder.parse(dataPath("1_alt.html"));
|
||||
Document document = builder.parse(new InputSource(reader));
|
||||
@Override
|
||||
public void close() throws IOException {
|
||||
reader.close();
|
||||
}
|
||||
}));
|
||||
node = document.getDocumentElement();
|
||||
// name = node.getNodeName();
|
||||
|
||||
// NodeList nodeList = document.getDocumentElement().getChildNodes();
|
||||
// for (int i = 0; i < nodeList.getLength(); i++) {
|
||||
// }
|
||||
// print(createWriter("data/1_alt_reparse.html"), document.getDocumentElement(), 0);
|
||||
}
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user