consume Unicode BOM (0xFEFF) in createReader() and Table parser

This commit is contained in:
Ben Fry
2017-01-30 19:23:02 -05:00
parent da04e3682f
commit ea65d7f034
3 changed files with 29 additions and 2 deletions

View File

@@ -6744,7 +6744,20 @@ public class PApplet implements PConstants {
/**
 * Creates a BufferedReader that decodes the given stream as UTF-8 and
 * skips a leading Unicode byte order mark (U+FEFF) when one is present.
 * <p>
 * The reader is marked with a read-ahead limit of one character, a single
 * character is read, and the reader is reset unless that character is
 * U+FEFF. Any other first character, or the -1 returned for an empty
 * stream, therefore leaves the reader positioned at the start.
 * <p>
 * An IOException raised during this check is not propagated: its stack
 * trace is printed and the reader is returned anyway.
 *
 * @param input the stream to read characters from
 * @return a BufferedReader wrapping a UTF-8 InputStreamReader over input
 */
static public BufferedReader createReader(InputStream input) {
InputStreamReader isr =
new InputStreamReader(input, StandardCharsets.UTF_8);
// NOTE(review): this return appears to be the pre-change line of the diff
// (the +/- markers are not visible here); if it were live code, every
// statement below it would be unreachable. Confirm against the real file.
return new BufferedReader(isr);
BufferedReader reader = new BufferedReader(isr);
// consume the Unicode BOM (byte order mark) if present
try {
// remember the current position; a read-ahead limit of 1 is enough
// because only one character is read before a possible reset()
reader.mark(1);
int c = reader.read();
// if not the BOM, back up to the beginning again
if (c != '\uFEFF') {
reader.reset();
}
} catch (IOException e) {
// best effort: report the failure but still hand back the reader
e.printStackTrace();
}
return reader;
}

View File

@@ -378,6 +378,15 @@ public class Table {
} else {
InputStreamReader isr = new InputStreamReader(input, encoding);
BufferedReader reader = new BufferedReader(isr);
// strip out the Unicode BOM, if present
reader.mark(1);
int c = reader.read();
// if not the BOM, back up to the beginning again
if (c != '\uFEFF') {
reader.reset();
}
/*
if (awfulCSV) {
parseAwfulCSV(reader, header);
@@ -4039,6 +4048,7 @@ public class Table {
// . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .
/**
* @webref table:method
* @brief Trims whitespace from values

View File

@@ -2,7 +2,11 @@
X return null for PApplet.trim(null)
X StringDict(TableRow) constructor
X allow lone double quotes in the midst of csv strings
_ make trim() work on column titles as well
X make trim() work on column titles as well
_ add trimRows() and trimColumns()
_ would you ever use one w/o the other?
_ should trim() just handle it?
X consume Unicode BOM (0xFEFF) in createReader() and Table parser
_ no prompt shows with selectInput() on 10.11 and 10.12