diff --git a/core/src/processing/core/PApplet.java b/core/src/processing/core/PApplet.java index e77a217af..b0f83ab57 100755 --- a/core/src/processing/core/PApplet.java +++ b/core/src/processing/core/PApplet.java @@ -6249,22 +6249,28 @@ public class PApplet extends Applet /** - * @param options may contain "header", "tsv", "csv", or "bin" separated by commas + * Options may contain "header", "tsv", "csv", or "bin" separated by commas. + * + * Another option is "dictionary=filename.tsv", which allows users to + * specify a "dictionary" file that contains a mapping of the column titles + * and the data types used in the table file. This can be far more efficient + * (in terms of speed and memory usage) for loading and parsing tables. The + * dictionary file can only be tab separated values (.tsv) and its extension + * will be ignored. This option was added in Processing 2.0.2. */ public Table loadTable(String filename, String options) { try { -// String ext = checkExtension(filename); -// if (ext != null) { -// if (ext.equals("csv") || ext.equals("tsv") || ext.equals("bin")) { -// if (options == null) { -// options = ext; -// } else { -// options = ext + "," + options; -// } -// } -// } - return new Table(createInput(filename), - Table.extensionOptions(true, filename, options)); + String optionStr = Table.extensionOptions(true, filename, options); + String[] optionList = split(optionStr, ','); + + Table dictionary = null; + for (String opt : optionList) { + if (opt.startsWith("dictionary=")) { + dictionary = loadTable(opt.substring(opt.indexOf('=') + 1), "tsv"); + return dictionary.typedParse(createInput(filename), optionStr); + } + } + return new Table(createInput(filename), optionStr); } catch (IOException e) { e.printStackTrace(); diff --git a/core/src/processing/data/Table.java b/core/src/processing/data/Table.java index 744c98f7f..a91b088b8 100644 --- a/core/src/processing/data/Table.java +++ b/core/src/processing/data/Table.java @@ -119,6 +119,7 @@ public class 
Table { */ public Table(File file, String options) throws IOException { // uses createInput() to handle .gz (and eventually .bz2) files + init(); parse(PApplet.createInput(file), extensionOptions(true, file.getName(), options)); } @@ -146,9 +147,11 @@ public class Table { * @throws IOException */ public Table(InputStream input, String options) throws IOException { + init(); parse(input, options); } + /** * @nowebref */ @@ -210,6 +213,14 @@ public class Table { } + public Table typedParse(InputStream input, String options) throws IOException { + Table table = new Table(); + table.setColumnTypes(this); + table.parse(input, options); + return table; + } + + + protected void init() { columns = new Object[0]; columnTypes = new int[0]; @@ -272,7 +283,7 @@ public class Table { protected void parse(InputStream input, String options) throws IOException { - init(); + //init(); boolean awfulCSV = false; boolean header = false; @@ -302,6 +313,8 @@ public class Table { header = true; } else if (opt.startsWith(sheetParam)) { worksheet = opt.substring(sheetParam.length()); + } else if (opt.startsWith("dictionary=")) { + // ignore option, this is only handled by PApplet } else { throw new IllegalArgumentException("'" + opt + "' is not a valid option for loading a Table"); } @@ -352,9 +365,9 @@ public class Table { row++; } - /* - // this is problematic unless we're going to calculate rowCount first - if (row % 10000 == 0) { + // this is problematic unless we're going to calculate rowCount first + if (row % 10000 == 0) { + /* if (row < rowCount) { int pct = (100 * row) / rowCount; if (pct != prev) { // also prevents "0%" from showing up @@ -362,14 +375,15 @@ public class Table { prev = pct; } } - try { - Thread.sleep(5); - } catch (InterruptedException e) { - e.printStackTrace(); + */ + try { + // Sleep this thread so that the GC can catch up + Thread.sleep(10); + } catch (InterruptedException e) { + Thread.currentThread().interrupt(); // restore the interrupt flag so the caller can still observe it + } } } - */ - } } catch (Exception e) { throw new
RuntimeException("Error reading table on line " + row, e); } diff --git a/core/todo.txt b/core/todo.txt index 458e555d8..40732ddaf 100644 --- a/core/todo.txt +++ b/core/todo.txt @@ -70,7 +70,11 @@ _ draw(s) doesn't work on the returned PShape table X add sort() to Table -_ implement version of Table that takes a dictionary file +X implement version of Table that takes a dictionary file +X dictionary=blah.tsv +X tsv only, ignores extension +X if allowed extension, we couldn't use .dict instead +X and that's probably the most useful _ addRow() is not efficient, probably need to do the doubling _ or have a setIncrement() function? _ it would default to 1 on tables loaded from a file