From 058c19c958fd2c4a607c127239a274dc5f265ea9 Mon Sep 17 00:00:00 2001 From: Ben Fry Date: Mon, 29 Apr 2013 12:29:09 -0400 Subject: [PATCH] add ODS table parsing, also cleanup load/save for Table --- core/src/processing/core/PApplet.java | 91 +++++---- core/src/processing/data/Table.java | 283 ++++++++++++++++++++++++-- 2 files changed, 317 insertions(+), 57 deletions(-) diff --git a/core/src/processing/core/PApplet.java b/core/src/processing/core/PApplet.java index 2cf4df079..deaca8e87 100644 --- a/core/src/processing/core/PApplet.java +++ b/core/src/processing/core/PApplet.java @@ -6180,17 +6180,18 @@ public class PApplet extends Applet */ public Table loadTable(String filename, String options) { try { - String ext = checkExtension(filename); - if (ext != null) { - if (ext.equals("csv") || ext.equals("tsv") || ext.equals("bin")) { - if (options == null) { - options = ext; - } else { - options = ext + "," + options; - } - } - } - return new Table(createInput(filename), options); +// String ext = checkExtension(filename); +// if (ext != null) { +// if (ext.equals("csv") || ext.equals("tsv") || ext.equals("bin")) { +// if (options == null) { +// options = ext; +// } else { +// options = ext + "," + options; +// } +// } +// } + return new Table(createInput(filename), + Table.extensionOptions(true, filename, options)); } catch (IOException e) { e.printStackTrace(); @@ -6216,34 +6217,27 @@ public class PApplet extends Applet * @param options can be one of "tsv", "csv", "bin", or "html" */ public boolean saveTable(Table table, String filename, String options) { - String ext = checkExtension(filename); - if (ext != null) { - if (ext.equals("csv") || ext.equals("tsv") || ext.equals("bin") || ext.equals("html")) { - if (options == null) { - options = ext; - } else { - options = ext + "," + options; - } - } - } - // Figure out location and make sure the target path exists - File outputFile = saveFile(filename); - // Open a stream and take care of .gz if necessary - return table.save(createOutput(outputFile), options); - } +// String ext = checkExtension(filename); +// if (ext != null) { +// if (ext.equals("csv") || ext.equals("tsv") || ext.equals("bin") || ext.equals("html")) { +// if (options == null) { +// options = ext; +// } else { +// options = ext + "," + options; +// } +// } +// } + try { + // Figure out location and make sure the target path exists + File outputFile = saveFile(filename); + // Open a stream and take care of .gz if necessary + return table.save(outputFile, options); - protected String checkExtension(String filename) { - // Don't consider the .gz as part of the name, createInput() - // and createOuput() will take care of fixing that up. - if (filename.toLowerCase().endsWith(".gz")) { - filename = filename.substring(0, filename.length() - 3); + } catch (IOException e) { + e.printStackTrace(); + return false; } - int index = filename.lastIndexOf('.'); - if (index == -1) { - return null; - } - return filename.substring(index + 1).toLowerCase(); } @@ -6637,6 +6631,31 @@ public class PApplet extends Applet + ////////////////////////////////////////////////////////////// + + // EXTENSIONS + + + /** + * Get the compression-free extension for this filename. + * @param filename The filename to check + * @return an extension, skipping past .gz if it's present + */ + static public String checkExtension(String filename) { + // Don't consider the .gz as part of the name, createInput() + // and createOuput() will take care of fixing that up. + if (filename.toLowerCase().endsWith(".gz")) { + filename = filename.substring(0, filename.length() - 3); + } + int dotIndex = filename.lastIndexOf('.'); + if (dotIndex != -1) { + return filename.substring(dotIndex + 1).toLowerCase(); + } + return null; + } + + + ////////////////////////////////////////////////////////////// // READERS AND WRITERS diff --git a/core/src/processing/data/Table.java b/core/src/processing/data/Table.java index f36b29d22..397f68afb 100644 --- a/core/src/processing/data/Table.java +++ b/core/src/processing/data/Table.java @@ -32,6 +32,12 @@ import java.sql.Types; import java.util.*; import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; +import java.util.zip.ZipEntry; +import java.util.zip.ZipInputStream; + +import javax.xml.parsers.ParserConfigurationException; + +import org.xml.sax.SAXException; import processing.core.PApplet; import processing.core.PConstants; @@ -102,7 +108,9 @@ public class Table { // version that uses a File object; future releases (or data types) // may include additional optimizations here public Table(File file, String options) throws IOException { - parse(new FileInputStream(file), checkOptions(file, options)); + // uses createInput() to handle .gz (and eventually .bz2) files + parse(PApplet.createInput(file), + extensionOptions(true, file.getName(), options)); } @@ -193,6 +201,7 @@ public class Table { } + /* protected String checkOptions(File file, String options) throws IOException { String extension = null; String filename = file.getName(); @@ -221,6 +230,29 @@ public class Table { } return options; } + */ + + + static final String[] loadExtensions = { "csv", "tsv", "ods", "bin" }; + static final String[] saveExtensions = { "csv", "tsv", "html", "bin" }; + + static public String extensionOptions(boolean loading, String filename, String options) { + String extension = PApplet.checkExtension(filename); + if (extension != null) { + for (String possible : loading ? loadExtensions : saveExtensions) { + if (extension.equals(possible)) { + if (options == null) { + return extension; + } else { + // prepend the extension to the options (will be replaced by other + // options that override it later in the load loop) + return extension + "," + options; + } + } + } + } + return options; + } protected void parse(InputStream input, String options) throws IOException { @@ -230,27 +262,48 @@ public class Table { boolean header = false; String extension = null; boolean binary = false; + + String worksheet = null; + final String sheetParam = "worksheet="; + + String[] opts = null; if (options != null) { - String[] opts = PApplet.splitTokens(options, " ,"); + opts = PApplet.splitTokens(options, " ,"); for (String opt : opts) { if (opt.equals("tsv")) { extension = "tsv"; } else if (opt.equals("csv")) { extension = "csv"; + } else if (opt.equals("ods")) { + extension = "ods"; + } else if (opt.equals("newlines")) { awfulCSV = true; + } else if (opt.equals("bin")) { binary = true; } else if (opt.equals("header")) { header = true; + + } else if (opt.startsWith(sheetParam)) { + worksheet = opt.substring(sheetParam.length()); + } else { throw new IllegalArgumentException("'" + opt + "' is not a valid option for loading a Table"); } } } + if (extension == null) { + throw new IllegalArgumentException("No extension specified for this Table"); + } + if (binary) { loadBinary(input); + + } else if (extension.equals("ods")) { + odsParse(input, worksheet); + } else { BufferedReader reader = PApplet.createReader(input); if (awfulCSV) { @@ -462,6 +515,189 @@ public class Table { } + /** + * Read a .ods (OpenDoc spreadsheet) zip file from an InputStream, and + * return the InputStream for content.xml contained inside. + */ + private InputStream odsFindContentXML(InputStream input) { + ZipInputStream zis = new ZipInputStream(input); + ZipEntry entry = null; + try { + while ((entry = zis.getNextEntry()) != null) { + if (entry.getName().equals("content.xml")) { + return zis; + } + } + } catch (IOException e) { + e.printStackTrace(); + } + return null; + } + + + protected void odsParse(InputStream input, String worksheet) { + try { + InputStream contentStream = odsFindContentXML(input); + XML xml = new XML(contentStream); + + // table files will have multiple sheets.. + // + // + // + XML[] sheets = + xml.getChildren("office:body/office:spreadsheet/table:table"); + + boolean found = false; + for (XML sheet : sheets) { +// System.out.println(sheet.getAttribute("table:name")); + if (worksheet == null || worksheet.equals(sheet.getString("table:name"))) { + odsParseSheet(sheet); + found = true; + if (worksheet == null) { + break; // only read the first sheet + } + } + } + if (!found) { + if (worksheet == null) { + throw new RuntimeException("No worksheets found in the ODS file."); + } else { + throw new RuntimeException("No worksheet named " + worksheet + + " found in the ODS file."); + } + } + } catch (UnsupportedEncodingException e) { + e.printStackTrace(); + } catch (IOException e) { + e.printStackTrace(); + } catch (ParserConfigurationException e) { + e.printStackTrace(); + } catch (SAXException e) { + e.printStackTrace(); + } + } + + + /** + * Parses a single sheet of XML from this file. + * @param The XML object for a single worksheet from the ODS file + */ + private void odsParseSheet(XML sheet) { + // Extra

or tags inside the text tag for the cell will be stripped. + // Different from showing formulas, and not quite the same as 'save as + // displayed' option when saving from inside OpenOffice. Only time we + // wouldn't want this would be so that we could parse hyperlinks and + // styling information intact, but that's out of scope for the p5 version. + final boolean ignoreTags = true; + + XML[] rows = sheet.getChildren("table:table-row"); + //xml.getChildren("office:body/office:spreadsheet/table:table/table:table-row"); + + int rowIndex = 0; + for (XML row : rows) { + int rowRepeat = row.getInt("table:number-rows-repeated", 1); +// if (rowRepeat != 1) { +// System.out.println(rowRepeat + " " + rowCount + " " + (rowCount + rowRepeat)); +// } + boolean rowNotNull = false; + XML[] cells = row.getChildren(); + int columnIndex = 0; + + for (XML cell : cells) { + int cellRepeat = cell.getInt("table:number-columns-repeated", 1); + +// +// 4150.00 +// + + String cellData = ignoreTags ? cell.getString("office:value") : null; + + // if there's an office:value in the cell, just roll with that + if (cellData == null) { + int cellKids = cell.getChildCount(); + if (cellKids != 0) { + XML[] paragraphElements = cell.getChildren("text:p"); + if (paragraphElements.length != 1) { + for (XML el : paragraphElements) { + System.err.println(el.toString()); + } + throw new RuntimeException("found more than one text:p element"); + } + XML textp = paragraphElements[0]; + String textpContent = textp.getContent(); + // if there are sub-elements, the content shows up as a child element + // (for which getName() returns null.. which seems wrong) + if (textpContent != null) { + cellData = textpContent; // nothing fancy, the text is in the text:p element + } else { + XML[] textpKids = textp.getChildren(); + StringBuffer cellBuffer = new StringBuffer(); + for (XML kid : textpKids) { + String kidName = kid.getName(); + if (kidName == null) { + odsAppendNotNull(kid, cellBuffer); + + } else if (kidName.equals("text:s")) { + int spaceCount = kid.getInt("text:c", 1); + for (int space = 0; space < spaceCount; space++) { + cellBuffer.append(' '); + } + } else if (kidName.equals("text:span")) { + odsAppendNotNull(kid, cellBuffer); + + } else if (kidName.equals("text:a")) { + // blah.com + if (ignoreTags) { + cellBuffer.append(kid.getString("xlink:href")); + } else { + odsAppendNotNull(kid, cellBuffer); + } + + } else { + odsAppendNotNull(kid, cellBuffer); + System.err.println(getClass().getName() + ": don't understand: " + kid); + //throw new RuntimeException("I'm not used to this."); + } + } + cellData = cellBuffer.toString(); + } + //setString(rowIndex, columnIndex, c); //text[0].getContent()); + //columnIndex++; + } + } + for (int r = 0; r < cellRepeat; r++) { + if (cellData != null) { + //System.out.println("setting " + rowIndex + "," + columnIndex + " to " + cellData); + setString(rowIndex, columnIndex, cellData); + } + columnIndex++; + if (cellData != null) { +// if (columnIndex > columnMax) { +// columnMax = columnIndex; +// } + rowNotNull = true; + } + } + } + if (rowNotNull && rowRepeat > 1) { + String[] rowStrings = getStringRow(rowIndex); + for (int r = 1; r < rowRepeat; r++) { + addRow(rowStrings); + } + } + rowIndex += rowRepeat; + } + } + + + private void odsAppendNotNull(XML kid, StringBuffer buffer) { + String content = kid.getContent(); + if (content != null) { + buffer.append(content); + } + } + + // A 'Class' object is used here, so the syntax for this function is: // Table t = loadTable("cars3.tsv", "header"); // Record[] records = (Record[]) t.parse(Record.class); @@ -624,35 +860,40 @@ public class Table { public boolean save(File file, String options) throws IOException { - return save(new FileOutputStream(file), checkOptions(file, options)); + return save(PApplet.createOutput(file), + Table.extensionOptions(false, file.getName(), options)); } public boolean save(OutputStream output, String options) { PrintWriter writer = PApplet.createWriter(output); - String opt = null; - if (options != null) { - String[] opts = PApplet.splitTokens(options, ", "); - opt = opts[opts.length - 1]; - if (!opt.equals("csv") && - !opt.equals("tsv") && - !opt.equals("html") && - !opt.equals("bin")) { - throw new IllegalArgumentException("'" + opt + "' not understood. " + - "Only csv, tsv, bin, and html are " + - "accepted as save parameters"); - } - } else { - opt = "tsv"; // fall back to saving as TSV + String extension = null; + if (options == null) { + throw new IllegalArgumentException("No extension specified for saving this Table"); } - if (opt.equals("csv")) { + String[] opts = PApplet.splitTokens(options, ", "); + // Only option for save is the extension, so we can safely grab the last + extension = opts[opts.length - 1]; + boolean found = false; + for (String ext : saveExtensions) { + if (extension.equals(ext)) { + found = true; + break; + } + } + // Not providing a fallback; let's make users specify an extension + if (!found) { + throw new IllegalArgumentException("'" + extension + "' not available for Table"); + } + + if (extension.equals("csv")) { writeCSV(writer); - } else if (opt.equals("tsv")) { + } else if (extension.equals("tsv")) { writeTSV(writer); - } else if (opt.equals("html")) { + } else if (extension.equals("html")) { writeHTML(writer); - } else if (opt.equals("bin")) { + } else if (extension.equals("bin")) { try { saveBinary(output); } catch (IOException e) {