From 058c19c958fd2c4a607c127239a274dc5f265ea9 Mon Sep 17 00:00:00 2001
From: Ben Fry <fry@processing.org>
Date: Mon, 29 Apr 2013 12:29:09 -0400
Subject: [PATCH] add ODS table parsing, also cleanup load/save for Table

---
 core/src/processing/core/PApplet.java |  91 +++++----
 core/src/processing/data/Table.java   | 283 ++++++++++++++++++++++++--
 2 files changed, 317 insertions(+), 57 deletions(-)

diff --git a/core/src/processing/core/PApplet.java b/core/src/processing/core/PApplet.java
index 2cf4df079..deaca8e87 100644
--- a/core/src/processing/core/PApplet.java
+++ b/core/src/processing/core/PApplet.java
@@ -6180,17 +6180,18 @@ public class PApplet extends Applet
    */
   public Table loadTable(String filename, String options) {
     try {
-      String ext = checkExtension(filename);
-      if (ext != null) {
-        if (ext.equals("csv") || ext.equals("tsv") || ext.equals("bin")) {
-          if (options == null) {
-            options = ext;
-          } else {
-            options = ext + "," + options;
-          }
-        }
-      }
-      return new Table(createInput(filename), options);
+//      String ext = checkExtension(filename);
+//      if (ext != null) {
+//        if (ext.equals("csv") || ext.equals("tsv") || ext.equals("bin")) {
+//          if (options == null) {
+//            options = ext;
+//          } else {
+//            options = ext + "," + options;
+//          }
+//        }
+//      }
+      return new Table(createInput(filename),
+                       Table.extensionOptions(true, filename, options));
 
     } catch (IOException e) {
       e.printStackTrace();
@@ -6216,34 +6217,27 @@ public class PApplet extends Applet
    * @param options can be one of "tsv", "csv", "bin", or "html"
    */
   public boolean saveTable(Table table, String filename, String options) {
-    String ext = checkExtension(filename);
-    if (ext != null) {
-      if (ext.equals("csv") || ext.equals("tsv") || ext.equals("bin") || ext.equals("html")) {
-        if (options == null) {
-          options = ext;
-        } else {
-          options = ext + "," + options;
-        }
-      }
-    }
-    // Figure out location and make sure the target path exists
-    File outputFile = saveFile(filename);
-    // Open a stream and take care of .gz if necessary
-    return table.save(createOutput(outputFile), options);
-  }
+//    String ext = checkExtension(filename);
+//    if (ext != null) {
+//      if (ext.equals("csv") || ext.equals("tsv") || ext.equals("bin") || ext.equals("html")) {
+//        if (options == null) {
+//          options = ext;
+//        } else {
+//          options = ext + "," + options;
+//        }
+//      }
+//    }
 
+    try {
+      // Figure out location and make sure the target path exists
+      File outputFile = saveFile(filename);
+      // Open a stream and take care of .gz if necessary
+      return table.save(outputFile, options);
 
-  protected String checkExtension(String filename) {
-    // Don't consider the .gz as part of the name, createInput()
-    // and createOuput() will take care of fixing that up.
-    if (filename.toLowerCase().endsWith(".gz")) {
-      filename = filename.substring(0, filename.length() - 3);
+    } catch (IOException e) {
+      e.printStackTrace();
+      return false;
     }
-    int index = filename.lastIndexOf('.');
-    if (index == -1) {
-      return null;
-    }
-    return filename.substring(index + 1).toLowerCase();
   }
 
 
@@ -6637,6 +6631,31 @@ public class PApplet extends Applet
 
 
 
+  //////////////////////////////////////////////////////////////
+
+  // EXTENSIONS
+
+
+  /**
+   * Get the compression-free extension for this filename.
+   * @param filename The filename to check
+   * @return an extension, skipping past .gz if it's present
+   */
+  static public String checkExtension(String filename) {
+    // Don't consider the .gz as part of the name, createInput()
+    // and createOuput() will take care of fixing that up.
+    if (filename.toLowerCase().endsWith(".gz")) {
+      filename = filename.substring(0, filename.length() - 3);
+    }
+    int dotIndex = filename.lastIndexOf('.');
+    if (dotIndex != -1) {
+      return filename.substring(dotIndex + 1).toLowerCase();
+    }
+    return null;
+  }
+
+
+
   //////////////////////////////////////////////////////////////
 
   // READERS AND WRITERS
diff --git a/core/src/processing/data/Table.java b/core/src/processing/data/Table.java
index f36b29d22..397f68afb 100644
--- a/core/src/processing/data/Table.java
+++ b/core/src/processing/data/Table.java
@@ -32,6 +32,12 @@ import java.sql.Types;
 import java.util.*;
 import java.util.concurrent.ExecutorService;
 import java.util.concurrent.Executors;
+import java.util.zip.ZipEntry;
+import java.util.zip.ZipInputStream;
+
+import javax.xml.parsers.ParserConfigurationException;
+
+import org.xml.sax.SAXException;
 
 import processing.core.PApplet;
 import processing.core.PConstants;
@@ -102,7 +108,9 @@ public class Table {
   // version that uses a File object; future releases (or data types)
   // may include additional optimizations here
   public Table(File file, String options) throws IOException {
-    parse(new FileInputStream(file), checkOptions(file, options));
+    // uses createInput() to handle .gz (and eventually .bz2) files
+    parse(PApplet.createInput(file),
+          extensionOptions(true, file.getName(), options));
   }
 
 
@@ -193,6 +201,7 @@ public class Table {
   }
 
 
+  /*
   protected String checkOptions(File file, String options) throws IOException {
     String extension = null;
     String filename = file.getName();
@@ -221,6 +230,29 @@ public class Table {
     }
     return options;
   }
+  */
+
+
+  static final String[] loadExtensions = { "csv", "tsv", "ods", "bin" };
+  static final String[] saveExtensions = { "csv", "tsv", "html", "bin" };
+
+  static public String extensionOptions(boolean loading, String filename, String options) {
+    String extension = PApplet.checkExtension(filename);
+    if (extension != null) {
+      for (String possible : loading ? loadExtensions : saveExtensions) {
+        if (extension.equals(possible)) {
+          if (options == null) {
+            return extension;
+          } else {
+            // prepend the extension to the options (will be replaced by other
+            // options that override it later in the load loop)
+            return extension + "," + options;
+          }
+        }
+      }
+    }
+    return options;
+  }
 
 
   protected void parse(InputStream input, String options) throws IOException {
@@ -230,27 +262,48 @@ public class Table {
     boolean header = false;
     String extension = null;
     boolean binary = false;
+
+    String worksheet = null;
+    final String sheetParam = "worksheet=";
+
+    String[] opts = null;
     if (options != null) {
-      String[] opts = PApplet.splitTokens(options, " ,");
+      opts = PApplet.splitTokens(options, " ,");
       for (String opt : opts) {
         if (opt.equals("tsv")) {
           extension = "tsv";
         } else if (opt.equals("csv")) {
           extension = "csv";
+        } else if (opt.equals("ods")) {
+          extension = "ods";
+
         } else if (opt.equals("newlines")) {
           awfulCSV = true;
+
         } else if (opt.equals("bin")) {
           binary = true;
         } else if (opt.equals("header")) {
           header = true;
+
+        } else if (opt.startsWith(sheetParam)) {
+          worksheet = opt.substring(sheetParam.length());
+
         } else {
           throw new IllegalArgumentException("'" + opt + "' is not a valid option for loading a Table");
         }
       }
     }
 
+    if (extension == null) {
+      throw new IllegalArgumentException("No extension specified for this Table");
+    }
+
     if (binary) {
       loadBinary(input);
+
+    } else if (extension.equals("ods")) {
+      odsParse(input, worksheet);
+
     } else {
       BufferedReader reader = PApplet.createReader(input);
       if (awfulCSV) {
@@ -462,6 +515,189 @@ public class Table {
   }
 
 
+  /**
+   * Read a .ods (OpenDoc spreadsheet) zip file from an InputStream, and
+   * return the InputStream for content.xml contained inside.
+   */
+  private InputStream odsFindContentXML(InputStream input) {
+    ZipInputStream zis = new ZipInputStream(input);
+    ZipEntry entry = null;
+    try {
+      while ((entry = zis.getNextEntry()) != null) {
+        if (entry.getName().equals("content.xml")) {
+          return zis;
+        }
+      }
+    } catch (IOException e) {
+      e.printStackTrace();
+    }
+    return null;
+  }
+
+
+  protected void odsParse(InputStream input, String worksheet) {
+    try {
+      InputStream contentStream = odsFindContentXML(input);
+      XML xml = new XML(contentStream);
+
+      // table files will have multiple sheets..
+      // <table:table table:name="Sheet1" table:style-name="ta1" table:print="false">
+      // <table:table table:name="Sheet2" table:style-name="ta1" table:print="false">
+      // <table:table table:name="Sheet3" table:style-name="ta1" table:print="false">
+      XML[] sheets =
+        xml.getChildren("office:body/office:spreadsheet/table:table");
+
+      boolean found = false;
+      for (XML sheet : sheets) {
+//        System.out.println(sheet.getAttribute("table:name"));
+        if (worksheet == null || worksheet.equals(sheet.getString("table:name"))) {
+          odsParseSheet(sheet);
+          found = true;
+          if (worksheet == null) {
+            break;  // only read the first sheet
+          }
+        }
+      }
+      if (!found) {
+        if (worksheet == null) {
+          throw new RuntimeException("No worksheets found in the ODS file.");
+        } else {
+          throw new RuntimeException("No worksheet named " + worksheet +
+                                     " found in the ODS file.");
+        }
+      }
+    } catch (UnsupportedEncodingException e) {
+      e.printStackTrace();
+    } catch (IOException e) {
+      e.printStackTrace();
+    } catch (ParserConfigurationException e) {
+      e.printStackTrace();
+    } catch (SAXException e) {
+      e.printStackTrace();
+    }
+  }
+
+
+  /**
+   * Parses a single sheet of XML from this file.
+   * @param The XML object for a single worksheet from the ODS file
+   */
+  private void odsParseSheet(XML sheet) {
+    // Extra <p> or <a> tags inside the text tag for the cell will be stripped.
+    // Different from showing formulas, and not quite the same as 'save as
+    // displayed' option when saving from inside OpenOffice. Only time we
+    // wouldn't want this would be so that we could parse hyperlinks and
+    // styling information intact, but that's out of scope for the p5 version.
+    final boolean ignoreTags = true;
+
+    XML[] rows = sheet.getChildren("table:table-row");
+    //xml.getChildren("office:body/office:spreadsheet/table:table/table:table-row");
+
+    int rowIndex = 0;
+    for (XML row : rows) {
+      int rowRepeat = row.getInt("table:number-rows-repeated", 1);
+//      if (rowRepeat != 1) {
+//          System.out.println(rowRepeat + " " + rowCount + " " + (rowCount + rowRepeat));
+//      }
+      boolean rowNotNull = false;
+      XML[] cells = row.getChildren();
+      int columnIndex = 0;
+
+      for (XML cell : cells) {
+        int cellRepeat = cell.getInt("table:number-columns-repeated", 1);
+
+//        <table:table-cell table:formula="of:=SUM([.E7:.E8])" office:value-type="float" office:value="4150">
+//        <text:p>4150.00</text:p>
+//        </table:table-cell>
+
+        String cellData = ignoreTags ? cell.getString("office:value") : null;
+
+        // if there's an office:value in the cell, just roll with that
+        if (cellData == null) {
+          int cellKids = cell.getChildCount();
+          if (cellKids != 0) {
+            XML[] paragraphElements = cell.getChildren("text:p");
+            if (paragraphElements.length != 1) {
+              for (XML el : paragraphElements) {
+                System.err.println(el.toString());
+              }
+              throw new RuntimeException("found more than one text:p element");
+            }
+            XML textp = paragraphElements[0];
+            String textpContent = textp.getContent();
+            // if there are sub-elements, the content shows up as a child element
+            // (for which getName() returns null.. which seems wrong)
+            if (textpContent != null) {
+              cellData = textpContent;  // nothing fancy, the text is in the text:p element
+            } else {
+              XML[] textpKids = textp.getChildren();
+              StringBuffer cellBuffer = new StringBuffer();
+              for (XML kid : textpKids) {
+                String kidName = kid.getName();
+                if (kidName == null) {
+                  odsAppendNotNull(kid, cellBuffer);
+
+                } else if (kidName.equals("text:s")) {
+                  int spaceCount = kid.getInt("text:c", 1);
+                  for (int space = 0; space < spaceCount; space++) {
+                    cellBuffer.append(' ');
+                  }
+                } else if (kidName.equals("text:span")) {
+                  odsAppendNotNull(kid, cellBuffer);
+
+                } else if (kidName.equals("text:a")) {
+                  // <text:a xlink:href="http://blah.com/">blah.com</text:a>
+                  if (ignoreTags) {
+                    cellBuffer.append(kid.getString("xlink:href"));
+                  } else {
+                    odsAppendNotNull(kid, cellBuffer);
+                  }
+
+                } else {
+                  odsAppendNotNull(kid, cellBuffer);
+                  System.err.println(getClass().getName() + ": don't understand: " + kid);
+                  //throw new RuntimeException("I'm not used to this.");
+                }
+              }
+              cellData = cellBuffer.toString();
+            }
+            //setString(rowIndex, columnIndex, c); //text[0].getContent());
+            //columnIndex++;
+          }
+        }
+        for (int r = 0; r < cellRepeat; r++) {
+          if (cellData != null) {
+            //System.out.println("setting " + rowIndex + "," + columnIndex + " to " + cellData);
+            setString(rowIndex, columnIndex, cellData);
+          }
+          columnIndex++;
+          if (cellData != null) {
+//            if (columnIndex > columnMax) {
+//              columnMax = columnIndex;
+//            }
+            rowNotNull = true;
+          }
+        }
+      }
+      if (rowNotNull && rowRepeat > 1) {
+        String[] rowStrings = getStringRow(rowIndex);
+        for (int r = 1; r < rowRepeat; r++) {
+          addRow(rowStrings);
+        }
+      }
+      rowIndex += rowRepeat;
+    }
+  }
+
+
+  private void odsAppendNotNull(XML kid, StringBuffer buffer) {
+    String content = kid.getContent();
+    if (content != null) {
+      buffer.append(content);
+    }
+  }
+
+
   // A 'Class' object is used here, so the syntax for this function is:
   // Table t = loadTable("cars3.tsv", "header");
   // Record[] records = (Record[]) t.parse(Record.class);
@@ -624,35 +860,40 @@ public class Table {
 
 
   public boolean save(File file, String options) throws IOException {
-    return save(new FileOutputStream(file), checkOptions(file, options));
+    return save(PApplet.createOutput(file),
+                Table.extensionOptions(false, file.getName(), options));
   }
 
 
   public boolean save(OutputStream output, String options) {
     PrintWriter writer = PApplet.createWriter(output);
-    String opt = null;
-    if (options != null) {
-      String[] opts = PApplet.splitTokens(options, ", ");
-      opt = opts[opts.length - 1];
-      if (!opt.equals("csv") &&
-          !opt.equals("tsv") &&
-          !opt.equals("html") &&
-          !opt.equals("bin")) {
-        throw new IllegalArgumentException("'" + opt + "' not understood. " +
-          "Only csv, tsv, bin, and html are " +
-          "accepted as save parameters");
-      }
-    } else {
-      opt = "tsv";  // fall back to saving as TSV
+    String extension = null;
+    if (options == null) {
+      throw new IllegalArgumentException("No extension specified for saving this Table");
     }
 
-    if (opt.equals("csv")) {
+    String[] opts = PApplet.splitTokens(options, ", ");
+    // Only option for save is the extension, so we can safely grab the last
+    extension = opts[opts.length - 1];
+    boolean found = false;
+    for (String ext : saveExtensions) {
+      if (extension.equals(ext)) {
+        found = true;
+        break;
+      }
+    }
+    // Not providing a fallback; let's make users specify an extension
+    if (!found) {
+      throw new IllegalArgumentException("'" + extension + "' not available for Table");
+    }
+
+    if (extension.equals("csv")) {
       writeCSV(writer);
-    } else if (opt.equals("tsv")) {
+    } else if (extension.equals("tsv")) {
       writeTSV(writer);
-    } else if (opt.equals("html")) {
+    } else if (extension.equals("html")) {
       writeHTML(writer);
-    } else if (opt.equals("bin")) {
+    } else if (extension.equals("bin")) {
       try {
         saveBinary(output);
       } catch (IOException e) {