rename to use Dict instead of Hash, remove HTML and ODS tables

This commit is contained in:
Ben Fry
2013-04-28 15:11:07 -04:00
parent 7ac6a0bd79
commit 2daa6269b2
10 changed files with 53 additions and 525 deletions

View File

@@ -10,7 +10,7 @@ import processing.core.PApplet;
/**
* A simple table class to use a String as a lookup for a float value.
*/
public class FloatHash {
public class FloatDict {
/** Number of elements in the table */
protected int count;
@@ -22,7 +22,7 @@ public class FloatHash {
private HashMap<String, Integer> indices = new HashMap<String, Integer>();
public FloatHash() {
public FloatDict() {
count = 0;
keys = new String[10];
values = new float[10];
@@ -33,7 +33,7 @@ public class FloatHash {
* Create a new lookup with a specific size. This is more efficient than not
* specifying a size. Use it when you know the rough size of the thing you're creating.
*/
public FloatHash(int length) {
public FloatDict(int length) {
count = 0;
keys = new String[length];
values = new float[length];
@@ -44,7 +44,7 @@ public class FloatHash {
* Read a set of entries from a Reader that has each key/value pair on
* a single line, separated by a tab.
*/
public FloatHash(BufferedReader reader) {
public FloatDict(BufferedReader reader) {
// public FloatHash(PApplet parent, String filename) {
String[] lines = PApplet.loadStrings(reader);
keys = new String[lines.length];
@@ -485,7 +485,7 @@ public class FloatHash {
@Override
public void swap(int a, int b) {
FloatHash.this.swap(a, b);
FloatDict.this.swap(a, b);
}
};
s.run();
@@ -493,8 +493,8 @@ public class FloatHash {
/** Returns a duplicate copy of this object. */
public FloatHash copy() {
FloatHash outgoing = new FloatHash(count);
public FloatDict copy() {
FloatDict outgoing = new FloatDict(count);
System.arraycopy(keys, 0, outgoing.keys, 0, count);
System.arraycopy(values, 0, outgoing.values, 0, count);
for (int i = 0; i < count; i++) {

View File

@@ -10,7 +10,7 @@ import processing.core.PApplet;
/**
* A simple class to use a String as a lookup for an int value.
*/
public class IntHash {
public class IntDict {
/** Number of elements in the table */
protected int count;
@@ -45,7 +45,7 @@ public class IntHash {
// }
public IntHash() {
public IntDict() {
count = 0;
keys = new String[10];
values = new int[10];
@@ -56,7 +56,7 @@ public class IntHash {
* Create a new lookup with a specific size. This is more efficient than not
* specifying a size. Use it when you know the rough size of the thing you're creating.
*/
public IntHash(int length) {
public IntDict(int length) {
count = 0;
keys = new String[length];
values = new int[length];
@@ -67,7 +67,7 @@ public class IntHash {
* Read a set of entries from a Reader that has each key/value pair on
* a single line, separated by a tab.
*/
public IntHash(BufferedReader reader) {
public IntDict(BufferedReader reader) {
// public IntHash(PApplet parent, String filename) {
String[] lines = PApplet.loadStrings(reader);
keys = new String[lines.length];
@@ -375,7 +375,7 @@ public class IntHash {
@Override
public void swap(int a, int b) {
IntHash.this.swap(a, b);
IntDict.this.swap(a, b);
}
};
s.run();
@@ -383,8 +383,8 @@ public class IntHash {
/** Returns a duplicate copy of this object. */
public IntHash copy() {
IntHash outgoing = new IntHash(count);
public IntDict copy() {
IntDict outgoing = new IntDict(count);
System.arraycopy(keys, 0, outgoing.keys, 0, count);
System.arraycopy(values, 0, outgoing.values, 0, count);
for (int i = 0; i < count; i++) {

View File

@@ -299,7 +299,7 @@ public class JSONObject {
}
public JSONObject(IntHash dict) {
public JSONObject(IntDict dict) {
map = new HashMap<String, Object>();
for (int i = 0; i < dict.size(); i++) {
setInt(dict.key(i), dict.value(i));
@@ -307,7 +307,7 @@ public class JSONObject {
}
public JSONObject(FloatHash dict) {
public JSONObject(FloatDict dict) {
map = new HashMap<String, Object>();
for (int i = 0; i < dict.size(); i++) {
setFloat(dict.key(i), dict.value(i));
@@ -315,7 +315,7 @@ public class JSONObject {
}
public JSONObject(StringHash dict) {
public JSONObject(StringDict dict) {
map = new HashMap<String, Object>();
for (int i = 0; i < dict.size(); i++) {
setString(dict.key(i), dict.value(i));

View File

@@ -10,7 +10,7 @@ import processing.core.PApplet;
/**
* A simple table class to use a String as a lookup for another String value.
*/
public class StringHash {
public class StringDict {
/** Number of elements in the table */
protected int count;
@@ -22,7 +22,7 @@ public class StringHash {
private HashMap<String, Integer> indices = new HashMap<String, Integer>();
public StringHash() {
public StringDict() {
count = 0;
keys = new String[10];
values = new String[10];
@@ -34,7 +34,7 @@ public class StringHash {
* change the size(), but is more efficient than not specifying a length.
* Use it when you know the rough size of the thing you're creating.
*/
public StringHash(int length) {
public StringDict(int length) {
count = 0;
keys = new String[length];
values = new String[length];
@@ -45,7 +45,7 @@ public class StringHash {
* Read a set of entries from a Reader that has each key/value pair on
* a single line, separated by a tab.
*/
public StringHash(BufferedReader reader) {
public StringDict(BufferedReader reader) {
String[] lines = PApplet.loadStrings(reader);
keys = new String[lines.length];
values = new String[lines.length];
@@ -312,7 +312,7 @@ public class StringHash {
@Override
public void swap(int a, int b) {
StringHash.this.swap(a, b);
StringDict.this.swap(a, b);
}
};
s.run();
@@ -320,8 +320,8 @@ public class StringHash {
/** Returns a duplicate copy of this object. */
public StringHash copy() {
StringHash outgoing = new StringHash(count);
public StringDict copy() {
StringDict outgoing = new StringDict(count);
System.arraycopy(keys, 0, outgoing.keys, 0, count);
System.arraycopy(values, 0, outgoing.values, 0, count);
for (int i = 0; i < count; i++) {

View File

@@ -601,15 +601,15 @@ public class StringList implements Iterable<String> {
/** Remove all non-unique entries. */
public void unique() {
IntHash cheat = getTally();
IntDict cheat = getTally();
data = cheat.keyArray();
count = cheat.size();
}
/** Count the number of times each String entry is found in this list. */
public IntHash getTally() {
IntHash outgoing = new IntHash();
public IntDict getTally() {
IntDict outgoing = new IntDict();
for (int i = 0; i < count; i++) {
outgoing.increment(data[i]);
}
@@ -618,8 +618,8 @@ public class StringList implements Iterable<String> {
/** Create a dictionary associating each entry in this list to its index. */
public IntHash getOrder() {
IntHash outgoing = new IntHash();
public IntDict getOrder() {
IntDict outgoing = new IntDict();
for (int i = 0; i < count; i++) {
outgoing.set(data[i], i);
}

View File

@@ -1,226 +0,0 @@
package processing.data;
import java.io.*;
import java.util.*;
import javax.swing.text.*;
import javax.swing.text.html.*;
import processing.core.PApplet;
import processing.data.Table;
/**
 * Scrapes the tables out of an HTML document. Each {@code <table>} start tag
 * reported by the Swing HTML parser creates one TableHTML, which is appended
 * to the list in the order the start tags are seen (nested tables included).
 * Parsing happens in the constructor; the results are read back afterwards
 * with getTableCount(), getTable(), getTables() or getTableList().
 */
class HTMLTableScraper {
  /** Every table found, in the order its start tag was encountered. */
  ArrayList<Table> tables;

  /** Table currently receiving rows and cells, or null when outside any table. */
  TableHTML currentTable;


  /** Parse the HTML obtained from parent.createReader(where). */
  public HTMLTableScraper(PApplet parent, String where) {
    this(parent.createReader(where));
  }


  /** Parse the HTML stored in a file. */
  public HTMLTableScraper(File file) {
    this(PApplet.createReader(file));
  }


  /** Parse HTML source that is already in memory. */
  public HTMLTableScraper(String html) {
    this(new StringReader(html));
  }


  /**
   * Parse HTML from a Reader. The reader is read to completion and closed
   * before the constructor returns.
   */
  public HTMLTableScraper(Reader reader) {
    tables = new ArrayList<Table>();
    TableHandler handler = new TableHandler();
    parse(reader, handler);
  }


  /**
   * Run the Swing HTML parser over the reader, feeding events to the handler.
   * Problems during parsing are printed rather than thrown, so whatever
   * tables were collected before the failure remain available. The reader is
   * always closed afterwards so that file-backed readers opened by the
   * constructors are not leaked.
   */
  // The actual class doing some of the work:
  // javax.swing.text.html.parser.ParserDelegator pd;
  void parse(Reader reader, HTMLEditorKit.ParserCallback handler) {
    // HTMLEditorKit.getParser() is protected; the anonymous subclass widens
    // its visibility so the default parser can be obtained from here.
    HTMLEditorKit.Parser parser = new HTMLEditorKit() {
      @Override
      public HTMLEditorKit.Parser getParser() {
        return super.getParser();
      }
    }.getParser();

    try {
      parser.parse(reader, handler, true);
    } catch (Exception e) {
      e.printStackTrace();
    } finally {
      // reader may be null if the caller failed to open its source
      if (reader != null) {
        try {
          reader.close();
        } catch (IOException e) {
          e.printStackTrace();
        }
      }
    }
  }


  /** Number of tables found in the document. */
  public int getTableCount() {
    return tables.size();
  }


  /** Get a single table; index follows the order the tables were found. */
  public Table getTable(int index) {
    return tables.get(index);
  }


  /**
   * Get the list of tables as an array of Table objects.
   */
  public Table[] getTables() {
    // every element was created as a TableHTML in TableHandler
    TableHTML[] outgoing = new TableHTML[tables.size()];
    tables.toArray(outgoing);
    return outgoing;
  }


  /**
   * Get the list of tables as an ArrayList of Table objects.
   * This is the internal list itself, not a copy.
   */
  public ArrayList<Table> getTableList() {
    return tables;
  }


  /**
   * Save every table through parent.saveTable(). File names are the prefix,
   * followed by the table index formatted with PApplet.nf() using as many
   * digits as the table count has, followed by ".csv".
   */
  public void writeTables(PApplet parent, String prefix) {
    // count the decimal digits in the number of tables
    int digits = 0;
    int num = getTableCount();
    while (num > 0) {
      num /= 10;
      digits++;
    }

    for (int i = 0; i < getTableCount(); i++) {
      String name = prefix + PApplet.nf(i, digits);
      parent.saveTable(tables.get(i), name + ".csv");
    }
  }


  ////////////////////////////////////////////////////////////////////////////////


  /** Parser callback that turns table-related tags and text into TableHTML content. */
  class TableHandler extends HTMLEditorKit.ParserCallback {

    @Override
    public void handleStartTag(HTML.Tag tag, MutableAttributeSet a, int pos) {
      if (tag == HTML.Tag.TABLE) {
        // the table that was open (if any) becomes the parent of the new one
        currentTable = new TableHTML(currentTable);
        tables.add(currentTable);

      } else if (currentTable != null) {
        if (tag == HTML.Tag.TR) {
          currentTable.beginTableRow();

        } else if (tag == HTML.Tag.TD || tag == HTML.Tag.TH) {
          int advance = parseColspan(a.getAttribute(HTML.Attribute.COLSPAN));
          currentTable.beginTableData(advance);

          if (a.getAttribute(HTML.Attribute.ROWSPAN) != null) {
            System.err.println("rowspan attribute in this table is being ignored");
          }
        }
      }
    }


    /**
     * Work out how many columns a cell spans. A missing, malformed, zero or
     * negative colspan counts as 1, so one bad attribute cannot abort the
     * parse or leave the column cursor stuck or moving backwards.
     */
    private int parseColspan(Object value) {
      if (value == null) {
        return 1;
      }
      try {
        return Math.max(1, Integer.parseInt(value.toString().trim()));
      } catch (NumberFormatException e) {
        System.err.println("ignoring invalid colspan attribute: " + value);
        return 1;
      }
    }


    @Override
    public void handleText(char[] c, int pos) {
      if (currentTable != null) {
        String data = new String(c).trim();
        currentTable.setContent(data);
      }
    }


    @Override
    public void handleEndTag(HTML.Tag tag, int pos) {
      if (currentTable != null) {
        if (tag == HTML.Tag.TABLE) {
          // step back out to the enclosing table (null at the top level)
          currentTable = currentTable.parent;

        } else if (tag == HTML.Tag.TR) {
          currentTable.endTableRow();

        } else if (tag == HTML.Tag.TD || tag == HTML.Tag.TH) {
          currentTable.endTableData();
        }
      }
    }
  }
}
////////////////////////////////////////////////////////////////////////////////
/**
 * A Table that is filled in step by step while an HTML document is parsed.
 * The parser callbacks drive it through beginTableRow(), beginTableData(),
 * setContent(), endTableData() and endTableRow(); the fields below hold the
 * cursor position between those calls.
 */
public class TableHTML extends Table {
  // State captured while parsing.

  /** Table passed to the constructor; the scraper passes the table that was open when this one began. */
  TableHTML parent;

  /** How many columns the cell currently being read moves the cursor by. */
  int colAdvance;

  /** Row the cursor is currently on. */
  int rowIndex;

  /** Column the cursor is currently on. */
  int colIndex;

  /** Largest column position reached at the end of any cell so far. */
  int colCount;


  TableHTML(TableHTML parent) {
    super();
    this.parent = parent;
  }


  /** Start a new row by adding one to the table. */
  void beginTableRow() {
    addRow();
  }


  /**
   * Start a new cell that spans the given number of columns, making sure
   * the last column it covers exists.
   */
  void beginTableData(int advance) {
    colAdvance = advance;
    int lastColumn = colIndex + advance - 1;
    ensureColumn(lastColumn);
  }


  /**
   * Append text to the cell under the cursor. Text may arrive in several
   * pieces for one cell, so it is added to whatever is already stored.
   */
  void setContent(String what) {
    String existing = getString(rowIndex, colIndex);
    String combined = (existing == null) ? what : existing + what;
    setString(rowIndex, colIndex, combined);
  }


  /** Finish the current cell: move the cursor past it and record the widest row. */
  void endTableData() {
    colIndex += colAdvance;
    if (colIndex > colCount) {
      colCount = colIndex;
    }
  }


  /** Finish the current row: move to the next row and return to the first column. */
  void endTableRow() {
    colIndex = 0;
    rowCount = ++rowIndex;
  }
}

View File

@@ -1,259 +0,0 @@
package processing.data;
import java.io.*;
import java.util.zip.*;
import javax.xml.parsers.ParserConfigurationException;
import org.xml.sax.SAXException;
import processing.core.*;
/**
 * Table read from an OpenDocument spreadsheet (.ods). An .ods file is a zip
 * archive; the cell data is parsed from its content.xml entry.
 * <p>
 * The "actual" flag selects what is stored for a cell: when true, the
 * office:value attribute is used if the cell has one, and hyperlinks are
 * stored as their xlink:href target; when false, the text found in the
 * cell's text:p element is used.
 */
public class TableODS extends Table {

  /** Read all sheets from a file, storing the text of each cell. */
  public TableODS(File odsFile) {
    this(getContentXML(odsFile), null, false);
  }


  /** Read all sheets from a file. */
  public TableODS(File odsFile, boolean actual) {
    this(getContentXML(odsFile), null, actual);
  }


  /** Read all sheets from parent.createInput(filename), storing the text of each cell. */
  public TableODS(PApplet parent, String filename) {
    this(getContentXML(parent.createInput(filename)), null, false);
  }


  /** Read all sheets from parent.createInput(filename). */
  public TableODS(PApplet parent, String filename, boolean actual) {
    this(getContentXML(parent.createInput(filename)), null, actual);
  }


  /** Read only the sheet with the given name from parent.createInput(filename). */
  public TableODS(PApplet parent, String filename, String worksheet, boolean actual) {
    this(getContentXML(parent.createInput(filename)), worksheet, actual);
  }


  /**
   * Parse spreadsheet content. Failures are printed rather than thrown.
   * The stream is closed once reading is done, which also releases the zip
   * file or zip stream it came from.
   * @param input InputStream of the content.xml file inside the .ods
   * @param worksheet name of the sheet to read, or null to read every sheet
   * @param actual true to prefer office:value and link targets over cell text
   */
  protected TableODS(InputStream input, String worksheet, boolean actual) {
    try {
      read(input, worksheet, actual);

    } catch (UnsupportedEncodingException uee) {
      uee.printStackTrace();
    } catch (IOException e) {
      e.printStackTrace();
    } catch (ParserConfigurationException e) {
      e.printStackTrace();
    } catch (SAXException e) {
      e.printStackTrace();

    } finally {
      if (input != null) {
        try {
          input.close();
        } catch (IOException e) {
          e.printStackTrace();
        }
      }
    }
  }


  /**
   * Parse content.xml and load the matching sheet(s) into this table.
   * A file can hold several sheets, each one a table:table element with a
   * table:name attribute. When worksheet is null every sheet is parsed, and
   * each one starts writing at row 0 again.
   */
  protected void read(InputStream input, String worksheet, boolean actual) throws IOException, ParserConfigurationException, SAXException {
    XML xml = new XML(PApplet.createReader(input));

    // <table:table table:name="Sheet1" table:style-name="ta1" table:print="false">
    // <table:table table:name="Sheet2" table:style-name="ta1" table:print="false">
    XML[] sheets =
      xml.getChildren("office:body/office:spreadsheet/table:table");

    for (XML sheet : sheets) {
      if (worksheet == null || worksheet.equals(sheet.getString("table:name"))) {
        parseSheet(sheet, actual);
      }
    }
  }


  /**
   * Copy the cells of one sheet into this table. Repeated cells
   * (table:number-columns-repeated) and repeated rows
   * (table:number-rows-repeated) are expanded; a repeated row is only
   * duplicated when it contains at least one non-empty cell.
   */
  protected void parseSheet(XML sheet, boolean actual) {
    XML[] rows = sheet.getChildren("table:table-row");

    int rowIndex = 0;
    for (XML row : rows) {
      int rowRepeat = row.getInt("table:number-rows-repeated", 1);
      boolean rowNotNull = false;
      XML[] cells = row.getChildren();
      int columnIndex = 0;

      for (XML cell : cells) {
        int cellRepeat = cell.getInt("table:number-columns-repeated", 1);
        String cellData = odsCellData(cell, actual);

        // empty cells still advance the column position
        for (int r = 0; r < cellRepeat; r++) {
          if (cellData != null) {
            setString(rowIndex, columnIndex, cellData);
            rowNotNull = true;
          }
          columnIndex++;
        }
      }

      if (rowNotNull && rowRepeat > 1) {
        String[] rowStrings = getStringRow(rowIndex);
        for (int r = 1; r < rowRepeat; r++) {
          addRow(rowStrings);
        }
      }
      rowIndex += rowRepeat;
    }
  }


  /**
   * Work out the String to store for one table cell, or null if it is empty.
   * <pre>
   * &lt;table:table-cell table:formula="of:=SUM([.E7:.E8])" office:value-type="float" office:value="4150"&gt;
   *   &lt;text:p&gt;4150.00&lt;/text:p&gt;
   * &lt;/table:table-cell&gt;
   * </pre>
   * @throws RuntimeException if a cell with children does not hold exactly one text:p element
   */
  private String odsCellData(XML cell, boolean actual) {
    // if there's an office:value in the cell, just roll with that
    String cellData = actual ? cell.getString("office:value") : null;
    if (cellData != null || cell.getChildCount() == 0) {
      return cellData;
    }

    XML[] paragraphElements = cell.getChildren("text:p");
    if (paragraphElements.length != 1) {
      for (XML el : paragraphElements) {
        System.err.println(el.toString());
      }
      // this is reached for zero paragraphs as well as for several
      throw new RuntimeException("expected exactly one text:p element but found " +
                                 paragraphElements.length);
    }

    XML textp = paragraphElements[0];
    String textpContent = textp.getContent();
    if (textpContent != null) {
      return textpContent;  // nothing fancy, the text is in the text:p element
    }

    // if there are sub-elements, the content shows up as a child element
    // (for which getName() returns null.. which seems wrong)
    XML[] textpKids = textp.getChildren();
    StringBuffer cellBuffer = new StringBuffer();
    for (XML kid : textpKids) {
      String kidName = kid.getName();
      if (kidName == null) {
        appendNotNull(kid, cellBuffer);

      } else if (kidName.equals("text:s")) {
        // a run of spaces, with the count stored in text:c
        int spaceCount = kid.getInt("text:c", 1);
        for (int space = 0; space < spaceCount; space++) {
          cellBuffer.append(' ');
        }

      } else if (kidName.equals("text:span")) {
        appendNotNull(kid, cellBuffer);

      } else if (kidName.equals("text:a")) {
        // <text:a xlink:href="http://blah.com/">blah.com</text:a>
        if (actual) {
          cellBuffer.append(kid.getString("xlink:href"));
        } else {
          appendNotNull(kid, cellBuffer);
        }

      } else {
        // unknown element: keep its text, but report it
        appendNotNull(kid, cellBuffer);
        System.err.println(getClass().getName() + ": don't understand: " + kid);
      }
    }
    return cellBuffer.toString();
  }


  /** Append the content of an element to the buffer, skipping null content. */
  protected void appendNotNull(XML kid, StringBuffer buffer) {
    String content = kid.getContent();
    if (content != null) {
      buffer.append(content);
    }
  }


  /**
   * Read zip file from a local file, and return the InputStream for content.xml.
   * Closing the returned stream also closes the zip file. Returns null (after
   * printing the problem) if the file cannot be read as a zip archive or has
   * no content.xml entry.
   */
  static protected InputStream getContentXML(File file) {
    ZipFile zip = null;
    try {
      zip = new ZipFile(file);
      ZipEntry entry = zip.getEntry("content.xml");
      if (entry != null) {
        final ZipFile archive = zip;
        InputStream content = new FilterInputStream(archive.getInputStream(entry)) {
          @Override
          public void close() throws IOException {
            try {
              super.close();
            } finally {
              archive.close();
            }
          }
        };
        zip = null;  // the returned stream is now responsible for closing it
        return content;
      }
      // getEntry() returns null for a missing entry
      System.err.println(file + " does not contain a content.xml entry");

    } catch (IOException e) {
      e.printStackTrace();

    } finally {
      // only still set when no stream is being returned
      if (zip != null) {
        try {
          zip.close();
        } catch (IOException e) {
          e.printStackTrace();
        }
      }
    }
    return null;
  }


  /**
   * Read zip file from an InputStream, and return the InputStream for content.xml.
   * The returned stream is the zip stream itself, positioned at the start of
   * the content.xml entry. Returns null if no such entry is found or reading fails.
   */
  static protected InputStream getContentXML(InputStream input) {
    ZipInputStream zis = new ZipInputStream(input);
    ZipEntry entry = null;
    try {
      while ((entry = zis.getNextEntry()) != null) {
        if (entry.getName().equals("content.xml")) {
          return zis;
        }
      }
    } catch (IOException e) {
      e.printStackTrace();
    }
    return null;
  }
}

View File

@@ -80,7 +80,7 @@ public class XML implements Serializable {
/**
* @param file description TBD
* Advanced users only; see loadXML() in PApplet.
*/
public XML(File file) throws IOException, ParserConfigurationException, SAXException {
this(file, null);
@@ -88,31 +88,39 @@ public class XML implements Serializable {
/**
* @param options description TBD
* Advanced users only; see loadXML() in PApplet.
*/
public XML(File file, String options) throws IOException, ParserConfigurationException, SAXException {
this(PApplet.createReader(file), options);
}
// /**
// * @param input description TBD
// */
// public XML(InputStream input) throws IOException, ParserConfigurationException, SAXException {
// this(input, null);
// }
public XML(InputStream input) throws IOException, ParserConfigurationException, SAXException {
this(input, null);
}
// public XML(InputStream input, String options) throws IOException, ParserConfigurationException, SAXException {
// this(PApplet.createReader(input), options);
// }
/**
* Shouldn't be part of main p5 reference, this is for advanced users.
* Note that while it doesn't accept anything but UTF-8, this is preserved
* so that we have some chance of implementing that in the future.
*/
public XML(InputStream input, String options) throws IOException, ParserConfigurationException, SAXException {
this(PApplet.createReader(input), options);
}
/**
* Advanced users only; see loadXML() in PApplet.
*/
public XML(Reader reader) throws IOException, ParserConfigurationException, SAXException {
this(reader, null);
}
/**
* Advanced users only; see loadXML() in PApplet.
*/
public XML(Reader reader, String options) throws IOException, ParserConfigurationException, SAXException {
DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();

View File

@@ -12,11 +12,14 @@ X getFloatContent()
X getContent() or getStringContent()?
X switch to CATEGORY instead of CATEGORICAL
https://blogs.oracle.com/CoreJavaTechTips/entry/using_enhanced_for_loops_with
table
X do we need getColumnType() inside TableRow?
X also inside Table
X also do we make the constants public?
// function that will convert awful CSV to TSV.. or something else?
// maybe to write binary instead? then read the binary file once it's ok?
@@ -53,6 +56,7 @@ _ sortHighLow, sortHigh, sortHighest, sortDown
it's going to be File or Reader (mostly BufferedReader) everywhere
though TableODS needs an InputStream...
and XML could use InputStream if we hope to be able to reflect doc encoding
setMissingXxxx() -> should this live in PApplet? be static?
cons: static stinks, diff tables might use diff values

View File

@@ -545,6 +545,7 @@ _ when opening from double-click on the mac, doesn't replace untitled
_ or in general, issues between opening new window and another launching
_ probably need to synchronize the file open methods inside Base
_ (could in fact cause nastiness with editors[] access)
_ https://github.com/processing/processing/issues/1745
_ editors opening up at the same time on load?
_ either synchronize the open (at a minimum)
_ or wait for mac handlers to register an open event