Refactor out preproc.issue.

Removed the preproc.issue package by relocating supporting classes for PreprocessIssueMessageSimplifier and PdeIssueEmitter as inner classes. This assists with https://github.com/processing/processing4/issues/10.
This commit is contained in:
A Pottinger
2020-04-26 09:51:31 -07:00
parent c06e960fe1
commit e543edce99
38 changed files with 923 additions and 968 deletions

View File

@@ -6,7 +6,7 @@ import java.util.stream.IntStream;
import processing.app.Problem;
import processing.app.ui.Editor;
import processing.mode.java.preproc.issue.PdePreprocessIssue;
import processing.mode.java.preproc.PdePreprocessIssue;
/**

View File

@@ -0,0 +1,357 @@
/* -*- mode: java; c-basic-offset: 2; indent-tabs-mode: nil -*- */
/*
Part of the Processing project - http://processing.org
Copyright (c) 2019 The Processing Foundation
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License version 2
as published by the Free Software Foundation.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software Foundation,
Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
package processing.mode.java.preproc;
import org.antlr.v4.runtime.BaseErrorListener;
import org.antlr.v4.runtime.RecognitionException;
import org.antlr.v4.runtime.Recognizer;
import processing.mode.java.SourceUtil;
import java.util.Optional;
/**
 * ANTLR error listener which forwards syntax errors to a preprocess issue listener.
 *
 * <p>
 * Every syntax error reported by ANTLR is first run through the
 * {PreprocessIssueMessageSimplifier} to obtain a friendlier message, is optionally relocated
 * using the sketch source, and is finally delivered to the registered
 * {PdePreprocessIssueListener} as a {PdePreprocessIssue}.
 * </p>
 */
public class PdeIssueEmitter extends BaseErrorListener {

  private final PdePreprocessIssueListener issueListener;
  private final Optional<SourceEmitter> sourceEmitterMaybe;

  /**
   * Create a new issue emitter without access to the sketch source.
   *
   * <p>
   * Without the source, issues are always reported at the location given by ANTLR so some error
   * beautification is not available. Prefer the other constructor when the source can be
   * provided.
   * </p>
   *
   * @param newListener The listener to inform when encountering a syntax error.
   */
  public PdeIssueEmitter(PdePreprocessIssueListener newListener) {
    this.issueListener = newListener;
    this.sourceEmitterMaybe = Optional.empty();
  }

  /**
   * Create a new issue emitter which can consult the sketch source.
   *
   * @param newListener The listener to inform when encountering a syntax error.
   * @param newSourceEmitter Provider of the sketch source used when relocating certain syntax
   *    errors. Must not be null.
   */
  public PdeIssueEmitter(PdePreprocessIssueListener newListener, SourceEmitter newSourceEmitter) {
    this.issueListener = newListener;
    this.sourceEmitterMaybe = Optional.of(newSourceEmitter);
  }

  /**
   * Simplify a syntax error reported by ANTLR and pass it on to the listener.
   *
   * @param recognizer Unused.
   * @param offendingSymbol Unused.
   * @param line The line on which the error was reported.
   * @param charPositionInLine The character position within the line of the error.
   * @param msg The original error message to simplify.
   * @param e Unused.
   */
  @Override
  public void syntaxError(Recognizer<?, ?> recognizer, Object offendingSymbol, int line,
        int charPositionInLine, String msg, RecognitionException e) {

    IssueMessageSimplification simplified = PreprocessIssueMessageSimplifier.get().simplify(msg);

    // Only attempt relocation when the sketch source is available
    IssueLocation location = sourceEmitterMaybe
        .map((emitter) -> IssueLocationFactory.getLineWithOffset(
            simplified,
            line,
            charPositionInLine,
            emitter.getSource()
        ))
        .orElseGet(() -> new IssueLocation(line, charPositionInLine));

    PdePreprocessIssue issue = new PdePreprocessIssue(
        location.getLine(),
        location.getCharPosition(),
        simplified.getMessage()
    );
    issueListener.onIssue(issue);
  }

  /**
   * Strategy which can provide the full body of a processing sketch.
   */
  public interface SourceEmitter {

    /**
     * Get the full body of the processing sketch.
     *
     * @return String processing sketch source code across all tabs.
     */
    String getSource();

  }

  /**
   * Listener that responds to issues reported by the preprocessor.
   */
  public interface PdePreprocessIssueListener {

    /**
     * Callback to invoke when an issue is encountered in preprocessing.
     *
     * @param issue Description of the issue.
     */
    void onIssue(PdePreprocessIssue issue);

  }

  /*
   * ================================
   * == Supporting data structures ==
   * ================================
   */

  /**
   * Result of simplifying or explaining an issue message.
   *
   * <p>
   * Holds the message that should be shown to the user after the simplification rules have been
   * applied along with a flag describing whether the issue should be shown on the token prior to
   * the one on which it was originally reported.
   * </p>
   */
  public static class IssueMessageSimplification {

    private final String text;
    private final boolean priorTokenAttribution;

    /**
     * Create a simplification which leaves the token attribution untouched.
     *
     * @param newMessage The message to show to the user.
     */
    public IssueMessageSimplification(String newMessage) {
      this(newMessage, false);
    }

    /**
     * Create a simplification with explicit control over token attribution.
     *
     * <p>
     * Attribution to the "prior token" is useful, for example, when a semicolon is missing: the
     * error is generated on the token after the omission but it is more helpful for the user to
     * see it on the token before.
     * </p>
     *
     * @param newMessage The message to show to the user.
     * @param newAttributeToPriorToken True if the error should be shown on the token prior to the
     *    one on which the error was originally generated. False otherwise.
     */
    public IssueMessageSimplification(String newMessage, boolean newAttributeToPriorToken) {
      this.text = newMessage;
      this.priorTokenAttribution = newAttributeToPriorToken;
    }

    /**
     * Get the error message text that should be shown to the user.
     *
     * @return The error message text that should be shown to the user.
     */
    public String getMessage() {
      return text;
    }

    /**
     * Determine if the error should be attributed to the prior token.
     *
     * @return True if the error should be attributed to the prior non-skip token (not whitespace
     *    or comment). False if it should stay on the original offending token.
     */
    public boolean getAttributeToPriorToken() {
      return priorTokenAttribution;
    }

  }

  /**
   * Line and character position at which an issue occurred.
   */
  public static class IssueLocation {

    private final int lineNumber;
    private final int column;

    /**
     * Create a new issue location structure.
     *
     * @param newLine The line (1-indexed) where the issue occurred. This should be in the global
     *    file generated by the preprocessor and not relative to the start of the tab.
     * @param newCharPosition The position on the line.
     */
    public IssueLocation(int newLine, int newCharPosition) {
      this.lineNumber = newLine;
      this.column = newCharPosition;
    }

    /**
     * Get the 1-indexed line on which this error occurred.
     *
     * @return The line on which this error occurred, relative to the global file generated by the
     *    preprocessor and not relative to the start of the tab.
     */
    public int getLine() {
      return lineNumber;
    }

    /**
     * Get the position of the error within the line.
     *
     * @return The number of characters including whitespace from the start of the line at which
     *    the error occurred.
     */
    public int getCharPosition() {
      return column;
    }

  }

  /*
   * =====================
   * == Utility classes ==
   * =====================
   */

  /**
   * Utility that decides where in source an issue should be reported.
   *
   * <p>
   * For some errors the "mistake" is not close to where the error is generated. When a semicolon
   * is omitted, for example, the grammatical error appears on the first non-skip token after the
   * omission, which may be far from the line the user wants to edit. This utility determines if
   * an issue needs a new location and, if so, what that location should be.
   * </p>
   */
  public static class IssueLocationFactory {

    /**
     * Determine where an issue should be reported.
     *
     * @param simplification The issue simplification generated from
     *    {PreprocessIssueMessageSimplifier}.
     * @param originalLine The original line (1 indexed) on which the issue was reported.
     * @param originalOffset The original number of characters from the start of the line where
     *    the issue was reported.
     * @param source The full concatenated source of the sketch being built.
     * @return The new location where the issue should be reported. This is identical to the
     *    original location if the issue was not moved.
     */
    public static IssueLocation getLineWithOffset(IssueMessageSimplification simplification,
          int originalLine, int originalOffset, String source) {

      // Keep the reported location unless relocation was requested and a line is known
      boolean keepOriginal = !simplification.getAttributeToPriorToken() || originalLine == 0;
      if (keepOriginal) {
        return new IssueLocation(originalLine, originalOffset);
      }

      // All code before the line on which the issue was reported
      String priorCode = getContentsUpToLine(source, originalLine);

      // Walk backwards through that code until the finder has seen the prior token
      PreprocessIssueMessageSimplifier.PriorTokenFinder finder =
          new PreprocessIssueMessageSimplifier.PriorTokenFinder();
      for (int i = priorCode.length() - 1; i >= 0 && !finder.isDone(); i--) {
        finder.step(priorCode.charAt(i));
      }

      // Without a token position there is nothing to shift
      Optional<Integer> tokenOffsetMaybe = finder.getTokenPositionMaybe();
      int matchStart = priorCode.length();
      int linesBack = 0;
      if (tokenOffsetMaybe.isPresent()) {
        matchStart -= tokenOffsetMaybe.get();
        linesBack = SourceUtil.getCount(priorCode.substring(matchStart), "\n");
      }

      // Column is the number of characters on the final line of the code before the match
      String beforeMatch = priorCode.substring(0, matchStart);
      int column = beforeMatch.length() - beforeMatch.lastIndexOf('\n') - 1;

      return new IssueLocation(originalLine - linesBack, column);
    }

    /**
     * Get all of the contents of source leading up to a line.
     *
     * @param source The full concatenated sketch source.
     * @param endLineExclusive The line up to which code should be returned. This is an
     *    "exclusive" boundary: code from this line itself is not included.
     * @return All of the sketch code leading up to but not including the line given.
     */
    private static String getContentsUpToLine(String source, int endLineExclusive) {
      int cursor = 0;

      // Hop from newline to newline, stopping just after the last one that is needed
      for (int linesSeen = 0; linesSeen < endLineExclusive - 1; linesSeen++) {
        int newlineIndex = source.indexOf('\n', cursor);
        if (newlineIndex < 0) {
          cursor = source.length();
          break;
        }
        cursor = newlineIndex + 1;
      }

      return source.substring(0, cursor);
    }

  }

}

View File

@@ -35,8 +35,6 @@ import processing.mode.java.ImportStatement;
import processing.mode.java.SourceUtil;
import processing.mode.java.TextTransform;
import processing.mode.java.preproc.PdePreprocessor.Mode;
import processing.mode.java.preproc.issue.PdePreprocessIssue;
import processing.mode.java.preproc.issue.PreprocessIssueMessageSimplifier;
/**
* ANTLR tree traversal listener that performs code rewrites as part of sketch preprocessing.

View File

@@ -0,0 +1,52 @@
package processing.mode.java.preproc;
/**
 * Immutable record of a single issue emitted from the preprocessor.
 */
public class PdePreprocessIssue {

  private final int lineNumber;
  private final int column;
  private final String description;

  /**
   * Create a new record of an issue emitted from the preprocessor.
   *
   * @param newLine The line in the generated java file.
   * @param newCharPositionInLine The character position in the source line.
   * @param newMsg Description of the issue.
   */
  public PdePreprocessIssue(int newLine, int newCharPositionInLine, String newMsg) {
    this.lineNumber = newLine;
    this.column = newCharPositionInLine;
    this.description = newMsg;
  }

  /**
   * Get the unified source line where the issue was found.
   *
   * @return The line in the output java source where the issue was found.
   */
  public int getLine() {
    return lineNumber;
  }

  /**
   * Get the character position in the source line of the issue.
   *
   * @return The source column where the issue was found.
   */
  public int getCharPositionInLine() {
    return column;
  }

  /**
   * Get a description of the issue found.
   *
   * @return Message describing the issue as given at construction.
   */
  public String getMsg() {
    return description;
  }

}

View File

@@ -0,0 +1,32 @@
package processing.mode.java.preproc;
import processing.mode.java.preproc.PdePreprocessIssue;
/**
 * Unchecked exception signalling that the preprocessor found an issue.
 *
 * <p>
 * The exception message is the message of the wrapped {PdePreprocessIssue}.
 * </p>
 */
public class PdePreprocessIssueException extends RuntimeException {

  private final PdePreprocessIssue issue;

  /**
   * Create a new exception indicating that there was a preprocessing issue.
   *
   * @param newPreprocessIssue Issue encountered. Its message becomes the exception message.
   */
  public PdePreprocessIssueException(PdePreprocessIssue newPreprocessIssue) {
    super(newPreprocessIssue.getMsg());
    this.issue = newPreprocessIssue;
  }

  /**
   * Get information about the preprocessing issue found.
   *
   * @return Record of the preprocessor issue given at construction.
   */
  public PdePreprocessIssue getIssue() {
    return issue;
  }

}

View File

@@ -34,8 +34,6 @@ import org.antlr.v4.runtime.tree.ParseTreeWalker;
import processing.app.Preferences;
import processing.app.SketchException;
import processing.mode.java.preproc.issue.PdeIssueEmitter;
import processing.mode.java.preproc.issue.PdePreprocessIssue;
/**

View File

@@ -19,28 +19,26 @@ along with this program; if not, write to the Free Software Foundation,
Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
package processing.mode.java.preproc.issue;
package processing.mode.java.preproc;
import processing.app.Language;
import processing.app.Platform;
import processing.mode.java.SourceUtil;
import java.util.ArrayList;
import java.util.List;
import java.util.Optional;
import java.util.*;
import java.util.concurrent.atomic.AtomicReference;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
* Facade that tries to create a better error message for syntax issues in input source.
* Utility class that generates localized error messages for incorrect sketch syntax.
*
* <p>
* Facade that interprets error messages from ANTLR in an attempt to generate an improved error
* Utility that interprets error messages from ANTLR in an attempt to generate an improved error
* message when describing grammatically incorrect input. This is distinct from compiler errors
* caused after generating an AST.
* caused after generating an AST. This is required to produce the localized error messages.
* </p>
*
* <p>
@@ -100,19 +98,21 @@ public class PreprocessIssueMessageSimplifier {
* @param originalMessage Error message generated from ANTLR.
* @return An improved error message or the originalMessage if no improvements could be made.
*/
public IssueMessageSimplification simplify(String originalMessage) {
Optional<IssueMessageSimplification> matching = strategies.stream()
public PdeIssueEmitter.IssueMessageSimplification simplify(String originalMessage) {
Optional<PdeIssueEmitter.IssueMessageSimplification> matching = strategies.stream()
.map((x) -> x.simplify(originalMessage))
.filter(Optional::isPresent)
.map(Optional::get)
.findFirst();
return matching.orElse(new IssueMessageSimplification(originalMessage));
return matching.orElse(new PdeIssueEmitter.IssueMessageSimplification(originalMessage));
}
/* ============================
* === Enumerate strategies ===
* ============================
*
*
*/
/**
@@ -216,7 +216,7 @@ public class PreprocessIssueMessageSimplifier {
* @return An optional with an improved message or an empty optional if no improvements could be
* made by this strategy.
*/
Optional<IssueMessageSimplification> simplify(String message);
Optional<PdeIssueEmitter.IssueMessageSimplification> simplify(String message);
}
@@ -266,7 +266,7 @@ public class PreprocessIssueMessageSimplifier {
}
@Override
public Optional<IssueMessageSimplification> simplify(String message) {
public Optional<PdeIssueEmitter.IssueMessageSimplification> simplify(String message) {
String messageContent = getOffendingArea(message);
if (filter.isPresent()) {
@@ -283,7 +283,7 @@ public class PreprocessIssueMessageSimplifier {
token
);
return Optional.of(
new IssueMessageSimplification(newMessage)
new PdeIssueEmitter.IssueMessageSimplification(newMessage)
);
}
}
@@ -339,7 +339,7 @@ public class PreprocessIssueMessageSimplifier {
}
@Override
public Optional<IssueMessageSimplification> simplify(String message) {
public Optional<PdeIssueEmitter.IssueMessageSimplification> simplify(String message) {
String messageContent = getOffendingArea(message);
int count1 = SourceUtil.getCount(messageContent, token1);
@@ -361,7 +361,7 @@ public class PreprocessIssueMessageSimplifier {
.replace("%c", "%s"), missingToken);
return Optional.of(
new IssueMessageSimplification(newMessage)
new PdeIssueEmitter.IssueMessageSimplification(newMessage)
);
}
@@ -420,7 +420,7 @@ public class PreprocessIssueMessageSimplifier {
}
@Override
public Optional<IssueMessageSimplification> simplify(String message) {
public Optional<PdeIssueEmitter.IssueMessageSimplification> simplify(String message) {
if (pattern.matcher(message).find()) {
String newMessage = String.format(
hintTemplate,
@@ -428,7 +428,7 @@ public class PreprocessIssueMessageSimplifier {
);
return Optional.of(
new IssueMessageSimplification(newMessage, getAttributeToPrior())
new PdeIssueEmitter.IssueMessageSimplification(newMessage, getAttributeToPrior())
);
} else {
return Optional.empty();
@@ -553,7 +553,7 @@ public class PreprocessIssueMessageSimplifier {
return Optional.empty();
}
return Optional.of(new IssueMessageSimplification(
return Optional.of(new PdeIssueEmitter.IssueMessageSimplification(
getLocalStr("editor.status.missing.left_curly_bracket")
));
};
@@ -568,7 +568,7 @@ public class PreprocessIssueMessageSimplifier {
return Optional.empty();
}
return Optional.of(new IssueMessageSimplification(
return Optional.of(new PdeIssueEmitter.IssueMessageSimplification(
getLocalStr("editor.status.missing.right_curly_bracket")
));
};
@@ -585,7 +585,7 @@ public class PreprocessIssueMessageSimplifier {
message.replace("missing Identifier at", "")
);
return Optional.of(
new IssueMessageSimplification(newMessage)
new PdeIssueEmitter.IssueMessageSimplification(newMessage)
);
} else {
return Optional.empty();
@@ -613,7 +613,7 @@ public class PreprocessIssueMessageSimplifier {
String newMessage = String.format(langTemplate, missingPiece);
return Optional.of(new IssueMessageSimplification(newMessage));
return Optional.of(new PdeIssueEmitter.IssueMessageSimplification(newMessage));
} else {
return Optional.empty();
}
@@ -632,7 +632,7 @@ public class PreprocessIssueMessageSimplifier {
String newMessage = String.format(newMessageOuter, innerMsg);
return Optional.of(
new IssueMessageSimplification(newMessage)
new PdeIssueEmitter.IssueMessageSimplification(newMessage)
);
} else {
return Optional.empty();
@@ -655,7 +655,7 @@ public class PreprocessIssueMessageSimplifier {
);
return Optional.of(
new IssueMessageSimplification(
new PdeIssueEmitter.IssueMessageSimplification(
newMessage
)
);
@@ -671,21 +671,376 @@ public class PreprocessIssueMessageSimplifier {
protected static class DefaultMessageSimplifier implements PreprocIssueMessageSimplifierStrategy {
@Override
public Optional<IssueMessageSimplification> simplify(String message) {
public Optional<PdeIssueEmitter.IssueMessageSimplification> simplify(String message) {
if (message.contains("viable alternative")) {
String newMessage = String.format(
getLocalizedGenericError("%s"),
getOffendingArea(message)
);
return Optional.of(
new IssueMessageSimplification(newMessage)
new PdeIssueEmitter.IssueMessageSimplification(newMessage)
);
} else {
return Optional.of(
new IssueMessageSimplification(message)
new PdeIssueEmitter.IssueMessageSimplification(message)
);
}
}
}
/**
 * Simple automaton that reads backwards from a position in source to find the prior token.
 *
 * <p>
 * When helping generate messages for the user, it is often useful to be able to locate the
 * position of the first token immediately before another location in source. For example,
 * consider error reporting when a semicolon is missing. The error is generated on the token after
 * the line on which the semicolon was omitted so, while the error technically emerges on the next
 * line, it is better for the user for it to appear earlier. Specifically, it is most sensible for
 * it to appear on the "prior token" because this is where it was forgotten.
 * </p>
 *
 * <p>
 * To that end, this finite state automaton can read backwards from a position in source to locate
 * the first "non-skip token" preceding that location. Here a "skip" token means one that is
 * ignored by the preprocessor and does not impact output code (this includes comments and
 * whitespace). This automaton will read character by character from source until it knows it has
 * seen a non-skip token, returning the location of that non-skip token.
 * </p>
 *
 * <p>
 * A formalized FSA is useful here in order to traverse code which can have a complex grammar.
 * As there are a number of ways in the Java / Processing grammar one can encounter skip tokens,
 * this formalized implementation describes the state machine directly in order to provide
 * hopefully more readability / transparency compared to a regex without requiring the use of
 * something heavier like ANTLR.
 * </p>
 */
public static class PriorTokenFinder {

  // Possible states for this FSA
  private enum AutomatonState {

    // Automaton is not certain if it is parsing a skip or non-skip character
    UNKNOWN,

    // Automaton has found a possible token but it is not sure if inside a comment
    POSSIBLE_TOKEN,

    // Automaton has found a token but also a forward slash so, if the next character is also a
    // "/", it is inside a single line comment.
    TOKEN_OR_MAYBE_SL_COMMENT,

    // Automaton has found a forward slash so, depending on the next character, it may be inside a
    // single line comment, multi-line comment, or it may have found a standalone token.
    TOKEN_OR_MAYBE_COMMENT,

    // Automaton has found a token and hit its terminal state.
    TOKEN,

    // Automaton is currently traversing a multi-line comment.
    MULTI_LINE_COMMENT,

    // Automaton is maybe leaving a multi line comment because it found an "*". If it picks up a
    // "/" next, the automaton knows it is no longer within a multi-line comment.
    MAYBE_LEAVE_MULTI_LINE_COMMENT
  }

  private boolean done;
  private Optional<Integer> tokenPosition;
  private AutomatonState state;
  private int charPosition;

  /**
   * Create a new automaton in unknown state and a character position of zero.
   */
  PriorTokenFinder() {
    reset();
  }

  /**
   * Determine if this automaton has found a token.
   *
   * @return True if this automaton has found a token and, thus, is in terminal state (so will
   *    ignore all future input). False if this automaton has not yet found a token since creation
   *    or last call to reset.
   */
  boolean isDone() {
    return done;
  }

  /**
   * Get the position of the token found.
   *
   * @return Optional containing the number of characters processed prior to finding the token or
   *    empty if no token found. Note that this is different from the number of total characters
   *    processed as some extra characters have to be read prior to the token itself to ensure it
   *    is not part of a comment or something similar. A value may be present before
   *    {@code isDone()} is true if a candidate token has been seen but not yet confirmed.
   */
  Optional<Integer> getTokenPositionMaybe() {
    return tokenPosition;
  }

  /**
   * Reset this automaton to UNKNOWN state with a character count of zero.
   */
  void reset() {
    done = false;
    tokenPosition = Optional.empty();
    state = AutomatonState.UNKNOWN;
    charPosition = 0;
  }

  /**
   * Process a character.
   *
   * <p>
   * Process the next character in an effort to find the "prior token". Note that this is
   * expecting the processing sketch source code to be fed one character at a time
   * <i>backwards</i> from the starting position in code. This is because it is looking for the
   * first non-skip token immediately <i>preceding</i> a position in source.
   * </p>
   *
   * @param input The next character to process.
   */
  void step(char input) {
    switch(state) {
      case UNKNOWN: stepUnknown(input); break;
      case POSSIBLE_TOKEN: stepPossibleToken(input); break;
      case TOKEN_OR_MAYBE_SL_COMMENT: stepTokenOrMaybeSingleLineComment(input); break;
      case TOKEN_OR_MAYBE_COMMENT: stepTokenOrMaybeComment(input); break;
      case MULTI_LINE_COMMENT: stepMultiLineComment(input); break;
      case MAYBE_LEAVE_MULTI_LINE_COMMENT: stepMaybeLeaveMultiLineComment(input); break;
      case TOKEN: /* Already have token. Nothing to be done. */ break;
    }

    // Counted even in the terminal state; tokenPosition is not touched once done
    charPosition++;
  }

  /**
   * Process the next character while in the UNKNOWN state.
   *
   * <p>
   * While not certain if looking at a skip or non-skip token, read the next character. If
   * whitespace, can ignore. If a forward slash, could indicate either a comment or a possible
   * token (move to TOKEN_OR_MAYBE_COMMENT). If anything else, may have found token but need to
   * ensure this line isn't part of a comment (move to POSSIBLE_TOKEN).
   * </p>
   *
   * @param input The next character to process.
   */
  private void stepUnknown(char input) {
    if (isWhitespace(input)) {
      return;
    }

    // Remember where the candidate token was seen until it is confirmed or rejected
    tokenPosition = Optional.of(charPosition);

    if (input == '/') {
      state = AutomatonState.TOKEN_OR_MAYBE_COMMENT;
    } else {
      state = AutomatonState.POSSIBLE_TOKEN;
    }
  }

  /**
   * Process the next character while in the POSSIBLE_TOKEN state.
   *
   * <p>
   * After having found a character that could indicate a token, need to ensure that the token
   * wasn't actually part of a single line comment ("//") so look for forward slashes (if found
   * move to TOKEN_OR_MAYBE_SL_COMMENT). If encountered a newline, the earlier found token was
   * not part of a comment so enter TOKEN state.
   * </p>
   *
   * @param input The next character to process.
   */
  private void stepPossibleToken(char input) {
    if (input == '\n') {
      enterNonSkipTokenState();
    } else if (input == '/') {
      state = AutomatonState.TOKEN_OR_MAYBE_SL_COMMENT;
    }

    // Else stay put
  }

  /**
   * Process the next character while in the TOKEN_OR_MAYBE_SL_COMMENT state.
   *
   * <p>
   * After having found a forward slash after encountering something else which may be a non-skip
   * token, one needs to check that it is preceded by another forward slash to have detected a
   * single line comment (return to UNKNOWN state). If found a new line, that forward slash was
   * actually a non-skip token itself so enter TOKEN state. Finally, if anything else, it is still
   * possible that we are traversing a single line comment so return to POSSIBLE_TOKEN state.
   * </p>
   *
   * @param input The next character to process.
   */
  private void stepTokenOrMaybeSingleLineComment(char input) {
    if (input == '\n') {
      enterNonSkipTokenState();
    } else if (input == '/') {
      returnToUnknownState();
    } else {
      state = AutomatonState.POSSIBLE_TOKEN;
    }
  }

  /**
   * Process the next character while in the TOKEN_OR_MAYBE_COMMENT state.
   *
   * <p>
   * After having found a forward slash without encountering something else that may be a non-skip
   * token: that forward slash is a non-skip token if preceded by a newline, could be a single
   * line comment if preceded by a forward slash, could be a multi-line comment if preceded
   * by an asterisk, or could be a non-skip token otherwise.
   * </p>
   *
   * @param input The next character to process.
   */
  private void stepTokenOrMaybeComment(char input) {
    if (input == '\n') {
      enterNonSkipTokenState();
    } else if (input == '/') {
      returnToUnknownState();
    } else if (input == '*') {
      enterMultilineComment();
    } else {
      state = AutomatonState.POSSIBLE_TOKEN;
    }
  }

  /**
   * Process the next character while in the MULTI_LINE_COMMENT state.
   *
   * <p>
   * Process the next character while traversing a multi-line comment. If an asterisk, we may be
   * encountering the end of the multiline comment (move to MAYBE_LEAVE_MULTI_LINE_COMMENT).
   * Otherwise, can ignore character.
   * </p>
   *
   * @param input The next character to process.
   */
  private void stepMultiLineComment(char input) {
    if (input == '*') {
      state = AutomatonState.MAYBE_LEAVE_MULTI_LINE_COMMENT;
    }

    // else stay put
  }

  /**
   * Process the next character while in the MAYBE_LEAVE_MULTI_LINE_COMMENT state.
   *
   * <p>
   * If already found an asterisk while inside a multi-line comment, one may be leaving the multi-
   * line comment depending on the next character. If forward slash, at end of comment (return to
   * UNKNOWN state and forget the position recorded for the comment). If another asterisk, could
   * still end comment depending on next character (stay in current state). Finally, if anything
   * else, we are still in the body of the multi-line comment and not about to leave (return to
   * MULTI_LINE_COMMENT state).
   * </p>
   *
   * @param input The next character to process.
   */
  private void stepMaybeLeaveMultiLineComment(char input) {
    if (input == '/') {
      // The position recorded on entering the comment pointed into a skip token. Clear it so
      // that a comment followed only by whitespace is not reported as the prior token.
      returnToUnknownState();
    } else if (input != '*') {
      state = AutomatonState.MULTI_LINE_COMMENT;
    }

    // If * stay put
  }

  /**
   * Convenience function to set up internal FSA state when entering a multi-line comment.
   */
  private void enterMultilineComment() {
    tokenPosition = Optional.of(charPosition);
    state = AutomatonState.MULTI_LINE_COMMENT;
  }

  /**
   * Convenience function to set up internal FSA state when having found a non-skip token.
   */
  private void enterNonSkipTokenState() {
    done = true;
    state = AutomatonState.TOKEN;
  }

  /**
   * Convenience function to set up internal FSA state when entering UNKNOWN state.
   */
  private void returnToUnknownState() {
    tokenPosition = Optional.empty();
    state = AutomatonState.UNKNOWN;
  }

  /**
   * Convenience function which determines if a character is whitespace.
   *
   * <p>
   * Matches space, tab, carriage return, newline and form feed. Checked directly rather than
   * through a regex as this runs once per character of source scanned.
   * </p>
   *
   * @param input The character to test.
   * @return True if whitespace. False otherwise.
   */
  private static boolean isWhitespace(char input) {
    return input == ' '
        || input == '\t'
        || input == '\r'
        || input == '\n'
        || input == '\f';
  }

}
/**
 * Singleton with fallback error localizations.
 */
public static class DefaultErrorLocalStrSet {

  // Built once when this class is initialized. Previously every call to get() constructed and
  // populated a fresh instance only to discard it when the shared reference was already set.
  private static final DefaultErrorLocalStrSet INSTANCE = new DefaultErrorLocalStrSet();

  // Populated in the constructor and never modified afterwards
  private final Map<String, String> localizations = new HashMap<>();

  /**
   * Get shared copy of this singleton.
   *
   * @return Shared singleton copy. The same instance is returned on every call.
   */
  public static DefaultErrorLocalStrSet get() {
    return INSTANCE;
  }

  /**
   * Private hidden constructor which registers the fallback strings.
   */
  private DefaultErrorLocalStrSet() {
    localizations.put("editor.status.error", "Error");
    localizations.put("editor.status.error.syntax", "Syntax Error - %s");
    localizations.put("editor.status.bad.assignment", "Error on variable assignment near %s?");
    localizations.put("editor.status.bad.identifier", "Identifier cannot start with digits near %s?");
    localizations.put("editor.status.bad.parameter", "Error on parameter or method declaration near %s?");
    localizations.put("editor.status.extraneous", "Unexpected extra code near %s?");
    localizations.put("editor.status.mismatched", "Missing operator or semicolon near %s?");
    localizations.put("editor.status.missing.name", "Missing name near %s?");
    localizations.put("editor.status.missing.type", "Missing name or type near %s?");
    localizations.put("editor.status.missing.default", "Missing '%s'?");
    localizations.put("editor.status.missing.right_curly_bracket", "Missing '}'");
    localizations.put("editor.status.missing.left_curly_bracket", "Missing '{'");
  }

  /**
   * Lookup localization.
   *
   * @param key Name of string.
   * @return Value of string or empty if not given.
   */
  public Optional<String> get(String key) {
    return Optional.ofNullable(localizations.get(key));
  }

}
}

View File

@@ -27,7 +27,6 @@ import java.util.List;
import processing.mode.java.ImportStatement;
import processing.mode.java.TextTransform;
import processing.mode.java.preproc.issue.PdePreprocessIssue;
/**

View File

@@ -1,55 +0,0 @@
package processing.mode.java.preproc.issue;
import java.util.HashMap;
import java.util.Map;
import java.util.Optional;
import java.util.concurrent.atomic.AtomicReference;
/**
 * Singleton with fallback error localizations.
 *
 * <p>
 * Maps localization keys to built-in English message templates. Some templates contain a
 * {@code %s} placeholder.
 * </p>
 */
public class DefaultErrorLocalStrSet {

  // Built once when this class is initialized. Class initialization is thread safe, and this
  // avoids constructing (and discarding) a fresh instance on every call to get().
  private static final DefaultErrorLocalStrSet INSTANCE = new DefaultErrorLocalStrSet();

  private final Map<String, String> localizations = new HashMap<>();

  /**
   * Get shared copy of this singleton.
   *
   * @return Shared singleton copy. The same object is returned on every call.
   */
  public static DefaultErrorLocalStrSet get() {
    return INSTANCE;
  }

  /**
   * Private hidden constructor which registers the default strings.
   */
  private DefaultErrorLocalStrSet() {
    localizations.put("editor.status.error", "Error");
    localizations.put("editor.status.error.syntax", "Syntax Error - %s");
    localizations.put("editor.status.bad.assignment", "Error on variable assignment near %s?");
    localizations.put("editor.status.bad.identifier", "Identifier cannot start with digits near %s?");
    localizations.put("editor.status.bad.parameter", "Error on parameter or method declaration near %s?");
    localizations.put("editor.status.extraneous", "Unexpected extra code near %s?");
    localizations.put("editor.status.mismatched", "Missing operator or semicolon near %s?");
    localizations.put("editor.status.missing.name", "Missing name near %s?");
    localizations.put("editor.status.missing.type", "Missing name or type near %s?");
    localizations.put("editor.status.missing.default", "Missing '%s'?");
    localizations.put("editor.status.missing.right_curly_bracket", "Missing '}'");
    localizations.put("editor.status.missing.left_curly_bracket", "Missing '{'");
  }

  /**
   * Lookup localization.
   *
   * @param key Name of string.
   * @return Value of string or empty if not given.
   */
  public Optional<String> get(String key) {
    return Optional.ofNullable(localizations.get(key));
  }
}

View File

@@ -1,65 +0,0 @@
/* -*- mode: java; c-basic-offset: 2; indent-tabs-mode: nil -*- */
/*
Part of the Processing project - http://processing.org
Copyright (c) 2012-19 The Processing Foundation
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License version 2
as published by the Free Software Foundation.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software Foundation,
Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
package processing.mode.java.preproc.issue;
/**
 * Immutable value describing where an issue occurred: a line and a position within that line.
 */
public class IssueLocation {

  private final int line;
  private final int charPosition;

  /**
   * Build a location from a line and a character position.
   *
   * @param newLine The line (1-indexed) where the issue occurred. This should be in the global file
   *    generated by the preprocessor and not relative to the start of the tab.
   * @param newCharPosition The position on the line.
   */
  public IssueLocation(int newLine, int newCharPosition) {
    this.line = newLine;
    this.charPosition = newCharPosition;
  }

  /**
   * Get the 1-indexed line on which this error occurred.
   *
   * @return The line given at construction. Note that this is expected to be relative to the
   *    global file generated by the preprocessor and not relative to the start of the tab.
   */
  public int getLine() {
    return this.line;
  }

  /**
   * Get the position of the error within the line.
   *
   * @return The number of characters including whitespace from the start of the line at which the
   *    error occurred.
   */
  public int getCharPosition() {
    return this.charPosition;
  }
}

View File

@@ -1,127 +0,0 @@
/* -*- mode: java; c-basic-offset: 2; indent-tabs-mode: nil -*- */
/*
Part of the Processing project - http://processing.org
Copyright (c) 2012-19 The Processing Foundation
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License version 2
as published by the Free Software Foundation.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software Foundation,
Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
package processing.mode.java.preproc.issue;
import java.util.Optional;
import processing.mode.java.SourceUtil;
/**
 * Utility that decides where in source an issue should be reported.
 *
 * <p>
 * For some errors, the location of the "mistake" is not where the error is generated. Consider
 * an omitted semicolon: the mistake is arguably on the line missing the semicolon, yet the
 * grammatical error is raised on the first non-skip token after it. The issue shown to the user
 * may therefore be far from the line they want to edit. This utility determines whether an issue
 * needs a new location and, if so, what that location is.
 * </p>
 */
public class IssueLocationFactory {

  /**
   * Determine where an issue should be reported.
   *
   * @param simplification The issue simplification generated from {PreprocessIssueMessageSimplifier}.
   * @param originalLine The original line (1 indexed) on which the issue was reported.
   * @param originalOffset The original number of characters from the start of the line where
   *    the issue was reported.
   * @param source The full concatenated source of the sketch being built.
   * @return The new location where the issue should be reported. This may be identical to the
   *    original location if the issue was not moved.
   */
  public static IssueLocation getLineWithOffset(IssueMessageSimplification simplification,
        int originalLine, int originalOffset, String source) {

    // Only relocate when the simplification asks for it and the reported line is non-zero
    boolean relocate = simplification.getAttributeToPriorToken() && originalLine != 0;
    if (!relocate) {
      return new IssueLocation(originalLine, originalOffset);
    }

    // Everything in the sketch before the line on which the issue was reported
    String priorCode = getContentsUpToLine(source, originalLine);
    int priorLength = priorCode.length();

    // Walk backwards through that code until the finder settles on a token or input runs out
    PriorTokenFinder finder = new PriorTokenFinder();
    for (int index = priorLength - 1; index >= 0 && !finder.isDone(); index--) {
      finder.step(priorCode.charAt(index));
    }

    // Translate the finder's backwards offset into an index and a count of lines moved
    Optional<Integer> tokenOffsetMaybe = finder.getTokenPositionMaybe();
    boolean tokenFound = tokenOffsetMaybe.isPresent();
    int startOfMatch = tokenFound ? priorLength - tokenOffsetMaybe.get() : priorLength;
    int linesOffset = tokenFound
        ? SourceUtil.getCount(priorCode.substring(startOfMatch), "\n")
        : 0;

    // Column is the length of the final line of the code preceding the match position
    String beforeMatch = priorCode.substring(0, startOfMatch);
    int lastLineBreak = beforeMatch.lastIndexOf('\n');
    int column = beforeMatch.length() - lastLineBreak - 1;

    return new IssueLocation(originalLine - linesOffset, column);
  }

  /**
   * Get all of the contents of source leading up to a line.
   *
   * @param source The full concatenated sketch source.
   * @param endLineExclusive The line up to which code should be returned. Note that this is an
   *    "exclusive" boundary. Code from this line itself will not be included.
   * @return All of the sketch code leading up to but not including the line given.
   */
  private static String getContentsUpToLine(String source, int endLineExclusive) {
    int lineBreaksWanted = endLineExclusive - 1;
    int lineBreaksSeen = 0;
    int cursor = 0;

    // Advance past characters until enough newlines were consumed or the source is exhausted
    for (; lineBreaksSeen < lineBreaksWanted && cursor < source.length(); cursor++) {
      if (source.charAt(cursor) == '\n') {
        lineBreaksSeen++;
      }
    }

    return source.substring(0, cursor);
  }
}

View File

@@ -1,96 +0,0 @@
/* -*- mode: java; c-basic-offset: 2; indent-tabs-mode: nil -*- */
/*
Part of the Processing project - http://processing.org
Copyright (c) 2012-19 The Processing Foundation
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License version 2
as published by the Free Software Foundation.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software Foundation,
Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
package processing.mode.java.preproc.issue;
/**
 * Data structure describing an issue simplification or explanation.
 *
 * <p>
 * Describes an edit made to an error message or warning before it is shown to the user, based on
 * rules that attempt to make error messages easier to understand.
 * </p>
 */
public class IssueMessageSimplification {

  private final String message;
  private final boolean attributeToPriorToken;

  /**
   * Create a new issue message simplification that leaves token attribution alone.
   *
   * <p>
   * The resulting simplification reports false from {@code getAttributeToPriorToken}, meaning the
   * token on which the error was reported stays the same after message simplification.
   * </p>
   *
   * @param newMessage The message to show to the user.
   */
  public IssueMessageSimplification(String newMessage) {
    this(newMessage, false);
  }

  /**
   * Create a new issue message simplification with explicit token attribution.
   *
   * <p>
   * The error may optionally be attributed to the "prior token". This helps, for example, when a
   * semicolon is missing: the error is generated on the token after the line on which the
   * semicolon was omitted, so although the error technically emerges on the next line it is
   * better for the user to see it earlier, on the "prior token".
   * </p>
   *
   * @param newMessage The message to show to the user.
   * @param newAttributeToPriorToken Boolean flag indicating if the error should be shown on the
   *    token prior to the one on which the error was originally generated. True if the error
   *    should be attributed to the prior token. False otherwise.
   */
  public IssueMessageSimplification(String newMessage, boolean newAttributeToPriorToken) {
    this.message = newMessage;
    this.attributeToPriorToken = newAttributeToPriorToken;
  }

  /**
   * Get the error message text that should be shown to the user.
   *
   * @return The error message text that should be shown to the user.
   */
  public String getMessage() {
    return this.message;
  }

  /**
   * Flag indicating if the error should be attributed to the prior token.
   *
   * @return True if the error should be attributed to the prior non-skip token (not whitespace or
   *    comment). This is useful when a mistake on a prior line like an omitted semicolon causes
   *    an error on a later line but one wants error highlighting closer to the mistake itself.
   *    False if the error should be attributed to the original offending token.
   */
  public boolean getAttributeToPriorToken() {
    return this.attributeToPriorToken;
  }
}

View File

@@ -1,128 +0,0 @@
/* -*- mode: java; c-basic-offset: 2; indent-tabs-mode: nil -*- */
/*
Part of the Processing project - http://processing.org
Copyright (c) 2019 The Processing Foundation
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License version 2
as published by the Free Software Foundation.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software Foundation,
Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
package processing.mode.java.preproc.issue;
import org.antlr.v4.runtime.BaseErrorListener;
import org.antlr.v4.runtime.RecognitionException;
import org.antlr.v4.runtime.Recognizer;
import java.util.Optional;
/**
 * ANTLR error listener to inform a preprocess issue listener when syntax errors are encountered.
 *
 * <p>
 * A {BaseErrorListener} which receives syntax errors reported by ANTLR and converts them to
 * {PdePreprocessIssue}s that are consumable by a {PdePreprocessIssueListener}. Each message is
 * passed through the {PreprocessIssueMessageSimplifier} before the provided listener is informed.
 * </p>
 */
public class PdeIssueEmitter extends BaseErrorListener {

  private final PdePreprocessIssueListener listener;
  private final Optional<SourceEmitter> sourceMaybe;

  /**
   * Create a new issue emitter without access to the sketch source.
   *
   * <p>
   * Without source, reported issues keep the location given by ANTLR. If sketch source is
   * available, use the other constructor.
   * </p>
   *
   * @param newListener The listener to inform when encountering a syntax error.
   */
  public PdeIssueEmitter(PdePreprocessIssueListener newListener) {
    this.listener = newListener;
    this.sourceMaybe = Optional.empty();
  }

  /**
   * Create a new issue emitter with access to the sketch source.
   *
   * @param newListener The listener to inform when encountering a syntax error.
   * @param newSourceEmitter The sketch source to use when helping beautify certain syntax error
   *    messages.
   */
  public PdeIssueEmitter(PdePreprocessIssueListener newListener, SourceEmitter newSourceEmitter) {
    this.listener = newListener;
    this.sourceMaybe = Optional.of(newSourceEmitter);
  }

  @Override
  public void syntaxError(Recognizer<?, ?> recognizer, Object offendingSymbol, int line,
        int charPositionInLine, String msg, RecognitionException e) {

    IssueMessageSimplification simplification =
        PreprocessIssueMessageSimplifier.get().simplify(msg);

    IssueLocation issueLocation = resolveLocation(simplification, line, charPositionInLine);

    PdePreprocessIssue issue = new PdePreprocessIssue(
        issueLocation.getLine(),
        issueLocation.getCharPosition(),
        simplification.getMessage()
    );
    listener.onIssue(issue);
  }

  /**
   * Choose the location to report: via the location factory when source is available, otherwise
   * the position exactly as reported.
   */
  private IssueLocation resolveLocation(IssueMessageSimplification simplification, int line,
        int charPositionInLine) {

    if (!sourceMaybe.isPresent()) {
      return new IssueLocation(line, charPositionInLine);
    }

    return IssueLocationFactory.getLineWithOffset(
        simplification,
        line,
        charPositionInLine,
        sourceMaybe.get().getSource()
    );
  }

  /**
   * Simple interface for strategy which can emit the full body of a processing sketch.
   */
  public static interface SourceEmitter {

    /**
     * Get the full body of the processing sketch.
     *
     * @return String processing sketch source code across all tabs.
     */
    String getSource();

  }

  /**
   * Interface for listener that responds to issues reported by the preprocessor.
   */
  public static interface PdePreprocessIssueListener {

    /**
     * Callback to invoke when an issue is encountered in preprocessing.
     *
     * @param issue Description of the issue.
     */
    void onIssue(PdePreprocessIssue issue);

  }
}

View File

@@ -1,27 +0,0 @@
package processing.mode.java.preproc.issue;
/**
 * Immutable description of an issue: a line, a character position in that line and a message.
 */
public class PdePreprocessIssue {

  private final int line;
  private final int charPositionInLine;
  private final String msg;

  /**
   * Create a new issue description.
   *
   * @param newLine The line associated with the issue.
   * @param newCharPositionInLine The character position within the line.
   * @param newMsg The message describing the issue.
   */
  public PdePreprocessIssue(int newLine, int newCharPositionInLine, String newMsg) {
    this.line = newLine;
    this.charPositionInLine = newCharPositionInLine;
    this.msg = newMsg;
  }

  /**
   * Get the line associated with the issue.
   *
   * @return The line given at construction.
   */
  public int getLine() {
    return this.line;
  }

  /**
   * Get the character position within the line.
   *
   * @return The character position given at construction.
   */
  public int getCharPositionInLine() {
    return this.charPositionInLine;
  }

  /**
   * Get the message describing the issue.
   *
   * @return The message given at construction.
   */
  public String getMsg() {
    return this.msg;
  }
}

View File

@@ -1,17 +0,0 @@
package processing.mode.java.preproc.issue;
/**
 * Unchecked exception wrapping a {PdePreprocessIssue}.
 *
 * <p>
 * The exception message is the message of the wrapped issue.
 * </p>
 */
public class PdePreprocessIssueException extends RuntimeException {

  private final PdePreprocessIssue preprocessIssue;

  /**
   * Create a new exception for an issue.
   *
   * @param newPreprocessIssue The issue to wrap. Its message becomes the exception message.
   */
  public PdePreprocessIssueException(PdePreprocessIssue newPreprocessIssue) {
    super(newPreprocessIssue.getMsg());
    this.preprocessIssue = newPreprocessIssue;
  }

  /**
   * Get the issue wrapped by this exception.
   *
   * @return The issue given at construction.
   */
  public PdePreprocessIssue getIssue() {
    return this.preprocessIssue;
  }
}

View File

@@ -1,333 +0,0 @@
/* -*- mode: java; c-basic-offset: 2; indent-tabs-mode: nil -*- */
/*
Part of the Processing project - http://processing.org
Copyright (c) 2012-19 The Processing Foundation
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License version 2
as published by the Free Software Foundation.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software Foundation,
Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
package processing.mode.java.preproc.issue;
import java.util.Optional;
import java.util.regex.Pattern;
/**
 * Simple automaton that reads backwards from a position in source to find the prior token.
 *
 * <p>
 * When helping generate messages for the user, it is often useful to be able to locate the
 * position of the first token immediately before another location in source. For example,
 * consider error reporting when a semicolon is missing. The error is generated on the token after
 * the line on which the semicolon was omitted so, while the error technically emerges on the next
 * line, it is better for the user for it to appear earlier. Specifically, it is most sensible for
 * it to appear on the "prior token" because this is where it was forgotten.
 * </p>
 *
 * <p>
 * To that end, this finite state automaton can read backwards from a position in source to locate
 * the first "non-skip token" preceding that location. Here a "skip" token means one that is
 * ignored by the preprocessor and does not impact output code (this includes comments and
 * whitespace). This automaton will read character by character from source until it knows it has
 * seen a non-skip token, returning the location of that non-skip token.
 * </p>
 *
 * <p>
 * A formalized FSA is useful here in order to traverse code which can have a complex grammar.
 * As there are a number of ways in the Java / Processing grammar one can encounter skip tokens,
 * this formalized implementation describes the state machine directly in order to provide
 * hopefully more readability / transparency compared to a regex without requiring the use of
 * something heavier like ANTLR.
 * </p>
 */
public class PriorTokenFinder {

  // Possible states for this FSA
  private enum AutomatonState {

    // Automaton is not certain if it is parsing a skip or non-skip character
    UNKNOWN,

    // Automaton has found a possible token but it is not sure if inside a comment
    POSSIBLE_TOKEN,

    // Automaton has found a token but also a forward slash so, if the next character is also a "/",
    // it is inside a single line comment.
    TOKEN_OR_MAYBE_SL_COMMENT,

    // Automaton has found a forward slash so, depending on the next character, it may be inside a
    // single line comment, multi-line comment, or it may have found a standalone token.
    TOKEN_OR_MAYBE_COMMENT,

    // Automaton has found a token and hit its terminal state.
    TOKEN,

    // Automaton is currently traversing a multi-line comment.
    MULTI_LINE_COMMENT,

    // Automaton is maybe leaving a multi line comment because it found an "*". If it picks up a "/"
    // next, the automaton knows it is no longer within a multi-line comment.
    MAYBE_LEAVE_MULTI_LINE_COMMENT
  }

  private boolean done;
  private Optional<Integer> tokenPosition;
  private AutomatonState state;
  private int charPosition;

  /**
   * Create a new automaton in unknown state and a character position of zero.
   */
  public PriorTokenFinder() {
    reset();
  }

  /**
   * Determine if this automaton has found a token.
   *
   * @return True if this automaton has found a token and, thus, is in terminal state (so will
   *    ignore all future input). False if this automaton has not yet found a token since creation
   *    or last call to reset.
   */
  public boolean isDone() {
    return done;
  }

  /**
   * Get the position of the token found.
   *
   * @return Optional containing the number of characters processed prior to finding the token or
   *    empty if no token found. Note that this is different from the number of total characters
   *    processed as some extra characters have to be read prior to the token itself to ensure it
   *    is not part of a comment or something similar.
   */
  public Optional<Integer> getTokenPositionMaybe() {
    return tokenPosition;
  }

  /**
   * Reset this automaton to UNKNOWN state with a character count of zero.
   */
  public void reset() {
    done = false;
    tokenPosition = Optional.empty();
    state = AutomatonState.UNKNOWN;
    charPosition = 0;
  }

  /**
   * Process a character.
   *
   * <p>
   * Process the next character in an effort to find the "prior token". Note that this is
   * expecting the processing sketch source code to be fed one character at a time
   * <i>backwards</i> from the starting position in code. This is because it is looking for the
   * first non-skip token immediately <i>preceding</i> a position in source.
   * </p>
   *
   * @param input The next character to process.
   */
  public void step(char input) {
    switch(state) {
      case UNKNOWN: stepUnknown(input); break;
      case POSSIBLE_TOKEN: stepPossibleToken(input); break;
      case TOKEN_OR_MAYBE_SL_COMMENT: stepTokenOrMaybeSingleLineComment(input); break;
      case TOKEN_OR_MAYBE_COMMENT: stepTokenOrMaybeComment(input); break;
      case MULTI_LINE_COMMENT: stepMultiLineComment(input); break;
      case MAYBE_LEAVE_MULTI_LINE_COMMENT: stepMaybeLeaveMultiLineComment(input); break;
      case TOKEN: /* Already have token. Nothing to be done. */ break;
    }

    // The position counts every character fed in, including those seen after the terminal state
    charPosition++;
  }

  /**
   * Process the next character while in the UNKNOWN state.
   *
   * <p>
   * While not certain if looking at a skip or non-skip token, read the next character. If
   * whitespace, can ignore. If a forward slash, could indicate either a comment or a possible
   * token (move to TOKEN_OR_MAYBE_COMMENT). If anything else, may have found token but need to
   * ensure this line isn't part of a comment (move to POSSIBLE_TOKEN).
   * </p>
   *
   * @param input The next character to process.
   */
  private void stepUnknown(char input) {
    if (isWhitespace(input)) {
      return;
    }

    // Provisionally remember where this candidate token was seen
    tokenPosition = Optional.of(charPosition);

    if (input == '/') {
      state = AutomatonState.TOKEN_OR_MAYBE_COMMENT;
    } else {
      state = AutomatonState.POSSIBLE_TOKEN;
    }
  }

  /**
   * Process the next character while in the POSSIBLE_TOKEN state.
   *
   * <p>
   * After having found a character that could indicate a token, need to ensure that the token
   * wasn't actually part of a single line comment ("//") so look for forward slashes (if found
   * move to TOKEN_OR_MAYBE_SL_COMMENT). If encountered a newline, the earlier found token was
   * not part of a comment so enter TOKEN state.
   * </p>
   *
   * @param input The next character to process.
   */
  private void stepPossibleToken(char input) {
    if (input == '\n') {
      enterNonSkipTokenState();
    } else if (input == '/') {
      state = AutomatonState.TOKEN_OR_MAYBE_SL_COMMENT;
    }
    // Else stay put
  }

  /**
   * Process the next character while in the TOKEN_OR_MAYBE_SL_COMMENT state.
   *
   * <p>
   * After having found a forward slash after encountering something else which may be a non-skip
   * token, one needs to check that it is preceded by another forward slash to have detected a
   * single line comment (return to UNKNOWN state). If found a new line, that forward slash was
   * actually a non-skip token itself so enter TOKEN state. Finally, if anything else, it is still
   * possible that we are traversing a single line comment so return to POSSIBLE_TOKEN state.
   * </p>
   *
   * @param input The next character to process.
   */
  private void stepTokenOrMaybeSingleLineComment(char input) {
    if (input == '\n') {
      enterNonSkipTokenState();
    } else if (input == '/') {
      returnToUnknownState();
    } else {
      state = AutomatonState.POSSIBLE_TOKEN;
    }
  }

  /**
   * Process the next character while in the TOKEN_OR_MAYBE_COMMENT state.
   *
   * <p>
   * After having found a forward slash without encountering something else that may be a non-skip
   * token: that forward slash is a non-skip token if preceded by a newline, could be a single
   * line comment if preceded by a forward slash, could be a multi-line comment if preceded
   * by an asterisk, or could be a non-skip token otherwise.
   * </p>
   *
   * @param input The next character to process.
   */
  private void stepTokenOrMaybeComment(char input) {
    if (input == '\n') {
      enterNonSkipTokenState();
    } else if (input == '/') {
      returnToUnknownState();
    } else if (input == '*') {
      enterMultilineComment();
    } else {
      state = AutomatonState.POSSIBLE_TOKEN;
    }
  }

  /**
   * Process the next character while in the MULTI_LINE_COMMENT state.
   *
   * <p>
   * Process the next character while traversing a multi-line comment. If an asterisk, we may be
   * encountering the end of the multiline comment (move to MAYBE_LEAVE_MULTI_LINE_COMMENT).
   * Otherwise, can ignore character.
   * </p>
   *
   * @param input The next character to process.
   */
  private void stepMultiLineComment(char input) {
    if (input == '*') {
      state = AutomatonState.MAYBE_LEAVE_MULTI_LINE_COMMENT;
    }
    // else stay put
  }

  /**
   * Process the next character while in the MAYBE_LEAVE_MULTI_LINE_COMMENT state.
   *
   * <p>
   * If already found an asterisk while inside a multi-line comment, one may be leaving the multi-
   * line comment depending on the next character. If forward slash, at end of comment (return to
   * UNKNOWN state and forget the provisional token position). If another asterisk, could still
   * end comment depending on next character (stay in current state). Finally, if anything else,
   * we are still in the body of the multi-line comment and not about to leave (return to
   * MULTI_LINE_COMMENT state).
   * </p>
   *
   * @param input The next character to process.
   */
  private void stepMaybeLeaveMultiLineComment(char input) {
    if (input == '/') {
      // The comment is confirmed, so the position recorded on entering it does not belong to a
      // token. Clear it so "no token found" is reported as empty when nothing precedes the
      // comment, matching how single line comments are handled.
      returnToUnknownState();
    } else if (input != '*') {
      state = AutomatonState.MULTI_LINE_COMMENT;
    }
    // If * stay put
  }

  /**
   * Convenience function to set up internal FSA state when entering a multi-line comment.
   */
  private void enterMultilineComment() {
    tokenPosition = Optional.of(charPosition);
    state = AutomatonState.MULTI_LINE_COMMENT;
  }

  /**
   * Convenience function to set up internal FSA state when having found a non-skip token.
   */
  private void enterNonSkipTokenState() {
    done = true;
    state = AutomatonState.TOKEN;
  }

  /**
   * Convenience function to set up internal FSA state when entering UNKNOWN state.
   */
  private void returnToUnknownState() {
    tokenPosition = Optional.empty();
    state = AutomatonState.UNKNOWN;
  }

  /**
   * Convenience function which determines if a character is whitespace.
   *
   * <p>
   * Accepts the "whitespace" characters recognized by the ANTLR grammar: space, tab, carriage
   * return, newline and form feed. Checked directly to avoid allocating a string and a regex
   * matcher for every character processed.
   * </p>
   *
   * @param input The character to test.
   * @return True if whitespace. False otherwise.
   */
  private static boolean isWhitespace(char input) {
    switch (input) {
      case ' ':
      case '\t':
      case '\r':
      case '\n':
      case '\f':
        return true;
      default:
        return false;
    }
  }
}