Я просматриваю свой код и пытаюсь разобрать текстовый документ на языке точек. Я понимаю общий синтаксис, но либо подозреваю, что мой тип перечисления ARROW сбивает с толку, потому что я не уверен, что я должен с ними делать. Цель состоит в том, чтобы go получить два одинаковых файла. Первое, что я подозреваю, верное, но заявляет, что файл не является диграфом. Однако во втором файле есть ошибки, и мы должны правильно выдать ошибку, а затем исправить ее и продолжить снова. Любая помощь будет принята с благодарностью.
Код
/*
* Lexer
*/
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.stream.Stream;
public class Lexer {
private StringBuilder input = new StringBuilder();
private TokenType token;
private String lex;
private boolean empty = false;
private String errorMessage = "";
public Lexer(String filePath) {
try (Stream<String> st = Files.lines(Paths.get(filePath))) {
st.forEach(input::append);
} catch (IOException ex) {
empty = true;
errorMessage = "file not read: " + filePath;
return;
}
}
public void next() {
if (empty) {
return;
}
if (input.length() == 0) {
empty = true;
return;
}
if (nextToken()) {
return;
}
empty = true;
}
private boolean nextToken() {
for (TokenType t : TokenType.values()) {
int end = t.endOfMatch(input.toString());
if (end != -1) {
token = t;
lex = input.substring(0, end);
input.delete(0, end);
return true;
}
}
return false;
}
public TokenType currentToken() {
return token;
}
public String currentLexema() {
return lex;
}
public boolean isEmpty() {
return empty;
}
public String dot_parse() {
graph_parse();
if(errorMessage == "") {
return "Text has no errors!";
} else {
return errorMessage;
}
}
public void graph_parse() {
if(currentToken() == TokenType.DIGRAPH || currentToken() == TokenType.GRAPH){
next();
if(currentToken() == TokenType.ID || currentToken() == TokenType.MAIN) {
next();
if(currentToken() == TokenType.LBRACE) {
next();
if(stmnt_list()) {
next();
if(!(currentToken() == TokenType.RBRACE)) {
next();
} else if( currentToken() == TokenType.RBRACE) {
return;
}
} else {
errorMessage = "Graph: Must include statement list";
}
} else {
errorMessage = "Graph: ID must be followed by Left Brace";
}
} else {
errorMessage = "Graph: Digraph must include ID";
}
} else {
errorMessage = "Graph: No Digraph";
}
}
public Boolean stmnt_list() {
if(stmnt()) {
next();
if (currentToken() == TokenType.SEMI) {
while(currentToken() == TokenType.SEMI) {
next();
if(stmnt()) {
next();
}
}
}
} else {
errorMessage = "Statement List: No statement list";
return false;
}
return true;
}
public Boolean stmnt() {
if(node_stmnt() || edge_stmnt() || attr_stmnt() || subgraph()) {
next();
} else if((currentToken() == TokenType.ID || currentToken() == TokenType.MAIN)) {
next();
if(currentToken() == TokenType.EQUAL) {
next();
if(currentToken() == TokenType.ID || currentToken() == TokenType.MAIN) {
next();
}
}
} else {
errorMessage = "Statement: No Statement";
return false;
}
return true;
}
public Boolean attr_stmnt() {
if(currentToken() == TokenType.GRAPH) {
next();
if(attr_list() || currentToken() == TokenType.NODE || currentToken() == TokenType.EDGE || currentToken() == TokenType.MAIN) {
next();
} else {
errorMessage = "Attribute Statement: Not a proper attribute statement";
return false;
}
}
return true;
}
public Boolean attr_list() {
if(currentToken() == TokenType.LBRACKET) {
next();
if(a_list()) {
while(currentToken() == TokenType.RBRACKET) {
next();
if(currentToken() == TokenType.LBRACKET) {
next();
}
}
} else {
errorMessage = "Attribute List: Must have an a list";
return false;
}
} else {
errorMessage = "Attribute List: Must begin with left bracket";
return false;
}
return true;
}
public Boolean a_list() {
if(currentToken() == TokenType.ID || currentToken() == TokenType.MAIN) {
next();
if(currentToken() == TokenType.EQUAL) {
next();
if(currentToken() == TokenType.ID || currentToken() == TokenType.MAIN) {
next();
if(currentToken() == TokenType.SEMI) {
next();
while(currentToken() == TokenType.SEMI || currentToken() == TokenType.COMMA) {
if(currentToken() == TokenType.ID || currentToken() == TokenType.MAIN) {
next();
if(currentToken() == TokenType.EQUAL) {
next();
if(currentToken() == TokenType.ID || currentToken() == TokenType.MAIN) {
next();
}
}
}
}
} else {
errorMessage = "A List: Must end with a comma or semi-colon";
return false;
}
} else {
errorMessage = "A List: Must be followed by ID";
return false;
}
} else {
errorMessage = "A List: ID must be follwed by =";
return false;
}
} else {
errorMessage = "A List: Must begin with ID";
return false;
}
return true;
}
public Boolean edge_stmnt() {
if(node_id() || subgraph()) {
next();
if(edge_RHS()) {
next();
if(attr_list()) {
next();
}
} else {
errorMessage = "Edge Statement: Must have edgeRHS";
return false;
}
}
return true;
}
public Boolean edge_RHS() {
if(currentToken() == TokenType.EDGEOP || currentToken() == TokenType.ARROW) {
next();
if(node_id()) {
next();
while(currentToken() == TokenType.EDGEOP || currentToken() == TokenType.ARROW) {
next();
if(node_id() || subgraph()) {
next();
}
}
} else {
errorMessage = "Edge RHS: Edgepop must be followed by node id or subgraph";
return false;
}
} else {
errorMessage = "Edge RHS: Must begin with edgepop";
return false;
}
return true;
}
public Boolean node_stmnt() {
if(node_id()) {
next();
if(attr_list()) {
next();
}
} else {
errorMessage = "Node Statment: Must begin with node id";
return false;
}
return true;
}
public Boolean node_id() {
if(currentToken() == TokenType.ID) {
next();
if(port()) {
next();
} else {
errorMessage = "Node ID: Must be followed by port";
return false;
}
}else {
errorMessage = "Node ID: node id must begin with ID";
return false;
}
return true;
}
public Boolean port() {
if(currentToken() == TokenType.COLON) {
next();
if(currentToken() == TokenType.ID || compass_pt()) {
next();
if(compass_pt()) {
next();
}
} else {
errorMessage = "Port: Must follow a colon with an ID or compass_pt";
return false;
}
} else {
errorMessage = "Port: Must begin with colon";
return false;
}
return true;
}
public Boolean subgraph() {
if(currentToken() == TokenType.SUBGRAPH || currentToken() == TokenType.MAIN) {
next();
if(currentToken() == TokenType.ID || currentToken() == TokenType.MAIN) {
next();
if(currentToken() == TokenType.LBRACE) {
next();
if(stmnt_list()) {
next();
if(currentToken() == TokenType.RBRACE) {
next();
} else {
errorMessage = "Subgraph: Must end with }";
return false;
}
} else {
errorMessage = "Subgraph: Must have a statement list";
return false;
}
} else {
errorMessage = "Subgraph: ID ust be followed by {";
return false;
}
} else {
errorMessage = "Subgraph: should have an ID";
return false;
}
} else {
errorMessage = "Subgraph: Must begin with subgraph";
return false;
}
return true;
}
public Boolean compass_pt() {
if(currentToken() == TokenType.N) {
next();
} else if(currentToken() == TokenType.NE) {
next();
}else if(currentToken() == TokenType.E) {
next();
} else if(currentToken() == TokenType.SE) {
next();
} else if(currentToken() == TokenType.S) {
next();
} else if(currentToken() == TokenType.SW) {
next();
} else if(currentToken() == TokenType.W) {
next();
} else if(currentToken() == TokenType.NW) {
next();
} else if(currentToken() == TokenType.C) {
next();
} else {
errorMessage = "Compass: Must declare a direction";
return false;
}
return true;
}
public static void main(String[] args) {
Lexer lexer = new Lexer("filepath");
//String output = "";
while (!lexer.isEmpty()) {
if((lexer.currentToken() != TokenType.COMMENT) && (lexer.currentToken() != TokenType.WSPACE) && (lexer.currentToken() != null)) {
System.out.printf("%s" + " ", lexer.currentToken());
lexer.next();
} else {
lexer.next();
}
}
System.out.printf(lexer.dot_parse());
}
}
/*
* Lexer
*/
import java.util.regex.Matcher;
import java.util.regex.Pattern;
public enum TokenType {
EQUAL ("\\="),
LPARAN ("\\("),
RPARAN ("\\)"),
SEMI (";"),
COLON(":"),
COMMA (","),
LBRACE ("\\{"),
PERIOD("\\."),
RBRACE ("\\}"),
LBRACKET("\\["),
RBRACKET("\\]"),
STRING ("\"[^\"]+\""),
COMMENT("\\/\\*.+\\*\\/"),
DIGRAPH("digraph"),
GRAPH("/graph$"),
EDGEOP("edgeop"),
ARROW("\\-\\>"),
EDGE("edge"),
NODE("node"),
SUBGRAPH("subgraph"),
MAIN("main"),
WSPACE(" +"),
N("/^n$"),
NE("/ne$"),
E("/e$"),
SE("/se$"),
S("/s$"),
SW("/sw$"),
W("/w$"),
NW("/nw$"),
C("/c$"),
ID ("\\w+");
private final Pattern pattern;
TokenType(String regex) {
pattern = Pattern.compile("^" + regex);
}
int endOfMatch(String s) {
Matcher m = pattern.matcher(s);
if (m.find()) {
return m.end();
}
return -1;
}
}
Text 1:
digraph G {
main [shape=box]; /* this is a comment */
main -> parse [weight=8];
parse -> execute;
main -> init [style=dotted];
main -> cleanup;
execute -> make_string;
init -> make_string;
main -> printf [style=bold,label="100 times"];
make_string [label="make a\nstring"];
node [shape=box,style=filled,color=".7 .3 1.0"];
execute -> compare;
}
Text 2:
digraph G {
main [shape=box; /* missing the closing ] */
main -> parse [weight=8];
parse -> execute;
main -> [style=dotted]; /* missing the target node ID */
main -> cleanup;
execute -> make_string;
init -> make_string;
main -> printf [style=bold, label=]; /* missing value */
make_string [label="make a\nstring"];
node [shape=box,style=filled,color=".7 .3 1.0"];
execute -> compare;
}