Анализ документа на языке Dot не дает ожидаемых результатов - PullRequest
0 голосов
/ 30 мая 2020

Я просматриваю свой код и пытаюсь разобрать текстовый документ на языке точек. Я понимаю общий синтаксис, но либо подозреваю, что мой тип перечисления ARROW сбивает с толку, потому что я не уверен, что я должен с ними делать. Цель состоит в том, чтобы go получить два одинаковых файла. Первое, что я подозреваю, верное, но заявляет, что файл не является диграфом. Однако во втором файле есть ошибки, и мы должны правильно выдать ошибку, а затем исправить ее и продолжить снова. Любая помощь будет принята с благодарностью.

Код

/*
 * Lexer
 */

import java.io.IOException; 
import java.nio.file.Files; 
import java.nio.file.Paths;
import java.util.stream.Stream;

public class Lexer {
    private StringBuilder input = new StringBuilder();
    private TokenType token;
    private String lex;
    private boolean empty = false;
    private String errorMessage = "";



    public Lexer(String filePath) {
        try (Stream<String> st = Files.lines(Paths.get(filePath))) {
            st.forEach(input::append);
        } catch (IOException ex) {
            empty = true;
            errorMessage = "file not read: " + filePath;
            return;
        }
        }

        public void next() {
            if (empty) {
                return;
            }
            if (input.length() == 0) {
                empty = true;
                return;
            }
            if (nextToken()) {
                return;
            }
            empty = true;  
        }
        private boolean nextToken() {
            for (TokenType t : TokenType.values()) {
                int end = t.endOfMatch(input.toString());

                if (end != -1) {
                    token = t;
                    lex = input.substring(0, end);
                    input.delete(0, end);
                    return true;
                }
            }

            return false;
        }

        public TokenType currentToken() {
            return token;
        }

        public String currentLexema() {
            return lex;
        }


        public boolean isEmpty() {
            return empty;
        }

        public String dot_parse() {
            graph_parse();
            if(errorMessage == "") {
                return "Text has no errors!";
            } else {
            return errorMessage;
            }
        }

        public void graph_parse() {
            if(currentToken() == TokenType.DIGRAPH || currentToken() == TokenType.GRAPH){
                    next();
                if(currentToken() == TokenType.ID || currentToken() == TokenType.MAIN) {
                    next();
                    if(currentToken() == TokenType.LBRACE) {
                        next();
                        if(stmnt_list()) {
                            next();
                            if(!(currentToken() == TokenType.RBRACE)) {
                                next();
                            } else if( currentToken() == TokenType.RBRACE) {
                                return;
                            }
                        } else {
                            errorMessage = "Graph: Must include statement list";
                        }
                    } else {
                        errorMessage = "Graph: ID must be followed by Left Brace";
                    }
                } else {
                    errorMessage = "Graph: Digraph must include ID";
                }
            } else {
                errorMessage = "Graph: No Digraph";
            }
        }

        public Boolean stmnt_list() {
            if(stmnt()) {
                next();
                if (currentToken() == TokenType.SEMI) {
                    while(currentToken() == TokenType.SEMI) {
                        next();
                        if(stmnt()) {
                            next();
                        }
                    }
                }
            } else {
                errorMessage = "Statement List: No statement list";
                return false;
            }
            return true;
        }

        public Boolean stmnt() {
            if(node_stmnt() || edge_stmnt() || attr_stmnt() || subgraph()) {
                next();

            } else if((currentToken() == TokenType.ID || currentToken() == TokenType.MAIN)) {
                next();
                if(currentToken() == TokenType.EQUAL) {
                    next();
                    if(currentToken() == TokenType.ID || currentToken() == TokenType.MAIN) {
                        next();
                    }
                }
            } else {
                errorMessage = "Statement: No Statement";
                return false;

            }
            return true;
        }

        public Boolean attr_stmnt() {
            if(currentToken() == TokenType.GRAPH) {
                next();
                if(attr_list() || currentToken() == TokenType.NODE || currentToken() == TokenType.EDGE || currentToken() == TokenType.MAIN) {
                    next();
            } else {
                errorMessage = "Attribute Statement: Not a proper attribute statement";
                return false;
                    }
                }
            return true;
        }

        public Boolean attr_list() {
            if(currentToken() == TokenType.LBRACKET) {
                next();
                if(a_list()) {
                    while(currentToken() == TokenType.RBRACKET) {
                        next();
                        if(currentToken() == TokenType.LBRACKET) {
                            next();
                        }
                    } 
                } else {
                    errorMessage = "Attribute List: Must have an a list";
                    return false;
                }
            } else {
                errorMessage = "Attribute List: Must begin with left bracket";
                return false;
            }
            return true;
        }

        public Boolean a_list() {
            if(currentToken() == TokenType.ID || currentToken() == TokenType.MAIN) {
                next();
                if(currentToken() == TokenType.EQUAL) {
                    next();
                    if(currentToken() == TokenType.ID || currentToken() == TokenType.MAIN) {
                        next();
                        if(currentToken() == TokenType.SEMI) {
                            next();
                            while(currentToken() == TokenType.SEMI || currentToken() == TokenType.COMMA) {
                                if(currentToken() == TokenType.ID || currentToken() == TokenType.MAIN) {
                                    next();
                                    if(currentToken() == TokenType.EQUAL) {
                                        next();
                                        if(currentToken() == TokenType.ID || currentToken() == TokenType.MAIN) {
                                            next();
                                            }
                                        }

                                    }
                                }

                            } else {
                                errorMessage = "A List: Must end with a comma or semi-colon";
                                return false;
                            }
                        } else {
                            errorMessage = "A List: Must be followed by ID";
                            return false;
                        }
                    } else {
                        errorMessage = "A List: ID must be follwed by =";
                        return false;
                    }
                } else {
                    errorMessage = "A List: Must begin with ID";
                    return false;
                }   
                return true;

            }

        public Boolean edge_stmnt() {
            if(node_id() || subgraph()) {
                next();
                if(edge_RHS()) {
                    next();
                    if(attr_list()) {
                        next();
                    }
                } else {
                    errorMessage = "Edge Statement: Must have edgeRHS";
                    return false;
                }
            }
            return true;
        }

        public Boolean edge_RHS() {
            if(currentToken() == TokenType.EDGEOP || currentToken() == TokenType.ARROW) {
                next();
                if(node_id()) {
                    next();
                    while(currentToken() == TokenType.EDGEOP || currentToken() == TokenType.ARROW) {
                        next();
                        if(node_id() || subgraph()) {
                            next();
                        }
                    }

                } else {
                    errorMessage = "Edge RHS: Edgepop must be followed by node id or subgraph";
                    return false;
                }
            } else {
                errorMessage = "Edge RHS: Must begin with edgepop";
                return false;
            }
            return true;
        }

        public Boolean node_stmnt() {
            if(node_id()) {
                next();
                if(attr_list()) {
                    next();
                }
            } else {
                errorMessage = "Node Statment: Must begin with node id";
                return false;
            }
            return true;
        }

        public Boolean node_id() {
            if(currentToken() ==  TokenType.ID) {
                next();
                if(port()) {
                    next();
                } else {
                    errorMessage = "Node ID: Must be followed by port";
                    return false;
                }
            }else {
                errorMessage = "Node ID: node id must begin with ID";
                return false;
            }
            return true;
        }

        public Boolean port() {
            if(currentToken() == TokenType.COLON) {
                next();
                if(currentToken() == TokenType.ID || compass_pt()) {
                    next();
                    if(compass_pt()) {
                        next();
                    }
                } else {
                    errorMessage = "Port: Must follow a colon with an ID or compass_pt";
                    return false;
                }
            } else {
                errorMessage = "Port: Must begin with colon";
                return false;
            }
            return true;
        }

        public Boolean subgraph() {
            if(currentToken() == TokenType.SUBGRAPH || currentToken() == TokenType.MAIN) {
                next();
                if(currentToken() == TokenType.ID || currentToken() == TokenType.MAIN) {
                    next();
                    if(currentToken() == TokenType.LBRACE) {
                        next();
                        if(stmnt_list()) {
                            next();
                            if(currentToken() == TokenType.RBRACE) {
                                next();
                            } else {
                                errorMessage = "Subgraph: Must end with }";
                                return false;
                            }
                        } else {
                            errorMessage = "Subgraph: Must have a statement list";
                            return false;
                        }
                    } else {
                        errorMessage = "Subgraph: ID ust be followed by {";
                        return false;
                    }
                } else {
                    errorMessage = "Subgraph: should have an ID";
                    return false;
                }
            } else {
                errorMessage = "Subgraph: Must begin with subgraph";
                return false;
            }
            return true;
        }

        public Boolean compass_pt() {
            if(currentToken() == TokenType.N) {
                next();
            } else if(currentToken() == TokenType.NE) {
                next();
            }else if(currentToken() == TokenType.E) {
                next();
            } else if(currentToken() == TokenType.SE) {
                next();
            } else if(currentToken() == TokenType.S) {
                next();
            } else if(currentToken() == TokenType.SW) {
                next();
            } else if(currentToken() == TokenType.W) {
                next();
            } else if(currentToken() == TokenType.NW) {
                next();
            } else if(currentToken() == TokenType.C) {
                next();
            } else {
                errorMessage = "Compass: Must declare a direction";
                return false;
            }
            return true;
        }


        public static void main(String[] args) {

            Lexer lexer = new Lexer("filepath");
            //String output = "";


            while (!lexer.isEmpty()) {
                if((lexer.currentToken() != TokenType.COMMENT) && (lexer.currentToken() != TokenType.WSPACE) && (lexer.currentToken() != null)) {
                    System.out.printf("%s" + " ", lexer.currentToken());
                lexer.next();
                } else {
                    lexer.next();
                }
            }

            System.out.printf(lexer.dot_parse());


        }
}
/*
 * Lexer
 */
import java.util.regex.Matcher;
import java.util.regex.Pattern;

public enum TokenType {

    EQUAL ("\\="),
    LPARAN ("\\("),
    RPARAN ("\\)"), 
    SEMI (";"),
    COLON(":"),
    COMMA (","), 
    LBRACE ("\\{"),
    PERIOD("\\."),
    RBRACE ("\\}"),
    LBRACKET("\\["),
    RBRACKET("\\]"),
    STRING ("\"[^\"]+\""),
    COMMENT("\\/\\*.+\\*\\/"),
    DIGRAPH("digraph"),
    GRAPH("/graph$"),
    EDGEOP("edgeop"),
    ARROW("\\-\\>"),
    EDGE("edge"),
    NODE("node"),
    SUBGRAPH("subgraph"),
    MAIN("main"),
    WSPACE(" +"),
    N("/^n$"),
    NE("/ne$"),
    E("/e$"),
    SE("/se$"),
    S("/s$"),
    SW("/sw$"),
    W("/w$"),
    NW("/nw$"),
    C("/c$"),
    ID ("\\w+");



    private final Pattern pattern;

    TokenType(String regex) {
        pattern = Pattern.compile("^" + regex);
    }

    int endOfMatch(String s) {
        Matcher m = pattern.matcher(s);

        if (m.find()) {
            return m.end();
        }
        return -1;
    }
}
Text 1:
        digraph G {
            main [shape=box]; /* this is a comment */
            main -> parse [weight=8];
            parse -> execute;
            main -> init [style=dotted];
            main -> cleanup;
            execute -> make_string;
            init -> make_string;
            main -> printf [style=bold,label="100 times"];
            make_string [label="make a\nstring"];
            node [shape=box,style=filled,color=".7 .3 1.0"];
            execute -> compare;
         }
Text 2:
       digraph G {
            main [shape=box; /* missing the closing ] */
            main -> parse [weight=8];
            parse -> execute;
            main ->  [style=dotted]; /* missing the target node ID */
            main -> cleanup;
            execute -> make_string;
            init -> make_string;
            main -> printf [style=bold, label=]; /* missing value */
            make_string [label="make a\nstring"];
            node [shape=box,style=filled,color=".7 .3 1.0"];
            execute -> compare;
         }
...