Не удалось получить правильный вывод для лексического анализатора - PullRequest
0 голосов
/ 01 октября 2018

Ниже приведен мой код лексемы.Принимая входные данные, я не могу получить правильный вывод.Я считаю, что проблема заключается в том, что я делю функцию.Может кто-нибудь помочь?Я также включу изображения правильного вывода и что вывод в получении. Мой вывод Правильный вывод Также, это вход для программы Ввод Может кто-нибудьскажите что я делаю не так?

#include <stdlib.h>
#include <vector>
#include <string>
#include <cstring>
#include <iostream>
#include <algorithm>

using namespace std;

void lexEme(string str);
string getTokenID(string str);
vector <string> tokenize(string str);
string ReplaceAll(string str, string from, string to);
bool is_number(string s);
bool isalphanum(string s);
bool isOperator(char str);
vector<string> split(string str, string token);
vector<string> simpleSplit(string str, string token);
static bool is_decimal(string str);
string merge(vector<string> x);
string toLower(string str);
string toUpper(string str);
bool contain(string str , char token);
vector<string> betweenQuotes(string str);
//Store alphabet we are matching


int main(){

    cout<< "Enter string"  << endl;
    string x;
    vector<string> in;
    while (getline(cin , x)){
        if(x.empty()){
            break;
        }
        in.push_back(x);
    }

    for(int i = 0;i < in.size(); i++){
        lexEme(in[i]);
    }
    cout << "done" << endl;

    return 0;

}
//This handles the lexical analysis
void lexEme(string str){
    //We store our broken up string in here
    vector<string> tokens;
    //We handle some preparsing to avoid potential errors when reading lines with ( and )
    str = ReplaceAll(str, "(", " ( ");
    str = ReplaceAll(str, ")", " ) ");

    str = ReplaceAll(str , "\n" , " ");
    bool base = true;
    bool hasStrings = false;
    vector<string> temp;
    //We assign tokens to our tokenized string
    // str = ReplaceAll(str , "\"" , "?\"" );
    for (int k = 0; k < str.length(); ++k) {
        if(str[k] == '"') hasStrings = true;
        if(str[k] == ' ') base = false;
    }

    if(base){
        //   cout << "this is base" << endl;
        if (isalphanum(str) && isdigit(str[0])){
            int iterate = 0;
            string num;
            string t;
            while (iterate < str.length()) {
                if (isalpha(str[iterate])) {
                    num += str[iterate];
                } else {
                    t += str[iterate];
                }
                iterate++;
            }
            temp.push_back(t);
            temp.push_back(num);
            temp = split(str , " ");
            cout << "lexeme: |" + temp[0] + "| length:" + to_string(temp[0].size()) + " token: " << getTokenID(temp[0]) << endl;
            cout << "lexeme: |" + temp[1] + "| length:" + to_string(temp[1].size()) + " token: " << getTokenID(temp[1]) << endl;
            return;
        }else if(!isalphanum(str)){
            //  int index = 0;

        }

        else{
            cout << "lexeme: |" + str + "| length:" + to_string(str.size()) + " token: " << getTokenID(str) << endl;
        }
    }else{

        if(hasStrings) {
            temp = split(str, "\"");
            for (int i = 0; i < temp.size(); i++) {
                if (i % 2 == 0) {
                    vector<string> hold = split(temp[i], " ");
                    for (int j = 0; j < hold.size(); j++) {
                        tokens.push_back(hold[j]);
                    }
                } else {
                    if (temp[i][temp[i].length()] == '?' && temp[i][0] == '?') {
                        temp[i] = ReplaceAll(temp[i], "?", "\"");
                        tokens.push_back(temp[i]);
                    } else {
                        temp[i] = ReplaceAll(temp[i], "?", "\"");
                        tokens.push_back(temp[i]);
                    }

                }
            }
        }else{

            tokens = simpleSplit(str , " ");
            // cout << "breakpt" << endl;
        }
        //Here we iterate and print out our results
        for (int i = 0; i < tokens.size(); ++i) {
            if(tokens[i] != " " && tokens.size() > 0) {
                cout << "lexeme: |" + tokens[i] + "| length:" + to_string(tokens[i].size()) + " token: "
                << getTokenID(tokens[i]) << endl;
            }
        }

    }
}

bool contain(string str , char token){
    for (char i : str) {
        if(i == token){
            return true;
        }
    }
}

vector<string> simpleSplit(string str, string token) {
    vector<string> result;
    vector<string> finalResults;
    while (str.size()) {
        int index = str.find(token);
        if (index != string::npos) {
            result.push_back(str.substr(0, index));
            str = str.substr(index + token.size());
            if (str.size() == 0)result.push_back(str);
        } else {
            result.push_back(str);
            str = "";
        }
    }
    return result;
}

vector<string> split(string str, string token){
    vector<string>result;
    vector<string> finalResults;
    while(str.size()){
        int index = str.find(token);
        if(index!=string::npos){
            result.push_back(str.substr(0,index));
            str = str.substr(index+token.size());
            if(str.size()==0)result.push_back(str);
        }else{
            result.push_back(str);
            str = "";
        }
    }
    //clean out
    for (int i = 0; i < result.size();i++){
        if (!result[i].empty() && result[i] != " "  && result[i].length() > 0){
            //Weird cases like 123abc
            if(isalphanum(result[i]) && isdigit(result[i][0])){
                int iterate = 0;
                string num;
                string t;
                while (iterate < result[i].length()) {
                    if (isalpha(result[i][iterate])) {
                        num += result[i][iterate];
                    } else {
                        t += result[i][iterate];
                    }
                    iterate++;
                }
                finalResults.push_back(t);
                finalResults.push_back(num);

            }else if(isalphanum(result[i])){
                for(int i2 = 0; i2 < result[i].length(); i2++){
                    string tmp ="";
                    if (!isalnum(result[i][i2]) && isOperator(result[i][i2]))
                        if(tmp.length() < 2) {
                            tmp += result[i][i2];
                        }
                    finalResults.push_back(tmp);
                    tmp = "";
                }

            }else if(i != result.size() - 1){
                if(result[i + 1][result[i + 1].length()] == '"'){
                    result[i] = result[i] + " " + result[i + 1];
                    result[i + 1] = " ";
                    finalResults.push_back(result[i]);
                    i++;
                }
            }

            finalResults.push_back(result[i]);
        }
    }

    vector<string> reclean;

    for(int i = 0; i < finalResults.size(); i++){
        if (finalResults[i].length() > 0 && finalResults[i].length() != ' '){
            ReplaceAll(finalResults[i] , "?" , "");
            reclean.push_back(finalResults[i]);
        }
    }
    return reclean;
}


//This function handles encoding tokens.
string getTokenID(string str){
    string id = ""; //our result will be stored here

    //Here we create 2 arrays for each section, the 1st represents the value of our identifiers,
    // the 2nd represents the respective encoding
    vector<string> keywords = {"if","else","for","while","print","return","continue","break","debug","read","let"};
    vector<string> keywordsEnc = {"1001","1002","1003","1004","1005","1006","1007","1008","1009","1010","1011"};

    vector<string> datatypes = {"int" , "float" , "string"};
    vector<string> datatypesEnc = {"1100" , "1101" , "1102"};

    vector<string> punctuations = {";" , "(" , ")" , "[" , "]" , "{" , "}" , ","};
    vector<string> punctuationsEnc = {"2000" , "2001" , "2002" , "2003" , "2004" , "2005" , "2006" , "2007"};

    vector<string> operators = {"+" , "-" , "*" , "/" , ":=" , "==" , "<" , ">" , "<>" , "and" , "or" , "not" , "length"};
    vector<string> operatorsEnc = {"3000" , "3001" , "3002" , "3003" , "3004" , "3005" , "3006" , "3007" , "3008" , "3009" , "3010" , "3011" , "3012"};

    vector<string> abstractions = {"identifier" , "integer literal" , "floating-point literal" , "End of file" , "Unknown lexeme"};
    vector<string> abstractionsEnc = {"4000" , "4001" , "4002" , "4003" , "5000" , "6000"};

    //Now we run through and determine where our cases match.
    for (int i = 0; i < keywords.size(); ++i) {
        if (str == keywords[i]){
            id = keywordsEnc[i];
            return id;
        }
    }
    for (int i = 0; i < datatypes.size(); ++i) {
        if (str == datatypes[i]){
            id = datatypesEnc[i];
            return id;
        }
    }
    for (int i = 0; i < punctuations.size(); ++i) {
        if (str == punctuations[i]){
            id = punctuationsEnc[i];
            return id;
        }
    }
    for (int i = 0; i < operators.size(); ++i) {
        if (str == operators[i]){
            id = operatorsEnc[i];
            return id;
        }
    }
    for (int i = 0; i < abstractions.size(); ++i) {
        if (str == abstractions[i]){
            id = abstractionsEnc[i];
            return id;
        }
    }

    //Special conditions for strings, decimals and integers are handled below
    if(id == "") {
        if (str[0] == '"' && str[str.length()] == '"'){
            id = "4003";
            return id;
        } else if (is_number(str)){
            id = "4001";
            return id;
        }else if(is_decimal(str)) {
            id = "4002";
            return id;
        }else if(str == toUpper(str)){
            if(str == toLower(str) && str[0] != EOF){
                id = "6000";
                return id;
            }else{
                id = "5001";
                return id;
            }
        }else{
            id = "4000";
            return id;
        }
    }
    return id;

}
string toLower(string str)
{
    std::transform(str.begin(), str.end(), str.begin(), ::tolower);

    return str;
}
bool isalphanum(string str){
    int i = 0;
    while(i < str.length()){
        return isalnum(str[i]) != 0;
    }

}
string toUpper(string str)
{
    std::transform(str.begin(), str.end(), str.begin(), ::toupper);

    return str;
}

//Checks to see if s is an integer
bool is_number(string s)
{
    string::const_iterator it = s.begin();
    while (it != s.end() && std::isdigit(*it)) ++it;
    return !s.empty() && it == s.end();
}

//Combines all in x and returns as one string
string merge(vector<string> x){
    string ans;
    for (int i = 0; i < x.size(); ++i) {
        ans += x[i];
    }
}


//Checks to see if str is a decimal or float
static bool is_decimal(string str){
    string::const_iterator it = str.begin();
    bool decimalPoint = false;
    int minSize = 0;
    if(str.size()>0 && (str[0] == '-' || str[0] == '+')){
        it++;
        minSize++;
    }
    while(it != str.end()){
        if(*it == '.'){
            if(!decimalPoint) decimalPoint = true;
            else break;
        }else if(!isdigit(*it) && ((*it!='f') || it+1 != str.end() || !decimalPoint)){
            break;
        }
        ++it;
    }
    return str.size()>minSize && it == str.end();
}

string ReplaceAll(string str, string from, string to) {
    size_t start_pos = 0;
    while((start_pos = str.find(from, start_pos)) != std::string::npos) {
        str.replace(start_pos, from.length(), to);
        start_pos += to.length(); // Handles case where 'to' is a substring of 'from'
    }
    return str;
}

bool isOperator(char str){
    string operators[] = {"+", "-", "/", "*", "%", "^", ">", "<"};
    for (int i = 0; i < operators->length(); ++i) {
        if (str == operators[i][0]){
            return true;
        }
    }
}

vector<string> operatorExtractor(string str){
    vector<string> ans;
    string temp;
    int index = 0;
    while (index < str.length()) {
        if (isOperator(str[index])) {
            temp += str[index];
            if(isOperator(str[index + 1] && str[index + 1] != '<' && str[index + 1] != '>')){
                temp += str[index + 1];
                ans.push_back(temp);
                temp = "";
            }
        }
    }
}
...