Question

Итак, я написал простой лексер по книге, которую сейчас изучаю, — «Language Implementation Patterns» («Шаблоны реализации языков»).

Код в этой книге написан на Java, и я попытался переписать его на C++.

Лексеру нужно лишь распознавать токены, заданные в программе, пропуская при этом пробелы. Например:

'[ABCDE, EFGH]'

 [     - LBRACK
 ABCDE - NAME
 ,     - COMMA
 EFGH  - NAME

Поэтому, перенося код с Java на C++, я создал несколько классов.

Token.h

#pragma once
#include <string>
#include <vector>

using namespace std;

// Token type codes. Each value is also the index of the matching entry in
// Token::tokenNames ("NAME", "COMMA", "LBRACK", "RBRACK").
constexpr int NAME   = 2;
constexpr int COMMA  = 3;
constexpr int LBRACK = 4;
constexpr int RBRACK = 5;

/// A single lexical token: a numeric type code plus the text it was built from.
///
/// The token does not copy `text`: the constructor stores the pointer it is
/// given, so the pointed-to characters must stay valid for as long as the
/// token is used.
class Token
{
public:
    /// Creates a token with type 0 ("n/a") and no text.
    Token();
    ~Token();

    /// Creates a token with the given type code and text pointer.
    Token(int , const char * text );

    /// Formats the token as "<text> , <type name> \n" in a buffer allocated
    /// with new[]; the caller is responsible for delete[]-ing it.
    char * to_string();

    /// Printable names, indexed by token type code.
    std::string tokenNames[6] = { "n/a" , "<EOF>", "NAME", "COMMA", "LBRACK", "RBRACK" };

    /// Returns the token's type code.
    int token_type() const { return type; }

private:
    int type = 0;
    char * text = nullptr;   // borrowed, never freed by Token

};

Token.cpp

#include "Token.h"

// Default token: every field keeps its in-class initializer.
Token::Token() = default;

// Nothing to release: the token does not free the text pointer it holds.
Token::~Token() = default;


// Builds a token of kind `type`. The `text` pointer is stored as-is (no copy
// is made), so the characters it points to must outlive the token.
Token::Token(int type, const char* text)
    : type(type),
      text(const_cast<char*>(text)) {
}

// Formats the token as "<text> , <type name> \n".
//
// Returns a buffer allocated with new[] that is sized to fit the whole line;
// the caller owns it and must release it with delete[].
// A token without text is rendered with an empty text, and a type code outside
// the name table is rendered as "n/a".
char* Token::to_string() {
    // A default-constructed token has no text; never hand a null pointer to "%s".
    const char* shown = (this->text != nullptr) ? this->text : "";

    // Clamp unknown type codes to entry 0 ("n/a") instead of indexing out of bounds.
    const int count = static_cast<int>(sizeof(this->tokenNames) / sizeof(this->tokenNames[0]));
    const int index = (this->type >= 0 && this->type < count) ? this->type : 0;

    // Debug echo of the raw text, kept so the program's output is unchanged.
    printf("%s", shown);

    // Build the line in a std::string so that long text cannot overflow a
    // fixed-size buffer.
    std::string line = shown;
    line += " , ";
    line += this->tokenNames[index];
    line += " \n";

    char* out_buffer = new char[line.size() + 1];
    line.copy(out_buffer, line.size());
    out_buffer[line.size()] = '\0';
    return out_buffer;
}

Lexer.h

#pragma once
#include <string>
#include <vector>
#include "Token.h"

// End-of-input marker stored in Lexer::c. <cstdio>/<stdio.h> define EOF as
// well, so only define it when it is not already available; the value is
// parenthesized so it expands safely inside larger expressions.
#ifndef EOF
#define EOF (-1)
#endif
// Token type code for the end-of-input token.
constexpr auto EOF_TYPE = 1;

using namespace std;

/// Character cursor over an in-memory input buffer.
///
/// `c` holds the character at the current position and becomes EOF once the
/// cursor moves past the last character.
class Lexer
{
public:
    /// Creates a lexer with no input; `c` starts out as EOF.
    Lexer();
    ~Lexer();

    /// Creates a lexer positioned on the first character of `input`.
    Lexer(std::vector <char> input);

    /// Advances the cursor by one character and refreshes `c`.
    void consume();

    /// Consumes the current character if it equals `x`; otherwise throws the
    /// string literal "wrong input".
    void match(char x);

    /// Returns the current character.
    char get_c() const {
        return c;
    }

    std::vector<char> input;   // characters being scanned
    char c = EOF;              // current character; initialised so it is never read indeterminate

private:
    int p = 0;                 // index of `c` within `input`
};

Lexer.cpp

#include "Lexer.h"

// Lexer without input: members keep whatever in-class initializers they have.
Lexer::Lexer() = default;

// No manual cleanup needed; the input vector releases its own storage.
Lexer::~Lexer() = default;

// Creates a lexer over `input`, positioned on its first character.
// An empty input is valid: the lexer starts directly at EOF instead of
// throwing std::out_of_range from at(0).
Lexer::Lexer(vector <char> input) {
    // The parameter is already a private copy (passed by value), so take over
    // its storage instead of copying the characters a second time.
    this->input.swap(input);

    if (this->input.empty()) c = EOF;
    else c = this->input.at(p);
}
// Moves the cursor one character forward and refreshes `c`.
// Once the cursor is past the last character, `c` is set to EOF and further
// calls leave the position where it is.
void Lexer::consume() {
    const std::size_t length = this->input.size();

    // Only advance while there is something left, so repeated calls at the
    // end of input cannot push `p` ever higher.
    if (static_cast<std::size_t>(p) < length) p++;

    // Compare as unsigned on both sides to avoid a signed/unsigned mismatch.
    if (static_cast<std::size_t>(p) >= length) c = EOF;
    else c = this->input.at(static_cast<std::size_t>(p));
}
// Requires the current character to be `x` and steps past it.
// Throws the string literal "wrong input" (as const char*) on a mismatch.
void Lexer::match(char x) {
    if (this->c != x) {
        throw "wrong input";
    }
    consume();
}

ListLexer.h

#pragma once
#include <vector>
#include "Token.h"
#include "Lexer.h"
class ListLexer :  public Lexer, Token
{

public:
    ListLexer();
    ~ListLexer();
    Lexer* lexer;
    ListLexer(vector <char> x) {
        lexer = new Lexer(x); 
        c = lexer->get_c();
    }
    char* get_token_names(int x);
    Token* next_token();
    void skip_space();

    bool is_letter();

    char * name();

private: 
    char c = NULL;
};

ListLexer.cpp

#include "ListLexer.h"

// Creates a lexer with no input.
// `lexer` must point at a real object before it is read, so allocate an empty
// Lexer and mark it as already at end of input; next_token() then reports EOF
// straight away.
ListLexer::ListLexer()
{
    lexer = new Lexer();
    lexer->c = EOF;
    c = lexer->c;
}

// Performs no cleanup of its own; in particular, `lexer` is not deleted here.
ListLexer::~ListLexer() = default;

// Returns the printable name for token type code `x`.
//
// The returned pointer refers to the characters stored in this object's
// tokenNames table: it stays valid while this ListLexer is alive and must not
// be freed by the caller. Codes outside the table yield the "n/a" entry.
char* ListLexer::get_token_names(int x) {
    const int count = static_cast<int>(sizeof(Token::tokenNames) / sizeof(Token::tokenNames[0]));
    const int index = (x >= 0 && x < count) ? x : 0;

    // Point into the stored std::string rather than a local array, which
    // would be destroyed as soon as this function returns.
    return &Token::tokenNames[index][0];
}

// Scans and returns the next token from `lexer`.
//
// Whitespace is skipped. '[', ']' and ',' produce LBRACK, RBRACK and COMMA
// tokens; a run of letters produces a NAME token; any other character throws
// the string literal "wrong input". At end of input an EOF_TYPE token is
// returned. Every token is allocated with new and owned by the caller.
Token* ListLexer::next_token() {
    // Lexer stores EOF in a char, so compare against EOF converted to char.
    // Comparing the char with the int -1 directly never matches on platforms
    // where plain char is unsigned.
    const char end_of_input = static_cast<char>(EOF);

    while ((c = lexer->c) != end_of_input) {
        switch (c) {
        case ' ': case '\t': case '\n': case '\r':
            skip_space();
            continue;   // re-read the current character after the whitespace
        case '[':
            lexer->consume();
            return new Token(LBRACK, "LBRACK");
        case ']':
            lexer->consume();
            return new Token(RBRACK, "RBRACK");
        case ',':
            lexer->consume();
            return new Token(COMMA, "COMMA");
        default:
            if (is_letter()) return new Token(NAME, name());

            throw "wrong input";
        }
    }
    return new Token(EOF_TYPE, "EOF");
}

void ListLexer::skip_space() {
    while (c == ' ' || c == '\t' || c == '\n' || c == '\r')
        consume();
}

// True when the current character of `lexer` lies in 'a'..'z' or 'A'..'Z'.
bool ListLexer::is_letter() {
    const char current = lexer->c;
    const bool lower_case = current >= 'a' && current <= 'z';
    const bool upper_case = current >= 'A' && current <= 'Z';
    return lower_case || upper_case;
}

char* ListLexer::name() {

    vector <char> buffer;
    char out_buffer[255];
    while (is_letter()) {
        
        buffer.push_back(lexer->c);
        lexer->consume();
    }
    buffer.push_back('\x00');

    std::copy(buffer.begin(), buffer.end(), out_buffer);

    printf("%s", out_buffer);

    return out_buffer;
}

main.cpp

#include <stdio.h>
#include <vector>
#include "Token.h"
#include "Lexer.h"
#include "ListLexer.h"


using namespace std;

int main(int argc, char* argv[])

{
    vector <char> vec;

    vec.push_back('a');
    vec.push_back('b');
    vec.push_back('c');
    vec.push_back('d');
    vec.push_back(',');
    vec.push_back('e');
    vec.push_back('f');
    vec.push_back('g');
    vec.push_back('h');
    vec.push_back('\xff');
    
    ListLexer* listlexer = new ListLexer(vec);
    

    try {
        Token* t = listlexer->next_token();

        while (t->token_type() != EOF_TYPE)
        {
            printf("%s", t->to_string());
            t = listlexer->next_token();
        }
        printf("%s", t->to_string());
    }
    catch (const char * e) {
        printf("Error: %s \n", e);
    }
    return 0;
}

Однако каждый раз, когда вызывается t->to_string(), значение text, которое должно быть именем "ABCD" или "EFGH", оказывается другим.

Выделенные значения выведены из ListLexer::name().

Значение приватного поля text в созданном экземпляре класса Token изменяется.

введите описание изображения здесь

Я что-то упустил? Пожалуйста, помогите мне. Я только изучаю ООП на этом примере.

C++ ООП: приватная переменная сама меняет своё значение

Пожалуйста, войдите или зарегистрируйтесь чтобы ответить на этот вопрос.

Ответы [ 0 ]

C++ ООП: приватная переменная сама меняет своё значение

Пожалуйста, войдите или зарегистрируйтесь чтобы ответить на этот вопрос.

Ответы [ 0 ]

Похожие темы