Question

Я пытаюсь написать очень простой парсер на Rust. У меня есть цикл while, который просто проверяет, достигнут ли конец ввода, сравнивая текущую позицию в источнике с длиной источника. Но вместо того чтобы остановиться, когда конец ввода достигнут (точнее, когда end_of_input установлен в true), цикл продолжает выполняться и доходит до той части, где я читаю символ в текущей позиции. Поскольку эта позиция равна длине источника, я получаю panic, и я не вижу, где допустил ошибку в логике (когда я добавляю break, последний токен не добавляется в вектор токенов).

Вот минимальный воспроизводимый пример. Я использую regex 1.3.9.

use regex::Regex;

/// The kinds of token the parser can emit.
///
/// `Clone`, `PartialEq` and `Eq` are derived in addition to `Debug` so that
/// token kinds can be copied and compared directly (for example in tests).
#[derive(Debug, Clone, PartialEq, Eq)]
enum TokenType {
    /// The `@` marker found at the start of the input or right after a newline.
    CommandOperator,
    /// A reserved word such as `import` or `as`; carries the keyword text.
    Keyword(String),
    /// A double-quoted string literal; carries the text collected between the quotes.
    Str(String),
    /// A run of `[A-Za-z0-9_]` characters; carries the matched text.
    Const(String),
}

/// A token kind together with the start and end positions it was created with.
#[derive(Debug)]
struct Token {
    /// What kind of token this is (and its text, for the variants that carry one).
    token_type: TokenType,
    /// Position of the token's first character, as supplied by the caller.
    start_position: usize,
    /// Position of the token's last character, as supplied by the caller
    /// (`parse` passes an inclusive end, e.g. `position + len - 1` for keywords).
    end_position: usize,
}

impl Token {
    fn new(token_type: TokenType, start_position: usize, end_position: usize) -> Token {
        Token { token_type, start_position, end_position }
    }
}

/// Returns an owned `String` copy of the given keyword text.
fn keyword(string: &str) -> String {
    string.to_owned()
}

/// Reports whether the character immediately before `position` equals `character`.
///
/// `position` is a character index (not a byte offset) into `string`.
/// Returns `false` when there is no previous character: either `position`
/// is 0 or `position - 1` lies past the end of `string`.
fn has_previous_char(string: &str, position: usize, character: char) -> bool {
    // `checked_sub` avoids the `usize` underflow that `position - 1` would
    // trigger for `position == 0` (a panic in debug builds).
    match position.checked_sub(1) {
        Some(previous) => string.chars().nth(previous) == Some(character),
        None => false,
    }
}

/// Returns `true` for a space, a newline or a tab; `false` for anything else.
fn char_is_whitespace(character: char) -> bool {
    matches!(character, ' ' | '\n' | '\t')
}

/// Scans `input` from left to right and collects the tokens it recognises:
/// `@` command operators, the keywords `import`/`as`, double-quoted strings
/// and `[A-Za-z0-9_]` constants.
///
/// This is the question's reproducible example and is intentionally left
/// as posted: once `position` reaches the end of the input, the character
/// lookup below panics instead of the loop terminating.
///
/// # Panics
///
/// Panics with "Fatal error: No character available at the position N"
/// when `position` reaches the number of characters in `input`.
fn parse(input: String) -> Vec<Token> {
    let mut tokens: Vec<Token> = Vec::new();
    let mut position: usize = 0;
    let mut end_of_input = false;
    let keywords = [
        keyword("import"),
        keyword("as"),
    ];

    while !end_of_input {
        // Setting the flag does not leave the loop body: `while` re-tests it
        // only at the top of the next iteration, so the lookup right below
        // still runs with `position` at the end of the input.
        if position >= input.chars().count() {
            end_of_input = true;
        }

        // `nth` returns `None` at the end of the input, which hits the
        // `panic!` arm. This is the panic described in the question.
        let character = match input.chars().nth(position) {
            Some(c) => c,
            _ => panic!("Fatal error: No character available at the position {}", position)
        };

        // `@` counts as a command operator only at the very start of the
        // input or directly after a newline.
        if character == '@' && (position == 0 || has_previous_char(&input, position, '\n')) {
            tokens.push(Token::new(TokenType::CommandOperator, position, position));
            position += 1;
            continue;
        }

        // Skip whitespace between tokens.
        if character == ' ' || character == '\n' || character == '\t' {
            position += 1;
            continue;
        }

        // String literal: step over the opening quote, then collect
        // characters until an unescaped closing quote is found.
        if character == '"' {
            let mut str = String::from("");
            position += 1;

            // NOTE(review): `input[position..]` slices by byte offset, while
            // `chars().nth(position)` elsewhere treats `position` as a
            // character index; the two agree only for ASCII input.
            for string_character in input[position..].chars() {
                if string_character == '"' {
                    // A quote preceded by a backslash is kept as part of the string.
                    if input.chars().nth(position - 1) == Some('\\') {
                        str.push_str("\"");
                        position += 2;
                    } else {
                        tokens.push(Token::new(TokenType::Str(str.clone()), position - str.chars().count() - 1, position));
                        position += 1;
                        break;
                    }
                } else {
                    str.push_str(&string_character.to_string());
                    position += 1;
                }
            }
            // No `continue` here: execution falls through to the checks
            // below with `character` still holding the opening quote.
        }

        // NOTE(review): both regexes are recompiled on every loop iteration.
        let alphanumeric_re = Regex::new(r"[[:alpha:]]").unwrap();

        let constant_re = Regex::new(r"[A-Za-z0-9_]").unwrap();

        // Keyword: compare the text at `position` against each known keyword.
        if alphanumeric_re.is_match(&character.to_string()) {
            for word in keywords.iter() {
                // NOTE(review): this slice panics if fewer bytes than the
                // keyword's length remain in `input`.
                if &input[position..position + word.chars().count()] == word {
                    tokens.push(Token::new(TokenType::Keyword(word.clone()), position, position + word.chars().count() - 1));
                    position += word.chars().count();
                    break;
                }
            }
        }

        // Constant: consume a run of `[A-Za-z0-9_]` characters starting at
        // `position` (which may already have been advanced past a keyword).
        if constant_re.is_match(&character.to_string()) {
            let mut constant = String::from("");

            for constant_character in input[position..].chars() {
                let constant_character_as_string = &constant_character.to_string();

                // NOTE(review): `char_is_whitespace(character)` tests the outer
                // `character`, which already matched `constant_re` above, so
                // only the regex test can end the run.
                if char_is_whitespace(character) || !constant_re.is_match(constant_character_as_string) {
                    if constant.chars().count() > 0 {
                        tokens.push(Token::new(TokenType::Const(constant.clone()), position - constant.chars().count(), position - 1));
                    }
                    break;
                } else if constant_re.is_match(constant_character_as_string) {
                    constant.push_str(constant_character_as_string);
                    position += 1;
                }
            }
            // NOTE(review): the token is pushed only when a non-matching
            // character ends the run; if the input ends first, the loop
            // finishes without pushing the collected constant.
        }

        // NOTE(review): a character that matches none of the branches above
        // leaves `position` unchanged, so the loop would not advance.

        // Debug output: dump the tokens collected so far after each iteration.
        println!("{:#?}", tokens);
    }

    tokens
}

/// Runs the parser on a two-line sample and pretty-prints the resulting tokens.
fn main() {
    let source = "@import \"intro.txt\" as intro1\n@import \"goals.txt\" as goals_section";
    let tokens = parse(source.to_string());
    println!("{:#?}", tokens);
}

Я попробовал поместить вызов println! внутрь первой проверки в цикле while, и условие действительно оказывается true.

loganfsmyth · Answer 1 · 16 июня 2020

Условие while !end_of_input { проверяется только тогда, когда выполнение возвращается к началу цикла, а не в момент присваивания переменной. Ваш фрагмент

if position >= input.chars().count() {
  end_of_input = true;
}

установит end_of_input в true, но это не остановит выполнение

input.chars().nth(position)

с position, указывающим на конец строки.

Похоже, вместо этого вам нужно, чтобы этот фрагмент выглядел так:

if position >= input.chars().count() {
  break;
}

Panic в цикле while вместо остановки, когда условие выполнено

Пожалуйста, войдите или зарегистрируйтесь, чтобы ответить на этот вопрос.

1 Ответ

Пожалуйста, войдите или зарегистрируйтесь, чтобы добавить комментарий.

Panic в цикле while вместо остановки, когда условие выполнено

Пожалуйста, войдите или зарегистрируйтесь, чтобы ответить на этот вопрос.

1 Ответ

Пожалуйста, войдите или зарегистрируйтесь, чтобы добавить комментарий.

Похожие темы