Разобрать IRC-сообщение с помощью Boost Spirit - PullRequest
1 голос
/ 03 ноября 2019

Базовый разбор с помощью Boost.Spirit у меня работает, но не получается полностью разобрать теги сообщений (IRCv3). Я хочу, чтобы теги хотя бы по отдельности попадали в vector<>, но в идеале хотелось бы, чтобы они разбирались в map<>.

#include <string>
#include <optional>
#include <boost/spirit/home/x3.hpp>
#include <boost/spirit/include/phoenix.hpp>
#include <boost/fusion/include/adapt_struct.hpp>
#include <boost/fusion/include/io.hpp>

    /// Bit flags attached to IRC protocol messages.
    /// Each enumerator occupies its own bit so values can be OR-ed together.
    enum MSG_FLAGS : uint32_t {
        /// The final argument is a trailing parameter.
        MSG_TRAILING_ARG = 0x1,

        /// If the message has to be wrapped because it has too many params,
        /// repeat the first argument on every resulting message; e.g., for
        /// ISUPPORT this keeps the client's name (1st arg) at the front of
        /// each ISUPPORT message.
        MSG_REPEAT_1ST = 0x2,

        /// The message must never carry a prefix; e.g., PING and ERROR sent
        /// to local clients.
        MSG_NO_PREFIX = 0x4,
    };

    /// Structure describing an IRC protocol message
    struct message {
        /// IRCv3 tags associated with this message
        std::vector<std::string> tags;

        /// Source prefix - usually blank from clients
        std::string prefix;

        /// Command that was received
        std::string command;

        /// Command arguments
        std::vector<std::string> args;

        /// Flags for internal processing (not received via IRC).
        /// Zero-initialized so a default-constructed message never carries an
        /// indeterminate flag word.
        uint32_t flags = 0;
    };

// Adapt the parsed members of `message` as a Fusion sequence, in declaration
// order, using the type-deducing form of the macro. `flags` is not listed, so
// it is not part of the adapted sequence.
BOOST_FUSION_ADAPT_STRUCT(message, tags, prefix, command, args);

std::optional<message> tokenize(std::string const& data)
{
    namespace x3 = boost::spirit::x3;
    namespace ascii = boost::spirit::x3::ascii;
    namespace phx = boost::phoenix;
    using x3::rule;
    using x3::int_;
    using x3::lit;
    using x3::double_;
    using x3::lexeme;
    using x3::omit;
    using ascii::char_;
    message msg;
    msg.flags = 0;
    // parser rules
    static auto on_trailing_arg = [&](auto& ctx) { msg.flags |= MSG_TRAILING_ARG; };
    static auto const token = lexeme[+(char_ - ' ' - ':')];
    static auto const prefix = omit[':'] >> token;
    static auto const trail = (omit[':'] >> lexeme[*char_])[on_trailing_arg];
    static auto const tags = omit['@'] >> token % ';';
    static auto const line = -tags
        >> -prefix
        >> token
        >> ((+token > -trail) | trail);
    // run the parse
    auto iter = data.begin();
    auto const end = data.end();
    bool r = x3::phrase_parse(iter, end, line, ascii::space, msg);
    if (r && iter == end) {
        return msg;
    } else {
        return std::nullopt;
    }
}

Учитывая следующее сообщение IRC:

"@aaa=bbb;ccc;example.com/ddd=eee :nick!ident@host.com PRIVMSG me :Hello"

Я ожидаю, что объект message будет построен как:

tags = ["aaa=bbb", "ccc", "example.com/ddd=eee"]
prefix = "nick!ident@host.com"
command = "PRIVMSG"
args = ["me", "Hello"]

В настоящее время tags создается как одно значение (aaa=bbb;ccc;example.com/ddd=eee).

На самом деле мне хотелось бы получить для тегов map<>:

tags = [["aaa": "bbb"], "ccc", ["example.com/ddd": "eee"]]
prefix = "nick!ident@host.com"
command = "PRIVMSG"
args = ["me", "Hello"]

1 Ответ

2 голосов
/ 04 ноября 2019

Первый шаг: упростить AST:

/// A single IRCv3 tag, kept as its raw text.
using Tag = std::string;
/// All tags of one message, in the order they were parsed.
using Tags = std::vector<Tag>;

/// Structure describing an IRC protocol message
struct message {
    /// IRCv3 tags associated with this message
    Tags tags;

    /// Source prefix - usually blank from clients
    std::string prefix;

    /// Command that was received
    std::string command;

    /// Command arguments
    std::vector<std::string> args;

    /// Flags for internal processing (not received via IRC).
    /// Zero-initialized so a default-constructed message never carries an
    /// indeterminate flag word.
    uint32_t flags = 0;
};

// Adapt the parsed members of `message` as a Fusion sequence, in declaration
// order. `flags` is not listed, so it is not part of the adapted sequence.
BOOST_FUSION_ADAPT_STRUCT(
    message,
    tags, prefix, command, args);

Теперь немного изменим правила:

// Tag key: one or more characters other than space, ':', ';' and '='.
static auto const tagname = lexeme[+~char_(" :;=")];
// Tag value: everything up to the next ';' or space. ':' is allowed (values
// such as timestamps contain it) and the value may be empty ("key=").
static auto const tagvalue = lexeme[*~char_(" ;")];
// ...
// raw[] exposes the matched text of the whole "key[=value]" as the Tag string.
// Each rule gets its own ID type; `tag` previously reused `tags_`.
static auto const tag = rule<struct tag_, Tag> {"tag"} = x3::raw[tagname >> -('=' >> tagvalue)];
static auto const tags = rule<struct tags_, Tags> {"tags"} = omit['@'] >> tag % ';';

Это уже разделяет теги и подготавливает разделение на ключ и значение: Live On Wandbox

int main() {
    auto m = tokenize("@aaa=bbb;ccc;example.com/ddd=eee :nick!ident@host.com PRIVMSG me :Hello");

    if (m) {
        for (auto& tag : m->tags) {
            std::cout << "tag: " << std::quoted(tag) << "\n";
        }
        std::cout << "prefix: " << std::quoted(m->prefix) << "\n";
        std::cout << "command: " << std::quoted(m->command) << "\n";
        for (auto& arg : m->args) {
            std::cout << "arg: " << std::quoted(arg) << "\n";
        }
        std::cout << "flags: " << m->flags << "\n";
    }
}

Печать:

tag: "aaa=bbb"
tag: "ccc"
tag: "example.com/ddd=eee"
prefix: "nick!ident@host.com"
command: "PRIVMSG"
arg: "me"
arg: "Hello"
flags: 1

БОНУС: Карты

Я не уверен, что вам нужны именно карты (map), потому что ключи могут быть не уникальными, а порядок может иметь значение. Но тем не менее:

#include <boost/fusion/include/std_pair.hpp>

Этот заголовок — то самое «волшебство», которое позволяет распространять атрибуты в элементы карты. Затем:

/// One parsed tag as a (key, value) pair.
using Tag = std::pair<std::string, std::string>;
/// All tags of a message, keyed by tag name.
using Tags = std::map<std::string, std::string>;

Просто настройте эти typedefs,

// Tag key: one or more characters other than space, ':', ';' and '='.
static auto const tagname = rule<struct tagname_, std::string> {"tagname"} = lexeme[+~char_(" :;=")];
// ...
// "key[=value]" exposed as a Tag pair; a tag without '=' leaves the value empty.
// Each rule gets its own ID type; `tag` previously reused `tags_`.
static auto const tag = rule<struct tag_, Tag> {"tag"} = tagname >> -('=' >> tagvalue);
static auto const tags = rule<struct tags_, Tags> {"tags"} = omit['@'] >> tag % ';';

Удаление директивы raw[] позволяет атрибутам распространяться в пару Tag.

См. вывод: Live On Wandbox

//#define BOOST_SPIRIT_X3_DEBUG
#include <boost/fusion/include/adapt_struct.hpp>
#include <boost/fusion/include/std_pair.hpp>
#include <boost/fusion/include/io.hpp>
#include <boost/spirit/home/x3.hpp>
#include <boost/spirit/include/phoenix.hpp>
#include <optional>
#include <string>
#include <map>
#include <iostream>
#include <iomanip>

/// Bit flags attached to IRC protocol messages.
/// Each enumerator occupies its own bit so values can be OR-ed together.
enum MSG_FLAGS : uint32_t {
    /// The final argument is a trailing parameter.
    MSG_TRAILING_ARG = 0x1,

    /// If the message has to be wrapped because it has too many params,
    /// repeat the first argument on every resulting message; e.g., for
    /// ISUPPORT this keeps the client's name (1st arg) at the front of each
    /// ISUPPORT message.
    MSG_REPEAT_1ST = 0x2,

    /// The message must never carry a prefix; e.g., PING and ERROR sent to
    /// local clients.
    MSG_NO_PREFIX = 0x4,
};

/// All tags of a message, keyed by tag name.
using Tags = std::map<std::string, std::string>;
/// One parsed tag as a (key, value) pair.
using Tag = std::pair<std::string, std::string>;

/// Structure describing an IRC protocol message
struct message {
    /// IRCv3 tags associated with this message
    Tags tags;

    /// Source prefix - usually blank from clients
    std::string prefix;

    /// Command that was received
    std::string command;

    /// Command arguments
    std::vector<std::string> args;

    /// Flags for internal processing (not received via IRC).
    /// Zero-initialized so a default-constructed message never carries an
    /// indeterminate flag word.
    uint32_t flags = 0;
};

// Adapt the parsed members of `message` as a Fusion sequence, in declaration
// order. `flags` is not listed, so it is not part of the adapted sequence.
BOOST_FUSION_ADAPT_STRUCT(
    message,
    tags, prefix, command, args);

std::optional<message> tokenize(std::string const &data) {
    namespace x3 = boost::spirit::x3;
    namespace ascii = boost::spirit::x3::ascii;
    namespace phx = boost::phoenix;
    using ascii::char_;
    using x3::double_;
    using x3::int_;
    using x3::lexeme;
    using x3::lit;
    using x3::omit;
    using x3::rule;
    message msg;
    msg.flags = 0;
    // parser rules
    static auto on_trailing_arg = [&](auto &ctx) {
        msg.flags |= MSG_TRAILING_ARG;
    };
    static auto const token = lexeme[+(char_ - ' ' - ':')];
    static auto const tagname = rule<struct tagname_, std::string> {"tagname"} = lexeme[+~char_(" :;=")];
    static auto const tagvalue = tagname; // TODO be more specific?
    static auto const prefix = omit[':'] >> token;
    static auto const trail = (omit[':'] >> lexeme[*char_])[on_trailing_arg];
    static auto const tag = rule<struct tags_, Tag> {"tag"} = tagname >> -('=' >> tagvalue);
    static auto const tags = rule<struct tags_, Tags> {"tags"} = omit['@'] >> tag % ';';
    static auto const line =
        -tags >> -prefix >> token >> ((+token > -trail) | trail);
    // run the parse
    auto iter = data.begin();
    auto const end = data.end();
    bool r = x3::phrase_parse(iter, end, line, ascii::space, msg);
    if (r && iter == end) {
        return msg;
    } else {
        return std::nullopt;
    }
}

int main() {
    auto m = tokenize("@aaa=bbb;ccc;example.com/ddd=eee :nick!ident@host.com PRIVMSG me :Hello");

    if (m) {
        for (auto& [key,value] : m->tags) {
            std::cout << "tag: " << std::quoted(key) << "=" << std::quoted(value) << "\n";
        }
        std::cout << "prefix: " << std::quoted(m->prefix) << "\n";
        std::cout << "command: " << std::quoted(m->command) << "\n";
        for (auto& arg : m->args) {
            std::cout << "arg: " << std::quoted(arg) << "\n";
        }
        std::cout << "flags: " << m->flags << "\n";
    }
}

Печать:

tag: "aaa"="bbb"
tag: "ccc"=""
tag: "example.com/ddd"="eee"
prefix: "nick!ident@host.com"
command: "PRIVMSG"
arg: "me"
arg: "Hello"
flags: 1
...