Как можно получить такое огромное количество непроверенного кода? Я полагаю, что имеет смысл взглянуть на свернутую версию этого кода с нуля и остановиться на самой ранней точке, когда он перестает работать, вместо того, чтобы откладывать проверки работоспособности до тех пор, пока она не станет неуправляемой. 100
Я собираюсь указать вам в некоторых местах, где вы можете увидеть, что делать.
Я должен предупредить, я не думаю, что std::variant
или std::optional
еще поддерживаются Ци. Я могу ошибаться.
Обзор и исправление раунда
Я потратил слишком много времени, пытаясь решить многие проблемы, тонкие и не очень тонкие.
Я с удовольствием объясню немного, но сейчас я просто отбрасываю результат:
Live On Coliru
#define BOOST_SPIRIT_DEBUG
#include <iostream>
#include <string>
#include <vector>
#include <boost/fusion/include/adapt_struct.hpp>
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix.hpp>
#include <boost/spirit/include/qi_auto.hpp>
//#include <boost/container/vector.hpp>
namespace AST {
using boost::variant;
using boost::optional;
enum class bool_t { false_, true_ };
enum class syntax_t { proto2 };
using str_t = std::string;
struct full_id_t {
std::string full_id;
};
using int_t = intmax_t;
using float_t = double;
/// See: http://www.boost.org/doc/libs/1_68_0/libs/spirit/example/qi/compiler_tutorial/calc8/ast.hpp
/// Specifically, struct nil {}.
struct empty_statement_t {};
// TODO: TBD: we may need/want to dissect this one still further... i.e. to ident, message/enum-name, etc.
struct element_type_t : std::string {
using std::string::string;
using std::string::operator=;
};
// TODO: TBD: let's not get too fancy with the inheritance, ...
// TODO: TBD: however, scanning the other types, we could potentially do more of it, strategically, here and there
struct msg_type_t : element_type_t {};
struct enum_type_t : element_type_t {};
struct package_t {
std::string full_id;
};
using const_t = variant<full_id_t, int_t, float_t, str_t, bool_t>;
struct import_modifier_t {
std::string val;
};
struct import_t {
optional<import_modifier_t> mod;
std::string target_name;
};
struct option_t {
std::string name;
const_t val;
};
using label_t = std::string;
using type_t = variant<std::string, msg_type_t, enum_type_t>;
// TODO: TBD: could potentially get more meta-dissected based on the specification:
struct field_opt_t {
std::string name;
const_t val;
};
struct field_t {
label_t label; // this would benefit from being an enum instead
type_t type;
std::string name;
int_t number;
std::vector<field_opt_t> opts;
};
// TODO: TBD: add extend_t after msg_t ...
struct field_t;
struct enum_t;
struct msg_t;
struct extend_t;
struct extensions_t;
struct group_t;
struct option_t;
struct oneof_t;
struct map_field_t;
struct reserved_t;
using msg_body_t = std::vector<variant<
field_t,
enum_t,
msg_t,
extend_t,
extensions_t,
group_t,
option_t,
oneof_t,
map_field_t,
reserved_t,
empty_statement_t
>>;
struct group_t {
label_t label;
std::string name;
int_t number;
msg_body_t body;
};
struct oneof_field_t {
type_t type;
std::string name;
int_t number;
optional<std::vector<field_opt_t>> opts;
};
struct oneof_t {
std::string name;
std::vector<variant<oneof_field_t, empty_statement_t>> choices;
};
struct key_type_t {
std::string val;
};
struct map_field_t {
key_type_t key_type;
type_t type;
std::string name;
int_t number;
optional<std::vector<field_opt_t>> opts;
};
struct range_t {
int_t min;
optional<int_t> max;
};
struct extensions_t {
std::vector<range_t> ranges;
};
struct reserved_t {
variant<std::vector<range_t>, std::vector<std::string>> val;
};
struct enum_val_opt_t {
std::string name;
const_t val;
};
struct enum_field_t {
std::string name;
std::string ordinal;
std::vector<enum_val_opt_t> opt; // consistency
};
using enum_body_t = std::vector<variant<option_t, enum_field_t, empty_statement_t> >;
struct enum_t {
std::string name;
enum_body_t body;
};
struct msg_t {
std::string name;
// TODO: TBD: here is another case where forward declaration is necessary in terms of the AST definition.
msg_body_t body;
};
struct extend_t {
using content_t = variant<field_t, group_t, empty_statement_t>;
// TODO: TBD: actually, this use case may beg the question whether
// "message type", et al, in some way deserve a first class definition?
msg_type_t msg_type;
std::vector<content_t> content;
};
struct top_level_def_t {
// TODO: TBD: may add svc_t after extend_t ...
variant<msg_t, enum_t, extend_t> content;
};
struct proto_t {
syntax_t syntax;
std::vector<variant<import_t, package_t, option_t, top_level_def_t, empty_statement_t>> content;
};
template <typename T>
static inline std::ostream& operator<<(std::ostream& os, T const&) {
std::operator<<(os, "[");
std::operator<<(os, typeid(T).name());
std::operator<<(os, "]");
return os;
}
}
BOOST_FUSION_ADAPT_STRUCT(AST::option_t, name, val)
BOOST_FUSION_ADAPT_STRUCT(AST::full_id_t, full_id)
BOOST_FUSION_ADAPT_STRUCT(AST::package_t, full_id)
BOOST_FUSION_ADAPT_STRUCT(AST::import_modifier_t, val)
BOOST_FUSION_ADAPT_STRUCT(AST::import_t, mod, target_name)
BOOST_FUSION_ADAPT_STRUCT(AST::field_opt_t, name, val)
BOOST_FUSION_ADAPT_STRUCT(AST::field_t, label, type, name, number, opts)
BOOST_FUSION_ADAPT_STRUCT(AST::group_t, label, name, number, body)
BOOST_FUSION_ADAPT_STRUCT(AST::oneof_field_t, type, name, number, opts)
BOOST_FUSION_ADAPT_STRUCT(AST::oneof_t, name, choices)
BOOST_FUSION_ADAPT_STRUCT(AST::key_type_t, val)
BOOST_FUSION_ADAPT_STRUCT(AST::map_field_t, key_type, type, name, number, opts)
BOOST_FUSION_ADAPT_STRUCT(AST::range_t, min, max)
BOOST_FUSION_ADAPT_STRUCT(AST::extensions_t, ranges)
BOOST_FUSION_ADAPT_STRUCT(AST::reserved_t, val)
BOOST_FUSION_ADAPT_STRUCT(AST::enum_val_opt_t, name, val)
BOOST_FUSION_ADAPT_STRUCT(AST::enum_field_t, name, ordinal, opt)
BOOST_FUSION_ADAPT_STRUCT(AST::enum_t, name, body)
BOOST_FUSION_ADAPT_STRUCT(AST::msg_t, name, body)
BOOST_FUSION_ADAPT_STRUCT(AST::extend_t, msg_type, content)
BOOST_FUSION_ADAPT_STRUCT(AST::top_level_def_t, content)
BOOST_FUSION_ADAPT_STRUCT(AST::proto_t, syntax, content)
namespace qi = boost::spirit::qi;
template<typename It>
struct ProtoGrammar : qi::grammar<It, AST::proto_t()> {
using char_rule_type = qi::rule<It, char()>;
using string_rule_type = qi::rule<It, std::string()>;
using skipper_type = qi::space_type;
ProtoGrammar() : ProtoGrammar::base_type(start) {
using qi::lit;
using qi::digit;
using qi::lexeme; // redundant, because no rule declares a skipper
using qi::char_;
// Identifiers
id = lexeme[qi::alpha >> *char_("A-Za-z0-9_")];
full_id = id;
msg_name = id;
enum_name = id;
field_name = id;
oneof_name = id;
map_name = id;
service_name = id;
rpc_name = id;
stream_name = id;
// These distincions aren't very useful until in the semantic analysis
// stage. I'd suggest to not conflate that with parsing.
msg_type = qi::as_string[ -char_('.') >> *(qi::hold[id >> char_('.')]) >> msg_name ];
enum_type = qi::as_string[ -char_('.') >> *(qi::hold[id >> char_('.')]) >> enum_name ];
// group_name = lexeme[qi::upper >> *char_("A-Za-z0-9_")];
// simpler:
group_name = &qi::upper >> id;
// Integer literals
oct_lit = &char_('0') >> qi::uint_parser<AST::int_t, 8>{};
hex_lit = qi::no_case["0x"] >> qi::uint_parser<AST::int_t, 16>{};
dec_lit = qi::uint_parser<AST::int_t, 10>{};
int_lit = lexeme[hex_lit | oct_lit | dec_lit]; // ordering is important
// Floating-point literals
float_lit = qi::real_parser<double, qi::strict_real_policies<double> >{};
// String literals
oct_esc = '\\' >> qi::uint_parser<unsigned char, 8, 3, 3>{};
hex_esc = qi::no_case["\\x"] >> qi::uint_parser<unsigned char, 16, 2, 2>{};
// The last bit in this phrase is literally, "Or Any Characters Not in the Sequence" (fixed)
char_val = hex_esc | oct_esc | char_esc | ~char_("\0\n\\");
str_lit = lexeme["'" >> *(char_val - "'") >> "'"]
| lexeme['"' >> *(char_val - '"') >> '"']
;
// Empty Statement - likely redundant
empty_statement = ';' >> qi::attr(AST::empty_statement_t{});
// Constant
const_
= bool_lit
| str_lit
| float_lit // again, ordering is important
| int_lit
| full_id
;
// keyword helper
#define KW(p) (lexeme[(p) >> !(qi::alnum | '_')])
// Syntax
syntax = KW("syntax") >> '=' >> lexeme[ lit("'proto2'") | "\"proto2\"" ] >> ';' >> qi::attr(AST::syntax_t::proto2);
// Import Statement
import_modifier = KW("weak") | KW("public");
import = KW("import") >> -import_modifier >> str_lit >> ';';
// Package
package = KW("package") >> full_id >> ';';
// Option
opt_name = qi::raw[ (id | '(' >> full_id >> ')') >> *('.' >> id) ];
opt = KW("option") >> opt_name >> '=' >> const_ >> ';';
// Fields
field_num = int_lit;
label = KW("required")
| KW("optional")
| KW("repeated")
;
type
= KW(builtin_type)
| msg_type
| enum_type
;
// Normal field
field_opt = opt_name >> '=' >> const_;
field_opts = -('[' >> field_opt % ',' >> ']');
field = label >> type >> field_name >> '=' >> field_num >> field_opts >> ';';
// Group field
group = label >> KW("group") >> group_name >> '=' >> field_num >> msg_body;
// Oneof and oneof field
oneof_field = type >> field_name >> '=' >> field_num >> field_opts >> ';';
oneof = KW("oneof") >> oneof_name >> '{'
>> *(
oneof_field
// TODO: TBD: ditto how to handle "empty" not synthesizing any attributes ...
| empty_statement
) >> '}';
// Map field
key_type = KW(builtin_type);
// mapField = "map" "<" keyType "," type ">" mapName "=" fieldNumber [ "[" fieldOptions "]" ] ";"
map_field = KW("map") >> '<' >> key_type >> ',' >> type >> '>' >> map_name
>> '=' >> field_num >> field_opts >> ';';
// Extensions and Reserved, Extensions ...
range = int_lit >> -(KW("to") >> (int_lit | KW("max")));
ranges = range % ',';
extensions = KW("extensions") >> ranges >> ';';
// Reserved
reserved = KW("reserved") >> (ranges | field_names) >> ';';
field_names = field_name % ',';
// Enum definition
enum_val_opt = opt_name >> '=' >> const_;
enum_val_opts = -('[' >> (enum_val_opt % ',') >> ']');
enum_field = id >> '=' >> int_lit >> enum_val_opts >> ';';
enum_body = '{' >> *(opt | enum_field | empty_statement) >> '}';
enum_ = KW("enum") >> enum_name >> enum_body;
// Message definition
msg = KW("message") >> msg_name >> msg_body;
msg_body = '{' >> *(
field
| enum_
| msg
| extend
| extensions
| group
| opt
| oneof
| map_field
| reserved
//// TODO: TBD: how to "include" an empty statement ... ? "empty" does not synthesize anything, right?
| empty_statement
) >> '}';
// Extend
extend_content = field | group | empty_statement;
extend_contents = '{' >> *extend_content >> '}';
extend = KW("extend") >> msg_type >> extend_contents;
top_level_def = msg | enum_ | extend /*| service*/;
proto = syntax >> *(import | package | opt | top_level_def | empty_statement);
start = qi::skip(qi::space) [ proto ];
BOOST_SPIRIT_DEBUG_NODES(
(id) (full_id) (msg_name) (enum_name) (field_name) (oneof_name)
(map_name) (service_name) (rpc_name) (stream_name) (group_name)
(msg_type) (enum_type)
(oct_lit) (hex_lit) (dec_lit) (int_lit)
(float_lit)
(oct_esc) (hex_esc) (char_val) (str_lit)
(empty_statement)
(const_)
(syntax)
(import_modifier) (import)
(package)
(opt_name) (opt)
(field_num)
(label)
(type)
(field_opt) (field_opts) (field)
(group)
(oneof_field)
(oneof)
(key_type) (map_field)
(range) (ranges) (extensions) (reserved)
(field_names)
(enum_val_opt) (enum_val_opts) (enum_field) (enum_body) (enum_)
(msg) (msg_body)
(extend_content) (extend_contents) (extend)
(top_level_def) (proto))
}
private:
struct escapes_t : qi::symbols<char, char> {
escapes_t() { this->add
("\\a", '\a')
("\\b", '\b')
("\\f", '\f')
("\\n", '\n')
("\\r", '\r')
("\\t", '\t')
("\\v", '\v')
("\\\\", '\\')
("\\'", '\'')
("\\\"", '"');
}
} char_esc;
string_rule_type id, full_id, msg_name, enum_name, field_name, oneof_name,
map_name, service_name, rpc_name, stream_name, group_name;
qi::rule<It, AST::msg_type_t(), skipper_type> msg_type;
qi::rule<It, AST::enum_type_t(), skipper_type> enum_type;
qi::rule<It, AST::int_t()> int_lit, dec_lit, oct_lit, hex_lit;
qi::rule<It, AST::float_t()> float_lit;
/// true | false
struct bool_lit_t : qi::symbols<char, AST::bool_t> {
bool_lit_t() { this->add
("true", AST::bool_t::true_)
("false", AST::bool_t::false_);
}
} bool_lit;
char_rule_type oct_esc, hex_esc, char_val;
qi::rule<It, AST::str_t()> str_lit;
// TODO: TBD: there are moments when this is a case in a variant or vector<variant>
qi::rule<It, AST::empty_statement_t(), skipper_type> empty_statement;
qi::rule<It, AST::const_t(), skipper_type> const_;
/// syntax = {'proto2' | "proto2"} ;
qi::rule<It, AST::syntax_t(), skipper_type> syntax;
/// import [weak|public] <targetName/> ;
qi::rule<It, AST::import_t(), skipper_type> import;
qi::rule<It, AST::import_modifier_t(), skipper_type> import_modifier;
/// package <fullIdent/> ;
qi::rule<It, AST::package_t(), skipper_type> package;
/// option <optionName/> = <const/> ;
qi::rule<It, AST::option_t(), skipper_type> opt;
/// <ident/> | "(" <fullIdent/> ")" ("." <ident/>)*
string_rule_type opt_name;
qi::rule<It, AST::label_t(), skipper_type> label;
qi::rule<It, AST::type_t(), skipper_type> type;
struct builtin_type_t : qi::symbols<char, std::string> {
builtin_type_t() { this->add
("double", "double")
("float", "float")
("int32", "int32")
("int64", "int64")
("uint32", "uint32")
("uint64", "uint64")
("sint32", "sint32")
("sint64", "sint64")
("fixed32", "fixed32")
("fixed64", "fixed64")
("sfixed32", "sfixed32")
("sfixed64", "sfixed64")
("bool", "bool")
("string", "string")
("bytes", "bytes");
}
} builtin_type;
qi::rule<It, AST::int_t()> field_num;
qi::rule<It, AST::field_opt_t(), skipper_type> field_opt;
qi::rule<It, std::vector<AST::field_opt_t>(), skipper_type> field_opts;
qi::rule<It, AST::field_t(), skipper_type> field;
qi::rule<It, AST::group_t(), skipper_type> group;
qi::rule<It, AST::oneof_t(), skipper_type> oneof;
qi::rule<It, AST::oneof_field_t(), skipper_type> oneof_field;
qi::rule<It, AST::key_type_t(), skipper_type> key_type;
qi::rule<It, AST::map_field_t(), skipper_type> map_field;
/// <int/> [ to ( <int/> | "max" ) ]
qi::rule<It, AST::range_t(), skipper_type> range;
qi::rule<It, std::vector<AST::range_t>(), skipper_type> ranges;
/// extensions <ranges/> ;
qi::rule<It, AST::extensions_t(), skipper_type> extensions;
/// reserved <ranges/>|<fieldNames/> ;
qi::rule<It, AST::reserved_t(), skipper_type> reserved;
qi::rule<It, std::vector<std::string>(), skipper_type> field_names;
/// <optionName/> = <constant/>
qi::rule<It, AST::enum_val_opt_t(), skipper_type> enum_val_opt;
qi::rule<It, std::vector<AST::enum_val_opt_t>(), skipper_type> enum_val_opts;
/// <ident/> = <int/> [ +<enumValueOption/> ] ;
qi::rule<It, AST::enum_field_t(), skipper_type> enum_field;
qi::rule<It, AST::enum_body_t(), skipper_type> enum_body;
qi::rule<It, AST::enum_t(), skipper_type> enum_;
// TODO: TBD: continue here: https://developers.google.com/protocol-buffers/docs/reference/proto2-spec#message_definition
/// message <messageName/> <messageBody/>
qi::rule<It, AST::msg_t(), skipper_type> msg;
/// *{ <field/> | <enum/> | <message/> | <extend/> | <extensions/> | <group/>
/// | <option/> | <oneof/> | <mapField/> | <reserved/> | <emptyStatement/> }
qi::rule<It, AST::msg_body_t(), skipper_type> msg_body;
// TODO: TBD: not sure how appropriate it would be to reach these cases, but we'll see what happens...
/// extend <messageType/> *{ <field/> | <group/> | <emptyStatement/> }
qi::rule<It, AST::extend_t::content_t(), skipper_type> extend_content;
qi::rule<It, std::vector<AST::extend_t::content_t>(), skipper_type> extend_contents;
qi::rule<It, AST::extend_t(), skipper_type> extend;
// TODO: TBD: ditto comments in the rule definition section.
// service; rpc; stream;
/// topLevelDef = <message/> | <enum/> | <extend/> | <service/>
qi::rule<It, AST::top_level_def_t(), skipper_type> top_level_def;
/// <syntax/> { <import/> | <package/> | <option/> | <option/> | <emptyStatement/> }
qi::rule<It, AST::proto_t(), skipper_type> proto;
qi::rule<It, AST::proto_t()> start;
};
#include <fstream>
int main() {
std::ifstream ifs("sample.proto");
std::string const input(std::istreambuf_iterator<char>(ifs), {});
using It = std::string::const_iterator;
It f = input.begin(), l = input.end();
ProtoGrammar<It> const g;
AST::proto_t parsed;
bool ok = qi::parse(f, l, g, parsed);
if (ok) {
std::cout << "Parse succeeded\n";
} else {
std::cout << "Parse failed\n";
}
if (f != l) {
std::cout << "Remaining unparsed input: '" << std::string(f,l) << "'\n";
}
}
Который для выборочного ввода
syntax = "proto2";
import "demo_stuff.proto";
package StackOverflow;
message Sample {
optional StuffMsg foo_list = 1;
optional StuffMsg bar_list = 2;
optional StuffMsg qux_list = 3;
}
message TransportResult {
message Sentinel {}
oneof Chunk {
Sample payload = 1;
Sentinel end_of_stream = 2;
}
}
message ShowTime {
optional uint32 magic = 1 [ default = 0xBDF69E88 ];
repeated string parameters = 2;
optional string version_info = 3;
}
Печать
<proto>
<try>syntax = "proto2";\ni</try>
<syntax>
<try>syntax = "proto2";\ni</try>
<success>\nimport "demo_stuff.</success>
<attributes>[[N3AST8syntax_tE]]</attributes>
</syntax>
<import>
<try>\nimport "demo_stuff.</try>
<import_modifier>
<try> "demo_stuff.proto";</try>
<fail/>
</import_modifier>
<str_lit>
<try>"demo_stuff.proto";\n</try>
[ ...
much
snipped
... ]
<empty_statement>
<try>\n\n</try>
<fail/>
</empty_statement>
<success>\n\n</success>
<attributes>[[[N3AST8syntax_tE], [[[empty], [d, e, m, o, _, s, t, u, f, f, ., p, r, o, t, o]], [[S, t, a, c, k, O, v, e, r, f, l, o, w]], [[[S, a, m, p, l, e], [[[], [S, t, u, f, f, M, s, g], [f, o, o, _, l, i, s, t], 1, []], [[], [S, t, u, f, f, M, s, g], [b, a, r, _, l, i, s, t], 2, []], [[], [S, t, u, f, f, M, s, g], [q, u, x, _, l, i, s, t], 3, []]]]], [[[T, r, a, n, s, p, o, r, t, R, e, s, u, l, t], [[[S, e, n, t, i, n, e, l], []], [[C, h, u, n, k], [[[S, a, m, p, l, e], [p, a, y, l, o, a, d], 1, []], [[S, e, n, t, i, n, e, l], [e, n, d, _, o, f, _, s, t, r, e, a, m], 2, []]]]]]], [[[S, h, o, w, T, i, m, e], [[[], [u, i, n, t, 3, 2], [m, a, g, i, c], 1, [[[d, e, f, a, u, l, t], 3187056264]]], [[], [s, t, r, i, n, g], [p, a, r, a, m, e, t, e, r, s], 2, []], [[], [s, t, r, i, n, g], [v, e, r, s, i, o, n, _, i, n, f, o], 3, []]]]]]]]</attributes>
</proto>
Parse succeeded
Remaining unparsed input: '
'
¹ (Смешение «рекурсивного спуска» (концепция синтаксического анализа) с рекурсивными вариантами тоже сбивает с толку).
² К сожалению, он превышает возможности Wandbox и Coliru