как часть школьного проекта, мне поручено создать парсер xml с помощью flex / bison.(не каждый аспект XML, только некоторые основы).Вот мой код для flex и bison.Вот мой гибкий код (его части):
%{
#include "y.tab.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
int lineNumber = 1;
%}
%option noyywrap
letter [a-zA-Z]
digit [0-9]
other_characters [./_-]
whitespace [ \t]
newline [\n]
string ({letter}|{digit}|{other_characters})({letter}|{digit}|{other_characters})+
%%
"ss:Workbook" {printf("%s", yytext); return WORKBOOK;}
"ss:Styles" {printf("%s", yytext); return STYLES;}
"\/ss:Style" {printf("%s", yytext); return CLOSINGSTYLE;}
"ss:Style" {printf("%s", yytext); return STYLE;}
"<" {printf("%s", yytext); return START;}
">" {printf("%s", yytext); return CLOSE;}
"/" {printf("%s", yytext); return SLASH;}
{string}({whitespace}|{string})* {printf ("%s", yytext); return KEIMENO;}
"ss:ID\="\"{string}\" {printf("%s", yytext); return ID;}
{whitespace} {printf(" "); } /* This is to deal with whitespaces */
{newline} {printf("\n");lineNumber++; } /*Counting line*/
"<!--"[^-]*"-->" {printf("%s",yytext); } /* this is to deal with comments */
%%
, а вот мой код бизона (его части):
%{
#include<stdio.h>
#include<stdlib.h>
#include<string.h>
int errorCount = 0;
extern int lineNumber;
int yylex();
void yyerror(char *);
extern FILE *yyin;
extern FILE *yyout;
%}
%start WorkbookElement
%token KEIMENO
%token WORKBOOK
%token STYLES
%token STYLE
%token CLOSINGSTYLE
%token START
%token CLOSE
%token SLASH
%token ID
%%
text : KEIMENO | ;
StyleElement : START STYLE ID CLOSE START SLASH STYLE CLOSE ;
Stylescontext : text StyleElement text ;
StylesElement : START STYLES CLOSE Stylescontext START CLOSINGSTYLE CLOSE | START STYLES SLASH CLOSE | START STYLES CLOSE text START SLASH STYLES CLOSE ;
Workbookcontext : text StylesElement text ;
WorkbookElement : START WORKBOOK CLOSE Workbookcontext START SLASH WORKBOOK CLOSE | START WORKBOOK SLASH CLOSE ;
%%
void yyerror (char *s)
{
errorCount++;
fprintf(stderr,"%s on line %d \n",s,lineNumber); /*here I get the info about errors */
}
int main(int argc, const char **argv)
{
if (argc > 1) {
yyin = fopen(argv[1], "r");
} else {
yyin = stdin;
}
int result;
if ((result = yyparse()) == 0) {
printf("No syntax errors\n");
} else {
printf("\nFound %d syntax errors\n",errorCount);
}
return 0;
}
Теперь моя проблема в том, что пока он компилируется (с некоторыминезначительные проблемы, такие как правило, бесполезное из-за конфликта), когда я тестирую его на простом XML-файле, он находит ошибки, которые не должны завершаться.например, я использовал синтаксический анализатор в следующем XML-файле:
<ss:Workbook>
<ss:Styles>
<ss:Style ss:ID=”s123”></ss:Style>
<ss:Style ss:ID=”x123”></ss:Style>
</ss:Styles>
</ss:Workbook>
Этот файл должен быть корректным, но, тем не менее, синтаксический анализатор находит `синтаксическую ошибку в строке 4:
Starting parse
Entering state 0
Reading a token: --(end of buffer or a NUL)
--accepting rule at line 19 ("<")
Next token is token START ()
Shifting token START ()
Entering state 1
Reading a token: --accepting rule at line 15 ("ss:Workbook")
Next token is token WORKBOOK ()
Shifting token WORKBOOK ()
Entering state 3
Reading a token: --accepting rule at line 20 (">")
Next token is token CLOSE ()
Shifting token CLOSE ()
Entering state 5
Reading a token: --accepting rule at line 25 ("
")
<ss:Workbook>
--accepting rule at line 24 (" ")
--accepting rule at line 24 (" ")
--accepting rule at line 24 (" ")
--accepting rule at line 24 (" ")
--accepting rule at line 24 (" ")
--accepting rule at line 19 ("<")
Next token is token START ()
Reducing stack by rule 10 (line 28):
-> $$ = nterm text ()
Stack now 0 1 3 5
Entering state 9
Next token is token START ()
Shifting token START ()
Entering state 12
Reading a token: --accepting rule at line 18 ("ss:Styles")
Next token is token STYLES ()
Shifting token STYLES ()
Entering state 15
Reading a token: --accepting rule at line 20 (">")
Next token is token CLOSE ()
Shifting token CLOSE ()
Entering state 18
Reading a token: --accepting rule at line 24 (" ")
--accepting rule at line 25 ("
")
<ss:Styles>
--accepting rule at line 24 (" ")
--accepting rule at line 24 (" ")
--accepting rule at line 24 (" ")
--accepting rule at line 24 (" ")
--accepting rule at line 24 (" ")
--accepting rule at line 24 (" ")
--accepting rule at line 24 (" ")
--accepting rule at line 24 (" ")
--accepting rule at line 24 (" ")
--accepting rule at line 19 ("<")
Next token is token START ()
Reducing stack by rule 10 (line 28):
-> $$ = nterm text ()
Stack now 0 1 3 5 9 12 15 18
Entering state 22
Next token is token START ()
Shifting token START ()
Entering state 25
Reading a token: --accepting rule at line 16 ("ss:Style")
Next token is token STYLE ()
Shifting token STYLE ()
Entering state 28
Reading a token: --accepting rule at line 24 (" ")
--accepting rule at line 23 ("ss:ID=”s123”")
Next token is token ID ()
Shifting token ID ()
Entering state 32
Reading a token: --accepting rule at line 20 (">")
Next token is token CLOSE ()
Shifting token CLOSE ()
Entering state 35
Reading a token: --accepting rule at line 19 ("<")
Next token is token START ()
Shifting token START ()
Entering state 37
Reading a token: --accepting rule at line 17 ("/ss:Style")
Next token is token CLOSINGSTYLE ()
Shifting token CLOSINGSTYLE ()
Entering state 38
Reading a token: --accepting rule at line 20 (">")
Next token is token CLOSE ()
Shifting token CLOSE ()
Entering state 39
Reducing stack by rule 1 (line 23):
$1 = token START ()
$2 = token STYLE ()
$3 = token ID ()
$4 = token CLOSE ()
$5 = token START ()
$6 = token CLOSINGSTYLE ()
$7 = token CLOSE ()
-> $$ = nterm StyleElement ()
Stack now 0 1 3 5 9 12 15 18 22
Entering state 26
Reading a token: --accepting rule at line 24 (" ")
--accepting rule at line 24 (" ")
--accepting rule at line 24 (" ")
--accepting rule at line 24 (" ")
--accepting rule at line 24 (" ")
--accepting rule at line 24 (" ")
--accepting rule at line 24 (" ")
--accepting rule at line 24 (" ")
--accepting rule at line 24 (" ")
--accepting rule at line 25 ("
")
<ss:Style ss:ID=”s123”></ss:Style>
--accepting rule at line 24 (" ")
--accepting default rule (" ")
--accepting rule at line 24 (" ")
--accepting rule at line 19 ("<")
Next token is token START ()
Reducing stack by rule 10 (line 28):
-> $$ = nterm text ()
Stack now 0 1 3 5 9 12 15 18 22 26
Entering state 30
Reducing stack by rule 2 (line 24):
$1 = nterm text ()
$2 = nterm StyleElement ()
$3 = nterm text ()
-> $$ = nterm Stylescontext ()
Stack now 0 1 3 5 9 12 15 18
Entering state 21
Next token is token START ()
Shifting token START ()
Entering state 24
Reading a token: --accepting rule at line 16 ("ss:Style")
Next token is token STYLE ()
syntax error on line 4
Error: popping token START ()
Stack now 0 1 3 5 9 12 15 18 21
Error: popping nterm Stylescontext ()
Stack now 0 1 3 5 9 12 15 18
Error: popping token CLOSE ()
Stack now 0 1 3 5 9 12 15
Error: popping token STYLES ()
Stack now 0 1 3 5 9 12
Error: popping token START ()
Stack now 0 1 3 5 9
Error: popping nterm text ()
Stack now 0 1 3 5
Error: popping token CLOSE ()
Stack now 0 1 3
Error: popping token WORKBOOK ()
Stack now 0 1
Error: popping token START ()
Stack now 0
Cleanup: discarding lookahead token STYLE ()
Stack now 0
<ss:Style
Found 1 syntax errors
IЯ новичок и в флексах, и в зубрах, поэтому я наверняка пропустил некоторые вещи.Из того, что я могу понять, должно быть что-то не так с файлом зубров.Любые подсказки о том, что не так и как это исправить?