Мне нужно использовать лексический анализатор для сравнения 2 строк текстового файла. Если 2 строки идентичны, я объединяю их следующим образом:
STATE a TO a b b b OUT 0 1 1 0
STATE b TO a b b b OUT 0 1 1 0
STATE ab TO a b b b OUT 0 1 1 0
Я написал код без лексического Сначала анализатор, но теперь мне как-то нужно добавить его в код лексического анализатора, это мой код:
#include <iostream>
#include <fstream>
#include <vector>
#include <iterator>
#include <sstream>
#include <string>
#include <map>
using namespace std;
int main() {
map<std::string, std::string> _map;
map<string, string>::iterator it;
string str1, str2;
ifstream reader("Hajer.txt"); // your full file path here
while (getline(reader, str2)) {
str1 = str2.substr(0, 8);
str2.erase(0, 8);
auto found = _map.find(str2);
if (found == _map.end())
_map[str2] = str1; // first occurrence
else
found->second += str1.substr(6); // append subsequent occurrences
}
reader.close();
ofstream writer("New_Hajer.txt"); // your full file path here
for (it = _map.begin(); it != _map.end(); ++it)
//cout << it->second << " " << it->first << '\n';
writer << it->second << it->first << '\n';
writer.close();
return 0;
}
Код лексического анализатора выглядит следующим образом:
#pragma warning(disable:4786)
#include <string>
#include <map>
#include <vector>
#include <iostream>
#include <stdio.h>
using namespace std;
#define Getc(s) getc(s)
#define Ungetc(c) {ungetc(c,IP_Input); ugetflag=1;}
int tok = 0;
/*
* Lexical analyzer states.
*/
enum lexstate {
L_START, L_INT, L_IDENT, L_STRING, L_STRING2,
L_COMMENT, L_TEXT_COMMENT, L_LINE_COMMENT, L_END_TEXT_COMMENT
};
const int STRING1 = 3;
const int IDENTIFIER = 4;
const int INTEGER1 = 5;
const int TOKENSTART = 300;
class CParser
{
public:
string yytext; //input buffer
struct tyylval { //value return structure
string s; //string
int i; //integer
}yylval;
FILE* IP_Input; //Input File
FILE* IP_Error; //Error Output
FILE* IP_List; //List Output
int IP_LineNumber; //Line counter
int ugetflag; //checks ungets
int prflag; //controls printing
map<string, int> IP_Token_table; //Tokendefinitions
map<int, string> IP_revToken_table; //reverse Tokendefinitions
int yylex(); //lexical analyser
void yyerror(char* ers); //error reporter
int IP_MatchToken(string& tok); //checks the token
void InitParse(FILE* inp, FILE* err, FILE* lst); //Sets the File pointers
int yyparse(); //parser
list fusion_lines(list& list_one, list& list_two);
void pr_tokentable(); //test output for tokens
void IP_init_token_table(); //loads the tokens
void Load_tokenentry(string str, int index); //load one token
void PushString(char c); //Used for dtring assembly
CParser() { IP_LineNumber = 1; ugetflag = 0; prflag = 0; }; //Constructor
};
//------------------------------------------------------------------------
// Adds a character to the string value
void CParser::PushString(char c)
{
yylval.s += c;
}
//------------------------------------------------------------------------
void CParser::Load_tokenentry(string str, int index)
{
IP_Token_table[str] = index;
IP_revToken_table[index] = str;
}
void CParser::IP_init_token_table()
{
Load_tokenentry("STRING1", 3);
Load_tokenentry("IDENTIFIER", 4);
Load_tokenentry("INTEGER1", 5);
int ii = TOKENSTART;
Load_tokenentry("AND", ii++);
Load_tokenentry("OR", ii++);
Load_tokenentry("Begin", ii++);
Load_tokenentry("End", ii++);
}
//------------------------------------------------------------------------
void CParser::pr_tokentable()
{
typedef map<string, int>::const_iterator CI;
const char* buf;
printf("Symbol Table ---------------------------------------------\n");
for (CI p = IP_Token_table.begin(); p != IP_Token_table.end(); ++p) {
buf = p->first.c_str();
printf(" key:%s val:%d\n ", buf, p->second);
}
}
int CParser::yyparse()
{
int tok;
if (prflag)fprintf(IP_List, "%5d ", (int)IP_LineNumber);
/*
* Go parse things!
*/
while ((tok = yylex()) != 0) {
printf("%d ", tok);
if (tok == STRING1)
printf("%s %s ", IP_revToken_table[tok].c_str(), yylval.s.c_str());
else
if (tok == INTEGER1)
printf("%s %d ", IP_revToken_table[tok].c_str(), yylval.i);
else
if (tok == IDENTIFIER)
printf("%s %s ", IP_revToken_table[tok].c_str(), yylval.s.c_str());
else
if (tok >= TOKENSTART)
printf("%s ", IP_revToken_table[tok].c_str());
else
printf("%c ", tok);
if (!prflag)printf("\n");
}
return 0;
}
//------------------------------------------------------------------------
/*
* Parse File:
*
*/
void CParser::InitParse(FILE* inp, FILE* err, FILE* lst)
{
/*
* Set up the file state to something useful.
*/
IP_Input = inp;
IP_Error = err;
IP_List = lst;
IP_LineNumber = 1;
ugetflag = 0;
/*
* Define both the enabled token and keyword strings.
*/
IP_init_token_table();
}
//------------------------------------------------------------------------
int CParser::IP_MatchToken(string& tok)
{
int retval;
if (IP_Token_table.find(tok) != IP_Token_table.end()) {
retval = (IP_Token_table[tok]);
}
else {
retval = (0);
}
return retval;
}
//------------------------------------------------------------------------
/*
* yylex:
*
*/
int CParser::yylex()
{
//Locals
int c;
lexstate s;
/*
* Keep on sucking up characters until we find something which
* explicitly forces us out of this function.
*/
for (s = L_START, yytext = ""; 1;) {
c = Getc(IP_Input);
yytext = yytext + (char)c;
if (!ugetflag) {
if (c != EOF)if (prflag)fprintf(IP_List, "%c", c);
}
else ugetflag = 0;
switch (s) {
//Starting state, look for something resembling a token.
case L_START:
if (isdigit(c)) {
s = L_INT;
}
else if (isalpha(c) || c == '\\') {
s = L_IDENT;
}
else if (isspace(c)) {
if (c == '\n') {
IP_LineNumber += 1;
if (prflag)
fprintf(IP_List, "%5d ", (int)IP_LineNumber);
}
yytext = "";
}
else if (c == '/') {
yytext = "";
s = L_COMMENT;
}
else if (c == '"') {
s = L_STRING;
}
else if (c == EOF) {
return ('\0');
}
else {
return (c);
}
break;
case L_COMMENT:
if (c == '/')
s = L_LINE_COMMENT;
else if (c == '*')
s = L_TEXT_COMMENT;
else {
Ungetc(c);
return('/'); /* its the division operator not a comment */
}
break;
case L_LINE_COMMENT:
if (c == '\n') {
s = L_START;
Ungetc(c);
}
yytext = "";
break;
case L_TEXT_COMMENT:
if (c == '\n') {
IP_LineNumber += 1;
}
else if (c == '*')
s = L_END_TEXT_COMMENT;
yytext = "";
break;
case L_END_TEXT_COMMENT:
if (c == '/') {
s = L_START;
}
else {
s = L_TEXT_COMMENT;
}
yytext = "";
break;
/*
* Suck up the integer digits.
*/
case L_INT:
if (isdigit(c)) {
break;
}
else {
Ungetc(c);
yylval.s = yytext.substr(0, yytext.size() - 1);
yylval.i = atoi(yylval.s.c_str());
return (INTEGER1);
}
break;
/*
* Grab an identifier, see if the current context enables
* it with a specific token value.
*/
case L_IDENT:
if (isalpha(c) || isdigit(c) || c == '_')
break;
Ungetc(c);
yytext = yytext.substr(0, yytext.size() - 1);
yylval.s = yytext;
if (c = IP_MatchToken(yytext)) {
return (c);
}
else {
return (IDENTIFIER);
}
/*
* Suck up string characters but once resolved they should
* be deposited in the string bucket because they can be
* arbitrarily long.
*/
case L_STRING2:
s = L_STRING;
if (c == '"') {
PushString((char)c);
}
else {
if (c == '\\') {
PushString((char)c);
}
else {
PushString((char)'\\');
PushString((char)c);
}
}
break;
case L_STRING:
if (c == '\n')
IP_LineNumber += 1;
else if (c == '\r')
;
else if (c == '"' || c == EOF) {
return (STRING1);
}
else if (c == '\\') {
s = L_STRING2;
}
else
PushString((char)c);
break;
default: printf("***Fatal Error*** Wrong case label in yylex\n");
}
}
}
//------------------------------------------------------------------------
int main(int argc, char* argv[])
{
FILE* inf;
//char fistr[100];
string fistr;
printf("Enter filename:\n");
//gets(fistr);
cin >> fistr;
inf = fopen(fistr.c_str(), "r");
if (inf == NULL) {
printf("Cannot open input file %s\n", fistr);
return 0;
}
CParser obj;
obj.InitParse(inf, stderr, stdout);
// obj.pr_tokentable();
obj.yyparse();
return 0;
//system("pause");
}
This класс int CParser::yylex()
различает целые числа идентификаторов и строк
Как я могу написать эти 2 кода в одном?