Subscribe to MentalFloss Minutes

## Parsing : Polynomials

Part 1
Part 2

Recall:

```poly -> term | term + poly | term - poly
term -> coef | coef x | coef x ^ DIGIT DIGIT*
coef -> INT | REAL
INT -> - DIGIT DIGIT* | DIGIT DIGIT*
REAL -> - DIGIT DIGIT* . DIGIT DIGIT* | DIGIT DIGIT* . DIGIT DIGIT*
DIGIT-> 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9

```

Here is the bare-bones parser:

```#include <iostream>
#include <cctype>

using namespace std;

enum TokenType { NONE, PLUS, MINUS, POWER, X, INT, REAL, END_OF_FILE };

struct Token
{
TokenType type;
string lexeme;
};

class Input
{
private:
string buffer;

public:
Input(string data)
{
cout << "data=" << data << endl;
for(char c : data)
{
if(c != ' ')
buffer += c;
}
}

char pop()
{
if(buffer.length() == 0)
return '\0';

char c = buffer[0];
buffer = buffer.substr(1);
return c;
}

void push(char c)
{
buffer = c + buffer;
}

string get_buffer()
{
return buffer;
}
};

class Lexer
{
private:
Input *input;

public:
Lexer(string data)
{
input = new Input(data);
}

Token get_token()
{
Token token;
token.type = NONE;
token.lexeme = "";

char c = input->pop();
switch(c)
{
case '+':
token.type = PLUS;
token.lexeme = "+";
break;
case '-':
token.type = MINUS;
token.lexeme = '-';
break;
case '^':
token.type = POWER;
token.lexeme = '^';
break;
case 'x':
token.type = X;
token.lexeme = "x";
break;
case '\0':
token.type = END_OF_FILE;
token.lexeme = "";
break;
default:
if(isdigit(c) || c == '.')
{
token.type = INT;
token.lexeme += c;
bool has_dot = (c == '.');
token.type = has_dot ? REAL : INT;

while(isdigit(c) || c == '.')
{
c = input->pop();
if(c == '.' && !has_dot)
{
token.type = REAL;
token.lexeme += c;
has_dot = true;

}
else if(c == '.')
{
cout << "ERROR" << endl;
exit(1);
}
else
{
if(isdigit(c))
token.lexeme += c;
}
}

input->push(c);
}
else
{
cout << "ERROR" << endl;
exit(1);
}
break;
}
}

};

class Parser
{
private:
Lexer *lex;

void syntax_error()
{
cout << "SYNTAX ERROR" << endl;
exit(1);
}

Token expect(TokenType type)
{
Token t = lex->get_token();
if(t.type != type)
syntax_error();

return t;
}

//coef -> INT | REAL
void parse_coef()
{

}

// term -> coef | coef x | coef x ^ DIGIT DIGIT*
void parse_term()
{

}

// poly -> term | term + poly | term - poly
void parse_poly()
{
string names[] = { "NONE", "PLUS", "MINUS", "POWER", "X", "INT", "REAL", "EOF" };
Token t;
while((t = lex->get_token()).type != END_OF_FILE)
cout << names[t.type] << ", " << t.lexeme << endl;
}

public:
Parser(string data)
{
lex = new Lexer(data);
}

void parse_input()
{
parse_poly();
expect(END_OF_FILE);
}
};

int main()
{

string line;
getline(cin, line);
Parser *parser = new Parser(line);
parser->parse_input();

return 0;
}

```

So, at this point we implement the grammar rules as they are. We aren't concerned with creating data structures yet. What we'll be left with is a program that parses polynomials or outputs syntax error.

```#include <iostream>
#include <cctype>

using namespace std;

enum TokenType { NONE, PLUS, MINUS, POWER, X, INT, REAL, END_OF_FILE };

struct Token
{
TokenType type;
string lexeme;
};

class Input
{
private:
string buffer;

public:
Input(string data)
{
cout << "data=" << data << endl;
for(char c : data)
{
if(c != ' ')
buffer += c;
}
}

char pop()
{
if(buffer.length() == 0)
return '\0';

char c = buffer[0];
buffer = buffer.substr(1);
return c;
}

void push(string s)
{
buffer = s + buffer;
}

void push(char c)
{
buffer = c + buffer;
}

string get_buffer()
{
return buffer;
}
};

class Lexer
{
private:
Input *input;

public:
Lexer(string data)
{
input = new Input(data);
}

void give_token(Token token)
{
input->push(token.lexeme);
}

Token get_token()
{
Token token;
token.type = NONE;
token.lexeme = "";

char c = input->pop();
switch(c)
{
case '+':
token.type = PLUS;
token.lexeme = "+";
break;
case '-':
token.type = MINUS;
token.lexeme = '-';
break;
case '^':
token.type = POWER;
token.lexeme = '^';
break;
case 'x':
token.type = X;
token.lexeme = "x";
break;
case '\0':
token.type = END_OF_FILE;
token.lexeme = "";
break;
default:
if(isdigit(c) || c == '.')
{
token.type = INT;
token.lexeme += c;
bool has_dot = (c == '.');
token.type = has_dot ? REAL : INT;

while(isdigit(c) || c == '.')
{
c = input->pop();
if(c == '.' && !has_dot)
{
token.type = REAL;
token.lexeme += c;
has_dot = true;

}
else if(c == '.')
{
cout << "ERROR" << endl;
exit(1);
}
else
{
if(isdigit(c))
token.lexeme += c;
}
}

input->push(c);
}
else
{
cout << "ERROR" << endl;
exit(1);
}
break;
}
}

};

class Parser
{
private:
string names[8] = { "NONE", "PLUS", "MINUS", "POWER", "X", "INT", "REAL", "EOF" };
Lexer *lex;

void syntax_error()
{
cout << "SYNTAX ERROR" << endl;
exit(1);
}

Token expect(TokenType type)
{
Token t = lex->get_token();
if(t.type != type)
{
cout << names[t.type] << ", " << t.lexeme << endl;
syntax_error();
}

return t;
}

Token peek()
{
Token t = lex->get_token();
lex->give_token(t);
return t;
}

// exp -> PINT
void parse_exponent()
{
Token t = lex->get_token();
if(t.type == INT)
{
cout << names[t.type] << ", " << t.lexeme << endl;
// success
}
else
{
syntax_error();
}
}

//coef -> INT | REAL
void parse_coef()
{
Token t = lex->get_token();

if(t.type == INT || t.type == REAL)
{
cout << names[t.type] << ", " << t.lexeme << endl;
// success
}
else
{
syntax_error();
}
}

// term -> coef | coef x | coef x ^ exp
void parse_term()
{
parse_coef();
Token t = lex->get_token();

if(t.type == X)
{
cout << names[t.type] << ", " << t.lexeme << endl;
Token t2 = lex->get_token();
if(t2.type == POWER)
{
cout << names[t2.type] << ", " << t2.lexeme << endl;
parse_exponent();
}
else
{
lex->give_token(t2);
}
}
else
{
lex->give_token(t);
}
}

// poly -> term | term + poly | term - poly
void parse_poly()
{
parse_term();
Token t = peek();
if(t.type == PLUS || t.type == MINUS)
{
lex->get_token();
parse_poly();
}
}

public:
Parser(string data)
{
lex = new Lexer(data);
}

void parse_input()
{
parse_poly();
expect(END_OF_FILE);
}
};

int main()
{

string line;
getline(cin, line);
Parser *parser = new Parser(line);
parser->parse_input();

return 0;
}

```

Here is some sample output:

```data=13x^5 + 17x^2 - 0.5
INT, 13
X, x
X, x
INT, 5
INT, 17
X, x
X, x
INT, 2
REAL, 0.5

```

```data=-7x ^ 3+ -4x^2 + 2
SYNTAX ERROR

```

That syntax error is because I don't currently support + -.

Anyway, that's parsing mostly done. I must note that exponents received a rule of their own, which should only support positive exponents.

```data=9x^4 + 14.5x^-2
INT, 9
X, x
POWER, ^
INT, 4
REAL, 14.5
X, x
POWER, ^
SYNTAX ERROR

```

So, til next time.

S M T W T F S
123
45678910
11121314 15 1617
18192021222324
252627282930