Subscribe to MentalFloss Minutes        RSS Feed
-----

Parsing : Polynomials

Icon Leave Comment
Part 1
Part 2

Recall:

poly -> term | term + poly | term - poly
term -> coef | coef x | coef x ^ DIGIT DIGIT*
coef -> INT | REAL
INT -> - DIGIT DIGIT* | DIGIT DIGIT*
REAL -> - DIGIT DIGIT* . DIGIT DIGIT* | DIGIT DIGIT* . DIGIT DIGIT*
DIGIT-> 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9



Here is the bare-bones parser:

#include <iostream>
#include <cctype>

using namespace std;

enum TokenType { NONE, PLUS, MINUS, POWER, X, INT, REAL, END_OF_FILE };

struct Token
{
	TokenType type;
	string lexeme;
};

class Input
{
	private: 
	string buffer;
	
	public:
	Input(string data)
	{
		cout << "data=" << data << endl;
		for(char c : data)
		{
			if(c != ' ')
				buffer += c;
		}
	}
	
	char pop()
	{
		if(buffer.length() == 0)
			return '\0';
	
		char c = buffer[0];
		buffer = buffer.substr(1);
		return c;
	}
	
	void push(char c)
	{
		buffer = c + buffer;
	}
	
	string get_buffer()
	{
		return buffer;
	}
};

class Lexer
{
	private:
	Input *input;
	
	public:
	Lexer(string data)
	{
		input = new Input(data);
	}
	
	Token get_token()
	{
		Token token;
		token.type = NONE;
		token.lexeme = "";
		
		char c = input->pop();
		switch(c)
		{
			case '+':
				token.type = PLUS;
				token.lexeme = "+";
				break;
			case '-':
				token.type = MINUS;
				token.lexeme = '-';
				break;
			case '^':
				token.type = POWER;
				token.lexeme = '^';
				break;
			case 'x':
				token.type = X;
				token.lexeme = "x";
				break;
			case '\0':
				token.type = END_OF_FILE;
				token.lexeme = "";
				break;
			default:
				if(isdigit(c) || c == '.')
				{
					token.type = INT;
					token.lexeme += c;
					bool has_dot = (c == '.');
					token.type = has_dot ? REAL : INT;
					
					while(isdigit(c) || c == '.')
					{
						c = input->pop();
						if(c == '.' && !has_dot)
						{
							token.type = REAL;
							token.lexeme += c;
							has_dot = true;

						}
						else if(c == '.')
						{
							cout << "ERROR" << endl;
							exit(1);
						}
						else
						{
							if(isdigit(c))
								token.lexeme += c;
						}
					}
					
					input->push(c);
				}
				else
				{
					cout << "ERROR" << endl;
					exit(1);
				}
				break;
		}
		return token;
	}
	
};

class Parser
{
	private:
	Lexer *lex;
	
	void syntax_error()
	{
		cout << "SYNTAX ERROR" << endl;
		exit(1);
	}
	
	Token expect(TokenType type)
	{
		Token t = lex->get_token();
		if(t.type != type)
			syntax_error();
			
		return t;
	}
	
	//coef -> INT | REAL
	void parse_coef()
	{
	
	}
	
	// term -> coef | coef x | coef x ^ DIGIT DIGIT*
	void parse_term()
	{
	
	}
	
	// poly -> term | term + poly | term - poly
	void parse_poly()
	{
		string names[] = { "NONE", "PLUS", "MINUS", "POWER", "X", "INT", "REAL", "EOF" };
		Token t;
		while((t = lex->get_token()).type != END_OF_FILE)
			cout << names[t.type] << ", " << t.lexeme << endl;
	}
	
	public:
	Parser(string data)
	{
		lex = new Lexer(data);
	}
	
	void parse_input()
	{
		parse_poly();
		expect(END_OF_FILE);
	}
};

int main()
{
	
	string line;
	getline(cin, line);
	Parser *parser = new Parser(line);
	parser->parse_input();

	return 0;
}



So, at this point we implement the grammar rules as they are. We aren't concerned with creating data structures yet. What we'll be left with is a program that parses polynomials or outputs syntax error.

#include <iostream>
#include <cctype>

using namespace std;

enum TokenType { NONE, PLUS, MINUS, POWER, X, INT, REAL, END_OF_FILE };

struct Token
{
	TokenType type;
	string lexeme;
};

class Input
{
	private: 
	string buffer;
	
	public:
	Input(string data)
	{
		cout << "data=" << data << endl;
		for(char c : data)
		{
			if(c != ' ')
				buffer += c;
		}
	}
	
	char pop()
	{
		if(buffer.length() == 0)
			return '\0';
	
		char c = buffer[0];
		buffer = buffer.substr(1);
		return c;
	}
	
	void push(string s)
	{
		buffer = s + buffer;
	}
	
	void push(char c)
	{
		buffer = c + buffer;
	}
	
	string get_buffer()
	{
		return buffer;
	}
};

class Lexer
{
	private:
	Input *input;
	
	public:
	Lexer(string data)
	{
		input = new Input(data);
	}
	
	void give_token(Token token)
	{
		input->push(token.lexeme);
	}
	
	Token get_token()
	{
		Token token;
		token.type = NONE;
		token.lexeme = "";
		
		char c = input->pop();
		switch(c)
		{
			case '+':
				token.type = PLUS;
				token.lexeme = "+";
				break;
			case '-':
				token.type = MINUS;
				token.lexeme = '-';
				break;
			case '^':
				token.type = POWER;
				token.lexeme = '^';
				break;
			case 'x':
				token.type = X;
				token.lexeme = "x";
				break;
			case '\0':
				token.type = END_OF_FILE;
				token.lexeme = "";
				break;
			default:
				if(isdigit(c) || c == '.')
				{
					token.type = INT;
					token.lexeme += c;
					bool has_dot = (c == '.');
					token.type = has_dot ? REAL : INT;
					
					while(isdigit(c) || c == '.')
					{
						c = input->pop();
						if(c == '.' && !has_dot)
						{
							token.type = REAL;
							token.lexeme += c;
							has_dot = true;

						}
						else if(c == '.')
						{
							cout << "ERROR" << endl;
							exit(1);
						}
						else
						{
							if(isdigit(c))
								token.lexeme += c;
						}
					}
					
					input->push(c);
				}
				else
				{
					cout << "ERROR" << endl;
					exit(1);
				}
				break;
		}
		return token;
	}
	
};

class Parser
{
	private:
	string names[8] = { "NONE", "PLUS", "MINUS", "POWER", "X", "INT", "REAL", "EOF" };
	Lexer *lex;
	
	void syntax_error()
	{
		cout << "SYNTAX ERROR" << endl;
		exit(1);
	}
	
	Token expect(TokenType type)
	{
		Token t = lex->get_token();
		if(t.type != type)
		{
			cout << names[t.type] << ", " << t.lexeme << endl;
			syntax_error();
		}
			
		return t;
	}
	
	Token peek()
	{
		Token t = lex->get_token();
		lex->give_token(t);
		return t;
	}
	
	// exp -> PINT
	void parse_exponent()
	{
		Token t = lex->get_token();
		if(t.type == INT)
		{
			cout << names[t.type] << ", " << t.lexeme << endl;
			// success
		}
		else
		{
			syntax_error();
		}
	}
	
	//coef -> INT | REAL
	void parse_coef()
	{
		Token t = lex->get_token();
		
		if(t.type == INT || t.type == REAL)
		{
			cout << names[t.type] << ", " << t.lexeme << endl;
			// success
		}
		else
		{
			syntax_error();
		}
	}
	
	// term -> coef | coef x | coef x ^ exp
	void parse_term()
	{
		parse_coef();
		Token t = lex->get_token();
		
		if(t.type == X)
		{
			cout << names[t.type] << ", " << t.lexeme << endl;
			Token t2 = lex->get_token();
			if(t2.type == POWER)
			{
				cout << names[t2.type] << ", " << t2.lexeme << endl;
				parse_exponent();
			}
			else
			{
				lex->give_token(t2);
			}
		}
		else
		{
			lex->give_token(t);
		}
	}
	
	// poly -> term | term + poly | term - poly
	void parse_poly()
	{
		parse_term();
		Token t = peek();
		if(t.type == PLUS || t.type == MINUS)
		{
			lex->get_token();
			parse_poly();
		}
	}
	
	public:
	Parser(string data)
	{
		lex = new Lexer(data);
	}
	
	void parse_input()
	{
		parse_poly();
		expect(END_OF_FILE);
	}
};

int main()
{
	
	string line;
	getline(cin, line);
	Parser *parser = new Parser(line);
	parser->parse_input();

	return 0;
}



Here is some sample output:

data=13x^5 + 17x^2 - 0.5
INT, 13
X, x
X, x
INT, 5
INT, 17
X, x
X, x
INT, 2
REAL, 0.5



data=-7x ^ 3+ -4x^2 + 2
SYNTAX ERROR



That syntax error is because I don't currently support + -.

Anyway, that's parsing mostly done. I must note that exponents received a rule of their own, which should only support positive exponents.

data=9x^4 + 14.5x^-2
INT, 9
X, x
POWER, ^
INT, 4
REAL, 14.5
X, x
POWER, ^
SYNTAX ERROR



So, til next time.

0 Comments On This Entry

 

December 2018

S M T W T F S
      1
2345678
9101112131415
1617 18 19202122
23242526272829
3031     

Tags

    Recent Entries

    Recent Comments

    Search My Blog

    0 user(s) viewing

    0 Guests
    0 member(s)
    0 anonymous member(s)