Ok, the following has not been tested to the fullest extent. What inputs I gave it were accepted or rejected as expected, but I didn't spend a lot of time *trying* to break it.
This is not technically a FSM or a Pushdown automaton. It is closer to a FSM and by some definitions outside of a theory of computation class it IS a FSM. That having been said it is not exactly what you are trying to do but is an example of something simmilar.
This program accepts inputs with the following
BaseTems:
Identifiers: Strings of letters. AAAaaa etx.
Numbers: either a string of digits, or a negitive sign and a string of digits: ### or -###
Operators: {+, -, *, /}
Expresions:
BaseTerm
(Expresion)
Expresion op Expresion
So the following sould all be accepted: -5 + ((4+ten)/zero + 6), ( a + (8 +(-5))+((9)), (1023 * 3) + (9 + (10 -(5/6)/3)/3),...etc.
Again this has not been given a true gauntlet of error conditions and I am sure that some work can be done on better explaining what the errors are:
CODE
#include <iostream> //Input output
using namespace std;
int ParseExp(char *sInput);
//Determine if cIn is a letter...
inline int isAlpha(int cIn)
{
return (((cIn >= 'A') && (cIn <= 'Z')) || ((cIn >= 'a') && (cIn <= 'z')));
}
//Determine if cIn is a number...
inline int isNum(int cIn)
{
return ((cIn >= '0') && (cIn <= '9'));
}
//Determine if cIn is white space...
inline int isWhiteSpace(int cIn)
{
return ((cIn == 32 ) || (cIn == 9) || (cIn == 10) || (cIn == 13));
}
//Determine if cIn is and operator...
inline int isOp(int cIn)
{
return ((cIn == '+' ) || (cIn == '-') || (cIn == '*') || (cIn == '/'));
}
/*************************************************
/* MAIN *
/*************************************************/
int main()
{
char str[254]; //Max Length of the expresion...
int retCode;
int i=0;
cout << "Enter an expresion to parse: ";
cin.get(str, 255);
retCode = ParseExp(str);
return retCode;
}
int ParseExp(char *sInput)
{
int stackCount = 0; //Used to keep track of the ('s and )'s
int state = 0; //Our current state
int loc = 0; //The location of the next char
int done = 0;
int ch;
while (!done)
{
ch = sInput[loc++];
//*** State 0: eats WS, inc counter for (,
// look for Numbers or Itentifiers...
if (state==0)
{
if (ch == '(') {stackCount++; state = 0;}
else if (isNum(ch)) {state = 1;} //Found a number.
else if (ch == '-') {state = 1;} //Also found a number.
else if (isAlpha(ch)) {state = 2;} //Found and Identifier.
else if (isWhiteSpace(ch)) {state = 0;} //Eat WS.
else if (ch == 0) //EOL...
{
if (stackCount == 0 ) {state = -1; done = 1;}
}
else {state = -2; done = 1;} //Exit with error code
}
//*** State 1: Numbers... look for WS or Operator
else if (state == 1) //Value is a number
{
if (isNum(ch)) {state = 1;} //Add another digit...
else if (isOp(ch)) {state = 3;} //End Number
else if (isWhiteSpace(ch)) {state = 5;} //End Number
else if (ch == ')') //Found a ')'
{
state = 4;
stackCount--;
if (stackCount < 0) { state = -5; done = 1; } //-5 invalid )
}
else if (ch == 0) //EOL...
{
if (stackCount == 0 ) {state = -1; done = 1;}
}
else {state = -3; done = 1;} //-3 invalid number
}
//*** State 2: Identifiers... Look for WS or Operator
else if (state == 2)
{
if (isAlpha(ch)) {state = 2;}
else if (isOp(ch)) {state = 3;}
else if (isWhiteSpace(ch)) {state = 5;} //End Number
else if (ch == ')')
{
state = 4;
stackCount--;
if (stackCount < 0) { state = -5; done = 1; }
}
else if (ch == 0) //EOL...
{
if (stackCount == 0 ) {state = -1; done = 1;}
}
else {state = -4; done = 1;} //-4 invalid identifier
}
//*** State 3: Eat WS, look for Num or Itentifier.
else if(state == 3) //Eat WS loop for number or var
{
if (ch == '(') {stackCount++; state = 0;}
else if (isNum(ch)) {state = 1;} //Found a number.
else if (ch == '-') {state = 1;} //Also found a number.
else if (isAlpha(ch)) {state = 2;} //Found and Identifier.
else if (isWhiteSpace(ch)) {state = 3;} //Eat WS.
else {state = -6; done = 1;} //-6 Expected Ident, Num, or '('
}
else if (state == 4)
{
if (ch == ')')
{
state = 4;
stackCount--;
if (stackCount < 0) { state = -5; done = 1; }
}
else if (isWhiteSpace(ch)) {state = 4;}
else if (isOp(ch)) {state = 0;}
else if (ch == 0) //EOL...
{
if (stackCount == 0 ) {state = -1; done = 1;}
}
else {state = -7; done = 1;} //-7 expected ')' or op.
}
else if (state == 5)
{
if (isOp(ch)) {state = 3;} //Operation found
else if (isWhiteSpace(ch)) {state = 5;} //Eat WS
else if (ch == ')') //Found a ')'
{
state = 4;
stackCount--;
if (stackCount < 0) { state = -5; done = 1; } //-5 invalid )
}
else if (ch == 0) //EOL...
{
if (stackCount == 0 ) {state = -1; done = 1;}
}
else {state = -3; done = 1;} //-3 invalid number
}
}
if (state != -1)
{
if (state == -2)
{
sInput[loc]=0;
cout << "\nExpected Number, Itentifier, or '(' at:"
<< loc - 1 << " in: " << sInput;
}
else if (state == -3)
{
sInput[loc]=0;
cout << "\nInvalid Number at:"
<< loc - 1 << " in: " << sInput;
}
else if (state == -4)
{
sInput[loc]=0;
cout << "\nInvalid Identifier at:"
<< loc - 1 << " in: " << sInput;
}
else if (state == -5)
{
sInput[loc]=0;
cout << "\nUnmatched ')' at:"
<< loc - 1 << " in: " << sInput;
}
else if (state == -5)
{
sInput[loc]=0;
cout << "\nUnmatched ')' at:"
<< loc - 1 << " in: " << sInput;
}
else if (state == -7)
{
sInput[loc]=0;
cout << "\nExpected Number, Itentifier, or '(' at:"
<< loc - 1 << " in: " << sInput;
}
else
{
sInput[loc]=0;
cout << "\nUnknown error at:"
<< loc - 1 << " in: " << sInput;
}
}
else
{
cout << "\n\nString accepted\n";
}
return state;
}
Run this though with the debuger on in the ParseExp routine a few times to get a handle on why and how it works (or if you find a bug why it dosn't work).
BTW. Innitialy I was going to write a parsing routine... thus the name ParseExp... thus the name, but I belive that would technically be called scanning. Not sure.
This post has been edited by NickDMax: 23 Feb, 2007 - 12:41 AM