## Thursday, December 24, 2015

### Infix Notation Parser via Shunting-Yard Algorithm

Infix notation is the typical notation for writing equations in algebra.
An example would be: 7 - (2 * 5)

Parsing such an equation is not a trivial task, but I wanted one for my EquationFinder project, as I wanted to respect order of operations.

Strategies include substitution/replacement algorithms, recursion to parse into a tree and then tree traversal, or converting the infix notation to reverse polish notation (RPN), also known as post-fix notation, then using a stack based postfix notation evaluator. I choose the latter, as such algorithms are well defined in many places on the web.

My code consists of 3 classes, all static:
(Links go to the .cs file on GitHub)
1. InfixNotation - this simply holds a few public variables and calls the public methods on the below two classes.
2. ShuntingYardAlgorithm - this converts an equation in infix notation into postfix notation (aka RPN).
3. PostfixNotation - this evaluates the equation in postfix notation and returns a numerical result value.

In order to implement the shunting-yard algorithm and the postfix evaluator, I simply wrote the steps to the algorithms as written on Wikipedia:
(Links go to the Wikipedia article)
Link to the Shunting-Yard Algorithm to convert Infix notation to Postfix notation.
Link to the Postfix Notation Evaluation Algorithm.

The code for this is pretty extensive, but I will prettify it and present it below. Alternatively, you can view and download the code from the MathNotationConverter project on my GitHub.

### InfixNotationParser:

``````
public static class InfixNotation
{
public static string Numbers = "0123456789";
public static string Operators = "+-*/^";

public static bool IsNumeric(string text)
{
return string.IsNullOrWhiteSpace(text) ? false : text.All(c => Numbers.Contains(c));
}

public static int Evaluate(string infixNotationString)
{
string postFixNotationString = ShuntingYardConverter.Convert(infixNotationString);
int result = PostfixNotation.Evaluate(postFixNotationString);
return result;
}
}
``````

## ShuntingYardConverter

(converts an equation from infix notation into postfix notation):
``````
public static class ShuntingYardAlgorithm
{
private static string AllowedCharacters = InfixNotation.Numbers + InfixNotation.Operators + "()";

private enum Associativity
{
Left, Right
}
private static Dictionary<char, int> PrecedenceDictionary = new Dictionary<char, int>()
{
{'(', 0}, {')', 0},
{'+', 1}, {'-', 1},
{'*', 2}, {'/', 2},
{'^', 3}
};
private static Dictionary<char, Associativity> AssociativityDictionary = new Dictionary<char, Associativity>()
{
{'+', Associativity.Left},
{'-', Associativity.Left},
{'*', Associativity.Left},
{'/', Associativity.Left},
{'^', Associativity.Right}
};

private static void AddToOutput(List<char> output, params char[] chars)
{
if (chars != null && chars.Length > 0)
{
foreach (char c in chars)
{
}
}
}

public static string Convert(string infixNotationString)
{
if (string.IsNullOrWhiteSpace(infixNotationString))
{
throw new ArgumentException("Argument infixNotationString must not be null, empty or whitespace.", "infixNotationString");
}

List<char> output = new List<char>();
Stack<char> operatorStack = new Stack<char>();
string sanitizedString = new string(infixNotationString.Where(c => AllowedCharacters.Contains(c)).ToArray());

string number = string.Empty;
List<string> enumerableInfixTokens = new List<string>();
foreach (char c in sanitizedString)
{
if (InfixNotation.Operators.Contains(c) || "()".Contains(c))
{
if (number.Length > 0)
{
number = string.Empty;
}
}
else if (InfixNotation.Numbers.Contains(c))
{
number += c.ToString();
}
else
{
throw new Exception(string.Format("Unexpected character '{0}'.", c));
}
}

if (number.Length > 0)
{
number = string.Empty;
}

foreach (string token in enumerableInfixTokens)
{
if (InfixNotation.IsNumeric(token))
{
}
else if (token.Length == 1)
{
char c = token[0];

if (InfixNotation.Numbers.Contains(c)) // Numbers (operands)
{
}
else if (InfixNotation.Operators.Contains(c)) // Operators
if (operatorStack.Count > 0)
{
char o = operatorStack.Peek();
if ((AssociativityDictionary[c] == Associativity.Left &&
PrecedenceDictionary[c] <= PrecedenceDictionary[o])
||
(AssociativityDictionary[c] == Associativity.Right &&
PrecedenceDictionary[c] < PrecedenceDictionary[o]))
{
}
}
operatorStack.Push(c);
}
else if (c == '(') // open brace
{
operatorStack.Push(c);
}
else if (c == ')') // close brace
{
bool leftParenthesisFound = false;
while (operatorStack.Count > 0 )
{
char o = operatorStack.Peek();
if (o != '(')
{
}
else
{
operatorStack.Pop();
leftParenthesisFound = true;
break;
}
}

if (!leftParenthesisFound)
{
throw new FormatException("The algebraic string contains mismatched parentheses (missing a left parenthesis).");
}
}
else // wtf?
{
throw new Exception(string.Format("Unrecognized character '{0}'.", c));
}
}
else
{
throw new Exception(string.Format("String '{0}' is not numeric and has a length greater than 1.", token));
}
} // end foreach

while (operatorStack.Count > 0)
{
char o = operatorStack.Pop();
if (o == '(')
{
throw new FormatException("The algebraic string contains mismatched parentheses (extra left parenthesis).");
}
else if (o == ')')
{
throw new FormatException("The algebraic string contains mismatched parentheses (extra right parenthesis).");
}
else
{
}
}

return new string(output.ToArray());
}
}
``````

### PostfixNotation

(evaluates the postfix notation and returns a numerical result):
``````
public static class PostfixNotation
{
private static string AllowedCharacters = InfixNotation.Numbers + InfixNotation.Operators + " ";

public static int Evaluate(string postfixNotationString)
{
if (string.IsNullOrWhiteSpace(postfixNotationString))
{
throw new ArgumentException("Argument postfixNotationString must not be null, empty or whitespace.", "postfixNotationString");
}

Stack<string> stack = new Stack<string>();
string sanitizedString = new string(postfixNotationString.Where(c => AllowedCharacters.Contains(c)).ToArray());
List<string> enumerablePostfixTokens = sanitizedString.Split(new char[] { ' ' }, StringSplitOptions.RemoveEmptyEntries).ToList();

foreach (string token in enumerablePostfixTokens)
{
if (token.Length > 0)
{
if (token.Length > 1)
{
if (InfixNotation.IsNumeric(token))
{
stack.Push(token);
}
else
{
throw new Exception("Operators and operands must be separated by a space.");
}
}
else
{
char tokenChar = token[0];

if (InfixNotation.Numbers.Contains(tokenChar))
{
stack.Push(tokenChar.ToString());
}
else if (InfixNotation.Operators.Contains(tokenChar))
{
if (stack.Count < 2)
{
throw new FormatException("The algebraic string has not sufficient values in the expression for the number of operators.");
}

string r = stack.Pop();
string l = stack.Pop();

int rhs = int.MinValue;
int lhs = int.MinValue;

bool parseSuccess = int.TryParse(r, out rhs);
parseSuccess &= int.TryParse(l, out lhs);
parseSuccess &= (rhs != int.MinValue && lhs != int.MinValue);

if (!parseSuccess)
{
throw new Exception("Unable to parse valueStack characters to Int32.");
}

int value = int.MinValue;
if (tokenChar == '+')
{
value = lhs + rhs;
}
else if (tokenChar == '-')
{
value = lhs - rhs;
}
else if (tokenChar == '*')
{
value = lhs * rhs;
}
else if (tokenChar == '/')
{
value = lhs / rhs;
}
else if (tokenChar == '^')
{
value = (int)Math.Pow(lhs, rhs);
}

if (value != int.MinValue)
{
stack.Push(value.ToString());
}
else
{
throw new Exception("Value never got set.");
}
}
else
{
throw new Exception(string.Format("Unrecognized character '{0}'.", tokenChar));
}
}
}
else
{
throw new Exception("Token length is less than one.");
}
}

if (stack.Count == 1)
{
int result = 0;
if (!int.TryParse(stack.Pop(), out result))
{
throw new Exception("Last value on stack could not be parsed into an integer.");
}
else
{
return result;
}
}
else
{
throw new Exception("The input has too many values for the number of operators.");
}

} // method
} // class
``````

Another alternative technique is to using the Shunting-Yard Algorithm to turn infix notation into an abstract syntax tree (Linq.Expressions anyone?). I will likely post this technique later.

Other blog posts by me that are related to this article are the Threaded Equation Finder, a Mixed Radix System Calulator and Drawing Text Along a Bezier Spline.