From b2213663d1132ecdc6cc4b3f7e6960d6b33cdfbe Mon Sep 17 00:00:00 2001 From: Jordan Wages Date: Tue, 19 Aug 2025 04:00:31 -0500 Subject: [PATCH] Fixed minor grammar issue. --- src/core/VariableType.cs | 2 - src/core/cocor/Interpreter.atg | 1 + src/core/cocor/Parser.cs | 522 +++++++++++++++++++++++++++++++++ src/core/cocor/Parser.cs.old | 522 +++++++++++++++++++++++++++++++++ src/core/cocor/Scanner.cs | 489 ++++++++++++++++++++++++++++++ src/core/cocor/Scanner.cs.old | 489 ++++++++++++++++++++++++++++++ 6 files changed, 2023 insertions(+), 2 deletions(-) create mode 100644 src/core/cocor/Parser.cs create mode 100644 src/core/cocor/Parser.cs.old create mode 100644 src/core/cocor/Scanner.cs create mode 100644 src/core/cocor/Scanner.cs.old diff --git a/src/core/VariableType.cs b/src/core/VariableType.cs index 2c4bc76..4061f70 100644 --- a/src/core/VariableType.cs +++ b/src/core/VariableType.cs @@ -11,8 +11,6 @@ namespace cs_mic.core None, Numeric, NumericArray, - String, - StringArray, Expression, } } diff --git a/src/core/cocor/Interpreter.atg b/src/core/cocor/Interpreter.atg index 5bc3ca1..7fedc2a 100644 --- a/src/core/cocor/Interpreter.atg +++ b/src/core/cocor/Interpreter.atg @@ -96,6 +96,7 @@ PRODUCTIONS INTERPRETER (. FunctionValue fv = new FunctionValue(); bool success = true; + decimal r = 0; if (this.interpreter == null) { return; } .) 
= diff --git a/src/core/cocor/Parser.cs b/src/core/cocor/Parser.cs new file mode 100644 index 0000000..880d873 --- /dev/null +++ b/src/core/cocor/Parser.cs @@ -0,0 +1,522 @@ +using System; +using System.Text; +using System.Collections.Generic; +using csmic; + + + +using System; + +namespace csmic.Interpreter { + + + +public class Parser { + public const int _EOF = 0; + public const int _identifier = 1; + public const int _sign = 2; + public const int _binary = 3; + public const int _hex = 4; + public const int _number = 5; + public const int _string = 6; + public const int _LPAREN = 7; + public const int _RPAREN = 8; + public const int _COMPARER = 9; + public const int maxT = 22; + + const bool _T = true; + const bool _x = false; + const int minErrDist = 2; + + public Scanner scanner; + public Errors errors; + + public Token t; // last recognized token + public Token la; // lookahead token + int errDist = minErrDist; + +private FunctionValue functionValue = new FunctionValue(); + +public FunctionValue Result +{ + get { return this.functionValue; } + set { this.functionValue = value; } +} + +private InputInterpreter interpreter = null; + +public InputInterpreter Interpreter +{ + get { return this.interpreter; } + set { this.interpreter = value; } +} + +bool IsFunctionCall() +{ + scanner.ResetPeek(); + Token next = scanner.Peek(); + if (next.kind == _LPAREN && la.kind == _identifier) return true; + return false; +} + +bool IsCompare() +{ + scanner.ResetPeek(); + Token next = scanner.Peek(); + if (next.kind == _COMPARER) return true; + return false; +} + +bool IsAssignment() +{ + scanner.ResetPeek(); + Token next = scanner.Peek(); + if (next.val == "::" || next.val == ":=" || next.val == "->") return true; + return false; +} + +bool IsArrayCall() +{ + scanner.ResetPeek(); + Token next = scanner.Peek(); + if (next.val == "[") return true; + return false; +} + +/* + * Character sets and tokens + */ + + + + public Parser(Scanner scanner) { + this.scanner = scanner; + 
errors = new Errors(); + } + + void SynErr (int n) { + if (errDist >= minErrDist) errors.SynErr(la.line, la.col, n); + errDist = 0; + } + + public void SemErr (string msg) { + if (errDist >= minErrDist) errors.SemErr(t.line, t.col, msg); + errDist = 0; + } + + void Get () { + for (;;) { + t = la; + la = scanner.Scan(); + if (la.kind <= maxT) { ++errDist; break; } + + la = t; + } + } + + void Expect (int n) { + if (la.kind==n) Get(); else { SynErr(n); } + } + + bool StartOf (int s) { + return set[s, la.kind]; + } + + void ExpectWeak (int n, int follow) { + if (la.kind == n) Get(); + else { + SynErr(n); + while (!StartOf(follow)) Get(); + } + } + + + bool WeakSeparator(int n, int syFol, int repFol) { + int kind = la.kind; + if (kind == n) {Get(); return true;} + else if (StartOf(repFol)) {return false;} + else { + SynErr(n); + while (!(set[syFol, kind] || set[repFol, kind] || set[0, kind])) { + Get(); + kind = la.kind; + } + return StartOf(syFol); + } + } + + + void INTERPRETER() { + FunctionValue fv = new FunctionValue(); + bool success = true; + decimal r = 0; + if (this.interpreter == null) { return; } + + if (IsCompare()) { + Comparison(out success); + this.functionValue = (success == true) ? 
FunctionValue.TRUE : FunctionValue.FALSE; + this.interpreter.ProduceOutput(this.functionValue); + + } else if (IsAssignment()) { + Assignment(out r); + this.functionValue = new FunctionValue(ValueType.Numeric, r); + this.interpreter.ProduceOutput(this.functionValue); + + } else if (StartOf(1)) { + Expression(out r); + this.functionValue = new FunctionValue(ValueType.Numeric, r); + this.interpreter.ProduceOutput(this.functionValue); + + } else SynErr(23); + } + + void Comparison(out bool result) { + decimal firstValue = 0; decimal secondValue = 0; string compareType = string.Empty; result = false; + Expression(out firstValue); + Expect(9); + compareType = t.val; + Expression(out secondValue); + switch(compareType) + { + case "==": result = (firstValue == secondValue); break; + case ">": result = (firstValue > secondValue); break; + case "<": result = (firstValue < secondValue); break; + case ">=": result = (firstValue >= secondValue); break; + case "<=": result = (firstValue <= secondValue); break; + default: result = false; break; + } + + } + + void Assignment(out decimal r) { + string identifier = string.Empty; string expression = string.Empty; decimal[] d = new decimal[0]; r = 0; + Expect(1); + identifier = t.val; + if (la.kind == 19) { + Get(); + Expression(out r); + this.interpreter.AssignNumeric(identifier, r); + } else if (la.kind == 20) { + Get(); + AnyExpression(out expression); + this.interpreter.AssignExpression(identifier, expression); r = 0; + } else if (la.kind == 21) { + Get(); + ArrayL(out d); + this.interpreter.AssignNumericArray(identifier, d); r = 0; + } else SynErr(24); + } + + void Expression(out decimal r) { + decimal r1 = 0; r = 0; + Term(out r); + while (la.kind == 10 || la.kind == 11) { + if (la.kind == 10) { + Get(); + Term(out r1); + r += r1; + } else { + Get(); + Term(out r1); + r -= r1; + } + } + } + + void Term(out decimal r) { + decimal r1 = 0; r = 0; + Factor(out r); + while (la.kind == 12 || la.kind == 13 || la.kind == 14) { + if 
(la.kind == 12) { + Get(); + Factor(out r1); + r *= r1; + } else if (la.kind == 13) { + Get(); + Factor(out r1); + r /= r1; + } else { + Get(); + Term(out r1); + r %= r1; + } + } + } + + void Factor(out decimal r) { + decimal r1 = 0; + Value(out r); + while (la.kind == 15) { + Get(); + Expression(out r1); + r = Convert.ToDecimal(Math.Pow(Convert.ToDouble(r), Convert.ToDouble(r1))); + } + } + + void Value(out decimal r) { + r = 0; decimal r1 = 0; int signum = 1; + FunctionValue fvr = new FunctionValue(); + string ident = string.Empty; + + if (la.kind == 10 || la.kind == 11) { + if (la.kind == 10) { + Get(); + } else { + Get(); + signum = -1; + } + } + if (IsFunctionCall()) { + Function(out fvr); + if (fvr.Type == ValueType.Numeric && fvr.Value != null) + { + try { r = signum * Convert.ToDecimal(fvr.Value); } + catch { SemErr("function returned non-numeric"); r = 0; } + } + else + { + SemErr("function returned a string; number required"); + r = 0; + } + + } else if (IsArrayCall()) { + ArrayCall(out r); + r = signum * r; + } else if (la.kind == 1) { + Get(); + ident = t.val; + decimal temp = 0; + string expr = string.Empty; + bool ok = false; + // Prefer numeric binding + try + { + // runtime method expected + ok = this.interpreter.TryGetNumeric(ident, out temp); + } + catch { ok = false; } + if (ok) + { + r = signum * temp; + } + else + { + // Check expression binding + try + { + if (this.interpreter.TryGetExpression(ident, out expr)) + { + FunctionValue eval = this.interpreter.EvaluateExpression(expr); + if (eval.Type == ValueType.Numeric && eval.Value != null) + { + r = signum * Convert.ToDecimal(eval.Value); + } + else + { + SemErr("expression variable did not evaluate to a number"); + r = 0; + } + } + else + { + SemErr("variable '" + ident + "' is not numeric"); + r = 0; + } + } + catch { SemErr("error evaluating expression variable"); r = 0; } + } + + } else if (la.kind == 5) { + Get(); + r = signum * Convert.ToDecimal(t.val); + } else if (la.kind == 4) { + 
Get(); + string hx = t.val.Remove(0,2); + try { r = signum * Convert.ToDecimal(Convert.ToInt64(hx, 16)); } + catch { r = 0; } + + } else if (la.kind == 3) { + Get(); + string bx = t.val.Remove(t.val.Length - 1); + try { r = signum * Convert.ToDecimal(Convert.ToInt64(bx, 2)); } + catch { r = 0; } + + } else if (la.kind == 7) { + Get(); + Expression(out r); + Expect(8); + r = signum * r; + } else SynErr(25); + } + + void Function(out FunctionValue r) { + string functionName = string.Empty; FunctionArgument[] args = new FunctionArgument[0]; r = new FunctionValue(); + Expect(1); + functionName = t.val; + Expect(7); + ArgList(out args); + Expect(8); + r = this.interpreter.ExecuteFunction(functionName, args); + } + + void ArrayCall(out decimal r) { + string ident = string.Empty; r = 0; decimal pos = 0; + Expect(1); + ident = t.val; + Expect(16); + Expression(out pos); + try + { + int i = Convert.ToInt32(pos); + decimal[] values; + if (this.interpreter.TryGetNumericArray(ident, out values)) + { + if (i >= 0 && i < values.Length) { r = values[i]; } + else { SemErr("array index out of range"); r = 0; } + } + else + { + SemErr("variable '" + ident + "' is not a numeric array"); + r = 0; + } + } + catch { SemErr("invalid array index"); r = 0; } + + Expect(18); + } + + void ArrayL(out decimal[] d) { + List list = new List(); decimal r = 0; d = new decimal[0]; + Expect(16); + Expression(out r); + list.Add(r); d = list.ToArray(); + while (la.kind == 17) { + Get(); + Expression(out r); + list.Add(r); d = list.ToArray(); + } + Expect(18); + } + + void AnyExpression(out string value) { + value = string.Empty; StringBuilder builder = new StringBuilder(); + Get(); + builder.Append(t.val); + while (StartOf(2)) { + Get(); + builder.Append(t.val); + } + value = builder.ToString(); + } + + void ArgList(out FunctionArgument[] args) { + List list = new List(); FunctionArgument a = new FunctionArgument { Name = string.Empty, Value = new FunctionValue() }; args = new FunctionArgument[0]; + 
if (StartOf(3)) { + Arg(out a); + list.Add(a); args = list.ToArray(); + while (la.kind == 17) { + Get(); + Arg(out a); + list.Add(a); args = list.ToArray(); + } + } + } + + void Arg(out FunctionArgument arg) { + arg = new FunctionArgument { Name = string.Empty, Value = new FunctionValue() }; decimal r = 0; string s = string.Empty; + if (la.kind == 6) { + Get(); + s = t.val.Substring(1, t.val.Length - 2); arg = new FunctionArgument { Name = string.Empty, Value = new FunctionValue(ValueType.String, s) }; + } else if (StartOf(1)) { + Expression(out r); + arg = new FunctionArgument { Name = string.Empty, Value = new FunctionValue(ValueType.Numeric, r) }; + } else SynErr(26); + } + + + + public void Parse() { + la = new Token(); + la.val = ""; + Get(); + INTERPRETER(); + Expect(0); + + } + + static readonly bool[,] set = { + {_T,_x,_x,_x, _x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_x,_x}, + {_x,_T,_x,_T, _T,_T,_x,_T, _x,_x,_T,_T, _x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_x,_x}, + {_x,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_x}, + {_x,_T,_x,_T, _T,_T,_T,_T, _x,_x,_T,_T, _x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_x,_x} + + }; +} // end Parser + + +public class Errors { + public int count = 0; // number of errors detected + public System.IO.TextWriter errorStream = Console.Out; // error messages go to this stream + public string errMsgFormat = "-- line {0} col {1}: {2}"; // 0=line, 1=column, 2=text + + public virtual void SynErr (int line, int col, int n) { + string s; + switch (n) { + case 0: s = "EOF expected"; break; + case 1: s = "identifier expected"; break; + case 2: s = "sign expected"; break; + case 3: s = "binary expected"; break; + case 4: s = "hex expected"; break; + case 5: s = "number expected"; break; + case 6: s = "string expected"; break; + case 7: s = "LPAREN expected"; break; + case 8: s = "RPAREN expected"; break; + case 9: s = "COMPARER expected"; break; + case 10: s = "\"+\" expected"; break; + case 11: s = "\"-\" expected"; 
break; + case 12: s = "\"*\" expected"; break; + case 13: s = "\"/\" expected"; break; + case 14: s = "\"%\" expected"; break; + case 15: s = "\"^\" expected"; break; + case 16: s = "\"[\" expected"; break; + case 17: s = "\",\" expected"; break; + case 18: s = "\"]\" expected"; break; + case 19: s = "\"::\" expected"; break; + case 20: s = "\":=\" expected"; break; + case 21: s = "\"->\" expected"; break; + case 22: s = "??? expected"; break; + case 23: s = "invalid INTERPRETER"; break; + case 24: s = "invalid Assignment"; break; + case 25: s = "invalid Value"; break; + case 26: s = "invalid Arg"; break; + + default: s = "error " + n; break; + } + errorStream.WriteLine(errMsgFormat, line, col, s); + count++; + } + + public virtual void SemErr (int line, int col, string s) { + errorStream.WriteLine(errMsgFormat, line, col, s); + count++; + } + + public virtual void SemErr (string s) { + errorStream.WriteLine(s); + count++; + } + + public virtual void Warning (int line, int col, string s) { + errorStream.WriteLine(errMsgFormat, line, col, s); + } + + public virtual void Warning(string s) { + errorStream.WriteLine(s); + } +} // Errors + + +public class FatalError: Exception { + public FatalError(string m): base(m) {} +} +} \ No newline at end of file diff --git a/src/core/cocor/Parser.cs.old b/src/core/cocor/Parser.cs.old new file mode 100644 index 0000000..639d4f6 --- /dev/null +++ b/src/core/cocor/Parser.cs.old @@ -0,0 +1,522 @@ +using System; +using System.Text; +using System.Collections.Generic; +using csmic; + + + +using System; + +namespace csmic.Interpreter { + + + +public class Parser { + public const int _EOF = 0; + public const int _identifier = 1; + public const int _sign = 2; + public const int _binary = 3; + public const int _hex = 4; + public const int _number = 5; + public const int _string = 6; + public const int _LPAREN = 7; + public const int _RPAREN = 8; + public const int _COMPARER = 9; + public const int maxT = 22; + + const bool _T = true; + 
const bool _x = false; + const int minErrDist = 2; + + public Scanner scanner; + public Errors errors; + + public Token t; // last recognized token + public Token la; // lookahead token + int errDist = minErrDist; + +private FunctionValue functionValue = new FunctionValue(); + +public FunctionValue Result +{ + get { return this.functionValue; } + set { this.functionValue = value; } +} + +private InputInterpreter interpreter = null; + +public InputInterpreter Interpreter +{ + get { return this.interpreter; } + set { this.interpreter = value; } +} + +bool IsFunctionCall() +{ + scanner.ResetPeek(); + Token next = scanner.Peek(); + if (next.kind == _LPAREN && la.kind == _identifier) return true; + return false; +} + +bool IsCompare() +{ + scanner.ResetPeek(); + Token next = scanner.Peek(); + if (next.kind == _COMPARER) return true; + return false; +} + +bool IsAssignment() +{ + scanner.ResetPeek(); + Token next = scanner.Peek(); + if (next.val == "::" || next.val == ":=" || next.val == "->") return true; + return false; +} + +bool IsArrayCall() +{ + scanner.ResetPeek(); + Token next = scanner.Peek(); + if (next.val == "[") return true; + return false; +} + +/* + * Character sets and tokens + */ + + + + public Parser(Scanner scanner) { + this.scanner = scanner; + errors = new Errors(); + } + + void SynErr (int n) { + if (errDist >= minErrDist) errors.SynErr(la.line, la.col, n); + errDist = 0; + } + + public void SemErr (string msg) { + if (errDist >= minErrDist) errors.SemErr(t.line, t.col, msg); + errDist = 0; + } + + void Get () { + for (;;) { + t = la; + la = scanner.Scan(); + if (la.kind <= maxT) { ++errDist; break; } + + la = t; + } + } + + void Expect (int n) { + if (la.kind==n) Get(); else { SynErr(n); } + } + + bool StartOf (int s) { + return set[s, la.kind]; + } + + void ExpectWeak (int n, int follow) { + if (la.kind == n) Get(); + else { + SynErr(n); + while (!StartOf(follow)) Get(); + } + } + + + bool WeakSeparator(int n, int syFol, int repFol) { + int kind = 
la.kind; + if (kind == n) {Get(); return true;} + else if (StartOf(repFol)) {return false;} + else { + SynErr(n); + while (!(set[syFol, kind] || set[repFol, kind] || set[0, kind])) { + Get(); + kind = la.kind; + } + return StartOf(syFol); + } + } + + + void INTERPRETER() { + FunctionValue fv = new FunctionValue(); + bool success = true; + decimal r = 0.0; + if (this.interpreter == null) { return; } + + if (IsCompare()) { + Comparison(out success); + this.functionValue = (success == true) ? FunctionValue.TRUE : FunctionValue.FALSE; + this.interpreter.ProduceOutput(this.functionValue); + + } else if (IsAssignment()) { + Assignment(out r); + this.functionValue = new FunctionValue(ValueType.Numeric, r); + this.interpreter.ProduceOutput(this.functionValue); + + } else if (StartOf(1)) { + Expression(out r); + this.functionValue = new FunctionValue(ValueType.Numeric, r); + this.interpreter.ProduceOutput(this.functionValue); + + } else SynErr(23); + } + + void Comparison(out bool result) { + decimal firstValue = 0; decimal secondValue = 0; string compareType = string.Empty; result = false; + Expression(out firstValue); + Expect(9); + compareType = t.val; + Expression(out secondValue); + switch(compareType) + { + case "==": result = (firstValue == secondValue); break; + case ">": result = (firstValue > secondValue); break; + case "<": result = (firstValue < secondValue); break; + case ">=": result = (firstValue >= secondValue); break; + case "<=": result = (firstValue <= secondValue); break; + default: result = false; break; + } + + } + + void Assignment(out decimal r) { + string identifier = string.Empty; string expression = string.Empty; decimal[] d = new decimal[0]; r = 0; + Expect(1); + identifier = t.val; + if (la.kind == 19) { + Get(); + Expression(out r); + this.interpreter.AssignNumeric(identifier, r); + } else if (la.kind == 20) { + Get(); + AnyExpression(out expression); + this.interpreter.AssignExpression(identifier, expression); r = 0; + } else if (la.kind == 
21) { + Get(); + ArrayL(out d); + this.interpreter.AssignNumericArray(identifier, d); r = 0; + } else SynErr(24); + } + + void Expression(out decimal r) { + decimal r1 = 0; r = 0; + Term(out r); + while (la.kind == 10 || la.kind == 11) { + if (la.kind == 10) { + Get(); + Term(out r1); + r += r1; + } else { + Get(); + Term(out r1); + r -= r1; + } + } + } + + void Term(out decimal r) { + decimal r1 = 0; r = 0; + Factor(out r); + while (la.kind == 12 || la.kind == 13 || la.kind == 14) { + if (la.kind == 12) { + Get(); + Factor(out r1); + r *= r1; + } else if (la.kind == 13) { + Get(); + Factor(out r1); + r /= r1; + } else { + Get(); + Term(out r1); + r %= r1; + } + } + } + + void Factor(out decimal r) { + decimal r1 = 0; + Value(out r); + while (la.kind == 15) { + Get(); + Expression(out r1); + r = Convert.ToDecimal(Math.Pow(Convert.ToDouble(r), Convert.ToDouble(r1))); + } + } + + void Value(out decimal r) { + r = 0; decimal r1 = 0; int signum = 1; + FunctionValue fvr = new FunctionValue(); + string ident = string.Empty; + + if (la.kind == 10 || la.kind == 11) { + if (la.kind == 10) { + Get(); + } else { + Get(); + signum = -1; + } + } + if (IsFunctionCall()) { + Function(out fvr); + if (fvr.Type == ValueType.Numeric && fvr.Value != null) + { + try { r = signum * Convert.ToDecimal(fvr.Value); } + catch { SemErr("function returned non-numeric"); r = 0; } + } + else + { + SemErr("function returned a string; number required"); + r = 0; + } + + } else if (IsArrayCall()) { + ArrayCall(out r); + r = signum * r; + } else if (la.kind == 1) { + Get(); + ident = t.val; + decimal temp = 0; + string expr = string.Empty; + bool ok = false; + // Prefer numeric binding + try + { + // runtime method expected + ok = this.interpreter.TryGetNumeric(ident, out temp); + } + catch { ok = false; } + if (ok) + { + r = signum * temp; + } + else + { + // Check expression binding + try + { + if (this.interpreter.TryGetExpression(ident, out expr)) + { + FunctionValue eval = 
this.interpreter.EvaluateExpression(expr); + if (eval.Type == ValueType.Numeric && eval.Value != null) + { + r = signum * Convert.ToDecimal(eval.Value); + } + else + { + SemErr("expression variable did not evaluate to a number"); + r = 0; + } + } + else + { + SemErr("variable '" + ident + "' is not numeric"); + r = 0; + } + } + catch { SemErr("error evaluating expression variable"); r = 0; } + } + + } else if (la.kind == 5) { + Get(); + r = signum * Convert.ToDecimal(t.val); + } else if (la.kind == 4) { + Get(); + string hx = t.val.Remove(0,2); + try { r = signum * Convert.ToDecimal(Convert.ToInt64(hx, 16)); } + catch { r = 0; } + + } else if (la.kind == 3) { + Get(); + string bx = t.val.Remove(t.val.Length - 1); + try { r = signum * Convert.ToDecimal(Convert.ToInt64(bx, 2)); } + catch { r = 0; } + + } else if (la.kind == 7) { + Get(); + Expression(out r); + Expect(8); + r = signum * r; + } else SynErr(25); + } + + void Function(out FunctionValue r) { + string functionName = string.Empty; FunctionArgument[] args = new FunctionArgument[0]; r = new FunctionValue(); + Expect(1); + functionName = t.val; + Expect(7); + ArgList(out args); + Expect(8); + r = this.interpreter.ExecuteFunction(functionName, args); + } + + void ArrayCall(out decimal r) { + string ident = string.Empty; r = 0; decimal pos = 0; + Expect(1); + ident = t.val; + Expect(16); + Expression(out pos); + try + { + int i = Convert.ToInt32(pos); + decimal[] values; + if (this.interpreter.TryGetNumericArray(ident, out values)) + { + if (i >= 0 && i < values.Length) { r = values[i]; } + else { SemErr("array index out of range"); r = 0; } + } + else + { + SemErr("variable '" + ident + "' is not a numeric array"); + r = 0; + } + } + catch { SemErr("invalid array index"); r = 0; } + + Expect(18); + } + + void ArrayL(out decimal[] d) { + List list = new List(); decimal r = 0; d = new decimal[0]; + Expect(16); + Expression(out r); + list.Add(r); d = list.ToArray(); + while (la.kind == 17) { + Get(); + 
Expression(out r); + list.Add(r); d = list.ToArray(); + } + Expect(18); + } + + void AnyExpression(out string value) { + value = string.Empty; StringBuilder builder = new StringBuilder(); + Get(); + builder.Append(t.val); + while (StartOf(2)) { + Get(); + builder.Append(t.val); + } + value = builder.ToString(); + } + + void ArgList(out FunctionArgument[] args) { + List list = new List(); FunctionArgument a = new FunctionArgument { Name = string.Empty, Value = new FunctionValue() }; args = new FunctionArgument[0]; + if (StartOf(3)) { + Arg(out a); + list.Add(a); args = list.ToArray(); + while (la.kind == 17) { + Get(); + Arg(out a); + list.Add(a); args = list.ToArray(); + } + } + } + + void Arg(out FunctionArgument arg) { + arg = new FunctionArgument { Name = string.Empty, Value = new FunctionValue() }; decimal r = 0; string s = string.Empty; + if (la.kind == 6) { + Get(); + s = t.val.Substring(1, t.val.Length - 2); arg = new FunctionArgument { Name = string.Empty, Value = new FunctionValue(ValueType.String, s) }; + } else if (StartOf(1)) { + Expression(out r); + arg = new FunctionArgument { Name = string.Empty, Value = new FunctionValue(ValueType.Numeric, r) }; + } else SynErr(26); + } + + + + public void Parse() { + la = new Token(); + la.val = ""; + Get(); + INTERPRETER(); + Expect(0); + + } + + static readonly bool[,] set = { + {_T,_x,_x,_x, _x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_x,_x}, + {_x,_T,_x,_T, _T,_T,_x,_T, _x,_x,_T,_T, _x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_x,_x}, + {_x,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_x}, + {_x,_T,_x,_T, _T,_T,_T,_T, _x,_x,_T,_T, _x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_x,_x} + + }; +} // end Parser + + +public class Errors { + public int count = 0; // number of errors detected + public System.IO.TextWriter errorStream = Console.Out; // error messages go to this stream + public string errMsgFormat = "-- line {0} col {1}: {2}"; // 0=line, 1=column, 2=text + + public virtual void SynErr (int 
line, int col, int n) { + string s; + switch (n) { + case 0: s = "EOF expected"; break; + case 1: s = "identifier expected"; break; + case 2: s = "sign expected"; break; + case 3: s = "binary expected"; break; + case 4: s = "hex expected"; break; + case 5: s = "number expected"; break; + case 6: s = "string expected"; break; + case 7: s = "LPAREN expected"; break; + case 8: s = "RPAREN expected"; break; + case 9: s = "COMPARER expected"; break; + case 10: s = "\"+\" expected"; break; + case 11: s = "\"-\" expected"; break; + case 12: s = "\"*\" expected"; break; + case 13: s = "\"/\" expected"; break; + case 14: s = "\"%\" expected"; break; + case 15: s = "\"^\" expected"; break; + case 16: s = "\"[\" expected"; break; + case 17: s = "\",\" expected"; break; + case 18: s = "\"]\" expected"; break; + case 19: s = "\"::\" expected"; break; + case 20: s = "\":=\" expected"; break; + case 21: s = "\"->\" expected"; break; + case 22: s = "??? expected"; break; + case 23: s = "invalid INTERPRETER"; break; + case 24: s = "invalid Assignment"; break; + case 25: s = "invalid Value"; break; + case 26: s = "invalid Arg"; break; + + default: s = "error " + n; break; + } + errorStream.WriteLine(errMsgFormat, line, col, s); + count++; + } + + public virtual void SemErr (int line, int col, string s) { + errorStream.WriteLine(errMsgFormat, line, col, s); + count++; + } + + public virtual void SemErr (string s) { + errorStream.WriteLine(s); + count++; + } + + public virtual void Warning (int line, int col, string s) { + errorStream.WriteLine(errMsgFormat, line, col, s); + } + + public virtual void Warning(string s) { + errorStream.WriteLine(s); + } +} // Errors + + +public class FatalError: Exception { + public FatalError(string m): base(m) {} +} +} \ No newline at end of file diff --git a/src/core/cocor/Scanner.cs b/src/core/cocor/Scanner.cs new file mode 100644 index 0000000..023a785 --- /dev/null +++ b/src/core/cocor/Scanner.cs @@ -0,0 +1,489 @@ + +using System; +using 
System.IO; +using System.Collections; + +namespace csmic.Interpreter { + +public class Token { + public int kind; // token kind + public int pos; // token position in bytes in the source text (starting at 0) + public int charPos; // token position in characters in the source text (starting at 0) + public int col; // token column (starting at 1) + public int line; // token line (starting at 1) + public string val; // token value + public Token next; // ML 2005-03-11 Tokens are kept in linked list +} + +//----------------------------------------------------------------------------------- +// Buffer +//----------------------------------------------------------------------------------- +public class Buffer { + // This Buffer supports the following cases: + // 1) seekable stream (file) + // a) whole stream in buffer + // b) part of stream in buffer + // 2) non seekable stream (network, console) + + public const int EOF = char.MaxValue + 1; + const int MIN_BUFFER_LENGTH = 1024; // 1KB + const int MAX_BUFFER_LENGTH = MIN_BUFFER_LENGTH * 64; // 64KB + byte[] buf; // input buffer + int bufStart; // position of first byte in buffer relative to input stream + int bufLen; // length of buffer + int fileLen; // length of input stream (may change if the stream is no file) + int bufPos; // current position in buffer + Stream stream; // input stream (seekable) + bool isUserStream; // was the stream opened by the user? + + public Buffer (Stream s, bool isUserStream) { + stream = s; this.isUserStream = isUserStream; + + if (stream.CanSeek) { + fileLen = (int) stream.Length; + bufLen = Math.Min(fileLen, MAX_BUFFER_LENGTH); + bufStart = Int32.MaxValue; // nothing in the buffer so far + } else { + fileLen = bufLen = bufStart = 0; + } + + buf = new byte[(bufLen>0) ? 
bufLen : MIN_BUFFER_LENGTH]; + if (fileLen > 0) Pos = 0; // setup buffer to position 0 (start) + else bufPos = 0; // index 0 is already after the file, thus Pos = 0 is invalid + if (bufLen == fileLen && stream.CanSeek) Close(); + } + + protected Buffer(Buffer b) { // called in UTF8Buffer constructor + buf = b.buf; + bufStart = b.bufStart; + bufLen = b.bufLen; + fileLen = b.fileLen; + bufPos = b.bufPos; + stream = b.stream; + // keep destructor from closing the stream + b.stream = null; + isUserStream = b.isUserStream; + } + + ~Buffer() { Close(); } + + protected void Close() { + if (!isUserStream && stream != null) { + stream.Close(); + stream = null; + } + } + + public virtual int Read () { + if (bufPos < bufLen) { + return buf[bufPos++]; + } else if (Pos < fileLen) { + Pos = Pos; // shift buffer start to Pos + return buf[bufPos++]; + } else if (stream != null && !stream.CanSeek && ReadNextStreamChunk() > 0) { + return buf[bufPos++]; + } else { + return EOF; + } + } + + public int Peek () { + int curPos = Pos; + int ch = Read(); + Pos = curPos; + return ch; + } + + // beg .. begin, zero-based, inclusive, in byte + // end .. end, zero-based, exclusive, in byte + public string GetString (int beg, int end) { + int len = 0; + char[] buf = new char[end - beg]; + int oldPos = Pos; + Pos = beg; + while (Pos < end) buf[len++] = (char) Read(); + Pos = oldPos; + return new String(buf, 0, len); + } + + public int Pos { + get { return bufPos + bufStart; } + set { + if (value >= fileLen && stream != null && !stream.CanSeek) { + // Wanted position is after buffer and the stream + // is not seek-able e.g. network or console, + // thus we have to read the stream manually till + // the wanted position is in sight. 
+ while (value >= fileLen && ReadNextStreamChunk() > 0); + } + + if (value < 0 || value > fileLen) { + throw new FatalError("buffer out of bounds access, position: " + value); + } + + if (value >= bufStart && value < bufStart + bufLen) { // already in buffer + bufPos = value - bufStart; + } else if (stream != null) { // must be swapped in + stream.Seek(value, SeekOrigin.Begin); + bufLen = stream.Read(buf, 0, buf.Length); + bufStart = value; bufPos = 0; + } else { + // set the position to the end of the file, Pos will return fileLen. + bufPos = fileLen - bufStart; + } + } + } + + // Read the next chunk of bytes from the stream, increases the buffer + // if needed and updates the fields fileLen and bufLen. + // Returns the number of bytes read. + private int ReadNextStreamChunk() { + int free = buf.Length - bufLen; + if (free == 0) { + // in the case of a growing input stream + // we can neither seek in the stream, nor can we + // foresee the maximum length, thus we must adapt + // the buffer size on demand. 
+ byte[] newBuf = new byte[bufLen * 2]; + Array.Copy(buf, newBuf, bufLen); + buf = newBuf; + free = bufLen; + } + int read = stream.Read(buf, bufLen, free); + if (read > 0) { + fileLen = bufLen = (bufLen + read); + return read; + } + // end of stream reached + return 0; + } +} + +//----------------------------------------------------------------------------------- +// UTF8Buffer +//----------------------------------------------------------------------------------- +public class UTF8Buffer: Buffer { + public UTF8Buffer(Buffer b): base(b) {} + + public override int Read() { + int ch; + do { + ch = base.Read(); + // until we find a utf8 start (0xxxxxxx or 11xxxxxx) + } while ((ch >= 128) && ((ch & 0xC0) != 0xC0) && (ch != EOF)); + if (ch < 128 || ch == EOF) { + // nothing to do, first 127 chars are the same in ascii and utf8 + // 0xxxxxxx or end of file character + } else if ((ch & 0xF0) == 0xF0) { + // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx + int c1 = ch & 0x07; ch = base.Read(); + int c2 = ch & 0x3F; ch = base.Read(); + int c3 = ch & 0x3F; ch = base.Read(); + int c4 = ch & 0x3F; + ch = (((((c1 << 6) | c2) << 6) | c3) << 6) | c4; + } else if ((ch & 0xE0) == 0xE0) { + // 1110xxxx 10xxxxxx 10xxxxxx + int c1 = ch & 0x0F; ch = base.Read(); + int c2 = ch & 0x3F; ch = base.Read(); + int c3 = ch & 0x3F; + ch = (((c1 << 6) | c2) << 6) | c3; + } else if ((ch & 0xC0) == 0xC0) { + // 110xxxxx 10xxxxxx + int c1 = ch & 0x1F; ch = base.Read(); + int c2 = ch & 0x3F; + ch = (c1 << 6) | c2; + } + return ch; + } +} + +//----------------------------------------------------------------------------------- +// Scanner +//----------------------------------------------------------------------------------- +public class Scanner { + const char EOL = '\n'; + const int eofSym = 0; /* pdt */ + const int maxT = 22; + const int noSym = 22; + + + public Buffer buffer; // scanner buffer + + Token t; // current token + int ch; // current input character + int pos; // byte position of current character + 
int charPos; // position by unicode characters starting with 0 + int col; // column number of current character + int line; // line number of current character + int oldEols; // EOLs that appeared in a comment; + static readonly Hashtable start; // maps first token character to start state + + Token tokens; // list of tokens already peeked (first token is a dummy) + Token pt; // current peek token + + char[] tval = new char[128]; // text of current token + int tlen; // length of current token + + static Scanner() { + start = new Hashtable(128); + for (int i = 65; i <= 90; ++i) start[i] = 1; + for (int i = 97; i <= 122; ++i) start[i] = 1; + start[43] = 2; + for (int i = 50; i <= 57; ++i) start[i] = 6; + start[48] = 17; + start[49] = 18; + start[34] = 11; + start[40] = 13; + start[41] = 14; + start[61] = 15; + start[60] = 19; + start[62] = 20; + start[42] = 21; + start[47] = 22; + start[37] = 23; + start[94] = 24; + start[91] = 25; + start[44] = 26; + start[93] = 27; + start[58] = 31; + start[45] = 32; + start[Buffer.EOF] = -1; + + } + + public Scanner (string fileName) { + try { + Stream stream = new FileStream(fileName, FileMode.Open, FileAccess.Read, FileShare.Read); + buffer = new Buffer(stream, false); + Init(); + } catch (IOException) { + throw new FatalError("Cannot open file " + fileName); + } + } + + public Scanner (Stream s) { + buffer = new Buffer(s, true); + Init(); + } + + void Init() { + pos = -1; line = 1; col = 0; charPos = -1; + oldEols = 0; + NextCh(); + if (ch == 0xEF) { // check optional byte order mark for UTF-8 + NextCh(); int ch1 = ch; + NextCh(); int ch2 = ch; + if (ch1 != 0xBB || ch2 != 0xBF) { + throw new FatalError(String.Format("illegal byte order mark: EF {0,2:X} {1,2:X}", ch1, ch2)); + } + buffer = new UTF8Buffer(buffer); col = 0; charPos = -1; + NextCh(); + } + pt = tokens = new Token(); // first token is a dummy + } + + void NextCh() { + if (oldEols > 0) { ch = EOL; oldEols--; } + else { + pos = buffer.Pos; + // buffer reads unicode 
chars, if UTF8 has been detected + ch = buffer.Read(); col++; charPos++; + // replace isolated '\r' by '\n' in order to make + // eol handling uniform across Windows, Unix and Mac + if (ch == '\r' && buffer.Peek() != '\n') ch = EOL; + if (ch == EOL) { line++; col = 0; } + } + + } + + void AddCh() { + if (tlen >= tval.Length) { + char[] newBuf = new char[2 * tval.Length]; + Array.Copy(tval, 0, newBuf, 0, tval.Length); + tval = newBuf; + } + if (ch != Buffer.EOF) { + tval[tlen++] = (char) ch; + NextCh(); + } + } + + + + + void CheckLiteral() { + switch (t.val) { + case "+": t.kind = 10; break; + case "-": t.kind = 11; break; + default: break; + } + } + + Token NextToken() { + while (ch == ' ' || + ch == 9 || ch == 13 + ) NextCh(); + + int recKind = noSym; + int recEnd = pos; + t = new Token(); + t.pos = pos; t.col = col; t.line = line; t.charPos = charPos; + int state; + if (start.ContainsKey(ch)) { state = (int) start[ch]; } + else { state = 0; } + tlen = 0; AddCh(); + + switch (state) { + case -1: { t.kind = eofSym; break; } // NextCh already done + case 0: { + if (recKind != noSym) { + tlen = recEnd - t.pos; + SetScannerBehindT(); + } + t.kind = recKind; break; + } // NextCh already done + case 1: + recEnd = pos; recKind = 1; + if (ch >= '0' && ch <= '9' || ch >= 'A' && ch <= 'Z' || ch >= 'a' && ch <= 'z') {AddCh(); goto case 1;} + else {t.kind = 1; break;} + case 2: + {t.kind = 2; t.val = new String(tval, 0, tlen); CheckLiteral(); return t;} + case 3: + {t.kind = 3; break;} + case 4: + if (ch >= '0' && ch <= '9' || ch >= 'A' && ch <= 'F' || ch >= 'a' && ch <= 'f') {AddCh(); goto case 5;} + else {goto case 0;} + case 5: + recEnd = pos; recKind = 4; + if (ch >= '0' && ch <= '9' || ch >= 'A' && ch <= 'F' || ch >= 'a' && ch <= 'f') {AddCh(); goto case 5;} + else {t.kind = 4; break;} + case 6: + recEnd = pos; recKind = 5; + if (ch >= '0' && ch <= '9') {AddCh(); goto case 6;} + else if (ch == 'E' || ch == 'e') {AddCh(); goto case 7;} + else if (ch == '.') {AddCh(); 
goto case 10;} + else {t.kind = 5; break;} + case 7: + if (ch >= '0' && ch <= '9') {AddCh(); goto case 9;} + else if (ch == '+' || ch == '-') {AddCh(); goto case 8;} + else {goto case 0;} + case 8: + if (ch >= '0' && ch <= '9') {AddCh(); goto case 9;} + else {goto case 0;} + case 9: + recEnd = pos; recKind = 5; + if (ch >= '0' && ch <= '9') {AddCh(); goto case 9;} + else {t.kind = 5; break;} + case 10: + recEnd = pos; recKind = 5; + if (ch >= '0' && ch <= '9') {AddCh(); goto case 10;} + else if (ch == 'E' || ch == 'e') {AddCh(); goto case 7;} + else {t.kind = 5; break;} + case 11: + if (ch <= '!' || ch >= '#' && ch <= 65535) {AddCh(); goto case 11;} + else if (ch == '"') {AddCh(); goto case 12;} + else {goto case 0;} + case 12: + {t.kind = 6; break;} + case 13: + {t.kind = 7; break;} + case 14: + {t.kind = 8; break;} + case 15: + if (ch == '=') {AddCh(); goto case 16;} + else {goto case 0;} + case 16: + {t.kind = 9; break;} + case 17: + recEnd = pos; recKind = 5; + if (ch >= '2' && ch <= '9') {AddCh(); goto case 6;} + else if (ch == 'B' || ch == 'b') {AddCh(); goto case 3;} + else if (ch >= '0' && ch <= '1') {AddCh(); goto case 18;} + else if (ch == 'x') {AddCh(); goto case 4;} + else if (ch == 'E' || ch == 'e') {AddCh(); goto case 7;} + else if (ch == '.') {AddCh(); goto case 10;} + else {t.kind = 5; break;} + case 18: + recEnd = pos; recKind = 5; + if (ch >= '2' && ch <= '9') {AddCh(); goto case 6;} + else if (ch == 'B' || ch == 'b') {AddCh(); goto case 3;} + else if (ch >= '0' && ch <= '1') {AddCh(); goto case 18;} + else if (ch == 'E' || ch == 'e') {AddCh(); goto case 7;} + else if (ch == '.') {AddCh(); goto case 10;} + else {t.kind = 5; break;} + case 19: + recEnd = pos; recKind = 9; + if (ch == '=') {AddCh(); goto case 16;} + else {t.kind = 9; break;} + case 20: + recEnd = pos; recKind = 9; + if (ch == '=') {AddCh(); goto case 16;} + else {t.kind = 9; break;} + case 21: + {t.kind = 12; break;} + case 22: + {t.kind = 13; break;} + case 23: + {t.kind = 14; 
break;} + case 24: + {t.kind = 15; break;} + case 25: + {t.kind = 16; break;} + case 26: + {t.kind = 17; break;} + case 27: + {t.kind = 18; break;} + case 28: + {t.kind = 19; break;} + case 29: + {t.kind = 20; break;} + case 30: + {t.kind = 21; break;} + case 31: + if (ch == ':') {AddCh(); goto case 28;} + else if (ch == '=') {AddCh(); goto case 29;} + else {goto case 0;} + case 32: + recEnd = pos; recKind = 2; + if (ch == '>') {AddCh(); goto case 30;} + else {t.kind = 2; t.val = new String(tval, 0, tlen); CheckLiteral(); return t;} + + } + t.val = new String(tval, 0, tlen); + return t; + } + + private void SetScannerBehindT() { + buffer.Pos = t.pos; + NextCh(); + line = t.line; col = t.col; charPos = t.charPos; + for (int i = 0; i < tlen; i++) NextCh(); + } + + // get the next token (possibly a token already seen during peeking) + public Token Scan () { + if (tokens.next == null) { + return NextToken(); + } else { + pt = tokens = tokens.next; + return tokens; + } + } + + // peek for the next token, ignore pragmas + public Token Peek () { + do { + if (pt.next == null) { + pt.next = NextToken(); + } + pt = pt.next; + } while (pt.kind > maxT); // skip pragmas + + return pt; + } + + // make sure that peeking starts at the current scan position + public void ResetPeek () { pt = tokens; } + +} // end Scanner +} \ No newline at end of file diff --git a/src/core/cocor/Scanner.cs.old b/src/core/cocor/Scanner.cs.old new file mode 100644 index 0000000..023a785 --- /dev/null +++ b/src/core/cocor/Scanner.cs.old @@ -0,0 +1,489 @@ + +using System; +using System.IO; +using System.Collections; + +namespace csmic.Interpreter { + +public class Token { + public int kind; // token kind + public int pos; // token position in bytes in the source text (starting at 0) + public int charPos; // token position in characters in the source text (starting at 0) + public int col; // token column (starting at 1) + public int line; // token line (starting at 1) + public string val; // token value 
+ public Token next; // ML 2005-03-11 Tokens are kept in linked list +} + +//----------------------------------------------------------------------------------- +// Buffer +//----------------------------------------------------------------------------------- +public class Buffer { + // This Buffer supports the following cases: + // 1) seekable stream (file) + // a) whole stream in buffer + // b) part of stream in buffer + // 2) non seekable stream (network, console) + + public const int EOF = char.MaxValue + 1; + const int MIN_BUFFER_LENGTH = 1024; // 1KB + const int MAX_BUFFER_LENGTH = MIN_BUFFER_LENGTH * 64; // 64KB + byte[] buf; // input buffer + int bufStart; // position of first byte in buffer relative to input stream + int bufLen; // length of buffer + int fileLen; // length of input stream (may change if the stream is no file) + int bufPos; // current position in buffer + Stream stream; // input stream (seekable) + bool isUserStream; // was the stream opened by the user? + + public Buffer (Stream s, bool isUserStream) { + stream = s; this.isUserStream = isUserStream; + + if (stream.CanSeek) { + fileLen = (int) stream.Length; + bufLen = Math.Min(fileLen, MAX_BUFFER_LENGTH); + bufStart = Int32.MaxValue; // nothing in the buffer so far + } else { + fileLen = bufLen = bufStart = 0; + } + + buf = new byte[(bufLen>0) ? 
bufLen : MIN_BUFFER_LENGTH]; + if (fileLen > 0) Pos = 0; // setup buffer to position 0 (start) + else bufPos = 0; // index 0 is already after the file, thus Pos = 0 is invalid + if (bufLen == fileLen && stream.CanSeek) Close(); + } + + protected Buffer(Buffer b) { // called in UTF8Buffer constructor + buf = b.buf; + bufStart = b.bufStart; + bufLen = b.bufLen; + fileLen = b.fileLen; + bufPos = b.bufPos; + stream = b.stream; + // keep destructor from closing the stream + b.stream = null; + isUserStream = b.isUserStream; + } + + ~Buffer() { Close(); } + + protected void Close() { + if (!isUserStream && stream != null) { + stream.Close(); + stream = null; + } + } + + public virtual int Read () { + if (bufPos < bufLen) { + return buf[bufPos++]; + } else if (Pos < fileLen) { + Pos = Pos; // shift buffer start to Pos + return buf[bufPos++]; + } else if (stream != null && !stream.CanSeek && ReadNextStreamChunk() > 0) { + return buf[bufPos++]; + } else { + return EOF; + } + } + + public int Peek () { + int curPos = Pos; + int ch = Read(); + Pos = curPos; + return ch; + } + + // beg .. begin, zero-based, inclusive, in byte + // end .. end, zero-based, exclusive, in byte + public string GetString (int beg, int end) { + int len = 0; + char[] buf = new char[end - beg]; + int oldPos = Pos; + Pos = beg; + while (Pos < end) buf[len++] = (char) Read(); + Pos = oldPos; + return new String(buf, 0, len); + } + + public int Pos { + get { return bufPos + bufStart; } + set { + if (value >= fileLen && stream != null && !stream.CanSeek) { + // Wanted position is after buffer and the stream + // is not seek-able e.g. network or console, + // thus we have to read the stream manually till + // the wanted position is in sight. 
+ while (value >= fileLen && ReadNextStreamChunk() > 0); + } + + if (value < 0 || value > fileLen) { + throw new FatalError("buffer out of bounds access, position: " + value); + } + + if (value >= bufStart && value < bufStart + bufLen) { // already in buffer + bufPos = value - bufStart; + } else if (stream != null) { // must be swapped in + stream.Seek(value, SeekOrigin.Begin); + bufLen = stream.Read(buf, 0, buf.Length); + bufStart = value; bufPos = 0; + } else { + // set the position to the end of the file, Pos will return fileLen. + bufPos = fileLen - bufStart; + } + } + } + + // Read the next chunk of bytes from the stream, increases the buffer + // if needed and updates the fields fileLen and bufLen. + // Returns the number of bytes read. + private int ReadNextStreamChunk() { + int free = buf.Length - bufLen; + if (free == 0) { + // in the case of a growing input stream + // we can neither seek in the stream, nor can we + // foresee the maximum length, thus we must adapt + // the buffer size on demand. 
+ byte[] newBuf = new byte[bufLen * 2]; + Array.Copy(buf, newBuf, bufLen); + buf = newBuf; + free = bufLen; + } + int read = stream.Read(buf, bufLen, free); + if (read > 0) { + fileLen = bufLen = (bufLen + read); + return read; + } + // end of stream reached + return 0; + } +} + +//----------------------------------------------------------------------------------- +// UTF8Buffer +//----------------------------------------------------------------------------------- +public class UTF8Buffer: Buffer { + public UTF8Buffer(Buffer b): base(b) {} + + public override int Read() { + int ch; + do { + ch = base.Read(); + // until we find a utf8 start (0xxxxxxx or 11xxxxxx) + } while ((ch >= 128) && ((ch & 0xC0) != 0xC0) && (ch != EOF)); + if (ch < 128 || ch == EOF) { + // nothing to do, first 127 chars are the same in ascii and utf8 + // 0xxxxxxx or end of file character + } else if ((ch & 0xF0) == 0xF0) { + // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx + int c1 = ch & 0x07; ch = base.Read(); + int c2 = ch & 0x3F; ch = base.Read(); + int c3 = ch & 0x3F; ch = base.Read(); + int c4 = ch & 0x3F; + ch = (((((c1 << 6) | c2) << 6) | c3) << 6) | c4; + } else if ((ch & 0xE0) == 0xE0) { + // 1110xxxx 10xxxxxx 10xxxxxx + int c1 = ch & 0x0F; ch = base.Read(); + int c2 = ch & 0x3F; ch = base.Read(); + int c3 = ch & 0x3F; + ch = (((c1 << 6) | c2) << 6) | c3; + } else if ((ch & 0xC0) == 0xC0) { + // 110xxxxx 10xxxxxx + int c1 = ch & 0x1F; ch = base.Read(); + int c2 = ch & 0x3F; + ch = (c1 << 6) | c2; + } + return ch; + } +} + +//----------------------------------------------------------------------------------- +// Scanner +//----------------------------------------------------------------------------------- +public class Scanner { + const char EOL = '\n'; + const int eofSym = 0; /* pdt */ + const int maxT = 22; + const int noSym = 22; + + + public Buffer buffer; // scanner buffer + + Token t; // current token + int ch; // current input character + int pos; // byte position of current character + 
int charPos; // position by unicode characters starting with 0 + int col; // column number of current character + int line; // line number of current character + int oldEols; // EOLs that appeared in a comment; + static readonly Hashtable start; // maps first token character to start state + + Token tokens; // list of tokens already peeked (first token is a dummy) + Token pt; // current peek token + + char[] tval = new char[128]; // text of current token + int tlen; // length of current token + + static Scanner() { + start = new Hashtable(128); + for (int i = 65; i <= 90; ++i) start[i] = 1; + for (int i = 97; i <= 122; ++i) start[i] = 1; + start[43] = 2; + for (int i = 50; i <= 57; ++i) start[i] = 6; + start[48] = 17; + start[49] = 18; + start[34] = 11; + start[40] = 13; + start[41] = 14; + start[61] = 15; + start[60] = 19; + start[62] = 20; + start[42] = 21; + start[47] = 22; + start[37] = 23; + start[94] = 24; + start[91] = 25; + start[44] = 26; + start[93] = 27; + start[58] = 31; + start[45] = 32; + start[Buffer.EOF] = -1; + + } + + public Scanner (string fileName) { + try { + Stream stream = new FileStream(fileName, FileMode.Open, FileAccess.Read, FileShare.Read); + buffer = new Buffer(stream, false); + Init(); + } catch (IOException) { + throw new FatalError("Cannot open file " + fileName); + } + } + + public Scanner (Stream s) { + buffer = new Buffer(s, true); + Init(); + } + + void Init() { + pos = -1; line = 1; col = 0; charPos = -1; + oldEols = 0; + NextCh(); + if (ch == 0xEF) { // check optional byte order mark for UTF-8 + NextCh(); int ch1 = ch; + NextCh(); int ch2 = ch; + if (ch1 != 0xBB || ch2 != 0xBF) { + throw new FatalError(String.Format("illegal byte order mark: EF {0,2:X} {1,2:X}", ch1, ch2)); + } + buffer = new UTF8Buffer(buffer); col = 0; charPos = -1; + NextCh(); + } + pt = tokens = new Token(); // first token is a dummy + } + + void NextCh() { + if (oldEols > 0) { ch = EOL; oldEols--; } + else { + pos = buffer.Pos; + // buffer reads unicode 
chars, if UTF8 has been detected + ch = buffer.Read(); col++; charPos++; + // replace isolated '\r' by '\n' in order to make + // eol handling uniform across Windows, Unix and Mac + if (ch == '\r' && buffer.Peek() != '\n') ch = EOL; + if (ch == EOL) { line++; col = 0; } + } + + } + + void AddCh() { + if (tlen >= tval.Length) { + char[] newBuf = new char[2 * tval.Length]; + Array.Copy(tval, 0, newBuf, 0, tval.Length); + tval = newBuf; + } + if (ch != Buffer.EOF) { + tval[tlen++] = (char) ch; + NextCh(); + } + } + + + + + void CheckLiteral() { + switch (t.val) { + case "+": t.kind = 10; break; + case "-": t.kind = 11; break; + default: break; + } + } + + Token NextToken() { + while (ch == ' ' || + ch == 9 || ch == 13 + ) NextCh(); + + int recKind = noSym; + int recEnd = pos; + t = new Token(); + t.pos = pos; t.col = col; t.line = line; t.charPos = charPos; + int state; + if (start.ContainsKey(ch)) { state = (int) start[ch]; } + else { state = 0; } + tlen = 0; AddCh(); + + switch (state) { + case -1: { t.kind = eofSym; break; } // NextCh already done + case 0: { + if (recKind != noSym) { + tlen = recEnd - t.pos; + SetScannerBehindT(); + } + t.kind = recKind; break; + } // NextCh already done + case 1: + recEnd = pos; recKind = 1; + if (ch >= '0' && ch <= '9' || ch >= 'A' && ch <= 'Z' || ch >= 'a' && ch <= 'z') {AddCh(); goto case 1;} + else {t.kind = 1; break;} + case 2: + {t.kind = 2; t.val = new String(tval, 0, tlen); CheckLiteral(); return t;} + case 3: + {t.kind = 3; break;} + case 4: + if (ch >= '0' && ch <= '9' || ch >= 'A' && ch <= 'F' || ch >= 'a' && ch <= 'f') {AddCh(); goto case 5;} + else {goto case 0;} + case 5: + recEnd = pos; recKind = 4; + if (ch >= '0' && ch <= '9' || ch >= 'A' && ch <= 'F' || ch >= 'a' && ch <= 'f') {AddCh(); goto case 5;} + else {t.kind = 4; break;} + case 6: + recEnd = pos; recKind = 5; + if (ch >= '0' && ch <= '9') {AddCh(); goto case 6;} + else if (ch == 'E' || ch == 'e') {AddCh(); goto case 7;} + else if (ch == '.') {AddCh(); 
goto case 10;} + else {t.kind = 5; break;} + case 7: + if (ch >= '0' && ch <= '9') {AddCh(); goto case 9;} + else if (ch == '+' || ch == '-') {AddCh(); goto case 8;} + else {goto case 0;} + case 8: + if (ch >= '0' && ch <= '9') {AddCh(); goto case 9;} + else {goto case 0;} + case 9: + recEnd = pos; recKind = 5; + if (ch >= '0' && ch <= '9') {AddCh(); goto case 9;} + else {t.kind = 5; break;} + case 10: + recEnd = pos; recKind = 5; + if (ch >= '0' && ch <= '9') {AddCh(); goto case 10;} + else if (ch == 'E' || ch == 'e') {AddCh(); goto case 7;} + else {t.kind = 5; break;} + case 11: + if (ch <= '!' || ch >= '#' && ch <= 65535) {AddCh(); goto case 11;} + else if (ch == '"') {AddCh(); goto case 12;} + else {goto case 0;} + case 12: + {t.kind = 6; break;} + case 13: + {t.kind = 7; break;} + case 14: + {t.kind = 8; break;} + case 15: + if (ch == '=') {AddCh(); goto case 16;} + else {goto case 0;} + case 16: + {t.kind = 9; break;} + case 17: + recEnd = pos; recKind = 5; + if (ch >= '2' && ch <= '9') {AddCh(); goto case 6;} + else if (ch == 'B' || ch == 'b') {AddCh(); goto case 3;} + else if (ch >= '0' && ch <= '1') {AddCh(); goto case 18;} + else if (ch == 'x') {AddCh(); goto case 4;} + else if (ch == 'E' || ch == 'e') {AddCh(); goto case 7;} + else if (ch == '.') {AddCh(); goto case 10;} + else {t.kind = 5; break;} + case 18: + recEnd = pos; recKind = 5; + if (ch >= '2' && ch <= '9') {AddCh(); goto case 6;} + else if (ch == 'B' || ch == 'b') {AddCh(); goto case 3;} + else if (ch >= '0' && ch <= '1') {AddCh(); goto case 18;} + else if (ch == 'E' || ch == 'e') {AddCh(); goto case 7;} + else if (ch == '.') {AddCh(); goto case 10;} + else {t.kind = 5; break;} + case 19: + recEnd = pos; recKind = 9; + if (ch == '=') {AddCh(); goto case 16;} + else {t.kind = 9; break;} + case 20: + recEnd = pos; recKind = 9; + if (ch == '=') {AddCh(); goto case 16;} + else {t.kind = 9; break;} + case 21: + {t.kind = 12; break;} + case 22: + {t.kind = 13; break;} + case 23: + {t.kind = 14; 
break;} + case 24: + {t.kind = 15; break;} + case 25: + {t.kind = 16; break;} + case 26: + {t.kind = 17; break;} + case 27: + {t.kind = 18; break;} + case 28: + {t.kind = 19; break;} + case 29: + {t.kind = 20; break;} + case 30: + {t.kind = 21; break;} + case 31: + if (ch == ':') {AddCh(); goto case 28;} + else if (ch == '=') {AddCh(); goto case 29;} + else {goto case 0;} + case 32: + recEnd = pos; recKind = 2; + if (ch == '>') {AddCh(); goto case 30;} + else {t.kind = 2; t.val = new String(tval, 0, tlen); CheckLiteral(); return t;} + + } + t.val = new String(tval, 0, tlen); + return t; + } + + private void SetScannerBehindT() { + buffer.Pos = t.pos; + NextCh(); + line = t.line; col = t.col; charPos = t.charPos; + for (int i = 0; i < tlen; i++) NextCh(); + } + + // get the next token (possibly a token already seen during peeking) + public Token Scan () { + if (tokens.next == null) { + return NextToken(); + } else { + pt = tokens = tokens.next; + return tokens; + } + } + + // peek for the next token, ignore pragmas + public Token Peek () { + do { + if (pt.next == null) { + pt.next = NextToken(); + } + pt = pt.next; + } while (pt.kind > maxT); // skip pragmas + + return pt; + } + + // make sure that peeking starts at the current scan position + public void ResetPeek () { pt = tokens; } + +} // end Scanner +} \ No newline at end of file