From de12594067508df6f9115aade9a7e900ffcc77e0 Mon Sep 17 00:00:00 2001 From: Suzanne Soy Date: Tue, 1 Sep 2020 06:41:07 +0000 Subject: [PATCH] WIP --- AstGenerator.cs | 4 ++ Compilers/JS.cs | 7 ++- DefaultGrammar.cs | 22 +++++--- Evaluator.cs | 44 +++++++++++++-- Exceptions.cs | 4 ++ Lexer.cs | 12 ++-- LexerGenerator.cs | 2 + MixFix.cs | 16 +++--- MixFixGenerator.cs | 7 +++ Parser.cs | 133 ++++++++++++++++----------------------------- Tests/001-42.e | 1 + Tests/006-eq.e | 2 +- main.cs | 4 +- 13 files changed, 137 insertions(+), 121 deletions(-) diff --git a/AstGenerator.cs b/AstGenerator.cs index 861b4c9..8c38c0e 100644 --- a/AstGenerator.cs +++ b/AstGenerator.cs @@ -13,6 +13,10 @@ public static class AstGenerator { Case("int", "Int"), Case("string", "String")), + Variant("Val", + Case("int", "Int"), + Case("string", "String")), + Variant("ParserResult", Case("(MixFix.Annotation, ParserResult)", "Annotated"), Case("Lexer.Lexeme", "Terminal"), diff --git a/Compilers/JS.cs b/Compilers/JS.cs index 40eb90b..995fc7d 100644 --- a/Compilers/JS.cs +++ b/Compilers/JS.cs @@ -1,11 +1,12 @@ namespace Compilers { public class JS { - public static string Compile(Ast.Expr source) { + public static string Compile(Ast.AstNode source) { return "process.stdout.write(String(" - + source.Match( + + "\"no JS for now\"" +/* + source.Match( Int: i => i.ToString(), String: s => $"'{s.ToString()}'" - ) + )*/ + "));"; } } diff --git a/DefaultGrammar.cs b/DefaultGrammar.cs index 9290693..6835e5e 100644 --- a/DefaultGrammar.cs +++ b/DefaultGrammar.cs @@ -3,18 +3,22 @@ using PrecedenceDAG = ImmutableDefaultDictionary; using static Global; using static MixFix; using static MixFix.Associativity; +using static MixFix.Semantics; public static class DefaultGrammar { public static PrecedenceDAG DefaultPrecedenceDAG = EmptyPrecedenceDAG - .WithOperator("bool", NonAssociative, "equality|terminal", S.And, "equality|terminal") - .WithOperator("equality", NonAssociative, "int|terminal|additive|multiplicative", S.Eq, "int|terminal|additive|multiplicative") - .WithOperator("int", NonAssociative, S.Int) - .WithOperator("additive", LeftAssociative, "int|terminal|multiplicative", S.Plus, "int|terminal|multiplicative") - .WithOperator("multiplicative", LeftAssociative, "int|terminal", S.Times, "int|terminal") - .WithOperator("terminal", NonAssociative, S.Ident) + .WithOperator("bool", Unsupported, NonAssociative, "equality|terminal", S.And, "equality|terminal") + .WithOperator("equality", Unsupported, NonAssociative, "int|terminal", S.Eq, "int|terminal") // |additive|multiplicative + .WithOperator("int", LiteralInt, NonAssociative, S.Int) + .WithOperator("additive", Unsupported, LeftAssociative, "int|terminal|multiplicative", S.Plus, "int|terminal|multiplicative") + .WithOperator("multiplicative", Unsupported, LeftAssociative, "int|terminal", S.Times, "int|terminal") + .WithOperator("terminal", Unsupported, NonAssociative, S.Ident) // This is the root set of operators - .WithOperator("program", LeftAssociative, - // "bool" // TODO: this needs aliases - "equality|terminal", S.And, "equality|terminal"); + // TODO: this needs aliases + .WithOperator("prog", Unsupported, LeftAssociative, "equality|terminal", S.And, "equality|terminal") + .WithOperator("prog", LiteralInt, NonAssociative, S.Int) + .WithOperator("prog", LiteralString, NonAssociative, S.StringOpen, S.String, S.StringClose) + .WithOperator("program", Program, NonAssociative, S.StartOfInput, "prog", S.EndOfInput) + ; } \ No newline at end of file diff --git a/Evaluator.cs b/Evaluator.cs index 31af1ab..6da61bf 100644 --- a/Evaluator.cs +++ b/Evaluator.cs @@ -1,10 +1,42 @@ +using System.Collections.Generic; +using System.Linq; +using Immutable; +using static Global; + public class Evaluator { - public static string Evaluate(Ast.Expr source) { - return source.Match( - Int: i => i.ToString(), - String: s => s.ToString() - ); - } + public static string Evaluate(Ast.AstNode source) + // => Log(source.Str(), () + => source.Match( + Operator: o => o.Item1.semantics.Match( + // The wrapper around the whole program: + Program: () => { + if (o.Item2.Count() != 3) { + throw new RuntimeErrorException("The Program wrapper should contain two parts: StartOfInput, prog and EndOfInput"); + } + // TODO: check that the last token is indeed Program + return Evaluate(o.Item2.ElementAt(1)); + }, + LiteralInt: () => + o.Item2 + .Single() + .ElseThrow( + new RuntimeErrorException("LiteralInt should contain a single lexeme")) + .AsTerminal + .ElseThrow( + new RuntimeErrorException("LiteralInt's contents should be a lexeme")) + .lexeme, + LiteralString: () => { + if (o.Item2.Count() != 3) { + throw new RuntimeErrorException("LiteralString should contain three lexemes: OpenString, String and CloseString"); + } + // TODO: check that the open & close are indeed that + return o.Item2.ElementAt(1) + .AsTerminal.ElseThrow( + new RuntimeErrorException("LiteralInt's contents should be a lexeme")) + .lexeme; + }, + Unsupported: () => throw new RuntimeErrorException($"Unsupported opeartor {o}, sorry.")), + Terminal: t => t.lexeme/*TODO*/); } // Note: for typeclass resolution, ask that functions have their parameters and return types annotated. This annotation is added to the values at run-time, which allows to dispatch based on the annotation rather than on the actual value. \ No newline at end of file diff --git a/Exceptions.cs b/Exceptions.cs index 4a92770..5799163 100644 --- a/Exceptions.cs +++ b/Exceptions.cs @@ -16,6 +16,10 @@ public class LexerErrorException : UserErrorException { public LexerErrorException(string e) : base("Lexer error: " + e) {} } +public class RuntimeErrorException : UserErrorException { + public RuntimeErrorException(string e) : base("Runtime error: " + e) {} +} + public class TestFailedException : UserErrorException { public TestFailedException(string e) : base("Test failed: " + e) {} } \ No newline at end of file diff --git a/Lexer.cs b/Lexer.cs index 7a5d9c4..89b5232 100644 --- a/Lexer.cs +++ b/Lexer.cs @@ -64,7 +64,7 @@ public static partial class Lexer { public static ImmutableList Default = ImmutableList( Rule(S.Space, C.DecimalDigitNumber, S.Int), Rule(S.Space, C.SpaceSeparator, S.Space), - Rule(S.Space, EOF, S.End), + Rule(S.Space, EOF, S.EndOfInput, S.End), Rule(S.Space, '"', S.StringOpen, S.String), Rule(S.Space, '=', S.Eq), Rule(S.Eq, '=', S.Eq, S.Space), @@ -242,8 +242,10 @@ public static partial class Lexer { } public static IImmutableEnumerator Lex(string source) - => Lex1(source) - .Flatten() - //.Lazy(SkipInitialEmptyWhitespace.Eq) - .Lazy(DiscardWhitespace.Eq); + => new Lexeme(S.StartOfInput, "").ImSingleton() + .Concat( + Lex1(source) + .Flatten() + //.Lazy(SkipInitialEmptyWhitespace.Eq) + .Lazy(DiscardWhitespace.Eq)); } \ No newline at end of file diff --git a/LexerGenerator.cs b/LexerGenerator.cs index 58cf9bb..c2627b5 100644 --- a/LexerGenerator.cs +++ b/LexerGenerator.cs @@ -15,7 +15,9 @@ public static class LexerGenerator { // grapheme clusters Field("string", "lexeme")), Variant("S", + Case("StartOfInput"), Case("End"), + Case("EndOfInput"), Case("Space"), Case("Int"), Case("Decimal"), diff --git a/MixFix.cs b/MixFix.cs index 0b58357..b457022 100644 --- a/MixFix.cs +++ b/MixFix.cs @@ -401,10 +401,11 @@ public static partial class MixFix { return precedenceDAG.lens[@operator.precedenceGroup].Add(@operator); } - public static PrecedenceDAG WithOperator(this PrecedenceDAG precedenceDAG, string precedenceGroup, Associativity associativity, params Part[] parts) + public static PrecedenceDAG WithOperator(this PrecedenceDAG precedenceDAG, string precedenceGroup, Semantics semantics, Associativity associativity, params Part[] parts) => precedenceDAG.With( new Operator( precedenceGroup: precedenceGroup, + semantics: semantics, associativity: associativity, parts: parts.ToImmutableList())); @@ -436,7 +437,6 @@ public static partial class MixFix { Func L = g => SamePrecedence(Associativity.LeftAssociative, g); Func N = g => SamePrecedence(Associativity.NonAssociative, g); Func H = g => Grammar1.Annotated((Annotation.Hole, g)); - var Impossible = Grammar1.Impossible; var lsucc = H(node.leftmostHole_.ToGrammar1()); var rsucc = H(node.rightmostHole_.ToGrammar1()); @@ -448,12 +448,10 @@ public static partial class MixFix { var infixr = node.infixRightAssociative.ToGrammar1(); return - // TODO: we can normally remove the ?: checks, as the constructors for grammars - // now coalesce Impossible cases in the correct way. - (closed ? N(closed) : Impossible) - | (nonAssoc ? N( (lsucc, nonAssoc, rsucc) ) : Impossible) - | ((prefix || infixr) ? R( ((prefix | (lsucc, infixr))["+"], rsucc) ) : Impossible) - | ((postfix || infixl) ? L( (lsucc, (postfix || (infixl, rsucc))["+"]) ) : Impossible); + N(closed) + | N( (lsucc, nonAssoc, rsucc) ) + | R( ((prefix | (lsucc, infixr))["+"], rsucc) ) + | L( (lsucc, (postfix || (infixl, rsucc))["+"]) ); } public static EquatableDictionary ToGrammar1(this PrecedenceDAG precedenceDAG) @@ -468,7 +466,7 @@ public static partial class MixFix { Grammar1 lr = null; try { lr = labeled[r]; - } catch (Exception e) { + } catch (Exception) { throw new ParserExtensionException($"Internal error: could not find node {r} in labeled grammar. It only contains labels for: {labeled.Select(kvp => kvp.Key.ToString()).JoinWith(", ")}."); } return recur(labeled[r], labeled); diff --git a/MixFixGenerator.cs b/MixFixGenerator.cs index 21163d6..355c220 100644 --- a/MixFixGenerator.cs +++ b/MixFixGenerator.cs @@ -49,10 +49,17 @@ public static class ParserGenerator { Case("RightAssociative")), Record("Operator", + Field("Semantics", "semantics"), Field("PrecedenceGroupName", "precedenceGroup"), Field("Associativity", "associativity"), Field("ImmutableList", "parts")), + Variant("Semantics", + Case("Program"), + Case("LiteralInt"), + Case("LiteralString"), + Case("Unsupported")), + Variant("Part", Case("S", "Name"), Case("ImmutableHashSet", "Hole")), diff --git a/Parser.cs b/Parser.cs index cea8519..54cfd58 100644 --- a/Parser.cs +++ b/Parser.cs @@ -29,67 +29,33 @@ public static partial class Parser { .IfSome((restN, nodes) => (restN, ParserResult.Productions(nodes))), // TODO: to check for ambiguous parses, we can use // .Single(…) instead of .First(…). - Or: l => - l.First(g => Parse3(tokens, g)), + Or: l => { + var i = 0; + return l.First(g => { + i++; + //Log($"{i}/{l.Count()}: trying…"); + var res = Parse3(tokens, g); + //Log($"{i}/{l.Count()}: {res}"); + return res; + }); + }, Sequence: l => l.BindFoldMap(tokens, (restI, g) => Parse3(restI, g)) .IfSome((restN, nodes) => (restN, ParserResult.Productions(nodes))), - Terminal: t => - // TODO: move the FirstAndRest here! - tokens + Terminal: t => { + var attempt = tokens .FirstAndRest() // When EOF is reached, the parser can't accept this derivation. .If((first, rest) => first.state.Equals(t)) - .IfSome((first, rest) => (rest, ParserResult.Terminal(first))), + .IfSome((first, rest) => (rest, ParserResult.Terminal(first))); + /*if (attempt.IsNone) { + Log($"failed to match {tokens.FirstAndRest().IfSome((first, rest) => first)} against terminal {t}."); + }*/ + return attempt; + }, Annotated: a => - // TODO: use the annotation to give some shape to these lists Parse3(tokens, a.Item2).IfSome((rest, g) => (rest, ParserResult.Annotated((a.Item1, g))))); - // TODO: at the top-level, check that the lexemes - // are empty if the parser won't accept anything else. - - - - - - - - - - - - - // Variant("ParserResult", - // Case("(MixFix.Annotation, ParserResult)", "Annotated"), - // Case("Lexer.Lexeme", "Terminal"), - // Case("IEnumerable", "Productions")), - - // ParserResult = A(SamePrecedence, *) | A(Operator, repeat|Terminal) | A(Hole, SamePrecedence) - - // Variant("ParserResult", - // Case("(MixFix.Annotation, ParserResult)", "Annotated"), - // Case("Lexer.Lexeme", "Terminal"), - // Case("IEnumerable", "Productions")), - - // Variant("ParserResult2", - // Case("IEnumerable", "SamePrecedence")), - // Variant("OperatorOrHole", - // Case("IEnumerable", "Operator") - // Case("Ast.SamePrecedence", "Hole")), - // Variant("SamePrecedenceOrTerminal", - // Case("Ast.SamePrecedence", "SamePrecedence"), - // Case("Lexer.Lexeme", "Terminal")), - - // Annotated(Hole, lsucc); - // Annotated(Operator, closed, nonAssoc, prefix, postfix, infixl, infixr) - - // return - // // TODO: we can normally remove the ?: checks, as the constructors for grammars - // // now coalesce Impossible cases in the correct way. - // (closed ? N(closed) : Impossible) - // | (nonAssoc ? N( (lsucc, nonAssoc, rsucc) ) : Impossible) - // | ((prefix || infixr) ? R( ((prefix | (lsucc, infixr))["+"], rsucc) ) : Impossible) - // | ((postfix || infixl) ? L( (lsucc, (postfix || (infixl, rsucc))["+"]) ) : Impossible); // We lost some typing information and the structure is scattered around // in Annotation nodes. For now gather everything back into the right @@ -430,7 +396,7 @@ public static partial class Parser { } */ - public static Option, AstNode>> Parse2(string source) { + public static ValueTuple, AstNode> Parse2(string source) { Grammar2 grammar = DefaultGrammar.DefaultPrecedenceDAG.ToGrammar2(); //Log(grammar.Str()); @@ -442,42 +408,37 @@ public static partial class Parser { Parse3 ); - Log(grammar.ToString()); + var lexSrc = Lexer.Lex(source); - return P(Lexer.Lex(source), grammar) - .IfSome((rest, result) => (rest, result.Gather().PostProcess())); + /*lexSrc + .ToIEnumerable() + .Select(c => c.state.ToString()) + .JoinWith(" ") + .Pipe(x => Log(x));*/ + + //Log(""); + + var parsed = P(lexSrc, grammar) + .IfSome((rest, result) => (rest, result.Gather().PostProcess())) + .ElseThrow(() => new ParserErrorException("Parse error.")); + + parsed.Item1.FirstAndRest().IfSome( + (first, rest) => { + lexSrc + .ToIEnumerable() + .TakeUntil(c => c.Equals(parsed.Item1)) + .Select(c => c.lexeme.ToString()) + .JoinWith(" ") + .Pipe(x => throw new ParserErrorException( + $"Trailing rubbish: {x}.")); + return Unit.unit; + }); + + return parsed; } - public static Ast.Expr Parse(string source) { - Log(""); - Log("Parsed:" + Parse2(source).ToString()); - Log(""); - - return Lexer.Lex(source) - .SelectMany(lexeme => - lexeme.state.Match( - Int: () => Ast.Expr.Int(Int32.Parse(lexeme.lexeme)).Singleton(), - String: () => Ast.Expr.String(lexeme.lexeme).Singleton(), - Ident: () => Enumerable.Empty(), // TODO - And: () => Enumerable.Empty(), // TODO - Plus: () => Enumerable.Empty(), // TODO - Times: () => Enumerable.Empty(), // TODO - Space: () => Enumerable.Empty(), // ignore - Eq: () => Enumerable.Empty(), // TODO - End: () => Enumerable.Empty(), // TODO - Decimal: () => Enumerable.Empty(), // TODO - StringOpen: () => Enumerable.Empty(), // TODO - StringClose: () => Enumerable.Empty() - ) - ) - .Single() - .ElseThrow(() => new ParserErrorException( - "empty file or more than one expression in file.")); - } - - public static void RecursiveDescent(IEnumerable e) { - - } + public static Ast.AstNode Parse(string source) + => Parse2(source).Item2; } // Notes: diff --git a/Tests/001-42.e b/Tests/001-42.e index e69de29..f70d7bb 100644 --- a/Tests/001-42.e +++ b/Tests/001-42.e @@ -0,0 +1 @@ +42 \ No newline at end of file diff --git a/Tests/006-eq.e b/Tests/006-eq.e index b9fa03e..3ea1d31 100644 --- a/Tests/006-eq.e +++ b/Tests/006-eq.e @@ -1 +1 @@ -40 + 2 == 40 + 1 + 1 && true \ No newline at end of file +1 == 1 && true \ No newline at end of file diff --git a/main.cs b/main.cs index 58e701b..d6fd44e 100644 --- a/main.cs +++ b/main.cs @@ -3,7 +3,7 @@ using System.Collections.Generic; using System.Collections.Immutable; using System.Linq; using SearchOption = System.IO.SearchOption; -using Compiler = System.Func; +using Compiler = System.Func; using static Global; public static class MainClass { @@ -47,7 +47,7 @@ public static class MainClass { Console.WriteLine($"\x1b[1;33m{source}: expected {expectedStr} but got {actualStr}.\x1b[m\n"); return false; } else { - Console.Write("\x1b[1;32mOK\x1b[m"); // \r at the end for quiet + Console.Write("\x1b[1;32mOK\x1b[m\n"); // \r at the end for quiet return true; } }