From c534c2c6ab36d07be76bef9826f3e0b5d9afbf18 Mon Sep 17 00:00:00 2001 From: Suzanne Soy Date: Sun, 16 Aug 2020 02:57:20 +0000 Subject: [PATCH] Epsilon transitions in lexer --- Ast.cs | 4 ++++ Lexer.cs | 43 ++++++++++++++++++++++------------- LexerGenerated.cs | 14 ++++++++++++ T4/Generator.cs | 2 ++ Tests/004-foo42.e | 1 + Tests/004-foo42.o | 1 + Tests/005-42foo.e | 1 + Tests/005-42foo.o | 1 + Utils/Enumerable.cs | 48 +++++++++++++++++++++++++++++++++++++++ Utils/Immutable/Option.cs | 33 ++++++++++++++++++++++++--- Utils/Unicode.cs | 3 +++ 11 files changed, 132 insertions(+), 19 deletions(-) create mode 100644 Tests/004-foo42.e create mode 100644 Tests/004-foo42.o create mode 100644 Tests/005-42foo.e create mode 100644 Tests/005-42foo.o diff --git a/Ast.cs b/Ast.cs index 3e9c7cd..2a4d045 100644 --- a/Ast.cs +++ b/Ast.cs @@ -42,6 +42,8 @@ namespace Ast { public override int GetHashCode() { return HashCode.Combine("Int", this.value); } + + public override string ToString() => "Int"; } public partial class Visitor { public Func String { get; set; } } @@ -62,6 +64,8 @@ namespace Ast { public override int GetHashCode() { return HashCode.Combine("String", this.value); } + + public override string ToString() => "String"; } } diff --git a/Lexer.cs b/Lexer.cs index a24c1c2..3d771a0 100644 --- a/Lexer.cs +++ b/Lexer.cs @@ -78,10 +78,9 @@ public static partial class Lexer { Rule(S.Space, '"', S.StringOpen, S.String), Rule(S.Int, C.DecimalDigitNumber, S.Int), - Rule(S.Int, C.SpaceSeparator, S.Space), // epsilon + Rule(S.Int, C.SpaceSeparator, S.Space), Rule(S.Int, new[]{'.',','}, S.Decimal, S.Int), - Rule(S.Int, EOF, S.End), // epsilon - Rule(S.Decimal, C.SpaceSeparator, S.Space), // epsilon + Rule(S.Decimal, C.SpaceSeparator, S.Space), Rule(S.String, C.LowercaseLetter, S.String), Rule(S.String, C.UppercaseLetter, S.String), @@ -92,8 +91,22 @@ public static partial class Lexer { public static Dictionary> Dict = Default .GroupBy(r => r.oldState, r => r) - .ToDictionary(rs => rs.Key, rs => rs.ToList()) ; - // TODO: upon failure, do an epsilon-transition to the whitespace state, and try again. + .ToDefaultDictionary( + new List(), + rs => rs.Key, + rs => rs.ToList()) ; + + // This adds transitions through an implicit empty whitespace. + public static Dictionary> WithEpsilonTransitions = + Dict.ToDefaultDictionary( + new List(), + kv => kv.Key, + kv => kv.Value.Any(r => true) // r.test(" ") + // This is a bit of a hack, the lexer tries the rules in + // order so later rules with different results are masked + // by former rules + ? kv.Value.Concat(Dict[S.Space]).ToList() + : kv.Value); } public struct Lexeme { @@ -125,7 +138,7 @@ public static partial class Lexer { return result; } - public static void ParseError(StringBuilder context, IEnumerator stream, S state, List possibleNext, GraphemeCluster gc) { + public static Exception ParseError(StringBuilder context, IEnumerator stream, S state, List possibleNext, GraphemeCluster gc) { var rest = stream .SingleUseEnumerable() @@ -139,7 +152,7 @@ public static partial class Lexer { .First() .Match(some: (x => x.UnicodeCategory(0).ToString()), none: "None (empty string)"); - throw new Exception( + return new Exception( $"Unexpected {actual} (Unicode category {cat}) while the lexer was in state {state}: expected one of {expected}{Environment.NewLine}{context} <--HERE {rest}" ); } @@ -155,15 +168,13 @@ public static partial class Lexer { while (e.MoveNext()) { var c = e.Current; context.Append(c.str); - List possibleNext; - if (Rules.Dict.TryGetValue(state, out possibleNext)) { - var rule = possibleNext.FirstOrDefault(r => r.test(c)); - if (rule != null) { - yield return Transition(ref state, ref lexeme, c, rule); - } else { - ParseError(context, e, state, possibleNext, c); - } - } + var possibleNext = Rules.WithEpsilonTransitions + .GetOrDefault(state, new List()); + yield return + possibleNext + .First(r => r.test(c)) + .IfSome(rule => Transition(ref state, ref lexeme, c, rule)) + .ElseThrow(() => ParseError(context, e, state, possibleNext, c)); } } diff --git a/LexerGenerated.cs b/LexerGenerated.cs index 596b183..431fd0a 100644 --- a/LexerGenerated.cs +++ b/LexerGenerated.cs @@ -56,6 +56,8 @@ public static partial class Lexer { public override int GetHashCode() { return "C".GetHashCode(); } + + public override string ToString() => "End"; } public partial class Visitor { public Func Space { get; set; } } @@ -70,6 +72,8 @@ public static partial class Lexer { public override int GetHashCode() { return "C".GetHashCode(); } + + public override string ToString() => "Space"; } public partial class Visitor { public Func Int { get; set; } } @@ -84,6 +88,8 @@ public static partial class Lexer { public override int GetHashCode() { return "C".GetHashCode(); } + + public override string ToString() => "Int"; } public partial class Visitor { public Func Decimal { get; set; } } @@ -98,6 +104,8 @@ public static partial class Lexer { public override int GetHashCode() { return "C".GetHashCode(); } + + public override string ToString() => "Decimal"; } public partial class Visitor { public Func String { get; set; } } @@ -112,6 +120,8 @@ public static partial class Lexer { public override int GetHashCode() { return "C".GetHashCode(); } + + public override string ToString() => "String"; } public partial class Visitor { public Func StringOpen { get; set; } } @@ -126,6 +136,8 @@ public static partial class Lexer { public override int GetHashCode() { return "C".GetHashCode(); } + + public override string ToString() => "StringOpen"; } public partial class Visitor { public Func StringClose { get; set; } } @@ -140,6 +152,8 @@ public static partial class Lexer { public override int GetHashCode() { return "C".GetHashCode(); } + + public override string ToString() => "StringClose"; } } diff --git a/T4/Generator.cs b/T4/Generator.cs index 28e8e63..b8389d9 100644 --- a/T4/Generator.cs +++ b/T4/Generator.cs @@ -90,6 +90,8 @@ public static class Generator { o.WriteLine($" return HashCode.Combine(\"{C}\", this.value);"); } o.WriteLine($" }}"); + o.WriteLine(""); + o.WriteLine($" public override string ToString() => \"{C}\";"); o.WriteLine($" }}"); o.WriteLine(""); } diff --git a/Tests/004-foo42.e b/Tests/004-foo42.e new file mode 100644 index 0000000..44ccc5a --- /dev/null +++ b/Tests/004-foo42.e @@ -0,0 +1 @@ +"foo"42 \ No newline at end of file diff --git a/Tests/004-foo42.o b/Tests/004-foo42.o new file mode 100644 index 0000000..94a441a --- /dev/null +++ b/Tests/004-foo42.o @@ -0,0 +1 @@ +foo42 \ No newline at end of file diff --git a/Tests/005-42foo.e b/Tests/005-42foo.e new file mode 100644 index 0000000..c18f1e7 --- /dev/null +++ b/Tests/005-42foo.e @@ -0,0 +1 @@ +42"foo" \ No newline at end of file diff --git a/Tests/005-42foo.o b/Tests/005-42foo.o new file mode 100644 index 0000000..c7a5400 --- /dev/null +++ b/Tests/005-42foo.o @@ -0,0 +1 @@ +42foo \ No newline at end of file diff --git a/Utils/Enumerable.cs b/Utils/Enumerable.cs index f5907c8..4903566 100644 --- a/Utils/Enumerable.cs +++ b/Utils/Enumerable.cs @@ -128,6 +128,17 @@ public static class Collection { } } + public static Option First(this IEnumerable ie, Func predicate) { + var e = ie.GetEnumerator(); + bool found = false; + while (e.MoveNext()) { + if (predicate(e.Current)) { + return e.Current.Some(); + } + } + return Option.None(); + } + public static Option Single(this IEnumerable ie) { var e = ie.GetEnumerator(); if (e.MoveNext()) { @@ -141,4 +152,41 @@ public static class Collection { return Option.None(); } } + + public static Option GetValue(this Dictionary d, K key) { + V result = default(V); + if (d.TryGetValue(key, out result)) { + return result.Some(); + } else { + return Option.None(); + } + } + + public static V GetOrDefault(this Dictionary d, K key, V defaultValue) { + V result = default(V); + if (d.TryGetValue(key, out result)) { + return result; + } else { + return defaultValue; + } + } + + public class DefaultDictionary : Dictionary { + public readonly TValue defaultValue; + //public readonly Dictionary dictionary; + + public DefaultDictionary(TValue defaultValue, Dictionary dictionary) : base(dictionary) { + this.defaultValue = defaultValue; + //this.dictionary = dictionary; + } + + public TValue this[TKey key] { + get { + return this.GetOrDefault(key, defaultValue); + } + } + } + + public static DefaultDictionary ToDefaultDictionary(this IEnumerable e, UValue defaultValue, Func key, Func value) + => new DefaultDictionary(defaultValue, e.ToDictionary(key, value)); } \ No newline at end of file diff --git a/Utils/Immutable/Option.cs b/Utils/Immutable/Option.cs index f62d3e3..df85c80 100644 --- a/Utils/Immutable/Option.cs +++ b/Utils/Immutable/Option.cs @@ -1,7 +1,7 @@ namespace Immutable { using System; - public interface Option { + public interface Option : System.Collections.Generic.IEnumerable { U Match_(Func some, Func none); } @@ -10,18 +10,30 @@ namespace Immutable { public static Option None() => new Types.None(); private static class Types { - public class Some : Option { + public class Some : Option, System.Collections.IEnumerable { public readonly T value; public Some(T value) { this.value = value; } public U Match_(Func Some, Func None) => Some(value); + + public System.Collections.Generic.IEnumerator GetEnumerator() + => value.Singleton().GetEnumerator(); + + System.Collections.IEnumerator System.Collections.IEnumerable.GetEnumerator() + => this.GetEnumerator(); } - public class None : Option { + public class None : Option, System.Collections.IEnumerable { public None() { } public U Match_(Func Some, Func None) => None(); + + public System.Collections.Generic.IEnumerator GetEnumerator() + => System.Linq.Enumerable.Empty().GetEnumerator(); + + System.Collections.IEnumerator System.Collections.IEnumerable.GetEnumerator() + => this.GetEnumerator(); } } } @@ -34,5 +46,20 @@ namespace Immutable { public static U Match(this Option o, Func some, U none) => o.Match_(some, () => none); + + public static Option Map(this Option o, Func some) + => o.Match_(value => some(value).Some(), () => Option.None()); + + public static Option IfSome(this Option o, Func some) + => o.Map(some); + + public static T Else(this Option o, Func none) + => o.Match_(some => some, none); + + public static Option Else(this Option o, Func> none) + => o.Match_(value => value.Some(), none); + + public static T ElseThrow(this Option o, Func none) + => o.Match_(value => value, () => throw none()); } } \ No newline at end of file diff --git a/Utils/Unicode.cs b/Utils/Unicode.cs index 918a92e..abb810a 100644 --- a/Utils/Unicode.cs +++ b/Utils/Unicode.cs @@ -12,6 +12,9 @@ public struct GraphemeCluster { this.str = str; this.codePoints = codePoints; } + + public static implicit operator GraphemeCluster(char c) + => new GraphemeCluster(false, c.ToString(), c.ToString().Singleton()); } public static class UnicodeExtensionMethods {