Some notes on parsers

This commit is contained in:
Suzanne Soy 2020-08-17 02:11:47 +00:00
parent c534c2c6ab
commit c1b3bd783b
3 changed files with 34 additions and 17 deletions

View File

@ -5,4 +5,6 @@ public class Evaluator {
String: s => s.ToString()
);
}
}
}
// Note: for typeclass resolution, require that functions have their parameter and return types annotated. These annotations are attached to the values at run-time, which allows dispatching on the annotation rather than on the actual value.

View File

@ -2,23 +2,39 @@ using System;
using System.Text;
using System.Collections.Generic;
using System.Linq;
using System.Globalization;
using Immutable;
using S = Lexer.S;
using static Global;
// NOTE(review): as it stands, this block contains two bodies for Parse back to back
// (a foreach-based one followed by a SelectMany-based one) — presumably the "before" and
// "after" of the same change; confirm which one is intended before editing further.
/// <summary>
/// Entry point that turns source text into an <c>Ast.Expr</c> from the lexemes
/// produced by <c>Lexer.Lex</c>.
/// </summary>
public static class Parser {
/// <summary>Parses <paramref name="source"/> into a single expression.</summary>
/// <param name="source">Text handed to <c>Lexer.Lex</c>.</param>
/// <returns>An expression built from an Int or String lexeme.</returns>
public static Ast.Expr Parse(string source) {
// First variant: the return inside the loop body means only the first lexeme is ever examined.
foreach (var lexeme in Lexer.Lex(source)) {
return lexeme.state.Match(
// NOTE(review): Int32.Parse is called without an explicit format provider.
Int: () => Ast.Expr.Int(Int32.Parse(lexeme.lexeme)),
String: () => Ast.Expr.String(lexeme.lexeme),
// Every other lexer state is rejected with NotImplementedException in this variant.
Space: () => throw new NotImplementedException(), // ignore
End: () => throw new NotImplementedException(),
Decimal: () => throw new NotImplementedException(),
StringOpen: () => throw new NotImplementedException(),
StringClose: () => throw new NotImplementedException()
);
}
// Reached only when the loop body never ran, i.e. Lexer.Lex produced no lexemes.
throw new Exception("empty file, rm this when consuming the whole stream of lexemes.");
// Second variant: follows an unconditional throw, so it is unreachable as written.
// Each lexeme maps to zero or one expression; Int and String yield one, all other states yield none.
return Lexer.Lex(source)
.SelectMany(lexeme =>
lexeme.state.Match(
Int: () => Ast.Expr.Int(Int32.Parse(lexeme.lexeme)).Singleton(),
String: () => Ast.Expr.String(lexeme.lexeme).Singleton(),
Space: () => Enumerable.Empty<Ast.Expr>(), // ignore
End: () => Enumerable.Empty<Ast.Expr>(),
Decimal: () => Enumerable.Empty<Ast.Expr>(),
StringOpen: () => Enumerable.Empty<Ast.Expr>(),
StringClose: () => Enumerable.Empty<Ast.Expr>()
)
)
// NOTE(review): Single() is followed by ElseThrow(), so this is presumably a project-defined
// Single returning an option-like value rather than System.Linq's — confirm.
.Single()
.ElseThrow(() => new Exception("empty file or more than one expression in file."));
// NOTE(review): three closing braces follow, but only two scopes (method, class) are still open here.
}
}
}
// Notes:
// (a, b, c) is parsed as (expr (paren (expr comma (expr a) (expr comma (expr b) (expr c))))) where expr is a run-time wrapper which allows, e.g., passing an explicit environment or (useful in this case) distinguishing between a tuple-value referenced by c and a paren expression. In contrast, (a, (b, c)) is parsed as (expr (paren (expr comma (expr a) (expr paren (expr comma (expr b) (expr c))))))
// (a < b <= c < d > e) is parsed similarly to the sequence of commas, allowing the comparison operators to compare against their predecessor instead of against the boolean output value.
// ("if" condition "then" clause) returns a boolean-like value, indicating what the original condition was. It's as simple as (operator ("if" condition "then" clause) = real_if condition real_then { clause with condition_was = true } real_else { condition_was = false }). (ifthen "else" clause) is just a binary operator.
// It is also possible to have the "else" operator take an AST as its left operand, and inspect it to extract and rewrite the "if".
// -3 is recognized by the lexer, but -x is not allowed. Otherwise f -x would be ambiguous: it could be f (-x) or (f) - (x).
// relaxed unicity: the symbols must not appear in other operators of the same namespace nor as the closing bracket symbols which delimit the uses of this namespace in closed operators. Rationale: once the closing bracket is known, if the entire sub-expression doesn't include that bracket then the parser can fast-forward until the closing bracket, only caring about matching open and close symbols which may delimit sub-expressions with different namespaces, and know that whatever's inside is unambiguous.

View File

@ -130,7 +130,6 @@ public static class Collection {
public static Option<T> First<T>(this IEnumerable<T> ie, Func<T, bool> predicate) {
var e = ie.GetEnumerator();
bool found = false;
while (e.MoveNext()) {
if (predicate(e.Current)) {
return e.Current.Some();
@ -180,7 +179,7 @@ public static class Collection {
//this.dictionary = dictionary;
}
public TValue this[TKey key] {
public new TValue this[TKey key] {
get {
return this.GetOrDefault(key, defaultValue);
}