umx_compiler

UMX virtual machine "Monkey" interpreter / bytecode compiler
git clone git://bsandro.tech/umx_compiler
Log | Files | Refs

commit e4e1fdf1534021f113946a65701fdba78d1be9d9
parent b2cb2a1c4d76faa74a9a398f498bc0d866628f0b
Author: bsandro <email@bsandro.tech>
Date:   Sun, 12 Jun 2022 23:13:04 +0300

Basic lexer error checking

Diffstat:
A ast/ast.go | 52 ++++++++++++++++++++++++++++++++++++++++++++++++++++
M main.go | 2 +-
A parser/parser.go | 96 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A parser/parser_test.go | 79 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
M repl/repl.go | 2 +-
5 files changed, 229 insertions(+), 2 deletions(-)

diff --git a/ast/ast.go b/ast/ast.go
@@ -0,0 +1,52 @@
+package ast
+
+import (
+	"interp/token"
+)
+
+type Node interface {
+	TokenLiteral() string
+}
+
+type Statement interface {
+	Node
+	statementNode()
+}
+
+type Expression interface {
+	Node
+	expressionNode()
+}
+
+type Identifier struct {
+	Token token.Token
+	Value string
+}
+
+func (i *Identifier) expressionNode() {}
+func (i *Identifier) TokenLiteral() string {
+	return i.Token.Literal
+}
+
+type LetStatement struct {
+	Token token.Token
+	Name *Identifier
+	Value Expression
+}
+
+func (l *LetStatement) statementNode() {}
+func (l *LetStatement) TokenLiteral() string {
+	return l.Token.Literal
+}
+
+type Program struct {
+	Statements []Statement
+}
+
+func (p *Program) TokenLiteral() string {
+	if len(p.Statements) > 0 {
+		return p.Statements[0].TokenLiteral()
+	} else {
+		return ""
+	}
+}
diff --git a/main.go b/main.go
@@ -2,9 +2,9 @@ package main
 
 import (
 	"fmt"
+	"interp/repl"
 	"os"
 	"os/user"
-	"interp/repl"
 )
 
 func main() {
diff --git a/parser/parser.go b/parser/parser.go
@@ -0,0 +1,96 @@
+package parser
+
+import (
+	"fmt"
+	"interp/ast"
+	"interp/lexer"
+	"interp/token"
+)
+
+type Parser struct {
+	l *lexer.Lexer
+	curToken token.Token
+	peekToken token.Token
+	errors []string
+}
+
+func New(l *lexer.Lexer) *Parser {
+	p := &Parser{
+		l: l,
+		errors: []string{},
+	}
+	p.nextToken()
+	p.nextToken()
+	return p
+}
+
+func (p *Parser) Errors() []string {
+	return p.errors
+}
+
+func (p *Parser) nextToken() {
+	p.curToken = p.peekToken
+	p.peekToken = p.l.NextToken()
+}
+
+func (p *Parser) ParseProgram() *ast.Program {
+	program := &ast.Program{}
+	program.Statements = []ast.Statement{}
+	for p.curToken.Type != token.EOF {
+		statement := p.parseStatement()
+		if statement != nil {
+			program.Statements = append(program.Statements, statement)
+		}
+		p.nextToken()
+	}
+	return program
+}
+
+func (p *Parser) parseStatement() ast.Statement {
+	switch p.curToken.Type {
+	case token.LET:
+		return p.parseLetStatement()
+	default:
+		return nil
+	}
+}
+
+func (p *Parser) parseLetStatement() *ast.LetStatement {
+	statement := &ast.LetStatement{Token: p.curToken}
+	if !p.expectPeek(token.IDENT) {
+		return nil
+	}
+	statement.Name = &ast.Identifier{Token: p.curToken, Value: p.curToken.Literal}
+	if !p.expectPeek(token.ASSIGN) {
+		return nil
+	}
+
+	for !p.curTokenIs(token.SEMICOLON) {
+		p.nextToken()
+	}
+
+	return statement
+}
+
+func (p *Parser) curTokenIs(t token.TokenType) bool {
+	return p.curToken.Type == t
+}
+
+func (p *Parser) peekTokenIs(t token.TokenType) bool {
+	return p.peekToken.Type == t
+}
+
+func (p *Parser) expectPeek(t token.TokenType) bool {
+	if p.peekTokenIs(t) {
+		p.nextToken()
+		return true
+	} else {
+		p.peekError(t)
+		return false
+	}
+}
+
+func (p *Parser) peekError(t token.TokenType) {
+	msg := fmt.Sprintf("token expected: %s, got: %s", t, p.peekToken.Type);
+	p.errors = append(p.errors, msg)
+}
diff --git a/parser/parser_test.go b/parser/parser_test.go
@@ -0,0 +1,79 @@
+package parser
+
+import (
+	"interp/ast"
+	"interp/lexer"
+	"testing"
+)
+
+func TestLetStatements(t *testing.T) {
+	input := `
+let x = 5;
+let y = 10;
+let foo = 112233;
+`
+	l := lexer.New(input)
+	p := New(l)
+	program := p.ParseProgram()
+	checkParserErrors(t, p)
+
+	if program == nil {
+		t.Fatalf("ParseProgram() returned nil")
+	}
+	if len(program.Statements) != 3 {
+		t.Fatalf("program.Statements has not 3 elements but %d", len(program.Statements))
+	}
+
+	tests := []struct {
+		expectedIdentifier string
+	}{
+		{"x"},
+		{"y"},
+		{"foo"},
+	}
+
+	for i, tt := range tests {
+		statement := program.Statements[i]
+		if !testLetStatement(t, statement, tt.expectedIdentifier) {
+			return
+		}
+	}
+}
+
+func testLetStatement(t *testing.T, s ast.Statement, name string) bool {
+	if s.TokenLiteral() != "let" {
+		t.Errorf("s.TokenLiteral() is not 'let' but '%s' instead", s.TokenLiteral())
+		return false
+	}
+
+	letStatement, ok := s.(*ast.LetStatement)
+	if !ok {
+		t.Errorf("s is not *ast.LetStatement but %T", s)
+		return false
+	}
+
+	if letStatement.Name.Value != name {
+		t.Errorf("letStatement.Name.Value is not %s but %s", name, letStatement.Name.Value)
+		return false
+	}
+
+	if letStatement.Name.TokenLiteral() != name {
+		t.Errorf("letStatement.Name.TokenLiteral() is not %s but %s", name, letStatement.Name.TokenLiteral())
+		return false
+	}
+
+	return true
+}
+
+func checkParserErrors(t *testing.T, p *Parser) {
+	errors := p.Errors()
+	if len(errors) == 0 {
+		return
+	}
+
+	t.Errorf("%d parse errors:", len(errors))
+	for _, msg := range errors {
+		t.Errorf("%s", msg)
+	}
+	t.FailNow()
+}
diff --git a/repl/repl.go b/repl/repl.go
@@ -3,9 +3,9 @@ package repl
 import (
 	"bufio"
 	"fmt"
-	"io"
 	"interp/lexer"
 	"interp/token"
+	"io"
 )
 
 const PROMPT = "$> "