umx_compiler

UMX virtual machine "Monkey" interpreter / bytecode compiler
git clone git://bsandro.tech/umx_compiler
Log | Files | Refs

commit aa71b3b112ee508812b5ec1328cef8a8602b58ab
Author: bsandro <email@bsandro.tech>
Date:   Fri, 10 Jun 2022 23:48:52 +0300

Simple lexer

Diffstat:
A go.mod | 3 +++
A lexer/lexer.go | 101 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A lexer/lexer_test.go | 71 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A token/token.go | 37 +++++++++++++++++++++++++++++++++++++
4 files changed, 212 insertions(+), 0 deletions(-)

diff --git a/go.mod b/go.mod @@ -0,0 +1,3 @@ +module interp + +go 1.17 diff --git a/lexer/lexer.go b/lexer/lexer.go @@ -0,0 +1,101 @@ +package lexer + +import "interp/token" + +type Lexer struct { + input string + position int + readPosition int + ch byte +} + +func New(input string) *Lexer { + l := &Lexer{input: input} + l.readChar() + return l +} + +func (l *Lexer) readChar() { + if l.readPosition < len(l.input) { + l.ch = l.input[l.readPosition] + } else { + l.ch = 0 + } + l.position = l.readPosition + l.readPosition++ +} + +func (l *Lexer) NextToken() token.Token { + var tok token.Token + l.skipWhitespace() + switch l.ch { + case '=': + tok = newToken(token.ASSIGN, l.ch) + case ';': + tok = newToken(token.SEMICOLON, l.ch) + case '(': + tok = newToken(token.LPAREN, l.ch) + case ')': + tok = newToken(token.RPAREN, l.ch) + case '{': + tok = newToken(token.LCURLY, l.ch) + case '}': + tok = newToken(token.RCURLY, l.ch) + case '+': + tok = newToken(token.PLUS, l.ch) + case ',': + tok = newToken(token.COMMA, l.ch) + case 0: + tok.Literal = "" + tok.Type = token.EOF + default: + if isLetter(l.ch) { + tok.Literal = l.readIdentifier() + tok.Type = token.LookupIdentifier(tok.Literal) + return tok + } else if isDigit(l.ch) { + tok.Literal = l.readNumber() + tok.Type = token.INT + return tok + } else { + tok = newToken(token.ILLEGAL, l.ch) + } + } + + l.readChar() + return tok +} + +func newToken(tokenType token.TokenType, ch byte) token.Token { + return token.Token{Type: tokenType, Literal: string(ch)} +} + +func (l *Lexer) readIdentifier() string { + position := l.position + for isLetter(l.ch) { + l.readChar() + } + return l.input[position:l.position] +} + +func (l *Lexer) readNumber() string { + position := l.position + for isDigit(l.ch) { + l.readChar() + } + return l.input[position:l.position] +} + +func isLetter(ch byte) bool { + return ch >= 'a' && ch <= 'z' || ch >= 'A' && ch <= 'Z' || ch == '_' +} + +func isDigit(ch byte) bool { + return ch >= '0' && ch <= '9' +} + 
+func (l *Lexer) skipWhitespace() { + for l.ch == ' ' || l.ch == '\t' || l.ch == '\n' || l.ch == '\r' { + l.readChar() + } +} diff --git a/lexer/lexer_test.go b/lexer/lexer_test.go @@ -0,0 +1,71 @@ +package lexer + +import ( + "testing" + "interp/token" +) + +func TestNextToken(t *testing.T) { + input := `let five = 5; +let ten = 10; +let add = fn(x, y) { + x + y; +}; +let result = add(five, ten); +` + + tests := []struct{ + expectedType token.TokenType + expectedLiteral string + }{ + {token.LET, "let"}, + {token.IDENT, "five"}, + {token.ASSIGN, "="}, + {token.INT, "5"}, + {token.SEMICOLON, ";"}, + {token.LET, "let"}, + {token.IDENT, "ten"}, + {token.ASSIGN, "="}, + {token.INT, "10"}, + {token.SEMICOLON, ";"}, + {token.LET, "let"}, + {token.IDENT, "add"}, + {token.ASSIGN, "="}, + {token.FUNCTION, "fn"}, + {token.LPAREN, "("}, + {token.IDENT, "x"}, + {token.COMMA, ","}, + {token.IDENT, "y"}, + {token.RPAREN, ")"}, + {token.LCURLY, "{"}, + {token.IDENT, "x"}, + {token.PLUS, "+"}, + {token.IDENT, "y"}, + {token.SEMICOLON, ";"}, + {token.RCURLY, "}"}, + {token.SEMICOLON, ";"}, + {token.LET, "let"}, + {token.IDENT, "result"}, + {token.ASSIGN, "="}, + {token.IDENT, "add"}, + {token.LPAREN, "("}, + {token.IDENT, "five"}, + {token.COMMA, ","}, + {token.IDENT, "ten"}, + {token.RPAREN, ")"}, + {token.SEMICOLON, ";"}, + {token.EOF, ""}, + } + + l := New(input) + + for i, tt := range tests { + tok := l.NextToken() + if tok.Type != tt.expectedType { + t.Fatalf("tests[%d] error: token type is wrong, expected %q, got %q", i, tt.expectedType, tok.Type) + } + if tok.Literal != tt.expectedLiteral { + t.Fatalf("tests[%d] error: literal is wrong, expected %q, got %q", i, tt.expectedLiteral, tok.Literal) + } + } +} diff --git a/token/token.go b/token/token.go @@ -0,0 +1,37 @@ +package token + +type TokenType string + +type Token struct { + Type TokenType + Literal string +} + +const ( + ILLEGAL = "ILLEGAL" + EOF = "EOF" + IDENT = "IDENT" + INT = "INT" + ASSIGN = "=" + PLUS = "+" + 
COMMA = "," + SEMICOLON = ";" + LPAREN = "(" + RPAREN = ")" + LCURLY = "{" + RCURLY = "}" + FUNCTION = "FUNCTION" + LET = "LET" +) + +var keywords = map[string]TokenType{ + "fn": FUNCTION, + "let": LET, +} + +func LookupIdentifier(ident string) TokenType { + if tok, ok := keywords[ident]; ok { + return tok; + } + return IDENT +}