commit aa71b3b112ee508812b5ec1328cef8a8602b58ab
Author: bsandro <email@bsandro.tech>
Date: Fri, 10 Jun 2022 23:48:52 +0300
Simple lexer
Diffstat:
A | go.mod | | | 3 | +++ |
A | lexer/lexer.go | | | 101 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
A | lexer/lexer_test.go | | | 71 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
A | token/token.go | | | 37 | +++++++++++++++++++++++++++++++++++++ |
4 files changed, 212 insertions(+), 0 deletions(-)
diff --git a/go.mod b/go.mod
@@ -0,0 +1,3 @@
+module interp
+
+go 1.17
diff --git a/lexer/lexer.go b/lexer/lexer.go
@@ -0,0 +1,101 @@
+package lexer
+
+import "interp/token"
+
// Lexer produces tokens from a source string, one byte at a time.
// It operates on raw bytes, so only ASCII input is handled correctly.
type Lexer struct {
	input        string // full source text being tokenized
	position     int    // index of the current character (the one in ch)
	readPosition int    // index of the next character to be read
	ch           byte   // current character; 0 means end of input
}
+
+func New(input string) *Lexer {
+ l := &Lexer{input: input}
+ l.readChar()
+ return l
+}
+
+func (l *Lexer) readChar() {
+ if l.readPosition < len(l.input) {
+ l.ch = l.input[l.readPosition]
+ } else {
+ l.ch = 0
+ }
+ l.position = l.readPosition
+ l.readPosition++
+}
+
+func (l *Lexer) NextToken() token.Token {
+ var tok token.Token
+ l.skipWhitespace()
+ switch l.ch {
+ case '=':
+ tok = newToken(token.ASSIGN, l.ch)
+ case ';':
+ tok = newToken(token.SEMICOLON, l.ch)
+ case '(':
+ tok = newToken(token.LPAREN, l.ch)
+ case ')':
+ tok = newToken(token.RPAREN, l.ch)
+ case '{':
+ tok = newToken(token.LCURLY, l.ch)
+ case '}':
+ tok = newToken(token.RCURLY, l.ch)
+ case '+':
+ tok = newToken(token.PLUS, l.ch)
+ case ',':
+ tok = newToken(token.COMMA, l.ch)
+ case 0:
+ tok.Literal = ""
+ tok.Type = token.EOF
+ default:
+ if isLetter(l.ch) {
+ tok.Literal = l.readIdentifier()
+ tok.Type = token.LookupIdentifier(tok.Literal)
+ return tok
+ } else if isDigit(l.ch) {
+ tok.Literal = l.readNumber()
+ tok.Type = token.INT
+ return tok
+ } else {
+ tok = newToken(token.ILLEGAL, l.ch)
+ }
+ }
+
+ l.readChar()
+ return tok
+}
+
+func newToken(tokenType token.TokenType, ch byte) token.Token {
+ return token.Token{Type: tokenType, Literal: string(ch)}
+}
+
+func (l *Lexer) readIdentifier() string {
+ position := l.position
+ for isLetter(l.ch) {
+ l.readChar()
+ }
+ return l.input[position:l.position]
+}
+
+func (l *Lexer) readNumber() string {
+ position := l.position
+ for isDigit(l.ch) {
+ l.readChar()
+ }
+ return l.input[position:l.position]
+}
+
// isLetter reports whether ch may appear in an identifier:
// ASCII letters or the underscore.
func isLetter(ch byte) bool {
	switch {
	case 'a' <= ch && ch <= 'z':
		return true
	case 'A' <= ch && ch <= 'Z':
		return true
	default:
		return ch == '_'
	}
}
+
// isDigit reports whether ch is an ASCII decimal digit.
func isDigit(ch byte) bool {
	return '0' <= ch && ch <= '9'
}
+
+func (l *Lexer) skipWhitespace() {
+ for l.ch == ' ' || l.ch == '\t' || l.ch == '\n' || l.ch == '\r' {
+ l.readChar()
+ }
+}
diff --git a/lexer/lexer_test.go b/lexer/lexer_test.go
@@ -0,0 +1,71 @@
+package lexer
+
+import (
+ "testing"
+ "interp/token"
+)
+
// TestNextToken runs the lexer over a small sample program and checks
// the full token stream (type and literal) against an expected table,
// ending with the EOF sentinel.
func TestNextToken(t *testing.T) {
	input := `let five = 5;
let ten = 10;
let add = fn(x, y) {
	x + y;
};
let result = add(five, ten);
`

	// Expected tokens in exact emission order.
	tests := []struct{
		expectedType token.TokenType
		expectedLiteral string
	}{
		{token.LET, "let"},
		{token.IDENT, "five"},
		{token.ASSIGN, "="},
		{token.INT, "5"},
		{token.SEMICOLON, ";"},
		{token.LET, "let"},
		{token.IDENT, "ten"},
		{token.ASSIGN, "="},
		{token.INT, "10"},
		{token.SEMICOLON, ";"},
		{token.LET, "let"},
		{token.IDENT, "add"},
		{token.ASSIGN, "="},
		{token.FUNCTION, "fn"},
		{token.LPAREN, "("},
		{token.IDENT, "x"},
		{token.COMMA, ","},
		{token.IDENT, "y"},
		{token.RPAREN, ")"},
		{token.LCURLY, "{"},
		{token.IDENT, "x"},
		{token.PLUS, "+"},
		{token.IDENT, "y"},
		{token.SEMICOLON, ";"},
		{token.RCURLY, "}"},
		{token.SEMICOLON, ";"},
		{token.LET, "let"},
		{token.IDENT, "result"},
		{token.ASSIGN, "="},
		{token.IDENT, "add"},
		{token.LPAREN, "("},
		{token.IDENT, "five"},
		{token.COMMA, ","},
		{token.IDENT, "ten"},
		{token.RPAREN, ")"},
		{token.SEMICOLON, ";"},
		{token.EOF, ""},
	}

	l := New(input)

	// Fatalf (not Errorf) on first mismatch: once one token is wrong,
	// every subsequent comparison would be misaligned anyway.
	for i, tt := range tests {
		tok := l.NextToken()
		if tok.Type != tt.expectedType {
			t.Fatalf("tests[%d] error: token type is wrong, expected %q, got %q", i, tt.expectedType, tok.Type)
		}
		if tok.Literal != tt.expectedLiteral {
			t.Fatalf("tests[%d] error: literal is wrong, expected %q, got %q", i, tt.expectedLiteral, tok.Literal)
		}
	}
}
diff --git a/token/token.go b/token/token.go
@@ -0,0 +1,37 @@
+package token
+
// TokenType identifies the category of a token. A string (rather than
// an int) keeps debugging output readable at the cost of comparisons.
type TokenType string

// Token is a single lexical unit: its category and the exact source
// text that produced it.
type Token struct {
	Type TokenType
	Literal string
}
+
// Token types. Single-character operators and delimiters use their own
// literal as the type value; multi-character categories use mnemonics.
const (
	ILLEGAL = "ILLEGAL" // unrecognized character
	EOF = "EOF" // end of input
	IDENT = "IDENT" // identifiers: add, foobar, x, y, ...
	INT = "INT" // integer literals: 1343456
	ASSIGN = "="
	PLUS = "+"
	COMMA = ","
	SEMICOLON = ";"
	LPAREN = "("
	RPAREN = ")"
	LCURLY = "{"
	RCURLY = "}"
	FUNCTION = "FUNCTION" // keyword: fn
	LET = "LET" // keyword: let
)
+
// keywords maps reserved words to their token types; any identifier
// not present here lexes as IDENT.
var keywords = map[string]TokenType{
	"fn": FUNCTION,
	"let": LET,
}
+
+func LookupIdentifier(ident string) TokenType {
+ if tok, ok := keywords[ident]; ok {
+ return tok;
+ }
+ return IDENT
+}