umx_compiler

UMX virtual machine "Monkey" interpreter / bytecode compiler
git clone git://bsandro.tech/umx_compiler
Log | Files | Refs | README | LICENSE

lexer.go (3016B)


      1 package lexer
      2 
      3 import "umx_compiler/token"
      4 
      5 type Lexer struct {
      6 	input        string
      7 	position     int
      8 	readPosition int
      9 	ch           byte
     10 }
     11 
     12 func New(input string) *Lexer {
     13 	l := &Lexer{input: input}
     14 	l.readChar()
     15 	return l
     16 }
     17 
     18 func (l *Lexer) readChar() {
     19 	if l.readPosition < len(l.input) {
     20 		l.ch = l.input[l.readPosition]
     21 	} else {
     22 		l.ch = 0
     23 	}
     24 	l.position = l.readPosition
     25 	l.readPosition++
     26 }
     27 
     28 func (l *Lexer) peekChar() byte {
     29 	if l.readPosition < len(l.input) {
     30 		return l.input[l.readPosition]
     31 	} else {
     32 		return 0
     33 	}
     34 }
     35 
     36 func (l *Lexer) NextToken() token.Token {
     37 	var tok token.Token
     38 	l.skipWhitespace()
     39 	switch l.ch {
     40 	case '=':
     41 		if l.peekChar() == '=' {
     42 			l.readChar()
     43 			tok = token.Token{Type: token.EQUAL, Literal: "=="}
     44 		} else {
     45 			tok = newToken(token.ASSIGN, l.ch)
     46 		}
     47 	case ';':
     48 		tok = newToken(token.SEMICOLON, l.ch)
     49 	case '(':
     50 		tok = newToken(token.LPAREN, l.ch)
     51 	case ')':
     52 		tok = newToken(token.RPAREN, l.ch)
     53 	case '{':
     54 		tok = newToken(token.LCURLY, l.ch)
     55 	case '}':
     56 		tok = newToken(token.RCURLY, l.ch)
     57 	case '[':
     58 		tok = newToken(token.LBRACKET, l.ch)
     59 	case ']':
     60 		tok = newToken(token.RBRACKET, l.ch)
     61 	case '+':
     62 		tok = newToken(token.PLUS, l.ch)
     63 	case '-':
     64 		tok = newToken(token.MINUS, l.ch)
     65 	case '!':
     66 		if l.peekChar() == '=' {
     67 			l.readChar()
     68 			tok = token.Token{Type: token.NOT_EQUAL, Literal: "!="}
     69 		} else {
     70 			tok = newToken(token.SHRIEK, l.ch)
     71 		}
     72 	case '*':
     73 		tok = newToken(token.ASTERISK, l.ch)
     74 	case '/':
     75 		tok = newToken(token.SLASH, l.ch)
     76 	case '<':
     77 		tok = newToken(token.LT, l.ch)
     78 	case '>':
     79 		tok = newToken(token.GT, l.ch)
     80 	case ',':
     81 		tok = newToken(token.COMMA, l.ch)
     82 	case '"':
     83 		tok.Type = token.STRING
     84 		tok.Literal = l.readString()
     85 	case ':':
     86 		tok = newToken(token.COLON, l.ch)
     87 	case 0:
     88 		tok.Literal = ""
     89 		tok.Type = token.EOF
     90 	default:
     91 		if isLetter(l.ch) {
     92 			tok.Literal = l.readIdentifier()
     93 			tok.Type = token.LookupIdentifier(tok.Literal)
     94 			return tok
     95 		} else if isDigit(l.ch) {
     96 			tok.Literal = l.readNumber()
     97 			tok.Type = token.INT
     98 			return tok
     99 		} else {
    100 			tok = newToken(token.ILLEGAL, l.ch)
    101 		}
    102 	}
    103 
    104 	l.readChar()
    105 	return tok
    106 }
    107 
    108 func newToken(tokenType token.TokenType, ch byte) token.Token {
    109 	return token.Token{Type: tokenType, Literal: string(ch)}
    110 }
    111 
    112 func (l *Lexer) readIdentifier() string {
    113 	position := l.position
    114 	for isLetter(l.ch) || isDigit(l.ch) || l.ch == '_' {
    115 		l.readChar()
    116 	}
    117 	return l.input[position:l.position]
    118 }
    119 
    120 func (l *Lexer) readNumber() string {
    121 	position := l.position
    122 	for isDigit(l.ch) {
    123 		l.readChar()
    124 	}
    125 	return l.input[position:l.position]
    126 }
    127 
    128 func isLetter(ch byte) bool {
    129 	return ch >= 'a' && ch <= 'z' || ch >= 'A' && ch <= 'Z' || ch == '_'
    130 }
    131 
    132 func isDigit(ch byte) bool {
    133 	return ch >= '0' && ch <= '9'
    134 }
    135 
    136 func (l *Lexer) skipWhitespace() {
    137 	for l.ch == ' ' || l.ch == '\t' || l.ch == '\n' || l.ch == '\r' {
    138 		l.readChar()
    139 	}
    140 }
    141 
    142 func (l *Lexer) readString() string {
    143 	position := l.position + 1
    144 	for {
    145 		l.readChar()
    146 		if l.ch == '"' || l.ch == 0 {
    147 			break
    148 		}
    149 	}
    150 	return l.input[position:l.position]
    151 }