lexer.go (3016B)
1 package lexer 2 3 import "umx_compiler/token" 4 5 type Lexer struct { 6 input string 7 position int 8 readPosition int 9 ch byte 10 } 11 12 func New(input string) *Lexer { 13 l := &Lexer{input: input} 14 l.readChar() 15 return l 16 } 17 18 func (l *Lexer) readChar() { 19 if l.readPosition < len(l.input) { 20 l.ch = l.input[l.readPosition] 21 } else { 22 l.ch = 0 23 } 24 l.position = l.readPosition 25 l.readPosition++ 26 } 27 28 func (l *Lexer) peekChar() byte { 29 if l.readPosition < len(l.input) { 30 return l.input[l.readPosition] 31 } else { 32 return 0 33 } 34 } 35 36 func (l *Lexer) NextToken() token.Token { 37 var tok token.Token 38 l.skipWhitespace() 39 switch l.ch { 40 case '=': 41 if l.peekChar() == '=' { 42 l.readChar() 43 tok = token.Token{Type: token.EQUAL, Literal: "=="} 44 } else { 45 tok = newToken(token.ASSIGN, l.ch) 46 } 47 case ';': 48 tok = newToken(token.SEMICOLON, l.ch) 49 case '(': 50 tok = newToken(token.LPAREN, l.ch) 51 case ')': 52 tok = newToken(token.RPAREN, l.ch) 53 case '{': 54 tok = newToken(token.LCURLY, l.ch) 55 case '}': 56 tok = newToken(token.RCURLY, l.ch) 57 case '[': 58 tok = newToken(token.LBRACKET, l.ch) 59 case ']': 60 tok = newToken(token.RBRACKET, l.ch) 61 case '+': 62 tok = newToken(token.PLUS, l.ch) 63 case '-': 64 tok = newToken(token.MINUS, l.ch) 65 case '!': 66 if l.peekChar() == '=' { 67 l.readChar() 68 tok = token.Token{Type: token.NOT_EQUAL, Literal: "!="} 69 } else { 70 tok = newToken(token.SHRIEK, l.ch) 71 } 72 case '*': 73 tok = newToken(token.ASTERISK, l.ch) 74 case '/': 75 tok = newToken(token.SLASH, l.ch) 76 case '<': 77 tok = newToken(token.LT, l.ch) 78 case '>': 79 tok = newToken(token.GT, l.ch) 80 case ',': 81 tok = newToken(token.COMMA, l.ch) 82 case '"': 83 tok.Type = token.STRING 84 tok.Literal = l.readString() 85 case ':': 86 tok = newToken(token.COLON, l.ch) 87 case 0: 88 tok.Literal = "" 89 tok.Type = token.EOF 90 default: 91 if isLetter(l.ch) { 92 tok.Literal = l.readIdentifier() 93 tok.Type = token.LookupIdentifier(tok.Literal) 94 return tok 95 } else if isDigit(l.ch) { 96 tok.Literal = l.readNumber() 97 tok.Type = token.INT 98 return tok 99 } else { 100 tok = newToken(token.ILLEGAL, l.ch) 101 } 102 } 103 104 l.readChar() 105 return tok 106 } 107 108 func newToken(tokenType token.TokenType, ch byte) token.Token { 109 return token.Token{Type: tokenType, Literal: string(ch)} 110 } 111 112 func (l *Lexer) readIdentifier() string { 113 position := l.position 114 for isLetter(l.ch) || isDigit(l.ch) || l.ch == '_' { 115 l.readChar() 116 } 117 return l.input[position:l.position] 118 } 119 120 func (l *Lexer) readNumber() string { 121 position := l.position 122 for isDigit(l.ch) { 123 l.readChar() 124 } 125 return l.input[position:l.position] 126 } 127 128 func isLetter(ch byte) bool { 129 return ch >= 'a' && ch <= 'z' || ch >= 'A' && ch <= 'Z' || ch == '_' 130 } 131 132 func isDigit(ch byte) bool { 133 return ch >= '0' && ch <= '9' 134 } 135 136 func (l *Lexer) skipWhitespace() { 137 for l.ch == ' ' || l.ch == '\t' || l.ch == '\n' || l.ch == '\r' { 138 l.readChar() 139 } 140 } 141 142 func (l *Lexer) readString() string { 143 position := l.position + 1 144 for { 145 l.readChar() 146 if l.ch == '"' || l.ch == 0 { 147 break 148 } 149 } 150 return l.input[position:l.position] 151 }