From 21778a745e2f0d82b72ad9e3187c218baa7965ef Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Emmanuel=20Beno=C3=AEt?=
Date: Fri, 30 Dec 2022 20:11:08 +0100
Subject: [PATCH] Scanner - Support for identifiers and keywords

---
Reviewer notes (free text in this section is ignored by `git am`):
 * Line breaks and indentation were rebuilt from a flattened copy of this
   patch. Indentation follows rustfmt defaults; verify it against the tree
   before applying, or apply with `--ignore-whitespace`.
 * `.collect::<String>()` in the last hunk's context was restored from
   `.collect::()`; `String` is assumed from how `identifier` uses the value
   returned by `get_substring`. Confirm against src/scanner.rs.
 * The identifier arm now also accepts a leading `_`, and the digit arm uses
   `is_ascii_digit()`. Both are one-line changes, so hunk line counts are
   unchanged; the `index` hashes still describe the original commit.
 * The author's e-mail address is missing from the `From:` header.

 Cargo.lock     |  9 ++++++++
 Cargo.toml     |  1 +
 src/scanner.rs | 60 ++++++++++++++++++++++++++++++++++++++++++++------
 3 files changed, 63 insertions(+), 7 deletions(-)

diff --git a/Cargo.lock b/Cargo.lock
index 0da50d9..40f7df9 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -2,6 +2,15 @@
 # It is not intended for manual editing.
 version = 3
 
+[[package]]
+name = "lazy_static"
+version = "1.4.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646"
+
 [[package]]
 name = "slox"
 version = "0.1.0"
+dependencies = [
+ "lazy_static",
+]
diff --git a/Cargo.toml b/Cargo.toml
index 48e6640..807258c 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -5,3 +5,4 @@ version = "0.1.0"
 edition = "2021"
 
 [dependencies]
+lazy_static = "1.4.0"
diff --git a/src/scanner.rs b/src/scanner.rs
index 1e9df17..e82ec26 100644
--- a/src/scanner.rs
+++ b/src/scanner.rs
@@ -1,6 +1,35 @@
-use crate::{tokens::TokenType, ErrorHandler};
+use std::collections::HashMap;
 
-use super::tokens::Token;
+use lazy_static::lazy_static;
+
+use crate::{
+    tokens::{Token, TokenType},
+    ErrorHandler,
+};
+
+lazy_static! {
+    /// A map of keywords to token types.
+    static ref KEYWORDS: HashMap<&'static str, TokenType> = {
+        let mut keywords = HashMap::new();
+        keywords.insert("and", TokenType::And);
+        keywords.insert("class", TokenType::Class);
+        keywords.insert("else", TokenType::Else);
+        keywords.insert("false", TokenType::False);
+        keywords.insert("for", TokenType::For);
+        keywords.insert("fun", TokenType::Fun);
+        keywords.insert("if", TokenType::If);
+        keywords.insert("nil", TokenType::Nil);
+        keywords.insert("or", TokenType::Or);
+        keywords.insert("print", TokenType::Print);
+        keywords.insert("return", TokenType::Return);
+        keywords.insert("super", TokenType::Super);
+        keywords.insert("this", TokenType::This);
+        keywords.insert("true", TokenType::True);
+        keywords.insert("var", TokenType::Var);
+        keywords.insert("while", TokenType::While);
+        keywords
+    };
+}
 
 /// The scanner's state, including the source it is scanning.
 pub struct Scanner {
@@ -91,15 +120,15 @@ impl Scanner {
             }
             // String litterals
             '"' => self.string_litteral(err_hdl),
-            // Numbers
-            '0'..='9' => self.number(err_hdl),
             // Handle whitespace
             ' ' | '\r' | '\t' => (),
             '\n' => self.line += 1,
+            // Numbers
+            ch if ch.is_ascii_digit() => self.number(err_hdl),
+            // Identifiers and keywords (may start with an underscore)
+            ch if ch.is_ascii_alphabetic() || ch == '_' => self.identifier(),
             // Anything else is an error
-            ch => {
-                err_hdl.error(self.line, &format!("unexpected character {:#?}", ch));
-            }
+            ch => err_hdl.error(self.line, &format!("unexpected character {:#?}", ch)),
         }
     }
 
@@ -154,6 +183,18 @@ impl Scanner {
         };
     }
 
+    /// Read the rest of an identifier or keyword.
+    fn identifier(&mut self) {
+        while is_word_char(self.peek()) {
+            self.current += 1;
+        }
+        let word = self.get_substring(self.start, self.current);
+        match KEYWORDS.get(&word as &str) {
+            Some(tt) => self.add_token(tt.clone()),
+            None => self.add_token(TokenType::Identifier(word)),
+        }
+    }
+
     /// Check whether the end of the input has been reached.
     fn is_at_end(&self) -> bool {
         self.current >= self.len
@@ -224,3 +265,8 @@ impl Scanner {
             .collect::<String>()
     }
 }
+
+/// Check whether a character is an ASCII letter, an ASCII digit or an underscore.
+fn is_word_char(c: char) -> bool {
+    c.is_ascii_alphanumeric() || c == '_'
+}