rust-crafting-interpreters-.../src/scanner.rs

227 lines
6.7 KiB
Rust
Raw Normal View History

2022-12-30 18:13:52 +01:00
use crate::{tokens::TokenType, ErrorHandler};
use super::tokens::Token;
/// The scanner's state, including the source it is scanning.
pub struct Scanner {
source: String,
tokens: Vec<Token>,
start: usize,
current: usize,
2022-12-30 18:13:52 +01:00
len: usize,
line: usize,
}
impl Scanner {
/// Initialize a scanner by specifying the source code to scan.
pub fn new(source: String) -> Scanner {
2022-12-30 18:13:52 +01:00
let len = source.chars().count();
Scanner {
source,
tokens: Vec::new(),
start: 0,
current: 0,
2022-12-30 18:13:52 +01:00
len,
line: 1,
}
}
/// Scan the source code, generating the list of tokens and returning it.
/// The scanner itself is destroyed once the process is complete.
2022-12-30 18:13:52 +01:00
pub fn scan_tokens(mut self, err_hdl: &mut ErrorHandler) -> Vec<Token> {
while !self.is_at_end() {
self.start = self.current;
self.scan_token(err_hdl);
}
self.tokens
}
2022-12-30 18:13:52 +01:00
/// Read the next token from the input
fn scan_token(&mut self, err_hdl: &mut ErrorHandler) {
match self.advance() {
2022-12-30 18:28:36 +01:00
// Single-character tokens
2022-12-30 18:13:52 +01:00
'(' => self.add_token(TokenType::LeftParen),
')' => self.add_token(TokenType::RightParen),
'{' => self.add_token(TokenType::LeftBrace),
'}' => self.add_token(TokenType::RightBrace),
',' => self.add_token(TokenType::Comma),
'.' => self.add_token(TokenType::Dot),
'-' => self.add_token(TokenType::Minus),
'+' => self.add_token(TokenType::Plus),
';' => self.add_token(TokenType::Semicolon),
'*' => self.add_token(TokenType::Star),
2022-12-30 18:28:36 +01:00
// Slash is a special case as it may be a line comment
'/' => {
if self.is_match('/') {
while self.peek() != '\n' && !self.is_at_end() {
2022-12-30 19:10:14 +01:00
self.current += 1;
2022-12-30 18:28:36 +01:00
}
} else {
self.add_token(TokenType::Slash)
}
2022-12-30 18:40:56 +01:00
}
2022-12-30 18:28:36 +01:00
// Things that may be either alone or followed by '='
2022-12-30 18:20:45 +01:00
'!' => {
if self.is_match('=') {
self.add_token(TokenType::BangEqual)
} else {
self.add_token(TokenType::Bang)
}
2022-12-30 18:40:56 +01:00
}
2022-12-30 18:20:45 +01:00
'=' => {
if self.is_match('=') {
self.add_token(TokenType::EqualEqual)
} else {
self.add_token(TokenType::Equal)
}
2022-12-30 18:40:56 +01:00
}
2022-12-30 18:20:45 +01:00
'<' => {
if self.is_match('=') {
self.add_token(TokenType::LessEqual)
} else {
self.add_token(TokenType::Less)
}
2022-12-30 18:40:56 +01:00
}
2022-12-30 18:20:45 +01:00
'>' => {
if self.is_match('=') {
self.add_token(TokenType::GreaterEqual)
} else {
self.add_token(TokenType::Greater)
}
2022-12-30 18:40:56 +01:00
}
// String litterals
'"' => self.string_litteral(err_hdl),
2022-12-30 19:10:14 +01:00
// Numbers
'0'..='9' => self.number(err_hdl),
2022-12-30 18:28:36 +01:00
// Handle whitespace
' ' | '\r' | '\t' => (),
'\n' => self.line += 1,
// Anything else is an error
2022-12-30 19:10:14 +01:00
ch => {
err_hdl.error(self.line, &format!("unexpected character {:#?}", ch));
}
2022-12-30 18:40:56 +01:00
}
}
2022-12-30 19:10:14 +01:00
/// Read the rest of a string litteral
2022-12-30 18:40:56 +01:00
fn string_litteral(&mut self, err_hdl: &mut ErrorHandler) {
loop {
let p = self.peek();
if p == '"' || self.is_at_end() {
break;
}
if p == '\n' {
self.line += 1;
}
2022-12-30 19:10:14 +01:00
self.current += 1;
2022-12-30 18:40:56 +01:00
}
if self.is_at_end() {
err_hdl.error(self.line, "unterminated string");
} else {
2022-12-30 19:10:14 +01:00
self.current += 1; // Last '"'
2022-12-30 18:40:56 +01:00
let value = self.get_substring(self.start + 1, self.current - 1);
self.add_token(TokenType::String(value));
2022-12-30 18:13:52 +01:00
}
}
2022-12-30 19:10:14 +01:00
/// Read the rest of a number.
fn number(&mut self, err_hdl: &mut ErrorHandler) {
while self.peek().is_digit(10) {
self.current += 1;
}
if self.peek() == '.' && self.peek_next().is_digit(10) {
self.current += 1;
while self.peek().is_digit(10) {
self.current += 1;
}
}
let tok_string = self.get_substring(self.start, self.current);
match tok_string.parse::<f64>() {
Err(e) => {
err_hdl.error(
self.line,
&format!(
"Could not parse {} as a floating point number: {:?}",
tok_string, e
),
);
}
Ok(value) => {
self.add_token(TokenType::Number(value));
}
};
}
2022-12-30 18:21:17 +01:00
/// Check whether the end of the input has been reached.
fn is_at_end(&self) -> bool {
self.current >= self.len
}
2022-12-30 18:13:52 +01:00
/// Advance to the next character and return it.
fn advance(&mut self) -> char {
2022-12-30 18:20:45 +01:00
let ch = self.cur_char();
2022-12-30 18:13:52 +01:00
self.current += 1;
ch
}
2022-12-30 18:20:45 +01:00
/// Consume the current character if it matches the argument.
fn is_match(&mut self, expected: char) -> bool {
if self.is_at_end() {
false
} else if self.cur_char() == expected {
self.current += 1;
true
} else {
false
}
}
2022-12-30 18:28:36 +01:00
/// Returns the current character, or a NULL character if the end has been
/// reached.
fn peek(&self) -> char {
if self.is_at_end() {
'\0'
} else {
self.cur_char()
}
}
2022-12-30 19:10:14 +01:00
/// Returns the next character, or a NULL character if the end has been
/// reached.
fn peek_next(&self) -> char {
if self.current + 1 >= self.source.chars().count() {
'\0'
} else {
self.source.chars().nth(self.current + 1).unwrap()
}
}
2022-12-30 18:20:45 +01:00
/// Read the current character.
fn cur_char(&self) -> char {
self.source.chars().nth(self.current).unwrap()
}
2022-12-30 18:13:52 +01:00
/// Add a token to the output.
fn add_token(&mut self, token_type: TokenType) {
2022-12-30 18:40:56 +01:00
let lexeme = self.get_substring(self.start, self.current);
2022-12-30 18:13:52 +01:00
let token = Token {
token_type,
lexeme,
line: self.line,
};
self.tokens.push(token)
}
2022-12-30 18:40:56 +01:00
/// Get a substring from the source.
fn get_substring(&self, start: usize, end: usize) -> String {
assert!(start <= end);
self.source
.chars()
.skip(start)
.take(end - start)
.collect::<String>()
}
}