rust-crafting-interpreters-.../src/parser.rs

509 lines
17 KiB
Rust
Raw Normal View History

2022-12-31 00:50:31 +01:00
use crate::{
ast,
errors::{ErrorHandler, ParserError},
tokens::{Token, TokenType},
};
2022-12-30 23:50:33 +01:00
2022-12-31 10:12:11 +01:00
/// The parser contains the input tokens and the current input position.
2022-12-30 23:50:33 +01:00
#[derive(Debug)]
pub struct Parser {
tokens: Vec<Token>,
2022-12-31 00:50:31 +01:00
current: usize,
2022-12-30 23:50:33 +01:00
}
2022-12-31 16:27:32 +01:00
/// The result of one of the parser's functions: either the parsed value, or
/// the `ParserError` describing why parsing failed.
type ParserResult<T> = Result<T, ParserError>;
2022-12-30 23:50:33 +01:00
impl Parser {
2022-12-31 10:12:11 +01:00
/// Initialize the parser.
2022-12-30 23:50:33 +01:00
pub fn new(tokens: Vec<Token>) -> Self {
2022-12-31 00:50:31 +01:00
Self { tokens, current: 0 }
}
2022-12-31 10:12:11 +01:00
/// Consume the parser and turn its tokens into an AST.
///
/// Parser errors are reported through `err_hdl`. Returns `None` instead of
/// a program when the error handler has recorded an error.
pub fn parse(mut self, err_hdl: &mut ErrorHandler) -> Option<ast::ProgramNode> {
    self.parse_program(err_hdl)
}
2022-12-31 12:27:51 +01:00
/// Synchronize the parser after an error.
2022-12-31 16:40:59 +01:00
fn synchronize(&mut self) {
2022-12-31 12:27:51 +01:00
self.advance();
while !self.is_at_end() {
if self.previous().token_type == TokenType::Semicolon
|| matches!(
self.peek().token_type,
TokenType::Class
| TokenType::Fun
| TokenType::If
| TokenType::Print
| TokenType::Return
| TokenType::Var
| TokenType::While
)
{
return;
}
2022-12-31 16:42:32 +01:00
self.current += 1;
2022-12-31 12:27:51 +01:00
}
}
2022-12-31 10:12:11 +01:00
/* ------------------------ *
* RECURSIVE DESCENT PARSER *
* ------------------------ */
/// Parse the following rule:
/// ```
/// program := statement*
/// ```
2022-12-31 16:40:59 +01:00
fn parse_program(&mut self, err_hdl: &mut ErrorHandler) -> Option<ast::ProgramNode> {
let mut stmts: Vec<ast::StmtNode> = Vec::new();
while !self.is_at_end() {
2022-12-31 16:40:59 +01:00
match self.parse_statement() {
Ok(node) => stmts.push(node),
Err(err) => {
err.report(err_hdl);
self.synchronize()
}
}
}
if err_hdl.had_error().is_none() {
Some(ast::ProgramNode(stmts))
} else {
None
}
}
/// Parse the following rule:
/// ```
/// statement := expression ";"
/// statement := "print" expression ";"
2022-12-31 15:39:52 +01:00
/// statement := declaration ";"
2022-12-31 17:05:58 +01:00
/// statement := block
2023-01-01 18:40:05 +01:00
/// statement := if_statement
/// statement := while_statement
/// statement := for_statement
/// ```
2022-12-31 16:27:32 +01:00
fn parse_statement(&mut self) -> ParserResult<ast::StmtNode> {
2022-12-31 15:39:52 +01:00
if self.expect(&[TokenType::Var]).is_some() {
self.parse_declaration()
2022-12-31 17:05:58 +01:00
} else if self.expect(&[TokenType::LeftBrace]).is_some() {
self.parse_block()
} else if self.expect(&[TokenType::If]).is_some() {
self.parse_if_statement()
2023-01-01 18:40:05 +01:00
} else if self.expect(&[TokenType::While]).is_some() {
self.parse_while_statement()
} else if self.expect(&[TokenType::For]).is_some() {
self.parse_for_statement()
2022-12-31 15:39:52 +01:00
} else if self.expect(&[TokenType::Print]).is_some() {
let expression = self.parse_expression()?;
2022-12-31 15:19:41 +01:00
self.consume(&TokenType::Semicolon, "expected ';' after value")?;
Ok(ast::StmtNode::Print(expression))
} else {
self.parse_expression_stmt()
}
}
/// Parse the following rule:
/// ```text
/// expression_stmt := expression ";"
/// ```
fn parse_expression_stmt(&mut self) -> ParserResult<ast::StmtNode> {
    let value = self.parse_expression()?;
    self.consume(&TokenType::Semicolon, "expected ';' after expression")
        .map(|_| ast::StmtNode::Expression(value))
}
2022-12-31 15:39:52 +01:00
/// Parse the following rule:
/// ```
/// declaration := "var" IDENTIFIER ";"
/// declaration := "var" IDENTIFIER "=" expression ";"
/// ```
2022-12-31 16:27:32 +01:00
fn parse_declaration(&mut self) -> ParserResult<ast::StmtNode> {
2022-12-31 15:39:52 +01:00
let name = match self.peek().token_type {
TokenType::Identifier(_) => self.advance().clone(),
_ => return Err(ParserError::new(self.peek(), "expected variable name")),
};
let initializer: Option<ast::ExprNode> = match self.expect(&[TokenType::Equal]) {
Some(_) => Some(self.parse_expression()?),
None => None,
};
self.consume(
&TokenType::Semicolon,
"expected ';' after variable declaration",
)?;
Ok(ast::StmtNode::VarDecl(name, initializer))
2022-12-31 15:39:52 +01:00
}
2022-12-31 17:05:58 +01:00
/// Parse the following rule:
/// ```text
/// block := "{" statement* "}"
/// ```
///
/// The opening `{` is expected to have been consumed by the caller.
/// Statements are read until a `}` or the end of the input is found; the
/// first statement that fails to parse aborts the whole block.
fn parse_block(&mut self) -> ParserResult<ast::StmtNode> {
    let mut body: Vec<Box<ast::StmtNode>> = Vec::new();
    while !self.is_at_end() && !self.check(&TokenType::RightBrace) {
        body.push(Box::new(self.parse_statement()?));
    }
    self.consume(&TokenType::RightBrace, "expected '}' after block.")?;
    Ok(ast::StmtNode::Block(body))
}
/// Parse the following rule:
/// ```text
/// if_statement := "if" "(" expression ")" statement
/// if_statement := "if" "(" expression ")" statement "else" statement
/// ```
///
/// The `if` keyword is expected to have been consumed by the caller. An
/// `else` is attached to the `if` statement being parsed as soon as it is
/// encountered.
fn parse_if_statement(&mut self) -> ParserResult<ast::StmtNode> {
    self.consume(&TokenType::LeftParen, "expected '(' after 'if'")?;
    let condition = self.parse_expression()?;
    self.consume(
        &TokenType::RightParen,
        "expected ')' after condition in 'if' statement",
    )?;

    let then_branch = Box::new(self.parse_statement()?);
    let else_branch = if self.expect(&[TokenType::Else]).is_some() {
        Some(Box::new(self.parse_statement()?))
    } else {
        None
    };

    Ok(ast::StmtNode::IfStmt {
        condition,
        then_branch,
        else_branch,
    })
}
2023-01-01 18:40:05 +01:00
/// Parse the following rule:
/// ```
/// while_statement := "while" "(" expression ")" statement
2023-01-01 18:40:05 +01:00
/// ```
fn parse_while_statement(&mut self) -> ParserResult<ast::StmtNode> {
self.consume(&TokenType::LeftParen, "expected '(' after 'while'")?;
2023-01-01 18:40:05 +01:00
let condition = self.parse_expression()?;
self.consume(
&TokenType::RightParen,
"expected ')' after condition in 'while' statement",
)?;
2023-01-01 18:40:05 +01:00
let body = Box::new(self.parse_statement()?);
Ok(ast::StmtNode::WhileStmt { condition, body })
}
/// Parse the following rules:
/// ```text
/// for_statement := "for" "(" for_initializer expression? ";" expression? ")" statement
/// for_initializer := declaration
/// for_initializer := expression_stmt
/// for_initializer := ";"
/// ```
///
/// The `for` keyword is expected to have been consumed by the caller.
///
/// No dedicated AST node is generated: the loop is rewritten as a `while`
/// statement. A missing condition is replaced with a `true` literal, the
/// increment (if any) is appended to the body inside a block, and the
/// initializer (if any) is placed before the loop inside an enclosing
/// block.
fn parse_for_statement(&mut self) -> ParserResult<ast::StmtNode> {
    self.consume(&TokenType::LeftParen, "expected '(' after 'for'")?;

    // Initializer clause. All three forms consume the ';' that ends it.
    let init_clause = match self.expect(&[TokenType::Semicolon, TokenType::Var]) {
        Some(Token {
            token_type: TokenType::Semicolon,
            ..
        }) => None,
        Some(_) => Some(self.parse_declaration()?),
        None => Some(self.parse_expression_stmt()?),
    };

    // Condition clause; an empty condition means "loop forever".
    let condition = if self.check(&TokenType::Semicolon) {
        let always_true = Token {
            token_type: TokenType::True,
            lexeme: String::from("true"),
            line: self.peek().line,
        };
        ast::ExprNode::Litteral { value: always_true }
    } else {
        self.parse_expression()?
    };
    self.consume(
        &TokenType::Semicolon,
        "expected ';' after condition in 'for' statement",
    )?;

    // Increment clause.
    let step = if self.check(&TokenType::RightParen) {
        None
    } else {
        Some(self.parse_expression()?)
    };
    self.consume(
        &TokenType::RightParen,
        "expected ')' after increment in 'for' statement",
    )?;

    // Rewrite as a while loop: the increment runs after the body, and the
    // initializer runs once before the loop, inside its own block.
    let body = self.parse_statement()?;
    let loop_body = match step {
        Some(step_expr) => ast::StmtNode::Block(vec![
            Box::new(body),
            Box::new(ast::StmtNode::Expression(step_expr)),
        ]),
        None => body,
    };
    let desugared = ast::StmtNode::WhileStmt {
        condition,
        body: Box::new(loop_body),
    };
    Ok(match init_clause {
        Some(init_stmt) => {
            ast::StmtNode::Block(vec![Box::new(init_stmt), Box::new(desugared)])
        }
        None => desugared,
    })
}
2022-12-31 10:12:11 +01:00
/// Parse the following rule:
/// ```
2022-12-31 16:27:32 +01:00
/// expression := assignment
/// ```
fn parse_expression(&mut self) -> ParserResult<ast::ExprNode> {
self.parse_assignment()
}
/// Parse the following rule:
2022-12-31 10:12:11 +01:00
/// ```
2022-12-31 16:27:32 +01:00
/// assignment := IDENTIFIER "=" equality
/// assignment := equality
/// ```
fn parse_assignment(&mut self) -> ParserResult<ast::ExprNode> {
2023-01-01 11:18:08 +01:00
let expr = self.parse_logic_or()?;
2022-12-31 16:27:32 +01:00
if let Some(equals) = self.expect(&[TokenType::Equal]) {
let value = self.parse_assignment()?;
if let ast::ExprNode::Variable { name } = expr {
Ok(ast::ExprNode::Assignment {
name,
value: Box::new(value),
})
} else {
Err(ParserError::new(&equals, "invalid assignment target"))
}
} else {
Ok(expr)
}
2022-12-31 00:50:31 +01:00
}
2023-01-01 11:18:08 +01:00
/// Parse the following rule:
/// ```text
/// logic_or := logic_and ( "or" logic_and )*
/// ```
///
/// Chained operators associate to the left: each new operand becomes the
/// right-hand side of a node whose left-hand side is everything parsed so
/// far.
fn parse_logic_or(&mut self) -> ParserResult<ast::ExprNode> {
    let mut expr = self.parse_logic_and()?;
    while let Some(operator) = self.expect(&[TokenType::Or]) {
        let right = self.parse_logic_and()?;
        // `expect` returns an owned token, which is moved into the node.
        expr = ast::ExprNode::Logical {
            left: Box::new(expr),
            operator,
            right: Box::new(right),
        };
    }
    Ok(expr)
}
/// Parse the following rule:
/// ```text
/// logic_and := equality ( "and" equality )*
/// ```
///
/// Chained operators associate to the left: each new operand becomes the
/// right-hand side of a node whose left-hand side is everything parsed so
/// far.
fn parse_logic_and(&mut self) -> ParserResult<ast::ExprNode> {
    let mut expr = self.parse_equality()?;
    while let Some(operator) = self.expect(&[TokenType::And]) {
        let right = self.parse_equality()?;
        // `expect` returns an owned token, which is moved into the node.
        expr = ast::ExprNode::Logical {
            left: Box::new(expr),
            operator,
            right: Box::new(right),
        };
    }
    Ok(expr)
}
2022-12-31 10:12:11 +01:00
/// Parse the following rule:
/// ```
/// equality := comparison "==" comparison
/// equality := comparison "!=" comparison
/// ```
2022-12-31 16:27:32 +01:00
fn parse_equality(&mut self) -> ParserResult<ast::ExprNode> {
2022-12-31 00:50:31 +01:00
let mut expr = self.parse_comparison()?;
while let Some(operator) = self.expect(&[TokenType::BangEqual, TokenType::EqualEqual]) {
let right = self.parse_comparison()?;
expr = ast::ExprNode::Binary {
left: Box::new(expr),
operator: operator.clone(),
right: Box::new(right),
};
}
Ok(expr)
}
2022-12-31 10:12:11 +01:00
/// Parse the following rule:
/// ```
/// comparison := term comparison_operator term
/// comparison_operator := "<" | "<=" | ">" | ">="
/// ```
2022-12-31 16:27:32 +01:00
fn parse_comparison(&mut self) -> ParserResult<ast::ExprNode> {
2022-12-31 00:50:31 +01:00
let mut expr = self.parse_term()?;
while let Some(operator) = self.expect(&[
TokenType::Greater,
TokenType::GreaterEqual,
TokenType::Less,
TokenType::LessEqual,
]) {
let right = self.parse_term()?;
expr = ast::ExprNode::Binary {
left: Box::new(expr),
operator: operator.clone(),
right: Box::new(right),
};
}
Ok(expr)
}
2022-12-31 10:12:11 +01:00
/// Parse the following rule:
/// ```
2023-01-01 18:13:32 +01:00
/// term := factor ( "+" factor )*
/// term := factor ( "-" factor )*
2022-12-31 10:12:11 +01:00
/// ```
2022-12-31 16:27:32 +01:00
fn parse_term(&mut self) -> ParserResult<ast::ExprNode> {
2022-12-31 00:50:31 +01:00
let mut expr = self.parse_factor()?;
while let Some(operator) = self.expect(&[TokenType::Minus, TokenType::Plus]) {
let right = self.parse_factor()?;
expr = ast::ExprNode::Binary {
left: Box::new(expr),
operator: operator.clone(),
right: Box::new(right),
};
}
Ok(expr)
}
2022-12-31 10:12:11 +01:00
/// Parse the following rule:
/// ```
2023-01-01 18:13:32 +01:00
/// factor := unary ( "*" unary )*
/// factor := unary ( "/" unary )*
2022-12-31 10:12:11 +01:00
/// ```
2022-12-31 16:27:32 +01:00
fn parse_factor(&mut self) -> ParserResult<ast::ExprNode> {
2022-12-31 00:50:31 +01:00
let mut expr = self.parse_unary()?;
while let Some(operator) = self.expect(&[TokenType::Slash, TokenType::Star]) {
let right = self.parse_unary()?;
expr = ast::ExprNode::Binary {
left: Box::new(expr),
operator: operator.clone(),
right: Box::new(right),
};
}
Ok(expr)
}
2022-12-31 10:12:11 +01:00
/// Parse the following rule:
/// ```
/// unary := "-" unary
/// unary := "!" unary
/// unary := primary
/// ```
2022-12-31 16:27:32 +01:00
fn parse_unary(&mut self) -> ParserResult<ast::ExprNode> {
2022-12-31 00:50:31 +01:00
if let Some(operator) = self.expect(&[TokenType::Bang, TokenType::Minus]) {
Ok(ast::ExprNode::Unary {
2022-12-31 10:14:58 +01:00
operator,
2022-12-31 00:50:31 +01:00
right: Box::new(self.parse_unary()?),
})
} else {
self.parse_primary()
}
}
2022-12-31 10:12:11 +01:00
/// Parse the following rule:
/// ```
/// primary := "(" expression ")"
/// primary := FALSE | TRUE | NIL | STRING | NUMBER
/// primary := IDENTIFIER
2022-12-31 10:12:11 +01:00
/// ```
2022-12-31 16:27:32 +01:00
fn parse_primary(&mut self) -> ParserResult<ast::ExprNode> {
2022-12-31 00:50:31 +01:00
if self.expect(&[TokenType::LeftParen]).is_some() {
let expr = self.parse_expression()?;
self.consume(&TokenType::RightParen, "expected ')' after expression")?;
Ok(ast::ExprNode::Grouping {
expression: Box::new(expr),
})
} else if let Some(token) =
self.expect(&[TokenType::False, TokenType::True, TokenType::Nil])
{
Ok(ast::ExprNode::Litteral { value: token })
} else {
match &self.peek().token_type {
TokenType::Number(_) | &TokenType::String(_) => Ok(ast::ExprNode::Litteral {
value: self.advance().clone(),
}),
TokenType::Identifier(_) => Ok(ast::ExprNode::Variable {
name: self.advance().clone(),
}),
2022-12-31 00:50:31 +01:00
_ => Err(ParserError::new(self.peek(), "expected expression")),
}
}
}
2022-12-31 10:12:11 +01:00
/* -------------- *
* HELPER METHODS *
* -------------- */
/// Expect a token of some types. If a matching token is found, the read
/// pointer is moved and a clone of the token is returned.
2022-12-31 00:50:31 +01:00
fn expect(&mut self, accepts: &[TokenType]) -> Option<Token> {
for tt in accepts {
if self.check(tt) {
return Some(self.advance().clone());
}
}
None
}
2022-12-31 10:12:11 +01:00
/// Consume a token of a given type. If no matching token is found, a
/// parse error is returned instead. Otherwise the read pointer is moved.
2022-12-31 16:27:32 +01:00
fn consume(&mut self, token_type: &TokenType, error: &str) -> ParserResult<&Token> {
2022-12-31 00:50:31 +01:00
if self.check(token_type) {
Ok(self.advance())
} else {
Err(ParserError::new(self.peek(), error))
}
}
2022-12-31 10:12:11 +01:00
/// Check for a token of some type. Returns `false` if the end of the input
/// has been reached. The read pointer isn't affected.
2022-12-31 00:50:31 +01:00
fn check(&self, token_type: &TokenType) -> bool {
if self.is_at_end() {
false
} else {
&self.peek().token_type == token_type
}
}
2022-12-31 10:12:11 +01:00
/// Move the read pointer forward if the end hasn't been reached. In all
/// cases, return the previous element (so either the element that was
/// current before the pointer moved, or the last, non-`EOF` token).
2022-12-31 00:50:31 +01:00
fn advance(&mut self) -> &Token {
if !self.is_at_end() {
self.current += 1
}
self.previous()
}
2022-12-31 10:12:11 +01:00
/// Check whether the end of token stream has been reached by checking
/// for the `EOF` token.
2022-12-31 00:50:31 +01:00
fn is_at_end(&self) -> bool {
2022-12-31 10:17:25 +01:00
self.peek().token_type == TokenType::Eof
2022-12-31 00:50:31 +01:00
}
2022-12-31 10:12:11 +01:00
/// Return a reference to the current token in the stream.
2022-12-31 00:50:31 +01:00
fn peek(&self) -> &Token {
&self.tokens[self.current]
}
2022-12-31 10:12:11 +01:00
/// Return a reference to the previous token in the stream.
2022-12-31 00:50:31 +01:00
fn previous(&self) -> &Token {
&self.tokens[self.current - 1]
2022-12-30 23:50:33 +01:00
}
}