From f5050e369e5d53ae5aa69d2e1bed8d5acd631297 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marcel=20M=C3=BCller?= Date: Fri, 6 Mar 2026 08:25:17 +0100 Subject: [PATCH] Add ast parsing MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Marcel Müller --- src/ast/mod.rs | 267 ++++++++++++++++++++++++++++++++++++++++++++++ src/lib.rs | 60 ++++++++++- src/parser/mod.rs | 240 ++++++++++++++++++++++++----------------- 3 files changed, 467 insertions(+), 100 deletions(-) create mode 100644 src/ast/mod.rs diff --git a/src/ast/mod.rs b/src/ast/mod.rs new file mode 100644 index 0000000..62f86c7 --- /dev/null +++ b/src/ast/mod.rs @@ -0,0 +1,267 @@ +use winnow::Parser; +use winnow::RecoverableParser; +use winnow::combinator::alt; +use winnow::combinator::cut_err; +use winnow::combinator::delimited; +use winnow::combinator::opt; +use winnow::combinator::repeat; +use winnow::combinator::repeat_till; +use winnow::error::AddContext; +use winnow::error::FromRecoverableError; +use winnow::error::ModalError; +use winnow::error::ParserError; +use winnow::stream::Recoverable; +use winnow::stream::Stream; +use winnow::stream::TokenSlice; +use winnow::token::any; + +use crate::parser::TemplateToken; +use crate::parser::TokenKind; +use crate::resume_after_cut; + +#[derive(Debug, Clone)] +pub struct TemplateAst<'input> { + root: Vec>, +} + +#[derive(Debug, Clone)] +pub struct AstError { + pub(crate) message: Option, + pub(crate) help: Option, + pub(crate) span: Option, + + is_fatal: bool, +} + +impl AstError { + fn ctx() -> Self { + AstError { + message: None, + help: None, + span: None, + + is_fatal: false, + } + } + + fn msg(mut self, message: &str) -> Self { + self.message = Some(message.to_string()); + self + } + + fn help(mut self, help: &str) -> Self { + self.help = Some(help.to_string()); + self + } +} + +impl ModalError for AstError { + fn cut(mut self) -> Self { + self.is_fatal = true; + self + } + + fn backtrack(mut self) -> Self { + self.is_fatal = false; + self + } +} + +impl<'input> FromRecoverableError, AstError> for AstError { + fn from_recoverable_error( + token_start: & as winnow::stream::Stream>::Checkpoint, + _err_start: & as winnow::stream::Stream>::Checkpoint, + input: &Input<'input>, + mut e: AstError, + ) -> Self { + e + } +} + +impl<'input> AddContext, AstError> for AstError { + fn add_context( + mut self, + _input: &Input<'input>, + _token_start: & as Stream>::Checkpoint, + context: AstError, + ) -> Self { + self.message = context.message.or(self.message); + self.help = context.help.or(self.help); + self + } +} + +impl<'input> ParserError> for AstError { + type Inner = AstError; + + fn from_input(_input: &Input<'input>) -> Self { + AstError::ctx() + } + + fn into_inner(self) -> winnow::Result { + Ok(self) + } + + fn is_backtrack(&self) -> bool { + !self.is_fatal + } +} + +#[derive(Debug)] +pub struct AstFailure {} + +impl AstFailure { + fn from_errors(_errors: Vec, _input: &[TemplateToken<'_>]) -> AstFailure { + AstFailure {} + } +} + +type Input<'input> = Recoverable>, AstError>; + +impl<'i> Parser, TemplateToken<'i>, AstError> for TokenKind { + fn parse_next(&mut self, input: &mut Input<'i>) -> winnow::Result, AstError> { + winnow::token::literal(*self) + .parse_next(input) + .map(|t| t[0].clone()) + } +} + +pub fn parse<'input>( + input: &'input [TemplateToken<'input>], +) -> Result, AstFailure> { + let (_remaining, val, errors) = parse_ast.recoverable_parse(TokenSlice::new(input)); + + if errors.is_empty() + && let Some(val) = val + { + Ok(TemplateAst { root: val }) + } else { + Err(AstFailure::from_errors(errors, input)) + } +} + +#[derive(Debug, Clone)] +pub enum TemplateAstExpr<'input> { + StaticContent(TemplateToken<'input>), + Interpolation { + prev_whitespace: Option>, + expression: Box>, + post_whitespace: Option>, + }, + VariableAccess(TemplateToken<'input>), + Invalid(&'input [TemplateToken<'input>]), +} + +fn parse_ast<'input>(input: &mut Input<'input>) -> Result>, AstError> { + repeat( + 0.., + alt(( + TokenKind::Content.map(TemplateAstExpr::StaticContent), + parse_interpolation, + )), + ) + .parse_next(input) +} + +fn parse_interpolation<'input>( + input: &mut Input<'input>, +) -> Result, AstError> { + let expr_parser = resume_after_cut( + alt((parse_variable_access,)), + repeat_till(1.., any, TokenKind::RightDelim).map(|((), _)| ()), + ) + .with_taken() + .map(|(expr, taken)| expr.unwrap_or(TemplateAstExpr::Invalid(taken))); + let (prev_whitespace, _left, (expression, _right, post_whitespace)) = ( + opt(TokenKind::Whitespace), + TokenKind::LeftDelim, + cut_err(( + delimited(ignore_ws, expr_parser, ignore_ws).map(Box::new), + TokenKind::RightDelim, + opt(TokenKind::Whitespace), + )), + ) + .parse_next(input)?; + + Ok(TemplateAstExpr::Interpolation { + prev_whitespace, + expression, + post_whitespace, + }) +} + +fn parse_variable_access<'input>( + input: &mut Input<'input>, +) -> Result, AstError> { + TokenKind::Ident + .map(TemplateAstExpr::VariableAccess) + .parse_next(input) +} + +fn ignore_ws<'input>(input: &mut Input<'input>) -> Result<(), AstError> { + repeat(.., TokenKind::Whitespace).parse_next(input) +} + +#[cfg(test)] +mod tests { + use crate::ast::parse; + + #[test] + fn check_only_content() { + let input = "Hello World"; + + let parsed = crate::parser::parse(input).unwrap(); + + let ast = parse(parsed.tokens()).unwrap(); + + insta::assert_debug_snapshot!(ast, @r#" + TemplateAst { + root: [ + StaticContent( + TemplateToken { + kind: Content, + source: "Hello World", + }, + ), + ], + } + "#); + } + + #[test] + fn check_simple_variable_interpolation() { + let input = "Hello {{ world }}"; + + let parsed = crate::parser::parse(input).unwrap(); + + let ast = parse(parsed.tokens()).unwrap(); + + insta::assert_debug_snapshot!(ast, @r#" + TemplateAst { + root: [ + StaticContent( + TemplateToken { + kind: Content, + source: "Hello", + }, + ), + Interpolation { + prev_whitespace: Some( + TemplateToken { + kind: Whitespace, + source: " ", + }, + ), + expression: VariableAccess( + TemplateToken { + kind: Ident, + source: "world", + }, + ), + post_whitespace: None, + }, + ], + } + "#); + } +} diff --git a/src/lib.rs b/src/lib.rs index 1ea1247..41b0715 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -5,8 +5,9 @@ use displaydoc::Display; use serde::Serialize; use thiserror::Error; -pub mod parser; +pub mod ast; pub mod eval; +pub mod parser; #[derive(Debug, Error, Display)] pub enum TempleError { @@ -79,6 +80,63 @@ impl Context { } } +#[derive(Debug, Clone)] +pub struct SourceSpan { + pub range: std::ops::Range, +} + +// This is just like the standard .resume_after(), except we only resume on Cut errors. +fn resume_after_cut( + mut parser: ParseNext, + mut recover: ParseRecover, +) -> impl winnow::Parser, Error> +where + Input: winnow::stream::Stream + winnow::stream::Recover, + Error: winnow::error::ParserError + winnow::error::FromRecoverableError, + ParseNext: winnow::Parser, + ParseRecover: winnow::Parser, +{ + winnow::combinator::trace("resume_after_cut", move |input: &mut Input| { + resume_after_cut_inner(&mut parser, &mut recover, input) + }) +} + +fn resume_after_cut_inner( + parser: &mut P, + recover: &mut R, + i: &mut I, +) -> winnow::Result, E> +where + P: winnow::Parser, + R: winnow::Parser, + I: winnow::stream::Stream, + I: winnow::stream::Recover, + E: winnow::error::ParserError + winnow::error::FromRecoverableError, +{ + let token_start = i.checkpoint(); + let mut err = match parser.parse_next(i) { + Ok(o) => { + return Ok(Some(o)); + } + Err(e) if e.is_incomplete() || e.is_backtrack() => { + return Err(e); + } + Err(err) => err, + }; + let err_start = i.checkpoint(); + if recover.parse_next(i).is_ok() { + if let Err(err_) = i.record_err(&token_start, &err_start, err) { + err = err_; + } else { + return Ok(None); + } + } + + i.reset(&err_start); + err = E::from_recoverable_error(&token_start, &err_start, i, err); + Err(err) +} + #[cfg(test)] mod tests { use crate::Context; diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 8fa35c0..df24653 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -1,4 +1,3 @@ -use std::ops::Range; use std::sync::Arc; use annotate_snippets::AnnotationKind; @@ -31,14 +30,12 @@ use winnow::token::rest; use winnow::token::take_until; use winnow::token::take_while; +use crate::SourceSpan; +use crate::resume_after_cut; + type Input<'input> = Recoverable, ParseError>; type PResult<'input, T> = Result; -#[derive(Debug, Clone)] -pub struct SourceSpan { - pub range: Range, -} - #[derive(Debug)] pub struct ParseFailure { source: Arc, @@ -85,7 +82,7 @@ impl ParseFailure { pub struct ParseError { pub(crate) message: Option, pub(crate) help: Option, - pub(crate) span: Option, + pub(crate) span: Option, is_fatal: bool, } @@ -190,15 +187,104 @@ impl<'input> ParsedTemplate<'input> { } } -#[derive(Debug, Clone)] -pub enum TemplateToken<'input> { - Content(&'input str), - LeftDelim(&'input str), - RightDelim(&'input str), - WantsOutput(&'input str), - Ident(&'input str), - Whitespace(&'input str), - Invalid(&'input str), +#[derive(Debug, Copy, Clone, PartialEq, Eq)] +pub enum TokenKind { + Content, + LeftDelim, + RightDelim, + WantsOutput, + Ident, + Whitespace, + Invalid, +} + +impl PartialEq for TemplateToken<'_> { + fn eq(&self, other: &TokenKind) -> bool { + self.kind == *other + } +} + +impl winnow::stream::ContainsToken<&'_ TemplateToken<'_>> for TokenKind { + fn contains_token(&self, token: &'_ TemplateToken<'_>) -> bool { + *self == token.kind + } +} + +impl winnow::stream::ContainsToken<&'_ TemplateToken<'_>> for &'_ [TokenKind] { + fn contains_token(&self, token: &'_ TemplateToken<'_>) -> bool { + self.contains(&token.kind) + } +} + +impl winnow::stream::ContainsToken<&'_ TemplateToken<'_>> + for &'_ [TokenKind; LEN] +{ + fn contains_token(&self, token: &'_ TemplateToken<'_>) -> bool { + self.contains(&token.kind) + } +} + +impl winnow::stream::ContainsToken<&'_ TemplateToken<'_>> for [TokenKind; LEN] { + fn contains_token(&self, token: &'_ TemplateToken<'_>) -> bool { + self.contains(&token.kind) + } +} + +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct TemplateToken<'input> { + kind: TokenKind, + source: &'input str, +} + +impl<'input> TemplateToken<'input> { + fn content(source: &'input str) -> Self { + TemplateToken { + kind: TokenKind::Content, + source, + } + } + + fn left_delim(source: &'input str) -> Self { + TemplateToken { + kind: TokenKind::LeftDelim, + source, + } + } + + fn right_delim(source: &'input str) -> Self { + TemplateToken { + kind: TokenKind::RightDelim, + source, + } + } + + fn wants_output(source: &'input str) -> Self { + TemplateToken { + kind: TokenKind::WantsOutput, + source, + } + } + + fn ident(source: &'input str) -> Self { + TemplateToken { + kind: TokenKind::Ident, + source, + } + } + + fn whitespace(source: &'input str) -> Self { + TemplateToken { + kind: TokenKind::Whitespace, + source, + } + } + + fn invalid(source: &'input str) -> Self { + TemplateToken { + kind: TokenKind::Invalid, + source, + } + } } pub fn parse(input: &str) -> Result, ParseFailure> { @@ -225,7 +311,7 @@ fn parse_content<'input>(input: &mut Input<'input>) -> PResult<'input, Vec( input: &mut Input<'input>, ) -> PResult<'input, Vec>> { let prev_whitespace = opt(parse_whitespace).parse_next(input)?; - let left_delim = "{{".map(TemplateToken::LeftDelim).parse_next(input)?; + let left_delim = "{{".map(TemplateToken::left_delim).parse_next(input)?; let get_tokens = repeat_till(1.., parse_interpolate_token, peek("}}")); let recover = take_until(0.., "}}").void(); @@ -242,10 +328,10 @@ fn parse_interpolate<'input>( let (inside_tokens, _): (Vec<_>, _) = get_tokens .resume_after(recover) .with_taken() - .map(|(val, taken)| val.unwrap_or_else(|| (vec![TemplateToken::Invalid(taken)], ""))) + .map(|(val, taken)| val.unwrap_or_else(|| (vec![TemplateToken::invalid(taken)], ""))) .parse_next(input)?; - let right_delim = "}}".map(TemplateToken::RightDelim).parse_next(input)?; + let right_delim = "}}".map(TemplateToken::right_delim).parse_next(input)?; let post_whitespace = opt(parse_whitespace).parse_next(input)?; let mut tokens = vec![]; @@ -271,7 +357,7 @@ fn parse_interpolate_token<'input>( fn parse_whitespace<'input>(input: &mut Input<'input>) -> PResult<'input, TemplateToken<'input>> { trace( "parse_whitespace", - multispace1.map(TemplateToken::Whitespace), + multispace1.map(TemplateToken::whitespace), ) .parse_next(input) } @@ -279,7 +365,7 @@ fn parse_whitespace<'input>(input: &mut Input<'input>) -> PResult<'input, Templa fn parse_ident<'input>(input: &mut Input<'input>) -> PResult<'input, TemplateToken<'input>> { resume_after_cut( terminated( - ident.map(TemplateToken::Ident), + ident.map(TemplateToken::ident), cut_err(ident_terminator_check), ) .context( @@ -290,7 +376,7 @@ fn parse_ident<'input>(input: &mut Input<'input>) -> PResult<'input, TemplateTok bad_ident, ) .with_taken() - .map(|(val, taken)| val.unwrap_or(TemplateToken::Invalid(taken))) + .map(|(val, taken)| val.unwrap_or(TemplateToken::invalid(taken))) .parse_next(input) } @@ -317,58 +403,6 @@ fn ident_terminator<'input>(input: &mut Input<'input>) -> PResult<'input, ()> { .parse_next(input) } -// This is just like the standard .resume_after(), except we only resume on Cut errors. -fn resume_after_cut( - mut parser: ParseNext, - mut recover: ParseRecover, -) -> impl Parser, Error> -where - Input: Stream + winnow::stream::Recover, - Error: ParserError + FromRecoverableError, - ParseNext: Parser, - ParseRecover: Parser, -{ - trace("resume_after_cut", move |input: &mut Input| { - resume_after_cut_inner(&mut parser, &mut recover, input) - }) -} - -fn resume_after_cut_inner( - parser: &mut P, - recover: &mut R, - i: &mut I, -) -> winnow::Result, E> -where - P: Parser, - R: Parser, - I: Stream, - I: winnow::stream::Recover, - E: ParserError + FromRecoverableError, -{ - let token_start = i.checkpoint(); - let mut err = match parser.parse_next(i) { - Ok(o) => { - return Ok(Some(o)); - } - Err(e) if e.is_incomplete() || e.is_backtrack() => { - return Err(e); - } - Err(err) => err, - }; - let err_start = i.checkpoint(); - if recover.parse_next(i).is_ok() { - if let Err(err_) = i.record_err(&token_start, &err_start, err) { - err = err_; - } else { - return Ok(None); - } - } - - i.reset(&err_start); - err = E::from_recoverable_error(&token_start, &err_start, i, err); - Err(err) -} - #[cfg(test)] mod tests { use crate::parser::parse; @@ -382,9 +416,10 @@ mod tests { Ok( ParsedTemplate { tokens: [ - Content( - "Hello There", - ), + TemplateToken { + kind: Content, + source: "Hello There", + }, ], }, ) @@ -400,27 +435,34 @@ mod tests { Ok( ParsedTemplate { tokens: [ - Content( - "Hello", - ), - Whitespace( - " ", - ), - LeftDelim( - "{{", - ), - Whitespace( - " ", - ), - Ident( - "there", - ), - Whitespace( - " ", - ), - RightDelim( - "}}", - ), + TemplateToken { + kind: Content, + source: "Hello", + }, + TemplateToken { + kind: Whitespace, + source: " ", + }, + TemplateToken { + kind: LeftDelim, + source: "{{", + }, + TemplateToken { + kind: Whitespace, + source: " ", + }, + TemplateToken { + kind: Ident, + source: "there", + }, + TemplateToken { + kind: Whitespace, + source: " ", + }, + TemplateToken { + kind: RightDelim, + source: "}}", + }, ], }, )