Add AST parsing

Signed-off-by: Marcel Müller <neikos@neikos.email>
This commit is contained in:
Marcel Müller 2026-03-06 08:25:17 +01:00
parent c5a2179b9e
commit f5050e369e
3 changed files with 467 additions and 100 deletions

267
src/ast/mod.rs Normal file
View file

@ -0,0 +1,267 @@
use winnow::Parser;
use winnow::RecoverableParser;
use winnow::combinator::alt;
use winnow::combinator::cut_err;
use winnow::combinator::delimited;
use winnow::combinator::opt;
use winnow::combinator::repeat;
use winnow::combinator::repeat_till;
use winnow::error::AddContext;
use winnow::error::FromRecoverableError;
use winnow::error::ModalError;
use winnow::error::ParserError;
use winnow::stream::Recoverable;
use winnow::stream::Stream;
use winnow::stream::TokenSlice;
use winnow::token::any;
use crate::parser::TemplateToken;
use crate::parser::TokenKind;
use crate::resume_after_cut;
/// Abstract syntax tree of a whole template, as returned by [`parse`].
#[derive(Debug, Clone)]
pub struct TemplateAst<'input> {
/// Top-level expressions of the template, in the order they were parsed.
root: Vec<TemplateAstExpr<'input>>,
}
/// Error value used by the token-level AST parsers in this module.
///
/// The winnow error traits (`ModalError`, `FromRecoverableError`,
/// `AddContext`, `ParserError`) are implemented for it further down.
#[derive(Debug, Clone)]
pub struct AstError {
/// Primary description of the failure, if one was attached.
pub(crate) message: Option<String>,
/// Additional help text attached alongside the message, if any.
pub(crate) help: Option<String>,
/// Source range the error refers to.
/// NOTE(review): nothing visible in this module fills this in yet -- confirm.
pub(crate) span: Option<crate::SourceSpan>,
/// `true` once the error has been marked as a cut (non-backtracking) error.
is_fatal: bool,
}
impl AstError {
fn ctx() -> Self {
AstError {
message: None,
help: None,
span: None,
is_fatal: false,
}
}
fn msg(mut self, message: &str) -> Self {
self.message = Some(message.to_string());
self
}
fn help(mut self, help: &str) -> Self {
self.help = Some(help.to_string());
self
}
}
impl ModalError for AstError {
    /// Marks the error as fatal so that it is no longer reported as a
    /// backtracking error.
    fn cut(self) -> Self {
        Self {
            is_fatal: true,
            ..self
        }
    }

    /// Clears the fatal flag so that the error is reported as backtracking.
    fn backtrack(self) -> Self {
        Self {
            is_fatal: false,
            ..self
        }
    }
}
impl<'input> FromRecoverableError<Input<'input>, AstError> for AstError {
fn from_recoverable_error(
token_start: &<Input<'input> as winnow::stream::Stream>::Checkpoint,
_err_start: &<Input<'input> as winnow::stream::Stream>::Checkpoint,
input: &Input<'input>,
mut e: AstError,
) -> Self {
e
}
}
impl<'input> AddContext<Input<'input>, AstError> for AstError {
fn add_context(
mut self,
_input: &Input<'input>,
_token_start: &<Input<'input> as Stream>::Checkpoint,
context: AstError,
) -> Self {
self.message = context.message.or(self.message);
self.help = context.help.or(self.help);
self
}
}
impl<'input> ParserError<Input<'input>> for AstError {
    type Inner = Self;

    /// Creates the blank error that parsers start from.
    fn from_input(_input: &Input<'input>) -> Self {
        Self::ctx()
    }

    /// The error is its own inner representation, so this always succeeds.
    fn into_inner(self) -> winnow::Result<Self::Inner, Self> {
        Ok(self)
    }

    /// An error backtracks unless it has been marked fatal via `cut`.
    fn is_backtrack(&self) -> bool {
        !self.is_fatal
    }
}
/// Failure returned by [`parse`] when no clean AST could be produced.
#[derive(Debug)]
pub struct AstFailure {}

impl AstFailure {
    /// Builds the failure value for a parse that produced `_errors` on `_input`.
    ///
    /// Both arguments are currently discarded; the resulting value carries no
    /// details.
    fn from_errors(_errors: Vec<AstError>, _input: &[TemplateToken<'_>]) -> AstFailure {
        Self {}
    }
}
/// Token stream consumed by the AST parsers: a slice of lexer tokens wrapped
/// in `Recoverable` with `AstError` as the recorded error type.
type Input<'input> = Recoverable<TokenSlice<'input, TemplateToken<'input>>, AstError>;
/// Lets a bare `TokenKind` value be used directly as a parser over `Input`.
///
/// The kind is matched with `winnow::token::literal`, and the first token of
/// the matched slice is cloned and returned.
impl<'i> Parser<Input<'i>, TemplateToken<'i>, AstError> for TokenKind {
fn parse_next(&mut self, input: &mut Input<'i>) -> winnow::Result<TemplateToken<'i>, AstError> {
winnow::token::literal(*self)
.parse_next(input)
// `t` is the slice of tokens matched by `literal`; only its first element is kept.
// NOTE(review): presumably the slice always holds exactly one token -- confirm.
.map(|t| t[0].clone())
}
}
/// Parses a token slice into a [`TemplateAst`].
///
/// # Errors
///
/// Returns an [`AstFailure`] when the recoverable parse recorded any error
/// or did not yield a value.
pub fn parse<'input>(
    input: &'input [TemplateToken<'input>],
) -> Result<TemplateAst<'input>, AstFailure> {
    let (_remaining, val, errors) = parse_ast.recoverable_parse(TokenSlice::new(input));

    // Only an error-free run that also produced a value counts as success.
    match (errors.is_empty(), val) {
        (true, Some(root)) => Ok(TemplateAst { root }),
        _ => Err(AstFailure::from_errors(errors, input)),
    }
}
/// A single node of the template AST.
#[derive(Debug, Clone)]
pub enum TemplateAstExpr<'input> {
/// A `Content` token that is carried over as-is.
StaticContent(TemplateToken<'input>),
/// An expression enclosed between a left and a right delimiter token.
Interpolation {
/// Whitespace token directly before the left delimiter, if any.
prev_whitespace: Option<TemplateToken<'input>>,
/// The expression found between the delimiters.
expression: Box<TemplateAstExpr<'input>>,
/// Whitespace token directly after the right delimiter, if any.
post_whitespace: Option<TemplateToken<'input>>,
},
/// A single `Ident` token.
VariableAccess(TemplateToken<'input>),
/// Tokens consumed while recovering from an expression that failed to parse.
Invalid(&'input [TemplateToken<'input>]),
}
/// Parses zero or more top-level nodes, each either static content or an
/// interpolation, and collects them in order.
fn parse_ast<'input>(input: &mut Input<'input>) -> Result<Vec<TemplateAstExpr<'input>>, AstError> {
    let static_content = TokenKind::Content.map(TemplateAstExpr::StaticContent);
    let node = alt((static_content, parse_interpolation));
    repeat(0.., node).parse_next(input)
}
fn parse_interpolation<'input>(
input: &mut Input<'input>,
) -> Result<TemplateAstExpr<'input>, AstError> {
let expr_parser = resume_after_cut(
alt((parse_variable_access,)),
repeat_till(1.., any, TokenKind::RightDelim).map(|((), _)| ()),
)
.with_taken()
.map(|(expr, taken)| expr.unwrap_or(TemplateAstExpr::Invalid(taken)));
let (prev_whitespace, _left, (expression, _right, post_whitespace)) = (
opt(TokenKind::Whitespace),
TokenKind::LeftDelim,
cut_err((
delimited(ignore_ws, expr_parser, ignore_ws).map(Box::new),
TokenKind::RightDelim,
opt(TokenKind::Whitespace),
)),
)
.parse_next(input)?;
Ok(TemplateAstExpr::Interpolation {
prev_whitespace,
expression,
post_whitespace,
})
}
/// Parses a single `Ident` token as a variable access.
fn parse_variable_access<'input>(
    input: &mut Input<'input>,
) -> Result<TemplateAstExpr<'input>, AstError> {
    let mut variable = TokenKind::Ident.map(TemplateAstExpr::VariableAccess);
    variable.parse_next(input)
}
/// Consumes any number of `Whitespace` tokens (including none) and discards them.
fn ignore_ws<'input>(input: &mut Input<'input>) -> Result<(), AstError> {
    let mut skip_whitespace = repeat(0.., TokenKind::Whitespace);
    skip_whitespace.parse_next(input)
}
// Snapshot tests that run the lexer (`crate::parser::parse`) and feed its
// tokens into the AST parser.
#[cfg(test)]
mod tests {
use crate::ast::parse;
// A template without delimiters becomes a single `StaticContent` node.
#[test]
fn check_only_content() {
let input = "Hello World";
let parsed = crate::parser::parse(input).unwrap();
let ast = parse(parsed.tokens()).unwrap();
insta::assert_debug_snapshot!(ast, @r#"
TemplateAst {
root: [
StaticContent(
TemplateToken {
kind: Content,
source: "Hello World",
},
),
],
}
"#);
}
// Content followed by one interpolation; the whitespace in front of the
// left delimiter is kept on the interpolation node.
#[test]
fn check_simple_variable_interpolation() {
let input = "Hello {{ world }}";
let parsed = crate::parser::parse(input).unwrap();
let ast = parse(parsed.tokens()).unwrap();
insta::assert_debug_snapshot!(ast, @r#"
TemplateAst {
root: [
StaticContent(
TemplateToken {
kind: Content,
source: "Hello",
},
),
Interpolation {
prev_whitespace: Some(
TemplateToken {
kind: Whitespace,
source: " ",
},
),
expression: VariableAccess(
TemplateToken {
kind: Ident,
source: "world",
},
),
post_whitespace: None,
},
],
}
"#);
}
}

View file

@ -5,8 +5,9 @@ use displaydoc::Display;
use serde::Serialize;
use thiserror::Error;
pub mod parser;
pub mod ast;
pub mod eval;
pub mod parser;
#[derive(Debug, Error, Display)]
pub enum TempleError {
@ -79,6 +80,63 @@ impl Context {
}
}
/// A range within a template source, attached to errors to locate them.
#[derive(Debug, Clone)]
pub struct SourceSpan {
/// Start and end of the span.
/// NOTE(review): presumably byte offsets into the source text -- confirm.
pub range: std::ops::Range<usize>,
}
// This is just like the standard .resume_after(), except we only resume on Cut errors.
//
// `parser` is run first. If it succeeds, its output is returned wrapped in
// `Some`. Backtracking and incomplete errors are passed on untouched. For any
// other error, `recover` is run and, if the input accepts the recorded error,
// the combined parser yields `None`.
fn resume_after_cut<Input, Output, Error, ParseNext, ParseRecover>(
mut parser: ParseNext,
mut recover: ParseRecover,
) -> impl winnow::Parser<Input, Option<Output>, Error>
where
Input: winnow::stream::Stream + winnow::stream::Recover<Error>,
Error: winnow::error::ParserError<Input> + winnow::error::FromRecoverableError<Input, Error>,
ParseNext: winnow::Parser<Input, Output, Error>,
ParseRecover: winnow::Parser<Input, (), Error>,
{
// `trace` labels this combinator as "resume_after_cut".
winnow::combinator::trace("resume_after_cut", move |input: &mut Input| {
resume_after_cut_inner(&mut parser, &mut recover, input)
})
}
/// Runs `parser` and, if it fails with a cut error, tries to skip the broken
/// input with `recover`.
///
/// Returns `Ok(Some(output))` when `parser` succeeds and `Ok(None)` when the
/// error was recorded on the input after a successful recovery. Backtracking
/// and incomplete errors are returned as-is. If recovery fails, or the input
/// refuses to record the error, the input is reset to where the error
/// occurred and the error is returned.
fn resume_after_cut_inner<P, R, I, O, E>(
    parser: &mut P,
    recover: &mut R,
    i: &mut I,
) -> winnow::Result<Option<O>, E>
where
    P: winnow::Parser<I, O, E>,
    R: winnow::Parser<I, (), E>,
    I: winnow::stream::Stream + winnow::stream::Recover<E>,
    E: winnow::error::ParserError<I> + winnow::error::FromRecoverableError<I, E>,
{
    let token_start = i.checkpoint();

    let cut_error = match parser.parse_next(i) {
        Ok(output) => return Ok(Some(output)),
        // Only cut errors are candidates for recovery.
        Err(e) if e.is_incomplete() || e.is_backtrack() => return Err(e),
        Err(e) => e,
    };

    let err_start = i.checkpoint();

    let unrecorded = if recover.parse_next(i).is_ok() {
        match i.record_err(&token_start, &err_start, cut_error) {
            Ok(_) => return Ok(None),
            // The input handed the error back instead of recording it.
            Err(returned) => returned,
        }
    } else {
        cut_error
    };

    // Recovery did not work out: rewind to the point of failure and report.
    i.reset(&err_start);
    Err(E::from_recoverable_error(
        &token_start,
        &err_start,
        i,
        unrecorded,
    ))
}
#[cfg(test)]
mod tests {
use crate::Context;

View file

@ -1,4 +1,3 @@
use std::ops::Range;
use std::sync::Arc;
use annotate_snippets::AnnotationKind;
@ -31,14 +30,12 @@ use winnow::token::rest;
use winnow::token::take_until;
use winnow::token::take_while;
use crate::SourceSpan;
use crate::resume_after_cut;
type Input<'input> = Recoverable<LocatingSlice<&'input str>, ParseError>;
type PResult<'input, T> = Result<T, ParseError>;
/// A range within a template source, attached to errors to locate them.
#[derive(Debug, Clone)]
pub struct SourceSpan {
/// Start and end of the span.
/// NOTE(review): presumably byte offsets into the source text -- confirm.
pub range: Range<usize>,
}
#[derive(Debug)]
pub struct ParseFailure {
source: Arc<str>,
@ -85,7 +82,7 @@ impl ParseFailure {
pub struct ParseError {
pub(crate) message: Option<String>,
pub(crate) help: Option<String>,
pub(crate) span: Option<SourceSpan>,
pub(crate) span: Option<crate::SourceSpan>,
is_fatal: bool,
}
@ -190,15 +187,104 @@ impl<'input> ParsedTemplate<'input> {
}
}
#[derive(Debug, Clone)]
pub enum TemplateToken<'input> {
Content(&'input str),
LeftDelim(&'input str),
RightDelim(&'input str),
WantsOutput(&'input str),
Ident(&'input str),
Whitespace(&'input str),
Invalid(&'input str),
/// The category of a [`TemplateToken`], independent of its source text.
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
pub enum TokenKind {
/// Plain template text.
Content,
/// Opening delimiter of an interpolation.
LeftDelim,
/// Closing delimiter of an interpolation.
RightDelim,
/// NOTE(review): meaning not visible in this part of the file -- confirm.
WantsOutput,
/// An identifier.
Ident,
/// A run of whitespace.
Whitespace,
/// Input that could not be tokenized properly.
Invalid,
}
/// A token equals a `TokenKind` when its own kind is that kind; the source
/// text is not considered.
impl PartialEq<TokenKind> for TemplateToken<'_> {
    fn eq(&self, other: &TokenKind) -> bool {
        *other == self.kind
    }
}
/// A single `TokenKind` "contains" exactly the tokens of that kind.
impl winnow::stream::ContainsToken<&'_ TemplateToken<'_>> for TokenKind {
    fn contains_token(&self, token: &'_ TemplateToken<'_>) -> bool {
        token.kind == *self
    }
}
/// A slice of kinds contains a token when the token's kind is one of them.
impl winnow::stream::ContainsToken<&'_ TemplateToken<'_>> for &'_ [TokenKind] {
    fn contains_token(&self, token: &'_ TemplateToken<'_>) -> bool {
        self.iter().any(|kind| *kind == token.kind)
    }
}

/// A borrowed array of kinds contains a token when the token's kind is one of them.
impl<const LEN: usize> winnow::stream::ContainsToken<&'_ TemplateToken<'_>>
    for &'_ [TokenKind; LEN]
{
    fn contains_token(&self, token: &'_ TemplateToken<'_>) -> bool {
        self.iter().any(|kind| *kind == token.kind)
    }
}

/// An owned array of kinds contains a token when the token's kind is one of them.
impl<const LEN: usize> winnow::stream::ContainsToken<&'_ TemplateToken<'_>> for [TokenKind; LEN] {
    fn contains_token(&self, token: &'_ TemplateToken<'_>) -> bool {
        self.iter().any(|kind| *kind == token.kind)
    }
}
/// A lexed piece of a template: its category plus the source text it covers.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct TemplateToken<'input> {
/// Category of the token.
kind: TokenKind,
/// The slice of the template source this token was created from.
source: &'input str,
}
impl<'input> TemplateToken<'input> {
fn content(source: &'input str) -> Self {
TemplateToken {
kind: TokenKind::Content,
source,
}
}
fn left_delim(source: &'input str) -> Self {
TemplateToken {
kind: TokenKind::LeftDelim,
source,
}
}
fn right_delim(source: &'input str) -> Self {
TemplateToken {
kind: TokenKind::RightDelim,
source,
}
}
fn wants_output(source: &'input str) -> Self {
TemplateToken {
kind: TokenKind::WantsOutput,
source,
}
}
fn ident(source: &'input str) -> Self {
TemplateToken {
kind: TokenKind::Ident,
source,
}
}
fn whitespace(source: &'input str) -> Self {
TemplateToken {
kind: TokenKind::Whitespace,
source,
}
}
fn invalid(source: &'input str) -> Self {
TemplateToken {
kind: TokenKind::Invalid,
source,
}
}
}
pub fn parse(input: &str) -> Result<ParsedTemplate<'_>, ParseFailure> {
@ -225,7 +311,7 @@ fn parse_content<'input>(input: &mut Input<'input>) -> PResult<'input, Vec<Templ
rest.void(),
))
.take()
.map(TemplateToken::Content)
.map(TemplateToken::content)
.map(|v| vec![v])
.parse_next(input)
}
@ -234,7 +320,7 @@ fn parse_interpolate<'input>(
input: &mut Input<'input>,
) -> PResult<'input, Vec<TemplateToken<'input>>> {
let prev_whitespace = opt(parse_whitespace).parse_next(input)?;
let left_delim = "{{".map(TemplateToken::LeftDelim).parse_next(input)?;
let left_delim = "{{".map(TemplateToken::left_delim).parse_next(input)?;
let get_tokens = repeat_till(1.., parse_interpolate_token, peek("}}"));
let recover = take_until(0.., "}}").void();
@ -242,10 +328,10 @@ fn parse_interpolate<'input>(
let (inside_tokens, _): (Vec<_>, _) = get_tokens
.resume_after(recover)
.with_taken()
.map(|(val, taken)| val.unwrap_or_else(|| (vec![TemplateToken::Invalid(taken)], "")))
.map(|(val, taken)| val.unwrap_or_else(|| (vec![TemplateToken::invalid(taken)], "")))
.parse_next(input)?;
let right_delim = "}}".map(TemplateToken::RightDelim).parse_next(input)?;
let right_delim = "}}".map(TemplateToken::right_delim).parse_next(input)?;
let post_whitespace = opt(parse_whitespace).parse_next(input)?;
let mut tokens = vec![];
@ -271,7 +357,7 @@ fn parse_interpolate_token<'input>(
fn parse_whitespace<'input>(input: &mut Input<'input>) -> PResult<'input, TemplateToken<'input>> {
trace(
"parse_whitespace",
multispace1.map(TemplateToken::Whitespace),
multispace1.map(TemplateToken::whitespace),
)
.parse_next(input)
}
@ -279,7 +365,7 @@ fn parse_whitespace<'input>(input: &mut Input<'input>) -> PResult<'input, Templa
fn parse_ident<'input>(input: &mut Input<'input>) -> PResult<'input, TemplateToken<'input>> {
resume_after_cut(
terminated(
ident.map(TemplateToken::Ident),
ident.map(TemplateToken::ident),
cut_err(ident_terminator_check),
)
.context(
@ -290,7 +376,7 @@ fn parse_ident<'input>(input: &mut Input<'input>) -> PResult<'input, TemplateTok
bad_ident,
)
.with_taken()
.map(|(val, taken)| val.unwrap_or(TemplateToken::Invalid(taken)))
.map(|(val, taken)| val.unwrap_or(TemplateToken::invalid(taken)))
.parse_next(input)
}
@ -317,58 +403,6 @@ fn ident_terminator<'input>(input: &mut Input<'input>) -> PResult<'input, ()> {
.parse_next(input)
}
// This is just like the standard .resume_after(), except we only resume on Cut errors.
//
// `parser` is run first. If it succeeds, its output is returned wrapped in
// `Some`. Backtracking and incomplete errors are passed on untouched. For any
// other error, `recover` is run and, if the input accepts the recorded error,
// the combined parser yields `None`.
fn resume_after_cut<Input, Output, Error, ParseNext, ParseRecover>(
mut parser: ParseNext,
mut recover: ParseRecover,
) -> impl Parser<Input, Option<Output>, Error>
where
Input: Stream + winnow::stream::Recover<Error>,
Error: ParserError<Input> + FromRecoverableError<Input, Error>,
ParseNext: Parser<Input, Output, Error>,
ParseRecover: Parser<Input, (), Error>,
{
// `trace` labels this combinator as "resume_after_cut".
trace("resume_after_cut", move |input: &mut Input| {
resume_after_cut_inner(&mut parser, &mut recover, input)
})
}
// Runs `parser` and, if it fails with a cut error, tries to skip the broken
// input with `recover`.
//
// Returns `Ok(Some(output))` when `parser` succeeds and `Ok(None)` when the
// error was recorded on the input after a successful recovery.
fn resume_after_cut_inner<P, R, I, O, E>(
parser: &mut P,
recover: &mut R,
i: &mut I,
) -> winnow::Result<Option<O>, E>
where
P: Parser<I, O, E>,
R: Parser<I, (), E>,
I: Stream,
I: winnow::stream::Recover<E>,
E: ParserError<I> + FromRecoverableError<I, E>,
{
// Start of the token being parsed; passed along when the error is recorded.
let token_start = i.checkpoint();
let mut err = match parser.parse_next(i) {
Ok(o) => {
return Ok(Some(o));
}
// Only cut errors are candidates for recovery; everything else is passed on.
Err(e) if e.is_incomplete() || e.is_backtrack() => {
return Err(e);
}
Err(err) => err,
};
// Position at which the cut error occurred.
let err_start = i.checkpoint();
if recover.parse_next(i).is_ok() {
// `record_err` hands the error back when the input does not record it.
if let Err(err_) = i.record_err(&token_start, &err_start, err) {
err = err_;
} else {
return Ok(None);
}
}
// Recovery did not work out: rewind to the point of failure and report.
i.reset(&err_start);
err = E::from_recoverable_error(&token_start, &err_start, i, err);
Err(err)
}
#[cfg(test)]
mod tests {
use crate::parser::parse;
@ -382,9 +416,10 @@ mod tests {
Ok(
ParsedTemplate {
tokens: [
Content(
"Hello There",
),
TemplateToken {
kind: Content,
source: "Hello There",
},
],
},
)
@ -400,27 +435,34 @@ mod tests {
Ok(
ParsedTemplate {
tokens: [
Content(
"Hello",
),
Whitespace(
" ",
),
LeftDelim(
"{{",
),
Whitespace(
" ",
),
Ident(
"there",
),
Whitespace(
" ",
),
RightDelim(
"}}",
),
TemplateToken {
kind: Content,
source: "Hello",
},
TemplateToken {
kind: Whitespace,
source: " ",
},
TemplateToken {
kind: LeftDelim,
source: "{{",
},
TemplateToken {
kind: Whitespace,
source: " ",
},
TemplateToken {
kind: Ident,
source: "there",
},
TemplateToken {
kind: Whitespace,
source: " ",
},
TemplateToken {
kind: RightDelim,
source: "}}",
},
],
},
)