nomo/src/parser/mod.rs
Marcel Müller 8afc2d1bde Add parsing for conditionals (cont.)
Signed-off-by: Marcel Müller <neikos@neikos.email>
2026-03-08 15:06:29 +01:00

570 lines
15 KiB
Rust

use std::sync::Arc;
use annotate_snippets::AnnotationKind;
use annotate_snippets::Level;
use annotate_snippets::Renderer;
use annotate_snippets::Snippet;
use thiserror::Error;
use winnow::LocatingSlice;
use winnow::Parser;
use winnow::RecoverableParser;
use winnow::ascii::multispace0;
use winnow::ascii::multispace1;
use winnow::combinator::alt;
use winnow::combinator::cut_err;
use winnow::combinator::eof;
use winnow::combinator::not;
use winnow::combinator::opt;
use winnow::combinator::peek;
use winnow::combinator::repeat_till;
use winnow::combinator::terminated;
use winnow::combinator::trace;
use winnow::error::AddContext;
use winnow::error::FromRecoverableError;
use winnow::error::ModalError;
use winnow::error::ParserError;
use winnow::stream::Location;
use winnow::stream::Recoverable;
use winnow::stream::Stream;
use winnow::token::any;
use winnow::token::one_of;
use winnow::token::rest;
use winnow::token::take_until;
use winnow::token::take_while;
use crate::SourceSpan;
use crate::input::NomoInput;
use crate::resume_after_cut;
/// Stream type consumed by every parser in this module: the template source wrapped in a
/// `LocatingSlice` and a `Recoverable` layer whose error type is `ParseError`.
// NOTE(review): the `'input` lifetime does not appear on the right-hand side — presumably kept
// so the parser signatures stay stable; confirm before removing.
type Input<'input> = Recoverable<LocatingSlice<NomoInput>, ParseError>;
/// Result type returned by the parsers in this module; the error is always `ParseError`.
type PResult<'input, T> = Result<T, ParseError>;
/// Error returned by [`parse`] when a template could not be parsed cleanly.
///
/// Keeps a copy of the template text next to every recorded [`ParseError`] so that
/// [`ParseFailure::to_report`] can render annotated snippets.
#[derive(Debug, Error)]
pub struct ParseFailure {
/// Copy of the template source the errors refer to.
input: Arc<str>,
/// Errors collected while parsing.
errors: Vec<ParseError>,
}
impl std::fmt::Display for ParseFailure {
    /// Writes the rendered report produced by [`ParseFailure::to_report`].
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let report = self.to_report();
        f.write_str(report.as_str())
    }
}
impl ParseFailure {
fn from_errors(errors: Vec<ParseError>, input: NomoInput) -> ParseFailure {
ParseFailure {
input: Arc::from(input.to_string()),
errors,
}
}
pub fn to_report(&self) -> String {
let reports = self
.errors
.iter()
.map(|error| {
Level::ERROR
.primary_title(
error
.message
.as_deref()
.unwrap_or("An error occurred while parsing"),
)
.element(
Snippet::source(self.input.as_ref()).annotation(
AnnotationKind::Primary
.span(error.span.clone().map(|s| s.range).unwrap_or_else(|| 0..0)),
),
)
.elements(error.help.as_ref().map(|help| Level::HELP.message(help)))
})
.collect::<Vec<_>>();
let renderer =
Renderer::styled().decor_style(annotate_snippets::renderer::DecorStyle::Unicode);
renderer.render(&reports)
}
}
/// A single diagnostic produced while parsing a template.
///
/// Also serves as the winnow error type and as the context value attached via `.context(...)`.
#[derive(Debug, Clone)]
pub struct ParseError {
/// Headline of the diagnostic; a generic title is used in reports when absent.
pub(crate) message: Option<String>,
/// Optional hint rendered as a help line in reports.
pub(crate) help: Option<String>,
/// Range of the template source the error refers to, if known.
pub(crate) span: Option<crate::SourceSpan>,
/// Set by `cut`, cleared by `backtrack`; a fatal error is not reported as a backtrack.
is_fatal: bool,
}
impl ParseError {
fn ctx() -> Self {
ParseError {
message: None,
help: None,
span: None,
is_fatal: false,
}
}
fn msg(mut self, message: &str) -> Self {
self.message = Some(message.to_string());
self
}
fn help(mut self, help: &str) -> Self {
self.help = Some(help.to_string());
self
}
}
impl ModalError for ParseError {
    /// Marks the error as fatal.
    fn cut(self) -> Self {
        Self {
            is_fatal: true,
            ..self
        }
    }

    /// Marks the error as non-fatal again.
    fn backtrack(self) -> Self {
        Self {
            is_fatal: false,
            ..self
        }
    }
}
impl<'input> FromRecoverableError<Input<'input>, ParseError> for ParseError {
    /// Fills in the span of a recovered error, unless the error already carries one.
    ///
    /// The span runs from `token_start` to the current position of `input`.
    fn from_recoverable_error(
        token_start: &<Input<'input> as winnow::stream::Stream>::Checkpoint,
        _err_start: &<Input<'input> as winnow::stream::Stream>::Checkpoint,
        input: &Input<'input>,
        mut e: ParseError,
    ) -> Self {
        if e.span.is_none() {
            e.span = Some(span_from_checkpoint(input, token_start));
        }
        e
    }
}
impl<'input> AddContext<Input<'input>, ParseError> for ParseError {
fn add_context(
mut self,
_input: &Input<'input>,
_token_start: &<Input<'input> as Stream>::Checkpoint,
context: ParseError,
) -> Self {
self.message = context.message.or(self.message);
self.help = context.help.or(self.help);
self
}
}
/// Builds the span that starts at `token_start` and ends at the current position of `input`.
fn span_from_checkpoint<I: Stream + Location>(
    input: &I,
    token_start: &<I as Stream>::Checkpoint,
) -> SourceSpan {
    let end = input.current_token_start();
    // Distance between the checkpoint and the current position.
    let consumed = input.offset_from(token_start);
    SourceSpan {
        range: (end - consumed)..end,
    }
}
impl<'input> ParserError<Input<'input>> for ParseError {
    type Inner = Self;

    /// Starts from an empty error; message, help and span are attached later.
    fn from_input(_input: &Input<'input>) -> Self {
        Self::ctx()
    }

    /// The error is its own inner type, so this always succeeds.
    fn into_inner(self) -> winnow::Result<Self::Inner, Self> {
        Ok(self)
    }

    /// An error is a backtrack exactly when it has not been marked fatal.
    fn is_backtrack(&self) -> bool {
        let fatal = self.is_fatal;
        !fatal
    }
}
/// Successful result of [`parse`]: the flat list of tokens found in a template.
#[derive(Debug)]
pub struct ParsedTemplate {
/// Tokens in the order they were parsed.
tokens: Vec<TemplateToken>,
}
impl ParsedTemplate {
    /// Returns the parsed tokens in source order.
    pub fn tokens(&self) -> &[TemplateToken] {
        self.tokens.as_slice()
    }
}
/// Classification of a [`TemplateToken`].
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
pub enum TokenKind {
/// Template text outside of `{{ ... }}` blocks.
Content,
/// The opening `{{`.
LeftDelim,
/// The closing `}}`.
RightDelim,
/// The `=` directly following `{{`.
WantsOutput,
/// An identifier made of alphanumeric characters and `_`.
Ident,
/// A run of whitespace.
Whitespace,
/// Source text that failed to parse and was skipped during error recovery.
Invalid,
/// The `if` keyword.
ConditionalIf,
/// The `end` keyword.
End,
/// A literal value.
Literal(TokenLiteral),
}
/// Value carried by a [`TokenKind::Literal`] token.
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
pub enum TokenLiteral {
/// The boolean literals `true` and `false`.
Bool(bool),
}
impl PartialEq<TokenKind> for TemplateToken {
    /// A token equals a kind when its own kind matches; the source text is ignored.
    fn eq(&self, other: &TokenKind) -> bool {
        *other == self.kind
    }
}
impl winnow::stream::ContainsToken<&'_ TemplateToken> for TokenKind {
    /// A single kind "contains" a token when the token has exactly that kind.
    fn contains_token(&self, token: &'_ TemplateToken) -> bool {
        token.kind == *self
    }
}
impl winnow::stream::ContainsToken<&'_ TemplateToken> for &'_ [TokenKind] {
    /// True when the token's kind is one of the kinds in the slice.
    fn contains_token(&self, token: &'_ TemplateToken) -> bool {
        self.iter().any(|kind| *kind == token.kind)
    }
}
impl<const LEN: usize> winnow::stream::ContainsToken<&'_ TemplateToken> for &'_ [TokenKind; LEN] {
    /// True when the token's kind is one of the kinds in the borrowed array.
    fn contains_token(&self, token: &'_ TemplateToken) -> bool {
        self.iter().any(|kind| *kind == token.kind)
    }
}
impl<const LEN: usize> winnow::stream::ContainsToken<&'_ TemplateToken> for [TokenKind; LEN] {
    /// True when the token's kind is one of the kinds in the array.
    fn contains_token(&self, token: &'_ TemplateToken) -> bool {
        self.iter().any(|kind| *kind == token.kind)
    }
}
/// A lexed piece of a template: its classification plus the source text it was cut from.
#[derive(Clone, PartialEq, Eq)]
pub struct TemplateToken {
/// What kind of token this is.
kind: TokenKind,
/// The slice of the template this token covers.
source: NomoInput,
}
impl std::fmt::Debug for TemplateToken {
    /// Prints only the token's source; the kind is omitted.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let source = &self.source;
        // Formatted through a fresh `{:?}` spec on purpose: flags of the outer formatter
        // (such as `#` for pretty printing) are not forwarded to the source.
        write!(f, "{source:?}")
    }
}
impl Location for TemplateToken {
    /// End offset of this token within the template source.
    ///
    /// winnow's `Location` documents this as the "previous token's end offset".
    // NOTE(review): as I understand `TokenSlice`, it calls this on the token *preceding*
    // the current position, so the value must be this token's end. Returning the start
    // (as before) would place end-of-input locations at the start of the last token.
    // Confirm against the winnow version in use.
    fn previous_token_end(&self) -> usize {
        NomoInput::get_range(&self.source).end
    }

    /// Start offset of this token within the template source.
    fn current_token_start(&self) -> usize {
        NomoInput::get_range(&self.source).start
    }
}
// Generates one private constructor per `name => kind` pair.
//
// Each generated `fn name(source: NomoInput) -> Self` builds a `TemplateToken` with the given
// kind and source. The macro is meant to be invoked inside `impl TemplateToken`; a trailing
// comma after the last pair is allowed.
macro_rules! impl_token_kind_builders {
($($name:ident => $kind:expr),+ $(,)?) => {
$(
fn $name(source: NomoInput) -> Self {
TemplateToken {
kind: $kind,
source,
}
}
)+
};
}
impl TemplateToken {
impl_token_kind_builders! {
content => TokenKind::Content,
left_delim => TokenKind::LeftDelim,
right_delim => TokenKind::RightDelim,
wants_output => TokenKind::WantsOutput,
ident => TokenKind::Ident,
whitespace => TokenKind::Whitespace,
invalid => TokenKind::Invalid,
conditional_if => TokenKind::ConditionalIf,
end => TokenKind::End,
}
pub fn literal(literal: TokenLiteral, source: NomoInput) -> Self {
TemplateToken {
kind: TokenKind::Literal(literal),
source,
}
}
pub fn kind(&self) -> TokenKind {
self.kind
}
pub fn source(&self) -> NomoInput {
self.source.clone()
}
}
/// Tokenizes a template.
///
/// Returns the tokens only when parsing produced a value *and* no errors were recorded;
/// otherwise every recorded error is returned together with the template text.
pub fn parse(input: NomoInput) -> Result<ParsedTemplate, ParseFailure> {
    let stream = LocatingSlice::new(input.clone());
    let (_remaining, parsed, errors) = parse_tokens.recoverable_parse(stream);

    match parsed {
        Some(tokens) if errors.is_empty() => Ok(ParsedTemplate { tokens }),
        _ => Err(ParseFailure::from_errors(errors, input)),
    }
}
/// Parses the whole input as an alternation of interpolations and plain content,
/// flattening the per-chunk token lists into one vector.
fn parse_tokens<'input>(input: &mut Input<'input>) -> PResult<'input, Vec<TemplateToken>> {
    let (chunks, _eof): (Vec<Vec<TemplateToken>>, _) =
        repeat_till(0.., alt((parse_interpolate, parse_content)), eof).parse_next(input)?;
    Ok(chunks.into_iter().flatten().collect())
}
/// Parses plain template text into a single content token.
///
/// Consumes at least one character and stops before optional whitespace followed by `{{`;
/// when no such position is found, the rest of the input is taken.
fn parse_content<'input>(input: &mut Input<'input>) -> PResult<'input, Vec<TemplateToken>> {
    let up_to_delim = repeat_till(1.., any, peek((multispace0, "{{"))).map(|((), _)| ());
    let everything = rest.void();

    let token = alt((up_to_delim, everything))
        .take()
        .map(TemplateToken::content)
        .parse_next(input)?;
    Ok(vec![token])
}
/// Parses one `{{ ... }}` block, including whitespace directly around it.
///
/// The emitted tokens are, in order: optional leading whitespace, `{{`, an optional `=`,
/// the tokens inside the block, `}}`, and optional trailing whitespace. When the inside
/// fails to parse, the text skipped up to `}}` is emitted as a single invalid token.
fn parse_interpolate<'input>(input: &mut Input<'input>) -> PResult<'input, Vec<TemplateToken>> {
    let leading_ws = opt(parse_whitespace).parse_next(input)?;
    let open = "{{".map(TemplateToken::left_delim).parse_next(input)?;
    let output_marker = opt("=".map(TemplateToken::wants_output)).parse_next(input)?;

    let body = repeat_till(1.., parse_interpolate_token, peek("}}"));
    // Recovery: skip everything up to (not including) the closing delimiter.
    let skip_to_close = take_until(0.., "}}").void();
    let (inner, _): (Vec<_>, _) = body
        .resume_after(skip_to_close)
        .with_taken()
        .map(|(parsed, taken)| {
            parsed.unwrap_or_else(|| (vec![TemplateToken::invalid(taken)], NomoInput::from("")))
        })
        .parse_next(input)?;

    let close = "}}".map(TemplateToken::right_delim).parse_next(input)?;
    let trailing_ws = opt(parse_whitespace).parse_next(input)?;

    Ok(leading_ws
        .into_iter()
        .chain([open])
        .chain(output_marker)
        .chain(inner)
        .chain([close])
        .chain(trailing_ws)
        .collect())
}
/// Parses a single token inside a `{{ ... }}` block.
///
/// Alternatives are tried in order: identifier, literal, `if`, `end`, whitespace.
fn parse_interpolate_token<'input>(input: &mut Input<'input>) -> PResult<'input, TemplateToken> {
    let candidates = alt((
        parse_ident,
        parse_literal,
        parse_condition,
        parse_end,
        parse_whitespace,
    ));
    trace("parse_interpolate_token", candidates).parse_next(input)
}
/// Parses a literal and wraps it, together with the text it was parsed from,
/// in a literal token. Booleans are the only literal kind so far.
fn parse_literal<'input>(input: &mut Input<'input>) -> PResult<'input, TemplateToken> {
    trace(
        // Label used in parser debug traces; it previously read "parse_condition"
        // (copy-paste), which made literal parsing show up under the wrong name.
        "parse_literal",
        alt((parse_boolean,))
            .with_taken()
            .map(|(lit, span)| TemplateToken::literal(lit, span)),
    )
    .parse_next(input)
}
/// Parses `true` or `false` into a boolean literal.
fn parse_boolean<'input>(input: &mut Input<'input>) -> PResult<'input, TokenLiteral> {
    alt(("true".value(true), "false".value(false)))
        .map(TokenLiteral::Bool)
        .parse_next(input)
}
/// Parses the `if` keyword into a conditional token.
fn parse_condition<'input>(input: &mut Input<'input>) -> PResult<'input, TemplateToken> {
    let keyword = "if".map(TemplateToken::conditional_if);
    trace("parse_condition", keyword).parse_next(input)
}
/// Parses the `end` keyword into an end token.
fn parse_end<'input>(input: &mut Input<'input>) -> PResult<'input, TemplateToken> {
    // The trace label previously read "parse_condition" (copy-paste), which made
    // `end` parsing indistinguishable from `if` parsing in debug traces.
    trace("parse_end", "end".map(TemplateToken::end)).parse_next(input)
}
/// Parses one or more whitespace characters into a whitespace token.
fn parse_whitespace<'input>(input: &mut Input<'input>) -> PResult<'input, TemplateToken> {
    let blanks = multispace1.map(TemplateToken::whitespace);
    trace("parse_whitespace", blanks).parse_next(input)
}
/// Parses an identifier token.
///
/// An identifier must be followed by an identifier terminator; when it is not, the
/// error is cut, `bad_ident` is handed to `resume_after_cut` as the recovery parser,
/// and the consumed text is emitted as an invalid token instead.
fn parse_ident<'input>(input: &mut Input<'input>) -> PResult<'input, TemplateToken> {
    let diagnostics = ParseError::ctx()
        .msg("Invalid variable identifier")
        .help("valid variable identifiers are alphanumeric");
    let strict_ident = terminated(
        ident.map(TemplateToken::ident),
        cut_err(ident_terminator_check),
    )
    .context(diagnostics);

    resume_after_cut(strict_ident, bad_ident)
        .with_taken()
        .map(|(token, taken)| token.unwrap_or_else(|| TemplateToken::invalid(taken)))
        .parse_next(input)
}
fn ident<'input>(input: &mut Input<'input>) -> PResult<'input, NomoInput> {
peek(not(alt((parse_literal, parse_condition, parse_end))))
.context(ParseError::ctx().msg("Expected an ident, but found a literal instead"))
.parse_next(input)?;
take_while(1.., |c: char| c.is_alphanumeric() || "_".contains(c)).parse_next(input)
}
/// Recovery parser for malformed identifiers: consumes at least one character and
/// stops right before the next identifier terminator.
fn bad_ident<'input>(input: &mut Input<'input>) -> PResult<'input, ()> {
    let ((), _terminator): ((), ()) =
        repeat_till(1.., any, ident_terminator_check).parse_next(input)?;
    Ok(())
}
/// Succeeds, without consuming input, when an identifier terminator comes next.
fn ident_terminator_check<'input>(input: &mut Input<'input>) -> PResult<'input, ()> {
    let mut lookahead = peek(ident_terminator);
    lookahead.parse_next(input)
}
/// Matches what may follow an identifier: end of input, `{` or `}`, or one of the
/// whitespace characters space, tab, carriage return and newline.
fn ident_terminator<'input>(input: &mut Input<'input>) -> PResult<'input, ()> {
    let at_end = eof.void();
    let brace = one_of(('{', '}')).void();
    let blank = one_of((' ', '\t', '\r', '\n')).void();
    alt((at_end, brace, blank)).parse_next(input)
}
// Snapshot tests for the tokenizer; expected output is stored inline via `insta`.
#[cfg(test)]
mod tests {
use crate::parser::parse;
// Plain text without any delimiters becomes a single token.
#[test]
fn parse_simple() {
let input = "Hello There";
let output = parse(input.into());
insta::assert_debug_snapshot!(output, @r#"
Ok(
ParsedTemplate {
tokens: [
"Hello There" (0..11),
],
},
)
"#);
}
// Whitespace around and inside `{{ }}` is emitted as separate tokens.
#[test]
fn parse_interpolate() {
let input = "Hello {{ there }}";
let output = parse(input.into());
insta::assert_debug_snapshot!(output, @r#"
Ok(
ParsedTemplate {
tokens: [
"Hello" (0..5),
" " (5..6),
"{{" (6..8),
" " (8..9),
"there" (9..14),
" " (14..15),
"}}" (15..17),
],
},
)
"#);
}
// `the2re` is accepted, while `the@re` is reported as an invalid identifier (span 22..28).
// The rendered report is checked against a stored (non-inline) snapshot.
#[test]
fn parse_interpolate_bad() {
let input = "Hello {{ the2re }} {{ the@re }}";
let output = parse(input.into());
insta::assert_debug_snapshot!(output, @r#"
Err(
ParseFailure {
input: "Hello {{ the2re }} {{ the@re }}",
errors: [
ParseError {
message: Some(
"Invalid variable identifier",
),
help: Some(
"valid variable identifiers are alphanumeric",
),
span: Some(
SourceSpan {
range: 22..28,
},
),
is_fatal: true,
},
],
},
)
"#);
let error = output.unwrap_err();
insta::assert_snapshot!(error.to_report());
}
// `if`, `true` and `end` each come out as their own token inside the blocks.
#[test]
fn parse_simple_condition() {
let input = "{{ if true }} Hello! {{ end }}";
let output = parse(input.into());
insta::assert_debug_snapshot!(output, @r#"
Ok(
ParsedTemplate {
tokens: [
"{{" (0..2),
" " (2..3),
"if" (3..5),
" " (5..6),
"true" (6..10),
" " (10..11),
"}}" (11..13),
" " (13..14),
"Hello!" (14..20),
" " (20..21),
"{{" (21..23),
" " (23..24),
"end" (24..27),
" " (27..28),
"}}" (28..30),
],
},
)
"#);
}
}