use std::sync::Arc; use annotate_snippets::AnnotationKind; use annotate_snippets::Level; use annotate_snippets::Renderer; use annotate_snippets::Snippet; use thiserror::Error; use winnow::LocatingSlice; use winnow::Parser; use winnow::RecoverableParser; use winnow::ascii::alpha1; use winnow::ascii::digit1; use winnow::ascii::multispace0; use winnow::ascii::multispace1; use winnow::combinator::alt; use winnow::combinator::cut_err; use winnow::combinator::dispatch; use winnow::combinator::empty; use winnow::combinator::eof; use winnow::combinator::fail; use winnow::combinator::not; use winnow::combinator::opt; use winnow::combinator::peek; use winnow::combinator::preceded; use winnow::combinator::repeat_till; use winnow::combinator::terminated; use winnow::combinator::trace; use winnow::error::AddContext; use winnow::error::FromRecoverableError; use winnow::error::ModalError; use winnow::error::ParserError; use winnow::stream::Location; use winnow::stream::Recoverable; use winnow::stream::Stream; use winnow::token::any; use winnow::token::literal; use winnow::token::one_of; use winnow::token::rest; use winnow::token::take_until; use winnow::token::take_while; use crate::SourceSpan; use crate::input::NomoInput; use crate::resume_after_cut; type Input<'input> = Recoverable, ParseError>; type PResult<'input, T> = Result; #[derive(Debug, Error)] pub struct ParseFailure { input: Arc, errors: Vec, } impl std::fmt::Display for ParseFailure { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { f.write_str(&self.to_report()) } } impl ParseFailure { fn from_errors(errors: Vec, input: NomoInput) -> ParseFailure { ParseFailure { input: Arc::from(input.to_string()), errors, } } pub fn to_report(&self) -> String { let reports = self .errors .iter() .map(|error| { Level::ERROR .primary_title( error .message .as_deref() .unwrap_or("An error occurred while parsing"), ) .element( Snippet::source(self.input.as_ref()).annotation( AnnotationKind::Primary .span(error.span.clone().map(|s| s.range).unwrap_or_else(|| 0..0)), ), ) .elements(error.help.as_ref().map(|help| Level::HELP.message(help))) }) .collect::>(); let renderer = Renderer::styled().decor_style(annotate_snippets::renderer::DecorStyle::Unicode); renderer.render(&reports) } } #[derive(Debug, Clone)] pub struct ParseError { pub(crate) message: Option, pub(crate) help: Option, pub(crate) span: Option, is_fatal: bool, } impl ParseError { fn ctx() -> Self { ParseError { message: None, help: None, span: None, is_fatal: false, } } fn msg(mut self, message: &str) -> Self { self.message = Some(message.to_string()); self } fn help(mut self, help: &str) -> Self { self.help = Some(help.to_string()); self } } impl ModalError for ParseError { fn cut(mut self) -> Self { self.is_fatal = true; self } fn backtrack(mut self) -> Self { self.is_fatal = false; self } } impl<'input> FromRecoverableError, ParseError> for ParseError { fn from_recoverable_error( token_start: & as winnow::stream::Stream>::Checkpoint, _err_start: & as winnow::stream::Stream>::Checkpoint, input: &Input<'input>, mut e: ParseError, ) -> Self { e.span = e .span .or_else(|| Some(span_from_checkpoint(input, token_start))); e } } impl<'input> AddContext, ParseError> for ParseError { fn add_context( mut self, _input: &Input<'input>, _token_start: & as Stream>::Checkpoint, context: ParseError, ) -> Self { self.message = context.message.or(self.message); self.help = context.help.or(self.help); self } } fn span_from_checkpoint( input: &I, token_start: &::Checkpoint, ) -> SourceSpan { let offset = input.offset_from(token_start); SourceSpan { range: (input.current_token_start() - offset)..input.current_token_start(), } } impl<'input> ParserError> for ParseError { type Inner = ParseError; fn from_input(_input: &Input<'input>) -> Self { ParseError::ctx() } fn into_inner(self) -> winnow::Result { Ok(self) } fn is_backtrack(&self) -> bool { !self.is_fatal } } #[derive(Debug)] pub struct ParsedTemplate { tokens: Vec, } impl ParsedTemplate { pub fn tokens(&self) -> &[TemplateToken] { &self.tokens } } #[derive(Debug, Copy, Clone, PartialEq, Eq)] pub enum TokenKind { Content, LeftDelim, RightDelim, TrimWhitespace, WantsOutput, Ident, Whitespace, Invalid, ConditionalIf, ConditionalElse, For, In, End, Literal(TokenLiteral), Operator(TokenOperator), } #[derive(Debug, Copy, Clone, PartialEq, Eq)] pub enum TokenOperator { Plus, Minus, Times, Divide, And, Or, Equal, NotEqual, Greater, GreaterOrEqual, Lesser, LesserOrEqual, } #[derive(Debug, Copy, Clone, PartialEq, Eq)] pub enum TokenLiteral { Bool(bool), Integer(u64), } impl PartialEq for TemplateToken { fn eq(&self, other: &TokenKind) -> bool { self.kind == *other } } impl winnow::stream::ContainsToken<&'_ TemplateToken> for TokenKind { fn contains_token(&self, token: &'_ TemplateToken) -> bool { *self == token.kind } } impl winnow::stream::ContainsToken<&'_ TemplateToken> for &'_ [TokenKind] { fn contains_token(&self, token: &'_ TemplateToken) -> bool { self.contains(&token.kind) } } impl winnow::stream::ContainsToken<&'_ TemplateToken> for &'_ [TokenKind; LEN] { fn contains_token(&self, token: &'_ TemplateToken) -> bool { self.contains(&token.kind) } } impl winnow::stream::ContainsToken<&'_ TemplateToken> for [TokenKind; LEN] { fn contains_token(&self, token: &'_ TemplateToken) -> bool { self.contains(&token.kind) } } #[derive(Clone, PartialEq, Eq)] pub struct TemplateToken { kind: TokenKind, source: NomoInput, } impl std::fmt::Debug for TemplateToken { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { write!(f, "[{:?}]{:?}", self.kind(), self.source()) } } impl Location for TemplateToken { fn previous_token_end(&self) -> usize { NomoInput::get_range(&self.source).start } fn current_token_start(&self) -> usize { NomoInput::get_range(&self.source).start } } macro_rules! impl_token_kind_builders { ($($name:ident => $kind:expr),+ $(,)?) => { $( fn $name(source: NomoInput) -> Self { TemplateToken { kind: $kind, source, } } )+ }; } impl TemplateToken { impl_token_kind_builders! { content => TokenKind::Content, left_delim => TokenKind::LeftDelim, right_delim => TokenKind::RightDelim, trim_whitespace => TokenKind::TrimWhitespace, wants_output => TokenKind::WantsOutput, ident => TokenKind::Ident, whitespace => TokenKind::Whitespace, invalid => TokenKind::Invalid, conditional_if => TokenKind::ConditionalIf, conditional_else => TokenKind::ConditionalElse, keyword_for => TokenKind::For, keyword_in => TokenKind::In, end => TokenKind::End, } pub fn literal(literal: TokenLiteral, source: NomoInput) -> Self { TemplateToken { kind: TokenKind::Literal(literal), source, } } pub fn operator(operator: TokenOperator, source: NomoInput) -> Self { TemplateToken { kind: TokenKind::Operator(operator), source, } } pub fn kind(&self) -> TokenKind { self.kind } pub fn source(&self) -> NomoInput { self.source.clone() } } pub fn parse(input: NomoInput) -> Result { let (_remaining, val, errors) = parse_tokens.recoverable_parse(LocatingSlice::new(input.clone())); if errors.is_empty() && let Some(val) = val { Ok(ParsedTemplate { tokens: val }) } else { Err(ParseFailure::from_errors(errors, input)) } } fn parse_tokens<'input>(input: &mut Input<'input>) -> PResult<'input, Vec> { repeat_till(0.., alt((parse_interpolate, parse_content)), eof) .map(|(v, _): (Vec<_>, _)| v.into_iter().flatten().collect()) .parse_next(input) } fn parse_content<'input>(input: &mut Input<'input>) -> PResult<'input, Vec> { alt(( repeat_till(1.., any, peek((multispace0, "{{"))).map(|((), _)| ()), rest.void(), )) .take() .map(TemplateToken::content) .map(|v| vec![v]) .parse_next(input) } fn parse_interpolate<'input>(input: &mut Input<'input>) -> PResult<'input, Vec> { let prev_whitespace = opt(parse_whitespace).parse_next(input)?; let left_delim = "{{".map(TemplateToken::left_delim).parse_next(input)?; let left_trim = opt("-".map(TemplateToken::trim_whitespace)).parse_next(input)?; let wants_output = opt("=".map(TemplateToken::wants_output)).parse_next(input)?; let get_tokens = repeat_till(1.., parse_block_token, peek(preceded(opt("-"), "}}"))); let recover = take_until(0.., "}}").void(); let (inside_tokens, _): (Vec<_>, _) = get_tokens .resume_after(recover) .with_taken() .map(|(val, taken)| { val.unwrap_or_else(|| (vec![TemplateToken::invalid(taken)], NomoInput::from(""))) }) .parse_next(input)?; let right_trim = opt("-".map(TemplateToken::trim_whitespace)).parse_next(input)?; let right_delim = "}}".map(TemplateToken::right_delim).parse_next(input)?; let post_whitespace = opt(parse_whitespace).parse_next(input)?; let mut tokens = vec![]; tokens.extend(prev_whitespace); tokens.push(left_delim); tokens.extend(left_trim); tokens.extend(wants_output); tokens.extend(inside_tokens); tokens.extend(right_trim); tokens.push(right_delim); tokens.extend(post_whitespace); Ok(tokens) } fn parse_block_token<'input>(input: &mut Input<'input>) -> PResult<'input, TemplateToken> { trace( "parse_block_token", alt((parse_ident, parse_keyword, parse_whitespace, parse_operator)), ) .parse_next(input) } fn parse_literal<'input>(input: &mut Input<'input>) -> PResult<'input, TemplateToken> { trace( "parse_literal", alt((parse_boolean, parse_number)) .with_taken() .map(|(lit, span)| TemplateToken::literal(lit, span)), ) .parse_next(input) } fn parse_number<'input>(input: &mut Input<'input>) -> PResult<'input, TokenLiteral> { digit1 .verify_map(|digits: NomoInput| digits.parse::().ok()) .map(TokenLiteral::Integer) .parse_next(input) } fn parse_boolean<'input>(input: &mut Input<'input>) -> PResult<'input, TokenLiteral> { alt(( "true".value(TokenLiteral::Bool(true)), "false".value(TokenLiteral::Bool(false)), )) .parse_next(input) } fn parse_condition_if<'input>(input: &mut Input<'input>) -> PResult<'input, TemplateToken> { trace( "parse_condition_if", "if".map(TemplateToken::conditional_if), ) .parse_next(input) } fn parse_condition_else<'input>(input: &mut Input<'input>) -> PResult<'input, TemplateToken> { trace( "parse_condition_else", "else".map(TemplateToken::conditional_else), ) .parse_next(input) } fn parse_end<'input>(input: &mut Input<'input>) -> PResult<'input, TemplateToken> { trace("parse_end", "end".map(TemplateToken::end)).parse_next(input) } fn parse_for<'input>(input: &mut Input<'input>) -> PResult<'input, TemplateToken> { trace("parse_for", "for".map(TemplateToken::keyword_for)).parse_next(input) } fn parse_in<'input>(input: &mut Input<'input>) -> PResult<'input, TemplateToken> { trace("parse_in", "in".map(TemplateToken::keyword_in)).parse_next(input) } fn parse_keyword<'input>(input: &mut Input<'input>) -> PResult<'input, TemplateToken> { alt(( terminated(parse_literal, ident_terminator_check), terminated(parse_condition_if, ident_terminator_check), terminated(parse_condition_else, ident_terminator_check), terminated(parse_for, ident_terminator_check), terminated(parse_in, ident_terminator_check), terminated(parse_end, ident_terminator_check), )) .parse_next(input) } fn parse_whitespace<'input>(input: &mut Input<'input>) -> PResult<'input, TemplateToken> { trace( "parse_whitespace", multispace1.map(TemplateToken::whitespace), ) .parse_next(input) } fn parse_ident<'input>(input: &mut Input<'input>) -> PResult<'input, TemplateToken> { resume_after_cut( terminated( ident.map(TemplateToken::ident), cut_err(ident_terminator_check), ) .context( ParseError::ctx() .msg("Invalid variable identifier") .help("valid variable identifiers are alphanumeric"), ), bad_ident, ) .with_taken() .map(|(val, taken)| val.unwrap_or(TemplateToken::invalid(taken))) .parse_next(input) } fn parse_operator<'input>(input: &mut Input<'input>) -> PResult<'input, TemplateToken> { let (operator, source) = trace( "operator", dispatch! {any; '+' => empty.value(TokenOperator::Plus), '-' => empty.value(TokenOperator::Minus), '*' => empty.value(TokenOperator::Times), '/' => empty.value(TokenOperator::Divide), '&' => alt(( "&".value(TokenOperator::And), cut_err(fail), )), '|' => alt(( "|".value(TokenOperator::Or), cut_err(fail), )), '<' => alt(( "=".value(TokenOperator::LesserOrEqual), empty.value(TokenOperator::Lesser), )), '>' => alt(( "=".value(TokenOperator::GreaterOrEqual), empty.value(TokenOperator::Greater), )), '!' => alt(( "=".value(TokenOperator::NotEqual), cut_err(fail), )), '=' => alt(( "=".value(TokenOperator::Equal), cut_err(fail), )), _ => fail, }, ) .with_taken() .parse_next(input)?; Ok(TemplateToken::operator(operator, source)) } fn ident<'input>(input: &mut Input<'input>) -> PResult<'input, NomoInput> { peek(not(parse_keyword)) .context(ParseError::ctx().msg("Expected an ident, but found a literal instead")) .parse_next(input)?; let literal_start = alt((alpha1, "_")); ( literal_start, take_while(0.., |c: char| c.is_alphanumeric() || "_".contains(c)), ) .take() .parse_next(input) } fn bad_ident<'input>(input: &mut Input<'input>) -> PResult<'input, ()> { repeat_till(1.., any, ident_terminator_check) .map(|((), _)| ()) .parse_next(input) } fn ident_terminator_check<'input>(input: &mut Input<'input>) -> PResult<'input, ()> { peek(ident_terminator).parse_next(input) } fn ident_terminator<'input>(input: &mut Input<'input>) -> PResult<'input, ()> { alt(( eof.void(), one_of(('{', '}')).void(), one_of((' ', '\t', '\r', '\n')).void(), )) .parse_next(input) } #[cfg(test)] mod tests { use crate::parser::parse; #[test] fn parse_simple() { let input = "Hello There"; let output = parse(input.into()); insta::assert_debug_snapshot!(output, @r#" Ok( ParsedTemplate { tokens: [ [Content]"Hello There" (0..11), ], }, ) "#); } #[test] fn parse_interpolate() { let input = "Hello {{ there }}"; let output = parse(input.into()); insta::assert_debug_snapshot!(output, @r#" Ok( ParsedTemplate { tokens: [ [Content]"Hello" (0..5), [Whitespace]" " (5..6), [LeftDelim]"{{" (6..8), [Whitespace]" " (8..9), [Ident]"there" (9..14), [Whitespace]" " (14..15), [RightDelim]"}}" (15..17), ], }, ) "#); } #[test] fn parse_interpolate_bad() { let input = "Hello {{ the2re }} {{ the@re }}"; let output = parse(input.into()); insta::assert_debug_snapshot!(output, @r#" Err( ParseFailure { input: "Hello {{ the2re }} {{ the@re }}", errors: [ ParseError { message: Some( "Invalid variable identifier", ), help: Some( "valid variable identifiers are alphanumeric", ), span: Some( SourceSpan { range: 22..28, }, ), is_fatal: true, }, ], }, ) "#); let error = output.unwrap_err(); insta::assert_snapshot!(error.to_report()); } #[test] fn parse_simple_condition() { let input = "{{ if true }} Hello! {{ else }} Bye {{ end }}"; let output = parse(input.into()); insta::assert_debug_snapshot!(output, @r#" Ok( ParsedTemplate { tokens: [ [LeftDelim]"{{" (0..2), [Whitespace]" " (2..3), [ConditionalIf]"if" (3..5), [Whitespace]" " (5..6), [Literal(Bool(true))]"true" (6..10), [Whitespace]" " (10..11), [RightDelim]"}}" (11..13), [Whitespace]" " (13..14), [Content]"Hello!" (14..20), [Whitespace]" " (20..21), [LeftDelim]"{{" (21..23), [Whitespace]" " (23..24), [ConditionalElse]"else" (24..28), [Whitespace]" " (28..29), [RightDelim]"}}" (29..31), [Whitespace]" " (31..32), [Content]"Bye" (32..35), [Whitespace]" " (35..36), [LeftDelim]"{{" (36..38), [Whitespace]" " (38..39), [End]"end" (39..42), [Whitespace]" " (42..43), [RightDelim]"}}" (43..45), ], }, ) "#); } #[test] fn parse_trim_whitespace() { let input = "\n\n{{-= hello -}} \n\n"; let output = parse(input.into()); insta::assert_debug_snapshot!(output, @r#" Ok( ParsedTemplate { tokens: [ [Whitespace]"\n\n" (0..2), [LeftDelim]"{{" (2..4), [TrimWhitespace]"-" (4..5), [WantsOutput]"=" (5..6), [Whitespace]" " (6..7), [Ident]"hello" (7..12), [Whitespace]" " (12..13), [TrimWhitespace]"-" (13..14), [RightDelim]"}}" (14..16), [Whitespace]" \n\n" (16..19), ], }, ) "#); } #[test] fn parse_for_loop() { let input = "{{ for value in array }} Hi: {{= value }} {{ end }}"; let output = parse(input.into()); insta::assert_debug_snapshot!(output, @r#" Ok( ParsedTemplate { tokens: [ [LeftDelim]"{{" (0..2), [Whitespace]" " (2..3), [For]"for" (3..6), [Whitespace]" " (6..7), [Ident]"value" (7..12), [Whitespace]" " (12..13), [In]"in" (13..15), [Whitespace]" " (15..16), [Ident]"array" (16..21), [Whitespace]" " (21..22), [RightDelim]"}}" (22..24), [Whitespace]" " (24..25), [Content]"Hi:" (25..28), [Whitespace]" " (28..29), [LeftDelim]"{{" (29..31), [WantsOutput]"=" (31..32), [Whitespace]" " (32..33), [Ident]"value" (33..38), [Whitespace]" " (38..39), [RightDelim]"}}" (39..41), [Whitespace]" " (41..42), [LeftDelim]"{{" (42..44), [Whitespace]" " (44..45), [End]"end" (45..48), [Whitespace]" " (48..49), [RightDelim]"}}" (49..51), ], }, ) "#); } #[test] fn parse_operations() { let input = "{{= 5 * 14 + 3 / 2 - 1 }}{{ if foo && bar || baz && 2 != 3 || 4 > 2 || 43 <= 5 }}{{ end }}"; let output = parse(input.into()); insta::assert_debug_snapshot!(output, @r#" Ok( ParsedTemplate { tokens: [ [LeftDelim]"{{" (0..2), [WantsOutput]"=" (2..3), [Whitespace]" " (3..4), [Literal(Integer(5))]"5" (4..5), [Whitespace]" " (5..6), [Operator(Times)]"*" (6..7), [Whitespace]" " (7..8), [Literal(Integer(14))]"14" (8..10), [Whitespace]" " (10..11), [Operator(Plus)]"+" (11..12), [Whitespace]" " (12..13), [Literal(Integer(3))]"3" (13..14), [Whitespace]" " (14..15), [Operator(Divide)]"/" (15..16), [Whitespace]" " (16..17), [Literal(Integer(2))]"2" (17..18), [Whitespace]" " (18..19), [Operator(Minus)]"-" (19..20), [Whitespace]" " (20..21), [Literal(Integer(1))]"1" (21..22), [Whitespace]" " (22..23), [RightDelim]"}}" (23..25), [LeftDelim]"{{" (25..27), [Whitespace]" " (27..28), [ConditionalIf]"if" (28..30), [Whitespace]" " (30..31), [Ident]"foo" (31..34), [Whitespace]" " (34..35), [Operator(And)]"&&" (35..37), [Whitespace]" " (37..38), [Ident]"bar" (38..41), [Whitespace]" " (41..42), [Operator(Or)]"||" (42..44), [Whitespace]" " (44..45), [Ident]"baz" (45..48), [Whitespace]" " (48..49), [Operator(And)]"&&" (49..51), [Whitespace]" " (51..52), [Literal(Integer(2))]"2" (52..53), [Whitespace]" " (53..54), [Operator(NotEqual)]"!=" (54..56), [Whitespace]" " (56..57), [Literal(Integer(3))]"3" (57..58), [Whitespace]" " (58..59), [Operator(Or)]"||" (59..61), [Whitespace]" " (61..62), [Literal(Integer(4))]"4" (62..63), [Whitespace]" " (63..64), [Operator(Greater)]">" (64..65), [Whitespace]" " (65..66), [Literal(Integer(2))]"2" (66..67), [Whitespace]" " (67..68), [Operator(Or)]"||" (68..70), [Whitespace]" " (70..71), [Literal(Integer(43))]"43" (71..73), [Whitespace]" " (73..74), [Operator(LesserOrEqual)]"<=" (74..76), [Whitespace]" " (76..77), [Literal(Integer(5))]"5" (77..78), [Whitespace]" " (78..79), [RightDelim]"}}" (79..81), [LeftDelim]"{{" (81..83), [Whitespace]" " (83..84), [End]"end" (84..87), [Whitespace]" " (87..88), [RightDelim]"}}" (88..90), ], }, ) "#); } }