Add initial parsing

Signed-off-by: Marcel Müller <neikos@neikos.email>
This commit is contained in:
Marcel Müller 2025-06-29 12:07:07 +02:00
parent 12327bb44c
commit 87db209786
6 changed files with 244 additions and 2 deletions

7
Cargo.lock generated
View file

@ -97,6 +97,12 @@ version = "2.9.1"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1b8e56985ec62d17e9c1001dc89c88ecd7dc08e47eba5ec7c29c7b5eeecde967" checksum = "1b8e56985ec62d17e9c1001dc89c88ecd7dc08e47eba5ec7c29c7b5eeecde967"
[[package]]
name = "camino"
version = "1.1.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0da45bc31171d8d6960122e222a67740df867c1dd53b4d51caa297084c185cab"
[[package]] [[package]]
name = "cfg-if" name = "cfg-if"
version = "1.0.1" version = "1.0.1"
@ -193,6 +199,7 @@ checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea"
name = "hem" name = "hem"
version = "0.1.0" version = "0.1.0"
dependencies = [ dependencies = [
"camino",
"clap", "clap",
"insta", "insta",
"miette", "miette",

View file

@ -5,6 +5,7 @@ edition = "2024"
description = "Hemera's Expression Manipulator, editing text on the CLI" description = "Hemera's Expression Manipulator, editing text on the CLI"
[dependencies] [dependencies]
camino = "1.1.10"
clap = { version = "4.5.40", features = ["derive"] } clap = { version = "4.5.40", features = ["derive"] }
insta = "1.43.1" insta = "1.43.1"
miette = { version = "7.6.0", features = ["fancy"] } miette = { version = "7.6.0", features = ["fancy"] }

View file

@ -100,6 +100,7 @@
rustfmt' rustfmt'
rustTarget rustTarget
pkgs.cargo-insta
inputs.cargo-changelog.packages.${system}.default inputs.cargo-changelog.packages.${system}.default
]; ];
}; };

49
src/cli.rs Normal file
View file

@ -0,0 +1,49 @@
use camino::Utf8PathBuf;
use clap::Parser;
// Command-line arguments, parsed through clap's derive `Parser`.
//
// NOTE: plain `//` comments are used here on purpose. `///` doc comments on a
// clap derive struct or its fields are picked up as help text and would change
// the `--help` output.
#[derive(Debug, Parser)]
pub struct Args {
// Expression source text; `main` hands it to `expr::parse`.
#[clap(short, long)]
pub expression: String,
// Where to read input from. Defaults to "-", which `Args::input` maps to
// `Input::Stdin`; any other value becomes `Input::FilePath`.
#[clap(short, long, default_value_t = Utf8PathBuf::from("-"))]
input: Utf8PathBuf,
// When set, `Args::delimiter` returns `InputDelimiter::Lines`. Shares the
// "delimiters" group with `words`.
#[clap(short, long, group = "delimiters")]
lines: bool,
// When set (and `lines` is not), `Args::delimiter` returns
// `InputDelimiter::Words`.
// NOTE(review): with `default_value_t = true` this flag looks like it is
// always true, which would make passing `--words` a no-op — confirm.
#[clap(short, long, group = "delimiters", default_value_t = true)]
words: bool,
}
/// Delimiter mode selected on the command line.
///
/// Produced by `Args::delimiter` from the `lines` / `words` flags. The common
/// traits are derived so the value can be logged, copied and compared.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum InputDelimiter {
    /// Chosen when the `lines` flag is set.
    Lines,
    /// Chosen when only the `words` flag is set.
    Words,
}
pub enum Input {
Stdin,
FilePath(Utf8PathBuf),
}
impl Args {
    /// Resolves the delimiter flags into an [`InputDelimiter`].
    ///
    /// `lines` is checked first, so it wins whenever it is set.
    ///
    /// # Panics
    ///
    /// Panics if neither `lines` nor `words` is set.
    pub fn delimiter(&self) -> InputDelimiter {
        match (self.lines, self.words) {
            (true, _) => InputDelimiter::Lines,
            (false, true) => InputDelimiter::Words,
            (false, false) => unreachable!("Either lines or words has to be true"),
        }
    }

    /// Resolves the input argument into an [`Input`].
    ///
    /// An argument equal to `"-"` selects [`Input::Stdin`]; anything else is
    /// returned as [`Input::FilePath`] holding a clone of the path.
    pub fn input(&self) -> Input {
        let wants_stdin = self.input == "-";
        if wants_stdin {
            return Input::Stdin;
        }
        Input::FilePath(self.input.clone())
    }
}

173
src/expr.rs Normal file
View file

@ -0,0 +1,173 @@
use std::ops::Range;
use miette::LabeledSpan;
use winnow::LocatingSlice;
use winnow::ModalResult;
use winnow::Parser;
use winnow::ascii::escaped;
use winnow::ascii::space0;
use winnow::ascii::space1;
use winnow::combinator::alt;
use winnow::combinator::delimited;
use winnow::combinator::separated;
use winnow::error::ContextError;
use winnow::error::StrContext;
use winnow::token::none_of;
pub fn parse(src: &str) -> miette::Result<TokenList<'_>> {
Ok(TokenList {
expressions: parse_program
.parse(LocatingSlice::new(src))
.map_err(|parse_error| {
let labels = vec![LabeledSpan::new(
Some("Here".to_string()),
parse_error.offset(),
0,
)];
miette::diagnostic!(
labels = labels,
"Could not parse expression: {:?}",
parse_error
)
})?,
src,
})
}
/// Parser input: the source string wrapped in `LocatingSlice`, which the
/// parsers below rely on for `with_span`.
type Input<'p> = LocatingSlice<&'p str>;
/// Error type shared by every parser in this module.
type Error = ContextError;
/// Result of [`parse`]: the tokens found in an expression together with the
/// source text they were parsed from.
#[derive(Debug, Clone)]
pub struct TokenList<'p> {
/// Tokens in the order they appear in `src`.
expressions: Vec<Token<'p>>,
/// The original expression text passed to [`parse`].
src: &'p str,
}
/// A single token together with its location in the source.
#[derive(Debug, Clone)]
pub struct Token<'p> {
/// Offset range of the token in the source, as reported by `with_span`.
span: Range<usize>,
/// What kind of token this is.
expr: Tok<'p>,
}
/// The kinds of token the expression parser recognises.
#[derive(Debug, Clone)]
pub enum Tok<'p> {
/// The `print` keyword.
Print,
/// The `@w` marker.
PerWord,
/// The `@l` marker.
PerLine,
/// The `match` keyword.
Match,
/// The `|` symbol.
FunctionApplication,
/// Contents of a single-quoted string literal, without the quotes.
String(String),
/// NOTE(review): presumably a placeholder that keeps the `'p` lifetime
/// parameter in use; no parser in this file produces it — confirm.
Dummy(&'p ()),
}
/// Parses a whole program: one or more expressions separated by `space1`,
/// with optional surrounding `space0` on both ends.
fn parse_program<'p>(input: &mut Input<'p>) -> ModalResult<Vec<Token<'p>>, Error> {
    let expressions = separated(1.., parse_expression, space1);
    let program = delimited(space0, expressions, space0);

    program
        .context(StrContext::Label("program"))
        .parse_next(input)
}
/// Parses a single expression: a builtin is tried first, then a string
/// literal.
fn parse_expression<'p>(input: &mut Input<'p>) -> ModalResult<Token<'p>, Error> {
    let mut expression =
        alt((parse_builtin, parse_string)).context(StrContext::Label("expression"));

    expression.parse_next(input)
}
fn parse_builtin<'p>(input: &mut Input<'p>) -> ModalResult<Token<'p>, Error> {
alt((
"@w".value(Tok::PerWord)
.context(winnow::error::StrContext::Label("@w")),
"@l".value(Tok::PerLine)
.context(winnow::error::StrContext::Label("@l")),
"print"
.value(Tok::Print)
.context(winnow::error::StrContext::Label("print")),
"match"
.value(Tok::Match)
.context(winnow::error::StrContext::Label("match")),
"|".value(Tok::FunctionApplication)
.context(winnow::error::StrContext::Label("|")),
))
.with_span()
.map(|(expr, span)| Token { expr, span })
.parse_next(input)
}
/// Parses a single-quoted string literal into a [`Tok::String`].
///
/// Inside the quotes a backslash introduces an escape; the recognised escapes
/// are a backslash and a single quote. The recorded span covers the
/// surrounding quotes as well.
fn parse_string<'p>(input: &mut Input<'p>) -> ModalResult<Token<'p>, Error> {
    // Any character that neither ends the literal nor starts an escape.
    let plain_char = none_of(['\'', '\\']);
    // What may follow the backslash, mapped to the text it stands for.
    let escape_sequence = alt(("\\".value("\\"), "\'".value("\'")));
    let body = escaped(plain_char, '\\', escape_sequence).map(Tok::String);

    delimited('\'', body, '\'')
        .with_span()
        .map(|(expr, span)| Token { span, expr })
        .parse_next(input)
}
// Snapshot tests for `parse`, using insta inline snapshots of the `Debug`
// output of the returned `TokenList`.
#[cfg(test)]
mod tests {
use super::*;
// Two builtins separated by a single space.
#[test]
fn check_simple_print() {
let input = "@w print";
let expr = parse(input).unwrap();
insta::assert_debug_snapshot!(expr, @r#"
TokenList {
expressions: [
Token {
span: 0..2,
expr: PerWord,
},
Token {
span: 3..8,
expr: Print,
},
],
src: "@w print",
}
"#);
}
// Builtins mixed with a quoted string literal and the `|` symbol; the string
// token's span (9..14) includes both quotes.
#[test]
fn check_complex_print() {
let input = "@l match 'foo' | print";
let expr = parse(input).unwrap();
insta::assert_debug_snapshot!(expr, @r#"
TokenList {
expressions: [
Token {
span: 0..2,
expr: PerLine,
},
Token {
span: 3..8,
expr: Match,
},
Token {
span: 9..14,
expr: String(
"foo",
),
},
Token {
span: 15..16,
expr: FunctionApplication,
},
Token {
span: 17..22,
expr: Print,
},
],
src: "@l match 'foo' | print",
}
"#);
}
}

View file

@ -1,3 +1,14 @@
fn main() { use clap::Parser;
println!("Hello, world!");
mod cli;
mod expr;
// Entry point: parses the command-line arguments, resolves the delimiter and
// input selection from them, and parses the expression. A failed expression
// parse is returned to the caller as a miette report.
fn main() -> miette::Result<()> {
let args = cli::Args::parse();
// NOTE(review): `input_delim` and `input` are computed but not used further
// in the code visible here.
let input_delim = args.delimiter();
let input = args.input();
// `?` propagates the parse diagnostic; the parsed token list is not consumed
// yet in the code visible here.
let expression = expr::parse(&args.expression)?;
Ok(())
} }