Move parsing to own module

Signed-off-by: Marcel Müller <neikos@neikos.email>
This commit is contained in:
Marcel Müller 2025-02-01 10:30:03 +01:00
parent 51012c19a7
commit 9a8441ed7d
4 changed files with 465 additions and 345 deletions

90
Cargo.lock generated
View file

@ -354,6 +354,17 @@ dependencies = [
"slab",
]
[[package]]
name = "getrandom"
version = "0.2.15"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c4567c8db10ae91089c99af84c68c38da3ec2f087c3f82960bcdbf3656b6f4d7"
dependencies = [
"cfg-if",
"libc",
"wasi",
]
[[package]]
name = "gimli"
version = "0.28.1"
@ -372,6 +383,22 @@ version = "0.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea"
[[package]]
name = "human-panic"
version = "2.0.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "80b84a66a325082740043a6c28bbea400c129eac0d3a27673a1de971e44bf1f7"
dependencies = [
"anstream",
"anstyle",
"backtrace",
"os_info",
"serde",
"serde_derive",
"toml",
"uuid",
]
[[package]]
name = "indexmap"
version = "2.7.1"
@ -683,6 +710,17 @@ dependencies = [
"pkg-config",
]
[[package]]
name = "os_info"
version = "3.9.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6e6520c8cc998c5741ee68ec1dc369fc47e5f0ea5320018ecf2a1ccd6328f48b"
dependencies = [
"log",
"serde",
"windows-sys 0.52.0",
]
[[package]]
name = "overload"
version = "0.1.1"
@ -743,6 +781,7 @@ dependencies = [
"camino",
"clap",
"futures",
"human-panic",
"jiff",
"kdl",
"miette",
@ -939,6 +978,15 @@ dependencies = [
"serde",
]
[[package]]
name = "serde_spanned"
version = "0.6.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "87607cb1398ed59d48732e575a4c28a7a8ebf2454b964fe3f224f2afc07909e1"
dependencies = [
"serde",
]
[[package]]
name = "sharded-slab"
version = "0.1.7"
@ -1183,6 +1231,39 @@ dependencies = [
"tokio",
]
[[package]]
name = "toml"
version = "0.8.19"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a1ed1f98e3fdc28d6d910e6737ae6ab1a93bf1985935a1193e68f93eeb68d24e"
dependencies = [
"serde",
"serde_spanned",
"toml_datetime",
"toml_edit",
]
[[package]]
name = "toml_datetime"
version = "0.6.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0dd7358ecb8fc2f8d014bf86f6f638ce72ba252a2c3a2572f2a795f1d23efb41"
dependencies = [
"serde",
]
[[package]]
name = "toml_edit"
version = "0.22.23"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "02a8b472d1a3d7c18e2d61a489aee3453fd9031c33e4f55bd533f4a7adca1bee"
dependencies = [
"indexmap",
"serde",
"serde_spanned",
"toml_datetime",
]
[[package]]
name = "tracing"
version = "0.1.41"
@ -1268,6 +1349,15 @@ version = "0.2.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821"
[[package]]
name = "uuid"
version = "1.12.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b3758f5e68192bb96cc8f9b7e2c2cfdabb435499a28499a42f8f984092adad4b"
dependencies = [
"getrandom",
]
[[package]]
name = "valuable"
version = "0.1.1"

View file

@ -9,6 +9,7 @@ license.workspace = true
camino = { version = "1.1.9", features = ["serde", "serde1"] }
clap = { version = "4.5.27", features = ["derive"] }
futures = "0.3.31"
human-panic = "2.0.2"
jiff = "0.1.28"
kdl.workspace = true
miette = { version = "7.4.0", features = ["fancy", "syntect-highlighter"] }

View file

@ -1,364 +1,47 @@
#![allow(dead_code)]
use std::collections::BTreeMap;
use std::collections::HashMap;
use camino::Utf8PathBuf;
use clap::Parser;
use futures::StreamExt;
use futures::TryStreamExt;
use jiff::fmt::temporal::DateTimeParser;
use jiff::Timestamp;
use kdl::KdlDocument;
use kdl::KdlValue;
use miette::IntoDiagnostic;
use miette::LabeledSpan;
use miette::NamedSource;
use owo_colors::OwoColorize;
use tokio_stream::wrappers::ReadDirStream;
use tracing::info;
use clap::Subcommand;
use clap::ValueHint;
use human_panic::Metadata;
mod parsing;
#[derive(Debug, Parser)]
struct Args {
#[arg(short, long)]
#[arg(short, long, value_hint(ValueHint::DirPath))]
path: Utf8PathBuf,
#[command(subcommand)]
mode: ArgMode,
}
#[derive(Debug, Subcommand)]
enum ArgMode {
Dump,
}
#[tokio::main]
async fn main() -> miette::Result<()> {
human_panic::setup_panic!(
Metadata::new(env!("CARGO_PKG_NAME"), env!("CARGO_PKG_VERSION"))
.authors(env!("CARGO_PKG_AUTHORS"))
);
tracing_subscriber::fmt().pretty().init();
let args = Args::parse();
let load_records = async {
let definitions = parsing::load_definitions(args.path.join("definitions")).await?;
parsing::load_records(args.path, &definitions).await
};
let definitions = load_definitions(args.path.join("definitions")).await?;
let records = load_records(args.path, &definitions).await?;
info!(?records, "Got");
match args.mode {
ArgMode::Dump => {
let records = load_records.await?;
}
}
Ok(())
}
#[derive(Debug)]
pub struct Record {
kind: String,
at: Timestamp,
fields: BTreeMap<String, KdlValue>,
}
fn parse_timestamp(value: &str) -> miette::Result<Timestamp> {
let parser = DateTimeParser::new();
parser
.parse_timestamp(value)
.or_else(|_| {
parser
.parse_datetime(value)
.and_then(|date| date.in_tz("UTC").map(|z| z.timestamp()))
})
.or_else(|_| {
parser
.parse_date(value)
.and_then(|date| date.in_tz("UTC").map(|z| z.timestamp()))
})
.into_diagnostic()
}
fn parse_record(
bytes: &str,
definitions: &BTreeMap<String, Vec<Definition>>,
) -> miette::Result<Vec<Record>> {
let doc: KdlDocument = bytes.parse()?;
let mut recs = vec![];
for node in doc.nodes() {
let Some(def) = definitions.get(node.name().value()) else {
return Err(miette::diagnostic!(
labels = vec![LabeledSpan::new_primary_with_span(None, node.name().span())],
"Unknown record kind"
))?;
};
let Some(at_entry) = node.entry(0) else {
return Err(miette::diagnostic!(
labels = vec![LabeledSpan::new_primary_with_span(None, node.name().span())],
"Every record has to have a first argument with a datetime formatted as RFC3339."
))?;
};
let KdlValue::String(at) = at_entry.value() else {
return Err(miette::diagnostic!(
labels = vec![LabeledSpan::new_primary_with_span(None, at_entry.span())],
"This datetime should be a string formatted as RFC3339."
))?;
};
let Ok(at) = parse_timestamp(at) else {
return Err(miette::diagnostic!(
labels = vec![LabeledSpan::new_primary_with_span(None, at_entry.span())],
"This datetime should be a string formatted as RFC3339."
))?;
};
let fields = node
.iter_children()
.map(|field| {
let Some(get) = field.get(0) else {
return Err(miette::diagnostic!(
labels = vec![LabeledSpan::new_primary_with_span(None, at_entry.span())],
"This datetime should be a string formatted as RFC3339."
))?;
};
Ok::<_, miette::Report>((field.name().clone(), get.clone()))
})
.map(|val| match val {
Ok((name, val)) => {
let matching_def =
&def[def.partition_point(|v| v.since > at).saturating_sub(1)];
let kind = &matching_def.fields[name.value()];
if let Err(e) = kind.validate(&val) {
Err(miette::diagnostic!(
labels = vec![LabeledSpan::new_primary_with_span(
Some(String::from("here")),
name.span()
)],
help = e,
"This field has the wrong kind."
))?;
}
Ok((name.to_string(), val))
}
Err(err) => Err(err),
})
.collect::<Result<_, _>>()?;
recs.push(Record {
kind: node.name().to_string(),
at,
fields,
});
}
Ok(recs)
}
async fn load_records(
path: Utf8PathBuf,
definitions: &BTreeMap<String, Vec<Definition>>,
) -> miette::Result<Vec<Record>> {
let defs = ReadDirStream::new(tokio::fs::read_dir(path).await.into_diagnostic()?)
.map_err(miette::Report::from_err)
.and_then(|entry| async move {
if entry.file_type().await.into_diagnostic()?.is_file() {
Ok(Some((
Utf8PathBuf::from_path_buf(entry.path().to_path_buf()).unwrap(),
tokio::fs::read_to_string(entry.path())
.await
.into_diagnostic()?,
)))
} else {
Ok(None)
}
})
.flat_map(|val| futures::stream::iter(val.transpose()))
.and_then(|(name, bytes)| async move {
parse_record(&bytes, definitions)
.map_err(|e| e.with_source_code(NamedSource::new(name, bytes).with_language("kdl")))
})
.map(|val| val.map(|recs| futures::stream::iter(recs).map(Ok::<_, miette::Report>)))
.try_flatten()
.try_collect()
.await?;
Ok(defs)
}
#[derive(Debug)]
pub enum DefinitionKind {
String,
OneOf(Vec<String>),
}
impl DefinitionKind {
fn validate(&self, val: &KdlValue) -> Result<(), String> {
match self {
DefinitionKind::String => val
.is_string()
.then_some(())
.ok_or("Expected a string here".to_string()),
DefinitionKind::OneOf(options) => val
.as_string()
.is_some_and(|val| options.iter().any(|o| o == val))
.then_some(())
.ok_or_else(|| format!("Expected one of: {}", options.join(", "))),
}
}
}
impl TryFrom<&str> for DefinitionKind {
type Error = miette::Report;
fn try_from(value: &str) -> Result<Self, Self::Error> {
match value.to_ascii_lowercase().as_str() {
"string" => Ok(DefinitionKind::String),
other => miette::bail!("Did not recognize valid field kind: \"{other}\""),
}
}
}
#[derive(Debug)]
pub struct Definition {
since: Timestamp,
fields: HashMap<String, DefinitionKind>,
}
fn parse_definition(bytes: &str) -> miette::Result<Vec<Definition>> {
let doc: KdlDocument = bytes.parse()?;
let mut defs = vec![];
for node in doc.nodes() {
match node.name().value() {
"define" => {
let Some(since_entry) = node.entry("since") else {
return Err(miette::diagnostic!(
labels = vec![LabeledSpan::new_primary_with_span(
Some(String::from("this define")),
node.name().span()
)],
"Missing `since` property. Every `define` block requires one."
))?;
};
let KdlValue::String(since) = since_entry.value() else {
return Err(miette::diagnostic!(
labels = vec![LabeledSpan::new_primary_with_span(
Some(String::from("in this define")),
since_entry.span()
)],
"The `since` property needs to be a string in RFC3339 format."
))?;
};
let since = match parse_timestamp(since) {
Ok(since) => since,
Err(_err) => {
return Err(miette::diagnostic!(
labels = vec![LabeledSpan::new_primary_with_span(
Some(String::from("in this define")),
since_entry.span()
)],
"Could not parse the `since` property as a valid RFC3339 time"
))?;
}
};
let Some(fields) = node
.iter_children()
.find(|field| field.name().value() == "fields")
else {
return Err(miette::diagnostic!(
labels = vec![LabeledSpan::new_primary_with_span(
Some(String::from("in this define")),
node.span()
)],
"Could not find `fields` child, which is a required child node."
))?;
};
let fields = fields
.iter_children()
.map(|field| {
let kind = if let Some(kind) = field.get("is") {
kind.as_string()
.ok_or_else(|| {
miette::Report::from(miette::diagnostic!(
labels = vec![LabeledSpan::new_primary_with_span(
Some(String::from("in this define")),
field.span()
)],
"The `is` field needs to be a string."
))
})
.and_then(DefinitionKind::try_from)?
} else {
let Some(children) = field.children() else {
return Err(miette::diagnostic!(
labels = vec![LabeledSpan::new_primary_with_span(
Some(String::from("in this define")),
field.span()
)],
"Either set a `is` property, or a child with the given definition"
))?;
};
if let Some(one_of) = children.get("oneOf") {
DefinitionKind::OneOf(
one_of.iter().map(|opt| opt.value().to_string()).collect(),
)
} else {
return Err(miette::diagnostic!(
labels = vec![LabeledSpan::new_primary_with_span(
Some(String::from("in this define")),
field.span()
)],
"Unrecognizable field definition"
))?;
}
};
Ok((field.name().to_string(), kind))
})
.collect::<miette::Result<_>>()?;
defs.push(Definition { since, fields });
}
unknown => {
return Err(miette::diagnostic!(
labels = vec![LabeledSpan::new_primary_with_span(
Some(String::from("here")),
node.name().span()
)],
help = "Allowed nodes are: \"define\"",
"Unknown node \"{}\".",
unknown.red(),
))?
}
}
}
defs.sort_by_key(|d| d.since);
Ok(defs)
}
async fn load_definitions(path: Utf8PathBuf) -> miette::Result<BTreeMap<String, Vec<Definition>>> {
let defs = ReadDirStream::new(tokio::fs::read_dir(path).await.into_diagnostic()?)
.map_err(miette::Report::from_err)
.and_then(|entry| async move {
if entry.file_type().await.into_diagnostic()?.is_file() {
Ok(Some((
Utf8PathBuf::from_path_buf(entry.path().to_path_buf()).unwrap(),
tokio::fs::read_to_string(entry.path())
.await
.into_diagnostic()?,
)))
} else {
Ok(None)
}
})
.flat_map(|val| futures::stream::iter(val.transpose()))
.and_then(|(name, bytes)| async move {
Ok((
name.file_stem().unwrap().to_string(),
parse_definition(&bytes).map_err(|e| {
e.with_source_code(NamedSource::new(name, bytes).with_language("kdl"))
})?,
))
})
.try_collect()
.await?;
Ok(defs)
}

View file

@ -0,0 +1,346 @@
use std::collections::BTreeMap;
use std::collections::HashMap;
use camino::Utf8PathBuf;
use futures::StreamExt;
use futures::TryStreamExt;
use jiff::fmt::temporal::DateTimeParser;
use jiff::Timestamp;
use kdl::KdlDocument;
use kdl::KdlValue;
use miette::IntoDiagnostic;
use miette::LabeledSpan;
use miette::NamedSource;
use owo_colors::OwoColorize;
use tokio_stream::wrappers::ReadDirStream;
/// A single record parsed from a KDL records file by `parse_record`.
#[derive(Debug)]
pub struct Record {
/// Name of the KDL node the record was read from; also the key used to
/// look up its definitions.
pub(crate) kind: String,
/// Point in time of the record, parsed from the node's first argument.
pub(crate) at: Timestamp,
/// Child nodes of the record: child name mapped to that child's first argument.
pub(crate) fields: BTreeMap<String, KdlValue>,
}
/// Parses `value` into a [`Timestamp`], trying progressively less specific forms.
///
/// The attempts are, in order: a full timestamp, a civil datetime, and a civil
/// date. The latter two are placed in the `"UTC"` time zone before being
/// converted. If all three fail, the error of the last attempt (the date one)
/// is returned as a diagnostic.
pub(crate) fn parse_timestamp(value: &str) -> miette::Result<Timestamp> {
    let parser = DateTimeParser::new();

    // Most specific form first: an instant with an offset.
    if let Ok(timestamp) = parser.parse_timestamp(value) {
        return Ok(timestamp);
    }

    // Next: a datetime without offset, interpreted as UTC.
    let as_datetime = parser
        .parse_datetime(value)
        .and_then(|datetime| datetime.in_tz("UTC"));
    if let Ok(zoned) = as_datetime {
        return Ok(zoned.timestamp());
    }

    // Last resort: a bare date, interpreted as UTC. Its error is the one reported.
    parser
        .parse_date(value)
        .and_then(|date| date.in_tz("UTC"))
        .map(|zoned| zoned.timestamp())
        .into_diagnostic()
}
/// Parses every top-level node of a KDL document into a [`Record`] and validates
/// its fields against `definitions`.
///
/// * `bytes` - the KDL source text of one records file.
/// * `definitions` - known definitions keyed by record kind (node name). Each
///   list is expected to be sorted ascending by `since`, which is how
///   `parse_definition` returns it.
///
/// For every node the first argument must be a string that `parse_timestamp`
/// accepts. The definition used for validation is the latest one whose `since`
/// is not after the record's timestamp; a record older than every definition
/// falls back to the earliest definition.
///
/// # Errors
///
/// Returns a diagnostic (labelled with the offending span) when the document is
/// not valid KDL, the record kind is unknown or has no definitions, the
/// timestamp is missing or malformed, a field has no value, a field is not part
/// of the definition, or a field value fails validation.
pub(crate) fn parse_record(
    bytes: &str,
    definitions: &BTreeMap<String, Vec<Definition>>,
) -> miette::Result<Vec<Record>> {
    let doc: KdlDocument = bytes.parse()?;

    let mut recs = Vec::with_capacity(doc.nodes().len());

    for node in doc.nodes() {
        let Some(def) = definitions.get(node.name().value()) else {
            return Err(miette::diagnostic!(
                labels = vec![LabeledSpan::new_primary_with_span(None, node.name().span())],
                "Unknown record kind"
            )
            .into());
        };

        let Some(at_entry) = node.entry(0) else {
            return Err(miette::diagnostic!(
                labels = vec![LabeledSpan::new_primary_with_span(None, node.name().span())],
                "Every record has to have a first argument with a datetime formatted as RFC3339."
            )
            .into());
        };

        let KdlValue::String(at) = at_entry.value() else {
            return Err(miette::diagnostic!(
                labels = vec![LabeledSpan::new_primary_with_span(None, at_entry.span())],
                "This datetime should be a string formatted as RFC3339."
            )
            .into());
        };

        let Ok(at) = parse_timestamp(at) else {
            return Err(miette::diagnostic!(
                labels = vec![LabeledSpan::new_primary_with_span(None, at_entry.span())],
                "This datetime should be a string formatted as RFC3339."
            )
            .into());
        };

        // `partition_point` needs a predicate that is true for a prefix of the
        // (ascending) list: "became valid at or before `at`". The element just
        // before the partition point is the newest applicable definition.
        // `saturating_sub` keeps the earliest definition as a fallback for
        // records that predate all of them.
        let applicable = def.partition_point(|candidate| candidate.since <= at);
        let Some(matching_def) = def.get(applicable.saturating_sub(1)) else {
            return Err(miette::diagnostic!(
                labels = vec![LabeledSpan::new_primary_with_span(None, node.name().span())],
                "No definition is available for this record kind"
            )
            .into());
        };

        let mut fields = BTreeMap::new();
        for field in node.iter_children() {
            let name = field.name();

            let Some(value) = field.get(0) else {
                return Err(miette::diagnostic!(
                    labels = vec![LabeledSpan::new_primary_with_span(
                        Some(String::from("this field")),
                        name.span()
                    )],
                    "Every field needs a value as its first argument."
                )
                .into());
            };

            // A plain index would panic on fields the definition does not know.
            let Some(kind) = matching_def.fields.get(name.value()) else {
                let mut known: Vec<&str> =
                    matching_def.fields.keys().map(String::as_str).collect();
                known.sort_unstable();
                return Err(miette::diagnostic!(
                    labels = vec![LabeledSpan::new_primary_with_span(
                        Some(String::from("here")),
                        name.span()
                    )],
                    help = format!("Known fields are: {}", known.join(", ")),
                    "This field is not part of the record's definition."
                )
                .into());
            };

            if let Err(e) = kind.validate(value) {
                return Err(miette::diagnostic!(
                    labels = vec![LabeledSpan::new_primary_with_span(
                        Some(String::from("here")),
                        name.span()
                    )],
                    help = e,
                    "This field has the wrong kind."
                )
                .into());
            }

            // Key by the identifier's value, not its source representation, so
            // quoted names are stored without their quotes.
            fields.insert(name.value().to_owned(), value.clone());
        }

        recs.push(Record {
            kind: node.name().value().to_owned(),
            at,
            fields,
        });
    }

    Ok(recs)
}
/// Reads every regular file directly inside `path` and parses it as a records
/// file, returning the records of all files in one list.
///
/// Directory entries that are not regular files are skipped; the directory is
/// not walked recursively.
///
/// # Errors
///
/// Fails when the directory or a file cannot be read, when a file name is not
/// valid UTF-8, or when `parse_record` rejects a file. Parse errors carry the
/// file's name and contents as source code so they can be rendered with labels.
pub(crate) async fn load_records(
    path: Utf8PathBuf,
    definitions: &BTreeMap<String, Vec<Definition>>,
) -> miette::Result<Vec<Record>> {
    let records: Vec<Record> =
        ReadDirStream::new(tokio::fs::read_dir(path).await.into_diagnostic()?)
            .map_err(miette::Report::from_err)
            .and_then(|entry| async move {
                if entry.file_type().await.into_diagnostic()?.is_file() {
                    // Report non-UTF-8 names as an error instead of panicking.
                    let name = Utf8PathBuf::from_path_buf(entry.path()).map_err(|raw| {
                        miette::miette!("File path is not valid UTF-8: {}", raw.display())
                    })?;
                    let bytes = tokio::fs::read_to_string(&name)
                        .await
                        .into_diagnostic()?;
                    Ok(Some((name, bytes)))
                } else {
                    Ok(None)
                }
            })
            // Drop the `None`s produced for non-file entries, keep errors.
            .flat_map(|val| futures::stream::iter(val.transpose()))
            .and_then(|(name, bytes)| async move {
                parse_record(&bytes, definitions).map_err(|e| {
                    e.with_source_code(NamedSource::new(name, bytes).with_language("kdl"))
                })
            })
            // Turn each file's `Vec<Record>` into a stream so all files flatten into one list.
            .map(|val| val.map(|recs| futures::stream::iter(recs).map(Ok::<_, miette::Report>)))
            .try_flatten()
            .try_collect()
            .await?;

    Ok(records)
}
/// The kind of value a record field is allowed to hold.
#[derive(Debug)]
pub enum DefinitionKind {
/// Any string value is accepted.
String,
/// Only a string value equal to one of the listed options is accepted.
OneOf(Vec<String>),
}
impl DefinitionKind {
    /// Checks whether `val` is acceptable for this kind.
    ///
    /// Returns `Ok(())` when the value matches, otherwise an `Err` with a short
    /// human-readable explanation suitable as a diagnostic help text.
    pub(crate) fn validate(&self, val: &KdlValue) -> Result<(), String> {
        match self {
            DefinitionKind::String => {
                if val.is_string() {
                    Ok(())
                } else {
                    // Built only on failure; the success path does not allocate.
                    Err(String::from("Expected a string here"))
                }
            }
            DefinitionKind::OneOf(options) => match val.as_string() {
                Some(candidate) if options.iter().any(|option| option == candidate) => Ok(()),
                // Covers both non-string values and strings that are not listed.
                _ => Err(format!("Expected one of: {}", options.join(", "))),
            },
        }
    }
}
/// Converts the textual kind name used in definition files into a [`DefinitionKind`].
///
/// The comparison ignores ASCII case. Only `"string"` is recognized; anything
/// else yields an error that quotes the lowercased input.
impl TryFrom<&str> for DefinitionKind {
    type Error = miette::Report;

    fn try_from(value: &str) -> Result<Self, Self::Error> {
        let other = value.to_ascii_lowercase();

        if other == "string" {
            return Ok(DefinitionKind::String);
        }

        miette::bail!("Did not recognize valid field kind: \"{other}\"")
    }
}
/// One `define` block of a definitions file: the field layout of a record kind
/// together with the timestamp given in its `since` property.
#[derive(Debug)]
pub struct Definition {
/// Parsed value of the `since` property; `parse_definition` sorts its result by it.
pub(crate) since: Timestamp,
/// Allowed fields: field name mapped to the kind of value it must hold.
pub(crate) fields: HashMap<String, DefinitionKind>,
}
pub(crate) fn parse_definition(bytes: &str) -> miette::Result<Vec<Definition>> {
let doc: KdlDocument = bytes.parse()?;
let mut defs = vec![];
for node in doc.nodes() {
match node.name().value() {
"define" => {
let Some(since_entry) = node.entry("since") else {
return Err(miette::diagnostic!(
labels = vec![LabeledSpan::new_primary_with_span(
Some(String::from("this define")),
node.name().span()
)],
"Missing `since` property. Every `define` block requires one."
))?;
};
let KdlValue::String(since) = since_entry.value() else {
return Err(miette::diagnostic!(
labels = vec![LabeledSpan::new_primary_with_span(
Some(String::from("in this define")),
since_entry.span()
)],
"The `since` property needs to be a string in RFC3339 format."
))?;
};
let since = match parse_timestamp(since) {
Ok(since) => since,
Err(_err) => {
return Err(miette::diagnostic!(
labels = vec![LabeledSpan::new_primary_with_span(
Some(String::from("in this define")),
since_entry.span()
)],
"Could not parse the `since` property as a valid RFC3339 time"
))?;
}
};
let Some(fields) = node
.iter_children()
.find(|field| field.name().value() == "fields")
else {
return Err(miette::diagnostic!(
labels = vec![LabeledSpan::new_primary_with_span(
Some(String::from("in this define")),
node.span()
)],
"Could not find `fields` child, which is a required child node."
))?;
};
let fields = fields
.iter_children()
.map(|field| {
let kind = if let Some(kind) = field.get("is") {
kind.as_string()
.ok_or_else(|| {
miette::Report::from(miette::diagnostic!(
labels = vec![LabeledSpan::new_primary_with_span(
Some(String::from("in this define")),
field.span()
)],
"The `is` field needs to be a string."
))
})
.and_then(DefinitionKind::try_from)?
} else {
let Some(children) = field.children() else {
return Err(miette::diagnostic!(
labels = vec![LabeledSpan::new_primary_with_span(
Some(String::from("in this define")),
field.span()
)],
"Either set a `is` property, or a child with the given definition"
))?;
};
if let Some(one_of) = children.get("oneOf") {
DefinitionKind::OneOf(
one_of.iter().map(|opt| opt.value().to_string()).collect(),
)
} else {
return Err(miette::diagnostic!(
labels = vec![LabeledSpan::new_primary_with_span(
Some(String::from("in this define")),
field.span()
)],
"Unrecognizable field definition"
))?;
}
};
Ok((field.name().to_string(), kind))
})
.collect::<miette::Result<_>>()?;
defs.push(Definition { since, fields });
}
unknown => {
return Err(miette::diagnostic!(
labels = vec![LabeledSpan::new_primary_with_span(
Some(String::from("here")),
node.name().span()
)],
help = "Allowed nodes are: \"define\"",
"Unknown node \"{}\".",
unknown.red(),
))?
}
}
}
defs.sort_by_key(|d| d.since);
Ok(defs)
}
/// Reads every regular file directly inside `path` and parses it as a
/// definitions file.
///
/// The returned map is keyed by each file's stem (file name without its last
/// extension), which is the record kind the definitions apply to. Directory
/// entries that are not regular files are skipped.
///
/// # Errors
///
/// Fails when the directory or a file cannot be read, when a file name is not
/// valid UTF-8 or has no stem, or when `parse_definition` rejects a file. Parse
/// errors carry the file's name and contents as source code.
pub(crate) async fn load_definitions(
    path: Utf8PathBuf,
) -> miette::Result<BTreeMap<String, Vec<Definition>>> {
    let defs = ReadDirStream::new(tokio::fs::read_dir(path).await.into_diagnostic()?)
        .map_err(miette::Report::from_err)
        .and_then(|entry| async move {
            if entry.file_type().await.into_diagnostic()?.is_file() {
                // Report non-UTF-8 names as an error instead of panicking.
                let name = Utf8PathBuf::from_path_buf(entry.path()).map_err(|raw| {
                    miette::miette!("File path is not valid UTF-8: {}", raw.display())
                })?;
                let bytes = tokio::fs::read_to_string(&name)
                    .await
                    .into_diagnostic()?;
                Ok(Some((name, bytes)))
            } else {
                Ok(None)
            }
        })
        // Drop the `None`s produced for non-file entries, keep errors.
        .flat_map(|val| futures::stream::iter(val.transpose()))
        .and_then(|(name, bytes)| async move {
            let kind = name
                .file_stem()
                .ok_or_else(|| {
                    miette::miette!("Could not derive a record kind from file name: {name}")
                })?
                .to_owned();
            let definitions = parse_definition(&bytes).map_err(|e| {
                e.with_source_code(NamedSource::new(name, bytes).with_language("kdl"))
            })?;
            Ok((kind, definitions))
        })
        .try_collect()
        .await?;

    Ok(defs)
}