use std::fmt::{Display, Formatter};
use std::path::{Component, Path, PathBuf};
use std::str::FromStr;
use serde::{Deserialize, Serialize};
use smol_str::SmolStr;
use thiserror::Error;
/// Maximum length of an [`EntryAtom`], measured in UTF-8 bytes.
///
/// NOTE(review): presumably the common 255-byte filename limit minus the
/// 3-byte `.md` suffix — confirm against the target filesystems.
pub const ENTRY_ATOM_MAX_BYTES: usize = 252;
/// A single validated name segment of an entry address.
///
/// Values built through [`EntryAtom::new`] are non-empty, at most
/// [`ENTRY_ATOM_MAX_BYTES`] bytes long, contain no `.` and no forbidden
/// filename character, do not end with a space, and are not a Windows device
/// name. Serialization is transparent: the atom is written as its string.
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash, Serialize)]
#[serde(transparent)]
pub struct EntryAtom(SmolStr);
impl EntryAtom {
    /// Checks `raw` against every atom rule and wraps it on success.
    ///
    /// # Errors
    ///
    /// Returns the first [`EntryAtomError`] that applies; see
    /// [`EntryAtom::validate`] for the order in which rules are checked.
    pub fn new(raw: impl AsRef<str>) -> Result<Self, EntryAtomError> {
        let text = raw.as_ref();
        Self::validate(text).map(|()| Self(SmolStr::new(text)))
    }

    /// Borrows the atom as a plain string slice.
    pub fn as_str(&self) -> &str {
        self.0.as_str()
    }

    /// Applies the atom rules in a fixed order and reports the first violation.
    ///
    /// Order: empty, too long, trailing space, contains `.`, Windows device
    /// name, forbidden filename character.
    fn validate(raw: &str) -> Result<(), EntryAtomError> {
        let rejection = if raw.is_empty() {
            Some(EntryAtomError::Empty)
        } else if raw.len() > ENTRY_ATOM_MAX_BYTES {
            // `len` counts bytes, so the limit is on the UTF-8 encoding.
            Some(EntryAtomError::TooLong {
                atom: raw.to_owned(),
                max: ENTRY_ATOM_MAX_BYTES,
            })
        } else if raw.ends_with(' ') {
            Some(EntryAtomError::TrailingSpace(raw.to_owned()))
        } else if raw.contains('.') {
            Some(EntryAtomError::ReservedDot(raw.to_owned()))
        } else if windows_device_name(raw).is_some() {
            Some(EntryAtomError::ReservedFilename(raw.to_owned()))
        } else {
            // Report the first offending character, scanning left to right.
            raw.chars()
                .find(|&candidate| is_forbidden_filename_character(candidate))
                .map(|character| EntryAtomError::InvalidCharacter {
                    atom: raw.to_owned(),
                    character,
                })
        };

        match rejection {
            Some(error) => Err(error),
            None => Ok(()),
        }
    }
}
/// Reports whether `character` may not appear inside an entry atom.
///
/// Control characters are rejected, as are the punctuation characters listed
/// in `FORBIDDEN_PUNCTUATION`.
fn is_forbidden_filename_character(character: char) -> bool {
    const FORBIDDEN_PUNCTUATION: [char; 10] = ['<', '>', ':', '"', '/', '\\', '|', '?', '*', ','];

    FORBIDDEN_PUNCTUATION.contains(&character) || character.is_control()
}
/// Returns the device-name portion of `raw` when Windows reserves it.
///
/// Only the text before the first `.` is considered, because Windows reserves
/// a device name even when an extension follows it. The comparison ignores
/// ASCII case. Recognized names are `CON`, `PRN`, `AUX`, `NUL`, and `COM`/`LPT`
/// followed by exactly one of the digits `0`-`9` or the superscript digits
/// `¹`, `²`, `³`, matching the list in Microsoft's file-naming documentation.
///
/// Returns `Some(basename)` (borrowed from `raw`) for a reserved name and
/// `None` otherwise.
fn windows_device_name(raw: &str) -> Option<&str> {
    let basename = raw.split('.').next().unwrap_or(raw);

    let is_plain_device = ["CON", "PRN", "AUX", "NUL"]
        .iter()
        .any(|device| basename.eq_ignore_ascii_case(device));

    // `str::get` yields `None` when the name is shorter than three bytes or
    // byte 3 is not a character boundary; neither can be `COMn`/`LPTn`.
    let is_numbered_device = basename
        .get(..3)
        .zip(basename.get(3..))
        .is_some_and(|(prefix, suffix)| {
            let mut suffix = suffix.chars();
            (prefix.eq_ignore_ascii_case("COM") || prefix.eq_ignore_ascii_case("LPT"))
                && matches!(
                    (suffix.next(), suffix.next()),
                    // U+00B2, U+00B3, U+00B9 are the superscript digits 2, 3 and 1.
                    (Some('0'..='9' | '\u{b2}' | '\u{b3}' | '\u{b9}'), None)
                )
        });

    (is_plain_device || is_numbered_device).then_some(basename)
}
impl Display for EntryAtom {
    /// Writes the atom exactly as its underlying string, honoring any width
    /// or alignment flags set on the formatter.
    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
        let text: &str = self.as_str();
        <str as Display>::fmt(text, f)
    }
}
impl AsRef<str> for EntryAtom {
fn as_ref(&self) -> &str {
self.as_str()
}
}
impl FromStr for EntryAtom {
type Err = EntryAtomError;
fn from_str(s: &str) -> Result<Self, Self::Err> {
Self::new(s)
}
}
impl TryFrom<String> for EntryAtom {
    type Error = EntryAtomError;

    /// Validates the owned string's contents via [`EntryAtom::new`].
    fn try_from(value: String) -> Result<Self, Self::Error> {
        let text: &str = value.as_str();
        Self::new(text)
    }
}
impl TryFrom<&str> for EntryAtom {
type Error = EntryAtomError;
fn try_from(value: &str) -> Result<Self, Self::Error> {
Self::new(value)
}
}
impl<'de> Deserialize<'de> for EntryAtom {
fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
where
D: serde::Deserializer<'de>,
{
let value = SmolStr::deserialize(deserializer)?;
Self::new(value.as_str()).map_err(serde::de::Error::custom)
}
}
/// A validated, dot-separated sequence of one or more entry atoms, such as
/// `core.design`.
///
/// Values built through [`EntryAddress::new`] are non-empty, do not start
/// with `.`, and consist only of segments that are valid [`EntryAtom`]s.
/// Serialization is transparent: the address is written as its string.
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash, Serialize)]
#[serde(transparent)]
pub struct EntryAddress(SmolStr);
impl EntryAddress {
    /// Validates `raw` as a dot-separated sequence of entry atoms and wraps it.
    ///
    /// # Errors
    ///
    /// Returns an [`EntryAddressError`] when `raw` is empty, starts with `.`,
    /// contains an empty segment, or contains a segment that is not a valid
    /// [`EntryAtom`].
    pub fn new(raw: impl AsRef<str>) -> Result<Self, EntryAddressError> {
        let raw = raw.as_ref();
        Self::validate(raw)?;
        Ok(Self(SmolStr::new(raw)))
    }

    /// Converts a single atom into a one-segment address.
    pub fn from_atom(atom: EntryAtom) -> Self {
        // A valid atom is already a valid one-segment address, so its string
        // is moved over as-is instead of being copied.
        Self(atom.0)
    }

    /// Borrows the address as a plain string slice.
    pub fn as_str(&self) -> &str {
        &self.0
    }

    /// Returns the last dot-separated segment of the address as an atom.
    ///
    /// # Panics
    ///
    /// Panics only if the address no longer satisfies the invariants enforced
    /// at construction, which would be a bug in this module.
    pub fn local_atom(&self) -> EntryAtom {
        EntryAtom::new(self.as_str().rsplit('.').next().expect("entry address is non-empty"))
            .expect("entry address segment is valid")
    }

    /// Returns a new address with `domain` prepended as the first segment.
    ///
    /// # Panics
    ///
    /// Panics only if the composed string fails validation, which cannot
    /// happen for a valid `domain` and a valid address.
    pub fn under_domain(&self, domain: &EntryAtom) -> Self {
        Self::new(format!("{}.{}", domain.as_str(), self.as_str()))
            .expect("domain and entry address compose into a valid entry address")
    }

    /// Reports whether the address begins with `domain` followed by a `.`.
    ///
    /// An address equal to `domain` itself, or one that merely shares a
    /// textual prefix with it (`corelib` vs `core`), does not match.
    pub fn starts_with_domain(&self, domain: &EntryAtom) -> bool {
        self.as_str().strip_prefix(domain.as_str()).is_some_and(|suffix| suffix.starts_with('.'))
    }

    /// Maps the address to a relative path: every segment but the last becomes
    /// a directory, and the last becomes a `<segment>.md` file name.
    pub fn to_lake_relative_path(&self) -> PathBuf {
        let mut path = PathBuf::new();
        let mut segments = self.as_str().split('.').peekable();
        while let Some(segment) = segments.next() {
            if segments.peek().is_some() {
                path.push(segment);
            } else {
                // Only the final segment names the Markdown file itself.
                path.push(format!("{segment}.md"));
            }
        }
        path
    }

    /// Parses a relative Markdown path back into an address.
    ///
    /// Directory components become leading segments and the file name, minus
    /// its `.md` suffix, becomes the final segment.
    ///
    /// # Errors
    ///
    /// Returns an [`EntryAddressError`] when the path is empty or absolute,
    /// contains a non-normal component (such as `..` or a root), is not valid
    /// UTF-8, has a component starting with `.`, lacks the `.md` suffix, has
    /// extra dots in the file name, or contains a component that is not a
    /// valid [`EntryAtom`].
    pub fn from_lake_relative_path(path: &Path) -> Result<Self, EntryAddressError> {
        if path.as_os_str().is_empty() || path.is_absolute() {
            return Err(EntryAddressError::InvalidRelativePath(path.to_path_buf()));
        }
        let mut segments = Vec::new();
        let mut components = path.components().peekable();
        while let Some(component) = components.next() {
            let Component::Normal(component) = component else {
                return Err(EntryAddressError::InvalidRelativePath(path.to_path_buf()));
            };
            let Some(component) = component.to_str() else {
                return Err(EntryAddressError::NonUtf8Path(path.to_path_buf()));
            };
            if component.starts_with('.') {
                return Err(EntryAddressError::ReservedBuiltinPath(component.to_owned()));
            }
            // Every component except the last is a directory and must be a
            // plain atom.
            if components.peek().is_some() {
                segments.push(EntryAtom::new(component)?);
                continue;
            }
            let Some(stem) = component.strip_suffix(".md") else {
                return Err(EntryAddressError::MissingMarkdownExtension(path.to_path_buf()));
            };
            // Checked here so the caller gets the filename-specific error
            // rather than the generic atom one.
            if stem.contains('.') {
                return Err(EntryAddressError::DottedFilename(component.to_owned()));
            }
            segments.push(EntryAtom::new(stem)?);
        }
        if segments.is_empty() {
            return Err(EntryAddressError::Empty);
        }
        Ok(Self::from_segments(segments))
    }

    /// Joins already-validated atoms with `.` into an address.
    fn from_segments(segments: Vec<EntryAtom>) -> Self {
        let path = segments.iter().map(EntryAtom::as_str).collect::<Vec<_>>().join(".");
        Self(SmolStr::new(path))
    }

    /// Checks the address rules: non-empty, no leading `.`, no empty segment,
    /// and every segment a valid atom.
    fn validate(raw: &str) -> Result<(), EntryAddressError> {
        if raw.is_empty() {
            return Err(EntryAddressError::Empty);
        }
        if raw.starts_with('.') {
            return Err(EntryAddressError::ReservedBuiltinPath(raw.to_owned()));
        }
        // `split` always yields at least one item, so finishing the loop means
        // at least one valid segment was seen; no separate flag is needed.
        for segment in raw.split('.') {
            if segment.is_empty() {
                return Err(EntryAddressError::EmptySegment(raw.to_owned()));
            }
            // Validate in place rather than building an atom only to drop it.
            EntryAtom::validate(segment)?;
        }
        Ok(())
    }
}
impl Display for EntryAddress {
    /// Writes the address exactly as its underlying string, honoring any
    /// width or alignment flags set on the formatter.
    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
        let text: &str = self.as_str();
        <str as Display>::fmt(text, f)
    }
}
impl AsRef<str> for EntryAddress {
fn as_ref(&self) -> &str {
self.as_str()
}
}
impl From<EntryAtom> for EntryAddress {
fn from(value: EntryAtom) -> Self {
Self::from_atom(value)
}
}
impl FromStr for EntryAddress {
type Err = EntryAddressError;
fn from_str(s: &str) -> Result<Self, Self::Err> {
Self::new(s)
}
}
impl TryFrom<String> for EntryAddress {
    type Error = EntryAddressError;

    /// Validates the owned string's contents via [`EntryAddress::new`].
    fn try_from(value: String) -> Result<Self, Self::Error> {
        let text: &str = value.as_str();
        Self::new(text)
    }
}
impl TryFrom<&str> for EntryAddress {
type Error = EntryAddressError;
fn try_from(value: &str) -> Result<Self, Self::Error> {
Self::new(value)
}
}
impl<'de> Deserialize<'de> for EntryAddress {
fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
where
D: serde::Deserializer<'de>,
{
let value = SmolStr::deserialize(deserializer)?;
Self::new(value.as_str()).map_err(serde::de::Error::custom)
}
}
/// Reasons a string is rejected as an [`EntryAtom`].
#[derive(Debug, Error, PartialEq, Eq)]
pub enum EntryAtomError {
/// The input string was empty.
#[error("entry atom must not be empty")]
Empty,
/// The input was longer than `max` bytes; `atom` holds the rejected input.
#[error(
"entry atom is too long for a cross-platform Markdown filename: {atom}; maximum is {max} bytes"
)]
TooLong {
atom: String,
max: usize,
},
/// The input contained `character`, which is a control character or one of
/// the forbidden punctuation characters; `atom` holds the rejected input.
#[error("entry atom contains invalid filename character `{character}`: {atom}")]
InvalidCharacter {
atom: String,
character: char,
},
/// The input ended with a space character.
#[error("entry atom must not end with a space: {0}")]
TrailingSpace(String),
/// The input contained a `.`, which separates atoms within an address.
#[error("entry atom must not contain reserved dot character: {0}")]
ReservedDot(String),
/// The input matched a Windows device name such as `CON` or `COM1`.
#[error("entry atom uses a reserved filename: {0}")]
ReservedFilename(String),
}
/// Reasons a string or path is rejected as an [`EntryAddress`].
#[derive(Debug, Error, PartialEq, Eq)]
pub enum EntryAddressError {
/// The input was empty or produced no segments.
#[error("entry address must not be empty")]
Empty,
/// Two dots were adjacent, or the address ended with a dot.
#[error("entry address contains an empty segment: {0}")]
EmptySegment(String),
/// The address, or a path component, started with `.`.
#[error("entry address uses reserved built-in path form: {0}")]
ReservedBuiltinPath(String),
/// The path was empty, absolute, or contained a non-normal component.
#[error("entry address must be a normal lake-relative Markdown path: {0}")]
InvalidRelativePath(PathBuf),
/// A path component could not be converted to a UTF-8 string.
#[error("entry address must be valid UTF-8: {0}")]
NonUtf8Path(PathBuf),
/// The final path component did not end in `.md`.
#[error("entry address must point at a Markdown file: {0}")]
MissingMarkdownExtension(PathBuf),
/// The file name contained a dot besides the one in the `.md` suffix.
#[error("entry filename must not contain dots: {0}")]
DottedFilename(String),
/// A segment failed [`EntryAtom`] validation; the atom error is shown as-is.
#[error(transparent)]
EntryAtom(#[from] EntryAtomError),
}
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn accepts_cross_platform_filename_stems() {
        // The last sample checks that non-ASCII (Japanese) text is accepted.
        for raw in ["concept-2", "Concept 2", "api_v2+draft", "設計ノート"] {
            let id = EntryAtom::new(raw).unwrap();
            assert_eq!(id.as_str(), raw);
        }
    }

    #[test]
    fn rejects_empty_id() {
        assert_eq!(EntryAtom::new("").unwrap_err(), EntryAtomError::Empty);
    }

    #[test]
    fn rejects_too_long_id() {
        assert!(matches!(
            EntryAtom::new("a".repeat(ENTRY_ATOM_MAX_BYTES + 1)).unwrap_err(),
            EntryAtomError::TooLong { max: ENTRY_ATOM_MAX_BYTES, .. }
        ));
    }

    #[test]
    fn accepts_id_at_maximum_length() {
        // The limit is inclusive: exactly `ENTRY_ATOM_MAX_BYTES` bytes is valid.
        let raw = "a".repeat(ENTRY_ATOM_MAX_BYTES);
        assert_eq!(EntryAtom::new(&raw).unwrap().as_str(), raw);
    }

    #[test]
    fn rejects_forbidden_filename_characters() {
        for raw in ["a/b", "a\\b", "a:b", "a*b", "a?b", "a\"b", "a<b", "a>b", "a|b", "a\nb"] {
            assert!(matches!(
                EntryAtom::new(raw).unwrap_err(),
                EntryAtomError::InvalidCharacter { .. }
            ));
        }
    }

    #[test]
    fn rejects_trailing_space() {
        assert!(matches!(
            EntryAtom::new("concept ").unwrap_err(),
            EntryAtomError::TrailingSpace(_)
        ));
    }

    #[test]
    fn rejects_reserved_dot() {
        assert!(matches!(EntryAtom::new("concept.").unwrap_err(), EntryAtomError::ReservedDot(_)));
        assert!(matches!(EntryAtom::new("api.v2").unwrap_err(), EntryAtomError::ReservedDot(_)));
        assert!(matches!(EntryAtom::new(".").unwrap_err(), EntryAtomError::ReservedDot(_)));
    }

    #[test]
    fn rejects_reserved_filenames() {
        for raw in ["con", "NUL", "com1", "LPT9"] {
            assert!(matches!(
                EntryAtom::new(raw).unwrap_err(),
                EntryAtomError::ReservedFilename(_)
            ));
        }
    }

    #[test]
    fn entry_address_accepts_one_or_more_atoms() {
        for raw in ["concept", "core.design", "core.design.routes"] {
            let path = EntryAddress::new(raw).unwrap();
            assert_eq!(path.as_str(), raw);
        }
    }

    #[test]
    fn entry_address_maps_to_lake_relative_markdown_path() {
        assert_eq!(
            EntryAddress::new("concept").unwrap().to_lake_relative_path(),
            Path::new("concept.md")
        );
        assert_eq!(
            EntryAddress::new("core.design").unwrap().to_lake_relative_path(),
            Path::new("core").join("design.md")
        );
    }

    #[test]
    fn entry_address_parses_lake_relative_markdown_path() {
        assert_eq!(
            EntryAddress::from_lake_relative_path(Path::new("core").join("design.md").as_path())
                .unwrap()
                .as_str(),
            "core.design"
        );
    }

    #[test]
    fn entry_address_round_trips_through_lake_relative_path() {
        let address = EntryAddress::new("core.design.routes").unwrap();
        let path = address.to_lake_relative_path();
        assert_eq!(EntryAddress::from_lake_relative_path(&path).unwrap(), address);
    }

    #[test]
    fn entry_address_exposes_local_atom_and_domain_composition() {
        let domain = EntryAtom::new("core").unwrap();
        let address = EntryAddress::new("design.routes").unwrap();
        assert_eq!(address.local_atom().as_str(), "routes");
        assert_eq!(address.under_domain(&domain).as_str(), "core.design.routes");
    }

    #[test]
    fn entry_address_domain_prefix_requires_dot_boundary() {
        let domain = EntryAtom::new("core").unwrap();
        assert!(EntryAddress::new("core.design").unwrap().starts_with_domain(&domain));
        assert!(!EntryAddress::new("core").unwrap().starts_with_domain(&domain));
        assert!(!EntryAddress::new("corelib.design").unwrap().starts_with_domain(&domain));
    }

    #[test]
    fn entry_address_rejects_reserved_builtins_and_empty_segments() {
        assert!(matches!(
            EntryAddress::new(".artifacts").unwrap_err(),
            EntryAddressError::ReservedBuiltinPath(_)
        ));
        assert!(matches!(
            EntryAddress::new("core.").unwrap_err(),
            EntryAddressError::EmptySegment(_)
        ));
        assert!(matches!(
            EntryAddress::new("core..design").unwrap_err(),
            EntryAddressError::EmptySegment(_)
        ));
    }

    #[test]
    fn entry_address_rejects_dotted_lake_filenames() {
        assert!(matches!(
            EntryAddress::from_lake_relative_path(Path::new("core.design.md")).unwrap_err(),
            EntryAddressError::DottedFilename(_)
        ));
    }
}