use std::error::Error;
use std::fmt;
use std::fmt::Write as _;
use std::sync::Arc;
use links_notation::{parse_lino_to_links, LiNo};
use crate::link_flags::LinkFlags;
use crate::link_network::{Link, LinkId, LinkMetadata, LinkNetwork, LinkType};
use crate::source::{ByteRange, Point, SourceSpan};
/// Failure raised while converting a link network to or from links-notation text.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum LinoSerializationError {
    /// The text could not be parsed as links notation; carries the parser's message.
    Parse(String),
    /// The text parsed, but its shape is not what the serializer produces.
    Structure(String),
}

impl fmt::Display for LinoSerializationError {
    /// Renders the variant-specific prefix followed by the carried message.
    fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            Self::Parse(message) => {
                formatter.write_fmt(format_args!("links-notation parse error: {message}"))
            }
            Self::Structure(message) => {
                formatter.write_fmt(format_args!("serialization structure error: {message}"))
            }
        }
    }
}

// No underlying source error is kept, so the default `Error` methods suffice.
impl Error for LinoSerializationError {}
impl LinkNetwork {
    /// Serializes the network to links-notation text.
    ///
    /// Every link in `self.links` becomes one statement followed by a newline.
    /// A link whose id appears as a value in `self.terms` is written with a
    /// `(reg: 1)` marker so that [`LinkNetwork::from_lino`] can rebuild the
    /// term table.
    #[must_use]
    pub fn to_lino(&self) -> String {
        let registered: std::collections::BTreeSet<u64> =
            self.terms.values().map(|id| id.0).collect();
        let mut output = String::new();
        for link in self.links.values() {
            encode_link(link, registered.contains(&link.id.0), &mut output);
            output.push('\n');
        }
        output
    }

    /// Rebuilds a network from text in the format written by [`LinkNetwork::to_lino`].
    ///
    /// Each top-level statement must be an identified link whose values are
    /// numeric references plus a `meta` sublink. `next_id` ends up one past the
    /// largest id that was read (or stays at its initial value if that is larger).
    ///
    /// # Errors
    ///
    /// Returns [`LinoSerializationError::Parse`] when the text is not valid
    /// links notation, and [`LinoSerializationError::Structure`] when a
    /// statement is not an identified link, an id or reference is not a `u64`,
    /// a value is neither a reference nor the `meta` sublink, the `meta`
    /// sublink is missing or malformed, or a link id is `u64::MAX` and
    /// therefore leaves no room for a following id.
    pub fn from_lino(text: &str) -> Result<Self, LinoSerializationError> {
        let statements = parse_lino_to_links(text)
            .map_err(|error| LinoSerializationError::Parse(error.to_string()))?;
        let mut network = Self::new();
        for statement in &statements {
            let LiNo::Link {
                id: Some(id),
                values,
            } = statement
            else {
                return Err(LinoSerializationError::Structure(
                    "top-level statement must be an identified link".to_string(),
                ));
            };
            let link_id = LinkId(parse_u64(id)?);
            let mut references = Vec::new();
            let mut meta_values: Option<&Vec<LiNo<String>>> = None;
            for value in values {
                match value {
                    LiNo::Ref(reference) => references.push(LinkId(parse_u64(reference)?)),
                    LiNo::Link {
                        id: Some(key),
                        values: fields,
                    } if key == "meta" => meta_values = Some(fields),
                    LiNo::Link { .. } => {
                        return Err(LinoSerializationError::Structure(
                            "statement values must be references or a meta sublink".to_string(),
                        ))
                    }
                }
            }
            let meta_values = meta_values.ok_or_else(|| {
                LinoSerializationError::Structure(
                    "statement is missing its meta sublink".to_string(),
                )
            })?;
            let (metadata, registered) = decode_meta(meta_values)?;
            // Only links flagged as registered are put back into the term table.
            if registered {
                if let Some(term) = metadata.term() {
                    network.terms.insert(Arc::from(term), link_id);
                }
            }
            // The id comes from untrusted text: `u64::MAX + 1` would panic in
            // debug builds and wrap to 0 in release builds, so reject it instead.
            let following_id = link_id.0.checked_add(1).ok_or_else(|| {
                LinoSerializationError::Structure(format!(
                    "link id `{}` leaves no room for a following id",
                    link_id.0
                ))
            })?;
            network.next_id = network.next_id.max(following_id);
            network.links.insert(
                link_id,
                Arc::new(Link {
                    id: link_id,
                    references: Arc::from(references),
                    metadata,
                }),
            );
        }
        Ok(network)
    }
}
/// Appends one link to `output` as a single links-notation statement.
///
/// The statement has the shape `(<id>: <ref>... (meta: <fields>...))`. Fields
/// are written in a fixed order: `t`, `n`, `term`, `def`, `lang`, `span`,
/// `flags`, `reg`. Only `n` is always present: `t`, `term`, `def`, `lang` and
/// `span` are skipped when the metadata has no such value, `flags` is skipped
/// when no flag bit is set, and `reg` is written only when `registered` is true.
/// Text fields go through `percent_encode`.
fn encode_link(link: &Link, registered: bool, output: &mut String) {
    const INFALLIBLE: &str = "writing to a String never fails";

    // Statement head: the link's own id, then each referenced id.
    write!(output, "({}:", link.id.0).expect(INFALLIBLE);
    for target in link.references.iter() {
        write!(output, " {}", target.0).expect(INFALLIBLE);
    }

    output.push_str(" (meta:");
    let meta = &link.metadata;

    if let Some(kind) = meta.link_type() {
        write!(output, " (t: {kind})").expect(INFALLIBLE);
    }

    let named = u8::from(meta.is_named());
    write!(output, " (n: {})", named).expect(INFALLIBLE);

    if let Some(term) = meta.term() {
        let escaped = percent_encode(term);
        write!(output, " (term: {})", escaped).expect(INFALLIBLE);
    }
    if let Some(definition) = meta.definition() {
        let escaped = percent_encode(definition);
        write!(output, " (def: {})", escaped).expect(INFALLIBLE);
    }
    if let Some(language) = meta.language() {
        let escaped = percent_encode(language);
        write!(output, " (lang: {})", escaped).expect(INFALLIBLE);
    }

    if let Some(span) = meta.span() {
        let (bytes, from, to) = (span.byte_range(), span.start_point(), span.end_point());
        // Six numbers: byte start/end, start row/column, end row/column.
        write!(
            output,
            " (span: {} {} {} {} {} {})",
            bytes.start(),
            bytes.end(),
            from.row(),
            from.column(),
            to.row(),
            to.column(),
        )
        .expect(INFALLIBLE);
    }

    match flag_bits(meta.flags()) {
        0 => {}
        bits => write!(output, " (flags: {bits})").expect(INFALLIBLE),
    }

    if registered {
        output.push_str(" (reg: 1)");
    }

    // Close the meta sublink and the statement.
    output.push_str("))");
}
/// Decodes the fields of a `meta` sublink.
///
/// Returns the rebuilt metadata together with a flag telling whether the link
/// was marked as registered (`(reg: 1)`).
///
/// Recognized fields are `t`, `n`, `term`, `def`, `lang`, `span`, `flags` and
/// `reg`. `n` and `reg` are true only when their single value is `1`. Flag
/// bits map as follows: bit 0 error, bit 1 containing error, bit 2 missing,
/// bit 3 extra; higher bits are ignored.
///
/// # Errors
///
/// Returns [`LinoSerializationError::Structure`] when a field is not an
/// identified link, has an unknown key, does not hold the expected values, or
/// holds a value that fails to parse or decode.
fn decode_meta(fields: &[LiNo<String>]) -> Result<(LinkMetadata, bool), LinoSerializationError> {
    let mut metadata = LinkMetadata::new();
    let mut registered = false;
    // Named differently from the sibling `flag_bits` function to avoid shadowing it.
    let mut raw_flags = 0u8;
    for field in fields {
        let LiNo::Link {
            id: Some(key),
            values,
        } = field
        else {
            return Err(LinoSerializationError::Structure(
                "meta field must be an identified link".to_string(),
            ));
        };
        match key.as_str() {
            "t" => metadata = metadata.with_link_type(parse_link_type(single_ref(values)?)?),
            "n" => metadata = metadata.with_named(single_ref(values)? == "1"),
            "term" => metadata = metadata.with_term(percent_decode(single_ref(values)?)?),
            "def" => metadata = metadata.with_definition(percent_decode(single_ref(values)?)?),
            "lang" => metadata = metadata.with_language(percent_decode(single_ref(values)?)?),
            "span" => metadata = metadata.with_span(parse_span(values)?),
            "flags" => raw_flags = parse_u8(single_ref(values)?)?,
            // Honour the value instead of mere presence, so `(reg: 0)` is not
            // mistaken for a registered link; this mirrors how `n` is read.
            "reg" => registered = single_ref(values)? == "1",
            other => {
                return Err(LinoSerializationError::Structure(format!(
                    "unknown meta field `{other}`"
                )))
            }
        }
    }
    // Flags are only replaced when at least one bit was set in the text.
    if raw_flags != 0 {
        let mut flags = LinkFlags::clean();
        if raw_flags & 0b0001 != 0 {
            flags = flags.with_error();
        }
        if raw_flags & 0b0010 != 0 {
            flags = flags.with_containing_error();
        }
        if raw_flags & 0b0100 != 0 {
            flags = flags.with_missing();
        }
        if raw_flags & 0b1000 != 0 {
            flags = flags.with_extra();
        }
        metadata = metadata.with_flags(flags);
    }
    Ok((metadata, registered))
}
/// Packs the four link flags into the low bits of a byte.
///
/// Bit 0 is `is_error`, bit 1 is `has_error`, bit 2 is `is_missing` and bit 3
/// is `is_extra`.
fn flag_bits(flags: LinkFlags) -> u8 {
    // Position in this array is the bit position in the result.
    let states = [
        flags.is_error(),
        flags.has_error(),
        flags.is_missing(),
        flags.is_extra(),
    ];
    states
        .iter()
        .enumerate()
        .fold(0u8, |bits, (position, &set)| {
            bits | (u8::from(set) << position)
        })
}
/// Decodes the six numbers of a `span` field into a [`SourceSpan`].
///
/// The numbers are, in order: byte start, byte end, start row, start column,
/// end row, end column.
///
/// # Errors
///
/// Returns [`LinoSerializationError::Structure`] when there are not exactly six
/// values, when a value is not a plain reference, or when a reference does not
/// parse as `usize`.
fn parse_span(values: &[LiNo<String>]) -> Result<SourceSpan, LinoSerializationError> {
    if values.len() != 6 {
        return Err(LinoSerializationError::Structure(
            "span field requires six numbers".to_string(),
        ));
    }

    let mut numbers = [0usize; 6];
    // The length check above guarantees `position` stays within the array.
    for (position, value) in values.iter().enumerate() {
        numbers[position] = match value {
            LiNo::Ref(reference) => match reference.parse() {
                Ok(number) => number,
                Err(_) => {
                    return Err(LinoSerializationError::Structure(format!(
                        "invalid span number `{reference}`"
                    )))
                }
            },
            _ => {
                return Err(LinoSerializationError::Structure(
                    "span field values must be numbers".to_string(),
                ))
            }
        };
    }

    let [byte_start, byte_end, start_row, start_column, end_row, end_column] = numbers;
    let byte_range = ByteRange::new(byte_start, byte_end);
    let start_point = Point::new(start_row, start_column);
    let end_point = Point::new(end_row, end_column);
    Ok(SourceSpan::new(byte_range, start_point, end_point))
}
/// Returns the text of a field's only value, which must be a plain reference.
///
/// # Errors
///
/// Returns [`LinoSerializationError::Structure`] when `values` is anything
/// other than exactly one reference.
fn single_ref(values: &[LiNo<String>]) -> Result<&str, LinoSerializationError> {
    let [LiNo::Ref(reference)] = values else {
        return Err(LinoSerializationError::Structure(
            "meta field must hold exactly one reference".to_string(),
        ));
    };
    Ok(reference)
}
/// Maps a lowercase link-type token from the text to its [`LinkType`] variant.
///
/// # Errors
///
/// Returns [`LinoSerializationError::Structure`] for a token that is not one of
/// the names listed in the match below.
fn parse_link_type(token: &str) -> Result<LinkType, LinoSerializationError> {
    let link_type = match token {
        "link" => LinkType::Link,
        "reference" => LinkType::Reference,
        "relation" => LinkType::Relation,
        "language" => LinkType::Language,
        "grammar" => LinkType::Grammar,
        "type" => LinkType::Type,
        "concept" => LinkType::Concept,
        "syntax" => LinkType::Syntax,
        "field" => LinkType::Field,
        "trivia" => LinkType::Trivia,
        "token" => LinkType::Token,
        "document" => LinkType::Document,
        "semantic" => LinkType::Semantic,
        "region" => LinkType::Region,
        "object" => LinkType::Object,
        other => {
            let message = format!("unknown link type `{other}`");
            return Err(LinoSerializationError::Structure(message));
        }
    };
    Ok(link_type)
}
/// Parses a link id or reference written as a decimal `u64`.
///
/// # Errors
///
/// Returns [`LinoSerializationError::Structure`] naming the offending text when
/// it does not parse as `u64`.
fn parse_u64(value: &str) -> Result<u64, LinoSerializationError> {
    let Ok(number) = value.parse::<u64>() else {
        return Err(LinoSerializationError::Structure(format!(
            "invalid link id `{value}`"
        )));
    };
    Ok(number)
}
/// Parses the value of a `flags` field as a `u8`.
///
/// # Errors
///
/// Returns [`LinoSerializationError::Structure`] naming the offending text when
/// it does not parse as `u8`.
fn parse_u8(value: &str) -> Result<u8, LinoSerializationError> {
    let Ok(number) = value.parse::<u8>() else {
        return Err(LinoSerializationError::Structure(format!(
            "invalid flags value `{value}`"
        )));
    };
    Ok(number)
}
/// Escapes `value` so that it forms a single bare token.
///
/// ASCII letters, ASCII digits, `-`, `_` and `.` are copied as they are; every
/// other byte of the UTF-8 encoding becomes `%` followed by two uppercase hex
/// digits. The empty string is written as a lone `%` so that the token is never
/// empty.
fn percent_encode(value: &str) -> String {
    if value.is_empty() {
        return "%".to_string();
    }

    let mut encoded = String::with_capacity(value.len());
    for byte in value.bytes() {
        match byte {
            b'a'..=b'z' | b'A'..=b'Z' | b'0'..=b'9' | b'-' | b'_' | b'.' => {
                encoded.push(char::from(byte));
            }
            _ => write!(encoded, "%{byte:02X}").expect("writing to a String never fails"),
        }
    }
    encoded
}
/// Reverses `percent_encode`.
///
/// A lone `%` decodes to the empty string. Otherwise each `%` must be followed
/// by two hex digits (either case) giving one byte; all other bytes are copied
/// through unchanged.
///
/// # Errors
///
/// Returns [`LinoSerializationError::Structure`] when a `%` has fewer than two
/// bytes after it, when an escape digit is not hexadecimal, or when the decoded
/// bytes are not valid UTF-8.
fn percent_decode(value: &str) -> Result<String, LinoSerializationError> {
    if value == "%" {
        return Ok(String::new());
    }

    let mut remaining = value.as_bytes();
    let mut decoded = Vec::with_capacity(remaining.len());
    while let [first, tail @ ..] = remaining {
        if *first != b'%' {
            decoded.push(*first);
            remaining = tail;
            continue;
        }
        // An escape needs both hex digits to be present after the `%`.
        let [high, low, after_escape @ ..] = tail else {
            return Err(LinoSerializationError::Structure(
                "truncated percent escape".to_string(),
            ));
        };
        let high = hex_value(*high)?;
        let low = hex_value(*low)?;
        decoded.push((high << 4) | low);
        remaining = after_escape;
    }

    match String::from_utf8(decoded) {
        Ok(text) => Ok(text),
        Err(_) => Err(LinoSerializationError::Structure(
            "percent escape is not valid UTF-8".to_string(),
        )),
    }
}
/// Converts one ASCII hex digit (`0-9`, `a-f`, `A-F`) to its value `0..=15`.
///
/// # Errors
///
/// Returns [`LinoSerializationError::Structure`] for any other byte.
fn hex_value(byte: u8) -> Result<u8, LinoSerializationError> {
    let digit = match byte {
        b'0'..=b'9' => byte - b'0',
        b'A'..=b'F' => 10 + (byte - b'A'),
        b'a'..=b'f' => 10 + (byte - b'a'),
        _ => {
            return Err(LinoSerializationError::Structure(
                "invalid percent escape digit".to_string(),
            ));
        }
    };
    Ok(digit)
}