use sim_kernel::{Cx, EncodeOptions, EncodePosition, Expr, ReadPolicy, SourceId, Symbol};
use crate::{Input, Output, decode_tree_with_codec, encode_with_codec};
/// Parse / encode / round-trip interface over a codec.
///
/// Every method takes the evaluation context `cx` mutably and returns a
/// record value rather than a `Result`; the record types carry their own
/// `diagnostics` lists.
pub trait CodecPrism {
/// Parses `text` and returns the outcome as a [`PrismParse`].
fn parse(&self, cx: &mut Cx, text: &str) -> PrismParse;
/// Encodes the expression identified by `id` for the given encode
/// `position` and returns the outcome as a [`PrismEncode`].
fn encode(&self, cx: &mut Cx, id: &SemanticId, position: EncodePosition) -> PrismEncode;
/// Runs a round trip starting from `text` and returns the collected
/// [`RoundTrip`] record.
fn round_trip(&self, cx: &mut Cx, text: &str, position: EncodePosition) -> RoundTrip;
}
/// A prism bound to one codec symbol and one [`PrismSurface`] classification.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct RuntimeCodecPrism {
// Codec symbol passed to the decode/encode helpers and echoed in results.
codec: Symbol,
// Surface classification; selects how parse failures are reported.
surface: PrismSurface,
}
impl RuntimeCodecPrism {
    /// Creates a prism for `codec` on the [`PrismSurface::GeneralPurpose`] surface.
    pub fn general(codec: Symbol) -> Self {
        Self {
            codec,
            surface: PrismSurface::GeneralPurpose,
        }
    }

    /// Creates a prism for `codec` on a [`PrismSurface::Domain`] surface named `domain`.
    ///
    /// The name appears in the `domain-rejected` diagnostic emitted on parse failure.
    pub fn domain(codec: Symbol, domain: impl Into<String>) -> Self {
        Self {
            codec,
            surface: PrismSurface::Domain {
                name: domain.into(),
            },
        }
    }

    /// Creates a prism for `codec` on a [`PrismSurface::BinaryInspection`] surface
    /// with the [`BinaryCarrier::Bytes`] carrier.
    pub fn binary(codec: Symbol) -> Self {
        Self {
            codec,
            surface: PrismSurface::BinaryInspection {
                carrier: BinaryCarrier::Bytes,
            },
        }
    }

    /// Creates a prism for `codec` on a [`PrismSurface::BinaryInspection`] surface
    /// with the [`BinaryCarrier::Base64Text`] carrier.
    pub fn binary_base64(codec: Symbol) -> Self {
        Self {
            codec,
            surface: PrismSurface::BinaryInspection {
                carrier: BinaryCarrier::Base64Text,
            },
        }
    }

    /// Returns the codec symbol this prism was built with.
    pub fn codec(&self) -> &Symbol {
        &self.codec
    }

    /// Parses a byte payload; the byte-input counterpart of `CodecPrism::parse`.
    pub fn parse_bytes(&self, cx: &mut Cx, bytes: &[u8]) -> PrismParse {
        self.parse_input(cx, Input::Bytes(bytes.to_vec()), bytes.len())
    }

    /// Round-trips a byte payload; the byte-input counterpart of
    /// `CodecPrism::round_trip`.
    pub fn round_trip_bytes(
        &self,
        cx: &mut Cx,
        bytes: &[u8],
        position: EncodePosition,
    ) -> RoundTrip {
        self.round_trip_input(cx, Input::Bytes(bytes.to_vec()), bytes.len(), position)
    }

    /// Decodes `input` with this prism's codec and packages the outcome.
    ///
    /// `source_len` is the length of the original input; it is only used for
    /// the fallback span when the decoded tree yields no spans of its own.
    ///
    /// On success the result carries the semantic id, the expression, the
    /// span map and no diagnostics. On failure the result carries no
    /// id/expression/spans and a single error diagnostic built from the
    /// decoder's error message.
    fn parse_input(&self, cx: &mut Cx, input: Input, source_len: usize) -> PrismParse {
        // Record the input kind before `input` is moved into the decoder.
        let input_kind = match &input {
            Input::Text(_) => PrismInputKind::Text,
            Input::Bytes(_) => PrismInputKind::Bytes,
        };
        let inspection = PrismInspection::new(input_kind, self.surface.is_executable());
        let source_id = format!("codec-prism:{}", self.codec);
        // `input` is not needed afterwards, so it is moved rather than cloned;
        // this avoids copying the whole payload on every parse and reparse.
        match decode_tree_with_codec(
            cx,
            &self.codec,
            input,
            ReadPolicy::default(),
            source_id.clone(),
        ) {
            Ok(tree) => {
                let semantic_id = SemanticId::from_expr(tree.expr.clone());
                let mut span_map = Vec::new();
                collect_spans(&tree, &mut span_map);
                if span_map.is_empty() {
                    // No origin information in the tree: fall back to one span
                    // covering the entire input.
                    span_map.push(PrismSpan {
                        source: SourceId(source_id),
                        start: 0,
                        end: source_len,
                    });
                }
                let diagnostics = self.surface_diagnostics(true, None);
                PrismParse {
                    codec: self.codec.clone(),
                    semantic_id: Some(semantic_id),
                    expr: Some(tree.expr),
                    span_map,
                    diagnostics,
                    inspection,
                }
            }
            Err(error) => PrismParse {
                codec: self.codec.clone(),
                semantic_id: None,
                expr: None,
                span_map: Vec::new(),
                diagnostics: self.surface_diagnostics(false, Some(error.to_string())),
                inspection,
            },
        }
    }

    /// Builds the diagnostics for a parse attempt.
    ///
    /// A rejected parse with an error message yields one diagnostic:
    /// `domain-rejected` on a domain surface, `parse-error` on any other
    /// surface. Every other combination yields no diagnostics.
    fn surface_diagnostics(&self, accepted: bool, error: Option<String>) -> Vec<PrismDiagnostic> {
        match (&self.surface, accepted, error) {
            (PrismSurface::Domain { name }, false, Some(error)) => vec![PrismDiagnostic::error(
                "domain-rejected",
                format!("{name} codec rejected non-domain input: {error}"),
            )],
            (_, false, Some(error)) => {
                vec![PrismDiagnostic::error("parse-error", error)]
            }
            _ => Vec::new(),
        }
    }

    /// Converts an encoder output back into a decoder input of the same kind
    /// (text stays text, bytes stay bytes), copying the payload.
    fn output_to_input(&self, output: &PrismOutput) -> Input {
        match output {
            PrismOutput::Text(text) => Input::Text(text.clone()),
            PrismOutput::Bytes(bytes) => Input::Bytes(bytes.clone()),
        }
    }

    /// Runs parse -> encode -> reparse and summarises the result.
    ///
    /// Encoding is attempted only when the first parse produced a semantic
    /// id; otherwise a `parse-missing` error stands in for the encode step.
    /// The reparse runs only when encoding produced output, and uses the
    /// output's length as its source length.
    fn round_trip_input(
        &self,
        cx: &mut Cx,
        input: Input,
        source_len: usize,
        position: EncodePosition,
    ) -> RoundTrip {
        let parse = self.parse_input(cx, input, source_len);
        let encode = parse
            .semantic_id
            .as_ref()
            .map(|id| self.encode(cx, id, position))
            .unwrap_or_else(|| PrismEncode {
                codec: self.codec.clone(),
                position,
                output: None,
                diagnostics: vec![PrismDiagnostic::error(
                    "parse-missing",
                    "parse did not produce a semantic id",
                )],
            });
        let reparsed = encode.output.as_ref().map(|output| {
            let input = self.output_to_input(output);
            let len = output.len();
            self.parse_input(cx, input, len)
        });
        let loss_report = LossReport::from_parts(&parse, &encode, reparsed.as_ref());
        RoundTrip {
            parse,
            encode,
            reparsed,
            loss_report,
        }
    }
}
impl CodecPrism for RuntimeCodecPrism {
fn parse(&self, cx: &mut Cx, text: &str) -> PrismParse {
self.parse_input(cx, Input::Text(text.to_owned()), text.len())
}
fn encode(&self, cx: &mut Cx, id: &SemanticId, position: EncodePosition) -> PrismEncode {
let Some(expr) = &id.expr else {
return PrismEncode {
codec: self.codec.clone(),
position,
output: None,
diagnostics: vec![PrismDiagnostic::error(
"semantic-id-missing",
"semantic id does not carry an expression for encoding",
)],
};
};
let options = EncodeOptions {
position,
..EncodeOptions::default()
};
match encode_with_codec(cx, &self.codec, expr, options) {
Ok(Output::Text(text)) => PrismEncode {
codec: self.codec.clone(),
position,
output: Some(PrismOutput::Text(text)),
diagnostics: Vec::new(),
},
Ok(Output::Bytes(bytes)) => PrismEncode {
codec: self.codec.clone(),
position,
output: Some(PrismOutput::Bytes(bytes)),
diagnostics: Vec::new(),
},
Err(error) => PrismEncode {
codec: self.codec.clone(),
position,
output: None,
diagnostics: vec![PrismDiagnostic::error("encode-error", error.to_string())],
},
}
}
fn round_trip(&self, cx: &mut Cx, text: &str, position: EncodePosition) -> RoundTrip {
self.round_trip_input(cx, Input::Text(text.to_owned()), text.len(), position)
}
}
/// Classification attached to a `RuntimeCodecPrism`.
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum PrismSurface {
/// Surface selected by `RuntimeCodecPrism::general`.
GeneralPurpose,
/// Surface selected by `RuntimeCodecPrism::domain`.
Domain {
/// Domain name; included in the `domain-rejected` diagnostic message.
name: String,
},
/// Surface selected by `RuntimeCodecPrism::binary` and
/// `RuntimeCodecPrism::binary_base64`.
BinaryInspection {
/// Carrier recorded for the binary payload.
carrier: BinaryCarrier,
},
}
impl PrismSurface {
// Returns `false` for every variant. The parse path stores this value in
// `PrismInspection::trusted_executable`.
fn is_executable(&self) -> bool {
false
}
}
/// Carrier tag stored in `PrismSurface::BinaryInspection`.
///
/// NOTE(review): nothing in the visible part of this file branches on the
/// carrier; it is only recorded at construction time.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum BinaryCarrier {
/// Set by `RuntimeCodecPrism::binary`.
Bytes,
/// Set by `RuntimeCodecPrism::binary_base64`.
Base64Text,
}
/// Kind of input a parse was given, mirroring the `Input` variant used.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum PrismInputKind {
/// The parse received `Input::Text`.
Text,
/// The parse received `Input::Bytes`.
Bytes,
}
/// Inspection metadata attached to every parse result.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct PrismInspection {
    /// Kind of input the parse was given.
    pub input: PrismInputKind,
    /// Flag supplied by the caller of `new`.
    pub trusted_executable: bool,
}

impl PrismInspection {
    /// Bundles the two values into a record without further processing.
    fn new(input: PrismInputKind, trusted_executable: bool) -> Self {
        PrismInspection {
            trusted_executable,
            input,
        }
    }
}
/// Identity of a parsed expression: a stable string key plus, optionally,
/// the expression itself.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct SemanticId {
    /// String key of the form `expr:<16 hex digits>`.
    pub stable: String,
    /// The expression the id was derived from, when available.
    pub expr: Option<Expr>,
}

impl SemanticId {
    /// Derives an id from `expr`.
    ///
    /// The key is `stable_hash` applied to the `Debug` rendering of
    /// `expr.canonical_key()`, prefixed with `expr:`. The expression is
    /// kept inside the id.
    pub fn from_expr(expr: Expr) -> Self {
        let canonical = format!("{:?}", expr.canonical_key());
        let digest = stable_hash(&canonical);
        Self {
            stable: format!("expr:{digest}"),
            expr: Some(expr),
        }
    }
}
/// A `start..end` range within an identified source.
///
/// NOTE(review): the fallback span built by the parse path runs from 0 to
/// the input's byte length; the unit of spans copied from tree origins is
/// not visible here and is presumably the same. Confirm.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct PrismSpan {
/// Identifier of the source the range refers to.
pub source: SourceId,
/// Start offset of the range.
pub start: usize,
/// End offset of the range.
pub end: usize,
}
/// One diagnostic message produced while parsing, encoding or round-tripping.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct PrismDiagnostic {
    /// Severity of the diagnostic.
    pub severity: DiagnosticSeverity,
    /// Short machine-readable code such as `parse-error`.
    pub code: String,
    /// Human-readable message.
    pub message: String,
    /// Optional source span the diagnostic refers to.
    pub span: Option<PrismSpan>,
}

impl PrismDiagnostic {
    /// Builds an error-severity diagnostic with no span attached.
    pub fn error(code: impl Into<String>, message: impl Into<String>) -> Self {
        let (code, message) = (code.into(), message.into());
        PrismDiagnostic {
            severity: DiagnosticSeverity::Error,
            code,
            message,
            span: None,
        }
    }
}
/// Severity level of a `PrismDiagnostic`.
///
/// Only `Error` is constructed in the visible part of this file (through
/// `PrismDiagnostic::error`).
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum DiagnosticSeverity {
Info,
Warning,
Error,
}
/// Outcome of one parse attempt.
///
/// A successful parse has `semantic_id` and `expr` set and a non-empty
/// `span_map`. A failed parse has both set to `None`, an empty `span_map`
/// and an error entry in `diagnostics`.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct PrismParse {
/// Codec symbol the parse was run with.
pub codec: Symbol,
/// Semantic id derived from the decoded expression, if decoding succeeded.
pub semantic_id: Option<SemanticId>,
/// The decoded expression, if decoding succeeded.
pub expr: Option<Expr>,
/// Spans collected from the decoded tree, or one whole-input fallback span.
pub span_map: Vec<PrismSpan>,
/// Diagnostics produced by the parse.
pub diagnostics: Vec<PrismDiagnostic>,
/// Input kind and executable flag recorded for the parse.
pub inspection: PrismInspection,
}
/// Payload produced by an encode step: either text or raw bytes.
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum PrismOutput {
    /// Textual encoder output.
    Text(String),
    /// Binary encoder output.
    Bytes(Vec<u8>),
}

impl PrismOutput {
    /// Renders the output for display.
    ///
    /// Text is returned unchanged. Bytes are rendered as
    /// `"<count> bytes: <lowercase hex>"` with two hex digits per byte and
    /// no separators, e.g. `"2 bytes: 0aff"`.
    pub fn display(&self) -> String {
        use std::fmt::Write as _;

        match self {
            Self::Text(text) => text.clone(),
            Self::Bytes(bytes) => {
                // Write the hex digits straight into one buffer instead of
                // allocating a String per byte and joining them afterwards.
                let mut rendered = format!("{} bytes: ", bytes.len());
                rendered.reserve(bytes.len() * 2);
                for byte in bytes {
                    // Formatting into a `String` never returns an error.
                    let _ = write!(rendered, "{byte:02x}");
                }
                rendered
            }
        }
    }

    /// Returns the payload length in bytes (UTF-8 byte length for text).
    pub fn len(&self) -> usize {
        match self {
            Self::Text(text) => text.len(),
            Self::Bytes(bytes) => bytes.len(),
        }
    }

    /// Returns `true` when the payload has length zero.
    pub fn is_empty(&self) -> bool {
        self.len() == 0
    }
}
/// Outcome of one encode attempt.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct PrismEncode {
/// Codec symbol the encode was run with.
pub codec: Symbol,
/// Encode position that was requested.
pub position: EncodePosition,
/// Encoder output; `None` when encoding did not happen or failed.
pub output: Option<PrismOutput>,
/// Diagnostics produced by the encode step.
pub diagnostics: Vec<PrismDiagnostic>,
}
/// Summary of what a round trip preserved.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct LossReport {
    /// `true` only when semantic identity held and no diagnostics were collected.
    pub lossless: bool,
    /// `true` when the parse and the reparse both produced a semantic id
    /// and the two `stable` keys are equal.
    pub semantic_identity: bool,
    /// All diagnostics from parse, encode and reparse, in that order,
    /// followed by a `semantic-identity-loss` entry when identity did not hold.
    pub diagnostics: Vec<PrismDiagnostic>,
}

impl LossReport {
    /// Builds the report from the three stages of a round trip.
    ///
    /// `reparsed` is `None` when the encode step produced no output; in
    /// that case semantic identity is reported as lost.
    fn from_parts(parse: &PrismParse, encode: &PrismEncode, reparsed: Option<&PrismParse>) -> Self {
        let first_id = parse.semantic_id.as_ref();
        let second_id = reparsed.and_then(|again| again.semantic_id.as_ref());
        let semantic_identity = matches!(
            (first_id, second_id),
            (Some(left), Some(right)) if left.stable == right.stable
        );

        // Gather the stage diagnostics in pipeline order.
        let mut diagnostics: Vec<PrismDiagnostic> = parse
            .diagnostics
            .iter()
            .chain(&encode.diagnostics)
            .chain(reparsed.into_iter().flat_map(|again| again.diagnostics.iter()))
            .cloned()
            .collect();

        if !semantic_identity {
            diagnostics.push(PrismDiagnostic::error(
                "semantic-identity-loss",
                "parse and reparse semantic ids differ",
            ));
        }

        let lossless = semantic_identity && diagnostics.is_empty();
        Self {
            lossless,
            semantic_identity,
            diagnostics,
        }
    }
}
/// Full record of a parse -> encode -> reparse round trip.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct RoundTrip {
/// Result of parsing the original input.
pub parse: PrismParse,
/// Result of the encode step (or its stand-in when parsing gave no semantic id).
pub encode: PrismEncode,
/// Result of parsing the encoder output; `None` when there was no output.
pub reparsed: Option<PrismParse>,
/// Summary comparing the parse with the reparse.
pub loss_report: LossReport,
}
/// Appends the origin span of `tree` and of all its descendants to `spans`.
///
/// Nodes are visited parent first, then children in order; nodes without
/// an origin contribute nothing.
fn collect_spans(tree: &sim_kernel::LocatedExprTree, spans: &mut Vec<PrismSpan>) {
    let own_span = tree.origin.as_ref().map(|origin| PrismSpan {
        source: origin.source.clone(),
        start: origin.span.start,
        end: origin.span.end,
    });
    spans.extend(own_span);

    for subtree in &tree.children {
        collect_spans(subtree, spans);
    }
}
/// Hashes `text` with 64-bit FNV-1a and returns the digest as 16 lowercase
/// hex digits (zero-padded).
fn stable_hash(text: &str) -> String {
    const FNV_OFFSET_BASIS: u64 = 0xcbf2_9ce4_8422_2325;
    const FNV_PRIME: u64 = 0x0000_0100_0000_01b3;

    // FNV-1a: xor the byte in first, then multiply by the prime (wrapping).
    let digest = text.bytes().fold(FNV_OFFSET_BASIS, |state, byte| {
        (state ^ u64::from(byte)).wrapping_mul(FNV_PRIME)
    });
    format!("{digest:016x}")
}