1use sim_kernel::{Cx, EncodeOptions, EncodePosition, Expr, ReadPolicy, SourceId, Symbol};
9
10use crate::{Input, Output, decode_tree_with_codec, encode_with_codec};
11
12pub trait CodecPrism {
14 fn parse(&self, cx: &mut Cx, text: &str) -> PrismParse;
16
17 fn encode(&self, cx: &mut Cx, id: &SemanticId, position: EncodePosition) -> PrismEncode;
19
20 fn round_trip(&self, cx: &mut Cx, text: &str, position: EncodePosition) -> RoundTrip;
22}
23
24#[derive(Clone, Debug, PartialEq, Eq)]
26pub struct RuntimeCodecPrism {
27 codec: Symbol,
28 surface: PrismSurface,
29}
30
31impl RuntimeCodecPrism {
32 pub fn general(codec: Symbol) -> Self {
34 Self {
35 codec,
36 surface: PrismSurface::GeneralPurpose,
37 }
38 }
39
40 pub fn domain(codec: Symbol, domain: impl Into<String>) -> Self {
42 Self {
43 codec,
44 surface: PrismSurface::Domain {
45 name: domain.into(),
46 },
47 }
48 }
49
50 pub fn binary(codec: Symbol) -> Self {
52 Self {
53 codec,
54 surface: PrismSurface::BinaryInspection {
55 carrier: BinaryCarrier::Bytes,
56 },
57 }
58 }
59
60 pub fn binary_base64(codec: Symbol) -> Self {
62 Self {
63 codec,
64 surface: PrismSurface::BinaryInspection {
65 carrier: BinaryCarrier::Base64Text,
66 },
67 }
68 }
69
70 pub fn codec(&self) -> &Symbol {
72 &self.codec
73 }
74
75 pub fn parse_bytes(&self, cx: &mut Cx, bytes: &[u8]) -> PrismParse {
78 self.parse_input(cx, Input::Bytes(bytes.to_vec()), bytes.len())
79 }
80
81 pub fn round_trip_bytes(
83 &self,
84 cx: &mut Cx,
85 bytes: &[u8],
86 position: EncodePosition,
87 ) -> RoundTrip {
88 self.round_trip_input(cx, Input::Bytes(bytes.to_vec()), bytes.len(), position)
89 }
90
91 fn parse_input(&self, cx: &mut Cx, input: Input, source_len: usize) -> PrismParse {
92 let input_kind = match &input {
93 Input::Text(_) => PrismInputKind::Text,
94 Input::Bytes(_) => PrismInputKind::Bytes,
95 };
96 let source_id = format!("codec-prism:{}", self.codec);
97 match decode_tree_with_codec(
98 cx,
99 &self.codec,
100 input.clone(),
101 ReadPolicy::default(),
102 source_id.clone(),
103 ) {
104 Ok(tree) => {
105 let semantic_id = SemanticId::from_expr(tree.expr.clone());
106 let mut span_map = Vec::new();
107 collect_spans(&tree, &mut span_map);
108 if span_map.is_empty() {
109 span_map.push(PrismSpan {
110 source: SourceId(source_id),
111 start: 0,
112 end: source_len,
113 });
114 }
115 let diagnostics = self.surface_diagnostics(true, None);
116 PrismParse {
117 codec: self.codec.clone(),
118 semantic_id: Some(semantic_id),
119 expr: Some(tree.expr),
120 span_map,
121 diagnostics,
122 inspection: PrismInspection::new(input_kind, self.surface.is_executable()),
123 }
124 }
125 Err(error) => PrismParse {
126 codec: self.codec.clone(),
127 semantic_id: None,
128 expr: None,
129 span_map: Vec::new(),
130 diagnostics: self.surface_diagnostics(false, Some(error.to_string())),
131 inspection: PrismInspection::new(input_kind, self.surface.is_executable()),
132 },
133 }
134 }
135
136 fn surface_diagnostics(&self, accepted: bool, error: Option<String>) -> Vec<PrismDiagnostic> {
137 match (&self.surface, accepted, error) {
138 (PrismSurface::Domain { name }, false, Some(error)) => vec![PrismDiagnostic::error(
139 "domain-rejected",
140 format!("{name} codec rejected non-domain input: {error}"),
141 )],
142 (_, false, Some(error)) => {
143 vec![PrismDiagnostic::error("parse-error", error)]
144 }
145 _ => Vec::new(),
146 }
147 }
148
149 fn output_to_input(&self, output: &PrismOutput) -> Input {
150 match output {
151 PrismOutput::Text(text) => Input::Text(text.clone()),
152 PrismOutput::Bytes(bytes) => Input::Bytes(bytes.clone()),
153 }
154 }
155
156 fn round_trip_input(
157 &self,
158 cx: &mut Cx,
159 input: Input,
160 source_len: usize,
161 position: EncodePosition,
162 ) -> RoundTrip {
163 let parse = self.parse_input(cx, input, source_len);
164 let encode = parse
165 .semantic_id
166 .as_ref()
167 .map(|id| self.encode(cx, id, position))
168 .unwrap_or_else(|| PrismEncode {
169 codec: self.codec.clone(),
170 position,
171 output: None,
172 diagnostics: vec![PrismDiagnostic::error(
173 "parse-missing",
174 "parse did not produce a semantic id",
175 )],
176 });
177 let reparsed = encode.output.as_ref().map(|output| {
178 let input = self.output_to_input(output);
179 let len = output.len();
180 self.parse_input(cx, input, len)
181 });
182 let loss_report = LossReport::from_parts(&parse, &encode, reparsed.as_ref());
183 RoundTrip {
184 parse,
185 encode,
186 reparsed,
187 loss_report,
188 }
189 }
190}
191
192impl CodecPrism for RuntimeCodecPrism {
193 fn parse(&self, cx: &mut Cx, text: &str) -> PrismParse {
194 self.parse_input(cx, Input::Text(text.to_owned()), text.len())
195 }
196
197 fn encode(&self, cx: &mut Cx, id: &SemanticId, position: EncodePosition) -> PrismEncode {
198 let Some(expr) = &id.expr else {
199 return PrismEncode {
200 codec: self.codec.clone(),
201 position,
202 output: None,
203 diagnostics: vec![PrismDiagnostic::error(
204 "semantic-id-missing",
205 "semantic id does not carry an expression for encoding",
206 )],
207 };
208 };
209 let options = EncodeOptions {
210 position,
211 ..EncodeOptions::default()
212 };
213 match encode_with_codec(cx, &self.codec, expr, options) {
214 Ok(Output::Text(text)) => PrismEncode {
215 codec: self.codec.clone(),
216 position,
217 output: Some(PrismOutput::Text(text)),
218 diagnostics: Vec::new(),
219 },
220 Ok(Output::Bytes(bytes)) => PrismEncode {
221 codec: self.codec.clone(),
222 position,
223 output: Some(PrismOutput::Bytes(bytes)),
224 diagnostics: Vec::new(),
225 },
226 Err(error) => PrismEncode {
227 codec: self.codec.clone(),
228 position,
229 output: None,
230 diagnostics: vec![PrismDiagnostic::error("encode-error", error.to_string())],
231 },
232 }
233 }
234
235 fn round_trip(&self, cx: &mut Cx, text: &str, position: EncodePosition) -> RoundTrip {
236 self.round_trip_input(cx, Input::Text(text.to_owned()), text.len(), position)
237 }
238}
239
240#[derive(Clone, Debug, PartialEq, Eq)]
242pub enum PrismSurface {
243 GeneralPurpose,
245 Domain {
247 name: String,
249 },
250 BinaryInspection {
252 carrier: BinaryCarrier,
254 },
255}
256
257impl PrismSurface {
258 fn is_executable(&self) -> bool {
259 false
260 }
261}
262
/// Carrier recorded on a [`PrismSurface::BinaryInspection`] surface.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum BinaryCarrier {
    /// Carrier chosen by `RuntimeCodecPrism::binary`.
    Bytes,
    /// Carrier chosen by `RuntimeCodecPrism::binary_base64`.
    Base64Text,
}
271
/// Kind of input that was handed to a parse.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum PrismInputKind {
    /// The parse received text input.
    Text,
    /// The parse received byte input.
    Bytes,
}
280
281#[derive(Clone, Debug, PartialEq, Eq)]
283pub struct PrismInspection {
284 pub input: PrismInputKind,
286 pub trusted_executable: bool,
288}
289
290impl PrismInspection {
291 fn new(input: PrismInputKind, trusted_executable: bool) -> Self {
292 Self {
293 input,
294 trusted_executable,
295 }
296 }
297}
298
299#[derive(Clone, Debug, PartialEq, Eq)]
301pub struct SemanticId {
302 pub stable: String,
304 pub expr: Option<Expr>,
306}
307
308impl SemanticId {
309 pub fn from_expr(expr: Expr) -> Self {
311 let stable = format!(
312 "expr:{}",
313 stable_hash(&format!("{:?}", expr.canonical_key()))
314 );
315 Self {
316 stable,
317 expr: Some(expr),
318 }
319 }
320}
321
322#[derive(Clone, Debug, PartialEq, Eq)]
324pub struct PrismSpan {
325 pub source: SourceId,
327 pub start: usize,
329 pub end: usize,
331}
332
333#[derive(Clone, Debug, PartialEq, Eq)]
335pub struct PrismDiagnostic {
336 pub severity: DiagnosticSeverity,
338 pub code: String,
340 pub message: String,
342 pub span: Option<PrismSpan>,
344}
345
346impl PrismDiagnostic {
347 pub fn error(code: impl Into<String>, message: impl Into<String>) -> Self {
349 Self {
350 severity: DiagnosticSeverity::Error,
351 code: code.into(),
352 message: message.into(),
353 span: None,
354 }
355 }
356}
357
/// Severity level of a [`PrismDiagnostic`].
///
/// The enum has no payload, so it derives `Copy` like the other fieldless
/// enums in this file.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum DiagnosticSeverity {
    /// Informational note.
    Info,
    /// Warning.
    Warning,
    /// Error; the severity produced by `PrismDiagnostic::error`.
    Error,
}
368
369#[derive(Clone, Debug, PartialEq, Eq)]
371pub struct PrismParse {
372 pub codec: Symbol,
374 pub semantic_id: Option<SemanticId>,
376 pub expr: Option<Expr>,
378 pub span_map: Vec<PrismSpan>,
380 pub diagnostics: Vec<PrismDiagnostic>,
382 pub inspection: PrismInspection,
384}
385
/// Result of an encode: either text or raw bytes.
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum PrismOutput {
    /// Textual encoder output.
    Text(String),
    /// Binary encoder output.
    Bytes(Vec<u8>),
}

impl PrismOutput {
    /// Renders the output for display.
    ///
    /// Text is returned unchanged. Bytes are rendered as
    /// `"<count> bytes: <lowercase hex>"`, two hex digits per byte with no
    /// separators (for example `"2 bytes: 0aff"`).
    pub fn display(&self) -> String {
        const HEX_DIGITS: &[u8; 16] = b"0123456789abcdef";
        match self {
            Self::Text(text) => text.clone(),
            Self::Bytes(bytes) => {
                // Write the digits straight into one pre-sized buffer instead of
                // allocating a `String` per byte and joining them afterwards.
                let mut rendered = format!("{} bytes: ", bytes.len());
                rendered.reserve(bytes.len() * 2);
                for &byte in bytes {
                    rendered.push(char::from(HEX_DIGITS[usize::from(byte >> 4)]));
                    rendered.push(char::from(HEX_DIGITS[usize::from(byte & 0x0f)]));
                }
                rendered
            }
        }
    }

    /// Length of the payload in bytes (UTF-8 bytes for text).
    pub fn len(&self) -> usize {
        match self {
            Self::Text(text) => text.len(),
            Self::Bytes(bytes) => bytes.len(),
        }
    }

    /// Returns `true` when the payload has length zero.
    pub fn is_empty(&self) -> bool {
        self.len() == 0
    }
}
424
425#[derive(Clone, Debug, PartialEq, Eq)]
427pub struct PrismEncode {
428 pub codec: Symbol,
430 pub position: EncodePosition,
432 pub output: Option<PrismOutput>,
434 pub diagnostics: Vec<PrismDiagnostic>,
436}
437
438#[derive(Clone, Debug, PartialEq, Eq)]
440pub struct LossReport {
441 pub lossless: bool,
443 pub semantic_identity: bool,
445 pub diagnostics: Vec<PrismDiagnostic>,
447}
448
449impl LossReport {
450 fn from_parts(parse: &PrismParse, encode: &PrismEncode, reparsed: Option<&PrismParse>) -> Self {
451 let semantic_identity = match (
452 parse.semantic_id.as_ref(),
453 reparsed.and_then(|parse| parse.semantic_id.as_ref()),
454 ) {
455 (Some(left), Some(right)) => left.stable == right.stable,
456 _ => false,
457 };
458 let mut diagnostics = Vec::new();
459 diagnostics.extend(parse.diagnostics.clone());
460 diagnostics.extend(encode.diagnostics.clone());
461 if let Some(reparsed) = reparsed {
462 diagnostics.extend(reparsed.diagnostics.clone());
463 }
464 if !semantic_identity {
465 diagnostics.push(PrismDiagnostic::error(
466 "semantic-identity-loss",
467 "parse and reparse semantic ids differ",
468 ));
469 }
470 Self {
471 lossless: semantic_identity && diagnostics.is_empty(),
472 semantic_identity,
473 diagnostics,
474 }
475 }
476}
477
478#[derive(Clone, Debug, PartialEq, Eq)]
480pub struct RoundTrip {
481 pub parse: PrismParse,
483 pub encode: PrismEncode,
485 pub reparsed: Option<PrismParse>,
487 pub loss_report: LossReport,
489}
490
491fn collect_spans(tree: &sim_kernel::LocatedExprTree, spans: &mut Vec<PrismSpan>) {
492 if let Some(origin) = &tree.origin {
493 spans.push(PrismSpan {
494 source: origin.source.clone(),
495 start: origin.span.start,
496 end: origin.span.end,
497 });
498 }
499 for child in &tree.children {
500 collect_spans(child, spans);
501 }
502}
503
/// Hashes the UTF-8 bytes of `text` with 64-bit FNV-1a and returns the digest
/// as 16 lowercase, zero-padded hexadecimal digits.
fn stable_hash(text: &str) -> String {
    const FNV_OFFSET_BASIS: u64 = 0xcbf2_9ce4_8422_2325;
    const FNV_PRIME: u64 = 0x0000_0100_0000_01b3;

    // FNV-1a: xor the byte in first, then multiply by the prime (wrapping).
    let digest = text.bytes().fold(FNV_OFFSET_BASIS, |acc, byte| {
        (acc ^ u64::from(byte)).wrapping_mul(FNV_PRIME)
    });
    format!("{digest:016x}")
}