1use std::fmt::{Display, Formatter};
9use std::fs;
10use std::path::{Path, PathBuf};
11
12use index_core::{IndexDocument, IndexNode, IndexUrl, Redactor, UrlError};
13
14const ARTIFACT_HEADER: &str = "index-capture-v1";
15const HTML_BEGIN: &str = "---BEGIN REDACTED HTML---";
16const HTML_END: &str = "---END REDACTED HTML---";
17const DIAGNOSTIC_BEGIN: &str = "---BEGIN DIAGNOSTIC---";
18const DIAGNOSTIC_END: &str = "---END DIAGNOSTIC---";
19const PREVIEW_BEGIN: &str = "---BEGIN CAPTURE PREVIEW---";
20const PREVIEW_END: &str = "---END CAPTURE PREVIEW---";
21const REPAIR_BEGIN: &str = "---BEGIN REPAIR HINTS---";
22const REPAIR_END: &str = "---END REPAIR HINTS---";
23const INDEX_ARTIFACT_HEADER: &str = "index-artifact-v1";
24const INDEX_ARTIFACT_VERSION: u8 = 1;
25const ARTIFACT_CAPTURE_BEGIN: &str = "---BEGIN CAPTURE ARTIFACT---";
26const ARTIFACT_CAPTURE_END: &str = "---END CAPTURE ARTIFACT---";
27const REDACTED: &str = "[REDACTED]";
28
/// Failures raised while building, parsing, or validating a capture artifact.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum CaptureError {
    /// The source URL did not parse as an `IndexUrl`; wraps the parser's error.
    InvalidSourceUrl(UrlError),
    /// The artifact text or its contents were rejected; the string is the reason.
    InvalidArtifact(String),
}
37
38impl Display for CaptureError {
39 fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
40 match self {
41 Self::InvalidSourceUrl(error) => write!(f, "capture source URL rejected: {error}"),
42 Self::InvalidArtifact(reason) => write!(f, "capture artifact is invalid: {reason}"),
43 }
44 }
45}
46
// Marker impl: relies on the trait's default methods, so no underlying source is exposed.
impl std::error::Error for CaptureError {}
48
/// Failures raised while storing, loading, or parsing an [`IndexArtifact`].
#[derive(Debug)]
pub enum ArtifactStoreError {
    /// A filesystem operation failed.
    Io(std::io::Error),
    /// The artifact envelope text was malformed; the string is the reason.
    Parse(String),
    /// A stored URL did not parse as an `IndexUrl`.
    Url(UrlError),
    /// The embedded capture artifact failed to parse or validate.
    Capture(CaptureError),
}
61
62impl Display for ArtifactStoreError {
63 fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
64 match self {
65 Self::Io(error) => write!(f, "artifact store IO failed: {error}"),
66 Self::Parse(reason) => write!(f, "artifact store parse failed: {reason}"),
67 Self::Url(error) => write!(f, "artifact store URL failed: {error}"),
68 Self::Capture(error) => write!(f, "artifact store capture failed: {error}"),
69 }
70 }
71}
72
// Marker impl: relies on the trait's default methods, so wrapped errors are not exposed as a source.
impl std::error::Error for ArtifactStoreError {}
74
75impl From<std::io::Error> for ArtifactStoreError {
76 fn from(value: std::io::Error) -> Self {
77 Self::Io(value)
78 }
79}
80
/// Raw, not-yet-redacted input for a capture.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct CaptureRequest {
    /// Parsed URL the HTML was captured from.
    pub source_url: IndexUrl,
    /// HTML text exactly as supplied by the caller.
    pub html: String,
    /// Optional free-form diagnostic text to ship alongside the HTML.
    pub diagnostic: Option<String>,
}
91
92impl CaptureRequest {
93 pub fn new(source_url: impl AsRef<str>, html: impl Into<String>) -> Result<Self, CaptureError> {
95 let source_url = IndexUrl::parse(source_url).map_err(CaptureError::InvalidSourceUrl)?;
96 Ok(Self {
97 source_url,
98 html: html.into(),
99 diagnostic: None,
100 })
101 }
102
103 #[must_use]
105 pub fn with_diagnostic(mut self, diagnostic: impl Into<String>) -> Self {
106 self.diagnostic = Some(diagnostic.into());
107 self
108 }
109}
110
/// Text-serializable result of a capture: a source URL, HTML, and optional diagnostic,
/// plus a command line for reproducing the capture.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct CaptureArtifact {
    /// Source URL as text; `validate_bundle` rejects credential-shaped pairs left in it.
    pub source_url: String,
    /// HTML payload; `validate_bundle` requires it to be non-blank and free of
    /// credential-shaped pairs.
    pub redacted_html: String,
    /// Optional diagnostic text; serialized as the literal `none` when absent.
    pub diagnostic: Option<String>,
    /// Command line written to the `reproduce:` line of the serialized artifact.
    pub reproduction_command: String,
}
123
/// Context label stored with an artifact; it is also part of the on-disk file name.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ArtifactContext {
    /// Serialized as `live-get`.
    LiveGet,
    /// Serialized as `live-submit`.
    LiveSubmit,
    /// Serialized as `offline`.
    Offline,
}
134
135impl ArtifactContext {
136 #[must_use]
138 pub const fn as_str(self) -> &'static str {
139 match self {
140 Self::LiveGet => "live-get",
141 Self::LiveSubmit => "live-submit",
142 Self::Offline => "offline",
143 }
144 }
145
146 pub fn parse(input: &str) -> Result<Self, String> {
148 match input.trim() {
149 "live-get" => Ok(Self::LiveGet),
150 "live-submit" => Ok(Self::LiveSubmit),
151 "offline" => Ok(Self::Offline),
152 other => Err(format!("unsupported artifact context: {other}")),
153 }
154 }
155}
156
/// Result of comparing an artifact's age against its allowed maximum age.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ArtifactFreshness {
    /// The current time is at or before `stored_at_unix_secs + max_age_secs`.
    Fresh,
    /// The current time is past `stored_at_unix_secs + max_age_secs`.
    Stale,
}
165
/// Stored envelope around a [`CaptureArtifact`], carrying URLs, context, and age metadata.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct IndexArtifact {
    /// Envelope format version; `from_text` only accepts `INDEX_ARTIFACT_VERSION`.
    pub version: u8,
    /// Canonical URL as text; `from_document` redacts sensitive `key=value` pairs in it.
    pub canonical_url: String,
    /// Final URL as text; `from_document` redacts sensitive `key=value` pairs in it.
    pub final_url: String,
    /// Context label the artifact was stored under.
    pub context: ArtifactContext,
    /// Storage timestamp in seconds, as supplied by the caller.
    pub stored_at_unix_secs: u64,
    /// Number of seconds after `stored_at_unix_secs` during which the artifact is fresh.
    pub max_age_secs: u64,
    /// The embedded capture payload.
    pub capture: CaptureArtifact,
}
184
185impl IndexArtifact {
186 pub fn from_document(
188 document: &IndexDocument,
189 canonical_url: &IndexUrl,
190 final_url: &IndexUrl,
191 context: ArtifactContext,
192 stored_at_unix_secs: u64,
193 max_age_secs: u64,
194 ) -> Result<Self, CaptureError> {
195 let mut capture = capture_document(document)?;
196 let canonical = redact_sensitive_pairs(canonical_url.as_str());
197 let final_url = redact_sensitive_pairs(final_url.as_str());
198 capture.source_url = canonical.clone();
199 capture.reproduction_command =
200 format!("index capture --redact {canonical} - < local-page.html");
201 Ok(Self {
202 version: INDEX_ARTIFACT_VERSION,
203 canonical_url: canonical,
204 final_url,
205 context,
206 stored_at_unix_secs,
207 max_age_secs,
208 capture,
209 })
210 }
211
212 #[must_use]
214 pub fn freshness(&self, now_unix_secs: u64) -> ArtifactFreshness {
215 let expires_at = self.stored_at_unix_secs.saturating_add(self.max_age_secs);
216 if now_unix_secs <= expires_at {
217 ArtifactFreshness::Fresh
218 } else {
219 ArtifactFreshness::Stale
220 }
221 }
222
223 #[must_use]
225 pub fn is_fresh(&self, now_unix_secs: u64) -> bool {
226 self.freshness(now_unix_secs) == ArtifactFreshness::Fresh
227 }
228
229 #[must_use]
231 pub fn to_text(&self) -> String {
232 format!(
233 "{INDEX_ARTIFACT_HEADER}\nversion: {}\ncontext: {}\ncanonical_url: {}\nfinal_url: {}\nstored_at_unix_secs: {}\nmax_age_secs: {}\n{ARTIFACT_CAPTURE_BEGIN}\n{}\
234\n{ARTIFACT_CAPTURE_END}\n",
235 self.version,
236 self.context.as_str(),
237 self.canonical_url,
238 self.final_url,
239 self.stored_at_unix_secs,
240 self.max_age_secs,
241 self.capture.to_text().trim_end()
242 )
243 }
244
245 pub fn from_text(input: &str) -> Result<Self, ArtifactStoreError> {
247 let mut lines = input.lines();
248 if lines.next() != Some(INDEX_ARTIFACT_HEADER) {
249 return Err(ArtifactStoreError::Parse(
250 "missing artifact header".to_owned(),
251 ));
252 }
253 let version = parse_artifact_u8_line(lines.next(), "version: ")?;
254 if version != INDEX_ARTIFACT_VERSION {
255 return Err(ArtifactStoreError::Parse(format!(
256 "unsupported artifact version: {version}"
257 )));
258 }
259 let context = ArtifactContext::parse(
260 &parse_prefixed_line(lines.next(), "context: ")
261 .map_err(|error| ArtifactStoreError::Parse(error.to_string()))?,
262 )
263 .map_err(ArtifactStoreError::Parse)?;
264 let canonical_url = parse_prefixed_line(lines.next(), "canonical_url: ")
265 .map_err(|error| ArtifactStoreError::Parse(error.to_string()))?;
266 let final_url = parse_prefixed_line(lines.next(), "final_url: ")
267 .map_err(|error| ArtifactStoreError::Parse(error.to_string()))?;
268 let stored_at_unix_secs = parse_artifact_u64_line(lines.next(), "stored_at_unix_secs: ")?;
269 let max_age_secs = parse_artifact_u64_line(lines.next(), "max_age_secs: ")?;
270 if lines.next() != Some(ARTIFACT_CAPTURE_BEGIN) {
271 return Err(ArtifactStoreError::Parse(
272 "missing capture section".to_owned(),
273 ));
274 }
275 let mut capture_lines = Vec::new();
276 for line in &mut lines {
277 if line == ARTIFACT_CAPTURE_END {
278 let capture_text = capture_lines.join("\n");
279 let capture =
280 validate_capture_bundle(&capture_text).map_err(ArtifactStoreError::Capture)?;
281 IndexUrl::parse(canonical_url.replace(REDACTED, "redacted"))
282 .map_err(ArtifactStoreError::Url)?;
283 IndexUrl::parse(final_url.replace(REDACTED, "redacted"))
284 .map_err(ArtifactStoreError::Url)?;
285 return Ok(Self {
286 version,
287 canonical_url,
288 final_url,
289 context,
290 stored_at_unix_secs,
291 max_age_secs,
292 capture,
293 });
294 }
295 capture_lines.push(line.to_owned());
296 }
297 Err(ArtifactStoreError::Parse(
298 "unterminated capture section".to_owned(),
299 ))
300 }
301}
302
/// Directory-backed store holding one `.idx` file per canonical URL and context.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ArtifactStore {
    // Directory under which artifact files are written and looked up.
    root: PathBuf,
}
308
309impl ArtifactStore {
310 #[must_use]
312 pub fn new(root: impl Into<PathBuf>) -> Self {
313 Self { root: root.into() }
314 }
315
316 #[must_use]
318 pub fn root(&self) -> &Path {
319 &self.root
320 }
321
322 #[must_use]
324 pub fn path_for(&self, canonical_url: &IndexUrl, context: ArtifactContext) -> PathBuf {
325 self.root.join(format!(
326 "{}.{}.idx",
327 canonical_url.cache_key(),
328 context.as_str()
329 ))
330 }
331
332 pub fn store(&self, artifact: &IndexArtifact) -> Result<PathBuf, ArtifactStoreError> {
334 fs::create_dir_all(&self.root)?;
335 let canonical_url = IndexUrl::parse(artifact.canonical_url.replace(REDACTED, "redacted"))
336 .map_err(ArtifactStoreError::Url)?;
337 let path = self.path_for(&canonical_url, artifact.context);
338 fs::write(&path, artifact.to_text())?;
339 Ok(path)
340 }
341
342 pub fn load(
344 &self,
345 canonical_url: &IndexUrl,
346 context: ArtifactContext,
347 ) -> Result<Option<IndexArtifact>, ArtifactStoreError> {
348 let path = self.path_for(canonical_url, context);
349 if !path.exists() {
350 return Ok(None);
351 }
352 let contents = fs::read_to_string(path)?;
353 let artifact = IndexArtifact::from_text(&contents)?;
354 Ok(Some(artifact))
355 }
356}
357
/// Per-section counts of redaction markers introduced by a capture.
#[derive(Debug, Clone, PartialEq, Eq, Default)]
pub struct RedactionSummary {
    /// Markers added to the source URL.
    pub source_url_values: usize,
    /// Markers added to the HTML.
    pub html_values: usize,
    /// Markers added to the diagnostic text.
    pub diagnostic_values: usize,
}
368
369impl RedactionSummary {
370 #[must_use]
372 pub const fn total(&self) -> usize {
373 self.source_url_values + self.html_values + self.diagnostic_values
374 }
375
376 #[must_use]
378 pub fn to_text(&self) -> String {
379 format!(
380 "redaction-summary-v1\nsource_url_values: {}\nhtml_values: {}\ndiagnostic_values: {}\ntotal: {}",
381 self.source_url_values,
382 self.html_values,
383 self.diagnostic_values,
384 self.total()
385 )
386 }
387}
388
/// A capture artifact bundled with its redaction summary and submission checklist.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct CapturePreview {
    /// The redacted capture.
    pub artifact: CaptureArtifact,
    /// Counts of redaction markers; reset to all zeros when a preview is parsed back from text.
    pub summary: RedactionSummary,
    /// Checklist text as produced by `CaptureArtifact::submission_checklist`.
    pub checklist: String,
}
399
/// A capture preview together with repair hints and a catalog table row.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct CaptureReviewBundle {
    /// The preview being reviewed.
    pub preview: CapturePreview,
    /// Free-form hint text; serialized between the repair-hint markers.
    pub repair_hints: String,
    /// Single-line catalog entry; serialized on the `catalog_entry:` line.
    pub catalog_entry: String,
}
410
411impl CapturePreview {
412 #[must_use]
414 pub fn to_text(&self) -> String {
415 format!(
416 "index-capture-preview-v1\n{}\n\n{}\n\n{}",
417 self.summary.to_text(),
418 self.checklist,
419 self.artifact.to_text()
420 )
421 }
422}
423
424impl CaptureReviewBundle {
425 #[must_use]
427 pub fn to_text(&self) -> String {
428 format!(
429 "index-capture-review-bundle-v1\n{PREVIEW_BEGIN}\n{}\n{PREVIEW_END}\n{REPAIR_BEGIN}\n{}\n{REPAIR_END}\ncatalog_entry: {}\n",
430 self.preview.to_text(),
431 self.repair_hints,
432 self.catalog_entry
433 )
434 }
435
436 pub fn from_text(input: &str) -> Result<Self, CaptureError> {
438 let mut lines = input.lines();
439 if lines.next() != Some("index-capture-review-bundle-v1") {
440 return Err(CaptureError::InvalidArtifact(
441 "missing review bundle header".to_owned(),
442 ));
443 }
444 if lines.next() != Some(PREVIEW_BEGIN) {
445 return Err(CaptureError::InvalidArtifact(
446 "missing capture preview section".to_owned(),
447 ));
448 }
449 let mut preview_lines = Vec::new();
450 for line in &mut lines {
451 if line == PREVIEW_END {
452 break;
453 }
454 preview_lines.push(line.to_owned());
455 }
456 if lines.next() != Some(REPAIR_BEGIN) {
457 return Err(CaptureError::InvalidArtifact(
458 "missing repair hint section".to_owned(),
459 ));
460 }
461 let mut repair_lines = Vec::new();
462 for line in &mut lines {
463 if line == REPAIR_END {
464 break;
465 }
466 repair_lines.push(line.to_owned());
467 }
468 let catalog_entry = parse_prefixed_line(lines.next(), "catalog_entry: ")?;
469 let preview = parse_preview_from_text(&preview_lines.join("\n"))?;
470 Ok(Self {
471 preview,
472 repair_hints: repair_lines.join("\n"),
473 catalog_entry,
474 })
475 }
476}
477
478impl CaptureArtifact {
479 #[must_use]
481 pub fn to_text(&self) -> String {
482 let diagnostic = self.diagnostic.as_deref().unwrap_or("none");
483 format!(
484 "{ARTIFACT_HEADER}\nsource_url: {}\nreproduce: {}\n{HTML_BEGIN}\n{}\n{HTML_END}\n{DIAGNOSTIC_BEGIN}\n{}\n{DIAGNOSTIC_END}\n",
485 self.source_url, self.reproduction_command, self.redacted_html, diagnostic
486 )
487 }
488
489 #[must_use]
491 pub fn submission_checklist(&self) -> String {
492 format!(
493 "fixture-submission-checklist-v1\nsource_url: {}\nreproduce: {}\n[ ] confirm the URL is public or rewritten to a public equivalent\n[ ] confirm no cookies, credentials, account identifiers, private messages, or private URLs remain\n[ ] reduce HTML to the smallest shape that reproduces the behavior\n[ ] classify intent and support tier\n[ ] add or update regression tests\n[ ] record the fixture in docs/COVERAGE_CATALOG.md",
494 self.source_url, self.reproduction_command
495 )
496 }
497
498 pub fn validate_bundle(&self) -> Result<(), CaptureError> {
500 let parseable_source_url = self.source_url.replace(REDACTED, "redacted");
501 IndexUrl::parse(&parseable_source_url).map_err(CaptureError::InvalidSourceUrl)?;
502 if self.redacted_html.trim().is_empty() {
503 return Err(CaptureError::InvalidArtifact(
504 "redacted HTML section is empty".to_owned(),
505 ));
506 }
507 if contains_unredacted_sensitive_pair(&self.source_url)
508 || contains_unredacted_sensitive_pair(&self.redacted_html)
509 || self
510 .diagnostic
511 .as_deref()
512 .is_some_and(contains_unredacted_sensitive_pair)
513 {
514 return Err(CaptureError::InvalidArtifact(
515 "artifact contains unredacted credential-shaped content".to_owned(),
516 ));
517 }
518 Ok(())
519 }
520
521 pub fn from_text(input: &str) -> Result<Self, CaptureError> {
523 let mut lines = input.lines();
524 if lines.next() != Some(ARTIFACT_HEADER) {
525 return Err(CaptureError::InvalidArtifact("missing header".to_owned()));
526 }
527
528 let source_url = parse_prefixed_line(lines.next(), "source_url: ")?;
529 let reproduction_command = parse_prefixed_line(lines.next(), "reproduce: ")?;
530 if lines.next() != Some(HTML_BEGIN) {
531 return Err(CaptureError::InvalidArtifact(
532 "missing redacted HTML section".to_owned(),
533 ));
534 }
535
536 let mut redacted_html = Vec::new();
537 for line in &mut lines {
538 if line == HTML_END {
539 break;
540 }
541 redacted_html.push(line.to_owned());
542 }
543
544 if lines.next() != Some(DIAGNOSTIC_BEGIN) {
545 return Err(CaptureError::InvalidArtifact(
546 "missing diagnostic section".to_owned(),
547 ));
548 }
549
550 let mut diagnostic = Vec::new();
551 for line in &mut lines {
552 if line == DIAGNOSTIC_END {
553 let diagnostic = diagnostic.join("\n");
554 let diagnostic = if diagnostic == "none" {
555 None
556 } else {
557 Some(diagnostic)
558 };
559 return Ok(Self {
560 source_url,
561 redacted_html: redacted_html.join("\n"),
562 diagnostic,
563 reproduction_command,
564 });
565 }
566 diagnostic.push(line.to_owned());
567 }
568
569 Err(CaptureError::InvalidArtifact(
570 "unterminated diagnostic section".to_owned(),
571 ))
572 }
573}
574
575pub fn capture_redacted(request: &CaptureRequest) -> CaptureArtifact {
577 let mut redactor = Redactor::new();
578 add_html_secret_values(&request.html, &mut redactor);
579 if let Some(diagnostic) = &request.diagnostic {
580 add_query_secret_values(diagnostic, &mut redactor);
581 }
582 add_query_secret_values(request.source_url.as_str(), &mut redactor);
583
584 let source_url = redact_sensitive_pairs(&redactor.redact(request.source_url.as_str()));
585 let redacted_html = redact_html(&request.html, &redactor);
586 let diagnostic = request
587 .diagnostic
588 .as_ref()
589 .map(|diagnostic| redact_sensitive_pairs(&redactor.redact(diagnostic)));
590
591 CaptureArtifact {
592 reproduction_command: format!("index capture --redact {source_url} - < local-page.html"),
593 source_url,
594 redacted_html,
595 diagnostic,
596 }
597}
598
599pub fn preview_redacted(request: &CaptureRequest) -> CapturePreview {
601 let artifact = capture_redacted(request);
602 let summary = summarize_redactions(request, &artifact);
603 let checklist = artifact.submission_checklist();
604 CapturePreview {
605 artifact,
606 summary,
607 checklist,
608 }
609}
610
611pub fn capture_review_bundle(
613 request: &CaptureRequest,
614 fixture_path: &str,
615 repair_hints: impl Into<String>,
616) -> Result<CaptureReviewBundle, CaptureError> {
617 let preview = preview_redacted(request);
618 let catalog_entry = catalog_entry_for_fixture(fixture_path, "unknown", 0)?;
619 Ok(CaptureReviewBundle {
620 preview,
621 repair_hints: repair_hints.into(),
622 catalog_entry,
623 })
624}
625
626pub fn catalog_entry_for_fixture(
628 fixture_path: &str,
629 intent: &str,
630 tier: u8,
631) -> Result<String, CaptureError> {
632 if !Path::new(fixture_path).exists() {
633 return Err(CaptureError::InvalidArtifact(format!(
634 "fixture path does not exist: {fixture_path}"
635 )));
636 }
637 Ok(format!(
638 "| `{fixture_path}` | {intent} | Tier {tier} | capture | review private data before submission |"
639 ))
640}
641
642pub fn capture_document(document: &IndexDocument) -> Result<CaptureArtifact, CaptureError> {
648 Ok(capture_redacted(&document_capture_request(document)?))
649}
650
651pub fn preview_document(document: &IndexDocument) -> Result<CapturePreview, CaptureError> {
653 Ok(preview_redacted(&document_capture_request(document)?))
654}
655
656fn document_capture_request(document: &IndexDocument) -> Result<CaptureRequest, CaptureError> {
657 let source_url = document
658 .metadata
659 .canonical_url
660 .as_deref()
661 .unwrap_or("https://index.local/current");
662 let source_url = if IndexUrl::parse(source_url).is_ok() {
663 source_url
664 } else {
665 "https://index.local/current"
666 };
667 CaptureRequest::new(source_url, document_to_html(document)).map(|request| {
668 request.with_diagnostic(format!(
669 "captured from current Index document: title={}",
670 document.title
671 ))
672 })
673}
674
675pub fn validate_capture_bundle(input: &str) -> Result<CaptureArtifact, CaptureError> {
677 let artifact = CaptureArtifact::from_text(input)?;
678 artifact.validate_bundle()?;
679 Ok(artifact)
680}
681
682fn parse_preview_from_text(input: &str) -> Result<CapturePreview, CaptureError> {
683 if !input.starts_with("index-capture-preview-v1\n") {
684 return Err(CaptureError::InvalidArtifact(
685 "missing capture preview header".to_owned(),
686 ));
687 }
688 let artifact_start = input
689 .find(ARTIFACT_HEADER)
690 .ok_or_else(|| CaptureError::InvalidArtifact("missing embedded artifact".to_owned()))?;
691 let artifact = validate_capture_bundle(&input[artifact_start..])?;
692 let checklist = artifact.submission_checklist();
693 Ok(CapturePreview {
694 artifact,
695 summary: RedactionSummary::default(),
696 checklist,
697 })
698}
699
700fn document_to_html(document: &IndexDocument) -> String {
701 let mut html = String::from("<!doctype html><html><head><meta charset=\"utf-8\"><title>");
702 html.push_str(&escape_html(&document.title));
703 html.push_str("</title></head><body><main>");
704 for node in &document.nodes {
705 push_node_html(node, &mut html);
706 }
707 html.push_str("</main></body></html>");
708 html
709}
710
711fn push_node_html(node: &IndexNode, output: &mut String) {
712 match node {
713 IndexNode::Heading { level, text } => {
714 let level = (*level).clamp(1, 6);
715 output.push_str(&format!("<h{level}>"));
716 output.push_str(&escape_html(text));
717 output.push_str(&format!("</h{level}>"));
718 }
719 IndexNode::Paragraph(text) => {
720 output.push_str("<p>");
721 output.push_str(&escape_html(text));
722 output.push_str("</p>");
723 }
724 IndexNode::Link(link) => {
725 output.push_str("<p><a href=\"");
726 output.push_str(&escape_html(&link.href));
727 output.push_str("\">");
728 output.push_str(&escape_html(&link.text));
729 output.push_str("</a></p>");
730 }
731 IndexNode::List { ordered, items } => {
732 let tag = if *ordered { "ol" } else { "ul" };
733 output.push_str(&format!("<{tag}>"));
734 for item in items {
735 output.push_str("<li>");
736 output.push_str(&escape_html(item));
737 output.push_str("</li>");
738 }
739 output.push_str(&format!("</{tag}>"));
740 }
741 IndexNode::CodeBlock { language, code } => {
742 output.push_str("<pre><code");
743 if let Some(language) = language {
744 output.push_str(" class=\"language-");
745 output.push_str(&escape_html(language));
746 output.push('"');
747 }
748 output.push('>');
749 output.push_str(&escape_html(code));
750 output.push_str("</code></pre>");
751 }
752 IndexNode::Table { rows } => {
753 output.push_str("<table>");
754 for row in rows {
755 output.push_str("<tr>");
756 for cell in row {
757 output.push_str("<td>");
758 output.push_str(&escape_html(cell));
759 output.push_str("</td>");
760 }
761 output.push_str("</tr>");
762 }
763 output.push_str("</table>");
764 }
765 IndexNode::Spacer { .. } => {}
766 IndexNode::Section { title, nodes, .. } => {
767 output.push_str("<section>");
768 if let Some(title) = title {
769 output.push_str("<h2>");
770 output.push_str(&escape_html(title));
771 output.push_str("</h2>");
772 }
773 for node in nodes {
774 push_node_html(node, output);
775 }
776 output.push_str("</section>");
777 }
778 IndexNode::Image { alt, src } => {
779 output.push_str("<img alt=\"");
780 output.push_str(&escape_html(alt));
781 output.push('"');
782 if let Some(src) = src {
783 output.push_str(" src=\"");
784 output.push_str(&escape_html(src));
785 output.push('"');
786 }
787 output.push('>');
788 }
789 IndexNode::Form(form) => {
790 output.push_str("<form action=\"");
791 output.push_str(&escape_html(&form.action));
792 output.push_str("\" method=\"");
793 output.push_str(form.method.as_str());
794 output.push_str("\"><p>");
795 output.push_str(&escape_html(&form.name));
796 output.push_str("</p></form>");
797 }
798 IndexNode::Error(error) => {
799 output.push_str("<p data-index-error=\"true\">");
800 output.push_str(&escape_html(error));
801 output.push_str("</p>");
802 }
803 }
804}
805
/// Escapes the characters that are significant in HTML text and double-quoted attributes.
///
/// `&`, `<`, `>`, and `"` become `&amp;`, `&lt;`, `&gt;`, and `&quot;`; every other
/// character is copied unchanged. Each input character is handled exactly once, so an
/// ampersand is never escaped twice.
fn escape_html(input: &str) -> String {
    let mut escaped = String::with_capacity(input.len());
    for ch in input.chars() {
        match ch {
            '&' => escaped.push_str("&amp;"),
            '<' => escaped.push_str("&lt;"),
            '>' => escaped.push_str("&gt;"),
            '"' => escaped.push_str("&quot;"),
            other => escaped.push(other),
        }
    }
    escaped
}
813
814fn summarize_redactions(request: &CaptureRequest, artifact: &CaptureArtifact) -> RedactionSummary {
815 RedactionSummary {
816 source_url_values: redaction_delta(request.source_url.as_str(), &artifact.source_url),
817 html_values: redaction_delta(&request.html, &artifact.redacted_html),
818 diagnostic_values: request
819 .diagnostic
820 .as_deref()
821 .zip(artifact.diagnostic.as_deref())
822 .map_or(0, |(before, after)| redaction_delta(before, after)),
823 }
824}
825
826fn redaction_delta(before: &str, after: &str) -> usize {
827 let before_count = before.matches(REDACTED).count();
828 let after_count = after.matches(REDACTED).count();
829 after_count.saturating_sub(before_count)
830}
831
832fn parse_prefixed_line(line: Option<&str>, prefix: &str) -> Result<String, CaptureError> {
833 let Some(line) = line else {
834 return Err(CaptureError::InvalidArtifact(format!(
835 "missing {prefix} line"
836 )));
837 };
838 let Some(value) = line.strip_prefix(prefix) else {
839 return Err(CaptureError::InvalidArtifact(format!(
840 "invalid {prefix} line"
841 )));
842 };
843 Ok(value.to_owned())
844}
845
846fn parse_artifact_u64_line(line: Option<&str>, prefix: &str) -> Result<u64, ArtifactStoreError> {
847 let line = line.ok_or_else(|| ArtifactStoreError::Parse(format!("missing {prefix} line")))?;
848 let value = line
849 .strip_prefix(prefix)
850 .ok_or_else(|| ArtifactStoreError::Parse(format!("invalid {prefix} line")))?;
851 value.parse::<u64>().map_err(|error| {
852 ArtifactStoreError::Parse(format!("failed to parse {prefix} value as u64: {error}"))
853 })
854}
855
856fn parse_artifact_u8_line(line: Option<&str>, prefix: &str) -> Result<u8, ArtifactStoreError> {
857 let line = line.ok_or_else(|| ArtifactStoreError::Parse(format!("missing {prefix} line")))?;
858 let value = line
859 .strip_prefix(prefix)
860 .ok_or_else(|| ArtifactStoreError::Parse(format!("invalid {prefix} line")))?;
861 value.parse::<u8>().map_err(|error| {
862 ArtifactStoreError::Parse(format!("failed to parse {prefix} value as u8: {error}"))
863 })
864}
865
866fn redact_html(input: &str, redactor: &Redactor) -> String {
867 let mut output = redact_sensitive_pairs(&redactor.redact(input));
868 output = redact_sensitive_attributes(&output);
869 output
870}
871
872fn add_html_secret_values(input: &str, redactor: &mut Redactor) {
873 add_query_secret_values(input, redactor);
874 let bytes = input.as_bytes();
875 let mut index = 0;
876 while index < bytes.len() {
877 let Some(name_start) = find_ascii_case_insensitive(&input[index..], "name=") else {
878 break;
879 };
880 let absolute_name_start = index + name_start + "name=".len();
881 let Some((name, after_name)) = read_quoted_value(input, absolute_name_start) else {
882 index = absolute_name_start;
883 continue;
884 };
885 if !is_sensitive_key(&name) {
886 index = after_name;
887 continue;
888 }
889
890 if let Some(value_start) = find_ascii_case_insensitive(&input[after_name..], "value=") {
891 let absolute_value_start = after_name + value_start + "value=".len();
892 if let Some((value, after_value)) = read_quoted_value(input, absolute_value_start) {
893 redactor.add_secret(value);
894 index = after_value;
895 continue;
896 }
897 }
898 index = after_name;
899 }
900}
901
902fn add_query_secret_values(input: &str, redactor: &mut Redactor) {
903 for marker in ["=", "%3D", "%3d"] {
904 let mut search_start = 0;
905 while let Some(relative_position) = input[search_start..].find(marker) {
906 let marker_position = search_start + relative_position;
907 let key_start = input[..marker_position]
908 .rfind(|ch: char| !is_key_char(ch))
909 .map_or(0, |position| position + 1);
910 let key = &input[key_start..marker_position];
911 if !is_sensitive_key(key) {
912 search_start = marker_position + marker.len();
913 continue;
914 }
915
916 let value_start = marker_position + marker.len();
917 let value_end = input[value_start..]
918 .find(is_value_delimiter)
919 .map_or(input.len(), |position| value_start + position);
920 if value_end > value_start {
921 redactor.add_secret(&input[value_start..value_end]);
922 }
923 search_start = value_end;
924 }
925 }
926}
927
928fn redact_sensitive_pairs(input: &str) -> String {
929 let mut output = String::with_capacity(input.len());
930 let mut index = 0;
931 while index < input.len() {
932 let Some(eq_relative) = input[index..].find('=') else {
933 output.push_str(&input[index..]);
934 break;
935 };
936
937 let eq_position = index + eq_relative;
938 let key_start = input[..eq_position]
939 .rfind(|ch: char| !is_key_char(ch))
940 .map_or(0, |position| position + 1);
941 let key = &input[key_start..eq_position];
942 if !is_sensitive_key(key) {
943 output.push_str(&input[index..=eq_position]);
944 index = eq_position + 1;
945 continue;
946 }
947
948 output.push_str(&input[index..eq_position + 1]);
949 let value_start = eq_position + 1;
950 let value_end = input[value_start..]
951 .find(is_value_delimiter)
952 .map_or(input.len(), |position| value_start + position);
953 output.push_str(REDACTED);
954 index = value_end;
955 }
956 output
957}
958
959fn redact_sensitive_attributes(input: &str) -> String {
960 let mut output = String::with_capacity(input.len());
961 let mut index = 0;
962 while index < input.len() {
963 let Some(relative_value) = find_ascii_case_insensitive(&input[index..], "value=") else {
964 output.push_str(&input[index..]);
965 break;
966 };
967 let absolute_value = index + relative_value;
968 output.push_str(&input[index..absolute_value]);
969 output.push_str("value=");
970
971 let value_start = absolute_value + "value=".len();
972 let Some((value, after_value, quote)) = read_quoted_value_with_quote(input, value_start)
973 else {
974 index = value_start;
975 continue;
976 };
977
978 let nearby_start = input[..absolute_value].rfind('<').map_or(index, |pos| pos);
979 let nearby = &input[nearby_start..absolute_value];
980 if find_ascii_case_insensitive(nearby, "password").is_some()
981 || find_ascii_case_insensitive(nearby, "token").is_some()
982 || find_ascii_case_insensitive(nearby, "secret").is_some()
983 || find_ascii_case_insensitive(nearby, "cookie").is_some()
984 || find_ascii_case_insensitive(nearby, "session").is_some()
985 {
986 output.push(quote);
987 output.push_str(REDACTED);
988 output.push(quote);
989 } else {
990 output.push(quote);
991 output.push_str(&value);
992 output.push(quote);
993 }
994 index = after_value;
995 }
996 output
997}
998
999fn read_quoted_value(input: &str, start: usize) -> Option<(String, usize)> {
1000 read_quoted_value_with_quote(input, start).map(|(value, after, _quote)| (value, after))
1001}
1002
/// Reads a single- or double-quoted value whose opening quote sits at byte offset `start`.
///
/// Returns the text between the quotes, the offset just past the closing quote, and the
/// quote character used. Returns `None` when `start` is not at a quote or the closing quote
/// is missing.
fn read_quoted_value_with_quote(input: &str, start: usize) -> Option<(String, usize, char)> {
    let rest = &input[start..];
    let quote = rest
        .chars()
        .next()
        .filter(|&ch| ch == '"' || ch == '\'')?;
    let quote_width = quote.len_utf8();
    let body = &rest[quote_width..];
    // The value ends at the first repeat of the same quote character.
    let length = body.find(quote)?;
    let after = start + quote_width + length + quote_width;
    Some((body[..length].to_owned(), after, quote))
}
1013
/// Returns the byte offset of the first occurrence of `needle` in `haystack`, comparing
/// ASCII letters case-insensitively. An empty needle, or one longer than the haystack,
/// never matches.
fn find_ascii_case_insensitive(haystack: &str, needle: &str) -> Option<usize> {
    let (text, pattern) = (haystack.as_bytes(), needle.as_bytes());
    if pattern.is_empty() {
        return None;
    }
    // `checked_sub` fails exactly when the pattern cannot fit anywhere in the text.
    let last_start = text.len().checked_sub(pattern.len())?;
    (0..=last_start)
        .find(|&start| text[start..start + pattern.len()].eq_ignore_ascii_case(pattern))
}
1025
/// Reports whether `key` names a credential-like field.
///
/// Leading and trailing characters other than ASCII letters, digits, `_`, and `-` are
/// ignored, and the comparison against the known names is ASCII case-insensitive.
fn is_sensitive_key(key: &str) -> bool {
    const SENSITIVE_KEYS: [&str; 17] = [
        "authorization",
        "auth",
        "api_key",
        "api-key",
        "cookie",
        "csrf",
        "csrf_token",
        "key",
        "password",
        "passwd",
        "secret",
        "session",
        "sessionid",
        "sid",
        "token",
        "access_token",
        "refresh_token",
    ];

    let is_padding = |ch: char| !(ch.is_ascii_alphanumeric() || ch == '_' || ch == '-');
    let bare_key = key.trim_matches(is_padding);
    SENSITIVE_KEYS
        .iter()
        .any(|known| known.eq_ignore_ascii_case(bare_key))
}
1050
/// Reports whether `ch` may appear in a key: an ASCII letter or digit, `_`, or `-`.
fn is_key_char(ch: char) -> bool {
    matches!(ch, 'a'..='z' | 'A'..='Z' | '0'..='9' | '_' | '-')
}
1054
/// Reports whether `ch` ends a value: `&`, either quote, `<`, `>`, `;`, or one of space,
/// tab, carriage return, and line feed.
fn is_value_delimiter(ch: char) -> bool {
    "&\"'<> \t\r\n;".contains(ch)
}
1061
1062fn contains_unredacted_sensitive_pair(input: &str) -> bool {
1063 let mut index = 0;
1064 while index < input.len() {
1065 let Some(eq_relative) = input[index..].find('=') else {
1066 return false;
1067 };
1068 let eq_position = index + eq_relative;
1069 let key_start = input[..eq_position]
1070 .rfind(|ch: char| !is_key_char(ch))
1071 .map_or(0, |position| position + 1);
1072 let key = &input[key_start..eq_position];
1073 let value_start = eq_position + 1;
1074 let value_end = input[value_start..]
1075 .find(is_value_delimiter)
1076 .map_or(input.len(), |position| value_start + position);
1077 let value = &input[value_start..value_end];
1078 if is_sensitive_key(key) && !value.is_empty() && value != REDACTED {
1079 return true;
1080 }
1081 index = value_end.saturating_add(1);
1082 }
1083 false
1084}
1085
#[cfg(test)]
mod tests {
    use std::fs;
    use std::path::PathBuf;
    use std::time::{SystemTime, UNIX_EPOCH};

    use index_core::{Form, IndexDocument, IndexNode, IndexUrl, Input, Link, SectionRole};

    use super::{
        ArtifactContext, ArtifactFreshness, ArtifactStore, CaptureArtifact, CaptureError,
        CaptureRequest, CaptureReviewBundle, IndexArtifact, capture_document, capture_redacted,
        capture_review_bundle, catalog_entry_for_fixture, preview_document, preview_redacted,
        validate_capture_bundle,
    };

    /// Builds a scratch directory path under the system temp dir.
    ///
    /// The name combines `label` with the current time in nanoseconds since the
    /// Unix epoch (or `0` if the clock reads before the epoch). The directory
    /// itself is not created here.
    fn temp_artifact_dir(label: &str) -> PathBuf {
        let nanos = SystemTime::now()
            .duration_since(UNIX_EPOCH)
            .map_or(0, |duration| duration.as_nanos());
        std::env::temp_dir().join(format!("index-artifacts-{label}-{nanos}"))
    }

    /// Removes the wrapped directory tree when dropped, so a test's scratch
    /// directory is cleaned up even when the test leaves early through `?` or a
    /// failed assertion.
    struct TempDirGuard(PathBuf);

    impl Drop for TempDirGuard {
        fn drop(&mut self) {
            // Best effort: the directory may never have been created.
            let _ = fs::remove_dir_all(&self.0);
        }
    }

    /// Secrets in the source URL, link hrefs, form actions, input values, and
    /// cookie/authorization text must not appear in the serialized artifact,
    /// while non-sensitive values stay intact.
    #[test]
    fn capture_redacts_credentials_cookies_and_private_fields()
    -> Result<(), Box<dyn std::error::Error>> {
        let request = CaptureRequest::new(
            "https://example.org/private?token=url-secret&topic=docs",
            r#"<html>
 <a href="/search?password=link-secret&q=docs">Search</a>
 <form action="/login?session=form-secret">
 <input name="password" value="field-secret">
 <input name="q" value="public">
 </form>
 <p>Cookie: sid=cookie-secret Authorization: Bearer bearer-secret</p>
 </html>"#,
        )?;

        let artifact = capture_redacted(&request);
        let text = artifact.to_text();

        for secret in [
            "url-secret",
            "link-secret",
            "form-secret",
            "field-secret",
            "cookie-secret",
            "bearer-secret",
        ] {
            assert!(!text.contains(secret), "leaked {secret}");
        }
        assert!(text.contains("[REDACTED]"));
        assert!(text.contains("topic=docs"));
        assert!(text.contains("value=\"public\""));
        Ok(())
    }

    /// Serializing a capture artifact and parsing it back yields an equal
    /// value, with the diagnostic secret redacted and the plain path kept.
    #[test]
    fn capture_artifact_roundtrips_deterministically() -> Result<(), Box<dyn std::error::Error>> {
        let request = CaptureRequest::new("https://example.org/docs", "<main>Docs</main>")?
            .with_diagnostic("token=diagnostic-secret path=/tmp/index");
        let artifact = capture_redacted(&request);
        let text = artifact.to_text();
        let parsed = CaptureArtifact::from_text(&text)?;

        assert_eq!(parsed, artifact);
        assert!(!text.contains("diagnostic-secret"));
        assert!(text.contains("path=/tmp/index"));
        Ok(())
    }

    /// The preview text carries its three section markers, counts at least
    /// three redactions, and leaks none of the secrets.
    #[test]
    fn capture_preview_reports_summary_and_checklist() -> Result<(), Box<dyn std::error::Error>> {
        let request = CaptureRequest::new(
            "https://example.org/page?token=url-secret",
            r#"<input name="password" value="field-secret"><p>public</p>"#,
        )?
        .with_diagnostic("session=diagnostic-secret");
        let preview = preview_redacted(&request);
        let text = preview.to_text();

        assert!(text.contains("index-capture-preview-v1"));
        assert!(text.contains("redaction-summary-v1"));
        assert!(text.contains("fixture-submission-checklist-v1"));
        assert!(preview.summary.total() >= 3);
        assert!(!text.contains("url-secret"));
        assert!(!text.contains("field-secret"));
        assert!(!text.contains("diagnostic-secret"));
        Ok(())
    }

    /// A review bundle serializes without secrets and parses back with its
    /// repair hints and catalog entry intact.
    #[test]
    fn capture_review_bundle_roundtrips_with_repair_hints() -> Result<(), Box<dyn std::error::Error>>
    {
        let request = CaptureRequest::new(
            "https://example.org/private?token=url-secret",
            "<main><input name=\"password\" value=\"field-secret\"></main>",
        )?;
        let fixture_path = "../../examples/sample.html";
        let bundle = capture_review_bundle(&request, fixture_path, "index-repair-v1\nmain next")?;
        let text = bundle.to_text();

        assert!(text.contains("index-capture-review-bundle-v1"));
        assert!(text.contains("index-repair-v1"));
        assert!(!text.contains("url-secret"));
        assert!(!text.contains("field-secret"));

        let parsed = CaptureReviewBundle::from_text(&text)?;
        assert_eq!(parsed.repair_hints, "index-repair-v1\nmain next");
        assert!(parsed.catalog_entry.contains("examples/sample.html"));
        Ok(())
    }

    /// A fixture path that does not exist is rejected with an
    /// `InvalidArtifact` error mentioning "does not exist".
    #[test]
    fn catalog_entry_helper_rejects_missing_fixture_paths() {
        let result = catalog_entry_for_fixture("missing/not-here.html", "article", 1);
        assert!(
            matches!(result, Err(CaptureError::InvalidArtifact(reason)) if reason.contains("does not exist"))
        );
    }

    /// Capturing a document produces an artifact that passes bundle
    /// validation, keeps public text, and drops the canonical URL secret.
    #[test]
    fn capture_document_creates_valid_local_artifact() -> Result<(), Box<dyn std::error::Error>> {
        let mut document = IndexDocument::titled("Captured");
        document.metadata.canonical_url = Some("https://example.org/page?token=secret".to_owned());
        document.push(IndexNode::Heading {
            level: 2,
            text: "Main".to_owned(),
        });
        document.push(IndexNode::Paragraph("public text".to_owned()));
        document.push(IndexNode::Link(Link::new(
            "Docs",
            "https://example.org/docs",
        )));

        let artifact = capture_document(&document)?;
        artifact.validate_bundle()?;
        assert!(artifact.redacted_html.contains("<main>"));
        assert!(artifact.redacted_html.contains("public text"));
        assert!(!artifact.to_text().contains("secret"));

        let preview = preview_document(&document)?;
        assert!(preview.to_text().contains("index-capture-preview-v1"));
        Ok(())
    }

    /// Each structured node kind pushed below is expected to show up as the
    /// corresponding HTML fragment in the captured artifact.
    #[test]
    fn capture_document_projects_structured_nodes() -> Result<(), Box<dyn std::error::Error>> {
        let mut document = IndexDocument::titled("Structured <Capture>");
        document.push(IndexNode::List {
            ordered: true,
            items: vec!["one".to_owned(), "two".to_owned()],
        });
        document.push(IndexNode::List {
            ordered: false,
            items: vec!["plain".to_owned()],
        });
        document.push(IndexNode::CodeBlock {
            language: Some("rust".to_owned()),
            code: "fn main() { println!(\"hi\"); }".to_owned(),
        });
        document.push(IndexNode::CodeBlock {
            language: None,
            code: "<raw>".to_owned(),
        });
        document.push(IndexNode::Table {
            rows: vec![
                vec!["Name".to_owned(), "Value".to_owned()],
                vec!["A".to_owned(), "1".to_owned()],
            ],
        });
        document.push(IndexNode::Spacer { lines: 2 });
        document.push(IndexNode::Section {
            role: SectionRole::Main,
            title: Some("Body".to_owned()),
            collapsed: false,
            nodes: vec![IndexNode::Paragraph("inside".to_owned())],
        });
        document.push(IndexNode::Image {
            alt: "diagram".to_owned(),
            src: Some("https://example.org/image.png".to_owned()),
        });
        document.push(IndexNode::Image {
            alt: "missing".to_owned(),
            src: None,
        });
        document.push(IndexNode::Form(Form {
            name: "Search".to_owned(),
            method: "GET".to_owned(),
            action: "https://example.org/search".to_owned(),
            inputs: vec![Input {
                name: "q".to_owned(),
                kind: "text".to_owned(),
                value: None,
                required: false,
            }],
            buttons: Vec::new(),
        }));
        document.push(IndexNode::Error("could not parse sidebar".to_owned()));

        let artifact = capture_document(&document)?;
        let html = artifact.redacted_html;
        // NOTE(review): the title and the raw code block are asserted with
        // literal angle brackets — confirm this matches the escaping that
        // `capture_document` applies.
        assert!(html.contains("<Capture>"));
        assert!(html.contains("<ol><li>one</li><li>two</li></ol>"));
        assert!(html.contains("<ul><li>plain</li></ul>"));
        assert!(html.contains("class=\"language-rust\""));
        assert!(html.contains("<raw>"));
        assert!(html.contains("<table><tr><td>Name</td><td>Value</td></tr>"));
        assert!(html.contains("<section><h2>Body</h2><p>inside</p></section>"));
        assert!(html.contains("<img alt=\"diagram\" src=\"https://example.org/image.png\">"));
        assert!(html.contains("<img alt=\"missing\">"));
        assert!(html.contains("<form action=\"https://example.org/search\" method=\"GET\">"));
        assert!(html.contains("data-index-error=\"true\""));
        Ok(())
    }

    /// An unusable canonical URL is replaced by the local placeholder URL
    /// instead of failing the capture.
    #[test]
    fn capture_document_falls_back_from_invalid_canonical_url()
    -> Result<(), Box<dyn std::error::Error>> {
        let mut document = IndexDocument::titled("Fallback");
        document.metadata.canonical_url = Some("javascript:alert(1)".to_owned());
        document.push(IndexNode::Paragraph("content".to_owned()));

        let artifact = capture_document(&document)?;

        assert_eq!(artifact.source_url, "https://index.local/current");
        assert!(artifact.redacted_html.contains("content"));
        Ok(())
    }

    /// A freshly redacted artifact passes bundle validation and reports the
    /// redacted source URL.
    #[test]
    fn capture_bundle_validation_accepts_redacted_artifact()
    -> Result<(), Box<dyn std::error::Error>> {
        let artifact = capture_redacted(&CaptureRequest::new(
            "https://example.org/page?token=secret",
            "<main>Public</main>",
        )?);
        let parsed = validate_capture_bundle(&artifact.to_text())?;

        assert_eq!(
            parsed.source_url,
            "https://example.org/page?token=[REDACTED]"
        );
        Ok(())
    }

    /// A hand-written bundle whose source URL still carries `token=secret` is
    /// rejected with an "unredacted" reason.
    #[test]
    fn capture_bundle_validation_rejects_unredacted_sensitive_pairs() {
        let input = "index-capture-v1\nsource_url: https://example.org/page?token=secret\nreproduce: index capture --redact https://example.org/page - < local-page.html\n---BEGIN REDACTED HTML---\n<main>Public</main>\n---END REDACTED HTML---\n---BEGIN DIAGNOSTIC---\nnone\n---END DIAGNOSTIC---\n";
        let result = validate_capture_bundle(input);

        assert!(
            matches!(result, Err(CaptureError::InvalidArtifact(reason)) if reason.contains("unredacted"))
        );
    }

    /// A `javascript:` source URL is refused when building the request.
    #[test]
    fn capture_rejects_unsafe_source_url() {
        let request = CaptureRequest::new("javascript:alert(1)", "<main>Bad</main>");
        assert!(matches!(request, Err(CaptureError::InvalidSourceUrl(_))));
    }

    /// Text that does not start with the artifact header fails to parse with a
    /// reason mentioning "header".
    #[test]
    fn artifact_parser_rejects_missing_header() {
        let artifact = CaptureArtifact::from_text("source_url: https://example.org");
        assert!(matches!(
            artifact,
            Err(CaptureError::InvalidArtifact(reason)) if reason.contains("header")
        ));
    }

    /// An index artifact survives a text round trip unchanged and serializes
    /// its header, context, and redacted token.
    #[test]
    fn index_artifact_roundtrips_deterministically() -> Result<(), Box<dyn std::error::Error>> {
        let canonical = IndexUrl::parse("https://example.org/docs?token=secret")?;
        let final_url = IndexUrl::parse("https://example.org/docs")?;
        let mut document = IndexDocument::titled("Artifact");
        document.push(IndexNode::Heading {
            level: 1,
            text: "Title".to_owned(),
        });
        document.push(IndexNode::Paragraph("Body".to_owned()));

        let artifact = IndexArtifact::from_document(
            &document,
            &canonical,
            &final_url,
            ArtifactContext::LiveGet,
            1234,
            300,
        )?;
        let text = artifact.to_text();
        let parsed = IndexArtifact::from_text(&text)?;

        assert_eq!(parsed, artifact);
        assert!(text.contains("index-artifact-v1"));
        assert!(text.contains("context: live-get"));
        assert!(text.contains("token=[REDACTED]"));
        Ok(())
    }

    /// An artifact built with the arguments `100` and `60` reports fresh when
    /// queried at `120` and stale when queried at `161`.
    #[test]
    fn index_artifact_freshness_transitions_are_deterministic()
    -> Result<(), Box<dyn std::error::Error>> {
        let canonical = IndexUrl::parse("https://example.org/docs")?;
        let final_url = IndexUrl::parse("https://example.org/docs")?;
        let mut document = IndexDocument::titled("Freshness");
        document.push(IndexNode::Paragraph("Body".to_owned()));

        let artifact = IndexArtifact::from_document(
            &document,
            &canonical,
            &final_url,
            ArtifactContext::LiveGet,
            100,
            60,
        )?;

        assert_eq!(artifact.freshness(120), ArtifactFreshness::Fresh);
        assert_eq!(artifact.freshness(161), ArtifactFreshness::Stale);
        assert!(artifact.is_fresh(120));
        assert!(!artifact.is_fresh(161));
        Ok(())
    }

    /// The same URL stored under two contexts lands in two distinct files, and
    /// each loads back with its own context.
    #[test]
    fn artifact_store_keys_by_url_and_context() -> Result<(), Box<dyn std::error::Error>> {
        let root = temp_artifact_dir("store");
        // Declared before the store so it is dropped last; removes the scratch
        // directory on every exit path, including `?` returns and panics.
        let _cleanup = TempDirGuard(root.clone());
        let store = ArtifactStore::new(&root);
        let canonical = IndexUrl::parse("https://example.org/forum/thread/1")?;
        let final_url = IndexUrl::parse("https://example.org/forum/thread/1")?;
        let mut document = IndexDocument::titled("Thread");
        document.push(IndexNode::Paragraph("Payload".to_owned()));

        let get_artifact = IndexArtifact::from_document(
            &document,
            &canonical,
            &final_url,
            ArtifactContext::LiveGet,
            10,
            600,
        )?;
        let submit_artifact = IndexArtifact::from_document(
            &document,
            &canonical,
            &final_url,
            ArtifactContext::LiveSubmit,
            10,
            600,
        )?;

        let get_path = store.store(&get_artifact)?;
        let submit_path = store.store(&submit_artifact)?;
        assert_ne!(get_path, submit_path);
        assert!(get_path.exists());
        assert!(submit_path.exists());

        let loaded_get = store
            .load(&canonical, ArtifactContext::LiveGet)?
            .ok_or("missing live-get artifact")?;
        let loaded_submit = store
            .load(&canonical, ArtifactContext::LiveSubmit)?
            .ok_or("missing live-submit artifact")?;
        assert_eq!(loaded_get.context, ArtifactContext::LiveGet);
        assert_eq!(loaded_submit.context, ArtifactContext::LiveSubmit);
        Ok(())
    }

    /// Rewriting the serialized version field to `9` makes the artifact fail
    /// to parse.
    #[test]
    fn artifact_schema_rejects_unsupported_versions() -> Result<(), Box<dyn std::error::Error>> {
        let canonical = IndexUrl::parse("https://example.org/docs")?;
        let final_url = IndexUrl::parse("https://example.org/docs")?;
        let mut document = IndexDocument::titled("Schema");
        document.push(IndexNode::Paragraph("Body".to_owned()));
        let artifact = IndexArtifact::from_document(
            &document,
            &canonical,
            &final_url,
            ArtifactContext::LiveGet,
            1,
            60,
        )?;
        let invalid = artifact.to_text().replace("version: 1", "version: 9");
        let parsed = IndexArtifact::from_text(&invalid);
        assert!(parsed.is_err());
        Ok(())
    }
}