1use anyhow::{Context, Result};
2use serde::Deserialize;
3use std::fmt;
4use std::path::{Path, PathBuf};
5
/// The kinds of document this module recognizes.
///
/// Every variant carries an upper-case filename prefix ([`DocType::prefix`]),
/// a human-readable name ([`DocType::display_name`]) and a relative storage
/// directory ([`DocType::directory`]).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum DocType {
    /// AI Action Log (`AILOG`).
    Ailog,
    /// AI Decision (`AIDEC`).
    Aidec,
    /// Architecture Decision Record (`ADR`).
    Adr,
    /// Ethical Review (`ETH`).
    Eth,
    /// Requirement (`REQ`).
    Req,
    /// Test Plan (`TES`).
    Tes,
    /// Incident Post-mortem (`INC`).
    Inc,
    /// Technical Debt (`TDE`).
    Tde,
    /// Security Assessment (`SEC`).
    Sec,
    /// Model/System Card (`MCARD`).
    Mcard,
    /// Software Bill of Materials (`SBOM`).
    Sbom,
    /// Data Protection Impact Assessment (`DPIA`).
    Dpia,
    /// Personal Information Protection Impact Assessment (`PIPIA`).
    Pipia,
    /// CAC Algorithm Filing (`CACFILE`).
    Cacfile,
    /// TC260 Risk Assessment (`TC260RA`).
    Tc260ra,
    /// GB 45438 Content Labeling Plan (`AILABEL`).
    Ailabel,
}

impl DocType {
    /// Every document type, in declaration order.
    pub const ALL: &'static [DocType] = &[
        DocType::Ailog,
        DocType::Aidec,
        DocType::Adr,
        DocType::Eth,
        DocType::Req,
        DocType::Tes,
        DocType::Inc,
        DocType::Tde,
        DocType::Sec,
        DocType::Mcard,
        DocType::Sbom,
        DocType::Dpia,
        DocType::Pipia,
        DocType::Cacfile,
        DocType::Tc260ra,
        DocType::Ailabel,
    ];

    /// The filename prefix of every document type, in the same order as [`DocType::ALL`].
    pub const ALL_PREFIXES: &'static [&'static str] = &[
        "AILOG", "AIDEC", "ADR", "ETH", "REQ", "TES", "INC", "TDE", "SEC", "MCARD", "SBOM",
        "DPIA", "PIPIA", "CACFILE", "TC260RA", "AILABEL",
    ];

    /// The document types for which [`DocType::is_china_only`] returns `true`.
    pub const CHINA_ONLY: &'static [DocType] = &[
        DocType::Pipia,
        DocType::Cacfile,
        DocType::Tc260ra,
        DocType::Ailabel,
    ];

    /// Returns the upper-case prefix that identifies this document type.
    pub fn prefix(&self) -> &'static str {
        match *self {
            Self::Ailog => "AILOG",
            Self::Aidec => "AIDEC",
            Self::Adr => "ADR",
            Self::Eth => "ETH",
            Self::Req => "REQ",
            Self::Tes => "TES",
            Self::Inc => "INC",
            Self::Tde => "TDE",
            Self::Sec => "SEC",
            Self::Mcard => "MCARD",
            Self::Sbom => "SBOM",
            Self::Dpia => "DPIA",
            Self::Pipia => "PIPIA",
            Self::Cacfile => "CACFILE",
            Self::Tc260ra => "TC260RA",
            Self::Ailabel => "AILABEL",
        }
    }

    /// Looks up a document type by its exact (case-sensitive) prefix.
    ///
    /// Returns `None` when `prefix` is not one of [`DocType::ALL_PREFIXES`].
    pub fn from_prefix(prefix: &str) -> Option<DocType> {
        // `prefix()` is the single source of truth; scanning `ALL` keeps the
        // reverse lookup in step with it.
        Self::ALL
            .iter()
            .copied()
            .find(|doc_type| doc_type.prefix() == prefix)
    }

    /// Looks up a document type by name, ignoring letter case
    /// (`"ailog"`, `"AILOG"` and `"AiLog"` all resolve to [`DocType::Ailog`]).
    ///
    /// Returns `None` for any other input.
    pub fn from_str_loose(s: &str) -> Option<DocType> {
        // All prefixes are plain ASCII, so an ASCII case-insensitive comparison
        // is sufficient and avoids allocating a lower-cased copy of `s`.
        Self::ALL
            .iter()
            .copied()
            .find(|doc_type| doc_type.prefix().eq_ignore_ascii_case(s))
    }

    /// Returns the human-readable name of this document type.
    pub fn display_name(&self) -> &'static str {
        match *self {
            Self::Ailog => "AI Action Log",
            Self::Aidec => "AI Decision",
            Self::Adr => "Architecture Decision Record",
            Self::Eth => "Ethical Review",
            Self::Req => "Requirement",
            Self::Tes => "Test Plan",
            Self::Inc => "Incident Post-mortem",
            Self::Tde => "Technical Debt",
            Self::Sec => "Security Assessment",
            Self::Mcard => "Model/System Card",
            Self::Sbom => "Software Bill of Materials",
            Self::Dpia => "Data Protection Impact Assessment",
            Self::Pipia => "Personal Information Protection Impact Assessment",
            Self::Cacfile => "CAC Algorithm Filing",
            Self::Tc260ra => "TC260 Risk Assessment",
            Self::Ailabel => "GB 45438 Content Labeling Plan",
        }
    }

    /// Returns the relative directory associated with this document type.
    ///
    /// Several types share a directory (for example `ETH`, `DPIA` and `PIPIA`).
    pub fn directory(&self) -> &'static str {
        match *self {
            Self::Req => "01-requirements",
            Self::Adr => "02-design/decisions",
            Self::Tes => "04-testing",
            Self::Inc => "05-operations/incidents",
            Self::Tde => "06-evolution/technical-debt",
            Self::Sbom => "07-ai-audit",
            Self::Ailog => "07-ai-audit/agent-logs",
            Self::Aidec => "07-ai-audit/decisions",
            Self::Eth | Self::Dpia | Self::Pipia => "07-ai-audit/ethical-reviews",
            Self::Cacfile => "07-ai-audit/regulatory-filings",
            Self::Tc260ra => "07-ai-audit/risk-assessments",
            Self::Sec => "08-security",
            Self::Mcard => "09-ai-models",
            Self::Ailabel => "09-ai-models/labeling",
        }
    }

    /// Returns `true` when this type is listed in [`DocType::CHINA_ONLY`].
    pub fn is_china_only(&self) -> bool {
        Self::CHINA_ONLY.iter().any(|candidate| candidate == self)
    }
}
174
175impl fmt::Display for DocType {
176 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
177 write!(f, "{}", self.prefix())
178 }
179}
180
/// Metadata deserialized from the YAML frontmatter block of a document.
///
/// Every field is an `Option`, so a key that is absent from the YAML simply
/// deserializes to `None`. Field names are the YAML keys.
#[derive(Debug, Clone, Deserialize, Default)]
// NOTE(review): presumably not every field is read by the rest of the crate yet;
// confirm before removing this allow.
#[allow(dead_code)]
pub struct Frontmatter {
    // General identification and review fields.
    pub id: Option<String>,
    pub title: Option<String>,
    pub status: Option<String>,
    pub created: Option<String>,
    pub agent: Option<String>,
    pub confidence: Option<String>,
    pub review_required: Option<bool>,
    pub reviewed_by: Option<String>,
    pub reviewed_at: Option<String>,
    pub review_outcome: Option<String>,
    pub risk_level: Option<String>,
    pub eu_ai_act_risk: Option<String>,
    pub nist_genai_risks: Option<Vec<String>>,
    // Deserialized as a list of small integers (`u8`), not strings.
    pub iso_42001_clause: Option<Vec<u8>>,
    pub tags: Option<Vec<String>>,
    pub related: Option<Vec<String>>,
    pub supersedes: Option<Vec<String>>,
    pub alternatives_documented: Option<Vec<String>>,
    pub originating_ailogs: Option<Vec<String>>,
    pub severity: Option<String>,
    pub gdpr_legal_basis: Option<String>,
    pub threat_model_methodology: Option<String>,
    // Kept as a raw YAML value, so the key is not tied to one scalar type.
    pub owasp_asvs_level: Option<serde_yaml::Value>,
    pub model_name: Option<String>,
    pub model_type: Option<String>,
    pub model_version: Option<String>,
    pub provider: Option<String>,
    pub license: Option<String>,
    pub sbom_format_reference: Option<String>,
    pub system_name: Option<String>,
    pub gdpr_article_35: Option<bool>,
    pub dpo_consulted: Option<bool>,
    pub supervisory_authority_consulted: Option<bool>,
    pub api_changes: Option<Vec<String>>,
    pub api_spec_path: Option<String>,

    // `tc260_*` keys — presumably used by TC260 Risk Assessment documents; TODO confirm.
    pub tc260_risk_level: Option<String>,
    pub tc260_application_scenario: Option<String>,
    pub tc260_intelligence_level: Option<String>,
    pub tc260_application_scale: Option<String>,
    pub tc260_endogenous_risks: Option<Vec<String>>,
    pub tc260_application_risks: Option<Vec<String>>,
    pub tc260_derivative_risks: Option<Vec<String>>,

    // `pipl_*` keys — presumably used by PIPIA documents; TODO confirm.
    pub pipl_applicable: Option<bool>,
    pub pipl_article_55_trigger: Option<String>,
    pub pipl_sensitive_data: Option<bool>,
    pub pipl_cross_border_transfer: Option<bool>,
    // Stored as a plain string (e.g. `2029-04-25`); no date parsing happens here.
    pub pipl_retention_until: Option<String>,

    // `gb45438_*` keys — presumably used by GB 45438 content-labeling documents; TODO confirm.
    pub gb45438_applicable: Option<bool>,
    pub gb45438_content_types: Option<Vec<String>>,
    pub gb45438_explicit_label_strategy: Option<String>,
    pub gb45438_implicit_metadata_format: Option<String>,
    pub gb45438_distributor_obligations_documented: Option<bool>,

    // `cac_*` keys — presumably used by CAC algorithm-filing documents; TODO confirm.
    pub cac_filing_required: Option<bool>,
    pub cac_filing_number: Option<String>,
    pub cac_filing_status: Option<String>,
    pub cac_filing_type: Option<String>,
    pub cac_provincial_authority: Option<String>,
    pub cac_national_decision_date: Option<String>,

    pub gb45652_training_data_compliance: Option<bool>,

    // `csl_*` keys; the deadline is an unsigned whole number of hours.
    pub csl_severity_level: Option<String>,
    pub csl_report_deadline_hours: Option<u32>,
}
292
/// A document loaded from disk: where it came from, what kind it is, its
/// parsed frontmatter, and the remaining text.
#[derive(Debug)]
pub struct StrayMarkDocument {
    /// Path the document was read from.
    pub path: PathBuf,
    /// Final component of `path`; empty when it is missing or not valid UTF-8.
    pub filename: String,
    /// Document type detected from the filename prefix.
    pub doc_type: DocType,
    /// Metadata parsed from the YAML frontmatter block.
    pub frontmatter: Frontmatter,
    /// Everything that follows the closing `---` of the frontmatter.
    pub body: String,
}
302
303pub fn parse_document(path: &Path) -> Result<StrayMarkDocument> {
305 let content = std::fs::read_to_string(path)
306 .with_context(|| format!("Failed to read {}", path.display()))?;
307
308 let filename = path
309 .file_name()
310 .and_then(|n| n.to_str())
311 .unwrap_or("")
312 .to_string();
313
314 let doc_type = detect_doc_type(&filename)
316 .with_context(|| format!("Cannot determine document type for {}", filename))?;
317
318 let (frontmatter, body) = extract_frontmatter(&content)
320 .with_context(|| format!("Failed to parse frontmatter in {}", path.display()))?;
321
322 Ok(StrayMarkDocument {
323 path: path.to_path_buf(),
324 filename,
325 doc_type,
326 frontmatter,
327 body,
328 })
329}
330
331pub fn detect_doc_type(filename: &str) -> Option<DocType> {
333 for prefix in DocType::ALL_PREFIXES {
334 if filename.starts_with(&format!("{}-", prefix)) {
335 return DocType::from_prefix(prefix);
336 }
337 }
338 None
339}
340
341fn extract_frontmatter(content: &str) -> Result<(Frontmatter, String)> {
343 let trimmed = content.trim_start();
344 if !trimmed.starts_with("---") {
345 anyhow::bail!("No frontmatter found (missing opening ---)");
346 }
347
348 let after_first = &trimmed[3..];
349 let end_pos = after_first
350 .find("\n---")
351 .ok_or_else(|| anyhow::anyhow!("No closing --- found for frontmatter"))?;
352
353 let yaml_str = &after_first[..end_pos];
354 let body_start = end_pos + 4; let body = if body_start < after_first.len() {
356 after_first[body_start..].to_string()
357 } else {
358 String::new()
359 };
360
361 let frontmatter: Frontmatter = serde_yaml::from_str(yaml_str)
362 .with_context(|| "Failed to deserialize frontmatter YAML")?;
363
364 Ok((frontmatter, body))
365}
366
367pub fn discover_documents(straymark_dir: &Path) -> Vec<PathBuf> {
369 let mut results = Vec::new();
370 walk_for_documents(straymark_dir, &mut results);
371 results.sort();
372 results
373}
374
375fn walk_for_documents(dir: &Path, results: &mut Vec<PathBuf>) {
376 let entries = match std::fs::read_dir(dir) {
377 Ok(e) => e,
378 Err(_) => return,
379 };
380
381 for entry in entries.flatten() {
382 let path = entry.path();
383 if path.is_dir() {
384 if path.ends_with("templates") {
386 continue;
387 }
388 walk_for_documents(&path, results);
389 } else if path.extension().and_then(|e| e.to_str()) == Some("md") {
390 let filename = path
391 .file_name()
392 .and_then(|n| n.to_str())
393 .unwrap_or("");
394 if detect_doc_type(filename).is_some() && is_dated_document(filename) {
396 results.push(path);
397 }
398 }
399 }
400}
401
/// Returns `true` when the text after the first `-` in `filename` starts with
/// a date shaped like `YYYY-MM-DD` (four digits, dash, two digits, dash, two
/// digits).
///
/// Only the shape is checked; the month and day values are not validated.
/// The check works on bytes, so filenames containing multi-byte UTF-8
/// characters are rejected cleanly instead of panicking on a slice that
/// falls inside a character.
fn is_dated_document(filename: &str) -> bool {
    let after_prefix = match filename.split_once('-') {
        Some((_, rest)) => rest.as_bytes(),
        None => return false,
    };

    // `get` yields `None` when fewer than 10 bytes follow the prefix.
    match after_prefix.get(..10) {
        Some(date) => date.iter().enumerate().all(|(index, byte)| match index {
            4 | 7 => *byte == b'-',
            _ => byte.is_ascii_digit(),
        }),
        None => false,
    }
}
422
// Unit tests for document-type detection, filename date checks and
// frontmatter extraction.
#[cfg(test)]
mod tests {
    use super::*;

    // Known prefixes followed by `-` are recognized; other names are not.
    #[test]
    fn test_detect_doc_type() {
        assert_eq!(detect_doc_type("AILOG-2025-01-01-001-test.md"), Some(DocType::Ailog));
        assert_eq!(detect_doc_type("SEC-2025-01-01-001-auth.md"), Some(DocType::Sec));
        assert_eq!(detect_doc_type("MCARD-2025-01-01-001-gpt.md"), Some(DocType::Mcard));
        assert_eq!(detect_doc_type("SBOM-2025-01-01-001-deps.md"), Some(DocType::Sbom));
        assert_eq!(detect_doc_type("DPIA-2025-01-01-001-gdpr.md"), Some(DocType::Dpia));
        assert_eq!(detect_doc_type("README.md"), None);
        assert_eq!(detect_doc_type("TEMPLATE-SEC.md"), None);
    }

    // A `YYYY-MM-DD` date must follow the first `-` in the filename.
    #[test]
    fn test_is_dated_document() {
        assert!(is_dated_document("AILOG-2025-01-27-001-implement-auth.md"));
        assert!(is_dated_document("SEC-2026-03-24-001-api-review.md"));
        assert!(!is_dated_document("TEMPLATE-SEC.md"));
        assert!(!is_dated_document("README.md"));
    }

    // Frontmatter keys are parsed and the body after the closing `---` is kept.
    #[test]
    fn test_extract_frontmatter() {
        let content = "---\nid: AILOG-2025-01-01-001\ntitle: Test\nstatus: draft\n---\n\n# Body";
        let (fm, body) = extract_frontmatter(content).unwrap();
        assert_eq!(fm.id.as_deref(), Some("AILOG-2025-01-01-001"));
        assert_eq!(fm.title.as_deref(), Some("Test"));
        assert!(body.contains("# Body"));
    }

    // Guards against adding a variant without updating both lookup tables.
    #[test]
    fn test_doc_type_all_has_16_entries() {
        assert_eq!(DocType::ALL.len(), 16);
        assert_eq!(DocType::ALL_PREFIXES.len(), 16);
    }

    // Exactly the four China-specific types report `is_china_only`.
    #[test]
    fn test_china_only_doc_types() {
        assert_eq!(DocType::CHINA_ONLY.len(), 4);
        assert!(DocType::Pipia.is_china_only());
        assert!(DocType::Cacfile.is_china_only());
        assert!(DocType::Tc260ra.is_china_only());
        assert!(DocType::Ailabel.is_china_only());
        assert!(!DocType::Ailog.is_china_only());
        assert!(!DocType::Dpia.is_china_only());
    }

    // The China-specific prefixes are detected from filenames.
    #[test]
    fn test_china_doc_type_detection() {
        assert_eq!(detect_doc_type("PIPIA-2026-04-25-001-chatbot.md"), Some(DocType::Pipia));
        assert_eq!(detect_doc_type("CACFILE-2026-04-25-001-chatbot.md"), Some(DocType::Cacfile));
        assert_eq!(detect_doc_type("TC260RA-2026-04-25-001-chatbot.md"), Some(DocType::Tc260ra));
        assert_eq!(detect_doc_type("AILABEL-2026-04-25-001-chatbot.md"), Some(DocType::Ailabel));
    }

    // Pins the storage directory of each China-specific type.
    #[test]
    fn test_china_doc_type_directories() {
        assert_eq!(DocType::Pipia.directory(), "07-ai-audit/ethical-reviews");
        assert_eq!(DocType::Cacfile.directory(), "07-ai-audit/regulatory-filings");
        assert_eq!(DocType::Tc260ra.directory(), "07-ai-audit/risk-assessments");
        assert_eq!(DocType::Ailabel.directory(), "09-ai-models/labeling");
    }

    // China-specific keys deserialize into their typed fields; note that the
    // unquoted date stays a string and the deadline becomes an integer.
    #[test]
    fn test_china_frontmatter_parsing() {
        let content = "---\n\
            id: PIPIA-2026-04-25-001\n\
            title: Test PIPIA\n\
            pipl_applicable: true\n\
            pipl_sensitive_data: true\n\
            pipl_cross_border_transfer: false\n\
            pipl_retention_until: 2029-04-25\n\
            tc260_risk_level: high\n\
            cac_filing_number: CAC-2026-00123\n\
            cac_filing_status: national_approved\n\
            gb45438_content_types: [text, image]\n\
            csl_severity_level: relatively_major\n\
            csl_report_deadline_hours: 4\n\
            ---\n\nbody";
        let (fm, _) = extract_frontmatter(content).unwrap();
        assert_eq!(fm.pipl_applicable, Some(true));
        assert_eq!(fm.pipl_sensitive_data, Some(true));
        assert_eq!(fm.pipl_retention_until.as_deref(), Some("2029-04-25"));
        assert_eq!(fm.tc260_risk_level.as_deref(), Some("high"));
        assert_eq!(fm.cac_filing_number.as_deref(), Some("CAC-2026-00123"));
        assert_eq!(fm.cac_filing_status.as_deref(), Some("national_approved"));
        assert_eq!(fm.gb45438_content_types.as_ref().unwrap().len(), 2);
        assert_eq!(fm.csl_severity_level.as_deref(), Some("relatively_major"));
        assert_eq!(fm.csl_report_deadline_hours, Some(4));
    }

    // Every type maps to a non-empty, relative directory.
    #[test]
    fn test_doc_type_directory_mapping() {
        for dt in DocType::ALL {
            let dir = dt.directory();
            assert!(!dir.is_empty(), "{} has empty directory", dt.prefix());
            assert!(!dir.starts_with('/'), "{} directory should be relative", dt.prefix());
        }
    }

    // Every type has a non-empty display name.
    #[test]
    fn test_doc_type_display_names() {
        for dt in DocType::ALL {
            let name = dt.display_name();
            assert!(!name.is_empty(), "{} has empty display_name", dt.prefix());
        }
    }

    // Loose lookup ignores letter case and rejects unknown names.
    #[test]
    fn test_doc_type_from_str_loose() {
        assert_eq!(DocType::from_str_loose("ailog"), Some(DocType::Ailog));
        assert_eq!(DocType::from_str_loose("AILOG"), Some(DocType::Ailog));
        assert_eq!(DocType::from_str_loose("AiLog"), Some(DocType::Ailog));
        assert_eq!(DocType::from_str_loose("sec"), Some(DocType::Sec));
        assert_eq!(DocType::from_str_loose("mcard"), Some(DocType::Mcard));
        assert_eq!(DocType::from_str_loose("invalid"), None);
    }
}