1use std::collections::HashSet;
2use std::path::Path;
3use std::path::PathBuf;
4
5use anyhow::{Result, bail};
6use serde::{Deserialize, Serialize};
7use walkdir::WalkDir;
8
9use crate::config::{RedactConfig, ValidationConfig};
10use crate::frontmatter;
11use crate::git;
12use crate::ops::redact::{RedactionMatch, RedactionReport, redact_body};
13use crate::type_registry::SpaceTypeRegistry;
14
/// Converts every line ending in `input` to a single `\n`.
///
/// Both Windows-style `\r\n` pairs and lone `\r` characters become `\n`;
/// existing `\n` characters are left untouched.
pub fn normalize_line_endings(input: &str) -> String {
    let mut normalized = String::with_capacity(input.len());
    let mut chars = input.chars().peekable();

    while let Some(ch) = chars.next() {
        if ch == '\r' {
            // A CR always yields exactly one LF; when it is the first half of
            // a CRLF pair, swallow the LF so the pair does not become two.
            chars.next_if_eq(&'\n');
            normalized.push('\n');
        } else {
            normalized.push(ch);
        }
    }

    normalized
}
19
20#[derive(Debug, Clone, Default)]
22pub struct IngestOptions {
23 pub dry_run: bool,
25 pub auto_commit: bool,
27 pub changed_paths: Option<HashSet<PathBuf>>,
30 pub redact: Option<RedactConfig>,
32}
33
34#[derive(Debug, Clone, Serialize, Deserialize, Default)]
36pub struct IngestReport {
37 pub pages_validated: usize,
39 pub assets_found: usize,
41 pub warnings: Vec<String>,
43 pub commit: String,
45 #[serde(default)]
47 pub unchanged_count: usize,
48 #[serde(default)]
50 pub redacted: Vec<RedactionReport>,
51}
52
53pub fn ingest(
55 path: &Path,
56 options: &IngestOptions,
57 wiki_root: &Path,
58 registry: &SpaceTypeRegistry,
59 validation: &ValidationConfig,
60) -> Result<IngestReport> {
61 let repo_root = wiki_root
62 .parent()
63 .ok_or_else(|| anyhow::anyhow!("wiki_root has no parent"))?;
64
65 let full_path = if path.is_absolute() {
66 path.to_path_buf()
67 } else {
68 wiki_root.join(path)
69 };
70
71 if !full_path.exists() {
72 bail!("path does not exist: {}", full_path.display());
73 }
74
75 let canonical = full_path.canonicalize()?;
77 let canonical_root = wiki_root.canonicalize()?;
78 if !canonical.starts_with(&canonical_root) {
79 bail!("path is outside wiki root");
80 }
81
82 let mut report = IngestReport::default();
83
84 if full_path.is_file() {
85 let skip = should_skip(&full_path, wiki_root, &options.changed_paths);
86 if skip {
87 report.unchanged_count += 1;
88 } else {
89 validate_file(
90 &full_path,
91 wiki_root,
92 registry,
93 validation,
94 options.redact.as_ref(),
95 &mut report,
96 )?;
97 }
98 } else {
99 for entry in WalkDir::new(&full_path).into_iter().filter_map(|e| e.ok()) {
100 let p = entry.path();
101 if p.is_file() {
102 if p.extension().and_then(|e| e.to_str()) == Some("md") {
103 if should_skip(p, wiki_root, &options.changed_paths) {
104 report.unchanged_count += 1;
105 } else {
106 validate_file(
107 p,
108 wiki_root,
109 registry,
110 validation,
111 options.redact.as_ref(),
112 &mut report,
113 )?;
114 }
115 } else {
116 report.assets_found += 1;
117 }
118 }
119 }
120 }
121
122 if !options.dry_run && options.auto_commit {
123 let msg = format!(
124 "ingest: {} — +{} pages, +{} assets",
125 path.display(),
126 report.pages_validated,
127 report.assets_found
128 );
129 let hash = git::commit(repo_root, &msg)?;
130 report.commit = hash;
131 }
132
133 Ok(report)
134}
135
/// Decides whether a file is excluded by the changed-paths filter.
///
/// Returns `false` (never skip) when there is no filter or the filter set is
/// empty. Otherwise the file is skipped unless its path relative to
/// `wiki_root` is in the set; a path that is not under `wiki_root` is looked
/// up as-is.
fn should_skip(abs_path: &Path, wiki_root: &Path, changed: &Option<HashSet<PathBuf>>) -> bool {
    match changed {
        Some(paths) if !paths.is_empty() => {
            let relative = abs_path.strip_prefix(wiki_root).unwrap_or(abs_path);
            !paths.contains(relative)
        }
        _ => false,
    }
}
144
/// Derives a page slug from a file path.
///
/// The slug is the path relative to `wiki_root` with its final extension
/// removed. A path that is not under `wiki_root` is used in full. Path
/// separators are not normalized, and non-UTF-8 components are converted
/// lossily.
fn slug_from_path(abs_path: &Path, wiki_root: &Path) -> String {
    let relative = match abs_path.strip_prefix(wiki_root) {
        Ok(inside_root) => inside_root,
        Err(_) => abs_path,
    };
    let without_extension = relative.with_extension("");
    without_extension.to_string_lossy().into_owned()
}
153
154fn validate_file(
155 path: &Path,
156 wiki_root: &Path,
157 registry: &SpaceTypeRegistry,
158 validation: &ValidationConfig,
159 redact_cfg: Option<&RedactConfig>,
160 report: &mut IngestReport,
161) -> Result<()> {
162 let raw = std::fs::read_to_string(path)?;
163 let mut content = normalize_line_endings(&raw);
164
165 if let Some(cfg) = redact_cfg {
167 let parsed = frontmatter::parse(&content);
168 let separator = "---";
169 let body_start = if content.starts_with(separator) {
171 let after_open = &content[3..];
173 after_open
174 .find("\n---")
175 .map(|pos| 3 + pos + 4 + 1)
176 .unwrap_or(0)
177 } else {
178 0
179 };
180
181 if body_start > 0 && body_start <= content.len() {
182 let front = &content[..body_start];
183 let body = &content[body_start..];
184 let (redacted_body, matches) = redact_body(body, cfg);
185 if !matches.is_empty() {
186 let slug = slug_from_path(path, wiki_root);
187 let fm_lines = front.lines().count();
189 let adjusted: Vec<RedactionMatch> = matches
190 .into_iter()
191 .map(|m| RedactionMatch {
192 pattern_name: m.pattern_name,
193 line_number: m.line_number + fm_lines,
194 })
195 .collect();
196 report.redacted.push(RedactionReport {
197 slug,
198 matches: adjusted,
199 });
200 std::fs::write(path, format!("{front}{redacted_body}"))?;
201 content = normalize_line_endings(&std::fs::read_to_string(path)?);
202 }
203 } else {
204 let (redacted, matches) = redact_body(&content, cfg);
206 if !matches.is_empty() {
207 let slug = slug_from_path(path, wiki_root);
208 report.redacted.push(RedactionReport { slug, matches });
209 std::fs::write(path, &redacted)?;
210 content = normalize_line_endings(&redacted);
211 }
212 }
213 let _ = parsed; }
215
216 let page = frontmatter::parse(&content);
217
218 if page.frontmatter.is_empty() {
220 report
221 .warnings
222 .push(format!("{}: no frontmatter found", path.display()));
223 report.pages_validated += 1;
224 return Ok(());
225 }
226
227 let warnings = registry.validate(&page.frontmatter, &validation.type_strictness)?;
229 for w in warnings {
230 report.warnings.push(format!("{}: {}", path.display(), w));
231 }
232
233 report.pages_validated += 1;
234 Ok(())
235}