1pub mod git_diff;
19pub mod json_diff;
20pub mod snapshot;
21
22use crw_core::types::{
23 ChangeDiff, ChangeStatus, ChangeTrackingMode, ChangeTrackingOptions, ChangeTrackingResult,
24 ChangeTrackingSnapshot,
25};
26use serde_json::Value;
27
/// Change cap used by [`DiffLimits::default`].
pub const DEFAULT_MAX_DIFF_CHANGES: usize = 5000;

/// Bounds applied while computing a diff.
#[derive(Clone, Copy, Debug)]
pub struct DiffLimits {
    /// Forwarded to `git_diff::compute` as its change limit.
    // NOTE(review): presumably output beyond this many changes is cut off and
    // reported through the result's `truncated` flag — confirm in `git_diff`.
    pub max_diff_changes: usize,
}

impl Default for DiffLimits {
    /// Builds limits whose cap is [`DEFAULT_MAX_DIFF_CHANGES`].
    fn default() -> Self {
        DiffLimits {
            max_diff_changes: DEFAULT_MAX_DIFF_CHANGES,
        }
    }
}
44
45pub fn compute_change_tracking(
48 opts: &ChangeTrackingOptions,
49 current_markdown: &str,
50 current_json: Option<&Value>,
51 content_type: Option<&str>,
52) -> ChangeTrackingResult {
53 compute_change_tracking_with_limits(
54 opts,
55 current_markdown,
56 current_json,
57 content_type,
58 &DiffLimits::default(),
59 )
60}
61
62pub fn compute_change_tracking_with_limits(
64 opts: &ChangeTrackingOptions,
65 current_markdown: &str,
66 current_json: Option<&Value>,
67 content_type: Option<&str>,
68 limits: &DiffLimits,
69) -> ChangeTrackingResult {
70 let has_git = opts.modes.is_empty() || opts.modes.contains(&ChangeTrackingMode::GitDiff);
71 let has_json = opts.modes.contains(&ChangeTrackingMode::Json);
72 let json_only = has_json && !has_git;
73
74 if !is_text(content_type) {
76 return binary_result(opts, current_markdown);
77 }
78
79 let content_hash = if json_only {
81 match current_json {
82 Some(j) => snapshot::hash_json(j),
83 None => snapshot::hash_str(""),
84 }
85 } else {
86 snapshot::hash_markdown(current_markdown)
87 };
88
89 let current_snapshot = ChangeTrackingSnapshot {
91 markdown: if has_git {
92 Some(current_markdown.to_string())
93 } else {
94 None
95 },
96 json: if has_json {
97 current_json.cloned()
98 } else {
99 None
100 },
101 content_hash: content_hash.clone(),
102 captured_at: None,
103 };
104
105 let Some(previous) = &opts.previous else {
107 return ChangeTrackingResult {
108 status: ChangeStatus::Changed,
109 first_observation: true,
110 content_hash,
111 snapshot: Some(current_snapshot),
112 diff: None,
113 judgment: None,
114 tag: opts.tag.clone(),
115 truncated: false,
116 };
117 };
118
119 let prev_md_norm = previous
121 .markdown
122 .as_deref()
123 .map(snapshot::normalize_markdown);
124 let cur_md_norm = snapshot::normalize_markdown(current_markdown);
125 let markdown_changed = has_git
126 && prev_md_norm
127 .as_deref()
128 .map(|p| p != cur_md_norm)
129 .unwrap_or(true);
130
131 let empty_json = Value::Null;
132 let prev_json = previous.json.as_ref().unwrap_or(&empty_json);
133 let cur_json_val = current_json.unwrap_or(&empty_json);
134 let json_changed = has_json && json_diff::changed(prev_json, cur_json_val);
135
136 let changed = (has_git && markdown_changed) || (has_json && json_changed);
137
138 if !changed {
139 return ChangeTrackingResult {
140 status: ChangeStatus::Same,
141 first_observation: false,
142 content_hash,
143 snapshot: Some(current_snapshot),
144 diff: None,
145 judgment: None,
146 tag: opts.tag.clone(),
147 truncated: false,
148 };
149 }
150
151 let mut text: Option<String> = None;
153 let mut ast_value: Option<Value> = None;
154 let mut truncated = false;
155
156 if has_git {
157 let g = git_diff::compute(
158 prev_md_norm.as_deref().unwrap_or(""),
159 &cur_md_norm,
160 limits.max_diff_changes,
161 );
162 truncated = g.ast.truncated;
163 text = Some(g.text);
164 if !has_json {
167 ast_value = Some(serde_json::to_value(&g.ast).unwrap_or(Value::Null));
168 }
169 }
170
171 let json_value: Option<Value> = if has_json {
172 Some(json_diff::compute(prev_json, cur_json_val))
173 } else {
174 None
175 };
176
177 let diff_json = json_value.or(ast_value);
179 let diff = ChangeDiff {
180 text,
181 json: diff_json,
182 };
183
184 ChangeTrackingResult {
185 status: ChangeStatus::Changed,
186 first_observation: false,
187 content_hash,
188 snapshot: Some(current_snapshot),
189 diff: Some(diff),
190 judgment: None,
191 tag: opts.tag.clone(),
192 truncated,
193 }
194}
195
196fn binary_result(opts: &ChangeTrackingOptions, current_text: &str) -> ChangeTrackingResult {
199 let content_hash = snapshot::hash_str(current_text);
200 let snapshot = ChangeTrackingSnapshot {
201 markdown: None,
202 json: None,
203 content_hash: content_hash.clone(),
204 captured_at: None,
205 };
206 match &opts.previous {
207 None => ChangeTrackingResult {
208 status: ChangeStatus::Changed,
209 first_observation: true,
210 content_hash,
211 snapshot: Some(snapshot),
212 diff: None,
213 judgment: None,
214 tag: opts.tag.clone(),
215 truncated: false,
216 },
217 Some(prev) => {
218 let status = if prev.content_hash == content_hash {
219 ChangeStatus::Same
220 } else {
221 ChangeStatus::Changed
222 };
223 ChangeTrackingResult {
224 status,
225 first_observation: false,
226 content_hash,
227 snapshot: Some(snapshot),
228 diff: None,
229 judgment: None,
230 tag: opts.tag.clone(),
231 truncated: false,
232 }
233 }
234 }
235}
236
/// Reports whether a `Content-Type` value denotes content that should be
/// diffed as text.
///
/// * `None` (no content type known) is treated as text.
/// * Only the media type itself is inspected: parameters after the first `;`
///   (`charset=…`, `name="report.json"`, …) and surrounding whitespace are
///   ignored, so a parameter value can never make a binary type look textual.
/// * Matching is case-insensitive. A type is text when it starts with `text/`
///   or contains one of the markers `json`, `xml`, `html`, `markdown`,
///   `javascript`, `csv`, `yaml`.
/// * Office Open XML types (`application/vnd.openxmlformats-officedocument.*`)
///   are zip archives and are reported as non-text even though their name
///   contains `xml`.
fn is_text(content_type: Option<&str>) -> bool {
    // Substrings that mark a media type as textual.
    const TEXT_MARKERS: [&str; 7] = [
        "json",
        "xml",
        "html",
        "markdown",
        "javascript",
        "csv",
        "yaml",
    ];

    let Some(raw) = content_type else {
        return true;
    };

    // Drop parameters and padding; keep only `type/subtype`.
    let essence = raw
        .split(';')
        .next()
        .unwrap_or(raw)
        .trim()
        .to_ascii_lowercase();

    // docx/xlsx/pptx: the "xml" in "openxmlformats" is part of the vendor
    // name, not a statement about the payload, which is a zip container.
    if essence.contains("openxmlformats") {
        return false;
    }

    essence.starts_with("text/") || TEXT_MARKERS.iter().any(|marker| essence.contains(*marker))
}
254
#[cfg(test)]
mod tests {
    use super::*;
    use crw_core::types::ChangeTrackingMode;
    use serde_json::json;

    /// Options with only `modes` and `previous` populated.
    fn opts(
        modes: Vec<ChangeTrackingMode>,
        previous: Option<ChangeTrackingSnapshot>,
    ) -> ChangeTrackingOptions {
        ChangeTrackingOptions {
            modes,
            schema: None,
            prompt: None,
            previous,
            tag: None,
            content_type: None,
        }
    }

    /// Snapshot holding markdown only, hashed with `hash_markdown`.
    fn snap_md(md: &str) -> ChangeTrackingSnapshot {
        ChangeTrackingSnapshot {
            markdown: Some(md.to_string()),
            json: None,
            content_hash: snapshot::hash_markdown(md),
            captured_at: None,
        }
    }

    /// Snapshot holding JSON only, hashed with `hash_json`.
    fn snap_json(tracked: serde_json::Value) -> ChangeTrackingSnapshot {
        ChangeTrackingSnapshot {
            markdown: None,
            content_hash: snapshot::hash_json(&tracked),
            json: Some(tracked),
            captured_at: None,
        }
    }

    /// Snapshot holding nothing but a `hash_str` hash of `text`.
    fn snap_hash_only(text: &str) -> ChangeTrackingSnapshot {
        ChangeTrackingSnapshot {
            markdown: None,
            json: None,
            content_hash: snapshot::hash_str(text),
            captured_at: None,
        }
    }

    /// Git-diff-only options whose previous snapshot is `previous_md`.
    fn git_opts(previous_md: &str) -> ChangeTrackingOptions {
        opts(
            vec![ChangeTrackingMode::GitDiff],
            Some(snap_md(previous_md)),
        )
    }

    #[test]
    fn first_observation_no_previous() {
        let o = opts(vec![ChangeTrackingMode::GitDiff], None);
        let r = compute_change_tracking(&o, "# Hi", None, None);
        assert!(r.first_observation);
        assert_eq!(r.status, ChangeStatus::Changed);
        assert!(r.diff.is_none());
        assert!(r.snapshot.is_some());
    }

    #[test]
    fn identical_markdown_is_same() {
        let r = compute_change_tracking(&git_opts("# Hi\n\nbody"), "# Hi\n\nbody", None, None);
        assert_eq!(r.status, ChangeStatus::Same);
        assert!(r.diff.is_none());
    }

    #[test]
    fn whitespace_only_change_is_same() {
        let r = compute_change_tracking(
            &git_opts("# Hi\n\nbody"),
            "# Hi \n\n\n\nbody \n",
            None,
            None,
        );
        assert_eq!(r.status, ChangeStatus::Same);
    }

    #[test]
    fn markdown_change_emits_text_and_ast_in_git_mode() {
        let r = compute_change_tracking(&git_opts("Starter $19"), "Starter $24", None, None);
        assert_eq!(r.status, ChangeStatus::Changed);
        let diff = r.diff.unwrap();
        let text = diff.text.unwrap();
        assert!(text.contains("+Starter $24"));
        // Without JSON mode the `json` slot carries the git diff AST.
        let ast = diff.json.unwrap();
        assert!(ast.get("files").is_some());
    }

    #[test]
    fn json_mode_per_field_diff() {
        let o = opts(
            vec![ChangeTrackingMode::Json],
            Some(snap_json(json!({"price": "$19"}))),
        );
        let cur = json!({"price": "$24"});
        let r = compute_change_tracking(&o, "ignored markdown", Some(&cur), None);
        assert_eq!(r.status, ChangeStatus::Changed);
        let diff = r.diff.unwrap();
        assert!(diff.text.is_none());
        assert_eq!(
            diff.json.unwrap()["price"],
            json!({"previous": "$19", "current": "$24"})
        );
    }

    #[test]
    fn json_mode_same_when_tracked_fields_unchanged_even_if_markdown_differs() {
        let o = opts(
            vec![ChangeTrackingMode::Json],
            Some(snap_json(json!({"price": "$19"}))),
        );
        let cur = json!({"price": "$19"});
        let r = compute_change_tracking(&o, "totally different markdown", Some(&cur), None);
        assert_eq!(r.status, ChangeStatus::Same);
    }

    #[test]
    fn mixed_mode_either_surface_changes() {
        // Markdown snapshot (hashed as markdown) that also carries JSON.
        let prev = ChangeTrackingSnapshot {
            json: Some(json!({"price": "$19"})),
            ..snap_md("Starter $19")
        };
        let o = opts(
            vec![ChangeTrackingMode::Json, ChangeTrackingMode::GitDiff],
            Some(prev),
        );
        let cur = json!({"price": "$24"});
        let r = compute_change_tracking(&o, "Starter $24", Some(&cur), None);
        assert_eq!(r.status, ChangeStatus::Changed);
        let diff = r.diff.unwrap();
        assert!(diff.text.is_some());
        // In mixed mode the JSON diff takes the `json` slot.
        assert_eq!(
            diff.json.unwrap()["price"],
            json!({"previous": "$19", "current": "$24"})
        );
    }

    #[test]
    fn binary_content_hashes_no_diff() {
        let mut o = opts(
            vec![ChangeTrackingMode::GitDiff],
            Some(snap_hash_only("old pdf text")),
        );
        o.content_type = Some("application/pdf".into());
        let r = compute_change_tracking(&o, "new pdf text", None, Some("application/pdf"));
        assert_eq!(r.status, ChangeStatus::Changed);
        assert!(r.diff.is_none());
    }
}