Skip to main content

crw_server/routes/
change_tracking.rs

1//! `POST /v1/change-tracking/diff` — stateless change-tracking diff endpoint.
2//!
3//! This is the crawl-path workhorse: the SaaS monitor reconciler scrapes pages
4//! (via `/v1/crawl`), then calls this endpoint with each page's current
5//! markdown/json plus the prior snapshot to get a per-page diff. opencore
6//! stores nothing — `previous` is supplied by the caller.
7//!
8//! Two wire shapes on one route, discriminated by the presence of the `batch`
9//! key (no `deny_unknown_fields`, so a Single body's extra fields and a Batch
10//! body's shared fields never reject each other):
11//!   - Single: `{ current, previous?, modes, schema?, prompt?, contentType?, tag? }`
12//!   - Batch:  `{ batch: [ { url?, current, previous?, ... } ], modes, schema?, ... }`
13//!     where top-level `modes/schema/prompt/contentType` are shared defaults
14//!     each item may override.
15//!
16//! The LLM judge (`goal` / `judgeEnabled`) is accepted but not yet applied here
17//! — judging is wired in M2.
18
19use axum::Json;
20use axum::extract::State;
21use axum::extract::rejection::JsonRejection;
22use crw_core::error::CrwError;
23use crw_core::types::{
24    ApiResponse, ChangeTrackingMode, ChangeTrackingOptions, ChangeTrackingResult,
25    ChangeTrackingSnapshot,
26};
27use serde::Deserialize;
28use serde_json::Value;
29
30use crate::error::AppError;
31use crate::state::AppState;
32
33/// The current scrape content for one page.
34#[derive(Debug, Clone, Deserialize)]
35#[serde(rename_all = "camelCase")]
36pub struct DiffCurrent {
37    #[serde(default)]
38    pub markdown: Option<String>,
39    #[serde(default)]
40    pub json: Option<Value>,
41}
42
43/// One page to diff (single body, or one entry of a batch).
44#[derive(Debug, Clone, Deserialize)]
45#[serde(rename_all = "camelCase")]
46pub struct DiffItem {
47    #[serde(default)]
48    pub url: Option<String>,
49    #[serde(default)]
50    pub current: Option<DiffCurrent>,
51    #[serde(default)]
52    pub previous: Option<ChangeTrackingSnapshot>,
53    #[serde(default)]
54    pub modes: Option<Vec<ChangeTrackingMode>>,
55    #[serde(default)]
56    pub schema: Option<Value>,
57    #[serde(default)]
58    pub prompt: Option<String>,
59    #[serde(default, alias = "content_type")]
60    pub content_type: Option<String>,
61    #[serde(default)]
62    pub tag: Option<String>,
63    // Accepted for forward-compat; judging is applied in M2.
64    #[serde(default)]
65    pub goal: Option<String>,
66    #[serde(default, alias = "judge_enabled")]
67    pub judge_enabled: Option<bool>,
68}
69
70/// Request body. The presence of `batch` selects batch mode. Single-mode
71/// fields are flattened onto the same struct; in batch mode `modes/schema/
72/// prompt/contentType` act as shared defaults for items that omit them.
73#[derive(Debug, Clone, Deserialize)]
74#[serde(rename_all = "camelCase")]
75pub struct DiffRequest {
76    #[serde(default)]
77    pub batch: Option<Vec<DiffItem>>,
78    // ---- single-mode (and batch shared-default) fields ----
79    #[serde(default)]
80    pub current: Option<DiffCurrent>,
81    #[serde(default)]
82    pub previous: Option<ChangeTrackingSnapshot>,
83    #[serde(default)]
84    pub modes: Option<Vec<ChangeTrackingMode>>,
85    #[serde(default)]
86    pub schema: Option<Value>,
87    #[serde(default)]
88    pub prompt: Option<String>,
89    #[serde(default, alias = "content_type")]
90    pub content_type: Option<String>,
91    #[serde(default)]
92    pub tag: Option<String>,
93    #[serde(default)]
94    pub goal: Option<String>,
95    #[serde(default, alias = "judge_enabled")]
96    pub judge_enabled: Option<bool>,
97}
98
99fn default_modes() -> Vec<ChangeTrackingMode> {
100    vec![ChangeTrackingMode::GitDiff]
101}
102
103/// Build options + run the diff for one item, applying shared defaults.
104fn diff_one(
105    item: &DiffItem,
106    shared_modes: &Option<Vec<ChangeTrackingMode>>,
107    shared_schema: &Option<Value>,
108    shared_prompt: &Option<String>,
109    shared_content_type: &Option<String>,
110) -> Result<ChangeTrackingResult, CrwError> {
111    let current = item.current.as_ref().ok_or_else(|| {
112        CrwError::InvalidRequest("each diff item requires a 'current' object".into())
113    })?;
114
115    let modes = item
116        .modes
117        .clone()
118        .or_else(|| shared_modes.clone())
119        .unwrap_or_else(default_modes);
120
121    let opts = ChangeTrackingOptions {
122        modes,
123        schema: item.schema.clone().or_else(|| shared_schema.clone()),
124        prompt: item.prompt.clone().or_else(|| shared_prompt.clone()),
125        previous: item.previous.clone(),
126        tag: item.tag.clone(),
127        content_type: item
128            .content_type
129            .clone()
130            .or_else(|| shared_content_type.clone()),
131    };
132
133    let markdown = current.markdown.as_deref().unwrap_or("");
134    Ok(crw_diff::compute_change_tracking(
135        &opts,
136        markdown,
137        current.json.as_ref(),
138        opts.content_type.as_deref(),
139    ))
140}
141
142pub async fn diff(
143    State(_state): State<AppState>,
144    body: Result<Json<DiffRequest>, JsonRejection>,
145) -> Result<Json<ApiResponse<Value>>, AppError> {
146    let Json(req) = body.map_err(AppError::from)?;
147
148    // Batch mode: presence of `batch` wins.
149    if let Some(items) = &req.batch {
150        if items.is_empty() {
151            return Err(AppError::from(CrwError::InvalidRequest(
152                "'batch' must contain at least one item".into(),
153            )));
154        }
155        let mut results: Vec<ChangeTrackingResult> = Vec::with_capacity(items.len());
156        for item in items {
157            results.push(diff_one(
158                item,
159                &req.modes,
160                &req.schema,
161                &req.prompt,
162                &req.content_type,
163            )?);
164        }
165        let data = serde_json::to_value(results)
166            .map_err(|e| CrwError::Internal(format!("failed to serialize diff results: {e}")))?;
167        return Ok(Json(ApiResponse::ok(data)));
168    }
169
170    // Single mode.
171    let single = DiffItem {
172        url: None,
173        current: req.current.clone(),
174        previous: req.previous.clone(),
175        modes: req.modes.clone(),
176        schema: req.schema.clone(),
177        prompt: req.prompt.clone(),
178        content_type: req.content_type.clone(),
179        tag: req.tag.clone(),
180        goal: req.goal.clone(),
181        judge_enabled: req.judge_enabled,
182    };
183    let result = diff_one(&single, &None, &None, &None, &None)?;
184    let data = serde_json::to_value(result)
185        .map_err(|e| CrwError::Internal(format!("failed to serialize diff result: {e}")))?;
186    Ok(Json(ApiResponse::ok(data)))
187}