Skip to main content

converge_domain/
form_filler.rs

1// Copyright 2024-2025 Aprio One AB, Sweden
2// Author: Kenneth Pernyer, kenneth@aprio.one
3// SPDX-License-Identifier: MIT
4// See LICENSE file in the project root for full license information.
5
6//! Form Filler pack agents (PDF-first).
7//!
8//! This module produces a governed, reviewable fill plan and proposed field values.
9//! It is intentionally minimal and deterministic to keep invariants in focus.
10
11use converge_core::{Agent, AgentEffect, Context, ContextKey, Fact, ProposedFact};
12use serde::{Deserialize, Serialize};
13
// Fact identifiers shared by the agents in this module.

/// Id of the seed (under `ContextKey::Seeds`) that carries the JSON form request.
const FORM_REQUEST_SEED_ID: &str = "form_filler:request";
/// Id of the schema fact written under `ContextKey::Signals`.
const SCHEMA_FACT_ID: &str = "form_filler:schema";
/// Id of the field-mappings fact written under `ContextKey::Hypotheses`.
const MAPPINGS_FACT_ID: &str = "form_filler:field_mappings";
/// Id of the normalized-fields fact written under `ContextKey::Hypotheses`.
const NORMALIZED_FACT_ID: &str = "form_filler:normalized_fields";
/// Id of the completeness fact written under `ContextKey::Constraints`.
const COMPLETENESS_FACT_ID: &str = "form_filler:completeness";
/// Id of the risk-classification fact written under `ContextKey::Constraints`.
const RISK_FACT_ID: &str = "form_filler:risk_classification";
/// Id of the fill-plan fact written under `ContextKey::Strategies`.
const FILL_PLAN_FACT_ID: &str = "form_filler:fill_plan";
/// Prefix of every proposed-field id; the field id is appended to it.
const PROPOSAL_PREFIX: &str = "form_filler:proposed_field:";
22
23fn has_fact(ctx: &Context, key: ContextKey, id: &str) -> bool {
24    ctx.get(key).iter().any(|fact| fact.id == id)
25}
26
27fn parse_form_request(ctx: &Context) -> Option<FormRequestSeed> {
28    ctx.get(ContextKey::Seeds)
29        .iter()
30        .find(|seed| seed.id == FORM_REQUEST_SEED_ID)
31        .and_then(|seed| serde_json::from_str::<FormRequestSeed>(&seed.content).ok())
32}
33
/// JSON payload of the form request seed.
///
/// Both fields are optional in the incoming JSON: a missing `form_id`
/// deserializes to `None` and a missing `fields` to an empty list.
#[derive(Debug, Clone, Deserialize)]
struct FormRequestSeed {
    /// Identifier of the form to fill, if the request supplied one.
    #[serde(default)]
    form_id: Option<String>,
    /// Ids of the form fields named by the request.
    #[serde(default)]
    fields: Vec<String>,
}
41
/// Association between a form field and the source its value should come from.
#[derive(Debug, Clone, Serialize, Deserialize)]
struct FieldMapping {
    /// Id of the form field being mapped.
    field_id: String,
    /// Name of the candidate source; `FieldMappingAgent` currently always writes "unknown".
    source: String,
}
47
/// A form field paired with its normalized candidate value.
#[derive(Debug, Clone, Serialize, Deserialize)]
struct NormalizedField {
    /// Id of the form field.
    field_id: String,
    /// Normalized value; an empty (or whitespace-only) string is treated as "no value"
    /// by `CompletenessAgent` and `ProposalEmitterAgent`.
    normalized_value: String,
}
53
/// Content of the completeness fact.
#[derive(Debug, Clone, Serialize, Deserialize)]
struct CompletenessStatus {
    /// Ids of the fields whose normalized value is empty after trimming.
    missing_fields: Vec<String>,
}
58
/// Content of the risk-classification fact.
#[derive(Debug, Clone, Serialize, Deserialize)]
struct RiskClassification {
    /// Ids of the schema fields for which `classify_risk` returned `true`.
    high_risk_fields: Vec<String>,
}
63
/// Content of the fill-plan fact: the consolidated view produced by `FillPlanAgent`.
///
/// Serialize-only; nothing in this module reads a plan back.
#[derive(Debug, Clone, Serialize)]
struct FillPlan {
    /// Form id taken from the schema fact, or "unknown" when it is not available.
    form_id: String,
    /// Field ids copied from the completeness fact.
    missing_fields: Vec<String>,
    /// Field ids copied from the risk-classification fact.
    high_risk_fields: Vec<String>,
    /// `true` only when both `missing_fields` and `high_risk_fields` are empty.
    ready_for_submit: bool,
}
71
/// Decides whether a field id looks sensitive enough to require approval.
///
/// The id is lowercased and then checked for any of a fixed set of keywords
/// as a plain substring, so the match is case-insensitive and also fires on
/// longer words that merely contain a keyword.
fn classify_risk(field_id: &str) -> bool {
    const SENSITIVE_KEYWORDS: [&str; 6] = ["ssn", "bank", "account", "passport", "tax", "salary"];

    let haystack = field_id.to_lowercase();
    for keyword in SENSITIVE_KEYWORDS.iter() {
        if haystack.contains(*keyword) {
            return true;
        }
    }
    false
}
78
79/// Extracts a schema from the seed request (PDF-first entry).
80pub struct FormSchemaAgent;
81
82impl Agent for FormSchemaAgent {
83    fn name(&self) -> &str {
84        "FormSchemaAgent"
85    }
86
87    fn dependencies(&self) -> &[ContextKey] {
88        &[ContextKey::Seeds]
89    }
90
91    fn accepts(&self, ctx: &Context) -> bool {
92        ctx.has(ContextKey::Seeds) && !has_fact(ctx, ContextKey::Signals, SCHEMA_FACT_ID)
93    }
94
95    fn execute(&self, ctx: &Context) -> AgentEffect {
96        let request = match parse_form_request(ctx) {
97            Some(request) => request,
98            None => return AgentEffect::empty(),
99        };
100
101        let payload = serde_json::json!({
102            "form_id": request.form_id.unwrap_or_else(|| "unknown".to_string()),
103            "fields": request.fields,
104        });
105
106        AgentEffect::with_fact(Fact {
107            key: ContextKey::Signals,
108            id: SCHEMA_FACT_ID.to_string(),
109            content: payload.to_string(),
110        })
111    }
112}
113
114/// Maps schema fields to candidate sources (deterministic placeholder).
115pub struct FieldMappingAgent;
116
117impl Agent for FieldMappingAgent {
118    fn name(&self) -> &str {
119        "FieldMappingAgent"
120    }
121
122    fn dependencies(&self) -> &[ContextKey] {
123        &[ContextKey::Signals]
124    }
125
126    fn accepts(&self, ctx: &Context) -> bool {
127        ctx.has(ContextKey::Signals) && !has_fact(ctx, ContextKey::Hypotheses, MAPPINGS_FACT_ID)
128    }
129
130    fn execute(&self, ctx: &Context) -> AgentEffect {
131        let schema = ctx
132            .get(ContextKey::Signals)
133            .iter()
134            .find(|fact| fact.id == SCHEMA_FACT_ID)
135            .and_then(|fact| serde_json::from_str::<serde_json::Value>(&fact.content).ok());
136
137        let fields = schema
138            .and_then(|value| value.get("fields").cloned())
139            .and_then(|value| serde_json::from_value::<Vec<String>>(value).ok())
140            .unwrap_or_default();
141
142        let mappings: Vec<FieldMapping> = fields
143            .iter()
144            .map(|field_id| FieldMapping {
145                field_id: field_id.to_string(),
146                source: "unknown".to_string(),
147            })
148            .collect();
149
150        let payload = serde_json::json!({ "mappings": mappings });
151        AgentEffect::with_fact(Fact {
152            key: ContextKey::Hypotheses,
153            id: MAPPINGS_FACT_ID.to_string(),
154            content: payload.to_string(),
155        })
156    }
157}
158
159/// Normalizes candidate values (placeholder deterministic normalization).
160pub struct NormalizationAgent;
161
162impl Agent for NormalizationAgent {
163    fn name(&self) -> &str {
164        "NormalizationAgent"
165    }
166
167    fn dependencies(&self) -> &[ContextKey] {
168        &[ContextKey::Hypotheses]
169    }
170
171    fn accepts(&self, ctx: &Context) -> bool {
172        ctx.has(ContextKey::Hypotheses)
173            && !has_fact(ctx, ContextKey::Hypotheses, NORMALIZED_FACT_ID)
174    }
175
176    fn execute(&self, ctx: &Context) -> AgentEffect {
177        let mappings = ctx
178            .get(ContextKey::Hypotheses)
179            .iter()
180            .find(|fact| fact.id == MAPPINGS_FACT_ID)
181            .and_then(|fact| serde_json::from_str::<serde_json::Value>(&fact.content).ok())
182            .and_then(|value| value.get("mappings").cloned())
183            .and_then(|value| serde_json::from_value::<Vec<FieldMapping>>(value).ok())
184            .unwrap_or_default();
185
186        let normalized: Vec<NormalizedField> = mappings
187            .into_iter()
188            .map(|mapping| NormalizedField {
189                field_id: mapping.field_id,
190                normalized_value: String::new(),
191            })
192            .collect();
193
194        let payload = serde_json::json!({ "normalized": normalized });
195        AgentEffect::with_fact(Fact {
196            key: ContextKey::Hypotheses,
197            id: NORMALIZED_FACT_ID.to_string(),
198            content: payload.to_string(),
199        })
200    }
201}
202
203/// Detects missing required fields (based on empty normalized values).
204pub struct CompletenessAgent;
205
206impl Agent for CompletenessAgent {
207    fn name(&self) -> &str {
208        "CompletenessAgent"
209    }
210
211    fn dependencies(&self) -> &[ContextKey] {
212        &[ContextKey::Hypotheses]
213    }
214
215    fn accepts(&self, ctx: &Context) -> bool {
216        ctx.has(ContextKey::Hypotheses)
217            && !has_fact(ctx, ContextKey::Constraints, COMPLETENESS_FACT_ID)
218    }
219
220    fn execute(&self, ctx: &Context) -> AgentEffect {
221        let normalized = ctx
222            .get(ContextKey::Hypotheses)
223            .iter()
224            .find(|fact| fact.id == NORMALIZED_FACT_ID)
225            .and_then(|fact| serde_json::from_str::<serde_json::Value>(&fact.content).ok())
226            .and_then(|value| value.get("normalized").cloned())
227            .and_then(|value| serde_json::from_value::<Vec<NormalizedField>>(value).ok())
228            .unwrap_or_default();
229
230        let missing_fields: Vec<String> = normalized
231            .iter()
232            .filter(|field| field.normalized_value.trim().is_empty())
233            .map(|field| field.field_id.clone())
234            .collect();
235
236        let payload = CompletenessStatus { missing_fields };
237        AgentEffect::with_fact(Fact {
238            key: ContextKey::Constraints,
239            id: COMPLETENESS_FACT_ID.to_string(),
240            content: serde_json::to_string(&payload).unwrap_or_default(),
241        })
242    }
243}
244
245/// Classifies high-risk fields that require approval.
246pub struct RiskClassifierAgent;
247
248impl Agent for RiskClassifierAgent {
249    fn name(&self) -> &str {
250        "RiskClassifierAgent"
251    }
252
253    fn dependencies(&self) -> &[ContextKey] {
254        &[ContextKey::Signals]
255    }
256
257    fn accepts(&self, ctx: &Context) -> bool {
258        ctx.has(ContextKey::Signals) && !has_fact(ctx, ContextKey::Constraints, RISK_FACT_ID)
259    }
260
261    fn execute(&self, ctx: &Context) -> AgentEffect {
262        let schema = ctx
263            .get(ContextKey::Signals)
264            .iter()
265            .find(|fact| fact.id == SCHEMA_FACT_ID)
266            .and_then(|fact| serde_json::from_str::<serde_json::Value>(&fact.content).ok());
267
268        let fields = schema
269            .and_then(|value| value.get("fields").cloned())
270            .and_then(|value| serde_json::from_value::<Vec<String>>(value).ok())
271            .unwrap_or_default();
272
273        let high_risk_fields = fields
274            .into_iter()
275            .filter(|field| classify_risk(field))
276            .collect::<Vec<_>>();
277
278        let payload = RiskClassification { high_risk_fields };
279        AgentEffect::with_fact(Fact {
280            key: ContextKey::Constraints,
281            id: RISK_FACT_ID.to_string(),
282            content: serde_json::to_string(&payload).unwrap_or_default(),
283        })
284    }
285}
286
287/// Produces a consolidated fill plan.
288pub struct FillPlanAgent;
289
290impl Agent for FillPlanAgent {
291    fn name(&self) -> &str {
292        "FillPlanAgent"
293    }
294
295    fn dependencies(&self) -> &[ContextKey] {
296        &[ContextKey::Signals, ContextKey::Constraints]
297    }
298
299    fn accepts(&self, ctx: &Context) -> bool {
300        ctx.has(ContextKey::Signals)
301            && ctx.has(ContextKey::Constraints)
302            && !has_fact(ctx, ContextKey::Strategies, FILL_PLAN_FACT_ID)
303    }
304
305    fn execute(&self, ctx: &Context) -> AgentEffect {
306        let schema = ctx
307            .get(ContextKey::Signals)
308            .iter()
309            .find(|fact| fact.id == SCHEMA_FACT_ID)
310            .and_then(|fact| serde_json::from_str::<serde_json::Value>(&fact.content).ok());
311
312        let form_id = schema
313            .and_then(|value| {
314                value
315                    .get("form_id")
316                    .and_then(|id| id.as_str())
317                    .map(|s| s.to_string())
318            })
319            .unwrap_or_else(|| "unknown".to_string());
320
321        let missing_fields = ctx
322            .get(ContextKey::Constraints)
323            .iter()
324            .find(|fact| fact.id == COMPLETENESS_FACT_ID)
325            .and_then(|fact| serde_json::from_str::<CompletenessStatus>(&fact.content).ok())
326            .map(|status| status.missing_fields)
327            .unwrap_or_default();
328
329        let high_risk_fields = ctx
330            .get(ContextKey::Constraints)
331            .iter()
332            .find(|fact| fact.id == RISK_FACT_ID)
333            .and_then(|fact| serde_json::from_str::<RiskClassification>(&fact.content).ok())
334            .map(|status| status.high_risk_fields)
335            .unwrap_or_default();
336
337        let ready_for_submit = missing_fields.is_empty() && high_risk_fields.is_empty();
338        let plan = FillPlan {
339            form_id,
340            missing_fields,
341            high_risk_fields,
342            ready_for_submit,
343        };
344
345        AgentEffect::with_fact(Fact {
346            key: ContextKey::Strategies,
347            id: FILL_PLAN_FACT_ID.to_string(),
348            content: serde_json::to_string(&plan).unwrap_or_default(),
349        })
350    }
351}
352
353/// Emits proposed field values (for approval and promotion).
354pub struct ProposalEmitterAgent;
355
356impl Agent for ProposalEmitterAgent {
357    fn name(&self) -> &str {
358        "ProposalEmitterAgent"
359    }
360
361    fn dependencies(&self) -> &[ContextKey] {
362        &[ContextKey::Hypotheses, ContextKey::Signals]
363    }
364
365    fn accepts(&self, ctx: &Context) -> bool {
366        ctx.has(ContextKey::Hypotheses) && ctx.has(ContextKey::Signals)
367    }
368
369    fn execute(&self, ctx: &Context) -> AgentEffect {
370        let normalized = ctx
371            .get(ContextKey::Hypotheses)
372            .iter()
373            .find(|fact| fact.id == NORMALIZED_FACT_ID)
374            .and_then(|fact| serde_json::from_str::<serde_json::Value>(&fact.content).ok())
375            .and_then(|value| value.get("normalized").cloned())
376            .and_then(|value| serde_json::from_value::<Vec<NormalizedField>>(value).ok())
377            .unwrap_or_default();
378
379        let proposals: Vec<ProposedFact> = normalized
380            .into_iter()
381            .filter(|field| !field.normalized_value.trim().is_empty())
382            .map(|field| ProposedFact {
383                key: ContextKey::Proposals,
384                id: format!("{}{}", PROPOSAL_PREFIX, field.field_id),
385                content: serde_json::json!({
386                    "field_id": field.field_id,
387                    "value": field.normalized_value,
388                    "provenance": "form_filler:deterministic",
389                    "risk": "unknown",
390                })
391                .to_string(),
392                confidence: 0.8,
393                provenance: "form_filler:deterministic".to_string(),
394            })
395            .collect();
396
397        let mut effect = AgentEffect::empty();
398        effect.proposals = proposals;
399        effect
400    }
401}