Skip to main content

decapod/plugins/
internalize.rs

1//! Internalized Context Artifacts plugin.
2//!
3//! Provides governance-native lifecycle for context internalization:
4//! turning long documents into mountable, verifiable context adapters
5//! so agents stop paying the long-context tax over and over.
6//!
7//! Artifacts are produced by pluggable "internalizer profiles" (external
8//! executables) and stored under `.decapod/generated/artifacts/internalizations/`.
9//!
10//! Truth label: REAL
11//! Proof surface: `decapod internalize inspect --id <id>`
12
13use crate::core::store::Store;
14use clap::Subcommand;
15use serde::{Deserialize, Serialize};
16use sha2::{Digest, Sha256};
17use std::collections::BTreeMap;
18use std::fs;
19use std::path::{Path, PathBuf};
20use std::process::Command as ProcessCommand;
21use std::time::{SystemTime, UNIX_EPOCH};
22
23#[derive(clap::Args, Debug)]
24pub struct InternalizeCli {
25    #[clap(subcommand)]
26    pub command: InternalizeCommand,
27}
28
29#[derive(Subcommand, Debug)]
30pub enum InternalizeCommand {
31    /// Produce an internalized context artifact from a source document
32    Create {
33        #[clap(long)]
34        source: String,
35        #[clap(long)]
36        model: String,
37        #[clap(long, default_value = "noop")]
38        profile: String,
39        #[clap(long, default_value_t = 0)]
40        ttl: u64,
41        #[clap(long = "scope", value_delimiter = ',')]
42        scopes: Vec<String>,
43        #[clap(long, default_value = "json")]
44        format: String,
45    },
46    /// Attach an internalized context artifact to a session-scoped mount lease
47    Attach {
48        #[clap(long)]
49        id: String,
50        #[clap(long)]
51        session: String,
52        #[clap(long, default_value = "decapod-cli")]
53        tool: String,
54        #[clap(long, default_value_t = 1800)]
55        lease_seconds: u64,
56        #[clap(long, default_value = "json")]
57        format: String,
58    },
59    /// Explicitly revoke a session-scoped internalization mount
60    Detach {
61        #[clap(long)]
62        id: String,
63        #[clap(long)]
64        session: String,
65        #[clap(long, default_value = "json")]
66        format: String,
67    },
68    /// Inspect an internalized context artifact (manifest + integrity)
69    Inspect {
70        #[clap(long)]
71        id: String,
72        #[clap(long, default_value = "json")]
73        format: String,
74    },
75}
76
/// Schema version stamped into manifests, mount records and command results.
pub const SCHEMA_VERSION: &str = "1.2.0";
/// Default mount lease duration in seconds (30 minutes).
pub const DEFAULT_ATTACH_LEASE_SECONDS: u64 = 1800;
79
80#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
81#[serde(rename_all = "snake_case")]
82pub enum DeterminismClass {
83    Deterministic,
84    BestEffort,
85}
86
87#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
88#[serde(rename_all = "snake_case")]
89pub enum ReplayClass {
90    Replayable,
91    NonReplayable,
92}
93
94#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
95pub struct InternalizationManifest {
96    pub schema_version: String,
97    pub id: String,
98    pub source_hash: String,
99    pub source_path: String,
100    pub extraction_method: String,
101    pub chunking_params: BTreeMap<String, serde_json::Value>,
102    pub base_model_id: String,
103    pub internalizer_profile: String,
104    pub internalizer_version: String,
105    pub adapter_format: String,
106    pub created_at: String,
107    pub ttl_seconds: u64,
108    #[serde(skip_serializing_if = "Option::is_none")]
109    pub expires_at: Option<String>,
110    pub provenance: Vec<ProvenanceEntry>,
111    pub replay_recipe: ReplayRecipe,
112    pub adapter_hash: String,
113    pub adapter_path: String,
114    pub capabilities_contract: CapabilitiesContract,
115    pub risk_tier: RiskTier,
116    pub determinism_class: DeterminismClass,
117    pub binary_hash: String,
118    pub runtime_fingerprint: String,
119}
120
121#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
122pub struct ProvenanceEntry {
123    pub op: String,
124    pub timestamp: String,
125    pub actor: String,
126    pub inputs_hash: String,
127}
128
129#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
130pub struct ReplayRecipe {
131    pub mode: ReplayClass,
132    pub command: String,
133    pub args: Vec<String>,
134    pub env: BTreeMap<String, String>,
135    pub reason: String,
136}
137
138#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
139pub struct CapabilitiesContract {
140    pub allowed_scopes: Vec<String>,
141    pub permitted_tools: Vec<String>,
142    pub allow_code_gen: bool,
143}
144
145#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
146pub struct RiskTier {
147    pub creation: String,
148    pub attach: String,
149    pub inspect: String,
150}
151
152impl Default for RiskTier {
153    fn default() -> Self {
154        Self {
155            creation: "compute-risky".to_string(),
156            attach: "behavior-changing".to_string(),
157            inspect: "read-only".to_string(),
158        }
159    }
160}
161
162#[derive(Debug, Clone, Serialize, Deserialize)]
163pub struct InternalizationCreateResult {
164    pub schema_version: String,
165    pub success: bool,
166    pub artifact_id: String,
167    pub artifact_path: String,
168    pub cache_hit: bool,
169    pub manifest: InternalizationManifest,
170    pub source_hash: String,
171    pub adapter_hash: String,
172}
173
174#[derive(Debug, Clone, Serialize, Deserialize)]
175pub struct InternalizationAttachResult {
176    pub schema_version: String,
177    pub success: bool,
178    pub artifact_id: String,
179    pub session_id: String,
180    pub tool: String,
181    pub attached_at: String,
182    pub lease_id: String,
183    pub lease_seconds: u64,
184    pub lease_expires_at: String,
185    pub expires_at: Option<String>,
186    pub capabilities_contract: CapabilitiesContract,
187    pub risk_classification: String,
188    pub source_verification: String,
189    pub provenance_entry: ProvenanceEntry,
190}
191
192#[derive(Debug, Clone, Serialize, Deserialize)]
193pub struct InternalizationDetachResult {
194    pub schema_version: String,
195    pub success: bool,
196    pub artifact_id: String,
197    pub session_id: String,
198    pub detached_at: String,
199    pub lease_id: String,
200    pub detached: bool,
201}
202
203#[derive(Debug, Clone, Serialize, Deserialize)]
204pub struct InternalizationInspectResult {
205    pub schema_version: String,
206    pub artifact_id: String,
207    pub manifest: InternalizationManifest,
208    pub integrity: IntegrityCheck,
209    pub status: String,
210}
211
212#[derive(Debug, Clone, Serialize, Deserialize)]
213pub struct IntegrityCheck {
214    pub source_hash_valid: bool,
215    pub source_verification: String,
216    pub adapter_hash_valid: bool,
217    pub manifest_consistent: bool,
218    pub expired: bool,
219    pub replayable_claim_valid: bool,
220}
221
222#[derive(Debug, Clone, Serialize, Deserialize)]
223pub struct InternalizerProfile {
224    pub name: String,
225    pub version: String,
226    pub executable: String,
227    pub default_params: BTreeMap<String, serde_json::Value>,
228    pub adapter_format: String,
229    pub determinism_class: DeterminismClass,
230}
231
232impl InternalizerProfile {
233    pub fn noop() -> Self {
234        Self {
235            name: "noop".to_string(),
236            version: "1.0.0".to_string(),
237            executable: "builtin:noop".to_string(),
238            default_params: BTreeMap::new(),
239            adapter_format: "noop".to_string(),
240            determinism_class: DeterminismClass::Deterministic,
241        }
242    }
243
244    pub fn resolve(name: &str, store_root: &Path) -> Result<Self, InternalizeError> {
245        if name == "noop" {
246            return Ok(Self::noop());
247        }
248        let profile_path = control_root(store_root)
249            .join("generated")
250            .join("profiles")
251            .join("internalizers")
252            .join(format!("{}.json", name));
253        if !profile_path.exists() {
254            return Err(InternalizeError::ProfileNotFound(name.to_string()));
255        }
256        let raw = fs::read_to_string(&profile_path).map_err(InternalizeError::Io)?;
257        serde_json::from_str(&raw).map_err(InternalizeError::Json)
258    }
259
260    pub fn binary_hash(&self) -> Result<String, InternalizeError> {
261        if self.executable == "builtin:noop" {
262            return sha256_bytes(self.executable.as_bytes());
263        }
264        let path = Path::new(&self.executable);
265        if !path.exists() {
266            return Err(InternalizeError::ProfileExecution(format!(
267                "Internalizer binary not found: {}",
268                self.executable
269            )));
270        }
271        sha256_file(path)
272    }
273
274    pub fn runtime_fingerprint(&self) -> String {
275        format!(
276            "os={} arch={} executable={}",
277            std::env::consts::OS,
278            std::env::consts::ARCH,
279            self.executable
280        )
281    }
282
283    pub fn execute(
284        &self,
285        source_path: &Path,
286        base_model: &str,
287        output_dir: &Path,
288    ) -> Result<(PathBuf, BTreeMap<String, serde_json::Value>), InternalizeError> {
289        let adapter_file = output_dir.join("adapter.bin");
290
291        if self.executable == "builtin:noop" {
292            fs::write(&adapter_file, b"").map_err(InternalizeError::Io)?;
293            return Ok((adapter_file, self.default_params.clone()));
294        }
295
296        let input = serde_json::json!({
297            "source_path": source_path.to_string_lossy(),
298            "base_model": base_model,
299            "output_dir": output_dir.to_string_lossy(),
300            "params": self.default_params,
301        });
302
303        let output = ProcessCommand::new(&self.executable)
304            .arg("--input")
305            .arg(serde_json::to_string(&input).unwrap_or_default())
306            .output()
307            .map_err(InternalizeError::Io)?;
308
309        if !output.status.success() {
310            let stderr = String::from_utf8_lossy(&output.stderr);
311            return Err(InternalizeError::ProfileExecution(format!(
312                "Internalizer '{}' failed: {}",
313                self.name, stderr
314            )));
315        }
316
317        if !adapter_file.exists() {
318            return Err(InternalizeError::ProfileExecution(format!(
319                "Internalizer '{}' did not produce adapter at {}",
320                self.name,
321                adapter_file.display()
322            )));
323        }
324
325        let stdout = String::from_utf8_lossy(&output.stdout);
326        let params = serde_json::from_str(&stdout).unwrap_or_else(|_| self.default_params.clone());
327
328        Ok((adapter_file, params))
329    }
330}
331
332#[derive(Debug)]
333pub enum InternalizeError {
334    Io(std::io::Error),
335    Json(serde_json::Error),
336    ProfileNotFound(String),
337    ProfileExecution(String),
338    ArtifactNotFound(String),
339    MountNotFound {
340        artifact_id: String,
341        session_id: String,
342    },
343    SourceIntegrityFailed {
344        expected: String,
345        actual: String,
346    },
347    AdapterIntegrityFailed {
348        expected: String,
349        actual: String,
350    },
351    Expired {
352        artifact_id: String,
353        expired_at: String,
354    },
355    ToolNotPermitted {
356        tool: String,
357        artifact_id: String,
358    },
359    ValidationError(String),
360}
361
362impl std::fmt::Display for InternalizeError {
363    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
364        match self {
365            Self::Io(e) => write!(f, "IO error: {}", e),
366            Self::Json(e) => write!(f, "JSON error: {}", e),
367            Self::ProfileNotFound(n) => write!(f, "Internalizer profile '{}' not found", n),
368            Self::ProfileExecution(s) => write!(f, "Profile execution error: {}", s),
369            Self::ArtifactNotFound(id) => write!(f, "Artifact '{}' not found", id),
370            Self::MountNotFound {
371                artifact_id,
372                session_id,
373            } => write!(
374                f,
375                "No active mount for artifact '{}' in session '{}'",
376                artifact_id, session_id
377            ),
378            Self::SourceIntegrityFailed { expected, actual } => write!(
379                f,
380                "Source integrity check failed: expected {}, got {}",
381                expected, actual
382            ),
383            Self::AdapterIntegrityFailed { expected, actual } => write!(
384                f,
385                "Adapter integrity check failed: expected {}, got {}",
386                expected, actual
387            ),
388            Self::Expired {
389                artifact_id,
390                expired_at,
391            } => write!(
392                f,
393                "Artifact '{}' expired at {}; renew with a new create",
394                artifact_id, expired_at
395            ),
396            Self::ToolNotPermitted { tool, artifact_id } => write!(
397                f,
398                "Tool '{}' is not permitted to mount artifact '{}'",
399                tool, artifact_id
400            ),
401            Self::ValidationError(s) => write!(f, "Validation error: {}", s),
402        }
403    }
404}
405
406impl std::error::Error for InternalizeError {}
407
408impl From<InternalizeError> for crate::core::error::DecapodError {
409    fn from(e: InternalizeError) -> Self {
410        crate::core::error::DecapodError::ValidationError(e.to_string())
411    }
412}
413
414fn sha256_file(path: &Path) -> Result<String, InternalizeError> {
415    let bytes = fs::read(path).map_err(InternalizeError::Io)?;
416    sha256_bytes(&bytes)
417}
418
419fn sha256_bytes(bytes: &[u8]) -> Result<String, InternalizeError> {
420    let mut hasher = Sha256::new();
421    hasher.update(bytes);
422    Ok(format!("{:x}", hasher.finalize()))
423}
424
/// Formats seconds since the Unix epoch as a UTC timestamp of the form
/// `YYYY-MM-DDTHH:MM:SSZ`, using Gregorian leap-year rules.
fn iso8601_from_epoch(secs: u64) -> String {
    let is_leap = |y: i64| y % 4 == 0 && (y % 100 != 0 || y % 400 == 0);
    let year_len = |y: i64| -> i64 {
        if is_leap(y) {
            366
        } else {
            365
        }
    };

    // Time of day.
    let clock = secs % 86400;
    let (hh, mm, ss) = (clock / 3600, (clock % 3600) / 60, clock % 60);

    // Peel off whole years, leaving the zero-based day within the final year.
    let mut year = 1970i64;
    let mut day = (secs / 86400) as i64;
    while day >= year_len(year) {
        day -= year_len(year);
        year += 1;
    }

    // Peel off whole months, leaving the zero-based day within the month.
    let february = if is_leap(year) { 29 } else { 28 };
    let lengths: [i64; 12] = [31, february, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31];
    let mut month_index = 0usize;
    for (idx, len) in lengths.iter().copied().enumerate() {
        if day < len {
            month_index = idx;
            break;
        }
        day -= len;
    }

    format!(
        "{:04}-{:02}-{:02}T{:02}:{:02}:{:02}Z",
        year,
        month_index + 1,
        day + 1,
        hh,
        mm,
        ss
    )
}
478
/// Current time as whole seconds since the Unix epoch; 0 if the system clock
/// reports a time before the epoch.
fn now_unix() -> u64 {
    let since_epoch = SystemTime::now().duration_since(UNIX_EPOCH);
    since_epoch.map_or(0, |elapsed| elapsed.as_secs())
}
485
486pub fn now_iso8601() -> String {
487    iso8601_from_epoch(now_unix())
488}
489
490fn iso8601_after_secs(secs: u64) -> String {
491    iso8601_from_epoch(now_unix().saturating_add(secs))
492}
493
/// Maps a store root to the directory that holds `generated/`.
///
/// A store root of the form `<...>/.decapod/data` yields its parent
/// (`<...>/.decapod`); any other path is returned unchanged.
fn control_root(store_root: &Path) -> PathBuf {
    let leaf_is_data = store_root.file_name().and_then(|n| n.to_str()) == Some("data");
    if leaf_is_data {
        if let Some(parent) = store_root.parent() {
            if parent.file_name().and_then(|n| n.to_str()) == Some(".decapod") {
                return parent.to_path_buf();
            }
        }
    }
    store_root.to_path_buf()
}
510
511fn artifacts_dir(store_root: &Path) -> PathBuf {
512    control_root(store_root)
513        .join("generated")
514        .join("artifacts")
515        .join("internalizations")
516}
517
518fn artifact_dir(store_root: &Path, id: &str) -> PathBuf {
519    artifacts_dir(store_root).join(id)
520}
521
522fn session_dir(store_root: &Path, session_id: &str) -> PathBuf {
523    control_root(store_root)
524        .join("generated")
525        .join("sessions")
526        .join(session_id)
527}
528
529fn mount_dir(store_root: &Path, session_id: &str) -> PathBuf {
530    session_dir(store_root, session_id).join("internalize_mounts")
531}
532
/// Mount (lease) identifier for an artifact: `mount_<artifact_id>`.
fn mount_id(artifact_id: &str) -> String {
    ["mount_", artifact_id].concat()
}
536
537fn mount_path(store_root: &Path, session_id: &str, artifact_id: &str) -> PathBuf {
538    mount_dir(store_root, session_id).join(format!("{}.json", mount_id(artifact_id)))
539}
540
/// Returns `true` when `source` is stdin (`-`) or an `http://` / `https://` URL.
fn is_non_local_source(source: &str) -> bool {
    if source == "-" {
        return true;
    }
    ["http://", "https://"]
        .iter()
        .any(|scheme| source.starts_with(scheme))
}
544
545fn is_expired(expires_at: Option<&str>) -> bool {
546    expires_at.is_some_and(|exp| now_iso8601().as_str() > exp)
547}
548
549fn verify_source_binding(
550    manifest: &InternalizationManifest,
551) -> Result<(bool, String), InternalizeError> {
552    if manifest.source_path == "-" {
553        return Ok((false, "best-effort-stdin-source".to_string()));
554    }
555    if manifest.source_path.starts_with("http://") || manifest.source_path.starts_with("https://") {
556        return Ok((false, "best-effort-nonlocal-source".to_string()));
557    }
558
559    let source_path = Path::new(&manifest.source_path);
560    if !source_path.exists() {
561        return Ok((false, "best-effort-source-unavailable".to_string()));
562    }
563
564    let actual = sha256_file(source_path)?;
565    if actual == manifest.source_hash {
566        Ok((true, "verified".to_string()))
567    } else {
568        Ok((false, "mismatch".to_string()))
569    }
570}
571
572fn tool_is_permitted(contract: &CapabilitiesContract, tool: &str) -> bool {
573    contract
574        .permitted_tools
575        .iter()
576        .any(|entry| entry == "*" || entry == tool)
577}
578
579fn artifact_id_for_request(
580    source_hash: &str,
581    source_path: &str,
582    model: &str,
583    profile: &InternalizerProfile,
584    ttl: u64,
585    scopes: &[String],
586) -> Result<String, InternalizeError> {
587    let mut normalized_scopes = scopes.to_vec();
588    normalized_scopes.sort();
589    normalized_scopes.dedup();
590    let binding = serde_json::json!({
591        "schema_version": SCHEMA_VERSION,
592        "source_hash": source_hash,
593        "source_path": source_path,
594        "base_model_id": model,
595        "internalizer_profile": profile.name,
596        "internalizer_version": profile.version,
597        "adapter_format": profile.adapter_format,
598        "determinism_class": profile.determinism_class,
599        "ttl_seconds": ttl,
600        "scopes": normalized_scopes,
601        "chunking_params": profile.default_params,
602    });
603    let bytes = serde_json::to_vec(&binding).map_err(InternalizeError::Json)?;
604    let hex = sha256_bytes(&bytes)?;
605    Ok(format!("int_{}", &hex[..24]))
606}
607
608fn build_replay_recipe(
609    profile: &InternalizerProfile,
610    binary_hash: &str,
611    source_path: &str,
612    model: &str,
613    ttl: u64,
614    scopes: &[String],
615) -> ReplayRecipe {
616    let mut replay_args = vec![
617        "internalize".to_string(),
618        "create".to_string(),
619        "--source".to_string(),
620        source_path.to_string(),
621        "--model".to_string(),
622        model.to_string(),
623        "--profile".to_string(),
624        profile.name.clone(),
625    ];
626    if ttl > 0 {
627        replay_args.push("--ttl".to_string());
628        replay_args.push(ttl.to_string());
629    }
630    for scope in scopes {
631        replay_args.push("--scope".to_string());
632        replay_args.push(scope.clone());
633    }
634
635    let (mode, reason) = match profile.determinism_class {
636        DeterminismClass::Deterministic if !binary_hash.is_empty() => (
637            ReplayClass::Replayable,
638            "deterministic profile with pinned binary hash".to_string(),
639        ),
640        DeterminismClass::Deterministic => (
641            ReplayClass::NonReplayable,
642            "deterministic profile missing pinned binary hash".to_string(),
643        ),
644        DeterminismClass::BestEffort => (
645            ReplayClass::NonReplayable,
646            "best_effort profile may depend on nondeterministic runtime or hardware".to_string(),
647        ),
648    };
649
650    ReplayRecipe {
651        mode,
652        command: "decapod".to_string(),
653        args: replay_args,
654        env: BTreeMap::new(),
655        reason,
656    }
657}
658
659fn replayable_claim_valid(manifest: &InternalizationManifest) -> bool {
660    match manifest.replay_recipe.mode {
661        ReplayClass::Replayable => {
662            manifest.determinism_class == DeterminismClass::Deterministic
663                && !manifest.binary_hash.trim().is_empty()
664        }
665        ReplayClass::NonReplayable => {
666            if manifest.determinism_class == DeterminismClass::BestEffort {
667                !manifest.binary_hash.trim().is_empty()
668                    && !manifest.runtime_fingerprint.trim().is_empty()
669            } else {
670                true
671            }
672        }
673    }
674}
675
676pub fn create_internalization(
677    store_root: &Path,
678    source: &str,
679    model: &str,
680    profile_name: &str,
681    ttl: u64,
682    scopes: &[String],
683) -> Result<InternalizationCreateResult, InternalizeError> {
684    if is_non_local_source(source) {
685        return Err(InternalizeError::ValidationError(
686            "MVP only supports local file sources; URL and stdin sources are intentionally not implemented"
687                .to_string(),
688        ));
689    }
690
691    let source_path = Path::new(source);
692    if !source_path.exists() {
693        return Err(InternalizeError::Io(std::io::Error::new(
694            std::io::ErrorKind::NotFound,
695            format!("Source document not found: {}", source),
696        )));
697    }
698    let canonical_source = fs::canonicalize(source_path).map_err(InternalizeError::Io)?;
699    let source_hash = sha256_file(&canonical_source)?;
700    let profile = InternalizerProfile::resolve(profile_name, store_root)?;
701
702    let effective_scopes = if scopes.is_empty() {
703        vec!["qa".to_string()]
704    } else {
705        let mut normalized = scopes.to_vec();
706        normalized.sort();
707        normalized.dedup();
708        normalized
709    };
710    let allow_code_gen = effective_scopes.iter().any(|s| s == "code-gen");
711    let binary_hash = profile.binary_hash()?;
712    let runtime_fingerprint = profile.runtime_fingerprint();
713    let source_path_string = canonical_source.to_string_lossy().to_string();
714    let artifact_id = artifact_id_for_request(
715        &source_hash,
716        &source_path_string,
717        model,
718        &profile,
719        ttl,
720        &effective_scopes,
721    )?;
722    let art_dir = artifact_dir(store_root, &artifact_id);
723    let manifest_path = art_dir.join("manifest.json");
724    if manifest_path.exists() {
725        let raw = fs::read_to_string(&manifest_path).map_err(InternalizeError::Io)?;
726        let manifest: InternalizationManifest =
727            serde_json::from_str(&raw).map_err(InternalizeError::Json)?;
728        return Ok(InternalizationCreateResult {
729            schema_version: SCHEMA_VERSION.to_string(),
730            success: true,
731            artifact_id,
732            artifact_path: art_dir.to_string_lossy().to_string(),
733            cache_hit: true,
734            source_hash: manifest.source_hash.clone(),
735            adapter_hash: manifest.adapter_hash.clone(),
736            manifest,
737        });
738    }
739
740    fs::create_dir_all(&art_dir).map_err(InternalizeError::Io)?;
741    let (adapter_path, chunking_params) = profile.execute(&canonical_source, model, &art_dir)?;
742    let adapter_hash = sha256_file(&adapter_path)?;
743    let now = now_iso8601();
744    let expires_at = if ttl > 0 {
745        Some(iso8601_after_secs(ttl))
746    } else {
747        None
748    };
749
750    let replay_recipe = build_replay_recipe(
751        &profile,
752        &binary_hash,
753        &source_path_string,
754        model,
755        ttl,
756        &effective_scopes,
757    );
758    let provenance_entry = ProvenanceEntry {
759        op: "internalize.create".to_string(),
760        timestamp: now.clone(),
761        actor: "decapod-cli".to_string(),
762        inputs_hash: source_hash.clone(),
763    };
764
765    let manifest = InternalizationManifest {
766        schema_version: SCHEMA_VERSION.to_string(),
767        id: artifact_id.clone(),
768        source_hash: source_hash.clone(),
769        source_path: source_path_string,
770        extraction_method: profile.name.clone(),
771        chunking_params,
772        base_model_id: model.to_string(),
773        internalizer_profile: profile.name.clone(),
774        internalizer_version: profile.version.clone(),
775        adapter_format: profile.adapter_format.clone(),
776        created_at: now,
777        ttl_seconds: ttl,
778        expires_at,
779        provenance: vec![provenance_entry],
780        replay_recipe,
781        adapter_hash: adapter_hash.clone(),
782        adapter_path: "adapter.bin".to_string(),
783        capabilities_contract: CapabilitiesContract {
784            allowed_scopes: effective_scopes,
785            permitted_tools: vec!["decapod-cli".to_string()],
786            allow_code_gen,
787        },
788        risk_tier: RiskTier::default(),
789        determinism_class: profile.determinism_class,
790        binary_hash,
791        runtime_fingerprint,
792    };
793
794    let manifest_json = serde_json::to_string_pretty(&manifest).map_err(InternalizeError::Json)?;
795    fs::write(&manifest_path, manifest_json).map_err(InternalizeError::Io)?;
796
797    Ok(InternalizationCreateResult {
798        schema_version: SCHEMA_VERSION.to_string(),
799        success: true,
800        artifact_id,
801        artifact_path: art_dir.to_string_lossy().to_string(),
802        cache_hit: false,
803        manifest,
804        source_hash,
805        adapter_hash,
806    })
807}
808
809pub fn inspect_internalization(
810    store_root: &Path,
811    id: &str,
812) -> Result<InternalizationInspectResult, InternalizeError> {
813    let art_dir = artifact_dir(store_root, id);
814    let manifest_path = art_dir.join("manifest.json");
815    if !manifest_path.exists() {
816        return Err(InternalizeError::ArtifactNotFound(id.to_string()));
817    }
818
819    let raw = fs::read_to_string(&manifest_path).map_err(InternalizeError::Io)?;
820    let manifest: InternalizationManifest =
821        serde_json::from_str(&raw).map_err(InternalizeError::Json)?;
822
823    let (source_hash_valid, source_verification) = verify_source_binding(&manifest)?;
824    let adapter_full_path = art_dir.join(&manifest.adapter_path);
825    let adapter_hash_valid = if adapter_full_path.exists() {
826        sha256_file(&adapter_full_path)? == manifest.adapter_hash
827    } else {
828        false
829    };
830    let expired = is_expired(manifest.expires_at.as_deref());
831    let replayable_claim_valid = replayable_claim_valid(&manifest);
832
833    let status = if expired {
834        "expired".to_string()
835    } else if !adapter_hash_valid || source_verification == "mismatch" || !replayable_claim_valid {
836        "integrity-failed".to_string()
837    } else if source_verification.starts_with("best-effort") {
838        "best-effort".to_string()
839    } else {
840        "valid".to_string()
841    };
842
843    Ok(InternalizationInspectResult {
844        schema_version: SCHEMA_VERSION.to_string(),
845        artifact_id: id.to_string(),
846        manifest,
847        integrity: IntegrityCheck {
848            source_hash_valid,
849            source_verification,
850            adapter_hash_valid,
851            manifest_consistent: true,
852            expired,
853            replayable_claim_valid,
854        },
855        status,
856    })
857}
858
859pub fn attach_internalization(
860    store_root: &Path,
861    id: &str,
862    session_id: &str,
863    tool: &str,
864    lease_seconds: u64,
865) -> Result<InternalizationAttachResult, InternalizeError> {
866    let inspection = inspect_internalization(store_root, id)?;
867
868    if inspection.integrity.expired {
869        return Err(InternalizeError::Expired {
870            artifact_id: id.to_string(),
871            expired_at: inspection
872                .manifest
873                .expires_at
874                .clone()
875                .unwrap_or_else(|| "unknown".to_string()),
876        });
877    }
878    if inspection.integrity.source_verification == "mismatch" {
879        let actual = if Path::new(&inspection.manifest.source_path).exists() {
880            sha256_file(Path::new(&inspection.manifest.source_path))?
881        } else {
882            "unavailable".to_string()
883        };
884        return Err(InternalizeError::SourceIntegrityFailed {
885            expected: inspection.manifest.source_hash.clone(),
886            actual,
887        });
888    }
889    if !inspection.integrity.adapter_hash_valid {
890        return Err(InternalizeError::AdapterIntegrityFailed {
891            expected: inspection.manifest.adapter_hash.clone(),
892            actual: "corrupted".to_string(),
893        });
894    }
895    if !inspection.integrity.replayable_claim_valid {
896        return Err(InternalizeError::ValidationError(
897            "Artifact replayability metadata is inconsistent with determinism policy".to_string(),
898        ));
899    }
900    if !tool_is_permitted(&inspection.manifest.capabilities_contract, tool) {
901        return Err(InternalizeError::ToolNotPermitted {
902            tool: tool.to_string(),
903            artifact_id: id.to_string(),
904        });
905    }
906
907    let attached_at = now_iso8601();
908    let lease_id = mount_id(id);
909    let lease_expires_at = iso8601_after_secs(lease_seconds);
910    let provenance_entry = ProvenanceEntry {
911        op: "internalize.attach".to_string(),
912        timestamp: attached_at.clone(),
913        actor: format!("session:{}", session_id),
914        inputs_hash: inspection.manifest.adapter_hash.clone(),
915    };
916
917    let mounts_dir = mount_dir(store_root, session_id);
918    fs::create_dir_all(&mounts_dir).map_err(InternalizeError::Io)?;
919    let mount = serde_json::json!({
920        "schema_version": SCHEMA_VERSION,
921        "artifact_id": id,
922        "session_id": session_id,
923        "tool": tool,
924        "lease_id": lease_id,
925        "lease_seconds": lease_seconds,
926        "mounted_at": attached_at,
927        "lease_expires_at": lease_expires_at,
928        "adapter_hash": inspection.manifest.adapter_hash,
929        "source_verification": inspection.integrity.source_verification,
930        "capabilities_contract": inspection.manifest.capabilities_contract,
931        "risk_classification": inspection.manifest.risk_tier.attach
932    });
933    fs::write(
934        mount_path(store_root, session_id, id),
935        serde_json::to_string_pretty(&mount).map_err(InternalizeError::Json)?,
936    )
937    .map_err(InternalizeError::Io)?;
938
939    let session_prov_dir = session_dir(store_root, session_id);
940    fs::create_dir_all(&session_prov_dir).map_err(InternalizeError::Io)?;
941    let attach_log = session_prov_dir.join(format!("internalize_attach_{}.json", id));
942    let attach_entry = serde_json::json!({
943        "op": "internalize.attach",
944        "artifact_id": id,
945        "session_id": session_id,
946        "tool": tool,
947        "lease_id": lease_id,
948        "lease_seconds": lease_seconds,
949        "lease_expires_at": lease_expires_at,
950        "timestamp": attached_at,
951        "adapter_hash": inspection.manifest.adapter_hash,
952        "capabilities_contract": inspection.manifest.capabilities_contract,
953        "risk_classification": inspection.manifest.risk_tier.attach,
954        "source_verification": inspection.integrity.source_verification,
955    });
956    fs::write(
957        attach_log,
958        serde_json::to_string_pretty(&attach_entry).map_err(InternalizeError::Json)?,
959    )
960    .map_err(InternalizeError::Io)?;
961
962    Ok(InternalizationAttachResult {
963        schema_version: SCHEMA_VERSION.to_string(),
964        success: true,
965        artifact_id: id.to_string(),
966        session_id: session_id.to_string(),
967        tool: tool.to_string(),
968        attached_at,
969        lease_id,
970        lease_seconds,
971        lease_expires_at,
972        expires_at: inspection.manifest.expires_at,
973        capabilities_contract: inspection.manifest.capabilities_contract,
974        risk_classification: inspection.manifest.risk_tier.attach,
975        source_verification: inspection.integrity.source_verification,
976        provenance_entry,
977    })
978}
979
980pub fn detach_internalization(
981    store_root: &Path,
982    id: &str,
983    session_id: &str,
984) -> Result<InternalizationDetachResult, InternalizeError> {
985    let mount_file = mount_path(store_root, session_id, id);
986    if !mount_file.exists() {
987        return Err(InternalizeError::MountNotFound {
988            artifact_id: id.to_string(),
989            session_id: session_id.to_string(),
990        });
991    }
992
993    let raw = fs::read_to_string(&mount_file).map_err(InternalizeError::Io)?;
994    let mount: serde_json::Value = serde_json::from_str(&raw).map_err(InternalizeError::Json)?;
995    let lease_id = mount
996        .get("lease_id")
997        .and_then(|v| v.as_str())
998        .unwrap_or("unknown")
999        .to_string();
1000    fs::remove_file(&mount_file).map_err(InternalizeError::Io)?;
1001
1002    let detached_at = now_iso8601();
1003    let session_prov_dir = session_dir(store_root, session_id);
1004    fs::create_dir_all(&session_prov_dir).map_err(InternalizeError::Io)?;
1005    let detach_log = session_prov_dir.join(format!("internalize_detach_{}.json", id));
1006    let detach_entry = serde_json::json!({
1007        "op": "internalize.detach",
1008        "artifact_id": id,
1009        "session_id": session_id,
1010        "lease_id": lease_id,
1011        "timestamp": detached_at,
1012    });
1013    fs::write(
1014        detach_log,
1015        serde_json::to_string_pretty(&detach_entry).map_err(InternalizeError::Json)?,
1016    )
1017    .map_err(InternalizeError::Io)?;
1018
1019    Ok(InternalizationDetachResult {
1020        schema_version: SCHEMA_VERSION.to_string(),
1021        success: true,
1022        artifact_id: id.to_string(),
1023        session_id: session_id.to_string(),
1024        detached_at,
1025        lease_id,
1026        detached: true,
1027    })
1028}
1029
1030pub fn manifest_json_schema() -> serde_json::Value {
1031    serde_json::json!({
1032        "$schema": "https://json-schema.org/draft/2020-12/schema",
1033        "$id": "https://decapod.dev/schemas/internalization/manifest-1.2.0.json",
1034        "title": "InternalizationManifest",
1035        "type": "object",
1036        "required": [
1037            "schema_version", "id", "source_hash", "source_path", "base_model_id",
1038            "internalizer_profile", "internalizer_version", "adapter_format", "created_at",
1039            "ttl_seconds", "provenance", "replay_recipe", "adapter_hash", "adapter_path",
1040            "capabilities_contract", "risk_tier", "determinism_class", "binary_hash",
1041            "runtime_fingerprint"
1042        ],
1043        "properties": {
1044            "schema_version": { "const": SCHEMA_VERSION },
1045            "id": { "type": "string", "pattern": "^int_[a-f0-9]{24}$" },
1046            "source_hash": { "type": "string", "pattern": "^[a-f0-9]{64}$" },
1047            "determinism_class": { "enum": ["deterministic", "best_effort"] },
1048            "binary_hash": { "type": "string", "minLength": 1 },
1049            "runtime_fingerprint": { "type": "string", "minLength": 1 }
1050        }
1051    })
1052}
1053
1054pub fn create_result_json_schema() -> serde_json::Value {
1055    serde_json::json!({
1056        "$schema": "https://json-schema.org/draft/2020-12/schema",
1057        "$id": "https://decapod.dev/schemas/internalization/create-result-1.2.0.json",
1058        "title": "InternalizationCreateResult",
1059        "type": "object",
1060        "required": [
1061            "schema_version", "success", "artifact_id", "artifact_path",
1062            "cache_hit", "manifest", "source_hash", "adapter_hash"
1063        ]
1064    })
1065}
1066
1067pub fn attach_result_json_schema() -> serde_json::Value {
1068    serde_json::json!({
1069        "$schema": "https://json-schema.org/draft/2020-12/schema",
1070        "$id": "https://decapod.dev/schemas/internalization/attach-result-1.2.0.json",
1071        "title": "InternalizationAttachResult",
1072        "type": "object",
1073        "required": [
1074            "schema_version", "success", "artifact_id", "session_id", "tool",
1075            "attached_at", "lease_id", "lease_seconds", "lease_expires_at"
1076        ]
1077    })
1078}
1079
1080pub fn detach_result_json_schema() -> serde_json::Value {
1081    serde_json::json!({
1082        "$schema": "https://json-schema.org/draft/2020-12/schema",
1083        "$id": "https://decapod.dev/schemas/internalization/detach-result-1.2.0.json",
1084        "title": "InternalizationDetachResult",
1085        "type": "object",
1086        "required": [
1087            "schema_version", "success", "artifact_id", "session_id",
1088            "detached_at", "lease_id", "detached"
1089        ]
1090    })
1091}
1092
1093pub fn inspect_result_json_schema() -> serde_json::Value {
1094    serde_json::json!({
1095        "$schema": "https://json-schema.org/draft/2020-12/schema",
1096        "$id": "https://decapod.dev/schemas/internalization/inspect-result-1.2.0.json",
1097        "title": "InternalizationInspectResult",
1098        "type": "object",
1099        "required": ["schema_version", "artifact_id", "manifest", "integrity", "status"]
1100    })
1101}
1102
1103pub fn schema() -> serde_json::Value {
1104    serde_json::json!({
1105        "name": "internalize",
1106        "version": SCHEMA_VERSION,
1107        "description": "Internalized context artifact lifecycle with explicit create, attach lease, detach, and inspect gates",
1108        "commands": [
1109            { "name": "create", "parameters": ["source", "model", "profile", "ttl", "scope", "format"] },
1110            { "name": "attach", "parameters": ["id", "session", "tool", "lease_seconds", "format"] },
1111            { "name": "detach", "parameters": ["id", "session", "format"] },
1112            { "name": "inspect", "parameters": ["id", "format"] }
1113        ]
1114    })
1115}
1116
1117pub fn run_internalize_cli(
1118    _store: &Store,
1119    store_root: &Path,
1120    cli: InternalizeCli,
1121) -> Result<(), crate::core::error::DecapodError> {
1122    match cli.command {
1123        InternalizeCommand::Create {
1124            source,
1125            model,
1126            profile,
1127            ttl,
1128            scopes,
1129            format,
1130        } => {
1131            let result =
1132                create_internalization(store_root, &source, &model, &profile, ttl, &scopes)?;
1133            if format == "json" {
1134                println!("{}", serde_json::to_string_pretty(&result).unwrap());
1135            } else {
1136                println!("Created internalization artifact: {}", result.artifact_id);
1137            }
1138        }
1139        InternalizeCommand::Attach {
1140            id,
1141            session,
1142            tool,
1143            lease_seconds,
1144            format,
1145        } => {
1146            let result = attach_internalization(store_root, &id, &session, &tool, lease_seconds)?;
1147            if format == "json" {
1148                println!("{}", serde_json::to_string_pretty(&result).unwrap());
1149            } else {
1150                println!(
1151                    "Attached {} to session {} until {}",
1152                    result.artifact_id, result.session_id, result.lease_expires_at
1153                );
1154            }
1155        }
1156        InternalizeCommand::Detach {
1157            id,
1158            session,
1159            format,
1160        } => {
1161            let result = detach_internalization(store_root, &id, &session)?;
1162            if format == "json" {
1163                println!("{}", serde_json::to_string_pretty(&result).unwrap());
1164            } else {
1165                println!(
1166                    "Detached {} from session {}",
1167                    result.artifact_id, result.session_id
1168                );
1169            }
1170        }
1171        InternalizeCommand::Inspect { id, format } => {
1172            let result = inspect_internalization(store_root, &id)?;
1173            if format == "json" {
1174                println!("{}", serde_json::to_string_pretty(&result).unwrap());
1175            } else {
1176                println!("Artifact: {}", result.artifact_id);
1177                println!("  Status: {}", result.status);
1178            }
1179        }
1180    }
1181    Ok(())
1182}