1use crate::core::store::Store;
14use clap::Subcommand;
15use serde::{Deserialize, Serialize};
16use sha2::{Digest, Sha256};
17use std::collections::BTreeMap;
18use std::fs;
19use std::path::{Path, PathBuf};
20use std::process::Command as ProcessCommand;
21use std::time::{SystemTime, UNIX_EPOCH};
22
// Argument container for the `internalize` command group; it only carries the
// selected subcommand, which `run_internalize_cli` dispatches on.
// NOTE(review): plain `//` comments are used here instead of `///` because
// clap's derive may surface doc comments as help text — confirm before converting.
#[derive(clap::Args, Debug)]
pub struct InternalizeCli {
    #[clap(subcommand)]
    pub command: InternalizeCommand,
}
28
// Subcommands of `internalize`. Each variant is matched in `run_internalize_cli`
// and forwarded to the corresponding `*_internalization` function.
// NOTE(review): plain `//` comments are used instead of `///` because clap's
// derive may surface doc comments as help text — confirm before converting.
#[derive(Subcommand, Debug)]
pub enum InternalizeCommand {
    // Build (or reuse from cache) an internalization artifact for a source document.
    Create {
        // Source document path; `create_internalization` rejects "-" and http(s) URLs.
        #[clap(long)]
        source: String,
        // Base model identifier, recorded in the manifest as `base_model_id`.
        #[clap(long)]
        model: String,
        // Internalizer profile name; defaults to the built-in "noop" profile.
        #[clap(long, default_value = "noop")]
        profile: String,
        // Time-to-live in seconds; with 0 no `expires_at` is recorded.
        #[clap(long, default_value_t = 0)]
        ttl: u64,
        // Scopes passed via `--scope`; a single value may hold a comma-separated list.
        #[clap(long = "scope", value_delimiter = ',')]
        scopes: Vec<String>,
        // Output format: "json" prints the full result as pretty JSON, any other
        // value prints a short text summary.
        #[clap(long, default_value = "json")]
        format: String,
    },
    // Mount an existing artifact into a session under a time-limited lease.
    Attach {
        // Artifact id to attach.
        #[clap(long)]
        id: String,
        // Session id the mount record is written under.
        #[clap(long)]
        session: String,
        // Tool requesting the mount; checked against the manifest's permitted tools.
        #[clap(long, default_value = "decapod-cli")]
        tool: String,
        // Lease length in seconds (default 1800, the same value as
        // `DEFAULT_ATTACH_LEASE_SECONDS`).
        #[clap(long, default_value_t = 1800)]
        lease_seconds: u64,
        // Output format: "json" or anything else for a short text summary.
        #[clap(long, default_value = "json")]
        format: String,
    },
    // Remove the mount record of an artifact from a session.
    Detach {
        // Artifact id to detach.
        #[clap(long)]
        id: String,
        // Session id the mount record lives under.
        #[clap(long)]
        session: String,
        // Output format: "json" or anything else for a short text summary.
        #[clap(long, default_value = "json")]
        format: String,
    },
    // Load an artifact's manifest and report its integrity status.
    Inspect {
        // Artifact id to inspect.
        #[clap(long)]
        id: String,
        // Output format: "json" or anything else for a short text summary.
        #[clap(long, default_value = "json")]
        format: String,
    },
}
76
/// Schema version stamped into manifests, result payloads, mount records and
/// the command schema emitted by this module.
pub const SCHEMA_VERSION: &str = "1.2.0";
/// Default attach lease length in seconds; the `attach` subcommand's
/// `lease_seconds` argument uses the same value as its default.
pub const DEFAULT_ATTACH_LEASE_SECONDS: u64 = 1800;
79
/// Determinism declared by an internalizer profile.
///
/// Serialized in snake_case (`"deterministic"`, `"best_effort"`).
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
#[serde(rename_all = "snake_case")]
pub enum DeterminismClass {
    /// The profile claims reproducible output for the same inputs.
    Deterministic,
    /// The profile makes no reproducibility claim.
    BestEffort,
}
86
/// Whether a manifest's replay recipe claims the artifact can be reproduced.
///
/// Serialized in snake_case (`"replayable"`, `"non_replayable"`).
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
#[serde(rename_all = "snake_case")]
pub enum ReplayClass {
    /// Assigned by `build_replay_recipe` to deterministic profiles with a non-empty binary hash.
    Replayable,
    /// Assigned in every other case.
    NonReplayable,
}
93
/// Persistent description of one internalization artifact, stored as
/// `manifest.json` inside the artifact directory.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub struct InternalizationManifest {
    /// Schema version the manifest was written with.
    pub schema_version: String,
    /// Artifact id (`int_` followed by 24 hex characters when produced by this module).
    pub id: String,
    /// SHA-256 hex digest of the source document at creation time.
    pub source_hash: String,
    /// Canonicalized path of the source document.
    pub source_path: String,
    /// Set to the internalizer profile name by `create_internalization`.
    pub extraction_method: String,
    /// Parameters reported by the internalizer run (or the profile defaults).
    pub chunking_params: BTreeMap<String, serde_json::Value>,
    /// Base model identifier supplied by the caller.
    pub base_model_id: String,
    /// Name of the internalizer profile used.
    pub internalizer_profile: String,
    /// Version string of the internalizer profile used.
    pub internalizer_version: String,
    /// Adapter format declared by the profile.
    pub adapter_format: String,
    /// Creation timestamp in `YYYY-MM-DDTHH:MM:SSZ` form.
    pub created_at: String,
    /// Requested time-to-live in seconds; 0 means no expiry was recorded.
    pub ttl_seconds: u64,
    /// Expiry timestamp; absent (and omitted from JSON) when `ttl_seconds` is 0.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub expires_at: Option<String>,
    /// Provenance log; creation records a single `internalize.create` entry.
    pub provenance: Vec<ProvenanceEntry>,
    /// Command line and replayability claim for reproducing the artifact.
    pub replay_recipe: ReplayRecipe,
    /// SHA-256 hex digest of the adapter file.
    pub adapter_hash: String,
    /// Adapter location relative to the artifact directory (`adapter.bin` on creation).
    pub adapter_path: String,
    /// Scopes and tools allowed to use the artifact.
    pub capabilities_contract: CapabilitiesContract,
    /// Risk labels for the create / attach / inspect operations.
    pub risk_tier: RiskTier,
    /// Determinism class copied from the profile.
    pub determinism_class: DeterminismClass,
    /// Hash identifying the internalizer executable.
    pub binary_hash: String,
    /// OS / architecture / executable string describing the creating runtime.
    pub runtime_fingerprint: String,
}
120
/// One recorded operation in an artifact's history.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub struct ProvenanceEntry {
    /// Operation name, e.g. `internalize.create` or `internalize.attach`.
    pub op: String,
    /// Timestamp of the operation in `YYYY-MM-DDTHH:MM:SSZ` form.
    pub timestamp: String,
    /// Who performed it (`decapod-cli` on create, `session:<id>` on attach).
    pub actor: String,
    /// Hash of the operation's input (source hash on create, adapter hash on attach).
    pub inputs_hash: String,
}
128
/// Command line that would recreate an artifact, plus the replayability claim.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub struct ReplayRecipe {
    /// Replayability claim for this recipe.
    pub mode: ReplayClass,
    /// Program to invoke (`decapod` when built by `build_replay_recipe`).
    pub command: String,
    /// Arguments passed to `command`.
    pub args: Vec<String>,
    /// Environment variables for the replay (empty when built by `build_replay_recipe`).
    pub env: BTreeMap<String, String>,
    /// Human-readable justification for `mode`.
    pub reason: String,
}
137
/// What an artifact may be used for and by whom.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub struct CapabilitiesContract {
    /// Scopes the artifact was created for (`["qa"]` when none were requested).
    pub allowed_scopes: Vec<String>,
    /// Tools allowed to attach the artifact; `"*"` matches any tool.
    pub permitted_tools: Vec<String>,
    /// True when the `code-gen` scope is among the allowed scopes.
    pub allow_code_gen: bool,
}
144
/// Risk labels attached to the three artifact operations.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub struct RiskTier {
    /// Label for the create operation.
    pub creation: String,
    /// Label for the attach operation; echoed as `risk_classification` on attach.
    pub attach: String,
    /// Label for the inspect operation.
    pub inspect: String,
}
151
152impl Default for RiskTier {
153 fn default() -> Self {
154 Self {
155 creation: "compute-risky".to_string(),
156 attach: "behavior-changing".to_string(),
157 inspect: "read-only".to_string(),
158 }
159 }
160}
161
/// Outcome of `create_internalization`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct InternalizationCreateResult {
    /// Schema version of this payload.
    pub schema_version: String,
    /// Always `true` on the success path that builds this value.
    pub success: bool,
    /// Id of the created or reused artifact.
    pub artifact_id: String,
    /// Directory holding the manifest and adapter.
    pub artifact_path: String,
    /// True when an existing manifest was reused instead of running the internalizer.
    pub cache_hit: bool,
    /// Full manifest of the artifact.
    pub manifest: InternalizationManifest,
    /// SHA-256 hex digest of the source document.
    pub source_hash: String,
    /// SHA-256 hex digest of the adapter file.
    pub adapter_hash: String,
}
173
/// Outcome of `attach_internalization`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct InternalizationAttachResult {
    /// Schema version of this payload.
    pub schema_version: String,
    /// Always `true` on the success path that builds this value.
    pub success: bool,
    /// Id of the attached artifact.
    pub artifact_id: String,
    /// Session the artifact was attached to.
    pub session_id: String,
    /// Tool that requested the mount.
    pub tool: String,
    /// Timestamp of the attach operation.
    pub attached_at: String,
    /// Lease identifier (`mount_<artifact id>`).
    pub lease_id: String,
    /// Requested lease length in seconds.
    pub lease_seconds: u64,
    /// Timestamp at which the lease ends.
    pub lease_expires_at: String,
    /// Artifact expiry copied from the manifest, if any.
    pub expires_at: Option<String>,
    /// Capabilities contract copied from the manifest.
    pub capabilities_contract: CapabilitiesContract,
    /// The manifest's attach risk label.
    pub risk_classification: String,
    /// Source verification label produced during inspection (e.g. `verified`).
    pub source_verification: String,
    /// Provenance record describing this attach.
    pub provenance_entry: ProvenanceEntry,
}
191
/// Outcome of `detach_internalization`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct InternalizationDetachResult {
    /// Schema version of this payload.
    pub schema_version: String,
    /// Always `true` on the success path that builds this value.
    pub success: bool,
    /// Id of the detached artifact.
    pub artifact_id: String,
    /// Session the artifact was detached from.
    pub session_id: String,
    /// Timestamp of the detach operation.
    pub detached_at: String,
    /// Lease id read from the mount record (`unknown` if the record had none).
    pub lease_id: String,
    /// Always `true` on the success path that builds this value.
    pub detached: bool,
}
202
/// Outcome of `inspect_internalization`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct InternalizationInspectResult {
    /// Schema version of this payload.
    pub schema_version: String,
    /// Id that was inspected.
    pub artifact_id: String,
    /// Manifest as loaded from disk.
    pub manifest: InternalizationManifest,
    /// Individual integrity findings.
    pub integrity: IntegrityCheck,
    /// Overall verdict: `expired`, `integrity-failed`, `best-effort` or `valid`.
    pub status: String,
}
211
/// Individual integrity findings gathered by `inspect_internalization`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct IntegrityCheck {
    /// True only when the source file exists and its hash matches the manifest.
    pub source_hash_valid: bool,
    /// Label explaining the source check (`verified`, `mismatch` or a `best-effort-*` value).
    pub source_verification: String,
    /// True when the adapter file exists and its hash matches the manifest.
    pub adapter_hash_valid: bool,
    /// Currently always set to `true` by `inspect_internalization`.
    pub manifest_consistent: bool,
    /// True when the manifest's `expires_at` lies in the past.
    pub expired: bool,
    /// True when the replay claim agrees with the determinism metadata.
    pub replayable_claim_valid: bool,
}
221
/// Description of an internalizer: either the built-in no-op or an external
/// executable described by a JSON profile file.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct InternalizerProfile {
    /// Profile name.
    pub name: String,
    /// Profile version string.
    pub version: String,
    /// `builtin:noop` or the path of the executable to run.
    pub executable: String,
    /// Parameters passed to the executable and used as fallback result params.
    pub default_params: BTreeMap<String, serde_json::Value>,
    /// Adapter format the profile produces.
    pub adapter_format: String,
    /// Determinism class the profile declares.
    pub determinism_class: DeterminismClass,
}
231
232impl InternalizerProfile {
233 pub fn noop() -> Self {
234 Self {
235 name: "noop".to_string(),
236 version: "1.0.0".to_string(),
237 executable: "builtin:noop".to_string(),
238 default_params: BTreeMap::new(),
239 adapter_format: "noop".to_string(),
240 determinism_class: DeterminismClass::Deterministic,
241 }
242 }
243
244 pub fn resolve(name: &str, store_root: &Path) -> Result<Self, InternalizeError> {
245 if name == "noop" {
246 return Ok(Self::noop());
247 }
248 let profile_path = control_root(store_root)
249 .join("generated")
250 .join("profiles")
251 .join("internalizers")
252 .join(format!("{}.json", name));
253 if !profile_path.exists() {
254 return Err(InternalizeError::ProfileNotFound(name.to_string()));
255 }
256 let raw = fs::read_to_string(&profile_path).map_err(InternalizeError::Io)?;
257 serde_json::from_str(&raw).map_err(InternalizeError::Json)
258 }
259
260 pub fn binary_hash(&self) -> Result<String, InternalizeError> {
261 if self.executable == "builtin:noop" {
262 return sha256_bytes(self.executable.as_bytes());
263 }
264 let path = Path::new(&self.executable);
265 if !path.exists() {
266 return Err(InternalizeError::ProfileExecution(format!(
267 "Internalizer binary not found: {}",
268 self.executable
269 )));
270 }
271 sha256_file(path)
272 }
273
274 pub fn runtime_fingerprint(&self) -> String {
275 format!(
276 "os={} arch={} executable={}",
277 std::env::consts::OS,
278 std::env::consts::ARCH,
279 self.executable
280 )
281 }
282
283 pub fn execute(
284 &self,
285 source_path: &Path,
286 base_model: &str,
287 output_dir: &Path,
288 ) -> Result<(PathBuf, BTreeMap<String, serde_json::Value>), InternalizeError> {
289 let adapter_file = output_dir.join("adapter.bin");
290
291 if self.executable == "builtin:noop" {
292 fs::write(&adapter_file, b"").map_err(InternalizeError::Io)?;
293 return Ok((adapter_file, self.default_params.clone()));
294 }
295
296 let input = serde_json::json!({
297 "source_path": source_path.to_string_lossy(),
298 "base_model": base_model,
299 "output_dir": output_dir.to_string_lossy(),
300 "params": self.default_params,
301 });
302
303 let output = ProcessCommand::new(&self.executable)
304 .arg("--input")
305 .arg(serde_json::to_string(&input).unwrap_or_default())
306 .output()
307 .map_err(InternalizeError::Io)?;
308
309 if !output.status.success() {
310 let stderr = String::from_utf8_lossy(&output.stderr);
311 return Err(InternalizeError::ProfileExecution(format!(
312 "Internalizer '{}' failed: {}",
313 self.name, stderr
314 )));
315 }
316
317 if !adapter_file.exists() {
318 return Err(InternalizeError::ProfileExecution(format!(
319 "Internalizer '{}' did not produce adapter at {}",
320 self.name,
321 adapter_file.display()
322 )));
323 }
324
325 let stdout = String::from_utf8_lossy(&output.stdout);
326 let params = serde_json::from_str(&stdout).unwrap_or_else(|_| self.default_params.clone());
327
328 Ok((adapter_file, params))
329 }
330}
331
/// Errors produced by the internalization lifecycle functions.
#[derive(Debug)]
pub enum InternalizeError {
    /// Filesystem or process I/O failure.
    Io(std::io::Error),
    /// JSON serialization or deserialization failure.
    Json(serde_json::Error),
    /// No profile file exists for the given profile name.
    ProfileNotFound(String),
    /// The internalizer could not be located, failed, or produced no adapter.
    ProfileExecution(String),
    /// No manifest exists for the given artifact id.
    ArtifactNotFound(String),
    /// Detach was requested but no mount record exists for the pair.
    MountNotFound {
        artifact_id: String,
        session_id: String,
    },
    /// The source file's current hash differs from the manifest.
    SourceIntegrityFailed {
        expected: String,
        actual: String,
    },
    /// The adapter file is missing or its hash differs from the manifest.
    AdapterIntegrityFailed {
        expected: String,
        actual: String,
    },
    /// The artifact's `expires_at` has passed.
    Expired {
        artifact_id: String,
        expired_at: String,
    },
    /// The tool is not listed in the artifact's permitted tools.
    ToolNotPermitted {
        tool: String,
        artifact_id: String,
    },
    /// A request or manifest failed validation.
    ValidationError(String),
}
361
362impl std::fmt::Display for InternalizeError {
363 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
364 match self {
365 Self::Io(e) => write!(f, "IO error: {}", e),
366 Self::Json(e) => write!(f, "JSON error: {}", e),
367 Self::ProfileNotFound(n) => write!(f, "Internalizer profile '{}' not found", n),
368 Self::ProfileExecution(s) => write!(f, "Profile execution error: {}", s),
369 Self::ArtifactNotFound(id) => write!(f, "Artifact '{}' not found", id),
370 Self::MountNotFound {
371 artifact_id,
372 session_id,
373 } => write!(
374 f,
375 "No active mount for artifact '{}' in session '{}'",
376 artifact_id, session_id
377 ),
378 Self::SourceIntegrityFailed { expected, actual } => write!(
379 f,
380 "Source integrity check failed: expected {}, got {}",
381 expected, actual
382 ),
383 Self::AdapterIntegrityFailed { expected, actual } => write!(
384 f,
385 "Adapter integrity check failed: expected {}, got {}",
386 expected, actual
387 ),
388 Self::Expired {
389 artifact_id,
390 expired_at,
391 } => write!(
392 f,
393 "Artifact '{}' expired at {}; renew with a new create",
394 artifact_id, expired_at
395 ),
396 Self::ToolNotPermitted { tool, artifact_id } => write!(
397 f,
398 "Tool '{}' is not permitted to mount artifact '{}'",
399 tool, artifact_id
400 ),
401 Self::ValidationError(s) => write!(f, "Validation error: {}", s),
402 }
403 }
404}
405
// Marker impl: relies on the `Debug` and `Display` impls above and overrides
// none of the trait's provided methods.
impl std::error::Error for InternalizeError {}
407
408impl From<InternalizeError> for crate::core::error::DecapodError {
409 fn from(e: InternalizeError) -> Self {
410 crate::core::error::DecapodError::ValidationError(e.to_string())
411 }
412}
413
414fn sha256_file(path: &Path) -> Result<String, InternalizeError> {
415 let bytes = fs::read(path).map_err(InternalizeError::Io)?;
416 sha256_bytes(&bytes)
417}
418
419fn sha256_bytes(bytes: &[u8]) -> Result<String, InternalizeError> {
420 let mut hasher = Sha256::new();
421 hasher.update(bytes);
422 Ok(format!("{:x}", hasher.finalize()))
423}
424
/// Formats seconds since the Unix epoch as a UTC `YYYY-MM-DDTHH:MM:SSZ` string.
///
/// The calendar date is computed in constant time from the day count
/// (proleptic Gregorian calendar), so very large inputs — for example a
/// saturated `now + ttl` sum — return immediately instead of iterating once
/// per year. Years are zero-padded to at least four digits.
fn iso8601_from_epoch(secs: u64) -> String {
    const SECS_PER_DAY: u64 = 86_400;
    const DAYS_PER_ERA: u64 = 146_097; // days in one 400-year Gregorian cycle
    const DAYS_0000_03_01_TO_EPOCH: u64 = 719_468;

    let days = secs / SECS_PER_DAY;
    let time_of_day = secs % SECS_PER_DAY;
    let hours = time_of_day / 3600;
    let minutes = (time_of_day % 3600) / 60;
    let seconds = time_of_day % 60;

    // Re-base the day count on 0000-03-01 so that the leap day is the last
    // day of the (shifted) year, which makes month lengths regular.
    let shifted = days + DAYS_0000_03_01_TO_EPOCH;
    let era = shifted / DAYS_PER_ERA;
    let day_of_era = shifted % DAYS_PER_ERA; // [0, 146096]
    let year_of_era =
        (day_of_era - day_of_era / 1_460 + day_of_era / 36_524 - day_of_era / 146_096) / 365; // [0, 399]
    let day_of_year = day_of_era - (365 * year_of_era + year_of_era / 4 - year_of_era / 100); // [0, 365]
    let shifted_month = (5 * day_of_year + 2) / 153; // 0 = March ... 11 = February
    let day = day_of_year - (153 * shifted_month + 2) / 5 + 1; // [1, 31]
    let month = if shifted_month < 10 {
        shifted_month + 3
    } else {
        shifted_month - 9
    };
    // January and February belong to the following civil year.
    let year = era * 400 + year_of_era + u64::from(month <= 2);

    format!(
        "{:04}-{:02}-{:02}T{:02}:{:02}:{:02}Z",
        year, month, day, hours, minutes, seconds
    )
}
478
/// Current time as whole seconds since the Unix epoch; yields 0 if the system
/// clock reports a time before the epoch.
fn now_unix() -> u64 {
    match SystemTime::now().duration_since(UNIX_EPOCH) {
        Ok(elapsed) => elapsed.as_secs(),
        Err(_) => 0,
    }
}
485
486pub fn now_iso8601() -> String {
487 iso8601_from_epoch(now_unix())
488}
489
490fn iso8601_after_secs(secs: u64) -> String {
491 iso8601_from_epoch(now_unix().saturating_add(secs))
492}
493
/// Maps a store root to the directory under which `generated/` lives.
///
/// A root of the form `<...>/.decapod/data` resolves to its `.decapod`
/// parent; every other path is returned unchanged.
fn control_root(store_root: &Path) -> PathBuf {
    let has_name =
        |path: &Path, expected: &str| path.file_name().and_then(|name| name.to_str()) == Some(expected);

    match store_root.parent() {
        Some(parent) if has_name(store_root, "data") && has_name(parent, ".decapod") => {
            parent.to_path_buf()
        }
        _ => store_root.to_path_buf(),
    }
}
510
511fn artifacts_dir(store_root: &Path) -> PathBuf {
512 control_root(store_root)
513 .join("generated")
514 .join("artifacts")
515 .join("internalizations")
516}
517
518fn artifact_dir(store_root: &Path, id: &str) -> PathBuf {
519 artifacts_dir(store_root).join(id)
520}
521
522fn session_dir(store_root: &Path, session_id: &str) -> PathBuf {
523 control_root(store_root)
524 .join("generated")
525 .join("sessions")
526 .join(session_id)
527}
528
529fn mount_dir(store_root: &Path, session_id: &str) -> PathBuf {
530 session_dir(store_root, session_id).join("internalize_mounts")
531}
532
/// Derives the mount / lease identifier for an artifact: `mount_<artifact_id>`.
fn mount_id(artifact_id: &str) -> String {
    ["mount_", artifact_id].concat()
}
536
537fn mount_path(store_root: &Path, session_id: &str, artifact_id: &str) -> PathBuf {
538 mount_dir(store_root, session_id).join(format!("{}.json", mount_id(artifact_id)))
539}
540
/// True when `source` denotes stdin (`-`) or an `http://` / `https://` URL
/// rather than a local file path.
fn is_non_local_source(source: &str) -> bool {
    if source == "-" {
        return true;
    }
    ["http://", "https://"]
        .iter()
        .any(|scheme| source.starts_with(scheme))
}
544
545fn is_expired(expires_at: Option<&str>) -> bool {
546 expires_at.is_some_and(|exp| now_iso8601().as_str() > exp)
547}
548
549fn verify_source_binding(
550 manifest: &InternalizationManifest,
551) -> Result<(bool, String), InternalizeError> {
552 if manifest.source_path == "-" {
553 return Ok((false, "best-effort-stdin-source".to_string()));
554 }
555 if manifest.source_path.starts_with("http://") || manifest.source_path.starts_with("https://") {
556 return Ok((false, "best-effort-nonlocal-source".to_string()));
557 }
558
559 let source_path = Path::new(&manifest.source_path);
560 if !source_path.exists() {
561 return Ok((false, "best-effort-source-unavailable".to_string()));
562 }
563
564 let actual = sha256_file(source_path)?;
565 if actual == manifest.source_hash {
566 Ok((true, "verified".to_string()))
567 } else {
568 Ok((false, "mismatch".to_string()))
569 }
570}
571
572fn tool_is_permitted(contract: &CapabilitiesContract, tool: &str) -> bool {
573 contract
574 .permitted_tools
575 .iter()
576 .any(|entry| entry == "*" || entry == tool)
577}
578
579fn artifact_id_for_request(
580 source_hash: &str,
581 source_path: &str,
582 model: &str,
583 profile: &InternalizerProfile,
584 ttl: u64,
585 scopes: &[String],
586) -> Result<String, InternalizeError> {
587 let mut normalized_scopes = scopes.to_vec();
588 normalized_scopes.sort();
589 normalized_scopes.dedup();
590 let binding = serde_json::json!({
591 "schema_version": SCHEMA_VERSION,
592 "source_hash": source_hash,
593 "source_path": source_path,
594 "base_model_id": model,
595 "internalizer_profile": profile.name,
596 "internalizer_version": profile.version,
597 "adapter_format": profile.adapter_format,
598 "determinism_class": profile.determinism_class,
599 "ttl_seconds": ttl,
600 "scopes": normalized_scopes,
601 "chunking_params": profile.default_params,
602 });
603 let bytes = serde_json::to_vec(&binding).map_err(InternalizeError::Json)?;
604 let hex = sha256_bytes(&bytes)?;
605 Ok(format!("int_{}", &hex[..24]))
606}
607
608fn build_replay_recipe(
609 profile: &InternalizerProfile,
610 binary_hash: &str,
611 source_path: &str,
612 model: &str,
613 ttl: u64,
614 scopes: &[String],
615) -> ReplayRecipe {
616 let mut replay_args = vec![
617 "internalize".to_string(),
618 "create".to_string(),
619 "--source".to_string(),
620 source_path.to_string(),
621 "--model".to_string(),
622 model.to_string(),
623 "--profile".to_string(),
624 profile.name.clone(),
625 ];
626 if ttl > 0 {
627 replay_args.push("--ttl".to_string());
628 replay_args.push(ttl.to_string());
629 }
630 for scope in scopes {
631 replay_args.push("--scope".to_string());
632 replay_args.push(scope.clone());
633 }
634
635 let (mode, reason) = match profile.determinism_class {
636 DeterminismClass::Deterministic if !binary_hash.is_empty() => (
637 ReplayClass::Replayable,
638 "deterministic profile with pinned binary hash".to_string(),
639 ),
640 DeterminismClass::Deterministic => (
641 ReplayClass::NonReplayable,
642 "deterministic profile missing pinned binary hash".to_string(),
643 ),
644 DeterminismClass::BestEffort => (
645 ReplayClass::NonReplayable,
646 "best_effort profile may depend on nondeterministic runtime or hardware".to_string(),
647 ),
648 };
649
650 ReplayRecipe {
651 mode,
652 command: "decapod".to_string(),
653 args: replay_args,
654 env: BTreeMap::new(),
655 reason,
656 }
657}
658
659fn replayable_claim_valid(manifest: &InternalizationManifest) -> bool {
660 match manifest.replay_recipe.mode {
661 ReplayClass::Replayable => {
662 manifest.determinism_class == DeterminismClass::Deterministic
663 && !manifest.binary_hash.trim().is_empty()
664 }
665 ReplayClass::NonReplayable => {
666 if manifest.determinism_class == DeterminismClass::BestEffort {
667 !manifest.binary_hash.trim().is_empty()
668 && !manifest.runtime_fingerprint.trim().is_empty()
669 } else {
670 true
671 }
672 }
673 }
674}
675
676pub fn create_internalization(
677 store_root: &Path,
678 source: &str,
679 model: &str,
680 profile_name: &str,
681 ttl: u64,
682 scopes: &[String],
683) -> Result<InternalizationCreateResult, InternalizeError> {
684 if is_non_local_source(source) {
685 return Err(InternalizeError::ValidationError(
686 "MVP only supports local file sources; URL and stdin sources are intentionally not implemented"
687 .to_string(),
688 ));
689 }
690
691 let source_path = Path::new(source);
692 if !source_path.exists() {
693 return Err(InternalizeError::Io(std::io::Error::new(
694 std::io::ErrorKind::NotFound,
695 format!("Source document not found: {}", source),
696 )));
697 }
698 let canonical_source = fs::canonicalize(source_path).map_err(InternalizeError::Io)?;
699 let source_hash = sha256_file(&canonical_source)?;
700 let profile = InternalizerProfile::resolve(profile_name, store_root)?;
701
702 let effective_scopes = if scopes.is_empty() {
703 vec!["qa".to_string()]
704 } else {
705 let mut normalized = scopes.to_vec();
706 normalized.sort();
707 normalized.dedup();
708 normalized
709 };
710 let allow_code_gen = effective_scopes.iter().any(|s| s == "code-gen");
711 let binary_hash = profile.binary_hash()?;
712 let runtime_fingerprint = profile.runtime_fingerprint();
713 let source_path_string = canonical_source.to_string_lossy().to_string();
714 let artifact_id = artifact_id_for_request(
715 &source_hash,
716 &source_path_string,
717 model,
718 &profile,
719 ttl,
720 &effective_scopes,
721 )?;
722 let art_dir = artifact_dir(store_root, &artifact_id);
723 let manifest_path = art_dir.join("manifest.json");
724 if manifest_path.exists() {
725 let raw = fs::read_to_string(&manifest_path).map_err(InternalizeError::Io)?;
726 let manifest: InternalizationManifest =
727 serde_json::from_str(&raw).map_err(InternalizeError::Json)?;
728 return Ok(InternalizationCreateResult {
729 schema_version: SCHEMA_VERSION.to_string(),
730 success: true,
731 artifact_id,
732 artifact_path: art_dir.to_string_lossy().to_string(),
733 cache_hit: true,
734 source_hash: manifest.source_hash.clone(),
735 adapter_hash: manifest.adapter_hash.clone(),
736 manifest,
737 });
738 }
739
740 fs::create_dir_all(&art_dir).map_err(InternalizeError::Io)?;
741 let (adapter_path, chunking_params) = profile.execute(&canonical_source, model, &art_dir)?;
742 let adapter_hash = sha256_file(&adapter_path)?;
743 let now = now_iso8601();
744 let expires_at = if ttl > 0 {
745 Some(iso8601_after_secs(ttl))
746 } else {
747 None
748 };
749
750 let replay_recipe = build_replay_recipe(
751 &profile,
752 &binary_hash,
753 &source_path_string,
754 model,
755 ttl,
756 &effective_scopes,
757 );
758 let provenance_entry = ProvenanceEntry {
759 op: "internalize.create".to_string(),
760 timestamp: now.clone(),
761 actor: "decapod-cli".to_string(),
762 inputs_hash: source_hash.clone(),
763 };
764
765 let manifest = InternalizationManifest {
766 schema_version: SCHEMA_VERSION.to_string(),
767 id: artifact_id.clone(),
768 source_hash: source_hash.clone(),
769 source_path: source_path_string,
770 extraction_method: profile.name.clone(),
771 chunking_params,
772 base_model_id: model.to_string(),
773 internalizer_profile: profile.name.clone(),
774 internalizer_version: profile.version.clone(),
775 adapter_format: profile.adapter_format.clone(),
776 created_at: now,
777 ttl_seconds: ttl,
778 expires_at,
779 provenance: vec![provenance_entry],
780 replay_recipe,
781 adapter_hash: adapter_hash.clone(),
782 adapter_path: "adapter.bin".to_string(),
783 capabilities_contract: CapabilitiesContract {
784 allowed_scopes: effective_scopes,
785 permitted_tools: vec!["decapod-cli".to_string()],
786 allow_code_gen,
787 },
788 risk_tier: RiskTier::default(),
789 determinism_class: profile.determinism_class,
790 binary_hash,
791 runtime_fingerprint,
792 };
793
794 let manifest_json = serde_json::to_string_pretty(&manifest).map_err(InternalizeError::Json)?;
795 fs::write(&manifest_path, manifest_json).map_err(InternalizeError::Io)?;
796
797 Ok(InternalizationCreateResult {
798 schema_version: SCHEMA_VERSION.to_string(),
799 success: true,
800 artifact_id,
801 artifact_path: art_dir.to_string_lossy().to_string(),
802 cache_hit: false,
803 manifest,
804 source_hash,
805 adapter_hash,
806 })
807}
808
809pub fn inspect_internalization(
810 store_root: &Path,
811 id: &str,
812) -> Result<InternalizationInspectResult, InternalizeError> {
813 let art_dir = artifact_dir(store_root, id);
814 let manifest_path = art_dir.join("manifest.json");
815 if !manifest_path.exists() {
816 return Err(InternalizeError::ArtifactNotFound(id.to_string()));
817 }
818
819 let raw = fs::read_to_string(&manifest_path).map_err(InternalizeError::Io)?;
820 let manifest: InternalizationManifest =
821 serde_json::from_str(&raw).map_err(InternalizeError::Json)?;
822
823 let (source_hash_valid, source_verification) = verify_source_binding(&manifest)?;
824 let adapter_full_path = art_dir.join(&manifest.adapter_path);
825 let adapter_hash_valid = if adapter_full_path.exists() {
826 sha256_file(&adapter_full_path)? == manifest.adapter_hash
827 } else {
828 false
829 };
830 let expired = is_expired(manifest.expires_at.as_deref());
831 let replayable_claim_valid = replayable_claim_valid(&manifest);
832
833 let status = if expired {
834 "expired".to_string()
835 } else if !adapter_hash_valid || source_verification == "mismatch" || !replayable_claim_valid {
836 "integrity-failed".to_string()
837 } else if source_verification.starts_with("best-effort") {
838 "best-effort".to_string()
839 } else {
840 "valid".to_string()
841 };
842
843 Ok(InternalizationInspectResult {
844 schema_version: SCHEMA_VERSION.to_string(),
845 artifact_id: id.to_string(),
846 manifest,
847 integrity: IntegrityCheck {
848 source_hash_valid,
849 source_verification,
850 adapter_hash_valid,
851 manifest_consistent: true,
852 expired,
853 replayable_claim_valid,
854 },
855 status,
856 })
857}
858
859pub fn attach_internalization(
860 store_root: &Path,
861 id: &str,
862 session_id: &str,
863 tool: &str,
864 lease_seconds: u64,
865) -> Result<InternalizationAttachResult, InternalizeError> {
866 let inspection = inspect_internalization(store_root, id)?;
867
868 if inspection.integrity.expired {
869 return Err(InternalizeError::Expired {
870 artifact_id: id.to_string(),
871 expired_at: inspection
872 .manifest
873 .expires_at
874 .clone()
875 .unwrap_or_else(|| "unknown".to_string()),
876 });
877 }
878 if inspection.integrity.source_verification == "mismatch" {
879 let actual = if Path::new(&inspection.manifest.source_path).exists() {
880 sha256_file(Path::new(&inspection.manifest.source_path))?
881 } else {
882 "unavailable".to_string()
883 };
884 return Err(InternalizeError::SourceIntegrityFailed {
885 expected: inspection.manifest.source_hash.clone(),
886 actual,
887 });
888 }
889 if !inspection.integrity.adapter_hash_valid {
890 return Err(InternalizeError::AdapterIntegrityFailed {
891 expected: inspection.manifest.adapter_hash.clone(),
892 actual: "corrupted".to_string(),
893 });
894 }
895 if !inspection.integrity.replayable_claim_valid {
896 return Err(InternalizeError::ValidationError(
897 "Artifact replayability metadata is inconsistent with determinism policy".to_string(),
898 ));
899 }
900 if !tool_is_permitted(&inspection.manifest.capabilities_contract, tool) {
901 return Err(InternalizeError::ToolNotPermitted {
902 tool: tool.to_string(),
903 artifact_id: id.to_string(),
904 });
905 }
906
907 let attached_at = now_iso8601();
908 let lease_id = mount_id(id);
909 let lease_expires_at = iso8601_after_secs(lease_seconds);
910 let provenance_entry = ProvenanceEntry {
911 op: "internalize.attach".to_string(),
912 timestamp: attached_at.clone(),
913 actor: format!("session:{}", session_id),
914 inputs_hash: inspection.manifest.adapter_hash.clone(),
915 };
916
917 let mounts_dir = mount_dir(store_root, session_id);
918 fs::create_dir_all(&mounts_dir).map_err(InternalizeError::Io)?;
919 let mount = serde_json::json!({
920 "schema_version": SCHEMA_VERSION,
921 "artifact_id": id,
922 "session_id": session_id,
923 "tool": tool,
924 "lease_id": lease_id,
925 "lease_seconds": lease_seconds,
926 "mounted_at": attached_at,
927 "lease_expires_at": lease_expires_at,
928 "adapter_hash": inspection.manifest.adapter_hash,
929 "source_verification": inspection.integrity.source_verification,
930 "capabilities_contract": inspection.manifest.capabilities_contract,
931 "risk_classification": inspection.manifest.risk_tier.attach
932 });
933 fs::write(
934 mount_path(store_root, session_id, id),
935 serde_json::to_string_pretty(&mount).map_err(InternalizeError::Json)?,
936 )
937 .map_err(InternalizeError::Io)?;
938
939 let session_prov_dir = session_dir(store_root, session_id);
940 fs::create_dir_all(&session_prov_dir).map_err(InternalizeError::Io)?;
941 let attach_log = session_prov_dir.join(format!("internalize_attach_{}.json", id));
942 let attach_entry = serde_json::json!({
943 "op": "internalize.attach",
944 "artifact_id": id,
945 "session_id": session_id,
946 "tool": tool,
947 "lease_id": lease_id,
948 "lease_seconds": lease_seconds,
949 "lease_expires_at": lease_expires_at,
950 "timestamp": attached_at,
951 "adapter_hash": inspection.manifest.adapter_hash,
952 "capabilities_contract": inspection.manifest.capabilities_contract,
953 "risk_classification": inspection.manifest.risk_tier.attach,
954 "source_verification": inspection.integrity.source_verification,
955 });
956 fs::write(
957 attach_log,
958 serde_json::to_string_pretty(&attach_entry).map_err(InternalizeError::Json)?,
959 )
960 .map_err(InternalizeError::Io)?;
961
962 Ok(InternalizationAttachResult {
963 schema_version: SCHEMA_VERSION.to_string(),
964 success: true,
965 artifact_id: id.to_string(),
966 session_id: session_id.to_string(),
967 tool: tool.to_string(),
968 attached_at,
969 lease_id,
970 lease_seconds,
971 lease_expires_at,
972 expires_at: inspection.manifest.expires_at,
973 capabilities_contract: inspection.manifest.capabilities_contract,
974 risk_classification: inspection.manifest.risk_tier.attach,
975 source_verification: inspection.integrity.source_verification,
976 provenance_entry,
977 })
978}
979
980pub fn detach_internalization(
981 store_root: &Path,
982 id: &str,
983 session_id: &str,
984) -> Result<InternalizationDetachResult, InternalizeError> {
985 let mount_file = mount_path(store_root, session_id, id);
986 if !mount_file.exists() {
987 return Err(InternalizeError::MountNotFound {
988 artifact_id: id.to_string(),
989 session_id: session_id.to_string(),
990 });
991 }
992
993 let raw = fs::read_to_string(&mount_file).map_err(InternalizeError::Io)?;
994 let mount: serde_json::Value = serde_json::from_str(&raw).map_err(InternalizeError::Json)?;
995 let lease_id = mount
996 .get("lease_id")
997 .and_then(|v| v.as_str())
998 .unwrap_or("unknown")
999 .to_string();
1000 fs::remove_file(&mount_file).map_err(InternalizeError::Io)?;
1001
1002 let detached_at = now_iso8601();
1003 let session_prov_dir = session_dir(store_root, session_id);
1004 fs::create_dir_all(&session_prov_dir).map_err(InternalizeError::Io)?;
1005 let detach_log = session_prov_dir.join(format!("internalize_detach_{}.json", id));
1006 let detach_entry = serde_json::json!({
1007 "op": "internalize.detach",
1008 "artifact_id": id,
1009 "session_id": session_id,
1010 "lease_id": lease_id,
1011 "timestamp": detached_at,
1012 });
1013 fs::write(
1014 detach_log,
1015 serde_json::to_string_pretty(&detach_entry).map_err(InternalizeError::Json)?,
1016 )
1017 .map_err(InternalizeError::Io)?;
1018
1019 Ok(InternalizationDetachResult {
1020 schema_version: SCHEMA_VERSION.to_string(),
1021 success: true,
1022 artifact_id: id.to_string(),
1023 session_id: session_id.to_string(),
1024 detached_at,
1025 lease_id,
1026 detached: true,
1027 })
1028}
1029
1030pub fn manifest_json_schema() -> serde_json::Value {
1031 serde_json::json!({
1032 "$schema": "https://json-schema.org/draft/2020-12/schema",
1033 "$id": "https://decapod.dev/schemas/internalization/manifest-1.2.0.json",
1034 "title": "InternalizationManifest",
1035 "type": "object",
1036 "required": [
1037 "schema_version", "id", "source_hash", "source_path", "base_model_id",
1038 "internalizer_profile", "internalizer_version", "adapter_format", "created_at",
1039 "ttl_seconds", "provenance", "replay_recipe", "adapter_hash", "adapter_path",
1040 "capabilities_contract", "risk_tier", "determinism_class", "binary_hash",
1041 "runtime_fingerprint"
1042 ],
1043 "properties": {
1044 "schema_version": { "const": SCHEMA_VERSION },
1045 "id": { "type": "string", "pattern": "^int_[a-f0-9]{24}$" },
1046 "source_hash": { "type": "string", "pattern": "^[a-f0-9]{64}$" },
1047 "determinism_class": { "enum": ["deterministic", "best_effort"] },
1048 "binary_hash": { "type": "string", "minLength": 1 },
1049 "runtime_fingerprint": { "type": "string", "minLength": 1 }
1050 }
1051 })
1052}
1053
1054pub fn create_result_json_schema() -> serde_json::Value {
1055 serde_json::json!({
1056 "$schema": "https://json-schema.org/draft/2020-12/schema",
1057 "$id": "https://decapod.dev/schemas/internalization/create-result-1.2.0.json",
1058 "title": "InternalizationCreateResult",
1059 "type": "object",
1060 "required": [
1061 "schema_version", "success", "artifact_id", "artifact_path",
1062 "cache_hit", "manifest", "source_hash", "adapter_hash"
1063 ]
1064 })
1065}
1066
1067pub fn attach_result_json_schema() -> serde_json::Value {
1068 serde_json::json!({
1069 "$schema": "https://json-schema.org/draft/2020-12/schema",
1070 "$id": "https://decapod.dev/schemas/internalization/attach-result-1.2.0.json",
1071 "title": "InternalizationAttachResult",
1072 "type": "object",
1073 "required": [
1074 "schema_version", "success", "artifact_id", "session_id", "tool",
1075 "attached_at", "lease_id", "lease_seconds", "lease_expires_at"
1076 ]
1077 })
1078}
1079
1080pub fn detach_result_json_schema() -> serde_json::Value {
1081 serde_json::json!({
1082 "$schema": "https://json-schema.org/draft/2020-12/schema",
1083 "$id": "https://decapod.dev/schemas/internalization/detach-result-1.2.0.json",
1084 "title": "InternalizationDetachResult",
1085 "type": "object",
1086 "required": [
1087 "schema_version", "success", "artifact_id", "session_id",
1088 "detached_at", "lease_id", "detached"
1089 ]
1090 })
1091}
1092
1093pub fn inspect_result_json_schema() -> serde_json::Value {
1094 serde_json::json!({
1095 "$schema": "https://json-schema.org/draft/2020-12/schema",
1096 "$id": "https://decapod.dev/schemas/internalization/inspect-result-1.2.0.json",
1097 "title": "InternalizationInspectResult",
1098 "type": "object",
1099 "required": ["schema_version", "artifact_id", "manifest", "integrity", "status"]
1100 })
1101}
1102
1103pub fn schema() -> serde_json::Value {
1104 serde_json::json!({
1105 "name": "internalize",
1106 "version": SCHEMA_VERSION,
1107 "description": "Internalized context artifact lifecycle with explicit create, attach lease, detach, and inspect gates",
1108 "commands": [
1109 { "name": "create", "parameters": ["source", "model", "profile", "ttl", "scope", "format"] },
1110 { "name": "attach", "parameters": ["id", "session", "tool", "lease_seconds", "format"] },
1111 { "name": "detach", "parameters": ["id", "session", "format"] },
1112 { "name": "inspect", "parameters": ["id", "format"] }
1113 ]
1114 })
1115}
1116
1117pub fn run_internalize_cli(
1118 _store: &Store,
1119 store_root: &Path,
1120 cli: InternalizeCli,
1121) -> Result<(), crate::core::error::DecapodError> {
1122 match cli.command {
1123 InternalizeCommand::Create {
1124 source,
1125 model,
1126 profile,
1127 ttl,
1128 scopes,
1129 format,
1130 } => {
1131 let result =
1132 create_internalization(store_root, &source, &model, &profile, ttl, &scopes)?;
1133 if format == "json" {
1134 println!("{}", serde_json::to_string_pretty(&result).unwrap());
1135 } else {
1136 println!("Created internalization artifact: {}", result.artifact_id);
1137 }
1138 }
1139 InternalizeCommand::Attach {
1140 id,
1141 session,
1142 tool,
1143 lease_seconds,
1144 format,
1145 } => {
1146 let result = attach_internalization(store_root, &id, &session, &tool, lease_seconds)?;
1147 if format == "json" {
1148 println!("{}", serde_json::to_string_pretty(&result).unwrap());
1149 } else {
1150 println!(
1151 "Attached {} to session {} until {}",
1152 result.artifact_id, result.session_id, result.lease_expires_at
1153 );
1154 }
1155 }
1156 InternalizeCommand::Detach {
1157 id,
1158 session,
1159 format,
1160 } => {
1161 let result = detach_internalization(store_root, &id, &session)?;
1162 if format == "json" {
1163 println!("{}", serde_json::to_string_pretty(&result).unwrap());
1164 } else {
1165 println!(
1166 "Detached {} from session {}",
1167 result.artifact_id, result.session_id
1168 );
1169 }
1170 }
1171 InternalizeCommand::Inspect { id, format } => {
1172 let result = inspect_internalization(store_root, &id)?;
1173 if format == "json" {
1174 println!("{}", serde_json::to_string_pretty(&result).unwrap());
1175 } else {
1176 println!("Artifact: {}", result.artifact_id);
1177 println!(" Status: {}", result.status);
1178 }
1179 }
1180 }
1181 Ok(())
1182}