Skip to main content

git_internal/internal/object/
provenance.rs

1//! AI Provenance snapshot.
2//!
3//! `Provenance` records the immutable model/provider configuration used
4//! for a `Run`.
5//!
6//! # How to use this object
7//!
8//! - Create `Provenance` when Libra has chosen the provider, model, and
9//!   generation parameters for a run.
10//! - Populate optional sampling and parameter fields before
11//!   persistence.
12//! - Keep it immutable after writing; usage and cost belong elsewhere.
13//!
14//! # How it works with other objects
15//!
16//! - `Run` is the canonical owner via `run_id`.
17//! - `RunUsage` stores tokens and cost for the same run.
18//!
19//! # How Libra should call it
20//!
21//! Libra should write `Provenance` once near run start, then later write
22//! `RunUsage` when consumption totals are known. Do not backfill usage
23//! onto the provenance snapshot.
24
25use std::fmt;
26
27use serde::{Deserialize, Serialize};
28use uuid::Uuid;
29
30use crate::{
31    errors::GitError,
32    hash::ObjectHash,
33    internal::object::{
34        ObjectTrait,
35        types::{ActorRef, Header, ObjectType},
36    },
37};
38
39/// Immutable provider/model configuration for one execution attempt.
40#[derive(Debug, Clone, Serialize, Deserialize)]
41#[serde(deny_unknown_fields)]
42pub struct Provenance {
43    /// Common object header carrying the immutable object id, type,
44    /// creator, and timestamps.
45    #[serde(flatten)]
46    header: Header,
47    /// Canonical owning run for this provider/model configuration.
48    run_id: Uuid,
49    /// Provider identifier, such as `openai`.
50    provider: String,
51    /// Model identifier, such as `gpt-5`.
52    model: String,
53    /// Provider-specific structured parameters captured as raw JSON.
54    #[serde(default, skip_serializing_if = "Option::is_none")]
55    parameters: Option<serde_json::Value>,
56    /// Optional top-level temperature convenience field.
57    #[serde(default, skip_serializing_if = "Option::is_none")]
58    temperature: Option<f64>,
59    /// Optional top-level max token convenience field.
60    #[serde(default, skip_serializing_if = "Option::is_none")]
61    max_tokens: Option<u64>,
62}
63
64impl Provenance {
65    /// Create a new provider/model configuration record for one run.
66    pub fn new(
67        created_by: ActorRef,
68        run_id: Uuid,
69        provider: impl Into<String>,
70        model: impl Into<String>,
71    ) -> Result<Self, String> {
72        Ok(Self {
73            header: Header::new(ObjectType::Provenance, created_by)?,
74            run_id,
75            provider: provider.into(),
76            model: model.into(),
77            parameters: None,
78            temperature: None,
79            max_tokens: None,
80        })
81    }
82
83    /// Return the immutable header for this provenance record.
84    pub fn header(&self) -> &Header {
85        &self.header
86    }
87
88    /// Return the canonical owning run id.
89    pub fn run_id(&self) -> Uuid {
90        self.run_id
91    }
92
93    /// Return the provider identifier.
94    pub fn provider(&self) -> &str {
95        &self.provider
96    }
97
98    /// Return the model identifier.
99    pub fn model(&self) -> &str {
100        &self.model
101    }
102
103    /// Return the raw structured parameters, if present.
104    pub fn parameters(&self) -> Option<&serde_json::Value> {
105        self.parameters.as_ref()
106    }
107
108    /// Return the effective temperature, checking the explicit field
109    /// first and the raw parameters second.
110    pub fn temperature(&self) -> Option<f64> {
111        self.temperature.or_else(|| {
112            self.parameters
113                .as_ref()
114                .and_then(|p| p.get("temperature"))
115                .and_then(|v| v.as_f64())
116        })
117    }
118
119    /// Return the effective max token limit, checking the explicit field
120    /// first and the raw parameters second.
121    pub fn max_tokens(&self) -> Option<u64> {
122        self.max_tokens.or_else(|| {
123            self.parameters
124                .as_ref()
125                .and_then(|p| p.get("max_tokens"))
126                .and_then(|v| v.as_u64())
127        })
128    }
129
130    /// Set or clear the raw structured provider parameters.
131    pub fn set_parameters(&mut self, parameters: Option<serde_json::Value>) {
132        self.parameters = parameters;
133    }
134
135    /// Set or clear the top-level temperature field.
136    pub fn set_temperature(&mut self, temperature: Option<f64>) {
137        self.temperature = temperature;
138    }
139
140    /// Set or clear the top-level max token field.
141    pub fn set_max_tokens(&mut self, max_tokens: Option<u64>) {
142        self.max_tokens = max_tokens;
143    }
144}
145
146impl fmt::Display for Provenance {
147    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
148        write!(f, "Provenance: {}", self.header.object_id())
149    }
150}
151
152impl ObjectTrait for Provenance {
153    fn from_bytes(data: &[u8], _hash: ObjectHash) -> Result<Self, GitError>
154    where
155        Self: Sized,
156    {
157        serde_json::from_slice(data).map_err(|e| GitError::InvalidObjectInfo(e.to_string()))
158    }
159
160    fn get_type(&self) -> ObjectType {
161        ObjectType::Provenance
162    }
163
164    fn get_size(&self) -> usize {
165        match serde_json::to_vec(self) {
166            Ok(v) => v.len(),
167            Err(e) => {
168                tracing::warn!("failed to compute Provenance size: {}", e);
169                0
170            }
171        }
172    }
173
174    fn to_data(&self) -> Result<Vec<u8>, GitError> {
175        serde_json::to_vec(self).map_err(|e| GitError::InvalidObjectInfo(e.to_string()))
176    }
177}
178
#[cfg(test)]
mod tests {
    use super::*;

    // What this module checks:
    // - run id, provider, and model are stored and returned unchanged
    // - temperature and max_tokens fall back to the raw parameters when the
    //   explicit fields are unset

    /// Builds a record, attaches raw parameters only, and verifies both the
    /// stored identifiers and the parameter-backed fallbacks.
    #[test]
    fn test_provenance_fields() {
        let owner_run = Uuid::from_u128(0x42);
        let creator = ActorRef::agent("planner").expect("actor");
        let mut record =
            Provenance::new(creator, owner_run, "openai", "gpt-5").expect("prov");

        let raw_params = serde_json::json!({"temperature": 0.2, "max_tokens": 2048});
        record.set_parameters(Some(raw_params));

        // Identifiers come back exactly as supplied.
        assert_eq!(record.run_id(), owner_run);
        assert_eq!(record.provider(), "openai");
        assert_eq!(record.model(), "gpt-5");

        // Explicit fields were never set, so values come from the raw JSON.
        assert_eq!(record.temperature(), Some(0.2));
        assert_eq!(record.max_tokens(), Some(2048));
    }
}