Skip to main content

git_internal/internal/object/
provenance.rs

1//! AI Provenance Definition
2//!
3//! `Provenance` captures metadata about *how* a run was executed, specifically focusing on
4//! the model (LLM) and provider configuration.
5//!
6//! # Usage
7//!
8//! This is critical for:
9//! - **Reproducibility**: Knowing which model version produced a result.
10//! - **Cost Accounting**: Tracking token usage per run.
11//! - **Optimization**: Comparing performance across different models or parameters.
12
13use std::fmt;
14
15use serde::{Deserialize, Serialize};
16use uuid::Uuid;
17
18use crate::{
19    errors::GitError,
20    hash::ObjectHash,
21    internal::object::{
22        ObjectTrait,
23        types::{ActorRef, Header, ObjectType},
24    },
25};
26
27/// Provenance object for model/provider metadata.
28/// Captures model/provider settings and usage.
29#[derive(Debug, Clone, Serialize, Deserialize)]
30pub struct Provenance {
31    #[serde(flatten)]
32    header: Header,
33    run_id: Uuid,
34    provider: String,
35    model: String,
36    parameters: Option<serde_json::Value>,
37    token_usage: Option<serde_json::Value>,
38}
39
40impl Provenance {
41    pub fn new(
42        repo_id: Uuid,
43        created_by: ActorRef,
44        run_id: Uuid,
45        provider: impl Into<String>,
46        model: impl Into<String>,
47    ) -> Result<Self, String> {
48        Ok(Self {
49            header: Header::new(ObjectType::Provenance, repo_id, created_by)?,
50            run_id,
51            provider: provider.into(),
52            model: model.into(),
53            parameters: None,
54            token_usage: None,
55        })
56    }
57
58    pub fn header(&self) -> &Header {
59        &self.header
60    }
61
62    pub fn run_id(&self) -> Uuid {
63        self.run_id
64    }
65
66    pub fn provider(&self) -> &str {
67        &self.provider
68    }
69
70    pub fn model(&self) -> &str {
71        &self.model
72    }
73
74    pub fn parameters(&self) -> Option<&serde_json::Value> {
75        self.parameters.as_ref()
76    }
77
78    pub fn token_usage(&self) -> Option<&serde_json::Value> {
79        self.token_usage.as_ref()
80    }
81
82    pub fn set_parameters(&mut self, parameters: Option<serde_json::Value>) {
83        self.parameters = parameters;
84    }
85
86    pub fn set_token_usage(&mut self, token_usage: Option<serde_json::Value>) {
87        self.token_usage = token_usage;
88    }
89}
90
91impl fmt::Display for Provenance {
92    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
93        write!(f, "Provenance: {}", self.header.object_id())
94    }
95}
96
97impl ObjectTrait for Provenance {
98    fn from_bytes(data: &[u8], _hash: ObjectHash) -> Result<Self, GitError>
99    where
100        Self: Sized,
101    {
102        serde_json::from_slice(data).map_err(|e| GitError::InvalidObjectInfo(e.to_string()))
103    }
104
105    fn get_type(&self) -> ObjectType {
106        ObjectType::Provenance
107    }
108
109    fn get_size(&self) -> usize {
110        serde_json::to_vec(self).map(|v| v.len()).unwrap_or(0)
111    }
112
113    fn to_data(&self) -> Result<Vec<u8>, GitError> {
114        serde_json::to_vec(self).map_err(|e| GitError::InvalidObjectInfo(e.to_string()))
115    }
116}
117
#[cfg(test)]
mod tests {
    use super::*;

    // Verifies that the constructor stores its arguments, that the optional
    // JSON payloads start empty, and that the setters store (and can clear)
    // exactly the values they are given.
    #[test]
    fn test_provenance_fields() {
        let repo_id = Uuid::from_u128(0x0123456789abcdef0123456789abcdef);
        let actor = ActorRef::agent("test-agent").expect("actor");
        let run_id = Uuid::from_u128(0x1);

        let mut provenance =
            Provenance::new(repo_id, actor, run_id, "openai", "gpt-4").expect("provenance");

        // Constructor-populated fields.
        assert_eq!(provenance.run_id(), run_id);
        assert_eq!(provenance.provider(), "openai");
        assert_eq!(provenance.model(), "gpt-4");

        // Optional payloads are absent until explicitly set.
        assert!(provenance.parameters().is_none());
        assert!(provenance.token_usage().is_none());

        let parameters = serde_json::json!({"temperature": 0.2});
        let token_usage = serde_json::json!({"input": 10, "output": 5});
        provenance.set_parameters(Some(parameters.clone()));
        provenance.set_token_usage(Some(token_usage.clone()));

        // Compare the stored values, not just their presence.
        assert_eq!(provenance.parameters(), Some(&parameters));
        assert_eq!(provenance.token_usage(), Some(&token_usage));

        // Passing `None` clears a previously stored payload.
        provenance.set_parameters(None);
        provenance.set_token_usage(None);
        assert!(provenance.parameters().is_none());
        assert!(provenance.token_usage().is_none());
    }
}