Skip to main content

agentforge_core/
finetune.rs

1use chrono::{DateTime, Utc};
2use serde::{Deserialize, Serialize};
3use uuid::Uuid;
4
5/// Supported export formats for fine-tuning datasets.
6#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, Hash)]
7#[serde(rename_all = "snake_case")]
8pub enum ExportFormat {
9    /// OpenAI fine-tuning JSONL format (chat messages).
10    OpenAi,
11    /// Anthropic fine-tuning JSONL format (prompt + completion pairs).
12    Anthropic,
13    /// HuggingFace datasets JSON-lines format.
14    HuggingFace,
15}
16
17impl std::fmt::Display for ExportFormat {
18    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
19        match self {
20            ExportFormat::OpenAi => write!(f, "openai"),
21            ExportFormat::Anthropic => write!(f, "anthropic"),
22            ExportFormat::HuggingFace => write!(f, "huggingface"),
23        }
24    }
25}
26
27impl std::str::FromStr for ExportFormat {
28    type Err = String;
29
30    fn from_str(s: &str) -> Result<Self, Self::Err> {
31        match s.to_lowercase().as_str() {
32            "openai" => Ok(ExportFormat::OpenAi),
33            "anthropic" => Ok(ExportFormat::Anthropic),
34            "huggingface" | "hf" => Ok(ExportFormat::HuggingFace),
35            other => Err(format!("Unknown export format: {other}")),
36        }
37    }
38}
39
40/// Status of a fine-tune export job.
41#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
42#[serde(rename_all = "snake_case")]
43pub enum ExportStatus {
44    Pending,
45    Running,
46    Complete,
47    Error,
48}
49
50impl std::fmt::Display for ExportStatus {
51    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
52        match self {
53            ExportStatus::Pending => write!(f, "pending"),
54            ExportStatus::Running => write!(f, "running"),
55            ExportStatus::Complete => write!(f, "complete"),
56            ExportStatus::Error => write!(f, "error"),
57        }
58    }
59}
60
61/// Persisted record of a fine-tune export.
62#[derive(Debug, Clone, Serialize, Deserialize)]
63pub struct FineTuneExport {
64    pub id: Uuid,
65    pub run_id: Uuid,
66    pub format: ExportFormat,
67    pub status: ExportStatus,
68    /// Number of (prompt, completion) pairs exported.
69    pub row_count: Option<u32>,
70    /// Path to the exported file (local or S3 URI).
71    pub file_path: Option<String>,
72    pub error_message: Option<String>,
73    pub created_at: DateTime<Utc>,
74    pub completed_at: Option<DateTime<Utc>>,
75}
76
/// Minimum number of labeled traces required before export is allowed.
///
/// Only the threshold is defined here; the check that enforces it is not part
/// of the visible code.
pub const MIN_TRACES_FOR_EXPORT: usize = 500;
79
#[cfg(test)]
mod tests {
    use super::*;
    use std::str::FromStr;

    /// Every accepted spelling parses to the expected variant, and the
    /// variant's `Display` text parses back to that same variant.
    #[test]
    fn format_roundtrip() {
        for (s, expected) in [
            ("openai", ExportFormat::OpenAi),
            ("anthropic", ExportFormat::Anthropic),
            ("huggingface", ExportFormat::HuggingFace),
            ("hf", ExportFormat::HuggingFace),
        ] {
            let parsed: ExportFormat = s.parse().unwrap();
            assert_eq!(parsed, expected);
            // Real round trip: Display output must be parseable back into the
            // same variant. Comparing two `to_string()` results would merely
            // restate the assertion above.
            assert_eq!(ExportFormat::from_str(&expected.to_string()), Ok(expected));
        }
    }

    /// Parsing lowercases its input first, so mixed-case names are accepted.
    #[test]
    fn parsing_ignores_case() {
        assert_eq!("OpenAI".parse::<ExportFormat>(), Ok(ExportFormat::OpenAi));
        assert_eq!("ANTHROPIC".parse::<ExportFormat>(), Ok(ExportFormat::Anthropic));
        assert_eq!("Hf".parse::<ExportFormat>(), Ok(ExportFormat::HuggingFace));
    }

    /// Names outside the supported set are rejected.
    #[test]
    fn unknown_format_errors() {
        assert!("parquet".parse::<ExportFormat>().is_err());
        assert!("".parse::<ExportFormat>().is_err());
    }

    /// `ExportStatus` prints as a lowercase word.
    #[test]
    fn status_display_names() {
        for (status, expected) in [
            (ExportStatus::Pending, "pending"),
            (ExportStatus::Running, "running"),
            (ExportStatus::Complete, "complete"),
            (ExportStatus::Error, "error"),
        ] {
            assert_eq!(status.to_string(), expected);
        }
    }
}