//! Export SONA engine state to HuggingFace-compatible formats: SafeTensors
//! LoRA adapter weights, JSONL training datasets, and direct Hub uploads.

pub mod safetensors;
pub mod dataset;
pub mod huggingface_hub;
pub mod pretrain;

pub use safetensors::SafeTensorsExporter;
pub use dataset::DatasetExporter;
pub use huggingface_hub::HuggingFaceHub;
pub use pretrain::{PretrainConfig, PretrainPipeline};

use crate::engine::SonaEngine;
use crate::types::{LearnedPattern, SonaConfig};
use crate::lora::{MicroLoRA, BaseLoRA};
use serde::{Deserialize, Serialize};
use std::path::Path;

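/// Controls what the exporters emit and how the output is labeled.
///
/// A minimal sketch of customizing the defaults (field values here are
/// illustrative, not recommendations):
///
/// ```ignore
/// let config = ExportConfig {
///     model_name: "my-sona-adapter".to_string(),
///     target_architecture: "microsoft/phi-4".to_string(),
///     min_quality_threshold: 0.7,
///     ..ExportConfig::default()
/// };
/// ```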
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct ExportConfig {
    /// Name used for the exported adapter.
    pub model_name: String,
    /// Optional HuggingFace organization (or username) that owns the repo.
    pub organization: Option<String>,
    /// Base model architecture the adapter targets.
    pub target_architecture: String,
    /// Export learned patterns as a JSONL dataset.
    pub include_patterns: bool,
    /// Export LoRA weights as SafeTensors.
    pub include_lora: bool,
    /// Export preference pairs as a JSONL dataset.
    pub include_preferences: bool,
    /// Minimum quality score a pattern must reach to be exported.
    pub min_quality_threshold: f32,
    /// Compress exported artifacts.
    pub compress: bool,
}

impl Default for ExportConfig {
    fn default() -> Self {
        Self {
            model_name: "sona-adapter".to_string(),
            organization: None,
            target_architecture: "phi-4".to_string(),
            include_patterns: true,
            include_lora: true,
            include_preferences: true,
            min_quality_threshold: 0.5,
            compress: false,
        }
    }
}

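/// High-level entry point that ties the individual exporters together.
///
/// A minimal usage sketch, assuming a [`SonaEngine`] instance is already
/// available (constructing the engine is outside this module):
///
/// ```ignore
/// fn export_for_hf(engine: &SonaEngine) -> Result<(), ExportError> {
///     let exporter = HuggingFaceExporter::new(engine);
///     // Writes LoRA weights, datasets, adapter_config.json, and README.md.
///     let results = exporter.export_all("hf_export")?;
///     for result in &results {
///         println!(
///             "{:?}: {} items -> {} ({} bytes)",
///             result.export_type, result.items_exported, result.output_path, result.size_bytes
///         );
///     }
///     Ok(())
/// }
/// ```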
pub struct HuggingFaceExporter<'a> {
    /// Engine whose learned state is being exported.
    engine: &'a SonaEngine,
    /// Export configuration.
    config: ExportConfig,
}

impl<'a> HuggingFaceExporter<'a> {
    /// Create an exporter with the default [`ExportConfig`].
    pub fn new(engine: &'a SonaEngine) -> Self {
        Self {
            engine,
            config: ExportConfig::default(),
        }
    }

    /// Create an exporter with a custom configuration.
    pub fn with_config(engine: &'a SonaEngine, config: ExportConfig) -> Self {
        Self { engine, config }
    }

    /// Export LoRA weights in SafeTensors format into `output_dir`.
    pub fn export_lora_safetensors<P: AsRef<Path>>(&self, output_dir: P) -> Result<ExportResult, ExportError> {
        let exporter = SafeTensorsExporter::new(&self.config);
        exporter.export_engine(self.engine, output_dir)
    }

    /// Export learned patterns as a JSONL dataset to `output_path`.
    pub fn export_patterns_jsonl<P: AsRef<Path>>(&self, output_path: P) -> Result<ExportResult, ExportError> {
        let exporter = DatasetExporter::new(&self.config);
        exporter.export_patterns(self.engine, output_path)
    }

    /// Export preference pairs as a JSONL dataset to `output_path`.
    pub fn export_preference_pairs<P: AsRef<Path>>(&self, output_path: P) -> Result<ExportResult, ExportError> {
        let exporter = DatasetExporter::new(&self.config);
        exporter.export_preferences(self.engine, output_path)
    }

    /// Push all configured artifacts to the HuggingFace Hub repository `repo_id`.
    pub fn push_to_hub(&self, repo_id: &str, token: Option<&str>) -> Result<ExportResult, ExportError> {
        let hub = HuggingFaceHub::new(token);
        hub.push_all(self.engine, &self.config, repo_id)
    }

    /// Export everything enabled in the configuration into `output_dir`,
    /// plus `adapter_config.json` and a generated `README.md` model card.
    pub fn export_all<P: AsRef<Path>>(&self, output_dir: P) -> Result<Vec<ExportResult>, ExportError> {
        let output_dir = output_dir.as_ref();
        std::fs::create_dir_all(output_dir).map_err(ExportError::Io)?;

        let mut results = Vec::new();

        if self.config.include_lora {
            results.push(self.export_lora_safetensors(output_dir.join("lora"))?);
        }

        if self.config.include_patterns {
            results.push(self.export_patterns_jsonl(output_dir.join("patterns.jsonl"))?);
        }

        if self.config.include_preferences {
            results.push(self.export_preference_pairs(output_dir.join("preferences.jsonl"))?);
        }

        let config_path = output_dir.join("adapter_config.json");
        let config_json = serde_json::to_string_pretty(&self.create_adapter_config())?;
        std::fs::write(&config_path, config_json).map_err(ExportError::Io)?;

        let readme_path = output_dir.join("README.md");
        let readme = self.generate_readme();
        std::fs::write(&readme_path, readme).map_err(ExportError::Io)?;

        Ok(results)
    }

    fn create_adapter_config(&self) -> AdapterConfig {
        let sona_config = self.engine.config();
        AdapterConfig {
            peft_type: "LORA".to_string(),
            auto_mapping: None,
            base_model_name_or_path: self.config.target_architecture.clone(),
            revision: None,
            task_type: "CAUSAL_LM".to_string(),
            inference_mode: true,
            r: sona_config.micro_lora_rank,
            lora_alpha: sona_config.micro_lora_rank as f32,
            lora_dropout: 0.0,
            fan_in_fan_out: false,
            bias: "none".to_string(),
            target_modules: vec![
                "q_proj".to_string(),
                "k_proj".to_string(),
                "v_proj".to_string(),
                "o_proj".to_string(),
            ],
            modules_to_save: None,
            layers_to_transform: None,
            layers_pattern: None,
        }
    }

    fn generate_readme(&self) -> String {
        let stats = self.engine.stats();
        format!(r#"---
license: mit
library_name: peft
base_model: {}
tags:
  - sona
  - lora
  - adaptive-learning
  - ruvector
---

# {} SONA Adapter

This adapter was generated using [SONA (Self-Optimizing Neural Architecture)](https://github.com/ruvnet/ruvector/tree/main/crates/sona).

## Model Details

- **Base Model**: {}
- **PEFT Type**: LoRA
- **Rank**: {}
- **Patterns Learned**: {}
- **Trajectories Processed**: {}

## Training Details

SONA uses two-tier LoRA adaptation:
- **MicroLoRA**: Rank 1-2 for instant adaptation (<0.5ms)
- **BaseLoRA**: Rank 4-16 for background learning

### Performance Benchmarks

| Metric | Value |
|--------|-------|
| Throughput | 2211 ops/sec |
| Latency | <0.5ms per layer |
| Quality Improvement | +55% max |

## Usage

```python
from peft import PeftModel, PeftConfig
from transformers import AutoModelForCausalLM

# Load adapter
config = PeftConfig.from_pretrained("your-username/{}")
model = AutoModelForCausalLM.from_pretrained(config.base_model_name_or_path)
model = PeftModel.from_pretrained(model, "your-username/{}")
```

## License

MIT License - see [LICENSE](LICENSE) for details.

---

Generated with [ruvector-sona](https://crates.io/crates/ruvector-sona) v0.1.0
"#,
            self.config.target_architecture,
            self.config.model_name,
            self.config.target_architecture,
            self.engine.config().micro_lora_rank,
            stats.patterns_stored,
            stats.trajectories_buffered,
            self.config.model_name,
            self.config.model_name,
        )
    }
}

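/// PEFT-compatible adapter configuration, serialized to `adapter_config.json`
/// by [`HuggingFaceExporter::export_all`] so the exported weights can be
/// loaded via `peft.PeftModel.from_pretrained` as shown in the generated README.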
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct AdapterConfig {
    pub peft_type: String,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub auto_mapping: Option<serde_json::Value>,
    pub base_model_name_or_path: String,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub revision: Option<String>,
    pub task_type: String,
    pub inference_mode: bool,
    pub r: usize,
    pub lora_alpha: f32,
    pub lora_dropout: f32,
    pub fan_in_fan_out: bool,
    pub bias: String,
    pub target_modules: Vec<String>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub modules_to_save: Option<Vec<String>>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub layers_to_transform: Option<Vec<usize>>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub layers_pattern: Option<String>,
}

/// Summary of a single completed export operation.
#[derive(Clone, Debug)]
pub struct ExportResult {
    /// Kind of artifact that was exported.
    pub export_type: ExportType,
    /// Number of items written.
    pub items_exported: usize,
    /// Path the artifact was written to.
    pub output_path: String,
    /// Size of the written artifact in bytes.
    pub size_bytes: u64,
}

/// Kinds of artifacts the exporters can produce.
#[derive(Clone, Debug)]
pub enum ExportType {
    SafeTensors,
    PatternsDataset,
    PreferencePairs,
    DistillationTargets,
    AdapterConfig,
}

#[derive(Debug)]
pub enum ExportError {
    Io(std::io::Error),
    Serialization(serde_json::Error),
    InvalidData(String),
    HubError(String),
}

impl From<std::io::Error> for ExportError {
    fn from(e: std::io::Error) -> Self {
        ExportError::Io(e)
    }
}

impl From<serde_json::Error> for ExportError {
    fn from(e: serde_json::Error) -> Self {
        ExportError::Serialization(e)
    }
}

impl std::fmt::Display for ExportError {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            ExportError::Io(e) => write!(f, "IO error: {}", e),
            ExportError::Serialization(e) => write!(f, "Serialization error: {}", e),
            ExportError::InvalidData(msg) => write!(f, "Invalid data: {}", msg),
            ExportError::HubError(msg) => write!(f, "HuggingFace Hub error: {}", msg),
        }
    }
}

impl std::error::Error for ExportError {}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_export_config_default() {
        let config = ExportConfig::default();
        assert_eq!(config.model_name, "sona-adapter");
        assert!(config.include_patterns);
        assert!(config.include_lora);
    }

    #[test]
    fn test_adapter_config_serialization() {
        let config = AdapterConfig {
            peft_type: "LORA".to_string(),
            auto_mapping: None,
            base_model_name_or_path: "microsoft/phi-4".to_string(),
            revision: None,
            task_type: "CAUSAL_LM".to_string(),
            inference_mode: true,
            r: 2,
            lora_alpha: 2.0,
            lora_dropout: 0.0,
            fan_in_fan_out: false,
            bias: "none".to_string(),
            target_modules: vec!["q_proj".to_string()],
            modules_to_save: None,
            layers_to_transform: None,
            layers_pattern: None,
        };

        let json = serde_json::to_string_pretty(&config).unwrap();
        assert!(json.contains("LORA"));
        assert!(json.contains("phi-4"));
    }
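
    // Small additional check: ExportError's Display impl should yield the
    // human-readable messages formatted above.
    #[test]
    fn test_export_error_display() {
        let err = ExportError::InvalidData("empty pattern set".to_string());
        assert_eq!(err.to_string(), "Invalid data: empty pattern set");

        let err = ExportError::HubError("upload rejected".to_string());
        assert_eq!(err.to_string(), "HuggingFace Hub error: upload rejected");
    }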
}