1pub mod dataset;
31pub mod huggingface_hub;
32pub mod pretrain;
33pub mod safetensors;
34
35pub use dataset::DatasetExporter;
36pub use huggingface_hub::HuggingFaceHub;
37pub use pretrain::{PretrainConfig, PretrainPipeline};
38pub use safetensors::SafeTensorsExporter;
39
40use crate::engine::SonaEngine;
41use crate::lora::{BaseLoRA, MicroLoRA};
42use crate::types::{LearnedPattern, SonaConfig};
43use serde::{Deserialize, Serialize};
44use std::path::Path;
45
/// Options controlling what gets exported and how it is packaged.
///
/// Consumed by [`HuggingFaceExporter`], [`SafeTensorsExporter`], and
/// [`DatasetExporter`]; serializable so it can be persisted alongside runs.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct ExportConfig {
    // Adapter/model name used in the README and repo identifiers.
    pub model_name: String,
    // Optional HuggingFace organization namespace (None => personal namespace).
    pub organization: Option<String>,
    // Base model identifier written into the PEFT adapter config (e.g. "phi-4").
    pub target_architecture: String,
    // Emit learned patterns as a JSONL dataset.
    pub include_patterns: bool,
    // Emit LoRA weights as SafeTensors.
    pub include_lora: bool,
    // Emit preference pairs (e.g. for DPO-style training).
    pub include_preferences: bool,
    // Minimum quality score for an item to be exported — presumably applied by
    // the dataset/safetensors exporters; TODO confirm in those modules.
    pub min_quality_threshold: f32,
    // Compress exported artifacts (handling lives in the sub-exporters).
    pub compress: bool,
}
66
67impl Default for ExportConfig {
68 fn default() -> Self {
69 Self {
70 model_name: "sona-adapter".to_string(),
71 organization: None,
72 target_architecture: "phi-4".to_string(),
73 include_patterns: true,
74 include_lora: true,
75 include_preferences: true,
76 min_quality_threshold: 0.5,
77 compress: false,
78 }
79 }
80}
81
/// Facade for exporting a [`SonaEngine`]'s learned state to HuggingFace-compatible
/// artifacts (SafeTensors weights, JSONL datasets, adapter config, README).
///
/// Borrows the engine for the exporter's lifetime; the engine is never mutated
/// by the visible export paths.
pub struct HuggingFaceExporter<'a> {
    // Engine whose LoRA weights, patterns, and preferences are exported.
    engine: &'a SonaEngine,
    // Export options; defaults via `ExportConfig::default()`.
    config: ExportConfig,
}
89
90impl<'a> HuggingFaceExporter<'a> {
91 pub fn new(engine: &'a SonaEngine) -> Self {
93 Self {
94 engine,
95 config: ExportConfig::default(),
96 }
97 }
98
99 pub fn with_config(engine: &'a SonaEngine, config: ExportConfig) -> Self {
101 Self { engine, config }
102 }
103
104 pub fn export_lora_safetensors<P: AsRef<Path>>(
106 &self,
107 output_dir: P,
108 ) -> Result<ExportResult, ExportError> {
109 let exporter = SafeTensorsExporter::new(&self.config);
110 exporter.export_engine(self.engine, output_dir)
111 }
112
113 pub fn export_patterns_jsonl<P: AsRef<Path>>(
115 &self,
116 output_path: P,
117 ) -> Result<ExportResult, ExportError> {
118 let exporter = DatasetExporter::new(&self.config);
119 exporter.export_patterns(self.engine, output_path)
120 }
121
122 pub fn export_preference_pairs<P: AsRef<Path>>(
124 &self,
125 output_path: P,
126 ) -> Result<ExportResult, ExportError> {
127 let exporter = DatasetExporter::new(&self.config);
128 exporter.export_preferences(self.engine, output_path)
129 }
130
131 pub fn push_to_hub(
133 &self,
134 repo_id: &str,
135 token: Option<&str>,
136 ) -> Result<ExportResult, ExportError> {
137 let hub = HuggingFaceHub::new(token);
138 hub.push_all(self.engine, &self.config, repo_id)
139 }
140
141 pub fn export_all<P: AsRef<Path>>(
143 &self,
144 output_dir: P,
145 ) -> Result<Vec<ExportResult>, ExportError> {
146 let output_dir = output_dir.as_ref();
147 std::fs::create_dir_all(output_dir).map_err(ExportError::Io)?;
148
149 let mut results = Vec::new();
150
151 if self.config.include_lora {
152 results.push(self.export_lora_safetensors(output_dir.join("lora"))?);
153 }
154
155 if self.config.include_patterns {
156 results.push(self.export_patterns_jsonl(output_dir.join("patterns.jsonl"))?);
157 }
158
159 if self.config.include_preferences {
160 results.push(self.export_preference_pairs(output_dir.join("preferences.jsonl"))?);
161 }
162
163 let config_path = output_dir.join("adapter_config.json");
165 let config_json = serde_json::to_string_pretty(&self.create_adapter_config())?;
166 std::fs::write(&config_path, config_json).map_err(ExportError::Io)?;
167
168 let readme_path = output_dir.join("README.md");
170 let readme = self.generate_readme();
171 std::fs::write(&readme_path, readme).map_err(ExportError::Io)?;
172
173 Ok(results)
174 }
175
176 fn create_adapter_config(&self) -> AdapterConfig {
178 let sona_config = self.engine.config();
179 AdapterConfig {
180 peft_type: "LORA".to_string(),
181 auto_mapping: None,
182 base_model_name_or_path: self.config.target_architecture.clone(),
183 revision: None,
184 task_type: "CAUSAL_LM".to_string(),
185 inference_mode: true,
186 r: sona_config.micro_lora_rank,
187 lora_alpha: sona_config.micro_lora_rank as f32,
188 lora_dropout: 0.0,
189 fan_in_fan_out: false,
190 bias: "none".to_string(),
191 target_modules: vec![
192 "q_proj".to_string(),
193 "k_proj".to_string(),
194 "v_proj".to_string(),
195 "o_proj".to_string(),
196 ],
197 modules_to_save: None,
198 layers_to_transform: None,
199 layers_pattern: None,
200 }
201 }
202
203 fn generate_readme(&self) -> String {
205 let stats = self.engine.stats();
206 format!(
207 r#"---
208license: mit
209library_name: peft
210base_model: {}
211tags:
212 - sona
213 - lora
214 - adaptive-learning
215 - ruvector
216---
217
218# {} SONA Adapter
219
220This adapter was generated using [SONA (Self-Optimizing Neural Architecture)](https://github.com/ruvnet/ruvector/tree/main/crates/sona).
221
222## Model Details
223
224- **Base Model**: {}
225- **PEFT Type**: LoRA
226- **Rank**: {}
227- **Patterns Learned**: {}
228- **Trajectories Processed**: {}
229
230## Training Details
231
232SONA uses two-tier LoRA adaptation:
233- **MicroLoRA**: Rank 1-2 for instant adaptation (<0.5ms)
234- **BaseLoRA**: Rank 4-16 for background learning
235
236### Performance Benchmarks
237
238| Metric | Value |
239|--------|-------|
240| Throughput | 2211 ops/sec |
241| Latency | <0.5ms per layer |
242| Quality Improvement | +55% max |
243
244## Usage
245
246```python
247from peft import PeftModel, PeftConfig
248from transformers import AutoModelForCausalLM
249
250# Load adapter
251config = PeftConfig.from_pretrained("your-username/{}")
252model = AutoModelForCausalLM.from_pretrained(config.base_model_name_or_path)
253model = PeftModel.from_pretrained(model, "your-username/{}")
254```
255
256## License
257
258MIT License - see [LICENSE](LICENSE) for details.
259
260---
261
262Generated with [ruvector-sona](https://crates.io/crates/ruvector-sona) v0.1.0
263"#,
264 self.config.target_architecture,
265 self.config.model_name,
266 self.config.target_architecture,
267 self.engine.config().micro_lora_rank,
268 stats.patterns_stored,
269 stats.trajectories_buffered,
270 self.config.model_name,
271 self.config.model_name,
272 )
273 }
274}
275
/// Serializable mirror of a PEFT `adapter_config.json`.
///
/// Field names match the keys the `peft` Python library expects, so the
/// serialized output can be loaded with `PeftConfig.from_pretrained`.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct AdapterConfig {
    // Always "LORA" for this exporter.
    pub peft_type: String,
    // Optional custom class mapping; omitted from JSON when None.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub auto_mapping: Option<serde_json::Value>,
    // Hub id or path of the base model the adapter applies to.
    pub base_model_name_or_path: String,
    // Base model revision (git ref); omitted when None.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub revision: Option<String>,
    // PEFT task type, e.g. "CAUSAL_LM".
    pub task_type: String,
    // True when the adapter is exported for inference rather than training.
    pub inference_mode: bool,
    // LoRA rank.
    pub r: usize,
    // LoRA scaling numerator (effective scale = lora_alpha / r).
    pub lora_alpha: f32,
    // Dropout applied to LoRA layers during training.
    pub lora_dropout: f32,
    // Whether the base layer stores weights as (fan_in, fan_out).
    pub fan_in_fan_out: bool,
    // Bias handling: "none", "all", or "lora_only".
    pub bias: String,
    // Module names the LoRA matrices attach to (e.g. "q_proj").
    pub target_modules: Vec<String>,
    // Extra modules saved alongside the adapter; omitted when None.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub modules_to_save: Option<Vec<String>>,
    // Restrict adaptation to specific layer indices; omitted when None.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub layers_to_transform: Option<Vec<usize>>,
    // Pattern used to locate layers when layers_to_transform is set.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub layers_pattern: Option<String>,
}
300
/// Summary of a single completed export operation.
#[derive(Clone, Debug)]
pub struct ExportResult {
    // Which artifact kind was produced.
    pub export_type: ExportType,
    // Number of items (tensors, patterns, pairs, ...) written.
    pub items_exported: usize,
    // Destination path (or repo location) as a display string.
    pub output_path: String,
    // Total bytes written.
    pub size_bytes: u64,
}
313
/// The kind of artifact produced by an export operation.
#[derive(Clone, Debug)]
pub enum ExportType {
    // LoRA weights in SafeTensors format.
    SafeTensors,
    // Learned-pattern JSONL dataset.
    PatternsDataset,
    // Preference-pair JSONL dataset.
    PreferencePairs,
    // Distillation targets (produced elsewhere; not in this module's paths).
    DistillationTargets,
    // PEFT adapter_config.json.
    AdapterConfig,
}
323
/// Errors that can occur during export or hub upload.
///
/// `From` impls below allow `?` on `std::io` and `serde_json` results.
#[derive(Debug)]
pub enum ExportError {
    // Filesystem failure (create/write/read).
    Io(std::io::Error),
    // JSON (de)serialization failure.
    Serialization(serde_json::Error),
    // Engine state that cannot be exported (message describes why).
    InvalidData(String),
    // Failure reported by the HuggingFace Hub client.
    HubError(String),
}
332
333impl From<std::io::Error> for ExportError {
334 fn from(e: std::io::Error) -> Self {
335 ExportError::Io(e)
336 }
337}
338
339impl From<serde_json::Error> for ExportError {
340 fn from(e: serde_json::Error) -> Self {
341 ExportError::Serialization(e)
342 }
343}
344
345impl std::fmt::Display for ExportError {
346 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
347 match self {
348 ExportError::Io(e) => write!(f, "IO error: {}", e),
349 ExportError::Serialization(e) => write!(f, "Serialization error: {}", e),
350 ExportError::InvalidData(msg) => write!(f, "Invalid data: {}", msg),
351 ExportError::HubError(msg) => write!(f, "HuggingFace Hub error: {}", msg),
352 }
353 }
354}
355
356impl std::error::Error for ExportError {}
357
#[cfg(test)]
mod tests {
    use super::*;

    /// Default config should name the adapter "sona-adapter" and enable
    /// both LoRA and pattern export.
    #[test]
    fn test_export_config_default() {
        let cfg = ExportConfig::default();
        assert!(cfg.include_lora);
        assert!(cfg.include_patterns);
        assert_eq!(cfg.model_name, "sona-adapter");
    }

    /// AdapterConfig round-trips through serde and contains the expected
    /// PEFT type and base model markers in its JSON form.
    #[test]
    fn test_adapter_config_serialization() {
        let adapter = AdapterConfig {
            peft_type: String::from("LORA"),
            base_model_name_or_path: String::from("microsoft/phi-4"),
            task_type: String::from("CAUSAL_LM"),
            bias: String::from("none"),
            target_modules: vec![String::from("q_proj")],
            inference_mode: true,
            r: 2,
            lora_alpha: 2.0,
            lora_dropout: 0.0,
            fan_in_fan_out: false,
            auto_mapping: None,
            revision: None,
            modules_to_save: None,
            layers_to_transform: None,
            layers_pattern: None,
        };

        let serialized = serde_json::to_string_pretty(&adapter).unwrap();
        assert!(serialized.contains("LORA"));
        assert!(serialized.contains("phi-4"));
    }
}