1pub mod dataset;
31pub mod huggingface_hub;
32pub mod pretrain;
33pub mod safetensors;
34
35pub use dataset::DatasetExporter;
36pub use huggingface_hub::HuggingFaceHub;
37pub use pretrain::{PretrainConfig, PretrainPipeline};
38pub use safetensors::SafeTensorsExporter;
39
40use crate::engine::SonaEngine;
41use serde::{Deserialize, Serialize};
42use std::path::Path;
43
/// Options controlling what a [`HuggingFaceExporter`] emits and how.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct ExportConfig {
    /// Name of the exported adapter; used in the generated README title.
    pub model_name: String,
    /// Hub organization/namespace — presumably used when pushing to the Hub;
    /// not referenced in this module (TODO confirm against `HuggingFaceHub`).
    pub organization: Option<String>,
    /// Base model identifier the adapter targets (written to
    /// `adapter_config.json` as `base_model_name_or_path`).
    pub target_architecture: String,
    /// Whether `export_all` emits the patterns JSONL dataset.
    pub include_patterns: bool,
    /// Whether `export_all` emits LoRA weights as SafeTensors.
    pub include_lora: bool,
    /// Whether `export_all` emits preference-pair JSONL data.
    pub include_preferences: bool,
    /// Minimum quality score for exported items — presumably enforced by the
    /// sub-exporters; not referenced in this module (TODO confirm).
    pub min_quality_threshold: f32,
    /// Whether outputs should be compressed — not referenced in this module
    /// (TODO confirm usage in sub-exporters).
    pub compress: bool,
}
64
65impl Default for ExportConfig {
66 fn default() -> Self {
67 Self {
68 model_name: "sona-adapter".to_string(),
69 organization: None,
70 target_architecture: "phi-4".to_string(),
71 include_patterns: true,
72 include_lora: true,
73 include_preferences: true,
74 min_quality_threshold: 0.5,
75 compress: false,
76 }
77 }
78}
79
/// High-level facade for exporting a [`SonaEngine`]'s learned state in
/// Hugging Face-compatible formats: SafeTensors LoRA weights, JSONL
/// datasets, PEFT adapter config, and direct Hub pushes.
pub struct HuggingFaceExporter<'a> {
    /// Engine whose state is read; borrowed for the exporter's lifetime.
    engine: &'a SonaEngine,
    /// Export options; see [`ExportConfig`].
    config: ExportConfig,
}
87
88impl<'a> HuggingFaceExporter<'a> {
89 pub fn new(engine: &'a SonaEngine) -> Self {
91 Self {
92 engine,
93 config: ExportConfig::default(),
94 }
95 }
96
97 pub fn with_config(engine: &'a SonaEngine, config: ExportConfig) -> Self {
99 Self { engine, config }
100 }
101
102 pub fn export_lora_safetensors<P: AsRef<Path>>(
104 &self,
105 output_dir: P,
106 ) -> Result<ExportResult, ExportError> {
107 let exporter = SafeTensorsExporter::new(&self.config);
108 exporter.export_engine(self.engine, output_dir)
109 }
110
111 pub fn export_patterns_jsonl<P: AsRef<Path>>(
113 &self,
114 output_path: P,
115 ) -> Result<ExportResult, ExportError> {
116 let exporter = DatasetExporter::new(&self.config);
117 exporter.export_patterns(self.engine, output_path)
118 }
119
120 pub fn export_preference_pairs<P: AsRef<Path>>(
122 &self,
123 output_path: P,
124 ) -> Result<ExportResult, ExportError> {
125 let exporter = DatasetExporter::new(&self.config);
126 exporter.export_preferences(self.engine, output_path)
127 }
128
129 pub fn push_to_hub(
131 &self,
132 repo_id: &str,
133 token: Option<&str>,
134 ) -> Result<ExportResult, ExportError> {
135 let hub = HuggingFaceHub::new(token);
136 hub.push_all(self.engine, &self.config, repo_id)
137 }
138
139 pub fn export_all<P: AsRef<Path>>(
141 &self,
142 output_dir: P,
143 ) -> Result<Vec<ExportResult>, ExportError> {
144 let output_dir = output_dir.as_ref();
145 std::fs::create_dir_all(output_dir).map_err(ExportError::Io)?;
146
147 let mut results = Vec::new();
148
149 if self.config.include_lora {
150 results.push(self.export_lora_safetensors(output_dir.join("lora"))?);
151 }
152
153 if self.config.include_patterns {
154 results.push(self.export_patterns_jsonl(output_dir.join("patterns.jsonl"))?);
155 }
156
157 if self.config.include_preferences {
158 results.push(self.export_preference_pairs(output_dir.join("preferences.jsonl"))?);
159 }
160
161 let config_path = output_dir.join("adapter_config.json");
163 let config_json = serde_json::to_string_pretty(&self.create_adapter_config())?;
164 std::fs::write(&config_path, config_json).map_err(ExportError::Io)?;
165
166 let readme_path = output_dir.join("README.md");
168 let readme = self.generate_readme();
169 std::fs::write(&readme_path, readme).map_err(ExportError::Io)?;
170
171 Ok(results)
172 }
173
174 fn create_adapter_config(&self) -> AdapterConfig {
176 let sona_config = self.engine.config();
177 AdapterConfig {
178 peft_type: "LORA".to_string(),
179 auto_mapping: None,
180 base_model_name_or_path: self.config.target_architecture.clone(),
181 revision: None,
182 task_type: "CAUSAL_LM".to_string(),
183 inference_mode: true,
184 r: sona_config.micro_lora_rank,
185 lora_alpha: sona_config.micro_lora_rank as f32,
186 lora_dropout: 0.0,
187 fan_in_fan_out: false,
188 bias: "none".to_string(),
189 target_modules: vec![
190 "q_proj".to_string(),
191 "k_proj".to_string(),
192 "v_proj".to_string(),
193 "o_proj".to_string(),
194 ],
195 modules_to_save: None,
196 layers_to_transform: None,
197 layers_pattern: None,
198 }
199 }
200
201 fn generate_readme(&self) -> String {
203 let stats = self.engine.stats();
204 format!(
205 r#"---
206license: mit
207library_name: peft
208base_model: {}
209tags:
210 - sona
211 - lora
212 - adaptive-learning
213 - ruvector
214---
215
216# {} SONA Adapter
217
218This adapter was generated using [SONA (Self-Optimizing Neural Architecture)](https://github.com/ruvnet/ruvector/tree/main/crates/sona).
219
220## Model Details
221
222- **Base Model**: {}
223- **PEFT Type**: LoRA
224- **Rank**: {}
225- **Patterns Learned**: {}
226- **Trajectories Processed**: {}
227
228## Training Details
229
230SONA uses two-tier LoRA adaptation:
231- **MicroLoRA**: Rank 1-2 for instant adaptation (<0.5ms)
232- **BaseLoRA**: Rank 4-16 for background learning
233
234### Performance Benchmarks
235
236| Metric | Value |
237|--------|-------|
238| Throughput | 2211 ops/sec |
239| Latency | <0.5ms per layer |
240| Quality Improvement | +55% max |
241
242## Usage
243
244```python
245from peft import PeftModel, PeftConfig
246from transformers import AutoModelForCausalLM
247
248# Load adapter
249config = PeftConfig.from_pretrained("your-username/{}")
250model = AutoModelForCausalLM.from_pretrained(config.base_model_name_or_path)
251model = PeftModel.from_pretrained(model, "your-username/{}")
252```
253
254## License
255
256MIT License - see [LICENSE](LICENSE) for details.
257
258---
259
260Generated with [ruvector-sona](https://crates.io/crates/ruvector-sona) v0.1.0
261"#,
262 self.config.target_architecture,
263 self.config.model_name,
264 self.config.target_architecture,
265 self.engine.config().micro_lora_rank,
266 stats.patterns_stored,
267 stats.trajectories_buffered,
268 self.config.model_name,
269 self.config.model_name,
270 )
271 }
272}
273
/// PEFT-compatible adapter metadata, serialized to `adapter_config.json` so
/// the exported weights load via `peft.PeftConfig` / `peft.PeftModel`.
///
/// Field names deliberately mirror the Python PEFT `LoraConfig` schema;
/// `None`-valued optionals are omitted from the JSON output entirely.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct AdapterConfig {
    /// Adapter family; set to "LORA" by this exporter.
    pub peft_type: String,
    /// Optional PEFT auto-mapping blob; omitted from JSON when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub auto_mapping: Option<serde_json::Value>,
    /// Hub id or local path of the base model the adapter applies to.
    pub base_model_name_or_path: String,
    /// Base-model revision (branch/tag/commit); omitted when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub revision: Option<String>,
    /// PEFT task type, e.g. "CAUSAL_LM".
    pub task_type: String,
    /// True when the adapter is exported for inference rather than training.
    pub inference_mode: bool,
    /// LoRA rank (dimension of the low-rank update).
    pub r: usize,
    /// LoRA scaling numerator; effective scale is `lora_alpha / r`.
    pub lora_alpha: f32,
    /// Dropout probability applied to LoRA layers during training.
    pub lora_dropout: f32,
    /// Whether the wrapped layer stores weights as (fan_in, fan_out).
    pub fan_in_fan_out: bool,
    /// Bias handling per PEFT convention ("none", "all", "lora_only").
    pub bias: String,
    /// Module names the LoRA matrices attach to (e.g. attention projections).
    pub target_modules: Vec<String>,
    /// Extra modules serialized alongside the adapter; omitted when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub modules_to_save: Option<Vec<String>>,
    /// Specific layer indices to transform; omitted when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub layers_to_transform: Option<Vec<usize>>,
    /// Pattern naming the layers container; omitted when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub layers_pattern: Option<String>,
}
298
/// Summary of a single completed export operation.
#[derive(Clone, Debug)]
pub struct ExportResult {
    /// Which kind of artifact was produced.
    pub export_type: ExportType,
    /// Number of items (tensors, patterns, pairs, ...) written.
    pub items_exported: usize,
    /// Destination of the artifact, rendered as a string.
    pub output_path: String,
    /// Size of the written artifact in bytes.
    pub size_bytes: u64,
}
311
/// Kinds of artifacts the export pipeline can produce.
#[derive(Clone, Debug)]
pub enum ExportType {
    /// LoRA weights in SafeTensors format.
    SafeTensors,
    /// JSONL dataset of learned patterns.
    PatternsDataset,
    /// JSONL dataset of preference pairs.
    PreferencePairs,
    /// Distillation training targets — constructed by sub-exporters, not in
    /// this module (confirm exact semantics there).
    DistillationTargets,
    /// PEFT `adapter_config.json` metadata.
    AdapterConfig,
}
321
/// Errors produced by the export pipeline.
#[derive(Debug)]
pub enum ExportError {
    /// Filesystem failure (directory creation, file writes, ...).
    Io(std::io::Error),
    /// JSON (de)serialization failure from `serde_json`.
    Serialization(serde_json::Error),
    /// Invalid or unusable data encountered during export; the message
    /// carries the details (raised by sub-exporters, not in this chunk).
    InvalidData(String),
    /// Failure while communicating with the Hugging Face Hub.
    HubError(String),
}
330
331impl From<std::io::Error> for ExportError {
332 fn from(e: std::io::Error) -> Self {
333 ExportError::Io(e)
334 }
335}
336
337impl From<serde_json::Error> for ExportError {
338 fn from(e: serde_json::Error) -> Self {
339 ExportError::Serialization(e)
340 }
341}
342
343impl std::fmt::Display for ExportError {
344 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
345 match self {
346 ExportError::Io(e) => write!(f, "IO error: {}", e),
347 ExportError::Serialization(e) => write!(f, "Serialization error: {}", e),
348 ExportError::InvalidData(msg) => write!(f, "Invalid data: {}", msg),
349 ExportError::HubError(msg) => write!(f, "HuggingFace Hub error: {}", msg),
350 }
351 }
352}
353
// Marker impl: `Debug` + `Display` above satisfy the trait's requirements,
// and the default `source()` (returning `None`) is acceptable here.
impl std::error::Error for ExportError {}
355
#[cfg(test)]
mod tests {
    use super::*;

    /// Defaults must match what `impl Default for ExportConfig` promises.
    #[test]
    fn test_export_config_default() {
        let config = ExportConfig::default();
        assert_eq!(config.model_name, "sona-adapter");
        assert_eq!(config.target_architecture, "phi-4");
        assert_eq!(config.organization, None);
        assert!(config.include_patterns);
        assert!(config.include_lora);
        assert!(config.include_preferences);
        assert!((config.min_quality_threshold - 0.5).abs() < f32::EPSILON);
        assert!(!config.compress);
    }

    /// Serialization must emit the PEFT fields and honor
    /// `skip_serializing_if` for `None`-valued optionals.
    #[test]
    fn test_adapter_config_serialization() {
        let config = AdapterConfig {
            peft_type: "LORA".to_string(),
            auto_mapping: None,
            base_model_name_or_path: "microsoft/phi-4".to_string(),
            revision: None,
            task_type: "CAUSAL_LM".to_string(),
            inference_mode: true,
            r: 2,
            lora_alpha: 2.0,
            lora_dropout: 0.0,
            fan_in_fan_out: false,
            bias: "none".to_string(),
            target_modules: vec!["q_proj".to_string()],
            modules_to_save: None,
            layers_to_transform: None,
            layers_pattern: None,
        };

        let json = serde_json::to_string_pretty(&config).unwrap();
        assert!(json.contains("LORA"));
        assert!(json.contains("phi-4"));
        // `skip_serializing_if = "Option::is_none"` must drop unset optionals.
        assert!(!json.contains("auto_mapping"));
        assert!(!json.contains("revision"));
        assert!(!json.contains("modules_to_save"));
        assert!(!json.contains("layers_to_transform"));
        assert!(!json.contains("layers_pattern"));
    }
}