1use crate::engine::SonaEngine;
7use super::{ExportConfig, ExportResult, ExportType, ExportError, SafeTensorsExporter, DatasetExporter};
8use std::path::Path;
9
10#[cfg(feature = "serde-support")]
11use serde::{Deserialize, Serialize};
12
/// Client used to publish exported SONA artifacts to the HuggingFace Hub.
pub struct HuggingFaceHub {
    /// Access token used when cloning/pushing over git and when calling the
    /// repo-creation API; `None` means no credentials are attached.
    token: Option<String>,
    /// Base URL that API request paths (such as `/repos/create`) are appended to.
    api_url: String,
}
20
21impl HuggingFaceHub {
22 pub fn new(token: Option<&str>) -> Self {
24 Self {
25 token: token.map(|t| t.to_string()),
26 api_url: "https://huggingface.co/api".to_string(),
27 }
28 }
29
30 pub fn from_env() -> Self {
32 let token = std::env::var("HF_TOKEN")
33 .or_else(|_| std::env::var("HUGGING_FACE_HUB_TOKEN"))
34 .ok();
35 Self::new(token.as_deref())
36 }
37
38 pub fn push_all(
40 &self,
41 engine: &SonaEngine,
42 config: &ExportConfig,
43 repo_id: &str,
44 ) -> Result<ExportResult, ExportError> {
45 let temp_dir = std::env::temp_dir().join(format!("sona-export-{}", uuid_v4()));
47 std::fs::create_dir_all(&temp_dir).map_err(ExportError::Io)?;
48
49 let safetensors_exporter = SafeTensorsExporter::new(config);
51 let dataset_exporter = DatasetExporter::new(config);
52
53 let mut total_items = 0;
54 let mut total_size = 0u64;
55
56 if config.include_lora {
58 let result = safetensors_exporter.export_engine(engine, temp_dir.join("lora"))?;
59 total_items += result.items_exported;
60 total_size += result.size_bytes;
61 }
62
63 if config.include_patterns {
65 let result = dataset_exporter.export_patterns(engine, temp_dir.join("patterns.jsonl"))?;
66 total_items += result.items_exported;
67 total_size += result.size_bytes;
68 }
69
70 if config.include_preferences {
72 let result = dataset_exporter.export_preferences(engine, temp_dir.join("preferences.jsonl"))?;
73 total_items += result.items_exported;
74 total_size += result.size_bytes;
75 }
76
77 let readme = self.create_model_card(engine, config);
79 let readme_path = temp_dir.join("README.md");
80 std::fs::write(&readme_path, readme).map_err(ExportError::Io)?;
81
82 let adapter_config = self.create_adapter_config(engine, config);
84 let config_path = temp_dir.join("adapter_config.json");
85 let config_json = serde_json::to_string_pretty(&adapter_config)?;
86 std::fs::write(&config_path, config_json).map_err(ExportError::Io)?;
87
88 self.upload_directory(&temp_dir, repo_id)?;
90
91 let _ = std::fs::remove_dir_all(&temp_dir);
93
94 Ok(ExportResult {
95 export_type: ExportType::SafeTensors,
96 items_exported: total_items,
97 output_path: format!("https://huggingface.co/{}", repo_id),
98 size_bytes: total_size,
99 })
100 }
101
102 fn upload_directory(&self, local_path: &Path, repo_id: &str) -> Result<(), ExportError> {
104 let has_git = std::process::Command::new("git")
106 .arg("--version")
107 .output()
108 .is_ok();
109
110 if !has_git {
111 return Err(ExportError::HubError(
112 "git is required for HuggingFace Hub upload. Install git and git-lfs.".to_string()
113 ));
114 }
115
116 let repo_url = if let Some(ref token) = self.token {
118 format!("https://{}@huggingface.co/{}", token, repo_id)
119 } else {
120 format!("https://huggingface.co/{}", repo_id)
121 };
122
123 let clone_dir = local_path.parent().unwrap().join("hf-repo");
124
125 let clone_result = std::process::Command::new("git")
127 .args(["clone", &repo_url, clone_dir.to_str().unwrap()])
128 .output();
129
130 if clone_result.is_err() {
131 self.create_repo(repo_id)?;
133
134 std::process::Command::new("git")
136 .args(["clone", &repo_url, clone_dir.to_str().unwrap()])
137 .output()
138 .map_err(|e| ExportError::HubError(format!("Failed to clone repo: {}", e)))?;
139 }
140
141 copy_dir_recursive(local_path, &clone_dir)?;
143
144 std::process::Command::new("git")
146 .args(["-C", clone_dir.to_str().unwrap(), "add", "-A"])
147 .output()
148 .map_err(|e| ExportError::HubError(format!("git add failed: {}", e)))?;
149
150 std::process::Command::new("git")
151 .args(["-C", clone_dir.to_str().unwrap(), "commit", "-m", "Upload SONA adapter"])
152 .output()
153 .map_err(|e| ExportError::HubError(format!("git commit failed: {}", e)))?;
154
155 let push_result = std::process::Command::new("git")
156 .args(["-C", clone_dir.to_str().unwrap(), "push"])
157 .output()
158 .map_err(|e| ExportError::HubError(format!("git push failed: {}", e)))?;
159
160 if !push_result.status.success() {
161 let stderr = String::from_utf8_lossy(&push_result.stderr);
162 return Err(ExportError::HubError(format!("git push failed: {}", stderr)));
163 }
164
165 let _ = std::fs::remove_dir_all(&clone_dir);
167
168 Ok(())
169 }
170
171 fn create_repo(&self, repo_id: &str) -> Result<(), ExportError> {
173 let token = self.token.as_ref().ok_or_else(|| {
174 ExportError::HubError("HuggingFace token required to create repos".to_string())
175 })?;
176
177 let (organization, name) = if let Some(idx) = repo_id.find('/') {
179 (Some(&repo_id[..idx]), &repo_id[idx + 1..])
180 } else {
181 (None, repo_id)
182 };
183
184 let create_request = CreateRepoRequest {
185 name: name.to_string(),
186 organization: organization.map(|s| s.to_string()),
187 private: false,
188 repo_type: "model".to_string(),
189 };
190
191 let url = format!("{}/repos/create", self.api_url);
192
193 let body = serde_json::to_string(&create_request)?;
196
197 let output = std::process::Command::new("curl")
198 .args([
199 "-X", "POST",
200 "-H", &format!("Authorization: Bearer {}", token),
201 "-H", "Content-Type: application/json",
202 "-d", &body,
203 &url,
204 ])
205 .output()
206 .map_err(|e| ExportError::HubError(format!("curl failed: {}", e)))?;
207
208 if !output.status.success() {
209 let stderr = String::from_utf8_lossy(&output.stderr);
210 if !stderr.contains("already exists") {
212 return Err(ExportError::HubError(format!("Failed to create repo: {}", stderr)));
213 }
214 }
215
216 Ok(())
217 }
218
219 fn create_model_card(&self, engine: &SonaEngine, config: &ExportConfig) -> String {
221 let stats = engine.stats();
222 format!(r#"---
223license: mit
224library_name: peft
225base_model: {}
226tags:
227 - sona
228 - lora
229 - adaptive-learning
230 - ruvector
231---
232
233# {} SONA Adapter
234
235This adapter was generated using [SONA (Self-Optimizing Neural Architecture)](https://github.com/ruvnet/ruvector/tree/main/crates/sona) - a runtime-adaptive learning system.
236
237## Model Details
238
239- **Base Model**: {}
240- **PEFT Type**: LoRA (Two-Tier)
241- **MicroLoRA Rank**: {} (instant adaptation)
242- **BaseLoRA Rank**: {} (background learning)
243- **Patterns Learned**: {}
244- **Trajectories Processed**: {}
245
246## SONA Features
247
248### Two-Tier LoRA Architecture
249- **MicroLoRA**: Rank 1-2 for instant adaptation (<0.5ms latency)
250- **BaseLoRA**: Rank 4-16 for background learning
251
252### EWC++ (Elastic Weight Consolidation)
253Prevents catastrophic forgetting when learning new patterns.
254
255### ReasoningBank
256K-means++ clustering for efficient pattern storage and retrieval.
257
258## Performance Benchmarks
259
260| Metric | Value |
261|--------|-------|
262| Throughput | 2211 ops/sec |
263| Latency | <0.5ms per layer |
264| Quality Improvement | +55% max |
265
266## Usage with PEFT
267
268```python
269from peft import PeftModel, PeftConfig
270from transformers import AutoModelForCausalLM
271
272# Load adapter
273config = PeftConfig.from_pretrained("your-username/{}")
274model = AutoModelForCausalLM.from_pretrained(config.base_model_name_or_path)
275model = PeftModel.from_pretrained(model, "your-username/{}")
276
277# Use for inference
278outputs = model.generate(input_ids)
279```
280
281## Training with Included Datasets
282
283### Patterns Dataset
284```python
285from datasets import load_dataset
286
287patterns = load_dataset("json", data_files="patterns.jsonl")
288```
289
290### Preference Pairs (for DPO/RLHF)
291```python
292preferences = load_dataset("json", data_files="preferences.jsonl")
293```
294
295## License
296
297MIT License - see [LICENSE](LICENSE) for details.
298
299---
300
301Generated with [ruvector-sona](https://crates.io/crates/ruvector-sona) v{}
302"#,
303 config.target_architecture,
304 config.model_name,
305 config.target_architecture,
306 engine.config().micro_lora_rank,
307 engine.config().base_lora_rank,
308 stats.patterns_stored,
309 stats.trajectories_buffered,
310 config.model_name,
311 config.model_name,
312 env!("CARGO_PKG_VERSION"),
313 )
314 }
315
316 fn create_adapter_config(&self, engine: &SonaEngine, config: &ExportConfig) -> AdapterConfigJson {
318 let sona_config = engine.config();
319 AdapterConfigJson {
320 peft_type: "LORA".to_string(),
321 auto_mapping: None,
322 base_model_name_or_path: config.target_architecture.clone(),
323 revision: None,
324 task_type: "CAUSAL_LM".to_string(),
325 inference_mode: true,
326 r: sona_config.base_lora_rank,
327 lora_alpha: sona_config.base_lora_rank as f32,
328 lora_dropout: 0.0,
329 fan_in_fan_out: false,
330 bias: "none".to_string(),
331 target_modules: vec![
332 "q_proj".to_string(),
333 "k_proj".to_string(),
334 "v_proj".to_string(),
335 "o_proj".to_string(),
336 ],
337 modules_to_save: None,
338 layers_to_transform: None,
339 layers_pattern: None,
340 }
341 }
342}
343
/// JSON body sent to the Hub's `repos/create` endpoint.
///
/// The serde field attributes are gated on the same `serde-support` feature
/// as the derive, so the type also compiles when that feature is disabled.
#[cfg_attr(feature = "serde-support", derive(Serialize, Deserialize))]
#[derive(Clone, Debug)]
struct CreateRepoRequest {
    /// Repository name without the namespace prefix.
    name: String,
    /// Namespace that owns the repository; omitted from the JSON when `None`.
    #[cfg_attr(
        feature = "serde-support",
        serde(skip_serializing_if = "Option::is_none")
    )]
    organization: Option<String>,
    /// Whether the repository is created as private.
    private: bool,
    /// Repository kind; serialized under the key `type`.
    #[cfg_attr(feature = "serde-support", serde(rename = "type"))]
    repo_type: String,
}
355
356#[cfg_attr(feature = "serde-support", derive(Serialize, Deserialize))]
358#[derive(Clone, Debug)]
359pub struct AdapterConfigJson {
360 pub peft_type: String,
361 #[serde(skip_serializing_if = "Option::is_none")]
362 pub auto_mapping: Option<serde_json::Value>,
363 pub base_model_name_or_path: String,
364 #[serde(skip_serializing_if = "Option::is_none")]
365 pub revision: Option<String>,
366 pub task_type: String,
367 pub inference_mode: bool,
368 pub r: usize,
369 pub lora_alpha: f32,
370 pub lora_dropout: f32,
371 pub fan_in_fan_out: bool,
372 pub bias: String,
373 pub target_modules: Vec<String>,
374 #[serde(skip_serializing_if = "Option::is_none")]
375 pub modules_to_save: Option<Vec<String>>,
376 #[serde(skip_serializing_if = "Option::is_none")]
377 pub layers_to_transform: Option<Vec<usize>>,
378 #[serde(skip_serializing_if = "Option::is_none")]
379 pub layers_pattern: Option<String>,
380}
381
382fn uuid_v4() -> String {
384 use rand::Rng;
385 let mut rng = rand::thread_rng();
386 let bytes: [u8; 16] = rng.gen();
387 format!(
388 "{:02x}{:02x}{:02x}{:02x}-{:02x}{:02x}-{:02x}{:02x}-{:02x}{:02x}-{:02x}{:02x}{:02x}{:02x}{:02x}{:02x}",
389 bytes[0], bytes[1], bytes[2], bytes[3],
390 bytes[4], bytes[5],
391 (bytes[6] & 0x0f) | 0x40, bytes[7],
392 (bytes[8] & 0x3f) | 0x80, bytes[9],
393 bytes[10], bytes[11], bytes[12], bytes[13], bytes[14], bytes[15]
394 )
395}
396
397fn copy_dir_recursive(src: &Path, dst: &Path) -> Result<(), ExportError> {
399 if !dst.exists() {
400 std::fs::create_dir_all(dst).map_err(ExportError::Io)?;
401 }
402
403 for entry in std::fs::read_dir(src).map_err(ExportError::Io)? {
404 let entry = entry.map_err(ExportError::Io)?;
405 let path = entry.path();
406 let file_name = path.file_name().unwrap();
407 let dest_path = dst.join(file_name);
408
409 if path.is_dir() {
410 copy_dir_recursive(&path, &dest_path)?;
411 } else {
412 std::fs::copy(&path, &dest_path).map_err(ExportError::Io)?;
413 }
414 }
415
416 Ok(())
417}
418
#[cfg(test)]
mod tests {
    use super::*;

    /// Smoke test: building a client from the environment must not panic,
    /// whether or not the token variables are set.
    #[test]
    fn test_hub_from_env() {
        let _hub = HuggingFaceHub::from_env();
    }

    /// The constructor stores the token as given and targets the public API.
    #[test]
    fn test_hub_new() {
        let hub = HuggingFaceHub::new(Some("abc"));
        assert_eq!(hub.token.as_deref(), Some("abc"));
        assert_eq!(hub.api_url, "https://huggingface.co/api");

        let anonymous = HuggingFaceHub::new(None);
        assert!(anonymous.token.is_none());
    }

    /// The generated id has the 8-4-4-4-12 layout, lowercase hex digits, the
    /// version-4 marker and an RFC 4122 variant digit.
    #[test]
    fn test_uuid_v4() {
        let uuid = uuid_v4();
        assert_eq!(uuid.len(), 36);

        for (idx, ch) in uuid.chars().enumerate() {
            match idx {
                8 | 13 | 18 | 23 => assert_eq!(ch, '-'),
                14 => assert_eq!(ch, '4'),
                19 => assert!(matches!(ch, '8' | '9' | 'a' | 'b')),
                _ => assert!(ch.is_ascii_hexdigit() && !ch.is_ascii_uppercase()),
            }
        }

        // Two consecutive ids colliding would mean the randomness is broken.
        assert_ne!(uuid, uuid_v4());
    }

    /// Serialization keeps the populated fields and drops the `None` ones.
    #[test]
    fn test_adapter_config_json() {
        let config = AdapterConfigJson {
            peft_type: "LORA".to_string(),
            auto_mapping: None,
            base_model_name_or_path: "microsoft/phi-4".to_string(),
            revision: None,
            task_type: "CAUSAL_LM".to_string(),
            inference_mode: true,
            r: 8,
            lora_alpha: 8.0,
            lora_dropout: 0.0,
            fan_in_fan_out: false,
            bias: "none".to_string(),
            target_modules: vec!["q_proj".to_string()],
            modules_to_save: None,
            layers_to_transform: None,
            layers_pattern: None,
        };

        let json = serde_json::to_string_pretty(&config).unwrap();
        assert!(json.contains("LORA"));
        assert!(json.contains("phi-4"));
        assert!(json.contains("q_proj"));

        for omitted in [
            "auto_mapping",
            "revision",
            "modules_to_save",
            "layers_to_transform",
            "layers_pattern",
        ] {
            assert!(!json.contains(omitted), "{} should be omitted when None", omitted);
        }
    }
}