use super::super::{base64_encode, HfHubClient, HfHubError, ModelCard, Result};
// Size of each multipart LFS upload chunk: 5 GiB (matches HF Hub's 5 GiB
// single-PUT threshold referenced in the logs below).
// NOTE(review): this product overflows `usize` at compile time on a 32-bit
// target — confirm this crate only builds for 64-bit.
const LFS_CHUNK_SIZE: usize = 5 * 1024 * 1024 * 1024;
impl HfHubClient {
/// Step 1 of the LFS upload flow: ask HF Hub how to upload `filename`.
///
/// POSTs `{api_base}/api/models/{repo_id}/preupload/main` with the file's
/// path, size, and a base64 sample of its first 512 bytes, then returns the
/// first entry of the response's `files` array (carries the upload mode and
/// any presigned URLs). `sha256` is used for logging only here.
///
/// # Errors
/// Returns `HfHubError::NetworkError` when the HTTP call fails, the response
/// is not valid JSON, or the `files` array is missing or empty.
#[cfg(feature = "hf-hub-integration")]
pub(crate) fn send_preupload_request(
&self,
repo_id: &str,
filename: &str,
data: &[u8],
sha256: &str,
token: &str,
) -> Result<serde_json::Value> {
// Preupload endpoint is pinned to the `main` revision.
let preupload_url = format!("{}/api/models/{}/preupload/main", self.api_base, repo_id);
eprintln!(
"[LFS] Step 1: Requesting upload URLs from {}",
preupload_url
);
#[allow(clippy::disallowed_methods)]
let preupload_body = serde_json::json!({
"files": [{
"path": filename,
"size": data.len(),
// Content sample capped at 512 bytes; `min` keeps empty `data` safe.
"sample": base64_encode(&data[..data.len().min(512)])
}]
});
eprintln!(
"[LFS] Preupload request (size={}, sha256={}...)",
data.len(),
// `get(..16)` avoids panicking if the digest string is short.
sha256.get(..16).unwrap_or(sha256)
);
let preupload_resp = match ureq::post(&preupload_url)
.set("Authorization", &format!("Bearer {token}"))
.set("Content-Type", "application/json")
.send_json(&preupload_body)
{
Ok(resp) => resp,
// ureq reports HTTP 4xx/5xx as `Error::Status`; capture the body for diagnostics.
Err(ureq::Error::Status(code, resp)) => {
let body = resp
.into_string()
.unwrap_or_else(|_| "unable to read body".to_string());
eprintln!(
"[LFS] ERROR: Preupload failed with status {}: {}",
code, body
);
return Err(HfHubError::NetworkError(format!(
"Preupload failed (HTTP {}): {}",
code, body
)));
}
// Transport-level failure (DNS, TLS, timeout, ...).
Err(e) => {
eprintln!("[LFS] ERROR: Preupload request failed: {}", e);
return Err(HfHubError::NetworkError(format!("Preupload failed: {e}")));
}
};
eprintln!(
"[LFS] Preupload response status: {}",
preupload_resp.status()
);
let preupload_data: serde_json::Value = preupload_resp.into_json().map_err(|e| {
eprintln!("[LFS] ERROR: Failed to parse preupload response: {}", e);
HfHubError::NetworkError(format!("Preupload parse failed: {e}"))
})?;
eprintln!(
"[LFS] Preupload response: {}",
serde_json::to_string_pretty(&preupload_data).unwrap_or_default()
);
// One entry per requested file is expected; we only ever request one.
let files = preupload_data["files"].as_array().ok_or_else(|| {
eprintln!("[LFS] ERROR: Invalid preupload response - no 'files' array");
HfHubError::NetworkError("Invalid preupload response".to_string())
})?;
if files.is_empty() {
eprintln!("[LFS] ERROR: Empty files array in preupload response");
return Err(HfHubError::NetworkError(
"No file info returned".to_string(),
));
}
Ok(files[0].clone())
}
/// Step 2 of the LFS upload flow (multipart variant): PUT each
/// `LFS_CHUNK_SIZE` slice of `data` to its presigned URL, then best-effort
/// POST the completion URL if the preupload response provided one.
///
/// Presigned chunk PUTs carry no `Authorization` header (presigned URLs
/// embed their own credentials); `token` is only used for the completion call.
///
/// # Errors
/// Returns `HfHubError::NetworkError` when a chunk URL is not a string, the
/// URL list implies more chunks than `data` contains, or a chunk PUT fails
/// or returns a non-2xx status.
#[cfg(feature = "hf-hub-integration")]
fn upload_chunks(
    data: &[u8],
    urls: &[serde_json::Value],
    file_info: &serde_json::Value,
    token: &str,
) -> Result<()> {
    use std::time::Instant;
    eprintln!(
        "[LFS] Step 2: Multipart upload with {} presigned URLs",
        urls.len()
    );
    let file_size = data.len();
    for (i, url_value) in urls.iter().enumerate() {
        let chunk_url = url_value.as_str().ok_or_else(|| {
            HfHubError::NetworkError(format!("Invalid chunk URL at index {}", i))
        })?;
        let chunk_start = i * LFS_CHUNK_SIZE;
        let chunk_end = ((i + 1) * LFS_CHUNK_SIZE).min(file_size);
        // The URL list is server-controlled: checked slicing turns a list
        // longer than the file (chunk_start past the end makes an invalid
        // range) into an error instead of an out-of-bounds panic.
        let chunk_data = data.get(chunk_start..chunk_end).ok_or_else(|| {
            HfHubError::NetworkError(format!(
                "Chunk {} start offset {} is beyond file size {}",
                i + 1,
                chunk_start,
                file_size
            ))
        })?;
        eprintln!(
            "[LFS] Uploading chunk {}/{}: bytes {}-{} ({:.1} MB)",
            i + 1,
            urls.len(),
            chunk_start,
            chunk_end,
            chunk_data.len() as f64 / 1_000_000.0
        );
        let t = Instant::now();
        // Two-hour timeout: a 5 GiB chunk on a slow uplink takes a while.
        let resp = ureq::put(chunk_url)
            .set("Content-Type", "application/octet-stream")
            .timeout(std::time::Duration::from_secs(7200))
            .send_bytes(chunk_data)
            .map_err(|e| {
                eprintln!("[LFS] ERROR: Chunk {} upload failed: {}", i + 1, e);
                HfHubError::NetworkError(format!("Chunk upload failed: {e}"))
            })?;
        let status = resp.status();
        eprintln!(
            "[LFS] Chunk {}/{} uploaded: status={}, elapsed={:.1}s",
            i + 1,
            urls.len(),
            status,
            t.elapsed().as_secs_f64()
        );
        // Defensive: ureq normally surfaces non-2xx as Err(Error::Status),
        // but keep the explicit range check for any Ok non-success response.
        if !(200..300).contains(&status) {
            return Err(HfHubError::NetworkError(format!(
                "Chunk upload failed with status {}",
                status
            )));
        }
    }
    // Best-effort completion notification; the result is deliberately
    // ignored (`let _`), matching the original behavior.
    if let Some(completion_url) = file_info.get("completionUrl").and_then(|v| v.as_str()) {
        eprintln!("[LFS] Calling completion URL: {}", completion_url);
        let _ = ureq::post(completion_url)
            .set("Authorization", &format!("Bearer {token}"))
            .set("Content-Type", "application/json")
            .send_json(serde_json::json!({}));
    }
    Ok(())
}
/// Step 2 of the LFS upload flow (single-URL variant): PUT the whole payload
/// to one presigned URL, applying any extra headers the preupload response
/// supplied under `uploadHeader`.
///
/// # Errors
/// Returns `HfHubError::NetworkError` when the PUT fails at the transport
/// level or completes with a non-2xx status.
#[cfg(feature = "hf-hub-integration")]
fn upload_single(data: &[u8], url: &str, file_info: &serde_json::Value) -> Result<()> {
    use std::time::Instant;
    // Truncate to at most `max` bytes without splitting a UTF-8 code point;
    // direct byte slicing (`&s[..max]`) panics on a non-boundary index.
    fn safe_prefix(s: &str, max: usize) -> &str {
        let mut end = s.len().min(max);
        while !s.is_char_boundary(end) {
            end -= 1;
        }
        &s[..end]
    }
    eprintln!("[LFS] Step 2: Single URL upload to {}", safe_prefix(url, 100));
    let upload_start = Instant::now();
    let headers = file_info.get("uploadHeader").and_then(|v| v.as_object());
    // Two-hour timeout: payloads up to 5 GiB on a slow uplink take a while.
    let mut request = ureq::put(url)
        .set("Content-Type", "application/octet-stream")
        .timeout(std::time::Duration::from_secs(7200));
    if let Some(hdrs) = headers {
        // Only string-valued headers are applied; others are silently skipped.
        for (key, value) in hdrs {
            if let Some(v) = value.as_str() {
                // Log only a short prefix of the header value.
                eprintln!("[LFS] Adding header: {}: {}...", key, safe_prefix(v, 20));
                request = request.set(key, v);
            }
        }
    }
    let resp = request.send_bytes(data).map_err(|e| {
        eprintln!("[LFS] ERROR: Upload failed: {}", e);
        HfHubError::NetworkError(format!("Upload failed: {e}"))
    })?;
    let status = resp.status();
    eprintln!(
        "[LFS] Upload complete: status={}, elapsed={:.1}s, speed={:.1} MB/s",
        status,
        upload_start.elapsed().as_secs_f64(),
        (data.len() as f64 / 1_000_000.0) / upload_start.elapsed().as_secs_f64()
    );
    // Defensive: ureq normally surfaces non-2xx as Err(Error::Status), but
    // keep the explicit range check for any Ok non-success response.
    if !(200..300).contains(&status) {
        let body = resp.into_string().unwrap_or_default();
        return Err(HfHubError::NetworkError(format!(
            "Upload failed (HTTP {}): {}",
            status, body
        )));
    }
    Ok(())
}
/// Step 3 of the LFS upload flow: commit a git-LFS pointer file to
/// `main` that references the already-uploaded content by SHA-256 and size.
///
/// # Errors
/// Returns `HfHubError::NetworkError` when the commit request fails or the
/// Hub responds with a non-2xx status.
#[cfg(feature = "hf-hub-integration")]
#[allow(clippy::disallowed_methods)]
fn commit_lfs_pointer(
    &self,
    repo_id: &str,
    filename: &str,
    sha256: &str,
    file_size: usize,
    commit_msg: &str,
    token: &str,
) -> Result<()> {
    // Truncate to at most `max` bytes without splitting a UTF-8 code point;
    // direct byte slicing (`&s[..max]`) panics on a non-boundary index, and
    // response bodies may contain multi-byte characters.
    fn safe_prefix(s: &str, max: usize) -> &str {
        let mut end = s.len().min(max);
        while !s.is_char_boundary(end) {
            end -= 1;
        }
        &s[..end]
    }
    eprintln!("[LFS] Step 3: Committing LFS pointer");
    // Standard git-LFS pointer file (spec v1).
    let lfs_pointer = format!(
        "version https://git-lfs.github.com/spec/v1\noid sha256:{}\nsize {}\n",
        sha256, file_size
    );
    eprintln!("[LFS] Pointer content:\n{}", lfs_pointer);
    let commit_url = format!("{}/api/models/{}/commit/main", self.api_base, repo_id);
    eprintln!("[LFS] Commit URL: {}", commit_url);
    // The `lfs` block tells the Hub the pointer refers to LFS content that
    // was uploaded separately.
    let commit_body = serde_json::json!({
        "summary": commit_msg,
        "operations": [{
            "op": "addOrUpdate",
            "path": filename,
            "content": base64_encode(lfs_pointer.as_bytes()),
            "encoding": "base64",
            "lfs": { "sha256": sha256, "size": file_size }
        }]
    });
    let commit_resp = ureq::post(&commit_url)
        .set("Authorization", &format!("Bearer {token}"))
        .set("Content-Type", "application/json")
        .send_json(&commit_body);
    match commit_resp {
        Ok(resp) if (200..300).contains(&resp.status()) => {
            let body = resp.into_string().unwrap_or_default();
            eprintln!("[LFS] Commit successful: {}", safe_prefix(&body, 200));
            Ok(())
        }
        // ureq usually reports non-2xx as Err(Error::Status); this arm covers
        // any Ok response that still carries a non-success status.
        Ok(resp) => {
            let status = resp.status();
            let body = resp.into_string().unwrap_or_default();
            eprintln!(
                "[LFS] ERROR: Commit failed with status {}: {}",
                status,
                safe_prefix(&body, 500)
            );
            Err(HfHubError::NetworkError(format!(
                "Commit failed (HTTP {}): {}",
                status, body
            )))
        }
        Err(ureq::Error::Status(code, resp)) => {
            let body = resp.into_string().unwrap_or_default();
            eprintln!(
                "[LFS] ERROR: Commit failed with status {}: {}",
                code,
                safe_prefix(&body, 500)
            );
            Err(HfHubError::NetworkError(format!(
                "Commit failed (HTTP {code}): {body}"
            )))
        }
        // Transport-level failure (DNS, TLS, timeout, ...).
        Err(e) => {
            eprintln!("[LFS] ERROR: Network error during commit: {}", e);
            Err(HfHubError::NetworkError(format!("Network error: {e}")))
        }
    }
}
/// Explains why a >5 GiB upload cannot proceed in a build without the `xet`
/// feature, and returns the corresponding error.
///
/// # Errors
/// Always returns `HfHubError::NetworkError` describing the rebuild required.
#[cfg(all(feature = "hf-hub-integration", not(feature = "xet")))]
fn reject_needs_xet_feature(filename: &str, file_size: usize) -> Result<()> {
    let gib = file_size as f64 / (1024.0 * 1024.0 * 1024.0);
    // Fix: interpolate the actual filename — the messages previously printed
    // the literal placeholder "(unknown)" and left `filename` unused.
    eprintln!(
        "[LFS] ERROR: File {filename} ({gib:.2} GiB) exceeds HF Hub's 5 GiB HTTP threshold"
    );
    eprintln!("[LFS] HF Hub returned uploadMode=lfs with no presigned URLs, which means");
    eprintln!("[LFS] the file must transfer via the Xet content-addressable protocol.");
    eprintln!("[LFS] Rebuild apr with Xet support:");
    eprintln!("[LFS] cargo build --release --features cuda,apr-cli/xet");
    eprintln!("[LFS] (See contracts/apr-publish-hf-large-file-v1.yaml and");
    eprintln!("[LFS] docs/specifications/aprender-train/ship-two-models-spec.md §12.8.)");
    Err(HfHubError::NetworkError(format!(
        "File {filename} ({gib:.2} GiB) exceeds HF Hub's 5 GiB HTTP threshold; \
        rebuild with `--features xet` to enable the Xet upload path."
    )))
}
/// Uploads `data` via the Xet content-addressable protocol (the path for
/// files above HF Hub's 5 GiB HTTP threshold) and commits the LFS pointer.
///
/// The payload is staged in a named temp file because the Xet uploader
/// consumes a filesystem path.
///
/// # Errors
/// Returns `HfHubError::XetUpload` on temp-file I/O failures and propagates
/// any error from the Xet uploader itself.
#[cfg(feature = "xet")]
fn upload_via_xet(
    &self,
    repo_id: &str,
    filename: &str,
    data: &[u8],
    commit_msg: &str,
    token: &str,
) -> Result<()> {
    use std::io::Write;
    eprintln!(
        "[XET] Dispatching {} ({:.2} GiB) via hf-xet (>5 GiB path)",
        filename,
        data.len() as f64 / (1024.0 * 1024.0 * 1024.0)
    );
    let mut tmp = tempfile::NamedTempFile::new()
        .map_err(|e| HfHubError::XetUpload(format!("tempfile create failed: {e}")))?;
    tmp.write_all(data)
        .map_err(|e| HfHubError::XetUpload(format!("tempfile write failed: {e}")))?;
    // Flush so the uploader sees the full content on disk.
    tmp.flush()
        .map_err(|e| HfHubError::XetUpload(format!("tempfile flush failed: {e}")))?;
    let uploader = super::super::xet::XetUploader {
        api_base: &self.api_base,
        repo_id,
        revision: "main",
        token,
    };
    uploader.upload_file(tmp.path(), commit_msg)?;
    // Fix: report the actual filename instead of the "(unknown)" placeholder.
    eprintln!("[XET] Xet upload + LFS pointer commit succeeded for {filename}");
    Ok(())
}
/// Uploads `data` as `filename` to `repo_id` via the HF Hub LFS flow:
/// (1) preupload to obtain upload instructions, (2) upload the bytes
/// (multipart, single-URL, or Xet for files needing the Xet protocol),
/// (3) commit an LFS pointer referencing the content by SHA-256.
///
/// When the Hub returns no upload URL at all, the upload step is skipped and
/// only the pointer is committed (the content may already exist on HF's LFS
/// storage).
///
/// # Errors
/// Returns `HfHubError::NetworkError` (or `XetUpload` on the Xet path) when
/// any step fails; builds without the `xet` feature reject files that
/// require Xet with an explanatory error.
#[cfg(feature = "hf-hub-integration")]
#[allow(clippy::disallowed_methods)]
pub(crate) fn upload_via_lfs(
    &self,
    repo_id: &str,
    filename: &str,
    data: &[u8],
    commit_msg: &str,
    token: &str,
) -> Result<()> {
    use sha2::{Digest, Sha256};
    use std::time::Instant;
    let start = Instant::now();
    let file_size = data.len();
    eprintln!(
        "[LFS] Calculating SHA256 for {} ({:.1} MB)...",
        filename,
        file_size as f64 / 1_000_000.0
    );
    let mut hasher = Sha256::new();
    hasher.update(data);
    let sha256 = format!("{:x}", hasher.finalize());
    eprintln!("[LFS] SHA256: {}", sha256);
    // Fix: `get(..12)` instead of byte slicing — `&token[..12]` would panic
    // if byte 12 were not a UTF-8 char boundary; short tokens log in full.
    eprintln!("[LFS] Using token: {}...", token.get(..12).unwrap_or(token));
    // Ceiling division: number of LFS_CHUNK_SIZE chunks covering file_size.
    let num_chunks = (file_size + LFS_CHUNK_SIZE - 1) / LFS_CHUNK_SIZE;
    eprintln!(
        "[LFS] File size: {} bytes, will upload in {} chunk(s)",
        file_size, num_chunks
    );
    let file_info = self.send_preupload_request(repo_id, filename, data, &sha256, token)?;
    let upload_mode = file_info
        .get("uploadMode")
        .and_then(|v| v.as_str())
        .unwrap_or("unknown");
    eprintln!("[LFS] Upload mode: {}", upload_mode);
    // Accept both camelCase and snake_case variants of the URL keys.
    let upload_url = file_info
        .get("uploadUrl")
        .or_else(|| file_info.get("upload_url"))
        .and_then(|v| v.as_str());
    let chunk_urls = file_info
        .get("chunkUrls")
        .or_else(|| file_info.get("chunk_urls"))
        .or_else(|| file_info.get("urls"))
        .and_then(|v| v.as_array());
    eprintln!("[LFS] Upload URL present: {}", upload_url.is_some());
    eprintln!("[LFS] Chunk URLs present: {}", chunk_urls.is_some());
    // uploadMode=lfs with no URLs at all means the Hub expects the Xet
    // protocol for this file (see reject_needs_xet_feature's explanation).
    if upload_url.is_none()
        && chunk_urls.is_none()
        && upload_mode == "lfs"
        && super::super::xet::should_use_xet(file_size as u64)
    {
        #[cfg(feature = "xet")]
        {
            return self.upload_via_xet(repo_id, filename, data, commit_msg, token);
        }
        #[cfg(not(feature = "xet"))]
        {
            return Self::reject_needs_xet_feature(filename, file_size);
        }
    }
    // Prefer multipart when chunk URLs were provided; otherwise single PUT.
    if let Some(urls) = chunk_urls {
        Self::upload_chunks(data, urls, &file_info, token)?;
    } else if let Some(url) = upload_url {
        Self::upload_single(data, url, &file_info)?;
    } else {
        eprintln!("[LFS] No upload URL returned - proceeding to commit LFS pointer");
        eprintln!("[LFS] (This may mean the file content already exists on HF's LFS storage)");
    }
    self.commit_lfs_pointer(repo_id, filename, &sha256, file_size, commit_msg, token)?;
    eprintln!(
        "[LFS] Total upload time: {:.1}s",
        start.elapsed().as_secs_f64()
    );
    Ok(())
}
/// Builds a default `ModelCard` for `repo_id`, deriving the display name
/// from the last `/`-separated segment of the repo id (e.g. "org/model"
/// becomes "model"; an id without '/' is used verbatim).
#[must_use]
pub fn auto_generate_card(repo_id: &str, model_type: &str, version: &str) -> ModelCard {
    // `rsplit` yields at least one segment, so the fallback never fires in
    // practice; it is kept for robustness.
    let display_name = repo_id.rsplit('/').next().unwrap_or(repo_id);
    let description = format!("{model_type} model trained with aprender");
    ModelCard::new(repo_id, version)
        .with_name(display_name)
        .with_architecture(model_type)
        .with_description(description)
}
}