1use std::io::{BufRead, BufReader, Read, Seek};
22use std::os::fd::OwnedFd;
23
24use base64::prelude::*;
25use cap_std::fs::{Dir, File};
26use crc::{CRC_64_GO_ISO, Crc};
27use flate2::read::GzDecoder;
28use serde::Deserialize;
29
30use crate::error::{Result, StorageError};
31use crate::layer::Layer;
32use crate::storage::Storage;
33
34const CRC64_ISO: Crc<u64> = Crc::<u64>::new(&CRC_64_GO_ISO);
36
/// One item produced while replaying a tar-split stream.
#[derive(Debug)]
pub enum TarSplitItem {
    /// Raw bytes decoded from the base64 payload of a segment entry.
    Segment(Vec<u8>),

    /// File content handed out as an open descriptor rather than inline bytes.
    FileContent {
        /// Descriptor of the file located in the layer chain. It is either
        /// freshly opened or rewound after checksum verification.
        fd: OwnedFd,
        /// Size recorded for the file in the tar-split entry.
        size: u64,
        /// Path exactly as recorded in the tar-split entry.
        name: String,
    },
}
63
64#[derive(Debug, Deserialize)]
66struct TarSplitEntryRaw {
67 #[serde(rename = "type")]
69 type_id: u8,
70 #[serde(default)]
72 name: Option<String>,
73 #[serde(default)]
75 size: Option<u64>,
76 #[serde(default)]
78 crc64: Option<String>,
79 #[serde(default)]
81 payload: Option<String>,
82}
83
/// Typed view of a tar-split entry, keyed by the entry kind.
#[derive(Debug)]
enum TarSplitEntry {
    /// Reference to file content that lives outside the tar-split stream.
    File {
        /// Path of the file, if the entry carried one.
        name: Option<String>,
        /// Size of the file content in bytes, if the entry carried one.
        size: Option<u64>,
        /// Base64-encoded CRC64 of the content, if the entry carried one.
        crc64: Option<String>,
    },
    /// Raw bytes stored inline in the tar-split stream.
    Segment {
        /// Base64-encoded bytes, if the entry carried any.
        payload: Option<String>,
    },
}
102
103impl TarSplitEntry {
104 fn from_raw(raw: TarSplitEntryRaw) -> Result<Self> {
106 match raw.type_id {
107 1 => Ok(TarSplitEntry::File {
108 name: raw.name,
109 size: raw.size,
110 crc64: raw.crc64,
111 }),
112 2 => Ok(TarSplitEntry::Segment {
113 payload: raw.payload,
114 }),
115 _ => Err(StorageError::TarSplitError(format!(
116 "Invalid tar-split entry type: {}",
117 raw.type_id
118 ))),
119 }
120 }
121}
122
/// Owned, decoded view of the fields of a single TAR header block.
#[derive(Debug, Clone)]
pub struct TarHeader {
    /// Entry path as stored in the header (a leading `./` is kept; see
    /// `normalized_name`).
    pub name: String,

    /// Permission/mode bits.
    pub mode: u32,

    /// Numeric owner id.
    pub uid: u32,

    /// Numeric group id.
    pub gid: u32,

    /// Size of the entry's content in bytes.
    pub size: u64,

    /// Modification time as stored in the header.
    pub mtime: i64,

    /// Raw type flag byte (e.g. `b'0'` regular file, `b'5'` directory).
    pub typeflag: u8,

    /// Link target; empty when the header has none.
    pub linkname: String,

    /// Owner user name; empty when the header has none.
    pub uname: String,

    /// Owner group name; empty when the header has none.
    pub gname: String,

    /// Device major number; `0` when the header has none.
    pub devmajor: u32,

    /// Device minor number; `0` when the header has none.
    pub devminor: u32,
}
162
163impl TarHeader {
164 pub fn from_bytes(header_bytes: &[u8]) -> Result<Self> {
170 let header_array: &[u8; tar_core::HEADER_SIZE] = header_bytes.try_into().map_err(|_| {
171 StorageError::TarSplitError(format!(
172 "TAR header wrong size: {} bytes (expected {})",
173 header_bytes.len(),
174 tar_core::HEADER_SIZE
175 ))
176 })?;
177 let header = tar_core::Header::from_bytes(header_array);
178
179 let name = String::from_utf8(header.path_bytes().to_vec()).map_err(|e| {
180 StorageError::TarSplitError(format!("Non-UTF-8 path in TAR header: {}", e))
181 })?;
182 let mode = header
183 .mode()
184 .map_err(|e| StorageError::TarSplitError(format!("Invalid mode: {}", e)))?;
185 let uid = header
186 .uid()
187 .map_err(|e| StorageError::TarSplitError(format!("Invalid uid: {}", e)))?
188 as u32;
189 let gid = header
190 .gid()
191 .map_err(|e| StorageError::TarSplitError(format!("Invalid gid: {}", e)))?
192 as u32;
193 let size = header
194 .entry_size()
195 .map_err(|e| StorageError::TarSplitError(format!("Invalid size: {}", e)))?;
196 let mtime = header
197 .mtime()
198 .map_err(|e| StorageError::TarSplitError(format!("Invalid mtime: {}", e)))?
199 as i64;
200 let typeflag = header.entry_type().as_byte();
201 let link_bytes = header.link_name_bytes();
202 let linkname = if link_bytes.is_empty() {
203 String::new()
204 } else {
205 String::from_utf8(link_bytes.to_vec()).map_err(|e| {
206 StorageError::TarSplitError(format!("Non-UTF-8 link name in TAR header: {}", e))
207 })?
208 };
209 let uname = header
210 .username()
211 .map(|b| {
212 String::from_utf8(b.to_vec()).map_err(|e| {
213 StorageError::TarSplitError(format!("Non-UTF-8 username in TAR header: {}", e))
214 })
215 })
216 .transpose()?
217 .unwrap_or_default();
218 let gname = header
219 .groupname()
220 .map(|b| {
221 String::from_utf8(b.to_vec()).map_err(|e| {
222 StorageError::TarSplitError(format!(
223 "Non-UTF-8 group name in TAR header: {}",
224 e
225 ))
226 })
227 })
228 .transpose()?
229 .unwrap_or_default();
230 let devmajor = header
231 .device_major()
232 .map_err(|e| StorageError::TarSplitError(format!("Invalid devmajor: {}", e)))?
233 .unwrap_or(0);
234 let devminor = header
235 .device_minor()
236 .map_err(|e| StorageError::TarSplitError(format!("Invalid devminor: {}", e)))?
237 .unwrap_or(0);
238
239 Ok(TarHeader {
240 name,
241 mode,
242 uid,
243 gid,
244 size,
245 mtime,
246 typeflag,
247 linkname,
248 uname,
249 gname,
250 devmajor,
251 devminor,
252 })
253 }
254
255 pub fn is_regular_file(&self) -> bool {
257 self.typeflag == b'0' || self.typeflag == b'\0'
258 }
259
260 pub fn is_directory(&self) -> bool {
262 self.typeflag == b'5'
263 }
264
265 pub fn is_symlink(&self) -> bool {
267 self.typeflag == b'2'
268 }
269
270 pub fn is_hardlink(&self) -> bool {
272 self.typeflag == b'1'
273 }
274
275 pub fn normalized_name(&self) -> &str {
277 self.name.strip_prefix("./").unwrap_or(&self.name)
278 }
279}
280
281#[derive(Debug)]
283pub struct TarSplitFdStream {
284 layer: Layer,
286
287 storage_root: Dir,
289
290 reader: BufReader<GzDecoder<File>>,
292
293 entry_count: usize,
295}
296
297impl TarSplitFdStream {
298 pub fn new(storage: &Storage, layer: &Layer) -> Result<Self> {
304 let layers_dir = storage.root_dir().open_dir("overlay-layers").map_err(|e| {
306 StorageError::TarSplitError(format!("Failed to open overlay-layers directory: {}", e))
307 })?;
308
309 let filename = format!("{}.tar-split.gz", layer.id());
311 let file = layers_dir.open(&filename).map_err(|e| {
312 StorageError::TarSplitError(format!(
313 "Failed to open tar-split file {}: {}",
314 filename, e
315 ))
316 })?;
317
318 let gz_decoder = GzDecoder::new(file);
320 let reader = BufReader::new(gz_decoder);
321
322 let layer = Layer::open(storage, layer.id())?;
324
325 let storage_root = storage.root_dir().try_clone()?;
327
328 Ok(Self {
329 layer,
330 storage_root,
331 reader,
332 entry_count: 0,
333 })
334 }
335
336 fn open_file_in_chain(&self, path: &str) -> Result<cap_std::fs::File> {
338 let normalized_path = path.strip_prefix("./").unwrap_or(path);
340
341 match self.layer.diff_dir().open(normalized_path) {
343 Ok(file) => return Ok(file),
344 Err(e) if e.kind() == std::io::ErrorKind::NotFound => {
345 }
347 Err(e) => return Err(StorageError::Io(e)),
348 }
349
350 self.search_parent_layers(&self.layer, normalized_path, 0)
352 }
353
354 fn search_parent_layers(
356 &self,
357 current_layer: &Layer,
358 path: &str,
359 depth: usize,
360 ) -> Result<cap_std::fs::File> {
361 const MAX_DEPTH: usize = 500;
362
363 if depth >= MAX_DEPTH {
364 return Err(StorageError::TarSplitError(format!(
365 "Layer chain exceeds maximum depth of {} while searching for file: {}",
366 MAX_DEPTH, path
367 )));
368 }
369
370 let parent_links = current_layer.parent_links();
372
373 for link_id in parent_links {
375 let parent_id = self.resolve_link_direct(link_id)?;
377
378 match self.open_file_in_layer(&parent_id, path) {
380 Ok(file) => return Ok(file),
381 Err(StorageError::Io(e)) if e.kind() == std::io::ErrorKind::NotFound => {
382 match self.search_by_layer_id(&parent_id, path, depth + 1) {
384 Ok(file) => return Ok(file),
385 Err(StorageError::TarSplitError(_)) => continue, Err(e) => return Err(e),
387 }
388 }
389 Err(e) => return Err(e),
390 }
391 }
392
393 Err(StorageError::TarSplitError(format!(
394 "File not found in layer chain: {}",
395 path
396 )))
397 }
398
399 fn search_by_layer_id(
401 &self,
402 layer_id: &str,
403 path: &str,
404 depth: usize,
405 ) -> Result<cap_std::fs::File> {
406 const MAX_DEPTH: usize = 500;
407
408 if depth >= MAX_DEPTH {
409 return Err(StorageError::TarSplitError(format!(
410 "Layer chain exceeds maximum depth of {} while searching for file: {}",
411 MAX_DEPTH, path
412 )));
413 }
414
415 match self.open_file_in_layer(layer_id, path) {
417 Ok(file) => return Ok(file),
418 Err(StorageError::Io(e)) if e.kind() == std::io::ErrorKind::NotFound => {
419 }
421 Err(e) => return Err(e),
422 }
423
424 let parent_links = self.read_layer_parent_links(layer_id)?;
426
427 for link_id in parent_links {
429 let parent_id = self.resolve_link_direct(&link_id)?;
430 match self.search_by_layer_id(&parent_id, path, depth + 1) {
431 Ok(file) => return Ok(file),
432 Err(StorageError::TarSplitError(_)) => continue, Err(e) => return Err(e),
434 }
435 }
436
437 Err(StorageError::TarSplitError(format!(
438 "File not found in layer chain: {}",
439 path
440 )))
441 }
442
443 fn resolve_link_direct(&self, link_id: &str) -> Result<String> {
445 let overlay_dir = self.storage_root.open_dir("overlay")?;
446 let link_dir = overlay_dir.open_dir("l")?;
447 let target = link_dir.read_link(link_id).map_err(|e| {
448 StorageError::LinkReadError(format!("Failed to read link {}: {}", link_id, e))
449 })?;
450
451 let target_str = target.to_str().ok_or_else(|| {
453 StorageError::LinkReadError("Invalid UTF-8 in link target".to_string())
454 })?;
455 let components: Vec<&str> = target_str.split('/').collect();
456 if components.len() >= 2 {
457 let layer_id = components[components.len() - 2];
458 if !layer_id.is_empty() && layer_id != ".." {
459 return Ok(layer_id.to_string());
460 }
461 }
462 Err(StorageError::LinkReadError(format!(
463 "Invalid link target format: {}",
464 target_str
465 )))
466 }
467
468 fn open_file_in_layer(&self, layer_id: &str, path: &str) -> Result<cap_std::fs::File> {
470 let overlay_dir = self.storage_root.open_dir("overlay")?;
471 let layer_dir = overlay_dir.open_dir(layer_id)?;
472 let diff_dir = layer_dir.open_dir("diff")?;
473 diff_dir.open(path).map_err(StorageError::Io)
474 }
475
476 fn read_layer_parent_links(&self, layer_id: &str) -> Result<Vec<String>> {
478 let overlay_dir = self.storage_root.open_dir("overlay")?;
479 let layer_dir = overlay_dir.open_dir(layer_id)?;
480
481 match layer_dir.read_to_string("lower") {
482 Ok(content) => Ok(content
483 .trim()
484 .split(':')
485 .filter_map(|s| s.strip_prefix("l/"))
486 .map(|s| s.to_string())
487 .collect()),
488 Err(e) if e.kind() == std::io::ErrorKind::NotFound => Ok(Vec::new()), Err(e) => Err(StorageError::Io(e)),
490 }
491 }
492
493 fn verify_crc64(
495 &self,
496 file: &mut cap_std::fs::File,
497 expected_b64: &str,
498 size: u64,
499 ) -> Result<()> {
500 let expected_bytes = BASE64_STANDARD.decode(expected_b64).map_err(|e| {
502 StorageError::TarSplitError(format!("Failed to decode base64 CRC64: {}", e))
503 })?;
504
505 if expected_bytes.len() != 8 {
506 return Err(StorageError::TarSplitError(format!(
507 "Invalid CRC64 length: {} bytes",
508 expected_bytes.len()
509 )));
510 }
511
512 let expected = u64::from_be_bytes(expected_bytes.try_into().unwrap());
514
515 let mut digest = CRC64_ISO.digest();
517 let mut buffer = vec![0u8; 8192];
518 let mut bytes_read = 0u64;
519
520 loop {
521 let n = file.read(&mut buffer).map_err(|e| {
522 StorageError::TarSplitError(format!(
523 "Failed to read file for CRC64 verification: {}",
524 e
525 ))
526 })?;
527 if n == 0 {
528 break;
529 }
530 digest.update(&buffer[..n]);
531 bytes_read += n as u64;
532 }
533
534 if bytes_read != size {
536 return Err(StorageError::TarSplitError(format!(
537 "File size mismatch: expected {}, got {}",
538 size, bytes_read
539 )));
540 }
541
542 let computed = digest.finalize();
543 if computed != expected {
544 return Err(StorageError::TarSplitError(format!(
545 "CRC64 mismatch: expected {:016x}, got {:016x}",
546 expected, computed
547 )));
548 }
549
550 Ok(())
551 }
552
553 #[allow(clippy::should_implement_trait)]
560 pub fn next(&mut self) -> Result<Option<TarSplitItem>> {
561 loop {
562 let mut line = String::new();
564 match self.reader.read_line(&mut line) {
565 Ok(0) => {
566 return Ok(None);
567 }
568 Ok(_) => {
569 let raw: TarSplitEntryRaw = serde_json::from_str(&line).map_err(|e| {
571 StorageError::TarSplitError(format!(
572 "Failed to parse tar-split entry: {}",
573 e
574 ))
575 })?;
576 let entry = TarSplitEntry::from_raw(raw)?;
577
578 match entry {
579 TarSplitEntry::Segment { payload } => {
580 if let Some(payload_b64) = payload {
581 let payload_bytes =
582 BASE64_STANDARD.decode(&payload_b64).map_err(|e| {
583 StorageError::TarSplitError(format!(
584 "Failed to decode base64 payload: {}",
585 e
586 ))
587 })?;
588
589 return Ok(Some(TarSplitItem::Segment(payload_bytes)));
590 }
591 }
593
594 TarSplitEntry::File { name, size, crc64 } => {
595 self.entry_count += 1;
596
597 let file_size = size.unwrap_or(0);
599 if file_size > 0 {
600 let path = name.as_ref().ok_or_else(|| {
602 StorageError::TarSplitError(
603 "FileType entry missing name".to_string(),
604 )
605 })?;
606
607 let mut file = self.open_file_in_chain(path)?;
608
609 if let Some(ref crc64_b64) = crc64 {
611 self.verify_crc64(&mut file, crc64_b64, file_size)?;
612
613 file.rewind().map_err(StorageError::Io)?;
615 }
616
617 let std_file = file.into_std();
619 let owned_fd: OwnedFd = std_file.into();
620 return Ok(Some(TarSplitItem::FileContent {
621 fd: owned_fd,
622 size: file_size,
623 name: path.clone(),
624 }));
625 }
626 }
628 }
629 }
630 Err(e) => {
631 return Err(StorageError::TarSplitError(format!(
632 "Failed to read tar-split line: {}",
633 e
634 )));
635 }
636 }
637 }
638 }
639
640 pub fn entry_count(&self) -> usize {
642 self.entry_count
643 }
644}
645
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a header with fixed metadata and the given type flag.
    fn header_with_typeflag(typeflag: u8) -> TarHeader {
        TarHeader {
            name: "test.txt".to_string(),
            mode: 0o644,
            uid: 1000,
            gid: 1000,
            size: 100,
            mtime: 0,
            typeflag,
            linkname: String::new(),
            uname: "user".to_string(),
            gname: "group".to_string(),
            devmajor: 0,
            devminor: 0,
        }
    }

    /// Deserializes one JSON entry and converts it to its typed form.
    fn parse_entry(json: &str) -> Result<TarSplitEntry> {
        let raw: TarSplitEntryRaw = serde_json::from_str(json).unwrap();
        TarSplitEntry::from_raw(raw)
    }

    #[test]
    fn test_tar_header_type_checks() {
        let regular = header_with_typeflag(b'0');
        assert!(regular.is_regular_file());
        assert!(!regular.is_directory());
        assert!(!regular.is_symlink());

        let directory = header_with_typeflag(b'5');
        assert!(!directory.is_regular_file());
        assert!(directory.is_directory());

        let symlink = header_with_typeflag(b'2');
        assert!(symlink.is_symlink());
    }

    #[test]
    fn test_tar_split_entry_deserialization() {
        // Segment entry: the payload is kept as the base64 text.
        let segment = parse_entry(r#"{"type":2,"payload":"dXN0YXIAMDA="}"#).unwrap();
        let TarSplitEntry::Segment { payload } = segment else {
            panic!("Expected Segment variant");
        };
        assert_eq!(payload, Some("dXN0YXIAMDA=".to_string()));

        // File entry: name, size and checksum are carried over verbatim.
        let file = parse_entry(
            r#"{"type":1,"name":"./etc/hosts","size":123,"crc64":"AAAAAAAAAA=="}"#,
        )
        .unwrap();
        let TarSplitEntry::File { name, size, crc64 } = file else {
            panic!("Expected File variant");
        };
        assert_eq!(name, Some("./etc/hosts".to_string()));
        assert_eq!(size, Some(123));
        assert_eq!(crc64, Some("AAAAAAAAAA==".to_string()));

        // Unknown type ids are rejected during conversion.
        assert!(parse_entry(r#"{"type":99}"#).is_err());
    }
}