Skip to main content

plugin_packager/
extractor.rs

1// Copyright 2024 Vincents AI
2// SPDX-License-Identifier: MIT OR Apache-2.0
3
4//! Plugin artifact extraction with security checks
5//!
6//! This module provides secure extraction of plugin .tar.gz artifacts with:
7//! - Path traversal prevention
8//! - Symlink handling
9//! - Permission preservation
10//! - Size limits
11//! - File type validation
12
13use anyhow::{bail, Context, Result};
14use std::fs::{self, File};
15use std::io::{Read, Write};
16#[cfg(unix)]
17use std::os::unix::fs::PermissionsExt;
18use std::path::{Component, Path, PathBuf};
19
/// Default ceiling for a single extracted file: 100 MiB.
const MAX_FILE_SIZE: u64 = 100 * (1 << 20);

/// Default ceiling for the combined size of all extracted files: 1 GiB.
const MAX_TOTAL_SIZE: u64 = 1 << 30;

/// Default upper bound on the length of an archive entry path.
const MAX_PATH_LENGTH: usize = 4_096;

/// Default upper bound on the number of entries accepted from one archive.
const MAX_ENTRIES: usize = 10_000;
31
/// Tunable limits and policy switches applied while unpacking an archive.
#[derive(Debug, Clone)]
pub struct ExtractorConfig {
    /// Largest size, in bytes, a single archive entry may declare.
    pub max_file_size: u64,
    /// Largest combined size, in bytes, of all extracted regular files.
    pub max_total_size: u64,
    /// Longest entry path that will be accepted.
    pub max_path_length: usize,
    /// Largest number of archive entries that will be processed.
    pub max_entries: usize,
    /// Whether symlink entries may be created (unsafe for untrusted archives).
    pub allow_symlinks: bool,
    /// Whether entry paths with a root or prefix component are accepted
    /// (almost always unsafe).
    pub allow_absolute_paths: bool,
    /// Whether extracted `.so` / `.dll` / `.dylib` files are marked executable.
    pub set_executable: bool,
    /// Whether files that already exist at the destination may be replaced.
    pub overwrite: bool,
}
52
53impl Default for ExtractorConfig {
54    fn default() -> Self {
55        ExtractorConfig {
56            max_file_size: MAX_FILE_SIZE,
57            max_total_size: MAX_TOTAL_SIZE,
58            max_path_length: MAX_PATH_LENGTH,
59            max_entries: MAX_ENTRIES,
60            allow_symlinks: false,
61            allow_absolute_paths: false,
62            set_executable: true,
63            overwrite: false,
64        }
65    }
66}
67
68impl ExtractorConfig {
69    /// Create a new config with secure defaults
70    pub fn new() -> Self {
71        Self::default()
72    }
73
74    /// Create a permissive config for trusted sources
75    pub fn permissive() -> Self {
76        ExtractorConfig {
77            max_file_size: MAX_FILE_SIZE * 10,
78            max_total_size: MAX_TOTAL_SIZE * 10,
79            max_path_length: MAX_PATH_LENGTH,
80            max_entries: MAX_ENTRIES * 10,
81            allow_symlinks: true,
82            allow_absolute_paths: false, // Still dangerous
83            set_executable: true,
84            overwrite: true,
85        }
86    }
87}
88
/// Summary of a completed extraction.
#[derive(Debug, Clone)]
pub struct ExtractionResult {
    /// Directory, inside the destination, that holds the extracted plugin.
    pub plugin_dir: PathBuf,
    /// Count of regular files written.
    pub files_extracted: usize,
    /// Count of directory entries processed.
    pub directories_created: usize,
    /// Sum of the sizes of all regular files written, in bytes.
    pub total_bytes: u64,
    /// Plugin name read from the manifest (empty if it could not be parsed).
    pub plugin_name: String,
    /// Plugin version read from the manifest (empty if it could not be parsed).
    pub plugin_version: String,
}
105
106/// Plugin artifact extractor
107pub struct PluginExtractor {
108    config: ExtractorConfig,
109}
110
111impl PluginExtractor {
112    /// Create a new extractor with the given configuration
113    pub fn new(config: ExtractorConfig) -> Self {
114        PluginExtractor { config }
115    }
116
117    /// Create an extractor with secure default settings
118    pub fn secure() -> Self {
119        PluginExtractor::new(ExtractorConfig::new())
120    }
121
122    /// Extract a plugin artifact to the specified directory
123    ///
124    /// # Arguments
125    /// * `artifact_path` - Path to the .tar.gz artifact
126    /// * `dest_dir` - Destination directory for extraction
127    ///
128    /// # Returns
129    /// The path to the extracted plugin directory (inside dest_dir)
130    pub fn extract(&self, artifact_path: &Path, dest_dir: &Path) -> Result<ExtractionResult> {
131        // Validate artifact exists
132        if !artifact_path.exists() {
133            bail!("Artifact not found: {}", artifact_path.display());
134        }
135
136        // Create destination if it doesn't exist
137        fs::create_dir_all(dest_dir)
138            .with_context(|| format!("Creating destination directory {}", dest_dir.display()))?;
139
140        // Open and decompress
141        let file = File::open(artifact_path)
142            .with_context(|| format!("Opening artifact {}", artifact_path.display()))?;
143        let decoder = flate2::read::GzDecoder::new(file);
144        let mut archive = tar::Archive::new(decoder);
145
146        let mut files_extracted = 0;
147        let mut directories_created = 0;
148        let mut total_bytes = 0u64;
149        let mut plugin_name = String::new();
150        let mut plugin_version = String::new();
151        let mut root_dir: Option<PathBuf> = None;
152        let mut entry_count = 0;
153
154        // Process each entry
155        for entry_result in archive.entries()? {
156            let mut entry = entry_result?;
157            entry_count += 1;
158
159            if entry_count > self.config.max_entries {
160                bail!(
161                    "Archive contains too many entries (max {})",
162                    self.config.max_entries
163                );
164            }
165
166            let entry_path = entry.path()?.to_path_buf();
167
168            // Validate path for security
169            self.validate_path(&entry_path)?;
170
171            // Determine the root directory
172            if root_dir.is_none() {
173                if let Some(first_component) = entry_path.components().next() {
174                    root_dir = Some(PathBuf::from(first_component.as_os_str()));
175                }
176            }
177
178            // Calculate destination path
179            let dest_path = dest_dir.join(&entry_path);
180
181            // Check file size limit
182            let size = entry.size();
183            if size > self.config.max_file_size {
184                bail!(
185                    "File {} exceeds maximum size ({} > {} bytes)",
186                    entry_path.display(),
187                    size,
188                    self.config.max_file_size
189                );
190            }
191
192            // Handle different entry types
193            let header = entry.header();
194            match header.entry_type() {
195                tar::EntryType::Directory => {
196                    self.extract_directory(&dest_path)?;
197                    directories_created += 1;
198                }
199                tar::EntryType::Regular | tar::EntryType::Continuous => {
200                    self.extract_file(&mut entry, &dest_path, size)?;
201                    files_extracted += 1;
202                    total_bytes += size;
203                }
204                tar::EntryType::Symlink => {
205                    if !self.config.allow_symlinks {
206                        bail!("Symlinks are not allowed: {}", entry_path.display());
207                    }
208                    #[cfg(unix)]
209                    {
210                        let target = entry.link_name()?.context("Symlink target missing")?;
211                        self.extract_symlink(&dest_path, &target)?;
212                    }
213                    #[cfg(not(unix))]
214                    {
215                        bail!("Symlinks not supported on this platform");
216                    }
217                }
218                tar::EntryType::Link => {
219                    // Hard links - skip for now, treat as regular file copy
220                    // This is a simplification; proper hard link handling is complex
221                }
222                _ => {
223                    // Skip other types (block devices, char devices, etc.)
224                }
225            }
226
227            // Check for manifest to extract plugin name/version
228            // Read from the extracted file on disk rather than the tar entry,
229            // since extract_file() already consumed the entry's data stream.
230            if entry_path.ends_with("plugin.toml") && dest_path.exists() {
231                let content = fs::read_to_string(&dest_path)?;
232                if let Some((name, version)) = self.parse_manifest_basic(&content)? {
233                    plugin_name = name;
234                    plugin_version = version;
235                }
236            }
237
238            // Check total size
239            if total_bytes > self.config.max_total_size {
240                bail!(
241                    "Total extracted size exceeds limit ({} > {} bytes)",
242                    total_bytes,
243                    self.config.max_total_size
244                );
245            }
246        }
247
248        // Verify we found a valid root directory
249        let plugin_dir = match root_dir {
250            Some(root) => dest_dir.join(root),
251            None => bail!("Archive has no root directory"),
252        };
253
254        // Verify plugin.toml exists
255        let manifest_path = plugin_dir.join("plugin.toml");
256        if !manifest_path.exists() {
257            bail!("Extracted archive missing plugin.toml");
258        }
259
260        Ok(ExtractionResult {
261            plugin_dir,
262            files_extracted,
263            directories_created,
264            total_bytes,
265            plugin_name,
266            plugin_version,
267        })
268    }
269
270    /// Validate a path for security issues
271    fn validate_path(&self, path: &Path) -> Result<()> {
272        // Check path length
273        let path_str = path.to_string_lossy();
274        if path_str.len() > self.config.max_path_length {
275            bail!(
276                "Path too long ({} > {})",
277                path_str.len(),
278                self.config.max_path_length
279            );
280        }
281
282        // Check for path traversal
283        for component in path.components() {
284            match component {
285                Component::ParentDir => {
286                    bail!("Path traversal detected: {}", path.display());
287                }
288                Component::RootDir => {
289                    if !self.config.allow_absolute_paths {
290                        bail!("Absolute path not allowed: {}", path.display());
291                    }
292                }
293                Component::Prefix(_) => {
294                    if !self.config.allow_absolute_paths {
295                        bail!("Absolute path (prefix) not allowed: {}", path.display());
296                    }
297                }
298                _ => {}
299            }
300        }
301
302        Ok(())
303    }
304
305    /// Extract a directory
306    fn extract_directory(&self, path: &Path) -> Result<()> {
307        if path.exists() {
308            if !path.is_dir() {
309                bail!("Path exists but is not a directory: {}", path.display());
310            }
311            return Ok(());
312        }
313
314        fs::create_dir_all(path)
315            .with_context(|| format!("Creating directory {}", path.display()))?;
316
317        // Set directory permissions
318        #[cfg(unix)]
319        {
320            fs::set_permissions(path, fs::Permissions::from_mode(0o755))?;
321        }
322
323        Ok(())
324    }
325
326    /// Extract a regular file
327    fn extract_file(
328        &self,
329        entry: &mut tar::Entry<impl Read>,
330        path: &Path,
331        expected_size: u64,
332    ) -> Result<()> {
333        // Check if file exists
334        if path.exists() && !self.config.overwrite {
335            bail!("File already exists: {}", path.display());
336        }
337
338        // Create parent directories
339        if let Some(parent) = path.parent() {
340            if !parent.exists() {
341                fs::create_dir_all(parent)
342                    .with_context(|| format!("Creating parent directory {}", parent.display()))?;
343            }
344        }
345
346        // Extract file with size validation
347        let mut file =
348            File::create(path).with_context(|| format!("Creating file {}", path.display()))?;
349
350        let mut bytes_written = 0u64;
351        let mut buffer = [0u8; 8192];
352
353        loop {
354            let bytes_read = entry.read(&mut buffer)?;
355            if bytes_read == 0 {
356                break;
357            }
358
359            file.write_all(&buffer[..bytes_read])?;
360            bytes_written += bytes_read as u64;
361
362            // Check for size mismatch (potential zip bomb)
363            if bytes_written > expected_size {
364                bail!(
365                    "File size mismatch during extraction: {} > {}",
366                    bytes_written,
367                    expected_size
368                );
369            }
370        }
371
372        // Set executable permission for plugin binaries
373        if self.config.set_executable {
374            if let Some(filename) = path.file_name().and_then(|s| s.to_str()) {
375                let lower = filename.to_lowercase();
376                if lower == "plugin.so"
377                    || lower == "plugin.dll"
378                    || lower == "plugin.dylib"
379                    || lower.ends_with(".so")
380                    || lower.ends_with(".dll")
381                    || lower.ends_with(".dylib")
382                {
383                    #[cfg(unix)]
384                    {
385                        fs::set_permissions(path, fs::Permissions::from_mode(0o755))?;
386                    }
387                }
388            }
389        }
390
391        Ok(())
392    }
393
394    /// Extract a symlink (Unix only)
395    #[cfg(unix)]
396    fn extract_symlink(&self, link_path: &Path, target: &Path) -> Result<()> {
397        // Check if file exists
398        if link_path.exists() && !self.config.overwrite {
399            bail!("Symlink already exists: {}", link_path.display());
400        }
401
402        // Remove existing symlink if overwrite is set
403        if link_path.exists() {
404            fs::remove_file(link_path)?;
405        }
406
407        // Create parent directories
408        if let Some(parent) = link_path.parent() {
409            if !parent.exists() {
410                fs::create_dir_all(parent)?;
411            }
412        }
413
414        // Create symlink
415        std::os::unix::fs::symlink(target, link_path).with_context(|| {
416            format!(
417                "Creating symlink {} -> {}",
418                link_path.display(),
419                target.display()
420            )
421        })?;
422
423        Ok(())
424    }
425
426    /// Basic manifest parsing to extract name and version
427    fn parse_manifest_basic(&self, content: &str) -> Result<Option<(String, String)>> {
428        // Simple TOML parsing for [package] section
429        let mut in_package = false;
430        let mut name = String::new();
431        let mut version = String::new();
432
433        for line in content.lines() {
434            let trimmed = line.trim();
435
436            if trimmed == "[package]" {
437                in_package = true;
438                continue;
439            }
440
441            if trimmed.starts_with('[') && trimmed != "[package]" {
442                in_package = false;
443                continue;
444            }
445
446            if in_package {
447                if let Some((key, value)) = trimmed.split_once('=') {
448                    let key = key.trim();
449                    let value = value.trim().trim_matches('"');
450
451                    match key {
452                        "name" => name = value.to_string(),
453                        "version" => version = value.to_string(),
454                        _ => {}
455                    }
456                }
457            } else if !in_package {
458                // Try flat format (top-level keys outside any section)
459                if let Some((key, value)) = trimmed.split_once('=') {
460                    let key = key.trim();
461                    let value = value.trim().trim_matches('"');
462
463                    match key {
464                        "name" if name.is_empty() => name = value.to_string(),
465                        "version" if version.is_empty() => version = value.to_string(),
466                        _ => {}
467                    }
468                }
469            }
470        }
471
472        if !name.is_empty() && !version.is_empty() {
473            Ok(Some((name, version)))
474        } else {
475            Ok(None)
476        }
477    }
478}
479
480/// Convenience function to extract an artifact with secure defaults
481pub fn extract_artifact(artifact_path: &Path, dest_dir: &Path) -> Result<ExtractionResult> {
482    let extractor = PluginExtractor::secure();
483    extractor.extract(artifact_path, dest_dir)
484}
485
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::TempDir;

    /// Write a small plugin tree into `dir` and pack it as
    /// `test-plugin.tar.gz` next to `dir`, rooted at `test-plugin-1.0.0/`.
    /// Returns the path of the created archive.
    fn create_test_artifact(dir: &Path) -> PathBuf {
        // Create test plugin structure
        fs::write(
            dir.join("plugin.toml"),
            r#"[package]
name = "test-plugin"
version = "1.0.0"
abi_version = "2.0""#,
        )
        .unwrap();
        fs::write(dir.join("plugin.so"), b"binary content").unwrap();
        fs::write(dir.join("LICENSE"), "MIT").unwrap();
        fs::write(dir.join("README.md"), "Test plugin").unwrap();

        // Create artifact
        let artifact_path = dir.parent().unwrap().join("test-plugin.tar.gz");
        let file = File::create(&artifact_path).unwrap();
        let enc = flate2::write::GzEncoder::new(file, flate2::Compression::default());
        let mut builder = tar::Builder::new(enc);

        // Add root directory
        let mut header = tar::Header::new_gnu();
        header.set_entry_type(tar::EntryType::Directory);
        header.set_mode(0o755);
        header.set_size(0);
        header.set_cksum();
        builder
            .append_data(
                &mut header,
                Path::new("test-plugin-1.0.0"),
                std::io::empty(),
            )
            .unwrap();

        // Add files. Contents are read as raw bytes so binary payloads such as
        // plugin.so do not have to be valid UTF-8.
        for name in ["plugin.toml", "plugin.so", "LICENSE", "README.md"] {
            let content = fs::read(dir.join(name)).unwrap();
            let mut header = tar::Header::new_gnu();
            header.set_size(content.len() as u64);
            header.set_mode(0o644);
            header.set_cksum();
            let path = format!("test-plugin-1.0.0/{}", name);
            builder
                .append_data(&mut header, Path::new(&path), content.as_slice())
                .unwrap();
        }

        let enc = builder.into_inner().unwrap();
        enc.finish().unwrap();

        artifact_path
    }

    #[test]
    fn test_extract_artifact() {
        let temp_dir = TempDir::new().unwrap();
        let plugin_dir = temp_dir.path().join("plugin_src");
        fs::create_dir_all(&plugin_dir).unwrap();

        let artifact_path = create_test_artifact(&plugin_dir);
        let dest_dir = temp_dir.path().join("extracted");

        let result = extract_artifact(&artifact_path, &dest_dir).unwrap();

        assert!(result.plugin_dir.exists());
        assert!(result.plugin_dir.join("plugin.toml").exists());
        assert_eq!(result.plugin_name, "test-plugin");
        assert_eq!(result.plugin_version, "1.0.0");
        assert!(result.files_extracted > 0);
    }

    #[test]
    fn test_path_traversal_detection() {
        let config = ExtractorConfig::default();
        let extractor = PluginExtractor::new(config);

        // Should reject path traversal
        assert!(extractor.validate_path(Path::new("../etc/passwd")).is_err());
        assert!(extractor
            .validate_path(Path::new("safe/../../../etc/passwd"))
            .is_err());
    }

    #[test]
    fn test_absolute_path_rejection() {
        let config = ExtractorConfig::default();
        let extractor = PluginExtractor::new(config);

        // Should reject absolute paths
        assert!(extractor.validate_path(Path::new("/etc/passwd")).is_err());
    }

    #[test]
    fn test_valid_path() {
        let config = ExtractorConfig::default();
        let extractor = PluginExtractor::new(config);

        // Should accept valid relative paths
        assert!(extractor
            .validate_path(Path::new("plugin-1.0.0/plugin.toml"))
            .is_ok());
        assert!(extractor
            .validate_path(Path::new("plugin-1.0.0/lib/plugin.so"))
            .is_ok());
    }

    #[test]
    fn test_config_defaults() {
        let config = ExtractorConfig::default();

        assert_eq!(config.max_file_size, MAX_FILE_SIZE);
        assert_eq!(config.max_total_size, MAX_TOTAL_SIZE);
        assert!(!config.allow_symlinks);
        assert!(!config.allow_absolute_paths);
        assert!(config.set_executable);
        assert!(!config.overwrite);
    }

    #[test]
    fn test_config_permissive() {
        let config = ExtractorConfig::permissive();

        assert_eq!(config.max_file_size, MAX_FILE_SIZE * 10);
        assert_eq!(config.max_total_size, MAX_TOTAL_SIZE * 10);
        assert_eq!(config.max_path_length, MAX_PATH_LENGTH);
        assert_eq!(config.max_entries, MAX_ENTRIES * 10);
        assert!(config.allow_symlinks);
        assert!(!config.allow_absolute_paths);
        assert!(config.set_executable);
        assert!(config.overwrite);
    }

    #[test]
    fn test_parse_manifest_basic() {
        let extractor = PluginExtractor::secure();

        let toml = r#"[package]
name = "my-plugin"
version = "2.0.0"
abi_version = "2.0""#;

        let result = extractor.parse_manifest_basic(toml).unwrap();
        assert_eq!(result, Some(("my-plugin".to_string(), "2.0.0".to_string())));
    }

    #[test]
    fn test_parse_manifest_flat() {
        let extractor = PluginExtractor::secure();

        let toml = r#"name = "flat-plugin"
version = "1.5.0""#;

        let result = extractor.parse_manifest_basic(toml).unwrap();
        assert_eq!(
            result,
            Some(("flat-plugin".to_string(), "1.5.0".to_string()))
        );
    }

    #[test]
    fn test_parse_manifest_incomplete() {
        let extractor = PluginExtractor::secure();

        // A manifest without a version yields no result
        let toml = r#"[package]
name = "nameless-version""#;

        let result = extractor.parse_manifest_basic(toml).unwrap();
        assert_eq!(result, None);
    }
}