ccd-cli 1.0.0-beta.2

Bootstrap and validate Continuous Context Development repositories
use std::path::{Component, Path, PathBuf};

use anyhow::{bail, Context, Result};

/// Upper bound on how many percent-decoding passes `reject_encoded_unsafe_sequences`
/// applies to a `--path` value while scanning for encoded unsafe sequences.
///
/// The filesystem never percent-decodes path segments before canonicalization, so
/// two rounds are enough to catch the common encoded and double-encoded traversal
/// cases without rejecting deeper literal `%25...` directory names.
const MAX_PERCENT_DECODE_ROUNDS: usize = 2;

/// Validates a user-supplied `--path` value and returns its canonical form.
///
/// The checks run in order: the path must be valid UTF-8, contain no control
/// characters, contain no literal `..` component, and contain no
/// percent-encoded control characters or `..` segments. Only then is the path
/// canonicalized, and the result must be a directory.
///
/// # Errors
///
/// Returns an error if any safety check rejects the value, if
/// canonicalization fails, or if the canonical path is not a directory.
pub fn resolve(path: &Path) -> Result<PathBuf> {
    let raw = match path.to_str() {
        Some(text) => text,
        None => bail!("unsafe --path value: paths must be valid UTF-8"),
    };

    // Cheap lexical checks first, so nothing suspicious reaches the filesystem.
    reject_control_chars(raw)?;
    reject_parent_traversal(path)?;
    reject_encoded_unsafe_sequences(raw)?;

    let resolved = std::fs::canonicalize(path)
        .with_context(|| format!("failed to resolve --path `{raw}`"))?;

    if resolved.is_dir() {
        Ok(resolved)
    } else {
        bail!("--path must point to a directory: {}", resolved.display())
    }
}

/// Fails when `raw` contains at least one control character.
///
/// # Errors
///
/// Returns an "unsafe --path value" error on the first control character found.
fn reject_control_chars(raw: &str) -> Result<()> {
    let is_clean = raw.chars().all(|ch| !ch.is_control());
    if is_clean {
        return Ok(());
    }

    bail!("unsafe --path value: control characters are not allowed")
}

/// Fails when any component of `path` is a parent-directory (`..`) component.
///
/// # Errors
///
/// Returns an "unsafe --path value" error as soon as a `..` component is seen.
fn reject_parent_traversal(path: &Path) -> Result<()> {
    for part in path.components() {
        if part == Component::ParentDir {
            bail!("unsafe --path value: parent-directory traversal (`..`) is not allowed");
        }
    }

    Ok(())
}

/// Percent-decodes `raw` up to `MAX_PERCENT_DECODE_ROUNDS` times and rejects it
/// if any decoded form contains a control character or a `..` segment.
///
/// Decoding stops early once a pass leaves the text unchanged. The undecoded
/// input itself is not inspected here; only the results of each decoding pass.
///
/// # Errors
///
/// Returns an "unsafe --path value" error naming the encoded control character
/// or encoded parent-directory traversal that was found.
fn reject_encoded_unsafe_sequences(raw: &str) -> Result<()> {
    let mut current = String::from(raw);
    let mut rounds_left = MAX_PERCENT_DECODE_ROUNDS;

    while rounds_left > 0 {
        rounds_left -= 1;

        let candidate = percent_decode_once(&current);
        if candidate == current {
            // Nothing left to decode, so further passes cannot reveal anything new.
            return Ok(());
        }

        if candidate.chars().any(|ch| ch.is_control()) {
            bail!("unsafe --path value: encoded control characters are not allowed");
        }

        if has_parent_component(&candidate) {
            bail!("unsafe --path value: encoded parent-directory traversal is not allowed");
        }

        current = candidate;
    }

    Ok(())
}

/// Returns `true` when `input`, split on `/` and `\`, has a segment that is
/// exactly `..`.
fn has_parent_component(input: &str) -> bool {
    let is_separator = |ch: char| ch == '/' || ch == '\\';

    for part in input.split(is_separator) {
        if part == ".." {
            return true;
        }
    }

    false
}

/// Performs a single pass of percent-decoding over `input`.
///
/// Every `%` followed by two hexadecimal digits becomes the byte those digits
/// encode; any other byte (including a `%` without two hex digits after it) is
/// copied through unchanged. The decoded bytes are converted back to a
/// `String` lossily, so invalid UTF-8 is replaced rather than reported.
fn percent_decode_once(input: &str) -> String {
    let src = input.as_bytes();
    let mut out: Vec<u8> = Vec::with_capacity(src.len());
    let mut pos = 0usize;

    while let Some(&current) = src.get(pos) {
        // An escape needs both following bytes to exist and to be hex digits.
        let escape = if current == b'%' {
            match (src.get(pos + 1), src.get(pos + 2)) {
                (Some(&hi), Some(&lo)) => from_hex(hi).zip(from_hex(lo)),
                _ => None,
            }
        } else {
            None
        };

        match escape {
            Some((hi, lo)) => {
                out.push((hi << 4) | lo);
                pos += 3;
            }
            None => {
                out.push(current);
                pos += 1;
            }
        }
    }

    String::from_utf8_lossy(&out).into_owned()
}

/// Converts one ASCII hexadecimal digit (`0-9`, `a-f`, `A-F`) to its numeric
/// value, or returns `None` for any other byte.
fn from_hex(byte: u8) -> Option<u8> {
    // Each digit range maps to (first byte of the range, value of that first byte).
    let (range_start, range_value) = match byte {
        b'0'..=b'9' => (b'0', 0),
        b'a'..=b'f' => (b'a', 10),
        b'A'..=b'F' => (b'A', 10),
        _ => return None,
    };

    Some(byte - range_start + range_value)
}

#[cfg(test)]
mod tests {
    use super::reject_encoded_unsafe_sequences;

    /// Two layers of percent-encoding around `..` are unwrapped within the
    /// decode limit, so the traversal must be reported.
    #[test]
    fn rejects_double_encoded_parent_traversal() {
        let outcome = reject_encoded_unsafe_sequences("%252e%252e/%252e%252e/outside");
        let message = outcome.unwrap_err().to_string();
        assert!(message.contains("encoded parent-directory traversal"));
    }

    /// Three layers still contain `%2e` after the allowed rounds, so the value
    /// is accepted as a literal path.
    #[test]
    fn treats_triple_encoded_parent_traversal_as_literal_path() {
        let outcome = reject_encoded_unsafe_sequences("%25252e%25252e/%25252e%25252e/outside");
        assert!(outcome.is_ok());
    }
}