// rspack_util 0.100.1
//
// rspack util
// Documentation
use std::{
  borrow::Cow,
  path::{Path, PathBuf},
  sync::LazyLock,
};

use concat_string::concat_string;
use cow_utils::CowUtils;
use regex::Regex;
use sugar_path::SugarPath;

/// Matches a single `|` or `!` character (captured as group 1).
///
/// Used by `make_paths_absolute` (through `split_keep`) to cut an identifier
/// into path segments while keeping the separators themselves.
static SEGMENTS_SPLIT_REGEXP: LazyLock<Regex> =
  LazyLock::new(|| Regex::new(r"([|!])").expect("should be a valid regex"));
/// The two path separator characters (`/` and `\`). Every occurrence is
/// rewritten to `/` when a relative Windows path is turned into a request.
static WINDOWS_PATH_SEPARATOR: &[char] = &['/', '\\'];

/// Splits `path` at its first `?`.
///
/// Returns the part before the `?` and, when a `?` exists, the remainder
/// *including* the leading `?`. Without a `?` the whole input is returned
/// together with `None`.
///
/// # Example
///  ```ignore
/// assert_eq!(
///   split_at_query_mark("/hello?world=1"),
///   ("/hello", Some("?world=1"))
/// )
/// ```
fn split_at_query_mark(path: &str) -> (&str, Option<&str>) {
  match path.find('?') {
    Some(mark) => {
      // `mark` is the byte offset of an ASCII `?`, so it is a char boundary.
      let (resource, query) = path.split_at(mark);
      (resource, Some(query))
    }
    None => (path, None),
  }
}

// Port from https://github.com/webpack/webpack/blob/4b4ca3bb53f36a5b8fc6bc1bd976ed7af161bd80/lib/util/identifier.js#L30
/// Converts an absolute path into a request relative to `context`.
///
/// The input is handed back borrowed and untouched when it is either
/// - longer than one character and both starts and ends with `/` (such a
///   "path" is really a regexp generated by dynamic requires), or
/// - neither POSIX-absolute (leading `/`) nor Windows-absolute.
///
/// Otherwise a new string is built by `push_absolute_to_request`.
pub fn absolute_to_request<'b>(context: &str, maybe_absolute_path: &'b str) -> Cow<'b, str> {
  let posix_rooted = maybe_absolute_path.starts_with('/');
  let regexp_like =
    posix_rooted && maybe_absolute_path.len() > 1 && maybe_absolute_path.ends_with('/');

  // Nothing to rewrite: regexp-like inputs and non-absolute inputs pass through.
  if regexp_like || !(posix_rooted || is_windows_absolute_path(maybe_absolute_path)) {
    return Cow::Borrowed(maybe_absolute_path);
  }

  let mut request = String::with_capacity(maybe_absolute_path.len());
  push_absolute_to_request(context, maybe_absolute_path, &mut request);
  Cow::Owned(request)
}

/// Turns a relative path into request form.
///
/// - `""` becomes `"./."`
/// - `".."` becomes `"../."`
/// - anything starting with `"../"` is returned unchanged (borrowed)
/// - everything else is prefixed with `"./"` (allocates)
///
/// # Context
/// First introduced at https://github.com/webpack/webpack/commit/5563ee9e583602eb38ab21219a327d346cd16218#r120784061
/// Introduced at https://github.com/webpack/webpack/commit/c76be4d7383f35b3260dafefbcd24cac245d9e42
/// Fix https://github.com/webpack/webpack/issues/14014
pub fn relative_path_to_request(rel: &str) -> Cow<'_, str> {
  match rel {
    "" => Cow::Borrowed("./."),
    ".." => Cow::Borrowed("../."),
    upward if upward.starts_with("../") => Cow::Borrowed(upward),
    other => Cow::Owned(concat_string!("./", other)),
  }
}

/// Returns `true` when `path` begins with an ASCII drive letter, a `:`, and
/// then either `/` or `\` (for example `C:\` or `d:/`).
#[inline]
fn is_windows_absolute_path(path: &str) -> bool {
  matches!(
    path.as_bytes(),
    [drive, b':', b'/' | b'\\', ..] if drive.is_ascii_alphabetic()
  )
}

/// Appends the request form of the relative path `rel` to `out`.
///
/// - `""` appends `"./."`
/// - `".."` appends `"../."`
/// - a path starting with `"../"` is appended as-is
/// - anything else is appended with a `"./"` prefix
///
/// Existing contents of `out` are left in place.
#[inline]
fn push_relative_path_to_request(rel: &str, out: &mut String) {
  match rel {
    "" => out.push_str("./."),
    ".." => out.push_str("../."),
    _ => {
      // Only paths that do not already climb upwards need the `./` marker.
      if !rel.starts_with("../") {
        out.push_str("./");
      }
      out.push_str(rel);
    }
  }
}

/// Appends a request-form path for `maybe_absolute_path` into `out`.
///
/// This function appends to the provided output buffer and does not clear or
/// overwrite existing contents in `out`.
///
/// Accepted inputs:
/// - Absolute POSIX paths are converted to a request relative to `context`.
/// - Absolute Windows paths are converted similarly when possible.
/// - Query parts (for example `?foo`) are preserved and appended.
/// - Non-absolute inputs are accepted and appended unchanged.
pub fn push_absolute_to_request(context: &str, maybe_absolute_path: &str, out: &mut String) {
  if maybe_absolute_path.starts_with('/')
    && maybe_absolute_path.len() > 1
    && maybe_absolute_path.ends_with('/')
  {
    out.push_str(maybe_absolute_path);
    return;
  }

  if maybe_absolute_path.starts_with('/') {
    let (maybe_absolute_resource, query_part) = split_at_query_mark(maybe_absolute_path);
    let tmp = Path::new(maybe_absolute_resource).relative(context);
    let tmp_path = tmp.to_string_lossy();
    push_relative_path_to_request(&tmp_path, out);
    if let Some(query_part) = query_part {
      out.push_str(query_part);
    }
    return;
  }

  if is_windows_absolute_path(maybe_absolute_path) {
    let (maybe_absolute_resource, query_part) = split_at_query_mark(maybe_absolute_path);
    let relative_resource = maybe_absolute_resource.as_path().relative(context);
    let resource = relative_resource.to_string_lossy();

    // In windows, A path that relative to a another path could still be absolute.
    // ("d:/aaaa/cccc").relative("c:/aaaaa/") would get "d:/aaaa/cccc".
    if is_windows_absolute_path(resource.as_ref()) {
      out.push_str(resource.as_ref());
    } else {
      let resource = resource.cow_replace(WINDOWS_PATH_SEPARATOR, "/");
      push_relative_path_to_request(resource.as_ref(), out);
    }

    if let Some(query_part) = query_part {
      out.push_str(query_part);
    }
    return;
  }

  out.push_str(maybe_absolute_path);
}

/// Resolves a single request segment against `context`.
///
/// - A segment starting with `./` has that prefix removed and is joined onto
///   `context`.
/// - A segment starting with `../` is joined onto `context` as-is; the `..`
///   components are not collapsed.
/// - Any other segment is returned unchanged.
///
/// The joined path is converted with `to_string_lossy`, so non-UTF-8 data
/// would be replaced rather than cause a failure.
fn request_to_absolute(context: &str, relative_path: &str) -> String {
  let joined: PathBuf = if let Some(rest) = relative_path.strip_prefix("./") {
    Path::new(context).join(rest)
  } else if relative_path.starts_with("../") {
    Path::new(context).join(relative_path)
  } else {
    // Not a relative request: nothing to resolve, so hand it back verbatim.
    return relative_path.to_owned();
  };
  joined.to_string_lossy().into_owned()
}

/// Rewrites every relative segment of `identifier` against `context`.
///
/// The identifier is cut at each `|` and `!`; every piece (separators
/// included) goes through `request_to_absolute` and the results are
/// concatenated in their original order.
pub fn make_paths_absolute(context: &str, identifier: &str) -> String {
  let mut absolute = String::new();
  for piece in split_keep(&SEGMENTS_SPLIT_REGEXP, identifier) {
    absolute.push_str(&request_to_absolute(context, piece));
  }
  absolute
}

/// Appends `identifier` to `out` with each `|`/`!`-delimited segment passed
/// through `push_absolute_to_request`.
///
/// Separators are copied through unchanged. Empty segments (for example
/// between two adjacent separators) are still handed to
/// `push_absolute_to_request`.
fn push_make_paths_relative(context: &str, identifier: &str, out: &mut String) {
  let mut segment_start = 0;

  for (separator_at, separator) in identifier.match_indices(['|', '!']) {
    push_absolute_to_request(context, &identifier[segment_start..separator_at], out);
    out.push_str(separator);
    segment_start = separator_at + separator.len();
  }

  // Trailing segment after the last separator (the whole input if there was none).
  push_absolute_to_request(context, &identifier[segment_start..], out);
}

/// Returns `identifier` with every `|`/`!`-delimited segment converted to a
/// request relative to `context`.
///
/// The output buffer is pre-sized to the length of `identifier`.
pub fn make_paths_relative(context: &str, identifier: &str) -> String {
  let mut relative = String::with_capacity(identifier.len());
  push_make_paths_relative(context, identifier, &mut relative);
  relative
}

/// Replaces every zero-width space (U+200B) that is immediately followed by
/// `#` with a bare `#`.
///
/// Zero-width spaces elsewhere in `s` are left alone.
pub fn strip_zero_width_space_for_fragment(s: &str) -> Cow<'_, str> {
  const ESCAPED_HASH: &str = "\u{200b}#";
  s.cow_replace(ESCAPED_HASH, "#")
}

/// Puts a zero-width space (U+200B) in front of every `#` in `s`.
///
/// Every `#` is prefixed, including ones that already have a zero-width space
/// before them, so calling this twice inserts two.
// NOTE(review): presumably this keeps a literal `#` from being read as a
// fragment delimiter — confirm against callers.
pub fn insert_zero_width_space_for_fragment(s: &str) -> Cow<'_, str> {
  const ESCAPED_HASH: &str = "\u{200b}#";
  s.cow_replace("#", ESCAPED_HASH)
}

/// Splits `text` at every match of `r`, keeping the matched text as its own
/// element.
///
/// Pieces come back in source order. Empty stretches between matches (or at
/// either end of `text`) are omitted; the matches themselves are always
/// included.
fn split_keep<'a>(r: &Regex, text: &'a str) -> Vec<&'a str> {
  let mut pieces = Vec::new();
  let mut cursor = 0;
  for found in r.find_iter(text) {
    // Text between the previous match and this one, if there is any.
    if found.start() != cursor {
      pieces.push(&text[cursor..found.start()]);
    }
    pieces.push(found.as_str());
    cursor = found.end();
  }
  if cursor < text.len() {
    pieces.push(&text[cursor..]);
  }
  pieces
}

/// Matches one or more leading `./` or `../` path prefixes at the start of a
/// request. `request_to_id` strips this match.
static REQUEST_TO_ID_REGEX1: LazyLock<Regex> =
  LazyLock::new(|| Regex::new(r"^(\.\.?/)+").expect("Failed to initialize REQUEST_TO_ID_REGEX1"));
/// Matches a run of characters that `request_to_id` replaces with a single
/// `_`: a `.` or `-` at the very start of the input, or any character outside
/// `[a-zA-Z0-9_-]`.
static REQUEST_TO_ID_REGEX2: LazyLock<Regex> = LazyLock::new(|| {
  Regex::new(r"(^[.-]|[^a-zA-Z0-9_-])+").expect("Failed to initialize REQUEST_TO_ID_REGEX2")
});

/// Derives an identifier-safe string from `request`.
///
/// Two steps, in order:
/// 1. the leading run of `./` / `../` prefixes is removed;
/// 2. every run of disallowed characters in the remainder (a `.` or `-` at its
///    start, or anything outside `[a-zA-Z0-9_-]`) is collapsed into one `_`.
pub fn request_to_id(request: &str) -> String {
  let without_relative_prefix = REQUEST_TO_ID_REGEX1.replace(request, "");
  let sanitized = REQUEST_TO_ID_REGEX2.replace_all(&without_relative_prefix, "_");
  sanitized.into_owned()
}

/// Covers the three input shapes of `push_absolute_to_request`: an absolute
/// POSIX path with a query, a regexp-like `/…/` string, and a bare module name.
#[test]
fn test_push_absolute_to_request() {
  const CONTEXT: &str = "/workspace/app";
  // (input, expected output) pairs; each case starts from an empty buffer.
  let cases = [
    ("/workspace/app/src/index.js?foo=1", "./src/index.js?foo=1"),
    ("/regexp/", "/regexp/"),
    ("loader", "loader"),
  ];

  for (input, expected) in cases {
    let mut out = String::new();
    push_absolute_to_request(CONTEXT, input, &mut out);
    assert_eq!(out, expected);
  }
}