use std::collections::BTreeMap;
use std::fs;
use std::io;
use std::path::{Path, PathBuf};
use blake3::Hasher;
use serde::{Deserialize, Serialize};
use thiserror::Error;
use crate::project::path_utils::canonicalize_path;
use crate::project::types::ProjectRootMode;
use super::registry::WorkspaceRegistry;
/// Errors reported while constructing a [`LogicalWorkspace`].
#[derive(Debug, Error)]
pub enum LogicalWorkspaceError {
    /// An underlying I/O operation failed (converted automatically from
    /// [`io::Error`] via `?`).
    #[error("io error: {0}")]
    Io(#[from] io::Error),
    /// A path could not be canonicalized.
    #[error("failed to canonicalize {path}: {source}")]
    Canonicalization {
        /// The path that was passed to the canonicalization helper.
        path: PathBuf,
        /// The I/O error the helper reported.
        source: io::Error,
    },
    /// The `.sqry-workspace` registry could not be deserialized.
    #[error("failed to parse .sqry-workspace registry: {0}")]
    ParseSqryWorkspace(serde_json::Error),
    /// The `.code-workspace` file is not valid JSON.
    #[error("failed to parse .code-workspace file: {0}")]
    ParseCodeWorkspace(serde_json::Error),
    /// A folder entry (in `folders` or `sqry.workspace.memberFolders`) has an
    /// unexpected shape or an unrecognised `sqry.role`.
    #[error("malformed .code-workspace folder entry: {reason}")]
    MalformedFolderEntry {
        /// Human-readable description of what is wrong with the entry.
        reason: String,
    },
    /// A path was assigned more than one classification.
    // NOTE(review): nothing in this file constructs this variant — confirm
    // whether another module raises it.
    #[error("conflicting classification for {path}: {kinds}")]
    ConflictingClassification {
        /// The path with conflicting classifications.
        path: PathBuf,
        /// Textual list of the conflicting kinds.
        kinds: String,
    },
}
/// 32-byte workspace identifier: the digest obtained by hashing a
/// [`WorkspaceIdentity`] (see `WorkspaceId::from_identity`).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub struct WorkspaceId([u8; 32]);
impl WorkspaceId {
#[must_use]
pub fn from_identity(identity: &WorkspaceIdentity) -> Self {
let mut hasher = Hasher::new();
identity.write_hash_input(&mut hasher);
Self(*hasher.finalize().as_bytes())
}
#[must_use]
pub fn as_bytes(&self) -> &[u8; 32] {
&self.0
}
#[must_use]
pub fn as_short_hex(&self) -> String {
let full = self.as_full_hex();
full[..16].to_string()
}
#[must_use]
pub fn as_full_hex(&self) -> String {
use std::fmt::Write as _;
let mut s = String::with_capacity(64);
for byte in &self.0 {
let _ = write!(s, "{byte:02x}");
}
s
}
}
/// Displays the identifier in its short (16 hex character) form.
impl std::fmt::Display for WorkspaceId {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let short = self.as_short_hex();
        f.write_str(short.as_str())
    }
}
/// What a workspace is anchored to. The identity is the sole input to
/// `WorkspaceId::from_identity`.
///
/// Serialized with an internal `kind` tag and camelCase names.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
#[serde(tag = "kind", rename_all = "camelCase")]
pub enum WorkspaceIdentity {
    /// Workspace defined by a `.sqry-workspace` registry file.
    SqryWorkspaceFile {
        /// Path of the registry file.
        path: PathBuf,
        /// Whether symlink resolution failed for `path`.
        symlink_unresolved: bool,
    },
    /// Workspace defined by a VS Code `.code-workspace` file.
    VsCodeWorkspaceFile {
        /// Path of the workspace file.
        path: PathBuf,
        /// Whether symlink resolution failed for `path`.
        symlink_unresolved: bool,
    },
    /// Workspace made of several folders with no backing workspace file.
    AnonymousMultiRoot {
        /// The member folders.
        folders: Vec<PathBuf>,
        /// Whether symlink resolution failed for any of the folders.
        symlink_unresolved: bool,
    },
    /// Workspace consisting of exactly one folder.
    SingleRoot {
        /// The root folder.
        path: PathBuf,
        /// Whether symlink resolution failed for `path`.
        symlink_unresolved: bool,
    },
}
impl WorkspaceIdentity {
    /// Per-variant discriminant byte mixed into the hash so that different
    /// identity kinds over the same path produce different ids.
    fn tag_byte(&self) -> u8 {
        match self {
            Self::SqryWorkspaceFile { .. } => 0,
            Self::VsCodeWorkspaceFile { .. } => 1,
            Self::AnonymousMultiRoot { .. } => 2,
            Self::SingleRoot { .. } => 3,
        }
    }

    /// Reads the `symlink_unresolved` flag, which every variant carries.
    fn symlink_unresolved(&self) -> bool {
        let (Self::SqryWorkspaceFile {
            symlink_unresolved: flag,
            ..
        }
        | Self::VsCodeWorkspaceFile {
            symlink_unresolved: flag,
            ..
        }
        | Self::AnonymousMultiRoot {
            symlink_unresolved: flag,
            ..
        }
        | Self::SingleRoot {
            symlink_unresolved: flag,
            ..
        }) = self;
        *flag
    }

    /// Feeds the identity into `hasher`: tag byte, flag byte, then either the
    /// single path or a little-endian folder count followed by each folder.
    fn write_hash_input(&self, hasher: &mut Hasher) {
        let tag = [self.tag_byte()];
        let flag = [u8::from(self.symlink_unresolved())];
        hasher.update(&tag);
        hasher.update(&flag);

        match self {
            Self::AnonymousMultiRoot { folders, .. } => {
                // The count saturates at `u32::MAX` instead of failing.
                let count = u32::try_from(folders.len()).unwrap_or(u32::MAX);
                hasher.update(&count.to_le_bytes());
                folders.iter().for_each(|folder| hash_path(hasher, folder));
            }
            Self::SqryWorkspaceFile { path, .. }
            | Self::VsCodeWorkspaceFile { path, .. }
            | Self::SingleRoot { path, .. } => hash_path(hasher, path),
        }
    }
}
/// Feeds `path` into `hasher` as a little-endian `u32` byte length followed
/// by the bytes of its lossy UTF-8 rendering.
///
/// The length prefix saturates at `u32::MAX`; the bytes are always hashed in
/// full.
fn hash_path(hasher: &mut Hasher, path: &Path) {
    let lossy = path.to_string_lossy();
    let length_prefix = u32::try_from(lossy.len())
        .unwrap_or(u32::MAX)
        .to_le_bytes();
    hasher.update(&length_prefix);
    hasher.update(lossy.as_bytes());
}
/// A folder of the workspace that is treated as source code.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct SourceRoot {
    /// Root directory of the source tree.
    pub path: PathBuf,
    /// Location of the index manifest for this root
    /// (`<path>/.sqry/graph/manifest.json` when built by `from_path`).
    pub index_path: PathBuf,
    /// Optional language hints for this root.
    pub language_hints: Option<Vec<String>>,
    /// `<path>/.sqry/classpath` when `populate_classpath_dir` found it to be
    /// a directory; `None` otherwise.
    pub classpath_dir: Option<PathBuf>,
    /// Per-root configuration fingerprint; `0` is treated as "not set" by
    /// `effective_config_fingerprint`.
    pub config_fingerprint: u64,
}
impl SourceRoot {
    /// Creates a source root for `path` with the default index location
    /// `<path>/.sqry/graph/manifest.json`, no hints, no classpath directory
    /// and an unset (`0`) fingerprint.
    #[must_use]
    pub fn from_path(path: PathBuf) -> Self {
        let mut index_path = path.join(".sqry");
        index_path.push("graph");
        index_path.push("manifest.json");
        Self {
            path,
            index_path,
            language_hints: None,
            classpath_dir: None,
            config_fingerprint: 0,
        }
    }

    /// Probes `<path>/.sqry/classpath` and records it in `classpath_dir` when
    /// it is a directory; clears the field when it is missing or not a
    /// directory.
    ///
    /// # Errors
    ///
    /// Returns any metadata error other than `NotFound`; in that case
    /// `classpath_dir` is left untouched.
    pub fn populate_classpath_dir(&mut self) -> io::Result<()> {
        let candidate = self.path.join(".sqry").join("classpath");
        self.classpath_dir = match fs::metadata(&candidate) {
            Ok(meta) if meta.is_dir() => Some(candidate),
            Ok(_) => None,
            Err(err) if err.kind() == io::ErrorKind::NotFound => None,
            Err(err) => return Err(err),
        };
        Ok(())
    }

    /// Builder-style setter for the per-root fingerprint.
    #[must_use]
    pub fn with_config_fingerprint(self, fingerprint: u64) -> Self {
        Self {
            config_fingerprint: fingerprint,
            ..self
        }
    }

    /// Returns this root's own fingerprint, or `workspace_default` when the
    /// root's fingerprint is `0`.
    #[must_use]
    pub fn effective_config_fingerprint(&self, workspace_default: u64) -> u64 {
        match self.config_fingerprint {
            0 => workspace_default,
            own => own,
        }
    }
}
/// Why a folder is a workspace member without being a source root.
/// Serialized in camelCase.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub enum MemberReason {
    /// Folder marked as operational (the default reason for member overrides).
    OperationalFolder,
    /// Folder explicitly marked as non-source.
    NonSourceFolder,
    /// No language plugin matched; used when the heuristic verdict is
    /// `Unknown`.
    NoLanguagePluginMatch,
}
/// A workspace folder that is a member but not a source root.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct MemberFolder {
    /// Location of the folder.
    pub path: PathBuf,
    /// Why the folder is a member rather than a source root.
    pub reason: MemberReason,
}
/// Result of `LogicalWorkspace::classify` for a path. Serialized with an
/// internal `kind` tag and camelCase names.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
#[serde(tag = "kind", rename_all = "camelCase")]
pub enum Classification {
    /// The path lies under a source root.
    Source,
    /// The path lies under a member folder.
    Member {
        /// The reason recorded on the matching member folder.
        reason: MemberReason,
    },
    /// The path lies under an exclusion.
    Excluded,
    /// The path matches no exclusion, source root or member folder.
    Unknown,
}
/// Verdict returned by the caller-supplied heuristic that
/// `LogicalWorkspace::from_code_workspace` consults for folders with no
/// explicit classification.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum HeuristicVerdict {
    /// Treat the folder as a source root.
    Source,
    /// Treat the folder as a member folder.
    Member {
        /// Reason to record on the member folder.
        reason: MemberReason,
    },
    /// Exclude the folder.
    Excluded,
    /// No decision; `from_code_workspace` records such folders as members
    /// with `MemberReason::NoLanguagePluginMatch`.
    Unknown,
}
/// A workspace: its identity plus the folders it contains, split into source
/// roots, member folders and exclusions.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct LogicalWorkspace {
    /// What the workspace is anchored to.
    identity: WorkspaceIdentity,
    /// Identifier derived from `identity`.
    workspace_id: WorkspaceId,
    /// Folders treated as source code.
    source_roots: Vec<SourceRoot>,
    /// Folders that belong to the workspace but are not source roots.
    member_folders: Vec<MemberFolder>,
    /// Paths excluded from the workspace.
    exclusions: Vec<PathBuf>,
    /// Project-root mode taken from the workspace file, or the default.
    project_root_mode: ProjectRootMode,
    /// Optional index root override; every constructor in this file sets it
    /// to `None`.
    index_root_override: Option<PathBuf>,
    /// Workspace-level configuration fingerprint; constructors initialise it
    /// to `0`.
    config_fingerprint: u64,
}
impl LogicalWorkspace {
/// Builds a workspace from the `.sqry-workspace` registry at `path`.
///
/// The registry is loaded first, then `path` itself is canonicalized to form
/// the identity. Every repository, member folder and exclusion listed in the
/// registry is canonicalized; repositories keep the `index_path` stored in
/// the registry and turn their `primary_language` into a one-element hint
/// list. Classpath directories are probed on a best-effort basis.
///
/// # Errors
///
/// * [`LogicalWorkspaceError::ParseSqryWorkspace`] when the registry fails to
///   deserialize.
/// * [`LogicalWorkspaceError::Io`] for any other registry loading failure.
/// * [`LogicalWorkspaceError::Canonicalization`] when `path` or any listed
///   path cannot be canonicalized.
pub fn from_sqry_workspace(path: &Path) -> Result<Self, LogicalWorkspaceError> {
    let registry = WorkspaceRegistry::load(path).map_err(|err| match err {
        super::error::WorkspaceError::Serialization(e) => {
            LogicalWorkspaceError::ParseSqryWorkspace(e)
        }
        super::error::WorkspaceError::Io { source, .. } => LogicalWorkspaceError::Io(source),
        // Any other registry error is surfaced as an I/O error carrying its message.
        other => LogicalWorkspaceError::Io(io::Error::other(other.to_string())),
    })?;

    let (canonical_path, symlink_unresolved) = canonicalize_with_flag(path)?;
    let identity = WorkspaceIdentity::SqryWorkspaceFile {
        path: maybe_lowercase(&canonical_path),
        symlink_unresolved,
    };
    let workspace_id = WorkspaceId::from_identity(&identity);

    // NOTE(review): unlike `from_code_workspace`, the folder paths below are
    // not passed through `maybe_lowercase`, while `classify` and
    // `is_source_root` lowercase the queried path — confirm this is intended.
    let mut source_roots = Vec::with_capacity(registry.repositories.len());
    for repo in &registry.repositories {
        let (canonical_repo, _unresolved) = canonicalize_with_flag(&repo.root)?;
        let mut root = SourceRoot::from_path(canonical_repo);
        // The registry's index location overrides the default one.
        root.index_path.clone_from(&repo.index_path);
        if let Some(lang) = repo.primary_language.clone() {
            root.language_hints = Some(vec![lang]);
        }
        source_roots.push(root);
    }

    let mut member_folders = Vec::with_capacity(registry.member_folders.len());
    for member in &registry.member_folders {
        let (canonical_root, _unresolved) = canonicalize_with_flag(&member.root)?;
        member_folders.push(MemberFolder {
            path: canonical_root,
            reason: member.reason,
        });
    }

    let mut exclusions = Vec::with_capacity(registry.exclusions.len());
    for excluded in &registry.exclusions {
        let (canonical_excluded, _unresolved) = canonicalize_with_flag(excluded)?;
        exclusions.push(canonical_excluded);
    }

    let mut ws = Self {
        identity,
        workspace_id,
        source_roots,
        member_folders,
        exclusions,
        project_root_mode: registry.project_root_mode,
        index_root_override: None,
        config_fingerprint: 0,
    };
    // Best effort: classpath probing failures do not fail construction.
    let _failures = ws.populate_classpath_dirs();
    Ok(ws)
}
/// Builds a workspace from a VS Code `.code-workspace` file.
///
/// Relative folder paths are resolved against the directory containing
/// `workspace_file`. Each entry of the top-level `folders` array is
/// classified by the first rule that applies:
///
/// 1. the entry's own `sqry.role` (`source`, `operational`, `non-source` /
///    `nonSource` / `non_source`, `excluded`);
/// 2. membership in `sqry.workspace.exclusions`;
/// 3. membership in `sqry.workspace.sourceRoots`;
/// 4. an entry in `sqry.workspace.memberFolders`;
/// 5. the verdict of `heuristic_fn`, where `Unknown` becomes a member with
///    [`MemberReason::NoLanguagePluginMatch`].
///
/// Paths that appear only in the `sqry.workspace` lists are then added,
/// first from `sourceRoots`, then `exclusions`, then `memberFolders`, and an
/// earlier classification is never overwritten. Finally every classified path
/// is canonicalized (and lowercased where `maybe_lowercase` decides to), and
/// classpath directories are probed on a best-effort basis.
///
/// # Errors
///
/// * [`LogicalWorkspaceError::Io`] when the file cannot be read.
/// * [`LogicalWorkspaceError::ParseCodeWorkspace`] when it is not valid JSON.
/// * [`LogicalWorkspaceError::MalformedFolderEntry`] for a folder without a
///   string `path`, an unknown `sqry.role`, or a malformed `memberFolders`
///   item.
/// * [`LogicalWorkspaceError::Canonicalization`] when the workspace file or
///   any classified folder cannot be canonicalized.
// The precedence rules are kept in one function so they can be read in order.
#[allow(clippy::too_many_lines)]
pub fn from_code_workspace(
    workspace_file: &Path,
    heuristic_fn: &dyn Fn(&Path) -> HeuristicVerdict,
) -> Result<Self, LogicalWorkspaceError> {
    let bytes = fs::read(workspace_file)?;
    let json: serde_json::Value =
        serde_json::from_slice(&bytes).map_err(LogicalWorkspaceError::ParseCodeWorkspace)?;
    let workspace_dir = workspace_file
        .parent()
        .map_or_else(|| PathBuf::from("."), Path::to_path_buf);

    let (canonical_workspace_file, symlink_unresolved) =
        canonicalize_with_flag(workspace_file)?;
    let identity = WorkspaceIdentity::VsCodeWorkspaceFile {
        path: maybe_lowercase(&canonical_workspace_file),
        symlink_unresolved,
    };
    let workspace_id = WorkspaceId::from_identity(&identity);

    let sqry_top = json.get("sqry.workspace");
    let top_source_roots = path_set_from_value(sqry_top, "sourceRoots", &workspace_dir);
    let top_exclusions = path_set_from_value(sqry_top, "exclusions", &workspace_dir);
    let top_members = member_overrides_from_value(sqry_top, &workspace_dir)?;
    let project_root_mode = sqry_top
        .and_then(|v| v.get("projectRootMode"))
        .and_then(|v| v.as_str())
        .and_then(ProjectRootMode::from_str_opt)
        .unwrap_or_default();

    // Borrow the `folders` array in place; a missing or non-array value is
    // treated as an empty list.
    let folder_entries = json
        .get("folders")
        .and_then(|v| v.as_array())
        .into_iter()
        .flatten();

    let mut classified: BTreeMap<PathBuf, FolderClassKind> = BTreeMap::new();
    for (idx, entry) in folder_entries.enumerate() {
        let raw_path = entry.get("path").and_then(|v| v.as_str()).ok_or_else(|| {
            LogicalWorkspaceError::MalformedFolderEntry {
                reason: format!("folders[{idx}] missing string `path`"),
            }
        })?;
        let abs = if Path::new(raw_path).is_absolute() {
            PathBuf::from(raw_path)
        } else {
            workspace_dir.join(raw_path)
        };

        // First matching rule wins; the heuristic is consulted only when no
        // explicit configuration mentions the folder.
        let kind = if let Some(role) = entry.get("sqry.role").and_then(|v| v.as_str()) {
            match role {
                "source" => FolderClassKind::Source,
                "operational" => FolderClassKind::Member(MemberReason::OperationalFolder),
                "non-source" | "nonSource" | "non_source" => {
                    FolderClassKind::Member(MemberReason::NonSourceFolder)
                }
                "excluded" => FolderClassKind::Excluded,
                other => {
                    return Err(LogicalWorkspaceError::MalformedFolderEntry {
                        reason: format!(
                            "folders[{idx}].sqry.role = '{other}' (expected source|operational|excluded|non-source)"
                        ),
                    });
                }
            }
        } else if top_exclusions.contains(&abs) {
            FolderClassKind::Excluded
        } else if top_source_roots.contains(&abs) {
            FolderClassKind::Source
        } else if let Some(reason) = top_members.get(&abs) {
            FolderClassKind::Member(*reason)
        } else {
            match heuristic_fn(&abs) {
                HeuristicVerdict::Source => FolderClassKind::Source,
                HeuristicVerdict::Member { reason } => FolderClassKind::Member(reason),
                HeuristicVerdict::Excluded => FolderClassKind::Excluded,
                HeuristicVerdict::Unknown => {
                    FolderClassKind::Member(MemberReason::NoLanguagePluginMatch)
                }
            }
        };
        classified.insert(abs, kind);
    }

    // Add paths that are configured under `sqry.workspace` but not listed in
    // `folders`; existing classifications are left untouched.
    // NOTE(review): here `sourceRoots` is applied before `exclusions`, the
    // reverse of the per-folder precedence above — confirm this is intended.
    for path in &top_source_roots {
        classified
            .entry(path.clone())
            .or_insert(FolderClassKind::Source);
    }
    for path in &top_exclusions {
        classified
            .entry(path.clone())
            .or_insert(FolderClassKind::Excluded);
    }
    for (path, reason) in &top_members {
        classified
            .entry(path.clone())
            .or_insert(FolderClassKind::Member(*reason));
    }

    let mut source_roots = Vec::new();
    let mut member_folders = Vec::new();
    let mut exclusions = Vec::new();
    for (raw_path, kind) in classified {
        let (canonical, _unresolved) = canonicalize_with_flag(&raw_path)?;
        let canonical = maybe_lowercase(&canonical);
        match kind {
            FolderClassKind::Source => source_roots.push(SourceRoot::from_path(canonical)),
            FolderClassKind::Member(reason) => member_folders.push(MemberFolder {
                path: canonical,
                reason,
            }),
            FolderClassKind::Excluded => exclusions.push(canonical),
        }
    }

    let mut ws = Self {
        identity,
        workspace_id,
        source_roots,
        member_folders,
        exclusions,
        project_root_mode,
        index_root_override: None,
        config_fingerprint: 0,
    };
    // Best effort: classpath probing failures do not fail construction.
    let _failures = ws.populate_classpath_dirs();
    Ok(ws)
}
/// Builds a workspace from a bare list of folders with no workspace file.
///
/// Every folder is canonicalized (and lowercased where `maybe_lowercase`
/// decides to), the list is sorted, and each folder becomes a source root.
/// The identity's `symlink_unresolved` flag is set when it was set for any
/// folder. Classpath directories are probed on a best-effort basis.
///
/// # Errors
///
/// Returns [`LogicalWorkspaceError::Canonicalization`] for the first folder
/// that cannot be canonicalized.
// The signature takes ownership for API compatibility even though the body
// only borrows.
#[allow(clippy::needless_pass_by_value)]
pub fn anonymous_multi_root(folders: Vec<PathBuf>) -> Result<Self, LogicalWorkspaceError> {
    let mut symlink_unresolved = false;
    let mut roots = folders
        .iter()
        .map(|folder| -> Result<PathBuf, LogicalWorkspaceError> {
            let (canon, unresolved) = canonicalize_with_flag(folder)?;
            symlink_unresolved |= unresolved;
            Ok(maybe_lowercase(&canon))
        })
        .collect::<Result<Vec<PathBuf>, LogicalWorkspaceError>>()?;
    // Sorting makes the identity independent of the order the folders were given in.
    roots.sort();

    let source_roots = roots
        .iter()
        .map(|root| SourceRoot::from_path(root.clone()))
        .collect();
    let identity = WorkspaceIdentity::AnonymousMultiRoot {
        folders: roots,
        symlink_unresolved,
    };
    let mut workspace = Self {
        workspace_id: WorkspaceId::from_identity(&identity),
        identity,
        source_roots,
        member_folders: Vec::new(),
        exclusions: Vec::new(),
        project_root_mode: ProjectRootMode::default(),
        index_root_override: None,
        config_fingerprint: 0,
    };
    // Best effort: classpath probing failures do not fail construction.
    let _ignored = workspace.populate_classpath_dirs();
    Ok(workspace)
}
#[allow(clippy::needless_pass_by_value)] pub fn single_root(path: PathBuf) -> Result<Self, LogicalWorkspaceError> {
let (canonical, symlink_unresolved) = canonicalize_with_flag(&path)?;
let canonical = maybe_lowercase(&canonical);
let identity = WorkspaceIdentity::SingleRoot {
path: canonical.clone(),
symlink_unresolved,
};
let workspace_id = WorkspaceId::from_identity(&identity);
let mut ws = Self {
identity,
workspace_id,
source_roots: vec![SourceRoot::from_path(canonical)],
member_folders: Vec::new(),
exclusions: Vec::new(),
project_root_mode: ProjectRootMode::default(),
index_root_override: None,
config_fingerprint: 0,
};
let _failures = ws.populate_classpath_dirs();
Ok(ws)
}
/// Test-only variant of `single_root` in which the caller decides whether the
/// canonical path is lowercased, instead of probing the filesystem.
///
/// Unlike `single_root`, this does not probe classpath directories.
///
/// # Errors
///
/// Returns [`LogicalWorkspaceError::Canonicalization`] when `path` cannot be
/// canonicalized.
#[cfg(test)]
#[allow(clippy::needless_pass_by_value)]
pub(crate) fn single_root_with_case_sensitivity(
    path: PathBuf,
    case_insensitive: bool,
) -> Result<Self, LogicalWorkspaceError> {
    let (resolved, symlink_unresolved) = canonicalize_with_flag(&path)?;
    let root = match case_insensitive {
        true => PathBuf::from(resolved.to_string_lossy().to_lowercase()),
        false => resolved,
    };
    let source_roots = vec![SourceRoot::from_path(root.clone())];
    let identity = WorkspaceIdentity::SingleRoot {
        path: root,
        symlink_unresolved,
    };
    Ok(Self {
        workspace_id: WorkspaceId::from_identity(&identity),
        identity,
        source_roots,
        member_folders: Vec::new(),
        exclusions: Vec::new(),
        project_root_mode: ProjectRootMode::default(),
        index_root_override: None,
        config_fingerprint: 0,
    })
}
/// Returns the identifier derived from this workspace's identity.
#[must_use]
pub fn workspace_id(&self) -> &WorkspaceId {
    &self.workspace_id
}
/// Returns what this workspace is anchored to.
#[must_use]
pub fn identity(&self) -> &WorkspaceIdentity {
    &self.identity
}
/// Returns the folders treated as source code.
#[must_use]
pub fn source_roots(&self) -> &[SourceRoot] {
    &self.source_roots
}
/// Returns the folders that are members but not source roots.
#[must_use]
pub fn member_folders(&self) -> &[MemberFolder] {
    &self.member_folders
}
/// Returns the excluded paths.
#[must_use]
pub fn exclusions(&self) -> &[PathBuf] {
    &self.exclusions
}
/// Returns the project-root mode recorded for this workspace.
#[must_use]
pub fn project_root_mode(&self) -> ProjectRootMode {
    self.project_root_mode
}
/// Returns the index root override, if one is set.
#[must_use]
pub fn index_root_override(&self) -> Option<&Path> {
    self.index_root_override.as_deref()
}
/// Returns the workspace-level configuration fingerprint.
#[must_use]
pub fn config_fingerprint(&self) -> u64 {
    self.config_fingerprint
}
/// Sets the workspace-level configuration fingerprint without touching the
/// per-root fingerprints.
pub fn set_config_fingerprint(&mut self, fingerprint: u64) {
    self.config_fingerprint = fingerprint;
}
/// Sets the workspace-level fingerprint and copies it into every source root
/// whose own fingerprint is still `0`.
pub fn set_config_fingerprint_with_inheritance(&mut self, fingerprint: u64) {
    self.config_fingerprint = fingerprint;
    self.source_roots
        .iter_mut()
        .filter(|root| root.config_fingerprint == 0)
        .for_each(|root| root.config_fingerprint = fingerprint);
}
/// Probes the classpath directory of every source root and returns the
/// `(root path, error)` pairs for the roots whose probe failed.
///
/// A failure on one root does not stop the remaining roots from being probed.
pub fn populate_classpath_dirs(&mut self) -> Vec<(PathBuf, io::Error)> {
    self.source_roots
        .iter_mut()
        .filter_map(|root| {
            let outcome = root.populate_classpath_dir();
            outcome.err().map(|err| (root.path.clone(), err))
        })
        .collect()
}
/// Reports whether `path` is exactly one of the workspace's source roots.
///
/// The path is canonicalized (and lowercased where `maybe_lowercase` decides
/// to) before the comparison; when canonicalization fails it is compared
/// as given.
#[must_use]
pub fn is_source_root(&self, path: &Path) -> bool {
    let wanted = match canonicalize_path(path) {
        Ok(resolved) => maybe_lowercase(&resolved),
        Err(_) => path.to_path_buf(),
    };
    self.source_roots
        .iter()
        .map(|root| &root.path)
        .any(|candidate| *candidate == wanted)
}
/// Classifies `path` against the workspace.
///
/// The path is canonicalized (and lowercased where `maybe_lowercase` decides
/// to); when canonicalization fails it is used as given. Exclusions are
/// checked first, then source roots, then member folders in order; a path
/// matching none of them is [`Classification::Unknown`].
#[must_use]
pub fn classify(&self, path: &Path) -> Classification {
    let target = match canonicalize_path(path) {
        Ok(resolved) => maybe_lowercase(&resolved),
        Err(_) => path.to_path_buf(),
    };
    let is_under = |root: &Path| path_matches(&target, root);

    if self.exclusions.iter().any(|excl| is_under(excl.as_path())) {
        Classification::Excluded
    } else if self
        .source_roots
        .iter()
        .any(|root| is_under(root.path.as_path()))
    {
        Classification::Source
    } else {
        self.member_folders
            .iter()
            .find(|member| is_under(member.path.as_path()))
            .map_or(Classification::Unknown, |member| Classification::Member {
                reason: member.reason,
            })
    }
}
}
/// Intermediate classification assigned to a folder while
/// `LogicalWorkspace::from_code_workspace` processes a workspace file.
#[derive(Debug, Clone, Copy)]
enum FolderClassKind {
    /// The folder becomes a source root.
    Source,
    /// The folder becomes a member folder with the given reason.
    Member(MemberReason),
    /// The folder becomes an exclusion.
    Excluded,
}
/// Reports whether `path` equals `prefix` or lies beneath it.
///
/// The comparison is component-wise, so `/a/bc` is not under `/a/b`.
fn path_matches(path: &Path, prefix: &Path) -> bool {
    // `starts_with` already accepts the case where both paths are equal.
    path.starts_with(prefix)
}
/// Canonicalizes `path` with the project helper and reports alongside it
/// whether `std::fs::canonicalize` failed for the same path.
///
/// # Errors
///
/// Returns [`LogicalWorkspaceError::Canonicalization`] when the project
/// helper fails.
fn canonicalize_with_flag(path: &Path) -> Result<(PathBuf, bool), LogicalWorkspaceError> {
    // The flag is `true` when the standard-library resolution failed, even if
    // the helper below still yields a path.
    let symlink_unresolved = fs::canonicalize(path).is_err();
    match canonicalize_path(path) {
        Ok(canonical) => Ok((canonical, symlink_unresolved)),
        Err(source) => Err(LogicalWorkspaceError::Canonicalization {
            path: path.to_path_buf(),
            source,
        }),
    }
}
/// Returns `path` lowercased (via its lossy string form) when
/// `is_case_insensitive_mount` reports a case-insensitive location, and an
/// unchanged copy otherwise.
fn maybe_lowercase(path: &Path) -> PathBuf {
    if !is_case_insensitive_mount(path) {
        return path.to_path_buf();
    }
    PathBuf::from(path.to_string_lossy().to_lowercase())
}
fn is_case_insensitive_mount(path: &Path) -> bool {
let s = path.to_string_lossy();
if !s.chars().any(|c| c.is_ascii_alphabetic()) {
return false;
}
let Ok(orig) = fs::metadata(path) else {
return false;
};
let lower = PathBuf::from(s.to_lowercase());
let upper = PathBuf::from(s.to_uppercase());
let lower_ok = fs::metadata(&lower)
.ok()
.filter(|m| same_inode(m, &orig))
.is_some();
let upper_ok = fs::metadata(&upper)
.ok()
.filter(|m| same_inode(m, &orig))
.is_some();
let varies = lower != path || upper != path;
varies && lower_ok && upper_ok
}
/// Reports whether two metadata records describe the same filesystem entry,
/// by comparing device and inode numbers.
#[cfg(unix)]
fn same_inode(a: &fs::Metadata, b: &fs::Metadata) -> bool {
    use std::os::unix::fs::MetadataExt;
    (a.dev(), a.ino()) == (b.dev(), b.ino())
}

/// Approximates "same filesystem entry" on platforms without inode numbers by
/// comparing length and modification time.
#[cfg(not(unix))]
fn same_inode(a: &fs::Metadata, b: &fs::Metadata) -> bool {
    let fingerprint = |meta: &fs::Metadata| (meta.len(), meta.modified().ok());
    fingerprint(a) == fingerprint(b)
}
/// Collects the string items of the array stored under `key` in the
/// `sqry.workspace` object into a set of paths.
///
/// Relative items are joined onto `base_dir`; non-string items are skipped.
/// A missing object, missing key or non-array value yields an empty set.
fn path_set_from_value(
    sqry_top: Option<&serde_json::Value>,
    key: &str,
    base_dir: &Path,
) -> std::collections::BTreeSet<PathBuf> {
    sqry_top
        .and_then(|top| top.get(key))
        .and_then(|value| value.as_array())
        .into_iter()
        .flatten()
        .filter_map(|item| item.as_str())
        .map(|raw| {
            let candidate = Path::new(raw);
            if candidate.is_absolute() {
                candidate.to_path_buf()
            } else {
                base_dir.join(candidate)
            }
        })
        .collect()
}
/// Parses `sqry.workspace.memberFolders` into a map from path to member
/// reason.
///
/// Each item is either a string (a path, recorded as
/// [`MemberReason::OperationalFolder`]) or an object with a string `path` and
/// an optional `reason`. A missing, non-string or unrecognised `reason` falls
/// back to `OperationalFolder`. Relative paths are joined onto `base_dir`. A
/// missing object, missing key or non-array value yields an empty map.
///
/// # Errors
///
/// Returns [`LogicalWorkspaceError::MalformedFolderEntry`] when an item is
/// neither a string nor an object, or when an object lacks a string `path`.
fn member_overrides_from_value(
    sqry_top: Option<&serde_json::Value>,
    base_dir: &Path,
) -> Result<BTreeMap<PathBuf, MemberReason>, LogicalWorkspaceError> {
    let entries = sqry_top
        .and_then(|top| top.get("memberFolders"))
        .and_then(|value| value.as_array());
    let Some(entries) = entries else {
        return Ok(BTreeMap::new());
    };

    let mut overrides = BTreeMap::new();
    for (idx, item) in entries.iter().enumerate() {
        let (raw_path, reason) = match item {
            serde_json::Value::String(text) => (text.as_str(), MemberReason::OperationalFolder),
            serde_json::Value::Object(obj) => {
                let raw_path = obj.get("path").and_then(|v| v.as_str()).ok_or_else(|| {
                    LogicalWorkspaceError::MalformedFolderEntry {
                        reason: format!(
                            "sqry.workspace.memberFolders[{idx}] object missing string `path`"
                        ),
                    }
                })?;
                let reason = match obj.get("reason").and_then(|v| v.as_str()) {
                    Some("non-source" | "nonSource" | "non_source") => {
                        MemberReason::NonSourceFolder
                    }
                    Some("noLanguagePluginMatch" | "no-language-plugin-match") => {
                        MemberReason::NoLanguagePluginMatch
                    }
                    // "operational", an unrecognised value, or no usable
                    // `reason` at all.
                    _ => MemberReason::OperationalFolder,
                };
                (raw_path, reason)
            }
            _ => {
                return Err(LogicalWorkspaceError::MalformedFolderEntry {
                    reason: format!(
                        "sqry.workspace.memberFolders[{idx}] is neither a string nor an object"
                    ),
                });
            }
        };

        let candidate = Path::new(raw_path);
        let abs = if candidate.is_absolute() {
            candidate.to_path_buf()
        } else {
            base_dir.join(candidate)
        };
        overrides.insert(abs, reason);
    }
    Ok(overrides)
}