use std::collections::BTreeSet;
use std::fs;
use std::path::{Path, PathBuf};
use std::sync::{Arc, Mutex};
use crate::workspace_ignores::hard_excluded_runtime_path;
use ignore::{WalkBuilder, WalkState};
use super::surfaces::{
NESTED_ROOT_SCOPED_RUNTIME_CONFIG_PATHS, is_ci_workflow_path,
is_entrypoint_build_workflow_path, is_root_scoped_runtime_config_path,
};
use super::{
HybridRankingIntent, NormalizedSearchFilters, SearchDiagnostic, SearchDiagnosticKind,
SearchExecutionDiagnostics, SearchTextQuery,
};
/// Appends to `base` every entry of `supplement` that is not already present.
///
/// Entries are compared as whole `(relative path, path)` pairs. The existing
/// order of `base` is kept, new entries are appended in `supplement` order, and
/// a pair repeated inside `supplement` is only appended once.
pub(super) fn merge_candidate_files(
    base: &mut Vec<(String, PathBuf)>,
    supplement: Vec<(String, PathBuf)>,
) {
    // Snapshot of everything known so far; `insert` returning `true` means the
    // pair has not been seen in `base` or earlier in `supplement`.
    let mut known: BTreeSet<(String, PathBuf)> = base.iter().cloned().collect();
    base.extend(
        supplement
            .into_iter()
            .filter(|candidate| known.insert(candidate.clone())),
    );
}
/// Walks `root` with the default search walker and returns the files that pass
/// the optional language filter and the optional path regex of `query`.
///
/// Each returned pair is `(repository-relative path, walked path)`. Diagnostics
/// produced by the walk are appended to `diagnostics.entries`.
pub(super) fn walk_candidate_files_for_repository(
    repository_id: &str,
    root: &Path,
    query: &SearchTextQuery,
    filters: &NormalizedSearchFilters,
    diagnostics: &mut SearchExecutionDiagnostics,
) -> Vec<(String, PathBuf)> {
    let language_filter = filters.language;
    let path_pattern = query.path_regex.clone();
    let (file_candidates, walk_diagnostics) = collect_candidate_files_parallel(
        repository_id,
        root,
        search_walk_builder(root),
        None,
        // An absent filter accepts everything; the language is checked before
        // the path regex.
        move |path, rel_path| {
            language_filter.is_none_or(|language| language.matches_path(path))
                && path_pattern
                    .as_ref()
                    .is_none_or(|pattern| pattern.is_match(rel_path))
        },
    );
    diagnostics.entries.extend(walk_diagnostics);
    file_candidates
}
/// Collects workflow files from `<root>/.github/workflows` when the ranking
/// intent asks for entrypoint build flows or CI workflow witnesses.
///
/// Returns an empty list when neither intent flag is set or when the workflows
/// directory does not exist. Walk diagnostics are appended to
/// `diagnostics.entries` with `.github/workflows` as their path.
pub(super) fn hidden_workflow_candidates_for_repository(
    repository_id: &str,
    root: &Path,
    filters: &NormalizedSearchFilters,
    intent: &HybridRankingIntent,
    diagnostics: &mut SearchExecutionDiagnostics,
) -> Vec<(String, PathBuf)> {
    let build_flow_wanted = intent.wants_entrypoint_build_flow;
    let ci_witnesses_wanted = intent.wants_ci_workflow_witnesses;
    if !(build_flow_wanted || ci_witnesses_wanted) {
        return Vec::new();
    }

    let workflows_dir = root.join(".github/workflows");
    if !workflows_dir.is_dir() {
        return Vec::new();
    }

    // `hidden(false)` must come after `standard_filters(true)` so that it is the
    // setting that sticks for this walk.
    let mut workflow_walker = WalkBuilder::new(&workflows_dir);
    workflow_walker.standard_filters(true);
    workflow_walker.hidden(false);
    workflow_walker.require_git(false);

    let language_filter = filters.language;
    let (file_candidates, walk_diagnostics) = collect_candidate_files_parallel(
        repository_id,
        root,
        workflow_walker,
        Some(String::from(".github/workflows")),
        move |path, rel_path| {
            // With the build-flow intent, CI workflows only count when CI
            // witnesses are wanted too; without it, only CI workflows count.
            let workflow_kind_matches = if build_flow_wanted {
                (ci_witnesses_wanted && is_ci_workflow_path(rel_path))
                    || is_entrypoint_build_workflow_path(rel_path)
            } else {
                is_ci_workflow_path(rel_path)
            };
            workflow_kind_matches
                && language_filter.is_none_or(|language| language.matches_path(path))
        },
    );
    diagnostics.entries.extend(walk_diagnostics);
    file_candidates
}
/// Returns the root-scoped runtime config files of a repository when the intent
/// asks for entrypoint build flows or runtime config artifacts.
///
/// `_filters` is accepted but not consulted. An empty list is returned when
/// neither intent flag is set.
pub(super) fn root_scoped_runtime_config_candidates_for_repository(
    repository_id: &str,
    root: &Path,
    _filters: &NormalizedSearchFilters,
    intent: &HybridRankingIntent,
    diagnostics: &mut SearchExecutionDiagnostics,
) -> Vec<(String, PathBuf)> {
    if intent.wants_entrypoint_build_flow || intent.wants_runtime_config_artifacts {
        collect_root_scoped_runtime_config_candidates(
            repository_id,
            root,
            diagnostics,
            |_path, rel_path| is_root_scoped_runtime_config_path(rel_path),
        )
    } else {
        Vec::new()
    }
}
/// Returns the root-scoped runtime config files of a repository that also pass
/// the optional language filter and the optional path regex of `query`.
///
/// Diagnostics raised while enumerating are pushed to `diagnostics` by the
/// underlying collector.
pub(super) fn search_root_scoped_runtime_config_candidates_for_repository(
    repository_id: &str,
    root: &Path,
    query: &SearchTextQuery,
    filters: &NormalizedSearchFilters,
    diagnostics: &mut SearchExecutionDiagnostics,
) -> Vec<(String, PathBuf)> {
    let language_filter = filters.language;
    let path_pattern = query.path_regex.clone();
    collect_root_scoped_runtime_config_candidates(
        repository_id,
        root,
        diagnostics,
        // Checks run in order: config-path test, language, then path regex.
        move |path, rel_path| {
            is_root_scoped_runtime_config_path(rel_path)
                && language_filter.is_none_or(|language| language.matches_path(path))
                && path_pattern
                    .as_ref()
                    .is_none_or(|pattern| pattern.is_match(rel_path))
        },
    )
}
/// Renders `path` as a forward-slash string relative to `root`.
///
/// When `path` is not located under `root`, the full path is used instead.
/// Backslashes are replaced with `/`, and every leading `./` is removed.
/// Non-UTF-8 path bytes are converted lossily.
pub(super) fn normalize_repository_relative_path(root: &Path, path: &Path) -> String {
    let relative = path.strip_prefix(root).unwrap_or(path);
    let forward_slashed = relative.to_string_lossy().replace('\\', "/");
    forward_slashed.trim_start_matches("./").to_owned()
}
/// Builds the default walker for searching under `root`: standard filters are
/// enabled and a git repository is not required.
fn search_walk_builder(root: &Path) -> WalkBuilder {
    let mut walker = WalkBuilder::new(root);
    walker.standard_filters(true);
    walker.require_git(false);
    walker
}
/// Runs `builder` as a parallel walk and gathers the regular files accepted by
/// `should_include`.
///
/// * `repository_id` – copied into every diagnostic produced by the walk.
/// * `root` – base used to compute each candidate's relative path; also passed
///   to `hard_excluded_runtime_path`.
/// * `diagnostic_path` – stored in the `path` field of every walk diagnostic.
/// * `should_include` – called with the entry path and its normalized relative
///   path; returning `false` drops the entry.
///
/// Returns the candidates sorted by `(relative path, path)` with exact
/// duplicates removed, plus the walk diagnostics ordered by
/// `sort_search_diagnostics`. Walk errors never abort the walk; they are only
/// recorded as diagnostics of kind `Walk`.
fn collect_candidate_files_parallel(
    repository_id: &str,
    root: &Path,
    builder: WalkBuilder,
    diagnostic_path: Option<String>,
    should_include: impl Fn(&Path, &str) -> bool + Send + Sync + 'static,
) -> (Vec<(String, PathBuf)>, Vec<SearchDiagnostic>) {
    // Shared, cheaply clonable handles handed to each visitor closure.
    let repository_id = Arc::<str>::from(repository_id);
    let root = Arc::new(root.to_path_buf());
    let diagnostic_path = diagnostic_path.map(Arc::<str>::from);
    let should_include = Arc::new(should_include);
    let candidates = Arc::new(Mutex::new(Vec::new()));
    let diagnostics = Arc::new(Mutex::new(Vec::new()));

    builder.build_parallel().run(|| {
        let repository_id = Arc::clone(&repository_id);
        let root = Arc::clone(&root);
        let diagnostic_path = diagnostic_path.clone();
        let should_include = Arc::clone(&should_include);
        let candidates = Arc::clone(&candidates);
        let diagnostics = Arc::clone(&diagnostics);
        Box::new(move |dent| {
            match dent {
                Ok(entry) => {
                    // Only regular files can become candidates.
                    if !entry.file_type().is_some_and(|ft| ft.is_file()) {
                        return WalkState::Continue;
                    }
                    let path = entry.path();
                    if hard_excluded_runtime_path(root.as_ref(), path) {
                        return WalkState::Continue;
                    }
                    let rel_path = normalize_repository_relative_path(root.as_ref(), path);
                    if !should_include(path, &rel_path) {
                        return WalkState::Continue;
                    }
                    // A poisoned lock still yields its data, so results pushed
                    // before a panic elsewhere are kept.
                    candidates
                        .lock()
                        .unwrap_or_else(|poisoned| poisoned.into_inner())
                        .push((rel_path, path.to_path_buf()));
                }
                Err(err) => {
                    diagnostics
                        .lock()
                        .unwrap_or_else(|poisoned| poisoned.into_inner())
                        .push(SearchDiagnostic {
                            repository_id: repository_id.to_string(),
                            path: diagnostic_path.as_deref().map(str::to_owned),
                            kind: SearchDiagnosticKind::Walk,
                            message: err.to_string(),
                        });
                }
            }
            WalkState::Continue
        })
    });

    // `run` has returned, so move the collected vectors out of their mutexes
    // instead of cloning every element.
    let mut candidates = std::mem::take(
        &mut *candidates
            .lock()
            .unwrap_or_else(|poisoned| poisoned.into_inner()),
    );
    let mut diagnostics = std::mem::take(
        &mut *diagnostics
            .lock()
            .unwrap_or_else(|poisoned| poisoned.into_inner()),
    );

    // Visitors push in whatever order the walk reaches entries; sorting makes
    // the output deterministic. Tuple ordering compares the relative path
    // first, then the path.
    candidates.sort();
    candidates.dedup();
    sort_search_diagnostics(&mut diagnostics);
    (candidates, diagnostics)
}
fn collect_root_scoped_runtime_config_candidates(
repository_id: &str,
root: &Path,
diagnostics: &mut SearchExecutionDiagnostics,
should_include: impl Fn(&Path, &str) -> bool,
) -> Vec<(String, PathBuf)> {
let mut candidates = Vec::new();
let mut seen = BTreeSet::new();
match fs::read_dir(root) {
Ok(entries) => {
for entry in entries {
let Ok(entry) = entry else {
continue;
};
let path = entry.path();
if hard_excluded_runtime_path(root, &path) || !path.is_file() {
continue;
}
let relative_path = normalize_repository_relative_path(root, &path);
if !is_root_scoped_runtime_config_path(&relative_path)
|| !should_include(&path, &relative_path)
{
continue;
}
if seen.insert((relative_path.clone(), path.clone())) {
candidates.push((relative_path, path));
}
}
}
Err(error) => diagnostics.entries.push(SearchDiagnostic {
repository_id: repository_id.to_owned(),
path: Some(root.display().to_string()),
kind: SearchDiagnosticKind::Read,
message: format!("failed to enumerate root-scoped runtime config candidates: {error}"),
}),
}
for relative_path in NESTED_ROOT_SCOPED_RUNTIME_CONFIG_PATHS {
let path = root.join(relative_path);
if hard_excluded_runtime_path(root, &path) || !path.is_file() {
continue;
}
let normalized_relative_path = normalize_repository_relative_path(root, &path);
if !should_include(&path, &normalized_relative_path) {
continue;
}
if seen.insert((normalized_relative_path.clone(), path.clone())) {
candidates.push((normalized_relative_path, path));
}
}
candidates
}
/// Sorts diagnostics in place by repository id, then path, then kind, then
/// message, so that their order does not depend on how they were collected.
fn sort_search_diagnostics(diagnostics: &mut [SearchDiagnostic]) {
    diagnostics.sort_by(|a, b| {
        // Tuples compare lexicographically, field by field.
        let a_key = (&a.repository_id, &a.path, &a.kind, &a.message);
        let b_key = (&b.repository_id, &b.path, &b.kind, &b.message);
        a_key.cmp(&b_key)
    });
}