use std::collections::{HashMap, HashSet};
use std::path::{Path, PathBuf};
use smol_str::SmolStr;
use crate::bib::semantic::Model as BibModel;
use crate::file_discovery::FileKind;
use crate::incremental::{
IncrementalDb, QueryKind, QueryLogEntry, file_cite_facts, file_cite_names,
file_is_document_root,
};
use crate::project::graph::{IncludeGraph, Project, project_graph};
use crate::project::include::BibTarget;
pub fn document_cite_names(model: &BibModel) -> Vec<SmolStr> {
let mut names: Vec<SmolStr> = model.entries().iter().map(|e| e.key.clone()).collect();
names.sort_unstable();
names.dedup();
names
}
#[derive(Debug, Clone)]
pub struct CiteFileFacts {
pub path: PathBuf,
pub bib_targets: Vec<BibTarget>,
pub nocite_all: bool,
pub is_document_root: bool,
}
#[derive(Debug, Default)]
struct Component {
keys: HashSet<SmolStr>,
bib_paths: Vec<PathBuf>,
closed: bool,
rooted: bool,
wildcard: bool,
}
#[derive(Debug, Default)]
pub struct ResolvedCitations {
component_of: HashMap<PathBuf, usize>,
components: Vec<Component>,
}
impl ResolvedCitations {
pub fn build(
files: &[CiteFileFacts],
graph: &IncludeGraph,
bib_keys: &HashMap<PathBuf, Vec<SmolStr>>,
) -> Self {
let mut paths: Vec<&Path> = files.iter().map(|f| f.path.as_path()).collect();
paths.sort_unstable();
paths.dedup();
let index: HashMap<&Path, usize> = paths.iter().enumerate().map(|(i, p)| (*p, i)).collect();
let mut uf = UnionFind::new(paths.len());
for (&path, &i) in &index {
for edge in graph.outgoing(path) {
if let Some(&j) = index.get(edge.to.as_path()) {
uf.union(i, j);
}
}
for included in graph.included_by(path) {
if let Some(&j) = index.get(included.as_path()) {
uf.union(i, j);
}
}
}
let mut root_to_id: HashMap<usize, usize> = HashMap::new();
let mut component_of: HashMap<PathBuf, usize> = HashMap::new();
for (i, &path) in paths.iter().enumerate() {
let root = uf.find(i);
let next = root_to_id.len();
let id = *root_to_id.entry(root).or_insert(next);
component_of.insert(path.to_path_buf(), id);
}
let mut components: Vec<Component> = (0..root_to_id.len())
.map(|_| Component {
closed: true,
..Component::default()
})
.collect();
for facts in files {
let Some(&id) = component_of.get(&facts.path) else {
continue;
};
let comp = &mut components[id];
comp.rooted |= facts.is_document_root;
comp.wildcard |= facts.nocite_all;
for target in &facts.bib_targets {
match target {
BibTarget::Dynamic => comp.closed = false,
BibTarget::Path(path) => match bib_keys.get(path) {
Some(keys) => {
comp.keys
.extend(keys.iter().map(|k| SmolStr::from(k.to_lowercase())));
comp.bib_paths.push(path.clone());
}
None => comp.closed = false,
},
}
}
}
for edge in graph.unresolved() {
if let Some(&id) = component_of.get(&edge.from) {
components[id].closed = false;
}
}
for comp in &mut components {
comp.bib_paths.sort_unstable();
comp.bib_paths.dedup();
}
Self {
component_of,
components,
}
}
pub fn bib_definers(&self, file: &Path) -> &[PathBuf] {
self.component_of
.get(file)
.map_or(&[], |&id| self.components[id].bib_paths.as_slice())
}
pub fn namespace_members(&self, file: &Path) -> Vec<&Path> {
let Some(&id) = self.component_of.get(file) else {
return Vec::new();
};
let mut members: Vec<&Path> = self
.component_of
.iter()
.filter(|&(_, &cid)| cid == id)
.map(|(p, _)| p.as_path())
.collect();
members.sort_unstable();
members
}
pub fn bib_citers(&self, bib_path: &Path) -> Vec<&Path> {
let ids: HashSet<usize> = self
.components
.iter()
.enumerate()
.filter(|(_, comp)| comp.bib_paths.iter().any(|p| p == bib_path))
.map(|(id, _)| id)
.collect();
let mut members: Vec<&Path> = self
.component_of
.iter()
.filter(|&(_, id)| ids.contains(id))
.map(|(p, _)| p.as_path())
.collect();
members.sort_unstable();
members.dedup();
members
}
pub fn is_defined(&self, file: &Path, key: &str) -> bool {
self.component_of.get(file).is_some_and(|&id| {
self.components[id]
.keys
.contains(&SmolStr::from(key.to_lowercase()))
})
}
pub fn is_closed(&self, file: &Path) -> bool {
self.component_of
.get(file)
.is_some_and(|&id| self.components[id].closed)
}
pub fn is_root_component(&self, file: &Path) -> bool {
self.component_of
.get(file)
.is_some_and(|&id| self.components[id].rooted)
}
pub fn has_wildcard_nocite(&self, file: &Path) -> bool {
self.component_of
.get(file)
.is_some_and(|&id| self.components[id].wildcard)
}
}
#[salsa::tracked(returns(ref), no_eq, unsafe(non_update_types))]
pub fn resolved_citations<'db>(
db: &'db dyn IncrementalDb,
project: Project<'db>,
) -> ResolvedCitations {
db.record_query(QueryLogEntry {
kind: QueryKind::ResolvedCitations,
file: None,
});
let graph = project_graph(db, project);
let mut cite_facts: Vec<CiteFileFacts> = Vec::new();
let mut bib_keys: HashMap<PathBuf, Vec<SmolStr>> = HashMap::new();
for member in project.members(db) {
match member.kind {
FileKind::Tex | FileKind::Sty | FileKind::Cls => {
let facts = file_cite_facts(db, member.file);
cite_facts.push(CiteFileFacts {
path: member.path.clone(),
bib_targets: facts.bib_targets.clone(),
nocite_all: facts.nocite_all,
is_document_root: *file_is_document_root(db, member.file),
});
}
FileKind::Bib => {
bib_keys.insert(
member.path.clone(),
file_cite_names(db, member.file).clone(),
);
}
}
}
ResolvedCitations::build(&cite_facts, graph, &bib_keys)
}
struct UnionFind {
parent: Vec<usize>,
size: Vec<usize>,
}
impl UnionFind {
fn new(n: usize) -> Self {
Self {
parent: (0..n).collect(),
size: vec![1; n],
}
}
fn find(&mut self, mut x: usize) -> usize {
while self.parent[x] != x {
self.parent[x] = self.parent[self.parent[x]];
x = self.parent[x];
}
x
}
fn union(&mut self, a: usize, b: usize) {
let (mut ra, mut rb) = (self.find(a), self.find(b));
if ra == rb {
return;
}
if self.size[ra] < self.size[rb] {
std::mem::swap(&mut ra, &mut rb);
}
self.parent[rb] = ra;
self.size[ra] += self.size[rb];
}
}
#[cfg(test)]
mod tests {
use super::*;
use crate::project::graph::FileFacts;
use crate::project::include::{IncludeEdgeKey, IncludeKind, IncludeTarget};
fn graph(files: &[(&str, &[(IncludeKind, &str)])]) -> IncludeGraph {
let facts: Vec<FileFacts> = files
.iter()
.map(|(path, edges)| FileFacts {
path: PathBuf::from(path),
include_edges: edges
.iter()
.map(|(kind, target)| IncludeEdgeKey {
kind: *kind,
target: IncludeTarget::Path(PathBuf::from(target)),
})
.collect(),
})
.collect();
IncludeGraph::build(&facts, None)
}
fn keys(list: &[&str]) -> Vec<SmolStr> {
list.iter().map(SmolStr::new).collect()
}
fn facts(path: &str, bib: &[&str], root: bool) -> CiteFileFacts {
CiteFileFacts {
path: PathBuf::from(path),
bib_targets: bib
.iter()
.map(|b| BibTarget::Path(PathBuf::from(b)))
.collect(),
nocite_all: false,
is_document_root: root,
}
}
#[test]
fn key_in_referenced_bib_is_defined() {
let g = graph(&[("/p/main.tex", &[])]);
let mut bib = HashMap::new();
bib.insert(PathBuf::from("/p/refs.bib"), keys(&["knuth1984"]));
let r = ResolvedCitations::build(&[facts("/p/main.tex", &["/p/refs.bib"], true)], &g, &bib);
assert!(r.is_defined(Path::new("/p/main.tex"), "knuth1984"));
assert!(r.is_defined(Path::new("/p/main.tex"), "Knuth1984"));
assert!(!r.is_defined(Path::new("/p/main.tex"), "missing"));
assert!(r.is_closed(Path::new("/p/main.tex")));
assert!(r.is_root_component(Path::new("/p/main.tex")));
}
#[test]
fn keys_union_across_included_files() {
let g = graph(&[
("/p/main.tex", &[(IncludeKind::Input, "/p/chap.tex")]),
("/p/chap.tex", &[]),
]);
let mut bib = HashMap::new();
bib.insert(PathBuf::from("/p/a.bib"), keys(&["alpha"]));
bib.insert(PathBuf::from("/p/b.bib"), keys(&["beta"]));
let r = ResolvedCitations::build(
&[
facts("/p/main.tex", &["/p/a.bib"], true),
facts("/p/chap.tex", &["/p/b.bib"], false),
],
&g,
&bib,
);
assert!(r.is_defined(Path::new("/p/chap.tex"), "alpha"));
assert!(r.is_defined(Path::new("/p/main.tex"), "beta"));
}
#[test]
fn unanalyzed_bib_opens_the_component() {
let g = graph(&[("/p/main.tex", &[])]);
let bib = HashMap::new(); let r = ResolvedCitations::build(&[facts("/p/main.tex", &["/p/refs.bib"], true)], &g, &bib);
assert!(!r.is_closed(Path::new("/p/main.tex")));
}
#[test]
fn dynamic_bib_target_opens_the_component() {
let g = graph(&[("/p/main.tex", &[])]);
let r = ResolvedCitations::build(
&[CiteFileFacts {
path: PathBuf::from("/p/main.tex"),
bib_targets: vec![BibTarget::Dynamic],
nocite_all: false,
is_document_root: true,
}],
&g,
&HashMap::new(),
);
assert!(!r.is_closed(Path::new("/p/main.tex")));
}
#[test]
fn dynamic_tex_include_opens_the_component() {
let facts_list = vec![FileFacts {
path: PathBuf::from("/p/main.tex"),
include_edges: vec![IncludeEdgeKey {
kind: IncludeKind::Input,
target: IncludeTarget::Dynamic,
}],
}];
let g = IncludeGraph::build(&facts_list, None);
let mut bib = HashMap::new();
bib.insert(PathBuf::from("/p/refs.bib"), keys(&["k"]));
let r = ResolvedCitations::build(&[facts("/p/main.tex", &["/p/refs.bib"], true)], &g, &bib);
assert!(!r.is_closed(Path::new("/p/main.tex")));
}
#[test]
fn bib_definers_are_namespace_scoped() {
let g = graph(&[("/p/main1.tex", &[]), ("/p/main2.tex", &[])]);
let mut bib = HashMap::new();
bib.insert(PathBuf::from("/p/a.bib"), keys(&["alpha"]));
bib.insert(PathBuf::from("/p/b.bib"), keys(&["beta"]));
let r = ResolvedCitations::build(
&[
facts("/p/main1.tex", &["/p/a.bib"], true),
facts("/p/main2.tex", &["/p/b.bib"], true),
],
&g,
&bib,
);
assert_eq!(
r.bib_definers(Path::new("/p/main1.tex")),
&[PathBuf::from("/p/a.bib")]
);
assert_eq!(
r.bib_definers(Path::new("/p/main2.tex")),
&[PathBuf::from("/p/b.bib")]
);
assert!(r.bib_definers(Path::new("/p/none.tex")).is_empty());
}
#[test]
fn namespace_members_and_bib_citers_scope_to_the_component() {
let g = graph(&[
("/p/main1.tex", &[(IncludeKind::Input, "/p/chap1.tex")]),
("/p/chap1.tex", &[]),
("/p/main2.tex", &[]),
]);
let mut bib = HashMap::new();
bib.insert(PathBuf::from("/p/common.bib"), keys(&["k"]));
let r = ResolvedCitations::build(
&[
facts("/p/main1.tex", &["/p/common.bib"], true),
facts("/p/chap1.tex", &[], false),
facts("/p/main2.tex", &["/p/common.bib"], true),
],
&g,
&bib,
);
assert_eq!(
r.namespace_members(Path::new("/p/chap1.tex")),
&[Path::new("/p/chap1.tex"), Path::new("/p/main1.tex")]
);
assert_eq!(
r.namespace_members(Path::new("/p/main2.tex")),
&[Path::new("/p/main2.tex")]
);
assert_eq!(
r.bib_citers(Path::new("/p/common.bib")),
&[
Path::new("/p/chap1.tex"),
Path::new("/p/main1.tex"),
Path::new("/p/main2.tex"),
]
);
assert!(r.bib_citers(Path::new("/p/unknown.bib")).is_empty());
assert!(r.namespace_members(Path::new("/p/none.tex")).is_empty());
}
#[test]
fn bib_definers_only_lists_analyzed_bibs() {
let g = graph(&[("/p/main.tex", &[])]);
let mut bib = HashMap::new();
bib.insert(PathBuf::from("/p/a.bib"), keys(&["alpha"]));
let r = ResolvedCitations::build(
&[facts("/p/main.tex", &["/p/a.bib", "/p/missing.bib"], true)],
&g,
&bib,
);
assert_eq!(
r.bib_definers(Path::new("/p/main.tex")),
&[PathBuf::from("/p/a.bib")]
);
assert!(!r.is_closed(Path::new("/p/main.tex")));
}
#[test]
fn rootless_and_wildcard_flags() {
let g = graph(&[("/p/frag.tex", &[])]);
let mut bib = HashMap::new();
bib.insert(PathBuf::from("/p/refs.bib"), keys(&["k"]));
let r =
ResolvedCitations::build(&[facts("/p/frag.tex", &["/p/refs.bib"], false)], &g, &bib);
assert!(!r.is_root_component(Path::new("/p/frag.tex")));
let with_wildcard = ResolvedCitations::build(
&[CiteFileFacts {
path: PathBuf::from("/p/main.tex"),
bib_targets: vec![BibTarget::Path(PathBuf::from("/p/refs.bib"))],
nocite_all: true,
is_document_root: true,
}],
&graph(&[("/p/main.tex", &[])]),
&bib,
);
assert!(with_wildcard.has_wildcard_nocite(Path::new("/p/main.tex")));
}
}