use std::collections::{BTreeMap, HashMap};
use std::hash::{Hash, Hasher};
use std::mem;
use std::rc::Rc;
use serde::Serialize;
use serde::de::DeserializeOwned;
use crate::common::reference::RefOr;
use crate::loader::{Loader, LoaderError};
/// Errors returned by the bag and reference-lifting helpers in this file.
#[derive(Debug, thiserror::Error)]
pub enum CollapseError {
/// The loader failed to resolve an external (non-`#`) reference.
#[error("failed to resolve external reference `{reference}`")]
External {
/// The reference string that could not be resolved.
reference: String,
/// The loader error that caused the failure.
#[source]
source: LoaderError,
},
/// A component could not be serialised to JSON; serialisation is how
/// components are compared for deduplication.
#[error("failed to serialise component for dedup")]
Serialize(#[from] serde_json::Error),
}
/// A named collection of components plus a digest index used to spot
/// structurally identical items.
pub struct Bag<T> {
/// Components keyed by name; each value is either an inline item or a reference.
entries: BTreeMap<String, RefOr<T>>,
/// Digest of an item's JSON serialisation mapped to the names recorded under it.
/// Several names may share one digest, so lookups must still compare the items.
seen: HashMap<u64, Vec<String>>,
}
/// Reduces a serialised component to a 64-bit digest.
///
/// The digest comes from the standard library's `DefaultHasher` in its
/// freshly-constructed state, so equal strings always yield equal digests
/// within one build of the program.
fn digest(canonical: &str) -> u64 {
    use std::collections::hash_map::DefaultHasher;

    let mut state = DefaultHasher::new();
    Hash::hash(canonical, &mut state);
    state.finish()
}
/// The default bag is empty: no entries and no recorded digests.
///
/// Implemented by hand so that `T` itself does not need to be `Default`.
impl<T> Default for Bag<T> {
    fn default() -> Self {
        let entries = BTreeMap::new();
        let seen = HashMap::new();
        Self { entries, seen }
    }
}
impl<T> Bag<T> {
    /// Returns `true` when the bag holds no entries at all, inline or reference.
    pub fn is_empty(&self) -> bool {
        let Self { entries, .. } = self;
        entries.is_empty()
    }

    /// Consumes the bag and returns its name-to-component map; the digest
    /// index is discarded.
    pub fn into_map(self) -> BTreeMap<String, RefOr<T>> {
        let Self { entries, .. } = self;
        entries
    }
}
impl<T: Serialize> Bag<T> {
pub fn seed(&mut self, initial: BTreeMap<String, RefOr<T>>) -> Result<(), CollapseError> {
for (name, value) in initial {
if let RefOr::Item(item) = &value {
let d = digest(&serde_json::to_string(item)?);
self.seen.entry(d).or_default().push(name.clone());
}
self.entries.insert(name, value);
}
Ok(())
}
pub fn intern(&mut self, item: T, base: &str) -> Result<String, CollapseError> {
let canonical = serde_json::to_string(&item)?;
let d = digest(&canonical);
if let Some(candidates) = self.seen.get(&d) {
for name in candidates {
if let Some(RefOr::Item(existing)) = self.entries.get(name)
&& serde_json::to_string(existing)? == canonical
{
return Ok(name.clone());
}
}
}
let name = unique_name(&self.entries, base);
self.seen.entry(d).or_default().push(name.clone());
self.entries.insert(name.clone(), RefOr::new_item(item));
Ok(name)
}
pub fn inline_names(&self) -> Vec<String> {
self.entries
.iter()
.filter_map(|(name, value)| match value {
RefOr::Item(_) => Some(name.clone()),
_ => None,
})
.collect()
}
pub fn take_inline(&mut self, name: &str) -> Option<T> {
match self.entries.remove(name)? {
RefOr::Item(item) => Some(item),
r @ RefOr::Ref(_) => {
self.entries.insert(name.to_owned(), r);
None
}
}
}
pub fn put_inline(&mut self, name: String, item: T) -> Result<(), CollapseError> {
let d = digest(&serde_json::to_string(&item)?);
let candidates = self.seen.entry(d).or_default();
if !candidates.contains(&name) {
candidates.push(name.clone());
}
self.entries.insert(name, RefOr::new_item(item));
Ok(())
}
}
/// Picks a key that is not yet present in `bag`.
///
/// Returns `base` itself when it is free; otherwise the first free
/// `"{base}_{n}"`, with `n` counting up from 2.
///
/// # Panics
/// Panics if every suffix in `2..u32::MAX` is already taken.
pub fn unique_name<V>(bag: &BTreeMap<String, V>, base: &str) -> String {
    if !bag.contains_key(base) {
        return base.to_owned();
    }
    (2..u32::MAX)
        .map(|suffix| format!("{base}_{suffix}"))
        .find(|candidate| !bag.contains_key(candidate))
        .unwrap_or_else(|| unreachable!("exhausted u32 suffixes for `{base}`"))
}
/// An ordered sequence of name parts from which a component name is derived.
///
/// The parts are stored as a parent-linked chain behind an `Rc`, so cloning
/// a context or pushing a part does not copy the existing parts.
#[derive(Clone)]
pub struct NameContext {
/// Most recently pushed part, or `None` for an empty context.
node: Option<Rc<NameNode>>,
}
/// One link in a [`NameContext`] chain.
struct NameNode {
/// The name part held by this link.
part: String,
/// The link pushed before this one, or `None` if this is the first part.
parent: Option<Rc<NameNode>>,
}
impl NameContext {
pub fn new<I, S>(parts: I) -> Self
where
I: IntoIterator<Item = S>,
S: Into<String>,
{
let mut ctx = NameContext { node: None };
for part in parts {
ctx = ctx.push_owned(part.into());
}
ctx
}
fn push_owned(&self, part: String) -> Self {
NameContext {
node: Some(Rc::new(NameNode {
part,
parent: self.node.clone(),
})),
}
}
pub fn push(&self, part: &str) -> Self {
self.push_owned(part.to_owned())
}
fn segments(&self) -> Vec<&str> {
let mut out = Vec::new();
let mut cur = self.node.as_deref();
while let Some(node) = cur {
out.push(node.part.as_str());
cur = node.parent.as_deref();
}
out.reverse();
out
}
pub fn derive_name(&self) -> String {
sanitize_component_name(self.segments().join("_"))
}
pub fn from_external_ref(reference: &str, fallback: &NameContext) -> Self {
if let Some((_, fragment)) = reference.split_once('#')
&& let Some(last) = fragment.rsplit('/').next()
&& !last.is_empty()
{
return NameContext::new([last.to_owned()]);
}
fallback.clone()
}
}
/// Turns an arbitrary string into a usable component name.
///
/// Every character other than an ASCII letter, ASCII digit, `.`, `-` or `_`
/// becomes `_`. Runs of underscores are collapsed to a single one, and
/// leading and trailing underscores are removed. If nothing is left, the
/// name `"Schema"` is returned.
pub fn sanitize_component_name(s: impl AsRef<str>) -> String {
    let raw = s.as_ref();
    let mut cleaned = String::with_capacity(raw.len());

    for ch in raw.chars() {
        let keep = ch.is_ascii_alphanumeric() || matches!(ch, '.' | '-' | '_');
        let mapped = if keep { ch } else { '_' };
        // Collapse underscore runs as they are produced.
        if mapped == '_' && cleaned.ends_with('_') {
            continue;
        }
        cleaned.push(mapped);
    }

    match cleaned.trim_matches('_') {
        "" => "Schema".to_owned(),
        name => name.to_owned(),
    }
}
/// Returns `true` when `reference` starts with `#`, i.e. it is a bare
/// fragment with no document part in front of it.
pub fn is_internal_ref(reference: &str) -> bool {
    // `#` is a single ASCII byte, so checking the first byte is sufficient.
    matches!(reference.as_bytes().first(), Some(b'#'))
}
/// Implemented by contexts that may carry a [`Loader`] for resolving
/// external references.
pub trait HasLoader {
/// Returns the loader, or `None` when the context has none. `lift_ref_or`
/// leaves external references untouched when this returns `None`.
fn loader_mut(&mut self) -> Option<&mut Loader>;
}
/// A component type that `lift_ref_or` can move into a [`Bag`] owned by a
/// context `C` and replace with a reference.
pub trait LiftableBag<C>: Sized + Serialize + DeserializeOwned + 'static {
/// Prefix placed in front of the interned name to form the reference string
/// written back into the slot.
const PREFIX: &'static str;
/// Returns the bag inside `c` that stores components of this type.
fn bag(c: &mut C) -> &mut Bag<Self>;
/// Called on `item` with its naming context before the item is interned.
/// NOTE(review): presumably implementations lift nested components here —
/// confirm against the implementors.
fn walk(item: &mut Self, ctx: &NameContext, c: &mut C) -> Result<(), CollapseError>;
/// Optional preferred base name for `item`. The default returns `None`;
/// a `None` or empty hint makes the name come from the naming context instead.
fn name_hint(_item: &Self) -> Option<String> {
None
}
}
/// Moves the component in `slot` into the bag for `T` and replaces the slot
/// with a reference of the form `T::PREFIX` + interned name.
///
/// * **Internal reference** (starts with `#`): left untouched.
/// * **External reference**: resolved through `c.loader_mut()`. If the context
///   has no loader the slot is left untouched. The fetched item is named after
///   the last segment of the reference fragment (falling back to `ctx`),
///   walked, interned, and the slot is rewritten to point at it.
/// * **Inline item**: walked in place, then moved into the bag and replaced by
///   a reference.
///
/// # Errors
/// * [`CollapseError::External`] when the loader cannot resolve the reference.
/// * Any error returned by `T::walk`.
/// * [`CollapseError::Serialize`] when interning fails to serialise an item.
///
/// When resolution or `T::walk` fails, `slot` still holds a reference or item
/// (an inline item may have been partially modified by the walk). If
/// interning fails for an inline item, the item has already been consumed and
/// `slot` is left holding an empty reference.
pub fn lift_ref_or<T, C>(
    slot: &mut RefOr<T>,
    ctx: NameContext,
    c: &mut C,
) -> Result<(), CollapseError>
where
    T: LiftableBag<C> + Clone,
    C: HasLoader,
{
    match slot {
        RefOr::Ref(r) => {
            if is_internal_ref(&r.reference) {
                return Ok(());
            }
            let reference = r.reference.clone();
            // Without a loader there is nothing to resolve against; keep the
            // external reference as it is.
            let Some(loader) = c.loader_mut() else {
                return Ok(());
            };
            let mut fetched: T = loader.resolve_reference_as(&reference).map_err(|source| {
                CollapseError::External {
                    reference: reference.clone(),
                    source,
                }
            })?;
            let derived_ctx = NameContext::from_external_ref(&reference, &ctx);
            T::walk(&mut fetched, &derived_ctx, c)?;
            let name = intern(c, fetched, &derived_ctx)?;
            // The slot is only rewritten once everything above has succeeded.
            *slot = RefOr::new_ref(format!("{}{name}", T::PREFIX));
            Ok(())
        }
        RefOr::Item(item) => {
            // Walk while the item still lives in the slot. If the walk fails,
            // the caller keeps its item instead of an empty placeholder.
            T::walk(item, &ctx, c)?;

            // Interning needs ownership, so swap the item out only now.
            let placeholder = RefOr::new_ref(String::new());
            let RefOr::Item(owned) = mem::replace(slot, placeholder) else {
                unreachable!("matched RefOr::Item above");
            };
            let name = intern(c, owned, &ctx)?;
            *slot = RefOr::new_ref(format!("{}{name}", T::PREFIX));
            Ok(())
        }
    }
}
/// Interns `item` into the bag for `T` inside `c` and returns its name.
///
/// The base name is the sanitised `T::name_hint` when that is present and
/// non-empty; otherwise it is derived from `ctx`.
fn intern<T, C>(c: &mut C, item: T, ctx: &NameContext) -> Result<String, CollapseError>
where
    T: LiftableBag<C>,
{
    let base = T::name_hint(&item)
        .filter(|hint| !hint.is_empty())
        .map(|hint| sanitize_component_name(hint))
        .unwrap_or_else(|| ctx.derive_name());
    T::bag(c).intern(item, &base)
}
// Unit tests for the helpers in this file that need no loader or bag context.
#[cfg(test)]
mod tests {
use super::*;
// `unique_name` returns the base when free, then `_2`, `_3`, ... as keys fill up.
#[test]
fn unique_name_appends_suffix_against_existing_keys() {
let mut bag: BTreeMap<String, ()> = BTreeMap::new();
assert_eq!(unique_name(&bag, "foo"), "foo");
bag.insert("foo".to_owned(), ());
assert_eq!(unique_name(&bag, "foo"), "foo_2");
bag.insert("foo_2".to_owned(), ());
assert_eq!(unique_name(&bag, "foo"), "foo_3");
}
// Disallowed characters become `_`, runs collapse, edges are trimmed, and an
// empty result falls back to "Schema".
#[test]
fn sanitize_component_name_handles_edge_cases() {
assert_eq!(sanitize_component_name("Pet"), "Pet");
assert_eq!(
sanitize_component_name("paths./pets[0].schema"),
"paths._pets_0_.schema"
);
assert_eq!(sanitize_component_name("/foo/"), "foo");
assert_eq!(sanitize_component_name("Hello World"), "Hello_World");
assert_eq!(sanitize_component_name("///"), "Schema");
assert_eq!(sanitize_component_name(""), "Schema");
}
// The last segment of the fragment wins over the fallback context.
#[test]
fn name_context_from_external_ref_uses_last_pointer_segment() {
let fallback = NameContext::new(["fallback"]);
let ctx =
NameContext::from_external_ref("external.json#/components/schemas/Pet", &fallback);
assert_eq!(ctx.derive_name(), "Pet");
}
// No fragment, an empty fragment, or an empty last segment all use the fallback.
#[test]
fn name_context_from_external_ref_falls_back_on_empty_fragment() {
let fallback = NameContext::new(["fallback"]);
let ctx = NameContext::from_external_ref("external.json", &fallback);
assert_eq!(ctx.derive_name(), "fallback");
let ctx = NameContext::from_external_ref("external.json#", &fallback);
assert_eq!(ctx.derive_name(), "fallback");
let ctx = NameContext::from_external_ref("external.json#/", &fallback);
assert_eq!(ctx.derive_name(), "fallback");
}
// Only references that begin with `#` count as internal.
#[test]
fn is_internal_ref_routes_by_leading_hash() {
assert!(is_internal_ref("#/components/schemas/Pet"));
assert!(!is_internal_ref("external.json#/Pet"));
assert!(!is_internal_ref("https://example.com/spec#/Pet"));
}
}