use crate::dataset::refs::Branches;
use lance_core::{Error, Result};
use object_store::path::Path;
/// Name of the directory, directly below a dataset root, under which branch
/// locations are placed.
///
/// A branch named `<branch>` is located at `<root>/tree/<branch>`.
pub const BRANCH_DIR: &str = "tree";
/// A dataset location described both as an object-store path and as a URI
/// string, together with the name of the branch it belongs to.
#[derive(Debug, Clone, PartialEq, Hash)]
pub struct BranchLocation {
/// Object-store path of this location.
pub path: Path,
/// URI string of this location.
pub uri: String,
/// Name of the branch this location belongs to. `None` means the location
/// is treated as the dataset root (`find_main` returns it unchanged).
pub branch: Option<String>,
}
impl BranchLocation {
pub fn find_main(&self) -> Result<Self> {
if let Some(branch_name) = self.branch.as_deref() {
let root_path_str = Self::get_root_path(self.path.as_ref(), branch_name)?;
let root_uri = Self::get_root_path(self.uri.as_str(), branch_name)?;
Ok(Self {
path: Path::parse(root_path_str)?,
uri: root_uri,
branch: None,
})
} else {
Ok(self.clone())
}
}
fn get_root_path(path_str: &str, branch_name: &str) -> Result<String> {
let branch_suffix = format!("{}/{}", BRANCH_DIR, branch_name);
let branch_suffix = branch_suffix.as_str();
let root_path_str = path_str
.strip_suffix(branch_suffix)
.or_else(|| {
if cfg!(windows) {
let windows_suffix = branch_suffix.replace('/', "\\");
path_str.strip_suffix(&windows_suffix)
} else {
None
}
})
.ok_or_else(|| {
Error::invalid_input(format!(
"Can not find the root location of branch {} by uri {}",
branch_name, path_str,
))
})?;
let root_path_str = if root_path_str.ends_with('/') {
root_path_str.trim_end_matches('/').to_string()
} else if cfg!(windows) {
root_path_str.trim_end_matches('\\').to_string()
} else {
return Err(Error::invalid_input(format!(
"Invalid dataset root uri {} for branch {}",
root_path_str, path_str,
)));
};
Ok(root_path_str)
}
pub fn find_branch(&self, branch_name: Option<&str>) -> Result<Self> {
if branch_name == self.branch.as_deref() {
return Ok(self.clone());
}
let root_location = self.find_main()?;
if Branches::is_main_branch(branch_name) {
return Ok(root_location);
}
if let Some(target_branch) = branch_name {
let (new_path, new_uri) = {
if target_branch.is_empty() {
(self.path.clone(), self.uri.clone())
} else {
let segments = target_branch.split('/');
let mut new_path_str = Self::join_str(root_location.path.as_ref(), "tree")?;
let mut new_uri = Self::join_str(root_location.uri.as_str(), "tree")?;
for segment in segments {
new_path_str = Self::join_str(new_path_str.as_str(), segment)?;
new_uri = Self::join_str(new_uri.as_str(), segment)?;
}
(Path::parse(new_path_str)?, new_uri)
}
};
Ok(Self {
path: new_path,
uri: new_uri,
branch: Some(target_branch.to_string()),
})
} else {
Ok(root_location)
}
}
fn join_str(base: &str, segment: &str) -> Result<String> {
let normalized_segment = segment.trim_start_matches('/');
let is_base_dir = base.ends_with("/");
if is_base_dir {
Ok(format!("{}{}", base, normalized_segment))
} else {
Ok(format!("{}/{}", base, normalized_segment))
}
}
}
#[cfg(test)]
mod tests {
    use crate::dataset::branch_location::BranchLocation;
    use lance_core::utils::tempfile::TempStdDir;
    use object_store::path::Path;
    use std::fs;
    use std::path::PathBuf;

    /// Returns an owned copy of the path of a new `TempStdDir`; the
    /// `TempStdDir` value itself is not kept.
    fn temp_root() -> PathBuf {
        TempStdDir::default().to_owned()
    }

    /// Builds a location for branch `feature/new` under `root_path`.
    fn create_branch_location(root_path: PathBuf) -> BranchLocation {
        let uri = root_path
            .join("tree/feature/new")
            .to_str()
            .unwrap()
            .to_string();
        let path = Path::parse(&uri).unwrap();
        BranchLocation {
            path,
            uri,
            branch: Some("feature/new".to_string()),
        }
    }

    /// Asserts that `uri` can be created as a directory tree on the local
    /// filesystem.
    fn assert_dir_tree_creatable(uri: &str) {
        assert!(fs::create_dir_all(std::path::Path::new(uri)).is_ok());
    }

    #[test]
    fn test_find_main_from_branch() {
        let root = temp_root();
        let branch_location = create_branch_location(root.clone());

        let resolved = branch_location.find_main().unwrap();

        assert_eq!(
            resolved.path.as_ref(),
            Path::parse(root.to_str().unwrap()).unwrap().as_ref()
        );
        assert_eq!(resolved.uri, root.to_str().unwrap().to_string());
        assert_eq!(resolved.branch, None);
        // Deliberately the non-recursive `create_dir` here.
        assert!(fs::create_dir(std::path::Path::new(resolved.uri.as_str())).is_ok());
    }

    #[test]
    fn test_find_main_from_root() {
        let mut start = create_branch_location(temp_root());
        start.branch = None;

        let resolved = start.find_main().unwrap();

        assert_eq!(resolved.path, start.path);
        assert_eq!(resolved.uri, start.uri);
        assert_eq!(resolved.branch, None);
        assert_dir_tree_creatable(resolved.uri.as_str());
    }

    #[test]
    fn test_find_branch_from_same_branch() {
        let start = create_branch_location(temp_root());

        let resolved = start.find_branch(start.branch.as_deref()).unwrap();

        assert_eq!(resolved.path, start.path);
        assert_eq!(resolved.uri, start.uri);
        assert_eq!(resolved.branch, start.branch);
        assert_dir_tree_creatable(resolved.uri.as_str());
    }

    #[test]
    fn test_find_main_branch() {
        let start = create_branch_location(temp_root());

        let resolved = start.find_branch(None).unwrap();
        let expected = start.find_main().unwrap();

        assert_eq!(resolved.path, expected.path);
        assert_eq!(resolved.uri, expected.uri);
        assert_eq!(resolved.branch, None);
        assert_dir_tree_creatable(resolved.uri.as_str());
    }

    #[test]
    fn test_find_simple_branch() {
        let start = create_branch_location(temp_root());
        let requested = Some("featureA");

        let root = start.find_main().unwrap();
        let resolved = start.find_branch(requested).unwrap();

        assert_eq!(
            resolved.path.as_ref(),
            format!("{}/tree/featureA", root.path.as_ref())
        );
        assert_eq!(resolved.uri, format!("{}/tree/featureA", root.uri));
        assert_eq!(resolved.branch.as_deref(), requested);
        assert_dir_tree_creatable(resolved.uri.as_str());
    }

    #[test]
    fn test_find_complex_branch() {
        let start = create_branch_location(temp_root());
        let requested = Some("bugfix/issue-123");

        let root = start.find_main().unwrap();
        let resolved = start.find_branch(requested).unwrap();

        assert_eq!(
            resolved.path.as_ref(),
            format!("{}/tree/bugfix/issue-123", root.path.as_ref())
        );
        assert_eq!(resolved.uri, format!("{}/tree/bugfix/issue-123", root.uri));
        assert_dir_tree_creatable(resolved.uri.as_str());
    }

    #[test]
    fn test_find_empty_branch() {
        let start = create_branch_location(temp_root());
        let requested = Some("");

        let resolved = start.find_branch(requested).unwrap();

        // An empty branch name keeps the current path and uri.
        assert_eq!(resolved.path, start.path);
        assert_eq!(resolved.uri, start.uri);
        assert_eq!(resolved.branch.as_deref(), requested);
    }

    #[test]
    #[cfg(windows)]
    fn test_branch_location_on_windows() {
        let branch_uri = "C:\\Users\\Username\\Documents\\dataset\\tree\\feature\\new";
        let start = BranchLocation {
            path: Path::parse(branch_uri).unwrap(),
            uri: branch_uri.to_string(),
            branch: Some("feature/new".to_string()),
        };

        let root = start.find_main().unwrap();
        assert_eq!(root.uri, "C:\\Users\\Username\\Documents\\dataset");
        assert_eq!(
            root.path.as_ref(),
            Path::parse("C:\\Users\\Username\\Documents\\dataset")
                .unwrap()
                .as_ref()
        );
        assert_eq!(root.branch, None);

        let sibling = start.find_branch(Some("feature/nathan/A")).unwrap();
        assert_eq!(
            sibling.uri,
            "C:\\Users\\Username\\Documents\\dataset/tree/feature/nathan/A"
        );
        assert_eq!(
            sibling.path.as_ref(),
            Path::parse("C:\\Users\\Username\\Documents\\dataset/tree/feature/nathan/A")
                .unwrap()
                .as_ref()
        );
        assert_eq!(sibling.branch.as_deref(), Some("feature/nathan/A"));
    }
}