use std::env;
use std::path::{Path, PathBuf};
use std::process::Command;
/// Git ref of upstream zvec that is used when `ZVEC_GIT_REF` does not provide one.
const DEFAULT_ZVEC_GIT_REF: &str = "v0.5.0";

/// Returns the zvec git ref requested for this build.
///
/// The value of the `ZVEC_GIT_REF` environment variable is used after trimming surrounding
/// whitespace. [`DEFAULT_ZVEC_GIT_REF`] is returned when the variable is unset, is not valid
/// Unicode, or is blank. A blank value is treated as "unset" because an exported-but-empty
/// variable would otherwise be handed on verbatim as an empty ref.
fn zvec_git_ref() -> String {
    env::var("ZVEC_GIT_REF")
        .ok()
        .map(|value| value.trim().to_string())
        .filter(|value| !value.is_empty())
        .unwrap_or_else(|| DEFAULT_ZVEC_GIT_REF.to_string())
}
/// Locates the zvec C++ source tree, cloning it when no checkout is available yet.
///
/// Lookup order:
/// 1. `<manifest_dir>/vendor/zvec` (reported as "dev mode"),
/// 2. `<out_dir>/vendor/zvec` (a checkout from an earlier run),
/// 3. a shallow, recursive `git clone` of the ref from `zvec_git_ref()` into (2).
///
/// A directory only counts as a checkout when it contains `CMakeLists.txt`.
///
/// NOTE(review): an existing checkout is reused as-is; it is not compared against the
/// currently requested git ref.
///
/// # Panics
///
/// Panics when `git` cannot be executed or when the clone exits unsuccessfully.
fn ensure_zvec_source(manifest_dir: &Path, out_dir: &Path) -> PathBuf {
    let dev_vendor = manifest_dir.join("vendor/zvec");
    if dev_vendor.join("CMakeLists.txt").exists() {
        println!("cargo:warning=zvec source already present (dev mode)");
        return dev_vendor;
    }

    let published_vendor = out_dir.join("vendor/zvec");
    if published_vendor.join("CMakeLists.txt").exists() {
        println!("cargo:warning=zvec source already present (OUT_DIR)");
        return published_vendor;
    }

    // The ref is only needed once we know a clone is required.
    let git_ref = zvec_git_ref();
    println!(
        "cargo:warning=Cloning zvec {} (this may take a few minutes)...",
        git_ref
    );

    // Best effort: a failure is surfaced as a warning instead of being dropped silently;
    // the clone below decides whether the build can continue.
    if let Some(parent) = published_vendor.parent() {
        if let Err(err) = std::fs::create_dir_all(parent) {
            println!(
                "cargo:warning=could not create {}: {err}",
                parent.display()
            );
        }
    }

    let status = Command::new("git")
        .args([
            "clone",
            "--depth",
            "1",
            "--branch",
            git_ref.as_str(),
            "--recursive",
            "https://github.com/alibaba/zvec.git",
        ])
        // Passed as an OS string so a non-UTF-8 destination path cannot cause a panic here.
        .arg(&published_vendor)
        .status()
        .expect("Failed to execute git clone. Please ensure git is installed.");
    if !status.success() {
        panic!(
            "git clone failed ({}) for ref {}. Please check your network connection and that git is installed.",
            status, git_ref
        );
    }
    published_vendor
}
/// Applies all local patches to the zvec source tree rooted at `zvec_src`.
///
/// At present the only patch set is the `<cstdint>` include fix-up.
fn patch_zvec_source(zvec_src: &Path) {
    patch_cstdint_includes(zvec_src)
}
fn patch_cstdint_includes(zvec_src: &Path) {
patch_include(
&zvec_src.join("thirdparty/rocksdb/rocksdb-8.1.1/db/blob/blob_file_meta.h"),
"#include <cassert>\n",
"#include <cassert>\n#include <cstdint>\n",
);
patch_include(
&zvec_src.join("thirdparty/rocksdb/rocksdb-8.1.1/include/rocksdb/trace_record_result.h"),
"#include <string>\n",
"#include <cstdint>\n#include <string>\n",
);
patch_include(
&zvec_src.join("thirdparty/rocksdb/rocksdb-8.1.1/include/rocksdb/trace_record.h"),
"#include <memory>\n",
"#include <cstdint>\n#include <memory>\n",
);
patch_include(
&zvec_src.join("thirdparty/rocksdb/rocksdb-8.1.1/include/rocksdb/utilities/checkpoint.h"),
"#include <string>\n",
"#include <cstdint>\n#include <string>\n",
);
patch_include(
&zvec_src.join("src/db/index/storage/wal/wal_file.h"),
"#include <vector>\n",
"#include <vector>\n#include <cstdint>\n",
);
}
/// Rewrites the file at `path`, replacing the first occurrence of `old` with `new`.
///
/// The function is a no-op when
/// - the file cannot be read (for example because it does not exist),
/// - the file already contains `#include <cstdint>`, or
/// - the file does not contain `old`.
///
/// Only the first occurrence is replaced, so a header that repeats the anchor line does not
/// receive the inserted include more than once.
///
/// # Panics
///
/// Panics when the patched contents cannot be written back.
fn patch_include(path: &Path, old: &str, new: &str) {
    // Unreadable or missing files are skipped on purpose.
    let Ok(contents) = std::fs::read_to_string(path) else {
        return;
    };
    if contents.contains("#include <cstdint>") || !contents.contains(old) {
        return;
    }
    let patched = contents.replacen(old, new, 1);
    std::fs::write(path, patched)
        .unwrap_or_else(|err| panic!("failed to patch {}: {err}", path.display()));
}
fn main() {
let manifest_dir = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
let out_dir = PathBuf::from(env::var("OUT_DIR").expect("OUT_DIR not set"));
println!("cargo:rerun-if-env-changed=ZVEC_GIT_REF");
println!("cargo:rerun-if-env-changed=ZVEC_BUILD_TYPE");
println!("cargo:rerun-if-env-changed=ZVEC_BUILD_PARALLEL");
println!("cargo:rerun-if-env-changed=ZVEC_CPU_ARCH");
println!("cargo:rerun-if-env-changed=ZVEC_OPENMP");
let zvec_src = ensure_zvec_source(&manifest_dir, &out_dir);
patch_zvec_source(&zvec_src);
let zvec_build = zvec_src.join("build");
let zvec_lib = zvec_build.join("lib");
let build_type = env::var("ZVEC_BUILD_TYPE").unwrap_or_else(|_| "Release".to_string());
let parallel_jobs = env::var("ZVEC_BUILD_PARALLEL")
.map(|s| s.parse::<usize>().unwrap_or_else(|_| num_cpus()))
.unwrap_or_else(|_| num_cpus());
let c_api_overlay_dir = manifest_dir.join("c-api-static");
let c_api_build = out_dir.join("c-api-static-build");
let groupby_shim_dir = manifest_dir.join("groupby-shim");
let groupby_shim_build = out_dir.join("groupby-shim-build");
let zvec_built = zvec_lib.join("libzvec.a");
if !zvec_built.exists() {
println!("cargo:warning=Building zvec C++ library...");
build_zvec(&zvec_src, &zvec_build, &build_type, parallel_jobs);
} else {
println!("cargo:warning=zvec C++ library already built");
}
let c_api_built = c_api_build.join("libzvec_c_api_static.a");
if !c_api_built.exists() {
println!("cargo:warning=Building zvec_c_api_static (compiling upstream c_api.cc)...");
build_c_api_static(
&c_api_overlay_dir,
&c_api_build,
&zvec_src,
&zvec_build,
&build_type,
parallel_jobs,
);
} else {
println!("cargo:warning=zvec_c_api_static already built");
}
let groupby_built = groupby_shim_build.join("libzvec_groupby_shim.a");
if !groupby_built.exists() {
println!("cargo:warning=Building zvec_groupby_shim...");
build_groupby_shim(
&groupby_shim_dir,
&groupby_shim_build,
&zvec_src,
&build_type,
parallel_jobs,
);
} else {
println!("cargo:warning=zvec_groupby_shim already built");
}
generate_bindings(&zvec_src, &zvec_build, &groupby_shim_dir);
link_libraries(&zvec_lib, &c_api_build, &groupby_shim_build);
}
/// Configures and builds the zvec C++ library with CMake.
///
/// * `src` - root of the zvec source tree (passed to CMake as `-S`).
/// * `build` - build directory (passed to CMake as `-B` and to `cmake --build`).
/// * `build_type` - value for `CMAKE_BUILD_TYPE`.
/// * `parallel_jobs` - value for the `-j` option of `cmake --build`.
///
/// Optional environment variables:
/// * `ZVEC_CPU_ARCH=<NAME>` adds `-DENABLE_<NAME>=ON`; a blank value is ignored.
/// * `ZVEC_OPENMP=ON` or `ZVEC_OPENMP=1` adds `-DENABLE_OPENMP=ON`.
///
/// # Panics
///
/// Panics when the build path is not valid UTF-8 or when either CMake invocation fails.
fn build_zvec(src: &Path, build: &Path, build_type: &str, parallel_jobs: usize) {
    // Best effort: if the directory is truly unusable, the CMake run below fails loudly.
    let _ = std::fs::create_dir_all(build);

    let mut cmake_args = vec![
        format!("-DCMAKE_BUILD_TYPE={}", build_type),
        "-DCMAKE_POLICY_VERSION_MINIMUM=3.5".to_string(),
        "-DBUILD_PYTHON_BINDINGS=OFF".to_string(),
        "-DBUILD_TOOLS=OFF".to_string(),
        "-DBUILD_C_BINDINGS=ON".to_string(),
    ];

    if let Ok(arch) = env::var("ZVEC_CPU_ARCH") {
        // An exported-but-empty variable must not turn into a bogus `-DENABLE_=ON` flag.
        let arch = arch.trim();
        if !arch.is_empty() {
            cmake_args.push(format!("-DENABLE_{}=ON", arch));
        }
    }

    if env::var("ZVEC_OPENMP")
        .map(|v| v == "ON" || v == "1")
        .unwrap_or(false)
    {
        cmake_args.push("-DENABLE_OPENMP=ON".to_string());
    }

    cmake_args.push(format!("-S{}", src.display()));
    cmake_args.push(format!("-B{}", build.display()));

    run(
        Command::new("cmake").args(&cmake_args),
        "cmake configure for zvec",
    );
    run(
        Command::new("cmake").args([
            "--build",
            build.to_str().expect("Invalid build path"),
            "-j",
            parallel_jobs.to_string().as_str(),
        ]),
        "build zvec",
    );
}
/// Configures and builds the `zvec_c_api_static` CMake project.
///
/// * `overlay_dir` - CMake source directory of the overlay project (`-S`).
/// * `build` - build directory (`-B` and `cmake --build`).
/// * `zvec_src` / `zvec_build` - forwarded as `-DZVEC_SRC` and `-DZVEC_BUILD`.
/// * `build_type` - value for `CMAKE_BUILD_TYPE`.
/// * `parallel_jobs` - value for the `-j` option of `cmake --build`.
///
/// # Panics
///
/// Panics when `build` is not valid UTF-8 or when either CMake invocation fails.
fn build_c_api_static(
    overlay_dir: &Path,
    build: &Path,
    zvec_src: &Path,
    zvec_build: &Path,
    build_type: &str,
    parallel_jobs: usize,
) {
    // Failure to pre-create the directory is deliberately ignored.
    let _ = std::fs::create_dir_all(build);

    let configure_args = vec![
        format!("-S{}", overlay_dir.display()),
        format!("-B{}", build.display()),
        format!("-DZVEC_SRC={}", zvec_src.display()),
        format!("-DZVEC_BUILD={}", zvec_build.display()),
        format!("-DCMAKE_BUILD_TYPE={}", build_type),
        String::from("-DCMAKE_POLICY_VERSION_MINIMUM=3.5"),
    ];
    let mut configure = Command::new("cmake");
    configure.args(&configure_args);
    run(&mut configure, "cmake configure for zvec_c_api_static");

    let build_dir = build.to_str().expect("Invalid c-api-static build path");
    let jobs = parallel_jobs.to_string();
    let mut compile = Command::new("cmake");
    compile.args(["--build", build_dir, "-j", jobs.as_str()]);
    run(&mut compile, "build zvec_c_api_static");
}
/// Configures and builds the `zvec_groupby_shim` CMake project.
///
/// * `shim_dir` - CMake source directory of the shim (`-S`).
/// * `build` - build directory (`-B` and `cmake --build`).
/// * `zvec_src` - forwarded as `-DZVEC_SRC`.
/// * `build_type` - value for `CMAKE_BUILD_TYPE`.
/// * `parallel_jobs` - value for the `-j` option of `cmake --build`.
///
/// # Panics
///
/// Panics when `build` is not valid UTF-8 or when either CMake invocation fails.
fn build_groupby_shim(
    shim_dir: &Path,
    build: &Path,
    zvec_src: &Path,
    build_type: &str,
    parallel_jobs: usize,
) {
    // Failure to pre-create the directory is deliberately ignored.
    let _ = std::fs::create_dir_all(build);

    let configure_args = vec![
        format!("-S{}", shim_dir.display()),
        format!("-B{}", build.display()),
        format!("-DZVEC_SRC={}", zvec_src.display()),
        format!("-DCMAKE_BUILD_TYPE={}", build_type),
        String::from("-DCMAKE_POLICY_VERSION_MINIMUM=3.5"),
    ];
    let mut configure = Command::new("cmake");
    configure.args(&configure_args);
    run(&mut configure, "cmake configure for zvec_groupby_shim");

    let build_dir = build.to_str().expect("Invalid groupby-shim build path");
    let jobs = parallel_jobs.to_string();
    let mut compile = Command::new("cmake");
    compile.args(["--build", build_dir, "-j", jobs.as_str()]);
    run(&mut compile, "build zvec_groupby_shim");
}
/// Generates `bindings.rs` in `OUT_DIR` from the zvec C API and the group-by shim header.
///
/// A `wrapper.h` that includes both headers is written to `OUT_DIR` and handed to bindgen.
/// Only items matching `zvec_*`, `zvecgb_*` (functions and types) and `ZVEC_*` (variables)
/// are allow-listed; types starting with `__` are block-listed.
///
/// Besides the project include directories, `/usr/include`, `/usr/local/include` and the
/// directories reported by `discover_gcc_include_dirs` are passed to clang when they exist.
///
/// # Panics
///
/// Panics when `OUT_DIR` is unset, when either input header is missing, or when writing the
/// wrapper header, generating the bindings, or writing the bindings fails.
fn generate_bindings(zvec_src: &Path, zvec_build: &Path, groupby_shim_dir: &Path) {
    let c_api_header = zvec_build.join("src/generated/zvec/c_api.h");
    let groupby_header = groupby_shim_dir.join("include/zvec_groupby_shim.h");
    let out_path = PathBuf::from(env::var("OUT_DIR").unwrap());

    assert!(
        c_api_header.exists(),
        "Configured c_api.h not found at {}. Run cmake configure for zvec first.",
        c_api_header.display()
    );
    assert!(
        groupby_header.exists(),
        "Group-by shim header not found at {}.",
        groupby_header.display()
    );

    // A single umbrella header lets bindgen process both APIs in one pass.
    let wrapper_header = out_path.join("wrapper.h");
    let wrapper_source = format!(
        "#include \"{}\"\n#include \"{}\"\n",
        c_api_header.display(),
        groupby_header.display()
    );
    std::fs::write(&wrapper_header, wrapper_source).expect("Failed to write wrapper.h");

    let wrapper_header_str = wrapper_header
        .to_str()
        .expect("Invalid wrapper header path");
    let builder = bindgen::Builder::default()
        .header(wrapper_header_str)
        .parse_callbacks(Box::new(bindgen::CargoCallbacks::new()))
        .generate_comments(true)
        .allowlist_function("zvec_.*")
        .allowlist_function("zvecgb_.*")
        .allowlist_type("zvec_.*")
        .allowlist_type("zvecgb_.*")
        .allowlist_var("ZVEC_.*")
        .blocklist_type("__.*")
        .default_macro_constant_type(bindgen::MacroTypeVariation::Signed)
        .clang_arg(format!("-I{}", zvec_build.join("src/generated").display()))
        .clang_arg(format!("-I{}", zvec_src.join("src/include").display()))
        .clang_arg(format!("-I{}", groupby_shim_dir.join("include").display()));

    // Fixed system directories first, then whatever gcc reports; a failed gcc probe simply
    // contributes nothing.
    let mut extra_include_dirs: Vec<String> = ["/usr/include", "/usr/local/include"]
        .iter()
        .map(|dir| (*dir).to_string())
        .collect();
    extra_include_dirs.extend(discover_gcc_include_dirs().unwrap_or_default());

    let builder = extra_include_dirs
        .iter()
        .filter(|dir| Path::new(dir.as_str()).exists())
        .fold(builder, |acc, dir| acc.clang_arg(format!("-I{}", dir)));

    builder
        .generate()
        .expect("Unable to generate bindings")
        .write_to_file(out_path.join("bindings.rs"))
        .expect("Couldn't write bindings!");
}
/// Emits the Cargo directives that tell rustc where to find and how to link the native
/// libraries.
///
/// * `zvec_lib` - the `lib` directory of the zvec build; its parent is used as the root for
///   the bundled third-party library directories.
/// * `c_api_build` - directory containing `libzvec_c_api_static.a`.
/// * `groupby_shim_build` - directory containing `libzvec_groupby_shim.a`.
///
/// All static archives are linked with `+whole-archive`. The C++ runtime is `c++` when the
/// target OS is `macos` and `stdc++` otherwise; the ICU libraries are only requested for
/// `linux`.
///
/// # Panics
///
/// Panics when `zvec_lib` has no parent directory.
fn link_libraries(zvec_lib: &Path, c_api_build: &Path, groupby_shim_build: &Path) {
    fn emit_search(dir: &Path) {
        println!("cargo:rustc-link-search=native={}", dir.display());
    }
    fn emit_whole_archive(lib: &str) {
        println!("cargo:rustc-link-lib=static:+whole-archive={}", lib);
    }
    fn emit_lib(lib: &str) {
        println!("cargo:rustc-link-lib={}", lib);
    }

    const ARROW_LIB_DIRS: [&str; 5] = [
        "lib",
        "release",
        "re2_ep-install/lib",
        "utf8proc_ep-install/lib",
        "zlib_ep/src/zlib_ep-install/lib",
    ];
    const BOOST_COMPONENTS: [&str; 7] = [
        "atomic",
        "charconv",
        "chrono",
        "container",
        "date_time",
        "locale",
        "thread",
    ];
    const ZVEC_LIBS: [&str; 4] = ["zvec", "zvec_core", "zvec_ailego", "zvec_turbo"];
    const THIRDPARTY_LIBS: [&str; 22] = [
        "parquet",
        "arrow_acero",
        "arrow_dataset",
        "arrow_compute",
        "arrow",
        "arrow_bundled_dependencies",
        "roaring",
        "rocksdb",
        "lz4",
        "protobuf",
        "protoc",
        "boost_thread",
        "boost_atomic",
        "boost_chrono",
        "boost_container",
        "boost_date_time",
        "boost_locale",
        "boost_charconv",
        "glog",
        "gflags_nothreads",
        "antlr4-runtime",
        "FastPFOR",
    ];

    // Wrapper archives built by this script.
    emit_search(c_api_build);
    emit_whole_archive("zvec_c_api_static");
    emit_search(groupby_shim_build);
    emit_whole_archive("zvec_groupby_shim");

    // zvec itself plus the third-party trees below the zvec build directory.
    emit_search(zvec_lib);
    let build_root = zvec_lib.parent().unwrap();
    emit_search(&build_root.join("external/usr/local/lib"));

    let arrow_build = build_root.join("thirdparty/arrow/arrow/src/ARROW.BUILD-build");
    for &sub_dir in ARROW_LIB_DIRS.iter() {
        emit_search(&arrow_build.join(sub_dir));
    }

    let boost_build = arrow_build.join("_deps/boost-build/libs");
    for &component in BOOST_COMPONENTS.iter() {
        emit_search(&boost_build.join(component));
    }

    emit_search(&build_root.join("thirdparty/lz4/lz4/src/Lz4.BUILD/lib"));

    for &lib in ZVEC_LIBS.iter().chain(THIRDPARTY_LIBS.iter()) {
        emit_whole_archive(lib);
    }

    // Platform-dependent system libraries.
    let target_os = env::var("CARGO_CFG_TARGET_OS").unwrap_or_default();
    match target_os.as_str() {
        "linux" => {
            for &icu in ["icui18n", "icuuc", "icudata"].iter() {
                emit_lib(icu);
            }
            emit_lib("stdc++");
        }
        "macos" => emit_lib("c++"),
        _ => emit_lib("stdc++"),
    }

    for &system_lib in ["pthread", "dl", "m"].iter() {
        emit_lib(system_lib);
    }
}
/// Runs `cmd` to completion and aborts the build script when it does not succeed.
///
/// The command line is echoed as a Cargo warning before it is executed. `context` is a
/// short human-readable description that is included in the panic messages.
///
/// # Panics
///
/// Panics when the process cannot be spawned (the message carries the underlying I/O error)
/// or when it exits unsuccessfully (the message carries the exit status).
fn run(cmd: &mut Command, context: &str) {
    println!("cargo:warning=Running: {:?}", cmd);
    let status = cmd
        .status()
        // Keep the OS error: "not found" and "permission denied" need different fixes.
        .unwrap_or_else(|err| panic!("Failed to execute command: {} ({})", context, err));
    if !status.success() {
        panic!("Command failed ({}, {}): {:?}", context, status, cmd);
    }
}
/// Returns the parallelism reported by the standard library, or `4` when it cannot be
/// determined.
fn num_cpus() -> usize {
    match std::thread::available_parallelism() {
        Ok(count) => count.get(),
        Err(_) => 4,
    }
}
/// Asks `gcc` for its `#include <...>` search directories.
///
/// Runs `gcc -E -Wp,-v -` with an empty standard input and parses the search list that the
/// preprocessor prints to standard error: every non-blank line between
/// `#include <...> search starts here:` and `End of search list.` is returned, trimmed.
/// The result is empty when the markers are not found.
///
/// # Errors
///
/// Returns the I/O error when `gcc` cannot be executed.
fn discover_gcc_include_dirs() -> std::io::Result<Vec<String>> {
    let output = Command::new("gcc")
        .args(["-E", "-Wp,-v", "-"])
        // The markers matched below are translatable gcc messages; force the untranslated
        // form so the parser also works on systems with a non-English locale.
        .env("LC_ALL", "C")
        .stdin(std::process::Stdio::null())
        .output()?;
    let stderr = String::from_utf8_lossy(&output.stderr);

    let mut dirs = Vec::new();
    let mut in_search_list = false;
    for line in stderr.lines() {
        if line.contains("#include <...> search starts here:") {
            in_search_list = true;
            continue;
        }
        if line.contains("End of search list.") {
            break;
        }
        if !in_search_list {
            continue;
        }
        let trimmed = line.trim();
        if !trimmed.is_empty() {
            dirs.push(trimmed.to_string());
        }
    }
    Ok(dirs)
}