use std::env;
use std::fs;
use std::io;
use std::path::{Path, PathBuf};
use std::process::Command;
use walkdir::WalkDir;
/// Build-script entry point.
///
/// Makes sure the `tika_native` shared library is present in `OUT_DIR/libs`
/// (reusing artifacts from a sibling build directory or running the Gradle
/// build when necessary) and then emits the cargo link directives for it.
fn main() {
    // Nothing is built or linked when the `DOCS_RS` variable is set.
    if env::var("DOCS_RS").is_ok() {
        return;
    }

    let root_dir = PathBuf::from(env::var("CARGO_MANIFEST_DIR").unwrap());
    let tika_native_source_dir = root_dir.join("tika-native");
    let python_bind_dir = root_dir.join("../bindings/extractous-python/python/extractous");

    let out_dir = PathBuf::from(env::var("OUT_DIR").unwrap());
    let libs_out_dir = out_dir.join("libs");
    let tika_native_dir = out_dir.join("tika-native");

    // Stale outputs are dropped as soon as the sources are newer than the copy in OUT_DIR.
    let sources_changed = is_dir_updated(&tika_native_source_dir, &tika_native_dir);
    if sources_changed {
        println!("Lib tika_native files were updated");
        fs_extra::dir::remove(&libs_out_dir).ok();
        fs_extra::dir::remove(&tika_native_dir).ok();
    } else {
        println!("Lib tika_native files were not updated");
    }

    // Reuse libraries found in an `extractous-*` build directory when they live
    // somewhere other than this build's own libs directory.
    let prebuilt_libs = find_already_built_libs(&out_dir);
    if let Some(libs_dir) = &prebuilt_libs {
        if *libs_dir != libs_out_dir {
            copy_build_artifacts(libs_dir, vec![&libs_out_dir], false);
        }
    }

    // A build is required when the sources changed or nothing reusable was found.
    if sources_changed || prebuilt_libs.is_none() {
        gradle_build(
            &tika_native_source_dir,
            &out_dir,
            &libs_out_dir,
            &python_bind_dir,
        );
    }

    println!("cargo:rustc-link-search={}", libs_out_dir.display());

    // The library name passed to the linker carries a `lib` prefix on Windows only.
    let lib_tika_name = match cfg!(target_os = "windows") {
        true => "libtika_native",
        false => "tika_native",
    };
    println!("cargo:rustc-link-lib=dylib={}", lib_tika_name);
}
/// Searches the directory two levels above `out_dir` for an `extractous-*`
/// build directory that already contains built artifacts.
///
/// A candidate qualifies when both `<candidate>/out/libs` and
/// `<candidate>/out/tika-native` are directories. The `libs` path of the first
/// qualifying candidate (in directory-listing order) is returned; `None` is
/// returned when `out_dir` has no grandparent, the grandparent cannot be
/// listed, or no candidate qualifies.
fn find_already_built_libs(out_dir: &Path) -> Option<PathBuf> {
    let build_root = out_dir.parent()?.parent()?;

    fs::read_dir(build_root)
        .ok()?
        .flatten()
        .map(|entry| entry.path())
        .filter(|candidate| candidate.is_dir())
        .filter(|candidate| {
            // Names that are not valid UTF-8 can never match the prefix.
            candidate
                .file_name()
                .and_then(|name| name.to_str())
                .map_or(false, |name| name.starts_with("extractous-"))
        })
        .find_map(|candidate| {
            let artifacts = candidate.join("out");
            let libs = artifacts.join("libs");
            if libs.is_dir() && artifacts.join("tika-native").is_dir() {
                Some(libs)
            } else {
                None
            }
        })
}
/// Reports whether any file under `src` is missing from, or newer than, its
/// counterpart under `dest`.
///
/// Every regular file found while walking `src` is mapped to the same relative
/// path below `dest`. The result is `true` as soon as one counterpart does not
/// exist, its modification time cannot be read, or it is older than the source
/// file. Source files whose own modification time cannot be read are skipped,
/// as are walk entries that produce an error.
fn is_dir_updated(src: &Path, dest: &Path) -> bool {
    WalkDir::new(src)
        .into_iter()
        .filter_map(Result::ok)
        .filter(|entry| entry.file_type().is_file())
        .any(|entry| {
            let source_file = entry.path();
            let mirrored_file = dest.join(source_file.strip_prefix(src).unwrap());
            if !mirrored_file.exists() {
                return true;
            }

            let source_mtime = match fs::metadata(source_file).and_then(|meta| meta.modified()) {
                Ok(time) => time,
                // An unreadable source timestamp does not count as an update.
                Err(_) => return false,
            };

            match fs::metadata(&mirrored_file).and_then(|meta| meta.modified()) {
                Ok(mirrored_mtime) => source_mtime > mirrored_mtime,
                Err(_) => true,
            }
        })
}
/// Builds the tika-native libraries with Gradle and copies the results to the
/// output directories.
///
/// Steps performed:
/// 1. Resolve a GraalVM JDK via `get_graalvm_home`, using `out_dir/graalvm-jdk`
///    as the install location.
/// 2. Refresh the copy of `tika_native_source_dir` inside `out_dir` when the
///    sources are newer than that copy (or the copy does not exist).
/// 3. Run the Gradle wrapper's `nativeCompile` task with `JAVA_HOME` pointing
///    at the GraalVM home.
/// 4. Copy `build/native/nativeCompile` into `libs_out_dir` and, when it is an
///    existing directory, into `python_bind_dir`, removing the generated
///    header files from each destination.
///
/// # Panics
///
/// Panics when the sources cannot be copied, when Gradle cannot be started,
/// when Gradle exits with a non-success status, or when copying the build
/// artifacts fails.
fn gradle_build(
    tika_native_source_dir: &PathBuf,
    out_dir: &PathBuf,
    libs_out_dir: &PathBuf,
    python_bind_dir: &PathBuf,
) {
    let jdk_install_dir = out_dir.join("graalvm-jdk");
    let tika_native_dir = out_dir.join("tika-native");

    let graalvm_home = get_graalvm_home(&jdk_install_dir);
    println!("Using GraalVM JDK found at {}", graalvm_home.display());
    println!("Building tika_native libs this might take a while ... Please be patient!!");

    // Drop the stale copy so that it is re-created from the current sources below.
    if is_dir_updated(tika_native_source_dir, &tika_native_dir) {
        println!("Lib tika_native files were updated");
        fs_extra::dir::remove(&tika_native_dir).ok();
    }

    if !tika_native_dir.is_dir() {
        fs_extra::dir::copy(
            tika_native_source_dir,
            out_dir,
            &fs_extra::dir::CopyOptions::new(),
        )
        .expect("Failed to copy tika-native source to OUT_DIR");
    }

    let gradlew = if cfg!(target_os = "windows") {
        tika_native_dir.join("gradlew.bat")
    } else {
        tika_native_dir.join("gradlew")
    };

    let status = Command::new(gradlew)
        .current_dir(&tika_native_dir)
        .arg("--no-daemon")
        .arg("nativeCompile")
        .env("JAVA_HOME", graalvm_home)
        .status()
        .expect("Failed to build tika-native");

    // `status()` only fails when the process cannot be spawned; a failed
    // Gradle run is reported through the exit status and must be checked
    // explicitly, otherwise missing or stale artifacts would be copied below.
    if !status.success() {
        panic!(
            "Failed to build tika-native: gradle nativeCompile finished with {}",
            status
        );
    }

    let mut copy_to_dirs = vec![libs_out_dir];
    if python_bind_dir.is_dir() {
        copy_to_dirs.push(python_bind_dir);
    }

    let build_path = tika_native_dir.join("build/native/nativeCompile");
    copy_build_artifacts(&build_path, copy_to_dirs, true);
    println!("Successfully built libs 🚀");
}
/// Copies the contents of `from_path` into every directory in `copy_to_dirs`,
/// overwriting existing files.
///
/// When `clean` is `true`, the four generated header files are deleted from
/// each destination right after the copy.
///
/// # Panics
///
/// Panics when a copy fails, or when `clean` is set and one of the header
/// files cannot be removed from a destination.
pub fn copy_build_artifacts(from_path: &PathBuf, copy_to_dirs: Vec<&PathBuf>, clean: bool) {
    // Header files removed from each destination when `clean` is requested.
    const GENERATED_HEADERS: [&str; 4] = [
        "graal_isolate_dynamic.h",
        "graal_isolate.h",
        "libtika_native_dynamic.h",
        "libtika_native.h",
    ];

    let mut copy_options = fs_extra::dir::CopyOptions::new();
    copy_options.overwrite = true;
    // Copy what is inside `from_path`, not the directory itself.
    copy_options.content_only = true;

    for destination in copy_to_dirs {
        fs_extra::dir::copy(from_path, destination, &copy_options)
            .expect("Failed to copy build artifacts to OUTPUT_DIR");

        if !clean {
            continue;
        }
        for header in GENERATED_HEADERS.iter() {
            fs::remove_file(destination.join(header)).unwrap();
        }
    }
}
/// Determines the GraalVM home directory to build with.
///
/// Resolution order:
/// 1. `GRAALVM_HOME`, when set — it must pass `check_graalvm`, otherwise this
///    function panics.
/// 2. `JAVA_HOME`, when set and accepted by `check_graalvm`.
/// 3. A JDK installed into `install_dir` by `install_graalvm_ce`, which must
///    pass `check_graalvm` as well.
pub fn get_graalvm_home(install_dir: &PathBuf) -> PathBuf {
    // An explicit GRAALVM_HOME is authoritative: validate it and never fall back.
    if let Ok(explicit_home) = env::var("GRAALVM_HOME") {
        let home = PathBuf::from(explicit_home);
        check_graalvm(&home, true);
        return home;
    }

    // JAVA_HOME is only used when it actually points at a GraalVM JDK.
    if let Ok(java_home) = env::var("JAVA_HOME") {
        let home = PathBuf::from(java_home);
        if check_graalvm(&home, false) {
            return home;
        }
    }

    let installed_home = install_graalvm_ce(install_dir);
    check_graalvm(&installed_home, true);
    installed_home
}
/// Checks whether `graalvm_home` contains the `native-image` launcher in its
/// `bin` directory (`native-image.cmd` on Windows).
///
/// Returns `true` when the launcher exists. When it does not exist, returns
/// `false` if `panic` is `false`.
///
/// # Panics
///
/// Panics with installation instructions when the launcher is missing and
/// `panic` is `true`.
pub fn check_graalvm(graalvm_home: &Path, panic: bool) -> bool {
    let launcher_name = if cfg!(target_os = "windows") {
        "native-image.cmd"
    } else {
        "native-image"
    };

    if graalvm_home.join("bin").join(launcher_name).exists() {
        return true;
    }

    if panic {
        panic!(
            "Your GraalVM JDK installation is pointing to: {}. Please make sure \
             it is a valid GraalVM JDK. {}",
            graalvm_home.display(),
            graalvm_install_help_msg()
        );
    }
    false
}
/// Builds the help text that explains how to install a GraalVM JDK with sdkman.
///
/// The suggested sdkman version identifier is `24.1.1.r23-nik` on macOS and
/// `23.0.1-graalce` everywhere else.
fn graalvm_install_help_msg() -> String {
    let version_id = match cfg!(target_os = "macos") {
        true => "24.1.1.r23-nik",
        false => "23.0.1-graalce",
    };

    format!(
        "\nWe recommend using sdkman to install and manage different JDKs. \
         See https://sdkman.io/usage for more information.\n\
         You can install graalvm using:\n \
         sdk install java {sdk_version} \n \
         sdk use java {sdk_version}",
        sdk_version = version_id
    )
}
/// Downloads and unpacks a GraalVM-compatible JDK into `install_dir` and
/// returns the path of its home directory.
///
/// The distribution is chosen from the platform the build script was compiled
/// for: GraalVM Community 23.0.1 on Windows (x86_64) and on the fallback
/// branch used for all other non-macOS systems (x86_64 / aarch64), and
/// BellSoft Liberica NIK 24.1.1 on macOS (x86_64 / aarch64).
///
/// Nothing is downloaded when the expected home directory already exists, and
/// a previously downloaded archive in `install_dir` is reused.
///
/// # Panics
///
/// Panics on an unsupported architecture, when the download fails or the
/// server answers with a non-success HTTP status, and on any I/O or archive
/// extraction error.
pub fn install_graalvm_ce(install_dir: &PathBuf) -> PathBuf {
    let (base_url, archive_ext, main_dir) = if cfg!(target_os = "windows") {
        let url = if cfg!(target_arch = "x86_64") {
            "https://github.com/graalvm/graalvm-ce-builds/releases/download/jdk-23.0.1/graalvm-community-jdk-23.0.1_windows-x64_bin.zip"
        } else {
            panic!("Unsupported windows architecture");
        };
        (url, "zip", "graalvm-community-openjdk-23.0.1+11.1")
    } else if cfg!(target_os = "macos") {
        let (url, dir) = if cfg!(target_arch = "x86_64") {
            ("https://github.com/bell-sw/LibericaNIK/releases/download/24.1.1+1-23.0.1+13/bellsoft-liberica-vm-full-openjdk23.0.1+13-24.1.1+1-macos-amd64.tar.gz",
             "bellsoft-liberica-vm-full-openjdk23-24.1.1/Contents/Home")
        } else if cfg!(target_arch = "aarch64") {
            ("https://github.com/bell-sw/LibericaNIK/releases/download/24.1.1+1-23.0.1+13/bellsoft-liberica-vm-openjdk23.0.1+13-24.1.1+1-macos-aarch64.tar.gz",
             "bellsoft-liberica-vm-openjdk23-24.1.1/Contents/Home")
        } else {
            panic!("Unsupported macos architecture ");
        };
        (url, "tar.gz", dir)
    } else {
        let url = if cfg!(target_arch = "x86_64") {
            "https://github.com/graalvm/graalvm-ce-builds/releases/download/jdk-23.0.1/graalvm-community-jdk-23.0.1_linux-x64_bin.tar.gz"
        } else if cfg!(target_arch = "aarch64") {
            "https://github.com/graalvm/graalvm-ce-builds/releases/download/jdk-23.0.1/graalvm-community-jdk-23.0.1_linux-aarch64_bin.tar.gz"
        } else {
            panic!("Unsupported linux architecture");
        };
        (url, "tar.gz", "graalvm-community-openjdk-23.0.1+11.1")
    };

    let graalvm_home = install_dir.join(main_dir);
    if graalvm_home.exists() {
        return graalvm_home;
    }

    fs::create_dir_all(install_dir).unwrap();
    let archive_path = install_dir
        .join("graalvm-ce-archive")
        .with_extension(archive_ext);

    if !archive_path.exists() {
        let client = reqwest::blocking::Client::builder()
            .timeout(std::time::Duration::from_secs(60 * 5))
            .build()
            .unwrap();

        // `error_for_status` turns 4xx/5xx answers into errors so that an HTTP
        // error page is never stored (and later reused) as the JDK archive.
        let archive_bytes = client
            .get(base_url)
            .send()
            .and_then(|response| response.error_for_status())
            .and_then(|response| response.bytes())
            .unwrap_or_else(|err| {
                panic!("Failed to download GraalVM JDK from {}: {}", base_url, err)
            });

        // Write to a temporary name first and rename afterwards, so that an
        // interrupted write cannot leave a truncated archive under the final
        // name where the next run would pick it up.
        let partial_path = archive_path.with_extension("part");
        fs::write(&partial_path, &archive_bytes).expect("Failed to write archive file");
        fs::rename(&partial_path, &archive_path).expect("Failed to write archive file");
    }

    println!("Extracting GraalVM JDK archive {}", archive_path.display());
    if cfg!(target_os = "windows") {
        let archive_file = fs::File::open(&archive_path).unwrap();
        let mut archive = zip::ZipArchive::new(std::io::BufReader::new(archive_file)).unwrap();
        for i in 0..archive.len() {
            let mut file = archive.by_index(i).unwrap();
            let outpath = install_dir.join(file.name());
            if file.is_dir() {
                fs::create_dir_all(&outpath).unwrap();
            } else {
                // Archives are not required to list a directory before its files.
                if let Some(parent) = outpath.parent() {
                    if !parent.exists() {
                        fs::create_dir_all(parent).unwrap();
                    }
                }
                let mut outfile = fs::File::create(&outpath).unwrap();
                io::copy(&mut file, &mut outfile).unwrap();
            }
        }
    } else {
        let tar_gz_file = fs::File::open(&archive_path).unwrap();
        let tar = flate2::read::GzDecoder::new(tar_gz_file);
        let mut archive = tar::Archive::new(tar);
        archive.unpack(install_dir).unwrap();
    }

    graalvm_home
}