import gzip
import json
import os
import shutil
import stat
import sys
import tarfile
import tempfile
from pathlib import Path
# Every path below is anchored at the directory that contains this script, so
# the script does not depend on the current working directory.
SCRIPT_DIR = Path(__file__).resolve().parent

# Locations inside the "duckdb-sources" directory next to this script.
DUCKDB_SCRIPTS_DIR = SCRIPT_DIR.joinpath("duckdb-sources", "scripts")
PACKAGE_BUILD_LOADER_PATH = SCRIPT_DIR.joinpath(
    "duckdb-sources", "generated_extension_loader_package_build.cpp"
)

# TARGET_DIR is the directory that gets packaged; ARCHIVE_PATH is the
# resulting gzip-compressed tarball.
TARGET_DIR = SCRIPT_DIR.joinpath("duckdb")
ARCHIVE_PATH = SCRIPT_DIR.joinpath("duckdb.tar.gz")

SRC_DIR = SCRIPT_DIR.joinpath("src")

# Extensions whose extra sources/include dirs are recorded in the manifest.
EXTENSIONS = ["core_functions", "parquet", "json"]
# Start from an empty output directory: remove any previous TARGET_DIR (a
# missing directory is not an error) and recreate it.
try:
    shutil.rmtree(TARGET_DIR)
except FileNotFoundError:
    pass
TARGET_DIR.mkdir()
# The scripts directory is appended to sys.path before the import below;
# package_build is presumably only importable from there.
sys.path.append(str(DUCKDB_SCRIPTS_DIR))
import package_build
def get_sources(extensions, default_linked_extensions=None):
    """Run ``package_build.build_package`` and return its file lists as sets.

    Args:
        extensions: Extension names forwarded to ``build_package``.
        default_linked_extensions: Forwarded as the keyword argument of the
            same name only when it is not ``None``; otherwise the keyword is
            omitted so ``build_package`` uses its own default.

    Returns:
        A ``(sources, include_dirs)`` pair of sets. Source entries that start
        with ``SCRIPT_DIR`` plus a path separator have that prefix removed;
        include entries are returned as reported.
    """
    extra_options = (
        {}
        if default_linked_extensions is None
        else {"default_linked_extensions": default_linked_extensions}
    )
    # The third element of the result is not used here.
    sources, includes, _ = package_build.build_package(
        str(TARGET_DIR), extensions, False, **extra_options
    )
    # Remove the loader file after every build, if it exists.
    PACKAGE_BUILD_LOADER_PATH.unlink(missing_ok=True)

    prefix = f"{SCRIPT_DIR}{os.path.sep}"
    prefix_len = len(prefix)
    relative_sources = {
        entry[prefix_len:] if entry.startswith(prefix) else entry
        for entry in sources
    }
    return relative_sources, set(includes)
# Baseline: the sources and include dirs reported when no extension is
# requested.
base_source_list, base_include_list = get_sources([])

# For each extension, record only what it adds on top of the baseline.
extension_sources = {}
for e in EXTENSIONS:
    source_list, include_list = get_sources([e])
    extension_sources[e] = {
        "cpp_files": sorted(source_list - base_source_list),
        "include_dirs": sorted(include_list - base_include_list),
    }

# Final run with every extension at once; the return value is deliberately
# unused.
# NOTE(review): presumably this leaves TARGET_DIR holding the combined file
# set that ends up in the archive - confirm against package_build.
get_sources(EXTENSIONS, default_linked_extensions=[])

manifest = {
    "base": {
        "cpp_files": sorted(base_source_list),
        "include_dirs": sorted(base_include_list),
    },
    "extensions": extension_sources,
}

# Pin the encoding and newline so the manifest bytes (and therefore the
# archive that contains it) do not depend on the platform: without
# newline="\n", text mode would write "\r\n" line endings on Windows.
with (TARGET_DIR / "manifest.json").open(
    "w", encoding="utf-8", newline="\n"
) as f:
    json.dump(manifest, f, indent=2, sort_keys=True)
    f.write("\n")
def iter_archive_paths(root):
    """Yield ``root`` and every entry beneath it in a stable order.

    ``root`` is yielded first, unchanged. Then, for each directory visited by
    ``os.walk``, its subdirectory entries are yielded in sorted name order,
    followed by its file entries in sorted name order, as ``Path`` objects.
    Sorting the directory list in place also fixes the order in which
    ``os.walk`` descends.
    """
    yield root
    for current, subdirs, files in os.walk(root):
        here = Path(current)
        # In-place sorts: os.walk reads `subdirs` back to decide where to go
        # next, so this makes the traversal itself deterministic.
        subdirs.sort()
        files.sort()
        for name in (*subdirs, *files):
            yield here / name
def normalized_tarinfo(path):
    """Build a ``TarInfo`` for ``path`` with fixed, host-independent metadata.

    The member name is ``path`` relative to ``SCRIPT_DIR`` in POSIX form.
    Owner ids and names, the modification time and the pax headers are set to
    fixed values, and the mode depends only on the entry type: 0o777 for
    symlinks, 0o755 for directories, 0o644 for regular files.

    Raises:
        RuntimeError: If ``path`` is not a symlink, directory or regular file.
    """
    info = tarfile.TarInfo(path.relative_to(SCRIPT_DIR).as_posix())
    info.uid = info.gid = 0
    info.uname = info.gname = ""
    info.mtime = 0
    info.pax_headers = {}

    # Symlinks are tested first because is_dir()/is_file() follow links.
    if path.is_symlink():
        info.type, info.mode = tarfile.SYMTYPE, 0o777
        info.linkname = str(path.readlink())
        return info

    if path.is_dir():
        info.type, info.mode = tarfile.DIRTYPE, 0o755
        return info

    if path.is_file():
        info.type, info.mode = tarfile.REGTYPE, 0o644
        info.size = path.stat().st_size
        return info

    filemode = stat.filemode(path.lstat().st_mode)
    raise RuntimeError(f"unsupported archive entry type: {path} ({filemode})")
def write_archive_to(archive_path):
    """Write a gzip-compressed pax tar of ``TARGET_DIR`` to ``archive_path``.

    Entries come from ``iter_archive_paths`` and their headers from
    ``normalized_tarinfo``. The gzip stream is created with an empty filename
    and ``mtime=0`` so that neither value varies between runs.
    """
    with archive_path.open("wb") as raw, gzip.GzipFile(
        filename="", mode="wb", fileobj=raw, mtime=0
    ) as gz, tarfile.open(
        fileobj=gz, mode="w", format=tarfile.PAX_FORMAT
    ) as tar:
        for entry in iter_archive_paths(TARGET_DIR):
            info = normalized_tarinfo(entry)
            if not info.isfile():
                # Directories and symlinks carry no payload.
                tar.addfile(info)
                continue
            with entry.open("rb") as payload:
                tar.addfile(info, payload)
def replace_archive_if_changed():
    """Rebuild the archive and replace ``ARCHIVE_PATH`` only if bytes differ.

    The archive is first written to a temporary file created in
    ``SCRIPT_DIR``. If ``ARCHIVE_PATH`` already exists with identical
    contents it is left untouched; otherwise the temporary file is moved over
    it with ``Path.replace``.

    The temporary file is removed on every exit path. Cleanup is done in
    ``finally`` rather than ``except Exception`` so that ``KeyboardInterrupt``
    and ``SystemExit`` do not leave a stray temp file in the source directory.
    Any exception is propagated to the caller.
    """
    temp_fd, temp_archive_name = tempfile.mkstemp(
        dir=SCRIPT_DIR, prefix=f".{ARCHIVE_PATH.name}."
    )
    # Only the name is needed; write_archive_to reopens the file itself.
    os.close(temp_fd)
    temp_archive_path = Path(temp_archive_name)
    try:
        write_archive_to(temp_archive_path)
        unchanged = (
            ARCHIVE_PATH.exists()
            and temp_archive_path.read_bytes() == ARCHIVE_PATH.read_bytes()
        )
        if not unchanged:
            temp_archive_path.replace(ARCHIVE_PATH)
    finally:
        # After a successful replace the temp name no longer exists, hence
        # missing_ok=True.
        temp_archive_path.unlink(missing_ok=True)
# Final step of the script: build the archive from TARGET_DIR and update
# ARCHIVE_PATH only when its contents would actually change.
replace_archive_if_changed()