stringzilla 3.1.0

Faster SIMD-accelerated string search, sorting, fingerprints, and edit distances
Documentation
import os
import sys
import platform
from setuptools import setup, Extension
from typing import List, Tuple
import sysconfig
import glob


def get_compiler() -> str:
    if platform.python_implementation() == "CPython":
        compiler = platform.python_compiler().lower()
        return "gcc" if "gcc" in compiler else "llvm" if "clang" in compiler else ""
    return ""


def is_64bit_x86() -> bool:
    arch = platform.machine()
    return arch == "x86_64" or arch == "i386"


def is_64bit_arm() -> bool:
    arch = platform.machine()
    return arch.startswith("arm")


def is_big_endian() -> bool:
    return sys.byteorder == "big"


def linux_settings() -> Tuple[List[str], List[str], List[Tuple[str]]]:
    compile_args = [
        "-std=c99",  # use the C 99 language dialect
        "-pedantic",  # stick close to the C language standard, avoid compiler extensions
        "-O3",  # maximum optimization level
        "-fdiagnostics-color=always",  # color console output
        "-Wno-unknown-pragmas",  # like: `pragma region` and some unrolls
        "-Wno-unused-function",  # like: ... declared ‘static’ but never defined
        "-Wno-incompatible-pointer-types",  # like: passing argument 4 of ‘sz_export_prefix_u32’ from incompatible pointer type
        "-Wno-discarded-qualifiers",  # like: passing argument 1 of ‘free’ discards ‘const’ qualifier from pointer target type
        "-fPIC",  # to enable dynamic dispatch
    ]
    link_args = [
        "-fPIC",  # to enable dynamic dispatch
    ]

    # GCC is our primary compiler, so when packaging the library, even if the current machine
    # doesn't support AVX-512 or SVE, still precompile those.
    macros_args = [
        ("SZ_USE_X86_AVX512", "1" if is_64bit_x86() else "0"),
        ("SZ_USE_X86_AVX2", "1" if is_64bit_x86() else "0"),
        ("SZ_USE_ARM_SVE", "1" if is_64bit_arm() else "0"),
        ("SZ_USE_ARM_NEON", "1" if is_64bit_arm() else "0"),
        ("SZ_DETECT_BIG_ENDIAN", "1" if is_big_endian() else "0"),
    ]

    if is_64bit_arm():
        compile_args.append("-march=armv8-a+simd")

    return compile_args, link_args, macros_args


def darwin_settings() -> Tuple[List[str], List[str], List[Tuple[str]]]:
    compile_args = [
        "-std=c99",  # use the C 99 language dialect
        "-pedantic",  # stick close to the C language standard, avoid compiler extensions
        "-O3",  # maximum optimization level
        "-fcolor-diagnostics",  # color console output
        "-Wno-unknown-pragmas",  # like: `pragma region` and some unrolls
        "-Wno-incompatible-function-pointer-types",
        "-Wno-incompatible-pointer-types",  # like: passing argument 4 of ‘sz_export_prefix_u32’ from incompatible pointer type
        "-Wno-discarded-qualifiers",  # like: passing argument 1 of ‘free’ discards ‘const’ qualifier from pointer target type
        "-fPIC",  # to enable dynamic dispatch
    ]
    link_args = [
        "-fPIC",  # to enable dynamic dispatch
    ]

    # Apple Clang doesn't support the `-march=native` argument,
    # so we must pre-set the CPU generation. Technically the last Intel-based Apple
    # product was the 2021 MacBook Pro, which had the "Coffee Lake" architecture.
    # During Universal builds, however, even AVX header cause compilation errors.
    can_use_avx2 = is_64bit_x86() and sysconfig.get_platform().startswith("universal")
    macros_args = [
        ("SZ_USE_X86_AVX512", "0"),
        ("SZ_USE_X86_AVX2", "1" if can_use_avx2 else "0"),
        ("SZ_USE_ARM_SVE", "0"),
        ("SZ_USE_ARM_NEON", "1" if is_64bit_arm() else "0"),
    ]

    return compile_args, link_args, macros_args


def windows_settings() -> Tuple[List[str], List[str], List[Tuple[str]]]:
    compile_args = [
        "/std:c99",  # use the C 99 language dialect
        "/Wall",  # stick close to the C language standard, avoid compiler extensions
        "/O2",  # maximum optimization level
    ]

    # Detect supported architectures for MSVC.
    macros_args = []
    if "AVX512" in platform.processor():
        macros_args.append(("SZ_USE_X86_AVX512", "1"))
        compile_args.append("/arch:AVX512")
    if "AVX2" in platform.processor():
        macros_args.append(("SZ_USE_X86_AVX2", "1"))
        compile_args.append("/arch:AVX2")

    link_args = []
    return compile_args, link_args, macros_args


if sys.platform == "linux":
    compile_args, link_args, macros_args = linux_settings()

elif sys.platform == "darwin":
    compile_args, link_args, macros_args = darwin_settings()

elif sys.platform == "win32":
    compile_args, link_args, macros_args = windows_settings()


ext_modules = [
    Extension(
        "stringzilla",
        ["python/lib.c"] + glob.glob("c/*.c"),
        # In the past I've used `np.get_include()` to include NumPy headers,
        # but it's not necessary for this library.
        include_dirs=["include"],
        extra_compile_args=compile_args,
        extra_link_args=link_args,
        define_macros=[("SZ_DYNAMIC_DISPATCH", "1")] + macros_args,
    ),
]

__version__ = open("VERSION", "r").read().strip()
__lib_name__ = "stringzilla"


this_directory = os.path.abspath(os.path.dirname(__file__))
with open(os.path.join(this_directory, "README.md"), "r", encoding="utf-8") as f:
    long_description = f.read()


setup(
    name=__lib_name__,
    version=__version__,
    author="Ash Vardanian",
    description="SIMD-accelerated string search, sort, hashes, fingerprints, & edit distances",
    long_description=long_description,
    long_description_content_type="text/markdown",
    license="Apache-2.0",
    classifiers=[
        "Development Status :: 5 - Production/Stable",
        "Natural Language :: English",
        "Intended Audience :: Developers",
        "Intended Audience :: Information Technology",
        "License :: OSI Approved :: Apache Software License",
        "Programming Language :: C++",
        "Programming Language :: Python :: 3 :: Only",
        "Programming Language :: Python :: Implementation :: CPython",
        "Operating System :: MacOS",
        "Operating System :: Unix",
        "Operating System :: Microsoft :: Windows",
        "Topic :: File Formats",
        "Topic :: Internet :: Log Analysis",
        "Topic :: Scientific/Engineering :: Information Analysis",
        "Topic :: System :: Logging",
        "Topic :: Text Processing :: General",
        "Topic :: Text Processing :: Indexing",
    ],
    include_dirs=[],
    setup_requires=[],
    ext_modules=ext_modules,
)