zlob 1.3.3

SIMD optimized glob pattern matching library faster than glob crate
Documentation
///! Path sorting utilities for zlob
///!
///! Provides POSIX-compliant sorting of glob results.
///! Per POSIX, when GLOB_NOSORT is not set, pathnames shall be sorted as if
///! by a call to strcoll(). In the C/POSIX locale this is equivalent to strcmp()
///! (byte-by-byte unsigned comparison), which is what we implement here.
const std = @import("std");

fn simdStrCmp(a: []const u8, b: []const u8) std.math.Order {
    const min_len = @min(a.len, b.len);
    const vec_len = std.simd.suggestVectorLength(u8) orelse 16;

    if (min_len >= vec_len) {
        const Vec = @Vector(vec_len, u8);
        const MaskInt = std.meta.Int(.unsigned, vec_len);
        const all_ones: MaskInt = @as(MaskInt, 0) -% 1;

        var i: usize = 0;
        while (i + vec_len <= min_len) : (i += vec_len) {
            const a_vec: Vec = a[i..][0..vec_len].*;
            const b_vec: Vec = b[i..][0..vec_len].*;
            const eq = a_vec == b_vec;
            const mask = @as(MaskInt, @bitCast(eq));

            if (mask != all_ones) {
                const first_diff = @ctz(~mask);
                const a_byte = a[i + first_diff];
                const b_byte = b[i + first_diff];
                if (a_byte < b_byte) return .lt;
                return .gt;
            }
        }
        for (a[i..min_len], b[i..min_len]) |ab, bb| {
            if (ab < bb) return .lt;
            if (ab > bb) return .gt;
        }
    } else {
        for (a[0..min_len], b[0..min_len]) |ab, bb| {
            if (ab < bb) return .lt;
            if (ab > bb) return .gt;
        }
    }

    return std.math.order(a.len, b.len);
}

/// Sort context for in-place pdqsort of parallel paths + lengths arrays.
/// Implements the swap/lessThan interface expected by std.mem.sortUnstableContext.
const PathSortCtx = struct {
    paths: [*][*c]u8,
    lengths: [*]usize,

    pub fn swap(ctx: PathSortCtx, a: usize, b: usize) void {
        const tmp_path = ctx.paths[a];
        ctx.paths[a] = ctx.paths[b];
        ctx.paths[b] = tmp_path;

        const tmp_len = ctx.lengths[a];
        ctx.lengths[a] = ctx.lengths[b];
        ctx.lengths[b] = tmp_len;
    }

    pub fn lessThan(ctx: PathSortCtx, a: usize, b: usize) bool {
        const slice_a = @as([*]const u8, @ptrCast(ctx.paths[a]))[0..ctx.lengths[a]];
        const slice_b = @as([*]const u8, @ptrCast(ctx.paths[b]))[0..ctx.lengths[b]];
        return simdStrCmp(slice_a, slice_b) == .lt;
    }
};

pub fn sortPaths(paths: [*][*c]u8, lengths: [*]usize, count: usize) void {
    if (count <= 1) return;
    std.mem.sortUnstableContext(0, count, PathSortCtx{
        .paths = paths,
        .lengths = lengths,
    });
}

const testing = std.testing;

test "simdStrCmp - equal strings" {
    try testing.expectEqual(std.math.Order.eq, simdStrCmp("hello", "hello"));
    try testing.expectEqual(std.math.Order.eq, simdStrCmp("", ""));
    try testing.expectEqual(std.math.Order.eq, simdStrCmp("a", "a"));
}

test "simdStrCmp - less than" {
    try testing.expectEqual(std.math.Order.lt, simdStrCmp("a", "b"));
    try testing.expectEqual(std.math.Order.lt, simdStrCmp("abc", "abd"));
    try testing.expectEqual(std.math.Order.lt, simdStrCmp("abc", "abcd"));
    try testing.expectEqual(std.math.Order.lt, simdStrCmp("", "a"));
}

test "simdStrCmp - greater than" {
    try testing.expectEqual(std.math.Order.gt, simdStrCmp("b", "a"));
    try testing.expectEqual(std.math.Order.gt, simdStrCmp("abd", "abc"));
    try testing.expectEqual(std.math.Order.gt, simdStrCmp("abcd", "abc"));
    try testing.expectEqual(std.math.Order.gt, simdStrCmp("a", ""));
}

test "simdStrCmp - long strings with SIMD" {
    const a = "this is a fairly long string that should use SIMD comparison";
    const b = "this is a fairly long string that should use SIMD comparison";
    const c = "this is a fairly long string that should use SIMD comparisoN";

    try testing.expectEqual(std.math.Order.eq, simdStrCmp(a, b));
    try testing.expectEqual(std.math.Order.gt, simdStrCmp(a, c)); // 'n' > 'N'
}

test "sortPaths - basic sorting" {
    var paths: [3][*c]u8 = undefined;
    var lengths: [3]usize = undefined;

    var str1: [10:0]u8 = "charlie\x00\x00\x00".*;
    var str2: [10:0]u8 = "alpha\x00\x00\x00\x00\x00".*;
    var str3: [10:0]u8 = "bravo\x00\x00\x00\x00\x00".*;

    paths[0] = &str1;
    paths[1] = &str2;
    paths[2] = &str3;
    lengths[0] = 7;
    lengths[1] = 5;
    lengths[2] = 5;

    sortPaths(&paths, &lengths, 3);

    const sorted0 = @as([*]const u8, @ptrCast(paths[0]))[0..lengths[0]];
    const sorted1 = @as([*]const u8, @ptrCast(paths[1]))[0..lengths[1]];
    const sorted2 = @as([*]const u8, @ptrCast(paths[2]))[0..lengths[2]];

    try testing.expectEqualStrings("alpha", sorted0);
    try testing.expectEqualStrings("bravo", sorted1);
    try testing.expectEqualStrings("charlie", sorted2);
}

test "sortPaths - empty and single element" {
    var paths: [1][*c]u8 = undefined;
    var lengths: [1]usize = undefined;

    // Empty case
    sortPaths(&paths, &lengths, 0);

    // Single element case
    var str: [5:0]u8 = "test\x00".*;
    paths[0] = &str;
    lengths[0] = 4;
    sortPaths(&paths, &lengths, 1);

    const result = @as([*]const u8, @ptrCast(paths[0]))[0..lengths[0]];
    try testing.expectEqualStrings("test", result);
}

test "sortPaths - already sorted" {
    var paths: [3][*c]u8 = undefined;
    var lengths: [3]usize = undefined;

    var str1: [6:0]u8 = "alpha\x00".*;
    var str2: [6:0]u8 = "bravo\x00".*;
    var str3: [8:0]u8 = "charlie\x00".*;

    paths[0] = &str1;
    paths[1] = &str2;
    paths[2] = &str3;
    lengths[0] = 5;
    lengths[1] = 5;
    lengths[2] = 7;

    sortPaths(&paths, &lengths, 3);

    try testing.expectEqualStrings("alpha", @as([*]const u8, @ptrCast(paths[0]))[0..lengths[0]]);
    try testing.expectEqualStrings("bravo", @as([*]const u8, @ptrCast(paths[1]))[0..lengths[1]]);
    try testing.expectEqualStrings("charlie", @as([*]const u8, @ptrCast(paths[2]))[0..lengths[2]]);
}

test "sortPaths - reverse sorted" {
    var paths: [3][*c]u8 = undefined;
    var lengths: [3]usize = undefined;

    var str1: [8:0]u8 = "charlie\x00".*;
    var str2: [6:0]u8 = "bravo\x00".*;
    var str3: [6:0]u8 = "alpha\x00".*;

    paths[0] = &str1;
    paths[1] = &str2;
    paths[2] = &str3;
    lengths[0] = 7;
    lengths[1] = 5;
    lengths[2] = 5;

    sortPaths(&paths, &lengths, 3);

    try testing.expectEqualStrings("alpha", @as([*]const u8, @ptrCast(paths[0]))[0..lengths[0]]);
    try testing.expectEqualStrings("bravo", @as([*]const u8, @ptrCast(paths[1]))[0..lengths[1]]);
    try testing.expectEqualStrings("charlie", @as([*]const u8, @ptrCast(paths[2]))[0..lengths[2]]);
}

test "sortPaths - POSIX byte ordering (paths with slashes)" {
    var paths: [4][*c]u8 = undefined;
    var lengths: [4]usize = undefined;

    var s1: [12:0]u8 = "dir/file.c\x00\x00".*;
    var s2: [14:0]u8 = "dir/subdir/a\x00\x00".*;
    var s3: [10:0]u8 = "dir/aaa.c\x00".*;
    var s4: [8:0]u8 = "abc.txt\x00".*;

    paths[0] = &s1;
    paths[1] = &s2;
    paths[2] = &s3;
    paths[3] = &s4;
    lengths[0] = 10;
    lengths[1] = 12;
    lengths[2] = 9;
    lengths[3] = 7;

    sortPaths(&paths, &lengths, 4);

    try testing.expectEqualStrings("abc.txt", @as([*]const u8, @ptrCast(paths[0]))[0..lengths[0]]);
    try testing.expectEqualStrings("dir/aaa.c", @as([*]const u8, @ptrCast(paths[1]))[0..lengths[1]]);
    try testing.expectEqualStrings("dir/file.c", @as([*]const u8, @ptrCast(paths[2]))[0..lengths[2]]);
    try testing.expectEqualStrings("dir/subdir/a", @as([*]const u8, @ptrCast(paths[3]))[0..lengths[3]]);
}

test "sortPaths - duplicate paths" {
    var paths: [4][*c]u8 = undefined;
    var lengths: [4]usize = undefined;

    var s1: [6:0]u8 = "bravo\x00".*;
    var s2: [6:0]u8 = "alpha\x00".*;
    var s3: [6:0]u8 = "bravo\x00".*;
    var s4: [6:0]u8 = "alpha\x00".*;

    paths[0] = &s1;
    paths[1] = &s2;
    paths[2] = &s3;
    paths[3] = &s4;
    lengths[0] = 5;
    lengths[1] = 5;
    lengths[2] = 5;
    lengths[3] = 5;

    sortPaths(&paths, &lengths, 4);

    try testing.expectEqualStrings("alpha", @as([*]const u8, @ptrCast(paths[0]))[0..lengths[0]]);
    try testing.expectEqualStrings("alpha", @as([*]const u8, @ptrCast(paths[1]))[0..lengths[1]]);
    try testing.expectEqualStrings("bravo", @as([*]const u8, @ptrCast(paths[2]))[0..lengths[2]]);
    try testing.expectEqualStrings("bravo", @as([*]const u8, @ptrCast(paths[3]))[0..lengths[3]]);
}