/*
 * ip4sum 0.1.0
 *
 * Highly optimized IPv4 checksum calculation, no-std compatible.
 * Documentation
 */
/* SPDX-License-Identifier: MIT */
/* Copyright (c) 2026 Khashayar Fereidani */

#include "checksum.h"

#include <string.h>

/* ---- Endianness detection (C99 portable) ----
 *
 * Defines IP4SUM_LITTLE_ENDIAN as 1 for little-endian targets and 0 for
 * big-endian targets.
 *
 * A value supplied by the build (e.g. -DIP4SUM_LITTLE_ENDIAN=0) takes
 * precedence: the detection chain below only runs when the macro is not
 * already defined, so an explicit override is never redefined.
 */
#if !defined(IP4SUM_LITTLE_ENDIAN)
#if defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
#define IP4SUM_LITTLE_ENDIAN 1
#elif defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
#define IP4SUM_LITTLE_ENDIAN 0
#elif defined(_WIN32) || defined(_WIN64)
#define IP4SUM_LITTLE_ENDIAN 1
#elif defined(__LITTLE_ENDIAN__) || defined(__ARMEL__) || defined(__THUMBEL__) \
   || defined(__AARCH64EL__) || defined(_MIPSEL) || defined(__MIPSEL) \
   || defined(__MIPSEL__) || defined(__riscv) || defined(__loongarch64) \
   || defined(__i386__) || defined(__x86_64__) || defined(__X86__) \
   || defined(_M_IX86) || defined(_M_X64) || defined(_M_ARM64)
#define IP4SUM_LITTLE_ENDIAN 1
#elif defined(__BIG_ENDIAN__) || defined(__ARMEB__) || defined(__THUMBEB__) \
   || defined(__AARCH64EB__) || defined(_MIPSEB) || defined(__MIPSEB) \
   || defined(__MIPSEB__) || defined(__s390__) || defined(__s390x__) \
   || defined(__SPU__) || defined(__hppa__)
#define IP4SUM_LITTLE_ENDIAN 0
#else
/* No known marker matched: assume little-endian.
 * If building for a big-endian target not listed above, define
 * IP4SUM_LITTLE_ENDIAN=0 in your build flags. */
#define IP4SUM_LITTLE_ENDIAN 1
#endif
#endif /* !defined(IP4SUM_LITTLE_ENDIAN) */

/* ---- Byte-swap primitives (C99 portable) ----
 *
 * Selects compiler-provided implementations for IP4SUM_BSWAP16 and
 * IP4SUM_BSWAP64 where one is available.  Either macro may be left
 * undefined by this section; the portable fallbacks that follow define
 * whichever one is still missing.
 */

/* Preferred path: ask the compiler directly, one builtin at a time. */
#if defined(__has_builtin)
#if __has_builtin(__builtin_bswap16)
#define IP4SUM_BSWAP16(x) __builtin_bswap16(x)
#endif
#if __has_builtin(__builtin_bswap64)
#define IP4SUM_BSWAP64(x) __builtin_bswap64(x)
#endif
/* No __has_builtin: use the builtins when the compiler reports a GNU C
 * version of 4.8 or newer. */
#elif defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 8))
#define IP4SUM_BSWAP16(x) __builtin_bswap16(x)
#define IP4SUM_BSWAP64(x) __builtin_bswap64(x)
/* MSVC: use the _byteswap_* routines, with <stdlib.h> included for them. */
#elif defined(_MSC_VER)
#include <stdlib.h>
#define IP4SUM_BSWAP16(x) _byteswap_ushort(x)
#define IP4SUM_BSWAP64(x) _byteswap_uint64(x)
#endif

#if !defined(IP4SUM_BSWAP16)
/*
 * Portable fallback used when no compiler intrinsic was selected:
 * returns value with its two bytes exchanged.
 */
static inline uint16_t ip4sum_bswap16(uint16_t value) {
    const unsigned int low_byte = value & 0x00FFu;
    const unsigned int high_byte = (value >> 8) & 0x00FFu;

    /* The old low byte becomes the new high byte and vice versa. */
    return (uint16_t)((low_byte << 8) | high_byte);
}
#define IP4SUM_BSWAP16(x) ip4sum_bswap16(x)
#endif

#if !defined(IP4SUM_BSWAP64)
/*
 * Portable fallback used when no compiler intrinsic was selected:
 * returns value with the order of its eight bytes reversed.
 *
 * The reversal is done in three rounds, each swapping neighbouring
 * groups of twice the previous width: bytes, then 16-bit pairs, then
 * the two 32-bit halves.
 */
static inline uint64_t ip4sum_bswap64(uint64_t value) {
    const uint64_t byte_lanes = 0x00FF00FF00FF00FFULL;
    const uint64_t half_lanes = 0x0000FFFF0000FFFFULL;

    value = ((value & byte_lanes) << 8) | ((value >> 8) & byte_lanes);
    value = ((value & half_lanes) << 16) | ((value >> 16) & half_lanes);
    return (value << 32) | (value >> 32);
}
#define IP4SUM_BSWAP64(x) ip4sum_bswap64(x)
#endif

/*
 * Fold a 64-bit native-order accumulator into the final 16-bit
 * one's-complement checksum.
 *
 * The accumulator is reduced with end-around carry until it fits in 16
 * bits, then bitwise inverted.  On little-endian builds
 * (IP4SUM_LITTLE_ENDIAN != 0) the inverted value is byte-swapped before
 * being returned; on big-endian builds it is returned unchanged.
 *
 * One's-complement addition commutes with byte swapping, so summing
 * words in native order and swapping only the final 16-bit value gives
 * the same result as swapping every word before adding it.  That costs
 * a single swap here instead of one per word during accumulation.
 */
static inline uint16_t fold(uint64_t sum) {
    /* 64 bits -> at most 33 bits. */
    sum = (sum >> 32) + (sum & 0xffffffffu);

    /*
     * Keep wrapping carries back into the low 16 bits until none remain.
     * A fixed number of "add the high part once" steps is not enough:
     * an intermediate value of 0x1FFFF folds to 0x10000, which still
     * carries a bit that must be added back in.
     */
    while ((sum >> 16) != 0) {
        sum = (sum >> 16) + (sum & 0xffffu);
    }

    /* sum now fits in 16 bits; invert only those bits. */
    uint16_t result = (uint16_t)(~sum & 0xffffu);

#if IP4SUM_LITTLE_ENDIAN
    return IP4SUM_BSWAP16(result);
#else
    return result;
#endif
}

/*
 * Add len bytes starting at data to the running sum held in c->acc.
 *
 * The input is consumed as native-order 32-bit words, followed by at
 * most one 16-bit word and at most one trailing byte.  A trailing odd
 * byte is treated as the first byte of a 16-bit word whose second byte
 * is zero.
 *
 * Words are read with memcpy, so data does not need any particular
 * alignment.  When len is 0, data is never dereferenced.
 *
 * Every call starts a fresh 16-bit word at data[0]; no partial word is
 * carried between calls.  Feeding a message in several pieces therefore
 * only matches a single call over the whole message when every piece
 * except the last has even length.
 *
 * c    - checksum state; c->acc is read on entry and written on return.
 * data - bytes to add.
 * len  - number of bytes available at data.
 */
void ip4sum_checksum_update(ip4sum_checksum *c, const unsigned char *data,
                            size_t len) {
    uint64_t ac = c->acc;
    const unsigned char *ptr = data;

    while (len >= sizeof(uint32_t)) {
        uint32_t word;

        /* memcpy avoids the misaligned, aliasing-violating load that a
         * pointer cast would perform. */
        memcpy(&word, ptr, sizeof word);
        ac += word;
        ptr += sizeof word;
        len -= sizeof word;
    }
    if (len >= sizeof(uint16_t)) {
        uint16_t half;

        memcpy(&half, ptr, sizeof half);
        ac += half;
        ptr += sizeof half;
        len -= sizeof half;
    }
    if (len > 0) {
        /* Place the last byte in the first byte of a zeroed 16-bit word
         * so that it lands in the correct half on both little- and
         * big-endian hosts. */
        uint16_t tail = 0;

        memcpy(&tail, ptr, 1);
        ac += tail;
    }

    c->acc = ac;
}

/*
 * Produce the 16-bit checksum for the sum accumulated so far.
 *
 * The state is taken by value, so the caller's copy is left untouched.
 * The returned value is whatever fold() yields for c.acc.
 */
uint16_t ip4sum_checksum_finalize(ip4sum_checksum c) {
    const uint64_t accumulated = c.acc;

    return fold(accumulated);
}

/*
 * Compute the checksum of a single buffer in one call.
 *
 * Equivalent to creating a state with ip4sum_checksum_new(), feeding it
 * the len bytes at data with ip4sum_checksum_update(), and returning the
 * result of ip4sum_checksum_finalize().
 */
uint16_t ip4sum_checksum_oneshot(const unsigned char *data, size_t len) {
    ip4sum_checksum state = ip4sum_checksum_new();

    ip4sum_checksum_update(&state, data, len);

    return ip4sum_checksum_finalize(state);
}