#include "hwy/timer.h"
#include <stdlib.h>
#include <chrono>
#include <ratio>
#include "hwy/base.h"
#include "hwy/robust_statistics.h"
#include "hwy/x86_cpuid.h"
namespace hwy {
#if HWY_ARCH_X86
namespace x86 {

// Returns whether CPUID reports support for the RDTSCP instruction.
//
// As a side effect, emits a warning (but still returns true) when the CPU does
// not flag its TSC as constant/invariant, because tick counts may then change
// rate or jump.
static bool HasRDTSCP() {
  uint32_t abcd[4];

  // Extended feature flags. Bit 27 of the fourth output register is tested as
  // the RDTSCP flag (presumably abcd[] holds EAX..EDX in that order - confirm
  // against Cpuid in hwy/x86_cpuid.h).
  Cpuid(0x80000001U, 0, abcd);
  if ((abcd[3] & (1u << 27)) == 0) return false;

  // Bit 8 of the fourth output register of leaf 0x80000007 is tested as the
  // "invariant TSC" flag. Its absence is not fatal, hence only a warning.
  Cpuid(0x80000007U, 0, abcd);
  if ((abcd[3] & (1u << 8)) == 0) {
    HWY_WARN("TSC not constant/invariant, may vary frequency or jump.");
  }
  return true;
}

}  // namespace x86
// NOTE: #endif must be on its own line; when it follows other tokens it is not
// treated as a preprocessor directive and the #if above stays unterminated.
#endif  // HWY_ARCH_X86
// Estimates the number of timer ticks per second by comparing timer::Start()
// readings against std::chrono::steady_clock over several short intervals.
// Returns the largest of the per-interval estimates.
static HWY_MAYBE_UNUSED double MeasureNominalClockRate() {
  using Clock = std::chrono::steady_clock;
  using Seconds = std::chrono::duration<double, std::ratio<1>>;
  // Number of independent measurements; only the maximum is kept.
  constexpr int kNumReps = 3;

  double best_rate = 0.0;
  for (int attempt = 0; attempt < kNumReps; ++attempt) {
    // Read the wall clock first, then the tick counter, as at the end of the
    // interval, so both endpoints are sampled in the same order.
    const Clock::time_point begin_time = Clock::now();
    const timer::Ticks begin_ticks = timer::Start();
    const Clock::time_point deadline =
        begin_time + std::chrono::milliseconds(10);

    // Poll until at least 10 ms of wall-clock time have elapsed. Start() is
    // used for the closing reading as well (not Stop()).
    Clock::time_point end_time;
    timer::Ticks end_ticks;
    do {
      end_time = Clock::now();
      end_ticks = timer::Start();
    } while (end_time < deadline);

    const Seconds elapsed = end_time - begin_time;
    const double tick_delta = static_cast<double>(end_ticks - begin_ticks);
    const double rate = tick_delta / elapsed.count();
    best_rate = HWY_MAX(best_rate, rate);
  }
  return best_rate;
}
#if HWY_ARCH_PPC && defined(__GLIBC__) && defined(__powerpc64__)
namespace ppc {

// Returns the frequency reported by glibc's __ppc_get_timebase_freq(), or a
// measured estimate from MeasureNominalClockRate() when the reported value is
// not positive.
static HWY_INLINE double GetTimebaseFreq() {
  const auto timebase_freq = __ppc_get_timebase_freq();
  return (timebase_freq > 0) ? static_cast<double>(timebase_freq)
                             : MeasureNominalClockRate();
}

}  // namespace ppc
// NOTE: #endif must be on its own line; when it follows other tokens it is not
// treated as a preprocessor directive and the #if above stays unterminated.
#endif  // HWY_ARCH_PPC && defined(__GLIBC__) && defined(__powerpc64__)
namespace platform {
// Writes a NUL-terminated CPU description into `cpu100` and returns whether a
// brand string was obtained.
//
// On x86: if CPUID reports extended leaves up to 0x80000004, copies the 48
// bytes returned by leaves 0x80000002..0x80000004 and terminates them at index
// 48 (returns true); otherwise stores an empty string (returns false).
// On other architectures: stores "?" and returns false.
// The name suggests the caller provides 100 bytes; at most 49 are written.
HWY_DLLEXPORT bool GetCpuString(char* cpu100) {
#if HWY_ARCH_X86
  constexpr uint32_t kMaxExtendedLeafQuery = 0x80000000U;
  constexpr uint32_t kFirstBrandLeaf = 0x80000002U;
  constexpr uint32_t kLastBrandLeaf = 0x80000004U;
  constexpr size_t kBytesPerLeaf = 16;  // four 32-bit registers

  uint32_t regs[4];

  // The first output register of this query is compared against the highest
  // leaf needed below.
  x86::Cpuid(kMaxExtendedLeafQuery, 0, regs);
  const bool have_brand_leaves = regs[0] >= kLastBrandLeaf;
  if (!have_brand_leaves) {
    cpu100[0] = '\0';
    return false;
  }

  // Each leaf contributes 16 raw bytes, appended back to back.
  char* dest = cpu100;
  for (uint32_t leaf = kFirstBrandLeaf; leaf <= kLastBrandLeaf; ++leaf) {
    x86::Cpuid(leaf, 0, regs);
    CopyBytes<sizeof(regs)>(&regs[0], dest);
    dest += kBytesPerLeaf;
  }
  *dest = '\0';  // dest == cpu100 + 48 here
  return true;
#else
  cpu100[0] = '?';
  cpu100[1] = '\0';
  return false;
#endif
}
// Returns the current timer::Start() reading converted to seconds, i.e. ticks
// divided by InvariantTicksPerSecond().
HWY_DLLEXPORT double Now() {
  // The reciprocal is computed once, on the first call, and reused afterwards.
  static const double seconds_per_tick = 1.0 / InvariantTicksPerSecond();
  const timer::Ticks ticks = timer::Start();
  return seconds_per_tick * static_cast<double>(ticks);
}
// Returns false only on x86 CPUs for which x86::HasRDTSCP() is false; in that
// case `cpu100` receives the result of GetCpuString(). In every other case
// `cpu100` is set to the empty string and the function returns true.
// TimerResolution() uses the result to decide whether to call timer::Stop().
HWY_DLLEXPORT bool HaveTimerStop(char* cpu100) {
#if HWY_ARCH_X86
  const bool has_rdtscp = x86::HasRDTSCP();
  if (!has_rdtscp) {
    // Report which CPU lacks the feature; the bool result is not needed.
    (void)GetCpuString(cpu100);
    return false;
  }
#endif
  cpu100[0] = '\0';
  return true;
}
// Returns the timer frequency in ticks per second; Now() divides
// timer::Start() readings by this value to obtain seconds.
//
// The source of the value depends on the platform:
// - 64-bit PPC with glibc: ppc::GetTimebaseFreq(), evaluated once.
// - x86, RISC-V, Arm A64 (non-MSVC): MeasureNominalClockRate(), evaluated once.
// - Windows: QueryPerformanceFrequency(), queried on every call.
// - Apple: derived from mach_timebase_info(), queried on every call.
// - Otherwise: the constant 1E9.
HWY_DLLEXPORT double InvariantTicksPerSecond() {
#if HWY_ARCH_PPC && defined(__GLIBC__) && defined(__powerpc64__)
  // Function-local static: computed on the first call only.
  static const double freq = ppc::GetTimebaseFreq();
  return freq;
#elif HWY_ARCH_X86 || HWY_ARCH_RISCV || (HWY_ARCH_ARM_A64 && !HWY_COMPILER_MSVC)
  // The measurement takes several 10 ms intervals, hence it is cached.
  static const double freq = MeasureNominalClockRate();
  return freq;
#elif defined(_WIN32) || defined(_WIN64)
  LARGE_INTEGER freq;
  // Return value deliberately ignored.
  (void)QueryPerformanceFrequency(&freq);
  return static_cast<double>(freq.QuadPart);
#elif defined(__APPLE__)
  mach_timebase_info_data_t timebase;
  // Return value deliberately ignored.
  (void)mach_timebase_info(&timebase);
  // numer/denom converts ticks to nanoseconds, so denom/numer * 1E9 is the
  // number of ticks per second.
  return static_cast<double>(timebase.denom) / timebase.numer * 1E9;
#else
  // Presumably the fallback timer counts nanoseconds - confirm in hwy/timer.h.
  return 1E9;
// NOTE: #endif must be on its own line; when it follows other tokens it is not
// treated as a preprocessor directive and the #if chain stays unterminated.
#endif
}
// Estimates the smallest measurable interval of the timer, in ticks.
//
// Takes 256 batches of 256 back-to-back timer readings, reduces each batch of
// differences with robust_statistics::Mode, and returns the Mode of those
// per-batch results.
HWY_DLLEXPORT uint64_t TimerResolution() {
  constexpr size_t kTimerSamples = 256;

  // Only the return value is used here; the CPU string is discarded.
  char cpu_name[100];
  const bool have_stop = HaveTimerStop(cpu_name);

  timer::Ticks batch_modes[kTimerSamples];
  for (timer::Ticks& batch_mode : batch_modes) {
    timer::Ticks deltas[kTimerSamples];
    // Two separate loops keep the Stop/Start decision out of the timed region.
    if (have_stop) {
      for (timer::Ticks& delta : deltas) {
        const timer::Ticks begin = timer::Start();
        const timer::Ticks end = timer::Stop();
        delta = end - begin;
      }
    } else {
      // timer::Stop() is not usable; close the interval with Start() instead.
      for (timer::Ticks& delta : deltas) {
        const timer::Ticks begin = timer::Start();
        const timer::Ticks end = timer::Start();
        delta = end - begin;
      }
    }
    batch_mode = robust_statistics::Mode(deltas);
  }
  return robust_statistics::Mode(batch_modes);
}
} }