#ifndef CPUBENCHMARK_H_
#define CPUBENCHMARK_H_
#include "common.h"
namespace FastPForLib {
#if defined(__corei7__)
/**
 * Reads the time-stamp counter at the start of a measured region.
 *
 * CPUID is issued first and RDTSC second, so the counter is sampled only
 * after the serializing CPUID has completed.
 *
 * @return the 64-bit counter value (EDX in the upper half, EAX in the lower).
 */
static __inline__ unsigned long long startRDTSC(void) {
  unsigned hi, lo;
  // Operand %0 receives EDX (upper 32 bits), operand %1 receives EAX (lower
  // 32 bits). All four registers written by CPUID are declared clobbered.
  asm volatile("CPUID\n\t"
               "RDTSC\n\t"
               "mov %%edx, %0\n\t"
               "mov %%eax, %1\n\t"
               : "=r"(hi), "=r"(lo)
               :
               : "%rax", "%rbx", "%rcx", "%rdx");
  unsigned long long tsc = hi;
  tsc <<= 32;
  tsc |= lo;
  return tsc;
}
/**
 * Reads the time-stamp counter at the end of a measured region.
 *
 * RDTSCP samples the counter, the two halves are copied out, and only then is
 * CPUID executed, so the trailing CPUID acts as a fence after the read.
 *
 * @return the 64-bit counter value (EDX in the upper half, EAX in the lower).
 */
static __inline__ unsigned long long stopRDTSCP(void) {
  unsigned hi, lo;
  // The results are moved out of EDX/EAX before CPUID overwrites them; the
  // clobber list keeps the compiler from placing %0/%1 in those registers.
  asm volatile("RDTSCP\n\t"
               "mov %%edx, %0\n\t"
               "mov %%eax, %1\n\t"
               "CPUID\n\t"
               : "=r"(hi), "=r"(lo)
               :
               : "%rax", "%rbx", "%rcx", "%rdx");
  unsigned long long tsc = hi;
  tsc <<= 32;
  tsc |= lo;
  return tsc;
}
#elif (defined(_MSC_VER) && (defined(_M_IX86) || defined(_M_AMD64)))
/** Start-of-region timestamp: the value returned by the __rdtsc() intrinsic. */
static inline unsigned long long startRDTSC(void) {
  const unsigned long long now = __rdtsc();
  return now;
}

/**
 * End-of-region timestamp. Despite the name, this branch also uses __rdtsc()
 * (not RDTSCP), exactly like startRDTSC().
 */
static inline unsigned long long stopRDTSCP(void) {
  const unsigned long long now = __rdtsc();
  return now;
}
#elif defined(_MSC_VER) && defined(_M_ARM64)
/**
 * Reads the system register identified below as pmccntr_el0 through the
 * _ReadStatusReg intrinsic.
 *
 * @return the register value converted to unsigned long long.
 */
inline unsigned long long rdtsc() {
  // Register id packed as five bit-fields (3, 3, 9, 13, 0) at shifts
  // 14/11/7/3/0. _ReadStatusReg takes an int, so the id is kept as an int
  // rather than a 64-bit integer to avoid an implicit narrowing conversion.
  const int pmccntr_el0 = (((3 & 1) << 14) | ((3 & 7) << 11) |
                           ((9 & 15) << 7) | ((13 & 15) << 3) |
                           ((0 & 7) << 0));
  return static_cast<unsigned long long>(_ReadStatusReg(pmccntr_el0));
}

/** Start-of-region timestamp; forwards to rdtsc(). */
static inline unsigned long long startRDTSC(void) { return rdtsc(); }

/** End-of-region timestamp; forwards to rdtsc(). */
static inline unsigned long long stopRDTSCP(void) { return rdtsc(); }
#elif defined(__i386__) || defined(__x86_64__)
/**
 * Reads the time-stamp counter on x86 / x86-64.
 *
 * CPUID (with EAX preset to 0) runs immediately before RDTSC so that the
 * counter is sampled after the serializing instruction completes.
 *
 * @return the 64-bit counter value (EDX in the upper half, EAX in the lower).
 */
inline unsigned long long rdtsc() {
  unsigned int low_half, high_half;
  // EAX and EDX are bound directly as outputs; EBX and ECX are listed as
  // clobbers because CPUID overwrites them as well.
  asm volatile("cpuid \n"
               "rdtsc"
               : "=a"(low_half), "=d"(high_half)
               : "a"(0)
               : "%ebx", "%ecx");
  unsigned long long tsc = high_half;
  tsc = (tsc << 32) | low_half;
  return tsc;
}
/** Start-of-region timestamp; forwards to rdtsc(). */
static __inline__ unsigned long long startRDTSC(void) {
  const unsigned long long now = rdtsc();
  return now;
}

/** End-of-region timestamp; in this branch it is the same read as startRDTSC(). */
static __inline__ unsigned long long stopRDTSCP(void) {
  const unsigned long long now = rdtsc();
  return now;
}
#elif (defined(__GNUC__) && (defined(__aarch64__)))
inline uint64_t rdtsc() {
uint64_t cycles;
asm volatile("mrs %0, cntvct_el0"
: "=r"(cycles));
return cycles;
}
/** Start-of-region timestamp; forwards to rdtsc(). */
static __inline__ uint64_t startRDTSC(void) {
  const uint64_t now = rdtsc();
  return now;
}

/** End-of-region timestamp; in this branch it is the same read as startRDTSC(). */
static __inline__ uint64_t stopRDTSCP(void) {
  const uint64_t now = rdtsc();
  return now;
}
#elif(defined(__arm__) || defined(__ppc__) || defined(__ppc64__)) || (defined(_MSC_VER) && defined(_M_ARM64))
// Stub implementations for targets where no counter read is provided here.
// All three always return 0, so any measured interval comes out as 0.

/** Stub counter read: always 0. */
inline uint64_t rdtsc() {
  const uint64_t unavailable = 0;
  return unavailable;
}

/** Stub start-of-region timestamp: always 0. */
static __inline__ uint64_t startRDTSC(void) {
  const uint64_t unavailable = 0;
  return unavailable;
}

/** Stub end-of-region timestamp: always 0. */
static __inline__ uint64_t stopRDTSCP(void) {
  const uint64_t unavailable = 0;
  return unavailable;
}
#else
#error Unknown architecture
#endif
/**
 * Small stopwatch built on startRDTSC() / stopRDTSCP().
 *
 * Constructing an instance starts timing right away. stop() reports how far
 * the counter has advanced since the most recent start(); it does not reset
 * the stored start value, so it may be called repeatedly.
 */
class CPUBenchmark {
public:
  /** Counter value captured by the most recent start(). */
  unsigned long long ticktime;

  /** Zero-initialises ticktime and then immediately calls start(). */
  CPUBenchmark() : ticktime(0) {
    start();
  }

  /** Records the current start-of-region counter value in ticktime. */
  void start() {
    ticktime = startRDTSC();
  }

  /** @return end-of-region counter value minus the stored ticktime. */
  unsigned long long stop() {
    const unsigned long long finish = stopRDTSCP();
    return finish - ticktime;
  }
};
}
#endif