/*
 * Micro-benchmark for conditional-zero selects, intended for comparing code
 * generated with the RISC-V Zicond extension enabled vs. disabled.
 *
 * Two kernels are timed with the cycle counter:
 *   1. results[i] = (a != 0) ? b : 0
 *   2. results[i] = (a != 0 && b > 500) ? b : 0
 *
 * The elapsed counts are stored in cycles_bench1 / cycles_bench2.
 */
#include <stdint.h>

#if !defined(__riscv)
#include <time.h>
#endif

#define N 128
#define ITERATIONS 10

/*
 * Static "messy" 0/1 selector data, intended to make the data-dependent
 * branch hard for a branch predictor to learn.
 * NOTE(review): entries 64..127 repeat entries 0..63, and the whole table is
 * replayed ITERATIONS times; confirm this is irregular enough for the target.
 */
static const uint64_t src_a[N] = {
    0, 1, 1, 0, 1, 0, 0, 1, 0, 1, 1, 1, 0, 0, 1, 0,
    0, 1, 1, 0, 1, 1, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1,
    1, 0, 0, 1, 1, 0, 1, 1, 0, 1, 0, 0, 1, 0, 1, 1,
    0, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0, 0, 0, 1, 0, 1,
    0, 1, 1, 0, 1, 0, 0, 1, 0, 1, 1, 1, 0, 0, 1, 0,
    0, 1, 1, 0, 1, 1, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1,
    1, 0, 0, 1, 1, 0, 1, 1, 0, 1, 0, 0, 1, 0, 1, 1,
    0, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0, 0, 0, 1, 0, 1
};

/* Payload values: 10, 20, ..., 1280 (so "b > 500" holds from index 50 on). */
static const uint64_t src_b[N] = {
      10,   20,   30,   40,   50,   60,   70,   80,   90,  100,
     110,  120,  130,  140,  150,  160,  170,  180,  190,  200,
     210,  220,  230,  240,  250,  260,  270,  280,  290,  300,
     310,  320,  330,  340,  350,  360,  370,  380,  390,  400,
     410,  420,  430,  440,  450,  460,  470,  480,  490,  500,
     510,  520,  530,  540,  550,  560,  570,  580,  590,  600,
     610,  620,  630,  640,  650,  660,  670,  680,  690,  700,
     710,  720,  730,  740,  750,  760,  770,  780,  790,  800,
     810,  820,  830,  840,  850,  860,  870,  880,  890,  900,
     910,  920,  930,  940,  950,  960,  970,  980,  990, 1000,
    1010, 1020, 1030, 1040, 1050, 1060, 1070, 1080, 1090, 1100,
    1110, 1120, 1130, 1140, 1150, 1160, 1170, 1180, 1190, 1200,
    1210, 1220, 1230, 1240, 1250, 1260, 1270, 1280
};

/* Output sink; volatile so every store in the timed loops is performed. */
volatile uint64_t results[N];

/*
 * Elapsed counter values for each benchmark (end - start). Volatile so the
 * stores are kept even though nothing in this file reads them back; inspect
 * them from a debugger/simulator or print them from the surrounding harness.
 */
volatile uint64_t cycles_bench1;
volatile uint64_t cycles_bench2;

/*
 * Read the current cycle count.
 *
 * On RISC-V this executes `rdcycle`. The output operand is an XLEN-sized
 * `unsigned long`, so on RV32 only the low 32 bits of the counter are
 * returned. The "memory" clobber makes the read a compiler barrier so that
 * loads/stores of the timed region are not moved across the timing point.
 * NOTE(review): `rdcycle` traps if counter access is not enabled for the
 * current privilege level - confirm for the target environment.
 *
 * On any other architecture the function falls back to clock(), which counts
 * CLOCKS_PER_SEC ticks rather than cycles; that path exists only so the file
 * can be built and smoke-tested on a host.
 */
static inline uint64_t read_cycles(void)
{
#if defined(__riscv)
    unsigned long val;

    __asm__ __volatile__("rdcycle %0" : "=r"(val) : : "memory");
    return (uint64_t)val;
#else
    return (uint64_t)clock();
#endif
}

/*
 * Run both benchmarks once and store their elapsed counts.
 * Always returns 0.
 */
int main(void)
{
    uint64_t start, end;

    /*
     * --- Benchmark 1: trivial czero.eqz ---
     * Pattern: if (a != 0) return b else return 0
     */
    start = read_cycles();
    for (int j = 0; j < ITERATIONS; j++) {
        for (int i = 0; i < N; i++) {
            uint64_t a = src_a[i];
            uint64_t b = src_b[i];

            /*
             * With Zicond enabled the compiler is expected to emit
             * czero.eqz here, zeroing b when a is 0.
             */
            results[i] = (a != 0) ? b : 0;
        }
    }
    end = read_cycles();
    /* Record the result for the Zicond-enabled vs. -disabled comparison. */
    cycles_bench1 = end - start;

    /*
     * --- Benchmark 2: logical AND (czero with a compound condition) ---
     * Pattern: if (a != 0 AND b > 500) return b else return 0
     */
    start = read_cycles();
    for (int j = 0; j < ITERATIONS; j++) {
        for (int i = 0; i < N; i++) {
            uint64_t a = src_a[i];
            uint64_t b = src_b[i];

            /*
             * Expected codegen: combine both conditions with 'and',
             * then apply a czero to the payload.
             */
            if (a != 0 && b > 500) {
                results[i] = b;
            } else {
                results[i] = 0;
            }
        }
    }
    end = read_cycles();
    cycles_bench2 = end - start;

    return 0;
}