#include #include #include #define N 10 #define ZFA static inline uint64_t read_cycles() { uint64_t start; asm volatile ("rdcycle %0" : "=r"(start)); return start; } int main() { // fround.s volatile float a; volatile float b; volatile double c; volatile double d; // fround.s read_cycles(); for (int i = 0; i < N; i++) { a = 3.25f; b = round(a); } read_cycles(); // fround.d read_cycles(); for (int i = 0; i < N; i++) { c = 3.25f; d = round(c); } read_cycles(); int res; // fleq.s read_cycles(); for (int i = 0; i < N; i++) { #ifndef ZFA asm volatile ( "fclass.s t0, %1\n\t" // Classify a "fclass.s t1, %2\n\t" // Classify b "or t0, t0, t1\n\t" // Combine classes "andi t2, t0, 0x200\n\t" // 0x200 is the mask for Quiet NaN "bnez t2, 1f\n\t" // If qNaN is present, skip to return 0 "fle.s %0, %1, %2\n\t" // Safe to use signaling comparison "j 2f\n\t" "1:\n\t" "li %0, 0\n\t" // Result is false for NaNs "2:\n\t" : "=r" (res) : "f" (a), "f" (b) : "t0", "t1", "t2" ); #else asm volatile("fleq.s t0, ft0, ft1"); #endif } read_cycles(); // fleq.d read_cycles(); for (int i = 0; i < N; i++) { #ifndef ZFA asm volatile ( "fclass.d t0, %1\n\t" // Classify double a "fclass.d t1, %2\n\t" // Classify double b "or t0, t0, t1\n\t" // Combine classification masks "andi t2, t0, 0x200\n\t" // 0x200 is the bit for Quiet NaN (qNaN) "bnez t2, 1f\n\t" // If a qNaN is detected, skip to return 0 "fle.d %0, %1, %2\n\t" // Signaling comparison: signals on sNaN, result in %0 "j 2f\n\t" "1:\n\t" "li %0, 0\n\t" // Quietly return 0 (false) for qNaNs "2:\n\t" : "=r" (res) : "f" (a), "f" (b) : "t0", "t1", "t2" ); #else asm volatile ("fleq.d t0, ft0, ft1"); #endif } read_cycles(); // fminm.s float a_fmin = 0.0f, b_fmin = -0.0f; float res_fmin; read_cycles(); for (int i = 0; i < N; i++) { #ifndef ZFA asm volatile ( "fclass.s t0, %1\n\t" // Classify a "fclass.s t1, %2\n\t" // Classify b "li t2, 0x300\n\t" // Mask for any NaN (0x100 sNaN | 0x200 qNaN) "and t3, t0, t2\n\t" // t3 = is_nan(a) "and t4, t1, t2\n\t" // t4 = is_nan(b) "bnez t3, 1f\n\t" // If a is NaN, jump to handle it "bnez t4, 2f\n\t" // If b is NaN, jump to handle it "fmin.s %0, %1, %2\n\t" // Neither is NaN, use standard min "j 3f\n\t" "1:\n\t" // Case: a is NaN "bnez t4, 4f\n\t" // If b is also NaN, jump to both-NaN case "fmv.s %0, %2\n\t" // a is NaN, b is number -> return b "j 3f\n\t" "2:\n\t" // Case: b is NaN, a is number -> return a "fmv.s %0, %1\n\t" "j 3f\n\t" "4:\n\t" // Case: Both are NaNs "fmin.s %0, %1, %2\n\t" // Standard min handles both-NaNs correctly "3:\n\t" : "=f" (res_fmin) : "f" (a_fmin), "f" (b_fmin) : "t0", "t1", "t2", "t3", "t4" ); #else asm volatile ("fminm.s ft0, ft1, ft2"); #endif } read_cycles(); // fli.s read_cycles(); volatile float res_fli_s[32]; for (int i = 0; i < N; i++) { res_fli_s[0] = -1.0f; res_fli_s[1] = -1.0f; res_fli_s[2] = 0x1p-16f; res_fli_s[3] = 0x1p-15f; res_fli_s[4] = 0x1p-14f; res_fli_s[5] = 0x1p-13f; res_fli_s[6] = 0x1p-12f; res_fli_s[7] = 0x1p-11f; res_fli_s[8] = 0x1p-10f; res_fli_s[9] = 0x1p-9f; res_fli_s[10] = 0x1p-8f; res_fli_s[11] = 0x1p-7f; res_fli_s[12] = 0x1p-6f; res_fli_s[13] = 0x1p-5f; res_fli_s[14] = 0x1p-4f; res_fli_s[15] = 0x1p-3f; res_fli_s[16] = 0.25f; res_fli_s[17] = 0.5f; res_fli_s[18] = 0.75f; res_fli_s[19] = 1.0f; res_fli_s[20] = 1.25f; res_fli_s[21] = 1.5f; res_fli_s[22] = 1.75f; res_fli_s[23] = 2.0f; res_fli_s[24] = 2.5f; res_fli_s[25] = 3.0f; res_fli_s[26] = 4.0f; res_fli_s[27] = 8.0f; res_fli_s[28] = 16.0f; res_fli_s[29] = 32.0f; res_fli_s[30] = INFINITY; res_fli_s[31] = NAN; } read_cycles(); // fli.d read_cycles(); volatile double res_fli_d[32]; for (int i = 0; i < N; i++) { res_fli_s[0] = -1.0f; res_fli_s[1] = -1.0f; res_fli_s[2] = 0x1p-16f; res_fli_s[3] = 0x1p-15f; res_fli_s[4] = 0x1p-14f; res_fli_s[5] = 0x1p-13f; res_fli_s[6] = 0x1p-12f; res_fli_s[7] = 0x1p-11f; res_fli_s[8] = 0x1p-10f; res_fli_s[9] = 0x1p-9f; res_fli_s[10] = 0x1p-8f; res_fli_s[11] = 0x1p-7f; res_fli_s[12] = 0x1p-6f; res_fli_s[13] = 0x1p-5f; res_fli_s[14] = 0x1p-4f; res_fli_s[15] = 0x1p-3f; res_fli_s[16] = 0.25f; res_fli_s[17] = 0.5f; res_fli_s[18] = 0.75f; res_fli_s[19] = 1.0f; res_fli_s[20] = 1.25f; res_fli_s[21] = 1.5f; res_fli_s[22] = 1.75f; res_fli_s[23] = 2.0f; res_fli_s[24] = 2.5f; res_fli_s[25] = 3.0f; res_fli_s[26] = 4.0f; res_fli_s[27] = 8.0f; res_fli_s[28] = 16.0f; res_fli_s[29] = 32.0f; res_fli_s[30] = INFINITY; res_fli_s[31] = NAN; } read_cycles(); // fcvtmod.w.d }