209 lines
6.3 KiB
C
209 lines
6.3 KiB
C
#include <stdint.h>
|
|
#include <stdbool.h>
|
|
#include <math.h>
|
|
|
|
/* Number of iterations executed by each measured loop in main(). */
#define N 10
|
|
|
|
/*
 * Build-time switch tested with #ifndef in main(): when defined, the
 * fleq.s / fleq.d / fminm.s instructions are issued directly; when not
 * defined, the multi-instruction fclass-based sequences are used instead.
 */
#define ZFA
|
|
|
|
static inline uint64_t read_cycles() {
|
|
uint64_t start;
|
|
asm volatile ("rdcycle %0" : "=r"(start));
|
|
return start;
|
|
}
|
|
|
|
int main() {
|
|
// fround.s
|
|
volatile float a;
|
|
volatile float b;
|
|
volatile double c;
|
|
volatile double d;
|
|
|
|
// fround.s
|
|
read_cycles();
|
|
for (int i = 0; i < N; i++) {
|
|
a = 3.25f;
|
|
b = round(a);
|
|
}
|
|
read_cycles();
|
|
|
|
// fround.d
|
|
read_cycles();
|
|
for (int i = 0; i < N; i++) {
|
|
c = 3.25f;
|
|
d = round(c);
|
|
}
|
|
read_cycles();
|
|
int res;
|
|
|
|
// fleq.s
|
|
|
|
read_cycles();
|
|
for (int i = 0; i < N; i++) {
|
|
#ifndef ZFA
|
|
asm volatile (
|
|
"fclass.s t0, %1\n\t" // Classify a
|
|
"fclass.s t1, %2\n\t" // Classify b
|
|
"or t0, t0, t1\n\t" // Combine classes
|
|
"andi t2, t0, 0x200\n\t" // 0x200 is the mask for Quiet NaN
|
|
"bnez t2, 1f\n\t" // If qNaN is present, skip to return 0
|
|
"fle.s %0, %1, %2\n\t" // Safe to use signaling comparison
|
|
"j 2f\n\t"
|
|
"1:\n\t"
|
|
"li %0, 0\n\t" // Result is false for NaNs
|
|
"2:\n\t"
|
|
: "=r" (res)
|
|
: "f" (a), "f" (b)
|
|
: "t0", "t1", "t2"
|
|
);
|
|
|
|
#else
|
|
asm volatile("fleq.s t0, ft0, ft1");
|
|
#endif
|
|
}
|
|
read_cycles();
|
|
|
|
// fleq.d
|
|
read_cycles();
|
|
for (int i = 0; i < N; i++) {
|
|
#ifndef ZFA
|
|
asm volatile (
|
|
"fclass.d t0, %1\n\t" // Classify double a
|
|
"fclass.d t1, %2\n\t" // Classify double b
|
|
"or t0, t0, t1\n\t" // Combine classification masks
|
|
"andi t2, t0, 0x200\n\t" // 0x200 is the bit for Quiet NaN (qNaN)
|
|
"bnez t2, 1f\n\t" // If a qNaN is detected, skip to return 0
|
|
"fle.d %0, %1, %2\n\t" // Signaling comparison: signals on sNaN, result in %0
|
|
"j 2f\n\t"
|
|
"1:\n\t"
|
|
"li %0, 0\n\t" // Quietly return 0 (false) for qNaNs
|
|
"2:\n\t"
|
|
: "=r" (res)
|
|
: "f" (a), "f" (b)
|
|
: "t0", "t1", "t2"
|
|
);
|
|
|
|
#else
|
|
asm volatile ("fleq.d t0, ft0, ft1");
|
|
#endif
|
|
}
|
|
read_cycles();
|
|
|
|
// fminm.s
|
|
float a_fmin = 0.0f, b_fmin = -0.0f;
|
|
float res_fmin;
|
|
|
|
read_cycles();
|
|
for (int i = 0; i < N; i++) {
|
|
#ifndef ZFA
|
|
asm volatile (
|
|
"fclass.s t0, %1\n\t" // Classify a
|
|
"fclass.s t1, %2\n\t" // Classify b
|
|
"li t2, 0x300\n\t" // Mask for any NaN (0x100 sNaN | 0x200 qNaN)
|
|
"and t3, t0, t2\n\t" // t3 = is_nan(a)
|
|
"and t4, t1, t2\n\t" // t4 = is_nan(b)
|
|
"bnez t3, 1f\n\t" // If a is NaN, jump to handle it
|
|
"bnez t4, 2f\n\t" // If b is NaN, jump to handle it
|
|
"fmin.s %0, %1, %2\n\t" // Neither is NaN, use standard min
|
|
"j 3f\n\t"
|
|
"1:\n\t" // Case: a is NaN
|
|
"bnez t4, 4f\n\t" // If b is also NaN, jump to both-NaN case
|
|
"fmv.s %0, %2\n\t" // a is NaN, b is number -> return b
|
|
"j 3f\n\t"
|
|
"2:\n\t" // Case: b is NaN, a is number -> return a
|
|
"fmv.s %0, %1\n\t"
|
|
"j 3f\n\t"
|
|
"4:\n\t" // Case: Both are NaNs
|
|
"fmin.s %0, %1, %2\n\t" // Standard min handles both-NaNs correctly
|
|
"3:\n\t"
|
|
: "=f" (res_fmin)
|
|
: "f" (a_fmin), "f" (b_fmin)
|
|
: "t0", "t1", "t2", "t3", "t4"
|
|
);
|
|
#else
|
|
asm volatile ("fminm.s ft0, ft1, ft2");
|
|
#endif
|
|
}
|
|
read_cycles();
|
|
// fli.s
|
|
|
|
read_cycles();
|
|
volatile float res_fli_s[32];
|
|
for (int i = 0; i < N; i++) {
|
|
res_fli_s[0] = -1.0f;
|
|
res_fli_s[1] = -1.0f;
|
|
res_fli_s[2] = 0x1p-16f;
|
|
res_fli_s[3] = 0x1p-15f;
|
|
res_fli_s[4] = 0x1p-14f;
|
|
res_fli_s[5] = 0x1p-13f;
|
|
res_fli_s[6] = 0x1p-12f;
|
|
res_fli_s[7] = 0x1p-11f;
|
|
res_fli_s[8] = 0x1p-10f;
|
|
res_fli_s[9] = 0x1p-9f;
|
|
res_fli_s[10] = 0x1p-8f;
|
|
res_fli_s[11] = 0x1p-7f;
|
|
res_fli_s[12] = 0x1p-6f;
|
|
res_fli_s[13] = 0x1p-5f;
|
|
res_fli_s[14] = 0x1p-4f;
|
|
res_fli_s[15] = 0x1p-3f;
|
|
res_fli_s[16] = 0.25f;
|
|
res_fli_s[17] = 0.5f;
|
|
res_fli_s[18] = 0.75f;
|
|
res_fli_s[19] = 1.0f;
|
|
res_fli_s[20] = 1.25f;
|
|
res_fli_s[21] = 1.5f;
|
|
res_fli_s[22] = 1.75f;
|
|
res_fli_s[23] = 2.0f;
|
|
res_fli_s[24] = 2.5f;
|
|
res_fli_s[25] = 3.0f;
|
|
res_fli_s[26] = 4.0f;
|
|
res_fli_s[27] = 8.0f;
|
|
res_fli_s[28] = 16.0f;
|
|
res_fli_s[29] = 32.0f;
|
|
res_fli_s[30] = INFINITY;
|
|
res_fli_s[31] = NAN;
|
|
}
|
|
read_cycles();
|
|
|
|
// fli.d
|
|
read_cycles();
|
|
volatile double res_fli_d[32];
|
|
for (int i = 0; i < N; i++) {
|
|
res_fli_s[0] = -1.0f;
|
|
res_fli_s[1] = -1.0f;
|
|
res_fli_s[2] = 0x1p-16f;
|
|
res_fli_s[3] = 0x1p-15f;
|
|
res_fli_s[4] = 0x1p-14f;
|
|
res_fli_s[5] = 0x1p-13f;
|
|
res_fli_s[6] = 0x1p-12f;
|
|
res_fli_s[7] = 0x1p-11f;
|
|
res_fli_s[8] = 0x1p-10f;
|
|
res_fli_s[9] = 0x1p-9f;
|
|
res_fli_s[10] = 0x1p-8f;
|
|
res_fli_s[11] = 0x1p-7f;
|
|
res_fli_s[12] = 0x1p-6f;
|
|
res_fli_s[13] = 0x1p-5f;
|
|
res_fli_s[14] = 0x1p-4f;
|
|
res_fli_s[15] = 0x1p-3f;
|
|
res_fli_s[16] = 0.25f;
|
|
res_fli_s[17] = 0.5f;
|
|
res_fli_s[18] = 0.75f;
|
|
res_fli_s[19] = 1.0f;
|
|
res_fli_s[20] = 1.25f;
|
|
res_fli_s[21] = 1.5f;
|
|
res_fli_s[22] = 1.75f;
|
|
res_fli_s[23] = 2.0f;
|
|
res_fli_s[24] = 2.5f;
|
|
res_fli_s[25] = 3.0f;
|
|
res_fli_s[26] = 4.0f;
|
|
res_fli_s[27] = 8.0f;
|
|
res_fli_s[28] = 16.0f;
|
|
res_fli_s[29] = 32.0f;
|
|
res_fli_s[30] = INFINITY;
|
|
res_fli_s[31] = NAN;
|
|
}
|
|
read_cycles();
|
|
|
|
// fcvtmod.w.d
|
|
|
|
}
|