40 lines
807 B
C
40 lines
807 B
C
|
|
#include <math.h>
|
||
|
|
#include <stdint.h>
|
||
|
|
#include <riscv_vector.h>
|
||
|
|
|
||
|
|
#include <stdint.h>
|
||
|
|
|
||
|
|
#define N 32
|
||
|
|
|
||
|
|
// Use 'aligned' to help the autovectorizer
|
||
|
|
_Float16 a[N] __attribute__((aligned(16)));
|
||
|
|
float b[N] __attribute__((aligned(16)));
|
||
|
|
|
||
|
|
/*
 * Read the RISC-V cycle counter.
 *
 * Returns the full 64-bit counter value. On RV64 a single `rdcycle` reads
 * it in one go. On RV32 `rdcycle` only yields the low 32 bits, so the high
 * half is read with `rdcycleh` before and after the low half and the read
 * is retried if the high half changed in between (low-word rollover).
 *
 * The "memory" clobber makes each read a compiler barrier, so loads and
 * stores of the code being timed are not moved across the counter read.
 *
 * NOTE(review): whether `rdcycle` is accessible at the current privilege
 * level depends on the platform's counter-enable configuration - confirm
 * for the target this runs on.
 */
static inline uint64_t read_cycles(void) {
#if defined(__riscv_xlen) && (__riscv_xlen == 32)
    uint32_t hi_before;
    uint32_t lo;
    uint32_t hi_after;

    do {
        __asm__ __volatile__ ("rdcycleh %0" : "=r"(hi_before) : : "memory");
        __asm__ __volatile__ ("rdcycle %0" : "=r"(lo) : : "memory");
        __asm__ __volatile__ ("rdcycleh %0" : "=r"(hi_after) : : "memory");
    } while (hi_before != hi_after);

    return ((uint64_t)hi_after << 32) | lo;
#else
    uint64_t cycles;

    __asm__ __volatile__ ("rdcycle %0" : "=r"(cycles) : : "memory");
    return cycles;
#endif
}
|
||
|
|
|
||
|
|
void benchmark() {
|
||
|
|
// 1. Widening: _Float16 -> float
|
||
|
|
uint64_t t0 = read_cycles();
|
||
|
|
for (int i = 0; i < N; i++) {
|
||
|
|
b[i] = (float)a[i];
|
||
|
|
}
|
||
|
|
uint64_t t1 = read_cycles();
|
||
|
|
|
||
|
|
// 2. Narrowing: float -> _Float16
|
||
|
|
uint64_t t2 = read_cycles();
|
||
|
|
for (int i = 0; i < N; i++) {
|
||
|
|
a[i] = (_Float16)b[i];
|
||
|
|
}
|
||
|
|
uint64_t t3 = read_cycles();
|
||
|
|
|
||
|
|
// In a real app, print (t1-t0) and (t3-t2)
|
||
|
|
}
|
||
|
|
|
||
|
|
/* Program entry point: runs the conversion benchmark once and returns 0. */
int main(void) {
    benchmark();
    return 0;
}
|