changes to benchmarks and script to extract data for plotting
This commit is contained in:
parent
15959973d2
commit
f639be16a0
10 changed files with 153267 additions and 181 deletions
87
extract_data.py
Normal file
87
extract_data.py
Normal file
|
|
@ -0,0 +1,87 @@
|
|||
#!/usr/bin/env python3
|
||||
|
||||
import argparse
|
||||
import re
|
||||
import pandas as pd
|
||||
|
||||
def parse_cycle_deltas(file_path):
    """Extract paired cycle-counter reads from a trace file.

    The trace is scanned for ``csrr <reg>, cycle`` instructions. For each one,
    the hexadecimal literal that ends the *following* line is taken as the
    value that was read. Reads are then paired in order of appearance as
    (start, end).

    Args:
        file_path: Path of the text trace to parse.

    Returns:
        A pandas DataFrame with the columns ``start_cycle``, ``end_cycle`` and
        ``delta`` (``end_cycle - start_cycle``), one row per pair. The columns
        are present even when the trace contains no cycle reads.

    Raises:
        ValueError: If an odd number of cycle reads was found, i.e. a start
            without a matching end.
        OSError: If the file cannot be opened (e.g. ``FileNotFoundError``).
    """
    columns = ['start_cycle', 'end_cycle', 'delta']
    csrr_pattern = re.compile(r"csrr\s+\w+,\s+cycle")
    value_pattern = re.compile(r"0x[0-9a-fA-F]+$")

    with open(file_path, 'r') as f:
        lines = f.readlines()

    cycle_values = []
    total = len(lines)
    i = 0
    while i < total:
        if csrr_pattern.search(lines[i]):
            # The value is expected on the line right after the csrr; a csrr
            # on the last line of the file has no such line at all.
            match = None
            if i + 1 < total:
                # strip() so that `$` anchors on the last visible character.
                match = value_pattern.search(lines[i + 1].strip())

            if match:
                cycle_values.append(int(match.group(), 16))
                # The value line has been consumed; do not rescan it.
                i += 1
            else:
                # Report 1-based line numbers, as shown by editors.
                print(f"Warning: Found csrr at line {i + 1} but couldn't find value on line {i + 2}")
        i += 1

    if len(cycle_values) % 2 != 0:
        raise ValueError(
            f"Hanging CSRR detected! Found {len(cycle_values)} cycle reads. "
            "Each 'start' must have a corresponding 'end'."
        )

    # Even positions are starts, odd positions are the matching ends.
    data = [
        {'start_cycle': start_val, 'end_cycle': end_val, 'delta': end_val - start_val}
        for start_val, end_val in zip(cycle_values[::2], cycle_values[1::2])
    ]

    # Explicit columns keep the frame's schema stable for an empty trace.
    return pd.DataFrame(data, columns=columns)
|
||||
|
||||
def main(argv=None):
    """Compare cycle deltas of two traces and store the per-region speedup.

    Both traces are parsed with ``parse_cycle_deltas``; their measured regions
    are paired by position. ``speedup`` is ``delta_without / delta_with``.
    The combined table is printed and written as a Parquet file.

    Args:
        argv: Argument list to parse; ``None`` means ``sys.argv[1:]``.

    Raises:
        SystemExit: With a non-zero status and an ``Error: ...`` message when
            an input file is missing or the traces cannot be paired.
    """
    parser = argparse.ArgumentParser(description='Extract cycle deltas from trace files and calculate speedup')
    parser.add_argument('with_file', help='Path to the trace file (with optimization)')
    parser.add_argument('without_file', help='Path to the trace file (without optimization - baseline)')
    parser.add_argument('-o', '--output', default='result.parquet',
                        help='Path of the Parquet file to write (default: result.parquet)')
    args = parser.parse_args(argv)

    try:
        df_with = parse_cycle_deltas(args.with_file)
        df_without = parse_cycle_deltas(args.without_file)

        # A trace without any cycle reads yields nothing to compare; fail with
        # a clear message instead of a KeyError on the column lookups below.
        for path, frame in ((args.with_file, df_with), (args.without_file, df_without)):
            if frame.empty:
                raise ValueError(f"No cycle reads found in {path}.")

        if len(df_with) != len(df_without):
            raise ValueError(
                f"Mismatch in number of deltas: {args.with_file} has {len(df_with)} deltas, "
                f"but {args.without_file} has {len(df_without)} deltas. Cannot pair them."
            )

        result = pd.DataFrame({
            'start_cycle_with': df_with['start_cycle'],
            'end_cycle_with': df_with['end_cycle'],
            'delta_with': df_with['delta'],
            'start_cycle_without': df_without['start_cycle'],
            'end_cycle_without': df_without['end_cycle'],
            'delta_without': df_without['delta'],
            # NOTE: a zero 'delta_with' produces inf/NaN here rather than an error.
            'speedup': df_without['delta'] / df_with['delta'],
        })

        print("Cycle Delta Analysis:")
        print(result)

        result.to_parquet(args.output)

    except (FileNotFoundError, ValueError) as e:
        # SystemExit with a string prints it to stderr and exits with status 1,
        # so shell pipelines and Makefiles can detect the failure.
        raise SystemExit(f"Error: {e}")


if __name__ == "__main__":
    main()
|
||||
22237
traces/with_czero.txt
Normal file
22237
traces/with_czero.txt
Normal file
File diff suppressed because it is too large
Load diff
22277
traces/with_zfa.txt
Normal file
22277
traces/with_zfa.txt
Normal file
File diff suppressed because it is too large
Load diff
20339
traces/with_zfhmin.txt
Normal file
20339
traces/with_zfhmin.txt
Normal file
File diff suppressed because it is too large
Load diff
41965
traces/without_czero.txt
Normal file
41965
traces/without_czero.txt
Normal file
File diff suppressed because it is too large
Load diff
23123
traces/without_zfa.txt
Normal file
23123
traces/without_zfa.txt
Normal file
File diff suppressed because it is too large
Load diff
23057
traces/without_zfhmin.txt
Normal file
23057
traces/without_zfhmin.txt
Normal file
File diff suppressed because it is too large
Load diff
|
|
@ -4,48 +4,42 @@
|
|||
|
||||
#define N 10
|
||||
|
||||
#define ZFA
|
||||
|
||||
static inline uint64_t read_cycles() {
|
||||
uint64_t start;
|
||||
asm volatile ("rdcycle %0" : "=r"(start));
|
||||
return start;
|
||||
}
|
||||
|
||||
// Zfa constant table for Single Precision (fli.s)
|
||||
const float zfa_constants_s[32] = {
|
||||
-1.0f, -1.0f, 0x1p-16f, 0x1p-15f, // 0 - 3
|
||||
0x1p-14f, 0x1p-13f, 0x1p-12f, 0x1p-11f, // 4 - 7
|
||||
0x1p-10f, 0x1p-9f, 0x1p-8f, 0x1p-7f, // 8 - 11
|
||||
0x1p-6f, 0x1p-5f, 0x1p-4f, 0x1p-3f, // 12 - 15
|
||||
0.25f, 0.5f, 0.75f, 1.0f, // 16 - 19
|
||||
1.25f, 1.5f, 1.75f, 2.0f, // 20 - 23
|
||||
2.5f, 3.0f, 4.0f, 8.0f, // 24 - 27
|
||||
16.0f, 32.0f, INFINITY, NAN // 28 - 31
|
||||
};
|
||||
|
||||
// Zfa constant table for Double Precision (fli.d)
|
||||
const double zfa_constants_d[32] = {
|
||||
-1.0, -1.0f, 0x1p-16, 0x1p-15, // 0 - 3
|
||||
0x1p-14, 0x1p-13, 0x1p-12, 0x1p-11, // 4 - 7
|
||||
0x1p-10, 0x1p-9, 0x1p-8, 0x1p-7, // 8 - 11
|
||||
0x1p-6, 0x1p-5, 0x1p-4, 0x1p-3, // 12 - 15
|
||||
0.25, 0.5, 0.75, 1.0, // 16 - 19
|
||||
1.25, 1.5, 1.75, 2.0, // 20 - 23
|
||||
2.5, 3.0, 4.0, 8.0, // 24 - 27
|
||||
16.0, 32.0, INFINITY, NAN // 28 - 31
|
||||
};
|
||||
|
||||
int main() {
|
||||
// fround.s
|
||||
volatile float a = 3.25f;
|
||||
volatile float b = round(a);
|
||||
volatile float a;
|
||||
volatile float b;
|
||||
volatile double c;
|
||||
volatile double d;
|
||||
|
||||
// fround.s
|
||||
read_cycles();
|
||||
for (int i = 0; i < N; i++) {
|
||||
a = 3.25f;
|
||||
b = round(a);
|
||||
}
|
||||
read_cycles();
|
||||
|
||||
// fround.d
|
||||
volatile double c = 3.25f;
|
||||
volatile double d = round(c);
|
||||
read_cycles();
|
||||
for (int i = 0; i < N; i++) {
|
||||
c = 3.25f;
|
||||
d = round(c);
|
||||
}
|
||||
read_cycles();
|
||||
int res;
|
||||
|
||||
// fleq.s
|
||||
|
||||
read_cycles();
|
||||
for (int i = 0; i < N; i++) {
|
||||
#ifndef ZFA
|
||||
asm volatile (
|
||||
"fclass.s t0, %1\n\t" // Classify a
|
||||
|
|
@ -66,8 +60,12 @@ int main() {
|
|||
#else
|
||||
asm volatile("fleq.s t0, ft0, ft1");
|
||||
#endif
|
||||
}
|
||||
read_cycles();
|
||||
|
||||
// fleq.d
|
||||
read_cycles();
|
||||
for (int i = 0; i < N; i++) {
|
||||
#ifndef ZFA
|
||||
asm volatile (
|
||||
"fclass.d t0, %1\n\t" // Classify double a
|
||||
|
|
@ -88,11 +86,15 @@ int main() {
|
|||
#else
|
||||
asm volatile ("fleq.d t0, ft0, ft1");
|
||||
#endif
|
||||
}
|
||||
read_cycles();
|
||||
|
||||
// fminm.s
|
||||
float a_fmin = 0.0f, b_fmin = -0.0f;
|
||||
float res_fmin;
|
||||
|
||||
read_cycles();
|
||||
for (int i = 0; i < N; i++) {
|
||||
#ifndef ZFA
|
||||
asm volatile (
|
||||
"fclass.s t0, %1\n\t" // Classify a
|
||||
|
|
@ -121,10 +123,13 @@ int main() {
|
|||
#else
|
||||
asm volatile ("fminm.s ft0, ft1, ft2");
|
||||
#endif
|
||||
}
|
||||
read_cycles();
|
||||
// fli.s
|
||||
|
||||
read_cycles();
|
||||
volatile float res_fli_s[32];
|
||||
for (int i = 0; i < N; i++) {
|
||||
res_fli_s[0] = -1.0f;
|
||||
res_fli_s[1] = -1.0f;
|
||||
res_fli_s[2] = 0x1p-16f;
|
||||
|
|
@ -157,9 +162,13 @@ int main() {
|
|||
res_fli_s[29] = 32.0f;
|
||||
res_fli_s[30] = INFINITY;
|
||||
res_fli_s[31] = NAN;
|
||||
}
|
||||
read_cycles();
|
||||
|
||||
// fli.d
|
||||
read_cycles();
|
||||
volatile double res_fli_d[32];
|
||||
for (int i = 0; i < N; i++) {
|
||||
res_fli_s[0] = -1.0f;
|
||||
res_fli_s[1] = -1.0f;
|
||||
res_fli_s[2] = 0x1p-16f;
|
||||
|
|
@ -192,6 +201,7 @@ int main() {
|
|||
res_fli_s[29] = 32.0f;
|
||||
res_fli_s[30] = INFINITY;
|
||||
res_fli_s[31] = NAN;
|
||||
}
|
||||
read_cycles();
|
||||
|
||||
// fcvtmod.w.d
|
||||
|
|
|
|||
|
|
@ -17,7 +17,9 @@ int main() {
|
|||
volatile double e;
|
||||
volatile _Float16 g;
|
||||
|
||||
volatile _Float16 a = 3.0f;
|
||||
volatile _Float16 a = 3.25f;
|
||||
|
||||
// fcvt.s.h
|
||||
|
||||
read_cycles();
|
||||
for (int i = 0; i < N; i++) {
|
||||
|
|
@ -27,7 +29,7 @@ int main() {
|
|||
|
||||
// fcvt.h.s
|
||||
|
||||
volatile float c = 3.0f;
|
||||
volatile float c = 3.25f;
|
||||
|
||||
read_cycles();
|
||||
for (int i = 0; i < N; i++) {
|
||||
|
|
@ -39,13 +41,13 @@ int main() {
|
|||
|
||||
read_cycles();
|
||||
for (int i = 0; i < N; i++) {
|
||||
double e = (double) a;
|
||||
e = (double) a;
|
||||
}
|
||||
read_cycles();
|
||||
|
||||
// fcvt.h.d
|
||||
|
||||
volatile double f = 3.0f;
|
||||
volatile double f = 3.25f;
|
||||
|
||||
read_cycles();
|
||||
for (int i = 0; i < N; i++) {
|
||||
|
|
|
|||
|
|
@ -1,7 +1,7 @@
|
|||
#include <stdint.h>
|
||||
|
||||
#define N 128
|
||||
#define ITERATIONS 10
|
||||
#define ITERATIONS 1
|
||||
|
||||
// Static "messy" data to ensure the branch predictor cannot "learn" the pattern
|
||||
static const uint64_t src_a[N] = {
|
||||
|
|
@ -31,38 +31,27 @@ static inline uint64_t read_cycles() {
|
|||
}
|
||||
|
||||
int main() {
|
||||
uint64_t start, end;
|
||||
|
||||
// --- Benchmark 1: Trivial czero.nez ---
|
||||
// Pattern: if (a != 0) return b else return 0
|
||||
start = read_cycles();
|
||||
read_cycles();
|
||||
for (int j = 0; j < ITERATIONS; j++) {
|
||||
for (int i = 0; i < N; i++) {
|
||||
uint64_t a = src_a[i];
|
||||
uint64_t b = src_b[i];
|
||||
// GCC will use czero.eqz here to zero out b if a is 0
|
||||
|
||||
results[i] = (a != 0) ? b : 0;
|
||||
}
|
||||
}
|
||||
end = read_cycles();
|
||||
// Record (end - start) for Zicond enabled vs disabled
|
||||
read_cycles();
|
||||
|
||||
// --- Benchmark 2: Logic AND (czero with complex condition) ---
|
||||
// Pattern: if (a != 0 AND b > 500) return b else return 0
|
||||
start = read_cycles();
|
||||
read_cycles();
|
||||
for (int j = 0; j < ITERATIONS; j++) {
|
||||
for (int i = 0; i < N; i++) {
|
||||
uint64_t a = src_a[i];
|
||||
uint64_t b = src_b[i];
|
||||
// Uses 'and' to combine conditions, then 'czero'
|
||||
if (a != 0 && b > 500) {
|
||||
results[i] = b;
|
||||
} else {
|
||||
results[i] = 0;
|
||||
|
||||
results[i] = (a != 0) ? b : 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
end = read_cycles();
|
||||
read_cycles();
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue