TFG-benchmarks/extract_data.py

88 lines
2.7 KiB
Python
Raw Permalink Normal View History

#!/usr/bin/env python3
import argparse
import re
import pandas as pd
def parse_cycle_deltas(file_path):
    """Extract paired cycle-counter reads from a trace file.

    The trace is scanned for lines matching ``csrr <reg>, cycle``. For each
    such line, the hexadecimal literal (``0x...``) that ends the *following*
    line is taken as the value that was read. Reads are then paired in order
    of appearance: 1st/2nd, 3rd/4th, ... as (start, end).

    Args:
        file_path: Path of the trace file to read.

    Returns:
        A ``pandas.DataFrame`` with one row per pair and the columns
        ``start_cycle``, ``end_cycle`` and ``delta`` (``end - start``).
        The columns are present even when the trace contains no reads.

    Raises:
        ValueError: If an odd number of cycle reads was found, i.e. a
            'start' read has no matching 'end' read.
        OSError: If the file cannot be opened (e.g. ``FileNotFoundError``).
    """
    csrr_pattern = re.compile(r"csrr\s+\w+,\s+cycle")
    value_pattern = re.compile(r"0x[0-9a-fA-F]+$")

    with open(file_path, 'r') as f:
        lines = f.readlines()

    cycle_values = []
    i = 0
    while i < len(lines):
        if csrr_pattern.search(lines[i]):
            has_next = i + 1 < len(lines)
            # Strip first: the pattern is anchored with `$`, so trailing
            # spaces or a '\r' would otherwise prevent a match.
            match = value_pattern.search(lines[i + 1].strip()) if has_next else None
            if match:
                cycle_values.append(int(match.group(), 16))
                # The value line has been consumed; do not re-scan it.
                i += 1
            else:
                # Also reached when the csrr is the last line of the file.
                # Line numbers are reported 1-based, as editors show them.
                print(
                    f"Warning: Found csrr at line {i + 1} "
                    f"but couldn't find value on line {i + 2}"
                )
        i += 1

    if len(cycle_values) % 2 != 0:
        raise ValueError(
            f"Hanging CSRR detected! Found {len(cycle_values)} cycle reads. "
            "Each 'start' must have a corresponding 'end'."
        )

    data = [
        {
            'start_cycle': start_val,
            'end_cycle': end_val,
            'delta': end_val - start_val,
        }
        for start_val, end_val in zip(cycle_values[::2], cycle_values[1::2])
    ]
    # Explicit columns keep the schema stable for an empty result, so that
    # callers indexing df['start_cycle'] do not hit a KeyError.
    return pd.DataFrame(data, columns=['start_cycle', 'end_cycle', 'delta'])
if __name__ == "__main__":
    # Command-line entry point: parse two traces, pair their cycle deltas
    # row by row, print the comparison and save it to "result.parquet".
    parser = argparse.ArgumentParser(description='Extract cycle deltas from trace files and calculate speedup')
    parser.add_argument('with_file', help='Path to the trace file (with optimization)')
    parser.add_argument('without_file', help='Path to the trace file (without optimization - baseline)')
    args = parser.parse_args()

    try:
        df_with = parse_cycle_deltas(args.with_file)
        df_without = parse_cycle_deltas(args.without_file)

        # Rows are paired positionally, so both traces must contain the
        # same number of measurements.
        if len(df_with) != len(df_without):
            raise ValueError(
                f"Mismatch in number of deltas: {args.with_file} has {len(df_with)} deltas, "
                f"but {args.without_file} has {len(df_without)} deltas. Cannot pair them."
            )
        # Fail clearly instead of erroring on a missing column when
        # neither trace contained any cycle reads.
        if len(df_with) == 0:
            raise ValueError("No cycle measurements found in the trace files.")

        result = pd.DataFrame({
            'start_cycle_with': df_with['start_cycle'],
            'end_cycle_with': df_with['end_cycle'],
            'delta_with': df_with['delta'],
            'start_cycle_without': df_without['start_cycle'],
            'end_cycle_without': df_without['end_cycle'],
            'delta_without': df_without['delta'],
            # Speedup is baseline cycles divided by optimized cycles.
            'speedup': df_without['delta'] / df_with['delta'],
        })

        print("Cycle Delta Analysis:")
        print(result)
        result.to_parquet("result.parquet")
    except (OSError, ValueError) as e:
        # OSError covers FileNotFoundError as well as permission and
        # is-a-directory failures. SystemExit with a string writes the
        # message to stderr and exits with status 1, so failures are
        # visible to calling scripts.
        raise SystemExit(f"Error: {e}")