diff --git a/zfa_micro/zfa.c b/zfa_micro/zfa.c
new file mode 100644
index 0000000..d513c04
--- /dev/null
+++ b/zfa_micro/zfa.c
@@ -0,0 +1,199 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <math.h>
+
+#define N 10
+
+static inline uint64_t read_cycles() {
+  uint64_t cycles;  // receives the counter value produced by rdcycle
+  asm volatile ("rdcycle %0" : "=r"(cycles));  // volatile: the read must not be dropped or merged
+  return cycles;
+}
+
+// Zfa constant table for Single Precision (fli.s), indexed by the 5-bit immediate selector
+const float zfa_constants_s[32] = {
+  -1.0f,        0x1p-126f,    0x1p-16f,     0x1p-15f,     // 0 - 3   (1 = minimum positive normal)
+  0x1p-8f,      0x1p-7f,      0.0625f,      0.125f,       // 4 - 7
+  0.25f,        0.3125f,      0.375f,       0.4375f,      // 8 - 11
+  0.5f,         0.625f,       0.75f,        0.875f,       // 12 - 15
+  1.0f,         1.25f,        1.5f,         1.75f,        // 16 - 19
+  2.0f,         2.5f,         3.0f,         4.0f,         // 20 - 23
+  8.0f,         16.0f,        128.0f,       256.0f,       // 24 - 27
+  0x1p15f,      0x1p16f,      INFINITY,     NAN           // 28 - 31
+};
+
+// Zfa constant table for Double Precision (fli.d), indexed by the 5-bit immediate selector
+const double zfa_constants_d[32] = {
+  -1.0,         0x1p-1022,    0x1p-16,      0x1p-15,      // 0 - 3   (1 = minimum positive normal)
+  0x1p-8,       0x1p-7,       0.0625,       0.125,        // 4 - 7
+  0.25,         0.3125,       0.375,        0.4375,       // 8 - 11
+  0.5,          0.625,        0.75,         0.875,        // 12 - 15
+  1.0,          1.25,         1.5,          1.75,         // 16 - 19
+  2.0,          2.5,          3.0,          4.0,          // 20 - 23
+  8.0,          16.0,         128.0,        256.0,        // 24 - 27
+  0x1p15,       0x1p16,       INFINITY,     NAN           // 28 - 31
+};
+
+int main() {
+  // fround.s: roundf() keeps the rounding in single precision.
+  volatile float a = 3.25f;
+  volatile float b = roundf(a);
+
+  // fround.d: the same rounding applied to a double operand.
+  volatile double c = 3.25;
+  volatile double d = round(c);
+  int res;  // integer result written by the fleq sequences below
+
+  // fleq.s: quiet "a <= b". Without ZFA the sequence below skips fle.s when
+  // either operand classifies as a quiet NaN and produces 0 instead.
+  #ifndef ZFA
+    asm volatile (
+                    "fclass.s t0, %1\n\t"     // Classify a
+                    "fclass.s t1, %2\n\t"     // Classify b
+                    "or       t0, t0, t1\n\t" // Combine classes
+                    "andi     t2, t0, 0x200\n\t" // 0x200 is the mask for Quiet NaN
+                    "bnez     t2, 1f\n\t"     // If qNaN is present, skip to return 0
+                    "fle.s    %0, %1, %2\n\t" // Safe to use signaling comparison
+                    "j        2f\n\t"
+                    "1:\n\t"
+                    "li       %0, 0\n\t"      // Result is false for NaNs
+                    "2:\n\t"
+                    : "=r" (res)
+                    : "f" (a), "f" (b)
+                    : "t0", "t1", "t2"
+                    );
+
+  #else
+    asm volatile ("fleq.s %0, %1, %2" : "=r" (res) : "f" (a), "f" (b));
+  #endif
+
+  // fleq.d: double-precision variant; the operands are the doubles c and d.
+  #ifndef ZFA
+    asm volatile (
+                    "fclass.d t0, %1\n\t"        // Classify double c
+                    "fclass.d t1, %2\n\t"        // Classify double d
+                    "or       t0, t0, t1\n\t"    // Combine classification masks
+                    "andi     t2, t0, 0x200\n\t" // 0x200 is the bit for Quiet NaN (qNaN)
+                    "bnez     t2, 1f\n\t"        // If a qNaN is detected, skip to return 0
+                    "fle.d    %0, %1, %2\n\t"    // Signaling comparison: signals on sNaN, result in %0
+                    "j        2f\n\t"
+                    "1:\n\t"
+                    "li       %0, 0\n\t"         // Quietly return 0 (false) for qNaNs
+                    "2:\n\t"
+                    : "=r" (res)
+                    : "f" (c), "f" (d)
+                    : "t0", "t1", "t2"
+                    );
+
+  #else
+    asm volatile ("fleq.d %0, %1, %2" : "=r" (res) : "f" (c), "f" (d));
+  #endif
+
+  // fminm.s: minimum of a_fmin and b_fmin where any NaN operand gives a NaN result.
+  float a_fmin = 0.0f, b_fmin = -0.0f;
+  float res_fmin;
+
+  #ifndef ZFA
+    // Software fallback for fminm.s.
+    // fminm.s differs from fmin.s only in its NaN handling: if either
+    // operand is NaN the result is NaN, whereas fmin.s returns the
+    // non-NaN operand when exactly one input is NaN.
+    // The sequence classifies both inputs; when neither is NaN it uses
+    // fmin.s directly, otherwise it computes a + b, which yields the
+    // canonical NaN and raises the invalid flag for a signaling NaN.
+    // Register use: t0/t1 hold the fclass masks (0x100 = signaling NaN,
+    // 0x200 = quiet NaN).
+    asm volatile (
+                    "fclass.s t0, %1\n\t"        // Classify a
+                    "fclass.s t1, %2\n\t"        // Classify b
+                    "or       t0, t0, t1\n\t"    // Combine classification masks
+                    "andi     t0, t0, 0x300\n\t" // Keep only the sNaN | qNaN bits
+                    "bnez     t0, 1f\n\t"        // Any NaN operand -> NaN result
+                    "fmin.s   %0, %1, %2\n\t"    // Neither is NaN: ordinary minimum
+                    "j        2f\n\t"
+                    "1:\n\t"
+                    "fadd.s   %0, %1, %2\n\t"    // Arithmetic on a NaN produces the canonical NaN
+                    "2:\n\t"
+                    : "=f" (res_fmin)
+                    : "f" (a_fmin), "f" (b_fmin)
+                    : "t0", "t1"
+                    );
+  #else
+    asm volatile ("fminm.s %0, %1, %2" : "=f" (res_fmin) : "f" (a_fmin), "f" (b_fmin));
+  #endif
+  // fli.s: store each of the 32 constants that fli.s can encode (index = selector).
+  // The two read_cycles() calls bracket the fli.s and fli.d stores; their values are discarded.
+  read_cycles();
+  volatile float res_fli_s[32];
+  res_fli_s[0]  = -1.0f;
+  res_fli_s[1]  = 0x1p-126f;
+  res_fli_s[2]  = 0x1p-16f;
+  res_fli_s[3]  = 0x1p-15f;
+  res_fli_s[4]  = 0x1p-8f;
+  res_fli_s[5]  = 0x1p-7f;
+  res_fli_s[6]  = 0.0625f;
+  res_fli_s[7]  = 0.125f;
+  res_fli_s[8]  = 0.25f;
+  res_fli_s[9]  = 0.3125f;
+  res_fli_s[10] = 0.375f;
+  res_fli_s[11] = 0.4375f;
+  res_fli_s[12] = 0.5f;
+  res_fli_s[13] = 0.625f;
+  res_fli_s[14] = 0.75f;
+  res_fli_s[15] = 0.875f;
+  res_fli_s[16] = 1.0f;
+  res_fli_s[17] = 1.25f;
+  res_fli_s[18] = 1.5f;
+  res_fli_s[19] = 1.75f;
+  res_fli_s[20] = 2.0f;
+  res_fli_s[21] = 2.5f;
+  res_fli_s[22] = 3.0f;
+  res_fli_s[23] = 4.0f;
+  res_fli_s[24] = 8.0f;
+  res_fli_s[25] = 16.0f;
+  res_fli_s[26] = 128.0f;
+  res_fli_s[27] = 256.0f;
+  res_fli_s[28] = 0x1p15f;
+  res_fli_s[29] = 0x1p16f;
+  res_fli_s[30] = INFINITY;
+  res_fli_s[31] = NAN;
+
+  // fli.d: the same 32 selectors as double-precision stores.
+  volatile double res_fli_d[32];
+  res_fli_d[0]  = -1.0;
+  res_fli_d[1]  = 0x1p-1022;
+  res_fli_d[2]  = 0x1p-16;
+  res_fli_d[3]  = 0x1p-15;
+  res_fli_d[4]  = 0x1p-8;
+  res_fli_d[5]  = 0x1p-7;
+  res_fli_d[6]  = 0.0625;
+  res_fli_d[7]  = 0.125;
+  res_fli_d[8]  = 0.25;
+  res_fli_d[9]  = 0.3125;
+  res_fli_d[10] = 0.375;
+  res_fli_d[11] = 0.4375;
+  res_fli_d[12] = 0.5;
+  res_fli_d[13] = 0.625;
+  res_fli_d[14] = 0.75;
+  res_fli_d[15] = 0.875;
+  res_fli_d[16] = 1.0;
+  res_fli_d[17] = 1.25;
+  res_fli_d[18] = 1.5;
+  res_fli_d[19] = 1.75;
+  res_fli_d[20] = 2.0;
+  res_fli_d[21] = 2.5;
+  res_fli_d[22] = 3.0;
+  res_fli_d[23] = 4.0;
+  res_fli_d[24] = 8.0;
+  res_fli_d[25] = 16.0;
+  res_fli_d[26] = 128.0;
+  res_fli_d[27] = 256.0;
+  res_fli_d[28] = 0x1p15;
+  res_fli_d[29] = 0x1p16;
+  res_fli_d[30] = INFINITY;
+  res_fli_d[31] = NAN;
+  read_cycles();
+
+  // fcvtmod.w.d: not exercised yet.
+  return 0;
+}
diff --git a/zicond_micro/zicond.c b/zicond_micro/zicond.c
new file mode 100644
index 0000000..b73e24d
--- /dev/null
+++ b/zicond_micro/zicond.c
@@ -0,0 +1,68 @@
+#include <stdint.h>
+
+#define N 128
+#define ITERATIONS 10
+
+// Fixed, irregular 0/1 condition values (one 64-entry pattern repeated twice), meant to make the select hard to predict
+static const uint64_t src_a[N] = {
+    0, 1, 1, 0, 1, 0, 0, 1, 0, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1,
+    1, 0, 0, 1, 1, 0, 1, 1, 0, 1, 0, 0, 1, 0, 1, 1, 0, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0, 0, 0, 1, 0, 1,
+    0, 1, 1, 0, 1, 0, 0, 1, 0, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1,
+    1, 0, 0, 1, 1, 0, 1, 1, 0, 1, 0, 0, 1, 0, 1, 1, 0, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0, 0, 0, 1, 0, 1
+};
+
+static const uint64_t src_b[N] = {  // payload values 10, 20, ..., 1280 selected (or zeroed) by the benchmarks
+    10, 20, 30, 40, 50, 60, 70, 80, 90, 100, 110, 120, 130, 140, 150, 160,
+    170, 180, 190, 200, 210, 220, 230, 240, 250, 260, 270, 280, 290, 300, 310, 320,
+    330, 340, 350, 360, 370, 380, 390, 400, 410, 420, 430, 440, 450, 460, 470, 480,
+    490, 500, 510, 520, 530, 540, 550, 560, 570, 580, 590, 600, 610, 620, 630, 640,
+    650, 660, 670, 680, 690, 700, 710, 720, 730, 740, 750, 760, 770, 780, 790, 800,
+    810, 820, 830, 840, 850, 860, 870, 880, 890, 900, 910, 920, 930, 940, 950, 960,
+    970, 980, 990, 1000, 1010, 1020, 1030, 1040, 1050, 1060, 1070, 1080, 1090, 1100, 1110, 1120,
+    1130, 1140, 1150, 1160, 1170, 1180, 1190, 1200, 1210, 1220, 1230, 1240, 1250, 1260, 1270, 1280
+};
+
+volatile uint64_t results[N];  // volatile so each store made by the benchmark loops is actually performed
+
+static inline uint64_t read_cycles() {
+    uint64_t cycles;  // receives the counter value produced by rdcycle
+    asm volatile ("rdcycle %0" : "=r"(cycles));  // volatile: the read must not be dropped or merged
+    return cycles;
+}
+
+int main() {
+    uint64_t start, end;  // cycle-counter snapshots around each benchmark; the deltas are not consumed in this file
+
+    // --- Benchmark 1: Trivial czero.eqz ---
+    // Pattern: if (a != 0) return b else return 0
+    start = read_cycles();
+    for (int j = 0; j < ITERATIONS; j++) {
+        for (int i = 0; i < N; i++) {
+            uint64_t a = src_a[i];
+            uint64_t b = src_b[i];
+            // Expected codegen with Zicond: czero.eqz zeroes b when a is 0 (verify in the disassembly)
+            results[i] = (a != 0) ? b : 0;
+        }
+    }
+    end = read_cycles();
+    // Record (end - start) for Zicond enabled vs disabled
+
+    // --- Benchmark 2: Logic AND (czero with complex condition) ---
+    // Pattern: if (a != 0 AND b > 500) return b else return 0
+    start = read_cycles();
+    for (int j = 0; j < ITERATIONS; j++) {
+        for (int i = 0; i < N; i++) {
+            uint64_t a = src_a[i];
+            uint64_t b = src_b[i];
+            // Intended codegen: combine both conditions with 'and', then a single 'czero' (verify in the disassembly)
+            if (a != 0 && b > 500) {
+                results[i] = b;
+            } else {
+                results[i] = 0;
+            }
+        }
+    }
+    end = read_cycles();
+
+    return 0;
+}
diff --git a/zvfhmin_micro/zvfhmin.c b/zvfhmin_micro/zvfhmin.c
new file mode 100644
index 0000000..b27543c
--- /dev/null
+++ b/zvfhmin_micro/zvfhmin.c
@@ -0,0 +1,39 @@
+#include <math.h>
+#include <stdint.h>
+#include <riscv_vector.h>
+
+#include <stdint.h>
+
+#define N 32
+
+// Use 'aligned' to help the autovectorizer
+_Float16 a[N] __attribute__((aligned(16)));  // half-precision buffer: input of the widening loop, output of the narrowing loop
+float b[N] __attribute__((aligned(16)));     // single-precision buffer: output of the widening loop, input of the narrowing loop
+
+static inline uint64_t read_cycles() {
+    uint64_t cycles;  // receives the counter value produced by rdcycle
+    asm volatile ("rdcycle %0" : "=r"(cycles));  // volatile: the read must not be dropped or merged
+    return cycles;
+}
+
+void benchmark() {  // times one widening and one narrowing pass over the global buffers a and b
+    // 1. Widening: _Float16 -> float
+    uint64_t t0 = read_cycles();  // cycle count before the widening loop
+    for (int i = 0; i < N; i++) {
+        b[i] = (float)a[i];
+    }
+    uint64_t t1 = read_cycles();  // cycle count after the widening loop
+
+    // 2. Narrowing: float -> _Float16
+    uint64_t t2 = read_cycles();  // cycle count before the narrowing loop
+    for (int i = 0; i < N; i++) {
+        a[i] = (_Float16)b[i];
+    }
+    uint64_t t3 = read_cycles();  // cycle count after the narrowing loop
+
+    // t0..t3 are not used further here; in a real app, print (t1-t0) and (t3-t2)
+}
+
+int main() {
+  benchmark();  // run both conversion loops once; the cycle counts taken inside are not reported
+}