utcs-scea · chri2tine · Apr 3, 2024
diff --git a/src/linnos/data_gather/Makefile b/src/linnos/data_gather/Makefile
@@ -0,0 +1,21 @@
+# Makefile for running insert_mod.sh 10 times
+
+# Number of times to run the script
+NUM_RUNS := 1
+
+# Default target
+.PHONY: all
+all: run_script
+
+# Rule to run the script multiple times
+.PHONY: run_script
+run_script:
+	@for i in $$(seq 1 $(NUM_RUNS)); do \
+		./run_benchmark.sh; \
+    done
+
+# Clean rule
+.PHONY: clean
+clean:
+    # Add clean commands if needed
+
diff --git a/src/linnos/data_gather/parse-info.py b/src/linnos/data_gather/parse-info.py
@@ -0,0 +1,48 @@
+#!/usr/bin/env python
+
+import os
+
+# Initialize variables to store cumulative sums
+total_avg_read_latency = 0
+total_read_latency_p95 = 0
+total_read_latency_p99 = 0
+total_avg_write_latency = 0
+total_io_inter_arrival_time = 0
+
+# Count the number of entries
+count = 0
+
+# Change to user's directory
+os.chdir("/home/cyuan/LAKE/src/linnos/io_replayer")
+
+# Change to the specific policy's output file
+with open("baseline_random_cancel_mix.out", "r") as file:
+    lines = file.readlines()
+    for i in range(0, len(lines), 6):
+        count += 1
+        avg_read_latency = float(lines[i+1].split(":")[1].strip().split()[0])
+        read_latency_p95 = float(lines[i+2].split(":")[1].strip().split()[0])
+        read_latency_p99 = float(lines[i+3].split(":")[1].strip().split()[0])
+        avg_write_latency = float(lines[i+4].split(":")[1].strip().split()[0])
+        io_inter_arrival_time = float(lines[i+5].split(":")[1].strip().split()[0])
+
+        # Add to cumulative sums
+        total_avg_read_latency += avg_read_latency
+        total_read_latency_p95 += read_latency_p95
+        total_read_latency_p99 += read_latency_p99
+        total_avg_write_latency += avg_write_latency
+        total_io_inter_arrival_time += io_inter_arrival_time
+
+# Calculate averages
+average_avg_read_latency = total_avg_read_latency / count
+average_read_latency_p95 = total_read_latency_p95 / count
+average_read_latency_p99 = total_read_latency_p99 / count
+average_avg_write_latency = total_avg_write_latency / count
+average_io_inter_arrival_time = total_io_inter_arrival_time / count
+
+# Print results
+print("Average read latency:", average_avg_read_latency, "us")
+print("Average read latency p95:", average_read_latency_p95, "us")
+print("Average read latency p99:", average_read_latency_p99, "us")
+print("Average write latency:", average_avg_write_latency, "us")
+print("Average IO inter arrival time:", average_io_inter_arrival_time, "us")
diff --git a/src/linnos/data_gather/run_benchmark.sh b/src/linnos/data_gather/run_benchmark.sh
@@ -0,0 +1,56 @@
+#!/bin/bash
+
+# enable, disable, make_then_disable, make_then_enable
+prep="disable"
+count=1
+benchmark="mix"
+# baseline, baseline_random_cancel, failover, failover_random_cancel
+policy="baseline_random_cancel" 
+outfile="${policy}_${benchmark}.out"
+
+if [ "$prep" = "disable" ]; then
+    sudo ./disable_linnos.sh
+elif [ "$prep" = "make_then_disable" ]; then
+    cd ..
+    sudo make
+    cd kernel_hook
+    echo "building kernel_hook main"
+    sudo make clean all
+    sudo ./disable_linnos.sh
+elif [ "$prep" = "enable" ]; then
+    sudo ./disable_linnos.sh
+    sudo ./enable_linnos_gpu.sh
+elif [ "$prep" = "make_then_enable" ]; then
+    cd ..
+    sudo make
+    cd kernel_hook
+    echo "building kernel_hook main"
+    sudo make clean all
+    sudo ./disable_linnos.sh
+    sudo ./enable_linnos_gpu.sh
+fi
+
+# remake io_replayer
+cd ../io_replayer
+sudo make
+
+if [ "$benchmark" != "mix" ]; then
+    echo "first"
+    # for all benchmarks that are not mix
+    for i in $(seq $count); do 
+        echo "Run $i"
+        ./run_3ssds.sh ${policy} ../trace_tools/${benchmark}/${benchmark}1.trace \ ../trace_tools/${benchmark}/${benchmark}1.trace \ ../trace_tools/${benchmark}/${benchmark}.trace
+        cd ../io_replayer
+        python3 stats.py 3ssds_${policy}.data>>${outfile}
+        sudo rm 3ssds_${policy}.data
+    done
+else 
+    echo "second"
+    for i in $(seq $count); do 
+        echo "Run $i"
+        ./run_3ssds.sh ${policy} ../trace_tools/azure/azure1.trace \ ../trace_tools/bing_i/bing_i1.trace \ ../trace_tools/cosmos/cosmos1.trace
+        cd ../io_replayer
+        python3 stats.py 3ssds_${policy}.data>>${outfile}
+        sudo rm 3ssds_${policy}.data
+    done
+fi
diff --git a/src/linnos/io_replayer/src/op_replayers.cpp b/src/linnos/io_replayer/src/op_replayers.cpp
@@ -18,17 +18,22 @@
  */
 
 
-#include <errno.h>
+#include <ctime>
+#include <iostream>
+#include <cinttypes>
 #include "op_replayers.hpp"
-
-// static int64_t get_next_multiple(uint64_t A, uint64_t B) {
-//     if (A % B)
-//         A = A + (B - A % B);
-//     return A;
-// }
+#include <errno.h>
+#include <random>
+#include <cstdlib>
 
 #define MAX_FAIL 2
 
+std::random_device rd;
+std::mt19937 gen(rd());
+std::mt19937 gen2(rd());
+std::uniform_int_distribution<> distribution2(1, 2);
+std::uniform_int_distribution<> distribution(1, 100); // Example range from 1 to 100
+
 static int sleep_until(uint64_t next) {
     uint64_t now = get_ns_ts();
     int64_t diff = next - now;
@@ -51,7 +56,49 @@ void baseline_execute_op(TraceOp &trace_op, Trace *trace, uint32_t device, char*
     if(trace_op.op == 0) {
         trace->add_io_count(device);
         ret = pread(fds[device], buf, trace_op.size, trace_op.offset);
-    } else if(trace_op.op == 1) {
+    } 
+    else if(trace_op.op == 1) {
+        trace->add_io_count(device);
+        ret = pwrite(fds[device], buf, trace_op.size, trace_op.offset);
+    } else {
+        printf("Wrong OP code! %d\n", trace_op.op);
+    }
+
+    if (ret < 0){
+        printf("FAILURE ret: %d\n", ret);
+        printf("IO error during pread/pwrite: %s\n", strerror(errno));
+        printf("err %d\n", errno);
+        printf("offset in B : %lu\n", trace_op.offset );
+        printf("size in B : %lu\n", trace_op.size);
+    }
+}
+
+/*
+    This method randomly cancels the request for LAKE, as specified by the rand_int generated. After selecting
+    whether or not to cancel, the device that the request will be assigned to will also be randomly generated/selected.
+    This gives us data on the accuracy of the policy when collecting different percentages of cancellation.
+*/
+void baseline_random_cancel_execute_op(TraceOp &trace_op, Trace *trace, uint32_t device, char* buf) {
+    int ret;
+    int *fds = trace->get_fds();
+    //read
+    if(trace_op.op == 0) {
+        baseline_total_count++;
+        std::srand(static_cast<unsigned int>(std::time(nullptr)));
+        int rand_int = distribution(gen); // Generate a random number
+        if (1 <= rand_int && rand_int <= 5) { // Decides whether ot not to cancel, change the 5 to 10 or 50 to edit percentages
+            baseline_random_cancel_count++;
+            int random = distribution2(gen2); // Randomly picks a device to send request to
+            if (random == 1) {
+                device = (--device + 3) % 3;
+            } else {
+                device = ++device % 3;
+            }
+        }
+        trace->add_io_count(device);
+        ret = pread(fds[device], buf, trace_op.size, trace_op.offset);
+    } 
+    else if(trace_op.op == 1) {
         trace->add_io_count(device);
         ret = pwrite(fds[device], buf, trace_op.size, trace_op.offset);
     } else {
@@ -86,7 +133,6 @@ void failover_execute_op(TraceOp &trace_op, Trace *trace, uint32_t device, char*
                 trace->add_fail(device);
             }
             if (!success) {
-                //printf("IO never finished..\n");
                 trace->add_unique_fail(device);
                 pread(fds[device], buf, trace_op.size, 0); //this is what linnos does
             }
@@ -97,33 +143,64 @@ void failover_execute_op(TraceOp &trace_op, Trace *trace, uint32_t device, char*
     } else {
         printf("Wrong OP code! %d\n", trace_op.op);
     }
+}
+
+/*
+    This method randomly cancels the request for LAKE, as specified by the rand_int generated. After selecting
+    whether or not to cancel, the device that the request will be assigned to will also be randomly generated/selected.
+    This gives us data on the accuracy of the policy when collecting different percentages of cancellation.
+*/
+void failover_random_cancel_execute_op(TraceOp &trace_op, Trace *trace, uint32_t device, char* buf) {
+    int ret, i;
+    int *fds = trace->get_fds();
+    bool success = false;
+    bool random_cancel;
 
-    // int ret, i;
-    // int *fds = trace->get_fds();
-    // bool success = false;
-    // //read
-    // if(trace_op.op == 0) {
-    //     for (i = 0 ; i < MAX_FAIL ; i++) {
-    //         trace->add_io_count((device+i)%2);
-    //         ret = pread(fds[(device+i)%2], buf, trace_op.size, trace_op.offset);
-    //         if (ret > 0) {
-    //             success = true;
-    //             break;
-    //         }
-    //         trace->add_fail(device);
-    //     }
-    //     //max fail.. it looped around, linnos never handled this case
-    //     if (!success) {
-    //         //printf("IO never finished..\n");
-    //         trace->add_unique_fail(device);
-    //         pread(fds[device], buf, trace_op.size, 0); //this is what linnos does
-    //     }
-    // } else if(trace_op.op == 1) {
-    //     trace->add_io_count(device);
-    //     ret = pwrite(fds[device], buf, trace_op.size, trace_op.offset);
-    // } else {
-    //     printf("Wrong OP code! %d\n", trace_op.op);
-    // }
+    //read
+    if(trace_op.op == 0) {
+        total_count++;
+        trace->add_io_count(device);
+        ret = pread(fds[device], buf, trace_op.size, trace_op.offset);
+        if (ret < 0) {
+            trace->add_fail(device);
+            for (i = 0 ; i < MAX_FAIL ; i++) {
+                std::srand(static_cast<unsigned int>(std::time(nullptr)));
+                int rand_int = distribution(gen); // Generate a random number
+                if (1 <= rand_int && rand_int <= 5) { // Decides whether ot not to cancel, change the 5 to 10 or 50 to edit percentages
+                    random_cancel_count++;
+                    random_cancel = true;
+                    device = (--device + 3) % 3;
+                } else {
+                    random_cancel = false;
+                    device = ++device % 3;
+                }
+                trace->add_io_count(device);
+                ret = pread(fds[device], buf, trace_op.size, trace_op.offset);
+                if (random_cancel == true) {
+                    if (ret < 0) {
+                        success = false;
+                    } else {
+                        success = true;
+                    }
+                    break;
+                }
+                if (ret > 0) {
+                    success = true;
+                    break;
+                }
+                trace->add_fail(device);
+            }
+            if (!success) {
+                trace->add_unique_fail(device);
+                pread(fds[device], buf, trace_op.size, 0); //this is what linnos does
+            }
+        }
+    } else if(trace_op.op == 1) {
+        trace->add_io_count(device);
+        ret = pwrite(fds[device], buf, trace_op.size, trace_op.offset);
+    } else {
+        printf("Wrong OP code! %d\n", trace_op.op);
+    }
 }
 
 void strawman_execute_op(TraceOp &trace_op, Trace *trace, uint32_t device, char* buf) {
@@ -194,6 +271,7 @@ void* replayer_fn(void* arg) {
     //start together
     pthread_barrier_wait(targ->sync_barrier);
     int is_late;
+    //add variable to see if ml_enabled has changed
     while (1) {
         trace_op = trace->get_line(device);
         if (trace_op.timestamp == -1) {
@@ -212,6 +290,8 @@ void* replayer_fn(void* arg) {
         uint32_t elaps =  std::chrono::duration_cast<std::chrono::microseconds>(end - begin).count();
         uint64_t end_ts = get_ns_ts();
 
+        //print out whether it's changed or not within the loop
+
         //store results
         trace->write_output_line(end_ts/1000, elaps, trace_op.op,
                 trace_op.size, trace_op.offset, submission/1000,
@@ -220,3 +300,5 @@ void* replayer_fn(void* arg) {
     free(buf);
     return 0;
 }
+
+
diff --git a/src/linnos/io_replayer/src/op_replayers.hpp b/src/linnos/io_replayer/src/op_replayers.hpp
@@ -26,22 +26,30 @@
 #include <thread>
 #include "replayer.hpp"
 
+// added 
+#include <mutex>
+
 struct Thread_arg {
     Trace *trace;
     uint32_t device;
     pthread_barrier_t *sync_barrier;
     uint64_t start_ts;
 
     void (*executor)(TraceOp &trace_op, Trace *trace, uint32_t device, char* buf);
+    //int percent
 };
 
-
 void baseline_execute_op(TraceOp &trace_op, Trace *trace, uint32_t device, char* buf);
+void baseline_random_cancel_execute_op(TraceOp &trace_op, Trace *trace, uint32_t device, char* buf);
 void strawman_execute_op(TraceOp &trace_op, Trace *trace, uint32_t device, char* buf);
 void failover_execute_op(TraceOp &trace_op, Trace *trace, uint32_t device, char* buf);
+void failover_random_cancel_execute_op(TraceOp &trace_op, Trace *trace, uint32_t device, char* buf);
 void strawman_2ssds_execute_op(TraceOp &trace_op, Trace *trace, uint32_t device, char* buf);
 
 void* replayer_fn(void* arg);
-
+static uint64_t random_cancel_count = 0;
+static uint64_t total_count = 0;
+static uint64_t baseline_random_cancel_count = 0;
+static uint64_t baseline_total_count = 0;
 
 #endif