20 changes: 10 additions & 10 deletions compile_ossfuzz.py
@@ -1,4 +1,5 @@
import argparse
import functools
import os
import pathlib
import re
@@ -168,7 +169,7 @@ def process_project_linearly(project_path):
OUTPUT_BINARY_PATH = OUTPUT_PATH / "binary"
OUTPUT_BINARY_PATH.mkdir(exist_ok=True, parents=True)

extra_flags = ' '.join([
extra_flags = [
"-mno-sse",
"-fno-eliminate-unused-debug-types",
"-fno-lto",
@@ -177,7 +178,7 @@ def process_project_linearly(project_path):
# "-fno-inline-functions-called-once", # not supported in clang
"-fno-inline",
# "-fno-reorder-blocks-and-partition", # not supported in clang
])
]


def compile(row, container: DockerContainer):
@@ -197,12 +198,11 @@ def compile(row, container: DockerContainer):
f.write(func)

output_file = OUTPUT_BINARY_PATH / f'task-{idx}-{opt}.so'
output_file_indocker = pathlib.Path(
'/challenges') / f'task-{idx}-{opt}.so'
output_file_indocker = pathlib.Path('/challenges/binary') / f'task-{idx}-{opt}.so'
cmd = ['clang', filepath, f'-{opt}', '-shared', '-fPIC',
'-o', output_file_indocker, extra_flags, '-lm']
container.exec_in_container(
cmd, cwd='/challenges', shell=True, check=True)
'-o', str(output_file_indocker)] + extra_flags + ['-lm']
out = container.exec_in_container(
Copilot AI Dec 15, 2025

The variable out is assigned but never used. If the intention is to capture and suppress the output from the Docker container execution, consider either using the result (e.g., for logging or debugging) or removing the assignment and just calling the function directly.

Suggested change
out = container.exec_in_container(
container.exec_in_container(

cmd, cwd='/challenges', shell=False, check=True, capture_output=True)

ret = subprocess.run(
f'nm {output_file} | egrep " {function_name}$"', stdout=subprocess.PIPE, shell=True, check=True)
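
A sketch of the reviewer's suggestion above: if the captured output is meant to be kept, it can be surfaced when compilation fails instead of being silently discarded. This assumes exec_in_container returns a subprocess.CompletedProcess-like object with returncode/stderr attributes, which the diff does not confirm.

result = container.exec_in_container(
    cmd, cwd='/challenges', shell=False, check=False, capture_output=True)
if result.returncode != 0:
    # Surface the compiler diagnostics rather than dropping them.
    print(f'clang failed for task-{idx}-{opt}:\n{result.stderr}')
    raise RuntimeError(f'compilation failed for task-{idx}-{opt}')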
@@ -224,10 +224,10 @@ def compile(row, container: DockerContainer):
return challenge


def tqdm_progress_map(func, iterable, num_workers, container):
def tqdm_progress_map(func, iterable, num_workers):
results = []
with Pool(num_workers) as pool:
for result in tqdm(pool.imap_unordered(func, iterable, container), total=len(iterable)):
for result in tqdm(pool.imap_unordered(func, iterable), total=len(iterable)):
results.append(result)
return results

@@ -236,7 +236,7 @@ def tqdm_progress_map(func, iterable, num_workers, container):
f'{OUTPUT_PATH}': '/challenges',
'/dev/shm': '/dev/shm'
}) as container:
res = tqdm_progress_map(compile, ds, args.num_workers, container)
res = tqdm_progress_map(functools.partial(compile, container=container), ds, args.num_workers)
res = list(chain(*res))
ds = datasets.Dataset.from_list(res)
print(len(ds))
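
A note on the functools.partial change above: Pool.imap_unordered(func, iterable, chunksize) has no slot for extra arguments, so the old call was passing container where chunksize belongs. Pre-binding the extra argument with functools.partial fixes this; a minimal, self-contained sketch with illustrative names (not from this repo):

import functools
from multiprocessing import Pool

def work(item, tag):
    # 'tag' stands in for the bound container argument.
    return f'{tag}:{item}'

if __name__ == '__main__':
    with Pool(2) as pool:
        # partial pre-binds 'tag', leaving a one-argument callable,
        # so imap_unordered no longer misreads it as chunksize.
        bound = functools.partial(work, tag='ctr')
        print(sorted(pool.imap_unordered(bound, range(3))))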
108 changes: 64 additions & 44 deletions evaluate_rsr.py
@@ -19,32 +19,19 @@

repo_path = pathlib.Path(__file__).resolve().parent

parser = argparse.ArgumentParser()
parser.add_argument('--config', type=str, default="./config.yaml",
help='Path to the configuration file')
parser.add_argument("--decompiled-dataset", type=str)
parser.add_argument("--decompilers", type=str, nargs='*',
help="Decompilers to evaluate, leave empty to evaluate all decompilers specified in the config")
args = parser.parse_args()
oss_fuzz_path: pathlib.Path | None = None
decompilers: Set[str] = set()

with open(args.config, 'r') as f:
config = yaml.safe_load(f)

oss_fuzz_path = pathlib.Path(config['oss_fuzz_path'])
decompilers: Set[str] = set(config['decompilers'])

if args.decompilers:
decompilers = decompilers.intersection(set(args.decompilers))

ds_with_decompile_code = datasets.Dataset.load_from_disk(
args.decompiled_dataset)

for col in ['include', 'opt']:
if col not in ds_with_decompile_code.column_names:
raise ValueError(f"Column {col} not found in the dataset, please make sure the dataset is a merged dataset")

df = ds_with_decompile_code.to_pandas()
assert isinstance(df, pd.DataFrame)
def parse_args():
parser = argparse.ArgumentParser()
parser.add_argument('--config', type=str, default="./config.yaml",
help='Path to the configuration file')
parser.add_argument("--decompiled-dataset", type=str, required=True,
help="Path to the merged decompiled dataset produced earlier")
parser.add_argument("--decompilers", type=str, nargs='*',
help="Decompilers to evaluate, leave empty to evaluate all decompilers specified in the config")
return parser.parse_args()


class DockerContainer:
@@ -329,28 +316,61 @@ def decompile_pass_rate(gen_results, compiler, num_workers, container):
return ret


for d in decompilers:
print(f'Decompiler: {d}')
def main():
global oss_fuzz_path, decompilers

args = parse_args()

with open(args.config, 'r') as f:
config = yaml.safe_load(f)

oss_fuzz_path = pathlib.Path(config['oss_fuzz_path'])
decompilers = set(config['decompilers'])

if args.decompilers:
decompilers = decompilers.intersection(set(args.decompilers))

if d not in df.columns:
continue
if not args.decompiled_dataset:
raise ValueError(
"--decompiled-dataset is required. Please provide the path to the merged dataset.")

Comment on lines +333 to 336
Copilot AI Dec 15, 2025

This validation check is redundant because the argument parser already has required=True for the --decompiled-dataset argument (line 30). The argparse module will raise an error if this argument is not provided, so this condition will never be true.

Suggested change
if not args.decompiled_dataset:
raise ValueError(
"--decompiled-dataset is required. Please provide the path to the merged dataset.")

with DockerContainer('evaluate_in_docker', {
f'{oss_fuzz_path}/build/challenges': '/challenges',
f'{repo_path}/fix': '/fix'
}) as container:
eval_result_df = pd.DataFrame(
decompile_pass_rate(df, d, 64, container))
ds_with_decompile_code = datasets.Dataset.load_from_disk(
args.decompiled_dataset)

for col in ['include', 'opt']:
if col not in ds_with_decompile_code.column_names:
raise ValueError(
f"Column {col} not found in the dataset, please make sure the dataset is a merged dataset")

df = ds_with_decompile_code.to_pandas()
assert isinstance(df, pd.DataFrame)

for d in decompilers:
print(f'Decompiler: {d}')

if d not in df.columns:
continue

with DockerContainer('evaluate_in_docker', {
f'{oss_fuzz_path}/build/challenges': '/challenges',
f'{repo_path}/fix': '/fix'
}) as container:
eval_result_df = pd.DataFrame(
decompile_pass_rate(df, d, 64, container))

for opt, per_opt_df in eval_result_df.groupby('opt'):
compile_rate = per_opt_df['flag_compile'].mean()

print(
f"Optimization {opt}: Compile Rate: {compile_rate:.4f}")
print('-' * 30)

for opt, per_opt_df in eval_result_df.groupby('opt'):
compile_rate = per_opt_df['flag_compile'].mean()
rm_docker_cmd = "docker rm -f evaluate_in_docker"
result = subprocess.run(rm_docker_cmd, shell=True,
capture_output=True, text=True)
if result.returncode == 0:
print("Container evaluate_in_docker removed successfully")

print(
f"Optimization {opt}: Compile Rate: {compile_rate:.4f}")
print('-' * 30)

rm_docker_cmd = "docker rm -f evaluate_in_docker"
result = subprocess.run(rm_docker_cmd, shell=True,
capture_output=True, text=True)
if result.returncode == 0:
print("Container evaluate_in_docker removed successfully")
if __name__ == "__main__":
main()
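
To illustrate the redundancy flagged in the review comment above: with required=True, argparse aborts before main()'s body runs whenever the flag is missing, so the manual ValueError check is dead code. A standalone sketch:

import argparse

parser = argparse.ArgumentParser()
parser.add_argument('--decompiled-dataset', required=True)

# With the flag missing, parse_args() prints a usage error and exits
# with status 2, so execution never reaches any manual check.
args = parser.parse_args(['--decompiled-dataset', 'ds_path'])
print(args.decompiled_dataset)  # -> 'ds_path'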