diff --git a/compile_ossfuzz.py b/compile_ossfuzz.py
index 438f617..b98f352 100644
--- a/compile_ossfuzz.py
+++ b/compile_ossfuzz.py
@@ -1,4 +1,5 @@
 import argparse
+import functools
 import os
 import pathlib
 import re
@@ -168,7 +169,7 @@ def process_project_linearly(project_path):
 OUTPUT_BINARY_PATH = OUTPUT_PATH / "binary"
 OUTPUT_BINARY_PATH.mkdir(exist_ok=True, parents=True)
 
-extra_flags = ' '.join([
+extra_flags = [
     "-mno-sse",
     "-fno-eliminate-unused-debug-types",
     "-fno-lto",
@@ -177,7 +178,7 @@ def process_project_linearly(project_path):
     # "-fno-inline-functions-called-once",  # not supported in clang
     "-fno-inline",
     # "-fno-reorder-blocks-and-partition",  # not supported in clang
-])
+]
 
 
 def compile(row, container: DockerContainer):
@@ -197,12 +198,11 @@ def compile(row, container: DockerContainer):
         f.write(func)
 
     output_file = OUTPUT_BINARY_PATH / f'task-{idx}-{opt}.so'
-    output_file_indocker = pathlib.Path(
-        '/challenges') / f'task-{idx}-{opt}.so'
+    output_file_indocker = pathlib.Path('/challenges/binary') / f'task-{idx}-{opt}.so'
     cmd = ['clang', filepath, f'-{opt}', '-shared', '-fPIC',
-           '-o', output_file_indocker, extra_flags, '-lm']
-    container.exec_in_container(
-        cmd, cwd='/challenges', shell=True, check=True)
+           '-o', str(output_file_indocker)] + extra_flags + ['-lm']
+    out = container.exec_in_container(
+        cmd, cwd='/challenges', shell=False, check=True, capture_output=True)
     ret = subprocess.run(
         f'nm {output_file} | egrep " {function_name}$"',
         stdout=subprocess.PIPE, shell=True, check=True)
@@ -224,10 +224,10 @@ def compile(row, container: DockerContainer):
     return challenge
 
 
-def tqdm_progress_map(func, iterable, num_workers, container):
+def tqdm_progress_map(func, iterable, num_workers):
     results = []
     with Pool(num_workers) as pool:
-        for result in tqdm(pool.imap_unordered(func, iterable, container), total=len(iterable)):
+        for result in tqdm(pool.imap_unordered(func, iterable), total=len(iterable)):
             results.append(result)
     return results
 
@@ -236,7 +236,7 @@ def tqdm_progress_map(func, iterable, num_workers, container):
     f'{OUTPUT_PATH}': '/challenges',
     '/dev/shm': '/dev/shm'
 }) as container:
-    res = tqdm_progress_map(compile, ds, args.num_workers, container)
+    res = tqdm_progress_map(functools.partial(compile, container=container), ds, args.num_workers)
 res = list(chain(*res))
 ds = datasets.Dataset.from_list(res)
 print(len(ds))
diff --git a/evaluate_rsr.py b/evaluate_rsr.py
index 7a9ec3c..e071913 100644
--- a/evaluate_rsr.py
+++ b/evaluate_rsr.py
@@ -19,32 +19,19 @@
 
 repo_path = pathlib.Path(__file__).resolve().parent
 
-parser = argparse.ArgumentParser()
-parser.add_argument('--config', type=str, default="./config.yaml",
-                    help='Path to the configuration file')
-parser.add_argument("--decompiled-dataset", type=str)
-parser.add_argument("--decompilers", type=str, nargs='*',
-                    help="Decompilers to evaluate, leave empty to evaluate all decompilers specified in the config")
-args = parser.parse_args()
+oss_fuzz_path: pathlib.Path | None = None
+decompilers: Set[str] = set()
 
 
-with open(args.config, 'r') as f:
-    config = yaml.safe_load(f)
-oss_fuzz_path = pathlib.Path(config['oss_fuzz_path'])
-decompilers: Set[str] = set(config['decompilers'])
-
-if args.decompilers:
-    decompilers = decompilers.intersection(set(args.decompilers))
-
-ds_with_decompile_code = datasets.Dataset.load_from_disk(
-    args.decompiled_dataset)
-
-for col in ['include', 'opt']:
-    if col not in ds_with_decompile_code.column_names:
-        raise ValueError(f"Column {col} not found in the dataset, please make sure the dataset is a merged dataset")
-
-df = ds_with_decompile_code.to_pandas()
-assert isinstance(df, pd.DataFrame)
+def parse_args():
+    parser = argparse.ArgumentParser()
+    parser.add_argument('--config', type=str, default="./config.yaml",
+                        help='Path to the configuration file')
+    parser.add_argument("--decompiled-dataset", type=str, required=True,
+                        help="Path to the merged decompiled dataset produced earlier")
+    parser.add_argument("--decompilers", type=str, nargs='*',
+                        help="Decompilers to evaluate, leave empty to evaluate all decompilers specified in the config")
+    return parser.parse_args()
 
 
 class DockerContainer:
@@ -329,28 +316,61 @@ def decompile_pass_rate(gen_results, compiler, num_workers, container):
     return ret
 
 
-for d in decompilers:
-    print(f'Decompiler: {d}')
+def main():
+    global oss_fuzz_path, decompilers
+
+    args = parse_args()
+
+    with open(args.config, 'r') as f:
+        config = yaml.safe_load(f)
+
+    oss_fuzz_path = pathlib.Path(config['oss_fuzz_path'])
+    decompilers = set(config['decompilers'])
+
+    if args.decompilers:
+        decompilers = decompilers.intersection(set(args.decompilers))
 
-    if d not in df.columns:
-        continue
+    if not args.decompiled_dataset:
+        raise ValueError(
+            "--decompiled-dataset is required. Please provide the path to the merged dataset.")
 
-    with DockerContainer('evaluate_in_docker', {
-        f'{oss_fuzz_path}/build/challenges': '/challenges',
-        f'{repo_path}/fix': '/fix'
-    }) as container:
-        eval_result_df = pd.DataFrame(
-            decompile_pass_rate(df, d, 64, container))
+    ds_with_decompile_code = datasets.Dataset.load_from_disk(
+        args.decompiled_dataset)
+
+    for col in ['include', 'opt']:
+        if col not in ds_with_decompile_code.column_names:
+            raise ValueError(
+                f"Column {col} not found in the dataset, please make sure the dataset is a merged dataset")
+
+    df = ds_with_decompile_code.to_pandas()
+    assert isinstance(df, pd.DataFrame)
+
+    for d in decompilers:
+        print(f'Decompiler: {d}')
+
+        if d not in df.columns:
+            continue
+
+        with DockerContainer('evaluate_in_docker', {
+            f'{oss_fuzz_path}/build/challenges': '/challenges',
+            f'{repo_path}/fix': '/fix'
+        }) as container:
+            eval_result_df = pd.DataFrame(
+                decompile_pass_rate(df, d, 64, container))
+
+            for opt, per_opt_df in eval_result_df.groupby('opt'):
+                compile_rate = per_opt_df['flag_compile'].mean()
+
+                print(
+                    f"Optimization {opt}: Compile Rate: {compile_rate:.4f}")
+            print('-' * 30)
 
-        for opt, per_opt_df in eval_result_df.groupby('opt'):
-            compile_rate = per_opt_df['flag_compile'].mean()
+    rm_docker_cmd = "docker rm -f evaluate_in_docker"
+    result = subprocess.run(rm_docker_cmd, shell=True,
+                            capture_output=True, text=True)
+    if result.returncode == 0:
+        print("Container evaluate_in_docker removed successfully")
 
-            print(
-                f"Optimization {opt}: Compile Rate: {compile_rate:.4f}")
-        print('-' * 30)
 
-rm_docker_cmd = "docker rm -f evaluate_in_docker"
-result = subprocess.run(rm_docker_cmd, shell=True,
-                        capture_output=True, text=True)
-if result.returncode == 0:
-    print("Container evaluate_in_docker removed successfully")
+if __name__ == "__main__":
+    main()