20 changes: 10 additions & 10 deletions compile_ossfuzz.py
@@ -1,4 +1,5 @@
import argparse
import functools
import os
import pathlib
import re
@@ -168,7 +169,7 @@ def process_project_linearly(project_path):
OUTPUT_BINARY_PATH = OUTPUT_PATH / "binary"
OUTPUT_BINARY_PATH.mkdir(exist_ok=True, parents=True)

extra_flags = ' '.join([
extra_flags = [
"-mno-sse",
"-fno-eliminate-unused-debug-types",
"-fno-lto",
@@ -177,7 +178,7 @@ def process_project_linearly(project_path):
# "-fno-inline-functions-called-once", # not supported in clang
"-fno-inline",
# "-fno-reorder-blocks-and-partition", # not supported in clang
])
]


def compile(row, container: DockerContainer):
@@ -197,12 +198,11 @@ def compile(row, container: DockerContainer):
f.write(func)

output_file = OUTPUT_BINARY_PATH / f'task-{idx}-{opt}.so'
output_file_indocker = pathlib.Path(
'/challenges') / f'task-{idx}-{opt}.so'
output_file_indocker = pathlib.Path('/challenges/binary') / f'task-{idx}-{opt}.so'
cmd = ['clang', filepath, f'-{opt}', '-shared', '-fPIC',
'-o', output_file_indocker, extra_flags, '-lm']
container.exec_in_container(
cmd, cwd='/challenges', shell=True, check=True)
'-o', str(output_file_indocker)] + extra_flags + ['-lm']
out = container.exec_in_container(
Copilot AI Dec 15, 2025

The variable out is assigned but never used. If the intention is to capture and suppress the output from the Docker container execution, consider either using the result (e.g., for logging or debugging) or removing the assignment and just calling the function directly.

Suggested change
out = container.exec_in_container(
container.exec_in_container(

cmd, cwd='/challenges', shell=False, check=True, capture_output=True)

ret = subprocess.run(
f'nm {output_file} | egrep " {function_name}$"', stdout=subprocess.PIPE, shell=True, check=True)
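
A sketch of the reviewer's suggestion above: if the captured output is meant to be kept, it can be surfaced when compilation fails instead of being silently discarded. This assumes exec_in_container returns a subprocess.CompletedProcess-like object with returncode/stderr attributes, which the diff does not confirm.

result = container.exec_in_container(
    cmd, cwd='/challenges', shell=False, check=False, capture_output=True)
if result.returncode != 0:
    # Surface the compiler diagnostics rather than dropping them.
    print(f'clang failed for task-{idx}-{opt}:\n{result.stderr}')
    raise RuntimeError(f'compilation failed for task-{idx}-{opt}')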
@@ -224,10 +224,10 @@ def compile(row, container: DockerContainer):
return challenge


def tqdm_progress_map(func, iterable, num_workers, container):
def tqdm_progress_map(func, iterable, num_workers):
results = []
with Pool(num_workers) as pool:
for result in tqdm(pool.imap_unordered(func, iterable, container), total=len(iterable)):
for result in tqdm(pool.imap_unordered(func, iterable), total=len(iterable)):
results.append(result)
return results

@@ -236,7 +236,7 @@ def tqdm_progress_map(func, iterable, num_workers, container):
f'{OUTPUT_PATH}': '/challenges',
'/dev/shm': '/dev/shm'
}) as container:
res = tqdm_progress_map(compile, ds, args.num_workers, container)
res = tqdm_progress_map(functools.partial(compile, container=container), ds, args.num_workers)
res = list(chain(*res))
ds = datasets.Dataset.from_list(res)
print(len(ds))
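
A note on the functools.partial change above: Pool.imap_unordered(func, iterable, chunksize) has no slot for extra arguments, so the old call was passing container where chunksize belongs. Pre-binding the extra argument with functools.partial fixes this; a minimal, self-contained sketch with illustrative names (not from this repo):

import functools
from multiprocessing import Pool

def work(item, tag):
    # 'tag' stands in for the bound container argument.
    return f'{tag}:{item}'

if __name__ == '__main__':
    with Pool(2) as pool:
        # partial pre-binds 'tag', leaving a one-argument callable,
        # so imap_unordered no longer misreads it as chunksize.
        bound = functools.partial(work, tag='ctr')
        print(sorted(pool.imap_unordered(bound, range(3))))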
108 changes: 64 additions & 44 deletions evaluate_rsr.py
@@ -19,32 +19,19 @@

repo_path = pathlib.Path(__file__).resolve().parent

parser = argparse.ArgumentParser()
parser.add_argument('--config', type=str, default="./config.yaml",
help='Path to the configuration file')
parser.add_argument("--decompiled-dataset", type=str)
parser.add_argument("--decompilers", type=str, nargs='*',
help="Decompilers to evaluate, leave empty to evaluate all decompilers specified in the config")
args = parser.parse_args()
oss_fuzz_path: pathlib.Path | None = None
decompilers: Set[str] = set()

with open(args.config, 'r') as f:
config = yaml.safe_load(f)

oss_fuzz_path = pathlib.Path(config['oss_fuzz_path'])
decompilers: Set[str] = set(config['decompilers'])

if args.decompilers:
decompilers = decompilers.intersection(set(args.decompilers))

ds_with_decompile_code = datasets.Dataset.load_from_disk(
args.decompiled_dataset)

for col in ['include', 'opt']:
if col not in ds_with_decompile_code.column_names:
raise ValueError(f"Column {col} not found in the dataset, please make sure the dataset is a merged dataset")

df = ds_with_decompile_code.to_pandas()
assert isinstance(df, pd.DataFrame)
def parse_args():
parser = argparse.ArgumentParser()
parser.add_argument('--config', type=str, default="./config.yaml",
help='Path to the configuration file')
parser.add_argument("--decompiled-dataset", type=str, required=True,
help="Path to the merged decompiled dataset produced earlier")
parser.add_argument("--decompilers", type=str, nargs='*',
help="Decompilers to evaluate, leave empty to evaluate all decompilers specified in the config")
return parser.parse_args()


class DockerContainer:
@@ -329,28 +316,61 @@ def decompile_pass_rate(gen_results, compiler, num_workers, container):
return ret


for d in decompilers:
print(f'Decompiler: {d}')
def main():
global oss_fuzz_path, decompilers

args = parse_args()

with open(args.config, 'r') as f:
config = yaml.safe_load(f)

oss_fuzz_path = pathlib.Path(config['oss_fuzz_path'])
decompilers = set(config['decompilers'])

if args.decompilers:
decompilers = decompilers.intersection(set(args.decompilers))

if d not in df.columns:
continue
if not args.decompiled_dataset:
raise ValueError(
"--decompiled-dataset is required. Please provide the path to the merged dataset.")

Comment on lines +333 to 336
Copilot AI Dec 15, 2025

This validation check is redundant because the argument parser already has required=True for the --decompiled-dataset argument (line 30). The argparse module will raise an error if this argument is not provided, so this condition will never be true.

Suggested change
if not args.decompiled_dataset:
raise ValueError(
"--decompiled-dataset is required. Please provide the path to the merged dataset.")

with DockerContainer('evaluate_in_docker', {
f'{oss_fuzz_path}/build/challenges': '/challenges',
f'{repo_path}/fix': '/fix'
}) as container:
eval_result_df = pd.DataFrame(
decompile_pass_rate(df, d, 64, container))
ds_with_decompile_code = datasets.Dataset.load_from_disk(
args.decompiled_dataset)

for col in ['include', 'opt']:
if col not in ds_with_decompile_code.column_names:
raise ValueError(
f"Column {col} not found in the dataset, please make sure the dataset is a merged dataset")

df = ds_with_decompile_code.to_pandas()
assert isinstance(df, pd.DataFrame)

for d in decompilers:
print(f'Decompiler: {d}')

if d not in df.columns:
continue

with DockerContainer('evaluate_in_docker', {
f'{oss_fuzz_path}/build/challenges': '/challenges',
f'{repo_path}/fix': '/fix'
}) as container:
eval_result_df = pd.DataFrame(
decompile_pass_rate(df, d, 64, container))

for opt, per_opt_df in eval_result_df.groupby('opt'):
compile_rate = per_opt_df['flag_compile'].mean()

print(
f"Optimization {opt}: Compile Rate: {compile_rate:.4f}")
print('-' * 30)

for opt, per_opt_df in eval_result_df.groupby('opt'):
compile_rate = per_opt_df['flag_compile'].mean()
rm_docker_cmd = "docker rm -f evaluate_in_docker"
result = subprocess.run(rm_docker_cmd, shell=True,
capture_output=True, text=True)
if result.returncode == 0:
print("Container evaluate_in_docker removed successfully")

print(
f"Optimization {opt}: Compile Rate: {compile_rate:.4f}")
print('-' * 30)

rm_docker_cmd = "docker rm -f evaluate_in_docker"
result = subprocess.run(rm_docker_cmd, shell=True,
capture_output=True, text=True)
if result.returncode == 0:
print("Container evaluate_in_docker removed successfully")
if __name__ == "__main__":
main()
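
To illustrate the redundancy flagged in the review comment above: with required=True, argparse aborts before main()'s body runs whenever the flag is missing, so the manual ValueError check is dead code. A standalone sketch:

import argparse

parser = argparse.ArgumentParser()
parser.add_argument('--decompiled-dataset', required=True)

# With the flag missing, parse_args() prints a usage error and exits
# with status 2, so execution never reaches any manual check.
args = parser.parse_args(['--decompiled-dataset', 'ds_path'])
print(args.decompiled_dataset)  # -> 'ds_path'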