diff --git a/python/private/zipapp/py_zipapp_rule.bzl b/python/private/zipapp/py_zipapp_rule.bzl index ac7944726f..1655f63cc9 100644 --- a/python/private/zipapp/py_zipapp_rule.bzl +++ b/python/private/zipapp/py_zipapp_rule.bzl @@ -18,7 +18,7 @@ def _is_symlink(f): else: return "-1" -def _create_zipapp_main_py(ctx, py_runtime, py_executable, stage2_bootstrap): +def _create_zipapp_main_py(ctx, py_runtime, py_executable, stage2_bootstrap, runfiles): venv_python_exe = py_executable.venv_python_exe if venv_python_exe: venv_python_exe_path = runfiles_root_path(ctx, venv_python_exe.short_path) @@ -31,20 +31,40 @@ def _create_zipapp_main_py(ctx, py_runtime, py_executable, stage2_bootstrap): python_binary_actual_path = py_runtime.interpreter_path zip_main_py = ctx.actions.declare_file(ctx.label.name + ".zip_main.py") - ctx.actions.expand_template( - template = py_runtime.zip_main_template, - output = zip_main_py, - substitutions = { - "%EXTRACT_DIR%": paths.join( - (ctx.label.repo_name or "_main"), - ctx.label.package, - ctx.label.name, - ), - "%python_binary%": venv_python_exe_path, - "%python_binary_actual%": python_binary_actual_path, - "%stage2_bootstrap%": runfiles_root_path(ctx, stage2_bootstrap.short_path), - "%workspace_name%": ctx.workspace_name, - }, + + args = ctx.actions.args() + args.add(py_runtime.zip_main_template, format = "--template=%s") + args.add(zip_main_py, format = "--output=%s") + + args.add( + "%EXTRACT_DIR%=" + paths.join( + (ctx.label.repo_name or "_main"), + ctx.label.package, + ctx.label.name, + ), + format = "--substitution=%s", + ) + args.add("%python_binary%=" + venv_python_exe_path, format = "--substitution=%s") + args.add("%python_binary_actual%=" + python_binary_actual_path, format = "--substitution=%s") + args.add("%stage2_bootstrap%=" + runfiles_root_path(ctx, stage2_bootstrap.short_path), format = "--substitution=%s") + args.add("%workspace_name%=" + ctx.workspace_name, format = "--substitution=%s") + + hash_files_manifest = ctx.actions.args() + hash_files_manifest.use_param_file("--hash_files_manifest=%s", use_always = True) + hash_files_manifest.set_param_file_format("multiline") + + inputs = builders.DepsetBuilder() + inputs.add(py_runtime.zip_main_template) + _build_manifest(ctx, hash_files_manifest, runfiles, inputs) + + actions_run( + ctx, + executable = ctx.attr._zip_main_maker, + arguments = [args, hash_files_manifest], + inputs = inputs.build(), + outputs = [zip_main_py], + mnemonic = "PyZipAppCreateMainPy", + progress_message = "Generating zipapp __main__.py: %{label}", ) return zip_main_py @@ -60,9 +80,7 @@ def _map_zip_symlinks(entry): def _map_zip_root_symlinks(entry): return "rf-root-symlink|" + _is_symlink(entry.target_file) + "|" + entry.path + "|" + entry.target_file.path -def _build_manifest(ctx, manifest, runfiles, zip_main): - manifest.add("regular|0|__main__.py|{}".format(zip_main.path)) - +def _build_manifest(ctx, manifest, runfiles, inputs): manifest.add_all( # NOTE: Accessing runfiles.empty_filenames materializes them. A lambda # is used to defer that. @@ -75,7 +93,10 @@ def _build_manifest(ctx, manifest, runfiles, zip_main): manifest.add_all(runfiles.symlinks, map_each = _map_zip_symlinks) manifest.add_all(runfiles.root_symlinks, map_each = _map_zip_root_symlinks) - inputs = [zip_main] + inputs.add(runfiles.files) + inputs.add([entry.target_file for entry in runfiles.symlinks.to_list()]) + inputs.add([entry.target_file for entry in runfiles.root_symlinks.to_list()]) + zip_repo_mapping_manifest = maybe_create_repo_mapping( ctx = ctx, runfiles = runfiles, @@ -87,8 +108,7 @@ def _build_manifest(ctx, manifest, runfiles, zip_main): zip_repo_mapping_manifest.path, format = "rf-root-symlink|0|_repo_mapping|%s", ) - inputs.append(zip_repo_mapping_manifest) - return inputs + inputs.add(zip_repo_mapping_manifest) def _create_zip(ctx, py_runtime, py_executable, stage2_bootstrap): output = ctx.actions.declare_file(ctx.label.name + ".zip") @@ -106,8 +126,17 @@ def _create_zip(ctx, py_runtime, py_executable, stage2_bootstrap): runfiles = runfiles.build(ctx) - zip_main = _create_zipapp_main_py(ctx, py_runtime, py_executable, stage2_bootstrap) - inputs = _build_manifest(ctx, manifest, runfiles, zip_main) + zip_main = _create_zipapp_main_py( + ctx, + py_runtime, + py_executable, + stage2_bootstrap, + runfiles, + ) + inputs = builders.DepsetBuilder() + manifest.add("regular|0|__main__.py|{}".format(zip_main.path)) + inputs.add(zip_main) + _build_manifest(ctx, manifest, runfiles, inputs) zipper_args = ctx.actions.args() zipper_args.add(output) @@ -124,7 +153,7 @@ def _create_zip(ctx, py_runtime, py_executable, stage2_bootstrap): ctx, executable = ctx.attr._zipper, arguments = [manifest, zipper_args], - inputs = depset(inputs, transitive = [runfiles.files]), + inputs = inputs.build(), outputs = [output], mnemonic = "PyZipAppCreateZip", progress_message = "Reticulating zipapp archive: %{label} into %{output}", @@ -315,6 +344,10 @@ Whether the output should be an executable zip file. "@platforms//os:windows", ], ), + "_zip_main_maker": attr.label( + cfg = "exec", + default = "//tools/private/zipapp:zip_main_maker", + ), "_zip_shell_template": attr.label( default = ":zip_shell_template", allow_single_file = True, diff --git a/python/private/zipapp/zip_main_template.py b/python/private/zipapp/zip_main_template.py index 3c25d1d722..97c37fee0b 100644 --- a/python/private/zipapp/zip_main_template.py +++ b/python/private/zipapp/zip_main_template.py @@ -27,7 +27,7 @@ import subprocess import tempfile import zipfile -from os.path import dirname, join +from os.path import dirname, join, basename # runfiles-root-relative path _STAGE2_BOOTSTRAP = "%stage2_bootstrap%" @@ -39,8 +39,10 @@ _WORKSPACE_NAME = "%workspace_name%" # relative path under EXTRACT_ROOT to extract to. EXTRACT_DIR = "%EXTRACT_DIR%" +APP_HASH = "%APP_HASH%" EXTRACT_ROOT = os.environ.get("RULES_PYTHON_EXTRACT_ROOT") +IS_WINDOWS = os.name == "nt" def print_verbose(*args, mapping=None, values=None): @@ -67,10 +69,6 @@ def print_verbose(*args, mapping=None, values=None): print("bootstrap: stage 1:", *args, file=sys.stderr, flush=True) -# Return True if running on Windows -def is_windows(): - return os.name == "nt" - def get_windows_path_with_unc_prefix(path): """Adds UNC prefix after getting a normalized absolute Windows path. @@ -81,7 +79,7 @@ def get_windows_path_with_unc_prefix(path): # No need to add prefix for non-Windows platforms. # And \\?\ doesn't work in python 2 or on mingw - if not is_windows() or sys.version_info[0] < 3: + if not IS_WINDOWS or sys.version_info[0] < 3: return path # Starting in Windows 10, version 1607(OS build 14393), MAX_PATH limitations have been @@ -113,7 +111,7 @@ def has_windows_executable_extension(path): if ( _PYTHON_BINARY_VENV - and is_windows() + and IS_WINDOWS and not has_windows_executable_extension(_PYTHON_BINARY_VENV) ): _PYTHON_BINARY_VENV = _PYTHON_BINARY_VENV + ".exe" @@ -197,7 +195,14 @@ def extract_zip(zip_path, dest_dir): # Create the runfiles tree by extracting the zip file def create_runfiles_root(): if EXTRACT_ROOT: - extract_root = join(EXTRACT_ROOT, EXTRACT_DIR) + # Shorten the path for Windows in case long path support is disabled + if IS_WINDOWS: + hash_dir = APP_HASH[0:32] + extract_dir = basename(EXTRACT_DIR) + extract_root = join(EXTRACT_ROOT, extract_dir, hash_dir) + else: + extract_root = join(EXTRACT_ROOT, EXTRACT_DIR, APP_HASH) + extract_root = get_windows_path_with_unc_prefix(extract_root) else: extract_root = tempfile.mkdtemp("", "Bazel.runfiles_") extract_zip(dirname(__file__), extract_root) @@ -245,9 +250,9 @@ def execute_file( subprocess_argv.append(f"-XRULES_PYTHON_ZIP_DIR={dirname(runfiles_root)}") subprocess_argv.append(main_filename) subprocess_argv += args - print_verbose("subprocess argv:", values=subprocess_argv) print_verbose("subprocess env:", mapping=env) print_verbose("subprocess cwd:", workspace) + print_verbose("subprocess argv:", values=subprocess_argv) ret_code = subprocess.call(subprocess_argv, env=env, cwd=workspace) sys.exit(ret_code) finally: @@ -277,7 +282,7 @@ def main(): # The main Python source file. main_rel_path = _STAGE2_BOOTSTRAP - if is_windows(): + if IS_WINDOWS: main_rel_path = main_rel_path.replace("/", os.sep) runfiles_root = create_runfiles_root() diff --git a/tests/py_zipapp/system_python_zipapp_external_bootstrap_test.sh b/tests/py_zipapp/system_python_zipapp_external_bootstrap_test.sh index bb4ba640d3..e7396007d9 100755 --- a/tests/py_zipapp/system_python_zipapp_external_bootstrap_test.sh +++ b/tests/py_zipapp/system_python_zipapp_external_bootstrap_test.sh @@ -16,10 +16,17 @@ export RULES_PYTHON_BOOTSTRAP_VERBOSE=1 # We're testing the invocation of `__main__.py`, so we have to # manually pass the zipapp to python. +echo "=====================================================================" echo "Running zipapp using an automatic temp directory..." +echo "=====================================================================" "$PYTHON" "$ZIPAPP" +echo +echo + +echo "=====================================================================" echo "Running zipapp with extract root set..." +echo "=====================================================================" export RULES_PYTHON_EXTRACT_ROOT="${TEST_TMPDIR:-/tmp}/extract_root_test" "$PYTHON" "$ZIPAPP" @@ -29,5 +36,19 @@ if [[ ! -d "$RULES_PYTHON_EXTRACT_ROOT" ]]; then exit 1 fi +# On windows, the path is shortened to just the basename to avoid long path errors. +# Other platforms use the full path. +# Note: [ -d ... ] expands globs, while [[ -d ... ]] does not. +if [ -d "$RULES_PYTHON_EXTRACT_ROOT/_main/tests/py_zipapp/system_python_zipapp"/*/runfiles ]; then + echo "Found runfiles at $RULES_PYTHON_EXTRACT_ROOT/_main/tests/py_zipapp/system_python_zipapp/*/runfiles" +elif [ -d "$RULES_PYTHON_EXTRACT_ROOT/system_python_zipapp"/*/runfiles ]; then + echo "Found runfiles at $RULES_PYTHON_EXTRACT_ROOT/system_python_zipapp/*/runfiles" +else + echo "Error: Could not find 'runfiles' directory" + exit 1 +fi + +echo "=====================================================================" echo "Running zipapp with extract root set a second time..." +echo "=====================================================================" "$PYTHON" "$ZIPAPP" diff --git a/tests/tools/zipapp/BUILD.bazel b/tests/tools/zipapp/BUILD.bazel index 84902b76b8..b71e9b2589 100644 --- a/tests/tools/zipapp/BUILD.bazel +++ b/tests/tools/zipapp/BUILD.bazel @@ -11,3 +11,9 @@ py_test( srcs = ["exe_zip_maker_test.py"], deps = ["//tools/private/zipapp:exe_zip_maker_lib"], ) + +py_test( + name = "zip_main_maker_test", + srcs = ["zip_main_maker_test.py"], + deps = ["//tools/private/zipapp:zip_main_maker_lib"], +) diff --git a/tests/tools/zipapp/zip_main_maker_test.py b/tests/tools/zipapp/zip_main_maker_test.py new file mode 100644 index 0000000000..afcaf294d1 --- /dev/null +++ b/tests/tools/zipapp/zip_main_maker_test.py @@ -0,0 +1,101 @@ +import hashlib +import os +import tempfile +import unittest +from unittest import mock + +from tools.private.zipapp import zip_main_maker + + +class ZipMainMakerTest(unittest.TestCase): + def setUp(self): + self.temp_dir = tempfile.TemporaryDirectory() + self.addCleanup(self.temp_dir.cleanup) + + def test_creates_zip_main(self): + template_path = os.path.join(self.temp_dir.name, "template.py") + with open(template_path, "w", encoding="utf-8") as f: + f.write("hash=%APP_HASH%\nfoo=%FOO%\n") + + output_path = os.path.join(self.temp_dir.name, "output.py") + + file1_path = os.path.join(self.temp_dir.name, "file1.txt") + with open(file1_path, "wb") as f: + f.write(b"content1") + + file2_path = os.path.join(self.temp_dir.name, "file2.txt") + with open(file2_path, "wb") as f: + f.write(b"content2") + + # Add a symlink to test symlink hashing + symlink_path = os.path.join(self.temp_dir.name, "symlink.txt") + os.symlink(file1_path, symlink_path) + + manifest_path = os.path.join(self.temp_dir.name, "manifest.txt") + with open(manifest_path, "w", encoding="utf-8") as f: + f.write(f"rf-file|0|file1.txt|{file1_path}\n") + f.write(f"rf-file|0|file2.txt|{file2_path}\n") + f.write(f"rf-symlink|1|symlink.txt|{symlink_path}\n") + f.write(f"rf-empty|empty_file.txt\n") + + argv = [ + "zip_main_maker.py", + "--template", + template_path, + "--output", + output_path, + "--substitution", + "%FOO%=bar", + "--hash_files_manifest", + manifest_path, + ] + + with mock.patch("sys.argv", argv): + zip_main_maker.main() + + # Calculate expected hash + h = hashlib.sha256() + line1 = f"rf-file|0|file1.txt|{file1_path}" + line2 = f"rf-file|0|file2.txt|{file2_path}" + line3 = f"rf-symlink|1|symlink.txt|{symlink_path}" + line4 = f"rf-empty|empty_file.txt" + + # Sort lines like the program does + lines = sorted([line1, line2, line3, line4]) + for line in lines: + parts = line.split("|") + if len(parts) > 1: + _, rest = line.split("|", 1) + h.update(rest.encode("utf-8")) + else: + h.update(line.encode("utf-8")) + + type_ = parts[0] + if type_ == "rf-empty": + continue + if len(parts) >= 4: + is_symlink_str = parts[1] + path = parts[-1] + if not path: + continue + if is_symlink_str == "-1": + is_symlink = not os.path.exists(path) + else: + is_symlink = is_symlink_str == "1" + + if is_symlink: + h.update(os.readlink(path).encode("utf-8")) + else: + with open(path, "rb") as f: + h.update(f.read()) + + expected_hash = h.hexdigest() + + with open(output_path, "r", encoding="utf-8") as f: + content = f.read() + + self.assertEqual(content, f"hash={expected_hash}\nfoo=bar\n") + + +if __name__ == "__main__": + unittest.main() diff --git a/tools/BUILD.bazel b/tools/BUILD.bazel index 0fcce8f729..7829b33318 100644 --- a/tools/BUILD.bazel +++ b/tools/BUILD.bazel @@ -31,6 +31,7 @@ filegroup( "wheelmaker.py", "//tools/launcher:distribution", "//tools/precompiler:distribution", + "//tools/private:distribution", "//tools/publish:distribution", ], visibility = ["//:__pkg__"], diff --git a/tools/private/BUILD.bazel b/tools/private/BUILD.bazel index e69de29bb2..adc8de3b0f 100644 --- a/tools/private/BUILD.bazel +++ b/tools/private/BUILD.bazel @@ -0,0 +1,10 @@ +package( + default_visibility = ["//:__subpackages__"], +) + +filegroup( + name = "distribution", + srcs = glob(["**"]) + [ + "//tools/private/zipapp:distribution", + ], +) diff --git a/tools/private/zipapp/BUILD.bazel b/tools/private/zipapp/BUILD.bazel index 7a2002cd72..7420776ef3 100644 --- a/tools/private/zipapp/BUILD.bazel +++ b/tools/private/zipapp/BUILD.bazel @@ -34,3 +34,23 @@ py_library( name = "exe_zip_maker_lib", srcs = ["exe_zip_maker.py"], ) + +py_interpreter_program( + name = "zip_main_maker", + main = "zip_main_maker.py", + visibility = [ + # Not actually public. Only public so rules_python-generated toolchains + # are able to reference it. + "//visibility:public", + ], +) + +py_library( + name = "zip_main_maker_lib", + srcs = ["zip_main_maker.py"], +) + +filegroup( + name = "distribution", + srcs = glob(["**"]), +) diff --git a/tools/private/zipapp/zip_main_maker.py b/tools/private/zipapp/zip_main_maker.py new file mode 100644 index 0000000000..78ac17eb17 --- /dev/null +++ b/tools/private/zipapp/zip_main_maker.py @@ -0,0 +1,90 @@ +"""Creates the __main__.py for a zipapp by populating a template. + +This program also calculates a hash of the application files to include in +the template, which allows making the extraction directory unique to the +content of the zipapp. +""" + +import argparse +import hashlib +import os + +BLOCK_SIZE = 256 * 1024 + + +def create_parser() -> argparse.ArgumentParser: + parser = argparse.ArgumentParser(fromfile_prefix_chars="@") + parser.add_argument("--template", required=True) + parser.add_argument("--output", required=True) + parser.add_argument("--substitution", action="append", default=[]) + parser.add_argument( + "--hash_files_manifest", + required=True, + help="A file containing lines in rf-XXX formats (rf-empty, rf-file, rf-symlink, etc.)", + ) + return parser + + +def compute_inputs_hash(manifest_path: str) -> str: + h = hashlib.sha256() + with open(manifest_path, "r", encoding="utf-8") as f: + manifest_lines = f.read().splitlines() + + # Sort lines for determinism. Hash the paths (to capture structure) and the + # content. + for line in sorted(manifest_lines): + type_, _, rest = line.partition("|") + h.update(rest.encode("utf-8")) + parts = rest.split("|") + + if type_ == "rf-empty": + continue + + is_symlink_str = parts[0] + path = parts[-1] + + if is_symlink_str == "-1": + is_symlink = not os.path.exists(path) + else: + is_symlink = is_symlink_str == "1" + + if is_symlink: + h.update(os.readlink(path).encode("utf-8")) + else: + with open(path, "rb") as f: + while True: + chunk = f.read(BLOCK_SIZE) + if not chunk: + break + h.update(chunk) + + return h.hexdigest() + + +def expand_template(template_path: str, output_path: str, substitutions: dict) -> None: + with open(template_path, "r", encoding="utf-8") as f: + content = f.read() + + for key, val in substitutions.items(): + content = content.replace(key, val) + + with open(output_path, "w", encoding="utf-8") as f: + f.write(content) + + +def main(): + parser = create_parser() + args = parser.parse_args() + + app_hash = compute_inputs_hash(args.hash_files_manifest) + + substitutions = {"%APP_HASH%": app_hash} + for s in args.substitution: + key, val = s.split("=", 1) + substitutions[key] = val + + expand_template(args.template, args.output, substitutions) + + +if __name__ == "__main__": + main()