diff --git a/CHANGELOG.md b/CHANGELOG.md index 6e0f207e2a..d908992c9c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -74,6 +74,12 @@ Other changes: * (zipapp) Resolve issue passing through compression settings in `py_zippapp_binary` targets ([#3646](https://github.com/bazel-contrib/rules_python/issues/3646)). +* (toolchains) The pyc created at runtime in the stdlib should no longer + cause the Python runtime repository to be invalidated. The stdlib pyc files + _may_ be reused in between invocations, depending upon the sandboxing + configuration. See the {any}`RULES_PYTHON_PYCACHE_DIR` environment variable + for more information. + ([#3643](https://github.com/bazel-contrib/rules_python/issues/3643)). {#v0-0-0-added} ### Added diff --git a/docs/environment-variables.md b/docs/environment-variables.md index fb48f434cd..d322f601a9 100644 --- a/docs/environment-variables.md +++ b/docs/environment-variables.md @@ -116,6 +116,34 @@ Valid values: * Other non-empty values mean to use isolated mode. ::: +:::{envvar} RULES_PYTHON_PYCACHE_DIR + +Determines the directory that runtime-generated pyc cache files will +be stored in. + +This directory may be reused between invocations, depending on the sandboxing +configuration. Setting it to `/dev/null` will, in effect, disable runtime +pyc caching. By setting e.g. +`--sandbox_add_mount_pair=/tmp/rules_python_pycache`, it's possible for pyc +caching to persist across invocations. + +**Behavior specific to downloaded runtimes:** +First `RULES_PYTHON_PYCACHE_DIR` is checked. If set, it is used as-is for +the root pycache directory. + +Otherwise, the following environment variables are checked in the following +order. Their values will have `rules_python_pycache` appended to them to form +the root pycache directory: +1. `XDG_CACHE_HOME`. +2. `TMP` (non-Windows) or `TEMP` (Windows). +3. The common platform-specific temporary directory (`/tmp` (non-Windows) or + `C:\Temp` (Windows)). 
+ +If such a directory cannot be found or created, then `/dev/null` will be used, +which will effectively disable pyc caching. + +::: + :::{envvar} RULES_PYTHON_REPO_DEBUG When `1`, repository rules will print debug information about what they're diff --git a/python/private/hermetic_runtime_repo_setup.bzl b/python/private/hermetic_runtime_repo_setup.bzl index c3c275546d..d860983e22 100644 --- a/python/private/hermetic_runtime_repo_setup.bzl +++ b/python/private/hermetic_runtime_repo_setup.bzl @@ -58,30 +58,35 @@ def define_hermetic_runtime_toolchain_impl( "major": version_info.release[0], "minor": version_info.release[1], } + files_include = [ + "bin/**", + "extensions/**", + "include/**", + "libs/**", + "share/**", + ] + files_include += extra_files_glob_include + files_exclude = [ + # Unused shared libraries. `python` executable and the `:libpython` target + # depend on `libpython{python_version}.so.1.0`. + "lib/libpython{major}.{minor}*.so".format(**version_dict), + # static libraries + "lib/**/*.a", + # tests for the standard libraries. + "lib/python{major}.{minor}*/**/test/**".format(**version_dict), + "lib/python{major}.{minor}*/**/tests/**".format(**version_dict), + # During pyc creation, temp files named *.pyc.NNN are created + "**/__pycache__/*.pyc.*", + ] + files_exclude += extra_files_glob_exclude + native.filegroup( name = "files", srcs = native.glob( - include = [ - "bin/**", - "extensions/**", - "include/**", - "libs/**", - "share/**", - ] + extra_files_glob_include, + include = files_include, # Platform-agnostic filegroup can't match on all patterns. allow_empty = True, - exclude = [ - # Unused shared libraries. `python` executable and the `:libpython` target - # depend on `libpython{python_version}.so.1.0`. - "lib/libpython{major}.{minor}*.so".format(**version_dict), - # static libraries - "lib/**/*.a", - # tests for the standard libraries. 
-                "lib/python{major}.{minor}*/**/test/**".format(**version_dict),
-                "lib/python{major}.{minor}*/**/tests/**".format(**version_dict),
-                # During pyc creation, temp files named *.pyc.NNN are created
-                "**/__pycache__/*.pyc.*",
-            ] + extra_files_glob_exclude,
+            exclude = files_exclude,
         ),
     )
     cc_import(
diff --git a/python/private/python_repository.bzl b/python/private/python_repository.bzl
index 3d54b8a26d..9c44971117 100644
--- a/python/private/python_repository.bzl
+++ b/python/private/python_repository.bzl
@@ -52,6 +52,114 @@ def is_standalone_interpreter(rctx, python_interpreter_path, *, logger = None):
         logger = logger,
     ).return_code == 0
 
+def _get_pycache_root(rctx):
+    """Calculates and creates the pycache root directory.
+
+    Candidates are tried in order: `RULES_PYTHON_PYCACHE_DIR`, `XDG_CACHE_HOME`,
+    `TMP`/`TEMP`, then the platform's conventional temp directory.
+
+    Args:
+        rctx: {type}`repository_ctx` The repository rule's context object.
+
+    Returns:
+        {type}`path | None` The path to the pycache root, or None if it couldn't
+        be created.
+    """
+    os_name = repo_utils.get_platforms_os_name(rctx)
+    is_windows = os_name == "windows"
+
+    # 1. RULES_PYTHON_PYCACHE_DIR
+    # An explicit setting is authoritative: if it can't be created, None is
+    # returned rather than falling through to the other candidates.
+    res = rctx.getenv("RULES_PYTHON_PYCACHE_DIR")
+    if res:
+        res = res + "/" + rctx.name
+        return repo_utils.mkdir(rctx, res)
+
+    # Suffix for cases 2-4
+    # The first level directory is static and documented so that it is easy to
+    # use with e.g. --sandbox_add_mount_pair=/tmp/rules_python_pycache
+    suffix = "rules_python_pycache/{}/{}".format(hash(str(rctx.workspace_root)), rctx.name)
+
+    # 2. XDG_CACHE_HOME
+    res = rctx.getenv("XDG_CACHE_HOME")
+    if res:
+        path = repo_utils.mkdir(rctx, rctx.path(res).get_child(suffix))
+        if path:
+            return path
+
+    # 3. TMP or TEMP
+    res = rctx.getenv("TMP") or rctx.getenv("TEMP")
+    if res:
+        path = repo_utils.mkdir(rctx, rctx.path(res).get_child(suffix))
+        if path:
+            return path
+
+    # 4. /tmp or Windows equivalent
+    if is_windows:
+        path = rctx.path("C:/Temp").get_child(suffix)
+    else:
+        path = rctx.path("/tmp").get_child(suffix)
+
+    return repo_utils.mkdir(rctx, path)
+
+def _create_pycache_symlinks(rctx, logger):
+    """Finds all directories with a .py file and creates __pycache__ symlinks.
+
+    Each `__pycache__` is pointed at a mirrored directory under the pycache
+    root. If the root, or a mirrored directory, can't be created, the symlink
+    points at the null device instead of being left dangling.
+
+    Args:
+        rctx: {type}`repository_ctx` The repository rule's context object.
+        logger: Logger used to report the chosen pycache root.
+    """
+    pycache_root = _get_pycache_root(rctx)
+    logger.info(lambda: "pycache root: {}".format(pycache_root))
+    pycache_root_str = str(pycache_root) if pycache_root else None
+
+    os_name = repo_utils.get_platforms_os_name(rctx)
+    null_device = "NUL" if os_name == "windows" else "/dev/null"
+
+    queue = [rctx.path(".")]
+
+    # Starlark doesn't support recursion, use a loop with a queue.
+    # Using a large range as a safeguard.
+    for _ in range(1000000):
+        if not queue:
+            break
+        p = queue.pop()
+
+        has_py = False
+        for child in p.readdir():
+            # Skip hidden files and directories
+            if child.basename.startswith("."):
+                continue
+
+            if child.is_dir:
+                if child.basename == "__pycache__" or str(child) == pycache_root_str:
+                    continue
+                queue.append(child)
+            elif child.basename.endswith(".py"):
+                has_py = True
+
+        if not has_py:
+            continue
+
+        pycache_dir = p.get_child("__pycache__")
+
+        # Default to the null device; only use the real cache directory once it
+        # is known to exist, so the symlink is never left dangling.
+        target = null_device
+        if pycache_root:
+            pycache_relative = repo_utils.repo_root_relative_path(rctx, pycache_dir)
+            target_dir = repo_utils.mkdir(rctx, pycache_root.get_child(pycache_relative))
+            if target_dir:
+                target = target_dir
+
+        rctx.delete(pycache_dir)
+        rctx.symlink(target, pycache_dir)
+
 def _python_repository_impl(rctx):
     if rctx.attr.distutils and rctx.attr.distutils_content:
         fail("Only one of (distutils, distutils_content) should be set.")
@@ -123,6 +231,7 @@ def _python_repository_impl(rctx):
             logger = logger,
         )
 
+    _create_pycache_symlinks(rctx, logger)
     python_bin = "python.exe" if ("windows" in platform) else "bin/python3"
 
     if "linux" in 
platform:
diff --git a/python/private/repo_utils.bzl b/python/private/repo_utils.bzl
index 702a333772..00f43f3521 100644
--- a/python/private/repo_utils.bzl
+++ b/python/private/repo_utils.bzl
@@ -319,6 +319,70 @@ def _which_describe_failure(binary_name, path):
         path = path,
     )
 
+def _mkdir(mrctx, path):
+    """Creates a directory, and any missing parents, if it doesn't exist.
+
+    Args:
+        mrctx: module_ctx or repository_ctx
+        path: {type}`str | path` the directory to create.
+
+    Returns:
+        {type}`path | None` The directory's path, or None if it couldn't be
+        created or exists but isn't a directory.
+    """
+    path = mrctx.path(path)
+    if path.exists:
+        # An existing non-directory (e.g. `/dev/null`) can't serve as a directory.
+        return path if path.is_dir else None
+
+    repo_root = str(mrctx.path("."))
+    path_str = str(path)
+
+    # Compare whole path segments so a sibling such as `<repo_root>_other`
+    # isn't mistaken for a location inside the repo.
+    if path_str.startswith(repo_root + "/"):
+        # Relies on mrctx.file() creating the missing parent directories; the
+        # placeholder file itself isn't needed afterwards.
+        placeholder = path.get_child(".placeholder")
+        mrctx.file(placeholder)
+        mrctx.delete(placeholder)
+        return path
+
+    # Outside the repo, shell out to `mkdir -p`.
+    mkdir_bin = mrctx.which("mkdir")
+    if not mkdir_bin:
+        return None
+    res = mrctx.execute([mkdir_bin, "-p", path_str])
+    if res.return_code != 0:
+        return None
+    return path
+
+def _repo_root_relative_path(mrctx, path):
+    """Takes a path object and returns a repo-relative path string.
+
+    Args:
+        mrctx: module_ctx or repository_ctx
+        path: {type}`path` a path within `mrctx`
+
+    Returns:
+        {type}`str` a repo-root-relative path string; empty if `path` is the
+        repo root itself.
+    """
+    repo_root = str(mrctx.path("."))
+    path_str = str(path)
+    if path_str == repo_root:
+        return ""
+
+    # Require the full `<repo_root>/` prefix; checking only the character after
+    # the prefix length would accept unrelated paths of a similar shape.
+    prefix = repo_root + "/"
+    if not path_str.startswith(prefix):
+        fail("{path} not under {repo_root}".format(
+            path = path,
+            repo_root = repo_root,
+        ))
+    return path_str[len(prefix):]
+
 def _args_to_str(arguments):
     return " ".join([_arg_repr(a) for a in arguments])
 
@@ -465,6 +529,8 @@ repo_utils = struct(
     get_platforms_os_name = _get_platforms_os_name,
     is_repo_debug_enabled = _is_repo_debug_enabled,
     logger = _logger,
+    mkdir = _mkdir,
+    repo_root_relative_path = _repo_root_relative_path,
     which_checked = _which_checked,
     which_unchecked = _which_unchecked,
 )