diff --git a/graalpython/com.oracle.graal.python.test/src/tests/test_reparse.py b/graalpython/com.oracle.graal.python.test/src/tests/test_reparse.py
new file mode 100644
index 0000000000..ca2deb4c34
--- /dev/null
+++ b/graalpython/com.oracle.graal.python.test/src/tests/test_reparse.py
@@ -0,0 +1,220 @@
+# Copyright (c) 2026, Oracle and/or its affiliates. All rights reserved.
+# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+#
+# The Universal Permissive License (UPL), Version 1.0
+#
+# Subject to the condition set forth below, permission is hereby granted to any
+# person obtaining a copy of this software, associated documentation and/or
+# data (collectively the "Software"), free of charge and under any and all
+# copyright rights in the Software, and any and all patent rights owned or
+# freely licensable by each licensor hereunder covering either (i) the
+# unmodified Software as contributed to or provided by such licensor, or (ii)
+# the Larger Works (as defined below), to deal in both
+#
+# (a) the Software, and
+#
+# (b) any piece of software and/or hardware listed in the lrgrwrks.txt file if
+# one is included with the Software each a "Larger Work" to which the Software
+# is contributed by such licensors),
+#
+# without restriction, including without limitation the rights to copy, create
+# derivative works of, display, perform, and distribute the Software and make,
+# use, sell, offer for sale, import, export, have made, and have sold the
+# Software and the Larger Work(s), and to sublicense the foregoing rights on
+# either these or other terms.
+#
+# This license is subject to the following condition:
+#
+# The above copyright notice and either this complete permission notice or at a
+# minimum a reference to the UPL must be included in all copies or substantial
+# portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+
+import compileall
+import contextlib
+import os
+import re
+import socket
+import subprocess
+import sys
+import tempfile
+import time
+import unittest
+from pathlib import Path
+
+# Prepended to every child script: connect back to the parent and block until it sends a byte
+# (or closes the connection), which gives the parent a window to tamper with the files first.
+SYNC_PREAMBLE = '''
+import sys
+import socket
+
+with socket.create_connection(('localhost', int(sys.argv[1]))) as sock:
+    sock.recv(1)
+'''
+
+
+@contextlib.contextmanager
+def pyc_reparse(test_content, expect_success=True, python_options=()):
+    """Run ``test_content`` in a child interpreter that pauses before executing it.
+
+    The module is written to a temporary directory as ``example.py`` and compiled
+    to a ``.pyc``. The child is started with ``-m example`` and blocks in
+    ``SYNC_PREAMBLE`` until the ``with`` body has finished, so the body can tamper
+    with the files before the rest of the module executes.
+
+    Yields ``(source_path, pyc_path)``. After the body, the child's combined output
+    and exit status are checked: status 0 if ``expect_success``, otherwise status 1
+    and a ``SystemError`` that mentions ``--python.KeepBytecodeInMemory``.
+    ``python_options`` are extra options for the child interpreter. The test is
+    skipped unless running on GraalPy with the bytecode DSL interpreter.
+    """
+    if sys.implementation.name != "graalpy" or not __graalpython__.is_bytecode_dsl_interpreter:
+        raise unittest.SkipTest("Reparsing tests are only meaningful on bytecode DSL interpreter")
+    with tempfile.TemporaryDirectory() as tempdir:
+        tempdir_path = Path(tempdir)
+        example_module_path = tempdir_path / "example.py"
+        with open(example_module_path, "w") as f:
+            f.write(SYNC_PREAMBLE)
+            f.write(test_content)
+        # Change mtime of the example module source to the past a bit to avoid mtime resolution issues
+        past = time.time() - 1000
+        os.utime(example_module_path, (past, past))
+        compileall.compile_file(example_module_path, force=True, quiet=True)
+        pyc_files = list((tempdir_path / '__pycache__').glob('*.pyc'))
+        assert len(pyc_files) == 1, "Didn't find a .pyc file"
+        # The child connects to 'localhost', so listening on loopback is sufficient
+        with socket.create_server(('127.0.0.1', 0)) as server:
+            port = server.getsockname()[1]
+            env = os.environ.copy()
+            env['PYTHONPATH'] = str(tempdir_path)
+            proc = subprocess.Popen(
+                [sys.executable, *python_options, "-m", "example", str(port)],
+                env=env,
+                stdout=subprocess.PIPE,
+                stderr=subprocess.STDOUT,
+                text=True,
+            )
+            try:
+                server.settimeout(3.0)
+                for _ in range(20):
+                    try:
+                        sock = server.accept()[0]
+                        break
+                    except socket.timeout:
+                        assert proc.poll() is None, proc.communicate()[0]
+                else:
+                    raise AssertionError("Timed out waiting for connection")
+                # Yield outside of the accept() handler, so that an exception raised by the
+                # with-body is never mistaken for an accept timeout
+                with sock:
+                    yield example_module_path, pyc_files[0]
+                    sock.sendall(b"x")
+                out = proc.communicate()[0]
+                if expect_success:
+                    assert proc.returncode == 0, out
+                else:
+                    assert proc.returncode == 1 and re.search(r"SystemError:.*--python\.KeepBytecodeInMemory", out), out
+            finally:
+                # Don't leave the child or its pipe behind if the body or a check above failed
+                if proc.poll() is None:
+                    proc.kill()
+                    proc.wait()
+                proc.stdout.close()
+
+
+# Child script: installs a trace function after the module has been loaded and checks that
+# foo() is traced line by line.
+TRACING_TEST = '''
+import sys
+
+def foo():
+    a = 42
+    return a
+
+lines = []
+
+def tracefunc(frame, event, arg):
+    if event == "line" and frame.f_code is foo.__code__:
+        lines.append(frame.f_lineno)
+    return tracefunc
+
+sys.settrace(tracefunc)
+assert foo() == 42
+firstlineno = foo.__code__.co_firstlineno
+assert lines == [firstlineno + 1, firstlineno + 2], "Code didn't trace when expected"
+'''
+
+
+def test_reparse():
+    """Tracing works while the .pyc is left untouched."""
+    with pyc_reparse(TRACING_TEST):
+        pass
+
+
+def test_reparse_deleted():
+    """The child fails with the expected SystemError when the .pyc was deleted."""
+    with pyc_reparse(TRACING_TEST, expect_success=False) as (example_file, pyc_file):
+        pyc_file.unlink()
+
+
+def test_reparse_truncated():
+    """The child fails with the expected SystemError when the .pyc was truncated to zero length."""
+    with pyc_reparse(TRACING_TEST, expect_success=False) as (example_file, pyc_file):
+        with open(pyc_file, 'r+') as f:
+            f.truncate()
+
+
+def test_reparse_truncated_part():
+    """The child fails with the expected SystemError when the .pyc was cut down to 30 bytes."""
+    with pyc_reparse(TRACING_TEST, expect_success=False) as (example_file, pyc_file):
+        with open(pyc_file, 'r+') as f:
+            f.truncate(30)
+
+
+def test_reparse_modified():
+    """The child fails with the expected SystemError when the .pyc was recompiled from changed source."""
+    with pyc_reparse(TRACING_TEST, expect_success=False) as (example_file, pyc_file):
+        pyc_file.unlink()
+        with open(example_file, 'w') as f:
+            f.write(SYNC_PREAMBLE)
+            f.write(TRACING_TEST.replace('a = 42', 'a = 32'))
+        compileall.compile_file(example_file, force=True, quiet=True)
+        assert pyc_file.exists()
+
+
+def test_reparse_disabled():
+    """With --python.KeepBytecodeInMemory the child succeeds although the .pyc was deleted."""
+    options = ["--python.KeepBytecodeInMemory"]
+    with pyc_reparse(TRACING_TEST, python_options=options, expect_success=True) as (example_file, pyc_file):
+        pyc_file.unlink()
+
+
+# Child script: round-trips foo.__code__ through its co_code and checks that foo() still works.
+CO_CODE_TEST = '''
+def foo():
+    a = 42
+    return a
+
+assert foo() == 42
+foo.__code__ = foo.__code__.replace(co_code=foo.__code__.co_code)
+assert foo() == 42
+'''
+
+
+def test_reparse_co_code():
+    """Reading co_code works while the .pyc is left untouched."""
+    with pyc_reparse(CO_CODE_TEST):
+        pass
+
+
+def test_reparse_co_code_deleted():
+    """The child fails with the expected SystemError when co_code is read after the .pyc was deleted."""
+    with pyc_reparse(CO_CODE_TEST, expect_success=False) as (example_file, pyc_file):
+        pyc_file.unlink()
diff --git a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/PythonLanguage.java b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/PythonLanguage.java
index fa83888791..664704efba 100644
--- a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/PythonLanguage.java
+++ b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/PythonLanguage.java
@@ -207,6 +207,16 @@ public final class PythonLanguage extends TruffleLanguage {
     public static final int GRAALVM_MICRO;
     public static final String DEV_TAG;
 
+    /* Magic number used to mark pyc files */
+    public static final int MAGIC_NUMBER = 21000 + Compiler.BYTECODE_VERSION * 10;
+    public static final byte[] MAGIC_NUMBER_BYTES = new byte[4];
+
+    static {
+        PythonUtils.ARRAY_ACCESSOR_LE.putInt(PythonLanguage.MAGIC_NUMBER_BYTES, 0, PythonLanguage.MAGIC_NUMBER);
+        PythonLanguage.MAGIC_NUMBER_BYTES[2] = '\r';
+        PythonLanguage.MAGIC_NUMBER_BYTES[3] = '\n';
+    }
+
     /**
      * The version generated at build time is stored in an ASCII-compatible way.
Add build time, we * added the ordinal value of some base character (in this case {@code '!'}) to ensure that we diff --git a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/GraalPythonModuleBuiltins.java b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/GraalPythonModuleBuiltins.java index bed6d34c7b..042ab27c0c 100644 --- a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/GraalPythonModuleBuiltins.java +++ b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/GraalPythonModuleBuiltins.java @@ -44,11 +44,17 @@ import static com.oracle.graal.python.PythonLanguage.GRAALVM_MICRO; import static com.oracle.graal.python.PythonLanguage.GRAALVM_MINOR; import static com.oracle.graal.python.PythonLanguage.J_GRAALPYTHON_ID; +import static com.oracle.graal.python.PythonLanguage.MAGIC_NUMBER; +import static com.oracle.graal.python.PythonLanguage.MAGIC_NUMBER_BYTES; import static com.oracle.graal.python.PythonLanguage.RELEASE_LEVEL; import static com.oracle.graal.python.PythonLanguage.RELEASE_LEVEL_FINAL; import static com.oracle.graal.python.nodes.BuiltinNames.J_EXTEND; import static com.oracle.graal.python.nodes.BuiltinNames.J___GRAALPYTHON__; +import static com.oracle.graal.python.nodes.BuiltinNames.T_FORMAT; +import static com.oracle.graal.python.nodes.BuiltinNames.T_MTIME; import static com.oracle.graal.python.nodes.BuiltinNames.T_SHA3; +import static com.oracle.graal.python.nodes.BuiltinNames.T_SIZE; +import static com.oracle.graal.python.nodes.BuiltinNames.T__IMP; import static com.oracle.graal.python.nodes.BuiltinNames.T___GRAALPYTHON__; import static com.oracle.graal.python.nodes.BuiltinNames.T___MAIN__; import static com.oracle.graal.python.nodes.SpecialAttributeNames.T___NAME__; @@ -63,6 +69,7 @@ import static com.oracle.graal.python.runtime.exception.PythonErrorType.ImportError; import static 
com.oracle.graal.python.runtime.exception.PythonErrorType.SystemError; import static com.oracle.graal.python.runtime.exception.PythonErrorType.TypeError; +import static com.oracle.graal.python.util.PythonUtils.ARRAY_ACCESSOR_LE; import static com.oracle.graal.python.util.PythonUtils.TS_ENCODING; import static com.oracle.graal.python.util.PythonUtils.toTruffleStringUncached; import static com.oracle.graal.python.util.PythonUtils.tsLiteral; @@ -128,8 +135,10 @@ import com.oracle.graal.python.builtins.objects.str.StringUtils; import com.oracle.graal.python.builtins.objects.tuple.PTuple; import com.oracle.graal.python.lib.OsEnvironGetNode; +import com.oracle.graal.python.lib.PyNumberLongNode; import com.oracle.graal.python.lib.PyObjectCallMethodObjArgs; import com.oracle.graal.python.lib.PyObjectGetItem; +import com.oracle.graal.python.lib.PyObjectStrAsTruffleStringNode; import com.oracle.graal.python.nodes.ErrorMessages; import com.oracle.graal.python.nodes.PConstructAndRaiseNode; import com.oracle.graal.python.nodes.PRaiseNode; @@ -150,6 +159,9 @@ import com.oracle.graal.python.nodes.object.GetClassNode; import com.oracle.graal.python.nodes.object.GetOrCreateDictNode; import com.oracle.graal.python.nodes.statement.AbstractImportNode; +import com.oracle.graal.python.nodes.util.CannotCastException; +import com.oracle.graal.python.nodes.util.CastToJavaLongLossyNode; +import com.oracle.graal.python.nodes.util.CastToJavaStringNode; import com.oracle.graal.python.nodes.util.CastToTruffleStringNode; import com.oracle.graal.python.nodes.util.ToNativePrimitiveStorageNode; import com.oracle.graal.python.runtime.ExecutionContext; @@ -457,6 +469,102 @@ private static Object[] convertToObjectArray(TruffleString[] arr) { return objectArr; } + @Builtin(name = "load_bytecode_file", minNumOfPositionalArgs = 3) + @GenerateNodeFactory + abstract static class LoadBytecodeFileNode extends PythonBuiltinNode { + + static final TruffleString T_CHECK_HASH_BASED_PYCS = 
tsLiteral("check_hash_based_pycs");
+        static final TruffleString T__BOOTSTRAP = tsLiteral("_bootstrap");
+        public static final TruffleString T__VERBOSE_MESSAGE = tsLiteral("_verbose_message");
+        public static final TruffleString MESSAGE = tsLiteral("'{} matches {}'");
+
+        @Specialization
+        static Object doit(VirtualFrame frame, Object bytecodePath, Object sourcePath, Object statResult,
+                        @Bind Node inliningTarget,
+                        @Bind PythonContext context,
+                        @Cached("createFor($node)") BoundaryCallData boundaryCallData) {
+            Object savedState = BoundaryCallContext.enter(frame, boundaryCallData);
+            try {
+                return doLoadBytecodeFile(bytecodePath, sourcePath, statResult, inliningTarget, context);
+            } finally {
+                BoundaryCallContext.exit(frame, boundaryCallData, savedState);
+            }
+        }
+
+        @TruffleBoundary
+        private static Object doLoadBytecodeFile(Object bytecodePath, Object sourcePath, Object statResult, Node inliningTarget, PythonContext context) {
+            /*
+             * Loads a bytecode file (.pyc) after validating its header the way importlib does;
+             * returns None if validation or reading fails. Doing it here lets us trust that the
+             * code really comes from that file, so serialized DSL bytecode can be unloaded from
+             * memory and reparsed later. It also provides the CallTarget cache key for multicontext mode.
+             */
+            try {
+                // get_data
+                TruffleString strBytecodePath = PyObjectStrAsTruffleStringNode.executeUncached(bytecodePath);
+                TruffleFile bytecodeFile = context.getEnv().getPublicTruffleFile(strBytecodePath.toJavaStringUncached());
+                byte[] bytes = bytecodeFile.readAllBytes();
+                // _classify_pyc
+                if (bytes.length < 16 || !Arrays.equals(bytes, 0, 4, MAGIC_NUMBER_BYTES, 0, 4)) {
+                    return PNone.NONE;
+                }
+                int flags = ARRAY_ACCESSOR_LE.getInt(bytes, 4);
+                if ((flags & ~0b11) != 0) {
+                    return PNone.NONE;
+                }
+                long cacheKey;
+                boolean hashBased = (flags & 0b1) != 0;
+                // Note that mtime-based validation is the default, hashing is opt-in
+                if (hashBased) {
+                    boolean checkSource = (flags & 0b10) != 0;
+                    cacheKey = ARRAY_ACCESSOR_LE.getLong(bytes, 8); // source hash: header bytes 8..15
+                    String checkHashBasedPycs = "";
+                    try {
+                        checkHashBasedPycs = CastToJavaStringNode.getUncached().execute(context.lookupBuiltinModule(T__IMP).getAttribute(T_CHECK_HASH_BASED_PYCS));
+                    } catch (CannotCastException e) {
+                        // ignore
+                    }
+                    if (!checkHashBasedPycs.equals("never") && (checkSource || checkHashBasedPycs.equals("always"))) {
+                        // get_data
+                        TruffleString strSourcePath = PyObjectStrAsTruffleStringNode.executeUncached(sourcePath);
+                        TruffleFile sourceFile = context.getEnv().getPublicTruffleFile(strSourcePath.toJavaStringUncached());
+                        byte[] sourceBytes = sourceFile.readAllBytes();
+                        long sourceHash = ARRAY_ACCESSOR_LE.getLong(ImpModuleBuiltins.SourceHashNode.hashSource(MAGIC_NUMBER, sourceBytes, sourceBytes.length), 0);
+                        // _validate_hash_pyc
+                        if (cacheKey != sourceHash) {
+                            return PNone.NONE;
+                        }
+                    }
+                } else {
+                    // _validate_timestamp_pyc (importlib compares only the low 32 bits of mtime and size)
+                    Object mTimeObj = PyNumberLongNode.executeUncached(PyObjectGetItem.executeUncached(statResult, T_MTIME));
+                    long mTime = CastToJavaLongLossyNode.executeUncached(mTimeObj);
+                    if (Integer.toUnsignedLong(ARRAY_ACCESSOR_LE.getInt(bytes, 8)) != (mTime & 0xFFFFFFFFL)) {
+                        return PNone.NONE;
+                    }
+                    Object sizeObj = PyObjectGetItem.executeUncached(statResult, T_SIZE);
+                    if (sizeObj != PNone.NONE) {
+                        long size = CastToJavaLongLossyNode.executeUncached(sizeObj);
+                        if (Integer.toUnsignedLong(ARRAY_ACCESSOR_LE.getInt(bytes, 12)) != (size & 0xFFFFFFFFL)) {
+                            return PNone.NONE;
+                        }
+                    }
+                    cacheKey = ARRAY_ACCESSOR_LE.getLong(bytes, 8);
+                }
+                if (context.getOption(PythonOptions.VerboseFlag)) {
+                    Object message = PyObjectCallMethodObjArgs.executeUncached(MESSAGE, T_FORMAT, bytecodePath, sourcePath);
+                    CallNode.executeUncached(context.lookupBuiltinModule(T__BOOTSTRAP).getAttribute(T__VERBOSE_MESSAGE), message);
+                }
+                return MarshalModuleBuiltins.fromBytecodeFile(context, bytecodeFile, bytes, 16, bytes.length - 16, cacheKey);
+            } catch (MarshalModuleBuiltins.Marshal.MarshalError me) {
+                throw PRaiseNode.raiseStatic(inliningTarget, me.type, me.message, me.arguments);
+            } catch (IOException | SecurityException | UnsupportedOperationException | IllegalArgumentException e) {
+                LOGGER.fine(() -> PythonUtils.formatJString("Failed to load bytecode file using load_bytecode_file: %s", e));
+                return PNone.NONE;
+            }
+        }
+    }
+
     @Builtin(name = "read_file", minNumOfPositionalArgs = 1)
     @GenerateNodeFactory
     public abstract static class ReadFileNode extends PythonUnaryBuiltinNode {
diff --git a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/ImpModuleBuiltins.java b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/ImpModuleBuiltins.java
index 6296ba999f..b9b5feb9c7 100644
--- a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/ImpModuleBuiltins.java
+++ b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/ImpModuleBuiltins.java
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017, 2025, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2017, 2026, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
* * The Universal Permissive License (UPL), Version 1.0 @@ -92,7 +92,6 @@ import com.oracle.graal.python.builtins.objects.str.PString; import com.oracle.graal.python.builtins.objects.str.StringNodes; import com.oracle.graal.python.compiler.CodeUnit; -import com.oracle.graal.python.compiler.Compiler; import com.oracle.graal.python.lib.PyMemoryViewFromObject; import com.oracle.graal.python.lib.PyObjectGetAttr; import com.oracle.graal.python.lib.PyObjectLookupAttr; @@ -129,7 +128,6 @@ import com.oracle.truffle.api.frame.VirtualFrame; import com.oracle.truffle.api.interop.InteropLibrary; import com.oracle.truffle.api.library.CachedLibrary; -import com.oracle.truffle.api.memory.ByteArraySupport; import com.oracle.truffle.api.nodes.Node; import com.oracle.truffle.api.source.Source; import com.oracle.truffle.api.strings.TruffleString; @@ -238,14 +236,6 @@ public boolean run() { @Builtin(name = "get_magic") @GenerateNodeFactory public abstract static class GetMagic extends PythonBuiltinNode { - static final int MAGIC_NUMBER = 21000 + Compiler.BYTECODE_VERSION * 10; - static final byte[] MAGIC_NUMBER_BYTES = new byte[4]; - static { - ByteArraySupport.littleEndian().putInt(MAGIC_NUMBER_BYTES, 0, MAGIC_NUMBER); - MAGIC_NUMBER_BYTES[2] = '\r'; - MAGIC_NUMBER_BYTES[3] = '\n'; - } - @Specialization(guards = "isSingleContext()") PBytes runCachedSingleContext( @Cached(value = "getMagicNumberPBytes()", weak = true) PBytes magicBytes) { @@ -255,11 +245,11 @@ PBytes runCachedSingleContext( @Specialization(replaces = "runCachedSingleContext") PBytes run( @Bind PythonLanguage language) { - return PFactory.createBytes(language, MAGIC_NUMBER_BYTES); + return PFactory.createBytes(language, PythonLanguage.MAGIC_NUMBER_BYTES); } protected PBytes getMagicNumberPBytes() { - return PFactory.createBytes(PythonLanguage.get(this), MAGIC_NUMBER_BYTES); + return PFactory.createBytes(PythonLanguage.get(this), PythonLanguage.MAGIC_NUMBER_BYTES); } } diff --git 
a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/MarshalModuleBuiltins.java b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/MarshalModuleBuiltins.java index f3437eee7c..d827a87f2c 100644 --- a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/MarshalModuleBuiltins.java +++ b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/MarshalModuleBuiltins.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2024, 2025, Oracle and/or its affiliates. + * Copyright (c) 2024, 2026, Oracle and/or its affiliates. * Copyright (c) 2013, Regents of the University of California * * All rights reserved. @@ -25,12 +25,15 @@ */ package com.oracle.graal.python.builtins.modules; +import static com.oracle.graal.python.PythonLanguage.MAGIC_NUMBER_BYTES; +import static com.oracle.graal.python.builtins.PythonBuiltinClassType.SystemError; import static com.oracle.graal.python.builtins.PythonBuiltinClassType.ValueError; import static com.oracle.graal.python.builtins.modules.io.IONodes.T_READ; import static com.oracle.graal.python.builtins.modules.io.IONodes.T_READINTO; import static com.oracle.graal.python.builtins.modules.io.IONodes.T_WRITE; import static com.oracle.graal.python.nodes.StringLiterals.T_VERSION; import static com.oracle.graal.python.nodes.truffle.TruffleStringMigrationHelpers.isJavaString; +import static com.oracle.graal.python.util.PythonUtils.ARRAY_ACCESSOR_LE; import static com.oracle.graal.python.util.PythonUtils.EMPTY_BOOLEAN_ARRAY; import static com.oracle.graal.python.util.PythonUtils.EMPTY_DOUBLE_ARRAY; import static com.oracle.graal.python.util.PythonUtils.EMPTY_INT_ARRAY; @@ -51,11 +54,14 @@ import java.math.BigInteger; import java.nio.ByteBuffer; import java.nio.ByteOrder; +import java.nio.channels.SeekableByteChannel; import java.nio.charset.StandardCharsets; +import java.nio.file.StandardOpenOption; import java.util.ArrayList; +import 
java.util.Arrays; import java.util.HashMap; import java.util.List; -import java.util.function.Supplier; +import java.util.Set; import com.oracle.graal.python.PythonLanguage; import com.oracle.graal.python.annotations.ArgumentClinic; @@ -120,6 +126,7 @@ import com.oracle.graal.python.nodes.function.PythonBuiltinNode; import com.oracle.graal.python.nodes.function.builtins.PythonBinaryClinicBuiltinNode; import com.oracle.graal.python.nodes.function.builtins.PythonTernaryClinicBuiltinNode; +import com.oracle.graal.python.nodes.function.builtins.PythonUnaryClinicBuiltinNode; import com.oracle.graal.python.nodes.function.builtins.clinic.ArgumentClinicProvider; import com.oracle.graal.python.runtime.ExecutionContext.BoundaryCallContext; import com.oracle.graal.python.runtime.IndirectCallData.BoundaryCallData; @@ -135,9 +142,10 @@ import com.oracle.truffle.api.CompilerDirectives; import com.oracle.truffle.api.CompilerDirectives.TruffleBoundary; import com.oracle.truffle.api.RootCallTarget; +import com.oracle.truffle.api.TruffleFile; import com.oracle.truffle.api.bytecode.BytecodeConfig; import com.oracle.truffle.api.bytecode.BytecodeRootNodes; -import com.oracle.truffle.api.bytecode.serialization.BytecodeDeserializer; +import com.oracle.truffle.api.bytecode.serialization.ByteBufferDataInput; import com.oracle.truffle.api.bytecode.serialization.BytecodeSerializer; import com.oracle.truffle.api.bytecode.serialization.SerializationUtils; import com.oracle.truffle.api.dsl.Bind; @@ -262,18 +270,13 @@ static Object doit(VirtualFrame frame, Object file, } } - /* - * cache_key is a GraalPy-specific keyword used in the code cache in multi-context mode, - * together with the filename from the unmarshalled code - */ - @Builtin(name = "loads", minNumOfPositionalArgs = 1, numOfPositionalOnlyArgs = 1, parameterNames = {"bytes"}, keywordOnlyNames = {"cache_key"}) + @Builtin(name = "loads", minNumOfPositionalArgs = 1, numOfPositionalOnlyArgs = 1, parameterNames = {"bytes"}) 
@ArgumentClinic(name = "bytes", conversion = ClinicConversion.ReadableBuffer) - @ArgumentClinic(name = "cache_key", conversion = ClinicConversion.Long, defaultValue = "0") @GenerateNodeFactory - abstract static class LoadsNode extends PythonBinaryClinicBuiltinNode { + abstract static class LoadsNode extends PythonUnaryClinicBuiltinNode { @Specialization - static Object doit(VirtualFrame frame, Object buffer, long cacheKey, + static Object doit(VirtualFrame frame, Object buffer, @Bind Node inliningTarget, @Bind PythonContext context, @Cached("createFor($node)") InteropCallData callData, @@ -283,7 +286,8 @@ static Object doit(VirtualFrame frame, Object buffer, long cacheKey, try { byte[] bytes = bufferLib.getInternalOrCopiedByteArray(buffer); int length = bufferLib.getBufferLength(buffer); - if (!language.isSingleContext() && cacheKey < 0) { + long cacheKey = 0; + if (!language.isSingleContext()) { cacheKey = language.cacheKeyForBytecode(bytes, length); } return Marshal.load(context, bytes, length, cacheKey); @@ -303,7 +307,7 @@ protected ArgumentClinicProvider getArgumentClinic() { } - static final class Marshal { + public static final class Marshal { private static final char TYPE_NULL = '0'; private static final char TYPE_NONE = 'N'; private static final char TYPE_NOVALUE = 'n'; @@ -473,6 +477,10 @@ public int read(byte[] b, int off, int len) { final PInt pyFalse; int depth = 0; long cacheKey; + TruffleFile bytecodeFile; + // Offset of the buffer in parent buffer in nested deserializations + int baseOffset; + /* * A DSL node needs access to its Source during deserialization, but we do not wish to * actually encode it in the serialized representation. 
Instead, we supply a Source to the @@ -505,15 +513,20 @@ public int read(byte[] b, int off, int len) { } Marshal(PythonContext context, byte[] in, int length, long cacheKey) { - this(context, SerializationUtils.createDataInput(ByteBuffer.wrap(in, 0, length)), null); + this(context, SerializationUtils.createByteBufferDataInput(ByteBuffer.wrap(in, 0, length)), null, null, 0); + this.cacheKey = cacheKey; + } + + Marshal(PythonContext context, byte[] in, int length, long cacheKey, TruffleFile bytecodeFile, int baseOffset) { + this(context, SerializationUtils.createByteBufferDataInput(ByteBuffer.wrap(in, 0, length)), null, bytecodeFile, baseOffset); this.cacheKey = cacheKey; } Marshal(PythonContext context, Object in) { - this(context, new DataInputStream(new FileLikeInputStream(in)), null); + this(context, new DataInputStream(new FileLikeInputStream(in)), null, null, 0); } - Marshal(PythonContext context, DataInput in, Source source) { + Marshal(PythonContext context, DataInput in, Source source, TruffleFile bytecodeFile, int baseOffset) { this.context = context; this.in = in; this.source = source; @@ -524,6 +537,8 @@ public int read(byte[] b, int off, int len) { this.outData = null; this.out = null; this.refMap = null; + this.bytecodeFile = bytecodeFile; + this.baseOffset = baseOffset; } private PythonLanguage getLanguage() { @@ -1434,7 +1449,12 @@ private BytecodeDSLCodeUnit readBytecodeDSLCodeUnit() { "Attempted to deserialize a code object from the Bytecode DSL interpreter, but the manual interpreter is enabled. 
Consider clearing or setting a different pycache folder.")); } - byte[] serialized = readBytes(); + int bytecodeSize = readSize(); + int bytecodeOffset = -1; + if (in instanceof ByteBufferDataInput bufferIn) { + bytecodeOffset = baseOffset + bufferIn.position(); + } + byte[] serialized = readNBytes(bytecodeSize, new byte[bytecodeSize]); TruffleString name = readString(true); TruffleString qualname = readString(true); int argCount = readInt(); @@ -1457,8 +1477,9 @@ private BytecodeDSLCodeUnit readBytecodeDSLCodeUnit() { int classcellIndex = readInt(); int selfIndex = readInt(); + BytecodeSupplier provider = new BytecodeSupplier(serialized, bytecodeFile, bytecodeOffset, bytecodeSize, cacheKey); return new BytecodeDSLCodeUnit(name, qualname, argCount, kwOnlyArgCount, positionalOnlyArgCount, flags, names, varnames, cellvars, freevars, cell2arg, constants, - startLine, startColumn, endLine, endColumn, classcellIndex, selfIndex, serialized, null); + startLine, startColumn, endLine, endColumn, classcellIndex, selfIndex, provider); } private void writeCodeUnit(CodeUnit code) throws IOException { @@ -1507,7 +1528,6 @@ private void writeBytecodeCodeUnit(BytecodeCodeUnit code) throws IOException { writeIntArray(code.generalizeVarsValues); } - @SuppressWarnings("unchecked") private void writeBytecodeDSLCodeUnit(BytecodeDSLCodeUnit code) throws IOException { byte[] serialized = code.getSerialized(context); writeBytes(serialized); @@ -1568,6 +1588,8 @@ public static byte[] serializeCodeUnit(Node locationForRaise, PythonContext cont throw CompilerDirectives.shouldNotReachHere(e); } catch (Marshal.MarshalError me) { throw PRaiseNode.raiseStatic(locationForRaise, me.type, me.message, me.arguments); + } catch (ReparseError e) { + throw PRaiseNode.raiseStatic(locationForRaise, SystemError, ErrorMessages.FAILED_TO_REPARSE_BYTECODE_FILE); } } @@ -1583,40 +1605,118 @@ public static CodeUnit deserializeCodeUnit(Node node, PythonContext context, byt } } - public static BytecodeRootNodes 
deserializeBytecodeNodes(PythonContext context, Source source, byte[] serialized) { - try { - Supplier supplier = () -> SerializationUtils.createDataInput(ByteBuffer.wrap(serialized)); - return PBytecodeDSLRootNodeGen.deserialize(context.getLanguage(), BytecodeConfig.WITH_SOURCE, supplier, new MarshalModuleBuiltins.PBytecodeDSLDeserializer(source)); - } catch (IOException e) { - throw CompilerDirectives.shouldNotReachHere("Deserialization error."); + public static class BytecodeSupplier extends BytecodeDSLCodeUnit.BytecodeSupplier { + private byte[] serialized; + // Original file for reparsing + private final TruffleFile bytecodeFile; + // Offset within the bytecode file, points directly at the start of serialized bytecode + private final int bytecodeOffset; + private final int bytecodeSize; + private final long cacheKey; + + public BytecodeSupplier(byte[] serialized, TruffleFile bytecodeFile, int bytecodeOffset, int bytecodeSize, long cacheKey) { + this.serialized = serialized; + this.bytecodeFile = bytecodeFile; + this.bytecodeOffset = bytecodeOffset; + this.bytecodeSize = bytecodeSize; + this.cacheKey = cacheKey; + } + + @Override + public PBytecodeDSLRootNode createRootNode(PythonContext context, Source source) { + BytecodeRootNodes deserialized; + try { + deserialized = PBytecodeDSLRootNodeGen.deserialize(context.getLanguage(), BytecodeConfig.WITH_SOURCE, + () -> SerializationUtils.createByteBufferDataInput(ByteBuffer.wrap(getBytecode())), + /* + * NB: Since a DSL node may reparse multiple times, we cannot reuse + * a common Marshal object across calls (each call may take a + * different buffer). 
+ */ + (deserializerContext, buffer) -> { + Marshal marshal = new Marshal(PythonContext.get(null), buffer, source, bytecodeFile, bytecodeOffset); + marshal.cacheKey = cacheKey; + return marshal.readObject(); + }); + } catch (IOException e) { + throw CompilerDirectives.shouldNotReachHere("Deserialization error."); + } + if (bytecodeFile != null && bytecodeOffset >= 0 && cacheKey != 0 && !context.getOption(PythonOptions.KeepBytecodeInMemory)) { + // Free the serialized bytecode, we will fetch it from the file if needed again + serialized = null; + } + assert deserialized.count() == 1; + return deserialized.getNode(0); + } + + byte[] getBytecodeFromFile() { + try (SeekableByteChannel channel = bytecodeFile.newByteChannel(Set.of(StandardOpenOption.READ))) { + ByteBuffer buffer = ByteBuffer.allocate(16); + int read = channel.read(buffer); + byte[] header = buffer.array(); + if (read != 16) { + throw new ReparseError("EOF when reparsing: " + bytecodeFile); + } + if (!Arrays.equals(header, 0, 4, MAGIC_NUMBER_BYTES, 0, 4)) { + throw new ReparseError("Invalid bytecode file: " + bytecodeFile); + } + if (ARRAY_ACCESSOR_LE.getLong(header, 8) != cacheKey) { + throw new ReparseError("Bytecode file was modified (header mismatch): " + bytecodeFile); + } + buffer = ByteBuffer.allocate(bytecodeSize); + channel.position(bytecodeOffset); + read = channel.read(buffer); + if (read != bytecodeSize) { + throw new ReparseError("EOF when reparsing: " + bytecodeFile); + } + return buffer.array(); + } catch (IOException e) { + throw new ReparseError("IOError when reparsing: " + bytecodeFile); + } + } + + private byte[] getBytecode() { + if (serialized != null) { + return serialized; + } + return getBytecodeFromFile(); + } + + @Override + public byte[] createSerializedBytecode(PythonContext context) { + return getBytecode(); } } public static class PBytecodeDSLSerializer implements BytecodeSerializer { + private final PythonContext pythonContext; + + public PBytecodeDSLSerializer(PythonContext 
pythonContext) { + this.pythonContext = pythonContext; + } + public void serialize(SerializerContext context, DataOutput buffer, Object object) throws IOException { /* * NB: Since the deserializer uses a fresh Marshal instance for each object (see below) * we must also do the same here. Otherwise, the encoding may be different (e.g., a * reference for an already-emitted object). */ - PythonContext pythonContext = PythonContext.get(null); new Marshal(pythonContext, CURRENT_VERSION, pythonContext.getTrue(), pythonContext.getFalse(), buffer).writeObject(object); } } - public static class PBytecodeDSLDeserializer implements BytecodeDeserializer { - final Source source; + public static final class ReparseError extends RuntimeException { + static final long serialVersionUID = 5323687983726237119L; - public PBytecodeDSLDeserializer(Source source) { - this.source = source; + public ReparseError(String message) { + super(message); } + } - public Object deserialize(DeserializerContext context, DataInput buffer) throws IOException { - /* - * NB: Since a DSL node may reparse multiple times, we cannot reuse a common Marshal - * object across calls (each call may take a different buffer). 
- */ - return new Marshal(PythonContext.get(null), buffer, source).readObject(); - } + @TruffleBoundary + public static Object fromBytecodeFile(PythonContext context, TruffleFile file, byte[] bytes, int offset, int length, long cacheKey) throws IOException { + MarshalModuleBuiltins.Marshal marshal = new MarshalModuleBuiltins.Marshal(context, bytes, length + offset, cacheKey, file, 0); + marshal.in.skipBytes(offset); + return marshal.readObject(); } } diff --git a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/objects/code/CodeBuiltins.java b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/objects/code/CodeBuiltins.java index 955a74624f..3e663b23ce 100644 --- a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/objects/code/CodeBuiltins.java +++ b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/objects/code/CodeBuiltins.java @@ -287,8 +287,9 @@ static Object get(PCode self) { public abstract static class GetCodeNode extends PythonUnaryBuiltinNode { @Specialization static Object get(PCode self, + @Bind Node inliningTarget, @Bind PythonLanguage language) { - return self.co_code(language); + return self.co_code(language, inliningTarget); } } @@ -565,7 +566,7 @@ boolean eq(PCode self, PCode other, RichCmpOp op) { self.co_nlocals() != other.co_nlocals() || self.co_flags() != other.co_flags() || self.co_firstlineno() != other.co_firstlineno()) { return op.isNe(); } - if (!Arrays.equals(self.getCodestring(), other.getCodestring())) { + if (!Arrays.equals(self.getCodestring(this), other.getCodestring(this))) { return op.isNe(); } // TODO compare co_const @@ -594,7 +595,7 @@ static long hash(VirtualFrame frame, PCode self, long h, h0, h1, h2, h3, h4, h5, h6; h0 = hashNode.execute(frame, inliningTarget, self.co_name()); - h1 = hashNode.execute(frame, inliningTarget, self.co_code(language)); + h1 = hashNode.execute(frame, inliningTarget, self.co_code(language, inliningTarget)); h2 
= hashNode.execute(frame, inliningTarget, self.co_consts(language)); h3 = hashNode.execute(frame, inliningTarget, self.co_names(language)); h4 = hashNode.execute(frame, inliningTarget, self.co_varnames(language)); @@ -662,7 +663,7 @@ static PCode create(VirtualFrame frame, PCode self, int coArgcount, coNlocals == -1 ? self.co_nlocals() : coNlocals, coStacksize == -1 ? self.co_stacksize() : coStacksize, coFlags == -1 ? self.co_flags() : coFlags, - PGuards.isNone(coCode) ? self.getCodestring() : bufferLib.getInternalOrCopiedByteArray(coCode), + PGuards.isNone(coCode) ? self.getCodestring(inliningTarget) : bufferLib.getInternalOrCopiedByteArray(coCode), coConsts.length == 0 ? null : coConsts, coNames.length == 0 ? null : objectArrayToTruffleStringArray(inliningTarget, coNames, castToTruffleStringNode), coVarnames.length == 0 ? null : objectArrayToTruffleStringArray(inliningTarget, coVarnames, castToTruffleStringNode), diff --git a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/objects/code/PCode.java b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/objects/code/PCode.java index 771982f32f..7e4b524a9c 100644 --- a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/objects/code/PCode.java +++ b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/objects/code/PCode.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018, 2025, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2018, 2026, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * The Universal Permissive License (UPL), Version 1.0 @@ -88,6 +88,7 @@ import com.oracle.truffle.api.interop.UnsupportedMessageException; import com.oracle.truffle.api.library.ExportLibrary; import com.oracle.truffle.api.library.ExportMessage; +import com.oracle.truffle.api.nodes.Node; import com.oracle.truffle.api.nodes.RootNode; import com.oracle.truffle.api.object.Shape; import com.oracle.truffle.api.source.SourceSection; @@ -531,10 +532,10 @@ public TruffleString[] getVarnames() { return varnames; } - public byte[] getCodestring() { + public byte[] getCodestring(Node node) { RootNode rootNode = getRootNode(); if (rootNode instanceof PRootNode) { - return ((PRootNode) rootNode).getCode(); + return ((PRootNode) rootNode).getCode(node); } else { return PythonUtils.EMPTY_BYTE_ARRAY; } @@ -734,12 +735,8 @@ public TruffleString co_filename() { return fName; } - public PBytes co_code(PythonLanguage language) { - return createBytes(this.getCodestring(), language); - } - - public PBytes co_lnotab(PythonLanguage language) { - return createBytes(this.getLinetable(), language); + public PBytes co_code(PythonLanguage language, Node node) { + return createBytes(this.getCodestring(node), language); } public PTuple co_consts(PythonLanguage language) { diff --git a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/compiler/bytecode_dsl/RootNodeCompiler.java b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/compiler/bytecode_dsl/RootNodeCompiler.java index 7cea82010f..10ee77f971 100644 --- a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/compiler/bytecode_dsl/RootNodeCompiler.java +++ b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/compiler/bytecode_dsl/RootNodeCompiler.java @@ -64,6 +64,9 @@ import static com.oracle.graal.python.util.PythonUtils.toInternedTruffleStringUncached; import static com.oracle.graal.python.util.PythonUtils.toTruffleStringUncached; +import java.io.ByteArrayOutputStream; +import 
java.io.DataOutputStream; +import java.io.IOException; import java.util.ArrayList; import java.util.Arrays; import java.util.EnumSet; @@ -77,6 +80,7 @@ import java.util.function.Function; import com.oracle.graal.python.PythonLanguage; +import com.oracle.graal.python.builtins.modules.MarshalModuleBuiltins; import com.oracle.graal.python.builtins.objects.PNone; import com.oracle.graal.python.builtins.objects.code.PCode; import com.oracle.graal.python.builtins.objects.ellipsis.PEllipsis; @@ -141,6 +145,7 @@ import com.oracle.graal.python.pegparser.sst.UnaryOpTy; import com.oracle.graal.python.pegparser.sst.WithItemTy; import com.oracle.graal.python.pegparser.tokenizer.SourceRange; +import com.oracle.graal.python.runtime.PythonContext; import com.oracle.graal.python.util.PythonUtils; import com.oracle.truffle.api.CompilerDirectives; import com.oracle.truffle.api.bytecode.BytecodeConfig; @@ -148,7 +153,9 @@ import com.oracle.truffle.api.bytecode.BytecodeLocal; import com.oracle.truffle.api.bytecode.BytecodeParser; import com.oracle.truffle.api.bytecode.BytecodeRootNodes; +import com.oracle.truffle.api.bytecode.serialization.BytecodeSerializer; import com.oracle.truffle.api.instrumentation.StandardTags.StatementTag; +import com.oracle.truffle.api.source.Source; import com.oracle.truffle.api.strings.TruffleString; /** @@ -451,8 +458,7 @@ flags, orderedTruffleStringArray(names), sourceRange.endColumn, classcellIndex, selfIndex, - null, - nodes); + new BytecodeSupplier(nodes)); rootNode.setMetadata(codeUnit, ctx.errorCallback); if (codeUnit.isCoroutine() || codeUnit.isAsyncGenerator() || scope.isGeneratorWithYieldFrom()) { rootNode.yieldFromGeneratorIndex = yieldFromGenerator.getLocalIndex(); @@ -461,6 +467,31 @@ flags, orderedTruffleStringArray(names), return new BytecodeDSLCompilerResult(rootNode, codeUnit); } + static class BytecodeSupplier extends BytecodeDSLCodeUnit.BytecodeSupplier { + private final BytecodeRootNodes nodes; + + BytecodeSupplier(BytecodeRootNodes 
nodes) { + this.nodes = nodes; + } + + @Override + public PBytecodeDSLRootNode createRootNode(PythonContext context, Source source) { + return nodes.getNode(0); + } + + @Override + public byte[] createSerializedBytecode(PythonContext context) { + try { + BytecodeSerializer serializer = new MarshalModuleBuiltins.PBytecodeDSLSerializer(context); + ByteArrayOutputStream bytes = new ByteArrayOutputStream(); + nodes.serialize(new DataOutputStream(bytes), serializer); + return bytes.toByteArray(); + } catch (IOException e) { + throw CompilerDirectives.shouldNotReachHere(e); + } + } + } + private static class ArgumentInfo { static final ArgumentInfo NO_ARGS = new ArgumentInfo(0, 0, 0, false, false); diff --git a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/nodes/BuiltinNames.java b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/nodes/BuiltinNames.java index 9458722f2e..73a3402ec7 100644 --- a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/nodes/BuiltinNames.java +++ b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/nodes/BuiltinNames.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018, 2025, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2018, 2026, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * The Universal Permissive License (UPL), Version 1.0 @@ -531,4 +531,10 @@ private static TruffleString tsLiteral(String s) { public static final String J_UNICODEDATA = "unicodedata"; public static final TruffleString T_UNICODEDATA = tsLiteral(J_UNICODEDATA); + + public static final TruffleString T_MTIME = tsLiteral("mtime"); + public static final TruffleString T_SIZE = tsLiteral("size"); + + public static final TruffleString T__IMP = tsLiteral("_imp"); + public static final TruffleString T_FLAGS = tsLiteral("flags"); } diff --git a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/nodes/ErrorMessages.java b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/nodes/ErrorMessages.java index f705a1edac..4eb62e073d 100644 --- a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/nodes/ErrorMessages.java +++ b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/nodes/ErrorMessages.java @@ -750,7 +750,6 @@ public abstract class ErrorMessages { public static final TruffleString UNEXPECTED_S_IN_FIELD_NAME = tsLiteral("unexpected %s in field name"); public static final TruffleString UNDEFINED_CHARACTER_NAME = tsLiteral("undefined character name '%s'"); public static final TruffleString UNHASHABLE_TYPE_P = tsLiteral("unhashable type: '%p'"); - public static final TruffleString UNHASHABLE_TYPE = tsLiteral("unhashable type"); public static final TruffleString UNINITIALIZED_S_OBJECT = tsLiteral("uninitialized classmethod object"); public static final TruffleString UNKNOWN_ADDR_FAMILY = tsLiteral("unknown address family %d"); public static final TruffleString UNKNOWN_ATTR = tsLiteral("Unknown attribute: '%s'"); @@ -862,16 +861,8 @@ public abstract class ErrorMessages { public static final TruffleString INT_LATGER_THAN_32_BITS = tsLiteral("int larger than 32 bits"); public static final TruffleString UNKNOWN_FAMILY = tsLiteral("unknown family"); public static final TruffleString UNKNOWN_DIALECT = tsLiteral("unknown dialect"); - 
public static final TruffleString ONE_CHARACTER_BYTES_BYTEARRAY_INTEGER_EXPECTED = tsLiteral("one character bytes, bytearray or integer expected"); - public static final TruffleString ONE_CHARACTER_UNICODE_EXPECTED = tsLiteral("one character unicode string expected"); - public static final TruffleString STR_TOO_LONG = tsLiteral("string too long (%d, maximum length %d)"); - public static final TruffleString BYTES_TOO_LONG = tsLiteral("bytes too long (%d, maximum length %d)"); - public static final TruffleString BYTES_OR_INT_ADDR_EXPECTED_INSTEAD_OF_P = tsLiteral("bytes or integer address expected instead of %p instance"); - public static final TruffleString UNICODE_STR_OR_INT_ADDR_EXPECTED_INSTEAD_OF_P = tsLiteral("unicode string or integer address expected instead of %p instance"); public static final TruffleString N_NOT_SUBTYPE_OF_ARRAY = tsLiteral("%n is not a subtype of array"); public static final TruffleString FIRST_ARGUMENT_MUST_BE_A_TYPE_OBJECT_NOT_P = tsLiteral("first argument must be a type object, not %p"); - public static final TruffleString CANNOT_BE_CONVERTED_TO_POINTER = tsLiteral("cannot be converted to pointer"); - public static final TruffleString PY_OBJ_IS_NULL = tsLiteral("PyObject is NULL"); public static final TruffleString OUT_OF_RANGE_FLOAT_NOT_JSON_COMPLIANT = tsLiteral("Out of range float values are not JSON compliant: %s"); public static final TruffleString CIRCULAR_REFERENCE_DETECTED = tsLiteral("Circular reference detected"); public static final TruffleString ITEMS_MUST_RETURN_2_TUPLES = tsLiteral("items must return 2-tuples"); @@ -1190,124 +1181,18 @@ public abstract class ErrorMessages { public static final TruffleString ISSUBCLASS_ARG_2_CANNOT_CONTAIN_A_PARAMETERIZED_GENERIC = tsLiteral("issubclass() argument 2 cannot contain a parameterized generic"); public static final TruffleString ISSUBCLASS_ARG_1_MUST_BE_A_CLASS = tsLiteral("issubclass() arg 1 must be a class"); public static final TruffleString 
FOUND_NON_STR_S_IN_SKIP_FILE_PREFIXES = tsLiteral("Found non-str '%s' in skip_file_prefixes."); - - public static final TruffleString PASSING_STRUCTS_BY_VALUE_NOT_SUPPORTED = tsLiteral("Passing structs by value is not supported on NFI backend"); - public static final TruffleString RETURNING_STRUCT_BY_VALUE_NOT_SUPPORTED = tsLiteral("ctypes: returning struct by value is not supported."); - public static final TruffleString MEMORYVIEW_CANNOT_BE_CONVERTED_TO_NATIVE_MEMORY = tsLiteral("Memoryview cannot be converted to native memory"); - public static final TruffleString CANNOT_CONVERT_OBJECT_POINTER_TO_NATIVE = tsLiteral("Cannot convert Object pointer to native"); - public static final TruffleString CANNOT_APPLY_OFFSET_TO_AN_OBJECT_POINTER = tsLiteral("Cannot apply offset to an object pointer"); - public static final TruffleString S_SYMBOL_IS_MISSING = tsLiteral("%s symbol is missing"); - public static final TruffleString PACK_MUST_BE_A_NON_NEGATIVE_INTEGER = tsLiteral("_pack_ must be a non-negative integer"); - public static final TruffleString FIELDS_MUST_BE_A_SEQUENCE_OF_PAIRS = tsLiteral("'_fields_' must be a sequence of pairs"); - public static final TruffleString FIELDS_IS_FINAL = tsLiteral("_fields_ is final"); - public static final TruffleString SECOND_ITEM_IN_FIELDS_TUPLE_INDEX_D_MUST_BE_A_C_TYPE = tsLiteral("second item in _fields_ tuple (index %d) must be a C type"); - public static final TruffleString BIT_FIELDS_NOT_ALLOWED_FOR_TYPE_N = tsLiteral("bit fields not allowed for type %N"); - public static final TruffleString NUMBER_OF_BITS_INVALID_FOR_BIT_FIELD = tsLiteral("number of bits invalid for bit field"); - public static final TruffleString STRUCTURE_OR_UNION_CANNOT_CONTAIN_ITSELF = tsLiteral("Structure or union cannot contain itself"); - public static final TruffleString FIELDS_MUST_BE_A_SEQUENCE_OF_NAME_C_TYPE_PAIRS = tsLiteral("'_fields_' must be a sequence of (name, C type) pairs"); public static final TruffleString UNDERLYING_BUFFER_IS_NOT_WRITABLE 
= tsLiteral("underlying buffer is not writable"); - public static final TruffleString UNDERLYING_BUFFER_IS_NOT_C_CONTIGUOUS = tsLiteral("underlying buffer is not C contiguous"); - public static final TruffleString OFFSET_CANNOT_BE_NEGATIVE = tsLiteral("offset cannot be negative"); - public static final TruffleString BUFFER_SIZE_TOO_SMALL_D_INSTEAD_OF_AT_LEAST_D_BYTES = tsLiteral("Buffer size too small (%d instead of at least %d bytes)"); - public static final TruffleString THE_HANDLE_ATTRIBUTE_OF_THE_SECOND_ARGUMENT_MUST_BE_AN_INTEGER = tsLiteral("the _handle attribute of the second argument must be an integer"); - public static final TruffleString EXPECTED_P_INSTANCE_GOT_P = tsLiteral("expected %p instance, got %p"); - public static final TruffleString INCOMPATIBLE_TYPES_P_INSTANCE_INSTEAD_OF_P_INSTANCE = tsLiteral("incompatible types, %p instance instead of %p instance"); - public static final TruffleString CTYPES_OBJECT_STRUCTURE_TOO_DEEP = tsLiteral("ctypes object structure too deep"); - public static final TruffleString NOT_A_CTYPE_INSTANCE = tsLiteral("not a ctype instance"); - public static final TruffleString EXPECTED_P_INSTANCE_INSTEAD_OF_P = tsLiteral("expected %p instance instead of %p"); - public static final TruffleString ARRAY_LENGTH_MUST_BE_0_NOT_D = tsLiteral("Array length must be >= 0, not %d"); - public static final TruffleString EXPECTED_A_TYPE_OBJECT = tsLiteral("Expected a type object"); - public static final TruffleString HAS_NO_STGINFO = tsLiteral("has no _stginfo_"); - public static final TruffleString DON_T_KNOW_HOW_TO_CONVERT_PARAMETER_D = tsLiteral("Don't know how to convert parameter %d"); - public static final TruffleString MUST_BE_A_CTYPES_TYPE = tsLiteral("must be a ctypes type"); - public static final TruffleString NOT_A_CTYPES_TYPE_OR_OBJECT = tsLiteral("not a ctypes type or object"); - public static final TruffleString EXCEPTED_CTYPES_INSTANCE = tsLiteral("excepted ctypes instance"); - public static final TruffleString 
MINIMUM_SIZE_IS_D = tsLiteral("minimum size is %d"); - public static final TruffleString MEMORY_CANNOT_BE_RESIZED_BECAUSE_THIS_OBJECT_DOESN_T_OWN_IT = tsLiteral("Memory cannot be resized because this object doesn't own it"); - public static final TruffleString COULD_NOT_CONVERT_THE_HANDLE_ATTRIBUTE_TO_A_POINTER = tsLiteral("could not convert the _handle attribute to a pointer"); - public static final TruffleString NO_ALIGNMENT_INFO = tsLiteral("no alignment info"); - public static final TruffleString THIS_TYPE_HAS_NO_SIZE = tsLiteral("this type has no size"); - public static final TruffleString BYREF_ARGUMENT_MUST_BE_A_CTYPES_INSTANCE_NOT_P = tsLiteral("byref() argument must be a ctypes instance, not '%p'"); - public static final TruffleString INVALID_TYPE = tsLiteral("invalid type"); - public static final TruffleString TOO_MANY_ARGUMENTS_D_MAXIMUM_IS_D = tsLiteral("too many arguments (%d), maximum is %d"); - public static final TruffleString ARGUMENT_D = tsLiteral("argument %d: "); - public static final TruffleString FFI_CALL_FAILED = tsLiteral("ffi_call failed"); - public static final TruffleString FFI_PREP_CIF_FAILED = tsLiteral("ffi_prep_cif failed"); - public static final TruffleString INT_TOO_LONG_TO_CONVERT = tsLiteral("int too long to convert"); - public static final TruffleString CAST_ARGUMENT_2_MUST_BE_A_POINTER_TYPE_NOT_N = tsLiteral("cast() argument 2 must be a pointer type, not %N"); public static final TruffleString WRONG_TYPE = tsLiteral("wrong type"); - public static final TruffleString INVALID_RESULT_TYPE_FOR_CALLBACK_FUNCTION = tsLiteral("invalid result type for callback function"); - public static final TruffleString INVALID_INDEX = tsLiteral("invalid index"); - public static final TruffleString INDICES_MUST_BE_INTEGERS = tsLiteral("indices must be integers"); - public static final TruffleString CAN_ONLY_ASSIGN_SEQUENCE_OF_SAME_SIZE = tsLiteral("Can only assign sequence of same size"); - public static final TruffleString 
ARRAY_DOES_NOT_SUPPORT_ITEM_DELETION = tsLiteral("Array does not support item deletion"); - public static final TruffleString INDICES_MUST_BE_INTEGER = tsLiteral("indices must be integer"); - public static final TruffleString CLASS_MUST_DEFINE_A_LENGTH_ATTRIBUTE = tsLiteral("class must define a '_length_' attribute"); - public static final TruffleString THE_LENGTH_ATTRIBUTE_IS_TOO_LARGE = tsLiteral("The '_length_' attribute is too large"); - public static final TruffleString THE_LENGTH_ATTRIBUTE_MUST_BE_AN_INTEGER = tsLiteral("The '_length_' attribute must be an integer"); - public static final TruffleString THE_LENGTH_ATTRIBUTE_MUST_NOT_BE_NEGATIVE = tsLiteral("The '_length_' attribute must not be negative"); - public static final TruffleString CLASS_MUST_DEFINE_A_TYPE_ATTRIBUTE = tsLiteral("class must define a '_type_' attribute"); - public static final TruffleString TYPE_MUST_HAVE_STORAGE_INFO = tsLiteral("_type_ must have storage info"); - public static final TruffleString ARRAY_TOO_LARGE = tsLiteral("array too large"); - public static final TruffleString OUT_PARAMETER_D_MUST_BE_A_POINTER_TYPE_NOT_N = tsLiteral("'out' parameter %d must be a pointer type, not %N"); - public static final TruffleString ARGUMENT_MUST_BE_CALLABLE_OR_INTEGER_FUNCTION_ADDRESS = tsLiteral("argument must be callable or integer function address"); - public static final TruffleString CANNOT_CONSTRUCT_INSTANCE_OF_THIS_CLASS_NO_ARGTYPES = tsLiteral("cannot construct instance of this class: no argtypes"); - public static final TruffleString THE_ERRCHECK_ATTRIBUTE_MUST_BE_CALLABLE = tsLiteral("the errcheck attribute must be callable"); - public static final TruffleString RESTYPE_MUST_BE_A_TYPE_A_CALLABLE_OR_NONE = tsLiteral("restype must be a type, a callable, or None"); - public static final TruffleString THIS_FUNCTION_TAKES_AT_LEAST_D_ARGUMENT_S_D_GIVEN = tsLiteral("this function takes at least %d argument%s (%d given)"); - public static final TruffleString 
THIS_FUNCTION_TAKES_D_ARGUMENT_S_D_GIVEN = tsLiteral("this function takes %d argument%s (%d given)"); - public static final TruffleString REQUIRED_ARGUMENT_S_MISSING = tsLiteral("required argument '%s' missing"); - public static final TruffleString NOT_ENOUGH_ARGUMENTS = tsLiteral("not enough arguments"); public static final TruffleString NO_POSITIONAL_ARGUMENTS_EXPECTED = tsLiteral("no positional arguments expected"); - public static final TruffleString NULL_STGDICT_UNEXPECTED = tsLiteral("NULL stgdict unexpected"); - public static final TruffleString N_OUT_PARAMETER_MUST_BE_PASSED_AS_DEFAULT_VALUE = tsLiteral("%N 'out' parameter must be passed as default value"); - public static final TruffleString PARAMFLAG_D_NOT_YET_IMPLEMENTED = tsLiteral("paramflag %d not yet implemented"); - public static final TruffleString CALL_TAKES_EXACTLY_D_ARGUMENTS_D_GIVEN = tsLiteral("call takes exactly %d arguments (%d given)"); - public static final TruffleString PARAMFLAGS_MUST_BE_A_TUPLE_OR_NONE = tsLiteral("paramflags must be a tuple or None"); - public static final TruffleString PARAMFLAGS_MUST_HAVE_THE_SAME_LENGTH_AS_ARGTYPES = tsLiteral("paramflags must have the same length as argtypes"); - public static final TruffleString PARAMFLAGS_MUST_BE_A_SEQUENCE_OF_INT_STRING_VALUE_TUPLES = tsLiteral("paramflags must be a sequence of (int [,string [,value]]) tuples"); - public static final TruffleString PARAMFLAG_VALUE_D_NOT_SUPPORTED = tsLiteral("paramflag value %d not supported"); - public static final TruffleString CLASS_MUST_DEFINE_FLAGS_WHICH_MUST_BE_AN_INTEGER = tsLiteral("class must define _flags_ which must be an integer"); - public static final TruffleString ARGTYPES_MUST_BE_A_SEQUENCE_OF_TYPES = tsLiteral("_argtypes_ must be a sequence of types"); - public static final TruffleString RESTYPE_MUST_BE_A_TYPE_A_CALLABLE_OR_NONE1 = tsLiteral("_restype_ must be a type, a callable, or None"); - public static final TruffleString ITEM_D_IN_ARGTYPES_HAS_NO_FROM_PARAM_METHOD = 
tsLiteral("item %d in _argtypes_ has no from_param method"); - public static final TruffleString POINTER_DOES_NOT_SUPPORT_ITEM_DELETION = tsLiteral("Pointer does not support item deletion"); - public static final TruffleString EXPECTED_N_INSTEAD_OF_P = tsLiteral("expected %N instead of %p"); - public static final TruffleString CANNOT_CREATE_INSTANCE_HAS_NO_TYPE = tsLiteral("Cannot create instance: has no _type_"); - public static final TruffleString NULL_POINTER_ACCESS = tsLiteral("NULL pointer access"); - public static final TruffleString SLICE_START_IS_REQUIRED_FOR_STEP_0 = tsLiteral("slice start is required for step < 0"); - public static final TruffleString SLICE_STOP_IS_REQUIRED = tsLiteral("slice stop is required"); - public static final TruffleString POINTER_INDICES_MUST_BE_INTEGER = tsLiteral("Pointer indices must be integer"); - public static final TruffleString TYPE_MUST_BE_A_TYPE = tsLiteral("_type_ must be a type"); - public static final TruffleString EXPECTED_CDATA_INSTANCE = tsLiteral("expected CData instance"); - public static final TruffleString CLASS_MUST_DEFINE_A_TYPE_STRING_ATTRIBUTE = tsLiteral("class must define a '_type_' string attribute"); - public static final TruffleString A_TYPE_ATTRIBUTE_WHICH_MUST_BE_A_STRING_OF_LENGTH_1 = tsLiteral("class must define a '_type_' attribute which must be a string of length 1"); - public static final TruffleString WHICH_MUST_BE_A_SINGLE_CHARACTER_STRING_CONTAINING_ONE_OF_S = tsLiteral("class must define a '_type_' attribute which must be\n" + - "a single character string containing one of '%s'."); - public static final TruffleString TYPE_S_NOT_SUPPORTED = tsLiteral("_type_ '%s' not supported"); - public static final TruffleString S_IS_SPECIFIED_IN_ANONYMOUS_BUT_NOT_IN_FIELDS = tsLiteral("'%s' is specified in _anonymous_ but not in _fields_"); - public static final TruffleString ANONYMOUS_MUST_BE_A_SEQUENCE = tsLiteral("_anonymous_ must be a sequence"); - public static final TruffleString ABSTRACT_CLASS = 
tsLiteral("abstract class"); - public static final TruffleString UNEXPECTED_TYPE = tsLiteral("unexpected type"); - public static final TruffleString FIELDS_MUST_BE_A_SEQUENCE = tsLiteral("_fields_ must be a sequence"); - public static final TruffleString TOO_MANY_INITIALIZERS = tsLiteral("too many initializers"); - public static final TruffleString DUPLICATE_VALUES_FOR_FIELD_S = tsLiteral("duplicate values for field %s"); - public static final TruffleString EXPECTED_P_INSTANCE_INSTEAD_OF_POINTER_TO_P = tsLiteral("expected %p instance instead of pointer to %p"); - public static final TruffleString CTYPES_OBJECTS_CONTAINING_POINTERS_CANNOT_BE_PICKLED = tsLiteral("ctypes objects containing pointers cannot be pickled"); - public static final TruffleString P_DICT_MUST_BE_A_DICTIONARY_NOT_P = tsLiteral("%p.__dict__ must be a dictionary, not %p"); - public static final TruffleString STRING_TOO_LONG = tsLiteral("string too long"); - public static final TruffleString UNICODE_STRING_EXPECTED_INSTEAD_OF_P_INSTANCE = tsLiteral("unicode string expected instead of %p instance"); - public static final TruffleString BYTES_EXPECTED_INSTEAD_OF_P_INSTANCE = tsLiteral("bytes expected instead of %p instance"); - public static final TruffleString BYTE_STRING_TOO_LONG = tsLiteral("byte string too long"); public static final TruffleString UNKNOWN_CLOCK = tsLiteral("unknown clock"); public static final TruffleString S_ARG_N_MUST_SUPPORT_ITERATION = tsLiteral("%s arg %d must support iteration"); public static final TruffleString REDUCE_EMPTY_SEQ = tsLiteral("reduce() of empty sequence with no initial value"); public static final TruffleString OTHER_ARG_MUST_BE_KEY = tsLiteral("other argument must be K instance"); public static final TruffleString INVALID_PARTIAL_STATE = tsLiteral("invalid partial state"); public static final TruffleString LOST_S = tsLiteral("lost %s"); - public static final TruffleString CTYPES_FUNCTION_CALL_COULD_NOT_OBTAIN_FUNCTION_POINTER = tsLiteral("ctypes function 
call could not obtain function pointer"); public static final TruffleString UNABLE_TO_LOAD_LIBCRYPT = tsLiteral( "Unable to load libcrypt library. Please install libxcrypt-compat (RPM-based distributions) package or libcrypt1 (DEB-based distributions)."); + public static final TruffleString FAILED_TO_REPARSE_BYTECODE_FILE = tsLiteral("Failed to reload bytecode file. Use --python.KeepBytecodeInMemory to keep serialized bytecode in memory"); // ssl error messages public static final TruffleString SSL_ERR_DECODING_PEM_FILE_S = tsLiteral("Error decoding PEM-encoded file: %s"); @@ -1473,9 +1358,6 @@ public abstract class ErrorMessages { public static final TruffleString ARG_MUST_BE_A_SEQUENCE_OBJECT = tsLiteral("arg must be a sequence object"); public static final TruffleString STREAM_FUNCTION_RETURNED_A_NON_BYTES_OBJECT_P = tsLiteral("stream function returned a non-bytes object (%p)"); - public static final TruffleString CANNOT_BUILD_PARAMETER = tsLiteral("cannot build parameter"); - public static final TruffleString MEMORY_LEAK_IN_CALLBACK_FUNCTION = tsLiteral("memory leak in callback function."); - public static final TruffleString INDEX_EXCEEDS_INT = tsLiteral("index exceeds integer size"); public static final TruffleString X_NOT_IN_SEQUENCE = tsLiteral("sequence.index(x): x not in sequence"); public static final TruffleString ASYNC_FOR_NO_AITER = tsLiteral("'async for' requires object with __aiter__ method, got %N"); @@ -1648,7 +1530,6 @@ public abstract class ErrorMessages { public static final TruffleString DAYS_D_MUST_HAVE_MAGNITUDE_LESS_THAN_D = tsLiteral("days=%d; must have magnitude <= %d"); public static final TruffleString UNSUPPORTED_TYPE_FOR_TIMEDELTA_S_COMPONENT_P = tsLiteral("unsupported type for timedelta %s component: %p"); public static final TruffleString INTEGER_DIVISION_OR_MODULO_BY_ZERO = tsLiteral("integer division or modulo by zero"); - public static final TruffleString INTEGER_MODULE_BY_ZERO = tsLiteral("integer modulo by zero"); public static 
final TruffleString DATE_VALUE_OUT_OF_RANGE = tsLiteral("date value out of range"); public static final TruffleString FAILED_TO_ENCODE_LATIN1_STRING_WHEN_UNPICKLING_A_DATE_OBJECT = tsLiteral( "Failed to encode latin1 string when unpickling a date object. pickle.load(data, encoding='latin1') is assumed"); @@ -1683,8 +1564,6 @@ public abstract class ErrorMessages { "Failed to encode latin1 string when unpickling a time object. pickle.load(data, encoding='latin1') is assumed"); public static final TruffleString UNEXPECTED_RETURN_TYPE_FROM_AS_INTEGER_RATIO_EXPECTED_TUPLE_GOT_P = tsLiteral("unexpected return type from as_integer_ratio(): expected tuple, got '%p'"); public static final TruffleString AS_INTEGER_RATION_MUST_RETURN_A_2_TUPLE = tsLiteral("as_integer_ratio() must return a 2-tuple"); - public static final TruffleString DIVMOD_RETURNED_NON_TUPLE_P = tsLiteral("divmod() returned non-tuple (type %.200s)"); - public static final TruffleString DIVMOD_RETURNED_A_TUPLE_OF_SIZE_D = tsLiteral("divmod() returned a tuple of size %zd"); public static final TruffleString S_IS_A_BAD_DIRECTIVE_IN_FORMAT_S = tsLiteral("'%s' is a bad directive in format '%s'"); public static final TruffleString UNCONVERTED_DATA_REMAINS_S = tsLiteral("unconverted data remains: %s"); public static final TruffleString TIME_DATA_S_DOES_NOT_MATCH_FORMAT_S = tsLiteral("time data '%s' does not match format '%s'"); diff --git a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/nodes/PRootNode.java b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/nodes/PRootNode.java index d708da886c..c21f46535b 100644 --- a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/nodes/PRootNode.java +++ b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/nodes/PRootNode.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018, 2025, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2018, 2026, Oracle and/or its affiliates. All rights reserved. 
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * The Universal Permissive License (UPL), Version 1.0 @@ -153,14 +153,14 @@ public final void setCode(byte[] data) { } @TruffleBoundary - public final byte[] getCode() { + public final byte[] getCode(Node node) { if (code != null) { return code; } - return code = extractCode(); + return code = extractCode(node); } - protected byte[] extractCode() { + protected byte[] extractCode(Node node) { // no code for non-user functions return PythonUtils.EMPTY_BYTE_ARRAY; } diff --git a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/nodes/bytecode/PBytecodeGeneratorFunctionRootNode.java b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/nodes/bytecode/PBytecodeGeneratorFunctionRootNode.java index 83f979aeff..2eb9b045ed 100644 --- a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/nodes/bytecode/PBytecodeGeneratorFunctionRootNode.java +++ b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/nodes/bytecode/PBytecodeGeneratorFunctionRootNode.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018, 2025, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2018, 2026, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * The Universal Permissive License (UPL), Version 1.0 @@ -53,6 +53,7 @@ import com.oracle.truffle.api.RootCallTarget; import com.oracle.truffle.api.frame.FrameDescriptor; import com.oracle.truffle.api.frame.VirtualFrame; +import com.oracle.truffle.api.nodes.Node; import com.oracle.truffle.api.source.SourceSection; import com.oracle.truffle.api.strings.TruffleString; @@ -120,7 +121,7 @@ public PBytecodeRootNode getBytecodeRootNode() { } @Override - protected byte[] extractCode() { - return rootNode.extractCode(); + protected byte[] extractCode(Node node) { + return rootNode.extractCode(node); } } diff --git a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/nodes/bytecode/PBytecodeRootNode.java b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/nodes/bytecode/PBytecodeRootNode.java index a58ef8254c..72bb7f5225 100644 --- a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/nodes/bytecode/PBytecodeRootNode.java +++ b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/nodes/bytecode/PBytecodeRootNode.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018, 2025, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2018, 2026, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * The Universal Permissive License (UPL), Version 1.0 @@ -6194,7 +6194,7 @@ public boolean setsUpCalleeContext() { } @Override - protected byte[] extractCode() { + protected byte[] extractCode(Node node) { /* * CPython exposes individual items of code objects, like constants, as different members of * the code object and the co_code attribute contains just the bytecode. 
It would be better diff --git a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/nodes/bytecode_dsl/BytecodeDSLCodeUnit.java b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/nodes/bytecode_dsl/BytecodeDSLCodeUnit.java index 73a4439be5..0afe50e3dc 100644 --- a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/nodes/bytecode_dsl/BytecodeDSLCodeUnit.java +++ b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/nodes/bytecode_dsl/BytecodeDSLCodeUnit.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2025, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2025, 2026, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * The Universal Permissive License (UPL), Version 1.0 @@ -40,100 +40,53 @@ */ package com.oracle.graal.python.nodes.bytecode_dsl; -import java.io.ByteArrayOutputStream; -import java.io.DataOutputStream; -import java.io.IOException; - -import com.oracle.graal.python.builtins.modules.MarshalModuleBuiltins; -import com.oracle.graal.python.builtins.modules.MarshalModuleBuiltins.PBytecodeDSLSerializer; import com.oracle.graal.python.compiler.CodeUnit; import com.oracle.graal.python.runtime.PythonContext; import com.oracle.truffle.api.CompilerAsserts; -import com.oracle.truffle.api.CompilerDirectives; import com.oracle.truffle.api.CompilerDirectives.TruffleBoundary; -import com.oracle.truffle.api.bytecode.BytecodeRootNodes; -import com.oracle.truffle.api.bytecode.serialization.BytecodeSerializer; import com.oracle.truffle.api.nodes.RootNode; import com.oracle.truffle.api.source.Source; import com.oracle.truffle.api.strings.TruffleString; public final class BytecodeDSLCodeUnit extends CodeUnit { - /* - * A {@link BytecodeDSLCodeUnit} is a context-independent representation of a root node. It - * contains the bytes produced from Bytecode DSL serialization. 
- * - * Since it is expensive to serialize every root node, we perform serialization lazily using the - * {@link BytecodeNodes} produced during parsing. - * - * When this code unit is directly instantiated via unmarshaling, there is no {@link - * BytecodeNodes}; instead, we store the serialized bytes directly. - */ - private volatile byte[] serialized; - private final BytecodeRootNodes nodes; public final int classcellIndex; public final int selfIndex; + private final BytecodeSupplier supplier; public BytecodeDSLCodeUnit(TruffleString name, TruffleString qualname, int argCount, int kwOnlyArgCount, int positionalOnlyArgCount, int flags, TruffleString[] names, TruffleString[] varnames, TruffleString[] cellvars, TruffleString[] freevars, int[] cell2arg, Object[] constants, int startLine, int startColumn, int endLine, int endColumn, - int classcellIndex, int selfIndex, byte[] serialized, BytecodeRootNodes nodes) { + int classcellIndex, int selfIndex, BytecodeSupplier supplier) { super(name, qualname, argCount, kwOnlyArgCount, positionalOnlyArgCount, flags, names, varnames, cellvars, freevars, cell2arg, constants, startLine, startColumn, endLine, endColumn); - // Only one of these fields should be set. The other gets computed dynamically. 
- assert nodes == null || nodes.count() == 1; - assert serialized == null ^ nodes == null; - this.serialized = serialized; - this.nodes = nodes; this.classcellIndex = classcellIndex; this.selfIndex = selfIndex; + this.supplier = supplier; + } + + public abstract static class BytecodeSupplier { + public abstract PBytecodeDSLRootNode createRootNode(PythonContext context, Source source); + + public abstract byte[] createSerializedBytecode(PythonContext context); } public BytecodeDSLCodeUnit withFlags(int flags) { return new BytecodeDSLCodeUnit(name, qualname, argCount, kwOnlyArgCount, positionalOnlyArgCount, flags, names, varnames, cellvars, freevars, cell2arg, constants, - startLine, startColumn, endLine, endColumn, classcellIndex, selfIndex, serialized, nodes); + startLine, startColumn, endLine, endColumn, classcellIndex, selfIndex, supplier); } @TruffleBoundary public PBytecodeDSLRootNode createRootNode(PythonContext context, Source source) { - if (nodes != null) { - return nodes.getNode(0); - } // We must not cache deserialized root, because the code unit may be shared by multiple // engines. 
The caller is responsible for ensuring the caching of the resulting root node if // necessary - byte[] toDeserialize = getSerialized(context); - BytecodeRootNodes deserialized = MarshalModuleBuiltins.deserializeBytecodeNodes(context, source, toDeserialize); - assert deserialized.count() == 1; - PBytecodeDSLRootNode result = deserialized.getNode(0); - result.setMetadata(this, null); - return result; + PBytecodeDSLRootNode rootNode = supplier.createRootNode(context, source); + rootNode.setMetadata(this, null); + return rootNode; } public byte[] getSerialized(PythonContext context) { CompilerAsserts.neverPartOfCompilation(); - byte[] result = serialized; - if (result == null) { - synchronized (this) { - result = serialized; - if (result == null) { - result = serialized = computeSerialized(context); - } - } - } - return result; - } - - @SuppressWarnings("unchecked") - @TruffleBoundary - private byte[] computeSerialized(PythonContext context) { - try { - assert PythonContext.get(null) == context; - BytecodeSerializer serializer = new PBytecodeDSLSerializer(); - ByteArrayOutputStream bytes = new ByteArrayOutputStream(); - nodes.serialize(new DataOutputStream(bytes), serializer); - return bytes.toByteArray(); - } catch (IOException e) { - throw CompilerDirectives.shouldNotReachHere(e); - } + return supplier.createSerializedBytecode(context); } public TruffleString getDocstring() { @@ -146,15 +99,11 @@ public TruffleString getDocstring() { @Override protected void dumpBytecode(StringBuilder sb, boolean optimized, RootNode rootNode) { - if (nodes == null) { - if (rootNode instanceof PBytecodeDSLRootNode dslRoot) { - sb.append(dslRoot.dump()); - sb.append('\n'); - } - sb.append("bytecode not available\n"); + if (rootNode instanceof PBytecodeDSLRootNode dslRoot) { + sb.append(dslRoot.dump()); + sb.append('\n'); } else { - sb.append(nodes.getNode(0).dump()); - sb.append('\n'); // dump does not print newline at the end + sb.append("bytecode not available\n"); } } } diff --git 
a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/nodes/bytecode_dsl/PBytecodeDSLRootNode.java b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/nodes/bytecode_dsl/PBytecodeDSLRootNode.java index b4ec86d752..c30d5269d2 100644 --- a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/nodes/bytecode_dsl/PBytecodeDSLRootNode.java +++ b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/nodes/bytecode_dsl/PBytecodeDSLRootNode.java @@ -567,7 +567,12 @@ public final PythonThreadState getThreadState() { * Reparses with instrumentations for settrace and setprofile enabled. */ public final void ensureTraceAndProfileEnabled() { - getRootNodes().update(TRACE_AND_PROFILE_CONFIG); + try { + getRootNodes().update(TRACE_AND_PROFILE_CONFIG); + } catch (MarshalModuleBuiltins.ReparseError e) { + CompilerDirectives.transferToInterpreterAndInvalidate(); + throw PRaiseNode.raiseStatic(getBytecodeNode(), SystemError, ErrorMessages.FAILED_TO_REPARSE_BYTECODE_FILE); + } } private TracingNodes getTracingNodes(BytecodeNode location) { @@ -1010,8 +1015,8 @@ public static int lastiToBci(int lasti, BytecodeNode bytecodeNode) { } @Override - protected byte[] extractCode() { - return MarshalModuleBuiltins.serializeCodeUnit(null, PythonContext.get(this), co); + protected byte[] extractCode(Node node) { + return MarshalModuleBuiltins.serializeCodeUnit(node, PythonContext.get(node), co); } private static Object checkUnboundCell(PCell cell, int index, BytecodeNode bytecodeNode) { diff --git a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/runtime/PythonContext.java b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/runtime/PythonContext.java index 4c45e48aef..4eace446a8 100644 --- a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/runtime/PythonContext.java +++ b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/runtime/PythonContext.java @@ -613,9 +613,10 @@ public void 
pushInstrumentationData(PBytecodeDSLRootNode rootNode) { public void popInstrumentationData(PBytecodeDSLRootNode rootNode) { assert PythonOptions.ENABLE_BYTECODE_DSL_INTERPRETER; - assert instrumentationData != null : rootNode; - assert instrumentationData.getRootNode() == rootNode : String.format("%s != %s", instrumentationData.getRootNode(), rootNode); - instrumentationData = instrumentationData.getPrevious(); + if (instrumentationData != null) { + assert instrumentationData.getRootNode() == rootNode : String.format("%s != %s", instrumentationData.getRootNode(), rootNode); + instrumentationData = instrumentationData.getPrevious(); + } } public Object getAsyncgenFirstIter() { diff --git a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/runtime/PythonOptions.java b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/runtime/PythonOptions.java index bdbeea8bca..42d6177e6e 100644 --- a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/runtime/PythonOptions.java +++ b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/runtime/PythonOptions.java @@ -411,6 +411,11 @@ public static void checkBytecodeDSLEnv() { @Option(category = OptionCategory.EXPERT, usageSyntax = "true|false", help = "Print warnings when using experimental features at runtime.", stability = OptionStability.STABLE) // public static final OptionKey WarnExperimentalFeatures = new OptionKey<>(true); + @Option(category = OptionCategory.EXPERT, usageSyntax = "true|false", help = """ + By default GraalPy only keeps a transformed form of bytecode in memory and may need to reread bytecode files when a different form of bytecode is requested, \ + such as when settrace instrumentation is enabled. 
This option avoids rereading bytecode files by keeping the original bytecode form in memory""") // + public static final OptionKey KeepBytecodeInMemory = new OptionKey<>(false); + public static final OptionDescriptors DESCRIPTORS = new PythonOptionsOptionDescriptors(); @CompilationFinal(dimensions = 1) private static final OptionKey[] ENGINE_OPTION_KEYS; diff --git a/graalpython/lib-python/3/importlib/_bootstrap_external.py b/graalpython/lib-python/3/importlib/_bootstrap_external.py index 571b3089ee..d77cda6d5c 100644 --- a/graalpython/lib-python/3/importlib/_bootstrap_external.py +++ b/graalpython/lib-python/3/importlib/_bootstrap_external.py @@ -756,10 +756,9 @@ def _validate_hash_pyc(data, source_hash, name, exc_details): ) -# GraalPy change: add cache_key -def _compile_bytecode(data, name=None, bytecode_path=None, source_path=None, cache_key=-1): +def _compile_bytecode(data, name=None, bytecode_path=None, source_path=None): """Compile bytecode as found in a pyc.""" - code = marshal.loads(data, cache_key=cache_key) + code = marshal.loads(data) if isinstance(code, _code_type): _bootstrap._verbose_message('code object from {!r}', bytecode_path) if source_path is not None: @@ -1093,6 +1092,18 @@ def get_code(self, fullname): except OSError: pass else: + if bytecode_path is not None and type(self).get_data is FileLoader.get_data: + # GraalPy change: load the file in a way that we can trust for reparsing etc + if code := __graalpython__.load_bytecode_file(bytecode_path, source_path, st): + # From _compile_bytecode + if isinstance(code, _code_type): + _bootstrap._verbose_message('code object from {!r}', bytecode_path) + if source_path is not None: + _imp._fix_co_filename(code, source_path) + return code + else: + raise ImportError(f'Non-code object in {bytecode_path!r}', + name=fullname, path=bytecode_path) source_mtime = int(st['mtime']) try: data = self.get_data(bytecode_path) @@ -1103,8 +1114,6 @@ def get_code(self, fullname): 'name': fullname, 'path': 
bytecode_path, } - # GraalPy change: add cache_key - cache_key = 0 try: flags = _classify_pyc(data, fullname, exc_details) bytes_data = memoryview(data)[16:] @@ -1121,7 +1130,6 @@ def get_code(self, fullname): ) _validate_hash_pyc(data, source_hash, fullname, exc_details) - cache_key = int.from_bytes(source_hash, byteorder=sys.byteorder, signed=True) else: _validate_timestamp_pyc( data, @@ -1137,8 +1145,7 @@ def get_code(self, fullname): source_path) return _compile_bytecode(bytes_data, name=fullname, bytecode_path=bytecode_path, - source_path=source_path, - cache_key=cache_key) + source_path=source_path) if source_bytes is None: source_bytes = self.get_data(source_path) code_object = self.source_to_code(source_bytes, source_path)