Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 1 addition & 2 deletions graalpython/com.oracle.graal.python.cext/src/unicodeobject.c
Original file line number Diff line number Diff line change
Expand Up @@ -3709,8 +3709,7 @@ PyObject*
PyUnicode_DecodeFSDefaultAndSize(const char *s, Py_ssize_t size)
{
// GraalPy change: different implementation
// TODO: this implementation does not honor Py_FileSystemDefaultEncoding and Py_FileSystemDefaultEncodeErrors
return GraalPyPrivate_Unicode_FromUTF((void*) s, size, 1);
return GraalPyPrivate_Unicode_DecodeFSDefaultAndSize((void*) s, size);
}


Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1069,9 +1069,10 @@ class TestPyUnicode(CPyExtTestCase):
)

test_PyUnicode_FSDecoder = CPyExtFunction(
lambda args: str(args[0]),
lambda args: os.fsdecode(args[0]),
lambda: (
(os.path.realpath(os_helper.TESTFN),),
(b"name-\xff",),
),
code='''PyObject* wrap_PyUnicode_FSDecoder(PyObject* path) {
PyObject* res;
Expand All @@ -1089,6 +1090,51 @@ class TestPyUnicode(CPyExtTestCase):
cmpfunc=unhandled_error_compare
)

# C-extension test for PyUnicode_DecodeFSDefault: the bytes argument is handed
# to the C wrapper below and the result is compared with os.fsdecode() of the
# same bytes.
test_PyUnicode_DecodeFSDefault = CPyExtFunction(
# presumably the reference (expected) result for each argument tuple -- confirm against CPyExtFunction
lambda args: os.fsdecode(args[0]),
# argument tuples: a plain ASCII name and a name ending in the byte 0xff
lambda: (
(b"name",),
(b"name-\xff",),
),
code='''PyObject* wrap_PyUnicode_DecodeFSDefault(PyObject* path) {
char* data;
Py_ssize_t size;
if (PyBytes_AsStringAndSize(path, &data, &size) < 0) {
return NULL;
}
return PyUnicode_DecodeFSDefault(data);
}
''',
resultspec="O",
argspec='O',
arguments=["PyObject* path"],
callfunction="wrap_PyUnicode_DecodeFSDefault",
cmpfunc=unhandled_error_compare
)

# C-extension test for PyUnicode_DecodeFSDefaultAndSize: the C wrapper below
# receives a bytes object plus an explicit size, and the result is compared
# with os.fsdecode() of the first `size` bytes.
test_PyUnicode_DecodeFSDefaultAndSize = CPyExtFunction(
# presumably the reference (expected) result for each argument tuple -- confirm against CPyExtFunction
lambda args: os.fsdecode(args[0][:args[1]]),
# argument tuples (bytes, size); the last one passes a size shorter than the
# bytes object, so only the prefix b"name-\xff" is covered by the slice above
lambda: (
(b"name", 4),
(b"name-\xff", 6),
(b"name-\xff-suffix", 6),
),
code='''PyObject* wrap_PyUnicode_DecodeFSDefaultAndSize(PyObject* path, Py_ssize_t size) {
char* data;
Py_ssize_t path_size;
if (PyBytes_AsStringAndSize(path, &data, &path_size) < 0) {
return NULL;
}
return PyUnicode_DecodeFSDefaultAndSize(data, size);
}
''',
resultspec="O",
argspec='On',
arguments=["PyObject* path", "Py_ssize_t size"],
callfunction="wrap_PyUnicode_DecodeFSDefaultAndSize",
cmpfunc=unhandled_error_compare
)


class TestUnicodeObject(unittest.TestCase):
def test_intern(self):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,7 @@
import static com.oracle.graal.python.builtins.objects.cext.common.CExtCommonNodes.getByteArray;
import static com.oracle.graal.python.builtins.objects.cext.structs.CStructAccess.writeLongField;
import static com.oracle.graal.python.builtins.objects.cext.structs.CStructAccess.writePtrField;
import static com.oracle.graal.python.lib.PyUnicodeFSDecoderNode.SURROGATE_ESCAPE_FROM_UTF8_TRANSCODING_ERROR_HANDLER;
import static com.oracle.graal.python.nodes.ErrorMessages.BAD_ARG_TYPE_FOR_BUILTIN_OP;
import static com.oracle.graal.python.nodes.ErrorMessages.PRECISION_TOO_LARGE;
import static com.oracle.graal.python.nodes.ErrorMessages.SEPARATOR_EXPECTED_STR_INSTANCE_P_FOUND;
Expand Down Expand Up @@ -912,6 +913,25 @@ static Object fsDecoder(Object arg,
}
}

@CApiBuiltin(ret = PyObjectTransfer, args = {ConstCharPtr, Py_ssize_t}, call = Ignored, acquireGil = false)
static long GraalPyPrivate_Unicode_DecodeFSDefaultAndSize(long s, long lsize) {
    /*
     * Reads lsize bytes from the native pointer s as UTF-8 and converts them to TS_ENCODING.
     * Input that is not valid UTF-8 is transcoded with the surrogate-escape error handler
     * instead of the default one.
     *
     * TODO: this implementation does not honor Py_FileSystemDefaultEncoding and
     * Py_FileSystemDefaultEncodeErrors
     */
    try {
        int byteCount = PInt.intValueExact(lsize);
        TruffleString utf8 = TruffleString.fromNativePointerUncached(s, 0, byteCount, UTF_8, true);
        TruffleString decoded = utf8.isValidUncached(UTF_8)
                        ? utf8.switchEncodingUncached(TS_ENCODING)
                        : utf8.switchEncodingUncached(TS_ENCODING, SURROGATE_ESCAPE_FROM_UTF8_TRANSCODING_ERROR_HANDLER);
        // implicitly promotes TruffleString to PString
        return PythonToNativeInternalNode.executeNewRefUncached(decoded);
    } catch (OverflowException e) {
        // an overflow while handling the requested size is surfaced to the caller as MemoryError
        throw PRaiseNode.raiseStatic(null, MemoryError);
    }
}

@CApiBuiltin(ret = PyObjectTransfer, args = {Pointer, Py_ssize_t, Int}, call = Ignored)
abstract static class GraalPyPrivate_Unicode_FromUTF extends CApiTernaryBuiltinNode {

Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2021, 2025, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2021, 2026, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* The Universal Permissive License (UPL), Version 1.0
Expand Down Expand Up @@ -42,6 +42,7 @@

import static com.oracle.graal.python.builtins.PythonBuiltinClassType.ValueError;
import static com.oracle.graal.python.util.PythonUtils.TS_ENCODING;
import static com.oracle.truffle.api.strings.TruffleString.Encoding.UTF_8;

import com.oracle.graal.python.builtins.objects.buffer.PythonBufferAccessLibrary;
import com.oracle.graal.python.builtins.objects.bytes.PBytes;
Expand All @@ -61,6 +62,8 @@
import com.oracle.truffle.api.frame.VirtualFrame;
import com.oracle.truffle.api.library.CachedLibrary;
import com.oracle.truffle.api.nodes.Node;
import com.oracle.truffle.api.strings.AbstractTruffleString;
import com.oracle.truffle.api.strings.TranscodingErrorHandler;
import com.oracle.truffle.api.strings.TruffleString;
import com.oracle.truffle.api.strings.TruffleString.Encoding;

Expand All @@ -71,6 +74,9 @@
@GenerateUncached
@GenerateInline(false)
public abstract class PyUnicodeFSDecoderNode extends PNodeWithContext {
public static final TranscodingErrorHandler SURROGATE_ESCAPE_FROM_UTF8_TRANSCODING_ERROR_HANDLER = PyUnicodeFSDecoderNode::surrogateEscapeTranscodingError;
public static final TranscodingErrorHandler SURROGATE_ESCAPE_TO_UTF8_TRANSCODING_ERROR_HANDLER = PyUnicodeFSDecoderNode::surrogateEscapeToUTF8Handler;

public abstract TruffleString execute(Frame frame, Object object);

@Specialization
Expand All @@ -91,11 +97,17 @@ static TruffleString doPString(PString object,
TruffleString doBytes(PBytes object,
@CachedLibrary("object") PythonBufferAccessLibrary bufferLib,
@Cached TruffleString.FromByteArrayNode fromByteArrayNode,
@Cached TruffleString.IsValidNode isValidNode,
@Cached TruffleString.SwitchEncodingNode switchEncodingNode,
@Shared("byteIndexOfCP") @Cached TruffleString.ByteIndexOfCodePointNode byteIndexOfCodePointNode) {
// TODO PyUnicode_DecodeFSDefault
TruffleString utf8 = fromByteArrayNode.execute(bufferLib.getCopiedByteArray(object), Encoding.UTF_8, false);
return checkString(this, switchEncodingNode.execute(utf8, TS_ENCODING), byteIndexOfCodePointNode);
TruffleString utf8 = fromByteArrayNode.execute(bufferLib.getCopiedByteArray(object), UTF_8, false);
TruffleString str;
if (isValidNode.execute(utf8, UTF_8)) {
str = switchEncodingNode.execute(utf8, TS_ENCODING);
} else {
str = switchEncodingNode.execute(utf8, TS_ENCODING, SURROGATE_ESCAPE_FROM_UTF8_TRANSCODING_ERROR_HANDLER);
}
return checkString(this, str, byteIndexOfCodePointNode);
}

@Fallback
Expand All @@ -114,4 +126,19 @@ private static TruffleString checkString(Node raisingNode, TruffleString str, Tr
}
return str;
}

/**
 * Transcoding error handler for the UTF-8 to TS_ENCODING direction: the byte found at
 * {@code byteIndex} is replaced by the single code point {@code 0xdc00 | byte}.
 */
private static TranscodingErrorHandler.ReplacementString surrogateEscapeTranscodingError(AbstractTruffleString sourceString, int byteIndex, int estimatedByteLength,
                Encoding sourceEncoding, Encoding targetEncoding) {
    assert sourceEncoding == UTF_8 && targetEncoding == TS_ENCODING;
    int offendingByte = sourceString.readByteUncached(byteIndex, UTF_8);
    assert offendingByte >= 0x80;
    TruffleString escaped = TruffleString.fromCodePointUncached(0xdc00 | offendingByte, TS_ENCODING, true);
    // second argument: presumably the number of source bytes this replacement stands for -- confirm
    return new TranscodingErrorHandler.ReplacementString(escaped, 1);
}

/**
 * Transcoding error handler for the TS_ENCODING to UTF-8 direction: the code point found at
 * {@code byteIndex} is narrowed to its low byte and that single byte is emitted as the
 * replacement.
 */
private static TranscodingErrorHandler.ReplacementString surrogateEscapeToUTF8Handler(AbstractTruffleString sourceString, int byteIndex,
                @SuppressWarnings("unused") int estimatedByteLength, Encoding sourceEncoding, Encoding targetEncoding) {
    assert sourceEncoding == TS_ENCODING && targetEncoding == UTF_8;
    int escapedCodePoint = sourceString.codePointAtByteIndexUncached(byteIndex, TS_ENCODING);
    // NOTE(review): the code point is narrowed without a range check; presumably only
    // 0xdc80..0xdcff is expected here -- confirm what should happen for other values
    byte[] rawByte = {(byte) escapedCodePoint};
    TruffleString replacement = TruffleString.fromByteArrayUncached(rawByte, TruffleString.Encoding.UTF_8);
    // 4: presumably the byte width of one code point in TS_ENCODING -- confirm
    return new TranscodingErrorHandler.ReplacementString(replacement, 4);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@
import static com.oracle.graal.python.annotations.NativeSimpleType.SINT32;
import static com.oracle.graal.python.annotations.NativeSimpleType.SINT64;
import static com.oracle.graal.python.annotations.NativeSimpleType.VOID;
import static com.oracle.graal.python.lib.PyUnicodeFSDecoderNode.SURROGATE_ESCAPE_TO_UTF8_TRANSCODING_ERROR_HANDLER;
import static com.oracle.graal.python.nodes.StringLiterals.T_NATIVE;
import static com.oracle.graal.python.runtime.NativePosixConstants.OFFSETOF_STRUCT_IN6_ADDR_S6_ADDR;
import static com.oracle.graal.python.runtime.NativePosixConstants.OFFSETOF_STRUCT_IN_ADDR_S_ADDR;
Expand Down Expand Up @@ -94,6 +95,7 @@
import com.oracle.graal.python.annotations.PythonOS;
import com.oracle.graal.python.builtins.PythonBuiltinClassType;
import com.oracle.graal.python.builtins.objects.exception.OSErrorEnum;
import com.oracle.graal.python.lib.PyUnicodeFSDecoderNode;
import com.oracle.graal.python.nodes.ErrorMessages;
import com.oracle.graal.python.nodes.PRaiseNode;
import com.oracle.graal.python.runtime.PosixSupportLibrary.AcceptResult;
Expand Down Expand Up @@ -131,6 +133,7 @@
import com.oracle.truffle.api.TruffleSafepoint;
import com.oracle.truffle.api.dsl.Bind;
import com.oracle.truffle.api.dsl.Cached;
import com.oracle.truffle.api.dsl.Cached.Exclusive;
import com.oracle.truffle.api.dsl.Cached.Shared;
import com.oracle.truffle.api.dsl.Specialization;
import com.oracle.truffle.api.library.CachedLibrary;
Expand Down Expand Up @@ -2540,9 +2543,9 @@ public AddrInfoCursor getaddrinfo(Object node, Object service, int family, int s
@ExportMessage
public TruffleString crypt(TruffleString word, TruffleString salt,
@Bind Node raisingNode,
@Shared("toUtf8") @Cached TruffleString.SwitchEncodingNode switchEncodingToUtf8Node,
@Shared("tsCopyBytes") @Cached TruffleString.CopyToByteArrayNode copyToByteArrayNode,
@Shared("cString") @Cached NativeMemory.ZeroTerminatedUtf8ToTruffleStringNode zeroTerminatedUtf8ToTruffleStringNode) throws PosixException {
@Exclusive @Cached TruffleString.SwitchEncodingNode switchEncodingToUtf8Node,
@Exclusive @Cached TruffleString.CopyToByteArrayNode copyToByteArrayNode,
@Exclusive @Cached NativeMemory.ZeroTerminatedUtf8ToTruffleStringNode zeroTerminatedUtf8ToTruffleStringNode) throws PosixException {
/*
* From the manpage: Upon successful completion, crypt returns a pointer to a string which
* encodes both the hashed passphrase, and the settings that were used to encode it. See
Expand Down Expand Up @@ -3242,9 +3245,10 @@ private int getSysConfPwdSizeMax() throws PosixException {
@ExportMessage
@SuppressWarnings("static-method")
public Object createPathFromString(TruffleString path,
@Shared("toUtf8") @Cached TruffleString.SwitchEncodingNode switchEncodingNode,
@Shared("tsCopyBytes") @Cached TruffleString.CopyToByteArrayNode copyToByteArrayNode) {
return checkPath(getStringBytes(path, switchEncodingNode, copyToByteArrayNode));
@Exclusive @Cached TruffleString.SwitchEncodingNode switchEncodingNode,
@Exclusive @Cached TruffleString.CopyToByteArrayNode copyToByteArrayNode) {
TruffleString utf8 = switchEncodingNode.execute(path, UTF_8, SURROGATE_ESCAPE_TO_UTF8_TRANSCODING_ERROR_HANDLER);
return checkPath(copyToByteArrayNode.execute(utf8, UTF_8));
}

@ExportMessage
Expand All @@ -3256,15 +3260,21 @@ public Object createPathFromBytes(byte[] path) {
@ExportMessage
@SuppressWarnings("static-method")
public TruffleString getPathAsString(Object path,
@Shared("tsFromBytes") @Cached TruffleString.FromByteArrayNode fromByteArrayNode,
@Shared("fromUtf8") @Cached TruffleString.SwitchEncodingNode switchEncodingNode) {
@Exclusive @Cached TruffleString.FromByteArrayNode fromByteArrayNode,
@Exclusive @Cached TruffleString.IsValidNode isValidNode,
@Exclusive @Cached TruffleString.SwitchEncodingNode switchEncodingNode) {
Buffer result = (Buffer) path;
if (result.length > Integer.MAX_VALUE) {
// sanity check that it is safe to cast result.length to int, to be removed once
// we support large arrays
throw CompilerDirectives.shouldNotReachHere("Posix path cannot fit into a Java array");
}
return createString(result.data, 0, (int) result.length, true, fromByteArrayNode, switchEncodingNode);
int length = (int) result.length;
TruffleString utf8 = fromByteArrayNode.execute(result.data, 0, length, UTF_8, true);
if (isValidNode.execute(utf8, UTF_8)) {
return switchEncodingNode.execute(utf8, TS_ENCODING);
}
return switchEncodingNode.execute(utf8, TS_ENCODING, PyUnicodeFSDecoderNode.SURROGATE_ESCAPE_FROM_UTF8_TRANSCODING_ERROR_HANDLER);
}

@ExportMessage
Expand All @@ -3275,13 +3285,11 @@ public Buffer getPathAsBytes(Object path) {

/**
 * Wraps {@code length} bytes of {@code src}, starting at {@code offset}, as a UTF-8 string and
 * switches it to TS_ENCODING. No transcoding error handler is passed here.
 */
private static TruffleString createString(byte[] src, int offset, int length, boolean copy, TruffleString.FromByteArrayNode fromByteArrayNode,
                TruffleString.SwitchEncodingNode switchEncodingNode) {
    // TODO PyUnicode_DecodeFSDefault
    return switchEncodingNode.execute(fromByteArrayNode.execute(src, offset, length, UTF_8, copy), TS_ENCODING);
}

private static byte[] getStringBytes(TruffleString str, TruffleString.SwitchEncodingNode switchEncodingNode, TruffleString.CopyToByteArrayNode copyToByteArrayNode) {
// TODO replace getBytes with PyUnicode_FSConverter equivalent
TruffleString utf8 = switchEncodingNode.execute(str, UTF_8);
byte[] bytes = new byte[utf8.byteLength(UTF_8)];
copyToByteArrayNode.execute(utf8, 0, bytes, 0, bytes.length, UTF_8);
Expand Down
1 change: 0 additions & 1 deletion graalpython/lib-python/3/test/test_import/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -1593,7 +1593,6 @@ def exec_module(*args):
else:
importlib.SourceLoader.exec_module = old_exec_module

@impl_detail("[GR-27024] [GR-23324] posix native support", graalpy=False)
@unittest.skipUnless(TESTFN_UNENCODABLE, 'need TESTFN_UNENCODABLE')
def test_unencodable_filename(self):
# Issue #11619: The Python parser and the import machinery must not
Expand Down
2 changes: 0 additions & 2 deletions graalpython/lib-python/3/test/test_unicode_file.py
Original file line number Diff line number Diff line change
Expand Up @@ -120,13 +120,11 @@ def _test_single(self, filename):

# The 'test' functions are unittest entry points, and simply call our
# _test functions with each of the filename combinations we wish to test
@impl_detail("[GR-27024] [GR-23324] posix native support", graalpy=False)
def test_single_files(self):
self._test_single(TESTFN_UNICODE)
if TESTFN_UNENCODABLE is not None:
self._test_single(TESTFN_UNENCODABLE)

@impl_detail("[GR-27024] [GR-23324] posix native support", graalpy=False)
def test_directories(self):
# For all 'equivalent' combinations:
# Make dir with encoded, chdir with unicode, checkdir with encoded
Expand Down
Loading