From 86adef58f4fd67744de960aea08d8bc02fbdf778 Mon Sep 17 00:00:00 2001 From: Pavel Koneski Date: Sun, 29 Dec 2024 22:05:22 -0800 Subject: [PATCH 1/4] Use PosixFileStream for files on POSIX --- Src/IronPython.Modules/mmap.cs | 130 +++++++--- Src/IronPython.Modules/nt.cs | 96 +++++-- Src/IronPython/Modules/_fileio.cs | 104 ++++---- Src/IronPython/Runtime/PosixFileStream.cs | 267 ++++++++++++++++++++ Src/IronPython/Runtime/PythonFileManager.cs | 16 +- Tests/test_file.py | 2 +- Tests/test_io_stdlib.py | 6 +- 7 files changed, 525 insertions(+), 96 deletions(-) create mode 100644 Src/IronPython/Runtime/PosixFileStream.cs diff --git a/Src/IronPython.Modules/mmap.cs b/Src/IronPython.Modules/mmap.cs index c512385c8..8c84bce0c 100644 --- a/Src/IronPython.Modules/mmap.cs +++ b/Src/IronPython.Modules/mmap.cs @@ -15,6 +15,8 @@ using System.Numerics; using System.Runtime.CompilerServices; using System.Runtime.InteropServices; +using System.Runtime.Serialization; +using System.Runtime.Versioning; using System.Text; using System.Threading; @@ -24,6 +26,7 @@ using IronPython.Runtime.Types; using Microsoft.Scripting.Utils; +using Microsoft.Win32.SafeHandles; [assembly: PythonModule("mmap", typeof(IronPython.Modules.MmapModule))] namespace IronPython.Modules { @@ -92,6 +95,7 @@ public class MmapDefault : IWeakReferenceable { private readonly long _offset; private readonly string _mapName; private readonly MemoryMappedFileAccess _fileAccess; + private readonly SafeFileHandle _handle; // only used on some POSIX platforms, null otherwise private volatile bool _isClosed; private int _refCount = 1; @@ -148,46 +152,65 @@ public MmapDefault(CodeContext/*!*/ context, int fileno, long length, string tag PythonContext pContext = context.LanguageContext; if (pContext.FileManager.TryGetStreams(fileno, out StreamBox streams)) { - if ((_sourceStream = streams.ReadStream as FileStream) == null) { - throw WindowsError(PythonExceptions._OSError.ERROR_INVALID_HANDLE); + Stream stream = streams.ReadStream; + if (stream is FileStream fs) { + _sourceStream = fs; + } else if (RuntimeInformation.IsOSPlatform(OSPlatform.OSX) || RuntimeInformation.IsOSPlatform(OSPlatform.Linux)) { + // use file descriptor +#if NET8_0_OR_GREATER + CheckFileAccess(_fileAccess, stream); + _handle = new SafeFileHandle((IntPtr)fileno, ownsHandle: false); + _file = MemoryMappedFile.CreateFromFile(_handle, _mapName, length, _fileAccess, HandleInheritability.None, leaveOpen: true); +#else + _handle = new SafeFileHandle((IntPtr)fileno, ownsHandle: false); + FileAccess fileAccess = stream.CanWrite ? stream.CanRead ? FileAccess.ReadWrite : FileAccess.Write : FileAccess.Read; + // This may or may not work on Mono, but on Mono streams.ReadStream is FileStream (unless dupped in some cases) + _sourceStream = new FileStream(_handle, fileAccess); +#endif } + // otherwise leaves _file as null and _sourceStream as null } else { throw PythonOps.OSError(PythonExceptions._OSError.ERROR_INVALID_BLOCK, "Bad file descriptor"); } - if (_fileAccess == MemoryMappedFileAccess.ReadWrite && !_sourceStream.CanWrite) { - throw WindowsError(PythonExceptions._OSError.ERROR_ACCESS_DENIED); - } + if (_file is null) { + // create _file form _sourceStream + if (_sourceStream is null) { + throw WindowsError(PythonExceptions._OSError.ERROR_INVALID_HANDLE); + } + + CheckFileAccess(_fileAccess, _sourceStream); - if (length == 0) { - length = _sourceStream.Length; if (length == 0) { - throw PythonOps.ValueError("cannot mmap an empty file"); - } - if (_offset >= length) { - throw PythonOps.ValueError("mmap offset is greater than file size"); + length = _sourceStream.Length; + if (length == 0) { + throw PythonOps.ValueError("cannot mmap an empty file"); + } + if (_offset >= length) { + throw PythonOps.ValueError("mmap offset is greater than file size"); + } + length -= _offset; } - length -= _offset; - } - long capacity = checked(_offset + length); + long capacity = checked(_offset + length); - // Enlarge the file as needed. - if (capacity > _sourceStream.Length) { - if (_sourceStream.CanWrite) { - _sourceStream.SetLength(capacity); - } else { - throw WindowsError(PythonExceptions._OSError.ERROR_NOT_ENOUGH_MEMORY); + // Enlarge the file as needed. + if (capacity > _sourceStream.Length) { + if (_sourceStream.CanWrite) { + _sourceStream.SetLength(capacity); + } else { + throw WindowsError(PythonExceptions._OSError.ERROR_NOT_ENOUGH_MEMORY); + } } - } - _file = CreateFromFile( - _sourceStream, - _mapName, - _sourceStream.Length, - _fileAccess, - HandleInheritability.None, - true); + _file = CreateFromFile( + _sourceStream, + _mapName, + _sourceStream.Length, + _fileAccess, + HandleInheritability.None, + true); + } } try { @@ -198,7 +221,24 @@ public MmapDefault(CodeContext/*!*/ context, int fileno, long length, string tag throw; } _position = 0L; - } + + void CheckFileAccess(MemoryMappedFileAccess mmapAccess, Stream stream) { + bool isValid = mmapAccess switch { + MemoryMappedFileAccess.Read => stream.CanRead, + MemoryMappedFileAccess.ReadWrite => stream.CanRead && stream.CanWrite, + MemoryMappedFileAccess.CopyOnWrite => stream.CanRead, + _ => false + }; + + if (!isValid) { + if (_handle is not null && _sourceStream is not null) { + _sourceStream.Dispose(); + } + throw PythonOps.OSError(PythonExceptions._OSError.ERROR_ACCESS_DENIED, "Invalid access mode"); + } + } + } // end of constructor + public object __len__() { using (new MmapLocker(this)) { @@ -325,6 +365,11 @@ private void CloseWorker() { _view.Flush(); _view.Dispose(); _file.Dispose(); + if (_handle is not null) { + // mmap owns _sourceStream too in this case + _sourceStream?.Dispose(); + _handle.Dispose(); + } _sourceStream = null; _view = null; _file = null; @@ -557,6 +602,11 @@ public void resize(long newsize) { } if (_sourceStream == null) { + if (_handle is not null && !_handle.IsInvalid + && (RuntimeInformation.IsOSPlatform(OSPlatform.OSX) || RuntimeInformation.IsOSPlatform(OSPlatform.Linux))) { + // resize on Posix platforms + PythonNT.ftruncateUnix(unchecked((int)_handle.DangerousGetHandle()), newsize); + } // resizing is not supported without an underlying file throw WindowsError(PythonExceptions._OSError.ERROR_INVALID_PARAMETER); } @@ -716,6 +766,9 @@ public void seek(long pos, int whence = SEEK_SET) { public object size() { using (new MmapLocker(this)) { + if (_handle is not null && (RuntimeInformation.IsOSPlatform(OSPlatform.OSX) || RuntimeInformation.IsOSPlatform(OSPlatform.Linux))) { + return GetFileSizeUnix(_handle); + } if (_sourceStream == null) return ReturnLong(_view.Capacity); return ReturnLong(new FileInfo(_sourceStream.Name).Length); } @@ -830,6 +883,25 @@ internal Bytes GetSearchString() { } } + [SupportedOSPlatform("linux"), SupportedOSPlatform("macos")] + private static long GetFileSizeUnix(SafeFileHandle handle) { + long size; + if (handle.IsInvalid) { + throw PythonOps.OSError(PythonExceptions._OSError.ERROR_INVALID_HANDLE, "Invalid file handle"); + } + + if (Mono.Unix.Native.Syscall.fstat((int)handle.DangerousGetHandle(), out Mono.Unix.Native.Stat status) == 0) { + size = status.st_size; + } else { + Mono.Unix.Native.Errno errno = Mono.Unix.Native.Stdlib.GetLastError(); + string msg = Mono.Unix.UnixMarshal.GetErrorDescription(errno); + int error = Mono.Unix.Native.NativeConvert.FromErrno(errno); + throw PythonOps.OSError(error, msg); + } + + return size; + } + #endregion #region Synchronization diff --git a/Src/IronPython.Modules/nt.cs b/Src/IronPython.Modules/nt.cs index 5811c9cf7..4622a62a5 100644 --- a/Src/IronPython.Modules/nt.cs +++ b/Src/IronPython.Modules/nt.cs @@ -431,22 +431,41 @@ public static int dup2(CodeContext/*!*/ context, int fd, int fd2) { } + [SupportedOSPlatform("linux"), SupportedOSPlatform("osx")] private static int UnixDup(int fd, int fd2, out Stream? stream) { int res = fd2 < 0 ? Mono.Unix.Native.Syscall.dup(fd) : Mono.Unix.Native.Syscall.dup2(fd, fd2); if (res < 0) throw GetLastUnixError(); if (ClrModule.IsMono) { - // This does not work on .NET, probably because .NET FileStream is not aware of Mono.Unix.UnixStream - stream = new Mono.Unix.UnixStream(res, ownsHandle: true); + // Elaborate workaround on Mono to avoid UnixStream as out + stream = new Mono.Unix.UnixStream(res, ownsHandle: false); + FileAccess fileAccess = stream.CanRead ? stream.CanWrite ? FileAccess.ReadWrite : FileAccess.Read : FileAccess.Write; + stream.Dispose(); + try { + // FileStream on Mono created with a file descriptor might not work: https://github.com/mono/mono/issues/12783 + // Test if it does, without closing the handle if it doesn't + var sfh = new SafeFileHandle((IntPtr)res, ownsHandle: false); + stream = new FileStream(sfh, fileAccess); + // No exception? Great! We can use FileStream. + stream.Dispose(); + sfh.Dispose(); + stream = null; // Create outside of try block + } catch (IOException) { + // Fall back to UnixStream + stream = new Mono.Unix.UnixStream(res, ownsHandle: true); + } + if (stream is null) { + // FileStream is safe + var sfh = new SafeFileHandle((IntPtr)res, ownsHandle: true); + stream = new FileStream(sfh, fileAccess); + } } else { - // This does not work 100% correctly on .NET, probably because each FileStream has its own read/write cursor - // (it should be shared between dupped descriptors) - //stream = new FileStream(new SafeFileHandle((IntPtr)res, ownsHandle: true), FileAccess.ReadWrite); - // Accidentaly, this would also not work on Mono: https://github.com/mono/mono/issues/12783 - stream = null; // Handle stream sharing in PythonFileManager + // normal case + stream = new PosixFileStream(res); } return res; } + #if FEATURE_PROCESS /// /// single instance of environment dictionary is shared between multiple runtimes because the environment @@ -470,6 +489,9 @@ public static object fstat(CodeContext/*!*/ context, int fd) { PythonFileManager fileManager = context.LanguageContext.FileManager; if (fileManager.TryGetStreams(fd, out StreamBox? streams)) { + if (RuntimeInformation.IsOSPlatform(OSPlatform.Linux) || RuntimeInformation.IsOSPlatform(OSPlatform.OSX)) { + return fstatUnix(fd); + } if (streams.IsConsoleStream()) return new stat_result(0x2000); if (streams.IsStandardIOStream()) return new stat_result(0x1000); if (StatStream(streams.ReadStream) is not null and var res) return res; @@ -483,15 +505,9 @@ public static object fstat(CodeContext/*!*/ context, int fd) { #endif if (RuntimeInformation.IsOSPlatform(OSPlatform.Windows)) { if (ReferenceEquals(stream, Stream.Null)) return new stat_result(0x2000); - } else if (RuntimeInformation.IsOSPlatform(OSPlatform.Linux) || RuntimeInformation.IsOSPlatform(OSPlatform.OSX)) { - if (IsUnixStream(stream)) return new stat_result(0x1000); } return null; } - - static bool IsUnixStream(Stream stream) { - return stream is Mono.Unix.UnixStream; - } } public static void fsync(CodeContext context, int fd) { @@ -869,7 +885,16 @@ public static void mkdir(CodeContext context, object? path, [ParamDictionary, No private const int DefaultBufferSize = 4096; - [Documentation("open(path, flags, mode=511, *, dir_fd=None)")] + [Documentation(""" + open(path, flags, mode=511, *, dir_fd=None) + + Open a file for low level IO. Returns a file descriptor (integer). + + If dir_fd is not None, it should be a file descriptor open to a directory, + and path should be relative; path will then be relative to that directory. + dir_fd may not be implemented on your platform. + If it is unavailable, using it will raise a NotImplementedError. + """)] public static object open(CodeContext/*!*/ context, [NotNone] string path, int flags, [ParamDictionary, NotNone] IDictionary kwargs, [NotNone] params object[] args) { var numArgs = args.Length; CheckOptionalArgsCount(numRegParms: 2, numOptPosParms: 1, numKwParms: 1, numArgs, kwargs.Count); @@ -889,12 +914,23 @@ public static object open(CodeContext/*!*/ context, [NotNone] string path, int f } } + if ((RuntimeInformation.IsOSPlatform(OSPlatform.Linux) || RuntimeInformation.IsOSPlatform(OSPlatform.OSX)) && !ClrModule.IsMono) { + // Use PosixFileStream to operate on fd directly + // On Mono, we must use FileStream due to limitations in MemoryMappedFile + Stream s = PosixFileStream.Open(path, flags, unchecked((uint)mode), out int fd); + //Stream s = PythonIOModule.FileIO.OpenFilePosix(path, flags, mode, out int fd); + if ((flags & O_APPEND) != 0) { + s.Seek(0L, SeekOrigin.End); + } + return context.LanguageContext.FileManager.Add(fd, new(s)); + } + try { FileMode fileMode = FileModeFromFlags(flags); FileAccess access = FileAccessFromFlags(flags); FileOptions options = FileOptionsFromFlags(flags); Stream s; // the stream opened to acces the file - FileStream? fs; // downcast of s if s is FileStream (this is always the case on POSIX) + FileStream? fs; // downcast of s if s is FileStream Stream? rs = null; // secondary read stream if needed, otherwise same as s if (RuntimeInformation.IsOSPlatform(OSPlatform.Windows) && IsNulFile(path)) { fs = null; @@ -1436,6 +1472,13 @@ private static object statUnix(string path) { return LightExceptions.Throw(GetLastUnixError(path)); } + private static object fstatUnix(int fd) { + if (Mono.Unix.Native.Syscall.fstat(fd, out Mono.Unix.Native.Stat buf) == 0) { + return new stat_result(buf); + } + return LightExceptions.Throw(GetLastUnixError()); + } + private const int OPEN_EXISTING = 3; private const int FILE_ATTRIBUTE_NORMAL = 0x00000080; private const int FILE_READ_ATTRIBUTES = 0x0080; @@ -1669,8 +1712,27 @@ public static void truncate(CodeContext context, object? path, BigInteger length public static void truncate(CodeContext context, int fd, BigInteger length) => ftruncate(context, fd, length); - public static void ftruncate(CodeContext context, int fd, BigInteger length) - => context.LanguageContext.FileManager.GetStreams(fd).Truncate((long)length); + public static void ftruncate(CodeContext context, int fd, BigInteger length) { + if (RuntimeInformation.IsOSPlatform(OSPlatform.Linux) || RuntimeInformation.IsOSPlatform(OSPlatform.OSX)) { + ftruncateUnix(fd, (long)length); + } else { + context.LanguageContext.FileManager.GetStreams(fd).Truncate((long)length); + } + } + + + [SupportedOSPlatform("linux"), SupportedOSPlatform("osx")] + internal static void ftruncateUnix(int fd, long length) { + int result; + Mono.Unix.Native.Errno errno; + do { + result = Mono.Unix.Native.Syscall.ftruncate(fd, length); + } while (Mono.Unix.UnixMarshal.ShouldRetrySyscall(result, out errno)); + + if (errno != 0) + throw GetOsError(Mono.Unix.Native.NativeConvert.FromErrno(errno)); + } + #if FEATURE_FILESYSTEM public static object times() { diff --git a/Src/IronPython/Modules/_fileio.cs b/Src/IronPython/Modules/_fileio.cs index 28982c56e..5f299b655 100644 --- a/Src/IronPython/Modules/_fileio.cs +++ b/Src/IronPython/Modules/_fileio.cs @@ -105,55 +105,67 @@ public FileIO(CodeContext/*!*/ context, [NotNone] string name, [NotNone] string this.mode = NormalizeMode(mode, out int flags); if (opener is null) { - switch (this.mode) { - case "rb": - _streams = new(OpenFile(context, pal, name, FileMode.Open, FileAccess.Read, FileShare.ReadWrite)); - break; - case "wb": - _streams = new(OpenFile(context, pal, name, FileMode.Create, FileAccess.Write, FileShare.ReadWrite)); - break; - case "xb": - _streams = new(OpenFile(context, pal, name, FileMode.CreateNew, FileAccess.Write, FileShare.ReadWrite)); - break; - case "ab": - _streams = new(OpenFile(context, pal, name, FileMode.Append, FileAccess.Write, FileShare.ReadWrite)); - _streams.ReadStream.Seek(0L, SeekOrigin.End); - break; - case "rb+": - _streams = new(OpenFile(context, pal, name, FileMode.Open, FileAccess.ReadWrite, FileShare.ReadWrite)); - break; - case "wb+": - _streams = new(OpenFile(context, pal, name, FileMode.Create, FileAccess.ReadWrite, FileShare.ReadWrite)); - break; - case "xb+": - _streams = new(OpenFile(context, pal, name, FileMode.CreateNew, FileAccess.ReadWrite, FileShare.ReadWrite)); - break; - case "ab+": - // Opening writeStream before readStream will create the file if it does not exist - var writeStream = OpenFile(context, pal, name, FileMode.Append, FileAccess.Write, FileShare.ReadWrite); - var readStream = OpenFile(context, pal, name, FileMode.Open, FileAccess.Read, FileShare.ReadWrite); - readStream.Seek(0L, SeekOrigin.End); - writeStream.Seek(0L, SeekOrigin.End); - _streams = new(readStream, writeStream); - break; - default: - throw new InvalidOperationException(); - } - if (RuntimeInformation.IsOSPlatform(OSPlatform.Linux) || RuntimeInformation.IsOSPlatform(OSPlatform.OSX)) { - // On POSIX, register the file descriptor with the file manager right after file opening - _context.FileManager.GetOrAssignId(_streams); - // according to [documentation](https://learn.microsoft.com/en-us/dotnet/api/system.io.filestream.safefilehandle?view=net-9.0#remarks) - // accessing SafeFileHandle sets the current stream position to 0 - // in practice it doesn't seem to be the case, but better to be sure - if (this.mode.StartsWith("ab", StringComparison.InvariantCulture)) { - _streams.WriteStream.Seek(0L, SeekOrigin.End); + if ((RuntimeInformation.IsOSPlatform(OSPlatform.Linux) || RuntimeInformation.IsOSPlatform(OSPlatform.OSX)) && !ClrModule.IsMono) { + // Use PosixFileStream to operate on fd directly + // On Mono, we must use FileStream due to limitations in MemoryMappedFile + var stream = PosixFileStream.Open(name, flags, 0b_110_110_110, out int fd); // mode: rw-rw-rw- + //var stream = OpenFilePosix(name, flags, 0b_110_110_110, out int fd); + if ((flags & O_APPEND) != 0) { + stream.Seek(0L, SeekOrigin.End); } - if (!_streams.IsSingleStream) { - _streams.ReadStream.Seek(_streams.WriteStream.Position, SeekOrigin.Begin); + _streams = new(stream); + _context.FileManager.Add(fd, _streams); + } else { + switch (this.mode) { + case "rb": + _streams = new(OpenFile(context, pal, name, FileMode.Open, FileAccess.Read, FileShare.ReadWrite)); + break; + case "wb": + _streams = new(OpenFile(context, pal, name, FileMode.Create, FileAccess.Write, FileShare.ReadWrite)); + break; + case "xb": + _streams = new(OpenFile(context, pal, name, FileMode.CreateNew, FileAccess.Write, FileShare.ReadWrite)); + break; + case "ab": + _streams = new(OpenFile(context, pal, name, FileMode.Append, FileAccess.Write, FileShare.ReadWrite)); + _streams.WriteStream.Seek(0L, SeekOrigin.End); + break; + case "rb+": + _streams = new(OpenFile(context, pal, name, FileMode.Open, FileAccess.ReadWrite, FileShare.ReadWrite)); + break; + case "wb+": + _streams = new(OpenFile(context, pal, name, FileMode.Create, FileAccess.ReadWrite, FileShare.ReadWrite)); + break; + case "xb+": + _streams = new(OpenFile(context, pal, name, FileMode.CreateNew, FileAccess.ReadWrite, FileShare.ReadWrite)); + break; + case "ab+": + // Opening writeStream before readStream will create the file if it does not exist + var writeStream = OpenFile(context, pal, name, FileMode.Append, FileAccess.Write, FileShare.ReadWrite); + var readStream = OpenFile(context, pal, name, FileMode.Open, FileAccess.Read, FileShare.ReadWrite); + readStream.Seek(0L, SeekOrigin.End); + writeStream.Seek(0L, SeekOrigin.End); + _streams = new(readStream, writeStream); + break; + default: + throw new InvalidOperationException(); + } + if (RuntimeInformation.IsOSPlatform(OSPlatform.Linux) || RuntimeInformation.IsOSPlatform(OSPlatform.OSX)) { + // On POSIX, register the file descriptor with the file manager right after file opening + // Because the .NET case is already handled above before `switch`, this branch runs only on Mono + _context.FileManager.GetOrAssignId(_streams); + // according to [documentation](https://learn.microsoft.com/en-us/dotnet/api/system.io.filestream.safefilehandle?view=net-9.0#remarks) + // accessing SafeFileHandle sets the current stream position to 0 + // in practice it doesn't seem to be the case, but better to be sure + if (this.mode[0] == 'a') { + _streams.WriteStream.Seek(0L, SeekOrigin.End); + } + if (!_streams.IsSingleStream) { + _streams.ReadStream.Seek(_streams.WriteStream.Position, SeekOrigin.Begin); + } } } - } - else { + } else { // opener is not null object? fdobj = PythonOps.CallWithContext(context, opener, name, flags); if (fdobj is int fd) { if (fd < 0) { diff --git a/Src/IronPython/Runtime/PosixFileStream.cs b/Src/IronPython/Runtime/PosixFileStream.cs new file mode 100644 index 000000000..36a380b0a --- /dev/null +++ b/Src/IronPython/Runtime/PosixFileStream.cs @@ -0,0 +1,267 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the Apache 2.0 License. +// See the LICENSE file in the project root for more information. + +#if NETSTANDARD2_1_OR_GREATER || NETCOREAPP2_1_OR_GREATER +#define SPAN_OVERRIDE // Stream has Span-based virtual methods +#endif + +using System; +using System.IO; +using System.Runtime.InteropServices; +using System.Runtime.Versioning; + +using Mono.Unix; +using Mono.Unix.Native; + +using IronPython.Runtime.Operations; +using System.Diagnostics; +using IronPython.Runtime.Exceptions; + +#nullable enable + +namespace IronPython.Runtime; + +[SupportedOSPlatform("linux")] +[SupportedOSPlatform("macos")] +internal class PosixFileStream : Stream +{ + private readonly int _fd; + private readonly bool _canSeek; + private readonly bool _canRead; + private readonly bool _canWrite; + + private bool _disposed; + + + public PosixFileStream(int fileDescriptor) { + if (!RuntimeInformation.IsOSPlatform(OSPlatform.Linux) && !RuntimeInformation.IsOSPlatform(OSPlatform.OSX)) + throw new PlatformNotSupportedException("This stream only works on POSIX systems"); + + if (fileDescriptor < 0) + throw PythonOps.OSError(PythonFileManager.EBADF, "Bad file descriptor"); + + _fd = fileDescriptor; + + _canSeek = Syscall.lseek(fileDescriptor, 0, SeekFlags.SEEK_CUR) >= 0; + _canRead = Syscall.read(fileDescriptor, IntPtr.Zero, 0) == 0; + _canWrite = Syscall.write(fileDescriptor, IntPtr.Zero, 0) == 0; + } + + + public static Stream Open(string name, int flags, uint mode, out int fd) { + OpenFlags openFlags = NativeConvert.ToOpenFlags(flags); + FilePermissions permissions = NativeConvert.ToFilePermissions(mode); + Errno errno; + + do { + fd = Syscall.open(name, openFlags, permissions); + } while (UnixMarshal.ShouldRetrySyscall(fd, out errno)); + + if (fd < 0) { + Debug.Assert(errno != 0); + throw CreateExceptionForLastError(errno, name); + } + return new PosixFileStream(fd); + } + + + public int Handle => _fd; + + public override bool CanSeek => _canSeek; + public override bool CanRead => _canRead; + public override bool CanWrite => _canWrite; + + + public override long Length { + get { + ThrowIfDisposed(); + int res = Syscall.fstat(_fd, out Stat stat); + ThrowIfError(res); + + return stat.st_size; + } + } + + + public override long Position { + get => Seek(0, SeekOrigin.Current); + set => Seek(value, SeekOrigin.Begin); + } + + + public override long Seek(long offset, SeekOrigin origin) { + ThrowIfDisposed(); + SeekFlags whence = origin switch + { + SeekOrigin.Begin => SeekFlags.SEEK_SET, + SeekOrigin.Current => SeekFlags.SEEK_CUR, + SeekOrigin.End => SeekFlags.SEEK_END, + _ => throw PythonOps.OSError(PythonFileManager.EINVAL, "Invalid argument") + }; + + long result = Syscall.lseek(_fd, offset, whence); + ThrowIfError(result); + + return result; + } + + + public override void SetLength(long value) { + ThrowIfDisposed(); + int result; + Errno errno; + do { + result = Syscall.ftruncate(_fd, value); + } while (UnixMarshal.ShouldRetrySyscall(result, out errno)); + ThrowIfError(errno); + } + + +#if SPAN_OVERRIDE +#pragma warning disable IDE0036 // Modifiers are not ordered + override +#pragma warning restore IDE0036 +#endif + public int Read(Span buffer) { + ThrowIfDisposed(); + if (!CanRead) + throw PythonOps.OSError(PythonFileManager.EBADF, "Bad file descriptor"); + + + if (buffer.Length == 0) + return 0; + + int bytesRead; + Errno errno; + unsafe { + fixed (byte* buf = buffer) { + do { + bytesRead = (int)Syscall.read(_fd, buf, (ulong)buffer.Length); + } while (UnixMarshal.ShouldRetrySyscall(bytesRead, out errno)); + } + } + ThrowIfError(errno); + + return bytesRead; + } + + + // If offset == 0 and count == 0, buffer is allowed to be null. + public override int Read(byte[] buffer, int offset, int count) { + ThrowIfDisposed(); + return Read(buffer.AsSpan(offset, count)); + } + + + public override int ReadByte() { + Span buffer = stackalloc byte[1]; + int bytesRead = Read(buffer); + return bytesRead == 0 ? -1 : buffer[0]; + } + + +#if SPAN_OVERRIDE +#pragma warning disable IDE0036 // Modifiers are not ordered + override +#pragma warning restore IDE0036 +#endif + public void Write(ReadOnlySpan buffer) { + ThrowIfDisposed(); + if (!CanWrite) + throw PythonOps.OSError(PythonFileManager.EBADF, "Bad file descriptor"); + + if (buffer.Length == 0) + return; + + int bytesWritten; + Errno errno; + unsafe { + fixed (byte* buf = buffer) { + do { + bytesWritten = (int)Syscall.write(_fd, buf, (ulong)buffer.Length); + } while (UnixMarshal.ShouldRetrySyscall(bytesWritten, out errno)); + } + } + ThrowIfError(errno); + } + + // If offset == 0 and count == 0, buffer is allowed to be null. + public override void Write(byte[] buffer, int offset, int count) { + ThrowIfDisposed(); + Write(buffer.AsSpan(offset, count)); + } + + + public override void WriteByte(byte value) { + Span buffer = stackalloc byte[] { value }; + Write(buffer); + } + + + public override void Flush() { + ThrowIfDisposed(); + int result; + Errno errno; + do { + result = Syscall.fsync(_fd); + } while (UnixMarshal.ShouldRetrySyscall(result, out errno)); + ThrowIfError(errno); + } + + protected override void Dispose(bool disposing) { + if (!_disposed) { + int result = Syscall.close(_fd); + WarnIfError(result, "Error closing file descriptor {0}: {1}: {2}"); + _disposed = true; + } + base.Dispose(disposing); + } + + + #region Private Methods + + private void ThrowIfDisposed() { + if (_disposed) + throw new ObjectDisposedException(GetType().Name); + } + + + private static void ThrowIfError(long result) { + if (result < 0) + throw CreateExceptionForLastError(); + } + + + private static void ThrowIfError(Errno errno) { + if (errno != 0) + throw CreateExceptionForLastError(errno); + } + + + private static Exception CreateExceptionForLastError(string? filename = null) { + Errno errno = Stdlib.GetLastError(); + return CreateExceptionForLastError(errno, filename); + } + + + private static Exception CreateExceptionForLastError(Errno errno, string? filename = null) { + if (errno == 0) return new InvalidOperationException("Unknown error"); + + string msg = UnixMarshal.GetErrorDescription(errno); + int error = NativeConvert.FromErrno(errno); + return PythonOps.OSError(error, msg, filename); + } + + private void WarnIfError(int result, string msgTmpl) { + if (result < 0) { + Errno errno = Stdlib.GetLastError(); + int error = NativeConvert.FromErrno(errno); + PythonOps.Warn(DefaultContext.Default, + PythonExceptions.RuntimeWarning, + msgTmpl, _fd, error, UnixMarshal.GetErrorDescription(errno)); + } + } + + #endregion +} diff --git a/Src/IronPython/Runtime/PythonFileManager.cs b/Src/IronPython/Runtime/PythonFileManager.cs index ca4dbef44..574ba0eb3 100644 --- a/Src/IronPython/Runtime/PythonFileManager.cs +++ b/Src/IronPython/Runtime/PythonFileManager.cs @@ -8,6 +8,7 @@ using System.Buffers; using System.Collections.Concurrent; using System.Collections.Generic; +using System.Diagnostics; using System.Diagnostics.CodeAnalysis; using System.IO; using System.Runtime.InteropServices; @@ -15,9 +16,9 @@ using Microsoft.Scripting.Runtime; using Microsoft.Scripting.Utils; +using Mono.Unix; using IronPython.Runtime.Operations; -using System.Diagnostics; namespace IronPython.Runtime { @@ -28,10 +29,19 @@ internal sealed class StreamBox { private int _id = -1; private Stream _readStream; private Stream _writeStream; + private readonly bool _writeNeedsFlush; public StreamBox(Stream readStream, Stream writeStream) { _readStream = readStream; _writeStream = writeStream; + + // a hack but it does improve perfomance a lot if flushing is not needed + _writeNeedsFlush = writeStream switch { + FileStream => true, // FileStream is buffered by default, used on Windows and mostly Mono + PosixFileStream => false, // PosixFileStream is unbuffered, used on .NET/Posix + UnixStream => false, // UnixStream is unbuffered, used sometimes on Mono + _ => true // if in doubt, flush + }; } public StreamBox(Stream stream) : this(stream, stream) { } @@ -152,7 +162,9 @@ public int Write(IPythonBuffer buffer) { count = buffer.NumBytes(); _writeStream.Write(bytes, 0, count); #endif - _writeStream.Flush(); // IO at this level is not supposed to buffer so we need to call Flush. + if (_writeNeedsFlush) { + _writeStream.Flush(); // IO at this level is not supposed to buffer so we need to call Flush. + } if (!IsSingleStream) { _readStream.Seek(_writeStream.Position, SeekOrigin.Begin); } diff --git a/Tests/test_file.py b/Tests/test_file.py index 1fb7fa3d9..c2a64b166 100644 --- a/Tests/test_file.py +++ b/Tests/test_file.py @@ -702,7 +702,7 @@ def test_errors(self): with self.assertRaises(OSError) as cm: open('path_too_long' * 100) - self.assertEqual(cm.exception.errno, (errno.ENAMETOOLONG if is_posix else errno.EINVAL) if is_netcoreapp and not is_posix or sys.version_info >= (3,6) else errno.ENOENT) + self.assertEqual(cm.exception.errno, (errno.ENAMETOOLONG if is_posix else errno.EINVAL) if is_netcoreapp or sys.version_info >= (3,6) else errno.ENOENT) def test_write_bytes(self): fname = self.temp_file diff --git a/Tests/test_io_stdlib.py b/Tests/test_io_stdlib.py index 8a71c5e75..9c24dd4dd 100644 --- a/Tests/test_io_stdlib.py +++ b/Tests/test_io_stdlib.py @@ -85,7 +85,6 @@ def load_tests(loader, standard_tests, pattern): test.test_io.CTextIOWrapperTest('test_uninitialized'), # AssertionError: Exception not raised by repr test.test_io.CTextIOWrapperTest('test_unseekable'), # OSError: underlying stream is not seekable test.test_io.PyTextIOWrapperTest('test_nonnormalized_close_error_on_close'), # AssertionError: None is not an instance of - test.test_io.PyTextIOWrapperTest('test_seek_append_bom'), # OSError: [Errno -2146232800] Unable seek backward to overwrite data that previously existed in a file opened in Append mode. test.test_io.CMiscIOTest('test_io_after_close'), # AttributeError: 'TextIOWrapper' object has no attribute 'read1' test.test_io.CMiscIOTest('test_nonblock_pipe_write_bigbuf'), # AttributeError: 'module' object has no attribute 'fcntl' test.test_io.CMiscIOTest('test_nonblock_pipe_write_smallbuf'), # AttributeError: 'module' object has no attribute 'fcntl' @@ -101,6 +100,11 @@ def load_tests(loader, standard_tests, pattern): test.test_io.PyMiscIOTest('test_warn_on_dealloc_fd'), # AssertionError: ResourceWarning not triggered ] + if is_mono: + failing_tests += [ + test.test_io.PyTextIOWrapperTest('test_seek_append_bom'), # OSError: [Errno -2146232800] Unable seek backward to overwrite data that previously existed in a file opened in Append mode. + ] + skip_tests = [ test.test_io.CBufferedWriterTest('test_override_destructor'), # StackOverflowException test.test_io.CBufferedRandomTest('test_override_destructor'), # StackOverflowException From 64d0d4ae307b68ee71ad27b9501f395fcea201e8 Mon Sep 17 00:00:00 2001 From: Pavel Koneski Date: Tue, 31 Dec 2024 13:07:13 -0800 Subject: [PATCH 2/4] Improve non-buffering strategy for FileStream --- Src/IronPython.Modules/nt.cs | 16 ++++++++++------ Src/IronPython/Runtime/PythonFileManager.cs | 13 ++----------- Tests/test_io_stdlib.py | 4 ++-- 3 files changed, 14 insertions(+), 19 deletions(-) diff --git a/Src/IronPython.Modules/nt.cs b/Src/IronPython.Modules/nt.cs index 4622a62a5..ca843c658 100644 --- a/Src/IronPython.Modules/nt.cs +++ b/Src/IronPython.Modules/nt.cs @@ -883,8 +883,6 @@ public static void mkdir(CodeContext context, [NotNone] Bytes path, [ParamDictio public static void mkdir(CodeContext context, object? path, [ParamDictionary, NotNone] IDictionary kwargs, [NotNone] params object[] args) => mkdir(ConvertToFsString(context, path, nameof(path)), kwargs, args); - private const int DefaultBufferSize = 4096; - [Documentation(""" open(path, flags, mode=511, *, dir_fd=None) @@ -926,6 +924,12 @@ public static object open(CodeContext/*!*/ context, [NotNone] string path, int f } try { + // FileStream buffer size must be >= 0 on .NET, and >= 1 on .NET Framework and Mono. + // On .NET, buffer size 0 or 1 disables buffering. + // On .NET Framework, buffer size 1 disables buffering. + // On Mono, buffer size 1 makes writes of length >= 2 bypass the buffer. + const int NoBuffering = 1; + FileMode fileMode = FileModeFromFlags(flags); FileAccess access = FileAccessFromFlags(flags); FileOptions options = FileOptionsFromFlags(flags); @@ -940,15 +944,15 @@ public static object open(CodeContext/*!*/ context, [NotNone] string path, int f // open it again w/ just read access. fs = new FileStream(path, fileMode, FileAccess.Write, FileShare.None); fs.Close(); - s = fs = new FileStream(path, FileMode.Open, FileAccess.Read, FileShare.ReadWrite, DefaultBufferSize, options); + s = fs = new FileStream(path, FileMode.Open, FileAccess.Read, FileShare.ReadWrite, NoBuffering, options); } else if (access == FileAccess.ReadWrite && fileMode == FileMode.Append) { // .NET doesn't allow Append w/ access != Write, so open the file w/ Write // and a secondary stream w/ Read, then seek to the end. - s = fs = new FileStream(path, FileMode.Append, FileAccess.Write, FileShare.ReadWrite, DefaultBufferSize, options); - rs = new FileStream(path, FileMode.Open, FileAccess.Read, FileShare.ReadWrite, DefaultBufferSize, options); + s = fs = new FileStream(path, FileMode.Append, FileAccess.Write, FileShare.ReadWrite, NoBuffering, options); + rs = new FileStream(path, FileMode.Open, FileAccess.Read, FileShare.ReadWrite, NoBuffering, options); rs.Seek(0L, SeekOrigin.End); } else { - s = fs = new FileStream(path, fileMode, access, FileShare.ReadWrite, DefaultBufferSize, options); + s = fs = new FileStream(path, fileMode, access, FileShare.ReadWrite, NoBuffering, options); } rs ??= s; diff --git a/Src/IronPython/Runtime/PythonFileManager.cs b/Src/IronPython/Runtime/PythonFileManager.cs index 574ba0eb3..f1efa378f 100644 --- a/Src/IronPython/Runtime/PythonFileManager.cs +++ b/Src/IronPython/Runtime/PythonFileManager.cs @@ -29,19 +29,10 @@ internal sealed class StreamBox { private int _id = -1; private Stream _readStream; private Stream _writeStream; - private readonly bool _writeNeedsFlush; public StreamBox(Stream readStream, Stream writeStream) { _readStream = readStream; _writeStream = writeStream; - - // a hack but it does improve perfomance a lot if flushing is not needed - _writeNeedsFlush = writeStream switch { - FileStream => true, // FileStream is buffered by default, used on Windows and mostly Mono - PosixFileStream => false, // PosixFileStream is unbuffered, used on .NET/Posix - UnixStream => false, // UnixStream is unbuffered, used sometimes on Mono - _ => true // if in doubt, flush - }; } public StreamBox(Stream stream) : this(stream, stream) { } @@ -162,8 +153,8 @@ public int Write(IPythonBuffer buffer) { count = buffer.NumBytes(); _writeStream.Write(bytes, 0, count); #endif - if (_writeNeedsFlush) { - _writeStream.Flush(); // IO at this level is not supposed to buffer so we need to call Flush. + if (ClrModule.IsMono && count == 1) { + _writeStream.Flush(); // IO at this level is not supposed to buffer so we need to call Flush (only needed on Mono) } if (!IsSingleStream) { _readStream.Seek(_writeStream.Position, SeekOrigin.Begin); diff --git a/Tests/test_io_stdlib.py b/Tests/test_io_stdlib.py index 9c24dd4dd..344eef760 100644 --- a/Tests/test_io_stdlib.py +++ b/Tests/test_io_stdlib.py @@ -6,7 +6,7 @@ ## Run selected tests from test_io from StdLib ## -from iptest import is_ironpython, is_mono, generate_suite, run_test +from iptest import is_ironpython, is_mono, is_windows, generate_suite, run_test import test.test_io @@ -100,7 +100,7 @@ def load_tests(loader, standard_tests, pattern): test.test_io.PyMiscIOTest('test_warn_on_dealloc_fd'), # AssertionError: ResourceWarning not triggered ] - if is_mono: + if is_mono or is_windows: failing_tests += [ test.test_io.PyTextIOWrapperTest('test_seek_append_bom'), # OSError: [Errno -2146232800] Unable seek backward to overwrite data that previously existed in a file opened in Append mode. ] From 1cf9ac646b7b0abf3f488db7ebc89eb4b843d836 Mon Sep 17 00:00:00 2001 From: Pavel Koneski Date: Wed, 1 Jan 2025 11:56:06 -0800 Subject: [PATCH 3/4] Ignore OSException along with IOException --- Src/IronPython.Modules/_warnings.cs | 2 +- Src/IronPython/Modules/_fileio.cs | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/Src/IronPython.Modules/_warnings.cs b/Src/IronPython.Modules/_warnings.cs index ae37101c8..e343cff9e 100644 --- a/Src/IronPython.Modules/_warnings.cs +++ b/Src/IronPython.Modules/_warnings.cs @@ -260,7 +260,7 @@ internal static void showwarning(CodeContext context, object message, PythonType ((TextWriter)file).Write(text); } // unrecognized file type - warning is lost } - } catch (IOException) { + } catch (Exception ex) when (ex is IOException or OSException) { // invalid file - warning is lost } } diff --git a/Src/IronPython/Modules/_fileio.cs b/Src/IronPython/Modules/_fileio.cs index 5f299b655..e67592af3 100644 --- a/Src/IronPython/Modules/_fileio.cs +++ b/Src/IronPython/Modules/_fileio.cs @@ -305,10 +305,10 @@ public override void close(CodeContext/*!*/ context) { try { flush(context); - } catch (IOException) { - // flushing can fail, esp. if the other half of a pipe is closed - // ignore it because we're closing anyway - } + } catch (IOException) { /* ignore */ } catch (OSException) { /* ignore */ } + // flushing can fail, esp. if the other half of a pipe is closed + // ignore it because we're closing anyway + _closed = true; if (_closefd) { From 215237cc7b1ce0a919894fd45ad8009ed4f32149 Mon Sep 17 00:00:00 2001 From: Pavel Koneski Date: Sun, 5 Jan 2025 22:07:14 -0800 Subject: [PATCH 4/4] Fix file size checks in constructor --- Src/IronPython.Modules/mmap.cs | 46 +++++++++++++++++----------------- 1 file changed, 23 insertions(+), 23 deletions(-) diff --git a/Src/IronPython.Modules/mmap.cs b/Src/IronPython.Modules/mmap.cs index 8236c2331..bfce0811c 100644 --- a/Src/IronPython.Modules/mmap.cs +++ b/Src/IronPython.Modules/mmap.cs @@ -190,9 +190,9 @@ public MmapDefault(CodeContext/*!*/ context, int fileno, long length, string tag } else if (RuntimeInformation.IsOSPlatform(OSPlatform.OSX) || RuntimeInformation.IsOSPlatform(OSPlatform.Linux)) { // use file descriptor #if NET8_0_OR_GREATER - CheckFileAccess(_fileAccess, stream); + CheckFileAccessAndSize(stream); _handle = new SafeFileHandle((IntPtr)fileno, ownsHandle: false); - _file = MemoryMappedFile.CreateFromFile(_handle, _mapName, length, _fileAccess, HandleInheritability.None, leaveOpen: true); + _file = MemoryMappedFile.CreateFromFile(_handle, _mapName, stream.Length, _fileAccess, HandleInheritability.None, leaveOpen: true); #else _handle = new SafeFileHandle((IntPtr)fileno, ownsHandle: false); FileAccess fa = stream.CanWrite ? stream.CanRead ? FileAccess.ReadWrite : FileAccess.Write : FileAccess.Read; @@ -211,19 +211,12 @@ public MmapDefault(CodeContext/*!*/ context, int fileno, long length, string tag throw WindowsError(PythonExceptions._OSError.ERROR_INVALID_HANDLE); } - CheckFileAccess(_fileAccess, _sourceStream); - if (length == 0) { - length = _sourceStream.Length; - if (length == 0) { - throw PythonOps.ValueError("cannot mmap an empty file"); - } - if (_offset >= length) { - throw PythonOps.ValueError("mmap offset is greater than file size"); - } - length -= _offset; + length = _sourceStream.Length - _offset; } + CheckFileAccessAndSize(_sourceStream); + long capacity = checked(_offset + length); // Enlarge the file as needed. @@ -254,8 +247,8 @@ public MmapDefault(CodeContext/*!*/ context, int fileno, long length, string tag } _position = 0L; - void CheckFileAccess(MemoryMappedFileAccess mmapAccess, Stream stream) { - bool isValid = mmapAccess switch { + void CheckFileAccessAndSize(Stream stream) { + bool isValid = _fileAccess switch { MemoryMappedFileAccess.Read => stream.CanRead, MemoryMappedFileAccess.ReadWrite => stream.CanRead && stream.CanWrite, MemoryMappedFileAccess.CopyOnWrite => stream.CanRead, @@ -265,20 +258,27 @@ void CheckFileAccess(MemoryMappedFileAccess mmapAccess, Stream stream) { }; if (!isValid) { - if (_handle is not null && _sourceStream is not null) { - _sourceStream.Dispose(); - } - throw PythonOps.OSError(PythonExceptions._OSError.ERROR_ACCESS_DENIED, "Invalid access mode"); + ThrowException(PythonOps.OSError(PythonExceptions._OSError.ERROR_ACCESS_DENIED, "Invalid access mode")); } if (!RuntimeInformation.IsOSPlatform(OSPlatform.Windows)) { // Unix map does not support increasing size on open - if (_offset + length > stream.Length) { - if (_handle is not null && _sourceStream is not null) { - _sourceStream.Dispose(); - } - throw PythonOps.ValueError("mmap length is greater than file size"); + if (length != 0 && _offset + length > stream.Length) { + ThrowException(PythonOps.ValueError("mmap length is greater than file size")); + } + } + if (length == 0 && stream.Length == 0) { + ThrowException(PythonOps.ValueError("cannot mmap an empty file")); + } + if (_offset >= stream.Length) { + ThrowException(PythonOps.ValueError("mmap offset is greater than file size")); + } + + void ThrowException(Exception ex) { + if (_handle is not null && _sourceStream is not null) { // .NET 6.0 on POSIX only + _sourceStream.Dispose(); } + throw ex; } } } // end of constructor