From 45b790aa23e26ca4aa2933e7855084796827db12 Mon Sep 17 00:00:00 2001 From: Mads Marquart Date: Sat, 31 Jan 2026 00:16:52 +0100 Subject: [PATCH] Use IOSurface Write directly into a shared memory buffer that can be shared directly with the compositor and avoids copying bytes when presenting (QuartzCore was copying stuff before). This also implements double and triple buffering of the surface, to avoid creating a bunch of unnecessary buffers. The compositor seems to sometimes work on two buffers at the same time? A bit unsure why. The way we wait for the compositor to stop using the buffer(s) is kinda bad, but that can be resolved later, and shouldn't be a problem for applications that do proper frame-pacing (which Winit doesn't yet provide though). --- Cargo.toml | 11 ++ src/backends/cg.rs | 348 ++++++++++++++++++++++++++++++++++----------- 2 files changed, 277 insertions(+), 82 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 1183a5be..b6686853 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -104,6 +104,9 @@ objc2-core-graphics = { version = "0.3.2", default-features = false, features = objc2-core-foundation = { version = "0.3.2", default-features = false, features = [ "std", "CFCGTypes", + "CFNumber", + "CFString", + "CFDictionary", ] } objc2-foundation = { version = "0.3.2", default-features = false, features = [ "std", @@ -121,6 +124,14 @@ objc2-quartz-core = { version = "0.3.2", default-features = false, features = [ "CALayer", "CATransaction", ] } +objc2-io-surface = { version = "0.3.2", default-features = false, features = [ + "std", + "libc", + "objc2", + "objc2-core-foundation", + "IOSurfaceRef", + "IOSurfaceTypes", +] } # Web dependencies. [target.'cfg(target_family = "wasm")'.dependencies] diff --git a/src/backends/cg.rs b/src/backends/cg.rs index 889fe374..aa79eabf 100644 --- a/src/backends/cg.rs +++ b/src/backends/cg.rs @@ -1,29 +1,31 @@ //! Softbuffer implementation using CoreGraphics. use crate::backend_interface::*; use crate::error::InitError; -use crate::{util, Pixel, Rect, SoftBufferError}; +use crate::{Pixel, Rect, SoftBufferError}; use objc2::rc::Retained; use objc2::runtime::{AnyObject, Bool}; use objc2::{define_class, msg_send, AllocAnyThread, DefinedClass, MainThreadMarker, Message}; -use objc2_core_foundation::{CFRetained, CGPoint}; -use objc2_core_graphics::{ - CGBitmapInfo, CGColorRenderingIntent, CGColorSpace, CGDataProvider, CGImage, CGImageAlphaInfo, - CGImageByteOrderInfo, CGImageComponentInfo, CGImagePixelFormatInfo, -}; +use objc2_core_foundation::{CFMutableDictionary, CFNumber, CFRetained, CFString, CFType, CGPoint}; +use objc2_core_graphics::CGColorSpace; use objc2_foundation::{ ns_string, NSDictionary, NSKeyValueChangeKey, NSKeyValueChangeNewKey, NSKeyValueObservingOptions, NSNumber, NSObject, NSObjectNSKeyValueObserverRegistration, NSString, NSValue, }; +use objc2_io_surface::{ + kIOSurfaceBytesPerElement, kIOSurfaceCacheMode, kIOSurfaceColorSpace, kIOSurfaceHeight, + kIOSurfaceMapWriteCombineCache, kIOSurfacePixelFormat, kIOSurfaceWidth, IOSurfaceLockOptions, + IOSurfaceRef, +}; use objc2_quartz_core::{kCAGravityTopLeft, CALayer, CATransaction}; use raw_window_handle::{HasDisplayHandle, HasWindowHandle, RawWindowHandle}; use std::ffi::c_void; use std::marker::PhantomData; -use std::mem::size_of; +use std::mem::{size_of, ManuallyDrop}; use std::num::NonZeroU32; use std::ops::Deref; -use std::ptr::{self, slice_from_raw_parts_mut, NonNull}; +use std::ptr; define_class!( #[unsafe(super(NSObject))] @@ -104,10 +106,9 @@ pub struct CGImpl { root_layer: SendCALayer, observer: Retained, color_space: CFRetained, - /// The width of the underlying buffer. - width: usize, - /// The height of the underlying buffer. - height: usize, + front: Buffer, + front2: Option, + back: Buffer, window_handle: W, _display: PhantomData, } @@ -225,7 +226,8 @@ impl SurfaceInterface for CGImpl< // resized to something that doesn't fit, see #177. layer.setContentsGravity(unsafe { kCAGravityTopLeft }); - // Initialize color space here, to reduce work later on. + // The color space we're using. + // TODO: Allow setting this to something else? let color_space = CGColorSpace::new_device_rgb().unwrap(); // Grab initial width and height from the layer (whose properties have just been initialized @@ -235,13 +237,27 @@ impl SurfaceInterface for CGImpl< let width = (size.width * scale_factor) as usize; let height = (size.height * scale_factor) as usize; + // FIXME(madsmtm): Allow setting this: + // https://github.com/rust-windowing/softbuffer/pull/320 + let write_combine_cache = false; + let properties = Buffer::properties( + width, + height, + kCVPixelFormatType_32BGRA, + 4, + &color_space, + write_combine_cache, + ); + Ok(Self { layer: SendCALayer(layer), root_layer: SendCALayer(root_layer), observer, color_space, - width, - height, + front: Buffer::new(&properties), + // TODO: Allow configuring amount of buffers? + front2: Some(Buffer::new(&properties)), + back: Buffer::new(&properties), _display: PhantomData, window_handle: window_src, }) @@ -253,106 +269,153 @@ impl SurfaceInterface for CGImpl< } fn resize(&mut self, width: NonZeroU32, height: NonZeroU32) -> Result<(), SoftBufferError> { - self.width = width.get() as usize; - self.height = height.get() as usize; + let width = width.get() as usize; + let height = height.get() as usize; + + // TODO: Is this check desirable? + if self.front.surface.width() == width && self.front.surface.height() == height { + return Ok(()); + } + + // Recreate buffers. It's fine to release the old ones, `CALayer.contents` and/or the + // compositor is going to keep a reference if they're still in use. + let properties = Buffer::properties( + width, + height, + kCVPixelFormatType_32BGRA, + 4, + &self.color_space, + false, // write_combine_cache + ); + self.back = Buffer::new(&properties); + // Keep a second buffer if it was there before. + if self.front2.is_some() { + self.front2 = Some(Buffer::new(&properties)); + } + self.front = Buffer::new(&properties); + Ok(()) } fn buffer_mut(&mut self) -> Result, SoftBufferError> { - let buffer_size = util::byte_stride(self.width as u32) as usize * self.height / 4; + // Block until back buffer is no longer being used by the compositor. + // + // TODO: Allow configuring this: https://github.com/rust-windowing/softbuffer/issues/29 + // TODO: Is this actually the check we want to do? It seems like the compositor doesn't + // properly set the usage state when the application loses focus, even if you continue + // rendering there? + // + // Should we instead set up a `CVDisplayLink`, and only allow using the back buffer once a + // certain number of frames have passed since it was presented? Would be better though not + // perfect, `CVDisplayLink` isn't guaranteed to actually match the display's refresh rate: + // https://developer.apple.com/library/archive/documentation/GraphicsImaging/Conceptual/CoreVideo/CVProg_Concepts/CVProg_Concepts.html#//apple_ref/doc/uid/TP40001536-CH202-DontLinkElementID_2 + // + // Another option would be to keep a boundless queue as described in: + // https://github.com/commercial-emacs/commercial-emacs/blob/68f5a28a316ea0c553d4274ce86e95fc4a5a701a/src/nsterm.m#L10552-L10571 + while self.back.surface.is_in_use() { + std::thread::yield_now(); + } + + // Lock the back buffer to allow writing to it. + // + // Either unlocked in `BufferImpl`s `Drop` or `present_with_damage`. + self.back.lock(); + Ok(BufferImpl { - buffer: util::PixelBuffer(vec![Pixel::default(); buffer_size]), - width: self.width, - height: self.height, - color_space: &self.color_space, + front: &mut self.front, + front2: &mut self.front2, + back: &mut self.back, layer: &mut self.layer, }) } } +/// The implementation used for presenting the back buffer to the surface. +/// +/// This is triple-buffered because that's what QuartzCore / the compositor seems to require: +/// - The front buffer is what's currently assigned to `CALayer.contents`, and was submitted to the +/// compositor in the previous iteration of the run loop. +/// - The front2 / middle buffer is what the compositor is currently drawing from. +/// - The back buffer is what we'll be drawing into. #[derive(Debug)] pub struct BufferImpl<'a> { - width: usize, - height: usize, - color_space: &'a CGColorSpace, - buffer: util::PixelBuffer, + front: &'a mut Buffer, + front2: &'a mut Option, + back: &'a mut Buffer, layer: &'a mut SendCALayer, } +impl Drop for BufferImpl<'_> { + fn drop(&mut self) { + // Unlock the buffer we locked above. + self.back.unlock(); + } +} + impl BufferInterface for BufferImpl<'_> { fn byte_stride(&self) -> NonZeroU32 { - NonZeroU32::new(util::byte_stride(self.width as u32)).unwrap() + // A multiple of the cache line size, which is `64` on x86_64 and `128` on Aarch64. + // Check with `sysctl hw.cachelinesize`. + NonZeroU32::new(self.back.surface.bytes_per_row() as u32).unwrap() } fn width(&self) -> NonZeroU32 { - NonZeroU32::new(self.width as u32).unwrap() + NonZeroU32::new(self.back.surface.width() as u32).unwrap() } fn height(&self) -> NonZeroU32 { - NonZeroU32::new(self.height as u32).unwrap() + NonZeroU32::new(self.back.surface.height() as u32).unwrap() } - #[inline] fn pixels_mut(&mut self) -> &mut [Pixel] { - &mut self.buffer + let num_pixels = + self.back.surface.bytes_per_row() * self.back.surface.height() / size_of::(); + let ptr = self.back.surface.base_address().cast::(); + + // SAFETY: `IOSurface` is a kernel-managed buffer, which means it's page-aligned, which is + // plenty for the 4 byte alignment required here. + // + // Additionally, buffer is owned by us, and we're the only ones that are going to write to + // it. Since we re-use buffers, the buffer _might_ be read by the compositor while we write + // to it - this is still sound on our side, though it might cause tearing, depending on when + // the memory is flushed by the kernel. + unsafe { std::slice::from_raw_parts_mut(ptr.as_ptr(), num_pixels) } } + #[inline] fn age(&self) -> u8 { - 0 + self.back.age } fn present_with_damage(self, _damage: &[Rect]) -> Result<(), SoftBufferError> { - unsafe extern "C-unwind" fn release( - _info: *mut c_void, - data: NonNull, - size: usize, - ) { - let data = data.cast::(); - let slice = slice_from_raw_parts_mut(data.as_ptr(), size / size_of::()); - // SAFETY: This is the same slice that we passed to `Box::into_raw` below. - drop(unsafe { Box::from_raw(slice) }) - } - - let data_provider = { - let len = self.buffer.len() * size_of::(); - let buffer: *mut [Pixel] = Box::into_raw(self.buffer.0.into_boxed_slice()); - // Convert slice pointer to thin pointer. - let data_ptr = buffer.cast::(); - - // SAFETY: The data pointer and length are valid. - // The info pointer can safely be NULL, we don't use it in the `release` callback. - unsafe { - CGDataProvider::with_data(ptr::null_mut(), data_ptr, len, Some(release)).unwrap() + // Unlock the buffer now, and avoid the `unlock` in `Drop`. + // Would be prettier with https://github.com/rust-lang/rfcs/pull/3466. + let this = &mut *ManuallyDrop::new(self); + let front = &mut *this.front; + let front2 = &mut this.front2; + let back = &mut *this.back; + let layer = &mut *this.layer; + // Note that unlocking effectively flushes the changes, without this, the contents might not + // be visible to the compositor. + back.unlock(); + + back.age = 1; + if let Some(front2) = front2 { + if front2.age != 0 { + front2.age += 1; } - }; - - // `CGBitmapInfo` consists of a combination of `CGImageAlphaInfo`, `CGImageComponentInfo` - // `CGImageByteOrderInfo` and `CGImagePixelFormatInfo` (see e.g. `CGBitmapInfoMake`). - // - // TODO: Use `CGBitmapInfo::new` once the next version of objc2-core-graphics is released. - let bitmap_info = CGBitmapInfo( - CGImageAlphaInfo::NoneSkipFirst.0 - | CGImageComponentInfo::Integer.0 - | CGImageByteOrderInfo::Order32Little.0 - | CGImagePixelFormatInfo::Packed.0, - ); + } + if front.age != 0 { + front.age += 1; + } - let image = unsafe { - CGImage::new( - self.width, - self.height, - 8, - 32, - util::byte_stride(self.width as u32) as usize, - Some(self.color_space), - bitmap_info, - Some(&data_provider), - ptr::null(), - false, - CGColorRenderingIntent::RenderingIntentDefault, - ) + // Rotate buffers such that the back buffer is now the front buffer. + if let Some(front2) = front2 { + std::mem::swap(back, front2); + std::mem::swap(front2, front); + } else { + std::mem::swap(back, front); } - .unwrap(); // The CALayer has a default action associated with a change in the layer contents, causing // a quarter second fade transition to happen every time a new buffer is applied. This can @@ -360,14 +423,131 @@ impl BufferInterface for BufferImpl<'_> { CATransaction::begin(); CATransaction::setDisableActions(true); - // SAFETY: The contents is `CGImage`, which is a valid class for `contents`. - unsafe { self.layer.setContents(Some(image.as_ref())) }; + // SAFETY: We set `CALayer.contents` to an `IOSurface`, which is an undocumented option, but + // it's done in browsers and GDK: + // https://gitlab.gnome.org/GNOME/gtk/-/blob/4266c3c7b15299736df16c9dec57cd8ec7c7ebde/gdk/macos/GdkMacosTile.c#L44 + // And tested to work at least as far back as macOS 10.12. + unsafe { layer.setContents(Some(front.surface.as_ref())) }; CATransaction::commit(); Ok(()) } } +/// A single buffer in Softbuffer. +/// +/// Buffers are backed by an `IOSurface`, which is a shared memory buffer that can be passed to the +/// compositor without copying. The best official documentation I've found for how this works is +/// probably this keynote: +/// +/// +/// The first ~10mins of this keynote is also pretty good, it describes CA and the render server: +/// +/// +/// +/// See also these links: +/// - +/// - +/// - +/// - +/// - +#[derive(Debug)] +struct Buffer { + surface: CFRetained, + age: u8, +} + +// SAFETY: `IOSurface` is marked `NS_SWIFT_SENDABLE`. +unsafe impl Send for Buffer {} +// SAFETY: Same as above. +unsafe impl Sync for Buffer {} + +impl Buffer { + fn new(properties: &CFMutableDictionary) -> Self { + let surface = unsafe { IOSurfaceRef::new(properties.as_opaque()) }.unwrap(); + Self { surface, age: 0 } + } + + fn properties( + width: usize, + height: usize, + pixel_format: u32, + bytes_per_pixel: u32, + color_space: &CGColorSpace, + write_combine_cache: bool, + ) -> CFRetained> { + let properties = CFMutableDictionary::::empty(); + + // Set properties of the surface. + properties.add( + unsafe { kIOSurfaceWidth }, + &CFNumber::new_isize(width as isize), + ); + properties.add( + unsafe { kIOSurfaceHeight }, + &CFNumber::new_isize(height as isize), + ); + // NOTE: If an unsupported pixel format is provided, the compositor usually won't render + // anything (which means it'll render whatever was there before, very glitchy). + // + // The list of formats is hardware- and OS-dependent, see e.g. the following link: + // https://developer.apple.com/forums/thread/673868 + // + // Basically only `kCVPixelFormatType_32BGRA` is guaranteed to work, though from testing, + // there's a few more that we might be able to use; see the following repository: + // https://github.com/madsmtm/iosurface-calayer-formats + properties.add( + unsafe { kIOSurfacePixelFormat }, + &CFNumber::new_i32(pixel_format as i32), + ); + properties.add( + unsafe { kIOSurfaceBytesPerElement }, + &CFNumber::new_i32(bytes_per_pixel as i32), + ); + + // TODO: kIOSurfaceICCProfile instead? Or in addition to this? + properties.add( + unsafe { kIOSurfaceColorSpace }, + &*color_space.property_list().unwrap(), + ); + + // Be a bit more strict about usage of the surface in debug mode. + #[cfg(debug_assertions)] + properties.add( + unsafe { objc2_io_surface::kIOSurfacePixelSizeCastingAllowed }, + &**objc2_core_foundation::CFBoolean::new(false), + ); + + if write_combine_cache { + properties.add( + unsafe { kIOSurfaceCacheMode }, + &**CFNumber::new_i32(kIOSurfaceMapWriteCombineCache as _), + ); + } + + properties + } + + // The compositor shouldn't be writing to our surface, let's ensure that with this flag. + const LOCK_OPTIONS: IOSurfaceLockOptions = IOSurfaceLockOptions::AvoidSync; + + #[track_caller] + fn lock(&self) { + let ret = unsafe { self.surface.lock(Self::LOCK_OPTIONS, ptr::null_mut()) }; + if ret != 0 { + panic!("failed locking buffer: {ret}"); + } + } + + #[track_caller] + fn unlock(&self) { + let ret = unsafe { self.surface.unlock(Self::LOCK_OPTIONS, ptr::null_mut()) }; + if ret != 0 { + panic!("failed unlocking buffer: {ret}"); + } + } +} + #[derive(Debug)] struct SendCALayer(Retained); @@ -388,3 +568,7 @@ impl Deref for SendCALayer { &self.0 } } + +// Grabbed from `objc2-core-video` to avoid having to depend on that (for now at least). +#[allow(non_upper_case_globals)] +const kCVPixelFormatType_32BGRA: u32 = 0x42475241;