askui · philipph-askui · Jun 24, 2026 · Jun 24, 2026 · Jun 24, 2026 · Jun 24, 2026
diff --git a/src/askui/tools/agent_os.py b/src/askui/tools/agent_os.py
@@ -263,13 +263,18 @@ def disconnect(self) -> None:
         """
 
     @abstractmethod
-    def screenshot(self, report: bool = True) -> Image.Image:
+    def screenshot(self, report: bool = True, unscaled: bool = False) -> Image.Image:
         """
         Captures a screenshot of the current display.
 
         Args:
             report (bool, optional): Whether to include the screenshot in
                 reporting. Defaults to `True`.
+            unscaled (bool, optional): Whether to return the screenshot at its
+                full, real-screen resolution instead of the resolution shown to
+                the model. Only has an effect on scaling implementations (e.g.
+                `ComputerAgentOsFacade`); implementations that already return the
+                native resolution ignore it. Defaults to `False`.
 
         Returns:
             Image.Image: A PIL Image object containing the screenshot.

diff --git a/src/askui/tools/askui/askui_controller.py b/src/askui/tools/askui/askui_controller.py
@@ -349,13 +349,16 @@ def _stop_execution(self) -> None:
 
     @telemetry.record_call()
     @override
-    def screenshot(self, report: bool = True) -> Image.Image:
+    def screenshot(self, report: bool = True, unscaled: bool = False) -> Image.Image:
         """
         Take a screenshot of the current screen.
 
         Args:
             report (bool, optional): Whether to include the screenshot in reporting.
                 Defaults to `True`.
+            unscaled (bool, optional): Accepted for interface compatibility. This
+                client always returns the native screen resolution, so it has no
+                effect. Defaults to `False`.
 
         Returns:
             Image.Image: A PIL Image object containing the screenshot.
@@ -375,7 +378,8 @@ def screenshot(self, report: bool = True) -> Image.Image:
             screenResponse.bitmap.data,
         ).split()
         image = Image.merge("RGB", (b, g, r))
-        self._reporter.add_message("AgentOS", "screenshot()", image)
+        if report:
+            self._reporter.add_message("AgentOS", "screenshot()", image)
         return image
 
     @telemetry.record_call()

diff --git a/src/askui/tools/computer_agent_os_facade.py b/src/askui/tools/computer_agent_os_facade.py
@@ -50,6 +50,7 @@ def __init__(
         image_scaler: ImageScaler,
     ) -> None:
         self._agent_os = agent_os
+        self._image_scaler = image_scaler
         self._scaler = CoordinateScaler(
             coordinate_space=coordinate_space,
             image_scaler=image_scaler,
@@ -66,10 +67,47 @@ def disconnect(self) -> None:
         self._agent_os.disconnect()
         self._scaler.real_screen_resolution = None
 
-    def screenshot(self, report: bool = True) -> Image.Image:
+    def screenshot(self, report: bool = True, unscaled: bool = False) -> Image.Image:
         screenshot = self._agent_os.screenshot(report=report)
+        if unscaled:  # skip model scaling, but still record the native size below
+            self._scaler.real_screen_resolution = screenshot.size
+            return screenshot
         return self._scaler.scale_screenshot(screenshot)
 
+    def scale_image_for_model(self, image: Image.Image) -> Image.Image:
+        """Apply the same scaling screenshots receive, without recording state.
+
+        Calls the `image_scaler` given to the constructor directly. Unlike
+        `screenshot`, it never assigns `self._scaler.real_screen_resolution`, so
+        it is safe for arbitrary images (e.g. a cropped region).
+
+        Args:
+            image (Image.Image): The image to scale for model consumption.
+
+        Returns:
+            Image.Image: The result of applying the image scaler to `image`.
+        """
+        return self._image_scaler(image)
+
+    def scale_point_to_real_screen(
+        self, x: float, y: float, check_coordinates_in_bounds: bool = True
+    ) -> tuple[int, int]:
+        """Map a model-space point to real screen pixels via `self._scaler`.
+
+        Args:
+            x (float): The horizontal coordinate in the model coordinate space.
+            y (float): The vertical coordinate in the model coordinate space.
+            check_coordinates_in_bounds (bool, optional): Forwarded to the scaler;
+                whether to raise if the mapped coordinate is off-screen. Set to
+                `False` when the caller clamps the result. Defaults to `True`.
+
+        Returns:
+            tuple[int, int]: The corresponding `(x, y)` in real screen pixels.
+        """
+        return self._scaler.scale_coordinates(
+            x, y, check_coordinates_in_bounds=check_coordinates_in_bounds
+        )
+
     def _take_silent_screenshot(self) -> Image.Image:
         return self.screenshot(report=False)
 

diff --git a/src/askui/tools/playwright/agent_os.py b/src/askui/tools/playwright/agent_os.py
@@ -197,12 +197,15 @@ def disconnect(self) -> None:
         )
 
     @override
-    def screenshot(self, report: bool = True) -> Image.Image:
+    def screenshot(self, report: bool = True, unscaled: bool = False) -> Image.Image:
         """Capture a screenshot of the current page.
 
         Args:
             report (bool, optional): Whether to include the screenshot in
                 reporting. Defaults to `True`.
+            unscaled (bool, optional): Accepted for interface compatibility. This
+                agent OS always returns the native page resolution, so it has no
+                effect. Defaults to `False`.
 
         Returns:
             Image.Image: A PIL Image object containing the screenshot.

diff --git a/src/askui/tools/playwright/agent_os_facade.py b/src/askui/tools/playwright/agent_os_facade.py
@@ -55,8 +55,27 @@ def disconnect(self) -> None:
         self._agent_os.disconnect()
         self._scaler.real_screen_resolution = None
 
-    def screenshot(self, report: bool = True) -> Image.Image:
+    def screenshot(self, report: bool = True, unscaled: bool = False) -> Image.Image:
+        """Capture a screenshot, scaled for the model unless `unscaled` is set.
+
+        Args:
+            report (bool, optional): Forwarded to the wrapped agent OS.
+                Defaults to `True`.
+            unscaled (bool, optional): Return the capture exactly as the
+                wrapped agent OS produced it instead of scaling it. Defaults
+                to `False`.
+
+        Returns:
+            Image.Image: The scaled screenshot, or the raw capture when
+                `unscaled` is `True`.
+        """
         screenshot = self._agent_os.screenshot(report=report)
+        if unscaled:
+            # Mirror `ComputerAgentOsFacade.screenshot`: record the native size
+            # on this path too, so the scaler is not left with a stale or
+            # missing resolution after an unscaled capture.
+            self._scaler.real_screen_resolution = screenshot.size
+            return screenshot
         return self._scaler.scale_screenshot(screenshot)
 
     def mouse_move(self, x: float, y: float, duration: int = 500) -> None:

diff --git a/src/askui/tools/store/computer/experimental/__init__.py b/src/askui/tools/store/computer/experimental/__init__.py
@@ -8,6 +8,7 @@
     ComputerSetProcessInFocusTool,
     ComputerSetWindowInFocusTool,
 )
+from .zoom import ComputerZoomTool
 
 __all__ = [
     "ComputerGetFileNamesTool",
@@ -18,4 +19,5 @@
     "ComputerAddWindowAsVirtualDisplayTool",
     "ComputerSetWindowInFocusTool",
     "ComputerSetProcessInFocusTool",
+    "ComputerZoomTool",
 ]
diff --git a/src/askui/tools/store/computer/experimental/zoom.py b/src/askui/tools/store/computer/experimental/zoom.py
@@ -0,0 +1,155 @@
+from typing import cast
+
+from PIL import Image
+
+from askui.models.shared import ComputerBaseTool, ToolTags
+from askui.reporting import NULL_REPORTER, Reporter
+from askui.tools.computer_agent_os_facade import ComputerAgentOsFacade
+
+
+class ComputerZoomTool(ComputerBaseTool):
+    """
+    Shows the model a screen region cropped from the unscaled screenshot.
+
+    The screenshots the model normally sees are downscaled, which can leave
+    small UI details (icons, tab titles, status-bar text, line numbers, tiny
+    buttons) illegible. This tool takes an unscaled screenshot, cuts out the
+    requested region and returns that cut-out after scaling it for the model.
+    The result is only a closer look: coordinates for subsequent actions still
+    use the original screen coordinate space.
+
+    Args:
+        agent_os (`ComputerAgentOsFacade`, optional): The agent OS facade. Injected
+            automatically when the tool is registered with an agent.
+        reporter (`Reporter`, optional): Reporter that receives the cropped image
+            (the exact image handed to the model). Defaults to a null reporter
+            that discards messages.
+
+    Example:
+        ```python
+        from askui import ComputerAgent
+        from askui.tools.store.computer.experimental import ComputerZoomTool
+
+        with ComputerAgent(act_tools=[ComputerZoomTool()]) as agent:
+            agent.act("Enable the tiny checkbox next to 'Advanced options'")
+
+        with ComputerAgent() as agent:
+            agent.act(
+                "Enable the tiny checkbox next to 'Advanced options'",
+                tools=[ComputerZoomTool()],
+            )
+        ```
+    """
+
+    def __init__(
+        self,
+        agent_os: ComputerAgentOsFacade | None = None,
+        reporter: Reporter = NULL_REPORTER,
+    ) -> None:
+        tool_description = (
+            "View a specific region of the screen at full resolution. This "
+            "is a last resort for reading content that is genuinely too small "
+            "to make out in the normal screenshot (e.g. tiny text, icons, "
+            "status-bar text, line numbers) when that detail is required to "
+            "decide your next action.\n"
+            "Use it sparingly. Before zooming, rely on the normal screenshot "
+            "you already have. Do NOT use this tool when:\n"
+            "- the relevant text or element is already legible in the normal "
+            "screenshot;\n"
+            "- you only need to locate or click an element (the normal "
+            "screenshot coordinates are sufficient for that);\n"
+            "- you have already zoomed into this region — do not re-zoom the "
+            "same area.\n"
+            "Provide the region as [x1, y1, x2, y2], the top-left and "
+            "bottom-right corners in the same coordinates you use for "
+            "clicking. The returned image is only a magnified view; "
+            "coordinates for subsequent actions still use the original screen "
+            "coordinate space."
+        )
+        region_schema = {
+            "type": "array",
+            "description": (
+                "The region to zoom into as [x1, y1, x2, y2]: the top-left and "
+                "bottom-right corners in screen coordinates."
+            ),
+            "items": {"type": "number"},
+            "minItems": 4,
+            "maxItems": 4,
+        }
+        super().__init__(
+            name="zoom",
+            description=tool_description,
+            input_schema={
+                "type": "object",
+                "properties": {"region": region_schema},
+                "required": ["region"],
+            },
+            agent_os=agent_os,
+            required_tags=[ToolTags.SCALED_AGENT_OS.value],
+        )
+        self.is_cacheable = True
+        self._reporter = reporter
+
+    @staticmethod
+    def _clamp_span(first: int, second: int, size: int) -> tuple[int, int]:
+        """Order two pixel positions and clamp them to a non-empty in-range span."""
+        start, end = sorted((first, second))
+        start = max(0, min(start, size - 1))
+        end = max(start + 1, min(end, size))
+        return start, end
+
+    def __call__(self, region: list[float]) -> tuple[str, Image.Image]:
+        """Crop `region` out of an unscaled screenshot and return it for the model.
+
+        Args:
+            region (list[float]): `[x1, y1, x2, y2]` corners in the coordinate
+                space the model uses for clicking; either corner order works.
+
+        Returns:
+            tuple[str, Image.Image]: A follow-up instruction for the model and
+                the cropped region after scaling it for the model.
+
+        Raises:
+            ValueError: If `region` does not contain exactly four values.
+        """
+        if len(region) != 4:  # noqa: PLR2004
+            error_msg = (
+                f"region must contain exactly 4 values [x1, y1, x2, y2], "
+                f"got {len(region)}"
+            )
+            raise ValueError(error_msg)
+
+        facade = cast("ComputerAgentOsFacade", self.agent_os)
+        # report=False keeps the full screenshot out of the report; only the
+        # crop is reported further down.
+        full_image = facade.screenshot(unscaled=True, report=False)
+
+        # The scaler's own bounds check is disabled on purpose: the corners are
+        # clamped to the screenshot right after, so a region that overshoots
+        # the screen is cut at the edge instead of raising.
+        x1, y1, x2, y2 = region
+        first_x, first_y = facade.scale_point_to_real_screen(
+            x1, y1, check_coordinates_in_bounds=False
+        )
+        second_x, second_y = facade.scale_point_to_real_screen(
+            x2, y2, check_coordinates_in_bounds=False
+        )
+        left, right = self._clamp_span(first_x, second_x, full_image.width)
+        top, bottom = self._clamp_span(first_y, second_y, full_image.height)
+
+        zoomed = facade.scale_image_for_model(
+            full_image.crop((left, top, right, bottom))
+        )
+        # The report shows real screen pixels (where the crop was taken), not
+        # the raw coordinates passed in by the model.
+        self._reporter.add_message(
+            "AgentOS", f"zoom([{left}, {top}, {right}, {bottom}])", zoomed
+        )
+        message = (
+            f"Zoomed into region [{x1}, {y1}, {x2}, {y2}] shown at full "
+            "resolution. Coordinates for further actions remain in the original "
+            "screen coordinate space. Now proceed with the next action (e.g. "
+            "move/click) using those coordinates; do not zoom again unless a "
+            "different region is still too small to read."
+        )
+        return message, zoomed