diff --git a/README.md b/README.md index 33a142f..e84dd7f 100644 --- a/README.md +++ b/README.md @@ -23,6 +23,7 @@ This MCP server acts as a bridge between Claude and Chrome's debugging capabilit - **Storage Access**: Read cookies, localStorage, and sessionStorage - **Real-time Monitoring**: Live console output tracking - **Object Inspection**: Inspect JavaScript objects and variables +- **Screenshot Capture**: Take screenshots for visual analysis by an LLM (viewport, full-page, or specific regions) ## Installation @@ -271,6 +272,24 @@ Once connected, use these commands: - `track_indexeddb(origin, enable?)` - Enable/disable IndexedDB tracking - `override_storage_quota(origin, quota_size_mb?)` - Override storage quota +### Screenshots +- `take_screenshot(format?, quality?, full_page?, clip_x?, clip_y?, clip_width?, clip_height?, optimize_for_speed?, filename?)` - Capture page screenshot and save to file. Returns file path for visual analysis. + +**Parameters:** +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| format | string | "png" | Image format: png, jpeg, webp | +| quality | int | None | 0-100 compression quality (jpeg/webp only) | +| full_page | bool | False | Capture beyond visible viewport | +| clip_x | float | None | X coordinate for region capture | +| clip_y | float | None | Y coordinate for region capture | +| clip_width | float | None | Width of region to capture | +| clip_height | float | None | Height of region to capture | +| optimize_for_speed | bool | False | Optimize for speed over file size | +| filename | string | None | Custom filename (auto-generated if not provided) | + +Screenshots are saved to: `{temp_dir}/chrome-devtools-screenshots/` + ## Use Cases ### Debugging API Calls in Your Web Application @@ -372,6 +391,37 @@ I notice the login request returns a 200 but no Set-Cookie header. 
Let me also c execute_javascript("Object.keys(localStorage)") ``` +### Visual Debugging with Screenshots + +When you need your LLM to analyze visual layout issues, UI bugs, or design problems: + +1. **Navigate to the problematic page** +2. **Capture screenshot**: Use `take_screenshot()` to capture the current page +3. **Claude analyzes the image**: Claude can see and describe what's in the screenshot +4. **Target specific areas**: Use region clipping to focus on specific components +5. **Capture full pages**: Use `full_page=True` to capture long scrolling pages + +**Example workflow:** +``` +You: "The login button looks weird on my page" +LLM: Let me take a screenshot and see what's happening. + +take_screenshot() + +I can see the login button in the screenshot. The button appears to be cut off at the bottom and the text "Login" is not centered vertically. Let me capture just that button area for a closer look: + +take_screenshot(clip_x=100, clip_y=200, clip_width=200, clip_height=50) + +Now I can see more clearly - the button has padding-top: 5px but padding-bottom: 15px, causing the text alignment issue. The button height is also only 40px which is cutting off the descenders in the font. 
+``` + +**Screenshot options:** +- **Basic viewport**: `take_screenshot()` - Captures what's currently visible +- **Full page**: `take_screenshot(full_page=True)` - Captures entire scrollable page +- **Specific region**: `take_screenshot(clip_x=100, clip_y=200, clip_width=400, clip_height=300)` - Captures a specific area +- **Compressed JPEG**: `take_screenshot(format="jpeg", quality=70)` - Smaller file size +- **Custom name**: `take_screenshot(filename="login_page_issue")` - Save with specific name + ### DOM Element Inspection - `get_document(depth?, pierce?)` - Retrieve DOM document structure - `query_selector(node_id, selector)` - Find single element by CSS selector @@ -409,6 +459,9 @@ execute_javascript("Object.keys(localStorage)") | Check for JavaScript errors | `get_console_error_summary()` | | Watch console in real-time | `monitor_console_live(10)` | | Check page load performance | `get_page_info()` | +| Capture screenshot | `take_screenshot()` | +| Capture full page screenshot | `take_screenshot(full_page=True)` | +| Capture specific area | `take_screenshot(clip_x=100, clip_y=200, clip_width=400, clip_height=300)` | | Examine a variable | `inspect_console_object("window.myApp")` | | View cookies | `get_cookies()` | | Run JavaScript | `execute_javascript("document.title")` | diff --git a/src/main.py b/src/main.py index a10ccc9..3417969 100644 --- a/src/main.py +++ b/src/main.py @@ -36,6 +36,7 @@ register_dom_tools, register_network_tools, register_performance_tools, + register_screenshot_tools, register_storage_tools, ) @@ -66,6 +67,7 @@ def register_all_tools() -> None: register_css_tools(mcp) register_storage_tools(mcp) register_performance_tools(mcp) + register_screenshot_tools(mcp) logger.info("All MCP tools registered successfully") diff --git a/src/tools/__init__.py b/src/tools/__init__.py index 0a254ed..3de72cb 100644 --- a/src/tools/__init__.py +++ b/src/tools/__init__.py @@ -39,6 +39,7 @@ from .dom import register_dom_tools from .network import 
register_network_tools from .performance import register_performance_tools +from .screenshots import register_screenshot_tools from .storage import register_storage_tools __all__ = [ @@ -49,4 +50,5 @@ "register_css_tools", "register_storage_tools", "register_performance_tools", + "register_screenshot_tools", ] diff --git a/src/tools/screenshots.py b/src/tools/screenshots.py new file mode 100644 index 0000000..875e0c6 --- /dev/null +++ b/src/tools/screenshots.py @@ -0,0 +1,214 @@ +#!/usr/bin/env python3 +"""Screenshot Capture Tools + +This module provides page screenshot capabilities through the DevTools Protocol. +It enables capturing full page or region screenshots in various formats and saves +them to files that can be analyzed by vision-capable LLMs. + +Key Features: + - Full page and viewport screenshot capture + - Region-based screenshot clipping + - Multiple output formats (PNG, JPEG, WebP) + - Quality compression control for JPEG/WebP + - Automatic file saving with timestamp-based naming + - Custom filename support + +Example: + Capturing screenshots: + + ```python + # Capture current viewport as PNG + result = await take_screenshot() + + # Capture full scrollable page as JPEG + result = await take_screenshot(format="jpeg", quality=80, full_page=True) + + # Capture specific region + result = await take_screenshot(clip_x=100, clip_y=100, clip_width=400, clip_height=300) + + # Custom filename + result = await take_screenshot(filename="my_screenshot") + ``` + +Note: + All screenshot operations require an active connection to Chrome with the + Page domain enabled. Screenshots are saved to a temp directory and the file + path is returned for LLM analysis. 
from __future__ import annotations

import base64
import tempfile
from datetime import datetime
from pathlib import Path
from typing import Any

from mcp.server.fastmcp import FastMCP

from ..cdp_context import require_cdp_client
from .utils import create_error_response, create_success_response

# Screenshots directory in system temp
SCREENSHOTS_DIR = Path(tempfile.gettempdir()) / "chrome-devtools-screenshots"

# Image formats accepted by the take_screenshot tool.
_VALID_FORMATS = ("png", "jpeg", "webp")


def _is_safe_filename(name: str) -> bool:
    """Return True if *name* is a plain file stem with no directory components."""
    if name in (".", "..") or "\x00" in name:
        return False
    # Reject both separator styles so the check holds on every platform.
    return "/" not in name and "\\" not in name


def _available_path(directory: Path, stem: str, suffix: str) -> Path:
    """Return ``directory/stem.suffix``, appending ``_N`` to the stem if it exists."""
    candidate = directory / f"{stem}.{suffix}"
    counter = 1
    while candidate.exists():
        candidate = directory / f"{stem}_{counter}.{suffix}"
        counter += 1
    return candidate


def register_screenshot_tools(mcp: FastMCP) -> None:
    """Register screenshot capture tools with the MCP server.

    Args:
        mcp: FastMCP server instance to register tools with.

    Registered Tools:
        - take_screenshot: Capture page screenshots and save to file for LLM analysis
    """

    @mcp.tool()
    @require_cdp_client
    async def take_screenshot(
        format: str = "png",
        quality: int | None = None,
        full_page: bool = False,
        clip_x: float | None = None,
        clip_y: float | None = None,
        clip_width: float | None = None,
        clip_height: float | None = None,
        optimize_for_speed: bool = False,
        filename: str | None = None,
        **kwargs: Any,
    ) -> dict[str, Any]:
        """Capture a screenshot of the current page and save it to a file.

        The image is requested through ``Page.captureScreenshot``, decoded from
        base64 and written into ``SCREENSHOTS_DIR``. The resulting file path is
        returned so a vision-capable LLM can open the image.

        Args:
            format: Image format - "png", "jpeg", or "webp" (default: png).
                Matched case-insensitively.
            quality: Compression quality 0-100 for jpeg/webp formats.
                Supplying it together with the PNG format is rejected with
                an error response.
            full_page: If True, captures the full scrollable page. When no
                clip region is given, the page content size is queried via
                ``Page.getLayoutMetrics`` and used as the capture region.
            clip_x: X coordinate (from top-left) for region capture.
                Must be provided with clip_y, clip_width, clip_height.
            clip_y: Y coordinate (from top-left) for region capture.
            clip_width: Width of the region to capture; must be positive.
            clip_height: Height of the region to capture; must be positive.
            optimize_for_speed: If True, optimizes for capture speed over
                file size (default: False).
            filename: Custom filename without extension. Must not contain
                path separators; an existing file with the same name is
                overwritten. If not provided, a timestamp-based name
                (screenshot_YYYYMMDD_HHMMSS) is used, with a numeric suffix
                added when that name is already taken.

        Returns:
            Success response whose data holds ``file_path``, ``format``,
            ``size_bytes``, ``full_page`` and ``filename``, or an error
            response if validation or capture fails.

        Example:
            # Basic viewport screenshot
            take_screenshot()

            # Full page with compression
            take_screenshot(format="jpeg", quality=70, full_page=True)

            # Capture specific region
            take_screenshot(clip_x=100, clip_y=200, clip_width=300, clip_height=150)

            # Custom filename
            take_screenshot(filename="login_page_error")
        """
        try:
            cdp_client = kwargs["cdp_client"]

            # Validate format
            format_lower = format.lower()
            if format_lower not in _VALID_FORMATS:
                return create_error_response(
                    f"Invalid format '{format}'",
                    details=f"Supported formats: {', '.join(_VALID_FORMATS)}",
                )

            # Validate quality parameter
            if quality is not None:
                if format_lower == "png":
                    return create_error_response(
                        "Quality parameter not supported for PNG format",
                        details="Use 'jpeg' or 'webp' format for quality control",
                    )
                if not (0 <= quality <= 100):
                    return create_error_response(
                        f"Invalid quality value: {quality}",
                        details="Quality must be between 0 and 100",
                    )

            # Validate the filename before talking to the browser so a bad
            # name never costs a capture, and so it cannot escape
            # SCREENSHOTS_DIR via "../" or an absolute path.
            if filename and not _is_safe_filename(filename):
                return create_error_response(
                    f"Invalid filename '{filename}'",
                    details="Filename must not contain path separators or be '.' or '..'",
                )

            # Build CDP command parameters
            params: dict[str, Any] = {"format": format_lower}

            # Quality has already been validated as jpeg/webp-only above.
            if quality is not None:
                params["quality"] = quality

            if full_page:
                params["captureBeyondViewport"] = True

            # Add clip region if all coordinates provided
            clip_values = (clip_x, clip_y, clip_width, clip_height)
            if any(value is not None for value in clip_values):
                if any(value is None for value in clip_values):
                    return create_error_response(
                        "Incomplete clip region",
                        details="All clip parameters (clip_x, clip_y, clip_width, clip_height) "
                        "must be provided together",
                    )
                if clip_width <= 0 or clip_height <= 0:
                    return create_error_response(
                        "Invalid clip region",
                        details="clip_width and clip_height must be greater than 0",
                    )
                params["clip"] = {
                    "x": clip_x,
                    "y": clip_y,
                    "width": clip_width,
                    "height": clip_height,
                    "scale": 1,
                }
            elif full_page:
                # captureBeyondViewport alone does not enlarge the captured
                # area; the clip must span the whole document.
                metrics = await cdp_client.send_command("Page.getLayoutMetrics", {})
                metrics = metrics or {}
                # cssContentSize is the current field; contentSize is its
                # deprecated predecessor on older Chrome versions.
                content_size = metrics.get("cssContentSize") or metrics.get("contentSize")
                if content_size:
                    params["clip"] = {
                        "x": 0,
                        "y": 0,
                        "width": content_size["width"],
                        "height": content_size["height"],
                        "scale": 1,
                    }

            # Add speed optimization
            if optimize_for_speed:
                params["optimizeForSpeed"] = True

            # Capture screenshot via CDP
            result = await cdp_client.send_command("Page.captureScreenshot", params)

            screenshot_base64 = result.get("data") if isinstance(result, dict) else None
            if not screenshot_base64:
                return create_error_response(
                    "Screenshot capture failed",
                    details="CDP response did not include image data",
                )
            screenshot_bytes = base64.b64decode(screenshot_base64)

            # Create screenshots directory if it doesn't exist
            SCREENSHOTS_DIR.mkdir(parents=True, exist_ok=True)

            if filename:
                # An explicit name is used as given, replacing any existing file.
                file_path = SCREENSHOTS_DIR / f"{filename}.{format_lower}"
            else:
                # Timestamps have one-second resolution, so pick a free name
                # rather than overwriting a capture from the same second.
                timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
                file_path = _available_path(
                    SCREENSHOTS_DIR, f"screenshot_{timestamp}", format_lower
                )

            file_path.write_bytes(screenshot_bytes)

            return create_success_response(
                message=f"Screenshot saved to {file_path}",
                data={
                    "file_path": str(file_path),
                    "format": format_lower,
                    "size_bytes": len(screenshot_bytes),
                    "full_page": full_page,
                    "filename": file_path.name,
                },
            )

        except Exception as e:
            # Tool boundary: report any failure as an error response rather
            # than letting it propagate into the MCP server.
            return create_error_response(f"Screenshot capture failed: {e}")