diff --git a/pkg/create/templates.go b/pkg/create/templates.go
index fb5845f..54a1ee4 100644
--- a/pkg/create/templates.go
+++ b/pkg/create/templates.go
@@ -19,6 +19,7 @@ const (
TemplateOpenAGIComputerUse = "openagi-computer-use"
TemplateClaudeAgentSDK = "claude-agent-sdk"
TemplateYutoriComputerUse = "yutori"
+ TemplateUnifiedCUA = "cua"
)
type TemplateInfo struct {
@@ -90,6 +91,11 @@ var Templates = map[string]TemplateInfo{
Description: "Implements a Yutori n1 computer use agent",
Languages: []string{LanguageTypeScript, LanguagePython},
},
+ TemplateUnifiedCUA: {
+ Name: "Unified CUA",
+ Description: "Multi-provider computer use agent with Anthropic/OpenAI/Gemini fallback",
+ Languages: []string{LanguageTypeScript, LanguagePython},
+ },
}
// GetSupportedTemplatesForLanguage returns a list of all supported template names for a given language
@@ -213,6 +219,11 @@ var Commands = map[string]map[string]DeployConfig{
NeedsEnvFile: true,
InvokeCommand: `kernel invoke ts-yutori-cua cua-task --payload '{"query": "Navigate to https://example.com and describe the page"}'`,
},
+ TemplateUnifiedCUA: {
+ EntryPoint: "index.ts",
+ NeedsEnvFile: true,
+ InvokeCommand: `kernel invoke ts-cua cua-task --payload '{"query": "Go to https://news.ycombinator.com and get the top 5 stories"}'`,
+ },
},
LanguagePython: {
TemplateSampleApp: {
@@ -260,6 +271,11 @@ var Commands = map[string]map[string]DeployConfig{
NeedsEnvFile: true,
InvokeCommand: `kernel invoke python-yutori-cua cua-task --payload '{"query": "Navigate to https://example.com and describe the page"}'`,
},
+ TemplateUnifiedCUA: {
+ EntryPoint: "main.py",
+ NeedsEnvFile: true,
+ InvokeCommand: `kernel invoke python-cua cua-task --payload '{"query": "Go to https://news.ycombinator.com and get the top 5 stories"}'`,
+ },
},
}
diff --git a/pkg/templates/python/cua/.env.example b/pkg/templates/python/cua/.env.example
new file mode 100644
index 0000000..d4bfcbd
--- /dev/null
+++ b/pkg/templates/python/cua/.env.example
@@ -0,0 +1,26 @@
+# Copy this file to .env and fill in your API keys.
+# Only the key for your chosen provider is required.
+
+# Primary provider: "anthropic", "openai", or "gemini"
+CUA_PROVIDER=anthropic
+
+# Comma-separated fallback order (optional).
+# If the primary provider fails, these are tried in order.
+# CUA_FALLBACK_PROVIDERS=openai,gemini
+
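+# Provider API keys — set the one(s) you plan to use.
+# Delete (or leave empty) the keys you do not use: any non-empty value,
+# including the placeholders below, makes that provider count as configured.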
+ANTHROPIC_API_KEY=your_anthropic_api_key_here
+OPENAI_API_KEY=your_openai_api_key_here
+GOOGLE_API_KEY=your_google_api_key_here
+
+# Browser config (proxy, profile, extensions, timeout) is set per-request
+# via the payload "browser" field, not here. Example:
+# kernel invoke python-cua cua-task --payload '{
+# "query": "...",
+# "browser": {
+# "proxy_id": "proxy_abc123",
+# "profile": { "name": "my-profile", "save_changes": true },
+# "extensions": [{ "name": "my-extension" }],
+# "timeout_seconds": 600
+# }
+# }'
diff --git a/pkg/templates/python/cua/README.md b/pkg/templates/python/cua/README.md
new file mode 100644
index 0000000..90bbdfa
--- /dev/null
+++ b/pkg/templates/python/cua/README.md
@@ -0,0 +1,88 @@
+# Unified CUA Template
+
+A multi-provider Computer Use Agent (CUA) template for [Kernel](https://kernel.sh). Supports **Anthropic**, **OpenAI**, and **Google Gemini** as interchangeable backends with automatic fallback.
+
+## Quick start
+
+### 1. Install dependencies
+
+```bash
+uv sync
+```
+
+### 2. Configure environment
+
+Copy the example env file and add your API keys:
+
+```bash
+cp .env.example .env
+```
+
+Set `CUA_PROVIDER` to your preferred provider and add the matching API key:
+
+
+| Provider | Env var for key | Model used |
+| ----------- | ------------------- | ----------------------------------------- |
+| `anthropic` | `ANTHROPIC_API_KEY` | `claude-sonnet-4-6` |
+| `openai` | `OPENAI_API_KEY` | `gpt-5.4` |
+| `gemini` | `GOOGLE_API_KEY` | `gemini-2.5-computer-use-preview-10-2025` |
+
+
+### 3. Deploy to Kernel
+
+```bash
+kernel deploy main.py --env-file .env
+```
+
+### 4. Invoke
+
+```bash
+kernel invoke python-cua cua-task --payload '{"query": "Go to https://news.ycombinator.com and get the top 5 stories"}'
+```
+
+## Multi-provider fallback
+
+Set `CUA_FALLBACK_PROVIDERS` to automatically try another provider if the primary fails:
+
+```env
+CUA_PROVIDER=anthropic
+CUA_FALLBACK_PROVIDERS=openai,gemini
+```
+
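+This tries Anthropic first, then OpenAI, then Gemini. Providers whose API key is not set are skipped. Keys are not validated up front, so an invalid key only shows up as a failed attempt before the next provider is tried.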
+
+## Replay recording
+
+Pass `record_replay: true` in the payload to capture a video replay of the browser session:
+
+```bash
+kernel invoke python-cua cua-task --payload '{"query": "Navigate to example.com", "record_replay": true}'
+```
+
+The response will include a `replay_url` you can open in your browser.
+
+## Project structure
+
+```
+main.py — Kernel app entrypoint
+session.py — Browser session lifecycle with replay support
+providers/
+ __init__.py — Provider factory and fallback logic
+ anthropic.py — Anthropic Claude adapter
+ openai.py — OpenAI GPT adapter
+ gemini.py — Google Gemini adapter
+```
+
+## Customization
+
+Each provider adapter is self-contained. To customize a provider's behavior (system prompt, model, tool handling), edit the corresponding file in `providers/`.
+
+To add a new provider, create a new file that implements the `CuaProvider` protocol and register it in `providers/__init__.py`.
+
+## Resources
+
+- [Kernel Docs](https://docs.kernel.sh)
+- [Anthropic Computer Use](https://docs.anthropic.com/en/docs/agents-and-tools/computer-use)
+- [OpenAI Computer Use](https://platform.openai.com/docs/guides/computer-use)
+- [Google Gemini Computer Use](https://ai.google.dev/gemini-api/docs/computer-use)
+
diff --git a/pkg/templates/python/cua/_gitignore b/pkg/templates/python/cua/_gitignore
new file mode 100644
index 0000000..db80737
--- /dev/null
+++ b/pkg/templates/python/cua/_gitignore
@@ -0,0 +1,31 @@
+# Python
+__pycache__/
+*.py[cod]
+*$py.class
+*.so
+*.egg-info/
+dist/
+build/
+
+# Virtual environments
+.venv/
+venv/
+env/
+
+# Environment
+.env
+.env.local
+.env.*.local
+
+# IDE
+.vscode/
+.idea/
+*.swp
+*.swo
+
+# OS
+.DS_Store
+Thumbs.db
+
+# Logs
+*.log
diff --git a/pkg/templates/python/cua/main.py b/pkg/templates/python/cua/main.py
new file mode 100644
index 0000000..097a470
--- /dev/null
+++ b/pkg/templates/python/cua/main.py
@@ -0,0 +1,151 @@
+"""
+Unified CUA (Computer Use Agent) template with multi-provider support.
+
+Supports Anthropic, OpenAI, and Gemini as interchangeable providers.
+Configure via environment variables:
+ CUA_PROVIDER — primary provider ("anthropic", "openai", or "gemini")
+ CUA_FALLBACK_PROVIDERS — comma-separated fallback order (optional)
+
+Each provider requires its own API key:
+ ANTHROPIC_API_KEY, OPENAI_API_KEY, GOOGLE_API_KEY
+"""
+
+from __future__ import annotations
+
+import asyncio
+from typing import Literal, TypedDict
+
+import kernel
+from kernel import Kernel
+
+from providers import resolve_providers, run_with_fallback, TaskOptions
+from session import KernelBrowserSession, SessionOptions
+
+kernel_client = Kernel()
+app = kernel.App("python-cua")
+
+
+class BrowserProfile(TypedDict, total=False):
+ # Optional "profile" object of the request's "browser" config; every key
+ # is optional (total=False). The dict is handed to SessionOptions.profile
+ # as-is by cua_task.
+ # NOTE(review): presumably id/name are alternative ways to pick a profile — confirm.
+ id: str
+ name: str
+ save_changes: bool
+
+
+class BrowserExtension(TypedDict, total=False):
+ # One entry of the request's "browser.extensions" list; both keys are
+ # optional (total=False). Entries are handed to SessionOptions.extensions
+ # unchanged by cua_task.
+ id: str
+ name: str
+
+
+class BrowserConfig(TypedDict, total=False):
+ # Per-request browser settings read from payload["browser"]; all keys are
+ # optional. Only used when cua_task creates a new browser session (it is
+ # not consulted when payload["session_id"] is given).
+ proxy_id: str
+ profile: BrowserProfile
+ extensions: list[BrowserExtension]
+ # cua_task falls back to 300 when this key is absent.
+ timeout_seconds: int
+
+
+class CuaInput(TypedDict, total=False):
+ # Payload accepted by the "cua-task" action. All keys are optional at the
+ # type level, but cua_task raises ValueError when "query" is missing or empty.
+ query: str
+ # Per-request override: moves this provider to the front of the chain
+ # if it is among the configured providers.
+ provider: Literal["anthropic", "openai", "gemini"]
+ # Forwarded as TaskOptions.model (None lets the provider pick its default).
+ model: str
+ record_replay: bool
+ # When set, cua_task runs against this existing browser instead of creating one.
+ session_id: str
+ browser: BrowserConfig
+
+
+class CuaOutput(TypedDict, total=False):
+ # Result returned by the "cua-task" action.
+ result: str
+ # Name of the provider that produced the result.
+ provider: str
+ # Only set when the session reports a replay view URL.
+ replay_url: str
+
+
+# Provider resolution is deferred to the action handler because env vars
+# are not available during Hypeman's build/discovery phase.
+_providers: list | None = None
+
+
+def _get_providers():
+ global _providers
+ if _providers is None:
+ _providers = resolve_providers()
+ print(f"Configured providers: {' -> '.join(p.name for p in _providers)}")
+ return _providers
+
+
+@app.action("cua-task")
+async def cua_task(ctx: kernel.KernelContext, payload: CuaInput | None = None) -> CuaOutput:
+ if not payload or not payload.get("query"):
+ raise ValueError('Query is required. Payload must include: {"query": "your task description"}')
+
+ providers = _get_providers()
+
+ # Per-request provider override: move requested provider to front
+ if payload.get("provider"):
+ requested = next((p for p in providers if p.name == payload["provider"]), None)
+ if requested:
+ providers = [requested] + [p for p in providers if p is not requested]
+
+ # Use an existing browser session (BYOB) or create a new one.
+ # BYOB is useful for multi-turn CUA on a persistent browser, or HITL
+ # where a human uses the live view between CUA calls.
+ if payload.get("session_id"):
+ browser = await asyncio.to_thread(
+ kernel_client.browsers.retrieve, payload["session_id"],
+ )
+ vp = getattr(browser, "viewport", None)
+ task_result = await run_with_fallback(
+ providers,
+ TaskOptions(
+ query=payload["query"],
+ kernel=kernel_client,
+ session_id=payload["session_id"],
+ model=payload.get("model"),
+ viewport_width=getattr(vp, "width", 1280),
+ viewport_height=getattr(vp, "height", 800),
+ ),
+ )
+ return {"result": task_result.result, "provider": task_result.provider}
+
+ browser_cfg = payload.get("browser") or {}
+ session = KernelBrowserSession(
+ kernel_client,
+ SessionOptions(
+ invocation_id=ctx.invocation_id,
+ stealth=True,
+ record_replay=payload.get("record_replay", False),
+ proxy_id=browser_cfg.get("proxy_id"),
+ profile=browser_cfg.get("profile"),
+ extensions=browser_cfg.get("extensions"),
+ timeout_seconds=browser_cfg.get("timeout_seconds", 300),
+ ),
+ )
+
+ await session.start()
+ print(f"Live view: {session.live_view_url}")
+
+ try:
+ task_result = await run_with_fallback(
+ providers,
+ TaskOptions(
+ query=payload["query"],
+ kernel=kernel_client,
+ session_id=session.session_id,
+ model=payload.get("model"),
+ viewport_width=session.opts.viewport_width,
+ viewport_height=session.opts.viewport_height,
+ ),
+ )
+
+ session_info = await session.stop()
+
+ output: CuaOutput = {
+ "result": task_result.result,
+ "provider": task_result.provider,
+ }
+ if session_info.replay_view_url:
+ output["replay_url"] = session_info.replay_view_url
+
+ return output
+
+ except Exception:
+ await session.stop()
+ raise
diff --git a/pkg/templates/python/cua/providers/__init__.py b/pkg/templates/python/cua/providers/__init__.py
new file mode 100644
index 0000000..63cbb0a
--- /dev/null
+++ b/pkg/templates/python/cua/providers/__init__.py
@@ -0,0 +1,108 @@
+"""
+Provider factory with automatic fallback.
+
+Resolution order:
+ 1. CUA_PROVIDER env var (required)
+ 2. CUA_FALLBACK_PROVIDERS env var (optional, comma-separated)
+"""
+
+from __future__ import annotations
+
+import os
+from dataclasses import dataclass
+from typing import Protocol
+
+from kernel import Kernel
+
+
+@dataclass
+class TaskOptions:
+ # Inputs for a single provider run; passed to CuaProvider.run_task.
+ # Natural-language task for the agent.
+ query: str
+ # Kernel client the provider uses to drive the browser.
+ kernel: Kernel
+ # Browser session the provider should act on.
+ session_id: str
+ # Optional model override; None leaves the choice to the provider.
+ model: str | None = None
+ viewport_width: int = 1280
+ viewport_height: int = 800
+
+
+@dataclass
+class TaskResult:
+ # Outcome of a provider run, as returned by CuaProvider.run_task.
+ # Final text produced for the task.
+ result: str
+ # Name of the provider that produced it.
+ provider: str
+
+
+class CuaProvider(Protocol):
+ # Structural interface every provider adapter implements.
+ # Short identifier used in logs and reported in TaskResult.provider.
+ @property
+ def name(self) -> str: ...
+ # resolve_providers() skips providers for which this returns False.
+ def is_configured(self) -> bool: ...
+ # Run one task; run_with_fallback treats any raised Exception as a
+ # failure and moves on to the next provider.
+ async def run_task(self, options: TaskOptions) -> TaskResult: ...
+
+
+def _build_provider(name: str) -> CuaProvider | None:
+ if name == "anthropic":
+ from .anthropic import AnthropicProvider
+ return AnthropicProvider()
+ if name == "openai":
+ from .openai import OpenAIProvider
+ return OpenAIProvider()
+ if name == "gemini":
+ from .gemini import GeminiProvider
+ return GeminiProvider()
+ return None
+
+
+def resolve_providers() -> list[CuaProvider]:
+ """Build the ordered list of providers to try."""
+ primary = os.environ.get("CUA_PROVIDER", "").strip().lower()
+ fallbacks = [
+ s.strip().lower()
+ for s in os.environ.get("CUA_FALLBACK_PROVIDERS", "").split(",")
+ if s.strip()
+ ]
+
+ order = ([primary] if primary else []) + fallbacks
+
+ seen: set[str] = set()
+ providers: list[CuaProvider] = []
+
+ for name in order:
+ if name in seen:
+ continue
+ seen.add(name)
+
+ provider = _build_provider(name)
+ if provider is None:
+ print(f'Warning: Unknown provider "{name}", skipping.')
+ continue
+ if not provider.is_configured():
+ print(f'Warning: Provider "{name}" missing API key, skipping.')
+ continue
+ providers.append(provider)
+
+ if not providers:
+ raise RuntimeError(
+ "No CUA provider is configured. "
+ "Set CUA_PROVIDER to one of: anthropic, openai, gemini, "
+ "and provide the matching API key."
+ )
+
+ return providers
+
+
+async def run_with_fallback(
+ providers: list[CuaProvider],
+ options: TaskOptions,
+) -> TaskResult:
+ """Run a CUA task, trying each provider in order until one succeeds."""
+ errors: list[tuple[str, Exception]] = []
+
+ for provider in providers:
+ try:
+ print(f"Attempting provider: {provider.name}")
+ return await provider.run_task(options)
+ except Exception as exc:
+ print(f'Provider "{provider.name}" failed: {exc}')
+ errors.append((provider.name, exc))
+
+ summary = "\n".join(f" {name}: {exc}" for name, exc in errors)
+ raise RuntimeError(f"All providers failed:\n{summary}")
diff --git a/pkg/templates/python/cua/providers/anthropic.py b/pkg/templates/python/cua/providers/anthropic.py
new file mode 100644
index 0000000..8e9cdb6
--- /dev/null
+++ b/pkg/templates/python/cua/providers/anthropic.py
@@ -0,0 +1,220 @@
+"""Anthropic CUA provider adapter using Claude computer-use API."""
+
+from __future__ import annotations
+
+import asyncio
+import base64
+import os
+from datetime import datetime
+
+import anthropic
+
+from . import CuaProvider, TaskOptions, TaskResult
+
+SYSTEM_PROMPT = """
+* You are utilising an Ubuntu virtual machine with internet access.
+* When you connect to the display, CHROMIUM IS ALREADY OPEN.
+* If you need to navigate to a new page, use ctrl+l to focus the url bar and then enter the url.
+* After each step, take a screenshot and carefully evaluate if you have achieved the right outcome.
+* Only when you confirm a step was executed correctly should you move on to the next one.
+* The current date is {date}.
+
+
+
+* When using Chromium, if a startup wizard appears, IGNORE IT.
+* Click on the search bar and enter the appropriate URL there.
+"""
+
+KEY_MAP = {
+ "Return": "Return", "Enter": "Return", "Backspace": "BackSpace",
+ "Tab": "Tab", "Escape": "Escape", "space": "space", "Space": "space",
+ "Up": "Up", "Down": "Down", "Left": "Left", "Right": "Right",
+ "Home": "Home", "End": "End", "Page_Up": "Prior", "Page_Down": "Next",
+ "ctrl": "Control_L", "Control_L": "Control_L",
+ "alt": "Alt_L", "Alt_L": "Alt_L",
+ "shift": "Shift_L", "Shift_L": "Shift_L",
+ "super": "Super_L", "Super_L": "Super_L",
+}
+
+
+def _map_key(key: str) -> str:
+ if "+" in key:
+ return "+".join(KEY_MAP.get(k.strip(), k.strip()) for k in key.split("+"))
+ return KEY_MAP.get(key, key)
+
+
+class AnthropicProvider:
+ name = "anthropic"
+
+ def __init__(self) -> None:
+ self._api_key = os.environ.get("ANTHROPIC_API_KEY", "")
+
+ def is_configured(self) -> bool:
+ return len(self._api_key) > 0
+
+ async def run_task(self, options: TaskOptions) -> TaskResult:
+ client = anthropic.Anthropic(api_key=self._api_key, max_retries=4)
+ model = options.model or "claude-sonnet-4-6"
+ messages: list[dict] = [{"role": "user", "content": options.query}]
+
+ date_str = datetime.now().strftime("%A, %B %d, %Y")
+ system_prompt = SYSTEM_PROMPT.format(date=date_str)
+
+ while True:
+ response = await asyncio.to_thread(
+ client.beta.messages.create,
+ max_tokens=4096,
+ messages=messages,
+ model=model,
+ system=[{"type": "text", "text": system_prompt, "cache_control": {"type": "ephemeral"}}],
+ tools=[{
+ "type": "computer_20251124",
+ "name": "computer",
+ "display_width_px": options.viewport_width,
+ "display_height_px": options.viewport_height,
+ "display_number": 1,
+ }],
+ betas=["computer-use-2025-11-24", "prompt-caching-2024-07-31"],
+ thinking={"type": "enabled", "budget_tokens": 1024},
+ )
+
+ assistant_content = []
+ for block in response.content:
+ if block.type == "thinking":
+ assistant_content.append({
+ "type": "thinking",
+ "thinking": block.thinking,
+ "signature": block.signature,
+ })
+ elif block.type == "text":
+ assistant_content.append({"type": "text", "text": block.text})
+ elif block.type == "tool_use":
+ assistant_content.append({
+ "type": "tool_use",
+ "id": block.id,
+ "name": block.name,
+ "input": block.input,
+ })
+
+ messages.append({"role": "assistant", "content": assistant_content})
+
+ if response.stop_reason == "end_turn":
+ text = " ".join(
+ b.text for b in response.content if b.type == "text"
+ )
+ return TaskResult(result=text, provider=self.name)
+
+ # Process tool calls
+ tool_results = []
+ for block in response.content:
+ if block.type != "tool_use":
+ continue
+ action = block.input.get("action", "")
+ try:
+ screenshot = await self._execute_action(
+ options, action, block.input,
+ )
+ tool_results.append({
+ "type": "tool_result",
+ "tool_use_id": block.id,
+ "content": [{
+ "type": "image",
+ "source": {
+ "type": "base64",
+ "media_type": "image/png",
+ "data": screenshot,
+ },
+ }],
+ })
+ except Exception as exc:
+ tool_results.append({
+ "type": "tool_result",
+ "tool_use_id": block.id,
+ "content": [{"type": "text", "text": f"Error: {exc}"}],
+ "is_error": True,
+ })
+
+ if tool_results:
+ messages.append({"role": "user", "content": tool_results})
+ else:
+ text = " ".join(
+ b.text for b in response.content if b.type == "text"
+ )
+ return TaskResult(result=text or "(no response)", provider=self.name)
+
+ async def _execute_action(
+ self, options: TaskOptions, action: str, params: dict
+ ) -> str:
+ computer = options.kernel.browsers.computer
+
+ if action == "screenshot":
+ pass
+ elif action == "key":
+ key = _map_key(params.get("key", ""))
+ await asyncio.to_thread(
+ computer.press_key, options.session_id, keys=[key]
+ )
+ elif action == "hold_key":
+ key = _map_key(params.get("key", ""))
+ duration = params.get("duration", 500)
+ await asyncio.to_thread(
+ computer.press_key, options.session_id,
+ keys=[key], duration=duration,
+ )
+ elif action == "type":
+ text = params.get("text", "")
+ await asyncio.to_thread(
+ computer.type_text, options.session_id, text=text,
+ )
+ elif action in ("left_click", "right_click", "middle_click"):
+ x, y = params.get("coordinate", [0, 0])
+ button = {"left_click": "left", "right_click": "right", "middle_click": "middle"}[action]
+ await asyncio.to_thread(
+ computer.click_mouse, options.session_id, x=x, y=y, button=button,
+ )
+ elif action == "double_click":
+ x, y = params.get("coordinate", [0, 0])
+ await asyncio.to_thread(
+ computer.click_mouse, options.session_id, x=x, y=y, num_clicks=2,
+ )
+ elif action == "triple_click":
+ x, y = params.get("coordinate", [0, 0])
+ await asyncio.to_thread(
+ computer.click_mouse, options.session_id, x=x, y=y, num_clicks=3,
+ )
+ elif action == "mouse_move":
+ x, y = params.get("coordinate", [0, 0])
+ await asyncio.to_thread(
+ computer.move_mouse, options.session_id, x=x, y=y,
+ )
+ elif action == "left_click_drag":
+ sx, sy = params.get("start_coordinate", [0, 0])
+ ex, ey = params.get("coordinate", [0, 0])
+ await asyncio.to_thread(
+ computer.drag_mouse, options.session_id,
+ path=[[sx, sy], [ex, ey]],
+ )
+ elif action == "scroll":
+ x, y = params.get("coordinate", [0, 0])
+ direction = params.get("direction", "down")
+ amount = params.get("amount", 3)
+ dx = -amount if direction == "left" else amount if direction == "right" else 0
+ dy = -amount if direction == "up" else amount if direction == "down" else 0
+ await asyncio.to_thread(
+ computer.scroll, options.session_id,
+ x=x, y=y, delta_x=dx, delta_y=dy,
+ )
+ elif action == "wait":
+ duration = params.get("duration", 1000)
+ await asyncio.sleep(duration / 1000)
+ elif action == "cursor_position":
+ pass
+ else:
+ raise ValueError(f"Unknown action: {action}")
+
+ # Screenshot after every action
+ await asyncio.sleep(0.5)
+ resp = await asyncio.to_thread(
+ computer.capture_screenshot, options.session_id,
+ )
+ return base64.b64encode(resp.read()).decode()
diff --git a/pkg/templates/python/cua/providers/gemini.py b/pkg/templates/python/cua/providers/gemini.py
new file mode 100644
index 0000000..ac32146
--- /dev/null
+++ b/pkg/templates/python/cua/providers/gemini.py
@@ -0,0 +1,241 @@
+"""Gemini CUA provider adapter using the Google GenAI SDK."""
+
+from __future__ import annotations
+
+import asyncio
+import base64
+import os
+from datetime import datetime
+
+from google import genai
+from google.genai.types import (
+ Content,
+ GenerateContentConfig,
+ Part,
+ ThinkingConfig,
+ Tool,
+ ComputerUse,
+ Environment,
+)
+
+from . import CuaProvider, TaskOptions, TaskResult
+
+COORDINATE_SCALE = 1000
+DEFAULT_WIDTH = 1200
+DEFAULT_HEIGHT = 800
+
+def _system_prompt() -> str:
+ date = datetime.now().strftime("%A, %B %d, %Y")
+ return (
+ "You are a helpful assistant that can use a web browser.\n"
+ "You are operating a Chrome browser through computer use tools.\n"
+ "The browser is already open and ready for use.\n"
+ "When you need to navigate to a page, use the navigate action.\n"
+ "After each action, carefully evaluate the screenshot.\n"
+ f"Current date: {date}."
+ )
+
+
+class GeminiProvider:
+ name = "gemini"
+
+ def __init__(self) -> None:
+ self._api_key = os.environ.get("GOOGLE_API_KEY", "")
+
+ def is_configured(self) -> bool:
+ return len(self._api_key) > 0
+
+ async def run_task(self, options: TaskOptions) -> TaskResult:
+ width = options.viewport_width or DEFAULT_WIDTH
+ height = options.viewport_height or DEFAULT_HEIGHT
+ client = genai.Client(api_key=self._api_key)
+ model = options.model or "gemini-2.5-computer-use-preview-10-2025"
+
+ contents: list[Content] = [
+ Content(role="user", parts=[Part(text=options.query)]),
+ ]
+
+ for _i in range(50):
+ response = await asyncio.to_thread(
+ client.models.generate_content,
+ model=model,
+ contents=contents,
+ config=GenerateContentConfig(
+ temperature=1,
+ top_p=0.95,
+ top_k=40,
+ max_output_tokens=8192,
+ system_instruction=_system_prompt(),
+ tools=[Tool(computer_use=ComputerUse(environment=Environment.ENVIRONMENT_BROWSER))],
+ thinking_config=ThinkingConfig(include_thoughts=True),
+ ),
+ )
+
+ if not response.candidates or not response.candidates[0].content:
+ break
+
+ candidate = response.candidates[0]
+ contents.append(candidate.content)
+
+ # Extract text and function calls
+ text_parts = [
+ p.text for p in (candidate.content.parts or [])
+ if hasattr(p, "text") and p.text
+ ]
+ function_calls = [
+ p.function_call for p in (candidate.content.parts or [])
+ if hasattr(p, "function_call") and p.function_call
+ ]
+
+ if not function_calls:
+ return TaskResult(
+ result=" ".join(text_parts) or "(no response)",
+ provider=self.name,
+ )
+
+ # Execute function calls
+ responses: list[Part] = []
+ for fc in function_calls:
+ if not fc.name:
+ continue
+ args = dict(fc.args) if fc.args else {}
+
+ safety = args.get("safety_decision", {})
+ if isinstance(safety, dict) and safety.get("decision") == "require_confirmation":
+ print(f"Safety check: {safety.get('explanation', '')}")
+
+ result = await self._execute_action(
+ options, fc.name, args, width, height,
+ )
+
+ if result.get("error"):
+ responses.append(Part.from_function_response(
+ name=fc.name,
+ response={"error": result["error"], "url": "about:blank"},
+ ))
+ else:
+ responses.append(Part.from_function_response(
+ name=fc.name,
+ response={"url": result.get("url", "about:blank")},
+ ))
+ if result.get("screenshot"):
+ responses.append(Part(inline_data={
+ "mime_type": "image/png",
+ "data": result["screenshot"],
+ }))
+
+ contents.append(Content(role="user", parts=responses))
+
+ return TaskResult(result="(max iterations reached)", provider=self.name)
+
+ def _denorm(self, value: float | None, dimension: int) -> int:
+ if value is None:
+ return 0
+ return round((value / COORDINATE_SCALE) * dimension)
+
+ async def _execute_action(
+ self,
+ options: TaskOptions,
+ name: str,
+ args: dict,
+ width: int,
+ height: int,
+ ) -> dict:
+ computer = options.kernel.browsers.computer
+
+ try:
+ if name == "click_at":
+ x = self._denorm(args.get("x"), width)
+ y = self._denorm(args.get("y"), height)
+ await asyncio.to_thread(computer.click_mouse, options.session_id, x=x, y=y)
+
+ elif name == "hover_at":
+ x = self._denorm(args.get("x"), width)
+ y = self._denorm(args.get("y"), height)
+ await asyncio.to_thread(computer.move_mouse, options.session_id, x=x, y=y)
+
+ elif name == "type_text_at":
+ x = self._denorm(args.get("x"), width)
+ y = self._denorm(args.get("y"), height)
+ await asyncio.to_thread(computer.click_mouse, options.session_id, x=x, y=y)
+ text = args.get("text", "")
+ if text:
+ await asyncio.to_thread(computer.type_text, options.session_id, text=text)
+
+ elif name in ("scroll_document", "scroll_at"):
+ if name == "scroll_at":
+ x = self._denorm(args.get("x"), width)
+ y = self._denorm(args.get("y"), height)
+ else:
+ x, y = width // 2, height // 2
+ magnitude = args.get("magnitude", 3)
+ direction = args.get("direction", "down")
+ dy = -magnitude if direction == "up" else magnitude if direction == "down" else 0
+ dx = -magnitude if direction == "left" else magnitude if direction == "right" else 0
+ await asyncio.to_thread(
+ computer.scroll, options.session_id, x=x, y=y, delta_x=dx, delta_y=dy,
+ )
+
+ elif name == "wait_5_seconds":
+ await asyncio.sleep(5)
+
+ elif name == "go_back":
+ await asyncio.to_thread(
+ computer.press_key, options.session_id, keys=["Left"], hold_keys=["Alt_L"],
+ )
+
+ elif name == "go_forward":
+ await asyncio.to_thread(
+ computer.press_key, options.session_id, keys=["Right"], hold_keys=["Alt_L"],
+ )
+
+ elif name in ("navigate", "search"):
+ url = args.get("url") or args.get("text", "")
+ await asyncio.to_thread(
+ computer.batch, options.session_id, actions=[
+ {"type": "press_key", "press_key": {"keys": ["l"], "hold_keys": ["Control_L"]}},
+ {"type": "sleep", "sleep": {"duration_ms": 200}},
+ {"type": "press_key", "press_key": {"keys": ["a"], "hold_keys": ["Control_L"]}},
+ {"type": "type_text", "type_text": {"text": url}},
+ {"type": "press_key", "press_key": {"keys": ["Return"]}},
+ ],
+ )
+ await asyncio.sleep(1.5)
+
+ elif name == "key_combination":
+ combo = args.get("key_combination", "")
+ parts = [k.strip() for k in combo.split("+")]
+ hold_keys = parts[:-1] if len(parts) > 1 else []
+ keys = parts[-1:] if parts else []
+ kwargs: dict = {"keys": keys or parts}
+ if hold_keys:
+ kwargs["hold_keys"] = hold_keys
+ await asyncio.to_thread(
+ computer.press_key, options.session_id, **kwargs,
+ )
+
+ elif name == "drag_and_drop":
+ sx = self._denorm(args.get("start_x"), width)
+ sy = self._denorm(args.get("start_y"), height)
+ ex = self._denorm(args.get("end_x"), width)
+ ey = self._denorm(args.get("end_y"), height)
+ await asyncio.to_thread(
+ computer.drag_mouse, options.session_id, path=[[sx, sy], [ex, ey]],
+ )
+
+ elif name == "open_web_browser":
+ pass
+
+ else:
+ return {"error": f"Unknown action: {name}"}
+
+ # Screenshot after every action
+ await asyncio.sleep(0.5)
+ resp = await asyncio.to_thread(
+ computer.capture_screenshot, options.session_id,
+ )
+ screenshot = base64.b64encode(resp.read()).decode()
+ return {"screenshot": screenshot, "url": "about:blank"}
+
+ except Exception as exc:
+ return {"error": str(exc)}
diff --git a/pkg/templates/python/cua/providers/openai.py b/pkg/templates/python/cua/providers/openai.py
new file mode 100644
index 0000000..f83656f
--- /dev/null
+++ b/pkg/templates/python/cua/providers/openai.py
@@ -0,0 +1,242 @@
+"""OpenAI CUA provider adapter using the Responses API."""
+
+from __future__ import annotations
+
+import asyncio
+import base64
+import json
+import os
+from datetime import datetime
+
+import httpx
+
+from . import CuaProvider, TaskOptions, TaskResult
+
+KEYSYM_MAP = {
+ "ENTER": "Return", "Enter": "Return", "RETURN": "Return",
+ "BACKSPACE": "BackSpace", "Backspace": "BackSpace",
+ "DELETE": "Delete", "TAB": "Tab", "ESCAPE": "Escape", "Escape": "Escape",
+ "SPACE": "space", "Space": "space",
+ "UP": "Up", "DOWN": "Down", "LEFT": "Left", "RIGHT": "Right",
+ "HOME": "Home", "END": "End",
+ "PAGEUP": "Prior", "PAGE_UP": "Prior", "PageUp": "Prior",
+ "PAGEDOWN": "Next", "PAGE_DOWN": "Next", "PageDown": "Next",
+ "CTRL": "Control_L", "Ctrl": "Control_L", "CONTROL": "Control_L", "Control": "Control_L",
+ "ALT": "Alt_L", "Alt": "Alt_L",
+ "SHIFT": "Shift_L", "Shift": "Shift_L",
+ "META": "Super_L", "Meta": "Super_L", "CMD": "Super_L", "COMMAND": "Super_L",
+}
+
+MODIFIER_KEYSYMS = {
+ "Control_L", "Control_R", "Alt_L", "Alt_R",
+ "Shift_L", "Shift_R", "Super_L", "Super_R",
+}
+
+
+def _translate_keys(keys: list[str]) -> list[str]:
+ return [KEYSYM_MAP.get(k, k) for k in keys]
+
+
+def _expand_and_translate(
+ keys: list[str], hold_keys: list[str],
+) -> tuple[list[str], list[str]]:
+ expanded = []
+ for raw in keys:
+ for part in (raw.split("+") if "+" in raw else [raw]):
+ trimmed = part.strip()
+ if trimmed:
+ expanded.append(trimmed)
+
+ translated = _translate_keys(expanded)
+ translated_hold = _translate_keys(hold_keys)
+
+ hold_from_keys = [k for k in translated if k in MODIFIER_KEYSYMS]
+ primary = [k for k in translated if k not in MODIFIER_KEYSYMS]
+
+ if not primary:
+ return translated, translated_hold
+
+ merged = list(dict.fromkeys(translated_hold + hold_from_keys))
+ return primary, merged
+
+
+def _translate_action(action: dict) -> list[dict]:
+ action_type = action.get("type", "")
+
+ if action_type == "click":
+ button = action.get("button", "left")
+ if button == "back":
+ return [{"type": "press_key", "press_key": {"keys": ["Left"], "hold_keys": ["Alt_L"]}}]
+ if button == "forward":
+ return [{"type": "press_key", "press_key": {"keys": ["Right"], "hold_keys": ["Alt_L"]}}]
+ if button == "wheel":
+ return [{"type": "scroll", "scroll": {
+ "x": action.get("x", 0), "y": action.get("y", 0),
+ "delta_x": action.get("scroll_x", 0), "delta_y": action.get("scroll_y", 0),
+ }}]
+ btn = "left"
+ if isinstance(button, int):
+ btn = {2: "middle", 3: "right"}.get(button, "left")
+ elif isinstance(button, str):
+ btn = button
+ return [{"type": "click_mouse", "click_mouse": {"x": action.get("x", 0), "y": action.get("y", 0), "button": btn}}]
+
+ if action_type == "double_click":
+ return [{"type": "click_mouse", "click_mouse": {"x": action.get("x", 0), "y": action.get("y", 0), "num_clicks": 2}}]
+
+ if action_type == "type":
+ return [{"type": "type_text", "type_text": {"text": action.get("text", "")}}]
+
+ if action_type == "keypress":
+ primary, hold = _expand_and_translate(action.get("keys", []), action.get("hold_keys", []))
+ result: dict = {"type": "press_key", "press_key": {"keys": primary}}
+ if hold:
+ result["press_key"]["hold_keys"] = hold
+ return [result]
+
+ if action_type == "scroll":
+ return [{"type": "scroll", "scroll": {
+ "x": action.get("x", 0), "y": action.get("y", 0),
+ "delta_x": action.get("scroll_x", 0), "delta_y": action.get("scroll_y", 0),
+ }}]
+
+ if action_type == "move":
+ return [{"type": "move_mouse", "move_mouse": {"x": action.get("x", 0), "y": action.get("y", 0)}}]
+
+ if action_type == "drag":
+ path = action.get("path", [])
+ points = []
+ for p in path:
+ if isinstance(p, dict):
+ points.append([p["x"], p["y"]])
+ elif isinstance(p, (list, tuple)) and len(p) >= 2:
+ points.append([p[0], p[1]])
+ if len(points) < 2:
+ raise ValueError("drag requires at least 2 path points")
+ return [{"type": "drag_mouse", "drag_mouse": {"path": points}}]
+
+ if action_type == "wait":
+ return [{"type": "sleep", "sleep": {"duration_ms": action.get("ms", 1000)}}]
+
+ if action_type == "goto":
+ url = action.get("url", "")
+ return [
+ {"type": "press_key", "press_key": {"keys": ["l"], "hold_keys": ["Control_L"]}},
+ {"type": "sleep", "sleep": {"duration_ms": 200}},
+ {"type": "press_key", "press_key": {"keys": ["a"], "hold_keys": ["Control_L"]}},
+ {"type": "type_text", "type_text": {"text": url}},
+ {"type": "press_key", "press_key": {"keys": ["Return"]}},
+ ]
+
+ if action_type == "back":
+ return [{"type": "press_key", "press_key": {"keys": ["Left"], "hold_keys": ["Alt_L"]}}]
+
+ if action_type == "screenshot":
+ return []
+
+ raise ValueError(f"Unknown CUA action: {action_type}")
+
+
+async def _create_response(api_key: str, **kwargs) -> dict:
+ """Call the OpenAI Responses API with retry."""
+ async with httpx.AsyncClient(timeout=120) as client:
+ for attempt in range(4):
+ try:
+ resp = await client.post(
+ "https://api.openai.com/v1/responses",
+ headers={
+ "Authorization": f"Bearer {api_key}",
+ "Content-Type": "application/json",
+ },
+ json=kwargs,
+ )
+ resp.raise_for_status()
+ return resp.json()
+ except httpx.HTTPStatusError as exc:
+ if exc.response.status_code >= 500 and attempt < 3:
+ await asyncio.sleep(2 ** attempt)
+ continue
+ raise
+ raise RuntimeError("Max retries exceeded")
+
+
+class OpenAIProvider:
+ # CUA provider that drives a Kernel browser through the OpenAI Responses
+ # API "computer" tool.
+ name = "openai"
+
+ def __init__(self) -> None:
+ self._api_key = os.environ.get("OPENAI_API_KEY", "")
+
+ def is_configured(self) -> bool:
+ # Configured means OPENAI_API_KEY is set to a non-empty value.
+ return len(self._api_key) > 0
+
+ async def run_task(self, options: TaskOptions) -> TaskResult:
+ # Runs options.query for at most 50 model turns and returns the text of
+ # the final assistant message, "(no response)" if that message has no
+ # text, or "(max turns reached)". Raises RuntimeError when a response
+ # has no output items.
+ computer = options.kernel.browsers.computer
+
+ # Navigate to starting page
+ # NOTE(review): this also runs for caller-supplied sessions, so it
+ # replaces whatever page such a browser was showing — confirm intended.
+ goto_actions = _translate_action({"type": "goto", "url": "https://duckduckgo.com"})
+ await asyncio.to_thread(
+ computer.batch, options.session_id, actions=goto_actions,
+ )
+
+ input_items = [
+ {"role": "system", "content": f"Current date: {datetime.now().isoformat()}"},
+ {"type": "message", "role": "user", "content": [{"type": "input_text", "text": options.query}]},
+ ]
+ # Everything produced during the run (model output items plus our
+ # computer_call_output replies); resent in full on every turn.
+ items: list[dict] = []
+
+ for _turn in range(50):
+ response = await _create_response(
+ self._api_key,
+ model=options.model or "gpt-5.4",
+ input=input_items + items,
+ tools=[{"type": "computer"}],
+ truncation="auto",
+ reasoning={"effort": "low", "summary": "concise"},
+ )
+
+ output = response.get("output", [])
+ if not output:
+ raise RuntimeError("No output from model")
+
+ for item in output:
+ items.append(item)
+
+ if item.get("type") == "computer_call":
+ # Accept either a list under "actions" or a single "action".
+ action_list = item.get("actions") or ([item["action"]] if "action" in item else [])
+
+ # Translate all actions first, then send them as one batch.
+ batch: list[dict] = []
+ for a in action_list:
+ batch.extend(_translate_action(a))
+ if batch:
+ await asyncio.to_thread(
+ computer.batch, options.session_id, actions=batch,
+ )
+
+ # Safety checks
+ # Logged only; they are acknowledged automatically in the reply below.
+ for check in item.get("pending_safety_checks", []):
+ print(f"Safety check: {check.get('message', '')}")
+
+ # Short pause before capturing the screenshot that answers this call.
+ await asyncio.sleep(0.3)
+ resp = await asyncio.to_thread(
+ computer.capture_screenshot, options.session_id,
+ )
+ screenshot = base64.b64encode(resp.read()).decode()
+
+ items.append({
+ "type": "computer_call_output",
+ "call_id": item["call_id"],
+ "acknowledged_safety_checks": item.get("pending_safety_checks", []),
+ "output": {
+ "type": "computer_screenshot",
+ "image_url": f"data:image/png;base64,{screenshot}",
+ },
+ })
+
+ # Check for final assistant message
+ # Only the last output item is inspected.
+ last = output[-1] if output else {}
+ if last.get("role") == "assistant":
+ content = last.get("content", [])
+ texts = [c.get("text", "") for c in content if isinstance(c, dict) and "text" in c]
+ return TaskResult(result=" ".join(texts) or "(no response)", provider=self.name)
+
+ return TaskResult(result="(max turns reached)", provider=self.name)
diff --git a/pkg/templates/python/cua/pyproject.toml b/pkg/templates/python/cua/pyproject.toml
new file mode 100644
index 0000000..37e844e
--- /dev/null
+++ b/pkg/templates/python/cua/pyproject.toml
@@ -0,0 +1,14 @@
+[project]
+name = "python-cua"
+version = "0.1.0"
+description = "Unified CUA template with multi-provider fallback for Kernel"
+readme = "README.md"
+requires-python = ">=3.11"
+dependencies = [
+ "anthropic>=0.92.0",
+ "google-genai>=1.71.0",
+ "httpx>=0.28.1",
+ "kernel>=0.47.0",
+ "openai>=2.30.0",
+ "python-dotenv>=1.2.2",
+]
diff --git a/pkg/templates/python/cua/session.py b/pkg/templates/python/cua/session.py
new file mode 100644
index 0000000..3cbe254
--- /dev/null
+++ b/pkg/templates/python/cua/session.py
@@ -0,0 +1,163 @@
+"""Kernel Browser Session Manager with optional replay recording."""
+
+from __future__ import annotations
+
+import asyncio
+import time
+from dataclasses import dataclass, field
+from typing import Any
+
+from kernel import Kernel
+
+
+@dataclass
+class SessionOptions:
+ # Settings read by KernelBrowserSession.
+ # invocation_id, stealth, timeout_seconds and the viewport size are always
+ # passed to kernel.browsers.create(); proxy_id, profile and extensions are
+ # only passed when they are set (truthy).
+ invocation_id: str | None = None
+ stealth: bool = True
+ timeout_seconds: int = 300
+ # When True, a replay recording is started right after the browser is created.
+ record_replay: bool = False
+ # Seconds to sleep before the replay is stopped in stop(); skipped when <= 0.
+ replay_grace_period: float = 5.0
+ viewport_width: int = 1280
+ viewport_height: int = 800
+ proxy_id: str | None = None
+ profile: dict | None = None
+ extensions: list[dict] | None = None
+
+
+@dataclass
+class SessionInfo:
+ # Snapshot of a browser session as returned by KernelBrowserSession.start(),
+ # .stop() and .info.
+ session_id: str = ""
+ # Empty string when the created browser exposed no live view URL.
+ live_view_url: str = ""
+ # Set only when a replay recording was started successfully.
+ replay_id: str | None = None
+ # Filled in by stop() once the stopped replay reports a view URL.
+ replay_view_url: str | None = None
+ # Copied from the session options, not read back from the browser.
+ viewport_width: int = 1280
+ viewport_height: int = 800
+
+
+class KernelBrowserSession:
+ """Manages Kernel browser lifecycle with optional replay recording."""
+
+ def __init__(self, kernel: Kernel, options: SessionOptions | None = None) -> None:
+ self.kernel = kernel
+ self.opts = options or SessionOptions()
+ self._session_id: str | None = None
+ self._live_view_url: str | None = None
+ self._replay_id: str | None = None
+ self._replay_view_url: str | None = None
+
+ @property
+ def session_id(self) -> str:
+ if not self._session_id:
+ raise RuntimeError("Session not started. Call start() first.")
+ return self._session_id
+
+ @property
+ def live_view_url(self) -> str | None:
+ return self._live_view_url
+
+ @property
+ def replay_view_url(self) -> str | None:
+ return self._replay_view_url
+
+ @property
+ def info(self) -> SessionInfo:
+ return SessionInfo(
+ session_id=self.session_id,
+ live_view_url=self._live_view_url or "",
+ replay_id=self._replay_id,
+ replay_view_url=self._replay_view_url,
+ viewport_width=self.opts.viewport_width,
+ viewport_height=self.opts.viewport_height,
+ )
+
+ async def start(self) -> SessionInfo:
+ create_kwargs: dict = {
+ "invocation_id": self.opts.invocation_id,
+ "stealth": self.opts.stealth,
+ "timeout_seconds": self.opts.timeout_seconds,
+ "viewport": {
+ "width": self.opts.viewport_width,
+ "height": self.opts.viewport_height,
+ },
+ }
+ if self.opts.proxy_id:
+ create_kwargs["proxy_id"] = self.opts.proxy_id
+ if self.opts.profile:
+ create_kwargs["profile"] = self.opts.profile
+ if self.opts.extensions:
+ create_kwargs["extensions"] = self.opts.extensions
+
+ browser = await asyncio.to_thread(
+ self.kernel.browsers.create,
+ **create_kwargs,
+ )
+
+ self._session_id = browser.session_id
+ self._live_view_url = getattr(browser, "browser_live_view_url", None)
+
+ print(f"Browser session: {self._session_id}")
+ print(f"Live view: {self._live_view_url}")
+
+ if self.opts.record_replay:
+ try:
+ replay = await asyncio.to_thread(
+ self.kernel.browsers.replays.start, self._session_id,
+ )
+ self._replay_id = replay.replay_id
+ print(f"Replay recording started: {self._replay_id}")
+ except Exception as exc:
+ print(f"Warning: Failed to start replay: {exc}")
+
+ return self.info
+
+ async def stop(self) -> SessionInfo:
+ info = self.info
+
+ if self._session_id:
+ try:
+ if self.opts.record_replay and self._replay_id:
+ if self.opts.replay_grace_period > 0:
+ await asyncio.sleep(self.opts.replay_grace_period)
+ await self._stop_replay()
+ info.replay_view_url = self._replay_view_url
+ finally:
+ print(f"Destroying browser session: {self._session_id}")
+ await asyncio.to_thread(
+ self.kernel.browsers.delete_by_id, self._session_id,
+ )
+
+ self._session_id = None
+ self._live_view_url = None
+ self._replay_id = None
+ self._replay_view_url = None
+
+ return info
+
+ async def _stop_replay(self) -> None:
+ if not self._session_id or not self._replay_id:
+ return
+
+ await asyncio.to_thread(
+ self.kernel.browsers.replays.stop,
+ self._replay_id,
+ id=self._session_id,
+ )
+ await asyncio.sleep(2)
+
+ deadline = time.monotonic() + 60
+ while time.monotonic() < deadline:
+ try:
+ replays = await asyncio.to_thread(
+ self.kernel.browsers.replays.list, self._session_id,
+ )
+ for r in replays:
+ if r.replay_id == self._replay_id:
+ self._replay_view_url = getattr(r, "replay_view_url", None)
+ if self._replay_view_url:
+ print(f"Replay URL: {self._replay_view_url}")
+ return
+ except Exception:
+ pass
+ await asyncio.sleep(1)
+
+ print("Warning: Replay may still be processing.")
diff --git a/pkg/templates/typescript/cua/.env.example b/pkg/templates/typescript/cua/.env.example
new file mode 100644
index 0000000..b56ea3b
--- /dev/null
+++ b/pkg/templates/typescript/cua/.env.example
@@ -0,0 +1,26 @@
+# Copy this file to .env and fill in your API keys.
+# Only the key for your chosen provider is required.
+
+# Primary provider: "anthropic", "openai", or "gemini"
+CUA_PROVIDER=anthropic
+
+# Comma-separated fallback order (optional).
+# If the primary provider fails, these are tried in order.
+# CUA_FALLBACK_PROVIDERS=openai,gemini
+
+# Provider API keys — set the one(s) you plan to use
+ANTHROPIC_API_KEY=your_anthropic_api_key_here
+OPENAI_API_KEY=your_openai_api_key_here
+GOOGLE_API_KEY=your_google_api_key_here
+
+# Browser config (proxy, profile, extensions, timeout) is set per-request
+# via the payload "browser" field, not here. Example:
+# kernel invoke ts-cua cua-task --payload '{
+# "query": "...",
+# "browser": {
+# "proxy_id": "proxy_abc123",
+# "profile": { "name": "my-profile", "save_changes": true },
+# "extensions": [{ "name": "my-extension" }],
+# "timeout_seconds": 600
+# }
+# }'
diff --git a/pkg/templates/typescript/cua/README.md b/pkg/templates/typescript/cua/README.md
new file mode 100644
index 0000000..ada6069
--- /dev/null
+++ b/pkg/templates/typescript/cua/README.md
@@ -0,0 +1,85 @@
+# Unified CUA Template
+
+A multi-provider Computer Use Agent (CUA) template for [Kernel](https://kernel.sh). Supports **Anthropic**, **OpenAI**, and **Google Gemini** as interchangeable backends with automatic fallback.
+
+## Quick start
+
+### 1. Install dependencies
+
+```bash
+npm install
+```
+
+### 2. Configure environment
+
+Copy the example env file and add your API keys:
+
+```bash
+cp .env.example .env
+```
+
+Set `CUA_PROVIDER` to your preferred provider and add the matching API key:
+
+| Provider | Env var for key | Model used |
+|-------------|----------------------|--------------------------------------------|
+| `anthropic` | `ANTHROPIC_API_KEY` | `claude-sonnet-4-6` |
+| `openai` | `OPENAI_API_KEY` | `gpt-5.4` |
+| `gemini` | `GOOGLE_API_KEY` | `gemini-2.5-computer-use-preview-10-2025` |
+
+### 3. Deploy to Kernel
+
+```bash
+kernel deploy index.ts --env-file .env
+```
+
+### 4. Invoke
+
+```bash
+kernel invoke ts-cua cua-task --payload '{"query": "Go to https://news.ycombinator.com and get the top 5 stories"}'
+```
+
+## Multi-provider fallback
+
+Set `CUA_FALLBACK_PROVIDERS` to automatically try another provider if the primary fails:
+
+```env
+CUA_PROVIDER=anthropic
+CUA_FALLBACK_PROVIDERS=openai,gemini
+```
+
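+This will try Anthropic first, then OpenAI, then Gemini. Providers whose API key variable is empty or unset are skipped with a warning. A key counts as configured whenever the variable is non-empty, so remove the placeholder values in `.env` for providers you do not use.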
+
+## Replay recording
+
+Pass `record_replay: true` in the payload to capture a video replay of the browser session:
+
+```bash
+kernel invoke ts-cua cua-task --payload '{"query": "Navigate to example.com", "record_replay": true}'
+```
+
+The response will include a `replay_url` you can open in your browser.
+
+## Project structure
+
+```
+index.ts — Kernel app entrypoint
+session.ts — Browser session lifecycle with replay support
+providers/
+ index.ts — Provider factory and fallback logic
+ anthropic.ts — Anthropic Claude adapter
+ openai.ts — OpenAI GPT adapter
+ gemini.ts — Google Gemini adapter
+```
+
+## Customization
+
+Each provider adapter is self-contained. To customize a provider's behavior (system prompt, model, tool handling), edit the corresponding file in `providers/`.
+
+To add a new provider, create a new file that implements the `CuaProvider` interface and register it in `providers/index.ts`.
+
+## Resources
+
+- [Kernel Docs](https://docs.kernel.sh)
+- [Anthropic Computer Use](https://docs.anthropic.com/en/docs/agents-and-tools/computer-use)
+- [OpenAI Computer Use](https://platform.openai.com/docs/guides/computer-use)
+- [Google Gemini Computer Use](https://ai.google.dev/gemini-api/docs/computer-use)
diff --git a/pkg/templates/typescript/cua/_gitignore b/pkg/templates/typescript/cua/_gitignore
new file mode 100644
index 0000000..0b43630
--- /dev/null
+++ b/pkg/templates/typescript/cua/_gitignore
@@ -0,0 +1,32 @@
+# Dependencies
+node_modules/
+package-lock.json
+
+# TypeScript
+*.tsbuildinfo
+dist/
+build/
+
+# Environment
+.env
+.env.local
+.env.*.local
+
+# IDE
+.vscode/
+.idea/
+*.swp
+*.swo
+
+# OS
+.DS_Store
+Thumbs.db
+
+# Logs
+logs/
+*.log
+npm-debug.log*
+
+# Misc
+.cache/
+.temp/
diff --git a/pkg/templates/typescript/cua/index.ts b/pkg/templates/typescript/cua/index.ts
new file mode 100644
index 0000000..dc165f0
--- /dev/null
+++ b/pkg/templates/typescript/cua/index.ts
@@ -0,0 +1,122 @@
+/**
+ * Unified CUA (Computer Use Agent) template with multi-provider support.
+ *
+ * Supports Anthropic, OpenAI, and Gemini as interchangeable providers.
+ * Configure via environment variables:
+ * CUA_PROVIDER — primary provider ("anthropic", "openai", or "gemini")
+ * CUA_FALLBACK_PROVIDERS — comma-separated fallback order (optional)
+ *
+ * Each provider requires its own API key:
+ * ANTHROPIC_API_KEY, OPENAI_API_KEY, GOOGLE_API_KEY
+ */
+
+import { Kernel, type KernelContext } from '@onkernel/sdk';
+import { KernelBrowserSession } from './session';
+import { resolveProviders, runWithFallback, type ProviderName } from './providers/index';
+
+const kernel = new Kernel();
+const app = kernel.app('ts-cua');
+
+/**
+ * Optional per-request browser settings. Each field that is set is forwarded
+ * to the KernelBrowserSession options when a new browser is created.
+ */
+interface BrowserConfig {
+ proxy_id?: string;
+ profile?: { id?: string; name?: string; save_changes?: boolean };
+ extensions?: Array<{ id?: string; name?: string }>;
+ timeout_seconds?: number;
+}
+
+/** Payload accepted by the `cua-task` action. */
+interface CuaInput {
+ /** Task description; the action throws when it is missing or empty. */
+ query: string;
+ /** Moves this provider to the front of the configured provider order. */
+ provider?: ProviderName;
+ /** Passed through to the provider as the model override. */
+ model?: string;
+ /** Record a replay of a newly created browser session (defaults to false). */
+ record_replay?: boolean;
+ /** Run against this existing browser session instead of creating one. */
+ session_id?: string;
+ browser?: BrowserConfig;
+}
+
+/** Result returned by the `cua-task` action. */
+interface CuaOutput {
+ result: string;
+ /** Name of the provider that produced the result. */
+ provider: string;
+ /** Replay view URL reported when the session was stopped, if any. */
+ replay_url?: string;
+}
+
+// Provider resolution is deferred to the action handler because env vars
+// are not available during Hypeman's build/discovery phase.
+let _providers: ReturnType<typeof resolveProviders> | null = null;
+
+/**
+ * Resolve the configured providers on first use and cache the list for
+ * later invocations. Throws whatever resolveProviders() throws.
+ */
+function getProviders(): ReturnType<typeof resolveProviders> {
+  if (!_providers) {
+    _providers = resolveProviders();
+    console.log(`Configured providers: ${_providers.map(p => p.name).join(' -> ')}`);
+  }
+  return _providers;
+}
+
+/**
+ * `cua-task` action: run a computer-use task with provider fallback.
+ *
+ * With `session_id` the task runs on an existing browser, which is left
+ * running afterwards. Otherwise a browser is created for the task and
+ * always stopped, whether the task succeeds or fails.
+ */
+app.action(
+  'cua-task',
+  async (ctx: KernelContext, payload?: CuaInput): Promise<CuaOutput> => {
+    if (!payload?.query) {
+      throw new Error('Query is required. Payload must include: { "query": "your task description" }');
+    }
+
+    let providers = getProviders();
+
+    // Per-request provider override: move requested provider to front
+    if (payload.provider) {
+      const requested = providers.find(p => p.name === payload.provider);
+      if (requested) {
+        providers = [requested, ...providers.filter(p => p !== requested)];
+      } else {
+        console.warn(`Requested provider "${payload.provider}" is not configured; using the default order.`);
+      }
+    }
+
+    // Use an existing browser session (BYOB) or create a new one.
+    // BYOB is useful for multi-turn CUA on a persistent browser, or HITL
+    // where a human uses the live view between CUA calls.
+    if (payload.session_id) {
+      const browser = await kernel.browsers.retrieve(payload.session_id);
+      const { result, provider } = await runWithFallback(providers, {
+        query: payload.query,
+        model: payload.model,
+        kernel,
+        sessionId: payload.session_id,
+        viewportWidth: browser.viewport?.width ?? 1280,
+        viewportHeight: browser.viewport?.height ?? 800,
+      });
+      return { result, provider };
+    }
+
+    const session = new KernelBrowserSession(kernel, {
+      invocationId: ctx.invocation_id,
+      stealth: true,
+      recordReplay: payload.record_replay ?? false,
+      ...(payload.browser?.proxy_id ? { proxyId: payload.browser.proxy_id } : {}),
+      ...(payload.browser?.profile ? { profile: payload.browser.profile } : {}),
+      ...(payload.browser?.extensions ? { extensions: payload.browser.extensions } : {}),
+      ...(payload.browser?.timeout_seconds ? { timeoutSeconds: payload.browser.timeout_seconds } : {}),
+    });
+
+    await session.start();
+    console.log('Live view:', session.liveViewUrl);
+
+    let outcome: { result: string; provider: string };
+    try {
+      outcome = await runWithFallback(providers, {
+        query: payload.query,
+        model: payload.model,
+        kernel,
+        sessionId: session.sessionId,
+        viewportWidth: session.viewportWidth,
+        viewportHeight: session.viewportHeight,
+      });
+    } catch (error) {
+      console.error('CUA task failed:', error);
+      // Best-effort cleanup: a failure here must not hide the task error.
+      try {
+        await session.stop();
+      } catch (stopError) {
+        console.error('Failed to stop browser session:', stopError);
+      }
+      throw error;
+    }
+
+    // Outside the try block so a failing stop() is not followed by a second
+    // stop() attempt from the error path above.
+    const sessionInfo = await session.stop();
+
+    return {
+      result: outcome.result,
+      provider: outcome.provider,
+      replay_url: sessionInfo.replayViewUrl,
+    };
+  },
+);
diff --git a/pkg/templates/typescript/cua/package.json b/pkg/templates/typescript/cua/package.json
new file mode 100644
index 0000000..08c932d
--- /dev/null
+++ b/pkg/templates/typescript/cua/package.json
@@ -0,0 +1,16 @@
+{
+ "name": "ts-cua",
+ "module": "index.ts",
+ "type": "module",
+ "private": true,
+ "dependencies": {
+ "@anthropic-ai/sdk": "^0.86.1",
+ "@google/genai": "^1.49.0",
+ "@onkernel/sdk": "^0.47.0",
+ "openai": "^6.33.0"
+ },
+ "devDependencies": {
+ "@types/node": "^22.15.17",
+ "typescript": "^5.9.3"
+ }
+}
diff --git a/pkg/templates/typescript/cua/providers/anthropic.ts b/pkg/templates/typescript/cua/providers/anthropic.ts
new file mode 100644
index 0000000..6c8e4e8
--- /dev/null
+++ b/pkg/templates/typescript/cua/providers/anthropic.ts
@@ -0,0 +1,229 @@
+/**
+ * Anthropic CUA provider adapter.
+ *
+ * Uses the Anthropic SDK's beta computer-use API with Claude models.
+ */
+
+import { Anthropic } from '@anthropic-ai/sdk';
+import type { CuaProvider, TaskOptions, TaskResult } from './index';
+
+/**
+ * Build the system prompt sent with every Anthropic request.
+ *
+ * The prompt has two tagged sections: a description of the environment
+ * (including today's date, formatted for en-US) and browser-specific
+ * instructions. The section tags keep the two groups of rules apart.
+ */
+function getSystemPrompt(): string {
+  const date = new Date().toLocaleDateString('en-US', { weekday: 'long', month: 'long', day: 'numeric', year: 'numeric' });
+  return `<SYSTEM_CAPABILITY>
+* You are utilising an Ubuntu virtual machine with internet access.
+* When you connect to the display, CHROMIUM IS ALREADY OPEN.
+* If you need to navigate to a new page, use ctrl+l to focus the url bar and then enter the url.
+* After each step, take a screenshot and carefully evaluate if you have achieved the right outcome.
+* Explicitly show your thinking: "I have evaluated step X..." If not correct, try again.
+* Only when you confirm a step was executed correctly should you move on to the next one.
+* The current date is ${date}.
+</SYSTEM_CAPABILITY>
+
+<IMPORTANT>
+* When using Chromium, if a startup wizard appears, IGNORE IT.
+* Click on the search bar and enter the appropriate URL there.
+</IMPORTANT>`;
+}
+
+type BetaMessageParam = Anthropic.Beta.Messages.BetaMessageParam;
+type BetaContentBlockParam = Anthropic.Beta.Messages.BetaContentBlockParam;
+
+/**
+ * CUA provider backed by Anthropic's beta computer-use tool.
+ *
+ * Each tool call from the model is translated into Kernel computer-control
+ * calls, and a fresh screenshot is returned as the tool result.
+ */
+export class AnthropicProvider implements CuaProvider {
+  readonly name = 'anthropic';
+  private apiKey: string;
+
+  constructor() {
+    this.apiKey = process.env.ANTHROPIC_API_KEY ?? '';
+  }
+
+  /** True when ANTHROPIC_API_KEY was non-empty at construction time. */
+  isConfigured(): boolean {
+    return this.apiKey.length > 0;
+  }
+
+  /**
+   * Run the request/tool-result loop until the model ends its turn or
+   * produces a response without tool calls.
+   *
+   * @param options task query, Kernel client, browser session and viewport
+   * @returns the concatenated text of the final assistant response
+   * @throws any error raised by the Anthropic client; failures of individual
+   *   browser actions are reported back to the model as tool errors instead
+   */
+  async runTask(options: TaskOptions): Promise<TaskResult> {
+    const { query, kernel, sessionId, viewportWidth = 1280, viewportHeight = 800 } = options;
+    const client = new Anthropic({ apiKey: this.apiKey, maxRetries: 4 });
+    const model = options.model || 'claude-sonnet-4-6';
+
+    const messages: BetaMessageParam[] = [{ role: 'user', content: query }];
+
+    while (true) {
+      const response = await client.beta.messages.create({
+        max_tokens: 4096,
+        messages,
+        model,
+        system: [{ type: 'text', text: getSystemPrompt(), cache_control: { type: 'ephemeral' } }],
+        tools: [{
+          type: 'computer_20251124',
+          name: 'computer',
+          display_width_px: viewportWidth,
+          display_height_px: viewportHeight,
+          display_number: 1,
+        }],
+        betas: ['computer-use-2025-11-24', 'prompt-caching-2024-07-31'],
+        thinking: { type: 'enabled', budget_tokens: 1024 },
+      });
+
+      // Build assistant content for the messages array
+      const assistantContent: BetaContentBlockParam[] = response.content.map(block => {
+        if (block.type === 'thinking') {
+          return { type: 'thinking' as const, thinking: block.thinking, signature: block.signature };
+        }
+        if (block.type === 'text') {
+          return { type: 'text' as const, text: block.text };
+        }
+        if (block.type === 'tool_use') {
+          return { type: 'tool_use' as const, id: block.id, name: block.name, input: block.input };
+        }
+        return block as unknown as BetaContentBlockParam;
+      });
+      messages.push({ role: 'assistant', content: assistantContent });
+
+      const text = response.content
+        .filter((b): b is Anthropic.Beta.Messages.BetaTextBlock => b.type === 'text')
+        .map(b => b.text)
+        .join('');
+
+      if (response.stop_reason === 'end_turn') {
+        return { result: text, provider: this.name };
+      }
+
+      // Process tool calls
+      const toolResults: BetaContentBlockParam[] = [];
+      for (const block of response.content) {
+        if (block.type !== 'tool_use') continue;
+
+        const input = block.input as Record<string, unknown>;
+        const action = input.action as string;
+
+        try {
+          const screenshot = await this.executeAction(kernel, sessionId, action, input);
+          toolResults.push({
+            type: 'tool_result' as unknown as 'text',
+            tool_use_id: block.id,
+            content: [{ type: 'image', source: { type: 'base64', media_type: 'image/png', data: screenshot } }],
+          } as unknown as BetaContentBlockParam);
+        } catch (error) {
+          // Action failures go back to the model so it can retry or adapt.
+          toolResults.push({
+            type: 'tool_result' as unknown as 'text',
+            tool_use_id: block.id,
+            content: [{ type: 'text', text: `Error: ${error instanceof Error ? error.message : String(error)}` }],
+            is_error: true,
+          } as unknown as BetaContentBlockParam);
+        }
+      }
+
+      if (toolResults.length === 0) {
+        // No tool use and not end_turn — model is done
+        return { result: text || '(no response)', provider: this.name };
+      }
+      messages.push({ role: 'user', content: toolResults });
+    }
+  }
+
+  /**
+   * Execute one computer-tool action and return a base64 PNG screenshot
+   * taken about 500 ms afterwards.
+   *
+   * Field names follow the Anthropic computer tool input: `text` for
+   * key/hold_key/type, `coordinate`/`start_coordinate` for pointer actions,
+   * `scroll_direction`/`scroll_amount` for scroll, and `duration` in
+   * seconds. The names this adapter read previously (`key`, `direction`,
+   * `amount`) are still accepted as fallbacks.
+   *
+   * @throws Error for an unknown action or a missing/invalid argument
+   */
+  private async executeAction(
+    kernel: TaskOptions['kernel'],
+    sessionId: string,
+    action: string,
+    input: Record<string, unknown>,
+  ): Promise<string> {
+    const computer = kernel.browsers.computer;
+
+    switch (action) {
+      case 'screenshot': break;
+      case 'key': {
+        const key = this.readKeys(input);
+        await computer.pressKey(sessionId, { keys: [this.mapKey(key)] });
+        break;
+      }
+      case 'hold_key': {
+        const key = this.readKeys(input);
+        // The tool reports seconds; Kernel expects milliseconds.
+        const seconds = typeof input.duration === 'number' ? input.duration : 0.5;
+        await computer.pressKey(sessionId, { keys: [this.mapKey(key)], duration: Math.round(seconds * 1000) });
+        break;
+      }
+      case 'type': {
+        const text = input.text as string;
+        await computer.typeText(sessionId, { text });
+        break;
+      }
+      case 'cursor_position': break;
+      case 'mouse_move': {
+        const [x, y] = this.readPoint(input.coordinate, 'coordinate');
+        await computer.moveMouse(sessionId, { x, y });
+        break;
+      }
+      case 'left_click':
+      case 'right_click':
+      case 'middle_click': {
+        const [x, y] = this.readPoint(input.coordinate, 'coordinate');
+        const button = action === 'right_click' ? 'right' : action === 'middle_click' ? 'middle' : 'left';
+        await computer.clickMouse(sessionId, { x, y, button });
+        break;
+      }
+      case 'double_click': {
+        const [x, y] = this.readPoint(input.coordinate, 'coordinate');
+        await computer.clickMouse(sessionId, { x, y, num_clicks: 2 });
+        break;
+      }
+      case 'triple_click': {
+        const [x, y] = this.readPoint(input.coordinate, 'coordinate');
+        await computer.clickMouse(sessionId, { x, y, num_clicks: 3 });
+        break;
+      }
+      case 'left_click_drag': {
+        const [sx, sy] = this.readPoint(input.start_coordinate, 'start_coordinate');
+        const [ex, ey] = this.readPoint(input.coordinate, 'coordinate');
+        await computer.dragMouse(sessionId, {
+          path: [
+            [sx, sy],
+            [ex, ey],
+          ],
+        });
+        break;
+      }
+      case 'scroll': {
+        const [x, y] = this.readPoint(input.coordinate, 'coordinate');
+        const direction = (input.scroll_direction ?? input.direction) as string | undefined;
+        const rawAmount = input.scroll_amount ?? input.amount;
+        const amount = typeof rawAmount === 'number' ? rawAmount : 3;
+        const deltaX = direction === 'left' ? -amount : direction === 'right' ? amount : 0;
+        const deltaY = direction === 'up' ? -amount : direction === 'down' ? amount : 0;
+        await computer.scroll(sessionId, { x, y, delta_x: deltaX, delta_y: deltaY });
+        break;
+      }
+      case 'wait': {
+        // The tool reports seconds; setTimeout expects milliseconds.
+        const seconds = typeof input.duration === 'number' ? input.duration : 1;
+        await new Promise(r => setTimeout(r, seconds * 1000));
+        break;
+      }
+      default:
+        throw new Error(`Unknown action: ${action}`);
+    }
+
+    // Take screenshot after every action
+    await new Promise(r => setTimeout(r, 500));
+    const resp = await computer.captureScreenshot(sessionId);
+    const buf = Buffer.from(await resp.arrayBuffer());
+    return buf.toString('base64');
+  }
+
+  /** Read the key or key combination of a key/hold_key action. */
+  private readKeys(input: Record<string, unknown>): string {
+    const raw = input.text ?? input.key;
+    if (typeof raw !== 'string' || raw.length === 0) {
+      throw new Error('Key action requires a non-empty "text" value');
+    }
+    return raw;
+  }
+
+  /** Validate an [x, y] pair so a missing coordinate gives a clear tool error. */
+  private readPoint(value: unknown, field: string): [number, number] {
+    if (!Array.isArray(value) || typeof value[0] !== 'number' || typeof value[1] !== 'number') {
+      throw new Error(`Action requires "${field}" as [x, y]`);
+    }
+    return [value[0], value[1]];
+  }
+
+  /**
+   * Translate a key name, or a "+"-separated combination, to the keysym
+   * names passed to Kernel. Unknown names pass through unchanged.
+   */
+  private mapKey(key: string): string {
+    const map: Record<string, string> = {
+      Return: 'Return', Enter: 'Return', Backspace: 'BackSpace',
+      Tab: 'Tab', Escape: 'Escape', space: 'space', Space: 'space',
+      Up: 'Up', Down: 'Down', Left: 'Left', Right: 'Right',
+      Home: 'Home', End: 'End', Page_Up: 'Prior', Page_Down: 'Next',
+      ctrl: 'Control_L', Control_L: 'Control_L', alt: 'Alt_L', Alt_L: 'Alt_L',
+      shift: 'Shift_L', Shift_L: 'Shift_L', super: 'Super_L', Super_L: 'Super_L',
+    };
+    // Handle combos like "ctrl+l"
+    if (key.includes('+')) {
+      return key.split('+').map(k => map[k.trim()] ?? k.trim()).join('+');
+    }
+    return map[key] ?? key;
+  }
+}
diff --git a/pkg/templates/typescript/cua/providers/gemini.ts b/pkg/templates/typescript/cua/providers/gemini.ts
new file mode 100644
index 0000000..1cc1ba3
--- /dev/null
+++ b/pkg/templates/typescript/cua/providers/gemini.ts
@@ -0,0 +1,247 @@
+/**
+ * Gemini CUA provider adapter.
+ *
+ * Uses Google's GenAI SDK with the computer-use-preview model.
+ */
+
+import {
+ GoogleGenAI,
+ Environment,
+ type Content,
+ type FunctionCall,
+ type Part,
+} from '@google/genai';
+import type { CuaProvider, TaskOptions, TaskResult } from './index';
+
+// Gemini uses a 0-1000 coordinate scale that maps to actual screen pixels.
+const COORDINATE_SCALE = 1000;
+// Viewport size assumed when the task options carry no viewport dimensions.
+// NOTE(review): 1200 differs from the 1280 default used elsewhere in this
+// template — confirm this is intentional.
+const DEFAULT_WIDTH = 1200;
+const DEFAULT_HEIGHT = 800;
+
+// Function names treated as built-in computer-use actions: a screenshot is
+// attached to the function response only for names in this list.
+const PREDEFINED_ACTIONS = [
+ 'click_at', 'hover_at', 'type_text_at', 'scroll_document',
+ 'scroll_at', 'wait_5_seconds', 'go_back', 'go_forward',
+ 'search', 'navigate', 'key_combination', 'drag_and_drop',
+ 'open_web_browser',
+];
+
+/**
+ * Build the system instruction for Gemini: five fixed guidance lines
+ * followed by today's date (en-US long form), joined with newlines.
+ */
+function getSystemPrompt(): string {
+  const today = new Date().toLocaleDateString('en-US', {
+    weekday: 'long', year: 'numeric', month: 'long', day: 'numeric',
+  });
+  const lines = [
+    'You are a helpful assistant that can use a web browser.',
+    'You are operating a Chrome browser through computer use tools.',
+    'The browser is already open and ready for use.',
+    'When you need to navigate to a page, use the navigate action with a full URL.',
+    'After each action, carefully evaluate the screenshot to determine your next step.',
+    `Current date: ${today}.`,
+  ];
+  return lines.join('\n');
+}
+
+/**
+ * Arguments of a Gemini function call as read by GeminiProvider.
+ * All fields are optional; x/y and the start/end pairs are denormalized
+ * from the 0-1000 scale before use. Any other argument the model sends is
+ * reachable through the index signature as `unknown`.
+ */
+interface GeminiArgs {
+ x?: number;
+ y?: number;
+ text?: string;
+ url?: string;
+ key_combination?: string;
+ direction?: string;
+ magnitude?: number;
+ start_x?: number;
+ start_y?: number;
+ end_x?: number;
+ end_y?: number;
+ // Logged when decision is 'require_confirmation'.
+ safety_decision?: { decision: string; explanation?: string };
+ [key: string]: unknown;
+}
+
+/**
+ * CUA provider backed by Gemini's computer-use tool.
+ *
+ * Function calls returned by the model are executed through Kernel's
+ * computer-control API; each response carries a screenshot for the
+ * built-in actions.
+ */
+export class GeminiProvider implements CuaProvider {
+  readonly name = 'gemini';
+  private apiKey: string;
+
+  constructor() {
+    this.apiKey = process.env.GOOGLE_API_KEY ?? '';
+  }
+
+  /** True when GOOGLE_API_KEY was non-empty at construction time. */
+  isConfigured(): boolean {
+    return this.apiKey.length > 0;
+  }
+
+  /**
+   * Run the generate/execute loop for at most 50 model turns.
+   *
+   * @returns the model's text once it stops issuing function calls, or
+   *   "(max iterations reached)" when the turn budget is used up
+   * @throws Error when the model returns no candidate content, so the
+   *   caller's fallback logic can try another provider
+   */
+  async runTask(options: TaskOptions): Promise<TaskResult> {
+    const { query, kernel, sessionId } = options;
+    const width = options.viewportWidth ?? DEFAULT_WIDTH;
+    const height = options.viewportHeight ?? DEFAULT_HEIGHT;
+    const ai = new GoogleGenAI({ apiKey: this.apiKey });
+    const model = options.model || 'gemini-2.5-computer-use-preview-10-2025';
+
+    const contents: Content[] = [{ role: 'user', parts: [{ text: query }] }];
+    const maxIterations = 50;
+
+    for (let i = 0; i < maxIterations; i++) {
+      const response = await ai.models.generateContent({
+        model,
+        contents,
+        config: {
+          temperature: 1,
+          topP: 0.95,
+          topK: 40,
+          maxOutputTokens: 8192,
+          systemInstruction: getSystemPrompt(),
+          tools: [{ computerUse: { environment: Environment.ENVIRONMENT_BROWSER } }],
+          thinkingConfig: { includeThoughts: true },
+        },
+      });
+
+      const candidateContent = response.candidates?.[0]?.content;
+      if (!candidateContent) {
+        // An empty response is a failure, not an exhausted turn budget.
+        throw new Error('Gemini returned no candidate content');
+      }
+      contents.push(candidateContent);
+
+      // Extract text and function calls
+      const parts = candidateContent.parts ?? [];
+      const textParts = parts
+        .filter(p => 'text' in p && p.text)
+        .map(p => (p as { text: string }).text);
+      const functionCalls = parts
+        .filter(p => 'functionCall' in p)
+        .map(p => (p as { functionCall: FunctionCall }).functionCall);
+
+      if (functionCalls.length === 0) {
+        return { result: textParts.join(' ') || '(no response)', provider: this.name };
+      }
+
+      // Execute function calls
+      const responses: Part[] = [];
+      for (const fc of functionCalls) {
+        if (!fc.name) continue;
+        const args = (fc.args ?? {}) as GeminiArgs;
+
+        if (args.safety_decision?.decision === 'require_confirmation') {
+          console.log('Safety check:', args.safety_decision.explanation);
+        }
+
+        const result = await this.executeAction(kernel, sessionId, fc.name, args, width, height);
+
+        const responseData: Record<string, unknown> = { url: result.url || 'about:blank' };
+        const part: Part = {
+          functionResponse: {
+            name: fc.name,
+            response: result.error ? { error: result.error, url: 'about:blank' } : responseData,
+            ...(result.screenshot && PREDEFINED_ACTIONS.includes(fc.name) ? {
+              parts: [{ inlineData: { mimeType: 'image/png', data: result.screenshot } }],
+            } : {}),
+          },
+        };
+        responses.push(part);
+      }
+
+      contents.push({ role: 'user', parts: responses });
+    }
+
+    return { result: '(max iterations reached)', provider: this.name };
+  }
+
+  /** Map a 0-1000 coordinate onto a pixel dimension; undefined maps to 0. */
+  private denormalize(value: number | undefined, dimension: number): number {
+    if (value === undefined) return 0;
+    return Math.round((value / COORDINATE_SCALE) * dimension);
+  }
+
+  /** Return the value when it is a number, otherwise undefined. */
+  private num(value: unknown): number | undefined {
+    return typeof value === 'number' ? value : undefined;
+  }
+
+  /**
+   * Execute one Gemini action and capture a screenshot afterwards.
+   *
+   * Argument names follow the Gemini computer-use action list
+   * (`keys` for key_combination; `x`/`y`/`destination_x`/`destination_y`
+   * for drag_and_drop). The names this adapter read previously are kept as
+   * fallbacks. Errors are returned in the `error` field, never thrown.
+   */
+  private async executeAction(
+    kernel: TaskOptions['kernel'],
+    sessionId: string,
+    name: string,
+    args: GeminiArgs,
+    width: number,
+    height: number,
+  ): Promise<{ screenshot?: string; url?: string; error?: string }> {
+    const computer = kernel.browsers.computer;
+
+    try {
+      switch (name) {
+        case 'click_at': {
+          const x = this.denormalize(args.x, width);
+          const y = this.denormalize(args.y, height);
+          await computer.clickMouse(sessionId, { x, y });
+          break;
+        }
+        case 'hover_at': {
+          const x = this.denormalize(args.x, width);
+          const y = this.denormalize(args.y, height);
+          await computer.moveMouse(sessionId, { x, y });
+          break;
+        }
+        case 'type_text_at': {
+          const x = this.denormalize(args.x, width);
+          const y = this.denormalize(args.y, height);
+          await computer.clickMouse(sessionId, { x, y });
+          // Clearing is the documented default; skip only when disabled.
+          if (args.clear_before_typing !== false) {
+            await computer.pressKey(sessionId, { keys: ['a'], hold_keys: ['Control_L'] });
+            await computer.pressKey(sessionId, { keys: ['BackSpace'] });
+          }
+          if (args.text) {
+            await computer.typeText(sessionId, { text: args.text });
+          }
+          // Submit only when the model asks for it explicitly.
+          if (args.press_enter === true) {
+            await computer.pressKey(sessionId, { keys: ['Return'] });
+          }
+          break;
+        }
+        case 'scroll_document':
+        case 'scroll_at': {
+          // scroll_document has no target point; use the viewport centre.
+          const x = name === 'scroll_at' ? this.denormalize(args.x, width) : Math.round(width / 2);
+          const y = name === 'scroll_at' ? this.denormalize(args.y, height) : Math.round(height / 2);
+          const magnitude = args.magnitude ?? 3;
+          const dir = args.direction ?? 'down';
+          const deltaY = dir === 'up' ? -magnitude : dir === 'down' ? magnitude : 0;
+          const deltaX = dir === 'left' ? -magnitude : dir === 'right' ? magnitude : 0;
+          await computer.scroll(sessionId, { x, y, delta_x: deltaX, delta_y: deltaY });
+          break;
+        }
+        case 'wait_5_seconds':
+          await new Promise(r => setTimeout(r, 5000));
+          break;
+        case 'go_back':
+          await computer.pressKey(sessionId, { keys: ['Left'], hold_keys: ['Alt_L'] });
+          break;
+        case 'go_forward':
+          await computer.pressKey(sessionId, { keys: ['Right'], hold_keys: ['Alt_L'] });
+          break;
+        case 'navigate':
+        case 'search': {
+          // "search" carries no URL, so it opens a search engine home page.
+          const url = args.url ?? args.text ?? (name === 'search' ? 'https://www.google.com' : '');
+          if (!url) {
+            return { error: 'navigate requires a url' };
+          }
+          await computer.batch(sessionId, {
+            actions: [
+              { type: 'press_key', press_key: { keys: ['l'], hold_keys: ['Control_L'] } },
+              { type: 'sleep', sleep: { duration_ms: 200 } },
+              { type: 'press_key', press_key: { keys: ['a'], hold_keys: ['Control_L'] } },
+              { type: 'type_text', type_text: { text: url } },
+              { type: 'press_key', press_key: { keys: ['Return'] } },
+            ] as Parameters<typeof computer.batch>[1]['actions'],
+          });
+          await new Promise(r => setTimeout(r, 1500));
+          break;
+        }
+        case 'key_combination': {
+          const combo = typeof args.keys === 'string' ? args.keys : args.key_combination ?? '';
+          const parts = combo.split('+').map(k => k.trim()).filter(Boolean);
+          if (parts.length === 0) {
+            return { error: 'key_combination requires keys' };
+          }
+          // Every key but the last is held while the last one is pressed.
+          const holdKeys = parts.slice(0, -1);
+          const keys = parts.slice(-1);
+          await computer.pressKey(sessionId, {
+            keys,
+            ...(holdKeys.length > 0 ? { hold_keys: holdKeys } : {}),
+          });
+          break;
+        }
+        case 'drag_and_drop': {
+          const sx = this.denormalize(args.x ?? args.start_x, width);
+          const sy = this.denormalize(args.y ?? args.start_y, height);
+          const ex = this.denormalize(this.num(args.destination_x) ?? args.end_x, width);
+          const ey = this.denormalize(this.num(args.destination_y) ?? args.end_y, height);
+          await computer.dragMouse(sessionId, { path: [[sx, sy], [ex, ey]] });
+          break;
+        }
+        case 'open_web_browser':
+          break;
+        default:
+          return { error: `Unknown action: ${name}` };
+      }
+
+      // Take screenshot after action
+      await new Promise(r => setTimeout(r, 500));
+      const resp = await computer.captureScreenshot(sessionId);
+      const buf = Buffer.from(await resp.arrayBuffer());
+      return { screenshot: buf.toString('base64'), url: 'about:blank' };
+    } catch (error) {
+      return { error: error instanceof Error ? error.message : String(error) };
+    }
+  }
+}
diff --git a/pkg/templates/typescript/cua/providers/index.ts b/pkg/templates/typescript/cua/providers/index.ts
new file mode 100644
index 0000000..a1766d9
--- /dev/null
+++ b/pkg/templates/typescript/cua/providers/index.ts
@@ -0,0 +1,114 @@
+/**
+ * Provider factory with automatic fallback.
+ *
+ * Resolution order:
+ * 1. CUA_PROVIDER env var (primary; when unset, only the fallback list is used)
+ * 2. CUA_FALLBACK_PROVIDERS env var (optional, comma-separated)
+ *
+ * A provider is "available" when its API key env var is set.
+ */
+
+import type { Kernel } from '@onkernel/sdk';
+import { AnthropicProvider } from './anthropic';
+import { OpenAIProvider } from './openai';
+import { GeminiProvider } from './gemini';
+
+// Shared interface every provider adapter must implement.
+
+/** Inputs for a single task run. */
+export interface TaskOptions {
+  /** Natural-language task description. */
+  query: string;
+  /** Model override; each adapter falls back to its own default. */
+  model?: string;
+  /** Kernel client used to control the browser. */
+  kernel: Kernel;
+  /** ID of the browser session to operate on. */
+  sessionId: string;
+  viewportWidth?: number;
+  viewportHeight?: number;
+}
+
+/** Outcome of a task run. */
+export interface TaskResult {
+  result: string;
+  /** Name of the provider that produced the result. */
+  provider: string;
+}
+
+export interface CuaProvider {
+  readonly name: string;
+  /** Whether the provider can be used; resolveProviders() skips it otherwise. */
+  isConfigured(): boolean;
+  /** Run the task; a rejection makes runWithFallback try the next provider. */
+  runTask(options: TaskOptions): Promise<TaskResult>;
+}
+
+export type ProviderName = 'anthropic' | 'openai' | 'gemini';
+
+// Factories keyed by provider name. Keys are plain strings because lookups
+// use names read from environment variables.
+const PROVIDERS: Record<string, () => CuaProvider> = {
+  anthropic: () => new AnthropicProvider(),
+  openai: () => new OpenAIProvider(),
+  gemini: () => new GeminiProvider(),
+};
+
+/**
+ * Build the ordered list of providers to try.
+ *
+ * The order is CUA_PROVIDER (if set) followed by CUA_FALLBACK_PROVIDERS,
+ * lower-cased and de-duplicated. Unknown names and providers without an
+ * API key are skipped with a warning.
+ *
+ * @throws Error if no configured provider is found
+ */
+export function resolveProviders(): CuaProvider[] {
+  const primaryName = (process.env.CUA_PROVIDER ?? '').trim().toLowerCase();
+  const fallbackNames = (process.env.CUA_FALLBACK_PROVIDERS ?? '')
+    .split(',')
+    .map(s => s.trim().toLowerCase())
+    .filter(Boolean);
+
+  const order = primaryName ? [primaryName, ...fallbackNames] : fallbackNames;
+
+  // Deduplicate while preserving order
+  const seen = new Set<string>();
+  const providers: CuaProvider[] = [];
+
+  for (const name of order) {
+    if (seen.has(name)) continue;
+    seen.add(name);
+
+    // Own-property lookup: a name such as "constructor" must not resolve to
+    // a member inherited from Object.prototype.
+    const factory = Object.prototype.hasOwnProperty.call(PROVIDERS, name)
+      ? PROVIDERS[name]
+      : undefined;
+    if (!factory) {
+      console.warn(`Unknown provider "${name}", skipping.`);
+      continue;
+    }
+
+    const provider = factory();
+    if (provider.isConfigured()) {
+      providers.push(provider);
+    } else {
+      console.warn(`Provider "${name}" is not configured (missing API key), skipping.`);
+    }
+  }
+
+  if (providers.length === 0) {
+    const available = Object.keys(PROVIDERS).join(', ');
+    throw new Error(
+      'No CUA provider is configured. ' +
+        `Set CUA_PROVIDER to one of: ${available}, and provide the matching API key.`,
+    );
+  }
+
+  return providers;
+}
+
+/**
+ * Run a CUA task, trying each provider in order until one succeeds.
+ *
+ * @param providers providers to try, in priority order
+ * @param options task options passed unchanged to every provider
+ * @returns the result of the first provider whose runTask() resolves
+ * @throws Error when the list is empty, or when every provider fails (the
+ *   message then lists each provider's error)
+ */
+export async function runWithFallback(
+  providers: CuaProvider[],
+  options: TaskOptions,
+): Promise<TaskResult> {
+  if (providers.length === 0) {
+    // Without this the error below would list no provider at all.
+    throw new Error('No providers to try.');
+  }
+
+  const errors: Array<{ provider: string; error: unknown }> = [];
+
+  for (const provider of providers) {
+    try {
+      console.log(`Attempting provider: ${provider.name}`);
+      return await provider.runTask(options);
+    } catch (error) {
+      console.error(`Provider "${provider.name}" failed:`, error);
+      errors.push({ provider: provider.name, error });
+    }
+  }
+
+  const summary = errors
+    .map(e => `  ${e.provider}: ${e.error instanceof Error ? e.error.message : String(e.error)}`)
+    .join('\n');
+  throw new Error(`All providers failed:\n${summary}`);
+}
diff --git a/pkg/templates/typescript/cua/providers/openai.ts b/pkg/templates/typescript/cua/providers/openai.ts
new file mode 100644
index 0000000..a83274e
--- /dev/null
+++ b/pkg/templates/typescript/cua/providers/openai.ts
@@ -0,0 +1,255 @@
+/**
+ * OpenAI CUA provider adapter.
+ *
+ * Uses the OpenAI Responses API with computer use tool.
+ */
+
+import OpenAI from 'openai';
+import type {
+ ResponseInputItem,
+ ResponseItem,
+ ResponseComputerToolCall,
+ ResponseOutputMessage,
+} from 'openai/resources/responses/responses';
+import type { CuaProvider, TaskOptions, TaskResult } from './index';
+
+ /**
+  * Lookup from the key-name spellings listed here to the key names this module
+  * sends in `press_key` actions (the values look like X11 keysym names —
+  * NOTE(review): confirm against the Kernel computer API). Lookups are
+  * case-sensitive, and `translateKeys` passes unlisted names through unchanged.
+  */
+ const KEYSYM_MAP: Record<string, string> = {
+   ENTER: 'Return', Enter: 'Return', RETURN: 'Return',
+   BACKSPACE: 'BackSpace', Backspace: 'BackSpace',
+   DELETE: 'Delete', TAB: 'Tab', ESCAPE: 'Escape', Escape: 'Escape',
+   SPACE: 'space', Space: 'space',
+   UP: 'Up', DOWN: 'Down', LEFT: 'Left', RIGHT: 'Right',
+   HOME: 'Home', END: 'End',
+   PAGEUP: 'Prior', PAGE_UP: 'Prior', PageUp: 'Prior',
+   PAGEDOWN: 'Next', PAGE_DOWN: 'Next', PageDown: 'Next',
+   CTRL: 'Control_L', Ctrl: 'Control_L', CONTROL: 'Control_L', Control: 'Control_L',
+   ALT: 'Alt_L', Alt: 'Alt_L',
+   SHIFT: 'Shift_L', Shift: 'Shift_L',
+   META: 'Super_L', Meta: 'Super_L', CMD: 'Super_L', COMMAND: 'Super_L',
+   F1: 'F1', F2: 'F2', F3: 'F3', F4: 'F4', F5: 'F5', F6: 'F6',
+   F7: 'F7', F8: 'F8', F9: 'F9', F10: 'F10', F11: 'F11', F12: 'F12',
+ };
+
+ // Translated key names treated as modifiers: expandAndTranslateKeys moves any
+ // of these found among the pressed keys into the hold list.
+ const MODIFIER_KEYSYMS = new Set<string>([
+   'Control_L',
+   'Control_R',
+   'Alt_L',
+   'Alt_R',
+   'Shift_L',
+   'Shift_R',
+   'Super_L',
+   'Super_R',
+ ]);
+
+ /**
+  * Translate each key name through KEYSYM_MAP.
+  *
+  * @param keys - Key names to translate.
+  * @returns A new array of the same length; names with no KEYSYM_MAP entry are
+  *   kept as they are.
+  */
+ function translateKeys(keys: string[]): string[] {
+   const lookup = (name: string): string => {
+     const mapped = KEYSYM_MAP[name];
+     return mapped ?? name;
+   };
+   return keys.map(name => lookup(name));
+ }
+
+ /**
+  * Normalise a keypress into the keys to press and the modifiers to hold.
+  *
+  * Each entry of `keys` may be a single key or a `+`-joined combo such as
+  * `CTRL+c`. Combos are split and trimmed, every name is run through
+  * `translateKeys`, and translated names found in MODIFIER_KEYSYMS are moved
+  * out of the pressed keys and merged (de-duplicated) into the hold list.
+  *
+  * A literal plus key is preserved: `"+"` yields the key `+`, and `"CTRL++"`
+  * yields `CTRL` and `+`. A single leading or trailing separator is ignored.
+  *
+  * @param keys - Key names or `+`-joined combos to press.
+  * @param holdKeys - Key names to hold; translated but not split.
+  * @returns `keys` holds the non-modifier keys and `holdKeys` the merged
+  *   modifiers. When the input contains only modifiers, they are returned as
+  *   `keys` and `holdKeys` is just the translated `holdKeys` argument.
+  */
+ function expandAndTranslateKeys(keys: string[], holdKeys: string[]): { keys: string[]; holdKeys: string[] } {
+   const expanded: string[] = [];
+   for (const raw of keys) {
+     const parts = raw.split('+').map(p => p.trim());
+     for (let i = 0; i < parts.length; i++) {
+       if (parts[i]) {
+         expanded.push(parts[i]);
+       } else if (i + 1 < parts.length && !parts[i + 1]) {
+         // Two adjacent empty segments mean the text between two separators
+         // was itself a '+', i.e. the plus key; consume both segments.
+         expanded.push('+');
+         i++;
+       }
+     }
+   }
+
+   const translated = translateKeys(expanded);
+   const translatedHold = translateKeys(holdKeys);
+
+   const holdFromKeys: string[] = [];
+   const primaryKeys: string[] = [];
+   for (const key of translated) {
+     if (MODIFIER_KEYSYMS.has(key)) holdFromKeys.push(key);
+     else primaryKeys.push(key);
+   }
+
+   // Only modifiers were pressed: send them as ordinary keys rather than
+   // holding them around an empty key list.
+   if (primaryKeys.length === 0) return { keys: translated, holdKeys: translatedHold };
+
+   const merged = [...new Set([...translatedHold, ...holdFromKeys])];
+   return { keys: primaryKeys, holdKeys: merged };
+ }
+
+ /**
+  * Loosely-typed computer-use action taken from a model `computer_call` item.
+  * Only `type` is required; translateCuaAction switches on it and reads the
+  * other fields as needed, substituting defaults for any that are missing.
+  */
+ interface CuaAction {
+ type: string;
+ // Pointer coordinates; translateCuaAction uses 0 when absent.
+ x?: number;
+ y?: number;
+ // Text to enter for `type` actions.
+ text?: string;
+ // Address to enter for `goto` actions.
+ url?: string;
+ // Keys for `keypress`; entries may be `+`-joined combos.
+ keys?: string[];
+ // Keys to hold during `keypress`.
+ hold_keys?: string[];
+ // Button name, or a number that normalizeButton maps to a name.
+ button?: string | number;
+ // Scroll deltas; translateCuaAction uses 0 when absent.
+ scroll_x?: number;
+ scroll_y?: number;
+ // Duration in milliseconds for `wait`; 1000 is used when absent.
+ ms?: number;
+ // Points for `drag`; translateCuaAction requires at least two.
+ path?: Array<{ x: number; y: number }>;
+ // Any additional properties are accepted and ignored by this module.
+ [key: string]: unknown;
+ }
+
+ /**
+  * One entry of the `actions` array passed to `kernel.browsers.computer.batch`.
+  * As built by translateCuaAction, `type` names the action and the property of
+  * the same name carries that action's parameters; the other properties are
+  * left unset.
+  */
+ type BatchAction = {
+ type: string;
+ click_mouse?: { x: number; y: number; button?: string; num_clicks?: number };
+ move_mouse?: { x: number; y: number };
+ type_text?: { text: string };
+ press_key?: { keys: string[]; hold_keys?: string[] };
+ scroll?: { x: number; y: number; delta_x?: number; delta_y?: number };
+ // Each path point is an [x, y] pair.
+ drag_mouse?: { path: number[][] };
+ sleep?: { duration_ms: number };
+ };
+
+ /**
+  * Coerce a button value to a button name.
+  *
+  * @param button - A button name, a numeric button code, or nothing.
+  * @returns `'left'` when `button` is missing; for numbers, `'middle'` for 2,
+  *   `'right'` for 3 and `'left'` for anything else; strings are returned as-is.
+  */
+ function normalizeButton(button?: string | number): string {
+   if (button == null) return 'left';
+   if (typeof button !== 'number') return button;
+   switch (button) {
+     case 2:
+       return 'middle';
+     case 3:
+       return 'right';
+     default:
+       return 'left';
+   }
+ }
+
+ /**
+  * Convert one model action into the batch actions that carry it out.
+  *
+  * Missing coordinates and scroll deltas default to 0, missing text/url to the
+  * empty string, and a missing wait duration to 1000 ms.
+  *
+  * @param action - Action to translate; `action.type` selects the translation.
+  * @returns Zero or more batch actions (`screenshot` yields none, `goto` five).
+  * @throws Error for a `drag` with fewer than two path points, or for an
+  *   unrecognised `action.type`.
+  */
+ function translateCuaAction(action: CuaAction): BatchAction[] {
+   const x = action.x ?? 0;
+   const y = action.y ?? 0;
+   const kind = action.type;
+
+   // Shared by `scroll` actions and by clicks reported with the wheel button.
+   const scrollHere = (): BatchAction => ({
+     type: 'scroll',
+     scroll: { x, y, delta_x: action.scroll_x ?? 0, delta_y: action.scroll_y ?? 0 },
+   });
+
+   if (kind === 'click') {
+     switch (action.button) {
+       case 'back':
+         return [{ type: 'press_key', press_key: { hold_keys: ['Alt_L'], keys: ['Left'] } }];
+       case 'forward':
+         return [{ type: 'press_key', press_key: { hold_keys: ['Alt_L'], keys: ['Right'] } }];
+       case 'wheel':
+         return [scrollHere()];
+       default:
+         return [{ type: 'click_mouse', click_mouse: { x, y, button: normalizeButton(action.button) } }];
+     }
+   }
+
+   if (kind === 'double_click') {
+     return [{ type: 'click_mouse', click_mouse: { x, y, num_clicks: 2 } }];
+   }
+
+   if (kind === 'type') {
+     return [{ type: 'type_text', type_text: { text: action.text ?? '' } }];
+   }
+
+   if (kind === 'keypress') {
+     const normalized = expandAndTranslateKeys(action.keys ?? [], action.hold_keys ?? []);
+     const press: { keys: string[]; hold_keys?: string[] } = { keys: normalized.keys };
+     // Leave `hold_keys` off entirely when there is nothing to hold.
+     if (normalized.holdKeys.length) press.hold_keys = normalized.holdKeys;
+     return [{ type: 'press_key', press_key: press }];
+   }
+
+   if (kind === 'scroll') {
+     return [scrollHere()];
+   }
+
+   if (kind === 'move') {
+     return [{ type: 'move_mouse', move_mouse: { x, y } }];
+   }
+
+   if (kind === 'drag') {
+     const points: number[][] = [];
+     for (const p of action.path ?? []) points.push([p.x, p.y]);
+     if (points.length < 2) throw new Error('drag requires at least 2 path points');
+     return [{ type: 'drag_mouse', drag_mouse: { path: points } }];
+   }
+
+   if (kind === 'wait') {
+     return [{ type: 'sleep', sleep: { duration_ms: action.ms ?? 1000 } }];
+   }
+
+   if (kind === 'goto') {
+     // Ctrl+L, short pause, Ctrl+A, type the URL, Return.
+     const steps: BatchAction[] = [];
+     steps.push({ type: 'press_key', press_key: { keys: ['l'], hold_keys: ['Control_L'] } });
+     steps.push({ type: 'sleep', sleep: { duration_ms: 200 } });
+     steps.push({ type: 'press_key', press_key: { keys: ['a'], hold_keys: ['Control_L'] } });
+     steps.push({ type: 'type_text', type_text: { text: action.url ?? '' } });
+     steps.push({ type: 'press_key', press_key: { keys: ['Return'] } });
+     return steps;
+   }
+
+   if (kind === 'back') {
+     return [{ type: 'press_key', press_key: { keys: ['Left'], hold_keys: ['Alt_L'] } }];
+   }
+
+   if (kind === 'screenshot') {
+     // Nothing to execute for a screenshot request.
+     return [];
+   }
+
+   throw new Error(`Unknown CUA action: ${action.type}`);
+ }
+
+ /**
+  * CUA provider backed by the OpenAI Responses API computer tool.
+  *
+  * The API key is read from OPENAI_API_KEY when the provider is constructed.
+  * `runTask` alternates between asking the model for output and executing the
+  * returned computer actions in a Kernel browser session.
+  */
+ export class OpenAIProvider implements CuaProvider {
+   readonly name = 'openai';
+   private apiKey: string;
+
+   constructor() {
+     this.apiKey = process.env.OPENAI_API_KEY ?? '';
+   }
+
+   /** True when a non-empty OPENAI_API_KEY was present at construction time. */
+   isConfigured(): boolean {
+     return this.apiKey.length > 0;
+   }
+
+   /**
+    * Run `options.query` against the browser session `options.sessionId`.
+    *
+    * Each turn sends the conversation so far to the model, executes every
+    * `computer_call` in the response as one batch, and appends a screenshot as
+    * the call's output. The loop ends when the last output item of a response
+    * is an assistant message, or after 50 turns.
+    *
+    * @param options - Task options; `query`, `kernel`, `sessionId` and the
+    *   optional `model` are read here.
+    * @returns `{ result, provider }` where `result` is the concatenated text of
+    *   the final assistant message, `'(no response)'` when that text is empty,
+    *   or `'(max turns reached)'` when the turn limit is hit.
+    * @throws Error when a response has no `output`; errors from the OpenAI
+    *   client, the Kernel client and translateCuaAction propagate unchanged.
+    */
+   async runTask(options: TaskOptions): Promise<TaskResult> {
+     const { query, kernel, sessionId } = options;
+     const client = new OpenAI({ apiKey: this.apiKey });
+
+     // Navigate to a neutral starting page
+     await kernel.browsers.computer.batch(sessionId, {
+       actions: translateCuaAction({ type: 'goto', url: 'https://duckduckgo.com' }) as Parameters<typeof kernel.browsers.computer.batch>[1]['actions'],
+     });
+
+     const input: ResponseInputItem[] = [
+       {
+         role: 'system',
+         content: `Current date: ${new Date().toISOString()}`,
+       } as unknown as ResponseInputItem,
+       {
+         type: 'message',
+         role: 'user',
+         content: [{ type: 'input_text', text: query }],
+       },
+     ];
+
+     // Model output items and our computer_call_output items, in order; resent
+     // after `input` on every turn.
+     const items: ResponseItem[] = [];
+     const maxTurns = 50;
+
+     for (let turn = 0; turn < maxTurns; turn++) {
+       const response = await client.responses.create({
+         model: options.model || 'gpt-5.4',
+         input: [...input, ...items] as ResponseInputItem[],
+         tools: [{ type: 'computer' } as unknown as OpenAI.Responses.Tool],
+         truncation: 'auto',
+         reasoning: { effort: 'low', summary: 'concise' },
+       });
+
+       if (!response.output) throw new Error('No output from model');
+
+       for (const item of response.output as ResponseItem[]) {
+         items.push(item);
+
+         if (item.type === 'computer_call') {
+           // Accept either a single `action` or a list of `actions` on the call.
+           const cc = item as ResponseComputerToolCall & {
+             action?: CuaAction;
+             actions?: CuaAction[];
+           };
+           const actionList: CuaAction[] = Array.isArray(cc.actions)
+             ? cc.actions
+             : cc.action ? [cc.action] : [];
+
+           // Execute actions
+           const batch: BatchAction[] = [];
+           for (const a of actionList) {
+             batch.push(...translateCuaAction(a));
+           }
+           if (batch.length > 0) {
+             await kernel.browsers.computer.batch(sessionId, {
+               actions: batch as Parameters<typeof kernel.browsers.computer.batch>[1]['actions'],
+             });
+           }
+
+           // Acknowledge safety checks: they are logged and echoed back as
+           // acknowledged without any further review.
+           const pending = cc.pending_safety_checks ?? [];
+           for (const check of pending) {
+             console.log(`Safety check: ${check.message ?? ''}`);
+           }
+
+           // Take screenshot (after a short pause following the actions)
+           await new Promise(r => setTimeout(r, 300));
+           const screenshotResp = await kernel.browsers.computer.captureScreenshot(sessionId);
+           const buf = Buffer.from(await screenshotResp.arrayBuffer());
+           const screenshot = buf.toString('base64');
+
+           items.push({
+             type: 'computer_call_output',
+             call_id: cc.call_id,
+             acknowledged_safety_checks: pending,
+             output: {
+               type: 'computer_screenshot',
+               image_url: `data:image/png;base64,${screenshot}`,
+             },
+           } as unknown as ResponseItem);
+         }
+       }
+
+       // Check if the model produced a final assistant message
+       const lastItem = response.output[response.output.length - 1] as ResponseItem & { role?: string };
+       if (lastItem?.role === 'assistant') {
+         const msg = lastItem as ResponseOutputMessage;
+         const text = msg.content
+           ?.filter(c => c && 'text' in c)
+           .map(c => (c as { text: string }).text)
+           .join('') ?? '';
+         return { result: text || '(no response)', provider: this.name };
+       }
+     }
+
+     return { result: '(max turns reached)', provider: this.name };
+   }
+ }
diff --git a/pkg/templates/typescript/cua/session.ts b/pkg/templates/typescript/cua/session.ts
new file mode 100644
index 0000000..8492238
--- /dev/null
+++ b/pkg/templates/typescript/cua/session.ts
@@ -0,0 +1,159 @@
+/**
+ * Kernel Browser Session Manager.
+ *
+ * Manages browser lifecycle with optional video replay recording.
+ */
+
+import type { Kernel } from '@onkernel/sdk';
+
+ /**
+  * Options accepted by the KernelBrowserSession constructor. Fields that have
+  * an entry in DEFAULTS take that value when omitted.
+  */
+ export interface SessionOptions {
+ // Sent to browsers.create as `invocation_id`.
+ invocationId?: string;
+ // Sent to browsers.create as `stealth`.
+ stealth?: boolean;
+ // Sent to browsers.create as `timeout_seconds`.
+ timeoutSeconds?: number;
+ // When true, start() begins a replay recording and stop() ends it.
+ recordReplay?: boolean;
+ // Seconds stop() waits before ending the replay recording.
+ replayGracePeriod?: number;
+ // Sent to browsers.create as `viewport.width` / `viewport.height`.
+ viewportWidth?: number;
+ viewportHeight?: number;
+ // Sent as `proxy_id` only when set to a non-empty value.
+ proxyId?: string;
+ // Sent as `profile` only when provided.
+ profile?: { id?: string; name?: string; save_changes?: boolean };
+ // Sent as `extensions` only when the array is non-empty.
+ extensions?: Array<{ id?: string; name?: string }>;
+ }
+
+ /**
+  * Snapshot of a session as produced by KernelBrowserSession's `info` getter
+  * and returned from start() and stop().
+  */
+ export interface SessionInfo {
+ sessionId: string;
+ // Empty string when the session has no live view URL.
+ liveViewUrl: string;
+ // Left undefined when no replay was started or no replay URL was obtained.
+ replayId?: string;
+ replayViewUrl?: string;
+ viewportWidth: number;
+ viewportHeight: number;
+ }
+
+ // Baseline option values; the KernelBrowserSession constructor spreads the
+ // caller's options over these.
+ const DEFAULTS = {
+ stealth: true,
+ // Seconds (sent to browsers.create as `timeout_seconds`).
+ timeoutSeconds: 300,
+ recordReplay: false,
+ // Seconds; stop() multiplies this by 1000 before sleeping.
+ replayGracePeriod: 5.0,
+ viewportWidth: 1280,
+ viewportHeight: 800,
+ };
+
+ /**
+  * Owns one Kernel browser: start() creates it (and optionally starts a replay
+  * recording), stop() ends the recording and deletes the browser.
+  */
+ export class KernelBrowserSession {
+   private kernel: Kernel;
+   // Options that have a default are always present; the rest stay optional.
+   private opts: Required<Omit<SessionOptions, 'invocationId' | 'proxyId' | 'profile' | 'extensions'>> &
+     Pick<SessionOptions, 'invocationId' | 'proxyId' | 'profile' | 'extensions'>;
+
+   private _sessionId: string | null = null;
+   private _liveViewUrl: string | null = null;
+   private _replayId: string | null = null;
+   private _replayViewUrl: string | null = null;
+
+   /**
+    * @param kernel - Kernel client used for all browser and replay calls.
+    * @param options - Session options; anything omitted (or explicitly
+    *   `undefined`) falls back to DEFAULTS where a default exists.
+    */
+   constructor(kernel: Kernel, options: SessionOptions = {}) {
+     this.kernel = kernel;
+     // Drop undefined-valued entries first: spreading `{ stealth: undefined }`
+     // over DEFAULTS would otherwise replace the default with undefined.
+     const provided = Object.fromEntries(
+       Object.entries(options).filter(([, value]) => value !== undefined),
+     ) as SessionOptions;
+     this.opts = { ...DEFAULTS, ...provided };
+   }
+
+   /** Id of the running session. Throws if start() has not completed. */
+   get sessionId(): string {
+     if (!this._sessionId) throw new Error('Session not started. Call start() first.');
+     return this._sessionId;
+   }
+
+   get liveViewUrl(): string | null { return this._liveViewUrl; }
+   get replayViewUrl(): string | null { return this._replayViewUrl; }
+   get viewportWidth(): number { return this.opts.viewportWidth; }
+   get viewportHeight(): number { return this.opts.viewportHeight; }
+
+   /** Snapshot of the running session. Throws if the session is not started. */
+   get info(): SessionInfo {
+     return {
+       sessionId: this.sessionId,
+       liveViewUrl: this._liveViewUrl || '',
+       replayId: this._replayId || undefined,
+       replayViewUrl: this._replayViewUrl || undefined,
+       viewportWidth: this.opts.viewportWidth,
+       viewportHeight: this.opts.viewportHeight,
+     };
+   }
+
+   /**
+    * Create the browser and, when `recordReplay` is set, start a replay
+    * recording. A failure to start the recording is logged as a warning and
+    * does not fail start().
+    *
+    * @returns Info for the newly created session.
+    */
+   async start(): Promise<SessionInfo> {
+     const browser = await this.kernel.browsers.create({
+       invocation_id: this.opts.invocationId,
+       stealth: this.opts.stealth,
+       timeout_seconds: this.opts.timeoutSeconds,
+       viewport: { width: this.opts.viewportWidth, height: this.opts.viewportHeight },
+       ...(this.opts.proxyId ? { proxy_id: this.opts.proxyId } : {}),
+       ...(this.opts.profile ? { profile: this.opts.profile } : {}),
+       ...(this.opts.extensions?.length ? { extensions: this.opts.extensions } : {}),
+     });
+
+     this._sessionId = browser.session_id;
+     this._liveViewUrl = browser.browser_live_view_url ?? null;
+
+     console.log(`Browser session: ${this._sessionId}`);
+     console.log(`Live view: ${this._liveViewUrl}`);
+
+     if (this.opts.recordReplay) {
+       try {
+         const replay = await this.kernel.browsers.replays.start(this._sessionId);
+         this._replayId = replay.replay_id;
+         console.log(`Replay recording started: ${this._replayId}`);
+       } catch (error) {
+         console.warn(`Failed to start replay: ${error}`);
+       }
+     }
+
+     return this.info;
+   }
+
+   /**
+    * End the replay recording (if one is running) and delete the browser.
+    *
+    * Safe to call when the session was never started or has already been
+    * stopped: nothing is torn down and the returned info has an empty
+    * `sessionId`. This lets callers put stop() in a `finally` without masking
+    * an error thrown by start().
+    *
+    * @returns Info captured before teardown, with `replayViewUrl` filled in
+    *   when a replay URL was obtained.
+    */
+   async stop(): Promise<SessionInfo> {
+     const sessionId = this._sessionId;
+     if (!sessionId) {
+       return {
+         sessionId: '',
+         liveViewUrl: '',
+         viewportWidth: this.opts.viewportWidth,
+         viewportHeight: this.opts.viewportHeight,
+       };
+     }
+
+     const info = this.info;
+
+     try {
+       if (this.opts.recordReplay && this._replayId) {
+         if (this.opts.replayGracePeriod > 0) {
+           await sleep(this.opts.replayGracePeriod * 1000);
+         }
+         await this.stopReplay();
+         info.replayViewUrl = this._replayViewUrl || undefined;
+       }
+     } finally {
+       // Delete the browser even when stopping the replay failed.
+       console.log(`Destroying browser session: ${sessionId}`);
+       await this.kernel.browsers.deleteByID(sessionId);
+     }
+
+     this._sessionId = null;
+     this._liveViewUrl = null;
+     this._replayId = null;
+     this._replayViewUrl = null;
+
+     return info;
+   }
+
+   /**
+    * Stop the replay recording, then poll the session's replay list for up to
+    * 60 seconds until this replay appears, storing its view URL if it has one.
+    */
+   private async stopReplay(): Promise<void> {
+     if (!this._sessionId || !this._replayId) return;
+
+     await this.kernel.browsers.replays.stop(this._replayId, { id: this._sessionId });
+     await sleep(2000);
+
+     // Poll for replay URL
+     const deadline = Date.now() + 60_000;
+     while (Date.now() < deadline) {
+       try {
+         const replays = await this.kernel.browsers.replays.list(this._sessionId!);
+         const match = replays.find(r => r.replay_id === this._replayId);
+         if (match) {
+           this._replayViewUrl = match.replay_view_url ?? null;
+           if (this._replayViewUrl) {
+             console.log(`Replay URL: ${this._replayViewUrl}`);
+           }
+           return;
+         }
+       } catch { /* polling: ignore the error and retry on the next pass */ }
+       await sleep(1000);
+     }
+     console.warn('Replay may still be processing.');
+   }
+ }
+
+ /**
+  * Wait for a fixed delay.
+  *
+  * @param ms - Delay in milliseconds, passed to setTimeout.
+  * @returns A promise that resolves (with no value) once the timer fires.
+  */
+ function sleep(ms: number): Promise<void> {
+   return new Promise(resolve => setTimeout(resolve, ms));
+ }
diff --git a/pkg/templates/typescript/cua/tsconfig.json b/pkg/templates/typescript/cua/tsconfig.json
new file mode 100644
index 0000000..cbe5246
--- /dev/null
+++ b/pkg/templates/typescript/cua/tsconfig.json
@@ -0,0 +1,22 @@
+{
+ "compilerOptions": {
+ "target": "ESNext",
+ "module": "ESNext",
+ "moduleResolution": "bundler",
+ "esModuleInterop": true,
+ "forceConsistentCasingInFileNames": true,
+ "strict": true,
+ "skipLibCheck": true,
+ "outDir": "./dist",
+ "rootDir": ".",
+ "declaration": true,
+ "resolveJsonModule": true,
+ "isolatedModules": true,
+ "noUnusedLocals": false,
+ "noUnusedParameters": false,
+ "allowImportingTsExtensions": true,
+ "noEmit": true
+ },
+ "include": ["./**/*.ts"],
+ "exclude": ["node_modules", "dist"]
+}