From 8b6b9a85cb4c99e130ded496281c3b794c0e83b3 Mon Sep 17 00:00:00 2001 From: David Cramer Date: Fri, 27 Jun 2025 22:50:16 -0700 Subject: [PATCH] Revert "feat: add bot protection to Cloudflare Worker (#330)" This reverts commit 12548b2c01ea1aa3f0d0fe0aeea78f383edd9d71. --- docs/security.mdc | 39 --- packages/mcp-cloudflare/src/server/index.ts | 6 +- .../src/server/lib/bot-protection.test.ts | 245 ------------------ .../src/server/lib/bot-protection.ts | 155 ----------- 4 files changed, 3 insertions(+), 442 deletions(-) delete mode 100644 packages/mcp-cloudflare/src/server/lib/bot-protection.test.ts delete mode 100644 packages/mcp-cloudflare/src/server/lib/bot-protection.ts diff --git a/docs/security.mdc b/docs/security.mdc index 1c128e48d..16afccab4 100644 --- a/docs/security.mdc +++ b/docs/security.mdc @@ -150,44 +150,6 @@ SENTRY_CLIENT_SECRET=your_oauth_app_secret COOKIE_SECRET=random_32_char_string ``` -## Bot Protection - -The MCP server includes bot protection at the Cloudflare Worker level to prevent abuse from generic HTTP clients. - -### Implementation - -Bot protection is implemented as a wrapper around the worker's fetch handler: - -```typescript -export default withBotProtection( - Sentry.withSentry( - getSentryConfig, - oAuthProvider, - ) -) satisfies ExportedHandler; -``` - -### Blocked User Agents - -Generic bot user agents are blocked, including: -- Python clients: `python-requests`, `aiohttp`, `python-urllib` -- Go clients: `go-http-client` -- Java clients: `okhttp`, `apache-httpclient` -- Command line tools: `curl`, `wget` -- Other generic clients: `libwww-perl`, `bot`, `spider`, `crawler` - -### Allowed Bots - -Well-behaved bots are allowed, including: -- Search engines: Googlebot, Bingbot, DuckDuckBot -- Social media: FacebookExternalHit, TwitterBot -- Development tools: Postman, Insomnia -- Monitoring services: UptimeRobot, Pingdom, NewRelic - -### Response - -Blocked requests receive a `403 Forbidden` response with the message "Access denied". - ## CORS Configuration ```typescript @@ -203,5 +165,4 @@ const ALLOWED_ORIGINS = [ - OAuth implementation: `packages/mcp-cloudflare/src/server/routes/sentry-oauth.ts` - Cookie utilities: `packages/mcp-cloudflare/src/server/utils/cookies.ts` - OAuth Provider: `packages/mcp-cloudflare/src/server/bindings.ts` -- Bot protection: `packages/mcp-cloudflare/src/server/lib/bot-protection.ts` - Sentry OAuth docs: https://docs.sentry.io/api/guides/oauth/ \ No newline at end of file diff --git a/packages/mcp-cloudflare/src/server/index.ts b/packages/mcp-cloudflare/src/server/index.ts index 953a5b95a..fc5176a82 100644 --- a/packages/mcp-cloudflare/src/server/index.ts +++ b/packages/mcp-cloudflare/src/server/index.ts @@ -5,7 +5,6 @@ import app from "./app"; import { SCOPES } from "../constants"; import type { Env } from "./types"; import getSentryConfig from "./sentry.config"; -import { withBotProtection } from "./lib/bot-protection"; // required for Durable Objects export { SentryMCP }; @@ -24,6 +23,7 @@ const oAuthProvider = new OAuthProvider({ scopesSupported: Object.keys(SCOPES), }); -export default withBotProtection( - Sentry.withSentry(getSentryConfig, oAuthProvider), +export default Sentry.withSentry( + getSentryConfig, + oAuthProvider, ) satisfies ExportedHandler; diff --git a/packages/mcp-cloudflare/src/server/lib/bot-protection.test.ts b/packages/mcp-cloudflare/src/server/lib/bot-protection.test.ts deleted file mode 100644 index 05a480f76..000000000 --- a/packages/mcp-cloudflare/src/server/lib/bot-protection.test.ts +++ /dev/null @@ -1,245 +0,0 @@ -import { describe, it, expect, vi } from "vitest"; -import { withBotProtection } from "./bot-protection"; -import type { Env } from "../types"; -import type { IncomingRequestCfProperties } from "@cloudflare/workers-types"; - -describe("bot-protection", () => { - const mockEnv = {} as Env; - const mockCtx = { - waitUntil: vi.fn(), - passThroughOnException: vi.fn(), - props: {}, - } as ExecutionContext; - - const mockHandler: ExportedHandler = { - fetch: vi.fn().mockResolvedValue(new Response("OK")), - }; - - // Helper to create test requests with the proper type - const createTestRequest = ( - url: string, - init?: RequestInit, - ): Request> => { - return new Request(url, init) as Request< - unknown, - IncomingRequestCfProperties - >; - }; - - describe("withBotProtection", () => { - it("should block generic Python requests user agent", async () => { - const wrappedHandler = withBotProtection(mockHandler); - const request = createTestRequest("https://example.com", { - headers: { - "user-agent": "python-requests/2.31.0", - }, - }); - - const response = await wrappedHandler.fetch!(request, mockEnv, mockCtx); - - expect(response.status).toBe(403); - expect(await response.text()).toBe("Access denied"); - expect(mockHandler.fetch).not.toHaveBeenCalled(); - }); - - it("should block Go http client user agent", async () => { - const wrappedHandler = withBotProtection(mockHandler); - const request = createTestRequest("https://example.com", { - headers: { - "user-agent": "Go-http-client/1.1", - }, - }); - - const response = await wrappedHandler.fetch!(request, mockEnv, mockCtx); - - expect(response.status).toBe(403); - expect(mockHandler.fetch).not.toHaveBeenCalled(); - }); - - it("should block okhttp user agent", async () => { - const wrappedHandler = withBotProtection(mockHandler); - const request = createTestRequest("https://example.com", { - headers: { - "user-agent": "okhttp/4.9.3", - }, - }); - - const response = await wrappedHandler.fetch!(request, mockEnv, mockCtx); - - expect(response.status).toBe(403); - expect(mockHandler.fetch).not.toHaveBeenCalled(); - }); - - it("should block curl user agent", async () => { - const wrappedHandler = withBotProtection(mockHandler); - const request = createTestRequest("https://example.com", { - headers: { - "user-agent": "curl/7.68.0", - }, - }); - - const response = await wrappedHandler.fetch!(request, mockEnv, mockCtx); - - expect(response.status).toBe(403); - expect(mockHandler.fetch).not.toHaveBeenCalled(); - }); - - it("should block empty user agent", async () => { - const wrappedHandler = withBotProtection(mockHandler); - const request = createTestRequest("https://example.com", { - headers: {}, - }); - - const response = await wrappedHandler.fetch!(request, mockEnv, mockCtx); - - expect(response.status).toBe(403); - expect(mockHandler.fetch).not.toHaveBeenCalled(); - }); - - it("should block very short user agent", async () => { - const wrappedHandler = withBotProtection(mockHandler); - const request = createTestRequest("https://example.com", { - headers: { - "user-agent": "bot", - }, - }); - - const response = await wrappedHandler.fetch!(request, mockEnv, mockCtx); - - expect(response.status).toBe(403); - expect(mockHandler.fetch).not.toHaveBeenCalled(); - }); - - it("should allow Chrome browser user agent", async () => { - const wrappedHandler = withBotProtection(mockHandler); - const request = createTestRequest("https://example.com", { - headers: { - "user-agent": - "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36", - }, - }); - - const response = await wrappedHandler.fetch!(request, mockEnv, mockCtx); - - expect(response.status).toBe(200); - expect(await response.text()).toBe("OK"); - expect(mockHandler.fetch).toHaveBeenCalledWith(request, mockEnv, mockCtx); - }); - - it("should allow Firefox browser user agent", async () => { - const wrappedHandler = withBotProtection(mockHandler); - const request = createTestRequest("https://example.com", { - headers: { - "user-agent": - "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/121.0", - }, - }); - - const response = await wrappedHandler.fetch!(request, mockEnv, mockCtx); - - expect(response.status).toBe(200); - expect(mockHandler.fetch).toHaveBeenCalled(); - }); - - it("should allow Safari browser user agent", async () => { - const wrappedHandler = withBotProtection(mockHandler); - const request = createTestRequest("https://example.com", { - headers: { - "user-agent": - "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.2 Safari/605.1.15", - }, - }); - - const response = await wrappedHandler.fetch!(request, mockEnv, mockCtx); - - expect(response.status).toBe(200); - expect(mockHandler.fetch).toHaveBeenCalled(); - }); - - it("should allow Googlebot", async () => { - const wrappedHandler = withBotProtection(mockHandler); - const request = createTestRequest("https://example.com", { - headers: { - "user-agent": - "Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)", - }, - }); - - const response = await wrappedHandler.fetch!(request, mockEnv, mockCtx); - - expect(response.status).toBe(200); - expect(mockHandler.fetch).toHaveBeenCalled(); - }); - - it("should allow Postman", async () => { - const wrappedHandler = withBotProtection(mockHandler); - const request = createTestRequest("https://example.com", { - headers: { - "user-agent": "PostmanRuntime/7.32.1", - }, - }); - - const response = await wrappedHandler.fetch!(request, mockEnv, mockCtx); - - expect(response.status).toBe(200); - expect(mockHandler.fetch).toHaveBeenCalled(); - }); - - it("should allow UptimeRobot monitoring", async () => { - const wrappedHandler = withBotProtection(mockHandler); - const request = createTestRequest("https://example.com", { - headers: { - "user-agent": - "Mozilla/5.0+(compatible; UptimeRobot/2.0; http://www.uptimerobot.com/)", - }, - }); - - const response = await wrappedHandler.fetch!(request, mockEnv, mockCtx); - - expect(response.status).toBe(200); - expect(mockHandler.fetch).toHaveBeenCalled(); - }); - - it("should pass through other handler methods", () => { - const scheduledHandler = vi.fn(); - const queueHandler = vi.fn(); - const tailHandler = vi.fn(); - const traceHandler = vi.fn(); - const emailHandler = vi.fn(); - - const handler: ExportedHandler = { - fetch: vi.fn(), - scheduled: scheduledHandler, - queue: queueHandler, - tail: tailHandler, - trace: traceHandler, - email: emailHandler, - }; - - const wrappedHandler = withBotProtection(handler); - - expect(wrappedHandler.scheduled).toBe(scheduledHandler); - expect(wrappedHandler.queue).toBe(queueHandler); - expect(wrappedHandler.tail).toBe(tailHandler); - expect(wrappedHandler.trace).toBe(traceHandler); - expect(wrappedHandler.email).toBe(emailHandler); - }); - - it("should return 501 if no fetch handler provided", async () => { - const handlerWithoutFetch: ExportedHandler = {}; - const wrappedHandler = withBotProtection(handlerWithoutFetch); - - const request = createTestRequest("https://example.com", { - headers: { - "user-agent": - "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36", - }, - }); - - const response = await wrappedHandler.fetch!(request, mockEnv, mockCtx); - - expect(response.status).toBe(501); - expect(await response.text()).toBe("Not implemented"); - }); - }); -}); diff --git a/packages/mcp-cloudflare/src/server/lib/bot-protection.ts b/packages/mcp-cloudflare/src/server/lib/bot-protection.ts deleted file mode 100644 index 0dd7f26c4..000000000 --- a/packages/mcp-cloudflare/src/server/lib/bot-protection.ts +++ /dev/null @@ -1,155 +0,0 @@ -import type { Env } from "../types"; -import type { IncomingRequestCfProperties } from "@cloudflare/workers-types"; - -const GENERIC_BOT_USER_AGENTS = [ - // Python HTTP clients - "python-requests", - "python-httpx", - "python-urllib", - "aiohttp", - "httpcore", - - // Go HTTP clients - "go-http-client", - - // Java/Android HTTP clients - "okhttp", - "apache-httpclient", - "java", - - // Node.js HTTP clients - "node-fetch", - "axios", - "got", - "undici", - - // Command line tools - "curl", - "wget", - "httpie", - - // Ruby HTTP clients - "ruby", - "faraday", - - // PHP HTTP clients - "guzzlehttp", - "php", - - // .NET HTTP clients - "dotnet", - - // Other generic clients - "libwww-perl", - "lwp-trivial", - "httpclient", - "the http gem", - "rest-client", - - // Generic patterns that indicate non-browser clients - "bot", - "spider", - "crawler", - "scraper", - "monitor", - "fetch", -]; - -const ALLOWED_BOT_USER_AGENTS = [ - // Search engines - "googlebot", - "bingbot", - "slurp", // Yahoo - "duckduckbot", - "baiduspider", - "yandexbot", - - // Social media - "facebookexternalhit", - "twitterbot", - "linkedinbot", - "whatsapp", - "telegrambot", - - // Development tools - "postman", - "insomnia", - - // Monitoring services - "uptimerobot", - "pingdom", - "newrelic", - "datadog", - - // Other legitimate services - "github-camo", // GitHub image proxy - "slack-imgproxy", // Slack image proxy -]; - -function isGenericBot(userAgent: string): boolean { - const ua = userAgent.toLowerCase(); - - // First check if it's an allowed bot - for (const allowed of ALLOWED_BOT_USER_AGENTS) { - if (ua.includes(allowed)) { - return false; - } - } - - // Then check if it's a generic bot - for (const generic of GENERIC_BOT_USER_AGENTS) { - if (ua.includes(generic)) { - return true; - } - } - - // Check if it's missing a user agent or has a very short one - if (!userAgent || userAgent.length < 10) { - return true; - } - - // Check for well-formed browser user agents - // Most legitimate browsers have complex user agents with version numbers - const hasBrowserIdentifiers = - ua.includes("mozilla/") && - (ua.includes("gecko/") || - ua.includes("webkit/") || - ua.includes("chrome/") || - ua.includes("safari/")); - - return !hasBrowserIdentifiers; -} - -export function withBotProtection( - handler: ExportedHandler, -): ExportedHandler { - return { - ...handler, - async fetch( - request: Request>, - env: E, - ctx: ExecutionContext, - ) { - const userAgent = request.headers.get("user-agent") || ""; - - // Check if this is a generic bot - if (isGenericBot(userAgent)) { - // Return 403 Forbidden for generic bots - return new Response("Access denied", { - status: 403, - headers: { - "content-type": "text/plain", - }, - }); - } - - // If not a generic bot, pass through to the handler - if (handler.fetch) { - return handler.fetch(request, env, ctx); - } - - // Default response if no fetch handler - return new Response("Not implemented", { status: 501 }); - }, - }; -}