github · pelikhan · May 23, 2026 · May 23, 2026 · May 23, 2026 · May 23, 2026
diff --git a/actions/setup/js/awf_reflect.cjs b/actions/setup/js/awf_reflect.cjs
@@ -15,8 +15,11 @@
 
 "use strict";
 
+require("./shim.cjs");
+
 const fs = require("fs");
 const path = require("path");
+const { withRetry } = require("./error_recovery.cjs");
 
 // AWF API proxy management endpoint for discovering configured LLM providers and available models.
 // The api-proxy sidecar exposes /reflect on its management port (port 10000) inside the AWF
@@ -29,6 +32,12 @@ const AWF_REFLECT_OUTPUT_PATH = "/tmp/gh-aw/sandbox/firewall/awf-reflect.json";
 const AWF_REFLECT_TIMEOUT_MS = 60000;
 // Milliseconds to wait for each models_url fallback fetch (shorter than the main reflect timeout).
 const AWF_MODELS_URL_TIMEOUT_MS = 3000;
+// Maximum attempts for models_url fallback fetches when the proxy is not yet ready.
+const AWF_MODELS_URL_MAX_ATTEMPTS = 5;
+// Base delay between models_url fallback retries. Uses exponential backoff.
+const AWF_MODELS_URL_RETRY_BASE_MS = 250;
+// Cap for exponential backoff delay between retries.
+const AWF_MODELS_URL_RETRY_MAX_MS = 2000;
 // Gemini model name prefix stripped from model IDs in the Gemini models API response.
 // Example: { name: "models/gemini-1.5-pro" } → "gemini-1.5-pro"
 const GEMINI_MODEL_NAME_PREFIX = "models/";
@@ -84,32 +93,79 @@ function extractModelIds(json) {
  * @returns {Promise<string[]|null>}
  */
 async function fetchModelsFromUrl(modelsUrl, timeoutMs, logger) {
-  const ac = new AbortController();
-  const timer = setTimeout(() => {
-    logger(`awf-reflect: models fetch timed out for ${modelsUrl}`);
-    ac.abort();
-  }, timeoutMs);
+  let attemptCounter = 0;
+  const retryConfig = {
+    maxRetries: AWF_MODELS_URL_MAX_ATTEMPTS - 1,
+    // withRetry multiplies delay before the next attempt, so divide by 2 here
+    // to preserve the intended first backoff of AWF_MODELS_URL_RETRY_BASE_MS.
+    initialDelayMs: Math.ceil(AWF_MODELS_URL_RETRY_BASE_MS / 2),
+    maxDelayMs: AWF_MODELS_URL_RETRY_MAX_MS,
+    backoffMultiplier: 2,
+    jitterMs: 0,
+    shouldRetry: error => {
+      const original = error?.originalError || error;
+      const status = original?.status ?? original?.response?.status ?? null;
+      const shouldRetry = status === 503;
+      if (shouldRetry && attemptCounter < AWF_MODELS_URL_MAX_ATTEMPTS) {
+        logger(`awf-reflect: models fetch returned 503 for ${modelsUrl}; retrying (attempt ${attemptCounter + 1}/${AWF_MODELS_URL_MAX_ATTEMPTS})`);
+      }
+      return shouldRetry;
+    },
+  };
+
   try {
-    const res = await fetch(modelsUrl, { signal: ac.signal });
-    if (!res.ok) {
-      logger(`awf-reflect: models fetch returned ${res.status} for ${modelsUrl}`);
-      return null;
-    }
-    const json = await res.json();
-    const models = extractModelIds(json);
-    if (models) {
-      logger(`awf-reflect: fetched ${models.length} model(s) from ${modelsUrl}`);
-    }
-    return models;
+    return await withRetry(
+      async () => {
+        attemptCounter += 1;
+        const ac = new AbortController();
+        const timer = setTimeout(() => {
+          logger(`awf-reflect: models fetch timed out for ${modelsUrl}`);
+          ac.abort();
+        }, timeoutMs);
+        try {
+          const res = await fetch(modelsUrl, { signal: ac.signal });
+          if (!res.ok) {
+            if (res.status === 503) {
+              const err = Object.assign(new Error(`models fetch returned 503 for ${modelsUrl}`), { status: 503 });
+              throw err;
+            }
+            logger(`awf-reflect: models fetch returned ${res.status} for ${modelsUrl}`);
+            return null;
+          }
+          const json = await res.json();
+          const models = extractModelIds(json);
+          if (models) {
+            logger(`awf-reflect: fetched ${models.length} model(s) from ${modelsUrl}`);
+          }
+          return models;
+        } catch (err) {
+          const e = /** @type {Error} */ err;
+          if (e.name === "AbortError") {
+            return null; // already logged above
+          }
+          const status = e?.status ?? e?.response?.status ?? null;
+          if (status === 503) {
+            throw e;
+          }
+          logger(`awf-reflect: models fetch error for ${modelsUrl}: ${e.message}`);
+          return null;
+        } finally {
+          clearTimeout(timer);
+        }
+      },
+      retryConfig,
+      `awf-reflect models fetch for ${modelsUrl}`
+    );
   } catch (err) {
     const e = /** @type {Error} */ err;
-    if (e.name === "AbortError") {
-      return null; // already logged above
+    const original = e?.originalError || e;
+    const status = original?.status ?? original?.response?.status ?? null;
+    if (status === 503) {
+      logger(`awf-reflect: models fetch returned 503 for ${modelsUrl}`);
+      return null;
     }
     logger(`awf-reflect: models fetch error for ${modelsUrl}: ${e.message}`);
     return null;
-  } finally {
-    clearTimeout(timer);
   }
 }
 
@@ -250,6 +306,9 @@ if (typeof module !== "undefined" && module.exports) {
     AWF_REFLECT_OUTPUT_PATH,
     AWF_REFLECT_TIMEOUT_MS,
     AWF_MODELS_URL_TIMEOUT_MS,
+    AWF_MODELS_URL_MAX_ATTEMPTS,
+    AWF_MODELS_URL_RETRY_BASE_MS,
+    AWF_MODELS_URL_RETRY_MAX_MS,
     GEMINI_MODEL_NAME_PREFIX,
     enrichReflectModels,
     extractModelIds,

diff --git a/actions/setup/js/awf_reflect.test.cjs b/actions/setup/js/awf_reflect.test.cjs
@@ -10,6 +10,9 @@ const {
   AWF_REFLECT_OUTPUT_PATH,
   AWF_REFLECT_TIMEOUT_MS,
   AWF_MODELS_URL_TIMEOUT_MS,
+  AWF_MODELS_URL_MAX_ATTEMPTS,
+  AWF_MODELS_URL_RETRY_BASE_MS,
+  AWF_MODELS_URL_RETRY_MAX_MS,
   GEMINI_MODEL_NAME_PREFIX,
   enrichReflectModels,
   extractModelIds,
@@ -24,6 +27,9 @@ describe("awf_reflect.cjs", () => {
       expect(AWF_REFLECT_OUTPUT_PATH).toBe("/tmp/gh-aw/sandbox/firewall/awf-reflect.json");
       expect(AWF_REFLECT_TIMEOUT_MS).toBe(60000);
       expect(AWF_MODELS_URL_TIMEOUT_MS).toBe(3000);
+      expect(AWF_MODELS_URL_MAX_ATTEMPTS).toBe(5);
+      expect(AWF_MODELS_URL_RETRY_BASE_MS).toBe(250);
+      expect(AWF_MODELS_URL_RETRY_MAX_MS).toBe(2000);
       expect(GEMINI_MODEL_NAME_PREFIX).toBe("models/");
     });
   });
@@ -171,6 +177,33 @@ describe("awf_reflect.cjs", () => {
       expect(result).toBeNull();
       expect(logs.some(l => l.includes("models fetch error"))).toBe(true);
     });
+
+    it("retries on 503 and eventually succeeds", async () => {
+      vi.stubGlobal(
+        "fetch",
+        vi
+          .fn()
+          .mockResolvedValueOnce({ ok: false, status: 503 })
+          .mockResolvedValueOnce({ ok: false, status: 503 })
+          .mockResolvedValueOnce({ ok: true, status: 200, json: async () => ({ data: [{ id: "gpt-4o" }] }) })
+      );
+
+      const logs = [];
+      const result = await fetchModelsFromUrl("http://api-proxy:10000/v1/models", 1000, msg => logs.push(msg));
+      expect(result).toEqual(["gpt-4o"]);
+      expect(logs.filter(l => l.includes("retrying (attempt")).length).toBe(2);
+      expect(logs.some(l => l.includes("fetched 1 model(s)"))).toBe(true);
+    });
+
+    it("stops retrying after max attempts on repeated 503 responses", async () => {
+      vi.stubGlobal("fetch", vi.fn().mockResolvedValue({ ok: false, status: 503 }));
+
+      const logs = [];
+      const result = await fetchModelsFromUrl("http://api-proxy:10000/v1/models", 1000, msg => logs.push(msg));
+      expect(result).toBeNull();
+      expect(logs.filter(l => l.includes("retrying (attempt")).length).toBe(AWF_MODELS_URL_MAX_ATTEMPTS - 1);
+      expect(logs.some(l => l.includes("models fetch returned 503"))).toBe(true);
+    });
   });
 
   describe("fetchAWFReflect", () => {