/**
 * Run one issue-search query for title deduplication and collect the titles of
 * the matching issues, paging until the results are exhausted or the page cap
 * (TITLE_DEDUP_MAX_SEARCH_PAGES) is reached.
 *
 * Items carrying a `pull_request` property are ignored, as are items whose
 * title is not a string.
 *
 * @param {Object} githubClient - Client exposing `rest.search.issuesAndPullRequests`
 * @param {string} query - Search query passed through as `q`
 * @returns {Promise<{candidates: Array<{title: string}>, fetchedItems: number, totalCount: number, truncated: boolean}>}
 *   `truncated` is true when more results exist than were fetched, or when the
 *   page cap was hit with only full pages and no usable `total_count`.
 */
async function searchTitleDedupIssues(githubClient, query) {
  const candidates = [];
  let fetchedItems = 0;
  let totalCount = 0;
  let sawNumericTotalCount = false;
  let fetchedPageCount = 0;

  for (let page = 1; page <= TITLE_DEDUP_MAX_SEARCH_PAGES; page += 1) {
    fetchedPageCount = page;
    const response = await githubClient.rest.search.issuesAndPullRequests({
      q: query,
      per_page: TITLE_DEDUP_SEARCH_PER_PAGE,
      page,
      sort: "updated",
      order: "desc",
    });
    const items = Array.isArray(response?.data?.items) ? response.data.items : [];
    const reportedTotal = response?.data?.total_count;

    if (Number.isFinite(reportedTotal)) {
      sawNumericTotalCount = true;
      totalCount = Math.max(totalCount, reportedTotal);
    } else {
      core.warning(`Title dedup search response missing numeric total_count for query "${query}" (page ${page}); using page item count fallback`);
      totalCount = Math.max(totalCount, items.length);
    }
    fetchedItems += items.length;

    for (const item of items) {
      // Optional chaining keeps a null/undefined entry from aborting the whole search.
      if (!item?.pull_request && typeof item?.title === "string") {
        candidates.push({ title: item.title });
      }
    }

    // A short page means there is nothing left to fetch.
    if (items.length < TITLE_DEDUP_SEARCH_PER_PAGE) {
      break;
    }
    // A full page that already satisfies the reported total needs no follow-up
    // request; skipping it saves search rate-limit budget.
    if (sawNumericTotalCount && fetchedItems >= totalCount) {
      break;
    }
  }

  const reachedPageCap = fetchedPageCount === TITLE_DEDUP_MAX_SEARCH_PAGES;
  const fetchedFullPages = fetchedItems === fetchedPageCount * TITLE_DEDUP_SEARCH_PER_PAGE;
  // Without a total we cannot know whether more results exist, so a run of
  // full pages up to the cap is conservatively reported as truncated.
  const reachedPageCapWithoutCount = !sawNumericTotalCount && reachedPageCap && fetchedFullPages;

  return {
    candidates,
    fetchedItems,
    totalCount,
    truncated: totalCount > fetchedItems || reachedPageCapWithoutCount,
  };
}
/**
 * Search for existing issues that are potential title-duplicates.
 * Includes open issues and recently closed issues, with paginated search up to a capped page count.
 *
 * Both queries run concurrently; if either rejects, the returned promise rejects.
 *
 * @param {Object} githubClient
 * @param {string} owner
 * @param {string} repo
 * @returns {Promise<Array<{title: string}>>} Open-issue candidates followed by recently-closed ones
 */
async function getRepoTitleDedupCandidates(githubClient, owner, repo) {
  // The `closed:>=` qualifier takes a calendar date, so only YYYY-MM-DD is kept.
  const sinceDate = new Date(Date.now() - RECENTLY_CLOSED_DEDUP_DAYS * MS_PER_DAY).toISOString().slice(0, 10);
  const [openIssues, recentlyClosedIssues] = await Promise.all([
    searchTitleDedupIssues(githubClient, `repo:${owner}/${repo} is:issue is:open`),
    searchTitleDedupIssues(githubClient, `repo:${owner}/${repo} is:issue is:closed closed:>=${sinceDate}`),
  ]);

  const labelledResults = [
    ["open issues", openIssues],
    ["recently closed issues", recentlyClosedIssues],
  ];
  for (const [label, result] of labelledResults) {
    if (result.truncated) {
      core.warning(`Title dedup search (${label}) truncated for ${owner}/${repo}: fetched ${result.fetchedItems} of ${result.totalCount} results (cap ${TITLE_DEDUP_MAX_SEARCH_PAGES} pages)`);
    }
  }

  return [...openIssues.candidates, ...recentlyClosedIssues.candidates];
}

/**
 * Decide whether the repo-level title dedup search should be skipped because
 * the search rate-limit budget is nearly exhausted.
 *
 * Never throws: when the budget cannot be read (API error, missing or
 * non-numeric value) a warning is logged and the search is allowed to proceed.
 *
 * @param {Object} githubClient - Client exposing `rest.rateLimit.get`
 * @param {string} owner
 * @param {string} repo
 * @returns {Promise<boolean>} true when the search should be skipped
 */
async function shouldSkipRepoTitleDedupSearch(githubClient, owner, repo) {
  // Only real numbers and non-blank numeric strings count as a reading:
  // Number(null) and Number("") are 0 and would masquerade as an exhausted budget.
  const readCount = value => {
    if (typeof value === "number") {
      return value;
    }
    if (typeof value === "string" && value.trim() !== "") {
      return Number(value);
    }
    return Number.NaN;
  };

  try {
    const response = await githubClient.rest.rateLimit.get();
    const searchBudget = response?.data?.resources?.search;
    const remaining = readCount(searchBudget?.remaining);
    if (!Number.isFinite(remaining)) {
      core.warning(`Could not determine search rate limit remaining for ${owner}/${repo}; proceeding with repo-level title dedup search`);
      return false;
    }

    // The search quota is much smaller than the core quota (GitHub documents
    // 30 requests per minute for authenticated search at the time of writing).
    // If the configured floor is not below the reported quota, `remaining`
    // could never exceed it and the search would always be skipped. In that
    // case, require only headroom for this search's worst case: one request
    // per page for each of the two queries.
    const limit = readCount(searchBudget?.limit);
    let threshold = TITLE_DEDUP_MIN_SEARCH_RATE_LIMIT_REMAINING;
    if (Number.isFinite(limit) && limit > 0 && threshold >= limit) {
      const worstCaseRequests = 2 * TITLE_DEDUP_MAX_SEARCH_PAGES;
      threshold = Math.max(0, Math.min(worstCaseRequests, limit - 1));
    }

    if (remaining <= threshold) {
      core.warning(`Skipping repo-level title dedup search for ${owner}/${repo}: search rate limit remaining is ${remaining} (threshold <= ${threshold})`);
      return true;
    }
  } catch (error) {
    core.warning(`Could not check search rate limit before title dedup search: ${getErrorMessage(error)} — proceeding with repo-level dedup search`);
  }

  return false;
}
/**
 * Remember a title that was handled for a repository so later messages in the
 * same run can be compared against it.
 *
 * @param {string} repo - Key into `createdTitlesByRepo`
 * @param {string} seenTitle - Title as it was handled
 * @param {string} seenNormalizedTitle - Normalized form of `seenTitle`
 * @returns {void}
 */
function recordSeenTitle(repo, seenTitle, seenNormalizedTitle) {
  const entry = { title: seenTitle, normalizedTitle: seenNormalizedTitle };
  const known = createdTitlesByRepo.get(repo);
  if (known) {
    // The stored array is shared by reference, so appending is enough.
    known.push(entry);
    return;
  }
  createdTitlesByRepo.set(repo, [entry]);
}
message._duplicate_distance : 0; + core.warning(`Dropping duplicate create_issue from MCP pre-check in ${qualifiedItemRepo}: "${title}" (matched "${existingTitle}", distance=${distance})`); + return { + success: true, + dropped_duplicate: true, + dedup_source: "mcp-precheck", + title, + duplicate_of_title: existingTitle, + duplicate_distance: distance, + }; + } + + if (deduplicateByTitle.enabled) { + const withinRunCandidates = createdTitlesByRepo.get(qualifiedItemRepo) || []; + const withinRunDuplicate = findDuplicateByTitle(normalizedTitle, withinRunCandidates, deduplicateByTitle.maxDistance); + if (withinRunDuplicate) { + core.warning(`Dropping duplicate create_issue (within-run) in ${qualifiedItemRepo}: "${title}" (matched "${withinRunDuplicate.title}", distance=${withinRunDuplicate.distance})`); + return { + success: true, + dropped_duplicate: true, + dedup_source: "within-run", + title, + duplicate_of_title: withinRunDuplicate.title, + duplicate_distance: withinRunDuplicate.distance, + }; + } + + try { + const repoCacheKey = `${repoParts.owner}/${repoParts.repo}`; + if (!repoTitleDedupCandidatesCache.has(repoCacheKey) && !skipRepoLevelSearch) { + skipRepoLevelSearch = await shouldSkipRepoTitleDedupSearch(githubClient, repoParts.owner, repoParts.repo); + if (!skipRepoLevelSearch) { + const dedupCandidatesPromise = getRepoTitleDedupCandidates(githubClient, repoParts.owner, repoParts.repo); + dedupCandidatesPromise.catch(() => { + if (repoTitleDedupCandidatesCache.get(repoCacheKey) === dedupCandidatesPromise) { + repoTitleDedupCandidatesCache.delete(repoCacheKey); + } + }); + repoTitleDedupCandidatesCache.set(repoCacheKey, dedupCandidatesPromise); + } + } + + const repoCandidatesPromise = repoTitleDedupCandidatesCache.get(repoCacheKey); + if (repoCandidatesPromise) { + const repoCandidates = await repoCandidatesPromise; + const repoDuplicate = findDuplicateByTitle(normalizedTitle, repoCandidates, deduplicateByTitle.maxDistance); + if (repoDuplicate) { + 
recordSeenTitle(qualifiedItemRepo, title, normalizedTitle); + core.warning(`Dropping duplicate create_issue (repo-level) in ${qualifiedItemRepo}: "${title}" (matched "${repoDuplicate.title}", distance=${repoDuplicate.distance})`); + return { + success: true, + dropped_duplicate: true, + dedup_source: "repo-level", + title, + duplicate_of_title: repoDuplicate.title, + duplicate_distance: repoDuplicate.distance, + }; + } + } + } catch (error) { + core.warning(`Title deduplication search failed: ${getErrorMessage(error)} — proceeding with issue creation`); + } + } + // Add parent reference if (effectiveParentIssueNumber) { core.info("Detected issue context, parent issue " + effectiveParentRepo + "#" + effectiveParentIssueNumber); @@ -787,6 +993,9 @@ async function main(config = {}) { // If in staged mode, preview the issue without creating it if (isStaged) { logStagedPreviewInfo(`Would create issue in ${qualifiedItemRepo} with title: ${title}`); + if (deduplicateByTitle.enabled) { + recordSeenTitle(qualifiedItemRepo, title, normalizedTitle); + } // Return success with staged flag and preview info return { success: true, @@ -820,6 +1029,9 @@ async function main(config = {}) { core.info(`Created issue ${qualifiedItemRepo}#${issue.number}: ${issue.html_url}`); createdIssues.push({ ...issue, _repo: qualifiedItemRepo }); + if (deduplicateByTitle.enabled) { + recordSeenTitle(qualifiedItemRepo, title, normalizedTitle); + } if (issueFields.length > 0) { try { diff --git a/actions/setup/js/create_issue.test.cjs b/actions/setup/js/create_issue.test.cjs index 7e47f5acb9b..97630d2000e 100644 --- a/actions/setup/js/create_issue.test.cjs +++ b/actions/setup/js/create_issue.test.cjs @@ -41,6 +41,17 @@ describe("create_issue", () => { }, }), }, + rateLimit: { + get: vi.fn().mockResolvedValue({ + data: { + resources: { + search: { + remaining: 1000, + }, + }, + }, + }), + }, }, graphql: vi.fn(), }; @@ -453,6 +464,213 @@ describe("create_issue", () => { }); }); + 
describe("deduplicate-by-title", () => { + it("should drop within-run duplicates when enabled as boolean", async () => { + const handler = await main({ + deduplicate_by_title: true, + }); + + const first = await handler({ title: "Duplicate title" }); + const second = await handler({ title: "Duplicate title" }); + + expect(first.success).toBe(true); + expect(second.success).toBe(true); + expect(second.dropped_duplicate).toBe(true); + expect(second.dedup_source).toBe("within-run"); + expect(mockGithub.rest.issues.create).toHaveBeenCalledTimes(1); + }); + + it("should drop fuzzy duplicates based on Levenshtein distance", async () => { + const handler = await main({ + deduplicate_by_title: 1, + }); + + const first = await handler({ title: "Fix login bug" }); + const second = await handler({ title: "Fix login bag" }); + + expect(first.success).toBe(true); + expect(second.success).toBe(true); + expect(second.dropped_duplicate).toBe(true); + expect(second.duplicate_distance).toBe(1); + expect(mockGithub.rest.issues.create).toHaveBeenCalledTimes(1); + }); + + it("should drop duplicates that already exist in the repository", async () => { + mockGithub.rest.search.issuesAndPullRequests + .mockResolvedValueOnce({ + data: { + items: [{ title: "Existing title in repo", number: 99 }], + }, + }) + .mockResolvedValueOnce({ + data: { + items: [], + }, + }); + + const handler = await main({ + deduplicate_by_title: true, + }); + const result = await handler({ title: "Existing title in repo" }); + + expect(result.success).toBe(true); + expect(result.dropped_duplicate).toBe(true); + expect(result.dedup_source).toBe("repo-level"); + expect(mockGithub.rest.issues.create).not.toHaveBeenCalled(); + }); + + it("should deduplicate repeated repo-level duplicates within the same run", async () => { + mockGithub.rest.search.issuesAndPullRequests + .mockResolvedValueOnce({ + data: { + total_count: 1, + items: [{ title: "Existing title in repo", number: 99 }], + }, + }) + .mockResolvedValueOnce({ 
+ data: { + total_count: 0, + items: [], + }, + }); + + const handler = await main({ + deduplicate_by_title: true, + }); + const first = await handler({ title: "Existing title in repo" }); + const second = await handler({ title: "Existing title in repo" }); + + expect(first.success).toBe(true); + expect(first.dropped_duplicate).toBe(true); + expect(first.dedup_source).toBe("repo-level"); + expect(second.success).toBe(true); + expect(second.dropped_duplicate).toBe(true); + expect(second.dedup_source).toBe("within-run"); + expect(mockGithub.rest.search.issuesAndPullRequests).toHaveBeenCalledTimes(2); + expect(mockGithub.rest.issues.create).not.toHaveBeenCalled(); + }); + + it("should cache repo-level dedup candidates within a run", async () => { + const handler = await main({ + deduplicate_by_title: true, + }); + + const first = await handler({ title: "First unique title" }); + const second = await handler({ title: "Second unique title" }); + + expect(first.success).toBe(true); + expect(second.success).toBe(true); + expect(mockGithub.rest.issues.create).toHaveBeenCalledTimes(2); + expect(mockGithub.rest.search.issuesAndPullRequests).toHaveBeenCalledTimes(2); + }); + + it("should paginate repo-level dedup searches", async () => { + const openPageOneItems = Array.from({ length: 100 }, (_, index) => ({ title: `Open issue ${index}` })); + mockGithub.rest.search.issuesAndPullRequests.mockImplementation(({ q, page = 1 }) => { + if (q.includes("is:open")) { + if (page === 1) { + return Promise.resolve({ + data: { + total_count: 101, + items: openPageOneItems, + }, + }); + } + return Promise.resolve({ + data: { + total_count: 101, + items: [{ title: "Existing title in repo" }], + }, + }); + } + + return Promise.resolve({ + data: { + total_count: 0, + items: [], + }, + }); + }); + + const handler = await main({ + deduplicate_by_title: true, + }); + const result = await handler({ title: "Existing title in repo" }); + + expect(result.success).toBe(true); + 
expect(result.dropped_duplicate).toBe(true); + expect(result.dedup_source).toBe("repo-level"); + expect(mockGithub.rest.search.issuesAndPullRequests).toHaveBeenCalledWith( + expect.objectContaining({ + q: expect.stringContaining("is:open"), + page: 2, + }) + ); + expect(mockGithub.rest.issues.create).not.toHaveBeenCalled(); + }); + + it("should warn when repo-level dedup search is truncated at page cap", async () => { + const pageItems = Array.from({ length: 100 }, (_, index) => ({ title: `Open issue ${index}` })); + mockGithub.rest.search.issuesAndPullRequests.mockImplementation(({ q, page = 1 }) => { + if (q.includes("is:open")) { + return Promise.resolve({ + data: { + total_count: 305, + items: page <= 2 ? pageItems : [], + }, + }); + } + + return Promise.resolve({ + data: { + total_count: 0, + items: [], + }, + }); + }); + + const handler = await main({ + deduplicate_by_title: true, + }); + const result = await handler({ title: "Completely new title" }); + + expect(result.success).toBe(true); + expect(mockGithub.rest.issues.create).toHaveBeenCalledOnce(); + expect(mockCore.warning).toHaveBeenCalledWith(expect.stringContaining("truncated")); + }); + + it("should skip repo-level search when search rate limit is low and still deduplicate within-run", async () => { + mockGithub.rest.rateLimit.get.mockResolvedValue({ + data: { + resources: { + search: { + remaining: 1, + }, + }, + }, + }); + + const handler = await main({ + deduplicate_by_title: true, + }); + const first = await handler({ title: "Rate limited title" }); + const second = await handler({ title: "Rate limited title" }); + + expect(first.success).toBe(true); + expect(second.success).toBe(true); + expect(second.dropped_duplicate).toBe(true); + expect(second.dedup_source).toBe("within-run"); + expect(mockGithub.rest.rateLimit.get).toHaveBeenCalledTimes(1); + expect(mockGithub.rest.search.issuesAndPullRequests).not.toHaveBeenCalled(); + 
expect(mockCore.warning).toHaveBeenCalledWith(expect.stringContaining("Skipping repo-level title dedup search")); + }); + + it("should reject invalid deduplicate-by-title configuration", async () => { + await expect(main({ deduplicate_by_title: "invalid" })).rejects.toThrow("deduplicate-by-title"); + await expect(main({ deduplicate_by_title: 101 })).rejects.toThrow("deduplicate-by-title"); + }); + }); + describe("repository targeting", () => { it("should create issue in specified repo", async () => { const handler = await main({ diff --git a/actions/setup/js/expired_entity_cleanup_helpers.test.cjs b/actions/setup/js/expired_entity_cleanup_helpers.test.cjs index 6085b3f2a90..f7557e66cd4 100644 --- a/actions/setup/js/expired_entity_cleanup_helpers.test.cjs +++ b/actions/setup/js/expired_entity_cleanup_helpers.test.cjs @@ -35,9 +35,23 @@ describe("expired_entity_cleanup_helpers", () => { describe("delay", () => { it("resolves after the specified time", async () => { - const start = Date.now(); - await delay(10); - expect(Date.now() - start).toBeGreaterThanOrEqual(10); + vi.useFakeTimers(); + try { + const promise = delay(10); + await vi.advanceTimersByTimeAsync(9); + + let resolved = false; + promise.then(() => { + resolved = true; + }); + await Promise.resolve(); + expect(resolved).toBe(false); + + await vi.advanceTimersByTimeAsync(1); + await expect(promise).resolves.toBeUndefined(); + } finally { + vi.useRealTimers(); + } }); it("resolves immediately for 0 ms", async () => { diff --git a/actions/setup/js/issue_title_dedup.cjs b/actions/setup/js/issue_title_dedup.cjs new file mode 100644 index 00000000000..b6227e62fac --- /dev/null +++ b/actions/setup/js/issue_title_dedup.cjs @@ -0,0 +1,73 @@ +// @ts-check + +const { levenshteinDistance } = require("./levenshtein_distance.cjs"); +const MAX_DEDUPLICATE_BY_TITLE_DISTANCE = 100; + +/** + * Parse create-issue deduplication config. 
/**
 * Parse create-issue deduplication config.
 * - undefined / null / false => disabled
 * - true => enabled with exact-match distance 0
 * - N => enabled with Levenshtein max distance N, where N is an integer in
 *   the range 0..MAX_DEDUPLICATE_BY_TITLE_DISTANCE
 *
 * @param {unknown} value
 * @returns {{ enabled: boolean, maxDistance: number }}
 * @throws {Error} When the value is none of the accepted forms (including
 *   strings, fractional, negative, non-finite or too-large numbers)
 */
function parseDeduplicateByTitle(value) {
  if (value === undefined || value === null || value === false) {
    return { enabled: false, maxDistance: 0 };
  }
  if (value === true) {
    return { enabled: true, maxDistance: 0 };
  }
  // Number.isInteger is false for non-numbers, NaN and +/-Infinity, so it
  // covers the type and finiteness checks by itself.
  if (Number.isInteger(value) && value >= 0 && value <= MAX_DEDUPLICATE_BY_TITLE_DISTANCE) {
    return { enabled: true, maxDistance: value };
  }
  throw new Error(`deduplicate-by-title must be a boolean or a non-negative integer (0-${MAX_DEDUPLICATE_BY_TITLE_DISTANCE})`);
}

/**
 * Normalize a title for deduplication comparisons: canonical Unicode
 * composition (NFC), lower-casing, whitespace runs collapsed to one space,
 * and surrounding whitespace removed. null/undefined normalize to "".
 *
 * @param {string} title
 * @returns {string}
 */
function normalizeTitleForDedup(title) {
  return (
    String(title ?? "")
      // Canonically equivalent spellings (e.g. "é" as one code point vs. "e" plus
      // a combining accent) must compare equal; without NFC they would differ.
      .normalize("NFC")
      .toLowerCase()
      .replace(/\s+/g, " ")
      .trim()
  );
}

/**
 * @typedef {{ title: string, normalizedTitle?: string }} TitleCandidate
 */
/**
 * Find a duplicate candidate by Levenshtein distance threshold.
 *
 * Each candidate's `normalizedTitle` (or `title` when that is empty or absent)
 * is normalized and compared with `normalizedTitle`. The closest candidate
 * within `maxDistance` wins; the first one encountered wins ties, and an
 * exact match returns immediately.
 *
 * @param {string} normalizedTitle - Already-normalized title to look up
 * @param {TitleCandidate[]} candidates
 * @param {number} maxDistance - Inclusive distance threshold
 * @returns {{ title: string, distance: number } | null} The original (un-normalized)
 *   title of the best match and its distance, or null when nothing is close enough
 */
function findDuplicateByTitle(normalizedTitle, candidates, maxDistance) {
  let bestMatch = null;

  for (const candidate of candidates) {
    const candidateTitle = normalizeTitleForDedup(candidate.normalizedTitle || candidate.title);
    const distance = levenshteinDistance(normalizedTitle, candidateTitle);
    if (distance <= maxDistance && (!bestMatch || distance < bestMatch.distance)) {
      bestMatch = { title: candidate.title, distance };
      if (distance === 0) {
        // Nothing can beat an exact match.
        return bestMatch;
      }
    }
  }

  return bestMatch;
}

/**
 * Compute Levenshtein edit distance between two strings.
 * Cost model: insertion=1, deletion=1, substitution=1, counted per Unicode
 * code point (so replacing one emoji with another costs 1, not 2).
 *
 * Non-string inputs are coerced with String(); null/undefined count as "".
 *
 * @param {string} a
 * @param {string} b
 * @returns {number}
 */
function levenshteinDistance(a, b) {
  const sourceText = String(a ?? "");
  const targetText = String(b ?? "");

  if (sourceText === targetText) {
    return 0;
  }

  // Array.from iterates by code point, keeping surrogate pairs together;
  // indexing the raw string would compare UTF-16 code units instead.
  let source = Array.from(sourceText);
  let target = Array.from(targetText);

  // The distance is symmetric, so keep the shorter sequence as the row to
  // minimize the size of the two working arrays.
  if (target.length > source.length) {
    [source, target] = [target, source];
  }

  const sourceLength = source.length;
  const targetLength = target.length;

  if (targetLength === 0) {
    return sourceLength;
  }

  // Two-row dynamic programming: `previous` is the row for the prior source
  // symbol, `current` is the row being filled.
  let previous = Array.from({ length: targetLength + 1 }, (_, index) => index);
  let current = new Array(targetLength + 1);

  for (let sourceIndex = 1; sourceIndex <= sourceLength; sourceIndex++) {
    current[0] = sourceIndex;
    const sourceSymbol = source[sourceIndex - 1];

    for (let targetIndex = 1; targetIndex <= targetLength; targetIndex++) {
      const substitutionCost = sourceSymbol === target[targetIndex - 1] ? 0 : 1;
      const deletion = previous[targetIndex] + 1;
      const insertion = current[targetIndex - 1] + 1;
      const substitution = previous[targetIndex - 1] + substitutionCost;
      current[targetIndex] = Math.min(deletion, insertion, substitution);
    }

    // Swap rows rather than reallocating.
    [previous, current] = [current, previous];
  }

  return previous[targetLength];
}
expect(levenshteinDistance("abc", "axc")).toBe(1); // substitution + }); + + it("matches known examples", () => { + expect(levenshteinDistance("kitten", "sitting")).toBe(3); + expect(levenshteinDistance("flaw", "lawn")).toBe(2); + expect(levenshteinDistance("Saturday", "Sunday")).toBe(3); + }); + + it("is symmetric", () => { + const a = "feature: deduplicate by title"; + const b = "feature: dedupe by title"; + expect(levenshteinDistance(a, b)).toBe(levenshteinDistance(b, a)); + }); + + it("supports unicode characters", () => { + expect(levenshteinDistance("café", "cafe")).toBe(1); + expect(levenshteinDistance("🧪test", "🧪tests")).toBe(1); + }); + + it("coerces non-string inputs safely", () => { + expect(levenshteinDistance(1234, 1234)).toBe(0); + expect(levenshteinDistance(null, "x")).toBe(1); + expect(levenshteinDistance(undefined, "")).toBe(0); + }); +}); diff --git a/actions/setup/js/safe_output_summary.cjs b/actions/setup/js/safe_output_summary.cjs index 6fa1737108c..aca105b72c9 100644 --- a/actions/setup/js/safe_output_summary.cjs +++ b/actions/setup/js/safe_output_summary.cjs @@ -32,13 +32,14 @@ function generateSafeOutputSummary(options) { // Detect fallback outcomes for code-push types. // Prefer explicit fallback_type when available; infer only for backward compatibility. + const isDuplicateDrop = success && result && result.dropped_duplicate === true; const isFallback = success && result && result.fallback_used === true; const inferredFallbackType = isFallback && (result.pull_request_url || result.pull_request_number != null) ? "pull_request" : "issue"; const fallbackType = isFallback && result?.fallback_type ? result.fallback_type : inferredFallbackType; // Choose emoji and status based on success and fallback - const emoji = isFallback ? "⚠️" : success ? "✅" : "❌"; - const status = isFallback ? (fallbackType === "pull_request" ? "Fallback Pull Request Created" : "Fallback Issue Created") : success ? "Success" : "Failed"; + const emoji = isDuplicateDrop ? 
"⚠️" : isFallback ? "⚠️" : success ? "✅" : "❌"; + const status = isDuplicateDrop ? "Duplicate Dropped" : isFallback ? (fallbackType === "pull_request" ? "Fallback Pull Request Created" : "Fallback Issue Created") : success ? "Success" : "Failed"; // Start building the summary let summary = `
\n${emoji} ${displayType} - ${status} (Message ${messageIndex})\n\n`; @@ -47,7 +48,21 @@ function generateSafeOutputSummary(options) { const sectionTitle = isFallback ? `### ${displayType} — ${fallbackType === "pull_request" ? "Fallback Pull Request" : "Fallback Issue"}\n\n` : `### ${displayType}\n\n`; summary += sectionTitle; - if (isFallback) { + if (isDuplicateDrop) { + summary += `> ℹ️ Duplicate issue title was dropped by title-based deduplication.\n\n`; + if (result.title || message?.title) { + summary += `**Title:** ${result.title || message?.title}\n\n`; + } + if (result.duplicate_of_title) { + summary += `**Matched Existing Title:** ${result.duplicate_of_title}\n\n`; + } + if (result.duplicate_distance !== undefined) { + summary += `**Levenshtein Distance:** ${result.duplicate_distance}\n\n`; + } + if (result.dedup_source) { + summary += `**Dedup Source:** ${result.dedup_source}\n\n`; + } + } else if (isFallback) { // Explain why the fallback occurred and show the created fallback target if (fallbackType === "pull_request") { summary += `> ℹ️ Direct push to the original pull request branch was not possible (diverged/non-fast-forward). 
A fallback pull request was created instead.\n\n`; diff --git a/actions/setup/js/safe_output_summary.test.cjs b/actions/setup/js/safe_output_summary.test.cjs index 758dba5e575..8cb086e8c0a 100644 --- a/actions/setup/js/safe_output_summary.test.cjs +++ b/actions/setup/js/safe_output_summary.test.cjs @@ -80,6 +80,32 @@ describe("safe_output_summary", () => { expect(summary).toContain("permission denied"); }); + it("should generate summary for dropped duplicate issue", () => { + const options = { + type: "create_issue", + messageIndex: 3, + success: true, + result: { + dropped_duplicate: true, + title: "Duplicate title", + duplicate_of_title: "Duplicate title", + duplicate_distance: 0, + dedup_source: "within-run", + }, + message: { + title: "Duplicate title", + }, + }; + + const summary = generateSafeOutputSummary(options); + + expect(summary).toContain("⚠️"); + expect(summary).toContain("Duplicate Dropped"); + expect(summary).toContain("Matched Existing Title"); + expect(summary).toContain("Levenshtein Distance"); + expect(summary).toContain("Dedup Source"); + }); + it("should truncate long body content", () => { const longBody = "a".repeat(1000); diff --git a/actions/setup/js/safe_outputs_handlers.cjs b/actions/setup/js/safe_outputs_handlers.cjs index 40aa82f8f47..43d9a029c00 100644 --- a/actions/setup/js/safe_outputs_handlers.cjs +++ b/actions/setup/js/safe_outputs_handlers.cjs @@ -20,6 +20,8 @@ const { findRepoCheckout } = require("./find_repo_checkout.cjs"); const { resolveTargetRepoConfig, resolveAndValidateRepo } = require("./repo_helpers.cjs"); const { getOrGenerateTemporaryId } = require("./temporary_id.cjs"); const { parseAllowedExtensionsEnv } = require("./allowed_extensions_helpers.cjs"); +const { sanitizeTitle, applyTitlePrefix } = require("./sanitize_title.cjs"); +const { parseDeduplicateByTitle, normalizeTitleForDedup, findDuplicateByTitle } = require("./issue_title_dedup.cjs"); /** * Create handlers for safe output tools @@ -88,6 +90,17 @@ function 
/**
 * Handler for create_issue tool
 * Applies title-based within-run deduplication for immediate feedback.
 *
 * The effective title is the trimmed `title`, else the trimmed `body`, else
 * "Agent Output", sanitized and prefixed with the configured title prefix.
 * When deduplication is enabled and the title matches one already accepted for
 * the same target repo, the entry is still appended, flagged with
 * `_dropped_duplicate_by_title`, and the caller receives `duplicate_dropped`.
 *
 * @param {Object} [args] - Tool arguments (`title`, `body`, optional repo target, ...)
 * @returns {{ content: Array<{ type: string, text: string }>, isError?: boolean }}
 */
const createIssueHandler = args => {
  const entry = { ...(args || {}), type: "create_issue" };

  /** Wrap a JSON payload in the tool's text-response envelope. */
  const textResponse = payload => ({
    content: [
      {
        type: "text",
        text: JSON.stringify(payload),
      },
    ],
  });

  const { defaultTargetRepo, allowedRepos } = resolveTargetRepoConfig(createIssueConfig);
  const repoResult = resolveAndValidateRepo(entry, defaultTargetRepo, allowedRepos, "issue");
  if (!repoResult.success) {
    return {
      ...textResponse({
        result: "error",
        error: repoResult.error,
      }),
      isError: true,
    };
  }
  const resolvedRepo = repoResult.repo;

  // Optional chaining only guards null/undefined; a non-string title or body
  // would make `.trim()` throw, so anything that is not a string is treated as absent.
  const trimmedText = value => (typeof value === "string" ? value.trim() : "");
  const rawTitle = trimmedText(entry.title) || trimmedText(entry.body) || "Agent Output";
  const resolvedTitle = applyTitlePrefix(sanitizeTitle(rawTitle, createIssueTitlePrefix), createIssueTitlePrefix);

  if (!deduplicateByTitle.enabled) {
    appendSafeOutput(entry);
    return textResponse({ result: "success" });
  }

  const normalizedTitle = normalizeTitleForDedup(resolvedTitle);
  const seenTitles = seenIssueTitlesByRepo.get(resolvedRepo) || [];
  const duplicate = findDuplicateByTitle(normalizedTitle, seenTitles, deduplicateByTitle.maxDistance);
  if (duplicate) {
    const droppedEntry = {
      ...entry,
      _dropped_duplicate_by_title: true,
      _dedup_source: "mcp-within-run",
      _duplicate_title: duplicate.title,
      _duplicate_distance: duplicate.distance,
    };
    appendSafeOutput(droppedEntry);
    return textResponse({
      result: "duplicate_dropped",
      reason: `Duplicate create_issue title matched "${duplicate.title}" (distance=${duplicate.distance})`,
    });
  }

  // Record the title only after the entry was appended successfully. If the
  // append throws, a retry of the same request must not be dropped as a
  // duplicate of an entry that was never written.
  appendSafeOutput(entry);
  seenTitles.push({ title: resolvedTitle, normalizedTitle });
  seenIssueTitlesByRepo.set(resolvedRepo, seenTitles);

  return textResponse({ result: "success" });
};
process.env.GITHUB_WORKSPACE; delete process.env.GITHUB_SERVER_URL; delete process.env.GITHUB_REPOSITORY; + delete process.env.GH_AW_WORKFLOW_ID; delete process.env.GH_AW_ASSETS_BRANCH; delete process.env.GH_AW_ASSETS_MAX_SIZE_KB; delete process.env.GH_AW_ASSETS_ALLOWED_EXTS; @@ -1104,6 +1106,7 @@ describe("safe_outputs_handlers", () => { expect(handlers.createPullRequestHandler).toBeDefined(); expect(handlers.pushToPullRequestBranchHandler).toBeDefined(); expect(handlers.pushRepoMemoryHandler).toBeDefined(); + expect(handlers.createIssueHandler).toBeDefined(); expect(handlers.addCommentHandler).toBeDefined(); }); @@ -1164,6 +1167,63 @@ describe("safe_outputs_handlers", () => { }); }); + describe("createIssueHandler", () => { + it("should append create_issue entry when dedup is disabled", () => { + handlers.createIssueHandler({ title: "Issue A", body: "Body A" }); + handlers.createIssueHandler({ title: "Issue A", body: "Body A again" }); + + expect(mockAppendSafeOutput).toHaveBeenCalledTimes(2); + const first = mockAppendSafeOutput.mock.calls[0][0]; + const second = mockAppendSafeOutput.mock.calls[1][0]; + expect(first.type).toBe("create_issue"); + expect(second.type).toBe("create_issue"); + expect(second._dropped_duplicate_by_title).toBeUndefined(); + }); + + it("should drop duplicate create_issue titles in MCP pre-check when enabled", () => { + const h = createHandlers(mockServer, mockAppendSafeOutput, { + create_issue: { + deduplicate_by_title: true, + }, + }); + + const first = h.createIssueHandler({ title: "Duplicate Issue", body: "First body" }); + const second = h.createIssueHandler({ title: "Duplicate Issue", body: "Second body" }); + + const firstResponse = JSON.parse(first.content[0].text); + const secondResponse = JSON.parse(second.content[0].text); + expect(firstResponse.result).toBe("success"); + expect(secondResponse.result).toBe("duplicate_dropped"); + const droppedEntry = mockAppendSafeOutput.mock.calls[1][0]; + 
expect(droppedEntry._dropped_duplicate_by_title).toBe(true); + expect(droppedEntry._duplicate_distance).toBe(0); + }); + + it("should support Levenshtein distance threshold in MCP pre-check", () => { + const h = createHandlers(mockServer, mockAppendSafeOutput, { + create_issue: { + deduplicate_by_title: 1, + }, + }); + + h.createIssueHandler({ title: "Fix login bug", body: "A" }); + const second = h.createIssueHandler({ title: "Fix login bag", body: "B" }); + const secondResponse = JSON.parse(second.content[0].text); + + expect(secondResponse.result).toBe("duplicate_dropped"); + }); + + it("should reject invalid deduplicate-by-title configuration", () => { + expect(() => + createHandlers(mockServer, mockAppendSafeOutput, { + create_issue: { + deduplicate_by_title: "invalid", + }, + }) + ).toThrow("deduplicate-by-title"); + }); + }); + describe("pushRepoMemoryHandler", () => { let memoryDir; diff --git a/actions/setup/js/safe_outputs_tools_loader.cjs b/actions/setup/js/safe_outputs_tools_loader.cjs index b13e940cec9..38e07d08322 100644 --- a/actions/setup/js/safe_outputs_tools_loader.cjs +++ b/actions/setup/js/safe_outputs_tools_loader.cjs @@ -72,6 +72,7 @@ function loadTools(server) { */ function attachHandlers(tools, handlers) { const handlerMap = { + create_issue: handlers.createIssueHandler, create_pull_request: handlers.createPullRequestHandler, push_to_pull_request_branch: handlers.pushToPullRequestBranchHandler, push_repo_memory: handlers.pushRepoMemoryHandler, diff --git a/actions/setup/js/safe_outputs_tools_loader.test.cjs b/actions/setup/js/safe_outputs_tools_loader.test.cjs index dcee56913b5..cfa4a674544 100644 --- a/actions/setup/js/safe_outputs_tools_loader.test.cjs +++ b/actions/setup/js/safe_outputs_tools_loader.test.cjs @@ -108,6 +108,20 @@ describe("safe_outputs_tools_loader", () => { expect(result[1].handler).toBeUndefined(); }); + it("should attach create_issue handler", () => { + const tools = [{ name: "create_issue", description: "Create issue" 
}]; + const handlers = { + createIssueHandler: vi.fn(), + createPullRequestHandler: vi.fn(), + pushToPullRequestBranchHandler: vi.fn(), + uploadAssetHandler: vi.fn(), + }; + + const result = attachHandlers(tools, handlers); + + expect(result[0].handler).toBe(handlers.createIssueHandler); + }); + it("should attach push_to_pull_request_branch handler", () => { const tools = [{ name: "push_to_pull_request_branch", description: "Push to PR" }]; const handlers = { diff --git a/actions/setup/js/types/safe-outputs-config.d.ts b/actions/setup/js/types/safe-outputs-config.d.ts index 4ff3a87b84e..a515959510b 100644 --- a/actions/setup/js/types/safe-outputs-config.d.ts +++ b/actions/setup/js/types/safe-outputs-config.d.ts @@ -13,6 +13,7 @@ interface SafeOutputConfig { */ interface CreateIssueConfig extends SafeOutputConfig { "title-prefix"?: string; + "deduplicate-by-title"?: boolean | number; labels?: string[]; "target-repo"?: string; "allowed-repos"?: string[]; diff --git a/pkg/parser/schemas/main_workflow_schema.json b/pkg/parser/schemas/main_workflow_schema.json index a1c1c607e4d..19b9f93ba2c 100644 --- a/pkg/parser/schemas/main_workflow_schema.json +++ b/pkg/parser/schemas/main_workflow_schema.json @@ -4948,6 +4948,19 @@ } ] }, + "deduplicate-by-title": { + "description": "Title-based deduplication for create-issue. Set to true for exact title matching, or provide a non-negative integer to deduplicate by Levenshtein edit distance (e.g., 1 allows one-character differences). Applies within-run and against open/recently-closed repository issues.", + "oneOf": [ + { + "type": "boolean" + }, + { + "type": "integer", + "minimum": 0, + "maximum": 100 + } + ] + }, "target-repo": { "type": "string", "description": "Target repository in format 'owner/repo' for cross-repository issue creation. Takes precedence over trial target repo settings."