-
Notifications
You must be signed in to change notification settings - Fork 733
chore: automatic repository groups parsing for openstack (CM-1100) #4002
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
039ef3d
b739c71
f10c0e5
8139289
4e024be
f272d0b
ca8c62d
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,215 @@ | ||
| import CronTime from 'cron-time-generator' | ||
| import yaml from 'js-yaml' | ||
|
|
||
| import { WRITE_DB_CONFIG, getDbConnection } from '@crowd/data-access-layer/src/database' | ||
| import { pgpQx } from '@crowd/data-access-layer/src/queryExecutor' | ||
| import { getRepositoriesByUrl } from '@crowd/data-access-layer/src/repositories' | ||
| import { | ||
| createRepositoryGroup, | ||
| listRepositoryGroups, | ||
| updateRepositoryGroup, | ||
| } from '@crowd/data-access-layer/src/repositoryGroups' | ||
|
|
||
| import { IJobDefinition } from '../types' | ||
|
|
||
| // --------------------------------------------------------------------------- | ||
| // Config — one entry per governance YAML source we want to sync. | ||
| // Adding a second platform in the future is as simple as appending an entry. | ||
| // --------------------------------------------------------------------------- | ||
const GOVERNANCE_SOURCES = [
  {
    // Slug (not ID) of the insightsProject that owns the synced repository groups.
    // NOTE(review): a review thread on this change flagged that the slug lookup
    // may silently fail due to casing — confirm this value matches the stored
    // slug exactly.
    insightsProjectSlug: 'OpenStack',
    // The YAML lists repos as "<owner>/<repo>"; this prefix is prepended to
    // each entry to build the full repository URL.
    // NOTE(review): a reviewer reported that repositories may be stored under
    // a different host than this one — TODO confirm against the repositories table.
    repoUrlBase: 'https://review.opendev.org/',
    // OpenStack governance YAML, as published by the TC.
    yamlUrl: 'https://opendev.org/openstack/governance/raw/branch/master/reference/projects.yaml',
  },
]
|
|
||
| // --------------------------------------------------------------------------- | ||
| // Types | ||
| // --------------------------------------------------------------------------- | ||
/**
 * Shape of the governance YAML once parsed: a mapping from project name to an
 * object whose optional `deliverables` mapping holds, per deliverable, an
 * optional list of repos.
 */
type GovernanceYaml = {
  [projectName: string]: {
    deliverables?: {
      [deliverableName: string]: { repos?: string[] }
    }
  }
}
|
|
||
/** One top-level project from the governance YAML with all of its repos flattened. */
type ParsedProject = {
  /** Project name, taken from the top-level YAML key. */
  project: string
  /** Repo entries collected from every deliverable of the project. */
  repos: string[]
}
|
|
||
| // --------------------------------------------------------------------------- | ||
| // Helpers | ||
| // --------------------------------------------------------------------------- | ||
/**
 * Downloads a governance YAML document and flattens it into one entry per
 * top-level project, each carrying every repo listed under its deliverables.
 *
 * @param yamlUrl - URL of the governance YAML file to download.
 * @returns One `ParsedProject` per top-level key of the YAML mapping. A project
 *   with no deliverables, or whose deliverables list no repos, is returned with
 *   an empty `repos` array.
 * @throws Error when the request fails, the response status is not OK, the body
 *   cannot be parsed as YAML, or the YAML root is not a mapping.
 */
async function fetchProjects(yamlUrl: string): Promise<ParsedProject[]> {
  // Anything can be thrown in JS; never assume the caught value is an Error.
  const messageOf = (err: unknown): string => (err instanceof Error ? err.message : String(err))

  let text: string
  try {
    const response = await fetch(yamlUrl)
    if (!response.ok) {
      throw new Error(`HTTP ${response.status} ${response.statusText}`)
    }
    text = await response.text()
  } catch (err) {
    throw new Error(`Failed to fetch governance YAML from ${yamlUrl}: ${messageOf(err)}`)
  }

  let data: GovernanceYaml
  try {
    // NOTE(review): this parses remote content. js-yaml 4.x `load` is documented
    // as safe by default; confirm the installed major version (3.x needs `safeLoad`).
    data = yaml.load(text) as GovernanceYaml
    // A YAML sequence is also typeof 'object', so arrays are rejected explicitly.
    if (!data || typeof data !== 'object' || Array.isArray(data)) {
      throw new Error('Parsed YAML is not an object — file format may have changed')
    }
  } catch (err) {
    throw new Error(`Failed to parse governance YAML from ${yamlUrl}: ${messageOf(err)}`)
  }

  return Object.entries(data).map(([project, info]) => ({
    project,
    repos: Object.values(info?.deliverables ?? {}).flatMap((deliverable) => {
      // An empty YAML mapping value parses to null, and `repos` is not
      // guaranteed to be a list of strings — keep only well-formed entries.
      const repos: unknown = deliverable?.repos
      return Array.isArray(repos)
        ? repos.filter((r): r is string => typeof r === 'string')
        : []
    }),
  }))
}
|
|
||
/**
 * Converts a display name into a lowercase, dash-separated slug.
 *
 * Accented letters are reduced to their base letter first (e.g. "é" → "e"),
 * then every run of characters outside `a-z0-9` becomes a single dash, and
 * a leading or trailing dash is removed.
 *
 * @param name - Human-readable name to slugify.
 * @returns The slug; an empty string when `name` contains no letters or digits.
 */
function toSlug(name: string): string {
  return (
    name
      // Decompose accented characters so the base letter survives once the
      // combining marks (U+0300–U+036F) are stripped.
      .normalize('NFKD')
      .replace(/[\u0300-\u036f]/g, '')
      .toLowerCase()
      .replace(/[^a-z0-9]+/g, '-')
      .replace(/^-|-$/g, '')
  )
}
|
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Inconsistent slug generation risks duplicate repository groups (Low Severity). The new […] Additional Locations (1). Reviewed by Cursor Bugbot for commit 4e024be. Configure here. |
||
|
|
||
| // --------------------------------------------------------------------------- | ||
| // Job definition | ||
| // --------------------------------------------------------------------------- | ||
/**
 * Cron job that syncs repository groups from the configured governance YAML
 * sources.
 *
 * For each entry in `GOVERNANCE_SOURCES` the job resolves the owning insights
 * project by slug, downloads and parses the YAML, and then creates or updates
 * one repository group per YAML project, containing only the repo URLs that
 * `getRepositoriesByUrl` returns. A source is skipped (not failed) when its
 * insights project is not found or its YAML cannot be loaded.
 */
const job: IJobDefinition = {
  name: 'openstack-repository-groups-sync',
  // Run once a week
  cronTime: CronTime.everyWeek(),
  timeout: 30 * 60, // 30 minutes

  process: async (ctx) => {
    ctx.log.info('Starting OpenStack repository groups sync...')

    const dbConnection = await getDbConnection(WRITE_DB_CONFIG(), 3, 0)
    const qx = pgpQx(dbConnection)

    for (const source of GOVERNANCE_SOURCES) {
      ctx.log.debug(`Processing source: ${source.yamlUrl}`)

      // YAML entries are "<owner>/<repo>"; prefix them to get full URLs.
      const toUrl = (repo: string): string => `${source.repoUrlBase}${repo}`

      // ------------------------------------------------------------------
      // 1. Resolve the insights project
      // ------------------------------------------------------------------
      const insightsProject = await qx.selectOneOrNone(
        `SELECT id FROM "insightsProjects" WHERE slug = $(slug) AND "deletedAt" IS NULL`,
        { slug: source.insightsProjectSlug },
      )

      if (!insightsProject) {
        ctx.log.warn(
          `Insights project with slug '${source.insightsProjectSlug}' not found — skipping source.`,
        )
        continue
      }

      const insightsProjectId: string = insightsProject.id
      ctx.log.debug(`Resolved insights project: ${insightsProjectId}`)

      // ------------------------------------------------------------------
      // 2. Fetch + parse the governance YAML
      // ------------------------------------------------------------------
      ctx.log.debug(`Fetching governance YAML...`)
      let projects: ParsedProject[]

      try {
        projects = await fetchProjects(source.yamlUrl)
      } catch (err) {
        ctx.log.error({ err }, `Could not load governance YAML — skipping source`)
        continue
      }

      ctx.log.info(`Parsed ${projects.length} projects from YAML`)

      // ------------------------------------------------------------------
      // 3. Load existing repository groups so we can upsert
      // ------------------------------------------------------------------
      const existingGroups = await listRepositoryGroups(qx, { insightsProjectId })
      const existingBySlug = new Map(existingGroups.map((g) => [g.slug, g]))
      // Slugs written during this run. Two YAML project names can collapse to
      // the same slug; without this guard the second would overwrite the first
      // or attempt a second create with the same slug.
      const syncedSlugs = new Set<string>()

      let created = 0
      let updated = 0
      let skipped = 0

      // ------------------------------------------------------------------
      // 4. Bulk-fetch all repo URLs that exist in the DB (single round-trip)
      // ------------------------------------------------------------------
      const allCandidateUrls = [...new Set(projects.flatMap(({ repos }) => repos.map(toUrl)))]
      const foundUrlSet = new Set<string>()
      // Nothing to look up when the YAML lists no repos at all.
      if (allCandidateUrls.length > 0) {
        const foundRepos = await getRepositoriesByUrl(qx, allCandidateUrls)
        for (const repo of foundRepos) {
          foundUrlSet.add(repo.url)
        }
      }

      // ------------------------------------------------------------------
      // 5. Upsert one repository group per YAML project
      // ------------------------------------------------------------------
      for (const { project, repos } of projects) {
        if (repos.length === 0) {
          ctx.log.debug(`'${project}' has no repos in YAML — skipping`)
          skipped++
          continue
        }

        const slug = toSlug(project)
        if (slug === '' || syncedSlugs.has(slug)) {
          ctx.log.warn(
            `'${project}': slug '${slug}' is empty or was already synced from another project in this run — skipping`,
          )
          skipped++
          continue
        }

        // De-duplicated so a repo listed under several deliverables is sent once.
        const candidateUrls = [...new Set(repos.map(toUrl))]
        const foundUrls = candidateUrls.filter((u) => foundUrlSet.has(u))

        if (foundUrls.length === 0) {
          ctx.log.debug(
            `'${project}': none of the ${candidateUrls.length} repo URLs exist in the repositories table — skipping`,
          )
          skipped++
          continue
        }

        const missing = candidateUrls.filter((u) => !foundUrlSet.has(u))
        if (missing.length > 0) {
          ctx.log.warn(
            `'${project}': ${missing.length}/${candidateUrls.length} repos not found in DB` +
              ` (first 5: ${missing.slice(0, 5).join(', ')}${missing.length > 5 ? '...' : ''})`,
          )
        }

        const existingGroup = existingBySlug.get(slug)

        if (existingGroup) {
          await updateRepositoryGroup(qx, existingGroup.id, {
            name: project,
            slug,
            repositories: foundUrls,
          })
          ctx.log.info(`Updated '${project}' — ${foundUrls.length} repos`)
          updated++
        } else {
          await createRepositoryGroup(qx, {
            name: project,
            slug,
            insightsProjectId,
            repositories: foundUrls,
          })
          ctx.log.info(`Created '${project}' — ${foundUrls.length} repos`)
          created++
        }
        syncedSlugs.add(slug)
      }

      ctx.log.debug(`Source done — created: ${created}, updated: ${updated}, skipped: ${skipped}`)

      // A run that touches nothing is almost certainly a configuration problem
      // (e.g. URL prefix mismatch); make it visible instead of finishing quietly.
      if (projects.length > 0 && created + updated === 0) {
        ctx.log.warn(
          `No repository groups were created or updated for ${source.yamlUrl} — ` +
            `check that repoUrlBase '${source.repoUrlBase}' matches the URLs stored in the repositories table`,
        )
      }
    }

    ctx.log.info('OpenStack repository groups sync complete')
  },
}

export default job


There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Wrong URL base causes zero repository matches
High Severity
The `repoUrlBase` is set to `https://review.opendev.org/`, but the git integration service (`services/apps/git_integration/src/crowdgit/services/utils.py`) explicitly converts `review.opendev.org` URLs to `opendev.org` URLs before storing them. This means the database stores repo URLs with `https://opendev.org/` as the base. Since `getRepositoriesByUrl` does an exact URL match, every candidate URL generated by this job will fail to match, causing the job to silently skip all projects every week while reporting success.
Reviewed by Cursor Bugbot for commit ca8c62d. Configure here.