Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 21 additions & 10 deletions pnpm-lock.yaml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 2 additions & 0 deletions services/apps/cron_service/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
"@crowd/telemetry": "workspace:*",
"@crowd/redis": "workspace:*",
"@aws-sdk/client-s3": "^3.700.0",
"js-yaml": "^4.1.0",
"@dsnp/parquetjs": "^1.7.0",
"cron": "^2.1.0",
"cron-time-generator": "^1.3.0",
Expand All @@ -36,6 +37,7 @@
"typescript": "^5.6.3"
},
"devDependencies": {
"@types/js-yaml": "^4.0.9",
"@types/node": "^20.8.2",
"nodemon": "^3.0.1"
}
Expand Down
215 changes: 215 additions & 0 deletions services/apps/cron_service/src/jobs/openstackRepositoryGroups.job.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,215 @@
import CronTime from 'cron-time-generator'
import yaml from 'js-yaml'

import { WRITE_DB_CONFIG, getDbConnection } from '@crowd/data-access-layer/src/database'
import { pgpQx } from '@crowd/data-access-layer/src/queryExecutor'
import { getRepositoriesByUrl } from '@crowd/data-access-layer/src/repositories'
import {
createRepositoryGroup,
listRepositoryGroups,
updateRepositoryGroup,
} from '@crowd/data-access-layer/src/repositoryGroups'

import { IJobDefinition } from '../types'

// ---------------------------------------------------------------------------
// Config — one entry per governance YAML source we want to sync.
// Adding a second platform in the future is as simple as appending an entry.
// ---------------------------------------------------------------------------
const GOVERNANCE_SOURCES = [
{
// OpenStack governance YAML published by the TC
yamlUrl: 'https://opendev.org/openstack/governance/raw/branch/master/reference/projects.yaml',
// Repos in the YAML are listed as "<owner>/<repo>".
// We convert them to full URLs using this prefix.
repoUrlBase: 'https://review.opendev.org/',
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Wrong URL base causes zero repository matches

High Severity

The repoUrlBase is set to https://review.opendev.org/, but the git integration service (services/apps/git_integration/src/crowdgit/services/utils.py) explicitly converts review.opendev.org URLs to opendev.org URLs before storing them. This means the database stores repo URLs with https://opendev.org/ as the base. Since getRepositoriesByUrl does an exact URL match, every candidate URL generated by this job will fail to match, causing the job to silently skip all projects every week while reporting success.

Fix in Cursor Fix in Web

Reviewed by Cursor Bugbot for commit ca8c62d. Configure here.

// Slug of the insightsProject that owns these repository groups (resolved to its id at runtime).
insightsProjectSlug: 'OpenStack',
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Slug lookup may silently fail due to casing

Medium Severity

insightsProjectSlug is set to 'OpenStack' (mixed case), but every slug generation path in the codebase produces lowercase output — generate_slug in migrations applies lower(), getCleanString calls .toLowerCase(), and even this file's own toSlug lowercases. PostgreSQL TEXT comparison is case-sensitive by default, so WHERE slug = 'OpenStack' won't match a stored slug of 'openstack'. This would cause the job to silently skip the entire sync, logging only a warning.

Additional Locations (1)
Fix in Cursor Fix in Web

Reviewed by Cursor Bugbot for commit f272d0b. Configure here.

},
]

// ---------------------------------------------------------------------------
// Types
// ---------------------------------------------------------------------------
/**
 * Shape the parsed governance YAML document is cast to.
 *
 * Outer keys are project names (they become `ParsedProject.project`); each value may
 * carry a `deliverables` map, and each deliverable may carry a `repos` list of strings.
 *
 * NOTE(review): this is a compile-time shape only — the cast itself performs no
 * runtime validation of the document.
 */
type GovernanceYaml = Record<
string,
{
// Deliverable name -> object optionally listing that deliverable's repos.
deliverables?: Record<string, { repos?: string[] }>
}
>

/** One top-level project from the governance YAML with the repos of all its deliverables flattened. */
interface ParsedProject {
// Top-level key of the project in the YAML document.
project: string
// Repo entries collected from every deliverable of this project (empty when none are listed).
repos: string[]
}

// ---------------------------------------------------------------------------
// Helpers
// ---------------------------------------------------------------------------
/**
 * Downloads a governance YAML document and flattens it into one entry per top-level project.
 *
 * The document comes from an external server, so its shape is treated defensively:
 * projects or deliverables with a null/empty body contribute no repos, and `repos`
 * entries that are not strings are dropped instead of aborting the whole source.
 *
 * @param yamlUrl - URL of the YAML file to download.
 * @returns One `ParsedProject` per top-level key; `repos` is the concatenation of the
 *          `repos` lists of all of that project's deliverables (possibly empty).
 * @throws Error if the request fails or returns a non-2xx status, or if the body
 *         cannot be parsed as YAML or does not parse to a mapping.
 */
async function fetchProjects(yamlUrl: string): Promise<ParsedProject[]> {
  // Anything can be thrown, not just Error instances — never print "undefined" as the cause.
  const reason = (err: unknown): string => (err instanceof Error ? err.message : String(err))

  let text: string
  try {
    const response = await fetch(yamlUrl)
    if (!response.ok) {
      throw new Error(`HTTP ${response.status} ${response.statusText}`)
    }
    text = await response.text()
  } catch (err) {
    throw new Error(`Failed to fetch governance YAML from ${yamlUrl}: ${reason(err)}`)
  }

  let data: GovernanceYaml
  try {
    // NOTE(review): this parses untrusted external input. js-yaml 4.x `load` uses a schema
    // without JS-specific types by default; re-check this call if the dependency is ever downgraded.
    const parsed: unknown = yaml.load(text)
    // A top-level sequence is also `typeof 'object'`, so arrays are rejected explicitly.
    if (parsed === null || typeof parsed !== 'object' || Array.isArray(parsed)) {
      throw new Error('Parsed YAML is not an object — file format may have changed')
    }
    data = parsed as GovernanceYaml
  } catch (err) {
    throw new Error(`Failed to parse governance YAML from ${yamlUrl}: ${reason(err)}`)
  }

  return Object.entries(data).map(([project, info]) => {
    const deliverables: unknown = info?.deliverables
    if (deliverables === null || typeof deliverables !== 'object') {
      return { project, repos: [] }
    }

    const repos = Object.values(deliverables as Record<string, unknown>).flatMap((deliverable) => {
      // An empty deliverable entry parses to null; treat it as "no repos".
      const listed: unknown = (deliverable as { repos?: unknown } | null | undefined)?.repos
      return Array.isArray(listed)
        ? listed.filter((entry): entry is string => typeof entry === 'string')
        : []
    })

    return { project, repos }
  })
}

/**
 * Turns a display name into a slug: lower-cased runs of `a-z0-9` joined by single hyphens,
 * with no leading or trailing hyphen.
 *
 * @param name - Name to convert.
 * @returns The slug; an empty string when `name` contains no `a-z0-9` characters.
 */
function toSlug(name: string): string {
  const lowered = name.toLowerCase()
  // Splitting on separator runs leaves empty pieces only at the edges; dropping them
  // is what keeps hyphens from appearing at the start or end.
  const words = lowered.split(/[^a-z0-9]+/).filter((piece) => piece.length > 0)
  return words.join('-')
}
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Inconsistent slug generation risks duplicate repository groups

Low Severity

The new toSlug function generates slugs differently than the existing getCleanString(name).replace(/\s+/g, '-') used in collectionService.ts for repository group slugs. For names with special characters like periods, toSlug converts them to hyphens while getCleanString strips them entirely (e.g., "oslo.messaging" → "oslo-messaging" vs "oslomessaging"). Since both the API and cron job create and match repository groups by slug, this inconsistency could produce duplicates. @crowd/common (which exports getCleanString) is already a dependency of the cron service.

Additional Locations (1)
Fix in Cursor Fix in Web

Reviewed by Cursor Bugbot for commit 4e024be. Configure here.


// ---------------------------------------------------------------------------
// Job definition
// ---------------------------------------------------------------------------
/**
 * Cron job that mirrors governance YAML projects into repository groups.
 *
 * For every entry of `GOVERNANCE_SOURCES` it looks up the insights project by slug,
 * downloads and parses the YAML, and then creates or updates one repository group per
 * YAML project, containing only those repo URLs that `getRepositoriesByUrl` returns.
 * A source is skipped (with a log line) when its insights project is not found or its
 * YAML cannot be loaded.
 */
const job: IJobDefinition = {
  name: 'openstack-repository-groups-sync',
  // Weekly schedule
  cronTime: CronTime.everyWeek(),
  timeout: 30 * 60, // 30 minutes

  process: async (ctx) => {
    ctx.log.info('Starting OpenStack repository groups sync...')

    const connection = await getDbConnection(WRITE_DB_CONFIG(), 3, 0)
    const qx = pgpQx(connection)

    for (const source of GOVERNANCE_SOURCES) {
      ctx.log.debug(`Processing source: ${source.yamlUrl}`)

      // Step 1 — find the insights project the groups will belong to.
      const projectRow = await qx.selectOneOrNone(
        `SELECT id FROM "insightsProjects" WHERE slug = $(slug) AND "deletedAt" IS NULL`,
        { slug: source.insightsProjectSlug },
      )
      if (!projectRow) {
        ctx.log.warn(
          `Insights project with slug '${source.insightsProjectSlug}' not found — skipping source.`,
        )
        continue
      }

      const insightsProjectId: string = projectRow.id
      ctx.log.debug(`Resolved insights project: ${insightsProjectId}`)

      // Step 2 — download and parse the governance YAML; a failure only skips this source.
      ctx.log.debug(`Fetching governance YAML...`)
      let projects: ParsedProject[]
      try {
        projects = await fetchProjects(source.yamlUrl)
      } catch (err) {
        ctx.log.error({ err }, `Could not load governance YAML — skipping source`)
        continue
      }
      ctx.log.info(`Parsed ${projects.length} projects from YAML`)

      // Step 3 — index the groups that already exist by slug to choose update vs. create.
      const currentGroups = await listRepositoryGroups(qx, { insightsProjectId })
      const groupBySlug = new Map(currentGroups.map((group) => [group.slug, group]))

      const tally = { created: 0, updated: 0, skipped: 0 }

      // Step 4 — one query for every candidate URL, then membership checks against a Set.
      const toUrl = (repo: string): string => `${source.repoUrlBase}${repo}`
      const everyCandidateUrl = projects.flatMap((entry) => entry.repos.map((repo) => toUrl(repo)))
      const knownRepos = await getRepositoriesByUrl(qx, everyCandidateUrl)
      const knownUrls = new Set(knownRepos.map((repo) => repo.url))

      // Step 5 — create or update one repository group per YAML project.
      for (const { project, repos } of projects) {
        if (repos.length === 0) {
          ctx.log.debug(`'${project}' has no repos in YAML — skipping`)
          tally.skipped++
          continue
        }

        const slug = toSlug(project)
        const candidateUrls = repos.map((repo) => toUrl(repo))

        // Split the candidates into URLs present in the DB and URLs that are not.
        const foundUrls: string[] = []
        const missing: string[] = []
        for (const url of candidateUrls) {
          if (knownUrls.has(url)) {
            foundUrls.push(url)
          } else {
            missing.push(url)
          }
        }

        if (foundUrls.length === 0) {
          ctx.log.debug(
            `'${project}': none of the ${candidateUrls.length} repo URLs exist in the repositories table — skipping`,
          )
          tally.skipped++
          continue
        }

        if (missing.length > 0) {
          const preview = missing.slice(0, 5).join(', ')
          const ellipsis = missing.length > 5 ? '...' : ''
          ctx.log.warn(
            `'${project}': ${missing.length}/${candidateUrls.length} repos not found in DB` +
              ` (first 5: ${preview}${ellipsis})`,
          )
        }

        const existingGroup = groupBySlug.get(slug)
        if (!existingGroup) {
          await createRepositoryGroup(qx, {
            name: project,
            slug,
            insightsProjectId,
            repositories: foundUrls,
          })
          ctx.log.info(`Created '${project}' — ${foundUrls.length} repos`)
          tally.created++
          continue
        }

        await updateRepositoryGroup(qx, existingGroup.id, {
          name: project,
          slug,
          repositories: foundUrls,
        })
        ctx.log.info(`Updated '${project}' — ${foundUrls.length} repos`)
        tally.updated++
      }

      ctx.log.debug(
        `Source done — created: ${tally.created}, updated: ${tally.updated}, skipped: ${tally.skipped}`,
      )
    }

    ctx.log.info('OpenStack repository groups sync complete')
  },
}

export default job
Loading