Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions lib/apify/spotify/fetchSpotifyAlbumPlayCounts.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ import apifyClient from "@/lib/apify/client";
const PLAY_COUNT_ACTOR = "beatanalytics~spotify-play-count-scraper";

export type SpotifyAlbumPlayCounts = {
id?: string;
name?: string;
label?: string;
copyright?: string;
Expand Down
77 changes: 77 additions & 0 deletions lib/research/playcounts/__tests__/mapUnmappedAlbumTracks.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
import { describe, it, expect, vi, beforeEach } from "vitest";
import { mapUnmappedAlbumTracks } from "../mapUnmappedAlbumTracks";

import generateAccessToken from "@/lib/spotify/generateAccessToken";
import getTracks from "@/lib/spotify/getTracks";
import { upsertSongs } from "@/lib/supabase/songs/upsertSongs";
import { upsertSongIdentifiers } from "@/lib/supabase/song_identifiers/upsertSongIdentifiers";

vi.mock("@/lib/spotify/generateAccessToken", () => ({ default: vi.fn() }));
vi.mock("@/lib/spotify/getTracks", () => ({ default: vi.fn() }));
vi.mock("@/lib/supabase/songs/upsertSongs", () => ({ upsertSongs: vi.fn() }));
vi.mock("@/lib/supabase/song_identifiers/upsertSongIdentifiers", () => ({
upsertSongIdentifiers: vi.fn(),
}));

const ALBUMS = [
{
id: "album_1",
name: "K.I.D.S. (Deluxe)",
tracks: [
{ id: "t_mapped", name: "The Spins", streamCount: 1 },
{ id: "t_new", name: "Nikes on My Feet", streamCount: 2 },
{ id: "t_noisrc", name: "Interlude", streamCount: 3 },
],
},
];

describe("mapUnmappedAlbumTracks", () => {
beforeEach(() => {
vi.clearAllMocks();
vi.mocked(generateAccessToken).mockResolvedValue({ access_token: "tok" } as never);
vi.mocked(upsertSongs).mockResolvedValue([] as never);
});

it("resolves ISRCs for unmapped tracks, upserts songs + identifiers, returns new mappings", async () => {
vi.mocked(getTracks).mockResolvedValue({
tracks: [
{ id: "t_new", name: "Nikes on My Feet", external_ids: { isrc: "ISRC_NIKES" } },
{ id: "t_noisrc", name: "Interlude", external_ids: {} },
],
error: null,
} as never);

const mapped = await mapUnmappedAlbumTracks(ALBUMS, new Set(["t_mapped"]));

expect(getTracks).toHaveBeenCalledWith({ ids: ["t_new", "t_noisrc"], accessToken: "tok" });
expect(upsertSongs).toHaveBeenCalledWith([
{ isrc: "ISRC_NIKES", name: "Nikes on My Feet", album: "K.I.D.S. (Deluxe)" },
]);
expect(upsertSongIdentifiers).toHaveBeenCalledWith([
{ song: "ISRC_NIKES", platform: "spotify", identifier_type: "track_id", value: "t_new" },
{ song: "ISRC_NIKES", platform: "spotify", identifier_type: "album_id", value: "album_1" },
]);
expect([...mapped.entries()]).toEqual([["t_new", "ISRC_NIKES"]]);
});

it("returns an empty map without Spotify calls when everything is mapped", async () => {
const mapped = await mapUnmappedAlbumTracks(ALBUMS, new Set(["t_mapped", "t_new", "t_noisrc"]));

expect(generateAccessToken).not.toHaveBeenCalled();
expect(mapped.size).toBe(0);
});

it("degrades to an empty map (no throw) when Spotify auth fails — capture proceeds for already-mapped tracks", async () => {
const consoleError = vi.spyOn(console, "error").mockImplementation(() => {});
vi.mocked(generateAccessToken).mockResolvedValue({
access_token: null,
error: new Error("down"),
} as never);

const mapped = await mapUnmappedAlbumTracks(ALBUMS, new Set());

expect(mapped.size).toBe(0);
expect(consoleError).toHaveBeenCalled();
consoleError.mockRestore();
});
});
34 changes: 34 additions & 0 deletions lib/research/playcounts/__tests__/writeAlbumPlayCounts.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,16 +3,23 @@ import { writeAlbumPlayCounts } from "../writeAlbumPlayCounts";

import { selectSongIdentifiers } from "@/lib/supabase/song_identifiers/selectSongIdentifiers";
import { upsertSongMeasurements } from "@/lib/supabase/song_measurements/upsertSongMeasurements";
import { mapUnmappedAlbumTracks } from "../mapUnmappedAlbumTracks";
import { upsertSongIdentifiers } from "@/lib/supabase/song_identifiers/upsertSongIdentifiers";

vi.mock("@/lib/supabase/song_identifiers/selectSongIdentifiers", () => ({
selectSongIdentifiers: vi.fn(),
}));
vi.mock("@/lib/supabase/song_measurements/upsertSongMeasurements", () => ({
upsertSongMeasurements: vi.fn(),
}));
vi.mock("../mapUnmappedAlbumTracks", () => ({ mapUnmappedAlbumTracks: vi.fn() }));
vi.mock("@/lib/supabase/song_identifiers/upsertSongIdentifiers", () => ({
upsertSongIdentifiers: vi.fn(),
}));

const ALBUMS = [
{
id: "album_1",
name: "K.I.D.S. (Deluxe)",
tracks: [
{ id: "t1", name: "The Spins", streamCount: 100 },
Expand All @@ -27,6 +34,21 @@ describe("writeAlbumPlayCounts", () => {
vi.useFakeTimers();
vi.setSystemTime(new Date("2026-06-11T12:00:00Z"));
vi.mocked(upsertSongMeasurements).mockResolvedValue([] as never);
vi.mocked(mapUnmappedAlbumTracks).mockResolvedValue(new Map());
});

it("self-maps unmapped tracks and writes their measurements too (chat#1794)", async () => {
vi.mocked(selectSongIdentifiers).mockResolvedValue([
{ song: "ISRC1", platform: "spotify", identifier_type: "track_id", value: "t1" },
]);
vi.mocked(mapUnmappedAlbumTracks).mockResolvedValue(new Map([["t_unmapped", "ISRC_NEW"]]));

const written = await writeAlbumPlayCounts(ALBUMS, "run_3", {});

expect(mapUnmappedAlbumTracks).toHaveBeenCalledWith(ALBUMS, new Set(["t1"]));
const rows = vi.mocked(upsertSongMeasurements).mock.calls[0][0];
expect(rows.map((r: { song: string }) => r.song).sort()).toEqual(["ISRC1", "ISRC_NEW"]);
expect(written).toBe(2);
});

it("writes one measurement per mapped track with run + snapshot lineage", async () => {
Expand All @@ -51,6 +73,18 @@ describe("writeAlbumPlayCounts", () => {
expect(written).toBe(1);
});

it("upserts album_id mappings for every captured track (heals pre-mapped tracks, chat#1794)", async () => {
vi.mocked(selectSongIdentifiers).mockResolvedValue([
{ song: "ISRC1", platform: "spotify", identifier_type: "track_id", value: "t1" },
]);

await writeAlbumPlayCounts(ALBUMS, "run_4", {});

expect(upsertSongIdentifiers).toHaveBeenCalledWith([
{ song: "ISRC1", platform: "spotify", identifier_type: "album_id", value: "album_1" },
]);
});

it("omits snapshot lineage when not given", async () => {
vi.mocked(selectSongIdentifiers).mockResolvedValue([
{ song: "ISRC1", platform: "spotify", identifier_type: "track_id", value: "t1" },
Expand Down
73 changes: 73 additions & 0 deletions lib/research/playcounts/mapUnmappedAlbumTracks.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
import generateAccessToken from "@/lib/spotify/generateAccessToken";
import getTracks from "@/lib/spotify/getTracks";
import { upsertSongs } from "@/lib/supabase/songs/upsertSongs";
import { upsertSongIdentifiers } from "@/lib/supabase/song_identifiers/upsertSongIdentifiers";
import { SpotifyAlbumPlayCounts } from "@/lib/apify/spotify/fetchSpotifyAlbumPlayCounts";

/**
* Self-mapping bootstrap (chat#1794): resolve ISRCs for actor tracks that have
* no identifier mapping yet — batch Spotify `/v1/tracks` lookup — and upsert
* the `songs` rows plus track_id/album_id mappings. Failures degrade to an
* empty map (logged): capture proceeds for already-mapped tracks and the next
* snapshot retries the rest.
*
* @param albums - Parsed actor album items (with album `id`)
* @param mappedTrackIds - Track ids that already have mappings
* @returns Newly created trackId → ISRC mappings
*/
export async function mapUnmappedAlbumTracks(

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P3: Function is 56 lines, exceeding the project's 50-line guideline for domain functions in lib/**/*.ts. Consider extracting resolveTracksFromSpotify (token + fetch) and persistResolvedMappings (upsert songs + identifiers) to improve readability and testability.

Prompt for AI agents
Check if this issue is valid — if so, understand the root cause and fix it. At lib/research/playcounts/mapUnmappedAlbumTracks.ts, line 18:

<comment>Function is 56 lines, exceeding the project's 50-line guideline for domain functions in `lib/**/*.ts`. Consider extracting `resolveTracksFromSpotify` (token + fetch) and `persistResolvedMappings` (upsert songs + identifiers) to improve readability and testability.</comment>

<file context>
@@ -0,0 +1,73 @@
+ * @param mappedTrackIds - Track ids that already have mappings
+ * @returns Newly created trackId → ISRC mappings
+ */
+export async function mapUnmappedAlbumTracks(
+  albums: SpotifyAlbumPlayCounts[],
+  mappedTrackIds: Set<string>,
</file context>

albums: SpotifyAlbumPlayCounts[],
mappedTrackIds: Set<string>,
): Promise<Map<string, string>> {
const unmapped = albums.flatMap(album =>
(album.tracks ?? [])
.filter(track => !mappedTrackIds.has(track.id))
.map(track => ({ trackId: track.id, albumId: album.id, albumName: album.name })),
);
if (unmapped.length === 0) return new Map();

try {
const { access_token, error: tokenError } = await generateAccessToken();
if (!access_token) throw tokenError ?? new Error("No Spotify access token");

const { tracks, error } = await getTracks({
ids: unmapped.map(u => u.trackId),
accessToken: access_token,
});
if (!tracks) throw error ?? new Error("Spotify tracks lookup failed");

const contextByTrackId = new Map(unmapped.map(u => [u.trackId, u]));
const resolved = tracks.flatMap(track => {
const isrc = track.external_ids?.isrc;
const context = contextByTrackId.get(track.id);
if (!isrc || !context) return [];
return [
{
trackId: track.id,
isrc,
name: track.name,
albumId: context.albumId,
albumName: context.albumName,
},
];
});
if (resolved.length === 0) return new Map();

await upsertSongs(
resolved.map(r => ({ isrc: r.isrc, name: r.name ?? null, album: r.albumName ?? null })),

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2: Potential duplicate ISRC rows are sent to upsertSongs, which can fail the bulk upsert and cause the entire self-mapping pass to degrade to no new mappings.

Prompt for AI agents
Check if this issue is valid — if so, understand the root cause and fix it. At lib/research/playcounts/mapUnmappedAlbumTracks.ts, line 57:

<comment>Potential duplicate ISRC rows are sent to `upsertSongs`, which can fail the bulk upsert and cause the entire self-mapping pass to degrade to no new mappings.</comment>

<file context>
@@ -0,0 +1,73 @@
+    if (resolved.length === 0) return new Map();
+
+    await upsertSongs(
+      resolved.map(r => ({ isrc: r.isrc, name: r.name ?? null, album: r.albumName ?? null })),
+    );
+    await upsertSongIdentifiers(
</file context>

);
await upsertSongIdentifiers(
resolved.flatMap(r => [
{ song: r.isrc, platform: "spotify", identifier_type: "track_id", value: r.trackId },
...(r.albumId
? [{ song: r.isrc, platform: "spotify", identifier_type: "album_id", value: r.albumId }]
: []),
]),
);

return new Map(resolved.map(r => [r.trackId, r.isrc]));
} catch (error) {
console.error("[playcounts] identifier bootstrap failed:", error);
return new Map();
}
}
Comment on lines +18 to +73

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🛠️ Refactor suggestion | 🟠 Major | 🏗️ Heavy lift

Function exceeds the 50-line guideline for domain functions.

The mapUnmappedAlbumTracks function is 56 lines, violating the domain function length guideline. Consider extracting sub-responsibilities:

  • resolveTracksFromSpotify(unmapped, accessToken) to handle token generation and track fetching
  • persistResolvedMappings(resolved) to handle song and identifier upserts

This would improve readability and testability while aligning with the Single Responsibility Principle.

As per coding guidelines: "Keep functions under 50 lines" for domain functions in lib/**/*.ts.

🤖 Prompt for AI Agents
Verify each finding against current code. Fix only still-valid issues, skip the
rest with a brief reason, keep changes minimal, and validate.

In `@lib/research/playcounts/mapUnmappedAlbumTracks.ts` around lines 18 - 73, The
function mapUnmappedAlbumTracks is over the 50-line limit—extract the Spotify
lookup and persistence responsibilities into two helpers: implement
resolveTracksFromSpotify(unmapped:
{trackId:string,albumId:string,albumName:string}[], accessToken?:string) to
handle generateAccessToken() and getTracks(...) and return the resolved array of
{trackId,isrc,name,albumId,albumName}, and implement
persistResolvedMappings(resolved) to perform upsertSongs(...) and
upsertSongIdentifiers(...). Then reduce mapUnmappedAlbumTracks to prepare
unmapped, call resolveTracksFromSpotify(unmapped) and if non-empty call
persistResolvedMappings(resolved) before returning new Map(resolved.map(r =>
[r.trackId, r.isrc])); preserve existing error handling and return types
(Map<string,string>).

Source: Coding guidelines

29 changes: 25 additions & 4 deletions lib/research/playcounts/writeAlbumPlayCounts.ts
Original file line number Diff line number Diff line change
@@ -1,15 +1,20 @@
import { selectSongIdentifiers } from "@/lib/supabase/song_identifiers/selectSongIdentifiers";
import { upsertSongMeasurements } from "@/lib/supabase/song_measurements/upsertSongMeasurements";
import { mapUnmappedAlbumTracks } from "@/lib/research/playcounts/mapUnmappedAlbumTracks";
import { upsertSongIdentifiers } from "@/lib/supabase/song_identifiers/upsertSongIdentifiers";
import { SpotifyAlbumPlayCounts } from "@/lib/apify/spotify/fetchSpotifyAlbumPlayCounts";

const METRIC = "platform_displayed_play_count";
const DATA_SOURCE = "apify_spotify_playcount";

/**
* Write actor album results into the measurement store: one row per track
* with an identifier mapping, stamped with the actor run id (and snapshot id
* when capturing for a snapshot job). Shared by the on-demand stats refresh
* and the snapshot workflow.
* Write actor album results into the measurement store, self-mapping as it
* goes (chat#1794): tracks without an identifier mapping get their ISRCs
* resolved via the Spotify API and their songs/identifier rows created, so a
* capture measures every track it touches. One measurement row per resolved
* track, stamped with the actor run id (and snapshot id when capturing for a
* snapshot job). Shared by the on-demand stats refresh and the snapshot
* workflow.
*
* @param albums - Parsed actor album items
* @param runId - The actor run id (raw_ref lineage)
Expand All @@ -28,6 +33,22 @@ export async function writeAlbumPlayCounts(
values: tracks.map(t => t.id),
});
const songByTrackId = new Map(mappings.map(m => [m.value, m.song]));
const newlyMapped = await mapUnmappedAlbumTracks(albums, new Set(songByTrackId.keys()));
for (const [trackId, isrc] of newlyMapped) songByTrackId.set(trackId, isrc);

// Album mappings for every captured track (idempotent): albums map to many
// songs, and pre-mapped tracks would otherwise never get their album row.
const albumRows = albums.flatMap(album =>
!album.id
? []
: (album.tracks ?? []).flatMap(track => {
const song = songByTrackId.get(track.id);
if (!song) return [];
return [{ song, platform: "spotify", identifier_type: "album_id", value: album.id }];
}),
);
await upsertSongIdentifiers(albumRows);

const capturedAt = new Date().toISOString();

const rows = tracks.flatMap(track => {
Expand Down
42 changes: 42 additions & 0 deletions lib/spotify/__tests__/getTracks.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
import { describe, it, expect, vi, beforeEach } from "vitest";
import getTracks from "../getTracks";

const mockFetch = vi.fn();
global.fetch = mockFetch as never;

describe("getTracks", () => {
beforeEach(() => vi.clearAllMocks());

it("batch-fetches tracks (50 per call) and concatenates results", async () => {
const ids = Array.from({ length: 60 }, (_, i) => `t${i}`);
mockFetch.mockResolvedValue({
ok: true,
json: async () => ({ tracks: [{ id: "t0", external_ids: { isrc: "ISRC0" } }] }),
} as never);

const { tracks, error } = await getTracks({ ids, accessToken: "tok" });

expect(mockFetch).toHaveBeenCalledTimes(2);
expect(mockFetch.mock.calls[0][0]).toContain(
"ids=" + encodeURIComponent(ids.slice(0, 50).join(",")),
);
expect(error).toBeNull();
expect(tracks).toHaveLength(2);
});

it("returns an error on a failed response", async () => {
mockFetch.mockResolvedValue({ ok: false, status: 429 } as never);

const { tracks, error } = await getTracks({ ids: ["t1"], accessToken: "tok" });

expect(tracks).toBeNull();
expect(error).toBeInstanceOf(Error);
});

it("returns [] for empty input without fetching", async () => {
const { tracks } = await getTracks({ ids: [], accessToken: "tok" });

expect(mockFetch).not.toHaveBeenCalled();
expect(tracks).toEqual([]);
});
});
55 changes: 55 additions & 0 deletions lib/spotify/getTracks.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
import { SpotifyTrack } from "@/types/spotify.types";

const BATCH_SIZE = 50;

/**
* Fetch multiple tracks by id via `GET /v1/tracks` (the only public surface
* that returns `external_ids.isrc`), batched at the API's 50-id limit.
*
* @param params.ids - Spotify track ids
* @param params.accessToken - Client-credentials access token
* @returns All fetched tracks, or an error
*/
const getTracks = async ({
ids,
accessToken,
}: {
ids: string[];
accessToken: string;
}): Promise<{ tracks: SpotifyTrack[] | null; error: Error | null }> => {
if (ids.length === 0) return { tracks: [], error: null };

try {
const tracks: SpotifyTrack[] = [];
for (let i = 0; i < ids.length; i += BATCH_SIZE) {
const batch = ids.slice(i, i + BATCH_SIZE);
const url = `https://api.spotify.com/v1/tracks?ids=${encodeURIComponent(batch.join(","))}`;
const response = await fetch(url, {

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2: Add a timeout to this external API call. Without an AbortController signal, the fetch can hang indefinitely if Spotify is slow or unresponsive, blocking the capture pipeline. A 10-second timeout with AbortController would bound the worst case and let the catch block return a clean error.

Prompt for AI agents
Check if this issue is valid — if so, understand the root cause and fix it. At lib/spotify/getTracks.ts, line 27:

<comment>Add a timeout to this external API call. Without an `AbortController` signal, the fetch can hang indefinitely if Spotify is slow or unresponsive, blocking the capture pipeline. A 10-second timeout with `AbortController` would bound the worst case and let the catch block return a clean error.</comment>

<file context>
@@ -0,0 +1,55 @@
+    for (let i = 0; i < ids.length; i += BATCH_SIZE) {
+      const batch = ids.slice(i, i + BATCH_SIZE);
+      const url = `https://api.spotify.com/v1/tracks?ids=${encodeURIComponent(batch.join(","))}`;
+      const response = await fetch(url, {
+        method: "GET",
+        headers: {
</file context>

method: "GET",
headers: {
"Content-Type": "application/json",
Authorization: `Bearer ${accessToken}`,
},
});
Comment on lines +27 to +33

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major | ⚡ Quick win

Add timeout to external API calls.

The fetch request to Spotify has no timeout, which means the call could hang indefinitely if Spotify is slow or unresponsive. This blocks the request thread and degrades system availability.

Consider adding a timeout using AbortController:

🛡️ Recommended fix to add timeout
 const getTracks = async ({
   ids,
   accessToken,
 }: {
   ids: string[];
   accessToken: string;
 }): Promise<{ tracks: SpotifyTrack[] | null; error: Error | null }> => {
   if (ids.length === 0) return { tracks: [], error: null };

   try {
     const tracks: SpotifyTrack[] = [];
     for (let i = 0; i < ids.length; i += BATCH_SIZE) {
       const batch = ids.slice(i, i + BATCH_SIZE);
       const url = `https://api.spotify.com/v1/tracks?ids=${encodeURIComponent(batch.join(","))}`;
+      const controller = new AbortController();
+      const timeoutId = setTimeout(() => controller.abort(), 10000); // 10s timeout
+      
       const response = await fetch(url, {
         method: "GET",
         headers: {
           "Content-Type": "application/json",
           Authorization: `Bearer ${accessToken}`,
         },
+        signal: controller.signal,
       });
+      clearTimeout(timeoutId);

       if (!response.ok) {
         return {
           tracks: null,
           error: new Error(`Spotify tracks request failed: ${response.status}`),
         };
       }

       const data = await response.json();
       tracks.push(...(data.tracks ?? []).filter(Boolean));
     }
     return { tracks, error: null };
   } catch (error) {
     console.error(error);
     return {
       tracks: null,
       error: error instanceof Error ? error : new Error("Unknown error fetching tracks"),
     };
   }
 };
🤖 Prompt for AI Agents
Verify each finding against current code. Fix only still-valid issues, skip the
rest with a brief reason, keep changes minimal, and validate.

In `@lib/spotify/getTracks.ts` around lines 27 - 33, The fetch in getTracks
(lib/spotify/getTracks.ts) needs a timeout to avoid hanging; wrap the request
with an AbortController: create an AbortController, pass controller.signal into
fetch, set a timer (e.g., configurable DEFAULT_TIMEOUT_MS) that calls
controller.abort() after the timeout, and clear the timer on success before
returning. Ensure you handle the abort error path in getTracks (distinguish
abort errors vs other network errors) and surface a clear timeout-related error
message instead of letting the call hang.


if (!response.ok) {
return {
tracks: null,
error: new Error(`Spotify tracks request failed: ${response.status}`),
};
}

const data = await response.json();
tracks.push(...(data.tracks ?? []).filter(Boolean));
}
return { tracks, error: null };
} catch (error) {
console.error(error);
return {
tracks: null,
error: error instanceof Error ? error : new Error("Unknown error fetching tracks"),
};
}
};

export default getTracks;
Loading
Loading