feat: YouTube music download system with admin dashboard

Sidecar architecture: edge functions (auth + DB) → youtube-worker container
(youtubei.js for playlist metadata, yt-dlp for audio downloads).

- Edge functions: youtube-playlist, youtube-process, youtube-status (CRUD)
- youtube-worker sidecar: Node.js + yt-dlp, downloads M4A to Supabase Storage
- Admin music page: import playlists, process queue, inline genre editing
- 12 music genres with per-track assignment and import-time defaults
- DB migrations: download_jobs, download_items tables with genre column
- Deploy script and CI workflow for edge functions + sidecar
This commit is contained in:
Millian Lamiaux
2026-03-26 10:47:05 +01:00
parent 8926de58e5
commit 3d8d9efd70
21 changed files with 2426 additions and 0 deletions

218
youtube-worker/server.js Normal file
View File

@@ -0,0 +1,218 @@
import http from "node:http";
import { execFile } from "node:child_process";
import { readFile, unlink, mkdir } from "node:fs/promises";
import { promisify } from "node:util";
import { Innertube } from "youtubei.js";
import { createClient } from "@supabase/supabase-js";
const execFileAsync = promisify(execFile);
// ── Config ────────────────────────────────────────────────────
const PORT = parseInt(process.env.PORT || "3001", 10);
const SUPABASE_URL = process.env.SUPABASE_URL;
const SUPABASE_SERVICE_ROLE_KEY = process.env.SUPABASE_SERVICE_ROLE_KEY;
const STORAGE_BUCKET = process.env.STORAGE_BUCKET || "workout-audio";
// Public-facing URL for constructing browser-accessible storage URLs.
// SUPABASE_URL is the internal Docker network URL (e.g. http://kong:8000)
// which browsers cannot reach.
const SUPABASE_PUBLIC_URL = process.env.SUPABASE_PUBLIC_URL || SUPABASE_URL;
if (!SUPABASE_URL || !SUPABASE_SERVICE_ROLE_KEY) {
console.error("Missing SUPABASE_URL or SUPABASE_SERVICE_ROLE_KEY");
process.exit(1);
}
const supabase = createClient(SUPABASE_URL, SUPABASE_SERVICE_ROLE_KEY);
// ── YouTube client (singleton, reused across requests) ────────
let ytClient = null;
let ytClientPromise = null;
async function getYouTubeClient() {
if (ytClient) return ytClient;
if (ytClientPromise) return ytClientPromise;
ytClientPromise = Innertube.create({
generate_session_locally: true,
retrieve_player: true,
}).then((client) => {
ytClient = client;
ytClientPromise = null;
console.log("YouTube client initialized");
return client;
});
return ytClientPromise;
}
// Pre-warm the client on startup
getYouTubeClient().catch((err) =>
console.error("Failed to pre-warm YouTube client:", err.message)
);
// ── Helpers ───────────────────────────────────────────────────
function parsePlaylistId(url) {
if (/^[A-Za-z0-9_-]+$/.test(url) && !url.includes(".")) {
return url;
}
try {
const parsed = new URL(url);
const listParam = parsed.searchParams.get("list");
if (listParam) return listParam;
} catch {}
throw new Error(`Could not parse playlist ID from: "${url}"`);
}
function readBody(req) {
return new Promise((resolve, reject) => {
const chunks = [];
req.on("data", (c) => chunks.push(c));
req.on("end", () => {
try {
resolve(JSON.parse(Buffer.concat(chunks).toString()));
} catch (e) {
reject(new Error("Invalid JSON body"));
}
});
req.on("error", reject);
});
}
function jsonResponse(res, status, data) {
res.writeHead(status, { "Content-Type": "application/json" });
res.end(JSON.stringify(data));
}
// ── Routes ────────────────────────────────────────────────────
async function handlePlaylist(req, res) {
const { playlistUrl } = await readBody(req);
if (!playlistUrl) {
return jsonResponse(res, 400, { error: "playlistUrl is required" });
}
const playlistId = parsePlaylistId(playlistUrl);
const yt = await getYouTubeClient();
const playlist = await yt.getPlaylist(playlistId);
const title = playlist.info.title ?? "Untitled Playlist";
const items = [];
function extractItems(page) {
if (!page.items) return;
for (const item of page.items) {
// Only include items that have a video ID (skip Shorts/Reels)
if (!item.id) continue;
items.push({
videoId: item.id,
title: item.title?.toString() ?? "Untitled",
durationSeconds: item.duration?.seconds ?? 0,
thumbnailUrl: item.thumbnails?.[0]?.url ?? null,
});
}
}
// First page
extractItems(playlist);
// Pagination
let page = playlist;
while (page.has_continuation) {
page = await page.getContinuation();
extractItems(page);
}
console.log(`Playlist "${title}": ${items.length} items`);
jsonResponse(res, 200, { title, items });
}
async function handleDownload(req, res) {
const { videoId, jobId } = await readBody(req);
if (!videoId || !jobId) {
return jsonResponse(res, 400, { error: "videoId and jobId are required" });
}
// Use yt-dlp for the actual download — it handles PO tokens, signature
// deciphering, and anti-bot measures that youtubei.js cannot.
const tmpDir = `/tmp/ytdl-${jobId}`;
const outPath = `${tmpDir}/${videoId}.m4a`;
await mkdir(tmpDir, { recursive: true });
try {
console.log(`Downloading ${videoId} via yt-dlp...`);
const { stderr } = await execFileAsync("yt-dlp", [
"-f", "ba[ext=m4a]/ba", // best audio in m4a, fallback to best audio
"--no-playlist",
"--no-warnings",
"-x", // extract audio
"--audio-format", "m4a", // ensure m4a output
"--audio-quality", "0", // best quality
"-o", outPath,
`https://www.youtube.com/watch?v=${videoId}`,
], { timeout: 120_000 });
if (stderr) console.warn(`yt-dlp stderr for ${videoId}: ${stderr}`);
// Read the downloaded file
const audioData = await readFile(outPath);
console.log(
`Downloaded ${videoId}: ${(audioData.length / 1024 / 1024).toFixed(1)} MB`
);
// Upload to Supabase Storage — flat structure, no subfolder
const storagePath = `${videoId}.m4a`;
const { error: uploadError } = await supabase.storage
.from(STORAGE_BUCKET)
.upload(storagePath, audioData, {
contentType: "audio/mp4",
cacheControl: "3600",
upsert: true,
});
if (uploadError) {
throw new Error(`Storage upload failed: ${uploadError.message}`);
}
// Construct public URL using the external-facing base URL, not the
// internal Docker URL that supabase.storage.getPublicUrl() would use.
const publicUrl = `${SUPABASE_PUBLIC_URL}/storage/v1/object/public/${STORAGE_BUCKET}/${storagePath}`;
jsonResponse(res, 200, { storagePath, publicUrl });
} finally {
// Clean up temp file
await unlink(outPath).catch(() => {});
}
}
async function handleHealth(_req, res) {
jsonResponse(res, 200, { status: "ok" });
}
// ── Server ────────────────────────────────────────────────────
const server = http.createServer(async (req, res) => {
try {
if (req.method === "GET" && req.url === "/health") {
return await handleHealth(req, res);
}
if (req.method === "POST" && req.url === "/playlist") {
return await handlePlaylist(req, res);
}
if (req.method === "POST" && req.url === "/download") {
return await handleDownload(req, res);
}
jsonResponse(res, 404, { error: "Not found" });
} catch (err) {
console.error(`${req.method} ${req.url} error:`, err.message);
jsonResponse(res, 500, { error: err.message });
}
});
server.listen(PORT, () => {
console.log(`youtube-worker listening on port ${PORT}`);
});