WIP: checkpoint before stage-3 sub-stage rewrite

This commit is contained in:
lila 2026-05-12 22:13:14 +02:00
parent 73fb12ac35
commit 04a581efe1
8 changed files with 829 additions and 343 deletions

Binary file not shown.

View file

41
data-pipeline/db/reset.ts Normal file
View file

@ -0,0 +1,41 @@
import path from "node:path";
import { fileURLToPath } from "node:url";
import Database from "better-sqlite3";
// ── Paths ─────────────────────────────────────────────────────────────────────
// ES modules do not provide a built-in `__dirname`, so it is derived from this
// module's own URL: convert `import.meta.url` to a filesystem path and take
// its directory.
const __dirname = path.dirname(fileURLToPath(import.meta.url));
// Path of the SQLite database file, `pipeline.db`, resolved inside the
// directory that contains this script (independent of the current working
// directory).
const DB_PATH = path.join(__dirname, "pipeline.db");
// ── Main ──────────────────────────────────────────────────────────────────────
/**
 * CLI entry point: deletes rows from the `run_status` table of the database
 * at `DB_PATH`.
 *
 * The mode is read from the first command-line argument:
 * - `round1` — deletes every row whose `stage` starts with `round1`.
 * - `all`    — deletes every row whose `stage` is not `reverse_link`.
 *
 * A missing or unrecognized mode prints the usage text to stderr and exits
 * the process with code 1. On success the number of deleted rows is logged.
 *
 * @throws If the database file does not exist, or if the DELETE statement
 *   fails (e.g. the `run_status` table is missing). The connection is closed
 *   before the error propagates.
 */
function main(): void {
  const mode = process.argv[2];
  // `undefined` already fails both comparisons, so no separate falsy check
  // is needed. `process.exit` never returns, which narrows `mode` below.
  if (mode !== "round1" && mode !== "all") {
    console.error("Usage: pnpm db:reset round1 | all");
    console.error(" round1 — delete all round1 sub-stage rows");
    console.error(" all — delete all run_status rows except reverse_link");
    process.exit(1);
  }

  // `fileMustExist` stops better-sqlite3 from silently creating an empty
  // database file when `pipeline.db` is absent; a reset on a missing database
  // should fail loudly instead of leaving a stray file behind.
  const db = new Database(DB_PATH, { fileMustExist: true });
  try {
    if (mode === "round1") {
      const result = db
        .prepare("DELETE FROM run_status WHERE stage LIKE 'round1%'")
        .run();
      console.log(`Deleted ${result.changes} round1 rows from run_status`);
    } else {
      const result = db
        .prepare("DELETE FROM run_status WHERE stage NOT IN ('reverse_link')")
        .run();
      console.log(`Deleted ${result.changes} rows from run_status`);
    }
  } finally {
    // Release the connection even when prepare()/run() throws.
    db.close();
  }
}

main();

View file

@ -4,11 +4,11 @@
"private": true,
"type": "module",
"scripts": {
"db:reset": "tsx db/reset.ts",
"extract": "tsx stage-1-extract/scripts/extract.ts",
"reverse-link": "tsx stage-2-reverse-link/scripts/reverse-link.ts",
"db:import": "tsx db/import.ts",
"db:init": "tsx db/init.ts",
"annotate": "tsx stage-2-annotate/scripts/annotate.ts",
"test": "vitest run",
"test:watch": "vitest",
"pipeline:run": "tsx --env-file .env pipeline.ts"

View file

@ -110,7 +110,7 @@ async function checkProviderReady(provider: ProviderConfig): Promise<void> {
async function generateRunName(): Promise<string> {
await fs.mkdir(PATHS.reports, { recursive: true });
const date = new Date().toISOString().slice(0, 10);
const date = new Date().toISOString().exi(0, 10);
const files = await fs.readdir(PATHS.reports);
const todaysRuns = files.filter(
(f) => f.startsWith(date) && f.endsWith(".json"),
@ -175,7 +175,8 @@ function getModelRound1Status(modelName: string): StageStatus {
db
.prepare(
`SELECT COUNT(*) as count FROM run_status
WHERE model_name = ? AND stage = 'round1' AND status = 'complete'`,
WHERE model_name = ? AND stage = 'round1_gloss'
AND status = 'complete'`,
)
.get(modelName) as { count: number }
).count;
@ -602,9 +603,9 @@ async function main(): Promise<void> {
runCompare();
}
// ── Report
stats.stoppedAt = new Date();
await generateReport(runName, stats);
// ── Report (disabled until full pipeline is implemented)
// stats.stoppedAt = new Date();
// await generateReport(runName, stats);
console.log("\nPipeline complete.");
}

File diff suppressed because it is too large Load diff