WIP: checkpoint before stage-3 sub-stage rewrite
This commit is contained in:
parent
73fb12ac35
commit
04a581efe1
8 changed files with 829 additions and 343 deletions
BIN
data-pipeline/db/pipeline.db-shm
Normal file
BIN
data-pipeline/db/pipeline.db-shm
Normal file
Binary file not shown.
0
data-pipeline/db/pipeline.db-wal
Normal file
0
data-pipeline/db/pipeline.db-wal
Normal file
41
data-pipeline/db/reset.ts
Normal file
41
data-pipeline/db/reset.ts
Normal file
|
|
@ -0,0 +1,41 @@
|
|||
import path from "node:path";
|
||||
import { fileURLToPath } from "node:url";
|
||||
import Database from "better-sqlite3";
|
||||
|
||||
// ── Paths ─────────────────────────────────────────────────────────────────────
|
||||
|
||||
// ES modules have no built-in __dirname, so derive it from this module's URL.
const __filename = fileURLToPath(import.meta.url);
const __dirname = path.dirname(__filename);

// The SQLite database file sits in the same directory as this script.
const DB_PATH = path.join(__dirname, "pipeline.db");
|
||||
|
||||
// ── Main ──────────────────────────────────────────────────────────────────────
|
||||
|
||||
/**
 * CLI entry point: deletes bookkeeping rows from the `run_status` table.
 *
 * The mode is read from the first CLI argument (`process.argv[2]`):
 *   - `round1` — deletes every row whose `stage` starts with `round1`.
 *   - `all`    — deletes every row whose `stage` is not `reverse_link`.
 *
 * A missing or unrecognised mode prints usage to stderr and exits with
 * status 1. The number of deleted rows is logged to stdout.
 */
function main(): void {
  const mode = process.argv[2];

  // `undefined` (no argument) fails both comparisons, so no separate
  // emptiness check is needed.
  if (mode !== "round1" && mode !== "all") {
    console.error("Usage: pnpm db:reset round1 | all");
    console.error(" round1 — delete all round1 sub-stage rows");
    console.error(" all — delete all run_status rows except reverse_link");
    process.exit(1);
  }

  const db = new Database(DB_PATH);

  // try/finally guarantees the handle is released even when the DELETE
  // throws (for example, if the run_status table does not exist).
  try {
    if (mode === "round1") {
      const { changes } = db
        .prepare("DELETE FROM run_status WHERE stage LIKE 'round1%'")
        .run();
      console.log(`Deleted ${changes} round1 rows from run_status`);
    } else {
      const { changes } = db
        .prepare("DELETE FROM run_status WHERE stage NOT IN ('reverse_link')")
        .run();
      console.log(`Deleted ${changes} rows from run_status`);
    }
  } finally {
    db.close();
  }
}
|
||||
|
||||
main();
|
||||
|
|
@ -4,11 +4,11 @@
|
|||
"private": true,
|
||||
"type": "module",
|
||||
"scripts": {
|
||||
"db:reset": "tsx db/reset.ts",
|
||||
"extract": "tsx stage-1-extract/scripts/extract.ts",
|
||||
"reverse-link": "tsx stage-2-reverse-link/scripts/reverse-link.ts",
|
||||
"db:import": "tsx db/import.ts",
|
||||
"db:init": "tsx db/init.ts",
|
||||
"annotate": "tsx stage-2-annotate/scripts/annotate.ts",
|
||||
"test": "vitest run",
|
||||
"test:watch": "vitest",
|
||||
"pipeline:run": "tsx --env-file .env pipeline.ts"
|
||||
|
|
|
|||
|
|
@ -110,7 +110,7 @@ async function checkProviderReady(provider: ProviderConfig): Promise<void> {
|
|||
async function generateRunName(): Promise<string> {
|
||||
await fs.mkdir(PATHS.reports, { recursive: true });
|
||||
|
||||
const date = new Date().toISOString().slice(0, 10);
|
||||
const date = new Date().toISOString().exi(0, 10);
|
||||
const files = await fs.readdir(PATHS.reports);
|
||||
const todaysRuns = files.filter(
|
||||
(f) => f.startsWith(date) && f.endsWith(".json"),
|
||||
|
|
@ -175,7 +175,8 @@ function getModelRound1Status(modelName: string): StageStatus {
|
|||
db
|
||||
.prepare(
|
||||
`SELECT COUNT(*) as count FROM run_status
|
||||
WHERE model_name = ? AND stage = 'round1' AND status = 'complete'`,
|
||||
WHERE model_name = ? AND stage = 'round1_gloss'
|
||||
AND status = 'complete'`,
|
||||
)
|
||||
.get(modelName) as { count: number }
|
||||
).count;
|
||||
|
|
@ -602,9 +603,9 @@ async function main(): Promise<void> {
|
|||
runCompare();
|
||||
}
|
||||
|
||||
// ── Report
|
||||
stats.stoppedAt = new Date();
|
||||
await generateReport(runName, stats);
|
||||
// ── Report (disabled until full pipeline is implemented)
|
||||
// stats.stoppedAt = new Date();
|
||||
// await generateReport(runName, stats);
|
||||
|
||||
console.log("\nPipeline complete.");
|
||||
}
|
||||
|
|
|
|||
File diff suppressed because it is too large
Load diff
Loading…
Add table
Add a link
Reference in a new issue