From 4fa307341289947c2d3b79146fc42b1a8b12a9a5 Mon Sep 17 00:00:00 2001 From: lila Date: Sun, 3 May 2026 17:56:29 +0200 Subject: [PATCH] feat: add db schema, init, and vitest config --- data-pipeline/db/index.ts | 24 +++ data-pipeline/db/init.ts | 39 +++++ data-pipeline/db/pipeline.db | Bin 0 -> 139264 bytes data-pipeline/db/schema.sql | 157 ++++++++++++++++++ data-pipeline/package.json | 9 +- .../{test => sample}/output/sample.json | 0 .../{test => sample}/scripts/sample.ts | 2 +- data-pipeline/tsconfig.json | 2 +- data-pipeline/vitest.config.ts | 10 ++ documentation/data-pipeline.md | 5 + eslint.config.mjs | 1 - package.json | 2 +- pnpm-lock.yaml | 5 +- 13 files changed, 248 insertions(+), 8 deletions(-) create mode 100644 data-pipeline/db/index.ts create mode 100644 data-pipeline/db/init.ts create mode 100644 data-pipeline/db/pipeline.db create mode 100644 data-pipeline/db/schema.sql rename data-pipeline/{test => sample}/output/sample.json (100%) rename data-pipeline/{test => sample}/scripts/sample.ts (99%) create mode 100644 data-pipeline/vitest.config.ts diff --git a/data-pipeline/db/index.ts b/data-pipeline/db/index.ts new file mode 100644 index 0000000..f0ce57d --- /dev/null +++ b/data-pipeline/db/index.ts @@ -0,0 +1,24 @@ +import path from "node:path"; +import { fileURLToPath } from "node:url"; +import Database from "better-sqlite3"; + +// ── Paths ───────────────────────────────────────────────────────────────────── + +const __dirname = path.dirname(fileURLToPath(import.meta.url)); + +const DB_PATH = path.join(__dirname, "pipeline.db"); + +// ── Types ───────────────────────────────────────────────────────────────────── + +export type Db = InstanceType; + +// ── Open ────────────────────────────────────────────────────────────────────── + +export function openDb(): Db { + const db = new Database(DB_PATH); + + db.pragma("journal_mode = WAL"); + db.pragma("foreign_keys = ON"); + + return db; +} diff --git a/data-pipeline/db/init.ts b/data-pipeline/db/init.ts 
new file mode 100644 index 0000000..f85d213 --- /dev/null +++ b/data-pipeline/db/init.ts @@ -0,0 +1,39 @@ +import fs from "node:fs/promises"; +import path from "node:path"; +import { fileURLToPath } from "node:url"; +import Database from "better-sqlite3"; + +// ── Paths ───────────────────────────────────────────────────────────────────── + +const __dirname = path.dirname(fileURLToPath(import.meta.url)); + +const PATHS = { + schema: path.join(__dirname, "schema.sql"), + db: path.join(__dirname, "pipeline.db"), +}; + +// ── Init ────────────────────────────────────────────────────────────────────── + +export async function initDb(): Promise { + const schema = await fs.readFile(PATHS.schema, "utf-8"); + const db = new Database(PATHS.db); + + db.pragma("journal_mode = WAL"); + db.pragma("foreign_keys = ON"); + db.exec(schema); + db.close(); + + console.log(` pipeline.db initialised → ${PATHS.db}`); +} + +// ── Main ───────────────────────────────────────────────────────────────────── + +async function main(): Promise { + console.log("Initialising pipeline.db..."); + await initDb(); +} + +main().catch((err) => { + console.error(err); + process.exit(1); +}); diff --git a/data-pipeline/db/pipeline.db b/data-pipeline/db/pipeline.db new file mode 100644 index 0000000000000000000000000000000000000000..f8dd7fc45fe0fbe238c43d72a367d6e9971c066d GIT binary patch literal 139264 zcmeI(?Qh%09l&wX52P(WCC&qzxT)!MMy%FeAmoLc8bjy0N@mz`TSZ12UugyE`6v_1$;3OLfb*U#;!u>&A`5nZ(dg;@5_eNF;`we^;G<;&aydF(NjcZzb?M z?&nbAmw)`nsGE7_TQ|?#%(pWyPh}?y6R%7f;~yr!*1jK~SD$EKjC`lQK5UJAK6GjL za^g$ntC?@&4jE2wY8NzpZB1z{*7F~gE$eZ9|DbG{g?y!0D(35!`LJ5I?3m<>8`;cy zE@R}@-`vg^F`0~|q+yhbhWKpmv7T=AHHIJ!30#Z`sKqN;^44j-9u2#?J2c zwvo-;%49P;8=1Sto>j4G&UF<{zkt0|DyG~6>{p9c*{tOEEhCqCKj)rK9KhYZyR-T3 zZpK*Zo@Y5wB$Z6vxv1*-H6>B16s^bhgR&D=GvBCJy=`$e8OBVD<4I!r;iu;`eRWl7 z&3UkU<*IGVr6Uv1hfpK4^hw$->(Tc|>J_k_R#cou^4vVH>L0Aec}CXDOh+bPXx=`n z={IjGN2#`DrwtS>yHG0~)JxTh-EA4c)Y-6RWXV1`?P{Y|uuRueZeYO{YJXC(t-8I` 
z$=*-vEaxkGjr^YFG+O!CZsV<6kL!N_iP|mNcITAAP%Inb;7%jonpJi4W^7O*d7P+3 zrk`uB&1iZ$tsE^#VG&)kTTY~$i3x|;{~U#4m$q;Kd zT?Q3Q5@u)6?5$=H@&^s;&ZT-|dJJK^Cb7bWn&r)OoQb6zBjc^it@YjQoU!Ci((0vs zYw230`siB9yV*1j+|;7^)CEOMJ0`(!E*oBToT%PNs`~APxTtm`V5YmjCk}1R8>(6t zyhwU2%s-De$LvWM3&pi(w;W!VF($;}Cfdo>Z-4p0ox!16A3JW8>8X^N?nxRyEUTJ+ z<%-gJ)q@jbzxdWdOd?g%iGk_FxwnA@=G@UuU(70cyG?k;A~#)kZ~MG%#N@rn)Zx~s zrq9hO&3&)B{#fgOsiDgHi9g@ECR5GCu%<6ADyJ{AcQ&;X$wdOy& z$)O59RdpItcec4~xJ8{VusWjZTZ?g{MX;`BI=D6ZvEns^!xfLtZ-xHXPzcOV^sG9U z(r#PQXn#<(-Dc-}yy}P{AemYjQuXz@xU2L}V5a>a!>X=257L*XrrgKrGvQ}ywh%x7 z0R#|0009ILKmY**5I|tC1d?ubc>W)(pi&zI5I_I{1Q0*~0R#|0009If0oMN|5H=A& z009ILKmY**5I_I{1P~ZN0oMNqFsM`s0R#|0009ILKmY**5I_I{Nr3f#34~1q5I_I{ z1Q0*~0R#|0009IBP=M$E0SqctLI42-5I_I{1Q0*~0R#|0Koa2jUjkth0R#|0009IL zKmY**5I_Kd0Tkfx{|7LrR0#nD5I_I{1Q0*~0R#|000BvW^?wP3O#~1?009ILKmY** z5I_I{1O`xm_5T43Dpf)N0R#|0009ILKmY**5I{f@VEtbLVG{ua5I_I{1Q0*~0R#|0 z0D%D%VEumpgG!YUKmY**5I_I{1Q0*~0R#|`1X%x}EUKmY**5I_I{1Q0*~0ZD-Me+h(51Q0*~0R#|0009ILKmY**22gOQOB+g}R4H1I?FZ#j-7@oydez%D ziv#AkCSm${wE8t-H`@2qF<8C#iq%T5ka+Q}($?7W>bc6PV7jcn#t zCY#yW$lNvdtcq20>Q*%U0`^j=m~s!WUoBc?vy$Jpj9ljZoO?QP0C)H9&gQ$j8Dpt? 
zp5;K1R5Inn>96^(`Z|c_{>bSMEl@PneDgfWP}W@}K!8v9WvoWPe{K$nPi3 zH0LKYePu;yJ@E9~cc6lGzt+C<%G;A6dwH)+@Ah{wY)kjbdM#hE%X#l^f5QDTkTq&t zcTW;EqIi`kTMwaK^^q$m5JRZ%nHC-IX|GnZQiTDO0W4aZO)XP(G`A z#I;7nwCnkL!|wh{hFH7lGH48DXVC1e=8qoMolEt`^ccc+O=5)$HOrgnmabK*kFKS>n@!`uO)Z*FT~NfdV-gJKvf)+7iRz7{s^4CSi)uFl zX1e=(;?UN-p{jMki=@}W{PTG8)SiT~P+WU<%i(nyV?rEmqMck~dKe6IAZw?;{NT>u zP_2)tdi*HUQzU$-el@=7Z;V*hF6H{aza#du8-b>fcBHFg!c* zn?4@-$%4Qs7iivi$$Rl}H0pKRnq^nZ51q+K@Toj9aUs;DBeVR}gJJhMJZdnEzpI@v zR62v?=De!Mzb%QxaAsFgwpW|lmG(sISK$DNsX?cdNHC(|JG;X#@J`WGqWO!t_Vd~A z!V>zZC88v!7+St?S3My*FMw?jcQ93N$-kX1;m#Qu|$JA?zHyn|BTqOnfm~ zJeWo5^4_gGrU?d$xo_!2E^&{42Gjgv>Hhsvp;4}X(w#y$zr-1W-8_6TZV$iQR}T-A zb6(v?s$FsJ5Z>hD|FtG?P1p2|4W+f~g)=a4%P+8x+4Vvww8vz8iU0?XazcQ+FTMhW zI!|=%*9_iTfg8xE!CX8F2a5N;-nes7)t}*-0gEx-`iOyLbL9#XO^>m}m3;pCy>smEO*afbdI9@!rZAW!9&UdpXy=h4}NFs3idY{{NIe zu5$SZAbRi%5?K&H009ILKmY**5I_I{1Q0mo0<8a^@&I!A z2q1s}0tg_000IagfB*sr^dZ3We;-I>K>z^+5I_I{1Q0*~0R#|0;FJsS|No!z0CM>V WAb { for (const [l, examples] of Object.entries(record.examples)) { const lang = l as SupportedLanguageCode; if (!base.examples[lang]) { - base.examples[lang] = examples as Example[]; + base.examples[lang] = examples; } } } diff --git a/data-pipeline/tsconfig.json b/data-pipeline/tsconfig.json index 7752b6c..5273064 100644 --- a/data-pipeline/tsconfig.json +++ b/data-pipeline/tsconfig.json @@ -8,5 +8,5 @@ "types": ["node"] }, "references": [{ "path": "../packages/shared" }], - "include": ["./**/*"] + "include": ["./**/*", "vitest.config.ts"] } diff --git a/data-pipeline/vitest.config.ts b/data-pipeline/vitest.config.ts new file mode 100644 index 0000000..bdc68ba --- /dev/null +++ b/data-pipeline/vitest.config.ts @@ -0,0 +1,10 @@ +import { defineConfig } from "vitest/config"; + +export default defineConfig({ + test: { + environment: "node", + globals: true, + include: ["tests/**/*.test.ts"], + exclude: ["**/dist/**", "**/node_modules/**"], + }, +}); diff --git a/documentation/data-pipeline.md b/documentation/data-pipeline.md index 
bdb5971..543bca7 100644 --- a/documentation/data-pipeline.md +++ b/documentation/data-pipeline.md @@ -528,6 +528,7 @@ llama.cpp is not installed. **Next action:** Write the round 1 generation script. +- [ ] Write tests for stage 3 - [ ] Write round 1 script (generation) - [ ] Write compile-candidates script - [ ] Write round 2 script (voting) @@ -542,24 +543,28 @@ llama.cpp is not installed. ### Stage 4 — Merge `🔲 not started` +- [ ] Write tests for stage 4 - [ ] Write merge script - [ ] Run merge → `pipeline.db` - [ ] Confirm tiebreaker resolves all flagged translations ### Stage 4b — Tiebreak `🔲 not started` +- [ ] Write tests for stage 4b - [ ] Write tiebreak logic - [ ] Run tiebreaker for all flagged translations - [ ] Confirm no flagged translations remain before seeding ### Stage 5 — Compare / QA `🔲 not started` +- [ ] Write tests for stage 5 - [ ] Write compare script - [ ] Run compare → `COVERAGE.md` - [ ] Review output quality before seeding ### Stage 6 — Sync `🔲 not started` +- [ ] Write tests for stage 6 - [ ] Write sync script - [ ] Configure `DATABASE_URL` in `.env` - [ ] Run sync → production PostgreSQL diff --git a/eslint.config.mjs b/eslint.config.mjs index a88b6f1..386ffe9 100644 --- a/eslint.config.mjs +++ b/eslint.config.mjs @@ -12,7 +12,6 @@ export default defineConfig([ "node_modules/", "routeTree.gen.ts", "scripts/**", - "data-pipeline/**/*", ]), eslint.configs.recommended, diff --git a/package.json b/package.json index 6a30e1d..3766166 100644 --- a/package.json +++ b/package.json @@ -23,7 +23,7 @@ "prettier --write" ] }, - "packageManager": "pnpm@10.33.1", + "packageManager": "pnpm@10.33.2", "devDependencies": { "@eslint/js": "^10.0.1", "@tanstack/eslint-plugin-router": "^1.161.6", diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index 4453586..6d416f3 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -173,6 +173,9 @@ importers: typescript: specifier: ^5.9.3 version: 5.9.3 + vitest: + specifier: ^4.1.0 + version:
4.1.0(@opentelemetry/api@1.9.1)(@types/node@24.12.0)(jsdom@29.0.1(@noble/hashes@2.2.0))(vite@8.0.1(@types/node@24.12.0)(esbuild@0.27.4)(jiti@2.6.1)(tsx@4.21.0)(yaml@2.8.3)) packages/db: dependencies: @@ -4391,7 +4394,6 @@ snapshots: magic-string: 0.30.21 optionalDependencies: vite: 8.0.1(@types/node@24.12.0)(esbuild@0.27.4)(jiti@2.6.1)(tsx@4.21.0)(yaml@2.8.3) - optional: true '@vitest/mocker@4.1.0(vite@8.0.1(@types/node@25.5.0)(esbuild@0.27.4)(jiti@2.6.1)(tsx@4.21.0)(yaml@2.8.3))': dependencies: @@ -6136,7 +6138,6 @@ snapshots: jsdom: 29.0.1(@noble/hashes@2.2.0) transitivePeerDependencies: - msw - optional: true vitest@4.1.0(@opentelemetry/api@1.9.1)(@types/node@25.5.0)(jsdom@29.0.1(@noble/hashes@2.2.0))(vite@8.0.1(@types/node@25.5.0)(esbuild@0.27.4)(jiti@2.6.1)(tsx@4.21.0)(yaml@2.8.3)): dependencies: