feat: migrate production schema from OMW to Kaikki flat vocabulary model

- Replace terms/translations/term_glosses/term_examples with vocabulary_entries and entry_translations
- Remove decks, topics and related tables (deferred)
- Add cefr_level and difficulty to entry_translations for game query filtering
- Update termModel.ts for new schema — getDistractors now takes sourceLanguage
- Update gameService.ts and multiplayerGameService.ts for entryId rename
- Update all test fixtures from termId to entryId
- Generate and apply migration 0011
2026-05-05 17:39:25 +02:00 · 2026-05-05 17:39:25 +02:00 · 963bff4eb8
commit 963bff4eb8
parent 38d8b85228
10 changed files with 949 additions and 215 deletions
--- a/packages/db/src/models/termModel.ts
+++ b/packages/db/src/models/termModel.ts
@ -1,25 +1,27 @@
 import { db } from "@lila/db";
-import { eq, and, isNotNull, sql, ne } from "drizzle-orm";
-import { terms, translations, term_glosses } from "@lila/db/schema";
+import { eq, and, ne, sql, isNotNull } from "drizzle-orm";
+import { vocabulary_entries, entry_translations } from "@lila/db/schema";
 import { alias } from "drizzle-orm/pg-core";
-
 import type {
  SupportedLanguageCode,
  SupportedPos,
  DifficultyLevel,
 } from "@lila/shared";

+// ── Types ─────────────────────────────────────────────────────────────────────
+
 export type TranslationPairRow = {
-  termId: string;
+  entryId: string;
  sourceText: string;
  targetText: string;
  sourceGloss: string | null;
 };

-// Note: difficulty filter is intentionally asymmetric. We filter on the target
-// (answer) side only — a word can be A2 in Italian but B1 in English, and what
-// matters for the learner is the difficulty of the word they're being taught.
+// ── Queries ───────────────────────────────────────────────────────────────────

+// Note: difficulty filter is intentionally on the target (translation) side.
+// A word can be A2 in one language but B1 in another — what matters for the
+// learner is the difficulty of the word they are being tested on.
 export const getGameTerms = async (
  sourceLanguage: SupportedLanguageCode,
  targetLanguage: SupportedLanguageCode,
@ -27,53 +29,36 @@ export const getGameTerms = async (
  difficulty: DifficultyLevel,
  rounds: number,
 ): Promise<TranslationPairRow[]> => {
-  const sourceTranslations = alias(translations, "source_translations");
-  const targetTranslations = alias(translations, "target_translations");
+  const sourceEntries = alias(vocabulary_entries, "source_entries");
+  const targetTranslations = alias(entry_translations, "target_translations");

  const rows = await db
    .select({
-      termId: terms.id,
-      sourceText: sourceTranslations.text,
-      targetText: targetTranslations.text,
-      sourceGloss: term_glosses.text,
+      entryId: sourceEntries.id,
+      sourceText: sourceEntries.headword,
+      targetText: targetTranslations.translation,
+      sourceGloss: sourceEntries.gloss,
    })
-    .from(terms)
-    .innerJoin(
-      sourceTranslations,
-      and(
-        eq(sourceTranslations.term_id, terms.id),
-        eq(sourceTranslations.language_code, sourceLanguage), // Filter here!
-      ),
-    )
+    .from(sourceEntries)
    .innerJoin(
      targetTranslations,
      and(
-        eq(targetTranslations.term_id, terms.id),
-        eq(targetTranslations.language_code, targetLanguage), // Filter here!
-      ),
-    )
-    .leftJoin(
-      term_glosses,
-      and(
-        eq(term_glosses.term_id, terms.id),
-        eq(term_glosses.language_code, sourceLanguage),
+        eq(targetTranslations.entry_id, sourceEntries.id),
+        eq(targetTranslations.target_language_code, targetLanguage),
+        eq(targetTranslations.difficulty, difficulty),
+        isNotNull(targetTranslations.translation),
      ),
    )
    .where(
      and(
-        eq(terms.pos, pos),
-        eq(targetTranslations.difficulty, difficulty),
-        isNotNull(sourceTranslations.difficulty), // Good data quality check!
+        eq(sourceEntries.language_code, sourceLanguage),
+        eq(sourceEntries.pos, pos),
+        isNotNull(sourceEntries.difficulty),
      ),
    )
-    // TODO(post-mvp): ORDER BY RANDOM() sorts the entire filtered result set before
-    // applying LIMIT, which is fine at current data volumes (low thousands of rows
-    // after POS + difficulty filters) but degrades as the terms table grows. Once
-    // the database is fully populated and tagged, replace with one of:
-    //   - TABLESAMPLE BERNOULLI(n) for approximate sampling on large tables
-    //   - Random offset: SELECT ... OFFSET floor(random() * (SELECT count(*) ...))
-    //   - Pre-computed random column with a btree index, reshuffled periodically
-    // Benchmark first — don't optimise until it actually hurts.
+    // TODO(post-mvp): ORDER BY RANDOM() sorts the whole filtered result set
+    // before LIMIT; fine at current volumes, degrades as the table grows. Try
+    // TABLESAMPLE, a random OFFSET, or an indexed random column.
    .orderBy(sql`RANDOM()`)
    .limit(rounds);

@ -81,32 +66,33 @@ export const getGameTerms = async (
 };

 export const getDistractors = async (
-  excludeTermId: string,
+  excludeEntryId: string,
  excludeText: string,
+  sourceLanguage: SupportedLanguageCode,
  targetLanguage: SupportedLanguageCode,
  pos: SupportedPos,
  difficulty: DifficultyLevel,
  count: number,
 ): Promise<string[]> => {
  const rows = await db
-    .select({ text: translations.text })
-    .from(terms)
+    .select({ text: entry_translations.translation })
+    .from(vocabulary_entries)
    .innerJoin(
-      translations,
+      entry_translations,
      and(
-        eq(translations.term_id, terms.id),
-        eq(translations.language_code, targetLanguage),
+        eq(entry_translations.entry_id, vocabulary_entries.id),
+        eq(entry_translations.target_language_code, targetLanguage),
+        eq(entry_translations.difficulty, difficulty),
      ),
    )
    .where(
      and(
-        eq(terms.pos, pos),
-        eq(translations.difficulty, difficulty),
-        ne(terms.id, excludeTermId),
-        ne(translations.text, excludeText),
+        eq(vocabulary_entries.language_code, sourceLanguage),
+        eq(vocabulary_entries.pos, pos),
+        ne(vocabulary_entries.id, excludeEntryId),
+        ne(entry_translations.translation, excludeText),
      ),
    )
-    // TODO(post-mvp): same ORDER BY RANDOM() concern as getGameTerms — see comment there.
    .orderBy(sql`RANDOM()`)
    .limit(count);