feat(api): implement game terms query with double join

- Add double join on translations for source/target languages
- Left join term_glosses for optional source-language glosses
- Filter difficulty on target side only (intentionally asymmetric:
  a word's difficulty can differ between languages, and what matters
  is the difficulty of the word being learned)
- Return neutral field names (sourceText, targetText, sourceGloss)
  instead of quiz semantics; service layer maps to prompt/answer
- Tighten term_glosses unique constraint to (term_id, language_code)
  to prevent the left join from multiplying question rows
- Add TODO for ORDER BY RANDOM() scaling post-MVP
This commit is contained in:
lila 2026-04-10 18:02:03 +02:00
parent 9fc3ba375a
commit b59fac493d
4 changed files with 356 additions and 28 deletions

View file

@ -51,11 +51,7 @@ export const term_glosses = pgTable(
created_at: timestamp({ withTimezone: true }).defaultNow().notNull(),
},
(table) => [
unique("unique_term_gloss").on(
table.term_id,
table.language_code,
table.text,
),
unique("unique_term_gloss").on(table.term_id, table.language_code),
check(
"language_code_check",
sql`${table.language_code} IN (${sql.raw(SUPPORTED_LANGUAGE_CODES.map((l) => `'${l}'`).join(", "))})`,

View file

@ -1,6 +1,7 @@
import { db } from "@glossa/db";
import { eq, and, isNotNull, sql } from "drizzle-orm";
import { alias } from "drizzle-orm/pg-core";
import { terms, translations, term_glosses } from "@glossa/db/schema";
import type {
  SupportedLanguageCode,
  SupportedPos,
  DifficultyLevel,
} from "@glossa/shared";
/**
 * One game-ready term pair: the same term rendered in the source and target
 * languages, plus an optional source-language gloss.
 *
 * Field names are deliberately neutral (sourceText/targetText rather than
 * prompt/answer): the service layer decides which side is shown to the
 * learner and which is asked for.
 */
export type TranslationPairRow = {
  termId: string;
  sourceText: string;
  targetText: string;
  // null when no gloss exists for the source language (left join below).
  sourceGloss: string | null;
};
// Note: difficulty filter is intentionally asymmetric. We filter on the target
// (answer) side only — a word can be A2 in Italian but B1 in English, and what
// matters for the learner is the difficulty of the word they're being taught.
export const getGameTerms = async (
sourceLanguage: SupportedLanguageCode,
targetLanguage: SupportedLanguageCode,
pos: SupportedPos,
difficulty: DifficultyLevel,
count: number,
) => {
rounds: number,
): Promise<TranslationPairRow[]> => {
const sourceTranslations = alias(translations, "source_translations");
const targetTranslations = alias(translations, "target_translations");
const rows = await db
.select()
.select({
termId: terms.id,
prompt: sourceTranslations.text,
answer: targetTranslations.text,
gloss: term_glosses.text,
})
.from(terms)
.innerJoin(translations, eq(translations.term_id, terms.id))
.innerJoin(
sourceTranslations,
and(
eq(sourceTranslations.term_id, terms.id),
eq(sourceTranslations.language_code, sourceLanguage), // Filter here!
),
)
.innerJoin(
targetTranslations,
and(
eq(targetTranslations.term_id, terms.id),
eq(targetTranslations.language_code, targetLanguage), // Filter here!
),
)
.leftJoin(
term_glosses,
and(
eq(term_glosses.term_id, terms.id),
eq(term_glosses.language_code, sourceLanguage),
),
)
.where(
and(
eq(terms.pos, pos),
eq(translations.language_code, targetLanguage),
eq(translations.difficulty, difficulty),
eq(targetTranslations.difficulty, difficulty),
isNotNull(sourceTranslations.difficulty), // Good data quality check!
),
)
.limit(count);
// TODO(post-mvp): ORDER BY RANDOM() sorts the entire filtered result set before
// applying LIMIT, which is fine at current data volumes (low thousands of rows
// after POS + difficulty filters) but degrades as the terms table grows. Once
// the database is fully populated and tagged, replace with one of:
// - TABLESAMPLE BERNOULLI(n) for approximate sampling on large tables
// - Random offset: SELECT ... OFFSET floor(random() * (SELECT count(*) ...))
// - Pre-computed random column with a btree index, reshuffled periodically
// Benchmark first — don't optimise until it actually hurts.
.orderBy(sql`RANDOM()`)
.limit(rounds);
return rows;
};