feat: migrate production schema from OMW to Kaikki flat vocabulary model
- Replace terms/translations/term_glosses/term_examples with vocabulary_entries and entry_translations
- Remove decks, topics and related tables (deferred)
- Add cefr_level and difficulty to entry_translations for game query filtering
- Update termModel.ts for new schema — getDistractors now takes sourceLanguage
- Update gameService.ts and multiplayerGameService.ts for entryId rename
- Update all test fixtures from termId to entryId
- Generate and apply migration 0011
This commit is contained in:
parent
38d8b85228
commit
963bff4eb8
10 changed files with 949 additions and 215 deletions
|
|
@ -1,25 +1,27 @@
|
|||
import { db } from "@lila/db";
|
||||
import { eq, and, isNotNull, sql, ne } from "drizzle-orm";
|
||||
import { terms, translations, term_glosses } from "@lila/db/schema";
|
||||
import { eq, and, ne, sql, isNotNull } from "drizzle-orm";
|
||||
import { vocabulary_entries, entry_translations } from "@lila/db/schema";
|
||||
import { alias } from "drizzle-orm/pg-core";
|
||||
|
||||
import type {
|
||||
SupportedLanguageCode,
|
||||
SupportedPos,
|
||||
DifficultyLevel,
|
||||
} from "@lila/shared";
|
||||
|
||||
// ── Types ─────────────────────────────────────────────────────────────────────
|
||||
|
||||
export type TranslationPairRow = {
|
||||
termId: string;
|
||||
entryId: string;
|
||||
sourceText: string;
|
||||
targetText: string;
|
||||
sourceGloss: string | null;
|
||||
};
|
||||
|
||||
// Note: difficulty filter is intentionally asymmetric. We filter on the target
|
||||
// (answer) side only — a word can be A2 in Italian but B1 in English, and what
|
||||
// matters for the learner is the difficulty of the word they're being taught.
|
||||
// ── Queries ───────────────────────────────────────────────────────────────────
|
||||
|
||||
// Note: difficulty filter is intentionally on the target (translation) side.
|
||||
// A word can be A2 in one language but B1 in another — what matters for the
|
||||
// learner is the difficulty of the word they are being tested on.
|
||||
export const getGameTerms = async (
|
||||
sourceLanguage: SupportedLanguageCode,
|
||||
targetLanguage: SupportedLanguageCode,
|
||||
|
|
@ -27,53 +29,36 @@ export const getGameTerms = async (
|
|||
difficulty: DifficultyLevel,
|
||||
rounds: number,
|
||||
): Promise<TranslationPairRow[]> => {
|
||||
const sourceTranslations = alias(translations, "source_translations");
|
||||
const targetTranslations = alias(translations, "target_translations");
|
||||
const sourceEntries = alias(vocabulary_entries, "source_entries");
|
||||
const targetTranslations = alias(entry_translations, "target_translations");
|
||||
|
||||
const rows = await db
|
||||
.select({
|
||||
termId: terms.id,
|
||||
sourceText: sourceTranslations.text,
|
||||
targetText: targetTranslations.text,
|
||||
sourceGloss: term_glosses.text,
|
||||
entryId: sourceEntries.id,
|
||||
sourceText: sourceEntries.headword,
|
||||
targetText: targetTranslations.translation,
|
||||
sourceGloss: sourceEntries.gloss,
|
||||
})
|
||||
.from(terms)
|
||||
.innerJoin(
|
||||
sourceTranslations,
|
||||
and(
|
||||
eq(sourceTranslations.term_id, terms.id),
|
||||
eq(sourceTranslations.language_code, sourceLanguage), // Filter here!
|
||||
),
|
||||
)
|
||||
.from(sourceEntries)
|
||||
.innerJoin(
|
||||
targetTranslations,
|
||||
and(
|
||||
eq(targetTranslations.term_id, terms.id),
|
||||
eq(targetTranslations.language_code, targetLanguage), // Filter here!
|
||||
),
|
||||
)
|
||||
.leftJoin(
|
||||
term_glosses,
|
||||
and(
|
||||
eq(term_glosses.term_id, terms.id),
|
||||
eq(term_glosses.language_code, sourceLanguage),
|
||||
eq(targetTranslations.entry_id, sourceEntries.id),
|
||||
eq(targetTranslations.target_language_code, targetLanguage),
|
||||
eq(targetTranslations.difficulty, difficulty),
|
||||
isNotNull(targetTranslations.translation),
|
||||
),
|
||||
)
|
||||
.where(
|
||||
and(
|
||||
eq(terms.pos, pos),
|
||||
eq(targetTranslations.difficulty, difficulty),
|
||||
isNotNull(sourceTranslations.difficulty), // Good data quality check!
|
||||
eq(sourceEntries.language_code, sourceLanguage),
|
||||
eq(sourceEntries.pos, pos),
|
||||
isNotNull(sourceEntries.difficulty),
|
||||
),
|
||||
)
|
||||
// TODO(post-mvp): ORDER BY RANDOM() sorts the entire filtered result set before
|
||||
// applying LIMIT, which is fine at current data volumes (low thousands of rows
|
||||
// after POS + difficulty filters) but degrades as the terms table grows. Once
|
||||
// the database is fully populated and tagged, replace with one of:
|
||||
// - TABLESAMPLE BERNOULLI(n) for approximate sampling on large tables
|
||||
// - Random offset: SELECT ... OFFSET floor(random() * (SELECT count(*) ...))
|
||||
// - Pre-computed random column with a btree index, reshuffled periodically
|
||||
// Benchmark first — don't optimise until it actually hurts.
|
||||
// TODO(post-mvp): ORDER BY RANDOM() sorts the entire filtered result set
|
||||
// before applying LIMIT, which is fine at current data volumes but degrades
|
||||
// as the table grows. See the pre-migration version of termModel.ts in git history for optimisation options.
|
||||
.orderBy(sql`RANDOM()`)
|
||||
.limit(rounds);
|
||||
|
||||
|
|
@ -81,32 +66,33 @@ export const getGameTerms = async (
|
|||
};
|
||||
|
||||
export const getDistractors = async (
|
||||
excludeTermId: string,
|
||||
excludeEntryId: string,
|
||||
excludeText: string,
|
||||
sourceLanguage: SupportedLanguageCode,
|
||||
targetLanguage: SupportedLanguageCode,
|
||||
pos: SupportedPos,
|
||||
difficulty: DifficultyLevel,
|
||||
count: number,
|
||||
): Promise<string[]> => {
|
||||
const rows = await db
|
||||
.select({ text: translations.text })
|
||||
.from(terms)
|
||||
.select({ text: entry_translations.translation })
|
||||
.from(vocabulary_entries)
|
||||
.innerJoin(
|
||||
translations,
|
||||
entry_translations,
|
||||
and(
|
||||
eq(translations.term_id, terms.id),
|
||||
eq(translations.language_code, targetLanguage),
|
||||
eq(entry_translations.entry_id, vocabulary_entries.id),
|
||||
eq(entry_translations.target_language_code, targetLanguage),
|
||||
eq(entry_translations.difficulty, difficulty),
|
||||
),
|
||||
)
|
||||
.where(
|
||||
and(
|
||||
eq(terms.pos, pos),
|
||||
eq(translations.difficulty, difficulty),
|
||||
ne(terms.id, excludeTermId),
|
||||
ne(translations.text, excludeText),
|
||||
eq(vocabulary_entries.language_code, sourceLanguage),
|
||||
eq(vocabulary_entries.pos, pos),
|
||||
ne(vocabulary_entries.id, excludeEntryId),
|
||||
ne(entry_translations.translation, excludeText),
|
||||
),
|
||||
)
|
||||
// TODO(post-mvp): same ORDER BY RANDOM() concern as getGameTerms — see comment there.
|
||||
.orderBy(sql`RANDOM()`)
|
||||
.limit(count);
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue