feat(api): implement game terms query with double join
- Add double join on translations for source/target languages
- Left join term_glosses for optional source-language glosses
- Filter difficulty on target side only (intentionally asymmetric: a word's difficulty can differ between languages, and what matters is the difficulty of the word being learned)
- Return neutral field names (sourceText, targetText, sourceGloss) instead of quiz semantics; service layer maps to prompt/answer
- Tighten term_glosses unique constraint to (term_id, language_code) to prevent the left join from multiplying question rows
- Add TODO for ORDER BY RANDOM() scaling post-MVP
This commit is contained in:
parent
9fc3ba375a
commit
b59fac493d
4 changed files with 356 additions and 28 deletions
|
|
@ -51,11 +51,7 @@ export const term_glosses = pgTable(
|
|||
created_at: timestamp({ withTimezone: true }).defaultNow().notNull(),
|
||||
},
|
||||
(table) => [
|
||||
unique("unique_term_gloss").on(
|
||||
table.term_id,
|
||||
table.language_code,
|
||||
table.text,
|
||||
),
|
||||
unique("unique_term_gloss").on(table.term_id, table.language_code),
|
||||
check(
|
||||
"language_code_check",
|
||||
sql`${table.language_code} IN (${sql.raw(SUPPORTED_LANGUAGE_CODES.map((l) => `'${l}'`).join(", "))})`,
|
||||
|
|
|
|||
|
|
@ -1,6 +1,7 @@
|
|||
import { db } from "@glossa/db";
|
||||
import { eq, and } from "drizzle-orm";
|
||||
import { terms, translations } from "@glossa/db/schema";
|
||||
import { eq, and, isNotNull, sql } from "drizzle-orm";
|
||||
import { terms, translations, term_glosses } from "@glossa/db/schema";
|
||||
import { alias } from "drizzle-orm/pg-core";
|
||||
|
||||
import type {
|
||||
SupportedLanguageCode,
|
||||
|
|
@ -8,25 +9,73 @@ import type {
|
|||
DifficultyLevel,
|
||||
} from "@glossa/shared";
|
||||
|
||||
/**
 * One source/target translation pair for a single term, as returned by the
 * game-terms query. Field names are deliberately neutral (no quiz semantics);
 * the service layer maps sourceText/targetText to prompt/answer.
 */
export type TranslationPairRow = {
  // Primary key of the `terms` row both translations belong to.
  termId: string;
  // Text in the source (known) language.
  sourceText: string;
  // Text in the target (learned) language.
  targetText: string;
  // Optional source-language gloss; null when the left-joined
  // `term_glosses` row is absent.
  sourceGloss: string | null;
};
|
||||
|
||||
// Note: difficulty filter is intentionally asymmetric. We filter on the target
|
||||
// (answer) side only — a word can be A2 in Italian but B1 in English, and what
|
||||
// matters for the learner is the difficulty of the word they're being taught.
|
||||
|
||||
export const getGameTerms = async (
|
||||
sourceLanguage: SupportedLanguageCode,
|
||||
targetLanguage: SupportedLanguageCode,
|
||||
pos: SupportedPos,
|
||||
difficulty: DifficultyLevel,
|
||||
count: number,
|
||||
) => {
|
||||
rounds: number,
|
||||
): Promise<TranslationPairRow[]> => {
|
||||
const sourceTranslations = alias(translations, "source_translations");
|
||||
const targetTranslations = alias(translations, "target_translations");
|
||||
|
||||
const rows = await db
|
||||
.select()
|
||||
.select({
|
||||
termId: terms.id,
|
||||
prompt: sourceTranslations.text,
|
||||
answer: targetTranslations.text,
|
||||
gloss: term_glosses.text,
|
||||
})
|
||||
.from(terms)
|
||||
.innerJoin(translations, eq(translations.term_id, terms.id))
|
||||
.innerJoin(
|
||||
sourceTranslations,
|
||||
and(
|
||||
eq(sourceTranslations.term_id, terms.id),
|
||||
eq(sourceTranslations.language_code, sourceLanguage), // Filter here!
|
||||
),
|
||||
)
|
||||
.innerJoin(
|
||||
targetTranslations,
|
||||
and(
|
||||
eq(targetTranslations.term_id, terms.id),
|
||||
eq(targetTranslations.language_code, targetLanguage), // Filter here!
|
||||
),
|
||||
)
|
||||
.leftJoin(
|
||||
term_glosses,
|
||||
and(
|
||||
eq(term_glosses.term_id, terms.id),
|
||||
eq(term_glosses.language_code, sourceLanguage),
|
||||
),
|
||||
)
|
||||
.where(
|
||||
and(
|
||||
eq(terms.pos, pos),
|
||||
eq(translations.language_code, targetLanguage),
|
||||
eq(translations.difficulty, difficulty),
|
||||
eq(targetTranslations.difficulty, difficulty),
|
||||
isNotNull(sourceTranslations.difficulty), // Good data quality check!
|
||||
),
|
||||
)
|
||||
.limit(count);
|
||||
// TODO(post-mvp): ORDER BY RANDOM() sorts the entire filtered result set before
|
||||
// applying LIMIT, which is fine at current data volumes (low thousands of rows
|
||||
// after POS + difficulty filters) but degrades as the terms table grows. Once
|
||||
// the database is fully populated and tagged, replace with one of:
|
||||
// - TABLESAMPLE BERNOULLI(n) for approximate sampling on large tables
|
||||
// - Random offset: SELECT ... OFFSET floor(random() * (SELECT count(*) ...))
|
||||
// - Pre-computed random column with a btree index, reshuffled periodically
|
||||
// Benchmark first — don't optimise until it actually hurts.
|
||||
.orderBy(sql`RANDOM()`)
|
||||
.limit(rounds);
|
||||
|
||||
return rows;
|
||||
};
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue