feat: migrate production schema from OMW to Kaikki flat vocabulary model
- Replace terms/translations/term_glosses/term_examples with vocabulary_entries and entry_translations
- Remove decks, topics and related tables (deferred)
- Add cefr_level and difficulty to entry_translations for game query filtering
- Update termModel.ts for new schema — getDistractors now takes sourceLanguage
- Update gameService.ts and multiplayerGameService.ts for entryId rename
- Update all test fixtures from termId to entryId
- Generate and apply migration 0011
This commit is contained in:
parent
38d8b85228
commit
963bff4eb8
10 changed files with 949 additions and 215 deletions
|
|
@ -10,6 +10,7 @@ import {
|
|||
index,
|
||||
boolean,
|
||||
integer,
|
||||
smallint,
|
||||
} from "drizzle-orm/pg-core";
|
||||
|
||||
import { sql, relations } from "drizzle-orm";
|
||||
|
|
@ -18,182 +19,100 @@ import {
|
|||
SUPPORTED_POS,
|
||||
SUPPORTED_LANGUAGE_CODES,
|
||||
CEFR_LEVELS,
|
||||
SUPPORTED_DECK_TYPES,
|
||||
DIFFICULTY_LEVELS,
|
||||
LOBBY_STATUSES,
|
||||
} from "@lila/shared";
|
||||
|
||||
export const terms = pgTable(
|
||||
"terms",
|
||||
// ── Vocabulary ────────────────────────────────────────────────────────────────
|
||||
|
||||
export const vocabulary_entries = pgTable(
|
||||
"vocabulary_entries",
|
||||
{
|
||||
id: uuid().primaryKey().defaultRandom(),
|
||||
source: varchar({ length: 50 }), // 'omw', 'wiktionary', null for manual
|
||||
source_id: text(), // synset_id value for omw, wiktionary QID, etc.
|
||||
headword: text().notNull(),
|
||||
language_code: varchar({ length: 10 }).notNull(),
|
||||
pos: varchar({ length: 20 }).notNull(),
|
||||
sense_index: smallint().notNull().default(0),
|
||||
gloss: text(),
|
||||
examples: text().array().notNull().default([]),
|
||||
cefr_level: varchar({ length: 2 }),
|
||||
difficulty: varchar({ length: 20 }),
|
||||
source: varchar({ length: 50 }).notNull().default("kaikki"),
|
||||
created_at: timestamp({ withTimezone: true }).defaultNow().notNull(),
|
||||
},
|
||||
(table) => [
|
||||
unique("unique_entry").on(
|
||||
table.headword,
|
||||
table.language_code,
|
||||
table.pos,
|
||||
table.sense_index,
|
||||
),
|
||||
check(
|
||||
"language_code_check",
|
||||
sql`${table.language_code} IN (${sql.raw(SUPPORTED_LANGUAGE_CODES.map((l) => `'${l}'`).join(", "))})`,
|
||||
),
|
||||
check(
|
||||
"pos_check",
|
||||
sql`${table.pos} IN (${sql.raw(SUPPORTED_POS.map((p) => `'${p}'`).join(", "))})`,
|
||||
),
|
||||
unique("unique_source_id").on(table.source, table.source_id),
|
||||
index("idx_terms_source_pos").on(table.source, table.pos),
|
||||
],
|
||||
);
|
||||
|
||||
export const term_glosses = pgTable(
|
||||
"term_glosses",
|
||||
{
|
||||
id: uuid().primaryKey().defaultRandom(),
|
||||
term_id: uuid()
|
||||
.notNull()
|
||||
.references(() => terms.id, { onDelete: "cascade" }),
|
||||
language_code: varchar({ length: 10 }).notNull(),
|
||||
text: text().notNull(),
|
||||
description: text(),
|
||||
created_at: timestamp({ withTimezone: true }).defaultNow().notNull(),
|
||||
},
|
||||
(table) => [
|
||||
unique("unique_term_gloss").on(table.term_id, table.language_code),
|
||||
check(
|
||||
"language_code_check",
|
||||
sql`${table.language_code} IN (${sql.raw(SUPPORTED_LANGUAGE_CODES.map((l) => `'${l}'`).join(", "))})`,
|
||||
),
|
||||
],
|
||||
);
|
||||
|
||||
export const term_examples = pgTable(
|
||||
"term_examples",
|
||||
{
|
||||
id: uuid().primaryKey().defaultRandom(),
|
||||
term_id: uuid()
|
||||
.notNull()
|
||||
.references(() => terms.id, { onDelete: "cascade" }),
|
||||
language_code: varchar({ length: 10 }).notNull(),
|
||||
text: text().notNull(),
|
||||
created_at: timestamp({ withTimezone: true }).defaultNow().notNull(),
|
||||
},
|
||||
(table) => [
|
||||
unique("unique_term_example").on(
|
||||
table.term_id,
|
||||
table.language_code,
|
||||
table.text,
|
||||
),
|
||||
check(
|
||||
"language_code_check",
|
||||
sql`${table.language_code} IN (${sql.raw(SUPPORTED_LANGUAGE_CODES.map((l) => `'${l}'`).join(", "))})`,
|
||||
),
|
||||
index("idx_term_examples_term_id").on(table.term_id, table.language_code),
|
||||
],
|
||||
);
|
||||
|
||||
export const translations = pgTable(
|
||||
"translations",
|
||||
{
|
||||
id: uuid().primaryKey().defaultRandom(),
|
||||
term_id: uuid()
|
||||
.notNull()
|
||||
.references(() => terms.id, { onDelete: "cascade" }),
|
||||
language_code: varchar({ length: 10 }).notNull(),
|
||||
text: text().notNull(),
|
||||
cefr_level: varchar({ length: 2 }),
|
||||
difficulty: varchar({ length: 20 }),
|
||||
created_at: timestamp({ withTimezone: true }).defaultNow().notNull(),
|
||||
},
|
||||
(table) => [
|
||||
unique("unique_translations").on(
|
||||
table.term_id,
|
||||
table.language_code,
|
||||
table.text,
|
||||
),
|
||||
check(
|
||||
"language_code_check",
|
||||
sql`${table.language_code} IN (${sql.raw(SUPPORTED_LANGUAGE_CODES.map((l) => `'${l}'`).join(", "))})`,
|
||||
),
|
||||
check(
|
||||
"cefr_check",
|
||||
sql`${table.cefr_level} IN (${sql.raw(CEFR_LEVELS.map((l) => `'${l}'`).join(", "))})`,
|
||||
sql`${table.cefr_level} IS NULL OR ${table.cefr_level} IN (${sql.raw(CEFR_LEVELS.map((l) => `'${l}'`).join(", "))})`,
|
||||
),
|
||||
check(
|
||||
"difficulty_check",
|
||||
sql`${table.difficulty} IN (${sql.raw(DIFFICULTY_LEVELS.map((d) => `'${d}'`).join(", "))})`,
|
||||
sql`${table.difficulty} IS NULL OR ${table.difficulty} IN (${sql.raw(DIFFICULTY_LEVELS.map((d) => `'${d}'`).join(", "))})`,
|
||||
),
|
||||
index("idx_translations_lang").on(
|
||||
index("idx_entries_lang_pos").on(
|
||||
table.language_code,
|
||||
table.pos,
|
||||
table.difficulty,
|
||||
table.cefr_level,
|
||||
table.term_id,
|
||||
),
|
||||
],
|
||||
);
|
||||
|
||||
export const decks = pgTable(
|
||||
"decks",
|
||||
export const entry_translations = pgTable(
|
||||
"entry_translations",
|
||||
{
|
||||
id: uuid().primaryKey().defaultRandom(),
|
||||
name: text().notNull(),
|
||||
description: text(),
|
||||
source_language: varchar({ length: 10 }).notNull(),
|
||||
validated_languages: varchar({ length: 10 }).array().notNull().default([]),
|
||||
type: varchar({ length: 20 }).notNull(),
|
||||
entry_id: uuid()
|
||||
.notNull()
|
||||
.references(() => vocabulary_entries.id, { onDelete: "cascade" }),
|
||||
target_language_code: varchar({ length: 10 }).notNull(),
|
||||
translation: text().notNull(),
|
||||
sense_hint: text(),
|
||||
cefr_level: varchar({ length: 2 }),
|
||||
difficulty: varchar({ length: 20 }),
|
||||
source: varchar({ length: 50 }).notNull().default("kaikki"),
|
||||
created_at: timestamp({ withTimezone: true }).defaultNow().notNull(),
|
||||
},
|
||||
(table) => [
|
||||
check(
|
||||
"source_language_check",
|
||||
sql`${table.source_language} IN (${sql.raw(SUPPORTED_LANGUAGE_CODES.map((l) => `'${l}'`).join(", "))})`,
|
||||
unique("unique_translation").on(
|
||||
table.entry_id,
|
||||
table.target_language_code,
|
||||
table.translation,
|
||||
),
|
||||
check(
|
||||
"validated_languages_check",
|
||||
sql`validated_languages <@ ARRAY[${sql.raw(SUPPORTED_LANGUAGE_CODES.map((l) => `'${l}'`).join(", "))}]::varchar[]`,
|
||||
"target_language_code_check",
|
||||
sql`${table.target_language_code} IN (${sql.raw(SUPPORTED_LANGUAGE_CODES.map((l) => `'${l}'`).join(", "))})`,
|
||||
),
|
||||
check(
|
||||
"validated_languages_excludes_source",
|
||||
sql`NOT (${table.source_language} = ANY(${table.validated_languages}))`,
|
||||
"cefr_check",
|
||||
sql`${table.cefr_level} IS NULL OR ${table.cefr_level} IN (${sql.raw(CEFR_LEVELS.map((l) => `'${l}'`).join(", "))})`,
|
||||
),
|
||||
check(
|
||||
"deck_type_check",
|
||||
sql`${table.type} IN (${sql.raw(SUPPORTED_DECK_TYPES.map((t) => `'${t}'`).join(", "))})`,
|
||||
"difficulty_check",
|
||||
sql`${table.difficulty} IS NULL OR ${table.difficulty} IN (${sql.raw(DIFFICULTY_LEVELS.map((d) => `'${d}'`).join(", "))})`,
|
||||
),
|
||||
index("idx_translations_target_lang").on(
|
||||
table.target_language_code,
|
||||
table.difficulty,
|
||||
table.entry_id,
|
||||
),
|
||||
unique("unique_deck_name").on(table.name, table.source_language),
|
||||
index("idx_decks_type").on(table.type, table.source_language),
|
||||
],
|
||||
);
|
||||
|
||||
export const deck_terms = pgTable(
|
||||
"deck_terms",
|
||||
{
|
||||
deck_id: uuid()
|
||||
.notNull()
|
||||
.references(() => decks.id, { onDelete: "cascade" }),
|
||||
term_id: uuid()
|
||||
.notNull()
|
||||
.references(() => terms.id, { onDelete: "cascade" }),
|
||||
},
|
||||
(table) => [primaryKey({ columns: [table.deck_id, table.term_id] })],
|
||||
);
|
||||
|
||||
export const topics = pgTable("topics", {
|
||||
id: uuid().primaryKey().defaultRandom(),
|
||||
slug: varchar({ length: 50 }).notNull().unique(),
|
||||
label: text().notNull(),
|
||||
description: text(),
|
||||
created_at: timestamp({ withTimezone: true }).defaultNow().notNull(),
|
||||
});
|
||||
|
||||
export const term_topics = pgTable(
|
||||
"term_topics",
|
||||
{
|
||||
term_id: uuid()
|
||||
.notNull()
|
||||
.references(() => terms.id, { onDelete: "cascade" }),
|
||||
topic_id: uuid()
|
||||
.notNull()
|
||||
.references(() => topics.id, { onDelete: "cascade" }),
|
||||
},
|
||||
(table) => [primaryKey({ columns: [table.term_id, table.topic_id] })],
|
||||
);
|
||||
// ── Auth (managed by Better Auth) ─────────────────────────────────────────────
|
||||
|
||||
export const user = pgTable("user", {
|
||||
id: text("id").primaryKey(),
|
||||
|
|
@ -204,7 +123,7 @@ export const user = pgTable("user", {
|
|||
createdAt: timestamp("created_at").defaultNow().notNull(),
|
||||
updatedAt: timestamp("updated_at")
|
||||
.defaultNow()
|
||||
.$onUpdate(() => /* @__PURE__ */ new Date())
|
||||
.$onUpdate(() => new Date())
|
||||
.notNull(),
|
||||
});
|
||||
|
||||
|
|
@ -216,7 +135,7 @@ export const session = pgTable(
|
|||
token: text("token").notNull().unique(),
|
||||
createdAt: timestamp("created_at").defaultNow().notNull(),
|
||||
updatedAt: timestamp("updated_at")
|
||||
.$onUpdate(() => /* @__PURE__ */ new Date())
|
||||
.$onUpdate(() => new Date())
|
||||
.notNull(),
|
||||
ipAddress: text("ip_address"),
|
||||
userAgent: text("user_agent"),
|
||||
|
|
@ -245,7 +164,7 @@ export const account = pgTable(
|
|||
password: text("password"),
|
||||
createdAt: timestamp("created_at").defaultNow().notNull(),
|
||||
updatedAt: timestamp("updated_at")
|
||||
.$onUpdate(() => /* @__PURE__ */ new Date())
|
||||
.$onUpdate(() => new Date())
|
||||
.notNull(),
|
||||
},
|
||||
(table) => [index("account_userId_idx").on(table.userId)],
|
||||
|
|
@ -261,24 +180,13 @@ export const verification = pgTable(
|
|||
createdAt: timestamp("created_at").defaultNow().notNull(),
|
||||
updatedAt: timestamp("updated_at")
|
||||
.defaultNow()
|
||||
.$onUpdate(() => /* @__PURE__ */ new Date())
|
||||
.$onUpdate(() => new Date())
|
||||
.notNull(),
|
||||
},
|
||||
(table) => [index("verification_identifier_idx").on(table.identifier)],
|
||||
);
|
||||
|
||||
export const userRelations = relations(user, ({ many }) => ({
|
||||
sessions: many(session),
|
||||
accounts: many(account),
|
||||
}));
|
||||
|
||||
export const sessionRelations = relations(session, ({ one }) => ({
|
||||
user: one(user, { fields: [session.userId], references: [user.id] }),
|
||||
}));
|
||||
|
||||
export const accountRelations = relations(account, ({ one }) => ({
|
||||
user: one(user, { fields: [account.userId], references: [user.id] }),
|
||||
}));
|
||||
// ── Lobbies ───────────────────────────────────────────────────────────────────
|
||||
|
||||
export const lobbies = pgTable(
|
||||
"lobbies",
|
||||
|
|
@ -318,6 +226,36 @@ export const lobby_players = pgTable(
|
|||
(table) => [primaryKey({ columns: [table.lobbyId, table.userId] })],
|
||||
);
|
||||
|
||||
// ── Relations ─────────────────────────────────────────────────────────────────
|
||||
|
||||
// Relation: one vocabulary_entries row has many entry_translations rows,
// exposed on the relational query API under the `translations` key.
export const vocabularyEntryRelations = relations(
|
||||
vocabulary_entries,
|
||||
({ many }) => ({ translations: many(entry_translations) }),
|
||||
);
|
||||
|
||||
// Relation: each entry_translations row belongs to exactly one
// vocabulary_entries row, joined on entry_translations.entry_id →
// vocabulary_entries.id and exposed under the `entry` key.
export const entryTranslationRelations = relations(
|
||||
entry_translations,
|
||||
({ one }) => ({
|
||||
entry: one(vocabulary_entries, {
|
||||
fields: [entry_translations.entry_id],
|
||||
references: [vocabulary_entries.id],
|
||||
}),
|
||||
}),
|
||||
);
|
||||
|
||||
// Relation: a user has many sessions and many accounts.
export const userRelations = relations(user, ({ many }) => ({
|
||||
sessions: many(session),
|
||||
accounts: many(account),
|
||||
}));
|
||||
|
||||
// Relation: each session belongs to one user (session.userId → user.id).
export const sessionRelations = relations(session, ({ one }) => ({
|
||||
user: one(user, { fields: [session.userId], references: [user.id] }),
|
||||
}));
|
||||
|
||||
// Relation: each account belongs to one user (account.userId → user.id).
export const accountRelations = relations(account, ({ one }) => ({
|
||||
user: one(user, { fields: [account.userId], references: [user.id] }),
|
||||
}));
|
||||
|
||||
export const lobbyRelations = relations(lobbies, ({ one, many }) => ({
|
||||
host: one(user, { fields: [lobbies.hostUserId], references: [user.id] }),
|
||||
players: many(lobby_players),
|
||||
|
|
|
|||
|
|
@ -1,25 +1,27 @@
|
|||
import { db } from "@lila/db";
|
||||
import { eq, and, isNotNull, sql, ne } from "drizzle-orm";
|
||||
import { terms, translations, term_glosses } from "@lila/db/schema";
|
||||
import { eq, and, ne, sql, isNotNull } from "drizzle-orm";
|
||||
import { vocabulary_entries, entry_translations } from "@lila/db/schema";
|
||||
import { alias } from "drizzle-orm/pg-core";
|
||||
|
||||
import type {
|
||||
SupportedLanguageCode,
|
||||
SupportedPos,
|
||||
DifficultyLevel,
|
||||
} from "@lila/shared";
|
||||
|
||||
// ── Types ─────────────────────────────────────────────────────────────────────
|
||||
|
||||
export type TranslationPairRow = {
|
||||
termId: string;
|
||||
entryId: string;
|
||||
sourceText: string;
|
||||
targetText: string;
|
||||
sourceGloss: string | null;
|
||||
};
|
||||
|
||||
// Note: difficulty filter is intentionally asymmetric. We filter on the target
|
||||
// (answer) side only — a word can be A2 in Italian but B1 in English, and what
|
||||
// matters for the learner is the difficulty of the word they're being taught.
|
||||
// ── Queries ───────────────────────────────────────────────────────────────────
|
||||
|
||||
// Note: difficulty filter is intentionally on the target (translation) side.
|
||||
// A word can be A2 in one language but B1 in another — what matters for the
|
||||
// learner is the difficulty of the word they are being tested on. The source
// entry is only required to have a non-null difficulty, not a matching one.
|
||||
export const getGameTerms = async (
|
||||
sourceLanguage: SupportedLanguageCode,
|
||||
targetLanguage: SupportedLanguageCode,
|
||||
|
|
@ -27,53 +29,36 @@ export const getGameTerms = async (
|
|||
difficulty: DifficultyLevel,
|
||||
rounds: number,
|
||||
): Promise<TranslationPairRow[]> => {
|
||||
const sourceTranslations = alias(translations, "source_translations");
|
||||
const targetTranslations = alias(translations, "target_translations");
|
||||
const sourceEntries = alias(vocabulary_entries, "source_entries");
|
||||
const targetTranslations = alias(entry_translations, "target_translations");
|
||||
|
||||
const rows = await db
|
||||
.select({
|
||||
termId: terms.id,
|
||||
sourceText: sourceTranslations.text,
|
||||
targetText: targetTranslations.text,
|
||||
sourceGloss: term_glosses.text,
|
||||
entryId: sourceEntries.id,
|
||||
sourceText: sourceEntries.headword,
|
||||
targetText: targetTranslations.translation,
|
||||
sourceGloss: sourceEntries.gloss,
|
||||
})
|
||||
.from(terms)
|
||||
.innerJoin(
|
||||
sourceTranslations,
|
||||
and(
|
||||
eq(sourceTranslations.term_id, terms.id),
|
||||
eq(sourceTranslations.language_code, sourceLanguage), // Filter here!
|
||||
),
|
||||
)
|
||||
.from(sourceEntries)
|
||||
.innerJoin(
|
||||
targetTranslations,
|
||||
and(
|
||||
eq(targetTranslations.term_id, terms.id),
|
||||
eq(targetTranslations.language_code, targetLanguage), // Filter here!
|
||||
),
|
||||
)
|
||||
.leftJoin(
|
||||
term_glosses,
|
||||
and(
|
||||
eq(term_glosses.term_id, terms.id),
|
||||
eq(term_glosses.language_code, sourceLanguage),
|
||||
eq(targetTranslations.entry_id, sourceEntries.id),
|
||||
eq(targetTranslations.target_language_code, targetLanguage),
|
||||
eq(targetTranslations.difficulty, difficulty),
|
||||
isNotNull(targetTranslations.translation),
|
||||
),
|
||||
)
|
||||
.where(
|
||||
and(
|
||||
eq(terms.pos, pos),
|
||||
eq(targetTranslations.difficulty, difficulty),
|
||||
isNotNull(sourceTranslations.difficulty), // Good data quality check!
|
||||
eq(sourceEntries.language_code, sourceLanguage),
|
||||
eq(sourceEntries.pos, pos),
|
||||
isNotNull(sourceEntries.difficulty),
|
||||
),
|
||||
)
|
||||
// TODO(post-mvp): ORDER BY RANDOM() sorts the entire filtered result set before
|
||||
// applying LIMIT, which is fine at current data volumes (low thousands of rows
|
||||
// after POS + difficulty filters) but degrades as the terms table grows. Once
|
||||
// the database is fully populated and tagged, replace with one of:
|
||||
// - TABLESAMPLE BERNOULLI(n) for approximate sampling on large tables
|
||||
// - Random offset: SELECT ... OFFSET floor(random() * (SELECT count(*) ...))
|
||||
// - Pre-computed random column with a btree index, reshuffled periodically
|
||||
// Benchmark first — don't optimise until it actually hurts.
|
||||
// TODO(post-mvp): ORDER BY RANDOM() sorts the entire filtered result set
|
||||
// before applying LIMIT, which is fine at current data volumes but degrades
|
||||
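// as the table grows. Options once it actually hurts (benchmark first):
// - TABLESAMPLE BERNOULLI(n) for approximate sampling on large tables
// - Random offset: SELECT ... OFFSET floor(random() * (SELECT count(*) ...))
// - Pre-computed random column with a btree index, reshuffled periodically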
|
||||
.orderBy(sql`RANDOM()`)
|
||||
.limit(rounds);
|
||||
|
||||
|
|
@ -81,32 +66,33 @@ export const getGameTerms = async (
|
|||
};
|
||||
|
||||
export const getDistractors = async (
|
||||
excludeTermId: string,
|
||||
excludeEntryId: string,
|
||||
excludeText: string,
|
||||
sourceLanguage: SupportedLanguageCode,
|
||||
targetLanguage: SupportedLanguageCode,
|
||||
pos: SupportedPos,
|
||||
difficulty: DifficultyLevel,
|
||||
count: number,
|
||||
): Promise<string[]> => {
|
||||
const rows = await db
|
||||
.select({ text: translations.text })
|
||||
.from(terms)
|
||||
.select({ text: entry_translations.translation })
|
||||
.from(vocabulary_entries)
|
||||
.innerJoin(
|
||||
translations,
|
||||
entry_translations,
|
||||
and(
|
||||
eq(translations.term_id, terms.id),
|
||||
eq(translations.language_code, targetLanguage),
|
||||
eq(entry_translations.entry_id, vocabulary_entries.id),
|
||||
eq(entry_translations.target_language_code, targetLanguage),
|
||||
eq(entry_translations.difficulty, difficulty),
|
||||
),
|
||||
)
|
||||
.where(
|
||||
and(
|
||||
eq(terms.pos, pos),
|
||||
eq(translations.difficulty, difficulty),
|
||||
ne(terms.id, excludeTermId),
|
||||
ne(translations.text, excludeText),
|
||||
eq(vocabulary_entries.language_code, sourceLanguage),
|
||||
eq(vocabulary_entries.pos, pos),
|
||||
ne(vocabulary_entries.id, excludeEntryId),
|
||||
ne(entry_translations.translation, excludeText),
|
||||
),
|
||||
)
|
||||
// TODO(post-mvp): same ORDER BY RANDOM() concern as getGameTerms — see comment there.
|
||||
.orderBy(sql`RANDOM()`)
|
||||
.limit(count);
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue