diff --git a/documentation/notes.md b/documentation/notes.md index 7a0a373..4025e7a 100644 --- a/documentation/notes.md +++ b/documentation/notes.md @@ -3,6 +3,8 @@ ## tasks - pinning dependencies in package.json files +- add this to drizzle migrations file: +✅ ALTER TABLE terms ADD CHECK (pos IN ('noun', 'verb', 'adjective', etc)); ## open word net diff --git a/packages/db/package.json b/packages/db/package.json index 55c3b40..45cad40 100644 --- a/packages/db/package.json +++ b/packages/db/package.json @@ -7,6 +7,7 @@ "build": "tsc" }, "dependencies": { + "@glossa/shared": "workspace:*", "dotenv": "^17.3.1", "drizzle-orm": "^0.45.1", "pg": "^8.20.0" diff --git a/packages/db/src/db/schema.ts b/packages/db/src/db/schema.ts index e69de29..6ad36d6 100644 --- a/packages/db/src/db/schema.ts +++ b/packages/db/src/db/schema.ts @@ -0,0 +1,193 @@ +import { + pgTable, + text, + uuid, + timestamp, + varchar, + unique, + check, + boolean, + integer, + primaryKey, + index, +} from "drizzle-orm/pg-core"; + +import { sql } from "drizzle-orm"; + +import { SUPPORTED_POS, SUPPORTED_LANGUAGE_CODES } from "@glossa/shared"; + +export const terms = pgTable( + "terms", + { + id: uuid().primaryKey().defaultRandom(), + synset_id: text().unique().notNull(), + pos: varchar({ length: 20 }).notNull(), + created_at: timestamp({ withTimezone: true }).defaultNow().notNull(), + }, + (table) => [ + check( + "pos_check", + sql`${table.pos} IN (${sql.raw(SUPPORTED_POS.map((p) => `'${p}'`).join(", "))})`, + ), + index("idx_terms_pos").on(table.pos), + ], +); +// sql.raw() is safe here: SUPPORTED_POS is a compile-time constant from @glossa/shared, +// not user input. If you're refactoring this, make sure the values still come from that +// constant and never from a runtime/user-controlled source. 
+ +export const translations = pgTable( + "translations", + { + id: uuid().primaryKey().defaultRandom(), + term_id: uuid() + .notNull() + .references(() => terms.id, { onDelete: "cascade" }), + language_code: varchar({ length: 10 }).notNull(), + text: text().notNull(), + created_at: timestamp({ withTimezone: true }).defaultNow().notNull(), + }, + (table) => [ + unique("unique_translations").on( + table.term_id, + table.language_code, + table.text, + ), + index("idx_translations_lang").on(table.language_code, table.term_id), + ], +); + +export const term_glosses = pgTable( + "term_glosses", + { + id: uuid().primaryKey().defaultRandom(), + term_id: uuid() + .notNull() + .references(() => terms.id, { onDelete: "cascade" }), + language_code: varchar({ length: 10 }).notNull(), + text: text().notNull(), + created_at: timestamp({ withTimezone: true }).defaultNow().notNull(), + }, + (table) => [ + unique("unique_term_gloss").on( + table.term_id, + table.language_code, + table.text, + ), + // Standalone term_id index, alongside the composite unique constraint above. + index("idx_term_glosses_term").on(table.term_id), + ], +); + +export const language_pairs = pgTable( + "language_pairs", + { + id: uuid().primaryKey().defaultRandom(), + source_language: varchar({ length: 10 }).notNull(), + target_language: varchar({ length: 10 }).notNull(), + label: text(), + active: boolean().default(true).notNull(), + created_at: timestamp({ withTimezone: true }).defaultNow().notNull(), + }, + (table) => [ + unique("unique_source_target").on( + table.source_language, + table.target_language, + ), + check( + "source_language_check", + sql`${table.source_language} IN (${sql.raw(SUPPORTED_LANGUAGE_CODES.map((l) => `'${l}'`).join(", "))})`, + ), + check( + "target_language_check", + sql`${table.target_language} IN (${sql.raw(SUPPORTED_LANGUAGE_CODES.map((l) => `'${l}'`).join(", "))})`, + ), + check( + "no_self_pair", + sql`${table.source_language} != ${table.target_language}`, + ), + index("idx_pairs_active").on( + table.active, + table.source_language, + 
table.target_language, + ), + ], +); + +export const users = pgTable("users", { + id: uuid().primaryKey().defaultRandom(), + openauth_sub: text().unique().notNull(), + email: varchar({ length: 255 }).unique(), + display_name: varchar({ length: 100 }).unique(), + created_at: timestamp({ withTimezone: true }).defaultNow().notNull(), + last_login_at: timestamp({ withTimezone: true }), +}); +// KNOWN LIMITATION: email is nullable (GitHub users may have no public email) +// and unique. If two OAuth providers return the same email for different +// accounts, the second insert fails the unique constraint. For MVP this is +// acceptable since users are identified by openauth_sub, not email. If +// multi-provider login per user is added later, consider a separate user_emails table. + +export const decks = pgTable( + "decks", + { + id: uuid().primaryKey().defaultRandom(), + name: text().notNull(), + description: text(), + language_pair_id: uuid() + .notNull() + .references(() => language_pairs.id, { onDelete: "cascade" }), + created_by: uuid() + .notNull() + .references(() => users.id, { onDelete: "cascade" }), + is_public: boolean().default(false).notNull(), + created_at: timestamp({ withTimezone: true }).defaultNow().notNull(), + }, + (table) => [ + unique("unique_deck_name").on(table.name, table.created_by), + index("idx_decks_created_by").on(table.created_by), + index("idx_decks_language_pair").on(table.language_pair_id), + ], +); + +export const deck_terms = pgTable( + "deck_terms", + { + deck_id: uuid() + .notNull() + .references(() => decks.id, { onDelete: "cascade" }), + term_id: uuid() + .notNull() + .references(() => terms.id, { onDelete: "cascade" }), + position: integer().notNull(), + added_at: timestamp({ withTimezone: true }).defaultNow().notNull(), + }, + (table) => [ + primaryKey({ columns: [table.deck_id, table.term_id] }), + index("idx_deck_terms_term").on(table.term_id), + ], +); + +/* + * INTENTIONAL DESIGN DECISIONS + * + * surrogate id + synset_id (terms): + * Both exist on purpose. 
synset_id is the natural WordNet key used for lookups + * and re-imports. id is the stable internal FK target — if synset IDs change in + * a future WordNet version, FK references don't need to cascade. + * + * display_name UNIQUE (users): + * Unique usernames are a feature, not an oversight. One "Alex" per app. + * + * UNIQUE(term_id, language_code, text) (translations): + * This does allow synonyms. "banco" and "orilla" are different text values and + * both insert cleanly. The constraint only prevents exact duplicate rows. + * + * updated_at omitted: + * A column with DEFAULT now() that is never written on updates is misleading. + * Omitted until a trigger or ORM hook is in place to actually maintain it. + * + * FK indexes: + * All FK columns are covered — either by explicit indexes, composite unique + * indexes, or the composite PK on deck_terms, so FK joins can use an index. + */ diff --git a/packages/shared/src/constants.ts b/packages/shared/src/constants.ts new file mode 100644 index 0000000..e868bbb --- /dev/null +++ b/packages/shared/src/constants.ts @@ -0,0 +1,3 @@ +export const SUPPORTED_LANGUAGE_CODES = ["en", "it"] as const; + +export const SUPPORTED_POS = ["noun"] as const; diff --git a/packages/shared/src/index.ts b/packages/shared/src/index.ts index 1bda49b..548d69e 100644 --- a/packages/shared/src/index.ts +++ b/packages/shared/src/index.ts @@ -1 +1 @@ -export const placeholder = true; +export * from "./constants.js"; diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index a281c80..5ff559a 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -112,6 +112,9 @@ importers: packages/db: dependencies: + '@glossa/shared': + specifier: workspace:* + version: link:../shared dotenv: specifier: ^17.3.1 version: 17.3.1