formatting
This commit is contained in:
parent
20fa6a9331
commit
e3a2136720
11 changed files with 72803 additions and 408878 deletions
|
|
@ -14,14 +14,12 @@ Each synset extracted from WordNet is represented as:
|
||||||
{
|
{
|
||||||
"synset_id": "ili:i35545",
|
"synset_id": "ili:i35545",
|
||||||
"pos": "noun",
|
"pos": "noun",
|
||||||
"translations": {
|
"translations": { "en": ["entity"], "it": ["cosa", "entità"] }
|
||||||
"en": ["entity"],
|
|
||||||
"it": ["cosa", "entità"]
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
```
|
```
|
||||||
|
|
||||||
**Fields:**
|
**Fields:**
|
||||||
|
|
||||||
- `synset_id` — OMW Interlingual Index ID, maps to `terms.synset_id` in the DB
|
- `synset_id` — OMW Interlingual Index ID, maps to `terms.synset_id` in the DB
|
||||||
- `pos` — part of speech, matches the CHECK constraint on `terms.pos`
|
- `pos` — part of speech, matches the CHECK constraint on `terms.pos`
|
||||||
- `translations` — object of language code → array of lemmas (synonyms within a synset)
|
- `translations` — object of language code → array of lemmas (synonyms within a synset)
|
||||||
|
|
@ -53,20 +51,21 @@ translations
|
||||||
## 3. Seeding Script — v1 (batch, truncate-based)
|
## 3. Seeding Script — v1 (batch, truncate-based)
|
||||||
|
|
||||||
### Approach
|
### Approach
|
||||||
|
|
||||||
- Read a single JSON file
|
- Read a single JSON file
|
||||||
- Batch inserts into `terms` and `translations` in groups of 500
|
- Batch inserts into `terms` and `translations` in groups of 500
|
||||||
- Truncate tables before each run for a clean slate
|
- Truncate tables before each run for a clean slate
|
||||||
|
|
||||||
### Key decisions made during development
|
### Key decisions made during development
|
||||||
|
|
||||||
| Issue | Resolution |
|
| Issue | Resolution |
|
||||||
|-------|-----------|
|
| -------------------------------- | --------------------------------------------------- |
|
||||||
| `JSON.parse` returns `any` | Added `Array.isArray` check before casting |
|
| `JSON.parse` returns `any` | Added `Array.isArray` check before casting |
|
||||||
| `forEach` doesn't await | Switched to `for...of` |
|
| `forEach` doesn't await | Switched to `for...of` |
|
||||||
| Empty array types | Used Drizzle's `$inferInsert` types |
|
| Empty array types | Used Drizzle's `$inferInsert` types |
|
||||||
| `translations` naming conflict | Renamed local variable to `translationRows` |
|
| `translations` naming conflict | Renamed local variable to `translationRows` |
|
||||||
| Final batch not flushed | Added `if (termsArray.length > 0)` guard after loop |
|
| Final batch not flushed | Added `if (termsArray.length > 0)` guard after loop |
|
||||||
| Exact batch size check `=== 500` | Changed to `>= 500` |
|
| Exact batch size check `=== 500` | Changed to `>= 500` |
|
||||||
|
|
||||||
### Final script structure
|
### Final script structure
|
||||||
|
|
||||||
|
|
@ -134,7 +133,9 @@ const main = async () => {
|
||||||
|
|
||||||
if (termsArray.length >= 500) {
|
if (termsArray.length >= 500) {
|
||||||
batchCount++;
|
batchCount++;
|
||||||
console.log(`Uploading batch ${batchCount} (${batchCount * 500}/${allSynsets.length} synsets)...`);
|
console.log(
|
||||||
|
`Uploading batch ${batchCount} (${batchCount * 500}/${allSynsets.length} synsets)...`,
|
||||||
|
);
|
||||||
await uploadToDB(termsArray, translationsArray);
|
await uploadToDB(termsArray, translationsArray);
|
||||||
termsArray.length = 0;
|
termsArray.length = 0;
|
||||||
translationsArray.length = 0;
|
translationsArray.length = 0;
|
||||||
|
|
@ -143,7 +144,9 @@ const main = async () => {
|
||||||
|
|
||||||
if (termsArray.length > 0) {
|
if (termsArray.length > 0) {
|
||||||
batchCount++;
|
batchCount++;
|
||||||
console.log(`Uploading final batch (${allSynsets.length}/${allSynsets.length} synsets)...`);
|
console.log(
|
||||||
|
`Uploading final batch (${allSynsets.length}/${allSynsets.length} synsets)...`,
|
||||||
|
);
|
||||||
await uploadToDB(termsArray, translationsArray);
|
await uploadToDB(termsArray, translationsArray);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -161,6 +164,7 @@ main().catch((error) => {
|
||||||
## 4. Pitfalls Encountered
|
## 4. Pitfalls Encountered
|
||||||
|
|
||||||
### Duplicate key on re-run
|
### Duplicate key on re-run
|
||||||
|
|
||||||
Running the script twice causes `duplicate key value violates unique constraint "terms_synset_id_unique"`. Fix: truncate before seeding.
|
Running the script twice causes `duplicate key value violates unique constraint "terms_synset_id_unique"`. Fix: truncate before seeding.
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
|
|
@ -168,15 +172,19 @@ docker exec -it glossa-database psql -U glossa -d glossa -c "TRUNCATE translatio
|
||||||
```
|
```
|
||||||
|
|
||||||
### `onConflictDoNothing` breaks FK references
|
### `onConflictDoNothing` breaks FK references
|
||||||
|
|
||||||
When `onConflictDoNothing` skips a `terms` insert, the in-memory UUID is never written to the DB. Subsequent `translations` inserts reference that non-existent UUID, causing a FK violation. This is why the truncate approach is correct for batch seeding.
|
When `onConflictDoNothing` skips a `terms` insert, the in-memory UUID is never written to the DB. Subsequent `translations` inserts reference that non-existent UUID, causing a FK violation. This is why the truncate approach is correct for batch seeding.
|
||||||
|
|
||||||
### DATABASE_URL misconfigured
|
### DATABASE_URL misconfigured
|
||||||
|
|
||||||
Correct format:
|
Correct format:
|
||||||
|
|
||||||
```
|
```
|
||||||
DATABASE_URL=postgresql://glossa:glossa@localhost:5432/glossa
|
DATABASE_URL=postgresql://glossa:glossa@localhost:5432/glossa
|
||||||
```
|
```
|
||||||
|
|
||||||
### Tables not found after `docker compose up`
|
### Tables not found after `docker compose up`
|
||||||
|
|
||||||
Migrations must be applied first: `npx drizzle-kit migrate`
|
Migrations must be applied first: `npx drizzle-kit migrate`
|
||||||
|
|
||||||
---
|
---
|
||||||
|
|
@ -205,10 +213,13 @@ docker exec -it glossa-database psql -U glossa -d glossa -c "SELECT COUNT(*) FRO
|
||||||
## 6. Seeding Script — v2 (incremental upsert, multi-file)
|
## 6. Seeding Script — v2 (incremental upsert, multi-file)
|
||||||
|
|
||||||
### Motivation
|
### Motivation
|
||||||
|
|
||||||
The truncate approach is fine for dev but unsuitable for production — it wipes all data. The v2 approach extends the database incrementally without ever truncating.
|
The truncate approach is fine for dev but unsuitable for production — it wipes all data. The v2 approach extends the database incrementally without ever truncating.
|
||||||
|
|
||||||
### File naming convention
|
### File naming convention
|
||||||
|
|
||||||
One JSON file per language pair per POS:
|
One JSON file per language pair per POS:
|
||||||
|
|
||||||
```
|
```
|
||||||
scripts/datafiles/
|
scripts/datafiles/
|
||||||
en-it-nouns.json
|
en-it-nouns.json
|
||||||
|
|
@ -219,7 +230,9 @@ scripts/datafiles/
|
||||||
```
|
```
|
||||||
|
|
||||||
### How incremental upsert works
|
### How incremental upsert works
|
||||||
|
|
||||||
For a concept like "dog" already in the DB with English and Italian:
|
For a concept like "dog" already in the DB with English and Italian:
|
||||||
|
|
||||||
1. Import `en-fr-nouns.json`
|
1. Import `en-fr-nouns.json`
|
||||||
2. Upsert `terms` by `synset_id` — finds existing row, returns its real ID
|
2. Upsert `terms` by `synset_id` — finds existing row, returns its real ID
|
||||||
3. `dog (en)` already exists → skipped by `onConflictDoNothing`
|
3. `dog (en)` already exists → skipped by `onConflictDoNothing`
|
||||||
|
|
@ -228,6 +241,7 @@ For a concept like "dog" already in the DB with English and Italian:
|
||||||
The concept is **extended**, not replaced.
|
The concept is **extended**, not replaced.
|
||||||
|
|
||||||
### Tradeoff vs batch approach
|
### Tradeoff vs batch approach
|
||||||
|
|
||||||
Batching is no longer possible since you need the real `term.id` from the DB before inserting translations. Each synset is processed individually. For 25k rows this is still fast enough.
|
Batching is no longer possible since you need the real `term.id` from the DB before inserting translations. Each synset is processed individually. For 25k rows this is still fast enough.
|
||||||
|
|
||||||
### Key types added
|
### Key types added
|
||||||
|
|
@ -252,7 +266,9 @@ type FileName = {
|
||||||
const parseFilename = (filename: string): FileName => {
|
const parseFilename = (filename: string): FileName => {
|
||||||
const parts = filename.replace(".json", "").split("-");
|
const parts = filename.replace(".json", "").split("-");
|
||||||
if (parts.length !== 3)
|
if (parts.length !== 3)
|
||||||
throw new Error(`Invalid filename format: ${filename}. Expected: sourcelang-targetlang-pos.json`);
|
throw new Error(
|
||||||
|
`Invalid filename format: ${filename}. Expected: sourcelang-targetlang-pos.json`,
|
||||||
|
);
|
||||||
const [sourceLang, targetLang, pos] = parts;
|
const [sourceLang, targetLang, pos] = parts;
|
||||||
if (!SUPPORTED_LANGUAGE_CODES.includes(sourceLang as LANGUAGE_CODE))
|
if (!SUPPORTED_LANGUAGE_CODES.includes(sourceLang as LANGUAGE_CODE))
|
||||||
throw new Error(`Unsupported language code: ${sourceLang}`);
|
throw new Error(`Unsupported language code: ${sourceLang}`);
|
||||||
|
|
@ -278,10 +294,7 @@ const upsertSynset = async (
|
||||||
const [upsertedTerm] = await db
|
const [upsertedTerm] = await db
|
||||||
.insert(terms)
|
.insert(terms)
|
||||||
.values({ synset_id: synset.synset_id, pos: synset.pos })
|
.values({ synset_id: synset.synset_id, pos: synset.pos })
|
||||||
.onConflictDoUpdate({
|
.onConflictDoUpdate({ target: terms.synset_id, set: { pos: synset.pos } })
|
||||||
target: terms.synset_id,
|
|
||||||
set: { pos: synset.pos },
|
|
||||||
})
|
|
||||||
.returning({ id: terms.id, created_at: terms.created_at });
|
.returning({ id: terms.id, created_at: terms.created_at });
|
||||||
|
|
||||||
const termInserted = upsertedTerm.created_at > new Date(Date.now() - 1000);
|
const termInserted = upsertedTerm.created_at > new Date(Date.now() - 1000);
|
||||||
|
|
@ -310,12 +323,12 @@ const upsertSynset = async (
|
||||||
|
|
||||||
## 7. Strategy Comparison
|
## 7. Strategy Comparison
|
||||||
|
|
||||||
| Strategy | Use case | Pros | Cons |
|
| Strategy | Use case | Pros | Cons |
|
||||||
|----------|----------|------|------|
|
| ------------------ | ----------------------------- | --------------------- | -------------------- |
|
||||||
| Truncate + batch | Dev / first-time setup | Fast, simple | Wipes all data |
|
| Truncate + batch | Dev / first-time setup | Fast, simple | Wipes all data |
|
||||||
| Incremental upsert | Production / adding languages | Safe, non-destructive | No batching, slower |
|
| Incremental upsert | Production / adding languages | Safe, non-destructive | No batching, slower |
|
||||||
| Migrations-as-data | Production audit trail | Clean history | Files accumulate |
|
| Migrations-as-data | Production audit trail | Clean history | Files accumulate |
|
||||||
| Diff-based sync | Large production datasets | Minimal writes | Complex to implement |
|
| Diff-based sync | Large production datasets | Minimal writes | Complex to implement |
|
||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
|
|
@ -331,6 +344,7 @@ The `exports` field must be an object, not an array:
|
||||||
```
|
```
|
||||||
|
|
||||||
Imports then resolve as:
|
Imports then resolve as:
|
||||||
|
|
||||||
```ts
|
```ts
|
||||||
import { db } from "@glossa/db";
|
import { db } from "@glossa/db";
|
||||||
import { terms, translations } from "@glossa/db/schema";
|
import { terms, translations } from "@glossa/db/schema";
|
||||||
|
|
|
||||||
|
|
@ -61,19 +61,22 @@ Production will use Nginx to serve static Vite build output.
|
||||||
**Original approach:** Store `frequency_rank` on `terms` table and filter by rank range for difficulty.
|
**Original approach:** Store `frequency_rank` on `terms` table and filter by rank range for difficulty.
|
||||||
|
|
||||||
**Problem discovered:** WordNet/OMW frequency data is unreliable for language learning. Extraction produced results like:
|
**Problem discovered:** WordNet/OMW frequency data is unreliable for language learning. Extraction produced results like:
|
||||||
|
|
||||||
- Rank 1: "In" → "indio" (chemical symbol: Indium)
|
- Rank 1: "In" → "indio" (chemical symbol: Indium)
|
||||||
- Rank 2: "Be" → "berillio" (chemical symbol: Beryllium)
|
- Rank 2: "Be" → "berillio" (chemical symbol: Beryllium)
|
||||||
- Rank 7: "He" → "elio" (chemical symbol: Helium)
|
- Rank 7: "He" → "elio" (chemical symbol: Helium)
|
||||||
|
|
||||||
These are technically "common" in WordNet (every element is a noun) but useless for vocabulary learning.
|
These are technically "common" in WordNet (every element is a noun) but useless for vocabulary learning.
|
||||||
|
|
||||||
**Decision:**
|
**Decision:**
|
||||||
|
|
||||||
- `terms` table stores ALL available OMW synsets (raw data, no frequency filtering)
|
- `terms` table stores ALL available OMW synsets (raw data, no frequency filtering)
|
||||||
- `decks` table stores curated learning lists (A1, A2, B1, "Most Common 1000", etc.)
|
- `decks` table stores curated learning lists (A1, A2, B1, "Most Common 1000", etc.)
|
||||||
- `deck_terms` junction table links terms to decks with position ordering
|
- `deck_terms` junction table links terms to decks with position ordering
|
||||||
- `rooms.deck_id` specifies which vocabulary deck a game uses
|
- `rooms.deck_id` specifies which vocabulary deck a game uses
|
||||||
|
|
||||||
**Benefits:**
|
**Benefits:**
|
||||||
|
|
||||||
- Curricula can come from external sources (CEFR lists, Oxford 3000, SUBTLEX)
|
- Curricula can come from external sources (CEFR lists, Oxford 3000, SUBTLEX)
|
||||||
- Bad data (chemical symbols, obscure words) excluded at deck level, not schema level
|
- Bad data (chemical symbols, obscure words) excluded at deck level, not schema level
|
||||||
- Users can create custom decks later
|
- Users can create custom decks later
|
||||||
|
|
@ -161,7 +164,8 @@ Then `sudo sysctl -p` or restart Docker.
|
||||||
|
|
||||||
**Problem:** Embeds auth provider in the primary key (e.g. `"google|12345"`). If OpenAuth changes format or a second provider is added, the PK cascades through all FKs (`rooms.host_id`, `room_players.user_id`).
|
**Problem:** Embeds auth provider in the primary key (e.g. `"google|12345"`). If OpenAuth changes format or a second provider is added, the PK cascades through all FKs (`rooms.host_id`, `room_players.user_id`).
|
||||||
|
|
||||||
**Decision:**
|
**Decision:**
|
||||||
|
|
||||||
- `users.id` = internal UUID (stable FK target)
|
- `users.id` = internal UUID (stable FK target)
|
||||||
- `users.openauth_sub` = text UNIQUE (auth provider claim)
|
- `users.openauth_sub` = text UNIQUE (auth provider claim)
|
||||||
- Allows adding multiple auth providers per user later without FK changes
|
- Allows adding multiple auth providers per user later without FK changes
|
||||||
|
|
@ -177,6 +181,7 @@ Allows multiple synonyms per language per term (e.g. "dog", "hound" for same syn
|
||||||
### Decks: `pair_id` is nullable
|
### Decks: `pair_id` is nullable
|
||||||
|
|
||||||
`decks.pair_id` references `language_pairs` but is nullable. Reasons:
|
`decks.pair_id` references `language_pairs` but is nullable. Reasons:
|
||||||
|
|
||||||
- Single-language decks (e.g. "English Grammar")
|
- Single-language decks (e.g. "English Grammar")
|
||||||
- Multi-pair decks (e.g. "Cognates" spanning EN-IT and EN-FR)
|
- Multi-pair decks (e.g. "Cognates" spanning EN-IT and EN-FR)
|
||||||
- System decks (created by app, not tied to specific user)
|
- System decks (created by app, not tied to specific user)
|
||||||
|
|
@ -186,19 +191,22 @@ Allows multiple synonyms per language per term (e.g. "dog", "hound" for same syn
|
||||||
**Original approach:** Store `frequency_rank` on `terms` table and filter by rank range for difficulty.
|
**Original approach:** Store `frequency_rank` on `terms` table and filter by rank range for difficulty.
|
||||||
|
|
||||||
**Problem discovered:** WordNet/OMW frequency data is unreliable for language learning. Extraction produced results like:
|
**Problem discovered:** WordNet/OMW frequency data is unreliable for language learning. Extraction produced results like:
|
||||||
|
|
||||||
- Rank 1: "In" → "indio" (chemical symbol: Indium)
|
- Rank 1: "In" → "indio" (chemical symbol: Indium)
|
||||||
- Rank 2: "Be" → "berillio" (chemical symbol: Beryllium)
|
- Rank 2: "Be" → "berillio" (chemical symbol: Beryllium)
|
||||||
- Rank 7: "He" → "elio" (chemical symbol: Helium)
|
- Rank 7: "He" → "elio" (chemical symbol: Helium)
|
||||||
|
|
||||||
These are technically "common" in WordNet (every element is a noun) but useless for vocabulary learning.
|
These are technically "common" in WordNet (every element is a noun) but useless for vocabulary learning.
|
||||||
|
|
||||||
**Decision:**
|
**Decision:**
|
||||||
|
|
||||||
- `terms` table stores ALL available OMW synsets (raw data, no frequency filtering)
|
- `terms` table stores ALL available OMW synsets (raw data, no frequency filtering)
|
||||||
- `decks` table stores curated learning lists (A1, A2, B1, "Most Common 1000", etc.)
|
- `decks` table stores curated learning lists (A1, A2, B1, "Most Common 1000", etc.)
|
||||||
- `deck_terms` junction table links terms to decks with position ordering
|
- `deck_terms` junction table links terms to decks with position ordering
|
||||||
- `rooms.deck_id` specifies which vocabulary deck a game uses
|
- `rooms.deck_id` specifies which vocabulary deck a game uses
|
||||||
|
|
||||||
**Benefits:**
|
**Benefits:**
|
||||||
|
|
||||||
- Curricula can come from external sources (CEFR lists, Oxford 3000, SUBTLEX)
|
- Curricula can come from external sources (CEFR lists, Oxford 3000, SUBTLEX)
|
||||||
- Bad data (chemical symbols, obscure words) excluded at deck level, not schema level
|
- Bad data (chemical symbols, obscure words) excluded at deck level, not schema level
|
||||||
- Users can create custom decks later
|
- Users can create custom decks later
|
||||||
|
|
|
||||||
|
|
@ -4,7 +4,7 @@
|
||||||
|
|
||||||
- pinning dependencies in package.json files
|
- pinning dependencies in package.json files
|
||||||
- add this to drizzle migrations file:
|
- add this to drizzle migrations file:
|
||||||
✅ ALTER TABLE terms ADD CHECK (pos IN ('noun', 'verb', 'adjective', etc));
|
✅ ALTER TABLE terms ADD CHECK (pos IN ('noun', 'verb', 'adjective', etc));
|
||||||
|
|
||||||
## openwordnet
|
## openwordnet
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -25,7 +25,7 @@ Goal: Word data lives in the DB and can be queried via the API.
|
||||||
Done when: `GET /api/decks/1/terms?limit=10` returns 10 terms from a specific deck.
|
Done when: `GET /api/decks/1/terms?limit=10` returns 10 terms from a specific deck.
|
||||||
|
|
||||||
[x] Run `extract-en-it-nouns.py` locally → generates `datafiles/en-it-nouns.json`
|
[x] Run `extract-en-it-nouns.py` locally → generates `datafiles/en-it-nouns.json`
|
||||||
-- Import ALL available OMW noun synsets (no frequency filtering)
|
-- Import ALL available OMW noun synsets (no frequency filtering)
|
||||||
[x] Write Drizzle schema: `terms`, `translations`, `language_pairs`, `term_glosses`, `decks`, `deck_terms`
|
[x] Write Drizzle schema: `terms`, `translations`, `language_pairs`, `term_glosses`, `decks`, `deck_terms`
|
||||||
[x] Write and run migration (includes CHECK constraints for `pos`, `gloss_type`)
|
[x] Write and run migration (includes CHECK constraints for `pos`, `gloss_type`)
|
||||||
[x] Write `packages/db/src/seed.ts` (imports ALL terms + translations, NO decks)
|
[x] Write `packages/db/src/seed.ts` (imports ALL terms + translations, NO decks)
|
||||||
|
|
@ -142,9 +142,9 @@ Not required to ship, but address before real users arrive.
|
||||||
|
|
||||||
Dependency Graph
|
Dependency Graph
|
||||||
Phase 0 (Foundation)
|
Phase 0 (Foundation)
|
||||||
└── Phase 1 (Vocabulary Data)
|
└── Phase 1 (Vocabulary Data)
|
||||||
└── Phase 2 (Auth)
|
└── Phase 2 (Auth)
|
||||||
├── Phase 3 (Singleplayer) ← parallel with Phase 4
|
├── Phase 3 (Singleplayer) ← parallel with Phase 4
|
||||||
└── Phase 4 (Room Lobby)
|
└── Phase 4 (Room Lobby)
|
||||||
└── Phase 5 (Multiplayer Game)
|
└── Phase 5 (Multiplayer Game)
|
||||||
└── Phase 6 (Deployment)
|
└── Phase 6 (Deployment)
|
||||||
|
|
|
||||||
|
|
@ -72,7 +72,7 @@ vocab-trainer/
|
||||||
│ └── db/ # Drizzle schema, migrations, seed script
|
│ └── db/ # Drizzle schema, migrations, seed script
|
||||||
├── scripts/
|
├── scripts/
|
||||||
| ├── datafiles/
|
| ├── datafiles/
|
||||||
│ | └── en-it-nouns.json
|
│ | └── en-it-nouns.json
|
||||||
│ └── extract-en-it-nouns.py # One-time WordNet + OMW extraction → seed.json
|
│ └── extract-en-it-nouns.py # One-time WordNet + OMW extraction → seed.json
|
||||||
├── docker-compose.yml
|
├── docker-compose.yml
|
||||||
├── docker-compose.prod.yml
|
├── docker-compose.prod.yml
|
||||||
|
|
@ -159,19 +159,19 @@ SSL is fully automatic via `nginx-proxy` + `acme-companion`. No manual Certbot n
|
||||||
|
|
||||||
### 5.1 Valkey Key Structure
|
### 5.1 Valkey Key Structure
|
||||||
|
|
||||||
Ephemeral room state is stored in Valkey with TTL (e.g., 1 hour).
|
Ephemeral room state is stored in Valkey with TTL (e.g., 1 hour).
|
||||||
PostgreSQL stores durable history only.
|
PostgreSQL stores durable history only.
|
||||||
|
|
||||||
Key Format: `room:{code}:{field}`
|
Key Format: `room:{code}:{field}`
|
||||||
| Key | Type | TTL | Description |
|
| Key | Type | TTL | Description |
|
||||||
|------------------------------|---------|-------|-------------|
|
|------------------------------|---------|-------|-------------|
|
||||||
| `room:{code}:state` | Hash | 1h | Current question index, round status |
|
| `room:{code}:state` | Hash | 1h | Current question index, round status |
|
||||||
| `room:{code}:players` | Set | 1h | List of connected user IDs |
|
| `room:{code}:players` | Set | 1h | List of connected user IDs |
|
||||||
| `room:{code}:answers:{round}`| Hash | 15m | Temp storage for current round answers |
|
| `room:{code}:answers:{round}`| Hash | 15m | Temp storage for current round answers |
|
||||||
|
|
||||||
Recovery Strategy
|
Recovery Strategy
|
||||||
If server crashes mid-game, Valkey data is lost.
|
If server crashes mid-game, Valkey data is lost.
|
||||||
PostgreSQL `room_players.score` remains 0.
|
PostgreSQL `room_players.score` remains 0.
|
||||||
Room status is reset to `finished` via startup health check if `updated_at` is stale.
|
Room status is reset to `finished` via startup health check if `updated_at` is stale.
|
||||||
|
|
||||||
---
|
---
|
||||||
|
|
@ -186,79 +186,79 @@ Adding a new language pair requires no schema changes — only new rows in `tran
|
||||||
## Core tables
|
## Core tables
|
||||||
|
|
||||||
terms
|
terms
|
||||||
id uuid PK
|
id uuid PK
|
||||||
synset_id text UNIQUE -- OMW ILI (e.g. "ili:i12345")
|
synset_id text UNIQUE -- OMW ILI (e.g. "ili:i12345")
|
||||||
pos varchar(20) -- NOT NULL, CHECK (pos IN ('noun', 'verb', 'adjective', 'adverb'))
|
pos varchar(20) -- NOT NULL, CHECK (pos IN ('noun', 'verb', 'adjective', 'adverb'))
|
||||||
created_at timestamptz DEFAULT now()
|
created_at timestamptz DEFAULT now()
|
||||||
-- REMOVED: frequency_rank (handled at deck level)
|
-- REMOVED: frequency_rank (handled at deck level)
|
||||||
|
|
||||||
translations
|
translations
|
||||||
id uuid PK
|
id uuid PK
|
||||||
term_id uuid FK → terms.id
|
term_id uuid FK → terms.id
|
||||||
language_code varchar(10) -- NOT NULL, BCP 47: "en", "it"
|
language_code varchar(10) -- NOT NULL, BCP 47: "en", "it"
|
||||||
text text -- NOT NULL
|
text text -- NOT NULL
|
||||||
created_at timestamptz DEFAULT now()
|
created_at timestamptz DEFAULT now()
|
||||||
UNIQUE (term_id, language_code, text) -- Allow synonyms, prevent exact duplicates
|
UNIQUE (term_id, language_code, text) -- Allow synonyms, prevent exact duplicates
|
||||||
|
|
||||||
term_glosses
|
term_glosses
|
||||||
id uuid PK
|
id uuid PK
|
||||||
term_id uuid FK → terms.id
|
term_id uuid FK → terms.id
|
||||||
language_code varchar(10) -- NOT NULL
|
language_code varchar(10) -- NOT NULL
|
||||||
text text -- NOT NULL
|
text text -- NOT NULL
|
||||||
created_at timestamptz DEFAULT now()
|
created_at timestamptz DEFAULT now()
|
||||||
|
|
||||||
language_pairs
|
language_pairs
|
||||||
id uuid PK
|
id uuid PK
|
||||||
source varchar(10) -- NOT NULL
|
source varchar(10) -- NOT NULL
|
||||||
target varchar(10) -- NOT NULL
|
target varchar(10) -- NOT NULL
|
||||||
label text
|
label text
|
||||||
active boolean DEFAULT true
|
active boolean DEFAULT true
|
||||||
UNIQUE (source, target)
|
UNIQUE (source, target)
|
||||||
|
|
||||||
decks
|
decks
|
||||||
id uuid PK
|
id uuid PK
|
||||||
name text -- NOT NULL (e.g. "A1 Italian Nouns", "Most Common 1000")
|
name text -- NOT NULL (e.g. "A1 Italian Nouns", "Most Common 1000")
|
||||||
description text -- NULLABLE
|
description text -- NULLABLE
|
||||||
pair_id uuid FK → language_pairs.id -- NULLABLE (for single-language or multi-pair decks)
|
pair_id uuid FK → language_pairs.id -- NULLABLE (for single-language or multi-pair decks)
|
||||||
created_by uuid FK → users.id -- NULLABLE (for system decks)
|
created_by uuid FK → users.id -- NULLABLE (for system decks)
|
||||||
is_public boolean DEFAULT true
|
is_public boolean DEFAULT true
|
||||||
created_at timestamptz DEFAULT now()
|
created_at timestamptz DEFAULT now()
|
||||||
|
|
||||||
deck_terms
|
deck_terms
|
||||||
deck_id uuid FK → decks.id
|
deck_id uuid FK → decks.id
|
||||||
term_id uuid FK → terms.id
|
term_id uuid FK → terms.id
|
||||||
position smallint -- NOT NULL, ordering within deck (1, 2, 3...)
|
position smallint -- NOT NULL, ordering within deck (1, 2, 3...)
|
||||||
added_at timestamptz DEFAULT now()
|
added_at timestamptz DEFAULT now()
|
||||||
PRIMARY KEY (deck_id, term_id)
|
PRIMARY KEY (deck_id, term_id)
|
||||||
|
|
||||||
users
|
users
|
||||||
id uuid PK -- Internal stable ID (FK target)
|
id uuid PK -- Internal stable ID (FK target)
|
||||||
openauth_sub text UNIQUE -- NOT NULL, OpenAuth `sub` claim (e.g. "google|12345")
|
openauth_sub text UNIQUE -- NOT NULL, OpenAuth `sub` claim (e.g. "google|12345")
|
||||||
email varchar(255) UNIQUE -- NULLABLE (GitHub users may lack email)
|
email varchar(255) UNIQUE -- NULLABLE (GitHub users may lack email)
|
||||||
display_name varchar(100)
|
display_name varchar(100)
|
||||||
created_at timestamptz DEFAULT now()
|
created_at timestamptz DEFAULT now()
|
||||||
last_login_at timestamptz
|
last_login_at timestamptz
|
||||||
-- REMOVED: games_played, games_won (derive from room_players)
|
-- REMOVED: games_played, games_won (derive from room_players)
|
||||||
|
|
||||||
rooms
|
rooms
|
||||||
id uuid PK
|
id uuid PK
|
||||||
code varchar(8) UNIQUE -- NOT NULL, CHECK (code = UPPER(code))
|
code varchar(8) UNIQUE -- NOT NULL, CHECK (code = UPPER(code))
|
||||||
host_id uuid FK → users.id
|
host_id uuid FK → users.id
|
||||||
pair_id uuid FK → language_pairs.id
|
pair_id uuid FK → language_pairs.id
|
||||||
deck_id uuid FK → decks.id -- Which vocabulary deck this room uses
|
deck_id uuid FK → decks.id -- Which vocabulary deck this room uses
|
||||||
status varchar(20) -- NOT NULL, CHECK (status IN ('waiting', 'in_progress', 'finished'))
|
status varchar(20) -- NOT NULL, CHECK (status IN ('waiting', 'in_progress', 'finished'))
|
||||||
max_players smallint -- NOT NULL, DEFAULT 4, CHECK (max_players BETWEEN 2 AND 10)
|
max_players smallint -- NOT NULL, DEFAULT 4, CHECK (max_players BETWEEN 2 AND 10)
|
||||||
round_count smallint -- NOT NULL, DEFAULT 10, CHECK (round_count BETWEEN 5 AND 20)
|
round_count smallint -- NOT NULL, DEFAULT 10, CHECK (round_count BETWEEN 5 AND 20)
|
||||||
created_at timestamptz DEFAULT now()
|
created_at timestamptz DEFAULT now()
|
||||||
updated_at timestamptz DEFAULT now() -- For stale room recovery
|
updated_at timestamptz DEFAULT now() -- For stale room recovery
|
||||||
|
|
||||||
room_players
|
room_players
|
||||||
room_id uuid FK → rooms.id
|
room_id uuid FK → rooms.id
|
||||||
user_id uuid FK → users.id
|
user_id uuid FK → users.id
|
||||||
score integer DEFAULT 0 -- Final score only (written at game end)
|
score integer DEFAULT 0 -- Final score only (written at game end)
|
||||||
joined_at timestamptz DEFAULT now()
|
joined_at timestamptz DEFAULT now()
|
||||||
left_at timestamptz -- Populated on WS disconnect/leave
|
left_at timestamptz -- Populated on WS disconnect/leave
|
||||||
PRIMARY KEY (room_id, user_id)
|
PRIMARY KEY (room_id, user_id)
|
||||||
|
|
||||||
Indexes
|
Indexes
|
||||||
-- Vocabulary
|
-- Vocabulary
|
||||||
|
|
@ -501,8 +501,6 @@ Tests are co-located with source files (`*.test.ts` / `*.test.tsx`).
|
||||||
- [ ] 10–20 passing tests covering critical paths
|
- [ ] 10–20 passing tests covering critical paths
|
||||||
- [ ] pnpm workspace build pipeline green
|
- [ ] pnpm workspace build pipeline green
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
## 15. Out of Scope (MVP)
|
## 15. Out of Scope (MVP)
|
||||||
|
|
|
||||||
|
|
@ -56,12 +56,8 @@
|
||||||
"name": "deck_terms_deck_id_decks_id_fk",
|
"name": "deck_terms_deck_id_decks_id_fk",
|
||||||
"tableFrom": "deck_terms",
|
"tableFrom": "deck_terms",
|
||||||
"tableTo": "decks",
|
"tableTo": "decks",
|
||||||
"columnsFrom": [
|
"columnsFrom": ["deck_id"],
|
||||||
"deck_id"
|
"columnsTo": ["id"],
|
||||||
],
|
|
||||||
"columnsTo": [
|
|
||||||
"id"
|
|
||||||
],
|
|
||||||
"onDelete": "cascade",
|
"onDelete": "cascade",
|
||||||
"onUpdate": "no action"
|
"onUpdate": "no action"
|
||||||
},
|
},
|
||||||
|
|
@ -69,12 +65,8 @@
|
||||||
"name": "deck_terms_term_id_terms_id_fk",
|
"name": "deck_terms_term_id_terms_id_fk",
|
||||||
"tableFrom": "deck_terms",
|
"tableFrom": "deck_terms",
|
||||||
"tableTo": "terms",
|
"tableTo": "terms",
|
||||||
"columnsFrom": [
|
"columnsFrom": ["term_id"],
|
||||||
"term_id"
|
"columnsTo": ["id"],
|
||||||
],
|
|
||||||
"columnsTo": [
|
|
||||||
"id"
|
|
||||||
],
|
|
||||||
"onDelete": "cascade",
|
"onDelete": "cascade",
|
||||||
"onUpdate": "no action"
|
"onUpdate": "no action"
|
||||||
}
|
}
|
||||||
|
|
@ -82,10 +74,7 @@
|
||||||
"compositePrimaryKeys": {
|
"compositePrimaryKeys": {
|
||||||
"deck_terms_deck_id_term_id_pk": {
|
"deck_terms_deck_id_term_id_pk": {
|
||||||
"name": "deck_terms_deck_id_term_id_pk",
|
"name": "deck_terms_deck_id_term_id_pk",
|
||||||
"columns": [
|
"columns": ["deck_id", "term_id"]
|
||||||
"deck_id",
|
|
||||||
"term_id"
|
|
||||||
]
|
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"uniqueConstraints": {},
|
"uniqueConstraints": {},
|
||||||
|
|
@ -180,12 +169,8 @@
|
||||||
"name": "decks_language_pair_id_language_pairs_id_fk",
|
"name": "decks_language_pair_id_language_pairs_id_fk",
|
||||||
"tableFrom": "decks",
|
"tableFrom": "decks",
|
||||||
"tableTo": "language_pairs",
|
"tableTo": "language_pairs",
|
||||||
"columnsFrom": [
|
"columnsFrom": ["language_pair_id"],
|
||||||
"language_pair_id"
|
"columnsTo": ["id"],
|
||||||
],
|
|
||||||
"columnsTo": [
|
|
||||||
"id"
|
|
||||||
],
|
|
||||||
"onDelete": "cascade",
|
"onDelete": "cascade",
|
||||||
"onUpdate": "no action"
|
"onUpdate": "no action"
|
||||||
},
|
},
|
||||||
|
|
@ -193,12 +178,8 @@
|
||||||
"name": "decks_created_by_users_id_fk",
|
"name": "decks_created_by_users_id_fk",
|
||||||
"tableFrom": "decks",
|
"tableFrom": "decks",
|
||||||
"tableTo": "users",
|
"tableTo": "users",
|
||||||
"columnsFrom": [
|
"columnsFrom": ["created_by"],
|
||||||
"created_by"
|
"columnsTo": ["id"],
|
||||||
],
|
|
||||||
"columnsTo": [
|
|
||||||
"id"
|
|
||||||
],
|
|
||||||
"onDelete": "cascade",
|
"onDelete": "cascade",
|
||||||
"onUpdate": "no action"
|
"onUpdate": "no action"
|
||||||
}
|
}
|
||||||
|
|
@ -208,10 +189,7 @@
|
||||||
"unique_deck_name": {
|
"unique_deck_name": {
|
||||||
"name": "unique_deck_name",
|
"name": "unique_deck_name",
|
||||||
"nullsNotDistinct": false,
|
"nullsNotDistinct": false,
|
||||||
"columns": [
|
"columns": ["name", "created_by"]
|
||||||
"name",
|
|
||||||
"created_by"
|
|
||||||
]
|
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"policies": {},
|
"policies": {},
|
||||||
|
|
@ -297,10 +275,7 @@
|
||||||
"unique_source_target": {
|
"unique_source_target": {
|
||||||
"name": "unique_source_target",
|
"name": "unique_source_target",
|
||||||
"nullsNotDistinct": false,
|
"nullsNotDistinct": false,
|
||||||
"columns": [
|
"columns": ["source_language", "target_language"]
|
||||||
"source_language",
|
|
||||||
"target_language"
|
|
||||||
]
|
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"policies": {},
|
"policies": {},
|
||||||
|
|
@ -379,12 +354,8 @@
|
||||||
"name": "term_glosses_term_id_terms_id_fk",
|
"name": "term_glosses_term_id_terms_id_fk",
|
||||||
"tableFrom": "term_glosses",
|
"tableFrom": "term_glosses",
|
||||||
"tableTo": "terms",
|
"tableTo": "terms",
|
||||||
"columnsFrom": [
|
"columnsFrom": ["term_id"],
|
||||||
"term_id"
|
"columnsTo": ["id"],
|
||||||
],
|
|
||||||
"columnsTo": [
|
|
||||||
"id"
|
|
||||||
],
|
|
||||||
"onDelete": "cascade",
|
"onDelete": "cascade",
|
||||||
"onUpdate": "no action"
|
"onUpdate": "no action"
|
||||||
}
|
}
|
||||||
|
|
@ -394,11 +365,7 @@
|
||||||
"unique_term_gloss": {
|
"unique_term_gloss": {
|
||||||
"name": "unique_term_gloss",
|
"name": "unique_term_gloss",
|
||||||
"nullsNotDistinct": false,
|
"nullsNotDistinct": false,
|
||||||
"columns": [
|
"columns": ["term_id", "language_code", "text"]
|
||||||
"term_id",
|
|
||||||
"language_code",
|
|
||||||
"text"
|
|
||||||
]
|
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"policies": {},
|
"policies": {},
|
||||||
|
|
@ -459,9 +426,7 @@
|
||||||
"terms_synset_id_unique": {
|
"terms_synset_id_unique": {
|
||||||
"name": "terms_synset_id_unique",
|
"name": "terms_synset_id_unique",
|
||||||
"nullsNotDistinct": false,
|
"nullsNotDistinct": false,
|
||||||
"columns": [
|
"columns": ["synset_id"]
|
||||||
"synset_id"
|
|
||||||
]
|
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"policies": {},
|
"policies": {},
|
||||||
|
|
@ -538,12 +503,8 @@
|
||||||
"name": "translations_term_id_terms_id_fk",
|
"name": "translations_term_id_terms_id_fk",
|
||||||
"tableFrom": "translations",
|
"tableFrom": "translations",
|
||||||
"tableTo": "terms",
|
"tableTo": "terms",
|
||||||
"columnsFrom": [
|
"columnsFrom": ["term_id"],
|
||||||
"term_id"
|
"columnsTo": ["id"],
|
||||||
],
|
|
||||||
"columnsTo": [
|
|
||||||
"id"
|
|
||||||
],
|
|
||||||
"onDelete": "cascade",
|
"onDelete": "cascade",
|
||||||
"onUpdate": "no action"
|
"onUpdate": "no action"
|
||||||
}
|
}
|
||||||
|
|
@ -553,11 +514,7 @@
|
||||||
"unique_translations": {
|
"unique_translations": {
|
||||||
"name": "unique_translations",
|
"name": "unique_translations",
|
||||||
"nullsNotDistinct": false,
|
"nullsNotDistinct": false,
|
||||||
"columns": [
|
"columns": ["term_id", "language_code", "text"]
|
||||||
"term_id",
|
|
||||||
"language_code",
|
|
||||||
"text"
|
|
||||||
]
|
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"policies": {},
|
"policies": {},
|
||||||
|
|
@ -614,23 +571,17 @@
|
||||||
"users_openauth_sub_unique": {
|
"users_openauth_sub_unique": {
|
||||||
"name": "users_openauth_sub_unique",
|
"name": "users_openauth_sub_unique",
|
||||||
"nullsNotDistinct": false,
|
"nullsNotDistinct": false,
|
||||||
"columns": [
|
"columns": ["openauth_sub"]
|
||||||
"openauth_sub"
|
|
||||||
]
|
|
||||||
},
|
},
|
||||||
"users_email_unique": {
|
"users_email_unique": {
|
||||||
"name": "users_email_unique",
|
"name": "users_email_unique",
|
||||||
"nullsNotDistinct": false,
|
"nullsNotDistinct": false,
|
||||||
"columns": [
|
"columns": ["email"]
|
||||||
"email"
|
|
||||||
]
|
|
||||||
},
|
},
|
||||||
"users_display_name_unique": {
|
"users_display_name_unique": {
|
||||||
"name": "users_display_name_unique",
|
"name": "users_display_name_unique",
|
||||||
"nullsNotDistinct": false,
|
"nullsNotDistinct": false,
|
||||||
"columns": [
|
"columns": ["display_name"]
|
||||||
"display_name"
|
|
||||||
]
|
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"policies": {},
|
"policies": {},
|
||||||
|
|
@ -644,9 +595,5 @@
|
||||||
"roles": {},
|
"roles": {},
|
||||||
"policies": {},
|
"policies": {},
|
||||||
"views": {},
|
"views": {},
|
||||||
"_meta": {
|
"_meta": { "columns": {}, "schemas": {}, "tables": {} }
|
||||||
"columns": {},
|
}
|
||||||
"schemas": {},
|
|
||||||
"tables": {}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
|
||||||
|
|
@ -10,4 +10,4 @@
|
||||||
"breakpoints": true
|
"breakpoints": true
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
|
|
|
||||||
File diff suppressed because it is too large
Load diff
|
|
@ -5,7 +5,7 @@
|
||||||
"moduleResolution": "NodeNext",
|
"moduleResolution": "NodeNext",
|
||||||
"outDir": "./dist",
|
"outDir": "./dist",
|
||||||
"resolveJsonModule": true,
|
"resolveJsonModule": true,
|
||||||
"types": ["vitest/globals"],
|
"types": ["vitest/globals"]
|
||||||
},
|
},
|
||||||
"include": ["src", "vitest.config.ts"],
|
"include": ["src", "vitest.config.ts"]
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -5,7 +5,7 @@
|
||||||
"moduleResolution": "NodeNext",
|
"moduleResolution": "NodeNext",
|
||||||
"outDir": "./dist",
|
"outDir": "./dist",
|
||||||
"resolveJsonModule": true,
|
"resolveJsonModule": true,
|
||||||
"types": ["vitest/globals"],
|
"types": ["vitest/globals"]
|
||||||
},
|
},
|
||||||
"include": ["src", "vitest.config.ts"],
|
"include": ["src", "vitest.config.ts"]
|
||||||
}
|
}
|
||||||
|
|
|
||||||
File diff suppressed because it is too large
Load diff
Loading…
Add table
Add a link
Reference in a new issue