From fd9667c1fd3a57dcbec57cdf8dc248d18a8c3cf9 Mon Sep 17 00:00:00 2001 From: lila Date: Tue, 28 Apr 2026 17:26:01 +0200 Subject: [PATCH] updating documentation --- documentation/backlog.md | 9 +- documentation/roasts/gameService.md | 348 ++++++++++++++++++++++++++++ 2 files changed, 351 insertions(+), 6 deletions(-) diff --git a/documentation/backlog.md b/documentation/backlog.md index 3d5e5da..6a91e2d 100644 --- a/documentation/backlog.md +++ b/documentation/backlog.md @@ -8,15 +8,9 @@ Labels: `[feature]` `[infra]` `[security]` `[ux]` `[debt]` Things that are actively in progress or should be picked up immediately. Mostly operational risk and the remaining phase 7 hardening work. -- **Google OAuth publishing** `[infra]` - Only test users can currently log in via Google. Publish the OAuth consent screen so any Google user can sign in โ€” requires branding verification in Google Cloud Console. - - **Hetzner domain migration check** `[infra]` Verify whether the lilastudy.com domain needs to be migrated following a Hetzner DNS change. Check Hetzner dashboard for any pending migration notice. -- **Conditionally register OAuth providers** `[debt]` - Better Auth logs warnings when social providers are registered without credentials (`Social provider google is missing clientId or clientSecret`). Instead of registering all providers unconditionally, only add a provider to the config when its credentials are present in the environment. Keeps local dev clean for contributors who don't have OAuth apps set up. - --- ## next @@ -69,6 +63,9 @@ Clearly planned work, not yet started. No hard ordering โ€” sequence based on wh - **Tighten CSP to remove unsafe-inline** `[security]` Current script-src uses 'unsafe-inline' to accommodate framework-injected inline scripts (likely TanStack Router hydration). Tightening this would require nonce-based CSP, which needs server-rendered HTML or a Caddy layer that injects per-request nonces. Not urgent โ€” pragmatic CSP with 'unsafe-inline' is mainstream for SPAs at this scale. Revisit if the app handles more sensitive data or grows a meaningful user base +- **Publish Google OAuth consent screen** `[infra]` + App is currently in testing mode, which caps OAuth sign-ins at 100 users. Before hitting that limit, publish the consent screen in Google Cloud Console. Basic scopes (email, profile, openid) require no Google review โ€” just fill in branding fields (app name, logo, support email, privacy policy URL) and click publish. Trigger: do this before reaching 80 users. + --- ## later diff --git a/documentation/roasts/gameService.md b/documentation/roasts/gameService.md index e69de29..73283de 100644 --- a/documentation/roasts/gameService.md +++ b/documentation/roasts/gameService.md @@ -0,0 +1,348 @@ +# ๐Ÿ”ฅ GameService Roast: `apps/api/src/services/gameService.ts` + +> *"It works on my machine" is not a scalability strategy.* + +**Project:** lila โ€” Vocabulary Trainer +**File Roasted:** `gameService.ts` +**Date:** $(date) +**Roaster:** Qwen3.6 + +--- + +## ๐Ÿ“‹ Executive Summary + +| Metric | Score | Notes | +| ------------- | -------- | ---------------------------------------------------- | +| Code Quality | 8/10 | Clean layering, good types, consistent style | +| Correctness | 6/10 | Race condition + N+1 query are critical | +| Test Coverage | 7/10 | Good happy-path tests, missing concurrency tests | +| Scalability | 5/10 | Will choke at ~100 concurrent users without fixes | +| **Overall** | **7/10** | Solid foundation, but fix the footguns before launch | + +--- + +## ๐Ÿšจ Critical Issues (Fix Before Production) + +### 1. Race Condition: Lost Update in `evaluateAnswer` + +**Location:** `gameService.ts:45-58` + `InMemoryGameSessionStore.ts:update()` + +// Current flow (VULNERABLE): +const session = await store.get(submission.sessionId); // READ +const updatedAnswers = new Map(session.answers); // MODIFY (local copy) +updatedAnswers.delete(submission.questionId); +await store.update(submission.sessionId, { answers: updatedAnswers }); // WRITE + +The Attack: + + Client submits answer A and answer B for the same question (network retry, bug, or malice) + Both requests read the same session.answers Map (question still present) + Both delete the question from their local copy + Both write back โ†’ second write overwrites first + Result: One answer is silently lost, session state desyncs + +Why Tests Missed It: Vitest runs tests synchronously. Race conditions require deliberate concurrency testing. +Fix Options: + +// Option A: Add atomic operation to store interface +interface GameSessionStore { + deleteAnswer(sessionId: string, questionId: string): Promise; +} + +// Option B: Use Valkey Lua script for atomic read-modify-write +// Option C: Optimistic locking with version numbers + +Priority: ๐Ÿ”ด CRITICAL โ€” Data integrity issue +2. N+1 Query: Database Performance Bomb +Location: gameService.ts:24-26 + termModel.ts:getDistractors() + +// For each of N terms, we call getDistractors(): +const questions: GameQuestion[] = await Promise.all( + terms.map(async (term) => { + const distractorTexts = await getDistractors(term.termId, ...); // ๐Ÿšฉ N queries! + }) +); + +Impact Analysis: +Rounds + +DB Queries + +At 50 concurrent users +3 + +1 + 3 = 4 + +200 queries/min +10 + +1 + 10 = 11 + +550 queries/min +20 + +1 + 20 = 21 + +1,050 queries/min +Each getDistractors() runs: + +SELECT text FROM terms +JOIN translations ON ... +WHERE pos = $1 AND difficulty = $2 AND term_id != $3 AND text != $4 +ORDER BY RANDOM() LIMIT 6 + +Fix: Batch Fetch Distractors + +// Fetch all distractors in ONE query +const allDistractors = await db + .select({ termId: terms.id, text: translations.text }) + .from(terms) + .innerJoin(translations, /* ... */) + .where(and( + eq(terms.pos, pos), + eq(translations.difficulty, difficulty), + inArray(terms.id, termIds), // Batch! + )) + .limit(DISTRACTOR_FETCH_COUNT * termIds.length); + +// Group by termId in JS, then slice to 3 unique distractors per term +const distractorsByTerm = groupByTermId(allDistractors); + +Priority: ๐Ÿ”ด CRITICAL โ€” Performance/scalability issue + +3. Error Handling Inconsistency +Location: gameService.ts:33-36 + +if (uniqueDistractors.length < 3) { + throw new Error(`Not enough unique distractors for term: ${term.targetText}`); // ๐Ÿšฉ +} + +Problem: Raw Error bypasses your errorHandler middleware: + + No HTTP status mapping (defaults to 500) + No structured logging + Inconsistent API responses + +Fix: +import { UnprocessableEntityError } from "../errors/AppError.js"; + +if (uniqueDistractors.length < 3) { + logger.warn({ termId: term.termId, uniqueCount: uniqueDistractors.length }, + "insufficient_distractors"); + throw new UnprocessableEntityError( + `Not enough unique distractors for term: ${term.targetText}` + ); +} +Priority: ๐ŸŸก HIGH โ€” Observability & UX issue +โš ๏ธ High-Severity Smells +4. Code Duplication: Singleplayer vs Multiplayer +Compare: gameService.ts vs multiplayerGameService.ts +// gameService.ts +const optionTexts = [term.targetText, ...uniqueDistractors.slice(0, 3)]; +const shuffledTexts = shuffleArray(optionTexts); +const correctOptionId = shuffledTexts.indexOf(term.targetText); + +// multiplayerGameService.ts (lines 35-45) +const optionTexts = [correctAnswer.targetText, ...distractorTexts]; +const shuffledTexts = shuffle(optionTexts); // Different function, same logic! +const correctOptionId = shuffledTexts.indexOf(correctAnswer.targetText); + +Risks: + + Fix shuffle bias in one place, forget the other + Add new option type (e.g., etymology hint), update one service only + Harder to test core game logic in isolation + +Fix: Extract pure function to @lila/shared or new @lila/game-logic: + +// packages/shared/src/game-logic.ts +export const buildQuestionOptions = ( + correctAnswer: string, + distractors: string[], + optionCount: number = 4 +): { options: AnswerOption[]; correctOptionId: number } => { + const uniqueDistractors = [...new Set(distractors.filter(d => d !== correctAnswer))]; + const optionTexts = [correctAnswer, ...uniqueDistractors.slice(0, optionCount - 1)]; + const shuffled = shuffleSecure(optionTexts); + const correctOptionId = shuffled.indexOf(correctAnswer); + + return { + options: shuffled.map((text, idx) => ({ optionId: idx, text })), + correctOptionId + }; +}; + +Priority: ๐ŸŸก HIGH โ€” Maintainability issue +5. Shuffle Bias: Math.random() Trap +Location: utils.ts:shuffleArray() + multiplayerGameService.ts:shuffle() + +export const shuffleArray = (array: T[]): T[] => { + for (let i = result.length - 1; i > 0; i--) { + const j = Math.floor(Math.random() * (i + 1)); // ๐Ÿšฉ Modulo bias + non-crypto RNG + // ... + } +}; + +The Math: + + Math.random() has ~53 bits of entropy (fine for vocab) + Math.floor(rand * n) has modulo bias when n isn't a power of 2 + For n=4: bias is ~0.01% (tiny, but non-zero) + +When It Matters: + + Competitive leaderboards ("option 0 is correct 26% of the time") + Achievement systems based on answer patterns + Security-sensitive features (not applicable here, but principle matters) + +Fix (if needed): +import { randomBytes } from "crypto"; + +const shuffleSecure = (array: T[]): T[] => { + const result = [...array]; + for (let i = result.length - 1; i > 0; i--) { + // Use crypto.getRandomValues for better randomness + const rand = randomBytes(4).readUInt32LE(0); + const j = rand % (i + 1); + [result[i], result[j]] = [result[j], result[i]]; + } + return result; +}; + +Priority: ๐ŸŸข LOW โ€” Document tradeoff and move on for now + +6. Test Coverage Gaps +File: gameService.test.ts +โœ… Well Tested: + + Happy path: session creation, answer evaluation + Edge cases: duplicate distractors, empty results, invalid inputs + Error propagation from DB layer + +โŒ Missing Tests: + +// 1. Concurrency test (race condition) +it("rejects duplicate answers for same question under concurrent load", async () => { + const session = await createGameSession(validRequest, store, "user-1"); + const question = session.questions[0]!; + + // Submit two answers simultaneously + const [result1, result2] = await Promise.allSettled([ + evaluateAnswer({ sessionId, questionId, selectedOptionId: 0 }, store, "user-1"), + evaluateAnswer({ sessionId, questionId, selectedOptionId: 1 }, store, "user-1"), + ]); + + // Exactly one should succeed, one should throw ConflictError + expect([result1, result2].filter(r => r.status === "fulfilled")).toHaveLength(1); +}); + +// 2. TTL expiration test +it("deletes session after TTL expires", async () => { + vi.useFakeTimers(); + const session = await createGameSession(validRequest, store, "user-1"); + + vi.advanceTimersByTime(31 * 60 * 1000); // 31 minutes + + await expect(store.get(session.sessionId)).resolves.toBeNull(); +}); + +// 3. Distractor fallback strategy test +it("uses fallback when <3 unique distractors available", async () => { + mockGetDistractors.mockResolvedValue(["same", "same", "same", "same"]); + // Should either: (a) fetch from broader pool, or (b) reduce rounds gracefully +}); + +Priority: ๐ŸŸก HIGH โ€” Prevents regression on critical fixes +๐Ÿงผ Code Quality Nitpicks +7. Magic Numbers + +// gameService.ts:52 +await store.create(sessionId, {...}, 30 * 60 * 1000); // What is this? + +// termModel.ts:65 +.limit(count); // count=6, but why? + +// shared/schemas/game.ts:15 +optionId: z.number().int().min(0).max(3), // Why 4 options? + +Fix: Centralize in @lila/shared/constants.ts: + +export const GAME_SESSION_TTL_MS = 30 * 60 * 1000; +export const DISTRACTOR_FETCH_COUNT = 6; +export const GAME_OPTION_COUNT = 4; +export const MIN_UNIQUE_DISTRACTORS = 3; + +8. Mutable Reference Leakage +Location: InMemoryGameSessionStore.ts:get() + +get(sessionId: string): Promise { + return Promise.resolve(entry.data); // ๐Ÿšฉ Returns mutable reference to internal state +} + +Risk: Any code that does session.answers.delete(...) mutates the store's internal Map directly. +Fix: + +// Option A: Deep clone (simple, works for this data shape) +return Promise.resolve(structuredClone(entry.data)); + +// Option B: Return readonly view (TypeScript-only protection) +return Promise.resolve(entry.data as Readonly); + +// Option C: Use immutable data structures (overkill for now) + +9. Zero Observability +Problem: No logging, no metrics. You're flying blind in production. +Minimal Fix (5 minutes): + + + +// apps/api/src/lib/logger.ts +import pino from "pino"; +export const logger = pino({ + level: process.env.LOG_LEVEL || "info", + transport: process.env.NODE_ENV === "production" + ? { target: "pino-pretty" } + : undefined +}); + +// In gameService.ts: +import { logger } from "../lib/logger.js"; + +logger.info( + { userId, sourceLang, targetLang, termCount: terms.length }, + "game_session_created" +); + +logger.debug( + { sessionId, questionId, isCorrect, responseTimeMs }, + "answer_evaluated" +); + +Bonus: Export a Prometheus histogram for game_service_duration_seconds. + +10. ORDER BY RANDOM() Time Bomb +Location: termModel.ts:getGameTerms() + getDistractors() + +.orderBy(sql`RANDOM()`) // ๐Ÿšฉ Fine for 10k rows, slow for 1M + +The Comment Admits It: + +// TODO(post-mvp): ORDER BY RANDOM() sorts the entire filtered result set... + +Reality Check: "Post-MVP" never comes without a ticket. +Fix Options: + +-- Option A: Pre-computed random_seed column (updated nightly) +WHERE ... AND random_seed >= random() +ORDER BY random_seed +LIMIT $1 + +-- Option B: TABLESAMPLE for approximate sampling (Postgres 9.5+) +FROM terms TABLESAMPLE SYSTEM(10) +WHERE ... +LIMIT $1 + +-- Option C: Random offset (simple, but still scans) +OFFSET floor(random() * (SELECT count(*) FROM terms WHERE ...)) + +Action: Add a ticket to documentation/tickets/t00009.md now. \ No newline at end of file