import Database from "better-sqlite3";
import path from "node:path";
import fs from "node:fs";
import { fileURLToPath } from "node:url";

/** One sampled synset: its identifier and part-of-speech tag. */
type SynsetRow = { source_id: string; pos: string };
/** One gloss (definition text) of a synset in a given language. */
type GlossRow = { language: string; text: string };
/** One translated word attached to a synset. */
type WordRow = { word: string };

/** Placeholder printed when a synset has no gloss in the requested language. */
const MISSING_GLOSS = "—";

const __dirname = path.dirname(fileURLToPath(import.meta.url));
const DB_PATH = path.join(__dirname, "db/pipeline.db");

/*
 * Audit-sheet generator.
 *
 * Reads a random sample of synsets that have German translations from the
 * pipeline database and writes a Markdown checklist (audit.md, next to this
 * script) in which a reviewer fills in a QUALITY verdict per entry.
 */
const db = new Database(DB_PATH, { readonly: true });

try {
  // Pull up to 50 random synsets that have at least one German translation.
  // The sample is drawn with ORDER BY RANDOM(); the query enforces no
  // per-POS quota, so the POS distribution simply follows the data.
  const synsets = db
    .prepare(
      `
      SELECT DISTINCT s.source_id, s.pos
      FROM synsets s
      JOIN translations t ON t.source_id = s.source_id
      WHERE t.language = 'de'
      ORDER BY RANDOM()
      LIMIT 50
      `,
    )
    .all() as SynsetRow[];

  // Compile the per-synset lookups once; they are re-executed with a
  // different source_id for every sampled synset.
  const glossStmt = db.prepare(
    "SELECT language, text FROM glosses WHERE source_id = ?",
  );
  const deWordsStmt = db.prepare(
    "SELECT word FROM translations WHERE source_id = ? AND language = 'de'",
  );
  const enWordsStmt = db.prepare(
    "SELECT word FROM translations WHERE source_id = ? AND language = 'en'",
  );

  const results = synsets.map((synset, i) => {
    const index = i + 1; // entries are numbered from 1

    const glosses = glossStmt.all(synset.source_id) as GlossRow[];
    const enGloss =
      glosses.find((g) => g.language === "en")?.text ?? MISSING_GLOSS;
    const deGloss =
      glosses.find((g) => g.language === "de")?.text ?? MISSING_GLOSS;

    const deWords = (deWordsStmt.all(synset.source_id) as WordRow[]).map(
      (t) => t.word,
    );
    const enWords = (enWordsStmt.all(synset.source_id) as WordRow[]).map(
      (t) => t.word,
    );

    return [
      // NOTE(review): the line break after the entry number is reproduced
      // from the original template literal — confirm it is intended.
      `${String(index).padStart(2, " ")}. \n[${synset.pos}] ${synset.source_id}`,
      ` EN gloss: ${enGloss}`,
      ` DE gloss: ${deGloss}`,
      ` EN words: ${enWords.join(", ")}`,
      ` DE words: ${deWords.join(", ")}`,
      ` QUALITY: ___`,
      ``,
    ].join("\n");
  });

  const output = [
    "# OMW German Translation Quality Audit",
    "",
    "Instructions: for each entry, check if the German translations",
    "match the meaning described by the English gloss.",
    "",
    "Mark QUALITY as:",
    " OK — all German translations fit the meaning",
    " PARTIAL — some fit, some don't",
    " BAD — none of the German translations fit",
    " USELESS — translations are correct but useless for learners",
    "",
    "---",
    "",
    ...results,
  ].join("\n");

  const outPath = path.join(__dirname, "audit.md");
  fs.writeFileSync(outPath, output, "utf-8");

  console.log(`Wrote ${synsets.length} entries → ${outPath}`);
} finally {
  // Release the database handle even when a query or the file write throws.
  db.close();
}