adding bug description as todo comment

This commit is contained in:
lila 2026-03-31 18:34:23 +02:00
parent 488f0dab11
commit 5603f15fe3

View file

@@ -2,6 +2,7 @@
- [x] Setup hardcoded path, name, description, source language, POS - [x] Setup hardcoded path, name, description, source language, POS
- [x] Read wordlist load the 1000 nouns - [x] Read wordlist load the 1000 nouns
- [x] Query terms match to database, find which ones have translations - [x] Query terms match to database, find which ones have translations
- [ ] Write missing words to a text file for future use
- [ ] Validation determine validated languages from the data (e.g., ["en", "it"] if all matched terms have both) - [ ] Validation determine validated languages from the data (e.g., ["en", "it"] if all matched terms have both)
- [ ] Check idempotency skip if deck exists - [ ] Check idempotency skip if deck exists
- [ ] Create deck insert with discovered validated_for_languages - [ ] Create deck insert with discovered validated_for_languages
@@ -9,6 +10,11 @@
- [ ] Report summary - [ ] Report summary
*/ */
// TODO: Wordlist contains 1000 lines but only 999 unique words (965 found + 34 missing = 999).
// Likely cause: duplicate entry in top1000englishnouns file.
// Investigate with: const unique = new Set(words); console.log(words.length - unique.size);
// Fix by either deduplicating in code ([...new Set(words)]) or cleaning the source file.
import fs from "node:fs/promises"; import fs from "node:fs/promises";
import { db } from "@glossa/db"; import { db } from "@glossa/db";
import { translations, terms } from "@glossa/db/schema"; import { translations, terms } from "@glossa/db/schema";
@@ -64,12 +70,14 @@ const main = async () => {
console.log("📖 Reading word list..."); console.log("📖 Reading word list...");
const sourceWords = await readingFromWordlist(); const sourceWords = await readingFromWordlist();
console.log(` ${sourceWords.length} words loaded\n`); console.log(` ${sourceWords.length} words loaded\n`);
// check if sourceWords exist in database // check if sourceWords exist in database
console.log("🔍 Checking against database..."); console.log("🔍 Checking against database...");
const { termIds, missingWords } = const { termIds, missingWords } =
await checkingSourceWordsAgainstDB(sourceWords); await checkingSourceWordsAgainstDB(sourceWords);
console.log("words found in db: ", termIds.length); console.log("words found in db: ", termIds.length);
console.log("words NOT found in db: ", missingWords.length); console.log("words NOT found in db: ", missingWords.length);
// write missing words to file // write missing words to file
await writeMissingWordsToFile(missingWords); await writeMissingWordsToFile(missingWords);
}; };