adding bug description as todo comment
This commit is contained in:
parent
488f0dab11
commit
5603f15fe3
1 changed file with 8 additions and 0 deletions
|
|
@@ -2,6 +2,7 @@
|
|||
- [x] Setup — hardcoded path, name, description, source language, POS
|
||||
- [x] Read wordlist — load the 1000 nouns
|
||||
- [x] Query terms — match to database, find which ones have translations
|
||||
- [ ] Write missing words — save to a text file for future use
|
||||
- [ ] Validation — determine validated languages from the data (e.g., ["en", "it"] if all matched terms have both)
|
||||
- [ ] Check idempotency — skip if deck exists
|
||||
- [ ] Create deck — insert with discovered validated_for_languages
|
||||
|
|
@@ -9,6 +10,11 @@
|
|||
- [ ] Report — summary
|
||||
*/
|
||||
|
||||
// TODO: Wordlist contains 1000 lines but only 999 unique words (965 found + 34 missing = 999).
|
||||
// Likely cause: duplicate entry in top1000englishnouns file.
|
||||
// Investigate with: const unique = new Set(words); console.log(words.length - unique.size);
|
||||
// Fix either by deduplicating in code ([...new Set(words)]) or cleaning the source file.
|
||||
|
||||
import fs from "node:fs/promises";
|
||||
import { db } from "@glossa/db";
|
||||
import { translations, terms } from "@glossa/db/schema";
|
||||
|
|
@@ -64,12 +70,14 @@ const main = async () => {
|
|||
console.log("📖 Reading word list...");
|
||||
const sourceWords = await readingFromWordlist();
|
||||
console.log(` ${sourceWords.length} words loaded\n`);
|
||||
|
||||
// check if sourceWords exist in database
|
||||
console.log("🔍 Checking against database...");
|
||||
const { termIds, missingWords } =
|
||||
await checkingSourceWordsAgainstDB(sourceWords);
|
||||
console.log("words found in db: ", termIds.length);
|
||||
console.log("words NOT found in db: ", missingWords.length);
|
||||
|
||||
// write missing words to file
|
||||
await writeMissingWordsToFile(missingWords);
|
||||
};
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue