Add bug description as TODO comment
This commit is contained in:
parent
488f0dab11
commit
5603f15fe3
1 changed files with 8 additions and 0 deletions
|
|
@@ -2,6 +2,7 @@
|
||||||
- [x] Setup — hardcoded path, name, description, source language, POS
|
- [x] Setup — hardcoded path, name, description, source language, POS
|
||||||
- [x] Read wordlist — load the 1000 nouns
|
- [x] Read wordlist — load the 1000 nouns
|
||||||
- [x] Query terms — match to database, find which ones have translations
|
- [x] Query terms — match to database, find which ones have translations
|
||||||
|
- [ ] Write missing words — save words not found in the database to a text file for future use
|
||||||
- [ ] Validation — determine validated languages from the data (e.g., ["en", "it"] if all matched terms have both)
|
- [ ] Validation — determine validated languages from the data (e.g., ["en", "it"] if all matched terms have both)
|
||||||
- [ ] Check idempotency — skip if deck exists
|
- [ ] Check idempotency — skip if deck exists
|
||||||
- [ ] Create deck — insert with discovered validated_for_languages
|
- [ ] Create deck — insert with discovered validated_for_languages
|
||||||
|
|
@@ -9,6 +10,11 @@
|
||||||
- [ ] Report — summary
|
- [ ] Report — summary
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
// TODO: Wordlist contains 1000 lines but only 999 unique words (965 found + 34 missing = 999).
|
||||||
|
// Likely cause: duplicate entry in top1000englishnouns file.
|
||||||
|
// Investigate with: const unique = new Set(words); console.log(words.length - unique.size);
|
||||||
|
// Fix either by deduplicating in code ([...new Set(words)]) or cleaning the source file.
|
||||||
|
|
||||||
import fs from "node:fs/promises";
|
import fs from "node:fs/promises";
|
||||||
import { db } from "@glossa/db";
|
import { db } from "@glossa/db";
|
||||||
import { translations, terms } from "@glossa/db/schema";
|
import { translations, terms } from "@glossa/db/schema";
|
||||||
|
|
@@ -64,12 +70,14 @@ const main = async () => {
|
||||||
console.log("📖 Reading word list...");
|
console.log("📖 Reading word list...");
|
||||||
const sourceWords = await readingFromWordlist();
|
const sourceWords = await readingFromWordlist();
|
||||||
console.log(` ${sourceWords.length} words loaded\n`);
|
console.log(` ${sourceWords.length} words loaded\n`);
|
||||||
|
|
||||||
// check if sourceWords exist in database
|
// check if sourceWords exist in database
|
||||||
console.log("🔍 Checking against database...");
|
console.log("🔍 Checking against database...");
|
||||||
const { termIds, missingWords } =
|
const { termIds, missingWords } =
|
||||||
await checkingSourceWordsAgainstDB(sourceWords);
|
await checkingSourceWordsAgainstDB(sourceWords);
|
||||||
console.log("words found in db: ", termIds.length);
|
console.log("words found in db: ", termIds.length);
|
||||||
console.log("words NOT found in db: ", missingWords.length);
|
console.log("words NOT found in db: ", missingWords.length);
|
||||||
|
|
||||||
// write missing words to file
|
// write missing words to file
|
||||||
await writeMissingWordsToFile(missingWords);
|
await writeMissingWordsToFile(missingWords);
|
||||||
};
|
};
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue