feat: add db import script, fix duplicate translations in extract, add annotate script

This commit is contained in:
lila 2026-05-03 22:05:10 +02:00
parent 4a842140b9
commit f59399be02
7 changed files with 274 additions and 62 deletions

View file

@ -140,4 +140,27 @@ describe("stage 1 — omw.json validation", () => {
expect(errors, `\n${errors.join("\n")}`).toHaveLength(0);
});
/**
 * Checks that, for every record and every language in the OMW file, no
 * translation string is listed more than once. All offending entries are
 * gathered first so a failing run reports every duplicate in one message.
 */
it("no duplicate translations within a single synset and language", async () => {
  const fileContents = await fs.readFile(OMW_PATH, "utf-8");
  const synsets = JSON.parse(fileContents) as OmwRecord[];

  const problems: string[] = [];

  for (const synset of synsets) {
    for (const [language, wordList] of Object.entries(synset.translations)) {
      // Tracks the words already encountered for this record/language pair.
      const alreadySeen = new Set<string>();

      for (const candidate of wordList) {
        if (!alreadySeen.has(candidate)) {
          alreadySeen.add(candidate);
          continue;
        }

        // Every repeat occurrence is reported, so a word listed three times
        // yields two entries.
        const message = `${synset.source_id} (${language}): duplicate translation "${candidate}"`;
        problems.push(message);
      }
    }
  }

  // The joined list is passed as the assertion message for readable failures.
  expect(problems, `\n${problems.join("\n")}`).toHaveLength(0);
});
});