feat: add stage 2 reverse link sync script

This commit is contained in:
lila 2026-05-05 18:57:55 +02:00
parent b5a76ee178
commit 6f9a42c707
2 changed files with 110 additions and 0 deletions

View file

@ -5,6 +5,7 @@
"type": "module",
"scripts": {
"extract": "tsx stage-1-extract/scripts/extract.ts",
"reverse-link": "tsx stage-2-reverse-link/scripts/reverse-link.ts",
"db:import": "tsx db/import.ts",
"db:init": "tsx db/init.ts",
"annotate": "tsx stage-2-annotate/scripts/annotate.ts",

View file

@ -0,0 +1,109 @@
import { pathToFileURL } from "node:url";

import { openDb } from "../../db/index.js";
// ── Types ─────────────────────────────────────────────────────────────────────
/**
 * One row of the SELECT issued by `reverseLink`: a `translations` record
 * joined with the `entries` record it belongs to (`e.id = t.entry_id`).
 */
type TranslationRow = {
// `translations.id` of the existing translation.
translation_id: number;
// `translations.entry_id` — the entry that owns this translation.
entry_id: number;
// `entries.language` of the owning entry.
entry_language: string;
// `entries.headword` of the owning entry.
entry_headword: string;
// `translations.target_lang` — language of the translated word.
target_lang: string;
// `translations.word` — the translated word itself.
word: string;
// `translations.sense_hint`; nullable, copied unchanged onto the reverse link.
sense_hint: string | null;
};
/** Row shape of the `SELECT id FROM entries …` lookup used by `reverseLink`. */
type EntryRow = { id: number };
// ── Sync ──────────────────────────────────────────────────────────────────────
/**
 * Adds the mirror-image row for every existing translation whose translated
 * word is itself a headword in the target language.
 *
 * For each translation "source entry → target_lang: word", looks up an entry
 * with `headword = word` and `language = target_lang`. When one exists, inserts
 * a translation on that entry pointing back at the source entry's language and
 * headword, tagged `source = 'reverse_link'` and carrying over the original
 * `sense_hint`. A row that already exists for the same
 * (entry_id, target_lang, word) is left untouched and counted as skipped.
 *
 * The lookups and inserts run inside a `db.transaction` wrapper. The database
 * handle is closed whether or not the sync succeeds; errors propagate to the
 * caller. Progress and final counts are written to stdout.
 */
export function reverseLink(): void {
  const db = openDb();
  let counts: { inserted: number; skipped: number; noEntry: number };

  try {
    // Find all translations and their source entry details.
    // Loaded fully up front: the loop below inserts into this same table.
    const translations = db
      .prepare(
        `SELECT
t.id AS translation_id,
t.entry_id,
e.language AS entry_language,
e.headword AS entry_headword,
t.target_lang,
t.word,
t.sense_hint
FROM translations t
JOIN entries e ON e.id = t.entry_id`,
      )
      .all() as TranslationRow[];

    console.log(
      `  Found ${translations.length.toLocaleString()} translations to check`,
    );

    // Statements are prepared once and reused for every row.
    const findEntry = db.prepare(
      `SELECT id FROM entries WHERE headword = ? AND language = ? LIMIT 1`,
    );
    const insertReverseLink = db.prepare(
      `INSERT INTO translations (entry_id, target_lang, word, sense_hint, source)
VALUES (?, ?, ?, ?, 'reverse_link')
ON CONFLICT (entry_id, target_lang, word) DO NOTHING`,
    );

    const sync = db.transaction(() => {
      let inserted = 0;
      let skipped = 0;
      let noEntry = 0;

      for (const t of translations) {
        // Look for an entry in the target language with the translation word as headword
        const targetEntry = findEntry.get(t.word, t.target_lang) as
          | EntryRow
          | undefined;
        if (!targetEntry) {
          noEntry++;
          continue;
        }

        // Insert reverse link: target entry → source language → source headword
        const result = insertReverseLink.run(
          targetEntry.id,
          t.entry_language,
          t.entry_headword,
          t.sense_hint ?? null,
        );

        // Zero changes means ON CONFLICT … DO NOTHING suppressed the insert.
        if (result.changes > 0) {
          inserted++;
        } else {
          skipped++;
        }
      }

      return { inserted, skipped, noEntry };
    });

    counts = sync();
  } finally {
    // Release the handle even when a query or the transaction throws.
    db.close();
  }

  console.log(`  Inserted: ${counts.inserted.toLocaleString()} reverse links`);
  console.log(
    `  Skipped:  ${counts.skipped.toLocaleString()} (already existed)`,
  );
  console.log(
    `  No entry: ${counts.noEntry.toLocaleString()} (target word not in entries)`,
  );
}
// ── Main ─────────────────────────────────────────────────────────────────────
/** Script entry point: announces the run, performs the sync, then reports completion. */
function main(): void {
  const startMessage = "Running reverse link sync...";
  const doneMessage = "\nReverse link sync complete.";

  console.log(startMessage);
  reverseLink();
  console.log(doneMessage);
}
// Run the sync only when this file is the script being executed, not when it
// is imported as a module. The comparison URL is built with pathToFileURL so
// that Windows paths and paths containing characters that must be
// percent-encoded (spaces, '#', non-ASCII) still match import.meta.url.
const entryScriptPath = process.argv[1];
if (
  entryScriptPath !== undefined &&
  import.meta.url === pathToFileURL(entryScriptPath).href
) {
  main();
}