feat: add stage 2 reverse link sync script
This commit is contained in:
parent
b5a76ee178
commit
6f9a42c707
2 changed files with 110 additions and 0 deletions
|
|
@ -5,6 +5,7 @@
|
|||
"type": "module",
|
||||
"scripts": {
|
||||
"extract": "tsx stage-1-extract/scripts/extract.ts",
|
||||
"reverse-link": "tsx stage-2-reverse-link/scripts/reverse-link.ts",
|
||||
"db:import": "tsx db/import.ts",
|
||||
"db:init": "tsx db/init.ts",
|
||||
"annotate": "tsx stage-2-annotate/scripts/annotate.ts",
|
||||
|
|
|
|||
109
data-pipeline/stage-2-reverse-link/scripts/reverse-link.ts
Normal file
109
data-pipeline/stage-2-reverse-link/scripts/reverse-link.ts
Normal file
|
|
@ -0,0 +1,109 @@
|
|||
import { pathToFileURL } from "node:url";

import { openDb } from "../../db/index.js";
|
||||
|
||||
// ── Types ─────────────────────────────────────────────────────────────────────
|
||||
|
||||
/**
 * One row of the `translations ⨝ entries` query issued by `reverseLink`:
 * a stored translation together with the entry that owns it.
 */
type TranslationRow = {
  /** `translations.id` of the stored translation. */
  translation_id: number;
  /** `translations.entry_id`; joined against `entries.id`. */
  entry_id: number;
  /** `entries.language` of the owning (source) entry. */
  entry_language: string;
  /** `entries.headword` of the owning (source) entry. */
  entry_headword: string;
  /** `translations.target_lang`: language the translation is written in. */
  target_lang: string;
  /** `translations.word`: the translated word itself. */
  word: string;
  /** `translations.sense_hint`; `null` when the row carries no hint. */
  sense_hint: string | null;
};
|
||||
|
||||
/** Result row of the `SELECT id FROM entries ...` lookup in `reverseLink`. */
type EntryRow = {
  id: number;
};
|
||||
|
||||
// ── Sync ──────────────────────────────────────────────────────────────────────
|
||||
|
||||
/**
 * Adds the missing reverse direction for stored translations.
 *
 * Every row of `translations` is read together with its owning entry. For each
 * row, an entry is looked up whose headword equals the translated word and
 * whose language equals the translation's target language. When such an entry
 * exists, a translation pointing back at the source entry (source language +
 * source headword, same sense hint) is inserted for it with
 * `source = 'reverse_link'`. Inserts use `ON CONFLICT ... DO NOTHING`, so a
 * link that is already present is counted as skipped rather than duplicated.
 *
 * All inserts run inside a single `db.transaction(...)` call, and a summary
 * (inserted / skipped / no matching entry) is printed to stdout.
 *
 * The database handle is always closed, including when a statement fails.
 *
 * @throws Re-throws any error raised by the database layer after closing the
 *   handle.
 */
export function reverseLink(): void {
  const db = openDb();
  let counts: { inserted: number; skipped: number; noEntry: number };

  try {
    // Find all translations and their source entry details.
    // The full list is loaded up front, so rows inserted below are not
    // revisited during this run.
    const translations = db
      .prepare(
        `SELECT
           t.id AS translation_id,
           t.entry_id,
           e.language AS entry_language,
           e.headword AS entry_headword,
           t.target_lang,
           t.word,
           t.sense_hint
         FROM translations t
         JOIN entries e ON e.id = t.entry_id`,
      )
      .all() as TranslationRow[];

    console.log(
      `  Found ${translations.length.toLocaleString()} translations to check`,
    );

    // LIMIT 1: if several entries share a headword and language, only one
    // (whichever the query returns first) receives the reverse link.
    const findEntry = db.prepare(
      `SELECT id FROM entries WHERE headword = ? AND language = ? LIMIT 1`,
    );

    // NOTE(review): the ON CONFLICT target presumably matches a unique
    // constraint on (entry_id, target_lang, word) — confirm against the schema.
    const insertReverseLink = db.prepare(
      `INSERT INTO translations (entry_id, target_lang, word, sense_hint, source)
       VALUES (?, ?, ?, ?, 'reverse_link')
       ON CONFLICT (entry_id, target_lang, word) DO NOTHING`,
    );

    const sync = db.transaction(() => {
      let inserted = 0;
      let skipped = 0;
      let noEntry = 0;

      for (const t of translations) {
        // Look for an entry in the target language with the translation word
        // as its headword.
        const targetEntry = findEntry.get(t.word, t.target_lang) as
          | EntryRow
          | undefined;

        if (!targetEntry) {
          noEntry++;
          continue;
        }

        // Insert reverse link: target entry → source language → source headword.
        const result = insertReverseLink.run(
          targetEntry.id,
          t.entry_language,
          t.entry_headword,
          t.sense_hint ?? null,
        );

        // `changes` is 0 when ON CONFLICT suppressed the insert.
        if (result.changes > 0) {
          inserted++;
        } else {
          skipped++;
        }
      }

      return { inserted, skipped, noEntry };
    });

    counts = sync();
  } finally {
    // Release the handle on every path; previously an exception from any
    // statement above skipped the close and leaked the connection.
    db.close();
  }

  console.log(`  Inserted: ${counts.inserted.toLocaleString()} reverse links`);
  console.log(
    `  Skipped:  ${counts.skipped.toLocaleString()} (already existed)`,
  );
  console.log(
    `  No entry: ${counts.noEntry.toLocaleString()} (target word not in entries)`,
  );
}
|
||||
|
||||
// ── Main ─────────────────────────────────────────────────────────────────────
|
||||
|
||||
/**
 * CLI entry point: prints a start banner, runs the reverse link sync, and
 * prints a completion message once it returns.
 */
function main(): void {
  const startBanner = "Running reverse link sync...";
  const doneBanner = "\nReverse link sync complete.";

  console.log(startBanner);
  reverseLink();
  console.log(doneBanner);
}
|
||||
|
||||
// Run the sync only when this file is the process entry point, not when it is
// imported as a module.
//
// `pathToFileURL` produces a percent-encoded, platform-correct file URL for
// comparison with `import.meta.url`. Concatenating "file://" with the raw path
// never matches for Windows paths or for paths containing spaces, '#', '%' or
// non-ASCII characters, so the script would silently do nothing there.
const entryScriptPath = process.argv[1];

if (
  entryScriptPath !== undefined &&
  import.meta.url === pathToFileURL(entryScriptPath).href
) {
  main();
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue