feat: add stage 2 reverse link sync script
This commit is contained in:
parent
b5a76ee178
commit
6f9a42c707
2 changed files with 110 additions and 0 deletions
|
|
@ -5,6 +5,7 @@
|
||||||
"type": "module",
|
"type": "module",
|
||||||
"scripts": {
|
"scripts": {
|
||||||
"extract": "tsx stage-1-extract/scripts/extract.ts",
|
"extract": "tsx stage-1-extract/scripts/extract.ts",
|
||||||
|
"reverse-link": "tsx stage-2-reverse-link/scripts/reverse-link.ts",
|
||||||
"db:import": "tsx db/import.ts",
|
"db:import": "tsx db/import.ts",
|
||||||
"db:init": "tsx db/init.ts",
|
"db:init": "tsx db/init.ts",
|
||||||
"annotate": "tsx stage-2-annotate/scripts/annotate.ts",
|
"annotate": "tsx stage-2-annotate/scripts/annotate.ts",
|
||||||
|
|
|
||||||
109
data-pipeline/stage-2-reverse-link/scripts/reverse-link.ts
Normal file
109
data-pipeline/stage-2-reverse-link/scripts/reverse-link.ts
Normal file
|
|
@ -0,0 +1,109 @@
|
||||||
|
import { pathToFileURL } from "node:url";

import { openDb } from "../../db/index.js";
|
||||||
|
|
||||||
|
// ── Types ─────────────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
/**
 * Shape of one row returned by the translations-joined-with-entries query in
 * the reverse-link sync: a translation plus the language and headword of the
 * entry that owns it.
 */
interface TranslationRow {
  /** `translations.id` of this row. */
  translation_id: number;
  /** `translations.entry_id`: the entry this translation belongs to. */
  entry_id: number;
  /** `entries.language` of the owning entry. */
  entry_language: string;
  /** `entries.headword` of the owning entry. */
  entry_headword: string;
  /** `translations.target_lang`: language of the translated word. */
  target_lang: string;
  /** `translations.word`: the translated word itself. */
  word: string;
  /** `translations.sense_hint`; `null` when the column holds no value. */
  sense_hint: string | null;
}
|
||||||
|
|
||||||
|
/** Result row of the entry lookup by headword and language: only the entry id. */
interface EntryRow {
  id: number;
}
|
||||||
|
|
||||||
|
// ── Sync ──────────────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
/**
 * Mirrors every stored translation back onto the entry it points at.
 *
 * For each row in `translations` (joined with its owning entry), looks up an
 * entry whose headword equals the translated word in the target language. When
 * one exists, a translation row is inserted on that entry pointing back at the
 * source entry's language and headword, tagged with source `'reverse_link'`.
 * Rows that collide on (entry_id, target_lang, word) are left untouched via
 * `ON CONFLICT ... DO NOTHING` and counted as skipped.
 *
 * All inserts run inside a single transaction. The database handle is closed
 * whether the sync succeeds or throws. Progress and final counts are written
 * to stdout.
 */
export function reverseLink(): void {
  const db = openDb();

  let counts: { inserted: number; skipped: number; noEntry: number };

  try {
    // Find all translations and their source entry details
    const translations = db
      .prepare(
        `SELECT
          t.id AS translation_id,
          t.entry_id,
          e.language AS entry_language,
          e.headword AS entry_headword,
          t.target_lang,
          t.word,
          t.sense_hint
        FROM translations t
        JOIN entries e ON e.id = t.entry_id`,
      )
      .all() as TranslationRow[];

    console.log(
      `  Found ${translations.length.toLocaleString()} translations to check`,
    );

    // NOTE: LIMIT 1 without ORDER BY — if several entries share the same
    // headword and language, which one receives the link is unspecified.
    const findEntry = db.prepare(
      `SELECT id FROM entries WHERE headword = ? AND language = ? LIMIT 1`,
    );

    const insertReverseLink = db.prepare(
      `INSERT INTO translations (entry_id, target_lang, word, sense_hint, source)
       VALUES (?, ?, ?, ?, 'reverse_link')
       ON CONFLICT (entry_id, target_lang, word) DO NOTHING`,
    );

    const sync = db.transaction(() => {
      let inserted = 0;
      let skipped = 0;
      let noEntry = 0;

      for (const t of translations) {
        // Look for an entry in the target language with the translation word as headword
        const targetEntry = findEntry.get(t.word, t.target_lang) as
          | EntryRow
          | undefined;

        if (!targetEntry) {
          noEntry++;
          continue;
        }

        // Insert reverse link: target entry → source language → source headword
        const result = insertReverseLink.run(
          targetEntry.id,
          t.entry_language,
          t.entry_headword,
          t.sense_hint ?? null,
        );

        // `changes` is 0 when ON CONFLICT suppressed the insert.
        if (result.changes > 0) {
          inserted++;
        } else {
          skipped++;
        }
      }

      return { inserted, skipped, noEntry };
    });

    counts = sync();
  } finally {
    // Release the handle even when preparing or running the statements throws.
    db.close();
  }

  console.log(`  Inserted: ${counts.inserted.toLocaleString()} reverse links`);
  console.log(
    `  Skipped:  ${counts.skipped.toLocaleString()} (already existed)`,
  );
  console.log(
    `  No entry: ${counts.noEntry.toLocaleString()} (target word not in entries)`,
  );
}
|
||||||
|
|
||||||
|
// ── Main ─────────────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
/**
 * Command-line entry point: prints a start banner, runs the reverse link
 * sync, then prints a completion message.
 */
function main(): void {
  const startBanner = "Running reverse link sync...";
  const doneBanner = "\nReverse link sync complete.";

  console.log(startBanner);
  reverseLink();
  console.log(doneBanner);
}
|
||||||
|
|
||||||
|
// Run main() only when this file is executed directly, not when it is imported.
// `import.meta.url` is a percent-encoded file URL, so the script path has to be
// converted with pathToFileURL before comparing; a hand-built `file://` prefix
// never matches for paths containing spaces or non-ASCII characters, nor for
// Windows paths.
const invokedScriptPath = process.argv[1];
if (
  invokedScriptPath !== undefined &&
  import.meta.url === pathToFileURL(invokedScriptPath).href
) {
  main();
}
|
||||||
Loading…
Add table
Add a link
Reference in a new issue