feat(db): add incremental upsert seed script for WordNet vocabulary

Implements packages/db/src/seed.ts — reads all JSON files from
scripts/datafiles/, validates filenames against supported language
codes and POS, and upserts synsets into `terms` and `translations`
via onConflictDoNothing. Safe to re-run; produces 0 writes on
a duplicate run.
This commit is contained in:
lila 2026-03-30 15:58:01 +02:00
parent 55885336ba
commit 2b177aad5b
12 changed files with 1349 additions and 10 deletions

View file

@ -0,0 +1,84 @@
-- Join table between decks and terms. The composite primary key
-- (deck_id, term_id) allows a given term at most once per deck.
-- "position" is a required integer, presumably the term's order within the
-- deck (TODO confirm against the application code).
CREATE TABLE "deck_terms" (
"deck_id" uuid NOT NULL,
"term_id" uuid NOT NULL,
"position" integer NOT NULL,
"added_at" timestamp with time zone DEFAULT now() NOT NULL,
CONSTRAINT "deck_terms_deck_id_term_id_pk" PRIMARY KEY("deck_id","term_id")
);
--> statement-breakpoint
-- A deck has a required name, an optional description, a language pair and
-- a creating user. Deck names are unique per creator ("unique_deck_name"),
-- and decks are non-public by default.
CREATE TABLE "decks" (
"id" uuid PRIMARY KEY DEFAULT gen_random_uuid() NOT NULL,
"name" text NOT NULL,
"description" text,
"language_pair_id" uuid NOT NULL,
"created_by" uuid NOT NULL,
"is_public" boolean DEFAULT false NOT NULL,
"created_at" timestamp with time zone DEFAULT now() NOT NULL,
CONSTRAINT "unique_deck_name" UNIQUE("name","created_by")
);
--> statement-breakpoint
-- A (source, target) language combination. Each ordered pair may exist only
-- once, both codes must be 'en' or 'it', and a language cannot be paired
-- with itself. Pairs are active by default.
CREATE TABLE "language_pairs" (
"id" uuid PRIMARY KEY DEFAULT gen_random_uuid() NOT NULL,
"source_language" varchar(10) NOT NULL,
"target_language" varchar(10) NOT NULL,
"label" text,
"active" boolean DEFAULT true NOT NULL,
"created_at" timestamp with time zone DEFAULT now() NOT NULL,
CONSTRAINT "unique_source_target" UNIQUE("source_language","target_language"),
CONSTRAINT "source_language_check" CHECK ("language_pairs"."source_language" IN ('en', 'it')),
CONSTRAINT "target_language_check" CHECK ("language_pairs"."target_language" IN ('en', 'it')),
CONSTRAINT "no_self_pair" CHECK ("language_pairs"."source_language" != "language_pairs"."target_language")
);
--> statement-breakpoint
-- Gloss text attached to a term, stored per language code.
-- The same text cannot be stored twice for the same term and language.
CREATE TABLE "term_glosses" (
"id" uuid PRIMARY KEY DEFAULT gen_random_uuid() NOT NULL,
"term_id" uuid NOT NULL,
"language_code" varchar(10) NOT NULL,
"text" text NOT NULL,
"created_at" timestamp with time zone DEFAULT now() NOT NULL,
CONSTRAINT "unique_term_gloss" UNIQUE("term_id","language_code","text")
);
--> statement-breakpoint
-- One row per synset: "synset_id" is unique, which is the conflict target the
-- seed script relies on when it re-inserts an existing term.
-- "pos" is currently restricted to 'noun' by "pos_check".
CREATE TABLE "terms" (
"id" uuid PRIMARY KEY DEFAULT gen_random_uuid() NOT NULL,
"synset_id" text NOT NULL,
"pos" varchar(20) NOT NULL,
"created_at" timestamp with time zone DEFAULT now() NOT NULL,
CONSTRAINT "terms_synset_id_unique" UNIQUE("synset_id"),
CONSTRAINT "pos_check" CHECK ("terms"."pos" IN ('noun'))
);
--> statement-breakpoint
-- Translation text for a term in a given language.
-- (term_id, language_code, text) is unique, so inserting the same text again
-- for the same term and language conflicts instead of duplicating the row.
CREATE TABLE "translations" (
"id" uuid PRIMARY KEY DEFAULT gen_random_uuid() NOT NULL,
"term_id" uuid NOT NULL,
"language_code" varchar(10) NOT NULL,
"text" text NOT NULL,
"created_at" timestamp with time zone DEFAULT now() NOT NULL,
CONSTRAINT "unique_translations" UNIQUE("term_id","language_code","text")
);
--> statement-breakpoint
-- Application users. "openauth_sub" is required and unique; it is presumably
-- the subject identifier issued by the auth provider (TODO confirm).
-- "email" and "display_name" are optional but must be unique when present.
CREATE TABLE "users" (
"id" uuid PRIMARY KEY DEFAULT gen_random_uuid() NOT NULL,
"openauth_sub" text NOT NULL,
"email" varchar(255),
"display_name" varchar(100),
"created_at" timestamp with time zone DEFAULT now() NOT NULL,
"last_login_at" timestamp with time zone,
CONSTRAINT "users_openauth_sub_unique" UNIQUE("openauth_sub"),
CONSTRAINT "users_email_unique" UNIQUE("email"),
CONSTRAINT "users_display_name_unique" UNIQUE("display_name")
);
--> statement-breakpoint
-- Foreign keys. Every reference uses ON DELETE cascade, so deleting a parent
-- row (deck, term, language pair or user) also deletes the rows that point to it.
ALTER TABLE "deck_terms" ADD CONSTRAINT "deck_terms_deck_id_decks_id_fk" FOREIGN KEY ("deck_id") REFERENCES "public"."decks"("id") ON DELETE cascade ON UPDATE no action;--> statement-breakpoint
ALTER TABLE "deck_terms" ADD CONSTRAINT "deck_terms_term_id_terms_id_fk" FOREIGN KEY ("term_id") REFERENCES "public"."terms"("id") ON DELETE cascade ON UPDATE no action;--> statement-breakpoint
ALTER TABLE "decks" ADD CONSTRAINT "decks_language_pair_id_language_pairs_id_fk" FOREIGN KEY ("language_pair_id") REFERENCES "public"."language_pairs"("id") ON DELETE cascade ON UPDATE no action;--> statement-breakpoint
ALTER TABLE "decks" ADD CONSTRAINT "decks_created_by_users_id_fk" FOREIGN KEY ("created_by") REFERENCES "public"."users"("id") ON DELETE cascade ON UPDATE no action;--> statement-breakpoint
ALTER TABLE "term_glosses" ADD CONSTRAINT "term_glosses_term_id_terms_id_fk" FOREIGN KEY ("term_id") REFERENCES "public"."terms"("id") ON DELETE cascade ON UPDATE no action;--> statement-breakpoint
ALTER TABLE "translations" ADD CONSTRAINT "translations_term_id_terms_id_fk" FOREIGN KEY ("term_id") REFERENCES "public"."terms"("id") ON DELETE cascade ON UPDATE no action;--> statement-breakpoint
-- Secondary B-tree indexes, mostly on the foreign-key columns declared above.
-- "idx_pairs_active" and "idx_translations_lang" are multi-column; their
-- leading columns are "active" and "language_code" respectively.
CREATE INDEX "idx_deck_terms_term" ON "deck_terms" USING btree ("term_id");--> statement-breakpoint
CREATE INDEX "idx_decks_created_by" ON "decks" USING btree ("created_by");--> statement-breakpoint
CREATE INDEX "idx_decks_language_pair" ON "decks" USING btree ("language_pair_id");--> statement-breakpoint
CREATE INDEX "idx_pairs_active" ON "language_pairs" USING btree ("active","source_language","target_language");--> statement-breakpoint
CREATE INDEX "idx_term_glosses_term" ON "term_glosses" USING btree ("term_id");--> statement-breakpoint
CREATE INDEX "idx_terms_pos" ON "terms" USING btree ("pos");--> statement-breakpoint
CREATE INDEX "idx_translations_lang" ON "translations" USING btree ("language_code","term_id");

View file

@ -0,0 +1,652 @@
{
"id": "5830ce3b-dc0e-44a7-83d6-bc74016ca4fd",
"prevId": "00000000-0000-0000-0000-000000000000",
"version": "7",
"dialect": "postgresql",
"tables": {
"public.deck_terms": {
"name": "deck_terms",
"schema": "",
"columns": {
"deck_id": {
"name": "deck_id",
"type": "uuid",
"primaryKey": false,
"notNull": true
},
"term_id": {
"name": "term_id",
"type": "uuid",
"primaryKey": false,
"notNull": true
},
"position": {
"name": "position",
"type": "integer",
"primaryKey": false,
"notNull": true
},
"added_at": {
"name": "added_at",
"type": "timestamp with time zone",
"primaryKey": false,
"notNull": true,
"default": "now()"
}
},
"indexes": {
"idx_deck_terms_term": {
"name": "idx_deck_terms_term",
"columns": [
{
"expression": "term_id",
"isExpression": false,
"asc": true,
"nulls": "last"
}
],
"isUnique": false,
"concurrently": false,
"method": "btree",
"with": {}
}
},
"foreignKeys": {
"deck_terms_deck_id_decks_id_fk": {
"name": "deck_terms_deck_id_decks_id_fk",
"tableFrom": "deck_terms",
"tableTo": "decks",
"columnsFrom": [
"deck_id"
],
"columnsTo": [
"id"
],
"onDelete": "cascade",
"onUpdate": "no action"
},
"deck_terms_term_id_terms_id_fk": {
"name": "deck_terms_term_id_terms_id_fk",
"tableFrom": "deck_terms",
"tableTo": "terms",
"columnsFrom": [
"term_id"
],
"columnsTo": [
"id"
],
"onDelete": "cascade",
"onUpdate": "no action"
}
},
"compositePrimaryKeys": {
"deck_terms_deck_id_term_id_pk": {
"name": "deck_terms_deck_id_term_id_pk",
"columns": [
"deck_id",
"term_id"
]
}
},
"uniqueConstraints": {},
"policies": {},
"checkConstraints": {},
"isRLSEnabled": false
},
"public.decks": {
"name": "decks",
"schema": "",
"columns": {
"id": {
"name": "id",
"type": "uuid",
"primaryKey": true,
"notNull": true,
"default": "gen_random_uuid()"
},
"name": {
"name": "name",
"type": "text",
"primaryKey": false,
"notNull": true
},
"description": {
"name": "description",
"type": "text",
"primaryKey": false,
"notNull": false
},
"language_pair_id": {
"name": "language_pair_id",
"type": "uuid",
"primaryKey": false,
"notNull": true
},
"created_by": {
"name": "created_by",
"type": "uuid",
"primaryKey": false,
"notNull": true
},
"is_public": {
"name": "is_public",
"type": "boolean",
"primaryKey": false,
"notNull": true,
"default": false
},
"created_at": {
"name": "created_at",
"type": "timestamp with time zone",
"primaryKey": false,
"notNull": true,
"default": "now()"
}
},
"indexes": {
"idx_decks_created_by": {
"name": "idx_decks_created_by",
"columns": [
{
"expression": "created_by",
"isExpression": false,
"asc": true,
"nulls": "last"
}
],
"isUnique": false,
"concurrently": false,
"method": "btree",
"with": {}
},
"idx_decks_language_pair": {
"name": "idx_decks_language_pair",
"columns": [
{
"expression": "language_pair_id",
"isExpression": false,
"asc": true,
"nulls": "last"
}
],
"isUnique": false,
"concurrently": false,
"method": "btree",
"with": {}
}
},
"foreignKeys": {
"decks_language_pair_id_language_pairs_id_fk": {
"name": "decks_language_pair_id_language_pairs_id_fk",
"tableFrom": "decks",
"tableTo": "language_pairs",
"columnsFrom": [
"language_pair_id"
],
"columnsTo": [
"id"
],
"onDelete": "cascade",
"onUpdate": "no action"
},
"decks_created_by_users_id_fk": {
"name": "decks_created_by_users_id_fk",
"tableFrom": "decks",
"tableTo": "users",
"columnsFrom": [
"created_by"
],
"columnsTo": [
"id"
],
"onDelete": "cascade",
"onUpdate": "no action"
}
},
"compositePrimaryKeys": {},
"uniqueConstraints": {
"unique_deck_name": {
"name": "unique_deck_name",
"nullsNotDistinct": false,
"columns": [
"name",
"created_by"
]
}
},
"policies": {},
"checkConstraints": {},
"isRLSEnabled": false
},
"public.language_pairs": {
"name": "language_pairs",
"schema": "",
"columns": {
"id": {
"name": "id",
"type": "uuid",
"primaryKey": true,
"notNull": true,
"default": "gen_random_uuid()"
},
"source_language": {
"name": "source_language",
"type": "varchar(10)",
"primaryKey": false,
"notNull": true
},
"target_language": {
"name": "target_language",
"type": "varchar(10)",
"primaryKey": false,
"notNull": true
},
"label": {
"name": "label",
"type": "text",
"primaryKey": false,
"notNull": false
},
"active": {
"name": "active",
"type": "boolean",
"primaryKey": false,
"notNull": true,
"default": true
},
"created_at": {
"name": "created_at",
"type": "timestamp with time zone",
"primaryKey": false,
"notNull": true,
"default": "now()"
}
},
"indexes": {
"idx_pairs_active": {
"name": "idx_pairs_active",
"columns": [
{
"expression": "active",
"isExpression": false,
"asc": true,
"nulls": "last"
},
{
"expression": "source_language",
"isExpression": false,
"asc": true,
"nulls": "last"
},
{
"expression": "target_language",
"isExpression": false,
"asc": true,
"nulls": "last"
}
],
"isUnique": false,
"concurrently": false,
"method": "btree",
"with": {}
}
},
"foreignKeys": {},
"compositePrimaryKeys": {},
"uniqueConstraints": {
"unique_source_target": {
"name": "unique_source_target",
"nullsNotDistinct": false,
"columns": [
"source_language",
"target_language"
]
}
},
"policies": {},
"checkConstraints": {
"source_language_check": {
"name": "source_language_check",
"value": "\"language_pairs\".\"source_language\" IN ('en', 'it')"
},
"target_language_check": {
"name": "target_language_check",
"value": "\"language_pairs\".\"target_language\" IN ('en', 'it')"
},
"no_self_pair": {
"name": "no_self_pair",
"value": "\"language_pairs\".\"source_language\" != \"language_pairs\".\"target_language\""
}
},
"isRLSEnabled": false
},
"public.term_glosses": {
"name": "term_glosses",
"schema": "",
"columns": {
"id": {
"name": "id",
"type": "uuid",
"primaryKey": true,
"notNull": true,
"default": "gen_random_uuid()"
},
"term_id": {
"name": "term_id",
"type": "uuid",
"primaryKey": false,
"notNull": true
},
"language_code": {
"name": "language_code",
"type": "varchar(10)",
"primaryKey": false,
"notNull": true
},
"text": {
"name": "text",
"type": "text",
"primaryKey": false,
"notNull": true
},
"created_at": {
"name": "created_at",
"type": "timestamp with time zone",
"primaryKey": false,
"notNull": true,
"default": "now()"
}
},
"indexes": {
"idx_term_glosses_term": {
"name": "idx_term_glosses_term",
"columns": [
{
"expression": "term_id",
"isExpression": false,
"asc": true,
"nulls": "last"
}
],
"isUnique": false,
"concurrently": false,
"method": "btree",
"with": {}
}
},
"foreignKeys": {
"term_glosses_term_id_terms_id_fk": {
"name": "term_glosses_term_id_terms_id_fk",
"tableFrom": "term_glosses",
"tableTo": "terms",
"columnsFrom": [
"term_id"
],
"columnsTo": [
"id"
],
"onDelete": "cascade",
"onUpdate": "no action"
}
},
"compositePrimaryKeys": {},
"uniqueConstraints": {
"unique_term_gloss": {
"name": "unique_term_gloss",
"nullsNotDistinct": false,
"columns": [
"term_id",
"language_code",
"text"
]
}
},
"policies": {},
"checkConstraints": {},
"isRLSEnabled": false
},
"public.terms": {
"name": "terms",
"schema": "",
"columns": {
"id": {
"name": "id",
"type": "uuid",
"primaryKey": true,
"notNull": true,
"default": "gen_random_uuid()"
},
"synset_id": {
"name": "synset_id",
"type": "text",
"primaryKey": false,
"notNull": true
},
"pos": {
"name": "pos",
"type": "varchar(20)",
"primaryKey": false,
"notNull": true
},
"created_at": {
"name": "created_at",
"type": "timestamp with time zone",
"primaryKey": false,
"notNull": true,
"default": "now()"
}
},
"indexes": {
"idx_terms_pos": {
"name": "idx_terms_pos",
"columns": [
{
"expression": "pos",
"isExpression": false,
"asc": true,
"nulls": "last"
}
],
"isUnique": false,
"concurrently": false,
"method": "btree",
"with": {}
}
},
"foreignKeys": {},
"compositePrimaryKeys": {},
"uniqueConstraints": {
"terms_synset_id_unique": {
"name": "terms_synset_id_unique",
"nullsNotDistinct": false,
"columns": [
"synset_id"
]
}
},
"policies": {},
"checkConstraints": {
"pos_check": {
"name": "pos_check",
"value": "\"terms\".\"pos\" IN ('noun')"
}
},
"isRLSEnabled": false
},
"public.translations": {
"name": "translations",
"schema": "",
"columns": {
"id": {
"name": "id",
"type": "uuid",
"primaryKey": true,
"notNull": true,
"default": "gen_random_uuid()"
},
"term_id": {
"name": "term_id",
"type": "uuid",
"primaryKey": false,
"notNull": true
},
"language_code": {
"name": "language_code",
"type": "varchar(10)",
"primaryKey": false,
"notNull": true
},
"text": {
"name": "text",
"type": "text",
"primaryKey": false,
"notNull": true
},
"created_at": {
"name": "created_at",
"type": "timestamp with time zone",
"primaryKey": false,
"notNull": true,
"default": "now()"
}
},
"indexes": {
"idx_translations_lang": {
"name": "idx_translations_lang",
"columns": [
{
"expression": "language_code",
"isExpression": false,
"asc": true,
"nulls": "last"
},
{
"expression": "term_id",
"isExpression": false,
"asc": true,
"nulls": "last"
}
],
"isUnique": false,
"concurrently": false,
"method": "btree",
"with": {}
}
},
"foreignKeys": {
"translations_term_id_terms_id_fk": {
"name": "translations_term_id_terms_id_fk",
"tableFrom": "translations",
"tableTo": "terms",
"columnsFrom": [
"term_id"
],
"columnsTo": [
"id"
],
"onDelete": "cascade",
"onUpdate": "no action"
}
},
"compositePrimaryKeys": {},
"uniqueConstraints": {
"unique_translations": {
"name": "unique_translations",
"nullsNotDistinct": false,
"columns": [
"term_id",
"language_code",
"text"
]
}
},
"policies": {},
"checkConstraints": {},
"isRLSEnabled": false
},
"public.users": {
"name": "users",
"schema": "",
"columns": {
"id": {
"name": "id",
"type": "uuid",
"primaryKey": true,
"notNull": true,
"default": "gen_random_uuid()"
},
"openauth_sub": {
"name": "openauth_sub",
"type": "text",
"primaryKey": false,
"notNull": true
},
"email": {
"name": "email",
"type": "varchar(255)",
"primaryKey": false,
"notNull": false
},
"display_name": {
"name": "display_name",
"type": "varchar(100)",
"primaryKey": false,
"notNull": false
},
"created_at": {
"name": "created_at",
"type": "timestamp with time zone",
"primaryKey": false,
"notNull": true,
"default": "now()"
},
"last_login_at": {
"name": "last_login_at",
"type": "timestamp with time zone",
"primaryKey": false,
"notNull": false
}
},
"indexes": {},
"foreignKeys": {},
"compositePrimaryKeys": {},
"uniqueConstraints": {
"users_openauth_sub_unique": {
"name": "users_openauth_sub_unique",
"nullsNotDistinct": false,
"columns": [
"openauth_sub"
]
},
"users_email_unique": {
"name": "users_email_unique",
"nullsNotDistinct": false,
"columns": [
"email"
]
},
"users_display_name_unique": {
"name": "users_display_name_unique",
"nullsNotDistinct": false,
"columns": [
"display_name"
]
}
},
"policies": {},
"checkConstraints": {},
"isRLSEnabled": false
}
},
"enums": {},
"schemas": {},
"sequences": {},
"roles": {},
"policies": {},
"views": {},
"_meta": {
"columns": {},
"schemas": {},
"tables": {}
}
}

View file

@ -1 +1,13 @@
{"version":"7","dialect":"postgresql","entries":[]}
{
"version": "7",
"dialect": "postgresql",
"entries": [
{
"idx": 0,
"version": "7",
"when": 1774721919883,
"tag": "0000_bitter_turbo",
"breakpoints": true
}
]
}

View file

@ -16,5 +16,8 @@
"@types/pg": "^8.20.0",
"drizzle-kit": "^0.31.10"
},
"exports": "./src/index.ts"
"exports": {
".": "./src/index.ts",
"./schema": "./src/db/schema.ts"
}
}

View file

@ -74,7 +74,6 @@ export const term_glosses = pgTable(
table.language_code,
table.text,
),
,
index("idx_term_glosses_term").on(table.term_id),
],
);

View file

@ -0,0 +1,203 @@
import fs from "node:fs/promises";
import { eq } from "drizzle-orm";
import { SUPPORTED_LANGUAGE_CODES, SUPPORTED_POS } from "@glossa/shared";
import { db } from "@glossa/db";
import { terms, translations } from "@glossa/db/schema";
// Union types derived from the const arrays imported from @glossa/shared:
// indexing the array type with `number` yields the union of its elements.
type POS = (typeof SUPPORTED_POS)[number];
type LANGUAGE_CODE = (typeof SUPPORTED_LANGUAGE_CODES)[number];
/**
 * One entry of a data file. `synset_id` and `pos` are written to the `terms`
 * table; every lemma listed under a language code in `translations` becomes
 * one row of the `translations` table.
 */
type Synset = {
synset_id: string;
pos: POS;
translations: Partial<Record<LANGUAGE_CODE, string[]>>;
};
/** Components parsed from a data file name (`sourcelang-targetlang-pos.json`). */
type FileName = {
sourceLang: LANGUAGE_CODE;
targetLang: LANGUAGE_CODE;
pos: POS;
};
// Directory scanned for data files. The path is relative, so Node resolves it
// against the process working directory — presumably the script is meant to be
// run from packages/db (TODO confirm). The trailing slash matters: file paths
// are built by plain string concatenation.
const dataDir = "../../scripts/datafiles/";
/**
 * Parses a data file name of the form `sourcelang-targetlang-pos.json`.
 *
 * @param filename - Bare file name without a directory, e.g. `en-it-noun.json`.
 * @returns The source language, target language and part of speech.
 * @throws Error if the name does not consist of exactly three `-`-separated
 *   parts, or if a part is not listed in SUPPORTED_LANGUAGE_CODES /
 *   SUPPORTED_POS.
 */
const parseFilename = (filename: string): FileName => {
  // Strip the extension only when it is the suffix. String.replace(".json", "")
  // removes the first ".json" anywhere in the name, which let a name such as
  // "en.json-it-noun" pass validation.
  const extension = ".json";
  const stem = filename.endsWith(extension)
    ? filename.slice(0, -extension.length)
    : filename;
  const parts = stem.split("-");
  if (parts.length !== 3)
    throw new Error(
      `Invalid filename format: ${filename}. Expected: sourcelang-targetlang-pos.json`,
    );
  const [sourceLang, targetLang, pos] = parts;
  if (!SUPPORTED_LANGUAGE_CODES.includes(sourceLang as LANGUAGE_CODE))
    throw new Error(`Unsupported language code: ${sourceLang}`);
  if (!SUPPORTED_LANGUAGE_CODES.includes(targetLang as LANGUAGE_CODE))
    throw new Error(`Unsupported language code: ${targetLang}`);
  if (!SUPPORTED_POS.includes(pos as POS))
    throw new Error(`Unsupported POS: ${pos}`);
  // The casts are safe here: each value was just checked against its list.
  return {
    sourceLang: sourceLang as LANGUAGE_CODE,
    targetLang: targetLang as LANGUAGE_CODE,
    pos: pos as POS,
  };
};
/**
 * Reads a data file and returns its content as a list of synsets.
 *
 * The parsed JSON is checked structurally before it is returned: it must be an
 * array whose entries are objects with a string `synset_id`, a string `pos`
 * and a `translations` object whose values are arrays of strings. Whether
 * `pos` and the language keys are supported values is not checked here.
 *
 * @param filepath - Path of the JSON file to read (UTF-8).
 * @returns The entries of the file, in file order.
 * @throws Error if the top-level value is not an array or an entry has the
 *   wrong shape; the message names the file (and the entry index).
 * @throws SyntaxError if the file is not valid JSON; file-system errors from
 *   `fs.readFile` propagate unchanged.
 */
const readFromJsonFile = async (filepath: string): Promise<Synset[]> => {
  const raw = await fs.readFile(filepath, "utf8");
  const parsed: unknown = JSON.parse(raw);
  if (!Array.isArray(parsed))
    throw new Error(`Expected a JSON array in ${filepath}`);

  const isRecord = (value: unknown): value is Record<string, unknown> =>
    typeof value === "object" && value !== null && !Array.isArray(value);

  const entries: unknown[] = parsed;
  for (const [index, entry] of entries.entries()) {
    const valid =
      isRecord(entry) &&
      typeof entry.synset_id === "string" &&
      typeof entry.pos === "string" &&
      isRecord(entry.translations) &&
      Object.values(entry.translations).every(
        (lemmas) =>
          Array.isArray(lemmas) &&
          lemmas.every((lemma) => typeof lemma === "string"),
      );
    // Fail here with the file and index rather than later, mid-upload, with an
    // opaque TypeError or database error.
    if (!valid)
      throw new Error(`Invalid synset at index ${index} in ${filepath}`);
  }
  // Shape verified above, so the cast no longer hides malformed data.
  return parsed as Synset[];
};
/**
 * Writes one synset to the database: its row in `terms` plus one row in
 * `translations` per lemma. Both inserts use `onConflictDoNothing`, so rows
 * that already exist are left untouched and are not counted.
 *
 * The two inserts are separate statements, not one transaction.
 *
 * @param synset - The synset to store.
 * @param _fileInfo - Parsed name of the file the synset came from; currently unused.
 * @returns Whether a new term row was created, and how many translation rows
 *   were newly inserted.
 * @throws Error if the term insert conflicts but the existing term cannot be
 *   found afterwards; database errors propagate unchanged.
 */
const uploadSynsetToDB = async (
  synset: Synset,
  _fileInfo: FileName,
): Promise<{ termInserted: boolean; translationsInserted: number }> => {
  // 1. Try to insert the term. On a conflict nothing is inserted and
  //    `returning` yields no row.
  const [created] = await db
    .insert(terms)
    .values({ synset_id: synset.synset_id, pos: synset.pos })
    .onConflictDoNothing()
    .returning({ id: terms.id });
  const termInserted = created !== undefined;

  let termId: string;
  if (created !== undefined) {
    termId = created.id;
  } else {
    // Term already exists — fetch its real DB id for the FK
    const [existing] = await db
      .select({ id: terms.id })
      .from(terms)
      .where(eq(terms.synset_id, synset.synset_id))
      .limit(1);
    if (!existing)
      throw new Error(`Term not found after conflict: ${synset.synset_id}`);
    termId = existing.id;
  }

  // 2. Build one row per lemma. No `id` is supplied: the column defaults to
  //    gen_random_uuid() in the database, so a client-side UUID is redundant.
  const translationRows = Object.entries(synset.translations).flatMap(
    ([lang, lemmas]) =>
      // A language key may carry no lemma list (the type is Partial, and JSON
      // input can hold null); treat that as "no lemmas" instead of crashing.
      (lemmas ?? []).map((lemma) => ({
        term_id: termId,
        language_code: lang as LANGUAGE_CODE,
        text: lemma,
      })),
  );
  if (translationRows.length === 0) {
    return { termInserted, translationsInserted: 0 };
  }

  // 3. Insert the translations — duplicates are skipped silently, so the
  //    returned rows are exactly the newly inserted ones.
  const insertedTranslations = await db
    .insert(translations)
    .values(translationRows)
    .onConflictDoNothing()
    .returning({ id: translations.id });
  return { termInserted, translationsInserted: insertedTranslations.length };
};
/**
 * Seeds the database from every JSON file found in `dataDir`.
 *
 * Runs four logged steps: list the `.json` files, validate each file name with
 * `parseFilename` (files with invalid names are skipped with a warning),
 * upload the synsets of each valid file one at a time with
 * `uploadSynsetToDB`, and print the totals of newly inserted terms and
 * translations.
 *
 * Returns early, without throwing, when the directory holds no JSON files or
 * none of them has a valid name. Errors from reading a file or from the
 * database are not caught here and reject the returned promise.
 */
const main = async () => {
  // Prints the banner that separates the steps in the console output.
  const printStep = (title: string): void => {
    console.log("\n");
    console.log("\n");
    console.log("##########################################");
    console.log(title);
    console.log("##########################################");
  };

  // step 1: discovering files
  printStep("step 1: discovering files");
  console.log("🔍 Scanning datafiles directory...");
  const allFiles = await fs.readdir(dataDir);
  const jsonFiles = allFiles.filter((f) => f.endsWith(".json"));
  if (jsonFiles.length === 0) {
    console.warn("⚠️ No JSON files found in", dataDir);
    return;
  }
  console.log(`📁 Found ${jsonFiles.length} file(s)\n`);

  // step 2: validating filenames
  printStep("step 2: validating filenames");
  const validFiles: { filename: string; fileInfo: FileName }[] = [];
  for (const filename of jsonFiles) {
    try {
      const fileInfo = parseFilename(filename);
      validFiles.push({ filename, fileInfo });
      // Separators added: the fields were previously printed run together.
      console.log(
        ` ✅ ${filename}: ${fileInfo.sourceLang} → ${fileInfo.targetLang} (${fileInfo.pos})`,
      );
    } catch (e) {
      // Anything can be thrown, so narrow before reading `.message`.
      const reason = e instanceof Error ? e.message : String(e);
      console.warn(` ⚠️ Skipping ${filename}: ${reason}`);
    }
  }
  if (validFiles.length === 0) {
    console.error("❌ No valid files to process. Exiting.");
    return;
  }

  // step 3: processing each file
  printStep("step 3: processing each file");
  let totalTermsInserted = 0;
  let totalTranslationsInserted = 0;
  for (const [i, { filename, fileInfo }] of validFiles.entries()) {
    const prefix = `[${i + 1}/${validFiles.length}]`;
    console.log(`\n${prefix} 📄 ${filename}`);
    // Plain concatenation: relies on dataDir ending with a slash.
    const synsets = await readFromJsonFile(dataDir + filename);
    console.log(`${prefix} Loaded ${synsets.length} synsets`);
    let fileTermsInserted = 0;
    let fileTranslationsInserted = 0;
    // Synsets are uploaded sequentially, one awaited call per synset.
    for (const [j, synset] of synsets.entries()) {
      // Progress line every 500 synsets.
      if (j > 0 && j % 500 === 0) {
        console.log(`${prefix} ${j}/${synsets.length} synsets processed...`);
      }
      const { termInserted, translationsInserted } = await uploadSynsetToDB(
        synset,
        fileInfo,
      );
      if (termInserted) fileTermsInserted++;
      fileTranslationsInserted += translationsInserted;
    }
    console.log(
      `${prefix} ✅ Done — ${fileTermsInserted} new terms, ${fileTranslationsInserted} new translations`,
    );
    totalTermsInserted += fileTermsInserted;
    totalTranslationsInserted += fileTranslationsInserted;
  }

  // step 4: Final summary
  printStep("step 4: final summary");
  console.log(`\n🎉 Seeding complete!`);
  console.log(` Terms inserted: ${totalTermsInserted}`);
  console.log(` Translations inserted: ${totalTranslationsInserted}`);
};
// Script entry point: run the seed; on any rejection, print the error and
// terminate the process with exit code 1.
main().catch((failure) => {
  console.error(failure);
  process.exit(1);
});