feat: enrich script working, redesigning to sub-stage architecture

- Enrich script functional with timeout, progress tracking, rejection mechanism
- Identified ordering issue: CEFR voting needs validated translations first
- Redesign: round1_gloss → round1_example → round1_translations → round1_cefr
- Update data-pipeline.md with new sub-stage design and roadmap
- Qwen3.5-4B confirmed working with thinking disabled
This commit is contained in:
lila 2026-05-07 13:09:43 +02:00
parent 7f10c35e03
commit 73fb12ac35
7 changed files with 337 additions and 122 deletions

View file

@@ -60,6 +60,13 @@ CREATE TABLE IF NOT EXISTS model_translation_cefr_votes (
UNIQUE (translation_id, model_name)
);
-- At most one row per (translation_id, model_name) pair, enforced by the
-- UNIQUE constraint below.
-- NOTE(review): presumably each row records that a model rejected a
-- translation -- confirm against the enrich script that writes this table.
CREATE TABLE IF NOT EXISTS model_translation_rejections (
    id             INTEGER PRIMARY KEY,
    translation_id INTEGER NOT NULL,
    model_name     TEXT    NOT NULL,
    FOREIGN KEY (translation_id) REFERENCES translations (id),
    UNIQUE (translation_id, model_name)
);
CREATE TABLE IF NOT EXISTS generated_glosses (
id INTEGER PRIMARY KEY,
entry_id INTEGER NOT NULL REFERENCES entries(id),