From 54ddebf0548ab47e7341e10b00b2ad3f75e06e7c Mon Sep 17 00:00:00 2001 From: Tyler Hallada Date: Thu, 26 Feb 2026 01:26:25 -0500 Subject: [PATCH] N-gram metrics overhaul & UI improvements --- benches/ngram_benchmarks.rs | 25 +- .../2026-02-24-n-grams-statistics-tab.md | 221 +++ ...cs-overhaul-error-anomaly-speed-anomaly.md | 265 ++++ ...integrated-bigram-char-focus-generation.md | 351 +++++ src/app.rs | 391 +++-- src/config.rs | 15 +- src/engine/key_stats.rs | 73 +- src/engine/mod.rs | 2 +- src/engine/ngram_stats.rs | 1342 +++++++++++++---- src/engine/skill_tree.rs | 4 +- src/generator/code_syntax.rs | 443 +++--- src/generator/dictionary.rs | 23 + src/generator/mod.rs | 9 +- src/generator/passage.rs | 3 +- src/generator/phonetic.rs | 273 +++- src/main.rs | 681 ++++++--- src/store/json_store.rs | 33 +- src/ui/components/dashboard.rs | 48 +- src/ui/components/keyboard_diagram.rs | 91 +- src/ui/components/skill_tree.rs | 9 +- src/ui/components/stats_dashboard.rs | 498 +++++- src/ui/components/typing_area.rs | 12 +- src/ui/layout.rs | 8 +- 23 files changed, 3812 insertions(+), 1008 deletions(-) create mode 100644 docs/plans/2026-02-24-n-grams-statistics-tab.md create mode 100644 docs/plans/2026-02-25-bigram-metrics-overhaul-error-anomaly-speed-anomaly.md create mode 100644 docs/plans/2026-02-25-ema-error-decay-integrated-bigram-char-focus-generation.md diff --git a/benches/ngram_benchmarks.rs b/benches/ngram_benchmarks.rs index 43afa97..07ccd15 100644 --- a/benches/ngram_benchmarks.rs +++ b/benches/ngram_benchmarks.rs @@ -21,9 +21,7 @@ fn bench_extraction(c: &mut Criterion) { let keystrokes = make_keystrokes(500); c.bench_function("extract_ngram_events (500 keystrokes)", |b| { - b.iter(|| { - extract_ngram_events(black_box(&keystrokes), 800.0) - }) + b.iter(|| extract_ngram_events(black_box(&keystrokes), 800.0)) }); } @@ -84,17 +82,13 @@ fn bench_focus_selection(c: &mut Criterion) { let unlocked: Vec = all_chars; c.bench_function("weakest_bigram (3K entries)", |b| { - 
b.iter(|| { - bigram_stats.weakest_bigram(black_box(&char_stats), black_box(&unlocked)) - }) + b.iter(|| bigram_stats.weakest_bigram(black_box(&char_stats), black_box(&unlocked))) }); } fn bench_history_replay(c: &mut Criterion) { // Build 500 drills of ~300 keystrokes each - let drills: Vec> = (0..500) - .map(|_| make_keystrokes(300)) - .collect(); + let drills: Vec> = (0..500).map(|_| make_keystrokes(300)).collect(); c.bench_function("history replay (500 drills x 300 keystrokes)", |b| { b.iter(|| { @@ -103,8 +97,7 @@ fn bench_history_replay(c: &mut Criterion) { let mut key_stats = KeyStatsStore::default(); for (drill_idx, keystrokes) in drills.iter().enumerate() { - let (bigram_events, trigram_events) = - extract_ngram_events(keystrokes, 800.0); + let (bigram_events, trigram_events) = extract_ngram_events(keystrokes, 800.0); for kt in keystrokes { if kt.correct { @@ -117,13 +110,19 @@ fn bench_history_replay(c: &mut Criterion) { for ev in &bigram_events { bigram_stats.update( - ev.key.clone(), ev.total_time_ms, ev.correct, ev.has_hesitation, + ev.key.clone(), + ev.total_time_ms, + ev.correct, + ev.has_hesitation, drill_idx as u32, ); } for ev in &trigram_events { trigram_stats.update( - ev.key.clone(), ev.total_time_ms, ev.correct, ev.has_hesitation, + ev.key.clone(), + ev.total_time_ms, + ev.correct, + ev.has_hesitation, drill_idx as u32, ); } diff --git a/docs/plans/2026-02-24-n-grams-statistics-tab.md b/docs/plans/2026-02-24-n-grams-statistics-tab.md new file mode 100644 index 0000000..0ce9700 --- /dev/null +++ b/docs/plans/2026-02-24-n-grams-statistics-tab.md @@ -0,0 +1,221 @@ +# Plan: N-grams Statistics Tab + +## Context + +The n-gram error tracking system (last commit `e7f57dd`) tracks bigram/trigram transition difficulties and uses them to adapt drill selection. However, there's no visibility into what the system has identified as weak or how it's influencing drills. This plan adds a **[6] N-grams** tab to the Statistics page to surface this data. 
+ +--- + +## Layout + +``` + [1] Dashboard [2] History [3] Activity [4] Accuracy [5] Timing [6] N-grams + + ┌─ Active Focus ──────────────────────────────────────────────────────────────┐ + │ Focus: Bigram "th" (difficulty: 1.24) │ + │ Bigram diff 1.24 > char 'n' diff 0.50 x 0.8 threshold │ + └─────────────────────────────────────────────────────────────────────────────┘ + ┌─ Eligible Bigrams (3) ────────────────┐┌─ Watchlist ─────────────────────────┐ + │ Pair Diff Err% Exp% Red Conf N ││ Pair Red Samples Streak │ + │ th 1.24 18% 7% 2.10 0.41 32 ││ er 1.82 14/20 2/3 │ + │ ed 0.89 22% 9% 1.90 0.53 28 ││ in 1.61 8/20 1/3 │ + │ ng 0.72 14% 8% 1.72 0.58 24 ││ ou 1.53 18/20 1/3 │ + └────────────────────────────────────────┘└───────────────────────────────────┘ + Scope: Global | Bigrams: 142 | Trigrams: 387 | Hesitation: >832ms | Tri-gain: 12.0% + + [ESC] Back [Tab] Next tab [1-6] Switch tab +``` + +--- + +## Scope Decisions + +- **Drill scope**: Tab shows data for `app.drill_scope` (current adaptive scope). A scope label in the summary line makes this explicit (e.g., "Scope: Global" or "Scope: Branch: lowercase"). +- **Trigram gain**: Sourced from `app.trigram_gain_history` (computed every 50 ranked drills). Always from ranked stats, consistent with bigram/trigram counts shown. The value is a fraction in `[0.0, 1.0]` (count of signal trigrams / total qualified trigrams), so it is mathematically non-negative. Format: `X.X%` (one decimal). When empty: `--` with note "(computed every 50 drills)". +- **Eligible vs Watchlist**: Strictly disjoint by construction. Watchlist filter explicitly excludes bigrams that pass all eligibility gates. + +--- + +## Layer Boundaries + +Domain logic (engine) and presentation (UI) are separated: + +- **Engine** (`ngram_stats.rs`): Owns `FocusReasoning` (domain decision explanation), `select_focus_target_with_reasoning()`, filtering/gating/sorting logic for eligible and watchlist bigrams. Returns domain-oriented results. 
+- **UI** (`stats_dashboard.rs`): Owns `NgramTabData`, `EligibleBigramRow`, `WatchlistBigramRow` (view model structs tailored for rendering columns). +- **Adapter** (`main.rs`): `build_ngram_tab_data()` is the single point that translates engine output → UI view models. All stats store lookups for display columns happen here. + +--- + +## Files to Modify + +### 1. `src/engine/ngram_stats.rs` — Domain logic + focus reasoning + +**`FocusReasoning` enum** (domain concept — why the target was selected): +```rust +pub enum FocusReasoning { + BigramWins { + bigram_difficulty: f64, + char_difficulty: f64, + char_key: Option<char>, // None when no focused char exists + }, + CharWins { + char_key: char, + char_difficulty: f64, + bigram_best: Option<(BigramKey, f64)>, + }, + NoBigrams { char_key: char }, + Fallback, +} +``` + +**`select_focus_target_with_reasoning()`** — Unified function returning `(FocusTarget, FocusReasoning)`. Internally calls `focused_key()` and `weakest_bigram()` once. Handles all four match arms without synthetic values. + +**`focus_eligible_bigrams()`** on `BigramStatsStore` — Returns `Vec<(BigramKey, f64 /*difficulty*/, f64 /*redundancy*/)>` sorted by `(difficulty desc, redundancy desc, key lexical asc)`. Same gating as `weakest_bigram()`: sample >= `MIN_SAMPLES_FOR_FOCUS`, streak >= `STABILITY_STREAK_REQUIRED`, redundancy > `STABILITY_THRESHOLD`, difficulty > 0. Returns ALL qualifying entries (no truncation — UI handles truncation to available height). + +**`watchlist_bigrams()`** on `BigramStatsStore` — Returns `Vec<(BigramKey, f64 /*redundancy*/)>` sorted by `(redundancy desc, key lexical asc)`. Criteria: redundancy > `STABILITY_THRESHOLD`, sample_count >= 3 (noise floor), AND NOT fully eligible. Returns ALL qualifying entries. + +**Export constants** — Make `MIN_SAMPLES_FOR_FOCUS` and `STABILITY_STREAK_REQUIRED` `pub(crate)` so the adapter in `main.rs` can pass them into `NgramTabData` without duplicating values. + +### 2. 
`src/ui/components/stats_dashboard.rs` — View models + rendering + +**View model structs** (presentation-oriented, mapped from engine data by adapter): + +```rust +pub struct EligibleBigramRow { + pub pair: String, // e.g., "th" + pub difficulty: f64, + pub error_rate_pct: f64, // smoothed, as percentage + pub expected_rate_pct: f64,// from char independence, as percentage + pub redundancy: f64, + pub confidence: f64, + pub sample_count: usize, +} + +pub struct WatchlistBigramRow { + pub pair: String, + pub redundancy: f64, + pub sample_count: usize, + pub redundancy_streak: u8, +} +``` + +**`NgramTabData` struct** (assembled by `build_ngram_tab_data()` in main.rs): +```rust +pub struct NgramTabData { + pub focus_target: FocusTarget, + pub focus_reasoning: FocusReasoning, + pub eligible: Vec<EligibleBigramRow>, + pub watchlist: Vec<WatchlistBigramRow>, + pub total_bigrams: usize, + pub total_trigrams: usize, + pub hesitation_threshold_ms: f64, + pub latest_trigram_gain: Option<f64>, + pub scope_label: String, + // Engine thresholds for watchlist progress denominators: + pub min_samples_for_focus: usize, // from ngram_stats::MIN_SAMPLES_FOR_FOCUS + pub stability_streak_required: u8, // from ngram_stats::STABILITY_STREAK_REQUIRED +} +``` + +**Add field** to `StatsDashboard`: `ngram_data: Option<&'a NgramTabData>` + +**Update constructor**, tab header (add `"[6] N-grams"`), footer (`[1-6]`), `render_tab()` dispatch. + +**Rendering methods:** + +- **`render_ngram_tab()`** — Vertical layout: focus (4 lines), lists (Min 5), summary (2 lines). + +- **`render_ngram_focus()`** — Bordered "Active Focus" block. + - Line 1: target name in `colors.focused_key()` + bold + - Line 2: reasoning in `colors.text_pending()` + - When BigramWins + char_key is None: "Bigram selected (no individual char weakness found)" + - Empty state: "Complete some adaptive drills to see focus data" + +- **`render_eligible_bigrams()`** — Bordered "Eligible Bigrams (N)" block. 
+ - Header in `colors.accent()` + bold + - Rows colored by difficulty: `error()` (>1.0), `warning()` (>0.5), `success()` (<=0.5) + - Columns: `Pair Diff Err% Exp% Red Conf N` + - Narrow (<38 inner): drop Exp% and Conf + - Truncate rows to available height + - Empty state: "No bigrams meet focus criteria yet" + +- **`render_watchlist_bigrams()`** — Bordered "Watchlist" block. + - Columns: `Pair Red Samples Streak` + - Samples rendered as `n/{data.min_samples_for_focus}`, Streak as `n/{data.stability_streak_required}` — denominators sourced from `NgramTabData` (engine constants), never hardcoded in UI + - All rows in `colors.warning()` + - Truncate rows to available height + - Empty state: "No approaching bigrams" + +- **`render_ngram_summary()`** — Single line: scope label, bigram/trigram counts, hesitation threshold, trigram gain. + +### 3. `src/main.rs` — Input handling + adapter + +**`handle_stats_key()`**: +- `STATS_TAB_COUNT`: 5 → 6 +- Add `KeyCode::Char('6') => app.stats_tab = 5` in both branches + +**`build_ngram_tab_data(app: &App) -> NgramTabData`** — Dedicated adapter function (single point of engine→UI translation): +- Calls `select_focus_target_with_reasoning()` +- Calls `focus_eligible_bigrams()` and `watchlist_bigrams()` +- Maps engine results to `EligibleBigramRow`/`WatchlistBigramRow` by looking up additional per-bigram stats (error rate, expected rate, confidence, streak) from `app.ranked_bigram_stats` and `app.ranked_key_stats` +- Builds scope label from `app.drill_scope` +- Only called when `app.stats_tab == 5` + +**`render_stats()`**: Call `build_ngram_tab_data()` when on tab 5, pass `Some(&data)` to StatsDashboard. + +--- + +## Implementation Order + +1. Add `FocusReasoning` enum and `select_focus_target_with_reasoning()` to `ngram_stats.rs` +2. Add `focus_eligible_bigrams()` and `watchlist_bigrams()` to `BigramStatsStore` +3. Add unit tests for steps 1-2 +4. 
Add view model structs (`EligibleBigramRow`, `WatchlistBigramRow`, `NgramTabData`) and `ngram_data` field to `stats_dashboard.rs` +5. Add all rendering methods to `stats_dashboard.rs` +6. Update tab header, footer, `render_tab()` dispatch in `stats_dashboard.rs` +7. Add `build_ngram_tab_data()` adapter + update `render_stats()` in `main.rs` +8. Update `handle_stats_key()` in `main.rs` + +--- + +## Verification + +### Unit Tests (in `ngram_stats.rs` test module) + +**`test_focus_eligible_bigrams_gating`** — BigramStatsStore with bigrams at boundary conditions: +- sample=25, streak=3, redundancy=2.0 → eligible +- sample=15, streak=3, redundancy=2.0 → excluded (samples < 20) +- sample=25, streak=2, redundancy=2.0 → excluded (streak < 3) +- sample=25, streak=3, redundancy=1.2 → excluded (redundancy <= 1.5) +- sample=25, streak=3, redundancy=2.0, confidence=1.5 → excluded (difficulty <= 0) + +**`test_focus_eligible_bigrams_ordering_and_tiebreak`** — 3 eligible bigrams: two with same difficulty but different redundancy, one with lower difficulty. Verify sorted by (difficulty desc, redundancy desc, key lexical asc). + +**`test_watchlist_bigrams_gating`** — Bigrams at boundary: +- Fully eligible (sample=25, streak=3) → excluded (goes to eligible list) +- High redundancy, low samples (sample=10) → included +- High redundancy, low streak (sample=25, streak=1) → included +- Low redundancy (1.3) → excluded +- Very few samples (sample=2) → excluded (< 3 noise floor) + +**`test_watchlist_bigrams_ordering_and_tiebreak`** — 3 watchlist entries: two with same redundancy. Verify sorted by (redundancy desc, key lexical asc). + +**`test_select_focus_with_reasoning_bigram_wins`** — Bigram difficulty > char difficulty * 0.8. Returns `BigramWins` with correct values and `char_key: Some(ch)`. + +**`test_select_focus_with_reasoning_char_wins`** — Char difficulty high, bigram < threshold. Returns `CharWins` with `bigram_best` populated. 
+ +**`test_select_focus_with_reasoning_no_bigrams`** — No eligible bigrams. Returns `NoBigrams`. + +**`test_select_focus_with_reasoning_bigram_only`** — No focused char, bigram exists. Returns `BigramWins` with `char_key: None`. + +### Build & Existing Tests +- `cargo build` — no compile errors +- `cargo test` — all existing + new tests pass + +### Manual Testing +- Navigate to Statistics → press [6] → see N-grams tab +- Tab/BackTab cycles through all 6 tabs +- With no drill history: empty states shown for all panels +- After several adaptive drills: eligible bigrams appear with plausible data +- Scope label reflects current drill scope +- Verify layout at 80x24 terminal size — confirm column drop at narrow widths keeps header/data aligned diff --git a/docs/plans/2026-02-25-bigram-metrics-overhaul-error-anomaly-speed-anomaly.md b/docs/plans/2026-02-25-bigram-metrics-overhaul-error-anomaly-speed-anomaly.md new file mode 100644 index 0000000..ae268e9 --- /dev/null +++ b/docs/plans/2026-02-25-bigram-metrics-overhaul-error-anomaly-speed-anomaly.md @@ -0,0 +1,265 @@ +# Plan: Bigram Metrics Overhaul — Error Anomaly & Speed Anomaly + +## Context + +The current bigram metrics use `difficulty = (1 - confidence) * redundancy` to gate eligibility and focus. This is fundamentally broken: when a user types faster than target WPM (`confidence > 1.0`), difficulty goes negative — even for bigrams with 100% error rate. The root cause is that "confidence" (a speed-vs-target ratio) and "redundancy" (an error-rate ratio) are conflated into a single metric that can cancel out genuine problems. 
+ +This overhaul replaces the conflated system with two orthogonal anomaly metrics: +- **`error_anomaly`** — how much worse a bigram's error rate is compared to what's expected from its constituent characters (same math as current `redundancy_score`, reframed as a percentage) +- **`speed_anomaly`** — how much slower a bigram transition is compared to the user's normal speed typing the second character (user-relative, no target WPM dependency) + +Both are displayed as percentages where positive = worse than expected. The UI shows two side-by-side columns, one per anomaly type, with confirmed problems highlighted. + +--- + +## Persistence / Migration + +**NgramStat is NOT persisted to disk.** N-gram stores are rebuilt from drill history on every startup (see `json_store.rs:104` comment: "N-gram stats are not included — they are always rebuilt from drill history", and `app.rs:1152` `rebuild_ngram_stats()`). The stores are never saved via `save_data()` — only `profile`, `key_stats`, `ranked_key_stats`, and `drill_history` are persisted. + +Therefore: +- No serde migration, `#[serde(alias)]`, or backward-compat handling is needed for NgramStat field renames/removals +- `#[serde(default)]` annotations on NgramStat fields are vestigial (the derive exists for in-memory cloning, not disk persistence) but harmless to leave +- The `Serialize`/`Deserialize` derives on NgramStat can stay (used by BigramStatsStore/TrigramStatsStore types which derive them transitively, though the stores themselves are also not persisted) + +**KeyStat IS persisted** — `confidence` on KeyStat is NOT being changed (used by skill_tree progression). No migration needed there. + +--- + +## Changes + +### 1. 
`src/engine/ngram_stats.rs` — Metrics engine overhaul + +**NgramStat struct** (line 34): +- Remove `confidence: f64` field +- Rename `redundancy_streak: u8` → `error_anomaly_streak: u8` +- Add `speed_anomaly_streak: u8` with `#[serde(default)]` +- **Preserved fields** (explicitly unchanged): `filtered_time_ms`, `best_time_ms`, `sample_count`, `error_count`, `hesitation_count`, `recent_times`, `recent_correct`, `last_seen_drill_index` — all remain and continue to be updated by `update_stat()` + +**`update_stat()`** (line 65): +- Remove `confidence = target_time_ms / stat.filtered_time_ms` computation (line 82) +- Remove `target_time_ms` parameter (no longer needed) +- **Keep** `hesitation` parameter and `drill_index` parameter — these update `hesitation_count` (line 72) and `last_seen_drill_index` (line 66) which are used by trigram pruning and other downstream logic +- New signature (module-private, matching current visibility): `fn update_stat(stat: &mut NgramStat, time_ms: f64, correct: bool, hesitation: bool, drill_index: u32)` +- All other field updates remain identical (EMA on filtered_time_ms, best_time_ms, recent_times, recent_correct, error_count, sample_count) + +**Constants** (lines 10-16): +- Rename `STABILITY_THRESHOLD` → `ERROR_ANOMALY_RATIO_THRESHOLD` (value stays 1.5) +- Rename `STABILITY_STREAK_REQUIRED` → `ANOMALY_STREAK_REQUIRED` (value stays 3) +- Rename `WATCHLIST_MIN_SAMPLES` → `ANOMALY_MIN_SAMPLES` (value stays 3) +- Add `SPEED_ANOMALY_PCT_THRESHOLD: f64 = 50.0` (50% slower than expected) +- Add `MIN_CHAR_SAMPLES_FOR_SPEED: usize = 10` (EMA alpha=0.1 needs ~10 samples for initial value to decay to ~35% influence; 5 samples still has ~59% initial-value bias, too noisy for baseline) +- Remove `DEFAULT_TARGET_CPM` (no longer used by update_stat or stores) + +**`BigramStatsStore` struct** (line 102): +- Remove `target_cpm: f64` field and `default_target_cpm()` helper +- `BigramStatsStore::update()` (line 114): Remove `target_time_ms` calculation. 
Pass-through to `update_stat()` without it. + +**`TrigramStatsStore` struct** (line 285): +- Remove `target_cpm: f64` field +- `TrigramStatsStore::update()` (line 293): Remove `target_time_ms` calculation. Pass-through to `update_stat()` without it. + +**Remove `get_confidence()`** methods on both stores (lines 121, 300) — they read the deleted `confidence` field. Both are `#[allow(dead_code)]` already. + +**Rename `redundancy_score()`** → **`error_anomaly_ratio()`** (line 132): +- Same math internally, just renamed. Returns `e_ab / expected_ab`. + +**New methods on `BigramStatsStore`**: + +```rust +/// Error anomaly as percentage: (ratio - 1.0) * 100 +/// Returns None if bigram has no stats. +pub fn error_anomaly_pct(&self, key: &BigramKey, char_stats: &KeyStatsStore) -> Option<f64> { + let _stat = self.stats.get(key)?; + let ratio = self.error_anomaly_ratio(key, char_stats); + Some((ratio - 1.0) * 100.0) +} + +/// Speed anomaly: % slower than user types char_b in isolation. +/// Compares bigram filtered_time_ms to char_b's filtered_time_ms. +/// Returns None if bigram has no stats or char_b has < MIN_CHAR_SAMPLES_FOR_SPEED samples. +pub fn speed_anomaly_pct(&self, key: &BigramKey, char_stats: &KeyStatsStore) -> Option<f64> { + let stat = self.stats.get(key)?; + let char_b_stat = char_stats.stats.get(&key.0[1])?; + if char_b_stat.sample_count < MIN_CHAR_SAMPLES_FOR_SPEED { return None; } + let ratio = stat.filtered_time_ms / char_b_stat.filtered_time_ms; + Some((ratio - 1.0) * 100.0) +} +``` + +**Rename `update_redundancy_streak()`** → **`update_error_anomaly_streak()`** (line 142): +- Same logic, uses renamed constant and renamed field + +**New `update_speed_anomaly_streak()`**: +- Same pattern as error streak: call `speed_anomaly_pct()`, compare against `SPEED_ANOMALY_PCT_THRESHOLD` +- If `speed_anomaly_pct()` returns `None` (char baseline unavailable/under-sampled), **hold previous streak value** — don't reset or increment. 
The bigram simply can't be evaluated for speed yet. +- Requires both bigram samples >= `ANOMALY_MIN_SAMPLES` AND char_b samples >= `MIN_CHAR_SAMPLES_FOR_SPEED` before any streak update occurs. + +**New `BigramAnomaly` struct**: +```rust +pub struct BigramAnomaly { + pub key: BigramKey, + pub anomaly_pct: f64, + pub sample_count: usize, + pub streak: u8, + pub confirmed: bool, // streak >= ANOMALY_STREAK_REQUIRED && samples >= MIN_SAMPLES_FOR_FOCUS +} +``` + +**Replace `focus_eligible_bigrams()` + `watchlist_bigrams()`** with: +- **`error_anomaly_bigrams(&self, char_stats: &KeyStatsStore, unlocked: &[char]) -> Vec<BigramAnomaly>`** — All bigrams with `error_anomaly_ratio > ERROR_ANOMALY_RATIO_THRESHOLD` and `samples >= ANOMALY_MIN_SAMPLES`, sorted by anomaly_pct desc. Each entry's `confirmed` flag = `error_anomaly_streak >= ANOMALY_STREAK_REQUIRED && samples >= MIN_SAMPLES_FOR_FOCUS`. +- **`speed_anomaly_bigrams(&self, char_stats: &KeyStatsStore, unlocked: &[char]) -> Vec<BigramAnomaly>`** — All bigrams where `speed_anomaly_pct() > Some(SPEED_ANOMALY_PCT_THRESHOLD)` and `samples >= ANOMALY_MIN_SAMPLES`, sorted by anomaly_pct desc. Same confirmed logic using `speed_anomaly_streak`. 
+ +**Replace `weakest_bigram()`** with **`worst_confirmed_anomaly()`**: +- Takes `char_stats: &KeyStatsStore` and `unlocked: &[char]` +- Collects all confirmed error anomalies and confirmed speed anomalies into a single candidate pool +- Each candidate is `(BigramKey, anomaly_pct, anomaly_type)` where type is `Error` or `Speed` +- **Dedup per bigram**: If a bigram appears in both error and speed lists, keep whichever has higher anomaly_pct (or prefer error on tie) +- Return the single bigram with highest anomaly_pct, or None if no confirmed anomalies +- This eliminates ambiguity about same-bigram-in-both-lists — each bigram gets at most one candidacy + +**Update `FocusReasoning` enum** (line 471): +Current variants are: `BigramWins { bigram_difficulty, char_difficulty, char_key }`, `CharWins { char_key, char_difficulty, bigram_best }`, `NoBigrams { char_key }`, `Fallback`. + +Replace with: +```rust +pub enum FocusReasoning { + BigramWins { + bigram_anomaly_pct: f64, + anomaly_type: AnomalyType, // Error or Speed + char_key: Option<char>, + }, + CharWins { + char_key: char, + bigram_best: Option<(BigramKey, f64)>, + }, + NoBigrams { + char_key: char, + }, + Fallback, +} + +pub enum AnomalyType { Error, Speed } +``` + +**Update `select_focus_target_with_reasoning()`** (line 489): +- Call `worst_confirmed_anomaly()` instead of `weakest_bigram()` +- **Focus priority rule**: Any confirmed bigram anomaly always wins over char focus. Rationale: char focus is the default skill-tree progression mechanism; confirmed bigram anomalies are exceptional problems that survived a conservative gate (3 consecutive drills above threshold + 20 samples). No cross-scale score comparison needed — confirmation itself is the signal. +- When no confirmed bigram anomalies exist, fall back to char focus as before. +- Anomaly_pct is unbounded (e.g. 
200% = 3x worse than expected) — this is fine because confirmation gating prevents transient spikes from stealing focus, and the value is only used for ranking among confirmed anomalies, not for threshold comparison against char scores. + +**Update `select_focus_target()`** (line 545): +- Same delegation change, pass `char_stats` through + +### 2. `src/app.rs` — Streak update call sites & store cleanup + +**`target_cpm` removal checklist** (complete audit of all references): + +| Location | What | Action | +|---|---|---| +| `ngram_stats.rs:105-106` | `BigramStatsStore.target_cpm` field + serde attr | Remove field | +| `ngram_stats.rs:288-289` | `TrigramStatsStore.target_cpm` field + serde attr | Remove field | +| `ngram_stats.rs:109-111` | `fn default_target_cpm()` helper | Remove function | +| `ngram_stats.rs:11` | `const DEFAULT_TARGET_CPM` | Remove constant | +| `ngram_stats.rs:115` | `BigramStatsStore::update()` target_time_ms calc | Remove line | +| `ngram_stats.rs:294` | `TrigramStatsStore::update()` target_time_ms calc | Remove line | +| `ngram_stats.rs:1386` | Test helper `bigram_stats.target_cpm = DEFAULT_TARGET_CPM` | Remove line | +| `app.rs:1155` | `self.bigram_stats.target_cpm = ...` in rebuild_ngram_stats | Remove line | +| `app.rs:1157` | `self.ranked_bigram_stats.target_cpm = ...` | Remove line | +| `app.rs:1159` | `self.trigram_stats.target_cpm = ...` | Remove line | +| `app.rs:1161` | `self.ranked_trigram_stats.target_cpm = ...` | Remove line | +| `key_stats.rs:37` | `KeyStatsStore.target_cpm` | **KEEP** — used by `update_key()` for char confidence | +| `app.rs:330,332,609,611,1320,1322,1897-1898,1964-1965` | `key_stats.target_cpm = ...` | **KEEP** — KeyStatsStore still uses target_cpm | +| `config.rs:142` | `fn target_cpm()` | **KEEP** — still used by KeyStatsStore | + +**At all 6 `update_redundancy_streak` call sites** (lines 899, 915, 1044, 1195, 1212, plus rebuild): +- Rename to `update_error_anomaly_streak()` +- Add parallel call to 
`update_speed_anomaly_streak()` passing the appropriate `&KeyStatsStore`: + - `&self.key_stats` for `self.bigram_stats` updates + - `&self.ranked_key_stats` for `self.ranked_bigram_stats` updates + +**Update `select_focus_target` calls** in `generate_drill` (line ~663) and drill header in main.rs: +- Add `ranked_key_stats` parameter (already available at call sites) + +### 3. `src/ui/components/stats_dashboard.rs` — Two-column anomaly display + +**Replace data structs**: +- Remove `EligibleBigramRow` (line 20) and `WatchlistBigramRow` (line 30) +- Add single `AnomalyBigramRow`: + ```rust + pub struct AnomalyBigramRow { + pub pair: String, + pub anomaly_pct: f64, + pub sample_count: usize, + pub streak: u8, + pub confirmed: bool, + } + ``` + +**Replace `NgramTabData` fields** (line 39): +- Remove `eligible_bigrams: Vec<EligibleBigramRow>` and `watchlist_bigrams: Vec<WatchlistBigramRow>` +- Add `error_anomalies: Vec<AnomalyBigramRow>` and `speed_anomalies: Vec<AnomalyBigramRow>` + +**Replace render functions**: +- Remove `render_eligible_bigrams()` (line 1473) and `render_watchlist_bigrams()` (line 1560) +- Add `render_error_anomalies()` and `render_speed_anomalies()` +- Each renders a table with columns: `Pair | Anomaly% | Samples | Streak` +- Confirmed rows (`.confirmed == true`) use highlight/accent color +- Unconfirmed rows use dimmer/warning color +- Column titles: `" Error Anomalies ({}) "` and `" Speed Anomalies ({}) "` +- Empty states: `" No error anomalies detected"` / `" No speed anomalies detected"` + +**Narrow-width adaptation**: +- Wide mode (width >= 60): 50/50 horizontal split, full columns `Pair | Anomaly% | Samples | Streak` +- Narrow mode (width < 60): Stack vertically (error on top, speed below). Compact columns: `Pair | Anom% | Smp` + - Drop `Streak` column + - Abbreviate headers + - This mirrors the existing pattern used by the current eligible/watchlist tables +- **Vertical space budget** (stacked mode): Each panel gets a minimum of 3 data rows (+ 1 header + 1 border = 5 lines). 
Remaining vertical space is split evenly. If total available height < 10 lines, show only error anomalies panel (speed anomalies are less actionable). This prevents one panel from starving the other. + +**Update `render_ngram_tab()`** (line 1308): +- Split the bottom section into two horizontal chunks (50/50) +- Left: `render_error_anomalies()`, Right: `render_speed_anomalies()` +- On narrow terminals (width < 60), stack vertically instead + +### 4. `src/main.rs` — Bridge adapter + +**`build_ngram_tab_data()`** (~line 2232): +- Call `error_anomaly_bigrams()` and `speed_anomaly_bigrams()` instead of old functions +- Map `BigramAnomaly` → `AnomalyBigramRow` +- Pass `&ranked_key_stats` for speed anomaly computation + +**Drill header** (~line 1133): `select_focus_target()` signature change (adding `char_stats` param) will require updating the call here. + +--- + +## Files Modified + +1. **`src/engine/ngram_stats.rs`** — Core metrics overhaul (remove confidence from NgramStat, remove target_cpm from stores, add two anomaly systems, new query functions) +2. **`src/app.rs`** — Update streak calls, remove target_cpm initialization, update select_focus_target calls +3. **`src/ui/components/stats_dashboard.rs`** — Two-column anomaly display, new data structs, narrow-width adaptation +4. 
**`src/main.rs`** — Bridge adapter, select_focus_target call update + +--- + +## Test Updates + +- **Rewrite `test_focus_eligible_bigrams_gating`** → `test_error_anomaly_bigrams`: Test that bigrams above error threshold with sufficient samples appear; confirmed flag set correctly based on streak + samples +- **Rewrite `test_watchlist_bigrams_gating`** → split into `test_error_anomaly_confirmation` and `test_speed_anomaly_bigrams` +- **New `test_speed_anomaly_pct`**: Verify speed anomaly calculation against mock char stats; verify None returned when char_b has < MIN_CHAR_SAMPLES_FOR_SPEED (10) samples; verify correct result at exactly 10 samples (boundary) +- **New `test_speed_anomaly_streak_holds_when_char_unavailable`**: Verify streak is not reset when char baseline is insufficient (samples 0, 5, 9 — all below threshold) +- **New `test_speed_anomaly_borderline_baseline`**: Verify behavior at sample count transitions (9 → None, 10 → Some) and that early-session noise at exactly 10 samples produces reasonable anomaly values (not extreme outliers from EMA initialization bias) +- **Update `test_weakest_bigram*`** → `test_worst_confirmed_anomaly*`: Verify it picks highest anomaly across both types, deduplicates per bigram preferring higher pct (error on tie), returns None when nothing confirmed +- **Update focus reasoning tests**: Update `FocusReasoning` variants to new names (`BigramWins` now carries `anomaly_pct` and `anomaly_type` instead of `bigram_difficulty`) +- **Update `build_ngram_tab_data_maps_fields_correctly`**: Check `error_anomalies`/`speed_anomalies` fields with `AnomalyBigramRow` assertions + +--- + +## Verification + +1. `cargo build` — no compile errors +2. `cargo test` — all tests pass +3. Manual: N-grams tab shows two columns (Error Anomalies / Speed Anomalies) +4. Manual: Confirmed problem bigrams appear highlighted; unconfirmed appear dimmer +5. Manual: Drill header still shows `Focus: "th"` for bigram focus +6. 
Manual: Bigrams previously stuck on watchlist due to negative difficulty now appear as confirmed error anomalies +7. Manual: On narrow terminal (< 60 cols), columns stack vertically with compact headers diff --git a/docs/plans/2026-02-25-ema-error-decay-integrated-bigram-char-focus-generation.md b/docs/plans/2026-02-25-ema-error-decay-integrated-bigram-char-focus-generation.md new file mode 100644 index 0000000..acba282 --- /dev/null +++ b/docs/plans/2026-02-25-ema-error-decay-integrated-bigram-char-focus-generation.md @@ -0,0 +1,351 @@ +# Plan: EMA Error Decay + Integrated Bigram/Char Focus Generation + +## Context + +Two problems with the current n-gram focus system: + +1. **Focus stickiness**: Bigram anomaly uses cumulative `(error_count+1)/(sample_count+2)` Laplace smoothing. A bigram with 20 errors / 25 samples would need ~54 consecutive correct strokes to drop below the 1.5x threshold. Once confirmed, a bigram dominates focus for many drills even as the user visibly improves, while worse bigrams can't take over. + +2. **Post-processing bigram focus causes repetition**: When a bigram is in focus, `apply_bigram_focus()` post-processes finished text by replacing 40% of words with dictionary words containing the bigram. This selects randomly from candidates with no duplicate tracking, causing repeated words. It also means the bigram doesn't influence the actual word selection — it's bolted on after generation and overrides the focused char (the weakest char gets replaced by bigram[0]). + +This plan addresses both: (A) switch error rate to EMA so anomalies respond to recent performance, and (B) integrate bigram focus directly into the word selection algorithm alongside char focus, enabling both to be active simultaneously. + +--- + +## Part A: EMA Error Rate Decay + +### Approach + +Add an `error_rate_ema: f64` field to both `NgramStat` and `KeyStat`, updated via exponential moving average on each keystroke (same pattern as existing `filtered_time_ms`). 
Use this EMA for all anomaly computations instead of cumulative `(error_count+1)/(sample_count+2)`. + +Both bigram AND char error rates must use EMA — `error_anomaly_ratio` divides one by the other, so asymmetric decay would distort the comparison. + +**Alpha = 0.1** (same as timing EMA). Half-life ~7 samples. A bigram at 30% error rate recovering with all-correct strokes: drops below 1.5x threshold after ~15 correct (~2 drills). This is responsive without being twitchy. + +### Changes + +#### `src/engine/ngram_stats.rs` + +**NgramStat struct** (line 34): +- Add `error_rate_ema: f64` with `#[serde(default = "default_error_rate_ema")]` and default value `0.5` +- Add `fn default_error_rate_ema() -> f64 { 0.5 }` (Laplace-equivalent neutral prior) +- Remove `recent_correct: Vec` — superseded by EMA and never read + +**`update_stat()`** (line 67): +- After existing `error_count` increment, add EMA update: + ```rust + let error_signal = if correct { 0.0 } else { 1.0 }; + if stat.sample_count == 1 { + stat.error_rate_ema = error_signal; + } else { + stat.error_rate_ema = EMA_ALPHA * error_signal + (1.0 - EMA_ALPHA) * stat.error_rate_ema; + } + ``` +- Remove `recent_correct` push/trim logic (lines 89-92) +- Keep `error_count` and `sample_count` (needed for gating thresholds and display) + +**`smoothed_error_rate_raw()`** (line 95): Remove. After `smoothed_error_rate()` on both BigramStatsStore and TrigramStatsStore switch to `error_rate_ema`, this function has no callers. + +**`BigramStatsStore::smoothed_error_rate()`** (line 120): Change to return `stat.error_rate_ema` instead of `smoothed_error_rate_raw(stat.error_count, stat.sample_count)`. + +**`TrigramStatsStore::smoothed_error_rate()`** (line 333): Same change — return `stat.error_rate_ema`. + +**`error_anomaly_ratio()`** (line 123): No changes needed — it calls `self.smoothed_error_rate()` and `char_stats.smoothed_error_rate()`, which now both return EMA values. 
+ +**Default for NgramStat** (line 50): Set `error_rate_ema: 0.5` (neutral — same as Laplace `(0+1)/(0+2)`). + +#### `src/engine/key_stats.rs` + +**KeyStat struct** (line 7): +- Add `error_rate_ema: f64` with `#[serde(default = "default_error_rate_ema")]` and default value `0.5` +- Add `fn default_error_rate_ema() -> f64 { 0.5 }` helper +- **Note**: KeyStat IS persisted to disk. The `#[serde(default)]` ensures backward compat — existing data without the field gets 0.5. + +**`update_key()`** (line 50) — called for correct strokes: +- Add EMA update: `stat.error_rate_ema = if stat.total_count == 1 { 0.0 } else { EMA_ALPHA * 0.0 + (1.0 - EMA_ALPHA) * stat.error_rate_ema }` +- Use `total_count` (already incremented on the line before) to detect first sample + +**`update_key_error()`** (line 83) — called for error strokes: +- Add EMA update: `stat.error_rate_ema = if stat.total_count == 1 { 1.0 } else { EMA_ALPHA * 1.0 + (1.0 - EMA_ALPHA) * stat.error_rate_ema }` + +**`smoothed_error_rate()`** (line 90): Change to return `stat.error_rate_ema` (or 0.5 for missing keys). + +#### `src/app.rs` + +**`rebuild_ngram_stats()`** (line 1155): +- Reset `error_rate_ema` to `0.5` alongside `error_count` and `total_count` for KeyStat stores (lines 1165-1172) +- NgramStat stores already reset to `Default` which has `error_rate_ema: 0.5` +- The replay loop (line 1177) naturally rebuilds EMA by calling `update_stat()` and `update_key()`/`update_key_error()` in order + +No other app.rs changes needed — the streak update and focus selection code reads through `error_anomaly_ratio()` which now uses EMA values transparently. + +--- + +## Part B: Integrated Bigram + Char Focus Generation + +### Approach + +Replace the exclusive `FocusTarget` enum (either char OR bigram) with a `FocusSelection` struct that carries both independently. The weakest char comes from skill_tree progression; the worst bigram anomaly comes from the anomaly system. 
Both feed into the `PhoneticGenerator` simultaneously. Remove `apply_bigram_focus()` post-processing entirely. + +### Changes + +#### `src/engine/ngram_stats.rs` — Focus selection + +**Replace `FocusTarget` enum** (line 510): +```rust +// Old +pub enum FocusTarget { Char(char), Bigram(BigramKey) } + +// New +#[derive(Clone, Debug, PartialEq)] +pub struct FocusSelection { + pub char_focus: Option<char>, + pub bigram_focus: Option<(BigramKey, f64, AnomalyType)>, +} +``` + +**Replace `FocusReasoning` enum** (line 523): +```rust +// Old +pub enum FocusReasoning { + BigramWins { bigram_anomaly_pct: f64, anomaly_type: AnomalyType, char_key: Option<char> }, + CharWins { char_key: char, bigram_best: Option<(BigramKey, f64)> }, + NoBigrams { char_key: char }, + Fallback, +} + +// New — reasoning is now just the selection itself (both fields self-describe) +// FocusReasoning is removed; FocusSelection carries all needed info. +``` + +**Simplify `select_focus_target_with_reasoning()`** → **`select_focus()`**: +```rust +pub fn select_focus( + skill_tree: &SkillTree, + scope: DrillScope, + ranked_key_stats: &KeyStatsStore, + ranked_bigram_stats: &BigramStatsStore, +) -> FocusSelection { + let unlocked = skill_tree.unlocked_keys(scope); + let char_focus = skill_tree.focused_key(scope, ranked_key_stats); + let bigram_focus = ranked_bigram_stats.worst_confirmed_anomaly(ranked_key_stats, &unlocked); + FocusSelection { char_focus, bigram_focus } +} +``` + +Remove `select_focus_target()` and `select_focus_target_with_reasoning()` — replaced by `select_focus()`. 
+ +#### `src/generator/mod.rs` — Trait update + +**Update `TextGenerator` trait** (line 14): +```rust +pub trait TextGenerator { + fn generate( + &mut self, + filter: &CharFilter, + focused_char: Option<char>, + focused_bigram: Option<[char; 2]>, + word_count: usize, + ) -> String; +} +``` + +#### `src/generator/phonetic.rs` — Integrated word selection + +**`generate()` method** — rewrite word selection with tiered approach: + +Note: `find_matching(filter, None)` is used (not `focused_char`) because we do our own tiering below. `find_matching` returns ALL words matching the CharFilter — the `focused` param only sorts, never filters — but passing `None` avoids an unnecessary sort we'd discard anyway. + +```rust +fn generate( + &mut self, + filter: &CharFilter, + focused_char: Option<char>, + focused_bigram: Option<[char; 2]>, + word_count: usize, +) -> String { + let matching_words: Vec<String> = self.dictionary + .find_matching(filter, None) // no char-sort; we tier ourselves + .iter().map(|s| s.to_string()).collect(); + let use_real_words = matching_words.len() >= MIN_REAL_WORDS; + + // Pre-categorize words into tiers for real-word mode + let bigram_str = focused_bigram.map(|b| format!("{}{}", b[0], b[1])); + let focus_char_lower = focused_char.filter(|ch| ch.is_ascii_lowercase()); + + let (bigram_indices, char_indices, other_indices) = if use_real_words { + let mut bi = Vec::new(); + let mut ci = Vec::new(); + let mut oi = Vec::new(); + for (i, w) in matching_words.iter().enumerate() { + if bigram_str.as_ref().is_some_and(|b| w.contains(b.as_str())) { + bi.push(i); + } else if focus_char_lower.is_some_and(|ch| w.contains(ch)) { + ci.push(i); + } else { + oi.push(i); + } + } + (bi, ci, oi) + } else { + (vec![], vec![], vec![]) + }; + + let mut words: Vec<String> = Vec::new(); + let mut recent: Vec<String> = Vec::new(); // anti-repeat window + + for _ in 0..word_count { + if use_real_words { + let word = self.pick_tiered_word( + &matching_words, + &bigram_indices, + &char_indices, + &other_indices, +
 &recent, + ); + recent.push(word.clone()); + if recent.len() > 4 { recent.remove(0); } + words.push(word); + } else { + let word = self.generate_phonetic_word( + filter, focused_char, focused_bigram, + ); + words.push(word); + } + } + words.join(" ") +} +``` + +**New `pick_tiered_word()` method**: +```rust +fn pick_tiered_word( + &mut self, + all_words: &[String], + bigram_indices: &[usize], + char_indices: &[usize], + other_indices: &[usize], + recent: &[String], +) -> String { + // Tier selection probabilities: + // Both available: 40% bigram, 30% char, 30% other + // Only bigram: 50% bigram, 50% other + // Only char: 70% char, 30% other (matches current behavior) + // Neither: 100% other + // + // Try up to 6 times to avoid repeating a recent word. + for _ in 0..6 { + let tier = self.select_tier(bigram_indices, char_indices, other_indices); + let idx = tier[self.rng.gen_range(0..tier.len())]; + let word = &all_words[idx]; + if !recent.contains(word) { + return word.clone(); + } + } + // Fallback: all retries hit recent words, so accept any word from the full pool (it may repeat a recent one) + let idx = self.rng.gen_range(0..all_words.len()); + all_words[idx].clone() +} +``` + +**`select_tier()` helper**: Returns reference to the tier to sample from based on availability and probability roll. Only considers a tier "available" if it has >= 2 words (prevents unavoidable repeats when a tier has just 1 word and the anti-repeat window rejects it). Falls through to the next tier when the selected tier is too small. 
+ +**`try_generate_word()` / `generate_phonetic_word()`** — add bigram awareness for Markov fallback: +- Accept `focused_bigram: Option<[char; 2]>` parameter +- Only attempt bigram forcing when both chars pass the CharFilter (avoids pathological starts when bigram chars are rare/unavailable in current filter scope) +- When eligible: 30% chance to start word with bigram[0] and force bigram[1] as second char, then continue Markov chain from `[' ', bigram[0], bigram[1]]` prefix +- Falls back to existing focused_char logic otherwise + +#### `src/generator/code_syntax.rs` + `src/generator/passage.rs` + +Add `_focused_bigram: Option<[char; 2]>` parameter to their `generate()` signatures (ignored, matching trait). + +#### `src/app.rs` — Pipeline update + +**`generate_text()`** (line 653): +- Call `select_focus()` (new function) instead of `select_focus_target()` +- Extract `focused_char` from `selection.char_focus` (the actual weakest char) +- Extract `focused_bigram` from `selection.bigram_focus.map(|(k, _, _)| k.0)` +- Pass both to `generator.generate(filter, focused_char, focused_bigram, word_count)` +- **Remove** the `apply_bigram_focus()` call (lines 784-787) +- Post-processing passes (capitalize, punctuate, numbers, code_patterns) continue to receive `focused_char` — this is now the real weakest char, not the bigram's first char + +**Remove `apply_bigram_focus()`** method (lines 1087-1131) entirely. + +**Store `FocusSelection`** on App: +- Add `pub current_focus: Option<FocusSelection>` field to App (default `None`) +- Set in `generate_text()` right after `select_focus()` — captures the focus that was actually used to generate the current drill's text +- **Lifecycle**: Set when drill starts (in `generate_text()`). Persists through the drill result screen (so the user sees what was in focus for the drill they just completed). Cleared to `None` when: starting the next drill (overwritten), leaving drill screen, changing drill scope/mode, or on import/reset. 
This is a snapshot, not live-recomputed — the header always shows what generated the current text. +- Used by drill header display in main.rs (reads `app.current_focus` instead of re-calling `select_focus()`) + +#### `src/main.rs` — Drill header + stats adapter + +**Drill header** (line 1134): +- Read `app.current_focus` to build focus_text (no re-computation — shows what generated the text) +- Display format: `Focus: 'n' + "th"` (both), `Focus: 'n'` (char only), `Focus: "th"` (bigram only) +- Replace the current `select_focus_target()` call with reading the stored selection +- When `current_focus` is `None`, show no focus text + +**`build_ngram_tab_data()`** (line 2253): +- Call `select_focus()` instead of `select_focus_target_with_reasoning()` +- Update `NgramTabData` struct: replace `focus_target: FocusTarget` and `focus_reasoning: FocusReasoning` with `focus: FocusSelection` + +#### `src/ui/components/stats_dashboard.rs` — Focus panel + +**`NgramTabData`** (line 28): +- Replace `focus_target: FocusTarget` and `focus_reasoning: FocusReasoning` with `focus: FocusSelection` +- Remove `FocusTarget` and `FocusReasoning` imports + +**`render_ngram_focus()`** (line 1352): +- Show both focus targets when both active: + - Line 1: `Focus: Char 'n' + Bigram "th"` (or just one if only one active) + - Line 2: Details — `Char 'n': weakest key | Bigram "th": error anomaly 250%` +- When neither active: show fallback message +- Rendering adapts based on which focuses are present + +--- + +## Files Modified + +1. **`src/engine/ngram_stats.rs`** — EMA field on NgramStat, EMA-based smoothed_error_rate, `FocusSelection` struct, `select_focus()`, remove old FocusTarget/FocusReasoning +2. **`src/engine/key_stats.rs`** — EMA field on KeyStat, EMA updates in update_key/update_key_error, EMA-based smoothed_error_rate +3. **`src/generator/mod.rs`** — TextGenerator trait: add `focused_bigram` parameter +4. 
**`src/generator/phonetic.rs`** — Tiered word selection with bigram+char, anti-repeat window, Markov bigram awareness +5. **`src/generator/code_syntax.rs`** — Add ignored `focused_bigram` parameter +6. **`src/generator/passage.rs`** — Add ignored `focused_bigram` parameter +7. **`src/app.rs`** — Use `select_focus()`, pass both focuses to generator, remove `apply_bigram_focus()`, store `current_focus` +8. **`src/main.rs`** — Update drill header, update `build_ngram_tab_data()` adapter +9. **`src/ui/components/stats_dashboard.rs`** — Update NgramTabData, render_ngram_focus for dual focus display + +--- + +## Test Updates + +### Part A (EMA) +- **Update `test_error_anomaly_bigrams`**: Set `error_rate_ema` directly instead of relying on cumulative error_count/sample_count for anomaly ratio computation +- **Update `test_worst_confirmed_anomaly_dedup`** and **`_prefers_error_on_tie`**: Same — set EMA values +- **New `test_error_rate_ema_decay`**: Verify that after N correct strokes, error_rate_ema drops as expected. Verify anomaly ratio crosses below threshold after reasonable recovery (~15 correct strokes from 30% error rate). +- **New `test_error_rate_ema_rebuild_from_history`**: Verify that rebuilding from drill history produces same EMA as live updates (deterministic replay) +- **New `test_ema_ranking_stability_during_recovery`**: Two bigrams both confirmed. Bigram A has higher anomaly. User corrects bigram A over several drills while bigram B stays bad. Verify that A's anomaly drops below B's and B becomes the new worst_confirmed_anomaly — clean handoff without oscillation. 
+- **Update key_stats tests**: Verify EMA updates in `update_key()` and `update_key_error()`, backward compat (serde default) + +### Part B (Integrated focus) +- **Replace focus reasoning tests** (`test_select_focus_with_reasoning_*`): Replace with `test_select_focus_*` testing `FocusSelection` struct — verify both char_focus and bigram_focus are populated independently +- **New `test_phonetic_bigram_focus_increases_bigram_words`**: Generate 1200 words with focused_bigram, verify significantly more words contain the bigram than without +- **New `test_phonetic_dual_focus_no_excessive_repeats`**: Generate text with both focuses, verify no word appears > 3 times consecutively +- **Update `build_ngram_tab_data_maps_fields_correctly`**: Update for `FocusSelection` struct instead of FocusTarget/FocusReasoning +- **New `test_find_matching_focused_is_sort_only`** (in `dictionary.rs` or `phonetic.rs`): Verify that `find_matching(filter, Some('k'))` and `find_matching(filter, None)` return the same set of words (same membership, potentially different order). Guards against future regressions where focused param accidentally becomes a filter. +- No `apply_bigram_focus` tests exist to remove (method was untested) + +--- + +## Verification + +1. `cargo build` — no compile errors +2. `cargo test` — all tests pass +3. Manual: Start adaptive drill, observe both char and bigram appearing in focus header +4. Manual: Verify drill text contains focused bigram words AND focused char words mixed naturally +5. Manual: Verify no excessive word repetition (the old apply_bigram_focus problem) +6. Manual: Practice a bigram focus target correctly for 2-3 drills → verify it drops out of focus and a different bigram (or char-only) takes over +7. Manual: N-grams tab shows both focuses in the Active Focus panel +8. 
Manual: Narrow terminal (<60 cols) stacks anomaly panels vertically; very short terminal (<10 rows available for panels) shows only error anomalies panel; focus panel always shows at least line 1 diff --git a/src/app.rs b/src/app.rs index 9f4aeec..6d4b303 100644 --- a/src/app.rs +++ b/src/app.rs @@ -2,19 +2,18 @@ use std::collections::{HashSet, VecDeque}; use std::sync::Arc; use std::sync::atomic::{AtomicBool, AtomicU64, Ordering}; use std::thread; -use std::time::Instant; +use std::time::{Duration, Instant}; use rand::Rng; use rand::SeedableRng; use rand::rngs::SmallRng; use crate::config::Config; +use crate::engine::FocusSelection; use crate::engine::filter::CharFilter; use crate::engine::key_stats::KeyStatsStore; -use crate::engine::FocusTarget; use crate::engine::ngram_stats::{ - self, BigramKey, BigramStatsStore, TrigramStatsStore, extract_ngram_events, - select_focus_target, + self, BigramStatsStore, TrigramStatsStore, extract_ngram_events, select_focus, }; use crate::engine::scoring; use crate::engine::skill_tree::{BranchId, BranchStatus, DrillScope, SkillTree}; @@ -35,14 +34,16 @@ use crate::generator::passage::{ use crate::generator::phonetic::PhoneticGenerator; use crate::generator::punctuate; use crate::generator::transition_table::TransitionTable; -use crate::keyboard::model::KeyboardModel; use crate::keyboard::display::BACKSPACE; +use crate::keyboard::model::KeyboardModel; use crate::session::drill::DrillState; use crate::session::input::{self, KeystrokeEvent}; use crate::session::result::{DrillResult, KeyTime}; use crate::store::json_store::JsonStore; -use crate::store::schema::{DrillHistoryData, ExportData, KeyStatsData, ProfileData, EXPORT_VERSION}; +use crate::store::schema::{ + DrillHistoryData, EXPORT_VERSION, ExportData, KeyStatsData, ProfileData, +}; use crate::ui::components::menu::Menu; use crate::ui::theme::Theme; @@ -108,6 +109,8 @@ const MASTERY_MESSAGES: &[&str] = &[ "One more key conquered!", ]; +const POST_DRILL_INPUT_LOCK_MS: u64 = 
800; + struct DownloadJob { downloaded_bytes: Arc, total_bytes: Arc, @@ -135,7 +138,10 @@ pub fn next_available_path(path_str: &str) -> String { let path = std::path::Path::new(path_str).to_path_buf(); let parent = path.parent().unwrap_or(std::path::Path::new(".")); let extension = path.extension().and_then(|e| e.to_str()).unwrap_or("json"); - let full_stem = path.file_stem().and_then(|s| s.to_str()).unwrap_or("export"); + let full_stem = path + .file_stem() + .and_then(|s| s.to_str()) + .unwrap_or("export"); // Strip existing trailing -N suffix to get base stem let base_stem = if let Some(pos) = full_stem.rfind('-') { @@ -272,6 +278,8 @@ pub struct App { pub user_median_transition_ms: f64, pub transition_buffer: Vec, pub trigram_gain_history: Vec, + pub current_focus: Option, + pub post_drill_input_lock_until: Option, rng: SmallRng, transition_table: TransitionTable, #[allow(dead_code)] @@ -293,38 +301,39 @@ impl App { let store = JsonStore::new().ok(); - let (key_stats, ranked_key_stats, skill_tree, profile, drill_history) = if let Some(ref s) = store { - // load_profile returns None if file exists but can't parse (schema mismatch) - let pd = s.load_profile(); + let (key_stats, ranked_key_stats, skill_tree, profile, drill_history) = + if let Some(ref s) = store { + // load_profile returns None if file exists but can't parse (schema mismatch) + let pd = s.load_profile(); - match pd { - Some(pd) if !pd.needs_reset() => { - let ksd = s.load_key_stats(); - let rksd = s.load_ranked_key_stats(); - let lhd = s.load_drill_history(); - let st = SkillTree::new(pd.skill_tree.clone()); - (ksd.stats, rksd.stats, st, pd, lhd.drills) + match pd { + Some(pd) if !pd.needs_reset() => { + let ksd = s.load_key_stats(); + let rksd = s.load_ranked_key_stats(); + let lhd = s.load_drill_history(); + let st = SkillTree::new(pd.skill_tree.clone()); + (ksd.stats, rksd.stats, st, pd, lhd.drills) + } + _ => { + // Schema mismatch or parse failure: full reset of all stores + ( + 
KeyStatsStore::default(), + KeyStatsStore::default(), + SkillTree::default(), + ProfileData::default(), + Vec::new(), + ) + } } - _ => { - // Schema mismatch or parse failure: full reset of all stores - ( - KeyStatsStore::default(), - KeyStatsStore::default(), - SkillTree::default(), - ProfileData::default(), - Vec::new(), - ) - } - } - } else { - ( - KeyStatsStore::default(), - KeyStatsStore::default(), - SkillTree::default(), - ProfileData::default(), - Vec::new(), - ) - }; + } else { + ( + KeyStatsStore::default(), + KeyStatsStore::default(), + SkillTree::default(), + ProfileData::default(), + Vec::new(), + ) + }; let mut key_stats_with_target = key_stats; key_stats_with_target.target_cpm = config.target_cpm(); @@ -421,6 +430,8 @@ impl App { user_median_transition_ms: 0.0, transition_buffer: Vec::new(), trigram_gain_history: Vec::new(), + current_focus: None, + post_drill_input_lock_until: None, rng: SmallRng::from_entropy(), transition_table, dictionary, @@ -454,6 +465,23 @@ impl App { self.settings_editing_download_dir = false; } + pub fn arm_post_drill_input_lock(&mut self) { + self.post_drill_input_lock_until = + Some(Instant::now() + Duration::from_millis(POST_DRILL_INPUT_LOCK_MS)); + } + + pub fn clear_post_drill_input_lock(&mut self) { + self.post_drill_input_lock_until = None; + } + + pub fn post_drill_input_lock_remaining_ms(&self) -> Option { + self.post_drill_input_lock_until.and_then(|until| { + until + .checked_duration_since(Instant::now()) + .map(|remaining| remaining.as_millis().max(1) as u64) + }) + } + pub fn export_data(&mut self) { let path = std::path::Path::new(&self.settings_export_path); @@ -643,6 +671,7 @@ impl App { } pub fn start_drill(&mut self) { + self.clear_post_drill_input_lock(); let (text, source_info) = self.generate_text(); self.drill = Some(DrillState::new(&text)); self.drill_source_info = source_info; @@ -659,17 +688,16 @@ impl App { let scope = self.drill_scope; let all_keys = self.skill_tree.unlocked_keys(scope); - // 
Select focus target: single char or bigram - let focus_target = select_focus_target( + // Select focus targets: char and bigram independently + let selection = select_focus( &self.skill_tree, scope, &self.ranked_key_stats, &self.ranked_bigram_stats, ); - let (focused_char, focused_bigram) = match &focus_target { - FocusTarget::Char(ch) => (Some(*ch), None), - FocusTarget::Bigram(key) => (Some(key.0[0]), Some(key.clone())), - }; + self.current_focus = Some(selection.clone()); + let focused_char = selection.char_focus; + let focused_bigram = selection.bigram_focus.map(|(k, _, _)| k.0); // Generate base lowercase text using only lowercase keys from scope let lowercase_keys: Vec = all_keys @@ -684,7 +712,8 @@ impl App { let dict = Dictionary::load(); let rng = SmallRng::from_rng(&mut self.rng).unwrap(); let mut generator = PhoneticGenerator::new(table, dict, rng); - let mut text = generator.generate(&filter, lowercase_focused, word_count); + let mut text = + generator.generate(&filter, lowercase_focused, focused_bigram, word_count); // Apply capitalization if uppercase keys are in scope let cap_keys: Vec = all_keys @@ -694,7 +723,8 @@ impl App { .collect(); if !cap_keys.is_empty() { let mut rng = SmallRng::from_rng(&mut self.rng).unwrap(); - text = capitalize::apply_capitalization(&text, &cap_keys, focused_char, &mut rng); + text = + capitalize::apply_capitalization(&text, &cap_keys, focused_char, &mut rng); } // Apply punctuation if punctuation keys are in scope @@ -722,7 +752,8 @@ impl App { if !digit_keys.is_empty() { let has_dot = all_keys.contains(&'.'); let mut rng = SmallRng::from_rng(&mut self.rng).unwrap(); - text = numbers::apply_numbers(&text, &digit_keys, has_dot, focused_char, &mut rng); + text = + numbers::apply_numbers(&text, &digit_keys, has_dot, focused_char, &mut rng); } // Apply code symbols only if this drill is for the CodeSymbols branch, @@ -781,11 +812,6 @@ impl App { text = insert_line_breaks(&text); } - // After all generation: if bigram focus, 
swap some words for bigram-containing words - if let Some(ref bigram) = focused_bigram { - text = self.apply_bigram_focus(&text, &filter, bigram); - } - (text, None) } DrillMode::Code => { @@ -796,13 +822,10 @@ impl App { .unwrap_or_else(|| self.config.code_language.clone()); self.last_code_drill_language = Some(lang.clone()); let rng = SmallRng::from_rng(&mut self.rng).unwrap(); - let mut generator = CodeSyntaxGenerator::new( - rng, - &lang, - &self.config.code_download_dir, - ); + let mut generator = + CodeSyntaxGenerator::new(rng, &lang, &self.config.code_download_dir); self.code_drill_language_override = None; - let text = generator.generate(&filter, None, word_count); + let text = generator.generate(&filter, None, None, word_count); (text, Some(generator.last_source().to_string())) } DrillMode::Passage => { @@ -821,7 +844,7 @@ impl App { self.config.passage_downloads_enabled, ); self.passage_drill_selection_override = None; - let text = generator.generate(&filter, None, word_count); + let text = generator.generate(&filter, None, None, word_count); (text, Some(generator.last_source().to_string())) } } @@ -891,18 +914,43 @@ impl App { // Extract and update n-gram stats for all drill modes let drill_index = self.drill_history.len() as u32; - let hesitation_thresh = ngram_stats::hesitation_threshold(self.user_median_transition_ms); + let hesitation_thresh = + ngram_stats::hesitation_threshold(self.user_median_transition_ms); let (bigram_events, trigram_events) = extract_ngram_events(&result.per_key_times, hesitation_thresh); + // Collect unique bigram keys for per-drill streak updates + let mut seen_bigrams: std::collections::HashSet = + std::collections::HashSet::new(); for ev in &bigram_events { - self.bigram_stats.update(ev.key.clone(), ev.total_time_ms, ev.correct, ev.has_hesitation, drill_index); - self.bigram_stats.update_redundancy_streak(&ev.key, &self.key_stats); + seen_bigrams.insert(ev.key.clone()); + self.bigram_stats.update( + ev.key.clone(), + 
ev.total_time_ms, + ev.correct, + ev.has_hesitation, + drill_index, + ); + } + // Update streaks once per drill per unique bigram (not per event) + for key in &seen_bigrams { + self.bigram_stats + .update_error_anomaly_streak(key, &self.key_stats); + self.bigram_stats + .update_speed_anomaly_streak(key, &self.key_stats); } for ev in &trigram_events { - self.trigram_stats.update(ev.key.clone(), ev.total_time_ms, ev.correct, ev.has_hesitation, drill_index); + self.trigram_stats.update( + ev.key.clone(), + ev.total_time_ms, + ev.correct, + ev.has_hesitation, + drill_index, + ); } if ranked { + let mut seen_ranked_bigrams: std::collections::HashSet = + std::collections::HashSet::new(); for kt in &result.per_key_times { if kt.correct { self.ranked_key_stats.update_key(kt.key, kt.time_ms); @@ -911,11 +959,29 @@ impl App { } } for ev in &bigram_events { - self.ranked_bigram_stats.update(ev.key.clone(), ev.total_time_ms, ev.correct, ev.has_hesitation, drill_index); - self.ranked_bigram_stats.update_redundancy_streak(&ev.key, &self.ranked_key_stats); + seen_ranked_bigrams.insert(ev.key.clone()); + self.ranked_bigram_stats.update( + ev.key.clone(), + ev.total_time_ms, + ev.correct, + ev.has_hesitation, + drill_index, + ); + } + for key in &seen_ranked_bigrams { + self.ranked_bigram_stats + .update_error_anomaly_streak(key, &self.ranked_key_stats); + self.ranked_bigram_stats + .update_speed_anomaly_streak(key, &self.ranked_key_stats); } for ev in &trigram_events { - self.ranked_trigram_stats.update(ev.key.clone(), ev.total_time_ms, ev.correct, ev.has_hesitation, drill_index); + self.ranked_trigram_stats.update( + ev.key.clone(), + ev.total_time_ms, + ev.correct, + ev.has_hesitation, + drill_index, + ); } let update = self .skill_tree @@ -1003,6 +1069,9 @@ impl App { } self.last_result = Some(result); + if !self.milestone_queue.is_empty() || self.drill_mode != DrillMode::Adaptive { + self.arm_post_drill_input_lock(); + } // Adaptive mode auto-continues unless milestone popups 
must be shown first. if self.drill_mode == DrillMode::Adaptive && self.milestone_queue.is_empty() { @@ -1036,15 +1105,36 @@ impl App { // Extract and update n-gram stats let drill_index = self.drill_history.len() as u32; - let hesitation_thresh = ngram_stats::hesitation_threshold(self.user_median_transition_ms); + let hesitation_thresh = + ngram_stats::hesitation_threshold(self.user_median_transition_ms); let (bigram_events, trigram_events) = extract_ngram_events(&result.per_key_times, hesitation_thresh); + let mut seen_bigrams: std::collections::HashSet = + std::collections::HashSet::new(); for ev in &bigram_events { - self.bigram_stats.update(ev.key.clone(), ev.total_time_ms, ev.correct, ev.has_hesitation, drill_index); - self.bigram_stats.update_redundancy_streak(&ev.key, &self.key_stats); + seen_bigrams.insert(ev.key.clone()); + self.bigram_stats.update( + ev.key.clone(), + ev.total_time_ms, + ev.correct, + ev.has_hesitation, + drill_index, + ); + } + for key in &seen_bigrams { + self.bigram_stats + .update_error_anomaly_streak(key, &self.key_stats); + self.bigram_stats + .update_speed_anomaly_streak(key, &self.key_stats); } for ev in &trigram_events { - self.trigram_stats.update(ev.key.clone(), ev.total_time_ms, ev.correct, ev.has_hesitation, drill_index); + self.trigram_stats.update( + ev.key.clone(), + ev.total_time_ms, + ev.correct, + ev.has_hesitation, + drill_index, + ); } // Update transition buffer for hesitation baseline @@ -1056,6 +1146,7 @@ impl App { } self.last_result = Some(result); + self.arm_post_drill_input_lock(); self.screen = AppScreen::DrillResult; self.save_data(); } @@ -1081,52 +1172,6 @@ impl App { /// Replace up to 40% of words with dictionary words containing the target bigram. /// No more than 3 consecutive bigram-focused words to prevent repetitive feel. 
- fn apply_bigram_focus(&mut self, text: &str, filter: &CharFilter, bigram: &BigramKey) -> String { - let bigram_str: String = bigram.0.iter().collect(); - let words: Vec<&str> = text.split(' ').collect(); - if words.is_empty() { - return text.to_string(); - } - - // Find dictionary words that contain the bigram and pass the filter - let dict = Dictionary::load(); - let candidates: Vec<&str> = dict - .find_matching(filter, None) - .into_iter() - .filter(|w| w.contains(&bigram_str)) - .collect(); - - if candidates.is_empty() { - return text.to_string(); - } - - let max_replacements = (words.len() * 2 + 4) / 5; // ~40% - let mut replaced = 0; - let mut consecutive = 0; - let mut result_words: Vec = Vec::with_capacity(words.len()); - - for word in &words { - let already_has = word.contains(&bigram_str); - if already_has { - consecutive += 1; - result_words.push(word.to_string()); - continue; - } - - if replaced < max_replacements && consecutive < 3 { - let candidate = candidates[self.rng.gen_range(0..candidates.len())]; - result_words.push(candidate.to_string()); - replaced += 1; - consecutive += 1; - } else { - consecutive = 0; - result_words.push(word.to_string()); - } - } - - result_words.join(" ") - } - /// Update the rolling transition buffer with new inter-keystroke intervals. 
fn update_transition_buffer(&mut self, per_key_times: &[KeyTime]) { for kt in per_key_times { @@ -1152,67 +1197,121 @@ impl App { fn rebuild_ngram_stats(&mut self) { // Reset n-gram stores self.bigram_stats = BigramStatsStore::default(); - self.bigram_stats.target_cpm = self.config.target_cpm(); self.ranked_bigram_stats = BigramStatsStore::default(); - self.ranked_bigram_stats.target_cpm = self.config.target_cpm(); self.trigram_stats = TrigramStatsStore::default(); - self.trigram_stats.target_cpm = self.config.target_cpm(); self.ranked_trigram_stats = TrigramStatsStore::default(); - self.ranked_trigram_stats.target_cpm = self.config.target_cpm(); self.transition_buffer.clear(); self.user_median_transition_ms = 0.0; - // Reset char-level error/total counts (timing fields are untouched) + // Reset char-level error/total counts and EMA (timing fields are untouched) for stat in self.key_stats.stats.values_mut() { stat.error_count = 0; stat.total_count = 0; + stat.error_rate_ema = 0.5; } for stat in self.ranked_key_stats.stats.values_mut() { stat.error_count = 0; stat.total_count = 0; + stat.error_rate_ema = 0.5; } // Take drill_history out temporarily to avoid borrow conflict let history = std::mem::take(&mut self.drill_history); for (drill_index, result) in history.iter().enumerate() { - let hesitation_thresh = ngram_stats::hesitation_threshold(self.user_median_transition_ms); + let hesitation_thresh = + ngram_stats::hesitation_threshold(self.user_median_transition_ms); let (bigram_events, trigram_events) = extract_ngram_events(&result.per_key_times, hesitation_thresh); - // Rebuild char-level error/total counts from history + // Rebuild char-level error/total counts and EMA from history for kt in &result.per_key_times { if kt.correct { let stat = self.key_stats.stats.entry(kt.key).or_default(); stat.total_count += 1; + // Update error rate EMA for correct stroke + if stat.total_count == 1 { + stat.error_rate_ema = 0.0; + } else { + stat.error_rate_ema = 0.1 * 0.0 + 
0.9 * stat.error_rate_ema; + } } else { self.key_stats.update_key_error(kt.key); } } + // Collect unique bigram keys seen this drill for per-drill streak updates + let mut seen_bigrams: std::collections::HashSet = + std::collections::HashSet::new(); + for ev in &bigram_events { - self.bigram_stats.update(ev.key.clone(), ev.total_time_ms, ev.correct, ev.has_hesitation, drill_index as u32); - self.bigram_stats.update_redundancy_streak(&ev.key, &self.key_stats); + seen_bigrams.insert(ev.key.clone()); + self.bigram_stats.update( + ev.key.clone(), + ev.total_time_ms, + ev.correct, + ev.has_hesitation, + drill_index as u32, + ); + } + // Update streaks once per drill per unique bigram (not per event) + for key in &seen_bigrams { + self.bigram_stats + .update_error_anomaly_streak(key, &self.key_stats); + self.bigram_stats + .update_speed_anomaly_streak(key, &self.key_stats); } for ev in &trigram_events { - self.trigram_stats.update(ev.key.clone(), ev.total_time_ms, ev.correct, ev.has_hesitation, drill_index as u32); + self.trigram_stats.update( + ev.key.clone(), + ev.total_time_ms, + ev.correct, + ev.has_hesitation, + drill_index as u32, + ); } if result.ranked { + let mut seen_ranked_bigrams: std::collections::HashSet = + std::collections::HashSet::new(); + for kt in &result.per_key_times { if kt.correct { let stat = self.ranked_key_stats.stats.entry(kt.key).or_default(); stat.total_count += 1; + if stat.total_count == 1 { + stat.error_rate_ema = 0.0; + } else { + stat.error_rate_ema = 0.1 * 0.0 + 0.9 * stat.error_rate_ema; + } } else { self.ranked_key_stats.update_key_error(kt.key); } } for ev in &bigram_events { - self.ranked_bigram_stats.update(ev.key.clone(), ev.total_time_ms, ev.correct, ev.has_hesitation, drill_index as u32); - self.ranked_bigram_stats.update_redundancy_streak(&ev.key, &self.ranked_key_stats); + seen_ranked_bigrams.insert(ev.key.clone()); + self.ranked_bigram_stats.update( + ev.key.clone(), + ev.total_time_ms, + ev.correct, + ev.has_hesitation, + 
drill_index as u32, + ); + } + for key in &seen_ranked_bigrams { + self.ranked_bigram_stats + .update_error_anomaly_streak(key, &self.ranked_key_stats); + self.ranked_bigram_stats + .update_speed_anomaly_streak(key, &self.ranked_key_stats); } for ev in &trigram_events { - self.ranked_trigram_stats.update(ev.key.clone(), ev.total_time_ms, ev.correct, ev.has_hesitation, drill_index as u32); + self.ranked_trigram_stats.update( + ev.key.clone(), + ev.total_time_ms, + ev.correct, + ev.has_hesitation, + drill_index as u32, + ); } } @@ -1282,6 +1381,7 @@ impl App { } pub fn go_to_menu(&mut self) { + self.clear_post_drill_input_lock(); self.screen = AppScreen::Menu; self.drill = None; self.drill_source_info = None; @@ -1289,6 +1389,7 @@ impl App { } pub fn go_to_stats(&mut self) { + self.clear_post_drill_input_lock(); self.stats_tab = 0; self.history_selected = 0; self.history_confirm_delete = false; @@ -1562,10 +1663,8 @@ impl App { } pub fn start_code_downloads(&mut self) { - let queue = build_code_download_queue( - &self.config.code_language, - &self.code_intro_download_dir, - ); + let queue = + build_code_download_queue(&self.config.code_language, &self.code_intro_download_dir); self.code_intro_download_total = queue.len(); self.code_download_queue = queue; @@ -1662,10 +1761,8 @@ impl App { let snippets_limit = self.code_intro_snippets_per_repo; // Get static references for thread - let repo_ref: &'static crate::generator::code_syntax::CodeRepo = - &lang.repos[repo_idx]; - let block_style_ref: &'static crate::generator::code_syntax::BlockStyle = - &lang.block_style; + let repo_ref: &'static crate::generator::code_syntax::CodeRepo = &lang.repos[repo_idx]; + let block_style_ref: &'static crate::generator::code_syntax::BlockStyle = &lang.block_style; let handle = thread::spawn(move || { let ok = download_code_repo_to_cache_with_progress( @@ -1931,12 +2028,11 @@ impl App { // Editable text field handled directly in key handler. 
} 6 => { - self.config.code_snippets_per_repo = - match self.config.code_snippets_per_repo { - 0 => 1, - n if n >= 200 => 0, - n => n + 10, - }; + self.config.code_snippets_per_repo = match self.config.code_snippets_per_repo { + 0 => 1, + n if n >= 200 => 0, + n => n + 10, + }; } // 7 = Download Code Now (action button) 8 => { @@ -1998,12 +2094,11 @@ impl App { // Editable text field handled directly in key handler. } 6 => { - self.config.code_snippets_per_repo = - match self.config.code_snippets_per_repo { - 0 => 200, - 1 => 0, - n => n.saturating_sub(10).max(1), - }; + self.config.code_snippets_per_repo = match self.config.code_snippets_per_repo { + 0 => 200, + 1 => 0, + n => n.saturating_sub(10).max(1), + }; } // 7 = Download Code Now (action button) 8 => { diff --git a/src/config.rs b/src/config.rs index 991994b..e2963d6 100644 --- a/src/config.rs +++ b/src/config.rs @@ -202,10 +202,19 @@ code_language = "go" let config = Config::default(); let serialized = toml::to_string_pretty(&config).unwrap(); let deserialized: Config = toml::from_str(&serialized).unwrap(); - assert_eq!(config.code_downloads_enabled, deserialized.code_downloads_enabled); + assert_eq!( + config.code_downloads_enabled, + deserialized.code_downloads_enabled + ); assert_eq!(config.code_download_dir, deserialized.code_download_dir); - assert_eq!(config.code_snippets_per_repo, deserialized.code_snippets_per_repo); - assert_eq!(config.code_onboarding_done, deserialized.code_onboarding_done); + assert_eq!( + config.code_snippets_per_repo, + deserialized.code_snippets_per_repo + ); + assert_eq!( + config.code_onboarding_done, + deserialized.code_onboarding_done + ); } #[test] diff --git a/src/engine/key_stats.rs b/src/engine/key_stats.rs index 7b94ec5..6683084 100644 --- a/src/engine/key_stats.rs +++ b/src/engine/key_stats.rs @@ -15,6 +15,12 @@ pub struct KeyStat { pub error_count: usize, #[serde(default)] pub total_count: usize, + #[serde(default = "default_error_rate_ema")] + pub error_rate_ema: 
f64, +} + +fn default_error_rate_ema() -> f64 { + 0.5 } impl Default for KeyStat { @@ -27,6 +33,7 @@ impl Default for KeyStat { recent_times: Vec::new(), error_count: 0, total_count: 0, + error_rate_ema: 0.5, } } } @@ -67,6 +74,13 @@ impl KeyStatsStore { if stat.recent_times.len() > 30 { stat.recent_times.remove(0); } + + // Update error rate EMA (correct stroke = 0.0 signal) + if stat.total_count == 1 { + stat.error_rate_ema = 0.0; + } else { + stat.error_rate_ema = EMA_ALPHA * 0.0 + (1.0 - EMA_ALPHA) * stat.error_rate_ema; + } } pub fn get_confidence(&self, key: char) -> f64 { @@ -84,13 +98,20 @@ impl KeyStatsStore { let stat = self.stats.entry(key).or_default(); stat.error_count += 1; stat.total_count += 1; + + // Update error rate EMA (error stroke = 1.0 signal) + if stat.total_count == 1 { + stat.error_rate_ema = 1.0; + } else { + stat.error_rate_ema = EMA_ALPHA * 1.0 + (1.0 - EMA_ALPHA) * stat.error_rate_ema; + } } - /// Laplace-smoothed error rate: (errors + 1) / (total + 2). + /// EMA-based error rate for a key. 
pub fn smoothed_error_rate(&self, key: char) -> f64 { match self.stats.get(&key) { - Some(s) => (s.error_count as f64 + 1.0) / (s.total_count as f64 + 2.0), - None => 0.5, // (0 + 1) / (0 + 2) = 0.5 + Some(s) => s.error_rate_ema, + None => 0.5, } } } @@ -142,4 +163,50 @@ mod tests { "confidence should be < 1.0 for slow typing, got {conf}" ); } + + #[test] + fn test_ema_error_rate_correct_strokes() { + let mut store = KeyStatsStore::default(); + // All correct strokes → EMA should be 0.0 for first, stay near 0 + store.update_key('a', 200.0); + assert!((store.smoothed_error_rate('a') - 0.0).abs() < f64::EPSILON); + for _ in 0..10 { + store.update_key('a', 200.0); + } + assert!( + store.smoothed_error_rate('a') < 0.01, + "All correct → EMA near 0" + ); + } + + #[test] + fn test_ema_error_rate_error_strokes() { + let mut store = KeyStatsStore::default(); + // First stroke is error + store.update_key_error('b'); + assert!((store.smoothed_error_rate('b') - 1.0).abs() < f64::EPSILON); + // Follow with correct strokes → EMA decays + for _ in 0..20 { + store.update_key('b', 200.0); + } + let rate = store.smoothed_error_rate('b'); + assert!( + rate < 0.15, + "After 20 correct, EMA should be < 0.15, got {rate}" + ); + } + + #[test] + fn test_ema_error_rate_default_for_missing_key() { + let store = KeyStatsStore::default(); + assert!((store.smoothed_error_rate('z') - 0.5).abs() < f64::EPSILON); + } + + #[test] + fn test_ema_error_rate_serde_default() { + // Verify backward compat: deserializing old data without error_rate_ema gets 0.5 + let json = r#"{"filtered_time_ms":200.0,"best_time_ms":200.0,"confidence":1.0,"sample_count":10,"recent_times":[],"error_count":2,"total_count":10}"#; + let stat: KeyStat = serde_json::from_str(json).unwrap(); + assert!((stat.error_rate_ema - 0.5).abs() < f64::EPSILON); + } } diff --git a/src/engine/mod.rs b/src/engine/mod.rs index 5fcb3ce..b821f5e 100644 --- a/src/engine/mod.rs +++ b/src/engine/mod.rs @@ -5,4 +5,4 @@ pub mod ngram_stats; pub 
mod scoring; pub mod skill_tree; -pub use ngram_stats::FocusTarget; +pub use ngram_stats::FocusSelection; diff --git a/src/engine/ngram_stats.rs b/src/engine/ngram_stats.rs index 6cfaf44..578366f 100644 --- a/src/engine/ngram_stats.rs +++ b/src/engine/ngram_stats.rs @@ -8,11 +8,13 @@ use crate::keyboard::display::BACKSPACE; use crate::session::result::KeyTime; const EMA_ALPHA: f64 = 0.1; -const DEFAULT_TARGET_CPM: f64 = 175.0; const MAX_RECENT: usize = 30; -const STABILITY_THRESHOLD: f64 = 1.5; -const STABILITY_STREAK_REQUIRED: u8 = 3; -const MIN_SAMPLES_FOR_FOCUS: usize = 20; +const ERROR_ANOMALY_RATIO_THRESHOLD: f64 = 1.5; +pub(crate) const ANOMALY_STREAK_REQUIRED: u8 = 3; +pub(crate) const MIN_SAMPLES_FOR_FOCUS: usize = 20; +const ANOMALY_MIN_SAMPLES: usize = 3; +const SPEED_ANOMALY_PCT_THRESHOLD: f64 = 50.0; +const MIN_CHAR_SAMPLES_FOR_SPEED: usize = 10; const MAX_TRIGRAM_ENTRIES: usize = 5000; // --------------------------------------------------------------------------- @@ -33,35 +35,47 @@ pub struct TrigramKey(pub [char; 3]); pub struct NgramStat { pub filtered_time_ms: f64, pub best_time_ms: f64, - pub confidence: f64, pub sample_count: usize, pub error_count: usize, pub hesitation_count: usize, pub recent_times: Vec, - pub recent_correct: Vec, - pub redundancy_streak: u8, + #[serde(default = "default_error_rate_ema")] + pub error_rate_ema: f64, + pub error_anomaly_streak: u8, + #[serde(default)] + pub speed_anomaly_streak: u8, #[serde(default)] pub last_seen_drill_index: u32, } +fn default_error_rate_ema() -> f64 { + 0.5 +} + impl Default for NgramStat { fn default() -> Self { Self { filtered_time_ms: 1000.0, best_time_ms: f64::MAX, - confidence: 0.0, sample_count: 0, error_count: 0, hesitation_count: 0, recent_times: Vec::new(), - recent_correct: Vec::new(), - redundancy_streak: 0, + error_rate_ema: 0.5, + error_anomaly_streak: 0, + speed_anomaly_streak: 0, last_seen_drill_index: 0, } } } -fn update_stat(stat: &mut NgramStat, time_ms: f64, correct: bool, 
hesitation: bool, target_time_ms: f64, drill_index: u32) { +fn update_stat( + stat: &mut NgramStat, + time_ms: f64, + correct: bool, + hesitation: bool, + drill_index: u32, +) { stat.last_seen_drill_index = drill_index; stat.sample_count += 1; if !correct { @@ -78,20 +92,19 @@ fn update_stat(stat: &mut NgramStat, time_ms: f64, correct: bool, hesitation: bo } stat.best_time_ms = stat.best_time_ms.min(stat.filtered_time_ms); - stat.confidence = target_time_ms / stat.filtered_time_ms; stat.recent_times.push(time_ms); if stat.recent_times.len() > MAX_RECENT { stat.recent_times.remove(0); } - stat.recent_correct.push(correct); - if stat.recent_correct.len() > MAX_RECENT { - stat.recent_correct.remove(0); - } -} -fn smoothed_error_rate_raw(errors: usize, samples: usize) -> f64 { - (errors as f64 + 1.0) / (samples as f64 + 2.0) + // Update error rate EMA + let error_signal = if correct { 0.0 } else { 1.0 }; + if stat.sample_count == 1 { + stat.error_rate_ema = error_signal; + } else { + stat.error_rate_ema = EMA_ALPHA * error_signal + (1.0 - EMA_ALPHA) * stat.error_rate_ema; + } } // --------------------------------------------------------------------------- @@ -101,34 +114,31 @@ fn smoothed_error_rate_raw(errors: usize, samples: usize) -> f64 { #[derive(Clone, Debug, Default, Serialize, Deserialize)] pub struct BigramStatsStore { pub stats: HashMap, - #[serde(default = "default_target_cpm")] - pub target_cpm: f64, -} - -fn default_target_cpm() -> f64 { - DEFAULT_TARGET_CPM } impl BigramStatsStore { - pub fn update(&mut self, key: BigramKey, time_ms: f64, correct: bool, hesitation: bool, drill_index: u32) { - let target_time_ms = 2.0 * 60000.0 / self.target_cpm; + pub fn update( + &mut self, + key: BigramKey, + time_ms: f64, + correct: bool, + hesitation: bool, + drill_index: u32, + ) { let stat = self.stats.entry(key).or_default(); - update_stat(stat, time_ms, correct, hesitation, target_time_ms, drill_index); - } - - #[allow(dead_code)] - pub fn get_confidence(&self, 
key: &BigramKey) -> f64 { - self.stats.get(key).map(|s| s.confidence).unwrap_or(0.0) + update_stat(stat, time_ms, correct, hesitation, drill_index); } pub fn smoothed_error_rate(&self, key: &BigramKey) -> f64 { match self.stats.get(key) { - Some(s) => smoothed_error_rate_raw(s.error_count, s.sample_count), - None => smoothed_error_rate_raw(0, 0), + Some(s) => s.error_rate_ema, + None => 0.5, } } - pub fn redundancy_score(&self, key: &BigramKey, char_stats: &KeyStatsStore) -> f64 { + /// Error anomaly ratio: bigram error rate / expected error rate from char independence. + /// Values > 1.0 indicate genuine bigram difficulty beyond individual char weakness. + pub fn error_anomaly_ratio(&self, key: &BigramKey, char_stats: &KeyStatsStore) -> f64 { let e_a = char_stats.smoothed_error_rate(key.0[0]); let e_b = char_stats.smoothed_error_rate(key.0[1]); let e_ab = self.smoothed_error_rate(key); @@ -136,62 +146,204 @@ impl BigramStatsStore { e_ab / expected_ab.max(0.01) } - /// Update redundancy streak for a bigram given current char stats. + /// Error anomaly as percentage: (ratio - 1.0) * 100. + /// Returns None if bigram has no stats. + #[allow(dead_code)] + pub fn error_anomaly_pct(&self, key: &BigramKey, char_stats: &KeyStatsStore) -> Option { + let _stat = self.stats.get(key)?; + let ratio = self.error_anomaly_ratio(key, char_stats); + Some((ratio - 1.0) * 100.0) + } + + /// Speed anomaly: % slower than user types char_b in isolation. + /// Compares bigram filtered_time_ms to char_b's filtered_time_ms. + /// Returns None if bigram has no stats or char_b has < MIN_CHAR_SAMPLES_FOR_SPEED samples. 
+ pub fn speed_anomaly_pct(&self, key: &BigramKey, char_stats: &KeyStatsStore) -> Option { + let stat = self.stats.get(key)?; + let char_b_stat = char_stats.stats.get(&key.0[1])?; + if char_b_stat.sample_count < MIN_CHAR_SAMPLES_FOR_SPEED { + return None; + } + let ratio = stat.filtered_time_ms / char_b_stat.filtered_time_ms; + Some((ratio - 1.0) * 100.0) + } + + /// Update error anomaly streak for a bigram given current char stats. /// Call this after updating the bigram stats. - pub fn update_redundancy_streak(&mut self, key: &BigramKey, char_stats: &KeyStatsStore) { - let redundancy = self.redundancy_score(key, char_stats); + pub fn update_error_anomaly_streak(&mut self, key: &BigramKey, char_stats: &KeyStatsStore) { + let ratio = self.error_anomaly_ratio(key, char_stats); if let Some(stat) = self.stats.get_mut(key) { - if redundancy > STABILITY_THRESHOLD { - stat.redundancy_streak = stat.redundancy_streak.saturating_add(1); + if ratio > ERROR_ANOMALY_RATIO_THRESHOLD { + stat.error_anomaly_streak = stat.error_anomaly_streak.saturating_add(1); } else { - stat.redundancy_streak = 0; + stat.error_anomaly_streak = 0; } } } - /// Find the weakest eligible bigram (stability-gated). - /// Only considers bigrams whose chars are all in `unlocked`. - pub fn weakest_bigram( + /// Update speed anomaly streak for a bigram given current char stats. + /// If speed_anomaly_pct() returns None (char baseline unavailable), holds previous streak value. 
+ pub fn update_speed_anomaly_streak(&mut self, key: &BigramKey, char_stats: &KeyStatsStore) { + let stat = match self.stats.get(key) { + Some(s) => s, + None => return, + }; + if stat.sample_count < ANOMALY_MIN_SAMPLES { + return; + } + match self.speed_anomaly_pct(key, char_stats) { + Some(pct) => { + if let Some(stat) = self.stats.get_mut(key) { + if pct > SPEED_ANOMALY_PCT_THRESHOLD { + stat.speed_anomaly_streak = stat.speed_anomaly_streak.saturating_add(1); + } else { + stat.speed_anomaly_streak = 0; + } + } + } + None => { + // Hold previous streak — char baseline unavailable + } + } + } + + /// All bigrams with error anomaly above threshold and sufficient samples. + /// Sorted by anomaly_pct desc. Each entry's `confirmed` flag indicates + /// streak >= ANOMALY_STREAK_REQUIRED && samples >= MIN_SAMPLES_FOR_FOCUS. + pub fn error_anomaly_bigrams( &self, char_stats: &KeyStatsStore, unlocked: &[char], - ) -> Option<(BigramKey, f64)> { - let mut best: Option<(BigramKey, f64)> = None; + ) -> Vec { + let mut results = Vec::new(); for (key, stat) in &self.stats { - // Must be composed of unlocked chars if !unlocked.contains(&key.0[0]) || !unlocked.contains(&key.0[1]) { continue; } - // Minimum samples - if stat.sample_count < MIN_SAMPLES_FOR_FOCUS { + if stat.sample_count < ANOMALY_MIN_SAMPLES { continue; } - // Stability gate - if stat.redundancy_streak < STABILITY_STREAK_REQUIRED { + let e_a = char_stats.smoothed_error_rate(key.0[0]); + let e_b = char_stats.smoothed_error_rate(key.0[1]); + let expected = 1.0 - (1.0 - e_a) * (1.0 - e_b); + let ratio = self.error_anomaly_ratio(key, char_stats); + if ratio <= ERROR_ANOMALY_RATIO_THRESHOLD { continue; } - let redundancy = self.redundancy_score(key, char_stats); - if redundancy <= STABILITY_THRESHOLD { + let anomaly_pct = (ratio - 1.0) * 100.0; + let confirmed = stat.error_anomaly_streak >= ANOMALY_STREAK_REQUIRED + && stat.sample_count >= MIN_SAMPLES_FOR_FOCUS; + results.push(BigramAnomaly { + key: key.clone(), + 
anomaly_pct, + sample_count: stat.sample_count, + error_count: stat.error_count, + error_rate_ema: stat.error_rate_ema, + speed_ms: stat.filtered_time_ms, + expected_baseline: expected, + confirmed, + }); + } + + results.sort_by(|a, b| { + b.anomaly_pct + .partial_cmp(&a.anomaly_pct) + .unwrap_or(std::cmp::Ordering::Equal) + .then_with(|| a.key.0.cmp(&b.key.0)) + }); + + results + } + + /// All bigrams with speed anomaly above threshold and sufficient samples. + /// Sorted by anomaly_pct desc. + pub fn speed_anomaly_bigrams( + &self, + char_stats: &KeyStatsStore, + unlocked: &[char], + ) -> Vec { + let mut results = Vec::new(); + + for (key, stat) in &self.stats { + if !unlocked.contains(&key.0[0]) || !unlocked.contains(&key.0[1]) { continue; } - // ngram_difficulty = (1.0 - confidence) * redundancy - let difficulty = (1.0 - stat.confidence) * redundancy; - if difficulty <= 0.0 { + if stat.sample_count < ANOMALY_MIN_SAMPLES { continue; } - match best { - Some((_, best_diff)) if difficulty > best_diff => { - best = Some((key.clone(), difficulty)); - } - None => { - best = Some((key.clone(), difficulty)); + let char_b_speed = char_stats + .stats + .get(&key.0[1]) + .map(|s| s.filtered_time_ms) + .unwrap_or(0.0); + match self.speed_anomaly_pct(key, char_stats) { + Some(pct) if pct > SPEED_ANOMALY_PCT_THRESHOLD => { + let confirmed = stat.speed_anomaly_streak >= ANOMALY_STREAK_REQUIRED + && stat.sample_count >= MIN_SAMPLES_FOR_FOCUS; + results.push(BigramAnomaly { + key: key.clone(), + anomaly_pct: pct, + sample_count: stat.sample_count, + error_count: stat.error_count, + error_rate_ema: stat.error_rate_ema, + speed_ms: stat.filtered_time_ms, + expected_baseline: char_b_speed, + confirmed, + }); } _ => {} } } - best + results.sort_by(|a, b| { + b.anomaly_pct + .partial_cmp(&a.anomaly_pct) + .unwrap_or(std::cmp::Ordering::Equal) + .then_with(|| a.key.0.cmp(&b.key.0)) + }); + + results + } + + /// Find the worst confirmed anomaly across both error and speed anomalies. 
+ /// Each bigram gets at most one candidacy (whichever anomaly type is higher; error on tie). + pub fn worst_confirmed_anomaly( + &self, + char_stats: &KeyStatsStore, + unlocked: &[char], + ) -> Option<(BigramKey, f64, AnomalyType)> { + let mut candidates: HashMap = HashMap::new(); + + // Collect confirmed error anomalies + for a in self.error_anomaly_bigrams(char_stats, unlocked) { + if a.confirmed { + candidates.insert(a.key, (a.anomaly_pct, AnomalyType::Error)); + } + } + + // Collect confirmed speed anomalies, dedup per bigram preferring higher pct (error on tie) + for a in self.speed_anomaly_bigrams(char_stats, unlocked) { + if a.confirmed { + match candidates.get(&a.key) { + Some((existing_pct, _)) if *existing_pct >= a.anomaly_pct => { + // Keep existing (error wins on tie since >= keeps it) + } + _ => { + candidates.insert(a.key, (a.anomaly_pct, AnomalyType::Speed)); + } + } + } + } + + candidates + .into_iter() + .max_by(|a, b| { + a.1.0 + .partial_cmp(&b.1.0) + .unwrap_or(std::cmp::Ordering::Equal) + }) + .map(|(key, (pct, typ))| (key, pct, typ)) } } @@ -202,26 +354,25 @@ impl BigramStatsStore { #[derive(Clone, Debug, Default, Serialize, Deserialize)] pub struct TrigramStatsStore { pub stats: HashMap, - #[serde(default = "default_target_cpm")] - pub target_cpm: f64, } impl TrigramStatsStore { - pub fn update(&mut self, key: TrigramKey, time_ms: f64, correct: bool, hesitation: bool, drill_index: u32) { - let target_time_ms = 3.0 * 60000.0 / self.target_cpm; + pub fn update( + &mut self, + key: TrigramKey, + time_ms: f64, + correct: bool, + hesitation: bool, + drill_index: u32, + ) { let stat = self.stats.entry(key).or_default(); - update_stat(stat, time_ms, correct, hesitation, target_time_ms, drill_index); - } - - #[allow(dead_code)] - pub fn get_confidence(&self, key: &TrigramKey) -> f64 { - self.stats.get(key).map(|s| s.confidence).unwrap_or(0.0) + update_stat(stat, time_ms, correct, hesitation, drill_index); } pub fn smoothed_error_rate(&self, key: 
&TrigramKey) -> f64 { match self.stats.get(key) { - Some(s) => smoothed_error_rate_raw(s.error_count, s.sample_count), - None => smoothed_error_rate_raw(0, 0), + Some(s) => s.error_rate_ema, + None => 0.5, } } @@ -248,7 +399,13 @@ impl TrigramStatsStore { /// Prune to `max_entries` by composite utility score. /// `total_drills` is the current total drill count for recency calculation. - pub fn prune(&mut self, max_entries: usize, total_drills: u32, bigram_stats: &BigramStatsStore, char_stats: &KeyStatsStore) { + pub fn prune( + &mut self, + max_entries: usize, + total_drills: u32, + bigram_stats: &BigramStatsStore, + char_stats: &KeyStatsStore, + ) { if self.stats.len() <= max_entries { return; } @@ -263,10 +420,13 @@ impl TrigramStatsStore { .map(|(key, stat)| { let drills_since = total_drills.saturating_sub(stat.last_seen_drill_index) as f64; let recency = 1.0 / (drills_since + 1.0); - let redundancy = self.redundancy_score(key, bigram_stats, char_stats).min(3.0); + let redundancy = self + .redundancy_score(key, bigram_stats, char_stats) + .min(3.0); let data = (stat.sample_count as f64).ln_1p(); - let utility = recency_weight * recency + signal_weight * redundancy + data_weight * data; + let utility = + recency_weight * recency + signal_weight * redundancy + data_weight * data; (key.clone(), utility) }) .collect(); @@ -276,9 +436,7 @@ impl TrigramStatsStore { let keep: HashMap = scored .into_iter() - .filter_map(|(key, _)| { - self.stats.remove(&key).map(|stat| (key, stat)) - }) + .filter_map(|(key, _)| self.stats.remove(&key).map(|stat| (key, stat))) .collect(); self.stats = keep; @@ -374,46 +532,51 @@ pub fn extract_ngram_events( } // --------------------------------------------------------------------------- -// FocusTarget & selection +// Anomaly types // --------------------------------------------------------------------------- #[derive(Clone, Debug, PartialEq)] -pub enum FocusTarget { - Char(char), - Bigram(BigramKey), +pub enum AnomalyType { + Error, + 
Speed, } -/// Select the best focus target: either a single character or a bigram. -/// -/// If the weakest eligible bigram's difficulty score exceeds 80% of the -/// weakest character's difficulty, focus on the bigram. Otherwise fall back -/// to the character. -pub fn select_focus_target( +pub struct BigramAnomaly { + pub key: BigramKey, + pub anomaly_pct: f64, + pub sample_count: usize, + pub error_count: usize, + pub error_rate_ema: f64, + pub speed_ms: f64, + pub expected_baseline: f64, + pub confirmed: bool, +} + +// --------------------------------------------------------------------------- +// FocusSelection +// --------------------------------------------------------------------------- + +/// Combined focus selection: carries both char and bigram focus independently. +#[derive(Clone, Debug, PartialEq)] +pub struct FocusSelection { + pub char_focus: Option, + pub bigram_focus: Option<(BigramKey, f64, AnomalyType)>, +} + +/// Select focus targets: weakest char from skill tree + worst confirmed bigram anomaly. +/// Both are independent — neither overrides the other. 
+pub fn select_focus( skill_tree: &SkillTree, scope: DrillScope, ranked_key_stats: &KeyStatsStore, ranked_bigram_stats: &BigramStatsStore, -) -> FocusTarget { +) -> FocusSelection { let unlocked = skill_tree.unlocked_keys(scope); - let focused_char = skill_tree.focused_key(scope, ranked_key_stats); - - let bigram_result = ranked_bigram_stats.weakest_bigram(ranked_key_stats, &unlocked); - - match (focused_char, bigram_result) { - (Some(ch), Some((bigram_key, bigram_difficulty))) => { - // Compute char difficulty: (1.0 - confidence) — no redundancy multiplier for chars - let char_conf = ranked_key_stats.get_confidence(ch); - let char_difficulty = (1.0 - char_conf).max(0.0); - - if bigram_difficulty > char_difficulty * 0.8 { - FocusTarget::Bigram(bigram_key) - } else { - FocusTarget::Char(ch) - } - } - (Some(ch), None) => FocusTarget::Char(ch), - (None, Some((bigram_key, _))) => FocusTarget::Bigram(bigram_key), - (None, None) => FocusTarget::Char('e'), // fallback + let char_focus = skill_tree.focused_key(scope, ranked_key_stats); + let bigram_focus = ranked_bigram_stats.worst_confirmed_anomaly(ranked_key_stats, &unlocked); + FocusSelection { + char_focus, + bigram_focus, } } @@ -441,7 +604,10 @@ pub fn trigram_marginal_gain( let with_signal = qualified .iter() - .filter(|k| trigram_stats.redundancy_score(k, bigram_stats, char_stats) > STABILITY_THRESHOLD) + .filter(|k| { + trigram_stats.redundancy_score(k, bigram_stats, char_stats) + > ERROR_ANOMALY_RATIO_THRESHOLD + }) .count(); with_signal as f64 / qualified.len() as f64 @@ -569,269 +735,253 @@ mod tests { assert!(!trigrams[0].correct); // abc: b incorrect -> false } - // --- Laplace smoothing tests --- + // --- EMA error rate tests --- #[test] - fn laplace_smoothing_zero_samples() { - assert!((smoothed_error_rate_raw(0, 0) - 0.5).abs() < f64::EPSILON); + fn ema_default_is_neutral() { + let store = BigramStatsStore::default(); + let key = BigramKey(['a', 'b']); + assert!((store.smoothed_error_rate(&key) - 
0.5).abs() < f64::EPSILON); } #[test] - fn laplace_smoothing_convergence() { - // With 100 samples and 10 errors, should be close to 0.1 - let rate = smoothed_error_rate_raw(10, 100); - assert!((rate - 11.0 / 102.0).abs() < f64::EPSILON); - assert!(rate > 0.1 && rate < 0.12); + fn ema_first_sample_sets_directly() { + let mut store = BigramStatsStore::default(); + let key = BigramKey(['a', 'b']); + store.update(key.clone(), 200.0, true, false, 0); + assert!((store.smoothed_error_rate(&key) - 0.0).abs() < f64::EPSILON); + + let mut store2 = BigramStatsStore::default(); + store2.update(key.clone(), 200.0, false, false, 0); + assert!((store2.smoothed_error_rate(&key) - 1.0).abs() < f64::EPSILON); } #[test] - fn laplace_smoothing_all_errors() { - let rate = smoothed_error_rate_raw(50, 50); - assert!((rate - 51.0 / 52.0).abs() < f64::EPSILON); + fn ema_converges_toward_zero_with_correct() { + let mut store = BigramStatsStore::default(); + let key = BigramKey(['a', 'b']); + // Start with an error + store.update(key.clone(), 200.0, false, false, 0); + assert!((store.smoothed_error_rate(&key) - 1.0).abs() < f64::EPSILON); + // 20 correct strokes should bring it down significantly + for i in 1..=20 { + store.update(key.clone(), 200.0, true, false, i); + } + let rate = store.smoothed_error_rate(&key); + assert!( + rate < 0.15, + "After 20 correct, EMA should be < 0.15, got {rate}" + ); + } + + #[test] + fn test_error_rate_ema_decay() { + // Verify that after N correct strokes, error_rate_ema drops as expected + let mut store = BigramStatsStore::default(); + let key = BigramKey(['t', 'h']); + // Simulate 30% error rate: 3 errors in 10 strokes + for i in 0..10 { + let correct = i % 3 != 0; // errors at 0, 3, 6, 9 + store.update(key.clone(), 200.0, correct, false, i); + } + let rate_before = store.smoothed_error_rate(&key); + // Now 15 correct strokes + for i in 10..25 { + store.update(key.clone(), 200.0, true, false, i); + } + let rate_after = store.smoothed_error_rate(&key); + 
assert!( + rate_after < rate_before, + "EMA should decay: before={rate_before} after={rate_after}" + ); + assert!( + rate_after < 0.15, + "After 15 correct strokes, rate should be < 0.15, got {rate_after}" + ); } // --- Redundancy tests --- #[test] fn redundancy_proxy_example() { - // Example 1 from plan: "is" where 's' is weak + // "is" where 's' is weak — bigram error rate is explained by char weakness let mut char_stats = KeyStatsStore::default(); - // Simulate: s has high error rate - // We need to set up error_count and total_count - // s: e_s = 0.25 -> (errors+1)/(samples+2) = 0.25 - // Solve: (e+1)/(s+2) = 0.25 -> at s=50, e=12: (13)/(52) = 0.25 let s_stat = char_stats.stats.entry('s').or_default(); - s_stat.error_count = 12; - s_stat.total_count = 50; - // i: e_i = 0.03 -> (e+1)/(s+2) = 0.03 -> at s=100, e=~2: (3)/(102) = 0.0294 + s_stat.error_rate_ema = 0.25; let i_stat = char_stats.stats.entry('i').or_default(); - i_stat.error_count = 2; - i_stat.total_count = 100; + i_stat.error_rate_ema = 0.03; let mut bigram_stats = BigramStatsStore::default(); let is_key = BigramKey(['i', 's']); - // e_is = 0.28 -> (e+1)/(s+2) = 0.28 -> at s=50, e=~13: (14)/(52) = 0.269 - // Let's pick s=100, e=~27: (28)/(102) = 0.2745 - // Actually, let's just use values that give close to what we want let is_stat = bigram_stats.stats.entry(is_key.clone()).or_default(); - is_stat.error_count = 27; + is_stat.error_rate_ema = 0.27; is_stat.sample_count = 100; let e_s = char_stats.smoothed_error_rate('s'); let e_i = char_stats.smoothed_error_rate('i'); let e_is = bigram_stats.smoothed_error_rate(&is_key); let expected = 1.0 - (1.0 - e_s) * (1.0 - e_i); - let redundancy = bigram_stats.redundancy_score(&is_key, &char_stats); + let redundancy = bigram_stats.error_anomaly_ratio(&is_key, &char_stats); - // The redundancy should be close to 1.0 (proxy, not genuine) assert!( - redundancy < STABILITY_THRESHOLD, - "Proxy bigram 'is' should have redundancy < {STABILITY_THRESHOLD}, got 
{redundancy} (e_s={e_s}, e_i={e_i}, e_is={e_is}, expected={expected})" + redundancy < ERROR_ANOMALY_RATIO_THRESHOLD, + "Proxy bigram 'is' should have redundancy < {ERROR_ANOMALY_RATIO_THRESHOLD}, got {redundancy} (e_s={e_s}, e_i={e_i}, e_is={e_is}, expected={expected})" ); } #[test] fn redundancy_genuine_difficulty() { - // Example 2 from plan: "ed" where both chars are fine individually + // "ed" where both chars are fine individually but bigram has high error rate let mut char_stats = KeyStatsStore::default(); - // e: e_e = 0.04 -> (e+1)/(s+2) ~ 0.04 at s=100, errors=~3: (4)/(102) = 0.039 let e_stat = char_stats.stats.entry('e').or_default(); - e_stat.error_count = 3; - e_stat.total_count = 100; - // d: e_d = 0.05 -> (e+1)/(s+2) ~ 0.05 at s=100, errors=~4: (5)/(102) = 0.049 + e_stat.error_rate_ema = 0.04; let d_stat = char_stats.stats.entry('d').or_default(); - d_stat.error_count = 4; - d_stat.total_count = 100; + d_stat.error_rate_ema = 0.05; let mut bigram_stats = BigramStatsStore::default(); let ed_key = BigramKey(['e', 'd']); - // e_ed = 0.22 -> at s=100, errors=~21: (22)/(102) = 0.2157 let ed_stat = bigram_stats.stats.entry(ed_key.clone()).or_default(); - ed_stat.error_count = 21; + ed_stat.error_rate_ema = 0.22; ed_stat.sample_count = 100; - let redundancy = bigram_stats.redundancy_score(&ed_key, &char_stats); + let redundancy = bigram_stats.error_anomaly_ratio(&ed_key, &char_stats); assert!( - redundancy > STABILITY_THRESHOLD, - "Genuine difficulty 'ed' should have redundancy > {STABILITY_THRESHOLD}, got {redundancy}" + redundancy > ERROR_ANOMALY_RATIO_THRESHOLD, + "Genuine difficulty 'ed' should have redundancy > {ERROR_ANOMALY_RATIO_THRESHOLD}, got {redundancy}" ); } #[test] fn redundancy_trigram_explained_by_bigram() { - // Example 3: "the" where "th" bigram explains the difficulty + // "the" where "th" bigram explains the difficulty let mut char_stats = KeyStatsStore::default(); - for &(ch, errors, total) in &[('t', 2, 100), ('h', 3, 100), ('e', 3, 
100)] { + for &(ch, ema) in &[('t', 0.03), ('h', 0.04), ('e', 0.04)] { let s = char_stats.stats.entry(ch).or_default(); - s.error_count = errors; - s.total_count = total; + s.error_rate_ema = ema; } let mut bigram_stats = BigramStatsStore::default(); - // th has high error rate: e_th = 0.15 -> at s=100, e=~14: (15)/(102) = 0.147 let th_stat = bigram_stats.stats.entry(BigramKey(['t', 'h'])).or_default(); - th_stat.error_count = 14; + th_stat.error_rate_ema = 0.15; th_stat.sample_count = 100; - // he has low error rate let he_stat = bigram_stats.stats.entry(BigramKey(['h', 'e'])).or_default(); - he_stat.error_count = 3; + he_stat.error_rate_ema = 0.04; he_stat.sample_count = 100; let mut trigram_stats = TrigramStatsStore::default(); let the_key = TrigramKey(['t', 'h', 'e']); - // e_the = 0.16 -> at s=100, e=~15: (16)/(102) = 0.157 let the_stat = trigram_stats.stats.entry(the_key.clone()).or_default(); - the_stat.error_count = 15; + the_stat.error_rate_ema = 0.16; the_stat.sample_count = 100; let redundancy = trigram_stats.redundancy_score(&the_key, &bigram_stats, &char_stats); assert!( - redundancy < STABILITY_THRESHOLD, - "Trigram 'the' explained by 'th' bigram should have redundancy < {STABILITY_THRESHOLD}, got {redundancy}" + redundancy < ERROR_ANOMALY_RATIO_THRESHOLD, + "Trigram 'the' explained by 'th' bigram should have redundancy < {ERROR_ANOMALY_RATIO_THRESHOLD}, got {redundancy}" ); } // --- Stability gate tests --- #[test] - fn stability_streak_increments_and_resets() { + fn error_anomaly_streak_increments_and_resets() { let mut bigram_stats = BigramStatsStore::default(); let key = BigramKey(['e', 'd']); - // Set up a bigram with genuine difficulty + // Set up a bigram with genuine difficulty via EMA let stat = bigram_stats.stats.entry(key.clone()).or_default(); - stat.error_count = 25; + stat.error_rate_ema = 0.25; stat.sample_count = 100; let mut char_stats = KeyStatsStore::default(); // Low char error rates - 
char_stats.stats.entry('e').or_default().error_count = 2; - char_stats.stats.entry('e').or_default().total_count = 100; - char_stats.stats.entry('d').or_default().error_count = 2; - char_stats.stats.entry('d').or_default().total_count = 100; + char_stats.stats.entry('e').or_default().error_rate_ema = 0.03; + char_stats.stats.entry('d').or_default().error_rate_ema = 0.03; // Should increment streak - bigram_stats.update_redundancy_streak(&key, &char_stats); - assert_eq!(bigram_stats.stats[&key].redundancy_streak, 1); - bigram_stats.update_redundancy_streak(&key, &char_stats); - assert_eq!(bigram_stats.stats[&key].redundancy_streak, 2); - bigram_stats.update_redundancy_streak(&key, &char_stats); - assert_eq!(bigram_stats.stats[&key].redundancy_streak, 3); + bigram_stats.update_error_anomaly_streak(&key, &char_stats); + assert_eq!(bigram_stats.stats[&key].error_anomaly_streak, 1); + bigram_stats.update_error_anomaly_streak(&key, &char_stats); + assert_eq!(bigram_stats.stats[&key].error_anomaly_streak, 2); + bigram_stats.update_error_anomaly_streak(&key, &char_stats); + assert_eq!(bigram_stats.stats[&key].error_anomaly_streak, 3); - // Now simulate char stats getting worse (making redundancy low) - char_stats.stats.entry('e').or_default().error_count = 30; - bigram_stats.update_redundancy_streak(&key, &char_stats); - assert_eq!(bigram_stats.stats[&key].redundancy_streak, 0); // reset + // Now simulate char stats getting worse (making anomaly ratio low) + char_stats.stats.entry('e').or_default().error_rate_ema = 0.30; + bigram_stats.update_error_anomaly_streak(&key, &char_stats); + assert_eq!(bigram_stats.stats[&key].error_anomaly_streak, 0); // reset } #[test] - fn focus_eligibility_requires_all_conditions() { + fn worst_confirmed_anomaly_requires_all_conditions() { let mut bigram_stats = BigramStatsStore::default(); let mut char_stats = KeyStatsStore::default(); let unlocked = vec!['a', 'b', 'c', 'd', 'e']; - // Set up char stats with low error rates + // Set up char 
stats with low EMA error rates for &ch in &['a', 'b'] { let s = char_stats.stats.entry(ch).or_default(); - s.error_count = 2; - s.total_count = 100; + s.error_rate_ema = 0.03; } let key = BigramKey(['a', 'b']); let stat = bigram_stats.stats.entry(key.clone()).or_default(); - stat.error_count = 25; + stat.error_rate_ema = 0.80; stat.sample_count = 25; // enough samples - stat.confidence = 0.5; - stat.redundancy_streak = STABILITY_STREAK_REQUIRED; // stable + stat.error_anomaly_streak = ANOMALY_STREAK_REQUIRED; // stable - // Should be eligible - let result = bigram_stats.weakest_bigram(&char_stats, &unlocked); - assert!(result.is_some(), "Should be eligible with all conditions met"); + // Should be confirmed + let result = bigram_stats.worst_confirmed_anomaly(&char_stats, &unlocked); + assert!( + result.is_some(), + "Should be confirmed with all conditions met" + ); - // Reset streak -> not eligible - bigram_stats.stats.get_mut(&key).unwrap().redundancy_streak = 2; - let result = bigram_stats.weakest_bigram(&char_stats, &unlocked); - assert!(result.is_none(), "Should NOT be eligible without stable streak"); + // Reset streak -> not confirmed + bigram_stats + .stats + .get_mut(&key) + .unwrap() + .error_anomaly_streak = 2; + let result = bigram_stats.worst_confirmed_anomaly(&char_stats, &unlocked); + assert!( + result.is_none(), + "Should NOT be confirmed without stable streak" + ); - // Restore streak, reduce samples -> not eligible - bigram_stats.stats.get_mut(&key).unwrap().redundancy_streak = STABILITY_STREAK_REQUIRED; + // Restore streak, reduce samples -> not confirmed + bigram_stats + .stats + .get_mut(&key) + .unwrap() + .error_anomaly_streak = ANOMALY_STREAK_REQUIRED; bigram_stats.stats.get_mut(&key).unwrap().sample_count = 15; - let result = bigram_stats.weakest_bigram(&char_stats, &unlocked); - assert!(result.is_none(), "Should NOT be eligible with < 20 samples"); + let result = bigram_stats.worst_confirmed_anomaly(&char_stats, &unlocked); + assert!( + 
result.is_none(), + "Should NOT be confirmed with < 20 samples" + ); } // --- Focus selection tests --- #[test] - fn focus_falls_back_to_char_when_no_bigrams() { + fn focus_no_bigrams_gives_char_only() { let skill_tree = SkillTree::default(); let key_stats = KeyStatsStore::default(); let bigram_stats = BigramStatsStore::default(); - let target = select_focus_target( - &skill_tree, - DrillScope::Global, - &key_stats, - &bigram_stats, - ); + let selection = select_focus(&skill_tree, DrillScope::Global, &key_stats, &bigram_stats); - // With default skill tree, focused_key may return a char or None - // Either way, should not be a Bigram - match target { - FocusTarget::Char(_) => {} // expected - FocusTarget::Bigram(_) => panic!("Should not select bigram with no data"), - } - } - - #[test] - fn focus_selects_bigram_when_difficulty_exceeds_threshold() { - // Set up a skill tree with some unlocked keys and known confidence - let skill_tree = SkillTree::default(); - let mut key_stats = KeyStatsStore::default(); - - // Give all unlocked keys high confidence so focused_key returns - // the one with lowest confidence - for &ch in &['e', 't', 'a', 'o', 'n', 'i'] { - let stat = key_stats.stats.entry(ch).or_default(); - stat.confidence = 0.95; - stat.filtered_time_ms = 360.0; // slow enough to not be mastered - stat.sample_count = 50; - stat.total_count = 50; - stat.error_count = 2; - } - // Make 'n' the weakest char: confidence = 0.5 -> char_difficulty = 0.5 - key_stats.stats.get_mut(&'n').unwrap().confidence = 0.5; - key_stats.stats.get_mut(&'n').unwrap().filtered_time_ms = 686.0; - - // Set up a bigram 'e','t' with high difficulty that exceeds 0.8 * char_difficulty - // char_difficulty = 1.0 - 0.5 = 0.5, threshold = 0.5 * 0.8 = 0.4 - // bigram needs ngram_difficulty > 0.4 - // ngram_difficulty = (1.0 - confidence) * redundancy - // confidence = 0.4, redundancy = 2.0 -> difficulty = 0.6 * 2.0 = 1.2 > 0.4 - let mut bigram_stats = BigramStatsStore::default(); - let et_key = 
BigramKey(['e', 't']); - let stat = bigram_stats.stats.entry(et_key.clone()).or_default(); - stat.confidence = 0.4; - stat.sample_count = 30; - stat.error_count = 20; - stat.redundancy_streak = STABILITY_STREAK_REQUIRED; - - let target = select_focus_target( - &skill_tree, - DrillScope::Global, - &key_stats, - &bigram_stats, - ); - - assert_eq!( - target, - FocusTarget::Bigram(et_key), - "Bigram should win when its difficulty exceeds char_difficulty * 0.8" + // No bigram data → bigram_focus should be None + assert!( + selection.bigram_focus.is_none(), + "No bigram data should mean no bigram focus" ); } #[test] - fn focus_selects_char_when_bigram_difficulty_below_threshold() { + fn focus_both_char_and_bigram_independent() { let skill_tree = SkillTree::default(); let mut key_stats = KeyStatsStore::default(); @@ -841,41 +991,70 @@ mod tests { stat.filtered_time_ms = 360.0; stat.sample_count = 50; stat.total_count = 50; - stat.error_count = 2; + stat.error_rate_ema = 0.03; } - // Make 'n' very weak: confidence = 0.1 -> char_difficulty = 0.9 - // threshold = 0.9 * 0.8 = 0.72 - key_stats.stats.get_mut(&'n').unwrap().confidence = 0.1; - key_stats.stats.get_mut(&'n').unwrap().filtered_time_ms = 3400.0; + key_stats.stats.get_mut(&'n').unwrap().confidence = 0.5; + key_stats.stats.get_mut(&'n').unwrap().filtered_time_ms = 686.0; - // Bigram 'e','t' with high confidence and low error rate -> low difficulty - // char error rates: e_e ≈ 0.058, e_t ≈ 0.058 - // expected_et = 1 - (1-0.058)*(1-0.058) ≈ 0.113 - // bigram error: (5+1)/(30+2) = 0.1875 -> redundancy ≈ 1.66 - // ngram_difficulty = (1.0 - 0.85) * 1.66 = 0.249 < 0.72 + // Set up a bigram with confirmed error anomaly let mut bigram_stats = BigramStatsStore::default(); let et_key = BigramKey(['e', 't']); let stat = bigram_stats.stats.entry(et_key.clone()).or_default(); - stat.confidence = 0.85; stat.sample_count = 30; - stat.error_count = 5; - stat.redundancy_streak = STABILITY_STREAK_REQUIRED; + stat.error_rate_ema = 0.80; 
+ stat.error_anomaly_streak = ANOMALY_STREAK_REQUIRED; - let target = select_focus_target( - &skill_tree, - DrillScope::Global, - &key_stats, - &bigram_stats, + let selection = select_focus(&skill_tree, DrillScope::Global, &key_stats, &bigram_stats); + + // Both should be populated independently + assert_eq!( + selection.char_focus, + Some('n'), + "Char focus should be weakest char 'n'" ); + assert!( + selection.bigram_focus.is_some(), + "Bigram focus should be present" + ); + let (key, _, _) = selection.bigram_focus.unwrap(); + assert_eq!(key, et_key, "Bigram focus should be 'et'"); + } - match target { - FocusTarget::Char(ch) => { - assert_eq!(ch, 'n', "Should focus on weakest char 'n'"); - } - FocusTarget::Bigram(_) => { - panic!("Should NOT select bigram when its difficulty is below threshold"); - } + #[test] + fn focus_char_only_when_no_confirmed_bigram() { + let skill_tree = SkillTree::default(); + let mut key_stats = KeyStatsStore::default(); + + for &ch in &['e', 't', 'a', 'o', 'n', 'i'] { + let stat = key_stats.stats.entry(ch).or_default(); + stat.confidence = 0.95; + stat.filtered_time_ms = 360.0; + stat.sample_count = 50; + stat.total_count = 50; + stat.error_rate_ema = 0.03; } + key_stats.stats.get_mut(&'n').unwrap().confidence = 0.1; + key_stats.stats.get_mut(&'n').unwrap().filtered_time_ms = 3400.0; + + // Bigram with low error rate → no anomaly + let mut bigram_stats = BigramStatsStore::default(); + let et_key = BigramKey(['e', 't']); + let stat = bigram_stats.stats.entry(et_key.clone()).or_default(); + stat.sample_count = 30; + stat.error_rate_ema = 0.02; + stat.error_anomaly_streak = ANOMALY_STREAK_REQUIRED; + + let selection = select_focus(&skill_tree, DrillScope::Global, &key_stats, &bigram_stats); + + assert_eq!( + selection.char_focus, + Some('n'), + "Should focus on weakest char 'n'" + ); + assert!( + selection.bigram_focus.is_none(), + "No confirmed anomaly → no bigram focus" + ); } #[test] @@ -889,33 +1068,25 @@ mod tests { 
stat.filtered_time_ms = 360.0; stat.sample_count = 50; stat.total_count = 50; - stat.error_count = 2; + stat.error_rate_ema = 0.03; } key_stats.stats.get_mut(&'n').unwrap().confidence = 0.5; key_stats.stats.get_mut(&'n').unwrap().filtered_time_ms = 686.0; - // Bigram with high difficulty but streak only 2 (needs 3) + // Bigram with high error rate but streak only 2 (needs 3) let mut bigram_stats = BigramStatsStore::default(); let et_key = BigramKey(['e', 't']); let stat = bigram_stats.stats.entry(et_key.clone()).or_default(); - stat.confidence = 0.3; stat.sample_count = 30; - stat.error_count = 25; - stat.redundancy_streak = STABILITY_STREAK_REQUIRED - 1; // not enough + stat.error_rate_ema = 0.80; + stat.error_anomaly_streak = ANOMALY_STREAK_REQUIRED - 1; // not enough - let target = select_focus_target( - &skill_tree, - DrillScope::Global, - &key_stats, - &bigram_stats, + let selection = select_focus(&skill_tree, DrillScope::Global, &key_stats, &bigram_stats); + + assert!( + selection.bigram_focus.is_none(), + "Insufficient streak → no bigram focus" ); - - match target { - FocusTarget::Char(_) => {} // expected: bigram filtered by stability gate - FocusTarget::Bigram(_) => { - panic!("Should NOT select bigram with insufficient redundancy streak"); - } - } } // --- Hesitation tests --- @@ -953,14 +1124,17 @@ mod tests { let trigram_stats = TrigramStatsStore::default(); let bigram_stats = BigramStatsStore::default(); let char_stats = KeyStatsStore::default(); - assert_eq!(trigram_marginal_gain(&trigram_stats, &bigram_stats, &char_stats), 0.0); + assert_eq!( + trigram_marginal_gain(&trigram_stats, &bigram_stats, &char_stats), + 0.0 + ); } // --- Replay invariance --- #[test] fn replay_produces_correct_error_total_counts() { - // Simulate a replay: process keystrokes and verify counts + // Simulate a replay: process keystrokes and verify counts + EMA let mut key_stats = KeyStatsStore::default(); // Simulate: 10 correct 'a', 3 errors 'a', 5 correct 'b', 1 error 'b' @@ 
-986,32 +1160,46 @@ mod tests { make_keytime('b', 300.0, false), // error ]; - // Process like rebuild_ngram_stats does + // Process like rebuild_ngram_stats does (updating EMA for correct strokes too) for kt in &keystrokes { if kt.correct { let stat = key_stats.stats.entry(kt.key).or_default(); stat.total_count += 1; + if stat.total_count == 1 { + stat.error_rate_ema = 0.0; + } else { + stat.error_rate_ema = 0.1 * 0.0 + 0.9 * stat.error_rate_ema; + } } else { key_stats.update_key_error(kt.key); } } let a_stat = key_stats.stats.get(&'a').unwrap(); - assert_eq!(a_stat.total_count, 13, "a: 10 correct + 3 errors = 13 total"); + assert_eq!( + a_stat.total_count, 13, + "a: 10 correct + 3 errors = 13 total" + ); assert_eq!(a_stat.error_count, 3, "a: 3 errors"); let b_stat = key_stats.stats.get(&'b').unwrap(); assert_eq!(b_stat.total_count, 6, "b: 5 correct + 1 error = 6 total"); assert_eq!(b_stat.error_count, 1, "b: 1 error"); - // Verify smoothed error rate is reasonable + // Verify EMA error rate is reasonable (not exact Laplace, but proportional) let a_rate = key_stats.smoothed_error_rate('a'); - // (3 + 1) / (13 + 2) = 4/15 = 0.2667 - assert!((a_rate - 4.0 / 15.0).abs() < f64::EPSILON); + // 'a' had 3 errors in 13 strokes, last was error → EMA should be moderate + assert!( + a_rate > 0.05 && a_rate < 0.5, + "a rate should be moderate, got {a_rate}" + ); let b_rate = key_stats.smoothed_error_rate('b'); - // (1 + 1) / (6 + 2) = 2/8 = 0.25 - assert!((b_rate - 2.0 / 8.0).abs() < f64::EPSILON); + // 'b' had 1 error (the last stroke) → EMA should reflect recent error + assert!( + b_rate > 0.05 && b_rate < 0.5, + "b rate should reflect recent error, got {b_rate}" + ); } #[test] @@ -1057,8 +1245,14 @@ mod tests { trigram_stats.prune(2, 5, &bigram_stats, &char_stats); // "New" (index 4) should survive over "old" (index 0) due to higher recency - assert!(trigram_stats.stats.contains_key(&new_key), "most recent trigram should survive prune"); - 
assert!(!trigram_stats.stats.contains_key(&old_key), "oldest trigram should be pruned"); + assert!( + trigram_stats.stats.contains_key(&new_key), + "most recent trigram should survive prune" + ); + assert!( + !trigram_stats.stats.contains_key(&old_key), + "oldest trigram should be pruned" + ); assert_eq!(trigram_stats.stats.len(), 2); // Now verify that using a WRONG total (e.g. 2 completed drills instead of 5) @@ -1111,7 +1305,13 @@ mod tests { for _ in 0..100 { let mut store = BigramStatsStore::default(); for ev in bigram_events.iter().take(400) { - store.update(ev.key.clone(), ev.total_time_ms, ev.correct, ev.has_hesitation, 0); + store.update( + ev.key.clone(), + ev.total_time_ms, + ev.correct, + ev.has_hesitation, + 0, + ); } } let elapsed = start.elapsed() / 100; @@ -1134,7 +1334,7 @@ mod tests { stat.filtered_time_ms = 430.0; stat.sample_count = 50; stat.total_count = 50; - stat.error_count = 3; + stat.error_rate_ema = 0.05; } let mut count: usize = 0; @@ -1145,10 +1345,9 @@ mod tests { } let key = BigramKey([a, b]); let stat = bigram_stats.stats.entry(key).or_default(); - stat.confidence = 0.5 + (count % 50) as f64 * 0.01; stat.sample_count = 25 + count % 30; - stat.error_count = 5 + count % 10; - stat.redundancy_streak = if count % 3 == 0 { 3 } else { 1 }; + stat.error_rate_ema = 0.1 + (count % 10) as f64 * 0.05; + stat.error_anomaly_streak = if count % 3 == 0 { 3 } else { 1 }; count += 1; } } @@ -1159,7 +1358,7 @@ mod tests { let start = std::time::Instant::now(); for _ in 0..100 { - let _ = bigram_stats.weakest_bigram(&char_stats, &unlocked); + let _ = bigram_stats.worst_confirmed_anomaly(&char_stats, &unlocked); } let elapsed = start.elapsed() / 100; @@ -1171,9 +1370,7 @@ mod tests { #[test] fn perf_budget_history_replay_under_500ms() { - let drills: Vec> = (0..500) - .map(|_| make_bench_keystrokes(300)) - .collect(); + let drills: Vec> = (0..500).map(|_| make_bench_keystrokes(300)).collect(); let budget = std::time::Duration::from_millis(500 * 
DEBUG_MULTIPLIER as u64); @@ -1196,13 +1393,19 @@ mod tests { for ev in &bigram_events { bigram_stats.update( - ev.key.clone(), ev.total_time_ms, ev.correct, ev.has_hesitation, + ev.key.clone(), + ev.total_time_ms, + ev.correct, + ev.has_hesitation, drill_idx as u32, ); } for ev in &trigram_events { trigram_stats.update( - ev.key.clone(), ev.total_time_ms, ev.correct, ev.has_hesitation, + ev.key.clone(), + ev.total_time_ms, + ev.correct, + ev.has_hesitation, drill_idx as u32, ); } @@ -1218,4 +1421,493 @@ mod tests { "history replay took {elapsed:?}, budget is {budget:?}" ); } + + // --- error_anomaly_bigrams tests --- + + fn make_bigram_store_with_char_stats() -> (BigramStatsStore, KeyStatsStore) { + let mut char_stats = KeyStatsStore::default(); + for ch in 'a'..='z' { + let s = char_stats.stats.entry(ch).or_default(); + s.error_rate_ema = 0.03; + } + let bigram_stats = BigramStatsStore::default(); + (bigram_stats, char_stats) + } + + #[test] + fn test_error_anomaly_bigrams() { + let (mut bigram_stats, char_stats) = make_bigram_store_with_char_stats(); + let unlocked: Vec = ('a'..='z').collect(); + + // Confirmed: sample=25, streak=3, high EMA → anomaly ratio > 1.5 + let k1 = BigramKey(['t', 'h']); + let s1 = bigram_stats.stats.entry(k1.clone()).or_default(); + s1.sample_count = 25; + s1.error_rate_ema = 0.70; + s1.error_anomaly_streak = 3; + + // Included but not confirmed: samples < 20 + let k2 = BigramKey(['e', 'd']); + let s2 = bigram_stats.stats.entry(k2.clone()).or_default(); + s2.sample_count = 15; + s2.error_rate_ema = 0.60; + s2.error_anomaly_streak = 3; + + // Excluded: samples < ANOMALY_MIN_SAMPLES (3) + let k3 = BigramKey(['a', 'b']); + let s3 = bigram_stats.stats.entry(k3.clone()).or_default(); + s3.sample_count = 2; + s3.error_rate_ema = 0.80; + s3.error_anomaly_streak = 3; + + // Excluded: error anomaly ratio <= 1.5 (low EMA) + let k4 = BigramKey(['i', 's']); + let s4 = bigram_stats.stats.entry(k4.clone()).or_default(); + s4.sample_count = 25; + 
s4.error_rate_ema = 0.02; + s4.error_anomaly_streak = 3; + + let anomalies = bigram_stats.error_anomaly_bigrams(&char_stats, &unlocked); + let keys: Vec = anomalies.iter().map(|a| a.key.clone()).collect(); + + assert!(keys.contains(&k1), "k1 should be in error anomalies"); + assert!( + keys.contains(&k2), + "k2 should be in error anomalies (above min samples)" + ); + assert!( + !keys.contains(&k3), + "k3 should be excluded (too few samples)" + ); + assert!( + !keys.contains(&k4), + "k4 should be excluded (low anomaly ratio)" + ); + + // k1 should be confirmed (samples >= 20 && streak >= 3) + let k1_entry = anomalies.iter().find(|a| a.key == k1).unwrap(); + assert!(k1_entry.confirmed, "k1 should be confirmed"); + + // k2 should NOT be confirmed (samples < 20) + let k2_entry = anomalies.iter().find(|a| a.key == k2).unwrap(); + assert!( + !k2_entry.confirmed, + "k2 should NOT be confirmed (low samples)" + ); + } + + #[test] + fn test_speed_anomaly_pct() { + let mut bigram_stats = BigramStatsStore::default(); + let mut char_stats = KeyStatsStore::default(); + + // Set up char 'b' with sufficient samples and known time + let b_stat = char_stats.stats.entry('b').or_default(); + b_stat.sample_count = 10; // exactly at threshold + b_stat.filtered_time_ms = 200.0; + + // Set up bigram 'a','b' with time 50% slower than char b + let key = BigramKey(['a', 'b']); + let stat = bigram_stats.stats.entry(key.clone()).or_default(); + stat.filtered_time_ms = 300.0; // 50% slower than 200 + stat.sample_count = 10; + + let pct = bigram_stats.speed_anomaly_pct(&key, &char_stats); + assert!( + pct.is_some(), + "Should return Some when char has enough samples" + ); + assert!( + (pct.unwrap() - 50.0).abs() < f64::EPSILON, + "Should be 50% slower" + ); + + // Reduce char_b samples below threshold + char_stats.stats.get_mut(&'b').unwrap().sample_count = 9; + let pct = bigram_stats.speed_anomaly_pct(&key, &char_stats); + assert!( + pct.is_none(), + "Should return None when char has < 10 
samples" + ); + } + + #[test] + fn test_speed_anomaly_streak_holds_when_char_unavailable() { + let mut bigram_stats = BigramStatsStore::default(); + let mut char_stats = KeyStatsStore::default(); + + // Set up char 'b' with insufficient samples + let b_stat = char_stats.stats.entry('b').or_default(); + b_stat.sample_count = 5; // below MIN_CHAR_SAMPLES_FOR_SPEED + b_stat.filtered_time_ms = 200.0; + + let key = BigramKey(['a', 'b']); + let stat = bigram_stats.stats.entry(key.clone()).or_default(); + stat.filtered_time_ms = 400.0; + stat.sample_count = 10; + stat.speed_anomaly_streak = 2; // pre-existing streak + + // Update streak — char baseline unavailable, should hold + bigram_stats.update_speed_anomaly_streak(&key, &char_stats); + assert_eq!( + bigram_stats.stats[&key].speed_anomaly_streak, 2, + "Streak should be held when char unavailable" + ); + + // Now give char_b enough samples + char_stats.stats.get_mut(&'b').unwrap().sample_count = 10; + + // Speed anomaly = (400/200 - 1) * 100 = 100% > 50% threshold => increment + bigram_stats.update_speed_anomaly_streak(&key, &char_stats); + assert_eq!( + bigram_stats.stats[&key].speed_anomaly_streak, 3, + "Streak should increment when above threshold" + ); + + // Make speed normal + bigram_stats.stats.get_mut(&key).unwrap().filtered_time_ms = 220.0; + // Speed anomaly = (220/200 - 1) * 100 = 10% < 50% threshold => reset + bigram_stats.update_speed_anomaly_streak(&key, &char_stats); + assert_eq!( + bigram_stats.stats[&key].speed_anomaly_streak, 0, + "Streak should reset when below threshold" + ); + } + + #[test] + fn test_speed_anomaly_bigrams() { + let mut bigram_stats = BigramStatsStore::default(); + let mut char_stats = KeyStatsStore::default(); + let unlocked = vec!['a', 'b', 'c', 'd']; + + // Set up char stats with enough samples + for &ch in &['b', 'd'] { + let s = char_stats.stats.entry(ch).or_default(); + s.sample_count = 15; + s.filtered_time_ms = 200.0; + } + + // Bigram with speed anomaly > 50% + let k1 = 
BigramKey(['a', 'b']); + let s1 = bigram_stats.stats.entry(k1.clone()).or_default(); + s1.filtered_time_ms = 400.0; // 100% slower + s1.sample_count = 25; + s1.speed_anomaly_streak = 3; + + // Bigram with speed anomaly < 50% (excluded) + let k2 = BigramKey(['c', 'd']); + let s2 = bigram_stats.stats.entry(k2.clone()).or_default(); + s2.filtered_time_ms = 250.0; // 25% slower + s2.sample_count = 25; + s2.speed_anomaly_streak = 3; + + let anomalies = bigram_stats.speed_anomaly_bigrams(&char_stats, &unlocked); + let keys: Vec = anomalies.iter().map(|a| a.key.clone()).collect(); + + assert!( + keys.contains(&k1), + "k1 should be in speed anomalies (100% slower)" + ); + assert!( + !keys.contains(&k2), + "k2 should be excluded (only 25% slower)" + ); + + let k1_entry = anomalies.iter().find(|a| a.key == k1).unwrap(); + assert!(k1_entry.confirmed, "k1 should be confirmed"); + } + + #[test] + fn test_worst_confirmed_anomaly_dedup() { + let mut bigram_stats = BigramStatsStore::default(); + let mut char_stats = KeyStatsStore::default(); + let unlocked = vec!['a', 'b']; + + // Set up char stats with low EMA error rates + let b_stat = char_stats.stats.entry('b').or_default(); + b_stat.sample_count = 15; + b_stat.filtered_time_ms = 200.0; + b_stat.error_rate_ema = 0.03; + + let a_stat = char_stats.stats.entry('a').or_default(); + a_stat.error_rate_ema = 0.03; + + // Bigram with both error and speed anomalies + let key = BigramKey(['a', 'b']); + let stat = bigram_stats.stats.entry(key.clone()).or_default(); + stat.error_rate_ema = 0.70; + stat.sample_count = 25; + stat.error_anomaly_streak = ANOMALY_STREAK_REQUIRED; + stat.filtered_time_ms = 600.0; // 200% slower + stat.speed_anomaly_streak = ANOMALY_STREAK_REQUIRED; + + let result = bigram_stats.worst_confirmed_anomaly(&char_stats, &unlocked); + assert!(result.is_some(), "Should find a confirmed anomaly"); + + // Should pick whichever anomaly type has higher pct + let (_, pct, _) = result.unwrap(); + let error_pct = 
bigram_stats.error_anomaly_pct(&key, &char_stats).unwrap(); + let speed_pct = bigram_stats.speed_anomaly_pct(&key, &char_stats).unwrap(); + let expected_pct = error_pct.max(speed_pct); + assert!( + (pct - expected_pct).abs() < f64::EPSILON, + "Should pick higher anomaly pct" + ); + } + + #[test] + fn test_worst_confirmed_anomaly_prefers_error_on_tie() { + let mut bigram_stats = BigramStatsStore::default(); + let mut char_stats = KeyStatsStore::default(); + let unlocked = vec!['a', 'b']; + + let b_stat = char_stats.stats.entry('b').or_default(); + b_stat.sample_count = 15; + b_stat.filtered_time_ms = 200.0; + b_stat.error_rate_ema = 0.03; + + let a_stat = char_stats.stats.entry('a').or_default(); + a_stat.error_rate_ema = 0.03; + + let key = BigramKey(['a', 'b']); + let stat = bigram_stats.stats.entry(key.clone()).or_default(); + stat.sample_count = 25; + stat.error_anomaly_streak = ANOMALY_STREAK_REQUIRED; + stat.speed_anomaly_streak = ANOMALY_STREAK_REQUIRED; + + // Set EMA so error_anomaly_pct ≈ 150% + // expected_ab = 1 - (1 - 0.03)^2 ≈ 0.0591 + // For ratio = 2.5: e_ab = 2.5 * 0.0591 ≈ 0.1478 + stat.error_rate_ema = 0.1478; + // speed_anomaly_pct = (500/200 - 1)*100 = 150% + stat.filtered_time_ms = 500.0; + + let error_pct = bigram_stats.error_anomaly_pct(&key, &char_stats).unwrap(); + let speed_pct = bigram_stats.speed_anomaly_pct(&key, &char_stats).unwrap(); + + let result = bigram_stats.worst_confirmed_anomaly(&char_stats, &unlocked); + assert!(result.is_some()); + let (_, _pct, typ) = result.unwrap(); + + if (error_pct - speed_pct).abs() < 1.0 { + assert_eq!( + typ, + AnomalyType::Error, + "Error should win on tie or near-tie" + ); + } else if error_pct > speed_pct { + assert_eq!(typ, AnomalyType::Error, "Error should win when higher"); + } else { + assert_eq!(typ, AnomalyType::Speed, "Speed should win when higher"); + } + + // Force exact tie by setting speed to match error exactly + let exact_speed_time = (error_pct / 100.0 + 1.0) * 200.0; + 
bigram_stats.stats.get_mut(&key).unwrap().filtered_time_ms = exact_speed_time; + + let error_pct2 = bigram_stats.error_anomaly_pct(&key, &char_stats).unwrap(); + let speed_pct2 = bigram_stats.speed_anomaly_pct(&key, &char_stats).unwrap(); + assert!( + (error_pct2 - speed_pct2).abs() < f64::EPSILON, + "Pcts should be exactly equal: error={error_pct2}, speed={speed_pct2}" + ); + + let result2 = bigram_stats.worst_confirmed_anomaly(&char_stats, &unlocked); + assert!(result2.is_some()); + let (_, _, typ2) = result2.unwrap(); + assert_eq!(typ2, AnomalyType::Error, "Error should win on exact tie"); + } + + #[test] + fn test_speed_anomaly_borderline_baseline() { + let mut bigram_stats = BigramStatsStore::default(); + let mut char_stats = KeyStatsStore::default(); + + let key = BigramKey(['a', 'b']); + let stat = bigram_stats.stats.entry(key.clone()).or_default(); + stat.filtered_time_ms = 400.0; // 2x char baseline => 100% anomaly + stat.sample_count = 10; + + // At 9 samples: speed_anomaly_pct should return None + let b_stat = char_stats.stats.entry('b').or_default(); + b_stat.filtered_time_ms = 200.0; + b_stat.sample_count = 9; + + assert!( + bigram_stats.speed_anomaly_pct(&key, &char_stats).is_none(), + "Should be None at 9 char samples" + ); + + // At exactly 10 samples: should return Some + char_stats.stats.get_mut(&'b').unwrap().sample_count = 10; + let pct = bigram_stats.speed_anomaly_pct(&key, &char_stats); + assert!(pct.is_some(), "Should be Some at exactly 10 char samples"); + assert!( + (pct.unwrap() - 100.0).abs() < f64::EPSILON, + "400ms / 200ms => 100% anomaly" + ); + + // Realistic-noise fixture: char baseline is 200ms, bigram is 310ms => 55% anomaly + // (just above 50% threshold). This should be a mild anomaly, not extreme. 
+ bigram_stats.stats.get_mut(&key).unwrap().filtered_time_ms = 310.0; + let pct = bigram_stats.speed_anomaly_pct(&key, &char_stats).unwrap(); + assert!( + (pct - 55.0).abs() < 1e-10, + "310ms / 200ms => 55% anomaly, got {pct}" + ); + assert!( + pct > SPEED_ANOMALY_PCT_THRESHOLD && pct < 100.0, + "55% should be above 50% threshold but not extreme" + ); + + // At exactly the threshold: 300ms / 200ms = 50% exactly + bigram_stats.stats.get_mut(&key).unwrap().filtered_time_ms = 300.0; + let pct = bigram_stats.speed_anomaly_pct(&key, &char_stats).unwrap(); + assert!( + (pct - 50.0).abs() < f64::EPSILON, + "300ms / 200ms => exactly 50%" + ); + + // Verify streak behavior at boundary: at exactly threshold, streak should NOT increment + // (threshold comparison is >, not >=) + let stat = bigram_stats.stats.get_mut(&key).unwrap(); + stat.speed_anomaly_streak = 2; + stat.filtered_time_ms = 300.0; // exactly 50% + bigram_stats.update_speed_anomaly_streak(&key, &char_stats); + assert_eq!( + bigram_stats.stats[&key].speed_anomaly_streak, 0, + "Streak should reset at exactly threshold (not strictly above)" + ); + } + + #[test] + fn test_select_focus_both_active() { + let skill_tree = SkillTree::default(); + let mut key_stats = KeyStatsStore::default(); + + for &ch in &['e', 't', 'a', 'o', 'n', 'i'] { + let stat = key_stats.stats.entry(ch).or_default(); + stat.confidence = 0.95; + stat.filtered_time_ms = 360.0; + stat.sample_count = 50; + stat.total_count = 50; + stat.error_rate_ema = 0.03; + } + key_stats.stats.get_mut(&'n').unwrap().confidence = 0.5; + key_stats.stats.get_mut(&'n').unwrap().filtered_time_ms = 686.0; + + let mut bigram_stats = BigramStatsStore::default(); + let et_key = BigramKey(['e', 't']); + let stat = bigram_stats.stats.entry(et_key.clone()).or_default(); + stat.sample_count = 30; + stat.error_rate_ema = 0.80; + stat.error_anomaly_streak = ANOMALY_STREAK_REQUIRED; + + let selection = select_focus(&skill_tree, DrillScope::Global, &key_stats, &bigram_stats); + 
+ assert_eq!(selection.char_focus, Some('n')); + assert!(selection.bigram_focus.is_some()); + let (key, pct, _) = selection.bigram_focus.unwrap(); + assert_eq!(key, et_key); + assert!(pct > 0.0); + } + + #[test] + fn test_select_focus_bigram_only() { + // All chars mastered, but bigram anomaly exists + let skill_tree = SkillTree::default(); + let mut key_stats = KeyStatsStore::default(); + + for &ch in &['e', 't', 'a', 'o', 'n', 'i'] { + let stat = key_stats.stats.entry(ch).or_default(); + stat.confidence = 2.0; + stat.filtered_time_ms = 100.0; + stat.sample_count = 200; + stat.total_count = 200; + stat.error_rate_ema = 0.01; + } + + assert!( + skill_tree + .focused_key(DrillScope::Global, &key_stats) + .is_none(), + "Precondition: focused_key should return None when all chars are mastered" + ); + + let mut bigram_stats = BigramStatsStore::default(); + let et_key = BigramKey(['e', 't']); + let stat = bigram_stats.stats.entry(et_key.clone()).or_default(); + stat.sample_count = 30; + stat.error_rate_ema = 0.80; + stat.error_anomaly_streak = ANOMALY_STREAK_REQUIRED; + + let selection = select_focus(&skill_tree, DrillScope::Global, &key_stats, &bigram_stats); + + assert!( + selection.char_focus.is_none(), + "No char weakness → no char focus" + ); + assert!( + selection.bigram_focus.is_some(), + "Bigram anomaly should be present" + ); + } + + #[test] + fn test_ema_ranking_stability_during_recovery() { + // Two bigrams both confirmed. Bigram A has higher anomaly. + // User corrects bigram A → B becomes worst. 
+ let mut bigram_stats = BigramStatsStore::default(); + let mut char_stats = KeyStatsStore::default(); + let unlocked = vec!['a', 'b', 'c', 'd']; + + for &ch in &['a', 'b', 'c', 'd'] { + char_stats.stats.entry(ch).or_default().error_rate_ema = 0.03; + } + + let key_a = BigramKey(['a', 'b']); + let sa = bigram_stats.stats.entry(key_a.clone()).or_default(); + sa.error_rate_ema = 0.50; + sa.sample_count = 30; + sa.error_anomaly_streak = ANOMALY_STREAK_REQUIRED; + + let key_b = BigramKey(['c', 'd']); + let sb = bigram_stats.stats.entry(key_b.clone()).or_default(); + sb.error_rate_ema = 0.30; + sb.sample_count = 30; + sb.error_anomaly_streak = ANOMALY_STREAK_REQUIRED; + + // Initially A is worst + let result = bigram_stats.worst_confirmed_anomaly(&char_stats, &unlocked); + assert!(result.is_some()); + let (worst_key, _, _) = result.unwrap(); + assert_eq!(worst_key, key_a, "A should be worst initially"); + + // Simulate A recovering: 20 correct strokes + for i in 30..50 { + bigram_stats.update(key_a.clone(), 200.0, true, false, i); + bigram_stats.update_error_anomaly_streak(&key_a, &char_stats); + } + + // Now B should be worst (A recovered) + let result2 = bigram_stats.worst_confirmed_anomaly(&char_stats, &unlocked); + if let Some((worst_key2, _, _)) = result2 { + // B should now be the worst (or A dropped out of anomaly entirely) + if worst_key2 == key_a { + // A's EMA should be much lower than before + let a_ema = bigram_stats.stats[&key_a].error_rate_ema; + assert!( + a_ema < 0.30, + "A's EMA should have dropped significantly, got {a_ema}" + ); + } + } + // A's EMA should definitely be lower now + let a_ema = bigram_stats.stats[&key_a].error_rate_ema; + assert!( + a_ema < bigram_stats.stats[&key_b].error_rate_ema, + "After recovery, A's EMA ({a_ema}) should be < B's ({})", + bigram_stats.stats[&key_b].error_rate_ema + ); + } } diff --git a/src/engine/skill_tree.rs b/src/engine/skill_tree.rs index 8e00d3d..420f8e6 100644 --- a/src/engine/skill_tree.rs +++ 
b/src/engine/skill_tree.rs @@ -567,9 +567,7 @@ impl SkillTree { let newly_mastered: Vec = if let Some(before) = before_stats { before_unlocked .iter() - .filter(|&&ch| { - before.get_confidence(ch) < 1.0 && stats.get_confidence(ch) >= 1.0 - }) + .filter(|&&ch| before.get_confidence(ch) < 1.0 && stats.get_confidence(ch) >= 1.0) .copied() .collect() } else { diff --git a/src/generator/code_syntax.rs b/src/generator/code_syntax.rs index eb5dc99..0a108e8 100644 --- a/src/generator/code_syntax.rs +++ b/src/generator/code_syntax.rs @@ -51,24 +51,39 @@ pub const CODE_LANGUAGES: &[CodeLanguage] = &[ ], has_builtin: true, block_style: BlockStyle::Braces(&[ - "fn ", "pub fn ", "async fn ", "pub async fn ", "impl ", "trait ", "struct ", "enum ", - "macro_rules! ", "mod ", "const ", "static ", "type ", "pub struct ", "pub enum ", - "pub trait ", "pub mod ", "pub const ", "pub static ", "pub type ", + "fn ", + "pub fn ", + "async fn ", + "pub async fn ", + "impl ", + "trait ", + "struct ", + "enum ", + "macro_rules! 
", + "mod ", + "const ", + "static ", + "type ", + "pub struct ", + "pub enum ", + "pub trait ", + "pub mod ", + "pub const ", + "pub static ", + "pub type ", ]), }, CodeLanguage { key: "python", display_name: "Python", extensions: &[".py", ".pyi"], - repos: &[ - CodeRepo { - key: "cpython", - urls: &[ - "https://raw.githubusercontent.com/python/cpython/main/Lib/json/encoder.py", - "https://raw.githubusercontent.com/python/cpython/main/Lib/pathlib/__init__.py", - ], - }, - ], + repos: &[CodeRepo { + key: "cpython", + urls: &[ + "https://raw.githubusercontent.com/python/cpython/main/Lib/json/encoder.py", + "https://raw.githubusercontent.com/python/cpython/main/Lib/pathlib/__init__.py", + ], + }], has_builtin: true, block_style: BlockStyle::Indentation(&["def ", "class ", "async def ", "@"]), }, @@ -76,15 +91,13 @@ pub const CODE_LANGUAGES: &[CodeLanguage] = &[ key: "javascript", display_name: "JavaScript", extensions: &[".js", ".mjs"], - repos: &[ - CodeRepo { - key: "node-stdlib", - urls: &[ - "https://raw.githubusercontent.com/nodejs/node/main/lib/path.js", - "https://raw.githubusercontent.com/nodejs/node/main/lib/url.js", - ], - }, - ], + repos: &[CodeRepo { + key: "node-stdlib", + urls: &[ + "https://raw.githubusercontent.com/nodejs/node/main/lib/path.js", + "https://raw.githubusercontent.com/nodejs/node/main/lib/url.js", + ], + }], has_builtin: true, block_style: BlockStyle::Braces(&[ "function ", @@ -101,14 +114,10 @@ pub const CODE_LANGUAGES: &[CodeLanguage] = &[ key: "go", display_name: "Go", extensions: &[".go"], - repos: &[ - CodeRepo { - key: "go-stdlib", - urls: &[ - "https://raw.githubusercontent.com/golang/go/master/src/fmt/print.go", - ], - }, - ], + repos: &[CodeRepo { + key: "go-stdlib", + urls: &["https://raw.githubusercontent.com/golang/go/master/src/fmt/print.go"], + }], has_builtin: true, block_style: BlockStyle::Braces(&["func ", "type "]), }, @@ -119,9 +128,7 @@ pub const CODE_LANGUAGES: &[CodeLanguage] = &[ repos: &[ CodeRepo { key: 
"ts-node", - urls: &[ - "https://raw.githubusercontent.com/TypeStrong/ts-node/main/src/index.ts", - ], + urls: &["https://raw.githubusercontent.com/TypeStrong/ts-node/main/src/index.ts"], }, CodeRepo { key: "deno-std", @@ -195,9 +202,7 @@ pub const CODE_LANGUAGES: &[CodeLanguage] = &[ }, CodeRepo { key: "jq", - urls: &[ - "https://raw.githubusercontent.com/jqlang/jq/master/src/builtin.c", - ], + urls: &["https://raw.githubusercontent.com/jqlang/jq/master/src/builtin.c"], }, ], has_builtin: true, @@ -229,9 +234,7 @@ pub const CODE_LANGUAGES: &[CodeLanguage] = &[ }, CodeRepo { key: "fmt", - urls: &[ - "https://raw.githubusercontent.com/fmtlib/fmt/master/include/fmt/format.h", - ], + urls: &["https://raw.githubusercontent.com/fmtlib/fmt/master/include/fmt/format.h"], }, ], has_builtin: true, @@ -274,7 +277,13 @@ pub const CODE_LANGUAGES: &[CodeLanguage] = &[ ], has_builtin: true, block_style: BlockStyle::EndDelimited(&[ - "def ", "class ", "module ", "attr_", "scope ", "describe ", "it ", + "def ", + "class ", + "module ", + "attr_", + "scope ", + "describe ", + "it ", ]), }, CodeLanguage { @@ -319,9 +328,7 @@ pub const CODE_LANGUAGES: &[CodeLanguage] = &[ repos: &[ CodeRepo { key: "nvm", - urls: &[ - "https://raw.githubusercontent.com/nvm-sh/nvm/master/nvm.sh", - ], + urls: &["https://raw.githubusercontent.com/nvm-sh/nvm/master/nvm.sh"], }, CodeRepo { key: "oh-my-zsh", @@ -340,9 +347,7 @@ pub const CODE_LANGUAGES: &[CodeLanguage] = &[ repos: &[ CodeRepo { key: "kong", - urls: &[ - "https://raw.githubusercontent.com/Kong/kong/master/kong/init.lua", - ], + urls: &["https://raw.githubusercontent.com/Kong/kong/master/kong/init.lua"], }, CodeRepo { key: "luarocks", @@ -359,41 +364,60 @@ pub const CODE_LANGUAGES: &[CodeLanguage] = &[ key: "kotlin", display_name: "Kotlin", extensions: &[".kt", ".kts"], - repos: &[ - CodeRepo { - key: "kotlinx-coroutines", - urls: &[ - 
"https://raw.githubusercontent.com/Kotlin/kotlinx.coroutines/master/kotlinx-coroutines-core/common/src/flow/Builders.kt", - "https://raw.githubusercontent.com/Kotlin/kotlinx.coroutines/master/kotlinx-coroutines-core/common/src/channels/Channel.kt", - ], - }, - ], + repos: &[CodeRepo { + key: "kotlinx-coroutines", + urls: &[ + "https://raw.githubusercontent.com/Kotlin/kotlinx.coroutines/master/kotlinx-coroutines-core/common/src/flow/Builders.kt", + "https://raw.githubusercontent.com/Kotlin/kotlinx.coroutines/master/kotlinx-coroutines-core/common/src/channels/Channel.kt", + ], + }], has_builtin: false, block_style: BlockStyle::Braces(&[ - "fun ", "class ", "object ", "interface ", "suspend fun ", - "public ", "private ", "internal ", "override fun ", "open ", - "data class ", "sealed ", "abstract ", - "val ", "var ", "enum ", "annotation ", "typealias ", + "fun ", + "class ", + "object ", + "interface ", + "suspend fun ", + "public ", + "private ", + "internal ", + "override fun ", + "open ", + "data class ", + "sealed ", + "abstract ", + "val ", + "var ", + "enum ", + "annotation ", + "typealias ", ]), }, CodeLanguage { key: "scala", display_name: "Scala", extensions: &[".scala"], - repos: &[ - CodeRepo { - key: "scala-stdlib", - urls: &[ - "https://raw.githubusercontent.com/scala/scala/2.13.x/src/library/scala/collection/immutable/List.scala", - "https://raw.githubusercontent.com/scala/scala/2.13.x/src/library/scala/collection/mutable/HashMap.scala", - "https://raw.githubusercontent.com/scala/scala/2.13.x/src/library/scala/Option.scala", - ], - }, - ], + repos: &[CodeRepo { + key: "scala-stdlib", + urls: &[ + "https://raw.githubusercontent.com/scala/scala/2.13.x/src/library/scala/collection/immutable/List.scala", + "https://raw.githubusercontent.com/scala/scala/2.13.x/src/library/scala/collection/mutable/HashMap.scala", + "https://raw.githubusercontent.com/scala/scala/2.13.x/src/library/scala/Option.scala", + ], + }], has_builtin: false, block_style: 
BlockStyle::Braces(&[ - "def ", "class ", "object ", "trait ", "case class ", - "val ", "var ", "type ", "implicit ", "given ", "extension ", + "def ", + "class ", + "object ", + "trait ", + "case class ", + "val ", + "var ", + "type ", + "implicit ", + "given ", + "extension ", ]), }, CodeLanguage { @@ -461,18 +485,29 @@ pub const CODE_LANGUAGES: &[CodeLanguage] = &[ key: "dart", display_name: "Dart", extensions: &[".dart"], - repos: &[ - CodeRepo { - key: "flutter", - urls: &[ - "https://raw.githubusercontent.com/flutter/flutter/master/packages/flutter/lib/src/widgets/framework.dart", - ], - }, - ], + repos: &[CodeRepo { + key: "flutter", + urls: &[ + "https://raw.githubusercontent.com/flutter/flutter/master/packages/flutter/lib/src/widgets/framework.dart", + ], + }], has_builtin: false, block_style: BlockStyle::Braces(&[ - "void ", "Future ", "Future<", "class ", "int ", "String ", "bool ", "static ", "factory ", - "Widget ", "get ", "set ", "enum ", "typedef ", "extension ", + "void ", + "Future ", + "Future<", + "class ", + "int ", + "String ", + "bool ", + "static ", + "factory ", + "Widget ", + "get ", + "set ", + "enum ", + "typedef ", + "extension ", ]), }, CodeLanguage { @@ -495,22 +530,23 @@ pub const CODE_LANGUAGES: &[CodeLanguage] = &[ ], has_builtin: false, block_style: BlockStyle::EndDelimited(&[ - "def ", "defp ", "defmodule ", - "defmacro ", "defstruct", "defprotocol ", "defimpl ", + "def ", + "defp ", + "defmodule ", + "defmacro ", + "defstruct", + "defprotocol ", + "defimpl ", ]), }, CodeLanguage { key: "perl", display_name: "Perl", extensions: &[".pl", ".pm"], - repos: &[ - CodeRepo { - key: "mojolicious", - urls: &[ - "https://raw.githubusercontent.com/mojolicious/mojo/main/lib/Mojolicious.pm", - ], - }, - ], + repos: &[CodeRepo { + key: "mojolicious", + urls: &["https://raw.githubusercontent.com/mojolicious/mojo/main/lib/Mojolicious.pm"], + }], has_builtin: false, block_style: BlockStyle::Braces(&["sub "]), }, @@ -518,30 +554,31 @@ pub const 
CODE_LANGUAGES: &[CodeLanguage] = &[ key: "zig", display_name: "Zig", extensions: &[".zig"], - repos: &[ - CodeRepo { - key: "zig-stdlib", - urls: &[ - "https://raw.githubusercontent.com/ziglang/zig/master/lib/std/mem.zig", - "https://raw.githubusercontent.com/ziglang/zig/master/lib/std/fmt.zig", - ], - }, - ], + repos: &[CodeRepo { + key: "zig-stdlib", + urls: &[ + "https://raw.githubusercontent.com/ziglang/zig/master/lib/std/mem.zig", + "https://raw.githubusercontent.com/ziglang/zig/master/lib/std/fmt.zig", + ], + }], has_builtin: false, - block_style: BlockStyle::Braces(&["pub fn ", "fn ", "const ", "pub const ", "test ", "var "]), + block_style: BlockStyle::Braces(&[ + "pub fn ", + "fn ", + "const ", + "pub const ", + "test ", + "var ", + ]), }, CodeLanguage { key: "julia", display_name: "Julia", extensions: &[".jl"], - repos: &[ - CodeRepo { - key: "julia-stdlib", - urls: &[ - "https://raw.githubusercontent.com/JuliaLang/julia/master/base/array.jl", - ], - }, - ], + repos: &[CodeRepo { + key: "julia-stdlib", + urls: &["https://raw.githubusercontent.com/JuliaLang/julia/master/base/array.jl"], + }], has_builtin: false, block_style: BlockStyle::EndDelimited(&["function ", "macro "]), }, @@ -549,14 +586,10 @@ pub const CODE_LANGUAGES: &[CodeLanguage] = &[ key: "nim", display_name: "Nim", extensions: &[".nim"], - repos: &[ - CodeRepo { - key: "nim-stdlib", - urls: &[ - "https://raw.githubusercontent.com/nim-lang/Nim/devel/lib/pure/strutils.nim", - ], - }, - ], + repos: &[CodeRepo { + key: "nim-stdlib", + urls: &["https://raw.githubusercontent.com/nim-lang/Nim/devel/lib/pure/strutils.nim"], + }], has_builtin: false, block_style: BlockStyle::Indentation(&["proc ", "func ", "method ", "type "]), }, @@ -564,14 +597,10 @@ pub const CODE_LANGUAGES: &[CodeLanguage] = &[ key: "ocaml", display_name: "OCaml", extensions: &[".ml", ".mli"], - repos: &[ - CodeRepo { - key: "ocaml-stdlib", - urls: &[ - "https://raw.githubusercontent.com/ocaml/ocaml/trunk/stdlib/list.ml", - ], - 
}, - ], + repos: &[CodeRepo { + key: "ocaml-stdlib", + urls: &["https://raw.githubusercontent.com/ocaml/ocaml/trunk/stdlib/list.ml"], + }], has_builtin: false, block_style: BlockStyle::Indentation(&["let ", "type ", "module "]), }, @@ -596,21 +625,24 @@ pub const CODE_LANGUAGES: &[CodeLanguage] = &[ has_builtin: false, // Haskell: top-level declarations are indented blocks block_style: BlockStyle::Indentation(&[ - "data ", "type ", "class ", "instance ", "newtype ", "module ", + "data ", + "type ", + "class ", + "instance ", + "newtype ", + "module ", ]), }, CodeLanguage { key: "clojure", display_name: "Clojure", extensions: &[".clj", ".cljs"], - repos: &[ - CodeRepo { - key: "clojure-core", - urls: &[ - "https://raw.githubusercontent.com/clojure/clojure/master/src/clj/clojure/core.clj", - ], - }, - ], + repos: &[CodeRepo { + key: "clojure-core", + urls: &[ + "https://raw.githubusercontent.com/clojure/clojure/master/src/clj/clojure/core.clj", + ], + }], has_builtin: false, block_style: BlockStyle::Indentation(&["(defn ", "(defn- ", "(defmacro "]), }, @@ -618,15 +650,13 @@ pub const CODE_LANGUAGES: &[CodeLanguage] = &[ key: "r", display_name: "R", extensions: &[".r", ".R"], - repos: &[ - CodeRepo { - key: "shiny", - urls: &[ - "https://raw.githubusercontent.com/rstudio/shiny/main/R/bootstrap.R", - "https://raw.githubusercontent.com/rstudio/shiny/main/R/input-text.R", - ], - }, - ], + repos: &[CodeRepo { + key: "shiny", + urls: &[ + "https://raw.githubusercontent.com/rstudio/shiny/main/R/bootstrap.R", + "https://raw.githubusercontent.com/rstudio/shiny/main/R/input-text.R", + ], + }], has_builtin: false, // R functions are defined as `name <- function(...)`. Since our extractor only // supports `starts_with`, we match roxygen doc blocks that precede functions. 
@@ -636,36 +666,30 @@ pub const CODE_LANGUAGES: &[CodeLanguage] = &[ key: "erlang", display_name: "Erlang", extensions: &[".erl"], - repos: &[ - CodeRepo { - key: "cowboy", - urls: &[ - "https://raw.githubusercontent.com/ninenines/cowboy/master/src/cowboy_req.erl", - "https://raw.githubusercontent.com/ninenines/cowboy/master/src/cowboy_http.erl", - ], - }, - ], + repos: &[CodeRepo { + key: "cowboy", + urls: &[ + "https://raw.githubusercontent.com/ninenines/cowboy/master/src/cowboy_req.erl", + "https://raw.githubusercontent.com/ninenines/cowboy/master/src/cowboy_http.erl", + ], + }], has_builtin: false, // Erlang: -spec and -record use braces for types/fields. // Erlang functions themselves don't use braces (they end with `.`), // so extraction is limited to type specs and records. - block_style: BlockStyle::Braces(&[ - "-spec ", "-record(", "-type ", "-callback ", - ]), + block_style: BlockStyle::Braces(&["-spec ", "-record(", "-type ", "-callback "]), }, CodeLanguage { key: "groovy", display_name: "Groovy", extensions: &[".groovy"], - repos: &[ - CodeRepo { - key: "nextflow", - urls: &[ - "https://raw.githubusercontent.com/nextflow-io/nextflow/master/modules/nextflow/src/main/groovy/nextflow/processor/TaskProcessor.groovy", - "https://raw.githubusercontent.com/nextflow-io/nextflow/master/modules/nextflow/src/main/groovy/nextflow/Session.groovy", - ], - }, - ], + repos: &[CodeRepo { + key: "nextflow", + urls: &[ + "https://raw.githubusercontent.com/nextflow-io/nextflow/master/modules/nextflow/src/main/groovy/nextflow/processor/TaskProcessor.groovy", + "https://raw.githubusercontent.com/nextflow-io/nextflow/master/modules/nextflow/src/main/groovy/nextflow/Session.groovy", + ], + }], has_builtin: false, block_style: BlockStyle::Braces(&["def ", "void ", "static ", "public ", "private "]), }, @@ -673,14 +697,12 @@ pub const CODE_LANGUAGES: &[CodeLanguage] = &[ key: "fsharp", display_name: "F#", extensions: &[".fs", ".fsx"], - repos: &[ - CodeRepo { - key: 
"fsharp-compiler", - urls: &[ - "https://raw.githubusercontent.com/dotnet/fsharp/main/src/Compiler/Utilities/lib.fs", - ], - }, - ], + repos: &[CodeRepo { + key: "fsharp-compiler", + urls: &[ + "https://raw.githubusercontent.com/dotnet/fsharp/main/src/Compiler/Utilities/lib.fs", + ], + }], has_builtin: false, block_style: BlockStyle::Indentation(&["let ", "member ", "type ", "module "]), }, @@ -688,18 +710,23 @@ pub const CODE_LANGUAGES: &[CodeLanguage] = &[ key: "objective-c", display_name: "Objective-C", extensions: &[".m", ".h"], - repos: &[ - CodeRepo { - key: "afnetworking", - urls: &[ - "https://raw.githubusercontent.com/AFNetworking/AFNetworking/master/AFNetworking/AFURLSessionManager.m", - ], - }, - ], + repos: &[CodeRepo { + key: "afnetworking", + urls: &[ + "https://raw.githubusercontent.com/AFNetworking/AFNetworking/master/AFNetworking/AFURLSessionManager.m", + ], + }], has_builtin: false, block_style: BlockStyle::Braces(&[ - "- (", "+ (", "- (void)", "- (id)", "- (BOOL)", - "@interface ", "@implementation ", "@protocol ", "typedef ", + "- (", + "+ (", + "- (void)", + "- (id)", + "- (BOOL)", + "@interface ", + "@implementation ", + "@protocol ", + "typedef ", ]), }, ]; @@ -767,8 +794,8 @@ pub fn build_code_download_queue(lang_key: &str, cache_dir: &str) -> Vec<(String for lk in &languages_to_download { if let Some(lang) = language_by_key(lk) { for (repo_idx, repo) in lang.repos.iter().enumerate() { - let cache_path = std::path::Path::new(cache_dir) - .join(format!("{}_{}.txt", lang.key, repo.key)); + let cache_path = + std::path::Path::new(cache_dir).join(format!("{}_{}.txt", lang.key, repo.key)); if !cache_path.exists() || std::fs::metadata(&cache_path) .map(|m| m.len() == 0) @@ -1653,7 +1680,8 @@ impl TextGenerator for CodeSyntaxGenerator { fn generate( &mut self, _filter: &CharFilter, - _focused: Option, + _focused_char: Option, + _focused_bigram: Option<[char; 2]>, word_count: usize, ) -> String { let embedded = self.get_snippets(); @@ -1721,7 
+1749,10 @@ fn approx_token_count(text: &str) -> usize { } fn fit_snippet_to_target(snippet: &str, target_units: usize) -> String { - let max_units = target_units.saturating_mul(3).saturating_div(2).max(target_units); + let max_units = target_units + .saturating_mul(3) + .saturating_div(2) + .max(target_units); if approx_token_count(snippet) <= max_units { return snippet.to_string(); } @@ -1777,8 +1808,8 @@ where all_snippets.truncate(snippets_limit); - let cache_path = std::path::Path::new(cache_dir) - .join(format!("{}_{}.txt", language_key, repo.key)); + let cache_path = + std::path::Path::new(cache_dir).join(format!("{}_{}.txt", language_key, repo.key)); let combined = all_snippets.join("\n---SNIPPET---\n"); fs::write(cache_path, combined).is_ok() } @@ -1811,8 +1842,12 @@ fn is_noise_snippet(snippet: &str) -> bool { .lines() .filter(|l| { let t = l.trim(); - !t.is_empty() && !t.starts_with("//") && !t.starts_with('#') && !t.starts_with("/*") - && !t.starts_with('*') && !t.starts_with("*/") + !t.is_empty() + && !t.starts_with("//") + && !t.starts_with('#') + && !t.starts_with("/*") + && !t.starts_with('*') + && !t.starts_with("*/") }) .collect(); @@ -1828,8 +1863,15 @@ fn is_noise_snippet(snippet: &str) -> bool { // Reject if body consists entirely of import/use/require/include statements let import_prefixes = [ - "import ", "from ", "use ", "require", "#include", "using ", - "package ", "module ", "extern crate ", + "import ", + "from ", + "use ", + "require", + "#include", + "using ", + "package ", + "module ", + "extern crate ", ]; let body_lines: Vec<&str> = meaningful_lines.iter().skip(1).copied().collect(); if !body_lines.is_empty() @@ -2087,7 +2129,10 @@ fn structural_extract_indent(lines: &[&str]) -> Vec { } } - while snippet_lines.last().map_or(false, |sl| sl.trim().is_empty()) { + while snippet_lines + .last() + .map_or(false, |sl| sl.trim().is_empty()) + { snippet_lines.pop(); } @@ -2483,18 +2528,14 @@ z = 99 println!(" ({lines} lines, {bytes} 
bytes)"); total_ok += 1; - let snippets = - extract_code_snippets(&content, &lang.block_style); + let snippets = extract_code_snippets(&content, &lang.block_style); println!(" Extracted {} snippets", snippets.len()); lang_total_snippets += snippets.len(); // Show first 2 snippets (truncated) for (si, snippet) in snippets.iter().take(2).enumerate() { - let preview: String = snippet - .lines() - .take(5) - .collect::>() - .join("\n"); + let preview: String = + snippet.lines().take(5).collect::>().join("\n"); let suffix = if snippet.lines().count() > 5 { "\n ..." } else { @@ -2507,7 +2548,9 @@ z = 99 .join("\n"); println!( " --- snippet {} ---\n{}{}", - si + 1, indented, suffix, + si + 1, + indented, + suffix, ); } } diff --git a/src/generator/dictionary.rs b/src/generator/dictionary.rs index 89de0e5..798af9f 100644 --- a/src/generator/dictionary.rs +++ b/src/generator/dictionary.rs @@ -39,3 +39,26 @@ impl Dictionary { matching } } + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn find_matching_focused_is_sort_only() { + let dictionary = Dictionary::load(); + let filter = CharFilter::new(('a'..='z').collect()); + + let without_focus = dictionary.find_matching(&filter, None); + let with_focus = dictionary.find_matching(&filter, Some('k')); + + // Same membership — focused param only reorders, never filters + let mut sorted_without: Vec<&str> = without_focus.clone(); + let mut sorted_with: Vec<&str> = with_focus.clone(); + sorted_without.sort(); + sorted_with.sort(); + + assert_eq!(sorted_without, sorted_with); + assert_eq!(without_focus.len(), with_focus.len()); + } +} diff --git a/src/generator/mod.rs b/src/generator/mod.rs index 6700699..8f93c7f 100644 --- a/src/generator/mod.rs +++ b/src/generator/mod.rs @@ -12,6 +12,11 @@ pub mod transition_table; use crate::engine::filter::CharFilter; pub trait TextGenerator { - fn generate(&mut self, filter: &CharFilter, focused: Option, word_count: usize) - -> String; + fn generate( + &mut self, + filter: 
&CharFilter, + focused_char: Option, + focused_bigram: Option<[char; 2]>, + word_count: usize, + ) -> String; } diff --git a/src/generator/passage.rs b/src/generator/passage.rs index 6dc879a..0be0543 100644 --- a/src/generator/passage.rs +++ b/src/generator/passage.rs @@ -176,7 +176,8 @@ impl TextGenerator for PassageGenerator { fn generate( &mut self, _filter: &CharFilter, - _focused: Option, + _focused_char: Option, + _focused_bigram: Option<[char; 2]>, word_count: usize, ) -> String { let use_builtin = self.selection == "all" || self.selection == "builtin"; diff --git a/src/generator/phonetic.rs b/src/generator/phonetic.rs index c3cbb71..befd650 100644 --- a/src/generator/phonetic.rs +++ b/src/generator/phonetic.rs @@ -56,9 +56,14 @@ impl PhoneticGenerator { Some(filtered.last().unwrap().0) } - fn generate_phonetic_word(&mut self, filter: &CharFilter, focused: Option) -> String { + fn generate_phonetic_word( + &mut self, + filter: &CharFilter, + focused_char: Option, + focused_bigram: Option<[char; 2]>, + ) -> String { for _attempt in 0..5 { - let word = self.try_generate_word(filter, focused); + let word = self.try_generate_word(filter, focused_char, focused_bigram); if word.len() >= MIN_WORD_LEN { return word; } @@ -67,14 +72,46 @@ impl PhoneticGenerator { "the".to_string() } - fn try_generate_word(&mut self, filter: &CharFilter, focused: Option) -> String { + fn try_generate_word( + &mut self, + filter: &CharFilter, + focused: Option, + focused_bigram: Option<[char; 2]>, + ) -> String { let mut word = Vec::new(); - // Start with space prefix - let start_char = if let Some(focus) = focused { + // Try bigram-start: 30% chance to start word with bigram[0],bigram[1] + let bigram_eligible = + focused_bigram.filter(|b| filter.is_allowed(b[0]) && filter.is_allowed(b[1])); + let start_char = if let Some(bg) = bigram_eligible { + if self.rng.gen_bool(0.3) { + word.push(bg[0]); + word.push(bg[1]); + // Continue Markov chain from the bigram + let prefix = vec![' ', 
bg[0], bg[1]]; + if let Some(probs) = self.table.segment(&prefix) { + Self::pick_weighted_from(&mut self.rng, probs, filter) + } else { + None + } + } else if let Some(focus) = focused { + if self.rng.gen_bool(0.4) && filter.is_allowed(focus) { + word.push(focus); + let prefix = vec![' ', ' ', focus]; + if let Some(probs) = self.table.segment(&prefix) { + Self::pick_weighted_from(&mut self.rng, probs, filter) + } else { + None + } + } else { + None + } + } else { + None + } + } else if let Some(focus) = focused { if self.rng.gen_bool(0.4) && filter.is_allowed(focus) { word.push(focus); - // Get next char from transition table let prefix = vec![' ', ' ', focus]; if let Some(probs) = self.table.segment(&prefix) { Self::pick_weighted_from(&mut self.rng, probs, filter) @@ -189,65 +226,151 @@ impl PhoneticGenerator { word.iter().collect() } + + fn pick_tiered_word( + &mut self, + all_words: &[String], + bigram_indices: &[usize], + char_indices: &[usize], + other_indices: &[usize], + recent: &[String], + ) -> String { + for _ in 0..6 { + let tier = self.select_tier(bigram_indices, char_indices, other_indices); + let idx = tier[self.rng.gen_range(0..tier.len())]; + let word = &all_words[idx]; + if !recent.contains(word) { + return word.clone(); + } + } + // Fallback: accept any word from full pool + let idx = self.rng.gen_range(0..all_words.len()); + all_words[idx].clone() + } + + fn select_tier<'a>( + &mut self, + bigram_indices: &'a [usize], + char_indices: &'a [usize], + other_indices: &'a [usize], + ) -> &'a [usize] { + let has_bigram = bigram_indices.len() >= 2; + let has_char = char_indices.len() >= 2; + + // Tier selection probabilities: + // Both available: 40% bigram, 30% char, 30% other + // Only bigram: 50% bigram, 50% other + // Only char: 70% char, 30% other + // Neither: 100% other + let roll: f64 = self.rng.gen_range(0.0..1.0); + + match (has_bigram, has_char) { + (true, true) => { + if roll < 0.4 { + bigram_indices + } else if roll < 0.7 { + char_indices + 
} else { + if other_indices.len() >= 2 { + other_indices + } else if has_char { + char_indices + } else { + bigram_indices + } + } + } + (true, false) => { + if roll < 0.5 { + bigram_indices + } else { + if other_indices.len() >= 2 { + other_indices + } else { + bigram_indices + } + } + } + (false, true) => { + if roll < 0.7 { + char_indices + } else { + if other_indices.len() >= 2 { + other_indices + } else { + char_indices + } + } + } + (false, false) => { + // Use other_indices if available, otherwise all words + if other_indices.len() >= 2 { + other_indices + } else { + char_indices + } + } + } + } } impl TextGenerator for PhoneticGenerator { fn generate( &mut self, filter: &CharFilter, - focused: Option, + focused_char: Option, + focused_bigram: Option<[char; 2]>, word_count: usize, ) -> String { - // keybr's approach: prefer real words when enough match the filter - // Collect matching words into owned Vec to avoid borrow conflict let matching_words: Vec = self .dictionary - .find_matching(filter, focused) + .find_matching(filter, None) .iter() .map(|s| s.to_string()) .collect(); let use_real_words = matching_words.len() >= MIN_REAL_WORDS; + // Pre-categorize words into tiers for real-word mode + let bigram_str = focused_bigram.map(|b| format!("{}{}", b[0], b[1])); + let focus_char_lower = focused_char.filter(|ch| ch.is_ascii_lowercase()); + + let (bigram_indices, char_indices, other_indices) = if use_real_words { + let mut bi = Vec::new(); + let mut ci = Vec::new(); + let mut oi = Vec::new(); + for (i, w) in matching_words.iter().enumerate() { + if bigram_str.as_ref().is_some_and(|b| w.contains(b.as_str())) { + bi.push(i); + } else if focus_char_lower.is_some_and(|ch| w.contains(ch)) { + ci.push(i); + } else { + oi.push(i); + } + } + (bi, ci, oi) + } else { + (vec![], vec![], vec![]) + }; + let mut words: Vec = Vec::new(); - let mut last_word = String::new(); + let mut recent: Vec = Vec::new(); for _ in 0..word_count { if use_real_words { - // Pick a real 
word (avoid consecutive duplicates). - // If focused is set, bias sampling toward words containing that key. - let focus = focused.filter(|ch| ch.is_ascii_lowercase()); - let focused_indices: Vec = if let Some(ch) = focus { - matching_words - .iter() - .enumerate() - .filter_map(|(i, w)| w.contains(ch).then_some(i)) - .collect() - } else { - Vec::new() - }; - let mut picked = None; - for _ in 0..6 { - let idx = if !focused_indices.is_empty() && self.rng.gen_bool(0.70) { - let j = self.rng.gen_range(0..focused_indices.len()); - focused_indices[j] - } else { - self.rng.gen_range(0..matching_words.len()) - }; - let word = matching_words[idx].clone(); - if word != last_word { - picked = Some(word); - break; - } + let word = self.pick_tiered_word( + &matching_words, + &bigram_indices, + &char_indices, + &other_indices, + &recent, + ); + recent.push(word.clone()); + if recent.len() > 4 { + recent.remove(0); } - let word = match picked { - Some(w) => w, - None => self.generate_phonetic_word(filter, focused), - }; - last_word.clone_from(&word); words.push(word); } else { - // Fall back to phonetic pseudo-words - let word = self.generate_phonetic_word(filter, focused); + let word = self.generate_phonetic_word(filter, focused_char, focused_bigram); words.push(word); } } @@ -272,7 +395,7 @@ mod tests { Dictionary::load(), SmallRng::seed_from_u64(42), ); - let focused_text = focused_gen.generate(&filter, Some('k'), 1200); + let focused_text = focused_gen.generate(&filter, Some('k'), None, 1200); let focused_count = focused_text .split_whitespace() .filter(|w| w.contains('k')) @@ -280,7 +403,7 @@ mod tests { let mut baseline_gen = PhoneticGenerator::new(table, Dictionary::load(), SmallRng::seed_from_u64(42)); - let baseline_text = baseline_gen.generate(&filter, None, 1200); + let baseline_text = baseline_gen.generate(&filter, None, None, 1200); let baseline_count = baseline_text .split_whitespace() .filter(|w| w.contains('k')) @@ -291,4 +414,64 @@ mod tests { 
"focused_count={focused_count}, baseline_count={baseline_count}" ); } + + #[test] + fn test_phonetic_bigram_focus_increases_bigram_words() { + let dictionary = Dictionary::load(); + let table = TransitionTable::build_from_words(&dictionary.words_list()); + let filter = CharFilter::new(('a'..='z').collect()); + + let mut bigram_gen = PhoneticGenerator::new( + table.clone(), + Dictionary::load(), + SmallRng::seed_from_u64(42), + ); + let bigram_text = bigram_gen.generate(&filter, None, Some(['t', 'h']), 1200); + let bigram_count = bigram_text + .split_whitespace() + .filter(|w| w.contains("th")) + .count(); + + let mut baseline_gen = + PhoneticGenerator::new(table, Dictionary::load(), SmallRng::seed_from_u64(42)); + let baseline_text = baseline_gen.generate(&filter, None, None, 1200); + let baseline_count = baseline_text + .split_whitespace() + .filter(|w| w.contains("th")) + .count(); + + assert!( + bigram_count > baseline_count, + "bigram_count={bigram_count}, baseline_count={baseline_count}" + ); + } + + #[test] + fn test_phonetic_dual_focus_no_excessive_repeats() { + let dictionary = Dictionary::load(); + let table = TransitionTable::build_from_words(&dictionary.words_list()); + let filter = CharFilter::new(('a'..='z').collect()); + + let mut generator = + PhoneticGenerator::new(table, Dictionary::load(), SmallRng::seed_from_u64(42)); + let text = generator.generate(&filter, Some('k'), Some(['t', 'h']), 200); + let words: Vec<&str> = text.split_whitespace().collect(); + + // Check no word appears > 3 times consecutively + let mut max_consecutive = 1; + let mut current_run = 1; + for i in 1..words.len() { + if words[i] == words[i - 1] { + current_run += 1; + max_consecutive = max_consecutive.max(current_run); + } else { + current_run = 1; + } + } + + assert!( + max_consecutive <= 3, + "Max consecutive repeats = {max_consecutive}, expected <= 3" + ); + } } diff --git a/src/main.rs b/src/main.rs index a98a909..aacd37e 100644 --- a/src/main.rs +++ b/src/main.rs @@ 
-30,15 +30,15 @@ use ratatui::widgets::{Block, Paragraph, Widget, Wrap}; use app::{App, AppScreen, DrillMode, MilestoneKind, StatusKind}; use engine::skill_tree::{DrillScope, find_key_branch}; -use keyboard::display::key_display_name; -use keyboard::finger::Hand; use event::{AppEvent, EventHandler}; use generator::code_syntax::{code_language_options, is_language_cached, language_by_key}; use generator::passage::{is_book_cached, passage_options}; +use keyboard::display::key_display_name; +use keyboard::finger::Hand; use ui::components::dashboard::Dashboard; use ui::components::keyboard_diagram::KeyboardDiagram; use ui::components::skill_tree::{SkillTreeWidget, detail_line_count, selectable_branches}; -use ui::components::stats_dashboard::StatsDashboard; +use ui::components::stats_dashboard::{AnomalyBigramRow, NgramTabData, StatsDashboard}; use ui::components::stats_sidebar::StatsSidebar; use ui::components::typing_area::TypingArea; use ui::layout::AppLayout; @@ -205,12 +205,18 @@ fn handle_key(app: &mut App, key: KeyEvent) { // Track depressed keys and shift state for keyboard diagram match (&key.code, key.kind) { - (KeyCode::Modifier(ModifierKeyCode::LeftShift | ModifierKeyCode::RightShift), KeyEventKind::Press | KeyEventKind::Repeat) => { + ( + KeyCode::Modifier(ModifierKeyCode::LeftShift | ModifierKeyCode::RightShift), + KeyEventKind::Press | KeyEventKind::Repeat, + ) => { app.shift_held = true; app.last_key_time = Some(Instant::now()); return; // Don't dispatch bare shift presses to screen handlers } - (KeyCode::Modifier(ModifierKeyCode::LeftShift | ModifierKeyCode::RightShift), KeyEventKind::Release) => { + ( + KeyCode::Modifier(ModifierKeyCode::LeftShift | ModifierKeyCode::RightShift), + KeyEventKind::Release, + ) => { app.shift_held = false; return; } @@ -261,6 +267,14 @@ fn handle_key(app: &mut App, key: KeyEvent) { return; } + // Briefly block all input right after a drill completes to avoid accidental + // popup dismissal or continuation from trailing 
keystrokes. + if app.post_drill_input_lock_remaining_ms().is_some() + && (!app.milestone_queue.is_empty() || app.screen == AppScreen::DrillResult) + { + return; + } + if key.modifiers.contains(KeyModifiers::CONTROL) && key.code == KeyCode::Char('c') { app.should_quit = true; return; @@ -412,7 +426,7 @@ fn handle_result_key(app: &mut App, key: KeyEvent) { } fn handle_stats_key(app: &mut App, key: KeyEvent) { - const STATS_TAB_COUNT: usize = 5; + const STATS_TAB_COUNT: usize = 6; // Confirmation dialog takes priority if app.history_confirm_delete { @@ -452,6 +466,7 @@ fn handle_stats_key(app: &mut App, key: KeyEvent) { KeyCode::Char('3') => app.stats_tab = 2, KeyCode::Char('4') => app.stats_tab = 3, KeyCode::Char('5') => app.stats_tab = 4, + KeyCode::Char('6') => app.stats_tab = 5, KeyCode::Tab => app.stats_tab = (app.stats_tab + 1) % STATS_TAB_COUNT, KeyCode::BackTab => { app.stats_tab = if app.stats_tab == 0 { @@ -472,6 +487,7 @@ fn handle_stats_key(app: &mut App, key: KeyEvent) { KeyCode::Char('3') => app.stats_tab = 2, KeyCode::Char('4') => app.stats_tab = 3, KeyCode::Char('5') => app.stats_tab = 4, + KeyCode::Char('6') => app.stats_tab = 5, KeyCode::Tab => app.stats_tab = (app.stats_tab + 1) % STATS_TAB_COUNT, KeyCode::BackTab => { app.stats_tab = if app.stats_tab == 0 { @@ -528,7 +544,10 @@ fn handle_settings_key(app: &mut App, key: KeyEvent) { } // Priority 4: editing a path field - if app.settings_editing_download_dir || app.settings_editing_export_path || app.settings_editing_import_path { + if app.settings_editing_download_dir + || app.settings_editing_export_path + || app.settings_editing_import_path + { match key.code { KeyCode::Esc => { app.clear_settings_modals(); @@ -579,30 +598,28 @@ fn handle_settings_key(app: &mut App, key: KeyEvent) { app.settings_selected += 1; } } - KeyCode::Enter => { - match app.settings_selected { - 5 | 9 => { - app.clear_settings_modals(); - app.settings_editing_download_dir = true; - } - 7 => 
app.start_code_downloads_from_settings(), - 11 => app.start_passage_downloads_from_settings(), - 12 => { - app.clear_settings_modals(); - app.settings_editing_export_path = true; - } - 13 => app.export_data(), - 14 => { - app.clear_settings_modals(); - app.settings_editing_import_path = true; - } - 15 => { - app.clear_settings_modals(); - app.settings_confirm_import = true; - } - _ => app.settings_cycle_forward(), + KeyCode::Enter => match app.settings_selected { + 5 | 9 => { + app.clear_settings_modals(); + app.settings_editing_download_dir = true; } - } + 7 => app.start_code_downloads_from_settings(), + 11 => app.start_passage_downloads_from_settings(), + 12 => { + app.clear_settings_modals(); + app.settings_editing_export_path = true; + } + 13 => app.export_data(), + 14 => { + app.clear_settings_modals(); + app.settings_editing_import_path = true; + } + 15 => { + app.clear_settings_modals(); + app.settings_confirm_import = true; + } + _ => app.settings_cycle_forward(), + }, KeyCode::Right | KeyCode::Char('l') => { match app.settings_selected { 5 | 7 | 9 | 11 | 12 | 13 | 14 | 15 => {} // path/button fields @@ -1126,13 +1143,30 @@ fn render_drill(frame: &mut ratatui::Frame, app: &App) { DrillMode::Passage => "Passage (Unranked)", }; + // Compute focus text from stored selection (what generated this drill's text) + let focus_text = if let Some(ref focus) = app.current_focus { + match (&focus.char_focus, &focus.bigram_focus) { + (Some(ch), Some((key, _, _))) => { + format!(" | Focus: '{ch}' + \"{}{}\"", key.0[0], key.0[1]) + } + (Some(ch), None) => format!(" | Focus: '{ch}'"), + (None, Some((key, _, _))) => { + format!(" | Focus: \"{}{}\"", key.0[0], key.0[1]) + } + (None, None) => String::new(), + } + } else { + String::new() + }; + // For medium/narrow: show compact stats in header if !tier.show_sidebar() { let wpm = drill.wpm(); let accuracy = drill.accuracy(); let errors = drill.typo_count(); - let header_text = - format!(" {mode_name} | WPM: {wpm:.0} | Acc: 
{accuracy:.1}% | Errors: {errors}"); + let header_text = format!( + " {mode_name} | WPM: {wpm:.0} | Acc: {accuracy:.1}% | Err: {errors}{focus_text}" + ); let header = Paragraph::new(Line::from(Span::styled( &*header_text, Style::default() @@ -1144,18 +1178,6 @@ fn render_drill(frame: &mut ratatui::Frame, app: &App) { frame.render_widget(header, app_layout.header); } else { let header_title = format!(" {mode_name} Drill "); - let focus_text = if app.drill_mode == DrillMode::Adaptive { - let focused = app - .skill_tree - .focused_key(app.drill_scope, &app.ranked_key_stats); - if let Some(focused) = focused { - format!(" | Focus: '{focused}'") - } else { - String::new() - } - } else { - String::new() - }; let header = Paragraph::new(Line::from(vec![ Span::styled( &*header_title, @@ -1294,7 +1316,6 @@ fn render_drill(frame: &mut ratatui::Frame, app: &App) { Style::default().fg(colors.text_pending()), ))); frame.render_widget(footer, app_layout.footer); - } } @@ -1422,8 +1443,7 @@ fn render_milestone_overlay( let milestone_key = milestone.keys.first().copied(); let unlocked_keys = app.skill_tree.unlocked_keys(app.drill_scope); let is_shifted = milestone_key.is_some_and(|ch| { - ch.is_ascii_uppercase() - || app.keyboard_model.shifted_to_base(ch).is_some() + ch.is_ascii_uppercase() || app.keyboard_model.shifted_to_base(ch).is_some() }); let kbd = KeyboardDiagram::new( None, @@ -1453,8 +1473,13 @@ fn render_milestone_overlay( let footer_y = inner.y + inner.height.saturating_sub(1); if footer_y < inner.y + inner.height { let footer_area = Rect::new(inner.x, footer_y, inner.width, 1); + let footer_text = if let Some(ms) = app.post_drill_input_lock_remaining_ms() { + format!(" Input temporarily blocked ({ms}ms remaining)") + } else { + " Press any key to continue".to_string() + }; let footer = Paragraph::new(Line::from(Span::styled( - " Press any key to continue", + footer_text, Style::default().fg(colors.text_pending()), ))); frame.render_widget(footer, footer_area); @@ 
-1477,6 +1502,14 @@ mod review_tests { use crate::session::result::DrillResult; use chrono::{TimeDelta, Utc}; + /// Create an App for testing with the store disabled so tests never + /// read or write the user's real data files. + fn test_app() -> App { + let mut app = App::new(); + app.store = None; + app + } + fn test_result(ts_offset_secs: i64) -> DrillResult { DrillResult { wpm: 60.0, @@ -1497,7 +1530,7 @@ mod review_tests { #[test] fn milestone_overlay_blocks_underlying_input() { - let mut app = App::new(); + let mut app = test_app(); app.screen = AppScreen::Drill; app.drill = Some(crate::session::drill::DrillState::new("abc")); app.milestone_queue @@ -1506,10 +1539,13 @@ mod review_tests { keys: vec!['a'], finger_info: vec![('a', "left pinky".to_string())], message: "msg", - }); + }); let before_cursor = app.drill.as_ref().map(|d| d.cursor).unwrap_or(0); - handle_key(&mut app, KeyEvent::new(KeyCode::Char('x'), KeyModifiers::NONE)); + handle_key( + &mut app, + KeyEvent::new(KeyCode::Char('x'), KeyModifiers::NONE), + ); let after_cursor = app.drill.as_ref().map(|d| d.cursor).unwrap_or(0); assert_eq!(before_cursor, after_cursor); @@ -1518,7 +1554,7 @@ mod review_tests { #[test] fn milestone_queue_chains_before_result_actions() { - let mut app = App::new(); + let mut app = test_app(); app.screen = AppScreen::DrillResult; app.milestone_queue .push_back(crate::app::KeyMilestonePopup { @@ -1535,19 +1571,66 @@ mod review_tests { message: "msg2", }); - handle_key(&mut app, KeyEvent::new(KeyCode::Char('q'), KeyModifiers::NONE)); + handle_key( + &mut app, + KeyEvent::new(KeyCode::Char('q'), KeyModifiers::NONE), + ); assert_eq!(app.screen, AppScreen::DrillResult); assert_eq!(app.milestone_queue.len(), 1); - handle_key(&mut app, KeyEvent::new(KeyCode::Char('q'), KeyModifiers::NONE)); + handle_key( + &mut app, + KeyEvent::new(KeyCode::Char('q'), KeyModifiers::NONE), + ); assert_eq!(app.screen, AppScreen::DrillResult); assert!(app.milestone_queue.is_empty()); // Now normal 
result action should apply. - handle_key(&mut app, KeyEvent::new(KeyCode::Char('q'), KeyModifiers::NONE)); + handle_key( + &mut app, + KeyEvent::new(KeyCode::Char('q'), KeyModifiers::NONE), + ); assert_eq!(app.screen, AppScreen::Menu); } + #[test] + fn post_drill_lock_blocks_result_shortcuts_temporarily() { + let mut app = test_app(); + app.screen = AppScreen::DrillResult; + app.last_result = Some(test_result(1)); + app.post_drill_input_lock_until = + Some(Instant::now() + std::time::Duration::from_millis(500)); + + handle_key( + &mut app, + KeyEvent::new(KeyCode::Char('c'), KeyModifiers::NONE), + ); + + assert_eq!(app.screen, AppScreen::DrillResult); + } + + #[test] + fn post_drill_lock_blocks_milestone_dismissal_temporarily() { + let mut app = test_app(); + app.screen = AppScreen::DrillResult; + app.milestone_queue + .push_back(crate::app::KeyMilestonePopup { + kind: crate::app::MilestoneKind::Unlock, + keys: vec!['a'], + finger_info: vec![('a', "left pinky".to_string())], + message: "msg", + }); + app.post_drill_input_lock_until = + Some(Instant::now() + std::time::Duration::from_millis(500)); + + handle_key( + &mut app, + KeyEvent::new(KeyCode::Char('x'), KeyModifiers::NONE), + ); + + assert_eq!(app.milestone_queue.len(), 1); + } + #[test] fn overlay_mode_height_boundaries() { assert_eq!(overlay_keyboard_mode(14), 0); @@ -1558,13 +1641,16 @@ mod review_tests { #[test] fn result_delete_shortcut_opens_confirmation_for_latest() { - let mut app = App::new(); + let mut app = test_app(); app.screen = AppScreen::DrillResult; app.last_result = Some(test_result(2)); app.drill_history = vec![test_result(1), test_result(2)]; app.history_selected = 1; - handle_key(&mut app, KeyEvent::new(KeyCode::Char('x'), KeyModifiers::NONE)); + handle_key( + &mut app, + KeyEvent::new(KeyCode::Char('x'), KeyModifiers::NONE), + ); assert!(app.history_confirm_delete); assert_eq!(app.history_selected, 0); @@ -1572,15 +1658,21 @@ mod review_tests { #[test] fn 
result_delete_confirmation_yes_deletes_latest() { - let mut app = App::new(); + let mut app = test_app(); app.screen = AppScreen::DrillResult; app.last_result = Some(test_result(3)); let older = test_result(1); let newer = test_result(2); app.drill_history = vec![older.clone(), newer.clone()]; - handle_key(&mut app, KeyEvent::new(KeyCode::Char('x'), KeyModifiers::NONE)); - handle_key(&mut app, KeyEvent::new(KeyCode::Char('y'), KeyModifiers::NONE)); + handle_key( + &mut app, + KeyEvent::new(KeyCode::Char('x'), KeyModifiers::NONE), + ); + handle_key( + &mut app, + KeyEvent::new(KeyCode::Char('y'), KeyModifiers::NONE), + ); assert!(!app.history_confirm_delete); assert_eq!(app.drill_history.len(), 1); @@ -1591,13 +1683,19 @@ mod review_tests { #[test] fn result_delete_confirmation_cancel_keeps_history() { - let mut app = App::new(); + let mut app = test_app(); app.screen = AppScreen::DrillResult; app.last_result = Some(test_result(2)); app.drill_history = vec![test_result(1), test_result(2)]; - handle_key(&mut app, KeyEvent::new(KeyCode::Char('x'), KeyModifiers::NONE)); - handle_key(&mut app, KeyEvent::new(KeyCode::Char('n'), KeyModifiers::NONE)); + handle_key( + &mut app, + KeyEvent::new(KeyCode::Char('x'), KeyModifiers::NONE), + ); + handle_key( + &mut app, + KeyEvent::new(KeyCode::Char('n'), KeyModifiers::NONE), + ); assert!(!app.history_confirm_delete); assert_eq!(app.drill_history.len(), 2); @@ -1606,11 +1704,14 @@ mod review_tests { #[test] fn result_continue_shortcuts_start_next_drill() { - let mut app = App::new(); + let mut app = test_app(); app.screen = AppScreen::DrillResult; app.last_result = Some(test_result(2)); - handle_key(&mut app, KeyEvent::new(KeyCode::Char('c'), KeyModifiers::NONE)); + handle_key( + &mut app, + KeyEvent::new(KeyCode::Char('c'), KeyModifiers::NONE), + ); assert_eq!(app.screen, AppScreen::Drill); app.screen = AppScreen::DrillResult; @@ -1627,7 +1728,7 @@ mod review_tests { #[test] fn result_continue_code_uses_last_language_params() { 
- let mut app = App::new(); + let mut app = test_app(); app.screen = AppScreen::DrillResult; app.last_result = Some(test_result(2)); app.drill_mode = DrillMode::Code; @@ -1635,7 +1736,10 @@ mod review_tests { app.config.code_language = "python".to_string(); app.last_code_drill_language = Some("rust".to_string()); - handle_key(&mut app, KeyEvent::new(KeyCode::Char('c'), KeyModifiers::NONE)); + handle_key( + &mut app, + KeyEvent::new(KeyCode::Char('c'), KeyModifiers::NONE), + ); assert_eq!(app.screen, AppScreen::Drill); assert_eq!(app.drill_mode, DrillMode::Code); @@ -1658,65 +1762,44 @@ mod review_tests { #[test] fn settings_modal_invariant_enter_export_path_clears_others() { - let mut app = App::new(); + let mut app = test_app(); app.screen = AppScreen::Settings; // First, activate import confirmation app.settings_selected = 15; // Import Data - handle_settings_key( - &mut app, - KeyEvent::new(KeyCode::Enter, KeyModifiers::NONE), - ); + handle_settings_key(&mut app, KeyEvent::new(KeyCode::Enter, KeyModifiers::NONE)); assert!(app.settings_confirm_import); assert!(modal_edit_count(&app) <= 1); // Cancel it - handle_settings_key( - &mut app, - KeyEvent::new(KeyCode::Esc, KeyModifiers::NONE), - ); + handle_settings_key(&mut app, KeyEvent::new(KeyCode::Esc, KeyModifiers::NONE)); assert!(!app.settings_confirm_import); // Enter export path editing app.settings_selected = 12; // Export Path - handle_settings_key( - &mut app, - KeyEvent::new(KeyCode::Enter, KeyModifiers::NONE), - ); + handle_settings_key(&mut app, KeyEvent::new(KeyCode::Enter, KeyModifiers::NONE)); assert!(app.settings_editing_export_path); assert!(modal_edit_count(&app) <= 1); // Esc out - handle_settings_key( - &mut app, - KeyEvent::new(KeyCode::Esc, KeyModifiers::NONE), - ); + handle_settings_key(&mut app, KeyEvent::new(KeyCode::Esc, KeyModifiers::NONE)); assert!(!app.settings_editing_export_path); } #[test] fn settings_modal_invariant_enter_import_path_clears_others() { - let mut app = App::new(); + let 
mut app = test_app(); app.screen = AppScreen::Settings; // Activate export path editing first app.settings_selected = 12; - handle_settings_key( - &mut app, - KeyEvent::new(KeyCode::Enter, KeyModifiers::NONE), - ); + handle_settings_key(&mut app, KeyEvent::new(KeyCode::Enter, KeyModifiers::NONE)); assert!(app.settings_editing_export_path); // Esc out, then enter import path editing - handle_settings_key( - &mut app, - KeyEvent::new(KeyCode::Esc, KeyModifiers::NONE), - ); + handle_settings_key(&mut app, KeyEvent::new(KeyCode::Esc, KeyModifiers::NONE)); app.settings_selected = 14; // Import Path - handle_settings_key( - &mut app, - KeyEvent::new(KeyCode::Enter, KeyModifiers::NONE), - ); + handle_settings_key(&mut app, KeyEvent::new(KeyCode::Enter, KeyModifiers::NONE)); assert!(app.settings_editing_import_path); assert!(!app.settings_editing_export_path); assert!(modal_edit_count(&app) <= 1); @@ -1724,15 +1807,12 @@ mod review_tests { #[test] fn settings_confirm_import_dialog_y_n_esc() { - let mut app = App::new(); + let mut app = test_app(); app.screen = AppScreen::Settings; // Trigger import confirmation app.settings_selected = 15; - handle_settings_key( - &mut app, - KeyEvent::new(KeyCode::Enter, KeyModifiers::NONE), - ); + handle_settings_key(&mut app, KeyEvent::new(KeyCode::Enter, KeyModifiers::NONE)); assert!(app.settings_confirm_import); // 'n' cancels @@ -1744,23 +1824,17 @@ mod review_tests { // Trigger again app.settings_selected = 15; - handle_settings_key( - &mut app, - KeyEvent::new(KeyCode::Enter, KeyModifiers::NONE), - ); + handle_settings_key(&mut app, KeyEvent::new(KeyCode::Enter, KeyModifiers::NONE)); assert!(app.settings_confirm_import); // Esc cancels - handle_settings_key( - &mut app, - KeyEvent::new(KeyCode::Esc, KeyModifiers::NONE), - ); + handle_settings_key(&mut app, KeyEvent::new(KeyCode::Esc, KeyModifiers::NONE)); assert!(!app.settings_confirm_import); } #[test] fn settings_status_message_dismissed_on_keypress() { - let mut app = App::new(); 
+ let mut app = test_app(); app.screen = AppScreen::Settings; // Set a status message @@ -1840,7 +1914,7 @@ mod review_tests { #[test] fn caps_lock_set_from_state_flag() { - let mut app = App::new(); + let mut app = test_app(); assert!(!app.caps_lock); // Modifier event with CAPS_LOCK in state turns it on @@ -1858,7 +1932,7 @@ mod review_tests { #[test] fn caps_lock_not_cleared_by_char_event_with_empty_state() { - let mut app = App::new(); + let mut app = test_app(); app.caps_lock = true; app.screen = AppScreen::Drill; app.drill = Some(crate::session::drill::DrillState::new("abc")); @@ -1873,12 +1947,15 @@ mod review_tests { KeyEventState::NONE, ), ); - assert!(app.caps_lock, "char event with empty state must not clear caps_lock"); + assert!( + app.caps_lock, + "char event with empty state must not clear caps_lock" + ); } #[test] fn caps_lock_cleared_by_modifier_event_without_caps_flag() { - let mut app = App::new(); + let mut app = test_app(); app.caps_lock = true; // Modifier event WITHOUT CAPS_LOCK in state clears it @@ -1891,12 +1968,15 @@ mod review_tests { KeyEventState::NONE, ), ); - assert!(!app.caps_lock, "modifier event without CAPS_LOCK flag should clear caps_lock"); + assert!( + !app.caps_lock, + "modifier event without CAPS_LOCK flag should clear caps_lock" + ); } #[test] fn caps_lock_on_uppercase_char_does_not_set_shift() { - let mut app = App::new(); + let mut app = test_app(); app.caps_lock = true; app.screen = AppScreen::Drill; app.drill = Some(crate::session::drill::DrillState::new("ABC")); @@ -1912,12 +1992,15 @@ mod review_tests { KeyEventState::NONE, ), ); - assert!(!app.shift_held, "uppercase char with caps lock should not set shift_held"); + assert!( + !app.shift_held, + "uppercase char with caps lock should not set shift_held" + ); } #[test] fn caps_lock_on_lowercase_char_with_shift_sets_shift() { - let mut app = App::new(); + let mut app = test_app(); app.caps_lock = true; app.screen = AppScreen::Drill; app.drill = 
Some(crate::session::drill::DrillState::new("abc")); @@ -1932,12 +2015,15 @@ mod review_tests { KeyEventState::NONE, ), ); - assert!(app.shift_held, "lowercase char with caps+shift should set shift_held"); + assert!( + app.shift_held, + "lowercase char with caps+shift should set shift_held" + ); } #[test] fn caps_lock_off_uppercase_char_with_shift_sets_shift() { - let mut app = App::new(); + let mut app = test_app(); assert!(!app.caps_lock); app.screen = AppScreen::Drill; app.drill = Some(crate::session::drill::DrillState::new("ABC")); @@ -1952,12 +2038,15 @@ mod review_tests { KeyEventState::NONE, ), ); - assert!(app.shift_held, "uppercase char without caps lock should set shift_held"); + assert!( + app.shift_held, + "uppercase char without caps lock should set shift_held" + ); } #[test] fn caps_lock_off_lowercase_char_without_shift_clears_shift() { - let mut app = App::new(); + let mut app = test_app(); app.shift_held = true; app.screen = AppScreen::Drill; app.drill = Some(crate::session::drill::DrillState::new("abc")); @@ -1972,12 +2061,15 @@ mod review_tests { KeyEventState::NONE, ), ); - assert!(!app.shift_held, "lowercase char without shift should clear shift_held"); + assert!( + !app.shift_held, + "lowercase char without shift should clear shift_held" + ); } #[test] fn shift_modifier_press_sets_shift_held() { - let mut app = App::new(); + let mut app = test_app(); handle_key( &mut app, @@ -1993,7 +2085,7 @@ mod review_tests { #[test] fn shift_modifier_release_clears_shift_held() { - let mut app = App::new(); + let mut app = test_app(); app.shift_held = true; handle_key( @@ -2010,17 +2102,20 @@ mod review_tests { #[test] fn depressed_keys_tracks_char_press() { - let mut app = App::new(); + let mut app = test_app(); app.screen = AppScreen::Drill; app.drill = Some(crate::session::drill::DrillState::new("abc")); - handle_key(&mut app, KeyEvent::new(KeyCode::Char('a'), KeyModifiers::NONE)); + handle_key( + &mut app, + KeyEvent::new(KeyCode::Char('a'), 
KeyModifiers::NONE), + ); assert!(app.depressed_keys.contains(&'a')); } #[test] fn depressed_keys_release_removes_char() { - let mut app = App::new(); + let mut app = test_app(); app.depressed_keys.insert('a'); handle_key( @@ -2037,7 +2132,7 @@ mod review_tests { #[test] fn caps_lock_cleared_by_capslock_key_without_caps_flag() { - let mut app = App::new(); + let mut app = test_app(); app.caps_lock = true; // Pressing CapsLock key to toggle off: event has KeyCode::CapsLock @@ -2051,12 +2146,15 @@ mod review_tests { KeyEventState::NONE, ), ); - assert!(!app.caps_lock, "CapsLock key event without CAPS_LOCK state should clear caps_lock"); + assert!( + !app.caps_lock, + "CapsLock key event without CAPS_LOCK state should clear caps_lock" + ); } #[test] fn caps_lock_non_alpha_char_with_shift_still_sets_shift() { - let mut app = App::new(); + let mut app = test_app(); app.caps_lock = true; app.screen = AppScreen::Drill; app.drill = Some(crate::session::drill::DrillState::new("!@#")); @@ -2072,7 +2170,147 @@ mod review_tests { KeyEventState::NONE, ), ); - assert!(app.shift_held, "non-alpha char with shift should set shift_held regardless of caps"); + assert!( + app.shift_held, + "non-alpha char with shift should set shift_held regardless of caps" + ); + } + + #[test] + fn build_ngram_tab_data_maps_fields_correctly() { + use crate::engine::ngram_stats::{BigramKey, ANOMALY_STREAK_REQUIRED}; + + let mut app = test_app(); + + // Set up char stats with known EMA error rates + for &ch in &['e', 't', 'a', 'o', 'n', 'i'] { + let stat = app.ranked_key_stats.stats.entry(ch).or_default(); + stat.confidence = 0.95; + stat.filtered_time_ms = 360.0; + stat.sample_count = 50; + stat.total_count = 50; + stat.error_rate_ema = 0.03; + } + // Make 'n' weak so we get a focused char + app.ranked_key_stats.stats.get_mut(&'n').unwrap().confidence = 0.5; + app.ranked_key_stats + .stats + .get_mut(&'n') + .unwrap() + .filtered_time_ms = 686.0; + + // Add a confirmed error anomaly bigram + let 
et_key = BigramKey(['e', 't']); + let stat = app + .ranked_bigram_stats + .stats + .entry(et_key.clone()) + .or_default(); + stat.sample_count = 30; + stat.error_rate_ema = 0.80; + stat.error_anomaly_streak = ANOMALY_STREAK_REQUIRED; + + // Add an unconfirmed anomaly bigram (low samples) + let ao_key = BigramKey(['a', 'o']); + let stat = app + .ranked_bigram_stats + .stats + .entry(ao_key.clone()) + .or_default(); + stat.sample_count = 10; + stat.error_rate_ema = 0.60; + stat.error_anomaly_streak = 1; + + // Add a trigram to verify count + let the_key = crate::engine::ngram_stats::TrigramKey(['t', 'h', 'e']); + app.ranked_trigram_stats + .stats + .entry(the_key) + .or_default() + .sample_count = 5; + + // Set trigram gain history + app.trigram_gain_history.push(0.12); + + // Set drill scope + app.drill_scope = DrillScope::Global; + app.stats_tab = 5; + + let data = build_ngram_tab_data(&app); + + // Verify scope label + assert_eq!(data.scope_label, "Global"); + + // Verify trigram gain + assert_eq!(data.latest_trigram_gain, Some(0.12)); + + // Verify bigram/trigram counts + assert_eq!(data.total_bigrams, app.ranked_bigram_stats.stats.len()); + assert!( + data.total_trigrams >= 1, + "should include at least our test trigram" + ); + + // Verify hesitation threshold + assert!(data.hesitation_threshold_ms >= 800.0); + + // Verify FocusSelection has both char and bigram + assert!(data.focus.char_focus.is_some(), "should have char focus"); + assert!( + data.focus.bigram_focus.is_some(), + "should have bigram focus" + ); + + // Verify error anomaly rows have correct fields populated + if !data.error_anomalies.is_empty() { + let row = &data.error_anomalies[0]; + assert!(row.anomaly_pct > 0.0, "anomaly_pct should be positive"); + assert!(row.sample_count > 0, "sample_count should be positive"); + } + + // Verify 'ao' appears in error anomalies (high error rate, above min samples) + let ao_row = data.error_anomalies.iter().find(|r| r.bigram == "ao"); + if let Some(ao) = 
ao_row { + assert_eq!(ao.sample_count, 10); + assert!(!ao.confirmed, "ao should not be confirmed (low samples)"); + } + + // Add a speed anomaly bigram and verify speed_anomalies mapping + let ni_key = BigramKey(['n', 'i']); + let stat = app + .ranked_bigram_stats + .stats + .entry(ni_key.clone()) + .or_default(); + stat.sample_count = 25; + stat.error_rate_ema = 0.02; + stat.filtered_time_ms = 600.0; // much slower than char 'i' baseline + stat.speed_anomaly_streak = ANOMALY_STREAK_REQUIRED; + + // Make char 'i' baseline fast enough that 600ms is a big anomaly + app.ranked_key_stats + .stats + .get_mut(&'i') + .unwrap() + .filtered_time_ms = 200.0; + + let data2 = build_ngram_tab_data(&app); + + // Verify speed anomalies contain our bigram with correct field mapping + let ni_row = data2.speed_anomalies.iter().find(|r| r.bigram == "ni"); + assert!(ni_row.is_some(), "ni should appear in speed_anomalies"); + let ni = ni_row.unwrap(); + assert_eq!(ni.sample_count, 25); + assert!(ni.anomaly_pct > 100.0, "600ms / 200ms => 200% anomaly"); + assert!( + (ni.expected_baseline - 200.0).abs() < 1.0, + "expected baseline should be char 'i' speed (200ms), got {}", + ni.expected_baseline + ); + assert!( + ni.confirmed, + "ni should be confirmed (samples >= 20, streak >= required)" + ); } } @@ -2081,7 +2319,7 @@ fn render_result(frame: &mut ratatui::Frame, app: &App) { if let Some(ref result) = app.last_result { let centered = ui::layout::centered_rect(60, 70, area); - let dashboard = Dashboard::new(result, app.theme); + let dashboard = Dashboard::new(result, app.theme, app.post_drill_input_lock_remaining_ms()); frame.render_widget(dashboard, centered); if app.history_confirm_delete && !app.drill_history.is_empty() { @@ -2117,6 +2355,11 @@ fn render_result(frame: &mut ratatui::Frame, app: &App) { fn render_stats(frame: &mut ratatui::Frame, app: &App) { let area = frame.area(); + let ngram_data = if app.stats_tab == 5 { + Some(build_ngram_tab_data(app)) + } else { + None + }; let 
dashboard = StatsDashboard::new( &app.drill_history, &app.key_stats, @@ -2129,10 +2372,79 @@ fn render_stats(frame: &mut ratatui::Frame, app: &App) { app.history_selected, app.history_confirm_delete, &app.keyboard_model, + ngram_data.as_ref(), ); frame.render_widget(dashboard, area); } +fn build_ngram_tab_data(app: &App) -> NgramTabData { + use engine::ngram_stats::{self, select_focus}; + + let focus = select_focus( + &app.skill_tree, + app.drill_scope, + &app.ranked_key_stats, + &app.ranked_bigram_stats, + ); + + let unlocked = app.skill_tree.unlocked_keys(app.drill_scope); + + let error_anomalies_raw = app + .ranked_bigram_stats + .error_anomaly_bigrams(&app.ranked_key_stats, &unlocked); + let speed_anomalies_raw = app + .ranked_bigram_stats + .speed_anomaly_bigrams(&app.ranked_key_stats, &unlocked); + + let error_anomalies: Vec = error_anomalies_raw + .iter() + .map(|a| AnomalyBigramRow { + bigram: format!("{}{}", a.key.0[0], a.key.0[1]), + anomaly_pct: a.anomaly_pct, + sample_count: a.sample_count, + error_count: a.error_count, + error_rate_ema: a.error_rate_ema, + speed_ms: a.speed_ms, + expected_baseline: a.expected_baseline, + confirmed: a.confirmed, + }) + .collect(); + + let speed_anomalies: Vec = speed_anomalies_raw + .iter() + .map(|a| AnomalyBigramRow { + bigram: format!("{}{}", a.key.0[0], a.key.0[1]), + anomaly_pct: a.anomaly_pct, + sample_count: a.sample_count, + error_count: a.error_count, + error_rate_ema: a.error_rate_ema, + speed_ms: a.speed_ms, + expected_baseline: a.expected_baseline, + confirmed: a.confirmed, + }) + .collect(); + + let scope_label = match app.drill_scope { + DrillScope::Global => "Global".to_string(), + DrillScope::Branch(id) => format!("Branch: {}", id.to_key()), + }; + + let hesitation_threshold_ms = ngram_stats::hesitation_threshold(app.user_median_transition_ms); + + let latest_trigram_gain = app.trigram_gain_history.last().copied(); + + NgramTabData { + focus, + error_anomalies, + speed_anomalies, + total_bigrams: 
app.ranked_bigram_stats.stats.len(), + total_trigrams: app.ranked_trigram_stats.stats.len(), + hesitation_threshold_ms, + latest_trigram_gain, + scope_label, + } +} + fn render_settings(frame: &mut ratatui::Frame, app: &App) { let area = frame.area(); let colors = &app.theme.colors; @@ -2224,21 +2536,13 @@ fn render_settings(frame: &mut ratatui::Frame, app: &App) { app.settings_export_path.clone(), true, // path field ), - ( - "Export Data".to_string(), - "Export now".to_string(), - false, - ), + ("Export Data".to_string(), "Export now".to_string(), false), ( "Import Path".to_string(), app.settings_import_path.clone(), true, // path field ), - ( - "Import Data".to_string(), - "Import now".to_string(), - false, - ), + ("Import Data".to_string(), "Import now".to_string(), false), ]; let header_height = if inner.height > 0 { 1 } else { 0 }; @@ -2311,11 +2615,11 @@ fn render_settings(frame: &mut ratatui::Frame, app: &App) { colors.text_pending() }); - let is_editing_this_path = is_selected && *is_path && ( - app.settings_editing_download_dir - || app.settings_editing_export_path - || app.settings_editing_import_path - ); + let is_editing_this_path = is_selected + && *is_path + && (app.settings_editing_download_dir + || app.settings_editing_export_path + || app.settings_editing_import_path); let lines = if *is_path { let path_line = if is_editing_this_path { format!(" {value}_") @@ -2346,7 +2650,11 @@ fn render_settings(frame: &mut ratatui::Frame, app: &App) { || app.settings_editing_export_path || app.settings_editing_import_path; let footer_hints: Vec<&str> = if any_path_editing { - vec!["Editing path:", "[Type/Backspace] Modify", "[ESC] Done editing"] + vec![ + "Editing path:", + "[Type/Backspace] Modify", + "[ESC] Done editing", + ] } else { vec![ "[ESC] Save & back", @@ -2635,7 +2943,12 @@ fn render_code_language_select(frame: &mut ratatui::Frame, app: &App) { if let Some(footer) = footer_area { let mut footer_lines: Vec = hint_lines_vec .iter() - .map(|line| 
Line::from(Span::styled(line.clone(), Style::default().fg(colors.text_pending())))) + .map(|line| { + Line::from(Span::styled( + line.clone(), + Style::default().fg(colors.text_pending()), + )) + }) .collect(); if show_notice { footer_lines.push(Line::from(Span::styled( @@ -2735,7 +3048,12 @@ fn render_passage_book_select(frame: &mut ratatui::Frame, app: &App) { if let Some(footer) = footer_area { let mut footer_lines: Vec = hint_lines_vec .iter() - .map(|line| Line::from(Span::styled(line.clone(), Style::default().fg(colors.text_pending())))) + .map(|line| { + Line::from(Span::styled( + line.clone(), + Style::default().fg(colors.text_pending()), + )) + }) .collect(); if show_notice { footer_lines.push(Line::from(Span::styled( @@ -2907,11 +3225,12 @@ fn render_passage_intro(frame: &mut ratatui::Frame, app: &App) { Paragraph::new(lines).render(content_area, frame.buffer_mut()); if let Some(footer) = footer_area { let mut footer_lines = vec![Line::from("")]; - footer_lines.extend( - hint_lines - .into_iter() - .map(|hint| Line::from(Span::styled(hint, Style::default().fg(colors.text_pending())))), - ); + footer_lines.extend(hint_lines.into_iter().map(|hint| { + Line::from(Span::styled( + hint, + Style::default().fg(colors.text_pending()), + )) + })); Paragraph::new(footer_lines) .wrap(Wrap { trim: false }) .render(footer, frame.buffer_mut()); @@ -3131,11 +3450,12 @@ fn render_code_intro(frame: &mut ratatui::Frame, app: &App) { Paragraph::new(lines).render(content_area, frame.buffer_mut()); if let Some(footer) = footer_area { let mut footer_lines = vec![Line::from("")]; - footer_lines.extend( - hint_lines - .into_iter() - .map(|hint| Line::from(Span::styled(hint, Style::default().fg(colors.text_pending())))), - ); + footer_lines.extend(hint_lines.into_iter().map(|hint| { + Line::from(Span::styled( + hint, + Style::default().fg(colors.text_pending()), + )) + })); Paragraph::new(footer_lines) .wrap(Wrap { trim: false }) .render(footer, frame.buffer_mut()); @@ -3252,7 
+3572,7 @@ fn render_keyboard_explorer(frame: &mut ratatui::Frame, app: &App) { .constraints([ Constraint::Length(3), // header Constraint::Length(8), // keyboard diagram - Constraint::Min(3), // detail panel + Constraint::Min(3), // detail panel Constraint::Length(1), // footer ]) .split(area); @@ -3271,8 +3591,7 @@ fn render_keyboard_explorer(frame: &mut ratatui::Frame, app: &App) { Style::default().fg(colors.text_pending()), )), ]; - let header = Paragraph::new(header_lines) - .alignment(ratatui::layout::Alignment::Center); + let header = Paragraph::new(header_lines).alignment(ratatui::layout::Alignment::Center); frame.render_widget(header, layout[0]); // Keyboard diagram @@ -3344,8 +3663,26 @@ fn render_keyboard_detail_panel(frame: &mut ratatui::Frame, app: &App, area: Rec let is_shifted = selected.is_uppercase() || matches!( selected, - '!' | '@' | '#' | '$' | '%' | '^' | '&' | '*' | '(' | ')' | '_' | '+' - | '{' | '}' | '|' | ':' | '"' | '<' | '>' | '?' | '~' + '!' | '@' + | '#' + | '$' + | '%' + | '^' + | '&' + | '*' + | '(' + | ')' + | '_' + | '+' + | '{' + | '}' + | '|' + | ':' + | '"' + | '<' + | '>' + | '?' 
+ | '~' ); let shift_guidance = if is_shifted { if finger.hand == Hand::Left { @@ -3438,8 +3775,14 @@ fn render_keyboard_detail_panel(frame: &mut ratatui::Frame, app: &App, area: Rec } else { right_col.push("Built-in Key".to_string()); } - right_col.push(format!("Unlocked: {}", if is_unlocked { "Yes" } else { "No" })); - right_col.push(format!("In Focus?: {}", if in_focus { "Yes" } else { "No" })); + right_col.push(format!( + "Unlocked: {}", + if is_unlocked { "Yes" } else { "No" } + )); + right_col.push(format!( + "In Focus?: {}", + if in_focus { "Yes" } else { "No" } + )); if is_unlocked { right_col.push(format!("Mastery: {mastery_text}")); } else { diff --git a/src/store/json_store.rs b/src/store/json_store.rs index b3e4cff..29ca5e6 100644 --- a/src/store/json_store.rs +++ b/src/store/json_store.rs @@ -8,7 +8,7 @@ use serde::{Serialize, de::DeserializeOwned}; use crate::config::Config; use crate::store::schema::{ - DrillHistoryData, ExportData, KeyStatsData, ProfileData, EXPORT_VERSION, + DrillHistoryData, EXPORT_VERSION, ExportData, KeyStatsData, ProfileData, }; pub struct JsonStore { @@ -136,9 +136,18 @@ impl JsonStore { let files: Vec<(&str, String)> = vec![ ("profile.json", serde_json::to_string_pretty(&data.profile)?), - ("key_stats.json", serde_json::to_string_pretty(&data.key_stats)?), - ("key_stats_ranked.json", serde_json::to_string_pretty(&data.ranked_key_stats)?), - ("lesson_history.json", serde_json::to_string_pretty(&data.drill_history)?), + ( + "key_stats.json", + serde_json::to_string_pretty(&data.key_stats)?, + ), + ( + "key_stats_ranked.json", + serde_json::to_string_pretty(&data.ranked_key_stats)?, + ), + ( + "lesson_history.json", + serde_json::to_string_pretty(&data.drill_history)?, + ), ]; // Stage phase: write .tmp files @@ -172,9 +181,7 @@ impl JsonStore { let had_original = final_path.exists(); // Back up existing file if it exists - if had_original - && let Err(e) = fs::rename(&final_path, &bak_path) - { + if had_original && let Err(e) = 
fs::rename(&final_path, &bak_path) { // Rollback: restore already committed files for (committed_final, committed_bak, committed_had) in &committed { if *committed_had { @@ -335,12 +342,19 @@ mod tests { // Now create a store that points to a nonexistent subdir of the same tmpdir // so that staging .tmp writes will fail let bad_dir = _dir.path().join("nonexistent_subdir"); - let bad_store = JsonStore { base_dir: bad_dir.clone() }; + let bad_store = JsonStore { + base_dir: bad_dir.clone(), + }; let config = Config::default(); let export = make_test_export(&config); let result = bad_store.import_all(&export); assert!(result.is_err()); - assert!(result.unwrap_err().to_string().contains("Import failed during staging")); + assert!( + result + .unwrap_err() + .to_string() + .contains("Import failed during staging") + ); // Original file in the real store is unchanged let after_content = fs::read_to_string(store.file_path("profile.json")).unwrap(); @@ -390,5 +404,4 @@ mod tests { // Should have been cleaned up assert!(!store.file_path("profile.json.bak").exists()); } - } diff --git a/src/ui/components/dashboard.rs b/src/ui/components/dashboard.rs index b6f2f74..f6ea3d9 100644 --- a/src/ui/components/dashboard.rs +++ b/src/ui/components/dashboard.rs @@ -10,11 +10,20 @@ use crate::ui::theme::Theme; pub struct Dashboard<'a> { pub result: &'a DrillResult, pub theme: &'a Theme, + pub input_lock_remaining_ms: Option, } impl<'a> Dashboard<'a> { - pub fn new(result: &'a DrillResult, theme: &'a Theme) -> Self { - Self { result, theme } + pub fn new( + result: &'a DrillResult, + theme: &'a Theme, + input_lock_remaining_ms: Option, + ) -> Self { + Self { + result, + theme, + input_lock_remaining_ms, + } } } @@ -114,16 +123,31 @@ impl Widget for Dashboard<'_> { ]); Paragraph::new(chars_line).render(layout[4], buf); - let help = Paragraph::new(Line::from(vec![ - Span::styled( - " [c/Enter/Space] Continue ", - Style::default().fg(colors.accent()), - ), - Span::styled("[r] Retry ", 
Style::default().fg(colors.accent())), - Span::styled("[q] Menu ", Style::default().fg(colors.accent())), - Span::styled("[s] Stats ", Style::default().fg(colors.accent())), - Span::styled("[x] Delete", Style::default().fg(colors.accent())), - ])); + let help = if let Some(ms) = self.input_lock_remaining_ms { + Paragraph::new(Line::from(vec![ + Span::styled( + " Input temporarily blocked ", + Style::default().fg(colors.warning()), + ), + Span::styled( + format!("({ms}ms remaining)"), + Style::default() + .fg(colors.warning()) + .add_modifier(Modifier::BOLD), + ), + ])) + } else { + Paragraph::new(Line::from(vec![ + Span::styled( + " [c/Enter/Space] Continue ", + Style::default().fg(colors.accent()), + ), + Span::styled("[r] Retry ", Style::default().fg(colors.accent())), + Span::styled("[q] Menu ", Style::default().fg(colors.accent())), + Span::styled("[s] Stats ", Style::default().fg(colors.accent())), + Span::styled("[x] Delete", Style::default().fg(colors.accent())), + ])) + }; help.render(layout[6], buf); } } diff --git a/src/ui/components/keyboard_diagram.rs b/src/ui/components/keyboard_diagram.rs index 585f286..2dc890f 100644 --- a/src/ui/components/keyboard_diagram.rs +++ b/src/ui/components/keyboard_diagram.rs @@ -267,6 +267,22 @@ impl KeyboardDiagram<'_> { } let offsets: &[u16] = &[3, 4, 6]; + let keyboard_width = letter_rows + .iter() + .enumerate() + .map(|(row_idx, row)| { + let offset = offsets.get(row_idx).copied().unwrap_or(0); + let row_end = offset + row.len() as u16 * key_width; + match row_idx { + 0 => row_end + 3, // [B] + 1 => row_end + 3, // [E] + 2 => row_end + 3, // [S] + _ => row_end, + } + }) + .max() + .unwrap_or(0); + let start_x = inner.x + inner.width.saturating_sub(keyboard_width) / 2; for (row_idx, row) in letter_rows.iter().enumerate() { let y = inner.y + row_idx as u16; @@ -283,18 +299,18 @@ impl KeyboardDiagram<'_> { let is_next = self.next_key == Some(TAB); let is_sel = self.is_sentinel_selected(TAB); let style = 
modifier_key_style(is_dep, is_next, is_sel, colors); - buf.set_string(inner.x, y, "[T]", style); + buf.set_string(start_x, y, "[T]", style); } 2 => { let is_dep = self.shift_held; let style = modifier_key_style(is_dep, false, false, colors); - buf.set_string(inner.x, y, "[S]", style); + buf.set_string(start_x, y, "[S]", style); } _ => {} } for (col_idx, physical_key) in row.iter().enumerate() { - let x = inner.x + offset + col_idx as u16 * key_width; + let x = start_x + offset + col_idx as u16 * key_width; if x + key_width > inner.x + inner.width { break; } @@ -326,7 +342,7 @@ impl KeyboardDiagram<'_> { } // Render trailing modifier key - let row_end_x = inner.x + offset + row.len() as u16 * key_width; + let row_end_x = start_x + offset + row.len() as u16 * key_width; match row_idx { 1 => { if row_end_x + 3 <= inner.x + inner.width { @@ -351,7 +367,7 @@ impl KeyboardDiagram<'_> { // Backspace at end of first row if inner.height >= 3 { let y = inner.y; - let row_end_x = inner.x + offsets[0] + letter_rows[0].len() as u16 * key_width; + let row_end_x = start_x + offsets[0] + letter_rows[0].len() as u16 * key_width; if row_end_x + 3 <= inner.x + inner.width { let is_dep = self.depressed_keys.contains(&BACKSPACE); let is_next = self.next_key == Some(BACKSPACE); @@ -373,6 +389,24 @@ impl KeyboardDiagram<'_> { } let offsets: &[u16] = &[0, 5, 5, 6]; + let keyboard_width = self + .model + .rows + .iter() + .enumerate() + .map(|(row_idx, row)| { + let offset = offsets.get(row_idx).copied().unwrap_or(0); + let row_end = offset + row.len() as u16 * key_width; + match row_idx { + 0 => row_end + 6, // [Bksp] + 2 => row_end + 7, // [Enter] + 3 => row_end + 6, // [Shft] + _ => row_end, + } + }) + .max() + .unwrap_or(0); + let start_x = inner.x + inner.width.saturating_sub(keyboard_width) / 2; for (row_idx, row) in self.model.rows.iter().enumerate() { let y = inner.y + row_idx as u16; @@ -391,7 +425,7 @@ impl KeyboardDiagram<'_> { let is_sel = self.is_sentinel_selected(TAB); let 
style = modifier_key_style(is_dep, is_next, is_sel, colors); let label = format!("[{}]", display::key_short_label(TAB)); - buf.set_string(inner.x, y, &label, style); + buf.set_string(start_x, y, &label, style); } } 2 => { @@ -401,10 +435,10 @@ impl KeyboardDiagram<'_> { let style = Style::default() .fg(readable_fg(bg, colors.warning())) .bg(bg); - buf.set_string(inner.x, y, "[Cap]", style); + buf.set_string(start_x, y, "[Cap]", style); } else { let style = Style::default().fg(colors.text_pending()).bg(colors.bg()); - buf.set_string(inner.x, y, "[ ]", style); + buf.set_string(start_x, y, "[ ]", style); } } } @@ -412,14 +446,14 @@ impl KeyboardDiagram<'_> { if offset >= 6 { let is_dep = self.shift_held; let style = modifier_key_style(is_dep, false, false, colors); - buf.set_string(inner.x, y, "[Shft]", style); + buf.set_string(start_x, y, "[Shft]", style); } } _ => {} } for (col_idx, physical_key) in row.iter().enumerate() { - let x = inner.x + offset + col_idx as u16 * key_width; + let x = start_x + offset + col_idx as u16 * key_width; if x + key_width > inner.x + inner.width { break; } @@ -451,7 +485,7 @@ impl KeyboardDiagram<'_> { } // Render trailing modifier keys - let after_x = inner.x + offset + row.len() as u16 * key_width; + let after_x = start_x + offset + row.len() as u16 * key_width; match row_idx { 0 => { if after_x + 6 <= inner.x + inner.width { @@ -484,34 +518,13 @@ impl KeyboardDiagram<'_> { } } - // Compute full keyboard width from rendered rows (including trailing modifier keys), - // so the space bar centers relative to the keyboard, not the container. 
- let keyboard_width = self - .model - .rows - .iter() - .enumerate() - .map(|(row_idx, row)| { - let offset = offsets.get(row_idx).copied().unwrap_or(0); - let row_end = offset + row.len() as u16 * key_width; - match row_idx { - 0 => row_end + 6, // [Bksp] - 2 => row_end + 7, // [Enter] - 3 => row_end + 6, // [Shft] - _ => row_end, - } - }) - .max() - .unwrap_or(0) - .min(inner.width); - // Space bar row (row 4) let space_y = inner.y + 4; if space_y < inner.y + inner.height { let space_name = display::key_display_name(SPACE); let space_label = format!("[ {space_name} ]"); let space_width = space_label.len() as u16; - let space_x = inner.x + (keyboard_width.saturating_sub(space_width)) / 2; + let space_x = start_x + (keyboard_width.saturating_sub(space_width)) / 2; if space_x + space_width <= inner.x + inner.width { let is_dep = self.depressed_keys.contains(&SPACE); let is_next = self.next_key == Some(SPACE); @@ -527,6 +540,16 @@ impl KeyboardDiagram<'_> { let letter_rows = self.model.letter_rows(); let key_width: u16 = 5; let offsets: &[u16] = &[1, 3, 5]; + let keyboard_width = letter_rows + .iter() + .enumerate() + .map(|(row_idx, row)| { + let offset = offsets.get(row_idx).copied().unwrap_or(0); + offset + row.len() as u16 * key_width + }) + .max() + .unwrap_or(0); + let start_x = inner.x + inner.width.saturating_sub(keyboard_width) / 2; if inner.height < 3 || inner.width < 30 { return; @@ -541,7 +564,7 @@ impl KeyboardDiagram<'_> { let offset = offsets.get(row_idx).copied().unwrap_or(0); for (col_idx, physical_key) in row.iter().enumerate() { - let x = inner.x + offset + col_idx as u16 * key_width; + let x = start_x + offset + col_idx as u16 * key_width; if x + key_width > inner.x + inner.width { break; } diff --git a/src/ui/components/skill_tree.rs b/src/ui/components/skill_tree.rs index cc3ad72..fc6e953 100644 --- a/src/ui/components/skill_tree.rs +++ b/src/ui/components/skill_tree.rs @@ -118,8 +118,8 @@ impl Widget for SkillTreeWidget<'_> { let notice_lines 
= footer_notice .map(|text| wrapped_line_count(text, inner.width as usize)) .unwrap_or(0); - let show_notice = - footer_notice.is_some() && (inner.height as usize >= hint_lines.len() + notice_lines + 8); + let show_notice = footer_notice.is_some() + && (inner.height as usize >= hint_lines.len() + notice_lines + 8); let footer_needed = hint_lines.len() + if show_notice { notice_lines } else { 0 } + 1; let footer_height = footer_needed .min(inner.height.saturating_sub(5) as usize) @@ -161,7 +161,10 @@ impl Widget for SkillTreeWidget<'_> { } } footer_lines.extend(hint_lines.into_iter().map(|line| { - Line::from(Span::styled(line, Style::default().fg(colors.text_pending()))) + Line::from(Span::styled( + line, + Style::default().fg(colors.text_pending()), + )) })); let footer = Paragraph::new(footer_lines).wrap(Wrap { trim: false }); footer.render(layout[3], buf); diff --git a/src/ui/components/stats_dashboard.rs b/src/ui/components/stats_dashboard.rs index 6cb927b..6fc566e 100644 --- a/src/ui/components/stats_dashboard.rs +++ b/src/ui/components/stats_dashboard.rs @@ -6,12 +6,39 @@ use ratatui::widgets::{Block, Clear, Paragraph, Widget}; use std::collections::{BTreeSet, HashMap}; use crate::engine::key_stats::KeyStatsStore; +use crate::engine::ngram_stats::{AnomalyType, FocusSelection}; use crate::keyboard::display::{self, BACKSPACE, ENTER, MODIFIER_SENTINELS, SPACE, TAB}; use crate::keyboard::model::KeyboardModel; use crate::session::result::DrillResult; use crate::ui::components::activity_heatmap::ActivityHeatmap; use crate::ui::theme::Theme; +// --------------------------------------------------------------------------- +// N-grams tab view models +// --------------------------------------------------------------------------- + +pub struct AnomalyBigramRow { + pub bigram: String, + pub anomaly_pct: f64, + pub sample_count: usize, + pub error_count: usize, + pub error_rate_ema: f64, + pub speed_ms: f64, + pub expected_baseline: f64, + pub confirmed: bool, +} + +pub 
struct NgramTabData { + pub focus: FocusSelection, + pub error_anomalies: Vec, + pub speed_anomalies: Vec, + pub total_bigrams: usize, + pub total_trigrams: usize, + pub hesitation_threshold_ms: f64, + pub latest_trigram_gain: Option, + pub scope_label: String, +} + pub struct StatsDashboard<'a> { pub history: &'a [DrillResult], pub key_stats: &'a KeyStatsStore, @@ -24,6 +51,7 @@ pub struct StatsDashboard<'a> { pub history_selected: usize, pub history_confirm_delete: bool, pub keyboard_model: &'a KeyboardModel, + pub ngram_data: Option<&'a NgramTabData>, } impl<'a> StatsDashboard<'a> { @@ -39,6 +67,7 @@ impl<'a> StatsDashboard<'a> { history_selected: usize, history_confirm_delete: bool, keyboard_model: &'a KeyboardModel, + ngram_data: Option<&'a NgramTabData>, ) -> Self { Self { history, @@ -52,6 +81,7 @@ impl<'a> StatsDashboard<'a> { history_selected, history_confirm_delete, keyboard_model, + ngram_data, } } } @@ -92,6 +122,7 @@ impl Widget for StatsDashboard<'_> { "[3] Activity", "[4] Accuracy", "[5] Timing", + "[6] N-grams", ]; let tab_spans: Vec = tabs .iter() @@ -114,9 +145,9 @@ impl Widget for StatsDashboard<'_> { // Footer let footer_text = if self.active_tab == 1 { - " [ESC] Back [Tab] Next tab [1-5] Switch tab [j/k] Navigate [x] Delete" + " [ESC] Back [Tab] Next tab [1-6] Switch tab [j/k] Navigate [x] Delete" } else { - " [ESC] Back [Tab] Next tab [1-5] Switch tab" + " [ESC] Back [Tab] Next tab [1-6] Switch tab" }; let footer = Paragraph::new(Line::from(Span::styled( footer_text, @@ -163,6 +194,7 @@ impl StatsDashboard<'_> { 2 => self.render_activity_tab(area, buf), 3 => self.render_accuracy_tab(area, buf), 4 => self.render_timing_tab(area, buf), + 5 => self.render_ngram_tab(area, buf), _ => {} } } @@ -692,6 +724,17 @@ impl StatsDashboard<'_> { let show_shifted = inner.height >= 10; // 4 base + 4 shifted + 1 mod row + 1 spare let all_rows = &self.keyboard_model.rows; let offsets: &[u16] = &[0, 2, 3, 4]; + let kbd_width = all_rows + .iter() + .enumerate() + 
.map(|(i, row)| { + let off = offsets.get(i).copied().unwrap_or(0); + off + row.len() as u16 * key_step + }) + .max() + .unwrap_or(inner.width) + .min(inner.width); + let keyboard_x = inner.x + inner.width.saturating_sub(kbd_width) / 2; for (row_idx, row) in all_rows.iter().enumerate() { let base_y = if show_shifted { @@ -711,7 +754,7 @@ impl StatsDashboard<'_> { let shifted_y = base_y - 1; if shifted_y >= inner.y { for (col_idx, physical_key) in row.iter().enumerate() { - let x = inner.x + offset + col_idx as u16 * key_step; + let x = keyboard_x + offset + col_idx as u16 * key_step; if x + key_width > inner.x + inner.width { break; } @@ -733,7 +776,7 @@ impl StatsDashboard<'_> { // Base row for (col_idx, physical_key) in row.iter().enumerate() { - let x = inner.x + offset + col_idx as u16 * key_step; + let x = keyboard_x + offset + col_idx as u16 * key_step; if x + key_width > inner.x + inner.width { break; } @@ -745,20 +788,8 @@ impl StatsDashboard<'_> { let display = format_accuracy_cell(key, accuracy, key_width); buf.set_string(x, base_y, &display, Style::default().fg(fg_color)); } - } - // Modifier key stats row below the keyboard, spread across keyboard width - let kbd_width = all_rows - .iter() - .enumerate() - .map(|(i, row)| { - let off = offsets.get(i).copied().unwrap_or(0); - off + row.len() as u16 * key_step - }) - .max() - .unwrap_or(inner.width) - .min(inner.width); let mod_y = if show_shifted { inner.y + all_rows.len() as u16 * 2 + 1 } else { @@ -783,7 +814,7 @@ impl StatsDashboard<'_> { let accuracy = self.get_key_accuracy(key); let fg_color = accuracy_color(accuracy, colors); buf.set_string( - inner.x + positions[i], + keyboard_x + positions[i], mod_y, &labels[i], Style::default().fg(fg_color), @@ -848,6 +879,17 @@ impl StatsDashboard<'_> { let show_shifted = inner.height >= 10; // 4 base + 4 shifted + 1 mod row + 1 spare let all_rows = &self.keyboard_model.rows; let offsets: &[u16] = &[0, 2, 3, 4]; + let kbd_width = all_rows + .iter() + 
.enumerate() + .map(|(i, row)| { + let off = offsets.get(i).copied().unwrap_or(0); + off + row.len() as u16 * key_step + }) + .max() + .unwrap_or(inner.width) + .min(inner.width); + let keyboard_x = inner.x + inner.width.saturating_sub(kbd_width) / 2; for (row_idx, row) in all_rows.iter().enumerate() { let base_y = if show_shifted { @@ -866,7 +908,7 @@ impl StatsDashboard<'_> { let shifted_y = base_y - 1; if shifted_y >= inner.y { for (col_idx, physical_key) in row.iter().enumerate() { - let x = inner.x + offset + col_idx as u16 * key_step; + let x = keyboard_x + offset + col_idx as u16 * key_step; if x + key_width > inner.x + inner.width { break; } @@ -886,7 +928,7 @@ impl StatsDashboard<'_> { } for (col_idx, physical_key) in row.iter().enumerate() { - let x = inner.x + offset + col_idx as u16 * key_step; + let x = keyboard_x + offset + col_idx as u16 * key_step; if x + key_width > inner.x + inner.width { break; } @@ -897,20 +939,8 @@ impl StatsDashboard<'_> { let display = format_timing_cell(key, time_ms, key_width); buf.set_string(x, base_y, &display, Style::default().fg(fg_color)); } - } - // Modifier key stats row below the keyboard, spread across keyboard width - let kbd_width = all_rows - .iter() - .enumerate() - .map(|(i, row)| { - let off = offsets.get(i).copied().unwrap_or(0); - off + row.len() as u16 * key_step - }) - .max() - .unwrap_or(inner.width) - .min(inner.width); let mod_y = if show_shifted { inner.y + all_rows.len() as u16 * 2 + 1 } else { @@ -935,7 +965,7 @@ impl StatsDashboard<'_> { let time_ms = self.get_key_time_ms(key); let fg_color = timing_color(time_ms, colors); buf.set_string( - inner.x + positions[i], + keyboard_x + positions[i], mod_y, &labels[i], Style::default().fg(fg_color), @@ -1261,6 +1291,334 @@ impl StatsDashboard<'_> { Paragraph::new(lines).render(inner, buf); } + + // --- N-grams tab --- + + fn render_ngram_tab(&self, area: Rect, buf: &mut Buffer) { + let colors = &self.theme.colors; + + let data = match self.ngram_data { + 
Some(d) => d, + None => { + let msg = Paragraph::new(Line::from(Span::styled( + "Complete some adaptive drills to see n-gram data", + Style::default().fg(colors.text_pending()), + ))); + msg.render(area, buf); + return; + } + }; + + let layout = Layout::default() + .direction(Direction::Vertical) + .constraints([ + Constraint::Length(4), // focus box + Constraint::Min(5), // lists + Constraint::Length(2), // summary + ]) + .split(area); + + self.render_ngram_focus(data, layout[0], buf); + + let wide = layout[1].width >= 60; + if wide { + let lists = Layout::default() + .direction(Direction::Horizontal) + .constraints([Constraint::Percentage(50), Constraint::Percentage(50)]) + .split(layout[1]); + self.render_error_anomalies(data, lists[0], buf); + self.render_speed_anomalies(data, lists[1], buf); + } else { + // Stacked vertically for narrow terminals + let available = layout[1].height; + if available < 10 { + // Only show error anomalies if very little space + self.render_error_anomalies(data, layout[1], buf); + } else { + let half = available / 2; + let lists = Layout::default() + .direction(Direction::Vertical) + .constraints([Constraint::Length(half), Constraint::Min(0)]) + .split(layout[1]); + self.render_error_anomalies(data, lists[0], buf); + self.render_speed_anomalies(data, lists[1], buf); + } + } + self.render_ngram_summary(data, layout[2], buf); + } + + fn render_ngram_focus(&self, data: &NgramTabData, area: Rect, buf: &mut Buffer) { + let colors = &self.theme.colors; + + let block = Block::bordered() + .title(Line::from(Span::styled( + " Active Focus ", + Style::default() + .fg(colors.accent()) + .add_modifier(Modifier::BOLD), + ))) + .border_style(Style::default().fg(colors.accent())); + let inner = block.inner(area); + block.render(area, buf); + + if inner.height < 1 { + return; + } + + let mut lines = Vec::new(); + + match (&data.focus.char_focus, &data.focus.bigram_focus) { + (Some(ch), Some((key, anomaly_pct, anomaly_type))) => { + let bigram_label 
= format!("\"{}{}\"", key.0[0], key.0[1]); + // Line 1: both focuses + lines.push(Line::from(vec![ + Span::styled(" Focus: ", Style::default().fg(colors.fg())), + Span::styled( + format!("Char '{ch}'"), + Style::default() + .fg(colors.focused_key()) + .add_modifier(Modifier::BOLD), + ), + Span::styled(" + ", Style::default().fg(colors.fg())), + Span::styled( + format!("Bigram {bigram_label}"), + Style::default() + .fg(colors.focused_key()) + .add_modifier(Modifier::BOLD), + ), + ])); + // Line 2: details + if inner.height >= 2 { + let type_label = match anomaly_type { + AnomalyType::Error => "error", + AnomalyType::Speed => "speed", + }; + let detail = format!( + " Char '{ch}': weakest key | Bigram {bigram_label}: {type_label} anomaly {anomaly_pct:.0}%" + ); + lines.push(Line::from(Span::styled( + detail, + Style::default().fg(colors.text_pending()), + ))); + } + } + (Some(ch), None) => { + lines.push(Line::from(vec![ + Span::styled(" Focus: ", Style::default().fg(colors.fg())), + Span::styled( + format!("Char '{ch}'"), + Style::default() + .fg(colors.focused_key()) + .add_modifier(Modifier::BOLD), + ), + ])); + if inner.height >= 2 { + lines.push(Line::from(Span::styled( + format!(" Char '{ch}': weakest key, no confirmed bigram anomalies"), + Style::default().fg(colors.text_pending()), + ))); + } + } + (None, Some((key, anomaly_pct, anomaly_type))) => { + let bigram_label = format!("\"{}{}\"", key.0[0], key.0[1]); + let type_label = match anomaly_type { + AnomalyType::Error => "error", + AnomalyType::Speed => "speed", + }; + lines.push(Line::from(vec![ + Span::styled(" Focus: ", Style::default().fg(colors.fg())), + Span::styled( + format!("Bigram {bigram_label}"), + Style::default() + .fg(colors.focused_key()) + .add_modifier(Modifier::BOLD), + ), + Span::styled( + format!(" ({type_label} anomaly: {anomaly_pct:.0}%)"), + Style::default().fg(colors.text_pending()), + ), + ])); + } + (None, None) => { + lines.push(Line::from(Span::styled( + " Complete some adaptive 
drills to see focus data", + Style::default().fg(colors.text_pending()), + ))); + } + } + + Paragraph::new(lines).render(inner, buf); + } + + fn render_anomaly_panel( + &self, + title: &str, + empty_msg: &str, + rows: &[AnomalyBigramRow], + is_speed: bool, + area: Rect, + buf: &mut Buffer, + ) { + let colors = &self.theme.colors; + + let block = Block::bordered() + .title(Line::from(Span::styled( + title.to_string(), + Style::default() + .fg(colors.accent()) + .add_modifier(Modifier::BOLD), + ))) + .border_style(Style::default().fg(colors.accent())); + let inner = block.inner(area); + block.render(area, buf); + + if inner.height < 1 { + return; + } + + if rows.is_empty() { + buf.set_string( + inner.x, + inner.y, + empty_msg, + Style::default().fg(colors.text_pending()), + ); + return; + } + + let narrow = inner.width < 30; + + // Error table: Bigram Anom% Rate Errors Smp Strk + // Speed table: Bigram Anom% Speed Smp Strk + let header = if narrow { + if is_speed { + " Bgrm Speed Expct Anom%" + } else { + " Bgrm Err Smp Rate Exp Anom%" + } + } else if is_speed { + " Bigram Speed Expect Samples Anom%" + } else { + " Bigram Errors Samples Rate Expect Anom%" + }; + buf.set_string( + inner.x, + inner.y, + header, + Style::default() + .fg(colors.accent()) + .add_modifier(Modifier::BOLD), + ); + + let max_rows = (inner.height as usize).saturating_sub(1); + for (i, row) in rows.iter().take(max_rows).enumerate() { + let y = inner.y + 1 + i as u16; + if y >= inner.y + inner.height { + break; + } + + let line = if narrow { + if is_speed { + format!( + " {:>4} {:>3.0}ms {:>3.0}ms {:>4.0}%", + row.bigram, row.speed_ms, row.expected_baseline, row.anomaly_pct, + ) + } else { + format!( + " {:>4} {:>3} {:>3} {:>3.0}% {:>2.0}% {:>4.0}%", + row.bigram, + row.error_count, + row.sample_count, + row.error_rate_ema * 100.0, + row.expected_baseline * 100.0, + row.anomaly_pct, + ) + } + } else if is_speed { + format!( + " {:>6} {:>4.0}ms {:>4.0}ms {:>5} {:>4.0}%", + row.bigram, + 
row.speed_ms, + row.expected_baseline, + row.sample_count, + row.anomaly_pct, + ) + } else { + format!( + " {:>6} {:>5} {:>5} {:>4.0}% {:>4.0}% {:>5.0}%", + row.bigram, + row.error_count, + row.sample_count, + row.error_rate_ema * 100.0, + row.expected_baseline * 100.0, + row.anomaly_pct, + ) + }; + + let color = if row.confirmed { + colors.error() + } else { + colors.warning() + }; + + buf.set_string(inner.x, y, &line, Style::default().fg(color)); + } + } + + fn render_error_anomalies(&self, data: &NgramTabData, area: Rect, buf: &mut Buffer) { + let title = format!(" Error Anomalies ({}) ", data.error_anomalies.len()); + self.render_anomaly_panel( + &title, + " No error anomalies detected", + &data.error_anomalies, + false, + area, + buf, + ); + } + + fn render_speed_anomalies(&self, data: &NgramTabData, area: Rect, buf: &mut Buffer) { + let title = format!(" Speed Anomalies ({}) ", data.speed_anomalies.len()); + self.render_anomaly_panel( + &title, + " No speed anomalies detected", + &data.speed_anomalies, + true, + area, + buf, + ); + } + + fn render_ngram_summary(&self, data: &NgramTabData, area: Rect, buf: &mut Buffer) { + let colors = &self.theme.colors; + + let gain_str = match data.latest_trigram_gain { + Some(g) => format!("{:.1}%", g * 100.0), + None => "--".to_string(), + }; + let gain_note = if data.latest_trigram_gain.is_none() { + " (computed every 50 drills)" + } else { + "" + }; + + let line = format!( + " Scope: {} | Bigrams: {} | Trigrams: {} | Hesitation: >{:.0}ms | Tri-gain: {}{}", + data.scope_label, + data.total_bigrams, + data.total_trigrams, + data.hesitation_threshold_ms, + gain_str, + gain_note, + ); + + buf.set_string( + area.x, + area.y, + &line, + Style::default().fg(colors.text_pending()), + ); + } } fn accuracy_color(accuracy: f64, colors: &crate::ui::theme::ThemeColors) -> ratatui::style::Color { @@ -1501,3 +1859,79 @@ fn format_duration(secs: f64) -> String { format!("{s}s") } } + +/// Compute the ngram tab panel layout for the 
given terminal area. +/// Returns `(wide, lists_area_height)` where: +/// - `wide` = true means side-by-side anomaly panels (width >= 60) +/// - `lists_area_height` = height available for the anomaly panels region +/// +/// When `!wide && lists_area_height < 10`, only error anomalies should render. +#[cfg(test)] +fn ngram_panel_layout(area: Rect) -> (bool, u16) { + let layout = Layout::default() + .direction(Direction::Vertical) + .constraints([ + Constraint::Length(4), // focus box + Constraint::Min(5), // lists + Constraint::Length(2), // summary + ]) + .split(area); + let wide = layout[1].width >= 60; + (wide, layout[1].height) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn narrow_short_terminal_shows_only_error_panel() { + // 50 cols × 15 rows: narrow (<60) so panels stack vertically. + // lists area = 15 - 4 (focus) - 2 (summary) = 9 rows → < 10 → error only. + let area = Rect::new(0, 0, 50, 15); + let (wide, lists_height) = ngram_panel_layout(area); + assert!(!wide, "50 cols should be narrow layout"); + assert!( + lists_height < 10, + "lists_height={lists_height}, expected < 10 so only error panel renders" + ); + } + + #[test] + fn narrow_tall_terminal_stacks_both_panels() { + // 50 cols × 30 rows: narrow (<60) so panels stack vertically. + // lists area = 30 - 4 - 2 = 24 rows → >= 10 → both panels stacked. + let area = Rect::new(0, 0, 50, 30); + let (wide, lists_height) = ngram_panel_layout(area); + assert!(!wide, "50 cols should be narrow layout"); + assert!( + lists_height >= 10, + "lists_height={lists_height}, expected >= 10 so both panels stack vertically" + ); + } + + #[test] + fn wide_terminal_shows_side_by_side_panels() { + // 80 cols × 24 rows: wide (>= 60) so panels render side by side. 
+ let area = Rect::new(0, 0, 80, 24); + let (wide, _) = ngram_panel_layout(area); + assert!( + wide, + "80 cols should be wide layout with side-by-side panels" + ); + } + + #[test] + fn boundary_width_59_is_narrow() { + let area = Rect::new(0, 0, 59, 24); + let (wide, _) = ngram_panel_layout(area); + assert!(!wide, "59 cols should be narrow"); + } + + #[test] + fn boundary_width_60_is_wide() { + let area = Rect::new(0, 0, 60, 24); + let (wide, _) = ngram_panel_layout(area); + assert!(wide, "60 cols should be wide"); + } +} diff --git a/src/ui/components/typing_area.rs b/src/ui/components/typing_area.rs index 39db2ea..84f5889 100644 --- a/src/ui/components/typing_area.rs +++ b/src/ui/components/typing_area.rs @@ -103,7 +103,9 @@ fn contrast_ratio(a: ratatui::style::Color, b: ratatui::style::Color) -> f64 { (hi + 0.05) / (lo + 0.05) } -fn choose_cursor_colors(colors: &crate::ui::theme::ThemeColors) -> (ratatui::style::Color, ratatui::style::Color) { +fn choose_cursor_colors( + colors: &crate::ui::theme::ThemeColors, +) -> (ratatui::style::Color, ratatui::style::Color) { use ratatui::style::Color; let base_bg = colors.bg(); @@ -113,7 +115,13 @@ fn choose_cursor_colors(colors: &crate::ui::theme::ThemeColors) -> (ratatui::sty if contrast_ratio(cursor_bg, base_bg) < 1.8 { let mut best_bg = cursor_bg; let mut best_ratio = contrast_ratio(cursor_bg, base_bg); - for candidate in [colors.accent(), colors.focused_key(), colors.warning(), Color::Black, Color::White] { + for candidate in [ + colors.accent(), + colors.focused_key(), + colors.warning(), + Color::Black, + Color::White, + ] { let ratio = contrast_ratio(candidate, base_bg); if ratio > best_ratio { best_bg = candidate; diff --git a/src/ui/layout.rs b/src/ui/layout.rs index c4f3a15..5d61f28 100644 --- a/src/ui/layout.rs +++ b/src/ui/layout.rs @@ -91,8 +91,12 @@ pub fn centered_rect(percent_x: u16, percent_y: u16, area: Rect) -> Rect { let target_w = requested_w.max(MIN_POPUP_WIDTH).min(area.width); let target_h = 
requested_h.max(MIN_POPUP_HEIGHT).min(area.height); - let left = area.x.saturating_add((area.width.saturating_sub(target_w)) / 2); - let top = area.y.saturating_add((area.height.saturating_sub(target_h)) / 2); + let left = area + .x + .saturating_add((area.width.saturating_sub(target_w)) / 2); + let top = area + .y + .saturating_add((area.height.saturating_sub(target_h)) / 2); Rect::new(left, top, target_w, target_h) }