N-gram error tracking for adaptive drill selection

This commit is contained in:
2026-02-24 14:55:51 -05:00
parent 0c5a70d5c4
commit e7f57dd497
11 changed files with 2244 additions and 10 deletions

214
Cargo.lock generated
View File

@@ -26,6 +26,12 @@ dependencies = [
"libc",
]
[[package]]
name = "anes"
version = "0.1.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4b46cbb362ab8752921c97e041f5e366ee6297bd428a31275b9fcf1e380f7299"
[[package]]
name = "anstream"
version = "0.6.21"
@@ -163,6 +169,12 @@ version = "1.11.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1e748733b7cbc798e1434b6ac524f0c1ff2ab456fe201501e6497c8417a4fc33"
[[package]]
name = "cast"
version = "0.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "37b2a672a2cb129a2e41c10b1224bb368f9f37a2b16b612598138befd7b37eb5"
[[package]]
name = "castaway"
version = "0.2.4"
@@ -208,6 +220,33 @@ dependencies = [
"windows-link",
]
[[package]]
name = "ciborium"
version = "0.2.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "42e69ffd6f0917f5c029256a24d0161db17cea3997d185db0d35926308770f0e"
dependencies = [
"ciborium-io",
"ciborium-ll",
"serde",
]
[[package]]
name = "ciborium-io"
version = "0.2.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "05afea1e0a06c9be33d539b876f1ce3692f4afea2cb41f740e7743225ed1c757"
[[package]]
name = "ciborium-ll"
version = "0.2.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "57663b653d948a338bfb3eeba9bb2fd5fcfaecb9e199e87e1eda4d9e8b240fd9"
dependencies = [
"ciborium-io",
"half",
]
[[package]]
name = "clap"
version = "4.5.57"
@@ -302,6 +341,67 @@ dependencies = [
"libc",
]
[[package]]
name = "criterion"
version = "0.5.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f2b12d017a929603d80db1831cd3a24082f8137ce19c69e6447f54f5fc8d692f"
dependencies = [
"anes",
"cast",
"ciborium",
"clap",
"criterion-plot",
"is-terminal",
"itertools 0.10.5",
"num-traits",
"once_cell",
"oorandom",
"plotters",
"rayon",
"regex",
"serde",
"serde_derive",
"serde_json",
"tinytemplate",
"walkdir",
]
[[package]]
name = "criterion-plot"
version = "0.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6b50826342786a51a89e2da3a28f1c32b06e387201bc2d19791f622c673706b1"
dependencies = [
"cast",
"itertools 0.10.5",
]
[[package]]
name = "crossbeam-deque"
version = "0.8.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9dd111b7b7f7d55b72c0a6ae361660ee5853c9af73f70c3c2ef6858b950e2e51"
dependencies = [
"crossbeam-epoch",
"crossbeam-utils",
]
[[package]]
name = "crossbeam-epoch"
version = "0.9.18"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5b82ac4a3c2ca9c3460964f020e1402edd5753411d7737aa39c3714ad1b5420e"
dependencies = [
"crossbeam-utils",
]
[[package]]
name = "crossbeam-utils"
version = "0.8.21"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d0a5c400df2834b80a4c3327b3aad3a4c4cd4de0629063962b03235697506a28"
[[package]]
name = "crossterm"
version = "0.28.1"
@@ -345,6 +445,12 @@ dependencies = [
"winapi",
]
[[package]]
name = "crunchy"
version = "0.2.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "460fbee9c2c2f33933d720630a6a0bac33ba7053db5344fac858d4b8952d77d5"
[[package]]
name = "crypto-common"
version = "0.1.7"
@@ -710,6 +816,17 @@ dependencies = [
"tracing",
]
[[package]]
name = "half"
version = "2.7.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6ea2d84b969582b4b1864a92dc5d27cd2b77b622a8d79306834f1be5ba20d84b"
dependencies = [
"cfg-if",
"crunchy",
"zerocopy",
]
[[package]]
name = "hashbrown"
version = "0.16.1"
@@ -727,6 +844,12 @@ version = "0.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea"
[[package]]
name = "hermit-abi"
version = "0.5.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fc0fef456e4baa96da950455cd02c081ca953b141298e41db3fc7e36b1da849c"
[[package]]
name = "hex"
version = "0.4.3"
@@ -1031,12 +1154,32 @@ dependencies = [
"serde",
]
[[package]]
name = "is-terminal"
version = "0.4.17"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3640c1c38b8e4e43584d8df18be5fc6b0aa314ce6ebf51b53313d4306cca8e46"
dependencies = [
"hermit-abi",
"libc",
"windows-sys 0.61.2",
]
[[package]]
name = "is_terminal_polyfill"
version = "1.70.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a6cb138bb79a146c1bd460005623e142ef0181e3d0219cb493e02f7d08a35695"
[[package]]
name = "itertools"
version = "0.10.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b0fd2260e829bddf4cb6ea802289de2f86d6a7a690192fbe91b3f46e0f2c8473"
dependencies = [
"either",
]
[[package]]
name = "itertools"
version = "0.14.0"
@@ -1080,6 +1223,7 @@ dependencies = [
"anyhow",
"chrono",
"clap",
"criterion",
"crossterm 0.28.1",
"dirs",
"rand",
@@ -1320,6 +1464,12 @@ version = "1.70.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "384b8ab6d37215f3c5301a95a4accb5d64aa607f1fcb26a11b5303878451b4fe"
[[package]]
name = "oorandom"
version = "11.1.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d6790f58c7ff633d8771f42965289203411a5e5c68388703c06e14f24770b41e"
[[package]]
name = "openssl"
version = "0.10.75"
@@ -1521,6 +1671,34 @@ version = "0.3.32"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7edddbd0b52d732b21ad9a5fab5c704c14cd949e5e9a1ec5929a24fded1b904c"
[[package]]
name = "plotters"
version = "0.3.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5aeb6f403d7a4911efb1e33402027fc44f29b5bf6def3effcc22d7bb75f2b747"
dependencies = [
"num-traits",
"plotters-backend",
"plotters-svg",
"wasm-bindgen",
"web-sys",
]
[[package]]
name = "plotters-backend"
version = "0.3.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "df42e13c12958a16b3f7f4386b9ab1f3e7933914ecea48da7139435263a4172a"
[[package]]
name = "plotters-svg"
version = "0.3.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "51bae2ac328883f7acdfea3d66a7c35751187f870bc81f94563733a154d7a670"
dependencies = [
"plotters-backend",
]
[[package]]
name = "portable-atomic"
version = "1.13.1"
@@ -1629,7 +1807,7 @@ dependencies = [
"compact_str",
"hashbrown",
"indoc",
"itertools",
"itertools 0.14.0",
"kasuari",
"lru",
"strum",
@@ -1682,7 +1860,7 @@ dependencies = [
"hashbrown",
"indoc",
"instability",
"itertools",
"itertools 0.14.0",
"line-clipping",
"ratatui-core",
"strum",
@@ -1691,6 +1869,26 @@ dependencies = [
"unicode-width",
]
[[package]]
name = "rayon"
version = "1.11.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "368f01d005bf8fd9b1206fb6fa653e6c4a81ceb1466406b81792d87c5677a58f"
dependencies = [
"either",
"rayon-core",
]
[[package]]
name = "rayon-core"
version = "1.13.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "22e18b0f0062d30d4230b2e85ff77fdfe4326feb054b9783a3460d8435c8ab91"
dependencies = [
"crossbeam-deque",
"crossbeam-utils",
]
[[package]]
name = "redox_syscall"
version = "0.5.18"
@@ -2358,6 +2556,16 @@ dependencies = [
"zerovec",
]
[[package]]
name = "tinytemplate"
version = "1.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "be4d6b5f19ff7664e8c98d03e2139cb510db9b0a60b55f8e8709b689d939b6bc"
dependencies = [
"serde",
"serde_json",
]
[[package]]
name = "tokio"
version = "1.49.0"
@@ -2546,7 +2754,7 @@ version = "2.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "16b380a1238663e5f8a691f9039c73e1cdae598a30e9855f541d29b08b53e9a5"
dependencies = [
"itertools",
"itertools 0.14.0",
"unicode-segmentation",
"unicode-width",
]

View File

@@ -21,6 +21,11 @@ reqwest = { version = "0.12", features = ["blocking"], optional = true }
[dev-dependencies]
tempfile = "3"
criterion = { version = "0.5", features = ["html_reports"] }
[[bench]]
name = "ngram_benchmarks"
harness = false
[features]
default = ["network"]

144
benches/ngram_benchmarks.rs Normal file
View File

@@ -0,0 +1,144 @@
use criterion::{Criterion, black_box, criterion_group, criterion_main};
use keydr::engine::key_stats::KeyStatsStore;
use keydr::engine::ngram_stats::{
BigramKey, BigramStatsStore, TrigramStatsStore, extract_ngram_events,
};
use keydr::session::result::KeyTime;
/// Build a deterministic keystroke fixture of `count` entries.
///
/// Cycles through ten lowercase letters with slightly varying timings
/// (200–249 ms) and marks roughly one keystroke in seven as incorrect
/// (~14% error rate), so benchmarks see a realistic error mix.
fn make_keystrokes(count: usize) -> Vec<KeyTime> {
    const CYCLE: [char; 10] = ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j'];
    let mut strokes = Vec::with_capacity(count);
    for i in 0..count {
        strokes.push(KeyTime {
            key: CYCLE[i % CYCLE.len()],
            time_ms: 200.0 + (i % 50) as f64,
            correct: i % 7 != 0, // every 7th keystroke is an error
        });
    }
    strokes
}
/// Benchmark n-gram event extraction over a fixed 500-keystroke drill.
///
/// Budget (from the design doc): < 1ms per drill.
fn bench_extraction(c: &mut Criterion) {
    let keystrokes = make_keystrokes(500);
    c.bench_function("extract_ngram_events (500 keystrokes)", |b| {
        b.iter(|| {
            // black_box the input so the extraction isn't const-folded away.
            let input = black_box(&keystrokes);
            extract_ngram_events(input, 800.0)
        })
    });
}
/// Benchmark incremental bigram-store updates for 400 pre-extracted events.
///
/// Builds a fresh store inside the timed closure so each iteration
/// measures cold-map insertion plus EMA updates, matching the per-drill
/// update budget (< 1ms for ~400 events).
fn bench_update(c: &mut Criterion) {
    let keystrokes = make_keystrokes(500);
    let (bigram_events, _) = extract_ngram_events(&keystrokes, 800.0);
    c.bench_function("bigram_stats update (400 events)", |b| {
        b.iter(|| {
            let mut store = BigramStatsStore::default();
            // Feed at most the first 400 events through the store.
            bigram_events.iter().take(400).for_each(|ev| {
                store.update(
                    black_box(ev.key.clone()),
                    black_box(ev.total_time_ms),
                    black_box(ev.correct),
                    black_box(ev.has_hesitation),
                    0,
                );
            });
            store
        })
    });
}
/// Benchmark weakest-bigram focus selection over a 3,000-entry store.
///
/// Seeds 62 unlocked characters (a-z, A-Z, 0-9) with healthy char-level
/// stats, then fills the bigram store with 3,000 synthetic entries of
/// varying confidence/error/streak profiles before timing
/// `weakest_bigram`. Budget (from the design doc): < 5ms.
fn bench_focus_selection(c: &mut Criterion) {
    // Use a-z + A-Z + 0-9 = 62 chars for up to 3844 unique bigrams
    let all_chars: Vec<char> = ('a'..='z').chain('A'..='Z').chain('0'..='9').collect();
    let mut bigram_stats = BigramStatsStore::default();
    let mut char_stats = KeyStatsStore::default();
    for &ch in &all_chars {
        let stat = char_stats.stats.entry(ch).or_default();
        stat.confidence = 0.8;
        stat.filtered_time_ms = 430.0;
        stat.sample_count = 50;
        stat.total_count = 50;
        stat.error_count = 3;
    }
    let mut count: usize = 0;
    // Labeled break exits BOTH loops as soon as the 3,000-entry cap is
    // reached; a plain `break` only leaves the inner loop and would
    // pointlessly re-enter it once per remaining outer char.
    'fill: for &a in &all_chars {
        for &b in &all_chars {
            if bigram_stats.stats.len() >= 3000 {
                break 'fill;
            }
            let key = BigramKey([a, b]);
            let stat = bigram_stats.stats.entry(key).or_default();
            stat.confidence = 0.5 + (count % 50) as f64 * 0.01;
            stat.sample_count = 25 + count % 30;
            stat.error_count = 5 + count % 10;
            stat.redundancy_streak = if count % 3 == 0 { 3 } else { 1 };
            count += 1;
        }
    }
    assert_eq!(bigram_stats.stats.len(), 3000);
    let unlocked: Vec<char> = all_chars;
    c.bench_function("weakest_bigram (3K entries)", |b| {
        b.iter(|| {
            bigram_stats.weakest_bigram(black_box(&char_stats), black_box(&unlocked))
        })
    });
}
/// Benchmark a full cold-start rebuild: replay 500 drills of ~300
/// keystrokes each through extraction plus bigram/trigram/char-stat
/// updates. Budget (from the design doc): < 500ms.
fn bench_history_replay(c: &mut Criterion) {
    // Fixture: 500 drills, ~300 keystrokes apiece.
    let drills: Vec<Vec<KeyTime>> = (0..500).map(|_| make_keystrokes(300)).collect();
    c.bench_function("history replay (500 drills x 300 keystrokes)", |b| {
        b.iter(|| {
            let mut bigram_stats = BigramStatsStore::default();
            let mut trigram_stats = TrigramStatsStore::default();
            let mut key_stats = KeyStatsStore::default();
            for (drill_idx, keystrokes) in drills.iter().enumerate() {
                let drill = drill_idx as u32;
                let (bigram_events, trigram_events) =
                    extract_ngram_events(keystrokes, 800.0);
                // Char-level bookkeeping mirrors the startup replay path.
                for kt in keystrokes {
                    if kt.correct {
                        key_stats.stats.entry(kt.key).or_default().total_count += 1;
                    } else {
                        key_stats.update_key_error(kt.key);
                    }
                }
                bigram_events.iter().for_each(|ev| {
                    bigram_stats.update(
                        ev.key.clone(),
                        ev.total_time_ms,
                        ev.correct,
                        ev.has_hesitation,
                        drill,
                    );
                });
                trigram_events.iter().for_each(|ev| {
                    trigram_stats.update(
                        ev.key.clone(),
                        ev.total_time_ms,
                        ev.correct,
                        ev.has_hesitation,
                        drill,
                    );
                });
            }
            (bigram_stats, trigram_stats, key_stats)
        })
    });
}
// Register all benchmark functions and generate the criterion `main`
// entry point (Cargo.toml sets `harness = false` for this bench target).
criterion_group!(
    benches,
    bench_extraction,
    bench_update,
    bench_focus_selection,
    bench_history_replay,
);
criterion_main!(benches);

View File

@@ -0,0 +1,338 @@
# N-gram Error Tracking for Adaptive Drill Selection
## Context
keydr currently tracks typing errors at the single-character level only. The adaptive algorithm picks the weakest character by confidence score and biases drill text to include words containing that character. This misses **transition difficulties** -- sequences where individual characters are easy but the combination is hard (e.g., same-finger bigrams, awkward hand transitions). Research strongly supports that these transition effects are real and distinct from single-character difficulty.
**Goal:** Add bigram (n=2) and trigram (n=3) error tracking, with a redundancy detection formula that distinguishes genuine transition difficulties from errors that are just proxies for single-character weakness. Integrate problematic bigrams into the adaptive drill selection pipeline. Trigrams are tracked for observation only and not used for drill generation until empirically proven useful.
---
## Research Summary
1. **N-gram tracking is genuinely novel** -- No existing typing tutor does comprehensive n-gram *error* tracking with adaptive drill selection.
2. **Bigrams capture real, distinct information** -- The 136M Keystrokes study (Dhakal et al., CHI 2018) found letter pairs typed by different hands are more predictive of speed than character repetitions. This cannot be inferred from single-char data.
3. **Motor chunking is real** -- The motor cortex plans keystrokes in chunks, not individually. Single-character optimization misses this.
4. **Bigrams are the sweet spot** -- Nearly all keyboard layout research focuses on bigrams. Trigrams likely offer diminishing returns.
---
## Core Innovation: Redundancy Detection
The key question: "Is a high-error bigram just a proxy for a high-error character?"
### Error Rate Estimation (Laplace-smoothed)
Raw error rates are unstable at low sample counts. All error rates use Laplace smoothing:
```
smoothed_error_rate(errors, samples) = (errors + 1) / (samples + 2)
```
This gives a Bayesian prior of 50% error rate that gets pulled toward the true rate as samples accumulate. At 10 samples with 3 errors, this yields 0.333 instead of raw 0.3 -- a small correction. At 2 samples with 2 errors, it yields 0.75 instead of raw 1.0 -- stabilizing the estimate away from an extreme value that two data points cannot justify.
### Bigram Redundancy Formula
For bigram "ab" with characters `a` and `b`:
```
e_a = smoothed_error_rate(char_a.errors, char_a.samples)
e_b = smoothed_error_rate(char_b.errors, char_b.samples)
e_ab = smoothed_error_rate(bigram_ab.errors, bigram_ab.samples)
expected_ab = 1.0 - (1.0 - e_a) * (1.0 - e_b)
redundancy_ab = e_ab / max(expected_ab, 0.01)
```
### Trigram Redundancy Formula
For trigram "abc", redundancy is computed against BOTH individual chars AND constituent bigrams:
```
// Expected from chars alone (independence assumption)
expected_from_chars = 1.0 - (1.0 - e_a) * (1.0 - e_b) * (1.0 - e_c)
// Expected from bigrams (takes the max -- if either bigram explains the error, no trigram signal)
expected_from_bigrams = max(e_ab, e_bc)
// Use the higher expectation (harder to exceed = more conservative)
expected_abc = max(expected_from_chars, expected_from_bigrams)
redundancy_abc = e_abc / max(expected_abc, 0.01)
```
This ensures trigrams only flag as informative when NEITHER the individual characters NOR constituent bigrams explain the difficulty.
### Focus Eligibility (Stability-Gated)
An n-gram becomes eligible for focus only when ALL conditions hold:
1. `sample_count >= 20` -- minimum statistical reliability
2. `redundancy > 1.5` -- genuine transition difficulty, not a proxy
3. `redundancy_stable == true` -- the redundancy score has been > 1.5 for the last 3 consecutive update checks (prevents focus flapping from noisy estimates)
The **difficulty score** for ranking eligible n-grams:
```
ngram_difficulty = (1.0 - confidence) * redundancy
```
### Worked Examples
**Example 1 -- Proxy (should NOT focus):** User struggles with 's'. `e_s = 0.25`, `e_i = 0.03`. Expected bigram "is" error: `1 - 0.75 * 0.97 = 0.273`. Observed "is" error: `0.28`. Redundancy: `0.28 / 0.273 = 1.03`. This is ~1.0, confirming "is" errors are just 's' errors. Not eligible.
**Example 2 -- Genuine difficulty (should focus):** User is fine with 'e' and 'd' individually. `e_e = 0.04`, `e_d = 0.05`. Expected "ed" error: `1 - 0.96 * 0.95 = 0.088`. Observed "ed" error: `0.22`. Redundancy: `0.22 / 0.088 = 2.5`. This exceeds 1.5 -- the "ed" transition is genuinely hard. Eligible for focus.
**Example 3 -- Trigram vs bigram:** `e_t = 0.03`, `e_h = 0.04`, `e_e = 0.04`. Bigram `e_th = 0.15` (genuine difficulty). Expected trigram "the" from chars: `1 - 0.97 * 0.96 * 0.96 = 0.106`. Expected from bigrams: `max(0.15, 0.04) = 0.15`. Observed "the" error: `0.16`. Redundancy: `0.16 / 0.15 = 1.07`. Not significant -- the "th" bigram already explains the trigram difficulty. Trigram NOT eligible.
---
## Confidence Scale
`NgramStat.confidence` uses the same formula as `KeyStat.confidence`:
```
target_time_ms = 60000.0 / target_cpm // 342.86ms at 175 CPM
confidence = target_time_ms / filtered_time_ms
```
- `confidence < 1.0`: Slower than target (needs practice)
- `confidence == 1.0`: Exactly at target speed
- `confidence > 1.0`: Faster than target (mastered)
For n-grams, `target_time_ms` scales linearly with order: a bigram target is `2 * single_char_target`, a trigram target is `3 * single_char_target`. This is approximate but consistent.
---
## Hesitation Tracking
Hesitations indicate cognitive uncertainty even when the correct key is pressed. The threshold is **relative to the user's rolling baseline**:
```
hesitation_threshold = max(800.0, 2.5 * user_median_transition_ms)
```
Where `user_median_transition_ms` is the median of the user's last 200 inter-keystroke intervals across all drills. The 800ms absolute floor prevents the threshold from being too low for fast typists. The 2.5x multiplier flags transitions that are notably slower than the user's norm.
`user_median_transition_ms` is stored as a single rolling value on the App struct, updated from `per_key_times` after each drill.
---
## N-gram Key Representation
N-gram keys use typed arrays instead of strings to avoid encoding/canonicalization issues:
```rust
#[derive(Clone, Debug, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub struct BigramKey(pub [char; 2]);
#[derive(Clone, Debug, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub struct TrigramKey(pub [char; 3]);
```
**Normalization rules** (applied at extraction boundary in `extract_ngram_events`):
- All characters are Unicode scalar values (Rust `char`) -- no grapheme cluster handling needed since the app only supports ASCII typing
- No case folding -- 'A' and 'a' are distinct (they require different motor actions: shift+a vs a)
- Punctuation is included (transitions to/from punctuation are legitimate motor sequences)
- BACKSPACE characters are filtered out before windowing
- Space characters split windows (no cross-word-boundary n-grams)
---
## Implementation
### Phase 1: Core Data Structures & Extraction
**New file: `src/engine/ngram_stats.rs`**
- `BigramKey(pub [char; 2])` and `TrigramKey(pub [char; 3])` -- typed keys with Hash/Eq/Serialize
- `NgramStat` struct:
- `filtered_time_ms: f64` -- EMA-smoothed transition time (alpha=0.1)
- `best_time_ms: f64` -- personal best EMA time
- `confidence: f64` -- `(target_time_ms * order) / filtered_time_ms`
- `sample_count: usize` -- total observations
- `error_count: usize` -- total errors (mistype or hesitation)
- `hesitation_count: usize` -- total hesitations specifically
- `recent_times: Vec<f64>` -- last 30 observations
- `recent_correct: Vec<bool>` -- last 30 correctness values
- `redundancy_streak: u8` -- consecutive updates where redundancy > 1.5 (for stability gate, max 255)
- `BigramStatsStore` -- `HashMap<BigramKey, NgramStat>` (concrete, not generic)
- `update(&mut self, key: BigramKey, time_ms: f64, correct: bool, hesitation: bool, drill_idx: u32)` -- `drill_idx` records when the n-gram was last seen, feeding the recency term of the pruning utility score
- `get_confidence(&self, key: &BigramKey) -> f64`
- `smoothed_error_rate(&self, key: &BigramKey) -> f64` -- Laplace-smoothed
- `redundancy_score(&self, key: &BigramKey, char_stats: &KeyStatsStore) -> f64`
- `weakest_bigram(&self, char_stats: &KeyStatsStore, unlocked: &[char]) -> Option<(BigramKey, f64)>` -- stability-gated
- `TrigramStatsStore` -- `HashMap<TrigramKey, NgramStat>` (concrete, not generic)
- Same update/query methods as BigramStatsStore
- `prune(&mut self, max_entries: usize)` -- composite utility pruning (see below)
- Internal: shared helper functions/trait for the common EMA update logic to avoid duplication between bigram and trigram stores
- `BigramEvent` / `TrigramEvent` structs -- `{ key, total_time_ms, correct, has_hesitation }`
- `extract_ngram_events(per_key_times: &[KeyTime], hesitation_threshold: f64) -> (Vec<BigramEvent>, Vec<TrigramEvent>)` -- single pass, returns both orders
- `FocusTarget` enum -- `Char(char) | Bigram(BigramKey)` -- lives in `src/engine/ngram_stats.rs`, re-exported from `src/engine/mod.rs`
**Note:** `KeyStatsStore` needs a new method `smoothed_error_rate(key: char) -> f64` to provide Laplace-smoothed error rates. This requires adding `error_count` to `KeyStat`. Currently `KeyStat` only tracks timing for correct keystrokes -- we need to also count errors. Add `error_count: usize` and `total_count: usize` fields to `KeyStat`, increment in `update_key()`. Use `#[serde(default)]` for backward compat on deserialization.
**Modify: `src/engine/key_stats.rs`** (additive)
- Add `error_count: usize` and `total_count: usize` to `KeyStat` with `#[serde(default)]`
- Add `update_key_error(&mut self, key: char)` -- increments error/total counts without updating timing
- Add `smoothed_error_rate(&self, key: char) -> f64` -- Laplace-smoothed
**Modify: `src/engine/mod.rs`** (additive) -- add `pub mod ngram_stats`, re-export `FocusTarget`
**Extraction detail:** For bigram "th", transition time = `window[1].time_ms`. For trigram "the", transition time = `window[1].time_ms + window[2].time_ms`. The first element's `time_ms` is the transition FROM the previous character and is NOT part of this n-gram.
### Phase 2: Persistence (Replay-Only, No Caching)
**Architecture:** `drill_history` (lesson_history.json) is the **sole source of truth**. N-gram stats are **always rebuilt from drill history** on startup. There are no separate n-gram cache files in this initial implementation. This eliminates all cache coherency concerns at the cost of ~200-500ms startup replay. Caching can be added later as an optimization if rebuild latency becomes problematic.
**Modify: `src/store/schema.rs`** (additive)
- Add concrete `BigramStatsData { stats: BigramStatsStore }` with Default impl
- Add concrete `TrigramStatsData { stats: TrigramStatsStore }` with Default impl
- These types are used for export/import serialization only, not for runtime caching
**Modify: `src/app.rs`** (additive + modify existing)
- Add 4 fields to `App`: `bigram_stats`, `ranked_bigram_stats`, `trigram_stats`, `ranked_trigram_stats`
- Add `user_median_transition_ms: f64` and `transition_buffer: Vec<f64>` (rolling last 200 intervals)
- On startup: rebuild all n-gram stats + hesitation baseline by replaying `drill_history`
- `save_data()`: no n-gram files to save (stats are always derived)
**Trigram pruning:** Max 5,000 entries. Prune by composite utility score after history replay:
```
utility = recency_weight * (1.0 / (drills_since_last_seen + 1))
+ signal_weight * redundancy_score.min(3.0)
+ data_weight * (sample_count as f64).ln()
```
Where `recency_weight=0.3`, `signal_weight=0.5`, `data_weight=0.2`. Entries with highest utility are kept. This preserves rare-but-informative trigrams over frequent-but-noisy ones.
### Phase 3: Drill Integration
**Modify: `src/app.rs` -- `finish_drill()`** (modify existing, after line 847)
- Compute `hesitation_threshold = max(800.0, 2.5 * self.user_median_transition_ms)`
- Call `extract_ngram_events(&result.per_key_times, hesitation_threshold)`
- Update `bigram_stats` and `trigram_stats` with each event
- For incorrect keystrokes: also call `self.key_stats.update_key_error(kt.key)` to build char-level error counts
- Same pattern for ranked stats in the ranked block (after line 854)
- Update `transition_buffer` and recompute `user_median_transition_ms`
**Modify: `src/app.rs` -- `finish_partial_drill()`** -- same pattern
**Hesitation baseline rebuild:** During startup history replay, also accumulate transition times into `transition_buffer` to rebuild `user_median_transition_ms`. This ensures the hesitation threshold is consistent across restarts.
### Phase 4: Adaptive Focus Selection (Bigram Only)
The focus pipeline uses a **thin adapter at the App boundary** rather than changing generator signatures directly. This minimizes cross-cutting risk.
**Modify: `src/app.rs` -- `generate_text()`** (modify existing, line 628)
```rust
// Adapter: compute focus target, then decompose into existing generator knobs
let focus_target = select_focus_target(
&self.skill_tree, scope, &self.ranked_key_stats, &self.ranked_bigram_stats
);
let (focused_char, focused_bigram) = match &focus_target {
FocusTarget::Char(ch) => (Some(*ch), None),
FocusTarget::Bigram(key) => (Some(key.0[0]), Some(key.clone())),
};
// Existing generators use focused_char unchanged
let mut text = generator.generate(&filter, lowercase_focused_char, word_count);
// ... existing capitalize/punctuate/numbers pipeline unchanged ...
// After all generation: if bigram focus, swap some words for bigram-containing words
if let Some(ref bigram) = focused_bigram {
text = self.apply_bigram_focus(&text, &filter, bigram);
}
```
**New method on `App`: `apply_bigram_focus()`**
- Scans generated words, replaces up to 40% with dictionary words containing the target bigram
- Only replaces when suitable alternatives exist and pass the CharFilter
- Maintains word count and approximate text length
- **Diversity cap:** No more than 3 consecutive bigram-focused words to prevent repetitive feel
This approach keeps ALL existing generator APIs unchanged. If the adapter proves insufficient (e.g., bigram-focused words are too rare in dictionary), we can widen generator APIs in a follow-up.
**Focus selection logic** (new function `select_focus_target()` in `src/engine/ngram_stats.rs`):
1. Compute weakest single character via existing `focused_key()`
2. Compute weakest eligible bigram via `weakest_bigram()` (stability-gated: sample >= 20, redundancy > 1.5 for 3 consecutive checks)
3. If bigram `ngram_difficulty > char_difficulty * 0.8`, focus on bigram
4. Otherwise, fall back to single-char focus
### Phase 5: Information Gain Analysis (Trigram Observation)
**Add to `src/engine/ngram_stats.rs`:**
```rust
pub fn trigram_marginal_gain(
trigram_stats: &TrigramStatsStore,
bigram_stats: &BigramStatsStore,
char_stats: &KeyStatsStore,
) -> f64
```
Computes what fraction of trigrams with >= 20 samples have `redundancy > 1.5` vs their constituent bigrams. Returns a value in `[0.0, 1.0]`.
- Called every 50 drills, result logged to a `trigram_gain_history: Vec<f64>` on the App
- If the most recent 3 measurements all show gain > 10%, trigrams could be promoted to active focus (future work)
- This metric is primarily for analysis -- it answers "are trigrams adding value beyond bigrams for this user?"
### Phase 6: Export/Import
**Modify: `src/store/schema.rs`** (additive) -- add n-gram fields to `ExportData` with `#[serde(default)]`
**Modify: `src/store/json_store.rs`** (additive) -- update `export_all()` to serialize n-gram stats from memory; `import_all()` imports them into drill_history replay pipeline
---
## Performance Budgets
| Operation | Budget | Notes |
|-----------|--------|-------|
| N-gram extraction per drill | < 1ms | Linear scan of ~200-500 keystrokes |
| Stats update per drill | < 1ms | ~400 bigram + ~300 trigram hash map inserts |
| Focus selection | < 5ms | Iterate all bigrams (~2K), filter + rank |
| History replay (full rebuild) | < 500ms | Replay 500 drills x extraction + update (fixture: 500 drills, 300 keystrokes each) |
| Memory for n-gram stores | < 5MB | ~3K bigrams + 5K trigrams x ~200 bytes each |
Benchmark tests enforce extraction (<1ms for 500 keystrokes), update (<1ms for 400 events), and focus selection (<5ms for 3K bigrams) budgets.
---
## Files Summary
| File | Action | Breaking? | What Changes |
|------|--------|-----------|-------------|
| `src/engine/ngram_stats.rs` | **New** | No | All n-gram structs, extraction, redundancy formula, FocusTarget, focus selection |
| `src/engine/mod.rs` | Modify | No (additive) | Add `pub mod ngram_stats`, re-export `FocusTarget` |
| `src/engine/key_stats.rs` | Modify | No (additive) | Add `error_count`/`total_count` to `KeyStat` with `#[serde(default)]`, add `smoothed_error_rate()` |
| `src/store/schema.rs` | Modify | No (additive) | `BigramStatsData`/`TrigramStatsData` types, `ExportData` update with `#[serde(default)]` |
| `src/store/json_store.rs` | Modify | No (additive) | Export/import n-gram data |
| `src/app.rs` | Modify | No (internal) | App fields, `finish_drill()` n-gram extraction, `generate_text()` adapter + `apply_bigram_focus()`, startup replay |
| `src/generator/dictionary.rs` | Unchanged | - | Existing `find_matching` used as-is via adapter |
| `src/generator/phonetic.rs` | Unchanged | - | Existing API used as-is via adapter |
---
## Verification
1. **Unit tests** for `extract_ngram_events` -- verify bigram/trigram extraction from known keystroke sequences, BACKSPACE filtering, space-boundary skipping, hesitation detection at threshold boundary
2. **Unit tests** for `redundancy_score` -- the 3 worked examples above as test cases, plus edge cases (zero samples, all errors, no errors)
3. **Unit tests** for Laplace smoothing -- verify convergence behavior at low and high sample counts
4. **Unit tests** for stability gate -- verify `redundancy_streak` increments/resets correctly, focus eligibility requires 3 consecutive hits
5. **Deterministic integration tests** for focus selection -- seed `SmallRng` with fixed seed, verify tie-breaking behavior between char and bigram focus, verify fallback when no bigrams are eligible
6. **Regression test** -- verify existing single-character focus works unchanged when no bigrams have sufficient samples (cold start path)
7. **Benchmark tests** (non-blocking, `#[bench]` or criterion):
- Extraction: < 1ms for 500 `KeyTime` entries
- Update: < 1ms for 400 bigram events
- Focus selection: < 5ms for 3,000 bigram entries
- History replay: < 500ms for 500 drills of 300 keystrokes each
8. **Manual test** -- deliberately mistype a specific bigram repeatedly, verify it becomes the focus target and subsequent drills contain words with that bigram
## Future Considerations (Not in Scope)
- **N-gram cache files** for faster startup if replay latency becomes problematic (hybrid append-only cursor approach)
- **Per-order empirical confidence targets** instead of linear scaling (calibrate from user data, log diagnostics)
- **Bigram placement control** in phonetic generator (prefix/medial/suffix weighting) if adapter approach proves insufficient
- **Trigram-driven focus** if marginal gain metric consistently shows > 10% incremental value

View File

@@ -11,6 +11,11 @@ use rand::rngs::SmallRng;
use crate::config::Config;
use crate::engine::filter::CharFilter;
use crate::engine::key_stats::KeyStatsStore;
use crate::engine::FocusTarget;
use crate::engine::ngram_stats::{
self, BigramKey, BigramStatsStore, TrigramStatsStore, extract_ngram_events,
select_focus_target,
};
use crate::engine::scoring;
use crate::engine::skill_tree::{BranchId, BranchStatus, DrillScope, SkillTree};
use crate::generator::TextGenerator;
@@ -35,7 +40,7 @@ use crate::keyboard::display::BACKSPACE;
use crate::session::drill::DrillState;
use crate::session::input::{self, KeystrokeEvent};
use crate::session::result::DrillResult;
use crate::session::result::{DrillResult, KeyTime};
use crate::store::json_store::JsonStore;
use crate::store::schema::{DrillHistoryData, ExportData, KeyStatsData, ProfileData, EXPORT_VERSION};
use crate::ui::components::menu::Menu;
@@ -260,6 +265,13 @@ pub struct App {
pub keyboard_explorer_selected: Option<char>,
pub explorer_accuracy_cache_overall: Option<(char, usize, usize)>,
pub explorer_accuracy_cache_ranked: Option<(char, usize, usize)>,
pub bigram_stats: BigramStatsStore,
pub ranked_bigram_stats: BigramStatsStore,
pub trigram_stats: TrigramStatsStore,
pub ranked_trigram_stats: TrigramStatsStore,
pub user_median_transition_ms: f64,
pub transition_buffer: Vec<f64>,
pub trigram_gain_history: Vec<f64>,
rng: SmallRng,
transition_table: TransitionTable,
#[allow(dead_code)]
@@ -402,6 +414,13 @@ impl App {
keyboard_explorer_selected: None,
explorer_accuracy_cache_overall: None,
explorer_accuracy_cache_ranked: None,
bigram_stats: BigramStatsStore::default(),
ranked_bigram_stats: BigramStatsStore::default(),
trigram_stats: TrigramStatsStore::default(),
ranked_trigram_stats: TrigramStatsStore::default(),
user_median_transition_ms: 0.0,
transition_buffer: Vec::new(),
trigram_gain_history: Vec::new(),
rng: SmallRng::from_entropy(),
transition_table,
dictionary,
@@ -419,6 +438,9 @@ impl App {
});
}
// Rebuild n-gram stats from drill history
app.rebuild_ngram_stats();
app.start_drill();
app
}
@@ -591,6 +613,9 @@ impl App {
self.skill_tree = SkillTree::new(self.profile.skill_tree.clone());
self.keyboard_model = KeyboardModel::from_name(&self.config.keyboard_layout);
// Rebuild n-gram stats from imported drill history
self.rebuild_ngram_stats();
// Check theme availability
let theme_name = self.config.theme.clone();
let loaded_theme = Theme::load(&theme_name).unwrap_or_default();
@@ -633,7 +658,18 @@ impl App {
DrillMode::Adaptive => {
let scope = self.drill_scope;
let all_keys = self.skill_tree.unlocked_keys(scope);
let focused = self.skill_tree.focused_key(scope, &self.ranked_key_stats);
// Select focus target: single char or bigram
let focus_target = select_focus_target(
&self.skill_tree,
scope,
&self.ranked_key_stats,
&self.ranked_bigram_stats,
);
let (focused_char, focused_bigram) = match &focus_target {
FocusTarget::Char(ch) => (Some(*ch), None),
FocusTarget::Bigram(key) => (Some(key.0[0]), Some(key.clone())),
};
// Generate base lowercase text using only lowercase keys from scope
let lowercase_keys: Vec<char> = all_keys
@@ -643,7 +679,7 @@ impl App {
.collect();
let filter = CharFilter::new(lowercase_keys);
// Only pass focused to phonetic generator if it's a lowercase letter
let lowercase_focused = focused.filter(|ch| ch.is_ascii_lowercase());
let lowercase_focused = focused_char.filter(|ch| ch.is_ascii_lowercase());
let table = self.transition_table.clone();
let dict = Dictionary::load();
let rng = SmallRng::from_rng(&mut self.rng).unwrap();
@@ -658,7 +694,7 @@ impl App {
.collect();
if !cap_keys.is_empty() {
let mut rng = SmallRng::from_rng(&mut self.rng).unwrap();
text = capitalize::apply_capitalization(&text, &cap_keys, focused, &mut rng);
text = capitalize::apply_capitalization(&text, &cap_keys, focused_char, &mut rng);
}
// Apply punctuation if punctuation keys are in scope
@@ -674,7 +710,7 @@ impl App {
.collect();
if !punct_keys.is_empty() {
let mut rng = SmallRng::from_rng(&mut self.rng).unwrap();
text = punctuate::apply_punctuation(&text, &punct_keys, focused, &mut rng);
text = punctuate::apply_punctuation(&text, &punct_keys, focused_char, &mut rng);
}
// Apply numbers if digit keys are in scope
@@ -686,7 +722,7 @@ impl App {
if !digit_keys.is_empty() {
let has_dot = all_keys.contains(&'.');
let mut rng = SmallRng::from_rng(&mut self.rng).unwrap();
text = numbers::apply_numbers(&text, &digit_keys, has_dot, focused, &mut rng);
text = numbers::apply_numbers(&text, &digit_keys, has_dot, focused_char, &mut rng);
}
// Apply code symbols only if this drill is for the CodeSymbols branch,
@@ -734,7 +770,7 @@ impl App {
text = code_patterns::apply_code_symbols(
&text,
&symbol_keys,
focused,
focused_char,
&mut rng,
);
}
@@ -745,6 +781,11 @@ impl App {
text = insert_line_breaks(&text);
}
// After all generation: if bigram focus, swap some words for bigram-containing words
if let Some(ref bigram) = focused_bigram {
text = self.apply_bigram_focus(&text, &filter, bigram);
}
(text, None)
}
DrillMode::Code => {
@@ -843,15 +884,39 @@ impl App {
for kt in &result.per_key_times {
if kt.correct {
self.key_stats.update_key(kt.key, kt.time_ms);
} else {
self.key_stats.update_key_error(kt.key);
}
}
// Extract and update n-gram stats for all drill modes
let drill_index = self.drill_history.len() as u32;
let hesitation_thresh = ngram_stats::hesitation_threshold(self.user_median_transition_ms);
let (bigram_events, trigram_events) =
extract_ngram_events(&result.per_key_times, hesitation_thresh);
for ev in &bigram_events {
self.bigram_stats.update(ev.key.clone(), ev.total_time_ms, ev.correct, ev.has_hesitation, drill_index);
self.bigram_stats.update_redundancy_streak(&ev.key, &self.key_stats);
}
for ev in &trigram_events {
self.trigram_stats.update(ev.key.clone(), ev.total_time_ms, ev.correct, ev.has_hesitation, drill_index);
}
if ranked {
for kt in &result.per_key_times {
if kt.correct {
self.ranked_key_stats.update_key(kt.key, kt.time_ms);
} else {
self.ranked_key_stats.update_key_error(kt.key);
}
}
for ev in &bigram_events {
self.ranked_bigram_stats.update(ev.key.clone(), ev.total_time_ms, ev.correct, ev.has_hesitation, drill_index);
self.ranked_bigram_stats.update_redundancy_streak(&ev.key, &self.ranked_key_stats);
}
for ev in &trigram_events {
self.ranked_trigram_stats.update(ev.key.clone(), ev.total_time_ms, ev.correct, ev.has_hesitation, drill_index);
}
let update = self
.skill_tree
.update(&self.ranked_key_stats, before_stats.as_ref());
@@ -919,6 +984,19 @@ impl App {
self.profile.last_practice_date = Some(today);
}
// Update transition buffer for hesitation baseline
self.update_transition_buffer(&result.per_key_times);
// Periodic trigram marginal gain analysis (every 50 drills)
if self.profile.total_drills % 50 == 0 && self.profile.total_drills > 0 {
let gain = ngram_stats::trigram_marginal_gain(
&self.ranked_trigram_stats,
&self.ranked_bigram_stats,
&self.ranked_key_stats,
);
self.trigram_gain_history.push(gain);
}
self.drill_history.push(result.clone());
if self.drill_history.len() > 500 {
self.drill_history.remove(0);
@@ -951,9 +1029,27 @@ impl App {
for kt in &result.per_key_times {
if kt.correct {
self.key_stats.update_key(kt.key, kt.time_ms);
} else {
self.key_stats.update_key_error(kt.key);
}
}
// Extract and update n-gram stats
let drill_index = self.drill_history.len() as u32;
let hesitation_thresh = ngram_stats::hesitation_threshold(self.user_median_transition_ms);
let (bigram_events, trigram_events) =
extract_ngram_events(&result.per_key_times, hesitation_thresh);
for ev in &bigram_events {
self.bigram_stats.update(ev.key.clone(), ev.total_time_ms, ev.correct, ev.has_hesitation, drill_index);
self.bigram_stats.update_redundancy_streak(&ev.key, &self.key_stats);
}
for ev in &trigram_events {
self.trigram_stats.update(ev.key.clone(), ev.total_time_ms, ev.correct, ev.has_hesitation, drill_index);
}
// Update transition buffer for hesitation baseline
self.update_transition_buffer(&result.per_key_times);
self.drill_history.push(result.clone());
if self.drill_history.len() > 500 {
self.drill_history.remove(0);
@@ -983,6 +1079,177 @@ impl App {
}
}
/// Replace up to 40% of words with dictionary words containing the target bigram.
/// No more than 3 consecutive bigram-focused words to prevent repetitive feel.
fn apply_bigram_focus(&mut self, text: &str, filter: &CharFilter, bigram: &BigramKey) -> String {
    // `str::split(' ')` always yields at least one element (an empty string
    // for empty input), so the old `words.is_empty()` check was unreachable
    // and an empty text would get one random candidate word injected.
    // Guard on the text itself instead.
    if text.is_empty() {
        return String::new();
    }
    let bigram_str: String = bigram.0.iter().collect();
    let words: Vec<&str> = text.split(' ').collect();
    // Dictionary words that contain the bigram AND pass the active char filter.
    let dict = Dictionary::load();
    let candidates: Vec<&str> = dict
        .find_matching(filter, None)
        .into_iter()
        .filter(|w| w.contains(&bigram_str))
        .collect();
    if candidates.is_empty() {
        // Nothing usable to substitute; leave the generated text untouched.
        return text.to_string();
    }
    // Ceiling of 2/5 of the word count (~40%).
    let max_replacements = (words.len() * 2 + 4) / 5;
    let mut replaced = 0;
    let mut consecutive = 0; // current run length of bigram-bearing words
    let mut result_words: Vec<String> = Vec::with_capacity(words.len());
    for word in &words {
        if word.contains(&bigram_str) {
            // Word already exercises the bigram: keep it, but count it toward
            // the consecutive run so we don't stack more focus words around it.
            consecutive += 1;
            result_words.push(word.to_string());
            continue;
        }
        if replaced < max_replacements && consecutive < 3 {
            let candidate = candidates[self.rng.gen_range(0..candidates.len())];
            result_words.push(candidate.to_string());
            replaced += 1;
            consecutive += 1;
        } else {
            // Budget exhausted or run too long: keep the original word,
            // which breaks the consecutive run.
            consecutive = 0;
            result_words.push(word.to_string());
        }
    }
    result_words.join(" ")
}
/// Fold fresh inter-keystroke intervals into the rolling transition buffer
/// and refresh the cached median used as the hesitation baseline.
fn update_transition_buffer(&mut self, per_key_times: &[KeyTime]) {
    // Backspace strokes are corrections, not letter-to-letter transitions.
    self.transition_buffer.extend(
        per_key_times
            .iter()
            .filter(|kt| kt.key != BACKSPACE)
            .map(|kt| kt.time_ms),
    );
    // Keep only the 200 most recent intervals.
    let len = self.transition_buffer.len();
    if len > 200 {
        self.transition_buffer.drain(..len - 200);
    }
    // compute_median reorders its input, so hand it a scratch copy.
    let mut scratch = self.transition_buffer.clone();
    self.user_median_transition_ms = ngram_stats::compute_median(&mut scratch);
}
/// Rebuild all n-gram stats and char-level error/total counts from drill history.
/// This is the sole source of truth for error_count/total_count on KeyStat
/// and all n-gram stores. Timing EMA on KeyStat is NOT touched here
/// (it is either loaded from disk or rebuilt by `rebuild_from_history`).
fn rebuild_ngram_stats(&mut self) {
// Reset n-gram stores
self.bigram_stats = BigramStatsStore::default();
self.bigram_stats.target_cpm = self.config.target_cpm();
self.ranked_bigram_stats = BigramStatsStore::default();
self.ranked_bigram_stats.target_cpm = self.config.target_cpm();
self.trigram_stats = TrigramStatsStore::default();
self.trigram_stats.target_cpm = self.config.target_cpm();
self.ranked_trigram_stats = TrigramStatsStore::default();
self.ranked_trigram_stats.target_cpm = self.config.target_cpm();
self.transition_buffer.clear();
self.user_median_transition_ms = 0.0;
// Reset char-level error/total counts (timing fields are untouched)
for stat in self.key_stats.stats.values_mut() {
stat.error_count = 0;
stat.total_count = 0;
}
for stat in self.ranked_key_stats.stats.values_mut() {
stat.error_count = 0;
stat.total_count = 0;
}
// Take drill_history out temporarily to avoid borrow conflict
let history = std::mem::take(&mut self.drill_history);
for (drill_index, result) in history.iter().enumerate() {
// The threshold is recomputed every iteration from the median of the
// drills replayed so far, so drill N sees the baseline as of drills
// 0..N — the same incremental evolution the live update path produces.
let hesitation_thresh = ngram_stats::hesitation_threshold(self.user_median_transition_ms);
let (bigram_events, trigram_events) =
extract_ngram_events(&result.per_key_times, hesitation_thresh);
// Rebuild char-level error/total counts from history
for kt in &result.per_key_times {
if kt.correct {
// Bump total_count directly (not via update_key) so the timing
// EMA is not re-run during replay.
let stat = self.key_stats.stats.entry(kt.key).or_default();
stat.total_count += 1;
} else {
self.key_stats.update_key_error(kt.key);
}
}
for ev in &bigram_events {
self.bigram_stats.update(ev.key.clone(), ev.total_time_ms, ev.correct, ev.has_hesitation, drill_index as u32);
self.bigram_stats.update_redundancy_streak(&ev.key, &self.key_stats);
}
for ev in &trigram_events {
self.trigram_stats.update(ev.key.clone(), ev.total_time_ms, ev.correct, ev.has_hesitation, drill_index as u32);
}
// Ranked stores only accumulate events from ranked drills.
if result.ranked {
for kt in &result.per_key_times {
if kt.correct {
let stat = self.ranked_key_stats.stats.entry(kt.key).or_default();
stat.total_count += 1;
} else {
self.ranked_key_stats.update_key_error(kt.key);
}
}
for ev in &bigram_events {
self.ranked_bigram_stats.update(ev.key.clone(), ev.total_time_ms, ev.correct, ev.has_hesitation, drill_index as u32);
self.ranked_bigram_stats.update_redundancy_streak(&ev.key, &self.ranked_key_stats);
}
for ev in &trigram_events {
self.ranked_trigram_stats.update(ev.key.clone(), ev.total_time_ms, ev.correct, ev.has_hesitation, drill_index as u32);
}
}
// Update transition buffer
for kt in &result.per_key_times {
if kt.key != BACKSPACE {
self.transition_buffer.push(kt.time_ms);
}
}
// Cap at the 200 most recent intervals, then refresh the median so the
// next iteration's hesitation threshold reflects this drill.
if self.transition_buffer.len() > 200 {
let excess = self.transition_buffer.len() - 200;
self.transition_buffer.drain(..excess);
}
let mut buf = self.transition_buffer.clone();
self.user_median_transition_ms = ngram_stats::compute_median(&mut buf);
}
// Put drill_history back
self.drill_history = history;
// Prune trigrams — use drill_history.len() as total, matching the drill_index
// space used in last_seen_drill_index above (history position, includes partials)
let total_history_entries = self.drill_history.len() as u32;
self.trigram_stats.prune(
ngram_stats::MAX_TRIGRAMS,
total_history_entries,
&self.bigram_stats,
&self.key_stats,
);
self.ranked_trigram_stats.prune(
ngram_stats::MAX_TRIGRAMS,
total_history_entries,
&self.ranked_bigram_stats,
&self.ranked_key_stats,
);
}
pub fn retry_drill(&mut self) {
if let Some(ref drill) = self.drill {
let text: String = drill.target.iter().collect();
@@ -1111,6 +1378,9 @@ impl App {
}
self.profile.skill_tree = self.skill_tree.progress.clone();
// Rebuild n-gram stats from the replayed history
self.rebuild_ngram_stats();
}
pub fn go_to_skill_tree(&mut self) {

View File

@@ -11,6 +11,10 @@ pub struct KeyStat {
pub confidence: f64,
pub sample_count: usize,
pub recent_times: Vec<f64>,
#[serde(default)]
pub error_count: usize,
#[serde(default)]
pub total_count: usize,
}
impl Default for KeyStat {
@@ -21,6 +25,8 @@ impl Default for KeyStat {
confidence: 0.0,
sample_count: 0,
recent_times: Vec::new(),
error_count: 0,
total_count: 0,
}
}
}
@@ -44,6 +50,7 @@ impl KeyStatsStore {
pub fn update_key(&mut self, key: char, time_ms: f64) {
let stat = self.stats.entry(key).or_default();
stat.sample_count += 1;
stat.total_count += 1;
if stat.sample_count == 1 {
stat.filtered_time_ms = time_ms;
@@ -70,6 +77,22 @@ impl KeyStatsStore {
pub fn get_stat(&self, key: char) -> Option<&KeyStat> {
self.stats.get(&key)
}
/// Count a mistyped keystroke for `key`.
///
/// Only the error/total tallies change; timing and confidence fields are
/// left alone because they are derived exclusively from correct strokes.
pub fn update_key_error(&mut self, key: char) {
    let counters = self.stats.entry(key).or_default();
    counters.total_count += 1;
    counters.error_count += 1;
}
/// Laplace-smoothed error rate for `key`: (errors + 1) / (total + 2).
///
/// A key with no recorded stats uses the same formula with zero counts,
/// which yields the uninformative prior of 0.5.
pub fn smoothed_error_rate(&self, key: char) -> f64 {
    self.stats
        .get(&key)
        .map_or(0.5, |s| (s.error_count as f64 + 1.0) / (s.total_count as f64 + 2.0))
}
}
#[cfg(test)]

View File

@@ -1,5 +1,8 @@
pub mod filter;
pub mod key_stats;
pub mod learning_rate;
pub mod ngram_stats;
pub mod scoring;
pub mod skill_tree;
pub use ngram_stats::FocusTarget;

1221
src/engine/ngram_stats.rs Normal file

File diff suppressed because it is too large Load Diff

18
src/lib.rs Normal file
View File

@@ -0,0 +1,18 @@
// Library target exists solely for criterion benchmarks.
// The binary entry point is main.rs; this file re-declares the module tree so
// that bench harnesses can import types via `keydr::engine::*` / `keydr::session::*`.
// Most code is only exercised through the binary, so suppress dead_code warnings.
#![allow(dead_code)]
// Public: used directly by benchmarks
pub mod engine;
pub mod session;
// Private: required transitively by engine/session (won't compile without them)
mod app;
mod config;
mod event;
mod generator;
mod keyboard;
mod store;
mod ui;

View File

@@ -101,6 +101,7 @@ impl JsonStore {
}
/// Bundle all persisted data + config into an ExportData struct.
/// N-gram stats are not included — they are always rebuilt from drill history.
pub fn export_all(&self, config: &Config) -> ExportData {
let profile = self.load_profile().unwrap_or_default();
let key_stats = self.load_key_stats();

View File

@@ -74,6 +74,9 @@ impl Default for DrillHistoryData {
pub const EXPORT_VERSION: u32 = 1;
/// Export contract: drill_history is the sole source of truth for n-gram stats.
/// N-gram data is always rebuilt from history on import/startup, so it is not
/// included in the export payload.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct ExportData {
pub keydr_export_version: u32,