From 232f93e05465a5953a22ea97e9f11ffddefe20a4 Mon Sep 17 00:00:00 2001 From: Tyler Hallada Date: Sat, 11 Apr 2026 00:11:45 +0000 Subject: [PATCH] Remove trigram anomaly calculation Decided that this wasn't worth it and bigram anomalies are enough. --- benches/ngram_benchmarks.rs | 20 +- locales/cs.yml | 3 - locales/da.yml | 3 - locales/de.yml | 3 - locales/en.yml | 3 - locales/es.yml | 3 - locales/et.yml | 3 - locales/fi.yml | 3 - locales/fr.yml | 3 - locales/hr.yml | 3 - locales/hu.yml | 3 - locales/it.yml | 3 - locales/lt.yml | 3 - locales/lv.yml | 3 - locales/nb.yml | 3 - locales/nl.yml | 3 - locales/pl.yml | 3 - locales/pt.yml | 3 - locales/ro.yml | 3 - locales/sl.yml | 3 - locales/sv.yml | 3 - locales/tr.yml | 3 - src/app.rs | 90 +------- src/engine/ngram_stats.rs | 299 ++------------------------- src/main.rs | 24 +-- src/ui/components/stats_dashboard.rs | 16 +- 26 files changed, 23 insertions(+), 489 deletions(-) diff --git a/benches/ngram_benchmarks.rs b/benches/ngram_benchmarks.rs index 07ccd15..78affee 100644 --- a/benches/ngram_benchmarks.rs +++ b/benches/ngram_benchmarks.rs @@ -2,7 +2,7 @@ use criterion::{Criterion, black_box, criterion_group, criterion_main}; use keydr::engine::key_stats::KeyStatsStore; use keydr::engine::ngram_stats::{ - BigramKey, BigramStatsStore, TrigramStatsStore, extract_ngram_events, + BigramKey, BigramStatsStore, extract_ngram_events, }; use keydr::session::result::KeyTime; @@ -20,14 +20,14 @@ fn make_keystrokes(count: usize) -> Vec { fn bench_extraction(c: &mut Criterion) { let keystrokes = make_keystrokes(500); - c.bench_function("extract_ngram_events (500 keystrokes)", |b| { + c.bench_function("extract_bigrams (500 keystrokes)", |b| { b.iter(|| extract_ngram_events(black_box(&keystrokes), 800.0)) }); } fn bench_update(c: &mut Criterion) { let keystrokes = make_keystrokes(500); - let (bigram_events, _) = extract_ngram_events(&keystrokes, 800.0); + let bigram_events = extract_ngram_events(&keystrokes, 800.0); 
c.bench_function("bigram_stats update (400 events)", |b| { b.iter(|| { @@ -93,11 +93,10 @@ fn bench_history_replay(c: &mut Criterion) { c.bench_function("history replay (500 drills x 300 keystrokes)", |b| { b.iter(|| { let mut bigram_stats = BigramStatsStore::default(); - let mut trigram_stats = TrigramStatsStore::default(); let mut key_stats = KeyStatsStore::default(); for (drill_idx, keystrokes) in drills.iter().enumerate() { - let (bigram_events, trigram_events) = extract_ngram_events(keystrokes, 800.0); + let bigram_events = extract_ngram_events(keystrokes, 800.0); for kt in keystrokes { if kt.correct { @@ -117,18 +116,9 @@ fn bench_history_replay(c: &mut Criterion) { drill_idx as u32, ); } - for ev in &trigram_events { - trigram_stats.update( - ev.key.clone(), - ev.total_time_ms, - ev.correct, - ev.has_hesitation, - drill_idx as u32, - ); - } } - (bigram_stats, trigram_stats, key_stats) + (bigram_stats, key_stats) }) }); } diff --git a/locales/cs.yml b/locales/cs.yml index 5c6848d..3fb4faa 100644 --- a/locales/cs.yml +++ b/locales/cs.yml @@ -146,10 +146,7 @@ stats: no_speed_anomalies: ' Nebyly detekovany anomalie rychlosti' scope_label_prefix: ' ' bi_label: ' | Bi: %{count}' - tri_label: ' | Tri: %{count}' hes_label: ' | Vah: >%{ms}ms' - gain_label: ' | Zisk: %{value}' - gain_interval: ' (kazdych 50)' focus_char_value: 'Znak ''%{ch}''' # Activity heatmap diff --git a/locales/da.yml b/locales/da.yml index 45e239d..0801ec5 100644 --- a/locales/da.yml +++ b/locales/da.yml @@ -146,10 +146,7 @@ stats: no_speed_anomalies: ' Ingen hastigheds-anomalier opdaget' scope_label_prefix: ' ' bi_label: ' | Bi: %{count}' - tri_label: ' | Tri: %{count}' hes_label: ' | Hes: >%{ms}ms' - gain_label: ' | Gevinst: %{value}' - gain_interval: ' (hver 50.)' focus_char_value: 'Tegn ''%{ch}''' # Activity heatmap diff --git a/locales/de.yml b/locales/de.yml index 5540ae6..657fe21 100644 --- a/locales/de.yml +++ b/locales/de.yml @@ -146,10 +146,7 @@ stats: no_speed_anomalies: ' Keine 
Tempo-Anomalien erkannt' scope_label_prefix: ' ' bi_label: ' | Bi: %{count}' - tri_label: ' | Tri: %{count}' hes_label: ' | Hes: >%{ms}ms' - gain_label: ' | Gewinn: %{value}' - gain_interval: ' (alle 50)' focus_char_value: 'Zeichen ''%{ch}''' # Activity heatmap diff --git a/locales/en.yml b/locales/en.yml index 447913a..6b3424b 100644 --- a/locales/en.yml +++ b/locales/en.yml @@ -146,10 +146,7 @@ stats: no_speed_anomalies: ' No speed anomalies detected' scope_label_prefix: ' ' bi_label: ' | Bi: %{count}' - tri_label: ' | Tri: %{count}' hes_label: ' | Hes: >%{ms}ms' - gain_label: ' | Gain: %{value}' - gain_interval: ' (every 50)' focus_char_value: 'Char ''%{ch}''' # Activity heatmap diff --git a/locales/es.yml b/locales/es.yml index bd63f77..5ff49ce 100644 --- a/locales/es.yml +++ b/locales/es.yml @@ -146,10 +146,7 @@ stats: no_speed_anomalies: ' No se detectaron anomalías de velocidad' scope_label_prefix: ' ' bi_label: ' | Bi: %{count}' - tri_label: ' | Tri: %{count}' hes_label: ' | Hes: >%{ms}ms' - gain_label: ' | Gan: %{value}' - gain_interval: ' (cada 50)' focus_char_value: 'Carácter ''%{ch}''' # Mapa de actividad diff --git a/locales/et.yml b/locales/et.yml index 4ca7eab..303dff5 100644 --- a/locales/et.yml +++ b/locales/et.yml @@ -146,10 +146,7 @@ stats: no_speed_anomalies: ' Kiiruse anomaaliaid ei tuvastatud' scope_label_prefix: ' ' bi_label: ' | Bi: %{count}' - tri_label: ' | Tri: %{count}' hes_label: ' | Kõhk: >%{ms}ms' - gain_label: ' | Kasv: %{value}' - gain_interval: ' (iga 50)' focus_char_value: 'Märk ''%{ch}''' # Aktiivsuse soojuskaart diff --git a/locales/fi.yml b/locales/fi.yml index 1377eec..9075a6c 100644 --- a/locales/fi.yml +++ b/locales/fi.yml @@ -146,10 +146,7 @@ stats: no_speed_anomalies: ' Nopeuspoikkeamia ei havaittu' scope_label_prefix: ' ' bi_label: ' | Bi: %{count}' - tri_label: ' | Tri: %{count}' hes_label: ' | Epär: >%{ms}ms' - gain_label: ' | Kasvu: %{value}' - gain_interval: ' (joka 50.)' focus_char_value: 'Merkki ''%{ch}''' # 
Activity heatmap diff --git a/locales/fr.yml b/locales/fr.yml index 1265619..9c45bd1 100644 --- a/locales/fr.yml +++ b/locales/fr.yml @@ -146,10 +146,7 @@ stats: no_speed_anomalies: ' Aucune anomalie de vitesse détectée' scope_label_prefix: ' ' bi_label: ' | Bi : %{count}' - tri_label: ' | Tri : %{count}' hes_label: ' | Hés : >%{ms}ms' - gain_label: ' | Gain : %{value}' - gain_interval: ' (tous les 50)' focus_char_value: 'Caractère ''%{ch}''' # Carte d'activité diff --git a/locales/hr.yml b/locales/hr.yml index 9400a1e..617c0fe 100644 --- a/locales/hr.yml +++ b/locales/hr.yml @@ -146,10 +146,7 @@ stats: no_speed_anomalies: ' Nema otkrivenih anomalija brzine' scope_label_prefix: ' ' bi_label: ' | Bi: %{count}' - tri_label: ' | Tri: %{count}' hes_label: ' | Hes: >%{ms}ms' - gain_label: ' | Dobitak: %{value}' - gain_interval: ' (svakih 50)' focus_char_value: 'Znak ''%{ch}''' # Activity heatmap diff --git a/locales/hu.yml b/locales/hu.yml index 1524833..c0fdb5f 100644 --- a/locales/hu.yml +++ b/locales/hu.yml @@ -146,10 +146,7 @@ stats: no_speed_anomalies: ' Nem észlelt sebesség anomáliák' scope_label_prefix: ' ' bi_label: ' | Bi: %{count}' - tri_label: ' | Tri: %{count}' hes_label: ' | Hes: >%{ms}ms' - gain_label: ' | Nyereség: %{value}' - gain_interval: ' (minden 50.)' focus_char_value: 'Kar. 
''%{ch}''' # Activity heatmap diff --git a/locales/it.yml b/locales/it.yml index 83dd51e..5fe9eca 100644 --- a/locales/it.yml +++ b/locales/it.yml @@ -146,10 +146,7 @@ stats: no_speed_anomalies: ' Nessuna anomalia di velocità rilevata' scope_label_prefix: ' ' bi_label: ' | Bi: %{count}' - tri_label: ' | Tri: %{count}' hes_label: ' | Esi: >%{ms}ms' - gain_label: ' | Guad: %{value}' - gain_interval: ' (ogni 50)' focus_char_value: 'Carattere ''%{ch}''' # Mappa di attività diff --git a/locales/lt.yml b/locales/lt.yml index 582b0d4..48ae432 100644 --- a/locales/lt.yml +++ b/locales/lt.yml @@ -146,10 +146,7 @@ stats: no_speed_anomalies: ' Greičio anomalijų nerasta' scope_label_prefix: ' ' bi_label: ' | Bi: %{count}' - tri_label: ' | Tri: %{count}' hes_label: ' | Hes: >%{ms}ms' - gain_label: ' | Augimas: %{value}' - gain_interval: ' (kas 50)' focus_char_value: 'Simb. ''%{ch}''' # Activity heatmap diff --git a/locales/lv.yml b/locales/lv.yml index 573b947..6725a17 100644 --- a/locales/lv.yml +++ b/locales/lv.yml @@ -146,10 +146,7 @@ stats: no_speed_anomalies: ' Ātruma anomālijas nav atrastas' scope_label_prefix: ' ' bi_label: ' | Bi: %{count}' - tri_label: ' | Tri: %{count}' hes_label: ' | Hes: >%{ms}ms' - gain_label: ' | Pieaugums: %{value}' - gain_interval: ' (katrus 50)' focus_char_value: 'Simb. 
''%{ch}''' # Activity heatmap diff --git a/locales/nb.yml b/locales/nb.yml index daf2e20..eb37495 100644 --- a/locales/nb.yml +++ b/locales/nb.yml @@ -146,10 +146,7 @@ stats: no_speed_anomalies: ' Ingen hastighets-anomalier oppdaget' scope_label_prefix: ' ' bi_label: ' | Bi: %{count}' - tri_label: ' | Tri: %{count}' hes_label: ' | Hes: >%{ms}ms' - gain_label: ' | Gevinst: %{value}' - gain_interval: ' (hver 50.)' focus_char_value: 'Tegn ''%{ch}''' # Activity heatmap diff --git a/locales/nl.yml b/locales/nl.yml index a9bd513..5067018 100644 --- a/locales/nl.yml +++ b/locales/nl.yml @@ -146,10 +146,7 @@ stats: no_speed_anomalies: ' Geen snelheid-anomalieen gedetecteerd' scope_label_prefix: ' ' bi_label: ' | Bi: %{count}' - tri_label: ' | Tri: %{count}' hes_label: ' | Hes: >%{ms}ms' - gain_label: ' | Winst: %{value}' - gain_interval: ' (elke 50)' focus_char_value: 'Teken ''%{ch}''' # Activity heatmap diff --git a/locales/pl.yml b/locales/pl.yml index 5ae787d..4e53f73 100644 --- a/locales/pl.yml +++ b/locales/pl.yml @@ -146,10 +146,7 @@ stats: no_speed_anomalies: ' Nie wykryto anomalii predkosci' scope_label_prefix: ' ' bi_label: ' | Bi: %{count}' - tri_label: ' | Tri: %{count}' hes_label: ' | Wah: >%{ms}ms' - gain_label: ' | Zysk: %{value}' - gain_interval: ' (co 50)' focus_char_value: 'Znak ''%{ch}''' # Activity heatmap diff --git a/locales/pt.yml b/locales/pt.yml index 23a5342..4f6a972 100644 --- a/locales/pt.yml +++ b/locales/pt.yml @@ -146,10 +146,7 @@ stats: no_speed_anomalies: ' Nenhuma anomalia de velocidade detectada' scope_label_prefix: ' ' bi_label: ' | Bi: %{count}' - tri_label: ' | Tri: %{count}' hes_label: ' | Hes: >%{ms}ms' - gain_label: ' | Gan: %{value}' - gain_interval: ' (a cada 50)' focus_char_value: 'Caractere ''%{ch}''' # Mapa de atividade diff --git a/locales/ro.yml b/locales/ro.yml index 94200e5..868c462 100644 --- a/locales/ro.yml +++ b/locales/ro.yml @@ -146,10 +146,7 @@ stats: no_speed_anomalies: ' Nicio anomalie de viteza detectata' 
scope_label_prefix: ' ' bi_label: ' | Bi: %{count}' - tri_label: ' | Tri: %{count}' hes_label: ' | Ezit: >%{ms}ms' - gain_label: ' | Castig: %{value}' - gain_interval: ' (la fiecare 50)' focus_char_value: 'Caracter ''%{ch}''' # Activity heatmap diff --git a/locales/sl.yml b/locales/sl.yml index 72e3da1..28e9bba 100644 --- a/locales/sl.yml +++ b/locales/sl.yml @@ -146,10 +146,7 @@ stats: no_speed_anomalies: ' Ni zaznanih anomalij hitrosti' scope_label_prefix: ' ' bi_label: ' | Bi: %{count}' - tri_label: ' | Tri: %{count}' hes_label: ' | Okl: >%{ms}ms' - gain_label: ' | Dobiček: %{value}' - gain_interval: ' (vsakih 50)' focus_char_value: 'Znak ''%{ch}''' # Toplotna karta aktivnosti diff --git a/locales/sv.yml b/locales/sv.yml index 177f769..cf2b26a 100644 --- a/locales/sv.yml +++ b/locales/sv.yml @@ -146,10 +146,7 @@ stats: no_speed_anomalies: ' Inga hastighets-anomalier upptaeckta' scope_label_prefix: ' ' bi_label: ' | Bi: %{count}' - tri_label: ' | Tri: %{count}' hes_label: ' | Hes: >%{ms}ms' - gain_label: ' | Vinst: %{value}' - gain_interval: ' (var 50:e)' focus_char_value: 'Tecken ''%{ch}''' # Activity heatmap diff --git a/locales/tr.yml b/locales/tr.yml index c646a36..4c56d04 100644 --- a/locales/tr.yml +++ b/locales/tr.yml @@ -146,10 +146,7 @@ stats: no_speed_anomalies: ' Hız anomalisi tespit edilmedi' scope_label_prefix: ' ' bi_label: ' | Bi: %{count}' - tri_label: ' | Tri: %{count}' hes_label: ' | Dur: >%{ms}ms' - gain_label: ' | Kazanç: %{value}' - gain_interval: ' (her 50)' focus_char_value: 'Karakter ''%{ch}''' # Aktivite ısı haritası diff --git a/src/app.rs b/src/app.rs index 343528f..1130231 100644 --- a/src/app.rs +++ b/src/app.rs @@ -15,7 +15,7 @@ use crate::engine::FocusSelection; use crate::engine::filter::CharFilter; use crate::engine::key_stats::KeyStatsStore; use crate::engine::ngram_stats::{ - self, BigramStatsStore, TrigramStatsStore, extract_ngram_events, select_focus, + self, BigramStatsStore, extract_ngram_events, select_focus, }; use 
crate::engine::scoring; use crate::engine::skill_tree::{BranchId, BranchStatus, DrillScope, SkillTree, SkillTreeProgress}; @@ -382,11 +382,8 @@ pub struct App { pub explorer_accuracy_cache_ranked: Option<(char, usize, usize)>, pub bigram_stats: BigramStatsStore, pub ranked_bigram_stats: BigramStatsStore, - pub trigram_stats: TrigramStatsStore, - pub ranked_trigram_stats: TrigramStatsStore, pub user_median_transition_ms: f64, pub transition_buffer: Vec, - pub trigram_gain_history: Vec, pub current_focus: Option, pub post_drill_input_lock_until: Option, adaptive_word_history: VecDeque>, @@ -590,11 +587,8 @@ impl App { explorer_accuracy_cache_ranked: None, bigram_stats: BigramStatsStore::default(), ranked_bigram_stats: BigramStatsStore::default(), - trigram_stats: TrigramStatsStore::default(), - ranked_trigram_stats: TrigramStatsStore::default(), user_median_transition_ms: 0.0, transition_buffer: Vec::new(), - trigram_gain_history: Vec::new(), current_focus: None, post_drill_input_lock_until: None, adaptive_word_history: VecDeque::new(), @@ -1159,7 +1153,7 @@ impl App { let drill_index = self.drill_history.len() as u32; let hesitation_thresh = ngram_stats::hesitation_threshold(self.user_median_transition_ms); - let (bigram_events, trigram_events) = + let bigram_events = extract_ngram_events(&result.per_key_times, hesitation_thresh); // Collect unique bigram keys for per-drill streak updates let mut seen_bigrams: std::collections::HashSet = @@ -1181,15 +1175,6 @@ impl App { self.bigram_stats .update_speed_anomaly_streak(key, &self.key_stats); } - for ev in &trigram_events { - self.trigram_stats.update( - ev.key.clone(), - ev.total_time_ms, - ev.correct, - ev.has_hesitation, - drill_index, - ); - } if ranked { let mut seen_ranked_bigrams: std::collections::HashSet = @@ -1217,15 +1202,6 @@ impl App { self.ranked_bigram_stats .update_speed_anomaly_streak(key, &self.ranked_key_stats); } - for ev in &trigram_events { - self.ranked_trigram_stats.update( - ev.key.clone(), - 
ev.total_time_ms, - ev.correct, - ev.has_hesitation, - drill_index, - ); - } let update = self .skill_tree .update(&self.ranked_key_stats, before_stats.as_ref()); @@ -1351,16 +1327,6 @@ impl App { // Update transition buffer for hesitation baseline self.update_transition_buffer(&result.per_key_times); - // Periodic trigram marginal gain analysis (every 50 drills) - if self.profile.total_drills % 50 == 0 && self.profile.total_drills > 0 { - let gain = ngram_stats::trigram_marginal_gain( - &self.ranked_trigram_stats, - &self.ranked_bigram_stats, - &self.ranked_key_stats, - ); - self.trigram_gain_history.push(gain); - } - self.drill_history.push(result.clone()); if self.drill_history.len() > 500 { self.drill_history.remove(0); @@ -1406,7 +1372,7 @@ impl App { let drill_index = self.drill_history.len() as u32; let hesitation_thresh = ngram_stats::hesitation_threshold(self.user_median_transition_ms); - let (bigram_events, trigram_events) = + let bigram_events = extract_ngram_events(&result.per_key_times, hesitation_thresh); let mut seen_bigrams: std::collections::HashSet = std::collections::HashSet::new(); @@ -1426,15 +1392,6 @@ impl App { self.bigram_stats .update_speed_anomaly_streak(key, &self.key_stats); } - for ev in &trigram_events { - self.trigram_stats.update( - ev.key.clone(), - ev.total_time_ms, - ev.correct, - ev.has_hesitation, - drill_index, - ); - } // Update transition buffer for hesitation baseline self.update_transition_buffer(&result.per_key_times); @@ -1497,8 +1454,6 @@ impl App { // Reset n-gram stores self.bigram_stats = BigramStatsStore::default(); self.ranked_bigram_stats = BigramStatsStore::default(); - self.trigram_stats = TrigramStatsStore::default(); - self.ranked_trigram_stats = TrigramStatsStore::default(); self.transition_buffer.clear(); self.user_median_transition_ms = 0.0; @@ -1520,7 +1475,7 @@ impl App { for (drill_index, result) in history.iter().enumerate() { let hesitation_thresh = 
ngram_stats::hesitation_threshold(self.user_median_transition_ms); - let (bigram_events, trigram_events) = + let bigram_events = extract_ngram_events(&result.per_key_times, hesitation_thresh); // Rebuild char-level error/total counts and EMA from history @@ -1560,15 +1515,6 @@ impl App { self.bigram_stats .update_speed_anomaly_streak(key, &self.key_stats); } - for ev in &trigram_events { - self.trigram_stats.update( - ev.key.clone(), - ev.total_time_ms, - ev.correct, - ev.has_hesitation, - drill_index as u32, - ); - } if result.ranked { let mut seen_ranked_bigrams: std::collections::HashSet = @@ -1603,15 +1549,6 @@ impl App { self.ranked_bigram_stats .update_speed_anomaly_streak(key, &self.ranked_key_stats); } - for ev in &trigram_events { - self.ranked_trigram_stats.update( - ev.key.clone(), - ev.total_time_ms, - ev.correct, - ev.has_hesitation, - drill_index as u32, - ); - } } // Update transition buffer @@ -1630,22 +1567,6 @@ impl App { // Put drill_history back self.drill_history = history; - - // Prune trigrams — use drill_history.len() as total, matching the drill_index - // space used in last_seen_drill_index above (history position, includes partials) - let total_history_entries = self.drill_history.len() as u32; - self.trigram_stats.prune( - ngram_stats::MAX_TRIGRAMS, - total_history_entries, - &self.bigram_stats, - &self.key_stats, - ); - self.ranked_trigram_stats.prune( - ngram_stats::MAX_TRIGRAMS, - total_history_entries, - &self.ranked_bigram_stats, - &self.ranked_key_stats, - ); } pub fn retry_drill(&mut self) { @@ -3125,11 +3046,8 @@ impl App { explorer_accuracy_cache_ranked: None, bigram_stats: BigramStatsStore::default(), ranked_bigram_stats: BigramStatsStore::default(), - trigram_stats: TrigramStatsStore::default(), - ranked_trigram_stats: TrigramStatsStore::default(), user_median_transition_ms: 0.0, transition_buffer: Vec::new(), - trigram_gain_history: Vec::new(), current_focus: None, post_drill_input_lock_until: None, adaptive_word_history: 
VecDeque::new(), diff --git a/src/engine/ngram_stats.rs b/src/engine/ngram_stats.rs index 578366f..a041969 100644 --- a/src/engine/ngram_stats.rs +++ b/src/engine/ngram_stats.rs @@ -15,8 +15,6 @@ pub(crate) const MIN_SAMPLES_FOR_FOCUS: usize = 20; const ANOMALY_MIN_SAMPLES: usize = 3; const SPEED_ANOMALY_PCT_THRESHOLD: f64 = 50.0; const MIN_CHAR_SAMPLES_FOR_SPEED: usize = 10; -const MAX_TRIGRAM_ENTRIES: usize = 5000; - // --------------------------------------------------------------------------- // N-gram keys // --------------------------------------------------------------------------- @@ -24,9 +22,6 @@ const MAX_TRIGRAM_ENTRIES: usize = 5000; #[derive(Clone, Debug, PartialEq, Eq, Hash, Serialize, Deserialize)] pub struct BigramKey(pub [char; 2]); -#[derive(Clone, Debug, PartialEq, Eq, Hash, Serialize, Deserialize)] -pub struct TrigramKey(pub [char; 3]); - // --------------------------------------------------------------------------- // NgramStat // --------------------------------------------------------------------------- @@ -347,102 +342,6 @@ impl BigramStatsStore { } } -// --------------------------------------------------------------------------- -// TrigramStatsStore -// --------------------------------------------------------------------------- - -#[derive(Clone, Debug, Default, Serialize, Deserialize)] -pub struct TrigramStatsStore { - pub stats: HashMap, -} - -impl TrigramStatsStore { - pub fn update( - &mut self, - key: TrigramKey, - time_ms: f64, - correct: bool, - hesitation: bool, - drill_index: u32, - ) { - let stat = self.stats.entry(key).or_default(); - update_stat(stat, time_ms, correct, hesitation, drill_index); - } - - pub fn smoothed_error_rate(&self, key: &TrigramKey) -> f64 { - match self.stats.get(key) { - Some(s) => s.error_rate_ema, - None => 0.5, - } - } - - pub fn redundancy_score( - &self, - key: &TrigramKey, - bigram_stats: &BigramStatsStore, - char_stats: &KeyStatsStore, - ) -> f64 { - let e_a = 
char_stats.smoothed_error_rate(key.0[0]); - let e_b = char_stats.smoothed_error_rate(key.0[1]); - let e_c = char_stats.smoothed_error_rate(key.0[2]); - let e_abc = self.smoothed_error_rate(key); - - let expected_from_chars = 1.0 - (1.0 - e_a) * (1.0 - e_b) * (1.0 - e_c); - - let e_ab = bigram_stats.smoothed_error_rate(&BigramKey([key.0[0], key.0[1]])); - let e_bc = bigram_stats.smoothed_error_rate(&BigramKey([key.0[1], key.0[2]])); - let expected_from_bigrams = e_ab.max(e_bc); - - let expected = expected_from_chars.max(expected_from_bigrams); - e_abc / expected.max(0.01) - } - - /// Prune to `max_entries` by composite utility score. - /// `total_drills` is the current total drill count for recency calculation. - pub fn prune( - &mut self, - max_entries: usize, - total_drills: u32, - bigram_stats: &BigramStatsStore, - char_stats: &KeyStatsStore, - ) { - if self.stats.len() <= max_entries { - return; - } - - let recency_weight = 0.3; - let signal_weight = 0.5; - let data_weight = 0.2; - - let mut scored: Vec<(TrigramKey, f64)> = self - .stats - .iter() - .map(|(key, stat)| { - let drills_since = total_drills.saturating_sub(stat.last_seen_drill_index) as f64; - let recency = 1.0 / (drills_since + 1.0); - let redundancy = self - .redundancy_score(key, bigram_stats, char_stats) - .min(3.0); - let data = (stat.sample_count as f64).ln_1p(); - - let utility = - recency_weight * recency + signal_weight * redundancy + data_weight * data; - (key.clone(), utility) - }) - .collect(); - - scored.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(std::cmp::Ordering::Equal)); - scored.truncate(max_entries); - - let keep: HashMap = scored - .into_iter() - .filter_map(|(key, _)| self.stats.remove(&key).map(|stat| (key, stat))) - .collect(); - - self.stats = keep; - } -} - // --------------------------------------------------------------------------- // Extraction events & function // --------------------------------------------------------------------------- @@ -455,27 +354,17 @@ pub 
struct BigramEvent { pub has_hesitation: bool, } -#[derive(Debug)] -pub struct TrigramEvent { - pub key: TrigramKey, - pub total_time_ms: f64, - pub correct: bool, - pub has_hesitation: bool, -} - -/// Extract bigram and trigram events from a sequence of per-key times. +/// Extract bigram events from a sequence of per-key times. /// /// - BACKSPACE entries are filtered out -/// - Space characters split windows (no cross-word n-grams) +/// - Space characters split windows (no cross-word bigrams) /// - For bigram "ab": time = window[1].time_ms -/// - For trigram "abc": time = window[1].time_ms + window[2].time_ms -/// - hesitation = any transition time > hesitation_threshold +/// - hesitation = transition time > hesitation_threshold pub fn extract_ngram_events( per_key_times: &[KeyTime], hesitation_threshold: f64, -) -> (Vec, Vec) { +) -> Vec { let mut bigrams = Vec::new(); - let mut trigrams = Vec::new(); // Filter out backspace entries let filtered: Vec<&KeyTime> = per_key_times @@ -505,30 +394,7 @@ pub fn extract_ngram_events( }); } - // Extract trigrams: slide a window of 3 - for window in filtered.windows(3) { - let a = window[0]; - let b = window[1]; - let c = window[2]; - - // Skip if any is a space (no cross-word) - if a.key == ' ' || b.key == ' ' || c.key == ' ' { - continue; - } - - let time_ms = b.time_ms + c.time_ms; - let correct = a.correct && b.correct && c.correct; - let has_hesitation = b.time_ms > hesitation_threshold || c.time_ms > hesitation_threshold; - - trigrams.push(TrigramEvent { - key: TrigramKey([a.key, b.key, c.key]), - total_time_ms: time_ms, - correct, - has_hesitation, - }); - } - - (bigrams, trigrams) + bigrams } // --------------------------------------------------------------------------- @@ -580,39 +446,6 @@ pub fn select_focus( } } -// --------------------------------------------------------------------------- -// Trigram marginal gain analysis -// --------------------------------------------------------------------------- - -/// 
Compute what fraction of trigrams with sufficient samples show genuine -/// redundancy beyond their constituent bigrams. Returns a value in [0.0, 1.0]. -pub fn trigram_marginal_gain( - trigram_stats: &TrigramStatsStore, - bigram_stats: &BigramStatsStore, - char_stats: &KeyStatsStore, -) -> f64 { - let qualified: Vec<&TrigramKey> = trigram_stats - .stats - .iter() - .filter(|(_, s)| s.sample_count >= MIN_SAMPLES_FOR_FOCUS) - .map(|(k, _)| k) - .collect(); - - if qualified.is_empty() { - return 0.0; - } - - let with_signal = qualified - .iter() - .filter(|k| { - trigram_stats.redundancy_score(k, bigram_stats, char_stats) - > ERROR_ANOMALY_RATIO_THRESHOLD - }) - .count(); - - with_signal as f64 / qualified.len() as f64 -} - // --------------------------------------------------------------------------- // Hesitation helpers // --------------------------------------------------------------------------- @@ -636,9 +469,6 @@ pub fn compute_median(values: &mut [f64]) -> f64 { } } -/// Constant for max trigram entries (used by App during pruning). 
-pub const MAX_TRIGRAMS: usize = MAX_TRIGRAM_ENTRIES; - // --------------------------------------------------------------------------- // Tests // --------------------------------------------------------------------------- @@ -666,15 +496,11 @@ mod tests { make_keytime('l', 180.0, true), make_keytime('o', 160.0, true), ]; - let (bigrams, trigrams) = extract_ngram_events(&times, 800.0); + let bigrams = extract_ngram_events(&times, 800.0); assert_eq!(bigrams.len(), 4); // he, el, ll, lo assert_eq!(bigrams[0].key, BigramKey(['h', 'e'])); assert_eq!(bigrams[0].total_time_ms, 200.0); assert!(bigrams[0].correct); - - assert_eq!(trigrams.len(), 3); // hel, ell, llo - assert_eq!(trigrams[0].key, TrigramKey(['h', 'e', 'l'])); - assert_eq!(trigrams[0].total_time_ms, 200.0 + 150.0); // e.time + l.time } #[test] @@ -685,7 +511,7 @@ mod tests { make_keytime(BACKSPACE, 150.0, true), make_keytime('b', 180.0, true), ]; - let (bigrams, _) = extract_ngram_events(&times, 800.0); + let bigrams = extract_ngram_events(&times, 800.0); // After filtering backspace: a, x, b -> bigrams: ax, xb assert_eq!(bigrams.len(), 2); assert_eq!(bigrams[0].key, BigramKey(['a', 'x'])); @@ -701,13 +527,11 @@ mod tests { make_keytime('c', 180.0, true), make_keytime('d', 160.0, true), ]; - let (bigrams, trigrams) = extract_ngram_events(&times, 800.0); + let bigrams = extract_ngram_events(&times, 800.0); // ab is valid, b-space skipped, space-c skipped, cd is valid assert_eq!(bigrams.len(), 2); assert_eq!(bigrams[0].key, BigramKey(['a', 'b'])); assert_eq!(bigrams[1].key, BigramKey(['c', 'd'])); - // Only trigram with no space: none (ab_space and space_cd both have space) - assert_eq!(trigrams.len(), 0); } #[test] @@ -717,7 +541,7 @@ mod tests { make_keytime('b', 900.0, true), // > 800 threshold make_keytime('c', 200.0, true), ]; - let (bigrams, _) = extract_ngram_events(&times, 800.0); + let bigrams = extract_ngram_events(&times, 800.0); assert!(bigrams[0].has_hesitation); // ab: b.time = 900 > 800 assert!(!bigrams[1].has_hesitation); // bc: 
c.time = 200 < 800 } @@ -729,10 +553,9 @@ mod tests { make_keytime('b', 200.0, false), // incorrect make_keytime('c', 150.0, true), ]; - let (bigrams, trigrams) = extract_ngram_events(&times, 800.0); + let bigrams = extract_ngram_events(&times, 800.0); assert!(!bigrams[0].correct); // ab: a correct, b incorrect -> false assert!(!bigrams[1].correct); // bc: b incorrect, c correct -> false - assert!(!trigrams[0].correct); // abc: b incorrect -> false } // --- EMA error rate tests --- @@ -851,36 +674,6 @@ mod tests { ); } - #[test] - fn redundancy_trigram_explained_by_bigram() { - // "the" where "th" bigram explains the difficulty - let mut char_stats = KeyStatsStore::default(); - for &(ch, ema) in &[('t', 0.03), ('h', 0.04), ('e', 0.04)] { - let s = char_stats.stats.entry(ch).or_default(); - s.error_rate_ema = ema; - } - - let mut bigram_stats = BigramStatsStore::default(); - let th_stat = bigram_stats.stats.entry(BigramKey(['t', 'h'])).or_default(); - th_stat.error_rate_ema = 0.15; - th_stat.sample_count = 100; - let he_stat = bigram_stats.stats.entry(BigramKey(['h', 'e'])).or_default(); - he_stat.error_rate_ema = 0.04; - he_stat.sample_count = 100; - - let mut trigram_stats = TrigramStatsStore::default(); - let the_key = TrigramKey(['t', 'h', 'e']); - let the_stat = trigram_stats.stats.entry(the_key.clone()).or_default(); - the_stat.error_rate_ema = 0.16; - the_stat.sample_count = 100; - - let redundancy = trigram_stats.redundancy_score(&the_key, &bigram_stats, &char_stats); - assert!( - redundancy < ERROR_ANOMALY_RATIO_THRESHOLD, - "Trigram 'the' explained by 'th' bigram should have redundancy < {ERROR_ANOMALY_RATIO_THRESHOLD}, got {redundancy}" - ); - } - // --- Stability gate tests --- #[test] @@ -1117,19 +910,6 @@ mod tests { assert_eq!(compute_median(&mut vals), 0.0); } - // --- Trigram marginal gain --- - - #[test] - fn marginal_gain_zero_when_no_qualified() { - let trigram_stats = TrigramStatsStore::default(); - let bigram_stats = BigramStatsStore::default(); - let 
char_stats = KeyStatsStore::default(); - assert_eq!( - trigram_marginal_gain(&trigram_stats, &bigram_stats, &char_stats), - 0.0 - ); - } - // --- Replay invariance --- #[test] @@ -1217,50 +997,6 @@ mod tests { assert_eq!(bigram_stats.stats[&key].last_seen_drill_index, 42); } - #[test] - fn prune_recency_correct_with_mixed_drill_indices() { - // Simulate interleaved partial (indices 0,1,3) and full (indices 2,4) drills. - // The key point: total_drills must match the index space (5, not 2) - // to avoid artificially inflating recency for partial-drill trigrams. - let mut trigram_stats = TrigramStatsStore::default(); - let bigram_stats = BigramStatsStore::default(); - let char_stats = KeyStatsStore::default(); - - // "Old" trigram last seen at drill index 0 (earliest) - let old_key = TrigramKey(['o', 'l', 'd']); - trigram_stats.update(old_key.clone(), 300.0, true, false, 0); - trigram_stats.stats.get_mut(&old_key).unwrap().sample_count = 5; - - // "Mid" trigram last seen at partial drill index 1 - let mid_key = TrigramKey(['m', 'i', 'd']); - trigram_stats.update(mid_key.clone(), 300.0, true, false, 1); - trigram_stats.stats.get_mut(&mid_key).unwrap().sample_count = 5; - - // "New" trigram last seen at drill index 4 (most recent) - let new_key = TrigramKey(['n', 'e', 'w']); - trigram_stats.update(new_key.clone(), 300.0, true, false, 4); - trigram_stats.stats.get_mut(&new_key).unwrap().sample_count = 5; - - // Prune down to 2 entries with total_drills = 5 (matching history length) - trigram_stats.prune(2, 5, &bigram_stats, &char_stats); - - // "New" (index 4) should survive over "old" (index 0) due to higher recency - assert!( - trigram_stats.stats.contains_key(&new_key), - "most recent trigram should survive prune" - ); - assert!( - !trigram_stats.stats.contains_key(&old_key), - "oldest trigram should be pruned" - ); - assert_eq!(trigram_stats.stats.len(), 2); - - // Now verify that using a WRONG total (e.g. 
2 completed drills instead of 5) - // would compress the recency range. We don't assert this breaks ordering here - // since the fix is in app.rs passing the correct total -- this test just confirms - // the correct behavior when the right total is used. - } - // --- Performance budget tests --- // These enforce hard pass/fail limits. Budgets are for release builds; // debug builds are ~10-20x slower, so we apply a 20x multiplier. @@ -1298,7 +1034,7 @@ mod tests { #[test] fn perf_budget_update_under_1ms() { let keystrokes = make_bench_keystrokes(500); - let (bigram_events, _) = extract_ngram_events(&keystrokes, 800.0); + let bigram_events = extract_ngram_events(&keystrokes, 800.0); let budget = std::time::Duration::from_millis(1 * DEBUG_MULTIPLIER as u64); let start = std::time::Instant::now(); @@ -1376,11 +1112,10 @@ mod tests { let start = std::time::Instant::now(); let mut bigram_stats = BigramStatsStore::default(); - let mut trigram_stats = TrigramStatsStore::default(); let mut key_stats = KeyStatsStore::default(); for (drill_idx, keystrokes) in drills.iter().enumerate() { - let (bigram_events, trigram_events) = extract_ngram_events(keystrokes, 800.0); + let bigram_events = extract_ngram_events(keystrokes, 800.0); for kt in keystrokes { if kt.correct { @@ -1400,21 +1135,11 @@ mod tests { drill_idx as u32, ); } - for ev in &trigram_events { - trigram_stats.update( - ev.key.clone(), - ev.total_time_ms, - ev.correct, - ev.has_hesitation, - drill_idx as u32, - ); - } } let elapsed = start.elapsed(); // Sanity: we actually processed data assert!(!bigram_stats.stats.is_empty()); - assert!(!trigram_stats.stats.is_empty()); assert!( elapsed < budget, diff --git a/src/main.rs b/src/main.rs index 32af570..aa2725c 100644 --- a/src/main.rs +++ b/src/main.rs @@ -4918,17 +4918,6 @@ mod review_tests { stat.error_rate_ema = 0.60; stat.error_anomaly_streak = 1; - // Add a trigram to verify count - let the_key = crate::engine::ngram_stats::TrigramKey(['t', 'h', 'e']); - 
app.ranked_trigram_stats - .stats - .entry(the_key) - .or_default() - .sample_count = 5; - - // Set trigram gain history - app.trigram_gain_history.push(0.12); - // Set drill scope app.drill_scope = DrillScope::Global; app.stats_tab = 5; @@ -4938,15 +4927,8 @@ mod review_tests { // Verify scope label assert_eq!(data.scope_label, "Global"); - // Verify trigram gain - assert_eq!(data.latest_trigram_gain, Some(0.12)); - - // Verify bigram/trigram counts + // Verify bigram count assert_eq!(data.total_bigrams, app.ranked_bigram_stats.stats.len()); - assert!( - data.total_trigrams >= 1, - "should include at least our test trigram" - ); // Verify hesitation threshold assert!(data.hesitation_threshold_ms >= 800.0); @@ -5621,16 +5603,12 @@ fn build_ngram_tab_data(app: &App) -> NgramTabData { let hesitation_threshold_ms = ngram_stats::hesitation_threshold(app.user_median_transition_ms); - let latest_trigram_gain = app.trigram_gain_history.last().copied(); - NgramTabData { focus, error_anomalies, speed_anomalies, total_bigrams: app.ranked_bigram_stats.stats.len(), - total_trigrams: app.ranked_trigram_stats.stats.len(), hesitation_threshold_ms, - latest_trigram_gain, scope_label, } } diff --git a/src/ui/components/stats_dashboard.rs b/src/ui/components/stats_dashboard.rs index aff1d1c..59e3c7c 100644 --- a/src/ui/components/stats_dashboard.rs +++ b/src/ui/components/stats_dashboard.rs @@ -36,9 +36,7 @@ pub struct NgramTabData { pub error_anomalies: Vec, pub speed_anomalies: Vec, pub total_bigrams: usize, - pub total_trigrams: usize, pub hesitation_threshold_ms: f64, - pub latest_trigram_gain: Option, pub scope_label: String, } @@ -1636,24 +1634,12 @@ impl StatsDashboard<'_> { let colors = &self.theme.colors; let w = area.width as usize; - let gain_str = match data.latest_trigram_gain { - Some(g) => format!("{:.1}%", g * 100.0), - None => "--".to_string(), - }; - // Build segments from most to least important, progressively drop from the right let scope = 
t!("stats.scope_label_prefix", ).to_string() + &data.scope_label; let bigrams = t!("stats.bi_label", count = data.total_bigrams).to_string(); - let trigrams = t!("stats.tri_label", count = data.total_trigrams).to_string(); let hesitation = t!("stats.hes_label", ms = format!("{:.0}", data.hesitation_threshold_ms)).to_string(); - let gain = t!("stats.gain_label", value = &gain_str).to_string(); - let gain_note_str = if data.latest_trigram_gain.is_none() { - t!("stats.gain_interval").to_string() - } else { - String::new() - }; - let segments: &[&str] = &[&scope, &bigrams, &trigrams, &hesitation, &gain, &gain_note_str]; + let segments: &[&str] = &[&scope, &bigrams, &hesitation]; let mut line = String::new(); for seg in segments { if line.len() + seg.len() <= w {