Remove trigram anomaly calculation

Decided that this wasn't worth it and bigram anomalies are enough.
This commit is contained in:
2026-04-11 00:11:45 +00:00
parent d1fde5c0c1
commit 232f93e054
26 changed files with 23 additions and 489 deletions

View File

@@ -2,7 +2,7 @@ use criterion::{Criterion, black_box, criterion_group, criterion_main};
use keydr::engine::key_stats::KeyStatsStore;
use keydr::engine::ngram_stats::{
BigramKey, BigramStatsStore, TrigramStatsStore, extract_ngram_events,
BigramKey, BigramStatsStore, extract_ngram_events,
};
use keydr::session::result::KeyTime;
@@ -20,14 +20,14 @@ fn make_keystrokes(count: usize) -> Vec<KeyTime> {
fn bench_extraction(c: &mut Criterion) {
let keystrokes = make_keystrokes(500);
c.bench_function("extract_ngram_events (500 keystrokes)", |b| {
c.bench_function("extract_bigrams (500 keystrokes)", |b| {
b.iter(|| extract_ngram_events(black_box(&keystrokes), 800.0))
});
}
fn bench_update(c: &mut Criterion) {
let keystrokes = make_keystrokes(500);
let (bigram_events, _) = extract_ngram_events(&keystrokes, 800.0);
let bigram_events = extract_ngram_events(&keystrokes, 800.0);
c.bench_function("bigram_stats update (400 events)", |b| {
b.iter(|| {
@@ -93,11 +93,10 @@ fn bench_history_replay(c: &mut Criterion) {
c.bench_function("history replay (500 drills x 300 keystrokes)", |b| {
b.iter(|| {
let mut bigram_stats = BigramStatsStore::default();
let mut trigram_stats = TrigramStatsStore::default();
let mut key_stats = KeyStatsStore::default();
for (drill_idx, keystrokes) in drills.iter().enumerate() {
let (bigram_events, trigram_events) = extract_ngram_events(keystrokes, 800.0);
let bigram_events = extract_ngram_events(keystrokes, 800.0);
for kt in keystrokes {
if kt.correct {
@@ -117,18 +116,9 @@ fn bench_history_replay(c: &mut Criterion) {
drill_idx as u32,
);
}
for ev in &trigram_events {
trigram_stats.update(
ev.key.clone(),
ev.total_time_ms,
ev.correct,
ev.has_hesitation,
drill_idx as u32,
);
}
}
(bigram_stats, trigram_stats, key_stats)
(bigram_stats, key_stats)
})
});
}

View File

@@ -146,10 +146,7 @@ stats:
no_speed_anomalies: ' Nebyly detekovany anomalie rychlosti'
scope_label_prefix: ' '
bi_label: ' | Bi: %{count}'
tri_label: ' | Tri: %{count}'
hes_label: ' | Vah: >%{ms}ms'
gain_label: ' | Zisk: %{value}'
gain_interval: ' (kazdych 50)'
focus_char_value: 'Znak ''%{ch}'''
# Activity heatmap

View File

@@ -146,10 +146,7 @@ stats:
no_speed_anomalies: ' Ingen hastigheds-anomalier opdaget'
scope_label_prefix: ' '
bi_label: ' | Bi: %{count}'
tri_label: ' | Tri: %{count}'
hes_label: ' | Hes: >%{ms}ms'
gain_label: ' | Gevinst: %{value}'
gain_interval: ' (hver 50.)'
focus_char_value: 'Tegn ''%{ch}'''
# Activity heatmap

View File

@@ -146,10 +146,7 @@ stats:
no_speed_anomalies: ' Keine Tempo-Anomalien erkannt'
scope_label_prefix: ' '
bi_label: ' | Bi: %{count}'
tri_label: ' | Tri: %{count}'
hes_label: ' | Hes: >%{ms}ms'
gain_label: ' | Gewinn: %{value}'
gain_interval: ' (alle 50)'
focus_char_value: 'Zeichen ''%{ch}'''
# Activity heatmap

View File

@@ -146,10 +146,7 @@ stats:
no_speed_anomalies: ' No speed anomalies detected'
scope_label_prefix: ' '
bi_label: ' | Bi: %{count}'
tri_label: ' | Tri: %{count}'
hes_label: ' | Hes: >%{ms}ms'
gain_label: ' | Gain: %{value}'
gain_interval: ' (every 50)'
focus_char_value: 'Char ''%{ch}'''
# Activity heatmap

View File

@@ -146,10 +146,7 @@ stats:
no_speed_anomalies: ' No se detectaron anomalías de velocidad'
scope_label_prefix: ' '
bi_label: ' | Bi: %{count}'
tri_label: ' | Tri: %{count}'
hes_label: ' | Hes: >%{ms}ms'
gain_label: ' | Gan: %{value}'
gain_interval: ' (cada 50)'
focus_char_value: 'Carácter ''%{ch}'''
# Mapa de actividad

View File

@@ -146,10 +146,7 @@ stats:
no_speed_anomalies: ' Kiiruse anomaaliaid ei tuvastatud'
scope_label_prefix: ' '
bi_label: ' | Bi: %{count}'
tri_label: ' | Tri: %{count}'
hes_label: ' | Kõhk: >%{ms}ms'
gain_label: ' | Kasv: %{value}'
gain_interval: ' (iga 50)'
focus_char_value: 'Märk ''%{ch}'''
# Aktiivsuse soojuskaart

View File

@@ -146,10 +146,7 @@ stats:
no_speed_anomalies: ' Nopeuspoikkeamia ei havaittu'
scope_label_prefix: ' '
bi_label: ' | Bi: %{count}'
tri_label: ' | Tri: %{count}'
hes_label: ' | Epär: >%{ms}ms'
gain_label: ' | Kasvu: %{value}'
gain_interval: ' (joka 50.)'
focus_char_value: 'Merkki ''%{ch}'''
# Activity heatmap

View File

@@ -146,10 +146,7 @@ stats:
no_speed_anomalies: ' Aucune anomalie de vitesse détectée'
scope_label_prefix: ' '
bi_label: ' | Bi : %{count}'
tri_label: ' | Tri : %{count}'
hes_label: ' | Hés : >%{ms}ms'
gain_label: ' | Gain : %{value}'
gain_interval: ' (tous les 50)'
focus_char_value: 'Caractère ''%{ch}'''
# Carte d'activité

View File

@@ -146,10 +146,7 @@ stats:
no_speed_anomalies: ' Nema otkrivenih anomalija brzine'
scope_label_prefix: ' '
bi_label: ' | Bi: %{count}'
tri_label: ' | Tri: %{count}'
hes_label: ' | Hes: >%{ms}ms'
gain_label: ' | Dobitak: %{value}'
gain_interval: ' (svakih 50)'
focus_char_value: 'Znak ''%{ch}'''
# Activity heatmap

View File

@@ -146,10 +146,7 @@ stats:
no_speed_anomalies: ' Nem észlelt sebesség anomáliák'
scope_label_prefix: ' '
bi_label: ' | Bi: %{count}'
tri_label: ' | Tri: %{count}'
hes_label: ' | Hes: >%{ms}ms'
gain_label: ' | Nyereség: %{value}'
gain_interval: ' (minden 50.)'
focus_char_value: 'Kar. ''%{ch}'''
# Activity heatmap

View File

@@ -146,10 +146,7 @@ stats:
no_speed_anomalies: ' Nessuna anomalia di velocità rilevata'
scope_label_prefix: ' '
bi_label: ' | Bi: %{count}'
tri_label: ' | Tri: %{count}'
hes_label: ' | Esi: >%{ms}ms'
gain_label: ' | Guad: %{value}'
gain_interval: ' (ogni 50)'
focus_char_value: 'Carattere ''%{ch}'''
# Mappa di attività

View File

@@ -146,10 +146,7 @@ stats:
no_speed_anomalies: ' Greičio anomalijų nerasta'
scope_label_prefix: ' '
bi_label: ' | Bi: %{count}'
tri_label: ' | Tri: %{count}'
hes_label: ' | Hes: >%{ms}ms'
gain_label: ' | Augimas: %{value}'
gain_interval: ' (kas 50)'
focus_char_value: 'Simb. ''%{ch}'''
# Activity heatmap

View File

@@ -146,10 +146,7 @@ stats:
no_speed_anomalies: ' Ātruma anomālijas nav atrastas'
scope_label_prefix: ' '
bi_label: ' | Bi: %{count}'
tri_label: ' | Tri: %{count}'
hes_label: ' | Hes: >%{ms}ms'
gain_label: ' | Pieaugums: %{value}'
gain_interval: ' (katrus 50)'
focus_char_value: 'Simb. ''%{ch}'''
# Activity heatmap

View File

@@ -146,10 +146,7 @@ stats:
no_speed_anomalies: ' Ingen hastighets-anomalier oppdaget'
scope_label_prefix: ' '
bi_label: ' | Bi: %{count}'
tri_label: ' | Tri: %{count}'
hes_label: ' | Hes: >%{ms}ms'
gain_label: ' | Gevinst: %{value}'
gain_interval: ' (hver 50.)'
focus_char_value: 'Tegn ''%{ch}'''
# Activity heatmap

View File

@@ -146,10 +146,7 @@ stats:
no_speed_anomalies: ' Geen snelheid-anomalieen gedetecteerd'
scope_label_prefix: ' '
bi_label: ' | Bi: %{count}'
tri_label: ' | Tri: %{count}'
hes_label: ' | Hes: >%{ms}ms'
gain_label: ' | Winst: %{value}'
gain_interval: ' (elke 50)'
focus_char_value: 'Teken ''%{ch}'''
# Activity heatmap

View File

@@ -146,10 +146,7 @@ stats:
no_speed_anomalies: ' Nie wykryto anomalii predkosci'
scope_label_prefix: ' '
bi_label: ' | Bi: %{count}'
tri_label: ' | Tri: %{count}'
hes_label: ' | Wah: >%{ms}ms'
gain_label: ' | Zysk: %{value}'
gain_interval: ' (co 50)'
focus_char_value: 'Znak ''%{ch}'''
# Activity heatmap

View File

@@ -146,10 +146,7 @@ stats:
no_speed_anomalies: ' Nenhuma anomalia de velocidade detectada'
scope_label_prefix: ' '
bi_label: ' | Bi: %{count}'
tri_label: ' | Tri: %{count}'
hes_label: ' | Hes: >%{ms}ms'
gain_label: ' | Gan: %{value}'
gain_interval: ' (a cada 50)'
focus_char_value: 'Caractere ''%{ch}'''
# Mapa de atividade

View File

@@ -146,10 +146,7 @@ stats:
no_speed_anomalies: ' Nicio anomalie de viteza detectata'
scope_label_prefix: ' '
bi_label: ' | Bi: %{count}'
tri_label: ' | Tri: %{count}'
hes_label: ' | Ezit: >%{ms}ms'
gain_label: ' | Castig: %{value}'
gain_interval: ' (la fiecare 50)'
focus_char_value: 'Caracter ''%{ch}'''
# Activity heatmap

View File

@@ -146,10 +146,7 @@ stats:
no_speed_anomalies: ' Ni zaznanih anomalij hitrosti'
scope_label_prefix: ' '
bi_label: ' | Bi: %{count}'
tri_label: ' | Tri: %{count}'
hes_label: ' | Okl: >%{ms}ms'
gain_label: ' | Dobiček: %{value}'
gain_interval: ' (vsakih 50)'
focus_char_value: 'Znak ''%{ch}'''
# Toplotna karta aktivnosti

View File

@@ -146,10 +146,7 @@ stats:
no_speed_anomalies: ' Inga hastighets-anomalier upptaeckta'
scope_label_prefix: ' '
bi_label: ' | Bi: %{count}'
tri_label: ' | Tri: %{count}'
hes_label: ' | Hes: >%{ms}ms'
gain_label: ' | Vinst: %{value}'
gain_interval: ' (var 50:e)'
focus_char_value: 'Tecken ''%{ch}'''
# Activity heatmap

View File

@@ -146,10 +146,7 @@ stats:
no_speed_anomalies: ' Hız anomalisi tespit edilmedi'
scope_label_prefix: ' '
bi_label: ' | Bi: %{count}'
tri_label: ' | Tri: %{count}'
hes_label: ' | Dur: >%{ms}ms'
gain_label: ' | Kazanç: %{value}'
gain_interval: ' (her 50)'
focus_char_value: 'Karakter ''%{ch}'''
# Aktivite ısı haritası

View File

@@ -15,7 +15,7 @@ use crate::engine::FocusSelection;
use crate::engine::filter::CharFilter;
use crate::engine::key_stats::KeyStatsStore;
use crate::engine::ngram_stats::{
self, BigramStatsStore, TrigramStatsStore, extract_ngram_events, select_focus,
self, BigramStatsStore, extract_ngram_events, select_focus,
};
use crate::engine::scoring;
use crate::engine::skill_tree::{BranchId, BranchStatus, DrillScope, SkillTree, SkillTreeProgress};
@@ -382,11 +382,8 @@ pub struct App {
pub explorer_accuracy_cache_ranked: Option<(char, usize, usize)>,
pub bigram_stats: BigramStatsStore,
pub ranked_bigram_stats: BigramStatsStore,
pub trigram_stats: TrigramStatsStore,
pub ranked_trigram_stats: TrigramStatsStore,
pub user_median_transition_ms: f64,
pub transition_buffer: Vec<f64>,
pub trigram_gain_history: Vec<f64>,
pub current_focus: Option<FocusSelection>,
pub post_drill_input_lock_until: Option<Instant>,
adaptive_word_history: VecDeque<HashSet<String>>,
@@ -590,11 +587,8 @@ impl App {
explorer_accuracy_cache_ranked: None,
bigram_stats: BigramStatsStore::default(),
ranked_bigram_stats: BigramStatsStore::default(),
trigram_stats: TrigramStatsStore::default(),
ranked_trigram_stats: TrigramStatsStore::default(),
user_median_transition_ms: 0.0,
transition_buffer: Vec::new(),
trigram_gain_history: Vec::new(),
current_focus: None,
post_drill_input_lock_until: None,
adaptive_word_history: VecDeque::new(),
@@ -1159,7 +1153,7 @@ impl App {
let drill_index = self.drill_history.len() as u32;
let hesitation_thresh =
ngram_stats::hesitation_threshold(self.user_median_transition_ms);
let (bigram_events, trigram_events) =
let bigram_events =
extract_ngram_events(&result.per_key_times, hesitation_thresh);
// Collect unique bigram keys for per-drill streak updates
let mut seen_bigrams: std::collections::HashSet<ngram_stats::BigramKey> =
@@ -1181,15 +1175,6 @@ impl App {
self.bigram_stats
.update_speed_anomaly_streak(key, &self.key_stats);
}
for ev in &trigram_events {
self.trigram_stats.update(
ev.key.clone(),
ev.total_time_ms,
ev.correct,
ev.has_hesitation,
drill_index,
);
}
if ranked {
let mut seen_ranked_bigrams: std::collections::HashSet<ngram_stats::BigramKey> =
@@ -1217,15 +1202,6 @@ impl App {
self.ranked_bigram_stats
.update_speed_anomaly_streak(key, &self.ranked_key_stats);
}
for ev in &trigram_events {
self.ranked_trigram_stats.update(
ev.key.clone(),
ev.total_time_ms,
ev.correct,
ev.has_hesitation,
drill_index,
);
}
let update = self
.skill_tree
.update(&self.ranked_key_stats, before_stats.as_ref());
@@ -1351,16 +1327,6 @@ impl App {
// Update transition buffer for hesitation baseline
self.update_transition_buffer(&result.per_key_times);
// Periodic trigram marginal gain analysis (every 50 drills)
if self.profile.total_drills % 50 == 0 && self.profile.total_drills > 0 {
let gain = ngram_stats::trigram_marginal_gain(
&self.ranked_trigram_stats,
&self.ranked_bigram_stats,
&self.ranked_key_stats,
);
self.trigram_gain_history.push(gain);
}
self.drill_history.push(result.clone());
if self.drill_history.len() > 500 {
self.drill_history.remove(0);
@@ -1406,7 +1372,7 @@ impl App {
let drill_index = self.drill_history.len() as u32;
let hesitation_thresh =
ngram_stats::hesitation_threshold(self.user_median_transition_ms);
let (bigram_events, trigram_events) =
let bigram_events =
extract_ngram_events(&result.per_key_times, hesitation_thresh);
let mut seen_bigrams: std::collections::HashSet<ngram_stats::BigramKey> =
std::collections::HashSet::new();
@@ -1426,15 +1392,6 @@ impl App {
self.bigram_stats
.update_speed_anomaly_streak(key, &self.key_stats);
}
for ev in &trigram_events {
self.trigram_stats.update(
ev.key.clone(),
ev.total_time_ms,
ev.correct,
ev.has_hesitation,
drill_index,
);
}
// Update transition buffer for hesitation baseline
self.update_transition_buffer(&result.per_key_times);
@@ -1497,8 +1454,6 @@ impl App {
// Reset n-gram stores
self.bigram_stats = BigramStatsStore::default();
self.ranked_bigram_stats = BigramStatsStore::default();
self.trigram_stats = TrigramStatsStore::default();
self.ranked_trigram_stats = TrigramStatsStore::default();
self.transition_buffer.clear();
self.user_median_transition_ms = 0.0;
@@ -1520,7 +1475,7 @@ impl App {
for (drill_index, result) in history.iter().enumerate() {
let hesitation_thresh =
ngram_stats::hesitation_threshold(self.user_median_transition_ms);
let (bigram_events, trigram_events) =
let bigram_events =
extract_ngram_events(&result.per_key_times, hesitation_thresh);
// Rebuild char-level error/total counts and EMA from history
@@ -1560,15 +1515,6 @@ impl App {
self.bigram_stats
.update_speed_anomaly_streak(key, &self.key_stats);
}
for ev in &trigram_events {
self.trigram_stats.update(
ev.key.clone(),
ev.total_time_ms,
ev.correct,
ev.has_hesitation,
drill_index as u32,
);
}
if result.ranked {
let mut seen_ranked_bigrams: std::collections::HashSet<ngram_stats::BigramKey> =
@@ -1603,15 +1549,6 @@ impl App {
self.ranked_bigram_stats
.update_speed_anomaly_streak(key, &self.ranked_key_stats);
}
for ev in &trigram_events {
self.ranked_trigram_stats.update(
ev.key.clone(),
ev.total_time_ms,
ev.correct,
ev.has_hesitation,
drill_index as u32,
);
}
}
// Update transition buffer
@@ -1630,22 +1567,6 @@ impl App {
// Put drill_history back
self.drill_history = history;
// Prune trigrams — use drill_history.len() as total, matching the drill_index
// space used in last_seen_drill_index above (history position, includes partials)
let total_history_entries = self.drill_history.len() as u32;
self.trigram_stats.prune(
ngram_stats::MAX_TRIGRAMS,
total_history_entries,
&self.bigram_stats,
&self.key_stats,
);
self.ranked_trigram_stats.prune(
ngram_stats::MAX_TRIGRAMS,
total_history_entries,
&self.ranked_bigram_stats,
&self.ranked_key_stats,
);
}
pub fn retry_drill(&mut self) {
@@ -3125,11 +3046,8 @@ impl App {
explorer_accuracy_cache_ranked: None,
bigram_stats: BigramStatsStore::default(),
ranked_bigram_stats: BigramStatsStore::default(),
trigram_stats: TrigramStatsStore::default(),
ranked_trigram_stats: TrigramStatsStore::default(),
user_median_transition_ms: 0.0,
transition_buffer: Vec::new(),
trigram_gain_history: Vec::new(),
current_focus: None,
post_drill_input_lock_until: None,
adaptive_word_history: VecDeque::new(),

View File

@@ -15,8 +15,6 @@ pub(crate) const MIN_SAMPLES_FOR_FOCUS: usize = 20;
const ANOMALY_MIN_SAMPLES: usize = 3;
const SPEED_ANOMALY_PCT_THRESHOLD: f64 = 50.0;
const MIN_CHAR_SAMPLES_FOR_SPEED: usize = 10;
const MAX_TRIGRAM_ENTRIES: usize = 5000;
// ---------------------------------------------------------------------------
// N-gram keys
// ---------------------------------------------------------------------------
@@ -24,9 +22,6 @@ const MAX_TRIGRAM_ENTRIES: usize = 5000;
#[derive(Clone, Debug, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub struct BigramKey(pub [char; 2]);
#[derive(Clone, Debug, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub struct TrigramKey(pub [char; 3]);
// ---------------------------------------------------------------------------
// NgramStat
// ---------------------------------------------------------------------------
@@ -347,102 +342,6 @@ impl BigramStatsStore {
}
}
// ---------------------------------------------------------------------------
// TrigramStatsStore
// ---------------------------------------------------------------------------
#[derive(Clone, Debug, Default, Serialize, Deserialize)]
pub struct TrigramStatsStore {
pub stats: HashMap<TrigramKey, NgramStat>,
}
impl TrigramStatsStore {
pub fn update(
&mut self,
key: TrigramKey,
time_ms: f64,
correct: bool,
hesitation: bool,
drill_index: u32,
) {
let stat = self.stats.entry(key).or_default();
update_stat(stat, time_ms, correct, hesitation, drill_index);
}
pub fn smoothed_error_rate(&self, key: &TrigramKey) -> f64 {
match self.stats.get(key) {
Some(s) => s.error_rate_ema,
None => 0.5,
}
}
pub fn redundancy_score(
&self,
key: &TrigramKey,
bigram_stats: &BigramStatsStore,
char_stats: &KeyStatsStore,
) -> f64 {
let e_a = char_stats.smoothed_error_rate(key.0[0]);
let e_b = char_stats.smoothed_error_rate(key.0[1]);
let e_c = char_stats.smoothed_error_rate(key.0[2]);
let e_abc = self.smoothed_error_rate(key);
let expected_from_chars = 1.0 - (1.0 - e_a) * (1.0 - e_b) * (1.0 - e_c);
let e_ab = bigram_stats.smoothed_error_rate(&BigramKey([key.0[0], key.0[1]]));
let e_bc = bigram_stats.smoothed_error_rate(&BigramKey([key.0[1], key.0[2]]));
let expected_from_bigrams = e_ab.max(e_bc);
let expected = expected_from_chars.max(expected_from_bigrams);
e_abc / expected.max(0.01)
}
/// Prune to `max_entries` by composite utility score.
/// `total_drills` is the current total drill count for recency calculation.
pub fn prune(
&mut self,
max_entries: usize,
total_drills: u32,
bigram_stats: &BigramStatsStore,
char_stats: &KeyStatsStore,
) {
if self.stats.len() <= max_entries {
return;
}
let recency_weight = 0.3;
let signal_weight = 0.5;
let data_weight = 0.2;
let mut scored: Vec<(TrigramKey, f64)> = self
.stats
.iter()
.map(|(key, stat)| {
let drills_since = total_drills.saturating_sub(stat.last_seen_drill_index) as f64;
let recency = 1.0 / (drills_since + 1.0);
let redundancy = self
.redundancy_score(key, bigram_stats, char_stats)
.min(3.0);
let data = (stat.sample_count as f64).ln_1p();
let utility =
recency_weight * recency + signal_weight * redundancy + data_weight * data;
(key.clone(), utility)
})
.collect();
scored.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(std::cmp::Ordering::Equal));
scored.truncate(max_entries);
let keep: HashMap<TrigramKey, NgramStat> = scored
.into_iter()
.filter_map(|(key, _)| self.stats.remove(&key).map(|stat| (key, stat)))
.collect();
self.stats = keep;
}
}
// ---------------------------------------------------------------------------
// Extraction events & function
// ---------------------------------------------------------------------------
@@ -455,27 +354,17 @@ pub struct BigramEvent {
pub has_hesitation: bool,
}
#[derive(Debug)]
pub struct TrigramEvent {
pub key: TrigramKey,
pub total_time_ms: f64,
pub correct: bool,
pub has_hesitation: bool,
}
/// Extract bigram and trigram events from a sequence of per-key times.
/// Extract bigram events from a sequence of per-key times.
///
/// - BACKSPACE entries are filtered out
/// - Space characters split windows (no cross-word n-grams)
/// - Space characters split windows (no cross-word bigrams)
/// - For bigram "ab": time = window[1].time_ms
/// - For trigram "abc": time = window[1].time_ms + window[2].time_ms
/// - hesitation = any transition time > hesitation_threshold
/// - hesitation = transition time > hesitation_threshold
pub fn extract_ngram_events(
per_key_times: &[KeyTime],
hesitation_threshold: f64,
) -> (Vec<BigramEvent>, Vec<TrigramEvent>) {
) -> Vec<BigramEvent> {
let mut bigrams = Vec::new();
let mut trigrams = Vec::new();
// Filter out backspace entries
let filtered: Vec<&KeyTime> = per_key_times
@@ -505,30 +394,7 @@ pub fn extract_ngram_events(
});
}
// Extract trigrams: slide a window of 3
for window in filtered.windows(3) {
let a = window[0];
let b = window[1];
let c = window[2];
// Skip if any is a space (no cross-word)
if a.key == ' ' || b.key == ' ' || c.key == ' ' {
continue;
}
let time_ms = b.time_ms + c.time_ms;
let correct = a.correct && b.correct && c.correct;
let has_hesitation = b.time_ms > hesitation_threshold || c.time_ms > hesitation_threshold;
trigrams.push(TrigramEvent {
key: TrigramKey([a.key, b.key, c.key]),
total_time_ms: time_ms,
correct,
has_hesitation,
});
}
(bigrams, trigrams)
bigrams
}
// ---------------------------------------------------------------------------
@@ -580,39 +446,6 @@ pub fn select_focus(
}
}
// ---------------------------------------------------------------------------
// Trigram marginal gain analysis
// ---------------------------------------------------------------------------
/// Compute what fraction of trigrams with sufficient samples show genuine
/// redundancy beyond their constituent bigrams. Returns a value in [0.0, 1.0].
pub fn trigram_marginal_gain(
trigram_stats: &TrigramStatsStore,
bigram_stats: &BigramStatsStore,
char_stats: &KeyStatsStore,
) -> f64 {
let qualified: Vec<&TrigramKey> = trigram_stats
.stats
.iter()
.filter(|(_, s)| s.sample_count >= MIN_SAMPLES_FOR_FOCUS)
.map(|(k, _)| k)
.collect();
if qualified.is_empty() {
return 0.0;
}
let with_signal = qualified
.iter()
.filter(|k| {
trigram_stats.redundancy_score(k, bigram_stats, char_stats)
> ERROR_ANOMALY_RATIO_THRESHOLD
})
.count();
with_signal as f64 / qualified.len() as f64
}
// ---------------------------------------------------------------------------
// Hesitation helpers
// ---------------------------------------------------------------------------
@@ -636,9 +469,6 @@ pub fn compute_median(values: &mut [f64]) -> f64 {
}
}
/// Constant for max trigram entries (used by App during pruning).
pub const MAX_TRIGRAMS: usize = MAX_TRIGRAM_ENTRIES;
// ---------------------------------------------------------------------------
// Tests
// ---------------------------------------------------------------------------
@@ -666,15 +496,11 @@ mod tests {
make_keytime('l', 180.0, true),
make_keytime('o', 160.0, true),
];
let (bigrams, trigrams) = extract_ngram_events(&times, 800.0);
let bigrams = extract_ngram_events(&times, 800.0);
assert_eq!(bigrams.len(), 4); // he, el, ll, lo
assert_eq!(bigrams[0].key, BigramKey(['h', 'e']));
assert_eq!(bigrams[0].total_time_ms, 200.0);
assert!(bigrams[0].correct);
assert_eq!(trigrams.len(), 3); // hel, ell, llo
assert_eq!(trigrams[0].key, TrigramKey(['h', 'e', 'l']));
assert_eq!(trigrams[0].total_time_ms, 200.0 + 150.0); // e.time + l.time
}
#[test]
@@ -685,7 +511,7 @@ mod tests {
make_keytime(BACKSPACE, 150.0, true),
make_keytime('b', 180.0, true),
];
let (bigrams, _) = extract_ngram_events(&times, 800.0);
let bigrams = extract_ngram_events(&times, 800.0);
// After filtering backspace: a, x, b -> bigrams: ax, xb
assert_eq!(bigrams.len(), 2);
assert_eq!(bigrams[0].key, BigramKey(['a', 'x']));
@@ -701,13 +527,11 @@ mod tests {
make_keytime('c', 180.0, true),
make_keytime('d', 160.0, true),
];
let (bigrams, trigrams) = extract_ngram_events(&times, 800.0);
let bigrams = extract_ngram_events(&times, 800.0);
// ab is valid, b-space skipped, space-c skipped, cd is valid
assert_eq!(bigrams.len(), 2);
assert_eq!(bigrams[0].key, BigramKey(['a', 'b']));
assert_eq!(bigrams[1].key, BigramKey(['c', 'd']));
// Only trigram with no space: none (ab_space and space_cd both have space)
assert_eq!(trigrams.len(), 0);
}
#[test]
@@ -717,7 +541,7 @@ mod tests {
make_keytime('b', 900.0, true), // > 800 threshold
make_keytime('c', 200.0, true),
];
let (bigrams, _) = extract_ngram_events(&times, 800.0);
let bigrams = extract_ngram_events(&times, 800.0);
assert!(bigrams[0].has_hesitation); // ab: b.time = 900 > 800
assert!(!bigrams[1].has_hesitation); // bc: c.time = 200 < 800
}
@@ -729,10 +553,9 @@ mod tests {
make_keytime('b', 200.0, false), // incorrect
make_keytime('c', 150.0, true),
];
let (bigrams, trigrams) = extract_ngram_events(&times, 800.0);
let bigrams = extract_ngram_events(&times, 800.0);
assert!(!bigrams[0].correct); // ab: a correct, b incorrect -> false
assert!(!bigrams[1].correct); // bc: b incorrect, c correct -> false
assert!(!trigrams[0].correct); // abc: b incorrect -> false
}
// --- EMA error rate tests ---
@@ -851,36 +674,6 @@ mod tests {
);
}
#[test]
fn redundancy_trigram_explained_by_bigram() {
// "the" where "th" bigram explains the difficulty
let mut char_stats = KeyStatsStore::default();
for &(ch, ema) in &[('t', 0.03), ('h', 0.04), ('e', 0.04)] {
let s = char_stats.stats.entry(ch).or_default();
s.error_rate_ema = ema;
}
let mut bigram_stats = BigramStatsStore::default();
let th_stat = bigram_stats.stats.entry(BigramKey(['t', 'h'])).or_default();
th_stat.error_rate_ema = 0.15;
th_stat.sample_count = 100;
let he_stat = bigram_stats.stats.entry(BigramKey(['h', 'e'])).or_default();
he_stat.error_rate_ema = 0.04;
he_stat.sample_count = 100;
let mut trigram_stats = TrigramStatsStore::default();
let the_key = TrigramKey(['t', 'h', 'e']);
let the_stat = trigram_stats.stats.entry(the_key.clone()).or_default();
the_stat.error_rate_ema = 0.16;
the_stat.sample_count = 100;
let redundancy = trigram_stats.redundancy_score(&the_key, &bigram_stats, &char_stats);
assert!(
redundancy < ERROR_ANOMALY_RATIO_THRESHOLD,
"Trigram 'the' explained by 'th' bigram should have redundancy < {ERROR_ANOMALY_RATIO_THRESHOLD}, got {redundancy}"
);
}
// --- Stability gate tests ---
#[test]
@@ -1117,19 +910,6 @@ mod tests {
assert_eq!(compute_median(&mut vals), 0.0);
}
// --- Trigram marginal gain ---
#[test]
fn marginal_gain_zero_when_no_qualified() {
let trigram_stats = TrigramStatsStore::default();
let bigram_stats = BigramStatsStore::default();
let char_stats = KeyStatsStore::default();
assert_eq!(
trigram_marginal_gain(&trigram_stats, &bigram_stats, &char_stats),
0.0
);
}
// --- Replay invariance ---
#[test]
@@ -1217,50 +997,6 @@ mod tests {
assert_eq!(bigram_stats.stats[&key].last_seen_drill_index, 42);
}
#[test]
fn prune_recency_correct_with_mixed_drill_indices() {
// Simulate interleaved partial (indices 0,1,3) and full (indices 2,4) drills.
// The key point: total_drills must match the index space (5, not 2)
// to avoid artificially inflating recency for partial-drill trigrams.
let mut trigram_stats = TrigramStatsStore::default();
let bigram_stats = BigramStatsStore::default();
let char_stats = KeyStatsStore::default();
// "Old" trigram last seen at drill index 0 (earliest)
let old_key = TrigramKey(['o', 'l', 'd']);
trigram_stats.update(old_key.clone(), 300.0, true, false, 0);
trigram_stats.stats.get_mut(&old_key).unwrap().sample_count = 5;
// "Mid" trigram last seen at partial drill index 1
let mid_key = TrigramKey(['m', 'i', 'd']);
trigram_stats.update(mid_key.clone(), 300.0, true, false, 1);
trigram_stats.stats.get_mut(&mid_key).unwrap().sample_count = 5;
// "New" trigram last seen at drill index 4 (most recent)
let new_key = TrigramKey(['n', 'e', 'w']);
trigram_stats.update(new_key.clone(), 300.0, true, false, 4);
trigram_stats.stats.get_mut(&new_key).unwrap().sample_count = 5;
// Prune down to 2 entries with total_drills = 5 (matching history length)
trigram_stats.prune(2, 5, &bigram_stats, &char_stats);
// "New" (index 4) should survive over "old" (index 0) due to higher recency
assert!(
trigram_stats.stats.contains_key(&new_key),
"most recent trigram should survive prune"
);
assert!(
!trigram_stats.stats.contains_key(&old_key),
"oldest trigram should be pruned"
);
assert_eq!(trigram_stats.stats.len(), 2);
// Now verify that using a WRONG total (e.g. 2 completed drills instead of 5)
// would compress the recency range. We don't assert this breaks ordering here
// since the fix is in app.rs passing the correct total -- this test just confirms
// the correct behavior when the right total is used.
}
// --- Performance budget tests ---
// These enforce hard pass/fail limits. Budgets are for release builds;
// debug builds are ~10-20x slower, so we apply a 20x multiplier.
@@ -1298,7 +1034,7 @@ mod tests {
#[test]
fn perf_budget_update_under_1ms() {
let keystrokes = make_bench_keystrokes(500);
let (bigram_events, _) = extract_ngram_events(&keystrokes, 800.0);
let bigram_events = extract_ngram_events(&keystrokes, 800.0);
let budget = std::time::Duration::from_millis(1 * DEBUG_MULTIPLIER as u64);
let start = std::time::Instant::now();
@@ -1376,11 +1112,10 @@ mod tests {
let start = std::time::Instant::now();
let mut bigram_stats = BigramStatsStore::default();
let mut trigram_stats = TrigramStatsStore::default();
let mut key_stats = KeyStatsStore::default();
for (drill_idx, keystrokes) in drills.iter().enumerate() {
let (bigram_events, trigram_events) = extract_ngram_events(keystrokes, 800.0);
let bigram_events = extract_ngram_events(keystrokes, 800.0);
for kt in keystrokes {
if kt.correct {
@@ -1400,21 +1135,11 @@ mod tests {
drill_idx as u32,
);
}
for ev in &trigram_events {
trigram_stats.update(
ev.key.clone(),
ev.total_time_ms,
ev.correct,
ev.has_hesitation,
drill_idx as u32,
);
}
}
let elapsed = start.elapsed();
// Sanity: we actually processed data
assert!(!bigram_stats.stats.is_empty());
assert!(!trigram_stats.stats.is_empty());
assert!(
elapsed < budget,

View File

@@ -4918,17 +4918,6 @@ mod review_tests {
stat.error_rate_ema = 0.60;
stat.error_anomaly_streak = 1;
// Add a trigram to verify count
let the_key = crate::engine::ngram_stats::TrigramKey(['t', 'h', 'e']);
app.ranked_trigram_stats
.stats
.entry(the_key)
.or_default()
.sample_count = 5;
// Set trigram gain history
app.trigram_gain_history.push(0.12);
// Set drill scope
app.drill_scope = DrillScope::Global;
app.stats_tab = 5;
@@ -4938,15 +4927,8 @@ mod review_tests {
// Verify scope label
assert_eq!(data.scope_label, "Global");
// Verify trigram gain
assert_eq!(data.latest_trigram_gain, Some(0.12));
// Verify bigram/trigram counts
// Verify bigram count
assert_eq!(data.total_bigrams, app.ranked_bigram_stats.stats.len());
assert!(
data.total_trigrams >= 1,
"should include at least our test trigram"
);
// Verify hesitation threshold
assert!(data.hesitation_threshold_ms >= 800.0);
@@ -5621,16 +5603,12 @@ fn build_ngram_tab_data(app: &App) -> NgramTabData {
let hesitation_threshold_ms = ngram_stats::hesitation_threshold(app.user_median_transition_ms);
let latest_trigram_gain = app.trigram_gain_history.last().copied();
NgramTabData {
focus,
error_anomalies,
speed_anomalies,
total_bigrams: app.ranked_bigram_stats.stats.len(),
total_trigrams: app.ranked_trigram_stats.stats.len(),
hesitation_threshold_ms,
latest_trigram_gain,
scope_label,
}
}

View File

@@ -36,9 +36,7 @@ pub struct NgramTabData {
pub error_anomalies: Vec<AnomalyBigramRow>,
pub speed_anomalies: Vec<AnomalyBigramRow>,
pub total_bigrams: usize,
pub total_trigrams: usize,
pub hesitation_threshold_ms: f64,
pub latest_trigram_gain: Option<f64>,
pub scope_label: String,
}
@@ -1636,24 +1634,12 @@ impl StatsDashboard<'_> {
let colors = &self.theme.colors;
let w = area.width as usize;
let gain_str = match data.latest_trigram_gain {
Some(g) => format!("{:.1}%", g * 100.0),
None => "--".to_string(),
};
// Build segments from most to least important, progressively drop from the right
let scope = t!("stats.scope_label_prefix", ).to_string() + &data.scope_label;
let bigrams = t!("stats.bi_label", count = data.total_bigrams).to_string();
let trigrams = t!("stats.tri_label", count = data.total_trigrams).to_string();
let hesitation = t!("stats.hes_label", ms = format!("{:.0}", data.hesitation_threshold_ms)).to_string();
let gain = t!("stats.gain_label", value = &gain_str).to_string();
let gain_note_str = if data.latest_trigram_gain.is_none() {
t!("stats.gain_interval").to_string()
} else {
String::new()
};
let segments: &[&str] = &[&scope, &bigrams, &trigrams, &hesitation, &gain, &gain_note_str];
let segments: &[&str] = &[&scope, &bigrams, &hesitation];
let mut line = String::new();
for seg in segments {
if line.len() + seg.len() <= w {