Skill tree progression system & whitespace support

2026-02-15 07:30:34 +00:00
parent 13550505c1
commit 6d6815af02
22 changed files with 2883 additions and 238 deletions
--- a/src/generator/capitalize.rs
+++ b/src/generator/capitalize.rs
@@ -0,0 +1,123 @@
+use rand::rngs::SmallRng;
+use rand::Rng;
+
+/// Post-processing pass that capitalizes word-initial letters in generated text.
+///
+/// A letter is only ever upper-cased when its capital form is listed in
+/// `unlocked_capitals`; with an empty list the text is returned unchanged.
+///
+/// * The first lowercase ASCII letter of a sentence is always capitalized when
+///   its capital is unlocked. A sentence starts at the beginning of `text` and
+///   after `.`, `?` or `!` followed by whitespace.
+/// * Any other lowercase ASCII letter that directly follows whitespace is
+///   capitalized with probability 0.12, or 0.40 when its capital equals
+///   `focused`.
+///
+/// `rng` is consulted once per eligible (unlocked, non-sentence-start) word
+/// start, in text order.
+pub fn apply_capitalization(
+    text: &str,
+    unlocked_capitals: &[char],
+    focused: Option<char>,
+    rng: &mut SmallRng,
+) -> String {
+    if unlocked_capitals.is_empty() {
+        return text.to_string();
+    }
+
+    // Only an uppercase focused key can boost a capital's probability.
+    let focused_upper = focused.filter(|ch| ch.is_ascii_uppercase());
+
+    let mut result = String::with_capacity(text.len());
+    let mut at_sentence_start = true;
+    // The previous character is tracked directly instead of re-indexing
+    // `text`: a char index is not a byte offset once the text contains
+    // multi-byte characters.
+    let mut prev: Option<char> = None;
+
+    for ch in text.chars() {
+        let before = prev.replace(ch);
+
+        if ch.is_ascii_lowercase() {
+            let upper = ch.to_ascii_uppercase();
+            let unlocked = unlocked_capitals.contains(&upper);
+            let capitalize = if at_sentence_start {
+                // The first letter of a sentence consumes the flag whether or
+                // not its capital is unlocked; no randomness is involved.
+                at_sentence_start = false;
+                unlocked
+            } else if unlocked && matches!(before, Some(c) if c.is_whitespace()) {
+                let prob = if focused_upper == Some(upper) { 0.40 } else { 0.12 };
+                rng.gen_bool(prob)
+            } else {
+                false
+            };
+            result.push(if capitalize { upper } else { ch });
+            continue;
+        }
+
+        if ch.is_whitespace() {
+            // Whitespace right after a terminator opens a new sentence. Any
+            // other whitespace leaves the flag untouched, so the separator
+            // itself never cancels a pending sentence start.
+            if matches!(before, Some('.' | '?' | '!')) {
+                at_sentence_start = true;
+            }
+        } else if !matches!(ch, '.' | '?' | '!') {
+            // Digits, symbols and existing capitals begin the sentence themselves.
+            at_sentence_start = false;
+        }
+
+        result.push(ch);
+    }
+
+    result
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use rand::SeedableRng;
+
+    #[test]
+    fn test_no_caps_when_empty() {
+        let mut rng = SmallRng::seed_from_u64(42);
+        let result = apply_capitalization("hello world", &[], None, &mut rng);
+        assert_eq!(result, "hello world");
+    }
+
+    #[test]
+    fn test_capitalizes_first_word() {
+        let mut rng = SmallRng::seed_from_u64(42);
+        let result = apply_capitalization("hello world", &['H', 'W'], None, &mut rng);
+        assert!(result.starts_with('H'));
+    }
+
+    #[test]
+    fn test_only_capitalizes_unlocked() {
+        let mut rng = SmallRng::seed_from_u64(42);
+        // Only 'W' is unlocked, not 'H'
+        let result = apply_capitalization("hello world", &['W'], None, &mut rng);
+        assert!(result.starts_with('h')); // 'H' not unlocked
+    }
+
+    #[test]
+    fn test_after_period() {
+        let mut rng = SmallRng::seed_from_u64(42);
+        let result = apply_capitalization("one. two", &['O', 'T'], None, &mut rng);
+        // Both sentence starts are deterministic: no random draw is involved.
+        assert_eq!(result, "One. Two");
+    }
+
+    #[test]
+    fn test_after_terminator_and_newline() {
+        let mut rng = SmallRng::seed_from_u64(42);
+        let result = apply_capitalization("one?\ntwo", &['O', 'T'], None, &mut rng);
+        assert_eq!(result, "One?\nTwo");
+    }
+
+    #[test]
+    fn test_multibyte_text_keeps_sentence_detection() {
+        let mut rng = SmallRng::seed_from_u64(42);
+        // 'é' is two bytes long; sentence detection must not depend on byte offsets.
+        let result = apply_capitalization("é. two", &['T'], None, &mut rng);
+        assert_eq!(result, "é. Two");
+    }
+
+    #[test]
+    fn test_focused_capital_boosted() {
+        // With focused 'W', W capitalization should happen more often
+        let caps = &['H', 'W'];
+        let mut focused_count = 0;
+        let mut unfocused_count = 0;
+        // Run many trials to check statistical boosting
+        for seed in 0..200 {
+            let mut rng = SmallRng::seed_from_u64(seed);
+            let text = "hello world wide web wonder what where who will work";
+            let result = apply_capitalization(text, caps, Some('W'), &mut rng);
+            // Count W capitalizations (skip first word which is always capitalized if 'H' is available)
+            focused_count += result.matches('W').count();
+            let mut rng2 = SmallRng::seed_from_u64(seed);
+            let result2 = apply_capitalization(text, caps, None, &mut rng2);
+            unfocused_count += result2.matches('W').count();
+        }
+        assert!(
+            focused_count > unfocused_count,
+            "Focused W count ({focused_count}) should exceed unfocused ({unfocused_count})"
+        );
+    }
+}
--- a/src/generator/code_patterns.rs
+++ b/src/generator/code_patterns.rs
@@ -0,0 +1,220 @@
+use rand::rngs::SmallRng;
+use rand::Rng;
+
+/// Post-processing pass that turns some words of `text` into code-like expressions.
+///
+/// `text` is split on single spaces. Each piece is independently handed to
+/// `generate_code_expr` with probability 0.20, or 0.35 when `focused` is one
+/// of the `unlocked_symbols`; all other pieces are copied verbatim. The pieces
+/// are re-joined with single spaces.
+///
+/// Returns `text` unchanged when `unlocked_symbols` is empty.
+pub fn apply_code_symbols(
+    text: &str,
+    unlocked_symbols: &[char],
+    focused: Option<char>,
+    rng: &mut SmallRng,
+) -> String {
+    if unlocked_symbols.is_empty() {
+        return text.to_string();
+    }
+
+    // A focused key only counts when it is one of the symbols we may emit.
+    let focused_symbol = focused.filter(|sym| unlocked_symbols.contains(sym));
+    let rewrite_prob = match focused_symbol {
+        Some(_) => 0.35,
+        None => 0.20,
+    };
+
+    let mut pieces: Vec<String> = Vec::new();
+    for token in text.split(' ') {
+        let piece = if rng.gen_bool(rewrite_prob) {
+            generate_code_expr(token, unlocked_symbols, focused_symbol, rng)
+        } else {
+            token.to_string()
+        };
+        pieces.push(piece);
+    }
+
+    pieces.join(" ")
+}
+
+/// Wraps `word` in one randomly chosen code-like pattern.
+///
+/// Only patterns whose required symbols are all present in `symbols` are
+/// eligible; when none is eligible, `word` is returned unchanged and `rng` is
+/// not consulted.
+///
+/// When `focused_symbol` is required by at least one eligible pattern, there
+/// is a 50% chance that the pick is restricted to those patterns; otherwise
+/// the pick is uniform over all eligible patterns.
+fn generate_code_expr(
+    word: &str,
+    symbols: &[char],
+    focused_symbol: Option<char>,
+    rng: &mut SmallRng,
+) -> String {
+    // (required symbols, text before the word, text after the word).
+    // The order is significant: the uniform pick indexes into the eligible
+    // subset of this table, so reordering changes seeded output.
+    const PATTERNS: &[(&[char], &str, &str)] = &[
+        // Arithmetic & assignment
+        (&['='], "", " = val"),
+        (&['+'], "", " + num"),
+        (&['*'], "", " * cnt"),
+        (&['/'], "", " / max"),
+        (&['-'], "", " - one"),
+        (&['-'], "-", ""),
+        (&['=', '+'], "", " += one"),
+        (&['=', '-'], "", " -= one"),
+        (&['='], "", " == nil"),
+        // Grouping
+        (&['{', '}'], "{ ", " }"),
+        (&['[', ']'], "", "[idx]"),
+        (&['<', '>'], "Vec<", ">"),
+        // Logic
+        (&['&'], "&", ""),
+        (&['|'], "", " | nil"),
+        (&['!'], "!", ""),
+        // Special
+        (&['@'], "@", ""),
+        (&['#'], "#", ""),
+        (&['_'], "", "_val"),
+        (&['$'], "$", ""),
+        (&['\\'], "\\", ""),
+    ];
+
+    let available: Vec<_> = PATTERNS
+        .iter()
+        .filter(|pattern| pattern.0.iter().all(|sym| symbols.contains(sym)))
+        .collect();
+
+    if available.is_empty() {
+        return word.to_string();
+    }
+
+    // Indices (into `available`) of every pattern that contains the focused
+    // symbol. Deriving this from the table keeps compound operators such as
+    // `+=` and `==` eligible for the focus boost as well.
+    let focused_patterns: Vec<usize> = focused_symbol
+        .map(|focus| {
+            available
+                .iter()
+                .enumerate()
+                .filter(|(_, pattern)| pattern.0.contains(&focus))
+                .map(|(idx, _)| idx)
+                .collect()
+        })
+        .unwrap_or_default();
+
+    // 50% chance to prefer a pattern that uses the focused symbol
+    let idx = if !focused_patterns.is_empty() && rng.gen_bool(0.50) {
+        focused_patterns[rng.gen_range(0..focused_patterns.len())]
+    } else {
+        rng.gen_range(0..available.len())
+    };
+
+    let (_, prefix, suffix) = *available[idx];
+    format!("{prefix}{word}{suffix}")
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use rand::SeedableRng;
+
+    /// With nothing unlocked the pass must be the identity.
+    #[test]
+    fn test_no_symbols_when_empty() {
+        let mut rng = SmallRng::seed_from_u64(42);
+        assert_eq!(
+            apply_code_symbols("hello world", &[], None, &mut rng),
+            "hello world"
+        );
+    }
+
+    /// Every non-alphanumeric, non-space character must come from the unlocked set.
+    #[test]
+    fn test_uses_only_unlocked_symbols() {
+        let symbols = ['=', '+'];
+        let mut rng = SmallRng::seed_from_u64(42);
+        let result = apply_code_symbols("a b c d e f g h i j", &symbols, None, &mut rng);
+        for ch in result
+            .chars()
+            .filter(|c| !c.is_alphanumeric() && *c != ' ')
+        {
+            assert!(
+                symbols.contains(&ch),
+                "Unexpected symbol '{ch}' in: {result}"
+            );
+        }
+    }
+
+    /// Twenty words with `-` unlocked should yield at least one dash for this seed.
+    #[test]
+    fn test_dash_patterns_generated() {
+        let symbols = ['-', '='];
+        let mut rng = SmallRng::seed_from_u64(42);
+        let result = apply_code_symbols(
+            "a b c d e f g h i j k l m n o p q r s t",
+            &symbols,
+            None,
+            &mut rng,
+        );
+        assert!(result.contains('-'), "Expected dash in: {result}");
+    }
+}
--- a/src/generator/code_syntax.rs
+++ b/src/generator/code_syntax.rs
@@ -245,11 +245,11 @@ impl TextGenerator for CodeSyntaxGenerator {
            result.push(snippet.to_string());
        }

-        result.join(" ")
+        result.join("\n\n")
    }
 }

-/// Extract function-length snippets from raw source code
+/// Extract function-length snippets from raw source code, preserving whitespace.
 fn extract_code_snippets(source: &str) -> Vec<String> {
    let mut snippets = Vec::new();
    let lines: Vec<&str> = source.lines().collect();
@@ -285,11 +285,11 @@ fn extract_code_snippets(source: &str) -> Vec<String> {
            }

            if snippet_lines.len() >= 3 && snippet_lines.len() <= 30 {
-                let snippet = snippet_lines.join(" ");
-                // Normalize whitespace
-                let normalized: String = snippet.split_whitespace().collect::<Vec<_>>().join(" ");
-                if normalized.len() >= 20 && normalized.len() <= 500 {
-                    snippets.push(normalized);
+                // Preserve original newlines and indentation
+                let snippet = snippet_lines.join("\n");
+                let char_count = snippet.chars().filter(|c| !c.is_whitespace()).count();
+                if char_count >= 20 && snippet.len() <= 800 {
+                    snippets.push(snippet);
                }
            }

--- a/src/generator/mod.rs
+++ b/src/generator/mod.rs
@@ -1,9 +1,13 @@
 pub mod cache;
+pub mod capitalize;
+pub mod code_patterns;
 pub mod code_syntax;
 pub mod dictionary;
 pub mod github_code;
+pub mod numbers;
 pub mod passage;
 pub mod phonetic;
+pub mod punctuate;
 pub mod transition_table;

 use crate::engine::filter::CharFilter;
--- a/src/generator/numbers.rs
+++ b/src/generator/numbers.rs
@@ -0,0 +1,132 @@
+use rand::rngs::SmallRng;
+use rand::Rng;
+
+/// Post-processing pass that replaces some words of `text` with number expressions.
+///
+/// `text` is split on single spaces. Each piece is independently replaced by
+/// the output of `generate_number_expr` with probability 0.15, or 0.30 when
+/// `focused` is an unlocked digit; all other pieces are copied verbatim. The
+/// pieces are re-joined with single spaces.
+///
+/// Only digits from `unlocked_digits` are used. `has_dot` tells the generator
+/// whether dotted forms such as `3.14` may be produced.
+///
+/// Returns `text` unchanged when `unlocked_digits` is empty.
+pub fn apply_numbers(
+    text: &str,
+    unlocked_digits: &[char],
+    has_dot: bool,
+    focused: Option<char>,
+    rng: &mut SmallRng,
+) -> String {
+    if unlocked_digits.is_empty() {
+        return text.to_string();
+    }
+
+    // The focused key only boosts insertion when it is a digit we are allowed
+    // to emit. Without the `contains` check a locked digit would be passed
+    // down to `random_number` and could appear in the output.
+    let focused_digit =
+        focused.filter(|ch| ch.is_ascii_digit() && unlocked_digits.contains(ch));
+    let base_prob = if focused_digit.is_some() { 0.30 } else { 0.15 };
+
+    let words: Vec<&str> = text.split(' ').collect();
+    let mut result = Vec::with_capacity(words.len());
+
+    for word in &words {
+        if rng.gen_bool(base_prob) {
+            let expr = generate_number_expr(unlocked_digits, has_dot, focused_digit, rng);
+            result.push(expr);
+        } else {
+            result.push(word.to_string());
+        }
+    }
+
+    result.join(" ")
+}
+
+/// Produces one random number expression built from `digits`.
+///
+/// One of the following shapes is picked uniformly:
+/// a bare 1-3 digit number, `<1-2 digits> <unit>`, a 4-digit number,
+/// `<prefix> <1-3 digits>`, and, only when `has_dot` is true,
+/// `<1 digit>.<1-2 digits>`.
+fn generate_number_expr(
+    digits: &[char],
+    has_dot: bool,
+    focused_digit: Option<char>,
+    rng: &mut SmallRng,
+) -> String {
+    const UNITS: [&str; 6] = ["items", "miles", "days", "lines", "times", "parts"];
+    const PREFIXES: [&str; 6] = ["room", "page", "step", "item", "line", "port"];
+
+    // The dotted shape is the last one, so it is simply excluded from the
+    // range when the dot is not available.
+    let shape_count = if has_dot { 5 } else { 4 };
+
+    match rng.gen_range(0..shape_count) {
+        // Simple count: "3" or "42"
+        0 => random_number(digits, 1, 3, focused_digit, rng),
+        // Measurement: "7 miles" or "42 items"
+        1 => {
+            let amount = random_number(digits, 1, 2, focused_digit, rng);
+            let unit = UNITS[rng.gen_range(0..UNITS.len())];
+            format!("{amount} {unit}")
+        }
+        // Year-like: "2024"
+        2 => random_number(digits, 4, 4, focused_digit, rng),
+        // ID: "room 42" or "page 7"
+        3 => {
+            let label = PREFIXES[rng.gen_range(0..PREFIXES.len())];
+            let id = random_number(digits, 1, 3, focused_digit, rng);
+            format!("{label} {id}")
+        }
+        // Version-like: "3.14" or "2.0"
+        _ => {
+            let major = random_number(digits, 1, 1, focused_digit, rng);
+            let minor = random_number(digits, 1, 2, focused_digit, rng);
+            format!("{major}.{minor}")
+        }
+    }
+}
+
+/// Builds a digit string whose length is drawn uniformly from `min_len..=max_len`.
+///
+/// For every position, when `focused_digit` is set it is used with
+/// probability 0.40; otherwise (or when that draw fails) a digit is picked
+/// uniformly from `digits`. `digits` is indexed directly, so it must not be
+/// empty whenever at least one position falls through to the uniform pick.
+fn random_number(
+    digits: &[char],
+    min_len: usize,
+    max_len: usize,
+    focused_digit: Option<char>,
+    rng: &mut SmallRng,
+) -> String {
+    let len = rng.gen_range(min_len..=max_len);
+    let mut number = String::with_capacity(len);
+
+    for _ in 0..len {
+        let digit = match focused_digit {
+            // The guard draws from `rng` only when a focused digit exists.
+            Some(fd) if rng.gen_bool(0.40) => fd,
+            _ => digits[rng.gen_range(0..digits.len())],
+        };
+        number.push(digit);
+    }
+
+    number
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use rand::SeedableRng;
+
+    /// With no digits unlocked the pass must be the identity.
+    #[test]
+    fn test_no_numbers_when_empty() {
+        let mut rng = SmallRng::seed_from_u64(42);
+        assert_eq!(
+            apply_numbers("hello world", &[], false, None, &mut rng),
+            "hello world"
+        );
+    }
+
+    /// Every digit in the output must be one of the unlocked digits.
+    #[test]
+    fn test_numbers_use_only_unlocked_digits() {
+        let digits = ['1', '2', '3'];
+        let mut rng = SmallRng::seed_from_u64(42);
+        let result = apply_numbers(
+            "a b c d e f g h i j k l m n o p q r s t",
+            &digits,
+            false,
+            None,
+            &mut rng,
+        );
+        for ch in result.chars().filter(|c| c.is_ascii_digit()) {
+            assert!(digits.contains(&ch), "Unexpected digit {ch} in: {result}");
+        }
+    }
+
+    /// Dotted (version-like) numbers must not appear when `has_dot` is false.
+    #[test]
+    fn test_no_dot_without_punctuation() {
+        let digits = ['1', '2', '3', '4', '5'];
+        let mut rng = SmallRng::seed_from_u64(42);
+        let result = apply_numbers(
+            "a b c d e f g h i j k l m n o p q r s t",
+            &digits,
+            false,
+            None,
+            &mut rng,
+        );
+        assert!(!result.contains('.'), "Should not contain dot when has_dot=false: {result}");
+    }
+}
--- a/src/generator/punctuate.rs
+++ b/src/generator/punctuate.rs
@@ -0,0 +1,213 @@
+use rand::rngs::SmallRng;
+use rand::Rng;
+
+/// Post-processing pass that inserts punctuation into generated text.
+///
+/// `text` is split on single spaces and each word may receive, in this order:
+/// a contraction suffix, a trailing dash, a sentence terminator or clause
+/// punctuation (`,`, `;`, `:`), an opening quote and an opening parenthesis.
+/// Every mark is only used when it is listed in `unlocked_punct`, and the
+/// probability of a mark is raised when it equals `focused`.
+///
+/// After the per-word pass the text is finished off by appending a final `.`
+/// (when unlocked and the text does not already end in `.`, `?`, `!`, `"` or
+/// `)`), closing an odd quote count with one `"`, and closing every
+/// unbalanced `(` with a matching `)`. Closers are placed before a final `.`.
+///
+/// Returns `text` unchanged when `unlocked_punct` is empty.
+pub fn apply_punctuation(
+    text: &str,
+    unlocked_punct: &[char],
+    focused: Option<char>,
+    rng: &mut SmallRng,
+) -> String {
+    /// Appends `closer` to `word`, keeping a trailing period last.
+    fn push_before_final_period(word: &mut String, closer: char) {
+        if word.ends_with('.') {
+            // '.' is a single byte, so `len() - 1` is a valid char boundary.
+            word.insert(word.len() - 1, closer);
+        } else {
+            word.push(closer);
+        }
+    }
+
+    if unlocked_punct.is_empty() {
+        return text.to_string();
+    }
+
+    // If focused key is a punctuation char in our set, boost its insertion probability
+    let focused_punct = focused.filter(|ch| unlocked_punct.contains(ch));
+
+    // `split` always yields at least one item (a single empty string for empty
+    // input), so `words` is never empty and `words.len() - 1` cannot underflow.
+    let words: Vec<&str> = text.split(' ').collect();
+
+    let has_period = unlocked_punct.contains(&'.');
+    let has_comma = unlocked_punct.contains(&',');
+    let has_apostrophe = unlocked_punct.contains(&'\'');
+    let has_semicolon = unlocked_punct.contains(&';');
+    let has_colon = unlocked_punct.contains(&':');
+    let has_quote = unlocked_punct.contains(&'"');
+    let has_dash = unlocked_punct.contains(&'-');
+    let has_question = unlocked_punct.contains(&'?');
+    let has_exclaim = unlocked_punct.contains(&'!');
+    let has_open_paren = unlocked_punct.contains(&'(');
+    let has_close_paren = unlocked_punct.contains(&')');
+
+    let mut result = Vec::with_capacity(words.len());
+    let mut words_since_period = 0;
+    let mut words_since_comma = 0;
+
+    // NOTE: the order of the `rng` draws below determines seeded output; the
+    // position of each `gen_bool` inside its `&&` chain is deliberate.
+    for (i, word) in words.iter().enumerate() {
+        let mut w = word.to_string();
+
+        // Contractions (~8% of words, boosted if apostrophe is focused)
+        let apostrophe_prob = if focused_punct == Some('\'') { 0.30 } else { 0.08 };
+        if has_apostrophe && w.len() >= 3 && rng.gen_bool(apostrophe_prob) {
+            w = make_contraction(&w, rng);
+        }
+
+        // Trailing dash (~5% of words, boosted if dash is focused).
+        // NOTE(review): words are later joined with spaces, so this produces
+        // `word- next` rather than a compound `word-next` — confirm intent.
+        let dash_prob = if focused_punct == Some('-') { 0.25 } else { 0.05 };
+        if has_dash && i + 1 < words.len() && rng.gen_bool(dash_prob) {
+            w.push('-');
+        }
+
+        // Sentence ending punctuation: possible from the 8th word on, forced
+        // at the 12th.
+        words_since_period += 1;
+        let end_sentence =
+            (words_since_period >= 8 && rng.gen_bool(0.15)) || words_since_period >= 12;
+
+        if end_sentence && i < words.len() - 1 {
+            let q_prob = if focused_punct == Some('?') { 0.40 } else { 0.15 };
+            let excl_prob = if focused_punct == Some('!') { 0.40 } else { 0.10 };
+            if has_question && rng.gen_bool(q_prob) {
+                w.push('?');
+            } else if has_exclaim && rng.gen_bool(excl_prob) {
+                w.push('!');
+            } else if has_period {
+                w.push('.');
+            }
+            words_since_period = 0;
+            words_since_comma = 0;
+        } else {
+            // Comma after clause (~every 4-6 words)
+            words_since_comma += 1;
+            let comma_prob = if focused_punct == Some(',') { 0.40 } else { 0.20 };
+            if has_comma && words_since_comma >= 4 && rng.gen_bool(comma_prob) && i < words.len() - 1 {
+                w.push(',');
+                words_since_comma = 0;
+            }
+
+            // Semicolon between clauses (rare, boosted if focused)
+            let semi_prob = if focused_punct == Some(';') { 0.25 } else { 0.05 };
+            if has_semicolon && words_since_comma >= 5 && rng.gen_bool(semi_prob) && i < words.len() - 1 {
+                w.push(';');
+                words_since_comma = 0;
+            }
+
+            // Colon before list-like content (rare, boosted if focused)
+            let colon_prob = if focused_punct == Some(':') { 0.20 } else { 0.03 };
+            if has_colon && rng.gen_bool(colon_prob) && i < words.len() - 1 {
+                w.push(':');
+            }
+        }
+
+        // Quoted phrases (4% chance to start a quote, 20% if focused); never
+        // opened on the last two words.
+        let quote_prob = if focused_punct == Some('"') { 0.20 } else { 0.04 };
+        if has_quote && rng.gen_bool(quote_prob) && i + 2 < words.len() {
+            w = format!("\"{w}");
+        }
+
+        // Parenthetical asides (rare, boosted if focused)
+        let paren_prob = if matches!(focused_punct, Some('(' | ')')) { 0.15 } else { 0.03 };
+        if has_open_paren && has_close_paren && rng.gen_bool(paren_prob) && i + 2 < words.len() {
+            w = format!("({w}");
+        }
+
+        result.push(w);
+    }
+
+    // End with period if we have it
+    if has_period {
+        if let Some(last) = result.last_mut() {
+            let last_char = last.chars().last();
+            if !matches!(last_char, Some('.' | '?' | '!' | '"' | ')')) {
+                last.push('.');
+            }
+        }
+    }
+
+    // Close any open quotes/parens
+    let mut open_quotes = 0i32;
+    let mut open_parens = 0i32;
+    for ch in result.iter().flat_map(|w| w.chars()) {
+        match ch {
+            '"' => open_quotes += 1,
+            '(' => open_parens += 1,
+            ')' => open_parens -= 1,
+            _ => {}
+        }
+    }
+    if let Some(last) = result.last_mut() {
+        if open_quotes % 2 != 0 && has_quote {
+            push_before_final_period(last, '"');
+        }
+        if has_close_paren {
+            // Several asides may have been opened; close each of them. The
+            // range is empty when the count is zero or negative.
+            for _ in 0..open_parens {
+                push_before_final_period(last, ')');
+            }
+        }
+    }
+
+    result.join(" ")
+}
+
+/// Decorates `word` with an apostrophe suffix.
+///
+/// When `word` is exactly one of the listed base words, the paired suffix is
+/// appended to the whole word (so `"not"` becomes `"notn't"` and `"will"`
+/// becomes `"will'll"`) and `rng` is not consulted. Any other word gets `'s`
+/// appended with probability 0.5 and is otherwise returned unchanged.
+///
+/// NOTE(review): appending the suffix to the full base word does not yield a
+/// real contraction — confirm whether a different output form was intended.
+fn make_contraction(word: &str, rng: &mut SmallRng) -> String {
+    const SUFFIXES: [(&str, &str); 6] = [
+        ("not", "n't"),
+        ("will", "'ll"),
+        ("would", "'d"),
+        ("have", "'ve"),
+        ("are", "'re"),
+        ("is", "'s"),
+    ];
+
+    if let Some((_, suffix)) = SUFFIXES.iter().find(|(base, _)| *base == word) {
+        return format!("{word}{suffix}");
+    }
+
+    // Generic fallback: a coin flip decides whether to add a possessive `'s`.
+    if rng.gen_bool(0.5) {
+        format!("{word}'s")
+    } else {
+        word.to_string()
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use rand::SeedableRng;
+
+    /// With no punctuation unlocked the pass must be the identity.
+    #[test]
+    fn test_no_punct_when_empty() {
+        let mut rng = SmallRng::seed_from_u64(42);
+        assert_eq!(
+            apply_punctuation("hello world", &[], None, &mut rng),
+            "hello world"
+        );
+    }
+
+    /// When only the period is unlocked, the text must end with one.
+    #[test]
+    fn test_adds_period_at_end() {
+        let mut rng = SmallRng::seed_from_u64(42);
+        let result = apply_punctuation(
+            "one two three four five six seven eight nine ten",
+            &['.'],
+            None,
+            &mut rng,
+        );
+        assert!(result.ends_with('.'));
+    }
+
+    /// A 20-word text must contain at least one period.
+    #[test]
+    fn test_period_appears_mid_text() {
+        let mut rng = SmallRng::seed_from_u64(42);
+        let text = vec!["word"; 20].join(" ");
+        let result = apply_punctuation(&text, &['.', ','], None, &mut rng);
+        let period_count = result.matches('.').count();
+        assert!(period_count >= 1, "Expected periods in: {result}");
+    }
+}