-
-
Notifications
You must be signed in to change notification settings - Fork 458
Implementing grammar enumerator #3750
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,212 @@ | ||
| //! Enumerator for context-free grammar | ||
| use alloc::vec::Vec; | ||
|
|
||
| /// for more detail see the paper `https://arxiv.org/pdf/2305.00522` | ||
| use crate::generators::gramatron::Automaton; | ||
| use crate::inputs::Terminal; | ||
|
|
||
| /// IntegerizedStack encodes a stack of integers as a single integer. | ||
| #[derive(Debug)] | ||
| pub struct IntegerizedStack { | ||
| value: u64, | ||
| } | ||
|
|
||
| impl IntegerizedStack { | ||
| /// Create a new IntegerizedStack with initial value | ||
| pub fn new(v: u64) -> Self { | ||
| Self { value: v } | ||
| } | ||
|
|
||
| /// Removes an integer from self.value | ||
| pub fn pop(&mut self) -> u64 { | ||
| let (rest, ret) = decode(self.value); | ||
| self.value = rest; | ||
| ret | ||
| } | ||
|
|
||
| /// Pop from self.value mod n | ||
| pub fn modpop(&mut self, modulus: u64) -> u64 { | ||
| let (rest, ret) = mod_decode(self.value, modulus); | ||
| self.value = rest; | ||
| ret | ||
| } | ||
|
|
||
| /// Assumes value codes exactly n integers. Zero afterwards. | ||
| pub fn split(&mut self, n: usize) -> Vec<u64> { | ||
| let mut out = Vec::with_capacity(n); | ||
| for _ in 0..(n - 1) { | ||
| out.push(self.pop()); | ||
| } | ||
| out.push(self.value); | ||
| self.value = 0; | ||
| out | ||
| } | ||
| } | ||
|
|
||
/// Rosenberg-Strong pairing decode.
///
/// Let `m = floor(sqrt(z))` and `offset = z - m * m` (so `0 <= offset <= 2 * m`).
/// Returns `(offset, m)` when `offset < m`, and `(m, 2 * m - offset)` otherwise.
///
/// The square root is computed with integer arithmetic only, so the result is
/// exact over the whole `u64` range. A floating-point square root is inexact
/// above 2^53, which can make `m * m` overflow or exceed `z`.
fn decode(z: u64) -> (u64, u64) {
    let m = isqrt_u64(z);
    // `m * m <= z`, so neither the product nor the subtraction can overflow.
    let offset = z - m * m;
    if offset < m {
        (offset, m)
    } else {
        // Equal to `m * m + 2 * m - z`, written so no intermediate exceeds `2 * m`.
        (m, 2 * m - offset)
    }
}

/// Exact `floor(sqrt(z))` for `u64`, using Newton's iteration on integers.
fn isqrt_u64(z: u64) -> u64 {
    if z < 2 {
        return z;
    }

    // Start from a power of two that is guaranteed to be >= sqrt(z):
    // z < 2^bits, hence sqrt(z) < 2^ceil(bits / 2).
    let bits = u64::BITS - z.leading_zeros();
    let mut estimate = 1u64 << ((bits + 1) / 2);

    // From an over-estimate the iteration decreases strictly until it reaches
    // the floor of the square root; the first non-decreasing step ends it.
    loop {
        let next = (estimate + z / estimate) / 2;
        if next >= estimate {
            return estimate;
        }
        estimate = next;
    }
}
|
Comment on lines
+46
to
+55
|
||
|
|
||
/// Modular pairing decode: splits `z` into quotient and remainder with respect to `k`.
///
/// Returns `(z / k, z % k)`: the quotient (the part that stays encoded) comes
/// first, the remainder (the decoded value, always in `0..k`) comes second.
///
/// # Panics
///
/// Panics if `k` is zero.
fn mod_decode(z: u64, k: u64) -> (u64, u64) {
    // Fail with an explicit message rather than a bare divide-by-zero panic.
    assert!(k != 0, "mod_decode: modulus must be non-zero");
    (z / k, z % k)
}
|
Comment on lines
+57
to
+63
|
||
|
|
||
| /// Enumerate the n-th derivation directly on a Gramatron [`Automaton`] | ||
| /// - Triggers whose `dest` equals `final_state` are treated as terminal rules (base cases). | ||
| /// - All other triggers are nonterminal rules (recursive cases). | ||
| pub fn enumerate_automaton(state: usize, n: u64, automaton: &Automaton) -> Vec<Terminal> { | ||
| let final_state = automaton.final_state; | ||
| let triggers = &automaton.pda[state]; | ||
|
|
||
| // Partitioning triggers into terminals and nonterminals | ||
| let terminal_indices: Vec<usize> = triggers | ||
| .iter() | ||
| .enumerate() | ||
| .filter(|(_, t)| t.dest == final_state) | ||
| .map(|(i, _)| i) | ||
| .collect(); | ||
| let nonterminal_indices: Vec<usize> = triggers | ||
| .iter() | ||
| .enumerate() | ||
| .filter(|(_, t)| t.dest != final_state) | ||
| .map(|(i, _)| i) | ||
| .collect(); | ||
|
|
||
| let num_terminal = terminal_indices.len() as u64; | ||
|
|
||
| if n < num_terminal { | ||
| // Base case: pick the n-th terminal trigger | ||
| let trigger_idx = terminal_indices[n as usize]; | ||
| let trigger = &triggers[trigger_idx]; | ||
| return vec![Terminal::new(state, trigger_idx, trigger.term.clone())]; | ||
| } | ||
|
|
||
| // if nonterminals then we need to choose one and recurse | ||
| let mut stack = IntegerizedStack::new(n - num_terminal); | ||
| let num_nonterminal = nonterminal_indices.len() as u64; | ||
| let rule_choice = stack.modpop(num_nonterminal) as usize; | ||
| let trigger_idx = nonterminal_indices[rule_choice]; | ||
| let trigger = &triggers[trigger_idx]; | ||
| let dest = trigger.dest; | ||
|
|
||
| let mut result = vec![Terminal::new(state, trigger_idx, trigger.term.clone())]; | ||
|
|
||
| let child_terminals = enumerate_automaton(dest, stack.value, automaton); | ||
| result.extend(child_terminals); | ||
|
Comment on lines
+95
to
+106
|
||
| result | ||
| } | ||
|
|
||
#[cfg(test)]
mod tests {
    use alloc::string::String;

    use super::*;
    use crate::generators::gramatron::Trigger;

    /// Shorthand for a trigger that emits `term` and moves to state `dest`.
    fn edge(dest: usize, term: &str) -> Trigger {
        Trigger {
            dest,
            term: String::from(term),
        }
    }

    /// Fixture automaton with two independent loops:
    ///
    /// ```text
    /// State 0 (init): "a"→3(final), "("→1, "["→2
    /// State 1:        ")"→3(final), "x"→1
    /// State 2:        "]"→3(final), "y"→2
    /// State 3 (final)
    /// ```
    ///
    /// Its language is: "a", "()", "[]", "(x)", "[y]", "(xx)", "[yy]", ...
    fn test_automaton() -> Automaton {
        let init_rules = alloc::vec![edge(3, "a"), edge(1, "("), edge(2, "[")];
        let paren_rules = alloc::vec![edge(3, ")"), edge(1, "x")];
        let bracket_rules = alloc::vec![edge(3, "]"), edge(2, "y")];
        let final_rules = alloc::vec![];

        Automaton {
            init_state: 0,
            final_state: 3,
            pda: alloc::vec![init_rules, paren_rules, bracket_rules, final_rules],
        }
    }

    /// Joins the symbols of a derivation into one string.
    fn symbols_to_string(terms: &[Terminal]) -> String {
        let mut joined = String::new();
        for term in terms {
            joined.push_str(term.symbol.as_str());
        }
        joined
    }

    #[test]
    fn test_enumerate_automaton_known_outputs() {
        let automaton = test_automaton();

        // Expected derivation for n = 0, 1, 2, ... starting from the init state:
        // index 0 is the terminal rule, odd indices walk the "(" loop and even
        // indices walk the "[" loop, one level deeper every two steps.
        let expected = ["a", "()", "[]", "(x)", "[y]", "(xx)", "[yy]"];

        for (n, want) in expected.iter().enumerate() {
            let terms = enumerate_automaton(0, n as u64, &automaton);
            assert_eq!(symbols_to_string(&terms), *want);
        }
    }
}
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -63,6 +63,17 @@ where | |
| } | ||
| } | ||
|
|
||
| /// Enumerate the n-th input deterministically using the IntegerizedStack algorithm. | ||
| /// This produces a unique [`GramatronInput`] for each value of `n`. | ||
| pub fn enumerate_nth(&self, n: u64) -> GramatronInput { | ||
| let terminals = crate::generators::enumerator::enumerate_automaton( | ||
| self.automaton.init_state, | ||
| n, | ||
| self.automaton, | ||
| ); | ||
| GramatronInput::new(terminals) | ||
| } | ||
|
Comment on lines
+66
to
+75
|
||
|
|
||
| /// Append the generated terminals | ||
| pub fn append_generated_terminals(&self, input: &mut GramatronInput, state: &mut S) -> usize { | ||
| let mut counter = 0; | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
`IntegerizedStack::split` will underflow/panic for `n == 0` (it does `n - 1`) and also behaves oddly for `n == 0` / `n == 1`. Since this is a public method, please handle these edge cases explicitly (e.g., return an empty vec for `n == 0`, and for `n == 1` just return the current value) instead of relying on `n - 1`.