From 5ee819cd6136c712083df2bb58bf013efc9f4758 Mon Sep 17 00:00:00 2001
From: Eric Huss <eric@huss.org>
Date: Fri, 13 Feb 2026 07:45:22 -0800
Subject: [PATCH 01/12] Add negative lookahead to the grammar

This adds the `!` prefix which represents negative lookahead. This was
included in the original PEG paper, though it was called "NOT", whereas
I went with a more explicit "NegativeLookahead".

This will be helpful in several productions which need to have these
kinds of exclusions.

Negative lookahead is also commonly available in regular expression
engines, which usually spell it `(?!expr)`. The `!` prefix is also
common in many other PEG libraries.

There is a small risk this could be confusing, since `!` is sometimes
used for other purposes in other contexts. For example, Prolog uses `!`
for its cut operator. I think this should be fine since the notation is
common in PEG grammars.
---
 dev-guide/src/grammar.md                         |  7 ++++++-
 src/notation.md                                  |  1 +
 tools/grammar/src/lib.rs                         |  3 +++
 tools/grammar/src/parser.rs                      | 11 +++++++++++
 tools/mdbook-spec/src/grammar/render_markdown.rs |  5 +++++
 tools/mdbook-spec/src/grammar/render_railroad.rs |  6 ++++++
 6 files changed, 32 insertions(+), 1 deletion(-)
diff --git a/dev-guide/src/grammar.md b/dev-guide/src/grammar.md
index 40e4883096..b5be7aa144 100644
--- a/dev-guide/src/grammar.md
+++ b/dev-guide/src/grammar.md
@@ -39,7 +39,11 @@ Sequence ->
         (` `* AdornedExpr)* ` `* Cut
       | (` `* AdornedExpr)+
 
-AdornedExpr -> Expr1 Quantifier? Suffix? Footnote?
+AdornedExpr -> Prefix? Expr1 Quantifier? Suffix? Footnote?
+
+Prefix -> NegativeLookahead
+
+NegativeLookahead -> `!`
 
 Suffix -> ` _` <not underscore, unless in backtick>* `_`
 
@@ -135,6 +139,7 @@ The general format is a series of productions separated by blank lines. The expr
 | Suffix | \_except \[LazyBooleanExpression\]\_  | Adds a suffix to the previous expression to provide an additional English description, rendered in subscript. This can contain limited Markdown, but try to avoid anything except basics like links. |
 | Footnote | \[^extern-safe\] | Adds a footnote, which can supply extra information that may be helpful to the user. The footnote itself should be defined outside of the code block like a normal Markdown footnote. |
 | Optional | Expr? | The preceding expression is optional. |
+| NegativeLookahead | !Expr | Matches if Expr does not follow, without consuming any input. |
 | Repeat | Expr* | The preceding expression is repeated 0 or more times. |
 | RepeatNonGreedy | Expr*? | The preceding expression is repeated 0 or more times without being greedy. |
 | RepeatPlus | Expr+ | The preceding expression is repeated 1 or more times. |
diff --git a/src/notation.md b/src/notation.md
index 850ee9fb5e..ace8e65bc0 100644
--- a/src/notation.md
+++ b/src/notation.md
@@ -20,6 +20,7 @@ The following notations are used by the *Lexer* and *Syntax* grammar snippets:
 | x<sup>a..=b</sup> | HEX_DIGIT<sup>1..=5</sup>     | a to b repetitions of x, inclusive of b   |
 | Rule1 Rule2       | `fn` _Name_ _Parameters_      | Sequence of rules in order                |
 | \|                | `u8` \| `u16`, Block \| Item  | Either one or another                     |
+| !                 | !COMMENT                      | Matches if the expression does not follow, without consuming any input |
 | \[ ]               | \[`b` `B`]                     | Any of the characters listed              |
 | \[ - ]             | \[`a`-`z`]                     | Any of the characters in the range        |
 | ~\[ ]              | ~\[`b` `B`]                    | Any characters, except those listed       |
diff --git a/tools/grammar/src/lib.rs b/tools/grammar/src/lib.rs
index 6fbb886558..986019270b 100644
--- a/tools/grammar/src/lib.rs
+++ b/tools/grammar/src/lib.rs
@@ -51,6 +51,8 @@ pub enum ExpressionKind {
     Sequence(Vec<Expression>),
     /// `A?`
     Optional(Box<Expression>),
+    /// `!A`
+    NegativeLookahead(Box<Expression>),
     /// `A*`
     Repeat(Box<Expression>),
     /// `A*?`
@@ -137,6 +139,7 @@ impl Expression {
         match &self.kind {
             ExpressionKind::Grouped(e)
             | ExpressionKind::Optional(e)
+            | ExpressionKind::NegativeLookahead(e)
             | ExpressionKind::Repeat(e)
             | ExpressionKind::RepeatNonGreedy(e)
             | ExpressionKind::RepeatPlus(e)
diff --git a/tools/grammar/src/parser.rs b/tools/grammar/src/parser.rs
index f65cb80f97..48c271d255 100644
--- a/tools/grammar/src/parser.rs
+++ b/tools/grammar/src/parser.rs
@@ -251,6 +251,8 @@ impl Parser<'_> {
             self.parse_grouped()?
         } else if next == b'~' {
             self.parse_neg_expression()?
+        } else if next == b'!' {
+            self.parse_negative_lookahead()?
         } else {
             return Ok(None);
         };
@@ -387,6 +389,15 @@ impl Parser<'_> {
         Ok(ExpressionKind::NegExpression(box_kind(kind)))
     }
 
+    fn parse_negative_lookahead(&mut self) -> Result<ExpressionKind> {
+        self.expect("!", "expected !")?;
+        self.space0();
+        let Some(e) = self.parse_expr1()? else {
+            bail!(self, "expected expression after !");
+        };
+        Ok(ExpressionKind::NegativeLookahead(Box::new(e)))
+    }
+
     /// Parse e.g. `F00F` after `U+`.
     fn parse_unicode(&mut self) -> Result<ExpressionKind> {
         let mut xs = Vec::with_capacity(4);
diff --git a/tools/mdbook-spec/src/grammar/render_markdown.rs b/tools/mdbook-spec/src/grammar/render_markdown.rs
index edec8da035..59045fc503 100644
--- a/tools/mdbook-spec/src/grammar/render_markdown.rs
+++ b/tools/mdbook-spec/src/grammar/render_markdown.rs
@@ -67,6 +67,7 @@ fn last_expr(expr: &Expression) -> &ExpressionKind {
         ExpressionKind::Alt(es) | ExpressionKind::Sequence(es) => last_expr(es.last().unwrap()),
         ExpressionKind::Grouped(_)
         | ExpressionKind::Optional(_)
+        | ExpressionKind::NegativeLookahead(_)
         | ExpressionKind::Repeat(_)
         | ExpressionKind::RepeatNonGreedy(_)
         | ExpressionKind::RepeatPlus(_)
@@ -119,6 +120,10 @@ fn render_expression(expr: &Expression, cx: &RenderCtx, output: &mut String) {
             render_expression(e, cx, output);
             output.push_str("<sup>?</sup>");
         }
+        ExpressionKind::NegativeLookahead(e) => {
+            output.push('!');
+            render_expression(e, cx, output);
+        }
         ExpressionKind::Repeat(e) => {
             render_expression(e, cx, output);
             output.push_str("<sup>\\*</sup>");
diff --git a/tools/mdbook-spec/src/grammar/render_railroad.rs b/tools/mdbook-spec/src/grammar/render_railroad.rs
index ebb20af1bc..57a440769c 100644
--- a/tools/mdbook-spec/src/grammar/render_railroad.rs
+++ b/tools/mdbook-spec/src/grammar/render_railroad.rs
@@ -143,6 +143,12 @@ fn render_expression(expr: &Expression, cx: &RenderCtx, stack: bool) -> Option<B
                         make_seq(&es)?
                     }
                 }
+                ExpressionKind::NegativeLookahead(e) => {
+                    let forward = render_expression(e, cx, stack)?;
+                    let lbox =
+                        LabeledBox::new(forward, Comment::new("not followed by".to_string()));
+                    Box::new(lbox)
+                }
                 // Treat `e?` and `e{..=1}` / `e{0..=1}` equally.
                 ExpressionKind::Optional(e)
                 | ExpressionKind::RepeatRange {

From 2a7c3500a79b234e16c81321214c7321df868a99 Mon Sep 17 00:00:00 2001
From: Eric Huss <eric@huss.org>
Date: Fri, 13 Feb 2026 08:22:17 -0800
Subject: [PATCH 02/12] Add Unicode to character range

This adds the ability to specify Unicode code points in a character
range. This will be useful for defining some productions without
resorting to English prose, and perhaps makes them a little clearer.

This also extends the Unicode grammar to allow 4 to 6 hex digits
(previously exactly 4) in order to support larger code points.
---
 dev-guide/src/grammar.md                      | 10 ++-
 src/input-format.md                           |  4 +-
 tools/grammar/src/lib.rs                      | 31 ++++++++-
 tools/grammar/src/parser.rs                   | 66 ++++++++++++-------
 .../src/grammar/render_markdown.rs            | 24 ++++---
 .../src/grammar/render_railroad.rs            | 16 ++++-
 6 files changed, 111 insertions(+), 40 deletions(-)

diff --git a/dev-guide/src/grammar.md b/dev-guide/src/grammar.md
index b5be7aa144..2dbdcf1ab4 100644
--- a/dev-guide/src/grammar.md
+++ b/dev-guide/src/grammar.md
@@ -85,7 +85,7 @@ Expr1 ->
     | Group
     | NegativeExpression
 
-Unicode -> `U+` [`A`-`Z` `0`-`9`]4..=4
+Unicode -> `U+` [`A`-`Z` `0`-`9`]4..=6
 
 NonTerminal -> Name
 
@@ -102,7 +102,11 @@ Characters ->
     | CharacterTerminal
     | CharacterName
 
-CharacterRange -> BACKTICK <any char> BACKTICK `-` BACKTICK <any char> BACKTICK
+CharacterRange -> Character `-` Character
+
+Character ->
+        BACKTICK <any char> BACKTICK
+      | Unicode
 
 CharacterTerminal -> Terminal
 
@@ -127,7 +131,7 @@ The general format is a series of productions separated by blank lines. The expr
 | Comment | // Single line comment. | A comment extending to the end of the line. |
 | Terminal | \`example\` | A sequence of exact characters, surrounded by backticks. |
 | Charset | \[ \`A\`-\`Z\` \`0\`-\`9\` \`_\` \] | A choice from a set of characters, space-separated. There are three different forms. |
-| CharacterRange | \[ \`A\`-\`Z\` \] | A range of characters; each character should be in backticks. |
+| CharacterRange | \[ \`A\`-\`Z\` \] | A range of characters. Characters can be a Unicode expression or be a literal character surrounded in backticks. |
 | CharacterTerminal | \[ \`x\` \] | A single character, surrounded by backticks. |
 | CharacterName | \[ LF \] | A nonterminal, referring to another production. |
 | Prose | \<any ASCII character except CR\> | An English description of what should be matched, surrounded in angle brackets. |
diff --git a/src/input-format.md b/src/input-format.md
index 2432da0339..be6bb670b3 100644
--- a/src/input-format.md
+++ b/src/input-format.md
@@ -3,7 +3,9 @@ r[input]
 
 r[input.syntax]
 ```grammar,lexer
-@root CHAR -> <a Unicode scalar value>
+@root CHAR -> [U+0000-U+D7FF U+E000-U+10FFFF] // a Unicode scalar value
+
+@root ASCII -> [U+0000-U+007F]
 
 NUL -> U+0000
 ```
diff --git a/tools/grammar/src/lib.rs b/tools/grammar/src/lib.rs
index 986019270b..1d64e45143 100644
--- a/tools/grammar/src/lib.rs
+++ b/tools/grammar/src/lib.rs
@@ -87,7 +87,7 @@ pub enum ExpressionKind {
     /// `^ A B C`
     Cut(Box<Expression>),
     /// `U+0060`
-    Unicode(String),
+    Unicode((char, String)),
 }
 
 #[derive(Copy, Clone, Debug)]
@@ -115,7 +115,34 @@ pub enum Characters {
     /// `` `_` ``
     Terminal(String),
     /// `` `A`-`Z` ``
-    Range(char, char),
+    Range(Character, Character),
+}
+
+#[derive(Clone, Debug)]
+pub enum Character {
+    Char(char),
+    /// `U+0060`
+    ///
+    /// The `String` is the hex digits after `U+`.
+    Unicode((char, String)),
+}
+
+impl Character {
+    pub fn get_ch(&self) -> char {
+        match self {
+            Character::Char(ch) => *ch,
+            Character::Unicode((ch, _)) => *ch,
+        }
+    }
+}
+
+impl Display for Character {
+    fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), std::fmt::Error> {
+        match self {
+            Character::Char(ch) => write!(f, "`{ch}`"),
+            Character::Unicode((_, s)) => write!(f, "U+{s}"),
+        }
+    }
 }
 
 impl Grammar {
diff --git a/tools/grammar/src/parser.rs b/tools/grammar/src/parser.rs
index 48c271d255..080d03c66e 100644
--- a/tools/grammar/src/parser.rs
+++ b/tools/grammar/src/parser.rs
@@ -1,6 +1,6 @@
 //! A parser of the ENBF-like grammar.
 
-use super::{Characters, Expression, ExpressionKind, Grammar, Production, RangeLimit};
+use super::{Character, Characters, Expression, ExpressionKind, Grammar, Production, RangeLimit};
 use std::fmt;
 use std::fmt::Display;
 use std::path::Path;
@@ -221,7 +221,7 @@ impl Parser<'_> {
         };
 
         let kind = if self.take_str("U+") {
-            self.parse_unicode()?
+            ExpressionKind::Unicode(self.parse_unicode()?)
         } else if self.input[self.index..]
             .chars()
             .next()
@@ -322,27 +322,19 @@ impl Parser<'_> {
     /// Parse an element of a character class, e.g.
     /// `` `a`-`b` `` | `` `term` `` | `` NonTerminal ``.
     fn parse_characters(&mut self) -> Result<Option<Characters>> {
-        if let Some(b'`') = self.peek() {
-            let recov = self.index;
-            let a = self.parse_terminal_str()?;
+        if let Some(a) = self.parse_character()? {
             if self.take_str("-") {
-                //~^ Parse `` `a`-`b` `` character range.
-                if a.len() > 1 {
-                    self.index = recov + 1;
-                    bail!(self, "invalid start terminal in range");
-                }
-                let recov = self.index;
-                let b = self.parse_terminal_str()?;
-                if b.len() > 1 {
-                    self.index = recov + 1;
-                    bail!(self, "invalid end terminal in range");
-                }
-                let a = a.chars().next().unwrap();
-                let b = b.chars().next().unwrap();
+                let Some(b) = self.parse_character()? else {
+                    bail!(self, "expected character in range");
+                };
                 Ok(Some(Characters::Range(a, b)))
             } else {
                 //~^ Parse terminal in backticks.
-                Ok(Some(Characters::Terminal(a)))
+                let t = match a {
+                    Character::Char(ch) => ch.to_string(),
+                    Character::Unicode(_) => bail!(self, "unicode not supported"),
+                };
+                Ok(Some(Characters::Terminal(t)))
             }
         } else if let Some(name) = self.parse_name() {
             //~^ Parse nonterminal identifier.
@@ -352,6 +344,23 @@ impl Parser<'_> {
         }
     }
 
+    fn parse_character(&mut self) -> Result<Option<Character>> {
+        if let Some(b'`') = self.peek() {
+            let recov = self.index;
+            let term = self.parse_terminal_str()?;
+            if term.len() > 1 {
+                self.index = recov + 1;
+                bail!(self, "invalid start terminal in range");
+            }
+            let ch = term.chars().next().unwrap();
+            Ok(Some(Character::Char(ch)))
+        } else if self.take_str("U+") {
+            Ok(Some(Character::Unicode(self.parse_unicode()?)))
+        } else {
+            Ok(None)
+        }
+    }
+
     /// Parse e.g. `<prose text>`.
     fn parse_prose(&mut self) -> Result<ExpressionKind> {
         self.expect("<", "expected opening `<`")?;
@@ -399,9 +408,9 @@ impl Parser<'_> {
     }
 
     /// Parse e.g. `F00F` after `U+`.
-    fn parse_unicode(&mut self) -> Result<ExpressionKind> {
-        let mut xs = Vec::with_capacity(4);
-        for _ in 0..4 {
+    fn parse_unicode(&mut self) -> Result<(char, String)> {
+        let mut xs = Vec::with_capacity(6);
+        let mut push_next = || {
             match self.peek() {
                 Some(x @ (b'0'..=b'9' | b'A'..=b'F')) => {
                     xs.push(x);
@@ -409,8 +418,19 @@ impl Parser<'_> {
                 }
                 _ => bail!(self, "expected 4 uppercase hexadecimal digits after `U+`"),
             }
+            Ok(())
+        };
+        for _ in 0..4 {
+            push_next()?;
+        }
+        for _ in 0..2 {
+            if push_next().is_err() {
+                break;
+            }
         }
-        Ok(ExpressionKind::Unicode(String::from_utf8(xs).unwrap()))
+        let s = String::from_utf8(xs).unwrap();
+        let ch = char::from_u32(u32::from_str_radix(&s, 16).unwrap()).unwrap();
+        Ok((ch, s))
     }
 
     /// Parse `?` after expression.
diff --git a/tools/mdbook-spec/src/grammar/render_markdown.rs b/tools/mdbook-spec/src/grammar/render_markdown.rs
index 59045fc503..7f9624fc86 100644
--- a/tools/mdbook-spec/src/grammar/render_markdown.rs
+++ b/tools/mdbook-spec/src/grammar/render_markdown.rs
@@ -3,7 +3,7 @@
 use super::RenderCtx;
 use crate::grammar::Grammar;
 use anyhow::bail;
-use grammar::{Characters, Expression, ExpressionKind, Production};
+use grammar::{Character, Characters, Expression, ExpressionKind, Production};
 use regex::Regex;
 use std::borrow::Cow;
 use std::fmt::Write;
@@ -186,7 +186,7 @@ fn render_expression(expr: &Expression, cx: &RenderCtx, output: &mut String) {
             output.push_str("^ ");
             render_expression(e, cx, output);
         }
-        ExpressionKind::Unicode(s) => {
+        ExpressionKind::Unicode((_, s)) => {
             output.push_str("U+");
             output.push_str(s);
         }
@@ -227,12 +227,20 @@ fn render_characters(chars: &Characters, cx: &RenderCtx, output: &mut String) {
             markdown_escape(s)
         )
         .unwrap(),
-        Characters::Range(a, b) => write!(
-            output,
-            "<span class=\"grammar-literal\">{a}\
-                 </span>-<span class=\"grammar-literal\">{b}</span>"
-        )
-        .unwrap(),
+        Characters::Range(a, b) => {
+            let write_ch = |ch: &Character, output: &mut String| match ch {
+                Character::Char(ch) => write!(
+                    output,
+                    "<span class=\"grammar-literal\">{}</span>",
+                    markdown_escape(&ch.to_string())
+                )
+                .unwrap(),
+                Character::Unicode((_, s)) => write!(output, "U+{s}").unwrap(),
+            };
+            write_ch(a, output);
+            output.push('-');
+            write_ch(b, output);
+        }
     }
 }
 
diff --git a/tools/mdbook-spec/src/grammar/render_railroad.rs b/tools/mdbook-spec/src/grammar/render_railroad.rs
index 57a440769c..1543127889 100644
--- a/tools/mdbook-spec/src/grammar/render_railroad.rs
+++ b/tools/mdbook-spec/src/grammar/render_railroad.rs
@@ -3,7 +3,7 @@
 use super::RenderCtx;
 use crate::grammar::Grammar;
 use anyhow::bail;
-use grammar::{Characters, Expression, ExpressionKind, Production, RangeLimit};
+use grammar::{Character, Characters, Expression, ExpressionKind, Production, RangeLimit};
 use railroad::*;
 use regex::Regex;
 use std::fmt::Write;
@@ -298,7 +298,7 @@ fn render_expression(expr: &Expression, cx: &RenderCtx, stack: bool) -> Option<B
                     let lbox = LabeledBox::new(rhs, Comment::new("no backtracking".to_string()));
                     Box::new(lbox)
                 }
-                ExpressionKind::Unicode(s) => Box::new(Terminal::new(format!("U+{}", s))),
+                ExpressionKind::Unicode((_, s)) => Box::new(Terminal::new(format!("U+{}", s))),
             };
         }
     };
@@ -317,7 +317,17 @@ fn render_characters(chars: &Characters, cx: &RenderCtx) -> Box<dyn Node> {
     match chars {
         Characters::Named(s) => node_for_nt(cx, s),
         Characters::Terminal(s) => Box::new(Terminal::new(s.clone())),
-        Characters::Range(a, b) => Box::new(Terminal::new(format!("{a}-{b}"))),
+        Characters::Range(a, b) => {
+            let mut s = String::new();
+            let write_ch = |ch: &Character, output: &mut String| match ch {
+                Character::Char(ch) => output.push(*ch),
+                Character::Unicode((_, s)) => write!(output, "U+{s}").unwrap(),
+            };
+            write_ch(a, &mut s);
+            s.push('-');
+            write_ch(b, &mut s);
+            Box::new(Terminal::new(s))
+        }
     }
 }
 

From 999f8839e3ae152906c1070344bdb1a802072c81 Mon Sep 17 00:00:00 2001
From: Eric Huss <eric@huss.org>
Date: Fri, 13 Feb 2026 09:26:19 -0800
Subject: [PATCH 03/12] Use negative lookahead in the grammar

This replaces some suffixes and prose with the new negative lookahead
syntax. The rewritten rules should all have the same meaning as before.
---
 src/identifiers.md  |  2 +-
 src/input-format.md |  2 +-
 src/tokens.md       | 30 +++++++++++++-----------------
 3 files changed, 15 insertions(+), 19 deletions(-)

diff --git a/src/identifiers.md b/src/identifiers.md
index 979284a1c7..5abe303ca2 100644
--- a/src/identifiers.md
+++ b/src/identifiers.md
@@ -16,7 +16,7 @@ NON_KEYWORD_IDENTIFIER -> IDENTIFIER_OR_KEYWORD _except a [strict][lex.keywords.
 IDENTIFIER -> NON_KEYWORD_IDENTIFIER | RAW_IDENTIFIER
 
 RESERVED_RAW_IDENTIFIER ->
-    `r#` (`_` | `crate` | `self` | `Self` | `super`) _not immediately followed by XID_Continue_
+    `r#` (`_` | `crate` | `self` | `Self` | `super`) !XID_Continue
 ```
 
 <!-- When updating the version, update the UAX links, too. -->
diff --git a/src/input-format.md b/src/input-format.md
index be6bb670b3..d6eca2dc2d 100644
--- a/src/input-format.md
+++ b/src/input-format.md
@@ -5,7 +5,7 @@ r[input.syntax]
 ```grammar,lexer
 @root CHAR -> [U+0000-U+D7FF U+E000-U+10FFFF] // a Unicode scalar value
 
-@root ASCII -> [U+0000-U+007F]
+ASCII -> [U+0000-U+007F]
 
 NUL -> U+0000
 ```
diff --git a/src/tokens.md b/src/tokens.md
index 047afd76a6..65b000ce60 100644
--- a/src/tokens.md
+++ b/src/tokens.md
@@ -115,7 +115,7 @@ r[lex.token.literal.suffix.syntax]
 ```grammar,lexer
 SUFFIX -> IDENTIFIER_OR_KEYWORD _except `_`_
 
-SUFFIX_NO_E -> SUFFIX _not beginning with `e` or `E`_
+SUFFIX_NO_E -> ![`e` `E`] SUFFIX
 ```
 
 r[lex.token.literal.suffix.validity]
@@ -253,8 +253,7 @@ r[lex.token.byte.syntax]
 BYTE_LITERAL ->
     `b'` ^ ( ASCII_FOR_CHAR | BYTE_ESCAPE )  `'` SUFFIX?
 
-ASCII_FOR_CHAR ->
-    <any ASCII (i.e. 0x00 to 0x7F) except `'`, `\`, LF, CR, or TAB>
+ASCII_FOR_CHAR -> ![`'` `\` LF CR TAB] ASCII
 
 BYTE_ESCAPE ->
       `\x` HEX_DIGIT HEX_DIGIT
@@ -272,8 +271,7 @@ r[lex.token.str-byte.syntax]
 BYTE_STRING_LITERAL ->
     `b"` ^ ( ASCII_FOR_STRING | BYTE_ESCAPE | STRING_CONTINUE )* `"` SUFFIX?
 
-ASCII_FOR_STRING ->
-    <any ASCII (i.e 0x00 to 0x7F) except `"`, `\`, or CR>
+ASCII_FOR_STRING -> ![`"` `\` CR] ASCII
 ```
 
 r[lex.token.str-byte.intro]
@@ -309,8 +307,7 @@ RAW_BYTE_STRING_CONTENT ->
       `"` ^ ASCII_FOR_RAW*? `"`
     | `#` RAW_BYTE_STRING_CONTENT `#`
 
-ASCII_FOR_RAW ->
-    <any ASCII (i.e. 0x00 to 0x7F) except CR>
+ASCII_FOR_RAW -> !CR ASCII
 ```
 
 r[lex.token.str-byte-raw.intro]
@@ -559,7 +556,7 @@ r[lex.token.literal.float.syntax]
 FLOAT_LITERAL ->
       DEC_LITERAL (`.` DEC_LITERAL)? FLOAT_EXPONENT SUFFIX?
     | DEC_LITERAL `.` DEC_LITERAL SUFFIX_NO_E?
-    | DEC_LITERAL `.` _not immediately followed by `.`, `_` or an XID_Start character_
+    | DEC_LITERAL `.` !(`.` | `_` | XID_Start)
 
 FLOAT_EXPONENT ->
     (`e`|`E`) (`+`|`-`)? `_`* DEC_DIGIT (DEC_DIGIT|`_`)*
@@ -608,13 +605,12 @@ r[lex.token.literal.reserved.syntax]
 RESERVED_NUMBER ->
       BIN_LITERAL [`2`-`9`]
     | OCT_LITERAL [`8`-`9`]
-    | ( BIN_LITERAL | OCT_LITERAL | HEX_LITERAL ) `.` _not immediately followed by `.`, `_` or an XID_Start character_
+    | ( BIN_LITERAL | OCT_LITERAL | HEX_LITERAL ) `.` !(`.` | `_` | XID_Start)
     | ( BIN_LITERAL | OCT_LITERAL ) (`e`|`E`)
-    | `0b` `_`* <end of input or not BIN_DIGIT>
-    | `0o` `_`* <end of input or not OCT_DIGIT>
-    | `0x` `_`* <end of input or not HEX_DIGIT>
+    | `0b` `_`* !BIN_DIGIT
+    | `0o` `_`* !OCT_DIGIT
+    | `0x` `_`* !HEX_DIGIT
     | DEC_LITERAL ( `.` DEC_LITERAL )? (`e` | `E`) (`+` | `-`)? <end of input or not DEC_DIGIT>
-
 ```
 
 r[lex.token.literal.reserved.intro]
@@ -657,16 +653,16 @@ r[lex.token.life.syntax]
 ```grammar,lexer
 LIFETIME_TOKEN ->
       RAW_LIFETIME
-    | `'` IDENTIFIER_OR_KEYWORD _not immediately followed by `'`_
+    | `'` IDENTIFIER_OR_KEYWORD !`'`
 
 LIFETIME_OR_LABEL ->
       RAW_LIFETIME
-    | `'` NON_KEYWORD_IDENTIFIER _not immediately followed by `'`_
+    | `'` NON_KEYWORD_IDENTIFIER !`'`
 
 RAW_LIFETIME ->
-    `'r#` IDENTIFIER_OR_KEYWORD _not immediately followed by `'`_
+    `'r#` ^ IDENTIFIER_OR_KEYWORD !`'`
 
-RESERVED_RAW_LIFETIME -> `'r#` (`_` | `crate` | `self` | `Self` | `super`) _not immediately followed by `'`_
+RESERVED_RAW_LIFETIME -> `'r#` (`_` | `crate` | `self` | `Self` | `super`) !(`'` | XID_Continue)
 ```
 
 r[lex.token.life.intro]

From cc7025c5b439b24091ab4406c14ee8bf51725e8b Mon Sep 17 00:00:00 2001
From: Eric Huss <eric@huss.org>
Date: Fri, 13 Feb 2026 09:30:07 -0800
Subject: [PATCH 04/12] Fix LINE_COMMENT grammar

This clarifies that a bare `//` must be followed by either LF or EOF.
Otherwise it would incorrectly match text that should be handled by
other comment rules.
---
 src/comments.md     | 3 ++-
 src/input-format.md | 4 +++-
 2 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/src/comments.md b/src/comments.md
index a240e7dc58..1320077938 100644
--- a/src/comments.md
+++ b/src/comments.md
@@ -5,7 +5,8 @@ r[comments.syntax]
 ```grammar,lexer
 @root LINE_COMMENT ->
       `//` (~[`/` `!` LF] | `//`) ~LF*
-    | `//`
+    | `//` EOF
+    | `//` _immediately followed by LF_
 
 BLOCK_COMMENT ->
       `/*`
diff --git a/src/input-format.md b/src/input-format.md
index d6eca2dc2d..3e35cba1ee 100644
--- a/src/input-format.md
+++ b/src/input-format.md
@@ -3,11 +3,13 @@ r[input]
 
 r[input.syntax]
 ```grammar,lexer
-@root CHAR -> [U+0000-U+D7FF U+E000-U+10FFFF] // a Unicode scalar value
+CHAR -> [U+0000-U+D7FF U+E000-U+10FFFF] // a Unicode scalar value
 
 ASCII -> [U+0000-U+007F]
 
 NUL -> U+0000
+
+EOF -> !CHAR  // End of file or input
 ```
 
 r[input.intro]

From 844b827ebb8390c38ece4e97df3a478f5da34438 Mon Sep 17 00:00:00 2001
From: Eric Huss <eric@huss.org>
Date: Fri, 13 Feb 2026 09:35:57 -0800
Subject: [PATCH 05/12] Fix BLOCK_COMMENT order

This reorders the BLOCK_COMMENT alternatives so that the grammar follows
the rule that the first alternative that matches wins. With the cut
operator in place, the previous ordering would fail to parse the `/**/`
and `/***/` forms.
---
 src/comments.md | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/src/comments.md b/src/comments.md
index 1320077938..e82cd28ace 100644
--- a/src/comments.md
+++ b/src/comments.md
@@ -9,12 +9,13 @@ r[comments.syntax]
     | `//` _immediately followed by LF_
 
 BLOCK_COMMENT ->
-      `/*`
+      `/**/`
+    | `/***/`
+    | `/*`
+        ^
         ( ~[`*` `!`] | `**` | BLOCK_COMMENT_OR_DOC )
         ( BLOCK_COMMENT_OR_DOC | ~`*/` )*
       `*/`
-    | `/**/`
-    | `/***/`
 
 @root INNER_LINE_DOC ->
     `//!` ~[LF CR]*

From 20f26498e5b6cab21a3688e460de0d2d9d7defa1 Mon Sep 17 00:00:00 2001
From: Eric Huss <eric@huss.org>
Date: Fri, 13 Feb 2026 09:54:48 -0800
Subject: [PATCH 06/12] Fix handling of carriage returns in doc comments

This fixes the doc comment rules so that they properly reject a carriage
return, by using the cut operator. Rustc fails to parse a doc comment
that contains a carriage return.

This requires including (LF|EOF) at the end of line so the cut operator
has something to complete the line.

This also removes the negative `/` from OUTER_LINE_DOC. It does not
work correctly with the check for CR, and is not needed because
LINE_COMMENT already matches `////`. Later I plan to include a rule for
comments that makes clear the order in which they are parsed.

A negative lookahead is necessary in OUTER_BLOCK_DOC to prevent it from
trying to parse what should be a BLOCK_COMMENT as an OUTER_BLOCK_DOC and
failing due to the cut operator.
---
 src/comments.md | 15 ++++++++++-----
 1 file changed, 10 insertions(+), 5 deletions(-)

diff --git a/src/comments.md b/src/comments.md
index e82cd28ace..6e4c06744f 100644
--- a/src/comments.md
+++ b/src/comments.md
@@ -18,20 +18,25 @@ BLOCK_COMMENT ->
       `*/`
 
 @root INNER_LINE_DOC ->
-    `//!` ~[LF CR]*
+    `//!` ^ LINE_DOC_COMMENT_CONTENT (LF | EOF)
+
+LINE_DOC_COMMENT_CONTENT -> (!CR ~LF)*
 
 INNER_BLOCK_DOC ->
-    `/*!` ( BLOCK_COMMENT_OR_DOC | ~[`*/` CR] )* `*/`
+    `/*!` ^ ( BLOCK_COMMENT_OR_DOC | BLOCK_CHAR )* `*/`
 
 @root OUTER_LINE_DOC ->
-    `///` (~`/` ~[LF CR]*)?
+    `///` ^ LINE_DOC_COMMENT_CONTENT (LF | EOF)
 
 OUTER_BLOCK_DOC ->
-    `/**`
+    `/**` ![`*` `/`]
+      ^
       ( ~`*` | BLOCK_COMMENT_OR_DOC )
-      ( BLOCK_COMMENT_OR_DOC | ~[`*/` CR] )*
+      ( BLOCK_COMMENT_OR_DOC | BLOCK_CHAR )*
     `*/`
 
+BLOCK_CHAR -> (!(`*/` | CR) CHAR)
+
 @root BLOCK_COMMENT_OR_DOC ->
       BLOCK_COMMENT
     | OUTER_BLOCK_DOC

From bff2d5fccb80ba62fcd936b92c13150881d9633e Mon Sep 17 00:00:00 2001
From: Eric Huss <eric@huss.org>
Date: Fri, 13 Feb 2026 10:27:55 -0800
Subject: [PATCH 07/12] Add a new COMMENT grammar rule

This is intended to indicate the order in which the rules are expected
to be processed (as defined in this grammar). Of course real parsers can
take a different approach if they produce the same results.

This is roughly similar to the order that rustc takes, though
[`block_comment`](https://github.com/rust-lang/rust/blob/d7daac06d87e1252d10eaa44960164faac46beff/compiler/rustc_lexer/src/lib.rs#L782-L817)
roughly takes the approach of combining the `/*` prefix, and then
deciding if it is an inner doc comment, outer doc comment, or else a
regular block comment.

LINE_COMMENT must be first so that it is not confused with a doc
comment.

BLOCK_COMMENT must be last so that its cut operator does not interfere
with doc comments that start with `/*`. It could be moved up higher in
the list if it had negative lookahead to disambiguate OUTER_BLOCK_DOC,
but the expression for that is more complicated than the one in
OUTER_BLOCK_DOC.
---
 src/comments.md | 16 ++++++++++++----
 1 file changed, 12 insertions(+), 4 deletions(-)

diff --git a/src/comments.md b/src/comments.md
index 6e4c06744f..bbb3332539 100644
--- a/src/comments.md
+++ b/src/comments.md
@@ -3,7 +3,15 @@ r[comments]
 
 r[comments.syntax]
 ```grammar,lexer
-@root LINE_COMMENT ->
+@root COMMENT ->
+      LINE_COMMENT
+    | INNER_LINE_DOC
+    | OUTER_LINE_DOC
+    | INNER_BLOCK_DOC
+    | OUTER_BLOCK_DOC
+    | BLOCK_COMMENT
+
+LINE_COMMENT ->
       `//` (~[`/` `!` LF] | `//`) ~LF*
     | `//` EOF
     | `//` _immediately followed by LF_
@@ -17,7 +25,7 @@ BLOCK_COMMENT ->
         ( BLOCK_COMMENT_OR_DOC | ~`*/` )*
       `*/`
 
-@root INNER_LINE_DOC ->
+INNER_LINE_DOC ->
     `//!` ^ LINE_DOC_COMMENT_CONTENT (LF | EOF)
 
 LINE_DOC_COMMENT_CONTENT -> (!CR ~LF)*
@@ -25,7 +33,7 @@ LINE_DOC_COMMENT_CONTENT -> (!CR ~LF)*
 INNER_BLOCK_DOC ->
     `/*!` ^ ( BLOCK_COMMENT_OR_DOC | BLOCK_CHAR )* `*/`
 
-@root OUTER_LINE_DOC ->
+OUTER_LINE_DOC ->
     `///` ^ LINE_DOC_COMMENT_CONTENT (LF | EOF)
 
 OUTER_BLOCK_DOC ->
@@ -37,7 +45,7 @@ OUTER_BLOCK_DOC ->
 
 BLOCK_CHAR -> (!(`*/` | CR) CHAR)
 
-@root BLOCK_COMMENT_OR_DOC ->
+BLOCK_COMMENT_OR_DOC ->
       BLOCK_COMMENT
     | OUTER_BLOCK_DOC
     | INNER_BLOCK_DOC

From 7c12d351303140ffcd667ec0eb6c849eea511cf3 Mon Sep 17 00:00:00 2001
From: Eric Huss <eric@huss.org>
Date: Fri, 13 Feb 2026 10:28:57 -0800
Subject: [PATCH 08/12] Fix desugaring of doc comments

rustc actually preserves the spaces inside doc comments when desugaring
them into `doc` attributes.
---
 src/comments.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/comments.md b/src/comments.md
index bbb3332539..ef283a9ea1 100644
--- a/src/comments.md
+++ b/src/comments.md
@@ -66,7 +66,7 @@ r[comments.doc.syntax]
 Line doc comments beginning with exactly _three_ slashes (`///`), and block doc comments (`/** ... */`), both outer doc comments, are interpreted as a special syntax for [`doc` attributes].
 
 r[comments.doc.attributes]
-That is, they are equivalent to writing `#[doc="..."]` around the body of the comment, i.e., `/// Foo` turns into `#[doc="Foo"]` and `/** Bar */` turns into `#[doc="Bar"]`. They must therefore appear before something that accepts an outer attribute.
+That is, they are equivalent to writing `#[doc="..."]` around the body of the comment, i.e., `/// Foo` turns into `#[doc=" Foo"]` and `/** Bar */` turns into `#[doc=" Bar "]`. They must therefore appear before something that accepts an outer attribute.
 
 r[comments.doc.inner-syntax]
 Line comments beginning with `//!` and block comments `/*! ... */` are doc comments that apply to the parent of the comment, rather than the item that follows.

From f57a11ff75c458c59224992fce65d7081afdfcae Mon Sep 17 00:00:00 2001
From: Travis Cross <tc@traviscross.com>
Date: Wed, 18 Feb 2026 01:15:05 +0000
Subject: [PATCH 09/12] Add cut to `FLOAT_EXPONENT` and remove reserved alt

The cut operator after (`e`|`E`) in `FLOAT_EXPONENT` reflects rustc's
actual parsing behavior: once the lexer sees an exponent indicator, it
commits and does not backtrack.  This makes the last `RESERVED_NUMBER`
alternative -- which existed to catch the empty-exponent case --
redundant, since the cut in `FLOAT_EXPONENT` now handles it directly.

Co-authored-by: Eric Huss <eric@huss.org>
---
 src/tokens.md | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/src/tokens.md b/src/tokens.md
index 65b000ce60..d878eabfe2 100644
--- a/src/tokens.md
+++ b/src/tokens.md
@@ -559,7 +559,7 @@ FLOAT_LITERAL ->
     | DEC_LITERAL `.` !(`.` | `_` | XID_Start)
 
 FLOAT_EXPONENT ->
-    (`e`|`E`) (`+`|`-`)? `_`* DEC_DIGIT (DEC_DIGIT|`_`)*
+    (`e`|`E`) ^ (`+`|`-`)? `_`* DEC_DIGIT (DEC_DIGIT|`_`)*
 ```
 
 r[lex.token.literal.float.form]
@@ -610,7 +610,6 @@ RESERVED_NUMBER ->
     | `0b` `_`* !BIN_DIGIT
     | `0o` `_`* !OCT_DIGIT
     | `0x` `_`* !HEX_DIGIT
-    | DEC_LITERAL ( `.` DEC_LITERAL )? (`e` | `E`) (`+` | `-`)? <end of input or not DEC_DIGIT>
 ```
 
 r[lex.token.literal.reserved.intro]

From ae86f1918b8e9ccca36e28391e9413ff1ab701fe Mon Sep 17 00:00:00 2001
From: Travis Cross <tc@traviscross.com>
Date: Wed, 18 Feb 2026 01:37:21 +0000
Subject: [PATCH 10/12] Fix preposition in `CharacterRange` description

The description says characters can be "surrounded in
backticks", but it'd be better to say "surrounded by".
---
 dev-guide/src/grammar.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/dev-guide/src/grammar.md b/dev-guide/src/grammar.md
index 2dbdcf1ab4..7a4cdea466 100644
--- a/dev-guide/src/grammar.md
+++ b/dev-guide/src/grammar.md
@@ -131,7 +131,7 @@ The general format is a series of productions separated by blank lines. The expr
 | Comment | // Single line comment. | A comment extending to the end of the line. |
 | Terminal | \`example\` | A sequence of exact characters, surrounded by backticks. |
 | Charset | \[ \`A\`-\`Z\` \`0\`-\`9\` \`_\` \] | A choice from a set of characters, space-separated. There are three different forms. |
-| CharacterRange | \[ \`A\`-\`Z\` \] | A range of characters. Characters can be a Unicode expression or be a literal character surrounded in backticks. |
+| CharacterRange | \[ \`A\`-\`Z\` \] | A range of characters. Characters can be a Unicode expression or be a literal character surrounded by backticks. |
 | CharacterTerminal | \[ \`x\` \] | A single character, surrounded by backticks. |
 | CharacterName | \[ LF \] | A nonterminal, referring to another production. |
 | Prose | \<any ASCII character except CR\> | An English description of what should be matched, surrounded in angle brackets. |

From fc1589736c175015f02ecf6b7741970d00609390 Mon Sep 17 00:00:00 2001
From: Travis Cross <tc@traviscross.com>
Date: Wed, 18 Feb 2026 01:40:10 +0000
Subject: [PATCH 11/12] Fix U+xxxx notation description

The grammar now accepts 4-6 hex digits for Unicode code points (needed
for values above U+FFFF), so let's update the notation column to
reflect the variable width.  Let's also capitalize "Unicode", which is
a proper noun.
---
 src/notation.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/notation.md b/src/notation.md
index ace8e65bc0..b74c74b22f 100644
--- a/src/notation.md
+++ b/src/notation.md
@@ -27,7 +27,7 @@ The following notations are used by the *Lexer* and *Syntax* grammar snippets:
 | ~`string`         | ~`\n`, ~`*/`                  | Any characters, except this sequence      |
 | ( )               | (`,` _Parameter_)<sup>?</sup> | Groups items                              |
 | ^                 | `b'` ^ ASCII_FOR_CHAR         | The rest of the sequence must match or parsing fails unconditionally ([hard cut operator]) |
-| U+xxxx            | U+0060                        | A single unicode character                |
+| U+xxxx..xxxxxx    | U+0060                        | A single Unicode character                |
 | \<text\>          | \<any ASCII char except CR\>  | An English description of what should be matched |
 | Rule <sub>suffix</sub> | IDENTIFIER_OR_KEYWORD <sub>_except `crate`_</sub> | A modification to the previous rule |
 | // Comment. | // Single line comment. | A comment extending to the end of the line. |

From 164f5fa02b8e729381350488605d2f8f4c39bf3c Mon Sep 17 00:00:00 2001
From: Travis Cross <tc@traviscross.com>
Date: Wed, 18 Feb 2026 01:54:52 +0000
Subject: [PATCH 12/12] Add tests for negative lookahead and Unicode

These tests cover:

- Parser: negative lookahead with nonterminals, terminals, charsets,
  and grouped expressions, and within sequences, repetitions, and
  alternations; error case for trailing `!`; Unicode code points with
  4, 5, and 6 hex digits, alone and in alternations; charset ranges
  with `Character::Char`, `Character::Unicode`, and mixed forms;
  charsets with multiple Unicode ranges or with terminals and named
  entries; and negative lookahead applied to such charsets.

- Markdown renderer: negative lookahead rendering with `!`, Unicode
  rendering as `U+xxxx`, charset rendering with char and Unicode
  ranges, cut and neg expression rendering, and markdown escaping.

- Railroad renderer: negative lookahead renders as a "not followed by"
  labeled box, Unicode renders as terminal, charset ranges, cut
  renders as "no backtracking" labeled box, and neg expression renders
  as "with the exception of" labeled box.
---
 tools/grammar/src/parser.rs                   | 331 +++++++++++++++++-
 .../src/grammar/render_markdown.rs            | 178 ++++++++++
 .../src/grammar/render_railroad.rs            | 117 ++++++-
 3 files changed, 624 insertions(+), 2 deletions(-)

diff --git a/tools/grammar/src/parser.rs b/tools/grammar/src/parser.rs
index 080d03c66e..0db6b478b5 100644
--- a/tools/grammar/src/parser.rs
+++ b/tools/grammar/src/parser.rs
@@ -573,7 +573,7 @@ fn translate_position(input: &str, index: usize) -> (&str, usize, usize) {
 #[cfg(test)]
 mod tests {
     use crate::parser::{parse_grammar, translate_position};
-    use crate::{ExpressionKind, Grammar, RangeLimit};
+    use crate::{Character, Characters, ExpressionKind, Grammar, RangeLimit};
     use std::path::Path;
 
     #[test]
@@ -778,4 +778,333 @@ mod tests {
         assert_eq!(max, Some(1));
         assert!(matches!(limit, RangeLimit::HalfOpen));
     }
+
+    // --- Negative lookahead tests ---
+
+    #[test]
+    fn lookahead_simple_nonterminal() {
+        let input = "Rule -> !Foo";
+        let grammar = parse(input).unwrap();
+        let rule = grammar.productions.get("Rule").unwrap();
+        let ExpressionKind::NegativeLookahead(inner) = &rule.expression.kind else {
+            panic!("expected NegativeLookahead, got {:?}", rule.expression.kind);
+        };
+        assert!(matches!(&inner.kind, ExpressionKind::Nt(n) if n == "Foo"));
+    }
+
+    #[test]
+    fn lookahead_terminal() {
+        let input = "Rule -> !`'` Foo";
+        let grammar = parse(input).unwrap();
+        let rule = grammar.productions.get("Rule").unwrap();
+        let ExpressionKind::Sequence(seq) = &rule.expression.kind else {
+            panic!("expected Sequence, got {:?}", rule.expression.kind);
+        };
+        assert_eq!(seq.len(), 2);
+        let ExpressionKind::NegativeLookahead(inner) = &seq[0].kind else {
+            panic!("expected NegativeLookahead, got {:?}", seq[0].kind);
+        };
+        assert!(matches!(&inner.kind, ExpressionKind::Terminal(t) if t == "'"));
+        assert!(matches!(&seq[1].kind, ExpressionKind::Nt(n) if n == "Foo"));
+    }
+
+    #[test]
+    fn lookahead_charset() {
+        let input = "Rule -> ![`e` `E`] SUFFIX";
+        let grammar = parse(input).unwrap();
+        let rule = grammar.productions.get("Rule").unwrap();
+        let ExpressionKind::Sequence(seq) = &rule.expression.kind else {
+            panic!("expected Sequence, got {:?}", rule.expression.kind);
+        };
+        assert_eq!(seq.len(), 2);
+        let ExpressionKind::NegativeLookahead(inner) = &seq[0].kind else {
+            panic!("expected NegativeLookahead, got {:?}", seq[0].kind);
+        };
+        let ExpressionKind::Charset(chars) = &inner.kind else {
+            panic!("expected Charset inside lookahead, got {:?}", inner.kind);
+        };
+        assert_eq!(chars.len(), 2);
+        assert!(matches!(&chars[0], Characters::Terminal(t) if t == "e"));
+        assert!(matches!(&chars[1], Characters::Terminal(t) if t == "E"));
+    }
+
+    #[test]
+    fn lookahead_grouped() {
+        let input = "Rule -> !(`.` | `_` | XID_Start)";
+        let grammar = parse(input).unwrap();
+        let rule = grammar.productions.get("Rule").unwrap();
+        let ExpressionKind::NegativeLookahead(inner) = &rule.expression.kind else {
+            panic!("expected NegativeLookahead, got {:?}", rule.expression.kind);
+        };
+        let ExpressionKind::Grouped(grouped) = &inner.kind else {
+            panic!("expected Grouped inside lookahead, got {:?}", inner.kind);
+        };
+        let ExpressionKind::Alt(alts) = &grouped.kind else {
+            panic!("expected Alt inside Grouped, got {:?}", grouped.kind);
+        };
+        assert_eq!(alts.len(), 3);
+        assert!(matches!(&alts[0].kind, ExpressionKind::Terminal(t) if t == "."));
+        assert!(matches!(&alts[1].kind, ExpressionKind::Terminal(t) if t == "_"));
+        assert!(matches!(&alts[2].kind, ExpressionKind::Nt(n) if n == "XID_Start"));
+    }
+
+    #[test]
+    fn lookahead_in_sequence_middle() {
+        let input = "Rule -> A !B C";
+        let grammar = parse(input).unwrap();
+        let rule = grammar.productions.get("Rule").unwrap();
+        let ExpressionKind::Sequence(seq) = &rule.expression.kind else {
+            panic!("expected Sequence, got {:?}", rule.expression.kind);
+        };
+        assert_eq!(seq.len(), 3);
+        assert!(matches!(&seq[0].kind, ExpressionKind::Nt(n) if n == "A"));
+        let ExpressionKind::NegativeLookahead(inner) = &seq[1].kind else {
+            panic!("expected NegativeLookahead, got {:?}", seq[1].kind);
+        };
+        assert!(matches!(&inner.kind, ExpressionKind::Nt(n) if n == "B"));
+        assert!(matches!(&seq[2].kind, ExpressionKind::Nt(n) if n == "C"));
+    }
+
+    #[test]
+    fn lookahead_in_repetition() {
+        let input = "Rule -> (!A B)*";
+        let grammar = parse(input).unwrap();
+        let rule = grammar.productions.get("Rule").unwrap();
+        let ExpressionKind::Repeat(rep) = &rule.expression.kind else {
+            panic!("expected Repeat, got {:?}", rule.expression.kind);
+        };
+        let ExpressionKind::Grouped(grouped) = &rep.kind else {
+            panic!("expected Grouped inside Repeat, got {:?}", rep.kind);
+        };
+        let ExpressionKind::Sequence(seq) = &grouped.kind else {
+            panic!("expected Sequence inside Grouped, got {:?}", grouped.kind);
+        };
+        assert_eq!(seq.len(), 2);
+        assert!(matches!(&seq[0].kind, ExpressionKind::NegativeLookahead(_)));
+        assert!(matches!(&seq[1].kind, ExpressionKind::Nt(n) if n == "B"));
+    }
+
+    #[test]
+    fn lookahead_in_alternation() {
+        let input = "Rule -> !A B | C";
+        let grammar = parse(input).unwrap();
+        let rule = grammar.productions.get("Rule").unwrap();
+        let ExpressionKind::Alt(alts) = &rule.expression.kind else {
+            panic!("expected Alt, got {:?}", rule.expression.kind);
+        };
+        assert_eq!(alts.len(), 2);
+        let ExpressionKind::Sequence(seq) = &alts[0].kind else {
+            panic!("expected Sequence, got {:?}", alts[0].kind);
+        };
+        assert_eq!(seq.len(), 2);
+        assert!(matches!(&seq[0].kind, ExpressionKind::NegativeLookahead(_)));
+        assert!(matches!(&seq[1].kind, ExpressionKind::Nt(n) if n == "B"));
+        assert!(matches!(&alts[1].kind, ExpressionKind::Nt(n) if n == "C"));
+    }
+
+    #[test]
+    fn lookahead_fail_trailing() {
+        let input = "Rule -> !";
+        let err = parse(input).unwrap_err();
+        assert!(err.contains("expected expression after !"));
+    }
+
+    // --- Unicode tests ---
+
+    #[test]
+    fn unicode_4_digit() {
+        let input = "Rule -> U+0009";
+        let grammar = parse(input).unwrap();
+        let rule = grammar.productions.get("Rule").unwrap();
+        let ExpressionKind::Unicode((ch, s)) = &rule.expression.kind else {
+            panic!("expected Unicode, got {:?}", rule.expression.kind);
+        };
+        assert_eq!(*ch, '\t');
+        assert_eq!(s, "0009");
+    }
+
+    #[test]
+    fn unicode_5_digit() {
+        let input = "Rule -> U+E0000";
+        let grammar = parse(input).unwrap();
+        let rule = grammar.productions.get("Rule").unwrap();
+        let ExpressionKind::Unicode((ch, s)) = &rule.expression.kind else {
+            panic!("expected Unicode, got {:?}", rule.expression.kind);
+        };
+        assert_eq!(*ch, '\u{E0000}');
+        assert_eq!(s, "E0000");
+    }
+
+    #[test]
+    fn unicode_6_digit() {
+        let input = "Rule -> U+10FFFF";
+        let grammar = parse(input).unwrap();
+        let rule = grammar.productions.get("Rule").unwrap();
+        let ExpressionKind::Unicode((ch, s)) = &rule.expression.kind else {
+            panic!("expected Unicode, got {:?}", rule.expression.kind);
+        };
+        assert_eq!(*ch, '\u{10FFFF}');
+        assert_eq!(s, "10FFFF");
+    }
+
+    #[test]
+    fn unicode_in_alternation() {
+        let input = "Rule -> U+0009 | U+000A";
+        let grammar = parse(input).unwrap();
+        let rule = grammar.productions.get("Rule").unwrap();
+        let ExpressionKind::Alt(alts) = &rule.expression.kind else {
+            panic!("expected Alt, got {:?}", rule.expression.kind);
+        };
+        assert_eq!(alts.len(), 2);
+        assert!(matches!(
+            &alts[0].kind,
+            ExpressionKind::Unicode((ch, _)) if *ch == '\t'
+        ));
+        assert!(matches!(
+            &alts[1].kind,
+            ExpressionKind::Unicode((ch, _)) if *ch == '\n'
+        ));
+    }
+
+    // --- Character / charset range tests ---
+
+    #[test]
+    fn charset_unicode_range() {
+        let input = "Rule -> [U+0000-U+007F]";
+        let grammar = parse(input).unwrap();
+        let rule = grammar.productions.get("Rule").unwrap();
+        let ExpressionKind::Charset(chars) = &rule.expression.kind else {
+            panic!("expected Charset, got {:?}", rule.expression.kind);
+        };
+        assert_eq!(chars.len(), 1);
+        let Characters::Range(a, b) = &chars[0] else {
+            panic!("expected Range, got {:?}", chars[0]);
+        };
+        assert!(matches!(a, Character::Unicode((ch, _)) if *ch == '\0'));
+        assert!(matches!(
+            b,
+            Character::Unicode((ch, _)) if *ch == '\u{7F}'
+        ));
+    }
+
+    #[test]
+    fn charset_char_range() {
+        let input = "Rule -> [`a`-`z`]";
+        let grammar = parse(input).unwrap();
+        let rule = grammar.productions.get("Rule").unwrap();
+        let ExpressionKind::Charset(chars) = &rule.expression.kind else {
+            panic!("expected Charset, got {:?}", rule.expression.kind);
+        };
+        assert_eq!(chars.len(), 1);
+        let Characters::Range(a, b) = &chars[0] else {
+            panic!("expected Range, got {:?}", chars[0]);
+        };
+        assert!(matches!(a, Character::Char(ch) if *ch == 'a'));
+        assert!(matches!(b, Character::Char(ch) if *ch == 'z'));
+    }
+
+    #[test]
+    fn charset_mixed_range() {
+        let input = "Rule -> [`a`-U+007A]";
+        let grammar = parse(input).unwrap();
+        let rule = grammar.productions.get("Rule").unwrap();
+        let ExpressionKind::Charset(chars) = &rule.expression.kind else {
+            panic!("expected Charset, got {:?}", rule.expression.kind);
+        };
+        assert_eq!(chars.len(), 1);
+        let Characters::Range(a, b) = &chars[0] else {
+            panic!("expected Range, got {:?}", chars[0]);
+        };
+        assert!(matches!(a, Character::Char(ch) if *ch == 'a'));
+        assert!(matches!(
+            b,
+            Character::Unicode((ch, _)) if *ch == 'z'
+        ));
+    }
+
+    #[test]
+    fn charset_multiple_unicode_ranges() {
+        let input = "Rule -> [U+0000-U+D7FF U+E000-U+10FFFF]";
+        let grammar = parse(input).unwrap();
+        let rule = grammar.productions.get("Rule").unwrap();
+        let ExpressionKind::Charset(chars) = &rule.expression.kind else {
+            panic!("expected Charset, got {:?}", rule.expression.kind);
+        };
+        assert_eq!(chars.len(), 2);
+        let Characters::Range(a1, b1) = &chars[0] else {
+            panic!("expected Range, got {:?}", chars[0]);
+        };
+        assert!(matches!(a1, Character::Unicode((ch, _)) if *ch == '\0'));
+        assert!(matches!(b1, Character::Unicode((ch, _)) if *ch == '\u{D7FF}'));
+        let Characters::Range(a2, b2) = &chars[1] else {
+            panic!("expected Range, got {:?}", chars[1]);
+        };
+        assert!(matches!(a2, Character::Unicode((ch, _)) if *ch == '\u{E000}'));
+        assert!(matches!(b2, Character::Unicode((ch, _)) if *ch == '\u{10FFFF}'));
+    }
+
+    #[test]
+    fn charset_terminals_and_named() {
+        let input = "Rule -> [`a` `b` Foo]";
+        let grammar = parse(input).unwrap();
+        let rule = grammar.productions.get("Rule").unwrap();
+        let ExpressionKind::Charset(chars) = &rule.expression.kind else {
+            panic!("expected Charset, got {:?}", rule.expression.kind);
+        };
+        assert_eq!(chars.len(), 3);
+        assert!(matches!(&chars[0], Characters::Terminal(t) if t == "a"));
+        assert!(matches!(&chars[1], Characters::Terminal(t) if t == "b"));
+        assert!(matches!(&chars[2], Characters::Named(n) if n == "Foo"));
+    }
+
+    // --- Negative lookahead combined with charset ---
+
+    #[test]
+    fn lookahead_charset_with_named_and_terminals() {
+        // Simplified form of a pattern from tokens.md: ![`'` `\` LF CR TAB] ASCII
+        let input = "Rule -> ![`x` `y` LF] Foo";
+        let grammar = parse(input).unwrap();
+        let rule = grammar.productions.get("Rule").unwrap();
+        let ExpressionKind::Sequence(seq) = &rule.expression.kind else {
+            panic!("expected Sequence, got {:?}", rule.expression.kind);
+        };
+        assert_eq!(seq.len(), 2);
+        let ExpressionKind::NegativeLookahead(inner) = &seq[0].kind else {
+            panic!("expected NegativeLookahead, got {:?}", seq[0].kind);
+        };
+        let ExpressionKind::Charset(chars) = &inner.kind else {
+            panic!("expected Charset, got {:?}", inner.kind);
+        };
+        assert_eq!(chars.len(), 3);
+        assert!(matches!(&chars[0], Characters::Terminal(t) if t == "x"));
+        assert!(matches!(&chars[1], Characters::Terminal(t) if t == "y"));
+        assert!(matches!(&chars[2], Characters::Named(n) if n == "LF"));
+    }
+
+    // --- Negative lookahead combined with Unicode ---
+
+    #[test]
+    fn lookahead_charset_with_unicode_range() {
+        let input = "Rule -> ![U+0000-U+007F] Foo";
+        let grammar = parse(input).unwrap();
+        let rule = grammar.productions.get("Rule").unwrap();
+        let ExpressionKind::Sequence(seq) = &rule.expression.kind else {
+            panic!("expected Sequence, got {:?}", rule.expression.kind);
+        };
+        let ExpressionKind::NegativeLookahead(inner) = &seq[0].kind else {
+            panic!("expected NegativeLookahead, got {:?}", seq[0].kind);
+        };
+        let ExpressionKind::Charset(chars) = &inner.kind else {
+            panic!("expected Charset, got {:?}", inner.kind);
+        };
+        assert_eq!(chars.len(), 1);
+        let Characters::Range(a, b) = &chars[0] else {
+            panic!("expected Range, got {:?}", chars[0]);
+        };
+        assert!(matches!(a, Character::Unicode((ch, _)) if *ch == '\0'));
+        assert!(matches!(
+            b,
+            Character::Unicode((ch, _)) if *ch == '\u{7F}'
+        ));
+    }
 }
diff --git a/tools/mdbook-spec/src/grammar/render_markdown.rs b/tools/mdbook-spec/src/grammar/render_markdown.rs
index 7f9624fc86..316eb9aaf3 100644
--- a/tools/mdbook-spec/src/grammar/render_markdown.rs
+++ b/tools/mdbook-spec/src/grammar/render_markdown.rs
@@ -250,3 +250,181 @@ fn markdown_escape(s: &str) -> Cow<'_, str> {
         LazyLock::new(|| Regex::new(r#"[\\`_*\[\](){}'".-]"#).unwrap());
     ESC_RE.replace_all(s, r"\$0")
 }
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use std::collections::HashMap;
+
+    /// Creates a minimal `RenderCtx` for testing.
+    fn test_cx() -> RenderCtx {
+        RenderCtx {
+            md_link_map: HashMap::new(),
+            rr_link_map: HashMap::new(),
+            for_summary: false,
+        }
+    }
+
+    /// Renders a single expression to a markdown string.
+    fn render(kind: ExpressionKind) -> String {
+        let cx = test_cx();
+        let expr = Expression::new_kind(kind);
+        let mut output = String::new();
+        render_expression(&expr, &cx, &mut output);
+        output
+    }
+
+    // -- Negative lookahead tests --
+
+    #[test]
+    fn lookahead_nonterminal() {
+        let result = render(ExpressionKind::NegativeLookahead(Box::new(
+            Expression::new_kind(ExpressionKind::Nt("CHAR".to_string())),
+        )));
+        assert!(result.contains("!"), "should contain `!` prefix");
+        assert!(
+            result.contains("CHAR"),
+            "should contain the nonterminal name"
+        );
+    }
+
+    #[test]
+    fn lookahead_terminal() {
+        let result = render(ExpressionKind::NegativeLookahead(Box::new(
+            Expression::new_kind(ExpressionKind::Terminal("'".to_string())),
+        )));
+        assert!(result.starts_with("!"), "should start with `!`");
+        assert!(
+            result.contains("grammar-literal"),
+            "should render inner terminal as a grammar literal"
+        );
+    }
+
+    #[test]
+    fn lookahead_charset() {
+        let result = render(ExpressionKind::NegativeLookahead(Box::new(
+            Expression::new_kind(ExpressionKind::Charset(vec![
+                Characters::Terminal("e".to_string()),
+                Characters::Terminal("E".to_string()),
+            ])),
+        )));
+        assert!(result.starts_with("!"), "should start with `!`");
+        assert!(
+            result.contains("\\["),
+            "should contain escaped opening bracket for charset"
+        );
+    }
+
+    #[test]
+    fn lookahead_grouped() {
+        // !( `.` | `_` )
+        let inner =
+            ExpressionKind::Grouped(Box::new(Expression::new_kind(ExpressionKind::Alt(vec![
+                Expression::new_kind(ExpressionKind::Terminal(".".to_string())),
+                Expression::new_kind(ExpressionKind::Terminal("_".to_string())),
+            ]))));
+        let result = render(ExpressionKind::NegativeLookahead(Box::new(
+            Expression::new_kind(inner),
+        )));
+        assert!(result.starts_with("!("));
+        assert!(result.contains("|"));
+    }
+
+    // -- Unicode tests --
+
+    #[test]
+    fn unicode_4_digit() {
+        let result = render(ExpressionKind::Unicode(('\t', "0009".to_string())));
+        assert_eq!(result, "U+0009");
+    }
+
+    #[test]
+    fn unicode_6_digit() {
+        let result = render(ExpressionKind::Unicode((
+            '\u{10FFFF}',
+            "10FFFF".to_string(),
+        )));
+        assert_eq!(result, "U+10FFFF");
+    }
+
+    // -- Charset with Unicode range tests --
+
+    #[test]
+    fn charset_unicode_range() {
+        let result = render(ExpressionKind::Charset(vec![Characters::Range(
+            Character::Unicode(('\0', "0000".to_string())),
+            Character::Unicode(('\u{007F}', "007F".to_string())),
+        )]));
+        assert!(result.contains("\\["));
+        assert!(result.contains("U+0000"));
+        assert!(result.contains("U+007F"));
+        assert!(result.contains("-"));
+    }
+
+    #[test]
+    fn charset_char_range() {
+        let result = render(ExpressionKind::Charset(vec![Characters::Range(
+            Character::Char('a'),
+            Character::Char('z'),
+        )]));
+        assert!(result.contains("\\["));
+        assert!(result.contains("grammar-literal"));
+        assert!(result.contains("-"));
+    }
+
+    #[test]
+    fn charset_mixed_range() {
+        // [`a`-U+007A]
+        let result = render(ExpressionKind::Charset(vec![Characters::Range(
+            Character::Char('a'),
+            Character::Unicode(('\u{007A}', "007A".to_string())),
+        )]));
+        assert!(result.contains("grammar-literal"));
+        assert!(result.contains("U+007A"));
+        assert!(result.contains("-"));
+    }
+
+    // -- Cut test --
+
+    #[test]
+    fn cut_rendering() {
+        let result = render(ExpressionKind::Cut(Box::new(Expression::new_kind(
+            ExpressionKind::Nt("Foo".to_string()),
+        ))));
+        assert!(result.starts_with("^ "), "cut should render as `^ ` prefix");
+        assert!(result.contains("Foo"));
+    }
+
+    // -- NegExpression test --
+
+    #[test]
+    fn neg_expression_rendering() {
+        let result = render(ExpressionKind::NegExpression(Box::new(
+            Expression::new_kind(ExpressionKind::Charset(vec![Characters::Terminal(
+                "a".to_string(),
+            )])),
+        )));
+        assert!(
+            result.starts_with("~"),
+            "neg expression should render as `~` prefix"
+        );
+    }
+
+    // -- Markdown escape tests --
+
+    #[test]
+    fn markdown_escape_backtick() {
+        assert_eq!(markdown_escape("`"), "\\`");
+    }
+
+    #[test]
+    fn markdown_escape_brackets() {
+        assert_eq!(markdown_escape("["), "\\[");
+        assert_eq!(markdown_escape("]"), "\\]");
+    }
+
+    #[test]
+    fn markdown_escape_plain() {
+        assert_eq!(markdown_escape("abc"), "abc");
+    }
+}
diff --git a/tools/mdbook-spec/src/grammar/render_railroad.rs b/tools/mdbook-spec/src/grammar/render_railroad.rs
index 1543127889..ad7b291e57 100644
--- a/tools/mdbook-spec/src/grammar/render_railroad.rs
+++ b/tools/mdbook-spec/src/grammar/render_railroad.rs
@@ -391,7 +391,7 @@ impl Node for Except {
 #[cfg(test)]
 mod tests {
     use super::*;
-    use grammar::{Expression, ExpressionKind, RangeLimit};
+    use grammar::{Character, Characters, Expression, ExpressionKind, RangeLimit};
 
     /// Render an expression to an SVG string fragment.
     fn render_to_svg(expr: &Expression) -> Option<String> {
@@ -411,6 +411,8 @@ mod tests {
         })
     }
 
+    // -- RepeatRange tests --
+
     #[test]
     fn test_empty_exclusive_equal() {
         // `e{2..2}` (half-open, min == max) renders as empty.
@@ -476,4 +478,117 @@ mod tests {
             "expected nonterminal for e{{..=1}}, got: {svg}"
         );
     }
+
+    // -- Negative lookahead tests --
+
+    #[test]
+    fn lookahead_nonterminal() {
+        let expr = Expression::new_kind(ExpressionKind::NegativeLookahead(Box::new(
+            Expression::new_kind(ExpressionKind::Nt("CHAR".to_string())),
+        )));
+        let svg = render_to_svg(&expr).unwrap();
+        assert!(
+            svg.contains("not followed by"),
+            "should contain the 'not followed by' label"
+        );
+        assert!(svg.contains("CHAR"), "should contain the nonterminal name");
+    }
+
+    #[test]
+    fn lookahead_terminal() {
+        let expr = Expression::new_kind(ExpressionKind::NegativeLookahead(Box::new(
+            Expression::new_kind(ExpressionKind::Terminal("CR".to_string())),
+        )));
+        let svg = render_to_svg(&expr).unwrap();
+        assert!(svg.contains("not followed by"));
+        assert!(svg.contains("CR"));
+    }
+
+    #[test]
+    fn lookahead_charset() {
+        let expr = Expression::new_kind(ExpressionKind::NegativeLookahead(Box::new(
+            Expression::new_kind(ExpressionKind::Charset(vec![
+                Characters::Terminal("e".to_string()),
+                Characters::Terminal("E".to_string()),
+            ])),
+        )));
+        let svg = render_to_svg(&expr).unwrap();
+        assert!(svg.contains("not followed by"));
+        assert!(svg.contains("e"));
+        assert!(svg.contains("E"));
+    }
+
+    // -- Unicode tests --
+
+    #[test]
+    fn unicode_4_digit() {
+        let expr = Expression::new_kind(ExpressionKind::Unicode(('\t', "0009".to_string())));
+        let svg = render_to_svg(&expr).unwrap();
+        assert!(svg.contains("U+0009"), "should render Unicode code point");
+    }
+
+    #[test]
+    fn unicode_6_digit() {
+        let expr = Expression::new_kind(ExpressionKind::Unicode((
+            '\u{10FFFF}',
+            "10FFFF".to_string(),
+        )));
+        let svg = render_to_svg(&expr).unwrap();
+        assert!(svg.contains("U+10FFFF"));
+    }
+
+    // -- Charset with ranges --
+
+    #[test]
+    fn charset_unicode_range() {
+        let expr = Expression::new_kind(ExpressionKind::Charset(vec![Characters::Range(
+            Character::Unicode(('\0', "0000".to_string())),
+            Character::Unicode(('\u{007F}', "007F".to_string())),
+        )]));
+        let svg = render_to_svg(&expr).unwrap();
+        assert!(svg.contains("U+0000"));
+        assert!(svg.contains("U+007F"));
+    }
+
+    #[test]
+    fn charset_char_range() {
+        let expr = Expression::new_kind(ExpressionKind::Charset(vec![Characters::Range(
+            Character::Char('a'),
+            Character::Char('z'),
+        )]));
+        let svg = render_to_svg(&expr).unwrap();
+        assert!(svg.contains("a"));
+        assert!(svg.contains("z"));
+    }
+
+    // -- Cut test --
+
+    #[test]
+    fn cut_rendering() {
+        let expr = Expression::new_kind(ExpressionKind::Cut(Box::new(Expression::new_kind(
+            ExpressionKind::Nt("Foo".to_string()),
+        ))));
+        let svg = render_to_svg(&expr).unwrap();
+        assert!(
+            svg.contains("no backtracking"),
+            "cut should render with 'no backtracking' label"
+        );
+        assert!(svg.contains("Foo"));
+    }
+
+    // -- NegExpression test --
+
+    #[test]
+    fn neg_expression_rendering() {
+        let expr = Expression::new_kind(ExpressionKind::NegExpression(Box::new(
+            Expression::new_kind(ExpressionKind::Charset(vec![Characters::Terminal(
+                "a".to_string(),
+            )])),
+        )));
+        let svg = render_to_svg(&expr).unwrap();
+        assert!(
+            svg.contains("with the exception of"),
+            "neg expression should have exception label"
+        );
+    }
 }