rust-lang · traviscross · Feb 18, 2026 · Feb 13, 2026 · Feb 13, 2026 · Feb 13, 2026
diff --git a/dev-guide/src/grammar.md b/dev-guide/src/grammar.md
@@ -39,7 +39,11 @@ Sequence ->
         (` `* AdornedExpr)* ` `* Cut
       | (` `* AdornedExpr)+
 
-AdornedExpr -> Expr1 Quantifier? Suffix? Footnote?
+AdornedExpr -> Prefix? Expr1 Quantifier? Suffix? Footnote?
+
+Prefix -> NegativeLookahead
+
+NegativeLookahead -> `!`
 
 Suffix -> ` _` <not underscore, unless in backtick>* `_`
 
@@ -81,7 +85,7 @@ Expr1 ->
     | Group
     | NegativeExpression
 
-Unicode -> `U+` [`A`-`Z` `0`-`9`]4..=4
+Unicode -> `U+` [`A`-`Z` `0`-`9`]4..=6
 
 NonTerminal -> Name
 
@@ -98,7 +102,11 @@ Characters ->
     | CharacterTerminal
     | CharacterName
 
-CharacterRange -> BACKTICK <any char> BACKTICK `-` BACKTICK <any char> BACKTICK
+CharacterRange -> Character `-` Character
+
+Character ->
+        BACKTICK <any char> BACKTICK
+      | Unicode
 
 CharacterTerminal -> Terminal
 
@@ -123,7 +131,7 @@ The general format is a series of productions separated by blank lines. The expr
 | Comment | // Single line comment. | A comment extending to the end of the line. |
 | Terminal | \`example\` | A sequence of exact characters, surrounded by backticks. |
 | Charset | \[ \`A\`-\`Z\` \`0\`-\`9\` \`_\` \] | A choice from a set of characters, space-separated. There are three different forms. |
-| CharacterRange | \[ \`A\`-\`Z\` \] | A range of characters; each character should be in backticks. |
+| CharacterRange | \[ \`A\`-\`Z\` \] | A range of characters. Each endpoint of the range is either a Unicode expression or a literal character surrounded by backticks. |
 | CharacterTerminal | \[ \`x\` \] | A single character, surrounded by backticks. |
 | CharacterName | \[ LF \] | A nonterminal, referring to another production. |
 | Prose | \<any ASCII character except CR\> | An English description of what should be matched, surrounded in angle brackets. |
@@ -135,6 +143,7 @@ The general format is a series of productions separated by blank lines. The expr
 | Suffix | \_except \[LazyBooleanExpression\]\_  | Adds a suffix to the previous expression to provide an additional English description, rendered in subscript. This can contain limited Markdown, but try to avoid anything except basics like links. |
 | Footnote | \[^extern-safe\] | Adds a footnote, which can supply extra information that may be helpful to the user. The footnote itself should be defined outside of the code block like a normal Markdown footnote. |
 | Optional | Expr? | The preceding expression is optional. |
+| NegativeLookahead | !Expr | Matches if Expr does not follow, without consuming any input. |
 | Repeat | Expr* | The preceding expression is repeated 0 or more times. |
 | RepeatNonGreedy | Expr*? | The preceding expression is repeated 0 or more times without being greedy. |
 | RepeatPlus | Expr+ | The preceding expression is repeated 1 or more times. |

diff --git a/src/comments.md b/src/comments.md
@@ -3,34 +3,49 @@ r[comments]
 
 r[comments.syntax]
 ```grammar,lexer
-@root LINE_COMMENT ->
+@root COMMENT ->
+      LINE_COMMENT
+    | INNER_LINE_DOC
+    | OUTER_LINE_DOC
+    | INNER_BLOCK_DOC
+    | OUTER_BLOCK_DOC
+    | BLOCK_COMMENT
+
+LINE_COMMENT ->
       `//` (~[`/` `!` LF] | `//`) ~LF*
-    | `//`
+    | `//` EOF
+    | `//` _immediately followed by LF_
 
 BLOCK_COMMENT ->
-      `/*`
+      `/**/`
+    | `/***/`
+    | `/*`
+        ^
         ( ~[`*` `!`] | `**` | BLOCK_COMMENT_OR_DOC )
         ( BLOCK_COMMENT_OR_DOC | ~`*/` )*
       `*/`
-    | `/**/`
-    | `/***/`
 
-@root INNER_LINE_DOC ->
-    `//!` ~[LF CR]*
+INNER_LINE_DOC ->
+    `//!` ^ LINE_DOC_COMMENT_CONTENT (LF | EOF)
+
+LINE_DOC_COMMENT_CONTENT -> (!CR ~LF)*
 
 INNER_BLOCK_DOC ->
-    `/*!` ( BLOCK_COMMENT_OR_DOC | ~[`*/` CR] )* `*/`
+    `/*!` ^ ( BLOCK_COMMENT_OR_DOC | BLOCK_CHAR )* `*/`
 
-@root OUTER_LINE_DOC ->
-    `///` (~`/` ~[LF CR]*)?
+OUTER_LINE_DOC ->
+    `///` ^ LINE_DOC_COMMENT_CONTENT (LF | EOF)
 
 OUTER_BLOCK_DOC ->
-    `/**`
+    `/**` ![`*` `/`]
+      ^
       ( ~`*` | BLOCK_COMMENT_OR_DOC )
-      ( BLOCK_COMMENT_OR_DOC | ~[`*/` CR] )*
+      ( BLOCK_COMMENT_OR_DOC | BLOCK_CHAR )*
     `*/`
 
-@root BLOCK_COMMENT_OR_DOC ->
+BLOCK_CHAR -> (!(`*/` | CR) CHAR)
+
+BLOCK_COMMENT_OR_DOC ->
       BLOCK_COMMENT
     | OUTER_BLOCK_DOC
     | INNER_BLOCK_DOC
@@ -51,7 +66,7 @@ r[comments.doc.syntax]
 Line doc comments beginning with exactly _three_ slashes (`///`), and block doc comments (`/** ... */`), both outer doc comments, are interpreted as a special syntax for [`doc` attributes].
 
 r[comments.doc.attributes]
-That is, they are equivalent to writing `#[doc="..."]` around the body of the comment, i.e., `/// Foo` turns into `#[doc="Foo"]` and `/** Bar */` turns into `#[doc="Bar"]`. They must therefore appear before something that accepts an outer attribute.
+That is, they are equivalent to writing `#[doc="..."]` around the body of the comment, i.e., `/// Foo` turns into `#[doc=" Foo"]` and `/** Bar */` turns into `#[doc=" Bar "]`. They must therefore appear before something that accepts an outer attribute.
 
 r[comments.doc.inner-syntax]
 Line comments beginning with `//!` and block comments `/*! ... */` are doc comments that apply to the parent of the comment, rather than the item that follows.

diff --git a/src/identifiers.md b/src/identifiers.md
@@ -16,7 +16,7 @@ NON_KEYWORD_IDENTIFIER -> IDENTIFIER_OR_KEYWORD _except a [strict][lex.keywords.
 IDENTIFIER -> NON_KEYWORD_IDENTIFIER | RAW_IDENTIFIER
 
 RESERVED_RAW_IDENTIFIER ->
-    `r#` (`_` | `crate` | `self` | `Self` | `super`) _not immediately followed by XID_Continue_
+    `r#` (`_` | `crate` | `self` | `Self` | `super`) !XID_Continue
 ```
 
 <!-- When updating the version, update the UAX links, too. -->

diff --git a/src/input-format.md b/src/input-format.md
@@ -3,9 +3,13 @@ r[input]
 
 r[input.syntax]
 ```grammar,lexer
-@root CHAR -> <a Unicode scalar value>
+CHAR -> [U+0000-U+D7FF U+E000-U+10FFFF] // a Unicode scalar value
+
+ASCII -> [U+0000-U+007F]
 
 NUL -> U+0000
+
+EOF -> !CHAR  // End of file or input
 ```
 
 r[input.intro]

diff --git a/src/notation.md b/src/notation.md
@@ -20,13 +20,14 @@ The following notations are used by the *Lexer* and *Syntax* grammar snippets:
 | x<sup>a..=b</sup> | HEX_DIGIT<sup>1..=5</sup>     | a to b repetitions of x, inclusive of b   |
 | Rule1 Rule2       | `fn` _Name_ _Parameters_      | Sequence of rules in order                |
 | \|                | `u8` \| `u16`, Block \| Item  | Either one or another                     |
+| !                 | !COMMENT                      | Matches if the expression does not follow, without consuming any input |
 | \[ ]               | \[`b` `B`]                     | Any of the characters listed              |
 | \[ - ]             | \[`a`-`z`]                     | Any of the characters in the range        |
 | ~\[ ]              | ~\[`b` `B`]                    | Any characters, except those listed       |
 | ~`string`         | ~`\n`, ~`*/`                  | Any characters, except this sequence      |
 | ( )               | (`,` _Parameter_)<sup>?</sup> | Groups items                              |
 | ^                 | `b'` ^ ASCII_FOR_CHAR         | The rest of the sequence must match or parsing fails unconditionally ([hard cut operator]) |
-| U+xxxx            | U+0060                        | A single unicode character                |
+| U+xxxx..xxxxxx    | U+0060                        | A single Unicode character, written as U+ followed by 4 to 6 hex digits |
 | \<text\>          | \<any ASCII char except CR\>  | An English description of what should be matched |
 | Rule <sub>suffix</sub> | IDENTIFIER_OR_KEYWORD <sub>_except `crate`_</sub> | A modification to the previous rule |
 | // Comment. | // Single line comment. | A comment extending to the end of the line. |

diff --git a/src/tokens.md b/src/tokens.md
@@ -115,7 +115,7 @@ r[lex.token.literal.suffix.syntax]
 ```grammar,lexer
 SUFFIX -> IDENTIFIER_OR_KEYWORD _except `_`_
 
-SUFFIX_NO_E -> SUFFIX _not beginning with `e` or `E`_
+SUFFIX_NO_E -> ![`e` `E`] SUFFIX
 ```
 
 r[lex.token.literal.suffix.validity]
@@ -253,8 +253,7 @@ r[lex.token.byte.syntax]
 BYTE_LITERAL ->
     `b'` ^ ( ASCII_FOR_CHAR | BYTE_ESCAPE )  `'` SUFFIX?
 
-ASCII_FOR_CHAR ->
-    <any ASCII (i.e. 0x00 to 0x7F) except `'`, `\`, LF, CR, or TAB>
+ASCII_FOR_CHAR -> ![`'` `\` LF CR TAB] ASCII
 
 BYTE_ESCAPE ->
       `\x` HEX_DIGIT HEX_DIGIT
@@ -272,8 +271,7 @@ r[lex.token.str-byte.syntax]
 BYTE_STRING_LITERAL ->
     `b"` ^ ( ASCII_FOR_STRING | BYTE_ESCAPE | STRING_CONTINUE )* `"` SUFFIX?
 
-ASCII_FOR_STRING ->
-    <any ASCII (i.e 0x00 to 0x7F) except `"`, `\`, or CR>
+ASCII_FOR_STRING -> ![`"` `\` CR] ASCII
 ```
 
 r[lex.token.str-byte.intro]
@@ -309,8 +307,7 @@ RAW_BYTE_STRING_CONTENT ->
       `"` ^ ASCII_FOR_RAW*? `"`
     | `#` RAW_BYTE_STRING_CONTENT `#`
 
-ASCII_FOR_RAW ->
-    <any ASCII (i.e. 0x00 to 0x7F) except CR>
+ASCII_FOR_RAW -> !CR ASCII
 ```
 
 r[lex.token.str-byte-raw.intro]
@@ -559,10 +556,10 @@ r[lex.token.literal.float.syntax]
 FLOAT_LITERAL ->
       DEC_LITERAL (`.` DEC_LITERAL)? FLOAT_EXPONENT SUFFIX?
     | DEC_LITERAL `.` DEC_LITERAL SUFFIX_NO_E?
-    | DEC_LITERAL `.` _not immediately followed by `.`, `_` or an XID_Start character_
+    | DEC_LITERAL `.` !(`.` | `_` | XID_Start)
 
 FLOAT_EXPONENT ->
-    (`e`|`E`) (`+`|`-`)? `_`* DEC_DIGIT (DEC_DIGIT|`_`)*
+    (`e`|`E`) ^ (`+`|`-`)? `_`* DEC_DIGIT (DEC_DIGIT|`_`)*
 ```
 
 r[lex.token.literal.float.form]
@@ -608,13 +605,11 @@ r[lex.token.literal.reserved.syntax]
 RESERVED_NUMBER ->
       BIN_LITERAL [`2`-`9`]
     | OCT_LITERAL [`8`-`9`]
-    | ( BIN_LITERAL | OCT_LITERAL | HEX_LITERAL ) `.` _not immediately followed by `.`, `_` or an XID_Start character_
+    | ( BIN_LITERAL | OCT_LITERAL | HEX_LITERAL ) `.` !(`.` | `_` | XID_Start)
     | ( BIN_LITERAL | OCT_LITERAL ) (`e`|`E`)
-    | `0b` `_`* <end of input or not BIN_DIGIT>
-    | `0o` `_`* <end of input or not OCT_DIGIT>
-    | `0x` `_`* <end of input or not HEX_DIGIT>
-    | DEC_LITERAL ( `.` DEC_LITERAL )? (`e` | `E`) (`+` | `-`)? <end of input or not DEC_DIGIT>
-
+    | `0b` `_`* !BIN_DIGIT
+    | `0o` `_`* !OCT_DIGIT
+    | `0x` `_`* !HEX_DIGIT
 ```
 
 r[lex.token.literal.reserved.intro]
@@ -657,16 +652,16 @@ r[lex.token.life.syntax]
 ```grammar,lexer
 LIFETIME_TOKEN ->
       RAW_LIFETIME
-    | `'` IDENTIFIER_OR_KEYWORD _not immediately followed by `'`_
+    | `'` IDENTIFIER_OR_KEYWORD !`'`
 
 LIFETIME_OR_LABEL ->
       RAW_LIFETIME
-    | `'` NON_KEYWORD_IDENTIFIER _not immediately followed by `'`_
+    | `'` NON_KEYWORD_IDENTIFIER !`'`
 
 RAW_LIFETIME ->
-    `'r#` IDENTIFIER_OR_KEYWORD _not immediately followed by `'`_
+    `'r#` ^ IDENTIFIER_OR_KEYWORD !`'`
 
-RESERVED_RAW_LIFETIME -> `'r#` (`_` | `crate` | `self` | `Self` | `super`) _not immediately followed by `'`_
+RESERVED_RAW_LIFETIME -> `'r#` (`_` | `crate` | `self` | `Self` | `super`) !(`'` | XID_Continue)
 ```
 
 r[lex.token.life.intro]

diff --git a/tools/grammar/src/lib.rs b/tools/grammar/src/lib.rs
@@ -51,6 +51,8 @@ pub enum ExpressionKind {
     Sequence(Vec<Expression>),
     /// `A?`
     Optional(Box<Expression>),
+    /// `!A`
+    NegativeLookahead(Box<Expression>),
     /// `A*`
     Repeat(Box<Expression>),
     /// `A*?`
@@ -85,7 +87,7 @@ pub enum ExpressionKind {
     /// `^ A B C`
     Cut(Box<Expression>),
     /// `U+0060`
-    Unicode(String),
+    Unicode((char, String)),
 }
 
 #[derive(Copy, Clone, Debug)]
@@ -113,7 +115,34 @@ pub enum Characters {
     /// `` `_` ``
     Terminal(String),
     /// `` `A`-`Z` ``
-    Range(char, char),
+    Range(Character, Character),
+}
+
+#[derive(Clone, Debug)]
+pub enum Character { // One endpoint of a `Characters::Range`.
+    Char(char), // A literal character; `Display` prints it wrapped in backticks.
+    /// A character written in `U+` notation, e.g. `U+0060`.
+    ///
+    /// The `char` is the value `get_ch` returns; the `String` is the hex digits after `U+`, which `Display` prints back.
+    Unicode((char, String)),
+}
+
+impl Character {
+    pub fn get_ch(&self) -> char { // Returns the `char` held by either variant.
+        match self {
+            Character::Char(ch) => *ch,
+            Character::Unicode((ch, _)) => *ch, // The hex-digit text is not needed here.
+        }
+    }
+}
+
+impl Display for Character { // Writes the character as a backtick-quoted literal or as `U+` plus hex digits.
+    fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), std::fmt::Error> {
+        match self {
+            Character::Char(ch) => write!(f, "`{ch}`"), // Literal form: the character between backticks.
+            Character::Unicode((_, s)) => write!(f, "U+{s}"), // Uses the stored hex digits, not the `char`.
+        }
+    }
 }
 
 impl Grammar {
@@ -137,6 +166,7 @@ impl Expression {
         match &self.kind {
             ExpressionKind::Grouped(e)
             | ExpressionKind::Optional(e)
+            | ExpressionKind::NegativeLookahead(e)
             | ExpressionKind::Repeat(e)
             | ExpressionKind::RepeatNonGreedy(e)
             | ExpressionKind::RepeatPlus(e)