Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 6 additions & 9 deletions dev-guide/src/grammar.md
Original file line number Diff line number Diff line change
Expand Up @@ -52,25 +52,22 @@ Footnote -> `[^` ~[`]` LF]+ `]`
Quantifier ->
Optional
| Repeat
| RepeatNonGreedy
| RepeatPlus
| RepeatPlusNonGreedy
| RepeatRange
| RepeatRangeInclusive
| RepeatRangeNamed

Optional -> `?`

Repeat -> `*`

RepeatNonGreedy -> `*?`

RepeatPlus -> `+`

RepeatPlusNonGreedy -> `+?`
RepeatRange -> `{` ( Name `:` )? Range? `..` Range? `}`

RepeatRange -> `{` Range? `..` Range? `}`
RepeatRangeInclusive -> `{` ( Name `:` )? Range? `..=` Range `}`

RepeatRangeInclusive -> `{` Range? `..=` Range `}`
RepeatRangeNamed -> `{` Name `}`

Range -> [0-9]+

Expand Down Expand Up @@ -145,11 +142,11 @@ The general format is a series of productions separated by blank lines. The expr
| Optional | Expr? | The preceding expression is optional. |
| NegativeLookahead | !Expr | Matches if Expr does not follow, without consuming any input. |
| Repeat | Expr* | The preceding expression is repeated 0 or more times. |
| RepeatNonGreedy | Expr*? | The preceding expression is repeated 0 or more times without being greedy. |
| RepeatPlus | Expr+ | The preceding expression is repeated 1 or more times. |
| RepeatPlusNonGreedy | Expr+? | The preceding expression is repeated 1 or more times without being greedy. |
| RepeatRange | Expr{2..4} | The preceding expression is repeated a number of times within the specified range. Either bound can be omitted, which works just like Rust ranges. |
| RepeatRangeInclusive | Expr{2..=4} | The preceding expression is repeated a number of times within the specified inclusive range. The lower bound can be omitted. |
| Named RepeatRangeInclusive | Expr{name:2..=4} | If a name precedes the range, then the number of repetitions is stored in a variable with that name that subsequent RepeatRangeNamed expressions can refer to. |
| RepeatRangeNamed | Expr{name} | Repeat the number of times from the previously labeled repetition. |

## Automatic linking

Expand Down
2 changes: 2 additions & 0 deletions src/notation.md
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@ The following notations are used by the *Lexer* and *Syntax* grammar snippets:
| x<sup>+</sup> | _MacroMatch_<sup>+</sup> | 1 or more of x |
| x<sup>a..b</sup> | HEX_DIGIT<sup>1..6</sup> | a to b repetitions of x, exclusive of b |
| x<sup>a..=b</sup> | HEX_DIGIT<sup>1..=5</sup> | a to b repetitions of x, inclusive of b |
| x<sup>n:a..=b</sup> | `#`<sup>n:1..=255</sup> | a labeled repetition that a subsequent repetition can refer to |
| x<sup>n</sup> | `#`<sup>n</sup> | repeat the number of times from the previously labeled repetition |
| Rule1 Rule2 | `fn` _Name_ _Parameters_ | Sequence of rules in order |
| \| | `u8` \| `u16`, Block \| Item | Either one or another |
| ! | !COMMENT | Matches if the expression does not follow, without consuming any input |
Expand Down
28 changes: 16 additions & 12 deletions src/tokens.md
Original file line number Diff line number Diff line change
Expand Up @@ -214,11 +214,13 @@ r[lex.token.literal.str-raw]

r[lex.token.literal.str-raw.syntax]
```grammar,lexer
RAW_STRING_LITERAL -> `r` RAW_STRING_CONTENT SUFFIX?
RAW_STRING_LITERAL ->
`r` `"` ^ RAW_STRING_CONTENT `"` SUFFIX?
| `r` `#`{n:1..=255} ^ `"` RAW_STRING_CONTENT_HASHED `"` `#`{n} SUFFIX?

RAW_STRING_CONTENT ->
`"` ^ ( ~CR )*? `"`
| `#` RAW_STRING_CONTENT `#`
RAW_STRING_CONTENT -> (!`"` ~CR )*

RAW_STRING_CONTENT_HASHED -> (!(`"` `#`{n}) ~CR )*
```

r[lex.token.literal.str-raw.intro]
Expand Down Expand Up @@ -301,11 +303,12 @@ r[lex.token.str-byte-raw]
r[lex.token.str-byte-raw.syntax]
```grammar,lexer
RAW_BYTE_STRING_LITERAL ->
`br` RAW_BYTE_STRING_CONTENT SUFFIX?
`br` `"` ^ RAW_BYTE_STRING_CONTENT `"` SUFFIX?
| `br` `#`{n:1..=255} ^ `"` RAW_BYTE_STRING_CONTENT_HASHED `"` `#`{n} SUFFIX?

RAW_BYTE_STRING_CONTENT -> (!`"` ASCII_FOR_RAW )*

RAW_BYTE_STRING_CONTENT ->
`"` ^ ASCII_FOR_RAW*? `"`
| `#` RAW_BYTE_STRING_CONTENT `#`
RAW_BYTE_STRING_CONTENT_HASHED -> (!(`"` `#`{n}) ASCII_FOR_RAW )*

ASCII_FOR_RAW -> !CR ASCII
```
Expand Down Expand Up @@ -395,11 +398,12 @@ r[lex.token.str-c-raw]
r[lex.token.str-c-raw.syntax]
```grammar,lexer
RAW_C_STRING_LITERAL ->
`cr` RAW_C_STRING_CONTENT SUFFIX?
`cr` `"` ^ RAW_C_STRING_CONTENT `"` SUFFIX?
| `cr` `#`{n:1..=255} ^ `"` RAW_C_STRING_CONTENT_HASHED `"` `#`{n} SUFFIX?

RAW_C_STRING_CONTENT -> (!`"` ~[CR NUL] )*

RAW_C_STRING_CONTENT ->
`"` ^ ( ~[CR NUL] )*? `"`
| `#` RAW_C_STRING_CONTENT `#`
RAW_C_STRING_CONTENT_HASHED -> (!(`"` `#`{n}) ~[CR NUL] )*
```

r[lex.token.str-c-raw.intro]
Expand Down
12 changes: 5 additions & 7 deletions tools/grammar/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -55,19 +55,18 @@ pub enum ExpressionKind {
NegativeLookahead(Box<Expression>),
/// `A*`
Repeat(Box<Expression>),
/// `A*?`
RepeatNonGreedy(Box<Expression>),
/// `A+`
RepeatPlus(Box<Expression>),
/// `A+?`
RepeatPlusNonGreedy(Box<Expression>),
/// `A{2..4}` or `A{2..=4}`
/// `A{2..4}` or `A{2..=4}` or `A{name:2..=4}`
RepeatRange {
expr: Box<Expression>,
name: Option<String>,
min: Option<u32>,
max: Option<u32>,
limit: RangeLimit,
},
/// `A{name}`
RepeatRangeNamed(Box<Expression>, String),
/// `NonTerminal`
Nt(String),
/// `` `string` ``
Expand Down Expand Up @@ -168,10 +167,9 @@ impl Expression {
| ExpressionKind::Optional(e)
| ExpressionKind::NegativeLookahead(e)
| ExpressionKind::Repeat(e)
| ExpressionKind::RepeatNonGreedy(e)
| ExpressionKind::RepeatPlus(e)
| ExpressionKind::RepeatPlusNonGreedy(e)
| ExpressionKind::RepeatRange { expr: e, .. }
| ExpressionKind::RepeatRangeNamed(e, _)
| ExpressionKind::NegExpression(e)
| ExpressionKind::Cut(e) => {
e.visit_nt(callback);
Expand Down
36 changes: 23 additions & 13 deletions tools/grammar/src/parser.rs
Original file line number Diff line number Diff line change
Expand Up @@ -439,29 +439,38 @@ impl Parser<'_> {
Ok(ExpressionKind::Optional(box_kind(kind)))
}

/// Parse `*` | `*?` after expression.
/// Parse `*` after expression.
fn parse_repeat(&mut self, kind: ExpressionKind) -> Result<ExpressionKind> {
self.expect("*", "expected `*`")?;
Ok(if self.take_str("?") {
ExpressionKind::RepeatNonGreedy(box_kind(kind))
} else {
ExpressionKind::Repeat(box_kind(kind))
})
Ok(ExpressionKind::Repeat(box_kind(kind)))
}

/// Parse `+` | `+?` after expression.
/// Parse `+` after expression.
fn parse_repeat_plus(&mut self, kind: ExpressionKind) -> Result<ExpressionKind> {
self.expect("+", "expected `+`")?;
Ok(if self.take_str("?") {
ExpressionKind::RepeatPlusNonGreedy(box_kind(kind))
} else {
ExpressionKind::RepeatPlus(box_kind(kind))
})
Ok(ExpressionKind::RepeatPlus(box_kind(kind)))
}

/// Parse `{a..b}` | `{a..=b}` after expression.
/// Parse `{a..b}` | `{a..=b}` | `{name:a..=b}` | `{name}` after expression.
//
// `name:` before the range is a named binding. `{name}` refers to that binding.
fn parse_repeat_range(&mut self, kind: ExpressionKind) -> Result<ExpressionKind> {
self.expect("{", "expected `{`")?;
let start = self.index;
let name = match (self.parse_name(), self.peek()) {
(Some(name), Some(b':')) => {
self.index += 1;
Some(name)
}
(Some(name), Some(b'}')) => {
self.index += 1;
return Ok(ExpressionKind::RepeatRangeNamed(box_kind(kind), name));
}
_ => {
self.index = start;
None
}
};
let min = self.take_while(&|x| x.is_ascii_digit());
let Ok(min) = (!min.is_empty()).then(|| min.parse::<u32>()).transpose() else {
bail!(self, "malformed range start");
Expand Down Expand Up @@ -492,6 +501,7 @@ impl Parser<'_> {
self.expect("}", "expected `}`")?;
Ok(ExpressionKind::RepeatRange {
expr: box_kind(kind),
name,
min,
max,
limit,
Expand Down
19 changes: 8 additions & 11 deletions tools/mdbook-spec/src/grammar/render_markdown.rs
Original file line number Diff line number Diff line change
Expand Up @@ -69,10 +69,9 @@ fn last_expr(expr: &Expression) -> &ExpressionKind {
| ExpressionKind::Optional(_)
| ExpressionKind::NegativeLookahead(_)
| ExpressionKind::Repeat(_)
| ExpressionKind::RepeatNonGreedy(_)
| ExpressionKind::RepeatPlus(_)
| ExpressionKind::RepeatPlusNonGreedy(_)
| ExpressionKind::RepeatRange { .. }
| ExpressionKind::RepeatRangeNamed(_, _)
| ExpressionKind::Nt(_)
| ExpressionKind::Terminal(_)
| ExpressionKind::Prose(_)
Expand Down Expand Up @@ -128,33 +127,31 @@ fn render_expression(expr: &Expression, cx: &RenderCtx, output: &mut String) {
render_expression(e, cx, output);
output.push_str("<sup>\\*</sup>");
}
ExpressionKind::RepeatNonGreedy(e) => {
render_expression(e, cx, output);
output.push_str("<sup>\\* (non-greedy)</sup>");
}
ExpressionKind::RepeatPlus(e) => {
render_expression(e, cx, output);
output.push_str("<sup>+</sup>");
}
ExpressionKind::RepeatPlusNonGreedy(e) => {
render_expression(e, cx, output);
output.push_str("<sup>+ (non-greedy)</sup>");
}
ExpressionKind::RepeatRange {
expr,
name,
min,
max,
limit,
} => {
render_expression(expr, cx, output);
write!(
output,
"<sup>{min}{limit}{max}</sup>",
"<sup>{name}{min}{limit}{max}</sup>",
name = name.as_ref().map(|n| format!("{n}:")).unwrap_or_default(),
min = min.map(|v| v.to_string()).unwrap_or_default(),
max = max.map(|v| v.to_string()).unwrap_or_default(),
)
.unwrap();
}
ExpressionKind::RepeatRangeNamed(e, name) => {
render_expression(e, cx, output);
write!(output, "<sup>{name}</sup>").unwrap();
}
ExpressionKind::Nt(nt) => {
let dest = cx.md_link_map.get(nt).map_or("missing", |d| d.as_str());
write!(output, "<span class=\"grammar-text\">[{nt}]({dest})</span>").unwrap();
Expand Down
29 changes: 17 additions & 12 deletions tools/mdbook-spec/src/grammar/render_railroad.rs
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,7 @@ fn render_expression(expr: &Expression, cx: &RenderCtx, stack: bool) -> Option<B
ExpressionKind::Grouped(e)
| ExpressionKind::RepeatRange {
expr: e,
name: _,
min: Some(1),
max: Some(1),
limit: RangeLimit::Closed,
Expand Down Expand Up @@ -153,6 +154,7 @@ fn render_expression(expr: &Expression, cx: &RenderCtx, stack: bool) -> Option<B
ExpressionKind::Optional(e)
| ExpressionKind::RepeatRange {
expr: e,
name: _,
min: None | Some(0),
max: Some(1),
limit: RangeLimit::Closed,
Expand All @@ -164,36 +166,26 @@ fn render_expression(expr: &Expression, cx: &RenderCtx, stack: bool) -> Option<B
ExpressionKind::Repeat(e)
| ExpressionKind::RepeatRange {
expr: e,
name: _,
min: None | Some(0),
max: None,
limit: RangeLimit::HalfOpen,
} => {
let n = render_expression(e, cx, stack)?;
Box::new(Optional::new(Repeat::new(n, railroad::Empty)))
}
ExpressionKind::RepeatNonGreedy(e) => {
let n = render_expression(e, cx, stack)?;
let r = Box::new(Optional::new(Repeat::new(n, railroad::Empty)));
let lbox = LabeledBox::new(r, Comment::new("non-greedy".to_string()));
Box::new(lbox)
}
// Treat `e+` and `e{1..}` equally.
ExpressionKind::RepeatPlus(e)
| ExpressionKind::RepeatRange {
expr: e,
name: _,
min: Some(1),
max: None,
limit: RangeLimit::HalfOpen,
} => {
let n = render_expression(e, cx, stack)?;
Box::new(Repeat::new(n, railroad::Empty))
}
ExpressionKind::RepeatPlusNonGreedy(e) => {
let n = render_expression(e, cx, stack)?;
let r = Repeat::new(n, railroad::Empty);
let lbox = LabeledBox::new(r, Comment::new("non-greedy".to_string()));
Box::new(lbox)
}
// For `e{..=0}` / `e{0..=0}` or `e{..1}` / `e{0..1}` render an empty node.
ExpressionKind::RepeatRange { max: Some(0), .. }
| ExpressionKind::RepeatRange {
Expand All @@ -205,13 +197,15 @@ fn render_expression(expr: &Expression, cx: &RenderCtx, stack: bool) -> Option<B
// `(e{1..=b})?` (or `(e{1..b})?` for half-open).
ExpressionKind::RepeatRange {
expr: e,
name,
min: None | Some(0),
max: Some(b @ 2..),
limit,
} => {
state = ExpressionKind::Optional(Box::new(Expression::new_kind(
ExpressionKind::RepeatRange {
expr: e.clone(),
name: name.clone(),
min: Some(1),
max: Some(*b),
limit: *limit,
Expand All @@ -222,6 +216,7 @@ fn render_expression(expr: &Expression, cx: &RenderCtx, stack: bool) -> Option<B
// Render `e{1..b}` / `e{1..=b}` directly.
ExpressionKind::RepeatRange {
expr: e,
name: _,
min: Some(1),
max: Some(b @ 2..),
limit,
Expand Down Expand Up @@ -251,12 +246,14 @@ fn render_expression(expr: &Expression, cx: &RenderCtx, stack: bool) -> Option<B
// - `e{a..b}` as `e{0..a-1} e{1..b-(a-1)}`
ExpressionKind::RepeatRange {
expr: e,
name,
min: Some(a @ 2..),
max: b @ None,
limit,
}
| ExpressionKind::RepeatRange {
expr: e,
name,
min: Some(a @ 2..),
max: b @ Some(_),
limit,
Expand All @@ -267,6 +264,7 @@ fn render_expression(expr: &Expression, cx: &RenderCtx, stack: bool) -> Option<B
}
es.push(Expression::new_kind(ExpressionKind::RepeatRange {
expr: e.clone(),
name: name.clone(),
min: Some(1),
max: b.map(|x| x - (a - 1)),
limit: *limit,
Expand All @@ -279,6 +277,12 @@ fn render_expression(expr: &Expression, cx: &RenderCtx, stack: bool) -> Option<B
limit: RangeLimit::Closed,
..
} => unreachable!("closed range must have upper bound"),
ExpressionKind::RepeatRangeNamed(e, name) => {
let n = render_expression(e, cx, stack)?;
let cmt = format!("repeat exactly {name} times");
let lbox = LabeledBox::new(n, Comment::new(cmt));
Box::new(lbox)
}
ExpressionKind::Nt(nt) => node_for_nt(cx, nt),
ExpressionKind::Terminal(t) => Box::new(Terminal::new(t.clone())),
ExpressionKind::Prose(s) => Box::new(Terminal::new(s.clone())),
Expand Down Expand Up @@ -405,6 +409,7 @@ mod tests {
fn range_expr(min: Option<u32>, max: Option<u32>, limit: RangeLimit) -> Expression {
Expression::new_kind(ExpressionKind::RepeatRange {
expr: Box::new(Expression::new_kind(ExpressionKind::Nt("e".to_string()))),
name: None,
min,
max,
limit,
Expand Down