diff --git a/generator/src/docs.rs b/generator/src/docs.rs new file mode 100644 index 00000000..8660e538 --- /dev/null +++ b/generator/src/docs.rs @@ -0,0 +1,122 @@ +use pest::iterators::Pairs; +use pest_meta::parser::Rule; +use std::collections::HashMap; + +#[derive(Debug)] +pub(crate) struct DocComment { + pub grammar_doc: String, + + /// HashMap for storing all doc_comments for rules. + /// key is rule name, value is doc_comment. + pub line_docs: HashMap<String, String>, +} + +/// Consume pairs to matches `Rule::grammar_doc`, `Rule::line_doc` into `DocComment` +/// +/// e.g. +/// +/// a pest file: +/// +/// ```ignore +/// //! This is a grammar doc +/// /// line doc 1 +/// /// line doc 2 +/// foo = {} +/// +/// /// line doc 3 +/// bar = {} +/// ``` +/// +/// Then will get: +/// +/// ```ignore +/// grammar_doc = "This is a grammar doc" +/// line_docs = { "foo": "line doc 1\nline doc 2", "bar": "line doc 3" } +/// ``` +pub(crate) fn consume(pairs: Pairs<'_, Rule>) -> DocComment { + let mut grammar_doc = String::new(); + + let mut line_docs: HashMap<String, String> = HashMap::new(); + let mut line_doc = String::new(); + + for pair in pairs { + match pair.as_rule() { + Rule::grammar_doc => { + // grammar_doc > inner_doc + let inner_doc = pair.into_inner().next().unwrap(); + grammar_doc.push_str(inner_doc.as_str()); + grammar_doc.push('\n'); + } + Rule::grammar_rule => { + if let Some(inner) = pair.into_inner().next() { + // grammar_rule > line_doc | identifier + match inner.as_rule() { + Rule::line_doc => { + if let Some(inner_doc) = inner.into_inner().next() { + line_doc.push_str(inner_doc.as_str()); + line_doc.push('\n'); + } + } + Rule::identifier => { + if !line_doc.is_empty() { + let rule_name = inner.as_str().to_owned(); + + // Remove last \n + line_doc.pop(); + line_docs.insert(rule_name, line_doc.clone()); + line_doc.clear(); + } + } + _ => (), + } + } + } + _ => (), + } + } + + if !grammar_doc.is_empty() { + // Remove last \n + grammar_doc.pop(); + } + + DocComment { + grammar_doc, + 
line_docs, + } +} + +#[cfg(test)] +mod tests { + use std::collections::HashMap; + + use pest_meta::parser; + use pest_meta::parser::Rule; + + #[test] + fn test_doc_comment() { + let pairs = match parser::parse(Rule::grammar_rules, include_str!("../tests/test.pest")) { + Ok(pairs) => pairs, + Err(_) => panic!("error parsing tests/test.pest"), + }; + + let doc_comment = super::consume(pairs); + + let mut expected = HashMap::new(); + expected.insert("foo".to_owned(), "Matches foo str, e.g.: `foo`".to_owned()); + expected.insert( + "bar".to_owned(), + "Matches bar str,\n Indent 2, e.g: `bar` or `foobar`".to_owned(), + ); + expected.insert( + "dar".to_owned(), + "Matches dar\nMatch dar description".to_owned(), + ); + assert_eq!(expected, doc_comment.line_docs); + + assert_eq!( + "A parser for JSON file.\nAnd this is a example for JSON parser.\n\n indent-4-space", + doc_comment.grammar_doc + ); + } +} diff --git a/generator/src/generator.rs b/generator/src/generator.rs index fc1263d8..87d1f00d 100644 --- a/generator/src/generator.rs +++ b/generator/src/generator.rs @@ -17,12 +17,15 @@ use pest::unicode::unicode_property_names; use pest_meta::ast::*; use pest_meta::optimizer::*; -pub fn generate( +use crate::docs::DocComment; + +pub(crate) fn generate( name: Ident, generics: &Generics, path: Option<PathBuf>, rules: Vec<OptimizedRule>, defaults: Vec<&str>, + doc_comment: &DocComment, include_grammar: bool, ) -> TokenStream { let uses_eoi = defaults.iter().any(|name| *name == "EOI"); @@ -36,7 +39,7 @@ pub fn generate( } else { quote!() }; - let rule_enum = generate_enum(&rules, uses_eoi); + let rule_enum = generate_enum(&rules, doc_comment, uses_eoi); let patterns = generate_patterns(&rules, uses_eoi); let skip = generate_skip(&rules); @@ -181,10 +184,25 @@ fn generate_include(name: &Ident, path: &str) -> TokenStream { } } -fn generate_enum(rules: &[OptimizedRule], uses_eoi: bool) -> TokenStream { - let rules = rules.iter().map(|rule| format_ident!("r#{}", rule.name)); +fn generate_enum(rules: 
&[OptimizedRule], doc_comment: &DocComment, uses_eoi: bool) -> TokenStream { + let rules = rules.iter().map(|rule| { + let rule_name = format_ident!("r#{}", rule.name); + + match doc_comment.line_docs.get(&rule.name) { + Some(doc) => quote! { + #[doc = #doc] + #rule_name + }, + None => quote! { + #rule_name + }, + } + }); + + let grammar_doc = &doc_comment.grammar_doc; if uses_eoi { quote! { + #[doc = #grammar_doc] #[allow(dead_code, non_camel_case_types, clippy::upper_case_acronyms)] #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)] pub enum Rule { @@ -194,6 +212,7 @@ fn generate_enum(rules: &[OptimizedRule], uses_eoi: bool) -> TokenStream { } } else { quote! { + #[doc = #grammar_doc] #[allow(dead_code, non_camel_case_types, clippy::upper_case_acronyms)] #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)] pub enum Rule { @@ -208,6 +227,7 @@ fn generate_patterns(rules: &[OptimizedRule], uses_eoi: bool) -> TokenStream { .iter() .map(|rule| { let rule = format_ident!("r#{}", rule.name); + quote! { Rule::#rule => rules::#rule(state) } @@ -657,10 +677,11 @@ fn option_type() -> TokenStream { #[cfg(test)] mod tests { - use proc_macro2::Span; - use super::*; + use proc_macro2::Span; + use std::collections::HashMap; + #[test] fn rule_enum_simple() { let rules = vec![OptimizedRule { @@ -669,12 +690,22 @@ mod tests { expr: OptimizedExpr::Ident("g".to_owned()), }]; + let mut line_docs = HashMap::new(); + line_docs.insert("f".to_owned(), "This is rule comment".to_owned()); + + let doc_comment = &DocComment { + grammar_doc: "Rule doc\nhello".to_owned(), + line_docs, + }; + assert_eq!( - generate_enum(&rules, false).to_string(), + generate_enum(&rules, doc_comment, false).to_string(), quote! 
{ + #[doc = "Rule doc\nhello"] #[allow(dead_code, non_camel_case_types, clippy::upper_case_acronyms)] #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)] pub enum Rule { + #[doc = "This is rule comment"] r#f } } @@ -957,7 +988,7 @@ mod tests { } #[test] - fn generate_complete() { + fn test_generate_complete() { let name = Ident::new("MyParser", Span::call_site()); let generics = Generics::default(); @@ -974,6 +1005,14 @@ mod tests { }, ]; + let mut line_docs = HashMap::new(); + line_docs.insert("if".to_owned(), "If statement".to_owned()); + + let doc_comment = &DocComment { + line_docs, + grammar_doc: "This is Rule doc\nThis is second line".to_owned(), + }; + let defaults = vec!["ANY"]; let result = result_type(); let box_ty = box_type(); @@ -981,15 +1020,17 @@ mod tests { current_dir.push("test.pest"); let test_path = current_dir.to_str().expect("path contains invalid unicode"); assert_eq!( - generate(name, &generics, Some(PathBuf::from("test.pest")), rules, defaults, true).to_string(), + generate(name, &generics, Some(PathBuf::from("test.pest")), rules, defaults, doc_comment, true).to_string(), quote! 
{ #[allow(non_upper_case_globals)] const _PEST_GRAMMAR_MyParser: &'static str = include_str!(#test_path); + #[doc = "This is Rule doc\nThis is second line"] #[allow(dead_code, non_camel_case_types, clippy::upper_case_acronyms)] #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)] pub enum Rule { r#a, + #[doc = "If statement"] r#if } diff --git a/generator/src/lib.rs b/generator/src/lib.rs index 938bd168..f9c118f8 100644 --- a/generator/src/lib.rs +++ b/generator/src/lib.rs @@ -31,6 +31,7 @@ use syn::{Attribute, DeriveInput, Generics, Ident, Lit, Meta}; #[macro_use] mod macros; +mod docs; mod generator; use pest_meta::parser::{self, rename_meta_rule, Rule}; @@ -91,10 +92,19 @@ pub fn derive_parser(input: TokenStream, include_grammar: bool) -> TokenStream { }; let defaults = unwrap_or_report(validator::validate_pairs(pairs.clone())); + let doc_comment = docs::consume(pairs.clone()); let ast = unwrap_or_report(parser::consume_rules(pairs)); let optimized = optimizer::optimize(ast); - generator::generate(name, &generics, path, optimized, defaults, include_grammar) + generator::generate( + name, + &generics, + path, + optimized, + defaults, + &doc_comment, + include_grammar, + ) } fn read_file<P: AsRef<Path>>(path: P) -> io::Result<String> { @@ -225,4 +235,38 @@ mod tests { let ast = syn::parse_str(definition).unwrap(); parse_derive(ast); } + + #[test] + fn test_generate_doc() { + let input = quote! { + #[derive(Parser)] + #[grammar = "../tests/test.pest"] + pub struct TestParser; + }; + + let token = super::derive_parser(input, true); + + let expected = quote! 
{ + #[doc = "A parser for JSON file.\nAnd this is a example for JSON parser.\n\n indent-4-space"] + #[allow(dead_code, non_camel_case_types, clippy::upper_case_acronyms)] + #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)] + + pub enum Rule { + #[doc = "Matches foo str, e.g.: `foo`"] + r#foo, + #[doc = "Matches bar str,\n Indent 2, e.g: `bar` or `foobar`"] + r#bar, + r#bar1, + #[doc = "Matches dar\nMatch dar description"] + r#dar + } + }; + + assert!( + token.to_string().contains(expected.to_string().as_str()), + "{}\n\nExpected to contains:\n{}", + token, + expected + ); + } } diff --git a/generator/tests/test.pest b/generator/tests/test.pest new file mode 100644 index 00000000..c86a65ff --- /dev/null +++ b/generator/tests/test.pest @@ -0,0 +1,20 @@ +//! A parser for JSON file. +//! And this is a example for JSON parser. +//! +//! indent-4-space + +/// Matches foo str, e.g.: `foo` +foo = { "foo" } + +/// Matches bar str, +/// Indent 2, e.g: `bar` or `foobar` + +bar = { "bar" | "foobar" } + +bar1 = { "bar1" } + +/// Matches dar + +/// Match dar description + +dar = { "da" } \ No newline at end of file diff --git a/grammars/src/grammars/json.pest b/grammars/src/grammars/json.pest index f8b423a5..413aa7b3 100644 --- a/grammars/src/grammars/json.pest +++ b/grammars/src/grammars/json.pest @@ -7,8 +7,13 @@ // option. All files in the project carrying such notice may not be copied, // modified, or distributed except according to those terms. +//! A parser for JSON file. +//! +//! And this is a example for JSON parser. json = { SOI ~ (object | array) ~ EOI } +/// Matches object, e.g.: `{ "foo": "bar" }` +/// Foobar object = { "{" ~ pair ~ ("," ~ pair)* ~ "}" | "{" ~ "}" } pair = { string ~ ":" ~ value } diff --git a/meta/src/grammar.pest b/meta/src/grammar.pest index 282ca35b..e09bf61b 100644 --- a/meta/src/grammar.pest +++ b/meta/src/grammar.pest @@ -7,11 +7,12 @@ // option. 
All files in the project carrying such notice may not be copied, // modified, or distributed except according to those terms. -grammar_rules = _{ SOI ~ grammar_rule+ ~ EOI } +grammar_rules = _{ SOI ~ grammar_doc* ~ (grammar_rule)+ ~ EOI } grammar_rule = { identifier ~ assignment_operator ~ modifier? ~ - opening_brace ~ expression ~ closing_brace + opening_brace ~ expression ~ closing_brace | + line_doc } assignment_operator = { "=" } @@ -92,7 +93,14 @@ quote = { "\"" } single_quote = { "'" } range_operator = { ".." } -newline = _{ "\n" | "\r\n" } -WHITESPACE = _{ " " | "\t" | newline } -block_comment = _{ "/*" ~ (block_comment | !"*/" ~ ANY)* ~ "*/" } -COMMENT = _{ block_comment | ("//" ~ (!newline ~ ANY)*) } +newline = _{ "\n" | "\r\n" } +WHITESPACE = _{ " " | "\t" | newline } +line_comment = _{ ("//" ~ !("/" | "!") ~ (!newline ~ ANY)*) } +block_comment = _{ "/*" ~ (block_comment | !"*/" ~ ANY)* ~ "*/" } +COMMENT = _{ block_comment | line_comment } + +// ref: https://doc.rust-lang.org/reference/comments.html +space = _{ " " | "\t" } +grammar_doc = ${ "//!" ~ space? ~ inner_doc } +line_doc = ${ "///" ~ space? 
~ !"/" ~ inner_doc } +inner_doc = @{ (!newline ~ ANY)* } diff --git a/meta/src/parser.rs b/meta/src/parser.rs index fc0224b3..eb957a16 100644 --- a/meta/src/parser.rs +++ b/meta/src/parser.rs @@ -243,6 +243,8 @@ pub fn rename_meta_rule(rule: &Rule) -> String { Rule::insensitive_string => "`^`".to_owned(), Rule::range_operator => "`..`".to_owned(), Rule::single_quote => "`'`".to_owned(), + Rule::grammar_doc => "//!".to_owned(), + Rule::line_doc => "///".to_owned(), other_rule => format!("{:?}", other_rule), } } @@ -256,6 +258,13 @@ fn consume_rules_with_spans( pairs .filter(|pair| pair.as_rule() == Rule::grammar_rule) + .filter(|pair| { + // To ignore `grammar_rule > line_doc` pairs + let mut pairs = pair.clone().into_inner(); + let pair = pairs.next().unwrap(); + + pair.as_rule() != Rule::line_doc + }) .map(|pair| { let mut pairs = pair.into_inner().peekable(); @@ -1093,13 +1102,48 @@ mod tests { }; } + #[test] + fn grammar_doc_and_line_doc() { + let input = "//! hello\n/// world\na = { \"a\" }"; + parses_to! { + parser: PestParser, + input: input, + rule: Rule::grammar_rules, + tokens: [ + grammar_doc(0, 9, [ + inner_doc(4, 9), + ]), + grammar_rule(10, 19, [ + line_doc(10, 19, [ + inner_doc(14, 19), + ]), + ]), + grammar_rule(20, 31, [ + identifier(20, 21), + assignment_operator(22, 23), + opening_brace(24, 25), + expression(26, 30, [ + term(26, 30, [ + string(26, 29, [ + quote(26, 27), + inner_str(27, 28), + quote(28, 29) + ]) + ]) + ]), + closing_brace(30, 31), + ]) + ] + }; + } + #[test] fn wrong_identifier() { fails_with! 
{ parser: PestParser, input: "0", rule: Rule::grammar_rules, - positives: vec![Rule::identifier], + positives: vec![Rule::grammar_rule, Rule::grammar_doc], negatives: vec![], pos: 0 }; @@ -1315,8 +1359,11 @@ mod tests { #[test] fn ast() { - let input = - "rule = _{ a{1} ~ \"a\"{3,} ~ b{, 2} ~ \"b\"{1, 2} | !(^\"c\" | PUSH('d'..'e'))?* }"; + let input = r##" + /// This is line comment + /// This is rule + rule = _{ a{1} ~ "a"{3,} ~ b{, 2} ~ "b"{1, 2} | !(^"c" | PUSH('d'..'e'))?* } + "##; let pairs = PestParser::parse(Rule::grammar_rules, input).unwrap(); let ast = consume_rules_with_spans(pairs).unwrap(); @@ -1368,7 +1415,7 @@ mod tests { expr: Expr::Seq( Box::new(Expr::PeekSlice(-4, None)), Box::new(Expr::PeekSlice(0, Some(3))), - ) + ), }], ); }