From 2f893fb73087c7ce7fc572f108c95b56f5d391d8 Mon Sep 17 00:00:00 2001 From: Victor Barua Date: Fri, 6 Mar 2026 09:06:19 -0800 Subject: [PATCH 1/3] build: use antlr grammars directly from submodule --- core/build.gradle.kts | 20 ++-- core/src/main/antlr/SubstraitLexer.g4 | 132 -------------------------- core/src/main/antlr/SubstraitType.g4 | 83 ---------------- 3 files changed, 11 insertions(+), 224 deletions(-) delete mode 100644 core/src/main/antlr/SubstraitLexer.g4 delete mode 100644 core/src/main/antlr/SubstraitType.g4 diff --git a/core/build.gradle.kts b/core/build.gradle.kts index df0f3f364..9112fee70 100644 --- a/core/build.gradle.kts +++ b/core/build.gradle.kts @@ -226,6 +226,7 @@ tasks.named("sourcesJar") { sourceSets { main { + antlr.srcDirs("../substrait/grammar") proto.srcDir("../substrait/proto") resources.srcDir("../substrait/extensions") resources.srcDir("build/generated/sources/manifest/") @@ -248,7 +249,16 @@ project.configure { } } +val submodulesUpdate by + tasks.registering(Exec::class) { + group = "Build Setup" + description = "Updates (and inits) substrait git submodule" + commandLine = listOf("git", "submodule", "update", "--init", "--recursive") + workingDir = rootProject.projectDir + } + tasks.named("generateGrammarSource") { + dependsOn(submodulesUpdate) arguments.add("-package") arguments.add("io.substrait.type") arguments.add("-visitor") @@ -256,19 +266,11 @@ tasks.named("generateGrammarSource") { arguments.add("-Xlog") arguments.add("-Werror") arguments.add("-Xexact-output-dir") - setSource(fileTree("src/main/antlr/SubstraitType.g4")) + exclude("FuncTestCaseLexer.g4", "FuncTestCaseParser.g4") outputDirectory = layout.buildDirectory.dir("generated/sources/antlr/main/java/io/substrait/type").get().asFile } -val submodulesUpdate by - tasks.registering(Exec::class) { - group = "Build Setup" - description = "Updates (and inits) substrait git submodule" - commandLine = listOf("git", "submodule", "update", "--init", "--recursive") - workingDir = rootProject.projectDir - } - protobuf { generateProtoTasks { all().configureEach { dependsOn(submodulesUpdate) } } protoc { artifact = "com.google.protobuf:protoc:" + libs.protoc.get().getVersion() } diff --git a/core/src/main/antlr/SubstraitLexer.g4 b/core/src/main/antlr/SubstraitLexer.g4 deleted file mode 100644 index 4dfdf5272..000000000 --- a/core/src/main/antlr/SubstraitLexer.g4 +++ /dev/null @@ -1,132 +0,0 @@ -lexer grammar SubstraitLexer; - -options { - caseInsensitive = true; -} - -// Whitespace and comment handling -LineComment : '//' ~[\r\n]* -> channel(HIDDEN) ; -BlockComment : ( '/*' ( ~'*' | '*'+ ~[*/] ) '*'* '*/' ) -> channel(HIDDEN) ; -Whitespace : [ \t\r]+ -> channel(HIDDEN) ; - -fragment DIGIT: [0-9]; - -// Syntactic keywords. -If : 'IF'; -Then : 'THEN'; -Else : 'ELSE'; -Func : 'FUNC'; - -// TYPES -Boolean : 'BOOLEAN'; -I8 : 'I8'; -I16 : 'I16'; -I32 : 'I32'; -I64 : 'I64'; -FP32 : 'FP32'; -FP64 : 'FP64'; -String : 'STRING'; -Binary : 'BINARY'; -Timestamp: 'TIMESTAMP'; -Timestamp_TZ: 'TIMESTAMP_TZ'; -Date : 'DATE'; -Time : 'TIME'; -Interval_Year: 'INTERVAL_YEAR'; -Interval_Day: 'INTERVAL_DAY'; -Interval_Compound: 'INTERVAL_COMPOUND'; -UUID : 'UUID'; -Decimal : 'DECIMAL'; -Precision_Time: 'PRECISION_TIME'; -Precision_Timestamp: 'PRECISION_TIMESTAMP'; -Precision_Timestamp_TZ: 'PRECISION_TIMESTAMP_TZ'; -FixedChar: 'FIXEDCHAR'; -VarChar : 'VARCHAR'; -FixedBinary: 'FIXEDBINARY'; -Struct : 'STRUCT'; -NStruct : 'NSTRUCT'; -List : 'LIST'; -Map : 'MAP'; -UserDefined: 'U!'; - -// short names for types -Bool: 'BOOL'; -Str: 'STR'; -VBin: 'VBIN'; -Ts: 'TS'; -TsTZ: 'TSTZ'; -IYear: 'IYEAR'; -IDay: 'IDAY'; -ICompound: 'ICOMPOUND'; -Dec: 'DEC'; -PT: 'PT'; -PTs: 'PTS'; -PTsTZ: 'PTSTZ'; -FChar: 'FCHAR'; -VChar: 'VCHAR'; -FBin: 'FBIN'; - -Any: 'ANY'; -AnyVar: Any [0-9]; - -DoubleColon: '::'; - -// MATH -Plus : '+'; -Minus : '-'; -Asterisk : '*'; -ForwardSlash : '/'; -Percent : '%'; - -// COMPARE -Eq : '='; -Ne : '!='; -Gte : '>='; -Lte : '<='; -Gt : '>'; -Lt : '<'; -Bang : '!'; - -// ORGANIZE -OAngleBracket: Lt; -CAngleBracket: Gt; -OParen: '('; -CParen: ')'; -OBracket: '['; -CBracket: ']'; -Comma: ','; -Colon: ':'; -QMark: '?'; -Hash: '#'; -Dot: '.'; - - -// OPERATIONS -And : 'AND'; -Or : 'OR'; -Assign : ':='; -Arrow : '->'; - - - -fragment Int - : '1'..'9' Digit* - | '0' - ; - -fragment Digit - : '0'..'9' - ; - -Number - : '-'? Int - ; - -Identifier - : ('A'..'Z' | '_' | '$') ('A'..'Z' | '_' | '$' | Digit)* - ; - -Newline - : ( '\r' '\n'? - | '\n' - ) - ; diff --git a/core/src/main/antlr/SubstraitType.g4 b/core/src/main/antlr/SubstraitType.g4 deleted file mode 100644 index d14df3976..000000000 --- a/core/src/main/antlr/SubstraitType.g4 +++ /dev/null @@ -1,83 +0,0 @@ -grammar SubstraitType; - -options { - caseInsensitive = true; -} - -import SubstraitLexer; - -startRule: expr EOF; - -typeStatement: typeDef EOF; - -scalarType - : Boolean #boolean - | I8 #i8 - | I16 #i16 - | I32 #i32 - | I64 #i64 - | FP32 #fp32 - | FP64 #fp64 - | String #string - | Binary #binary - | Timestamp #timestamp - | Timestamp_TZ #timestampTz - | Date #date - | Time #time - | Interval_Year #intervalYear - | UUID #uuid - ; - -parameterizedType - : FixedChar isnull=QMark? Lt length=numericParameter Gt #fixedChar - | VarChar isnull=QMark? Lt length=numericParameter Gt #varChar - | FixedBinary isnull=QMark? Lt length=numericParameter Gt #fixedBinary - | Decimal isnull=QMark? Lt precision=numericParameter Comma scale=numericParameter Gt #decimal - | Interval_Day isnull=QMark? Lt precision=numericParameter Gt #precisionIntervalDay - | Interval_Compound isnull=QMark? Lt precision=numericParameter Gt #precisionIntervalCompound - | Precision_Time isnull=QMark? Lt precision=numericParameter Gt #precisionTime - | Precision_Timestamp isnull=QMark? Lt precision=numericParameter Gt #precisionTimestamp - | Precision_Timestamp_TZ isnull=QMark? Lt precision=numericParameter Gt #precisionTimestampTZ - | Struct isnull=QMark? Lt expr (Comma expr)* Gt #struct - | NStruct isnull=QMark? Lt Identifier expr (Comma Identifier expr)* Gt #nStruct - | List isnull=QMark? Lt expr Gt #list - | Map isnull=QMark? Lt key=expr Comma value=expr Gt #map - | Func isnull=QMark? Lt params=funcParams Arrow returnType=expr Gt #func - | UserDefined Identifier isnull=QMark? (Lt expr (Comma expr)* Gt)? #userDefined - ; - -funcParams - : expr #singleFuncParam - | OParen expr (Comma expr)* CParen #funcParamsWithParens - ; - -numericParameter - : Number #numericLiteral - | Identifier #numericParameterName - | expr #numericExpression - ; - -anyType - : Any isnull=QMark? - | AnyVar isnull=QMark? - ; - -typeDef - : scalarType isnull=QMark? - | parameterizedType - | anyType - ; - -expr - : OParen expr CParen #ParenExpression - | Identifier Eq expr Newline+ (Identifier Eq expr Newline+)* finalType=typeDef Newline* #MultilineDefinition - | typeDef #TypeLiteral - | Number #LiteralNumber - | Identifier isnull=QMark? #ParameterName - | Identifier OParen (expr (Comma expr)*)? CParen #FunctionCall - | left=expr op=(And | Or | Plus | Minus | Lt | Gt | Eq | Ne | - Lte | Gte | Asterisk | ForwardSlash) right=expr #BinaryExpr - | If ifExpr=expr Then thenExpr=expr Else elseExpr=expr #IfExpr - | (Bang) expr #NotExpr - | ifExpr=expr QMark thenExpr=expr Colon elseExpr=expr #Ternary - ; From 19f3b0d79a55ed6aafcabc1264aa2e51d52f7c56 Mon Sep 17 00:00:00 2001 From: Niels Pardon Date: Mon, 9 Mar 2026 09:14:54 +0100 Subject: [PATCH 2/3] fix: apply suggestion from @nielspardon --- core/build.gradle.kts | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/core/build.gradle.kts b/core/build.gradle.kts index 9112fee70..74696c24c 100644 --- a/core/build.gradle.kts +++ b/core/build.gradle.kts @@ -226,7 +226,9 @@ tasks.named("sourcesJar") { sourceSets { main { - antlr.srcDirs("../substrait/grammar") + antlr { + setSrcDirs(listOf(file("${rootProject.projectDir}/substrait/grammar"))) + } proto.srcDir("../substrait/proto") resources.srcDir("../substrait/extensions") resources.srcDir("build/generated/sources/manifest/") From 48315f4321a0601b80b1f0901ebf13db41fe9661 Mon Sep 17 00:00:00 2001 From: Niels Pardon Date: Mon, 9 Mar 2026 09:20:43 +0100 Subject: [PATCH 3/3] fix: spotless --- core/build.gradle.kts | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/core/build.gradle.kts b/core/build.gradle.kts index 74696c24c..63fbbbef4 100644 --- a/core/build.gradle.kts +++ b/core/build.gradle.kts @@ -226,9 +226,7 @@ tasks.named("sourcesJar") { sourceSets { main { - antlr { - setSrcDirs(listOf(file("${rootProject.projectDir}/substrait/grammar"))) - } + antlr { setSrcDirs(listOf(file("${rootProject.projectDir}/substrait/grammar"))) } proto.srcDir("../substrait/proto") resources.srcDir("../substrait/extensions") resources.srcDir("build/generated/sources/manifest/")