From cb10c57e616cc4df86e53ac83a7383b4e8e64859 Mon Sep 17 00:00:00 2001 From: noroshi <253434427+n0r0shi@users.noreply.github.com> Date: Tue, 24 Feb 2026 02:55:36 +0000 Subject: [PATCH 1/2] feat: support factorial, pmod, and rint expressions Register datafusion-spark's SparkFactorial, SparkPmod, and SparkRint UDFs and add serde mappings in mathExpressions. --- native/core/src/execution/jni_api.rs | 6 +++++ .../apache/comet/serde/QueryPlanSerde.scala | 3 +++ .../comet/CometMathExpressionSuite.scala | 24 +++++++++++++++++++ 3 files changed, 33 insertions(+) diff --git a/native/core/src/execution/jni_api.rs b/native/core/src/execution/jni_api.rs index 0193f3012c..153e9bdd5c 100644 --- a/native/core/src/execution/jni_api.rs +++ b/native/core/src/execution/jni_api.rs @@ -51,7 +51,10 @@ use datafusion_spark::function::hash::sha1::SparkSha1; use datafusion_spark::function::hash::sha2::SparkSha2; use datafusion_spark::function::map::map_from_entries::MapFromEntries; use datafusion_spark::function::math::expm1::SparkExpm1; +use datafusion_spark::function::math::factorial::SparkFactorial; use datafusion_spark::function::math::hex::SparkHex; +use datafusion_spark::function::math::modulus::SparkPmod; +use datafusion_spark::function::math::rint::SparkRint; use datafusion_spark::function::math::width_bucket::SparkWidthBucket; use datafusion_spark::function::string::char::CharFunc; use datafusion_spark::function::string::concat::SparkConcat; @@ -400,6 +403,9 @@ fn register_datafusion_spark_function(session_ctx: &SessionContext) { session_ctx.register_udf(ScalarUDF::new_from_impl(SparkWidthBucket::default())); session_ctx.register_udf(ScalarUDF::new_from_impl(MapFromEntries::default())); session_ctx.register_udf(ScalarUDF::new_from_impl(SparkCrc32::default())); + session_ctx.register_udf(ScalarUDF::new_from_impl(SparkFactorial::default())); + session_ctx.register_udf(ScalarUDF::new_from_impl(SparkPmod::default())); + session_ctx.register_udf(ScalarUDF::new_from_impl(SparkRint::default())); } /// Prepares arrow arrays for output. diff --git a/spark/src/main/scala/org/apache/comet/serde/QueryPlanSerde.scala b/spark/src/main/scala/org/apache/comet/serde/QueryPlanSerde.scala index 9d13ccd9ed..c6b2f8a15e 100644 --- a/spark/src/main/scala/org/apache/comet/serde/QueryPlanSerde.scala +++ b/spark/src/main/scala/org/apache/comet/serde/QueryPlanSerde.scala @@ -96,6 +96,7 @@ object QueryPlanSerde extends Logging with CometExprShim { classOf[Divide] -> CometDivide, classOf[Exp] -> CometScalarFunction("exp"), classOf[Expm1] -> CometScalarFunction("expm1"), + classOf[Factorial] -> CometScalarFunction("factorial"), classOf[Floor] -> CometFloor, classOf[Hex] -> CometHex, classOf[IntegralDivide] -> CometIntegralDivide, @@ -104,10 +105,12 @@ object QueryPlanSerde extends Logging with CometExprShim { classOf[Log2] -> CometLog2, classOf[Log10] -> CometLog10, classOf[Multiply] -> CometMultiply, + classOf[Pmod] -> CometScalarFunction("pmod"), classOf[Pow] -> CometScalarFunction("pow"), classOf[Rand] -> CometRand, classOf[Randn] -> CometRandn, classOf[Remainder] -> CometRemainder, + classOf[Rint] -> CometScalarFunction("rint"), classOf[Round] -> CometRound, classOf[Signum] -> CometScalarFunction("signum"), classOf[Sin] -> CometScalarFunction("sin"), diff --git a/spark/src/test/scala/org/apache/comet/CometMathExpressionSuite.scala b/spark/src/test/scala/org/apache/comet/CometMathExpressionSuite.scala index 9d27f2d25f..58a167d91d 100644 --- a/spark/src/test/scala/org/apache/comet/CometMathExpressionSuite.scala +++ b/spark/src/test/scala/org/apache/comet/CometMathExpressionSuite.scala @@ -145,4 +145,28 @@ class CometMathExpressionSuite extends CometTestBase with AdaptiveSparkPlanHelpe "SELECT id, width_bucket(value, 0.0, 10.0, 5) FROM width_bucket_range ORDER BY id") } } + + test("factorial") { + withParquetTable(Seq((0, 1), (1, 5), (2, 10), (3, 20), (4, -1)).map(Tuple1(_)), "tbl") { + checkSparkAnswerAndOperator("SELECT factorial(_1._1) FROM tbl") + checkSparkAnswerAndOperator("SELECT factorial(_1._2) FROM tbl") + checkSparkAnswerAndOperator("SELECT factorial(NULL) FROM tbl") + } + } + + test("pmod") { + withParquetTable(Seq((10, 3), (7, -2), (-7, 2), (-7, -2), (0, 5)), "tbl") { + checkSparkAnswerAndOperator("SELECT pmod(_1, _2) FROM tbl") + checkSparkAnswerAndOperator("SELECT pmod(NULL, _2) FROM tbl") + } + } + + test("rint") { + withParquetTable( + Seq[java.lang.Double](1.5, 2.5, -1.5, 0.0, 3.7, null).map(Tuple1(_)), + "tbl") { + checkSparkAnswerAndOperator("SELECT rint(_1) FROM tbl") + checkSparkAnswerAndOperator("SELECT rint(NULL) FROM tbl") + } + } } From 0b30e2ef55e96c5e1bc4a5790cc42e3225415cde Mon Sep 17 00:00:00 2001 From: noroshi Date: Wed, 4 Mar 2026 22:45:46 +0000 Subject: [PATCH 2/2] address feedback --- .../sql-tests/expressions/math/factorial.sql | 32 +++++++++++++ .../sql-tests/expressions/math/pmod.sql | 46 +++++++++++++++++++ .../sql-tests/expressions/math/rint.sql | 32 +++++++++++++ .../comet/CometMathExpressionSuite.scala | 24 ---------- 4 files changed, 110 insertions(+), 24 deletions(-) create mode 100644 spark/src/test/resources/sql-tests/expressions/math/factorial.sql create mode 100644 spark/src/test/resources/sql-tests/expressions/math/pmod.sql create mode 100644 spark/src/test/resources/sql-tests/expressions/math/rint.sql diff --git a/spark/src/test/resources/sql-tests/expressions/math/factorial.sql b/spark/src/test/resources/sql-tests/expressions/math/factorial.sql new file mode 100644 index 0000000000..7feb67c38c --- /dev/null +++ b/spark/src/test/resources/sql-tests/expressions/math/factorial.sql @@ -0,0 +1,32 @@ +-- Licensed to the Apache Software Foundation (ASF) under one +-- or more contributor license agreements. See the NOTICE file +-- distributed with this work for additional information +-- regarding copyright ownership. The ASF licenses this file +-- to you under the Apache License, Version 2.0 (the +-- "License"); you may not use this file except in compliance +-- with the License. You may obtain a copy of the License at +-- +-- http://www.apache.org/licenses/LICENSE-2.0 +-- +-- Unless required by applicable law or agreed to in writing, +-- software distributed under the License is distributed on an +-- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +-- KIND, either express or implied. See the License for the +-- specific language governing permissions and limitations +-- under the License. + +-- ConfigMatrix: parquet.enable.dictionary=false,true + +statement +CREATE TABLE test_factorial(i int) USING parquet + +statement +INSERT INTO test_factorial VALUES (0), (1), (5), (10), (20), (21), (-1), (NULL) + +-- column input +query +SELECT factorial(i) FROM test_factorial + +-- literal arguments +query +SELECT factorial(0), factorial(1), factorial(5), factorial(20), factorial(21), factorial(-1), factorial(NULL) diff --git a/spark/src/test/resources/sql-tests/expressions/math/pmod.sql b/spark/src/test/resources/sql-tests/expressions/math/pmod.sql new file mode 100644 index 0000000000..1d03d2172d --- /dev/null +++ b/spark/src/test/resources/sql-tests/expressions/math/pmod.sql @@ -0,0 +1,46 @@ +-- Licensed to the Apache Software Foundation (ASF) under one +-- or more contributor license agreements. See the NOTICE file +-- distributed with this work for additional information +-- regarding copyright ownership. The ASF licenses this file +-- to you under the Apache License, Version 2.0 (the +-- "License"); you may not use this file except in compliance +-- with the License. You may obtain a copy of the License at +-- +-- http://www.apache.org/licenses/LICENSE-2.0 +-- +-- Unless required by applicable law or agreed to in writing, +-- software distributed under the License is distributed on an +-- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +-- KIND, either express or implied. See the License for the +-- specific language governing permissions and limitations +-- under the License. + +-- ConfigMatrix: parquet.enable.dictionary=false,true + +statement +CREATE TABLE test_pmod_int(a int, b int) USING parquet + +statement +INSERT INTO test_pmod_int VALUES (10, 3), (7, -2), (-7, 2), (-7, -2), (0, 5), (10, 0), (NULL, 3), (10, NULL) + +-- integer column input +query +SELECT pmod(a, b) FROM test_pmod_int + +-- integer literal arguments +query +SELECT pmod(10, 3), pmod(7, -2), pmod(-7, 2), pmod(-7, -2), pmod(10, 0), pmod(NULL, 3) + +statement +CREATE TABLE test_pmod_double(a double, b double) USING parquet + +statement +INSERT INTO test_pmod_double VALUES (10.5, 3.0), (-7.5, 2.0), (10.0, 0.0), (NULL, 1.0) + +-- floating-point column input +query +SELECT pmod(a, b) FROM test_pmod_double + +-- floating-point literal arguments +query +SELECT pmod(10.5, 3.0), pmod(-7.5, 2.0), pmod(10.0, 0.0) diff --git a/spark/src/test/resources/sql-tests/expressions/math/rint.sql b/spark/src/test/resources/sql-tests/expressions/math/rint.sql new file mode 100644 index 0000000000..f163efacc4 --- /dev/null +++ b/spark/src/test/resources/sql-tests/expressions/math/rint.sql @@ -0,0 +1,32 @@ +-- Licensed to the Apache Software Foundation (ASF) under one +-- or more contributor license agreements. See the NOTICE file +-- distributed with this work for additional information +-- regarding copyright ownership. The ASF licenses this file +-- to you under the Apache License, Version 2.0 (the +-- "License"); you may not use this file except in compliance +-- with the License. You may obtain a copy of the License at +-- +-- http://www.apache.org/licenses/LICENSE-2.0 +-- +-- Unless required by applicable law or agreed to in writing, +-- software distributed under the License is distributed on an +-- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +-- KIND, either express or implied. See the License for the +-- specific language governing permissions and limitations +-- under the License. + +-- ConfigMatrix: parquet.enable.dictionary=false,true + +statement +CREATE TABLE test_rint(d double) USING parquet + +statement +INSERT INTO test_rint VALUES (1.5), (2.5), (-1.5), (0.0), (3.7), (NULL), (cast('NaN' as double)), (cast('Infinity' as double)) + +-- column input +query +SELECT rint(d) FROM test_rint + +-- literal arguments +query +SELECT rint(1.5), rint(2.5), rint(-1.5), rint(0.0), rint(3.7), rint(NULL) diff --git a/spark/src/test/scala/org/apache/comet/CometMathExpressionSuite.scala b/spark/src/test/scala/org/apache/comet/CometMathExpressionSuite.scala index 58a167d91d..9d27f2d25f 100644 --- a/spark/src/test/scala/org/apache/comet/CometMathExpressionSuite.scala +++ b/spark/src/test/scala/org/apache/comet/CometMathExpressionSuite.scala @@ -145,28 +145,4 @@ class CometMathExpressionSuite extends CometTestBase with AdaptiveSparkPlanHelpe "SELECT id, width_bucket(value, 0.0, 10.0, 5) FROM width_bucket_range ORDER BY id") } } - - test("factorial") { - withParquetTable(Seq((0, 1), (1, 5), (2, 10), (3, 20), (4, -1)).map(Tuple1(_)), "tbl") { - checkSparkAnswerAndOperator("SELECT factorial(_1._1) FROM tbl") - checkSparkAnswerAndOperator("SELECT factorial(_1._2) FROM tbl") - checkSparkAnswerAndOperator("SELECT factorial(NULL) FROM tbl") - } - } - - test("pmod") { - withParquetTable(Seq((10, 3), (7, -2), (-7, 2), (-7, -2), (0, 5)), "tbl") { - checkSparkAnswerAndOperator("SELECT pmod(_1, _2) FROM tbl") - checkSparkAnswerAndOperator("SELECT pmod(NULL, _2) FROM tbl") - } - } - - test("rint") { - withParquetTable( - Seq[java.lang.Double](1.5, 2.5, -1.5, 0.0, 3.7, null).map(Tuple1(_)), - "tbl") { - checkSparkAnswerAndOperator("SELECT rint(_1) FROM tbl") - checkSparkAnswerAndOperator("SELECT rint(NULL) FROM tbl") - } - } }