Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions core/build.gradle.kts
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,7 @@ dependencies {
testImplementation(platform(libs.junit.bom))
testImplementation(libs.protobuf.java.util)
testImplementation(libs.guava)
testImplementation(libs.bundles.jackson)

testImplementation(libs.junit.jupiter)
testRuntimeOnly(libs.junit.platform.launcher)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,10 @@ public class DefaultExtensionCatalog {
public static final String FUNCTIONS_AGGREGATE_APPROX =
"extension:io.substrait:functions_aggregate_approx";

/** Extension identifier for aggregate functions with decimal output. */
public static final String FUNCTIONS_AGGREGATE_DECIMAL_OUTPUT =
"extension:io.substrait:functions_aggregate_decimal_output";

/** Extension identifier for generic aggregate functions. */
public static final String FUNCTIONS_AGGREGATE_GENERIC =
"extension:io.substrait:functions_aggregate_generic";
Expand All @@ -37,6 +41,9 @@ public class DefaultExtensionCatalog {
/** Extension identifier for geometry functions. */
public static final String FUNCTIONS_GEOMETRY = "extension:io.substrait:functions_geometry";

/** Extension identifier for list functions. */
public static final String FUNCTIONS_LIST = "extension:io.substrait:functions_list";

/** Extension identifier for logarithmic functions. */
public static final String FUNCTIONS_LOGARITHMIC = "extension:io.substrait:functions_logarithmic";

Expand Down Expand Up @@ -78,7 +85,10 @@ private static SimpleExtension.ExtensionCollection loadDefaultCollection() {
"logarithmic",
"rounding",
"rounding_decimal",
"set",
"string")
// TODO(#688): functions_list.yaml is not loaded here because it uses lambda type
// expressions (e.g. func<any1 -> any2>) that are not yet supported by the type parser.
.stream()
.map(c -> String.format("/functions_%s.yaml", c))
.collect(Collectors.toList());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -680,6 +680,11 @@ public ScalarFunctionVariant getScalarFunction(FunctionAnchor anchor) {
anchor.key(), anchor.urn()));
}

/** Returns true if the given URN has any functions or types loaded in this collection. */
public boolean containsUrn(String urn) {
return urnSupplier.get().contains(urn) || types().stream().anyMatch(t -> t.urn().equals(urn));
}

private void checkUrn(String name) {
if (urnSupplier.get().contains(name)) {
return;
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
package io.substrait.extension;

import static io.substrait.extension.DefaultExtensionCatalog.DEFAULT_COLLECTION;
import static org.junit.jupiter.api.Assertions.assertTrue;

import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.fasterxml.jackson.dataformat.yaml.YAMLFactory;
import java.io.File;
import java.io.IOException;
import java.util.List;
import java.util.Set;
import org.junit.jupiter.api.Test;

/**
* Verifies that every extension YAML in substrait/extensions is loaded by {@link
* DefaultExtensionCatalog}.
*/
class DefaultExtensionCatalogTest {

private static final Set<String> UNSUPPORTED_FILES =
Set.of(
// TODO: aggregate_decimal_output defines count and approx_count_distinct with
// decimal<38,0> return types instead of i64. When loaded alongside aggregate_generic,
// the same function key (e.g. count:any) maps to the same Calcite operator twice,
// which breaks the reverse lookup in FunctionConverter.getSqlOperatorFromSubstraitFunc.
// Fixing this requires either deduplicating the operator map or adding type-based
// disambiguation for aggregate functions.
"functions_aggregate_decimal_output.yaml",
"functions_geometry.yaml", // user-defined types not supported in Calcite type conversion
"functions_list.yaml", // TODO(#688): remove once lambda types are supported
"type_variations.yaml", // type variations not yet supported by extension loader
"unknown.yaml" // unknown type extension not yet loaded
);

private static final ObjectMapper YAML_MAPPER = new ObjectMapper(new YAMLFactory());

@Test
void allExtensionYamlFilesAreLoaded() throws IOException {
List<File> yamlFiles = getExtensionYamlFiles();

for (File file : yamlFiles) {
if (UNSUPPORTED_FILES.contains(file.getName())) {
continue;
}
String urn = parseUrn(file);
assertTrue(
DEFAULT_COLLECTION.containsUrn(urn),
file.getName() + " not loaded by DefaultExtensionCatalog (urn: " + urn + ")");
}
}

private static String parseUrn(File yamlFile) throws IOException {
JsonNode doc = YAML_MAPPER.readTree(yamlFile);
JsonNode urnNode = doc.get("urn");
return urnNode == null ? null : urnNode.asText();
}

private static List<File> getExtensionYamlFiles() {
File extensionsDir = new File("../substrait/extensions");
assertTrue(extensionsDir.isDirectory(), "substrait/extensions directory not found");
Comment on lines +60 to +61
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm wondering whether this should check the extension YAML files on the classpath instead of relying on the current working directory (especially given that @vbarua is planning to remove the submodule and move packaging the extension YAMLs into a separate JAR into substrait-packaging). The YAMLs should be already on the classpath. We're already loading classgraph for the isthmus-cli which can be useful to scan the classpath for any *.yaml files.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sounds good to me, I'll make that change shortly. Thanks!

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

btw. it might be better to relocate the YAML files on the classpath into a subfolder like substrait/extensions to be easier recognizable as default YAML files. right now they are being added into the root of the classpath

File[] files = extensionsDir.listFiles((dir, name) -> name.endsWith(".yaml"));
assertTrue(files != null && files.length > 0, "No YAML files found");
return List.of(files);
}
}