-
Notifications
You must be signed in to change notification settings - Fork 30
Feat/furnace sql #113
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Feat/furnace sql #113
Changes from all commits
3396ead
9c7ec17
fd9ad6e
4a23420
f948c2d
cec7efa
5758e14
ce90696
d8d6ac1
3b1dca0
c07b31b
6eb0de2
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change | ||||||||||||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
|
|
@@ -4,7 +4,7 @@ | |||||||||||||
|
|
||||||||||||||
| import time | ||||||||||||||
| import warnings | ||||||||||||||
| from typing import TYPE_CHECKING, Literal, overload | ||||||||||||||
| from typing import TYPE_CHECKING, Any, Literal, overload | ||||||||||||||
|
|
||||||||||||||
| from foundry_dev_tools.clients.api_client import APIClient | ||||||||||||||
| from foundry_dev_tools.errors.handling import ErrorHandlingConfig | ||||||||||||||
|
|
@@ -13,7 +13,14 @@ | |||||||||||||
| FoundrySqlQueryFailedError, | ||||||||||||||
| FoundrySqlSerializationFormatNotImplementedError, | ||||||||||||||
| ) | ||||||||||||||
| from foundry_dev_tools.utils.api_types import Ref, SqlDialect, SQLReturnType, assert_in_literal | ||||||||||||||
| from foundry_dev_tools.utils.api_types import ( | ||||||||||||||
| ArrowCompressionCodec, | ||||||||||||||
| FurnaceSqlDialect, | ||||||||||||||
| Ref, | ||||||||||||||
| SqlDialect, | ||||||||||||||
| SQLReturnType, | ||||||||||||||
| assert_in_literal, | ||||||||||||||
| ) | ||||||||||||||
|
|
||||||||||||||
| if TYPE_CHECKING: | ||||||||||||||
| import pandas as pd | ||||||||||||||
|
|
@@ -296,3 +303,346 @@ def api_queries_results( | |||||||||||||
| }, | ||||||||||||||
| **kwargs, | ||||||||||||||
| ) | ||||||||||||||
|
|
||||||||||||||
|
|
||||||||||||||
| class FoundrySqlServerClientV2(APIClient): | ||||||||||||||
| """FoundrySqlServerClientV2 implements the newer foundry-sql-server API. | ||||||||||||||
|
|
||||||||||||||
| This client uses a different API flow compared to V1: | ||||||||||||||
| - Executes queries via POST to /api/ with applicationId and sql | ||||||||||||||
| - Polls POST to /api/status for query completion | ||||||||||||||
| - Retrieves results via POST to /api/stream with tickets | ||||||||||||||
| """ | ||||||||||||||
|
|
||||||||||||||
| api_name = "foundry-sql-server" | ||||||||||||||
|
|
||||||||||||||
| @overload | ||||||||||||||
| def query_foundry_sql( | ||||||||||||||
| self, | ||||||||||||||
| query: str, | ||||||||||||||
| return_type: Literal["pandas"], | ||||||||||||||
| branch: Ref = ..., | ||||||||||||||
| sql_dialect: FurnaceSqlDialect = ..., | ||||||||||||||
| arrow_compression_codec: ArrowCompressionCodec = ..., | ||||||||||||||
| timeout: int = ..., | ||||||||||||||
| experimental_use_trino: bool = ..., | ||||||||||||||
| ) -> pd.DataFrame: ... | ||||||||||||||
|
|
||||||||||||||
| @overload | ||||||||||||||
| def query_foundry_sql( | ||||||||||||||
| self, | ||||||||||||||
| query: str, | ||||||||||||||
| return_type: Literal["polars"], | ||||||||||||||
| branch: Ref = ..., | ||||||||||||||
| sql_dialect: FurnaceSqlDialect = ..., | ||||||||||||||
| arrow_compression_codec: ArrowCompressionCodec = ..., | ||||||||||||||
| timeout: int = ..., | ||||||||||||||
| experimental_use_trino: bool = ..., | ||||||||||||||
| ) -> pl.DataFrame: ... | ||||||||||||||
|
|
||||||||||||||
| @overload | ||||||||||||||
| def query_foundry_sql( | ||||||||||||||
| self, | ||||||||||||||
| query: str, | ||||||||||||||
| return_type: Literal["spark"], | ||||||||||||||
| branch: Ref = ..., | ||||||||||||||
| sql_dialect: FurnaceSqlDialect = ..., | ||||||||||||||
| arrow_compression_codec: ArrowCompressionCodec = ..., | ||||||||||||||
| timeout: int = ..., | ||||||||||||||
| experimental_use_trino: bool = ..., | ||||||||||||||
| ) -> pyspark.sql.DataFrame: ... | ||||||||||||||
|
|
||||||||||||||
| @overload | ||||||||||||||
| def query_foundry_sql( | ||||||||||||||
| self, | ||||||||||||||
| query: str, | ||||||||||||||
| return_type: Literal["arrow"], | ||||||||||||||
| branch: Ref = ..., | ||||||||||||||
| sql_dialect: FurnaceSqlDialect = ..., | ||||||||||||||
| arrow_compression_codec: ArrowCompressionCodec = ..., | ||||||||||||||
| timeout: int = ..., | ||||||||||||||
| experimental_use_trino: bool = ..., | ||||||||||||||
| ) -> pa.Table: ... | ||||||||||||||
|
|
||||||||||||||
| @overload | ||||||||||||||
| def query_foundry_sql( | ||||||||||||||
| self, | ||||||||||||||
| query: str, | ||||||||||||||
| return_type: Literal["pandas", "polars", "spark", "arrow"] = ..., | ||||||||||||||
| branch: Ref = ..., | ||||||||||||||
| sql_dialect: FurnaceSqlDialect = ..., | ||||||||||||||
| arrow_compression_codec: ArrowCompressionCodec = ..., | ||||||||||||||
| timeout: int = ..., | ||||||||||||||
| experimental_use_trino: bool = ..., | ||||||||||||||
| ) -> pd.DataFrame | pl.DataFrame | pa.Table | pyspark.sql.DataFrame: ... | ||||||||||||||
|
|
||||||||||||||
| def query_foundry_sql( | ||||||||||||||
| self, | ||||||||||||||
| query: str, | ||||||||||||||
| return_type: Literal["pandas", "polars", "spark", "arrow"] = "pandas", | ||||||||||||||
| branch: Ref = "master", | ||||||||||||||
| sql_dialect: FurnaceSqlDialect = "SPARK", | ||||||||||||||
| arrow_compression_codec: ArrowCompressionCodec = "NONE", | ||||||||||||||
| timeout: int = 600, | ||||||||||||||
| experimental_use_trino: bool = False, | ||||||||||||||
| ) -> pd.DataFrame | pl.DataFrame | pa.Table | pyspark.sql.DataFrame: | ||||||||||||||
| """Queries the Foundry SQL server using the V2 API. | ||||||||||||||
|
|
||||||||||||||
| Uses Arrow IPC to communicate with the Foundry SQL Server Endpoint. | ||||||||||||||
|
|
||||||||||||||
| Example: | ||||||||||||||
| df = client.query_foundry_sql( | ||||||||||||||
| query="SELECT * FROM `ri.foundry.main.dataset.abc` LIMIT 10" | ||||||||||||||
| ) | ||||||||||||||
|
|
||||||||||||||
| Args: | ||||||||||||||
| query: The SQL Query | ||||||||||||||
| return_type: The return type (pandas, polars, spark, or arrow). Note: "raw" is not supported in V2. | ||||||||||||||
| branch: The dataset branch to query | ||||||||||||||
| sql_dialect: The SQL dialect to use (only SPARK is supported for V2) | ||||||||||||||
| arrow_compression_codec: Arrow compression codec (NONE, LZ4, ZSTD) | ||||||||||||||
| timeout: Query timeout in seconds | ||||||||||||||
| experimental_use_trino: If True, modifies the query to use Trino backend by adding /*+ backend(trino) */ hint | ||||||||||||||
|
||||||||||||||
| experimental_use_trino: If True, modifies the query to use Trino backend by adding /*+ backend(trino) */ hint | |
| experimental_use_trino: If True, modifies the query to use the Trino backend by adding a | |
| ``/*+ backend(trino) */`` hint after the first ``SELECT ``. This is experimental and | |
| only affects queries that contain an uppercase ``SELECT `` (with a trailing space); | |
| it will not modify queries using lowercase ``select``, different whitespace, or | |
| queries starting with ``WITH`` clauses. |
Copilot
AI
Feb 17, 2026
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The docstring claims this method raises FoundrySqlQueryClientTimedOutError on timeout, but timeout handling is not implemented in the V2 client. This is inconsistent with the actual behavior. Either implement timeout handling or remove this from the Raises section of the docstring.
| FoundrySqlQueryClientTimedOutError: If the query times out |
Copilot
AI
Feb 20, 2026
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The query modification for experimental_use_trino uses a simple string replace that only replaces the first occurrence of "SELECT ". This approach will fail for queries that use lowercase "select " or have different whitespace patterns. It will also fail for queries that don't start with SELECT (e.g., "WITH cte AS (...) SELECT ..."). Consider using a more robust approach like a regex that handles case-insensitivity and various whitespace patterns, or insert the hint after detecting the first SELECT keyword more reliably.
Copilot
AI
Feb 20, 2026
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The arrow_compression_codec parameter is not validated with assert_in_literal like sql_dialect is. Add validation to ensure only valid codec values (NONE, LZ4, ZSTD) are passed to the API. This would provide better error messages if an invalid codec is specified.
Copilot
AI
Feb 20, 2026
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The Args section in the docstring for _extract_query_handle has an extra blank line after the response_json parameter description and before the Returns section. This is inconsistent with other docstrings in the codebase. Remove the extra blank line at line 477.
Copilot
AI
Feb 20, 2026
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The docstring for _extract_ticket says "Returns: List of tickets for fetching results" but the method actually returns a dictionary with keys "id", "tickets", and "type", not a list. The return type annotation correctly shows dict[str, Any], but the docstring is misleading. Update the docstring to accurately describe the returned dictionary structure.
Copilot
AI
Feb 20, 2026
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The ticket extraction uses a nested list comprehension that flattens tickets from multiple ticket groups. However, there's no error handling if the response structure is unexpected (e.g., missing "status", "ready", or "tickets" keys). Consider adding error handling to provide a more informative error message if the response structure is not as expected, similar to how _extract_query_handle uses KeyError-prone dictionary access.
nicornk marked this conversation as resolved.
Show resolved
Hide resolved
| Original file line number | Diff line number | Diff line change | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
|
|
@@ -152,6 +152,11 @@ def foundry_sql_server(self) -> foundry_sql_server.FoundrySqlServerClient: | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
| """Returns :py:class:`foundry_dev_tools.clients.foundry_sql_server.FoundrySqlServerClient`.""" | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
| return foundry_sql_server.FoundrySqlServerClient(self) | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
| @cached_property | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
| def foundry_sql_server_v2(self) -> foundry_sql_server.FoundrySqlServerClientV2: | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
| """Returns :py:class:`foundry_dev_tools.clients.foundry_sql_server.FoundrySqlServerClientV2`.""" | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
Comment on lines
152
to
+157
|
||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
| """Returns :py:class:`foundry_dev_tools.clients.foundry_sql_server.FoundrySqlServerClient`.""" | |
| return foundry_sql_server.FoundrySqlServerClient(self) | |
| @cached_property | |
| def foundry_sql_server_v2(self) -> foundry_sql_server.FoundrySqlServerClientV2: | |
| """Returns :py:class:`foundry_dev_tools.clients.foundry_sql_server.FoundrySqlServerClientV2`.""" | |
| """Returns :py:class:`foundry_dev_tools.clients.foundry_sql_server.FoundrySqlServerClient`. | |
| This client exposes the original / legacy Foundry SQL Server API and is kept for | |
| backwards compatibility. Prefer :py:meth:`foundry_sql_server_v2` for new | |
| development unless you explicitly depend on legacy behaviour. | |
| """ | |
| return foundry_sql_server.FoundrySqlServerClient(self) | |
| @cached_property | |
| def foundry_sql_server_v2(self) -> foundry_sql_server.FoundrySqlServerClientV2: | |
| """Returns :py:class:`foundry_dev_tools.clients.foundry_sql_server.FoundrySqlServerClientV2`. | |
| This client uses the V2 SQL Server API. Use this for new workloads when possible, | |
| especially when you: | |
| * want support for Arrow compression codecs for more efficient data transfer, or | |
| * want to experiment with the Trino-based execution backend (experimental). | |
| In contrast, :py:meth:`foundry_sql_server` targets the legacy API surface and | |
| is primarily intended for existing code that relies on its behaviour. The V2 | |
| client may evolve independently and can introduce new capabilities that are not | |
| available via the legacy client. | |
| """ |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The overload signature for the "polars" return type is inconsistent with other overloads and the actual implementation. This overload specifies
branchandsql_dialectparameters, but the actual implementation (line 365-371) does not accept these parameters. Instead, it acceptsapplication_idanddisable_arrow_compressionlike the other overloads. The parameters should match:application_id: str,disable_arrow_compression: bool = ..., andtimeoutshould be removed if not implemented (see other issue). The V2 API uses a different flow than V1 and does not use branch/sql_dialect parameters.