17 changes: 16 additions & 1 deletion packages/gooddata-pandas/src/gooddata_pandas/dataframe.py
@@ -1,7 +1,7 @@
# (C) 2021 GoodData Corporation
from __future__ import annotations

from typing import Callable, Optional, Union
from typing import Callable, Literal, Optional, Union

import pandas
from gooddata_api_client import models
@@ -259,6 +259,7 @@ def for_created_visualization(
on_execution_submitted: Optional[Callable[[Execution], None]] = None,
is_cancellable: bool = False,
optimized: bool = False,
grand_totals_position: Optional[Literal["pinnedBottom", "pinnedTop", "bottom", "top"]] = "bottom",
) -> tuple[pandas.DataFrame, DataFrameMetadata]:
"""
Creates a data frame using a created visualization.
@@ -272,6 +273,9 @@ def for_created_visualization(
headers in memory as lists of dicts, which can consume a lot of memory for large results.
Optimized accumulator stores only unique values and keeps only references to them in the list,
which can significantly reduce memory usage.
grand_totals_position (Literal["pinnedBottom", "pinnedTop", "bottom", "top"], optional):
Position where grand totals should be placed. "pinnedBottom" and "bottom" append totals,
"pinnedTop" and "top" prepend totals. Defaults to "bottom".

Returns:
pandas.DataFrame: A DataFrame instance.
@@ -283,6 +287,7 @@ def for_created_visualization(
exec_def=execution_definition,
on_execution_submitted=on_execution_submitted,
optimized=optimized,
grand_totals_position=grand_totals_position,
)

def result_cache_metadata_for_exec_result_id(self, result_id: str) -> ResultCacheMetadata:
@@ -306,6 +311,7 @@ def for_exec_def(
page_size: int = _DEFAULT_PAGE_SIZE,
on_execution_submitted: Optional[Callable[[Execution], None]] = None,
optimized: bool = False,
grand_totals_position: Optional[Literal["pinnedBottom", "pinnedTop", "bottom", "top"]] = "bottom",
) -> tuple[pandas.DataFrame, DataFrameMetadata]:
"""
Creates a data frame using an execution definition.
@@ -342,6 +348,9 @@ def for_exec_def(
headers in memory as lists of dicts, which can consume a lot of memory for large results.
Optimized accumulator stores only unique values and keeps only references to them in the list,
which can significantly reduce memory usage.
grand_totals_position (Literal["pinnedBottom", "pinnedTop", "bottom", "top"], optional):
Position where grand totals should be placed. "pinnedBottom" and "bottom" append totals,
"pinnedTop" and "top" prepend totals. Defaults to "bottom".

Returns:
Tuple[pandas.DataFrame, DataFrameMetadata]: Tuple holding DataFrame and DataFrame metadata.
@@ -363,6 +372,7 @@ def for_exec_def(
result_size_bytes_limit=result_size_bytes_limit,
page_size=page_size,
optimized=optimized,
grand_totals_position=grand_totals_position,
)

def for_exec_result_id(
@@ -376,6 +386,7 @@ def for_exec_result_id(
use_primary_labels_in_attributes: bool = False,
page_size: int = _DEFAULT_PAGE_SIZE,
optimized: bool = False,
grand_totals_position: Optional[Literal["pinnedBottom", "pinnedTop", "bottom", "top"]] = "bottom",
) -> tuple[pandas.DataFrame, DataFrameMetadata]:
"""
Retrieves a DataFrame and DataFrame metadata for a given execution result identifier.
@@ -410,6 +421,9 @@ def for_exec_result_id(
headers in memory as lists of dicts, which can consume a lot of memory for large results.
Optimized accumulator stores only unique values and keeps only references to them in the list,
which can significantly reduce memory usage.
grand_totals_position (Literal["pinnedBottom", "pinnedTop", "bottom", "top"], optional):
Position where grand totals should be placed. "pinnedBottom" and "bottom" append totals,
"pinnedTop" and "top" prepend totals. Defaults to "bottom".

Returns:
Tuple[pandas.DataFrame, DataFrameMetadata]: Tuple holding DataFrame and DataFrame metadata.
@@ -436,4 +450,5 @@ def for_exec_result_id(
use_primary_labels_in_attributes=use_primary_labels_in_attributes,
page_size=page_size,
optimized=optimized,
grand_totals_position=grand_totals_position,
)
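For orientation, a minimal usage sketch of the new keyword (not part of the diff). The host, token, and workspace id are placeholders, and exec_def is assumed to be an ExecutionDefinition that already requests grand totals; only for_exec_def(exec_def=..., grand_totals_position=...) itself is taken from this change.

import pandas
from gooddata_pandas import GoodPandas


def frame_with_totals_on_top(workspace_id: str, exec_def) -> pandas.DataFrame:
    # Placeholder host/token; exec_def is assumed to be an ExecutionDefinition
    # that already requests grand totals.
    gp = GoodPandas(host="https://example.gooddata.com", token="***")
    gdf = gp.data_frames(workspace_id)
    df, _metadata = gdf.for_exec_def(
        exec_def=exec_def,
        grand_totals_position="pinnedTop",  # "top" also prepends; "bottom"/"pinnedBottom" keep the default placement
    )
    return df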
@@ -2,7 +2,7 @@
from abc import ABC, abstractmethod
from collections.abc import Iterator
from functools import cached_property
from typing import Any, Callable, Optional, Union, cast
from typing import Any, Callable, Literal, Optional, Union, cast

import pandas
from attrs import define, field, frozen
@@ -698,31 +698,46 @@ def _headers_to_index(
), primary_attribute_labels_mapping


def _merge_grand_totals_into_data(extract: _DataWithHeaders) -> Union[_DataArray, list[_DataArray]]:
def _merge_grand_totals_into_data(
extract: _DataWithHeaders,
grand_totals_position: Optional[Literal["pinnedBottom", "pinnedTop", "bottom", "top"]] = "bottom",
) -> Union[_DataArray, list[_DataArray]]:
"""
Merges grand totals into the extracted data. This function will mutate the extracted data,
extending the rows and columns with grand totals. Going with mutation here so as not to copy arrays around.

Args:
extract (_DataWithHeaders): Extracted data with headers and grand totals.
grand_totals_position (Literal["pinnedBottom", "pinnedTop", "bottom", "top"], optional):
Position where grand totals should be placed. "pinnedBottom" and "bottom" append totals,
"pinnedTop" and "top" prepend totals. Defaults to "bottom".

Returns:
Union[_DataArray, List[_DataArray]]: Mutated data with rows and columns extended with grand totals.
"""
data: list[_DataArray] = extract.data
# Treat None as "bottom" as a fallback
if grand_totals_position is None:
grand_totals_position = "bottom"
# Determine if grand totals should be prepended or appended
should_prepend = grand_totals_position in ("pinnedTop", "top")

if extract.grand_totals[0] is not None:
# column totals are computed into extra rows, one row per column total
# add those rows at the end of the data rows
data.extend(extract.grand_totals[0])
# add those rows at the beginning or end of the data rows based on position
if should_prepend:
data[:0] = extract.grand_totals[0]
else:
data.extend(extract.grand_totals[0])

if extract.grand_totals[1] is not None:
# row totals are computed into extra columns that should be appended to
# existing data rows
# existing data rows (column position doesn't change for row totals)
for row_idx, cols_to_append in enumerate(extract.grand_totals[1]):
data[row_idx].extend(cols_to_append)

return data
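As an aside (not code from this file or the PR), the prepend/append choice above boils down to plain list mutation; a standalone illustration with lists standing in for the _DataArray rows:

data = [["r1"], ["r2"]]              # stands in for the extracted data rows
grand_total_rows = [["T"]]           # stands in for one computed grand-total row

grand_totals_position = "pinnedTop"
should_prepend = grand_totals_position in ("pinnedTop", "top")
if should_prepend:
    data[:0] = grand_total_rows      # slice assignment prepends in place (same list object)
else:
    data.extend(grand_total_rows)    # "bottom" / "pinnedBottom": append, the previous behaviour

print(data)                          # [['T'], ['r1'], ['r2']]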


def _merge_grand_total_headers_into_headers(extract: _DataWithHeaders) -> _HeadersByAxis:
@@ -757,6 +772,7 @@ def convert_execution_response_to_dataframe(
use_primary_labels_in_attributes: bool = False,
page_size: int = _DEFAULT_PAGE_SIZE,
optimized: bool = False,
grand_totals_position: Optional[Literal["pinnedBottom", "pinnedTop", "bottom", "top"]] = "bottom",
) -> tuple[pandas.DataFrame, DataFrameMetadata]:
"""
Converts execution result to a pandas dataframe, maintaining the dimensionality of the result.
@@ -776,6 +792,9 @@ def convert_execution_response_to_dataframe(
headers in memory as lists of dicts, which can consume a lot of memory for large results.
Optimized accumulator stores only unique values and keeps only references to them in the list,
which can significantly reduce memory usage.
grand_totals_position (Literal["pinnedBottom", "pinnedTop", "bottom", "top"], optional):
Position where grand totals should be placed. "pinnedBottom" and "bottom" append totals,
"pinnedTop" and "top" prepend totals. Defaults to "bottom".

Returns:
Tuple[pandas.DataFrame, DataFrameMetadata]: A tuple containing the created dataframe and its metadata.
@@ -789,7 +808,7 @@ def convert_execution_response_to_dataframe(
optimized=optimized,
)

full_data = _merge_grand_totals_into_data(extract)
full_data = _merge_grand_totals_into_data(extract=extract, grand_totals_position=grand_totals_position)
full_headers = _merge_grand_total_headers_into_headers(extract)

index, primary_labels_from_index = _headers_to_index(
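A behavioural note that follows from the None fallback in _merge_grand_totals_into_data (sketch, not from the PR): passing grand_totals_position=None ends up identical to the default "bottom" placement. Reusing gdf and exec_def from the earlier sketch:

df_default, _ = gdf.for_exec_def(exec_def=exec_def)
df_none, _ = gdf.for_exec_def(exec_def=exec_def, grand_totals_position=None)
assert df_default.equals(df_none)   # None falls back to "bottom", the default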
@@ -1,6 +1,6 @@
# (C) 2022 GoodData Corporation
from pathlib import Path
from typing import Optional
from typing import Literal, Optional

import pytest
from gooddata_pandas import DataFrameFactory
@@ -31,9 +31,12 @@ def _run_and_validate_results(
expected_column_totals: Optional[list[list[int]]] = None,
page_size: int = 100,
optimized: bool = False,
grand_totals_position: Optional[Literal["pinnedBottom", "pinnedTop", "bottom", "top"]] = "bottom",
) -> str:
# generate dataframe from exec_def
result, result_metadata = gdf.for_exec_def(exec_def=exec_def, page_size=page_size)
result, result_metadata = gdf.for_exec_def(
exec_def=exec_def, page_size=page_size, grand_totals_position=grand_totals_position
)
assert result.values.shape == expected

# use result ID from computation above and generate dataframe just from it
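One possible follow-up for the test above, sketched with placeholder names (gdf and exec_def would come from the suite's existing fixtures; this is not part of the PR): parametrizing over all four accepted literals.

import pytest


@pytest.mark.parametrize("position", ["bottom", "pinnedBottom", "top", "pinnedTop"])
def test_grand_totals_position_literals(gdf, exec_def, position):
    # gdf and exec_def are assumed to come from the suite's existing fixtures.
    result, _metadata = gdf.for_exec_def(exec_def=exec_def, grand_totals_position=position)
    # Minimal smoke check; a real test would assert where the totals rows end up.
    assert result.values.size > 0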