diff --git a/.gitignore b/.gitignore index 7529ac0..fcdeef5 100644 --- a/.gitignore +++ b/.gitignore @@ -15,3 +15,4 @@ subgrounds.egg-info/ # apple .DS_Store +.venv*/ \ No newline at end of file diff --git a/data/curve_swaps.parquet b/data/curve_swaps.parquet new file mode 100644 index 0000000..e912855 Binary files /dev/null and b/data/curve_swaps.parquet differ diff --git a/poetry.lock b/poetry.lock index 5c280f8..a0f51c7 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1247,48 +1247,6 @@ files = [ {file = "numpy-1.25.2.tar.gz", hash = "sha256:fd608e19c8d7c55021dffd43bfe5492fab8cc105cc8986f813f8c3c048b38760"}, ] -[[package]] -name = "numpy" -version = "1.26.0" -description = "Fundamental package for array computing in Python" -category = "main" -optional = false -python-versions = "<3.13,>=3.9" -files = [ - {file = "numpy-1.26.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:f8db2f125746e44dce707dd44d4f4efeea8d7e2b43aace3f8d1f235cfa2733dd"}, - {file = "numpy-1.26.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:0621f7daf973d34d18b4e4bafb210bbaf1ef5e0100b5fa750bd9cde84c7ac292"}, - {file = "numpy-1.26.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:51be5f8c349fdd1a5568e72713a21f518e7d6707bcf8503b528b88d33b57dc68"}, - {file = "numpy-1.26.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:767254ad364991ccfc4d81b8152912e53e103ec192d1bb4ea6b1f5a7117040be"}, - {file = "numpy-1.26.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:436c8e9a4bdeeee84e3e59614d38c3dbd3235838a877af8c211cfcac8a80b8d3"}, - {file = "numpy-1.26.0-cp310-cp310-win32.whl", hash = "sha256:c2e698cb0c6dda9372ea98a0344245ee65bdc1c9dd939cceed6bb91256837896"}, - {file = "numpy-1.26.0-cp310-cp310-win_amd64.whl", hash = "sha256:09aaee96c2cbdea95de76ecb8a586cb687d281c881f5f17bfc0fb7f5890f6b91"}, - {file = "numpy-1.26.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:637c58b468a69869258b8ae26f4a4c6ff8abffd4a8334c830ffb63e0feefe99a"}, - 
{file = "numpy-1.26.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:306545e234503a24fe9ae95ebf84d25cba1fdc27db971aa2d9f1ab6bba19a9dd"}, - {file = "numpy-1.26.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8c6adc33561bd1d46f81131d5352348350fc23df4d742bb246cdfca606ea1208"}, - {file = "numpy-1.26.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e062aa24638bb5018b7841977c360d2f5917268d125c833a686b7cbabbec496c"}, - {file = "numpy-1.26.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:546b7dd7e22f3c6861463bebb000646fa730e55df5ee4a0224408b5694cc6148"}, - {file = "numpy-1.26.0-cp311-cp311-win32.whl", hash = "sha256:c0b45c8b65b79337dee5134d038346d30e109e9e2e9d43464a2970e5c0e93229"}, - {file = "numpy-1.26.0-cp311-cp311-win_amd64.whl", hash = "sha256:eae430ecf5794cb7ae7fa3808740b015aa80747e5266153128ef055975a72b99"}, - {file = "numpy-1.26.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:166b36197e9debc4e384e9c652ba60c0bacc216d0fc89e78f973a9760b503388"}, - {file = "numpy-1.26.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:f042f66d0b4ae6d48e70e28d487376204d3cbf43b84c03bac57e28dac6151581"}, - {file = "numpy-1.26.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e5e18e5b14a7560d8acf1c596688f4dfd19b4f2945b245a71e5af4ddb7422feb"}, - {file = "numpy-1.26.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7f6bad22a791226d0a5c7c27a80a20e11cfe09ad5ef9084d4d3fc4a299cca505"}, - {file = "numpy-1.26.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:4acc65dd65da28060e206c8f27a573455ed724e6179941edb19f97e58161bb69"}, - {file = "numpy-1.26.0-cp312-cp312-win32.whl", hash = "sha256:bb0d9a1aaf5f1cb7967320e80690a1d7ff69f1d47ebc5a9bea013e3a21faec95"}, - {file = "numpy-1.26.0-cp312-cp312-win_amd64.whl", hash = "sha256:ee84ca3c58fe48b8ddafdeb1db87388dce2c3c3f701bf447b05e4cfcc3679112"}, - {file = "numpy-1.26.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = 
"sha256:4a873a8180479bc829313e8d9798d5234dfacfc2e8a7ac188418189bb8eafbd2"}, - {file = "numpy-1.26.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:914b28d3215e0c721dc75db3ad6d62f51f630cb0c277e6b3bcb39519bed10bd8"}, - {file = "numpy-1.26.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c78a22e95182fb2e7874712433eaa610478a3caf86f28c621708d35fa4fd6e7f"}, - {file = "numpy-1.26.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:86f737708b366c36b76e953c46ba5827d8c27b7a8c9d0f471810728e5a2fe57c"}, - {file = "numpy-1.26.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:b44e6a09afc12952a7d2a58ca0a2429ee0d49a4f89d83a0a11052da696440e49"}, - {file = "numpy-1.26.0-cp39-cp39-win32.whl", hash = "sha256:5671338034b820c8d58c81ad1dafc0ed5a00771a82fccc71d6438df00302094b"}, - {file = "numpy-1.26.0-cp39-cp39-win_amd64.whl", hash = "sha256:020cdbee66ed46b671429c7265cf00d8ac91c046901c55684954c3958525dab2"}, - {file = "numpy-1.26.0-pp39-pypy39_pp73-macosx_10_9_x86_64.whl", hash = "sha256:0792824ce2f7ea0c82ed2e4fecc29bb86bee0567a080dacaf2e0a01fe7654369"}, - {file = "numpy-1.26.0-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7d484292eaeb3e84a51432a94f53578689ffdea3f90e10c8b203a99be5af57d8"}, - {file = "numpy-1.26.0-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:186ba67fad3c60dbe8a3abff3b67a91351100f2661c8e2a80364ae6279720299"}, - {file = "numpy-1.26.0.tar.gz", hash = "sha256:f93fc78fe8bf15afe2b8d6b6499f1c73953169fad1e9a8dd086cdff3190e7fdf"}, -] - [[package]] name = "ordered-set" version = "4.1.0" @@ -1573,6 +1531,43 @@ files = [ dev = ["pre-commit", "tox"] testing = ["pytest", "pytest-benchmark"] +[[package]] +name = "polars" +version = "0.19.12" +description = "Blazingly fast DataFrame library" +category = "main" +optional = false +python-versions = ">=3.8" +files = [ + {file = "polars-0.19.12-cp38-abi3-macosx_10_12_x86_64.whl", hash = 
"sha256:c308e501805d9ca256b695fd55974c6267f68e05a98ccd14430071a5f9c97be8"}, + {file = "polars-0.19.12-cp38-abi3-macosx_11_0_arm64.whl", hash = "sha256:9a47da843a425fb4d9aa23f69f6e855f0498cdfb9d35a93c0a5e377594649aac"}, + {file = "polars-0.19.12-cp38-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0ece4c108c749b213fbba006eb39ffb3703be208121a625369bfe47048d321a0"}, + {file = "polars-0.19.12-cp38-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:69c9e6919a5113c75e059ba773f22ac1ff78d966ab5663f07dc0dc5ff916246c"}, + {file = "polars-0.19.12-cp38-abi3-win_amd64.whl", hash = "sha256:6efbc14c051bfc188c85ead71197ba0bd2fce7e6131061e28e8b280350d37a35"}, + {file = "polars-0.19.12.tar.gz", hash = "sha256:55b16602bcbf91b5ce888d1a588b47fcd9a35221b1a08c5a27cfe6d7cbd81be2"}, +] + +[package.extras] +adbc = ["adbc_driver_sqlite"] +all = ["polars[adbc,cloudpickle,connectorx,deltalake,fsspec,gevent,matplotlib,numpy,pandas,pyarrow,pydantic,pyiceberg,sqlalchemy,timezone,xlsx2csv,xlsxwriter]"] +cloudpickle = ["cloudpickle"] +connectorx = ["connectorx"] +deltalake = ["deltalake (>=0.10.0)"] +fsspec = ["fsspec"] +gevent = ["gevent"] +matplotlib = ["matplotlib"] +numpy = ["numpy (>=1.16.0)"] +openpyxl = ["openpyxl (>=3.0.0)"] +pandas = ["pandas", "pyarrow (>=7.0.0)"] +pyarrow = ["pyarrow (>=7.0.0)"] +pydantic = ["pydantic"] +pyiceberg = ["pyiceberg (>=0.5.0)"] +pyxlsb = ["pyxlsb (>=1.0)"] +sqlalchemy = ["pandas", "sqlalchemy"] +timezone = ["backports.zoneinfo", "tzdata"] +xlsx2csv = ["xlsx2csv (>=0.8.0)"] +xlsxwriter = ["xlsxwriter"] + [[package]] name = "prompt-toolkit" version = "3.0.39" @@ -1642,6 +1637,48 @@ files = [ [package.extras] tests = ["pytest"] +[[package]] +name = "pyarrow" +version = "13.0.0" +description = "Python library for Apache Arrow" +category = "main" +optional = true +python-versions = ">=3.8" +files = [ + {file = "pyarrow-13.0.0-cp310-cp310-macosx_10_14_x86_64.whl", hash = 
"sha256:1afcc2c33f31f6fb25c92d50a86b7a9f076d38acbcb6f9e74349636109550148"}, + {file = "pyarrow-13.0.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:70fa38cdc66b2fc1349a082987f2b499d51d072faaa6b600f71931150de2e0e3"}, + {file = "pyarrow-13.0.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cd57b13a6466822498238877892a9b287b0a58c2e81e4bdb0b596dbb151cbb73"}, + {file = "pyarrow-13.0.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f8ce69f7bf01de2e2764e14df45b8404fc6f1a5ed9871e8e08a12169f87b7a26"}, + {file = "pyarrow-13.0.0-cp310-cp310-manylinux_2_28_aarch64.whl", hash = "sha256:588f0d2da6cf1b1680974d63be09a6530fd1bd825dc87f76e162404779a157dc"}, + {file = "pyarrow-13.0.0-cp310-cp310-manylinux_2_28_x86_64.whl", hash = "sha256:6241afd72b628787b4abea39e238e3ff9f34165273fad306c7acf780dd850956"}, + {file = "pyarrow-13.0.0-cp310-cp310-win_amd64.whl", hash = "sha256:fda7857e35993673fcda603c07d43889fca60a5b254052a462653f8656c64f44"}, + {file = "pyarrow-13.0.0-cp311-cp311-macosx_10_14_x86_64.whl", hash = "sha256:aac0ae0146a9bfa5e12d87dda89d9ef7c57a96210b899459fc2f785303dcbb67"}, + {file = "pyarrow-13.0.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:d7759994217c86c161c6a8060509cfdf782b952163569606bb373828afdd82e8"}, + {file = "pyarrow-13.0.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:868a073fd0ff6468ae7d869b5fc1f54de5c4255b37f44fb890385eb68b68f95d"}, + {file = "pyarrow-13.0.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:51be67e29f3cfcde263a113c28e96aa04362ed8229cb7c6e5f5c719003659d33"}, + {file = "pyarrow-13.0.0-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:d1b4e7176443d12610874bb84d0060bf080f000ea9ed7c84b2801df851320295"}, + {file = "pyarrow-13.0.0-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:69b6f9a089d116a82c3ed819eea8fe67dae6105f0d81eaf0fdd5e60d0c6e0944"}, + {file = "pyarrow-13.0.0-cp311-cp311-win_amd64.whl", hash = 
"sha256:ab1268db81aeb241200e321e220e7cd769762f386f92f61b898352dd27e402ce"}, + {file = "pyarrow-13.0.0-cp38-cp38-macosx_10_14_x86_64.whl", hash = "sha256:ee7490f0f3f16a6c38f8c680949551053c8194e68de5046e6c288e396dccee80"}, + {file = "pyarrow-13.0.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:e3ad79455c197a36eefbd90ad4aa832bece7f830a64396c15c61a0985e337287"}, + {file = "pyarrow-13.0.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:68fcd2dc1b7d9310b29a15949cdd0cb9bc34b6de767aff979ebf546020bf0ba0"}, + {file = "pyarrow-13.0.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:dc6fd330fd574c51d10638e63c0d00ab456498fc804c9d01f2a61b9264f2c5b2"}, + {file = "pyarrow-13.0.0-cp38-cp38-manylinux_2_28_aarch64.whl", hash = "sha256:e66442e084979a97bb66939e18f7b8709e4ac5f887e636aba29486ffbf373763"}, + {file = "pyarrow-13.0.0-cp38-cp38-manylinux_2_28_x86_64.whl", hash = "sha256:0f6eff839a9e40e9c5610d3ff8c5bdd2f10303408312caf4c8003285d0b49565"}, + {file = "pyarrow-13.0.0-cp38-cp38-win_amd64.whl", hash = "sha256:8b30a27f1cddf5c6efcb67e598d7823a1e253d743d92ac32ec1eb4b6a1417867"}, + {file = "pyarrow-13.0.0-cp39-cp39-macosx_10_14_x86_64.whl", hash = "sha256:09552dad5cf3de2dc0aba1c7c4b470754c69bd821f5faafc3d774bedc3b04bb7"}, + {file = "pyarrow-13.0.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:3896ae6c205d73ad192d2fc1489cd0edfab9f12867c85b4c277af4d37383c18c"}, + {file = "pyarrow-13.0.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6647444b21cb5e68b593b970b2a9a07748dd74ea457c7dadaa15fd469c48ada1"}, + {file = "pyarrow-13.0.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:47663efc9c395e31d09c6aacfa860f4473815ad6804311c5433f7085415d62a7"}, + {file = "pyarrow-13.0.0-cp39-cp39-manylinux_2_28_aarch64.whl", hash = "sha256:b9ba6b6d34bd2563345488cf444510588ea42ad5613df3b3509f48eb80250afd"}, + {file = "pyarrow-13.0.0-cp39-cp39-manylinux_2_28_x86_64.whl", hash = 
"sha256:d00d374a5625beeb448a7fa23060df79adb596074beb3ddc1838adb647b6ef09"}, + {file = "pyarrow-13.0.0-cp39-cp39-win_amd64.whl", hash = "sha256:c51afd87c35c8331b56f796eff954b9c7f8d4b7fef5903daf4e05fcf017d23a8"}, + {file = "pyarrow-13.0.0.tar.gz", hash = "sha256:83333726e83ed44b0ac94d8d7a21bbdee4a05029c3b1e8db58a863eec8fd8a33"}, +] + +[package.dependencies] +numpy = ">=1.16.6" + [[package]] name = "pycparser" version = "2.21" @@ -2532,8 +2569,9 @@ testing = ["big-O", "jaraco.functools", "jaraco.itertools", "more-itertools", "p all = ["dash", "plotly"] dash = ["dash"] plotly = ["plotly"] +polars = ["polars", "pyarrow"] [metadata] lock-version = "2.0" python-versions = "^3.10" -content-hash = "21fc81a15dd3662c4004c2bc8ac16b58bc59b25edfa36b749000c3179c9f7aac" +content-hash = "aa4fecc61b83354a2eb799651a77af5d325eff64f10ede5a9cc1525de7dda52a" diff --git a/pyproject.toml b/pyproject.toml index 6daf7b8..61a11bb 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -20,10 +20,13 @@ dash = { version = "^2.3.1", optional = true } plotly = { version = "^5.14.1", optional = true } httpx = { extras = ["http2"], version = "^0.24.1" } pytest-asyncio = "^0.21.0" +polars = "^0.19.12" +pyarrow = { version = "^13.0.0", optional = true } [tool.poetry.extras] dash = ["dash"] plotly = ["plotly"] +polars = ["polars", "pyarrow"] all = ["dash", "plotly"] # https://python-poetry.org/docs/managing-dependencies/#dependency-groups diff --git a/subgrounds/contrib/polars/__init__.py b/subgrounds/contrib/polars/__init__.py new file mode 100644 index 0000000..0df4ef8 --- /dev/null +++ b/subgrounds/contrib/polars/__init__.py @@ -0,0 +1,3 @@ +from .client import PolarsSubgrounds + +__all__ = ["PolarsSubgrounds"] diff --git a/subgrounds/contrib/polars/client.py b/subgrounds/contrib/polars/client.py new file mode 100644 index 0000000..849f48e --- /dev/null +++ b/subgrounds/contrib/polars/client.py @@ -0,0 +1,167 @@ +from functools import cached_property +from json import JSONDecodeError +from typing 
import Any, Type + +import httpx +import polars as pl +from pipe import map, traverse + +from subgrounds.client import SubgroundsBase +from subgrounds.contrib.polars.utils import df_of_json +from subgrounds.errors import GraphQLError, ServerError +from subgrounds.pagination import LegacyStrategy, PaginationStrategy +from subgrounds.query import DataRequest, DataResponse, DocumentResponse +from subgrounds.subgraph import FieldPath, Subgraph +from subgrounds.utils import default_header + +HTTP2_SUPPORT = True + + +class PolarsSubgrounds(SubgroundsBase): + """TODO: Write comment""" + + @cached_property + def _client(self): + """Cached client""" + + return httpx.Client(http2=HTTP2_SUPPORT, timeout=self.timeout) + + def load( + self, + url: str, + save_schema: bool = False, + is_subgraph: bool = True, + ) -> Subgraph: + try: + loader = self._load(url, save_schema, is_subgraph) + url, query = next(loader) # if this fails, schema is loaded from cache + data = self._fetch(url, {"query": query}) + loader.send(data) + + except StopIteration as e: + return e.value + + assert False + + def load_subgraph( + self, url: str, save_schema: bool = False, cache_dir: str | None = None + ) -> Subgraph: + """Performs introspection on the provided GraphQL API ``url`` to get the + schema, stores the schema if ``save_schema`` is ``True`` and returns a + generated class representing the subgraph with all its entities. + + Args: + url The url of the API. + save_schema: Flag indicating whether or not the schema should be cached to + disk. 
+ + Returns: + Subgraph: A generated class representing the subgraph and its entities + """ + + return self.load(url, save_schema, is_subgraph=True) + + def _fetch(self, url: str, blob: dict[str, Any]) -> dict[str, Any]: + resp = self._client.post( + url, json=blob, headers=default_header(url) | self.headers + ) + resp.raise_for_status() + + try: + raw_data = resp.json() + + except JSONDecodeError: + raise ServerError( + f"Server ({url}) did not respond with proper JSON" + f"\nDid you query a proper GraphQL endpoint?" + f"\n\n{resp.content}" + ) + + if (data := raw_data.get("data")) is None: + raise GraphQLError(raw_data.get("errors", "Unknown Error(s) Found")) + + return data + + def execute( + self, + req: DataRequest, + pagination_strategy: Type[PaginationStrategy] | None = LegacyStrategy, + ) -> DataResponse: + """Executes a :class:`DataRequest` and returns a :class:`DataResponse`. + + Args: + req: The :class:`DataRequest` object to be executed. + pagination_strategy: A Class implementing the :class:`PaginationStrategy` + ``Protocol``. If ``None``, then automatic pagination is disabled. + Defaults to :class:`LegacyStrategy`. + + Returns: + A :class:`DataResponse` object representing the response + """ + + try: + executor = self._execute(req, pagination_strategy) + + doc = next(executor) + while True: + data = self._fetch( + doc.url, {"query": doc.graphql, "variables": doc.variables} + ) + doc = executor.send(DocumentResponse(url=doc.url, data=data)) + + except StopIteration as e: + return e.value + + def query_json( + self, + fpaths: FieldPath | list[FieldPath], + pagination_strategy: Type[PaginationStrategy] | None = LegacyStrategy, + ) -> list[dict[str, Any]]: + """Equivalent to + ``Subgrounds.execute(Subgrounds.mk_request(fpaths), pagination_strategy)``. + + Args: + fpaths: One or more :class:`FieldPath` objects + that should be included in the request. + pagination_strategy: A class implementing the :class:`PaginationStrategy` + ``Protocol``. 
If ``None``, then automatic pagination is disabled. + Defaults to :class:`LegacyStrategy`. + + Returns: + The reponse data + """ + + fpaths = list([fpaths] | traverse | map(FieldPath._auto_select) | traverse) + req = self.mk_request(fpaths) + data = self.execute(req, pagination_strategy) + return [doc.data for doc in data.responses] + + def query_df( + self, + fpaths: FieldPath | list[FieldPath], + pagination_strategy: Type[PaginationStrategy] | None = LegacyStrategy, + ) -> pl.DataFrame: + """ + Queries and converts raw GraphQL data to a Polars DataFrame. + + Args: + fpaths: One or more FieldPath objects that + should be included in the request. + pagination_strategy: A class implementing the :class:`PaginationStrategy` + ``Protocol``. If ``None``, then automatic pagination is disabled. + Defaults to :class:`LegacyStrategy`. + parquet_name: The name of the parquet file to write to. + + Returns: + pl.DataFrame: A Polars DataFrame containing the queried data. + """ + + # Query raw GraphQL data + fpaths = list([fpaths] | traverse | map(FieldPath._auto_select) | traverse) + + # TODO: check fpaths for only one entity + + json_data = self.query_json(fpaths, pagination_strategy=pagination_strategy) + data = json_data[0] + + return df_of_json(data) diff --git a/subgrounds/contrib/polars/utils.py b/subgrounds/contrib/polars/utils.py new file mode 100644 index 0000000..85ed623 --- /dev/null +++ b/subgrounds/contrib/polars/utils.py @@ -0,0 +1,119 @@ +from typing import Any + +import polars as pl + + +def df_of_json(data: dict[str, Any]) -> pl.DataFrame: + """Creates a ``pl.DataFrame`` from a JSON `data`. + + Args: + json_data: Response data + + Returns: + A ``pl.DataFrame`` formatted from the response data. + """ + + # TODO: refactor + # Get the first key of the first JSON object. + # This is the key that contains the data. 
+ json_data_key = list(data.keys())[0] + numeric_data = force_numeric(data[json_data_key]) + + graphql_df = pl.from_dicts(numeric_data, infer_schema_length=None) + + # Apply the formatting to the Polars DataFrame + graphql_df = format_array_columns(graphql_df) + return format_dictionary_columns(graphql_df) + + +def format_dictionary_columns(df: pl.DataFrame) -> pl.DataFrame: + """Unnest dictionary values into their own columns, renaming them appropriately. + + Args: + df (pl.DataFrame): Input DataFrame containing dictionary columns. + + Returns: + pl.DataFrame: DataFrame with dictionary values unnested into separate columns. + + Example: + >>> data = { + ... "dict_col": [{"A": 1, "B": 2}, {"A": 3, "B": 4}], + ... "arr_col": [[1, 2, 3], [4, 5, 6]], + ... } + >>> df = pl.DataFrame(data) + >>> result = fmt_dict_cols(df) + >>> print(result) + after test: shape: (2, 3) + ... + (output example here) + + """ + + for column in df.columns: + if len(df[column]) > 0 and isinstance(df[column][0], dict): + col_names = df[column][0].keys() + # Rename struct columns + struct_df = df.select( + pl.col(column).struct.rename_fields( + [f"{column}_{c}" for c in col_names] + ) + ) + struct_df = struct_df.unnest(column) + # Add struct_df columns to df and drop the original column + df = df.with_columns(struct_df).drop(column) + + return df + + +def format_array_columns(df: pl.DataFrame) -> pl.DataFrame: + """Unnest array values into their own columns, renaming them appropriately. + + Args: + df (pl.DataFrame): Input DataFrame containing array columns. + + Returns: + pl.DataFrame: DataFrame with array values unnested into separate columns. + + Example: + >>> data = { + ... "dict_col": [{"A": 1, "B": 2}, {"A": 3, "B": 4}], + ... "arr_col": [[1, 2, 3], [4, 5, 6]], + ... } + >>> df = pl.DataFrame(data) + >>> result = fmt_arr_cols(df) + >>> print(result) + after test: shape: (2, 4) + ... 
+ (output example here) + + """ + + # use this logic if column is a list (rows show up as pl.Series) + for column in df.columns: + if len(df[column]) > 0 and isinstance(df[column][0], pl.Series): + # convert struct to array + struct_df = df.select([pl.col(column).list.to_struct()]) + # rename struct fields + struct_df = struct_df.select( + pl.col(column).struct.rename_fields( + [f"{column}_{i}" for i in range(len(df[column][0]))] + ) + ) + # unnest struct fields into their own columns + struct_df = struct_df.unnest(column) + # add struct_df columns to df and + df = df.with_columns(struct_df).drop(column) + + return df + + +def force_numeric(json_data: list[dict[str, Any]]) -> list[dict[str, Any]]: + # scan all keys. If one of the keys is timestamp or blockNumber, then leave alone. For any other key that has int values, convert to float + # print(json_data) + + for entry in json_data: + for key, value in entry.items(): + if key != "timestamp" and key != "blockNumber" and isinstance(value, int): + entry[key] = float(value) + + return json_data diff --git a/tests/contrib/polars/test_client.py b/tests/contrib/polars/test_client.py new file mode 100644 index 0000000..8823ecd --- /dev/null +++ b/tests/contrib/polars/test_client.py @@ -0,0 +1,1431 @@ +import polars as pl +from pandas.testing import assert_frame_equal + +from subgrounds import FieldPath, Subgraph +from subgrounds.dataframe_utils import df_of_json +from subgrounds.schema import TypeMeta, TypeRef + + +def test_df_of_json_1(klima_bridged_carbon_subgraph: Subgraph): + expected = pl.DataFrame( + data={ + "carbonOffsets_retirements_value": [1, 2, 3, 4, 5], + "carbonOffsets_tokenAddress": [ + "0x88e6a0c2ddd26feeb64f039a2c41296fcb3f5640", + "0x88e6a0c2ddd26feeb64f039a2c41296fcb3f5640", + "0x88e6a0c2ddd26feeb64f039a2c41296fcb3f5640", + "0x88e6a0c2ddd26feeb64f039a2c41296fcb3f5640", + "0xc2e9f25be6257c210d7adf0d4cd6e3e881ba25f8", + ], + } + ) + + fpaths = [ + FieldPath( + klima_bridged_carbon_subgraph, + 
TypeRef.non_null_list("CarbonOffset"), + TypeRef.Named(name="BigInt", kind="SCALAR"), + [ + ( + None, + TypeMeta.FieldMeta( + name="carbonOffsets", + description="", + args=[], + type=TypeRef.non_null_list(name="CarbonOffset", kind="OBJECT"), + ), + ), + ( + None, + TypeMeta.FieldMeta( + name="retirements", + description="", + args=[], + type=TypeRef.non_null_list(name="Retirement", kind="OBJECT"), + ), + ), + ( + None, + TypeMeta.FieldMeta( + name="value", + description="", + args=[], + type=TypeRef.Named(name="BigInt", kind="SCALAR"), + ), + ), + ], + ), + FieldPath( + klima_bridged_carbon_subgraph, + TypeRef.non_null_list("CarbonOffset"), + TypeRef.Named(name="BigInt", kind="SCALAR"), + [ + ( + None, + TypeMeta.FieldMeta( + name="carbonOffsets", + description="", + args=[], + type=TypeRef.non_null_list(name="CarbonOffset", kind="OBJECT"), + ), + ), + ( + None, + TypeMeta.FieldMeta( + name="tokenAddress", + description="", + args=[], + type=TypeRef.Named(name="String", kind="SCALAR"), + ), + ), + ], + ), + ] + + json = { + "carbonOffsets": [ + { + "tokenAddress": "0x88e6a0c2ddd26feeb64f039a2c41296fcb3f5640", + "retirements": [ + {"value": 1}, + {"value": 2}, + {"value": 3}, + {"value": 4}, + ], + }, + { + "tokenAddress": "0xc2e9f25be6257c210d7adf0d4cd6e3e881ba25f8", + "retirements": [ + {"value": 5}, + ], + }, + { + "tokenAddress": "0x298b7c5e0770d151e4c5cf6cca4dae3a3ffc8e27", + "retirements": [], + }, + ] + } + + assert_frame_equal(df_of_json(json), expected) + + +def test_df_of_json_3(univ3_subgraph: Subgraph): + expected = pl.DataFrame( + data={ + "swaps_timestamp": [ + 1643206992, + 1643206992, + 1643206992, + 1643206992, + 1643206992, + 1643206992, + 1643206992, + 1643206992, + 1643206992, + 1643206992, + ], + "swaps_pool_id": [ + "0x88e6a0c2ddd26feeb64f039a2c41296fcb3f5640", + "0xc2e9f25be6257c210d7adf0d4cd6e3e881ba25f8", + "0xcf7e21b96a7dae8e1663b5a266fd812cbe973e70", + "0x88e6a0c2ddd26feeb64f039a2c41296fcb3f5640", + 
"0x00f59b15dc1fe2e16cde0678d2164fd5ff10e424", + "0x298b7c5e0770d151e4c5cf6cca4dae3a3ffc8e27", + "0x88e6a0c2ddd26feeb64f039a2c41296fcb3f5640", + "0x11b815efb8f581194ae79006d24e0d814b7697f6", + "0x88e6a0c2ddd26feeb64f039a2c41296fcb3f5640", + "0x60594a405d53811d3bc4766596efd80fd545a270", + ], + "swaps_token0_symbol": [ + "USDC", + "DAI", + "gOHM", + "USDC", + "STC", + "MIM", + "USDC", + "WETH", + "USDC", + "DAI", + ], + "swaps_token1_symbol": [ + "WETH", + "WETH", + "WETH", + "WETH", + "WETH", + "USDC", + "WETH", + "USDT", + "WETH", + "WETH", + ], + } + ) + + json = [ + { + "swaps": [ + { + "pool": {"id": "0x88e6a0c2ddd26feeb64f039a2c41296fcb3f5640"}, + "timestamp": 1643206992, + "token0": {"symbol": "USDC"}, + "token1": {"symbol": "WETH"}, + }, + { + "pool": {"id": "0xc2e9f25be6257c210d7adf0d4cd6e3e881ba25f8"}, + "timestamp": 1643206992, + "token0": {"symbol": "DAI"}, + "token1": {"symbol": "WETH"}, + }, + { + "pool": {"id": "0xcf7e21b96a7dae8e1663b5a266fd812cbe973e70"}, + "timestamp": 1643206992, + "token0": {"symbol": "gOHM"}, + "token1": {"symbol": "WETH"}, + }, + { + "pool": {"id": "0x88e6a0c2ddd26feeb64f039a2c41296fcb3f5640"}, + "timestamp": 1643206992, + "token0": {"symbol": "USDC"}, + "token1": {"symbol": "WETH"}, + }, + { + "pool": {"id": "0x00f59b15dc1fe2e16cde0678d2164fd5ff10e424"}, + "timestamp": 1643206992, + "token0": {"symbol": "STC"}, + "token1": {"symbol": "WETH"}, + }, + { + "pool": {"id": "0x298b7c5e0770d151e4c5cf6cca4dae3a3ffc8e27"}, + "timestamp": 1643206992, + "token0": {"symbol": "MIM"}, + "token1": {"symbol": "USDC"}, + }, + { + "pool": {"id": "0x88e6a0c2ddd26feeb64f039a2c41296fcb3f5640"}, + "timestamp": 1643206992, + "token0": {"symbol": "USDC"}, + "token1": {"symbol": "WETH"}, + }, + { + "pool": {"id": "0x11b815efb8f581194ae79006d24e0d814b7697f6"}, + "timestamp": 1643206992, + "token0": {"symbol": "WETH"}, + "token1": {"symbol": "USDT"}, + }, + { + "pool": {"id": "0x88e6a0c2ddd26feeb64f039a2c41296fcb3f5640"}, + "timestamp": 1643206992, + 
"token0": {"symbol": "USDC"}, + "token1": {"symbol": "WETH"}, + }, + { + "pool": {"id": "0x60594a405d53811d3bc4766596efd80fd545a270"}, + "timestamp": 1643206992, + "token0": {"symbol": "DAI"}, + "token1": {"symbol": "WETH"}, + }, + ] + } + ] + + fpaths = [ + FieldPath( + univ3_subgraph, + TypeRef.non_null_list("Swap", kind="OBJECT"), + TypeRef.Named(name="BigInt", kind="SCALAR"), + [ + ( + None, + TypeMeta.FieldMeta( + name="swaps", + description="", + args=[], + type=TypeRef.non_null_list("Swap", kind="OBJECT"), + ), + ), + ( + None, + TypeMeta.FieldMeta( + name="timestamp", + description="", + args=[], + type=TypeRef.Named(name="BigInt", kind="SCALAR"), + ), + ), + ], + ), + FieldPath( + univ3_subgraph, + TypeRef.non_null_list("Swap", kind="OBJECT"), + TypeRef.Named(name="String", kind="SCALAR"), + [ + ( + None, + TypeMeta.FieldMeta( + name="swaps", + description="", + args=[], + type=TypeRef.non_null_list("Swap", kind="OBJECT"), + ), + ), + ( + None, + TypeMeta.FieldMeta( + name="pool", + description="", + args=[], + type=TypeRef.Named(name="Pool", kind="OBJECT"), + ), + ), + ( + None, + TypeMeta.FieldMeta( + name="id", + description="", + args=[], + type=TypeRef.Named(name="String", kind="SCALAR"), + ), + ), + ], + ), + FieldPath( + univ3_subgraph, + TypeRef.non_null_list("Swap", kind="OBJECT"), + TypeRef.Named(name="String", kind="SCALAR"), + [ + ( + None, + TypeMeta.FieldMeta( + name="swaps", + description="", + args=[], + type=TypeRef.non_null_list("Swap", kind="OBJECT"), + ), + ), + ( + None, + TypeMeta.FieldMeta( + name="token0", + description="", + args=[], + type=TypeRef.Named(name="Token", kind="OBJECT"), + ), + ), + ( + None, + TypeMeta.FieldMeta( + name="symbol", + description="", + args=[], + type=TypeRef.Named(name="String", kind="SCALAR"), + ), + ), + ], + ), + FieldPath( + univ3_subgraph, + TypeRef.non_null_list("Swap", kind="OBJECT"), + TypeRef.Named(name="String", kind="SCALAR"), + [ + ( + None, + TypeMeta.FieldMeta( + name="swaps", + 
description="", + args=[], + type=TypeRef.non_null_list("Swap", kind="OBJECT"), + ), + ), + ( + None, + TypeMeta.FieldMeta( + name="token1", + description="", + args=[], + type=TypeRef.Named(name="Token", kind="OBJECT"), + ), + ), + ( + None, + TypeMeta.FieldMeta( + name="symbol", + description="", + args=[], + type=TypeRef.Named(name="String", kind="SCALAR"), + ), + ), + ], + ), + ] + + assert_frame_equal(df_of_json(json, fpaths), expected) + + +def test_df_of_json_flat_1(klima_bridged_carbon_subgraph: Subgraph): + expected = pl.DataFrame( + data={ + "foo_a": ["hello"], + "foo_b": ["world"], + "foo_c_x": [10], + "bar_d": [20], + "bar_e": [False], + } + ) + + json = [ + { + "foo": {"a": "hello", "b": "world", "c": {"x": 10}}, + "bar": {"d": 20, "e": False}, + } + ] + + fpaths = [ + FieldPath( + klima_bridged_carbon_subgraph, + TypeRef.Named(name="Foo", kind="OBJECT"), + TypeRef.Named(name="String", kind="SCALAR"), + [ + ( + None, + TypeMeta.FieldMeta( + name="foo", + description="", + args=[], + type=TypeRef.Named(name="Foo", kind="OBJECT"), + ), + ), + ( + None, + TypeMeta.FieldMeta( + name="a", + description="", + args=[], + type=TypeRef.Named(name="String", kind="SCALAR"), + ), + ), + ], + ), + FieldPath( + klima_bridged_carbon_subgraph, + TypeRef.Named(name="Foo", kind="OBJECT"), + TypeRef.Named(name="String", kind="SCALAR"), + [ + ( + None, + TypeMeta.FieldMeta( + name="foo", + description="", + args=[], + type=TypeRef.Named(name="Foo", kind="OBJECT"), + ), + ), + ( + None, + TypeMeta.FieldMeta( + name="b", + description="", + args=[], + type=TypeRef.Named(name="String", kind="SCALAR"), + ), + ), + ], + ), + FieldPath( + klima_bridged_carbon_subgraph, + TypeRef.Named(name="Foo", kind="OBJECT"), + TypeRef.Named(name="BigInt", kind="SCALAR"), + [ + ( + None, + TypeMeta.FieldMeta( + name="foo", + description="", + args=[], + type=TypeRef.Named(name="Foo", kind="OBJECT"), + ), + ), + ( + None, + TypeMeta.FieldMeta( + name="c", + description="", + args=[], + 
type=TypeRef.Named(name="C", kind="OBJECT"), + ), + ), + ( + None, + TypeMeta.FieldMeta( + name="x", + description="", + args=[], + type=TypeRef.Named(name="BigInt", kind="SCALAR"), + ), + ), + ], + ), + FieldPath( + klima_bridged_carbon_subgraph, + TypeRef.Named(name="Bar", kind="OBJECT"), + TypeRef.Named(name="BigInt", kind="SCALAR"), + [ + ( + None, + TypeMeta.FieldMeta( + name="bar", + description="", + args=[], + type=TypeRef.Named(name="Bar", kind="OBJECT"), + ), + ), + ( + None, + TypeMeta.FieldMeta( + name="d", + description="", + args=[], + type=TypeRef.Named(name="BigInt", kind="SCALAR"), + ), + ), + ], + ), + FieldPath( + klima_bridged_carbon_subgraph, + TypeRef.Named(name="Bar", kind="OBJECT"), + TypeRef.Named(name="Boolean", kind="SCALAR"), + [ + ( + None, + TypeMeta.FieldMeta( + name="bar", + description="", + args=[], + type=TypeRef.Named(name="Bar", kind="OBJECT"), + ), + ), + ( + None, + TypeMeta.FieldMeta( + name="e", + description="", + args=[], + type=TypeRef.Named(name="Boolean", kind="SCALAR"), + ), + ), + ], + ), + ] + + assert_frame_equal(df_of_json(json, fpaths), expected) + + +def test_df_of_json_flat_2(univ3_subgraph: Subgraph): + expected = pl.DataFrame( + data=[ + { + "token_id": "0xdbdb4d16eda451d0503b854cf79d55697f90c8df", + "token_name": "Alchemix", + "token_symbol": "ALCX", + } + ] + ) + + json = [ + { + "token": { + "id": "0xdbdb4d16eda451d0503b854cf79d55697f90c8df", + "name": "Alchemix", + "symbol": "ALCX", + } + } + ] + + fpaths = [ + FieldPath( + univ3_subgraph, + TypeRef.Named(name="Token", kind="OBJECT"), + TypeRef.Named(name="String", kind="SCALAR"), + [ + ( + None, + TypeMeta.FieldMeta( + name="token", + description="", + args=[], + type=TypeRef.Named(name="Token", kind="OBJECT"), + ), + ), + ( + None, + TypeMeta.FieldMeta( + name="id", + description="", + args=[], + type=TypeRef.Named(name="String", kind="SCALAR"), + ), + ), + ], + ), + FieldPath( + univ3_subgraph, + TypeRef.Named(name="Token", kind="OBJECT"), + 
TypeRef.Named(name="String", kind="SCALAR"), + [ + ( + None, + TypeMeta.FieldMeta( + name="token", + description="", + args=[], + type=TypeRef.Named(name="Token", kind="OBJECT"), + ), + ), + ( + None, + TypeMeta.FieldMeta( + name="name", + description="", + args=[], + type=TypeRef.Named(name="String", kind="SCALAR"), + ), + ), + ], + ), + FieldPath( + univ3_subgraph, + TypeRef.Named(name="Token", kind="OBJECT"), + TypeRef.Named(name="String", kind="SCALAR"), + [ + ( + None, + TypeMeta.FieldMeta( + name="token", + description="", + args=[], + type=TypeRef.Named(name="Token", kind="OBJECT"), + ), + ), + ( + None, + TypeMeta.FieldMeta( + name="symbol", + description="", + args=[], + type=TypeRef.Named(name="String", kind="SCALAR"), + ), + ), + ], + ), + ] + + assert_frame_equal(df_of_json(json, fpaths), expected) + + +def test_df_of_json_sfield(univ3_subgraph: Subgraph): + expected = pl.DataFrame( + data={ + "swaps_timestamp": [ + 1643206992, + 1643206992, + 1643206992, + 1643206992, + 1643206992, + 1643206992, + 1643206992, + 1643206992, + 1643206992, + 1643206992, + ], + "swaps_pool_id": [ + "0x88e6a0c2ddd26feeb64f039a2c41296fcb3f5640", + "0xc2e9f25be6257c210d7adf0d4cd6e3e881ba25f8", + "0xcf7e21b96a7dae8e1663b5a266fd812cbe973e70", + "0x88e6a0c2ddd26feeb64f039a2c41296fcb3f5640", + "0x00f59b15dc1fe2e16cde0678d2164fd5ff10e424", + "0x298b7c5e0770d151e4c5cf6cca4dae3a3ffc8e27", + "0x88e6a0c2ddd26feeb64f039a2c41296fcb3f5640", + "0x11b815efb8f581194ae79006d24e0d814b7697f6", + "0x88e6a0c2ddd26feeb64f039a2c41296fcb3f5640", + "0x60594a405d53811d3bc4766596efd80fd545a270", + ], + "swaps_token0_symbol": [ + "USDC", + "DAI", + "gOHM", + "USDC", + "STC", + "MIM", + "USDC", + "WETH", + "USDC", + "DAI", + ], + "swaps_token1_symbol": [ + "WETH", + "WETH", + "WETH", + "WETH", + "WETH", + "USDC", + "WETH", + "USDT", + "WETH", + "WETH", + ], + "swaps_price": [ + 2658.5258552452533, + 2671.2712442099437, + 0.514571064347756, + 2653.578717169152, + 509832.97241621936, + 0.9994968933919802, 
+ 2654.7704568332915, + 0.00037697663801254737, + 2661.9363854642856, + 2657.060762848602, + ], + } + ) + + json = [ + { + "swaps": [ + { + "amount0": -233931.515098, + "amount1": 87.99294339622642, + "pool": {"id": "0x88e6a0c2ddd26feeb64f039a2c41296fcb3f5640"}, + "timestamp": 1643206992, + "token0": {"symbol": "USDC"}, + "token1": {"symbol": "WETH"}, + "price": 2658.5258552452533, + }, + { + "amount0": 2298.7018250845344, + "amount1": -0.8605272976553902, + "pool": {"id": "0xc2e9f25be6257c210d7adf0d4cd6e3e881ba25f8"}, + "timestamp": 1643206992, + "token0": {"symbol": "DAI"}, + "token1": {"symbol": "WETH"}, + "price": 2671.2712442099437, + }, + { + "amount0": -0.4428024474548323, + "amount1": 0.8605272976553902, + "pool": {"id": "0xcf7e21b96a7dae8e1663b5a266fd812cbe973e70"}, + "timestamp": 1643206992, + "token0": {"symbol": "gOHM"}, + "token1": {"symbol": "WETH"}, + "price": 0.514571064347756, + }, + { + "amount0": -1847.299583, + "amount1": 0.6961540545406192, + "pool": {"id": "0x88e6a0c2ddd26feeb64f039a2c41296fcb3f5640"}, + "timestamp": 1643206992, + "token0": {"symbol": "USDC"}, + "token1": {"symbol": "WETH"}, + "price": 2653.578717169152, + }, + { + "amount0": 791669.33237876, + "amount1": -1.5528013588977019, + "pool": {"id": "0x00f59b15dc1fe2e16cde0678d2164fd5ff10e424"}, + "timestamp": 1643206992, + "token0": {"symbol": "STC"}, + "token1": {"symbol": "WETH"}, + "price": 509832.97241621936, + }, + { + "amount0": -99949.68933919803, + "amount1": 100000.0, + "pool": {"id": "0x298b7c5e0770d151e4c5cf6cca4dae3a3ffc8e27"}, + "timestamp": 1643206992, + "token0": {"symbol": "MIM"}, + "token1": {"symbol": "USDC"}, + "price": 0.9994968933919802, + }, + { + "amount0": -106116.660281, + "amount1": 39.972066137717945, + "pool": {"id": "0x88e6a0c2ddd26feeb64f039a2c41296fcb3f5640"}, + "timestamp": 1643206992, + "token0": {"symbol": "USDC"}, + "token1": {"symbol": "WETH"}, + "price": 2654.7704568332915, + }, + { + "amount0": 29.51901886792453, + "amount1": -78304.637188, + 
"pool": {"id": "0x11b815efb8f581194ae79006d24e0d814b7697f6"}, + "timestamp": 1643206992, + "token0": {"symbol": "WETH"}, + "token1": {"symbol": "USDT"}, + "price": 0.00037697663801254737, + }, + { + "amount0": -74534.218793, + "amount1": 28.0, + "pool": {"id": "0x88e6a0c2ddd26feeb64f039a2c41296fcb3f5640"}, + "timestamp": 1643206992, + "token0": {"symbol": "USDC"}, + "token1": {"symbol": "WETH"}, + "price": 2661.9363854642856, + }, + { + "amount0": -15043.573567, + "amount1": 5.661734867843959, + "pool": {"id": "0x60594a405d53811d3bc4766596efd80fd545a270"}, + "timestamp": 1643206992, + "token0": {"symbol": "DAI"}, + "token1": {"symbol": "WETH"}, + "price": 2657.060762848602, + }, + ] + } + ] + + fpaths = [ + FieldPath( + univ3_subgraph, + TypeRef.non_null_list("Swap", kind="OBJECT"), + TypeRef.Named(name="BigInt", kind="SCALAR"), + [ + ( + None, + TypeMeta.FieldMeta( + name="swaps", + description="", + args=[], + type=TypeRef.non_null_list("Swap", kind="OBJECT"), + ), + ), + ( + None, + TypeMeta.FieldMeta( + name="timestamp", + description="", + args=[], + type=TypeRef.Named(name="BigInt", kind="SCALAR"), + ), + ), + ], + ), + FieldPath( + univ3_subgraph, + TypeRef.non_null_list("Swap", kind="OBJECT"), + TypeRef.Named(name="String", kind="SCALAR"), + [ + ( + None, + TypeMeta.FieldMeta( + name="swaps", + description="", + args=[], + type=TypeRef.non_null_list("Swap", kind="OBJECT"), + ), + ), + ( + None, + TypeMeta.FieldMeta( + name="pool", + description="", + args=[], + type=TypeRef.Named(name="Pool", kind="OBJECT"), + ), + ), + ( + None, + TypeMeta.FieldMeta( + name="id", + description="", + args=[], + type=TypeRef.Named(name="String", kind="SCALAR"), + ), + ), + ], + ), + FieldPath( + univ3_subgraph, + TypeRef.non_null_list("Swap", kind="OBJECT"), + TypeRef.Named(name="String", kind="SCALAR"), + [ + ( + None, + TypeMeta.FieldMeta( + name="swaps", + description="", + args=[], + type=TypeRef.non_null_list("Swap", kind="OBJECT"), + ), + ), + ( + None, + 
TypeMeta.FieldMeta( + name="token0", + description="", + args=[], + type=TypeRef.Named(name="Token", kind="OBJECT"), + ), + ), + ( + None, + TypeMeta.FieldMeta( + name="symbol", + description="", + args=[], + type=TypeRef.Named(name="String", kind="SCALAR"), + ), + ), + ], + ), + FieldPath( + univ3_subgraph, + TypeRef.non_null_list("Swap", kind="OBJECT"), + TypeRef.Named(name="String", kind="SCALAR"), + [ + ( + None, + TypeMeta.FieldMeta( + name="swaps", + description="", + args=[], + type=TypeRef.non_null_list("Swap", kind="OBJECT"), + ), + ), + ( + None, + TypeMeta.FieldMeta( + name="token1", + description="", + args=[], + type=TypeRef.Named(name="Token", kind="OBJECT"), + ), + ), + ( + None, + TypeMeta.FieldMeta( + name="symbol", + description="", + args=[], + type=TypeRef.Named(name="String", kind="SCALAR"), + ), + ), + ], + ), + FieldPath( + univ3_subgraph, + TypeRef.non_null_list("Swap", kind="OBJECT"), + TypeRef.Named(name="BigInt", kind="SCALAR"), + [ + ( + None, + TypeMeta.FieldMeta( + name="swaps", + description="", + args=[], + type=TypeRef.non_null_list("Swap", kind="OBJECT"), + ), + ), + ( + None, + TypeMeta.FieldMeta( + name="price", + description="", + args=[], + type=TypeRef.Named(name="BigDecimal", kind="OBJECT"), + ), + ), + ], + ), + ] + + assert_frame_equal(df_of_json(json, fpaths), expected) + + +def test_df_of_json_6(univ3_subgraph: Subgraph): + expected = pl.DataFrame( + data={ + "pools_id": [ + "0x1d42064fc4beb5f8aaf85f4617ae8b3b5b8bd801", + "0x1d42064fc4beb5f8aaf85f4617ae8b3b5b8bd801", + "0x6c6bc977e13df9b0de53b251522280bb72383700", + "0x6c6bc977e13df9b0de53b251522280bb72383700", + ], + "pools_token0_symbol": ["UNI", "UNI", "DAI", "DAI"], + "pools_token1_symbol": ["WETH", "WETH", "USDC", "USDC"], + "pools_swaps_timestamp": [1643206408, 1643205177, 1643206881, 1643206138], + "pools_swaps_amountUSD": [ + 2261.033938028949777104204086349881, + 35534.09369471071499539246013240419, + 5497.8352603453796463465, + 243989.9166851892808097625, + ], + 
} + ) + + json = [ + { + "pools": [ + { + "id": "0x1d42064fc4beb5f8aaf85f4617ae8b3b5b8bd801", + "token0": {"symbol": "UNI"}, + "token1": {"symbol": "WETH"}, + "swaps": [ + {"amountUSD": 2261.0339380289497, "timestamp": 1643206408}, + {"amountUSD": 35534.09369471072, "timestamp": 1643205177}, + ], + }, + { + "id": "0x6c6bc977e13df9b0de53b251522280bb72383700", + "token0": {"symbol": "DAI"}, + "token1": {"symbol": "USDC"}, + "swaps": [ + {"amountUSD": 5497.8352603453795, "timestamp": 1643206881}, + {"amountUSD": 243989.9166851893, "timestamp": 1643206138}, + ], + }, + ] + } + ] + + fpaths = [ + FieldPath( + univ3_subgraph, + TypeRef.non_null_list("Pool"), + TypeRef.Named(name="BigInt", kind="SCALAR"), + [ + ( + None, + TypeMeta.FieldMeta( + name="pools", + description="", + args=[], + type=TypeRef.non_null_list("Pool"), + ), + ), + ( + None, + TypeMeta.FieldMeta( + name="id", + description="", + args=[], + type=TypeRef.Named(name="String", kind="SCALAR"), + ), + ), + ], + ), + FieldPath( + univ3_subgraph, + TypeRef.non_null_list("Pool"), + TypeRef.Named(name="BigInt", kind="SCALAR"), + [ + ( + None, + TypeMeta.FieldMeta( + name="pools", + description="", + args=[], + type=TypeRef.non_null_list("Pool"), + ), + ), + ( + None, + TypeMeta.FieldMeta( + name="token0", + description="", + args=[], + type=TypeRef.Named(name="Token", kind="OBJECT"), + ), + ), + ( + None, + TypeMeta.FieldMeta( + name="symbol", + description="", + args=[], + type=TypeRef.Named(name="String", kind="SCALAR"), + ), + ), + ], + ), + FieldPath( + univ3_subgraph, + TypeRef.non_null_list("Pool"), + TypeRef.Named(name="BigInt", kind="SCALAR"), + [ + ( + None, + TypeMeta.FieldMeta( + name="pools", + description="", + args=[], + type=TypeRef.non_null_list("Pool"), + ), + ), + ( + None, + TypeMeta.FieldMeta( + name="token1", + description="", + args=[], + type=TypeRef.Named(name="Token", kind="OBJECT"), + ), + ), + ( + None, + TypeMeta.FieldMeta( + name="symbol", + description="", + args=[], + 
type=TypeRef.Named(name="String", kind="SCALAR"), + ), + ), + ], + ), + FieldPath( + univ3_subgraph, + TypeRef.non_null_list("Pool"), + TypeRef.Named(name="BigInt", kind="SCALAR"), + [ + ( + None, + TypeMeta.FieldMeta( + name="pools", + description="", + args=[], + type=TypeRef.non_null_list("Pool"), + ), + ), + ( + None, + TypeMeta.FieldMeta( + name="swaps", + description="", + args=[], + type=TypeRef.non_null_list("Swap", kind="OBJECT"), + ), + ), + ( + None, + TypeMeta.FieldMeta( + name="timestamp", + description="", + args=[], + type=TypeRef.Named(name="BigInt", kind="SCALAR"), + ), + ), + ], + ), + FieldPath( + univ3_subgraph, + TypeRef.non_null_list("Pool"), + TypeRef.Named(name="BigInt", kind="SCALAR"), + [ + ( + None, + TypeMeta.FieldMeta( + name="pools", + description="", + args=[], + type=TypeRef.non_null_list("Pool"), + ), + ), + ( + None, + TypeMeta.FieldMeta( + name="swaps", + description="", + args=[], + type=TypeRef.non_null_list("Swap", kind="OBJECT"), + ), + ), + ( + None, + TypeMeta.FieldMeta( + name="amountUSD", + description="", + args=[], + type=TypeRef.Named(name="BigDecimal", kind="OBJECT"), + ), + ), + ], + ), + ] + + assert_frame_equal(df_of_json(json, fpaths), expected) + + +def test_df_of_json_semiflat_1(univ3_subgraph: Subgraph): + expected = pd.DataFrame( + data={ + "token_id": "0xdbdb4d16eda451d0503b854cf79d55697f90c8df", + "token_name": "Alchemix", + "token_symbol": "ALCX", + "token_whitelistPools_id": [ + "0x689b322bf5056487eec7f9b2577cd43a37eb6302", + "0xb80946cd2b4b68bedd769a21ca2f096ead6e0ee8", + ], + } + ) + + json = [ + { + "token": { + "id": "0xdbdb4d16eda451d0503b854cf79d55697f90c8df", + "name": "Alchemix", + "symbol": "ALCX", + "whitelistPools": [ + {"id": "0x689b322bf5056487eec7f9b2577cd43a37eb6302"}, + {"id": "0xb80946cd2b4b68bedd769a21ca2f096ead6e0ee8"}, + ], + } + } + ] + + fpaths = [ + FieldPath( + univ3_subgraph, + TypeRef.Named(name="Token", kind="OBJECT"), + TypeRef.Named(name="BigInt", kind="SCALAR"), + [ + ( + None, 
+ TypeMeta.FieldMeta( + name="token", + description="", + args=[], + type=TypeRef.Named(name="Token", kind="OBJECT"), + ), + ), + ( + None, + TypeMeta.FieldMeta( + name="id", + description="", + args=[], + type=TypeRef.Named(name="String", kind="SCALAR"), + ), + ), + ], + ), + FieldPath( + univ3_subgraph, + TypeRef.Named(name="Token", kind="OBJECT"), + TypeRef.Named(name="BigInt", kind="SCALAR"), + [ + ( + None, + TypeMeta.FieldMeta( + name="token", + description="", + args=[], + type=TypeRef.Named(name="Token", kind="OBJECT"), + ), + ), + ( + None, + TypeMeta.FieldMeta( + name="name", + description="", + args=[], + type=TypeRef.Named(name="String", kind="SCALAR"), + ), + ), + ], + ), + FieldPath( + univ3_subgraph, + TypeRef.Named(name="Token", kind="OBJECT"), + TypeRef.Named(name="BigInt", kind="SCALAR"), + [ + ( + None, + TypeMeta.FieldMeta( + name="token", + description="", + args=[], + type=TypeRef.Named(name="Token", kind="OBJECT"), + ), + ), + ( + None, + TypeMeta.FieldMeta( + name="symbol", + description="", + args=[], + type=TypeRef.Named(name="String", kind="SCALAR"), + ), + ), + ], + ), + FieldPath( + univ3_subgraph, + TypeRef.Named(name="Token", kind="OBJECT"), + TypeRef.Named(name="BigInt", kind="SCALAR"), + [ + ( + None, + TypeMeta.FieldMeta( + name="token", + description="", + args=[], + type=TypeRef.Named(name="Token", kind="OBJECT"), + ), + ), + ( + None, + TypeMeta.FieldMeta( + name="whitelistPools", + description="", + args=[], + type=TypeRef.non_null_list("Pool"), + ), + ), + ( + None, + TypeMeta.FieldMeta( + name="id", + description="", + args=[], + type=TypeRef.Named(name="String", kind="SCALAR"), + ), + ), + ], + ), + ] + + assert_frame_equal(df_of_json(json, fpaths), expected) + + +def test_df_of_json_semiflat_2(univ3_subgraph: Subgraph): + expected = pl.DataFrame( + data={ + "pool_token0_symbol": "USDC", + "pool_token1_symbol": "WETH", + "pool_swaps_timestamp": [1620179783, 1620203006, 1620208251, 1620215997], + } + ) + + json = [ + { + 
"pool": { + "token0": {"symbol": "USDC"}, + "token1": {"symbol": "WETH"}, + "swaps": [ + {"timestamp": 1620179783}, + {"timestamp": 1620203006}, + {"timestamp": 1620208251}, + {"timestamp": 1620215997}, + ], + } + } + ] + + fpaths = [ + FieldPath( + univ3_subgraph, + TypeRef.Named(name="Pool", kind="OBJECT"), + TypeRef.Named(name="BigInt", kind="SCALAR"), + [ + ( + None, + TypeMeta.FieldMeta( + name="pool", + description="", + args=[], + type=TypeRef.non_null_list("Pool"), + ), + ), + ( + None, + TypeMeta.FieldMeta( + name="token0", + description="", + args=[], + type=TypeRef.Named(name="Token", kind="OBJECT"), + ), + ), + ( + None, + TypeMeta.FieldMeta( + name="symbol", + description="", + args=[], + type=TypeRef.Named(name="String", kind="SCALAR"), + ), + ), + ], + ), + FieldPath( + univ3_subgraph, + TypeRef.Named(name="Pool", kind="OBJECT"), + TypeRef.Named(name="BigInt", kind="SCALAR"), + [ + ( + None, + TypeMeta.FieldMeta( + name="pool", + description="", + args=[], + type=TypeRef.non_null_list("Pool"), + ), + ), + ( + None, + TypeMeta.FieldMeta( + name="token1", + description="", + args=[], + type=TypeRef.Named(name="Token", kind="OBJECT"), + ), + ), + ( + None, + TypeMeta.FieldMeta( + name="symbol", + description="", + args=[], + type=TypeRef.Named(name="String", kind="SCALAR"), + ), + ), + ], + ), + FieldPath( + univ3_subgraph, + TypeRef.non_null_list("Pool"), + TypeRef.Named(name="BigInt", kind="SCALAR"), + [ + ( + None, + TypeMeta.FieldMeta( + name="pool", + description="", + args=[], + type=TypeRef.non_null_list("Pool"), + ), + ), + ( + None, + TypeMeta.FieldMeta( + name="swaps", + description="", + args=[], + type=TypeRef.non_null_list("Swap", kind="OBJECT"), + ), + ), + ( + None, + TypeMeta.FieldMeta( + name="timestamp", + description="", + args=[], + type=TypeRef.Named(name="BigInt", kind="SCALAR"), + ), + ), + ], + ), + ] + + assert_frame_equal(df_of_json(json, fpaths), expected)