Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
36 changes: 36 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,39 @@
# DataSHIELD Interface Python

This DataSHIELD Client Interface is a Python port of the original DataSHIELD Client Interface written in R ([DSI](https://github.com/datashield/DSI)). The provided interface can be implemented for accessing a data repository supporting the DataSHIELD infrastructure: controlled R commands executed on the server side guarantee that only non-disclosive information is returned to the client side.

## Configuration

The search path for the DataSHIELD configuration file is the following:

1. User general location: `~/.config/datashield/config.yaml`
2. Current project specific location: `./.datashield/config.yaml`

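The configurations are merged in the order listed above: a server entry from a later file replaces an entry with the same name from an earlier file, and entries that only exist in an earlier file are kept (server names must therefore be unique).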

The format of the DataSHIELD configuration file is:

```yaml
servers:
  - name: server1
    url: https://opal-demo.obiba.org
    user: dsuser
    password: P@ssw0rd
  - name: server2
    url: https://opal.example.org
    token: your-access-token-here
    profile: default
  - name: server3
    url: https://study.example.org/opal
    user: dsuser
    password: P@ssw0rd
    profile: custom
    driver: datashield_opal.OpalDriver
```

Each server entry in the list must have:
- `name`: Unique identifier for the server
- `url`: The server URL
- Authentication: Either `user` and `password`, or `token` (recommended)
- `profile`: DataSHIELD profile name (optional, defaults to "default")
- `driver`: Connection driver class name (optional, defaults to "datashield_opal.OpalDriver")
1 change: 1 addition & 0 deletions datashield/__init__.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from datashield.interface import (
DSConnection as DSConnection,
DSConfig as DSConfig,
DSLoginInfo as DSLoginInfo,
DSDriver as DSDriver,
DSError as DSError,
Expand Down
26 changes: 22 additions & 4 deletions datashield/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
"""

import logging
from datashield.interface import DSLoginInfo, DSConnection, DSDriver, DSError
from datashield.interface import DSConfig, DSLoginInfo, DSConnection, DSDriver, DSError
import time


Expand All @@ -12,8 +12,24 @@ class DSLoginBuilder:
Helper class to formalize DataSHIELD login arguments for a set of servers.
"""

def __init__(self):
def __init__(self, names: list[str] | None = None):
    """Create a builder, optionally loading login information from configuration files
    for the specified server names.

    Every requested name that cannot be found in the configuration is reported with a
    warning; the names that are found are still loaded.

    :param names: The list of server names to load from configuration files, if any. If not defined
        (or empty), no login information will be loaded from configuration files.
    """
    self.items: list[DSLoginInfo] = []
    if not names:
        return

    # load login information from configuration files, in order of precedence
    config = DSConfig.load()
    requested = set(names)
    matches = [x for x in (config.servers or []) if x.name in requested]
    self.items.extend(matches)

    # report every requested name that is missing, not only the "nothing matched" case,
    # so that a typo in one name is not hidden by the other names being found
    found = {x.name for x in matches}
    missing = [name for name in dict.fromkeys(names) if name not in found]
    if missing:
        logging.warning(f"No matching server names found in configuration for: {', '.join(missing)}")

def add(
self,
Expand Down Expand Up @@ -46,7 +62,9 @@ def add(
raise ValueError(f"Server name must be unique: {name}")
if user is None and token is None:
raise ValueError("Either user or token must be provided")
self.items.append(DSLoginInfo(name, url, user, password, token, profile, driver))
self.items.append(
DSLoginInfo(name=name, url=url, user=user, password=password, token=token, profile=profile, driver=driver)
)
return self

def remove(self, name: str):
Expand Down Expand Up @@ -109,7 +127,7 @@ def open(self, restore: str = None, failSafe: bool = False) -> None:
raise e
if self.has_errors():
for name in self.errors:
print(f"Connection to {name} has failed")
logging.error(f"Connection to {name} has failed")

def close(self, save: str = None) -> None:
"""
Expand Down
106 changes: 86 additions & 20 deletions datashield/interface.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,31 +3,97 @@
"""

import importlib
import logging
import os
import yaml
from pydantic import BaseModel, Field, model_validator

# Default configuration file paths to look for DataSHIELD login information, in order of precedence
CONFIG_FILES = ["~/.config/datashield/config.yaml", "./.datashield/config.yaml"]

class DSLoginInfo:

class DSLoginInfo(BaseModel):
"""
Helper class with DataSHIELD login details.
"""

def __init__(
self,
name: str,
url: str,
user: str = None,
password: str = None,
token: str = None,
profile: str = "default",
driver: str = "datashield_opal.OpalDriver",
):
self.items = []
self.name = name
self.url = url
self.user = user
self.password = password
self.token = token
self.profile = profile if profile is not None else "default"
self.driver = driver if driver is not None else "datashield_opal.OpalDriver"
name: str
url: str
user: str | None = None
password: str | None = None
token: str | None = None
profile: str = "default"
driver: str = "datashield_opal.OpalDriver"

model_config = {"extra": "forbid"}

@model_validator(mode="after")
def validate_credentials(self) -> "DSLoginInfo":
if self.user is None and self.token is None:
raise ValueError("Either user or token must be provided")
return self


class DSConfig(BaseModel):
    """
    Helper class with DataSHIELD configuration details.
    """

    # Login details of the configured servers; empty when nothing was configured
    servers: list[DSLoginInfo] = Field(default_factory=list)

    # Unknown top-level keys are rejected instead of being silently ignored
    model_config = {"extra": "forbid"}

    @classmethod
    def load(cls) -> "DSConfig":
        """
        Load the DataSHIELD configuration from the default configuration files.

        All readable configuration files listed in ``CONFIG_FILES`` are processed in
        order. Their configurations are merged, with servers identified by their
        ``name`` field. If the same server name appears more than once (in the same
        file or in several files), the latest definition takes precedence and replaces
        the earlier one. Servers that are only present in earlier files are kept.

        Files that are missing or not readable are silently skipped. Files that cannot
        be parsed or validated are logged as errors and skipped.

        :return: The DataSHIELD configuration object (with no servers if nothing could be loaded)
        """
        servers_by_name: dict[str, DSLoginInfo] = {}
        for config_file in CONFIG_FILES:
            path = os.path.expanduser(config_file)
            # check file exists and is readable, if not, silently ignore
            if not os.path.exists(path) or not os.access(path, os.R_OK):
                continue
            try:
                config = cls.load_from_file(path)
            except Exception as e:
                # Log and ignore errors, e.g. invalid format. Only the exception type is
                # reported: parser/validation messages may echo file content (credentials).
                logging.error(f"Failed to load login information from {config_file}: {type(e).__name__}")
                continue
            # merge servers by name, new ones replacing existing ones, and keep the rest of existing ones
            for server in config.servers:
                servers_by_name[server.name] = server
        return cls(servers=list(servers_by_name.values()))

    @classmethod
    def load_from_file(cls, file: str) -> "DSConfig":
        """
        Load the DataSHIELD configuration from a YAML file. The file must contain a list of servers with their login details.

        :param file: The path to the YAML file containing the DataSHIELD configuration
        :return: The DataSHIELD configuration object
        :raises OSError: If the file cannot be opened
        :raises yaml.YAMLError: If the file is not valid YAML
        :raises pydantic.ValidationError: If the content does not match the expected structure
        """
        # explicit encoding: do not depend on the platform default when reading YAML
        with open(file, encoding="utf-8") as f:
            config_data = yaml.safe_load(f)

        # an empty file is parsed as None: treat it as an empty configuration
        if config_data is None:
            config_data = {}

        return cls.model_validate(config_data)


class DSResult:
Expand Down Expand Up @@ -409,7 +475,7 @@ def new_connection(cls, args: DSLoginInfo, restore: str = None) -> DSConnection:
raise NotImplementedError("DSConnection function not available")

@classmethod
def load_class(cls, name: str) -> any:
def load_class(cls, name: str) -> type["DSDriver"]:
"""
Load a class from its fully qualified name (dot separated).

Expand Down
7 changes: 5 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[project]
name = "datashield"
version = "0.2.0"
version = "0.3.0"
description = "DataSHIELD Client Interface in Python."
authors = [
{name = "Yannick Marcon", email = "yannick.marcon@obiba.org"}
Expand All @@ -22,7 +22,10 @@ classifiers = [
"Programming Language :: Python :: 3.12",
"Programming Language :: Python :: 3.13",
]
dependencies = []
dependencies = [
"pydantic>=2.0",
"PyYAML>=6.0",
]

[project.optional-dependencies]
test = [
Expand Down
Loading
Loading