From 827cc767ac010f06c76e10819211c7b279cf9ee6 Mon Sep 17 00:00:00 2001 From: pmblanco Date: Fri, 21 Nov 2025 16:18:49 +0100 Subject: [PATCH 01/55] first draft --- pyMBE/storage/base_type.py | 13 ++++++ pyMBE/storage/df_management.py | 68 +++++++++++++++++++++++++++++ pyMBE/storage/instances/particle.py | 25 +++++++++++ pyMBE/storage/templates/particle.py | 50 +++++++++++++++++++++ test.py | 67 ++++++++++++++++++++++++++++ 5 files changed, 223 insertions(+) create mode 100644 pyMBE/storage/base_type.py create mode 100644 pyMBE/storage/instances/particle.py create mode 100644 pyMBE/storage/templates/particle.py create mode 100644 test.py diff --git a/pyMBE/storage/base_type.py b/pyMBE/storage/base_type.py new file mode 100644 index 0000000..e3aaa49 --- /dev/null +++ b/pyMBE/storage/base_type.py @@ -0,0 +1,13 @@ +from pydantic import BaseModel, Field + +class PMBBaseModel(BaseModel): + """ + Base class for all pyMBE models: + - Hard-coded pmb_type in subclasses + """ + + pmb_type: str = Field(frozen=True) + + class Config: + validate_assignment = True + extra = "forbid" diff --git a/pyMBE/storage/df_management.py b/pyMBE/storage/df_management.py index f01b950..2b1e739 100644 --- a/pyMBE/storage/df_management.py +++ b/pyMBE/storage/df_management.py @@ -24,7 +24,75 @@ import logging import warnings +from typing import Dict, Type, Callable +from pyMBE.storage.base_type import PMBBaseModel + class _DFManagement: + """ + Storage: + templates[pmb_type][template_name] = pd.DataFrame + instances[pmb_type][particle_id] = InstanceModel + """ + + def __init__(self): + self.templates: Dict[str, Dict[str, PMBBaseModel]] = {} + self.instances: Dict[str, Dict[int, PMBBaseModel]] = {} + + # ---------------------------------------------------------------------- + # TEMPLATE MANAGEMENT + # ---------------------------------------------------------------------- + def register_template(self, template: PMBBaseModel): + pmb_type = template.pmb_type + template_name = template.name + + if 
pmb_type not in self.templates: + self.templates[pmb_type] = {} + + if template_name in self.templates[pmb_type]: + raise ValueError( + f"Template '{template_name}' already exists for type '{pmb_type}'." + ) + + self.templates[pmb_type][template_name] = template + + # ---------------------------------------------------------------------- + # INSTANCE MANAGEMENT + # ---------------------------------------------------------------------- + def register_instance(self, instance: PMBBaseModel): + pmb_type = instance.pmb_type + + if not hasattr(instance, "particle_id"): + raise TypeError( + "Instances must define a 'particle_id' field." + ) + + pid = instance.particle_id + template_name = instance.name + + # Check template + if pmb_type not in self.templates: + raise KeyError( + f"No templates registered for pmb_type '{pmb_type}'." + ) + + if template_name not in self.templates[pmb_type]: + raise ValueError( + f"Template '{template_name}' does not exist for type '{pmb_type}'." + ) + + # Check instance dictionary + if pmb_type not in self.instances: + self.instances[pmb_type] = {} + + # Enforce unique particle_id + if pid in self.instances[pmb_type]: + raise ValueError( + f"Duplicate particle_id={pid} for type '{pmb_type}'." 
+ ) + + self.instances[pmb_type][pid] = instance + + class _NumpyEncoder(json.JSONEncoder): """ diff --git a/pyMBE/storage/instances/particle.py b/pyMBE/storage/instances/particle.py new file mode 100644 index 0000000..fa0a8fb --- /dev/null +++ b/pyMBE/storage/instances/particle.py @@ -0,0 +1,25 @@ +# instances/particle.py + +from pydantic import Field, field_validator +from ..base_type import PMBBaseModel + + +class ParticleInstance(PMBBaseModel): + """ + Instantiated particle in the system: + - particle_id is unique + - template_name links to ParticleTemplate + - can override active_state if needed + """ + + pmb_type: str = Field(default="particle", frozen=True) + + particle_id: int + name: str + active_state: str | None = None + + @field_validator("particle_id") + def validate_particle_id(cls, pid): + if pid < 0: + raise ValueError("particle_id must be a non-negative integer.") + return pid diff --git a/pyMBE/storage/templates/particle.py b/pyMBE/storage/templates/particle.py new file mode 100644 index 0000000..d6bccdc --- /dev/null +++ b/pyMBE/storage/templates/particle.py @@ -0,0 +1,50 @@ +from typing import Dict +from pydantic import Field, field_validator + +from ..base_type import PMBBaseModel + + +class ParticleState(PMBBaseModel): + pmb_type: str = Field(default="particle", frozen=True) + + label: str + es_type: int + charge: int + + +class ParticleTemplate(PMBBaseModel): + """ + Template describing the type of particle: + - sigma, epsilon + - allowed states + - template_name = unique string identifier + """ + + pmb_type: str = Field(default="particle", frozen=True) + + name: str + sigma: float + epsilon: float + + states: Dict[str, ParticleState] = Field(default_factory=dict) + default_state: str | None = None + + # ---------------- Validators ----------------- + + @field_validator("default_state") + def validate_default_state(cls, v, values): + if v is None: + return v + if "states" in values and v not in values["states"]: + raise ValueError( + 
f"default_state '{v}' not found in states " + f"({list(values['states'].keys())})" + ) + return v + + # ---------------- Helpers ----------------- + + def add_state(self, state: ParticleState): + if state.label in self.states: + raise ValueError(f"State '{state.label}' already exists.") + self.states[state.label] = state diff --git a/test.py b/test.py new file mode 100644 index 0000000..b5b02f3 --- /dev/null +++ b/test.py @@ -0,0 +1,67 @@ + +""" +Test script for the pyMBE DFManagement API +with hard-coded pmb_type in templates and instances. +""" + +from pyMBE.storage.df_management import _DFManagement +from pyMBE.storage.templates.particle import ParticleTemplate +from pyMBE.storage.instances.particle import ParticleInstance + + +def main(): + print("=== Testing DFManagement API ===") + + pmb = _DFManagement() + + # ------------------------------------------------------- + # 1. Create two particle templates + # ------------------------------------------------------- + tpl_A = ParticleTemplate( + name="WCA_bead", + sigma=1.0, + epsilon=1.0 + ) + + tpl_B = ParticleTemplate( + name="LJ_bead", + sigma=0.5, + epsilon=2.0 + ) + + # Register templates + print("\nRegistering templates...") + pmb.register_template(tpl_A) + pmb.register_template(tpl_B) + + print("Registered templates:") + for tname in pmb.templates["particle"]: + print(f" - {tname}") + + # ------------------------------------------------------- + # 2. 
Create particle instances + # ------------------------------------------------------- + inst_1 = ParticleInstance( + particle_id=1, + name="WCA_bead" + ) + inst_2 = ParticleInstance( + particle_id=2, + name="LJ_bead" + ) + + inst_3 = ParticleInstance( + particle_id=3, + name="LJ_bead" + ) + + # Register instances + print("\nRegistering instances...") + pmb.register_instance(inst_1) + pmb.register_instance(inst_2) + pmb.register_instance(inst_3) + print("Registered instances:") + print(pmb.instances) + +if __name__ == "__main__": + main() \ No newline at end of file From 8d8c4602d32f73445b261ea4a4a0c9035de94c55 Mon Sep 17 00:00:00 2001 From: pmblanco Date: Tue, 25 Nov 2025 11:32:32 +0100 Subject: [PATCH 02/55] Database with reactions and states disentangled --- pyMBE/storage/base_type.py | 3 +- pyMBE/storage/df_management.py | 139 +++++++++++++++++----------- pyMBE/storage/instances/particle.py | 15 +-- pyMBE/storage/templates/particle.py | 43 ++++----- test.py | 113 +++++++++++----------- 5 files changed, 168 insertions(+), 145 deletions(-) diff --git a/pyMBE/storage/base_type.py b/pyMBE/storage/base_type.py index e3aaa49..ef5f829 100644 --- a/pyMBE/storage/base_type.py +++ b/pyMBE/storage/base_type.py @@ -7,7 +7,8 @@ class PMBBaseModel(BaseModel): """ pmb_type: str = Field(frozen=True) - + name: str + class Config: validate_assignment = True extra = "forbid" diff --git a/pyMBE/storage/df_management.py b/pyMBE/storage/df_management.py index 2b1e739..ca7357c 100644 --- a/pyMBE/storage/df_management.py +++ b/pyMBE/storage/df_management.py @@ -25,7 +25,10 @@ import warnings from typing import Dict, Type, Callable -from pyMBE.storage.base_type import PMBBaseModel +from pyMBE.storage.templates.particle import ParticleTemplate +from pyMBE.storage.instances.particle import ParticleInstance +from pyMBE.storage.reactions.reaction import Reaction + class _DFManagement: """ @@ -35,63 +38,93 @@ class _DFManagement: """ def __init__(self): - self.templates: Dict[str, Dict[str, 
PMBBaseModel]] = {} - self.instances: Dict[str, Dict[int, PMBBaseModel]] = {} + self.templates: Dict[str, ParticleTemplate] = {} + self.instances: Dict[int, ParticleInstance] = {} + self.reactions: Dict[str, Reaction] = {} - # ---------------------------------------------------------------------- + # ---------------------------------------- # TEMPLATE MANAGEMENT - # ---------------------------------------------------------------------- - def register_template(self, template: PMBBaseModel): - pmb_type = template.pmb_type - template_name = template.name - - if pmb_type not in self.templates: - self.templates[pmb_type] = {} - - if template_name in self.templates[pmb_type]: - raise ValueError( - f"Template '{template_name}' already exists for type '{pmb_type}'." - ) + # ---------------------------------------- + def register_template(self, template: ParticleTemplate): + if template.name in self.templates: + raise ValueError(f"Template '{template.name}' already exists.") + self.templates[template.name] = template - self.templates[pmb_type][template_name] = template - - # ---------------------------------------------------------------------- + # ---------------------------------------- # INSTANCE MANAGEMENT - # ---------------------------------------------------------------------- - def register_instance(self, instance: PMBBaseModel): - pmb_type = instance.pmb_type - - if not hasattr(instance, "particle_id"): - raise TypeError( - "Instances must define a 'particle_id' field." - ) - + # ---------------------------------------- + def register_instance(self, instance: ParticleInstance): pid = instance.particle_id - template_name = instance.name - - # Check template - if pmb_type not in self.templates: - raise KeyError( - f"No templates registered for pmb_type '{pmb_type}'." - ) - - if template_name not in self.templates[pmb_type]: - raise ValueError( - f"Template '{template_name}' does not exist for type '{pmb_type}'." 
- ) - - # Check instance dictionary - if pmb_type not in self.instances: - self.instances[pmb_type] = {} - - # Enforce unique particle_id - if pid in self.instances[pmb_type]: - raise ValueError( - f"Duplicate particle_id={pid} for type '{pmb_type}'." - ) - - self.instances[pmb_type][pid] = instance - + if pid in self.instances: + raise ValueError(f"Instance with id '{pid}' already exists.") + + # validate particle template + if instance.name not in self.templates: + raise ValueError(f"Particle template '{instance.name}' not found.") + + # validate state + tpl = self.templates[instance.name] + if instance.state_name not in tpl.states: + raise ValueError(f"State '{instance.state_name}' not found for particle '{instance.name}'.") + + self.instances[pid] = instance + + # ---------------------------------------- + # REACTIONS + # ---------------------------------------- + def register_reaction(self, reaction: Reaction): + if reaction.name in self.reactions: + raise ValueError(f"Reaction '{reaction.name}' already exists.") + + # validate participants + for p in reaction.participants: + if p.particle_name not in self.templates: + raise ValueError(f"Unknown particle '{p.particle_name}' in reaction '{reaction.name}'.") + + tpl = self.templates[p.particle_name] + if p.state_name not in tpl.states: + raise ValueError( + f"State '{p.state_name}' not defined for particle '{p.particle_name}'." 
+ ) + + self.reactions[reaction.name] = reaction + + # ---------------------------------------- + # DATAFRAME EXPORT + # ---------------------------------------- + def get_templates_df(self): + rows = [] + for tpl in self.templates.values(): + for sname, st in tpl.states.items(): + rows.append({ + "particle": tpl.name, + "state": sname, + "charge": st.charge, + "es_type": st.es_type + }) + return pd.DataFrame(rows) + + def get_instances_df(self): + rows = [] + for inst in self.instances.values(): + rows.append(inst.model_dump()) + return pd.DataFrame(rows) + + def get_reactions_df(self): + rows = [] + for r in self.reactions.values(): + stoich = { + f"{p.particle_name}:{p.state_name}": p.coefficient + for p in r.participants + } + rows.append({ + "reaction": r.name, + "stoichiometry": stoich, + "constant": r.constant, + "reaction_type": r.reaction_type, + "metadata": r.metadata, + }) + return pd.DataFrame(rows) class _NumpyEncoder(json.JSONEncoder): diff --git a/pyMBE/storage/instances/particle.py b/pyMBE/storage/instances/particle.py index fa0a8fb..68bab71 100644 --- a/pyMBE/storage/instances/particle.py +++ b/pyMBE/storage/instances/particle.py @@ -1,22 +1,15 @@ -# instances/particle.py - +from typing import Optional from pydantic import Field, field_validator from ..base_type import PMBBaseModel class ParticleInstance(PMBBaseModel): """ - Instantiated particle in the system: - - particle_id is unique - - template_name links to ParticleTemplate - - can override active_state if needed + A placed particle within the simulation. 
""" - - pmb_type: str = Field(default="particle", frozen=True) - + pmb_type: str = "particle" particle_id: int - name: str - active_state: str | None = None + state_name: str @field_validator("particle_id") def validate_particle_id(cls, pid): diff --git a/pyMBE/storage/templates/particle.py b/pyMBE/storage/templates/particle.py index d6bccdc..a8d07d8 100644 --- a/pyMBE/storage/templates/particle.py +++ b/pyMBE/storage/templates/particle.py @@ -1,15 +1,14 @@ -from typing import Dict +from typing import Dict, Literal from pydantic import Field, field_validator from ..base_type import PMBBaseModel class ParticleState(PMBBaseModel): - pmb_type: str = Field(default="particle", frozen=True) - - label: str - es_type: int + pmb_type: Literal["particle_state"] = "particle_state" + name: str # e.g. "HA", "A-", "H+" charge: int + es_type: float # label in espresso class ParticleTemplate(PMBBaseModel): @@ -21,30 +20,22 @@ class ParticleTemplate(PMBBaseModel): """ pmb_type: str = Field(default="particle", frozen=True) - - name: str sigma: float epsilon: float - - states: Dict[str, ParticleState] = Field(default_factory=dict) - default_state: str | None = None + states: Dict[str, ParticleState] = {} # ---------------- Validators ----------------- - @field_validator("default_state") - def validate_default_state(cls, v, values): - if v is None: - return v - if "states" in values and v not in values["states"]: - raise ValueError( - f"default_state '{v}' not found in states " - f"({list(values['states'].keys())})" - ) - return v - - # ---------------- Helpers ----------------- - def add_state(self, state: ParticleState): - if state.label in self.states: - raise ValueError(f"State '{state.label}' already exists.") - self.states[state.label] = state + if state.name in self.states: + raise ValueError(f"State {state.name} already exists in template {self.name}") + self.states[state.name] = state + + @classmethod + def single_state(cls, name: str, charge: int, es_type: str, epsilon: 
float = 1.0): + """ + Convenience constructor for particles such as H+ that only need one state. + """ + state = ParticleState(name=name, charge=charge, es_type=es_type) + return cls(name=name, epsilon=epsilon, states={name: state}) + diff --git a/test.py b/test.py index b5b02f3..68cd670 100644 --- a/test.py +++ b/test.py @@ -1,67 +1,72 @@ - -""" -Test script for the pyMBE DFManagement API -with hard-coded pmb_type in templates and instances. -""" - +# test.py from pyMBE.storage.df_management import _DFManagement -from pyMBE.storage.templates.particle import ParticleTemplate +from pyMBE.storage.templates.particle import ParticleTemplate, ParticleState from pyMBE.storage.instances.particle import ParticleInstance - +from pyMBE.storage.reactions.reaction import Reaction, ReactionParticipant def main(): - print("=== Testing DFManagement API ===") + db = _DFManagement() - pmb = _DFManagement() - # ------------------------------------------------------- - # 1. Create two particle templates - # ------------------------------------------------------- - tpl_A = ParticleTemplate( - name="WCA_bead", - sigma=1.0, - epsilon=1.0 - ) + # ============================================================ + # 1. 
CREATE PARTICLE TEMPLATES + STATES + # ============================================================ - tpl_B = ParticleTemplate( - name="LJ_bead", - sigma=0.5, - epsilon=2.0 - ) + # A particle (acid) + tpl_A = ParticleTemplate(name="A", sigma=3.5, epsilon=0.2) + tpl_A.add_state(ParticleState(name="HA", charge=0, es_type=0)) + tpl_A.add_state(ParticleState(name="A-", charge=-1, es_type=1)) + + # H+ particle (single-state) + tpl_H = ParticleTemplate(name="H", sigma=3.5, epsilon=0.2) + tpl_H.add_state(ParticleState(name="H+", charge=+1, es_type=2)) # Register templates - print("\nRegistering templates...") - pmb.register_template(tpl_A) - pmb.register_template(tpl_B) - - print("Registered templates:") - for tname in pmb.templates["particle"]: - print(f" - {tname}") - - # ------------------------------------------------------- - # 2. Create particle instances - # ------------------------------------------------------- - inst_1 = ParticleInstance( - particle_id=1, - name="WCA_bead" - ) - inst_2 = ParticleInstance( - particle_id=2, - name="LJ_bead" - ) + db.register_template(tpl_A) + db.register_template(tpl_H) + + # ============================================================ + # 2. CREATE INSTANCES (optional for testing) + # ============================================================ + + inst1 = ParticleInstance(name="A", particle_id=1, state_name="HA") + inst2 = ParticleInstance(name="A", particle_id=2, state_name="A-") + inst3 = ParticleInstance(name="H", particle_id=3, state_name="H+") - inst_3 = ParticleInstance( - particle_id=3, - name="LJ_bead" + db.register_instance(inst1) + db.register_instance(inst2) + db.register_instance(inst3) + + # ============================================================ + # 3. 
DEFINE A REACTION: HA <-> A- + H+ + # ============================================================ + + rx = Reaction( + name="acid_dissociation", + constant=1e-5, + reaction_type="acid/base", + participants=[ + ReactionParticipant(particle_name="A", state_name="HA", coefficient=-1), + ReactionParticipant(particle_name="A", state_name="A-", coefficient=+1), + ReactionParticipant(particle_name="H", state_name="H+", coefficient=+1), + ], ) - # Register instances - print("\nRegistering instances...") - pmb.register_instance(inst_1) - pmb.register_instance(inst_2) - pmb.register_instance(inst_3) - print("Registered instances:") - print(pmb.instances) - + db.register_reaction(rx) + + # ============================================================ + # 4. PRINT DATAFRAMES + # ============================================================ + + print("\n=== Templates DataFrame ===") + print(db.get_templates_df()) + + print("\n=== Instances DataFrame ===") + print(db.get_instances_df()) + + print("\n=== Reactions DataFrame ===") + print(db.get_reactions_df()) + if __name__ == "__main__": - main() \ No newline at end of file + main() + From 6b336b56d976db3206e623197a37a947c906e79b Mon Sep 17 00:00:00 2001 From: pmblanco Date: Tue, 25 Nov 2025 17:58:58 +0100 Subject: [PATCH 03/55] first complete draft of the database for particles --- pyMBE/storage/df_management.py | 47 ++++++++++------- pyMBE/storage/instances/particle.py | 5 +- pyMBE/storage/pint_quantity.py | 78 +++++++++++++++++++++++++++++ pyMBE/storage/reactions/io.py | 28 +++++++++++ pyMBE/storage/reactions/reaction.py | 33 ++++++++++++ pyMBE/storage/templates/particle.py | 14 +++--- test.py | 73 +++++++++++++++++++++++---- 7 files changed, 242 insertions(+), 36 deletions(-) create mode 100644 pyMBE/storage/pint_quantity.py create mode 100644 pyMBE/storage/reactions/io.py create mode 100644 pyMBE/storage/reactions/reaction.py diff --git a/pyMBE/storage/df_management.py b/pyMBE/storage/df_management.py index ca7357c..157103f 
100644 --- a/pyMBE/storage/df_management.py +++ b/pyMBE/storage/df_management.py @@ -37,7 +37,8 @@ class _DFManagement: instances[pmb_type][particle_id] = InstanceModel """ - def __init__(self): + def __init__(self,units): + self.units = units self.templates: Dict[str, ParticleTemplate] = {} self.instances: Dict[int, ParticleInstance] = {} self.reactions: Dict[str, Reaction] = {} @@ -64,8 +65,8 @@ def register_instance(self, instance: ParticleInstance): # validate state tpl = self.templates[instance.name] - if instance.state_name not in tpl.states: - raise ValueError(f"State '{instance.state_name}' not found for particle '{instance.name}'.") + if instance.initial_state not in tpl.states: + raise ValueError(f"State '{instance.initial_state}' not found for particle '{instance.name}'.") self.instances[pid] = instance @@ -76,17 +77,6 @@ def register_reaction(self, reaction: Reaction): if reaction.name in self.reactions: raise ValueError(f"Reaction '{reaction.name}' already exists.") - # validate participants - for p in reaction.participants: - if p.particle_name not in self.templates: - raise ValueError(f"Unknown particle '{p.particle_name}' in reaction '{reaction.name}'.") - - tpl = self.templates[p.particle_name] - if p.state_name not in tpl.states: - raise ValueError( - f"State '{p.state_name}' not defined for particle '{p.particle_name}'." 
- ) - self.reactions[reaction.name] = reaction # ---------------------------------------- @@ -98,8 +88,12 @@ def get_templates_df(self): for sname, st in tpl.states.items(): rows.append({ "particle": tpl.name, + "sigma": tpl.sigma.to_quantity(self.units), + "epsilon": tpl.epsilon.to_quantity(self.units), + "cutoff": tpl.cutoff.to_quantity(self.units), + "offset": tpl.offset.to_quantity(self.units), "state": sname, - "charge": st.charge, + "z": st.z, "es_type": st.es_type }) return pd.DataFrame(rows) @@ -107,25 +101,42 @@ def get_templates_df(self): def get_instances_df(self): rows = [] for inst in self.instances.values(): - rows.append(inst.model_dump()) + rows.append({ + "pmb_type": inst.pmb_type, + "name": inst.name, + "particle_id": inst.particle_id, + "residue_id": int(inst.residue_id) if inst.residue_id is not None else pd.NA, + "molecule_id": int(inst.molecule_id) if inst.molecule_id is not None else pd.NA, + }) return pd.DataFrame(rows) def get_reactions_df(self): rows = [] for r in self.reactions.values(): stoich = { - f"{p.particle_name}:{p.state_name}": p.coefficient + f"{p.state_name}": p.coefficient for p in r.participants } rows.append({ "reaction": r.name, "stoichiometry": stoich, - "constant": r.constant, + "pK": r.pK, "reaction_type": r.reaction_type, "metadata": r.metadata, }) return pd.DataFrame(rows) + def update_particle_instance(self, particle_id, attribute, value): + + if particle_id not in self.instances: + raise KeyError(f"Instance '{particle_id}' not found.") + + allowed = ["initial_state", "residue_id", "molecule_id"] + if attribute not in allowed: + raise ValueError(f"Attribute '{attribute}' not allowed. 
Allowed attributes: {allowed}") + + self.instances[particle_id] = self.instances[particle_id].model_copy(update={attribute: value}) + class _NumpyEncoder(json.JSONEncoder): """ diff --git a/pyMBE/storage/instances/particle.py b/pyMBE/storage/instances/particle.py index 68bab71..69c34f4 100644 --- a/pyMBE/storage/instances/particle.py +++ b/pyMBE/storage/instances/particle.py @@ -1,6 +1,7 @@ from typing import Optional from pydantic import Field, field_validator from ..base_type import PMBBaseModel +import pandas as pd class ParticleInstance(PMBBaseModel): @@ -9,7 +10,9 @@ class ParticleInstance(PMBBaseModel): """ pmb_type: str = "particle" particle_id: int - state_name: str + initial_state: str + residue_id: int | None = None + molecule_id: int | None = None @field_validator("particle_id") def validate_particle_id(cls, pid): diff --git a/pyMBE/storage/pint_quantity.py b/pyMBE/storage/pint_quantity.py new file mode 100644 index 0000000..e8b5689 --- /dev/null +++ b/pyMBE/storage/pint_quantity.py @@ -0,0 +1,78 @@ +# pyMBE/storage/quantity_field.py +from dataclasses import dataclass +from typing import Any +from pint import UnitRegistry, Quantity + +# dimension -> representative unit used to check dimensionality +_DIMENSION_REPRESENTATIVE = { + "length": "nm", + "energy": "meV", + "dimensionless": "dimensionless", + # extend as needed +} + + +@dataclass +class PintQuantity: + """ + Internal, SI-based stored representation of a Pint quantity. + Stores magnitude and unit string using base/SI units. + """ + + magnitude: float + units: str # string representation of base units (e.g. "meter", "joule") + dimension: str # logical dimension: "length", "energy", ... + + @classmethod + def from_quantity(cls, q: Quantity, expected_dimension: str, ureg: UnitRegistry): + """ + Validate `q` has the expected dimension using the provided ureg, + convert to base units (SI-like) and store magnitude + units. 
+ """ + if not isinstance(q, Quantity): + raise TypeError("from_quantity expects a pint.Quantity") + + # Build a representative unit for the dimension using the provided registry + if expected_dimension not in _DIMENSION_REPRESENTATIVE: + raise ValueError(f"Unknown expected_dimension '{expected_dimension}'") + + rep_unit = ureg(_DIMENSION_REPRESENTATIVE[expected_dimension]) + + # Use pint's dimensionality check + try: + if not q.check(rep_unit): + raise ValueError(f"Quantity {q} does not have expected dimension '{expected_dimension}'") + except Exception as e: + # If check fails because registries differ, try converting via string (best-effort) + raise + + # Use the dimension representative unit + rep_unit_str = _DIMENSION_REPRESENTATIVE[expected_dimension] + rep_unit = ureg(rep_unit_str) + + # Validate dimensionality + if not q.check(rep_unit): + raise ValueError(f"Quantity {q} does not match expected dimension '{expected_dimension}'") + + # Convert to the representative SI unit + q_base = q.to(rep_unit) + + # Store magnitude and unit name + mag = float(q_base.magnitude) + unit_str = rep_unit_str + return cls(magnitude=mag, units=unit_str, dimension=expected_dimension) + + def to_quantity(self, ureg: UnitRegistry) -> Quantity: + """ + Reconstruct a pint.Quantity using the provided UnitRegistry. + The units string should be parseable by ureg. 
+ """ + return self.magnitude * ureg(self.units) + + def to_dict(self) -> dict: + return {"magnitude": self.magnitude, "units": self.units, "dimension": self.dimension} + + @classmethod + def from_dict(cls, d: dict): + return cls(magnitude=d["magnitude"], units=d["units"], dimension=d["dimension"]) + diff --git a/pyMBE/storage/reactions/io.py b/pyMBE/storage/reactions/io.py new file mode 100644 index 0000000..fadb53f --- /dev/null +++ b/pyMBE/storage/reactions/io.py @@ -0,0 +1,28 @@ +import json +from typing import Dict +from .reaction import Reaction, ReactionParticipant + + +def load_reaction_set(path: str) -> Dict[str, Reaction]: + with open(path, "r") as f: + data = json.load(f) + + reactions = {} + for name, rdata in data["data"].items(): + + participants = [ + ReactionParticipant(**p) + for p in rdata["participants"] + ] + + reaction = Reaction( + name=name, + participants=participants, + constant=rdata["constant"], + reaction_type=rdata.get("reaction_type", "acid_base"), + metadata=rdata.get("metadata") + ) + + reactions[name] = reaction + + return reactions diff --git a/pyMBE/storage/reactions/reaction.py b/pyMBE/storage/reactions/reaction.py new file mode 100644 index 0000000..0448e7f --- /dev/null +++ b/pyMBE/storage/reactions/reaction.py @@ -0,0 +1,33 @@ +from typing import List, Dict, Optional +from pydantic import BaseModel, Field, field_validator + + +class ReactionParticipant(BaseModel): + """ + One participant in a reaction. 
+ coefficient < 0 -> reactant + coefficient > 0 -> product + """ + particle_name: str + state_name: str + coefficient: int + +class Reaction(BaseModel): + name: str + participants: List[ReactionParticipant] + pK: float = Field(..., description="pKa, logK, eq constant, etc.") + reaction_type: str = Field(..., description="acid_base, binding, redox, ...") + metadata: Optional[Dict] = None + + @field_validator("participants") + def at_least_two_participants(cls, v): + if len(v) < 2: + raise ValueError("A reaction must have at least 2 participants.") + return v + + @field_validator("participants") + def no_zero_coeff(cls, v): + for p in v: + if p.coefficient == 0: + raise ValueError(f"Participant {p.name} has coefficient 0.") + return v diff --git a/pyMBE/storage/templates/particle.py b/pyMBE/storage/templates/particle.py index a8d07d8..1286680 100644 --- a/pyMBE/storage/templates/particle.py +++ b/pyMBE/storage/templates/particle.py @@ -2,12 +2,12 @@ from pydantic import Field, field_validator from ..base_type import PMBBaseModel - +from ..pint_quantity import PintQuantity class ParticleState(PMBBaseModel): pmb_type: Literal["particle_state"] = "particle_state" name: str # e.g. "HA", "A-", "H+" - charge: int + z: int es_type: float # label in espresso @@ -20,8 +20,10 @@ class ParticleTemplate(PMBBaseModel): """ pmb_type: str = Field(default="particle", frozen=True) - sigma: float - epsilon: float + sigma: PintQuantity + cutoff: PintQuantity + offset: PintQuantity + epsilon: PintQuantity states: Dict[str, ParticleState] = {} # ---------------- Validators ----------------- @@ -32,10 +34,10 @@ def add_state(self, state: ParticleState): self.states[state.name] = state @classmethod - def single_state(cls, name: str, charge: int, es_type: str, epsilon: float = 1.0): + def single_state(cls, name: str, z: int, es_type: str, epsilon: float = 1.0): """ Convenience constructor for particles such as H+ that only need one state. 
""" - state = ParticleState(name=name, charge=charge, es_type=es_type) + state = ParticleState(name=name, z=z, es_type=es_type) return cls(name=name, epsilon=epsilon, states={name: state}) diff --git a/test.py b/test.py index 68cd670..5e6bd1a 100644 --- a/test.py +++ b/test.py @@ -3,23 +3,44 @@ from pyMBE.storage.templates.particle import ParticleTemplate, ParticleState from pyMBE.storage.instances.particle import ParticleInstance from pyMBE.storage.reactions.reaction import Reaction, ReactionParticipant +from pyMBE.storage.pint_quantity import PintQuantity + +import pint +import scipy.constants def main(): - db = _DFManagement() + + units = pint.UnitRegistry() + unit_length= 0.355*units.nm + temperature = 298.15 * units.K + kB=scipy.constants.k * units.J / units.K + kT=temperature*kB + units.define(f'reduced_energy = {kT} ') + units.define(f'reduced_length = {unit_length}') + + db = _DFManagement(units=units) # ============================================================ # 1. CREATE PARTICLE TEMPLATES + STATES # ============================================================ - + + # A particle (acid) - tpl_A = ParticleTemplate(name="A", sigma=3.5, epsilon=0.2) - tpl_A.add_state(ParticleState(name="HA", charge=0, es_type=0)) - tpl_A.add_state(ParticleState(name="A-", charge=-1, es_type=1)) + tpl_A = ParticleTemplate(name="A", sigma=PintQuantity.from_quantity(q=3.5 * units.reduced_length, expected_dimension="length", ureg=units), + cutoff=PintQuantity.from_quantity(q=4 * units.reduced_length, expected_dimension="length", ureg=units), + offset=PintQuantity.from_quantity(q=0 * units.reduced_length, expected_dimension="length", ureg=units), + epsilon=PintQuantity.from_quantity(q=0.2 * units.reduced_energy, expected_dimension="energy", ureg=units)) + + tpl_A.add_state(ParticleState(name="HA", z=0, es_type=0)) + tpl_A.add_state(ParticleState(name="A-", z=-1, es_type=1)) # H+ particle (single-state) - tpl_H = ParticleTemplate(name="H", sigma=3.5, epsilon=0.2) - 
tpl_H.add_state(ParticleState(name="H+", charge=+1, es_type=2)) + tpl_H = ParticleTemplate(name="H", sigma=PintQuantity(magnitude=3.5, units="nm", dimension="length"), + cutoff=PintQuantity.from_quantity(q=4 * units.reduced_length, expected_dimension="length", ureg=units), + offset=PintQuantity.from_quantity(q=0 * units.reduced_length, expected_dimension="length", ureg=units), + epsilon=PintQuantity(magnitude=0.2, units="J", dimension="energy")) + tpl_H.add_state(ParticleState(name="H+", z=+1, es_type=2)) # Register templates db.register_template(tpl_A) @@ -29,13 +50,19 @@ def main(): # 2. CREATE INSTANCES (optional for testing) # ============================================================ - inst1 = ParticleInstance(name="A", particle_id=1, state_name="HA") - inst2 = ParticleInstance(name="A", particle_id=2, state_name="A-") - inst3 = ParticleInstance(name="H", particle_id=3, state_name="H+") + inst1 = ParticleInstance(name="A", particle_id=1, initial_state="HA") + inst2 = ParticleInstance(name="A", particle_id=2, initial_state="A-",residue_id=0) + inst3 = ParticleInstance(name="H", particle_id=3, initial_state="H+") db.register_instance(inst1) db.register_instance(inst2) db.register_instance(inst3) + + print("\n=== Instances DataFrame ===") + print(db.get_instances_df()) + + + db.update_particle_instance(particle_id=1, attribute="residue_id", value=int(0)) # ============================================================ # 3. DEFINE A REACTION: HA <-> A- + H+ @@ -43,7 +70,7 @@ def main(): rx = Reaction( name="acid_dissociation", - constant=1e-5, + pK=4.75, reaction_type="acid/base", participants=[ ReactionParticipant(particle_name="A", state_name="HA", coefficient=-1), @@ -67,6 +94,30 @@ def main(): print("\n=== Reactions DataFrame ===") print(db.get_reactions_df()) + # ------------------------- + # Now create a different registry with different reduced unit definitions + # and re-create a DFManager with that registry. 
The DB still stores SI values, + # so conversions are consistent. + # ------------------------- + ureg2 = pint.UnitRegistry() + # define different reduced units (different numeric size) + unit_length2 = 0.2 * ureg2.nanometer + temperature2 = 310.0 * ureg2.kelvin + kB2 = scipy.constants.k * ureg2.joule / ureg2.kelvin + kT2 = temperature2 * kB2 + ureg2.define(f"reduced_length = {unit_length2}") + ureg2.define(f"reduced_energy = {kT2}") + + # create a new DFManager that uses the same stored templates but different ureg + # For this demo we will copy the stored templates (in real use you would re-load from serialized storage) + db2 = _DFManagement(units=ureg2) + # re-insert templates by transferring stored representation (simulate loading) + for name, tpl_obj in db.templates.items(): + db2.register_template(tpl_obj) # tpl_obj stores SI/base units internally + + print("\nUsing registry 2 (different reduced units):") + print(db2.get_templates_df()) + if __name__ == "__main__": main() From 1e122029efe4819d094401666d49081911ebbad4 Mon Sep 17 00:00:00 2001 From: pmblanco Date: Tue, 2 Dec 2025 17:51:49 +0100 Subject: [PATCH 04/55] add all pmb_types --- pyMBE/storage/df_management.py | 192 ++++++++--- pyMBE/storage/instances/bond.py | 16 + pyMBE/storage/instances/hydrogel.py | 9 + pyMBE/storage/instances/molecule.py | 14 + pyMBE/storage/instances/particle.py | 4 +- pyMBE/storage/instances/peptide.py | 14 + pyMBE/storage/instances/protein.py | 14 + pyMBE/storage/instances/residue.py | 15 + pyMBE/storage/io.py | 499 ++++++++++++++++++++++++++++ pyMBE/storage/pint_quantity.py | 1 + pyMBE/storage/templates/bond.py | 11 + pyMBE/storage/templates/hydrogel.py | 28 ++ pyMBE/storage/templates/molecule.py | 9 + pyMBE/storage/templates/peptide.py | 9 + pyMBE/storage/templates/protein.py | 9 + pyMBE/storage/templates/residue.py | 9 + test.py | 176 +++++++++- 17 files changed, 969 insertions(+), 60 deletions(-) create mode 100644 pyMBE/storage/instances/bond.py create mode 100644 
pyMBE/storage/instances/hydrogel.py create mode 100644 pyMBE/storage/instances/molecule.py create mode 100644 pyMBE/storage/instances/peptide.py create mode 100644 pyMBE/storage/instances/protein.py create mode 100644 pyMBE/storage/instances/residue.py create mode 100644 pyMBE/storage/io.py create mode 100644 pyMBE/storage/templates/bond.py create mode 100644 pyMBE/storage/templates/hydrogel.py create mode 100644 pyMBE/storage/templates/molecule.py create mode 100644 pyMBE/storage/templates/peptide.py create mode 100644 pyMBE/storage/templates/protein.py create mode 100644 pyMBE/storage/templates/residue.py diff --git a/pyMBE/storage/df_management.py b/pyMBE/storage/df_management.py index 157103f..fa99cde 100644 --- a/pyMBE/storage/df_management.py +++ b/pyMBE/storage/df_management.py @@ -24,10 +24,25 @@ import logging import warnings -from typing import Dict, Type, Callable +from typing import Dict, Any from pyMBE.storage.templates.particle import ParticleTemplate +from pyMBE.storage.templates.residue import ResidueTemplate +from pyMBE.storage.templates.molecule import MoleculeTemplate +from pyMBE.storage.templates.bond import BondTemplate from pyMBE.storage.instances.particle import ParticleInstance +from pyMBE.storage.instances.residue import ResidueInstance +from pyMBE.storage.instances.molecule import MoleculeInstance +from pyMBE.storage.instances.bond import BondInstance from pyMBE.storage.reactions.reaction import Reaction +from pyMBE.storage.templates.peptide import PeptideTemplate +from pyMBE.storage.instances.peptide import PeptideInstance +from pyMBE.storage.templates.protein import ProteinTemplate +from pyMBE.storage.instances.protein import ProteinInstance +from pyMBE.storage.templates.hydrogel import HydrogelTemplate +from pyMBE.storage.instances.hydrogel import HydrogelInstance + + +TemplateType = Any # union of template classes (ParticleTemplate, ResidueTemplate, ...) 
class _DFManagement: @@ -39,40 +54,106 @@ class _DFManagement: def __init__(self,units): self.units = units - self.templates: Dict[str, ParticleTemplate] = {} - self.instances: Dict[int, ParticleInstance] = {} + # templates: pmb_type -> (name -> template) + self.templates: Dict[str, Dict[str, TemplateType]] = {} + # instances: pmb_type -> (id -> instance) + self.instances: Dict[str, Dict[int, Any]] = {} self.reactions: Dict[str, Reaction] = {} # ---------------------------------------- # TEMPLATE MANAGEMENT # ---------------------------------------- - def register_template(self, template: ParticleTemplate): - if template.name in self.templates: - raise ValueError(f"Template '{template.name}' already exists.") - self.templates[template.name] = template + def register_template(self, template: TemplateType): + pmb_type = getattr(template, "pmb_type", None) + if pmb_type is None: + # infer from class + if isinstance(template, ParticleTemplate): + pmb_type = "particle" + elif isinstance(template, ResidueTemplate): + pmb_type = "residue" + elif isinstance(template, MoleculeTemplate): + pmb_type = "molecule" + elif isinstance(template, BondTemplate): + pmb_type = "bond" + elif isinstance(template, PeptideTemplate): + pmb_type = "peptide" + elif isinstance(template, ProteinTemplate): + pmb_type = "protein" + elif isinstance(template, HydrogelTemplate): + pmb_type = "hydrogel" + else: + raise TypeError("Unknown template type; set attribute pmb_type or use supported templates") + + self.templates.setdefault(pmb_type, {}) + + if template.name in self.templates[pmb_type]: + raise ValueError(f"Template '{template.name}' exists in '{pmb_type}'") + + # particle templates must define at least one state + if pmb_type == "particle": + if not hasattr(template, "states") or len(template.states) == 0: + raise ValueError("ParticleTemplate must define at least one state.") + # ensure default_state valid if set + if getattr(template, "default_state", None) is not None and 
template.default_state not in template.states: + raise ValueError("default_state not in template states") + + self.templates[pmb_type][template.name] = template # ---------------------------------------- # INSTANCE MANAGEMENT # ---------------------------------------- def register_instance(self, instance: ParticleInstance): - pid = instance.particle_id - if pid in self.instances: - raise ValueError(f"Instance with id '{pid}' already exists.") + """ + Instance must carry attributes: + - for particle: name (template name), particle_id, state_name + - for residue: name (residue template), residue_id + """ + # infer pmb_type from instance class + if isinstance(instance, ParticleInstance): + pmb_type = "particle" + iid = instance.particle_id + elif isinstance(instance, ResidueInstance): + pmb_type = "residue" + iid = instance.residue_id + elif isinstance(instance, MoleculeInstance): + pmb_type = "molecule" + iid = instance.molecule_id + elif isinstance(instance, PeptideInstance): + pmb_type = "peptide" + iid = instance.molecule_id + elif isinstance(instance, ProteinInstance): + pmb_type = "protein" + iid = instance.molecule_id + elif isinstance(instance, BondInstance): + pmb_type = "bond" + iid = instance.bond_id + elif isinstance(instance, HydrogelInstance): + pmb_type = "hydrogel" + iid = instance.hydrogel_id + else: + raise TypeError("Unsupported instance type") + + self.instances.setdefault(pmb_type, {}) + + if iid in self.instances[pmb_type]: + raise ValueError(f"Instance id {iid} already exists in type '{pmb_type}'") - # validate particle template - if instance.name not in self.templates: - raise ValueError(f"Particle template '{instance.name}' not found.") + # validate template exists + if instance.name not in self.templates.get(pmb_type, {}): + raise ValueError(f"Template '{instance.name}' not found for type '{pmb_type}'") - # validate state - tpl = self.templates[instance.name] - if instance.initial_state not in tpl.states: - raise ValueError(f"State 
'{instance.initial_state}' not found for particle '{instance.name}'.") + # validate state for particle instances + if pmb_type == "particle": + tpl: ParticleTemplate = self.templates[pmb_type][instance.name] + if instance.initial_state not in tpl.states: + raise ValueError(f"State '{instance.initial_state}' not defined in template '{instance.name}'") - self.instances[pid] = instance + self.instances[pmb_type][iid] = instance # ---------------------------------------- # REACTIONS # ---------------------------------------- + def register_reaction(self, reaction: Reaction): if reaction.name in self.reactions: raise ValueError(f"Reaction '{reaction.name}' already exists.") @@ -82,11 +163,15 @@ def register_reaction(self, reaction: Reaction): # ---------------------------------------- # DATAFRAME EXPORT # ---------------------------------------- - def get_templates_df(self): + + def get_templates_df(self, pmb_type: str = "particle"): rows = [] - for tpl in self.templates.values(): - for sname, st in tpl.states.items(): - rows.append({ + if pmb_type not in self.templates: + return pd.DataFrame(rows) + for tpl in self.templates[pmb_type].values(): + if pmb_type == "particle": + for sname, st in tpl.states.items(): + rows.append({ "particle": tpl.name, "sigma": tpl.sigma.to_quantity(self.units), "epsilon": tpl.epsilon.to_quantity(self.units), @@ -95,21 +180,41 @@ def get_templates_df(self): "state": sname, "z": st.z, "es_type": st.es_type - }) + }) + else: + # Generic representation for other types + rows.append(tpl.model_dump()) return pd.DataFrame(rows) - - def get_instances_df(self): + + def get_instances_df(self, pmb_type: str = "particle"): rows = [] - for inst in self.instances.values(): - rows.append({ - "pmb_type": inst.pmb_type, - "name": inst.name, - "particle_id": inst.particle_id, - "residue_id": int(inst.residue_id) if inst.residue_id is not None else pd.NA, - "molecule_id": int(inst.molecule_id) if inst.molecule_id is not None else pd.NA, - }) + if pmb_type 
not in self.instances: + return pd.DataFrame(rows) + for inst in self.instances[pmb_type].values(): + if pmb_type == "particle": + rows.append({ + "pmb_type": pmb_type, + "name": inst.name, + "particle_id": inst.particle_id, + "initial_state": inst.initial_state, + "residue_id": int(inst.residue_id) if inst.residue_id is not None else pd.NA, + "molecule_id": int(inst.molecule_id) if inst.molecule_id is not None else pd.NA, + }) + elif pmb_type == "residue": + rows.append({ + "pmb_type": pmb_type, + "name": inst.name, + "residue_id": inst.residue_id, + "molecule_id": int(inst.molecule_id) if inst.molecule_id is not None else pd.NA, + }) + else: + # Generic representation for other types + rows.append(inst.model_dump()) + + return pd.DataFrame(rows) + def get_reactions_df(self): rows = [] for r in self.reactions.values(): @@ -126,16 +231,21 @@ def get_reactions_df(self): }) return pd.DataFrame(rows) - def update_particle_instance(self, particle_id, attribute, value): - - if particle_id not in self.instances: - raise KeyError(f"Instance '{particle_id}' not found.") - - allowed = ["initial_state", "residue_id", "molecule_id"] + def update_instance(self, instance_id, pmb_type, attribute, value): + if instance_id not in self.instances[pmb_type]: + raise KeyError(f"Instance '{instance_id}' not found in type '{pmb_type}'.") + + if pmb_type == "particle": + allowed = ["initial_state", "residue_id", "molecule_id"] + elif pmb_type == "residue": + allowed = ["molecule_id"] + else: + allowed = [None] # No attributes allowed for other types + if attribute not in allowed: - raise ValueError(f"Attribute '{attribute}' not allowed. Allowed attributes: {allowed}") + raise ValueError(f"Attribute '{attribute}' not allowed for {pmb_type}. 
Allowed attributes: {allowed}") - self.instances[particle_id] = self.instances[particle_id].model_copy(update={attribute: value}) + self.instances[pmb_type][instance_id] = self.instances[pmb_type][instance_id].model_copy(update={attribute: value}) class _NumpyEncoder(json.JSONEncoder): diff --git a/pyMBE/storage/instances/bond.py b/pyMBE/storage/instances/bond.py new file mode 100644 index 0000000..cacce1a --- /dev/null +++ b/pyMBE/storage/instances/bond.py @@ -0,0 +1,16 @@ +from pyMBE.storage.base_type import PMBBaseModel +from pydantic import field_validator + +class BondInstance(PMBBaseModel): + pmb_type: str = "bond" + bond_id: int + name : str # bond template name + particle_id1: int + particle_id2: int + + + @field_validator("bond_id") + def validate_bond_id(cls, bid): + if bid < 0: + raise ValueError("bond_id must be a non-negative integer.") + return bid \ No newline at end of file diff --git a/pyMBE/storage/instances/hydrogel.py b/pyMBE/storage/instances/hydrogel.py new file mode 100644 index 0000000..22ffcdd --- /dev/null +++ b/pyMBE/storage/instances/hydrogel.py @@ -0,0 +1,9 @@ +from typing import List +from pydantic import Field +from ..base_type import PMBBaseModel + +class HydrogelInstance(PMBBaseModel): + pmb_type: str = Field(default="hydrogel", frozen=True) + hydrogel_id: int + name: str + molecule_ids: List[str] = Field(default_factory=list) diff --git a/pyMBE/storage/instances/molecule.py b/pyMBE/storage/instances/molecule.py new file mode 100644 index 0000000..2cb33c9 --- /dev/null +++ b/pyMBE/storage/instances/molecule.py @@ -0,0 +1,14 @@ +from pyMBE.storage.base_type import PMBBaseModel +from pydantic import field_validator + + +class MoleculeInstance(PMBBaseModel): + pmb_type: str = "molecule" + name: str # molecule template name + molecule_id: int + + @field_validator("molecule_id") + def validate_residue_id(cls, mid): + if mid < 0: + raise ValueError("molecule_id must be a non-negative integer.") + return mid diff --git 
a/pyMBE/storage/instances/particle.py b/pyMBE/storage/instances/particle.py index 69c34f4..5cfc4c3 100644 --- a/pyMBE/storage/instances/particle.py +++ b/pyMBE/storage/instances/particle.py @@ -1,7 +1,5 @@ -from typing import Optional -from pydantic import Field, field_validator +from pydantic import field_validator from ..base_type import PMBBaseModel -import pandas as pd class ParticleInstance(PMBBaseModel): diff --git a/pyMBE/storage/instances/peptide.py b/pyMBE/storage/instances/peptide.py new file mode 100644 index 0000000..c1142b9 --- /dev/null +++ b/pyMBE/storage/instances/peptide.py @@ -0,0 +1,14 @@ +from pyMBE.storage.base_type import PMBBaseModel +from pydantic import field_validator + + +class PeptideInstance(PMBBaseModel): + pmb_type: str = "peptide" + name: str # molecule template name + molecule_id: int + + @field_validator("molecule_id") + def validate_residue_id(cls, mid): + if mid < 0: + raise ValueError("molecule_id must be a non-negative integer.") + return mid diff --git a/pyMBE/storage/instances/protein.py b/pyMBE/storage/instances/protein.py new file mode 100644 index 0000000..c21c0b6 --- /dev/null +++ b/pyMBE/storage/instances/protein.py @@ -0,0 +1,14 @@ +from pyMBE.storage.base_type import PMBBaseModel +from pydantic import field_validator + + +class ProteinInstance(PMBBaseModel): + pmb_type: str = "protein" + name: str # molecule template name + molecule_id: int + + @field_validator("molecule_id") + def validate_residue_id(cls, mid): + if mid < 0: + raise ValueError("molecule_id must be a non-negative integer.") + return mid \ No newline at end of file diff --git a/pyMBE/storage/instances/residue.py b/pyMBE/storage/instances/residue.py new file mode 100644 index 0000000..1215052 --- /dev/null +++ b/pyMBE/storage/instances/residue.py @@ -0,0 +1,15 @@ +from pyMBE.storage.base_type import PMBBaseModel +from pydantic import field_validator + + +class ResidueInstance(PMBBaseModel): + pmb_type: str = "residue" + name: str # residue template name 
+ residue_id: int + molecule_id: int | None = None + + @field_validator("residue_id") + def validate_residue_id(cls, rid): + if rid < 0: + raise ValueError("residue_id must be a non-negative integer.") + return rid diff --git a/pyMBE/storage/io.py b/pyMBE/storage/io.py new file mode 100644 index 0000000..9700c65 --- /dev/null +++ b/pyMBE/storage/io.py @@ -0,0 +1,499 @@ +# pyMBE/storage/io.py +import os +import json +from pathlib import Path +from typing import Any, Dict + +import pandas as pd +from pint import UnitRegistry + +from pyMBE.storage.df_management import _DFManagement +from pyMBE.storage.pint_quantity import PintQuantity +from pyMBE.storage.templates.particle import ParticleTemplate, ParticleState +from pyMBE.storage.templates.residue import ResidueTemplate +from pyMBE.storage.templates.molecule import MoleculeTemplate +from pyMBE.storage.templates.bond import BondTemplate +from pyMBE.storage.instances.particle import ParticleInstance +from pyMBE.storage.instances.residue import ResidueInstance +from pyMBE.storage.instances.molecule import MoleculeInstance +from pyMBE.storage.instances.bond import BondInstance +from pyMBE.storage.reactions.reaction import Reaction, ReactionParticipant +from pyMBE.storage.templates.peptide import PeptideTemplate +from pyMBE.storage.instances.peptide import PeptideInstance +from pyMBE.storage.templates.protein import ProteinTemplate +from pyMBE.storage.instances.protein import ProteinInstance +from pyMBE.storage.templates.hydrogel import HydrogelTemplate, HydrogelNode, HydrogelChain +from pyMBE.storage.instances.hydrogel import HydrogelInstance + +# ---------------------------------------------------------------------- +# Helpers for JSON encode/decode +# ---------------------------------------------------------------------- +def _encode(obj: Any) -> str: + """ + Convert Python object -> JSON string. + - If obj is a PintQuantity (PintQuantity dataclass), convert via to_dict(). 
+ - If obj is a Pydantic model, prefer its model_dump() result. + - Otherwise, json.dumps(obj). + """ + if obj is None: + return "" + # PintQuantity dataclass (has to_dict) + if isinstance(obj, PintQuantity): + return json.dumps(obj.to_dict(), separators=(",", ":"), ensure_ascii=False) + + # If it's already a dict/list/scalar, json-dump it + try: + return json.dumps(obj, separators=(",", ":"), ensure_ascii=False) + except TypeError: + # Last resort: convert to string + return json.dumps(str(obj), separators=(",", ":"), ensure_ascii=False) + + +def _decode(s: Any) -> Any: + """ + Robust JSON decoder: + - Returns None for None/NaN/empty/non-string values + - If s is a JSON string, returns parsed Python object + - If s is already a dict/list/number, returns it unchanged + """ + # None / pandas NA / nan handling + if s is None: + return None + # pandas often gives float('nan') or numpy.nan + if isinstance(s, float): + # NaN -> None + if pd.isna(s): + return None + return s + # If already native Python container + if isinstance(s, (dict, list, int, bool)): + return s + # Must be a string to parse JSON + if not isinstance(s, str): + return None + s_str = s.strip() + if s_str == "" or s_str.lower() == "nan": + return None + try: + return json.loads(s_str) + except Exception: + # If it fails, try to interpret as plain string + return s_str + +# ---------------------------------------------------------------------- +# SAVE +# ---------------------------------------------------------------------- +def save_database_csv(db: _DFManagement, folder: str): + """ + Save all database content to CSV files in the specified folder. 
+ Files produced: + - templates_particle.csv + - templates_residue.csv + - templates_molecule.csv + - templates_bond.csv + - instances_particle.csv + - instances_residue.csv + - instances_molecule.csv + - instances_bond.csv + - reactions.csv + """ + os.makedirs(folder, exist_ok=True) + + # ----------------------------- + # TEMPLATES + # ----------------------------- + for pmb_type, tpl_dict in db.templates.items(): + rows = [] + for tpl in tpl_dict.values(): + # PARTICLE TEMPLATE: explicit custom encoding + if pmb_type == "particle" and isinstance(tpl, ParticleTemplate): + rows.append({ + "name": tpl.name, + "sigma": _encode(tpl.sigma.to_dict()), + "epsilon": _encode(tpl.epsilon.to_dict()), + "cutoff": _encode(tpl.cutoff.to_dict()), + "offset": _encode(tpl.offset.to_dict()), + # states: dict state_name -> ParticleState.model_dump() + "states": _encode({sname: st.model_dump() for sname, st in tpl.states.items()}), + }) + + # RESIDUE TEMPLATE + elif pmb_type == "residue" and isinstance(tpl, ResidueTemplate): + rows.append({ + "name": tpl.name, + "central_bead": tpl.central_bead, + "side_chains": _encode(tpl.side_chains), + }) + + # MOLECULE TEMPLATE + elif pmb_type == "molecule" and isinstance(tpl, MoleculeTemplate): + rows.append({ + "name": tpl.name, + "residue_list": _encode(tpl.residue_list), + }) + + elif pmb_type == "peptide" and isinstance(tpl, PeptideTemplate): + rows.append({ + "name": tpl.name, + "model": tpl.model, + "residue_list": _encode(tpl.residue_list), + "sequence": _encode(tpl.sequence), + }) + elif pmb_type == "protein" and isinstance(tpl, ProteinTemplate): + rows.append({ + "name": tpl.name, + "model": tpl.model, + "residue_list": _encode(tpl.residue_list), + "sequence": _encode(tpl.sequence), + }) + # BOND TEMPLATE + elif pmb_type == "bond" and isinstance(tpl, BondTemplate): + # parameters: dict[str, scalar or PintQuantity] + params_serial = {} + for k, v in tpl.parameters.items(): + if isinstance(v, PintQuantity): + params_serial[k] = 
v.to_dict() + else: + # assume scalar serializable + params_serial[k] = v + rows.append({ + "name": tpl.name, + "bond_type": tpl.bond_type, + "parameters": _encode(params_serial), + "l0": _encode(tpl.l0.to_dict()), + }) + # HYDROGEL TEMPLATE + elif pmb_type == "hydrogel" and isinstance(tpl, HydrogelTemplate): + rows.append({ + "name": tpl.name, + "node_map": _encode([node.model_dump() for node in tpl.node_map]), + "chain_map": _encode([chain.model_dump() for chain in tpl.chain_map]), + }) + else: + # Generic fallback: try model_dump() + try: + rows.append(tpl.model_dump()) + except Exception: + rows.append({"name": getattr(tpl, "name", None)}) + + df = pd.DataFrame(rows) + df.to_csv(os.path.join(folder, f"templates_{pmb_type}.csv"), index=False) + + # ----------------------------- + # INSTANCES + # ----------------------------- + for pmb_type, inst_dict in db.instances.items(): + rows = [] + for inst in inst_dict.values(): + if pmb_type == "particle" and isinstance(inst, ParticleInstance): + rows.append({ + "pmb_type": pmb_type, + "name": inst.name, + "particle_id": int(inst.particle_id), + "initial_state": inst.initial_state, + "residue_id": int(inst.residue_id) if inst.residue_id is not None else "", + "molecule_id": int(inst.molecule_id) if inst.molecule_id is not None else "", + }) + elif pmb_type == "residue" and isinstance(inst, ResidueInstance): + rows.append({ + "pmb_type": pmb_type, + "name": inst.name, + "residue_id": int(inst.residue_id), + "molecule_id": int(inst.molecule_id) if inst.molecule_id is not None else "", + }) + elif pmb_type == "molecule" and isinstance(inst, MoleculeInstance): + rows.append({ + "pmb_type": pmb_type, + "name": inst.name, + "molecule_id": int(inst.molecule_id), + }) + elif pmb_type == "peptide" and isinstance(inst, PeptideInstance): + rows.append({ + "pmb_type": pmb_type, + "name": inst.name, + "molecule_id": int(inst.molecule_id), + }) + elif pmb_type == "protein" and isinstance(inst, ProteinInstance): + rows.append({ + 
"pmb_type": pmb_type, + "name": inst.name, + "molecule_id": int(inst.molecule_id), + }) + elif pmb_type == "bond" and isinstance(inst, BondInstance): + rows.append({ + "pmb_type": pmb_type, + "name": inst.name, + "bond_id": int(inst.bond_id), + "particle_id1": int(inst.particle_id1), + "particle_id2": int(inst.particle_id2), + }) + elif pmb_type == "hydrogel" and isinstance(inst, HydrogelInstance): + rows.append({ + "pmb_type": pmb_type, + "name": inst.name, + "hydrogel_id": int(inst.hydrogel_id), + "molecule_ids": _encode(inst.molecule_ids), + }) + else: + # fallback to model_dump + try: + rows.append(inst.model_dump()) + except Exception: + rows.append({"name": getattr(inst, "name", None)}) + + df = pd.DataFrame(rows) + df.to_csv(os.path.join(folder, f"instances_{pmb_type}.csv"), index=False) + + # ----------------------------- + # REACTIONS + # ----------------------------- + rows = [] + for rx in db.reactions.values(): + rows.append({ + "name": rx.name, + "participants": _encode([p.model_dump() for p in rx.participants]), + "pK": rx.pK if hasattr(rx, "pK") else None, + "reaction_type": rx.reaction_type, + "metadata": _encode(rx.metadata) if getattr(rx, "metadata", None) is not None else "", + }) + pd.DataFrame(rows).to_csv(os.path.join(folder, "reactions.csv"), index=False) + + +# ---------------------------------------------------------------------- +# LOAD +# ---------------------------------------------------------------------- +def load_database_csv(db: _DFManagement, folder: str): + """ + Load CSV files from folder into the provided _DFManagement instance. + This mutates db.templates, db.instances and db.reactions in place. + + Important: + - The PintQuantity.from_dict(...) returns a stored PintQuantity dataclass + (no registry required). Reconstruction to a pint.Quantity occurs when + users call .to_quantity(db.ureg) on the PintQuantity. 
+ """ + folder = Path(folder) + if not folder.exists(): + raise FileNotFoundError(f"Folder '{folder}' does not exist.") + + # target pmb types we support + pyMBE_types = ["particle", + "residue", + "molecule", + "bond", + "peptide", + "protein", + "hydrogel"] + + # ----------------------------- + # TEMPLATES + # ----------------------------- + for pmb_type in pyMBE_types: + csv_file = folder / f"templates_{pmb_type}.csv" + if not csv_file.exists(): + continue + df = pd.read_csv(csv_file, dtype=str).fillna("") + + templates: Dict[str, Any] = {} + + for _, row in df.iterrows(): + # row values are strings (or empty string) + if pmb_type == "particle": + sigma_d = _decode(row["sigma"]) + epsilon_d = _decode(row["epsilon"]) + cutoff_d = _decode(row["cutoff"]) + offset_d = _decode(row["offset"]) + states_d = _decode(row["states"]) + + sigma = PintQuantity.from_dict(sigma_d) if sigma_d is not None else None + epsilon = PintQuantity.from_dict(epsilon_d) if epsilon_d is not None else None + cutoff = PintQuantity.from_dict(cutoff_d) if cutoff_d is not None else None + offset = PintQuantity.from_dict(offset_d) if offset_d is not None else None + + states: Dict[str, ParticleState] = {} + if isinstance(states_d, dict): + for sname, sdata in states_d.items(): + # sdata expected to be a dict matching ParticleState fields + states[sname] = ParticleState(**sdata) + + tpl = ParticleTemplate( + name=row["name"], + sigma=sigma, + epsilon=epsilon, + cutoff=cutoff, + offset=offset, + states=states, + ) + templates[tpl.name] = tpl + + elif pmb_type == "residue": + sc = _decode(row.get("side_chains", "")) or [] + if not isinstance(sc, list): + sc = list(sc) + tpl = ResidueTemplate( + name=row["name"], + central_bead=row.get("central_bead", ""), + side_chains=sc + ) + templates[tpl.name] = tpl + + elif pmb_type == "molecule": + rl = _decode(row.get("residue_list", "")) or [] + if not isinstance(rl, list): + rl = list(rl) + tpl = MoleculeTemplate( + name=row["name"], + residue_list=rl + ) + 
templates[tpl.name] = tpl + elif pmb_type == "peptide": + rl = _decode(row.get("residue_list", "")) or [] + if not isinstance(rl, list): + rl = list(rl) + seq = _decode(row.get("sequence", "")) or [] + if not isinstance(seq, list): + seq = list(seq) + tpl = PeptideTemplate( + name=row["name"], + model=row.get("model", ""), + residue_list=rl, + sequence=seq + ) + templates[tpl.name] = tpl + elif pmb_type == "protein": + rl = _decode(row.get("residue_list", "")) or [] + if not isinstance(rl, list): + rl = list(rl) + seq = _decode(row.get("sequence", "")) or [] + if not isinstance(seq, list): + seq = list(seq) + tpl = ProteinTemplate( + name=row["name"], + model=row.get("model", ""), + residue_list=rl, + sequence=seq + ) + templates[tpl.name] = tpl + elif pmb_type == "bond": + params_raw = _decode(row.get("parameters", "")) or {} + parameters: Dict[str, Any] = {} + for k, v in params_raw.items(): + # if v is a dict, assume PintQuantity dict + if isinstance(v, dict) and {"magnitude", "units", "dimension"}.issubset(v.keys()): + parameters[k] = PintQuantity.from_dict(v) + else: + parameters[k] = v + tpl = BondTemplate( + name=row["name"], + bond_type=row.get("bond_type", ""), + parameters=parameters, + l0=PintQuantity.from_dict(_decode(row["l0"]))) + templates[tpl.name] = tpl + elif pmb_type == "hydrogel": + node_map_raw = _decode(row.get("node_map", "")) or [] + chain_map_raw = _decode(row.get("chain_map", "")) or [] + + node_map = [HydrogelNode(**n) for n in node_map_raw if isinstance(n, dict)] + chain_map = [HydrogelChain(**c) for c in chain_map_raw if isinstance(c, dict)] + tpl = HydrogelTemplate( + name=row["name"], + node_map=node_map, + chain_map=chain_map + ) + templates[tpl.name] = tpl + db.templates[pmb_type] = templates + + # ----------------------------- + # INSTANCES + # ----------------------------- + for pmb_type in pyMBE_types: + csv_file = folder / f"instances_{pmb_type}.csv" + if not csv_file.exists(): + continue + df = pd.read_csv(csv_file, 
dtype=str).fillna("") + + instances: Dict[Any, Any] = {} + + for _, row in df.iterrows(): + if pmb_type == "particle": + # some fields might be empty strings -> map to None + residue_val = row.get("residue_id", "") or "" + molecule_val = row.get("molecule_id", "") or "" + inst = ParticleInstance( + name=row["name"], + particle_id=int(row["particle_id"]), + initial_state=row["initial_state"], + residue_id=None if residue_val == "" else int(residue_val), + molecule_id=None if molecule_val == "" else int(molecule_val), + ) + instances[inst.particle_id] = inst + + elif pmb_type == "residue": + mol_val = row.get("molecule_id", "") or "" + inst = ResidueInstance( + name=row["name"], + residue_id=int(row["residue_id"]), + molecule_id=None if mol_val == "" else int(mol_val), + ) + instances[inst.residue_id] = inst + + elif pmb_type == "molecule": + inst = MoleculeInstance( + name=row["name"], + molecule_id=int(row["molecule_id"]), + ) + instances[inst.molecule_id] = inst + elif pmb_type == "peptide": + inst = PeptideInstance( + name=row["name"], + molecule_id=int(row["molecule_id"]), + ) + instances[inst.molecule_id] = inst + elif pmb_type == "protein": + inst = ProteinInstance( + name=row["name"], + molecule_id=int(row["molecule_id"]), + ) + instances[inst.molecule_id] = inst + elif pmb_type == "bond": + inst = BondInstance( + name=row["name"], + bond_id=int(row["bond_id"]), + particle_id1=int(row["particle_id1"]), + particle_id2=int(row["particle_id2"]), + ) + instances[inst.bond_id] = inst + elif pmb_type == "hydrogel": + mol_ids = _decode(row.get("molecule_ids", "")) or [] + if not isinstance(mol_ids, list): + mol_ids = list(mol_ids) + inst = HydrogelInstance( + name=row["name"], + hydrogel_id=int(row["hydrogel_id"]), + molecule_ids=mol_ids + ) + instances[inst.hydrogel_id] = inst + db.instances[pmb_type] = instances + + # ----------------------------- + # REACTIONS + # ----------------------------- + rx_file = folder / "reactions.csv" + reactions: Dict[str, Reaction] 
= {} + if rx_file.exists(): + df = pd.read_csv(rx_file, dtype=str).fillna("") + for _, row in df.iterrows(): + participants_raw = _decode(row.get("participants", "")) or [] + participants = [ReactionParticipant(**p) for p in participants_raw] + metadata = _decode(row.get("metadata", "")) or None + rx = Reaction( + name=row["name"], + participants=participants, + pK=float(row["pK"]) if (row.get("pK") not in (None, "", "nan")) else None, + reaction_type=row.get("reaction_type", None), + metadata=metadata + ) + reactions[rx.name] = rx + db.reactions = reactions + + diff --git a/pyMBE/storage/pint_quantity.py b/pyMBE/storage/pint_quantity.py index e8b5689..af07f07 100644 --- a/pyMBE/storage/pint_quantity.py +++ b/pyMBE/storage/pint_quantity.py @@ -7,6 +7,7 @@ _DIMENSION_REPRESENTATIVE = { "length": "nm", "energy": "meV", + "energy/length**2": "meV/nm**2", "dimensionless": "dimensionless", # extend as needed } diff --git a/pyMBE/storage/templates/bond.py b/pyMBE/storage/templates/bond.py new file mode 100644 index 0000000..6d3b5b4 --- /dev/null +++ b/pyMBE/storage/templates/bond.py @@ -0,0 +1,11 @@ +from typing import Dict, Literal +from ..base_type import PMBBaseModel +from ..pint_quantity import PintQuantity + + +class BondTemplate(PMBBaseModel): + pmb_type: Literal["bond"] = "bond" + name: str # e.g. "HARMONIC_default" + bond_type: str # "HARMONIC", "FENE" + parameters: Dict[str, PintQuantity] # k, r0, d_r_max... 
+ l0: PintQuantity # initial bond length diff --git a/pyMBE/storage/templates/hydrogel.py b/pyMBE/storage/templates/hydrogel.py new file mode 100644 index 0000000..166614b --- /dev/null +++ b/pyMBE/storage/templates/hydrogel.py @@ -0,0 +1,28 @@ +from typing import List +from pydantic import Field, BaseModel +from ..base_type import PMBBaseModel + + +class HydrogelNode(BaseModel): + particle_name: str + lattice_index: List[int] # must be length 3 + + +class HydrogelChain(BaseModel): + node_start: str + node_end: str + residue_list: List[str] # list of residue names + + +class HydrogelTemplate(PMBBaseModel): + """ + A hydrogel definition consists of: + - node_map: list of nodes with particle names and lattice positions + - chain_map: list of node-node polymer chains with residue lists + """ + pmb_type: str = Field(default="hydrogel", frozen=True) + name: str + + node_map: List[HydrogelNode] = Field(default_factory=list) + chain_map: List[HydrogelChain] = Field(default_factory=list) + diff --git a/pyMBE/storage/templates/molecule.py b/pyMBE/storage/templates/molecule.py new file mode 100644 index 0000000..7e3c77b --- /dev/null +++ b/pyMBE/storage/templates/molecule.py @@ -0,0 +1,9 @@ +from pyMBE.storage.base_type import PMBBaseModel +from pydantic import Field + +class MoleculeTemplate(PMBBaseModel): + pmb_type: str = Field(default="molecule", frozen=True) + name: str + residue_list: list[str] + + diff --git a/pyMBE/storage/templates/peptide.py b/pyMBE/storage/templates/peptide.py new file mode 100644 index 0000000..9115706 --- /dev/null +++ b/pyMBE/storage/templates/peptide.py @@ -0,0 +1,9 @@ +from pyMBE.storage.base_type import PMBBaseModel +from pydantic import Field + +class PeptideTemplate(PMBBaseModel): + pmb_type: str = Field(default="peptide", frozen=True) + name: str + model: str + residue_list: list[str] + sequence: list[str] \ No newline at end of file diff --git a/pyMBE/storage/templates/protein.py b/pyMBE/storage/templates/protein.py new file mode 100644 
index 0000000..f027cda --- /dev/null +++ b/pyMBE/storage/templates/protein.py @@ -0,0 +1,9 @@ +from pyMBE.storage.base_type import PMBBaseModel +from pydantic import Field + +class ProteinTemplate(PMBBaseModel): + pmb_type: str = Field(default="protein", frozen=True) + name: str + model: str + residue_list: list[str] + sequence: list[str] \ No newline at end of file diff --git a/pyMBE/storage/templates/residue.py b/pyMBE/storage/templates/residue.py new file mode 100644 index 0000000..85f6c0b --- /dev/null +++ b/pyMBE/storage/templates/residue.py @@ -0,0 +1,9 @@ +from pyMBE.storage.base_type import PMBBaseModel +from pydantic import Field + +class ResidueTemplate(PMBBaseModel): + pmb_type: str = Field(default="residue", frozen=True) + name: str + central_bead: str + side_chains: list[str] = [] + \ No newline at end of file diff --git a/test.py b/test.py index 5e6bd1a..edc9ee7 100644 --- a/test.py +++ b/test.py @@ -4,6 +4,20 @@ from pyMBE.storage.instances.particle import ParticleInstance from pyMBE.storage.reactions.reaction import Reaction, ReactionParticipant from pyMBE.storage.pint_quantity import PintQuantity +from pyMBE.storage.templates.residue import ResidueTemplate +from pyMBE.storage.instances.residue import ResidueInstance +from pyMBE.storage.templates.molecule import MoleculeTemplate +from pyMBE.storage.instances.molecule import MoleculeInstance +from pyMBE.storage.templates.bond import BondTemplate +from pyMBE.storage.instances.bond import BondInstance +from pyMBE.storage.templates.peptide import PeptideTemplate +from pyMBE.storage.instances.peptide import PeptideInstance +from pyMBE.storage.templates.protein import ProteinTemplate +from pyMBE.storage.instances.protein import ProteinInstance +from pyMBE.storage.templates.hydrogel import HydrogelTemplate, HydrogelNode, HydrogelChain +from pyMBE.storage.instances.hydrogel import HydrogelInstance + +import pyMBE.storage.io as io import pint import scipy.constants @@ -20,23 +34,22 @@ def main(): db = 
_DFManagement(units=units) - # ============================================================ # 1. CREATE PARTICLE TEMPLATES + STATES # ============================================================ - # A particle (acid) - tpl_A = ParticleTemplate(name="A", sigma=PintQuantity.from_quantity(q=3.5 * units.reduced_length, expected_dimension="length", ureg=units), - cutoff=PintQuantity.from_quantity(q=4 * units.reduced_length, expected_dimension="length", ureg=units), - offset=PintQuantity.from_quantity(q=0 * units.reduced_length, expected_dimension="length", ureg=units), - epsilon=PintQuantity.from_quantity(q=0.2 * units.reduced_energy, expected_dimension="energy", ureg=units)) + tpl_A = ParticleTemplate(name="A", + sigma=PintQuantity.from_quantity(q=3.5 * units.reduced_length, expected_dimension="length", ureg=units), + cutoff=PintQuantity.from_quantity(q=4 * units.reduced_length, expected_dimension="length", ureg=units), + offset=PintQuantity.from_quantity(q=0 * units.reduced_length, expected_dimension="length", ureg=units), + epsilon=PintQuantity.from_quantity(q=0.2 * units.reduced_energy, expected_dimension="energy", ureg=units)) tpl_A.add_state(ParticleState(name="HA", z=0, es_type=0)) tpl_A.add_state(ParticleState(name="A-", z=-1, es_type=1)) # H+ particle (single-state) - tpl_H = ParticleTemplate(name="H", sigma=PintQuantity(magnitude=3.5, units="nm", dimension="length"), + tpl_H = ParticleTemplate(name="H", sigma=PintQuantity.from_quantity(q=3.5 * units.reduced_length, expected_dimension="length", ureg=units), cutoff=PintQuantity.from_quantity(q=4 * units.reduced_length, expected_dimension="length", ureg=units), offset=PintQuantity.from_quantity(q=0 * units.reduced_length, expected_dimension="length", ureg=units), epsilon=PintQuantity(magnitude=0.2, units="J", dimension="energy")) @@ -45,6 +58,62 @@ def main(): # Register templates db.register_template(tpl_A) db.register_template(tpl_H) + print("\n=== Particle Templates DataFrame ===") + 
print(db.get_templates_df()) + + + tpl_R1 = ResidueTemplate(name="R1", central_bead="A", side_chains=["H","A"]) + tpl_R2 = ResidueTemplate(name="R2", central_bead="HA", side_chains=["H","HA"]) + db.register_template(tpl_R1) + db.register_template(tpl_R2) + print("\n=== Residue Templates DataFrame ===") + print(db.get_templates_df(pmb_type="residue")) + + + tpl_M1 = MoleculeTemplate(name="M1", residue_list=["R1","R2"]) + db.register_template(tpl_M1) + print("\n=== Molecule Templates DataFrame ===") + print(db.get_templates_df(pmb_type="molecule")) + + parameters = {"k": PintQuantity.from_quantity(q=100.0 * units.reduced_energy / (units.reduced_length**2), expected_dimension="energy/length**2", ureg=units), + "r0": PintQuantity.from_quantity(q=1.0 * units.reduced_length, expected_dimension="length", ureg=units),} + + tpl_bond = BondTemplate(name="A1-A2", + bond_type="harmonic", + parameters=parameters, + l0=PintQuantity.from_quantity(q=1.0 * units.reduced_length, + expected_dimension="length", + ureg=units)) + db.register_template(tpl_bond) + print("\n=== Bond Templates DataFrame ===") + print(db.get_templates_df(pmb_type="bond")) + + print("\n=== Peptide Templates DataFrame ===") + tpl_P1 = PeptideTemplate(name="Peptide1", + model="Model1", + residue_list=["R1","R2"], + sequence=["R1","R2"]) + db.register_template(tpl_P1) + print(db.get_templates_df(pmb_type="peptide")) + + print("\n=== Protein Templates DataFrame ===") + tpl_PR1 = ProteinTemplate(name="Protein1", + model="ModelP1", + residue_list=["R1","R2"], + sequence=["R1","R2"]) + db.register_template(tpl_PR1) + print(db.get_templates_df(pmb_type="protein")) + + + print("\n=== Hydrogel Templates DataFrame ===") + node1 = HydrogelNode(particle_name="A", lattice_index=[0,0,0]) + node2 = HydrogelNode(particle_name="HA", lattice_index=[1,0,0]) + chain1 = HydrogelChain(node_start="A", node_end="HA", residue_list=["R1","R2"]) + tpl_HG1 = HydrogelTemplate(name="Hydrogel1", + node_map=[node1, node2], + 
chain_map=[chain1]) + db.register_template(tpl_HG1) + print(db.get_templates_df(pmb_type="hydrogel")) # ============================================================ # 2. CREATE INSTANCES (optional for testing) @@ -58,11 +127,62 @@ def main(): db.register_instance(inst2) db.register_instance(inst3) - print("\n=== Instances DataFrame ===") + print("\n=== Particle Instances DataFrame ===") print(db.get_instances_df()) - db.update_particle_instance(particle_id=1, attribute="residue_id", value=int(0)) + db.update_instance(pmb_type="particle", instance_id=1, attribute="residue_id", value=int(0)) + print("\n=== Particle Instances DataFrame (after update) ===") + print(db.get_instances_df()) + + inst1 = ResidueInstance(name="R1", + residue_id=1) + inst2 = ResidueInstance(name="R2", + residue_id=2) + inst3 = ResidueInstance(name="R1", + residue_id=3, + molecule_id=0) + + db.register_instance(inst1) + db.register_instance(inst2) + db.register_instance(inst3) + + print("\n=== Residue Instances DataFrame ===") + print(db.get_instances_df(pmb_type="residue")) + + + db.update_instance(pmb_type="residue",instance_id=1, attribute="molecule_id", value=int(0)) + print("\n=== Residue Instances DataFrame (after update)===") + print(db.get_instances_df(pmb_type="residue")) + + + inst1 = MoleculeInstance(name="M1", molecule_id=1) + inst2 = MoleculeInstance(name="M1", molecule_id=2) + db.register_instance(inst1) + db.register_instance(inst2) + print("\n=== Molecule Instances DataFrame ===") + print(db.get_instances_df(pmb_type="molecule")) + + inst_bond = BondInstance(name="A1-A2", bond_id=1, particle_id1=1, particle_id2=2) + db.register_instance(inst_bond) + print("\n=== Bond Instances DataFrame ===") + print(db.get_instances_df(pmb_type="bond")) + + print("\n=== Peptide Instances DataFrame ===") + inst_peptide1 = PeptideInstance(name="Peptide1", molecule_id=3) + db.register_instance(inst_peptide1) + print(db.get_instances_df(pmb_type="peptide")) + + print("\n=== Protein Instances 
DataFrame ===") + inst_protein1 = ProteinInstance(name="Protein1", molecule_id=4) + db.register_instance(inst_protein1) + print(db.get_instances_df(pmb_type="protein")) + + print("\n=== Hydrogel Instances DataFrame ===") + inst_hydrogel1 = HydrogelInstance(name="Hydrogel1", hydrogel_id=1, molecule_ids=["1","2","3"]) + db.register_instance(inst_hydrogel1) + print(db.get_instances_df(pmb_type="hydrogel")) + # ============================================================ # 3. DEFINE A REACTION: HA <-> A- + H+ @@ -85,9 +205,7 @@ def main(): # 4. PRINT DATAFRAMES # ============================================================ - print("\n=== Templates DataFrame ===") - print(db.get_templates_df()) - + print("\n=== Instances DataFrame ===") print(db.get_instances_df()) @@ -112,11 +230,37 @@ def main(): # For this demo we will copy the stored templates (in real use you would re-load from serialized storage) db2 = _DFManagement(units=ureg2) # re-insert templates by transferring stored representation (simulate loading) - for name, tpl_obj in db.templates.items(): - db2.register_template(tpl_obj) # tpl_obj stores SI/base units internally + for ptype, tdict in db.templates.items(): + for tname, t in tdict.items(): + db2.register_template(t) + + print("\nTemplates shown with registry 2 (different reduced units):") + print(db2.get_templates_df("particle")) + + io.save_database_csv(db, folder="test_db_csv") + + db3 = _DFManagement(units=ureg2) + + io.load_database_csv(db3, folder="test_db_csv") + print("\nLoaded DB3 Templates DataFrame:") + print(db3.get_templates_df("particle")) + print(db3.get_templates_df("residue")) + print(db3.get_templates_df("molecule")) + print(db3.get_templates_df("bond")) + print(db3.get_templates_df("peptide")) + print(db3.get_templates_df("protein")) + print(db3.get_templates_df("hydrogel")) + print("\nLoaded DB3 Instances DataFrame:") + print(db3.get_instances_df("particle")) + print(db3.get_instances_df("residue")) + 
print(db3.get_instances_df("molecule")) + print(db3.get_instances_df("bond")) + print(db3.get_instances_df("peptide")) + print(db3.get_instances_df("protein")) + print(db3.get_instances_df("hydrogel")) + print("\nLoaded DB3 Reactions DataFrame:") + print(db3.get_reactions_df()) - print("\nUsing registry 2 (different reduced units):") - print(db2.get_templates_df()) if __name__ == "__main__": main() From 308894f1529f483e966de2f8f9d50120864ae743 Mon Sep 17 00:00:00 2001 From: pmblanco Date: Wed, 3 Dec 2025 15:20:13 +0100 Subject: [PATCH 05/55] clean up and document storage code --- pyMBE/storage/instances/hydrogel.py | 54 ++ pyMBE/storage/instances/molecule.py | 50 ++ pyMBE/storage/instances/peptide.py | 49 ++ pyMBE/storage/instances/protein.py | 47 ++ pyMBE/storage/instances/residue.py | 50 ++ pyMBE/storage/io.py | 474 ++++++++++-------- .../storage/{df_management.py => manager.py} | 351 ++++++++----- pyMBE/storage/pint_quantity.py | 82 ++- pyMBE/storage/reactions/io.py | 28 -- pyMBE/storage/reactions/reaction.py | 88 +++- pyMBE/storage/templates/bond.py | 36 ++ pyMBE/storage/templates/hydrogel.py | 47 +- pyMBE/storage/templates/molecule.py | 27 + pyMBE/storage/templates/particle.py | 64 ++- pyMBE/storage/templates/peptide.py | 29 ++ pyMBE/storage/templates/protein.py | 10 + pyMBE/storage/templates/residue.py | 10 + test.py | 130 ++--- 18 files changed, 1164 insertions(+), 462 deletions(-) rename pyMBE/storage/{df_management.py => manager.py} (83%) delete mode 100644 pyMBE/storage/reactions/io.py diff --git a/pyMBE/storage/instances/hydrogel.py b/pyMBE/storage/instances/hydrogel.py index 22ffcdd..410ec71 100644 --- a/pyMBE/storage/instances/hydrogel.py +++ b/pyMBE/storage/instances/hydrogel.py @@ -1,8 +1,62 @@ +# +# Copyright (C) 2025 pyMBE-dev team +# +# This file is part of pyMBE. 
+# +# pyMBE is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# pyMBE is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. +# + from typing import List from pydantic import Field from ..base_type import PMBBaseModel class HydrogelInstance(PMBBaseModel): + """ + Persistent instance representation of a hydrogel object. + + A ``HydrogelInstance`` stores the high-level composition of a + hydrogel in terms of the constituent polymer chain molecules. + Each hydrogel is assigned a unique integer ID and has a human-readable + name, along with a list of molecule identifiers referencing previously + registered molecule instances. + + This class is intentionally lightweight and fully serializable. + It does **not** store simulation-engine internal objects + (such as lattice builders, Espresso handles, network topologies, etc.). + These are expected to be constructed externally at run time. + + Attributes: + pmb_type (str): + Fixed string identifier for this instance type. Always + ``"hydrogel"``. + hydrogel_id (int): + Unique non-negative integer identifying this hydrogel instance. + name (str): + Human-readable name for the hydrogel (e.g., ``"HG_001"``). + molecule_ids (List[str]): + List of molecule instance IDs that compose the hydrogel. + Each entry must correspond to a valid molecule instance stored + in the database. Defaults to an empty list. + + Notes: + - This class represents the *instance* level (what specific + hydrogel exists in the system), not a template describing generic + hydrogel types.
+ - The integrity of ``molecule_ids`` (e.g., references to existing + molecule instances) should be validated in the database layer + during creation or update and not inside this class. + """ pmb_type: str = Field(default="hydrogel", frozen=True) hydrogel_id: int name: str diff --git a/pyMBE/storage/instances/molecule.py b/pyMBE/storage/instances/molecule.py index 2cb33c9..521cce3 100644 --- a/pyMBE/storage/instances/molecule.py +++ b/pyMBE/storage/instances/molecule.py @@ -1,8 +1,58 @@ +# +# Copyright (C) 2025 pyMBE-dev team +# +# This file is part of pyMBE. +# +# pyMBE is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# pyMBE is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. +# + from pyMBE.storage.base_type import PMBBaseModel from pydantic import field_validator class MoleculeInstance(PMBBaseModel): + """ + Persistent instance representation of a molecule. + + A ``MoleculeInstance`` links a concrete molecule in the system to a + molecule template (through its ``name``) and assigns it a unique + integer identifier. Molecule instances typically serve as containers + for ordered lists of residue instances, which are managed in the + database layer outside of this class. + + This class is intentionally minimal and fully serializable. It stores + no engine-specific data or structural objects. + + Attributes: + pmb_type (str): + Fixed string identifying this object as a molecule instance. + Always ``"molecule"``.
+ name (str): + Name of the molecule **template** from which this instance + was created. This must correspond to an existing + ``MoleculeTemplate`` in the database. + molecule_id (int): + Unique non-negative integer identifying this molecule + instance within the database. + + Notes: + - Validation of whether ``name`` corresponds to a registered + molecule template is performed at the database level. + - Structural or connectivity information (e.g., residue ordering) + is maintained outside this class in the instance registry. + """ + pmb_type: str = "molecule" name: str # molecule template name molecule_id: int diff --git a/pyMBE/storage/instances/peptide.py b/pyMBE/storage/instances/peptide.py index c1142b9..64dd368 100644 --- a/pyMBE/storage/instances/peptide.py +++ b/pyMBE/storage/instances/peptide.py @@ -1,8 +1,57 @@ +# +# Copyright (C) 2025 pyMBE-dev team +# +# This file is part of pyMBE. +# +# pyMBE is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# pyMBE is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. +# + from pyMBE.storage.base_type import PMBBaseModel from pydantic import field_validator class PeptideInstance(PMBBaseModel): + """ + Instance of a peptide molecule placed in the simulation. + + ``PeptideInstance`` represents a concrete occurrence of a peptide, + created from a peptide-related template (e.g., a sequence or residue + list defined elsewhere in the database). Each instance corresponds to + one full peptide chain and is identified by a unique ``molecule_id``.
+ + Attributes: + pmb_type (str): + Fixed string identifying this object as a peptide instance. + Always ``"peptide"``. + name (str): + Name of the peptide template from which this instance was + created. This typically corresponds to a user-defined + peptide type or sequence label. + molecule_id (int): + Unique non-negative integer identifying this peptide within + the database. Assigned sequentially by the database manager + when the instance is created. + + Notes: + - This class only tracks the identity of the peptide instance. + Residues and particles belonging to the peptide reference this + instance through their ``molecule_id`` fields. + - Connectivity (ordering of residues), spatial arrangement, + and bonding interactions are managed separately by the + database or simulation engine. + """ + pmb_type: str = "peptide" name: str # molecule template name molecule_id: int diff --git a/pyMBE/storage/instances/protein.py b/pyMBE/storage/instances/protein.py index c21c0b6..126aa3e 100644 --- a/pyMBE/storage/instances/protein.py +++ b/pyMBE/storage/instances/protein.py @@ -1,8 +1,55 @@ +# +# Copyright (C) 2025 pyMBE-dev team +# +# This file is part of pyMBE. +# +# pyMBE is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# pyMBE is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. +# + from pyMBE.storage.base_type import PMBBaseModel from pydantic import field_validator class ProteinInstance(PMBBaseModel): + """ + Instance of a protein molecule placed in the simulation.
+ + ``ProteinInstance`` represents a concrete protein object created + from a protein template defined in the database. Each instance + corresponds to one full protein chain and is uniquely identified + by its ``molecule_id``. + + Attributes: + pmb_type (str): + Fixed string identifying this object as a protein instance. + Always ``"protein"``. + name (str): + Name of the protein template from which this instance was + created. This usually corresponds to a user-defined or + imported protein type or sequence identifier. + molecule_id (int): + Unique non-negative integer identifying this protein within + the database. Assigned by the database manager upon creation. + + Notes: + - A ``ProteinInstance`` only records the identity of the protein + and its template association. + - Residues and particles that belong to the protein reference + this instance through their ``molecule_id`` values. + - The structural connectivity (residue sequence, domains) is + handled at the template level or by the builder modules. + """ pmb_type: str = "protein" name: str # molecule template name molecule_id: int diff --git a/pyMBE/storage/instances/residue.py b/pyMBE/storage/instances/residue.py index 1215052..e5d5bcb 100644 --- a/pyMBE/storage/instances/residue.py +++ b/pyMBE/storage/instances/residue.py @@ -1,8 +1,58 @@ +# +# Copyright (C) 2025 pyMBE-dev team +# +# This file is part of pyMBE. +# +# pyMBE is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# pyMBE is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>.
+# + from pyMBE.storage.base_type import PMBBaseModel from pydantic import field_validator class ResidueInstance(PMBBaseModel): + """ + Instance of a residue placed within a molecule during a simulation. + + ``ResidueInstance`` represents a concrete occurrence of a residue + derived from a residue template. Each instance is uniquely indexed + by ``residue_id`` and may optionally belong to a parent molecule, + such as a peptide, protein, or generic molecule. + + Attributes: + pmb_type (str): + Fixed string identifying this object as a residue instance. + Always ``"residue"``. + name (str): + Name of the residue template from which this instance is derived. + residue_id (int): + Unique non-negative integer identifying this residue instance + within the database. + molecule_id (int | None): + Identifier of the parent molecule to which this residue belongs. + ``None`` indicates that the residue is not assigned to any molecule. + + Notes: + - ``ResidueInstance`` does not itself store particle-level + information; instead, particles reference the residue via + ``residue_id``. + - Residues may be standalone (e.g., in coarse systems) or part of + polymers, proteins, peptides, or hydrogels. + - The sequence ordering and topology of residues are encoded at the + molecule instance/template level, not here. + """ + pmb_type: str = "residue" name: str # residue template name residue_id: int diff --git a/pyMBE/storage/io.py b/pyMBE/storage/io.py index 9700c65..83eb797 100644 --- a/pyMBE/storage/io.py +++ b/pyMBE/storage/io.py @@ -1,4 +1,22 @@ -# pyMBE/storage/io.py +# +# Copyright (C) 2025 pyMBE-dev team +# +# This file is part of pyMBE. +# +# pyMBE is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. 
+# +# pyMBE is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. +# + import os import json from pathlib import Path @@ -7,7 +25,6 @@ import pandas as pd from pint import UnitRegistry -from pyMBE.storage.df_management import _DFManagement from pyMBE.storage.pint_quantity import PintQuantity from pyMBE.storage.templates.particle import ParticleTemplate, ParticleState from pyMBE.storage.templates.residue import ResidueTemplate @@ -25,36 +42,21 @@ from pyMBE.storage.templates.hydrogel import HydrogelTemplate, HydrogelNode, HydrogelChain from pyMBE.storage.instances.hydrogel import HydrogelInstance -# ---------------------------------------------------------------------- -# Helpers for JSON encode/decode -# ---------------------------------------------------------------------- -def _encode(obj: Any) -> str: - """ - Convert Python object -> JSON string. - - If obj is a PintQuantity (PintQuantity dataclass), convert via to_dict(). - - If obj is a Pydantic model, prefer its model_dump() result. - - Otherwise, json.dumps(obj). + +def _decode(s: Any) -> Any: """ - if obj is None: - return "" - # PintQuantity dataclass (has to_dict) - if isinstance(obj, PintQuantity): - return json.dumps(obj.to_dict(), separators=(",", ":"), ensure_ascii=False) + Decodes a JSON-like object or string. - # If it's already a dict/list/scalar, json-dump it - try: - return json.dumps(obj, separators=(",", ":"), ensure_ascii=False) - except TypeError: - # Last resort: convert to string - return json.dumps(str(obj), separators=(",", ":"), ensure_ascii=False) + Handles various input types and converts them to a Python object. + Args: + s (Any): Input value to decode.
Can be None, float('nan'), dict, list, number, or string. -def _decode(s: Any) -> Any: - """ - Robust JSON decoder: - - Returns None for None/NaN/empty/non-string values - - If s is a JSON string, returns parsed Python object - - If s is already a dict/list/number, returns it unchanged + Returns: + Any: + - None if input is None, NaN, empty string, or non-string unrecognized type. + - Decoded Python object if input is a JSON string. + - Original object if it is already a dict, list, int, or bool. """ # None / pandas NA / nan handling if s is None: @@ -80,197 +82,49 @@ def _decode(s: Any) -> Any: # If it fails, try to interpret as plain string return s_str -# ---------------------------------------------------------------------- -# SAVE -# ---------------------------------------------------------------------- -def save_database_csv(db: _DFManagement, folder: str): - """ - Save all database content to CSV files in the specified folder. - Files produced: - - templates_particle.csv - - templates_residue.csv - - templates_molecule.csv - - templates_bond.csv - - instances_particle.csv - - instances_residue.csv - - instances_molecule.csv - - instances_bond.csv - - reactions.csv +def _encode(obj: Any) -> str: """ - os.makedirs(folder, exist_ok=True) + Encodes a Python object as a JSON string. 
- # ----------------------------- - # TEMPLATES - # ----------------------------- - for pmb_type, tpl_dict in db.templates.items(): - rows = [] - for tpl in tpl_dict.values(): - # PARTICLE TEMPLATE: explicit custom encoding - if pmb_type == "particle" and isinstance(tpl, ParticleTemplate): - rows.append({ - "name": tpl.name, - "sigma": _encode(tpl.sigma.to_dict()), - "epsilon": _encode(tpl.epsilon.to_dict()), - "cutoff": _encode(tpl.cutoff.to_dict()), - "offset": _encode(tpl.offset.to_dict()), - # states: dict state_name -> ParticleState.model_dump() - "states": _encode({sname: st.model_dump() for sname, st in tpl.states.items()}), - }) + Special handling for PintQuantity and Pydantic models. - # RESIDUE TEMPLATE - elif pmb_type == "residue" and isinstance(tpl, ResidueTemplate): - rows.append({ - "name": tpl.name, - "central_bead": tpl.central_bead, - "side_chains": _encode(tpl.side_chains), - }) + Args: + obj (Any): Object to encode. Can be None, PintQuantity, Pydantic model, or standard Python object. 
- # MOLECULE TEMPLATE - elif pmb_type == "molecule" and isinstance(tpl, MoleculeTemplate): - rows.append({ - "name": tpl.name, - "residue_list": _encode(tpl.residue_list), - }) - - elif pmb_type == "peptide" and isinstance(tpl, PeptideTemplate): - rows.append({ - "name": tpl.name, - "model": tpl.model, - "residue_list": _encode(tpl.residue_list), - "sequence": _encode(tpl.sequence), - }) - elif pmb_type == "protein" and isinstance(tpl, ProteinTemplate): - rows.append({ - "name": tpl.name, - "model": tpl.model, - "residue_list": _encode(tpl.residue_list), - "sequence": _encode(tpl.sequence), - }) - # BOND TEMPLATE - elif pmb_type == "bond" and isinstance(tpl, BondTemplate): - # parameters: dict[str, scalar or PintQuantity] - params_serial = {} - for k, v in tpl.parameters.items(): - if isinstance(v, PintQuantity): - params_serial[k] = v.to_dict() - else: - # assume scalar serializable - params_serial[k] = v - rows.append({ - "name": tpl.name, - "bond_type": tpl.bond_type, - "parameters": _encode(params_serial), - "l0": _encode(tpl.l0.to_dict()), - }) - # HYDROGEL TEMPLATE - elif pmb_type == "hydrogel" and isinstance(tpl, HydrogelTemplate): - rows.append({ - "name": tpl.name, - "node_map": _encode([node.model_dump() for node in tpl.node_map]), - "chain_map": _encode([chain.model_dump() for chain in tpl.chain_map]), - }) - else: - # Generic fallback: try model_dump() - try: - rows.append(tpl.model_dump()) - except Exception: - rows.append({"name": getattr(tpl, "name", None)}) + Returns: + str: JSON string representation of the object. + Returns empty string for None. 
+ """ + if obj is None: + return "" + # PintQuantity dataclass (has to_dict) + if isinstance(obj, PintQuantity): + return json.dumps(obj.to_dict(), separators=(",", ":"), ensure_ascii=False) - df = pd.DataFrame(rows) - df.to_csv(os.path.join(folder, f"templates_{pmb_type}.csv"), index=False) + # If it's already a dict/list/scalar, json-dump it + try: + return json.dumps(obj, separators=(",", ":"), ensure_ascii=False) + except TypeError: + # Last resort: convert to string + return json.dumps(str(obj), separators=(",", ":"), ensure_ascii=False) - # ----------------------------- - # INSTANCES - # ----------------------------- - for pmb_type, inst_dict in db.instances.items(): - rows = [] - for inst in inst_dict.values(): - if pmb_type == "particle" and isinstance(inst, ParticleInstance): - rows.append({ - "pmb_type": pmb_type, - "name": inst.name, - "particle_id": int(inst.particle_id), - "initial_state": inst.initial_state, - "residue_id": int(inst.residue_id) if inst.residue_id is not None else "", - "molecule_id": int(inst.molecule_id) if inst.molecule_id is not None else "", - }) - elif pmb_type == "residue" and isinstance(inst, ResidueInstance): - rows.append({ - "pmb_type": pmb_type, - "name": inst.name, - "residue_id": int(inst.residue_id), - "molecule_id": int(inst.molecule_id) if inst.molecule_id is not None else "", - }) - elif pmb_type == "molecule" and isinstance(inst, MoleculeInstance): - rows.append({ - "pmb_type": pmb_type, - "name": inst.name, - "molecule_id": int(inst.molecule_id), - }) - elif pmb_type == "peptide" and isinstance(inst, PeptideInstance): - rows.append({ - "pmb_type": pmb_type, - "name": inst.name, - "molecule_id": int(inst.molecule_id), - }) - elif pmb_type == "protein" and isinstance(inst, ProteinInstance): - rows.append({ - "pmb_type": pmb_type, - "name": inst.name, - "molecule_id": int(inst.molecule_id), - }) - elif pmb_type == "bond" and isinstance(inst, BondInstance): - rows.append({ - "pmb_type": pmb_type, - "name": inst.name, - 
"bond_id": int(inst.bond_id), - "particle_id1": int(inst.particle_id1), - "particle_id2": int(inst.particle_id2), - }) - elif pmb_type == "hydrogel" and isinstance(inst, HydrogelInstance): - rows.append({ - "pmb_type": pmb_type, - "name": inst.name, - "hydrogel_id": int(inst.hydrogel_id), - "molecule_ids": _encode(inst.molecule_ids), - }) - else: - # fallback to model_dump - try: - rows.append(inst.model_dump()) - except Exception: - rows.append({"name": getattr(inst, "name", None)}) +def _load_database_csv(db, folder): + """ + Loads CSV files from a folder into a database instance. - df = pd.DataFrame(rows) - df.to_csv(os.path.join(folder, f"instances_{pmb_type}.csv"), index=False) - - # ----------------------------- - # REACTIONS - # ----------------------------- - rows = [] - for rx in db.reactions.values(): - rows.append({ - "name": rx.name, - "participants": _encode([p.model_dump() for p in rx.participants]), - "pK": rx.pK if hasattr(rx, "pK") else None, - "reaction_type": rx.reaction_type, - "metadata": _encode(rx.metadata) if getattr(rx, "metadata", None) is not None else "", - }) - pd.DataFrame(rows).to_csv(os.path.join(folder, "reactions.csv"), index=False) + This function populates the `templates`, `instances`, and `reactions` attributes + of the provided database object in place. Supports various pyMBE types. + Args: + db (Manager): Database manager object to populate. + folder (str or Path): Path to the folder containing CSV files. -# ---------------------------------------------------------------------- -# LOAD -# ---------------------------------------------------------------------- -def load_database_csv(db: _DFManagement, folder: str): - """ - Load CSV files from folder into the provided _DFManagement instance. - This mutates db.templates, db.instances and db.reactions in place. + Raises: + FileNotFoundError: If the folder does not exist. - Important: - - The PintQuantity.from_dict(...) 
returns a stored PintQuantity dataclass - (no registry required). Reconstruction to a pint.Quantity occurs when - users call .to_quantity(db.ureg) on the PintQuantity. + Notes: + - PintQuantity objects are reconstructed from their dictionary representation. + - Supports particle, residue, molecule, peptide, protein, bond, and hydrogel types. """ folder = Path(folder) if not folder.exists(): @@ -285,9 +139,7 @@ def load_database_csv(db: _DFManagement, folder: str): "protein", "hydrogel"] - # ----------------------------- # TEMPLATES - # ----------------------------- for pmb_type in pyMBE_types: csv_file = folder / f"templates_{pmb_type}.csv" if not csv_file.exists(): @@ -403,9 +255,7 @@ def load_database_csv(db: _DFManagement, folder: str): templates[tpl.name] = tpl db.templates[pmb_type] = templates - # ----------------------------- # INSTANCES - # ----------------------------- for pmb_type in pyMBE_types: csv_file = folder / f"instances_{pmb_type}.csv" if not csv_file.exists(): @@ -475,9 +325,7 @@ def load_database_csv(db: _DFManagement, folder: str): instances[inst.hydrogel_id] = inst db.instances[pmb_type] = instances - # ----------------------------- # REACTIONS - # ----------------------------- rx_file = folder / "reactions.csv" reactions: Dict[str, Reaction] = {} if rx_file.exists(): @@ -496,4 +344,200 @@ def load_database_csv(db: _DFManagement, folder: str): reactions[rx.name] = rx db.reactions = reactions +def _load_reaction_set(path): + """ + Loads a set of reactions from a JSON file. + + Args: + path (str): Path to the JSON file containing reaction data. + + Returns: + dict[str, Reaction]: Dictionary mapping reaction names to Reaction objects. 
+ """ + with open(path, "r") as f: + data = json.load(f) + + reactions = {} + for name, rdata in data["data"].items(): + + participants = [ + ReactionParticipant(**p) + for p in rdata["participants"] + ] + + reaction = Reaction( + name=name, + participants=participants, + constant=rdata["constant"], + reaction_type=rdata.get("reaction_type", "acid_base"), + metadata=rdata.get("metadata") + ) + + reactions[name] = reaction + + return reactions + +def _save_database_csv(db, folder): + """ + Saves the database content into CSV files in a folder. + + This function serializes all templates, instances, and reactions. + + Args: + db (Manager): Database object containing templates, instances, and reactions. + folder (str or Path): Path to the folder where CSV files will be saved. + """ + os.makedirs(folder, exist_ok=True) + + # TEMPLATES + for pmb_type, tpl_dict in db.templates.items(): + rows = [] + for tpl in tpl_dict.values(): + # PARTICLE TEMPLATE: explicit custom encoding + if pmb_type == "particle" and isinstance(tpl, ParticleTemplate): + rows.append({ + "name": tpl.name, + "sigma": _encode(tpl.sigma.to_dict()), + "epsilon": _encode(tpl.epsilon.to_dict()), + "cutoff": _encode(tpl.cutoff.to_dict()), + "offset": _encode(tpl.offset.to_dict()), + "states": _encode({sname: st.model_dump() for sname, st in tpl.states.items()}), # states: dict state_name -> ParticleState.model_dump() + }) + + # RESIDUE TEMPLATE + elif pmb_type == "residue" and isinstance(tpl, ResidueTemplate): + rows.append({ + "name": tpl.name, + "central_bead": tpl.central_bead, + "side_chains": _encode(tpl.side_chains), + }) + + # MOLECULE TEMPLATE + elif pmb_type == "molecule" and isinstance(tpl, MoleculeTemplate): + rows.append({ + "name": tpl.name, + "residue_list": _encode(tpl.residue_list), + }) + + elif pmb_type == "peptide" and isinstance(tpl, PeptideTemplate): + rows.append({ + "name": tpl.name, + "model": tpl.model, + "residue_list": _encode(tpl.residue_list), + "sequence": 
_encode(tpl.sequence), + }) + elif pmb_type == "protein" and isinstance(tpl, ProteinTemplate): + rows.append({ + "name": tpl.name, + "model": tpl.model, + "residue_list": _encode(tpl.residue_list), + "sequence": _encode(tpl.sequence), + }) + # BOND TEMPLATE + elif pmb_type == "bond" and isinstance(tpl, BondTemplate): + # parameters: dict[str, scalar or PintQuantity] + params_serial = {} + for k, v in tpl.parameters.items(): + if isinstance(v, PintQuantity): + params_serial[k] = v.to_dict() + else: + # assume scalar serializable + params_serial[k] = v + rows.append({ + "name": tpl.name, + "bond_type": tpl.bond_type, + "parameters": _encode(params_serial), + "l0": _encode(tpl.l0.to_dict()), + }) + # HYDROGEL TEMPLATE + elif pmb_type == "hydrogel" and isinstance(tpl, HydrogelTemplate): + rows.append({ + "name": tpl.name, + "node_map": _encode([node.model_dump() for node in tpl.node_map]), + "chain_map": _encode([chain.model_dump() for chain in tpl.chain_map]), + }) + else: + # Generic fallback: try model_dump() + try: + rows.append(tpl.model_dump()) + except Exception: + rows.append({"name": getattr(tpl, "name", None)}) + + df = pd.DataFrame(rows) + df.to_csv(os.path.join(folder, f"templates_{pmb_type}.csv"), index=False) + + # INSTANCES + for pmb_type, inst_dict in db.instances.items(): + rows = [] + for inst in inst_dict.values(): + if pmb_type == "particle" and isinstance(inst, ParticleInstance): + rows.append({ + "pmb_type": pmb_type, + "name": inst.name, + "particle_id": int(inst.particle_id), + "initial_state": inst.initial_state, + "residue_id": int(inst.residue_id) if inst.residue_id is not None else "", + "molecule_id": int(inst.molecule_id) if inst.molecule_id is not None else "", + }) + elif pmb_type == "residue" and isinstance(inst, ResidueInstance): + rows.append({ + "pmb_type": pmb_type, + "name": inst.name, + "residue_id": int(inst.residue_id), + "molecule_id": int(inst.molecule_id) if inst.molecule_id is not None else "", + }) + elif pmb_type == 
"molecule" and isinstance(inst, MoleculeInstance): + rows.append({ + "pmb_type": pmb_type, + "name": inst.name, + "molecule_id": int(inst.molecule_id), + }) + elif pmb_type == "peptide" and isinstance(inst, PeptideInstance): + rows.append({ + "pmb_type": pmb_type, + "name": inst.name, + "molecule_id": int(inst.molecule_id), + }) + elif pmb_type == "protein" and isinstance(inst, ProteinInstance): + rows.append({ + "pmb_type": pmb_type, + "name": inst.name, + "molecule_id": int(inst.molecule_id), + }) + elif pmb_type == "bond" and isinstance(inst, BondInstance): + rows.append({ + "pmb_type": pmb_type, + "name": inst.name, + "bond_id": int(inst.bond_id), + "particle_id1": int(inst.particle_id1), + "particle_id2": int(inst.particle_id2), + }) + elif pmb_type == "hydrogel" and isinstance(inst, HydrogelInstance): + rows.append({ + "pmb_type": pmb_type, + "name": inst.name, + "hydrogel_id": int(inst.hydrogel_id), + "molecule_ids": _encode(inst.molecule_ids), + }) + else: + # fallback to model_dump + try: + rows.append(inst.model_dump()) + except Exception: + rows.append({"name": getattr(inst, "name", None)}) + + df = pd.DataFrame(rows) + df.to_csv(os.path.join(folder, f"instances_{pmb_type}.csv"), index=False) + + # REACTIONS + rows = [] + for rx in db.reactions.values(): + rows.append({ + "name": rx.name, + "participants": _encode([p.model_dump() for p in rx.participants]), + "pK": rx.pK if hasattr(rx, "pK") else None, + "reaction_type": rx.reaction_type, + "metadata": _encode(rx.metadata) if getattr(rx, "metadata", None) is not None else "", + }) + pd.DataFrame(rows).to_csv(os.path.join(folder, "reactions.csv"), index=False) \ No newline at end of file diff --git a/pyMBE/storage/df_management.py b/pyMBE/storage/manager.py similarity index 83% rename from pyMBE/storage/df_management.py rename to pyMBE/storage/manager.py index fa99cde..960326e 100644 --- a/pyMBE/storage/df_management.py +++ b/pyMBE/storage/manager.py @@ -1,5 +1,5 @@ # -# Copyright (C) 2023-2025 pyMBE-dev 
team +# Copyright (C) 2025 pyMBE-dev team # # This file is part of pyMBE. # @@ -41,72 +41,171 @@ from pyMBE.storage.templates.hydrogel import HydrogelTemplate from pyMBE.storage.instances.hydrogel import HydrogelInstance - TemplateType = Any # union of template classes (ParticleTemplate, ResidueTemplate, ...) +InstanceType = Any # union of instance classes (ParticleInstance, ResidueInstance, ...) - -class _DFManagement: +class Manager: """ - Storage: - templates[pmb_type][template_name] = pd.DataFrame - instances[pmb_type][particle_id] = InstanceModel + The canonical database manager for pyMBE. + + This class stores all templates, instances, and reactions in structured, + explicit dictionaries. + + All I/O operations (CSV/JSON save/load) operate through Manager. + + Attributes + ---------- + units : UnitRegistry + Pint unit registry used to reconstruct physical quantities from storage. + + templates : dict[str, dict[str, TemplateType]] + Templates indexed by type and name. + Example: templates["particle"]["A"] → ParticleTemplate + + instances : dict[str, dict[int, InstanceType]] + Instances indexed by type and id. + Example: instances["particle"][5] → ParticleInstance + + reactions : dict[str, Reaction] + Chemical reactions keyed by reaction name. """ def __init__(self,units): + """ + Initialize an empty structured database. + + Args: + units (UnitRegistry): Pint unit registry used to rebuild quantities.
+ """ self.units = units - # templates: pmb_type -> (name -> template) self.templates: Dict[str, Dict[str, TemplateType]] = {} - # instances: pmb_type -> (id -> instance) - self.instances: Dict[str, Dict[int, Any]] = {} + self.instances: Dict[str, Dict[int, InstanceType]] = {} self.reactions: Dict[str, Reaction] = {} - # ---------------------------------------- - # TEMPLATE MANAGEMENT - # ---------------------------------------- - def register_template(self, template: TemplateType): - pmb_type = getattr(template, "pmb_type", None) - if pmb_type is None: - # infer from class - if isinstance(template, ParticleTemplate): - pmb_type = "particle" - elif isinstance(template, ResidueTemplate): - pmb_type = "residue" - elif isinstance(template, MoleculeTemplate): - pmb_type = "molecule" - elif isinstance(template, BondTemplate): - pmb_type = "bond" - elif isinstance(template, PeptideTemplate): - pmb_type = "peptide" - elif isinstance(template, ProteinTemplate): - pmb_type = "protein" - elif isinstance(template, HydrogelTemplate): - pmb_type = "hydrogel" + def _get_instances_df(self, pmb_type): + """ + Returns a DataFrame containing all instance objects of a given pyMBE type. + + Args: + pmb_type (str): + The instance type to query. Must be a key in + `self.instances`, such as `"particle"` or `"residue"`. + + Returns: + pandas.DataFrame: + A DataFrame where each row corresponds to one registered + instance of the specified PMB type. If no instances exist, + an empty DataFrame is returned. + + Notes: + - Missing integer identifiers (e.g., `residue_id`) are stored as + `pandas.NA` to ensure proper nullable integer handling. + - Particle and residue instances receive custom row structures; + all other instance types use direct model dumps. 
+ """ + rows = [] + if pmb_type not in self.instances: + return pd.DataFrame(rows) + for inst in self.instances[pmb_type].values(): + if pmb_type == "particle": + rows.append({ + "pmb_type": pmb_type, + "name": inst.name, + "particle_id": inst.particle_id, + "initial_state": inst.initial_state, + "residue_id": int(inst.residue_id) if inst.residue_id is not None else pd.NA, + "molecule_id": int(inst.molecule_id) if inst.molecule_id is not None else pd.NA, + }) + elif pmb_type == "residue": + rows.append({ + "pmb_type": pmb_type, + "name": inst.name, + "residue_id": inst.residue_id, + "molecule_id": int(inst.molecule_id) if inst.molecule_id is not None else pd.NA, + }) else: - raise TypeError("Unknown template type; set attribute pmb_type or use supported templates") + # Generic representation for other types + rows.append(inst.model_dump()) + return pd.DataFrame(rows) - self.templates.setdefault(pmb_type, {}) + def _get_reactions_df(self): + """ + Returns a DataFrame summarizing all registered chemical reactions. - if template.name in self.templates[pmb_type]: - raise ValueError(f"Template '{template.name}' exists in '{pmb_type}'") + Returns: + pandas.DataFrame: + A DataFrame where each row corresponds to one reaction. + + Notes: + - Participant objects are expected to expose ``state_name`` and + ``coefficient`` attributes. + - Stoichiometry is stored as a single dictionary per row to allow + flexible downstream manipulation. 
+ """ + rows = [] + for r in self.reactions.values(): + stoich = { + f"{p.state_name}": p.coefficient + for p in r.participants + } + rows.append({ + "reaction": r.name, + "stoichiometry": stoich, + "pK": r.pK, + "reaction_type": r.reaction_type, + "metadata": r.metadata, + }) + return pd.DataFrame(rows) - # particle templates must define at least one state - if pmb_type == "particle": - if not hasattr(template, "states") or len(template.states) == 0: - raise ValueError("ParticleTemplate must define at least one state.") - # ensure default_state valid if set - if getattr(template, "default_state", None) is not None and template.default_state not in template.states: - raise ValueError("default_state not in template states") + def _get_templates_df(self, pmb_type): + """ + Returns a DataFrame containing all template definitions of a PMB type. - self.templates[pmb_type][template.name] = template + Args: + pmb_type (str): + The template type to query, e.g. `"particle"`, `"residue"`, + `"molecule"`. - # ---------------------------------------- - # INSTANCE MANAGEMENT - # ---------------------------------------- - def register_instance(self, instance: ParticleInstance): + Returns: + pandas.DataFrame: + A DataFrame representing all templates of the given type. + Particle templates expand to multiple rows, one per state. + Empty DataFrame if no templates for that type exist. + + Notes: + - Unit-bearing fields are converted to plain quantities through + ``to_quantity(self.units)`` to maintain consistent I/O. 
""" - Instance must carry attributes: - - for particle: name (template name), particle_id, state_name - - for residue: name (residue template), residue_id + rows = [] + if pmb_type not in self.templates: + return pd.DataFrame(rows) + for tpl in self.templates[pmb_type].values(): + if pmb_type == "particle": + for sname, st in tpl.states.items(): + rows.append({ + "particle": tpl.name, + "sigma": tpl.sigma.to_quantity(self.units), + "epsilon": tpl.epsilon.to_quantity(self.units), + "cutoff": tpl.cutoff.to_quantity(self.units), + "offset": tpl.offset.to_quantity(self.units), + "state": sname, + "z": st.z, + "es_type": st.es_type + }) + else: + # Generic representation for other types + rows.append(tpl.model_dump()) + return pd.DataFrame(rows) + + def _register_instance(self, instance): + """ + Register an instance of a pyMBE object. + + Args: + instance: Any instance conforming to the pyMBE instance models. + + Raises: + ValueError: If the id or instance model does not exist or is duplicated. """ # infer pmb_type from instance class if isinstance(instance, ParticleInstance): @@ -150,88 +249,100 @@ def register_instance(self, instance: ParticleInstance): self.instances[pmb_type][iid] = instance - # ---------------------------------------- - # REACTIONS - # ---------------------------------------- + def _register_reaction(self, reaction): + """ + Register a chemical or physical reaction. + + Args: + reaction (Reaction): Reaction object. - def register_reaction(self, reaction: Reaction): + Raises: + ValueError: If reaction name already exists. 
+ """ if reaction.name in self.reactions: raise ValueError(f"Reaction '{reaction.name}' already exists.") self.reactions[reaction.name] = reaction - # ---------------------------------------- - # DATAFRAME EXPORT - # ---------------------------------------- - - def get_templates_df(self, pmb_type: str = "particle"): - rows = [] - if pmb_type not in self.templates: - return pd.DataFrame(rows) - for tpl in self.templates[pmb_type].values(): - if pmb_type == "particle": - for sname, st in tpl.states.items(): - rows.append({ - "particle": tpl.name, - "sigma": tpl.sigma.to_quantity(self.units), - "epsilon": tpl.epsilon.to_quantity(self.units), - "cutoff": tpl.cutoff.to_quantity(self.units), - "offset": tpl.offset.to_quantity(self.units), - "state": sname, - "z": st.z, - "es_type": st.es_type - }) - else: - # Generic representation for other types - rows.append(tpl.model_dump()) - return pd.DataFrame(rows) - - def get_instances_df(self, pmb_type: str = "particle"): - rows = [] - if pmb_type not in self.instances: - return pd.DataFrame(rows) - for inst in self.instances[pmb_type].values(): - if pmb_type == "particle": - rows.append({ - "pmb_type": pmb_type, - "name": inst.name, - "particle_id": inst.particle_id, - "initial_state": inst.initial_state, - "residue_id": int(inst.residue_id) if inst.residue_id is not None else pd.NA, - "molecule_id": int(inst.molecule_id) if inst.molecule_id is not None else pd.NA, - }) - elif pmb_type == "residue": - rows.append({ - "pmb_type": pmb_type, - "name": inst.name, - "residue_id": inst.residue_id, - "molecule_id": int(inst.molecule_id) if inst.molecule_id is not None else pd.NA, - }) + def _register_template(self, template): + """ + Register a template. + + Args: + template: Any template object conforming to the pyMBE template models. + + Raises: + ValueError: If a template with the same name already exists. 
+ """ + pmb_type = getattr(template, "pmb_type", None) + if pmb_type is None: + # infer from class + if isinstance(template, ParticleTemplate): + pmb_type = "particle" + elif isinstance(template, ResidueTemplate): + pmb_type = "residue" + elif isinstance(template, MoleculeTemplate): + pmb_type = "molecule" + elif isinstance(template, BondTemplate): + pmb_type = "bond" + elif isinstance(template, PeptideTemplate): + pmb_type = "peptide" + elif isinstance(template, ProteinTemplate): + pmb_type = "protein" + elif isinstance(template, HydrogelTemplate): + pmb_type = "hydrogel" else: - # Generic representation for other types - rows.append(inst.model_dump()) + raise TypeError("Unknown template type; set attribute pmb_type or use supported templates") + self.templates.setdefault(pmb_type, {}) - return pd.DataFrame(rows) + if template.name in self.templates[pmb_type]: + raise ValueError(f"Template '{template.name}' exists in '{pmb_type}'") + # particle templates must define at least one state + if pmb_type == "particle": + if not hasattr(template, "states") or len(template.states) == 0: + raise ValueError("ParticleTemplate must define at least one state.") + # ensure default_state valid if set + if getattr(template, "default_state", None) is not None and template.default_state not in template.states: + raise ValueError("default_state not in template states") - def get_reactions_df(self): - rows = [] - for r in self.reactions.values(): - stoich = { - f"{p.state_name}": p.coefficient - for p in r.participants - } - rows.append({ - "reaction": r.name, - "stoichiometry": stoich, - "pK": r.pK, - "reaction_type": r.reaction_type, - "metadata": r.metadata, - }) - return pd.DataFrame(rows) + self.templates[pmb_type][template.name] = template + + def _update_instance(self, instance_id, pmb_type, attribute, value): + """ + Updates a single attribute of a registered instance. + Only a restricted set of attributes is allowed for each PMB type, + ensuring database consistency. 
- def update_instance(self, instance_id, pmb_type, attribute, value): + Args: + instance_id (Hashable): + Unique identifier of the instance to update. + pmb_type (str): + Instance category, such as ``"particle"`` or ``"residue"``. + attribute (str): + Name of the field to update. + value (Any): + New value to assign to the specified attribute. + + Raises: + KeyError: + If the provided ``instance_id`` does not exist for the given + ``pmb_type``. + ValueError: + If attempting to modify an attribute that is not permitted + for the instance's PMB type. + + Notes: + - Allowed updates: + * ``particle``: ``initial_state``, ``residue_id``, ``molecule_id`` + * ``residue``: ``molecule_id`` + * All other types: no attribute updates allowed. + - The method replaces the instance with a new Pydantic model + using ``model_copy(update=...)`` to maintain immutability and + avoid partial mutations of internal state. + """ + if instance_id not in self.instances[pmb_type]: raise KeyError(f"Instance '{instance_id}' not found in type '{pmb_type}'.") diff --git a/pyMBE/storage/pint_quantity.py b/pyMBE/storage/pint_quantity.py index af07f07..d5bd350 100644 --- a/pyMBE/storage/pint_quantity.py +++ b/pyMBE/storage/pint_quantity.py @@ -1,4 +1,22 @@ -# pyMBE/storage/quantity_field.py +# +# Copyright (C) 2025 pyMBE-dev team +# +# This file is part of pyMBE. +# +# pyMBE is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# pyMBE is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . 
+# + from dataclasses import dataclass from typing import Any from pint import UnitRegistry, Quantity @@ -16,19 +34,37 @@ @dataclass class PintQuantity: """ - Internal, SI-based stored representation of a Pint quantity. - Stores magnitude and unit string using base/SI units. + Internal representation of a Pint quantity for pyMBE storage. + + Stores the magnitude and units of a quantity in a base/SI-like format + along with its logical physical dimension. + + Attributes: + magnitude (float): Numeric value of the quantity in the stored units. + units (str): String representation of the units (e.g., "nm", "meV", "meV/nm**2"). + dimension (str): Logical dimension of the quantity, e.g., "length", "energy", etc. """ magnitude: float - units: str # string representation of base units (e.g. "meter", "joule") - dimension: str # logical dimension: "length", "energy", ... + units: str + dimension: str @classmethod - def from_quantity(cls, q: Quantity, expected_dimension: str, ureg: UnitRegistry): + def from_quantity(cls, q, expected_dimension, ureg): """ - Validate `q` has the expected dimension using the provided ureg, - convert to base units (SI-like) and store magnitude + units. + Create a PintQuantity from a Pint Quantity, validating its dimension. + + Args: + q (Quantity): Pint Quantity to store. + expected_dimension (str): Expected logical dimension ("length", "energy", etc.). + ureg (UnitRegistry): Pint UnitRegistry used for unit conversion. + + Returns: + PintQuantity: Internal representation in SI-like units. + + Raises: + TypeError: If `q` is not a pint.Quantity. + ValueError: If the quantity does not match the expected dimension. 
""" if not isinstance(q, Quantity): raise TypeError("from_quantity expects a pint.Quantity") @@ -63,17 +99,37 @@ def from_quantity(cls, q: Quantity, expected_dimension: str, ureg: UnitRegistry) unit_str = rep_unit_str return cls(magnitude=mag, units=unit_str, dimension=expected_dimension) - def to_quantity(self, ureg: UnitRegistry) -> Quantity: + def to_quantity(self, ureg): """ - Reconstruct a pint.Quantity using the provided UnitRegistry. - The units string should be parseable by ureg. + Convert the stored PintQuantity back into a Pint Quantity. + + Args: + ureg (UnitRegistry): Pint UnitRegistry used to construct the Quantity. + + Returns: + Quantity: Pint Quantity with the stored magnitude and units. """ return self.magnitude * ureg(self.units) - def to_dict(self) -> dict: + def to_dict(self): + """ + Serialize the PintQuantity to a dictionary. + + Returns: + dict: Dictionary with keys "magnitude", "units", and "dimension". + """ return {"magnitude": self.magnitude, "units": self.units, "dimension": self.dimension} @classmethod - def from_dict(cls, d: dict): + def from_dict(cls, d): + """ + Deserialize a PintQuantity from a dictionary. + + Args: + d (dict): Dictionary containing "magnitude", "units", and "dimension". + + Returns: + PintQuantity: Reconstructed PintQuantity object. 
+ """ return cls(magnitude=d["magnitude"], units=d["units"], dimension=d["dimension"]) diff --git a/pyMBE/storage/reactions/io.py b/pyMBE/storage/reactions/io.py deleted file mode 100644 index fadb53f..0000000 --- a/pyMBE/storage/reactions/io.py +++ /dev/null @@ -1,28 +0,0 @@ -import json -from typing import Dict -from .reaction import Reaction, ReactionParticipant - - -def load_reaction_set(path: str) -> Dict[str, Reaction]: - with open(path, "r") as f: - data = json.load(f) - - reactions = {} - for name, rdata in data["data"].items(): - - participants = [ - ReactionParticipant(**p) - for p in rdata["participants"] - ] - - reaction = Reaction( - name=name, - participants=participants, - constant=rdata["constant"], - reaction_type=rdata.get("reaction_type", "acid_base"), - metadata=rdata.get("metadata") - ) - - reactions[name] = reaction - - return reactions diff --git a/pyMBE/storage/reactions/reaction.py b/pyMBE/storage/reactions/reaction.py index 0448e7f..9c359c0 100644 --- a/pyMBE/storage/reactions/reaction.py +++ b/pyMBE/storage/reactions/reaction.py @@ -1,18 +1,98 @@ +# +# Copyright (C) 2025 pyMBE-dev team +# +# This file is part of pyMBE. +# +# pyMBE is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# pyMBE is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . +# + from typing import List, Dict, Optional from pydantic import BaseModel, Field, field_validator class ReactionParticipant(BaseModel): """ - One participant in a reaction. 
- coefficient < 0 -> reactant - coefficient > 0 -> product + Represents one participant in a chemical reaction. + + A reaction participant is defined by a particle name, a specific + state of that particle, and an integer stoichiometric coefficient. + Negative coefficients indicate reactants, whereas positive + coefficients indicate products. + + Attributes: + particle_name (str): + The name of the particle template participating in the reaction. + state_name (str): + The state of the particle (e.g., protonation state, charge state). + coefficient (int): + Stoichiometric coefficient of the participant: + - ``coefficient < 0`` → reactant + - ``coefficient > 0`` → product + + Notes: + - Coefficients of zero are forbidden. + - Together, ``particle_name`` and ``state_name`` identify a unique + chemical species in the simulation framework. """ particle_name: str state_name: str coefficient: int class Reaction(BaseModel): + """ + Defines a chemical reaction between particle states. + + A ``Reaction`` object captures the stoichiometry and thermodynamic + properties of a chemical equilibrium. + This can represent phenomena such as acid–base reactions or any multi-species reaction scheme + supported by the simulation engine. + + Attributes: + name (str): + Unique identifier for the reaction. + participants (List[ReactionParticipant]): + List of reactants and products with stoichiometric coefficients. + Must include at least two participants. + pK (float): + Reaction equilibrium parameter (e.g., pKa, log K). The meaning + depends on ``reaction_type``. + reaction_type (str): + A categorical descriptor of the reaction, such as ``"acid_base"`` + metadata (Optional[Dict]): + Optional free-form metadata for additional reaction details, + notes, or model-specific configuration. + + Validation: + - At least one participant are required. + - All participants must have non-zero stoichiometric coefficients.
+ + Examples: + Acid dissociation of HA: + HA ↔ H⁺ + A⁻ + + Represented as: + Reaction( + name="acid_dissociation", + participants=[ + ReactionParticipant("A", "HA", -1), + ReactionParticipant("A", "A-", 1), + ReactionParticipant("H", "H+", 1), + ], + pK=4.75, + reaction_type="acid_base", + ) + """ name: str participants: List[ReactionParticipant] pK: float = Field(..., description="pKa, logK, eq constant, etc.") @@ -22,7 +102,7 @@ class Reaction(BaseModel): @field_validator("participants") def at_least_two_participants(cls, v): if len(v) < 2: - raise ValueError("A reaction must have at least 2 participants.") + raise ValueError("A reaction must have at least 1 participant.") return v @field_validator("participants") diff --git a/pyMBE/storage/templates/bond.py b/pyMBE/storage/templates/bond.py index 6d3b5b4..33e8814 100644 --- a/pyMBE/storage/templates/bond.py +++ b/pyMBE/storage/templates/bond.py @@ -1,9 +1,45 @@ +# +# Copyright (C) 2025 pyMBE-dev team +# +# This file is part of pyMBE. +# +# pyMBE is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# pyMBE is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . +# + from typing import Dict, Literal from ..base_type import PMBBaseModel from ..pint_quantity import PintQuantity class BondTemplate(PMBBaseModel): + """ + Template defining a bond in a pyMBE simulation. + + Attributes: + pmb_type (Literal["bond"]): Fixed type identifier for this template. Always "bond". + name (str): Unique name of the bond template, e.g., "HARMONIC_default". 
+ bond_type (str): Type of bond potential. Examples: "HARMONIC", "FENE". + parameters (Dict[str, PintQuantity]): Dictionary of bond parameters. + Common keys: + - "k": Force constant (energy / distance^2) + - "r0": Equilibrium bond length + - "d_r_max": Maximum bond extension (for FENE) + l0 (PintQuantity): Initial bond length when the bond is instantiated. + + Notes: + Values are stored as PintQuantity objects for unit-aware calculations. + """ pmb_type: Literal["bond"] = "bond" name: str # e.g. "HARMONIC_default" bond_type: str # "HARMONIC", "FENE" diff --git a/pyMBE/storage/templates/hydrogel.py b/pyMBE/storage/templates/hydrogel.py index 166614b..d4459ad 100644 --- a/pyMBE/storage/templates/hydrogel.py +++ b/pyMBE/storage/templates/hydrogel.py @@ -1,14 +1,48 @@ +# +# Copyright (C) 2025 pyMBE-dev team +# +# This file is part of pyMBE. +# +# pyMBE is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# pyMBE is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . +# + from typing import List from pydantic import Field, BaseModel from ..base_type import PMBBaseModel class HydrogelNode(BaseModel): + """ + Represents a node in a hydrogel network. + + Attributes: + particle_name (str): Name of the particle at this node. + lattice_index (List[int]): 3D lattice position of the node. Must be a list of length 3. + """ particle_name: str lattice_index: List[int] # must be length 3 class HydrogelChain(BaseModel): + """ + Represents a polymer chain between two hydrogel nodes. 
+ + Attributes: + node_start (str): Name of the starting node. + node_end (str): Name of the ending node. + residue_list (List[str]): List of residue names forming the chain between the nodes. + """ node_start: str node_end: str residue_list: List[str] # list of residue names @@ -16,9 +50,16 @@ class HydrogelChain(BaseModel): class HydrogelTemplate(PMBBaseModel): """ - A hydrogel definition consists of: - - node_map: list of nodes with particle names and lattice positions - - chain_map: list of node-node polymer chains with residue lists + Template defining a hydrogel network in pyMBE. + + A hydrogel template consists of nodes (particles at specific lattice positions) + and polymer chains connecting those nodes. + + Attributes: + pmb_type (str): Fixed type identifier for this template. Always "hydrogel". + name (str): Unique name of the hydrogel template. + node_map (List[HydrogelNode]): List of nodes defining the hydrogel lattice. + chain_map (List[HydrogelChain]): List of polymer chains connecting nodes. """ pmb_type: str = Field(default="hydrogel", frozen=True) name: str diff --git a/pyMBE/storage/templates/molecule.py b/pyMBE/storage/templates/molecule.py index 7e3c77b..4a806f7 100644 --- a/pyMBE/storage/templates/molecule.py +++ b/pyMBE/storage/templates/molecule.py @@ -1,7 +1,34 @@ +# +# Copyright (C) 2025 pyMBE-dev team +# +# This file is part of pyMBE. +# +# pyMBE is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# pyMBE is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . 
+# + from pyMBE.storage.base_type import PMBBaseModel from pydantic import Field class MoleculeTemplate(PMBBaseModel): + """ + Template defining a molecule in pyMBE. + + Attributes: + pmb_type (str): Fixed type identifier for this template. Always "molecule". + name (str): Unique name of the molecule template. + residue_list (List[str]): Ordered list of residue names that make up the molecule. + """ pmb_type: str = Field(default="molecule", frozen=True) name: str residue_list: list[str] diff --git a/pyMBE/storage/templates/particle.py b/pyMBE/storage/templates/particle.py index 1286680..6e5b281 100644 --- a/pyMBE/storage/templates/particle.py +++ b/pyMBE/storage/templates/particle.py @@ -1,3 +1,22 @@ +# +# Copyright (C) 2025 pyMBE-dev team +# +# This file is part of pyMBE. +# +# pyMBE is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# pyMBE is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . +# + from typing import Dict, Literal from pydantic import Field, field_validator @@ -5,6 +24,15 @@ from ..pint_quantity import PintQuantity class ParticleState(PMBBaseModel): + """ + Represents a single state of a particle in pyMBE. + + Attributes: + pmb_type (Literal["particle_state"]): Fixed type identifier. Always "particle_state". + name (str): Name of the particle state, e.g., "HA", "A-", "H+". + z (int): Charge of the particle in this state. + es_type (float): Identifier for the state used in Espresso simulations. + """ pmb_type: Literal["particle_state"] = "particle_state" name: str # e.g. 
"HA", "A-", "H+" z: int @@ -13,10 +41,16 @@ class ParticleState(PMBBaseModel): class ParticleTemplate(PMBBaseModel): """ - Template describing the type of particle: - - sigma, epsilon - - allowed states - - template_name = unique string identifier + Template describing a particle type, including interaction parameters and allowed states. + + Attributes: + pmb_type (str): Fixed type identifier. Always "particle". + sigma (PintQuantity): Particle diameter or size parameter. + epsilon (PintQuantity): Depth of the LJ potential well (interaction strength). + cutoff (PintQuantity): Cutoff distance for the LJ potential. + offset (PintQuantity): Offset distance for the LJ potential. + states (Dict[str, ParticleState]): Dictionary of allowed particle states. + Keys are state names, values are ParticleState instances. """ pmb_type: str = Field(default="particle", frozen=True) @@ -26,18 +60,20 @@ class ParticleTemplate(PMBBaseModel): epsilon: PintQuantity states: Dict[str, ParticleState] = {} - # ---------------- Validators ----------------- + def add_state(self, state): + """ + Add a new state to the particle template. + + This method registers a new `ParticleState` in the template's `states` dictionary. + If a state with the same name already exists, a `ValueError` is raised. + + Args: + state (ParticleState): The particle state to add. - def add_state(self, state: ParticleState): + Raises: + ValueError: If a state with the same name already exists in the template. + """ if state.name in self.states: raise ValueError(f"State {state.name} already exists in template {self.name}") self.states[state.name] = state - @classmethod - def single_state(cls, name: str, z: int, es_type: str, epsilon: float = 1.0): - """ - Convenience constructor for particles such as H+ that only need one state. 
- """ - state = ParticleState(name=name, z=z, es_type=es_type) - return cls(name=name, epsilon=epsilon, states={name: state}) - diff --git a/pyMBE/storage/templates/peptide.py b/pyMBE/storage/templates/peptide.py index 9115706..0b4ae3a 100644 --- a/pyMBE/storage/templates/peptide.py +++ b/pyMBE/storage/templates/peptide.py @@ -1,7 +1,36 @@ +# +# Copyright (C) 2025 pyMBE-dev team +# +# This file is part of pyMBE. +# +# pyMBE is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# pyMBE is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . +# + from pyMBE.storage.base_type import PMBBaseModel from pydantic import Field class PeptideTemplate(PMBBaseModel): + """ + Template defining a peptide in a pyMBE simulation. + + Attributes: + pmb_type (str): Fixed type identifier. Always "peptide". + name (str): Unique name of the peptide template. + model (str): Name or type of the model used for this peptide. + residue_list (List[str]): Ordered list of residue names that make up the peptide. + sequence (List[str]): Ordered sequence of residues representing the peptide's structure. + """ pmb_type: str = Field(default="peptide", frozen=True) name: str model: str diff --git a/pyMBE/storage/templates/protein.py b/pyMBE/storage/templates/protein.py index f027cda..6b8e7cb 100644 --- a/pyMBE/storage/templates/protein.py +++ b/pyMBE/storage/templates/protein.py @@ -2,6 +2,16 @@ from pydantic import Field class ProteinTemplate(PMBBaseModel): + """ + Template defining a protein in a pyMBE simulation. 
+ + Attributes: + pmb_type (str): Fixed type identifier. Always "protein". + name (str): Unique name of the protein template. + model (str): Name or type of the model used for this protein. + residue_list (List[str]): Ordered list of residue names that compose the protein. + sequence (List[str]): Ordered sequence of residues representing the protein's structure. + """ pmb_type: str = Field(default="protein", frozen=True) name: str model: str diff --git a/pyMBE/storage/templates/residue.py b/pyMBE/storage/templates/residue.py index 85f6c0b..8fb7f34 100644 --- a/pyMBE/storage/templates/residue.py +++ b/pyMBE/storage/templates/residue.py @@ -2,6 +2,16 @@ from pydantic import Field class ResidueTemplate(PMBBaseModel): + """ + Template defining a residue in a pyMBE simulation. + + Attributes: + pmb_type (str): Fixed type identifier. Always "residue". + name (str): Unique name of the residue template. + central_bead (str): Name of the central bead representing the residue. + side_chains (List[str]): List of side-chain names attached to the central bead. + Defaults to an empty list if no side chains are present. + """ pmb_type: str = Field(default="residue", frozen=True) name: str central_bead: str diff --git a/test.py b/test.py index edc9ee7..288651e 100644 --- a/test.py +++ b/test.py @@ -1,5 +1,5 @@ # test.py -from pyMBE.storage.df_management import _DFManagement +from pyMBE.storage.manager import Manager from pyMBE.storage.templates.particle import ParticleTemplate, ParticleState from pyMBE.storage.instances.particle import ParticleInstance from pyMBE.storage.reactions.reaction import Reaction, ReactionParticipant @@ -32,7 +32,7 @@ def main(): units.define(f'reduced_energy = {kT} ') units.define(f'reduced_length = {unit_length}') - db = _DFManagement(units=units) + db = Manager(units=units) # ============================================================ # 1. 
CREATE PARTICLE TEMPLATES + STATES @@ -56,24 +56,24 @@ def main(): tpl_H.add_state(ParticleState(name="H+", z=+1, es_type=2)) # Register templates - db.register_template(tpl_A) - db.register_template(tpl_H) + db._register_template(tpl_A) + db._register_template(tpl_H) print("\n=== Particle Templates DataFrame ===") - print(db.get_templates_df()) + print(db._get_templates_df(pmb_type="particle")) tpl_R1 = ResidueTemplate(name="R1", central_bead="A", side_chains=["H","A"]) tpl_R2 = ResidueTemplate(name="R2", central_bead="HA", side_chains=["H","HA"]) - db.register_template(tpl_R1) - db.register_template(tpl_R2) + db._register_template(tpl_R1) + db._register_template(tpl_R2) print("\n=== Residue Templates DataFrame ===") - print(db.get_templates_df(pmb_type="residue")) + print(db._get_templates_df(pmb_type="residue")) tpl_M1 = MoleculeTemplate(name="M1", residue_list=["R1","R2"]) - db.register_template(tpl_M1) + db._register_template(tpl_M1) print("\n=== Molecule Templates DataFrame ===") - print(db.get_templates_df(pmb_type="molecule")) + print(db._get_templates_df(pmb_type="molecule")) parameters = {"k": PintQuantity.from_quantity(q=100.0 * units.reduced_energy / (units.reduced_length**2), expected_dimension="energy/length**2", ureg=units), "r0": PintQuantity.from_quantity(q=1.0 * units.reduced_length, expected_dimension="length", ureg=units),} @@ -84,25 +84,25 @@ def main(): l0=PintQuantity.from_quantity(q=1.0 * units.reduced_length, expected_dimension="length", ureg=units)) - db.register_template(tpl_bond) + db._register_template(tpl_bond) print("\n=== Bond Templates DataFrame ===") - print(db.get_templates_df(pmb_type="bond")) + print(db._get_templates_df(pmb_type="bond")) print("\n=== Peptide Templates DataFrame ===") tpl_P1 = PeptideTemplate(name="Peptide1", model="Model1", residue_list=["R1","R2"], sequence=["R1","R2"]) - db.register_template(tpl_P1) - print(db.get_templates_df(pmb_type="peptide")) + db._register_template(tpl_P1) + 
print(db._get_templates_df(pmb_type="peptide")) print("\n=== Protein Templates DataFrame ===") tpl_PR1 = ProteinTemplate(name="Protein1", model="ModelP1", residue_list=["R1","R2"], sequence=["R1","R2"]) - db.register_template(tpl_PR1) - print(db.get_templates_df(pmb_type="protein")) + db._register_template(tpl_PR1) + print(db._get_templates_df(pmb_type="protein")) print("\n=== Hydrogel Templates DataFrame ===") @@ -112,8 +112,8 @@ def main(): tpl_HG1 = HydrogelTemplate(name="Hydrogel1", node_map=[node1, node2], chain_map=[chain1]) - db.register_template(tpl_HG1) - print(db.get_templates_df(pmb_type="hydrogel")) + db._register_template(tpl_HG1) + print(db._get_templates_df(pmb_type="hydrogel")) # ============================================================ # 2. CREATE INSTANCES (optional for testing) @@ -123,17 +123,17 @@ def main(): inst2 = ParticleInstance(name="A", particle_id=2, initial_state="A-",residue_id=0) inst3 = ParticleInstance(name="H", particle_id=3, initial_state="H+") - db.register_instance(inst1) - db.register_instance(inst2) - db.register_instance(inst3) + db._register_instance(inst1) + db._register_instance(inst2) + db._register_instance(inst3) print("\n=== Particle Instances DataFrame ===") - print(db.get_instances_df()) + print(db._get_instances_df(pmb_type="particle")) - db.update_instance(pmb_type="particle", instance_id=1, attribute="residue_id", value=int(0)) + db._update_instance(pmb_type="particle", instance_id=1, attribute="residue_id", value=int(0)) print("\n=== Particle Instances DataFrame (after update) ===") - print(db.get_instances_df()) + print(db._get_instances_df(pmb_type="particle")) inst1 = ResidueInstance(name="R1", residue_id=1) @@ -143,45 +143,45 @@ def main(): residue_id=3, molecule_id=0) - db.register_instance(inst1) - db.register_instance(inst2) - db.register_instance(inst3) + db._register_instance(inst1) + db._register_instance(inst2) + db._register_instance(inst3) print("\n=== Residue Instances DataFrame ===") - 
print(db.get_instances_df(pmb_type="residue")) + print(db._get_instances_df(pmb_type="residue")) - db.update_instance(pmb_type="residue",instance_id=1, attribute="molecule_id", value=int(0)) + db._update_instance(pmb_type="residue",instance_id=1, attribute="molecule_id", value=int(0)) print("\n=== Residue Instances DataFrame (after update)===") - print(db.get_instances_df(pmb_type="residue")) + print(db._get_instances_df(pmb_type="residue")) inst1 = MoleculeInstance(name="M1", molecule_id=1) inst2 = MoleculeInstance(name="M1", molecule_id=2) - db.register_instance(inst1) - db.register_instance(inst2) + db._register_instance(inst1) + db._register_instance(inst2) print("\n=== Molecule Instances DataFrame ===") - print(db.get_instances_df(pmb_type="molecule")) + print(db._get_instances_df(pmb_type="molecule")) inst_bond = BondInstance(name="A1-A2", bond_id=1, particle_id1=1, particle_id2=2) - db.register_instance(inst_bond) + db._register_instance(inst_bond) print("\n=== Bond Instances DataFrame ===") - print(db.get_instances_df(pmb_type="bond")) + print(db._get_instances_df(pmb_type="bond")) print("\n=== Peptide Instances DataFrame ===") inst_peptide1 = PeptideInstance(name="Peptide1", molecule_id=3) - db.register_instance(inst_peptide1) - print(db.get_instances_df(pmb_type="peptide")) + db._register_instance(inst_peptide1) + print(db._get_instances_df(pmb_type="peptide")) print("\n=== Protein Instances DataFrame ===") inst_protein1 = ProteinInstance(name="Protein1", molecule_id=4) - db.register_instance(inst_protein1) - print(db.get_instances_df(pmb_type="protein")) + db._register_instance(inst_protein1) + print(db._get_instances_df(pmb_type="protein")) print("\n=== Hydrogel Instances DataFrame ===") inst_hydrogel1 = HydrogelInstance(name="Hydrogel1", hydrogel_id=1, molecule_ids=["1","2","3"]) - db.register_instance(inst_hydrogel1) - print(db.get_instances_df(pmb_type="hydrogel")) + db._register_instance(inst_hydrogel1) + 
print(db._get_instances_df(pmb_type="hydrogel")) # ============================================================ @@ -199,7 +199,7 @@ def main(): ], ) - db.register_reaction(rx) + db._register_reaction(rx) # ============================================================ # 4. PRINT DATAFRAMES @@ -207,10 +207,10 @@ def main(): print("\n=== Instances DataFrame ===") - print(db.get_instances_df()) + print(db._get_instances_df(pmb_type="particle")) print("\n=== Reactions DataFrame ===") - print(db.get_reactions_df()) + print(db._get_reactions_df()) # ------------------------- # Now create a different registry with different reduced unit definitions @@ -228,38 +228,38 @@ def main(): # create a new DFManager that uses the same stored templates but different ureg # For this demo we will copy the stored templates (in real use you would re-load from serialized storage) - db2 = _DFManagement(units=ureg2) + db2 = Manager(units=ureg2) # re-insert templates by transferring stored representation (simulate loading) for ptype, tdict in db.templates.items(): for tname, t in tdict.items(): - db2.register_template(t) + db2._register_template(t) print("\nTemplates shown with registry 2 (different reduced units):") - print(db2.get_templates_df("particle")) + print(db2._get_templates_df("particle")) - io.save_database_csv(db, folder="test_db_csv") + io._save_database_csv(db, folder="test_db_csv") - db3 = _DFManagement(units=ureg2) + db3 = Manager(units=ureg2) - io.load_database_csv(db3, folder="test_db_csv") + io._load_database_csv(db3, folder="test_db_csv") print("\nLoaded DB3 Templates DataFrame:") - print(db3.get_templates_df("particle")) - print(db3.get_templates_df("residue")) - print(db3.get_templates_df("molecule")) - print(db3.get_templates_df("bond")) - print(db3.get_templates_df("peptide")) - print(db3.get_templates_df("protein")) - print(db3.get_templates_df("hydrogel")) + print(db3._get_templates_df("particle")) + print(db3._get_templates_df("residue")) + 
print(db3._get_templates_df("molecule")) + print(db3._get_templates_df("bond")) + print(db3._get_templates_df("peptide")) + print(db3._get_templates_df("protein")) + print(db3._get_templates_df("hydrogel")) print("\nLoaded DB3 Instances DataFrame:") - print(db3.get_instances_df("particle")) - print(db3.get_instances_df("residue")) - print(db3.get_instances_df("molecule")) - print(db3.get_instances_df("bond")) - print(db3.get_instances_df("peptide")) - print(db3.get_instances_df("protein")) - print(db3.get_instances_df("hydrogel")) + print(db3._get_instances_df("particle")) + print(db3._get_instances_df("residue")) + print(db3._get_instances_df("molecule")) + print(db3._get_instances_df("bond")) + print(db3._get_instances_df("peptide")) + print(db3._get_instances_df("protein")) + print(db3._get_instances_df("hydrogel")) print("\nLoaded DB3 Reactions DataFrame:") - print(db3.get_reactions_df()) + print(db3._get_reactions_df()) if __name__ == "__main__": From ecc776d8d9effb28c333650e1836c810d6493f31 Mon Sep 17 00:00:00 2001 From: pmblanco Date: Thu, 4 Dec 2025 18:06:05 +0100 Subject: [PATCH 06/55] start refactoring pyMBE to use the new database --- pyMBE/lib/handy_functions.py | 239 ++++++++ pyMBE/pyMBE.py | 828 +++++++++------------------- pyMBE/storage/base_type.py | 13 +- pyMBE/storage/instances/bond.py | 51 ++ pyMBE/storage/instances/particle.py | 54 +- pyMBE/storage/io.py | 94 +++- pyMBE/storage/manager.py | 193 +++++-- pyMBE/storage/reactions/reaction.py | 85 ++- pyMBE/storage/templates/bond.py | 15 +- pyMBE/storage/templates/hydrogel.py | 9 +- pyMBE/storage/templates/lj.py | 113 ++++ pyMBE/storage/templates/particle.py | 1 + pyMBE/storage/templates/peptide.py | 2 +- pyMBE/storage/templates/protein.py | 2 +- test.py | 211 ++++--- 15 files changed, 1197 insertions(+), 713 deletions(-) create mode 100644 pyMBE/storage/templates/lj.py diff --git a/pyMBE/lib/handy_functions.py b/pyMBE/lib/handy_functions.py index 73869ce..c633a07 100644 --- 
a/pyMBE/lib/handy_functions.py +++ b/pyMBE/lib/handy_functions.py @@ -17,6 +17,245 @@ # along with this program. If not, see . import logging +import re +import numpy as np + +def check_aminoacid_key(key): + """ + Checks if `key` corresponds to a valid aminoacid letter code. + + Args: + key(`str`): key to be checked. + + Returns: + `bool`: True if `key` is a valid aminoacid letter code, False otherwise. + """ + valid_AA_keys=['V', #'VAL' + 'I', #'ILE' + 'L', #'LEU' + 'E', #'GLU' + 'Q', #'GLN' + 'D', #'ASP' + 'N', #'ASN' + 'H', #'HIS' + 'W', #'TRP' + 'F', #'PHE' + 'Y', #'TYR' + 'R', #'ARG' + 'K', #'LYS' + 'S', #'SER' + 'T', #'THR' + 'M', #'MET' + 'A', #'ALA' + 'G', #'GLY' + 'P', #'PRO' + 'C'] #'CYS' + if key in valid_AA_keys: + return True + else: + return False + +def check_if_metal_ion(key): + """ + Checks if `key` corresponds to a label of a supported metal ion. + + Args: + key(`str`): key to be checked + + + def get_particle_id_map(self, object_name): + ''' + Gets all the ids associated with the object with name `object_name` in `pmb.df` + + Args: + object_name(`str`): name of the object + + Returns: + id_map(`dict`): dict of the structure {"all": [all_ids_with_object_name], "residue_map": {res_id: [particle_ids_in_res_id]}, "molecule_map": {mol_id: [particle_ids_in_mol_id]}, } + ''' + object_type=self._check_supported_molecule(molecule_name=object_name, + valid_pmb_types= ['particle','residue','molecule',"peptide","protein"]) + id_list = [] + mol_map = {} + res_map = {} + def do_res_map(res_ids): + for res_id in res_ids: + res_list=self.df.loc[(self.df['residue_id']== res_id) & (self.df['pmb_type']== "particle")].particle_id.dropna().tolist() + res_map[res_id]=res_list + return res_map + if object_type in ['molecule', 'protein', 'peptide']: + mol_ids = self.df.loc[self.df['name']== object_name].molecule_id.dropna().tolist() + for mol_id in mol_ids: + res_ids = set(self.df.loc[(self.df['molecule_id']== mol_id) & (self.df['pmb_type']== "particle") 
].residue_id.dropna().tolist()) + res_map=do_res_map(res_ids=res_ids) + mol_list=self.df.loc[(self.df['molecule_id']== mol_id) & (self.df['pmb_type']== "particle")].particle_id.dropna().tolist() + id_list+=mol_list + mol_map[mol_id]=mol_list + elif object_type == 'residue': + res_ids = self.df.loc[self.df['name']== object_name].residue_id.dropna().tolist() + res_map=do_res_map(res_ids=res_ids) + id_list=[] + for res_id_list in res_map.values(): + id_list+=res_id_list + elif object_type == 'particle': + id_list = self.d + Returns: + (`bool`): True if `key` is a supported metal ion, False otherwise. + """ + if key in get_metal_ions_charge_number_map().keys(): + return True + else: + return False + +def define_AA_particles(topology_dict, lj_setup_mode, pmb): + valid_lj_setups = ["wca"] + + if lj_setup_mode not in valid_lj_setups: + raise ValueError('Invalid key for the lj setup, supported setup modes are {valid_lj_setups}') + if lj_setup_mode == "wca": + sigma = 1*pmb.units.Quantity("reduced_length") + epsilon = 1*pmb.units.Quantity("reduced_energy") + part_dict={} + sequence=[] + metal_ions_charge_number_map=get_metal_ions_charge_number_map() + defined_particles=[] + for particle in topology_dict.keys(): + particle_name = re.split(r'\d+', particle)[0] + if particle_name not in part_dict.keys(): + if lj_setup_mode == "wca": + part_dict={"sigma": sigma, + "offset": topology_dict[particle]['radius']*2-sigma, + "epsilon": epsilon, + "name": particle_name} + if check_if_metal_ion(key=particle_name): + z=metal_ions_charge_number_map[particle_name] + else: + z=0 + part_dict["z"]=z + part_dict["name"]=particle_name + if check_aminoacid_key(key=particle_name): + sequence.append(particle_name) + if particle_name not in defined_particles: + pmb.define_particle(**part_dict) + defined_particles.append(particle_name) + + +def define_AA_residues(sequence, model, pmb): + """ + Convenience function to define a residue template in the pyMBE database for each aminoacid in peptide and
proteins. + + Args: + pmb(pymbe_library): Instance of the pyMBE library. + sequence(`lst`): Sequence of the peptide or protein. + model(`string`): Model name. Currently only models with 1 bead '1beadAA' or with 2 beads '2beadAA' per amino acid are supported. + """ + + residue_list = [] + for item in sequence: + if model == '1beadAA': + central_bead = item + side_chains = [] + elif model == '2beadAA': + if item in ['c','n', 'G']: + central_bead = item + side_chains = [] + else: + central_bead = 'CA' + side_chains = [item] + residue_name='AA-'+item + if residue_name not in residue_list: + pmb.define_residue(name = residue_name, + central_bead = central_bead, + side_chains = side_chains) + residue_list.append(residue_name) + +def get_metal_ions_charge_number_map(): + """ + Gets a map with the charge numbers of all the metal ions supported. + + Returns: + metal_charge_number_map(dict): Has the structure {"metal_name": metal_charge_number} + + """ + metal_charge_number_map = {"Ca": 2} + return metal_charge_number_map + +def get_lj_parameters(particle_name1, particle_name2, pmb, combining_rule='Lorentz-Berthelot'): + """ + Returns the Lennard-Jones parameters for the interaction between the particle types given by + `particle_name1` and `particle_name2` in `pymbe.df`, calculated according to the provided combining rule. + + Args: + particle_name1 (str): label of the type of the first particle type + particle_name2 (str): label of the type of the second particle type + combining_rule (`string`, optional): combining rule used to calculate `sigma` and `epsilon` for the potential between a pair of particles. Defaults to 'Lorentz-Berthelot'. + + Returns: + {"epsilon": epsilon_value, "sigma": sigma_value, "offset": offset_value, "cutoff": cutoff_value} + + Note: + - Currently, the only `combining_rule` supported is Lorentz-Berthelot. + - If the sigma value of `particle_name1` or `particle_name2` is 0, the function will return an empty dictionary.
No LJ interactions are set up for particles with sigma = 0. + """ + supported_combining_rules=["Lorentz-Berthelot"] + lj_parameters_keys=["sigma","epsilon","offset","cutoff"] + if combining_rule not in supported_combining_rules: + raise ValueError(f"Combining_rule {combining_rule} currently not implemented in pyMBE, valid keys are {supported_combining_rules}") + lj_parameters={} + for key in lj_parameters_keys: + lj_parameters[key]=[] + # Search the LJ parameters of the type pair + for name in [particle_name1,particle_name2]: + for key in lj_parameters_keys: + lj_parameters[key].append(getattr(pmb.db.get_template(pmb_type="particle", name=name), key)) + # If one of the particle has sigma=0, no LJ interations are set up between that particle type and the others + if not all(sigma_value.magnitude for sigma_value in lj_parameters["sigma"]): + return {} + # Apply combining rule + if combining_rule == 'Lorentz-Berthelot': + lj_parameters["sigma"]=(lj_parameters["sigma"][0]+lj_parameters["sigma"][1])/2 + lj_parameters["cutoff"]=(lj_parameters["cutoff"][0]+lj_parameters["cutoff"][1])/2 + lj_parameters["offset"]=(lj_parameters["offset"][0]+lj_parameters["offset"][1])/2 + lj_parameters["epsilon"]=np.sqrt(lj_parameters["epsilon"][0]*lj_parameters["epsilon"][1]) + return lj_parameters + + +def calculate_initial_bond_length(bond_object, bond_type, epsilon, sigma, cutoff, offset): + """ + Calculates the initial bond length that is used when setting up molecules, + based on the minimum of the sum of bonded and short-range (LJ) interactions. 
+ + Args: + bond_object(`espressomd.interactions.BondedInteractions`): instance of a bond object from espressomd library + bond_type(`str`): label identifying the used bonded potential + epsilon(`pint.Quantity`): LJ epsilon of the interaction between the particles + sigma(`pint.Quantity`): LJ sigma of the interaction between the particles + cutoff(`pint.Quantity`): cutoff-radius of the LJ interaction + offset(`pint.Quantity`): offset of the LJ interaction + """ + def truncated_lj_potential(x, epsilon, sigma, cutoff,offset): + if x>cutoff: + return 0.0 + else: + return 4*epsilon*((sigma/(x-offset))**12-(sigma/(x-offset))**6) - 4*epsilon*((sigma/cutoff)**12-(sigma/cutoff)**6) + + epsilon_red=epsilon.to('reduced_energy').magnitude + sigma_red=sigma.to('reduced_length').magnitude + cutoff_red=cutoff.to('reduced_length').magnitude + offset_red=offset.to('reduced_length').magnitude + + if bond_type == "harmonic": + r_0 = bond_object.params.get('r_0') + k = bond_object.params.get('k') + l0 = scipy.optimize.minimize(lambda x: 0.5*k*(x-r_0)**2 + truncated_lj_potential(x, epsilon_red, sigma_red, cutoff_red, offset_red), x0=r_0).x + elif bond_type == "FENE": + r_0 = bond_object.params.get('r_0') + k = bond_object.params.get('k') + d_r_max = bond_object.params.get('d_r_max') + l0 = scipy.optimize.minimize(lambda x: -0.5*k*(d_r_max**2)*np.log(1-((x-r_0)/d_r_max)**2) + truncated_lj_potential(x, epsilon_red, sigma_red, cutoff_red,offset_red), x0=1.0).x + return l0 + + def setup_electrostatic_interactions(units, espresso_system, kT, c_salt=None, solvent_permittivity=78.5, method='p3m', tune_p3m=True, accuracy=1e-3, params=None, verbose=False): """ diff --git a/pyMBE/pyMBE.py b/pyMBE/pyMBE.py index ab69f8c..3fee30f 100644 --- a/pyMBE/pyMBE.py +++ b/pyMBE/pyMBE.py @@ -25,37 +25,66 @@ import scipy.optimize import logging import importlib.resources -from pyMBE.storage.df_management import _DFManagement as _DFm +from pyMBE.storage.manager import Manager +from 
pyMBE.storage.templates.particle import ParticleTemplate, ParticleState +from pyMBE.storage.instances.particle import ParticleInstance +from pyMBE.storage.reactions.reaction import Reaction, ReactionParticipant +from pyMBE.storage.pint_quantity import PintQuantity +from pyMBE.storage.templates.residue import ResidueTemplate +from pyMBE.storage.instances.residue import ResidueInstance +from pyMBE.storage.templates.molecule import MoleculeTemplate +from pyMBE.storage.instances.molecule import MoleculeInstance +from pyMBE.storage.templates.bond import BondTemplate +from pyMBE.storage.instances.bond import BondInstance +from pyMBE.storage.templates.peptide import PeptideTemplate +from pyMBE.storage.instances.peptide import PeptideInstance +from pyMBE.storage.templates.protein import ProteinTemplate +from pyMBE.storage.instances.protein import ProteinInstance +from pyMBE.storage.templates.hydrogel import HydrogelTemplate, HydrogelNode, HydrogelChain +from pyMBE.storage.instances.hydrogel import HydrogelInstance + +import pyMBE.storage.io as io class pymbe_library(): """ - The library for the Molecular Builder for ESPResSo (pyMBE) + Core library for the Molecular Builder for ESPResSo (pyMBE). + + Provides access to fundamental constants, reduced unit setup, and a + database for storing particle, molecule, and reaction information. Attributes: - N_A(`pint.Quantity`): Avogadro number. - Kb(`pint.Quantity`): Boltzmann constant. - e(`pint.Quantity`): Elementary charge. - df(`Pandas.Dataframe`): Dataframe used to bookkeep all the information stored in pyMBE. Typically refered as `pmb.df`. - kT(`pint.Quantity`): Thermal energy. - Kw(`pint.Quantity`): Ionic product of water. Used in the setup of the G-RxMC method. + N_A (pint.Quantity): Avogadro number. + kB (pint.Quantity): Boltzmann constant. + e (pint.Quantity): Elementary charge. + kT (pint.Quantity): Thermal energy at the set temperature. + Kw (pint.Quantity): Ionic product of water. Used in G-RxMC method setup. 
+ db (Manager): Database manager instance for pyMBE objects. + rng (np.random.Generator): Random number generator initialized with the provided seed. + units (pint.UnitRegistry): Pint UnitRegistry for unit-aware calculations. + lattice_builder (optional): Placeholder for lattice builder object, initially None. + root (importlib.resources.Path): Root path to the pyMBE package resources. """ def __init__(self, seed, temperature=None, unit_length=None, unit_charge=None, Kw=None): """ - Initializes the pymbe_library by setting up the reduced unit system with `temperature` and `reduced_length` - and sets up the `pmb.df` for bookkeeping. + Initialize the pyMBE library. + + Sets up the reduced unit system using temperature, unit length, and unit charge, + initializes the pyMBE database, and sets default physical constants. Args: - temperature(`pint.Quantity`,optional): Value of the temperature in the pyMBE UnitRegistry. Defaults to None. - unit_length(`pint.Quantity`, optional): Value of the unit of length in the pyMBE UnitRegistry. Defaults to None. - unit_charge (`pint.Quantity`,optional): Reduced unit of charge defined using the `pmb.units` UnitRegistry. Defaults to None. - Kw (`pint.Quantity`,optional): Ionic product of water in mol^2/l^2. Defaults to None. - - Note: - - If no `temperature` is given, a value of 298.15 K is assumed by default. - - If no `unit_length` is given, a value of 0.355 nm is assumed by default. - - If no `unit_charge` is given, a value of 1 elementary charge is assumed by default. - - If no `Kw` is given, a value of 10^(-14) * mol^2 / l^2 is assumed by default. + seed (int): Seed for the random number generator. + temperature (pint.Quantity, optional): Simulation temperature. Defaults to 298.15 K if None. + unit_length (pint.Quantity, optional): Reference length for reduced units. Defaults to 0.355 nm if None. + unit_charge (pint.Quantity, optional): Reference charge for reduced units. Defaults to 1 elementary charge if None. 
+ Kw (pint.Quantity, optional): Ionic product of water in mol^2/l^2. Defaults to 1e-14 mol^2/l^2 if None. + + Notes: + - Initializes `self.rng` for random number generation. + - Sets fundamental constants: Avogadro number (`N_A`), Boltzmann constant (`kB`), elementary charge (`e`). + - Initializes the reduced units via `set_reduced_units`. + - Prepares an empty database (`self.db`) for pyMBE objects. + - Initializes placeholders for `lattice_builder` and package resource path (`root`). """ # Seed and RNG self.seed=seed @@ -69,30 +98,10 @@ def __init__(self, seed, temperature=None, unit_length=None, unit_charge=None, K temperature=temperature, Kw=Kw) - self.df = _DFm._setup_df() + self.db = Manager(units=self.units) self.lattice_builder = None self.root = importlib.resources.files(__package__) - def _define_particle_entry_in_df(self,name): - """ - Defines a particle entry in pmb.df. - - Args: - name(`str`): Unique label that identifies this particle type. - - Returns: - index(`int`): Index of the particle in pmb.df - """ - - if _DFm._check_if_name_is_defined_in_df(name=name, df=self.df): - index = self.df[self.df['name']==name].index[0] - else: - index = len(self.df) - self.df.at [index, 'name'] = name - self.df.at [index,'pmb_type'] = 'particle' - self.df.fillna(pd.NA, inplace=True) - return index - def _check_supported_molecule(self, molecule_name,valid_pmb_types): """ Checks if the molecule name `molecule_name` is supported by a method of pyMBE. @@ -128,6 +137,22 @@ def _check_if_name_has_right_type(self, name, expected_pmb_type, hard_check=True if hard_check: raise ValueError(f"The name {name} has been defined in the pyMBE DataFrame with a pmb_type = {pmb_type}. This function only supports pyMBE objects with pmb_type = {expected_pmb_type}") return False + def _get_residue_list_from_sequence(self, sequence): + """ + Convinience function to get a `residue_list` from a protein or peptide `sequence`. + + Args: + sequence(`lst`): Sequence of the peptide or protein. 
+ + Returns: + residue_list(`list` of `str`): List of the `name`s of the `residue`s in the sequence of the `molecule`. + """ + residue_list = [] + for item in sequence: + residue_name='AA-'+item + residue_list.append(residue_name) + return residue_list + def add_bonds_to_espresso(self, espresso_system) : """ @@ -308,40 +333,7 @@ def calc_partition_coefficient(charge, c_macro): return {"charges_dict": Z_HH_Donnan, "pH_system_list": pH_system_list, "partition_coefficients": partition_coefficients_list} - def calculate_initial_bond_length(self, bond_object, bond_type, epsilon, sigma, cutoff, offset): - """ - Calculates the initial bond length that is used when setting up molecules, - based on the minimum of the sum of bonded and short-range (LJ) interactions. - - Args: - bond_object(`espressomd.interactions.BondedInteractions`): instance of a bond object from espressomd library - bond_type(`str`): label identifying the used bonded potential - epsilon(`pint.Quantity`): LJ epsilon of the interaction between the particles - sigma(`pint.Quantity`): LJ sigma of the interaction between the particles - cutoff(`pint.Quantity`): cutoff-radius of the LJ interaction - offset(`pint.Quantity`): offset of the LJ interaction - """ - def truncated_lj_potential(x, epsilon, sigma, cutoff,offset): - if x>cutoff: - return 0.0 - else: - return 4*epsilon*((sigma/(x-offset))**12-(sigma/(x-offset))**6) - 4*epsilon*((sigma/cutoff)**12-(sigma/cutoff)**6) - - epsilon_red=epsilon.to('reduced_energy').magnitude - sigma_red=sigma.to('reduced_length').magnitude - cutoff_red=cutoff.to('reduced_length').magnitude - offset_red=offset.to('reduced_length').magnitude - - if bond_type == "harmonic": - r_0 = bond_object.params.get('r_0') - k = bond_object.params.get('k') - l0 = scipy.optimize.minimize(lambda x: 0.5*k*(x-r_0)**2 + truncated_lj_potential(x, epsilon_red, sigma_red, cutoff_red, offset_red), x0=r_0).x - elif bond_type == "FENE": - r_0 = bond_object.params.get('r_0') - k = 
bond_object.params.get('k') - d_r_max = bond_object.params.get('d_r_max') - l0 = scipy.optimize.minimize(lambda x: -0.5*k*(d_r_max**2)*np.log(1-((x-r_0)/d_r_max)**2) + truncated_lj_potential(x, epsilon_red, sigma_red, cutoff_red,offset_red), x0=1.0).x - return l0 + def calculate_net_charge(self, espresso_system, molecule_name, dimensionless=False): ''' @@ -411,41 +403,6 @@ def center_molecule_in_simulation_box(self, molecule_id, espresso_system): espresso_system.part.by_id(pid).pos = es_pos - center_of_mass + box_center return - def check_aminoacid_key(self, key): - """ - Checks if `key` corresponds to a valid aminoacid letter code. - - Args: - key(`str`): key to be checked. - - Returns: - `bool`: True if `key` is a valid aminoacid letter code, False otherwise. - """ - valid_AA_keys=['V', #'VAL' - 'I', #'ILE' - 'L', #'LEU' - 'E', #'GLU' - 'Q', #'GLN' - 'D', #'ASP' - 'N', #'ASN' - 'H', #'HIS' - 'W', #'TRP' - 'F', #'PHE' - 'Y', #'TYR' - 'R', #'ARG' - 'K', #'LYS' - 'S', #'SER' - 'T', #'THR' - 'M', #'MET' - 'A', #'ALA' - 'G', #'GLY' - 'P', #'PRO' - 'C'] #'CYS' - if key in valid_AA_keys: - return True - else: - return False - def check_dimensionality(self, variable, expected_dimensionality): """ Checks if the dimensionality of `variable` matches `expected_dimensionality`. @@ -466,20 +423,7 @@ def check_dimensionality(self, variable, expected_dimensionality): raise ValueError(f"The variable {variable} should have a dimensionality of {expected_dimensionality}, instead the variable has a dimensionality of {variable.dimensionality}") return correct_dimensionality - def check_if_metal_ion(self,key): - """ - Checks if `key` corresponds to a label of a supported metal ion. - Args: - key(`str`): key to be checked - - Returns: - (`bool`): True if `key` is a supported metal ion, False otherwise. 
- """ - if key in self.get_metal_ions_charge_number_map().keys(): - return True - else: - return False def check_pka_set(self, pka_set): """ @@ -1244,44 +1188,11 @@ def create_residue(self, name, espresso_system, central_bead_position=None,use_d overwrite = True) # Internal bookkeeping of the side chain beads ids residues_info[residue_id]['side_chain_ids']=side_chain_beads_ids - return residues_info - - - - def define_AA_residues(self, sequence, model): - """ - Defines in `pmb.df` all the different residues in `sequence`. - - Args: - sequence(`lst`): Sequence of the peptide or protein. - model(`string`): Model name. Currently only models with 1 bead '1beadAA' or with 2 beads '2beadAA' per amino acid are supported. - - Returns: - residue_list(`list` of `str`): List of the `name`s of the `residue`s in the sequence of the `molecule`. - """ - residue_list = [] - for residue_name in sequence: - if model == '1beadAA': - central_bead = residue_name - side_chains = [] - elif model == '2beadAA': - if residue_name in ['c','n', 'G']: - central_bead = residue_name - side_chains = [] - else: - central_bead = 'CA' - side_chains = [residue_name] - if residue_name not in residue_list: - self.define_residue(name = 'AA-'+residue_name, - central_bead = central_bead, - side_chains = side_chains) - residue_list.append('AA-'+residue_name) - return residue_list + return residues_info def define_bond(self, bond_type, bond_parameters, particle_pairs): - - ''' - Defines a pmb object of type `bond` in `pymbe.df`. + """ + Defines bond templates for each particle pair in `particle_pairs` in the pyMBE database. Args: bond_type(`str`): label to identify the potential to model the bond. @@ -1292,124 +1203,78 @@ def define_bond(self, bond_type, bond_parameters, particle_pairs): Currently, only HARMONIC and FENE bonds are supported. For a HARMONIC bond the dictionary must contain the following parameters: - - - k (`obj`) : Magnitude of the bond. 
It should have units of energy/length**2 + - k (`pint.Quantity`) : Magnitude of the bond. It should have units of energy/length**2 using the `pmb.units` UnitRegistry. - - r_0 (`obj`) : Equilibrium bond length. It should have units of length using + - r_0 (`pint.Quantity`) : Equilibrium bond length. It should have units of length using the `pmb.units` UnitRegistry. - For a FENE bond the dictionary must contain the same parameters as for a HARMONIC bond and: - - - d_r_max (`obj`): Maximal stretching length for FENE. It should have + For a FENE bond the dictionary must contain the same parameters as for a HARMONIC bond and: + - d_r_max (`pint.Quantity`): Maximal stretching length for FENE. It should have units of length using the `pmb.units` UnitRegistry. Default 'None'. - ''' + """ - bond_object=self.create_bond_in_espresso(bond_type, bond_parameters) - for particle_name1, particle_name2 in particle_pairs: + parameters_expected_dimensions={"r_0": "length", + "k": "energy/length**2", + "d_r_max": "length"} - lj_parameters=self.get_lj_parameters(particle_name1 = particle_name1, - particle_name2 = particle_name2, - combining_rule = 'Lorentz-Berthelot') - - l0 = self.calculate_initial_bond_length(bond_object = bond_object, - bond_type = bond_type, - epsilon = lj_parameters["epsilon"], - sigma = lj_parameters["sigma"], - cutoff = lj_parameters["cutoff"], - offset = lj_parameters["offset"],) - index = len(self.df) - for label in [f'{particle_name1}-{particle_name2}', f'{particle_name2}-{particle_name1}']: - _DFm._check_if_multiple_pmb_types_for_name(name=label, - pmb_type_to_be_defined="bond", - df=self.df) - name=f'{particle_name1}-{particle_name2}' - _DFm._check_if_multiple_pmb_types_for_name(name=name, - pmb_type_to_be_defined="bond", - df=self.df) - self.df.at [index,'name']= name - self.df.at [index,'bond_object'] = bond_object - self.df.at [index,'l0'] = l0 - _DFm._add_value_to_df(df = self.df, - index = index, - key = ('pmb_type',''), - new_value = 'bond') - 
_DFm._add_value_to_df(df = self.df, - index = index, - key = ('parameters_of_the_potential',''), - new_value = bond_object.get_params(), - non_standard_value = True) - self.df.fillna(pd.NA, inplace=True) - return + parameters_tpl = {} + for key in bond_parameters.keys(): + parameters_tpl[key]= PintQuantity.from_quantity(q=bond_parameters[key], + expected_dimension=parameters_expected_dimensions[key], + ureg=self.units) + + bond_names=[] + for particle_name1, particle_name2 in particle_pairs: + + tpl = BondTemplate(particle_name1=particle_name1, + particle_name2=particle_name2, + parameters=parameters_tpl, + bond_type=bond_type) + tpl._make_name() + if tpl.name in bond_names: + raise RuntimeError(f"Bond {tpl.name} has already been defined, please check the list of particle pairs") + else: + self.db._register_template(tpl) + bond_names.append(tpl.name) - def define_default_bond(self, bond_type, bond_parameters, epsilon=None, sigma=None, cutoff=None, offset=None): + def define_default_bond(self, bond_type, bond_parameters): """ - Asigns `bond` in `pmb.df` as the default bond. - The LJ parameters can be optionally provided to calculate the initial bond length - + Defines a bond template as a "default" template in the pyMBE database. + Args: bond_type(`str`): label to identify the potential to model the bond. bond_parameters(`dict`): parameters of the potential of the bond. - sigma(`float`, optional): LJ sigma of the interaction between the particles. - epsilon(`float`, optional): LJ epsilon for the interaction between the particles. - cutoff(`float`, optional): cutoff-radius of the LJ interaction. - offset(`float`, optional): offset of the LJ interaction. - + Note: - Currently, only harmonic and FENE bonds are supported. 
""" - - bond_object=self.create_bond_in_espresso(bond_type, bond_parameters) - - if epsilon is None: - epsilon=1*self.units('reduced_energy') - if sigma is None: - sigma=1*self.units('reduced_length') - if cutoff is None: - cutoff=2**(1.0/6.0)*self.units('reduced_length') - if offset is None: - offset=0*self.units('reduced_length') - l0 = self.calculate_initial_bond_length(bond_object = bond_object, - bond_type = bond_type, - epsilon = epsilon, - sigma = sigma, - cutoff = cutoff, - offset = offset) - - _DFm._check_if_multiple_pmb_types_for_name(name='default', - pmb_type_to_be_defined='bond', - df=self.df) - - index = max(self.df.index, default=-1) + 1 - self.df.at [index,'name'] = 'default' - self.df.at [index,'bond_object'] = bond_object - self.df.at [index,'l0'] = l0 - _DFm._add_value_to_df(df = self.df, - index = index, - key = ('pmb_type',''), - new_value = 'bond') - _DFm._add_value_to_df(df = self.df, - index = index, - key = ('parameters_of_the_potential',''), - new_value = bond_object.get_params(), - non_standard_value=True) - self.df.fillna(pd.NA, inplace=True) - return + parameters_expected_dimensions={"r_0": "length", + "k": "energy/length**2", + "d_r_max": "length"} + parameters_tpl = {} + for key in bond_parameters.keys(): + parameters_tpl[key]= PintQuantity.from_quantity(q=bond_parameters[key], + expected_dimension=parameters_expected_dimensions[key], + ureg=self.units) + tpl = BondTemplate(parameters=parameters_tpl, + bond_type=bond_type) + tpl.name = "default" + self.db._register_template(tpl) def define_hydrogel(self, name, node_map, chain_map): """ - Defines a pyMBE object of type `hydrogel` in `pymbe.df`. + Defines a hydrogel template in the pyMBE database. Args: name(`str`): Unique label that identifies the `hydrogel`. - node_map(`list of ict`): [{"particle_name": , "lattice_index": }, ... ] + node_map(`list of dict`): [{"particle_name": , "lattice_index": }, ... ] chain_map(`list of dict`): [{"node_start": , "node_end": , "residue_list": , ... 
] """ - node_indices = {tuple(entry['lattice_index']) for entry in node_map} - diamond_indices = {tuple(row) for row in self.lattice_builder.lattice.indices} - if node_indices != diamond_indices: - raise ValueError(f"Incomplete hydrogel: A diamond lattice must contain exactly 8 lattice indices, {diamond_indices} ") + # Sanity tests + node_indices = {tuple(entry['lattice_index']) for entry in node_map} + chain_map_connectivity = set() for entry in chain_map: start = self.lattice_builder.node_labels[entry['node_start']] @@ -1419,54 +1284,41 @@ def define_hydrogel(self, name, node_map, chain_map): if self.lattice_builder.lattice.connectivity != chain_map_connectivity: raise ValueError("Incomplete hydrogel: A diamond lattice must contain correct 16 lattice index pairs") - _DFm._check_if_multiple_pmb_types_for_name(name=name, - pmb_type_to_be_defined='hydrogel', - df=self.df) - - index = len(self.df) - self.df.at [index, "name"] = name - self.df.at [index, "pmb_type"] = "hydrogel" - _DFm._add_value_to_df(df = self.df, - index = index, - key = ('node_map',''), - new_value = node_map, - non_standard_value = True) - _DFm._add_value_to_df(df = self.df, - index = index, - key = ('chain_map',''), - new_value = chain_map, - non_standard_value = True) - for chain_label in chain_map: - node_start = chain_label["node_start"] - node_end = chain_label["node_end"] - residue_list = chain_label['residue_list'] - # Molecule name - molecule_name = "chain_"+node_start+"_"+node_end - self.define_molecule(name=molecule_name, residue_list=residue_list) - return + + diamond_indices = {tuple(row) for row in self.lattice_builder.lattice.indices} + if node_indices != diamond_indices: + raise ValueError(f"Incomplete hydrogel: A diamond lattice must contain exactly 8 lattice indices, {diamond_indices} ") + + # Register information in the pyMBE database + nodes=[] + for entry in node_map: + nodes.append(HydrogelNode(particle_name=entry["particle_name"], + lattice_index=entry["lattice_index"])) + 
chains=[] + for chain in chain_map: + chains.append(HydrogelChain(node_start=chain["node_start"], + node_end=chain["node_end"], + molecule_name=chain["molecule_name"])) + tpl = HydrogelTemplate(name=name, + node_map=nodes, + chain_map=chains) + self.db._register_template(tpl) def define_molecule(self, name, residue_list): """ - Defines a pyMBE object of type `molecule` in `pymbe.df`. + Defines a molecule template in the pyMBE database. Args: name(`str`): Unique label that identifies the `molecule`. residue_list(`list` of `str`): List of the `name`s of the `residue`s in the sequence of the `molecule`. """ - _DFm._check_if_multiple_pmb_types_for_name(name=name, - pmb_type_to_be_defined='molecule', - df=self.df) + tpl = MoleculeTemplate(name=name, + residue_list=residue_list) + self.db._register_template(tpl) - index = len(self.df) - self.df.at [index,'name'] = name - self.df.at [index,'pmb_type'] = 'molecule' - self.df.at [index,('residue_list','')] = residue_list - self.df.fillna(pd.NA, inplace=True) - return - - def define_particle(self, name, z=0, acidity=pd.NA, pka=pd.NA, sigma=pd.NA, epsilon=pd.NA, cutoff=pd.NA, offset=pd.NA,overwrite=False): + def define_particle(self, name, z=0, acidity=pd.NA, pka=pd.NA, sigma=pd.NA, epsilon=pd.NA, cutoff=pd.NA, offset=pd.NA): """ - Defines the properties of a particle object. + Defines a particle template in the pyMBE database. Args: name(`str`): Unique label that identifies this particle type. @@ -1477,7 +1329,6 @@ def define_particle(self, name, z=0, acidity=pd.NA, pka=pd.NA, sigma=pd.NA, epsi cutoff(`pint.Quantity`, optional): Cutoff parameter used to set up Lennard-Jones interactions for this particle type. Defaults to pd.NA. offset(`pint.Quantity`, optional): Offset parameter used to set up Lennard-Jones interactions for this particle type. Defaults to pd.NA. epsilon(`pint.Quantity`, optional): Epsilon parameter used to setup Lennard-Jones interactions for this particle tipe. Defaults to pd.NA. 
- overwrite(`bool`, optional): Switch to enable overwriting of already existing values in pmb.df. Defaults to False. Note: - `sigma`, `cutoff` and `offset` must have a dimensitonality of `[length]` and should be defined using pmb.units. @@ -1487,167 +1338,83 @@ def define_particle(self, name, z=0, acidity=pd.NA, pka=pd.NA, sigma=pd.NA, epsi - The default setup corresponds to the Weeks−Chandler−Andersen (WCA) model, corresponding to purely steric interactions. - For more information on `sigma`, `epsilon`, `cutoff` and `offset` check `pmb.setup_lj_interactions()`. """ - index=self._define_particle_entry_in_df(name=name) - _DFm._check_if_multiple_pmb_types_for_name(name=name, - pmb_type_to_be_defined='particle', - df=self.df) - + # If `cutoff` and `offset` are not defined, default them to the following values if pd.isna(cutoff): cutoff=self.units.Quantity(2**(1./6.), "reduced_length") if pd.isna(offset): offset=self.units.Quantity(0, "reduced_length") - # Define LJ parameters - parameters_with_dimensionality={"sigma":{"value": sigma, "dimensionality": "[length]"}, - "cutoff":{"value": cutoff, "dimensionality": "[length]"}, - "offset":{"value": offset, "dimensionality": "[length]"}, - "epsilon":{"value": epsilon, "dimensionality": "[energy]"},} - - for parameter_key in parameters_with_dimensionality.keys(): - if not pd.isna(parameters_with_dimensionality[parameter_key]["value"]): - self.check_dimensionality(variable=parameters_with_dimensionality[parameter_key]["value"], - expected_dimensionality=parameters_with_dimensionality[parameter_key]["dimensionality"]) - _DFm._add_value_to_df(df = self.df, - key = (parameter_key,''), - index = index, - new_value = parameters_with_dimensionality[parameter_key]["value"], - overwrite = overwrite) - + tpl = ParticleTemplate(name=name, + sigma=PintQuantity.from_quantity(q=sigma, expected_dimension="length", ureg=self.units), + epsilon=PintQuantity.from_quantity(q=epsilon, expected_dimension="energy", ureg=self.units), + 
cutoff=PintQuantity.from_quantity(q=cutoff, expected_dimension="length", ureg=self.units), + offset=PintQuantity.from_quantity(q=offset, expected_dimension="length", ureg=self.units)) + # Define particle acid/base properties - self.set_particle_acidity(name=name, - acidity=acidity, - default_charge_number=z, - pka=pka, - overwrite=overwrite) - self.df.fillna(pd.NA, inplace=True) + self.set_particle_acidity(particle_template=tpl, + acidity=acidity, + default_charge_number=z, + pka=pka) return - def define_particles(self, parameters, overwrite=False): - ''' - Defines a particle object in pyMBE for each particle name in `particle_names` - - Args: - parameters(`dict`): dictionary with the particle parameters. - overwrite(`bool`, optional): Switch to enable overwriting of already existing values in pmb.df. Defaults to False. - - Note: - - parameters = {"particle_name1: {"sigma": sigma_value, "epsilon": epsilon_value, ...}, particle_name2: {...},} - ''' - if not parameters: - return 0 - for particle_name in parameters.keys(): - parameters[particle_name]["overwrite"]=overwrite - self.define_particle(**parameters[particle_name]) - return - def define_peptide(self, name, sequence, model): """ - Defines a pyMBE object of type `peptide` in the `pymbe.df`. + Defines a peptide template in the pyMBE database. Args: - name (`str`): Unique label that identifies the `peptide`. - sequence (`string`): Sequence of the `peptide`. - model (`string`): Model name. Currently only models with 1 bead '1beadAA' or with 2 beads '2beadAA' per amino acid are supported. + name (`str`): Unique label that identifies the peptide. + sequence (`str`): Sequence of the peptide. + model (`str`): Model name. Currently only models with 1 bead '1beadAA' or with 2 beads '2beadAA' per amino acid are supported. 
""" - _DFm._check_if_multiple_pmb_types_for_name(name = name, - pmb_type_to_be_defined='peptide', - df=self.df) - valid_keys = ['1beadAA','2beadAA'] if model not in valid_keys: raise ValueError('Invalid label for the peptide model, please choose between 1beadAA or 2beadAA') clean_sequence = self.protein_sequence_parser(sequence=sequence) - residue_list = self.define_AA_residues(sequence=clean_sequence, - model=model) - self.define_molecule(name = name, residue_list=residue_list) - index = self.df.loc[self.df['name'] == name].index.item() - self.df.at [index,'model'] = model - self.df.at [index,('sequence','')] = clean_sequence - self.df.at [index,'pmb_type'] = "peptide" - self.df.fillna(pd.NA, inplace=True) - + residue_list = self._get_residue_list_from_sequence(sequence=clean_sequence) + tpl = PeptideTemplate(name=name, + residue_list=residue_list, + model=model, + sequence=sequence) + self.db._register_template(tpl) - def define_protein(self, name,model, topology_dict, lj_setup_mode="wca", overwrite=False): + def define_protein(self, name, sequence, model): """ - Defines a globular protein pyMBE object in `pymbe.df`. + Defines a protein template in the pyMBE database. Args: name (`str`): Unique label that identifies the protein. + sequence (`str`): Sequence of the protein. model (`string`): Model name. Currently only models with 1 bead '1beadAA' or with 2 beads '2beadAA' per amino acid are supported. topology_dict (`dict`): {'initial_pos': coords_list, 'chain_id': id, 'radius': radius_value} - lj_setup_mode(`str`): Key for the setup for the LJ potential. Defaults to "wca". - overwrite(`bool`, optional): Switch to enable overwriting of already existing values in pmb.df. Defaults to False. Note: - Currently, only `lj_setup_mode="wca"` is supported. This corresponds to setting up the WCA potential. 
""" - - # Sanity checks - _DFm._check_if_multiple_pmb_types_for_name(name = name, - pmb_type_to_be_defined='protein', - df=self.df) valid_model_keys = ['1beadAA','2beadAA'] - valid_lj_setups = ["wca"] if model not in valid_model_keys: raise ValueError('Invalid key for the protein model, supported models are {valid_model_keys}') - if lj_setup_mode not in valid_lj_setups: - raise ValueError('Invalid key for the lj setup, supported setup modes are {valid_lj_setups}') - if lj_setup_mode == "wca": - sigma = 1*self.units.Quantity("reduced_length") - epsilon = 1*self.units.Quantity("reduced_energy") - part_dict={} - sequence=[] - metal_ions_charge_number_map=self.get_metal_ions_charge_number_map() - for particle in topology_dict.keys(): - particle_name = re.split(r'\d+', particle)[0] - if particle_name not in part_dict.keys(): - if lj_setup_mode == "wca": - part_dict[particle_name]={"sigma": sigma, - "offset": topology_dict[particle]['radius']*2-sigma, - "epsilon": epsilon, - "name": particle_name} - if self.check_if_metal_ion(key=particle_name): - z=metal_ions_charge_number_map[particle_name] - else: - z=0 - part_dict[particle_name]["z"]=z - - if self.check_aminoacid_key(key=particle_name): - sequence.append(particle_name) - - self.define_particles(parameters=part_dict, - overwrite=overwrite) - residue_list = self.define_AA_residues(sequence=sequence, - model=model) - index = len(self.df) - self.df.at [index,'name'] = name - self.df.at [index,'pmb_type'] = 'protein' - self.df.at [index,'model'] = model - self.df.at [index,('sequence','')] = sequence - self.df.at [index,('residue_list','')] = residue_list - self.df.fillna(pd.NA, inplace=True) - return + + residue_list = self._get_residue_list_from_sequence(sequence=sequence) + tpl = ProteinTemplate(name=name, + model=model, + residue_list=residue_list, + sequence=sequence) + self.db._register_template(tpl) def define_residue(self, name, central_bead, side_chains): """ - Defines a pyMBE object of type `residue` in 
`pymbe.df`. + Defines a residue template in the pyMBE database. Args: - name(`str`): Unique label that identifies the `residue`. - central_bead(`str`): `name` of the `particle` to be placed as central_bead of the `residue`. - side_chains(`list` of `str`): List of `name`s of the pmb_objects to be placed as side_chains of the `residue`. Currently, only pmb_objects of type `particle`s or `residue`s are supported. - """ - _DFm._check_if_multiple_pmb_types_for_name(name=name, - pmb_type_to_be_defined='residue', - df=self.df) - - index = len(self.df) - self.df.at [index, 'name'] = name - self.df.at [index,'pmb_type'] = 'residue' - self.df.at [index,'central_bead'] = central_bead - self.df.at [index,('side_chains','')] = side_chains - self.df.fillna(pd.NA, inplace=True) + name(`str`): Unique label that identifies the residue. + central_bead(`str`): `name` of the `particle` to be placed as central_bead of the residue. + side_chains(`list` of `str`): List of `name`s of the pmb_objects to be placed as side_chains of the residue. Currently, only pyMBE objects of type `particle` or `residue` are supported. + """ + tpl = ResidueTemplate(name=name, + central_bead=central_bead, + side_chains=side_chains) + self.db._register_template(tpl) return def delete_molecule_in_system(self, molecule_id, espresso_system): @@ -2024,55 +1791,7 @@ def get_charge_number_map(self): charge_number_map = pd.concat([state_one,state_two],axis=0).to_dict() return charge_number_map - def get_lj_parameters(self, particle_name1, particle_name2, combining_rule='Lorentz-Berthelot'): - """ - Returns the Lennard-Jones parameters for the interaction between the particle types given by - `particle_name1` and `particle_name2` in `pymbe.df`, calculated according to the provided combining rule. 
- - Args: - particle_name1 (str): label of the type of the first particle type - particle_name2 (str): label of the type of the second particle type - combining_rule (`string`, optional): combining rule used to calculate `sigma` and `epsilon` for the potential betwen a pair of particles. Defaults to 'Lorentz-Berthelot'. - - Returns: - {"epsilon": epsilon_value, "sigma": sigma_value, "offset": offset_value, "cutoff": cutoff_value} - - Note: - - Currently, the only `combining_rule` supported is Lorentz-Berthelot. - - If the sigma value of `particle_name1` or `particle_name2` is 0, the function will return an empty dictionary. No LJ interactions are set up for particles with sigma = 0. - """ - supported_combining_rules=["Lorentz-Berthelot"] - lj_parameters_keys=["sigma","epsilon","offset","cutoff"] - if combining_rule not in supported_combining_rules: - raise ValueError(f"Combining_rule {combining_rule} currently not implemented in pyMBE, valid keys are {supported_combining_rules}") - lj_parameters={} - for key in lj_parameters_keys: - lj_parameters[key]=[] - # Search the LJ parameters of the type pair - for name in [particle_name1,particle_name2]: - for key in lj_parameters_keys: - lj_parameters[key].append(self.df[self.df.name == name][key].values[0]) - # If one of the particle has sigma=0, no LJ interations are set up between that particle type and the others - if not all(sigma_value.magnitude for sigma_value in lj_parameters["sigma"]): - return {} - # Apply combining rule - if combining_rule == 'Lorentz-Berthelot': - lj_parameters["sigma"]=(lj_parameters["sigma"][0]+lj_parameters["sigma"][1])/2 - lj_parameters["cutoff"]=(lj_parameters["cutoff"][0]+lj_parameters["cutoff"][1])/2 - lj_parameters["offset"]=(lj_parameters["offset"][0]+lj_parameters["offset"][1])/2 - lj_parameters["epsilon"]=np.sqrt(lj_parameters["epsilon"][0]*lj_parameters["epsilon"][1]) - return lj_parameters - - def get_metal_ions_charge_number_map(self): - """ - Gets a map with the charge numbers of 
all the metal ions supported. - - Returns: - metal_charge_number_map(dict): Has the structure {"metal_name": metal_charge_number} - - """ - metal_charge_number_map = {"Ca": 2} - return metal_charge_number_map + def get_particle_id_map(self, object_name): ''' @@ -2172,17 +1891,32 @@ def get_reduced_units(self): def get_type_map(self): """ - Gets all different espresso types assigned to particles in `pmb.df`. - + Return the mapping of ESPResSo types for all particles present in ``pmb.df``. + + This method delegates to ``self.db.get_es_types_map()`` and returns its output. + The resulting structure is a nested dictionary that lists, for each particle + template, all defined states and their corresponding ESPResSo type (``es_type``). + Returns: - type_map(`dict`): {"name": espresso_type}. + dict[str, dict[str, float | int | str]]: + A dictionary of the form:: + + { + particle_name: { + state_name: es_type, + ... + }, + ... + } + + where ``es_type`` is the ESPResSo particle type used in simulations. + + See Also: + ``Manager.get_es_types_map`` – the underlying method that performs + the extraction. """ - df_state_one = self.df.state_one.dropna(how='all') - df_state_two = self.df.state_two.dropna(how='all') - state_one = pd.Series (df_state_one.es_type.values,index=df_state_one.label) - state_two = pd.Series (df_state_two.es_type.values,index=df_state_two.label) - type_map = pd.concat([state_one,state_two],axis=0).to_dict() - return type_map + + return self.db.get_es_types_map() def initialize_lattice_builder(self, diamond_lattice): """ @@ -2300,18 +2034,33 @@ def load_pka_set(self, filename, overwrite=True): def propose_unused_type(self): """ - Searches in `pmb.df` all the different particle types defined and returns a new one. + Propose an unused ESPResSo particle type. + + This method scans the full `type_map` produced by `get_type_map()`, + which contains all particle templates and their associated state `es_type`. 
+ It extracts all integer `es_type` values and returns the next available + integer type, ensuring no collisions with existing ones. Returns: - unused_type(`int`): unused particle type + int: The next available integer ESPResSo type. Returns ``0`` if no + integer types are currently defined. + + """ type_map = self.get_type_map() - if not type_map: - unused_type = 0 - else: - valid_values = [v for v in type_map.values() if pd.notna(v)] # Filter out pd.NA values - unused_type = max(valid_values) + 1 if valid_values else 0 # Ensure max() doesn't fail if all values are NA - return unused_type + + # Flatten all es_type values across all particles and states + all_types = [] + for particle_entry in type_map.values(): + for es_type in particle_entry.values(): + if isinstance(es_type, int): + all_types.append(es_type) + + # If no integer es_types exist, start at 0 + if not all_types: + return 0 + + return max(all_types) + 1 def protein_sequence_parser(self, sequence): ''' @@ -2581,7 +2330,7 @@ def search_particles_in_residue(self, residue_name): list_of_particles_in_residue.append(side_chain) return list_of_particles_in_residue - def set_particle_acidity(self, name, acidity=pd.NA, default_charge_number=0, pka=pd.NA, overwrite=True): + def set_particle_acidity(self, particle_template, acidity=pd.NA, default_charge_number=0, pka=pd.NA): """ Sets the particle acidity including the charges in each of its possible states. @@ -2601,87 +2350,52 @@ def set_particle_acidity(self, name, acidity=pd.NA, default_charge_number=0, pka acidity_valid_keys = ['inert','acidic', 'basic'] if not pd.isna(acidity): if acidity not in acidity_valid_keys: - raise ValueError(f"Acidity {acidity} provided for particle name {name} is not supproted. Valid keys are: {acidity_valid_keys}") + raise ValueError(f"Acidity {acidity} provided for particle name {particle_template.name} is not supported. 
Valid keys are: {acidity_valid_keys}") if acidity in ['acidic', 'basic'] and pd.isna(pka): - raise ValueError(f"pKa not provided for particle with name {name} with acidity {acidity}. pKa must be provided for acidic or basic particles.") + raise ValueError(f"pKa not provided for particle with name {particle_template.name} with acidity {acidity}. pKa must be provided for acidic or basic particles.") if acidity == "inert": acidity = pd.NA logging.warning("the keyword 'inert' for acidity has been replaced by setting acidity = pd.NA. For backwards compatibility, acidity has been set to pd.NA. Support for `acidity = 'inert'` may be deprecated in future releases of pyMBE") - self._define_particle_entry_in_df(name=name) - for index in self.df[self.df['name']==name].index: - if pka is not pd.NA: - _DFm._add_value_to_df(df = self.df, - key = ('pka',''), - index = index, - new_value = pka, - overwrite = overwrite) + # Define the first state + if pka is pd.NA: + # Inert particle with a single state + z_state_one = default_charge_number + name_state_one = particle_template.name + else: + if acidity == "acidic": + z_state_one = 0 + elif acidity == "basic": + z_state_one = 1 + name_state_one = particle_template.name + "H" + + particle_template.add_state(ParticleState(name=name_state_one, + z=z_state_one, + es_type=self.propose_unused_type())) + self.db._register_template(particle_template) + + # For monoprotic acid/base particles, define the second state + if pka is not pd.NA: + if acidity == "acidic": + z_state_two = -1 + elif acidity == "basic": + z_state_two = 0 + name_state_two = particle_template.name + particle_template.add_state(ParticleState(name=name_state_two, + z=z_state_two, + es_type=self.propose_unused_type())) + + reaction = Reaction(participants=[ReactionParticipant(particle_name=particle_template.name, + state_name=name_state_one, + coefficient=-1), + ReactionParticipant(particle_name=particle_template.name, + state_name=name_state_two, + coefficient=1)], + 
reaction_type="acid/base", + pK=pka) + self.db._register_reaction(reaction) - _DFm._add_value_to_df(df = self.df, - key = ('acidity',''), - index = index, - new_value = acidity, - overwrite = overwrite) - if not _DFm._check_if_df_cell_has_a_value(df=self.df, index=index,key=('state_one','es_type')): - _DFm._add_value_to_df(df = self.df, - key = ('state_one','es_type'), - index = index, - new_value = self.propose_unused_type(), - overwrite = overwrite) - if pd.isna(self.df.loc [self.df['name'] == name].acidity.iloc[0]): - _DFm._add_value_to_df(df = self.df, - key = ('state_one','z'), - index = index, - new_value = default_charge_number, - overwrite = overwrite) - _DFm._add_value_to_df(df = self.df, - key = ('state_one','label'), - index = index, - new_value = name, - overwrite = overwrite) - else: - protonated_label = f'{name}H' - _DFm._add_value_to_df(df = self.df, - key = ('state_one','label'), - index = index, - new_value = protonated_label, - overwrite = overwrite) - _DFm._add_value_to_df(df = self.df, - key = ('state_two','label'), - index = index, - new_value = name, - overwrite = overwrite) - if not _DFm._check_if_df_cell_has_a_value(df=self.df, index=index,key=('state_two','es_type')): - _DFm._add_value_to_df(df = self.df, - key = ('state_two','es_type'), - index = index, - new_value = self.propose_unused_type(), - overwrite = overwrite) - if self.df.loc [self.df['name'] == name].acidity.iloc[0] == 'acidic': - _DFm._add_value_to_df(df = self.df, - key = ('state_one','z'), - index = index, - new_value = 0, - overwrite = overwrite) - _DFm._add_value_to_df(df = self.df, - key = ('state_two','z'), - index = index, - new_value = -1, - overwrite = overwrite) - elif self.df.loc [self.df['name'] == name].acidity.iloc[0] == 'basic': - _DFm._add_value_to_df(df = self.df, - key = ('state_one','z'), - index = index, - new_value = +1, - overwrite = overwrite) - _DFm._add_value_to_df(df = self.df, - key = ('state_two','z'), - index = index, - new_value = 0, - overwrite = 
overwrite) - self.df.fillna(pd.NA, inplace=True) - return def set_reduced_units(self, unit_length=None, unit_charge=None, temperature=None, Kw=None): """ diff --git a/pyMBE/storage/base_type.py b/pyMBE/storage/base_type.py index ef5f829..d22f2e4 100644 --- a/pyMBE/storage/base_type.py +++ b/pyMBE/storage/base_type.py @@ -2,8 +2,17 @@ class PMBBaseModel(BaseModel): """ - Base class for all pyMBE models: - - Hard-coded pmb_type in subclasses + Base class for all pyMBE models. + + Provides common fields and validation behavior for pyMBE templates and instances. + + Attributes: + pmb_type (str): Fixed type identifier. Subclasses must set this to a specific type. + name (str): Unique name of the model instance or template. + + Config: + validate_assignment (bool): Ensures that attribute assignments are validated. + extra (str): Forbids extra attributes not defined in the model. """ pmb_type: str = Field(frozen=True) diff --git a/pyMBE/storage/instances/bond.py b/pyMBE/storage/instances/bond.py index cacce1a..37d7f92 100644 --- a/pyMBE/storage/instances/bond.py +++ b/pyMBE/storage/instances/bond.py @@ -1,7 +1,58 @@ +# +# Copyright (C) 2025 pyMBE-dev team +# +# This file is part of pyMBE. +# +# pyMBE is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# pyMBE is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . +# + from pyMBE.storage.base_type import PMBBaseModel from pydantic import field_validator class BondInstance(PMBBaseModel): + """ + Instance representation of a bond between two particles. 
+ + A ``BondInstance`` links two particle instances using a specified + bond template. This class stores only lightweight, serializable + identifiers (not Espresso objects or interaction handles), ensuring + that the object can be safely persisted, exported, and reloaded from + CSV or other storage formats. + + Attributes: + pmb_type (str): + Fixed identifier set to ``"bond"`` for all bond instances. + bond_id (int): + Unique non-negative integer identifying this bond instance. + name (str): + Name of the bond template from which this instance was created. + particle_id1 (int): + ID of the first particle involved in the bond. + particle_id2 (int): + ID of the second particle involved in the bond. + + Validators: + validate_bond_id: + Ensures that ``bond_id`` is a non-negative integer. + + Notes: + - ``particle_id1`` and ``particle_id2`` must correspond to + particle instance IDs already registered in the database. + - This class does **not** store simulation engine–specific + objects (e.g., Espresso bond handles). Those should be created + by a runtime builder separate from the persistent database. + """ pmb_type: str = "bond" bond_id: int name : str # bond template name diff --git a/pyMBE/storage/instances/particle.py b/pyMBE/storage/instances/particle.py index 5cfc4c3..3f7773a 100644 --- a/pyMBE/storage/instances/particle.py +++ b/pyMBE/storage/instances/particle.py @@ -1,10 +1,62 @@ +# +# Copyright (C) 2025 pyMBE-dev team +# +# This file is part of pyMBE. +# +# pyMBE is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# pyMBE is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. 
+# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . +# + from pydantic import field_validator from ..base_type import PMBBaseModel class ParticleInstance(PMBBaseModel): """ - A placed particle within the simulation. + Concrete instance of a particle placed in the simulation. + + ``ParticleInstance`` represents a single particle created from a + ``ParticleTemplate`` and placed within the system. + Each instance has a unique integer identifier, an initial chemical + state (e.g., ``"A-"`` or ``"HA"``), and optional associations to a + residue and/or molecule instance. + + Attributes: + pmb_type (str): + Fixed string identifying this object as a particle instance. + Always ``"particle"``. + particle_id (int): + Unique non-negative integer identifying the particle within + the database. Assigned sequentially by the database manager. + initial_state (str): + Name of the particle state at creation time. Must correspond + to one of the allowed states defined in the originating + ``ParticleTemplate``. State transitions are handled at the + simulation level, not here. + residue_id (int | None): + Optional identifier of the ``ResidueInstance`` this particle + belongs to. Particles that are not part of a residue should + leave this field as ``None``. + molecule_id (int | None): + Optional identifier of the ``MoleculeInstance`` this particle + belongs to. Particles not belonging to any molecule should + keep this as ``None``. + + Notes: + - ``initial_state`` is stored as a plain string to ensure clean + serialization and avoid engine-specific objects. + - Connectivity, bonding, and spatial ordering are external to + this class and handled by the database or simulation backend. 
""" pmb_type: str = "particle" particle_id: int diff --git a/pyMBE/storage/io.py b/pyMBE/storage/io.py index 83eb797..e7efcff 100644 --- a/pyMBE/storage/io.py +++ b/pyMBE/storage/io.py @@ -41,9 +41,9 @@ from pyMBE.storage.instances.protein import ProteinInstance from pyMBE.storage.templates.hydrogel import HydrogelTemplate, HydrogelNode, HydrogelChain from pyMBE.storage.instances.hydrogel import HydrogelInstance +from pyMBE.storage.templates.lj import LJInteractionTemplate - -def _decode(s: Any) -> Any: +def _decode(s): """ Decodes a JSON-like object or string. @@ -82,7 +82,7 @@ def _decode(s: Any) -> Any: # If it fails, try to interpret as plain string return s_str -def _encode(obj: Any) -> str: +def _encode(obj): """ Encodes a Python object as a JSON string. @@ -137,7 +137,8 @@ def _load_database_csv(db, folder): "bond", "peptide", "protein", - "hydrogel"] + "hydrogel", + "lj"] # TEMPLATES for pmb_type in pyMBE_types: @@ -202,28 +203,22 @@ def _load_database_csv(db, folder): rl = _decode(row.get("residue_list", "")) or [] if not isinstance(rl, list): rl = list(rl) - seq = _decode(row.get("sequence", "")) or [] - if not isinstance(seq, list): - seq = list(seq) tpl = PeptideTemplate( name=row["name"], model=row.get("model", ""), residue_list=rl, - sequence=seq + sequence=row["sequence"] ) templates[tpl.name] = tpl elif pmb_type == "protein": rl = _decode(row.get("residue_list", "")) or [] if not isinstance(rl, list): rl = list(rl) - seq = _decode(row.get("sequence", "")) or [] - if not isinstance(seq, list): - seq = list(seq) tpl = ProteinTemplate( name=row["name"], model=row.get("model", ""), residue_list=rl, - sequence=seq + sequence=row["sequence"] ) templates[tpl.name] = tpl elif pmb_type == "bond": @@ -238,8 +233,7 @@ def _load_database_csv(db, folder): tpl = BondTemplate( name=row["name"], bond_type=row.get("bond_type", ""), - parameters=parameters, - l0=PintQuantity.from_dict(_decode(row["l0"]))) + parameters=parameters) templates[tpl.name] = tpl elif 
pmb_type == "hydrogel": node_map_raw = _decode(row.get("node_map", "")) or [] @@ -253,7 +247,39 @@ def _load_database_csv(db, folder): chain_map=chain_map ) templates[tpl.name] = tpl - db.templates[pmb_type] = templates + elif pmb_type == "lj": + sigma_d = _decode(row["sigma"]) + epsilon_d = _decode(row["epsilon"]) + cutoff_d = _decode(row["cutoff"]) + offset_d = _decode(row["offset"]) + state1 = row["state1"] + state2 = row["state2"] + shift_d = _decode(row.get("shift", "")) + + sigma = PintQuantity.from_dict(sigma_d) if sigma_d is not None else None + epsilon = PintQuantity.from_dict(epsilon_d) if epsilon_d is not None else None + cutoff = PintQuantity.from_dict(cutoff_d) if cutoff_d is not None else None + offset = PintQuantity.from_dict(offset_d) if offset_d is not None else None + + + if isinstance(shift_d, dict) and {"magnitude", "units", "dimension"}.issubset(shift_d.keys()): + shift = PintQuantity.from_dict(shift_d) + else: + shift = shift_d # could be "auto" or None + + tpl = LJInteractionTemplate( + state1=state1, + state2=state2, + sigma=sigma, + epsilon=epsilon, + cutoff=cutoff, + offset=offset, + shift=shift + ) + + templates[tpl.name] = tpl + + db._templates[pmb_type] = templates # INSTANCES for pmb_type in pyMBE_types: @@ -323,7 +349,7 @@ def _load_database_csv(db, folder): molecule_ids=mol_ids ) instances[inst.hydrogel_id] = inst - db.instances[pmb_type] = instances + db._instances[pmb_type] = instances # REACTIONS rx_file = folder / "reactions.csv" @@ -342,7 +368,7 @@ def _load_database_csv(db, folder): metadata=metadata ) reactions[rx.name] = rx - db.reactions = reactions + db._reactions = reactions def _load_reaction_set(path): """ @@ -391,17 +417,17 @@ def _save_database_csv(db, folder): os.makedirs(folder, exist_ok=True) # TEMPLATES - for pmb_type, tpl_dict in db.templates.items(): + for pmb_type, tpl_dict in db._templates.items(): rows = [] for tpl in tpl_dict.values(): # PARTICLE TEMPLATE: explicit custom encoding if pmb_type == "particle" 
and isinstance(tpl, ParticleTemplate): rows.append({ "name": tpl.name, - "sigma": _encode(tpl.sigma.to_dict()), - "epsilon": _encode(tpl.epsilon.to_dict()), - "cutoff": _encode(tpl.cutoff.to_dict()), - "offset": _encode(tpl.offset.to_dict()), + "sigma": _encode(tpl.sigma), + "epsilon": _encode(tpl.epsilon), + "cutoff": _encode(tpl.cutoff), + "offset": _encode(tpl.offset), "states": _encode({sname: st.model_dump() for sname, st in tpl.states.items()}), # states: dict state_name -> ParticleState.model_dump() }) @@ -425,14 +451,14 @@ def _save_database_csv(db, folder): "name": tpl.name, "model": tpl.model, "residue_list": _encode(tpl.residue_list), - "sequence": _encode(tpl.sequence), + "sequence": tpl.sequence, }) elif pmb_type == "protein" and isinstance(tpl, ProteinTemplate): rows.append({ "name": tpl.name, "model": tpl.model, "residue_list": _encode(tpl.residue_list), - "sequence": _encode(tpl.sequence), + "sequence": tpl.sequence, }) # BOND TEMPLATE elif pmb_type == "bond" and isinstance(tpl, BondTemplate): @@ -446,9 +472,10 @@ def _save_database_csv(db, folder): params_serial[k] = v rows.append({ "name": tpl.name, + "particle_name1": tpl.particle_name1, + "particle_name2": tpl.particle_name2, "bond_type": tpl.bond_type, "parameters": _encode(params_serial), - "l0": _encode(tpl.l0.to_dict()), }) # HYDROGEL TEMPLATE elif pmb_type == "hydrogel" and isinstance(tpl, HydrogelTemplate): @@ -457,6 +484,19 @@ def _save_database_csv(db, folder): "node_map": _encode([node.model_dump() for node in tpl.node_map]), "chain_map": _encode([chain.model_dump() for chain in tpl.chain_map]), }) + # LJ TEMPLATE + elif pmb_type == "lj" and isinstance(tpl, LJInteractionTemplate): + rows.append({ + "name": tpl.name, + "state1": tpl.state1, + "state2": tpl.state2, + "sigma": _encode(tpl.sigma), + "epsilon":_encode(tpl.epsilon), + "cutoff": _encode(tpl.cutoff), + "offset": _encode(tpl.offset), + "shift": _encode(tpl.shift) + }) + else: # Generic fallback: try model_dump() try: @@ -468,7 
+508,7 @@ def _save_database_csv(db, folder): df.to_csv(os.path.join(folder, f"templates_{pmb_type}.csv"), index=False) # INSTANCES - for pmb_type, inst_dict in db.instances.items(): + for pmb_type, inst_dict in db._instances.items(): rows = [] for inst in inst_dict.values(): if pmb_type == "particle" and isinstance(inst, ParticleInstance): @@ -532,7 +572,7 @@ def _save_database_csv(db, folder): # REACTIONS rows = [] - for rx in db.reactions.values(): + for rx in db._reactions.values(): rows.append({ "name": rx.name, "participants": _encode([p.model_dump() for p in rx.participants]), diff --git a/pyMBE/storage/manager.py b/pyMBE/storage/manager.py index 960326e..045eaa1 100644 --- a/pyMBE/storage/manager.py +++ b/pyMBE/storage/manager.py @@ -40,6 +40,7 @@ from pyMBE.storage.instances.protein import ProteinInstance from pyMBE.storage.templates.hydrogel import HydrogelTemplate from pyMBE.storage.instances.hydrogel import HydrogelInstance +from pyMBE.storage.templates.lj import LJInteractionTemplate TemplateType = Any # union of template classes (ParticleTemplate, ResidueTemplate, ...) InstanceType = Any # union of instance classes (ParticleInstance, ResidueInstance, ...) @@ -77,10 +78,10 @@ def __init__(self,units): Args: ureg (UnitRegistry): Pint unit registry used to rebuild quantities. """ - self.units = units - self.templates: Dict[str, Dict[str, TemplateType]] = {} - self.instances: Dict[str, Dict[int, InstanceType]] = {} - self.reactions: Dict[str, Reaction] = {} + self._units = units + self._templates: Dict[str, Dict[str, TemplateType]] = {} + self._instances: Dict[str, Dict[int, InstanceType]] = {} + self._reactions: Dict[str, Reaction] = {} def _get_instances_df(self, pmb_type): """ @@ -89,7 +90,7 @@ def _get_instances_df(self, pmb_type): Args: pmb_type (str): The instance type to query. Must be a key in - `self.instances`, such as `"particle"` or `"residue"`. + `self._instances`, such as `"particle"` or `"residue"`. 
Returns: pandas.DataFrame: @@ -104,9 +105,9 @@ def _get_instances_df(self, pmb_type): all other instance types use direct model dumps. """ rows = [] - if pmb_type not in self.instances: + if pmb_type not in self._instances: return pd.DataFrame(rows) - for inst in self.instances[pmb_type].values(): + for inst in self._instances[pmb_type].values(): if pmb_type == "particle": rows.append({ "pmb_type": pmb_type, @@ -143,7 +144,7 @@ def _get_reactions_df(self): flexible downstream manipulation. """ rows = [] - for r in self.reactions.values(): + for r in self._reactions.values(): stoich = { f"{p.state_name}": p.coefficient for p in r.participants @@ -174,24 +175,40 @@ def _get_templates_df(self, pmb_type): Notes: - Unit-bearing fields are converted to plain quantities through - ``to_quantity(self.units)`` to maintain consistent I/O. + ``to_quantity(self._units)`` to maintain consistent I/O. """ rows = [] - if pmb_type not in self.templates: + if pmb_type not in self._templates: return pd.DataFrame(rows) - for tpl in self.templates[pmb_type].values(): + for tpl in self._templates[pmb_type].values(): if pmb_type == "particle": for sname, st in tpl.states.items(): rows.append({ - "particle": tpl.name, - "sigma": tpl.sigma.to_quantity(self.units), - "epsilon": tpl.epsilon.to_quantity(self.units), - "cutoff": tpl.cutoff.to_quantity(self.units), - "offset": tpl.offset.to_quantity(self.units), + "pmb_type": tpl.pmb_type, + "name": tpl.name, + "sigma": tpl.sigma.to_quantity(self._units), + "epsilon": tpl.epsilon.to_quantity(self._units), + "cutoff": tpl.cutoff.to_quantity(self._units), + "offset": tpl.offset.to_quantity(self._units), "state": sname, "z": st.z, "es_type": st.es_type - }) + }) + elif pmb_type == "lj": + shift = tpl.shift + if isinstance(shift, dict) and {"magnitude", "units", "dimension"}.issubset(shift.keys()): + shift = tpl.shift.to_quantity(self._units) + rows.append({ + "pmb_type": tpl.pmb_type, + "name": tpl.name, + "state1": tpl.state1, + "state2": 
tpl.state2, + "sigma": tpl.sigma.to_quantity(self._units), + "epsilon": tpl.epsilon.to_quantity(self._units), + "cutoff": tpl.cutoff.to_quantity(self._units), + "offset": tpl.offset.to_quantity(self._units), + "shift": shift + }) else: # Generic representation for other types rows.append(tpl.model_dump()) @@ -232,22 +249,22 @@ def _register_instance(self, instance): else: raise TypeError("Unsupported instance type") - self.instances.setdefault(pmb_type, {}) + self._instances.setdefault(pmb_type, {}) - if iid in self.instances[pmb_type]: + if iid in self._instances[pmb_type]: raise ValueError(f"Instance id {iid} already exists in type '{pmb_type}'") # validate template exists - if instance.name not in self.templates.get(pmb_type, {}): + if instance.name not in self._templates.get(pmb_type, {}): raise ValueError(f"Template '{instance.name}' not found for type '{pmb_type}'") # validate state for particle instances if pmb_type == "particle": - tpl: ParticleTemplate = self.templates[pmb_type][instance.name] + tpl: ParticleTemplate = self._templates[pmb_type][instance.name] if instance.initial_state not in tpl.states: raise ValueError(f"State '{instance.initial_state}' not defined in template '{instance.name}'") - self.instances[pmb_type][iid] = instance + self._instances[pmb_type][iid] = instance def _register_reaction(self, reaction): """ @@ -259,10 +276,10 @@ def _register_reaction(self, reaction): Raises: ValueError: If reaction name already exists. 
""" - if reaction.name in self.reactions: + if reaction.name in self._reactions: raise ValueError(f"Reaction '{reaction.name}' already exists.") - self.reactions[reaction.name] = reaction + self._reactions[reaction.name] = reaction def _register_template(self, template): """ @@ -283,20 +300,22 @@ def _register_template(self, template): pmb_type = "residue" elif isinstance(template, MoleculeTemplate): pmb_type = "molecule" - elif isinstance(template, BondTemplate): - pmb_type = "bond" elif isinstance(template, PeptideTemplate): pmb_type = "peptide" elif isinstance(template, ProteinTemplate): pmb_type = "protein" elif isinstance(template, HydrogelTemplate): pmb_type = "hydrogel" + elif isinstance(template, BondTemplate): + pmb_type = "bond" + elif isinstance(template, LJInteractionTemplate): + pmb_type = "lj" else: raise TypeError("Unknown template type; set attribute pmb_type or use supported templates") - self.templates.setdefault(pmb_type, {}) + self._templates.setdefault(pmb_type, {}) - if template.name in self.templates[pmb_type]: + if template.name in self._templates[pmb_type]: raise ValueError(f"Template '{template.name}' exists in '{pmb_type}'") # particle templates must define at least one state @@ -307,7 +326,7 @@ def _register_template(self, template): if getattr(template, "default_state", None) is not None and template.default_state not in template.states: raise ValueError("default_state not in template states") - self.templates[pmb_type][template.name] = template + self._templates[pmb_type][template.name] = template def _update_instance(self, instance_id, pmb_type, attribute, value): """ @@ -343,8 +362,8 @@ def _update_instance(self, instance_id, pmb_type, attribute, value): avoid partial mutations of internal state. 
""" - if instance_id not in self.instances[pmb_type]: - raise KeyError(f"Instance '{instance_id}' not found in type '{pmb_type}'.") + if instance_id not in self._instances[pmb_type]: + raise KeyError(f"Instance '{instance_id}' not found for type '{pmb_type}' in the pyMBE database.") if pmb_type == "particle": allowed = ["initial_state", "residue_id", "molecule_id"] @@ -356,7 +375,117 @@ def _update_instance(self, instance_id, pmb_type, attribute, value): if attribute not in allowed: raise ValueError(f"Attribute '{attribute}' not allowed for {pmb_type}. Allowed attributes: {allowed}") - self.instances[pmb_type][instance_id] = self.instances[pmb_type][instance_id].model_copy(update={attribute: value}) + self._instances[pmb_type][instance_id] = self._instances[pmb_type][instance_id].model_copy(update={attribute: value}) + + def _update_reaction_participant(self, reaction_name, particle_name, state_name, coefficient): + """ + Append a new participant to an existing reaction in the database. + + Args: + reaction_name (str): + Name of the reaction to be updated. + particle_name (str): + Name of the particle template participating in the reaction. + state_name (str): + Specific state of the particle (e.g., protonation or charge state). + coefficient (int): + Stoichiometric coefficient for the new participant: + - ``coefficient < 0`` → reactant + - ``coefficient > 0`` → product + Zero is not allowed. + + """ + if reaction_name not in self._reactions: + raise KeyError(f"Reaction '{reaction_name}' not found in the pyMBE database.") + + rxn = self._reactions[reaction_name].add_participant(particle_name=particle_name, + state_name=state_name, + coefficient=coefficient) + self._register_reaction(rxn) + self._reactions.pop(reaction_name) + + def get_template(self, pmb_type, name): + """ + Retrieve a stored template by type and name. 
+ + Looks up a template within the internal template registry + (`self._templates`) using its pyMBE type (e.g., "particle", "residue", + "bond", ...) and its unique name. If the template does not exist, + a `KeyError` is raised. + + Args: + pmb_type (str): The template category (e.g., "particle", "molecule", + "residue", "bond", "protein", ...). + name (str): The unique name of the template to retrieve. + + Returns: + PMBBaseModel: The stored template instance corresponding to the + provided type and name. + + Raises: + KeyError: If no template with the given type and name exists in + the internal registry. + """ + if name not in self._templates[pmb_type]: + raise KeyError(f"Template '{name}' not found in type '{pmb_type}'.") + else: + return self._templates[pmb_type][name] + + def get_es_types_map(self): + """ + Return a mapping from each particle to its states' `es_type`. + + Iterates over all particle templates and extracts the ESPResSo type (`es_type`) + defined for each state. Produces a nested dictionary of the form: + + { + particle_name: { + state_name: es_type, + ... + }, + ... + } + + Returns: + dict[str, dict[str, int]]: + A dictionary mapping each particle name to another dictionary that maps + each state name to its corresponding ``es_type``. + + Raises: + KeyError: + If the ``"particle"`` template group does not exist in the database. 
+ + Examples: + Suppose templates include: + Particle A: + HA: es_type = 0 + A-: es_type = 1 + Particle H: + H+: es_type = 2 + + Then the method returns: + { + "A": { + "HA": 0, + "A-": 1, + }, + "H": { + "H+": 2, + } + } + """ + if "particle" not in self._templates: + return {} + + result = {} + for particle_name, tpl in self._templates["particle"].items(): + for state_name, state in tpl.states.items(): + if particle_name not in result: + result[particle_name] = {state_name: state.es_type} + else: + result[particle_name][state_name] = state.es_type + + return result class _NumpyEncoder(json.JSONEncoder): diff --git a/pyMBE/storage/reactions/reaction.py b/pyMBE/storage/reactions/reaction.py index 9c359c0..8f558d1 100644 --- a/pyMBE/storage/reactions/reaction.py +++ b/pyMBE/storage/reactions/reaction.py @@ -18,7 +18,7 @@ # from typing import List, Dict, Optional -from pydantic import BaseModel, Field, field_validator +from pydantic import BaseModel, Field, field_validator, model_validator class ReactionParticipant(BaseModel): @@ -93,12 +93,37 @@ class Reaction(BaseModel): reaction_type="acid_base", ) """ - name: str + participants: List[ReactionParticipant] pK: float = Field(..., description="pKa, logK, eq constant, etc.") reaction_type: str = Field(..., description="acid_base, binding, redox, ...") metadata: Optional[Dict] = None + name: str = Field(default="", description="Automatically generated reaction name") + + @model_validator(mode="after") + def generate_name(self): + """Automatically generate reaction name from participants.""" + reactants = [] + products = [] + + for p in self.participants: + species = f"{p.state_name}" + if p.coefficient < 0: + reactants.append(species) + else: + products.append(species) + + reactants = sorted(reactants) + products = sorted(products) + + left = " + ".join(reactants) + right = " + ".join(products) + + # reversible reaction symbol + self.name = f"{left} <-> {right}" + return self + @field_validator("participants") def 
at_least_two_participants(cls, v): if len(v) < 2: @@ -111,3 +136,59 @@ def no_zero_coeff(cls, v): if p.coefficient == 0: raise ValueError(f"Participant {p.name} has coefficient 0.") return v + + def add_participant(self, particle_name, state_name, coefficient): + """ + Add a new reaction participant to the reaction. + + Creates a new :class:`ReactionParticipant` with the provided particle name, + state name and stoichiometric coefficient, and returns an updated + :class:`Reaction` instance containing the additional participant. + + The reaction object itself is not modified in place. Instead, a new + validated copy is returned, following Pydantic's immutable data model + best practices. +d + Args: + particle_name (str): + Name of the particle participating in the reaction. + state_name (str): + Specific state of the particle (e.g., protonation or charge state). + coefficient (int): + Stoichiometric coefficient for the participant: + - ``coefficient < 0`` → reactant + - ``coefficient > 0`` → product + Coefficients equal to zero are not allowed. + + Returns: + Reaction: + A new :class:`Reaction` object with the participant added. + + Raises: + ValueError: + If ``coefficient`` is zero. + + Examples: + >>> rxn = Reaction( + ... name="acid_dissociation", + ... participants=[ + ... ReactionParticipant("A", "HA", -1), + ... ReactionParticipant("A", "A-", 1), + ... ], + ... pK=4.7, + ... reaction_type="acid_base", + ... 
) + >>> rxn = rxn.add_participant("H", "H+", 1) + """ + if coefficient == 0: + raise ValueError("Stoichiometric coefficient cannot be zero.") + + new_participant = ReactionParticipant( + particle_name=particle_name, + state_name=state_name, + coefficient=coefficient, + ) + + new_reaction = self.model_copy(update={"participants": self.participants + [new_participant]}) + + return new_reaction.generate_name() diff --git a/pyMBE/storage/templates/bond.py b/pyMBE/storage/templates/bond.py index 33e8814..f323d81 100644 --- a/pyMBE/storage/templates/bond.py +++ b/pyMBE/storage/templates/bond.py @@ -20,6 +20,7 @@ from typing import Dict, Literal from ..base_type import PMBBaseModel from ..pint_quantity import PintQuantity +from pydantic import Field, model_validator class BondTemplate(PMBBaseModel): @@ -41,7 +42,17 @@ class BondTemplate(PMBBaseModel): Values are stored as PintQuantity objects for unit-aware calculations. """ pmb_type: Literal["bond"] = "bond" - name: str # e.g. "HARMONIC_default" + name: str = Field(default="default") bond_type: str # "HARMONIC", "FENE" + particle_name1: str | None = None + particle_name2: str | None = None parameters: Dict[str, PintQuantity] # k, r0, d_r_max... 
- l0: PintQuantity # initial bond length + + def _make_name(self): + """Create a canonical name for the bond.""" + if self.particle_name1 is None or self.particle_name2 is None: + raise RuntimeError("The BondTemplate has no defined particle_name1 or particle_name2 and therefore the name could not be automatically generated") + pn1, pn2 = sorted([self.particle_name1, self.particle_name2]) + self.name = f"{pn1}-{pn2}" + + \ No newline at end of file diff --git a/pyMBE/storage/templates/hydrogel.py b/pyMBE/storage/templates/hydrogel.py index d4459ad..bb44cce 100644 --- a/pyMBE/storage/templates/hydrogel.py +++ b/pyMBE/storage/templates/hydrogel.py @@ -21,7 +21,6 @@ from pydantic import Field, BaseModel from ..base_type import PMBBaseModel - class HydrogelNode(BaseModel): """ Represents a node in a hydrogel network. @@ -33,20 +32,18 @@ class HydrogelNode(BaseModel): particle_name: str lattice_index: List[int] # must be length 3 - class HydrogelChain(BaseModel): """ Represents a polymer chain between two hydrogel nodes. Attributes: + molecule_name (str): Name of the molecule representing the polymer chain. node_start (str): Name of the starting node. node_end (str): Name of the ending node. - residue_list (List[str]): List of residue names forming the chain between the nodes. """ + molecule_name: str node_start: str - node_end: str - residue_list: List[str] # list of residue names - + node_end: str class HydrogelTemplate(PMBBaseModel): """ diff --git a/pyMBE/storage/templates/lj.py b/pyMBE/storage/templates/lj.py new file mode 100644 index 0000000..cdd1e52 --- /dev/null +++ b/pyMBE/storage/templates/lj.py @@ -0,0 +1,113 @@ +# +# Copyright (C) 2025 pyMBE-dev team +# +# This file is part of pyMBE. +# +# pyMBE is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. 
+# +# pyMBE is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . +# + +from pydantic import BaseModel, Field, model_validator +from ..pint_quantity import PintQuantity + + +class LJInteractionTemplate(BaseModel): + """ + Template representing the Lennard–Jones (LJ) interaction parameters + between two particle *states*. + + The template **always generates the interaction name automatically** + from the two provided state names, ensuring standardized naming and + preventing inconsistencies between different LJ entries. + + The LJ parameters stored here correspond to the *final effective* + values after applying the combining rule (e.g., Lorentz–Berthelot). + This allows users to inspect, validate, or export the exact values + that will be passed to the simulation engine. + + Attributes: + pmb_type (str): + Fixed identifier for the template type. Always ``"lj"``. + state1 (str): + Name of the first particle state in the pair. + state2 (str): + Name of the second particle state in the pair. + sigma (PintQuantity): + Lennard–Jones σ parameter (distance scale) after applying + the combining rule. + epsilon (PintQuantity): + Lennard–Jones ε parameter (energy scale) after combining. + cutoff (PintQuantity): + Cutoff radius for the interaction. + offset (PintQuantity): + Offset applied to the potential (ESPResSo parameter). + shift (str | PintQuantity): + Shift applied at the cutoff. May be ``"auto"`` or a PintQuantity value. + name (str): + Auto-generated unique identifier for the interaction, built from + ``state1`` and ``state2`` in alphabetical order. Cannot be set + manually by the user. + + Notes: + - The order of ``state1`` and ``state2`` does **not** matter. 
+ The name is always generated as ``"min(state1, state2)-max(state1, state2)"``. + - Users should store raw LJ parameters (σ, ε) in particle templates. + This object stores the *final combined* interaction values. + + Examples: + Creating an LJ interaction: + + >>> LJInteractionTemplate( + ... state1="HA", + ... state2="A-", + ... sigma=sigma, + ... epsilon=epsilon, + ... cutoff=cutoff, + ... offset=offset, + ... shift="auto", + ... ) + + Interaction between ``"L"`` and ``"W"`` results in: + + >>> LJInteractionTemplate( + ... state1="W", + ... state2="L", + ... ... + ... ).name + 'L-W' + """ + + pmb_type: str = "lj" + name: str = Field(default="", description="Automatically generated name") + + state1: str + state2: str + + sigma: PintQuantity + epsilon: PintQuantity + cutoff: PintQuantity + offset: PintQuantity + shift: str | PintQuantity + + + @classmethod + def _make_name(cls, state1: str, state2: str) -> str: + """Create a canonical name from two states.""" + s1, s2 = sorted([state1, state2]) + return f"{s1}-{s2}" + + @model_validator(mode="after") + def _auto_generate_name(self): + """Enforce standardized automatic name.""" + object.__setattr__(self, "name", self._make_name(self.state1, self.state2)) + return self diff --git a/pyMBE/storage/templates/particle.py b/pyMBE/storage/templates/particle.py index 6e5b281..bc308b1 100644 --- a/pyMBE/storage/templates/particle.py +++ b/pyMBE/storage/templates/particle.py @@ -54,6 +54,7 @@ class ParticleTemplate(PMBBaseModel): """ pmb_type: str = Field(default="particle", frozen=True) + name : str sigma: PintQuantity cutoff: PintQuantity offset: PintQuantity diff --git a/pyMBE/storage/templates/peptide.py b/pyMBE/storage/templates/peptide.py index 0b4ae3a..3d1ae61 100644 --- a/pyMBE/storage/templates/peptide.py +++ b/pyMBE/storage/templates/peptide.py @@ -35,4 +35,4 @@ class PeptideTemplate(PMBBaseModel): name: str model: str residue_list: list[str] - sequence: list[str] \ No newline at end of file + sequence: str \ No 
newline at end of file diff --git a/pyMBE/storage/templates/protein.py b/pyMBE/storage/templates/protein.py index 6b8e7cb..2f20d71 100644 --- a/pyMBE/storage/templates/protein.py +++ b/pyMBE/storage/templates/protein.py @@ -16,4 +16,4 @@ class ProteinTemplate(PMBBaseModel): name: str model: str residue_list: list[str] - sequence: list[str] \ No newline at end of file + sequence: str \ No newline at end of file diff --git a/test.py b/test.py index 288651e..ee4e25d 100644 --- a/test.py +++ b/test.py @@ -1,4 +1,5 @@ -# test.py + +import pyMBE from pyMBE.storage.manager import Manager from pyMBE.storage.templates.particle import ParticleTemplate, ParticleState from pyMBE.storage.instances.particle import ParticleInstance @@ -16,6 +17,10 @@ from pyMBE.storage.instances.protein import ProteinInstance from pyMBE.storage.templates.hydrogel import HydrogelTemplate, HydrogelNode, HydrogelChain from pyMBE.storage.instances.hydrogel import HydrogelInstance +from pyMBE.storage.templates.lj import LJInteractionTemplate + +from pyMBE.lib.lattice import DiamondLattice +import importlib.resources import pyMBE.storage.io as io @@ -38,82 +43,124 @@ def main(): # 1. 
CREATE PARTICLE TEMPLATES + STATES # ============================================================ - # A particle (acid) - tpl_A = ParticleTemplate(name="A", - sigma=PintQuantity.from_quantity(q=3.5 * units.reduced_length, expected_dimension="length", ureg=units), - cutoff=PintQuantity.from_quantity(q=4 * units.reduced_length, expected_dimension="length", ureg=units), - offset=PintQuantity.from_quantity(q=0 * units.reduced_length, expected_dimension="length", ureg=units), - epsilon=PintQuantity.from_quantity(q=0.2 * units.reduced_energy, expected_dimension="energy", ureg=units)) - - tpl_A.add_state(ParticleState(name="HA", z=0, es_type=0)) - tpl_A.add_state(ParticleState(name="A-", z=-1, es_type=1)) - - # H+ particle (single-state) - tpl_H = ParticleTemplate(name="H", sigma=PintQuantity.from_quantity(q=3.5 * units.reduced_length, expected_dimension="length", ureg=units), - cutoff=PintQuantity.from_quantity(q=4 * units.reduced_length, expected_dimension="length", ureg=units), - offset=PintQuantity.from_quantity(q=0 * units.reduced_length, expected_dimension="length", ureg=units), - epsilon=PintQuantity(magnitude=0.2, units="J", dimension="energy")) - tpl_H.add_state(ParticleState(name="H+", z=+1, es_type=2)) - - # Register templates - db._register_template(tpl_A) - db._register_template(tpl_H) - print("\n=== Particle Templates DataFrame ===") - print(db._get_templates_df(pmb_type="particle")) + pmb = pyMBE.pymbe_library(seed=42) + units = pmb.units + pmb.define_particle(name="Z", + sigma=3.5 * units.reduced_length, + cutoff=4 * units.reduced_length, + offset=0 * units.reduced_length, + epsilon=0.2 * units.reduced_energy, + acidity="acidic", + pka=4.25) + + pmb.define_particle(name="X", + sigma=3.5 * units.reduced_length, + cutoff=4 * units.reduced_length, + offset=0 * units.reduced_length, + epsilon=0.2 * units.reduced_energy) + + pmb.define_residue(name="R1", central_bead="Z", side_chains=["X","Z"]) + pmb.define_residue(name="R2", central_bead="ZA", 
side_chains=["X","HZ"]) + + print("\n=== Residue Templates DataFrame ===") + print(pmb.db._get_templates_df(pmb_type="residue")) + pmb.define_molecule(name="M1", residue_list=["R1","R2"]) + print("\n=== Molecule Templates DataFrame ===") + print(pmb.db._get_templates_df(pmb_type="molecule")) - tpl_R1 = ResidueTemplate(name="R1", central_bead="A", side_chains=["H","A"]) - tpl_R2 = ResidueTemplate(name="R2", central_bead="HA", side_chains=["H","HA"]) - db._register_template(tpl_R1) - db._register_template(tpl_R2) - print("\n=== Residue Templates DataFrame ===") - print(db._get_templates_df(pmb_type="residue")) + print("\n=== Hydrogel Templates DataFrame ===") + diamond_lattice = DiamondLattice(30, 3.5 * units.reduced_length) + lattice_builder = pmb.initialize_lattice_builder(diamond_lattice) + + # Setting up node topology + indices = diamond_lattice.indices + node_topology = [] + + for index in range(len(indices)): + node_topology.append({"particle_name": "A", + "lattice_index": indices[index]}) + # Setting up chain topology + node_labels = lattice_builder.node_labels + chain_labels = lattice_builder.chain_labels + reverse_node_labels = {v: k for k, v in node_labels.items()} + chain_topology = [] + + for chain_data in chain_labels.items(): + node_label_pair = chain_data[0] + node_label_s, node_label_e = [int(x) for x in node_label_pair.strip("()").split(",")] + chain_topology.append({'node_start':reverse_node_labels[node_label_s], + 'node_end': reverse_node_labels[node_label_e], + 'molecule_name':"M1"}) + + pmb.define_hydrogel("my_hydrogel", node_topology, chain_topology) + print(pmb.db._get_templates_df(pmb_type="hydrogel")) - tpl_M1 = MoleculeTemplate(name="M1", residue_list=["R1","R2"]) - db._register_template(tpl_M1) print("\n=== Molecule Templates DataFrame ===") - print(db._get_templates_df(pmb_type="molecule")) + print(pmb.db._get_templates_df(pmb_type="molecule")) + + tpl = LJInteractionTemplate(state1 = "A", + state2 = "AH", + sigma = 
PintQuantity.from_quantity(q=1.0 * units.reduced_length, expected_dimension="length", ureg=units), + cutoff = PintQuantity.from_quantity(q=1.2 * units.reduced_length, expected_dimension="length", ureg=units), + offset = PintQuantity.from_quantity(q=0 * units.reduced_length, expected_dimension="length", ureg=units), + epsilon = PintQuantity.from_quantity(q=1.0 * units.reduced_energy, expected_dimension="energy", ureg=units), + shift = "auto" + ) + db._register_template(tpl) + print(db._get_templates_df(pmb_type="lj")) + + print("\n=== Particle Templates DataFrame ===") + print(pmb.db._get_templates_df(pmb_type="particle")) + print(pmb.db._get_reactions_df()) + + # Update reaction + """ + pmb.db._update_reaction_participant(reaction_name="AH <-> A", + particle_name="H", + state_name="H", + coefficient=1) + print(pmb.db._get_reactions_df()) + """ - parameters = {"k": PintQuantity.from_quantity(q=100.0 * units.reduced_energy / (units.reduced_length**2), expected_dimension="energy/length**2", ureg=units), - "r0": PintQuantity.from_quantity(q=1.0 * units.reduced_length, expected_dimension="length", ureg=units),} + + + parameters = {"k": 100.0 * units.reduced_energy / (units.reduced_length**2), + "r_0": 1.0 * units.reduced_length} + + + pmb.define_bond(bond_type="harmonic", + bond_parameters=parameters, + particle_pairs=[["A","A"], + ["K","A"]]) + + pmb.define_default_bond(bond_type="harmonic", + bond_parameters=parameters) - tpl_bond = BondTemplate(name="A1-A2", - bond_type="harmonic", - parameters=parameters, - l0=PintQuantity.from_quantity(q=1.0 * units.reduced_length, - expected_dimension="length", - ureg=units)) - db._register_template(tpl_bond) print("\n=== Bond Templates DataFrame ===") - print(db._get_templates_df(pmb_type="bond")) + print(pmb.db._get_templates_df(pmb_type="bond")) print("\n=== Peptide Templates DataFrame ===") - tpl_P1 = PeptideTemplate(name="Peptide1", - model="Model1", - residue_list=["R1","R2"], - sequence=["R1","R2"]) - 
db._register_template(tpl_P1) - print(db._get_templates_df(pmb_type="peptide")) + pmb.define_peptide(name="Peptide1", + model="1beadAA", + sequence="KKKKDDDD") + + print(pmb.db._get_templates_df(pmb_type="peptide")) print("\n=== Protein Templates DataFrame ===") - tpl_PR1 = ProteinTemplate(name="Protein1", - model="ModelP1", - residue_list=["R1","R2"], - sequence=["R1","R2"]) - db._register_template(tpl_PR1) + path = importlib.resources.files(pyMBE) / "parameters" / "globular_proteins" / f"1beb.vtf", + + topology_dict = pmb.read_protein_vtf_in_df (filename=path[0]) + + pmb.define_protein(name="blabla", + model="2beadAA", + sequence="KKKKKK") + print(db._get_templates_df(pmb_type="protein")) - print("\n=== Hydrogel Templates DataFrame ===") - node1 = HydrogelNode(particle_name="A", lattice_index=[0,0,0]) - node2 = HydrogelNode(particle_name="HA", lattice_index=[1,0,0]) - chain1 = HydrogelChain(node_start="A", node_end="HA", residue_list=["R1","R2"]) - tpl_HG1 = HydrogelTemplate(name="Hydrogel1", - node_map=[node1, node2], - chain_map=[chain1]) - db._register_template(tpl_HG1) - print(db._get_templates_df(pmb_type="hydrogel")) + # ============================================================ # 2. 
CREATE INSTANCES (optional for testing) @@ -123,15 +170,15 @@ def main(): inst2 = ParticleInstance(name="A", particle_id=2, initial_state="A-",residue_id=0) inst3 = ParticleInstance(name="H", particle_id=3, initial_state="H+") - db._register_instance(inst1) - db._register_instance(inst2) - db._register_instance(inst3) +# db._register_instance(inst1) +# db._register_instance(inst2) +# db._register_instance(inst3) print("\n=== Particle Instances DataFrame ===") print(db._get_instances_df(pmb_type="particle")) - db._update_instance(pmb_type="particle", instance_id=1, attribute="residue_id", value=int(0)) +# db._update_instance(pmb_type="particle", instance_id=1, attribute="residue_id", value=int(0)) print("\n=== Particle Instances DataFrame (after update) ===") print(db._get_instances_df(pmb_type="particle")) @@ -143,44 +190,44 @@ def main(): residue_id=3, molecule_id=0) - db._register_instance(inst1) - db._register_instance(inst2) - db._register_instance(inst3) +# db._register_instance(inst1) +# db._register_instance(inst2) +# db._register_instance(inst3) print("\n=== Residue Instances DataFrame ===") print(db._get_instances_df(pmb_type="residue")) - db._update_instance(pmb_type="residue",instance_id=1, attribute="molecule_id", value=int(0)) +# db._update_instance(pmb_type="residue",instance_id=1, attribute="molecule_id", value=int(0)) print("\n=== Residue Instances DataFrame (after update)===") print(db._get_instances_df(pmb_type="residue")) inst1 = MoleculeInstance(name="M1", molecule_id=1) inst2 = MoleculeInstance(name="M1", molecule_id=2) - db._register_instance(inst1) - db._register_instance(inst2) +# db._register_instance(inst1) +# db._register_instance(inst2) print("\n=== Molecule Instances DataFrame ===") print(db._get_instances_df(pmb_type="molecule")) inst_bond = BondInstance(name="A1-A2", bond_id=1, particle_id1=1, particle_id2=2) - db._register_instance(inst_bond) + # db._register_instance(inst_bond) print("\n=== Bond Instances DataFrame ===") 
print(db._get_instances_df(pmb_type="bond")) print("\n=== Peptide Instances DataFrame ===") - inst_peptide1 = PeptideInstance(name="Peptide1", molecule_id=3) - db._register_instance(inst_peptide1) + # inst_peptide1 = PeptideInstance(name="Peptide1", molecule_id=3) + # db._register_instance(inst_peptide1) print(db._get_instances_df(pmb_type="peptide")) print("\n=== Protein Instances DataFrame ===") - inst_protein1 = ProteinInstance(name="Protein1", molecule_id=4) - db._register_instance(inst_protein1) + # inst_protein1 = ProteinInstance(name="Protein1", molecule_id=4) + # db._register_instance(inst_protein1) print(db._get_instances_df(pmb_type="protein")) print("\n=== Hydrogel Instances DataFrame ===") - inst_hydrogel1 = HydrogelInstance(name="Hydrogel1", hydrogel_id=1, molecule_ids=["1","2","3"]) - db._register_instance(inst_hydrogel1) + # inst_hydrogel1 = HydrogelInstance(name="Hydrogel1", hydrogel_id=1, molecule_ids=["1","2","3"]) + # db._register_instance(inst_hydrogel1) print(db._get_instances_df(pmb_type="hydrogel")) @@ -205,7 +252,6 @@ def main(): # 4. 
PRINT DATAFRAMES # ============================================================ - print("\n=== Instances DataFrame ===") print(db._get_instances_df(pmb_type="particle")) @@ -230,14 +276,14 @@ def main(): # For this demo we will copy the stored templates (in real use you would re-load from serialized storage) db2 = Manager(units=ureg2) # re-insert templates by transferring stored representation (simulate loading) - for ptype, tdict in db.templates.items(): + for ptype, tdict in db._templates.items(): for tname, t in tdict.items(): db2._register_template(t) print("\nTemplates shown with registry 2 (different reduced units):") print(db2._get_templates_df("particle")) - io._save_database_csv(db, folder="test_db_csv") + io._save_database_csv(pmb.db, folder="test_db_csv") db3 = Manager(units=ureg2) @@ -250,6 +296,7 @@ def main(): print(db3._get_templates_df("peptide")) print(db3._get_templates_df("protein")) print(db3._get_templates_df("hydrogel")) + print(db3._get_templates_df("lj")) print("\nLoaded DB3 Instances DataFrame:") print(db3._get_instances_df("particle")) print(db3._get_instances_df("residue")) From 5f735edd2bc0d891b49429837afb80cc7ccbdf5e Mon Sep 17 00:00:00 2001 From: Pablo Date: Fri, 5 Dec 2025 18:12:40 +0100 Subject: [PATCH 07/55] updated residue and bond management in pyMBE --- pyMBE/lib/handy_functions.py | 103 ++--- pyMBE/pyMBE.py | 582 +++++++++++++--------------- pyMBE/storage/instances/bond.py | 5 +- pyMBE/storage/io.py | 2 + pyMBE/storage/manager.py | 217 ++++++++++- pyMBE/storage/templates/bond.py | 55 ++- pyMBE/storage/templates/particle.py | 33 +- requirements.txt | 1 + test.py | 58 ++- 9 files changed, 632 insertions(+), 424 deletions(-) diff --git a/pyMBE/lib/handy_functions.py b/pyMBE/lib/handy_functions.py index c633a07..a593019 100644 --- a/pyMBE/lib/handy_functions.py +++ b/pyMBE/lib/handy_functions.py @@ -19,6 +19,7 @@ import logging import re import numpy as np +import scipy def check_aminoacid_key(key): """ @@ -180,80 +181,38 @@ def 
get_metal_ions_charge_number_map(): metal_charge_number_map = {"Ca": 2} return metal_charge_number_map -def get_lj_parameters(particle_name1, particle_name2, pmb, combining_rule='Lorentz-Berthelot'): +def calculate_initial_bond_length(bond_parameters, bond_type, lj_parameters): """ - Returns the Lennard-Jones parameters for the interaction between the particle types given by - `particle_name1` and `particle_name2` in `pymbe.df`, calculated according to the provided combining rule. - + Calculates the initial bond length that is used when setting up molecules, + based on the minimum of the sum of bonded and short-range (LJ) interactions. + Args: - particle_name1 (str): label of the type of the first particle type - particle_name2 (str): label of the type of the second particle type - combining_rule (`string`, optional): combining rule used to calculate `sigma` and `epsilon` for the potential betwen a pair of particles. Defaults to 'Lorentz-Berthelot'. - - Returns: - {"epsilon": epsilon_value, "sigma": sigma_value, "offset": offset_value, "cutoff": cutoff_value} - - Note: - - Currently, the only `combining_rule` supported is Lorentz-Berthelot. - - If the sigma value of `particle_name1` or `particle_name2` is 0, the function will return an empty dictionary. No LJ interactions are set up for particles with sigma = 0. 
- """ - supported_combining_rules=["Lorentz-Berthelot"] - lj_parameters_keys=["sigma","epsilon","offset","cutoff"] - if combining_rule not in supported_combining_rules: - raise ValueError(f"Combining_rule {combining_rule} currently not implemented in pyMBE, valid keys are {supported_combining_rules}") - lj_parameters={} - for key in lj_parameters_keys: - lj_parameters[key]=[] - # Search the LJ parameters of the type pair - for name in [particle_name1,particle_name2]: - for key in lj_parameters_keys: - lj_parameters[key].append(getattr(pmb.db.get_template(pmb_type="particle", name=name), key)) - # If one of the particle has sigma=0, no LJ interations are set up between that particle type and the others - if not all(sigma_value.magnitude for sigma_value in lj_parameters["sigma"]): - return {} - # Apply combining rule - if combining_rule == 'Lorentz-Berthelot': - lj_parameters["sigma"]=(lj_parameters["sigma"][0]+lj_parameters["sigma"][1])/2 - lj_parameters["cutoff"]=(lj_parameters["cutoff"][0]+lj_parameters["cutoff"][1])/2 - lj_parameters["offset"]=(lj_parameters["offset"][0]+lj_parameters["offset"][1])/2 - lj_parameters["epsilon"]=np.sqrt(lj_parameters["epsilon"][0]*lj_parameters["epsilon"][1]) - return lj_parameters - - -def calculate_initial_bond_length(bond_object, bond_type, epsilon, sigma, cutoff, offset): - """ - Calculates the initial bond length that is used when setting up molecules, - based on the minimum of the sum of bonded and short-range (LJ) interactions. 
- - Args: - bond_object(`espressomd.interactions.BondedInteractions`): instance of a bond object from espressomd library - bond_type(`str`): label identifying the used bonded potential - epsilon(`pint.Quantity`): LJ epsilon of the interaction between the particles - sigma(`pint.Quantity`): LJ sigma of the interaction between the particles - cutoff(`pint.Quantity`): cutoff-radius of the LJ interaction - offset(`pint.Quantity`): offset of the LJ interaction - """ - def truncated_lj_potential(x, epsilon, sigma, cutoff,offset): - if x>cutoff: - return 0.0 - else: - return 4*epsilon*((sigma/(x-offset))**12-(sigma/(x-offset))**6) - 4*epsilon*((sigma/cutoff)**12-(sigma/cutoff)**6) - - epsilon_red=epsilon.to('reduced_energy').magnitude - sigma_red=sigma.to('reduced_length').magnitude - cutoff_red=cutoff.to('reduced_length').magnitude - offset_red=offset.to('reduced_length').magnitude - - if bond_type == "harmonic": - r_0 = bond_object.params.get('r_0') - k = bond_object.params.get('k') - l0 = scipy.optimize.minimize(lambda x: 0.5*k*(x-r_0)**2 + truncated_lj_potential(x, epsilon_red, sigma_red, cutoff_red, offset_red), x0=r_0).x - elif bond_type == "FENE": - r_0 = bond_object.params.get('r_0') - k = bond_object.params.get('k') - d_r_max = bond_object.params.get('d_r_max') - l0 = scipy.optimize.minimize(lambda x: -0.5*k*(d_r_max**2)*np.log(1-((x-r_0)/d_r_max)**2) + truncated_lj_potential(x, epsilon_red, sigma_red, cutoff_red,offset_red), x0=1.0).x - return l0 + bond_object(`espressomd.interactions.BondedInteractions`): instance of a bond object from espressomd library + bond_type(`str`): label identifying the used bonded potential + epsilon(`pint.Quantity`): LJ epsilon of the interaction between the particles + sigma(`pint.Quantity`): LJ sigma of the interaction between the particles + cutoff(`pint.Quantity`): cutoff-radius of the LJ interaction + offset(`pint.Quantity`): offset of the LJ interaction + """ + def truncated_lj_potential(x, epsilon, sigma, cutoff,offset): + if 
x>cutoff: + return 0.0 + else: + return 4*epsilon*((sigma/(x-offset))**12-(sigma/(x-offset))**6) - 4*epsilon*((sigma/cutoff)**12-(sigma/cutoff)**6) + epsilon=lj_parameters["epsilon"].m_as("reduced_energy") + sigma=lj_parameters["sigma"].m_as("reduced_length") + cutoff=lj_parameters["cutoff"].m_as("reduced_length") + offset=lj_parameters["offset"].m_as("reduced_length") + if bond_type == "harmonic": + r_0 = bond_parameters['r_0'].m_as("reduced_length") + k = bond_parameters['k'].m_as("reduced_energy/reduced_length**2") + l0 = scipy.optimize.minimize(lambda x: 0.5*k*(x-r_0)**2 + truncated_lj_potential(x, epsilon, sigma, cutoff, offset), x0=r_0).x + elif bond_type == "FENE": + r_0 = bond_parameters['r_0'].m_as("reduced_length") + k = bond_parameters['k'].m_as("reduced_energy/reduced_length**2") + d_r_max = bond_parameters['d_r_max'].m_as("reduced_length") + l0 = scipy.optimize.minimize(lambda x: -0.5*k*(d_r_max**2)*np.log(1-((x-r_0)/d_r_max)**2) + truncated_lj_potential(x, epsilon, sigma, cutoff,offset), x0=1.0).x + return l0 diff --git a/pyMBE/pyMBE.py b/pyMBE/pyMBE.py index 3fee30f..fe133e5 100644 --- a/pyMBE/pyMBE.py +++ b/pyMBE/pyMBE.py @@ -25,6 +25,8 @@ import scipy.optimize import logging import importlib.resources + +# Templates from pyMBE.storage.manager import Manager from pyMBE.storage.templates.particle import ParticleTemplate, ParticleState from pyMBE.storage.instances.particle import ParticleInstance @@ -43,6 +45,8 @@ from pyMBE.storage.templates.hydrogel import HydrogelTemplate, HydrogelNode, HydrogelChain from pyMBE.storage.instances.hydrogel import HydrogelInstance +# Utilities +import pyMBE.lib.handy_functions as hf import pyMBE.storage.io as io class pymbe_library(): @@ -100,7 +104,8 @@ def __init__(self, seed, temperature=None, unit_length=None, unit_charge=None, K self.db = Manager(units=self.units) self.lattice_builder = None - self.root = importlib.resources.files(__package__) + self.root = importlib.resources.files(__package__) + 
self._bond_instances={} def _check_supported_molecule(self, molecule_name,valid_pmb_types): """ @@ -152,24 +157,6 @@ def _get_residue_list_from_sequence(self, sequence): residue_name='AA-'+item residue_list.append(residue_name) return residue_list - - - def add_bonds_to_espresso(self, espresso_system) : - """ - Adds all bonds defined in `pmb.df` to `espresso_system`. - - Args: - espresso_system(`espressomd.system.System`): system object of espressomd library - """ - - if 'bond' in self.df["pmb_type"].values: - bond_df = self.df.loc[self.df ['pmb_type'] == 'bond'] - bond_list = bond_df.bond_object.values.tolist() - for bond in bond_list: - espresso_system.bonded_inter.add(bond) - else: - logging.warning('there are no bonds defined in pymbe.df') - return def calculate_center_of_mass_of_molecule(self, molecule_id, espresso_system): """ @@ -423,8 +410,6 @@ def check_dimensionality(self, variable, expected_dimensionality): raise ValueError(f"The variable {variable} should have a dimensionality of {expected_dimensionality}, instead the variable has a dimensionality of {variable.dimensionality}") return correct_dimensionality - - def check_pka_set(self, pka_set): """ Checks that `pka_set` has the formatting expected by the pyMBE library. @@ -486,9 +471,43 @@ def create_added_salt(self, espresso_system, cation_name, anion_name, c_salt): logging.info(f"added salt concentration of {c_salt_calculated.to('reduced_length**-3')} given by {N_cation} cations and {N_anion} anions") return c_salt_calculated - def create_bond_in_espresso(self, bond_type, bond_parameters): - ''' - Creates either a harmonic or a FENE bond in ESPResSo + def create_bond(self, particle_id1, particle_id2, espresso_system, use_default_bond=False): + """ + Creates a bond between two particle instances in an ESPResSo system and registers it in the pyMBE database. + + This method performs the following steps: + 1. 
Retrieves the particle instances corresponding to `particle_id1` and `particle_id2` from the database. + 2. Retrieves or creates the corresponding ESPResSo bond instance using the bond template. + 3. Adds the ESPResSo bond instance to the ESPResSo system if it was newly created. + 4. Adds the bond to the first particle's bond list in ESPResSo. + 5. Creates a `BondInstance` in the database and registers it. + + Args: + particle_id1 (int): pyMBE and ESPResSo ID of the first particle. + particle_id2 (int): pyMBE and ESPResSo ID of the second particle. + espresso_system (espressomd.system.System): ESPResSo system object where the bond will be created. + use_default_bond (bool, optional): If True, use a default bond template if no specific template exists. Defaults to False. + """ + particle_inst_1 = self.db.get_instance(pmb_type="particle", + instance_id=particle_id1) + particle_inst_2 = self.db.get_instance(pmb_type="particle", + instance_id=particle_id2) + + bond_inst = self.get_espresso_bond_instance(particle_name1=particle_inst_1.name, + particle_name2=particle_inst_2.name, + espresso_system=espresso_system, + use_default_bond=use_default_bond) + espresso_system.part.by_id(particle_id1).add_bond((bond_inst, particle_id2)) + pmb_bond_instance = BondInstance(bond_id=self.db._propose_instance_id(pmb_type="bond"), + name=BondTemplate.make_bond_key(pn1=particle_inst_1.name, + pn2=particle_inst_2.name), + particle_id1=particle_id1, + particle_id2=particle_id2) + self.db._register_instance(instance=pmb_bond_instance) + + def create_espresso_bond_instance(self, bond_type, bond_parameters): + """ + Creates an ESPResSo bond instance. Args: bond_type(`str`): label to identify the potential to model the bond. @@ -498,52 +517,35 @@ def create_bond_in_espresso(self, bond_type, bond_parameters): Currently, only HARMONIC and FENE bonds are supported. For a HARMONIC bond the dictionary must contain: - - - k (`obj`) : Magnitude of the bond. 
It should have units of energy/length**2 + - k (`Pint.Quantity`) : Magnitude of the bond. It should have units of energy/length**2 using the `pmb.units` UnitRegistry. - - r_0 (`obj`) : Equilibrium bond length. It should have units of length using + - r_0 (`Pint.Quantity`) : Equilibrium bond length. It should have units of length using the `pmb.units` UnitRegistry. For a FENE bond the dictionary must additionally contain: - - - d_r_max (`obj`): Maximal stretching length for FENE. It should have + - d_r_max (`Pint.Quantity`): Maximal stretching length for FENE. It should have units of length using the `pmb.units` UnitRegistry. Default 'None'. Returns: - bond_object (`obj`): an ESPResSo bond object - ''' + (`espressomd.interactions`): instance of an ESPResSo bond object + """ from espressomd import interactions - - valid_bond_types = ["harmonic", "FENE"] - - if 'k' in bond_parameters: - bond_magnitude = bond_parameters['k'].to('reduced_energy / reduced_length**2') - else: - raise ValueError("Magnitude of the potential (k) is missing") - + valid_bond_types = ["harmonic", "FENE"] + if bond_type not in valid_bond_types: + raise NotImplementedError(f"Bond type '{bond_type}' currently not implemented in pyMBE, accepted types are {valid_bond_types}") + required_parameters = {"harmonic": ["r_0","k"], + "FENE": ["r_0","k","d_r_max"]} + for required_parameter in required_parameters[bond_type]: + if required_parameter not in bond_parameters.keys(): + raise ValueError(f"Missing required parameter {required_parameter} for {bond_type} bond") if bond_type == 'harmonic': - if 'r_0' in bond_parameters: - bond_length = bond_parameters['r_0'].to('reduced_length') - else: - raise ValueError("Equilibrium bond length (r_0) is missing") - bond_object = interactions.HarmonicBond(k = bond_magnitude.magnitude, - r_0 = bond_length.magnitude) + bond_instance = interactions.HarmonicBond(k = bond_parameters["k"].m_as("reduced_energy/reduced_length**2"), + r_0 = 
bond_parameters["r_0"].m_as("reduced_length")) elif bond_type == 'FENE': - if 'r_0' in bond_parameters: - bond_length = bond_parameters['r_0'].to('reduced_length').magnitude - else: - logging.warning("no value provided for r_0. Defaulting to r_0 = 0") - bond_length=0 - if 'd_r_max' in bond_parameters: - max_bond_stret = bond_parameters['d_r_max'].to('reduced_length') - else: - raise ValueError("Maximal stretching length (d_r_max) is missing") - bond_object = interactions.FeneBond(r_0 = bond_length, - k = bond_magnitude.magnitude, - d_r_max = max_bond_stret.magnitude) - else: - raise NotImplementedError(f"Bond type '{bond_type}' currently not implemented in pyMBE, accepted types are {valid_bond_types}") - return bond_object + bond_instance = interactions.FeneBond(k = bond_parameters["k"].m_as("reduced_energy/reduced_length**2"), + r_0 = bond_parameters["r_0"].m_as("reduced_length"), + d_r_max = bond_parameters["d_r_max"].m_as("reduced_length")) + return bond_instance def create_counterions(self, object_name, cation_name, anion_name, espresso_system): @@ -921,46 +923,35 @@ def create_particle(self, name, espresso_system, number_of_particles, position=N """ if number_of_particles <=0: return [] - if not _DFm._check_if_name_is_defined_in_df(name=name, df=self.df): - logging.warning(f"Particle with name '{name}' is not defined in the pyMBE DataFrame, no particle will be created.") + if not self.db._has_template(name=name, pmb_type="particle"): + logging.warning(f"Particle template with name '{name}' is not defined in the pyMBE database, no particle will be created.") return [] - self._check_if_name_has_right_type(name=name, - expected_pmb_type="particle") - # Copy the data of the particle `number_of_particles` times in the `df` - self.df = _DFm._copy_df_entry(df = self.df, - name = name, - column_name = 'particle_id', - number_of_copies = number_of_particles) - # Get information from the particle type `name` from the df - z = self.df.loc[self.df['name'] == 
name].state_one.z.values[0] - z = 0. if z is None else z - es_type = self.df.loc[self.df['name'] == name].state_one.es_type.values[0] - # Get a list of the index in `df` corresponding to the new particles to be created - index = np.where(self.df['name'] == name) - index_list = list(index[0])[-number_of_particles:] - # Create the new particles into `espresso_system` + + part_tpl = self.db.get_template(pmb_type="particle", + name=name) + initial_state = part_tpl.states[part_tpl.initial_state] + z = initial_state.z + es_type = initial_state.es_type + + # Create the new particles into ESPResSo created_pid_list=[] for index in range(number_of_particles): - df_index = int(index_list[index]) - _DFm._clean_df_row(df = self.df, - index = df_index) if position is None: particle_position = self.rng.random((1, 3))[0] *np.copy(espresso_system.box_l) else: particle_position = position[index] - if len(espresso_system.part.all()) == 0: - bead_id = 0 - else: - bead_id = max (espresso_system.part.all().id) + 1 - created_pid_list.append(bead_id) - kwargs = dict(id=bead_id, pos=particle_position, type=es_type, q=z) + + particle_id = self.db._propose_instance_id(pmb_type="particle") + created_pid_list.append(particle_id) + kwargs = dict(id=particle_id, pos=particle_position, type=es_type, q=z) if fix: kwargs["fix"] = 3 * [fix] espresso_system.part.add(**kwargs) - _DFm._add_value_to_df(df = self.df, - key = ('particle_id',''), - index = df_index, - new_value = bead_id) + part_inst = ParticleInstance(name=name, + particle_id=particle_id, + initial_state=initial_state.name) + self.db._register_instance(part_inst) + return created_pid_list def create_protein(self, name, number_of_proteins, espresso_system, topology_dict): @@ -1030,165 +1021,114 @@ def create_residue(self, name, espresso_system, central_bead_position=None,use_d backbone_vector(`list` of `float`): Backbone vector of the molecule. All side chains are created perpendicularly to `backbone_vector`. 
Returns: - residues_info(`dict`): {residue_id:{"central_bead_id":central_bead_id, "side_chain_ids":[particle_id1, ...]}} + (int) : residue_id of the residue created. """ - if not _DFm._check_if_name_is_defined_in_df(name=name, df=self.df): - logging.warning(f"Residue with name '{name}' is not defined in the pyMBE DataFrame, no residue will be created.") + if not self.db._has_template(name=name, pmb_type="residue"): + logging.warning(f"Residue template with name '{name}' is not defined in the pyMBE database, no residue will be created.") + return + res_tpl = self.db.get_template(pmb_type="residue", + name=name) + # Assign a residue_id + residue_id = self.db._propose_instance_id(pmb_type="residue") + res_inst = ResidueInstance(name=name, + residue_id=residue_id) + self.db._register_instance(res_inst) + # create the principal bead + central_bead_name = res_tpl.central_bead + central_bead_id = self.create_particle(name=central_bead_name, + espresso_system=espresso_system, + position=central_bead_position, + number_of_particles = 1)[0] + if not central_bead_id: + logging.warning(f"Central bead with particle template with name '{name}' is not defined in the pyMBE database, no residue will be created.") return - self._check_if_name_has_right_type(name=name, - expected_pmb_type="residue") - # Copy the data of a residue in the `df - self.df = _DFm._copy_df_entry(df = self.df, - name = name, - column_name = 'residue_id', - number_of_copies = 1) - residues_index = np.where(self.df['name']==name) - residue_index_list =list(residues_index[0])[-1:] - # search for defined particle and residue names - particle_and_residue_df = self.df.loc[(self.df['pmb_type']== "particle") | (self.df['pmb_type']== "residue")] - particle_and_residue_names = particle_and_residue_df["name"].tolist() - for residue_index in residue_index_list: - side_chain_list = self.df.loc[self.df.index[residue_index]].side_chains.values[0] - for side_chain_element in side_chain_list: - if side_chain_element not in 
particle_and_residue_names: - raise ValueError (f"{side_chain_element} is not defined") - # Internal bookkepping of the residue info (important for side-chain residues) - # Dict structure {residue_id:{"central_bead_id":central_bead_id, "side_chain_ids":[particle_id1, ...]}} - residues_info={} - for residue_index in residue_index_list: - _DFm._clean_df_row(df = self.df, - index = int(residue_index)) - # Assign a residue_id - if self.df['residue_id'].isnull().all(): - residue_id=0 - else: - residue_id = self.df['residue_id'].max() + 1 - _DFm._add_value_to_df(df = self.df, - key = ('residue_id',''), - index = int(residue_index), - new_value = residue_id) - # create the principal bead - central_bead_name = self.df.loc[self.df['name']==name].central_bead.values[0] - central_bead_id = self.create_particle(name=central_bead_name, - espresso_system=espresso_system, - position=central_bead_position, - number_of_particles = 1)[0] - central_bead_position=espresso_system.part.by_id(central_bead_id).pos - #assigns same residue_id to the central_bead particle created. 
- index = self.df[self.df['particle_id']==central_bead_id].index.values[0] - self.df.at [index,'residue_id'] = residue_id - # Internal bookkeeping of the central bead id - residues_info[residue_id]={} - residues_info[residue_id]['central_bead_id']=central_bead_id - # create the lateral beads - side_chain_list = self.df.loc[self.df.index[residue_index]].side_chains.values[0] - side_chain_beads_ids = [] - for side_chain_element in side_chain_list: - pmb_type = self.df[self.df['name']==side_chain_element].pmb_type.values[0] - if pmb_type == 'particle': - bond = self.search_bond(particle_name1=central_bead_name, - particle_name2=side_chain_element, - hard_check=True, - use_default_bond=use_default_bond) - l0 = self.get_bond_length(particle_name1=central_bead_name, - particle_name2=side_chain_element, - hard_check=True, - use_default_bond=use_default_bond) - - if backbone_vector is None: - bead_position=self.generate_random_points_in_a_sphere(center=central_bead_position, - radius=l0, - n_samples=1, - on_surface=True)[0] - else: - bead_position=central_bead_position+self.generate_trial_perpendicular_vector(vector=np.array(backbone_vector), + central_bead_position=espresso_system.part.by_id(central_bead_id).pos + # Assigns residue_id to the central_bead particle created. + self.db._update_instance(pmb_type="particle", + instance_id=central_bead_id, + attribute="residue_id", + value=residue_id) + + # create the lateral beads + side_chain_list = res_tpl.side_chains + side_chain_beads_ids = [] + for side_chain_name in side_chain_list: + pmb_type_list = self.db._find_template_types(name=side_chain_name) + if len(pmb_type_list) > 2: + raise KeyError(f"Detected multiple templates with the same name '{side_chain_name}' in the pyMBE database, pmb_types: {pmb_type_list}. 
Residue creation aborted to avoid ambiguity.") + elif not pmb_type_list: + logging.warning(f"Element in side chain with name '{name}' is not defined in the pyMBE database, nothing will be created.") + continue + pmb_type = pmb_type_list[0] + if pmb_type == 'particle': + lj_parameters = self.get_lj_parameters(particle_name1=central_bead_name, + particle_name2=side_chain_name) + bond_tpl = self.get_bond_template(particle_name1=central_bead_name, + particle_name2=side_chain_name) + l0 = hf.calculate_initial_bond_length(lj_parameters=lj_parameters, + bond_type=bond_tpl.bond_type, + bond_parameters=bond_tpl.get_parameters(ureg=self.units)) + if backbone_vector is None: + bead_position=self.generate_random_points_in_a_sphere(center=central_bead_position, + radius=l0, + n_samples=1, + on_surface=True)[0] + else: + bead_position=central_bead_position+self.generate_trial_perpendicular_vector(vector=np.array(backbone_vector), + magnitude=l0) + + side_bead_id = self.create_particle(name=side_chain_name, + espresso_system=espresso_system, + position=[bead_position], + number_of_particles=1)[0] + side_chain_beads_ids.append(side_bead_id) + self.db._update_instance(pmb_type="particle", + instance_id=side_bead_id, + attribute="residue_id", + value=residue_id) + self.create_bond(particle_id1=central_bead_id, + particle_id2=side_bead_id, + espresso_system=espresso_system, + use_default_bond=use_default_bond) + elif pmb_type == 'residue': + side_residue_tpl = self.db.get_template(name=side_chain_name, + pmb_type=pmb_type) + central_bead_side_chain = side_residue_tpl.central_bead + lj_parameters = self.get_lj_parameters(particle_name1=central_bead_name, + particle_name2=central_bead_side_chain) + bond_tpl = self.get_bond_template(particle_name1=central_bead_name, + particle_name2=central_bead_side_chain) + l0 = hf.calculate_initial_bond_length(lj_parameters=lj_parameters, + bond_type=bond_tpl.bond_type, + bond_parameters=bond_tpl.get_parameters(ureg=self.units)) + if backbone_vector 
is None: + residue_position=self.generate_random_points_in_a_sphere(center=central_bead_position, + radius=l0, + n_samples=1, + on_surface=True)[0] + else: + residue_position=central_bead_position+self.generate_trial_perpendicular_vector(vector=backbone_vector, magnitude=l0) - - side_bead_id = self.create_particle(name=side_chain_element, - espresso_system=espresso_system, - position=[bead_position], - number_of_particles=1)[0] - index = self.df[self.df['particle_id']==side_bead_id].index.values[0] - _DFm._add_value_to_df(df = self.df, - key = ('residue_id',''), - index = int(index), - new_value = residue_id, - overwrite = True) - side_chain_beads_ids.append(side_bead_id) - espresso_system.part.by_id(central_bead_id).add_bond((bond, side_bead_id)) - self.df, index = _DFm._add_bond_in_df(df = self.df, - particle_id1 = central_bead_id, - particle_id2 = side_bead_id, - use_default_bond = use_default_bond) - _DFm._add_value_to_df(df = self.df, - key = ('residue_id',''), - index = int(index), - new_value = residue_id, - overwrite = True) - - elif pmb_type == 'residue': - central_bead_side_chain = self.df[self.df['name']==side_chain_element].central_bead.values[0] - bond = self.search_bond(particle_name1=central_bead_name, - particle_name2=central_bead_side_chain, - hard_check=True, - use_default_bond=use_default_bond) - l0 = self.get_bond_length(particle_name1=central_bead_name, - particle_name2=central_bead_side_chain, - hard_check=True, - use_default_bond=use_default_bond) - if backbone_vector is None: - residue_position=self.generate_random_points_in_a_sphere(center=central_bead_position, - radius=l0, - n_samples=1, - on_surface=True)[0] - else: - residue_position=central_bead_position+self.generate_trial_perpendicular_vector(vector=backbone_vector, - magnitude=l0) - lateral_residue_info = self.create_residue(name=side_chain_element, - espresso_system=espresso_system, - central_bead_position=[residue_position], - use_default_bond=use_default_bond) - 
lateral_residue_dict=list(lateral_residue_info.values())[0] - central_bead_side_chain_id=lateral_residue_dict['central_bead_id'] - lateral_beads_side_chain_ids=lateral_residue_dict['side_chain_ids'] - residue_id_side_chain=list(lateral_residue_info.keys())[0] - # Change the residue_id of the residue in the side chain to the one of the bigger residue - index = self.df[(self.df['residue_id']==residue_id_side_chain) & (self.df['pmb_type']=='residue') ].index.values[0] - _DFm._add_value_to_df(df = self.df, - key = ('residue_id',''), - index = int(index), - new_value = residue_id, - overwrite = True) - # Change the residue_id of the particles in the residue in the side chain - side_chain_beads_ids+=[central_bead_side_chain_id]+lateral_beads_side_chain_ids - for particle_id in side_chain_beads_ids: - index = self.df[(self.df['particle_id']==particle_id) & (self.df['pmb_type']=='particle')].index.values[0] - _DFm._add_value_to_df(df = self.df, - key = ('residue_id',''), - index = int (index), - new_value = residue_id, - overwrite = True) - espresso_system.part.by_id(central_bead_id).add_bond((bond, central_bead_side_chain_id)) - self.df, index = _DFm._add_bond_in_df(df = self.df, - particle_id1 = central_bead_id, - particle_id2 = central_bead_side_chain_id, - use_default_bond = use_default_bond) - _DFm._add_value_to_df(df = self.df, - key = ('residue_id',''), - index = int(index), - new_value = residue_id, - overwrite = True) - # Change the residue_id of the bonds in the residues in the side chain to the one of the bigger residue - for index in self.df[(self.df['residue_id']==residue_id_side_chain) & (self.df['pmb_type']=='bond') ].index: - _DFm._add_value_to_df(df = self.df, - key = ('residue_id',''), - index = int(index), - new_value = residue_id, - overwrite = True) - # Internal bookkeeping of the side chain beads ids - residues_info[residue_id]['side_chain_ids']=side_chain_beads_ids - return residues_info + side_residue_id = self.create_residue(name=side_chain_name, + 
espresso_system=espresso_system, + central_bead_position=[residue_position], + use_default_bond=use_default_bond) + # Find particle ids of the inner residue + side_chain_beads_ids = self.db._find_instance_ids_by_attribute(pmb_type="particle", + attribute="residue_id", + value=side_residue_id) + # Change the residue_id of the residue in the side chain to the one of the outer residue + for particle_id in side_chain_beads_ids: + self.db._update_instance(instance_id=particle_id, + pmb_type="particle", + attribute="residue_id", + value=residue_id) + self.create_bond(particle_id1=central_bead_id, + particle_id2=side_chain_beads_ids[0], + espresso_system=espresso_system, + use_default_bond=use_default_bond) + return residue_id def define_bond(self, bond_type, bond_parameters, particle_pairs): """ @@ -1736,39 +1676,68 @@ def generate_trial_perpendicular_vector(self,vector,magnitude): # Normalize the perpendicular vector to have the same magnitude as the input vector perpendicular_vector /= np.linalg.norm(perpendicular_vector) return perpendicular_vector*magnitude - - def get_bond_length(self, particle_name1, particle_name2, hard_check=False, use_default_bond=False) : + + def get_bond_template(self, particle_name1, particle_name2, use_default_bond=False) : """ - Searches for bonds between the particle types given by `particle_name1` and `particle_name2` in `pymbe.df` and returns the initial bond length. - If `use_default_bond` is activated and a "default" bond is defined, returns the length of that default bond instead. - If no bond is found, it prints a message and it does not return anything. If `hard_check` is activated, the code stops if no bond is found. + Searches for bond template linking particle templates with `particle_name1` and `particle_name2` names in the pyMBE database and returns it. + If `use_default_bond` is activated and a "default" bond is defined, returns the default bond template instead. 
Args: - particle_name1(str): label of the type of the first particle type of the bonded particles. - particle_name2(str): label of the type of the second particle type of the bonded particles. - hard_check(bool, optional): If it is activated, the code stops if no bond is found. Defaults to False. - use_default_bond(bool, optional): If it is activated, the "default" bond is returned if no bond is found between `particle_name1` and `particle_name2`. Defaults to False. + particle_name1(`str`): label of the type of the first particle type of the bonded particles. + particle_name2(`str`): label of the type of the second particle type of the bonded particles. + use_default_bond(`bool`, optional): If it is activated, the "default" bond is returned if no bond is found between `particle_name1` and `particle_name2`. Defaults to False. Returns: - l0(`pint.Quantity`): bond length + bond(`espressomd.interactions.BondedInteractions`): bond object from the espressomd library. Note: - If `use_default_bond`=True and no bond is defined between `particle_name1` and `particle_name2`, it returns the default bond defined in `pmb.df`. - - If `hard_check`=`True` stops the code when no bond is found. 
- """ - bond_key = _DFm._find_bond_key(df = self.df, - particle_name1 = particle_name1, - particle_name2 = particle_name2, - use_default_bond = use_default_bond) - if bond_key: - return self.df[self.df['name'] == bond_key].l0.values[0] + """ + if use_default_bond: + bond_key = "default" else: - msg = f"Bond not defined between particles {particle_name1} and {particle_name2}" - if hard_check: - raise ValueError(msg) - else: - logging.warning(msg) - return + bond_key = BondTemplate.make_bond_key(pn1=particle_name1, + pn2=particle_name2) + bond_tpl = self.db.get_template(name=bond_key, + pmb_type="bond") + return bond_tpl + + def get_espresso_bond_instance(self, particle_name1, particle_name2, espresso_system, use_default_bond=False): + """ + Retrieve or create a bond instance in an ESPResSo system for a given pair of particle names. + + This method checks whether a bond instance already exists in the database for the + specified particle pair. If it exists, it retrieves the corresponding ESPResSo bond + instance. Otherwise, it creates a new ESPResSo bond instance using the bond template. + + Args: + particle_name1 (str): Name of the first particle involved in the bond. + particle_name2 (str): Name of the second particle involved in the bond. + espresso_system: An ESPResSo system object where the bond will be added or retrieved. + use_default_bond (bool, optional): If True, use a default bond template when no + specific template exists for the particle pair. Defaults to False. + + Returns: + (espressomd.interactions.BondedInteraction): The ESPResSo bond instance object. + + Raises: + KeyError: If no bond template is found for the particle pair and `use_default_bond` is False. + + Note: + When a new bond instance is created, it is not added to the ESPResSo system. 
+ """ + bond_tpl = self.get_bond_template(particle_name1=particle_name1, + particle_name2=particle_name2, + use_default_bond=use_default_bond) + if bond_tpl.name in self._bond_instances.keys(): + bond_inst = self._bond_instances[bond_tpl.name] + else: + # Create an instance of the bond + bond_inst = self.create_espresso_bond_instance(bond_type=bond_tpl.bond_type, + bond_parameters=bond_tpl.get_parameters(self.units)) + self._bond_instances[bond_tpl.name]= bond_inst + espresso_system.bonded_inter.add(bond_inst) + return bond_inst def get_charge_number_map(self): ''' @@ -1791,7 +1760,43 @@ def get_charge_number_map(self): charge_number_map = pd.concat([state_one,state_two],axis=0).to_dict() return charge_number_map - + def get_lj_parameters(self, particle_name1, particle_name2, combining_rule='Lorentz-Berthelot'): + """ + Returns the Lennard-Jones parameters for the interaction between the particle types given by + `particle_name1` and `particle_name2` in `pymbe.df`, calculated according to the provided combining rule. + + Args: + particle_name1 (str): label of the type of the first particle type + particle_name2 (str): label of the type of the second particle type + combining_rule (`string`, optional): combining rule used to calculate `sigma` and `epsilon` for the potential betwen a pair of particles. Defaults to 'Lorentz-Berthelot'. + + Returns: + {"epsilon": epsilon_value, "sigma": sigma_value, "offset": offset_value, "cutoff": cutoff_value} + + Note: + - Currently, the only `combining_rule` supported is Lorentz-Berthelot. + - If the sigma value of `particle_name1` or `particle_name2` is 0, the function will return an empty dictionary. No LJ interactions are set up for particles with sigma = 0. 
+ """ + supported_combining_rules=["Lorentz-Berthelot"] + if combining_rule not in supported_combining_rules: + raise ValueError(f"Combining_rule {combining_rule} currently not implemented in pyMBE, valid keys are {supported_combining_rules}") + part_tpl1 = self.db.get_template(name=particle_name1, + pmb_type="particle") + part_tpl2 = self.db.get_template(name=particle_name2, + pmb_type="particle") + lj_parameters1 = part_tpl1.get_lj_parameters(ureg=self.units) + lj_parameters2 = part_tpl2.get_lj_parameters(ureg=self.units) + + # If one of the particle has sigma=0, no LJ interations are set up between that particle type and the others + if part_tpl1.sigma.magnitude == 0 or part_tpl2.sigma.magnitude == 0: + return {} + # Apply combining rule + if combining_rule == 'Lorentz-Berthelot': + sigma=(lj_parameters1["sigma"]+lj_parameters2["sigma"])/2 + cutoff=(lj_parameters1["cutoff"]+lj_parameters2["cutoff"])/2 + offset=(lj_parameters1["offset"]+lj_parameters2["offset"])/2 + epsilon=np.sqrt(lj_parameters1["epsilon"]*lj_parameters2["epsilon"]) + return {"sigma": sigma, "cutoff": cutoff, "offset": offset, "epsilon": epsilon} def get_particle_id_map(self, object_name): ''' @@ -2253,42 +2258,11 @@ def read_protein_vtf_in_df (self,filename,unit_length=None): return topology_dict - def search_bond(self, particle_name1, particle_name2, hard_check=False, use_default_bond=False) : - """ - Searches for bonds between the particle types given by `particle_name1` and `particle_name2` in `pymbe.df` and returns it. - If `use_default_bond` is activated and a "default" bond is defined, returns that default bond instead. - If no bond is found, it prints a message and it does not return anything. If `hard_check` is activated, the code stops if no bond is found. - - Args: - particle_name1(`str`): label of the type of the first particle type of the bonded particles. - particle_name2(`str`): label of the type of the second particle type of the bonded particles. 
- hard_check(`bool`, optional): If it is activated, the code stops if no bond is found. Defaults to False. - use_default_bond(`bool`, optional): If it is activated, the "default" bond is returned if no bond is found between `particle_name1` and `particle_name2`. Defaults to False. - - Returns: - bond(`espressomd.interactions.BondedInteractions`): bond object from the espressomd library. + - Note: - - If `use_default_bond`=True and no bond is defined between `particle_name1` and `particle_name2`, it returns the default bond defined in `pmb.df`. - - If `hard_check`=`True` stops the code when no bond is found. - """ + + - bond_key = _DFm._find_bond_key(df = self.df, - particle_name1 = particle_name1, - particle_name2 = particle_name2, - use_default_bond = use_default_bond) - if use_default_bond: - if not _DFm._check_if_name_is_defined_in_df(name="default", df=self.df): - raise ValueError(f"use_default_bond is set to {use_default_bond} but no default bond has been defined. Please define a default bond with pmb.define_default_bond") - if bond_key: - return self.df[self.df['name']==bond_key].bond_object.values[0] - else: - msg= f"Bond not defined between particles {particle_name1} and {particle_name2}" - if hard_check: - raise ValueError(msg) - else: - logging.warning(msg) - return None def search_particles_in_residue(self, residue_name): ''' Searches for all particles in a given residue of name `residue_name`. diff --git a/pyMBE/storage/instances/bond.py b/pyMBE/storage/instances/bond.py index 37d7f92..5fa5390 100644 --- a/pyMBE/storage/instances/bond.py +++ b/pyMBE/storage/instances/bond.py @@ -41,6 +41,8 @@ class BondInstance(PMBBaseModel): ID of the first particle involved in the bond. particle_id2 (int): ID of the second particle involved in the bond. + es_id (int): + Unique non-negative integer identifying this bond instance. 
Validators: validate_bond_id: @@ -57,8 +59,7 @@ class BondInstance(PMBBaseModel): bond_id: int name : str # bond template name particle_id1: int - particle_id2: int - + particle_id2: int @field_validator("bond_id") def validate_bond_id(cls, bid): diff --git a/pyMBE/storage/io.py b/pyMBE/storage/io.py index e7efcff..1d4bf85 100644 --- a/pyMBE/storage/io.py +++ b/pyMBE/storage/io.py @@ -176,6 +176,7 @@ def _load_database_csv(db, folder): cutoff=cutoff, offset=offset, states=states, + initial_state=row["initial_state"] ) templates[tpl.name] = tpl @@ -428,6 +429,7 @@ def _save_database_csv(db, folder): "epsilon": _encode(tpl.epsilon), "cutoff": _encode(tpl.cutoff), "offset": _encode(tpl.offset), + "initial_state": tpl.initial_state, "states": _encode({sname: st.model_dump() for sname, st in tpl.states.items()}), # states: dict state_name -> ParticleState.model_dump() }) diff --git a/pyMBE/storage/manager.py b/pyMBE/storage/manager.py index 045eaa1..0986bf7 100644 --- a/pyMBE/storage/manager.py +++ b/pyMBE/storage/manager.py @@ -83,6 +83,108 @@ def __init__(self,units): self._instances: Dict[str, Dict[int, InstanceType]] = {} self._reactions: Dict[str, Reaction] = {} + def _find_instance_ids_by_attribute(self, pmb_type, attribute, value): + """ + Return a list of instance IDs for a given pmb_type where a given attribute + matches the requested value. + + Args: + pmb_type (str): The pyMBE type to search within. + attribute (str): The attribute name to match on (e.g. "residue_id", "molecule_id"). + value: The attribute value to match. + + Returns: + List[int]: IDs of matching instances. 
+ """ + if pmb_type not in self._instances: + raise KeyError(f"Unknown pmb_type '{pmb_type}' in instance database.") + results = [] + for inst_id, inst in self._instances[pmb_type].items(): + if hasattr(inst, attribute) and getattr(inst, attribute) == value: + results.append(inst_id) + return results + + def _find_instance_ids_by_name(self, pmb_type, name): + """ + Return the IDs of all instances of a given pyMBE type that use a + specific template name. + + This method inspects the instance registry stored under + ``self._instances[pmb_type]`` and collects all instance identifiers + whose ``instance.name`` matches the provided template name. + + Args: + pmb_type (str): + The instance category to search within. + + name (str): + The template name associated with the instances of interest. + + Returns: + list[int]: + A list of instance IDs whose underlying template name matches + ``name``. The list is empty if no such instances exist. + + Raises: + KeyError: + If ``pmb_type`` is not a recognized instance category. + + Examples: + >>> db._find_instance_ids_by_name("particle", "A") + [0, 3, 7][] + + Notes: + - Only exact name matches are considered. + - This method does not validate whether the corresponding template + actually exists; it only inspects registered *instances*. + """ + if pmb_type not in self._instances: + return [] + + result = [] + for iid, inst in self._instances[pmb_type].items(): + if hasattr(inst, "name") and inst.name == name: + result.append(iid) + + return result + + + + def _find_template_types(self, name): + """ + Return all pyMBE template categories that contain a template + with a given name. + + Searches across every template group stored in ``self._templates``, + and collects the PMB types (keys of the template registry) for which + a template named ``name`` exists. + + Args: + name (str): + The template name to search for. 
+ + Returns: + list[str]: + A list of PMB types (e.g., ``["particle", "residue"]``) in + which a template named ``name`` exists. The list is empty if + no such template is found. + + Examples: + >>> db._find_template_types("A") + ["particle"] + + >>> db._find_template_types("nonexistent") + [] + """ + found = [] + + for pmb_type, group in self._templates.items(): + if name in group: + found.append(pmb_type) + + return found + + def _get_instances_df(self, pmb_type): """ Returns a DataFrame containing all instance objects of a given pyMBE type. @@ -190,6 +292,7 @@ def _get_templates_df(self, pmb_type): "epsilon": tpl.epsilon.to_quantity(self._units), "cutoff": tpl.cutoff.to_quantity(self._units), "offset": tpl.offset.to_quantity(self._units), + "initial_state": tpl.initial_state, "state": sname, "z": st.z, "es_type": st.es_type @@ -214,6 +317,67 @@ def _get_templates_df(self, pmb_type): rows.append(tpl.model_dump()) return pd.DataFrame(rows) + def _has_instance(self, pmb_type, instance_id): + """ + Check whether an instance with a given ID exists under a specific pyMBE type. + + Args: + pmb_type (str): + The instance category to search in. + + instance_id (int): + The unique identifier of the instance. + + Returns: + bool: + ``True`` if the instance exists in the given category, + ``False`` otherwise. + + Raises: + KeyError: + If ``pmb_type`` is not a known instance category in the database. + + Examples: + >>> db._has_instance("particle", 3) + True + + >>> db._has_instance("nonexistent_type", 5) + KeyError + """ + if pmb_type not in self._instances: + raise KeyError(f"Instance type '{pmb_type}' not found in the database.") + + return instance_id in self._instances[pmb_type] + + def _has_template(self, pmb_type, name): + """ + Check whether a template with a given name exists within a specific pyMBE type. + + Args: + pmb_type (str): + The template category to search in (e.g. ``"particle"``, + ``"bond"``, ``"molecule"``, ``"lj"``, etc.). 
+ name (str): + The template name to check for. + + Returns: + bool: + ``True`` if a template named ``name`` exists under ``pmb_type``; + ``False`` otherwise. + + Raises: + KeyError: + If ``pmb_type`` is not a recognized template category in the database. + + Examples: + >>> db.has_template("particle", "A") + True + """ + if pmb_type not in self._templates: + raise KeyError(f"Template type '{pmb_type}' not found in the database.") + template_in_db = name in self._templates.get(pmb_type, {}) + return template_in_db + def _register_instance(self, instance): """ Register an instance of a pyMBE object. @@ -403,6 +567,52 @@ def _update_reaction_participant(self, reaction_name, particle_name, state_name, coefficient=coefficient) self._register_reaction(rxn) self._reactions.pop(reaction_name) + + def _propose_instance_id(self, pmb_type): + """ + Propose the next available id for a new TypeInstance. + + If no instances of the given pmb_type exist, the proposed + identifier is ``0``. Otherwise, the next available integer after the + current maximum is returned. + + Returns: + int: A non-negative integer that is not already used in the pyMBE database. + + Notes: + - The method does not fill gaps; it always returns ``max + 1``. + """ + if pmb_type not in self._instances or len(self._instances[pmb_type]) == 0: + return 0 + + used_ids = list(self._instances[pmb_type].keys()) + return max(used_ids) + 1 + + def get_instance(self, pmb_type, instance_id): + """ + Retrieve a stored instance by type and instance_id. + + Looks up an instance within the internal instance registry + (`self._instances`) using its pyMBE type (e.g., "particle", "residue", + "bond", ...) and its unique id. If the instance does not exist, + a `KeyError` is raised. + + Args: + pmb_type (str): The instance pyMBE category. + name (str): The unique name of the template to retrieve. + + Returns: + InstanceType: The stored InstanceTemplate instance corresponding to the + provided type and name. 
+ + Raises: + KeyError: If no template with the given type and name exists in + the internal registry. + """ + if instance_id not in self._instances[pmb_type]: + raise KeyError(f"InstanceTemplate with id = '{instance_id}' not found in type '{pmb_type}'.") + else: + return self._instances[pmb_type][instance_id] def get_template(self, pmb_type, name): """ @@ -414,12 +624,11 @@ def get_template(self, pmb_type, name): a `KeyError` is raised. Args: - pmb_type (str): The template category (e.g., "particle", "molecule", - "residue", "bond", "protein", ...). - name (str): The unique name of the template to retrieve. + pmb_type (str): The template pyMBE category. + name (str): The unique id of the template to retrieve. Returns: - PMBBaseModel: The stored template instance corresponding to the + TemplateType: The stored template instance corresponding to the provided type and name. Raises: diff --git a/pyMBE/storage/templates/bond.py b/pyMBE/storage/templates/bond.py index f323d81..d37e9cc 100644 --- a/pyMBE/storage/templates/bond.py +++ b/pyMBE/storage/templates/bond.py @@ -36,7 +36,6 @@ class BondTemplate(PMBBaseModel): - "k": Force constant (energy / distance^2) - "r0": Equilibrium bond length - "d_r_max": Maximum bond extension (for FENE) - l0 (PintQuantity): Initial bond length when the bond is instantiated. Notes: Values are stored as PintQuantity objects for unit-aware calculations. @@ -48,11 +47,53 @@ class BondTemplate(PMBBaseModel): particle_name2: str | None = None parameters: Dict[str, PintQuantity] # k, r0, d_r_max... + @classmethod + def make_bond_key(cls, pn1, pn2): + """Return a canonical name for a bond between two particle names. + + Args: + pn1 (str): Name of the first particle. + pn2 (str): Name of the second particle. + + Returns: + str: Canonical bond name, e.g. "A-B". 
+ """ + return "-".join(sorted([pn1, pn2])) + def _make_name(self): - """Create a canonical name for the bond.""" - if self.particle_name1 is None or self.particle_name2 is None: - raise RuntimeError("The BondTemplate has no defined particle_name1 or particle_name2 and therefore the name could not be automatically generated") - pn1, pn2 = sorted([self.particle_name1, self.particle_name2]) - self.name = f"{pn1}-{pn2}" + """Create canonical name using particle names.""" + if not self.particle_name1 or not self.particle_name2: + raise RuntimeError( + "Cannot generate bond name: particle_name1 or particle_name2 missing." + ) + + self.name = self.make_bond_key(self.particle_name1, self.particle_name2) + + def get_parameters(self, ureg): + """ + Retrieve the bond parameters as Pint `Quantity` objects. + + Args: + ureg (pint.UnitRegistry) : Pint unit registry used to reconstruct physical quantities from storage. + + Returns: + Dict[str, pint.Quantity]: + A dictionary mapping parameter names to their corresponding + unit-aware Pint quantities. - \ No newline at end of file + Example: + >>> bt = BondTemplate( + ... bond_type="harmonic", + ... particle_name1="A", + ... particle_name2="B", + ... parameters={"k": PintQuantity("100 kJ/mol/nm^2"), + ... "r0": PintQuantity("0.3 nm")} + ... ) + >>> bt.get_parameters() + {'k': , + 'r0': } + """ + pint_parameters={} + for parameter in self.parameters.keys(): + pint_parameters[parameter] = self.parameters[parameter].to_quantity(ureg) + return pint_parameters \ No newline at end of file diff --git a/pyMBE/storage/templates/particle.py b/pyMBE/storage/templates/particle.py index bc308b1..ce56875 100644 --- a/pyMBE/storage/templates/particle.py +++ b/pyMBE/storage/templates/particle.py @@ -17,7 +17,7 @@ # along with this program. If not, see . 
# -from typing import Dict, Literal +from typing import Dict, Literal, Optional from pydantic import Field, field_validator from ..base_type import PMBBaseModel @@ -36,7 +36,7 @@ class ParticleState(PMBBaseModel): pmb_type: Literal["particle_state"] = "particle_state" name: str # e.g. "HA", "A-", "H+" z: int - es_type: float # label in espresso + es_type: int # label in espresso class ParticleTemplate(PMBBaseModel): @@ -51,6 +51,8 @@ class ParticleTemplate(PMBBaseModel): offset (PintQuantity): Offset distance for the LJ potential. states (Dict[str, ParticleState]): Dictionary of allowed particle states. Keys are state names, values are ParticleState instances. + initial_state (Optional[str]): Name of the default particle state. + If not provided explicitly, the first added state becomes the initial state. """ pmb_type: str = Field(default="particle", frozen=True) @@ -60,6 +62,7 @@ class ParticleTemplate(PMBBaseModel): offset: PintQuantity epsilon: PintQuantity states: Dict[str, ParticleState] = {} + initial_state: Optional[str] = None def add_state(self, state): """ @@ -78,3 +81,29 @@ def add_state(self, state): raise ValueError(f"State {state.name} already exists in template {self.name}") self.states[state.name] = state + # Automatically assign initial state if this is the first state + if self.initial_state is None: + self.initial_state = state.name + + def get_lj_parameters(self, ureg): + """ + Retrieve the Lennard-Jones interaction parameters for the particle template. + + Args: + ureg (pint.UnitRegistry) : Pint unit registry used to reconstruct physical quantities from storage. + + Returns: + Dict[str, pint.Quantity]: + A dictionary containing the following LJ parameters: sigma, epsilon, cutoff, offset. + + Example: + >>> tpl = ParticleTemplate(...) 
+ >>> params = tpl.get_lj_parameters() + >>> params["sigma"] + + """ + return {"sigma": self.sigma.to_quantity(ureg), + "epsilon": self.epsilon.to_quantity(ureg), + "cutoff": self.cutoff.to_quantity(ureg), + "offset": self.offset.to_quantity(ureg)} + diff --git a/requirements.txt b/requirements.txt index 4cb76e9..ec349a9 100644 --- a/requirements.txt +++ b/requirements.txt @@ -5,6 +5,7 @@ pint-pandas>=0.3 biopandas==0.5.1.dev0 scipy>=1.8.0 matplotlib>=3.5.1 +pydantic>=2.12.5 # soft dependencies to run the samples tqdm>=4.57.0 # soft dependencies to run the testsuite diff --git a/test.py b/test.py index ee4e25d..92a987b 100644 --- a/test.py +++ b/test.py @@ -25,7 +25,8 @@ import pyMBE.storage.io as io import pint -import scipy.constants +import scipy +import espressomd def main(): @@ -36,7 +37,7 @@ def main(): kT=temperature*kB units.define(f'reduced_energy = {kT} ') units.define(f'reduced_length = {unit_length}') - + espresso_system=espressomd.System (box_l = [10]*3) db = Manager(units=units) # ============================================================ @@ -60,7 +61,7 @@ def main(): epsilon=0.2 * units.reduced_energy) pmb.define_residue(name="R1", central_bead="Z", side_chains=["X","Z"]) - pmb.define_residue(name="R2", central_bead="ZA", side_chains=["X","HZ"]) + pmb.define_residue(name="R2", central_bead="Z", side_chains=["X","R1"]) print("\n=== Residue Templates DataFrame ===") print(pmb.db._get_templates_df(pmb_type="residue")) @@ -132,8 +133,9 @@ def main(): pmb.define_bond(bond_type="harmonic", bond_parameters=parameters, - particle_pairs=[["A","A"], - ["K","A"]]) + particle_pairs=[["Z","Z"], + ["Z","X"], + ["X","X"]]) pmb.define_default_bond(bond_type="harmonic", bond_parameters=parameters) @@ -165,42 +167,33 @@ def main(): # ============================================================ # 2. 
CREATE INSTANCES (optional for testing) # ============================================================ - - inst1 = ParticleInstance(name="A", particle_id=1, initial_state="HA") - inst2 = ParticleInstance(name="A", particle_id=2, initial_state="A-",residue_id=0) - inst3 = ParticleInstance(name="H", particle_id=3, initial_state="H+") - -# db._register_instance(inst1) -# db._register_instance(inst2) -# db._register_instance(inst3) + pmb.create_particle(name="Z", + espresso_system=espresso_system, + number_of_particles=3) + pmb.create_particle(name="X", + espresso_system=espresso_system, + number_of_particles=1) print("\n=== Particle Instances DataFrame ===") - print(db._get_instances_df(pmb_type="particle")) + print(pmb.db._get_instances_df(pmb_type="particle")) -# db._update_instance(pmb_type="particle", instance_id=1, attribute="residue_id", value=int(0)) + pmb.db._update_instance(pmb_type="particle", instance_id=1, attribute="residue_id", value=int(0)) print("\n=== Particle Instances DataFrame (after update) ===") - print(db._get_instances_df(pmb_type="particle")) + print(pmb.db._get_instances_df(pmb_type="particle")) - inst1 = ResidueInstance(name="R1", - residue_id=1) - inst2 = ResidueInstance(name="R2", - residue_id=2) - inst3 = ResidueInstance(name="R1", - residue_id=3, - molecule_id=0) + pmb.create_residue(name="R1", + espresso_system=espresso_system) + pmb.create_residue(name="R2", + espresso_system=espresso_system) -# db._register_instance(inst1) -# db._register_instance(inst2) -# db._register_instance(inst3) - print("\n=== Residue Instances DataFrame ===") - print(db._get_instances_df(pmb_type="residue")) + print(pmb.db._get_instances_df(pmb_type="residue")) -# db._update_instance(pmb_type="residue",instance_id=1, attribute="molecule_id", value=int(0)) + pmb.db._update_instance(pmb_type="residue",instance_id=0, attribute="molecule_id", value=int(0)) print("\n=== Residue Instances DataFrame (after update)===") - 
print(db._get_instances_df(pmb_type="residue")) + print(pmb.db._get_instances_df(pmb_type="residue")) inst1 = MoleculeInstance(name="M1", molecule_id=1) @@ -210,10 +203,9 @@ def main(): print("\n=== Molecule Instances DataFrame ===") print(db._get_instances_df(pmb_type="molecule")) - inst_bond = BondInstance(name="A1-A2", bond_id=1, particle_id1=1, particle_id2=2) - # db._register_instance(inst_bond) + print("\n=== Bond Instances DataFrame ===") - print(db._get_instances_df(pmb_type="bond")) + print(pmb.db._get_instances_df(pmb_type="bond")) print("\n=== Peptide Instances DataFrame ===") # inst_peptide1 = PeptideInstance(name="Peptide1", molecule_id=3) From 03c8bd77841fc7c327e2ac44fddad8c560e4001c Mon Sep 17 00:00:00 2001 From: Pablo Date: Sat, 6 Dec 2025 14:46:02 +0100 Subject: [PATCH 08/55] finish refactoring of residues, molecules and peptides --- pyMBE/pyMBE.py | 162 +++++++++++++------------- pyMBE/storage/manager.py | 238 +++++++++++++++++++++++++++++++++++++++ test.py | 22 ++-- 3 files changed, 326 insertions(+), 96 deletions(-) diff --git a/pyMBE/pyMBE.py b/pyMBE/pyMBE.py index fe133e5..69440d4 100644 --- a/pyMBE/pyMBE.py +++ b/pyMBE/pyMBE.py @@ -158,6 +158,8 @@ def _get_residue_list_from_sequence(self, sequence): residue_list.append(residue_name) return residue_list + + def calculate_center_of_mass_of_molecule(self, molecule_id, espresso_system): """ Calculates the center of the molecule with a given molecule_id. @@ -785,20 +787,18 @@ def create_molecule(self, name, number_of_molecules, espresso_system, list_of_fi Args: name(`str`): Label of the molecule type to be created. `name` must be defined in `pmb.df` espresso_system(`espressomd.system.System`): Instance of a system object from espressomd library. - number_of_molecules(`int`): Number of molecules of type `name` to be created. + number_of_molecules(`int`): Number of molecules or peptides of type `name` to be created. 
list_of_first_residue_positions(`list`, optional): List of coordinates where the central bead of the first_residue_position will be created, random by default. backbone_vector(`list` of `float`): Backbone vector of the molecule, random by default. Central beads of the residues in the `residue_list` are placed along this vector. use_default_bond(`bool`, optional): Controls if a bond of type `default` is used to bond particle with undefined bonds in `pymbe.df` Returns: - molecules_info(`dict`): {molecule_id: {residue_id:{"central_bead_id":central_bead_id, "side_chain_ids": [particle_id1, ...]}}} + created_molecule_id_list(`list` of `int`): List with the `molecule_id` of the pyMBE molecule instances created into `espresso_system`. Note: Despite its name, this function can be used to create both molecules and peptides. """ - if not _DFm._check_if_name_is_defined_in_df(name=name, df=self.df): - logging.warning(f"Molecule with name '{name}' is not defined in the pyMBE DataFrame, no molecule will be created.") - return {} + supported_pmb_types = ["molecule", "peptide"] if number_of_molecules <= 0: return {} if list_of_first_residue_positions is not None: @@ -810,11 +810,16 @@ def create_molecule(self, name, number_of_molecules, espresso_system, list_of_fi if len(list_of_first_residue_positions) != number_of_molecules: raise ValueError(f"Number of positions provided in {list_of_first_residue_positions} does not match number of molecules desired, {number_of_molecules}") - - # This function works for both molecules and peptides - if not self._check_if_name_has_right_type(name=name, expected_pmb_type="molecule", hard_check=False): - self._check_if_name_has_right_type(name=name, expected_pmb_type="peptide") - + # Sanity tests, this function should work for both molecules and peptides + registered_pmb_types_with_name = self.db._find_template_types(name=name) + if len(registered_pmb_types_with_name) > 1: + raise KeyError(f"Detected multiple templates with the same name '{name}' 
in the pyMBE database, pmb_types: {registered_pmb_types_with_name}. Molecule creation aborted to avoid ambiguity.") + elif len(registered_pmb_types_with_name) == 0: + logging.warning(f"No template with name '{name}' defined in the pyMBE database, nothing will be created.") + return + pmb_type = registered_pmb_types_with_name[0] + if pmb_type not in supported_pmb_types: + raise KeyError(f"Unsupported template type {pmb_type} for template {name}. Supported template types are {supported_pmb_types}") # Generate an arbitrary random unit vector if backbone_vector is None: backbone_vector = self.generate_random_points_in_a_sphere(center=[0,0,0], @@ -824,89 +829,81 @@ def create_molecule(self, name, number_of_molecules, espresso_system, list_of_fi else: backbone_vector = np.array(backbone_vector) first_residue = True - molecules_info = {} - residue_list = self.df[self.df['name']==name].residue_list.values [0] - self.df = _DFm._copy_df_entry(df = self.df, - name = name, - column_name = 'molecule_id', - number_of_copies = number_of_molecules) - - molecules_index = np.where(self.df['name']==name) - molecule_index_list =list(molecules_index[0])[-number_of_molecules:] + molecule_tpl = self.db.get_template(pmb_type=pmb_type, + name=name) + residue_list = molecule_tpl.residue_list pos_index = 0 - for molecule_index in molecule_index_list: - molecule_id = _DFm._assign_molecule_id(df = self.df, - molecule_index = molecule_index) - molecules_info[molecule_id] = {} + molecule_ids = [] + for _ in range(number_of_molecules): + molecule_id = self.db._propose_instance_id(pmb_type=pmb_type) for residue in residue_list: if first_residue: if list_of_first_residue_positions is None: - residue_position = None + central_bead_pos = None else: for item in list_of_first_residue_positions: - residue_position = [np.array(list_of_first_residue_positions[pos_index])] + central_bead_pos = [np.array(list_of_first_residue_positions[pos_index])] - residues_info = self.create_residue(name=residue, - 
espresso_system=espresso_system, - central_bead_position=residue_position, - use_default_bond= use_default_bond, - backbone_vector=backbone_vector) - residue_id = next(iter(residues_info)) - # Add the correct molecule_id to all particles in the residue - for index in self.df[self.df['residue_id']==residue_id].index: - _DFm._add_value_to_df(df = self.df, - key = ('molecule_id',''), - index = int (index), - new_value = molecule_id, - overwrite = True) - central_bead_id = residues_info[residue_id]['central_bead_id'] - previous_residue = residue - residue_position = espresso_system.part.by_id(central_bead_id).pos - previous_residue_id = central_bead_id + residue_id = self.create_residue(name=residue, + espresso_system=espresso_system, + central_bead_position=central_bead_pos, + use_default_bond= use_default_bond, + backbone_vector=backbone_vector) + + # Add molecule_id to the residue instance and all particles associated + particle_ids_in_residue = self.db._update_part_res_inst_mol_ids(residue_id=residue_id, + molecule_id=molecule_id) + + prev_central_bead_id = particle_ids_in_residue[0] + prev_central_bead_name = self.db.get_instance(pmb_type="particle", instance_id=prev_central_bead_id).name + prev_central_bead_pos = espresso_system.part.by_id(prev_central_bead_id).pos first_residue = False - else: - previous_central_bead_name=self.df[self.df['name']==previous_residue].central_bead.values[0] - new_central_bead_name=self.df[self.df['name']==residue].central_bead.values[0] - bond = self.search_bond(particle_name1=previous_central_bead_name, - particle_name2=new_central_bead_name, - hard_check=True, - use_default_bond=use_default_bond) - l0 = self.get_bond_length(particle_name1=previous_central_bead_name, - particle_name2=new_central_bead_name, - hard_check=True, - use_default_bond=use_default_bond) + else: - residue_position = residue_position+backbone_vector*l0 - residues_info = self.create_residue(name=residue, + # Calculate the starting position of the new residue + 
residue_tpl = self.db.get_template(pmb_type="residue", + name=residue) + lj_parameters = self.get_lj_parameters(particle_name1=prev_central_bead_name, + particle_name2=residue_tpl.central_bead) + bond_tpl = self.get_bond_template(particle_name1=prev_central_bead_name, + particle_name2=residue_tpl.central_bead) + l0 = hf.calculate_initial_bond_length(lj_parameters=lj_parameters, + bond_type=bond_tpl.bond_type, + bond_parameters=bond_tpl.get_parameters(ureg=self.units)) + central_bead_pos = prev_central_bead_pos+backbone_vector*l0 + # Create the residue + residue_id = self.create_residue(name=residue, espresso_system=espresso_system, - central_bead_position=[residue_position], + central_bead_position=[central_bead_pos], use_default_bond= use_default_bond, backbone_vector=backbone_vector) - residue_id = next(iter(residues_info)) - for index in self.df[self.df['residue_id']==residue_id].index: - _DFm._add_value_to_df(df = self.df, - key = ('molecule_id',''), - index = int(index), - new_value = molecule_id, - overwrite = True) - central_bead_id = residues_info[residue_id]['central_bead_id'] - espresso_system.part.by_id(central_bead_id).add_bond((bond, previous_residue_id)) - self.df, bond_index = _DFm._add_bond_in_df(df = self.df, - particle_id1 = central_bead_id, - particle_id2 = previous_residue_id, - use_default_bond = use_default_bond) - _DFm._add_value_to_df(df = self.df, - key = ('molecule_id',''), - index = int(bond_index), - new_value = molecule_id, - overwrite = True) - previous_residue_id = central_bead_id - previous_residue = residue - molecules_info[molecule_id][residue_id] = residues_info[residue_id] + # Add molecule_id to the residue instance and all particles associated + particle_ids_in_residue = self.db._update_part_res_inst_mol_ids(residue_id=residue_id, + molecule_id=molecule_id) + central_bead_id = particle_ids_in_residue[0] + + # Bond the central beads of the new and previous residues + self.create_bond(particle_id1=prev_central_bead_id, + 
particle_id2=central_bead_id, + espresso_system=espresso_system, + use_default_bond=use_default_bond) + + prev_central_bead_id = central_bead_id + prev_central_bead_name = self.db.get_instance(pmb_type="particle", instance_id=central_bead_id).name + prev_central_bead_pos =central_bead_pos + # Create a Peptide or Molecule instance and register it on the pyMBE database + if pmb_type == "molecule": + inst = MoleculeInstance(molecule_id=molecule_id, + name=name) + elif pmb_type == "peptide": + inst = PeptideInstance(name=name, + molecule_id=molecule_id) + self.db._register_instance(inst) first_residue = True pos_index+=1 - - return molecules_info + molecule_ids.append(molecule_id) + + return molecule_id def create_particle(self, name, espresso_system, number_of_particles, position=None, fix=False): """ @@ -919,7 +916,7 @@ def create_particle(self, name, espresso_system, number_of_particles, position=N position(list of [`float`,`float`,`float`], optional): Initial positions of the particles. If not given, particles are created in random positions. Defaults to None. fix(`bool`, optional): Controls if the particle motion is frozen in the integrator, it is used to create rigid objects. Defaults to False. Returns: - created_pid_list(`list` of `float`): List with the ids of the particles created into `espresso_system`. + created_pid_list(`list` of `int`): List with the ids of the particles created into `espresso_system`. """ if number_of_particles <=0: return [] @@ -1055,7 +1052,7 @@ def create_residue(self, name, espresso_system, central_bead_position=None,use_d side_chain_beads_ids = [] for side_chain_name in side_chain_list: pmb_type_list = self.db._find_template_types(name=side_chain_name) - if len(pmb_type_list) > 2: + if len(pmb_type_list) > 1: raise KeyError(f"Detected multiple templates with the same name '{side_chain_name}' in the pyMBE database, pmb_types: {pmb_type_list}. 
Residue creation aborted to avoid ambiguity.") elif not pmb_type_list: logging.warning(f"Element in side chain with name '{name}' is not defined in the pyMBE database, nothing will be created.") @@ -1124,6 +1121,9 @@ def create_residue(self, name, espresso_system, central_bead_position=None,use_d pmb_type="particle", attribute="residue_id", value=residue_id) + # Remove the instance of the inner residue + self.db.delete_instance(pmb_type="residue", + instance_id=side_residue_id) self.create_bond(particle_id1=central_bead_id, particle_id2=side_chain_beads_ids[0], espresso_system=espresso_system, diff --git a/pyMBE/storage/manager.py b/pyMBE/storage/manager.py index 0986bf7..8d6c432 100644 --- a/pyMBE/storage/manager.py +++ b/pyMBE/storage/manager.py @@ -83,6 +83,41 @@ def __init__(self,units): self._instances: Dict[str, Dict[int, InstanceType]] = {} self._reactions: Dict[str, Reaction] = {} + def _delete_bonds_of_particle(self, pid): + """ + Delete all bond instances involving a given particle instance. + + Args: + pid (int): The particle ID whose associated bonds should be deleted. + + Notes: + - If no `"bond"` instances are present in the database, the method + exits immediately. + - This method does not raise errors if no bonds involve the particle. + - It is intended for internal use by cascade-deletion routines. 
+ """ + if "bond" not in self._instances: + return + bonds_to_delete = [ + b_id for b_id, b in list(self._instances["bond"].items()) + if b.particle_id1 == pid or b.particle_id2 == pid + ] + for b_id in bonds_to_delete: + del self._instances["bond"][b_id] + if "bond" in self._instances and not self._instances["bond"]: + del self._instances["bond"] + + if "bond" not in self._instances: + return + bonds_to_delete = [ + b_id for b_id, b in list(self._instances["bond"].items()) + if b.particle_id1 == pid or b.particle_id2 == pid + ] + for b_id in bonds_to_delete: + del self._instances["bond"][b_id] + if "bond" in self._instances and not self._instances["bond"]: + del self._instances["bond"] + def _find_instance_ids_by_attribute(self, pmb_type, attribute, value): """ Return a list of instance IDs for a given pmb_type where a given attribute @@ -312,6 +347,20 @@ def _get_templates_df(self, pmb_type): "offset": tpl.offset.to_quantity(self._units), "shift": shift }) + + elif pmb_type == "bond": + parameters = {} + for key in tpl.parameters.keys(): + parameters[key] = tpl.parameters[key].to_quantity(self._units) + rows.append({ + "pmb_type": tpl.pmb_type, + "name": tpl.name, + "bond_type": tpl.bond_type, + "particle_name1": tpl.particle_name1, + "particle_name2": tpl.particle_name2, + "parameters": parameters, + }) + else: # Generic representation for other types rows.append(tpl.model_dump()) @@ -541,6 +590,44 @@ def _update_instance(self, instance_id, pmb_type, attribute, value): self._instances[pmb_type][instance_id] = self._instances[pmb_type][instance_id].model_copy(update={attribute: value}) + def _update_part_res_inst_mol_ids(self, residue_id, molecule_id): + """ + Updates the molecule ID of a residue and all particle instances that belong to it. + + + Args: + residue_id (int): + The instance ID of the residue whose molecule assignment should be updated. + molecule_id (int): + The molecule ID to assign to the residue and all its particles. 
+ + Returns: + List[int]: + A list of particle instance IDs that were updated. + + Raises: + KeyError: + If the residue does not exist in the database. + ValueError: + If an update fails due to inconsistent or missing attributes. + """ + + self._update_instance(instance_id=residue_id, + pmb_type="residue", + attribute="molecule_id", + value=molecule_id) + particle_ids_in_residue = self._find_instance_ids_by_attribute(pmb_type="particle", + attribute="residue_id", + value=residue_id) + for particle_id in particle_ids_in_residue: + self._update_instance(instance_id=particle_id, + pmb_type="particle", + attribute="molecule_id", + value=molecule_id) + + return particle_ids_in_residue + + def _update_reaction_participant(self, reaction_name, particle_name, state_name, coefficient): """ Append a new participant to an existing reaction in the database. @@ -588,6 +675,157 @@ def _propose_instance_id(self, pmb_type): used_ids = list(self._instances[pmb_type].keys()) return max(used_ids) + 1 + def delete_template(self, pmb_type, name): + """ + Delete a template from the pyMBE database. + + This method removes a template identified by its pyMBE type and name. + Before deletion, it checks whether any instance in the database uses + this template. If any instance depends on it, a ``ValueError`` is raised + to prevent breaking database integrity. + + Args: + pmb_type (str): + The template category. + name (str): + The name of the template to delete. + + Raises: + KeyError: + If the template type or name does not exist. + ValueError: + If one or more instances reference the template. 
+ """ + # Check template exists + if pmb_type not in self._templates: + raise KeyError(f"Template type '{pmb_type}' not found.") + if name not in self._templates[pmb_type]: + raise KeyError(f"Template '{name}' not found in type '{pmb_type}'.") + + # Check if any instance depends on this template + if pmb_type in self._instances: + for inst in self._instances[pmb_type].values(): + if getattr(inst, "name", None) == name: + raise ValueError( + f"Cannot delete template '{name}' from '{pmb_type}': " + f"Instance with ID {getattr(inst, pmb_type + '_id')} depends on it." + ) + + # Delete + del self._templates[pmb_type][name] + + # Delete empty groups + if not self._templates[pmb_type]: + del self._templates[pmb_type] + + def delete_instance(self, pmb_type, instance_id, cascade = False): + """ + Delete an instance from the pyMBE database. + + Supports cascade deletion: + - molecule → residues → particles → bonds + - residue → particles → bonds + - particle → bonds + - bond → nothing + + Args: + pmb_type (str): Category of the instance (particle, residue, molecule, bond). + instance_id (int): Unique identifier of the instance. + cascade (bool): If True, automatically delete dependent child objects. + + Raises: + KeyError: If the instance does not exist. + ValueError: If cascade is False but dependencies exist. + """ + # --- Basic sanity checks --- + if pmb_type not in self._instances: + raise KeyError(f"Instance type '{pmb_type}' not found.") + + if instance_id not in self._instances[pmb_type]: + raise KeyError(f"Instance ID '{instance_id}' not found in '{pmb_type}'.") + + inst = self._instances[pmb_type][instance_id] + + # ---- Helper function for deletion of bonds ---- + + + # ---- CASCADE deletion logic ---- + if cascade: + # DELETE children depending on type + if pmb_type == "molecule": + # 1. residues → 2. particles → 3. 
bonds + residues = self._find_instance_ids_by_attribute( + "residue", "molecule_id", instance_id + ) + for rid in residues: + self.delete_instance("residue", rid, cascade=True) + + elif pmb_type == "residue": + # 1. particles → 2. bonds + particles = self._find_instance_ids_by_attribute( + "particle", "residue_id", instance_id + ) + for pid in particles: + self.delete_instance("particle", pid, cascade=True) + + elif pmb_type == "particle": + # 1. bonds only + self._delete_bonds_of_particle(instance_id) + + # For bonds there is nothing to cascade + + else: + # ---- NON-cascade: forbid deletions that break structure ---- + if pmb_type == "particle": + if inst.residue_id is not None: + raise ValueError( + f"Particle {instance_id} belongs to residue {inst.residue_id}. " + f"Use cascade=True to delete anyway." + ) + if inst.molecule_id is not None: + raise ValueError( + f"Particle {instance_id} belongs to molecule {inst.molecule_id}. " + f"Use cascade=True." + ) + + # check bond dependencies + bonds = [ + b_id for b_id, b in self._instances.get("bond", {}).items() + if b.particle_id1 == instance_id or b.particle_id2 == instance_id + ] + if bonds: + raise ValueError( + f"Particle {instance_id} is in bonds {bonds}. " + f"Use cascade=True." + ) + + if pmb_type == "residue": + particles = self._find_instance_ids_by_attribute( + "particle", "residue_id", instance_id + ) + if particles: + raise ValueError( + f"Residue {instance_id} has particles {particles}. " + f"Use cascade=True." + ) + + if pmb_type == "molecule": + residues = self._find_instance_ids_by_attribute( + "residue", "molecule_id", instance_id + ) + if residues: + raise ValueError( + f"Molecule {instance_id} has residues {residues}. " + f"Use cascade=True." 
+ ) + + # ---- Perform final deletion of this object ---- + del self._instances[pmb_type][instance_id] + + if not self._instances[pmb_type]: + del self._instances[pmb_type] + + def get_instance(self, pmb_type, instance_id): """ Retrieve a stored instance by type and instance_id. diff --git a/test.py b/test.py index 92a987b..1a78fef 100644 --- a/test.py +++ b/test.py @@ -66,7 +66,7 @@ def main(): print("\n=== Residue Templates DataFrame ===") print(pmb.db._get_templates_df(pmb_type="residue")) - pmb.define_molecule(name="M1", residue_list=["R1","R2"]) + pmb.define_molecule(name="M1", residue_list=["R1","R2"]*2) print("\n=== Molecule Templates DataFrame ===") print(pmb.db._get_templates_df(pmb_type="molecule")) @@ -178,10 +178,7 @@ def main(): print(pmb.db._get_instances_df(pmb_type="particle")) - pmb.db._update_instance(pmb_type="particle", instance_id=1, attribute="residue_id", value=int(0)) - print("\n=== Particle Instances DataFrame (after update) ===") - print(pmb.db._get_instances_df(pmb_type="particle")) - + pmb.create_residue(name="R1", espresso_system=espresso_system) pmb.create_residue(name="R2", @@ -190,18 +187,13 @@ def main(): print("\n=== Residue Instances DataFrame ===") print(pmb.db._get_instances_df(pmb_type="residue")) + pmb.create_molecule(name="M1", + number_of_molecules=2, + espresso_system=espresso_system) - pmb.db._update_instance(pmb_type="residue",instance_id=0, attribute="molecule_id", value=int(0)) - print("\n=== Residue Instances DataFrame (after update)===") - print(pmb.db._get_instances_df(pmb_type="residue")) - - - inst1 = MoleculeInstance(name="M1", molecule_id=1) - inst2 = MoleculeInstance(name="M1", molecule_id=2) -# db._register_instance(inst1) -# db._register_instance(inst2) print("\n=== Molecule Instances DataFrame ===") - print(db._get_instances_df(pmb_type="molecule")) + + print(pmb.db._get_instances_df(pmb_type="molecule")) print("\n=== Bond Instances DataFrame ===") From c17ca90fa26817f307f8846efb08c69556437f72 Mon Sep 17 
00:00:00 2001 From: Pablo Date: Sat, 6 Dec 2025 17:55:33 +0100 Subject: [PATCH 09/55] updated create_protein --- pyMBE/lib/handy_functions.py | 130 +++++++++++++++++++++++++++++++---- pyMBE/pyMBE.py | 129 +++++++++++++++++++++------------- pyMBE/storage/manager.py | 16 +++-- test.py | 29 ++++++-- 4 files changed, 231 insertions(+), 73 deletions(-) diff --git a/pyMBE/lib/handy_functions.py b/pyMBE/lib/handy_functions.py index a593019..724800e 100644 --- a/pyMBE/lib/handy_functions.py +++ b/pyMBE/lib/handy_functions.py @@ -50,7 +50,10 @@ def check_aminoacid_key(key): 'A', #'ALA' 'G', #'GLY' 'P', #'PRO' - 'C'] #'CYS' + 'C', #'CYS' + "n", # n terminus + "c", # c terminus + ] if key in valid_AA_keys: return True else: @@ -108,7 +111,52 @@ def do_res_map(res_ids): else: return False -def define_AA_particles(topology_dict, lj_setup_mode, pmb): +def define_protein_AA_particles(topology_dict, pmb, lj_setup_mode="wca"): + """ + Defines particle templates in pyMBE for all unique residue/atom types appearing + in a protein topology dictionary. + + The Lennard-Jones parameters (σ, ε, offset) are generated according to the + selected setup mode (currently only the WCA scheme is supported). + + Metal ions are automatically assigned their correct valence charge. + + Args: + topology_dict (dict): + Dictionary defining the structure of a protein. + Keys must be residue/particle identifiers such as `"ALA1"`, `"LYS2"`, + `"ZN3"`, etc., where the alphabetical prefix encodes the residue/ + particle type. + + Each entry must contain: + - `"radius"` (float): Effective radius of the bead, used to + compute the Lennard-Jones offset. + + Example: + { + "ALA1": {"radius": 0.5, ...}, + "GLY2": {"radius": 0.4, ...}, + "ZN3": {"radius": 0.2, ...}, + } + + pmb (pyMBE.pymbe_library): + Instance of the pyMBE library. + + lj_setup_mode (str, optional): + Determines how Lennard-Jones parameters are assigned. Defaults to `"wca"`. + + Raises: + ValueError: + If `lj_setup_mode` is not supported. 
+ + Notes: + - Particle names are extracted by stripping trailing digits + (e.g., `"ALA1"` → `"ALA"`). + - For metal ions (identified via `check_if_metal_ion()`), the correct + ionic charge is retrieved from the metal-ion charge map. + - The Lennard-Jones offset is computed as: + offset = 2 * radius - sigma + """ valid_lj_setups = ["wca"] if lj_setup_mode not in valid_lj_setups: @@ -125,34 +173,61 @@ def define_AA_particles(topology_dict, lj_setup_mode, pmb): if particle_name not in part_dict.keys(): if lj_setup_mode == "wca": part_dict={"sigma": sigma, - "offset": topology_dict[particle]['radius']*2-sigma, - "epsilon": epsilon, - "name": particle_name} + "offset": topology_dict[particle]['radius']*2-sigma, + "epsilon": epsilon, + "name": particle_name} if check_if_metal_ion(key=particle_name): z=metal_ions_charge_number_map[particle_name] else: z=0 part_dict["z"]=z part_dict["name"]=particle_name - if check_aminoacid_key(key=particle_name): - sequence.append(particle_name) + if particle_name not in defined_particles: pmb.define_particle(**part_dict) - defined_particles.append(particle_name) + defined_particles.append(particle_name) + return -def define_AA_residues(sequence, model, pmb): +def define_protein_AA_residues(topology_dict, model, pmb): """ - Convinience function to define a residue template in the pyMBE database for each aminoacid in peptide and proteins. + Define residue templates in the pyMBE database for a peptide or protein sequence. Args: - pmb(pymbe_library): Instance of the pyMBE library. - sequence(`lst`): Sequence of the peptide or protein. - model(`string`): Model name. Currently only models with 1 bead '1beadAA' or with 2 beads '2beadAA' per amino acid are supported. + sequence (list of str): + Ordered amino-acid sequence of the peptide or protein. Each element must + be a residue identifier compatible with the selected model. + + model (str): + Coarse-grained representation to use. 
Supported options: + - `"1beadAA"` + - `"2beadAA"` + + pmb (pyMBE.pymbe_library): + Instance of the pyMBE library. + + Notes: + - Supported models: + - `"1beadAA"`: Each amino acid is represented by a single bead. + The central bead is the amino-acid name itself, and no side chains are used. + - `"2beadAA"`: Each amino acid is represented by two beads, except for terminal or special residues: + * `"c"`, `"n"`, and `"G"` (glycine) are treated as single-bead residues. + * All other residues use `"CA"` (central bead) plus one side-chain bead named after the amino acid. + + - Residue names are constructed as `"AA-"`, e.g., `"AA-A"`, `"AA-L"`. + + Returns: + None + The function operates by side effect, populating the pyMBE residue + template database. + """ residue_list = [] - for item in sequence: + residues = get_residues_from_topology_dict(topology_dict=topology_dict, + model=model) + for res_id in residues.keys(): + item = residues[res_id]["resname"] if model == '1beadAA': central_bead = item side_chains = [] @@ -170,6 +245,33 @@ def define_AA_residues(sequence, model, pmb): side_chains = side_chains) residue_list.append(residue_name) +def get_residues_from_topology_dict(topology_dict, model): + if model == "1beadAA": + excluded_residue_names = [] + elif model == "2beadAA": + excluded_residue_names = ["CA"] + + # GROUP BEADS BY RESIDUE + residues = {} + for bead_id in topology_dict.keys(): + # extract prefix and index number + prefix = re.split(r'\d+', bead_id)[0] + index_match = re.findall(r'\d+', bead_id) + if not index_match: + raise ValueError(f"Topology key '{bead_id}' does not contain a residue index.") + resid = index_match[0] + if resid not in residues: + residues[resid] = {"beads": []} + residues[resid]["beads"].append(bead_id) + if prefix not in excluded_residue_names: + residues[resid]["resname"] = prefix + + # Assign name to glycine residues (only with CA beads) + for bead_id in residues: + if "resname" not in residues[bead_id]: + 
residues[bead_id]["resname"] = "G" + return residues + def get_metal_ions_charge_number_map(): """ Gets a map with the charge numbers of all the metal ions supported. diff --git a/pyMBE/pyMBE.py b/pyMBE/pyMBE.py index 69440d4..e2f9d59 100644 --- a/pyMBE/pyMBE.py +++ b/pyMBE/pyMBE.py @@ -953,58 +953,92 @@ def create_particle(self, name, espresso_system, number_of_particles, position=N def create_protein(self, name, number_of_proteins, espresso_system, topology_dict): """ - Creates `number_of_proteins` molecules of type `name` into `espresso_system` at the coordinates in `positions` + Creates one or more protein molecules in an ESPResSo system based on a stored + protein template and a provided topology. Args: - name(`str`): Label of the protein to be created. - espresso_system(`espressomd.system.System`): Instance of a system object from the espressomd library. - number_of_proteins(`int`): Number of proteins to be created. - positions(`dict`): {'ResidueNumber': {'initial_pos': [], 'chain_id': ''}} - """ + name (str): + Name of the protein template stored in the pyMBE database. + + number_of_proteins (int): + Number of protein molecules to generate. + + espresso_system (espressomd.system.System): + The ESPResSo simulation system where the protein molecules will be created. + + topology_dict (dict): + Dictionary defining the internal structure of the protein. + Expected format: + { + "ResidueName1": { + "initial_pos": np.ndarray, + "chain_id": int, + "radius": float + }, + "ResidueName2": { ... }, + ... + } + The `"initial_pos"` entry is required and represents the residue’s + reference coordinates before shifting to the protein's center-of-mass. 
- if number_of_proteins <=0: - return - if not _DFm._check_if_name_is_defined_in_df(name=name, df=self.df): - logging.warning(f"Protein with name '{name}' is not defined in the pyMBE DataFrame, no protein will be created.") + Notes: + - Particles are created using `create_particle()` with `fix=True`, + meaning they are initially immobilized. + - The function assumes all residues in `topology_dict` correspond to + particle templates already defined in the pyMBE database. + - Bonds between residues are not created here; it assumes a rigid body representation of the protein. + """ + if number_of_proteins <= 0: return - self._check_if_name_has_right_type(name=name, - expected_pmb_type="protein") - - self.df = _DFm._copy_df_entry(df = self.df, - name = name, - column_name = 'molecule_id', - number_of_copies = number_of_proteins) - protein_index = np.where(self.df['name'] == name) - protein_index_list = list(protein_index[0])[-number_of_proteins:] + + protein_tpl = self.db.get_template(pmb_type="protein", name=name) box_half = espresso_system.box_l[0] / 2.0 - for molecule_index in protein_index_list: - molecule_id = _DFm._assign_molecule_id(df = self.df, - molecule_index = molecule_index) - protein_center = self.generate_coordinates_outside_sphere(radius = 1, - max_dist = box_half, - n_samples = 1, - center = [box_half]*3)[0] - for residue in topology_dict.keys(): - residue_name = re.split(r'\d+', residue)[0] - residue_number = re.split(r'(\d+)', residue)[1] - residue_position = topology_dict[residue]['initial_pos'] - position = residue_position + protein_center - particle_id = self.create_particle(name=residue_name, - espresso_system=espresso_system, - number_of_particles=1, - position=[position], - fix = True) - index = self.df[self.df['particle_id']==particle_id[0]].index.values[0] - _DFm._add_value_to_df(df = self.df, - key = ('residue_id',''), - index = int(index), - new_value = int(residue_number), - overwrite = True) - _DFm._add_value_to_df(df = self.df, - key = 
('molecule_id',''), - index = int(index), - new_value = molecule_id, - overwrite = True) + + residues = hf.get_residues_from_topology_dict(topology_dict=topology_dict, + model=protein_tpl.model) + # Create protein + for _ in range(number_of_proteins): + # create a molecule identifier in pyMBE + molecule_id = self.db._propose_instance_id(pmb_type="protein") + # place protein COM randomly + protein_center = self.generate_coordinates_outside_sphere(radius=1, + max_dist=box_half, + n_samples=1, + center=[box_half]*3)[0] + # CREATE RESIDUES + PARTICLES + for _, rdata in residues.items(): + base_resname = rdata["resname"] + residue_name = f"AA-{base_resname}" + # residue instance ID + residue_id = self.db._propose_instance_id("residue") + # register ResidueInstance + self.db._register_instance(ResidueInstance(name=residue_name, + residue_id=residue_id, + molecule_id=molecule_id)) + + # PARTICLE CREATION + for bead_id in rdata["beads"]: + bead_type = re.split(r'\d+', bead_id)[0] + relative_pos = topology_dict[bead_id]["initial_pos"] + absolute_pos = relative_pos + protein_center + particle_id = self.create_particle(name=bead_type, + espresso_system=espresso_system, + number_of_particles=1, + position=[absolute_pos], + fix=True)[0] + + # update metadata + self.db._update_instance(instance_id=particle_id, + pmb_type="particle", + attribute="molecule_id", + value=molecule_id) + self.db._update_instance(instance_id=particle_id, + pmb_type="particle", + attribute="residue_id", + value=residue_id) + protein_inst = ProteinInstance(name=name, + molecule_id=molecule_id) + self.db._register_instance(protein_inst) def create_residue(self, name, espresso_system, central_bead_position=None,use_default_bond=False, backbone_vector=None): """ @@ -1326,7 +1360,6 @@ def define_protein(self, name, sequence, model): name (`str`): Unique label that identifies the protein. sequence (`str`): Sequence of the protein. model (`string`): Model name. 
Currently only models with 1 bead '1beadAA' or with 2 beads '2beadAA' per amino acid are supported. - topology_dict (`dict`): {'initial_pos': coords_list, 'chain_id': id, 'radius': radius_value} Note: - Currently, only `lj_setup_mode="wca"` is supported. This corresponds to setting up the WCA potential. diff --git a/pyMBE/storage/manager.py b/pyMBE/storage/manager.py index 8d6c432..f9cd942 100644 --- a/pyMBE/storage/manager.py +++ b/pyMBE/storage/manager.py @@ -669,10 +669,18 @@ def _propose_instance_id(self, pmb_type): Notes: - The method does not fill gaps; it always returns ``max + 1``. """ - if pmb_type not in self._instances or len(self._instances[pmb_type]) == 0: - return 0 - - used_ids = list(self._instances[pmb_type].keys()) + molecule_like_types = ["molecule", "peptide", "protein"] + if pmb_type in molecule_like_types: + used_ids = [] + for t in molecule_like_types: + if t in self._instances: + used_ids.extend(self._instances[t].keys()) + if not used_ids: + return 0 + else: + if pmb_type not in self._instances or len(self._instances[pmb_type]) == 0: + return 0 + used_ids = list(self._instances[pmb_type].keys()) return max(used_ids) + 1 def delete_template(self, pmb_type, name): diff --git a/test.py b/test.py index 1a78fef..17d17a3 100644 --- a/test.py +++ b/test.py @@ -28,6 +28,8 @@ import scipy import espressomd +import pyMBE.lib.handy_functions as hf + def main(): units = pint.UnitRegistry() @@ -155,10 +157,21 @@ def main(): topology_dict = pmb.read_protein_vtf_in_df (filename=path[0]) - pmb.define_protein(name="blabla", + # Define AA particles and residues + hf.define_protein_AA_particles(topology_dict=topology_dict, + pmb=pmb) + + hf.define_protein_AA_residues(topology_dict=topology_dict, + model="2beadAA", + pmb=pmb) + + print(pmb.db._get_templates_df(pmb_type="particle")) + print(pmb.db._get_templates_df(pmb_type="residue")) + pmb.define_protein(name="1beb", model="2beadAA", sequence="KKKKKK") + print(db._get_templates_df(pmb_type="protein")) @@ 
-200,14 +213,16 @@ def main(): print(pmb.db._get_instances_df(pmb_type="bond")) print("\n=== Peptide Instances DataFrame ===") - # inst_peptide1 = PeptideInstance(name="Peptide1", molecule_id=3) - # db._register_instance(inst_peptide1) - print(db._get_instances_df(pmb_type="peptide")) + + print(pmb.db._get_instances_df(pmb_type="peptide")) print("\n=== Protein Instances DataFrame ===") - # inst_protein1 = ProteinInstance(name="Protein1", molecule_id=4) - # db._register_instance(inst_protein1) - print(db._get_instances_df(pmb_type="protein")) + pmb.create_protein(name="1beb", + number_of_proteins=1, + espresso_system=espresso_system, + topology_dict=topology_dict) + exit() + print(pmb.db._get_instances_df(pmb_type="protein")) print("\n=== Hydrogel Instances DataFrame ===") # inst_hydrogel1 = HydrogelInstance(name="Hydrogel1", hydrogel_id=1, molecule_ids=["1","2","3"]) From 741e318429053504834ba08692f0421223a88d5c Mon Sep 17 00:00:00 2001 From: Pablo Date: Mon, 8 Dec 2025 20:37:35 +0100 Subject: [PATCH 10/55] refactor hydrogels --- pyMBE/lib/lattice.py | 2 + pyMBE/pyMBE.py | 272 +++++++++++++--------------- pyMBE/storage/instances/hydrogel.py | 9 +- pyMBE/storage/instances/molecule.py | 4 + pyMBE/storage/instances/particle.py | 7 + pyMBE/storage/instances/peptide.py | 6 +- pyMBE/storage/instances/protein.py | 4 + pyMBE/storage/instances/residue.py | 5 + pyMBE/storage/io.py | 18 +- pyMBE/storage/manager.py | 124 +++++++++---- test.py | 19 +- 11 files changed, 273 insertions(+), 197 deletions(-) diff --git a/pyMBE/lib/lattice.py b/pyMBE/lib/lattice.py index 86b6b5c..bdbbf2f 100644 --- a/pyMBE/lib/lattice.py +++ b/pyMBE/lib/lattice.py @@ -58,6 +58,8 @@ def __init__(self, lattice, strict=True): self.mpc = lattice.mpc self.box_l = lattice.box_l + def _create_node_label(self, node_index): + return str(node_index).replace(",", "") def _get_node_by_label(self, node): assert node in self.node_labels, f"node '{node}' doesn't exist in a {self.lattice.name} lattice" diff --git 
a/pyMBE/pyMBE.py b/pyMBE/pyMBE.py index e2f9d59..7f1851f 100644 --- a/pyMBE/pyMBE.py +++ b/pyMBE/pyMBE.py @@ -611,150 +611,125 @@ def create_counterions(self, object_name, cation_name, anion_name, espresso_syst def create_hydrogel(self, name, espresso_system): """ - creates the hydrogel `name` in espresso_system + Creates a hydrogel in espresso_system using a pyMBE hydrogel template given by `name` + Args: - name(`str`): Label of the hydrogel to be created. `name` must be defined in the `pmb.df` - espresso_system(`espressomd.system.System`): Instance of a system object from the espressomd library. + name(str): name of the hydrogel template in the pyMBE database. + espresso_system (espressomd.system.System): ESPResSo system object where the hydrogel will be created. Returns: - hydrogel_info(`dict`): {"name":hydrogel_name, "chains": {chain_id1: {residue_id1: {'central_bead_id': central_bead_id, 'side_chain_ids': [particle_id1,...]},...,"node_start":node_start,"node_end":node_end}, chain_id2: {...},...}, "nodes":{node1:[node1_id],...}} + (int): id of the hydrogel instance created. 
""" - if not _DFm._check_if_name_is_defined_in_df(name=name, df=self.df): - logging.warning(f"Hydrogel with name '{name}' is not defined in the DataFrame, no hydrogel will be created.") - return - self._check_if_name_has_right_type(name=name, - expected_pmb_type="hydrogel") - hydrogel_info={"name":name, "chains":{}, "nodes":{}} - # placing nodes - node_positions = {} - node_topology = self.df[self.df["name"]==name]["node_map"].iloc[0] - for node_info in node_topology: - node_index = node_info["lattice_index"] - node_name = node_info["particle_name"] - node_pos, node_id = self.create_hydrogel_node(self.format_node(node_index), node_name, espresso_system) - hydrogel_info["nodes"][self.format_node(node_index)]=node_id - node_positions[node_id[0]]=node_pos - - # Placing chains between nodes - # Looping over all the 16 chains - chain_topology = self.df[self.df["name"]==name]["chain_map"].iloc[0] - for chain_info in chain_topology: - node_s = chain_info["node_start"] - node_e = chain_info["node_end"] - molecule_info = self.create_hydrogel_chain(node_s, node_e, node_positions, espresso_system) - for molecule_id in molecule_info: - hydrogel_info["chains"][molecule_id] = molecule_info[molecule_id] - hydrogel_info["chains"][molecule_id]["node_start"]=node_s - hydrogel_info["chains"][molecule_id]["node_end"]=node_e - return hydrogel_info - - def create_hydrogel_chain(self, node_start, node_end, node_positions, espresso_system): + + hydrogel_tpl = self.db.get_template(pmb_type="hydrogel", + name=name) + assembly_id = self.db._propose_instance_id(pmb_type="hydrogel") + # Create the nodes + nodes = {} + node_topology = hydrogel_tpl.node_map + for node in node_topology: + node_index = node.lattice_index + node_name = node.particle_name + node_pos, node_id = self.create_hydrogel_node(node_index=node_index, + node_name=node_name, + espresso_system=espresso_system) + node_label = self.lattice_builder._create_node_label(node_index=node_index) + nodes[node_label] = {} + 
nodes[node_label]["name"] = node_name + nodes[node_label]["id"] = node_id + nodes[node_label]["pos"] = node_pos + self.db._update_instance(instance_id=node_id, + pmb_type="particle", + attribute="assembly_id", + value=assembly_id) + # Create the polymer chains between nodes + for hydrogel_chain in hydrogel_tpl.chain_map: + molecule_id = self.create_hydrogel_chain(hydrogel_chain=hydrogel_chain, + nodes=nodes, + espresso_system=espresso_system) + self.db._update_instance(instance_id=molecule_id, + pmb_type="molecule", + attribute="assembly_id", + value=assembly_id) + self.db._propagate_id(root_type="hydrogel", + root_id=assembly_id, + attribute="assembly_id", + value=assembly_id) + return assembly_id + + def create_hydrogel_chain(self, hydrogel_chain, nodes, espresso_system): """ Creates a chain between two nodes of a hydrogel. Args: - node_start(`str`): name of the starting node particle at which the first residue of the chain will be attached. - node_end(`str`): name of the ending node particle at which the last residue of the chain will be attached. - node_positions(`dict`): dictionary with the positions of the nodes. The keys are the node names and the values are the positions. - espresso_system(`espressomd.system.System`): Instance of a system object from the espressomd library. + hydrogel_chain(HydrogelChain): template of a hydrogel chain + nodes(dict): {node_index: {"name": node_particle_name, "pos": node_position, "id": node_particle_instance_id}} + espresso_system (espressomd.system.System): ESPResSo system object where the hydrogel chain will be created. + + Return: + (int): molecule_id of the created hydrogel chian. Note: - For example, if the chain is defined between node_start = ``[0 0 0]`` and node_end = ``[1 1 1]``, the chain will be placed between these two nodes. - The chain will be placed in the direction of the vector between `node_start` and `node_end`. 
+ - For example, if the chain is defined between node_start = ``[0 0 0]`` and node_end = ``[1 1 1]``, the chain will be placed between these two nodes. + - The chain will be placed in the direction of the vector between `node_start` and `node_end`. + - This function does not support default bonds. """ if self.lattice_builder is None: raise ValueError("LatticeBuilder is not initialized. Use `initialize_lattice_builder` first.") - - molecule_name = "chain_"+node_start+"_"+node_end - sequence = self.df[self.df['name']==molecule_name].residue_list.values [0] - assert len(sequence) != 0 and not isinstance(sequence, str) - assert len(sequence) == self.lattice_builder.mpc - - key, reverse = self.lattice_builder._get_node_vector_pair(node_start, node_end) - assert node_start != node_end or sequence == sequence[::-1], \ - (f"chain cannot be defined between '{node_start}' and '{node_end}' since it " - "would form a loop with a non-symmetric sequence (under-defined stereocenter)") - + molecule_tpl = self.db.get_template(pmb_type="molecule", + name=hydrogel_chain.molecule_name) + residue_list = molecule_tpl.residue_list + molecule_name = molecule_tpl.name + node_start = hydrogel_chain.node_start + node_end = hydrogel_chain.node_end + node_start_label = self.lattice_builder._create_node_label(node_start) + node_end_label = self.lattice_builder._create_node_label(node_end) + _, reverse = self.lattice_builder._get_node_vector_pair(node_start, node_end) + if node_start != node_end or residue_list == residue_list[::-1]: + RuntimeError(f"Aborted creation because hydrogel chain between '{node_start}' and '{node_end}' because pyMBE could not resolve a unique topology for that chain") if reverse: - sequence = sequence[::-1] - - node_start_pos = np.array(list(int(x) for x in node_start.strip('[]').split()))*0.25*self.lattice_builder.box_l - node_end_pos = np.array(list(int(x) for x in node_end.strip('[]').split()))*0.25*self.lattice_builder.box_l - node1 = 
espresso_system.part.select(lambda p: (p.pos == node_start_pos).all()).id - node2 = espresso_system.part.select(lambda p: (p.pos == node_end_pos).all()).id - - if not node1[0] in node_positions or not node2[0] in node_positions: - raise ValueError("Set node position before placing a chain between them") - + residue_list = residue_list[::-1] + start_node_id = nodes[node_start_label]["id"] + end_node_id = nodes[node_end_label]["id"] # Finding a backbone vector between node_start and node_end - vec_between_nodes = np.array(node_positions[node2[0]]) - np.array(node_positions[node1[0]]) + vec_between_nodes = np.array(nodes[node_start_label]["pos"]) - np.array(nodes[node_end_label]["pos"]) vec_between_nodes = vec_between_nodes - self.lattice_builder.box_l * np.round(vec_between_nodes/self.lattice_builder.box_l) - backbone_vector = list(vec_between_nodes/(self.lattice_builder.mpc + 1)) - node_start_name = self.df[(self.df["particle_id"]==node1[0]) & (self.df["pmb_type"]=="particle")]["name"].values[0] - first_res_name = self.df[(self.df["pmb_type"]=="residue") & (self.df["name"]==sequence[0])]["central_bead"].values[0] - l0 = self.get_bond_length(node_start_name, first_res_name, hard_check=True) - chain_molecule_info = self.create_molecule( - name=molecule_name, # Use the name defined earlier - number_of_molecules=1, # Creating one chain - espresso_system=espresso_system, - list_of_first_residue_positions=[list(np.array(node_positions[node1[0]]) + np.array(backbone_vector))],#Start at the first node - backbone_vector=np.array(backbone_vector)/l0, - use_default_bond=False # Use defaut bonds between monomers - ) - # Collecting ids of beads of the chain/molecule - chain_ids = [] - residue_ids = [] - for molecule_id in chain_molecule_info: - for residue_id in chain_molecule_info[molecule_id]: - residue_ids.append(residue_id) - bead_id = chain_molecule_info[molecule_id][residue_id]['central_bead_id'] - chain_ids.append(bead_id) - - self.lattice_builder.chains[key] = sequence - 
# Search bonds between nodes and chain ends - BeadType_near_to_node_start = self.df[(self.df["residue_id"] == residue_ids[0]) & (self.df["central_bead"].notnull())]["central_bead"].drop_duplicates().iloc[0] - BeadType_near_to_node_end = self.df[(self.df["residue_id"] == residue_ids[-1]) & (self.df["central_bead"].notnull())]["central_bead"].drop_duplicates().iloc[0] - bond_node1_first_monomer = self.search_bond(particle_name1 = self.lattice_builder.nodes[node_start], - particle_name2 = BeadType_near_to_node_start, - hard_check=False, - use_default_bond=False) - bond_node2_last_monomer = self.search_bond(particle_name1 = self.lattice_builder.nodes[node_end], - particle_name2 = BeadType_near_to_node_end, - hard_check=False, - use_default_bond=False) - - espresso_system.part.by_id(node1[0]).add_bond((bond_node1_first_monomer, chain_ids[0])) - espresso_system.part.by_id(node2[0]).add_bond((bond_node2_last_monomer, chain_ids[-1])) - # Add bonds to data frame - self.df, bond_index1 = _DFm._add_bond_in_df(df = self.df, - particle_id1 = node1[0], - particle_id2 = chain_ids[0], - use_default_bond = False) - _DFm._add_value_to_df(df = self.df, - key = ('molecule_id',''), - index = int(bond_index1), - new_value = molecule_id, - overwrite = True) - _DFm._add_value_to_df(df = self.df, - key = ('residue_id',''), - index = int(bond_index1), - new_value = residue_ids[0], - overwrite = True) - self.df, bond_index2 = _DFm._add_bond_in_df(df = self.df, - particle_id1 = node2[0], - particle_id2 = chain_ids[-1], - use_default_bond = False) - _DFm._add_value_to_df(df = self.df, - key = ('molecule_id',''), - index = int(bond_index2), - new_value = molecule_id, - overwrite = True) - _DFm._add_value_to_df(df = self.df, - key = ('residue_id',''), - index = int(bond_index2), - new_value = residue_ids[-1], - overwrite = True) - return chain_molecule_info + backbone_vector = np.array((vec_between_nodes/(self.lattice_builder.mpc + 1))) + backbone_vector = backbone_vector / 
np.linalg.norm(backbone_vector) + # Calculate the start position of the chain + chain_residues = self.db.get_template(pmb_type="molecule", + name=molecule_name).residue_list + part_start_chain_name = self.db.get_template(pmb_type="residue", + name=chain_residues[0]).central_bead + part_end_chain_name = self.db.get_template(pmb_type="residue", + name=chain_residues[-1]).central_bead + lj_parameters = self.get_lj_parameters(particle_name1=nodes[node_start_label]["name"], + particle_name2=part_start_chain_name) + bond_tpl = self.get_bond_template(particle_name1=nodes[node_start_label]["name"], + particle_name2=part_start_chain_name) + l0 = hf.calculate_initial_bond_length(lj_parameters=lj_parameters, + bond_type=bond_tpl.bond_type, + bond_parameters=bond_tpl.get_parameters(ureg=self.units)) + first_bead_pos = np.array((nodes[node_start_label]["pos"])) + np.array(backbone_vector)*l0 + mol_id = self.create_molecule(name=molecule_name, # Use the name defined earlier + number_of_molecules=1, # Creating one chain + espresso_system=espresso_system, + list_of_first_residue_positions=[first_bead_pos.tolist()],#Start at the first node + backbone_vector=np.array(backbone_vector)/l0, + use_default_bond=False) + # Bond chain to the hydrogel nodes + chain_pids = self.db._find_instance_ids_by_attribute(pmb_type="particle", + attribute="molecule_id", + value=mol_id) + start_bond_instance = self.get_espresso_bond_instance(particle_name1=nodes[node_start_label]["name"], + particle_name2=part_start_chain_name, + espresso_system=espresso_system) + end_bond_instance = self.get_espresso_bond_instance(particle_name1=nodes[node_end_label]["name"], + particle_name2=part_end_chain_name, + espresso_system=espresso_system) + espresso_system.part.by_id(start_node_id).add_bond((start_bond_instance, chain_pids[0])) + espresso_system.part.by_id(chain_pids[-1]).add_bond((end_bond_instance, end_node_id)) + return mol_id def create_hydrogel_node(self, node_index, node_name, espresso_system): """ @@ 
-763,6 +738,8 @@ def create_hydrogel_node(self, node_index, node_name, espresso_system): Args: node_index(`str`): Lattice node index in the form of a string, e.g. "[0 0 0]". node_name(`str`): name of the node particle defined in pyMBE. + espresso_system (espressomd.system.System): ESPResSo system object where the hydrogel node will be created. + Returns: node_position(`list`): Position of the node in the lattice. p_id(`int`): Particle ID of the node. @@ -770,15 +747,15 @@ def create_hydrogel_node(self, node_index, node_name, espresso_system): if self.lattice_builder is None: raise ValueError("LatticeBuilder is not initialized. Use `initialize_lattice_builder` first.") - node_position = np.array(list(int(x) for x in node_index.strip('[]').split()))*0.25*self.lattice_builder.box_l + node_position = np.array(node_index)*0.25*self.lattice_builder.box_l p_id = self.create_particle(name = node_name, - espresso_system=espresso_system, - number_of_particles=1, - position = [node_position]) - key = self.lattice_builder._get_node_by_label(node_index) + espresso_system=espresso_system, + number_of_particles=1, + position = [node_position]) + key = self.lattice_builder._get_node_by_label(f"[{node_index[0]} {node_index[1]} {node_index[2]}]") self.lattice_builder.nodes[key] = node_name - return node_position.tolist(), p_id + return node_position.tolist(), p_id[0] def create_molecule(self, name, number_of_molecules, espresso_system, list_of_first_residue_positions=None, backbone_vector=None, use_default_bond=False): """ @@ -851,9 +828,13 @@ def create_molecule(self, name, number_of_molecules, espresso_system, list_of_fi backbone_vector=backbone_vector) # Add molecule_id to the residue instance and all particles associated - particle_ids_in_residue = self.db._update_part_res_inst_mol_ids(residue_id=residue_id, - molecule_id=molecule_id) - + self.db._propagate_id(root_type="residue", + root_id=residue_id, + attribute="molecule_id", + value=molecule_id) + particle_ids_in_residue = 
self.db._find_instance_ids_by_attribute(pmb_type="particle", + attribute="residue_id", + value=residue_id) prev_central_bead_id = particle_ids_in_residue[0] prev_central_bead_name = self.db.get_instance(pmb_type="particle", instance_id=prev_central_bead_id).name prev_central_bead_pos = espresso_system.part.by_id(prev_central_bead_id).pos @@ -878,8 +859,13 @@ def create_molecule(self, name, number_of_molecules, espresso_system, list_of_fi use_default_bond= use_default_bond, backbone_vector=backbone_vector) # Add molecule_id to the residue instance and all particles associated - particle_ids_in_residue = self.db._update_part_res_inst_mol_ids(residue_id=residue_id, - molecule_id=molecule_id) + self.db._propagate_id(root_type="residue", + root_id=residue_id, + attribute="molecule_id", + value=molecule_id) + particle_ids_in_residue = self.db._find_instance_ids_by_attribute(pmb_type="particle", + attribute="residue_id", + value=residue_id) central_bead_id = particle_ids_in_residue[0] # Bond the central beads of the new and previous residues @@ -907,10 +893,10 @@ def create_molecule(self, name, number_of_molecules, espresso_system, list_of_fi def create_particle(self, name, espresso_system, number_of_particles, position=None, fix=False): """ - Creates `number_of_particles` particles of type `name` into `espresso_system` and bookkeeps them into `pymbe.df`. + Creates one or more particles in an ESPResSo system based on the particle template in the pyMBE database. Args: - name(`str`): Label of the particle type to be created. `name` must be a `particle` defined in `pmb_df`. + name(`str`): Label of the particle template in the pyMBE database. espresso_system(`espressomd.system.System`): Instance of a system object from the espressomd library. number_of_particles(`int`): Number of particles to be created. position(list of [`float`,`float`,`float`], optional): Initial positions of the particles. If not given, particles are created in random positions. Defaults to None. 
@@ -953,8 +939,8 @@ def create_particle(self, name, espresso_system, number_of_particles, position=N def create_protein(self, name, number_of_proteins, espresso_system, topology_dict): """ - Creates one or more protein molecules in an ESPResSo system based on a stored - protein template and a provided topology. + Creates one or more protein molecules in an ESPResSo system based on the + protein template in the pyMBE database and a provided topology. Args: name (str): diff --git a/pyMBE/storage/instances/hydrogel.py b/pyMBE/storage/instances/hydrogel.py index 410ec71..a2c4ae7 100644 --- a/pyMBE/storage/instances/hydrogel.py +++ b/pyMBE/storage/instances/hydrogel.py @@ -40,14 +40,10 @@ class HydrogelInstance(PMBBaseModel): pmb_type (str): Fixed string identifier for this instance type. Always ``"hydrogel"``. - hydrogel_id (int): + assembly_id (int): Unique non-negative integer identifying this hydrogel instance. name (str): Human-readable name for the hydrogel (e.g., ``"HG_001"``). - molecule_ids (List[str]): - List of molecule instance IDs that compose the hydrogel. - Each entry must correspond to a valid molecule instance stored - in the database. Defaults to an empty list. Notes: - This class represents the *instance* level (what specific @@ -58,6 +54,5 @@ class HydrogelInstance(PMBBaseModel): during creation or update and not inside this class. """ pmb_type: str = Field(default="hydrogel", frozen=True) - hydrogel_id: int + assembly_id: int name: str - molecule_ids: List[str] = Field(default_factory=list) diff --git a/pyMBE/storage/instances/molecule.py b/pyMBE/storage/instances/molecule.py index 521cce3..32d3b65 100644 --- a/pyMBE/storage/instances/molecule.py +++ b/pyMBE/storage/instances/molecule.py @@ -45,6 +45,9 @@ class MoleculeInstance(PMBBaseModel): molecule_id (int): Unique non-negative integer identifying this molecule instance within the database. + assembly_id (int | None): + Identifier of the super-parent assembly (e.g. 
hydrogel) to which this residue belongs. + ``None`` indicates that the residue is not assigned to any assembly. Notes: - Validation of whether ``name`` corresponds to a registered @@ -56,6 +59,7 @@ class MoleculeInstance(PMBBaseModel): pmb_type: str = "molecule" name: str # molecule template name molecule_id: int + assembly_id: int | None = None @field_validator("molecule_id") def validate_residue_id(cls, mid): diff --git a/pyMBE/storage/instances/particle.py b/pyMBE/storage/instances/particle.py index 3f7773a..2a220c7 100644 --- a/pyMBE/storage/instances/particle.py +++ b/pyMBE/storage/instances/particle.py @@ -35,6 +35,8 @@ class ParticleInstance(PMBBaseModel): pmb_type (str): Fixed string identifying this object as a particle instance. Always ``"particle"``. + name (str): + Name of the particle template from which this instance is derived. particle_id (int): Unique non-negative integer identifying the particle within the database. Assigned sequentially by the database manager. @@ -51,6 +53,9 @@ class ParticleInstance(PMBBaseModel): Optional identifier of the ``MoleculeInstance`` this particle belongs to. Particles not belonging to any molecule should keep this as ``None``. + assembly_id (int | None): + Identifier of the super-parent assembly (e.g. hydrogel) to which this residue belongs. + ``None`` indicates that the residue is not assigned to any assembly. Notes: - ``initial_state`` is stored as a plain string to ensure clean @@ -59,10 +64,12 @@ class ParticleInstance(PMBBaseModel): this class and handled by the database or simulation backend. 
""" pmb_type: str = "particle" + name: str particle_id: int initial_state: str residue_id: int | None = None molecule_id: int | None = None + assembly_id: int | None = None @field_validator("particle_id") def validate_particle_id(cls, pid): diff --git a/pyMBE/storage/instances/peptide.py b/pyMBE/storage/instances/peptide.py index 64dd368..e22c894 100644 --- a/pyMBE/storage/instances/peptide.py +++ b/pyMBE/storage/instances/peptide.py @@ -42,6 +42,9 @@ class PeptideInstance(PMBBaseModel): Unique non-negative integer identifying this peptide within the database. Assigned sequentially by the database manager when the instance is created. + assembly_id (int | None): + Identifier of the super-parent assembly (e.g. hydrogel) to which this residue belongs. + ``None`` indicates that the residue is not assigned to any assembly. Notes: - This class only tracks the identity of the peptide instance. @@ -55,7 +58,8 @@ class PeptideInstance(PMBBaseModel): pmb_type: str = "peptide" name: str # molecule template name molecule_id: int - + assembly_id: int | None = None + @field_validator("molecule_id") def validate_residue_id(cls, mid): if mid < 0: diff --git a/pyMBE/storage/instances/protein.py b/pyMBE/storage/instances/protein.py index 126aa3e..e3206dd 100644 --- a/pyMBE/storage/instances/protein.py +++ b/pyMBE/storage/instances/protein.py @@ -41,6 +41,9 @@ class ProteinInstance(PMBBaseModel): molecule_id (int): Unique non-negative integer identifying this protein within the database. Assigned by the database manager upon creation. + assembly_id (int | None): + Identifier of the super-parent assembly (e.g. hydrogel) to which this residue belongs. + ``None`` indicates that the residue is not assigned to any assembly. 
Notes: - A ``ProteinInstance`` only records the identity of the protein @@ -53,6 +56,7 @@ class ProteinInstance(PMBBaseModel): pmb_type: str = "protein" name: str # molecule template name molecule_id: int + assembly_id: int | None = None @field_validator("molecule_id") def validate_residue_id(cls, mid): diff --git a/pyMBE/storage/instances/residue.py b/pyMBE/storage/instances/residue.py index e5d5bcb..5cdb8a8 100644 --- a/pyMBE/storage/instances/residue.py +++ b/pyMBE/storage/instances/residue.py @@ -42,6 +42,9 @@ class ResidueInstance(PMBBaseModel): molecule_id (int | None): Identifier of the parent molecule to which this residue belongs. ``None`` indicates that the residue is not assigned to any molecule. + assembly_id (int | None): + Identifier of the super-parent assembly (e.g. hydrogel) to which this residue belongs. + ``None`` indicates that the residue is not assigned to any assembly. Notes: - ``ResidueInstance`` does not itself store particle-level @@ -57,6 +60,8 @@ class ResidueInstance(PMBBaseModel): name: str # residue template name residue_id: int molecule_id: int | None = None + assembly_id: int | None = None + @field_validator("residue_id") def validate_residue_id(cls, rid): diff --git a/pyMBE/storage/io.py b/pyMBE/storage/io.py index 1d4bf85..f5d4ea3 100644 --- a/pyMBE/storage/io.py +++ b/pyMBE/storage/io.py @@ -296,40 +296,50 @@ def _load_database_csv(db, folder): # some fields might be empty strings -> map to None residue_val = row.get("residue_id", "") or "" molecule_val = row.get("molecule_id", "") or "" + assembly_val = row.get("assembly_id", "") or "" inst = ParticleInstance( name=row["name"], particle_id=int(row["particle_id"]), initial_state=row["initial_state"], residue_id=None if residue_val == "" else int(residue_val), molecule_id=None if molecule_val == "" else int(molecule_val), + assembly_id=None if assembly_val == "" else int(assembly_val), ) instances[inst.particle_id] = inst elif pmb_type == "residue": mol_val = 
row.get("molecule_id", "") or "" + assembly_val = row.get("assembly_id", "") or "" inst = ResidueInstance( name=row["name"], residue_id=int(row["residue_id"]), molecule_id=None if mol_val == "" else int(mol_val), + assembly_id=None if assembly_val == "" else int(assembly_val), ) instances[inst.residue_id] = inst elif pmb_type == "molecule": + assembly_val = row.get("assembly_id", "") or "" inst = MoleculeInstance( name=row["name"], molecule_id=int(row["molecule_id"]), + assembly_id=None if assembly_val == "" else int(assembly_val), ) instances[inst.molecule_id] = inst elif pmb_type == "peptide": + assembly_val = row.get("assembly_id", "") or "" inst = PeptideInstance( name=row["name"], molecule_id=int(row["molecule_id"]), + assembly_id=None if assembly_val == "" else int(assembly_val), ) instances[inst.molecule_id] = inst elif pmb_type == "protein": + assembly_val = row.get("assembly_id", "") or "" inst = ProteinInstance( name=row["name"], molecule_id=int(row["molecule_id"]), + assembly_id=None if assembly_val == "" else int(assembly_val), ) instances[inst.molecule_id] = inst elif pmb_type == "bond": @@ -521,6 +531,7 @@ def _save_database_csv(db, folder): "initial_state": inst.initial_state, "residue_id": int(inst.residue_id) if inst.residue_id is not None else "", "molecule_id": int(inst.molecule_id) if inst.molecule_id is not None else "", + "assembly_id": int(inst.assembly_id) if inst.assembly_id is not None else "", }) elif pmb_type == "residue" and isinstance(inst, ResidueInstance): rows.append({ @@ -528,24 +539,28 @@ def _save_database_csv(db, folder): "name": inst.name, "residue_id": int(inst.residue_id), "molecule_id": int(inst.molecule_id) if inst.molecule_id is not None else "", + "assembly_id": int(inst.assembly_id) if inst.assembly_id is not None else "", }) elif pmb_type == "molecule" and isinstance(inst, MoleculeInstance): rows.append({ "pmb_type": pmb_type, "name": inst.name, "molecule_id": int(inst.molecule_id), + "assembly_id": 
int(inst.assembly_id) if inst.assembly_id is not None else "", }) elif pmb_type == "peptide" and isinstance(inst, PeptideInstance): rows.append({ "pmb_type": pmb_type, "name": inst.name, "molecule_id": int(inst.molecule_id), + "assembly_id": int(inst.assembly_id) if inst.assembly_id is not None else "", }) elif pmb_type == "protein" and isinstance(inst, ProteinInstance): rows.append({ "pmb_type": pmb_type, "name": inst.name, "molecule_id": int(inst.molecule_id), + "assembly_id": int(inst.assembly_id) if inst.assembly_id is not None else "", }) elif pmb_type == "bond" and isinstance(inst, BondInstance): rows.append({ @@ -559,8 +574,7 @@ def _save_database_csv(db, folder): rows.append({ "pmb_type": pmb_type, "name": inst.name, - "hydrogel_id": int(inst.hydrogel_id), - "molecule_ids": _encode(inst.molecule_ids), + "assembly_id": int(inst.assembly_id), }) else: # fallback to model_dump diff --git a/pyMBE/storage/manager.py b/pyMBE/storage/manager.py index f9cd942..cd32ff9 100644 --- a/pyMBE/storage/manager.py +++ b/pyMBE/storage/manager.py @@ -132,7 +132,7 @@ def _find_instance_ids_by_attribute(self, pmb_type, attribute, value): List[int]: IDs of matching instances. 
""" if pmb_type not in self._instances: - raise KeyError(f"Unknown pmb_type '{pmb_type}' in instance database.") + return [] results = [] for inst_id, inst in self._instances[pmb_type].items(): if hasattr(inst, attribute) and getattr(inst, attribute) == value: @@ -253,6 +253,7 @@ def _get_instances_df(self, pmb_type): "initial_state": inst.initial_state, "residue_id": int(inst.residue_id) if inst.residue_id is not None else pd.NA, "molecule_id": int(inst.molecule_id) if inst.molecule_id is not None else pd.NA, + "assembly_id": int(inst.assembly_id) if inst.assembly_id is not None else pd.NA }) elif pmb_type == "residue": rows.append({ @@ -260,7 +261,16 @@ def _get_instances_df(self, pmb_type): "name": inst.name, "residue_id": inst.residue_id, "molecule_id": int(inst.molecule_id) if inst.molecule_id is not None else pd.NA, + "assembly_id": int(inst.assembly_id) if inst.assembly_id is not None else pd.NA + }) + elif pmb_type in ["molecule","peptide","protein"]: + rows.append({ + "pmb_type": pmb_type, + "name": inst.name, + "molecule_id": inst.molecule_id, + "assembly_id": int(inst.assembly_id) if inst.assembly_id is not None else pd.NA }) + else: # Generic representation for other types rows.append(inst.model_dump()) @@ -567,8 +577,9 @@ def _update_instance(self, instance_id, pmb_type, attribute, value): Notes: - Allowed updates: - * ``particle``: ``initial_state``, ``residue_id``, ``molecule_id`` - * ``residue``: ``molecule_id`` + * ``particle``: ``initial_state``, ``residue_id``, ``molecule_id``, ``assembly_id`` + * ``residue``: ``molecule_id``, ``assembly_id`` + * ``molecule``: ``assembly_id`` * All other types: no attribute updates allowed. 
- The method replaces the instance with a new Pydantic model using ``model_copy(update=...)`` to maintain immutability and @@ -579,9 +590,11 @@ def _update_instance(self, instance_id, pmb_type, attribute, value): raise KeyError(f"Instance '{instance_id}' not found for type '{pmb_type}' in the pyMBE database.") if pmb_type == "particle": - allowed = ["initial_state", "residue_id", "molecule_id"] + allowed = ["initial_state", "residue_id", "molecule_id", "assembly_id"] elif pmb_type == "residue": - allowed = ["molecule_id"] + allowed = ["molecule_id", "assembly_id"] + elif pmb_type == "molecule": + allowed = ["assembly_id"] else: allowed = [None] # No attributes allowed for other types @@ -590,42 +603,89 @@ def _update_instance(self, instance_id, pmb_type, attribute, value): self._instances[pmb_type][instance_id] = self._instances[pmb_type][instance_id].model_copy(update={attribute: value}) - def _update_part_res_inst_mol_ids(self, residue_id, molecule_id): + def _propagate_id(self, root_type, root_id, attribute, value): """ - Updates the molecule ID of a residue and all particle instances that belong to it. + Recursively updates an attribute (e.g., molecule_id, assembly_id) + on an instance and all of its hierarchical descendants. + + Supported relationships: + assembly → molecules → residues → particles + molecule → residues → particles + residue → particles + particle → (nothing) - Args: - residue_id (int): - The instance ID of the residue whose molecule assignment should be updated. - molecule_id (int): - The molecule ID to assign to the residue and all its particles. + root_type (str): + One of {"assembly", "molecule", "residue", "particle"}. + root_id (int): + Instance ID of the root object to update. + attribute (str): + The attribute to update (e.g., "molecule_id", "assembly_id"). + value: + The new value to assign. Returns: - List[int]: - A list of particle instance IDs that were updated. 
+ list[int]: + A flat list of all instance IDs updated (including root). Raises: KeyError: - If the residue does not exist in the database. + If the root instance does not exist. ValueError: - If an update fails due to inconsistent or missing attributes. - """ - - self._update_instance(instance_id=residue_id, - pmb_type="residue", - attribute="molecule_id", - value=molecule_id) - particle_ids_in_residue = self._find_instance_ids_by_attribute(pmb_type="particle", - attribute="residue_id", - value=residue_id) - for particle_id in particle_ids_in_residue: - self._update_instance(instance_id=particle_id, - pmb_type="particle", - attribute="molecule_id", - value=molecule_id) - - return particle_ids_in_residue + If an unsupported type or attribute is given. + """ + updated = [] + # Map each type to its own identity attribute + self_id_attribute = { + "hydrogel": "assembly_id", + "molecule": "molecule_id", + "peptide": "molecule_id", + "protein": "molecule_id", + "residue": "residue_id", + "particle": "particle_id", + } + assembly_types = ["hydrogel"] + molecule_types = ["molecule", "peptide", "protein"] + # 1) Update ROOT (unless attribute corresponds to its own ID) + if attribute != self_id_attribute.get(root_type): + self._update_instance(instance_id=root_id, + pmb_type=root_type, + attribute=attribute, + value=value,) + updated.append((root_type, root_id)) + # 2) Descendants: assembly → molecules + if root_type in assembly_types: + for mtype in molecule_types: + molecule_ids = self._find_instance_ids_by_attribute(pmb_type=mtype, + attribute="assembly_id", + value=root_id) + for mid in molecule_ids: + updated += self._propagate_id(root_type=mtype, + root_id=mid, + attribute=attribute, + value=value) + # 3) Descendants: molecule → residues + if root_type in molecule_types: + residue_ids = self._find_instance_ids_by_attribute(pmb_type="residue", + attribute="molecule_id", + value=root_id) + for rid in residue_ids: + updated += self._propagate_id(root_type="residue", + 
root_id=rid, + attribute=attribute, + value=value) + # 4) Descendants: residue → particles + if root_type == "residue": + particle_ids = self._find_instance_ids_by_attribute(pmb_type="particle", + attribute="residue_id", + value=root_id,) + for pid in particle_ids: + self._update_instance(instance_id=pid, + pmb_type="particle", + attribute=attribute, + value=value,) + updated.append(("particle", pid)) + return updated def _update_reaction_participant(self, reaction_name, particle_name, state_name, coefficient): diff --git a/test.py b/test.py index 17d17a3..f4ecf21 100644 --- a/test.py +++ b/test.py @@ -74,7 +74,7 @@ def main(): print("\n=== Hydrogel Templates DataFrame ===") - diamond_lattice = DiamondLattice(30, 3.5 * units.reduced_length) + diamond_lattice = DiamondLattice(4, 3.5 * units.reduced_length) lattice_builder = pmb.initialize_lattice_builder(diamond_lattice) # Setting up node topology @@ -82,7 +82,7 @@ def main(): node_topology = [] for index in range(len(indices)): - node_topology.append({"particle_name": "A", + node_topology.append({"particle_name": "X", "lattice_index": indices[index]}) # Setting up chain topology node_labels = lattice_builder.node_labels @@ -174,9 +174,6 @@ def main(): print(db._get_templates_df(pmb_type="protein")) - - - # ============================================================ # 2. 
CREATE INSTANCES (optional for testing) # ============================================================ @@ -189,9 +186,7 @@ def main(): print("\n=== Particle Instances DataFrame ===") print(pmb.db._get_instances_df(pmb_type="particle")) - - - + pmb.create_residue(name="R1", espresso_system=espresso_system) pmb.create_residue(name="R2", @@ -221,13 +216,13 @@ def main(): number_of_proteins=1, espresso_system=espresso_system, topology_dict=topology_dict) - exit() + print(pmb.db._get_instances_df(pmb_type="protein")) print("\n=== Hydrogel Instances DataFrame ===") - # inst_hydrogel1 = HydrogelInstance(name="Hydrogel1", hydrogel_id=1, molecule_ids=["1","2","3"]) - # db._register_instance(inst_hydrogel1) - print(db._get_instances_df(pmb_type="hydrogel")) + pmb.create_hydrogel(name="my_hydrogel", + espresso_system=espresso_system) + print(pmb.db._get_instances_df(pmb_type="hydrogel")) # ============================================================ From 7739970846d71b26de3cdc23c90d27342f27f5fd Mon Sep 17 00:00:00 2001 From: Pablo Date: Tue, 9 Dec 2025 11:40:12 +0100 Subject: [PATCH 11/55] update delete methods to work with the new database --- pyMBE/pyMBE.py | 459 ++++++++++++++++++--------------------- pyMBE/storage/io.py | 8 +- pyMBE/storage/manager.py | 186 ++++++++-------- test.py | 76 ++++++- 4 files changed, 382 insertions(+), 347 deletions(-) diff --git a/pyMBE/pyMBE.py b/pyMBE/pyMBE.py index 7f1851f..3ac8c9b 100644 --- a/pyMBE/pyMBE.py +++ b/pyMBE/pyMBE.py @@ -142,6 +142,149 @@ def _check_if_name_has_right_type(self, name, expected_pmb_type, hard_check=True if hard_check: raise ValueError(f"The name {name} has been defined in the pyMBE DataFrame with a pmb_type = {pmb_type}. This function only supports pyMBE objects with pmb_type = {expected_pmb_type}") return False + + def _create_espresso_bond_instance(self, bond_type, bond_parameters): + """ + Creates an ESPResSo bond instance. + + Args: + bond_type(`str`): label to identify the potential to model the bond. 
+ bond_parameters(`dict`): parameters of the potential of the bond. + + Note: + Currently, only HARMONIC and FENE bonds are supported. + + For a HARMONIC bond the dictionary must contain: + - k (`Pint.Quantity`) : Magnitude of the bond. It should have units of energy/length**2 + using the `pmb.units` UnitRegistry. + - r_0 (`Pint.Quantity`) : Equilibrium bond length. It should have units of length using + the `pmb.units` UnitRegistry. + + For a FENE bond the dictionary must additionally contain: + - d_r_max (`Pint.Quantity`): Maximal stretching length for FENE. It should have + units of length using the `pmb.units` UnitRegistry. Default 'None'. + + Returns: + (`espressomd.interactions`): instance of an ESPResSo bond object + """ + from espressomd import interactions + valid_bond_types = ["harmonic", "FENE"] + if bond_type not in valid_bond_types: + raise NotImplementedError(f"Bond type '{bond_type}' currently not implemented in pyMBE, accepted types are {valid_bond_types}") + required_parameters = {"harmonic": ["r_0","k"], + "FENE": ["r_0","k","d_r_max"]} + for required_parameter in required_parameters[bond_type]: + if required_parameter not in bond_parameters.keys(): + raise ValueError(f"Missing required parameter {required_parameter} for {bond_type} bond") + if bond_type == 'harmonic': + bond_instance = interactions.HarmonicBond(k = bond_parameters["k"].m_as("reduced_energy/reduced_length**2"), + r_0 = bond_parameters["r_0"].m_as("reduced_length")) + elif bond_type == 'FENE': + bond_instance = interactions.FeneBond(k = bond_parameters["k"].m_as("reduced_energy/reduced_length**2"), + r_0 = bond_parameters["r_0"].m_as("reduced_length"), + d_r_max = bond_parameters["d_r_max"].m_as("reduced_length")) + return bond_instance + + def _create_hydrogel_chain(self, hydrogel_chain, nodes, espresso_system): + """ + Creates a chain between two nodes of a hydrogel. 
+ + Args: + hydrogel_chain(HydrogelChain): template of a hydrogel chain + nodes(dict): {node_index: {"name": node_particle_name, "pos": node_position, "id": node_particle_instance_id}} + espresso_system (espressomd.system.System): ESPResSo system object where the hydrogel chain will be created. + + Return: + (int): molecule_id of the created hydrogel chian. + + Note: + - For example, if the chain is defined between node_start = ``[0 0 0]`` and node_end = ``[1 1 1]``, the chain will be placed between these two nodes. + - The chain will be placed in the direction of the vector between `node_start` and `node_end`. + - This function does not support default bonds. + """ + if self.lattice_builder is None: + raise ValueError("LatticeBuilder is not initialized. Use `initialize_lattice_builder` first.") + molecule_tpl = self.db.get_template(pmb_type="molecule", + name=hydrogel_chain.molecule_name) + residue_list = molecule_tpl.residue_list + molecule_name = molecule_tpl.name + node_start = hydrogel_chain.node_start + node_end = hydrogel_chain.node_end + node_start_label = self.lattice_builder._create_node_label(node_start) + node_end_label = self.lattice_builder._create_node_label(node_end) + _, reverse = self.lattice_builder._get_node_vector_pair(node_start, node_end) + if node_start != node_end or residue_list == residue_list[::-1]: + RuntimeError(f"Aborted creation because hydrogel chain between '{node_start}' and '{node_end}' because pyMBE could not resolve a unique topology for that chain") + if reverse: + residue_list = residue_list[::-1] + start_node_id = nodes[node_start_label]["id"] + end_node_id = nodes[node_end_label]["id"] + # Finding a backbone vector between node_start and node_end + vec_between_nodes = np.array(nodes[node_start_label]["pos"]) - np.array(nodes[node_end_label]["pos"]) + vec_between_nodes = vec_between_nodes - self.lattice_builder.box_l * np.round(vec_between_nodes/self.lattice_builder.box_l) + backbone_vector = 
np.array((vec_between_nodes/(self.lattice_builder.mpc + 1))) + backbone_vector = backbone_vector / np.linalg.norm(backbone_vector) + # Calculate the start position of the chain + chain_residues = self.db.get_template(pmb_type="molecule", + name=molecule_name).residue_list + part_start_chain_name = self.db.get_template(pmb_type="residue", + name=chain_residues[0]).central_bead + part_end_chain_name = self.db.get_template(pmb_type="residue", + name=chain_residues[-1]).central_bead + lj_parameters = self.get_lj_parameters(particle_name1=nodes[node_start_label]["name"], + particle_name2=part_start_chain_name) + bond_tpl = self.get_bond_template(particle_name1=nodes[node_start_label]["name"], + particle_name2=part_start_chain_name) + l0 = hf.calculate_initial_bond_length(lj_parameters=lj_parameters, + bond_type=bond_tpl.bond_type, + bond_parameters=bond_tpl.get_parameters(ureg=self.units)) + first_bead_pos = np.array((nodes[node_start_label]["pos"])) + np.array(backbone_vector)*l0 + mol_id = self.create_molecule(name=molecule_name, # Use the name defined earlier + number_of_molecules=1, # Creating one chain + espresso_system=espresso_system, + list_of_first_residue_positions=[first_bead_pos.tolist()],#Start at the first node + backbone_vector=np.array(backbone_vector)/l0, + use_default_bond=False) + # Bond chain to the hydrogel nodes + chain_pids = self.db._find_instance_ids_by_attribute(pmb_type="particle", + attribute="molecule_id", + value=mol_id) + start_bond_instance = self.get_espresso_bond_instance(particle_name1=nodes[node_start_label]["name"], + particle_name2=part_start_chain_name, + espresso_system=espresso_system) + end_bond_instance = self.get_espresso_bond_instance(particle_name1=nodes[node_end_label]["name"], + particle_name2=part_end_chain_name, + espresso_system=espresso_system) + espresso_system.part.by_id(start_node_id).add_bond((start_bond_instance, chain_pids[0])) + espresso_system.part.by_id(chain_pids[-1]).add_bond((end_bond_instance, 
end_node_id)) + return mol_id + + def _create_hydrogel_node(self, node_index, node_name, espresso_system): + """ + Set a node residue type. + + Args: + node_index(`str`): Lattice node index in the form of a string, e.g. "[0 0 0]". + node_name(`str`): name of the node particle defined in pyMBE. + espresso_system (espressomd.system.System): ESPResSo system object where the hydrogel node will be created. + + Returns: + node_position(`list`): Position of the node in the lattice. + p_id(`int`): Particle ID of the node. + """ + if self.lattice_builder is None: + raise ValueError("LatticeBuilder is not initialized. Use `initialize_lattice_builder` first.") + + node_position = np.array(node_index)*0.25*self.lattice_builder.box_l + p_id = self.create_particle(name = node_name, + espresso_system=espresso_system, + number_of_particles=1, + position = [node_position]) + key = self.lattice_builder._get_node_by_label(f"[{node_index[0]} {node_index[1]} {node_index[2]}]") + self.lattice_builder.nodes[key] = node_name + + return node_position.tolist(), p_id[0] + def _get_residue_list_from_sequence(self, sequence): """ Convinience function to get a `residue_list` from a protein or peptide `sequence`. @@ -158,6 +301,28 @@ def _get_residue_list_from_sequence(self, sequence): residue_list.append(residue_name) return residue_list + def _delete_particles_from_espresso(self, particle_ids, espresso_system): + """ + Remove a list of particles from an ESPResSo simulation system. + + Args: + particle_ids : Iterable[int] + A list (or other iterable) of ESPResSo particle IDs to remove. + + espresso_system : espressomd.system.System + The ESPResSo simulation system from which the particles + will be removed. + + Note: + - This method removes particles only from the ESPResSo simulation, + **not** from the pyMBE database. Database cleanup must be handled + separately by the caller. + - Attempting to remove a non-existent particle ID will raise + an ESPResSo error. 
+ """ + + for pid in particle_ids: + espresso_system.part.by_id(pid).remove() def calculate_center_of_mass_of_molecule(self, molecule_id, espresso_system): @@ -322,8 +487,6 @@ def calc_partition_coefficient(charge, c_macro): return {"charges_dict": Z_HH_Donnan, "pH_system_list": pH_system_list, "partition_coefficients": partition_coefficients_list} - - def calculate_net_charge(self, espresso_system, molecule_name, dimensionless=False): ''' Calculates the net charge per molecule of molecules with `name` = molecule_name. @@ -500,56 +663,17 @@ def create_bond(self, particle_id1, particle_id2, espresso_system, use_default_b espresso_system=espresso_system, use_default_bond=use_default_bond) espresso_system.part.by_id(particle_id1).add_bond((bond_inst, particle_id2)) + if use_default_bond: + bond_name = "default" + else: + bond_name = BondTemplate.make_bond_key(pn1=particle_inst_1.name, + pn2=particle_inst_2.name) pmb_bond_instance = BondInstance(bond_id=self.db._propose_instance_id(pmb_type="bond"), - name=BondTemplate.make_bond_key(pn1=particle_inst_1.name, - pn2=particle_inst_2.name), + name=bond_name, particle_id1=particle_id1, particle_id2=particle_id2) self.db._register_instance(instance=pmb_bond_instance) - def create_espresso_bond_instance(self, bond_type, bond_parameters): - """ - Creates an ESPResSo bond instance. - - Args: - bond_type(`str`): label to identify the potential to model the bond. - bond_parameters(`dict`): parameters of the potential of the bond. - - Note: - Currently, only HARMONIC and FENE bonds are supported. - - For a HARMONIC bond the dictionary must contain: - - k (`Pint.Quantity`) : Magnitude of the bond. It should have units of energy/length**2 - using the `pmb.units` UnitRegistry. - - r_0 (`Pint.Quantity`) : Equilibrium bond length. It should have units of length using - the `pmb.units` UnitRegistry. - - For a FENE bond the dictionary must additionally contain: - - d_r_max (`Pint.Quantity`): Maximal stretching length for FENE. 
It should have - units of length using the `pmb.units` UnitRegistry. Default 'None'. - - Returns: - (`espressomd.interactions`): instance of an ESPResSo bond object - """ - from espressomd import interactions - valid_bond_types = ["harmonic", "FENE"] - if bond_type not in valid_bond_types: - raise NotImplementedError(f"Bond type '{bond_type}' currently not implemented in pyMBE, accepted types are {valid_bond_types}") - required_parameters = {"harmonic": ["r_0","k"], - "FENE": ["r_0","k","d_r_max"]} - for required_parameter in required_parameters[bond_type]: - if required_parameter not in bond_parameters.keys(): - raise ValueError(f"Missing required parameter {required_parameter} for {bond_type} bond") - if bond_type == 'harmonic': - bond_instance = interactions.HarmonicBond(k = bond_parameters["k"].m_as("reduced_energy/reduced_length**2"), - r_0 = bond_parameters["r_0"].m_as("reduced_length")) - elif bond_type == 'FENE': - bond_instance = interactions.FeneBond(k = bond_parameters["k"].m_as("reduced_energy/reduced_length**2"), - r_0 = bond_parameters["r_0"].m_as("reduced_length"), - d_r_max = bond_parameters["d_r_max"].m_as("reduced_length")) - return bond_instance - - def create_counterions(self, object_name, cation_name, anion_name, espresso_system): """ Creates particles of `cation_name` and `anion_name` in `espresso_system` to counter the net charge of `pmb_object`. 
@@ -630,23 +754,19 @@ def create_hydrogel(self, name, espresso_system): for node in node_topology: node_index = node.lattice_index node_name = node.particle_name - node_pos, node_id = self.create_hydrogel_node(node_index=node_index, + node_pos, node_id = self._create_hydrogel_node(node_index=node_index, node_name=node_name, espresso_system=espresso_system) node_label = self.lattice_builder._create_node_label(node_index=node_index) - nodes[node_label] = {} - nodes[node_label]["name"] = node_name - nodes[node_label]["id"] = node_id - nodes[node_label]["pos"] = node_pos + nodes[node_label] = {"name": node_name, "id": node_id, "pos": node_pos} self.db._update_instance(instance_id=node_id, pmb_type="particle", attribute="assembly_id", value=assembly_id) - # Create the polymer chains between nodes for hydrogel_chain in hydrogel_tpl.chain_map: - molecule_id = self.create_hydrogel_chain(hydrogel_chain=hydrogel_chain, - nodes=nodes, - espresso_system=espresso_system) + molecule_id = self._create_hydrogel_chain(hydrogel_chain=hydrogel_chain, + nodes=nodes, + espresso_system=espresso_system) self.db._update_instance(instance_id=molecule_id, pmb_type="molecule", attribute="assembly_id", @@ -655,108 +775,11 @@ def create_hydrogel(self, name, espresso_system): root_id=assembly_id, attribute="assembly_id", value=assembly_id) + # Register an hydrogel instance in the pyMBE database + self.db._register_instance(HydrogelInstance(name=name, + assembly_id=assembly_id)) return assembly_id - def create_hydrogel_chain(self, hydrogel_chain, nodes, espresso_system): - """ - Creates a chain between two nodes of a hydrogel. - - Args: - hydrogel_chain(HydrogelChain): template of a hydrogel chain - nodes(dict): {node_index: {"name": node_particle_name, "pos": node_position, "id": node_particle_instance_id}} - espresso_system (espressomd.system.System): ESPResSo system object where the hydrogel chain will be created. - - Return: - (int): molecule_id of the created hydrogel chian. 
- - Note: - - For example, if the chain is defined between node_start = ``[0 0 0]`` and node_end = ``[1 1 1]``, the chain will be placed between these two nodes. - - The chain will be placed in the direction of the vector between `node_start` and `node_end`. - - This function does not support default bonds. - """ - if self.lattice_builder is None: - raise ValueError("LatticeBuilder is not initialized. Use `initialize_lattice_builder` first.") - molecule_tpl = self.db.get_template(pmb_type="molecule", - name=hydrogel_chain.molecule_name) - residue_list = molecule_tpl.residue_list - molecule_name = molecule_tpl.name - node_start = hydrogel_chain.node_start - node_end = hydrogel_chain.node_end - node_start_label = self.lattice_builder._create_node_label(node_start) - node_end_label = self.lattice_builder._create_node_label(node_end) - _, reverse = self.lattice_builder._get_node_vector_pair(node_start, node_end) - if node_start != node_end or residue_list == residue_list[::-1]: - RuntimeError(f"Aborted creation because hydrogel chain between '{node_start}' and '{node_end}' because pyMBE could not resolve a unique topology for that chain") - if reverse: - residue_list = residue_list[::-1] - start_node_id = nodes[node_start_label]["id"] - end_node_id = nodes[node_end_label]["id"] - # Finding a backbone vector between node_start and node_end - vec_between_nodes = np.array(nodes[node_start_label]["pos"]) - np.array(nodes[node_end_label]["pos"]) - vec_between_nodes = vec_between_nodes - self.lattice_builder.box_l * np.round(vec_between_nodes/self.lattice_builder.box_l) - backbone_vector = np.array((vec_between_nodes/(self.lattice_builder.mpc + 1))) - backbone_vector = backbone_vector / np.linalg.norm(backbone_vector) - # Calculate the start position of the chain - chain_residues = self.db.get_template(pmb_type="molecule", - name=molecule_name).residue_list - part_start_chain_name = self.db.get_template(pmb_type="residue", - name=chain_residues[0]).central_bead - 
part_end_chain_name = self.db.get_template(pmb_type="residue", - name=chain_residues[-1]).central_bead - lj_parameters = self.get_lj_parameters(particle_name1=nodes[node_start_label]["name"], - particle_name2=part_start_chain_name) - bond_tpl = self.get_bond_template(particle_name1=nodes[node_start_label]["name"], - particle_name2=part_start_chain_name) - l0 = hf.calculate_initial_bond_length(lj_parameters=lj_parameters, - bond_type=bond_tpl.bond_type, - bond_parameters=bond_tpl.get_parameters(ureg=self.units)) - first_bead_pos = np.array((nodes[node_start_label]["pos"])) + np.array(backbone_vector)*l0 - mol_id = self.create_molecule(name=molecule_name, # Use the name defined earlier - number_of_molecules=1, # Creating one chain - espresso_system=espresso_system, - list_of_first_residue_positions=[first_bead_pos.tolist()],#Start at the first node - backbone_vector=np.array(backbone_vector)/l0, - use_default_bond=False) - # Bond chain to the hydrogel nodes - chain_pids = self.db._find_instance_ids_by_attribute(pmb_type="particle", - attribute="molecule_id", - value=mol_id) - start_bond_instance = self.get_espresso_bond_instance(particle_name1=nodes[node_start_label]["name"], - particle_name2=part_start_chain_name, - espresso_system=espresso_system) - end_bond_instance = self.get_espresso_bond_instance(particle_name1=nodes[node_end_label]["name"], - particle_name2=part_end_chain_name, - espresso_system=espresso_system) - espresso_system.part.by_id(start_node_id).add_bond((start_bond_instance, chain_pids[0])) - espresso_system.part.by_id(chain_pids[-1]).add_bond((end_bond_instance, end_node_id)) - return mol_id - - def create_hydrogel_node(self, node_index, node_name, espresso_system): - """ - Set a node residue type. - - Args: - node_index(`str`): Lattice node index in the form of a string, e.g. "[0 0 0]". - node_name(`str`): name of the node particle defined in pyMBE. 
- espresso_system (espressomd.system.System): ESPResSo system object where the hydrogel node will be created. - - Returns: - node_position(`list`): Position of the node in the lattice. - p_id(`int`): Particle ID of the node. - """ - if self.lattice_builder is None: - raise ValueError("LatticeBuilder is not initialized. Use `initialize_lattice_builder` first.") - - node_position = np.array(node_index)*0.25*self.lattice_builder.box_l - p_id = self.create_particle(name = node_name, - espresso_system=espresso_system, - number_of_particles=1, - position = [node_position]) - key = self.lattice_builder._get_node_by_label(f"[{node_index[0]} {node_index[1]} {node_index[2]}]") - self.lattice_builder.nodes[key] = node_name - - return node_position.tolist(), p_id[0] - def create_molecule(self, name, number_of_molecules, espresso_system, list_of_first_residue_positions=None, backbone_vector=None, use_default_bond=False): """ Creates `number_of_molecules` molecule of type `name` into `espresso_system` and bookkeeps them into `pmb.df`. 
@@ -845,12 +868,13 @@ def create_molecule(self, name, number_of_molecules, espresso_system, list_of_fi residue_tpl = self.db.get_template(pmb_type="residue", name=residue) lj_parameters = self.get_lj_parameters(particle_name1=prev_central_bead_name, - particle_name2=residue_tpl.central_bead) + particle_name2=residue_tpl.central_bead) bond_tpl = self.get_bond_template(particle_name1=prev_central_bead_name, - particle_name2=residue_tpl.central_bead) + particle_name2=residue_tpl.central_bead, + use_default_bond=use_default_bond) l0 = hf.calculate_initial_bond_length(lj_parameters=lj_parameters, - bond_type=bond_tpl.bond_type, - bond_parameters=bond_tpl.get_parameters(ureg=self.units)) + bond_type=bond_tpl.bond_type, + bond_parameters=bond_tpl.get_parameters(ureg=self.units)) central_bead_pos = prev_central_bead_pos+backbone_vector*l0 # Create the residue residue_id = self.create_residue(name=residue, @@ -1082,7 +1106,8 @@ def create_residue(self, name, espresso_system, central_bead_position=None,use_d lj_parameters = self.get_lj_parameters(particle_name1=central_bead_name, particle_name2=side_chain_name) bond_tpl = self.get_bond_template(particle_name1=central_bead_name, - particle_name2=side_chain_name) + particle_name2=side_chain_name, + use_default_bond=use_default_bond) l0 = hf.calculate_initial_bond_length(lj_parameters=lj_parameters, bond_type=bond_tpl.bond_type, bond_parameters=bond_tpl.get_parameters(ureg=self.units)) @@ -1115,7 +1140,8 @@ def create_residue(self, name, espresso_system, central_bead_position=None,use_d lj_parameters = self.get_lj_parameters(particle_name1=central_bead_name, particle_name2=central_bead_side_chain) bond_tpl = self.get_bond_template(particle_name1=central_bead_name, - particle_name2=central_bead_side_chain) + particle_name2=central_bead_side_chain, + use_default_bond=use_default_bond) l0 = hf.calculate_initial_bond_length(lj_parameters=lj_parameters, bond_type=bond_tpl.bond_type, 
bond_parameters=bond_tpl.get_parameters(ureg=self.units)) @@ -1376,93 +1402,34 @@ def define_residue(self, name, central_bead, side_chains): self.db._register_template(tpl) return - def delete_molecule_in_system(self, molecule_id, espresso_system): + def delete_instances_in_system(self, instance_id, pmb_type, espresso_system): """ - Deletes the molecule with `molecule_id` from the `espresso_system`, including all particles and residues associated with that particles. - The ids of the molecule, particle and residues deleted are also cleaned from `pmb.df` + Deletes the instance with instance_id from the ESPResSo system. + Related assembly, molecule, residue, particles and bond instances will also be deleted from the pyMBE dataframe. Args: - molecule_id(`int`): id of the molecule to be deleted. - espresso_system(`espressomd.system.System`): Instance of a system class from espressomd library. - - """ - # Sanity checks - id_mask = (self.df['molecule_id'] == molecule_id) & (self.df['pmb_type'].isin(["molecule", "peptide"])) - molecule_row = self.df.loc[id_mask] - if molecule_row.empty: - raise ValueError(f"No molecule found with molecule_id={molecule_id} in the DataFrame.") - # Clean molecule from pmb.df - self.df = _DFm._clean_ids_in_df_row(df = self.df, - row = molecule_row) - # Delete particles and residues in the molecule - residue_mask = (self.df['molecule_id'] == molecule_id) & (self.df['pmb_type'] == "residue") - residue_rows = self.df.loc[residue_mask] - residue_ids = set(residue_rows["residue_id"].values) - for residue_id in residue_ids: - self.delete_residue_in_system(residue_id=residue_id, - espresso_system=espresso_system) + instance_id (int): id of the assembly to be deleted. + pmb_type (str): the instance type to be deleted. + espresso_system (espressomd.system.System): Instance of a system class from espressomd library. 
+ + """ + if pmb_type == "particle": + instance_identifier = "particle_id" + elif pmb_type == "residue": + instance_identifier = "residue_id" + elif pmb_type in self.db._molecule_like_types: + instance_identifier = "molecule_id" + elif pmb_type in self.db._assembly_like_types: + instance_identifier = "assembly_id" + particle_ids = self.db._find_instance_ids_by_attribute(pmb_type="particle", + attribute="molecule_id", + value=instance_identifier) + self._delete_particles_from_espresso(particle_ids=particle_ids, + espresso_system=espresso_system) - # Clean deleted backbone bonds from pmb.df - bond_mask = (self.df['molecule_id'] == molecule_id) & (self.df['pmb_type'] == "bond") - number_of_bonds = len(self.df.loc[bond_mask]) - for _ in range(number_of_bonds): - bond_mask = (self.df['molecule_id'] == molecule_id) & (self.df['pmb_type'] == "bond") - bond_rows = self.df.loc[bond_mask] - row = bond_rows.loc[[bond_rows.index[0]]] - self.df = _DFm._clean_ids_in_df_row(df = self.df, - row = row) - - def delete_particle_in_system(self, particle_id, espresso_system): - """ - Deletes the particle with `particle_id` from the `espresso_system`. - The particle ids of the particle and residues deleted are also cleaned from `pmb.df` - - Args: - particle_id(`int`): id of the molecule to be deleted. - espresso_system(`espressomd.system.System`): Instance of a system class from espressomd library. 
- - """ - # Sanity check if there is a particle with the input particle id - id_mask = (self.df['particle_id'] == particle_id) & (self.df['pmb_type'] == "particle") - particle_row = self.df.loc[id_mask] - if particle_row.empty: - raise ValueError(f"No particle found with particle_id={particle_id} in the DataFrame.") - espresso_system.part.by_id(particle_id).remove() - self.df = _DFm._clean_ids_in_df_row(df = self.df, - row = particle_row) - - def delete_residue_in_system(self, residue_id, espresso_system): - """ - Deletes the residue with `residue_id`, and the particles associated with it from the `espresso_system`. - The ids of the residue and particles deleted are also cleaned from `pmb.df` - - Args: - residue_id(`int`): id of the residue to be deleted. - espresso_system(`espressomd.system.System`): Instance of a system class from espressomd library. - """ - # Sanity check if there is a residue with the input residue id - id_mask = (self.df['residue_id'] == residue_id) & (self.df['pmb_type'] == "residue") - residue_row = self.df.loc[id_mask] - if residue_row.empty: - raise ValueError(f"No residue found with residue_id={residue_id} in the DataFrame.") - residue_map=self.get_particle_id_map(object_name=residue_row["name"].values[0])["residue_map"] - particle_ids = residue_map[residue_id] - # Clean residue from pmb.df - self.df = _DFm._clean_ids_in_df_row(df = self.df, - row = residue_row) - # Delete particles in the residue - for particle_id in particle_ids: - self.delete_particle_in_system(particle_id=particle_id, - espresso_system=espresso_system) - # Clean deleted bonds from pmb.df - bond_mask = (self.df['residue_id'] == residue_id) & (self.df['pmb_type'] == "bond") - number_of_bonds = len(self.df.loc[bond_mask]) - for _ in range(number_of_bonds): - bond_mask = (self.df['residue_id'] == residue_id) & (self.df['pmb_type'] == "bond") - bond_rows = self.df.loc[bond_mask] - row = bond_rows.loc[[bond_rows.index[0]]] - self.df = _DFm._clean_ids_in_df_row(df = self.df, 
- row = row) + self.db.delete_instance(pmb_type=pmb_type, + instance_id=instance_id, + cascade=True) def determine_reservoir_concentrations(self, pH_res, c_salt_res, activity_coefficient_monovalent_pair, max_number_sc_runs=200): """ @@ -1752,7 +1719,7 @@ def get_espresso_bond_instance(self, particle_name1, particle_name2, espresso_sy bond_inst = self._bond_instances[bond_tpl.name] else: # Create an instance of the bond - bond_inst = self.create_espresso_bond_instance(bond_type=bond_tpl.bond_type, + bond_inst = self._create_espresso_bond_instance(bond_type=bond_tpl.bond_type, bond_parameters=bond_tpl.get_parameters(self.units)) self._bond_instances[bond_tpl.name]= bond_inst espresso_system.bonded_inter.add(bond_inst) diff --git a/pyMBE/storage/io.py b/pyMBE/storage/io.py index f5d4ea3..632aee4 100644 --- a/pyMBE/storage/io.py +++ b/pyMBE/storage/io.py @@ -351,15 +351,11 @@ def _load_database_csv(db, folder): ) instances[inst.bond_id] = inst elif pmb_type == "hydrogel": - mol_ids = _decode(row.get("molecule_ids", "")) or [] - if not isinstance(mol_ids, list): - mol_ids = list(mol_ids) inst = HydrogelInstance( name=row["name"], - hydrogel_id=int(row["hydrogel_id"]), - molecule_ids=mol_ids + assembly_id=int(row["assembly_id"]), ) - instances[inst.hydrogel_id] = inst + instances[inst.assembly_id] = inst db._instances[pmb_type] = instances # REACTIONS diff --git a/pyMBE/storage/manager.py b/pyMBE/storage/manager.py index cd32ff9..6745a76 100644 --- a/pyMBE/storage/manager.py +++ b/pyMBE/storage/manager.py @@ -82,6 +82,10 @@ def __init__(self,units): self._templates: Dict[str, Dict[str, TemplateType]] = {} self._instances: Dict[str, Dict[int, InstanceType]] = {} self._reactions: Dict[str, Reaction] = {} + self._molecule_like_types = ["molecule", + "peptide", + "protein"] + self._assembly_like_types = ["hydrogel"] def _delete_bonds_of_particle(self, pid): """ @@ -183,17 +187,11 @@ def _find_instance_ids_by_name(self, pmb_type, name): return result - - def 
_find_template_types(self, name): """ Return all pyMBE template categories that contain a template with a given name. - Searches across every template group stored in ``self._templates``, - and collects the PMB types (keys of the template registry) for which - a template named ``name`` exists. - Args: name (str): The template name to search for. @@ -212,11 +210,9 @@ def _find_template_types(self, name): [] """ found = [] - for pmb_type, group in self._templates.items(): if name in group: found.append(pmb_type) - return found @@ -468,7 +464,7 @@ def _register_instance(self, instance): iid = instance.bond_id elif isinstance(instance, HydrogelInstance): pmb_type = "hydrogel" - iid = instance.hydrogel_id + iid = instance.assembly_id else: raise TypeError("Unsupported instance type") @@ -729,10 +725,9 @@ def _propose_instance_id(self, pmb_type): Notes: - The method does not fill gaps; it always returns ``max + 1``. """ - molecule_like_types = ["molecule", "peptide", "protein"] - if pmb_type in molecule_like_types: + if pmb_type in self._molecule_like_types: used_ids = [] - for t in molecule_like_types: + for t in self._molecule_like_types: if t in self._instances: used_ids.extend(self._instances[t].keys()) if not used_ids: @@ -786,114 +781,123 @@ def delete_template(self, pmb_type, name): if not self._templates[pmb_type]: del self._templates[pmb_type] - def delete_instance(self, pmb_type, instance_id, cascade = False): + def delete_instance(self, pmb_type, instance_id, cascade=False): """ Delete an instance from the pyMBE database. - Supports cascade deletion: - - molecule → residues → particles → bonds - - residue → particles → bonds - - particle → bonds - - bond → nothing + Supports cascade deletion through the hierarchy: + assembly → molecules → residues → particles → bonds + molecule → residues → particles → bonds + residue → particles → bonds + particle → bonds + bond → nothing Args: - pmb_type (str): Category of the instance (particle, residue, molecule, bond). 
- instance_id (int): Unique identifier of the instance. - cascade (bool): If True, automatically delete dependent child objects. + pmb_type (str): + Category of the instance (particle, residue, molecule, peptide, + protein, hydrogel, bond). + instance_id (int): + Unique identifier of the instance. + cascade (bool): + If True, automatically delete dependent objects. Raises: - KeyError: If the instance does not exist. - ValueError: If cascade is False but dependencies exist. + KeyError: + If the instance does not exist. + ValueError: + If cascade is False but dependencies exist. """ - # --- Basic sanity checks --- + + # ---- Basic checks ---- if pmb_type not in self._instances: raise KeyError(f"Instance type '{pmb_type}' not found.") - if instance_id not in self._instances[pmb_type]: raise KeyError(f"Instance ID '{instance_id}' not found in '{pmb_type}'.") - inst = self._instances[pmb_type][instance_id] - - # ---- Helper function for deletion of bonds ---- - - - # ---- CASCADE deletion logic ---- + # =============== CASCADE DELETION ========================= if cascade: - # DELETE children depending on type - if pmb_type == "molecule": - # 1. residues → 2. particles → 3. bonds - residues = self._find_instance_ids_by_attribute( - "residue", "molecule_id", instance_id - ) + # --- Delete children of ASSEMBLY-like objects --- + if pmb_type in self._assembly_like_types: + for mtype in self._molecule_like_types: + mids = self._find_instance_ids_by_attribute(pmb_type=mtype, + attribute="assembly_id", + value=instance_id,) + for mid in mids: + self.delete_instance(pmb_type=mtype, + instance_id=mid, + cascade=True) + # delete particles inside the assembly *even if they have no residue/molecule* (e.g. 
nodes) + pids = self._find_instance_ids_by_attribute(pmb_type="particle", + attribute="assembly_id", + value=instance_id) + for pid in pids: + self.delete_instance(pmb_type="particle", + instance_id=pid, + cascade=True) + # --- Delete children of MOLECULE-like objects --- + if pmb_type in self._molecule_like_types: + residues = self._find_instance_ids_by_attribute(pmb_type="residue", + attribute="molecule_id", + value=instance_id,) for rid in residues: - self.delete_instance("residue", rid, cascade=True) + self.delete_instance(pmb_type="residue", + instance_id=rid, + cascade=True) - elif pmb_type == "residue": - # 1. particles → 2. bonds - particles = self._find_instance_ids_by_attribute( - "particle", "residue_id", instance_id - ) + # --- Delete children of RESIDUE --- + if pmb_type == "residue": + particles = self._find_instance_ids_by_attribute(pmb_type="particle", + attribute="residue_id", + value=instance_id,) for pid in particles: - self.delete_instance("particle", pid, cascade=True) + self.delete_instance(pmb_type="particle", + instance_id=pid, + cascade=True) - elif pmb_type == "particle": - # 1. bonds only + # --- Delete children of PARTICLE (only bonds) --- + if pmb_type == "particle": self._delete_bonds_of_particle(instance_id) - # For bonds there is nothing to cascade - + # =============== NON-CASCADE (SAFE DELETE) ================ else: - # ---- NON-cascade: forbid deletions that break structure ---- + # ---- ASSEMBLY-like: forbid deletion if molecules belong to it ---- + if pmb_type in self._assembly_like_types: + for mtype in self._molecule_like_types: + mids = self._find_instance_ids_by_attribute(pmb_type=mtype, + attribute="assembly_id", + value=instance_id,) + if mids: + raise ValueError(f"{pmb_type} {instance_id} contains {mtype} instances {mids}. 
Use cascade=True to delete.") + # ---- MOLECULE-like: check residues ---- + if pmb_type in self._molecule_like_types: + residues = self._find_instance_ids_by_attribute(pmb_type="residue", + attribute="molecule_id", + value=instance_id,) + if residues: + raise ValueError(f"{pmb_type} {instance_id} has residues {residues}. Use cascade=True to delete.") + # ---- RESIDUE: check particles ---- + if pmb_type == "residue": + particles = self._find_instance_ids_by_attribute(pmb_type="particle", + attribute="residue_id", + value=instance_id) + if particles: + raise ValueError(f"Residue {instance_id} contains particles {particles}. Use cascade=True.") + # ---- PARTICLE: check bonds and belonging ---- if pmb_type == "particle": if inst.residue_id is not None: - raise ValueError( - f"Particle {instance_id} belongs to residue {inst.residue_id}. " - f"Use cascade=True to delete anyway." - ) + raise ValueError(f"Particle {instance_id} belongs to residue {inst.residue_id}. Use cascade=True.") if inst.molecule_id is not None: - raise ValueError( - f"Particle {instance_id} belongs to molecule {inst.molecule_id}. " - f"Use cascade=True." - ) - - # check bond dependencies - bonds = [ - b_id for b_id, b in self._instances.get("bond", {}).items() - if b.particle_id1 == instance_id or b.particle_id2 == instance_id - ] + raise ValueError(f"Particle {instance_id} belongs to molecule {inst.molecule_id}. Use cascade=True.") + if inst.assembly_id is not None: + raise ValueError(f"Particle {instance_id} belongs to assembly {inst.assembly_id}. "f"Use cascade=True.") + bonds = [b_id for b_id, b in self._instances.get("bond", {}).items() if b.particle_id1 == instance_id or b.particle_id2 == instance_id] if bonds: - raise ValueError( - f"Particle {instance_id} is in bonds {bonds}. " - f"Use cascade=True." 
- ) - - if pmb_type == "residue": - particles = self._find_instance_ids_by_attribute( - "particle", "residue_id", instance_id - ) - if particles: - raise ValueError( - f"Residue {instance_id} has particles {particles}. " - f"Use cascade=True." - ) - - if pmb_type == "molecule": - residues = self._find_instance_ids_by_attribute( - "residue", "molecule_id", instance_id - ) - if residues: - raise ValueError( - f"Molecule {instance_id} has residues {residues}. " - f"Use cascade=True." - ) - - # ---- Perform final deletion of this object ---- + raise ValueError(f"Particle {instance_id} participates in bonds {bonds}. Use cascade=True.") + # =============== FINAL DELETION STEP ====================== del self._instances[pmb_type][instance_id] - if not self._instances[pmb_type]: del self._instances[pmb_type] - def get_instance(self, pmb_type, instance_id): """ Retrieve a stored instance by type and instance_id. diff --git a/test.py b/test.py index f4ecf21..01f02f4 100644 --- a/test.py +++ b/test.py @@ -203,14 +203,18 @@ def main(): print(pmb.db._get_instances_df(pmb_type="molecule")) + print("\n=== Peptide Instances DataFrame ===") + pmb.create_molecule(name="Peptide1", + number_of_molecules=1, + espresso_system=espresso_system, + use_default_bond=True) + print(pmb.db._get_instances_df(pmb_type="peptide")) + print("\n=== Bond Instances DataFrame ===") print(pmb.db._get_instances_df(pmb_type="bond")) - print("\n=== Peptide Instances DataFrame ===") - - print(pmb.db._get_instances_df(pmb_type="peptide")) - + print("\n=== Protein Instances DataFrame ===") pmb.create_protein(name="1beb", number_of_proteins=1, @@ -302,6 +306,70 @@ def main(): print("\nLoaded DB3 Reactions DataFrame:") print(db3._get_reactions_df()) + pmb.delete_instances_in_system(instance_id=0, + pmb_type="hydrogel", + espresso_system=espresso_system) + + print("instances in database after deleting the hydrogel") + print(pmb.db._get_instances_df("particle")) + print(pmb.db._get_instances_df("residue")) + 
print(pmb.db._get_instances_df("molecule")) + print(pmb.db._get_instances_df("bond")) + print(pmb.db._get_instances_df("peptide")) + print(pmb.db._get_instances_df("protein")) + print(pmb.db._get_instances_df("hydrogel")) + + pmb.delete_instances_in_system(instance_id=3, + pmb_type="protein", + espresso_system=espresso_system) + + print("instances in database after deleting the protein") + print(pmb.db._get_instances_df("particle")) + print(pmb.db._get_instances_df("residue")) + print(pmb.db._get_instances_df("molecule")) + print(pmb.db._get_instances_df("bond")) + print(pmb.db._get_instances_df("peptide")) + print(pmb.db._get_instances_df("protein")) + print(pmb.db._get_instances_df("hydrogel")) + + pmb.delete_instances_in_system(instance_id=1, + pmb_type="molecule", + espresso_system=espresso_system) + + pmb.delete_instances_in_system(instance_id=0, + pmb_type="molecule", + espresso_system=espresso_system) + + pmb.delete_instances_in_system(instance_id=1, + pmb_type="residue", + espresso_system=espresso_system) + + pmb.delete_instances_in_system(instance_id=0, + pmb_type="residue", + espresso_system=espresso_system) + + print("instances in database after deleting residues and molecules") + print(pmb.db._get_instances_df("particle")) + print(pmb.db._get_instances_df("residue")) + print(pmb.db._get_instances_df("molecule")) + print(pmb.db._get_instances_df("bond")) + print(pmb.db._get_instances_df("peptide")) + print(pmb.db._get_instances_df("protein")) + print(pmb.db._get_instances_df("hydrogel")) + pmb.delete_instances_in_system(instance_id=2, + pmb_type="peptide", + espresso_system=espresso_system) + + print("instances in database after deleting peptides") + print(pmb.db._get_instances_df("particle")) + print(pmb.db._get_instances_df("residue")) + print(pmb.db._get_instances_df("molecule")) + print(pmb.db._get_instances_df("bond")) + print(pmb.db._get_instances_df("peptide")) + print(pmb.db._get_instances_df("protein")) + 
print(pmb.db._get_instances_df("hydrogel")) + + if __name__ == "__main__": main() From 4dbc0c3853b4515da72d4fa7ff02aba242d75915 Mon Sep 17 00:00:00 2001 From: Pablo Date: Tue, 9 Dec 2025 12:51:28 +0100 Subject: [PATCH 12/55] fix added_salt and counterion methods and get_id_map --- pyMBE/pyMBE.py | 113 ++++----- pyMBE/storage/manager.py | 528 ++++++--------------------------------- test.py | 26 +- 3 files changed, 145 insertions(+), 522 deletions(-) diff --git a/pyMBE/pyMBE.py b/pyMBE/pyMBE.py index 3ac8c9b..8acfee6 100644 --- a/pyMBE/pyMBE.py +++ b/pyMBE/pyMBE.py @@ -601,21 +601,17 @@ def create_added_salt(self, espresso_system, cation_name, anion_name, c_salt): Returns: c_salt_calculated(`float`): Calculated salt concentration added to `espresso_system`. - """ - for name in [cation_name, anion_name]: - if not _DFm._check_if_name_is_defined_in_df(name=name, df=self.df): - logging.warning(f"Object with name '{name}' is not defined in the DataFrame, no ions will be created.") - return - self._check_if_name_has_right_type(name=cation_name, - expected_pmb_type="particle") - self._check_if_name_has_right_type(name=anion_name, - expected_pmb_type="particle") - cation_name_charge = self.df.loc[self.df['name']==cation_name].state_one.z.values[0] - anion_name_charge = self.df.loc[self.df['name']==anion_name].state_one.z.values[0] - if cation_name_charge <= 0: - raise ValueError('ERROR cation charge must be positive, charge ',cation_name_charge) - if anion_name_charge >= 0: - raise ValueError('ERROR anion charge must be negative, charge ', anion_name_charge) + """ + cation_tpl = self.db.get_template(pmb_type="particle", + name=cation_name) + cation_charge = cation_tpl.states[cation_tpl.initial_state].z + anion_tpl = self.db.get_template(pmb_type="particle", + name=anion_name) + anion_charge = anion_tpl.states[anion_tpl.initial_state].z + if cation_charge <= 0: + raise ValueError(f'ERROR cation charge must be positive, charge {cation_charge}') + if anion_charge >= 0: + raise 
ValueError(f'ERROR anion charge must be negative, charge {anion_charge}') # Calculate the number of ions in the simulation box volume=self.units.Quantity(espresso_system.volume(), 'reduced_length**3') if c_salt.check('[substance] [length]**-3'): @@ -626,10 +622,14 @@ def create_added_salt(self, espresso_system, cation_name, anion_name, c_salt): c_salt_calculated=N_ions/volume else: raise ValueError('Unknown units for c_salt, please provided it in [mol / volume] or [particle / volume]', c_salt) - N_cation = N_ions*abs(anion_name_charge) - N_anion = N_ions*abs(cation_name_charge) - self.create_particle(espresso_system=espresso_system, name=cation_name, number_of_particles=N_cation) - self.create_particle(espresso_system=espresso_system, name=anion_name, number_of_particles=N_anion) + N_cation = N_ions*abs(anion_charge) + N_anion = N_ions*abs(cation_charge) + self.create_particle(espresso_system=espresso_system, + name=cation_name, + number_of_particles=N_cation) + self.create_particle(espresso_system=espresso_system, + name=anion_name, + number_of_particles=N_anion) if c_salt_calculated.check('[substance] [length]**-3'): logging.info(f"added salt concentration of {c_salt_calculated.to('mol/L')} given by {N_cation} cations and {N_anion} anions") elif c_salt_calculated.check('[length]**-3'): @@ -690,18 +690,12 @@ def create_counterions(self, object_name, cation_name, anion_name, espresso_syst Note: This function currently does not support the creation of counterions for hydrogels. 
""" - for name in [object_name, cation_name, anion_name]: - if not _DFm._check_if_name_is_defined_in_df(name=name, df=self.df): - logging.warning(f"Object with name '{name}' is not defined in the DataFrame, no counterions will be created.") - return - for name in [cation_name, anion_name]: - self._check_if_name_has_right_type(name=name, expected_pmb_type="particle") - self._check_supported_molecule(molecule_name=object_name, - valid_pmb_types=["molecule","peptide","protein"]) - - - cation_charge = self.df.loc[self.df['name']==cation_name].state_one.z.iloc[0] - anion_charge = self.df.loc[self.df['name']==anion_name].state_one.z.iloc[0] + cation_tpl = self.db.get_template(pmb_type="particle", + name=cation_name) + cation_charge = cation_tpl.states[cation_tpl.initial_state].z + anion_tpl = self.db.get_template(pmb_type="particle", + name=cation_name) + anion_charge = cation_tpl.states[anion_tpl.initial_state].z object_ids = self.get_particle_id_map(object_name=object_name)["all"] counterion_number={} object_charge={} @@ -721,11 +715,15 @@ def create_counterions(self, object_name, cation_name, anion_name, espresso_syst else: raise ValueError('The number of negative charges in the pmb_object must be divisible by the charge of the cation') if counterion_number[cation_name] > 0: - self.create_particle(espresso_system=espresso_system, name=cation_name, number_of_particles=counterion_number[cation_name]) + self.create_particle(espresso_system=espresso_system, + name=cation_name, + number_of_particles=counterion_number[cation_name]) else: counterion_number[cation_name]=0 if counterion_number[anion_name] > 0: - self.create_particle(espresso_system=espresso_system, name=anion_name, number_of_particles=counterion_number[anion_name]) + self.create_particle(espresso_system=espresso_system, + name=anion_name, + number_of_particles=counterion_number[anion_name]) else: counterion_number[anion_name] = 0 logging.info('the following counter-ions have been created: ') @@ -775,7 +773,7 
@@ def create_hydrogel(self, name, espresso_system): root_id=assembly_id, attribute="assembly_id", value=assembly_id) - # Register an hydrogel instance in the pyMBE database + # Register an hydrogel instance in the pyMBE databasegit self.db._register_instance(HydrogelInstance(name=name, assembly_id=assembly_id)) return assembly_id @@ -1785,42 +1783,23 @@ def get_lj_parameters(self, particle_name1, particle_name2, combining_rule='Lore return {"sigma": sigma, "cutoff": cutoff, "offset": offset, "epsilon": epsilon} def get_particle_id_map(self, object_name): - ''' - Gets all the ids associated with the object with name `object_name` in `pmb.df` + """ + Collect all particle IDs associated with an object of given name in the + pyMBE database. Works for particles, residues, molecules, proteins, + peptides, and assemblies. + + Relies in the internal method Manager.get_particle_id_map, see method for the detailed code. Args: - object_name(`str`): name of the object - + object_name (str): Name of the object. 
+ Returns: - id_map(`dict`): dict of the structure {"all": [all_ids_with_object_name], "residue_map": {res_id: [particle_ids_in_res_id]}, "molecule_map": {mol_id: [particle_ids_in_mol_id]}, } - ''' - object_type=self._check_supported_molecule(molecule_name=object_name, - valid_pmb_types= ['particle','residue','molecule',"peptide","protein"]) - id_list = [] - mol_map = {} - res_map = {} - def do_res_map(res_ids): - for res_id in res_ids: - res_list=self.df.loc[(self.df['residue_id']== res_id) & (self.df['pmb_type']== "particle")].particle_id.dropna().tolist() - res_map[res_id]=res_list - return res_map - if object_type in ['molecule', 'protein', 'peptide']: - mol_ids = self.df.loc[self.df['name']== object_name].molecule_id.dropna().tolist() - for mol_id in mol_ids: - res_ids = set(self.df.loc[(self.df['molecule_id']== mol_id) & (self.df['pmb_type']== "particle") ].residue_id.dropna().tolist()) - res_map=do_res_map(res_ids=res_ids) - mol_list=self.df.loc[(self.df['molecule_id']== mol_id) & (self.df['pmb_type']== "particle")].particle_id.dropna().tolist() - id_list+=mol_list - mol_map[mol_id]=mol_list - elif object_type == 'residue': - res_ids = self.df.loc[self.df['name']== object_name].residue_id.dropna().tolist() - res_map=do_res_map(res_ids=res_ids) - id_list=[] - for res_id_list in res_map.values(): - id_list+=res_id_list - elif object_type == 'particle': - id_list = self.df.loc[self.df['name']== object_name].particle_id.dropna().tolist() - return {"all": id_list, "molecule_map": mol_map, "residue_map": res_map} + dict: {"all": [particle_ids], + "residue_map": {residue_id: [particle_ids]}, + "molecule_map": {molecule_id: [particle_ids]}, + "assembly_map": {assembly_id: [particle_ids]},} + """ + return self.db.get_particle_id_map(object_name=object_name) def get_pka_set(self): ''' diff --git a/pyMBE/storage/manager.py b/pyMBE/storage/manager.py index 6745a76..4140e2a 100644 --- a/pyMBE/storage/manager.py +++ b/pyMBE/storage/manager.py @@ -82,10 +82,12 @@ def 
__init__(self,units): self._templates: Dict[str, Dict[str, TemplateType]] = {} self._instances: Dict[str, Dict[int, InstanceType]] = {} self._reactions: Dict[str, Reaction] = {} + self._molecule_like_types = ["molecule", "peptide", "protein"] self._assembly_like_types = ["hydrogel"] + self._pmb_types = ["particle", "residue"] + self._molecule_like_types + self._assembly_like_types def _delete_bonds_of_particle(self, pid): """ @@ -1005,460 +1007,84 @@ def get_es_types_map(self): result[particle_name][state_name] = state.es_type return result - - - class _NumpyEncoder(json.JSONEncoder): - """ - Custom JSON encoder that converts NumPy arrays to Python lists - and NumPy scalars to Python scalars. - """ - def default(self, obj): - if isinstance(obj, np.ndarray): - return obj.tolist() - if isinstance(obj, np.generic): - return obj.item() - return super().default(obj) - - @classmethod - def _add_bond_in_df(cls, df, particle_id1, particle_id2, use_default_bond=False): - """ - Adds a bond entry on the `pymbe.df` storing the particle_ids of the two bonded particles. - - Args: - df(`DataFrame`): dataframe with pyMBE information. - particle_id1(`int`): particle_id of the type of the first particle type of the bonded particles - particle_id2(`int`): particle_id of the type of the second particle type of the bonded particles - use_default_bond(`bool`, optional): Controls if a bond of type `default` is used to bond particle whose bond types are not defined in `pmb.df`. Defaults to False. - - Returns: - df(`DataFrame`): dataframe with pyMBE information with the new bond added. - index(`int`): Row index where the bond information has been added in pmb.df. 
- """ - particle_name1 = df.loc[(df['particle_id']==particle_id1) & (df['pmb_type']=="particle")].name.values[0] - particle_name2 = df.loc[(df['particle_id']==particle_id2) & (df['pmb_type']=="particle")].name.values[0] - - bond_key = cls._find_bond_key(df = df, - particle_name1 = particle_name1, - particle_name2 = particle_name2, - use_default_bond = use_default_bond) - if not bond_key: - return None - df = cls._copy_df_entry(df = df, - name = bond_key, - column_name = 'particle_id2', - number_of_copies = 1) - indexs = np.where(df['name'] == bond_key) - index_list = list(indexs[0]) - used_bond_df = df.loc[df['particle_id2'].notnull()] - #without this drop the program crashes when dropping duplicates because the 'bond' column is a dict - used_bond_df = used_bond_df.drop([('bond_object','')],axis =1 ) - used_bond_index = used_bond_df.index.to_list() - if not index_list: - return None - for index in index_list: - if index not in used_bond_index: - cls._clean_df_row(df = df, - index = int(index)) - df.at[index,'particle_id'] = particle_id1 - df.at[index,'particle_id2'] = particle_id2 - break - return df, index - - @classmethod - def _add_value_to_df(cls, df, index,key,new_value, non_standard_value=False, overwrite=False): - """ - Adds a value to a cell in the `pmb.df` DataFrame. - - Args: - df(`DataFrame`): dataframe with pyMBE information. - index(`int`): index of the row to add the value to. - key(`str`): the column label to add the value to. - non_standard_value(`bool`, optional): Switch to enable insertion of non-standard values, such as `dict` objects. Defaults to False. - overwrite(`bool`, optional): Switch to enable overwriting of already existing values in pmb.df. Defaults to False. 
- """ - - token = "#protected:" - - def protect(obj): - if non_standard_value: - return token + json.dumps(obj, cls=cls._NumpyEncoder) - return obj - - def deprotect(obj): - if non_standard_value and isinstance(obj, str) and obj.startswith(token): - return json.loads(obj.removeprefix(token)) - return obj - - # Make sure index is a scalar integer value - index = int(index) - assert isinstance(index, int), '`index` should be a scalar integer value.' - idx = pd.IndexSlice - if cls._check_if_df_cell_has_a_value(df=df, index=index, key=key): - old_value = df.loc[index,idx[key]] - if not pd.Series([protect(old_value)]).equals(pd.Series([protect(new_value)])): - name= df.loc[index,('name','')] - pmb_type= df.loc[index,('pmb_type','')] - logging.debug(f"You are attempting to redefine the properties of {name} of pmb_type {pmb_type}") - if overwrite: - logging.info(f'Overwritting the value of the entry `{key}`: old_value = {old_value} new_value = {new_value}') - if not overwrite: - logging.debug(f"pyMBE has preserved of the entry `{key}`: old_value = {old_value}. 
If you want to overwrite it with new_value = {new_value}, activate the switch overwrite = True ") - return - - df.loc[index,idx[key]] = protect(new_value) - if non_standard_value: - df[key] = df[key].apply(deprotect) - return - @classmethod - def _assign_molecule_id(cls, df, molecule_index): - """ - Assigns the `molecule_id` of the pmb object given by `pmb_type` - - Args: - molecule_index(`int`): index of the current `pmb_object_type` to assign the `molecule_id` - Returns: - molecule_id(`int`): Id of the molecule - """ - cls._clean_df_row(df = df, - index = int(molecule_index)) - - if df['molecule_id'].isnull().values.all(): - molecule_id = 0 - else: - molecule_id = df['molecule_id'].max() +1 - cls._add_value_to_df(df = df, - key = ('molecule_id',''), - index = int(molecule_index), - new_value = molecule_id) - return molecule_id - - @staticmethod - def _check_if_df_cell_has_a_value(df, index, key): - """ - Checks if a cell in the `pmb.df` at the specified index and column has a value. - - Args: - df(`DataFrame`): dataframe with pyMBE information. - index(`int`): Index of the row to check. - key(`str`): Column label to check. - - Returns: - `bool`: `True` if the cell has a value, `False` otherwise. - """ - idx = pd.IndexSlice - with warnings.catch_warnings(): - warnings.simplefilter("ignore") - return not pd.isna(df.loc[index, idx[key]]) - - @staticmethod - def _check_if_name_is_defined_in_df(name, df): - """ - Checks if `name` is defined in `pmb.df`. - - Args: - name(`str`): label to check if defined in `pmb.df`. - df(`DataFrame`): dataframe with pyMBE information. - - Returns: - `bool`: `True` for success, `False` otherwise. - """ - return name in df['name'].unique() - - @staticmethod - def _check_if_multiple_pmb_types_for_name(name, pmb_type_to_be_defined, df): - """ - Checks if `name` is defined in `pmb.df` with multiple pmb_types. - - Args: - name(`str`): label to check if defined in `pmb.df`. 
- pmb_type_to_be_defined(`str`): pmb object type corresponding to `name`. - df(`DataFrame`): dataframe with pyMBE information. - - Returns: - `bool`: `True` for success, `False` otherwise. - """ - if name in df['name'].unique(): - current_object_type = df[df['name']==name].pmb_type.values[0] - if current_object_type != pmb_type_to_be_defined: - raise ValueError (f"The name {name} is already defined in the df with a pmb_type = {current_object_type}, pyMBE does not support objects with the same name but different pmb_types") - - @classmethod - def _clean_df_row(cls, df, index, columns_keys_to_clean=("particle_id", "particle_id2", "residue_id", "molecule_id")): - """ - Cleans the columns of `pmb.df` in `columns_keys_to_clean` of the row with index `index` by assigning them a pd.NA value. - - Args: - df(`DataFrame`): dataframe with pyMBE information. - index(`int`): Index of the row to clean. - columns_keys_to_clean(`list` of `str`, optional): List with the column keys to be cleaned. Defaults to [`particle_id`, `particle_id2`, `residue_id`, `molecule_id`]. - """ - for column_key in columns_keys_to_clean: - cls._add_value_to_df(df = df, - key = (column_key,''), - index = index, - new_value = pd.NA) - df.fillna(pd.NA, - inplace = True) - - @staticmethod - def _clean_ids_in_df_row(df, row): - """ - Cleans particle, residue and molecules ids in `row`. - If there are other repeated entries for the same name, drops the row. - - Args: - df(`DataFrame`): dataframe with pyMBE information. - row(pd.DataFrame): A row from the DataFrame to clean. 
- - Returns: - df(`DataFrame`): dataframe with pyMBE information with cleaned ids in `row` - """ - columns_to_clean = ['particle_id', - 'particle_id2', - 'residue_id', - 'molecule_id'] - if len(df.loc[df['name'] == row['name'].values[0]]) > 1: - df = df.drop(row.index).reset_index(drop=True) - - else: - for column_name in columns_to_clean: - df.loc[row.index, column_name] = pd.NA - return df - - @staticmethod - def _copy_df_entry(df, name, column_name, number_of_copies): - ''' - Creates 'number_of_copies' of a given 'name' in `pymbe.df`. - - Args: - df(`DataFrame`): dataframe with pyMBE information. - name(`str`): Label of the particle/residue/molecule type to be created. `name` must be defined in `pmb.df` - column_name(`str`): Column name to use as a filter. - number_of_copies(`int`): number of copies of `name` to be created. - - Returns: - df(`DataFrame`): dataframe with pyMBE information with the new copies of `name` added. - - Note: - - Currently, column_name only supports "particle_id", "particle_id2", "residue_id" and "molecule_id" - ''' - valid_column_names=["particle_id", "residue_id", "molecule_id", "particle_id2" ] - if column_name not in valid_column_names: - raise ValueError(f"{column_name} is not a valid column_name, currently only the following are supported: {valid_column_names}") - df_by_name = df.loc[df.name == name] - if number_of_copies != 1: - df_by_name_repeated = pd.concat ([df_by_name]*(number_of_copies-1), ignore_index=True) - # Concatenate the new particle rows to `df` - df = pd.concat ([df,df_by_name_repeated], ignore_index=True) - else: - if not df_by_name[column_name].isnull().values.any(): - df_by_name = df_by_name[df_by_name.index == df_by_name.index.min()] - df_by_name_repeated = pd.concat ([df_by_name]*(number_of_copies), ignore_index=True) - df_by_name_repeated[column_name] = pd.NA - df = pd.concat ([df,df_by_name_repeated], ignore_index=True) - return df - - @staticmethod - def _create_variable_with_units(variable, units_registry): 
+ def get_particle_id_map(self, object_name): """ - Returns a pint object with the value and units defined in `variable`. + Collect all particle IDs associated with an object of given name in the + pyMBE database. Works for particles, residues, molecules, proteins, + peptides, and assemblies. Args: - variable(`dict` or `str`): {'value': value, 'units': units} - units_registry(`pint.UnitRegistry`): pyMBE UnitRegistry object. + object_name (str): Name of the object. Returns: - variable_with_units(`obj`): variable with units using the pyMBE UnitRegistry. - """ - if isinstance(variable, dict): - value=variable.pop('value') - units=variable.pop('units') - elif isinstance(variable, str): - value = float(re.split(r'\s+', variable)[0]) - units = re.split(r'\s+', variable)[1] - variable_with_units = value * units_registry(units) - return variable_with_units - - @classmethod - def _convert_columns_to_original_format(cls,df,units_registry): - """ - Converts the columns of the Dataframe to the original format in pyMBE. - - Args: - df(`DataFrame`): dataframe with pyMBE information as a string - units_registry(`pint.UnitRegistry`): pyMBE UnitRegistry object. 
- - """ - - columns_dtype_int = ['particle_id','particle_id2', 'residue_id','molecule_id', ('state_one','es_type'),('state_two','es_type'),('state_one','z'),('state_two','z') ] - - columns_with_units = ['sigma', 'epsilon', 'cutoff', 'offset'] - - columns_with_list_or_dict = ['residue_list','side_chains', 'parameters_of_the_potential','sequence', 'chain_map', 'node_map'] - - for column_name in columns_dtype_int: - df[column_name] = df[column_name].astype(pd.Int64Dtype()) - - for column_name in columns_with_list_or_dict: - if df[column_name].isnull().all(): - df[column_name] = df[column_name].astype(object) - else: - df[column_name] = df[column_name].apply(lambda x: json.loads(x) if pd.notnull(x) else x) - - for column_name in columns_with_units: - df[column_name] = df[column_name].apply(lambda x: cls._create_variable_with_units(x, units_registry) if pd.notnull(x) else x) - - df['bond_object'] = df['bond_object'].apply(lambda x: cls._convert_str_to_bond_object(x) if pd.notnull(x) else x) - df["l0"] = df["l0"].astype(object) - df["pka"] = df["pka"].astype(object) - - @staticmethod - def _convert_str_to_bond_object(bond_str): - """ - Convert a row read as a `str` to the corresponding ESPResSo bond object. - - Args: - bond_str(`str`): string with the information of a bond object. - - Returns: - bond_object(`obj`): ESPResSo bond object. 
- - Note: - Currently supported bonds are: HarmonicBond and FeneBond - """ - import espressomd.interactions - - supported_bonds = ['HarmonicBond', 'FeneBond'] - m = re.search(r'^([A-Za-z0-9_]+)\((\{.+\})\)$', bond_str) - if m is None: - raise ValueError(f'Cannot parse bond "{bond_str}"') - bond = m.group(1) - if bond not in supported_bonds: - raise NotImplementedError(f"Bond type '{bond}' currently not implemented in pyMBE, accepted types are {supported_bonds}") - params = json.loads(m.group(2)) - bond_id = params.pop("bond_id") - bond_object = getattr(espressomd.interactions, bond)(**params) - bond_object._bond_id = bond_id - return bond_object - - @staticmethod - def _delete_entries_in_df(df, entry_name): - """ - Deletes entries with name `entry_name` from the DataFrame if it exists. - - Args: - df(`DataFrame`): dataframe with pyMBE information. - entry_name (`str`): The name of the entry in the dataframe to delete. - - Returns: - df(`DataFrame`): dataframe with pyMBE information with the entry deleted. - """ - if entry_name in df["name"].values: - df = df[df["name"] != entry_name].reset_index(drop=True) - return df - - @staticmethod - def _find_bond_key(df, particle_name1, particle_name2, use_default_bond=False): - """ - Searches for the `name` of the bond between `particle_name1` and `particle_name2` in `pymbe.df` and returns it. - - Args: - df(`DataFrame`): dataframe with pyMBE information. - particle_name1(`str`): label of the type of the first particle type of the bonded particles. - particle_name2(`str`): label of the type of the second particle type of the bonded particles. - use_default_bond(`bool`, optional): If it is activated, the "default" bond is returned if no bond is found between `particle_name1` and `particle_name2`. Defaults to 'False'. - - Returns: - bond_key (str): `name` of the bond between `particle_name1` and `particle_name2` if a matching bond exists - - Note: - - If `use_default_bond`=`True`, it returns "default" if no key is found. 
- """ - bond_keys = [f'{particle_name1}-{particle_name2}', f'{particle_name2}-{particle_name1}'] - bond_defined=False - for bond_key in bond_keys: - if bond_key in df["name"].values: - bond_defined=True - correct_key=bond_key - break - if bond_defined: - return correct_key - elif use_default_bond: - return 'default' - else: - return None - - @staticmethod - def _setup_df(): - """ - Sets up the pyMBE's dataframe `pymbe.df`. - - Returns: - columns_names(`obj`): pandas multiindex object with the column names of the pyMBE's dataframe - """ - - columns_dtypes = { - 'name': { - '': str}, - 'pmb_type': { - '': str}, - 'particle_id': { - '': pd.Int64Dtype()}, - 'particle_id2': { - '': pd.Int64Dtype()}, - 'residue_id': { - '': pd.Int64Dtype()}, - 'molecule_id': { - '': pd.Int64Dtype()}, - 'acidity': { - '': str}, - 'pka': { - '': object}, - 'central_bead': { - '': object}, - 'side_chains': { - '': object}, - 'residue_list': { - '': object}, - 'model': { - '': str}, - 'sigma': { - '': object}, - 'cutoff': { - '': object}, - 'offset': { - '': object}, - 'epsilon': { - '': object}, - 'state_one': { - 'label': str, - 'es_type': pd.Int64Dtype(), - 'z': pd.Int64Dtype()}, - 'state_two': { - 'label': str, - 'es_type': pd.Int64Dtype(), - 'z': pd.Int64Dtype()}, - 'sequence': { - '': object}, - 'bond_object': { - '': object}, - 'parameters_of_the_potential':{ - '': object}, - 'l0': { - '': float}, - 'node_map':{ - '':object}, - 'chain_map':{ - '':object}} - - df = pd.DataFrame(columns=pd.MultiIndex.from_tuples([(col_main, col_sub) for col_main, sub_cols in columns_dtypes.items() for col_sub in sub_cols.keys()])) - - for level1, sub_dtypes in columns_dtypes.items(): - for level2, dtype in sub_dtypes.items(): - df[level1, level2] = df[level1, level2].astype(dtype) - - columns_names = pd.MultiIndex.from_frame(df) - columns_names = columns_names.names - - return df \ No newline at end of file + dict: {"all": [particle_ids], + "residue_map": {residue_id: [particle_ids]}, + "molecule_map": 
{molecule_id: [particle_ids]}, + "assembly_map": {assembly_id: [particle_ids]},} + """ + # --- Determine object type by searching in the DB ------------------------ + object_type = None + object_ids = [] + for pmb_type in self._pmb_types: + if pmb_type in self._instances: + for inst_id, inst in self._instances[pmb_type].items(): + if getattr(inst, "name", None) == object_name: + object_type = pmb_type + object_ids.append(inst_id) + + if object_type is None: + raise KeyError(f"No object named '{object_name}' found in database.") + # Maps to return + id_list = [] + residue_map = {} + molecule_map = {} + assembly_map = {} + # Shortcut access to all particle instances + particles = self._instances.get("particle", {}) + # Helper: group particle IDs by attribute (molecule_id, residue_id, assembly_id) + def add_to_map(target_map, key, pid): + if key is None: + return + target_map.setdefault(key, []).append(pid) + # Case 1: object is a molecule-like type (molecule, protein, peptide) + if object_type in self._molecule_like_types: + for mol_id in object_ids: + molecule_map[mol_id] = [] + for pid, p in particles.items(): + if p.molecule_id == mol_id: + id_list.append(pid) + molecule_map[mol_id].append(pid) + add_to_map(residue_map, p.residue_id, pid) + add_to_map(assembly_map, p.assembly_id, pid) + # Case 2: object is a residue + elif object_type == "residue": + for res_id in object_ids: + residue_map[res_id] = [] + for pid, p in particles.items(): + if p.residue_id == res_id: + id_list.append(pid) + residue_map[res_id].append(pid) + add_to_map(molecule_map, p.molecule_id, pid) + add_to_map(assembly_map, p.assembly_id, pid) + # Case 3: object is a particle + elif object_type == "particle": + id_list.extend(object_ids) + for pid in object_ids: + p = particles[pid] + add_to_map(residue_map, p.residue_id, pid) + add_to_map(molecule_map, p.molecule_id, pid) + add_to_map(assembly_map, p.assembly_id, pid) + # Case 4: object is an assembly + elif object_type == "assembly": + for 
assembly_id in object_ids: + assembly_map[assembly_id] = [] + for pid, p in particles.items(): + if p.assembly_id == assembly_id: + id_list.append(pid) + assembly_map[assembly_id].append(pid) + add_to_map(molecule_map, p.molecule_id, pid) + add_to_map(residue_map, p.residue_id, pid) + # Deduplicate + sort IDs + id_list = sorted(set(id_list)) + return {"all": id_list, "molecule_map": molecule_map, "residue_map": residue_map, "assembly_map": assembly_map,} \ No newline at end of file diff --git a/test.py b/test.py index 01f02f4..97472d5 100644 --- a/test.py +++ b/test.py @@ -60,8 +60,16 @@ def main(): sigma=3.5 * units.reduced_length, cutoff=4 * units.reduced_length, offset=0 * units.reduced_length, - epsilon=0.2 * units.reduced_energy) + epsilon=0.2 * units.reduced_energy, + z=1) + pmb.define_particle(name="Anion", + sigma=3.5 * units.reduced_length, + cutoff=4 * units.reduced_length, + offset=0 * units.reduced_length, + epsilon=0.2 * units.reduced_energy, + z=-1) + pmb.define_residue(name="R1", central_bead="Z", side_chains=["X","Z"]) pmb.define_residue(name="R2", central_bead="Z", side_chains=["X","R1"]) @@ -159,11 +167,11 @@ def main(): # Define AA particles and residues hf.define_protein_AA_particles(topology_dict=topology_dict, - pmb=pmb) + pmb=pmb) hf.define_protein_AA_residues(topology_dict=topology_dict, - model="2beadAA", - pmb=pmb) + model="2beadAA", + pmb=pmb) print(pmb.db._get_templates_df(pmb_type="particle")) print(pmb.db._get_templates_df(pmb_type="residue")) @@ -208,6 +216,16 @@ def main(): number_of_molecules=1, espresso_system=espresso_system, use_default_bond=True) + + pmb.create_counterions(object_name="M1", + cation_name="X", + anion_name="Anion", + espresso_system=espresso_system) + pmb.create_added_salt(espresso_system=espresso_system, + cation_name="X", + anion_name="Anion", + c_salt=0.1*pmb.units.M) + print(pmb.db._get_instances_df(pmb_type="particle")) print(pmb.db._get_instances_df(pmb_type="peptide")) From 
25b33812bf8b84fe025edae607236fdf62a645ac Mon Sep 17 00:00:00 2001 From: Pablo Date: Wed, 17 Dec 2025 20:54:15 +0100 Subject: [PATCH 13/55] update methods to work with the new database --- pyMBE/pyMBE.py | 50 +------------------------------------------------- 1 file changed, 1 insertion(+), 49 deletions(-) diff --git a/pyMBE/pyMBE.py b/pyMBE/pyMBE.py index 8acfee6..7bdcb4c 100644 --- a/pyMBE/pyMBE.py +++ b/pyMBE/pyMBE.py @@ -53,8 +53,7 @@ class pymbe_library(): """ Core library for the Molecular Builder for ESPResSo (pyMBE). - Provides access to fundamental constants, reduced unit setup, and a - database for storing particle, molecule, and reaction information. + Provides access to functions to define templates to build coarse-grained models of macromolecules. Attributes: N_A (pint.Quantity): Avogadro number. @@ -107,42 +106,6 @@ def __init__(self, seed, temperature=None, unit_length=None, unit_charge=None, K self.root = importlib.resources.files(__package__) self._bond_instances={} - def _check_supported_molecule(self, molecule_name,valid_pmb_types): - """ - Checks if the molecule name `molecule_name` is supported by a method of pyMBE. - - Args: - molecule_name(`str`): pmb object type to be checked. - valid_pmb_types(`list` of `str`): List of valid pmb types supported by the method. - - Returns: - pmb_type(`str`): pmb_type of the molecule. - """ - pmb_type=self.df.loc[self.df['name']==molecule_name].pmb_type.values[0] - if pmb_type not in valid_pmb_types: - raise ValueError("The pyMBE object with name {molecule_name} has a pmb_type {pmb_type}. This function only supports pyMBE types {valid_pmb_types}") - return pmb_type - - def _check_if_name_has_right_type(self, name, expected_pmb_type, hard_check=True): - """ - Checks if `name` is of the expected pmb type. - - Args: - name(`str`): label to check if defined in `pmb.df`. - expected_pmb_type(`str`): pmb object type corresponding to `name`. 
- hard_check(`bool`, optional): If `True`, the raises a ValueError if `name` is corresponds to an objected defined in the pyMBE DataFrame under a different object type than `expected_pmb_type`. - - Returns: - `bool`: `True` for success, `False` otherwise. - """ - pmb_type=self.df.loc[self.df['name']==name].pmb_type.values[0] - if pmb_type == expected_pmb_type: - return True - else: - if hard_check: - raise ValueError(f"The name {name} has been defined in the pyMBE DataFrame with a pmb_type = {pmb_type}. This function only supports pyMBE objects with pmb_type = {expected_pmb_type}") - return False - def _create_espresso_bond_instance(self, bond_type, bond_parameters): """ Creates an ESPResSo bond instance. @@ -2223,11 +2186,6 @@ def read_protein_vtf_in_df (self,filename,unit_length=None): return topology_dict - - - - - def search_particles_in_residue(self, residue_name): ''' Searches for all particles in a given residue of name `residue_name`. @@ -2243,17 +2201,11 @@ def search_particles_in_residue(self, residue_name): - The function will return an empty list if the residue is not defined in `pmb.df`. - The function will return an empty list if the particles are not defined in the pyMBE DataFrame. 
''' - if not _DFm._check_if_name_is_defined_in_df(name=residue_name, df=self.df): - logging.warning(f"Residue {residue_name} not defined in pmb.df") - return [] - self._check_if_name_has_right_type(name=residue_name, expected_pmb_type="residue") index_residue = self.df.loc[self.df['name'] == residue_name].index[0].item() central_bead = self.df.at [index_residue, ('central_bead', '')] list_of_side_chains = self.df.at[index_residue, ('side_chains', '')] list_of_particles_in_residue = [] if central_bead is not pd.NA: - if _DFm._check_if_name_is_defined_in_df(name=central_bead, df=self.df): - if self._check_if_name_has_right_type(name=central_bead, expected_pmb_type="particle", hard_check=False): list_of_particles_in_residue.append(central_bead) if list_of_side_chains is not pd.NA: for side_chain in list_of_side_chains: From fd65b142875c40dbb354f486f8ac4d997328d4a9 Mon Sep 17 00:00:00 2001 From: Pablo Date: Sat, 10 Jan 2026 21:26:36 +0100 Subject: [PATCH 14/55] fix test for define and create molecules --- pyMBE/lib/handy_functions.py | 37 -- pyMBE/pyMBE.py | 141 ++--- pyMBE/storage/manager.py | 52 +- .../define_and_create_molecules_unit_tests.py | 521 ++++++------------ testsuite/hydrogel_builder.py | 20 +- testsuite/lj_tests.py | 2 +- 6 files changed, 273 insertions(+), 500 deletions(-) diff --git a/pyMBE/lib/handy_functions.py b/pyMBE/lib/handy_functions.py index 724800e..27efc10 100644 --- a/pyMBE/lib/handy_functions.py +++ b/pyMBE/lib/handy_functions.py @@ -66,43 +66,6 @@ def check_if_metal_ion(key): Args: key(`str`): key to be checked - - def get_particle_id_map(self, object_name): - ''' - Gets all the ids associated with the object with name `object_name` in `pmb.df` - - Args: - object_name(`str`): name of the object - - Returns: - id_map(`dict`): dict of the structure {"all": [all_ids_with_object_name], "residue_map": {res_id: [particle_ids_in_res_id]}, "molecule_map": {mol_id: [particle_ids_in_mol_id]}, } - ''' - 
object_type=self._check_supported_molecule(molecule_name=object_name, - valid_pmb_types= ['particle','residue','molecule',"peptide","protein"]) - id_list = [] - mol_map = {} - res_map = {} - def do_res_map(res_ids): - for res_id in res_ids: - res_list=self.df.loc[(self.df['residue_id']== res_id) & (self.df['pmb_type']== "particle")].particle_id.dropna().tolist() - res_map[res_id]=res_list - return res_map - if object_type in ['molecule', 'protein', 'peptide']: - mol_ids = self.df.loc[self.df['name']== object_name].molecule_id.dropna().tolist() - for mol_id in mol_ids: - res_ids = set(self.df.loc[(self.df['molecule_id']== mol_id) & (self.df['pmb_type']== "particle") ].residue_id.dropna().tolist()) - res_map=do_res_map(res_ids=res_ids) - mol_list=self.df.loc[(self.df['molecule_id']== mol_id) & (self.df['pmb_type']== "particle")].particle_id.dropna().tolist() - id_list+=mol_list - mol_map[mol_id]=mol_list - elif object_type == 'residue': - res_ids = self.df.loc[self.df['name']== object_name].residue_id.dropna().tolist() - res_map=do_res_map(res_ids=res_ids) - id_list=[] - for res_id_list in res_map.values(): - id_list+=res_id_list - elif object_type == 'particle': - id_list = self.d Returns: (`bool`): True if `key` is a supported metal ion, False otherwise. """ diff --git a/pyMBE/pyMBE.py b/pyMBE/pyMBE.py index 7bdcb4c..2b30e5b 100644 --- a/pyMBE/pyMBE.py +++ b/pyMBE/pyMBE.py @@ -283,7 +283,6 @@ def _delete_particles_from_espresso(self, particle_ids, espresso_system): - Attempting to remove a non-existent particle ID will raise an ESPResSo error. 
""" - for pid in particle_ids: espresso_system.part.by_id(pid).remove() @@ -301,7 +300,9 @@ def calculate_center_of_mass_of_molecule(self, molecule_id, espresso_system): """ center_of_mass = np.zeros(3) axis_list = [0,1,2] - molecule_name = self.df.loc[(self.df['molecule_id']==molecule_id) & (self.df['pmb_type'].isin(["molecule","protein"]))].name.values[0] + mol_inst = self.db.get_instance(pmb_type="molecule", + instance_id=molecule_id) + molecule_name = mol_inst.name particle_id_list = self.get_particle_id_map(object_name=molecule_name)["all"] for pid in particle_id_list: for axis in axis_list: @@ -309,7 +310,7 @@ def calculate_center_of_mass_of_molecule(self, molecule_id, espresso_system): center_of_mass = center_of_mass / len(particle_id_list) return center_of_mass - def calculate_HH(self, molecule_name, pH_list=None, pka_set=None): + def calculate_HH(self, molecule_name, pmb_type, pH_list=None, pka_set=None): """ Calculates the charge per molecule according to the ideal Henderson-Hasselbalch titration curve for molecules with the name `molecule_name`. 
@@ -820,7 +821,8 @@ def create_molecule(self, name, number_of_molecules, espresso_system, list_of_fi attribute="residue_id", value=residue_id) prev_central_bead_id = particle_ids_in_residue[0] - prev_central_bead_name = self.db.get_instance(pmb_type="particle", instance_id=prev_central_bead_id).name + prev_central_bead_name = self.db.get_instance(pmb_type="particle", + instance_id=prev_central_bead_id).name prev_central_bead_pos = espresso_system.part.by_id(prev_central_bead_id).pos first_residue = False else: @@ -1041,9 +1043,6 @@ def create_residue(self, name, espresso_system, central_bead_position=None,use_d espresso_system=espresso_system, position=central_bead_position, number_of_particles = 1)[0] - if not central_bead_id: - logging.warning(f"Central bead with particle template with name '{name}' is not defined in the pyMBE database, no residue will be created.") - return central_bead_position=espresso_system.part.by_id(central_bead_id).pos # Assigns residue_id to the central_bead particle created. @@ -1263,26 +1262,25 @@ def define_molecule(self, name, residue_list): residue_list=residue_list) self.db._register_template(tpl) - def define_particle(self, name, z=0, acidity=pd.NA, pka=pd.NA, sigma=pd.NA, epsilon=pd.NA, cutoff=pd.NA, offset=pd.NA): + def define_particle(self, name, sigma, epsilon, z=0, acidity=pd.NA, pka=pd.NA, cutoff=pd.NA, offset=pd.NA): """ Defines a particle template in the pyMBE database. Args: name(`str`): Unique label that identifies this particle type. + sigma(`pint.Quantity`): Sigma parameter used to set up Lennard-Jones interactions for this particle type. + epsilon(`pint.Quantity`): Epsilon parameter used to setup Lennard-Jones interactions for this particle tipe. z(`int`, optional): Permanent charge number of this particle type. Defaults to 0. acidity(`str`, optional): Identifies whether if the particle is `acidic` or `basic`, used to setup constant pH simulations. Defaults to pd.NA. 
pka(`float`, optional): If `particle` is an acid or a base, it defines its pka-value. Defaults to pd.NA. - sigma(`pint.Quantity`, optional): Sigma parameter used to set up Lennard-Jones interactions for this particle type. Defaults to pd.NA. cutoff(`pint.Quantity`, optional): Cutoff parameter used to set up Lennard-Jones interactions for this particle type. Defaults to pd.NA. offset(`pint.Quantity`, optional): Offset parameter used to set up Lennard-Jones interactions for this particle type. Defaults to pd.NA. - epsilon(`pint.Quantity`, optional): Epsilon parameter used to setup Lennard-Jones interactions for this particle tipe. Defaults to pd.NA. - + Note: - `sigma`, `cutoff` and `offset` must have a dimensitonality of `[length]` and should be defined using pmb.units. - `epsilon` must have a dimensitonality of `[energy]` and should be defined using pmb.units. - `cutoff` defaults to `2**(1./6.) reduced_length`. - `offset` defaults to 0. - - The default setup corresponds to the Weeks−Chandler−Andersen (WCA) model, corresponding to purely steric interactions. - For more information on `sigma`, `epsilon`, `cutoff` and `offset` check `pmb.setup_lj_interactions()`. """ @@ -1383,8 +1381,8 @@ def delete_instances_in_system(self, instance_id, pmb_type, espresso_system): elif pmb_type in self.db._assembly_like_types: instance_identifier = "assembly_id" particle_ids = self.db._find_instance_ids_by_attribute(pmb_type="particle", - attribute="molecule_id", - value=instance_identifier) + attribute=instance_identifier, + value=instance_id) self._delete_particles_from_espresso(particle_ids=particle_ids, espresso_system=espresso_system) @@ -1649,6 +1647,27 @@ def get_bond_template(self, particle_name1, particle_name2, use_default_bond=Fal pmb_type="bond") return bond_tpl + def get_charge_number_map(self): + ''' + Gets the charge number of each `espresso_type` in `pymbe.df`. + + Returns: + charge_number_map(`dict`): {espresso_type: z}. 
+ ''' + if self.df.state_one['es_type'].isnull().values.any(): + df_state_one = self.df.state_one.dropna() + df_state_two = self.df.state_two.dropna() + else: + df_state_one = self.df.state_one + if self.df.state_two['es_type'].isnull().values.any(): + df_state_two = self.df.state_two.dropna() + else: + df_state_two = self.df.state_two + state_one = pd.Series (df_state_one.z.values,index=df_state_one.es_type.values) + state_two = pd.Series (df_state_two.z.values,index=df_state_two.es_type.values) + charge_number_map = pd.concat([state_one,state_two],axis=0).to_dict() + return charge_number_map + def get_espresso_bond_instance(self, particle_name1, particle_name2, espresso_system, use_default_bond=False): """ Retrieve or create a bond instance in an ESPResSo system for a given pair of particle names. @@ -1686,26 +1705,17 @@ def get_espresso_bond_instance(self, particle_name1, particle_name2, espresso_sy espresso_system.bonded_inter.add(bond_inst) return bond_inst - def get_charge_number_map(self): - ''' - Gets the charge number of each `espresso_type` in `pymbe.df`. + def get_instances_df(self, pmb_type): + """ + Returns a dataframe with all instances of type `pmb_type` in the pyMBE database. + + Args: + pmb_type(`str`): pmb type to search instances in the pyMBE database. Returns: - charge_number_map(`dict`): {espresso_type: z}. 
- ''' - if self.df.state_one['es_type'].isnull().values.any(): - df_state_one = self.df.state_one.dropna() - df_state_two = self.df.state_two.dropna() - else: - df_state_one = self.df.state_one - if self.df.state_two['es_type'].isnull().values.any(): - df_state_two = self.df.state_two.dropna() - else: - df_state_two = self.df.state_two - state_one = pd.Series (df_state_one.z.values,index=df_state_one.es_type.values) - state_two = pd.Series (df_state_two.z.values,index=df_state_two.es_type.values) - charge_number_map = pd.concat([state_one,state_two],axis=0).to_dict() - return charge_number_map + instances_df(`Pandas.Dataframe`): Dataframe with all instances of type `pmb_type`. + """ + return self.db._get_instances_df(pmb_type=pmb_type) def get_lj_parameters(self, particle_name1, particle_name2, combining_rule='Lorentz-Berthelot'): """ @@ -1793,16 +1803,17 @@ def get_radius_map(self, dimensionless=True): Note: The radius corresponds to (sigma+offset)/2 ''' - df_state_one = self.df[[('sigma',''),('offset',''),('state_one','es_type')]].dropna().drop_duplicates() - df_state_two = self.df[[('sigma',''),('offset',''),('state_two','es_type')]].dropna().drop_duplicates() - state_one = pd.Series((df_state_one.sigma.values+df_state_one.offset.values)/2.0,index=df_state_one.state_one.es_type.values) - state_two = pd.Series((df_state_two.sigma.values+df_state_two.offset.values)/2.0,index=df_state_two.state_two.es_type.values) - radius_map = pd.concat([state_one,state_two],axis=0).to_dict() - if dimensionless: - for key in radius_map: - radius_map[key] = radius_map[key].magnitude - return radius_map - + if "particle" not in self.db._templates: + return {} + result = {} + for _, tpl in self.db._templates["particle"].items(): + radius = (tpl.sigma.to_quantity(self.units) + tpl.offset.to_quantity(self.units))/2.0 + if dimensionless: + radius = radius.magnitude + for _, state in tpl.states.items(): + result[state.es_type] = radius + return result + def get_reduced_units(self): """ 
Returns the current set of reduced units defined in pyMBE. @@ -1822,31 +1833,28 @@ def get_reduced_units(self): ]) return reduced_units_text - def get_type_map(self): + def get_templates_df(self, pmb_type): """ - Return the mapping of ESPResSo types for all particles present in ``pmb.df``. - - This method delegates to ``self.db.get_es_types_map()`` and returns its output. - The resulting structure is a nested dictionary that lists, for each particle - template, all defined states and their corresponding ESPResSo type (``es_type``). + Returns a dataframe with all templates of type `pmb_type` in the pyMBE database. + Args: + pmb_type(`str`): pmb type to search templates in the pyMBE database. + Returns: - dict[str, dict[str, float | int | str]]: - A dictionary of the form:: + templates_df(`Pandas.Dataframe`): Dataframe with all templates of type `pmb_type`. + """ + return self.db._get_templates_df(pmb_type=pmb_type) - { - particle_name: { + def get_type_map(self): + """ + Return the mapping of ESPResSo types for all particle states defined in the pyMBE database. + + Returns: + dict[str, int]: + A dictionary mapping each particle state to its corresponding ESPResSo type:{ state_name: es_type, ... - }, - ... - } - - where ``es_type`` is the ESPResSo particle type used in simulations. - - See Also: - ``Manager.get_es_types_map`` – the underlying method that performs - the extraction. 
+ } """ return self.db.get_es_types_map() @@ -1981,18 +1989,13 @@ def propose_unused_type(self): """ type_map = self.get_type_map() - # Flatten all es_type values across all particles and states all_types = [] - for particle_entry in type_map.values(): - for es_type in particle_entry.values(): - if isinstance(es_type, int): - all_types.append(es_type) - - # If no integer es_types exist, start at 0 + for es_type in type_map.values(): + all_types.append(es_type) + # If no es_types exist, start at 0 if not all_types: return 0 - return max(all_types) + 1 def protein_sequence_parser(self, sequence): diff --git a/pyMBE/storage/manager.py b/pyMBE/storage/manager.py index 4140e2a..3450e02 100644 --- a/pyMBE/storage/manager.py +++ b/pyMBE/storage/manager.py @@ -944,68 +944,30 @@ def get_template(self, pmb_type, name): provided type and name. Raises: - KeyError: If no template with the given type and name exists in + ValueError: If no template with the given type and name exists in the internal registry. """ if name not in self._templates[pmb_type]: - raise KeyError(f"Template '{name}' not found in type '{pmb_type}'.") + raise ValueError(f"Template '{name}' not found in type '{pmb_type}'.") else: return self._templates[pmb_type][name] def get_es_types_map(self): """ - Return a mapping from each particle to its states' `es_type`. - Iterates over all particle templates and extracts the ESPResSo type (`es_type`) - defined for each state. Produces a nested dictionary of the form: - - { - particle_name: { - state_name: es_type, - ... - }, - ... - } + defined for each state. Returns: - dict[str, dict[str, int]]: - A dictionary mapping each particle name to another dictionary that maps - each state name to its corresponding ``es_type``. + dict[str, int]: + A dictionary mapping each particle state to its corresponding ESPResSo type. - Raises: - KeyError: - If the ``"particle"`` template group does not exist in the database. 
- - Examples: - Suppose templates include: - Particle A: - HA: es_type = 0 - A-: es_type = 1 - Particle H: - H+: es_type = 2 - - Then the method returns: - { - "A": { - "HA": 0, - "A-": 1, - }, - "H": { - "H+": 2, - } - } """ if "particle" not in self._templates: return {} - result = {} - for particle_name, tpl in self._templates["particle"].items(): + for _, tpl in self._templates["particle"].items(): for state_name, state in tpl.states.items(): - if particle_name not in result: - result[particle_name] = {state_name: state.es_type} - else: - result[particle_name][state_name] = state.es_type - + result[state_name] = state.es_type return result def get_particle_id_map(self, object_name): diff --git a/testsuite/define_and_create_molecules_unit_tests.py b/testsuite/define_and_create_molecules_unit_tests.py index 69bccdb..7c38c94 100644 --- a/testsuite/define_and_create_molecules_unit_tests.py +++ b/testsuite/define_and_create_molecules_unit_tests.py @@ -21,49 +21,39 @@ import numpy as np import pandas as pd import espressomd +import logging +import io +# Create an in-memory log stream +log_stream = io.StringIO() +logging.basicConfig(level=logging.INFO, + format="%(levelname)s: %(message)s", + handlers=[logging.StreamHandler(log_stream)]) # Create an instance of pyMBE library pmb = pyMBE.pymbe_library(seed=42) # The unit tests for define_particle are in lj_tests.py and set_particle_acidity -print("*** Unit test: check that define_particles() does not setup any particle if no parameters are provided ***") -output = pmb.define_particles(parameters={}) -np.testing.assert_equal(actual=output, - desired=0, - verbose=True) - -print("*** Unit test passed ***") - -print("*** Unit test: check that define_particles() defines a set of particles correctly ***") particle_parameters={"S1":{"name":"S1", "sigma":1*pmb.units.nm, "offset":0.5*pmb.units.nm, + "epsilon":1.0*pmb.units.reduced_energy, "z":1}, "S2":{"name":"S2", "sigma":2*pmb.units.nm, + 
"epsilon":1.0*pmb.units.reduced_energy, "offset":1.5*pmb.units.nm, "z": 1}, "S3":{"name":"S3", "sigma":3*pmb.units.nm, + "epsilon":1.0*pmb.units.reduced_energy, "offset":2.5*pmb.units.nm, "z":2}} -pmb.define_particles(parameters=particle_parameters) +for particle_set in particle_parameters.values(): + pmb.define_particle(**particle_set) -for particle_name in particle_parameters.keys(): - input_parameters=particle_parameters[particle_name] - for index in pmb.df[pmb.df['name']==particle_name].index: - np.testing.assert_equal(actual=str(pmb.df.loc[index, "pmb_type"].values[0]), - desired="particle", - verbose=True) - np.testing.assert_equal(actual=pmb.df.loc[index, "sigma"].values[0].to("nm").magnitude, - desired=input_parameters["sigma"].to("nm").magnitude, - verbose=True) - -print("*** Unit test passed ***") - -print("*** Unit test: check that define_residue() stores the parameters correctly in pmb.df ***") +print("*** Unit test: check that define_residue() stores the parameters correctly in the pyMBE database ***") residue_parameters={"R1":{"name": "R1", "central_bead": "S1", @@ -80,27 +70,21 @@ for residue_name in residue_parameters.keys(): input_parameters=residue_parameters[residue_name] - for index in pmb.df[pmb.df['name']==residue_name].index: - np.testing.assert_equal(actual=str(pmb.df.loc[index, "pmb_type"].values[0]), - desired="residue", - verbose=True) - np.testing.assert_equal(actual=str(pmb.df.loc[index, "central_bead"].values[0]), - desired=input_parameters["central_bead"], - verbose=True) - np.testing.assert_equal(actual=pmb.df.loc[index, "side_chains"].values[0], - desired=input_parameters["side_chains"], - verbose=True) - -print("*** Unit test passed ***") + res_tpl = pmb.db.get_template(pmb_type="residue", + name=residue_name) + np.testing.assert_equal(actual=res_tpl.pmb_type, + desired="residue", + verbose=True) + np.testing.assert_equal(actual=res_tpl.central_bead, + desired=input_parameters["central_bead"], + verbose=True) + 
np.testing.assert_equal(actual=res_tpl.side_chains, + desired=input_parameters["side_chains"], + verbose=True) -print("*** Unit test: check that define_residue() raises a ValueError if the user provides an already defined name ***") -input_parameters={"name": "S3", - "central_bead": "S2", - "side_chains": ["R1"]} -np.testing.assert_raises(ValueError, pmb.define_residue, **input_parameters) print("*** Unit test passed ***") -print("*** Unit test: check that define_molecule() stores the parameters correctly in pmb.df ***") +print("*** Unit test: check that define_molecule() stores the parameters correctly in the pyMBE database ***") molecule_parameters={"M1":{"name": "M1", "residue_list": []}, @@ -113,23 +97,18 @@ for molecule_name in molecule_parameters.keys(): input_parameters=molecule_parameters[molecule_name] - for index in pmb.df[pmb.df['name']==molecule_name].index: - np.testing.assert_equal(actual=str(pmb.df.loc[index, "pmb_type"].values[0]), - desired="molecule", - verbose=True) - np.testing.assert_equal(actual=pmb.df.loc[index, "residue_list"].values[0], - desired=input_parameters["residue_list"], - verbose=True) - -print("*** Unit test passed ***") + mol_tpl = pmb.db.get_template(pmb_type="molecule", + name=molecule_name) + np.testing.assert_equal(actual=mol_tpl.pmb_type, + desired="molecule", + verbose=True) + np.testing.assert_equal(actual=mol_tpl.residue_list, + desired=input_parameters["residue_list"], + verbose=True) -print("*** Unit test: check that define_molecule() raises a ValueError if the user provides an already defined name ***") -input_parameters={"name": "S3", - "residue_list": ["R1"]} -np.testing.assert_raises(ValueError, pmb.define_molecule, **input_parameters) print("*** Unit test passed ***") -print("*** Unit test: check that create_particle() creates particles into the espresso_system with the properties defined in pmb.df ***") +print("*** Unit test: check that create_particle() creates particles into the espresso_system with the 
properties defined in the pyMBE database ***") # Create an instance of an espresso system espresso_system=espressomd.System(box_l = [10]*3) particle_positions=[[0,0,0],[1,1,1]] @@ -142,7 +121,6 @@ particle_ids=pmb.get_particle_id_map(object_name="S1")["all"] type_map=pmb.get_type_map() - for pid in particle_ids: particle=espresso_system.part.by_id(pid) np.testing.assert_equal(actual=particle.type, @@ -185,97 +163,55 @@ verbose=True) print("*** Unit test passed ***") - -print("*** Unit test: check that create_particle() raises a ValueError if the user provides the name of an object that is not a particle ***") -input_parameters={"name": "R2", - "espresso_system": espresso_system, - "number_of_particles": 1} -np.testing.assert_raises(ValueError, pmb.create_particle, **input_parameters) -print("*** Unit test passed ***") - # Unit tests for delete particle -print("*** Unit test: check that delete_particle deletes the particle and cleans pmb.df ***") +print("*** Unit test: check that delete_particle deletes the particle and cleans the pyMBE database ***") starting_number_of_particles=len(espresso_system.part.all()) -starting_number_of_rows=len(pmb.df) -# This should delete one particle and one row from the df because there are repeated entries of that type of particle -pmb.delete_particle_in_system(particle_id=0, - espresso_system=espresso_system) +starting_number_of_rows=len(pmb.get_instances_df(pmb_type="particle")) +# This should delete one particle instance +pmb.delete_instances_in_system(instance_id=0, + pmb_type="particle", + espresso_system=espresso_system) np.testing.assert_equal(actual=len(espresso_system.part.all()), desired=starting_number_of_particles-1, verbose=True) -np.testing.assert_equal(actual=len(pmb.df), - desired=starting_number_of_rows-1, - verbose=True) -# This should delete one particle but not delete any row because it is the last entry of that type of particle -# instead, the particle id should be cleared 
-pmb.delete_particle_in_system(particle_id=1, - espresso_system=espresso_system) -np.testing.assert_equal(actual=len(espresso_system.part.all()), - desired=starting_number_of_particles-2, - verbose=True) -np.testing.assert_equal(actual=len(pmb.df), +particle_df = pmb.get_instances_df(pmb_type="particle") +np.testing.assert_equal(actual=len(particle_df), desired=starting_number_of_rows-1, verbose=True) -def check_empty_columns(name_to_check): - empty_columns=['particle_id', - 'particle_id2', - 'residue_id', - 'molecule_id'] - for column in empty_columns: - assert pd.isna(pmb.df.loc[pmb.df['name'] == name_to_check][column]).all() - -check_empty_columns(name_to_check="S1") -non_empty_columns=['name', - 'pmb_type', - 'sigma', - 'offset'] -for column in non_empty_columns: - assert pd.notna(pmb.df.loc[pmb.df['name'] == "S1"][column]).all() - -non_empty_columns=['label', - 'z', - 'es_type'] -for column in non_empty_columns: - assert pd.notna(pmb.df.loc[pmb.df['name'] == "S1"]["state_one"][column]).all() - - -# test the sanity check -input_parameters={"particle_id":0, - "espresso_system":espresso_system} -np.testing.assert_raises(ValueError, - pmb.delete_particle_in_system, - **input_parameters) -# Create the particle back for the rest of the test -pmb.create_particle(name="S1", - espresso_system=espresso_system, - number_of_particles=2) +print(pmb.get_instances_df(pmb_type="particle")) + +# Delete the other particle instance to simplify the rest of the tests +pmb.delete_instances_in_system(instance_id=1, + pmb_type="particle", + espresso_system=espresso_system) + print("*** Unit test: check that create_residue() creates a simple residue into the espresso_system with the properties defined in pmb.df ***") bond_type = 'harmonic' bond = {'r_0' : 0.4*pmb.units.nm, 'k' : 400 * pmb.units('reduced_energy / reduced_length**2')} - pmb.define_default_bond(bond_type = bond_type, bond_parameters = bond) -pmb.add_bonds_to_espresso(espresso_system=espresso_system) - 
central_bead_position=[[0,0,0]] backbone_vector=np.array([1.,2.,3.]) + pmb.create_residue(name="R2", - espresso_system=espresso_system, - central_bead_position=central_bead_position, - backbone_vector=backbone_vector, - use_default_bond=True) + espresso_system=espresso_system, + central_bead_position=central_bead_position, + backbone_vector=backbone_vector, + use_default_bond=True) particle_ids=pmb.get_particle_id_map(object_name="R2")["all"] # Check that the particle properties are correct for pid in particle_ids: particle=espresso_system.part.by_id(pid) - particle_name = pmb.df[(pmb.df['particle_id']==pid) & (pmb.df['pmb_type']=="particle")]["name"].values[0] + particle_tpl = pmb.db.get_instance(pmb_type="particle", + instance_id=pid) + particle_name = particle_tpl.name np.testing.assert_equal(actual=particle.type, desired=type_map[particle_name], verbose=True) @@ -293,16 +229,18 @@ def check_empty_columns(name_to_check): desired=0, verbose=True) # Check that particles have the correct residue id - residue_id = pmb.df[(pmb.df['particle_id']==pid) & (pmb.df['pmb_type']=="particle")]["residue_id"].values[0] + residue_id = particle_tpl.residue_id np.testing.assert_equal(actual=residue_id, desired=0, verbose=True) # Check that particles are correctly bonded +# Central bead S1 (id 0) should be bonded to S2 (id 1) and S3 (id 2) bonded_pairs=[] -for bond_index in pmb.df[pmb.df['pmb_type']=="bond"].index: - particle_id1= pmb.df.loc[bond_index,"particle_id"].values[0] - particle_id2= pmb.df.loc[bond_index,"particle_id2"].values[0] +bond_df = pmb.get_instances_df(pmb_type="bond") +for bond_index in bond_df.index: + particle_id1= bond_df.loc[bond_index,"particle_id1"] + particle_id2= bond_df.loc[bond_index,"particle_id2"] bonded_pair=frozenset([particle_id1,particle_id2]) bonded_pairs.append(bonded_pair) bonded_in_espresso = False @@ -312,19 +250,12 @@ def check_empty_columns(name_to_check): partner_id = bond[1] if partner_id in bonded_pair: bonded_in_espresso=True - # Test 
that the bond object is correctly stored in pyMBE - np.testing.assert_equal(actual=pmb.df.loc[bond_index,"bond_object"].values[0], - desired=bond_object, - verbose=True) - np.testing.assert_equal(actual=pmb.df.loc[bond_index,"residue_id"].values[0], - desired=0, - verbose=True) np.testing.assert_equal(actual=bonded_in_espresso, desired=True, verbose=True) np.testing.assert_equal(actual=frozenset(bonded_pairs), - desired=frozenset([frozenset([2,3]),frozenset([2,4])]), + desired=frozenset([frozenset([0,1]),frozenset([0,2])]), verbose=True) print("*** Unit test passed ***") @@ -340,7 +271,9 @@ def check_empty_columns(name_to_check): # Check that the particle properties are correct for pid in particle_ids: particle=espresso_system.part.by_id(pid) - particle_name = pmb.df[(pmb.df['particle_id']==pid) & (pmb.df['pmb_type']=="particle")]["name"].values[0] + particle_tpl = pmb.db.get_instance(pmb_type="particle", + instance_id=pid) + particle_name = particle_tpl.name np.testing.assert_equal(actual=particle.type, desired=type_map[particle_name], verbose=True) @@ -348,17 +281,21 @@ def check_empty_columns(name_to_check): desired=particle_parameters[particle_name]["z"], verbose=True) # Check that particles have the correct residue id - residue_id = pmb.df[(pmb.df['particle_id']==pid) & (pmb.df['pmb_type']=="particle")]["residue_id"].values[0] + residue_id = particle_tpl.residue_id np.testing.assert_equal(actual=residue_id, desired=1, verbose=True) -# Check that particles are correctly bonded +# Check that particles are correctly bonded, new bonds are: +# Central bead S2 (id 3) should be bonded to R2 central bead S1 (id 4) +# Central bead S1 (id 4) should be bonded to side chains S2 (id 5) and S3 (id 6) bonded_pairs=[] -for bond_index in pmb.df[(pmb.df['pmb_type']=="bond") & (pmb.df['residue_id']==1)].index: - particle_id1= pmb.df.loc[bond_index,"particle_id"].values[0] - particle_id2= pmb.df.loc[bond_index,"particle_id2"].values[0] +bond_df = 
pmb.get_instances_df(pmb_type="bond") + +for bond_index in bond_df.index: + particle_id1= bond_df.loc[bond_index,"particle_id1"] + particle_id2= bond_df.loc[bond_index,"particle_id2"] bonded_pair=frozenset([particle_id1,particle_id2]) bonded_pairs.append(bonded_pair) bonded_in_espresso = False @@ -368,40 +305,22 @@ def check_empty_columns(name_to_check): partner_id = bond[1] if partner_id in bonded_pair: bonded_in_espresso=True - # Test that the bond object is correctly stored in pyMBE - np.testing.assert_equal(actual=pmb.df.loc[bond_index,"bond_object"].values[0], - desired=bond_object, - verbose=True) - np.testing.assert_equal(actual=pmb.df.loc[bond_index,"residue_id"].values[0], - desired=1, - verbose=True) - + np.testing.assert_equal(actual=bonded_in_espresso, desired=True, verbose=True) np.testing.assert_equal(actual=frozenset(bonded_pairs), - desired=frozenset([frozenset([5,6]), - frozenset([6,7]), - frozenset([6,8])]), + desired=frozenset([frozenset([0,1]), + frozenset([0,2]), + frozenset([3,4]), + frozenset([4,5]), + frozenset([4,6])]), verbose=True) print("*** Unit test passed ***") -print("*** Unit test: check that create_residue() raises a ValueError if the user provides the name of an object that is not a residue ***") -input_parameters={"name": "S2", - "espresso_system": espresso_system} -np.testing.assert_raises(ValueError, pmb.create_residue, **input_parameters) -print("*** Unit test passed ***") -print("*** Unit test: check that create_residue() raises a ValueError if the any of the names in side_chains does not correspond to a previously defined particle ***") -pmb.define_residue(name="test", - central_bead="S1", - side_chains=["test1"]) -input_parameters={"name": "test", - "espresso_system": espresso_system} -np.testing.assert_raises(ValueError, pmb.create_residue, **input_parameters) -print("*** Unit test passed ***") -print("*** Unit test: check that create_residue() does not create any residue if name is not defined in the pmb.df ***") 
+print("*** Unit test: check that create_residue() does not create any residue if name is not defined in the pyMBE database ***") starting_number_of_particles=len(espresso_system.part.all()) pmb.create_residue(name="R51", espresso_system=espresso_system, @@ -412,70 +331,28 @@ def check_empty_columns(name_to_check): verbose=True) # Tests for delete_residue -print("*** Unit test: check that delete_residue deletes the particle and cleans pmb.df ***") +print("*** Unit test: check that delete_residue deletes the particle and cleans the pyMBE database ***") + # This should delete 3 particles (residue 0 is a R2 residue) -# 6 lines should also be removed from pmb.df -# One because R2 has a repeated entry -# 3 from the removed particles, (repeated entries) -# and 2 from the removed bonds (repeated entries) starting_number_of_particles=len(espresso_system.part.all()) -starting_number_of_rows=len(pmb.df) -pmb.delete_residue_in_system(residue_id=0, - espresso_system=espresso_system) +pmb.delete_instances_in_system(instance_id=0, + pmb_type="residue", + espresso_system=espresso_system) np.testing.assert_equal(actual=len(espresso_system.part.all()), desired=starting_number_of_particles-3, verbose=True) -np.testing.assert_equal(actual=len(pmb.df), - desired=starting_number_of_rows-6, +# There should be only one residue instance now in the pyMBE database +np.testing.assert_equal(actual=len(pmb.get_instances_df(pmb_type="residue")), + desired=1, verbose=True) -# This should delete 4 particles (residue 1 is a R3 residue) -# 4 lines should also be removed from pmb.df -# Residues do not have repeated entries (no line deleted) -# 3 from the removed particles, (repeated entries) -# and 1 from the removed bonds (repeated entry) -starting_number_of_particles=len(espresso_system.part.all()) -starting_number_of_rows=len(pmb.df) -pmb.delete_residue_in_system(residue_id=1, - espresso_system=espresso_system) -np.testing.assert_equal(actual=len(espresso_system.part.all()), - 
desired=starting_number_of_particles-4, +# And there should be only 4 particles (central bead + 2 side chains + central bead of R3) +np.testing.assert_equal(actual=len(pmb.get_instances_df(pmb_type="particle")), + desired=4, verbose=True) -np.testing.assert_equal(actual=len(pmb.df), - desired=starting_number_of_rows-4, - verbose=True) -check_empty_columns(name_to_check="R2") -check_empty_columns(name_to_check="R3") -check_empty_columns(name_to_check="default") -non_empty_columns=['name', - 'pmb_type', - 'central_bead', - 'side_chains'] -for res_name in ["R2","R3"]: - for column in non_empty_columns: - assert pd.notna(pmb.df.loc[pmb.df['name'] == res_name][column]).all() - -non_empty_columns=['name', - 'l0', - 'parameters_of_the_potential', - 'bond_object'] - -for column in non_empty_columns: - assert pd.notna(pmb.df.loc[pmb.df['name'] == "default"][column]).all() - -input_parameters={"residue_id":0, - "espresso_system":espresso_system} -np.testing.assert_raises(ValueError, - pmb.delete_residue_in_system, - **input_parameters) -# Create back the residues for the rest of the test -pmb.create_residue(name="R2", - espresso_system=espresso_system, - central_bead_position=central_bead_position, - backbone_vector=backbone_vector, - use_default_bond=True) -pmb.create_residue(name="R3", - espresso_system=espresso_system, - use_default_bond=True) +# Delete the other residue instance to simplify the rest of the tests +pmb.delete_instances_in_system(instance_id=1, + pmb_type="residue", + espresso_system=espresso_system) print("*** Unit test passed ***") # Additional unit tests for define_molecule are in create_molecule_position_test print("*** Unit test: check that create_molecule() creates a simple molecule into the espresso_system with the properties defined in pmb.df ***") @@ -484,105 +361,101 @@ def check_empty_columns(name_to_check): magnitude = np.linalg.norm(backbone_vector) backbone_vector = backbone_vector/magnitude molecule_info_M2 = pmb.create_molecule(name="M2", - 
number_of_molecules=2, + number_of_molecules=1, espresso_system=espresso_system, backbone_vector = backbone_vector, use_default_bond=True) particle_ids=pmb.get_particle_id_map(object_name="M2")["all"] +# Residue and molecule IDs expected +# For the M2 molecule created, the residue and molecule IDs should be as follows: +# R1 (residue_id=0, molecule_id=0), R2 (residue_id=1, molecule_id=0), R3 (residue_id=2, molecule_id=0) -residue_ids={9: 2, 10: 3, 11: 3, 12: 3, 13: 4, 14: 4, 15: 4, 16: 4, # First molecule - 17: 5, 18: 6, 19: 6, 20: 6, 21: 7, 22: 7, 23: 7, 24: 7} # Second molecule - -molecule_ids={9: 0, 10: 0, 11: 0, 12: 0, 13: 0, 14: 0, 15: 0, 16: 0, # First molecule - 17: 1, 18: 1, 19: 1, 20: 1, 21: 1, 22: 1, 23: 1, 24: 1} # Second molecule +residue_ids={0: 0, 1: 1, 2: 1, 3: 1, 4: 2, 5: 2, 6: 2, 7: 2} +molecule_ids={0: 0, 1: 0, 2: 0, 3: 0, 4: 0, 5: 0, 6: 0, 7: 0} # Check that the particle properties are correct for pid in particle_ids: particle=espresso_system.part.by_id(pid) - particle_name = pmb.df[(pmb.df['particle_id']==pid) & (pmb.df['pmb_type']=="particle")]["name"].values[0] + particle_tpl = pmb.db.get_instance(pmb_type="particle", + instance_id=pid) + particle_name = particle_tpl.name np.testing.assert_equal(actual=particle.type, - desired=type_map[particle_name], - verbose=True) + desired=type_map[particle_name], + verbose=True) np.testing.assert_equal(actual=particle.q, desired=particle_parameters[particle_name]["z"], verbose=True) # Check that particles have the correct residue id - residue_id = pmb.df[(pmb.df['particle_id']==pid) & (pmb.df['pmb_type']=="particle")]["residue_id"].values[0] + residue_id = particle_tpl.residue_id np.testing.assert_equal(actual=residue_id, desired=residue_ids[pid], verbose=True) # Check that particles have the correct molecule id - molecule_id = pmb.df[(pmb.df['particle_id']==pid) & (pmb.df['pmb_type']=="particle")]["molecule_id"].values[0] + molecule_id = particle_tpl.molecule_id 
np.testing.assert_equal(actual=molecule_id, desired=molecule_ids[pid], verbose=True) -# Check that the molecules have the right residues -for mol_id in [0,1]: - residue_list=[] - for res_index in pmb.df[(pmb.df['pmb_type']=="residue") & (pmb.df['molecule_id']==mol_id)].index: - resname=pmb.df.loc[res_index,"name"].values[0] - residue_list.append(resname) - np.testing.assert_equal(actual=frozenset(residue_list), - desired=frozenset(molecule_parameters["M2"]["residue_list"]), - verbose=True) +# Check that the molecule has the right residues +residue_list=[] +residue_df = pmb.get_instances_df(pmb_type="residue") +for res_index in residue_df[residue_df['molecule_id']==0].index: + resname = residue_df.loc[res_index,"name"] + residue_list.append(resname) +np.testing.assert_equal(actual=frozenset(residue_list), + desired=frozenset(molecule_parameters["M2"]["residue_list"]), + verbose=True) -bonded_pairs_ref={0: [frozenset([9,10]), - frozenset([10,11]), - frozenset([10,12]), - frozenset([10,13]), - frozenset([13,14]), - frozenset([14,15]), - frozenset([14,16])], - 1: [frozenset([17,18]), - frozenset([18,19]), - frozenset([18,20]), - frozenset([18,21]), - frozenset([21,22]), - frozenset([22,23]), - frozenset([22,24])]} +# Expected bonded pairs for the molecule +# Molecule 0: +# S1(0)-S1(1) (R1-R2) +# S1(1)-S2(2) (R2) +# S1(1)-S3(3) (R2) +# S2(1)-S2(4) (R2-R3) +# S2(4)-S1(5) (R3) +# S1(5)-S2(6) (R3) +# S1(5)-S3(7) (R3) + +bonded_pairs_ref=[frozenset([0,1]), + frozenset([1,2]), + frozenset([1,3]), + frozenset([1,4]), + frozenset([4,5]), + frozenset([5,6]), + frozenset([5,7])] # Check that particles are correctly bonded -bonded_pairs={} -for mol_id in [0,1]: - bonded_pairs[mol_id]=[] - for bond_index in pmb.df[(pmb.df['pmb_type']=="bond") & (pmb.df['molecule_id']==mol_id)].index: - particle_id1= pmb.df.loc[bond_index,"particle_id"].values[0] - particle_id2= pmb.df.loc[bond_index,"particle_id2"].values[0] - bonded_pair=frozenset([particle_id1,particle_id2]) - 
bonded_pairs[mol_id].append(bonded_pair) - bonded_in_espresso = False - for pid in bonded_pair: - for bond in espresso_system.part.by_id(pid).bonds[:]: - bond_object = bond[0] - partner_id = bond[1] - if partner_id in bonded_pair: - bonded_in_espresso=True - # Test that the bond object is correctly stored in pyMBE - np.testing.assert_equal(actual=pmb.df.loc[bond_index,"bond_object"].values[0], - desired=bond_object, - verbose=True) - np.testing.assert_equal(actual=bonded_in_espresso, - desired=True, - verbose=True) - np.testing.assert_equal(actual=frozenset(bonded_pairs[mol_id]), - desired=frozenset(bonded_pairs_ref[mol_id]), +bonded_pairs=[] +bond_df = pmb.get_instances_df(pmb_type="bond") +for bond_index in bond_df.index: + particle_id1= bond_df.loc[bond_index,"particle_id1"] + particle_id2= bond_df.loc[bond_index,"particle_id2"] + bonded_pair=frozenset([particle_id1,particle_id2]) + bonded_pairs.append(bonded_pair) + bonded_in_espresso = False + for pid in bonded_pair: + for bond in espresso_system.part.by_id(pid).bonds[:]: + bond_object = bond[0] + partner_id = bond[1] + if partner_id in bonded_pair: + bonded_in_espresso=True + np.testing.assert_equal(actual=bonded_in_espresso, + desired=True, verbose=True) +np.testing.assert_equal(actual = frozenset(bonded_pairs), + desired = frozenset(bonded_pairs_ref), + verbose = True) print("*** Unit test passed ***") - print("*** Unit test: check the backbone vector of the molecule in espresso and the given input backbone vector are same ***") - central_bead_positions = [] -for residue_name in molecule_parameters["M2"]["residue_list"]: - - mol_id = pmb.df[pmb.df["name"]=="M2"]["molecule_id"].values[0] - res_id = pmb.df[(pmb.df["molecule_id"]==mol_id) & (pmb.df['name']==residue_name)]["residue_id"].values[0] - central_bead_id = molecule_info_M2[mol_id][res_id]['central_bead_id'] +residue_map=pmb.get_particle_id_map(object_name="M2")["residue_map"] +for res_id in residue_map.keys(): + central_bead_id = 
min(residue_map[res_id]) central_bead_pos = espresso_system.part.by_id(central_bead_id).pos central_bead_positions.append(central_bead_pos) @@ -634,15 +507,7 @@ def check_empty_columns(name_to_check): verbose=True) print("*** Unit test passed ***") -print("*** Unit test: check that get_particle_id_map() raises a ValueError if the user provides the name of an object that is not a particle ***") - -# If a bond_object is passed then the ValueError should be raised - -input_parameters={"object_name": 'default' } -np.testing.assert_raises(ValueError, pmb.get_particle_id_map, **input_parameters) - -print("*** Unit test passed ***") - +# Unit tests for get_radius_map print("*** Unit test: check that get_radius_map() provides the right amount of radii corresponding to the number of different particles in the simulation box ***") np.testing.assert_equal(actual=len(pmb.get_radius_map()), @@ -686,58 +551,34 @@ def check_empty_columns(name_to_check): # Tests for delete_residue print("*** Unit test: check that delete_molecule deletes the particle and cleans pmb.df ***") +# create another molecule just to have two molecules in the system +pmb.create_molecule(name="M2", + number_of_molecules=1, + espresso_system=espresso_system, + backbone_vector = backbone_vector, + use_default_bond=True) + # This should delete 8 particles (molecule 0 is a M2 molecule) -# 20 lines should also be removed from pmb.df -# 1 because M2 has a repeated entry -# 3 from the removed residues (repeated entries) -# 8 from the removed particles, (repeated entries) -# and 8 from the removed bonds (repeated entries) + starting_number_of_particles=len(espresso_system.part.all()) -starting_number_of_rows=len(pmb.df) -pmb.delete_molecule_in_system(molecule_id=0, - espresso_system=espresso_system) +pmb.delete_instances_in_system(instance_id=0, + pmb_type="molecule", + espresso_system=espresso_system) np.testing.assert_equal(actual=len(espresso_system.part.all()), desired=starting_number_of_particles-8, 
verbose=True) -np.testing.assert_equal(actual=len(pmb.df), - desired=starting_number_of_rows-20, + +# There should only one molecule instance now in the pyMBE database +np.testing.assert_equal(actual=len(pmb.get_instances_df(pmb_type="molecule")), + desired=1, verbose=True) -# This should also delete 8 particles (molecule 1 is a M2 molecule) -# 19 lines should also be removed from pmb.df -# 0 because M2 is not repeated entry -# 2 from the removed residues (repeated entries) -# 8 from the removed particles, (repeated entries) -# and 8 from the removed bonds (repeated entries) -starting_number_of_particles=len(espresso_system.part.all()) -starting_number_of_rows=len(pmb.df) -pmb.delete_molecule_in_system(molecule_id=1, - espresso_system=espresso_system) -np.testing.assert_equal(actual=len(espresso_system.part.all()), - desired=starting_number_of_particles-8, +# There should be only 3 residues (from the remaining M2 molecule) +np.testing.assert_equal(actual=len(pmb.get_instances_df(pmb_type="residue")), + desired=3, verbose=True) -np.testing.assert_equal(actual=len(pmb.df), - desired=starting_number_of_rows-18, +# There should be only 8 particles (from the remaining M2 molecule) +np.testing.assert_equal(actual=len(pmb.get_instances_df(pmb_type="particle")), + desired=8, verbose=True) -check_empty_columns(name_to_check="M2") -non_empty_columns=['name', - 'pmb_type', - 'residue_list'] - -for column in non_empty_columns: - assert pd.notna(pmb.df.loc[pmb.df['name'] == "M2"][column]).all() - -non_empty_columns=['name', - 'l0', - 'parameters_of_the_potential', - 'bond_object'] - -for column in non_empty_columns: - assert pd.notna(pmb.df.loc[pmb.df['name'] == "default"][column]).all() - -input_parameters={"molecule_id":0, - "espresso_system":espresso_system} -np.testing.assert_raises(ValueError, - pmb.delete_molecule_in_system, - **input_parameters) print("*** Unit test passed ***") \ No newline at end of file diff --git a/testsuite/hydrogel_builder.py 
b/testsuite/hydrogel_builder.py index e533747..d911658 100644 --- a/testsuite/hydrogel_builder.py +++ b/testsuite/hydrogel_builder.py @@ -80,7 +80,6 @@ diamond_lattice = DiamondLattice(mpc, generic_bond_length) box_l = diamond_lattice.box_l espresso_system = espressomd.System(box_l = [box_l]*3) -pmb.add_bonds_to_espresso(espresso_system = espresso_system) lattice_builder = pmb.initialize_lattice_builder(diamond_lattice) pmb.create_particle(name=CounterIon, @@ -111,14 +110,16 @@ residue_list = [Res1]*(mpc//2) + [Res2]*(mpc//2) for node_s, node_e in connectivity_with_labels: chain_topology.append({'node_start':node_s, - 'node_end': node_e, - 'residue_list':residue_list}) + 'node_end': node_e, + 'molecule_name':molecule_name}) ####################################################### hydrogel_name="my_hydrogel" pmb.define_hydrogel(hydrogel_name,node_topology, chain_topology) # Creating hydrogel -hydrogel_info = pmb.create_hydrogel(hydrogel_name, espresso_system) +hydrogel_id= pmb.create_hydrogel(hydrogel_name, espresso_system) +hydrogel_inst = pmb.db.get_instance(pmb_type="hydrogel", + instance_id=hydrogel_id) ################################################ @@ -186,15 +187,18 @@ def test_format_node(self): assert pmb.format_node([4, 5, 6]) == "[4 5 6]" def test_hydrogel_info(self): - assert hydrogel_info["name"] == hydrogel_name + assert hydrogel_inst.name == hydrogel_name def test_node_positions(self): + # Search for nodes of the hydrogel + particle_ids_in_hydrogel = pmb.get_particle_id_map(object_name=hydrogel_name)["all"] + # TODO: this need to be fixed for _, node_id in hydrogel_info["nodes"].items(): node_pos = espresso_system.part.by_id(int(node_id[0])).pos node_name_in_espresso = pmb.df[(pmb.df["pmb_type"] == "particle") & (pmb.df["particle_id"] == node_id[0])]["name"].values[0] node_label = node_labels[pmb.format_node(list((node_pos*(4/lattice_builder.box_l)).astype(int)))] - node_data = node_topology[node_label] - node_name = node_data["particle_name"] + 
node_data = node_topology[node_label] + node_name = node_data["particle_name"] # Assert node's name and position are correctly set np.testing.assert_equal(node_name_in_espresso, node_name) np.testing.assert_allclose(np.copy(node_pos), np.array(node_data["lattice_index"]) * 0.25 * diamond_lattice.box_l, atol=1e-7) @@ -279,7 +283,7 @@ def test_exceptions(self): assert hydrogel_name in pmb.df["name"].values assert pmb.df.loc[pmb.df["name"] == hydrogel_name, "pmb_type"].values[0] == "hydrogel" - def test_hydrogel_definitions_in_df(self): + def test_hydrogel_definitions_in_db(self): # Verify node_map and chain_map are correctly added compare_node_maps(pmb.df.loc[pmb.df["name"] == hydrogel_name, "node_map"].values[0], node_topology) compare_chain_maps(pmb.df.loc[pmb.df["name"] == hydrogel_name, "chain_map"].values[0], chain_topology) diff --git a/testsuite/lj_tests.py b/testsuite/lj_tests.py index 201a6ed..79a238a 100644 --- a/testsuite/lj_tests.py +++ b/testsuite/lj_tests.py @@ -209,7 +209,7 @@ print("*** Unit test passed ***") -print("*** Unit test: test that get_lj_parameters() rasie the ValueError when the combination rule is not Loretz-Berthelot ***") +print("*** Unit test: test that get_lj_parameters() raises the ValueError when the combination rule is not Loretz-Berthelot ***") input_params = {"particle_name1":"A", "particle_name2":"B", From ae6c3c1452cb83a7de9f6eb5c8c354781834b1c4 Mon Sep 17 00:00:00 2001 From: Pablo Date: Sun, 11 Jan 2026 10:43:09 +0100 Subject: [PATCH 15/55] modernize molecule tests, start cleaning lj tests --- pyMBE/pyMBE.py | 14 - .../define_and_create_molecules_unit_tests.py | 910 ++++++++---------- testsuite/lj_tests.py | 62 +- 3 files changed, 448 insertions(+), 538 deletions(-) diff --git a/pyMBE/pyMBE.py b/pyMBE/pyMBE.py index 2b30e5b..17a9f7b 100644 --- a/pyMBE/pyMBE.py +++ b/pyMBE/pyMBE.py @@ -1496,20 +1496,6 @@ def enable_motion_of_rigid_object(self, name, espresso_system): pid.vs_auto_relate_to(rigid_object_center.id) return - def 
filter_df(self, pmb_type): - """ - Filters `pmb.df` and returns a sub-set of it containing only rows with pmb_object_type=`pmb_type` and non-NaN columns. - - Args: - pmb_type(`str`): pmb_object_type to filter in `pmb.df`. - - Returns: - pmb_type_df(`Pandas.Dataframe`): filtered `pmb.df`. - """ - pmb_type_df = self.df.loc[self.df['pmb_type']== pmb_type] - pmb_type_df = pmb_type_df.dropna( axis=1, thresh=1) - return pmb_type_df - def find_value_from_es_type(self, es_type, column_name): """ Finds a value in `pmb.df` for a `column_name` and `es_type` pair. diff --git a/testsuite/define_and_create_molecules_unit_tests.py b/testsuite/define_and_create_molecules_unit_tests.py index 7c38c94..bce775e 100644 --- a/testsuite/define_and_create_molecules_unit_tests.py +++ b/testsuite/define_and_create_molecules_unit_tests.py @@ -23,6 +23,8 @@ import espressomd import logging import io +import unittest as ut + # Create an in-memory log stream log_stream = io.StringIO() logging.basicConfig(level=logging.INFO, @@ -34,7 +36,7 @@ # The unit tests for define_particle are in lj_tests.py and set_particle_acidity - +# Define particles, residues, and molecules for testing particle_parameters={"S1":{"name":"S1", "sigma":1*pmb.units.nm, "offset":0.5*pmb.units.nm, @@ -53,8 +55,6 @@ for particle_set in particle_parameters.values(): pmb.define_particle(**particle_set) -print("*** Unit test: check that define_residue() stores the parameters correctly in the pyMBE database ***") - residue_parameters={"R1":{"name": "R1", "central_bead": "S1", "side_chains": []}, @@ -68,125 +68,17 @@ for parameter_set in residue_parameters.values(): pmb.define_residue(**parameter_set) -for residue_name in residue_parameters.keys(): - input_parameters=residue_parameters[residue_name] - res_tpl = pmb.db.get_template(pmb_type="residue", - name=residue_name) - np.testing.assert_equal(actual=res_tpl.pmb_type, - desired="residue", - verbose=True) - np.testing.assert_equal(actual=res_tpl.central_bead, - 
desired=input_parameters["central_bead"], - verbose=True) - np.testing.assert_equal(actual=res_tpl.side_chains, - desired=input_parameters["side_chains"], - verbose=True) - -print("*** Unit test passed ***") - -print("*** Unit test: check that define_molecule() stores the parameters correctly in the pyMBE database ***") - molecule_parameters={"M1":{"name": "M1", "residue_list": []}, "M2":{"name": "M2", "residue_list": ["R1","R2","R3"]}} - for parameter_set in molecule_parameters.values(): pmb.define_molecule(**parameter_set) -for molecule_name in molecule_parameters.keys(): - input_parameters=molecule_parameters[molecule_name] - mol_tpl = pmb.db.get_template(pmb_type="molecule", - name=molecule_name) - np.testing.assert_equal(actual=mol_tpl.pmb_type, - desired="molecule", - verbose=True) - np.testing.assert_equal(actual=mol_tpl.residue_list, - desired=input_parameters["residue_list"], - verbose=True) - -print("*** Unit test passed ***") - -print("*** Unit test: check that create_particle() creates particles into the espresso_system with the properties defined in the pyMBE database ***") # Create an instance of an espresso system espresso_system=espressomd.System(box_l = [10]*3) particle_positions=[[0,0,0],[1,1,1]] -retval = pmb.create_particle(name="S1", - espresso_system=espresso_system, - number_of_particles=2, - fix=True, - position=particle_positions) -np.testing.assert_array_equal(retval, [0, 1]) - -particle_ids=pmb.get_particle_id_map(object_name="S1")["all"] -type_map=pmb.get_type_map() -for pid in particle_ids: - particle=espresso_system.part.by_id(pid) - np.testing.assert_equal(actual=particle.type, - desired=type_map["S1"], - verbose=True) - np.testing.assert_equal(actual=particle.q, - desired=particle_parameters["S1"]["z"], - verbose=True) - np.testing.assert_equal(actual=particle.fix, - desired=[True]*3, - verbose=True) - np.testing.assert_equal(actual=particle.pos, - desired=particle_positions[pid], - verbose=True) - -print("*** Unit test passed ***") 
- -print("*** Unit test: check that create_particle() does not create any particle for number_of_particles <= 0 ***") -starting_number_of_particles=len(espresso_system.part.all()) - -for number_of_particles in [0, -1]: - retval = pmb.create_particle(name="S1", - espresso_system=espresso_system, - number_of_particles=number_of_particles) - np.testing.assert_equal(len(retval), 0) -# If no particles have been created, only two particles should be in the system (from the previous test) -np.testing.assert_equal(actual=len(espresso_system.part.all()), - desired=starting_number_of_particles, - verbose=True) -print("*** Unit test passed ***") - -print("*** Unit test: check that create_particle() does not create any particle if one provides an undefined name ***") -pmb.create_particle(name="S23", - espresso_system=espresso_system, - number_of_particles=1) - -# If no particles have been created, only two particles should be in the system (from the previous test) -np.testing.assert_equal(actual=len(espresso_system.part.all()), - desired=starting_number_of_particles, - verbose=True) -print("*** Unit test passed ***") - -# Unit tests for delete particle -print("*** Unit test: check that delete_particle deletes the particle and cleans the pyMBE database ***") -starting_number_of_particles=len(espresso_system.part.all()) -starting_number_of_rows=len(pmb.get_instances_df(pmb_type="particle")) -# This should delete one particle instance -pmb.delete_instances_in_system(instance_id=0, - pmb_type="particle", - espresso_system=espresso_system) -np.testing.assert_equal(actual=len(espresso_system.part.all()), - desired=starting_number_of_particles-1, - verbose=True) -particle_df = pmb.get_instances_df(pmb_type="particle") -np.testing.assert_equal(actual=len(particle_df), - desired=starting_number_of_rows-1, - verbose=True) - -print(pmb.get_instances_df(pmb_type="particle")) - -# Delete the other particle instance to simplify the rest of the tests 
-pmb.delete_instances_in_system(instance_id=1, - pmb_type="particle", - espresso_system=espresso_system) - -print("*** Unit test: check that create_residue() creates a simple residue into the espresso_system with the properties defined in pmb.df ***") bond_type = 'harmonic' bond = {'r_0' : 0.4*pmb.units.nm, @@ -194,391 +86,419 @@ pmb.define_default_bond(bond_type = bond_type, bond_parameters = bond) +type_map=pmb.get_type_map() -central_bead_position=[[0,0,0]] -backbone_vector=np.array([1.,2.,3.]) - -pmb.create_residue(name="R2", - espresso_system=espresso_system, - central_bead_position=central_bead_position, - backbone_vector=backbone_vector, - use_default_bond=True) - -particle_ids=pmb.get_particle_id_map(object_name="R2")["all"] - -# Check that the particle properties are correct -for pid in particle_ids: - particle=espresso_system.part.by_id(pid) - particle_tpl = pmb.db.get_instance(pmb_type="particle", - instance_id=pid) - particle_name = particle_tpl.name - np.testing.assert_equal(actual=particle.type, - desired=type_map[particle_name], - verbose=True) - np.testing.assert_equal(actual=particle.q, - desired=particle_parameters[particle_name]["z"], - verbose=True) - # Check that the position are correct - # Central bead - if particle_name == "S1": - np.testing.assert_equal(actual=particle.pos, - desired=central_bead_position[0], - verbose=True) - else: # Side chains should be in positions perpendicular to the backbone vector - np.testing.assert_almost_equal(actual=np.dot(particle.pos,backbone_vector), - desired=0, - verbose=True) - # Check that particles have the correct residue id - residue_id = particle_tpl.residue_id - np.testing.assert_equal(actual=residue_id, - desired=0, - verbose=True) - -# Check that particles are correctly bonded -# Central bead S1 (id 0) should be bonded to S2 (id 1) and S3 (id 2) -bonded_pairs=[] -bond_df = pmb.get_instances_df(pmb_type="bond") -for bond_index in bond_df.index: - particle_id1= bond_df.loc[bond_index,"particle_id1"] 
- particle_id2= bond_df.loc[bond_index,"particle_id2"] - bonded_pair=frozenset([particle_id1,particle_id2]) - bonded_pairs.append(bonded_pair) - bonded_in_espresso = False - for pid in bonded_pair: - for bond in espresso_system.part.by_id(pid).bonds[:]: - bond_object = bond[0] - partner_id = bond[1] - if partner_id in bonded_pair: - bonded_in_espresso=True - np.testing.assert_equal(actual=bonded_in_espresso, - desired=True, - verbose=True) - -np.testing.assert_equal(actual=frozenset(bonded_pairs), - desired=frozenset([frozenset([0,1]),frozenset([0,2])]), - verbose=True) - -print("*** Unit test passed ***") - -print("*** Unit test: check that create_residue() creates a nested residue into the espresso_system with the properties defined in pmb.df ***") - -pmb.create_residue(name="R3", - espresso_system=espresso_system, - use_default_bond=True) - -particle_ids=pmb.get_particle_id_map(object_name="R3")["all"] - -# Check that the particle properties are correct -for pid in particle_ids: - particle=espresso_system.part.by_id(pid) - particle_tpl = pmb.db.get_instance(pmb_type="particle", - instance_id=pid) - particle_name = particle_tpl.name - np.testing.assert_equal(actual=particle.type, - desired=type_map[particle_name], - verbose=True) - np.testing.assert_equal(actual=particle.q, - desired=particle_parameters[particle_name]["z"], - verbose=True) - # Check that particles have the correct residue id - residue_id = particle_tpl.residue_id - np.testing.assert_equal(actual=residue_id, - desired=1, - verbose=True) - -# Check that particles are correctly bonded, new bonds are: -# Central bead S2 (id 3) should be bonded to R2 central bead S1 (id 4) -# Central bead S1 (id 4) should be bonded to side chains S2 (id 5) and S3 (id 6) - -bonded_pairs=[] -bond_df = pmb.get_instances_df(pmb_type="bond") - -for bond_index in bond_df.index: - particle_id1= bond_df.loc[bond_index,"particle_id1"] - particle_id2= bond_df.loc[bond_index,"particle_id2"] - 
bonded_pair=frozenset([particle_id1,particle_id2]) - bonded_pairs.append(bonded_pair) - bonded_in_espresso = False - for pid in bonded_pair: - for bond in espresso_system.part.by_id(pid).bonds[:]: - bond_object = bond[0] - partner_id = bond[1] - if partner_id in bonded_pair: - bonded_in_espresso=True - - np.testing.assert_equal(actual=bonded_in_espresso, - desired=True, - verbose=True) - -np.testing.assert_equal(actual=frozenset(bonded_pairs), +class Test(ut.TestCase): + def test_residue_definition(self): + """ + Unit test: check that define_residue() stores the parameters correctly in the pyMBE database + """ + for residue_name in residue_parameters.keys(): + input_parameters=residue_parameters[residue_name] + res_tpl = pmb.db.get_template(pmb_type="residue", + name=residue_name) + self.assertEqual(res_tpl.pmb_type, + "residue") + self.assertEqual(res_tpl.central_bead, + input_parameters["central_bead"]) + self.assertEqual(res_tpl.side_chains, + input_parameters["side_chains"]) + + def test_molecule_definition(self): + """ + Unit test: check that define_molecule() stores the parameters correctly in the pyMBE database + """ + for molecule_name in molecule_parameters.keys(): + input_parameters=molecule_parameters[molecule_name] + mol_tpl = pmb.db.get_template(pmb_type="molecule", + name=molecule_name) + self.assertEqual(mol_tpl.pmb_type, + "molecule") + self.assertEqual(mol_tpl.residue_list, + input_parameters["residue_list"]) + + def test_create_and_delete_particles(self): + """ + Docstring for test_create_and_delete_particles_residues_molecules + + """ + retval = pmb.create_particle(name="S1", + espresso_system=espresso_system, + number_of_particles=2, + fix=True, + position=particle_positions) + self.assertListEqual(retval, [0, 1]) + + particle_ids=pmb.get_particle_id_map(object_name="S1")["all"] + + for pid in particle_ids: + particle=espresso_system.part.by_id(pid) + self.assertEqual(actual=particle.type, + desired=type_map["S1"]) + 
self.assertEqual(actual=particle.q, + desired=particle_parameters["S1"]["z"]) + self.assertEqual(actual=particle.fix, + desired=[True]*3) + self.assertEqual(actual=particle.pos, + desired=particle_positions[pid]) + starting_number_of_particles=len(espresso_system.part.all()) + + for number_of_particles in [0, -1]: + retval = pmb.create_particle(name="S1", + espresso_system=espresso_system, + number_of_particles=number_of_particles) + self.assertEqual(len(retval), 0) + # If no particles have been created, only two particles should be in the system (from the previous test) + self.assertEqual(actual=len(espresso_system.part.all()), + desired=starting_number_of_particles) + pmb.create_particle(name="S23", + espresso_system=espresso_system, + number_of_particles=1) + + # If no particles have been created, only two particles should be in the system (from the previous test) + self.assertEqual(actual=len(espresso_system.part.all()), + desired=starting_number_of_particles) + + # Unit tests for delete particle + starting_number_of_particles=len(espresso_system.part.all()) + starting_number_of_rows=len(pmb.get_instances_df(pmb_type="particle")) + # This should delete one particle instance + pmb.delete_instances_in_system(instance_id=0, + pmb_type="particle", + espresso_system=espresso_system) + self.assertEqual(actual=len(espresso_system.part.all()), + desired=starting_number_of_particles-1) + particle_df = pmb.get_instances_df(pmb_type="particle") + self.assertEqual(actual=len(particle_df), + desired=starting_number_of_rows-1) + + # Delete the other particle instance to simplify the rest of the tests + pmb.delete_instances_in_system(instance_id=1, + pmb_type="particle", + espresso_system=espresso_system) + + def test_create_and_delete_residues(self): + """ + Tests for creating and deleting residues + """ + central_bead_position=[[0,0,0]] + backbone_vector=np.array([1.,2.,3.]) + + pmb.create_residue(name="R2", + espresso_system=espresso_system, + 
central_bead_position=central_bead_position, + backbone_vector=backbone_vector, + use_default_bond=True) + + particle_ids=pmb.get_particle_id_map(object_name="R2")["all"] + + # Check that the particle properties are correct + for pid in particle_ids: + particle=espresso_system.part.by_id(pid) + particle_tpl = pmb.db.get_instance(pmb_type="particle", + instance_id=pid) + particle_name = particle_tpl.name + self.assertEqual(actual=particle.type, + desired=type_map[particle_name]) + self.assertEqual(actual=particle.q, + desired=particle_parameters[particle_name]["z"]) + # Check that the position are correct + # Central bead + if particle_name == "S1": + self.assertListEqual(actual=particle.pos, + desired=central_bead_position[0]) + else: # Side chains should be in positions perpendicular to the backbone vector + self.assertAlmostEqual(actual=np.dot(particle.pos,backbone_vector), + desired=0, + places=10) + # Check that particles have the correct residue id + residue_id = particle_tpl.residue_id + self.assertEqual(actual=residue_id, + desired=0) + + # Check that particles are correctly bonded + # Central bead S1 (id 0) should be bonded to S2 (id 1) and S3 (id 2) + bonded_pairs=[] + bond_df = pmb.get_instances_df(pmb_type="bond") + for bond_index in bond_df.index: + particle_id1= bond_df.loc[bond_index,"particle_id1"] + particle_id2= bond_df.loc[bond_index,"particle_id2"] + bonded_pair=frozenset([particle_id1,particle_id2]) + bonded_pairs.append(bonded_pair) + bonded_in_espresso = False + for pid in bonded_pair: + for bond in espresso_system.part.by_id(pid).bonds[:]: + bond_object = bond[0] + partner_id = bond[1] + if partner_id in bonded_pair: + bonded_in_espresso=True + self.assertEqual(actual=bonded_in_espresso, + desired=True) + + self.assertEqual(actual=frozenset(bonded_pairs), + desired=frozenset([frozenset([0,1]),frozenset([0,2])])) + + pmb.create_residue(name="R3", + espresso_system=espresso_system, + use_default_bond=True) + + 
particle_ids=pmb.get_particle_id_map(object_name="R3")["all"] + + # Check that the particle properties are correct + for pid in particle_ids: + particle=espresso_system.part.by_id(pid) + particle_tpl = pmb.db.get_instance(pmb_type="particle", + instance_id=pid) + particle_name = particle_tpl.name + np.testing.assert_equal(actual=particle.type, + desired=type_map[particle_name], + verbose=True) + np.testing.assert_equal(actual=particle.q, + desired=particle_parameters[particle_name]["z"], + verbose=True) + # Check that particles have the correct residue id + residue_id = particle_tpl.residue_id + np.testing.assert_equal(actual=residue_id, + desired=1, + verbose=True) + + # Check that particles are correctly bonded, new bonds are: + # Central bead S2 (id 3) should be bonded to R2 central bead S1 (id 4) + # Central bead S1 (id 4) should be bonded to side chains S2 (id 5) and S3 (id 6) + + bonded_pairs=[] + bond_df = pmb.get_instances_df(pmb_type="bond") + + for bond_index in bond_df.index: + particle_id1= bond_df.loc[bond_index,"particle_id1"] + particle_id2= bond_df.loc[bond_index,"particle_id2"] + bonded_pair=frozenset([particle_id1,particle_id2]) + bonded_pairs.append(bonded_pair) + bonded_in_espresso = False + for pid in bonded_pair: + for bond in espresso_system.part.by_id(pid).bonds[:]: + partner_id = bond[1] + if partner_id in bonded_pair: + bonded_in_espresso=True + + self.assertEqual(actual=bonded_in_espresso, + desired=True) + self.assertEqual(actual=frozenset(bonded_pairs), desired=frozenset([frozenset([0,1]), - frozenset([0,2]), + frozenset([0,2]), frozenset([3,4]), frozenset([4,5]), - frozenset([4,6])]), - verbose=True) - -print("*** Unit test passed ***") - -print("*** Unit test: check that create_residue() does not create any residue if name is not defined in the pyMBE database ***") -starting_number_of_particles=len(espresso_system.part.all()) -pmb.create_residue(name="R51", - espresso_system=espresso_system, - use_default_bond=True) -# If no particles 
have been created, the number of particles should be the same as before -np.testing.assert_equal(actual=len(espresso_system.part.all()), - desired=starting_number_of_particles, - verbose=True) - -# Tests for delete_residue -print("*** Unit test: check that delete_residue deletes the particle and cleans the pyMBE database ***") - -# This should delete 3 particles (residue 0 is a R2 residue) -starting_number_of_particles=len(espresso_system.part.all()) -pmb.delete_instances_in_system(instance_id=0, - pmb_type="residue", - espresso_system=espresso_system) -np.testing.assert_equal(actual=len(espresso_system.part.all()), - desired=starting_number_of_particles-3, - verbose=True) -# There should be only one residue instance now in the pyMBE database -np.testing.assert_equal(actual=len(pmb.get_instances_df(pmb_type="residue")), - desired=1, - verbose=True) -# And there should be only 4 particles (central bead + 2 side chains + central bead of R3) -np.testing.assert_equal(actual=len(pmb.get_instances_df(pmb_type="particle")), - desired=4, - verbose=True) -# Delete the other residue instance to simplify the rest of the tests -pmb.delete_instances_in_system(instance_id=1, - pmb_type="residue", - espresso_system=espresso_system) -print("*** Unit test passed ***") -# Additional unit tests for define_molecule are in create_molecule_position_test -print("*** Unit test: check that create_molecule() creates a simple molecule into the espresso_system with the properties defined in pmb.df ***") - -backbone_vector = np.array([1,3,-4]) -magnitude = np.linalg.norm(backbone_vector) -backbone_vector = backbone_vector/magnitude -molecule_info_M2 = pmb.create_molecule(name="M2", - number_of_molecules=1, - espresso_system=espresso_system, - backbone_vector = backbone_vector, - use_default_bond=True) - -particle_ids=pmb.get_particle_id_map(object_name="M2")["all"] - -# Residue and molecule IDs expected -# For the M2 molecule created, the residue and molecule IDs should be as follows: -# R1 
(residue_id=0, molecule_id=0), R2 (residue_id=1, molecule_id=0), R3 (residue_id=2, molecule_id=0) - -residue_ids={0: 0, 1: 1, 2: 1, 3: 1, 4: 2, 5: 2, 6: 2, 7: 2} -molecule_ids={0: 0, 1: 0, 2: 0, 3: 0, 4: 0, 5: 0, 6: 0, 7: 0} - -# Check that the particle properties are correct -for pid in particle_ids: - particle=espresso_system.part.by_id(pid) - particle_tpl = pmb.db.get_instance(pmb_type="particle", - instance_id=pid) - particle_name = particle_tpl.name - np.testing.assert_equal(actual=particle.type, - desired=type_map[particle_name], - verbose=True) - np.testing.assert_equal(actual=particle.q, - desired=particle_parameters[particle_name]["z"], - verbose=True) - # Check that particles have the correct residue id - residue_id = particle_tpl.residue_id - np.testing.assert_equal(actual=residue_id, - desired=residue_ids[pid], - verbose=True) - # Check that particles have the correct molecule id - molecule_id = particle_tpl.molecule_id - np.testing.assert_equal(actual=molecule_id, - desired=molecule_ids[pid], - verbose=True) - -# Check that the molecule has the right residues -residue_list=[] -residue_df = pmb.get_instances_df(pmb_type="residue") -for res_index in residue_df[residue_df['molecule_id']==0].index: - resname = residue_df.loc[res_index,"name"] - residue_list.append(resname) -np.testing.assert_equal(actual=frozenset(residue_list), - desired=frozenset(molecule_parameters["M2"]["residue_list"]), - verbose=True) - -# Expected bonded pairs for the molecule -# Molecule 0: -# S1(0)-S1(1) (R1-R2) -# S1(1)-S2(2) (R2) -# S1(1)-S3(3) (R2) -# S2(1)-S2(4) (R2-R3) -# S2(4)-S1(5) (R3) -# S1(5)-S2(6) (R3) -# S1(5)-S3(7) (R3) - -bonded_pairs_ref=[frozenset([0,1]), - frozenset([1,2]), - frozenset([1,3]), - frozenset([1,4]), - frozenset([4,5]), - frozenset([5,6]), - frozenset([5,7])] - -# Check that particles are correctly bonded -bonded_pairs=[] -bond_df = pmb.get_instances_df(pmb_type="bond") -for bond_index in bond_df.index: - particle_id1= 
bond_df.loc[bond_index,"particle_id1"] - particle_id2= bond_df.loc[bond_index,"particle_id2"] - bonded_pair=frozenset([particle_id1,particle_id2]) - bonded_pairs.append(bonded_pair) - bonded_in_espresso = False - for pid in bonded_pair: - for bond in espresso_system.part.by_id(pid).bonds[:]: - bond_object = bond[0] - partner_id = bond[1] - if partner_id in bonded_pair: - bonded_in_espresso=True - np.testing.assert_equal(actual=bonded_in_espresso, - desired=True, - verbose=True) -np.testing.assert_equal(actual = frozenset(bonded_pairs), - desired = frozenset(bonded_pairs_ref), - verbose = True) - -print("*** Unit test passed ***") -print("*** Unit test: check the backbone vector of the molecule in espresso and the given input backbone vector are same ***") - -central_bead_positions = [] - -residue_map=pmb.get_particle_id_map(object_name="M2")["residue_map"] -for res_id in residue_map.keys(): - central_bead_id = min(residue_map[res_id]) - central_bead_pos = espresso_system.part.by_id(central_bead_id).pos - central_bead_positions.append(central_bead_pos) - -# Here one expects 3 central bead positions for residues R1, R2, and R3 -np.testing.assert_equal(len(central_bead_positions),len(molecule_parameters["M2"]["residue_list"])) - -backbone_direction_1 = central_bead_positions[1] - central_bead_positions[0] -backbone_direction_2 = central_bead_positions[2] - central_bead_positions[1] -backbone_direction_1 /= np.linalg.norm(backbone_direction_1) -backbone_direction_2 /= np.linalg.norm(backbone_direction_2) -np.testing.assert_almost_equal( - actual = backbone_direction_1, - desired = backbone_vector, - verbose = True) -np.testing.assert_almost_equal( - actual = backbone_direction_2, - desired = backbone_vector, - verbose = True) - - -print("*** Unit test passed ***") - -print("*** Unit test: check that create_molecule() does not create any molecule for number_of_molecules <= 0 ***") - -starting_number_of_particles=len(espresso_system.part.all()) 
-pmb.create_molecule(name="M2", - number_of_molecules=0, - espresso_system=espresso_system, - use_default_bond=True) -pmb.create_molecule(name="M2", - number_of_molecules=-1, - espresso_system=espresso_system, - use_default_bond=True) -# If no particles have been created, only two particles should be in the system (from the previous test) -np.testing.assert_equal(actual=len(espresso_system.part.all()), - desired=starting_number_of_particles, - verbose=True) -print("*** Unit test passed ***") - -print("*** Unit test: check that create_molecule() does not create any molecule if one provides an undefined name ***") - -starting_number_of_particles=len(espresso_system.part.all()) -pmb.create_molecule(name="M23", - number_of_molecules=1, - espresso_system=espresso_system, - use_default_bond=True) -np.testing.assert_equal(actual=len(espresso_system.part.all()), - desired=starting_number_of_particles, - verbose=True) -print("*** Unit test passed ***") - -# Unit tests for get_radius_map -print("*** Unit test: check that get_radius_map() provides the right amount of radii corresponding to the number of different particles in the simulation box ***") - -np.testing.assert_equal(actual=len(pmb.get_radius_map()), - desired=len(particle_parameters.values()), - verbose=True) - -print("*** Unit test passed ***") - -print("*** Unit test: check that get_radius_map() provides the right values of the radii of the particles, which corresponds to (sigma+offset)/2 ***") - -desired_radii=[] -for particle in particle_parameters.values(): - desired_radii.append((particle['sigma'].magnitude+particle['offset'].magnitude)/2) - -actual_radii=[pmb.get_radius_map()[0], - pmb.get_radius_map()[1], - pmb.get_radius_map()[2],] - -np.testing.assert_equal(actual=actual_radii, - desired=desired_radii, - verbose=True) - -print("*** Unit test passed ***") - -print("*** Unit test: check that the default value for the argument 'dimensionless' in get_radius_map() is True ***") - 
-np.testing.assert_equal(actual=isinstance(pmb.get_radius_map()[0],float), - desired=True, - verbose=True) - -print("*** Unit test passed ***") - -print("*** Unit test: check that if the argument 'dimensionless' is False in get_radius_map() then we obtain the corresponding units ***") - - -np.testing.assert_equal(actual=pmb.get_radius_map(dimensionless=False)[0].dimensionality, - desired=pmb.units.nm.dimensionality, - verbose=True) - -print("*** Unit test passed ***") - -# Tests for delete_residue -print("*** Unit test: check that delete_molecule deletes the particle and cleans pmb.df ***") -# create another molecule just to have two molecules in the system -pmb.create_molecule(name="M2", - number_of_molecules=1, - espresso_system=espresso_system, - backbone_vector = backbone_vector, - use_default_bond=True) - -# This should delete 8 particles (molecule 0 is a M2 molecule) - -starting_number_of_particles=len(espresso_system.part.all()) -pmb.delete_instances_in_system(instance_id=0, - pmb_type="molecule", - espresso_system=espresso_system) -np.testing.assert_equal(actual=len(espresso_system.part.all()), - desired=starting_number_of_particles-8, - verbose=True) - -# There should only one molecule instance now in the pyMBE database -np.testing.assert_equal(actual=len(pmb.get_instances_df(pmb_type="molecule")), - desired=1, - verbose=True) -# There should be only 3 residues (from the remaining M2 molecule) -np.testing.assert_equal(actual=len(pmb.get_instances_df(pmb_type="residue")), - desired=3, - verbose=True) -# There should be only 8 particles (from the remaining M2 molecule) -np.testing.assert_equal(actual=len(pmb.get_instances_df(pmb_type="particle")), - desired=8, - verbose=True) - -print("*** Unit test passed ***") \ No newline at end of file + frozenset([4,6])])) + starting_number_of_particles=len(espresso_system.part.all()) + pmb.create_residue(name="R51", + espresso_system=espresso_system, + use_default_bond=True) + # If no particles have been created, the 
number of particles should be the same as before + self.assertEqual(actual=len(espresso_system.part.all()), + desired=starting_number_of_particles) + + # Tests for delete_residue + # This should delete 3 particles (residue 0 is a R2 residue) + starting_number_of_particles=len(espresso_system.part.all()) + pmb.delete_instances_in_system(instance_id=0, + pmb_type="residue", + espresso_system=espresso_system) + self.assertEqual(actual=len(espresso_system.part.all()), + desired=starting_number_of_particles-3) + # There should be only one residue instance now in the pyMBE database + self.assertEqual(actual=len(pmb.get_instances_df(pmb_type="residue")), + desired=1) + # And there should be only 4 particles (central bead + 2 side chains + central bead of R3) + self.assertEqual(actual=len(pmb.get_instances_df(pmb_type="particle")), + desired=4) + # Delete the other residue instance to simplify the rest of the tests + pmb.delete_instances_in_system(instance_id=1, + pmb_type="residue", + espresso_system=espresso_system) + def test_create_and_delete_molecules(self): + """ + Tests for creating and deleting molecules + """ + + backbone_vector = np.array([1,3,-4]) + magnitude = np.linalg.norm(backbone_vector) + backbone_vector = backbone_vector/magnitude + pmb.create_molecule(name="M2", + number_of_molecules=1, + espresso_system=espresso_system, + backbone_vector = backbone_vector, + use_default_bond=True) + + particle_ids=pmb.get_particle_id_map(object_name="M2")["all"] + + # Residue and molecule IDs expected + # For the M2 molecule created, the residue and molecule IDs should be as follows: + # R1 (residue_id=0, molecule_id=0), R2 (residue_id=1, molecule_id=0), R3 (residue_id=2, molecule_id=0) + + residue_ids={0: 0, 1: 1, 2: 1, 3: 1, 4: 2, 5: 2, 6: 2, 7: 2} + molecule_ids={0: 0, 1: 0, 2: 0, 3: 0, 4: 0, 5: 0, 6: 0, 7: 0} + + # Check that the particle properties are correct + for pid in particle_ids: + particle=espresso_system.part.by_id(pid) + particle_tpl = 
pmb.db.get_instance(pmb_type="particle", + instance_id=pid) + particle_name = particle_tpl.name + self.assertEqual(actual=particle.type, + desired=type_map[particle_name]) + self.assertEqual(actual=particle.q, + desired=particle_parameters[particle_name]["z"]) + # Check that particles have the correct residue id + residue_id = particle_tpl.residue_id + self.assertEqual(actual=residue_id, + desired=residue_ids[pid]) + # Check that particles have the correct molecule id + molecule_id = particle_tpl.molecule_id + self.assertEqual(actual=molecule_id, + desired=molecule_ids[pid]) + + # Check that the molecule has the right residues + residue_list=[] + residue_df = pmb.get_instances_df(pmb_type="residue") + for res_index in residue_df[residue_df['molecule_id']==0].index: + resname = residue_df.loc[res_index,"name"] + residue_list.append(resname) + self.assertEqual(actual=frozenset(residue_list), + desired=frozenset(molecule_parameters["M2"]["residue_list"])) + + # Expected bonded pairs for the molecule + # Molecule 0: + # S1(0)-S1(1) (R1-R2) + # S1(1)-S2(2) (R2) + # S1(1)-S3(3) (R2) + # S2(1)-S2(4) (R2-R3) + # S2(4)-S1(5) (R3) + # S1(5)-S2(6) (R3) + # S1(5)-S3(7) (R3) + + bonded_pairs_ref=[frozenset([0,1]), + frozenset([1,2]), + frozenset([1,3]), + frozenset([1,4]), + frozenset([4,5]), + frozenset([5,6]), + frozenset([5,7])] + + # Check that particles are correctly bonded + bonded_pairs=[] + bond_df = pmb.get_instances_df(pmb_type="bond") + for bond_index in bond_df.index: + particle_id1= bond_df.loc[bond_index,"particle_id1"] + particle_id2= bond_df.loc[bond_index,"particle_id2"] + bonded_pair=frozenset([particle_id1,particle_id2]) + bonded_pairs.append(bonded_pair) + bonded_in_espresso = False + for pid in bonded_pair: + for bond in espresso_system.part.by_id(pid).bonds[:]: + bond_object = bond[0] + partner_id = bond[1] + if partner_id in bonded_pair: + bonded_in_espresso=True + self.assertEqual(actual=bonded_in_espresso, + desired=True) + self.assertEqual(actual = 
frozenset(bonded_pairs), + desired = frozenset(bonded_pairs_ref)) + + central_bead_positions = [] + + residue_map=pmb.get_particle_id_map(object_name="M2")["residue_map"] + for res_id in residue_map.keys(): + central_bead_id = min(residue_map[res_id]) + central_bead_pos = espresso_system.part.by_id(central_bead_id).pos + central_bead_positions.append(central_bead_pos) + + # Here one expects 3 central bead positions for residues R1, R2, and R3 + self.assertEqual(len(central_bead_positions),len(molecule_parameters["M2"]["residue_list"])) + + backbone_direction_1 = central_bead_positions[1] - central_bead_positions[0] + backbone_direction_2 = central_bead_positions[2] - central_bead_positions[1] + backbone_direction_1 /= np.linalg.norm(backbone_direction_1) + backbone_direction_2 /= np.linalg.norm(backbone_direction_2) + np.testing.assert_almost_equal( + actual = backbone_direction_1, + desired = backbone_vector, + verbose = True) + np.testing.assert_almost_equal( + actual = backbone_direction_2, + desired = backbone_vector, + verbose = True) + + starting_number_of_particles=len(espresso_system.part.all()) + pmb.create_molecule(name="M2", + number_of_molecules=0, + espresso_system=espresso_system, + use_default_bond=True) + pmb.create_molecule(name="M2", + number_of_molecules=-1, + espresso_system=espresso_system, + use_default_bond=True) + # If no particles have been created, only two particles should be in the system (from the previous test) + self.assertEqual(actual=len(espresso_system.part.all()), + desired=starting_number_of_particles) + + starting_number_of_particles=len(espresso_system.part.all()) + pmb.create_molecule(name="M23", + number_of_molecules=1, + espresso_system=espresso_system, + use_default_bond=True) + self.assertEqual(actual=len(espresso_system.part.all()), + desired=starting_number_of_particles) + + # Tests for delete_molecule + + # create another molecule just to have two molecules in the system + pmb.create_molecule(name="M2", + 
number_of_molecules=1, + espresso_system=espresso_system, + backbone_vector = backbone_vector, + use_default_bond=True) + + # This should delete 8 particles (molecule 0 is a M2 molecule) + + starting_number_of_particles=len(espresso_system.part.all()) + pmb.delete_instances_in_system(instance_id=0, + pmb_type="molecule", + espresso_system=espresso_system) + self.assertEqual(actual=len(espresso_system.part.all()), + desired=starting_number_of_particles-8) + + # There should only one molecule instance now in the pyMBE database + self.assertEqual(actual=len(pmb.get_instances_df(pmb_type="molecule")), + desired=1) + # There should be only 3 residues (from the remaining M2 molecule) + self.assertEqual(actual=len(pmb.get_instances_df(pmb_type="residue")), + desired=3) + # There should be only 8 particles (from the remaining M2 molecule) + self.assertEqual(actual=len(pmb.get_instances_df(pmb_type="particle")), + desired=8) + + def test_get_radius_map(self): + """ + Tests for get_radius_map + """ + + self.assertEqual(actual=len(pmb.get_radius_map()), + desired=len(particle_parameters.values())) + + + desired_radii=[] + for particle in particle_parameters.values(): + desired_radii.append((particle['sigma'].magnitude+particle['offset'].magnitude)/2) + + actual_radii=[pmb.get_radius_map()[0], + pmb.get_radius_map()[1], + pmb.get_radius_map()[2],] + + self.assertEqual(actual=actual_radii, + desired=desired_radii) + + self.assertEqual(actual=isinstance(pmb.get_radius_map()[0],float), + desired=True) + + self.assertEqual(actual=pmb.get_radius_map(dimensionless=False)[0].dimensionality, + desired=pmb.units.nm.dimensionality) \ No newline at end of file diff --git a/testsuite/lj_tests.py b/testsuite/lj_tests.py index 79a238a..64e589c 100644 --- a/testsuite/lj_tests.py +++ b/testsuite/lj_tests.py @@ -18,7 +18,6 @@ # Import pyMBE and other libraries import pyMBE -import pyMBE.storage.df_management as df_management import numpy as np import logging import io @@ -41,61 +40,73 @@ 
"offset":3*pmb.units.nm} pmb.define_particle(**input_parameters) +part_tpl = pmb.db.get_template(name="A", pmb_type="particle") for parameter_key in input_parameters.keys(): - np.testing.assert_equal(actual=pmb.df[parameter_key].values[0], + atr = getattr(part_tpl, parameter_key) + if isinstance(atr, str): + np.testing.assert_equal(actual=atr, desired=input_parameters[parameter_key], verbose=True) + else: + np.testing.assert_equal(actual=atr.to_quantity(pmb.units), + desired=input_parameters[parameter_key], + verbose=True) +# Clean template from the database +pmb.db.delete_template(name="A", pmb_type="particle") + print("*** Unit test passed ***") print("*** Unit test: check that `offset` defaults to 0***") -# Clean pmb.df -pmb.df = df_management._DFManagement._setup_df() -# Define dummy particle -pmb.define_particle(name="A") +print("*** Unit test: check that `cutoff` defaults to `2**(1./6.) reduced_length` ***") + +input_parameters={"name":"A", + "sigma":1*pmb.units.nm, + "epsilon":pmb.units.Quantity(1,"reduced_energy")} -np.testing.assert_equal(actual=pmb.df["offset"].values[0], +pmb.define_particle(**input_parameters) +part_tpl = pmb.db.get_template(name="A", pmb_type="particle") +np.testing.assert_equal(actual=part_tpl.offset.to_quantity(pmb.units), desired=pmb.units.Quantity(0,"reduced_length"), verbose=True) -print("*** Unit test passed ***") - -print("*** Unit test: check that `cutoff` defaults to `2**(1./6.) 
reduced_length` ***") -# Clean pmb.df -pmb.df = df_management._DFManagement._setup_df() -# Define dummy particle -pmb.define_particle(name="A") - -np.testing.assert_equal(actual=pmb.df["cutoff"].values[0], +np.testing.assert_equal(actual=part_tpl.cutoff.to_quantity(pmb.units), desired=pmb.units.Quantity(2**(1./6.),"reduced_length"), verbose=True) +# Clean template from the database +pmb.db.delete_template(name="A", pmb_type="particle") print("*** Unit test passed ***") print("*** Unit test: check that define_particle raises a ValueError if sigma is provided with the wrong dimensionality ***") input_parameters={"name":"B", - "sigma":1*pmb.units.ns } + "sigma":1*pmb.units.ns, + "epsilon":pmb.units.Quantity(1,"reduced_energy") } np.testing.assert_raises(ValueError, pmb.define_particle, **input_parameters) print("*** Unit test passed ***") print("*** Unit test: check that define_particle raises a ValueError if offset is provided with the wrong dimensionality ***") input_parameters={"name":"B", - "offset":1*pmb.units.ns } + "offset":1*pmb.units.ns, + "sigma":1*pmb.units.nm, + "epsilon":pmb.units.Quantity(1,"reduced_energy") } np.testing.assert_raises(ValueError, pmb.define_particle, **input_parameters) print("*** Unit test passed ***") print("*** Unit test: check that define_particle raises a ValueError if cutoff is provided with the wrong dimensionality ***") input_parameters={"name":"B", - "cutoff":1*pmb.units.ns } + "cutoff":1*pmb.units.ns, + "sigma":1*pmb.units.nm, + "epsilon":pmb.units.Quantity(1,"reduced_energy") } np.testing.assert_raises(ValueError, pmb.define_particle, **input_parameters) print("*** Unit test passed ***") print("*** Unit test: check that define_particle raises a ValueError if epsilon is provided with the wrong dimensionality ***") input_parameters={"name":"B", - "epsilon":1*pmb.units.ns } + "epsilon":1*pmb.units.ns, + "sigma":1*pmb.units.nm, } np.testing.assert_raises(ValueError, pmb.define_particle, **input_parameters) print("*** Unit test 
passed ***") print("*** Unit test: test that setup_lj_interactions sets up inert particles correctly ***") -# Clean pmb.df -pmb.df = df_management._DFManagement._setup_df() + # Define particles A_input_parameters={"name":"A", "sigma":1*pmb.units.nm, @@ -115,23 +126,16 @@ "epsilon":pmb.units.Quantity(2,"reduced_energy"), "cutoff":2*2**(1./6.)*pmb.units.nm, "offset":2*pmb.units.nm} -X_input_parameters={"name":"X"} pmb.define_particle(**A_input_parameters) pmb.define_particle(**B_input_parameters) pmb.define_particle(**C_input_parameters) -pmb.define_particle(**X_input_parameters) # Create a dummy instance of an espresso system import espressomd espresso_system=espressomd.System(box_l = [50]*3) pmb.setup_lj_interactions(espresso_system=espresso_system) -log_contents = log_stream.getvalue() -assert "The following particles do not have a defined value of sigma or epsilon" in log_contents - -df_management._DFManagement._delete_entries_in_df(df=pmb.df, - entry_name="X") # ValueError if combining-rule other than Lorentz_-Berthelot is used input_params = {"espresso_system":espresso_system, "combining_rule": "Geometric"} From e733218cf04f57c76105c921cd525801ad0e5c0c Mon Sep 17 00:00:00 2001 From: pmblanco Date: Tue, 13 Jan 2026 15:05:12 +0100 Subject: [PATCH 16/55] implement lj templates, fix tests --- pyMBE/pyMBE.py | 122 +++---- pyMBE/storage/manager.py | 83 +++++ pyMBE/storage/templates/lj.py | 4 +- test.py | 6 +- .../define_and_create_molecules_unit_tests.py | 206 +++++------ testsuite/lj_tests.py | 336 ++++++++---------- 6 files changed, 384 insertions(+), 373 deletions(-) diff --git a/pyMBE/pyMBE.py b/pyMBE/pyMBE.py index 17a9f7b..952fd30 100644 --- a/pyMBE/pyMBE.py +++ b/pyMBE/pyMBE.py @@ -26,25 +26,28 @@ import logging import importlib.resources -# Templates +# Database from pyMBE.storage.manager import Manager -from pyMBE.storage.templates.particle import ParticleTemplate, ParticleState -from pyMBE.storage.instances.particle import ParticleInstance -from 
pyMBE.storage.reactions.reaction import Reaction, ReactionParticipant from pyMBE.storage.pint_quantity import PintQuantity +## Templates +from pyMBE.storage.templates.particle import ParticleTemplate, ParticleState from pyMBE.storage.templates.residue import ResidueTemplate -from pyMBE.storage.instances.residue import ResidueInstance from pyMBE.storage.templates.molecule import MoleculeTemplate -from pyMBE.storage.instances.molecule import MoleculeInstance -from pyMBE.storage.templates.bond import BondTemplate -from pyMBE.storage.instances.bond import BondInstance from pyMBE.storage.templates.peptide import PeptideTemplate -from pyMBE.storage.instances.peptide import PeptideInstance from pyMBE.storage.templates.protein import ProteinTemplate -from pyMBE.storage.instances.protein import ProteinInstance from pyMBE.storage.templates.hydrogel import HydrogelTemplate, HydrogelNode, HydrogelChain +from pyMBE.storage.templates.bond import BondTemplate +from pyMBE.storage.templates.lj import LJInteractionTemplate +## Instances +from pyMBE.storage.instances.particle import ParticleInstance +from pyMBE.storage.instances.residue import ResidueInstance +from pyMBE.storage.instances.molecule import MoleculeInstance +from pyMBE.storage.instances.peptide import PeptideInstance +from pyMBE.storage.instances.protein import ProteinInstance +from pyMBE.storage.instances.bond import BondInstance from pyMBE.storage.instances.hydrogel import HydrogelInstance - +## Reactions +from pyMBE.storage.reactions.reaction import Reaction, ReactionParticipant # Utilities import pyMBE.lib.handy_functions as hf import pyMBE.storage.io as io @@ -2741,7 +2744,7 @@ def setup_grxmc_unified(self, pH_res, c_salt_res, cation_name, anion_name, activ def setup_lj_interactions(self, espresso_system, shift_potential=True, combining_rule='Lorentz-Berthelot'): """ - Sets up the Lennard-Jones (LJ) potential between all pairs of particle types with values for `sigma`, `offset`, and `epsilon` stored in `pymbe.df`. 
+ Sets up the Lennard-Jones (LJ) potential between all pairs of particle states defined in the pyMBE database. Args: espresso_system(`espressomd.system.System`): Instance of a system object from the espressomd library. @@ -2750,67 +2753,54 @@ def setup_lj_interactions(self, espresso_system, shift_potential=True, combining warning(`bool`, optional): switch to activate/deactivate warning messages. Defaults to True. Note: - - LJ interactions will only be set up between particles with defined values of `sigma` and `epsilon` in the pmb.df. - Currently, the only `combining_rule` supported is Lorentz-Berthelot. - Check the documentation of ESPResSo for more info about the potential https://espressomd.github.io/doc4.2.0/inter_non-bonded.html """ from itertools import combinations_with_replacement - compulsory_parameters_in_df = ['sigma','epsilon'] - shift=0 - if shift_potential: - shift="auto" - # List which particles have sigma and epsilon values defined in pmb.df and which ones don't - particles_types_with_LJ_parameters = [] - non_parametrized_labels= [] - for particle_type in self.get_type_map().values(): - check_list=[] - for key in compulsory_parameters_in_df: - value_in_df=self.find_value_from_es_type(es_type=particle_type, - column_name=key) - check_list.append(pd.isna(value_in_df)) - if any(check_list): - non_parametrized_labels.append(self.find_value_from_es_type(es_type=particle_type, - column_name='label')) - else: - particles_types_with_LJ_parameters.append(particle_type) - # Set up LJ interactions between all particle types - for type_pair in combinations_with_replacement(particles_types_with_LJ_parameters, 2): - particle_name1 = self.find_value_from_es_type(es_type=type_pair[0], - column_name="name") - particle_name2 = self.find_value_from_es_type(es_type=type_pair[1], - column_name="name") - lj_parameters= self.get_lj_parameters(particle_name1 = particle_name1, - particle_name2 = particle_name2, - combining_rule = combining_rule) - - # If one of the particle 
has sigma=0, no LJ interations are set up between that particle type and the others + + particle_templates = self.db.get_templates("particle") + + shift = "auto" if shift_potential else 0 + + # Flatten states with template context + state_entries = [] + for tpl in particle_templates.values(): + for state in tpl.states.values(): + state_entries.append((tpl, state)) + + # Iterate over all unique state pairs + for (tpl1, state1), (tpl2, state2) in combinations_with_replacement(state_entries, 2): + + lj_parameters = self.get_lj_parameters(particle_name1=tpl1.name, + particle_name2=tpl2.name, + combining_rule=combining_rule) if not lj_parameters: continue - espresso_system.non_bonded_inter[type_pair[0],type_pair[1]].lennard_jones.set_params(epsilon = lj_parameters["epsilon"].to('reduced_energy').magnitude, - sigma = lj_parameters["sigma"].to('reduced_length').magnitude, - cutoff = lj_parameters["cutoff"].to('reduced_length').magnitude, - offset = lj_parameters["offset"].to("reduced_length").magnitude, - shift = shift) - index = len(self.df) - label1 = self.find_value_from_es_type(es_type=type_pair[0], column_name="label") - label2 = self.find_value_from_es_type(es_type=type_pair[1], column_name="label") - self.df.at [index, 'name'] = f'LJ: {label1}-{label2}' - lj_params=espresso_system.non_bonded_inter[type_pair[0], type_pair[1]].lennard_jones.get_params() - - _DFm._add_value_to_df(df = self.df, - index = index, - key = ('pmb_type',''), - new_value = 'LennardJones') - - _DFm._add_value_to_df(df = self.df, - index = index, - key = ('parameters_of_the_potential',''), - new_value = lj_params, - non_standard_value = True) - if non_parametrized_labels: - logging.warning(f'The following particles do not have a defined value of sigma or epsilon in pmb.df: {non_parametrized_labels}. 
No LJ interaction has been added in ESPResSo for those particles.') - return + + espresso_system.non_bonded_inter[state1.es_type, state2.es_type].lennard_jones.set_params( + epsilon=lj_parameters["epsilon"].to("reduced_energy").magnitude, + sigma=lj_parameters["sigma"].to("reduced_length").magnitude, + cutoff=lj_parameters["cutoff"].to("reduced_length").magnitude, + offset=lj_parameters["offset"].to("reduced_length").magnitude, + shift=shift) + + lj_template = LJInteractionTemplate(state1=state1.name, + state2=state2.name, + sigma=PintQuantity.from_quantity(q=lj_parameters["sigma"], + expected_dimension="length", + ureg=self.units), + epsilon=PintQuantity.from_quantity(q=lj_parameters["epsilon"], + expected_dimension="energy", + ureg=self.units), + cutoff=PintQuantity.from_quantity(q=lj_parameters["cutoff"], + expected_dimension="length", + ureg=self.units), + offset=PintQuantity.from_quantity(q=lj_parameters["offset"], + expected_dimension="length", + ureg=self.units), + shift=shift) + self.db._register_template(lj_template) def write_pmb_df (self, filename): ''' diff --git a/pyMBE/storage/manager.py b/pyMBE/storage/manager.py index 3450e02..752a2c0 100644 --- a/pyMBE/storage/manager.py +++ b/pyMBE/storage/manager.py @@ -783,6 +783,24 @@ def delete_template(self, pmb_type, name): if not self._templates[pmb_type]: del self._templates[pmb_type] + def delete_templates(self, pmb_type): + """ + Remove all templates registered in the pyMBE database for a given pyMBE type. + + Args: + pmb_type (str): + Template category (e.g. ``"particle"``, ``"residue"``, + ``"molecule"``, ``"hydrogel"``). + + Notes: + - This operation is irreversible. + - Instance data is not affected. + - If no templates exist for the given type, the method is a no-op. + """ + if pmb_type in self._templates: + del self._templates[pmb_type] + + def delete_instance(self, pmb_type, instance_id, cascade=False): """ Delete an instance from the pyMBE database. 
@@ -900,6 +918,39 @@ def delete_instance(self, pmb_type, instance_id, cascade=False): if not self._instances[pmb_type]: del self._instances[pmb_type] + def delete_instances(self, pmb_type, cascade=False): + """ + Remove all instances registered for a given pyMBE type. + + Args: + pmb_type (str): + Instance category (e.g. ``"particle"``, ``"residue"``, + ``"molecule"``, ``"protein"``, ``"hydrogel"``). + cascade (bool): + If True, dependent objects are removed according to the + pyMBE hierarchy rules. If False, deletion is forbidden when + dependencies exist. + + Raises: + ValueError: + If ``cascade=False`` and at least one instance has dependencies. + + Notes: + - Deletion order is deterministic and safe. + - If no instances exist for the given type, the method is a no-op. + """ + if pmb_type not in self._instances: + return + + # Copy IDs to avoid modifying dict during iteration + instance_ids = list(self._instances[pmb_type].keys()) + + for instance_id in instance_ids: + self.delete_instance(pmb_type=pmb_type, + instance_id=instance_id, + cascade=cascade) + + def get_instance(self, pmb_type, instance_id): """ Retrieve a stored instance by type and instance_id. @@ -926,6 +977,22 @@ def get_instance(self, pmb_type, instance_id): else: return self._instances[pmb_type][instance_id] + def get_instances(self, pmb_type): + """ + Return all instances registered for a given pyMBE type. + + Args: + pmb_type (str): + The pyMBE type (e.g. 'particle', 'residue', 'molecule', 'hydrogel'). + + Returns: + dict: + Mapping {instance_id: instance_object}. + Returns an empty dict if no instances exist for the given type. + """ + return self._instances.get(pmb_type, {}).copy() + + def get_template(self, pmb_type, name): """ Retrieve a stored template by type and name. @@ -952,6 +1019,22 @@ def get_template(self, pmb_type, name): else: return self._templates[pmb_type][name] + def get_templates(self, pmb_type): + """ + Return all templates registered for a given pyMBE type. 
+ + Args: + pmb_type (str): + The pyMBE type (e.g. 'particle', 'residue', 'molecule', 'hydrogel'). + + Returns: + dict: + Mapping {template_name: template_instance}. + Returns an empty dict if no templates exist for the given type. + """ + return self._templates.get(pmb_type, {}).copy() + + def get_es_types_map(self): """ Iterates over all particle templates and extracts the ESPResSo type (`es_type`) diff --git a/pyMBE/storage/templates/lj.py b/pyMBE/storage/templates/lj.py index cdd1e52..c61c1f3 100644 --- a/pyMBE/storage/templates/lj.py +++ b/pyMBE/storage/templates/lj.py @@ -61,8 +61,6 @@ class LJInteractionTemplate(BaseModel): Notes: - The order of ``state1`` and ``state2`` does **not** matter. The name is always generated as ``"min(state1, state2)-max(state1, state2)"``. - - Users should store raw LJ parameters (σ, ε) in particle templates. - This object stores the *final combined* interaction values. Examples: Creating an LJ interaction: @@ -97,7 +95,7 @@ class LJInteractionTemplate(BaseModel): epsilon: PintQuantity cutoff: PintQuantity offset: PintQuantity - shift: str | PintQuantity + shift: str | float @classmethod diff --git a/test.py b/test.py index 97472d5..21d12ac 100644 --- a/test.py +++ b/test.py @@ -69,7 +69,10 @@ def main(): offset=0 * units.reduced_length, epsilon=0.2 * units.reduced_energy, z=-1) - + print(pmb.db._get_templates_df(pmb_type="particle")) + print("\n=== Setup LJ interactions ===") + pmb.setup_lj_interactions(espresso_system=espresso_system) + print(pmb.db._get_templates_df(pmb_type="lj")) pmb.define_residue(name="R1", central_bead="Z", side_chains=["X","Z"]) pmb.define_residue(name="R2", central_bead="Z", side_chains=["X","R1"]) @@ -313,6 +316,7 @@ def main(): print(db3._get_templates_df("protein")) print(db3._get_templates_df("hydrogel")) print(db3._get_templates_df("lj")) + exit() print("\nLoaded DB3 Instances DataFrame:") print(db3._get_instances_df("particle")) print(db3._get_instances_df("residue")) diff --git 
a/testsuite/define_and_create_molecules_unit_tests.py b/testsuite/define_and_create_molecules_unit_tests.py index bce775e..e80209c 100644 --- a/testsuite/define_and_create_molecules_unit_tests.py +++ b/testsuite/define_and_create_molecules_unit_tests.py @@ -133,14 +133,14 @@ def test_create_and_delete_particles(self): for pid in particle_ids: particle=espresso_system.part.by_id(pid) - self.assertEqual(actual=particle.type, - desired=type_map["S1"]) - self.assertEqual(actual=particle.q, - desired=particle_parameters["S1"]["z"]) - self.assertEqual(actual=particle.fix, - desired=[True]*3) - self.assertEqual(actual=particle.pos, - desired=particle_positions[pid]) + self.assertEqual(first=particle.type, + second=type_map["S1"]) + self.assertEqual(first=particle.q, + second=particle_parameters["S1"]["z"]) + self.assertListEqual(list1=list(particle.fix), + list2=[True]*3) + self.assertListEqual(list1=list(particle.pos), + list2=particle_positions[pid]) starting_number_of_particles=len(espresso_system.part.all()) for number_of_particles in [0, -1]: @@ -149,15 +149,15 @@ def test_create_and_delete_particles(self): number_of_particles=number_of_particles) self.assertEqual(len(retval), 0) # If no particles have been created, only two particles should be in the system (from the previous test) - self.assertEqual(actual=len(espresso_system.part.all()), - desired=starting_number_of_particles) + self.assertEqual(first=len(espresso_system.part.all()), + second=starting_number_of_particles) pmb.create_particle(name="S23", espresso_system=espresso_system, number_of_particles=1) # If no particles have been created, only two particles should be in the system (from the previous test) - self.assertEqual(actual=len(espresso_system.part.all()), - desired=starting_number_of_particles) + self.assertEqual(first=len(espresso_system.part.all()), + second=starting_number_of_particles) # Unit tests for delete particle starting_number_of_particles=len(espresso_system.part.all()) @@ -166,21 +166,18 
@@ def test_create_and_delete_particles(self): pmb.delete_instances_in_system(instance_id=0, pmb_type="particle", espresso_system=espresso_system) - self.assertEqual(actual=len(espresso_system.part.all()), - desired=starting_number_of_particles-1) + self.assertEqual(first=len(espresso_system.part.all()), + second=starting_number_of_particles-1) particle_df = pmb.get_instances_df(pmb_type="particle") - self.assertEqual(actual=len(particle_df), - desired=starting_number_of_rows-1) + self.assertEqual(first=len(particle_df), + second=starting_number_of_rows-1) # Delete the other particle instance to simplify the rest of the tests pmb.delete_instances_in_system(instance_id=1, pmb_type="particle", espresso_system=espresso_system) - def test_create_and_delete_residues(self): - """ - Tests for creating and deleting residues - """ + central_bead_position=[[0,0,0]] backbone_vector=np.array([1.,2.,3.]) @@ -198,23 +195,23 @@ def test_create_and_delete_residues(self): particle_tpl = pmb.db.get_instance(pmb_type="particle", instance_id=pid) particle_name = particle_tpl.name - self.assertEqual(actual=particle.type, - desired=type_map[particle_name]) - self.assertEqual(actual=particle.q, - desired=particle_parameters[particle_name]["z"]) + self.assertEqual(first=particle.type, + second=type_map[particle_name]) + self.assertEqual(first=particle.q, + second=particle_parameters[particle_name]["z"]) # Check that the position are correct # Central bead if particle_name == "S1": - self.assertListEqual(actual=particle.pos, - desired=central_bead_position[0]) + self.assertListEqual(list1=list(particle.pos), + list2=central_bead_position[0]) else: # Side chains should be in positions perpendicular to the backbone vector - self.assertAlmostEqual(actual=np.dot(particle.pos,backbone_vector), - desired=0, + self.assertAlmostEqual(first=np.dot(particle.pos,backbone_vector), + second=0, places=10) # Check that particles have the correct residue id residue_id = particle_tpl.residue_id - 
self.assertEqual(actual=residue_id, - desired=0) + self.assertEqual(first=residue_id, + second=0) # Check that particles are correctly bonded # Central bead S1 (id 0) should be bonded to S2 (id 1) and S3 (id 2) @@ -232,11 +229,11 @@ def test_create_and_delete_residues(self): partner_id = bond[1] if partner_id in bonded_pair: bonded_in_espresso=True - self.assertEqual(actual=bonded_in_espresso, - desired=True) + self.assertEqual(first=bonded_in_espresso, + second=True) - self.assertEqual(actual=frozenset(bonded_pairs), - desired=frozenset([frozenset([0,1]),frozenset([0,2])])) + self.assertEqual(first=frozenset(bonded_pairs), + second=frozenset([frozenset([0,1]),frozenset([0,2])])) pmb.create_residue(name="R3", espresso_system=espresso_system, @@ -250,17 +247,14 @@ def test_create_and_delete_residues(self): particle_tpl = pmb.db.get_instance(pmb_type="particle", instance_id=pid) particle_name = particle_tpl.name - np.testing.assert_equal(actual=particle.type, - desired=type_map[particle_name], - verbose=True) - np.testing.assert_equal(actual=particle.q, - desired=particle_parameters[particle_name]["z"], - verbose=True) + self.assertEqual(first=particle.type, + second=type_map[particle_name]) + self.assertEqual(first=particle.q, + second=particle_parameters[particle_name]["z"]) # Check that particles have the correct residue id residue_id = particle_tpl.residue_id - np.testing.assert_equal(actual=residue_id, - desired=1, - verbose=True) + self.assertEqual(first=residue_id, + second=1) # Check that particles are correctly bonded, new bonds are: # Central bead S2 (id 3) should be bonded to R2 central bead S1 (id 4) @@ -281,10 +275,10 @@ def test_create_and_delete_residues(self): if partner_id in bonded_pair: bonded_in_espresso=True - self.assertEqual(actual=bonded_in_espresso, - desired=True) - self.assertEqual(actual=frozenset(bonded_pairs), - desired=frozenset([frozenset([0,1]), + self.assertEqual(first=bonded_in_espresso, + second=True) + 
self.assertEqual(first=frozenset(bonded_pairs), + second=frozenset([frozenset([0,1]), frozenset([0,2]), frozenset([3,4]), frozenset([4,5]), @@ -294,8 +288,8 @@ def test_create_and_delete_residues(self): espresso_system=espresso_system, use_default_bond=True) # If no particles have been created, the number of particles should be the same as before - self.assertEqual(actual=len(espresso_system.part.all()), - desired=starting_number_of_particles) + self.assertEqual(first=len(espresso_system.part.all()), + second=starting_number_of_particles) # Tests for delete_residue # This should delete 3 particles (residue 0 is a R2 residue) @@ -303,22 +297,18 @@ def test_create_and_delete_residues(self): pmb.delete_instances_in_system(instance_id=0, pmb_type="residue", espresso_system=espresso_system) - self.assertEqual(actual=len(espresso_system.part.all()), - desired=starting_number_of_particles-3) + self.assertEqual(first=len(espresso_system.part.all()), + second=starting_number_of_particles-3) # There should be only one residue instance now in the pyMBE database - self.assertEqual(actual=len(pmb.get_instances_df(pmb_type="residue")), - desired=1) + self.assertEqual(first=len(pmb.get_instances_df(pmb_type="residue")), + second=1) # And there should be only 4 particles (central bead + 2 side chains + central bead of R3) - self.assertEqual(actual=len(pmb.get_instances_df(pmb_type="particle")), - desired=4) + self.assertEqual(first=len(pmb.get_instances_df(pmb_type="particle")), + second=4) # Delete the other residue instance to simplify the rest of the tests pmb.delete_instances_in_system(instance_id=1, pmb_type="residue", espresso_system=espresso_system) - def test_create_and_delete_molecules(self): - """ - Tests for creating and deleting molecules - """ backbone_vector = np.array([1,3,-4]) magnitude = np.linalg.norm(backbone_vector) @@ -344,18 +334,18 @@ def test_create_and_delete_molecules(self): particle_tpl = pmb.db.get_instance(pmb_type="particle", instance_id=pid) 
particle_name = particle_tpl.name - self.assertEqual(actual=particle.type, - desired=type_map[particle_name]) - self.assertEqual(actual=particle.q, - desired=particle_parameters[particle_name]["z"]) + self.assertEqual(first=particle.type, + second=type_map[particle_name]) + self.assertEqual(first=particle.q, + second=particle_parameters[particle_name]["z"]) # Check that particles have the correct residue id residue_id = particle_tpl.residue_id - self.assertEqual(actual=residue_id, - desired=residue_ids[pid]) + self.assertEqual(first=residue_id, + second=residue_ids[pid]) # Check that particles have the correct molecule id molecule_id = particle_tpl.molecule_id - self.assertEqual(actual=molecule_id, - desired=molecule_ids[pid]) + self.assertEqual(first=molecule_id, + second=molecule_ids[pid]) # Check that the molecule has the right residues residue_list=[] @@ -363,8 +353,8 @@ def test_create_and_delete_molecules(self): for res_index in residue_df[residue_df['molecule_id']==0].index: resname = residue_df.loc[res_index,"name"] residue_list.append(resname) - self.assertEqual(actual=frozenset(residue_list), - desired=frozenset(molecule_parameters["M2"]["residue_list"])) + self.assertEqual(first=frozenset(residue_list), + second=frozenset(molecule_parameters["M2"]["residue_list"])) # Expected bonded pairs for the molecule # Molecule 0: @@ -395,17 +385,14 @@ def test_create_and_delete_molecules(self): bonded_in_espresso = False for pid in bonded_pair: for bond in espresso_system.part.by_id(pid).bonds[:]: - bond_object = bond[0] partner_id = bond[1] if partner_id in bonded_pair: bonded_in_espresso=True - self.assertEqual(actual=bonded_in_espresso, - desired=True) - self.assertEqual(actual = frozenset(bonded_pairs), - desired = frozenset(bonded_pairs_ref)) - + self.assertEqual(first=bonded_in_espresso, + second=True) + self.assertEqual(first = frozenset(bonded_pairs), + second = frozenset(bonded_pairs_ref)) central_bead_positions = [] - 
residue_map=pmb.get_particle_id_map(object_name="M2")["residue_map"] for res_id in residue_map.keys(): central_bead_id = min(residue_map[res_id]) @@ -414,20 +401,14 @@ def test_create_and_delete_molecules(self): # Here one expects 3 central bead positions for residues R1, R2, and R3 self.assertEqual(len(central_bead_positions),len(molecule_parameters["M2"]["residue_list"])) - backbone_direction_1 = central_bead_positions[1] - central_bead_positions[0] backbone_direction_2 = central_bead_positions[2] - central_bead_positions[1] backbone_direction_1 /= np.linalg.norm(backbone_direction_1) backbone_direction_2 /= np.linalg.norm(backbone_direction_2) - np.testing.assert_almost_equal( - actual = backbone_direction_1, - desired = backbone_vector, - verbose = True) - np.testing.assert_almost_equal( - actual = backbone_direction_2, - desired = backbone_vector, - verbose = True) - + np.testing.assert_almost_equal(actual= list(backbone_direction_1), + desired= list(backbone_vector)) + np.testing.assert_almost_equal(actual= list(backbone_direction_2), + desired= list(backbone_vector)) starting_number_of_particles=len(espresso_system.part.all()) pmb.create_molecule(name="M2", number_of_molecules=0, @@ -438,67 +419,64 @@ def test_create_and_delete_molecules(self): espresso_system=espresso_system, use_default_bond=True) # If no particles have been created, only two particles should be in the system (from the previous test) - self.assertEqual(actual=len(espresso_system.part.all()), - desired=starting_number_of_particles) + self.assertEqual(first=len(espresso_system.part.all()), + second=starting_number_of_particles) starting_number_of_particles=len(espresso_system.part.all()) pmb.create_molecule(name="M23", number_of_molecules=1, espresso_system=espresso_system, use_default_bond=True) - self.assertEqual(actual=len(espresso_system.part.all()), - desired=starting_number_of_particles) - + self.assertEqual(first=len(espresso_system.part.all()), + second=starting_number_of_particles) 
def test_get_radius_map(self):
    """
    Tests for get_radius_map

    Checks that:
    - the radius map has as many entries as there are defined particles,
    - the radii stored for types 0, 1 and 2 equal (sigma + offset) / 2 of the
      particles in `particle_parameters`,
    - the default map holds plain floats,
    - with `dimensionless=False` the values have the dimensionality of a length.
    """
    # The dimensionless map is queried once and reused by all the checks below.
    radius_map = pmb.get_radius_map()
    self.assertEqual(first=len(radius_map),
                     second=len(particle_parameters))

    expected_radii = [(particle['sigma'].magnitude + particle['offset'].magnitude) / 2
                      for particle in particle_parameters.values()]
    # NOTE(review): assumes particle types 0, 1 and 2 follow the iteration
    # order of `particle_parameters` -- confirm against the module-level setup.
    stored_radii = [radius_map[0],
                    radius_map[1],
                    radius_map[2]]
    self.assertEqual(first=stored_radii,
                     second=expected_radii)

    self.assertIsInstance(radius_map[0], float)
    self.assertEqual(first=pmb.get_radius_map(dimensionless=False)[0].dimensionality,
                     second=pmb.units.nm.dimensionality)
-print("*** Unit test: check that `offset` defaults to 0***") -print("*** Unit test: check that `cutoff` defaults to `2**(1./6.) reduced_length` ***") - -input_parameters={"name":"A", - "sigma":1*pmb.units.nm, - "epsilon":pmb.units.Quantity(1,"reduced_energy")} - -pmb.define_particle(**input_parameters) -part_tpl = pmb.db.get_template(name="A", pmb_type="particle") -np.testing.assert_equal(actual=part_tpl.offset.to_quantity(pmb.units), - desired=pmb.units.Quantity(0,"reduced_length"), - verbose=True) -np.testing.assert_equal(actual=part_tpl.cutoff.to_quantity(pmb.units), - desired=pmb.units.Quantity(2**(1./6.),"reduced_length"), - verbose=True) -# Clean template from the database -pmb.db.delete_template(name="A", pmb_type="particle") -print("*** Unit test passed ***") - -print("*** Unit test: check that define_particle raises a ValueError if sigma is provided with the wrong dimensionality ***") -input_parameters={"name":"B", - "sigma":1*pmb.units.ns, - "epsilon":pmb.units.Quantity(1,"reduced_energy") } -np.testing.assert_raises(ValueError, pmb.define_particle, **input_parameters) -print("*** Unit test passed ***") - -print("*** Unit test: check that define_particle raises a ValueError if offset is provided with the wrong dimensionality ***") -input_parameters={"name":"B", - "offset":1*pmb.units.ns, - "sigma":1*pmb.units.nm, - "epsilon":pmb.units.Quantity(1,"reduced_energy") } -np.testing.assert_raises(ValueError, pmb.define_particle, **input_parameters) -print("*** Unit test passed ***") - -print("*** Unit test: check that define_particle raises a ValueError if cutoff is provided with the wrong dimensionality ***") -input_parameters={"name":"B", - "cutoff":1*pmb.units.ns, - "sigma":1*pmb.units.nm, - "epsilon":pmb.units.Quantity(1,"reduced_energy") } -np.testing.assert_raises(ValueError, pmb.define_particle, **input_parameters) -print("*** Unit test passed ***") - -print("*** Unit test: check that define_particle raises a ValueError if epsilon is provided with the 
wrong dimensionality ***") -input_parameters={"name":"B", - "epsilon":1*pmb.units.ns, - "sigma":1*pmb.units.nm, } -np.testing.assert_raises(ValueError, pmb.define_particle, **input_parameters) -print("*** Unit test passed ***") - -print("*** Unit test: test that setup_lj_interactions sets up inert particles correctly ***") - - -# Define particles -A_input_parameters={"name":"A", - "sigma":1*pmb.units.nm, - "epsilon":pmb.units.Quantity(1,"reduced_energy"), - "cutoff":2**(1./6.)*pmb.units.nm, - "offset":1*pmb.units.nm} - -B_input_parameters={"name":"B", - "sigma":2*pmb.units.nm, - "epsilon":pmb.units.Quantity(2,"reduced_energy"), - "cutoff":2*2**(1./6.)*pmb.units.nm, - "offset":2*pmb.units.nm, - "acidity": "acidic", - "pka": 3} -C_input_parameters={"name":"C", - "sigma":0*pmb.units.nm, - "epsilon":pmb.units.Quantity(2,"reduced_energy"), - "cutoff":2*2**(1./6.)*pmb.units.nm, - "offset":2*pmb.units.nm} - -pmb.define_particle(**A_input_parameters) -pmb.define_particle(**B_input_parameters) -pmb.define_particle(**C_input_parameters) - -# Create a dummy instance of an espresso system import espressomd espresso_system=espressomd.System(box_l = [50]*3) -pmb.setup_lj_interactions(espresso_system=espresso_system) - -# ValueError if combining-rule other than Lorentz_-Berthelot is used -input_params = {"espresso_system":espresso_system, "combining_rule": "Geometric"} -np.testing.assert_raises(ValueError, pmb.setup_lj_interactions, **input_params) - -# Initialized with shift=0 -pmb.setup_lj_interactions(espresso_system=espresso_system, shift_potential=False) - -# Setup LJ interactions shift="auto" -pmb.setup_lj_interactions(espresso_system=espresso_system) - -# Check A-A LJ setup -setup_AA_lj_parameters=pmb.df[pmb.df['name']=="LJ: A-A"].parameters_of_the_potential.values[0] - -for parameter_key in ["sigma","offset","cutoff"]: - np.testing.assert_equal(actual=setup_AA_lj_parameters[parameter_key], - desired=A_input_parameters[parameter_key].to("reduced_length").magnitude, - 
verbose=True) -np.testing.assert_equal(actual=setup_AA_lj_parameters["epsilon"], - desired=A_input_parameters["epsilon"].to("reduced_energy").magnitude, - verbose=True) - -print("*** Unit test passed ***") -print("*** Unit test: test that setup_lj_interactions sets up acid/base particles correctly ***") - - -# Check B-B, B-BH, BH-BH setup -labels=["B-B", "BH-B", "BH-BH"] - -for label in labels: - setup_lj_parameters=pmb.df[pmb.df['name']==f"LJ: {label}"].parameters_of_the_potential.values[0] - for parameter_key in ["sigma","offset","cutoff"]: - np.testing.assert_equal(actual=setup_lj_parameters[parameter_key], - desired=B_input_parameters[parameter_key].to("reduced_length").magnitude, - verbose=True) - np.testing.assert_equal(actual=setup_lj_parameters["epsilon"], - desired=B_input_parameters["epsilon"].to("reduced_energy").magnitude, - verbose=True) - -print("*** Unit test passed ***") -print("*** Unit test: test that setup_lj_interactions sets up LJ interaction between different particles correctly ***") - - -# Calculate the reference parameters -# Assuming Lorentz-Berthelot combining rule -# Check A-BH, A-B, setup -labels=["A-BH", "A-B"] - -ref_lj_parameters={} -for parameter_key in ["sigma","offset","cutoff"]: - ref_lj_parameters[parameter_key]=(A_input_parameters[parameter_key]+B_input_parameters[parameter_key])/2 -ref_lj_parameters["epsilon"]=np.sqrt(A_input_parameters["epsilon"]*B_input_parameters["epsilon"]) - -# Check the parameters set up by pyMBE against the reference parameters -for label in labels: - setup_lj_parameters=pmb.df[pmb.df['name']==f"LJ: {label}"].parameters_of_the_potential.values[0] - for parameter_key in ["sigma","offset","cutoff"]: - np.testing.assert_equal(actual=setup_lj_parameters[parameter_key], - desired=ref_lj_parameters[parameter_key].to("reduced_length").magnitude, - verbose=True) - np.testing.assert_equal(actual=setup_lj_parameters["epsilon"], - desired=ref_lj_parameters["epsilon"].to("reduced_energy").magnitude, - verbose=True) 
-print("*** Unit test passed ***") - -print("*** Unit test: test that setup_lj_interactions does not set up any LJ interactions for particles with sigma = 0 ***") - -lj_labels=pmb.filter_df("LennardJones")["name"].values -# Check that no interaction between particle C and any other particle has been set up -# Particle C has sigma = 0 (ideally behaving particle) - -for label in lj_labels: - assert "C" not in label, \ - f"Error: pmb.setup_lj_interactions() set up LJ interaction for ideal particle with label {label}" - -print("*** Unit test passed ***") - -print("*** Unit test: test that get_lj_parameters() raises the ValueError when the combination rule is not Loretz-Berthelot ***") - -input_params = {"particle_name1":"A", - "particle_name2":"B", - "combining_rule":"Geometric"} -np.testing.assert_raises(ValueError, pmb.get_lj_parameters, **input_params) -print("*** All unit tests passed ***") -print("*** All unit tests passed ***") +class Test(ut.TestCase): + def test_particle_definition(self): + """ + Unit test to check that define_particle stores correctly all LJ input parameters in the pyMBE database. 
+ """ + input_parameters={"name":"D", + "sigma":1*pmb.units.nm, + "epsilon":pmb.units.Quantity(1,"reduced_energy"), + "cutoff":2*pmb.units.nm, + "offset":3*pmb.units.nm} + + pmb.define_particle(**input_parameters) + part_tpl = pmb.db.get_template(name="D", + pmb_type="particle") + for parameter_key in input_parameters.keys(): + atr = getattr(part_tpl, parameter_key) + if isinstance(atr, str): + self.assertEqual(first=atr, + second=input_parameters[parameter_key]) + else: + if parameter_key == "epsilon": + self.assertAlmostEqual(first=atr.to_quantity(pmb.units).to("reduced_energy").magnitude, + second=input_parameters[parameter_key].to("reduced_energy").magnitude) + else: + self.assertEqual(first=atr.to_quantity(pmb.units).to("reduced_length").magnitude, + second=input_parameters[parameter_key].to("reduced_length").magnitude) + # Clean template from the database + pmb.db.delete_template(name="D", + pmb_type="particle") + + input_parameters={"name":"D", + "sigma":1*pmb.units.nm, + "epsilon":pmb.units.Quantity(1,"reduced_energy")} + + pmb.define_particle(**input_parameters) + part_tpl = pmb.db.get_template(name="D", + pmb_type="particle") + self.assertEqual(first=part_tpl.offset.to_quantity(pmb.units), + second=pmb.units.Quantity(0,"reduced_length")) + self.assertEqual(first=part_tpl.cutoff.to_quantity(pmb.units), + second=pmb.units.Quantity(2**(1./6.),"reduced_length")) + # Clean template from the database + pmb.db.delete_template(name="D", + pmb_type="particle") + # check that define_particle raises a ValueError if sigma is provided with the wrong dimensionality + input_parameters={"name":"E", + "sigma":1*pmb.units.ns, + "epsilon":pmb.units.Quantity(1,"reduced_energy") } + self.assertRaises(ValueError, pmb.define_particle, **input_parameters) + # Unit test: check that define_particle raises a ValueError if offset is provided with the wrong dimensionality + input_parameters={"name":"E", + "offset":1*pmb.units.ns, + "sigma":1*pmb.units.nm, + 
"epsilon":pmb.units.Quantity(1,"reduced_energy") } + self.assertRaises(ValueError, pmb.define_particle, **input_parameters) + # Unit test: check that define_particle raises a ValueError if cutoff is provided with the wrong dimensionality + input_parameters={"name":"E", + "cutoff":1*pmb.units.ns, + "sigma":1*pmb.units.nm, + "epsilon":pmb.units.Quantity(1,"reduced_energy") } + self.assertRaises(ValueError, pmb.define_particle, **input_parameters) + # Unit test: check that define_particle raises a ValueError if epsilon is provided with the wrong dimensionality + input_parameters={"name":"E", + "epsilon":1*pmb.units.ns, + "sigma":1*pmb.units.nm, } + self.assertRaises(ValueError, pmb.define_particle, **input_parameters) + + def test_lj_interaction_setup(self): + """ + Unit test to check that setup_lj_interactions sets up correctly LJ interactions between acid/base particles. + """ + # Define particles + A_input_parameters={"name":"A", + "sigma":1*pmb.units.nm, + "epsilon":pmb.units.Quantity(1,"reduced_energy"), + "cutoff":2**(1./6.)*pmb.units.nm, + "offset":1*pmb.units.nm} + + B_input_parameters={"name":"B", + "sigma":2*pmb.units.nm, + "epsilon":pmb.units.Quantity(2,"reduced_energy"), + "cutoff":2*2**(1./6.)*pmb.units.nm, + "offset":2*pmb.units.nm, + "acidity": "acidic", + "pka": 3} + C_input_parameters={"name":"C", + "sigma":0*pmb.units.nm, + "epsilon":pmb.units.Quantity(2,"reduced_energy"), + "cutoff":2*2**(1./6.)*pmb.units.nm, + "offset":2*pmb.units.nm} + pmb.define_particle(**A_input_parameters) + pmb.define_particle(**B_input_parameters) + pmb.define_particle(**C_input_parameters) + # Setup LJ interactions shift="auto" + pmb.setup_lj_interactions(espresso_system=espresso_system) + # Check A-A LJ setup + lj_templates = pmb.db.get_templates(pmb_type="lj") + # Check B-B, B-BH, BH-BH setup + labels=["A-A", "B-B", "B-BH", "BH-BH"] + for label in labels: + lj_template = lj_templates[label] + if label == "A-A": + input_params = A_input_parameters + else: + input_params = 
B_input_parameters + for parameter_key in ["sigma","offset","cutoff"]: + value_in_pyMBE = getattr(lj_template, parameter_key).to_quantity(pmb.units) + self.assertEqual(first=value_in_pyMBE.to("reduced_length").magnitude, + second=input_params[parameter_key].to("reduced_length").magnitude) + self.assertAlmostEqual(first=lj_template.epsilon.to_quantity(pmb.units).to("reduced_energy").magnitude, + second=input_params["epsilon"].to("reduced_energy").magnitude) + # Clean LJ interactions + pmb.db.delete_templates(pmb_type="lj") + # ValueError if combining-rule other than Lorentz_-Berthelot is used + input_params = {"espresso_system":espresso_system, "combining_rule": "Geometric"} + self.assertRaises(ValueError, pmb.setup_lj_interactions, **input_params) + # Check initialization with shift=0 + pmb.setup_lj_interactions(espresso_system=espresso_system, shift_potential=False) + # Calculate the reference parameters using Lorentz-Berthelot combining rule + # Check A-BH, A-B, setup + labels=["A-BH", "A-B"] + ref_lj_parameters={} + for parameter_key in ["sigma","offset","cutoff"]: + ref_lj_parameters[parameter_key]=(A_input_parameters[parameter_key]+B_input_parameters[parameter_key])/2 + ref_lj_parameters["epsilon"]=np.sqrt(A_input_parameters["epsilon"]*B_input_parameters["epsilon"]) + + for label in labels: + lj_template = lj_templates[label] + for parameter_key in ["sigma","offset","cutoff"]: + value_in_pyMBE = getattr(lj_template, parameter_key).to_quantity(pmb.units) + self.assertEqual(first=value_in_pyMBE.to("reduced_length").magnitude, + second=ref_lj_parameters[parameter_key].to("reduced_length").magnitude) + self.assertAlmostEqual(first=lj_template.epsilon.to_quantity(pmb.units).to("reduced_energy").magnitude, + second=ref_lj_parameters["epsilon"].to("reduced_energy").magnitude) + # Check that no interaction between particle C and any other particle has been set up + # Particle C has sigma = 0 (ideally behaving particle) + for label in lj_templates.keys(): + 
self.assertFalse("C" in label) + input_params = {"particle_name1":"A", + "particle_name2":"B", + "combining_rule":"Geometric"} + self.assertRaises(ValueError, pmb.get_lj_parameters, **input_params) + +if __name__ == "__main__": + ut.main() \ No newline at end of file From b71bb42ccf4d252e7be71691de0b739e9499bfa2 Mon Sep 17 00:00:00 2001 From: pmblanco Date: Fri, 16 Jan 2026 13:06:48 +0100 Subject: [PATCH 17/55] add demo, add functions to load/save the database to the pyMBE API --- demo.py | 179 +++++++++++++++++++++++++++++++++++++++++++++++++ pyMBE/pyMBE.py | 35 ++++++++++ test.py | 11 ++- 3 files changed, 222 insertions(+), 3 deletions(-) create mode 100644 demo.py diff --git a/demo.py b/demo.py new file mode 100644 index 0000000..53e2e01 --- /dev/null +++ b/demo.py @@ -0,0 +1,179 @@ +import pyMBE +import espressomd +from pyMBE.lib.lattice import DiamondLattice + +# Setup +pmb = pyMBE.pymbe_library(seed=42) +units = pmb.units +espresso_system = espressomd.System(box_l=[10, 10, 10]) +# Define some particle templates + +pmb.define_particle(name="Z", + sigma=3.5 * units.reduced_length, + cutoff=4 * units.reduced_length, + offset=0 * units.reduced_length, + epsilon=0.2 * units.reduced_energy, + acidity="acidic", + pka=4.25) + +pmb.define_particle(name="X", + sigma=3.5 * units.reduced_length, + cutoff=4 * units.reduced_length, + offset=0 * units.reduced_length, + epsilon=0.2 * units.reduced_energy, + z=1) + +print("\n=== Particle Templates DataFrame ===") +print(pmb.get_templates_df(pmb_type="particle")) + +# Access some data in the database +tpl_particle_Z = pmb.db.get_template(name="Z", pmb_type="particle") +tpl_particle_X = pmb.db.get_template(name="X", pmb_type="particle") + +# PintQuantity usage example +print("\n=== PintQuantity Usage Example ===") +print(f"PintQuantity class stored in the pyMBE database: {tpl_particle_Z.sigma}") +# Convert to Pint Quantity +sigma_Z = tpl_particle_Z.sigma.to_quantity(units) +print(f"Converted sigma_Z: {sigma_Z} 
({sigma_Z.to('reduced_length')})") +# Operate with Pint Quantity +sigma_X = tpl_particle_X.sigma.to_quantity(units) +print(sigma_Z+sigma_X) + +# Setup LJ interactions +pmb.setup_lj_interactions(espresso_system=espresso_system) +print("\n=== LJ Templates DataFrame ===") +print(pmb.get_templates_df(pmb_type="lj")) + +# Create instances of particles +pmb.create_particle(name="Z", + espresso_system=espresso_system, + number_of_particles=3) +pmb.create_particle(name="X", + espresso_system=espresso_system, + number_of_particles=1) +print("\n=== Particle Instances DataFrame ===") +print(pmb.get_instances_df(pmb_type="particle")) + +# Delete instances of particles 0-2 +for i in range(3): + pmb.delete_instances_in_system(espresso_system=espresso_system, + pmb_type="particle", + instance_id=i) + +print("\n=== Particle Instances DataFrame After Deletion ===") +print(pmb.get_instances_df(pmb_type="particle")) +pmb.delete_instances_in_system(espresso_system=espresso_system, + pmb_type="particle", + instance_id=3) + +# Create residue +## Define residues and bonds +pmb.define_residue(name="R1", central_bead="Z", side_chains=["X","Z"]) +parameters = {"k": 100.0 * units.reduced_energy / (units.reduced_length**2), + "r_0": 1.0 * units.reduced_length} +pmb.define_bond(bond_type="harmonic", + bond_parameters=parameters, + particle_pairs=[["Z","Z"], + ["Z","X"], + ["X","X"]]) + +print("\n=== Residue Templates DataFrame ===") +print(pmb.get_templates_df(pmb_type="residue")) +print("\n=== Bond Templates DataFrame ===") +print(pmb.get_templates_df(pmb_type="bond")) + +# Create residue instance +pmb.create_residue(name="R1", + espresso_system=espresso_system) + + +print("\n=== Particle Instances DataFrame ===") +print(pmb.get_instances_df(pmb_type="particle")) +print("\n=== Residue Instances DataFrame ===") +print(pmb.get_instances_df(pmb_type="residue")) +print("\n=== Bond Instances DataFrame ===") +print(pmb.get_instances_df(pmb_type="bond")) + +# Save database 
+pmb.save_database("demo_csv") + +### Now create a new pyMBE instance with another set of reduced units +pmb2 = pyMBE.pymbe_library(seed=24) +pmb2.set_reduced_units(unit_length=0.6*pmb2.units.nanometer,) + +pmb2.load_database("demo_csv") +print("\n=== Original Particle Templates DataFrame ===") +print(pmb.get_templates_df(pmb_type="particle")) +print("\n=== Loaded Particle Templates DataFrame ===") +print(pmb2.get_templates_df(pmb_type="particle")) + +# Access some data in the database +tpl_particle_Z = pmb.db.get_template(name="Z", pmb_type="particle") +tpl_particle_Z_loaded = pmb2.db.get_template(name="Z", pmb_type="particle") + +print("\n=== PintQuantity Usage Example After Loading Database ===") +original_sigma_Z = tpl_particle_Z.sigma.to_quantity(pmb.units) +loaded_sigma_Z = tpl_particle_Z_loaded.sigma.to_quantity(pmb2.units) +print(f"Original sigma_Z: {original_sigma_Z.to('nanometer')} {original_sigma_Z.to('reduced_length')}") +print(f"Loaded sigma_Z: {loaded_sigma_Z.to('nanometer')} {loaded_sigma_Z.to('reduced_length')}") + +# Delete the residue before proceding to the last example +pmb.delete_instances_in_system(espresso_system=espresso_system, + pmb_type="residue", + instance_id=0) +print("\n=== Particle Instances DataFrame After Deletion ===") +print(pmb.get_instances_df(pmb_type="particle")) +print("\n=== Residue Instances DataFrame After Deletion ===") +print(pmb.get_instances_df(pmb_type="residue")) +print("\n=== Bond Instances DataFrame After Deletion ===") +print(pmb.get_instances_df(pmb_type="bond")) + +# Final example: let's create a hydrogel +## First define a molecule for the chains of the hydrogel +pmb.define_molecule(name="M1", + residue_list=["R1"]*1) +diamond_lattice = DiamondLattice(4, 3.5 * units.reduced_length) +lattice_builder = pmb.initialize_lattice_builder(diamond_lattice) + +# Setting up node topology --> Nodes are particles of type "X" +indices = diamond_lattice.indices +node_topology = [] + +for index in range(len(indices)): + 
node_topology.append({"particle_name": "X", + "lattice_index": indices[index]}) + +# Setting up chain topology --> Chains are molecules of type "M1" +node_labels = lattice_builder.node_labels +chain_labels = lattice_builder.chain_labels +reverse_node_labels = {v: k for k, v in node_labels.items()} +chain_topology = [] + +for chain_data in chain_labels.items(): + node_label_pair = chain_data[0] + node_label_s, node_label_e = [int(x) for x in node_label_pair.strip("()").split(",")] + chain_topology.append({'node_start':reverse_node_labels[node_label_s], + 'node_end': reverse_node_labels[node_label_e], + 'molecule_name':"M1"}) + +pmb.define_hydrogel("my_hydrogel", node_topology, chain_topology) + +print("\n=== Molecule Templates DataFrame ===") +print(pmb.get_templates_df(pmb_type="molecule")) +print("\n=== Hydrogel Templates DataFrame ===") +print(pmb.get_templates_df(pmb_type="hydrogel")) + +pmb.create_hydrogel(name="my_hydrogel", + espresso_system=espresso_system) +print("\n=== Particle Instances DataFrame After Hydrogel Creation ===") +print(pmb.get_instances_df(pmb_type="particle")) +print("\n=== Residue Instances DataFrame After Hydrogel Creation ===") +print(pmb.get_instances_df(pmb_type="residue")) +print("\n=== Bond Instances DataFrame After Hydrogel Creation ===") +print(pmb.get_instances_df(pmb_type="bond")) +print("\n=== Molecule Instances DataFrame After Hydrogel Creation ===") +print(pmb.get_instances_df(pmb_type="molecule")) +print("\n=== Hydrogel Instances DataFrame After Hydrogel Creation ===") +print(pmb.get_instances_df(pmb_type="hydrogel")) +pmb.save_database("demo_csv") diff --git a/pyMBE/pyMBE.py b/pyMBE/pyMBE.py index 952fd30..03c762c 100644 --- a/pyMBE/pyMBE.py +++ b/pyMBE/pyMBE.py @@ -1862,6 +1862,26 @@ def initialize_lattice_builder(self, diamond_lattice): logging.info(f"LatticeBuilder initialized with mpc={diamond_lattice.mpc} and box_l={diamond_lattice.box_l}") return self.lattice_builder + def load_database(self, folder, format='csv'): + 
""" + Loads a pyMBE database stored in `folder`. + + Args: + folder (str or Path): Path to the folder where the pyMBE database was stored. + format (str, optional): Format of the database to be loaded. Defaults to 'csv'. + + Note: + - The folder must contain the files generated by `pmb.save_database()`. + - Currently, only 'csv' format is supported. + """ + supported_formats = ['csv'] + if format not in supported_formats: + raise ValueError(f"Format {format} not supported. Supported formats are {supported_formats}") + if format == 'csv': + io._load_database_csv(self.db, + folder=folder) + + def load_interaction_parameters(self, filename, overwrite=False): """ Loads the interaction parameters stored in `filename` into `pmb.df` @@ -2178,6 +2198,21 @@ def read_protein_vtf_in_df (self,filename,unit_length=None): return topology_dict + def save_database(self, folder, format='csv'): + """ + Saves the current pyMBE database into a file `filename`. + + Args: + folder (str or Path): Path to the folder where the database files will be saved. + + """ + supported_formats = ['csv'] + if format not in supported_formats: + raise ValueError(f"Format {format} not supported. Supported formats are: {supported_formats}") + if format == 'csv': + io._save_database_csv(self.db, + folder=folder) + def search_particles_in_residue(self, residue_name): ''' Searches for all particles in a given residue of name `residue_name`. 
diff --git a/test.py b/test.py index 21d12ac..6d73f75 100644 --- a/test.py +++ b/test.py @@ -69,10 +69,13 @@ def main(): offset=0 * units.reduced_length, epsilon=0.2 * units.reduced_energy, z=-1) + print(pmb.db._get_templates_df(pmb_type="particle")) + print("\n=== Setup LJ interactions ===") pmb.setup_lj_interactions(espresso_system=espresso_system) print(pmb.db._get_templates_df(pmb_type="lj")) + pmb.define_residue(name="R1", central_bead="Z", side_chains=["X","Z"]) pmb.define_residue(name="R2", central_bead="Z", side_chains=["X","R1"]) @@ -195,17 +198,20 @@ def main(): espresso_system=espresso_system, number_of_particles=1) + print("\n=== Particle Instances DataFrame ===") print(pmb.db._get_instances_df(pmb_type="particle")) - + pmb.create_residue(name="R1", espresso_system=espresso_system) pmb.create_residue(name="R2", espresso_system=espresso_system) print("\n=== Residue Instances DataFrame ===") + print(pmb.db._get_instances_df(pmb_type="particle")) print(pmb.db._get_instances_df(pmb_type="residue")) - + print(pmb.db._get_instances_df(pmb_type="bond")) + exit() pmb.create_molecule(name="M1", number_of_molecules=2, espresso_system=espresso_system) @@ -249,7 +255,6 @@ def main(): espresso_system=espresso_system) print(pmb.db._get_instances_df(pmb_type="hydrogel")) - # ============================================================ # 3. 
DEFINE A REACTION: HA <-> A- + H+ # ============================================================ From c202151f76200846f2d64c894b6e9f1269310f3f Mon Sep 17 00:00:00 2001 From: Pablo Date: Sun, 18 Jan 2026 19:34:29 +0100 Subject: [PATCH 18/55] fix unit test --- testsuite/set_particle_acidity_test.py | 192 +++++++++++-------------- 1 file changed, 83 insertions(+), 109 deletions(-) diff --git a/testsuite/set_particle_acidity_test.py b/testsuite/set_particle_acidity_test.py index af628b7..2ea424b 100644 --- a/testsuite/set_particle_acidity_test.py +++ b/testsuite/set_particle_acidity_test.py @@ -20,122 +20,96 @@ import numpy as np import pandas as pd import pyMBE -import pyMBE.storage.df_management as df_management +import unittest as ut # Create an instance of pyMBE library pmb = pyMBE.pymbe_library(seed=42) -def check_acid_base_setup(input_parameters, acidity_setup): - """ - Checks if pyMBE stores in the pmb.df the input parameters for acid/base particles correctly. - Args: - input_parameters (`dict`): dictionary with the input parameters for define_particle. - acidity_setup (`dict`): dictionary with the expected setup that pyMBE should do in the pmb.df for acid/base particles. 
- """ - pmb.define_particle(**input_parameters) - - # Handle pd.NA safely - if pd.isna(input_parameters.get("acidity", None)): - input_parameters.pop("z", None) # Use .pop with default to avoid KeyError - - # Checks that the input parameters are stored properly - for parameter_key, expected_value in input_parameters.items(): - actual_value = pmb.df[parameter_key].values[0] - - # Use pd.isna() to compare safely, since pd.NA does not behave like regular values - if pd.isna(expected_value) and pd.isna(actual_value): - continue # Skip this check, they are both missing (NA) - - np.testing.assert_equal(actual=actual_value, desired=expected_value, verbose=True) - - # Checks that the setup of the acid/base properties is done correctly - for state in ["state_one", "state_two"]: - for state_attribute in ["label", "z"]: - actual_value = pmb.df[state][state_attribute].values[0] - expected_value = acidity_setup[state][state_attribute] - - if pd.isna(expected_value) and pd.isna(actual_value): - continue # Skip this check if both are NA - - np.testing.assert_equal(actual=actual_value, desired=expected_value, verbose=True) - - # Checks that pyMBE assigns different espresso types to each state - np.testing.assert_raises( - AssertionError, - np.testing.assert_equal, - pmb.df["state_one"]["es_type"].values[0], - pmb.df["state_two"]["es_type"].values[0] - ) - - -print("*** Particle acidity unit tests ***") -print("*** Unit test: check that all acid/base input parameters in define_particle for an inert particle are correctly stored in pmb.df***") -# Clean pmb.df -pmb.df = df_management._DFManagement._setup_df() -input_parameters={"name":"I", +class Test(ut.TestCase): + def test_inert_particles_setup(self): + """ + Test that an inert particle is correctly set up in the pyMBE database. 
+ """ + input_parameters={"name":"I", "acidity": pd.NA, "pka": pd.NA, - "z":2} -acidity_setup={"state_one":{"label":f"{input_parameters['name']}", - "z":2}, - "state_two":{"label": pd.NA, - "z": pd.NA},} - -check_acid_base_setup(input_parameters=input_parameters, - acidity_setup=acidity_setup) - -print("*** Unit test passed ***") -print("*** Unit test: check that a deprecation warning is raised if the keyword 'inert' is used for acidity ***") -# Clean pmb.df -pmb.df = df_management._DFManagement._setup_df() -input_parameters={"name":"I", - "acidity": "inert", - "pka": pd.NA, - "z":2} -pmb.define_particle(**input_parameters) -print("*** Unit test passed ***") -print("*** Unit test: check that all acid/base input parameters in define_particle for an acid are correctly stored in pmb.df***") -# Clean pmb.df -pmb.df = df_management._DFManagement._setup_df() -input_parameters={"name":"A", + "z":2, + "sigma": 1.0*pmb.units.reduced_length, + "epsilon": 1.0*pmb.units.reduced_energy} + pmb.define_particle(**input_parameters) + part_tpl = pmb.db.get_template(name="I", + pmb_type="particle") + self.assertTrue(hasattr(part_tpl, "states")) + self.assertEqual(len(part_tpl.states), 1) + state_one = part_tpl.states["I"] + self.assertEqual(state_one.name, "I") + self.assertEqual(state_one.z, 2) + pmb.db.delete_template(name="I", pmb_type="particle") + + def test_acidic_particles_setup(self): + """ + Test that an acidic particle is correctly set up in the pyMBE database. 
+ """ + input_parameters={"name":"A", "acidity": "acidic", - "pka":4} -acidity_setup={"state_one":{"label":f"{input_parameters['name']}H", - "z":0}, - "state_two":{"label":f"{input_parameters['name']}", - "z":-1},} - -check_acid_base_setup(input_parameters=input_parameters, - acidity_setup=acidity_setup) -print("*** Unit test passed ***") -print("*** Unit test: check that all acid/base input parameters in define_particle for a base are correctly stored in pmb.df***") -# Clean pmb.df -pmb.df = df_management._DFManagement._setup_df() -input_parameters={"name":"B", + "pka":4, + "sigma": 1.0*pmb.units.reduced_length, + "epsilon": 1.0*pmb.units.reduced_energy} + pmb.define_particle(**input_parameters) + part_tpl = pmb.db.get_template(name="A", + pmb_type="particle") + self.assertTrue(hasattr(part_tpl, "states")) + self.assertEqual(len(part_tpl.states), 2) + state_one = part_tpl.states["AH"] + self.assertEqual(state_one.name, "AH") + self.assertEqual(state_one.z, 0) + state_two = part_tpl.states["A"] + self.assertEqual(state_two.name, "A") + self.assertEqual(state_two.z, -1) + self.assertNotEqual(state_one.es_type, state_two.es_type) + pmb.db.delete_template(name="A", pmb_type="particle") + + def test_basic_particles_setup(self): + """ + Test that a basic particle is correctly set up in the pyMBE database. 
+ """ + input_parameters={"name":"B", "acidity": "basic", - "pka":9} -acidity_setup={"state_one":{"label":f"{input_parameters['name']}H", - "z":1}, - "state_two":{"label":f"{input_parameters['name']}", - "z":0},} - -check_acid_base_setup(input_parameters=input_parameters, - acidity_setup=acidity_setup) -print("*** Unit test passed ***") - -print("*** Unit test: check that set_particle_acidity raises a ValueError if pKa is not provided and pKa is acidic or basic ***") -input_parametersA={"name":"A", - "acidity": "acidic" } + "pka":9, + "sigma": 1.0*pmb.units.reduced_length, + "epsilon": 1.0*pmb.units.reduced_energy} + pmb.define_particle(**input_parameters) + part_tpl = pmb.db.get_template(name="B", + pmb_type="particle") + self.assertTrue(hasattr(part_tpl, "states")) + self.assertEqual(len(part_tpl.states), 2) + state_one = part_tpl.states["BH"] + self.assertEqual(state_one.name, "BH") + self.assertEqual(state_one.z, 1) + state_two = part_tpl.states["B"] + self.assertEqual(state_two.name, "B") + self.assertEqual(state_two.z, 0) + self.assertNotEqual(state_one.es_type, state_two.es_type) + pmb.db.delete_template(name="B", pmb_type="particle") + + def sanity_tests(self): + """ + Unit tests to check that set_particle_acidity raises ValueErrors when expected. 
+ """ + # Check that set_particle_acidity raises a ValueError if pKa is not provided and pKa is acidic or basic + input_parametersA={"name":"A", + "acidity": "acidic" } + + input_parametersB= {"name": "B", + "acidity": "basic"} + self.assertRaises(ValueError, pmb.set_particle_acidity,**input_parametersA) + self.assertRaises(ValueError, pmb.set_particle_acidity, **input_parametersB) + # Check that set_particle_acidity raises a ValueError if a non-supported acidity is provided + input_parametersA={"name":"A", + "acidity": "random" } + self.assertRaises(ValueError, pmb.set_particle_acidity,**input_parametersA) + +if __name__ == "__main__": + ut.main() -input_parametersB= {"name": "B", - "acidity": "basic"} -np.testing.assert_raises(ValueError, pmb.set_particle_acidity,**input_parametersA) -np.testing.assert_raises(ValueError, pmb.set_particle_acidity, **input_parametersB) -print("*** Unit test passed ***") -print("*** Unit test: check that set_particle_acidity raises a ValueError if a non-supported acidity is provided ***") -input_parametersA={"name":"A", - "acidity": "random" } -np.testing.assert_raises(ValueError, pmb.set_particle_acidity,**input_parametersA) -print("*** Unit test passed ***") -print("*** All unit tests passed ***") From 052c41402e6842e1be806ea279282697f6fa8f65 Mon Sep 17 00:00:00 2001 From: Pablo Date: Sun, 18 Jan 2026 20:23:54 +0100 Subject: [PATCH 19/55] fix bug in create_counterions, fix unittest --- pyMBE/pyMBE.py | 47 ++- testsuite/setup_salt_ions_unit_tests.py | 526 ++++++++++++------------ 2 files changed, 290 insertions(+), 283 deletions(-) diff --git a/pyMBE/pyMBE.py b/pyMBE/pyMBE.py index 03c762c..22bce3c 100644 --- a/pyMBE/pyMBE.py +++ b/pyMBE/pyMBE.py @@ -643,10 +643,10 @@ def create_bond(self, particle_id1, particle_id2, espresso_system, use_default_b def create_counterions(self, object_name, cation_name, anion_name, espresso_system): """ - Creates particles of `cation_name` and `anion_name` in `espresso_system` to counter the net 
charge of `pmb_object`. + Creates particles of `cation_name` and `anion_name` in `espresso_system` to counter the net charge of `object_name`. Args: - object_name(`str`): `name` of a pymbe object. + object_name(`str`): `name` of a pyMBE object. espresso_system(`espressomd.system.System`): Instance of a system object from the espressomd library. cation_name(`str`): `name` of a particle with a positive charge. anion_name(`str`): `name` of a particle with a negative charge. @@ -661,8 +661,8 @@ def create_counterions(self, object_name, cation_name, anion_name, espresso_syst name=cation_name) cation_charge = cation_tpl.states[cation_tpl.initial_state].z anion_tpl = self.db.get_template(pmb_type="particle", - name=cation_name) - anion_charge = cation_tpl.states[anion_tpl.initial_state].z + name=anion_name) + anion_charge = anion_tpl.states[anion_tpl.initial_state].z object_ids = self.get_particle_id_map(object_name=object_name)["all"] counterion_number={} object_charge={} @@ -1637,26 +1637,31 @@ def get_bond_template(self, particle_name1, particle_name2, use_default_bond=Fal return bond_tpl def get_charge_number_map(self): - ''' - Gets the charge number of each `espresso_type` in `pymbe.df`. - + """ + Construct a mapping from ESPResSo particle types to their charge numbers. + Returns: - charge_number_map(`dict`): {espresso_type: z}. 
- ''' - if self.df.state_one['es_type'].isnull().values.any(): - df_state_one = self.df.state_one.dropna() - df_state_two = self.df.state_two.dropna() - else: - df_state_one = self.df.state_one - if self.df.state_two['es_type'].isnull().values.any(): - df_state_two = self.df.state_two.dropna() - else: - df_state_two = self.df.state_two - state_one = pd.Series (df_state_one.z.values,index=df_state_one.es_type.values) - state_two = pd.Series (df_state_two.z.values,index=df_state_two.es_type.values) - charge_number_map = pd.concat([state_one,state_two],axis=0).to_dict() + dict[int, float]: + Dictionary mapping ESPResSo particle types to charge numbers, + ``{es_type: z}``. + + Notes: + - The mapping is built from particle *states*, not instances. + - If multiple templates define states with the same ``es_type``, + the last encountered definition will overwrite previous ones. + This behavior is intentional and assumes database consistency. + - Neutral particles (``z = 0``) are included in the map. + """ + charge_number_map = {} + particle_templates = self.db.get_templates("particle") + for tpl in particle_templates.values(): + for state in tpl.states.values(): + if state.es_type is None: + continue + charge_number_map[state.es_type] = state.z return charge_number_map + def get_espresso_bond_instance(self, particle_name1, particle_name2, espresso_system, use_default_bond=False): """ Retrieve or create a bond instance in an ESPResSo system for a given pair of particle names. 
diff --git a/testsuite/setup_salt_ions_unit_tests.py b/testsuite/setup_salt_ions_unit_tests.py index d7c592b..8dc5c70 100644 --- a/testsuite/setup_salt_ions_unit_tests.py +++ b/testsuite/setup_salt_ions_unit_tests.py @@ -19,28 +19,32 @@ import numpy as np import espressomd from pyMBE.lib.handy_functions import get_number_of_particles -import logging -import io - -# Create an in-memory log stream -log_stream = io.StringIO() -logging.basicConfig(level=logging.INFO, - format="%(levelname)s: %(message)s", - handlers=[logging.StreamHandler(log_stream)] ) +import unittest as ut # Create an instance of pyMBE library import pyMBE pmb = pyMBE.pymbe_library(seed=42) +sigma = 1 * pmb.units.reduced_length +epsilon = 1 * pmb.units.reduced_energy + # Define a set of ions pmb.define_particle(name="Na", - z=1) + z=1, + sigma=sigma, + epsilon=epsilon) pmb.define_particle(name="Ca", - z=2) + z=2, + sigma=sigma, + epsilon=epsilon) pmb.define_particle(name="Cl", - z=-1) + z=-1, + sigma=sigma, + epsilon=epsilon) pmb.define_particle(name="SO4", - z=-2) + z=-2, + sigma=sigma, + epsilon=epsilon) type_map=pmb.get_type_map() # System parameters @@ -53,120 +57,24 @@ espresso_system=espressomd.System (box_l = [L.to('reduced_length').magnitude]*3) espresso_system.setup_type_map(type_list=type_map.values()) -#### Unit tests for the added salt - -def check_salt_concentration(espresso_system,cation_name,anion_name,c_salt,N_SALT_ION_PAIRS): - charge_number_map=pmb.get_charge_number_map() - type_map=pmb.get_type_map() - espresso_system.setup_type_map(type_list=type_map.values()) - c_salt_calculated = pmb.create_added_salt(espresso_system=espresso_system, - cation_name=cation_name, - anion_name=anion_name, - c_salt=c_salt) - - np.testing.assert_equal(get_number_of_particles(espresso_system, type_map[cation_name]),N_SALT_ION_PAIRS*abs(charge_number_map[type_map[anion_name]])) - np.testing.assert_equal(get_number_of_particles(espresso_system, 
type_map[anion_name]),N_SALT_ION_PAIRS*abs(charge_number_map[type_map[cation_name]])) - np.testing.assert_almost_equal(c_salt_calculated.m_as("mol/L"), c_salt.m_as("mol/L")) - cation_ids = pmb.get_particle_id_map(object_name=cation_name)["all"] - anion_ids = pmb.get_particle_id_map(object_name=anion_name)["all"] - for id in cation_ids+anion_ids: - pmb.delete_particle_in_system(particle_id=id, - espresso_system=espresso_system) - - -print("*** Unit test: test that create_added_salt works for a 1:1 salt (NaCl-like). Should print the added salt concentration and number of ions ***") -check_salt_concentration(espresso_system=espresso_system, - cation_name="Na", - anion_name="Cl", - c_salt=c_salt_input, - N_SALT_ION_PAIRS=N_SALT_ION_PAIRS) -print("*** Unit test passed***") -print("*** Unit test: test that create_added_salt works for a 2:1 salt (CaCl_2-like) ***") -check_salt_concentration(espresso_system=espresso_system, - cation_name="Ca", - anion_name="Cl", - c_salt=c_salt_input, - N_SALT_ION_PAIRS=N_SALT_ION_PAIRS) -print("*** Unit test passed***") -print("*** Unit test: test that create_added_salt works for a 1:2 salt (Na_2SO_4-like) ***") -check_salt_concentration(espresso_system=espresso_system, - cation_name="Na", - anion_name="SO4", - c_salt=c_salt_input, - N_SALT_ION_PAIRS=N_SALT_ION_PAIRS) -print("*** Unit test passed***") -print("*** Unit test: test that create_added_salt works for a 2:2 salt (CaSO_4-like) ***") -check_salt_concentration(espresso_system=espresso_system, - cation_name="Ca", - anion_name="SO4", - c_salt=c_salt_input, - N_SALT_ION_PAIRS=N_SALT_ION_PAIRS) -print("*** Unit test passed***") -print("*** Unit test: check that create_added_salt works for an input c_salt in [particle/lenght**3]. 
Should print the concentration and number of ions") -c_salt_part=c_salt_input*pmb.N_A -espresso_system.setup_type_map(type_list=type_map.values()) -c_salt_calculated = pmb.create_added_salt(espresso_system=espresso_system, - cation_name="Na", - anion_name="Cl", - c_salt=c_salt_part) -np.testing.assert_equal(get_number_of_particles(espresso_system, type_map["Na"]),N_SALT_ION_PAIRS) -np.testing.assert_equal(get_number_of_particles(espresso_system, type_map["Cl"]),N_SALT_ION_PAIRS) -np.testing.assert_almost_equal(c_salt_calculated.m_as("reduced_length**-3"), c_salt_part.m_as("reduced_length**-3")) -cation_ids = pmb.get_particle_id_map(object_name="Na")["all"] -anion_ids = pmb.get_particle_id_map(object_name="Cl")["all"] -for id in cation_ids+anion_ids: - pmb.delete_particle_in_system(particle_id=id, - espresso_system=espresso_system) - -print("*** Unit test: check that create_added_salt raises a ValueError if one provides a cation_name of an object that has been defined with a non-positive charge ***") -input_parameters={"cation_name":"Cl", - "anion_name":"SO4", - "c_salt":c_salt_input, - "espresso_system":espresso_system} -np.testing.assert_raises(ValueError, pmb.create_added_salt, **input_parameters) -print("*** Unit test passed ***") - -print("*** Unit test: check that create_added_salt raises a ValueError if one provides a anion_name of an object that has been defined with a non-negative charge ***") -input_parameters={"cation_name":"Na", - "anion_name":"Ca", - "c_salt":c_salt_input, - "espresso_system":espresso_system} -np.testing.assert_raises(ValueError, pmb.create_added_salt, **input_parameters) -print("*** Unit test passed ***") - -print("*** Unit test: check that create_added_salt raises a ValueError if one provides a c_salt with the wrong dimensionality ***") -input_parameters={"cation_name":"Na", - "anion_name":"Cl", - "c_salt":1*pmb.units.nm, - "espresso_system":espresso_system} -np.testing.assert_raises(ValueError, pmb.create_added_salt, 
**input_parameters) -print("*** Unit test passed ***") - -# Test that no salt ions are created if the wrong object names are provided -pmb.create_added_salt(espresso_system=espresso_system, - cation_name="X", - anion_name="Cl", - c_salt=c_salt_part) -log_contents = log_stream.getvalue() -assert "Object with name 'X' is not defined in the DataFrame, no ions will be created." in log_contents - -# Test that no salt ions are created if the wrong object names are provided -pmb.create_added_salt(espresso_system=espresso_system, - cation_name="Na", - anion_name="X", - c_salt=c_salt_part) -log_contents = log_stream.getvalue() -assert "Object with name 'X' is not defined in the DataFrame, no ions will be created." in log_contents - -### Unit tests for the counter ions: - - pmb.define_particle(name='0P', - z=0) + z=0, + sigma=sigma, + epsilon=epsilon) pmb.define_particle(name='+1P', - z=+1) + z=+1, + sigma=sigma, + epsilon=epsilon) pmb.define_particle(name='-1P', - z=-1) + z=-1, + sigma=sigma, + epsilon=epsilon) +pmb.define_residue( + name = 'R0', + central_bead = '0P', + side_chains = [] + ) + pmb.define_residue( name = 'R1', central_bead = '0P', @@ -179,6 +87,7 @@ def check_salt_concentration(espresso_system,cation_name,anion_name,c_salt,N_SAL side_chains = ['-1P'] ) + bond_type = 'harmonic' generic_bond_length=0.4 * pmb.units.nm generic_harmonic_constant = 400 * pmb.units('reduced_energy / reduced_length**2') @@ -190,7 +99,6 @@ def check_salt_concentration(espresso_system,cation_name,anion_name,c_salt,N_SAL pmb.define_default_bond(bond_type = bond_type, bond_parameters = harmonic_bond) # Add all bonds to espresso system -pmb.add_bonds_to_espresso(espresso_system=espresso_system) molecule_name = 'positive_polyampholyte' pmb.define_molecule(name=molecule_name, residue_list = ['R1']*3+['R2']*2) @@ -203,152 +111,246 @@ def check_salt_concentration(espresso_system,cation_name,anion_name,c_salt,N_SAL pmb.define_molecule(name=molecule_name, residue_list = ['R1']*2+['R2']*3) -def 
test_counterions(molecule_name, cation_name, anion_name, espresso_system, expected_numbers): - pmb.create_molecule(name=molecule_name, - number_of_molecules= 2, - espresso_system=espresso_system, - use_default_bond=True) - pmb.create_counterions(object_name=molecule_name, - cation_name=cation_name, - anion_name=anion_name, - espresso_system=espresso_system) - espresso_system.setup_type_map(type_list=type_map.values()) - np.testing.assert_equal(get_number_of_particles(espresso_system, type_map[cation_name]),expected_numbers[cation_name]) - np.testing.assert_equal(get_number_of_particles(espresso_system, type_map[anion_name]),expected_numbers[anion_name]) - molecule_ids = list(pmb.get_particle_id_map(object_name=molecule_name)["molecule_map"].keys()) - for mol_id in molecule_ids: - pmb.delete_molecule_in_system(molecule_id=mol_id, - espresso_system=espresso_system) - cation_ids = pmb.get_particle_id_map(object_name=cation_name)["all"] - anion_ids = pmb.get_particle_id_map(object_name=anion_name)["all"] - for id in cation_ids+anion_ids: - pmb.delete_particle_in_system(particle_id=id, - espresso_system=espresso_system) - -print("*** Unit test: check that create_counterions creates the right number of monovalent counter ions for a polyampholyte with positive net charge. Should print the number of ions. 
***") - -test_counterions(molecule_name='positive_polyampholyte', - cation_name="Na", - anion_name="Cl", - espresso_system=espresso_system, - expected_numbers={"Na":4, - "Cl":6}) - -print("*** Unit test passed ***") - -print("*** Unit test: check that create_counterions creates the right number of monovalent counter ions for a polyampholyte at its isoelectric point ***") - -test_counterions(molecule_name='isoelectric_polyampholyte', - cation_name="Na", - anion_name="Cl", - espresso_system=espresso_system, - expected_numbers={"Na":6, - "Cl":6}) - -print("*** Unit test passed ***") - -print("*** Unit test: check that create_counterions creates the right number of monovalent counter ions for a polyampholyte with a negative net charge ***") - -test_counterions(molecule_name='negative_polyampholyte', - cation_name="Na", - anion_name="Cl", - espresso_system=espresso_system, - expected_numbers={"Na":6, - "Cl":4}) - -print("*** Unit test passed ***") - -print("*** Unit test: check that create_counterions creates the right number of multivalent counter ions for a polyampholyte ***") - -test_counterions(molecule_name='negative_polyampholyte', - cation_name="Ca", - anion_name="Cl", - espresso_system=espresso_system, - expected_numbers={"Ca":3, - "Cl":4}) - -print("*** Unit test passed ***") - -print("*** Unit test: check that create_counterions raises a ValueError if the charge number of the cation is not divisible by the negative charge of the polyampholyte ***") -pmb.create_molecule(name='isoelectric_polyampholyte', - number_of_molecules= 1, - espresso_system=espresso_system, - use_default_bond=True) -input_parameters={"cation_name":"Ca", - "anion_name":"Cl", - "object_name":'isoelectric_polyampholyte', - "espresso_system":espresso_system} -np.testing.assert_raises(ValueError, pmb.create_counterions, **input_parameters) -print("*** Unit test passed ***") -print("*** Unit test: check that create_counterions raises a ValueError if the charge number of the anion is not 
divisible by the positive charge of the polyampholyte ***") -input_parameters={"cation_name":"Na", - "anion_name":"SO4", - "object_name":'isoelectric_polyampholyte', - "espresso_system":espresso_system} -np.testing.assert_raises(ValueError, pmb.create_counterions, **input_parameters) -pmb.delete_molecule_in_system(espresso_system=espresso_system, - molecule_id=0) - -print("*** Unit test passed ***") -print("*** Unit test: check that no create_counterions does not create counterions for molecules with no charge") -pmb.define_particle(name='0P', - z=0) -pmb.define_residue( - name = 'R0', - central_bead = '0P', - side_chains = [] - ) pmb.define_molecule(name='neutral_molecule', residue_list = ['R0']) -pmb.create_molecule(name='neutral_molecule', - number_of_molecules= 1, - espresso_system=espresso_system) -pmb.create_counterions(object_name='neutral_molecule', - cation_name="Na", - anion_name="Cl", - espresso_system=espresso_system) - - -espresso_system.setup_type_map(type_list=type_map.values()) -np.testing.assert_equal(get_number_of_particles(espresso_system, type_map["Na"]),0) -np.testing.assert_equal(get_number_of_particles(espresso_system, type_map["Cl"]),0) -# Assert that no counterions are created if the wrong object names are provided -pmb.create_counterions(object_name='test', - cation_name="Na", - anion_name="Cl", - espresso_system=espresso_system) - -log_contents = log_stream.getvalue() -assert "Object with name 'test' is not defined in the DataFrame, no counterions will be created." in log_contents - - -pmb.create_counterions(object_name='isoelectric_polyampholyte', - cation_name="Z", - anion_name="Cl", - espresso_system=espresso_system) - -log_contents = log_stream.getvalue() -assert "Object with name 'Z' is not defined in the DataFrame, no counterions will be created." 
in log_contents - -pmb.create_counterions(object_name='isoelectric_polyampholyte', - cation_name="Na", - anion_name="X", +class Test(ut.TestCase): + + def test_salt_addition(self): + """ + Unit test to check that create_added_salt works for different types of salts. + """ + def check_salt_concentration(espresso_system,cation_name,anion_name,c_salt,N_SALT_ION_PAIRS): + charge_number_map=pmb.get_charge_number_map() + type_map=pmb.get_type_map() + espresso_system.setup_type_map(type_list=type_map.values()) + c_salt_calculated = pmb.create_added_salt(espresso_system=espresso_system, + cation_name=cation_name, + anion_name=anion_name, + c_salt=c_salt) + + self.assertEqual(get_number_of_particles(espresso_system, type_map[cation_name]),N_SALT_ION_PAIRS*abs(charge_number_map[type_map[anion_name]])) + self.assertEqual(get_number_of_particles(espresso_system, type_map[anion_name]),N_SALT_ION_PAIRS*abs(charge_number_map[type_map[cation_name]])) + self.assertAlmostEqual(c_salt_calculated.m_as("mol/L"), c_salt.m_as("mol/L")) + cation_ids = pmb.get_particle_id_map(object_name=cation_name)["all"] + anion_ids = pmb.get_particle_id_map(object_name=anion_name)["all"] + for id in cation_ids+anion_ids: + pmb.delete_instances_in_system(instance_id=id, + espresso_system=espresso_system, + pmb_type="particle") + + # Unit test: test that create_added_salt works for a 1:1 salt (NaCl-like). 
+ check_salt_concentration(espresso_system=espresso_system, + cation_name="Na", + anion_name="Cl", + c_salt=c_salt_input, + N_SALT_ION_PAIRS=N_SALT_ION_PAIRS) + # test that create_added_salt works for a 2:1 salt (CaCl_2-like) + check_salt_concentration(espresso_system=espresso_system, + cation_name="Ca", + anion_name="Cl", + c_salt=c_salt_input, + N_SALT_ION_PAIRS=N_SALT_ION_PAIRS) + # test that create_added_salt works for a 1:2 salt (Na_2SO_4-like) + check_salt_concentration(espresso_system=espresso_system, + cation_name="Na", + anion_name="SO4", + c_salt=c_salt_input, + N_SALT_ION_PAIRS=N_SALT_ION_PAIRS) + # test that create_added_salt works for a 2:2 salt (CaSO_4-like) + check_salt_concentration(espresso_system=espresso_system, + cation_name="Ca", + anion_name="SO4", + c_salt=c_salt_input, + N_SALT_ION_PAIRS=N_SALT_ION_PAIRS) + + def test_salt_addition_concentration_units(self): + """ + Unit test: check that create_added_salt works for an input c_salt in [particle/lenght**3]. + """ + c_salt_part=c_salt_input*pmb.N_A + espresso_system.setup_type_map(type_list=type_map.values()) + c_salt_calculated = pmb.create_added_salt(espresso_system=espresso_system, + cation_name="Na", + anion_name="Cl", + c_salt=c_salt_part) + self.assertEqual(get_number_of_particles(espresso_system, type_map["Na"]),N_SALT_ION_PAIRS) + self.assertEqual(get_number_of_particles(espresso_system, type_map["Cl"]),N_SALT_ION_PAIRS) + self.assertAlmostEqual(c_salt_calculated.m_as("reduced_length**-3"), c_salt_part.m_as("reduced_length**-3")) + cation_ids = pmb.get_particle_id_map(object_name="Na")["all"] + anion_ids = pmb.get_particle_id_map(object_name="Cl")["all"] + for id in cation_ids+anion_ids: + pmb.delete_instances_in_system(instance_id=id, + espresso_system=espresso_system, + pmb_type="particle") + + def test_sanity_create_salt(self): + """ + Unit tests to check that create_added_salt raises ValueErrors when expected. 
+ """ + + #check that create_added_salt raises a ValueError if one provides a cation_name of an object that has been defined with a non-positive charge + input_parameters={"cation_name":"Cl", + "anion_name":"SO4", + "c_salt":c_salt_input, + "espresso_system":espresso_system} + self.assertRaises(ValueError, pmb.create_added_salt, **input_parameters) + # check that create_added_salt raises a ValueError if one provides a anion_name of an object that has been defined with a non-negative charge + input_parameters={"cation_name":"Na", + "anion_name":"Ca", + "c_salt":c_salt_input, + "espresso_system":espresso_system} + self.assertRaises(ValueError, pmb.create_added_salt, **input_parameters) + # check that create_added_salt raises a ValueError if one provides a c_salt with the wrong dimensionality + input_parameters={"cation_name":"Na", + "anion_name":"Cl", + "c_salt":1*pmb.units.nm, + "espresso_system":espresso_system} + self.assertRaises(ValueError, pmb.create_added_salt, **input_parameters) + # Test that no salt ions are created if the wrong object names are provided + input_parameters={"cation_name":"Na", + "anion_name":"X", + "c_salt":c_salt_input, + "espresso_system":espresso_system} + self.assertRaises(ValueError, pmb.create_added_salt, **input_parameters) + input_parameters={"cation_name":"X", + "anion_name":"Cl", + "c_salt":c_salt_input, + "espresso_system":espresso_system} + self.assertRaises(ValueError, pmb.create_added_salt, **input_parameters) + + def test_counterions_setup(self): + """ + Unit test to check that create_counterions sets up correctly counterions for charged polyampholytes. 
+ """ + def test_counterions(molecule_name, cation_name, anion_name, espresso_system, expected_numbers): + pmb.create_molecule(name=molecule_name, + number_of_molecules= 2, + espresso_system=espresso_system, + use_default_bond=True) + pmb.create_counterions(object_name=molecule_name, + cation_name=cation_name, + anion_name=anion_name, + espresso_system=espresso_system) + espresso_system.setup_type_map(type_list=type_map.values()) + self.assertEqual(get_number_of_particles(espresso_system, + type_map[cation_name]), + expected_numbers[cation_name]) + self.assertEqual(get_number_of_particles(espresso_system, + type_map[anion_name]), + expected_numbers[anion_name]) + molecule_ids = list(pmb.get_particle_id_map(object_name=molecule_name)["molecule_map"].keys()) + for mol_id in molecule_ids: + pmb.delete_instances_in_system(instance_id=mol_id, + espresso_system=espresso_system, + pmb_type="molecule") + + cation_ids = pmb.get_particle_id_map(object_name=cation_name)["all"] + anion_ids = pmb.get_particle_id_map(object_name=anion_name)["all"] + for id in cation_ids+anion_ids: + pmb.delete_instances_in_system(instance_id=id, + espresso_system=espresso_system, + pmb_type="particle") + + # Check that create_counterions creates the right number of monovalent counter ions for a polyampholyte with positive net charge. 
+ test_counterions(molecule_name='positive_polyampholyte', + cation_name="Na", + anion_name="Cl", + espresso_system=espresso_system, + expected_numbers={"Na":4, + "Cl":6}) + # Check that create_counterions creates the right number of monovalent counter ions for a polyampholyte at its isoelectric point + test_counterions(molecule_name='isoelectric_polyampholyte', + cation_name="Na", + anion_name="Cl", + espresso_system=espresso_system, + expected_numbers={"Na":6, + "Cl":6}) + # Check that create_counterions creates the right number of monovalent counter ions for a polyampholyte with a negative net charge + test_counterions(molecule_name='negative_polyampholyte', + cation_name="Na", + anion_name="Cl", + espresso_system=espresso_system, + expected_numbers={"Na":6, + "Cl":4}) + # Check that create_counterions creates the right number of multivalent counter ions for a polyampholyte + test_counterions(molecule_name='negative_polyampholyte', + cation_name="Ca", + anion_name="Cl", + espresso_system=espresso_system, + expected_numbers={"Ca":3, + "Cl":4}) + def test_sanity_create_counterions(self): + """ + Unit tests to check that create_counterions raises ValueErrors when expected. 
+ """ + # Check that create_counterions raises a ValueError if the charge number of the cation is not divisible by the negative charge of the polyampholyte + pmb.create_molecule(name='isoelectric_polyampholyte', + number_of_molecules= 1, + espresso_system=espresso_system, + use_default_bond=True) + input_parameters={"cation_name":"Ca", + "anion_name":"Cl", + "object_name":'isoelectric_polyampholyte', + "espresso_system":espresso_system} + self.assertRaises(ValueError, pmb.create_counterions, **input_parameters) + # Check that create_counterions raises a ValueError if the charge number of the anion is not divisible by the positive charge of the polyampholyte + input_parameters={"cation_name":"Na", + "anion_name":"SO4", + "object_name":'isoelectric_polyampholyte', + "espresso_system":espresso_system} + self.assertRaises(ValueError, pmb.create_counterions, **input_parameters) + pmb.delete_instances_in_system(instance_id=0, + espresso_system=espresso_system, + pmb_type="molecule") + # Check that no create_counterions does not create counterions for molecules with no charge + pmb.create_molecule(name='neutral_molecule', + number_of_molecules= 1, espresso_system=espresso_system) -log_contents = log_stream.getvalue() -assert "Object with name 'X' is not defined in the DataFrame, no counterions will be created." 
in log_contents + pmb.create_counterions(object_name='neutral_molecule', + cation_name="Na", + anion_name="Cl", + espresso_system=espresso_system) + espresso_system.setup_type_map(type_list=type_map.values()) -input_parameters={"object_name":'isoelectric_polyampholyte', - "cation_name":"isoelectric_polyampholyte", + self.assertEqual(get_number_of_particles(espresso_system, type_map["Na"]),0) + self.assertEqual(get_number_of_particles(espresso_system, type_map["Cl"]),0) + # Assert that no counterions are created if the wrong object names are provided + inputs = {"object_name":'test', + "cation_name":"Na", "anion_name":"Cl", "espresso_system":espresso_system} -np.testing.assert_raises(ValueError, pmb.create_counterions, **input_parameters) -input_parameters={"object_name":'isoelectric_polyampholyte', + self.assertRaises(KeyError, + pmb.create_counterions, + **inputs) + inputs = {"object_name":'isoelectric_polyampholyte', + "cation_name":"Z", + "anion_name":"Cl", + "espresso_system":espresso_system} + self.assertRaises(ValueError, + pmb.create_counterions, + **inputs) + inputs = {"object_name":'isoelectric_polyampholyte', "cation_name":"Na", - "anion_name":'isoelectric_polyampholyte', + "anion_name":"Y", "espresso_system":espresso_system} -np.testing.assert_raises(ValueError, pmb.create_counterions, **input_parameters) - -print("*** Unit test passed ***") + self.assertRaises(ValueError, + pmb.create_counterions, + **inputs) + input_parameters={"object_name":'isoelectric_polyampholyte', + "cation_name":"isoelectric_polyampholyte", + "anion_name":"Cl", + "espresso_system":espresso_system} + self.assertRaises(ValueError, + pmb.create_counterions, + **input_parameters) + input_parameters={"object_name":'isoelectric_polyampholyte', + "cation_name":"Na", + "anion_name":'isoelectric_polyampholyte', + "espresso_system":espresso_system} + self.assertRaises(ValueError, pmb.create_counterions, **input_parameters) + +if __name__ == "__main__": + ut.main() \ No newline at end of 
file From ca1f34acce3e00362c637f7a0344648a5c7b2f8d Mon Sep 17 00:00:00 2001 From: Pablo Date: Sun, 18 Jan 2026 20:34:16 +0100 Subject: [PATCH 20/55] update copyright --- pyMBE/pyMBE.py | 2 +- pyMBE/storage/base_type.py | 19 +++++++++++++++++++ pyMBE/storage/instances/bond.py | 2 +- pyMBE/storage/instances/hydrogel.py | 2 +- pyMBE/storage/instances/molecule.py | 2 +- pyMBE/storage/instances/particle.py | 2 +- pyMBE/storage/instances/peptide.py | 2 +- pyMBE/storage/instances/protein.py | 2 +- pyMBE/storage/instances/residue.py | 2 +- pyMBE/storage/io.py | 2 +- pyMBE/storage/pint_quantity.py | 2 +- pyMBE/storage/reactions/reaction.py | 2 +- pyMBE/storage/templates/bond.py | 2 +- pyMBE/storage/templates/hydrogel.py | 2 +- pyMBE/storage/templates/lj.py | 2 +- pyMBE/storage/templates/molecule.py | 2 +- pyMBE/storage/templates/particle.py | 2 +- pyMBE/storage/templates/peptide.py | 2 +- pyMBE/storage/templates/protein.py | 19 +++++++++++++++++++ pyMBE/storage/templates/residue.py | 19 +++++++++++++++++++ testsuite/setup_salt_ions_unit_tests.py | 2 +- 21 files changed, 75 insertions(+), 18 deletions(-) diff --git a/pyMBE/pyMBE.py b/pyMBE/pyMBE.py index 22bce3c..ef5f13e 100644 --- a/pyMBE/pyMBE.py +++ b/pyMBE/pyMBE.py @@ -1,5 +1,5 @@ # -# Copyright (C) 2023-2025 pyMBE-dev team +# Copyright (C) 2023-2026 pyMBE-dev team # # This file is part of pyMBE. # diff --git a/pyMBE/storage/base_type.py b/pyMBE/storage/base_type.py index d22f2e4..cfb845e 100644 --- a/pyMBE/storage/base_type.py +++ b/pyMBE/storage/base_type.py @@ -1,3 +1,22 @@ +# +# Copyright (C) 2026 pyMBE-dev team +# +# This file is part of pyMBE. +# +# pyMBE is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. 
+# +# pyMBE is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. +# + from pydantic import BaseModel, Field class PMBBaseModel(BaseModel): diff --git a/pyMBE/storage/instances/bond.py b/pyMBE/storage/instances/bond.py index 5fa5390..18ca829 100644 --- a/pyMBE/storage/instances/bond.py +++ b/pyMBE/storage/instances/bond.py @@ -1,5 +1,5 @@ # -# Copyright (C) 2025 pyMBE-dev team +# Copyright (C) 2026 pyMBE-dev team # # This file is part of pyMBE. # diff --git a/pyMBE/storage/instances/hydrogel.py b/pyMBE/storage/instances/hydrogel.py index a2c4ae7..ca101a5 100644 --- a/pyMBE/storage/instances/hydrogel.py +++ b/pyMBE/storage/instances/hydrogel.py @@ -1,5 +1,5 @@ # -# Copyright (C) 2025 pyMBE-dev team +# Copyright (C) 2026 pyMBE-dev team # # This file is part of pyMBE. # diff --git a/pyMBE/storage/instances/molecule.py b/pyMBE/storage/instances/molecule.py index 32d3b65..7fe49ab 100644 --- a/pyMBE/storage/instances/molecule.py +++ b/pyMBE/storage/instances/molecule.py @@ -1,5 +1,5 @@ # -# Copyright (C) 2025 pyMBE-dev team +# Copyright (C) 2026 pyMBE-dev team # # This file is part of pyMBE. # diff --git a/pyMBE/storage/instances/particle.py b/pyMBE/storage/instances/particle.py index 2a220c7..3dc55c3 100644 --- a/pyMBE/storage/instances/particle.py +++ b/pyMBE/storage/instances/particle.py @@ -1,5 +1,5 @@ # -# Copyright (C) 2025 pyMBE-dev team +# Copyright (C) 2026 pyMBE-dev team # # This file is part of pyMBE. 
# diff --git a/pyMBE/storage/instances/peptide.py b/pyMBE/storage/instances/peptide.py index e22c894..f4d23a0 100644 --- a/pyMBE/storage/instances/peptide.py +++ b/pyMBE/storage/instances/peptide.py @@ -1,5 +1,5 @@ # -# Copyright (C) 2025 pyMBE-dev team +# Copyright (C) 2026 pyMBE-dev team # # This file is part of pyMBE. # diff --git a/pyMBE/storage/instances/protein.py b/pyMBE/storage/instances/protein.py index e3206dd..5e7566b 100644 --- a/pyMBE/storage/instances/protein.py +++ b/pyMBE/storage/instances/protein.py @@ -1,5 +1,5 @@ # -# Copyright (C) 2025 pyMBE-dev team +# Copyright (C) 2026 pyMBE-dev team # # This file is part of pyMBE. # diff --git a/pyMBE/storage/instances/residue.py b/pyMBE/storage/instances/residue.py index 5cdb8a8..8379c76 100644 --- a/pyMBE/storage/instances/residue.py +++ b/pyMBE/storage/instances/residue.py @@ -1,5 +1,5 @@ # -# Copyright (C) 2025 pyMBE-dev team +# Copyright (C) 2026 pyMBE-dev team # # This file is part of pyMBE. # diff --git a/pyMBE/storage/io.py b/pyMBE/storage/io.py index 632aee4..c342133 100644 --- a/pyMBE/storage/io.py +++ b/pyMBE/storage/io.py @@ -1,5 +1,5 @@ # -# Copyright (C) 2025 pyMBE-dev team +# Copyright (C) 2026 pyMBE-dev team # # This file is part of pyMBE. # diff --git a/pyMBE/storage/pint_quantity.py b/pyMBE/storage/pint_quantity.py index d5bd350..40d8edc 100644 --- a/pyMBE/storage/pint_quantity.py +++ b/pyMBE/storage/pint_quantity.py @@ -1,5 +1,5 @@ # -# Copyright (C) 2025 pyMBE-dev team +# Copyright (C) 2026 pyMBE-dev team # # This file is part of pyMBE. # diff --git a/pyMBE/storage/reactions/reaction.py b/pyMBE/storage/reactions/reaction.py index 8f558d1..ac2969f 100644 --- a/pyMBE/storage/reactions/reaction.py +++ b/pyMBE/storage/reactions/reaction.py @@ -1,5 +1,5 @@ # -# Copyright (C) 2025 pyMBE-dev team +# Copyright (C) 2026 pyMBE-dev team # # This file is part of pyMBE. 
# diff --git a/pyMBE/storage/templates/bond.py b/pyMBE/storage/templates/bond.py index d37e9cc..ce7e3dd 100644 --- a/pyMBE/storage/templates/bond.py +++ b/pyMBE/storage/templates/bond.py @@ -1,5 +1,5 @@ # -# Copyright (C) 2025 pyMBE-dev team +# Copyright (C) 2026 pyMBE-dev team # # This file is part of pyMBE. # diff --git a/pyMBE/storage/templates/hydrogel.py b/pyMBE/storage/templates/hydrogel.py index bb44cce..b4ed3c0 100644 --- a/pyMBE/storage/templates/hydrogel.py +++ b/pyMBE/storage/templates/hydrogel.py @@ -1,5 +1,5 @@ # -# Copyright (C) 2025 pyMBE-dev team +# Copyright (C) 2026 pyMBE-dev team # # This file is part of pyMBE. # diff --git a/pyMBE/storage/templates/lj.py b/pyMBE/storage/templates/lj.py index c61c1f3..1108388 100644 --- a/pyMBE/storage/templates/lj.py +++ b/pyMBE/storage/templates/lj.py @@ -1,5 +1,5 @@ # -# Copyright (C) 2025 pyMBE-dev team +# Copyright (C) 2026 pyMBE-dev team # # This file is part of pyMBE. # diff --git a/pyMBE/storage/templates/molecule.py b/pyMBE/storage/templates/molecule.py index 4a806f7..8fe5479 100644 --- a/pyMBE/storage/templates/molecule.py +++ b/pyMBE/storage/templates/molecule.py @@ -1,5 +1,5 @@ # -# Copyright (C) 2025 pyMBE-dev team +# Copyright (C) 2026 pyMBE-dev team # # This file is part of pyMBE. # diff --git a/pyMBE/storage/templates/particle.py b/pyMBE/storage/templates/particle.py index ce56875..f904f8f 100644 --- a/pyMBE/storage/templates/particle.py +++ b/pyMBE/storage/templates/particle.py @@ -1,5 +1,5 @@ # -# Copyright (C) 2025 pyMBE-dev team +# Copyright (C) 2026 pyMBE-dev team # # This file is part of pyMBE. # diff --git a/pyMBE/storage/templates/peptide.py b/pyMBE/storage/templates/peptide.py index 3d1ae61..60caace 100644 --- a/pyMBE/storage/templates/peptide.py +++ b/pyMBE/storage/templates/peptide.py @@ -1,5 +1,5 @@ # -# Copyright (C) 2025 pyMBE-dev team +# Copyright (C) 2026 pyMBE-dev team # # This file is part of pyMBE. 
# diff --git a/pyMBE/storage/templates/protein.py b/pyMBE/storage/templates/protein.py index 2f20d71..6953edb 100644 --- a/pyMBE/storage/templates/protein.py +++ b/pyMBE/storage/templates/protein.py @@ -1,3 +1,22 @@ +# +# Copyright (C) 2026 pyMBE-dev team +# +# This file is part of pyMBE. +# +# pyMBE is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# pyMBE is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. +# + from pyMBE.storage.base_type import PMBBaseModel from pydantic import Field diff --git a/pyMBE/storage/templates/residue.py b/pyMBE/storage/templates/residue.py index 8fb7f34..1b14617 100644 --- a/pyMBE/storage/templates/residue.py +++ b/pyMBE/storage/templates/residue.py @@ -1,3 +1,22 @@ +# +# Copyright (C) 2026 pyMBE-dev team +# +# This file is part of pyMBE. +# +# pyMBE is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# pyMBE is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. 
+# + from pyMBE.storage.base_type import PMBBaseModel from pydantic import Field diff --git a/testsuite/setup_salt_ions_unit_tests.py b/testsuite/setup_salt_ions_unit_tests.py index 8dc5c70..786ca5f 100644 --- a/testsuite/setup_salt_ions_unit_tests.py +++ b/testsuite/setup_salt_ions_unit_tests.py @@ -1,5 +1,5 @@ # -# Copyright (C) 2024 pyMBE-dev team +# Copyright (C) 2024-2026 pyMBE-dev team # # This file is part of pyMBE. # From 947e6844aaec2885802e9dfade46bac49c22a13f Mon Sep 17 00:00:00 2001 From: pmblanco Date: Tue, 20 Jan 2026 17:22:04 +0100 Subject: [PATCH 21/55] disentagle particle states from particle templates --- demo.py | 30 +- pyMBE/pyMBE.py | 372 ++++++++++++++---------- pyMBE/storage/io.py | 14 +- pyMBE/storage/manager.py | 98 +++---- pyMBE/storage/templates/particle.py | 26 +- test.py | 23 +- testsuite/lj_tests.py | 4 +- testsuite/setup_salt_ions_unit_tests.py | 16 +- 8 files changed, 315 insertions(+), 268 deletions(-) diff --git a/demo.py b/demo.py index 53e2e01..a603247 100644 --- a/demo.py +++ b/demo.py @@ -6,8 +6,8 @@ pmb = pyMBE.pymbe_library(seed=42) units = pmb.units espresso_system = espressomd.System(box_l=[10, 10, 10]) -# Define some particle templates +# Define some particle templates pmb.define_particle(name="Z", sigma=3.5 * units.reduced_length, cutoff=4 * units.reduced_length, @@ -30,6 +30,8 @@ tpl_particle_Z = pmb.db.get_template(name="Z", pmb_type="particle") tpl_particle_X = pmb.db.get_template(name="X", pmb_type="particle") + + # PintQuantity usage example print("\n=== PintQuantity Usage Example ===") print(f"PintQuantity class stored in the pyMBE database: {tpl_particle_Z.sigma}") @@ -45,6 +47,8 @@ print("\n=== LJ Templates DataFrame ===") print(pmb.get_templates_df(pmb_type="lj")) + + # Create instances of particles pmb.create_particle(name="Z", espresso_system=espresso_system, @@ -52,9 +56,11 @@ pmb.create_particle(name="X", espresso_system=espresso_system, number_of_particles=1) + print("\n=== Particle Instances DataFrame ===") 
print(pmb.get_instances_df(pmb_type="particle")) + # Delete instances of particles 0-2 for i in range(3): pmb.delete_instances_in_system(espresso_system=espresso_system, @@ -63,13 +69,16 @@ print("\n=== Particle Instances DataFrame After Deletion ===") print(pmb.get_instances_df(pmb_type="particle")) + pmb.delete_instances_in_system(espresso_system=espresso_system, pmb_type="particle", instance_id=3) # Create residue ## Define residues and bonds -pmb.define_residue(name="R1", central_bead="Z", side_chains=["X","Z"]) +pmb.define_residue(name="R1", + central_bead="Z", + side_chains=["X","Z"]) parameters = {"k": 100.0 * units.reduced_energy / (units.reduced_length**2), "r_0": 1.0 * units.reduced_length} pmb.define_bond(bond_type="harmonic", @@ -83,6 +92,7 @@ print("\n=== Bond Templates DataFrame ===") print(pmb.get_templates_df(pmb_type="bond")) + # Create residue instance pmb.create_residue(name="R1", espresso_system=espresso_system) @@ -109,8 +119,10 @@ print(pmb2.get_templates_df(pmb_type="particle")) # Access some data in the database -tpl_particle_Z = pmb.db.get_template(name="Z", pmb_type="particle") -tpl_particle_Z_loaded = pmb2.db.get_template(name="Z", pmb_type="particle") +tpl_particle_Z = pmb.db.get_template(name="Z", + pmb_type="particle") +tpl_particle_Z_loaded = pmb2.db.get_template(name="Z", + pmb_type="particle") print("\n=== PintQuantity Usage Example After Loading Database ===") original_sigma_Z = tpl_particle_Z.sigma.to_quantity(pmb.units) @@ -153,11 +165,13 @@ for chain_data in chain_labels.items(): node_label_pair = chain_data[0] node_label_s, node_label_e = [int(x) for x in node_label_pair.strip("()").split(",")] - chain_topology.append({'node_start':reverse_node_labels[node_label_s], + chain_topology.append({'node_start': reverse_node_labels[node_label_s], 'node_end': reverse_node_labels[node_label_e], - 'molecule_name':"M1"}) + 'molecule_name': "M1"}) -pmb.define_hydrogel("my_hydrogel", node_topology, chain_topology) 
+pmb.define_hydrogel("my_hydrogel", + node_topology, + chain_topology) print("\n=== Molecule Templates DataFrame ===") print(pmb.get_templates_df(pmb_type="molecule")) @@ -176,4 +190,4 @@ print(pmb.get_instances_df(pmb_type="molecule")) print("\n=== Hydrogel Instances DataFrame After Hydrogel Creation ===") print(pmb.get_instances_df(pmb_type="hydrogel")) -pmb.save_database("demo_csv") +pmb.save_database("demo_csv") \ No newline at end of file diff --git a/pyMBE/pyMBE.py b/pyMBE/pyMBE.py index ef5f13e..62c3a8f 100644 --- a/pyMBE/pyMBE.py +++ b/pyMBE/pyMBE.py @@ -30,7 +30,7 @@ from pyMBE.storage.manager import Manager from pyMBE.storage.pint_quantity import PintQuantity ## Templates -from pyMBE.storage.templates.particle import ParticleTemplate, ParticleState +from pyMBE.storage.templates.particle import ParticleTemplate, ParticleStateTemplate from pyMBE.storage.templates.residue import ResidueTemplate from pyMBE.storage.templates.molecule import MoleculeTemplate from pyMBE.storage.templates.peptide import PeptideTemplate @@ -571,10 +571,14 @@ def create_added_salt(self, espresso_system, cation_name, anion_name, c_salt): """ cation_tpl = self.db.get_template(pmb_type="particle", name=cation_name) - cation_charge = cation_tpl.states[cation_tpl.initial_state].z + cation_state = self.db.get_template(pmb_type="particle_state", + name=cation_tpl.initial_state) + cation_charge = cation_state.z anion_tpl = self.db.get_template(pmb_type="particle", name=anion_name) - anion_charge = anion_tpl.states[anion_tpl.initial_state].z + anion_state = self.db.get_template(pmb_type="particle_state", + name=anion_tpl.initial_state) + anion_charge = anion_state.z if cation_charge <= 0: raise ValueError(f'ERROR cation charge must be positive, charge {cation_charge}') if anion_charge >= 0: @@ -788,9 +792,9 @@ def create_molecule(self, name, number_of_molecules, espresso_system, list_of_fi # Generate an arbitrary random unit vector if backbone_vector is None: backbone_vector = 
self.generate_random_points_in_a_sphere(center=[0,0,0], - radius=1, - n_samples=1, - on_surface=True)[0] + radius=1, + n_samples=1, + on_surface=True)[0] else: backbone_vector = np.array(backbone_vector) first_residue = True @@ -817,9 +821,9 @@ def create_molecule(self, name, number_of_molecules, espresso_system, list_of_fi # Add molecule_id to the residue instance and all particles associated self.db._propagate_id(root_type="residue", - root_id=residue_id, - attribute="molecule_id", - value=molecule_id) + root_id=residue_id, + attribute="molecule_id", + value=molecule_id) particle_ids_in_residue = self.db._find_instance_ids_by_attribute(pmb_type="particle", attribute="residue_id", value=residue_id) @@ -844,10 +848,10 @@ def create_molecule(self, name, number_of_molecules, espresso_system, list_of_fi central_bead_pos = prev_central_bead_pos+backbone_vector*l0 # Create the residue residue_id = self.create_residue(name=residue, - espresso_system=espresso_system, - central_bead_position=[central_bead_pos], - use_default_bond= use_default_bond, - backbone_vector=backbone_vector) + espresso_system=espresso_system, + central_bead_position=[central_bead_pos], + use_default_bond= use_default_bond, + backbone_vector=backbone_vector) # Add molecule_id to the residue instance and all particles associated self.db._propagate_id(root_type="residue", root_id=residue_id, @@ -878,7 +882,6 @@ def create_molecule(self, name, number_of_molecules, espresso_system, list_of_fi first_residue = True pos_index+=1 molecule_ids.append(molecule_id) - return molecule_id def create_particle(self, name, espresso_system, number_of_particles, position=None, fix=False): @@ -902,10 +905,10 @@ def create_particle(self, name, espresso_system, number_of_particles, position=N part_tpl = self.db.get_template(pmb_type="particle", name=name) - initial_state = part_tpl.states[part_tpl.initial_state] - z = initial_state.z - es_type = initial_state.es_type - + part_state = 
self.db.get_template(pmb_type="particle_state", + name=part_tpl.initial_state) + z = part_state.z + es_type = part_state.es_type # Create the new particles into ESPResSo created_pid_list=[] for index in range(number_of_particles): @@ -922,7 +925,7 @@ def create_particle(self, name, espresso_system, number_of_particles, position=N espresso_system.part.add(**kwargs) part_inst = ParticleInstance(name=name, particle_id=particle_id, - initial_state=initial_state.name) + initial_state=part_state.name) self.db._register_instance(part_inst) return created_pid_list @@ -1059,12 +1062,14 @@ def create_residue(self, name, espresso_system, central_bead_position=None,use_d side_chain_beads_ids = [] for side_chain_name in side_chain_list: pmb_type_list = self.db._find_template_types(name=side_chain_name) - if len(pmb_type_list) > 1: - raise KeyError(f"Detected multiple templates with the same name '{side_chain_name}' in the pyMBE database, pmb_types: {pmb_type_list}. Residue creation aborted to avoid ambiguity.") - elif not pmb_type_list: - logging.warning(f"Element in side chain with name '{name}' is not defined in the pyMBE database, nothing will be created.") - continue - pmb_type = pmb_type_list[0] + allowed_types = {"particle", "residue"} + filtered_types = allowed_types.intersection(pmb_type_list) + if len(filtered_types) > 1: + raise KeyError(f"Ambiguous template name '{side_chain_name}': found both 'particle' and 'residue' templates in the pyMBE database. Residue creation aborted.") + if len(filtered_types) == 0: + raise KeyError( + f"No 'particle' or 'residue' template found with name '{side_chain_name}'. 
Found templates of types: {set(pmb_type_list)}.") + pmb_type = next(iter(filtered_types)) if pmb_type == 'particle': lj_parameters = self.get_lj_parameters(particle_name1=central_bead_name, particle_name2=side_chain_name) @@ -1265,6 +1270,35 @@ def define_molecule(self, name, residue_list): residue_list=residue_list) self.db._register_template(tpl) + def define_monoprototic_acidbase_reaction(self, particle_name, pka, acidity, metadata=None): + """ + Defines an acid-base reaction for a monoprototic particle in the pyMBE database. + + Args: + particle_name (str): Unique label that identifies the particle template. + pka (float): pka-value of the acid or base. + acidity (str): Identifies whether if the particle is `acidic` or `basic`. + metadata (dict, optional): Additional information to be stored in the reaction. Defaults to None. + """ + supported_acidities = ["acidic", "basic"] + if acidity not in supported_acidities: + raise ValueError(f"Unsupported acidity '{acidity}' for particle '{particle_name}'. Supported acidities are {supported_acidities}.") + reaction_type = "monoprotic" + if acidity == "basic": + reaction_type += "_base" + else: + reaction_type += "_acid" + reaction = Reaction(participants=[ReactionParticipant(particle_name=particle_name, + state_name=f"{particle_name}H", + coefficient=-1), + ReactionParticipant(particle_name=particle_name, + state_name=f"{particle_name}", + coefficient=1)], + reaction_type=reaction_type, + pK=pka, + metadata=metadata) + self.db._register_reaction(reaction) + def define_particle(self, name, sigma, epsilon, z=0, acidity=pd.NA, pka=pd.NA, cutoff=pd.NA, offset=pd.NA): """ Defines a particle template in the pyMBE database. @@ -1286,26 +1320,67 @@ def define_particle(self, name, sigma, epsilon, z=0, acidity=pd.NA, pka=pd.NA, - `offset` defaults to 0. - For more information on `sigma`, `epsilon`, `cutoff` and `offset` check `pmb.setup_lj_interactions()`. 
""" - # If `cutoff` and `offset` are not defined, default them to the following values if pd.isna(cutoff): cutoff=self.units.Quantity(2**(1./6.), "reduced_length") if pd.isna(offset): offset=self.units.Quantity(0, "reduced_length") - + + # Define particle states + if acidity is pd.NA: + states = [{"name": f"{name}", "z": z}] + self.define_particle_states(particle_name=name, + states=states) + initial_state = name + else: + self.set_monoprototic_particle_states(particle_name=name, + acidity=acidity) + initial_state = f"{name}H" + if pka is not pd.NA: + self.define_monoprototic_acidbase_reaction(particle_name=name, + acidity=acidity, + pka=pka) tpl = ParticleTemplate(name=name, sigma=PintQuantity.from_quantity(q=sigma, expected_dimension="length", ureg=self.units), epsilon=PintQuantity.from_quantity(q=epsilon, expected_dimension="energy", ureg=self.units), cutoff=PintQuantity.from_quantity(q=cutoff, expected_dimension="length", ureg=self.units), - offset=PintQuantity.from_quantity(q=offset, expected_dimension="length", ureg=self.units)) - - # Define particle acid/base properties - self.set_particle_acidity(particle_template=tpl, - acidity=acidity, - default_charge_number=z, - pka=pka) - return + offset=PintQuantity.from_quantity(q=offset, expected_dimension="length", ureg=self.units), + initial_state=initial_state) + self.db._register_template(tpl) + def define_particle_states(self, particle_name, states): + """ + Define the chemical states of an existing particle template. + + Args: + particle_name (`str`): + Name of a particle template. + + states (`list` of `dict`): + List of dictionaries defining the particle states. Each dictionary + must contain: + - `name` (`str`): Name of the particle state (e.g. `"H"`, `"-"`, + `"neutral"`). + - `z` (`int`): Charge number of the particle in this state. 
+ Example: + states = [{"name": "AH", "z": 0}, # protonated + {"name": "A-", "z": -1}] # deprotonated + Notes: + - Each state is assigned a unique Espresso `es_type` automatically. + - Chemical reactions (e.g. acid–base equilibria) are **not** created by + this method and must be defined separately (e.g. via + `set_particle_acidity()` or custom reaction definitions). + - Particles without explicitly defined states are assumed to have a + single, implicit state with their default charge. + """ + for s in states: + state = ParticleStateTemplate(particle_name=particle_name, + name=s["name"], + z=s["z"], + es_type=self.propose_unused_type()) + self.db._register_template(state) + + def define_peptide(self, name, sequence, model): """ Defines a peptide template in the pyMBE database. @@ -1321,9 +1396,9 @@ def define_peptide(self, name, sequence, model): clean_sequence = self.protein_sequence_parser(sequence=sequence) residue_list = self._get_residue_list_from_sequence(sequence=clean_sequence) tpl = PeptideTemplate(name=name, - residue_list=residue_list, - model=model, - sequence=sequence) + residue_list=residue_list, + model=model, + sequence=sequence) self.db._register_template(tpl) def define_protein(self, name, sequence, model): @@ -1768,21 +1843,64 @@ def get_particle_id_map(self, object_name): """ return self.db.get_particle_id_map(object_name=object_name) + def get_particle_pka(self, particle_name): + """ + Retrieve the pKa value associated with a particle from the pyMBE database. + + Args: + particle_name (str): Name of the particle template. 
+ + Returns: + float or None: + - The pKa value if the particle participates in a single acid/base reaction + - None if the particle is inert (no acid/base reaction) + """ + acid_base_reactions = [] + for reaction in self.db._reactions.values(): + if reaction.reaction_type != "acid/base": + continue + for participant in reaction.participants: + if participant.particle_name == particle_name: + acid_base_reactions.append(reaction) + break + if len(acid_base_reactions) == 0: + return None + if len(acid_base_reactions) > 1: + raise ValueError(f"Multiple acid/base reactions found for particle '{particle_name}'. " + "Ambiguous pKa.") + return acid_base_reactions[0].pK + def get_pka_set(self): - ''' - Gets the pka-values and acidities of the particles with acid/base properties in `pmb.df` - + """ + Retrieve the pKa set for all titratable particles in the pyMBE database. + Returns: - pka_set(`dict`): {"name" : {"pka_value": pka, "acidity": acidity}} - ''' - titratables_AA_df = self.df[[('name',''),('pka',''),('acidity','')]].drop_duplicates().dropna() + dict: Dictionary of the form: + {"particle_name": {"pka_value": float, + "acidity": "acidic" | "basic"}} + Note: + - If a particle participates in multiple acid/base reactions, an error is raised. 
+ """ pka_set = {} - for index in titratables_AA_df.name.keys(): - name = titratables_AA_df.name[index] - pka_value = titratables_AA_df.pka[index] - acidity = titratables_AA_df.acidity[index] - pka_set[name] = {'pka_value':pka_value,'acidity':acidity} - return pka_set + for reaction in self.db._reactions.values(): + if "monoprotic" not in reaction.reaction_type: + continue + if reaction.pK is None: + continue + # Identify involved particle(s) + particle_names = {participant.particle_name for participant in reaction.participants} + particle_name = particle_names.pop() + if particle_name in pka_set: + raise ValueError(f"Multiple acid/base reactions found for particle '{particle_name}'.") + pka_set[particle_name] = {"pka_value": reaction.pK} + if reaction.reaction_type == "monoprotic_acid": + acidity = "acidic" + elif reaction.reaction_type == "monoprotic_base": + acidity = "basic" + else: + raise ValueError(f"Cannot infer acidity for particle '{particle_name}' from reaction type: {reaction.reaction_type}") + pka_set[particle_name]["acidity"] = acidity + return pka_set def get_radius_map(self, dimensionless=True): ''' @@ -1804,7 +1922,7 @@ def get_radius_map(self, dimensionless=True): radius = (tpl.sigma.to_quantity(self.units) + tpl.offset.to_quantity(self.units))/2.0 if dimensionless: radius = radius.magnitude - for _, state in tpl.states.items(): + for state in self.db.get_particle_states_templates(particle_name=tpl.name).values(): result[state.es_type] = radius return result @@ -1963,30 +2081,38 @@ def load_interaction_parameters(self, filename, overwrite=False): return - def load_pka_set(self, filename, overwrite=True): + def load_pka_set(self, filename): """ - Loads the pka_set stored in `filename` into `pmb.df`. - + Load a pKa set and attach chemical states and acid–base reactions + to existing particle templates. + Args: - filename(`str`): name of the file with the pka set to be loaded. Expected format is {name:{"acidity": acidity, "pka_value":pka_value}}. 
- overwrite(`bool`, optional): Switch to enable overwriting of already existing values in pmb.df. Defaults to True. + filename (`str`): Path to a JSON file containing the pKa set. + Expected format: + { + "metadata": {...}, + "data": { + "A": {"acidity": "acidic", "pka_value": 4.5}, + "B": {"acidity": "basic", "pka_value": 9.8} + } + } + + Notes: + - This method is designed for monoprotic acids and bases only. """ - with open(filename, 'r') as f: + with open(filename, "r") as f: pka_data = json.load(f) - pka_set = pka_data["data"] - - self.check_pka_set(pka_set=pka_set) - - for key in pka_set: - acidity = pka_set[key]['acidity'] - pka_value = pka_set[key]['pka_value'] - self.set_particle_acidity(name=key, - acidity=acidity, - pka=pka_value, - overwrite=overwrite) - return - - + pka_set = pka_data["data"] + metadata = pka_data.get("metadata", {}) + self.check_pka_set(pka_set) + for particle_name, entry in pka_set.items(): + acidity = entry["acidity"] + pka = entry["pka_value"] + self.define_monoprototic_acidbase_reaction(particle_name=particle_name, + pka=pka, + acidity=acidity, + metadata=metadata) + def propose_unused_type(self): """ Propose an unused ESPResSo particle type. @@ -2253,73 +2379,42 @@ def search_particles_in_residue(self, residue_name): list_of_particles_in_residue.append(side_chain) return list_of_particles_in_residue - def set_particle_acidity(self, particle_template, acidity=pd.NA, default_charge_number=0, pka=pd.NA): + def set_monoprototic_particle_states(self, particle_name, acidity): """ - Sets the particle acidity including the charges in each of its possible states. + Sets the acidity for a monoprotonic particle template including the charges in each of its possible states. Args: - name(`str`): Unique label that identifies the `particle`. - acidity(`str`): Identifies whether the particle is `acidic` or `basic`, used to setup constant pH simulations. Defaults to None. - default_charge_number (`int`): Charge number of the particle. 
Defaults to 0. - pka(`float`, optional): If `particle` is an acid or a base, it defines its pka-value. Defaults to pandas.NA. - overwrite(`bool`, optional): Switch to enable overwriting of already existing values in pmb.df. Defaults to False. - - Note: - - For particles with `acidity = acidic` or `acidity = basic`, `state_one` and `state_two` correspond to the protonated and - deprotonated states, respectively. - - For particles without an acidity `acidity = pandas.NA`, only `state_one` is defined. - - Each state has the following properties as sub-indexes: `label`,`charge` and `es_type`. + particle_name(`str`): Unique label that identifies the particle template. + acidity(`str`): Identifies whether the particle is `acidic` or `basic`. """ - acidity_valid_keys = ['inert','acidic', 'basic'] + acidity_valid_keys = ['acidic', 'basic'] if not pd.isna(acidity): if acidity not in acidity_valid_keys: - raise ValueError(f"Acidity {acidity} provided for particle name {particle_template.name} is not supported. Valid keys are: {acidity_valid_keys}") - if acidity in ['acidic', 'basic'] and pd.isna(pka): - raise ValueError(f"pKa not provided for particle with name {particle_template.name} with acidity {acidity}. pKa must be provided for acidic or basic particles.") - if acidity == "inert": - acidity = pd.NA - logging.warning("the keyword 'inert' for acidity has been replaced by setting acidity = pd.NA. For backwards compatibility, acidity has been set to pd.NA. Support for `acidity = 'inert'` may be deprecated in future releases of pyMBE") + raise ValueError(f"Acidity {acidity} provided for particle name {particle_name} is not supported. 
Valid keys are: {acidity_valid_keys}") + if acidity == "acidic": + states = [{"name": f"{particle_name}H", "z": 0}, + {"name": f"{particle_name}", "z": -1}] + + elif acidity == "basic": + states = [{"name": f"{particle_name}H", "z": 1}, + {"name": f"{particle_name}", "z": 0}] + self.define_particle_states(particle_name=particle_name, + states=states) + def set_particle_initial_state(self, particle_name, state_name): + """ + Sets the default initial state of a particle template defined in the pyMBE database. + + Args: + particle_name(`str`): Unique label that identifies the particle template. + state_name(`str`): Name of the state to be set as default initial state. + """ + part_tpl = self.db.get_template(name=particle_name, - # Define the first state - if pka is pd.NA: - # Inert particle with a single state - z_state_one = default_charge_number - name_state_one = particle_template.name - else: - if acidity == "acidic": - z_state_one = 0 - elif acidity == "basic": - z_state_one = 1 - name_state_one = particle_template.name + "H" - - particle_template.add_state(ParticleState(name=name_state_one, - z=z_state_one, - es_type=self.propose_unused_type())) - self.db._register_template(particle_template) - - # For monoprotic acid/base particles, define the second state - if pka is not pd.NA: - if acidity == "acidic": - z_state_two = -1 - elif acidity == "basic": - z_state_two = 0 - name_state_two = particle_template.name - particle_template.add_state(ParticleState(name=name_state_two, - z=z_state_two, - es_type=self.propose_unused_type())) - - reaction = Reaction(participants=[ReactionParticipant(particle_name=particle_template.name, - state_name=name_state_one, - coefficient=-1), - ReactionParticipant(particle_name=particle_template.name, - state_name=name_state_two, - coefficient=1)], - reaction_type="acid/base", - pK=pka) - self.db._register_reaction(reaction) + pmb_type="particle") + part_tpl.initial_state = state_name + logging.info(f"Default initial state of particle 
{particle_name} set to {state_name}.") - def set_reduced_units(self, unit_length=None, unit_charge=None, temperature=None, Kw=None): """ Sets the set of reduced units used by pyMBE.units and it prints it. @@ -2806,7 +2901,7 @@ def setup_lj_interactions(self, espresso_system, shift_potential=True, combining # Flatten states with template context state_entries = [] for tpl in particle_templates.values(): - for state in tpl.states.values(): + for state in self.db.get_particle_states_templates(particle_name=tpl.name).values(): state_entries.append((tpl, state)) # Iterate over all unique state pairs @@ -2841,20 +2936,3 @@ def setup_lj_interactions(self, espresso_system, shift_potential=True, combining ureg=self.units), shift=shift) self.db._register_template(lj_template) - - def write_pmb_df (self, filename): - ''' - Writes the pyMBE dataframe into a csv file - - Args: - filename(`str`): Path to the csv file - ''' - - columns_with_list_or_dict = ['residue_list','side_chains', 'parameters_of_the_potential','sequence'] - df = self.df.copy(deep=True) - for column_name in columns_with_list_or_dict: - df[column_name] = df[column_name].apply(lambda x: json.dumps(x) if isinstance(x, (np.ndarray, tuple, list, dict)) or pd.notnull(x) else x) - df['bond_object'] = df['bond_object'].apply(lambda x: f'{x.__class__.__name__}({json.dumps({**x.get_params(), "bond_id": x._bond_id})})' if pd.notnull(x) else x) - df.fillna(pd.NA, inplace=True) - df.to_csv(filename) - return diff --git a/pyMBE/storage/io.py b/pyMBE/storage/io.py index c342133..96097d5 100644 --- a/pyMBE/storage/io.py +++ b/pyMBE/storage/io.py @@ -15,7 +15,7 @@ # # You should have received a copy of the GNU General Public License # along with this program. If not, see . 
-# + import os import json @@ -26,7 +26,7 @@ from pint import UnitRegistry from pyMBE.storage.pint_quantity import PintQuantity -from pyMBE.storage.templates.particle import ParticleTemplate, ParticleState +from pyMBE.storage.templates.particle import ParticleTemplate, ParticleStateTemplate from pyMBE.storage.templates.residue import ResidueTemplate from pyMBE.storage.templates.molecule import MoleculeTemplate from pyMBE.storage.templates.bond import BondTemplate @@ -163,21 +163,13 @@ def _load_database_csv(db, folder): cutoff = PintQuantity.from_dict(cutoff_d) if cutoff_d is not None else None offset = PintQuantity.from_dict(offset_d) if offset_d is not None else None - states: Dict[str, ParticleState] = {} - if isinstance(states_d, dict): - for sname, sdata in states_d.items(): - # sdata expected to be a dict matching ParticleState fields - states[sname] = ParticleState(**sdata) - tpl = ParticleTemplate( name=row["name"], sigma=sigma, epsilon=epsilon, cutoff=cutoff, offset=offset, - states=states, - initial_state=row["initial_state"] - ) + initial_state=row["initial_state"]) templates[tpl.name] = tpl elif pmb_type == "residue": diff --git a/pyMBE/storage/manager.py b/pyMBE/storage/manager.py index 752a2c0..a5d10b0 100644 --- a/pyMBE/storage/manager.py +++ b/pyMBE/storage/manager.py @@ -327,48 +327,37 @@ def _get_templates_df(self, pmb_type): return pd.DataFrame(rows) for tpl in self._templates[pmb_type].values(): if pmb_type == "particle": - for sname, st in tpl.states.items(): - rows.append({ - "pmb_type": tpl.pmb_type, - "name": tpl.name, - "sigma": tpl.sigma.to_quantity(self._units), - "epsilon": tpl.epsilon.to_quantity(self._units), - "cutoff": tpl.cutoff.to_quantity(self._units), - "offset": tpl.offset.to_quantity(self._units), - "initial_state": tpl.initial_state, - "state": sname, - "z": st.z, - "es_type": st.es_type - }) + rows.append({"pmb_type": tpl.pmb_type, + "name": tpl.name, + "sigma": tpl.sigma.to_quantity(self._units), + "epsilon": 
tpl.epsilon.to_quantity(self._units), + "cutoff": tpl.cutoff.to_quantity(self._units), + "offset": tpl.offset.to_quantity(self._units), + "initial_state": tpl.initial_state}) elif pmb_type == "lj": shift = tpl.shift if isinstance(shift, dict) and {"magnitude", "units", "dimension"}.issubset(shift.keys()): shift = tpl.shift.to_quantity(self._units) - rows.append({ - "pmb_type": tpl.pmb_type, - "name": tpl.name, - "state1": tpl.state1, - "state2": tpl.state2, - "sigma": tpl.sigma.to_quantity(self._units), - "epsilon": tpl.epsilon.to_quantity(self._units), - "cutoff": tpl.cutoff.to_quantity(self._units), - "offset": tpl.offset.to_quantity(self._units), - "shift": shift - }) + rows.append({"pmb_type": tpl.pmb_type, + "name": tpl.name, + "state1": tpl.state1, + "state2": tpl.state2, + "sigma": tpl.sigma.to_quantity(self._units), + "epsilon": tpl.epsilon.to_quantity(self._units), + "cutoff": tpl.cutoff.to_quantity(self._units), + "offset": tpl.offset.to_quantity(self._units), + "shift": shift}) elif pmb_type == "bond": parameters = {} for key in tpl.parameters.keys(): parameters[key] = tpl.parameters[key].to_quantity(self._units) - rows.append({ - "pmb_type": tpl.pmb_type, - "name": tpl.name, - "bond_type": tpl.bond_type, - "particle_name1": tpl.particle_name1, - "particle_name2": tpl.particle_name2, - "parameters": parameters, - }) - + rows.append({"pmb_type": tpl.pmb_type, + "name": tpl.name, + "bond_type": tpl.bond_type, + "particle_name1": tpl.particle_name1, + "particle_name2": tpl.particle_name2, + "parameters": parameters}) else: # Generic representation for other types rows.append(tpl.model_dump()) @@ -469,22 +458,13 @@ def _register_instance(self, instance): iid = instance.assembly_id else: raise TypeError("Unsupported instance type") - self._instances.setdefault(pmb_type, {}) - if iid in self._instances[pmb_type]: raise ValueError(f"Instance id {iid} already exists in type '{pmb_type}'") - # validate template exists if instance.name not in 
self._templates.get(pmb_type, {}): raise ValueError(f"Template '{instance.name}' not found for type '{pmb_type}'") - # validate state for particle instances - if pmb_type == "particle": - tpl: ParticleTemplate = self._templates[pmb_type][instance.name] - if instance.initial_state not in tpl.states: - raise ValueError(f"State '{instance.initial_state}' not defined in template '{instance.name}'") - self._instances[pmb_type][iid] = instance def _register_reaction(self, reaction): @@ -535,18 +515,9 @@ def _register_template(self, template): raise TypeError("Unknown template type; set attribute pmb_type or use supported templates") self._templates.setdefault(pmb_type, {}) - if template.name in self._templates[pmb_type]: raise ValueError(f"Template '{template.name}' exists in '{pmb_type}'") - # particle templates must define at least one state - if pmb_type == "particle": - if not hasattr(template, "states") or len(template.states) == 0: - raise ValueError("ParticleTemplate must define at least one state.") - # ensure default_state valid if set - if getattr(template, "default_state", None) is not None and template.default_state not in template.states: - raise ValueError("default_state not in template states") - self._templates[pmb_type][template.name] = template def _update_instance(self, instance_id, pmb_type, attribute, value): @@ -1049,8 +1020,8 @@ def get_es_types_map(self): return {} result = {} for _, tpl in self._templates["particle"].items(): - for state_name, state in tpl.states.items(): - result[state_name] = state.es_type + for state in self.get_particle_states_templates(tpl.name).values(): + result[state.name] = state.es_type return result def get_particle_id_map(self, object_name): @@ -1132,4 +1103,23 @@ def add_to_map(target_map, key, pid): add_to_map(residue_map, p.residue_id, pid) # Deduplicate + sort IDs id_list = sorted(set(id_list)) - return {"all": id_list, "molecule_map": molecule_map, "residue_map": residue_map, "assembly_map": assembly_map,} \ No 
newline at end of file + return {"all": id_list, "molecule_map": molecule_map, "residue_map": residue_map, "assembly_map": assembly_map,} + + def get_particle_states_templates(self, particle_name): + """ + Retrieve all particle state templates associated with a given particle. + + Args: + particle_name (str): Name of the particle template. + + Returns: + Dict[str, ParticleState]: + Dictionary mapping state names to `ParticleState` templates. + """ + states = self._templates.get("particle_state", {}) + + particle_states = {state.name: state for state in states.values() + if state.particle_name == particle_name} + if not particle_states: + raise ValueError(f"No particle states registered for particle '{particle_name}'.") + return particle_states diff --git a/pyMBE/storage/templates/particle.py b/pyMBE/storage/templates/particle.py index f904f8f..6470cdf 100644 --- a/pyMBE/storage/templates/particle.py +++ b/pyMBE/storage/templates/particle.py @@ -23,7 +23,7 @@ from ..base_type import PMBBaseModel from ..pint_quantity import PintQuantity -class ParticleState(PMBBaseModel): +class ParticleStateTemplate(PMBBaseModel): """ Represents a single state of a particle in pyMBE. @@ -34,11 +34,11 @@ class ParticleState(PMBBaseModel): es_type (float): Identifier for the state used in Espresso simulations. """ pmb_type: Literal["particle_state"] = "particle_state" + particle_name: str name: str # e.g. "HA", "A-", "H+" z: int es_type: int # label in espresso - class ParticleTemplate(PMBBaseModel): """ Template describing a particle type, including interaction parameters and allowed states. @@ -61,30 +61,8 @@ class ParticleTemplate(PMBBaseModel): cutoff: PintQuantity offset: PintQuantity epsilon: PintQuantity - states: Dict[str, ParticleState] = {} initial_state: Optional[str] = None - def add_state(self, state): - """ - Add a new state to the particle template. - - This method registers a new `ParticleState` in the template's `states` dictionary. 
- If a state with the same name already exists, a `ValueError` is raised. - - Args: - state (ParticleState): The particle state to add. - - Raises: - ValueError: If a state with the same name already exists in the template. - """ - if state.name in self.states: - raise ValueError(f"State {state.name} already exists in template {self.name}") - self.states[state.name] = state - - # Automatically assign initial state if this is the first state - if self.initial_state is None: - self.initial_state = state.name - def get_lj_parameters(self, ureg): """ Retrieve the Lennard-Jones interaction parameters for the particle template. diff --git a/test.py b/test.py index 6d73f75..d79afa3 100644 --- a/test.py +++ b/test.py @@ -1,22 +1,9 @@ import pyMBE from pyMBE.storage.manager import Manager -from pyMBE.storage.templates.particle import ParticleTemplate, ParticleState -from pyMBE.storage.instances.particle import ParticleInstance from pyMBE.storage.reactions.reaction import Reaction, ReactionParticipant from pyMBE.storage.pint_quantity import PintQuantity -from pyMBE.storage.templates.residue import ResidueTemplate -from pyMBE.storage.instances.residue import ResidueInstance -from pyMBE.storage.templates.molecule import MoleculeTemplate -from pyMBE.storage.instances.molecule import MoleculeInstance -from pyMBE.storage.templates.bond import BondTemplate -from pyMBE.storage.instances.bond import BondInstance -from pyMBE.storage.templates.peptide import PeptideTemplate -from pyMBE.storage.instances.peptide import PeptideInstance -from pyMBE.storage.templates.protein import ProteinTemplate -from pyMBE.storage.instances.protein import ProteinInstance -from pyMBE.storage.templates.hydrogel import HydrogelTemplate, HydrogelNode, HydrogelChain -from pyMBE.storage.instances.hydrogel import HydrogelInstance + from pyMBE.storage.templates.lj import LJInteractionTemplate from pyMBE.lib.lattice import DiamondLattice @@ -48,6 +35,10 @@ def main(): pmb = pyMBE.pymbe_library(seed=42) units = 
pmb.units + + path_to_pka=pmb.root / "parameters" / "pka_sets" / "Nozaki1967.json" + pmb.load_pka_set(filename=path_to_pka) + pmb.define_particle(name="Z", sigma=3.5 * units.reduced_length, cutoff=4 * units.reduced_length, @@ -69,8 +60,10 @@ def main(): offset=0 * units.reduced_length, epsilon=0.2 * units.reduced_energy, z=-1) - + print(pmb.db._get_reactions_df()) print(pmb.db._get_templates_df(pmb_type="particle")) + print(pmb.get_templates_df(pmb_type="particle_state")) + print(pmb.get_radius_map(dimensionless=False)) print("\n=== Setup LJ interactions ===") pmb.setup_lj_interactions(espresso_system=espresso_system) diff --git a/testsuite/lj_tests.py b/testsuite/lj_tests.py index 7767580..c4b5bdd 100644 --- a/testsuite/lj_tests.py +++ b/testsuite/lj_tests.py @@ -64,7 +64,9 @@ def test_particle_definition(self): second=input_parameters[parameter_key].to("reduced_length").magnitude) # Clean template from the database pmb.db.delete_template(name="D", - pmb_type="particle") + pmb_type="particle") + pmb.db.delete_template(name="D", + pmb_type="particle_state") input_parameters={"name":"D", "sigma":1*pmb.units.nm, diff --git a/testsuite/setup_salt_ions_unit_tests.py b/testsuite/setup_salt_ions_unit_tests.py index 786ca5f..a2adf2d 100644 --- a/testsuite/setup_salt_ions_unit_tests.py +++ b/testsuite/setup_salt_ions_unit_tests.py @@ -31,20 +31,20 @@ # Define a set of ions pmb.define_particle(name="Na", z=1, - sigma=sigma, - epsilon=epsilon) + sigma=0.3*pmb.units.nm, + epsilon=1*pmb.units.Quantity(1,"reduced_energy")) pmb.define_particle(name="Ca", z=2, - sigma=sigma, - epsilon=epsilon) + sigma=0.3*pmb.units.nm, + epsilon=1*pmb.units.Quantity(1,"reduced_energy")) pmb.define_particle(name="Cl", z=-1, - sigma=sigma, - epsilon=epsilon) + sigma=0.3*pmb.units.nm, + epsilon=1*pmb.units.Quantity(1,"reduced_energy")) pmb.define_particle(name="SO4", z=-2, - sigma=sigma, - epsilon=epsilon) + sigma=0.3*pmb.units.nm, + epsilon=1*pmb.units.Quantity(1,"reduced_energy")) 
type_map=pmb.get_type_map() # System parameters From 1e684ea800a87b783895d2ad06c9eed204327276 Mon Sep 17 00:00:00 2001 From: pmblanco Date: Wed, 21 Jan 2026 12:29:45 +0100 Subject: [PATCH 22/55] fix wrong handling of espresso types and acidity test --- pyMBE/pyMBE.py | 25 ++--- pyMBE/storage/manager.py | 8 +- .../define_and_create_molecules_unit_tests.py | 13 ++- testsuite/set_particle_acidity_test.py | 98 +++++++++---------- 4 files changed, 69 insertions(+), 75 deletions(-) diff --git a/pyMBE/pyMBE.py b/pyMBE/pyMBE.py index 62c3a8f..d559cd0 100644 --- a/pyMBE/pyMBE.py +++ b/pyMBE/pyMBE.py @@ -663,10 +663,14 @@ def create_counterions(self, object_name, cation_name, anion_name, espresso_syst """ cation_tpl = self.db.get_template(pmb_type="particle", name=cation_name) - cation_charge = cation_tpl.states[cation_tpl.initial_state].z + cation_state = self.db.get_template(pmb_type="particle_state", + name=cation_tpl.initial_state) + cation_charge = cation_state.z anion_tpl = self.db.get_template(pmb_type="particle", name=anion_name) - anion_charge = anion_tpl.states[anion_tpl.initial_state].z + anion_state = self.db.get_template(pmb_type="particle_state", + name=anion_tpl.initial_state) + anion_charge = anion_state.z object_ids = self.get_particle_id_map(object_name=object_name)["all"] counterion_number={} object_charge={} @@ -781,14 +785,13 @@ def create_molecule(self, name, number_of_molecules, espresso_system, list_of_fi raise ValueError(f"Number of positions provided in {list_of_first_residue_positions} does not match number of molecules desired, {number_of_molecules}") # Sanity tests, this function should work for both molecules and peptides registered_pmb_types_with_name = self.db._find_template_types(name=name) - if len(registered_pmb_types_with_name) > 1: - raise KeyError(f"Detected multiple templates with the same name '{name}' in the pyMBE database, pmb_types: {registered_pmb_types_with_name}. 
Molecule creation aborted to avoid ambiguity.") - elif len(registered_pmb_types_with_name) == 0: - logging.warning(f"No template with name '{name}' defined in the pyMBE database, nothing will be created.") - return - pmb_type = registered_pmb_types_with_name[0] - if pmb_type not in supported_pmb_types: - raise KeyError(f"Unsupported template type {pmb_type} for template {name}. Supported template types are {supported_pmb_types}") + allowed_types = {"molecule", "peptide"} + filtered_types = allowed_types.intersection(registered_pmb_types_with_name) + if len(filtered_types) > 1: + raise ValueError(f"Ambiguous template name '{name}': found both 'molecule' and 'peptide' templates in the pyMBE database. Molecule creation aborted.") + if len(filtered_types) == 0: + raise ValueError(f"No 'molecule' or 'peptide' template found with name '{name}'. Found templates of types: {filtered_types}.") + pmb_type = next(iter(filtered_types)) # Generate an arbitrary random unit vector if backbone_vector is None: backbone_vector = self.generate_random_points_in_a_sphere(center=[0,0,0], @@ -1730,7 +1733,7 @@ def get_charge_number_map(self): charge_number_map = {} particle_templates = self.db.get_templates("particle") for tpl in particle_templates.values(): - for state in tpl.states.values(): + for state in self.db.get_particle_states_templates(particle_name=tpl.name).values(): if state.es_type is None: continue charge_number_map[state.es_type] = state.z diff --git a/pyMBE/storage/manager.py b/pyMBE/storage/manager.py index a5d10b0..57a7917 100644 --- a/pyMBE/storage/manager.py +++ b/pyMBE/storage/manager.py @@ -1005,7 +1005,6 @@ def get_templates(self, pmb_type): """ return self._templates.get(pmb_type, {}).copy() - def get_es_types_map(self): """ Iterates over all particle templates and extracts the ESPResSo type (`es_type`) @@ -1016,12 +1015,11 @@ def get_es_types_map(self): A dictionary mapping each particle state to its corresponding ESPResSo type. 
""" - if "particle" not in self._templates: + if "particle_state" not in self._templates: return {} result = {} - for _, tpl in self._templates["particle"].items(): - for state in self.get_particle_states_templates(tpl.name).values(): - result[state.name] = state.es_type + for _, tpl in self._templates["particle_state"].items(): + result[tpl.name] = tpl.es_type return result def get_particle_id_map(self, object_name): diff --git a/testsuite/define_and_create_molecules_unit_tests.py b/testsuite/define_and_create_molecules_unit_tests.py index e80209c..fc0cd1a 100644 --- a/testsuite/define_and_create_molecules_unit_tests.py +++ b/testsuite/define_and_create_molecules_unit_tests.py @@ -421,14 +421,13 @@ def test_create_and_delete_particles(self): # If no particles have been created, only two particles should be in the system (from the previous test) self.assertEqual(first=len(espresso_system.part.all()), second=starting_number_of_particles) + # Check that providing the wrong molecule name raises a ValueError + self.assertRaises(ValueError, pmb.create_molecule, + name="M3", + number_of_molecules=1, + espresso_system=espresso_system, + use_default_bond=True) - starting_number_of_particles=len(espresso_system.part.all()) - pmb.create_molecule(name="M23", - number_of_molecules=1, - espresso_system=espresso_system, - use_default_bond=True) - self.assertEqual(first=len(espresso_system.part.all()), - second=starting_number_of_particles) # Tests for delete_molecule # create another molecule just to have two molecules in the system pmb.create_molecule(name="M2", diff --git a/testsuite/set_particle_acidity_test.py b/testsuite/set_particle_acidity_test.py index 2ea424b..28b8538 100644 --- a/testsuite/set_particle_acidity_test.py +++ b/testsuite/set_particle_acidity_test.py @@ -32,43 +32,42 @@ def test_inert_particles_setup(self): Test that an inert particle is correctly set up in the pyMBE database. 
""" input_parameters={"name":"I", - "acidity": pd.NA, - "pka": pd.NA, - "z":2, - "sigma": 1.0*pmb.units.reduced_length, - "epsilon": 1.0*pmb.units.reduced_energy} + "acidity": pd.NA, + "pka": pd.NA, + "z":2, + "sigma": 1.0*pmb.units.reduced_length, + "epsilon": 1.0*pmb.units.reduced_energy} pmb.define_particle(**input_parameters) - part_tpl = pmb.db.get_template(name="I", - pmb_type="particle") - self.assertTrue(hasattr(part_tpl, "states")) - self.assertEqual(len(part_tpl.states), 1) - state_one = part_tpl.states["I"] - self.assertEqual(state_one.name, "I") - self.assertEqual(state_one.z, 2) + state_tpl = pmb.db.get_template(name="I", + pmb_type="particle_state") + + self.assertEqual(state_tpl.name, "I") + self.assertEqual(state_tpl.z, 2) pmb.db.delete_template(name="I", pmb_type="particle") + pmb.db.delete_template(name="I", pmb_type="particle_state") def test_acidic_particles_setup(self): """ Test that an acidic particle is correctly set up in the pyMBE database. """ input_parameters={"name":"A", - "acidity": "acidic", - "pka":4, - "sigma": 1.0*pmb.units.reduced_length, - "epsilon": 1.0*pmb.units.reduced_energy} + "acidity": "acidic", + "pka":4, + "sigma": 1.0*pmb.units.reduced_length, + "epsilon": 1.0*pmb.units.reduced_energy} pmb.define_particle(**input_parameters) - part_tpl = pmb.db.get_template(name="A", - pmb_type="particle") - self.assertTrue(hasattr(part_tpl, "states")) - self.assertEqual(len(part_tpl.states), 2) - state_one = part_tpl.states["AH"] - self.assertEqual(state_one.name, "AH") - self.assertEqual(state_one.z, 0) - state_two = part_tpl.states["A"] - self.assertEqual(state_two.name, "A") - self.assertEqual(state_two.z, -1) - self.assertNotEqual(state_one.es_type, state_two.es_type) + protonated_state = pmb.db.get_template(name="AH", + pmb_type="particle_state") + deprotonated_state = pmb.db.get_template(name="A", + pmb_type="particle_state") + self.assertEqual(protonated_state.name, "AH") + self.assertEqual(protonated_state.z, 0) + 
self.assertEqual(deprotonated_state.name, "A") + self.assertEqual(deprotonated_state.z, -1) + self.assertNotEqual(protonated_state.es_type, deprotonated_state.es_type) pmb.db.delete_template(name="A", pmb_type="particle") + pmb.db.delete_template(name="AH", pmb_type="particle_state") + pmb.db.delete_template(name="A", pmb_type="particle_state") def test_basic_particles_setup(self): """ @@ -80,35 +79,30 @@ def test_basic_particles_setup(self): "sigma": 1.0*pmb.units.reduced_length, "epsilon": 1.0*pmb.units.reduced_energy} pmb.define_particle(**input_parameters) - part_tpl = pmb.db.get_template(name="B", - pmb_type="particle") - self.assertTrue(hasattr(part_tpl, "states")) - self.assertEqual(len(part_tpl.states), 2) - state_one = part_tpl.states["BH"] - self.assertEqual(state_one.name, "BH") - self.assertEqual(state_one.z, 1) - state_two = part_tpl.states["B"] - self.assertEqual(state_two.name, "B") - self.assertEqual(state_two.z, 0) - self.assertNotEqual(state_one.es_type, state_two.es_type) + + protonated_state = pmb.db.get_template(name="BH", + pmb_type="particle_state") + deprotonated_state = pmb.db.get_template(name="B", + pmb_type="particle_state") + + self.assertEqual(protonated_state.name, "BH") + self.assertEqual(protonated_state.z, 1) + self.assertEqual(deprotonated_state.name, "B") + self.assertEqual(deprotonated_state.z, 0) + self.assertNotEqual(protonated_state.es_type, deprotonated_state.es_type) pmb.db.delete_template(name="B", pmb_type="particle") + pmb.db.delete_template(name="BH", pmb_type="particle_state") + pmb.db.delete_template(name="B", pmb_type="particle_state") - def sanity_tests(self): + def test_sanity_acidity(self): """ - Unit tests to check that set_particle_acidity raises ValueErrors when expected. + Unit tests to check that define_monoprototic_acidbase_reaction raises ValueErrors when expected. 
""" - # Check that set_particle_acidity raises a ValueError if pKa is not provided and pKa is acidic or basic - input_parametersA={"name":"A", - "acidity": "acidic" } - - input_parametersB= {"name": "B", - "acidity": "basic"} - self.assertRaises(ValueError, pmb.set_particle_acidity,**input_parametersA) - self.assertRaises(ValueError, pmb.set_particle_acidity, **input_parametersB) - # Check that set_particle_acidity raises a ValueError if a non-supported acidity is provided - input_parametersA={"name":"A", - "acidity": "random" } - self.assertRaises(ValueError, pmb.set_particle_acidity,**input_parametersA) + # Check that define_monoprototic_acidbase_reaction raises a ValueError if a non-supported acidity is provided + input_parametersA={"particle_name":"A", + "acidity": "random", + "pka":4,} + self.assertRaises(ValueError, pmb.define_monoprototic_acidbase_reaction,**input_parametersA) if __name__ == "__main__": ut.main() From 2796085ea97ffa583b35b72362cb79baa102ff92 Mon Sep 17 00:00:00 2001 From: pmblanco Date: Wed, 21 Jan 2026 15:08:50 +0100 Subject: [PATCH 23/55] fix molecule position test --- pyMBE/pyMBE.py | 17 +- pyMBE/storage/manager.py | 84 +------ testsuite/create_molecule_position_test.py | 226 +++++++++--------- .../define_and_create_molecules_unit_tests.py | 7 - 4 files changed, 134 insertions(+), 200 deletions(-) diff --git a/pyMBE/pyMBE.py b/pyMBE/pyMBE.py index d559cd0..7deef28 100644 --- a/pyMBE/pyMBE.py +++ b/pyMBE/pyMBE.py @@ -501,7 +501,7 @@ def create_charge_map(espresso_system,id_map,label): mean_charge=np.mean(np.array([value.magnitude for value in net_charge_molecules.values()]))*self.units.Quantity(1,'reduced_charge') return {"mean": mean_charge, "molecules": net_charge_molecules, "residues": net_charge_residues} - def center_molecule_in_simulation_box(self, molecule_id, espresso_system): + def center_molecule_in_simulation_box(self, molecule_id, espresso_system, pmb_type="molecule"): """ Centers the pmb object matching `molecule_id` in the 
center of the simulation box in `espresso_md`. @@ -509,18 +509,19 @@ def center_molecule_in_simulation_box(self, molecule_id, espresso_system): molecule_id(`int`): Id of the molecule to be centered. espresso_system(`espressomd.system.System`): Instance of a system object from the espressomd library. """ - if len(self.df.loc[self.df['molecule_id']==molecule_id].pmb_type) == 0: - raise ValueError("The provided molecule_id is not present in the pyMBE dataframe.") - center_of_mass = self.calculate_center_of_mass_of_molecule(molecule_id=molecule_id,espresso_system=espresso_system) + if pmb_type not in self.db._molecule_like_types: + raise ValueError(f"Input pmb_type = {pmb_type} not supported, supported pyMBE types are: {self.db._molecule_like_types}.") + mol_inst = self.db.get_instance(instance_id=molecule_id, + pmb_type=pmb_type) + center_of_mass = self.calculate_center_of_mass_of_molecule(molecule_id=molecule_id, + espresso_system=espresso_system) box_center = [espresso_system.box_l[0]/2.0, espresso_system.box_l[1]/2.0, espresso_system.box_l[2]/2.0] - molecule_name = self.df.loc[(self.df['molecule_id']==molecule_id) & (self.df['pmb_type'].isin(["molecule","protein"]))].name.values[0] - particle_id_list = self.get_particle_id_map(object_name=molecule_name)["all"] + particle_id_list = self.get_particle_id_map(object_name=mol_inst.name)["all"] for pid in particle_id_list: es_pos = espresso_system.part.by_id(pid).pos espresso_system.part.by_id(pid).pos = es_pos - center_of_mass + box_center - return def check_dimensionality(self, variable, expected_dimensionality): """ @@ -885,7 +886,7 @@ def create_molecule(self, name, number_of_molecules, espresso_system, list_of_fi first_residue = True pos_index+=1 molecule_ids.append(molecule_id) - return molecule_id + return molecule_ids def create_particle(self, name, espresso_system, number_of_particles, position=None, fix=False): """ diff --git a/pyMBE/storage/manager.py b/pyMBE/storage/manager.py index 57a7917..4e8f42f 100644 
--- a/pyMBE/storage/manager.py +++ b/pyMBE/storage/manager.py @@ -166,10 +166,6 @@ def _find_instance_ids_by_name(self, pmb_type, name): A list of instance IDs whose underlying template name matches ``name``. The list is empty if no such instances exist. - Raises: - KeyError: - If ``pmb_type`` is not a recognized instance category. - Examples: >>> db._find_instance_ids_by_name("particle", "A") [0, 3, 7][] @@ -379,19 +375,15 @@ def _has_instance(self, pmb_type, instance_id): ``True`` if the instance exists in the given category, ``False`` otherwise. - Raises: - KeyError: - If ``pmb_type`` is not a known instance category in the database. - Examples: >>> db._has_instance("particle", 3) True >>> db._has_instance("nonexistent_type", 5) - KeyError + ValueError """ if pmb_type not in self._instances: - raise KeyError(f"Instance type '{pmb_type}' not found in the database.") + raise ValueError(f"Instance type '{pmb_type}' not found in the database.") return instance_id in self._instances[pmb_type] @@ -410,17 +402,9 @@ def _has_template(self, pmb_type, name): bool: ``True`` if a template named ``name`` exists under ``pmb_type``; ``False`` otherwise. - - Raises: - KeyError: - If ``pmb_type`` is not a recognized template category in the database. - - Examples: - >>> db.has_template("particle", "A") - True """ if pmb_type not in self._templates: - raise KeyError(f"Template type '{pmb_type}' not found in the database.") + raise ValueError(f"Template type '{pmb_type}' not found in the database.") template_in_db = name in self._templates.get(pmb_type, {}) return template_in_db @@ -430,9 +414,6 @@ def _register_instance(self, instance): Args: instance: Any instance conforming to the pyMBE instance models. - - Raises: - ValueError: If the id or instance model does not exist or is duplicated. """ # infer pmb_type from instance class if isinstance(instance, ParticleInstance): @@ -473,9 +454,6 @@ def _register_reaction(self, reaction): Args: reaction (Reaction): Reaction object. 
- - Raises: - ValueError: If reaction name already exists. """ if reaction.name in self._reactions: raise ValueError(f"Reaction '{reaction.name}' already exists.") @@ -489,8 +467,6 @@ def _register_template(self, template): Args: template: Any template object conforming to the pyMBE template models. - Raises: - ValueError: If a template with the same name already exists. """ pmb_type = getattr(template, "pmb_type", None) if pmb_type is None: @@ -536,14 +512,6 @@ def _update_instance(self, instance_id, pmb_type, attribute, value): value (Any): New value to assign to the specified attribute. - Raises: - KeyError: - If the provided ``instance_id`` does not exist for the given - ``pmb_type``. - ValueError: - If attempting to modify an attribute that is not permitted - for the instance's PMB type. - Notes: - Allowed updates: * ``particle``: ``initial_state``, ``residue_id``, ``molecule_id``, ``assembly_id`` @@ -556,7 +524,7 @@ def _update_instance(self, instance_id, pmb_type, attribute, value): """ if instance_id not in self._instances[pmb_type]: - raise KeyError(f"Instance '{instance_id}' not found for type '{pmb_type}' in the pyMBE database.") + raise ValueError(f"Instance '{instance_id}' not found for type '{pmb_type}' in the pyMBE database.") if pmb_type == "particle": allowed = ["initial_state", "residue_id", "molecule_id", "assembly_id"] @@ -597,11 +565,6 @@ def _propagate_id(self, root_type, root_id, attribute, value): list[int]: A flat list of all instance IDs updated (including root). - Raises: - KeyError: - If the root instance does not exist. - ValueError: - If an unsupported type or attribute is given. 
""" updated = [] # Map each type to its own identity attribute @@ -676,7 +639,7 @@ def _update_reaction_participant(self, reaction_name, particle_name, state_name, """ if reaction_name not in self._reactions: - raise KeyError(f"Reaction '{reaction_name}' not found in the pyMBE database.") + raise ValueError(f"Reaction '{reaction_name}' not found in the pyMBE database.") rxn = self._reactions[reaction_name].add_participant(particle_name=particle_name, state_name=state_name, @@ -725,18 +688,12 @@ def delete_template(self, pmb_type, name): The template category. name (str): The name of the template to delete. - - Raises: - KeyError: - If the template type or name does not exist. - ValueError: - If one or more instances reference the template. """ # Check template exists if pmb_type not in self._templates: - raise KeyError(f"Template type '{pmb_type}' not found.") + raise ValueError(f"Template type '{pmb_type}' not found.") if name not in self._templates[pmb_type]: - raise KeyError(f"Template '{name}' not found in type '{pmb_type}'.") + raise ValueError(f"Template '{name}' not found in type '{pmb_type}'.") # Check if any instance depends on this template if pmb_type in self._instances: @@ -792,18 +749,13 @@ def delete_instance(self, pmb_type, instance_id, cascade=False): cascade (bool): If True, automatically delete dependent objects. - Raises: - KeyError: - If the instance does not exist. - ValueError: - If cascade is False but dependencies exist. 
""" # ---- Basic checks ---- if pmb_type not in self._instances: - raise KeyError(f"Instance type '{pmb_type}' not found.") + raise ValueError(f"Instance type '{pmb_type}' not found.") if instance_id not in self._instances[pmb_type]: - raise KeyError(f"Instance ID '{instance_id}' not found in '{pmb_type}'.") + raise ValueError(f"Instance ID '{instance_id}' not found in '{pmb_type}'.") inst = self._instances[pmb_type][instance_id] # =============== CASCADE DELETION ========================= if cascade: @@ -902,10 +854,6 @@ def delete_instances(self, pmb_type, cascade=False): pyMBE hierarchy rules. If False, deletion is forbidden when dependencies exist. - Raises: - ValueError: - If ``cascade=False`` and at least one instance has dependencies. - Notes: - Deletion order is deterministic and safe. - If no instances exist for the given type, the method is a no-op. @@ -929,7 +877,7 @@ def get_instance(self, pmb_type, instance_id): Looks up an instance within the internal instance registry (`self._instances`) using its pyMBE type (e.g., "particle", "residue", "bond", ...) and its unique id. If the instance does not exist, - a `KeyError` is raised. + a `ValueError` is raised. Args: pmb_type (str): The instance pyMBE category. @@ -939,12 +887,9 @@ def get_instance(self, pmb_type, instance_id): InstanceType: The stored InstanceTemplate instance corresponding to the provided type and name. - Raises: - KeyError: If no template with the given type and name exists in - the internal registry. 
""" if instance_id not in self._instances[pmb_type]: - raise KeyError(f"InstanceTemplate with id = '{instance_id}' not found in type '{pmb_type}'.") + raise ValueError(f"InstanceTemplate with id = '{instance_id}' not found in type '{pmb_type}'.") else: return self._instances[pmb_type][instance_id] @@ -971,7 +916,7 @@ def get_template(self, pmb_type, name): Looks up a template within the internal template registry (`self._templates`) using its pyMBE type (e.g., "particle", "residue", "bond", ...) and its unique name. If the template does not exist, - a `KeyError` is raised. + a `ValueError` is raised. Args: pmb_type (str): The template pyMBE category. @@ -981,9 +926,6 @@ def get_template(self, pmb_type, name): TemplateType: The stored template instance corresponding to the provided type and name. - Raises: - ValueError: If no template with the given type and name exists in - the internal registry. """ if name not in self._templates[pmb_type]: raise ValueError(f"Template '{name}' not found in type '{pmb_type}'.") @@ -1048,7 +990,7 @@ def get_particle_id_map(self, object_name): object_ids.append(inst_id) if object_type is None: - raise KeyError(f"No object named '{object_name}' found in database.") + raise ValueError(f"No object named '{object_name}' found in database.") # Maps to return id_list = [] residue_map = {} diff --git a/testsuite/create_molecule_position_test.py b/testsuite/create_molecule_position_test.py index ca3fb9f..5004bea 100644 --- a/testsuite/create_molecule_position_test.py +++ b/testsuite/create_molecule_position_test.py @@ -1,5 +1,5 @@ # -# Copyright (C) 2024 pyMBE-dev team +# Copyright (C) 2024-2026 pyMBE-dev team # # This file is part of pyMBE. # @@ -16,34 +16,23 @@ # You should have received a copy of the GNU General Public License # along with this program. If not, see . 
-import numpy as np import espressomd -# Create an instance of pyMBE library import pyMBE -pmb = pyMBE.pymbe_library(seed=42) +import unittest as ut -print("*** Create_molecule with input position list unit test ***") -print("*** Unit test: Check that the positions of the central bead of the first residue in the generated molecules are equal to the input positions ***") -# Simulation parameters -pmb.set_reduced_units(unit_length=0.4*pmb.units.nm) -solvent_permitivity = 78.3 -N_molecules = 3 -chain_length = 5 -molecule_concentration = 5.56e-4 *pmb.units.mol/pmb.units.L +pmb = pyMBE.pymbe_library(seed=42) -pos_list = [[10,10,10], [20,20,20], [30,30,30]] pmb.define_particle(name='central_mon', sigma=0.35*pmb.units.nm, epsilon=1*pmb.units('reduced_energy')) + pmb.define_particle(name='side_mon', sigma=0.35*pmb.units.nm, epsilon=1*pmb.units('reduced_energy')) -pmb.define_residue( - name = 'res1', - central_bead = 'central_mon', - side_chains = ['side_mon', 'side_mon'] - ) +pmb.define_residue(name = 'res1', + central_bead = 'central_mon', + side_chains = ['side_mon', 'side_mon']) bond_type = 'harmonic' generic_bond_length=0.4 * pmb.units.nm @@ -55,103 +44,112 @@ pmb.define_default_bond(bond_type = bond_type, bond_parameters = harmonic_bond) -# Defines the peptide in the pyMBE data frame molecule_name = 'generic_molecule' -pmb.define_molecule(name=molecule_name, residue_list = ['res1']*chain_length) +pmb.define_molecule(name=molecule_name, residue_list = ['res1']*5) -# Solution parameters -cation_name = 'Na' -anion_name = 'Cl' -c_salt=5e-3 * pmb.units.mol/ pmb.units.L - -pmb.define_particle(name=cation_name, z=1, sigma=0.35*pmb.units.nm, epsilon=1*pmb.units('reduced_energy')) -pmb.define_particle(name=anion_name, z=-1, sigma=0.35*pmb.units.nm, epsilon=1*pmb.units('reduced_energy')) +# Create an instance of an espresso system +L = 52 +espresso_system=espressomd.System(box_l = [L]*3) +pos_list = [[10,10,10], [20,20,20], [30,30,30]] -# System parameters -volume = 
N_molecules/(pmb.N_A*molecule_concentration) -L = volume ** (1./3.) # Side of the simulation box -calculated_peptide_concentration = N_molecules/(volume*pmb.N_A) +class Test(ut.TestCase): + def test_create_molecule_at_position(self): + """ + Check that the positions of the central bead of the first residue in the generated molecules are equal to the input positions + """ + molecule_ids = pmb.create_molecule(name=molecule_name, + number_of_molecules= 3, + espresso_system=espresso_system, + use_default_bond=True, + list_of_first_residue_positions = pos_list) + particle_id_map = pmb.get_particle_id_map(object_name=molecule_name) + central_bead_pos = [] + for molecule_id in molecule_ids: + pids = particle_id_map["molecule_map"][molecule_id] + central_bead_id = min(pids) + central_bead_pos.append(espresso_system.part.by_id(central_bead_id).pos.tolist()) + self.assertListEqual(pos_list, + central_bead_pos) + for molid in molecule_ids: + pmb.delete_instances_in_system(instance_id=molid, + pmb_type="molecule", + espresso_system=espresso_system) + + def test_sanity_create_molecule(self): + """ + Sanity tests for input positions in create_molecule + """ + + # Check that create_molecule raises a ValueError if the user does not provide a nested list for list_of_first_residue_positions + input_parameters={"name": "generic_molecule", + "number_of_molecules": 1, + "espresso_system": espresso_system, + "list_of_first_residue_positions": [1,2,3]} + self.assertRaises(ValueError, + pmb.create_molecule, + **input_parameters) + # Check that create_molecule raises a ValueError if the user does not provide a nested list with three coordinates + input_parameters={"name": "generic_molecule", + "number_of_molecules": 1, + "espresso_system": espresso_system, + "list_of_first_residue_positions": [[1,2]]} + self.assertRaises(ValueError, + pmb.create_molecule, + **input_parameters) + # Check that create_molecule raises a ValueError if the user does not provide the same number of
first_residue_positions as number_of_molecules + input_parameters={"name": "generic_molecule", + "number_of_molecules": 2, + "espresso_system": espresso_system, + "list_of_first_residue_positions": [[1,2,3]]} + self.assertRaises(ValueError, + pmb.create_molecule, + **input_parameters) + + def test_center_molecule_in_simulation_box(self): + """ + Unit tests for center_molecule_in_simulation_box + """ + molecule_ids = pmb.create_molecule(name=molecule_name, + number_of_molecules= 3, + espresso_system=espresso_system, + use_default_bond=True, + list_of_first_residue_positions = pos_list) + + # Check that center_molecule_in_simulation_box works correctly for cubic boxes + pmb.center_molecule_in_simulation_box(molecule_id=molecule_ids[0], + espresso_system=espresso_system) + center_of_mass = pmb.calculate_center_of_mass_of_molecule(molecule_id=molecule_ids[0], + espresso_system=espresso_system) + center_of_mass_ref = [L/2]*3 + for ind in range(len(center_of_mass)): + self.assertAlmostEqual(center_of_mass[ind], + center_of_mass_ref[ind]) + #Check that center_molecule_in_simulation_box works correctly for non-cubic boxes + espresso_system.change_volume_and_rescale_particles(d_new=3*L, dir="z") + + pmb.center_molecule_in_simulation_box(molecule_id=molecule_ids[2], + espresso_system=espresso_system) + center_of_mass = pmb.calculate_center_of_mass_of_molecule(molecule_id=molecule_ids[2], + espresso_system=espresso_system) + center_of_mass_ref = [L/2, L/2, 1.5*L] + for ind in range(len(center_of_mass)): + self.assertAlmostEqual(center_of_mass[ind], + center_of_mass_ref[ind]) + + def test_sanity_center_molecule_in_simulation_box(self): + """ + Sanity tests for center_molecule_in_simulation_box + """ + # Check that center_molecule_in_simulation_box raises a Value Error if a wrong molecule_id is provided + + input_parameters = {"molecule_id": 20 , + "espresso_system":espresso_system} + + self.assertRaises(ValueError, + pmb.center_molecule_in_simulation_box, + **input_parameters) 
+ + +if __name__ == "__main__": + ut.main() -# Create an instance of an espresso system -espresso_system=espressomd.System(box_l = [L.to('reduced_length').magnitude]*3) - -# Add all bonds to espresso system -pmb.add_bonds_to_espresso(espresso_system=espresso_system) - -pmb.write_pmb_df("df1.csv") -# Create your molecules into the espresso system -molecules = pmb.create_molecule(name=molecule_name, - number_of_molecules= N_molecules, - espresso_system=espresso_system, - use_default_bond=True, - list_of_first_residue_positions = pos_list) -pmb.write_pmb_df("df2.csv") -# Running unit test here. Use np.testing.assert_almost_equal of the input position list and the central_bead_pos list under here. -central_bead_pos = [] -for molecule_id in molecules: - info = next(iter(molecules[molecule_id].values())) - central_bead_id = info['central_bead_id'] - side_chain_ids = info['side_chain_ids'] - central_bead_pos.append(espresso_system.part.by_id(central_bead_id).pos.tolist()) - -np.testing.assert_almost_equal(pos_list, central_bead_pos) - -print("*** Unit test passed ***\n") - -print("*** Unit test: Check that create_molecule raises a ValueError if the user does not provide a nested list for list_of_first_residue_positions***") -input_parameters={"name": "generic_molecule", - "number_of_molecules": 1, - "espresso_system": espresso_system, - "list_of_first_residue_positions": [1,2,3]} -np.testing.assert_raises(ValueError, pmb.create_molecule, **input_parameters) -print("*** Unit test passed ***\n") - -print("*** Unit test: Check that create_molecule raises a ValueError if the user does not provide a nested list with three coordinates***") -input_parameters={"name": "generic_molecule", - "number_of_molecules": 1, - "espresso_system": espresso_system, - "list_of_first_residue_positions": [[1,2]]} -np.testing.assert_raises(ValueError, pmb.create_molecule, **input_parameters) -print("*** Unit test passed ***\n") - -print("*** Unit test: Check that create_molecule raises a 
ValueError if the user does not provide a the same number of first_residue_positions as number_of_molecules***") -input_parameters={"name": "generic_molecule", - "number_of_molecules": 2, - "espresso_system": espresso_system, - "list_of_first_residue_positions": [[1,2,3]]} -np.testing.assert_raises(ValueError, pmb.create_molecule, **input_parameters) -print("*** Unit test passed ***\n") - -print("*** Unit test: Check that center_molecule_in_simulation_box works correctly for cubic boxes***") - -molecule_id = pmb.df.loc[pmb.df['name']==molecule_name].molecule_id.values[0] -pmb.center_molecule_in_simulation_box(molecule_id=molecule_id, - espresso_system=espresso_system) -center_of_mass = pmb.calculate_center_of_mass_of_molecule(molecule_id=molecule_id, - espresso_system=espresso_system) -center_of_mass_ref = [L.to('reduced_length').magnitude/2]*3 - -np.testing.assert_almost_equal(center_of_mass, center_of_mass_ref) - -print("*** Unit test passed ***\n") - -print("*** Unit test: Check that center_molecule_in_simulation_box works correctly for non-cubic boxes***") - -espresso_system.change_volume_and_rescale_particles(d_new=3*L.to('reduced_length').magnitude, dir="z") -molecule_id = pmb.df.loc[pmb.df['name']==molecule_name].molecule_id.values[2] -pmb.center_molecule_in_simulation_box(molecule_id=molecule_id, espresso_system=espresso_system) -center_of_mass = pmb.calculate_center_of_mass_of_molecule(molecule_id=molecule_id, espresso_system=espresso_system) -center_of_mass_ref = [L.to('reduced_length').magnitude/2, L.to('reduced_length').magnitude/2, 1.5*L.to('reduced_length').magnitude] - -np.testing.assert_almost_equal(center_of_mass, center_of_mass_ref) - -print("*** Unit test passed ***") - -print("*** Unit test: Check that center_molecule_in_simulation_box raises a Value Error if a wrong molecule_id is provided***") - -input_parameters = {"molecule_id": 20 , - "espresso_system":espresso_system} - -np.testing.assert_raises(ValueError, 
pmb.center_molecule_in_simulation_box, **input_parameters) - -print("*** Unit test passed ***") diff --git a/testsuite/define_and_create_molecules_unit_tests.py b/testsuite/define_and_create_molecules_unit_tests.py index fc0cd1a..6c3ad2c 100644 --- a/testsuite/define_and_create_molecules_unit_tests.py +++ b/testsuite/define_and_create_molecules_unit_tests.py @@ -21,15 +21,8 @@ import numpy as np import pandas as pd import espressomd -import logging -import io import unittest as ut -# Create an in-memory log stream -log_stream = io.StringIO() -logging.basicConfig(level=logging.INFO, - format="%(levelname)s: %(message)s", - handlers=[logging.StreamHandler(log_stream)]) # Create an instance of pyMBE library pmb = pyMBE.pymbe_library(seed=42) From f8c9bd10b72cc51fd31587df4f056590ab07c85e Mon Sep 17 00:00:00 2001 From: pmblanco Date: Wed, 21 Jan 2026 15:11:19 +0100 Subject: [PATCH 24/55] fix test --- testsuite/setup_salt_ions_unit_tests.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/testsuite/setup_salt_ions_unit_tests.py b/testsuite/setup_salt_ions_unit_tests.py index a2adf2d..9a32f51 100644 --- a/testsuite/setup_salt_ions_unit_tests.py +++ b/testsuite/setup_salt_ions_unit_tests.py @@ -322,7 +322,7 @@ def test_sanity_create_counterions(self): "cation_name":"Na", "anion_name":"Cl", "espresso_system":espresso_system} - self.assertRaises(KeyError, + self.assertRaises(ValueError, pmb.create_counterions, **inputs) inputs = {"object_name":'isoelectric_polyampholyte', From fd8c6fb5ef18c7efb3f68a2e7da7540106b6eaa3 Mon Sep 17 00:00:00 2001 From: pmblanco Date: Wed, 21 Jan 2026 17:36:54 +0100 Subject: [PATCH 25/55] fix seed test and load_interaction_parameters --- pyMBE/lib/handy_functions.py | 71 +++- .../peptides/Blanco2021/metadata.json | 4 + .../peptides/Blanco2021/templates_bond.csv | 19 + .../Blanco2021/templates_particle.csv | 13 + .../Blanco2021/templates_particle_state.csv | 7 + .../peptides/Lunkad2021/metadata.json | 4 + 
.../peptides/Lunkad2021/templates_bond.csv | 7 + .../Lunkad2021/templates_particle.csv | 7 + .../Lunkad2021/templates_particle_state.csv | 2 + pyMBE/pyMBE.py | 79 ---- pyMBE/storage/io.py | 378 +++++++----------- testsuite/lj_tests.py | 8 - testsuite/seed_test.py | 64 ++- 13 files changed, 299 insertions(+), 364 deletions(-) create mode 100644 pyMBE/parameters/peptides/Blanco2021/metadata.json create mode 100644 pyMBE/parameters/peptides/Blanco2021/templates_bond.csv create mode 100644 pyMBE/parameters/peptides/Blanco2021/templates_particle.csv create mode 100644 pyMBE/parameters/peptides/Blanco2021/templates_particle_state.csv create mode 100644 pyMBE/parameters/peptides/Lunkad2021/metadata.json create mode 100644 pyMBE/parameters/peptides/Lunkad2021/templates_bond.csv create mode 100644 pyMBE/parameters/peptides/Lunkad2021/templates_particle.csv create mode 100644 pyMBE/parameters/peptides/Lunkad2021/templates_particle_state.csv diff --git a/pyMBE/lib/handy_functions.py b/pyMBE/lib/handy_functions.py index 27efc10..6c41931 100644 --- a/pyMBE/lib/handy_functions.py +++ b/pyMBE/lib/handy_functions.py @@ -154,12 +154,23 @@ def define_protein_AA_particles(topology_dict, pmb, lj_setup_mode="wca"): def define_protein_AA_residues(topology_dict, model, pmb): """ - Define residue templates in the pyMBE database for a peptide or protein sequence. + Define residue templates in the pyMBE database for a protein topology dict. Args: - sequence (list of str): - Ordered amino-acid sequence of the peptide or protein. Each element must - be a residue identifier compatible with the selected model. + topology_dict (dict): + Dictionary defining the internal structure of the protein. + Expected format: + { + "ResidueName1": { + "initial_pos": np.ndarray, + "chain_id": int, + "radius": float + }, + "ResidueName2": { ... }, + ... + } + The `"initial_pos"` entry is required and represents the residue’s + reference coordinates before shifting to the protein's center-of-mass. 
model (str): Coarse-grained representation to use. Supported options: @@ -178,12 +189,6 @@ def define_protein_AA_residues(topology_dict, model, pmb): * All other residues use `"CA"` (central bead) plus one side-chain bead named after the amino acid. - Residue names are constructed as `"AA-"`, e.g., `"AA-A"`, `"AA-L"`. - - Returns: - None - The function operates by side effect, populating the pyMBE residue - template database. - """ residue_list = [] @@ -208,6 +213,52 @@ def define_protein_AA_residues(topology_dict, model, pmb): side_chains = side_chains) residue_list.append(residue_name) +def define_peptide_AA_residues(sequence,model, pmb): + """ + Define residue templates in the pyMBE database for a peptide or protein sequence. + + Args: + sequence (list of str): + Ordered amino-acid sequence of the peptide or protein. Each element must + be a residue identifier compatible with the selected model. + + model (str): + Coarse-grained representation to use. Supported options: + - `"1beadAA"` + - `"2beadAA"` + + pmb (pyMBE.pymbe_library): + Instance of the pyMBE library. + + Notes: + - Supported models: + - `"1beadAA"`: Each amino acid is represented by a single bead. + The central bead is the amino-acid name itself, and no side chains are used. + - `"2beadAA"`: Each amino acid is represented by two beads, except for terminal or special residues: + * `"c"`, `"n"`, and `"G"` (glycine) are treated as single-bead residues. + * All other residues use `"CA"` (central bead) plus one side-chain bead named after the amino acid. + + - Residue names are constructed as `"AA-"`, e.g., `"AA-A"`, `"AA-L"`.
+ """ + defined_residues = [] + for residue_name in sequence: + if model == '1beadAA': + central_bead = residue_name + side_chains = [] + elif model == '2beadAA': + if residue_name in ['c','n', 'G']: + central_bead = residue_name + side_chains = [] + else: + central_bead = 'CA' + side_chains = [residue_name] + residue_name='AA-'+residue_name + if residue_name not in defined_residues: + pmb.define_residue(name = residue_name, + central_bead = central_bead, + side_chains = side_chains) + defined_residues.append(residue_name) + def get_residues_from_topology_dict(topology_dict, model): if model == "1beadAA": excluded_residue_names = [] diff --git a/pyMBE/parameters/peptides/Blanco2021/metadata.json b/pyMBE/parameters/peptides/Blanco2021/metadata.json new file mode 100644 index 0000000..62c568f --- /dev/null +++ b/pyMBE/parameters/peptides/Blanco2021/metadata.json @@ -0,0 +1,4 @@ +{"summary": "Interaction parameters from Blanco et al.", +"source": "Blanco et al. Soft Matter, 17(3), 655-669, 2021.", +"doi": "10.1039/d0sm01475c", +"citekey": "blanco2021a"} \ No newline at end of file diff --git a/pyMBE/parameters/peptides/Blanco2021/templates_bond.csv b/pyMBE/parameters/peptides/Blanco2021/templates_bond.csv new file mode 100644 index 0000000..b3284f4 --- /dev/null +++ b/pyMBE/parameters/peptides/Blanco2021/templates_bond.csv @@ -0,0 +1,19 @@ +name,particle_name1,particle_name2,bond_type,parameters +D-n,n,D,harmonic,"{""r_0"":{""magnitude"":0.4,""units"":""nm"",""dimension"":""length""},""k"":{""magnitude"":2559.0187205289126,""units"":""meV/nm**2"",""dimension"":""energy/length**2""}}" +D-S,S,D,harmonic,"{""r_0"":{""magnitude"":0.4,""units"":""nm"",""dimension"":""length""},""k"":{""magnitude"":2559.0187205289126,""units"":""meV/nm**2"",""dimension"":""energy/length**2""}}" +H-S,S,H,harmonic,"{""r_0"":{""magnitude"":0.4,""units"":""nm"",""dimension"":""length""},""k"":{""magnitude"":2559.0187205289126,""units"":""meV/nm**2"",""dimension"":""energy/length**2""}}" 
+A-H,H,A,harmonic,"{""r_0"":{""magnitude"":0.4,""units"":""nm"",""dimension"":""length""},""k"":{""magnitude"":2559.0187205289126,""units"":""meV/nm**2"",""dimension"":""energy/length**2""}}" +A-K,A,K,harmonic,"{""r_0"":{""magnitude"":0.4,""units"":""nm"",""dimension"":""length""},""k"":{""magnitude"":2559.0187205289126,""units"":""meV/nm**2"",""dimension"":""energy/length**2""}}" +E-H,E,H,harmonic,"{""r_0"":{""magnitude"":0.4,""units"":""nm"",""dimension"":""length""},""k"":{""magnitude"":2559.0187205289126,""units"":""meV/nm**2"",""dimension"":""energy/length**2""}}" +E-K,E,K,harmonic,"{""r_0"":{""magnitude"":0.4,""units"":""nm"",""dimension"":""length""},""k"":{""magnitude"":2559.0187205289126,""units"":""meV/nm**2"",""dimension"":""energy/length**2""}}" +K-R,K,R,harmonic,"{""r_0"":{""magnitude"":0.4,""units"":""nm"",""dimension"":""length""},""k"":{""magnitude"":2559.0187205289126,""units"":""meV/nm**2"",""dimension"":""energy/length**2""}}" +H-K,K,H,harmonic,"{""r_0"":{""magnitude"":0.4,""units"":""nm"",""dimension"":""length""},""k"":{""magnitude"":2559.0187205289126,""units"":""meV/nm**2"",""dimension"":""energy/length**2""}}" +H-R,R,H,harmonic,"{""r_0"":{""magnitude"":0.4,""units"":""nm"",""dimension"":""length""},""k"":{""magnitude"":2559.0187205289126,""units"":""meV/nm**2"",""dimension"":""energy/length**2""}}" +H-H,H,H,harmonic,"{""r_0"":{""magnitude"":0.4,""units"":""nm"",""dimension"":""length""},""k"":{""magnitude"":2559.0187205289126,""units"":""meV/nm**2"",""dimension"":""energy/length**2""}}" +G-H,H,G,harmonic,"{""r_0"":{""magnitude"":0.4,""units"":""nm"",""dimension"":""length""},""k"":{""magnitude"":2559.0187205289126,""units"":""meV/nm**2"",""dimension"":""energy/length**2""}}" +G-Y,G,Y,harmonic,"{""r_0"":{""magnitude"":0.4,""units"":""nm"",""dimension"":""length""},""k"":{""magnitude"":2559.0187205289126,""units"":""meV/nm**2"",""dimension"":""energy/length**2""}}" 
+K-Y,Y,K,harmonic,"{""r_0"":{""magnitude"":0.4,""units"":""nm"",""dimension"":""length""},""k"":{""magnitude"":2559.0187205289126,""units"":""meV/nm**2"",""dimension"":""energy/length**2""}}" +F-K,K,F,harmonic,"{""r_0"":{""magnitude"":0.4,""units"":""nm"",""dimension"":""length""},""k"":{""magnitude"":2559.0187205289126,""units"":""meV/nm**2"",""dimension"":""energy/length**2""}}" +F-H,H,F,harmonic,"{""r_0"":{""magnitude"":0.4,""units"":""nm"",""dimension"":""length""},""k"":{""magnitude"":2559.0187205289126,""units"":""meV/nm**2"",""dimension"":""energy/length**2""}}" +G-R,R,G,harmonic,"{""r_0"":{""magnitude"":0.4,""units"":""nm"",""dimension"":""length""},""k"":{""magnitude"":2559.0187205289126,""units"":""meV/nm**2"",""dimension"":""energy/length**2""}}" +Y-c,Y,c,harmonic,"{""r_0"":{""magnitude"":0.4,""units"":""nm"",""dimension"":""length""},""k"":{""magnitude"":2559.0187205289126,""units"":""meV/nm**2"",""dimension"":""energy/length**2""}}" diff --git a/pyMBE/parameters/peptides/Blanco2021/templates_particle.csv b/pyMBE/parameters/peptides/Blanco2021/templates_particle.csv new file mode 100644 index 0000000..3a55738 --- /dev/null +++ b/pyMBE/parameters/peptides/Blanco2021/templates_particle.csv @@ -0,0 +1,13 @@ +name,sigma,epsilon,cutoff,offset,initial_state +D,"{""magnitude"":0.4,""units"":""nm"",""dimension"":""length""}","{""magnitude"":25.69257912108585,""units"":""meV"",""dimension"":""energy""}","{""magnitude"":0.3984740271498274,""units"":""nm"",""dimension"":""length""}","{""magnitude"":0.0,""units"":""nm"",""dimension"":""length""}",D +E,"{""magnitude"":0.4,""units"":""nm"",""dimension"":""length""}","{""magnitude"":25.69257912108585,""units"":""meV"",""dimension"":""energy""}","{""magnitude"":0.3984740271498274,""units"":""nm"",""dimension"":""length""}","{""magnitude"":0.0,""units"":""nm"",""dimension"":""length""}",E 
+n,"{""magnitude"":0.4,""units"":""nm"",""dimension"":""length""}","{""magnitude"":25.69257912108585,""units"":""meV"",""dimension"":""energy""}","{""magnitude"":0.3984740271498274,""units"":""nm"",""dimension"":""length""}","{""magnitude"":0.0,""units"":""nm"",""dimension"":""length""}",n +S,"{""magnitude"":0.4,""units"":""nm"",""dimension"":""length""}","{""magnitude"":25.69257912108585,""units"":""meV"",""dimension"":""energy""}","{""magnitude"":0.3984740271498274,""units"":""nm"",""dimension"":""length""}","{""magnitude"":0.0,""units"":""nm"",""dimension"":""length""}",S +H,"{""magnitude"":0.4,""units"":""nm"",""dimension"":""length""}","{""magnitude"":25.69257912108585,""units"":""meV"",""dimension"":""energy""}","{""magnitude"":0.3984740271498274,""units"":""nm"",""dimension"":""length""}","{""magnitude"":0.0,""units"":""nm"",""dimension"":""length""}",H +A,"{""magnitude"":0.4,""units"":""nm"",""dimension"":""length""}","{""magnitude"":25.69257912108585,""units"":""meV"",""dimension"":""energy""}","{""magnitude"":0.3984740271498274,""units"":""nm"",""dimension"":""length""}","{""magnitude"":0.0,""units"":""nm"",""dimension"":""length""}",A +K,"{""magnitude"":0.4,""units"":""nm"",""dimension"":""length""}","{""magnitude"":25.69257912108585,""units"":""meV"",""dimension"":""energy""}","{""magnitude"":0.3984740271498274,""units"":""nm"",""dimension"":""length""}","{""magnitude"":0.0,""units"":""nm"",""dimension"":""length""}",K +Y,"{""magnitude"":0.4,""units"":""nm"",""dimension"":""length""}","{""magnitude"":25.69257912108585,""units"":""meV"",""dimension"":""energy""}","{""magnitude"":0.3984740271498274,""units"":""nm"",""dimension"":""length""}","{""magnitude"":0.0,""units"":""nm"",""dimension"":""length""}",Y 
+R,"{""magnitude"":0.4,""units"":""nm"",""dimension"":""length""}","{""magnitude"":25.69257912108585,""units"":""meV"",""dimension"":""energy""}","{""magnitude"":0.3984740271498274,""units"":""nm"",""dimension"":""length""}","{""magnitude"":0.0,""units"":""nm"",""dimension"":""length""}",R +G,"{""magnitude"":0.4,""units"":""nm"",""dimension"":""length""}","{""magnitude"":25.69257912108585,""units"":""meV"",""dimension"":""energy""}","{""magnitude"":0.3984740271498274,""units"":""nm"",""dimension"":""length""}","{""magnitude"":0.0,""units"":""nm"",""dimension"":""length""}",G +F,"{""magnitude"":0.4,""units"":""nm"",""dimension"":""length""}","{""magnitude"":25.69257912108585,""units"":""meV"",""dimension"":""energy""}","{""magnitude"":0.3984740271498274,""units"":""nm"",""dimension"":""length""}","{""magnitude"":0.0,""units"":""nm"",""dimension"":""length""}",F +c,"{""magnitude"":0.4,""units"":""nm"",""dimension"":""length""}","{""magnitude"":25.69257912108585,""units"":""meV"",""dimension"":""energy""}","{""magnitude"":0.3984740271498274,""units"":""nm"",""dimension"":""length""}","{""magnitude"":0.0,""units"":""nm"",""dimension"":""length""}",c diff --git a/pyMBE/parameters/peptides/Blanco2021/templates_particle_state.csv b/pyMBE/parameters/peptides/Blanco2021/templates_particle_state.csv new file mode 100644 index 0000000..935c01f --- /dev/null +++ b/pyMBE/parameters/peptides/Blanco2021/templates_particle_state.csv @@ -0,0 +1,7 @@ +pmb_type,name,particle_name,z,es_type +particle_state,S,S,0,0 +particle_state,A,A,0,1 +particle_state,K,K,0,2 +particle_state,G,G,0,3 +particle_state,F,F,0,4 + diff --git a/pyMBE/parameters/peptides/Lunkad2021/metadata.json b/pyMBE/parameters/peptides/Lunkad2021/metadata.json new file mode 100644 index 0000000..00e588a --- /dev/null +++ b/pyMBE/parameters/peptides/Lunkad2021/metadata.json @@ -0,0 +1,4 @@ +{"summary": "Interaction parameters from Lunkad et al.", + "source": "Lunkad, R. et al. 
Molecular Systems Design & Engineering (2021), 6(2), 122-131.", + "doi": "10.1039/D0ME00147C", + "citekey": "lunkad2021a"} \ No newline at end of file diff --git a/pyMBE/parameters/peptides/Lunkad2021/templates_bond.csv b/pyMBE/parameters/peptides/Lunkad2021/templates_bond.csv new file mode 100644 index 0000000..5fe2fc4 --- /dev/null +++ b/pyMBE/parameters/peptides/Lunkad2021/templates_bond.csv @@ -0,0 +1,7 @@ +name,particle_name1,particle_name2,bond_type,parameters +CA-CA,CA,CA,harmonic,"{""r_0"":{""magnitude"":0.382,""units"":""nm"",""dimension"":""length""},""k"":{""magnitude"":10277.03164843434,""units"":""meV/nm**2"",""dimension"":""energy/length**2""}}" +CA-D,CA,D,harmonic,"{""r_0"":{""magnitude"":0.329,""units"":""nm"",""dimension"":""length""},""k"":{""magnitude"":10277.03164843434,""units"":""meV/nm**2"",""dimension"":""energy/length**2""}}" +CA-E,CA,E,harmonic,"{""r_0"":{""magnitude"":0.435,""units"":""nm"",""dimension"":""length""},""k"":{""magnitude"":10277.03164843434,""units"":""meV/nm**2"",""dimension"":""energy/length**2""}}" +CA-H,CA,H,harmonic,"{""r_0"":{""magnitude"":0.452,""units"":""nm"",""dimension"":""length""},""k"":{""magnitude"":10277.03164843434,""units"":""meV/nm**2"",""dimension"":""energy/length**2""}}" +CA-Y,CA,Y,harmonic,"{""r_0"":{""magnitude"":0.648,""units"":""nm"",""dimension"":""length""},""k"":{""magnitude"":10277.03164843434,""units"":""meV/nm**2"",""dimension"":""energy/length**2""}}" +CA-K,CA,K,harmonic,"{""r_0"":{""magnitude"":0.558,""units"":""nm"",""dimension"":""length""},""k"":{""magnitude"":10277.03164843434,""units"":""meV/nm**2"",""dimension"":""energy/length**2""}}" diff --git a/pyMBE/parameters/peptides/Lunkad2021/templates_particle.csv b/pyMBE/parameters/peptides/Lunkad2021/templates_particle.csv new file mode 100644 index 0000000..f8407a0 --- /dev/null +++ b/pyMBE/parameters/peptides/Lunkad2021/templates_particle.csv @@ -0,0 +1,7 @@ +name,sigma,epsilon,cutoff,offset,initial_state 
+CA,"{""magnitude"":0.35,""units"":""nm"",""dimension"":""length""}","{""magnitude"":25.69257912108585,""units"":""meV"",""dimension"":""energy""}","{""magnitude"":0.3984740271498274,""units"":""nm"",""dimension"":""length""}","{""magnitude"":0.0,""units"":""nm"",""dimension"":""length""}",CA +D,"{""magnitude"":0.35,""units"":""nm"",""dimension"":""length""}","{""magnitude"":25.69257912108585,""units"":""meV"",""dimension"":""energy""}","{""magnitude"":0.3984740271498274,""units"":""nm"",""dimension"":""length""}","{""magnitude"":0.0,""units"":""nm"",""dimension"":""length""}",D +E,"{""magnitude"":0.35,""units"":""nm"",""dimension"":""length""}","{""magnitude"":25.69257912108585,""units"":""meV"",""dimension"":""energy""}","{""magnitude"":0.3984740271498274,""units"":""nm"",""dimension"":""length""}","{""magnitude"":0.0,""units"":""nm"",""dimension"":""length""}",E +H,"{""magnitude"":0.35,""units"":""nm"",""dimension"":""length""}","{""magnitude"":25.69257912108585,""units"":""meV"",""dimension"":""energy""}","{""magnitude"":0.3984740271498274,""units"":""nm"",""dimension"":""length""}","{""magnitude"":0.0,""units"":""nm"",""dimension"":""length""}",H +Y,"{""magnitude"":0.35,""units"":""nm"",""dimension"":""length""}","{""magnitude"":25.69257912108585,""units"":""meV"",""dimension"":""energy""}","{""magnitude"":0.3984740271498274,""units"":""nm"",""dimension"":""length""}","{""magnitude"":0.0,""units"":""nm"",""dimension"":""length""}",Y +K,"{""magnitude"":0.35,""units"":""nm"",""dimension"":""length""}","{""magnitude"":25.69257912108585,""units"":""meV"",""dimension"":""energy""}","{""magnitude"":0.3984740271498274,""units"":""nm"",""dimension"":""length""}","{""magnitude"":0.0,""units"":""nm"",""dimension"":""length""}",K diff --git a/pyMBE/parameters/peptides/Lunkad2021/templates_particle_state.csv b/pyMBE/parameters/peptides/Lunkad2021/templates_particle_state.csv new file mode 100644 index 0000000..e615dd4 --- /dev/null +++ 
b/pyMBE/parameters/peptides/Lunkad2021/templates_particle_state.csv @@ -0,0 +1,2 @@ +pmb_type,name,particle_name,z,es_type +particle_state,CA,CA,0,0 \ No newline at end of file diff --git a/pyMBE/pyMBE.py b/pyMBE/pyMBE.py index 7deef28..0d6ff91 100644 --- a/pyMBE/pyMBE.py +++ b/pyMBE/pyMBE.py @@ -1329,7 +1329,6 @@ def define_particle(self, name, sigma, epsilon, z=0, acidity=pd.NA, pka=pd.NA, cutoff=self.units.Quantity(2**(1./6.), "reduced_length") if pd.isna(offset): offset=self.units.Quantity(0, "reduced_length") - # Define particle states if acidity is pd.NA: states = [{"name": f"{name}", "z": z}] @@ -1384,7 +1383,6 @@ def define_particle_states(self, particle_name, states): es_type=self.propose_unused_type()) self.db._register_template(state) - def define_peptide(self, name, sequence, model): """ Defines a peptide template in the pyMBE database. @@ -2007,83 +2005,6 @@ def load_database(self, folder, format='csv'): if format == 'csv': io._load_database_csv(self.db, folder=folder) - - - def load_interaction_parameters(self, filename, overwrite=False): - """ - Loads the interaction parameters stored in `filename` into `pmb.df` - - Args: - filename(`str`): name of the file to be read - overwrite(`bool`, optional): Switch to enable overwriting of already existing values in pmb.df. Defaults to False. 
- """ - without_units = ['z','es_type'] - with_units = ['sigma','epsilon','offset','cutoff'] - - with open(filename, 'r') as f: - interaction_data = json.load(f) - interaction_parameter_set = interaction_data["data"] - - for key in interaction_parameter_set: - param_dict=interaction_parameter_set[key] - object_type=param_dict.pop('object_type') - if object_type == 'particle': - not_required_attributes={} - for not_required_key in without_units+with_units: - if not_required_key in param_dict.keys(): - if not_required_key in with_units: - not_required_attributes[not_required_key] = _DFm._create_variable_with_units(variable=param_dict.pop(not_required_key), - units_registry=self.units) - elif not_required_key in without_units: - not_required_attributes[not_required_key]=param_dict.pop(not_required_key) - else: - not_required_attributes[not_required_key]=pd.NA - self.define_particle(name=param_dict.pop('name'), - z=not_required_attributes.pop('z'), - sigma=not_required_attributes.pop('sigma'), - offset=not_required_attributes.pop('offset'), - cutoff=not_required_attributes.pop('cutoff'), - epsilon=not_required_attributes.pop('epsilon'), - overwrite=overwrite) - elif object_type == 'residue': - self.define_residue(**param_dict) - elif object_type == 'molecule': - self.define_molecule(**param_dict) - elif object_type == 'peptide': - self.define_peptide(**param_dict) - elif object_type == 'bond': - particle_pairs = param_dict.pop('particle_pairs') - bond_parameters = param_dict.pop('bond_parameters') - bond_type = param_dict.pop('bond_type') - if bond_type == 'harmonic': - k = _DFm._create_variable_with_units(variable=bond_parameters.pop('k'), - units_registry=self.units) - r_0 = _DFm._create_variable_with_units(variable=bond_parameters.pop('r_0'), - units_registry=self.units) - bond = {'r_0' : r_0, - 'k' : k, - } - - elif bond_type == 'FENE': - k = _DFm._create_variable_with_units(variable=bond_parameters.pop('k'), - units_registry=self.units) - r_0 = 
_DFm._create_variable_with_units(variable=bond_parameters.pop('r_0'), - units_registry=self.units) - d_r_max = _DFm._create_variable_with_units(variable=bond_parameters.pop('d_r_max'), - units_registry=self.units) - bond = {'r_0' : r_0, - 'k' : k, - 'd_r_max': d_r_max, - } - else: - raise ValueError("Current implementation of pyMBE only supports harmonic and FENE bonds") - self.define_bond(bond_type=bond_type, - bond_parameters=bond, - particle_pairs=particle_pairs) - else: - raise ValueError(object_type+' is not a known pmb object type') - - return def load_pka_set(self, filename): """ diff --git a/pyMBE/storage/io.py b/pyMBE/storage/io.py index 96097d5..bf9aeb5 100644 --- a/pyMBE/storage/io.py +++ b/pyMBE/storage/io.py @@ -129,9 +129,9 @@ def _load_database_csv(db, folder): folder = Path(folder) if not folder.exists(): raise FileNotFoundError(f"Folder '{folder}' does not exist.") - # target pmb types we support pyMBE_types = ["particle", + "particle_state", "residue", "molecule", "bond", @@ -139,16 +139,13 @@ def _load_database_csv(db, folder): "protein", "hydrogel", "lj"] - # TEMPLATES for pmb_type in pyMBE_types: csv_file = folder / f"templates_{pmb_type}.csv" if not csv_file.exists(): continue df = pd.read_csv(csv_file, dtype=str).fillna("") - templates: Dict[str, Any] = {} - for _, row in df.iterrows(): # row values are strings (or empty string) if pmb_type == "particle": @@ -156,63 +153,55 @@ def _load_database_csv(db, folder): epsilon_d = _decode(row["epsilon"]) cutoff_d = _decode(row["cutoff"]) offset_d = _decode(row["offset"]) - states_d = _decode(row["states"]) - sigma = PintQuantity.from_dict(sigma_d) if sigma_d is not None else None epsilon = PintQuantity.from_dict(epsilon_d) if epsilon_d is not None else None cutoff = PintQuantity.from_dict(cutoff_d) if cutoff_d is not None else None offset = PintQuantity.from_dict(offset_d) if offset_d is not None else None - - tpl = ParticleTemplate( - name=row["name"], - sigma=sigma, - epsilon=epsilon, - 
cutoff=cutoff, - offset=offset, - initial_state=row["initial_state"]) + tpl = ParticleTemplate(name=row["name"], + sigma=sigma, + epsilon=epsilon, + cutoff=cutoff, + offset=offset, + initial_state=row["initial_state"]) + templates[tpl.name] = tpl + elif pmb_type == "particle_state": + tpl = ParticleStateTemplate(particle_name=row["particle_name"], + z=row["z"], + name=row["name"], + es_type=row["es_type"]) templates[tpl.name] = tpl - elif pmb_type == "residue": sc = _decode(row.get("side_chains", "")) or [] if not isinstance(sc, list): sc = list(sc) - tpl = ResidueTemplate( - name=row["name"], - central_bead=row.get("central_bead", ""), - side_chains=sc - ) + tpl = ResidueTemplate(name=row["name"], + central_bead=row.get("central_bead", ""), + side_chains=sc) templates[tpl.name] = tpl - elif pmb_type == "molecule": rl = _decode(row.get("residue_list", "")) or [] if not isinstance(rl, list): rl = list(rl) - tpl = MoleculeTemplate( - name=row["name"], - residue_list=rl - ) + tpl = MoleculeTemplate(name=row["name"], + residue_list=rl) templates[tpl.name] = tpl elif pmb_type == "peptide": rl = _decode(row.get("residue_list", "")) or [] if not isinstance(rl, list): rl = list(rl) - tpl = PeptideTemplate( - name=row["name"], - model=row.get("model", ""), - residue_list=rl, - sequence=row["sequence"] - ) + tpl = PeptideTemplate(name=row["name"], + model=row.get("model", ""), + residue_list=rl, + sequence=row["sequence"]) templates[tpl.name] = tpl elif pmb_type == "protein": rl = _decode(row.get("residue_list", "")) or [] if not isinstance(rl, list): rl = list(rl) - tpl = ProteinTemplate( - name=row["name"], - model=row.get("model", ""), - residue_list=rl, - sequence=row["sequence"] - ) + tpl = ProteinTemplate(name=row["name"], + model=row.get("model", ""), + residue_list=rl, + sequence=row["sequence"]) templates[tpl.name] = tpl elif pmb_type == "bond": params_raw = _decode(row.get("parameters", "")) or {} @@ -234,11 +223,9 @@ def _load_database_csv(db, folder): node_map = 
[HydrogelNode(**n) for n in node_map_raw if isinstance(n, dict)] chain_map = [HydrogelChain(**c) for c in chain_map_raw if isinstance(c, dict)] - tpl = HydrogelTemplate( - name=row["name"], - node_map=node_map, - chain_map=chain_map - ) + tpl = HydrogelTemplate(name=row["name"], + node_map=node_map, + chain_map=chain_map) templates[tpl.name] = tpl elif pmb_type == "lj": sigma_d = _decode(row["sigma"]) @@ -248,30 +235,22 @@ def _load_database_csv(db, folder): state1 = row["state1"] state2 = row["state2"] shift_d = _decode(row.get("shift", "")) - sigma = PintQuantity.from_dict(sigma_d) if sigma_d is not None else None epsilon = PintQuantity.from_dict(epsilon_d) if epsilon_d is not None else None cutoff = PintQuantity.from_dict(cutoff_d) if cutoff_d is not None else None offset = PintQuantity.from_dict(offset_d) if offset_d is not None else None - - if isinstance(shift_d, dict) and {"magnitude", "units", "dimension"}.issubset(shift_d.keys()): shift = PintQuantity.from_dict(shift_d) else: shift = shift_d # could be "auto" or None - - tpl = LJInteractionTemplate( - state1=state1, - state2=state2, - sigma=sigma, - epsilon=epsilon, - cutoff=cutoff, - offset=offset, - shift=shift - ) - + tpl = LJInteractionTemplate(state1=state1, + state2=state2, + sigma=sigma, + epsilon=epsilon, + cutoff=cutoff, + offset=offset, + shift=shift) templates[tpl.name] = tpl - db._templates[pmb_type] = templates # INSTANCES @@ -289,64 +268,48 @@ def _load_database_csv(db, folder): residue_val = row.get("residue_id", "") or "" molecule_val = row.get("molecule_id", "") or "" assembly_val = row.get("assembly_id", "") or "" - inst = ParticleInstance( - name=row["name"], - particle_id=int(row["particle_id"]), - initial_state=row["initial_state"], - residue_id=None if residue_val == "" else int(residue_val), - molecule_id=None if molecule_val == "" else int(molecule_val), - assembly_id=None if assembly_val == "" else int(assembly_val), - ) + inst = ParticleInstance(name=row["name"], + 
particle_id=int(row["particle_id"]), + initial_state=row["initial_state"], + residue_id=None if residue_val == "" else int(residue_val), + molecule_id=None if molecule_val == "" else int(molecule_val), + assembly_id=None if assembly_val == "" else int(assembly_val)) instances[inst.particle_id] = inst - elif pmb_type == "residue": mol_val = row.get("molecule_id", "") or "" assembly_val = row.get("assembly_id", "") or "" - inst = ResidueInstance( - name=row["name"], - residue_id=int(row["residue_id"]), - molecule_id=None if mol_val == "" else int(mol_val), - assembly_id=None if assembly_val == "" else int(assembly_val), - ) + inst = ResidueInstance(name=row["name"], + residue_id=int(row["residue_id"]), + molecule_id=None if mol_val == "" else int(mol_val), + assembly_id=None if assembly_val == "" else int(assembly_val)) instances[inst.residue_id] = inst - elif pmb_type == "molecule": assembly_val = row.get("assembly_id", "") or "" - inst = MoleculeInstance( - name=row["name"], - molecule_id=int(row["molecule_id"]), - assembly_id=None if assembly_val == "" else int(assembly_val), - ) + inst = MoleculeInstance(name=row["name"], + molecule_id=int(row["molecule_id"]), + assembly_id=None if assembly_val == "" else int(assembly_val)) instances[inst.molecule_id] = inst elif pmb_type == "peptide": assembly_val = row.get("assembly_id", "") or "" - inst = PeptideInstance( - name=row["name"], - molecule_id=int(row["molecule_id"]), - assembly_id=None if assembly_val == "" else int(assembly_val), - ) + inst = PeptideInstance(name=row["name"], + molecule_id=int(row["molecule_id"]), + assembly_id=None if assembly_val == "" else int(assembly_val)) instances[inst.molecule_id] = inst elif pmb_type == "protein": assembly_val = row.get("assembly_id", "") or "" - inst = ProteinInstance( - name=row["name"], - molecule_id=int(row["molecule_id"]), - assembly_id=None if assembly_val == "" else int(assembly_val), - ) + inst = ProteinInstance(name=row["name"], + 
molecule_id=int(row["molecule_id"]), + assembly_id=None if assembly_val == "" else int(assembly_val)) instances[inst.molecule_id] = inst elif pmb_type == "bond": - inst = BondInstance( - name=row["name"], - bond_id=int(row["bond_id"]), - particle_id1=int(row["particle_id1"]), - particle_id2=int(row["particle_id2"]), - ) + inst = BondInstance(name=row["name"], + bond_id=int(row["bond_id"]), + particle_id1=int(row["particle_id1"]), + particle_id2=int(row["particle_id2"])) instances[inst.bond_id] = inst elif pmb_type == "hydrogel": - inst = HydrogelInstance( - name=row["name"], - assembly_id=int(row["assembly_id"]), - ) + inst = HydrogelInstance(name=row["name"], + assembly_id=int(row["assembly_id"])) instances[inst.assembly_id] = inst db._instances[pmb_type] = instances @@ -359,13 +322,11 @@ def _load_database_csv(db, folder): participants_raw = _decode(row.get("participants", "")) or [] participants = [ReactionParticipant(**p) for p in participants_raw] metadata = _decode(row.get("metadata", "")) or None - rx = Reaction( - name=row["name"], - participants=participants, - pK=float(row["pK"]) if (row.get("pK") not in (None, "", "nan")) else None, - reaction_type=row.get("reaction_type", None), - metadata=metadata - ) + rx = Reaction(name=row["name"], + participants=participants, + pK=float(row["pK"]) if (row.get("pK") not in (None, "", "nan")) else None, + reaction_type=row.get("reaction_type", None), + metadata=metadata) reactions[rx.name] = rx db._reactions = reactions @@ -381,25 +342,15 @@ def _load_reaction_set(path): """ with open(path, "r") as f: data = json.load(f) - reactions = {} for name, rdata in data["data"].items(): - - participants = [ - ReactionParticipant(**p) - for p in rdata["participants"] - ] - - reaction = Reaction( - name=name, - participants=participants, - constant=rdata["constant"], - reaction_type=rdata.get("reaction_type", "acid_base"), - metadata=rdata.get("metadata") - ) - + participants = [ReactionParticipant(**p) for p in 
rdata["participants"]] + reaction = Reaction(name=name, + participants=participants, + constant=rdata["constant"], + reaction_type=rdata.get("reaction_type", "acid_base"), + metadata=rdata.get("metadata")) reactions[name] = reaction - return reactions def _save_database_csv(db, folder): @@ -414,52 +365,37 @@ def _save_database_csv(db, folder): """ os.makedirs(folder, exist_ok=True) - # TEMPLATES for pmb_type, tpl_dict in db._templates.items(): rows = [] for tpl in tpl_dict.values(): # PARTICLE TEMPLATE: explicit custom encoding if pmb_type == "particle" and isinstance(tpl, ParticleTemplate): - rows.append({ - "name": tpl.name, - "sigma": _encode(tpl.sigma), - "epsilon": _encode(tpl.epsilon), - "cutoff": _encode(tpl.cutoff), - "offset": _encode(tpl.offset), - "initial_state": tpl.initial_state, - "states": _encode({sname: st.model_dump() for sname, st in tpl.states.items()}), # states: dict state_name -> ParticleState.model_dump() - }) - + rows.append({"name": tpl.name, + "sigma": _encode(tpl.sigma), + "epsilon": _encode(tpl.epsilon), + "cutoff": _encode(tpl.cutoff), + "offset": _encode(tpl.offset), + "initial_state": tpl.initial_state}) # RESIDUE TEMPLATE elif pmb_type == "residue" and isinstance(tpl, ResidueTemplate): - rows.append({ - "name": tpl.name, - "central_bead": tpl.central_bead, - "side_chains": _encode(tpl.side_chains), - }) - + rows.append({"name": tpl.name, + "central_bead": tpl.central_bead, + "side_chains": _encode(tpl.side_chains)}) # MOLECULE TEMPLATE elif pmb_type == "molecule" and isinstance(tpl, MoleculeTemplate): - rows.append({ - "name": tpl.name, - "residue_list": _encode(tpl.residue_list), - }) - + rows.append({"name": tpl.name, + "residue_list": _encode(tpl.residue_list)}) elif pmb_type == "peptide" and isinstance(tpl, PeptideTemplate): - rows.append({ - "name": tpl.name, - "model": tpl.model, - "residue_list": _encode(tpl.residue_list), - "sequence": tpl.sequence, - }) + rows.append({"name": tpl.name, + "model": tpl.model, + 
"residue_list": _encode(tpl.residue_list), + "sequence": tpl.sequence}) elif pmb_type == "protein" and isinstance(tpl, ProteinTemplate): - rows.append({ - "name": tpl.name, - "model": tpl.model, - "residue_list": _encode(tpl.residue_list), - "sequence": tpl.sequence, - }) + rows.append({"name": tpl.name, + "model": tpl.model, + "residue_list": _encode(tpl.residue_list), + "sequence": tpl.sequence}) # BOND TEMPLATE elif pmb_type == "bond" and isinstance(tpl, BondTemplate): # parameters: dict[str, scalar or PintQuantity] @@ -470,33 +406,26 @@ def _save_database_csv(db, folder): else: # assume scalar serializable params_serial[k] = v - rows.append({ - "name": tpl.name, - "particle_name1": tpl.particle_name1, - "particle_name2": tpl.particle_name2, - "bond_type": tpl.bond_type, - "parameters": _encode(params_serial), - }) + rows.append({"name": tpl.name, + "particle_name1": tpl.particle_name1, + "particle_name2": tpl.particle_name2, + "bond_type": tpl.bond_type, + "parameters": _encode(params_serial)}) # HYDROGEL TEMPLATE elif pmb_type == "hydrogel" and isinstance(tpl, HydrogelTemplate): - rows.append({ - "name": tpl.name, - "node_map": _encode([node.model_dump() for node in tpl.node_map]), - "chain_map": _encode([chain.model_dump() for chain in tpl.chain_map]), - }) + rows.append({"name": tpl.name, + "node_map": _encode([node.model_dump() for node in tpl.node_map]), + "chain_map": _encode([chain.model_dump() for chain in tpl.chain_map])}) # LJ TEMPLATE elif pmb_type == "lj" and isinstance(tpl, LJInteractionTemplate): - rows.append({ - "name": tpl.name, - "state1": tpl.state1, - "state2": tpl.state2, - "sigma": _encode(tpl.sigma), - "epsilon":_encode(tpl.epsilon), - "cutoff": _encode(tpl.cutoff), - "offset": _encode(tpl.offset), - "shift": _encode(tpl.shift) - }) - + rows.append({"name": tpl.name, + "state1": tpl.state1, + "state2": tpl.state2, + "sigma": _encode(tpl.sigma), + "epsilon":_encode(tpl.epsilon), + "cutoff": _encode(tpl.cutoff), + "offset": 
_encode(tpl.offset), + "shift": _encode(tpl.shift)}) else: # Generic fallback: try model_dump() try: @@ -512,58 +441,44 @@ def _save_database_csv(db, folder): rows = [] for inst in inst_dict.values(): if pmb_type == "particle" and isinstance(inst, ParticleInstance): - rows.append({ - "pmb_type": pmb_type, - "name": inst.name, - "particle_id": int(inst.particle_id), - "initial_state": inst.initial_state, - "residue_id": int(inst.residue_id) if inst.residue_id is not None else "", - "molecule_id": int(inst.molecule_id) if inst.molecule_id is not None else "", - "assembly_id": int(inst.assembly_id) if inst.assembly_id is not None else "", - }) + rows.append({"pmb_type": pmb_type, + "name": inst.name, + "particle_id": int(inst.particle_id), + "initial_state": inst.initial_state, + "residue_id": int(inst.residue_id) if inst.residue_id is not None else "", + "molecule_id": int(inst.molecule_id) if inst.molecule_id is not None else "", + "assembly_id": int(inst.assembly_id) if inst.assembly_id is not None else ""}) elif pmb_type == "residue" and isinstance(inst, ResidueInstance): - rows.append({ - "pmb_type": pmb_type, - "name": inst.name, - "residue_id": int(inst.residue_id), - "molecule_id": int(inst.molecule_id) if inst.molecule_id is not None else "", - "assembly_id": int(inst.assembly_id) if inst.assembly_id is not None else "", - }) + rows.append({"pmb_type": pmb_type, + "name": inst.name, + "residue_id": int(inst.residue_id), + "molecule_id": int(inst.molecule_id) if inst.molecule_id is not None else "", + "assembly_id": int(inst.assembly_id) if inst.assembly_id is not None else ""}) elif pmb_type == "molecule" and isinstance(inst, MoleculeInstance): - rows.append({ - "pmb_type": pmb_type, - "name": inst.name, - "molecule_id": int(inst.molecule_id), - "assembly_id": int(inst.assembly_id) if inst.assembly_id is not None else "", - }) + rows.append({"pmb_type": pmb_type, + "name": inst.name, + "molecule_id": int(inst.molecule_id), + "assembly_id": 
int(inst.assembly_id) if inst.assembly_id is not None else ""}) elif pmb_type == "peptide" and isinstance(inst, PeptideInstance): - rows.append({ - "pmb_type": pmb_type, - "name": inst.name, - "molecule_id": int(inst.molecule_id), - "assembly_id": int(inst.assembly_id) if inst.assembly_id is not None else "", - }) + rows.append({"pmb_type": pmb_type, + "name": inst.name, + "molecule_id": int(inst.molecule_id), + "assembly_id": int(inst.assembly_id) if inst.assembly_id is not None else ""}) elif pmb_type == "protein" and isinstance(inst, ProteinInstance): - rows.append({ - "pmb_type": pmb_type, - "name": inst.name, - "molecule_id": int(inst.molecule_id), - "assembly_id": int(inst.assembly_id) if inst.assembly_id is not None else "", - }) + rows.append({"pmb_type": pmb_type, + "name": inst.name, + "molecule_id": int(inst.molecule_id), + "assembly_id": int(inst.assembly_id) if inst.assembly_id is not None else ""}) elif pmb_type == "bond" and isinstance(inst, BondInstance): - rows.append({ - "pmb_type": pmb_type, - "name": inst.name, - "bond_id": int(inst.bond_id), - "particle_id1": int(inst.particle_id1), - "particle_id2": int(inst.particle_id2), - }) + rows.append({"pmb_type": pmb_type, + "name": inst.name, + "bond_id": int(inst.bond_id), + "particle_id1": int(inst.particle_id1), + "particle_id2": int(inst.particle_id2)}) elif pmb_type == "hydrogel" and isinstance(inst, HydrogelInstance): - rows.append({ - "pmb_type": pmb_type, - "name": inst.name, - "assembly_id": int(inst.assembly_id), - }) + rows.append({"pmb_type": pmb_type, + "name": inst.name, + "assembly_id": int(inst.assembly_id)}) else: # fallback to model_dump try: @@ -577,11 +492,10 @@ def _save_database_csv(db, folder): # REACTIONS rows = [] for rx in db._reactions.values(): - rows.append({ - "name": rx.name, - "participants": _encode([p.model_dump() for p in rx.participants]), - "pK": rx.pK if hasattr(rx, "pK") else None, - "reaction_type": rx.reaction_type, - "metadata": _encode(rx.metadata) if 
getattr(rx, "metadata", None) is not None else "", - }) - pd.DataFrame(rows).to_csv(os.path.join(folder, "reactions.csv"), index=False) \ No newline at end of file + rows.append({"name": rx.name, + "participants": _encode([p.model_dump() for p in rx.participants]), + "pK": rx.pK if hasattr(rx, "pK") else None, + "reaction_type": rx.reaction_type, + "metadata": _encode(rx.metadata) if getattr(rx, "metadata", None) is not None else ""}) + if rows: + pd.DataFrame(rows).to_csv(os.path.join(folder, "reactions.csv"), index=False) \ No newline at end of file diff --git a/testsuite/lj_tests.py b/testsuite/lj_tests.py index c4b5bdd..4d8f281 100644 --- a/testsuite/lj_tests.py +++ b/testsuite/lj_tests.py @@ -19,16 +19,8 @@ # Import pyMBE and other libraries import pyMBE import numpy as np -import logging -import io import unittest as ut -# Create an in-memory log stream -log_stream = io.StringIO() -logging.basicConfig(level=logging.INFO, - format="%(levelname)s: %(message)s", - handlers=[logging.StreamHandler(log_stream)] ) - # Create an instance of pyMBE library pmb = pyMBE.pymbe_library(seed=42) import espressomd diff --git a/testsuite/seed_test.py b/testsuite/seed_test.py index fe65c8a..f1c2c64 100644 --- a/testsuite/seed_test.py +++ b/testsuite/seed_test.py @@ -1,5 +1,5 @@ # -# Copyright (C) 2024 pyMBE-dev team +# Copyright (C) 2024-2026 pyMBE-dev team # # This file is part of pyMBE. 
# @@ -19,71 +19,65 @@ import numpy as np import espressomd import pyMBE -import logging -import io +from pyMBE.lib import handy_functions as hf +import unittest as ut -# Create an in-memory log stream -log_stream = io.StringIO() -logging.basicConfig(level=logging.INFO, - format="%(levelname)s: %(message)s", - handlers=[logging.StreamHandler(log_stream)]) espresso_system = espressomd.System(box_l = [100]*3) def build_peptide_in_espresso(seed): pmb = pyMBE.pymbe_library(seed=seed) - - # Simulation parameters - pmb.set_reduced_units(unit_length=0.4*pmb.units.nm) - # Peptide parameters sequence = 'EEEEEEE' model = '2beadAA' # Model with 2 beads per each aminoacid - # Load peptide parametrization from Lunkad, R. et al. Molecular Systems Design & Engineering (2021), 6(2), 122-131. - path_to_interactions=pmb.root / "parameters" / "peptides" / "Lunkad2021.json" + path_to_interactions=pmb.root / "parameters" / "peptides" / "Lunkad2021" path_to_pka=pmb.root / "parameters" / "pka_sets" / "CRC1991.json" - pmb.load_interaction_parameters(filename=path_to_interactions) + pmb.load_database(folder=path_to_interactions) pmb.load_pka_set(path_to_pka) - + pka_set = pmb.get_pka_set() + for particle_name in pka_set.keys(): + pmb.set_monoprototic_particle_states(acidity=pka_set[particle_name]["acidity"], + particle_name=particle_name) + # define residues + hf.define_peptide_AA_residues(sequence=sequence, + model=model, + pmb=pmb) # Defines the peptide in the pyMBE data frame peptide_name = 'generic_peptide' pmb.define_peptide(name=peptide_name, sequence=sequence, model=model) - # Bond parameters generic_bond_length=0.4 * pmb.units.nm generic_harmonic_constant = 400 * pmb.units('reduced_energy / reduced_length**2') - HARMONIC_parameters = {'r_0' : generic_bond_length, 'k' : generic_harmonic_constant} - pmb.define_default_bond(bond_type = 'harmonic', bond_parameters = HARMONIC_parameters) - - # Add all bonds to espresso system - pmb.add_bonds_to_espresso(espresso_system=espresso_system) - 
# Create molecule in the espresso system pmb.create_molecule(name=peptide_name, number_of_molecules=1, espresso_system=espresso_system, use_default_bond=True) - # Extract positions of particles in the peptide + particle_id_list = pmb.get_particle_id_map("generic_peptide")["all"] positions = [] - molecule_id = pmb.df.loc[pmb.df['name']==peptide_name].molecule_id.values[0] - particle_id_list = pmb.df.loc[pmb.df['molecule_id']==molecule_id].particle_id.dropna().to_list() for pid in particle_id_list: positions.append(espresso_system.part.by_id(pid).pos) - + pmb.delete_instances_in_system(espresso_system=espresso_system, + instance_id=0, + pmb_type="peptide") return np.asarray(positions) - -print("*** Check that the using the same seed results in the same initial particle positions***") -positions1 = build_peptide_in_espresso(42) -positions2 = build_peptide_in_espresso(42) - -np.testing.assert_almost_equal(positions1, positions2) - -print("*** Test passed ***") +class Test(ut.TestCase): + def test_deterministic_build_pyMBE(self): + """ + Check that the using the same seed results in the same initial particle positions + """ + positions1 = build_peptide_in_espresso(42) + positions2 = build_peptide_in_espresso(42) + np.testing.assert_equal(positions1, + positions2) + +if __name__ == "__main__": + ut.main() \ No newline at end of file From 42c900058e60064cbb32b68af1be91ce1a9aab80 Mon Sep 17 00:00:00 2001 From: pmblanco Date: Thu, 22 Jan 2026 10:08:43 +0100 Subject: [PATCH 26/55] add metadata management, small cleanup --- pyMBE/parameters/peptides/Blanco2021.json | 23 ------- pyMBE/parameters/peptides/Lunkad2021.json | 22 ------- pyMBE/pyMBE.py | 74 ++++++++--------------- pyMBE/storage/io.py | 22 +++++-- 4 files changed, 42 insertions(+), 99 deletions(-) delete mode 100644 pyMBE/parameters/peptides/Blanco2021.json delete mode 100644 pyMBE/parameters/peptides/Lunkad2021.json diff --git a/pyMBE/parameters/peptides/Blanco2021.json b/pyMBE/parameters/peptides/Blanco2021.json 
deleted file mode 100644 index 97f5e37..0000000 --- a/pyMBE/parameters/peptides/Blanco2021.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "metadata": { - "summary": "Interaction parameters from Blanco et al.", - "source": "Blanco et al. Soft Matter, 17(3), 655-669, 2021.", - "doi": "10.1039/d0sm01475c", - "citekey": "blanco2021a" - }, - "data": { - "D": {"object_type":"particle", "name": "D", "sigma": {"value":0.4, "units":"nm"}, "epsilon":{"value":1, "units":"reduced_energy"}}, - "E": {"object_type":"particle", "name": "E", "sigma": {"value":0.4, "units":"nm"}, "epsilon":{"value":1, "units":"reduced_energy"}}, - "n": {"object_type":"particle", "name": "n", "sigma": {"value":0.4, "units":"nm"}, "epsilon":{"value":1, "units":"reduced_energy"}}, - "S": {"object_type":"particle", "name": "S", "z":0, "sigma": {"value":0.4, "units":"nm"}, "epsilon":{"value":1, "units":"reduced_energy"}}, - "H": {"object_type":"particle", "name": "H", "sigma": {"value":0.4, "units":"nm"}, "epsilon":{"value":1, "units":"reduced_energy"}}, - "A": {"object_type":"particle", "name": "A", "z":0, "sigma": {"value":0.4, "units":"nm"}, "epsilon":{"value":1, "units":"reduced_energy"}}, - "K": {"object_type":"particle", "name": "K", "sigma": {"value":0.4, "units":"nm"}, "epsilon":{"value":1, "units":"reduced_energy"}}, - "Y": {"object_type":"particle", "name": "Y", "sigma": {"value":0.4, "units":"nm"}, "epsilon":{"value":1, "units":"reduced_energy"}}, - "R": {"object_type":"particle", "name": "R", "sigma": {"value":0.4, "units":"nm"}, "epsilon":{"value":1, "units":"reduced_energy"}}, - "G": {"object_type":"particle", "name": "G", "z":0, "sigma": {"value":0.4, "units":"nm"}, "epsilon":{"value":1, "units":"reduced_energy"}}, - "F": {"object_type":"particle", "name": "F", "z":0, "sigma": {"value":0.4, "units":"nm"}, "epsilon":{"value":1, "units":"reduced_energy"}}, - "c": {"object_type":"particle", "name": "c", "sigma": {"value":0.4, "units":"nm"}, "epsilon":{"value":1, "units":"reduced_energy"}}, - 
"bond": {"object_type":"bond", "bond_type": "harmonic", "bond_parameters" : {"r_0": {"value":0.4, "units":"nm"}, "k": {"value": 0.41, "units":"N/m"}}, "particle_pairs": [["n","D"],["S","D"],["S","H"],["H","A"],["A","K"],["E","H"],["E","K"],["K","R"],["K","H"],["R","H"],["H","H"],["H","G"],["G","Y"],["Y","K"],["R","K"],["K","F"],["H","S"],["H","F"],["H","R"],["R","G"],["Y","G"],["Y","c"]]} - } -} diff --git a/pyMBE/parameters/peptides/Lunkad2021.json b/pyMBE/parameters/peptides/Lunkad2021.json deleted file mode 100644 index 40219e6..0000000 --- a/pyMBE/parameters/peptides/Lunkad2021.json +++ /dev/null @@ -1,22 +0,0 @@ -{ - "metadata": { - "summary": "Interaction parameters from Lunkad et al.", - "source": "Lunkad, R. et al. Molecular Systems Design & Engineering (2021), 6(2), 122-131.", - "doi": "10.1039/D0ME00147C", - "citekey": "lunkad2021a" - }, - "data": { - "CA": {"object_type":"particle", "name": "CA", "z":0, "sigma": {"value":0.35, "units":"nm"}, "epsilon":{"value":1, "units":"reduced_energy"}}, - "D": {"object_type":"particle", "name": "D", "sigma": {"value":0.35, "units":"nm"}, "epsilon":{"value":1, "units":"reduced_energy"}}, - "E": {"object_type":"particle", "name": "E", "sigma": {"value":0.35, "units":"nm"}, "epsilon":{"value":1, "units":"reduced_energy"}}, - "H": {"object_type":"particle", "name": "H", "sigma": {"value":0.35, "units":"nm"}, "epsilon":{"value":1, "units":"reduced_energy"}}, - "Y": {"object_type":"particle", "name": "Y", "sigma": {"value":0.35, "units":"nm"}, "epsilon":{"value":1, "units":"reduced_energy"}}, - "K": {"object_type":"particle", "name": "K", "sigma": {"value":0.35, "units":"nm"}, "epsilon":{"value":1, "units":"reduced_energy"}}, - "bond_CA_CA": {"object_type":"bond", "bond_type": "harmonic", "particle_pairs": [["CA","CA"]] , "bond_parameters" : {"r_0": {"value":0.382, "units":"nm"}, "k": {"value": 400, "units":"reduced_energy / nm**2"}}}, - "bond_CA_D": {"object_type":"bond", "bond_type": "harmonic", "particle_pairs": 
[["CA","D"]] , "bond_parameters" : {"r_0": {"value":0.329, "units":"nm"}, "k": {"value": 400, "units":"reduced_energy / nm**2"}}}, - "bond_CA_E": {"object_type":"bond", "bond_type": "harmonic", "particle_pairs": [["CA","E"]] , "bond_parameters" : {"r_0": {"value":0.435, "units":"nm"}, "k": {"value": 400, "units":"reduced_energy / nm**2"}}}, - "bond_CA_H": {"object_type":"bond", "bond_type": "harmonic", "particle_pairs": [["CA","H"]] , "bond_parameters" : {"r_0": {"value":0.452, "units":"nm"}, "k": {"value": 400, "units":"reduced_energy / nm**2"}}}, - "bond_CA_Y": {"object_type":"bond", "bond_type": "harmonic", "particle_pairs": [["CA","Y"]] , "bond_parameters" : {"r_0": {"value":0.648, "units":"nm"}, "k": {"value": 400, "units":"reduced_energy / nm**2"}}}, - "bondd_CA_K": {"object_type":"bond", "bond_type": "harmonic", "particle_pairs": [["CA","K"]] , "bond_parameters" : {"r_0": {"value":0.558, "units":"nm"}, "k": {"value": 400, "units":"reduced_energy / nm**2"}}} - } -} diff --git a/pyMBE/pyMBE.py b/pyMBE/pyMBE.py index 0d6ff91..bf4c79c 100644 --- a/pyMBE/pyMBE.py +++ b/pyMBE/pyMBE.py @@ -772,7 +772,6 @@ def create_molecule(self, name, number_of_molecules, espresso_system, list_of_fi Note: Despite its name, this function can be used to create both molecules and peptides. """ - supported_pmb_types = ["molecule", "peptide"] if number_of_molecules <= 0: return {} if list_of_first_residue_positions is not None: @@ -1995,6 +1994,9 @@ def load_database(self, folder, format='csv'): folder (str or Path): Path to the folder where the pyMBE database was stored. format (str, optional): Format of the database to be loaded. Defaults to 'csv'. + Return: + (dict): metadata with additional information about the source of the information in the database. + Note: - The folder must contain the files generated by `pmb.save_database()`. - Currently, only 'csv' format is supported. 
@@ -2003,8 +2005,10 @@ def load_database(self, folder, format='csv'): if format not in supported_formats: raise ValueError(f"Format {format} not supported. Supported formats are {supported_formats}") if format == 'csv': - io._load_database_csv(self.db, - folder=folder) + metadata =io._load_database_csv(self.db, + folder=folder) + return metadata + def load_pka_set(self, filename): """ @@ -2021,7 +2025,10 @@ def load_pka_set(self, filename): "B": {"acidity": "basic", "pka_value": 9.8} } } - + + Returns: + (dict): Dictionary with bibliographic metadata about the original work where the pKa set was determined. + Notes: - This method is designed for monoprotic acids and bases only. """ @@ -2037,6 +2044,7 @@ def load_pka_set(self, filename): pka=pka, acidity=acidity, metadata=metadata) + return metadata def propose_unused_type(self): """ @@ -2151,38 +2159,13 @@ def protein_sequence_parser(self, sequence): raise ValueError("Unknown code for a residue: ", residue, " please review the input sequence") clean_sequence.append(residue_ok) return clean_sequence - - def read_pmb_df (self,filename): - """ - Reads a pyMBE's Dataframe stored in `filename` and stores the information into pyMBE. - - Args: - filename(`str`): path to file. - - Note: - This function only accepts files with CSV format. 
- """ - if filename.rsplit(".", 1)[1] != "csv": - raise ValueError("Only files with CSV format are supported") - df = pd.read_csv (filename,header=[0, 1], index_col=0) - self.df = _DFm._setup_df() - columns_names = pd.MultiIndex.from_frame(self.df) - columns_names = columns_names.names - multi_index = pd.MultiIndex.from_tuples(columns_names) - df.columns = multi_index - _DFm._convert_columns_to_original_format(df=df, - units_registry=self.units) - self.df = df - self.df.fillna(pd.NA, - inplace=True) - return self.df - - def read_protein_vtf_in_df (self,filename,unit_length=None): + + def read_protein_vtf(self,filename,unit_length=None): """ - Loads a coarse-grained protein model in a vtf file `filename` into the `pmb.df` and it labels it with `name`. + Loads a coarse-grained protein model in a VTF file `filename`. Args: - filename(`str`): Path to the vtf file with the coarse-grained model. + filename(`str`): Path to the VTF file with the coarse-grained model. unit_length(`obj`): unit of length of the the coordinates in `filename` using the pyMBE UnitRegistry. Defaults to None. 
Returns: @@ -2376,7 +2359,6 @@ def set_reduced_units(self, unit_length=None, unit_charge=None, temperature=None self.units.define(f'reduced_length = {unit_length}') self.units.define(f'reduced_charge = {unit_charge}') logging.info(self.get_reduced_units()) - return def setup_cpH (self, counter_ion, constant_pH, exclusion_range=None, pka_set=None, use_exclusion_radius_per_type = False): """ @@ -2754,17 +2736,13 @@ def setup_grxmc_unified(self, pH_res, c_salt_res, cation_name, anion_name, activ raise ValueError('ERROR anion charge must be negative, charge ', anion_charge) # Coupling to the reservoir: 0 = X+ + X- - RE.add_reaction( - gamma = K_XX.magnitude, - reactant_types = [], - reactant_coefficients = [], - product_types = [ cation_es_type, anion_es_type ], - product_coefficients = [ 1, 1 ], - default_charges = { - cation_es_type: cation_charge, - anion_es_type: anion_charge, - } - ) + RE.add_reaction(gamma = K_XX.magnitude, + reactant_types = [], + reactant_coefficients = [], + product_types = [ cation_es_type, anion_es_type ], + product_coefficients = [ 1, 1 ], + default_charges = {cation_es_type: cation_charge, + anion_es_type: anion_charge}) sucessful_reactions_labels=[] charge_number_map = self.get_charge_number_map() @@ -2818,12 +2796,9 @@ def setup_lj_interactions(self, espresso_system, shift_potential=True, combining """ from itertools import combinations_with_replacement - particle_templates = self.db.get_templates("particle") - shift = "auto" if shift_potential else 0 - - # Flatten states with template context + # Get all particle states registered in pyMBE state_entries = [] for tpl in particle_templates.values(): for state in self.db.get_particle_states_templates(particle_name=tpl.name).values(): @@ -2844,7 +2819,6 @@ def setup_lj_interactions(self, espresso_system, shift_potential=True, combining cutoff=lj_parameters["cutoff"].to("reduced_length").magnitude, offset=lj_parameters["offset"].to("reduced_length").magnitude, shift=shift) - lj_template = 
LJInteractionTemplate(state1=state1.name, state2=state2.name, sigma=PintQuantity.from_quantity(q=lj_parameters["sigma"], diff --git a/pyMBE/storage/io.py b/pyMBE/storage/io.py index bf9aeb5..7d00caa 100644 --- a/pyMBE/storage/io.py +++ b/pyMBE/storage/io.py @@ -21,9 +21,8 @@ import json from pathlib import Path from typing import Any, Dict - import pandas as pd -from pint import UnitRegistry +import logging from pyMBE.storage.pint_quantity import PintQuantity from pyMBE.storage.templates.particle import ParticleTemplate, ParticleStateTemplate @@ -119,8 +118,8 @@ def _load_database_csv(db, folder): db (Manager): Database manager object to populate. folder (str or Path): Path to the folder containing CSV files. - Raises: - FileNotFoundError: If the folder does not exist. + Return: + (dict): metadata with additional information about the source of the information in the database. Notes: - PintQuantity objects are reconstructed from their dictionary representation. @@ -330,6 +329,21 @@ def _load_database_csv(db, folder): reactions[rx.name] = rx db._reactions = reactions + # Metadata + json_file = folder / "metadata.json" + if json_file.exists(): + try: + with open(json_file, "r", encoding="utf-8") as fh: + metadata = json.load(fh) + if not isinstance(metadata, dict): + raise ValueError("metadata.json must contain a JSON object") + except Exception as err: + logging.warning(f"Failed to read metadata file '{json_file}': {err}. Metadata will be ignored.") + metadata = {} + else: + metadata = {} + return metadata + def _load_reaction_set(path): """ Loads a set of reactions from a JSON file. 
From 5c06fae5d606cbff7e0089fdc6d8f7ed11913faa Mon Sep 17 00:00:00 2001 From: pmblanco Date: Thu, 22 Jan 2026 10:52:38 +0100 Subject: [PATCH 27/55] clean up gcmc functional test --- README.md | 4 ++-- testsuite/gcmc_tests.py | 27 +++++++++++++++++---------- 2 files changed, 19 insertions(+), 12 deletions(-) diff --git a/README.md b/README.md index 9bfdcfb..0212541 100644 --- a/README.md +++ b/README.md @@ -232,7 +232,7 @@ If you use pyMBE in your research, please cite our paper: When using a released version of pyMBE, we recommend citing the corresponding [Zenodo record](https://doi.org/10.5281/zenodo.12102634) in addition to the pyMBE paper, -for example: "We set up our coarse-grained models using pyMBE v0.8.0 +for example: "We set up our coarse-grained models using pyMBE v1.0.0 \cite{beyer2024pymbe,zenodo2024pymbe}". Please also make sure to properly cite the original authors if you use the resources provided in the `pyMBE/parameters/` folder. @@ -247,7 +247,7 @@ contribute to pyMBE and find our authorship policy and code of conduct. ## License -Copyright (C) 2023-2025 pyMBE-dev team +Copyright (C) 2023-2026 pyMBE-dev team pyMBE is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by diff --git a/testsuite/gcmc_tests.py b/testsuite/gcmc_tests.py index 5e54ae8..2577994 100644 --- a/testsuite/gcmc_tests.py +++ b/testsuite/gcmc_tests.py @@ -25,13 +25,12 @@ from pyMBE.lib import analysis import numpy as np +import unittest as ut + + # Template of the test def gcmc_test(script_path, mode): - if mode == "ideal": - print("*** Running test for GCMC of salt solution (ideal). ***") - elif mode == "interacting": - print("*** Running test for GCMC of salt solution (interacting). 
***") with tempfile.TemporaryDirectory() as time_series_path: for c_salt_res in salt_concentrations: print(f"c_salt_res = {c_salt_res}") @@ -42,12 +41,10 @@ def gcmc_test(script_path, mode): data=analysis.analyze_time_series(path_to_datafolder=time_series_path, filename_extension="_time_series.csv") - print(data["csalt","value"]) # Check concentration test_concentration=np.sort(data["csalt","value"].to_numpy(dtype=float)) ref_concentration=np.sort(data["mean","c_salt"].to_numpy()) np.testing.assert_allclose(test_concentration, ref_concentration, rtol=rtol, atol=atol) - print("*** Test was successful ***") # Create an instance of pyMBE library pmb = pyMBE.pymbe_library(seed=42) @@ -58,8 +55,18 @@ def gcmc_test(script_path, mode): rtol=0.05 # relative tolerance atol=0.0 # absolute tolerance -# Ideal test -gcmc_test(script_path, "ideal") +class Test(ut.TestCase): + def test_gcmc_ideal(self): + """ + Functional test to test the GCMC implementation in pyMBE for an ideal system. + """ + gcmc_test(script_path, "ideal") + + def test_gcmc_interacting(self): + """ + Functional test to test the GCMC implementation in pyMBE for an interacting system. 
+ """ + gcmc_test(script_path, "interacting") -# Interacting test -gcmc_test(script_path, "interacting") +if __name__ == "__main__": + ut.main() \ No newline at end of file From aa94f9cb62bbc832d7fedec9ba27d4ccce944bc2 Mon Sep 17 00:00:00 2001 From: pmblanco Date: Thu, 22 Jan 2026 17:43:23 +0100 Subject: [PATCH 28/55] fix bugs in setup bond, fix unittest --- pyMBE/pyMBE.py | 116 ++++--- pyMBE/storage/manager.py | 11 +- testsuite/bond_tests.py | 410 ++++++++++++------------ testsuite/gcmc_tests.py | 2 +- testsuite/generate_coordinates_tests.py | 2 +- testsuite/lj_tests.py | 2 +- testsuite/setup_salt_ions_unit_tests.py | 1 - 7 files changed, 285 insertions(+), 259 deletions(-) diff --git a/pyMBE/pyMBE.py b/pyMBE/pyMBE.py index bf4c79c..7de65c6 100644 --- a/pyMBE/pyMBE.py +++ b/pyMBE/pyMBE.py @@ -107,8 +107,27 @@ def __init__(self, seed, temperature=None, unit_length=None, unit_charge=None, K self.db = Manager(units=self.units) self.lattice_builder = None self.root = importlib.resources.files(__package__) - self._bond_instances={} + + + def _check_bond_inputs(self, bond_type, bond_parameters): + """ + Checks that the input bond parameters are valid within the current pyMBE implementation. + Args: + bond_type(`str`): label to identify the potential to model the bond. + bond_parameters(`dict`): parameters of the potential of the bond. + + + """ + valid_bond_types = ["harmonic", "FENE"] + if bond_type not in valid_bond_types: + raise NotImplementedError(f"Bond type '{bond_type}' currently not implemented in pyMBE, accepted types are {valid_bond_types}") + required_parameters = {"harmonic": ["r_0","k"], + "FENE": ["r_0","k","d_r_max"]} + for required_parameter in required_parameters[bond_type]: + if required_parameter not in bond_parameters.keys(): + raise ValueError(f"Missing required parameter {required_parameter} for {bond_type} bond") + def _create_espresso_bond_instance(self, bond_type, bond_parameters): """ Creates an ESPResSo bond instance. 
@@ -134,14 +153,8 @@ def _create_espresso_bond_instance(self, bond_type, bond_parameters): (`espressomd.interactions`): instance of an ESPResSo bond object """ from espressomd import interactions - valid_bond_types = ["harmonic", "FENE"] - if bond_type not in valid_bond_types: - raise NotImplementedError(f"Bond type '{bond_type}' currently not implemented in pyMBE, accepted types are {valid_bond_types}") - required_parameters = {"harmonic": ["r_0","k"], - "FENE": ["r_0","k","d_r_max"]} - for required_parameter in required_parameters[bond_type]: - if required_parameter not in bond_parameters.keys(): - raise ValueError(f"Missing required parameter {required_parameter} for {bond_type} bond") + self._check_bond_inputs(bond_parameters=bond_parameters, + bond_type=bond_type) if bond_type == 'harmonic': bond_instance = interactions.HarmonicBond(k = bond_parameters["k"].m_as("reduced_energy/reduced_length**2"), r_0 = bond_parameters["r_0"].m_as("reduced_length")) @@ -215,12 +228,14 @@ def _create_hydrogel_chain(self, hydrogel_chain, nodes, espresso_system): chain_pids = self.db._find_instance_ids_by_attribute(pmb_type="particle", attribute="molecule_id", value=mol_id) - start_bond_instance = self.get_espresso_bond_instance(particle_name1=nodes[node_start_label]["name"], - particle_name2=part_start_chain_name, - espresso_system=espresso_system) - end_bond_instance = self.get_espresso_bond_instance(particle_name1=nodes[node_end_label]["name"], - particle_name2=part_end_chain_name, - espresso_system=espresso_system) + bond_tpl1 = self.get_bond_template(particle_name1=nodes[node_start_label]["name"], + particle_name2=part_start_chain_name) + start_bond_instance = self._get_espresso_bond_instance(bond_template=bond_tpl1, + espresso_system=espresso_system) + bond_tpl2 = self.get_bond_template(particle_name1=nodes[node_end_label]["name"], + particle_name2=part_end_chain_name) + end_bond_instance = self._get_espresso_bond_instance(bond_template=bond_tpl2, + 
espresso_system=espresso_system) espresso_system.part.by_id(start_node_id).add_bond((start_bond_instance, chain_pids[0])) espresso_system.part.by_id(chain_pids[-1]).add_bond((end_bond_instance, end_node_id)) return mol_id @@ -624,24 +639,24 @@ def create_bond(self, particle_id1, particle_id2, espresso_system, use_default_b particle_id2 (int): pyMBE and ESPResSo ID of the second particle. espresso_system (espressomd.system.System): ESPResSo system object where the bond will be created. use_default_bond (bool, optional): If True, use a default bond template if no specific template exists. Defaults to False. + + Returns: + (int): bond_id of the bond instance created in the pyMBE database. """ particle_inst_1 = self.db.get_instance(pmb_type="particle", instance_id=particle_id1) particle_inst_2 = self.db.get_instance(pmb_type="particle", instance_id=particle_id2) - - bond_inst = self.get_espresso_bond_instance(particle_name1=particle_inst_1.name, - particle_name2=particle_inst_2.name, - espresso_system=espresso_system, - use_default_bond=use_default_bond) + bond_tpl = self.get_bond_template(particle_name1=particle_inst_1.name, + particle_name2=particle_inst_2.name, + use_default_bond=use_default_bond) + bond_inst = self._get_espresso_bond_instance(bond_template=bond_tpl, + espresso_system=espresso_system, + use_default_bond=use_default_bond) espresso_system.part.by_id(particle_id1).add_bond((bond_inst, particle_id2)) - if use_default_bond: - bond_name = "default" - else: - bond_name = BondTemplate.make_bond_key(pn1=particle_inst_1.name, - pn2=particle_inst_2.name) - pmb_bond_instance = BondInstance(bond_id=self.db._propose_instance_id(pmb_type="bond"), - name=bond_name, + bond_id = self.db._propose_instance_id(pmb_type="bond") + pmb_bond_instance = BondInstance(bond_id=bond_id, + name=bond_tpl.name, particle_id1=particle_id1, particle_id2=particle_id2) self.db._register_instance(instance=pmb_bond_instance) @@ -1169,7 +1184,8 @@ def define_bond(self, bond_type, 
bond_parameters, particle_pairs): - d_r_max (`pint.Quantity`): Maximal stretching length for FENE. It should have units of length using the `pmb.units` UnitRegistry. Default 'None'. """ - + self._check_bond_inputs(bond_parameters=bond_parameters, + bond_type=bond_type) parameters_expected_dimensions={"r_0": "length", "k": "energy/length**2", "d_r_max": "length"} @@ -1206,6 +1222,8 @@ def define_default_bond(self, bond_type, bond_parameters): Note: - Currently, only harmonic and FENE bonds are supported. """ + self._check_bond_inputs(bond_parameters=bond_parameters, + bond_type=bond_type) parameters_expected_dimensions={"r_0": "length", "k": "energy/length**2", "d_r_max": "length"} @@ -1462,6 +1480,7 @@ def delete_instances_in_system(self, instance_id, pmb_type, espresso_system): particle_ids = self.db._find_instance_ids_by_attribute(pmb_type="particle", attribute=instance_identifier, value=instance_id) + self._delete_particles_from_espresso(particle_ids=particle_ids, espresso_system=espresso_system) @@ -1701,16 +1720,24 @@ def get_bond_template(self, particle_name1, particle_name2, use_default_bond=Fal bond(`espressomd.interactions.BondedInteractions`): bond object from the espressomd library. Note: - - If `use_default_bond`=True and no bond is defined between `particle_name1` and `particle_name2`, it returns the default bond defined in `pmb.df`. + - If `use_default_bond`=True and no bond is defined between `particle_name1` and `particle_name2`, it returns the default bond defined in the pyMBE database. 
""" - if use_default_bond: - bond_key = "default" - else: - bond_key = BondTemplate.make_bond_key(pn1=particle_name1, + # Try to find a specific bond template + bond_key = BondTemplate.make_bond_key(pn1=particle_name1, pn2=particle_name2) - bond_tpl = self.db.get_template(name=bond_key, + try: + return self.db.get_template(name=bond_key, pmb_type="bond") - return bond_tpl + except ValueError: + pass + + # Fallback to default bond if allowed + if use_default_bond: + return self.db.get_template(name="default", + pmb_type="bond") + + # No bond template found + raise ValueError(f"No bond template found between '{particle_name1}' and '{particle_name2}', and default bonds are deactivated.") def get_charge_number_map(self): """ @@ -1738,7 +1765,7 @@ def get_charge_number_map(self): return charge_number_map - def get_espresso_bond_instance(self, particle_name1, particle_name2, espresso_system, use_default_bond=False): + def _get_espresso_bond_instance(self, bond_template, espresso_system, use_default_bond=False): """ Retrieve or create a bond instance in an ESPResSo system for a given pair of particle names. @@ -1747,8 +1774,7 @@ def get_espresso_bond_instance(self, particle_name1, particle_name2, espresso_sy instance. Otherwise, it creates a new ESPResSo bond instance using the bond template. Args: - particle_name1 (str): Name of the first particle involved in the bond. - particle_name2 (str): Name of the second particle involved in the bond. + bond_template (BondTemplate): BondTemplate object from the pyMBE database. espresso_system: An ESPResSo system object where the bond will be added or retrieved. use_default_bond (bool, optional): If True, use a default bond template when no specific template exists for the particle pair. Defaults to False. @@ -1762,16 +1788,14 @@ def get_espresso_bond_instance(self, particle_name1, particle_name2, espresso_sy Note: When a new bond instance is created, it is not added to the ESPResSo system. 
""" - bond_tpl = self.get_bond_template(particle_name1=particle_name1, - particle_name2=particle_name2, - use_default_bond=use_default_bond) - if bond_tpl.name in self._bond_instances.keys(): - bond_inst = self._bond_instances[bond_tpl.name] + + if bond_template.name in self.db.espresso_bond_instances.keys(): + bond_inst = self.db.espresso_bond_instances[bond_template.name] else: # Create an instance of the bond - bond_inst = self._create_espresso_bond_instance(bond_type=bond_tpl.bond_type, - bond_parameters=bond_tpl.get_parameters(self.units)) - self._bond_instances[bond_tpl.name]= bond_inst + bond_inst = self._create_espresso_bond_instance(bond_type=bond_template.bond_type, + bond_parameters=bond_template.get_parameters(self.units)) + self.db.espresso_bond_instances[bond_template.name]= bond_inst espresso_system.bonded_inter.add(bond_inst) return bond_inst diff --git a/pyMBE/storage/manager.py b/pyMBE/storage/manager.py index 4e8f42f..f4971ee 100644 --- a/pyMBE/storage/manager.py +++ b/pyMBE/storage/manager.py @@ -88,6 +88,7 @@ def __init__(self,units): "protein"] self._assembly_like_types = ["hydrogel"] self._pmb_types = ["particle", "residue"] + self._molecule_like_types + self._assembly_like_types + self.espresso_bond_instances= {} def _delete_bonds_of_particle(self, pid): """ @@ -706,6 +707,10 @@ def delete_template(self, pmb_type, name): # Delete del self._templates[pmb_type][name] + # if it is a bond template delete also stored espresso bond instances + if pmb_type == "bond": + if name in self.espresso_bond_instances.keys(): + del self.espresso_bond_instances[name] # Delete empty groups if not self._templates[pmb_type]: @@ -726,8 +731,10 @@ def delete_templates(self, pmb_type): - If no templates exist for the given type, the method is a no-op. 
""" if pmb_type in self._templates: - del self._templates[pmb_type] - + templates = list(self._templates[pmb_type].keys()) + for template in templates: + self.delete_template(pmb_type=pmb_type, + name=template) def delete_instance(self, pmb_type, instance_id, cascade=False): """ diff --git a/testsuite/bond_tests.py b/testsuite/bond_tests.py index 2454bf6..83804c7 100644 --- a/testsuite/bond_tests.py +++ b/testsuite/bond_tests.py @@ -1,5 +1,5 @@ # -# Copyright (C) 2024-2025 pyMBE-dev team +# Copyright (C) 2024-2026 pyMBE-dev team # # This file is part of pyMBE. # @@ -20,11 +20,9 @@ import pyMBE import numpy as np import unittest as ut -import json.decoder -import json import io import logging -import pyMBE.storage.df_management as df_management +import espressomd # Create an in-memory log stream log_stream = io.StringIO() @@ -33,16 +31,39 @@ handlers=[logging.StreamHandler(log_stream)] ) # Create an instance of pyMBE library +espresso_system=espressomd.System (box_l = [10]*3) + pmb = pyMBE.pymbe_library(seed=42) +pmb.define_particle(name='A', + z=0, + sigma=0.4*pmb.units.nm, + epsilon=1*pmb.units('reduced_energy')) + +pmb.define_particle(name='B', + z=0, + sigma=0.4*pmb.units.nm, + epsilon=1*pmb.units('reduced_energy')) + +harmonic_params = {'r_0' : 0.4 * pmb.units.nm, + 'k' : 400 * pmb.units('reduced_energy / reduced_length**2')} + +FENE_params = {'r_0' : 0.4 * pmb.units.nm, + 'k' : 400 * pmb.units('reduced_energy / reduced_length**2'), + 'd_r_max': 0.8 * pmb.units.nm} class Test(ut.TestCase): - def setUp(self): - pmb.df = df_management._DFManagement._setup_df() + def get_bond_object(self, particle_id_pair): + """ + Returns the bond object stored in espresso between a given pair of bonded particle ids. + """ + for pid in particle_id_pair: + if espresso_system.part.by_id(pid).bonds: + return espresso_system.part.by_id(pid).bonds[0][0] def check_bond_setup(self, bond_object, input_parameters, bond_type): """ - Checks that pyMBE sets up a bond object correctly. 
+ Checks that pyMBE sets up a harmonic bond object correctly. Args: bond_object(`espressomd.interactions`): instance of a espresso bond object. @@ -59,131 +80,191 @@ def check_bond_setup(self, bond_object, input_parameters, bond_type): 'd_r_max': 'reduced_length'} for key in input_parameters.keys(): np.testing.assert_equal(actual=bond_params[key], - desired=input_parameters[key].m_as(reduced_units[key]), - verbose=True) - - def test_bond_harmonic(self): - pmb.define_particle(name='A', z=0, sigma=0.4*pmb.units.nm, epsilon=1*pmb.units('reduced_energy')) - - bond_type = 'harmonic' - bond = {'r_0' : 0.4 * pmb.units.nm, - 'k' : 400 * pmb.units('reduced_energy / reduced_length**2')} - - # check default bond - pmb.define_default_bond(bond_type = bond_type, - bond_parameters = bond) - - bond_object = pmb.filter_df(pmb_type='bond')['bond_object'].values[0] - self.check_bond_setup(bond_object=bond_object, - input_parameters=bond, - bond_type=bond_type) + desired=input_parameters[key].m_as(reduced_units[key]), + verbose=True) + def test_bond_setup(self): + """ + Unit test to check the setup of bonds in pyMBE + """ + #Define bond # check particle bond - pmb.define_bond(bond_type = bond_type, - bond_parameters = bond, + pmb.define_bond(bond_type = "harmonic", + bond_parameters = harmonic_params, particle_pairs = [['A', 'A']]) - - bond_object = pmb.filter_df(pmb_type='bond')['bond_object'].values[1] + # Create two particles + pids = pmb.create_particle(name="A", + espresso_system=espresso_system, + number_of_particles=2) + + pmb.create_bond(particle_id1=pids[0], + particle_id2=pids[1], + espresso_system=espresso_system, + use_default_bond=False) + + bond_object = self.get_bond_object(particle_id_pair=pids) + self.check_bond_setup(bond_object=bond_object, - input_parameters=bond, - bond_type=bond_type) - - # check bond deserialization - bond_params = bond_object.get_params() - bond_params["bond_id"] = bond_object._bond_id - deserialized = 
df_management._DFManagement._convert_str_to_bond_object( - f'{bond_object.__class__.__name__}({json.dumps(bond_params)})') - self.check_bond_setup(bond_object=deserialized, - input_parameters=bond, - bond_type=bond_type) - - def test_bond_fene(self): - pmb.define_particle(name='A', - z=0, - sigma=0.4*pmb.units.nm, - epsilon=1*pmb.units('reduced_energy')) - - bond_type = 'FENE' - bond = {'r_0' : 0.4 * pmb.units.nm, - 'k' : 400 * pmb.units('reduced_energy / reduced_length**2'), - 'd_r_max': 0.8 * pmb.units.nm} - - # check default bond - pmb.define_default_bond(bond_type = bond_type, - bond_parameters = bond) - - bond_object = pmb.filter_df(pmb_type='bond')['bond_object'].values[0] + input_parameters=harmonic_params, + bond_type="harmonic") + # Clean-up database + for inst_id in pids: + pmb.delete_instances_in_system(instance_id=inst_id, + pmb_type="particle", + espresso_system=espresso_system) + pid_A = pmb.create_particle(name="A", + espresso_system=espresso_system, + number_of_particles=1) + + harmonic_params_test = {'r_0' : 0.5 * pmb.units.nm, + 'k' : 500 * pmb.units('reduced_energy / reduced_length**2')} + pmb.define_bond(bond_type = "harmonic", + bond_parameters = harmonic_params_test, + particle_pairs = [['A', 'B']]) + + pid_B = pmb.create_particle(name="B", + espresso_system=espresso_system, + number_of_particles=1) + + # Test that the bond is properly setup when there is a default bond + pmb.define_default_bond(bond_type = "harmonic", + bond_parameters = harmonic_params) + + pmb.create_bond(particle_id1=pid_B[0], + particle_id2=pid_A[0], + espresso_system=espresso_system, + use_default_bond=True) + + bond_object = self.get_bond_object(particle_id_pair=[pid_B[0],pid_A[0]]) + self.check_bond_setup(bond_object=bond_object, - input_parameters=bond, - bond_type=bond_type) - - # check particle bond - pmb.define_bond(bond_type = bond_type, - bond_parameters = bond, + input_parameters=harmonic_params_test, + bond_type="harmonic") + # Clean-up database + for inst_id 
in pid_B+pid_A: + pmb.delete_instances_in_system(instance_id=inst_id, + pmb_type="particle", + espresso_system=espresso_system) + pmb.db.delete_templates(pmb_type="bond") + + # Test setup of FENE bonds + pmb.define_bond(bond_type = "FENE", + bond_parameters = FENE_params, particle_pairs = [['A', 'A']]) - - bond_object = pmb.filter_df(pmb_type='bond')['bond_object'].values[1] + # Create two particles + pids = pmb.create_particle(name="A", + espresso_system=espresso_system, + number_of_particles=2) + + pmb.create_bond(particle_id1=pids[0], + particle_id2=pids[1], + espresso_system=espresso_system, + use_default_bond=False) + + bond_object = self.get_bond_object(particle_id_pair=pids) + self.check_bond_setup(bond_object=bond_object, - input_parameters=bond, - bond_type=bond_type) - - # check bond deserialization - bond_params = bond_object.get_params() - bond_params["bond_id"] = bond_object._bond_id - deserialized = df_management._DFManagement._convert_str_to_bond_object( - f'{bond_object.__class__.__name__}({json.dumps(bond_params)})') - self.check_bond_setup(bond_object=deserialized, - input_parameters=bond, - bond_type=bond_type) - - # check bond default equilibrium length - bond_type = 'FENE' - bond = {'k' : 400 * pmb.units('reduced_energy / reduced_length**2'), - 'd_r_max': 0.8 * pmb.units.nm} - - pmb.define_bond(bond_type = bond_type, - bond_parameters = bond, - particle_pairs = [['A', 'A']]) - log_contents = log_stream.getvalue() - self.assertIn("no value provided for r_0. Defaulting to r_0 = 0", log_contents) - bond['r_0'] = 0. 
* pmb.units.nm - bond_object = pmb.filter_df(pmb_type='bond')['bond_object'].values[2] + input_parameters=FENE_params, + bond_type="FENE") + # Clean-up database + for inst_id in pids: + pmb.delete_instances_in_system(instance_id=inst_id, + pmb_type="particle", + espresso_system=espresso_system) + pid_A = pmb.create_particle(name="A", + espresso_system=espresso_system, + number_of_particles=1) + + FENE_params_test = {'r_0' : 0.5 * pmb.units.nm, + 'k' : 500 * pmb.units('reduced_energy / reduced_length**2'), + 'd_r_max': 0.5 * pmb.units.nm} + pmb.define_bond(bond_type = "FENE", + bond_parameters = FENE_params_test, + particle_pairs = [['A', 'B']]) + + pid_B = pmb.create_particle(name="B", + espresso_system=espresso_system, + number_of_particles=1) + + # Test that the FENE bond is properly setup when there is a default bond + pmb.define_default_bond(bond_type = "harmonic", + bond_parameters = harmonic_params) + + pmb.create_bond(particle_id1=pid_B[0], + particle_id2=pid_A[0], + espresso_system=espresso_system, + use_default_bond=True) + + bond_object = self.get_bond_object(particle_id_pair=[pid_B[0],pid_A[0]]) + self.check_bond_setup(bond_object=bond_object, - input_parameters=bond, - bond_type=bond_type) - - def test_bond_harmonic_and_fene(self): - pmb.define_particle(name='A', z=0, sigma=0.4*pmb.units.nm, epsilon=1*pmb.units('reduced_energy')) - pmb.define_particle(name='B', z=0, sigma=0.4*pmb.units.nm, epsilon=1*pmb.units('reduced_energy')) - - bond_type_1 = 'harmonic' - bond_1 = {'r_0' : 0.4 * pmb.units.nm, - 'k' : 400 * pmb.units('reduced_energy / reduced_length**2')} - pmb.define_bond(bond_type = bond_type_1, - bond_parameters = bond_1, - particle_pairs = [['A', 'A']]) - - bond_type_2 = 'FENE' - bond_2 = {'r_0' : 0.4 * pmb.units.nm, - 'k' : 400 * pmb.units('reduced_energy / reduced_length**2'), - 'd_r_max': 0.8 * pmb.units.nm} - - pmb.define_bond(bond_type = bond_type_2, - bond_parameters = bond_2, - particle_pairs = [['B', 'B']]) - - bond_object_1 = 
pmb.filter_df(pmb_type='bond')['bond_object'][2] - bond_object_2 = pmb.filter_df(pmb_type='bond')['bond_object'][3] - - self.check_bond_setup(bond_object=bond_object_1, - input_parameters=bond_1, - bond_type=bond_type_1) - self.check_bond_setup(bond_object=bond_object_2, - input_parameters=bond_2, - bond_type=bond_type_2) + input_parameters=FENE_params_test, + bond_type="FENE") + # Clean-up database + for inst_id in pid_B+pid_A: + pmb.delete_instances_in_system(instance_id=inst_id, + pmb_type="particle", + espresso_system=espresso_system) + + pmb.db.delete_templates(pmb_type="bond") + + # Test setup of the default bond + pmb.define_default_bond(bond_type = "harmonic", + bond_parameters = harmonic_params) + + pids = pmb.create_particle(name="A", + espresso_system=espresso_system, + number_of_particles=2) + + pmb.create_bond(particle_id1=pids[0], + particle_id2=pids[1], + espresso_system=espresso_system, + use_default_bond=True) + + bond_object = self.get_bond_object(particle_id_pair=pids) + + self.check_bond_setup(bond_object=bond_object, + input_parameters=harmonic_params, + bond_type="harmonic") + # Clean-up database + for inst_id in pids: + pmb.delete_instances_in_system(instance_id=inst_id, + pmb_type="particle", + espresso_system=espresso_system) + pmb.db.delete_templates(pmb_type="bond") + + # Test setup of default bond when there are other bonds defined + pid_A = pmb.create_particle(name="A", + espresso_system=espresso_system, + number_of_particles=1) + pid_B = pmb.create_particle(name="B", + espresso_system=espresso_system, + number_of_particles=1) + + pmb.define_default_bond(bond_type = "FENE", + bond_parameters = FENE_params_test) + pmb.define_bond(bond_type = "harmonic", + bond_parameters = harmonic_params_test, + particle_pairs = [['A', 'A'], ['B','B']]) + + pmb.create_bond(particle_id1=pid_B[0], + particle_id2=pid_A[0], + espresso_system=espresso_system, + use_default_bond=True) + + bond_object = 
self.get_bond_object(particle_id_pair=[pid_B[0],pid_A[0]]) + + self.check_bond_setup(bond_object=bond_object, + input_parameters=FENE_params_test, + bond_type="FENE") + # Clean-up database + for inst_id in pid_B+pid_A: + pmb.delete_instances_in_system(instance_id=inst_id, + pmb_type="particle", + espresso_system=espresso_system) + pmb.db.delete_templates(pmb_type="bond") def test_bond_raised_exceptions(self): - pmb.define_particle(name='A', z=0, sigma=0.4*pmb.units.nm, epsilon=1*pmb.units('reduced_energy')) for callback in [pmb.define_bond, pmb.define_default_bond]: with self.subTest(msg=f'using method {callback.__qualname__}()'): self.check_bond_exceptions(callback) @@ -242,91 +323,6 @@ def check_bond_exceptions(self, callback): np.testing.assert_raises(ValueError, callback, **input_parameters) - def test_bond_framework(self): - pmb.define_particle(name='A', z=0, sigma=0.4*pmb.units.nm, epsilon=1*pmb.units('reduced_energy')) - pmb.define_particle(name='B', z=0, sigma=0.4*pmb.units.nm, epsilon=1*pmb.units('reduced_energy')) - - pmb.add_bonds_to_espresso(None) - log_contents = log_stream.getvalue() - assert "there are no bonds defined in pymbe.df" in log_contents - - bond_type_1 = 'harmonic' - bond_1 = {'r_0' : 0.4 * pmb.units.nm, - 'k' : 400 * pmb.units('reduced_energy / reduced_length**2')} - pmb.define_bond(bond_type = bond_type_1, - bond_parameters = bond_1, - particle_pairs = [['A', 'A']]) - - bond_type_2 = 'FENE' - bond_2 = {'r_0' : 0.4 * pmb.units.nm, - 'k' : 400 * pmb.units('reduced_energy / reduced_length**2'), - 'd_r_max': 0.8 * pmb.units.nm} - - pmb.define_bond(bond_type = bond_type_2, - bond_parameters = bond_2, - particle_pairs = [['B', 'B']]) - - bond_object_1 = pmb.filter_df(pmb_type='bond')['bond_object'][2] - bond_object_2 = pmb.filter_df(pmb_type='bond')['bond_object'][3] - - self.check_bond_setup(bond_object=bond_object_1, - input_parameters=bond_1, - bond_type=bond_type_1) - self.check_bond_setup(bond_object=bond_object_2, - 
input_parameters=bond_2, - bond_type=bond_type_2) - - # check deserialization exceptions - with self.assertRaises(ValueError): - df_management._DFManagement._convert_str_to_bond_object('Not_A_Bond()') - with self.assertRaises(json.decoder.JSONDecodeError): - df_management._DFManagement._convert_str_to_bond_object('HarmonicBond({invalid_json})') - with self.assertRaises(NotImplementedError): - df_management._DFManagement._convert_str_to_bond_object('QuarticBond({"r_0": 1., "k": 1.})') - - # check bond keys - self.assertEqual(df_management._DFManagement._find_bond_key(df = pmb.df, particle_name1 = 'A', particle_name2 = 'A'), 'A-A') - self.assertEqual(df_management._DFManagement._find_bond_key(df = pmb.df, particle_name1 = 'B', particle_name2 = 'B'), 'B-B') - self.assertEqual(df_management._DFManagement._find_bond_key(df = pmb.df, particle_name1 = 'A', particle_name2 = 'A', use_default_bond=True), 'A-A') - self.assertEqual(df_management._DFManagement._find_bond_key(df = pmb.df, particle_name1 = 'Z', particle_name2 = 'Z', use_default_bond=True), 'default') - self.assertIsNone(df_management._DFManagement._find_bond_key(df = pmb.df, particle_name1 = 'A', particle_name2 = 'B')) - self.assertIsNone(df_management._DFManagement._find_bond_key(df = pmb.df, particle_name1 = 'B', particle_name2 = 'A')) - self.assertIsNone(df_management._DFManagement._find_bond_key(df = pmb.df, particle_name1 = 'Z', particle_name2 = 'Z')) - self.assertEqual(df_management._DFManagement._find_bond_key(df = pmb.df, particle_name1 = 'A', particle_name2 = 'B', use_default_bond=True), 'default') - - self.assertIsNone(pmb.search_bond('A', 'B', hard_check=False)) - log_contents = log_stream.getvalue() - self.assertIn("Bond not defined between particles A and B", log_contents) - - with self.assertRaises(ValueError): - pmb.search_bond('A', 'B', use_default_bond=True) - - with self.assertRaises(ValueError): - pmb.search_bond('A', 'B' , hard_check=True) - - # check invalid bond index - 
df_management._DFManagement._add_value_to_df(df = pmb.df, - key = ('particle_id',''), - new_value = 10, - index = np.where(pmb.df['name']=='A')[0][0]) - df_management._DFManagement._add_value_to_df(df = pmb.df, - key = ('particle_id',''), - new_value = 20, - index = np.where(pmb.df['name']=='B')[0][0]) - self.assertIsNone(df_management._DFManagement._add_bond_in_df(pmb.df, 10, 20, use_default_bond=False)) - self.assertIsNone(df_management._DFManagement._add_bond_in_df(pmb.df, 10, 20, use_default_bond=True)) - - # check bond lengths - self.assertAlmostEqual(pmb.get_bond_length('A', 'A'), - bond_object_1.r_0, delta=1e-7) - self.assertAlmostEqual(pmb.get_bond_length('B', 'B'), - bond_object_2.r_0, delta=1e-7) - self.assertIsNone(pmb.get_bond_length('A', 'B')) - log_contents = log_stream.getvalue() - self.assertIn("Bond not defined between particles A and B", log_contents) - with self.assertRaises(ValueError): - pmb.get_bond_length('A', 'B', hard_check=True) - if __name__ == '__main__': ut.main() diff --git a/testsuite/gcmc_tests.py b/testsuite/gcmc_tests.py index 2577994..441f8c5 100644 --- a/testsuite/gcmc_tests.py +++ b/testsuite/gcmc_tests.py @@ -1,5 +1,5 @@ # -# Copyright (C) 2024 pyMBE-dev team +# Copyright (C) 2024-2026 pyMBE-dev team # # This file is part of pyMBE. # diff --git a/testsuite/generate_coordinates_tests.py b/testsuite/generate_coordinates_tests.py index 8363f69..01468e9 100644 --- a/testsuite/generate_coordinates_tests.py +++ b/testsuite/generate_coordinates_tests.py @@ -1,5 +1,5 @@ # -# Copyright (C) 2024 pyMBE-dev team +# Copyright (C) 2024-2026 pyMBE-dev team # # This file is part of pyMBE. # diff --git a/testsuite/lj_tests.py b/testsuite/lj_tests.py index 4d8f281..fe4f787 100644 --- a/testsuite/lj_tests.py +++ b/testsuite/lj_tests.py @@ -1,5 +1,5 @@ # -# Copyright (C) 2024-2025 pyMBE-dev team +# Copyright (C) 2024-2026 pyMBE-dev team # # This file is part of pyMBE. 
# diff --git a/testsuite/setup_salt_ions_unit_tests.py b/testsuite/setup_salt_ions_unit_tests.py index 9a32f51..085fc77 100644 --- a/testsuite/setup_salt_ions_unit_tests.py +++ b/testsuite/setup_salt_ions_unit_tests.py @@ -16,7 +16,6 @@ # You should have received a copy of the GNU General Public License # along with this program. If not, see . -import numpy as np import espressomd from pyMBE.lib.handy_functions import get_number_of_particles import unittest as ut From 1b52ad349b126ace74c8ed040c4db590ff3629cd Mon Sep 17 00:00:00 2001 From: pmblanco Date: Thu, 22 Jan 2026 18:25:04 +0100 Subject: [PATCH 29/55] fix net charge test --- pyMBE/pyMBE.py | 9 +- testsuite/bond_tests.py | 8 -- testsuite/calculate_net_charge_unit_test.py | 128 ++++++++++++-------- testsuite/charge_number_map_tests.py | 100 ++++++++------- 4 files changed, 128 insertions(+), 117 deletions(-) diff --git a/pyMBE/pyMBE.py b/pyMBE/pyMBE.py index 7de65c6..58b2829 100644 --- a/pyMBE/pyMBE.py +++ b/pyMBE/pyMBE.py @@ -469,7 +469,7 @@ def calc_partition_coefficient(charge, c_macro): return {"charges_dict": Z_HH_Donnan, "pH_system_list": pH_system_list, "partition_coefficients": partition_coefficients_list} - def calculate_net_charge(self, espresso_system, molecule_name, dimensionless=False): + def calculate_net_charge(self, espresso_system, molecule_name, pmb_type, dimensionless=False): ''' Calculates the net charge per molecule of molecules with `name` = molecule_name. Returns the net charge per molecule and a maps with the net charge per residue and molecule. 
@@ -485,10 +485,9 @@ def calculate_net_charge(self, espresso_system, molecule_name, dimensionless=Fal Note: - The net charge of the molecule is averaged over all molecules of type `name` - The net charge of each particle type is averaged over all particle of the same type in all molecules of type `name` - ''' - self._check_supported_molecule(molecule_name=molecule_name, - valid_pmb_types=["molecule","protein","peptide"]) - + ''' + if pmb_type not in self.db._molecule_like_types: + raise ValueError(f"{pmb_type} are not supported. Current supported types are: {self.db._molecule_like_types}") id_map = self.get_particle_id_map(object_name=molecule_name) def create_charge_map(espresso_system,id_map,label): charge_number_map={} diff --git a/testsuite/bond_tests.py b/testsuite/bond_tests.py index 83804c7..3c899b9 100644 --- a/testsuite/bond_tests.py +++ b/testsuite/bond_tests.py @@ -20,16 +20,8 @@ import pyMBE import numpy as np import unittest as ut -import io -import logging import espressomd -# Create an in-memory log stream -log_stream = io.StringIO() -logging.basicConfig(level=logging.INFO, - format="%(levelname)s: %(message)s", - handlers=[logging.StreamHandler(log_stream)] ) - # Create an instance of pyMBE library espresso_system=espressomd.System (box_l = [10]*3) diff --git a/testsuite/calculate_net_charge_unit_test.py b/testsuite/calculate_net_charge_unit_test.py index a7af915..f88fe1a 100644 --- a/testsuite/calculate_net_charge_unit_test.py +++ b/testsuite/calculate_net_charge_unit_test.py @@ -1,5 +1,5 @@ # -# Copyright (C) 2024 pyMBE-dev team +# Copyright (C) 2024-2026 pyMBE-dev team # # This file is part of pyMBE. 
# @@ -18,20 +18,26 @@ import numpy as np import espressomd +import unittest as ut + # Create an instance of pyMBE library import pyMBE pmb = pyMBE.pymbe_library(seed=42) -print("*** Unit test: check that calculate_net_charge calculates the charge in a molecule properly ***") - pmb.define_particle(name='0P', - z=0) + z=0, + sigma=1*pmb.units.reduced_length, + epsilon=1*pmb.units.reduced_energy) pmb.define_particle(name='+1p', - z=+1) + z=+1, + sigma=1*pmb.units.reduced_length, + epsilon=1*pmb.units.reduced_energy) pmb.define_particle(name='-1p', - z=-1) + z=-1, + sigma=1*pmb.units.reduced_length, + epsilon=1*pmb.units.reduced_energy) pmb.define_residue( name = 'R1', @@ -63,52 +69,68 @@ # Create an instance of an espresso system espresso_system=espressomd.System(box_l = [10]*3) -# Add all bonds to espresso system -pmb.add_bonds_to_espresso(espresso_system=espresso_system) - # Create your molecules into the espresso system -molecules = pmb.create_molecule(name=molecule_name, - number_of_molecules= 2, - espresso_system=espresso_system, - use_default_bond=True,) - -# Check the case where the returned charge has a dimension -charge_map=pmb.calculate_net_charge(molecule_name=molecule_name, - espresso_system=espresso_system) - -# Check mean charge -np.testing.assert_equal(charge_map["mean"], 2.0*pmb.units.Quantity(1,'reduced_charge')) -# Check molecule charge map -np.testing.assert_equal(charge_map["molecules"],{0: 2.0*pmb.units.Quantity(1,'reduced_charge'), 1: 2.0*pmb.units.Quantity(1,'reduced_charge')}) -# Check residue charge map -np.testing.assert_equal(charge_map["residues"],{0: 1.0*pmb.units.Quantity(1,'reduced_charge'), - 1: 1.0*pmb.units.Quantity(1,'reduced_charge'), - 2: 0.0*pmb.units.Quantity(1,'reduced_charge'), - 3: 0.0*pmb.units.Quantity(1,'reduced_charge'), - 4: 0.0*pmb.units.Quantity(1,'reduced_charge'), - 5: 1.0*pmb.units.Quantity(1,'reduced_charge'), - 6: 1.0*pmb.units.Quantity(1,'reduced_charge'), - 7: 0.0*pmb.units.Quantity(1,'reduced_charge'), - 8: 
0.0*pmb.units.Quantity(1,'reduced_charge'), - 9: 0.0*pmb.units.Quantity(1,'reduced_charge')}) - -# Check the case where the returned charge is dimensionless -charge_map=pmb.calculate_net_charge(molecule_name=molecule_name, - espresso_system=espresso_system, - dimensionless=True) - -# Check mean charge -np.testing.assert_equal(charge_map["mean"], 2.0) -# Check molecule charge map -np.testing.assert_equal(charge_map["molecules"],{0: 2.0, 1: 2.0}) -# Check residue charge map -np.testing.assert_equal(charge_map["residues"],{0: 1.0, 1: 1.0, 2: 0.0, 3: 0.0, 4: 0.0, 5: 1.0, 6: 1.0, 7: 0.0, 8: 0.0, 9: 0.0}) - - -print("*** Unit test passed ***") -print("*** Unit test: check that calculate_net_charge raises a ValueError if one provides the name of an object that is not a molecule ***") -input_parameters={"molecule_name":"R1", - "espresso_system":espresso_system} -np.testing.assert_raises(ValueError, pmb.calculate_net_charge, **input_parameters) -print("*** Unit test passed ***") +pmb.create_molecule(name=molecule_name, + number_of_molecules= 2, + espresso_system=espresso_system, + use_default_bond=True) + +class Test(ut.TestCase): + def test_calculate_net_charge_with_units(self): + """ + *** Unit test: check that calculate_net_charge calculates the charge in a molecule properly with units + """ + + + # Check the case where the returned charge has a dimension + charge_map=pmb.calculate_net_charge(molecule_name=molecule_name, + espresso_system=espresso_system, + pmb_type="molecule") + + # Check mean charge + np.testing.assert_equal(charge_map["mean"], 2.0*pmb.units.Quantity(1,'reduced_charge')) + # Check molecule charge map + np.testing.assert_equal(charge_map["molecules"],{0: 2.0*pmb.units.Quantity(1,'reduced_charge'), 1: 2.0*pmb.units.Quantity(1,'reduced_charge')}) + # Check residue charge map + np.testing.assert_equal(charge_map["residues"],{0: 1.0*pmb.units.Quantity(1,'reduced_charge'), + 1: 1.0*pmb.units.Quantity(1,'reduced_charge'), + 2: 
0.0*pmb.units.Quantity(1,'reduced_charge'), + 3: 0.0*pmb.units.Quantity(1,'reduced_charge'), + 4: 0.0*pmb.units.Quantity(1,'reduced_charge'), + 5: 1.0*pmb.units.Quantity(1,'reduced_charge'), + 6: 1.0*pmb.units.Quantity(1,'reduced_charge'), + 7: 0.0*pmb.units.Quantity(1,'reduced_charge'), + 8: 0.0*pmb.units.Quantity(1,'reduced_charge'), + 9: 0.0*pmb.units.Quantity(1,'reduced_charge')}) + + def test_calculate_net_charge_without_units(self): + """ + *** Unit test: check that calculate_net_charge calculates the charge in a molecule properly without units + """ + + # Check the case where the returned charge is dimensionless + charge_map=pmb.calculate_net_charge(molecule_name=molecule_name, + espresso_system=espresso_system, + dimensionless=True, + pmb_type="molecule") + + # Check mean charge + np.testing.assert_equal(charge_map["mean"], 2.0) + # Check molecule charge map + np.testing.assert_equal(charge_map["molecules"],{0: 2.0, 1: 2.0}) + # Check residue charge map + np.testing.assert_equal(charge_map["residues"],{0: 1.0, 1: 1.0, 2: 0.0, 3: 0.0, 4: 0.0, 5: 1.0, 6: 1.0, 7: 0.0, 8: 0.0, 9: 0.0}) + + + def test_sanitity_calculate_net_charge(self): + """ + Sanity check that calculate_net_charge raises a ValueError if one provides the name of an object that is not a molecule + """ + input_parameters={"molecule_name":"R1", + "espresso_system":espresso_system, + "pmb_type": "residue"} + np.testing.assert_raises(ValueError, pmb.calculate_net_charge, **input_parameters) + +if __name__ == '__main__': + ut.main() diff --git a/testsuite/charge_number_map_tests.py b/testsuite/charge_number_map_tests.py index 917bfd5..2b066dc 100644 --- a/testsuite/charge_number_map_tests.py +++ b/testsuite/charge_number_map_tests.py @@ -1,5 +1,5 @@ # -# Copyright (C) 2024 pyMBE-dev team +# Copyright (C) 2024-2026 pyMBE-dev team # # This file is part of pyMBE. 
# @@ -19,64 +19,62 @@ # Import pyMBE and other libraries import pyMBE import numpy as np -import pyMBE.storage.df_management as df_management +import unittest as ut # Create an instance of pyMBE library pmb = pyMBE.pymbe_library(seed=42) +pmb.define_particle(name="I", + pka = np.nan, + sigma = 1* pmb.units.nm, + epsilon= 1 *pmb.units.reduced_energy, + z =5) -def check_charge_number_map(input_parameters): - """ - Checks if pyMBE stores in the pmb.df the input parameters for acid/base particles correctly. +pmb.define_particle(name = "A", + acidity = "acidic", + sigma = 1* pmb.units.nm, + epsilon = 1 *pmb.units.reduced_energy, + pka =4) - Args: - input_parameters(`dict`): dictionary with the input parameters for define_particle. +pmb.define_particle(name ="B", + acidity = "basic", + sigma = 1* pmb.units.nm, + epsilon = 1 *pmb.units.reduced_energy, + pka = 4) - """ - pmb.define_particle(**input_parameters) +charge_map = pmb.get_charge_number_map() +type_map = pmb.get_type_map() - if input_parameters["acidity"] == "inert": - np.testing.assert_equal(actual=pmb.get_charge_number_map(), - desired={0: input_parameters["z"]}, - verbose=True) - elif input_parameters["acidity"] == "acidic": - np.testing.assert_equal(actual=pmb.get_charge_number_map(), - desired={0: 0, 1: -1}, - verbose=True) - elif input_parameters["acidity"] == "basic": - np.testing.assert_equal(actual=pmb.get_charge_number_map(), - desired={0: 1, 1: 0}, - verbose=True) +class Test(ut.TestCase): + def test_inert_particle(self): + """ + Check that get_charge_number_map works correctly for inert particles + """ + self.assertEqual(actual=charge_map[type_map["I"]], + desired=5, + verbose=True) -print("*** get_charge_number_map unit tests ***") -print("*** Unit test: check that get_charge_number_map works correctly for inert particles***") -# Clean pmb.df -pmb.df = df_management._DFManagement._setup_df() -input_parameters={"name":"I", - "acidity": "inert", - "pka": np.nan, - "z":5} + def 
test_acidic_particle(self): + """ + Check that get_charge_number_map works correctly for acidic particles + """ + self.assertEqual(actual=charge_map[type_map["AH"]], + desired=0, + verbose=True) + self.assertEqual(actual=charge_map[type_map["A"]], + desired=-1, + verbose=True) -check_charge_number_map(input_parameters) + def test_basic_particle(self): + """ + Check that get_charge_number_map works correctly for basic particles + """ + self.assertEqual(actual=charge_map[type_map["BH"]], + desired=1, + verbose=True) + self.assertEqual(actual=charge_map[type_map["B"]], + desired=0, + verbose=True) -print("*** Unit test passed ***") -print("*** Unit test: check that get_charge_number_map works correctly for acidic particles***") -# Clean pmb.df -pmb.df = df_management._DFManagement._setup_df() -input_parameters={"name":"A", - "acidity": "acidic", - "pka":4} -check_charge_number_map(input_parameters) - -print("*** Unit test passed ***") -print("*** Unit test: check that get_charge_number_map works correctly for basic particles***") -# Clean pmb.df -pmb.df = df_management._DFManagement._setup_df() -input_parameters={"name":"B", - "acidity": "basic", - "pka":4} - -check_charge_number_map(input_parameters) - -print("*** Unit test passed ***") -print("*** All unit tests passed ***") +if __name__ == '__main__': + ut.main() \ No newline at end of file From aa7019e89e5ea133cb535a746d8c8901f765f270 Mon Sep 17 00:00:00 2001 From: pmblanco Date: Thu, 22 Jan 2026 19:10:20 +0100 Subject: [PATCH 30/55] fix parameter test --- pyMBE/pyMBE.py | 15 +- pyMBE/storage/io.py | 3 +- pyMBE/storage/manager.py | 133 +++++++------- testsuite/CTestTestfile.cmake | 3 +- testsuite/charge_number_map_tests.py | 26 ++- testsuite/parameter_test.py | 168 ++++++------------ testsuite/serialization_test.py | 23 +-- testsuite/test_in_out_pmb_df.py | 133 -------------- ...d-write-df_test.py => test_io_database.py} | 0 9 files changed, 152 insertions(+), 352 deletions(-) delete mode 100644 
testsuite/test_in_out_pmb_df.py rename testsuite/{read-write-df_test.py => test_io_database.py} (100%) diff --git a/pyMBE/pyMBE.py b/pyMBE/pyMBE.py index 58b2829..544958d 100644 --- a/pyMBE/pyMBE.py +++ b/pyMBE/pyMBE.py @@ -1949,7 +1949,16 @@ def get_radius_map(self, dimensionless=True): for state in self.db.get_particle_states_templates(particle_name=tpl.name).values(): result[state.es_type] = radius return result - + + def get_reactions_df(self): + """ + Returns a dataframe with all reaction templates ` in the pyMBE database. + + Returns: + (Pandas.Dataframe): Dataframe with all reaction templates. + """ + return self.db._get_reactions_df() + def get_reduced_units(self): """ Returns the current set of reduced units defined in pyMBE. @@ -1974,10 +1983,10 @@ def get_templates_df(self, pmb_type): Returns a dataframe with all templates of type `pmb_type` in the pyMBE database. Args: - pmb_type(`str`): pmb type to search templates in the pyMBE database. + pmb_type(str): pmb type to search templates in the pyMBE database. Returns: - templates_df(`Pandas.Dataframe`): Dataframe with all templates of type `pmb_type`. + (Pandas.Dataframe): Dataframe with all templates of type `pmb_type`. 
""" return self.db._get_templates_df(pmb_type=pmb_type) diff --git a/pyMBE/storage/io.py b/pyMBE/storage/io.py index 7d00caa..3699346 100644 --- a/pyMBE/storage/io.py +++ b/pyMBE/storage/io.py @@ -326,8 +326,7 @@ def _load_database_csv(db, folder): pK=float(row["pK"]) if (row.get("pK") not in (None, "", "nan")) else None, reaction_type=row.get("reaction_type", None), metadata=metadata) - reactions[rx.name] = rx - db._reactions = reactions + db._reactions[rx.name] = rx # Metadata json_file = folder / "metadata.json" diff --git a/pyMBE/storage/manager.py b/pyMBE/storage/manager.py index f4971ee..02ab6eb 100644 --- a/pyMBE/storage/manager.py +++ b/pyMBE/storage/manager.py @@ -674,68 +674,6 @@ def _propose_instance_id(self, pmb_type): return 0 used_ids = list(self._instances[pmb_type].keys()) return max(used_ids) + 1 - - def delete_template(self, pmb_type, name): - """ - Delete a template from the pyMBE database. - - This method removes a template identified by its pyMBE type and name. - Before deletion, it checks whether any instance in the database uses - this template. If any instance depends on it, a ``ValueError`` is raised - to prevent breaking database integrity. - - Args: - pmb_type (str): - The template category. - name (str): - The name of the template to delete. - """ - # Check template exists - if pmb_type not in self._templates: - raise ValueError(f"Template type '{pmb_type}' not found.") - if name not in self._templates[pmb_type]: - raise ValueError(f"Template '{name}' not found in type '{pmb_type}'.") - - # Check if any instance depends on this template - if pmb_type in self._instances: - for inst in self._instances[pmb_type].values(): - if getattr(inst, "name", None) == name: - raise ValueError( - f"Cannot delete template '{name}' from '{pmb_type}': " - f"Instance with ID {getattr(inst, pmb_type + '_id')} depends on it." 
- ) - - # Delete - del self._templates[pmb_type][name] - # if it is a bond template delete also stored espresso bond instances - if pmb_type == "bond": - if name in self.espresso_bond_instances.keys(): - del self.espresso_bond_instances[name] - - # Delete empty groups - if not self._templates[pmb_type]: - del self._templates[pmb_type] - - def delete_templates(self, pmb_type): - """ - Remove all templates registered in the pyMBE database for a given pyMBE type. - - Args: - pmb_type (str): - Template category (e.g. ``"particle"``, ``"residue"``, - ``"molecule"``, ``"hydrogel"``). - - Notes: - - This operation is irreversible. - - Instance data is not affected. - - If no templates exist for the given type, the method is a no-op. - """ - if pmb_type in self._templates: - templates = list(self._templates[pmb_type].keys()) - for template in templates: - self.delete_template(pmb_type=pmb_type, - name=template) - def delete_instance(self, pmb_type, instance_id, cascade=False): """ Delete an instance from the pyMBE database. @@ -876,6 +814,77 @@ def delete_instances(self, pmb_type, cascade=False): instance_id=instance_id, cascade=cascade) + def delete_reaction(self, reaction_name): + """ + Delete a reaction template from the pyMBE database. + + Args: + reaction_name (str): label identifying the reaction template in the database. + """ + if reaction_name not in self._reactions: + raise ValueError(f"Reaction '{reaction_name}' not found in the pyMBE database.") + del self._reactions[reaction_name] + + def delete_reactions(self): + """ + Deletes all reaction templates from the pyMBE database. + """ + keys = list(self._reactions.keys()) + for key in keys: + self.delete_reaction(reaction_name=key) + + def delete_template(self, pmb_type, name): + """ + Delete a template from the pyMBE database. + + Args: + pmb_type (str): The template category. + name (str): The name of the template to delete. 
+ """ + # Check template exists + if pmb_type not in self._templates: + raise ValueError(f"Template type '{pmb_type}' not found.") + if name not in self._templates[pmb_type]: + raise ValueError(f"Template '{name}' not found in type '{pmb_type}'.") + + # Check if any instance depends on this template + if pmb_type in self._instances: + for inst in self._instances[pmb_type].values(): + if getattr(inst, "name", None) == name: + raise ValueError(f"Cannot delete template '{name}' from '{pmb_type}': Instance with ID {getattr(inst, pmb_type + '_id')} depends on it.") + + # Delete + del self._templates[pmb_type][name] + # if it is a bond template delete also stored espresso bond instances + if pmb_type == "bond": + if name in self.espresso_bond_instances.keys(): + del self.espresso_bond_instances[name] + + # Delete empty groups + if not self._templates[pmb_type]: + del self._templates[pmb_type] + + def delete_templates(self, pmb_type): + """ + Remove all templates registered in the pyMBE database for a given pyMBE type. + + Args: + pmb_type (str): + Template category (e.g. ``"particle"``, ``"residue"``, + ``"molecule"``, ``"hydrogel"``). + + Notes: + - This operation is irreversible. + - Instance data is not affected. + - If no templates exist for the given type, the method is a no-op. 
+ """ + if pmb_type in self._templates: + templates = list(self._templates[pmb_type].keys()) + for template in templates: + self.delete_template(pmb_type=pmb_type, + name=template) + + def get_instance(self, pmb_type, instance_id): """ diff --git a/testsuite/CTestTestfile.cmake b/testsuite/CTestTestfile.cmake index 44898e3..d344f88 100644 --- a/testsuite/CTestTestfile.cmake +++ b/testsuite/CTestTestfile.cmake @@ -52,7 +52,7 @@ pymbe_add_test(PATH peptide_tests.py LABELS long beyer2024 THREADS 2) pymbe_add_test(PATH weak_polyelectrolyte_dialysis_test.py LABELS long beyer2024) # unit tests -pymbe_add_test(PATH test_in_out_pmb_df.py) +pymbe_add_test(PATH test_io_database.py) pymbe_add_test(PATH serialization_test.py) pymbe_add_test(PATH test_global_variables.py) pymbe_add_test(PATH lj_tests.py) @@ -62,7 +62,6 @@ pymbe_add_test(PATH generate_perpendicular_vectors_test.py) pymbe_add_test(PATH define_and_create_molecules_unit_tests.py) pymbe_add_test(PATH create_molecule_position_test.py) pymbe_add_test(PATH seed_test.py) -pymbe_add_test(PATH read-write-df_test.py) pymbe_add_test(PATH parameter_test.py) pymbe_add_test(PATH henderson_hasselbalch_tests.py) pymbe_add_test(PATH calculate_net_charge_unit_test.py) diff --git a/testsuite/charge_number_map_tests.py b/testsuite/charge_number_map_tests.py index 2b066dc..6f9ddd9 100644 --- a/testsuite/charge_number_map_tests.py +++ b/testsuite/charge_number_map_tests.py @@ -49,32 +49,26 @@ def test_inert_particle(self): """ Check that get_charge_number_map works correctly for inert particles """ - self.assertEqual(actual=charge_map[type_map["I"]], - desired=5, - verbose=True) + self.assertEqual(charge_map[type_map["I"]], + 5) def test_acidic_particle(self): """ Check that get_charge_number_map works correctly for acidic particles """ - self.assertEqual(actual=charge_map[type_map["AH"]], - desired=0, - verbose=True) - self.assertEqual(actual=charge_map[type_map["A"]], - desired=-1, - verbose=True) + 
self.assertEqual(charge_map[type_map["AH"]], + 0) + self.assertEqual(charge_map[type_map["A"]], + -1) def test_basic_particle(self): """ Check that get_charge_number_map works correctly for basic particles """ - self.assertEqual(actual=charge_map[type_map["BH"]], - desired=1, - verbose=True) - self.assertEqual(actual=charge_map[type_map["B"]], - desired=0, - verbose=True) - + self.assertEqual(charge_map[type_map["BH"]], + 1) + self.assertEqual(charge_map[type_map["B"]], + 0) if __name__ == '__main__': ut.main() \ No newline at end of file diff --git a/testsuite/parameter_test.py b/testsuite/parameter_test.py index a2825df..ef90a2f 100644 --- a/testsuite/parameter_test.py +++ b/testsuite/parameter_test.py @@ -1,5 +1,5 @@ # -# Copyright (C) 2024-2025 pyMBE-dev team +# Copyright (C) 2024-2026 pyMBE-dev team # # This file is part of pyMBE. # @@ -20,115 +20,57 @@ import pyMBE import pandas as pd import numpy as np -import pyMBE.storage.df_management as df_management - -pmb = pyMBE.pymbe_library(seed=42) - -print("*** Unit test: check that the different pKa sets are correctly formatted ***") - -data_root = pathlib.Path(__file__).parent / "test_parameters" -params_root = pathlib.Path(pyMBE.__file__).parent / "parameters" -pka_root = params_root / "pka_sets" -peptides_root = params_root / "peptides" - -for path in pka_root.glob("*.json"): - print(f"Checking {path.stem}") - pmb.load_pka_set(path) - -print("*** Test passed ***") - -print("*** Unit test: check that the order to execute load_pka_set() and load_interaction_parameters does not change the resulting parameters in pmb.df ***") -path_to_interactions=pmb.root / "parameters" / "peptides" / "Lunkad2021.json" -path_to_pka=pmb.root / "parameters" / "pka_sets" / "Hass2015.json" - -# First order of loading parameters -pmb.df = df_management._DFManagement._setup_df() # clear the pmb_df -pmb.load_interaction_parameters (filename=peptides_root / "Lunkad2021.json") -pmb.load_pka_set(filename=pka_root / "Hass2015.json") -df_1 = 
pmb.df.copy() -df_1 = df_1.sort_values(by="name").reset_index(drop=True) -# Drop espresso types (they depend on the order of loading) -df_1 = df_1.drop(labels=('state_one', 'es_type'), axis=1).drop(labels=('state_two', 'es_type'), axis=1) -# Drop bond_object (assert_frame_equal does not process it well) -df_1 = df_1.sort_index(axis=1).drop(labels="bond_object", axis=1) -# Second order of loading parameters -pmb.df = df_management._DFManagement._setup_df() # clear the pmb_df -pmb.load_pka_set (filename=path_to_pka) -#print(pmb.df["acidity"]) -pmb.load_interaction_parameters(filename=path_to_interactions) -#print(pmb.df["acidity"]) -df_2 = pmb.df.copy() -df_2 = df_2.sort_values(by="name").reset_index(drop=True) -# Drop espresso types (they depend on the order of loading) -df_2 = df_2.drop(labels=('state_one', 'es_type'), axis=1).drop(labels=('state_two', 'es_type'), axis=1) -# Drop bond_object (assert_frame_equal does not process it well) -df_2 = df_2.sort_index(axis=1).drop(labels="bond_object", axis=1) - -df_1 = df_1.replace({pd.NA: np.nan}) -df_2 = df_2.replace({pd.NA: np.nan}) -pd.testing.assert_frame_equal(df_1,df_2) - -print("*** Test passed ***") - -print("*** Unit test: check that load_interaction_parameters loads FENE bonds correctly ***") -pmb.df = df_management._DFManagement._setup_df() # clear the pmb_df -pmb.load_interaction_parameters (filename=data_root / "test_FENE.json") - -expected_parameters = {'r_0' : 0.4*pmb.units.nm, - 'k' : 400 * pmb.units('reduced_energy / reduced_length**2'), - 'd_r_max': 0.8 * pmb.units.nm} -reduced_units = {'r_0' : 'reduced_length', - 'k' : 'reduced_energy / reduced_length**2', - 'd_r_max': 'reduced_length'} -parameters_in_df = pmb.df[pmb.df.pmb_type == "bond"].parameters_of_the_potential.values[0] - -for key in expected_parameters.keys(): - np.testing.assert_equal(actual=parameters_in_df[key], - desired=expected_parameters[key].m_as(reduced_units[key]), - verbose=True) - -print("*** Test passed ***") -print("*** Unit test: 
check that load_interaction_parameters loads residue, molecule and peptide objects correctly ***") - -pmb.df = df_management._DFManagement._setup_df() # clear the pmb_df -pmb.load_interaction_parameters (filename=data_root / "test_molecules.json") - -expected_residue_parameters={"central_bead": "A", "side_chains": ["B","C"] } -expected_molecule_parameters={"residue_list": ["R1","R1", "R1"]} -expected_peptide_parameters= {"sequence": ['K', 'K', 'K', 'K', 'K', 'D', 'D', 'D', 'D', 'D'], "model": "1beadAA" } - -# Check residue -np.testing.assert_equal(actual=pmb.df[pmb.df.name == "R1"].central_bead.values[0], - desired=expected_residue_parameters["central_bead"], - verbose=True) - -np.testing.assert_equal(actual=frozenset(pmb.df[pmb.df.name == "R1"].side_chains.values[0]), - desired=frozenset(expected_residue_parameters["side_chains"]), - verbose=True) -# Check molecule -np.testing.assert_equal(actual=frozenset(pmb.df[pmb.df.name == "M1"].residue_list.values[0]), - desired=frozenset(expected_molecule_parameters["residue_list"]), - verbose=True) -# Check peptide -np.testing.assert_equal(actual=pmb.df[pmb.df.name == "P1"].sequence.values[0], - desired=expected_peptide_parameters["sequence"], - verbose=True) -np.testing.assert_equal(actual=frozenset(pmb.df[pmb.df.name == "P1"].model.values[0]), - desired=frozenset(expected_peptide_parameters["model"]), - verbose=True) -print("*** Test passed ***") -print("*** Unit test: check that load_interaction_parameters raises a ValueError if one loads a data set with an unknown pmb_type ***") -pmb.df = df_management._DFManagement._setup_df() # clear the pmb_df -input_parameters={"filename": data_root / "test_non_valid_object.json"} -np.testing.assert_raises(ValueError, pmb.load_interaction_parameters, **input_parameters) -print("*** Test passed ***") -print("*** Unit test: check that load_interaction_parameters raises a ValueError if one loads a bond not supported by pyMBE ***") -pmb.df = df_management._DFManagement._setup_df() # 
clear the pmb_df -input_parameters={"filename": data_root / "test_non_valid_bond.json"} -np.testing.assert_raises(ValueError, pmb.load_interaction_parameters, **input_parameters) -print("*** Test passed ***") -print("*** Unit test: check that check_pka_set raises a ValueError if data is missing important fields ***") -np.testing.assert_raises(ValueError, pmb.check_pka_set, {"name" : {}}) -np.testing.assert_raises(ValueError, pmb.check_pka_set, {"name" : {"pka_value": 1.}}) -np.testing.assert_raises(ValueError, pmb.check_pka_set, {"name" : {"acidity": 1.}}) -print("*** Test passed ***") +import unittest as ut + + +class Test(ut.TestCase): + def test_pka_set_format(self): + """ + Check that the different pKa sets are correctly formatted + """ + pmb = pyMBE.pymbe_library(seed=42) + pka_root=pmb.root / "parameters" / "pka_sets" + for path in pka_root.glob("*.json"): + pmb.load_pka_set(path) + pmb.db.delete_reactions() + + def test_sanity_load_datasets(self): + """ + Check that the order to execute load_pka_set() and load_databaasedoes not change the resulting parameters in pyMBE database + """ + + # First order of loading parameters + pmb1 = pyMBE.pymbe_library(seed=42) + path_to_interactions=pmb1.root / "parameters" / "peptides" / "Lunkad2021" + path_to_pka=pmb1.root / "parameters" / "pka_sets" / "Hass2015.json" + pmb1.load_database (folder=path_to_interactions) + pmb1.load_pka_set(filename=path_to_pka) + + + # Second order of loading parameters + pmb2 = pyMBE.pymbe_library(seed=23) + path_to_interactions=pmb2.root / "parameters" / "peptides" / "Lunkad2021" + path_to_pka=pmb2.root / "parameters" / "pka_sets" / "Hass2015.json" + pmb2.load_pka_set(filename=path_to_pka) + pmb2.load_database(folder=path_to_interactions) + + pmb_types_to_test = ["particle_state", + "particle", + "bond"] + for pmb_type in pmb_types_to_test: + pd.testing.assert_frame_equal(pmb1.get_templates_df(pmb_type=pmb_type), + pmb2.get_templates_df(pmb_type=pmb_type)) + 
pd.testing.assert_frame_equal(pmb1.get_reactions_df(), + pmb2.get_reactions_df()) + + def test_sanity_check_pka_set(self): + """ + Check that check_pka_set raises a ValueError if data is missing important fields + """ + pmb = pyMBE.pymbe_library(seed=42) + np.testing.assert_raises(ValueError, pmb.check_pka_set, {"name" : {}}) + np.testing.assert_raises(ValueError, pmb.check_pka_set, {"name" : {"pka_value": 1.}}) + np.testing.assert_raises(ValueError, pmb.check_pka_set, {"name" : {"acidity": 1.}}) + +if __name__ == "__main__": + ut.main() \ No newline at end of file diff --git a/testsuite/serialization_test.py b/testsuite/serialization_test.py index e1a0ea3..969a5ba 100644 --- a/testsuite/serialization_test.py +++ b/testsuite/serialization_test.py @@ -1,5 +1,5 @@ # -# Copyright (C) 2024 pyMBE-dev team +# Copyright (C) 2024-2026 pyMBE-dev team # # This file is part of pyMBE. # @@ -16,32 +16,14 @@ # You should have received a copy of the GNU General Public License # along with this program. If not, see . 
-import json import unittest as ut import numpy as np -import pandas as pd import pyMBE import pyMBE.lib.analysis import scipy.constants -import pyMBE.storage.df_management as df_management class Serialization(ut.TestCase): - - def test_json_encoder(self): - encoder = df_management._DFManagement._NumpyEncoder - # Python types - self.assertEqual(json.dumps(1, cls=encoder), "1") - self.assertEqual(json.dumps([1, 2], cls=encoder), "[1, 2]") - self.assertEqual(json.dumps((1, 2), cls=encoder), "[1, 2]") - self.assertEqual(json.dumps({1: 2}, cls=encoder), """{"1": 2}""") - # NumPy types - self.assertEqual(json.dumps(np.array([1, 2]), cls=encoder), "[1, 2]") - self.assertEqual(json.dumps(np.array(1), cls=encoder), "1") - self.assertEqual(json.dumps(np.int32(1), cls=encoder), "1") - # Pandas types - with self.assertRaisesRegex(TypeError, "Object of type Series is not JSON serializable"): - json.dumps(pd.Series([1, 2]), cls=encoder) - + def test_parameters_to_path(self): params = {"kT": 2., "phi": -np.pi, "n": 3, "fene": True, "name": "pep"} name = pyMBE.lib.analysis.built_output_name(params) @@ -69,6 +51,5 @@ def test_pint_units(self): self.assertAlmostEqual((pmb.kT / scipy.constants.k).magnitude, 298.15, delta=1e-7) - if __name__ == "__main__": ut.main() diff --git a/testsuite/test_in_out_pmb_df.py b/testsuite/test_in_out_pmb_df.py deleted file mode 100644 index f614a12..0000000 --- a/testsuite/test_in_out_pmb_df.py +++ /dev/null @@ -1,133 +0,0 @@ -# -# Copyright (C) 2025 pyMBE-dev team -# -# This file is part of pyMBE. -# -# pyMBE is free software: you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation, either version 3 of the License, or -# (at your option) any later version. -# -# pyMBE is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program. If not, see . - -import pyMBE -import io -import pandas as pd -import unittest as ut -import logging -import re -import pyMBE.storage.df_management as df_management - -# Create an in-memory log stream -log_stream = io.StringIO() -logging.basicConfig(level=logging.DEBUG, - format="%(levelname)s: %(message)s", - handlers=[logging.StreamHandler(log_stream)]) - -pmb = pyMBE.pymbe_library(seed=42) -particle_parameters={"S1":{"name": "S1", - "sigma":0.355*pmb.units.nm, - "epsilon":1*pmb.units('reduced_energy'), - "z":0}, - "S2":{"name": "S2", - "sigma":0.355*pmb.units.nm, - "epsilon":1*pmb.units('reduced_energy'), - "z":1}, - "S3":{"name": "S3", - "sigma":0.355*pmb.units.nm, - "epsilon":1*pmb.units('reduced_energy'), - "z":2}} - -pmb.define_particles(parameters=particle_parameters) - -generic_harmonic_constant = 400 * pmb.units('reduced_energy / reduced_length**2') -generic_bond_length = 0.355*pmb.units.nm -HARMONIC_parameters = {'r_0' : generic_bond_length, - 'k' : generic_harmonic_constant} -pmb.define_bond(bond_type = 'harmonic', - bond_parameters = HARMONIC_parameters, particle_pairs = [["S1", "S2"]]) - -class Serialization(ut.TestCase): - def test_add_to_df(self): - print("*** Unit test: test that check if the add_value_to_df() raises two warnings in default mode (overwrite=False) ***") - index=0 - key = ('name','') - old_value = pmb.df.loc[index,pd.IndexSlice[key]] - new_value='T2' - name=pmb.df.loc[index,key] - pmb_type=pmb.df.loc[index,('pmb_type','')] - df_management._DFManagement._add_value_to_df(df = pmb.df, - index = index, - key = key, - new_value = new_value) - log_contents = log_stream.getvalue() - warning_msg1=f"You are attempting to redefine the properties of {name} of pmb_type {pmb_type}" - warning_msg2=f"pyMBE has preserved of the entry `{key}`: old_value = {old_value}. 
If you want to overwrite it with new_value = {new_value}, activate the switch overwrite = True" - # Check if the warnings are in the log - assert re.search(re.escape(warning_msg1), log_contents) - assert re.search(re.escape(warning_msg2), log_contents) - print("*** Unit passed ***") - print("*** Unit test: test that check if the add_value_to_df() raises one warning when overwrite=True ***") - index=0 - key = ('name','') - old_value = pmb.df.loc[index,pd.IndexSlice[key]] - new_value='T2' - name=pmb.df.loc[index,key] - pmb_type=pmb.df.loc[index,('pmb_type','')] - df_management._DFManagement._add_value_to_df(df = pmb.df, - index = index, - key = key, - new_value = new_value, - overwrite = True) - log_contents = log_stream.getvalue() - warning_msg1=f"You are attempting to redefine the properties of {name} of pmb_type {pmb_type}" - warning_msg2=f"Overwritting the value of the entry `{key}`: old_value = {old_value} new_value = {new_value}" - # Check if the warnings are in the log - assert re.search(re.escape(warning_msg1), log_contents) - assert re.search(re.escape(warning_msg2), log_contents) - print("*** Unit passed ***") - - def test_delete_entries_df(self): - print("*** Unit test: test that entries in df are deleted properly using `delete_entries_in_df` ***") - - pmb.df = df_management._DFManagement._delete_entries_in_df(df=pmb.df, - entry_name="S1-S2") - assert pmb.df[pmb.df["name"]=="S1-S2"].empty - pmb.df = df_management._DFManagement._delete_entries_in_df(df=pmb.df, - entry_name="S1") - assert pmb.df[pmb.df["name"]=="S1"].empty - - residue_parameters={"R1":{"name": "R1", - "central_bead": "S2", - "side_chains": []}, - "R2":{"name": "R2", - "central_bead": "S2", - "side_chains": ["S2","S3"]}} - - for parameter_set in residue_parameters.values(): - pmb.define_residue(**parameter_set) - - pmb.df = df_management._DFManagement._delete_entries_in_df(df=pmb.df, - entry_name="R1") - assert pmb.df[pmb.df["name"]=="R1"].empty - - molecule_parameters={"M1":{"name": "M1", - 
"residue_list": ["R2","R2","R2"]}} - - for parameter_set in molecule_parameters.values(): - pmb.define_molecule(**parameter_set) - - pmb.df = df_management._DFManagement._delete_entries_in_df(df=pmb.df, - entry_name="M1") - assert pmb.df[pmb.df["name"]=="M1"].empty - print("*** Unit passed ***") - -if __name__ == "__main__": - ut.main() - diff --git a/testsuite/read-write-df_test.py b/testsuite/test_io_database.py similarity index 100% rename from testsuite/read-write-df_test.py rename to testsuite/test_io_database.py From 035947008d98f30ab683c09f3f4ddebd82b7f9a5 Mon Sep 17 00:00:00 2001 From: pmblanco Date: Fri, 23 Jan 2026 15:10:49 +0100 Subject: [PATCH 31/55] fix HH functions and tests, improve the functionality of calculate_net_charge for hydrogels, fix related bugs --- pyMBE/lib/handy_functions.py | 494 ++++++++++++------ pyMBE/pyMBE.py | 410 ++++++--------- pyMBE/storage/io.py | 12 +- pyMBE/storage/manager.py | 100 +++- test.py | 2 +- testsuite/calculate_net_charge_unit_test.py | 152 ++++-- testsuite/henderson_hasselbalch_tests.py | 69 ++- testsuite/parameter_test.py | 3 - testsuite/seed_test.py | 2 +- testsuite/test_io_database.py | 527 +++++++++++++++----- 10 files changed, 1108 insertions(+), 663 deletions(-) diff --git a/pyMBE/lib/handy_functions.py b/pyMBE/lib/handy_functions.py index 6c41931..c4eb257 100644 --- a/pyMBE/lib/handy_functions.py +++ b/pyMBE/lib/handy_functions.py @@ -21,6 +21,39 @@ import numpy as np import scipy +def calculate_initial_bond_length(bond_parameters, bond_type, lj_parameters): + """ + Calculates the initial bond length that is used when setting up molecules, + based on the minimum of the sum of bonded and short-range (LJ) interactions. 
+ + Args: + bond_object(`espressomd.interactions.BondedInteractions`): instance of a bond object from espressomd library + bond_type(`str`): label identifying the used bonded potential + epsilon(`pint.Quantity`): LJ epsilon of the interaction between the particles + sigma(`pint.Quantity`): LJ sigma of the interaction between the particles + cutoff(`pint.Quantity`): cutoff-radius of the LJ interaction + offset(`pint.Quantity`): offset of the LJ interaction + """ + def truncated_lj_potential(x, epsilon, sigma, cutoff,offset): + if x>cutoff: + return 0.0 + else: + return 4*epsilon*((sigma/(x-offset))**12-(sigma/(x-offset))**6) - 4*epsilon*((sigma/cutoff)**12-(sigma/cutoff)**6) + epsilon=lj_parameters["epsilon"].m_as("reduced_energy") + sigma=lj_parameters["sigma"].m_as("reduced_length") + cutoff=lj_parameters["cutoff"].m_as("reduced_length") + offset=lj_parameters["offset"].m_as("reduced_length") + if bond_type == "harmonic": + r_0 = bond_parameters['r_0'].m_as("reduced_length") + k = bond_parameters['k'].m_as("reduced_energy/reduced_length**2") + l0 = scipy.optimize.minimize(lambda x: 0.5*k*(x-r_0)**2 + truncated_lj_potential(x, epsilon, sigma, cutoff, offset), x0=r_0).x + elif bond_type == "FENE": + r_0 = bond_parameters['r_0'].m_as("reduced_length") + k = bond_parameters['k'].m_as("reduced_energy/reduced_length**2") + d_r_max = bond_parameters['d_r_max'].m_as("reduced_length") + l0 = scipy.optimize.minimize(lambda x: -0.5*k*(d_r_max**2)*np.log(1-((x-r_0)/d_r_max)**2) + truncated_lj_potential(x, epsilon, sigma, cutoff,offset), x0=1.0).x + return l0 + def check_aminoacid_key(key): """ Checks if `key` corresponds to a valid aminoacid letter code. 
@@ -74,7 +107,7 @@ def check_if_metal_ion(key): else: return False -def define_protein_AA_particles(topology_dict, pmb, lj_setup_mode="wca"): +def define_protein_AA_particles(topology_dict, pmb, pka_set, lj_setup_mode="wca"): """ Defines particle templates in pyMBE for all unique residue/atom types appearing in a protein topology dictionary. @@ -105,6 +138,10 @@ def define_protein_AA_particles(topology_dict, pmb, lj_setup_mode="wca"): pmb (pyMBE.pymbe_library): Instance of the pyMBE library. + Dictionary of the form: + {"particle_name": {"pka_value": float, + "acidity": "acidic" | "basic"}} + lj_setup_mode (str, optional): Determines how Lennard-Jones parameters are assigned. Defaults to `"wca"`. @@ -121,36 +158,33 @@ def define_protein_AA_particles(topology_dict, pmb, lj_setup_mode="wca"): offset = 2 * radius - sigma """ valid_lj_setups = ["wca"] - if lj_setup_mode not in valid_lj_setups: raise ValueError('Invalid key for the lj setup, supported setup modes are {valid_lj_setups}') if lj_setup_mode == "wca": sigma = 1*pmb.units.Quantity("reduced_length") epsilon = 1*pmb.units.Quantity("reduced_energy") part_dict={} - sequence=[] metal_ions_charge_number_map=get_metal_ions_charge_number_map() defined_particles=[] for particle in topology_dict.keys(): particle_name = re.split(r'\d+', particle)[0] - if particle_name not in part_dict.keys(): + if particle_name not in defined_particles: + part_dict = {"name" : particle_name} if lj_setup_mode == "wca": - part_dict={"sigma": sigma, - "offset": topology_dict[particle]['radius']*2-sigma, - "epsilon": epsilon, - "name": particle_name} - if check_if_metal_ion(key=particle_name): - z=metal_ions_charge_number_map[particle_name] + part_dict["sigma"] = sigma + part_dict["offset"]= topology_dict[particle]['radius']*2-sigma + part_dict["epsilon"] = epsilon + if particle_name in pka_set.keys(): + part_dict["acidity"] = pka_set[particle_name]["acidity"] else: - z=0 - part_dict["z"]=z - part_dict["name"]=particle_name - + if 
check_if_metal_ion(key=particle_name): + z=metal_ions_charge_number_map[particle_name] + else: + z=0 + part_dict["z"]=z if particle_name not in defined_particles: pmb.define_particle(**part_dict) defined_particles.append(particle_name) - return - def define_protein_AA_residues(topology_dict, model, pmb): """ @@ -215,7 +249,7 @@ def define_protein_AA_residues(topology_dict, model, pmb): def define_peptide_AA_residues(sequence,model, pmb): """ - Define residue templates in the pyMBE database for a protein topology dict. + Define residue templates in the pyMBE database for a given model. Args: sequence (list of str): @@ -259,12 +293,112 @@ def define_peptide_AA_residues(sequence,model, pmb): side_chains = side_chains) defined_residues.append(residue_name) +def do_reaction(algorithm, steps): + """ + Executes reaction steps using an ESPResSo reaction algorithm with + version-compatible calling semantics. + + This function wraps the `reaction` method of an ESPResSo reaction + algorithm to account for differences in the method signature between + ESPResSo versions. + + Args: + algorithm: + ESPResSo reaction algorithm object (e.g. constant pH, + reaction ensemble, or similar). + steps (int): + Number of reaction steps to perform. + + Notes: + - In ESPResSo 4.2, the `reaction` method expects the number of steps + to be passed as the keyword argument `reaction_steps`. + - In newer ESPResSo versions, the keyword argument is `steps`. + - This helper function provides a stable interface across ESPResSo + versions by dispatching to the appropriate keyword internally. + """ + import espressomd.version + if espressomd.version.friendly() == '4.2': + algorithm.reaction(reaction_steps=steps) + else: + algorithm.reaction(steps=steps) + +def get_number_of_particles(espresso_system, ptype): + """ + Returns the number of particles of a given ESPResSo particle type. 
+ + This function provides a compatibility wrapper around + `espresso_system.number_of_particles`, which has a different calling + signature depending on the ESPResSo version. + + Args: + espresso_system (espressomd.system.System): + ESPResSo system object from which the particle count is queried. + ptype (int): + ESPResSo particle type identifier. + + Returns: + int: + Number of particles in `espresso_system` with particle type `ptype`. + + Notes: + - In ESPResSo 4.2, `number_of_particles` expects the particle type + as a positional argument. + - In later ESPResSo versions, the particle type must be passed as a + keyword argument (`type=ptype`). + - This helper function hides these API differences and provides + a uniform interface across ESPResSo versions. + """ + import espressomd.version + if espressomd.version.friendly() == "4.2": + args = (ptype,) + kwargs = {} + else: + args = () + kwargs = {"type": ptype} + return espresso_system.number_of_particles(*args, **kwargs) + def get_residues_from_topology_dict(topology_dict, model): + """ + Groups beads from a topology dictionary into residues and assigns residue names. + + Args: + topology_dict (dict): + Dictionary describing the molecular topology, where keys are bead + identifiers (e.g. "CA12", "SC12") that encode both residue type and + residue index. + model (str): + Protein model identifier. Supported values are: + - `"1beadAA"`: single-bead-per-amino-acid model. + - `"2beadAA"`: two-bead-per-amino-acid model, where CA beads are excluded + from residue name assignment. + + Returns: + dict: + Dictionary mapping residue indices (as strings) to residue data: + { + resid: { + "beads": [bead_id1, bead_id2, ...], + "resname": residue_name + }, + ... + } + + Notes: + - Bead identifiers are parsed by separating alphabetic prefixes + (residue or bead type) from numeric residue indices. + - For the `"2beadAA"` model, beads named `"CA"` are excluded when + determining the residue name. 
+ - Residues that only contain CA beads (i.e., no side-chain beads) + are assigned the residue name `"G"` (glycine). + - Residue indices are returned as strings, consistent with the parsed + bead identifiers. + """ + if model not in {"1beadAA", "2beadAA"}: + raise ValueError(f"Unknown protein model '{model}'") if model == "1beadAA": excluded_residue_names = [] elif model == "2beadAA": excluded_residue_names = ["CA"] - # GROUP BEADS BY RESIDUE residues = {} for bead_id in topology_dict.keys(): @@ -297,140 +431,95 @@ def get_metal_ions_charge_number_map(): metal_charge_number_map = {"Ca": 2} return metal_charge_number_map -def calculate_initial_bond_length(bond_parameters, bond_type, lj_parameters): - """ - Calculates the initial bond length that is used when setting up molecules, - based on the minimum of the sum of bonded and short-range (LJ) interactions. - - Args: - bond_object(`espressomd.interactions.BondedInteractions`): instance of a bond object from espressomd library - bond_type(`str`): label identifying the used bonded potential - epsilon(`pint.Quantity`): LJ epsilon of the interaction between the particles - sigma(`pint.Quantity`): LJ sigma of the interaction between the particles - cutoff(`pint.Quantity`): cutoff-radius of the LJ interaction - offset(`pint.Quantity`): offset of the LJ interaction - """ - def truncated_lj_potential(x, epsilon, sigma, cutoff,offset): - if x>cutoff: - return 0.0 - else: - return 4*epsilon*((sigma/(x-offset))**12-(sigma/(x-offset))**6) - 4*epsilon*((sigma/cutoff)**12-(sigma/cutoff)**6) - epsilon=lj_parameters["epsilon"].m_as("reduced_energy") - sigma=lj_parameters["sigma"].m_as("reduced_length") - cutoff=lj_parameters["cutoff"].m_as("reduced_length") - offset=lj_parameters["offset"].m_as("reduced_length") - if bond_type == "harmonic": - r_0 = bond_parameters['r_0'].m_as("reduced_length") - k = bond_parameters['k'].m_as("reduced_energy/reduced_length**2") - l0 = scipy.optimize.minimize(lambda x: 0.5*k*(x-r_0)**2 + 
truncated_lj_potential(x, epsilon, sigma, cutoff, offset), x0=r_0).x - elif bond_type == "FENE": - r_0 = bond_parameters['r_0'].m_as("reduced_length") - k = bond_parameters['k'].m_as("reduced_energy/reduced_length**2") - d_r_max = bond_parameters['d_r_max'].m_as("reduced_length") - l0 = scipy.optimize.minimize(lambda x: -0.5*k*(d_r_max**2)*np.log(1-((x-r_0)/d_r_max)**2) + truncated_lj_potential(x, epsilon, sigma, cutoff,offset), x0=1.0).x - return l0 - - - -def setup_electrostatic_interactions(units, espresso_system, kT, c_salt=None, solvent_permittivity=78.5, method='p3m', tune_p3m=True, accuracy=1e-3, params=None, verbose=False): - """ - Sets up electrostatic interactions in an ESPResSo system. - - Args: - units(`pint.UnitRegistry`): Unit registry for handling physical units. - espresso_system(`espressomd.system.System`): system object of espressomd library. - kT(`pint.Quantity`): Thermal energy. - c_salt(`pint.Quantity`): Added salt concentration. If provided, the program outputs the debye screening length. It is a mandatory parameter for the Debye-Hückel method. - solvent_permittivity (`float`): Solvent relative permittivity. Defaults to 78.5, correspoding to its value in water at 298.15 K. - method(`str`): Method for computing electrostatic interactions. Defaults to "p3m". - tune_p3m(`bool`): If True, tunes P3M parameters for efficiency. Defaults to True. - accuracy(`float`): Desired accuracy for electrostatics. Defaults to 1e-3. - params(`dict`): Additional parameters for the electrostatic method. For P3M, it can include 'mesh', 'alpha', 'cao' and `r_cut`. For Debye-Hückel, it can include 'r_cut'. - verbose(`bool`): If True, enables verbose output for P3M tuning. Defaults to False. - - Note: - `c_salt` is a mandatory argument for setting up the Debye-Hückel electrostatic potential. - The calculated Bjerrum length is ouput to the log. If `c_salt` is provided, the calculated Debye screening length is also output to the log. 
- Currently, the only supported electrostatic methods are P3M ("p3m") and Debye-Hückel ("dh"). - """ - import espressomd.electrostatics - import espressomd.version - import numpy as np - import scipy.constants - logging.debug("*** Starting electrostatic interactions setup... ***") - # Initial sanity checks - if not hasattr(units, 'Quantity'): - raise TypeError("Invalid 'units' argument: Expected a pint.UnitRegistry object") - valid_methods_list=['p3m', 'dh'] - if method not in valid_methods_list: - raise ValueError('Method not supported, supported methods are', valid_methods_list) - if c_salt is None and method == 'dh': - raise ValueError('Please provide the added salt concentration c_salt to setup the Debye-Huckel potential') - e = scipy.constants.e * units.C - N_A = scipy.constants.N_A / units.mol - BJERRUM_LENGTH = e**2 / (4 * units.pi * units.eps0 * solvent_permittivity * kT) - logging.info(f" Bjerrum length {BJERRUM_LENGTH.to('nm')} = {BJERRUM_LENGTH.to('reduced_length')}") - COULOMB_PREFACTOR=BJERRUM_LENGTH * kT - if c_salt is not None: - if c_salt.check('[substance] [length]**-3'): - KAPPA=1./np.sqrt(8*units.pi*BJERRUM_LENGTH*N_A*c_salt) - elif c_salt.check('[length]**-3'): - KAPPA=1./np.sqrt(8*units.pi*BJERRUM_LENGTH*c_salt) - else: - raise ValueError('Unknown units for c_salt, supported units for salt concentration are [mol / volume] or [particle / volume]', c_salt) +def protein_sequence_parser(sequence): + ''' + Parses `sequence` to the one letter code for amino acids. 
- logging.info(f"Debye kappa {KAPPA.to('nm')} = {KAPPA.to('reduced_length')}") - - if params is None: - params = {} - - if method == 'p3m': - logging.debug("*** Setting up Coulomb electrostatics using the P3M method ***") - coulomb = espressomd.electrostatics.P3M(prefactor = COULOMB_PREFACTOR.m_as("reduced_length * reduced_energy"), - accuracy=accuracy, - verbose=verbose, - tune=tune_p3m, - **params) - - if tune_p3m: - espresso_system.time_step=0.01 - if espressomd.version.friendly() == "4.2": - espresso_system.actors.add(coulomb) - else: - espresso_system.electrostatics.solver = coulomb - - - # save the optimal parameters and add them by hand + Args: + sequence(`str` or `lst`): Sequence of the amino acid. - p3m_params = coulomb.get_params() - if espressomd.version.friendly() == "4.2": - espresso_system.actors.remove(coulomb) + Returns: + clean_sequence(`lst`): `sequence` using the one letter code. + + Note: + - Accepted formats for `sequence` are: + - `lst` with one letter or three letter code of each aminoacid in each element + - `str` with the sequence using the one letter code + - `str` with the squence using the three letter code, each aminoacid must be separated by a hyphen "-" + + ''' + # Aminoacid key + keys={"ALA": "A", + "ARG": "R", + "ASN": "N", + "ASP": "D", + "CYS": "C", + "GLU": "E", + "GLN": "Q", + "GLY": "G", + "HIS": "H", + "ILE": "I", + "LEU": "L", + "LYS": "K", + "MET": "M", + "PHE": "F", + "PRO": "P", + "SER": "S", + "THR": "T", + "TRP": "W", + "TYR": "Y", + "VAL": "V", + "PSER": "J", + "PTHR": "U", + "PTyr": "Z", + "NH2": "n", + "COOH": "c"} + clean_sequence=[] + if isinstance(sequence, str): + if sequence.find("-") != -1: + splited_sequence=sequence.split("-") + for residue in splited_sequence: + if len(residue) == 1: + if residue in keys.values(): + residue_ok=residue + else: + if residue.upper() in keys.values(): + residue_ok=residue.upper() + else: + raise ValueError("Unknown one letter code for a residue given: ", residue, " please review 
the input sequence") + clean_sequence.append(residue_ok) + else: + if residue in keys.keys(): + clean_sequence.append(keys[residue]) + else: + if residue.upper() in keys.keys(): + clean_sequence.append(keys[residue.upper()]) + else: + raise ValueError("Unknown code for a residue: ", residue, " please review the input sequence") else: - espresso_system.electrostatics.solver = None - coulomb = espressomd.electrostatics.P3M(prefactor = COULOMB_PREFACTOR.m_as("reduced_length * reduced_energy"), - accuracy = accuracy, - mesh = p3m_params['mesh'], - alpha = p3m_params['alpha'] , - cao = p3m_params['cao'], - r_cut = p3m_params['r_cut'], - tune = False) + for residue in sequence: + if residue in keys.values(): + residue_ok=residue + else: + if residue.upper() in keys.values(): + residue_ok=residue.upper() + else: + raise ValueError("Unknown one letter code for a residue: ", residue, " please review the input sequence") + clean_sequence.append(residue_ok) + if isinstance(sequence, list): + for residue in sequence: + if residue in keys.values(): + residue_ok=residue + else: + if residue.upper() in keys.values(): + residue_ok=residue.upper() + elif (residue.upper() in keys.keys()): + residue_ok= keys[residue.upper()] + else: + raise ValueError("Unknown code for a residue: ", residue, " please review the input sequence") + clean_sequence.append(residue_ok) + return clean_sequence - elif method == 'dh': - logging.debug("*** Setting up Debye-Hückel electrostatics ***") - if params: - r_cut = params['r_cut'] - else: - r_cut = 3*KAPPA.to('reduced_length').magnitude - - coulomb = espressomd.electrostatics.DH(prefactor = COULOMB_PREFACTOR.m_as("reduced_length * reduced_energy"), - kappa = (1./KAPPA).to('1/ reduced_length').magnitude, - r_cut = r_cut) - if espressomd.version.friendly() == "4.2": - espresso_system.actors.add(coulomb) - else: - espresso_system.electrostatics.solver = coulomb - logging.debug("*** Electrostatics successfully added to the system ***") - return def 
relax_espresso_system(espresso_system, seed, gamma=1e-3, Nsteps_steepest_descent=5000, max_displacement=0.01, Nsteps_iter_relax=500): """ @@ -526,21 +615,102 @@ def setup_langevin_dynamics(espresso_system, kT, seed,time_step=1e-2, gamma=1, t int_steps=int_steps, adjust_max_skin=adjust_max_skin) logging.info(f"Optimized skin value: {espresso_system.cell_system.skin}") - return -def get_number_of_particles(espresso_system, ptype): - import espressomd.version - if espressomd.version.friendly() == "4.2": - args = (ptype,) - kwargs = {} - else: - args = () - kwargs = {"type": ptype} - return espresso_system.number_of_particles(*args, **kwargs) +def setup_electrostatic_interactions(units, espresso_system, kT, c_salt=None, solvent_permittivity=78.5, method='p3m', tune_p3m=True, accuracy=1e-3, params=None, verbose=False): + """ + Sets up electrostatic interactions in an ESPResSo system. -def do_reaction(algorithm, steps): + Args: + units(`pint.UnitRegistry`): Unit registry for handling physical units. + espresso_system(`espressomd.system.System`): system object of espressomd library. + kT(`pint.Quantity`): Thermal energy. + c_salt(`pint.Quantity`): Added salt concentration. If provided, the program outputs the debye screening length. It is a mandatory parameter for the Debye-Hückel method. + solvent_permittivity (`float`): Solvent relative permittivity. Defaults to 78.5, correspoding to its value in water at 298.15 K. + method(`str`): Method for computing electrostatic interactions. Defaults to "p3m". + tune_p3m(`bool`): If True, tunes P3M parameters for efficiency. Defaults to True. + accuracy(`float`): Desired accuracy for electrostatics. Defaults to 1e-3. + params(`dict`): Additional parameters for the electrostatic method. For P3M, it can include 'mesh', 'alpha', 'cao' and `r_cut`. For Debye-Hückel, it can include 'r_cut'. + verbose(`bool`): If True, enables verbose output for P3M tuning. Defaults to False. 
+ + Note: + `c_salt` is a mandatory argument for setting up the Debye-Hückel electrostatic potential. + The calculated Bjerrum length is ouput to the log. If `c_salt` is provided, the calculated Debye screening length is also output to the log. + Currently, the only supported electrostatic methods are P3M ("p3m") and Debye-Hückel ("dh"). + """ + import espressomd.electrostatics import espressomd.version - if espressomd.version.friendly() == '4.2': - algorithm.reaction(reaction_steps=steps) + import numpy as np + import scipy.constants + logging.debug("*** Starting electrostatic interactions setup... ***") + # Initial sanity checks + if not hasattr(units, 'Quantity'): + raise TypeError("Invalid 'units' argument: Expected a pint.UnitRegistry object") + valid_methods_list=['p3m', 'dh'] + if method not in valid_methods_list: + raise ValueError('Method not supported, supported methods are', valid_methods_list) + if c_salt is None and method == 'dh': + raise ValueError('Please provide the added salt concentration c_salt to setup the Debye-Huckel potential') + e = scipy.constants.e * units.C + N_A = scipy.constants.N_A / units.mol + BJERRUM_LENGTH = e**2 / (4 * units.pi * units.eps0 * solvent_permittivity * kT) + logging.info(f" Bjerrum length {BJERRUM_LENGTH.to('nm')} = {BJERRUM_LENGTH.to('reduced_length')}") + COULOMB_PREFACTOR=BJERRUM_LENGTH * kT + if c_salt is not None: + if c_salt.check('[substance] [length]**-3'): + KAPPA=1./np.sqrt(8*units.pi*BJERRUM_LENGTH*N_A*c_salt) + elif c_salt.check('[length]**-3'): + KAPPA=1./np.sqrt(8*units.pi*BJERRUM_LENGTH*c_salt) + else: + raise ValueError('Unknown units for c_salt, supported units for salt concentration are [mol / volume] or [particle / volume]', c_salt) + + logging.info(f"Debye kappa {KAPPA.to('nm')} = {KAPPA.to('reduced_length')}") + + if params is None: + params = {} + + if method == 'p3m': + logging.debug("*** Setting up Coulomb electrostatics using the P3M method ***") + coulomb = 
espressomd.electrostatics.P3M(prefactor = COULOMB_PREFACTOR.m_as("reduced_length * reduced_energy"), + accuracy=accuracy, + verbose=verbose, + tune=tune_p3m, + **params) + + if tune_p3m: + espresso_system.time_step=0.01 + if espressomd.version.friendly() == "4.2": + espresso_system.actors.add(coulomb) + else: + espresso_system.electrostatics.solver = coulomb + + + # save the optimal parameters and add them by hand + + p3m_params = coulomb.get_params() + if espressomd.version.friendly() == "4.2": + espresso_system.actors.remove(coulomb) + else: + espresso_system.electrostatics.solver = None + coulomb = espressomd.electrostatics.P3M(prefactor = COULOMB_PREFACTOR.m_as("reduced_length * reduced_energy"), + accuracy = accuracy, + mesh = p3m_params['mesh'], + alpha = p3m_params['alpha'] , + cao = p3m_params['cao'], + r_cut = p3m_params['r_cut'], + tune = False) + + elif method == 'dh': + logging.debug("*** Setting up Debye-Hückel electrostatics ***") + if params: + r_cut = params['r_cut'] + else: + r_cut = 3*KAPPA.to('reduced_length').magnitude + + coulomb = espressomd.electrostatics.DH(prefactor = COULOMB_PREFACTOR.m_as("reduced_length * reduced_energy"), + kappa = (1./KAPPA).to('1/ reduced_length').magnitude, + r_cut = r_cut) + if espressomd.version.friendly() == "4.2": + espresso_system.actors.add(coulomb) else: - algorithm.reaction(steps=steps) + espresso_system.electrostatics.solver = coulomb + logging.debug("*** Electrostatics successfully added to the system ***") \ No newline at end of file diff --git a/pyMBE/pyMBE.py b/pyMBE/pyMBE.py index 544958d..076c5d5 100644 --- a/pyMBE/pyMBE.py +++ b/pyMBE/pyMBE.py @@ -128,6 +128,26 @@ def _check_bond_inputs(self, bond_type, bond_parameters): if required_parameter not in bond_parameters.keys(): raise ValueError(f"Missing required parameter {required_parameter} for {bond_type} bond") + def _check_dimensionality(self, variable, expected_dimensionality): + """ + Checks if the dimensionality of `variable` matches 
`expected_dimensionality`. + + Args: + variable(`pint.Quantity`): Quantity to be checked. + expected_dimensionality(`str`): Expected dimension of the variable. + + Returns: + (`bool`): `True` if the variable if of the expected dimensionality, `False` otherwise. + + Note: + - `expected_dimensionality` takes dimensionality following the Pint standards [docs](https://pint.readthedocs.io/en/0.10.1/wrapping.html?highlight=dimensionality#checking-dimensionality). + - For example, to check for a variable corresponding to a velocity `expected_dimensionality = "[length]/[time]"` + """ + correct_dimensionality=variable.check(f"{expected_dimensionality}") + if not correct_dimensionality: + raise ValueError(f"The variable {variable} should have a dimensionality of {expected_dimensionality}, instead the variable has a dimensionality of {variable.dimensionality}") + return correct_dimensionality + def _create_espresso_bond_instance(self, bond_type, bond_parameters): """ Creates an ESPResSo bond instance. @@ -164,7 +184,7 @@ def _create_espresso_bond_instance(self, bond_type, bond_parameters): d_r_max = bond_parameters["d_r_max"].m_as("reduced_length")) return bond_instance - def _create_hydrogel_chain(self, hydrogel_chain, nodes, espresso_system): + def _create_hydrogel_chain(self, hydrogel_chain, nodes, espresso_system, use_default_bond=False): """ Creates a chain between two nodes of a hydrogel. @@ -172,6 +192,7 @@ def _create_hydrogel_chain(self, hydrogel_chain, nodes, espresso_system): hydrogel_chain(HydrogelChain): template of a hydrogel chain nodes(dict): {node_index: {"name": node_particle_name, "pos": node_position, "id": node_particle_instance_id}} espresso_system (espressomd.system.System): ESPResSo system object where the hydrogel chain will be created. + use_default_bond (bool, optional): If True, use a default bond template if no specific template exists. Defaults to False. Return: (int): molecule_id of the created hydrogel chian. 
@@ -213,7 +234,8 @@ def _create_hydrogel_chain(self, hydrogel_chain, nodes, espresso_system): lj_parameters = self.get_lj_parameters(particle_name1=nodes[node_start_label]["name"], particle_name2=part_start_chain_name) bond_tpl = self.get_bond_template(particle_name1=nodes[node_start_label]["name"], - particle_name2=part_start_chain_name) + particle_name2=part_start_chain_name, + use_default_bond=use_default_bond) l0 = hf.calculate_initial_bond_length(lj_parameters=lj_parameters, bond_type=bond_tpl.bond_type, bond_parameters=bond_tpl.get_parameters(ureg=self.units)) @@ -223,17 +245,19 @@ def _create_hydrogel_chain(self, hydrogel_chain, nodes, espresso_system): espresso_system=espresso_system, list_of_first_residue_positions=[first_bead_pos.tolist()],#Start at the first node backbone_vector=np.array(backbone_vector)/l0, - use_default_bond=False) + use_default_bond=use_default_bond)[0] # Bond chain to the hydrogel nodes chain_pids = self.db._find_instance_ids_by_attribute(pmb_type="particle", attribute="molecule_id", value=mol_id) bond_tpl1 = self.get_bond_template(particle_name1=nodes[node_start_label]["name"], - particle_name2=part_start_chain_name) + particle_name2=part_start_chain_name, + use_default_bond=use_default_bond) start_bond_instance = self._get_espresso_bond_instance(bond_template=bond_tpl1, espresso_system=espresso_system) bond_tpl2 = self.get_bond_template(particle_name1=nodes[node_end_label]["name"], - particle_name2=part_end_chain_name) + particle_name2=part_end_chain_name, + use_default_bond=use_default_bond) end_bond_instance = self._get_espresso_bond_instance(bond_template=bond_tpl2, espresso_system=espresso_system) espresso_system.part.by_id(start_node_id).add_bond((start_bond_instance, chain_pids[0])) @@ -263,7 +287,6 @@ def _create_hydrogel_node(self, node_index, node_name, espresso_system): position = [node_position]) key = self.lattice_builder._get_node_by_label(f"[{node_index[0]} {node_index[1]} {node_index[2]}]") 
self.lattice_builder.nodes[key] = node_name - return node_position.tolist(), p_id[0] def _get_residue_list_from_sequence(self, sequence): @@ -328,61 +351,59 @@ def calculate_center_of_mass_of_molecule(self, molecule_id, espresso_system): center_of_mass = center_of_mass / len(particle_id_list) return center_of_mass - def calculate_HH(self, molecule_name, pmb_type, pH_list=None, pka_set=None): + def calculate_HH(self, template_name, pH_list=None, pka_set=None): """ - Calculates the charge per molecule according to the ideal Henderson-Hasselbalch titration curve - for molecules with the name `molecule_name`. + Calculates the charge in the template object according to the ideal Henderson–Hasselbalch titration curve. Args: - molecule_name(`str`): name of the molecule to calculate the ideal charge for - pH_list(`lst`): pH-values to calculate. - pka_set(`dict`): {"name" : {"pka_value": pka, "acidity": acidity}} + template_name (str): + Name of the template. + pH_list (list[float], optional): + pH values at which the charge is evaluated. + Defaults to 50 values between 2 and 12. + pka_set (dict, optional): + Mapping: + {particle_name: {"pka_value": float, "acidity": "acidic"|"basic"}} Returns: - Z_HH(`lst`): Henderson-Hasselbalch prediction of the charge of `sequence` in `pH_list` - - Note: - - This function supports objects with pmb types: "molecule", "peptide" and "protein". - - If no `pH_list` is given, 50 equispaced pH-values ranging from 2 to 12 are calculated - - If no `pka_set` is given, the pKa values are taken from `pmb.df` - - This function should only be used for single-phase systems. For two-phase systems `pmb.calculate_HH_Donnan` should be used. + list[float]: + Net molecular charge at each pH value. 
""" - _DFm._check_if_name_is_defined_in_df(name = molecule_name, - df = self.df) - self._check_supported_molecule(molecule_name = molecule_name, - valid_pmb_types = ["molecule","peptide","protein"]) if pH_list is None: - pH_list=np.linspace(2,12,50) + pH_list = np.linspace(2, 12, 50) if pka_set is None: - pka_set=self.get_pka_set() - index = self.df.loc[self.df['name'] == molecule_name].index[0].item() - residue_list = self.df.at [index,('residue_list','')].copy() - particles_in_molecule = [] - for residue in residue_list: - list_of_particles_in_residue = self.search_particles_in_residue(residue) - if len(list_of_particles_in_residue) == 0: - logging.warning(f"The residue {residue} has no particles defined in the pyMBE DataFrame, it will be ignored.") - continue - particles_in_molecule += list_of_particles_in_residue - if len(particles_in_molecule) == 0: - return [None]*len(pH_list) + pka_set = self.get_pka_set() self.check_pka_set(pka_set=pka_set) + particle_counts = self.db.get_particle_templates_under(template_name=template_name, + return_counts=True) + if not particle_counts: + return [None] * len(pH_list) charge_number_map = self.get_charge_number_map() - Z_HH=[] - for pH_value in pH_list: - Z=0 - for particle in particles_in_molecule: - if particle in pka_set.keys(): - if pka_set[particle]['acidity'] == 'acidic': - psi=-1 - elif pka_set[particle]['acidity']== 'basic': - psi=+1 - Z+=psi/(1+10**(psi*(pH_value-pka_set[particle]['pka_value']))) + def formal_charge(particle_name): + tpl = self.db.get_template(name=particle_name, + pmb_type="particle") + state = self.db.get_template(name=tpl.initial_state, + pmb_type="particle_state") + return charge_number_map[state.es_type] + Z_HH = [] + for pH in pH_list: + Z = 0.0 + for particle, multiplicity in particle_counts.items(): + if particle in pka_set: + pka = pka_set[particle]["pka_value"] + acidity = pka_set[particle]["acidity"] + if acidity == "acidic": + psi = -1 + elif acidity == "basic": + psi = +1 + else: + 
raise ValueError(f"Unknown acidity '{acidity}' for particle '{particle}'") + charge = psi / (1.0 + 10.0 ** (psi * (pH - pka))) + Z += multiplicity * charge else: - state_one_type = self.df.loc[self.df['name']==particle].state_one.es_type.values[0] - Z+=charge_number_map[state_one_type] + Z += multiplicity * formal_charge(particle) Z_HH.append(Z) - return Z_HH + return Z_HH def calculate_HH_Donnan(self, c_macro, c_salt, pH_list=None, pka_set=None): """ @@ -409,13 +430,11 @@ def calculate_HH_Donnan(self, c_macro, c_salt, pH_list=None, pka_set=None): if pka_set is None: pka_set=self.get_pka_set() self.check_pka_set(pka_set=pka_set) - partition_coefficients_list = [] pH_system_list = [] Z_HH_Donnan={} for key in c_macro: Z_HH_Donnan[key] = [] - def calc_charges(c_macro, pH): """ Calculates the charges of the different kinds of molecules according to the Henderson-Hasselbalch equation. @@ -445,75 +464,70 @@ def calc_partition_coefficient(charge, c_macro): for key in charge: charge_density += charge[key] * c_macro[key] return (-charge_density / (2 * ionic_strength_res) + np.sqrt((charge_density / (2 * ionic_strength_res))**2 + 1)).magnitude - for pH_value in pH_list: # calculate the ionic strength of the reservoir if pH_value <= 7.0: ionic_strength_res = 10 ** (-pH_value) * self.units.mol/self.units.l + c_salt elif pH_value > 7.0: ionic_strength_res = 10 ** (-(14-pH_value)) * self.units.mol/self.units.l + c_salt - #Determine the partition coefficient of positive ions by solving the system of nonlinear, coupled equations #consisting of the partition coefficient given by the ideal Donnan theory and the Henderson-Hasselbalch equation. #The nonlinear equation is formulated for log(xi) since log-operations are not supported for RootResult objects. 
equation = lambda logxi: logxi - np.log10(calc_partition_coefficient(calc_charges(c_macro, pH_value - logxi), c_macro)) logxi = scipy.optimize.root_scalar(equation, bracket=[-1e2, 1e2], method="brentq") partition_coefficient = 10**logxi.root - charges_temp = calc_charges(c_macro, pH_value-np.log10(partition_coefficient)) for key in c_macro: Z_HH_Donnan[key].append(charges_temp[key]) - pH_system_list.append(pH_value - np.log10(partition_coefficient)) partition_coefficients_list.append(partition_coefficient) - return {"charges_dict": Z_HH_Donnan, "pH_system_list": pH_system_list, "partition_coefficients": partition_coefficients_list} - def calculate_net_charge(self, espresso_system, molecule_name, pmb_type, dimensionless=False): - ''' - Calculates the net charge per molecule of molecules with `name` = molecule_name. - Returns the net charge per molecule and a maps with the net charge per residue and molecule. + def calculate_net_charge(self,espresso_system,object_name,pmb_type,dimensionless=False): + """ + Calculates the net charge per instance of a given pmb object type. Args: - espresso_system(`espressomd.system.System`): system information - molecule_name(`str`): name of the molecule to calculate the net charge - dimensionless(`bool'): sets if the charge is returned with a dimension or not + espresso_system (espressomd.system.System): + ESPResSo system containing the particles. + object_name (str): + Name of the object (e.g. molecule, residue, peptide, protein). + pmb_type (str): + Type of object to analyze. Must be molecule-like. + dimensionless (bool, optional): + If True, return charge as a pure number. + If False, return a quantity with reduced_charge units. 
Returns: - (`dict`): {"mean": mean_net_charge, "molecules": {mol_id: net_charge_of_mol_id, }, "residues": {res_id: net_charge_of_res_id, }} - - Note: - - The net charge of the molecule is averaged over all molecules of type `name` - - The net charge of each particle type is averaged over all particle of the same type in all molecules of type `name` - ''' - if pmb_type not in self.db._molecule_like_types: - raise ValueError(f"{pmb_type} are not supported. Current supported types are: {self.db._molecule_like_types}") - id_map = self.get_particle_id_map(object_name=molecule_name) - def create_charge_map(espresso_system,id_map,label): - charge_number_map={} - for super_id in id_map[label].keys(): - if dimensionless: - net_charge=0 - else: - net_charge=0 * self.units.Quantity(1,'reduced_charge') - for pid in id_map[label][super_id]: - if dimensionless: - net_charge+=espresso_system.part.by_id(pid).q - else: - net_charge+=espresso_system.part.by_id(pid).q * self.units.Quantity(1,'reduced_charge') - charge_number_map[super_id]=net_charge - return charge_number_map - net_charge_molecules=create_charge_map(label="molecule_map", - espresso_system=espresso_system, - id_map=id_map) - net_charge_residues=create_charge_map(label="residue_map", - espresso_system=espresso_system, - id_map=id_map) + dict: + {"mean": mean_net_charge, "instances": {instance_id: net_charge}} + """ + id_map = self.get_particle_id_map(object_name=object_name) + if pmb_type in self.db._assembly_like_types: + label="assembly_map" + elif pmb_type in self.db._molecule_like_types: + label="molecule_map" + else: + label=f"{pmb_type}_map" + instance_map = id_map[label] + charges = {} + for instance_id, particle_ids in instance_map.items(): + if dimensionless: + net_charge = 0.0 + else: + net_charge = 0 * self.units.Quantity(1, "reduced_charge") + for pid in particle_ids: + q = espresso_system.part.by_id(pid).q + if not dimensionless: + q *= self.units.Quantity(1, "reduced_charge") + net_charge += q + 
charges[instance_id] = net_charge + # Mean charge if dimensionless: - mean_charge=np.mean(np.array(list(net_charge_molecules.values()))) + mean_charge = float(np.mean(list(charges.values()))) else: - mean_charge=np.mean(np.array([value.magnitude for value in net_charge_molecules.values()]))*self.units.Quantity(1,'reduced_charge') - return {"mean": mean_charge, "molecules": net_charge_molecules, "residues": net_charge_residues} + mean_charge = (np.mean([q.magnitude for q in charges.values()])* self.units.Quantity(1, "reduced_charge")) + return {"mean": mean_charge, "instances": charges} def center_molecule_in_simulation_box(self, molecule_id, espresso_system, pmb_type="molecule"): """ @@ -537,25 +551,7 @@ def center_molecule_in_simulation_box(self, molecule_id, espresso_system, pmb_ty es_pos = espresso_system.part.by_id(pid).pos espresso_system.part.by_id(pid).pos = es_pos - center_of_mass + box_center - def check_dimensionality(self, variable, expected_dimensionality): - """ - Checks if the dimensionality of `variable` matches `expected_dimensionality`. - - Args: - variable(`pint.Quantity`): Quantity to be checked. - expected_dimensionality(`str`): Expected dimension of the variable. - - Returns: - (`bool`): `True` if the variable if of the expected dimensionality, `False` otherwise. - - Note: - - `expected_dimensionality` takes dimensionality following the Pint standards [docs](https://pint.readthedocs.io/en/0.10.1/wrapping.html?highlight=dimensionality#checking-dimensionality). 
- - For example, to check for a variable corresponding to a velocity `expected_dimensionality = "[length]/[time]"` - """ - correct_dimensionality=variable.check(f"{expected_dimensionality}") - if not correct_dimensionality: - raise ValueError(f"The variable {variable} should have a dimensionality of {expected_dimensionality}, instead the variable has a dimensionality of {variable.dimensionality}") - return correct_dimensionality + def check_pka_set(self, pka_set): """ @@ -721,13 +717,14 @@ def create_counterions(self, object_name, cation_name, anion_name, espresso_syst logging.info(f'Ion type: {name} created number: {counterion_number[name]}') return counterion_number - def create_hydrogel(self, name, espresso_system): + def create_hydrogel(self, name, espresso_system, use_default_bond=False): """ Creates a hydrogel in espresso_system using a pyMBE hydrogel template given by `name` Args: name(str): name of the hydrogel template in the pyMBE database. espresso_system (espressomd.system.System): ESPResSo system object where the hydrogel will be created. + use_default_bond (bool, optional): If True, use a default bond template if no specific template exists. Defaults to False. Returns: (int): id of the hydrogel instance created. @@ -754,7 +751,8 @@ def create_hydrogel(self, name, espresso_system): for hydrogel_chain in hydrogel_tpl.chain_map: molecule_id = self._create_hydrogel_chain(hydrogel_chain=hydrogel_chain, nodes=nodes, - espresso_system=espresso_system) + espresso_system=espresso_system, + use_default_bond=use_default_bond) self.db._update_instance(instance_id=molecule_id, pmb_type="molecule", attribute="assembly_id", @@ -977,6 +975,9 @@ def create_protein(self, name, number_of_proteins, espresso_system, topology_dic The `"initial_pos"` entry is required and represents the residue’s reference coordinates before shifting to the protein's center-of-mass. + Returns: + (list of int): List of the molecule_id of the Protein instances created into ESPResSo. 
+ Notes: - Particles are created using `create_particle()` with `fix=True`, meaning they are initially immobilized. @@ -993,6 +994,7 @@ def create_protein(self, name, number_of_proteins, espresso_system, topology_dic residues = hf.get_residues_from_topology_dict(topology_dict=topology_dict, model=protein_tpl.model) # Create protein + mol_ids = [] for _ in range(number_of_proteins): # create a molecule identifier in pyMBE molecule_id = self.db._propose_instance_id(pmb_type="protein") @@ -1035,6 +1037,8 @@ def create_protein(self, name, number_of_proteins, espresso_system, topology_dic protein_inst = ProteinInstance(name=name, molecule_id=molecule_id) self.db._register_instance(protein_inst) + mol_ids.append(molecule_id) + return mol_ids def create_residue(self, name, espresso_system, central_bead_position=None,use_default_bond=False, backbone_vector=None): """ @@ -1319,6 +1323,28 @@ def define_monoprototic_acidbase_reaction(self, particle_name, pka, acidity, met metadata=metadata) self.db._register_reaction(reaction) + def define_monoprototic_particle_states(self, particle_name, acidity): + """ + Defines particle states for a monoprotonic particle template including the charges in each of its possible states. + + Args: + particle_name(`str`): Unique label that identifies the particle template. + acidity(`str`): Identifies whether the particle is `acidic` or `basic`. + """ + acidity_valid_keys = ['acidic', 'basic'] + if not pd.isna(acidity): + if acidity not in acidity_valid_keys: + raise ValueError(f"Acidity {acidity} provided for particle name {particle_name} is not supported. 
Valid keys are: {acidity_valid_keys}") + if acidity == "acidic": + states = [{"name": f"{particle_name}H", "z": 0}, + {"name": f"{particle_name}", "z": -1}] + + elif acidity == "basic": + states = [{"name": f"{particle_name}H", "z": 1}, + {"name": f"{particle_name}", "z": 0}] + self.define_particle_states(particle_name=particle_name, + states=states) + def define_particle(self, name, sigma, epsilon, z=0, acidity=pd.NA, pka=pd.NA, cutoff=pd.NA, offset=pd.NA): """ Defines a particle template in the pyMBE database. @@ -1352,7 +1378,7 @@ def define_particle(self, name, sigma, epsilon, z=0, acidity=pd.NA, pka=pd.NA, states=states) initial_state = name else: - self.set_monoprototic_particle_states(particle_name=name, + self.define_monoprototic_particle_states(particle_name=name, acidity=acidity) initial_state = f"{name}H" if pka is not pd.NA: @@ -1411,7 +1437,7 @@ def define_peptide(self, name, sequence, model): valid_keys = ['1beadAA','2beadAA'] if model not in valid_keys: raise ValueError('Invalid label for the peptide model, please choose between 1beadAA or 2beadAA') - clean_sequence = self.protein_sequence_parser(sequence=sequence) + clean_sequence = hf.protein_sequence_parser(sequence=sequence) residue_list = self._get_residue_list_from_sequence(sequence=clean_sequence) tpl = PeptideTemplate(name=name, residue_list=residue_list, @@ -2102,95 +2128,6 @@ def propose_unused_type(self): if not all_types: return 0 return max(all_types) + 1 - - def protein_sequence_parser(self, sequence): - ''' - Parses `sequence` to the one letter code for amino acids. - - Args: - sequence(`str` or `lst`): Sequence of the amino acid. - - Returns: - clean_sequence(`lst`): `sequence` using the one letter code. 
- - Note: - - Accepted formats for `sequence` are: - - `lst` with one letter or three letter code of each aminoacid in each element - - `str` with the sequence using the one letter code - - `str` with the squence using the three letter code, each aminoacid must be separated by a hyphen "-" - - ''' - # Aminoacid key - keys={"ALA": "A", - "ARG": "R", - "ASN": "N", - "ASP": "D", - "CYS": "C", - "GLU": "E", - "GLN": "Q", - "GLY": "G", - "HIS": "H", - "ILE": "I", - "LEU": "L", - "LYS": "K", - "MET": "M", - "PHE": "F", - "PRO": "P", - "SER": "S", - "THR": "T", - "TRP": "W", - "TYR": "Y", - "VAL": "V", - "PSER": "J", - "PTHR": "U", - "PTyr": "Z", - "NH2": "n", - "COOH": "c"} - clean_sequence=[] - if isinstance(sequence, str): - if sequence.find("-") != -1: - splited_sequence=sequence.split("-") - for residue in splited_sequence: - if len(residue) == 1: - if residue in keys.values(): - residue_ok=residue - else: - if residue.upper() in keys.values(): - residue_ok=residue.upper() - else: - raise ValueError("Unknown one letter code for a residue given: ", residue, " please review the input sequence") - clean_sequence.append(residue_ok) - else: - if residue in keys.keys(): - clean_sequence.append(keys[residue]) - else: - if residue.upper() in keys.keys(): - clean_sequence.append(keys[residue.upper()]) - else: - raise ValueError("Unknown code for a residue: ", residue, " please review the input sequence") - else: - for residue in sequence: - if residue in keys.values(): - residue_ok=residue - else: - if residue.upper() in keys.values(): - residue_ok=residue.upper() - else: - raise ValueError("Unknown one letter code for a residue: ", residue, " please review the input sequence") - clean_sequence.append(residue_ok) - if isinstance(sequence, list): - for residue in sequence: - if residue in keys.values(): - residue_ok=residue - else: - if residue.upper() in keys.values(): - residue_ok=residue.upper() - elif (residue.upper() in keys.keys()): - residue_ok= keys[residue.upper()] - 
else: - raise ValueError("Unknown code for a residue: ", residue, " please review the input sequence") - clean_sequence.append(residue_ok) - return clean_sequence def read_protein_vtf(self,filename,unit_length=None): """ @@ -2284,63 +2221,6 @@ def save_database(self, folder, format='csv'): io._save_database_csv(self.db, folder=folder) - def search_particles_in_residue(self, residue_name): - ''' - Searches for all particles in a given residue of name `residue_name`. - - Args: - residue_name (`str`): name of the residue to be searched - - Returns: - list_of_particles_in_residue (`lst`): list of the names of all particles in the residue - - Note: - - The function returns a name per particle in residue, i.e. if there are multiple particles with the same type `list_of_particles_in_residue` will have repeated items. - - The function will return an empty list if the residue is not defined in `pmb.df`. - - The function will return an empty list if the particles are not defined in the pyMBE DataFrame. 
- ''' - index_residue = self.df.loc[self.df['name'] == residue_name].index[0].item() - central_bead = self.df.at [index_residue, ('central_bead', '')] - list_of_side_chains = self.df.at[index_residue, ('side_chains', '')] - list_of_particles_in_residue = [] - if central_bead is not pd.NA: - list_of_particles_in_residue.append(central_bead) - if list_of_side_chains is not pd.NA: - for side_chain in list_of_side_chains: - if _DFm._check_if_name_is_defined_in_df(name=side_chain, df=self.df): - object_type = self.df[self.df['name']==side_chain].pmb_type.values[0] - else: - continue - if object_type == "residue": - list_of_particles_in_side_chain_residue = self.search_particles_in_residue(side_chain) - list_of_particles_in_residue += list_of_particles_in_side_chain_residue - elif object_type == "particle": - if side_chain is not pd.NA: - list_of_particles_in_residue.append(side_chain) - return list_of_particles_in_residue - - def set_monoprototic_particle_states(self, particle_name, acidity): - """ - Sets the acidity for a monoprotonic particle template including the charges in each of its possible states. - - Args: - particle_name(`str`): Unique label that identifies the particle template. - acidity(`str`): Identifies whether the particle is `acidic` or `basic`. - """ - acidity_valid_keys = ['acidic', 'basic'] - if not pd.isna(acidity): - if acidity not in acidity_valid_keys: - raise ValueError(f"Acidity {acidity} provided for particle name {particle_name} is not supported. 
Valid keys are: {acidity_valid_keys}") - if acidity == "acidic": - states = [{"name": f"{particle_name}H", "z": 0}, - {"name": f"{particle_name}", "z": -1}] - - elif acidity == "basic": - states = [{"name": f"{particle_name}H", "z": 1}, - {"name": f"{particle_name}", "z": 0}] - self.define_particle_states(particle_name=particle_name, - states=states) - def set_particle_initial_state(self, particle_name, state_name): """ Sets the default initial state of a particle template defined in the pyMBE database. @@ -2383,7 +2263,7 @@ def set_reduced_units(self, unit_length=None, unit_charge=None, temperature=None variables=[unit_length,temperature,unit_charge] dimensionalities=["[length]","[temperature]","[charge]"] for variable,dimensionality in zip(variables,dimensionalities): - self.check_dimensionality(variable,dimensionality) + self._check_dimensionality(variable,dimensionality) self.Kw=Kw*self.units.mol**2 / (self.units.l**2) self.kT=temperature*self.kB self.units._build_cache() diff --git a/pyMBE/storage/io.py b/pyMBE/storage/io.py index 3699346..6afb31b 100644 --- a/pyMBE/storage/io.py +++ b/pyMBE/storage/io.py @@ -205,16 +205,20 @@ def _load_database_csv(db, folder): elif pmb_type == "bond": params_raw = _decode(row.get("parameters", "")) or {} parameters: Dict[str, Any] = {} + # For the default bond case, map empty particle names to None + particle_name1 = row.get("particle_name1", "") or "" + particle_name2 = row.get("particle_name2", "") or "" for k, v in params_raw.items(): # if v is a dict, assume PintQuantity dict if isinstance(v, dict) and {"magnitude", "units", "dimension"}.issubset(v.keys()): parameters[k] = PintQuantity.from_dict(v) else: parameters[k] = v - tpl = BondTemplate( - name=row["name"], - bond_type=row.get("bond_type", ""), - parameters=parameters) + tpl = BondTemplate(name=row["name"], + bond_type=row.get("bond_type", ""), + particle_name1=None if particle_name1 == "" else particle_name1, + particle_name2=None if particle_name2 == "" else 
particle_name2, + parameters=parameters) templates[tpl.name] = tpl elif pmb_type == "hydrogel": node_map_raw = _decode(row.get("node_map", "")) or [] diff --git a/pyMBE/storage/manager.py b/pyMBE/storage/manager.py index 02ab6eb..95da404 100644 --- a/pyMBE/storage/manager.py +++ b/pyMBE/storage/manager.py @@ -18,12 +18,7 @@ # import pandas as pd -import json -import re -import numpy as np -import logging -import warnings - +from collections import defaultdict from typing import Dict, Any from pyMBE.storage.templates.particle import ParticleTemplate from pyMBE.storage.templates.residue import ResidueTemplate @@ -90,6 +85,54 @@ def __init__(self,units): self._pmb_types = ["particle", "residue"] + self._molecule_like_types + self._assembly_like_types self.espresso_bond_instances= {} + def _collect_particle_templates(self, name, pmb_type): + """ + Recursively collects particle template names reachable from a given + template in the hierarchy. + + Args: + name (str): + Name of the current template being processed. + pmb_type (str): + Type of the current template. + + Returns: + set[str]: + Set of particle template names reachable from the current template. + + Notes: + - Particle state templates are resolved to their parent particle + template. 
+ """ + counts = defaultdict(int) + if pmb_type == "particle": + counts[name] += 1 + return counts + if pmb_type == "particle_state": + particle_name = self.get_template(name=name, pmb_type=pmb_type).particle_name + counts[particle_name] += 1 + return counts + if pmb_type == "residue": + tpl = self.get_template(name=name, + pmb_type=pmb_type) + for pname in [tpl.central_bead] + tpl.side_chains: + sub = self._collect_particle_templates(name=pname, + pmb_type="particle") + for k, v in sub.items(): + counts[k] += v + return counts + if pmb_type in self._molecule_like_types: + tpl = self.get_template(name=name, + pmb_type=pmb_type) + for res_name in tpl.residue_list: + sub = self._collect_particle_templates(name=res_name, + pmb_type="residue") + for k, v in sub.items(): + counts[k] += v + return counts + raise NotImplementedError(f"Method not implemented for pmb_type='{pmb_type}'") + + def _delete_bonds_of_particle(self, pid): """ Delete all bond instances involving a given particle instance. @@ -674,6 +717,7 @@ def _propose_instance_id(self, pmb_type): return 0 used_ids = list(self._instances[pmb_type].keys()) return max(used_ids) + 1 + def delete_instance(self, pmb_type, instance_id, cascade=False): """ Delete an instance from the pyMBE database. @@ -884,8 +928,6 @@ def delete_templates(self, pmb_type): self.delete_template(pmb_type=pmb_type, name=template) - - def get_instance(self, pmb_type, instance_id): """ Retrieve a stored instance by type and instance_id. @@ -924,6 +966,43 @@ def get_instances(self, pmb_type): """ return self._instances.get(pmb_type, {}).copy() + def get_particle_templates_under(self, template_name, pmb_type=None, return_counts=False): + """ + Returns the names of all particle templates associated with a given + template by traversing the template hierarchy downward. + + Args: + template_name (str): + Name of the starting template. + pmb_type (str, optional): + Type of the starting template. 
If not provided, the type is + inferred from the database. In this case, the template name + must be unique across all template types. + return_counts (bool, optional): + If False (default), returns a set of unique particle template + names. If True, returns a dictionary mapping particle template + names to the number of times they appear in the hierarchy. + Returns: + set[str] or dict[str, int]: + - If `return_counts=False`: unique particle template names + - If `return_counts=True`: particle template multiplicities + + Notes: + - Counting reflects **structural multiplicity**, not instantiated + particle counts. + - The returned set contains particle template names only; particle + states are resolved to their parent particle templates. + """ + if pmb_type is None: + pmb_types = self._find_template_types(template_name) + if len(pmb_types) != 1: + raise ValueError(f"Template name '{template_name}' is ambiguous: {pmb_types}") + pmb_type = pmb_types[0] + counts = self._collect_particle_templates(name=template_name, pmb_type=pmb_type) + if return_counts: + return dict(counts) + return set(counts.keys()) + def get_template(self, pmb_type, name): """ @@ -943,6 +1022,9 @@ def get_template(self, pmb_type, name): provided type and name. 
""" + if pmb_type not in self._templates: + raise ValueError(f"There are no {pmb_type} templates defined in the database") + if name not in self._templates[pmb_type]: raise ValueError(f"Template '{name}' not found in type '{pmb_type}'.") else: @@ -1048,7 +1130,7 @@ def add_to_map(target_map, key, pid): add_to_map(molecule_map, p.molecule_id, pid) add_to_map(assembly_map, p.assembly_id, pid) # Case 4: object is an assembly - elif object_type == "assembly": + elif object_type in self._assembly_like_types: for assembly_id in object_ids: assembly_map[assembly_id] = [] for pid, p in particles.items(): diff --git a/test.py b/test.py index d79afa3..36d94a5 100644 --- a/test.py +++ b/test.py @@ -162,7 +162,7 @@ def main(): print("\n=== Protein Templates DataFrame ===") path = importlib.resources.files(pyMBE) / "parameters" / "globular_proteins" / f"1beb.vtf", - topology_dict = pmb.read_protein_vtf_in_df (filename=path[0]) + topology_dict = pmb.read_protein_vtf (filename=path[0]) # Define AA particles and residues hf.define_protein_AA_particles(topology_dict=topology_dict, diff --git a/testsuite/calculate_net_charge_unit_test.py b/testsuite/calculate_net_charge_unit_test.py index f88fe1a..2f3992e 100644 --- a/testsuite/calculate_net_charge_unit_test.py +++ b/testsuite/calculate_net_charge_unit_test.py @@ -19,6 +19,7 @@ import numpy as np import espressomd import unittest as ut +from pyMBE.lib.lattice import DiamondLattice # Create an instance of pyMBE library import pyMBE @@ -70,66 +71,135 @@ espresso_system=espressomd.System(box_l = [10]*3) # Create your molecules into the espresso system -pmb.create_molecule(name=molecule_name, - number_of_molecules= 2, + +diamond_lattice = DiamondLattice(4, 3.5 * pmb.units.reduced_length) +lattice_builder = pmb.initialize_lattice_builder(diamond_lattice) +indices = diamond_lattice.indices +node_topology = [] +for index in range(len(indices)): + node_topology.append({"particle_name": "+1p", + "lattice_index": indices[index]}) +node_labels 
= lattice_builder.node_labels +chain_labels = lattice_builder.chain_labels +reverse_node_labels = {v: k for k, v in node_labels.items()} +chain_topology = [] +for chain_data in chain_labels.items(): + node_label_pair = chain_data[0] + node_label_s, node_label_e = [int(x) for x in node_label_pair.strip("()").split(",")] + chain_topology.append({'node_start': reverse_node_labels[node_label_s], + 'node_end': reverse_node_labels[node_label_e], + 'molecule_name': molecule_name}) +pmb.define_hydrogel("my_hydrogel", + node_topology, + chain_topology) +pmb.create_hydrogel(name="my_hydrogel", espresso_system=espresso_system, use_default_bond=True) class Test(ut.TestCase): def test_calculate_net_charge_with_units(self): """ - *** Unit test: check that calculate_net_charge calculates the charge in a molecule properly with units + *** Unit test: check that calculate_net_charge calculates the charge in a hydrogel properly with units """ - - # Check the case where the returned charge has a dimension - charge_map=pmb.calculate_net_charge(molecule_name=molecule_name, - espresso_system=espresso_system, - pmb_type="molecule") - + # Check that it calculates properly the charge of the whole hydrogel + charge_map = pmb.calculate_net_charge(object_name="my_hydrogel", + pmb_type="hydrogel", + espresso_system=espresso_system) + + np.testing.assert_equal(charge_map["mean"], 40.0*pmb.units.Quantity(1,'reduced_charge')) + np.testing.assert_equal(charge_map["instances"], {0: 40.0*pmb.units.Quantity(1,'reduced_charge')}) + + # Check that it calculates properly the charge of the chains in the hydrogel + charge_map = pmb.calculate_net_charge(object_name=molecule_name, + pmb_type="molecule", + espresso_system=espresso_system) # Check mean charge np.testing.assert_equal(charge_map["mean"], 2.0*pmb.units.Quantity(1,'reduced_charge')) # Check molecule charge map - np.testing.assert_equal(charge_map["molecules"],{0: 2.0*pmb.units.Quantity(1,'reduced_charge'), 1: 
2.0*pmb.units.Quantity(1,'reduced_charge')}) - # Check residue charge map - np.testing.assert_equal(charge_map["residues"],{0: 1.0*pmb.units.Quantity(1,'reduced_charge'), - 1: 1.0*pmb.units.Quantity(1,'reduced_charge'), - 2: 0.0*pmb.units.Quantity(1,'reduced_charge'), - 3: 0.0*pmb.units.Quantity(1,'reduced_charge'), - 4: 0.0*pmb.units.Quantity(1,'reduced_charge'), - 5: 1.0*pmb.units.Quantity(1,'reduced_charge'), - 6: 1.0*pmb.units.Quantity(1,'reduced_charge'), - 7: 0.0*pmb.units.Quantity(1,'reduced_charge'), - 8: 0.0*pmb.units.Quantity(1,'reduced_charge'), - 9: 0.0*pmb.units.Quantity(1,'reduced_charge')}) + np.testing.assert_equal(charge_map["instances"], + {0: 2.0*pmb.units.Quantity(1,'reduced_charge'), + 1: 2.0*pmb.units.Quantity(1,'reduced_charge'), + 2: 2.0*pmb.units.Quantity(1,'reduced_charge'), + 3: 2.0*pmb.units.Quantity(1,'reduced_charge'), + 4: 2.0*pmb.units.Quantity(1,'reduced_charge'), + 5: 2.0*pmb.units.Quantity(1,'reduced_charge'), + 6: 2.0*pmb.units.Quantity(1,'reduced_charge'), + 7: 2.0*pmb.units.Quantity(1,'reduced_charge'), + 8: 2.0*pmb.units.Quantity(1,'reduced_charge'), + 9: 2.0*pmb.units.Quantity(1,'reduced_charge'), + 10: 2.0*pmb.units.Quantity(1,'reduced_charge'), + 11: 2.0*pmb.units.Quantity(1,'reduced_charge'), + 12: 2.0*pmb.units.Quantity(1,'reduced_charge'), + 13: 2.0*pmb.units.Quantity(1,'reduced_charge'), + 14: 2.0*pmb.units.Quantity(1,'reduced_charge'), + 15: 2.0*pmb.units.Quantity(1,'reduced_charge')}) + + # Check that it calculates properly the charge of the residues in the hydrogel + charge_map_r1 = pmb.calculate_net_charge(object_name="R1", + pmb_type="residue", + espresso_system=espresso_system) + charge_map_r2 = pmb.calculate_net_charge(object_name="R2", + pmb_type="residue", + espresso_system=espresso_system) + res_charge_map = charge_map_r1["instances"] | charge_map_r2["instances"] + np.testing.assert_equal(res_charge_map[0], 1.0*pmb.units.Quantity(1,'reduced_charge')) + np.testing.assert_equal(res_charge_map[1], 
1.0*pmb.units.Quantity(1,'reduced_charge')) + np.testing.assert_equal(res_charge_map[2], 0.0*pmb.units.Quantity(1,'reduced_charge')) + np.testing.assert_equal(res_charge_map[3], 0.0*pmb.units.Quantity(1,'reduced_charge')) + np.testing.assert_equal(res_charge_map[4], 0.0*pmb.units.Quantity(1,'reduced_charge')) + def test_calculate_net_charge_without_units(self): """ *** Unit test: check that calculate_net_charge calculates the charge in a molecule properly without units """ - - # Check the case where the returned charge is dimensionless - charge_map=pmb.calculate_net_charge(molecule_name=molecule_name, - espresso_system=espresso_system, - dimensionless=True, - pmb_type="molecule") - + # Check that it calculates properly the charge of the whole hydrogel + charge_map = pmb.calculate_net_charge(object_name="my_hydrogel", + pmb_type="hydrogel", + espresso_system=espresso_system, + dimensionless=True) + np.testing.assert_equal(charge_map["mean"], 40.0) + np.testing.assert_equal(charge_map["instances"], {0: 40.0}) + # Check the case where the returned charge does not have a dimension + charge_map = pmb.calculate_net_charge(object_name=molecule_name, + pmb_type="molecule", + espresso_system=espresso_system, + dimensionless=True) # Check mean charge np.testing.assert_equal(charge_map["mean"], 2.0) # Check molecule charge map - np.testing.assert_equal(charge_map["molecules"],{0: 2.0, 1: 2.0}) - # Check residue charge map - np.testing.assert_equal(charge_map["residues"],{0: 1.0, 1: 1.0, 2: 0.0, 3: 0.0, 4: 0.0, 5: 1.0, 6: 1.0, 7: 0.0, 8: 0.0, 9: 0.0}) - - - def test_sanitity_calculate_net_charge(self): - """ - Sanity check that calculate_net_charge raises a ValueError if one provides the name of an object that is not a molecule - """ - input_parameters={"molecule_name":"R1", - "espresso_system":espresso_system, - "pmb_type": "residue"} - np.testing.assert_raises(ValueError, pmb.calculate_net_charge, **input_parameters) + np.testing.assert_equal(charge_map["instances"], + {0: 
2.0, + 1: 2.0, + 2: 2.0, + 3: 2.0, + 4: 2.0, + 5: 2.0, + 6: 2.0, + 7: 2.0, + 8: 2.0, + 9: 2.0, + 10: 2.0, + 11: 2.0, + 12: 2.0, + 13: 2.0, + 14: 2.0, + 15: 2.0}) + charge_map_r1 = pmb.calculate_net_charge(object_name="R1", + pmb_type="residue", + espresso_system=espresso_system, + dimensionless=True) + charge_map_r2 = pmb.calculate_net_charge(object_name="R2", + pmb_type="residue", + espresso_system=espresso_system, + dimensionless=True) + res_charge_map = charge_map_r1["instances"] | charge_map_r2["instances"] + np.testing.assert_equal(res_charge_map[0], 1.0) + np.testing.assert_equal(res_charge_map[1], 1.0) + np.testing.assert_equal(res_charge_map[2], 0.0) + np.testing.assert_equal(res_charge_map[3], 0.0) + np.testing.assert_equal(res_charge_map[4], 0.0) if __name__ == '__main__': ut.main() diff --git a/testsuite/henderson_hasselbalch_tests.py b/testsuite/henderson_hasselbalch_tests.py index aa41fdd..c5b77bf 100644 --- a/testsuite/henderson_hasselbalch_tests.py +++ b/testsuite/henderson_hasselbalch_tests.py @@ -20,6 +20,7 @@ import numpy as np import pathlib import pyMBE +import pyMBE.lib.handy_functions as hf mode="short" # Supported modes: "short", "long" pH_samples=25 # If more through testing is needed, set to 200 @@ -40,8 +41,17 @@ def test(self): model = '1beadAA' # Load pKa-values + path_to_interactions=pmb.root / "parameters" / "peptides" / "Lunkad2021" path_to_pka=pmb.root / "parameters" / "pka_sets" / "Nozaki1967.json" - pmb.load_pka_set(path_to_pka) + pmb.load_database (folder=path_to_interactions) # Defines particles + pmb.load_pka_set(filename=path_to_pka) + pka_set = pmb.get_pka_set() + for particle_name in pka_set.keys(): + pmb.define_monoprototic_particle_states(particle_name=particle_name, + acidity=pka_set[particle_name]["acidity"]) + hf.define_peptide_AA_residues(sequence=sequence1+sequence2, + model="1beadAA", + pmb=pmb) # Define the peptides in the pyMBE data frame pmb.define_peptide(name = "peptide_1", @@ -56,46 +66,27 @@ def test(self): # 
reference data ref_data_HH = np.loadtxt(self.data_root / "HH_no_pH_list.csv", delimiter=",") - # Test that the function returns a list of None when no residues are defined + # Test that the function returns a list of None when the molecule has no particles pH_values = [0, 14] pmb.define_molecule(name = "test", residue_list = []) - Z_HH = pmb.calculate_HH(molecule_name = "test", + Z_HH = pmb.calculate_HH(template_name= "test", pH_list = pH_values) np.testing.assert_array_equal(Z_HH, [None]*len(pH_values)) + # Test that the function raises a ValueError if there are undefined residues pmb.define_molecule(name = "mol1", residue_list=["TT"]) - Z_HH = pmb.calculate_HH(molecule_name = "mol1", - pH_list = pH_values) - np.testing.assert_array_equal(Z_HH, - [None]*len(pH_values)) - - # Test that the function ignores residues with undefined particles - pmb.define_residue(name = "RT", - central_bead="T", - side_chains=["TT"]) - pmb.define_molecule(name = "mol2", - residue_list=["RT"]) - Z_HH = pmb.calculate_HH(molecule_name = "mol2", - pH_list = pH_values) - np.testing.assert_array_equal(Z_HH, - [None]*len(pH_values)) - - # Test that the function ignores undefined residues when other residues are defined - pmb.define_peptide(name = "peptide_3", - sequence =sequence1+"T", - model= model) - Z_HH_1 = pmb.calculate_HH(molecule_name = "peptide_3") - np.testing.assert_allclose(Z_HH_1, ref_data_HH[0,:]) - + self.assertRaises(ValueError, + pmb.calculate_HH, + **{"template_name": "mol1"}) + - with self.subTest(msg="Check Henderson-Hasselbalch equation"): # Check case where no pH_list is provided - Z_HH_1 = pmb.calculate_HH(molecule_name = "peptide_1") - Z_HH_2 = pmb.calculate_HH(molecule_name = "peptide_2") + Z_HH_1 = pmb.calculate_HH(template_name = "peptide_1") + Z_HH_2 = pmb.calculate_HH(template_name = "peptide_2") ref_data_HH = np.loadtxt(self.data_root / "HH_no_pH_list.csv", delimiter=",") np.testing.assert_allclose(Z_HH_1, ref_data_HH[0,:]) @@ -104,9 +95,9 @@ def test(self): # Check
case where pH_list is provided pH_range = np.linspace(2, 12, num=200)[::200//pH_samples] - Z_HH_1 = pmb.calculate_HH(molecule_name = "peptide_1", + Z_HH_1 = pmb.calculate_HH(template_name = "peptide_1", pH_list = pH_range) - Z_HH_2 = pmb.calculate_HH(molecule_name = "peptide_2", + Z_HH_2 = pmb.calculate_HH(template_name = "peptide_2", pH_list = pH_range) ref_data_HH = np.loadtxt(self.data_root / "HH.csv", delimiter=",") @@ -118,12 +109,12 @@ def test(self): # Define additional non-ionizable groups pmb.define_particle(name = "N0", z=0, - ) + sigma=1*pmb.units.reduced_length, + epsilon=1*pmb.units.reduced_energy) pmb.define_particle(name = "N1", z=1, - ) - path_to_pka=pmb.root / "parameters" / "pka_sets" / "Nozaki1967.json" - pmb.load_pka_set(path_to_pka) + sigma=1*pmb.units.reduced_length, + epsilon=1*pmb.units.reduced_energy) pmb.define_residue(name = "RD", central_bead="D", side_chains=[]) @@ -144,14 +135,14 @@ def test(self): # Check the case with non-ionizable groups without charge pmb.define_molecule(name = "mol_1", residue_list = 5*["RD"] + 8*["RH"] + 3*["RN0"]) - Z_HH_1 = pmb.calculate_HH(molecule_name = "mol_1") + Z_HH_1 = pmb.calculate_HH(template_name= "mol_1") np.testing.assert_allclose(Z_HH_1, ref_data_HH[0,:]) # Check the case with non-ionizable groups with charge pmb.define_molecule(name = "mol_2", residue_list = 5*["RD"] + 8*["RH"] + 3*["RN1"]) - Z_HH_2 = pmb.calculate_HH(molecule_name = "mol_2") + Z_HH_2 = pmb.calculate_HH(template_name= "mol_2") np.testing.assert_allclose(Z_HH_2, ref_data_HH[0,:]+3) @@ -179,9 +170,9 @@ def test(self): np.testing.assert_allclose(HH_Donnan_dict["charges_dict"]["peptide_2"], ref_data_HH_Donnan[1,::200//pH_samples]) with self.subTest(msg="Check that HH and HH_Don are consistent"): - Z_HH_1 = pmb.calculate_HH(molecule_name = "peptide_1", + Z_HH_1 = pmb.calculate_HH(template_name= "peptide_1", pH_list = HH_Donnan_dict["pH_system_list"]) - Z_HH_2 = pmb.calculate_HH(molecule_name = "peptide_2", + Z_HH_2 = 
pmb.calculate_HH(template_name= "peptide_2", pH_list = HH_Donnan_dict["pH_system_list"]) np.testing.assert_allclose(Z_HH_1, HH_Donnan_dict["charges_dict"]["peptide_1"]) diff --git a/testsuite/parameter_test.py b/testsuite/parameter_test.py index ef90a2f..8b37d51 100644 --- a/testsuite/parameter_test.py +++ b/testsuite/parameter_test.py @@ -15,14 +15,11 @@ # # You should have received a copy of the GNU General Public License # along with this program. If not, see . - -import pathlib import pyMBE import pandas as pd import numpy as np import unittest as ut - class Test(ut.TestCase): def test_pka_set_format(self): """ diff --git a/testsuite/seed_test.py b/testsuite/seed_test.py index f1c2c64..9885b2f 100644 --- a/testsuite/seed_test.py +++ b/testsuite/seed_test.py @@ -36,7 +36,7 @@ def build_peptide_in_espresso(seed): pmb.load_pka_set(path_to_pka) pka_set = pmb.get_pka_set() for particle_name in pka_set.keys(): - pmb.set_monoprototic_particle_states(acidity=pka_set[particle_name]["acidity"], + pmb.define_monoprototic_particle_states(acidity=pka_set[particle_name]["acidity"], particle_name=particle_name) # define residues hf.define_peptide_AA_residues(sequence=sequence, diff --git a/testsuite/test_io_database.py b/testsuite/test_io_database.py index 0ca1187..89c50ec 100644 --- a/testsuite/test_io_database.py +++ b/testsuite/test_io_database.py @@ -1,5 +1,5 @@ # -# Copyright (C) 2024-2025 pyMBE-dev team +# Copyright (C) 2024-2026 pyMBE-dev team # # This file is part of pyMBE. 
# @@ -20,154 +20,405 @@ import espressomd import pandas as pd import numpy as np -import logging -import io -import pyMBE.storage.df_management as df_management - -# Create an in-memory log stream -log_stream = io.StringIO() -logging.basicConfig(level=logging.INFO, - format="%(levelname)s: %(message)s", - handlers=[logging.StreamHandler(log_stream)]) -# Create an instance of pyMBE library +import unittest as ut import pyMBE -pmb = pyMBE.pymbe_library(seed=42) +from pyMBE.lib.lattice import DiamondLattice +import pyMBE.lib.handy_functions as hf -print ('*** Unit tests: read and write from pyMBE dataframe ***') -# Simulation parameters -pmb.set_reduced_units(unit_length=0.4*pmb.units.nm) +espresso_system=espressomd.System (box_l = [100]*3) -# Define particles -pmb.define_particle( - name = "I", - sigma = 0.3*pmb.units.nm, - epsilon = 1*pmb.units('reduced_energy'), - z = 0) +class Test(ut.TestCase): -pmb.define_particle( - name = "A", - acidity = "acidic", - pka = 4, - sigma = 0.3*pmb.units.nm, - epsilon = 1*pmb.units('reduced_energy'),) + def test_io_particles_and_particle_states_templates(self): + """ + Checks that information in the pyMBE database about + particle and particle_state templates is stored to file and loaded back properly. 
+ """ + pmb = pyMBE.pymbe_library(seed=42) + units = pmb.units + pmb.define_particle(name="Z", + sigma=3.5 * units.reduced_length, + cutoff=4 * units.reduced_length, + offset=0 * units.reduced_length, + epsilon=0.2 * units.reduced_energy, + acidity="acidic", + pka=4.25) + pmb.define_particle(name="X", + sigma=3.5 * units.reduced_length, + cutoff=4 * units.reduced_length, + offset=0 * units.reduced_length, + epsilon=0.2 * units.reduced_energy, + z=1) + new_pmb = pyMBE.pymbe_library(23) + with tempfile.TemporaryDirectory() as tmp_directory: + # Save and load the database + pmb.save_database(tmp_directory) + new_pmb.load_database(tmp_directory) + # Test that the loaded particle_state templates are equal to the original + pd.testing.assert_frame_equal(pmb.get_templates_df(pmb_type="particle_state"), + new_pmb.get_templates_df(pmb_type="particle_state")) + # Test that the loaded particle templates are equal to the original + pd.testing.assert_frame_equal(pmb.get_templates_df(pmb_type="particle"), + new_pmb.get_templates_df(pmb_type="particle")) + + def test_io_lj_templates(self): + """ + Checks that information in the pyMBE database about + Lennard-Jones templates is stored to file and loaded back properly.
+ """ + pmb = pyMBE.pymbe_library(seed=42) + units = pmb.units + pmb.define_particle(name="Z", + sigma=3.5 * units.reduced_length, + cutoff=4 * units.reduced_length, + offset=0 * units.reduced_length, + epsilon=0.2 * units.reduced_energy, + acidity="acidic", + pka=4.25) + pmb.define_particle(name="X", + sigma=3.5 * units.reduced_length, + cutoff=4 * units.reduced_length, + offset=0 * units.reduced_length, + epsilon=0.2 * units.reduced_energy, + z=1) + pmb.setup_lj_interactions(espresso_system=espresso_system) + new_pmb = pyMBE.pymbe_library(23) + with tempfile.TemporaryDirectory() as tmp_directory: + # Save and load the database + pmb.save_database(tmp_directory) + new_pmb.load_database(tmp_directory) + # Test that the loaded LJ templates are equal to the original + pd.testing.assert_frame_equal(pmb.get_templates_df(pmb_type="lj"), + new_pmb.get_templates_df(pmb_type="lj")) -pmb.define_particle( - name = "B", - acidity = "basic", - pka = 9, - sigma = 0.3*pmb.units.nm, - epsilon = 1*pmb.units('reduced_energy'),) - -# Define residues -pmb.define_residue( - name = "Res_1", - central_bead = "I", - side_chains = ["A","B"]) - -pmb.define_residue( - name = "Res_2", - central_bead = "I", - side_chains = ["Res_1"]) - -# Define peptide -peptide_name = 'generic_peptide' -peptide_sequence = 'EEEEEEE' -peptide_model = '2beadAA' -pmb.define_peptide(name=peptide_name, sequence=peptide_sequence, model=peptide_model) - -# Define a molecule -molecule_name = "A_molecule" -n_molecules = 1 + def test_io_bond_templates(self): + """ + Checks that information in the pyMBE database about + bond templates is stored to file and loaded back properly.
+ """ + pmb = pyMBE.pymbe_library(seed=42) + parameters1 = {"k": 100.0 * pmb.units.reduced_energy / (pmb.units.reduced_length**2), + "r_0": 1.0 * pmb.units.reduced_length} + parameters2 = {'r_0' : 0.4 * pmb.units.nm, + 'k' : 400 * pmb.units('reduced_energy / reduced_length**2'), + 'd_r_max': 0.8 * pmb.units.nm} + pmb.define_bond(bond_type="harmonic", + bond_parameters=parameters1, + particle_pairs=[["Z","Z"], + ["Z","X"], + ["X","X"]]) + pmb.define_default_bond(bond_type="FENE", + bond_parameters=parameters2) + new_pmb = pyMBE.pymbe_library(23) + with tempfile.TemporaryDirectory() as tmp_directory: + # Save and load the database + pmb.save_database(tmp_directory) + new_pmb.load_database(tmp_directory) + # Test that the loaded bond templates are equal to the original + pd.testing.assert_frame_equal(pmb.get_templates_df(pmb_type="bond"), + new_pmb.get_templates_df(pmb_type="bond")) -pmb.define_molecule( - name = molecule_name, - residue_list = ["Res_1", "Res_1", - "Res_2", "Res_1", - "Res_1", "Res_2", - "Res_2"]) + def test_io_residues_templates(self): + """ + Checks that information in the pyMBE database about + residue templates is stored to file and loaded back properly. + """ + pmb = pyMBE.pymbe_library(seed=42) + pmb.define_residue(name="R1", + central_bead="Z", + side_chains=["X","Z"]) + pmb.define_residue(name="R2", + central_bead="X", + side_chains=[]) + new_pmb = pyMBE.pymbe_library(23) + with tempfile.TemporaryDirectory() as tmp_directory: + # Save and load the database + pmb.save_database(tmp_directory) + new_pmb.load_database(tmp_directory) + # Test that the loaded residue templates are equal to the original + pd.testing.assert_frame_equal(pmb.get_templates_df(pmb_type="residue"), + new_pmb.get_templates_df(pmb_type="residue")) + + def test_io_molecule_templates(self): + """ + Checks that information in the pyMBE database about + molecule templates is stored to file and loaded back properly. 
+ """ + pmb = pyMBE.pymbe_library(seed=42) + pmb.define_molecule(name="M1", + residue_list=["R1","R2"]*2) + pmb.define_molecule(name="M2", + residue_list=["R2","R1"]*20) + new_pmb = pyMBE.pymbe_library(23) + with tempfile.TemporaryDirectory() as tmp_directory: + # Save and load the database + pmb.save_database(tmp_directory) + new_pmb.load_database(tmp_directory) + # Test that the loaded molecule templates are equal to the original + pd.testing.assert_frame_equal(pmb.get_templates_df(pmb_type="molecule"), + new_pmb.get_templates_df(pmb_type="molecule")) + + def test_io_peptide_templates(self): + """ + Checks that information in the pyMBE database about + peptide templates is stored to file and loaded back properly. + """ + pmb = pyMBE.pymbe_library(seed=42) + pmb.define_peptide(name="Peptide1", + model="1beadAA", + sequence="KKKKDDDD") + pmb.define_peptide(name="Peptide2", + model="2beadAA", + sequence="RRRREEEE") + new_pmb = pyMBE.pymbe_library(23) + with tempfile.TemporaryDirectory() as tmp_directory: + # Save and load the database + pmb.save_database(tmp_directory) + new_pmb.load_database(tmp_directory) + # Test that the loaded peptide templates are equal to the original + pd.testing.assert_frame_equal(pmb.get_templates_df(pmb_type="peptide"), + new_pmb.get_templates_df(pmb_type="peptide")) + + def test_io_protein_templates(self): + """ + Checks that information in the pyMBE database about + protein templates is stored to file and loaded back properly. 
+ """ + pmb = pyMBE.pymbe_library(seed=42) + pmb.define_protein(name="1beb", + model="2beadAA", + sequence="KKKKKK") + pmb.define_protein(name="Avidin", + model="1beadAA", + sequence="KKKKKK") + new_pmb = pyMBE.pymbe_library(23) + with tempfile.TemporaryDirectory() as tmp_directory: + # Save and load the database + pmb.save_database(tmp_directory) + new_pmb.load_database(tmp_directory) + # Test that the loaded protein templates are equal to the original + pd.testing.assert_frame_equal(pmb.get_templates_df(pmb_type="protein"), + new_pmb.get_templates_df(pmb_type="protein")) + + def test_io_hydrogel_templates(self): + """ + Checks that information in the pyMBE database about + hydrogel templates is stored to file and loaded back properly. + """ + pmb = pyMBE.pymbe_library(seed=42) + diamond_lattice = DiamondLattice(4, 3.5 * pmb.units.reduced_length) + lattice_builder = pmb.initialize_lattice_builder(diamond_lattice) + # Setting up node topology + indices = diamond_lattice.indices + node_topology = [] + for index in range(len(indices)): + node_topology.append({"particle_name": "X", + "lattice_index": indices[index]}) + # Setting up chain topology + node_labels = lattice_builder.node_labels + chain_labels = lattice_builder.chain_labels + reverse_node_labels = {v: k for k, v in node_labels.items()} + chain_topology = [] -# Define a bond -bond_type = 'harmonic' -generic_bond_length=0.4 * pmb.units.nm -generic_harmonic_constant = 400 * pmb.units('reduced_energy / reduced_length**2') + for chain_data in chain_labels.items(): + node_label_pair = chain_data[0] + node_label_s, node_label_e = [int(x) for x in node_label_pair.strip("()").split(",")] + chain_topology.append({'node_start':reverse_node_labels[node_label_s], + 'node_end': reverse_node_labels[node_label_e], + 'molecule_name':"M1"}) -harmonic_bond = {'r_0' : generic_bond_length, - 'k' : generic_harmonic_constant, - } + pmb.define_hydrogel(name="my_hydrogel", + node_map=node_topology, + chain_map=chain_topology) + 
new_pmb = pyMBE.pymbe_library(23) + with tempfile.TemporaryDirectory() as tmp_directory: + # Save and load the database + pmb.save_database(tmp_directory) + new_pmb.load_database(tmp_directory) + # Test that the loaded hydrogel templates are equal to the original + pd.testing.assert_frame_equal(pmb.get_templates_df(pmb_type="hydrogel"), + new_pmb.get_templates_df(pmb_type="hydrogel")) - -pmb.define_default_bond(bond_type = bond_type, bond_parameters = harmonic_bond) -pmb.define_bond(bond_type = bond_type, - bond_parameters = harmonic_bond, - particle_pairs=[["A","A"],["B","B"]]) -bond_type = 'FENE' -FENE_bond = {'r_0' : 0.4 * pmb.units.nm, + def test_io_reaction_templates(self): + """ + Checks that information in the pyMBE database about + reaction templates is stored to file and loaded back properly. + """ + pmb = pyMBE.pymbe_library(seed=42) + path_to_pka=pmb.root / "parameters" / "pka_sets" / "Nozaki1967.json" + pmb.load_pka_set(filename=path_to_pka) + new_pmb = pyMBE.pymbe_library(23) + with tempfile.TemporaryDirectory() as tmp_directory: + # Save and load the database + pmb.save_database(tmp_directory) + new_pmb.load_database(tmp_directory) + # Test that the loaded reactions are equal to the original + pd.testing.assert_frame_equal(pmb.get_reactions_df(), + new_pmb.get_reactions_df()) + + def test_io_instances(self): + """ + Checks that information in the pyMBE database about + instances created into espresso is stored to file and loaded back properly.
+ """ + pmb = pyMBE.pymbe_library(seed=42) + # Test instances of a hydrogel (tests hydrogel, molecule, residue, bond and particle instances) + pmb.define_particle(name="Z", + sigma=3.5 * pmb.units.reduced_length, + cutoff=4 * pmb.units.reduced_length, + offset=0 * pmb.units.reduced_length, + epsilon=0.2 * pmb.units.reduced_energy, + acidity="acidic", + pka=4.25) + pmb.define_particle(name="X", + sigma=3.5 * pmb.units.reduced_length, + cutoff=4 * pmb.units.reduced_length, + offset=0 * pmb.units.reduced_length, + epsilon=0.2 * pmb.units.reduced_energy, + z=1) + pmb.define_residue(name="R1", + central_bead="Z", + side_chains=["X","Z"]) + parameters = {"k": 100.0 * pmb.units.reduced_energy / (pmb.units.reduced_length**2), + "r_0": 1.0 * pmb.units.reduced_length} + pmb.define_bond(bond_type="harmonic", + bond_parameters=parameters, + particle_pairs=[["Z","Z"], + ["Z","X"], + ["X","X"]]) + pmb.define_molecule(name="M1", + residue_list=["R1"]*1) + diamond_lattice = DiamondLattice(4, 3.5 * pmb.units.reduced_length) + lattice_builder = pmb.initialize_lattice_builder(diamond_lattice) + # Setting up node topology --> Nodes are particles of type "X" + indices = diamond_lattice.indices + node_topology = [] + for index in range(len(indices)): + node_topology.append({"particle_name": "X", + "lattice_index": indices[index]}) + # Setting up chain topology --> Chains are molecules of type "M1" + node_labels = lattice_builder.node_labels + chain_labels = lattice_builder.chain_labels + reverse_node_labels = {v: k for k, v in node_labels.items()} + chain_topology = [] + for chain_data in chain_labels.items(): + node_label_pair = chain_data[0] + node_label_s, node_label_e = [int(x) for x in node_label_pair.strip("()").split(",")] + chain_topology.append({'node_start': reverse_node_labels[node_label_s], + 'node_end': reverse_node_labels[node_label_e], + 'molecule_name': "M1"}) + pmb.define_hydrogel("my_hydrogel", + node_topology, + chain_topology) + assembly_id = 
pmb.create_hydrogel(name="my_hydrogel", + espresso_system=espresso_system) + new_pmb = pyMBE.pymbe_library(23) + with tempfile.TemporaryDirectory() as tmp_directory: + # Save and load the database + pmb.save_database(tmp_directory) + new_pmb.load_database(tmp_directory) + # Test that the loaded instances are equal to the original + pd.testing.assert_frame_equal(pmb.get_instances_df(pmb_type="hydrogel"), + new_pmb.get_instances_df(pmb_type="hydrogel")) + pd.testing.assert_frame_equal(pmb.get_instances_df(pmb_type="molecule"), + new_pmb.get_instances_df(pmb_type="molecule")) + pd.testing.assert_frame_equal(pmb.get_instances_df(pmb_type="residues"), + new_pmb.get_instances_df(pmb_type="residues")) + pd.testing.assert_frame_equal(pmb.get_instances_df(pmb_type="bond"), + new_pmb.get_instances_df(pmb_type="bond")) + pd.testing.assert_frame_equal(pmb.get_instances_df(pmb_type="particle"), + new_pmb.get_instances_df(pmb_type="particle")) + # Clean up before the next test + pmb.delete_instances_in_system(espresso_system=espresso_system, + instance_id=assembly_id, + pmb_type="hydrogel") + # Test instances of a peptide (tests peptide, residue, bond and particle instances) + path_to_interactions=pmb.root / "parameters" / "peptides" / "Lunkad2021" + path_to_pka=pmb.root / "parameters" / "pka_sets" / "Hass2015.json" + pmb.load_database (folder=path_to_interactions) # Defines particles + pmb.load_pka_set(filename=path_to_pka) + pka_set = pmb.get_pka_set() + for particle_name in pka_set.keys(): + pmb.define_monoprototic_particle_states(particle_name=particle_name, + acidity=pka_set[particle_name]["acidity"]) + hf.define_peptide_AA_residues(sequence="KKKDDD", + model="1beadAA", + pmb=pmb) + parameters2 = {'r_0' : 0.4 * pmb.units.nm, 'k' : 400 * pmb.units('reduced_energy / reduced_length**2'), 'd_r_max': 0.8 * pmb.units.nm} + pmb.define_default_bond(bond_type="FENE", + bond_parameters=parameters2) + pmb.define_peptide(name="Peptide1", + model="1beadAA", + sequence="KKKKDDDD") + 
pep_ids = pmb.create_molecule(name="Peptide1", + number_of_molecules=2, + espresso_system=espresso_system, + use_default_bond=True) + new_pmb = pyMBE.pymbe_library(23) + with tempfile.TemporaryDirectory() as tmp_directory: + # Save and load the database + pmb.save_database(tmp_directory) + new_pmb.load_database(tmp_directory) + # Test that the loaded instances are equal to the original + pd.testing.assert_frame_equal(pmb.get_instances_df(pmb_type="peptide"), + new_pmb.get_instances_df(pmb_type="peptide")) + pd.testing.assert_frame_equal(pmb.get_instances_df(pmb_type="residues"), + new_pmb.get_instances_df(pmb_type="residues")) + pd.testing.assert_frame_equal(pmb.get_instances_df(pmb_type="bond"), + new_pmb.get_instances_df(pmb_type="bond")) + pd.testing.assert_frame_equal(pmb.get_instances_df(pmb_type="particle"), + new_pmb.get_instances_df(pmb_type="particle")) + # Clean up before the next test + for pepid in pep_ids: + pmb.delete_instances_in_system(espresso_system=espresso_system, + instance_id=pepid, + pmb_type="peptide") + pmb.db.delete_templates(pmb_type="particle") + pmb.db.delete_templates(pmb_type="particle_state") + pmb.db.delete_templates(pmb_type="residue") + pmb.db.delete_reactions() + # Test instances of a protein (tests protein, residue and particle instances) + path_to_protein_structure = pmb.root / "parameters" / "globular_proteins" / f"1beb.vtf", + topology_dict = pmb.read_protein_vtf (filename=path_to_protein_structure[0]) + pmb.load_pka_set(filename=path_to_pka) + # Define AA particles and residues + hf.define_protein_AA_particles(topology_dict=topology_dict, + pmb=pmb, + pka_set=pka_set) + hf.define_protein_AA_residues(topology_dict=topology_dict, + model="2beadAA", + pmb=pmb) + pmb.define_protein(name="1beb", + model="2beadAA", + sequence="KKKKKK") + prot_ids = pmb.create_protein(name="1beb", + number_of_proteins=1, + espresso_system=espresso_system, + topology_dict=topology_dict) + new_pmb = pyMBE.pymbe_library(23) + with 
tempfile.TemporaryDirectory() as tmp_directory: + # Save and load the database + pmb.save_database(tmp_directory) + new_pmb.load_database(tmp_directory) + # Test that the loaded instances are equal to the original + pd.testing.assert_frame_equal(pmb.get_instances_df(pmb_type="protein"), + new_pmb.get_instances_df(pmb_type="protein")) + pd.testing.assert_frame_equal(pmb.get_instances_df(pmb_type="residues"), + new_pmb.get_instances_df(pmb_type="residues")) + pd.testing.assert_frame_equal(pmb.get_instances_df(pmb_type="bond"), + new_pmb.get_instances_df(pmb_type="bond")) + pd.testing.assert_frame_equal(pmb.get_instances_df(pmb_type="particle"), + new_pmb.get_instances_df(pmb_type="particle")) + # Clean up + for protid in prot_ids: + pmb.delete_instances_in_system(espresso_system=espresso_system, + instance_id=protid, + pmb_type="protein") + -pmb.define_bond(bond_type = bond_type, - bond_parameters = FENE_bond, - particle_pairs=[["A","B"]]) - -# Solution parameters -cation_name = 'Na' -anion_name = 'Cl' -c_salt=5e-3 * pmb.units.mol/ pmb.units.L - -pmb.define_particle(name=cation_name, z=1, sigma=0.35*pmb.units.nm, epsilon=1*pmb.units('reduced_energy')) -pmb.define_particle(name=anion_name, z=-1, sigma=0.35*pmb.units.nm, epsilon=1*pmb.units('reduced_energy')) - -# System parameters -molecule_concentration = 5.56e-4 *pmb.units.mol/pmb.units.L -volume = n_molecules/(pmb.N_A*molecule_concentration) -L = volume ** (1./3.) 
# Side of the simulation box -espresso_system=espressomd.System (box_l = [L.to('reduced_length').magnitude]*3) -pmb.add_bonds_to_espresso(espresso_system = espresso_system) - -# Setup potential energy - -pmb.setup_lj_interactions (espresso_system=espresso_system) -pd.options.display.max_colwidth = 10 - -# Copy the pmb.df into a new DF for the unit test -stored_df = pmb.df.copy() - -with tempfile.TemporaryDirectory() as tmp_directory: - # Write the pymbe DF to a csv file - df_filename = f'{tmp_directory}/df-example_molecule.csv' - pmb.write_pmb_df (filename = df_filename) - # Read the same pyMBE df from a csv a load it in pyMBE - read_df = pmb.read_pmb_df(filename = df_filename) - # Write the pyMBE DF to a txt file - df_filename_test = f"{tmp_directory}/df-example_molecule.txt" - pmb.write_pmb_df (filename = df_filename_test) - np.testing.assert_raises(ValueError, pmb.read_pmb_df, df_filename_test) - -stored_df['node_map'] = stored_df['node_map'].astype(object) -stored_df['chain_map'] = stored_df['chain_map'].astype(object) -stored_df['l0'] = stored_df['l0'].astype(object) - -read_df['node_map'] = read_df['node_map'].astype(object) -read_df['chain_map'] = read_df['chain_map'].astype(object) -read_df['l0'] = read_df['l0'].astype(object) - -# Preprocess data for the Unit Test -# The espresso bond object must be converted to a dict in order to compare them using assert_frame_equal -stored_df['bond_object'] = stored_df['bond_object'].apply(lambda x: (x.name(), x.get_params(), x._bond_id) if pd.notnull(x) else x) -read_df['bond_object'] = read_df['bond_object'].apply(lambda x: (x.name(), x.get_params(), x._bond_id) if pd.notnull(x) else x) -print("*** Unit test: check that the dataframe stored by pyMBE to file is the same as the one read from the file (same values and variable types) ***") - -# One needs to replace the pd.NA by np.nan otherwise the comparison between pint objects fails -stored_df = stored_df.replace({pd.NA: np.nan}) -read_df = read_df.replace({pd.NA: 
np.nan}) - -pd.testing.assert_frame_equal(stored_df, - read_df, - rtol=1e-5) -print("*** Unit test passed***") +if __name__ == '__main__': + ut.main() -# Test that copy_df_entry raises an error if one provides a non-valid column name -print("*** Unit test: check that copy_df_entry raises an error if the entry does not exist ***") -np.testing.assert_raises(ValueError, df_management._DFManagement._copy_df_entry, df = pmb.df, name='test', column_name='non_existing_column',number_of_copies=1) -print("*** Unit test passed***") \ No newline at end of file From 81a42955b4d1ddf84deb75e7c7ec6028c2721408 Mon Sep 17 00:00:00 2001 From: pmblanco Date: Fri, 23 Jan 2026 16:07:34 +0100 Subject: [PATCH 32/55] start refactoring protein unit test --- pyMBE/lib/handy_functions.py | 3 + pyMBE/pyMBE.py | 76 ++++++------ testsuite/globular_protein_unit_tests.py | 151 +++++++++-------------- testsuite/test_io_database.py | 2 +- 4 files changed, 99 insertions(+), 133 deletions(-) diff --git a/pyMBE/lib/handy_functions.py b/pyMBE/lib/handy_functions.py index c4eb257..743d2b3 100644 --- a/pyMBE/lib/handy_functions.py +++ b/pyMBE/lib/handy_functions.py @@ -213,6 +213,8 @@ def define_protein_AA_residues(topology_dict, model, pmb): pmb (pyMBE.pymbe_library): Instance of the pyMBE library. 
+ Return: + (list of str): List of the defined residue names Notes: - Supported models: @@ -246,6 +248,7 @@ def define_protein_AA_residues(topology_dict, model, pmb): central_bead = central_bead, side_chains = side_chains) residue_list.append(residue_name) + return residue_list def define_peptide_AA_residues(sequence,model, pmb): """ diff --git a/pyMBE/pyMBE.py b/pyMBE/pyMBE.py index 076c5d5..6adf80e 100644 --- a/pyMBE/pyMBE.py +++ b/pyMBE/pyMBE.py @@ -107,7 +107,6 @@ def __init__(self, seed, temperature=None, unit_length=None, unit_charge=None, K self.db = Manager(units=self.units) self.lattice_builder = None self.root = importlib.resources.files(__package__) - def _check_bond_inputs(self, bond_type, bond_parameters): """ @@ -289,6 +288,8 @@ def _create_hydrogel_node(self, node_index, node_name, espresso_system): self.lattice_builder.nodes[key] = node_name return node_position.tolist(), p_id[0] + + def _get_residue_list_from_sequence(self, sequence): """ Convinience function to get a `residue_list` from a protein or peptide `sequence`. @@ -305,6 +306,30 @@ def _get_residue_list_from_sequence(self, sequence): residue_list.append(residue_name) return residue_list + def _get_template_type(self, name, allowed_types): + """ + Validate that a template name resolves unambiguously to exactly one + allowed pmb_type in the pyMBE database and return it. + + + Args: + name(str): Name of the template to validate. + allowed_types(set[str]): Set of allowed pmb_type values (e.g. {"molecule", "peptide"}). + + Returns: + str: Resolved pmb_type. + + Notes: + This method does *not* return the template itself, only the validated pmb_type. + """ + registered_pmb_types_with_name = self.db._find_template_types(name=name) + filtered_types = allowed_types.intersection(registered_pmb_types_with_name) + if len(filtered_types) > 1: + raise ValueError(f"Ambiguous template name '{name}': found both 'molecule' and 'peptide' templates in the pyMBE database. 
Molecule creation aborted.") + if len(filtered_types) == 0: + raise ValueError(f"No 'molecule' or 'peptide' template found with name '{name}'. Found templates of types: {filtered_types}.") + return next(iter(filtered_types)) + def _delete_particles_from_espresso(self, particle_ids, espresso_system): """ Remove a list of particles from an ESPResSo simulation system. @@ -328,7 +353,7 @@ def _delete_particles_from_espresso(self, particle_ids, espresso_system): espresso_system.part.by_id(pid).remove() - def calculate_center_of_mass_of_molecule(self, molecule_id, espresso_system): + def calculate_center_of_mass_of_molecule(self, molecule_id, pmb_type, espresso_system): """ Calculates the center of the molecule with a given molecule_id. @@ -795,15 +820,8 @@ def create_molecule(self, name, number_of_molecules, espresso_system, list_of_fi if len(list_of_first_residue_positions) != number_of_molecules: raise ValueError(f"Number of positions provided in {list_of_first_residue_positions} does not match number of molecules desired, {number_of_molecules}") - # Sanity tests, this function should work for both molecules and peptides - registered_pmb_types_with_name = self.db._find_template_types(name=name) - allowed_types = {"molecule", "peptide"} - filtered_types = allowed_types.intersection(registered_pmb_types_with_name) - if len(filtered_types) > 1: - raise ValueError(f"Ambiguous template name '{name}': found both 'molecule' and 'peptide' templates in the pyMBE database. Molecule creation aborted.") - if len(filtered_types) == 0: - raise ValueError(f"No 'molecule' or 'peptide' template found with name '{name}'. 
Found templates of types: {filtered_types}.") - pmb_type = next(iter(filtered_types)) + pmb_type = self._get_template_type(name=name, + allowed_types={"molecule", "peptide"}) # Generate an arbitrary random unit vector if backbone_vector is None: backbone_vector = self.generate_random_points_in_a_sphere(center=[0,0,0], @@ -1082,15 +1100,8 @@ def create_residue(self, name, espresso_system, central_bead_position=None,use_d side_chain_list = res_tpl.side_chains side_chain_beads_ids = [] for side_chain_name in side_chain_list: - pmb_type_list = self.db._find_template_types(name=side_chain_name) - allowed_types = {"particle", "residue"} - filtered_types = allowed_types.intersection(pmb_type_list) - if len(filtered_types) > 1: - raise KeyError(f"Ambiguous template name '{side_chain_name}': found both 'particle' and 'residue' templates in the pyMBE database. Residue creation aborted.") - if len(filtered_types) == 0: - raise KeyError( - f"No 'particle' or 'residue' template found with name '{side_chain_name}'. Found templates of types: {set(pmb_type_list)}.") - pmb_type = next(iter(filtered_types)) + pmb_type = self._get_template_type(name=side_chain_name, + allowed_types={"particle", "residue"}) if pmb_type == 'particle': lj_parameters = self.get_lj_parameters(particle_name1=central_bead_name, particle_name2=side_chain_name) @@ -2134,24 +2145,22 @@ def read_protein_vtf(self,filename,unit_length=None): Loads a coarse-grained protein model in a VTF file `filename`. Args: - filename(`str`): Path to the VTF file with the coarse-grained model. - unit_length(`obj`): unit of length of the the coordinates in `filename` using the pyMBE UnitRegistry. Defaults to None. + filename(str): Path to the VTF file with the coarse-grained model. + unit_length(obj): unit of length of the the coordinates in `filename` using the pyMBE UnitRegistry. Defaults to None. 
Returns: - topology_dict(`dict`): {'initial_pos': coords_list, 'chain_id': id, 'sigma': sigma_value} + topology_dict(dict): {'initial_pos': coords_list, 'chain_id': id, 'sigma': sigma_value} + sequence(str): Amino acid sequence, following the one letter code convection. Note: - If no `unit_length` is provided, it is assumed that the coordinates are in Angstrom. """ logging.info(f'Loading protein coarse grain model file: {filename}') - coord_list = [] particles_dict = {} - if unit_length is None: unit_length = 1 * self.units.angstrom - with open (filename,'r') as protein_model: for line in protein_model : line_split = line.split() @@ -2168,26 +2177,21 @@ def read_protein_vtf(self,filename,unit_length=None): atom_coord = line_split[1:] atom_coord = [(float(i)*unit_length).to('reduced_length').magnitude for i in atom_coord] coord_list.append (atom_coord) - numbered_label = [] - i = 0 - + i = 0 + sequence = "" for atom_id in particles_dict.keys(): - if atom_id == 1: atom_name = particles_dict[atom_id][0] numbered_name = [f'{atom_name}{i}',particles_dict[atom_id][2],particles_dict[atom_id][3]] numbered_label.append(numbered_name) - elif atom_id != 1: - if particles_dict[atom_id-1][1] != particles_dict[atom_id][1]: i += 1 count = 1 atom_name = particles_dict[atom_id][0] numbered_name = [f'{atom_name}{i}',particles_dict[atom_id][2],particles_dict[atom_id][3]] numbered_label.append(numbered_name) - elif particles_dict[atom_id-1][1] == particles_dict[atom_id][1]: if count == 2 or particles_dict[atom_id][1] == 'GLY': i +=1 @@ -2196,15 +2200,15 @@ def read_protein_vtf(self,filename,unit_length=None): numbered_name = [f'{atom_name}{i}',particles_dict[atom_id][2],particles_dict[atom_id][3]] numbered_label.append(numbered_name) count +=1 - + if atom_name not in ["CA", "Ca"]: + sequence += atom_name topology_dict = {} - for i in range (0, len(numbered_label)): topology_dict [numbered_label[i][0]] = {'initial_pos': coord_list[i] , 'chain_id':numbered_label[i][1], 
'radius':numbered_label[i][2] } - return topology_dict + return topology_dict, sequence def save_database(self, folder, format='csv'): """ diff --git a/testsuite/globular_protein_unit_tests.py b/testsuite/globular_protein_unit_tests.py index 289c695..f75996d 100644 --- a/testsuite/globular_protein_unit_tests.py +++ b/testsuite/globular_protein_unit_tests.py @@ -17,13 +17,12 @@ # along with this program. If not, see . import numpy as np import espressomd -import pyMBE + import re import json import pathlib -from pint import UnitRegistry - -ureg = UnitRegistry() +import pyMBE +import pyMBE.lib.handy_functions as hf # Create an instance of pyMBE library pmb = pyMBE.pymbe_library(seed=42) @@ -35,17 +34,13 @@ def custom_deserializer(dct): if "value" in dct and "unit" in dct: - return ureg.Quantity(dct["value"], dct["unit"]) + return pmb.units.Quantity(dct["value"], dct["unit"]) return dct protein_pdb = '1f6s' - -print("*** Unit test: check that read_protein_vtf_in_df() loads the protein topology correctly ***") - path_to_parfile = pathlib.Path(__file__).parent / "tests_data" / "protein_topology_dict.json" path_to_cg=pmb.root / "parameters" / "globular_proteins" / f"{protein_pdb}.vtf" -topology_dict = pmb.read_protein_vtf_in_df (filename=path_to_cg) - +topology_dict, sequence = pmb.read_protein_vtf (filename=path_to_cg) with open (path_to_parfile, "r") as file: load_json = json.load(file,object_hook=custom_deserializer) @@ -54,101 +49,77 @@ def custom_deserializer(dct): desired= load_json, verbose = True) -print("*** Unit test passed ***") - - -print("*** Unit test: check that define_protein() defines the aminoacids in the protein correctly ***") - protein_model = '2beadAA' -pmb.define_protein (name=protein_pdb, - topology_dict=topology_dict, - model = protein_model, - lj_setup_mode = "wca") -sequence = [] -clean_sequence= [] - +hf.define_protein_AA_particles(topology_dict=topology_dict, + pmb=pmb, + pka_set={}) +residue_list = 
hf.define_protein_AA_residues(topology_dict=topology_dict, + model=protein_model, + pmb=pmb) +pmb.define_protein (name=protein_pdb, + sequence=sequence, + model = protein_model) +clean_sequence= "" +full_residue_list = [] for aminoacid in topology_dict.keys(): - - input_parameters=topology_dict[aminoacid] residue_name = re.split(r'\d+', aminoacid)[0] - sequence.append(residue_name) - - if residue_name not in ['CA', 'n', 'c','Ca']: - clean_sequence.append(residue_name) - -residue_list = pmb.define_AA_residues(sequence= clean_sequence, - model = protein_model) + if residue_name not in ['CA', 'Ca']: + clean_sequence+=residue_name + full_residue_list.append(f"AA-{residue_name}") -for residue in residue_list: - for index in pmb.df[pmb.df['name']==residue].index: - np.testing.assert_equal(actual=str(pmb.df.loc[index, "pmb_type"].values[0]), - desired="residue", - verbose=True) - -protein_index = pmb.df[pmb.df['name']==protein_pdb].index +residue_dict = hf.get_residues_from_topology_dict(topology_dict=topology_dict, + model=protein_model) -np.testing.assert_equal(actual=str(pmb.df.loc[protein_index, "name"].values[0]), - desired=protein_pdb, - verbose=True) +# Check residue templates +for residue_name in residue_list: + residue_template = pmb.db.get_template(name=residue_name, + pmb_type="residue") + assert residue_template is not None + assert residue_template.pmb_type == "residue" + assert residue_template.name == residue_name -np.testing.assert_equal(actual=pmb.df.loc[protein_index, ('sequence','')].values[0], - desired=clean_sequence, - verbose=True) +# Check protein template +protein_template = pmb.db.get_template(name=protein_pdb, + pmb_type="protein") -np.testing.assert_equal(actual=pmb.df.loc[protein_index, ('residue_list','')].values[0], - desired=residue_list, - verbose=True) +assert protein_template is not None +assert protein_template.name == protein_pdb -print("*** Unit test passed ***") - -print("*** Unit test: check that define_protein() raises a ValueError 
if the user provides a wrong model") +np.testing.assert_equal(actual=protein_template.sequence, + desired=clean_sequence, + verbose=True) -input_parameters={"name": protein_pdb, - "topology_dict": topology_dict, - "model" : "3beadAA", - "lj_setup_mode": "wca"} +np.testing.assert_equal(actual=protein_template.residue_list, + desired=full_residue_list, + verbose=True) -np.testing.assert_raises(ValueError, pmb.define_protein, **input_parameters) input_parameters={"name": protein_pdb, - "topology_dict": topology_dict, - "model" : protein_model, - "lj_setup_mode": "awc"} + "sequence": sequence, + "model" : "3beadAA"} np.testing.assert_raises(ValueError, pmb.define_protein, **input_parameters) print("*** Unit test passed ***") - -print("*** Unit test: check that create_protein() creates all the particles in the protein into the espresso_system with the properties defined in pmb.df ***") - espresso_system=espressomd.System(box_l = [Box_L.to('reduced_length').magnitude] * 3) -# Here we upload the pka set from the reference_parameters folder -path_to_pka=pmb.root / "parameters" / "pka_sets" / "Nozaki1967.json" -pmb.load_pka_set(filename=path_to_pka) - -pmb.create_protein(name=protein_pdb, - number_of_proteins=1, - espresso_system=espresso_system, - topology_dict=topology_dict) - -residue_id_list = pmb.df.loc[~pmb.df['molecule_id'].isna()].residue_id.dropna().to_list() - -particle_id_list = pmb.df.loc[~pmb.df['molecule_id'].isna()].particle_id.dropna().to_list() - -molecule_id = pmb.df.loc[pmb.df['name']==protein_pdb].molecule_id.values[0] +molecule_id = pmb.create_protein(name=protein_pdb, + number_of_proteins=1, + espresso_system=espresso_system, + topology_dict=topology_dict)[0] -center_of_mass_es = pmb.calculate_center_of_mass_of_molecule ( molecule_id=molecule_id,espresso_system=espresso_system) +particle_id_list = pmb.get_particle_id_map(object_name=protein_pdb)["all"] +center_of_mass_es = pmb.calculate_center_of_mass_of_molecule (molecule_id=molecule_id, + 
espresso_system=espresso_system) center_of_mass = np.zeros(3) axis_list = [0,1,2] for aminoacid in topology_dict.keys(): initial_pos = topology_dict[aminoacid]['initial_pos'] - for axis in axis_list: center_of_mass[axis] += initial_pos[axis] center_of_mass = center_of_mass/ len(topology_dict.keys()) @@ -161,13 +132,16 @@ def custom_deserializer(dct): initial_pos_es = espresso_system.part.by_id(id).pos charge = espresso_system.part.by_id(id).q es_type = espresso_system.part.by_id(id).type + part_inst = pmb.db.get_instance(instance_id=id, + pmb_type="particle") + residue_id = part_inst.residue_id + res_inst = pmb.db.get_instance(instance_id=residue_id, + pmb_type="residue") + residue_name = res_inst.name - residue_id = pmb.df.loc[pmb.df['particle_id']==id].residue_id.values[0] - residue_name = pmb.df.loc[pmb.df['particle_id']==id].name.values[0] initial_pos = topology_dict[f"{residue_name}{residue_id}"]['initial_pos'] - index = pmb.df.loc[pmb.df['particle_id']==id].index - + for axis in axis_list: distance_es[axis] = (initial_pos_es[axis] - center_of_mass_es[axis])**2 distance_topology[axis] = (initial_pos[axis] - center_of_mass[axis])**2 @@ -179,18 +153,6 @@ def custom_deserializer(dct): desired=relative_distance, verbose=True) - np.testing.assert_equal(actual=charge, - desired=pmb.df.loc[index, ("state_one","z")].values[0], - verbose=True) - - np.testing.assert_equal(actual=es_type, - desired=pmb.df.loc[index, ("state_one","es_type")].values[0], - verbose=True) - -print("*** Unit test passed ***") - -print("*** Unit test: check that create_protein() does not create any protein for number_of_proteins <= 0 ***") - starting_number_of_particles=len(espresso_system.part.all()) pmb.create_protein(name=protein_pdb, @@ -207,9 +169,6 @@ def custom_deserializer(dct): desired=starting_number_of_particles, verbose=True) -print("*** Unit test passed ***") - -print("*** Unit test: check that enable_motion_of_rigid_object does the setup correctly ***") positions = [] for pid in 
particle_id_list: diff --git a/testsuite/test_io_database.py b/testsuite/test_io_database.py index 89c50ec..0ee1613 100644 --- a/testsuite/test_io_database.py +++ b/testsuite/test_io_database.py @@ -382,7 +382,7 @@ def test_io_instances(self): pmb.db.delete_reactions() # Test instances of a protein (tests protein, residue and particle instances) path_to_protein_structure = pmb.root / "parameters" / "globular_proteins" / f"1beb.vtf", - topology_dict = pmb.read_protein_vtf (filename=path_to_protein_structure[0]) + topology_dict, _ = pmb.read_protein_vtf (filename=path_to_protein_structure[0]) pmb.load_pka_set(filename=path_to_pka) # Define AA particles and residues hf.define_protein_AA_particles(topology_dict=topology_dict, From ddcbab32c1feafb0d8580385c0bf81d97996c57b Mon Sep 17 00:00:00 2001 From: Pablo Date: Sat, 24 Jan 2026 13:52:06 +0100 Subject: [PATCH 33/55] fix protein framework and unit test --- pyMBE/lib/handy_functions.py | 10 +- pyMBE/pyMBE.py | 803 ++++++++++++--------- testsuite/bond_tests.py | 6 +- testsuite/create_molecule_position_test.py | 27 +- testsuite/globular_protein_unit_tests.py | 650 +++++++---------- testsuite/parameter_test.py | 6 +- 6 files changed, 780 insertions(+), 722 deletions(-) diff --git a/pyMBE/lib/handy_functions.py b/pyMBE/lib/handy_functions.py index 743d2b3..0eda6ef 100644 --- a/pyMBE/lib/handy_functions.py +++ b/pyMBE/lib/handy_functions.py @@ -186,7 +186,7 @@ def define_protein_AA_particles(topology_dict, pmb, pka_set, lj_setup_mode="wca pmb.define_particle(**part_dict) defined_particles.append(particle_name) -def define_protein_AA_residues(topology_dict, model, pmb): +def define_protein_AA_residues(sequence, model, pmb): """ Define residue templates in the pyMBE database for a protein topology dict. @@ -226,12 +226,8 @@ def define_protein_AA_residues(topology_dict, model, pmb): - Residue names are constructed as `"AA-"`, e.g., `"AA-A"`, `"AA-L"`. 
""" - residue_list = [] - residues = get_residues_from_topology_dict(topology_dict=topology_dict, - model=model) - for res_id in residues.keys(): - item = residues[res_id]["resname"] + for item in sequence: if model == '1beadAA': central_bead = item side_chains = [] @@ -247,7 +243,7 @@ def define_protein_AA_residues(topology_dict, model, pmb): pmb.define_residue(name = residue_name, central_bead = central_bead, side_chains = side_chains) - residue_list.append(residue_name) + residue_list.append(residue_name) return residue_list def define_peptide_AA_residues(sequence,model, pmb): diff --git a/pyMBE/pyMBE.py b/pyMBE/pyMBE.py index 6adf80e..60070d6 100644 --- a/pyMBE/pyMBE.py +++ b/pyMBE/pyMBE.py @@ -54,43 +54,49 @@ class pymbe_library(): """ - Core library for the Molecular Builder for ESPResSo (pyMBE). - - Provides access to functions to define templates to build coarse-grained models of macromolecules. + Core library of the Molecular Builder for ESPResSo (pyMBE). Attributes: - N_A (pint.Quantity): Avogadro number. - kB (pint.Quantity): Boltzmann constant. - e (pint.Quantity): Elementary charge. - kT (pint.Quantity): Thermal energy at the set temperature. - Kw (pint.Quantity): Ionic product of water. Used in G-RxMC method setup. - db (Manager): Database manager instance for pyMBE objects. - rng (np.random.Generator): Random number generator initialized with the provided seed. - units (pint.UnitRegistry): Pint UnitRegistry for unit-aware calculations. - lattice_builder (optional): Placeholder for lattice builder object, initially None. - root (importlib.resources.Path): Root path to the pyMBE package resources. + N_A ('pint.Quantity'): + Avogadro number. + kB ('pint.Quantity'): + Boltzmann constant. + e ('pint.Quantity'): + Elementary charge. + kT ('pint.Quantity'): + Thermal energy corresponding to the set temperature. + Kw ('pint.Quantity'): + Ionic product of water, used in G-RxMC and Donnan-related calculations. 
+ db ('Manager'): + Database manager holding all pyMBE templates, instances and reactions. + rng ('numpy.random.Generator'): + Random number generator initialized with the provided seed. + units ('pint.UnitRegistry'): + Pint unit registry used for unit-aware calculations. + lattice_builder: + Optional lattice builder object (initialized as ``None``). + root ('importlib.resources.abc.Traversable'): + Root path to the pyMBE package resources. """ def __init__(self, seed, temperature=None, unit_length=None, unit_charge=None, Kw=None): """ - Initialize the pyMBE library. - - Sets up the reduced unit system using temperature, unit length, and unit charge, - initializes the pyMBE database, and sets default physical constants. + Initializes the pyMBE library. Args: - seed (int): Seed for the random number generator. - temperature (pint.Quantity, optional): Simulation temperature. Defaults to 298.15 K if None. - unit_length (pint.Quantity, optional): Reference length for reduced units. Defaults to 0.355 nm if None. - unit_charge (pint.Quantity, optional): Reference charge for reduced units. Defaults to 1 elementary charge if None. - Kw (pint.Quantity, optional): Ionic product of water in mol^2/l^2. Defaults to 1e-14 mol^2/l^2 if None. - - Notes: - - Initializes `self.rng` for random number generation. - - Sets fundamental constants: Avogadro number (`N_A`), Boltzmann constant (`kB`), elementary charge (`e`). - - Initializes the reduced units via `set_reduced_units`. - - Prepares an empty database (`self.db`) for pyMBE objects. - - Initializes placeholders for `lattice_builder` and package resource path (`root`). + seed ('int'): + Seed for the random number generator. + temperature ('pint.Quantity', optional): + Simulation temperature. If ``None``, defaults to 298.15 K. + unit_length ('pint.Quantity', optional): + Reference length for reduced units. If ``None``, defaults to + 0.355 nm. + unit_charge ('pint.Quantity', optional): + Reference charge for reduced units. 
If ``None``, defaults to + one elementary charge. + Kw ('pint.Quantity', optional): + Ionic product of water (typically in mol²/L²). If ``None``, + defaults to 1e-14 mol²/L². """ # Seed and RNG self.seed=seed @@ -115,8 +121,6 @@ def _check_bond_inputs(self, bond_type, bond_parameters): Args: bond_type(`str`): label to identify the potential to model the bond. bond_parameters(`dict`): parameters of the potential of the bond. - - """ valid_bond_types = ["harmonic", "FENE"] if bond_type not in valid_bond_types: @@ -147,6 +151,21 @@ def _check_dimensionality(self, variable, expected_dimensionality): raise ValueError(f"The variable {variable} should have a dimensionality of {expected_dimensionality}, instead the variable has a dimensionality of {variable.dimensionality}") return correct_dimensionality + def _check_pka_set(self, pka_set): + """ + Checks that `pka_set` has the formatting expected by pyMBE. + + Args: + pka_set (`dict`): + {"name" : {"pka_value": pka, "acidity": acidity}} + """ + required_keys=['pka_value','acidity'] + for required_key in required_keys: + for pka_name, pka_entry in pka_set.items(): + if required_key not in pka_entry: + raise ValueError(f'missing a required key "{required_key}" in entry "{pka_name}" of pka_set ("{pka_entry}")') + return + def _create_espresso_bond_instance(self, bond_type, bond_parameters): """ Creates an ESPResSo bond instance. @@ -188,18 +207,24 @@ def _create_hydrogel_chain(self, hydrogel_chain, nodes, espresso_system, use_def Creates a chain between two nodes of a hydrogel. Args: - hydrogel_chain(HydrogelChain): template of a hydrogel chain - nodes(dict): {node_index: {"name": node_particle_name, "pos": node_position, "id": node_particle_instance_id}} - espresso_system (espressomd.system.System): ESPResSo system object where the hydrogel chain will be created. - use_default_bond (bool, optional): If True, use a default bond template if no specific template exists. Defaults to False. 
+ hydrogel_chain ('HydrogelChain'): + template of a hydrogel chain + nodes ('dict'): + {node_index: {"name": node_particle_name, "pos": node_position, "id": node_particle_instance_id}} + + espresso_system ('espressomd.system.System'): + ESPResSo system object where the hydrogel chain will be created. + + use_default_bond ('bool', optional): + If True, use a default bond template if no specific template exists. Defaults to False. Return: - (int): molecule_id of the created hydrogel chian. + ('int'): + molecule_id of the created hydrogel chian. Note: - - For example, if the chain is defined between node_start = ``[0 0 0]`` and node_end = ``[1 1 1]``, the chain will be placed between these two nodes. + - If the chain is defined between node_start = ``[0 0 0]`` and node_end = ``[1 1 1]``, the chain will be placed between these two nodes. - The chain will be placed in the direction of the vector between `node_start` and `node_end`. - - This function does not support default bonds. """ if self.lattice_builder is None: raise ValueError("LatticeBuilder is not initialized. Use `initialize_lattice_builder` first.") @@ -288,17 +313,37 @@ def _create_hydrogel_node(self, node_index, node_name, espresso_system): self.lattice_builder.nodes[key] = node_name return node_position.tolist(), p_id[0] - + def _get_label_id_map(self, pmb_type): + """ + Returns the key used to access the particle ID map for a given pyMBE object type. + + Args: + pmb_type (`str`): + pyMBE object type for which the particle ID map label is requested. + + Returns: + `str`: + Label identifying the appropriate particle ID map. + """ + if pmb_type in self.db._assembly_like_types: + label="assembly_map" + elif pmb_type in self.db._molecule_like_types: + label="molecule_map" + else: + label=f"{pmb_type}_map" + return label def _get_residue_list_from_sequence(self, sequence): """ - Convinience function to get a `residue_list` from a protein or peptide `sequence`. 
+ Convenience function to get a `residue_list` from a protein or peptide `sequence`. Args: - sequence(`lst`): Sequence of the peptide or protein. + sequence (`lst`): + Sequence of the peptide or protein. Returns: - residue_list(`list` of `str`): List of the `name`s of the `residue`s in the sequence of the `molecule`. + residue_list (`list` of `str`): + List of the `name`s of the `residue`s in the sequence of the `molecule`. """ residue_list = [] for item in sequence: @@ -311,16 +356,19 @@ def _get_template_type(self, name, allowed_types): Validate that a template name resolves unambiguously to exactly one allowed pmb_type in the pyMBE database and return it. - Args: - name(str): Name of the template to validate. - allowed_types(set[str]): Set of allowed pmb_type values (e.g. {"molecule", "peptide"}). + name ('str'): + Name of the template to validate. + + allowed_types ('set[str]'): + Set of allowed pmb_type values (e.g. {"molecule", "peptide"}). Returns: - str: Resolved pmb_type. + str: + Resolved pmb_type. Notes: - This method does *not* return the template itself, only the validated pmb_type. + - This method does *not* return the template itself, only the validated pmb_type. """ registered_pmb_types_with_name = self.db._find_template_types(name=name) filtered_types = allowed_types.intersection(registered_pmb_types_with_name) @@ -335,41 +383,51 @@ def _delete_particles_from_espresso(self, particle_ids, espresso_system): Remove a list of particles from an ESPResSo simulation system. Args: - particle_ids : Iterable[int] - A list (or other iterable) of ESPResSo particle IDs to remove. + particle_ids ('Iterable[int]'): + A list (or other iterable) of ESPResSo particle IDs to remove. - espresso_system : espressomd.system.System - The ESPResSo simulation system from which the particles - will be removed. + espresso_system ('espressomd.system.System'): + The ESPResSo simulation system from which the particles + will be removed. 
- Note: - - This method removes particles only from the ESPResSo simulation, - **not** from the pyMBE database. Database cleanup must be handled - separately by the caller. - - Attempting to remove a non-existent particle ID will raise - an ESPResSo error. + Notes: + - This method removes particles only from the ESPResSo simulation, + **not** from the pyMBE database. Database cleanup must be handled + separately by the caller. + - Attempting to remove a non-existent particle ID will raise + an ESPResSo error. """ for pid in particle_ids: espresso_system.part.by_id(pid).remove() - - def calculate_center_of_mass_of_molecule(self, molecule_id, pmb_type, espresso_system): + def calculate_center_of_mass(self, instance_id, pmb_type, espresso_system): """ - Calculates the center of the molecule with a given molecule_id. + Calculates the center of mass of a pyMBE object instance in an ESPResSo system. Args: - molecule_id(`int`): Id of the molecule whose center of mass is to be calculated. - espresso_system(`espressomd.system.System`): Instance of a system object from the espressomd library. - + instance_id ('int'): + pyMBE instance ID of the object whose center of mass is calculated. + pmb_type ('str'): + Type of the pyMBE object. Must correspond to a particle-aggregating + template type (e.g. `"molecule"`, `"residue"`, `"peptide"`, `"protein"`). + espresso_system ('espressomd.system.System'): + ESPResSo system containing the particle instances. + Returns: - center_of_mass(`lst`): Coordinates of the center of mass. + center_of_mass ('numpy.ndarray'): + Array of shape `(3,)` containing the Cartesian coordinates of the + center of mass. + + Notes: + - This method assumes equal mass for all particles. + - Periodic boundary conditions are *not* unfolded; positions are taken + directly from ESPResSo particle coordinates. 
""" center_of_mass = np.zeros(3) axis_list = [0,1,2] - mol_inst = self.db.get_instance(pmb_type="molecule", - instance_id=molecule_id) - molecule_name = mol_inst.name - particle_id_list = self.get_particle_id_map(object_name=molecule_name)["all"] + inst = self.db.get_instance(pmb_type=pmb_type, + instance_id=instance_id) + particle_id_list = self.get_particle_id_map(object_name=inst.name)["all"] for pid in particle_id_list: for axis in axis_list: center_of_mass [axis] += espresso_system.part.by_id(pid).pos[axis] @@ -381,24 +439,24 @@ def calculate_HH(self, template_name, pH_list=None, pka_set=None): Calculates the charge in the template object according to the ideal Henderson–Hasselbalch titration curve. Args: - template_name (str): + template_name ('str'): Name of the template. - pH_list (list[float], optional): + pH_list ('list[float]', optional): pH values at which the charge is evaluated. Defaults to 50 values between 2 and 12. - pka_set (dict, optional): + pka_set ('dict', optional): Mapping: - {particle_name: {"pka_value": float, "acidity": "acidic"|"basic"}} + {particle_name: {"pka_value": 'float', "acidity": "acidic"|"basic"}} Returns: - list[float]: + 'list[float]': Net molecular charge at each pH value. """ if pH_list is None: pH_list = np.linspace(2, 12, 50) if pka_set is None: pka_set = self.get_pka_set() - self.check_pka_set(pka_set=pka_set) + self._check_pka_set(pka_set=pka_set) particle_counts = self.db.get_particle_templates_under(template_name=template_name, return_counts=True) if not particle_counts: @@ -432,29 +490,59 @@ def formal_charge(particle_name): def calculate_HH_Donnan(self, c_macro, c_salt, pH_list=None, pka_set=None): """ - Calculates the charge on the different molecules according to the Henderson-Hasselbalch equation coupled to the Donnan partitioning. + Computes macromolecular charges using the Henderson–Hasselbalch equation + coupled to ideal Donnan partitioning. 
Args: - c_macro('dict'): {"name": concentration} - A dict containing the concentrations of all charged macromolecular species in the system. - c_salt('float'): Salt concentration in the reservoir. - pH_list('lst'): List of pH-values in the reservoir. - pka_set('dict'): {"name": {"pka_value": pka, "acidity": acidity}}. + c_macro (`dict`): + Mapping of macromolecular species names to their concentrations + in the system: + `{molecule_name: concentration}`. + Concentrations must carry units compatible with molar concentration. + c_salt (`float` or `pint.Quantity`): + Salt concentration in the reservoir. + pH_list (`list[float]`, optional): + List of pH values in the reservoir at which the calculation is + performed. If `None`, 50 equally spaced values between 2 and 12 + are used. + pka_set (`dict`, optional): + Dictionary defining the acid–base properties of titratable particle + types: + `{particle_name: {"pka_value": float, "acidity": "acidic" | "basic"}}`. + If `None`, the pKa set is taken from the pyMBE database. Returns: - {"charges_dict": {"name": charges}, "pH_system_list": pH_system_list, "partition_coefficients": partition_coefficients_list} - pH_system_list ('lst'): List of pH_values in the system. - partition_coefficients_list ('lst'): List of partition coefficients of cations. + `dict`: + Dictionary containing: + - `"charges_dict"` (`dict`): + Mapping `{molecule_name: list}` of Henderson–Hasselbalch–Donnan + charges evaluated at each pH value. + - `"pH_system_list"` (`list[float]`): + Effective pH values inside the system phase after Donnan + partitioning. + - `"partition_coefficients"` (`list[float]`): + Partition coefficients of monovalent cations at each pH value. - Note: - - If no `pH_list` is given, 50 equispaced pH-values ranging from 2 to 12 are calculated - - If no `pka_set` is given, the pKa values are taken from `pmb.df` - - If `c_macro` does not contain all charged molecules in the system, this function is likely to provide the wrong result. 
+ Raises: + ValueError: + If the provided `pka_set` is invalid or inconsistent. + + Notes: + - This method assumes **ideal Donnan equilibrium** and **monovalent salt**. + - The ionic strength of the reservoir includes both salt and + pH-dependent H⁺/OH⁻ contributions. + - All charged macromolecular species present in the system must be + included in `c_macro`; missing species will lead to incorrect results. + - The nonlinear Donnan equilibrium equation is solved using a scalar + root finder (`brentq`) in logarithmic form for numerical stability. + - This method is intended for **two-phase systems**; for single-phase + systems use `calculate_HH` instead. """ if pH_list is None: pH_list=np.linspace(2,12,50) if pka_set is None: pka_set=self.get_pka_set() - self.check_pka_set(pka_set=pka_set) + self._check_pka_set(pka_set=pka_set) partition_coefficients_list = [] pH_system_list = [] Z_HH_Donnan={} @@ -528,12 +616,7 @@ def calculate_net_charge(self,espresso_system,object_name,pmb_type,dimensionless {"mean": mean_net_charge, "instances": {instance_id: net_charge}} """ id_map = self.get_particle_id_map(object_name=object_name) - if pmb_type in self.db._assembly_like_types: - label="assembly_map" - elif pmb_type in self.db._molecule_like_types: - label="molecule_map" - else: - label=f"{pmb_type}_map" + label = self._get_label_id_map(pmb_type=pmb_type) instance_map = id_map[label] charges = {} for instance_id, particle_ids in instance_map.items(): @@ -554,44 +637,40 @@ def calculate_net_charge(self,espresso_system,object_name,pmb_type,dimensionless mean_charge = (np.mean([q.magnitude for q in charges.values()])* self.units.Quantity(1, "reduced_charge")) return {"mean": mean_charge, "instances": charges} - def center_molecule_in_simulation_box(self, molecule_id, espresso_system, pmb_type="molecule"): + def center_object_in_simulation_box(self, instance_id, espresso_system, pmb_type): """ - Centers the pmb object matching `molecule_id` in the center of the simulation box in 
`espresso_md`. - + Centers a pyMBE object instance in the simulation box of an ESPResSo system. + The object is translated such that its center of mass coincides with the + geometric center of the ESPResSo simulation box. + Args: - molecule_id(`int`): Id of the molecule to be centered. - espresso_system(`espressomd.system.System`): Instance of a system object from the espressomd library. + instance_id (`int`): + ID of the pyMBE object instance to be centered. + + pmb_type (`str`): + Type of the pyMBE object. + + espresso_system (`espressomd.system.System`): + ESPResSo system object in which the particles are defined. + + Notes: + - Works for both cubic and non-cubic simulation boxes. """ if pmb_type not in self.db._molecule_like_types: raise ValueError(f"Input pmb_type = {pmb_type} not supported, supported pyMBE types are: {self.db._molecule_like_types}.") - mol_inst = self.db.get_instance(instance_id=molecule_id, - pmb_type=pmb_type) - center_of_mass = self.calculate_center_of_mass_of_molecule(molecule_id=molecule_id, - espresso_system=espresso_system) + inst = self.db.get_instance(instance_id=instance_id, + pmb_type=pmb_type) + center_of_mass = self.calculate_center_of_mass(instance_id=instance_id, + espresso_system=espresso_system, + pmb_type=pmb_type) box_center = [espresso_system.box_l[0]/2.0, espresso_system.box_l[1]/2.0, espresso_system.box_l[2]/2.0] - particle_id_list = self.get_particle_id_map(object_name=mol_inst.name)["all"] + particle_id_list = self.get_particle_id_map(object_name=inst.name)["all"] for pid in particle_id_list: es_pos = espresso_system.part.by_id(pid).pos espresso_system.part.by_id(pid).pos = es_pos - center_of_mass + box_center - - - def check_pka_set(self, pka_set): - """ - Checks that `pka_set` has the formatting expected by the pyMBE library. 
- - Args: - pka_set(`dict`): {"name" : {"pka_value": pka, "acidity": acidity}} - """ - required_keys=['pka_value','acidity'] - for required_key in required_keys: - for pka_name, pka_entry in pka_set.items(): - if required_key not in pka_entry: - raise ValueError(f'missing a required key "{required_key}" in entry "{pka_name}" of pka_set ("{pka_entry}")') - return - def create_added_salt(self, espresso_system, cation_name, anion_name, c_salt): """ Creates a `c_salt` concentration of `cation_name` and `anion_name` ions into the `espresso_system`. @@ -1008,9 +1087,6 @@ def create_protein(self, name, number_of_proteins, espresso_system, topology_dic protein_tpl = self.db.get_template(pmb_type="protein", name=name) box_half = espresso_system.box_l[0] / 2.0 - - residues = hf.get_residues_from_topology_dict(topology_dict=topology_dict, - model=protein_tpl.model) # Create protein mol_ids = [] for _ in range(number_of_proteins): @@ -1021,6 +1097,8 @@ def create_protein(self, name, number_of_proteins, espresso_system, topology_dic max_dist=box_half, n_samples=1, center=[box_half]*3)[0] + residues = hf.get_residues_from_topology_dict(topology_dict=topology_dict, + model=protein_tpl.model) # CREATE RESIDUES + PARTICLES for _, rdata in residues.items(): base_resname = rdata["resname"] @@ -1031,7 +1109,6 @@ def create_protein(self, name, number_of_proteins, espresso_system, topology_dic self.db._register_instance(ResidueInstance(name=residue_name, residue_id=residue_id, molecule_id=molecule_id)) - # PARTICLE CREATION for bead_id in rdata["beads"]: bead_type = re.split(r'\d+', bead_id)[0] @@ -1042,7 +1119,6 @@ def create_protein(self, name, number_of_proteins, espresso_system, topology_dic number_of_particles=1, position=[absolute_pos], fix=True)[0] - # update metadata self.db._update_instance(instance_id=particle_id, pmb_type="particle", @@ -1516,158 +1592,185 @@ def delete_instances_in_system(self, instance_id, pmb_type, espresso_system): particle_ids = 
self.db._find_instance_ids_by_attribute(pmb_type="particle", attribute=instance_identifier, value=instance_id) - self._delete_particles_from_espresso(particle_ids=particle_ids, espresso_system=espresso_system) - self.db.delete_instance(pmb_type=pmb_type, instance_id=instance_id, cascade=True) def determine_reservoir_concentrations(self, pH_res, c_salt_res, activity_coefficient_monovalent_pair, max_number_sc_runs=200): """ - Determines the concentrations of the various species in the reservoir for given values of the pH and salt concentration. - To do this, a system of nonlinear equations involving the pH, the ionic product of water, the activity coefficient of an - ion pair and the concentrations of the various species is solved numerically using a self-consistent approach. - More details can be found in chapter 5.3 of Landsgesell (doi.org/10.18419/opus-10831). - This is a modified version of the code by Landsgesell et al. (doi.org/10.18419/darus-2237). + Determines ionic concentrations in the reservoir at fixed pH and salt concentration. + + This method computes the equilibrium concentrations of H⁺, OH⁻, Na⁺, and Cl⁻ + ions in a reservoir by solving a coupled, nonlinear system of equations that + includes water autodissociation and non-ideal activity effects. The solution + is obtained via a self-consistent iterative procedure. Args: - pH_res('float'): pH-value in the reservoir. - c_salt_res('pint.Quantity'): Concentration of monovalent salt (e.g. NaCl) in the reservoir. - activity_coefficient_monovalent_pair('callable', optional): A function that calculates the activity coefficient of an ion pair as a function of the ionic strength. + pH_res ('float'): + Target pH value in the reservoir. + + c_salt_res ('pint.Quantity'): + Concentration of monovalent salt (e.g., NaCl) in the reservoir. 
+ + activity_coefficient_monovalent_pair ('callable'): + Function returning the activity coefficient of a monovalent ion pair + as a function of ionic strength: + `gamma = activity_coefficient_monovalent_pair(I)`. + + max_number_sc_runs ('int', optional): + Maximum number of self-consistent iterations allowed before + convergence is enforced. Defaults to 200. Returns: - cH_res('pint.Quantity'): Concentration of H+ ions. - cOH_res('pint.Quantity'): Concentration of OH- ions. - cNa_res('pint.Quantity'): Concentration of Na+ ions. - cCl_res('pint.Quantity'): Concentration of Cl- ions. + tuple: + (cH_res, cOH_res, cNa_res, cCl_res) + - cH_res ('pint.Quantity'): Concentration of H⁺ ions. + - cOH_res ('pint.Quantity'): Concentration of OH⁻ ions. + - cNa_res ('pint.Quantity'): Concentration of Na⁺ ions. + - cCl_res ('pint.Quantity'): Concentration of Cl⁻ ions. + + Notes: + - The algorithm enforces electroneutrality in the reservoir. + - Water autodissociation is included via the equilibrium constant `Kw`. + - Non-ideal effects enter through activity coefficients depending on + ionic strength. + - The implementation follows the self-consistent scheme described in + Landsgesell (PhD thesis, Sec. 5.3, doi:10.18419/opus-10831), adapted + from the original code (doi:10.18419/darus-2237). """ + def determine_reservoir_concentrations_selfconsistently(cH_res, c_salt_res): + """ + Iteratively determines reservoir ion concentrations self-consistently. - self_consistent_run = 0 - cH_res = 10**(-pH_res) * self.units.mol/self.units.l #initial guess for the concentration of H+ + Args: + cH_res ('pint.Quantity'): + Current estimate of the H⁺ concentration. + c_salt_res ('pint.Quantity'): + Concentration of monovalent salt in the reservoir. 
- def determine_reservoir_concentrations_selfconsistently(cH_res, c_salt_res): - #Calculate and initial guess for the concentrations of various species based on ideal gas estimate - cOH_res = self.Kw / cH_res - cNa_res = None - cCl_res = None - if cOH_res>=cH_res: - #adjust the concentration of sodium if there is excess OH- in the reservoir: - cNa_res = c_salt_res + (cOH_res-cH_res) + Returns: + 'tuple': + (cH_res, cOH_res, cNa_res, cCl_res) + """ + # Initial ideal estimate + cOH_res = self.Kw / cH_res + if cOH_res >= cH_res: + cNa_res = c_salt_res + (cOH_res - cH_res) cCl_res = c_salt_res else: - # adjust the concentration of chloride if there is excess H+ in the reservoir - cCl_res = c_salt_res + (cH_res-cOH_res) + cCl_res = c_salt_res + (cH_res - cOH_res) cNa_res = c_salt_res - - def calculate_concentrations_self_consistently(cH_res, cOH_res, cNa_res, cCl_res): - nonlocal max_number_sc_runs, self_consistent_run - if self_consistent_run=cH_res: - #adjust the concentration of sodium if there is excess OH- in the reservoir: - cNa_res = c_salt_res + (cOH_res-cH_res) - cCl_res = c_salt_res - else: - # adjust the concentration of chloride if there is excess H+ in the reservoir - cCl_res = c_salt_res + (cH_res-cOH_res) - cNa_res = c_salt_res - return calculate_concentrations_self_consistently(cH_res, cOH_res, cNa_res, cCl_res) + # Self-consistent iteration + for _ in range(max_number_sc_runs): + ionic_strength_res = 0.5 * (cNa_res + cCl_res + cOH_res + cH_res) + cOH_new = self.Kw / (cH_res * activity_coefficient_monovalent_pair(ionic_strength_res)) + if cOH_new >= cH_res: + cNa_new = c_salt_res + (cOH_new - cH_res) + cCl_new = c_salt_res else: - return cH_res, cOH_res, cNa_res, cCl_res - return calculate_concentrations_self_consistently(cH_res, cOH_res, cNa_res, cCl_res) - - cH_res, cOH_res, cNa_res, cCl_res = determine_reservoir_concentrations_selfconsistently(cH_res, c_salt_res) - ionic_strength_res = 0.5*(cNa_res+cCl_res+cOH_res+cH_res) - determined_pH = 
-np.log10(cH_res.to('mol/L').magnitude * np.sqrt(activity_coefficient_monovalent_pair(ionic_strength_res))) - - while abs(determined_pH-pH_res)>1e-6: + cCl_new = c_salt_res + (cH_res - cOH_new) + cNa_new = c_salt_res + # Update values + cOH_res = cOH_new + cNa_res = cNa_new + cCl_res = cCl_new + return cH_res, cOH_res, cNa_res, cCl_res + # Initial guess for H+ concentration from target pH + cH_res = 10 ** (-pH_res) * self.units.mol / self.units.l + # First self-consistent solve + cH_res, cOH_res, cNa_res, cCl_res = (determine_reservoir_concentrations_selfconsistently(cH_res, + c_salt_res)) + ionic_strength_res = 0.5 * (cNa_res + cCl_res + cOH_res + cH_res) + determined_pH = -np.log10(cH_res.to("mol/L").magnitude* np.sqrt(activity_coefficient_monovalent_pair(ionic_strength_res))) + # Outer loop to enforce target pH + while abs(determined_pH - pH_res) > 1e-6: if determined_pH > pH_res: cH_res *= 1.005 else: cH_res /= 1.003 - cH_res, cOH_res, cNa_res, cCl_res = determine_reservoir_concentrations_selfconsistently(cH_res, c_salt_res) - ionic_strength_res = 0.5*(cNa_res+cCl_res+cOH_res+cH_res) - determined_pH = -np.log10(cH_res.to('mol/L').magnitude * np.sqrt(activity_coefficient_monovalent_pair(ionic_strength_res))) - self_consistent_run=0 - + cH_res, cOH_res, cNa_res, cCl_res = (determine_reservoir_concentrations_selfconsistently(cH_res, + c_salt_res)) + ionic_strength_res = 0.5 * (cNa_res + cCl_res + cOH_res + cH_res) + determined_pH = -np.log10(cH_res.to("mol/L").magnitude * np.sqrt(activity_coefficient_monovalent_pair(ionic_strength_res))) return cH_res, cOH_res, cNa_res, cCl_res - def enable_motion_of_rigid_object(self, name, espresso_system): - ''' - Enables the motion of the rigid object `name` in the `espresso_system`. 
+ def enable_motion_of_rigid_object(self, instance_id, pmb_type, espresso_system): + """ + Enables translational and rotational motion of a rigid pyMBE object instance + in an ESPResSo system.This method creates a rigid-body center particle at the center of mass of + the specified pyMBE object and attaches all constituent particles to it + using ESPResSo virtual sites. The resulting rigid object can translate and + rotate as a single body. Args: - name(`str`): Label of the object. - espresso_system(`espressomd.system.System`): Instance of a system object from the espressomd library. + instance_id (`int`): + Instance ID of the pyMBE object whose rigid-body motion is enabled. - Note: - - It requires that espressomd has the following features activated: ["VIRTUAL_SITES_RELATIVE", "MASS"]. - ''' - logging.info('enable_motion_of_rigid_object requires that espressomd has the following features activated: ["VIRTUAL_SITES_RELATIVE", "MASS"]') - self._check_supported_molecule(molecule_name=name, - valid_pmb_types= ['protein']) - molecule_ids_list = self.df.loc[self.df['name']==name].molecule_id.to_list() - for molecule_id in molecule_ids_list: - particle_ids_list = self.df.loc[self.df['molecule_id']==molecule_id].particle_id.dropna().to_list() - center_of_mass = self.calculate_center_of_mass_of_molecule ( molecule_id=molecule_id,espresso_system=espresso_system) - rigid_object_center = espresso_system.part.add(pos=center_of_mass, - rotation=[True,True,True], - type=self.propose_unused_type()) - - rigid_object_center.mass = len(particle_ids_list) - momI = 0 - for pid in particle_ids_list: - momI += np.power(np.linalg.norm(center_of_mass - espresso_system.part.by_id(pid).pos), 2) + pmb_type (`str`): + pyMBE object type of the instance (e.g. `"molecule"`, `"peptide"`, + `"protein"`, or any assembly-like type). 
- rigid_object_center.rinertia = np.ones(3) * momI - - for particle_id in particle_ids_list: - pid = espresso_system.part.by_id(particle_id) - pid.vs_auto_relate_to(rigid_object_center.id) - return + espresso_system (`espressomd.system.System`): + ESPResSo system in which the rigid object is defined. - def find_value_from_es_type(self, es_type, column_name): + Notes: + - This method requires ESPResSo to be compiled with the following + features enabled: + - `"VIRTUAL_SITES_RELATIVE"` + - `"MASS"` + - A new ESPResSo particle is created to represent the rigid-body center. + - The mass of the rigid-body center is set to the number of particles + belonging to the object. + - The rotational inertia tensor is approximated from the squared + distances of the particles to the center of mass. """ - Finds a value in `pmb.df` for a `column_name` and `es_type` pair. - - Args: - es_type(`int`): value of the espresso type - column_name(`str`): name of the column in `pymbe.df` + logging.info('enable_motion_of_rigid_object requires that espressomd has the following features activated: ["VIRTUAL_SITES_RELATIVE", "MASS"]') + inst = self.db.get_instance(pmb_type=pmb_type, + instance_id=instance_id) + label = self._get_label_id_map(pmb_type=pmb_type) + particle_ids_list = self.get_particle_id_map(object_name=inst.name)[label] + center_of_mass = self.calculate_center_of_mass (instance_id=instance_id, + espresso_system=espresso_system, + pmb_type=pmb_type) + rigid_object_center = espresso_system.part.add(pos=center_of_mass, + rotation=[True,True,True], + type=self.propose_unused_type()) + rigid_object_center.mass = len(particle_ids_list) + momI = 0 + for pid in particle_ids_list: + momI += np.power(np.linalg.norm(center_of_mass - espresso_system.part.by_id(pid).pos), 2) + rigid_object_center.rinertia = np.ones(3) * momI + for particle_id in particle_ids_list: + pid = espresso_system.part.by_id(particle_id) + pid.vs_auto_relate_to(rigid_object_center.id) - Returns: - Value in `pymbe.df` 
matching `column_name` and `es_type` + def generate_coordinates_outside_sphere(self, center, radius, max_dist, n_samples): """ - idx = pd.IndexSlice - for state in ['state_one', 'state_two']: - index = self.df.loc[self.df[(state, 'es_type')] == es_type].index - if len(index) > 0: - if column_name == 'label': - label = self.df.loc[idx[index[0]], idx[(state,column_name)]] - return label - else: - column_name_value = self.df.loc[idx[index[0]], idx[(column_name,'')]] - return column_name_value + Generates random coordinates outside a sphere and inside a larger bounding sphere. - def format_node(self, node_list): - return "[" + " ".join(map(str, node_list)) + "]" + Args: + center (`array-like`): + Coordinates of the center of the spheres. + radius (`float`): + Radius of the inner exclusion sphere. Must be positive. - def generate_coordinates_outside_sphere(self, center, radius, max_dist, n_samples): - """ - Generates coordinates outside a sphere centered at `center`. + max_dist (`float`): + Radius of the outer sampling sphere. Must be larger than `radius`. - Args: - center(`lst`): Coordinates of the center of the sphere. - radius(`float`): Radius of the sphere. - max_dist(`float`): Maximum distance from the center of the spahre to generate coordinates. - n_samples(`int`): Number of sample points. + n_samples (`int`): + Number of coordinates to generate. Returns: - coord_list(`lst`): Coordinates of the sample points. + 'list' of `numpy.ndarray`: + List of coordinates lying outside the inner sphere and inside the + outer sphere. + + Notes: + - Points are uniformly sampled inside a sphere of radius `max_dist` centered at `center` + and only those with a distance greater than or equal to `radius` from the center are retained. 
""" if not radius > 0: raise ValueError (f'The value of {radius} must be a positive value') @@ -1686,17 +1789,30 @@ def generate_coordinates_outside_sphere(self, center, radius, max_dist, n_sample def generate_random_points_in_a_sphere(self, center, radius, n_samples, on_surface=False): """ - Uniformly samples points from a hypersphere. If on_surface is set to True, the points are - uniformly sampled from the surface of the hypersphere. - + Generates uniformly distributed random points inside or on the surface of a sphere. + Args: - center(`lst`): Array with the coordinates of the center of the spheres. - radius(`float`): Radius of the sphere. - n_samples(`int`): Number of sample points to generate inside the sphere. - on_surface (`bool`, optional): If set to True, points will be uniformly sampled on the surface of the hypersphere. + center (`array-like`): + Coordinates of the center of the sphere. + + radius (`float`): + Radius of the sphere. + + n_samples (`int`): + Number of sample points to generate. + + on_surface (`bool`, optional): + If True, points are uniformly sampled on the surface of the sphere. + If False, points are uniformly sampled within the sphere volume. + Defaults to False. Returns: - samples(`list`): Coordinates of the sample points inside the hypersphere. + 'numpy.ndarray': + Array of shape `(n_samples, d)` containing the generated coordinates, + where `d` is the dimensionality of `center`. + Notes: + - Points are sampled in a space whose dimensionality is inferred + from the length of `center`. """ # initial values center=np.array(center) @@ -1717,14 +1833,18 @@ def generate_random_points_in_a_sphere(self, center, radius, n_samples, on_surfa def generate_trial_perpendicular_vector(self,vector,magnitude): """ - Generates an orthogonal vector to the input `vector`. + Generates a random vector perpendicular to a given vector. Args: - vector(`lst`): arbitrary vector. - magnitude(`float`): magnitude of the orthogonal vector. 
- + vector (`array-like`): + Reference vector to which the generated vector will be perpendicular. + + magnitude (`float`): + Desired magnitude of the perpendicular vector. + Returns: - (`lst`): Orthogonal vector with the same magnitude as the input vector. + 'numpy.ndarray': + Vector orthogonal to `vector` with norm equal to `magnitude`. """ np_vec = np.array(vector) if np.all(np_vec == 0): @@ -1744,19 +1864,26 @@ def generate_trial_perpendicular_vector(self,vector,magnitude): def get_bond_template(self, particle_name1, particle_name2, use_default_bond=False) : """ - Searches for bond template linking particle templates with `particle_name1` and `particle_name2` names in the pyMBE database and returns it. - If `use_default_bond` is activated and a "default" bond is defined, returns the default bond template instead. + Retrieves a bond template connecting two particle templates. Args: - particle_name1(`str`): label of the type of the first particle type of the bonded particles. - particle_name2(`str`): label of the type of the second particle type of the bonded particles. - use_default_bond(`bool`, optional): If it is activated, the "default" bond is returned if no bond is found between `particle_name1` and `particle_name2`. Defaults to False. + particle_name1 (`str`): + Name of the first particle template. + + particle_name2 (`str`): + Name of the second particle template. + + use_default_bond (`bool`, optional): + If True, returns the default bond template when no specific bond + template is found. Defaults to False. Returns: - bond(`espressomd.interactions.BondedInteractions`): bond object from the espressomd library. - - Note: - - If `use_default_bond`=True and no bond is defined between `particle_name1` and `particle_name2`, it returns the default bond defined in the pyMBE database. + 'BondTemplate': + Bond template object retrieved from the pyMBE database. 
+ + Notes: + - This method searches the pyMBE database for a bond template defined between particle templates with names `particle_name1` and `particle_name2`. + - If no specific bond template is found and `use_default_bond` is enabled, a default bond template is returned instead. """ # Try to find a specific bond template bond_key = BondTemplate.make_bond_key(pn1=particle_name1, @@ -2105,7 +2232,7 @@ def load_pka_set(self, filename): pka_data = json.load(f) pka_set = pka_data["data"] metadata = pka_data.get("metadata", {}) - self.check_pka_set(pka_set) + self._check_pka_set(pka_set) for particle_name, entry in pka_set.items(): acidity = entry["acidity"] pka = entry["pka_value"] @@ -2140,76 +2267,104 @@ def propose_unused_type(self): return 0 return max(all_types) + 1 - def read_protein_vtf(self,filename,unit_length=None): + def read_protein_vtf(self, filename, unit_length=None): """ - Loads a coarse-grained protein model in a VTF file `filename`. + Loads a coarse-grained protein model from a VTF file. Args: - filename(str): Path to the VTF file with the coarse-grained model. - unit_length(obj): unit of length of the the coordinates in `filename` using the pyMBE UnitRegistry. Defaults to None. + filename ('str'): + Path to the VTF file. + unit_length ('Pint.Quantity'): + Unit of length for coordinates (pyMBE UnitRegistry). Defaults to Angstrom. Returns: - topology_dict(dict): {'initial_pos': coords_list, 'chain_id': id, 'sigma': sigma_value} - sequence(str): Amino acid sequence, following the one letter code convection. - - Note: - - If no `unit_length` is provided, it is assumed that the coordinates are in Angstrom. + topology_dict ('dict'): + Particle topology. + + sequence ('str'): + One-letter amino-acid sequence (including n/c ends). 
""" - - logging.info(f'Loading protein coarse grain model file: {filename}') - coord_list = [] - particles_dict = {} + logging.info(f"Loading protein coarse-grain model file: {filename}") if unit_length is None: - unit_length = 1 * self.units.angstrom - with open (filename,'r') as protein_model: - for line in protein_model : - line_split = line.split() - if line_split: - line_header = line_split[0] - if line_header == 'atom': - atom_id = line_split[1] - atom_name = line_split[3] - atom_resname = line_split[5] - chain_id = line_split[9] - radius = float(line_split [11])*unit_length - particles_dict [int(atom_id)] = [atom_name , atom_resname, chain_id, radius] - elif line_header.isnumeric(): - atom_coord = line_split[1:] - atom_coord = [(float(i)*unit_length).to('reduced_length').magnitude for i in atom_coord] - coord_list.append (atom_coord) - numbered_label = [] - i = 0 - sequence = "" - for atom_id in particles_dict.keys(): - if atom_id == 1: - atom_name = particles_dict[atom_id][0] - numbered_name = [f'{atom_name}{i}',particles_dict[atom_id][2],particles_dict[atom_id][3]] - numbered_label.append(numbered_name) - elif atom_id != 1: - if particles_dict[atom_id-1][1] != particles_dict[atom_id][1]: - i += 1 - count = 1 - atom_name = particles_dict[atom_id][0] - numbered_name = [f'{atom_name}{i}',particles_dict[atom_id][2],particles_dict[atom_id][3]] - numbered_label.append(numbered_name) - elif particles_dict[atom_id-1][1] == particles_dict[atom_id][1]: - if count == 2 or particles_dict[atom_id][1] == 'GLY': - i +=1 - count = 0 - atom_name = particles_dict[atom_id][0] - numbered_name = [f'{atom_name}{i}',particles_dict[atom_id][2],particles_dict[atom_id][3]] - numbered_label.append(numbered_name) - count +=1 - if atom_name not in ["CA", "Ca"]: - sequence += atom_name + unit_length = 1 * self.units.angstrom + atoms = {} # atom_id -> atom info + coords = [] # ordered coordinates + residues = {} # resid -> resname (first occurrence) + has_n_term = False + has_c_term = 
False + aa_3to1 = {"ALA": "A", "ARG": "R", "ASN": "N", "ASP": "D", + "CYS": "C", "GLU": "E", "GLN": "Q", "GLY": "G", + "HIS": "H", "ILE": "I", "LEU": "L", "LYS": "K", + "MET": "M", "PHE": "F", "PRO": "P", "SER": "S", + "THR": "T", "TRP": "W", "TYR": "Y", "VAL": "V", + "n": "n", "c": "c"} + # --- parse VTF --- + with open(filename, "r") as f: + for line in f: + fields = line.split() + if not fields: + continue + if fields[0] == "atom": + atom_id = int(fields[1]) + atom_name = fields[3] + resname = fields[5] + resid = int(fields[7]) + chain_id = fields[9] + radius = float(fields[11]) * unit_length + atoms[atom_id] = {"name": atom_name, + "resname": resname, + "resid": resid, + "chain_id": chain_id, + "radius": radius} + if resname == "n": + has_n_term = True + elif resname == "c": + has_c_term = True + # register residue + if resid not in residues: + residues[resid] = resname + elif fields[0].isnumeric(): + xyz = [(float(x) * unit_length).to("reduced_length").magnitude + for x in fields[1:4]] + coords.append(xyz) + sequence = "" + # N-terminus + if has_n_term: + sequence += "n" + # protein residues only + protein_resids = sorted(resid for resid, resname in residues.items() if resname not in ("n", "c", "Ca")) + for resid in protein_resids: + resname = residues[resid] + try: + sequence += aa_3to1[resname] + except KeyError: + raise ValueError(f"Unknown residue name '{resname}' in VTF file") + # C-terminus + if has_c_term: + sequence += "c" + last_resid = max(protein_resids) + # --- build topology --- topology_dict = {} - for i in range (0, len(numbered_label)): - topology_dict [numbered_label[i][0]] = {'initial_pos': coord_list[i] , - 'chain_id':numbered_label[i][1], - 'radius':numbered_label[i][2] } - + for atom_id in sorted(atoms.keys()): + atom = atoms[atom_id] + resname = atom["resname"] + resid = atom["resid"] + # apply labeling rules + if resname == "n": + label_resid = 0 + elif resname == "c": + label_resid = last_resid + 1 + elif resname == "Ca": + label_resid 
= last_resid + 2 + else: + label_resid = resid # preserve original resid + label = f"{atom['name']}{label_resid}" + if label in topology_dict: + raise ValueError(f"Duplicate particle label '{label}'. Check VTF residue definitions.") + topology_dict[label] = {"initial_pos": coords[atom_id - 1], "chain_id": atom["chain_id"], "radius": atom["radius"],} return topology_dict, sequence + def save_database(self, folder, format='csv'): """ Saves the current pyMBE database into a file `filename`. @@ -2296,7 +2451,7 @@ def setup_cpH (self, counter_ion, constant_pH, exclusion_range=None, pka_set=Non exclusion_range = max(self.get_radius_map().values())*2.0 if pka_set is None: pka_set=self.get_pka_set() - self.check_pka_set(pka_set=pka_set) + self._check_pka_set(pka_set=pka_set) if use_exclusion_radius_per_type: exclusion_radius_per_type = self.get_radius_map() else: @@ -2417,7 +2572,7 @@ def setup_grxmc_reactions(self, pH_res, c_salt_res, proton_name, hydroxide_name, exclusion_range = max(self.get_radius_map().values())*2.0 if pka_set is None: pka_set=self.get_pka_set() - self.check_pka_set(pka_set=pka_set) + self._check_pka_set(pka_set=pka_set) if use_exclusion_radius_per_type: exclusion_radius_per_type = self.get_radius_map() else: @@ -2621,7 +2776,7 @@ def setup_grxmc_unified(self, pH_res, c_salt_res, cation_name, anion_name, activ exclusion_range = max(self.get_radius_map().values())*2.0 if pka_set is None: pka_set=self.get_pka_set() - self.check_pka_set(pka_set=pka_set) + self._check_pka_set(pka_set=pka_set) if use_exclusion_radius_per_type: exclusion_radius_per_type = self.get_radius_map() else: diff --git a/testsuite/bond_tests.py b/testsuite/bond_tests.py index 3c899b9..ed32ab4 100644 --- a/testsuite/bond_tests.py +++ b/testsuite/bond_tests.py @@ -71,9 +71,9 @@ def check_bond_setup(self, bond_object, input_parameters, bond_type): 'k' : 'reduced_energy / reduced_length**2', 'd_r_max': 'reduced_length'} for key in input_parameters.keys(): - 
np.testing.assert_equal(actual=bond_params[key], - desired=input_parameters[key].m_as(reduced_units[key]), - verbose=True) + np.testing.assert_almost_equal(actual=bond_params[key], + desired=input_parameters[key].m_as(reduced_units[key]), + verbose=True) def test_bond_setup(self): """ diff --git a/testsuite/create_molecule_position_test.py b/testsuite/create_molecule_position_test.py index 5004bea..8b3e210 100644 --- a/testsuite/create_molecule_position_test.py +++ b/testsuite/create_molecule_position_test.py @@ -116,10 +116,12 @@ def test_center_molecule_in_simulation_box(self): list_of_first_residue_positions = pos_list) # Check that center_molecule_in_simulation_box works correctly for cubic boxes - pmb.center_molecule_in_simulation_box(molecule_id=molecule_ids[0], - espresso_system=espresso_system) - center_of_mass = pmb.calculate_center_of_mass_of_molecule(molecule_id=molecule_ids[0], - espresso_system=espresso_system) + pmb.center_object_in_simulation_box(instance_id=molecule_ids[0], + espresso_system=espresso_system, + pmb_type="molecule") + center_of_mass = pmb.calculate_center_of_mass(instance_id=molecule_ids[0], + pmb_type="molecule", + espresso_system=espresso_system) center_of_mass_ref = [L/2]*3 for ind in range(len(center_of_mass)): self.assertAlmostEqual(center_of_mass[ind], @@ -127,26 +129,29 @@ def test_center_molecule_in_simulation_box(self): #Check that center_molecule_in_simulation_box works correctly for non-cubic boxes espresso_system.change_volume_and_rescale_particles(d_new=3*L, dir="z") - pmb.center_molecule_in_simulation_box(molecule_id=molecule_ids[2], - espresso_system=espresso_system) - center_of_mass = pmb.calculate_center_of_mass_of_molecule(molecule_id=molecule_ids[2], - espresso_system=espresso_system) + pmb.center_object_in_simulation_box(instance_id=molecule_ids[2], + pmb_type="molecule", + espresso_system=espresso_system) + center_of_mass = pmb.calculate_center_of_mass(instance_id=molecule_ids[2], + pmb_type="molecule", + 
espresso_system=espresso_system) center_of_mass_ref = [L/2, L/2, 1.5*L] for ind in range(len(center_of_mass)): self.assertAlmostEqual(center_of_mass[ind], center_of_mass_ref[ind]) - def test_sanity_center_molecule_in_simulation_box(self): + def test_sanity_center_object_in_simulation_box(self): """ Sanity tests for center_molecule_in_simulation_box """ # Check that center_molecule_in_simulation_box raises a Value Error if a wrong molecule_id is provided - input_parameters = {"molecule_id": 20 , + input_parameters = {"instance_id": 20 , + "pmb_type": "molecule", "espresso_system":espresso_system} self.assertRaises(ValueError, - pmb.center_molecule_in_simulation_box, + pmb.center_object_in_simulation_box, **input_parameters) diff --git a/testsuite/globular_protein_unit_tests.py b/testsuite/globular_protein_unit_tests.py index f75996d..f35cef0 100644 --- a/testsuite/globular_protein_unit_tests.py +++ b/testsuite/globular_protein_unit_tests.py @@ -17,6 +17,7 @@ # along with this program. If not, see . import numpy as np import espressomd +import unittest as ut import re import json @@ -26,389 +27,290 @@ # Create an instance of pyMBE library pmb = pyMBE.pymbe_library(seed=42) - -c_salt = 0.01 * pmb.units.mol / pmb.units.L -c_protein = 2e-4 * pmb.units.mol / pmb.units.L -Box_V = 1. / (pmb.N_A*c_protein) -Box_L = Box_V**(1./3.) 
- -def custom_deserializer(dct): - if "value" in dct and "unit" in dct: - return pmb.units.Quantity(dct["value"], dct["unit"]) - return dct - protein_pdb = '1f6s' path_to_parfile = pathlib.Path(__file__).parent / "tests_data" / "protein_topology_dict.json" path_to_cg=pmb.root / "parameters" / "globular_proteins" / f"{protein_pdb}.vtf" topology_dict, sequence = pmb.read_protein_vtf (filename=path_to_cg) - -with open (path_to_parfile, "r") as file: - load_json = json.load(file,object_hook=custom_deserializer) - -np.testing.assert_equal(actual= topology_dict, - desired= load_json, - verbose = True) - -protein_model = '2beadAA' - -hf.define_protein_AA_particles(topology_dict=topology_dict, - pmb=pmb, - pka_set={}) -residue_list = hf.define_protein_AA_residues(topology_dict=topology_dict, - model=protein_model, - pmb=pmb) -pmb.define_protein (name=protein_pdb, - sequence=sequence, - model = protein_model) -clean_sequence= "" -full_residue_list = [] -for aminoacid in topology_dict.keys(): - residue_name = re.split(r'\d+', aminoacid)[0] - if residue_name not in ['CA', 'Ca']: - clean_sequence+=residue_name - full_residue_list.append(f"AA-{residue_name}") - -residue_dict = hf.get_residues_from_topology_dict(topology_dict=topology_dict, - model=protein_model) - -# Check residue templates -for residue_name in residue_list: - residue_template = pmb.db.get_template(name=residue_name, - pmb_type="residue") - assert residue_template is not None - assert residue_template.pmb_type == "residue" - assert residue_template.name == residue_name - - -# Check protein template -protein_template = pmb.db.get_template(name=protein_pdb, - pmb_type="protein") - -assert protein_template is not None -assert protein_template.name == protein_pdb - -np.testing.assert_equal(actual=protein_template.sequence, - desired=clean_sequence, - verbose=True) - -np.testing.assert_equal(actual=protein_template.residue_list, - desired=full_residue_list, - verbose=True) - - -input_parameters={"name": protein_pdb, 
- "sequence": sequence, - "model" : "3beadAA"} - -np.testing.assert_raises(ValueError, pmb.define_protein, **input_parameters) - -print("*** Unit test passed ***") - -espresso_system=espressomd.System(box_l = [Box_L.to('reduced_length').magnitude] * 3) - -molecule_id = pmb.create_protein(name=protein_pdb, - number_of_proteins=1, - espresso_system=espresso_system, - topology_dict=topology_dict)[0] - -particle_id_list = pmb.get_particle_id_map(object_name=protein_pdb)["all"] - -center_of_mass_es = pmb.calculate_center_of_mass_of_molecule (molecule_id=molecule_id, - espresso_system=espresso_system) -center_of_mass = np.zeros(3) -axis_list = [0,1,2] - -for aminoacid in topology_dict.keys(): - initial_pos = topology_dict[aminoacid]['initial_pos'] - for axis in axis_list: - center_of_mass[axis] += initial_pos[axis] -center_of_mass = center_of_mass/ len(topology_dict.keys()) - -distance_es = np.zeros(3) -distance_topology = np.zeros(3) - -for id in particle_id_list: - - initial_pos_es = espresso_system.part.by_id(id).pos - charge = espresso_system.part.by_id(id).q - es_type = espresso_system.part.by_id(id).type - part_inst = pmb.db.get_instance(instance_id=id, - pmb_type="particle") - residue_id = part_inst.residue_id - res_inst = pmb.db.get_instance(instance_id=residue_id, - pmb_type="residue") - residue_name = res_inst.name - - - initial_pos = topology_dict[f"{residue_name}{residue_id}"]['initial_pos'] - - for axis in axis_list: - distance_es[axis] = (initial_pos_es[axis] - center_of_mass_es[axis])**2 - distance_topology[axis] = (initial_pos[axis] - center_of_mass[axis])**2 - - relative_distance_es = np.sqrt(np.sum(distance_es)) - relative_distance = np.sqrt(np.sum(distance_es)) - - np.testing.assert_equal(actual=relative_distance_es, - desired=relative_distance, - verbose=True) - -starting_number_of_particles=len(espresso_system.part.all()) - -pmb.create_protein(name=protein_pdb, - number_of_proteins=0, - espresso_system=espresso_system, - topology_dict=topology_dict) 
- -pmb.create_protein(name=protein_pdb, - number_of_proteins=-1, - espresso_system=espresso_system, - topology_dict=topology_dict) - -np.testing.assert_equal(actual=len(espresso_system.part.all()), - desired=starting_number_of_particles, - verbose=True) - - -positions = [] -for pid in particle_id_list: - positions.append(espresso_system.part.by_id(pid).pos) - -pmb.enable_motion_of_rigid_object(espresso_system=espresso_system, - name=protein_pdb) - -momI = 0 -molecule_id = pmb.df.loc[pmb.df['name']==protein_pdb].molecule_id.values[0] -for p in espresso_system.part: - center_of_mass = pmb.calculate_center_of_mass_of_molecule ( molecule_id=molecule_id,espresso_system=espresso_system) - if p.mass > 1: - rigid_object_id = p.id - rigid_object_mass = espresso_system.part.by_id(rigid_object_id).mass - rigid_object_rotation = espresso_system.part.by_id(rigid_object_id).rotation - rigid_object_intertia = np.copy(espresso_system.part.by_id(rigid_object_id).rinertia) - - np.testing.assert_equal(actual=rigid_object_mass, - desired=len(particle_id_list), - verbose=True) - print ('mass passed ') - - np.testing.assert_equal(actual=rigid_object_rotation, - desired=[1, 1, 1], - verbose=True) - print ('rotation passed ') - - for pid in particle_id_list: - momI += np.power(np.linalg.norm(center_of_mass - espresso_system.part.by_id(pid).pos), 2) - rinertia = np.ones(3) * momI - - np.testing.assert_array_almost_equal(rinertia, rigid_object_intertia) - -print("*** Unit test passed ***") - -print("*** Unit test: check that enable_motion_of_rigid_object() raises a ValueError if a wrong pmb_type is provided***") - -input_parameters = {"espresso_system":espresso_system, - "name": "CA"} - -np.testing.assert_raises(ValueError, pmb.enable_motion_of_rigid_object, **input_parameters) - -print("*** Unit test passed ***") - -print("*** Unit test: check that protein_sequence_parser() correctly returns que protein sequence ***") - -def test_sequence(input,output): - """ - Tests that the pyMBE parses 
correctly the input sequence. - - Args: - input(`str` or `lst` of `str`): input protein sequence. - ouput(`lst` of `str`): expected ouput protein sequence. - """ - clean_sequence= pmb.protein_sequence_parser(sequence = input) - np.testing.assert_equal(actual=clean_sequence, - desired=output, - verbose=True) - -test_sequence(input="REKH", - output=["R", "E", "K", "H"]) -test_sequence(input="rekh", - output=["R", "E", "K", "H"]) -test_sequence(input="R-E-K-H", - output=["R", "E", "K", "H"]) -test_sequence(input="r-e-k-h", - output=["R", "E", "K", "H"]) -test_sequence(input="ARG-GLU-LYS-HIS", - output=["R", "E", "K", "H"]) -test_sequence(input="arg-glu-lys-his", - output=["R", "E", "K", "H"]) -test_sequence(input=["R","E", "K", "H"], - output=["R", "E", "K", "H"]) -test_sequence(input=["r","e", "k", "h"], - output=["R", "E", "K", "H"]) -test_sequence(input=["ARG","GLU", "LYS", "HIS"], - output=["R", "E", "K", "H"]) -test_sequence(input=["arg","glu", "lys", "his"], - output=["R", "E", "K", "H"]) - -print("*** Unit test: check that protein_sequence_parser() raises a ValueError if a wrong residue key is provided***") - -input_parameters = {"sequence":"rekx"} -np.testing.assert_raises(ValueError, pmb.protein_sequence_parser, **input_parameters) - -input_parameters = {"sequence":"ARG-GLU-TUR-HIS"} -np.testing.assert_raises(ValueError, pmb.protein_sequence_parser, **input_parameters) - -input_parameters = {"sequence":"A-E-E-X"} -np.testing.assert_raises(ValueError, pmb.protein_sequence_parser, **input_parameters) - -input_parameters = {"sequence":"a-e-e-x"} -np.testing.assert_raises(ValueError, pmb.protein_sequence_parser, **input_parameters) - -input_parameters = {"sequence":["A", "E","X"]} -np.testing.assert_raises(ValueError, pmb.protein_sequence_parser, **input_parameters) - -print("*** Unit test passed ***") - -print("*** Unit test: Check that check_aminoacid_key returns True for any latter valid in the one letter amino acid code***") -valid_AA_keys=['V', 'I', 'L', 
'E', 'Q', 'D', 'N', 'H', 'W', 'F', 'Y', 'R', 'K', 'S', 'T', 'M', 'A', 'G', 'P', 'C'] -for key in valid_AA_keys: - np.testing.assert_equal(actual=pmb.check_aminoacid_key(key=key), - desired=True, - verbose=True) -print("*** Unit test passed ***\n") -print("*** Unit test: Check that check_aminoacid_key returns False for a key not valid in the one letter amino acid code ***") -np.testing.assert_equal(actual=pmb.check_aminoacid_key(key="B"), - desired=False, - verbose=True) -print("*** Unit test passed ***\n") - -print("*** Unit test: Check that check_if_metal_ion returns True for any key corresponding to a supported metal ion ***") -for key in pmb.get_metal_ions_charge_number_map().keys(): - np.testing.assert_equal(actual=pmb.check_if_metal_ion(key=key), - desired=True, - verbose=True) -print("*** Unit test passed ***\n") -print("*** Unit test: Check that check_if_metal_ion returns False for a key not corresponding to a supported metal ion ***") -np.testing.assert_equal(actual=pmb.check_if_metal_ion(key="B"), - desired=False, - verbose=True) -print("*** Unit test passed ***\n") - -print("*** Unit test: Check that get_metal_ions_charge_number_map returns the correct charge map for metals ***") -metal_charge_number_map = {"Ca": 2} -pmb_metal_charge_number_map = pmb.get_metal_ions_charge_number_map() - -np.testing.assert_equal(actual=pmb_metal_charge_number_map, - desired=metal_charge_number_map, - verbose=True) -print("*** Unit test passed ***\n") - - -print("*** Unit test: check that define_AA_residues()") - -test_sequence = ['c','n', 'G','V', 'I', 'L', 'E', 'Q', 'D', 'N', 'H', 'W', 'F', 'Y', 'R', 'K', 'S', 'T', 'M', 'A', 'G', 'P', 'C' ] - -valid_protein_model = ['1beadAA', '2beadAA'] - -output =['AA-c', 'AA-n', 'AA-G', 'AA-V', 'AA-I', 'AA-L', 'AA-E', 'AA-Q', 'AA-D', 'AA-N', 'AA-H', 'AA-W', 'AA-F', 'AA-Y', 'AA-R', 'AA-K', 'AA-S', 'AA-T', 'AA-M', 'AA-A', 'AA-G', 'AA-P', 'AA-C'] - -for protein_model in valid_protein_model: - - pmb_residue_list = 
pmb.define_AA_residues(sequence= test_sequence, - model = protein_model) - - np.testing.assert_equal(actual=pmb_residue_list, - desired=output, - verbose=True) - -print("*** Unit test passed ***") - -print("*** Unit test: check that define_peptide() raises a ValueError if a wrong model key is provided") - -input_parameters = {"name": "generic_peptide", - "sequence": "EEEEEEE", - "model": "3beadAA" } - -np.testing.assert_raises(ValueError, pmb.define_peptide, **input_parameters) - -input_parameters = {"name": "generic_peptide", - "sequence": "EEEEEEE", - "model": "beadAA" } - -np.testing.assert_raises(ValueError, pmb.define_peptide, **input_parameters) - -print("*** Unit test passed ***") - -print("*** Unit test: check that search_particles_in_residue() returns the correct list of residues") - -list_of_residues = ['AA-c', 'AA-n', 'AA-G', 'AA-V', 'AA-I', 'AA-L', 'AA-E', 'AA-Q', 'AA-D', 'AA-N', 'AA-H', - 'AA-W', 'AA-F', 'AA-Y', 'AA-R', 'AA-K', 'AA-S', 'AA-T', 'AA-M', 'AA-A','AA-P', 'AA-C'] - -for residue_name in list_of_residues: - - residue = residue_name.replace('AA-','') - - list_of_particles_in_residue= pmb.search_particles_in_residue(residue_name = residue_name) - if residue in ['c', 'n']: - np.testing.assert_equal(actual=list_of_particles_in_residue, - desired=[residue], +ref_sequence = "nQLTKCEVFRELKDLKGYGGVSLPEWVCTTFHTSGYDTQAIVQNNDSTEYGLFQINNKIWCKDDQNPHSSNICNISCDKFLDDDLTDDIMCVKKILDKVGINYWLAHKALCSEKLDQWLCEKc" # VTF file is missing a terminal L aminoacid on its sequence +ref_residue_list = [] +for key in ref_sequence: + ref_residue_list.append(f"AA-{key}") + + +class Test(ut.TestCase): + def test_protein_setup(self): + """ + Unit tests for setting up globular proteins in pyMBE. 
+ """ + Box_L = 100 * pmb.units.reduced_length + def custom_deserializer(dct): + if "value" in dct and "unit" in dct: + return pmb.units.Quantity(dct["value"], dct["unit"]) + return dct + with open (path_to_parfile, "r") as file: + load_json = json.load(file,object_hook=custom_deserializer) + np.testing.assert_equal(actual= topology_dict, + desired= load_json, + verbose = True) + protein_model = '2beadAA' + hf.define_protein_AA_particles(topology_dict=topology_dict, + pmb=pmb, + pka_set={}) + residue_list = hf.define_protein_AA_residues(sequence=sequence, + model=protein_model, + pmb=pmb) + # Define a residue for the metal ion + pmb.define_residue(name="AA-Ca", + central_bead="Ca", + side_chains=[]) + pmb.define_protein (name=protein_pdb, + sequence=sequence, + model = protein_model) + # Check residue templates + for residue_name in residue_list: + residue_template = pmb.db.get_template(name=residue_name, + pmb_type="residue") + assert residue_template is not None + assert residue_template.pmb_type == "residue" + assert residue_template.name == residue_name + # Check protein template + protein_template = pmb.db.get_template(name=protein_pdb, + pmb_type="protein") + assert protein_template is not None + assert protein_template.name == protein_pdb + np.testing.assert_equal(actual=protein_template.sequence, + desired=ref_sequence, + verbose=True) + np.testing.assert_equal(actual=protein_template.residue_list, + desired=ref_residue_list, verbose=True) - elif residue == "G": - np.testing.assert_equal(actual=list_of_particles_in_residue, - desired=[], + input_parameters={"name": protein_pdb, + "sequence": sequence, + "model" : "3beadAA"} + np.testing.assert_raises(ValueError, + pmb.define_protein, + **input_parameters) + espresso_system=espressomd.System(box_l = [Box_L.to('reduced_length').magnitude] * 3) + molecule_id = pmb.create_protein(name=protein_pdb, + number_of_proteins=1, + espresso_system=espresso_system, + topology_dict=topology_dict)[0] + particle_id_list = 
pmb.get_particle_id_map(object_name=protein_pdb)["all"] + center_of_mass_es = pmb.calculate_center_of_mass(instance_id=molecule_id, + pmb_type="protein", + espresso_system=espresso_system) + center_of_mass = np.zeros(3) + axis_list = [0,1,2] + for aminoacid in topology_dict.keys(): + initial_pos = topology_dict[aminoacid]['initial_pos'] + for axis in axis_list: + center_of_mass[axis] += initial_pos[axis] + center_of_mass = center_of_mass/ len(topology_dict.keys()) + distance_es = np.zeros(3) + distance_topology = np.zeros(3) + for id in particle_id_list: + initial_pos_es = espresso_system.part.by_id(id).pos + part_inst = pmb.db.get_instance(instance_id=id, + pmb_type="particle") + part_tpl = pmb.db.get_template(name=part_inst.name, + pmb_type="particle") + part_state = pmb.db.get_template(name=part_tpl.initial_state, + pmb_type="particle_state") + charge = espresso_system.part.by_id(id).q + es_type = espresso_system.part.by_id(id).type + np.testing.assert_equal(part_state.z, + charge) + np.testing.assert_equal(part_state.es_type, + es_type) + residue_id = part_inst.residue_id + res_inst = pmb.db.get_instance(instance_id=residue_id, + pmb_type="residue") + residue_name = res_inst.name + if "G" in residue_name: + continue + initial_pos = topology_dict[f"{residue_name[3:]}{residue_id}"]['initial_pos'] + for axis in axis_list: + distance_es[axis] = (initial_pos_es[axis] - center_of_mass_es[axis])**2 + distance_topology[axis] = (initial_pos[axis] - center_of_mass[axis])**2 + relative_distance_es = np.sqrt(np.sum(distance_es)) + relative_distance = np.sqrt(np.sum(distance_es)) + np.testing.assert_equal(actual=relative_distance_es, + desired=relative_distance, + verbose=True) + starting_number_of_particles=len(espresso_system.part.all()) + pmb.create_protein(name=protein_pdb, + number_of_proteins=0, + espresso_system=espresso_system, + topology_dict=topology_dict) + pmb.create_protein(name=protein_pdb, + number_of_proteins=-1, + espresso_system=espresso_system, + 
topology_dict=topology_dict) + np.testing.assert_equal(actual=len(espresso_system.part.all()), + desired=starting_number_of_particles, verbose=True) - else: - np.testing.assert_equal(actual=list_of_particles_in_residue, - desired=['CA', residue], + positions = [] + for pid in particle_id_list: + positions.append(espresso_system.part.by_id(pid).pos) + pmb.enable_motion_of_rigid_object(instance_id=molecule_id, + espresso_system=espresso_system, + pmb_type="protein") + + momI = 0 + for p in espresso_system.part: + center_of_mass = pmb.calculate_center_of_mass(instance_id=molecule_id, + pmb_type="protein", + espresso_system=espresso_system) + if p.mass > 1: + rigid_object_id = p.id + rigid_object_mass = espresso_system.part.by_id(rigid_object_id).mass + rigid_object_rotation = espresso_system.part.by_id(rigid_object_id).rotation + rigid_object_intertia = np.copy(espresso_system.part.by_id(rigid_object_id).rinertia) + np.testing.assert_equal(actual=rigid_object_mass, + desired=len(particle_id_list), + verbose=True) + np.testing.assert_equal(actual=rigid_object_rotation, + desired=[1, 1, 1], + verbose=True) + for pid in particle_id_list: + momI += np.power(np.linalg.norm(center_of_mass - espresso_system.part.by_id(pid).pos), 2) + rinertia = np.ones(3) * momI + np.testing.assert_array_almost_equal(rinertia, rigid_object_intertia) + def test_protein_parser(self): + """ + Unit tests for protein_sequence_parser + """ + def test_sequence(input,output): + """ + Tests that the pyMBE parses correctly the input sequence. + + Args: + input(`str` or `lst` of `str`): input protein sequence. + ouput(`lst` of `str`): expected ouput protein sequence. 
+ """ + clean_sequence= hf.protein_sequence_parser(sequence = input) + np.testing.assert_equal(actual=clean_sequence, + desired=output, + verbose=True) + # check that correctly returns que protein sequence + test_sequence(input="REKH", + output=["R", "E", "K", "H"]) + test_sequence(input="rekh", + output=["R", "E", "K", "H"]) + test_sequence(input="R-E-K-H", + output=["R", "E", "K", "H"]) + test_sequence(input="r-e-k-h", + output=["R", "E", "K", "H"]) + test_sequence(input="ARG-GLU-LYS-HIS", + output=["R", "E", "K", "H"]) + test_sequence(input="arg-glu-lys-his", + output=["R", "E", "K", "H"]) + test_sequence(input=["R","E", "K", "H"], + output=["R", "E", "K", "H"]) + test_sequence(input=["r","e", "k", "h"], + output=["R", "E", "K", "H"]) + test_sequence(input=["ARG","GLU", "LYS", "HIS"], + output=["R", "E", "K", "H"]) + test_sequence(input=["arg","glu", "lys", "his"], + output=["R", "E", "K", "H"]) + + # check that protein_sequence_parser() raises a ValueError if a wrong residue key is provided + input_parameters = {"sequence":"rekx"} + np.testing.assert_raises(ValueError, + hf.protein_sequence_parser, + **input_parameters) + input_parameters = {"sequence":"ARG-GLU-TUR-HIS"} + np.testing.assert_raises(ValueError, + hf.protein_sequence_parser, + **input_parameters) + input_parameters = {"sequence":"A-E-E-X"} + np.testing.assert_raises(ValueError, + hf.protein_sequence_parser, + **input_parameters) + input_parameters = {"sequence":"a-e-e-x"} + np.testing.assert_raises(ValueError, + hf.protein_sequence_parser, + **input_parameters) + input_parameters = {"sequence":["A", "E","X"]} + np.testing.assert_raises(ValueError, + hf.protein_sequence_parser, + **input_parameters) + def test_check_aminoacid_key(self): + """ + Unit tests for check_aminoacid_key() + """ + # Check that check_aminoacid_key returns True for any latter valid in the one letter amino acid code + valid_AA_keys=['V', 'I', 'L', 'E', 'Q', 'D', 'N', 'H', 'W', 'F', 'Y', 'R', 'K', 'S', 'T', 'M', 'A', 'G', 'P', 
'C'] + for key in valid_AA_keys: + np.testing.assert_equal(actual=hf.check_aminoacid_key(key=key), + desired=True, + verbose=True) + + # Check that check_aminoacid_key returns False for a key not valid in the one letter amino acid code + np.testing.assert_equal(actual=hf.check_aminoacid_key(key="B"), + desired=False, verbose=True) - -print("*** Unit test passed ***") - -print("*** Unit test: check that search_particles_in_residue() returns the correct list of residues for nested residues case") - -pmb.define_particle( - name = "I", - sigma = 1*pmb.units('reduced_length'), - epsilon = 1*pmb.units('reduced_energy')) - -# Acidic particle -pmb.define_particle( - name = "A", - sigma = 1*pmb.units('reduced_length'), - epsilon = 1*pmb.units('reduced_energy')) - -# Basic particle -pmb.define_particle( - name = "B", - sigma = 1*pmb.units('reduced_length'), - epsilon = 1*pmb.units('reduced_energy')) - -pmb.define_residue( - name = "Res_1", - central_bead = "I", - side_chains = ["A","B"]) - -pmb.define_residue( - name = "Res_2", - central_bead = "I", - side_chains = ["Res_1"]) - -list_of_particles_in_residue= pmb.search_particles_in_residue(residue_name = "Res_2") -np.testing.assert_equal(actual=list_of_particles_in_residue, - desired=['I', 'I', 'A', 'B'], + def test_metal_functions(self): + """ + Unit tests for the helpers for the protein metal ions + """ + # Check that check_if_metal_ion returns True for any key corresponding to a supported metal ion + for key in hf.get_metal_ions_charge_number_map().keys(): + np.testing.assert_equal(actual=hf.check_if_metal_ion(key=key), + desired=True, + verbose=True) + # Check that check_if_metal_ion returns False for a key not corresponding to a supported metal ion + np.testing.assert_equal(actual=hf.check_if_metal_ion(key="B"), + desired=False, verbose=True) + # Check that get_metal_ions_charge_number_map returns the correct charge map for metals + metal_charge_number_map = {"Ca": 2} + pmb_metal_charge_number_map = 
hf.get_metal_ions_charge_number_map() -print("*** Unit test passed ***") + np.testing.assert_equal(actual=pmb_metal_charge_number_map, + desired=metal_charge_number_map, + verbose=True) -print("*** Unit test: Check that create_protein() does not create any protein for an undefined protein name ***") -starting_number_of_particles=len(espresso_system.part.all()) -pmb.create_protein(name="undefined_protein", - number_of_proteins=1, - espresso_system=espresso_system, - topology_dict=topology_dict) -np.testing.assert_equal(actual=len(espresso_system.part.all()), - desired=starting_number_of_particles, - verbose=True) -print("*** Unit test passed ***") + def test_define_protein_AA_residues(self): + """ + Unit test for define_protein_AA_residues + """ + valid_protein_model = ['1beadAA', + '2beadAA'] + test_sequence = ['c','n', 'G','V', 'I', 'L', 'E', 'Q', 'D', 'N', 'H', 'W', 'F', 'Y', 'R', 'K', 'S', 'T', 'M', 'A', 'G', 'P', 'C' ] + + valid_protein_model = ['1beadAA', '2beadAA'] + + output =['AA-c', 'AA-n', 'AA-G', 'AA-V', 'AA-I', 'AA-L', 'AA-E', 'AA-Q', 'AA-D', 'AA-N', 'AA-H', 'AA-W', 'AA-F', 'AA-Y', 'AA-R', 'AA-K', 'AA-S', 'AA-T', 'AA-M', 'AA-A', 'AA-G', 'AA-P', 'AA-C'] + test_pmb = pyMBE.pymbe_library(23) + for protein_model in valid_protein_model: + pmb_residue_list = hf.define_protein_AA_residues(sequence=test_sequence, + model = protein_model, + pmb=test_pmb) + np.testing.assert_equal(actual=pmb_residue_list, + desired=output, + verbose=True) + test_pmb.db.delete_templates(pmb_type="residue") + + def test_define_peptide_sanity(self): + """ + Sanity tests for define_peptide + """ + # check that define_peptide() raises a ValueError if a wrong model key is provided + input_parameters = {"name": "generic_peptide", + "sequence": "EEEEEEE", + "model": "3beadAA" } + np.testing.assert_raises(ValueError, + pmb.define_peptide, + **input_parameters) + input_parameters = {"name": "generic_peptide", + "sequence": "EEEEEEE", + "model": "beadAA" } + 
np.testing.assert_raises(ValueError, + pmb.define_peptide, + **input_parameters) + +if __name__ == "__main__": + ut.main() \ No newline at end of file diff --git a/testsuite/parameter_test.py b/testsuite/parameter_test.py index 8b37d51..e36242e 100644 --- a/testsuite/parameter_test.py +++ b/testsuite/parameter_test.py @@ -65,9 +65,9 @@ def test_sanity_check_pka_set(self): Check that check_pka_set raises a ValueError if data is missing important fields """ pmb = pyMBE.pymbe_library(seed=42) - np.testing.assert_raises(ValueError, pmb.check_pka_set, {"name" : {}}) - np.testing.assert_raises(ValueError, pmb.check_pka_set, {"name" : {"pka_value": 1.}}) - np.testing.assert_raises(ValueError, pmb.check_pka_set, {"name" : {"acidity": 1.}}) + np.testing.assert_raises(ValueError, pmb._check_pka_set, {"name" : {}}) + np.testing.assert_raises(ValueError, pmb._check_pka_set, {"name" : {"pka_value": 1.}}) + np.testing.assert_raises(ValueError, pmb._check_pka_set, {"name" : {"acidity": 1.}}) if __name__ == "__main__": ut.main() \ No newline at end of file From 334bb85405cca8cf2e4e194f85a75f7bddfaeebb Mon Sep 17 00:00:00 2001 From: Pablo Date: Sat, 24 Jan 2026 17:05:02 +0100 Subject: [PATCH 34/55] adapt cpH method --- pyMBE/pyMBE.py | 69 +++++++++------- pyMBE/storage/io.py | 1 + pyMBE/storage/manager.py | 46 ++++++++--- pyMBE/storage/reactions/reaction.py | 120 +++++++++------------------- testsuite/lattice_builder.py | 90 ++++++++------------- testsuite/test_io_database.py | 4 +- 6 files changed, 148 insertions(+), 182 deletions(-) diff --git a/pyMBE/pyMBE.py b/pyMBE/pyMBE.py index 60070d6..bd27db1 100644 --- a/pyMBE/pyMBE.py +++ b/pyMBE/pyMBE.py @@ -238,7 +238,7 @@ def _create_hydrogel_chain(self, hydrogel_chain, nodes, espresso_system, use_def node_end_label = self.lattice_builder._create_node_label(node_end) _, reverse = self.lattice_builder._get_node_vector_pair(node_start, node_end) if node_start != node_end or residue_list == residue_list[::-1]: - RuntimeError(f"Aborted 
creation because hydrogel chain between '{node_start}' and '{node_end}' because pyMBE could not resolve a unique topology for that chain") + ValueError(f"Aborted creation of hydrogel chain between '{node_start}' and '{node_end}' because pyMBE could not resolve a unique topology for that chain") if reverse: residue_list = residue_list[::-1] start_node_id = nodes[node_start_label]["id"] @@ -303,7 +303,6 @@ def _create_hydrogel_node(self, node_index, node_name, espresso_system): """ if self.lattice_builder is None: raise ValueError("LatticeBuilder is not initialized. Use `initialize_lattice_builder` first.") - node_position = np.array(node_index)*0.25*self.lattice_builder.box_l p_id = self.create_particle(name = node_name, espresso_system=espresso_system, @@ -1403,8 +1402,8 @@ def define_monoprototic_acidbase_reaction(self, particle_name, pka, acidity, met state_name=f"{particle_name}H", coefficient=-1), ReactionParticipant(particle_name=particle_name, - state_name=f"{particle_name}", - coefficient=1)], + state_name=f"{particle_name}", + coefficient=1)], reaction_type=reaction_type, pK=pka, metadata=metadata) @@ -2431,7 +2430,7 @@ def set_reduced_units(self, unit_length=None, unit_charge=None, temperature=None self.units.define(f'reduced_charge = {unit_charge}') logging.info(self.get_reduced_units()) - def setup_cpH (self, counter_ion, constant_pH, exclusion_range=None, pka_set=None, use_exclusion_radius_per_type = False): + def setup_cpH (self, counter_ion, constant_pH, exclusion_range=None, use_exclusion_radius_per_type = False): """ Sets up the Acid/Base reactions for acidic/basic `particles` defined in `pmb.df` to be sampled in the constant pH ensemble. @@ -2440,7 +2439,6 @@ def setup_cpH (self, counter_ion, constant_pH, exclusion_range=None, pka_set=Non constant_pH(`float`): pH-value. exclusion_range(`pint.Quantity`, optional): Below this value, no particles will be inserted. 
use_exclusion_radius_per_type(`bool`,optional): Controls if one exclusion_radius for each espresso_type is used. Defaults to `False`. - pka_set(`dict`,optional): Desired pka_set, pka_set(`dict`): {"name" : {"pka_value": pka, "acidity": acidity}}. Defaults to None. Returns: RE(`reaction_methods.ConstantpHEnsemble`): Instance of a reaction_methods.ConstantpHEnsemble object from the espressomd library. @@ -2449,38 +2447,47 @@ def setup_cpH (self, counter_ion, constant_pH, exclusion_range=None, pka_set=Non from espressomd import reaction_methods if exclusion_range is None: exclusion_range = max(self.get_radius_map().values())*2.0 - if pka_set is None: - pka_set=self.get_pka_set() - self._check_pka_set(pka_set=pka_set) if use_exclusion_radius_per_type: exclusion_radius_per_type = self.get_radius_map() else: exclusion_radius_per_type = {} - RE = reaction_methods.ConstantpHEnsemble(kT=self.kT.to('reduced_energy').magnitude, - exclusion_range=exclusion_range, - seed=self.seed, - constant_pH=constant_pH, - exclusion_radius_per_type = exclusion_radius_per_type - ) - sucessfull_reactions_labels=[] - charge_number_map = self.get_charge_number_map() - for name in pka_set.keys(): - if not _DFm._check_if_name_is_defined_in_df(name=name, df=self.df): - logging.warning(f'The acid-base reaction of {name} has not been set up because its particle type is not defined in the pyMBE DataFrame.') + exclusion_range=exclusion_range, + seed=self.seed, + constant_pH=constant_pH, + exclusion_radius_per_type = exclusion_radius_per_type) + conterion_tpl = self.db.get_template(name=counter_ion, + pmb_type="particle") + conterion_state = self.db.get_template(name=conterion_tpl.initial_state, + pmb_type="particle_state") + for reaction in self.db.get_reactions(): + if reaction.reaction_type not in ["monoprotic_acid", "monoprotic_base"]: continue - gamma=10**-pka_set[name]['pka_value'] - state_one_type = self.df.loc[self.df['name']==name].state_one.es_type.values[0] - state_two_type = 
self.df.loc[self.df['name']==name].state_two.es_type.values[0] - counter_ion_type = self.df.loc[self.df['name']==counter_ion].state_one.es_type.values[0] + default_charges = {} + reactant_types = [] + product_types = [] + for participant in reaction.participants: + state_tpl = self.db.get_template(name=participant.state_name, + pmb_type="particle_state") + default_charges[state_tpl.es_type] = state_tpl.z + if participant.coefficient < 0: + reactant_types.append(state_tpl.es_type) + elif participant.coefficient > 0: + product_types.append(state_tpl.es_type) + # Add counterion to the products + if conterion_state.es_type not in product_types: + product_types.append(conterion_state.es_type) + default_charges[conterion_state.es_type] = conterion_state.z + reaction.add_participant(particle_name=counter_ion, + state_name=conterion_tpl.initial_state, + coefficient=1) + gamma=10**-reaction.pK RE.add_reaction(gamma=gamma, - reactant_types=[state_one_type], - product_types=[state_two_type, counter_ion_type], - default_charges={state_one_type: charge_number_map[state_one_type], - state_two_type: charge_number_map[state_two_type], - counter_ion_type: charge_number_map[counter_ion_type]}) - sucessfull_reactions_labels.append(name) - return RE, sucessfull_reactions_labels + reactant_types=reactant_types, + product_types=product_types, + default_charges=default_charges) + reaction.add_simulation_method(simulation_method="cpH") + return RE def setup_gcmc(self, c_salt_res, salt_cation_name, salt_anion_name, activity_coefficient, exclusion_range=None, use_exclusion_radius_per_type = False): """ diff --git a/pyMBE/storage/io.py b/pyMBE/storage/io.py index 6afb31b..917d6c8 100644 --- a/pyMBE/storage/io.py +++ b/pyMBE/storage/io.py @@ -329,6 +329,7 @@ def _load_database_csv(db, folder): participants=participants, pK=float(row["pK"]) if (row.get("pK") not in (None, "", "nan")) else None, reaction_type=row.get("reaction_type", None), + simulation_method=row.get("simulation_method", 
None), metadata=metadata) db._reactions[rx.name] = rx diff --git a/pyMBE/storage/manager.py b/pyMBE/storage/manager.py index 95da404..22dec53 100644 --- a/pyMBE/storage/manager.py +++ b/pyMBE/storage/manager.py @@ -334,13 +334,12 @@ def _get_reactions_df(self): f"{p.state_name}": p.coefficient for p in r.participants } - rows.append({ - "reaction": r.name, - "stoichiometry": stoich, - "pK": r.pK, - "reaction_type": r.reaction_type, - "metadata": r.metadata, - }) + rows.append({"reaction": r.name, + "stoichiometry": stoich, + "pK": r.pK, + "reaction_type": r.reaction_type, + "metadata": r.metadata, + "simulation_method": r.simulation_method}) return pd.DataFrame(rows) def _get_templates_df(self, pmb_type): @@ -966,6 +965,31 @@ def get_instances(self, pmb_type): """ return self._instances.get(pmb_type, {}).copy() + def get_reaction(self, name): + """ + Retrieve a reaction stored in the pyMBE database by name. + + Args: + name ('str'): The unique id of the reaction to retrieve. + + Returns: + 'Reaction': The stored reaction instance corresponding to the provided name. + + """ + if name not in self._reactions[name]: + raise ValueError(f"Reaction '{name}' not found in the pyMBE database.") + else: + return self._reactions[name] + + def get_reactions(self): + """ + Retrieve all reactions stored in the pyMBE database. + + Returns: + 'list of Reaction': List with all stored reaction instances. + """ + return list(self._reactions.values()) + def get_particle_templates_under(self, template_name, pmb_type=None, return_counts=False): """ Returns the names of all particle templates associated with a given @@ -1148,16 +1172,14 @@ def get_particle_states_templates(self, particle_name): Retrieve all particle state templates associated with a given particle. Args: - particle_name (str): Name of the particle template. + particle_name ('str'): + Name of the particle template. 
Returns: - Dict[str, ParticleState]: + 'Dict[str, ParticleState]': Dictionary mapping state names to `ParticleState` templates. """ states = self._templates.get("particle_state", {}) - particle_states = {state.name: state for state in states.values() if state.particle_name == particle_name} - if not particle_states: - raise ValueError(f"No particle states registered for particle '{particle_name}'.") return particle_states diff --git a/pyMBE/storage/reactions/reaction.py b/pyMBE/storage/reactions/reaction.py index ac2969f..325108a 100644 --- a/pyMBE/storage/reactions/reaction.py +++ b/pyMBE/storage/reactions/reaction.py @@ -53,53 +53,34 @@ class Reaction(BaseModel): """ Defines a chemical reaction between particle states. - A ``Reaction`` object captures the stoichiometry and thermodynamic - properties of a chemical equilibrium. - This can represent phenomena such as acid–base reactionsor any multi-species reaction scheme - supported by the simulation engine. - Attributes: - name (str): + name ('str'): Unique identifier for the reaction. - participants (List[ReactionParticipant]): + + participants ('List[ReactionParticipant]'): List of reactants and products with stoichiometric coefficients. Must include at least two participants. - pK (float): + + pK ('float'): Reaction equilibrium parameter (e.g., pKa, log K). The meaning depends on ``reaction_type``. - reaction_type (str): + + reaction_type ('str'): A categorical descriptor of the reaction, such as ``"acid_base"`` - metadata (Optional[Dict]): + + simulation_method ('str', optional): + Simulation method used to study the reaction. + + metadata ('dict', optional): Optional free-form metadata for additional reaction details, notes, or model-specific configuration. - - Validation: - - At least one participant are required. - - All participants must have non-zero stoichiometric coefficients. 
- - Examples: - Acid dissociation of HA: - HA ↔ H⁺ + A⁻ - - Represented as: - Reaction( - name="acid_dissociation", - participants=[ - ReactionParticipant("A", "HA", -1), - ReactionParticipant("A", "A-", 1), - ReactionParticipant("H", "H+", 1), - ], - pK=4.75, - reaction_type="acid_base", - ) """ - participants: List[ReactionParticipant] - pK: float = Field(..., description="pKa, logK, eq constant, etc.") - reaction_type: str = Field(..., description="acid_base, binding, redox, ...") + pK: float + reaction_type: str metadata: Optional[Dict] = None - - name: str = Field(default="", description="Automatically generated reaction name") + simulation_method: Optional[str] = None + name: Optional[str] = None @model_validator(mode="after") def generate_name(self): @@ -127,68 +108,43 @@ def generate_name(self): @field_validator("participants") def at_least_two_participants(cls, v): if len(v) < 2: - raise ValueError("A reaction must have at least 1 participant.") + raise ValueError("A reaction must have at least 2 participants.") return v @field_validator("participants") def no_zero_coeff(cls, v): for p in v: if p.coefficient == 0: - raise ValueError(f"Participant {p.name} has coefficient 0.") + raise ValueError(f"Participant {p.state_name} has coefficient 0.") return v def add_participant(self, particle_name, state_name, coefficient): """ Add a new reaction participant to the reaction. - Creates a new :class:`ReactionParticipant` with the provided particle name, - state name and stoichiometric coefficient, and returns an updated - :class:`Reaction` instance containing the additional participant. - - The reaction object itself is not modified in place. Instead, a new - validated copy is returned, following Pydantic's immutable data model - best practices. -d Args: - particle_name (str): + particle_name ('str'): Name of the particle participating in the reaction. - state_name (str): - Specific state of the particle (e.g., protonation or charge state). 
- coefficient (int): - Stoichiometric coefficient for the participant: - - ``coefficient < 0`` → reactant - - ``coefficient > 0`` → product - Coefficients equal to zero are not allowed. - - Returns: - Reaction: - A new :class:`Reaction` object with the participant added. - - Raises: - ValueError: - If ``coefficient`` is zero. - - Examples: - >>> rxn = Reaction( - ... name="acid_dissociation", - ... participants=[ - ... ReactionParticipant("A", "HA", -1), - ... ReactionParticipant("A", "A-", 1), - ... ], - ... pK=4.7, - ... reaction_type="acid_base", - ... ) - >>> rxn = rxn.add_participant("H", "H+", 1) + state_name ('str'): + Specific state of the particle. + coefficient ('int'): + Stoichiometric coefficient for the participant. + """ if coefficient == 0: raise ValueError("Stoichiometric coefficient cannot be zero.") - - new_participant = ReactionParticipant( - particle_name=particle_name, - state_name=state_name, - coefficient=coefficient, - ) - - new_reaction = self.model_copy(update={"participants": self.participants + [new_participant]}) + new_participant = ReactionParticipant(particle_name=particle_name, + state_name=state_name, + coefficient=coefficient) + self.participants.append(new_participant) + self.generate_name() + - return new_reaction.generate_name() + def add_simulation_method(self, simulation_method): + """ + Adds which simulation is used to simulate the reaction + + Args: + simulation_method ('str'): label of the simulation method + """ + self.simulation_method = simulation_method \ No newline at end of file diff --git a/testsuite/lattice_builder.py b/testsuite/lattice_builder.py index 1f91e1d..551ce7e 100644 --- a/testsuite/lattice_builder.py +++ b/testsuite/lattice_builder.py @@ -100,9 +100,16 @@ def setUpClass(cls): def test_lattice_setup(self): diamond = pyMBE.lib.lattice.DiamondLattice(mpc, bond_l) espresso_system = espressomd.System(box_l = [diamond.box_l]*3) - pmb.add_bonds_to_espresso(espresso_system = espresso_system) - 
np.testing.assert_raises(ValueError, pmb.create_hydrogel_node, "[1 1 1]", NodeType1, espresso_system) - np.testing.assert_raises(ValueError, pmb.create_hydrogel_chain, "[0 0 0]", "[1 1 1]", {0:[0,0,0],1:diamond.box_l/4.0*np.ones(3)},espresso_system) + np.testing.assert_raises(ValueError, + pmb._create_hydrogel_node, + "[1 1 1]", + NodeType1, + espresso_system) + np.testing.assert_raises(ValueError, + pmb._create_hydrogel_chain, + "[0 0 0]", "[1 1 1]", + {0:[0,0,0],1:diamond.box_l/4.0*np.ones(3)}, + espresso_system) lattice = pmb.initialize_lattice_builder(diamond) sequence = [Res3, Res1, Res2, Res1] # build default structure @@ -125,58 +132,31 @@ def test_lattice_setup(self): assert lattice.get_node("[0 0 0]") == "default_linker" # Change default node type lattice.set_node(node="[1 1 1]", residue=NodeType1) - np.testing.assert_equal(actual = lattice.get_node("[1 1 1]"), desired = NodeType1, verbose=True) - - pos_node1 = pmb.create_hydrogel_node("[1 1 1]", NodeType1, espresso_system=espresso_system) - np.testing.assert_equal(actual = lattice.get_node("[1 1 1]"), desired = NodeType1, verbose=True) - pos_node2 = pmb.create_hydrogel_node("[0 0 0]", NodeType2, espresso_system=espresso_system) - np.testing.assert_equal(actual = lattice.get_node("[0 0 0]"), desired = NodeType2, verbose=True) - pos_node3 = pmb.create_hydrogel_node("[2 2 0]", NodeType2, espresso_system=espresso_system) - np.testing.assert_equal(actual = lattice.get_node("[2 2 0]"), desired = NodeType2, verbose=True) - _,_ = pmb.create_hydrogel_node("[3 1 3]", NodeType1, espresso_system=espresso_system) - np.testing.assert_equal(actual = lattice.get_node("[3 1 3]"), desired = NodeType1, verbose=True) - - node_positions={} - node1_label = lattice.node_labels["[1 1 1]"] - node_positions[node1_label]=pos_node1[0] - node2_label = lattice.node_labels["[0 0 0]"] - node_positions[node2_label]=pos_node2[0] - node3_label = lattice.node_labels["[2 2 0]"] - node_positions[node3_label]=pos_node3[0] - - # define molecule 
in forward direction - molecule_name = "chain_[1 1 1]_[0 0 0]" - pmb.define_molecule(name=molecule_name, residue_list=sequence) - pmb.create_hydrogel_chain("[1 1 1]", "[0 0 0]", node_positions, espresso_system=espresso_system) - np.testing.assert_equal(actual = lattice.get_chain("[1 1 1]", "[0 0 0]"), desired = sequence, verbose=True) - np.testing.assert_equal(actual = lattice.get_chain("[0 0 0]", "[1 1 1]"), desired = sequence[::-1], verbose=True) - # set chain before set node - molecule_name = "chain_[3 1 3]_[0 0 0]" - pmb.define_molecule(name=molecule_name, residue_list=sequence) - np.testing.assert_raises(ValueError, pmb.create_hydrogel_chain, "[3 1 3]", "[0 0 0]", node_positions, espresso_system) - - # define custom chain in reverse direction - molecule_name = "chain_[0 0 0]_[1 1 1]" - pmb.define_molecule(name=molecule_name, residue_list=sequence) - pmb.create_hydrogel_chain("[0 0 0]", "[1 1 1]", node_positions, espresso_system=espresso_system) - np.testing.assert_equal(lattice.get_chain("[1 1 1]", "[0 0 0]"), sequence[::-1]) - np.testing.assert_equal(lattice.get_chain("[0 0 0]", "[1 1 1]"), sequence) - - ####---Raise Exceptions---#### - # define custom chain between normally unconnected nodes - molecule_name = "chain_[0 0 0]_[2 2 0]" - pmb.define_molecule(name=molecule_name, residue_list=sequence) - np.testing.assert_raises(AssertionError, - pmb.create_hydrogel_chain, - "[0 0 0]", "[2 2 0]", node_positions, espresso_system=espresso_system) - - # define custom chain that loops - molecule_name = "chain_[0 0 0]_[0 0 0]" - pmb.define_molecule(name=molecule_name, residue_list=sequence) - np.testing.assert_raises(AssertionError, - pmb.create_hydrogel_chain, - "[0 0 0]", "[0 0 0]", node_positions, espresso_system=espresso_system) - + lattice.set_node(node="[0 0 0]", residue=NodeType2) + np.testing.assert_equal(actual = lattice.get_node("[1 1 1]"), + desired = NodeType1, + verbose=True) + + np.testing.assert_equal(actual = lattice.get_node("[1 1 1]"), + desired = 
NodeType1, + verbose=True) + np.testing.assert_equal(actual = lattice.get_node("[0 0 0]"), + desired = NodeType2, + verbose=True) + np.testing.assert_equal(actual = lattice.get_node("[2 2 0]"), + desired = "default_linker", + verbose=True) + np.testing.assert_equal(actual = lattice.get_node("[3 1 3]"), + desired = "default_linker", + verbose=True) + + np.testing.assert_equal(actual = lattice.get_chain("[1 1 1]", "[0 0 0]"), + desired = sequence, + verbose=True) + np.testing.assert_equal(actual = lattice.get_chain("[0 0 0]", "[1 1 1]"), + desired = sequence[::-1], + verbose=True) + lattice.set_colormap(self.colormap) for index, (label, color) in enumerate(self.colormap.items()): np.testing.assert_equal(actual = lattice.get_monomer_color(label),desired = color, verbose=True) diff --git a/testsuite/test_io_database.py b/testsuite/test_io_database.py index 0ee1613..1cedef3 100644 --- a/testsuite/test_io_database.py +++ b/testsuite/test_io_database.py @@ -382,13 +382,13 @@ def test_io_instances(self): pmb.db.delete_reactions() # Test instances of a protein (tests protein, residue and particle instances) path_to_protein_structure = pmb.root / "parameters" / "globular_proteins" / f"1beb.vtf", - topology_dict, _ = pmb.read_protein_vtf (filename=path_to_protein_structure[0]) + topology_dict, sequence = pmb.read_protein_vtf (filename=path_to_protein_structure[0]) pmb.load_pka_set(filename=path_to_pka) # Define AA particles and residues hf.define_protein_AA_particles(topology_dict=topology_dict, pmb=pmb, pka_set=pka_set) - hf.define_protein_AA_residues(topology_dict=topology_dict, + hf.define_protein_AA_residues(sequence=sequence, model="2beadAA", pmb=pmb) pmb.define_protein(name="1beb", From 66bc9c2ab57be3c6ac557ee8eb6f6a13bec3e215 Mon Sep 17 00:00:00 2001 From: Pablo Date: Sun, 25 Jan 2026 16:54:43 +0100 Subject: [PATCH 35/55] adapt reaction methods to the new database, fix docs --- pyMBE/pyMBE.py | 1180 +++++++++++++--------- testsuite/reaction_methods_unit_tests.py | 238 
++--- 2 files changed, 809 insertions(+), 609 deletions(-) diff --git a/pyMBE/pyMBE.py b/pyMBE/pyMBE.py index bd27db1..1ccb643 100644 --- a/pyMBE/pyMBE.py +++ b/pyMBE/pyMBE.py @@ -74,7 +74,7 @@ class pymbe_library(): units ('pint.UnitRegistry'): Pint unit registry used for unit-aware calculations. lattice_builder: - Optional lattice builder object (initialized as ``None``). + Optional lattice builder object (initialized as ''None''). root ('importlib.resources.abc.Traversable'): Root path to the pyMBE package resources. """ @@ -87,15 +87,15 @@ def __init__(self, seed, temperature=None, unit_length=None, unit_charge=None, K seed ('int'): Seed for the random number generator. temperature ('pint.Quantity', optional): - Simulation temperature. If ``None``, defaults to 298.15 K. + Simulation temperature. If ''None'', defaults to 298.15 K. unit_length ('pint.Quantity', optional): - Reference length for reduced units. If ``None``, defaults to + Reference length for reduced units. If ''None'', defaults to 0.355 nm. unit_charge ('pint.Quantity', optional): - Reference charge for reduced units. If ``None``, defaults to + Reference charge for reduced units. If ''None'', defaults to one elementary charge. Kw ('pint.Quantity', optional): - Ionic product of water (typically in mol²/L²). If ``None``, + Ionic product of water (typically in mol²/L²). If ''None'', defaults to 1e-14 mol²/L². """ # Seed and RNG @@ -119,8 +119,8 @@ def _check_bond_inputs(self, bond_type, bond_parameters): Checks that the input bond parameters are valid within the current pyMBE implementation. Args: - bond_type(`str`): label to identify the potential to model the bond. - bond_parameters(`dict`): parameters of the potential of the bond. + bond_type('str'): label to identify the potential to model the bond. + bond_parameters('dict'): parameters of the potential of the bond. 
""" valid_bond_types = ["harmonic", "FENE"] if bond_type not in valid_bond_types: @@ -133,18 +133,18 @@ def _check_bond_inputs(self, bond_type, bond_parameters): def _check_dimensionality(self, variable, expected_dimensionality): """ - Checks if the dimensionality of `variable` matches `expected_dimensionality`. + Checks if the dimensionality of 'variable' matches 'expected_dimensionality'. Args: - variable(`pint.Quantity`): Quantity to be checked. - expected_dimensionality(`str`): Expected dimension of the variable. + variable('pint.Quantity'): Quantity to be checked. + expected_dimensionality('str'): Expected dimension of the variable. Returns: - (`bool`): `True` if the variable if of the expected dimensionality, `False` otherwise. + ('bool'): 'True' if the variable if of the expected dimensionality, 'False' otherwise. Note: - - `expected_dimensionality` takes dimensionality following the Pint standards [docs](https://pint.readthedocs.io/en/0.10.1/wrapping.html?highlight=dimensionality#checking-dimensionality). - - For example, to check for a variable corresponding to a velocity `expected_dimensionality = "[length]/[time]"` + - 'expected_dimensionality' takes dimensionality following the Pint standards [docs](https://pint.readthedocs.io/en/0.10.1/wrapping.html?highlight=dimensionality#checking-dimensionality). + - For example, to check for a variable corresponding to a velocity 'expected_dimensionality = "[length]/[time]"' """ correct_dimensionality=variable.check(f"{expected_dimensionality}") if not correct_dimensionality: @@ -153,10 +153,10 @@ def _check_dimensionality(self, variable, expected_dimensionality): def _check_pka_set(self, pka_set): """ - Checks that `pka_set` has the formatting expected by pyMBE. + Checks that 'pka_set' has the formatting expected by pyMBE. 
Args: - pka_set (`dict`): + pka_set ('dict'): {"name" : {"pka_value": pka, "acidity": acidity}} """ required_keys=['pka_value','acidity'] @@ -171,24 +171,24 @@ def _create_espresso_bond_instance(self, bond_type, bond_parameters): Creates an ESPResSo bond instance. Args: - bond_type(`str`): label to identify the potential to model the bond. - bond_parameters(`dict`): parameters of the potential of the bond. + bond_type('str'): label to identify the potential to model the bond. + bond_parameters('dict'): parameters of the potential of the bond. Note: Currently, only HARMONIC and FENE bonds are supported. For a HARMONIC bond the dictionary must contain: - - k (`Pint.Quantity`) : Magnitude of the bond. It should have units of energy/length**2 - using the `pmb.units` UnitRegistry. - - r_0 (`Pint.Quantity`) : Equilibrium bond length. It should have units of length using - the `pmb.units` UnitRegistry. + - k ('Pint.Quantity') : Magnitude of the bond. It should have units of energy/length**2 + using the 'pmb.units' UnitRegistry. + - r_0 ('Pint.Quantity') : Equilibrium bond length. It should have units of length using + the 'pmb.units' UnitRegistry. For a FENE bond the dictionary must additionally contain: - - d_r_max (`Pint.Quantity`): Maximal stretching length for FENE. It should have - units of length using the `pmb.units` UnitRegistry. Default 'None'. + - d_r_max ('Pint.Quantity'): Maximal stretching length for FENE. It should have + units of length using the 'pmb.units' UnitRegistry. Default 'None'. Returns: - (`espressomd.interactions`): instance of an ESPResSo bond object + ('espressomd.interactions'): instance of an ESPResSo bond object """ from espressomd import interactions self._check_bond_inputs(bond_parameters=bond_parameters, @@ -223,11 +223,11 @@ def _create_hydrogel_chain(self, hydrogel_chain, nodes, espresso_system, use_def molecule_id of the created hydrogel chian. 
Note: - - If the chain is defined between node_start = ``[0 0 0]`` and node_end = ``[1 1 1]``, the chain will be placed between these two nodes. - - The chain will be placed in the direction of the vector between `node_start` and `node_end`. + - If the chain is defined between node_start = ''[0 0 0]'' and node_end = ''[1 1 1]'', the chain will be placed between these two nodes. + - The chain will be placed in the direction of the vector between 'node_start' and 'node_end'. """ if self.lattice_builder is None: - raise ValueError("LatticeBuilder is not initialized. Use `initialize_lattice_builder` first.") + raise ValueError("LatticeBuilder is not initialized. Use 'initialize_lattice_builder' first.") molecule_tpl = self.db.get_template(pmb_type="molecule", name=hydrogel_chain.molecule_name) residue_list = molecule_tpl.residue_list @@ -293,16 +293,16 @@ def _create_hydrogel_node(self, node_index, node_name, espresso_system): Set a node residue type. Args: - node_index(`str`): Lattice node index in the form of a string, e.g. "[0 0 0]". - node_name(`str`): name of the node particle defined in pyMBE. + node_index('str'): Lattice node index in the form of a string, e.g. "[0 0 0]". + node_name('str'): name of the node particle defined in pyMBE. espresso_system (espressomd.system.System): ESPResSo system object where the hydrogel node will be created. Returns: - node_position(`list`): Position of the node in the lattice. - p_id(`int`): Particle ID of the node. + node_position('list'): Position of the node in the lattice. + p_id('int'): Particle ID of the node. """ if self.lattice_builder is None: - raise ValueError("LatticeBuilder is not initialized. Use `initialize_lattice_builder` first.") + raise ValueError("LatticeBuilder is not initialized. 
Use 'initialize_lattice_builder' first.") node_position = np.array(node_index)*0.25*self.lattice_builder.box_l p_id = self.create_particle(name = node_name, espresso_system=espresso_system, @@ -317,11 +317,11 @@ def _get_label_id_map(self, pmb_type): Returns the key used to access the particle ID map for a given pyMBE object type. Args: - pmb_type (`str`): + pmb_type ('str'): pyMBE object type for which the particle ID map label is requested. Returns: - `str`: + 'str': Label identifying the appropriate particle ID map. """ if pmb_type in self.db._assembly_like_types: @@ -334,15 +334,15 @@ def _get_label_id_map(self, pmb_type): def _get_residue_list_from_sequence(self, sequence): """ - Convenience function to get a `residue_list` from a protein or peptide `sequence`. + Convenience function to get a 'residue_list' from a protein or peptide 'sequence'. Args: - sequence (`lst`): + sequence ('lst'): Sequence of the peptide or protein. Returns: - residue_list (`list` of `str`): - List of the `name`s of the `residue`s in the sequence of the `molecule`. + residue_list ('list' of 'str'): + List of the 'name's of the 'residue's in the sequence of the 'molecule'. """ residue_list = [] for item in sequence: @@ -408,13 +408,13 @@ def calculate_center_of_mass(self, instance_id, pmb_type, espresso_system): pyMBE instance ID of the object whose center of mass is calculated. pmb_type ('str'): Type of the pyMBE object. Must correspond to a particle-aggregating - template type (e.g. `"molecule"`, `"residue"`, `"peptide"`, `"protein"`). + template type (e.g. '"molecule"', '"residue"', '"peptide"', '"protein"'). espresso_system ('espressomd.system.System'): ESPResSo system containing the particle instances. Returns: center_of_mass ('numpy.ndarray'): - Array of shape `(3,)` containing the Cartesian coordinates of the + Array of shape '(3,)' containing the Cartesian coordinates of the center of mass. 
Notes: @@ -493,49 +493,49 @@ def calculate_HH_Donnan(self, c_macro, c_salt, pH_list=None, pka_set=None): coupled to ideal Donnan partitioning. Args: - c_macro (`dict`): + c_macro ('dict'): Mapping of macromolecular species names to their concentrations in the system: - `{molecule_name: concentration}`. + '{molecule_name: concentration}'. Concentrations must carry units compatible with molar concentration. - c_salt (`float` or `pint.Quantity`): + c_salt ('float' or 'pint.Quantity'): Salt concentration in the reservoir. - pH_list (`list[float]`, optional): + pH_list ('list[float]', optional): List of pH values in the reservoir at which the calculation is - performed. If `None`, 50 equally spaced values between 2 and 12 + performed. If 'None', 50 equally spaced values between 2 and 12 are used. - pka_set (`dict`, optional): + pka_set ('dict', optional): Dictionary defining the acid–base properties of titratable particle types: - `{particle_name: {"pka_value": float, "acidity": "acidic" | "basic"}}`. - If `None`, the pKa set is taken from the pyMBE database. + '{particle_name: {"pka_value": float, "acidity": "acidic" | "basic"}}'. + If 'None', the pKa set is taken from the pyMBE database. Returns: - `dict`: + 'dict': Dictionary containing: - - `"charges_dict"` (`dict`): - Mapping `{molecule_name: list}` of Henderson–Hasselbalch–Donnan + - '"charges_dict"' ('dict'): + Mapping '{molecule_name: list}' of Henderson–Hasselbalch–Donnan charges evaluated at each pH value. - - `"pH_system_list"` (`list[float]`): + - '"pH_system_list"' ('list[float]'): Effective pH values inside the system phase after Donnan partitioning. - - `"partition_coefficients"` (`list[float]`): + - '"partition_coefficients"' ('list[float]'): Partition coefficients of monovalent cations at each pH value. Raises: ValueError: - If the provided `pka_set` is invalid or inconsistent. + If the provided 'pka_set' is invalid or inconsistent. 
Notes: - This method assumes **ideal Donnan equilibrium** and **monovalent salt**. - The ionic strength of the reservoir includes both salt and pH-dependent H⁺/OH⁻ contributions. - All charged macromolecular species present in the system must be - included in `c_macro`; missing species will lead to incorrect results. + included in 'c_macro'; missing species will lead to incorrect results. - The nonlinear Donnan equilibrium equation is solved using a scalar - root finder (`brentq`) in logarithmic form for numerical stability. + root finder ('brentq') in logarithmic form for numerical stability. - This method is intended for **two-phase systems**; for single-phase - systems use `calculate_HH` instead. + systems use 'calculate_HH' instead. """ if pH_list is None: pH_list=np.linspace(2,12,50) @@ -643,13 +643,13 @@ def center_object_in_simulation_box(self, instance_id, espresso_system, pmb_type geometric center of the ESPResSo simulation box. Args: - instance_id (`int`): + instance_id ('int'): ID of the pyMBE object instance to be centered. - pmb_type (`str`): + pmb_type ('str'): Type of the pyMBE object. - espresso_system (`espressomd.system.System`): + espresso_system ('espressomd.system.System'): ESPResSo system object in which the particles are defined. Notes: @@ -672,16 +672,16 @@ def center_object_in_simulation_box(self, instance_id, espresso_system, pmb_type def create_added_salt(self, espresso_system, cation_name, anion_name, c_salt): """ - Creates a `c_salt` concentration of `cation_name` and `anion_name` ions into the `espresso_system`. + Creates a 'c_salt' concentration of 'cation_name' and 'anion_name' ions into the 'espresso_system'. Args: - espresso_system(`espressomd.system.System`): instance of an espresso system object. - cation_name(`str`): `name` of a particle with a positive charge. - anion_name(`str`): `name` of a particle with a negative charge. - c_salt(`float`): Salt concentration. 
+ espresso_system('espressomd.system.System'): instance of an espresso system object. + cation_name('str'): 'name' of a particle with a positive charge. + anion_name('str'): 'name' of a particle with a negative charge. + c_salt('float'): Salt concentration. Returns: - c_salt_calculated(`float`): Calculated salt concentration added to `espresso_system`. + c_salt_calculated('float'): Calculated salt concentration added to 'espresso_system'. """ cation_tpl = self.db.get_template(pmb_type="particle", name=cation_name) @@ -726,11 +726,11 @@ def create_bond(self, particle_id1, particle_id2, espresso_system, use_default_b Creates a bond between two particle instances in an ESPResSo system and registers it in the pyMBE database. This method performs the following steps: - 1. Retrieves the particle instances corresponding to `particle_id1` and `particle_id2` from the database. + 1. Retrieves the particle instances corresponding to 'particle_id1' and 'particle_id2' from the database. 2. Retrieves or creates the corresponding ESPResSo bond instance using the bond template. 3. Adds the ESPResSo bond instance to the ESPResSo system if it was newly created. 4. Adds the bond to the first particle's bond list in ESPResSo. - 5. Creates a `BondInstance` in the database and registers it. + 5. Creates a 'BondInstance' in the database and registers it. Args: particle_id1 (int): pyMBE and ESPResSo ID of the first particle. @@ -761,16 +761,16 @@ def create_bond(self, particle_id1, particle_id2, espresso_system, use_default_b def create_counterions(self, object_name, cation_name, anion_name, espresso_system): """ - Creates particles of `cation_name` and `anion_name` in `espresso_system` to counter the net charge of `object_name`. + Creates particles of 'cation_name' and 'anion_name' in 'espresso_system' to counter the net charge of 'object_name'. Args: - object_name(`str`): `name` of a pyMBE object. 
- espresso_system(`espressomd.system.System`): Instance of a system object from the espressomd library. - cation_name(`str`): `name` of a particle with a positive charge. - anion_name(`str`): `name` of a particle with a negative charge. + object_name('str'): 'name' of a pyMBE object. + espresso_system('espressomd.system.System'): Instance of a system object from the espressomd library. + cation_name('str'): 'name' of a particle with a positive charge. + anion_name('str'): 'name' of a particle with a negative charge. Returns: - counterion_number(`dict`): {"name": number} + counterion_number('dict'): {"name": number} Note: This function currently does not support the creation of counterions for hydrogels. @@ -822,7 +822,7 @@ def create_counterions(self, object_name, cation_name, anion_name, espresso_syst def create_hydrogel(self, name, espresso_system, use_default_bond=False): """ - Creates a hydrogel in espresso_system using a pyMBE hydrogel template given by `name` + Creates a hydrogel in espresso_system using a pyMBE hydrogel template given by 'name' Args: name(str): name of the hydrogel template in the pyMBE database. @@ -871,21 +871,33 @@ def create_hydrogel(self, name, espresso_system, use_default_bond=False): def create_molecule(self, name, number_of_molecules, espresso_system, list_of_first_residue_positions=None, backbone_vector=None, use_default_bond=False): """ - Creates `number_of_molecules` molecule of type `name` into `espresso_system` and bookkeeps them into `pmb.df`. + Creates 'number_of_molecules' molecule of type 'name' into 'espresso_system'. Args: - name(`str`): Label of the molecule type to be created. `name` must be defined in `pmb.df` - espresso_system(`espressomd.system.System`): Instance of a system object from espressomd library. - number_of_molecules(`int`): Number of molecules or peptides of type `name` to be created. 
- list_of_first_residue_positions(`list`, optional): List of coordinates where the central bead of the first_residue_position will be created, random by default. - backbone_vector(`list` of `float`): Backbone vector of the molecule, random by default. Central beads of the residues in the `residue_list` are placed along this vector. - use_default_bond(`bool`, optional): Controls if a bond of type `default` is used to bond particle with undefined bonds in `pymbe.df` + name ('str'): + Label of the molecule type to be created. 'name' must be defined in the pyMBE database. + + espresso_system ('espressomd.system.System'): + Instance of a system object from espressomd library. + + number_of_molecules ('int'): + Number of molecules or peptides of type 'name' to be created. + + list_of_first_residue_positions ('list', optional): + List of coordinates where the central bead of the first_residue_position will be created, random by default. + + backbone_vector ('list' of 'float'): + Backbone vector of the molecule, random by default. Central beads of the residues in the 'residue_list' are placed along this vector. + + use_default_bond ('bool', optional): + Controls if a bond of type 'default' is used to bond particles with undefined bonds in the pyMBE database. Returns: - created_molecule_id_list(`list` of `int`): List with the `molecule_id` of the pyMBE molecule instances created into `espresso_system`. + ('list' of 'int'): + List with the 'molecule_id' of the pyMBE molecule instances created into 'espresso_system'. - Note: - Despite its name, this function can be used to create both molecules and peptides. + Notes: + - This function can be used to create both molecules and peptides. """ if number_of_molecules <= 0: return {} @@ -1000,13 +1012,13 @@ def create_particle(self, name, espresso_system, number_of_particles, position=N Creates one or more particles in an ESPResSo system based on the particle template in the pyMBE database. Args: - name(`str`): Label of the particle template in the pyMBE database.
- espresso_system(`espressomd.system.System`): Instance of a system object from the espressomd library. - number_of_particles(`int`): Number of particles to be created. - position(list of [`float`,`float`,`float`], optional): Initial positions of the particles. If not given, particles are created in random positions. Defaults to None. - fix(`bool`, optional): Controls if the particle motion is frozen in the integrator, it is used to create rigid objects. Defaults to False. + name('str'): Label of the particle template in the pyMBE database. + espresso_system('espressomd.system.System'): Instance of a system object from the espressomd library. + number_of_particles('int'): Number of particles to be created. + position(list of ['float','float','float'], optional): Initial positions of the particles. If not given, particles are created in random positions. Defaults to None. + fix('bool', optional): Controls if the particle motion is frozen in the integrator, it is used to create rigid objects. Defaults to False. Returns: - created_pid_list(`list` of `int`): List with the ids of the particles created into `espresso_system`. + created_pid_list('list' of 'int'): List with the ids of the particles created into 'espresso_system'. """ if number_of_particles <=0: return [] @@ -1068,16 +1080,16 @@ def create_protein(self, name, number_of_proteins, espresso_system, topology_dic "ResidueName2": { ... }, ... } - The `"initial_pos"` entry is required and represents the residue’s + The '"initial_pos"' entry is required and represents the residue’s reference coordinates before shifting to the protein's center-of-mass. Returns: (list of int): List of the molecule_id of the Protein instances created into ESPResSo. Notes: - - Particles are created using `create_particle()` with `fix=True`, + - Particles are created using 'create_particle()' with 'fix=True', meaning they are initially immobilized. 
- - The function assumes all residues in `topology_dict` correspond to + - The function assumes all residues in 'topology_dict' correspond to particle templates already defined in the pyMBE database. - Bonds between residues are not created here; it assumes a rigid body representation of the protein. """ @@ -1135,17 +1147,27 @@ def create_protein(self, name, number_of_proteins, espresso_system, topology_dic def create_residue(self, name, espresso_system, central_bead_position=None,use_default_bond=False, backbone_vector=None): """ - Creates a residue of type `name` into `espresso_system` and bookkeeps them into `pmb.df`. + Creates a residue into ESPResSo. Args: - name(`str`): Label of the residue type to be created. `name` must be defined in `pmb.df` - espresso_system(`espressomd.system.System`): Instance of a system object from espressomd library. - central_bead_position(`list` of `float`): Position of the central bead. - use_default_bond(`bool`): Switch to control if a bond of type `default` is used to bond a particle whose bonds types are not defined in `pmb.df` - backbone_vector(`list` of `float`): Backbone vector of the molecule. All side chains are created perpendicularly to `backbone_vector`. + name ('str'): + Label of the residue type to be created. + + espresso_system ('espressomd.system.System'): + Instance of a system object from espressomd library. + + central_bead_position ('list' of 'float'): + Position of the central bead. + + use_default_bond ('bool'): + Switch to control if a bond of type 'default' is used to bond a particle whose bonds types are not defined in the pyMBE database. + + backbone_vector ('list' of 'float'): + Backbone vector of the molecule. All side chains are created perpendicularly to 'backbone_vector'. Returns: - (int) : residue_id of the residue created. + (int): + residue_id of the residue created. 
""" if not self.db._has_template(name=name, pmb_type="residue"): logging.warning(f"Residue template with name '{name}' is not defined in the pyMBE database, no residue will be created.") @@ -1253,25 +1275,25 @@ def create_residue(self, name, espresso_system, central_bead_position=None,use_d def define_bond(self, bond_type, bond_parameters, particle_pairs): """ - Defines bond templates for each particle pair in `particle_pairs` in the pyMBE database. + Defines bond templates for each particle pair in 'particle_pairs' in the pyMBE database. Args: - bond_type(`str`): label to identify the potential to model the bond. - bond_parameters(`dict`): parameters of the potential of the bond. - particle_pairs(`lst`): list of the `names` of the `particles` to be bonded. + bond_type('str'): label to identify the potential to model the bond. + bond_parameters('dict'): parameters of the potential of the bond. + particle_pairs('lst'): list of the 'names' of the 'particles' to be bonded. Note: Currently, only HARMONIC and FENE bonds are supported. For a HARMONIC bond the dictionary must contain the following parameters: - - k (`pint.Quantity`) : Magnitude of the bond. It should have units of energy/length**2 - using the `pmb.units` UnitRegistry. - - r_0 (`pint.Quantity`) : Equilibrium bond length. It should have units of length using - the `pmb.units` UnitRegistry. + - k ('pint.Quantity') : Magnitude of the bond. It should have units of energy/length**2 + using the 'pmb.units' UnitRegistry. + - r_0 ('pint.Quantity') : Equilibrium bond length. It should have units of length using + the 'pmb.units' UnitRegistry. For a FENE bond the dictionary must contain the same parameters as for a HARMONIC bond and: - - d_r_max (`pint.Quantity`): Maximal stretching length for FENE. It should have - units of length using the `pmb.units` UnitRegistry. Default 'None'. + - d_r_max ('pint.Quantity'): Maximal stretching length for FENE. It should have + units of length using the 'pmb.units' UnitRegistry. 
Default 'None'. """ self._check_bond_inputs(bond_parameters=bond_parameters, bond_type=bond_type) @@ -1305,8 +1327,8 @@ def define_default_bond(self, bond_type, bond_parameters): Defines a bond template as a "default" template in the pyMBE database. Args: - bond_type(`str`): label to identify the potential to model the bond. - bond_parameters(`dict`): parameters of the potential of the bond. + bond_type('str'): label to identify the potential to model the bond. + bond_parameters('dict'): parameters of the potential of the bond. Note: - Currently, only harmonic and FENE bonds are supported. @@ -1331,9 +1353,9 @@ def define_hydrogel(self, name, node_map, chain_map): Defines a hydrogel template in the pyMBE database. Args: - name(`str`): Unique label that identifies the `hydrogel`. - node_map(`list of dict`): [{"particle_name": , "lattice_index": }, ... ] - chain_map(`list of dict`): [{"node_start": , "node_end": , "residue_list": , ... ] + name('str'): Unique label that identifies the 'hydrogel'. + node_map('list of dict'): [{"particle_name": , "lattice_index": }, ... ] + chain_map('list of dict'): [{"node_start": , "node_end": , "residue_list": , ... ] """ # Sanity tests @@ -1373,8 +1395,8 @@ def define_molecule(self, name, residue_list): Defines a molecule template in the pyMBE database. Args: - name(`str`): Unique label that identifies the `molecule`. - residue_list(`list` of `str`): List of the `name`s of the `residue`s in the sequence of the `molecule`. + name('str'): Unique label that identifies the 'molecule'. + residue_list('list' of 'str'): List of the 'name's of the 'residue's in the sequence of the 'molecule'. """ tpl = MoleculeTemplate(name=name, residue_list=residue_list) @@ -1387,7 +1409,7 @@ def define_monoprototic_acidbase_reaction(self, particle_name, pka, acidity, met Args: particle_name (str): Unique label that identifies the particle template. pka (float): pka-value of the acid or base. 
- acidity (str): Identifies whether if the particle is `acidic` or `basic`. + acidity (str): Identifies whether if the particle is 'acidic' or 'basic'. metadata (dict, optional): Additional information to be stored in the reaction. Defaults to None. """ supported_acidities = ["acidic", "basic"] @@ -1414,8 +1436,8 @@ def define_monoprototic_particle_states(self, particle_name, acidity): Defines particle states for a monoprotonic particle template including the charges in each of its possible states. Args: - particle_name(`str`): Unique label that identifies the particle template. - acidity(`str`): Identifies whether the particle is `acidic` or `basic`. + particle_name('str'): Unique label that identifies the particle template. + acidity('str'): Identifies whether the particle is 'acidic' or 'basic'. """ acidity_valid_keys = ['acidic', 'basic'] if not pd.isna(acidity): @@ -1436,23 +1458,23 @@ def define_particle(self, name, sigma, epsilon, z=0, acidity=pd.NA, pka=pd.NA, Defines a particle template in the pyMBE database. Args: - name(`str`): Unique label that identifies this particle type. - sigma(`pint.Quantity`): Sigma parameter used to set up Lennard-Jones interactions for this particle type. - epsilon(`pint.Quantity`): Epsilon parameter used to setup Lennard-Jones interactions for this particle tipe. - z(`int`, optional): Permanent charge number of this particle type. Defaults to 0. - acidity(`str`, optional): Identifies whether if the particle is `acidic` or `basic`, used to setup constant pH simulations. Defaults to pd.NA. - pka(`float`, optional): If `particle` is an acid or a base, it defines its pka-value. Defaults to pd.NA. - cutoff(`pint.Quantity`, optional): Cutoff parameter used to set up Lennard-Jones interactions for this particle type. Defaults to pd.NA. - offset(`pint.Quantity`, optional): Offset parameter used to set up Lennard-Jones interactions for this particle type. Defaults to pd.NA. 
+ name('str'): Unique label that identifies this particle type. + sigma('pint.Quantity'): Sigma parameter used to set up Lennard-Jones interactions for this particle type. + epsilon('pint.Quantity'): Epsilon parameter used to set up Lennard-Jones interactions for this particle type. + z('int', optional): Permanent charge number of this particle type. Defaults to 0. + acidity('str', optional): Identifies whether the particle is 'acidic' or 'basic', used to set up constant pH simulations. Defaults to pd.NA. + pka('float', optional): If 'particle' is an acid or a base, it defines its pka-value. Defaults to pd.NA. + cutoff('pint.Quantity', optional): Cutoff parameter used to set up Lennard-Jones interactions for this particle type. Defaults to pd.NA. + offset('pint.Quantity', optional): Offset parameter used to set up Lennard-Jones interactions for this particle type. Defaults to pd.NA. Note: - - `sigma`, `cutoff` and `offset` must have a dimensitonality of `[length]` and should be defined using pmb.units. - - `epsilon` must have a dimensitonality of `[energy]` and should be defined using pmb.units. - - `cutoff` defaults to `2**(1./6.) reduced_length`. - - `offset` defaults to 0. - - For more information on `sigma`, `epsilon`, `cutoff` and `offset` check `pmb.setup_lj_interactions()`. + - 'sigma', 'cutoff' and 'offset' must have a dimensionality of '[length]' and should be defined using pmb.units. + - 'epsilon' must have a dimensionality of '[energy]' and should be defined using pmb.units. + - 'cutoff' defaults to '2**(1./6.) reduced_length'. + - 'offset' defaults to 0. + - For more information on 'sigma', 'epsilon', 'cutoff' and 'offset' check 'pmb.setup_lj_interactions()'.
""" - # If `cutoff` and `offset` are not defined, default them to the following values + # If 'cutoff' and 'offset' are not defined, default them to the following values if pd.isna(cutoff): cutoff=self.units.Quantity(2**(1./6.), "reduced_length") if pd.isna(offset): @@ -1484,23 +1506,23 @@ def define_particle_states(self, particle_name, states): Define the chemical states of an existing particle template. Args: - particle_name (`str`): + particle_name ('str'): Name of a particle template. - states (`list` of `dict`): + states ('list' of 'dict'): List of dictionaries defining the particle states. Each dictionary must contain: - - `name` (`str`): Name of the particle state (e.g. `"H"`, `"-"`, - `"neutral"`). - - `z` (`int`): Charge number of the particle in this state. + - 'name' ('str'): Name of the particle state (e.g. '"H"', '"-"', + '"neutral"'). + - 'z' ('int'): Charge number of the particle in this state. Example: states = [{"name": "AH", "z": 0}, # protonated {"name": "A-", "z": -1}] # deprotonated Notes: - - Each state is assigned a unique Espresso `es_type` automatically. + - Each state is assigned a unique Espresso 'es_type' automatically. - Chemical reactions (e.g. acid–base equilibria) are **not** created by this method and must be defined separately (e.g. via - `set_particle_acidity()` or custom reaction definitions). + 'set_particle_acidity()' or custom reaction definitions). - Particles without explicitly defined states are assumed to have a single, implicit state with their default charge. """ @@ -1516,9 +1538,9 @@ def define_peptide(self, name, sequence, model): Defines a peptide template in the pyMBE database. Args: - name (`str`): Unique label that identifies the peptide. - sequence (`str`): Sequence of the peptide. - model (`str`): Model name. Currently only models with 1 bead '1beadAA' or with 2 beads '2beadAA' per amino acid are supported. + name ('str'): Unique label that identifies the peptide. + sequence ('str'): Sequence of the peptide. 
+ model ('str'): Model name. Currently only models with 1 bead '1beadAA' or with 2 beads '2beadAA' per amino acid are supported. """ valid_keys = ['1beadAA','2beadAA'] if model not in valid_keys: @@ -1536,12 +1558,12 @@ def define_protein(self, name, sequence, model): Defines a protein template in the pyMBE database. Args: - name (`str`): Unique label that identifies the protein. - sequence (`str`): Sequence of the protein. - model (`string`): Model name. Currently only models with 1 bead '1beadAA' or with 2 beads '2beadAA' per amino acid are supported. + name ('str'): Unique label that identifies the protein. + sequence ('str'): Sequence of the protein. + model ('string'): Model name. Currently only models with 1 bead '1beadAA' or with 2 beads '2beadAA' per amino acid are supported. Note: - - Currently, only `lj_setup_mode="wca"` is supported. This corresponds to setting up the WCA potential. + - Currently, only 'lj_setup_mode="wca"' is supported. This corresponds to setting up the WCA potential. """ valid_model_keys = ['1beadAA','2beadAA'] if model not in valid_model_keys: @@ -1559,9 +1581,9 @@ def define_residue(self, name, central_bead, side_chains): Defines a residue template in the pyMBE database. Args: - name(`str`): Unique label that identifies the residue. - central_bead(`str`): `name` of the `particle` to be placed as central_bead of the residue. - side_chains(`list` of `str`): List of `name`s of the pmb_objects to be placed as side_chains of the residue. Currently, only pyMBE objects of type `particle` or `residue` are supported. + name('str'): Unique label that identifies the residue. + central_bead('str'): 'name' of the 'particle' to be placed as central_bead of the residue. + side_chains('list' of 'str'): List of 'name's of the pmb_objects to be placed as side_chains of the residue. Currently, only pyMBE objects of type 'particle' or 'residue' are supported. 
""" tpl = ResidueTemplate(name=name, central_bead=central_bead, @@ -1616,7 +1638,7 @@ def determine_reservoir_concentrations(self, pH_res, c_salt_res, activity_coeffi activity_coefficient_monovalent_pair ('callable'): Function returning the activity coefficient of a monovalent ion pair as a function of ionic strength: - `gamma = activity_coefficient_monovalent_pair(I)`. + 'gamma = activity_coefficient_monovalent_pair(I)'. max_number_sc_runs ('int', optional): Maximum number of self-consistent iterations allowed before @@ -1632,7 +1654,7 @@ def determine_reservoir_concentrations(self, pH_res, c_salt_res, activity_coeffi Notes: - The algorithm enforces electroneutrality in the reservoir. - - Water autodissociation is included via the equilibrium constant `Kw`. + - Water autodissociation is included via the equilibrium constant 'Kw'. - Non-ideal effects enter through activity coefficients depending on ionic strength. - The implementation follows the self-consistent scheme described in @@ -1704,21 +1726,21 @@ def enable_motion_of_rigid_object(self, instance_id, pmb_type, espresso_system): rotate as a single body. Args: - instance_id (`int`): + instance_id ('int'): Instance ID of the pyMBE object whose rigid-body motion is enabled. - pmb_type (`str`): - pyMBE object type of the instance (e.g. `"molecule"`, `"peptide"`, - `"protein"`, or any assembly-like type). + pmb_type ('str'): + pyMBE object type of the instance (e.g. '"molecule"', '"peptide"', + '"protein"', or any assembly-like type). - espresso_system (`espressomd.system.System`): + espresso_system ('espressomd.system.System'): ESPResSo system in which the rigid object is defined. Notes: - This method requires ESPResSo to be compiled with the following features enabled: - - `"VIRTUAL_SITES_RELATIVE"` - - `"MASS"` + - '"VIRTUAL_SITES_RELATIVE"' + - '"MASS"' - A new ESPResSo particle is created to represent the rigid-body center. 
- The mass of the rigid-body center is set to the number of particles belonging to the object. @@ -1750,26 +1772,26 @@ def generate_coordinates_outside_sphere(self, center, radius, max_dist, n_sample Generates random coordinates outside a sphere and inside a larger bounding sphere. Args: - center (`array-like`): + center ('array-like'): Coordinates of the center of the spheres. - radius (`float`): + radius ('float'): Radius of the inner exclusion sphere. Must be positive. - max_dist (`float`): - Radius of the outer sampling sphere. Must be larger than `radius`. + max_dist ('float'): + Radius of the outer sampling sphere. Must be larger than 'radius'. - n_samples (`int`): + n_samples ('int'): Number of coordinates to generate. Returns: - 'list' of `numpy.ndarray`: + 'list' of 'numpy.ndarray': List of coordinates lying outside the inner sphere and inside the outer sphere. Notes: - - Points are uniformly sampled inside a sphere of radius `max_dist` centered at `center` - and only those with a distance greater than or equal to `radius` from the center are retained. + - Points are uniformly sampled inside a sphere of radius 'max_dist' centered at 'center' + and only those with a distance greater than or equal to 'radius' from the center are retained. """ if not radius > 0: raise ValueError (f'The value of {radius} must be a positive value') @@ -1791,27 +1813,27 @@ def generate_random_points_in_a_sphere(self, center, radius, n_samples, on_surfa Generates uniformly distributed random points inside or on the surface of a sphere. Args: - center (`array-like`): + center ('array-like'): Coordinates of the center of the sphere. - radius (`float`): + radius ('float'): Radius of the sphere. - n_samples (`int`): + n_samples ('int'): Number of sample points to generate. - on_surface (`bool`, optional): + on_surface ('bool', optional): If True, points are uniformly sampled on the surface of the sphere. If False, points are uniformly sampled within the sphere volume. 
Defaults to False. Returns: 'numpy.ndarray': - Array of shape `(n_samples, d)` containing the generated coordinates, - where `d` is the dimensionality of `center`. + Array of shape '(n_samples, d)' containing the generated coordinates, + where 'd' is the dimensionality of 'center'. Notes: - Points are sampled in a space whose dimensionality is inferred - from the length of `center`. + from the length of 'center'. """ # initial values center=np.array(center) @@ -1835,15 +1857,15 @@ def generate_trial_perpendicular_vector(self,vector,magnitude): Generates a random vector perpendicular to a given vector. Args: - vector (`array-like`): + vector ('array-like'): Reference vector to which the generated vector will be perpendicular. - magnitude (`float`): + magnitude ('float'): Desired magnitude of the perpendicular vector. Returns: 'numpy.ndarray': - Vector orthogonal to `vector` with norm equal to `magnitude`. + Vector orthogonal to 'vector' with norm equal to 'magnitude'. """ np_vec = np.array(vector) if np.all(np_vec == 0): @@ -1866,13 +1888,13 @@ def get_bond_template(self, particle_name1, particle_name2, use_default_bond=Fal Retrieves a bond template connecting two particle templates. Args: - particle_name1 (`str`): + particle_name1 ('str'): Name of the first particle template. - particle_name2 (`str`): + particle_name2 ('str'): Name of the second particle template. - use_default_bond (`bool`, optional): + use_default_bond ('bool', optional): If True, returns the default bond template when no specific bond template is found. Defaults to False. @@ -1881,8 +1903,8 @@ def get_bond_template(self, particle_name1, particle_name2, use_default_bond=Fal Bond template object retrieved from the pyMBE database. Notes: - - This method searches the pyMBE database for a bond template defined between particle templates with names `particle_name1` and `particle_name2`. 
- If no specific bond template is found and `use_default_bond` is enabled, a default bond template is returned instead. + - This method searches the pyMBE database for a bond template defined between particle templates with names 'particle_name1' and 'particle_name2'. + - If no specific bond template is found and 'use_default_bond' is enabled, a default bond template is returned instead. """ # Try to find a specific bond template bond_key = BondTemplate.make_bond_key(pn1=particle_name1, @@ -1908,14 +1930,14 @@ def get_charge_number_map(self): Returns: dict[int, float]: Dictionary mapping ESPResSo particle types to charge numbers, - ``{es_type: z}``. + '{es_type: z}'. Notes: - The mapping is built from particle *states*, not instances. - - If multiple templates define states with the same ``es_type``, + - If multiple templates define states with the same 'es_type', the last encountered definition will overwrite previous ones. This behavior is intentional and assumes database consistency. - - Neutral particles (``z = 0``) are included in the map. + - Neutral particles ('z = 0') are included in the map. """ charge_number_map = {} particle_templates = self.db.get_templates("particle") @@ -1945,7 +1967,7 @@ def _get_espresso_bond_instance(self, bond_template, espresso_system, use_defaul (espressomd.interactions.BondedInteraction): The ESPResSo bond instance object. Raises: - KeyError: If no bond template is found for the particle pair and `use_default_bond` is False. + KeyError: If no bond template is found for the particle pair and 'use_default_bond' is False. Note: When a new bond instance is created, it is not added to the ESPResSo system. @@ -1963,32 +1985,36 @@ def _get_espresso_bond_instance(self, bond_template, espresso_system, use_defaul def get_instances_df(self, pmb_type): """ - Returns a dataframe with all instances of type `pmb_type` in the pyMBE database. + Returns a dataframe with all instances of type 'pmb_type' in the pyMBE database.
Args: - pmb_type(`str`): pmb type to search instances in the pyMBE database. + pmb_type ('str'): + pmb type to search instances in the pyMBE database. Returns: - instances_df(`Pandas.Dataframe`): Dataframe with all instances of type `pmb_type`. + ('Pandas.Dataframe'): + Dataframe with all instances of type 'pmb_type'. """ return self.db._get_instances_df(pmb_type=pmb_type) def get_lj_parameters(self, particle_name1, particle_name2, combining_rule='Lorentz-Berthelot'): """ Returns the Lennard-Jones parameters for the interaction between the particle types given by - `particle_name1` and `particle_name2` in `pymbe.df`, calculated according to the provided combining rule. + 'particle_name1' and 'particle_name2' in the pyMBE database, calculated according to the provided combining rule. Args: - particle_name1 (str): label of the type of the first particle type - particle_name2 (str): label of the type of the second particle type - combining_rule (`string`, optional): combining rule used to calculate `sigma` and `epsilon` for the potential betwen a pair of particles. Defaults to 'Lorentz-Berthelot'. + particle_name1 ('str'): + label of the type of the first particle type + + particle_name2 ('str'): label of the type of the second particle type + combining_rule ('string', optional): combining rule used to calculate 'sigma' and 'epsilon' for the potential betwen a pair of particles. Defaults to 'Lorentz-Berthelot'. Returns: {"epsilon": epsilon_value, "sigma": sigma_value, "offset": offset_value, "cutoff": cutoff_value} Note: - - Currently, the only `combining_rule` supported is Lorentz-Berthelot. - - If the sigma value of `particle_name1` or `particle_name2` is 0, the function will return an empty dictionary. No LJ interactions are set up for particles with sigma = 0. + - Currently, the only 'combining_rule' supported is Lorentz-Berthelot. + - If the sigma value of 'particle_name1' or 'particle_name2' is 0, the function will return an empty dictionary. 
No LJ interactions are set up for particles with sigma = 0. """ supported_combining_rules=["Lorentz-Berthelot"] if combining_rule not in supported_combining_rules: @@ -2091,16 +2117,18 @@ def get_pka_set(self): def get_radius_map(self, dimensionless=True): ''' - Gets the effective radius of each `espresso type` in `pmb.df`. + Gets the effective radius of each particle defined in the pyMBE database. Args: - dimensionless('bool'): controlls if the returned radii have a dimension. Defaults to False. + dimensionless ('bool'): + controls if the returned radii have a dimension. Defaults to True. Returns: - radius_map(`dict`): {espresso_type: radius}. + ('dict'): + {espresso_type: radius}. - Note: - The radius corresponds to (sigma+offset)/2 + Notes: + - The radius corresponds to (sigma+offset)/2 ''' if "particle" not in self.db._templates: return {} @@ -2115,10 +2143,11 @@ def get_radius_map(self, dimensionless=True): def get_reactions_df(self): """ - Returns a dataframe with all reaction templates ` in the pyMBE database. + Returns a dataframe with all reaction templates in the pyMBE database. Returns: - (Pandas.Dataframe): Dataframe with all reaction templates. + (Pandas.Dataframe): + Dataframe with all reaction templates. """ return self.db._get_reactions_df() @@ -2127,7 +2156,7 @@ def get_reduced_units(self): Returns the current set of reduced units defined in pyMBE. Returns: - reduced_units_text(`str`): text with information about the current set of reduced units. + reduced_units_text('str'): text with information about the current set of reduced units. """ unit_length=self.units.Quantity(1,'reduced_length') @@ -2143,13 +2172,15 @@ def get_reduced_units(self): def get_templates_df(self, pmb_type): """ - Returns a dataframe with all templates of type `pmb_type` in the pyMBE database. + Returns a dataframe with all templates of type 'pmb_type' in the pyMBE database. Args: - pmb_type(str): pmb type to search templates in the pyMBE database.
+ pmb_type ('str'): + pmb type to search templates in the pyMBE database. Returns: - (Pandas.Dataframe): Dataframe with all templates of type `pmb_type`. + ('Pandas.Dataframe'): + Dataframe with all templates of type given by 'pmb_type'. """ return self.db._get_templates_df(pmb_type=pmb_type) @@ -2172,7 +2203,7 @@ def initialize_lattice_builder(self, diamond_lattice): Initialize the lattice builder with the DiamondLattice object. Args: - diamond_lattice(`DiamondLattice`): DiamondLattice object from the `lib/lattice` module to be used in the LatticeBuilder. + diamond_lattice('DiamondLattice'): DiamondLattice object from the 'lib/lattice' module to be used in the LatticeBuilder. """ from .lib.lattice import LatticeBuilder, DiamondLattice if not isinstance(diamond_lattice, DiamondLattice): @@ -2183,7 +2214,7 @@ def initialize_lattice_builder(self, diamond_lattice): def load_database(self, folder, format='csv'): """ - Loads a pyMBE database stored in `folder`. + Loads a pyMBE database stored in 'folder'. Args: folder (str or Path): Path to the folder where the pyMBE database was stored. @@ -2193,7 +2224,7 @@ def load_database(self, folder, format='csv'): (dict): metadata with additional information about the source of the information in the database. Note: - - The folder must contain the files generated by `pmb.save_database()`. + - The folder must contain the files generated by 'pmb.save_database()'. - Currently, only 'csv' format is supported. """ supported_formats = ['csv'] @@ -2211,7 +2242,7 @@ def load_pka_set(self, filename): to existing particle templates. Args: - filename (`str`): Path to a JSON file containing the pKa set. + filename ('str'): Path to a JSON file containing the pKa set. Expected format: { "metadata": {...}, @@ -2245,13 +2276,13 @@ def propose_unused_type(self): """ Propose an unused ESPResSo particle type. 
- This method scans the full `type_map` produced by `get_type_map()`, - which contains all particle templates and their associated state `es_type`. - It extracts all integer `es_type` values and returns the next available + This method scans the full 'type_map' produced by 'get_type_map()', + which contains all particle templates and their associated state 'es_type'. + It extracts all integer 'es_type' values and returns the next available integer type, ensuring no collisions with existing ones. Returns: - int: The next available integer ESPResSo type. Returns ``0`` if no + int: The next available integer ESPResSo type. Returns ''0'' if no integer types are currently defined. @@ -2366,10 +2397,11 @@ def read_protein_vtf(self, filename, unit_length=None): def save_database(self, folder, format='csv'): """ - Saves the current pyMBE database into a file `filename`. + Saves the current pyMBE database into a file 'filename'. Args: - folder (str or Path): Path to the folder where the database files will be saved. + folder ('str' or 'Path'): + Path to the folder where the database files will be saved. """ supported_formats = ['csv'] @@ -2384,8 +2416,11 @@ def set_particle_initial_state(self, particle_name, state_name): Sets the default initial state of a particle template defined in the pyMBE database. Args: - particle_name(`str`): Unique label that identifies the particle template. - state_name(`str`): Name of the state to be set as default initial state. + particle_name ('str'): + Unique label that identifies the particle template. + + state_name ('str'): + Name of the state to be set as default initial state. """ part_tpl = self.db.get_template(name=particle_name, @@ -2398,16 +2433,23 @@ def set_reduced_units(self, unit_length=None, unit_charge=None, temperature=None Sets the set of reduced units used by pyMBE.units and it prints it. Args: - unit_length(`pint.Quantity`,optional): Reduced unit of length defined using the `pmb.units` UnitRegistry. Defaults to None. 
- unit_charge(`pint.Quantity`,optional): Reduced unit of charge defined using the `pmb.units` UnitRegistry. Defaults to None. - temperature(`pint.Quantity`,optional): Temperature of the system, defined using the `pmb.units` UnitRegistry. Defaults to None. - Kw(`pint.Quantity`,optional): Ionic product of water in mol^2/l^2. Defaults to None. + unit_length ('pint.Quantity', optional): + Reduced unit of length defined using the 'pmb.units' UnitRegistry. Defaults to None. - Note: - - If no `temperature` is given, a value of 298.15 K is assumed by default. - - If no `unit_length` is given, a value of 0.355 nm is assumed by default. - - If no `unit_charge` is given, a value of 1 elementary charge is assumed by default. - - If no `Kw` is given, a value of 10^(-14) * mol^2 / l^2 is assumed by default. + unit_charge ('pint.Quantity', optional): + Reduced unit of charge defined using the 'pmb.units' UnitRegistry. Defaults to None. + + temperature ('pint.Quantity', optional): + Temperature of the system, defined using the 'pmb.units' UnitRegistry. Defaults to None. + + Kw ('pint.Quantity', optional): + Ionic product of water in mol^2/l^2. Defaults to None. + + Notes: + - If no 'temperature' is given, a value of 298.15 K is assumed by default. + - If no 'unit_length' is given, a value of 0.355 nm is assumed by default. + - If no 'unit_charge' is given, a value of 1 elementary charge is assumed by default. + - If no 'Kw' is given, a value of 10^(-14) * mol^2 / l^2 is assumed by default. """ if unit_length is None: unit_length= 0.355*self.units.nm @@ -2432,17 +2474,25 @@ def set_reduced_units(self, unit_length=None, unit_charge=None, temperature=None def setup_cpH (self, counter_ion, constant_pH, exclusion_range=None, use_exclusion_radius_per_type = False): """ - Sets up the Acid/Base reactions for acidic/basic `particles` defined in `pmb.df` to be sampled in the constant pH ensemble. 
+ Sets up the Acid/Base reactions for acidic/basic particles defined in the pyMBE database + to be sampled in the constant pH ensemble. Args: - counter_ion(`str`): `name` of the counter_ion `particle`. - constant_pH(`float`): pH-value. - exclusion_range(`pint.Quantity`, optional): Below this value, no particles will be inserted. - use_exclusion_radius_per_type(`bool`,optional): Controls if one exclusion_radius for each espresso_type is used. Defaults to `False`. + counter_ion ('str'): + 'name' of the counter_ion 'particle'. + + constant_pH ('float'): + pH-value. + + exclusion_range ('pint.Quantity', optional): + Below this value, no particles will be inserted. + + use_exclusion_radius_per_type ('bool', optional): + Controls if one exclusion_radius for each espresso_type is used. Defaults to 'False'. Returns: - RE(`reaction_methods.ConstantpHEnsemble`): Instance of a reaction_methods.ConstantpHEnsemble object from the espressomd library. - sucessfull_reactions_labels(`lst`): Labels of the reactions set up by pyMBE. + ('reaction_methods.ConstantpHEnsemble'): + Instance of a reaction_methods.ConstantpHEnsemble object from the espressomd library. """ from espressomd import reaction_methods if exclusion_range is None: @@ -2495,15 +2545,27 @@ def setup_gcmc(self, c_salt_res, salt_cation_name, salt_anion_name, activity_coe For reactive systems coupled to a reservoir, the grand-reaction method has to be used instead. Args: - c_salt_res ('pint.Quantity'): Concentration of monovalent salt (e.g. NaCl) in the reservoir. - salt_cation_name ('str'): Name of the salt cation (e.g. Na+) particle. - salt_anion_name ('str'): Name of the salt anion (e.g. Cl-) particle. - activity_coefficient ('callable'): A function that calculates the activity coefficient of an ion pair as a function of the ionic strength. - exclusion_range(`pint.Quantity`, optional): For distances shorter than this value, no particles will be inserted. 
- use_exclusion_radius_per_type(`bool`,optional): Controls if one exclusion_radius for each espresso_type is used. Defaults to `False`. + c_salt_res ('pint.Quantity'): + Concentration of monovalent salt (e.g. NaCl) in the reservoir. + + salt_cation_name ('str'): + Name of the salt cation (e.g. Na+) particle. + + salt_anion_name ('str'): + Name of the salt anion (e.g. Cl-) particle. + + activity_coefficient ('callable'): + A function that calculates the activity coefficient of an ion pair as a function of the ionic strength. + + exclusion_range('pint.Quantity', optional): + For distances shorter than this value, no particles will be inserted. + + use_exclusion_radius_per_type('bool',optional): + Controls if one exclusion_radius for each espresso_type is used. Defaults to 'False'. Returns: - RE (`reaction_methods.ReactionEnsemble`): Instance of a reaction_methods.ReactionEnsemble object from the espressomd library. + ('reaction_methods.ReactionEnsemble'): + Instance of a reaction_methods.ReactionEnsemble object from the espressomd library. 
""" from espressomd import reaction_methods if exclusion_range is None: @@ -2512,85 +2574,107 @@ def setup_gcmc(self, c_salt_res, salt_cation_name, salt_anion_name, activity_coe exclusion_radius_per_type = self.get_radius_map() else: exclusion_radius_per_type = {} - RE = reaction_methods.ReactionEnsemble(kT=self.kT.to('reduced_energy').magnitude, - exclusion_range=exclusion_range, - seed=self.seed, - exclusion_radius_per_type = exclusion_radius_per_type - ) - + exclusion_range=exclusion_range, + seed=self.seed, + exclusion_radius_per_type = exclusion_radius_per_type) # Determine the concentrations of the various species in the reservoir and the equilibrium constants determined_activity_coefficient = activity_coefficient(c_salt_res) K_salt = (c_salt_res.to('1/(N_A * reduced_length**3)')**2) * determined_activity_coefficient - - salt_cation_es_type = self.df.loc[self.df['name']==salt_cation_name].state_one.es_type.values[0] - salt_anion_es_type = self.df.loc[self.df['name']==salt_anion_name].state_one.es_type.values[0] - - salt_cation_charge = self.df.loc[self.df['name']==salt_cation_name].state_one.z.values[0] - salt_anion_charge = self.df.loc[self.df['name']==salt_anion_name].state_one.z.values[0] - + cation_tpl = self.db.get_template(pmb_type="particle", + name=salt_cation_name) + cation_state = self.db.get_template(pmb_type="particle_state", + name=cation_tpl.initial_state) + anion_tpl = self.db.get_template(pmb_type="particle", + name=salt_anion_name) + anion_state = self.db.get_template(pmb_type="particle_state", + name=anion_tpl.initial_state) + salt_cation_es_type = cation_state.es_type + salt_anion_es_type = anion_state.es_type + salt_cation_charge = cation_state.z + salt_anion_charge = anion_state.z if salt_cation_charge <= 0: raise ValueError('ERROR salt cation charge must be positive, charge ', salt_cation_charge) if salt_anion_charge >= 0: raise ValueError('ERROR salt anion charge must be negative, charge ', salt_anion_charge) - # Grand-canonical 
coupling to the reservoir - RE.add_reaction( - gamma = K_salt.magnitude, - reactant_types = [], - reactant_coefficients = [], - product_types = [ salt_cation_es_type, salt_anion_es_type ], - product_coefficients = [ 1, 1 ], - default_charges = { - salt_cation_es_type: salt_cation_charge, - salt_anion_es_type: salt_anion_charge, - } - ) - + RE.add_reaction(gamma = K_salt.magnitude, + reactant_types = [], + reactant_coefficients = [], + product_types = [ salt_cation_es_type, salt_anion_es_type ], + product_coefficients = [ 1, 1 ], + default_charges = {salt_cation_es_type: salt_cation_charge, + salt_anion_es_type: salt_anion_charge}) + rx_tpl = Reaction(participants=[ReactionParticipant(particle_name=salt_cation_name, + state_name=cation_state.name, + coefficient=1), + ReactionParticipant(particle_name=salt_anion_name, + state_name=anion_state.name, + coefficient=1)], + pK=-np.log10(K_salt.magnitude), + reaction_type="ion_insertion", + simulation_method="GCMC") + self.db._register_reaction(rx_tpl) return RE - def setup_grxmc_reactions(self, pH_res, c_salt_res, proton_name, hydroxide_name, salt_cation_name, salt_anion_name, activity_coefficient, exclusion_range=None, pka_set=None, use_exclusion_radius_per_type = False): + def setup_grxmc_reactions(self, pH_res, c_salt_res, proton_name, hydroxide_name, salt_cation_name, salt_anion_name, activity_coefficient, exclusion_range=None, use_exclusion_radius_per_type = False): """ - Sets up Acid/Base reactions for acidic/basic 'particles' defined in 'pmb.df', as well as a grand-canonical coupling to a - reservoir of small ions. - This implementation uses the original formulation of the grand-reaction method by Landsgesell et al. [1]. + Sets up acid/base reactions for acidic/basic monoprotic particles defined in the pyMBE database, + as well as a grand-canonical coupling to a reservoir of small ions. + + Args: + pH_res ('float'): + pH-value in the reservoir. 
- [1] Landsgesell, J., Hebbeker, P., Rud, O., Lunkad, R., Košovan, P., & Holm, C. (2020). Grand-reaction method for simulations of ionization equilibria coupled to ion partitioning. Macromolecules, 53(8), 3007-3020. + c_salt_res ('pint.Quantity'): + Concentration of monovalent salt (e.g. NaCl) in the reservoir. - Args: - pH_res ('float): pH-value in the reservoir. - c_salt_res ('pint.Quantity'): Concentration of monovalent salt (e.g. NaCl) in the reservoir. - proton_name ('str'): Name of the proton (H+) particle. - hydroxide_name ('str'): Name of the hydroxide (OH-) particle. - salt_cation_name ('str'): Name of the salt cation (e.g. Na+) particle. - salt_anion_name ('str'): Name of the salt anion (e.g. Cl-) particle. - activity_coefficient ('callable'): A function that calculates the activity coefficient of an ion pair as a function of the ionic strength. - exclusion_range(`pint.Quantity`, optional): For distances shorter than this value, no particles will be inserted. - pka_set(`dict`,optional): Desired pka_set, pka_set(`dict`): {"name" : {"pka_value": pka, "acidity": acidity}}. Defaults to None. - use_exclusion_radius_per_type(`bool`,optional): Controls if one exclusion_radius for each espresso_type is used. Defaults to `False`. + proton_name ('str'): + Name of the proton (H+) particle. + + hydroxide_name ('str'): + Name of the hydroxide (OH-) particle. + + salt_cation_name ('str'): + Name of the salt cation (e.g. Na+) particle. + + salt_anion_name ('str'): + Name of the salt anion (e.g. Cl-) particle. + + activity_coefficient ('callable'): + A function that calculates the activity coefficient of an ion pair as a function of the ionic strength. + + exclusion_range('pint.Quantity', optional): + For distances shorter than this value, no particles will be inserted. + + use_exclusion_radius_per_type('bool', optional): + Controls if one exclusion_radius for each espresso_type is used. Defaults to 'False'. 
Returns: - RE (`obj`): Instance of a reaction_ensemble.ReactionEnsemble object from the espressomd library. - sucessful_reactions_labels(`lst`): Labels of the reactions set up by pyMBE. - ionic_strength_res ('pint.Quantity'): Ionic strength of the reservoir (useful for calculating partition coefficients). + 'tuple(reaction_methods.ReactionEnsemble,pint.Quantity)': + + 'reaction_methods.ReactionEnsemble': + espressomd reaction_methods object with all reactions necesary to run the GRxMC ensamble. + + 'pint.Quantity': + Ionic strength of the reservoir (useful for calculating partition coefficients). + + Notes: + - This implementation uses the original formulation of the grand-reaction method by Landsgesell et al. [1]. + + [1] Landsgesell, J., Hebbeker, P., Rud, O., Lunkad, R., Košovan, P., & Holm, C. (2020). Grand-reaction method for simulations of ionization equilibria coupled to ion partitioning. Macromolecules, 53(8), 3007-3020. """ from espressomd import reaction_methods if exclusion_range is None: exclusion_range = max(self.get_radius_map().values())*2.0 - if pka_set is None: - pka_set=self.get_pka_set() - self._check_pka_set(pka_set=pka_set) if use_exclusion_radius_per_type: exclusion_radius_per_type = self.get_radius_map() else: exclusion_radius_per_type = {} - RE = reaction_methods.ReactionEnsemble(kT=self.kT.to('reduced_energy').magnitude, - exclusion_range=exclusion_range, - seed=self.seed, - exclusion_radius_per_type = exclusion_radius_per_type - ) - + exclusion_range=exclusion_range, + seed=self.seed, + exclusion_radius_per_type = exclusion_radius_per_type) # Determine the concentrations of the various species in the reservoir and the equilibrium constants cH_res, cOH_res, cNa_res, cCl_res = self.determine_reservoir_concentrations(pH_res, c_salt_res, activity_coefficient) ionic_strength_res = 0.5*(cNa_res+cCl_res+cOH_res+cH_res) @@ -2598,17 +2682,30 @@ def setup_grxmc_reactions(self, pH_res, c_salt_res, proton_name, hydroxide_name, K_W = cH_res.to('1/(N_A 
* reduced_length**3)') * cOH_res.to('1/(N_A * reduced_length**3)') * determined_activity_coefficient K_NACL = cNa_res.to('1/(N_A * reduced_length**3)') * cCl_res.to('1/(N_A * reduced_length**3)') * determined_activity_coefficient K_HCL = cH_res.to('1/(N_A * reduced_length**3)') * cCl_res.to('1/(N_A * reduced_length**3)') * determined_activity_coefficient - - proton_es_type = self.df.loc[self.df['name']==proton_name].state_one.es_type.values[0] - hydroxide_es_type = self.df.loc[self.df['name']==hydroxide_name].state_one.es_type.values[0] - salt_cation_es_type = self.df.loc[self.df['name']==salt_cation_name].state_one.es_type.values[0] - salt_anion_es_type = self.df.loc[self.df['name']==salt_anion_name].state_one.es_type.values[0] - - proton_charge = self.df.loc[self.df['name']==proton_name].state_one.z.values[0] - hydroxide_charge = self.df.loc[self.df['name']==hydroxide_name].state_one.z.values[0] - salt_cation_charge = self.df.loc[self.df['name']==salt_cation_name].state_one.z.values[0] - salt_anion_charge = self.df.loc[self.df['name']==salt_anion_name].state_one.z.values[0] - + cation_tpl = self.db.get_template(pmb_type="particle", + name=salt_cation_name) + cation_state = self.db.get_template(pmb_type="particle_state", + name=cation_tpl.initial_state) + anion_tpl = self.db.get_template(pmb_type="particle", + name=salt_anion_name) + anion_state = self.db.get_template(pmb_type="particle_state", + name=anion_tpl.initial_state) + proton_tpl = self.db.get_template(pmb_type="particle", + name=proton_name) + proton_state = self.db.get_template(pmb_type="particle_state", + name=proton_tpl.initial_state) + hydroxide_tpl = self.db.get_template(pmb_type="particle", + name=hydroxide_name) + hydroxide_state = self.db.get_template(pmb_type="particle_state", + name=hydroxide_tpl.initial_state) + proton_es_type = proton_state.es_type + hydroxide_es_type = hydroxide_state.es_type + salt_cation_es_type = cation_state.es_type + salt_anion_es_type = anion_state.es_type + 
proton_charge = proton_state.z + hydroxide_charge = hydroxide_state.z + salt_cation_charge = cation_state.z + salt_anion_charge = anion_state.z if proton_charge <= 0: raise ValueError('ERROR proton charge must be positive, charge ', proton_charge) if salt_cation_charge <= 0: @@ -2617,184 +2714,263 @@ def setup_grxmc_reactions(self, pH_res, c_salt_res, proton_name, hydroxide_name, raise ValueError('ERROR hydroxide charge must be negative, charge ', hydroxide_charge) if salt_anion_charge >= 0: raise ValueError('ERROR salt anion charge must be negative, charge ', salt_anion_charge) - # Grand-canonical coupling to the reservoir # 0 = H+ + OH- - RE.add_reaction( - gamma = K_W.magnitude, - reactant_types = [], - reactant_coefficients = [], - product_types = [ proton_es_type, hydroxide_es_type ], - product_coefficients = [ 1, 1 ], - default_charges = { - proton_es_type: proton_charge, - hydroxide_es_type: hydroxide_charge, - } - ) - + RE.add_reaction(gamma = K_W.magnitude, + reactant_types = [], + reactant_coefficients = [], + product_types = [ proton_es_type, hydroxide_es_type ], + product_coefficients = [ 1, 1 ], + default_charges = {proton_es_type: proton_charge, + hydroxide_es_type: hydroxide_charge}) + rx_tpl = Reaction(participants=[ReactionParticipant(particle_name=proton_name, + state_name=proton_state.name, + coefficient=1), + ReactionParticipant(particle_name=hydroxide_name, + state_name=hydroxide_state.name, + coefficient=1)], + pK=-np.log10(K_W.magnitude), + reaction_type="ion_insertion", + simulation_method="GRxMC") + self.db._register_reaction(rx_tpl) # 0 = Na+ + Cl- - RE.add_reaction( - gamma = K_NACL.magnitude, - reactant_types = [], - reactant_coefficients = [], - product_types = [ salt_cation_es_type, salt_anion_es_type ], - product_coefficients = [ 1, 1 ], - default_charges = { - salt_cation_es_type: salt_cation_charge, - salt_anion_es_type: salt_anion_charge, - } - ) - + RE.add_reaction(gamma = K_NACL.magnitude, + reactant_types = [], + 
reactant_coefficients = [], + product_types = [ salt_cation_es_type, salt_anion_es_type ], + product_coefficients = [ 1, 1 ], + default_charges = {salt_cation_es_type: salt_cation_charge, + salt_anion_es_type: salt_anion_charge}) + rx_tpl = Reaction(participants=[ReactionParticipant(particle_name=salt_cation_name, + state_name=cation_state.name, + coefficient=1), + ReactionParticipant(particle_name=salt_anion_name, + state_name=anion_state.name, + coefficient=1)], + pK=-np.log10(K_NACL.magnitude), + reaction_type="ion_insertion", + simulation_method="GRxMC") + self.db._register_reaction(rx_tpl) # 0 = Na+ + OH- - RE.add_reaction( - gamma = (K_NACL * K_W / K_HCL).magnitude, - reactant_types = [], - reactant_coefficients = [], - product_types = [ salt_cation_es_type, hydroxide_es_type ], - product_coefficients = [ 1, 1 ], - default_charges = { - salt_cation_es_type: salt_cation_charge, - hydroxide_es_type: hydroxide_charge, - } - ) - + RE.add_reaction(gamma = (K_NACL * K_W / K_HCL).magnitude, + reactant_types = [], + reactant_coefficients = [], + product_types = [ salt_cation_es_type, hydroxide_es_type ], + product_coefficients = [ 1, 1 ], + default_charges = {salt_cation_es_type: salt_cation_charge, + hydroxide_es_type: hydroxide_charge}) + rx_tpl = Reaction(participants=[ReactionParticipant(particle_name=salt_cation_name, + state_name=cation_state.name, + coefficient=1), + ReactionParticipant(particle_name=hydroxide_name, + state_name=hydroxide_state.name, + coefficient=1)], + pK=-np.log10((K_NACL * K_W / K_HCL).magnitude), + reaction_type="ion_insertion", + simulation_method="GRxMC") + self.db._register_reaction(rx_tpl) # 0 = H+ + Cl- - RE.add_reaction( - gamma = K_HCL.magnitude, - reactant_types = [], - reactant_coefficients = [], - product_types = [ proton_es_type, salt_anion_es_type ], - product_coefficients = [ 1, 1 ], - default_charges = { - proton_es_type: proton_charge, - salt_anion_es_type: salt_anion_charge, - } - ) - + RE.add_reaction(gamma = 
K_HCL.magnitude, + reactant_types = [], + reactant_coefficients = [], + product_types = [ proton_es_type, salt_anion_es_type ], + product_coefficients = [ 1, 1 ], + default_charges = {proton_es_type: proton_charge, + salt_anion_es_type: salt_anion_charge}) + rx_tpl = Reaction(participants=[ReactionParticipant(particle_name=proton_name, + state_name=proton_state.name, + coefficient=1), + ReactionParticipant(particle_name=salt_anion_name, + state_name=anion_state.name, + coefficient=1)], + pK=-np.log10(K_HCL.magnitude), + reaction_type="ion_insertion", + simulation_method="GRxMC") + self.db._register_reaction(rx_tpl) # Annealing moves to ensure sufficient sampling # Cation annealing H+ = Na+ - RE.add_reaction( - gamma = (K_NACL / K_HCL).magnitude, - reactant_types = [proton_es_type], - reactant_coefficients = [ 1 ], - product_types = [ salt_cation_es_type ], - product_coefficients = [ 1 ], - default_charges = { - proton_es_type: proton_charge, - salt_cation_es_type: salt_cation_charge, - } - ) - + RE.add_reaction(gamma = (K_NACL / K_HCL).magnitude, + reactant_types = [proton_es_type], + reactant_coefficients = [ 1 ], + product_types = [ salt_cation_es_type ], + product_coefficients = [ 1 ], + default_charges = {proton_es_type: proton_charge, + salt_cation_es_type: salt_cation_charge}) + rx_tpl = Reaction(participants=[ReactionParticipant(particle_name=proton_name, + state_name=proton_state.name, + coefficient=-1), + ReactionParticipant(particle_name=salt_cation_name, + state_name=cation_state.name, + coefficient=1)], + pK=-np.log10((K_NACL / K_HCL).magnitude), + reaction_type="particle replacement", + simulation_method="GRxMC") + self.db._register_reaction(rx_tpl) # Anion annealing OH- = Cl- - RE.add_reaction( - gamma = (K_HCL / K_W).magnitude, - reactant_types = [hydroxide_es_type], - reactant_coefficients = [ 1 ], - product_types = [ salt_anion_es_type ], - product_coefficients = [ 1 ], - default_charges = { - hydroxide_es_type: hydroxide_charge, - 
salt_anion_es_type: salt_anion_charge, - } - ) - - sucessful_reactions_labels=[] - charge_number_map = self.get_charge_number_map() - for name in pka_set.keys(): - if not _DFm._check_if_name_is_defined_in_df(name=name, df=self.df): - logging.warning(f'The acid-base reaction of {name} has not been set up because its particle type is not defined in the dataframe.') + RE.add_reaction(gamma = (K_HCL / K_W).magnitude, + reactant_types = [hydroxide_es_type], + reactant_coefficients = [ 1 ], + product_types = [ salt_anion_es_type ], + product_coefficients = [ 1 ], + default_charges = {hydroxide_es_type: hydroxide_charge, + salt_anion_es_type: salt_anion_charge}) + rx_tpl = Reaction(participants=[ReactionParticipant(particle_name=hydroxide_name, + state_name=hydroxide_state.name, + coefficient=-1), + ReactionParticipant(particle_name=salt_anion_name, + state_name=anion_state.name, + coefficient=1)], + pK=-np.log10((K_HCL / K_W).magnitude), + reaction_type="particle replacement", + simulation_method="GRxMC") + self.db._register_reaction(rx_tpl) + for reaction in self.db.get_reactions(): + if reaction.reaction_type not in ["monoprotic_acid", "monoprotic_base"]: continue + default_charges = {} + reactant_types = [] + product_types = [] + for participant in reaction.participants: + state_tpl = self.db.get_template(name=participant.state_name, + pmb_type="particle_state") + default_charges[state_tpl.es_type] = state_tpl.z + if participant.coefficient < 0: + reactant_types.append(state_tpl.es_type) + reactant_name=state_tpl.particle_name + reactant_state_name=state_tpl.name + elif participant.coefficient > 0: + product_types.append(state_tpl.es_type) + product_name=state_tpl.particle_name + product_state_name=state_tpl.name - Ka = (10**-pka_set[name]['pka_value'] * self.units.mol/self.units.l).to('1/(N_A * reduced_length**3)') - - state_one_type = self.df.loc[self.df['name']==name].state_one.es_type.values[0] - state_two_type = 
self.df.loc[self.df['name']==name].state_two.es_type.values[0] - + Ka = (10**-reaction.pK * self.units.mol/self.units.l).to('1/(N_A * reduced_length**3)') # Reaction in terms of proton: HA = A + H+ RE.add_reaction(gamma=Ka.magnitude, - reactant_types=[state_one_type], + reactant_types=reactant_types, reactant_coefficients=[1], - product_types=[state_two_type, proton_es_type], + product_types=product_types+[proton_es_type], product_coefficients=[1, 1], - default_charges={state_one_type: charge_number_map[state_one_type], - state_two_type: charge_number_map[state_two_type], - proton_es_type: proton_charge}) - + default_charges= default_charges | {proton_es_type: proton_charge}) + reaction.add_participant(particle_name=proton_name, + state_name=proton_state.name, + coefficient=1) + reaction.add_simulation_method("GRxMC") # Reaction in terms of salt cation: HA = A + Na+ RE.add_reaction(gamma=(Ka * K_NACL / K_HCL).magnitude, - reactant_types=[state_one_type], + reactant_types=reactant_types, reactant_coefficients=[1], - product_types=[state_two_type, salt_cation_es_type], + product_types=product_types+[salt_cation_es_type], product_coefficients=[1, 1], - default_charges={state_one_type: charge_number_map[state_one_type], - state_two_type: charge_number_map[state_two_type], - salt_cation_es_type: salt_cation_charge}) - + default_charges=default_charges | {salt_cation_es_type: salt_cation_charge}) + rx_tpl = Reaction(participants=[ReactionParticipant(particle_name=reactant_name, + state_name=reactant_state_name, + coefficient=-1), + ReactionParticipant(particle_name=product_name, + state_name=product_state_name, + coefficient=1), + ReactionParticipant(particle_name=salt_cation_name, + state_name=cation_state.name, + coefficient=1),], + pK=-np.log10((Ka * K_NACL / K_HCL).magnitude), + reaction_type=reaction.reaction_type+"_salt", + simulation_method="GRxMC") + self.db._register_reaction(rx_tpl) # Reaction in terms of hydroxide: OH- + HA = A RE.add_reaction(gamma=(Ka / 
K_W).magnitude, - reactant_types=[state_one_type, hydroxide_es_type], + reactant_types=reactant_types+[hydroxide_es_type], reactant_coefficients=[1, 1], - product_types=[state_two_type], + product_types=product_types, product_coefficients=[1], - default_charges={state_one_type: charge_number_map[state_one_type], - state_two_type: charge_number_map[state_two_type], - hydroxide_es_type: hydroxide_charge}) - + default_charges=default_charges | {hydroxide_es_type: hydroxide_charge}) + rx_tpl = Reaction(participants=[ReactionParticipant(particle_name=reactant_name, + state_name=reactant_state_name, + coefficient=-1), + ReactionParticipant(particle_name=product_name, + state_name=product_state_name, + coefficient=1), + ReactionParticipant(particle_name=hydroxide_name, + state_name=hydroxide_state.name, + coefficient=-1),], + pK=-np.log10((Ka / K_W).magnitude), + reaction_type=reaction.reaction_type+"_conjugate", + simulation_method="GRxMC") + self.db._register_reaction(rx_tpl) # Reaction in terms of salt anion: Cl- + HA = A RE.add_reaction(gamma=(Ka / K_HCL).magnitude, - reactant_types=[state_one_type, salt_anion_es_type], + reactant_types=reactant_types+[salt_anion_es_type], reactant_coefficients=[1, 1], - product_types=[state_two_type], + product_types=product_types, product_coefficients=[1], - default_charges={state_one_type: charge_number_map[state_one_type], - state_two_type: charge_number_map[state_two_type], - salt_anion_es_type: salt_anion_charge}) + default_charges=default_charges | {salt_anion_es_type: salt_anion_charge}) + rx_tpl = Reaction(participants=[ReactionParticipant(particle_name=reactant_name, + state_name=reactant_state_name, + coefficient=-1), + ReactionParticipant(particle_name=product_name, + state_name=product_state_name, + coefficient=1), + ReactionParticipant(particle_name=salt_anion_name, + state_name=anion_state.name, + coefficient=-1),], + pK=-np.log10((Ka / K_HCL).magnitude), + reaction_type=reaction.reaction_type+"_salt", + 
simulation_method="GRxMC") + self.db._register_reaction(rx_tpl) + return RE, ionic_strength_res + + def setup_grxmc_unified(self, pH_res, c_salt_res, cation_name, anion_name, activity_coefficient, exclusion_range=None, use_exclusion_radius_per_type = False): + """ + Sets up acid/base reactions for acidic/basic 'particles' defined in the pyMBE database, as well as a grand-canonical coupling to a + reservoir of small ions using a unified formulation for small ions. - sucessful_reactions_labels.append(name) - return RE, sucessful_reactions_labels, ionic_strength_res + Args: + pH_res ('float'): + pH-value in the reservoir. - def setup_grxmc_unified(self, pH_res, c_salt_res, cation_name, anion_name, activity_coefficient, exclusion_range=None, pka_set=None, use_exclusion_radius_per_type = False): - """ - Sets up Acid/Base reactions for acidic/basic 'particles' defined in 'pmb.df', as well as a grand-canonical coupling to a - reservoir of small ions. - This implementation uses the formulation of the grand-reaction method by Curk et al. [1], which relies on "unified" ion types X+ = {H+, Na+} and X- = {OH-, Cl-}. - A function that implements the original version of the grand-reaction method by Landsgesell et al. [2] is also available under the name 'setup_grxmc_reactions'. + c_salt_res ('pint.Quantity'): + Concentration of monovalent salt (e.g. NaCl) in the reservoir. - [1] Curk, T., Yuan, J., & Luijten, E. (2022). Accelerated simulation method for charge regulation effects. The Journal of Chemical Physics, 156(4). - [2] Landsgesell, J., Hebbeker, P., Rud, O., Lunkad, R., Košovan, P., & Holm, C. (2020). Grand-reaction method for simulations of ionization equilibria coupled to ion partitioning. Macromolecules, 53(8), 3007-3020. + cation_name ('str'): + Name of the cationic particle. - Args: - pH_res ('float'): pH-value in the reservoir. - c_salt_res ('pint.Quantity'): Concentration of monovalent salt (e.g. NaCl) in the reservoir. 
- cation_name ('str'): Name of the cationic particle. - anion_name ('str'): Name of the anionic particle. - activity_coefficient ('callable'): A function that calculates the activity coefficient of an ion pair as a function of the ionic strength. - exclusion_range(`pint.Quantity`, optional): Below this value, no particles will be inserted. - pka_set(`dict`,optional): Desired pka_set, pka_set(`dict`): {"name" : {"pka_value": pka, "acidity": acidity}}. Defaults to None. - use_exclusion_radius_per_type(`bool`,optional): Controls if one exclusion_radius per each espresso_type. Defaults to `False`. + anion_name ('str'): + Name of the anionic particle. + + activity_coefficient ('callable'): + A function that calculates the activity coefficient of an ion pair as a function of the ionic strength. + + exclusion_range('pint.Quantity', optional): + Below this value, no particles will be inserted. + + use_exclusion_radius_per_type('bool', optional): + Controls if one exclusion_radius per each espresso_type. Defaults to 'False'. Returns: - RE (`reaction_ensemble.ReactionEnsemble`): Instance of a reaction_ensemble.ReactionEnsemble object from the espressomd library. - sucessful_reactions_labels(`lst`): Labels of the reactions set up by pyMBE. - ionic_strength_res ('float'): Ionic strength of the reservoir (useful for calculating partition coefficients). + 'tuple(reaction_methods.ReactionEnsemble,pint.Quantity)': + + 'reaction_methods.ReactionEnsemble': + espressomd reaction_methods object with all reactions necesary to run the GRxMC ensamble. + + 'pint.Quantity': + Ionic strength of the reservoir (useful for calculating partition coefficients). + + Notes: + - This implementation uses the formulation of the grand-reaction method by Curk et al. [1], which relies on "unified" ion types X+ = {H+, Na+} and X- = {OH-, Cl-}. + - A function that implements the original version of the grand-reaction method by Landsgesell et al. 
[2] is also available under the name 'setup_grxmc_reactions'. + + [1] Curk, T., Yuan, J., & Luijten, E. (2022). Accelerated simulation method for charge regulation effects. The Journal of Chemical Physics, 156(4). + [2] Landsgesell, J., Hebbeker, P., Rud, O., Lunkad, R., Košovan, P., & Holm, C. (2020). Grand-reaction method for simulations of ionization equilibria coupled to ion partitioning. Macromolecules, 53(8), 3007-3020. """ from espressomd import reaction_methods if exclusion_range is None: exclusion_range = max(self.get_radius_map().values())*2.0 - if pka_set is None: - pka_set=self.get_pka_set() - self._check_pka_set(pka_set=pka_set) if use_exclusion_radius_per_type: exclusion_radius_per_type = self.get_radius_map() else: exclusion_radius_per_type = {} - RE = reaction_methods.ReactionEnsemble(kT=self.kT.to('reduced_energy').magnitude, - exclusion_range=exclusion_range, - seed=self.seed, - exclusion_radius_per_type = exclusion_radius_per_type - ) - + exclusion_range=exclusion_range, + seed=self.seed, + exclusion_radius_per_type = exclusion_radius_per_type) # Determine the concentrations of the various species in the reservoir and the equilibrium constants cH_res, cOH_res, cNa_res, cCl_res = self.determine_reservoir_concentrations(pH_res, c_salt_res, activity_coefficient) ionic_strength_res = 0.5*(cNa_res+cCl_res+cOH_res+cH_res) @@ -2803,16 +2979,22 @@ def setup_grxmc_unified(self, pH_res, c_salt_res, cation_name, anion_name, activ a_cation = (cH_res+cNa_res).to('1/(N_A * reduced_length**3)') * np.sqrt(determined_activity_coefficient) a_anion = (cH_res+cNa_res).to('1/(N_A * reduced_length**3)') * np.sqrt(determined_activity_coefficient) K_XX = a_cation * a_anion - - cation_es_type = self.df.loc[self.df['name']==cation_name].state_one.es_type.values[0] - anion_es_type = self.df.loc[self.df['name']==anion_name].state_one.es_type.values[0] - cation_charge = self.df.loc[self.df['name']==cation_name].state_one.z.values[0] - anion_charge = 
self.df.loc[self.df['name']==anion_name].state_one.z.values[0] + cation_tpl = self.db.get_template(pmb_type="particle", + name=cation_name) + cation_state = self.db.get_template(pmb_type="particle_state", + name=cation_tpl.initial_state) + anion_tpl = self.db.get_template(pmb_type="particle", + name=anion_name) + anion_state = self.db.get_template(pmb_type="particle_state", + name=anion_tpl.initial_state) + cation_es_type = cation_state.es_type + anion_es_type = anion_state.es_type + cation_charge = cation_state.z + anion_charge = anion_state.z if cation_charge <= 0: raise ValueError('ERROR cation charge must be positive, charge ', cation_charge) if anion_charge >= 0: raise ValueError('ERROR anion charge must be negative, charge ', anion_charge) - # Coupling to the reservoir: 0 = X+ + X- RE.add_reaction(gamma = K_XX.magnitude, reactant_types = [], @@ -2821,55 +3003,89 @@ def setup_grxmc_unified(self, pH_res, c_salt_res, cation_name, anion_name, activ product_coefficients = [ 1, 1 ], default_charges = {cation_es_type: cation_charge, anion_es_type: anion_charge}) - - sucessful_reactions_labels=[] - charge_number_map = self.get_charge_number_map() - for name in pka_set.keys(): - if not _DFm._check_if_name_is_defined_in_df(name=name, df=self.df): - logging.warning(f'The acid-base reaction of {name} has not been set up because its particle type is not defined in the dataframe.') + rx_tpl = Reaction(participants=[ReactionParticipant(particle_name=cation_name, + state_name=cation_state.name, + coefficient=1), + ReactionParticipant(particle_name=anion_name, + state_name=anion_state.name, + coefficient=1)], + pK=-np.log10(K_XX.magnitude), + reaction_type="ion_insertion", + simulation_method="GCMC") + self.db._register_reaction(rx_tpl) + for reaction in self.db.get_reactions(): + if reaction.reaction_type not in ["monoprotic_acid", "monoprotic_base"]: continue + default_charges = {} + reactant_types = [] + product_types = [] + for participant in reaction.participants: + 
state_tpl = self.db.get_template(name=participant.state_name, + pmb_type="particle_state") + default_charges[state_tpl.es_type] = state_tpl.z + if participant.coefficient < 0: + reactant_types.append(state_tpl.es_type) + reactant_name=state_tpl.particle_name + reactant_state_name=state_tpl.name + elif participant.coefficient > 0: + product_types.append(state_tpl.es_type) + product_name=state_tpl.particle_name + product_state_name=state_tpl.name - Ka = 10**-pka_set[name]['pka_value'] * self.units.mol/self.units.l + Ka = (10**-reaction.pK * self.units.mol/self.units.l).to('1/(N_A * reduced_length**3)') gamma_K_AX = Ka.to('1/(N_A * reduced_length**3)').magnitude * a_cation / a_hydrogen - - state_one_type = self.df.loc[self.df['name']==name].state_one.es_type.values[0] - state_two_type = self.df.loc[self.df['name']==name].state_two.es_type.values[0] - # Reaction in terms of small cation: HA = A + X+ RE.add_reaction(gamma=gamma_K_AX.magnitude, - reactant_types=[state_one_type], + reactant_types=reactant_types, reactant_coefficients=[1], - product_types=[state_two_type, cation_es_type], + product_types=product_types+[cation_es_type], product_coefficients=[1, 1], - default_charges={state_one_type: charge_number_map[state_one_type], - state_two_type: charge_number_map[state_two_type], - cation_es_type: cation_charge}) - + default_charges=default_charges|{cation_es_type: cation_charge}) + reaction.add_participant(particle_name=cation_name, + state_name=cation_state.name, + coefficient=1) + reaction.add_simulation_method("GRxMC") # Reaction in terms of small anion: X- + HA = A RE.add_reaction(gamma=gamma_K_AX.magnitude / K_XX.magnitude, - reactant_types=[state_one_type, anion_es_type], + reactant_types=reactant_types+[anion_es_type], reactant_coefficients=[1, 1], - product_types=[state_two_type], + product_types=product_types, product_coefficients=[1], - default_charges={state_one_type: charge_number_map[state_one_type], - state_two_type: charge_number_map[state_two_type], - 
anion_es_type: anion_charge}) - - sucessful_reactions_labels.append(name) - return RE, sucessful_reactions_labels, ionic_strength_res + default_charges=default_charges|{anion_es_type: anion_charge}) + rx_tpl = Reaction(participants=[ReactionParticipant(particle_name=reactant_name, + state_name=reactant_state_name, + coefficient=-1), + ReactionParticipant(particle_name=product_name, + state_name=product_state_name, + coefficient=1), + ReactionParticipant(particle_name=anion_name, + state_name=anion_state.name, + coefficient=-1),], + pK=-np.log10(gamma_K_AX.magnitude / K_XX.magnitude), + reaction_type=reaction.reaction_type+"_conjugate", + simulation_method="GRxMC") + self.db._register_reaction(rx_tpl) + return RE, ionic_strength_res def setup_lj_interactions(self, espresso_system, shift_potential=True, combining_rule='Lorentz-Berthelot'): """ Sets up the Lennard-Jones (LJ) potential between all pairs of particle states defined in the pyMBE database. Args: - espresso_system(`espressomd.system.System`): Instance of a system object from the espressomd library. - shift_potential(`bool`, optional): If True, a shift will be automatically computed such that the potential is continuous at the cutoff radius. Otherwise, no shift will be applied. Defaults to True. - combining_rule(`string`, optional): combining rule used to calculate `sigma` and `epsilon` for the potential between a pair of particles. Defaults to 'Lorentz-Berthelot'. - warning(`bool`, optional): switch to activate/deactivate warning messages. Defaults to True. + espresso_system('espressomd.system.System'): + Instance of a system object from the espressomd library. - Note: - - Currently, the only `combining_rule` supported is Lorentz-Berthelot. + shift_potential('bool', optional): + If True, a shift will be automatically computed such that the potential is continuous at the cutoff radius. Otherwise, no shift will be applied. Defaults to True. 
+ + combining_rule('string', optional): + combining rule used to calculate 'sigma' and 'epsilon' for the potential between a pair of particles. Defaults to 'Lorentz-Berthelot'. + + warning('bool', optional): + switch to activate/deactivate warning messages. Defaults to True. + + Notes: + - Currently, the only 'combining_rule' supported is Lorentz-Berthelot. - Check the documentation of ESPResSo for more info about the potential https://espressomd.github.io/doc4.2.0/inter_non-bonded.html """ diff --git a/testsuite/reaction_methods_unit_tests.py b/testsuite/reaction_methods_unit_tests.py index 70c46ec..8084efc 100644 --- a/testsuite/reaction_methods_unit_tests.py +++ b/testsuite/reaction_methods_unit_tests.py @@ -1,5 +1,5 @@ # -# Copyright (C) 2024 pyMBE-dev team +# Copyright (C) 2024-2026 pyMBE-dev team # # This file is part of pyMBE. # @@ -20,6 +20,7 @@ import pyMBE import numpy as np import espressomd +import unittest as ut def reaction_method_test_template(parameters): @@ -71,13 +72,7 @@ def reaction_method_test_template(parameters): if parameters["method"] == "cpH": # Add the reactions using pyMBE - if "pka_set" in parameters: - cpH, _ = pmb.setup_cpH(counter_ion="H", - constant_pH=parameters["pH"], - use_exclusion_radius_per_type=parameters["use_exclusion_radius_per_type"], - pka_set=parameters["pka_set"]) - else: - cpH, _ = pmb.setup_cpH(counter_ion="H", + cpH = pmb.setup_cpH(counter_ion="H", constant_pH=parameters["pH"], use_exclusion_radius_per_type=parameters["use_exclusion_radius_per_type"]) @@ -145,11 +140,7 @@ def reaction_method_test_template(parameters): np.testing.assert_raises(ValueError, pmb.setup_grxmc_reactions, **input_parameters) return - if "pka_set" in parameters: - input_parameters["pka_set"] = parameters["pka_set"] - grxmc, *_ = pmb.setup_grxmc_reactions(**input_parameters) - else: - grxmc, *_ = pmb.setup_grxmc_reactions(**input_parameters) + grxmc, _ = pmb.setup_grxmc_reactions(**input_parameters) # Check the number of reactions 
np.testing.assert_equal(len(grxmc.reactions), 28) @@ -213,11 +204,7 @@ def reaction_method_test_template(parameters): np.testing.assert_raises(ValueError, pmb.setup_grxmc_unified, **input_parameters) return - if "pka_set" in parameters: - input_parameters["pka_set"] = parameters["pka_set"] - grxmc, *_ = pmb.setup_grxmc_unified(**input_parameters) - else: - grxmc, *_ = pmb.setup_grxmc_unified(**input_parameters) + grxmc, *_ = pmb.setup_grxmc_unified(**input_parameters) # Check the number of reactions np.testing.assert_equal(len(grxmc.reactions), 10) @@ -251,112 +238,109 @@ def reaction_method_test_template(parameters): # Set up the espresso system espresso_system=espressomd.System(box_l = [10.0]*3) -# cpH test -print("*** Unit test: check that reactions are correctly set up in the cpH method. ***") -for use_exclusion_radius_per_type in [False, True]: - parameters = { - "method": "cpH", - "pK_acid": 4.0, - "pK_base": 8.0, - "pH": 7.0, - "z_Na": 1, - "z_Cl": -1, - "z_H": 1, - "z_OH": -1, - "use_exclusion_radius_per_type": use_exclusion_radius_per_type - } - reaction_method_test_template(parameters) - - parameters["pka_set"] = { - "A": {"pka_value": 4.0, "acidity": "acidic"}, - "B": {"pka_value": 8.0, "acidity": "basic"}, - "C": {"pka_value": 7.0, "acidity": "acidi"}} - reaction_method_test_template(parameters) -print("*** Unit test passed ***") - -# gcmc test -print("*** Unit test: check that reactions are correctly set up in the GCMC method. 
***") -for use_exclusion_radius_per_type in [False, True]: - parameters = { - "method": "gcmc", - "c_salt_res": 1, - "z_Na": 1, - "z_Cl": -1, - "z_H": 1, - "z_OH": -1, - "use_exclusion_radius_per_type": use_exclusion_radius_per_type - } - reaction_method_test_template(parameters) - - parameters["z_Cl"] = 1 - reaction_method_test_template(parameters) - - parameters["z_Na"] = -1 - reaction_method_test_template(parameters) -print("*** Unit test passed ***") - -# grxmc test -print("*** Unit test: check that reactions are correctly set up in the G-RxMC method. ***") -for use_exclusion_radius_per_type in [False, True]: - parameters = { - "method": "grxmc", - "pK_acid": 4.0, - "pK_base": 9.0, - "c_salt_res": 1, - "pH_res": 5.0, - "z_Na": 1, - "z_Cl": -1, - "z_H": 1, - "z_OH": -1, - "use_exclusion_radius_per_type": use_exclusion_radius_per_type - } - reaction_method_test_template(parameters) - - parameters["pka_set"] = { - "A": {"pka_value": 4.0, "acidity": "acidic"}, - "B": {"pka_value": 9.0, "acidity": "basic"}, - "C": {"pka_value": 7.0, "acidity": "acidi"}} - reaction_method_test_template(parameters) - - parameters["z_Cl"] = 1 - reaction_method_test_template(parameters) - - parameters["z_OH"] = 1 - reaction_method_test_template(parameters) - - parameters["z_Na"] = -1 - reaction_method_test_template(parameters) - - parameters["z_H"] = -1 - reaction_method_test_template(parameters) -print("*** Unit test passed ***") - -# grxmc unified test -print("*** Unit test: check that reactions are correctly set up in the unified G-RxMC method. 
***") -for use_exclusion_radius_per_type in [False, True]: - parameters = { - "method": "grxmc_unified", - "pK_acid": 4.0, - "pK_base": 9.0, - "c_salt_res": 1, - "pH_res": 5.0, - "z_Na": 1, - "z_Cl": -1, - "z_H": 1, - "z_OH": -1, - "use_exclusion_radius_per_type": use_exclusion_radius_per_type - } - reaction_method_test_template(parameters) - - parameters["pka_set"] = { - "A": {"pka_value": 4.0, "acidity": "acidic"}, - "B": {"pka_value": 9.0, "acidity": "basic"}, - "C": {"pka_value": 7.0, "acidity": "acidi"}} - reaction_method_test_template(parameters) - - parameters["z_OH"] = 1 - reaction_method_test_template(parameters) - - parameters["z_H"] = -1 - reaction_method_test_template(parameters) -print("*** Unit test passed ***") +class Test(ut.TestCase): + + def test_cpH_setup(self): + """ + Unit tests for the constant pH method + """ + # check that reactions are correctly set up in the cpH method. ***") + for use_exclusion_radius_per_type in [False, True]: + parameters = { + "method": "cpH", + "pK_acid": 4.0, + "pK_base": 8.0, + "pH": 7.0, + "z_Na": 1, + "z_Cl": -1, + "z_H": 1, + "z_OH": -1, + "use_exclusion_radius_per_type": use_exclusion_radius_per_type + } + reaction_method_test_template(parameters) + + def test_gcmc_setup(self): + """ + Unit tests for the Grand Canonical Monte Carlo method + """ + # Check that reactions are correctly set up in the GCMC method. ***") + for use_exclusion_radius_per_type in [False, True]: + parameters = { + "method": "gcmc", + "c_salt_res": 1, + "z_Na": 1, + "z_Cl": -1, + "z_H": 1, + "z_OH": -1, + "use_exclusion_radius_per_type": use_exclusion_radius_per_type + } + reaction_method_test_template(parameters) + + parameters["z_Cl"] = 1 + reaction_method_test_template(parameters) + + parameters["z_Na"] = -1 + reaction_method_test_template(parameters) + + def test_grxmc_setup(self): + """ + Unit tests for the Grand Reaction Monte Carlo method setup + """ + + + # check that reactions are correctly set up in the G-RxMC method. 
***") + for use_exclusion_radius_per_type in [False, True]: + parameters = { + "method": "grxmc", + "pK_acid": 4.0, + "pK_base": 9.0, + "c_salt_res": 1, + "pH_res": 5.0, + "z_Na": 1, + "z_Cl": -1, + "z_H": 1, + "z_OH": -1, + "use_exclusion_radius_per_type": use_exclusion_radius_per_type + } + reaction_method_test_template(parameters) + + parameters["z_Cl"] = 1 + reaction_method_test_template(parameters) + + parameters["z_OH"] = 1 + reaction_method_test_template(parameters) + + parameters["z_Na"] = -1 + reaction_method_test_template(parameters) + + parameters["z_H"] = -1 + reaction_method_test_template(parameters) + + def test_grxmc_unified_setup(self): + """ + Unit tests for the Grand Reaction Monte Carlo method, using the unified formulation + """ + # check that reactions are correctly set up in the unified G-RxMC method. + for use_exclusion_radius_per_type in [False, True]: + parameters = { + "method": "grxmc_unified", + "pK_acid": 4.0, + "pK_base": 9.0, + "c_salt_res": 1, + "pH_res": 5.0, + "z_Na": 1, + "z_Cl": -1, + "z_H": 1, + "z_OH": -1, + "use_exclusion_radius_per_type": use_exclusion_radius_per_type + } + reaction_method_test_template(parameters) + + parameters["z_OH"] = 1 + reaction_method_test_template(parameters) + + parameters["z_H"] = -1 + reaction_method_test_template(parameters) + +if __name__ == "__main__": + ut.main() \ No newline at end of file From 461ec7b069bbb07f526d9943ad48b577e577bb0d Mon Sep 17 00:00:00 2001 From: Pablo Date: Sun, 25 Jan 2026 17:22:14 +0100 Subject: [PATCH 36/55] standarize docs --- pyMBE/pyMBE.py | 529 ++++++++++++++++++++++++++++++------------------- 1 file changed, 321 insertions(+), 208 deletions(-) diff --git a/pyMBE/pyMBE.py b/pyMBE/pyMBE.py index 1ccb643..15871df 100644 --- a/pyMBE/pyMBE.py +++ b/pyMBE/pyMBE.py @@ -59,22 +59,31 @@ class pymbe_library(): Attributes: N_A ('pint.Quantity'): Avogadro number. + kB ('pint.Quantity'): Boltzmann constant. + e ('pint.Quantity'): Elementary charge. 
+ kT ('pint.Quantity'): Thermal energy corresponding to the set temperature. + Kw ('pint.Quantity'): Ionic product of water, used in G-RxMC and Donnan-related calculations. + db ('Manager'): Database manager holding all pyMBE templates, instances and reactions. + rng ('numpy.random.Generator'): Random number generator initialized with the provided seed. + units ('pint.UnitRegistry'): Pint unit registry used for unit-aware calculations. + lattice_builder: Optional lattice builder object (initialized as ''None''). + root ('importlib.resources.abc.Traversable'): Root path to the pyMBE package resources. """ @@ -86,14 +95,18 @@ def __init__(self, seed, temperature=None, unit_length=None, unit_charge=None, K Args: seed ('int'): Seed for the random number generator. + temperature ('pint.Quantity', optional): Simulation temperature. If ''None'', defaults to 298.15 K. + unit_length ('pint.Quantity', optional): Reference length for reduced units. If ''None'', defaults to 0.355 nm. + unit_charge ('pint.Quantity', optional): Reference charge for reduced units. If ''None'', defaults to one elementary charge. + Kw ('pint.Quantity', optional): Ionic product of water (typically in mol²/L²). If ''None'', defaults to 1e-14 mol²/L². @@ -119,8 +132,11 @@ def _check_bond_inputs(self, bond_type, bond_parameters): Checks that the input bond parameters are valid within the current pyMBE implementation. Args: - bond_type('str'): label to identify the potential to model the bond. - bond_parameters('dict'): parameters of the potential of the bond. + bond_type ('str'): + label to identify the potential to model the bond. + + bond_parameters ('dict'): + parameters of the potential of the bond. """ valid_bond_types = ["harmonic", "FENE"] if bond_type not in valid_bond_types: @@ -136,13 +152,17 @@ def _check_dimensionality(self, variable, expected_dimensionality): Checks if the dimensionality of 'variable' matches 'expected_dimensionality'. 
Args: - variable('pint.Quantity'): Quantity to be checked. - expected_dimensionality('str'): Expected dimension of the variable. + variable ('pint.Quantity'): + Quantity to be checked. + + expected_dimensionality ('str'): + Expected dimension of the variable. Returns: - ('bool'): 'True' if the variable if of the expected dimensionality, 'False' otherwise. + ('bool'): + 'True' if the variable if of the expected dimensionality, 'False' otherwise. - Note: + Notes: - 'expected_dimensionality' takes dimensionality following the Pint standards [docs](https://pint.readthedocs.io/en/0.10.1/wrapping.html?highlight=dimensionality#checking-dimensionality). - For example, to check for a variable corresponding to a velocity 'expected_dimensionality = "[length]/[time]"' """ @@ -171,10 +191,13 @@ def _create_espresso_bond_instance(self, bond_type, bond_parameters): Creates an ESPResSo bond instance. Args: - bond_type('str'): label to identify the potential to model the bond. - bond_parameters('dict'): parameters of the potential of the bond. + bond_type ('str'): + label to identify the potential to model the bond. - Note: + bond_parameters ('dict'): + parameters of the potential of the bond. + + Notes: Currently, only HARMONIC and FENE bonds are supported. For a HARMONIC bond the dictionary must contain: @@ -222,7 +245,7 @@ def _create_hydrogel_chain(self, hydrogel_chain, nodes, espresso_system, use_def ('int'): molecule_id of the created hydrogel chian. - Note: + Notes: - If the chain is defined between node_start = ''[0 0 0]'' and node_end = ''[1 1 1]'', the chain will be placed between these two nodes. - The chain will be placed in the direction of the vector between 'node_start' and 'node_end'. """ @@ -293,13 +316,19 @@ def _create_hydrogel_node(self, node_index, node_name, espresso_system): Set a node residue type. Args: - node_index('str'): Lattice node index in the form of a string, e.g. "[0 0 0]". - node_name('str'): name of the node particle defined in pyMBE. 
- espresso_system (espressomd.system.System): ESPResSo system object where the hydrogel node will be created. + node_index ('str'): + Lattice node index in the form of a string, e.g. "[0 0 0]". + + node_name ('str'): + name of the node particle defined in pyMBE. + + espresso_system (espressomd.system.System): + ESPResSo system object where the hydrogel node will be created. Returns: - node_position('list'): Position of the node in the lattice. - p_id('int'): Particle ID of the node. + ('tuple(list,int)'): + ('list'): Position of the node in the lattice. + ('int'): Particle ID of the node. """ if self.lattice_builder is None: raise ValueError("LatticeBuilder is not initialized. Use 'initialize_lattice_builder' first.") @@ -312,6 +341,35 @@ def _create_hydrogel_node(self, node_index, node_name, espresso_system): self.lattice_builder.nodes[key] = node_name return node_position.tolist(), p_id[0] + def _get_espresso_bond_instance(self, bond_template, espresso_system, use_default_bond=False): + """ + Retrieve or create a bond instance in an ESPResSo system for a given pair of particle names. + + Args: + bond_template ('BondTemplate'): + BondTemplate object from the pyMBE database. + espresso_system ('espressomd.system.System'): + An ESPResSo system object where the bond will be added or retrieved. + use_default_bond (bool, optional): If True, use a default bond template when no + specific template exists for the particle pair. Defaults to False. + + Returns: + ('espressomd.interactions.BondedInteraction'): + The ESPResSo bond instance object. + + Notes: + When a new bond instance is created, it is not added to the ESPResSo system. 
+ """ + if bond_template.name in self.db.espresso_bond_instances.keys(): + bond_inst = self.db.espresso_bond_instances[bond_template.name] + else: + # Create an instance of the bond + bond_inst = self._create_espresso_bond_instance(bond_type=bond_template.bond_type, + bond_parameters=bond_template.get_parameters(self.units)) + self.db.espresso_bond_instances[bond_template.name]= bond_inst + espresso_system.bonded_inter.add(bond_inst) + return bond_inst + def _get_label_id_map(self, pmb_type): """ Returns the key used to access the particle ID map for a given pyMBE object type. @@ -363,10 +421,10 @@ def _get_template_type(self, name, allowed_types): Set of allowed pmb_type values (e.g. {"molecule", "peptide"}). Returns: - str: + ('str'): Resolved pmb_type. - Notes: + Notess: - This method does *not* return the template itself, only the validated pmb_type. """ registered_pmb_types_with_name = self.db._find_template_types(name=name) @@ -389,7 +447,7 @@ def _delete_particles_from_espresso(self, particle_ids, espresso_system): The ESPResSo simulation system from which the particles will be removed. - Notes: + Notess: - This method removes particles only from the ESPResSo simulation, **not** from the pyMBE database. Database cleanup must be handled separately by the caller. @@ -406,14 +464,16 @@ def calculate_center_of_mass(self, instance_id, pmb_type, espresso_system): Args: instance_id ('int'): pyMBE instance ID of the object whose center of mass is calculated. + pmb_type ('str'): Type of the pyMBE object. Must correspond to a particle-aggregating template type (e.g. '"molecule"', '"residue"', '"peptide"', '"protein"'). + espresso_system ('espressomd.system.System'): ESPResSo system containing the particle instances. Returns: - center_of_mass ('numpy.ndarray'): + ('numpy.ndarray'): Array of shape '(3,)' containing the Cartesian coordinates of the center of mass. 
@@ -440,12 +500,13 @@ def calculate_HH(self, template_name, pH_list=None, pka_set=None): Args: template_name ('str'): Name of the template. + pH_list ('list[float]', optional): pH values at which the charge is evaluated. Defaults to 50 values between 2 and 12. + pka_set ('dict', optional): - Mapping: - {particle_name: {"pka_value": 'float', "acidity": "acidic"|"basic"}} + Mapping: {particle_name: {"pka_value": 'float', "acidity": "acidic"|"basic"}} Returns: 'list[float]': @@ -497,13 +558,15 @@ def calculate_HH_Donnan(self, c_macro, c_salt, pH_list=None, pka_set=None): Mapping of macromolecular species names to their concentrations in the system: '{molecule_name: concentration}'. - Concentrations must carry units compatible with molar concentration. + c_salt ('float' or 'pint.Quantity'): Salt concentration in the reservoir. + pH_list ('list[float]', optional): List of pH values in the reservoir at which the calculation is performed. If 'None', 50 equally spaced values between 2 and 12 are used. + pka_set ('dict', optional): Dictionary defining the acid–base properties of titratable particle types: @@ -522,10 +585,6 @@ def calculate_HH_Donnan(self, c_macro, c_salt, pH_list=None, pka_set=None): - '"partition_coefficients"' ('list[float]'): Partition coefficients of monovalent cations at each pH value. - Raises: - ValueError: - If the provided 'pka_set' is invalid or inconsistent. - Notes: - This method assumes **ideal Donnan equilibrium** and **monovalent salt**. - The ionic strength of the reservoir includes both salt and @@ -552,11 +611,15 @@ def calc_charges(c_macro, pH): Calculates the charges of the different kinds of molecules according to the Henderson-Hasselbalch equation. Args: - c_macro('dic'): {"name": concentration} - A dict containing the concentrations of all charged macromolecular species in the system. - pH('float'): pH-value that is used in the HH equation. 
+ c_macro ('dict'): + {"name": concentration} - A dict containing the concentrations of all charged macromolecular species in the system. + + pH ('float'): + pH-value that is used in the HH equation. Returns: - charge('dict'): {"molecule_name": charge} + ('dict'): + {"molecule_name": charge} """ charge = {} for name in c_macro: @@ -568,8 +631,11 @@ def calc_partition_coefficient(charge, c_macro): Calculates the partition coefficients of positive ions according to the ideal Donnan theory. Args: - charge('dict'): {"molecule_name": charge} - c_macro('dict'): {"name": concentration} - A dict containing the concentrations of all charged macromolecular species in the system. + charge ('dict'): + {"molecule_name": charge} + + c_macro ('dict'): + {"name": concentration} - A dict containing the concentrations of all charged macromolecular species in the system. """ nonlocal ionic_strength_res charge_density = 0.0 @@ -733,13 +799,21 @@ def create_bond(self, particle_id1, particle_id2, espresso_system, use_default_b 5. Creates a 'BondInstance' in the database and registers it. Args: - particle_id1 (int): pyMBE and ESPResSo ID of the first particle. - particle_id2 (int): pyMBE and ESPResSo ID of the second particle. - espresso_system (espressomd.system.System): ESPResSo system object where the bond will be created. - use_default_bond (bool, optional): If True, use a default bond template if no specific template exists. Defaults to False. + particle_id1 ('int'): + pyMBE and ESPResSo ID of the first particle. + + particle_id2 ('int'): + pyMBE and ESPResSo ID of the second particle. + + espresso_system ('espressomd.system.System'): + ESPResSo system object where the bond will be created. + + use_default_bond ('bool', optional): + If True, use a default bond template if no specific template exists. Defaults to False. Returns: - (int): bond_id of the bond instance created in the pyMBE database. + ('int'): + bond_id of the bond instance created in the pyMBE database. 
""" particle_inst_1 = self.db.get_instance(pmb_type="particle", instance_id=particle_id1) @@ -764,15 +838,23 @@ def create_counterions(self, object_name, cation_name, anion_name, espresso_syst Creates particles of 'cation_name' and 'anion_name' in 'espresso_system' to counter the net charge of 'object_name'. Args: - object_name('str'): 'name' of a pyMBE object. - espresso_system('espressomd.system.System'): Instance of a system object from the espressomd library. - cation_name('str'): 'name' of a particle with a positive charge. - anion_name('str'): 'name' of a particle with a negative charge. + object_name ('str'): + 'name' of a pyMBE object. + + espresso_system ('espressomd.system.System'): + Instance of a system object from the espressomd library. + + cation_name ('str'): + 'name' of a particle with a positive charge. + + anion_name ('str'): + 'name' of a particle with a negative charge. Returns: - counterion_number('dict'): {"name": number} + ('dict'): + {"name": number} - Note: + Notes: This function currently does not support the creation of counterions for hydrogels. """ cation_tpl = self.db.get_template(pmb_type="particle", @@ -825,14 +907,18 @@ def create_hydrogel(self, name, espresso_system, use_default_bond=False): Creates a hydrogel in espresso_system using a pyMBE hydrogel template given by 'name' Args: - name(str): name of the hydrogel template in the pyMBE database. - espresso_system (espressomd.system.System): ESPResSo system object where the hydrogel will be created. - use_default_bond (bool, optional): If True, use a default bond template if no specific template exists. Defaults to False. + name ('str'): + name of the hydrogel template in the pyMBE database. + + espresso_system ('espressomd.system.System'): + ESPResSo system object where the hydrogel will be created. + + use_default_bond ('bool', optional): + If True, use a default bond template if no specific template exists. Defaults to False. 
Returns: - (int): id of the hydrogel instance created. + ('int'): id of the hydrogel instance created. """ - hydrogel_tpl = self.db.get_template(pmb_type="hydrogel", name=name) assembly_id = self.db._propose_instance_id(pmb_type="hydrogel") @@ -1012,13 +1098,24 @@ def create_particle(self, name, espresso_system, number_of_particles, position=N Creates one or more particles in an ESPResSo system based on the particle template in the pyMBE database. Args: - name('str'): Label of the particle template in the pyMBE database. - espresso_system('espressomd.system.System'): Instance of a system object from the espressomd library. - number_of_particles('int'): Number of particles to be created. - position(list of ['float','float','float'], optional): Initial positions of the particles. If not given, particles are created in random positions. Defaults to None. - fix('bool', optional): Controls if the particle motion is frozen in the integrator, it is used to create rigid objects. Defaults to False. + name ('str'): + Label of the particle template in the pyMBE database. + + espresso_system ('espressomd.system.System'): + Instance of a system object from the espressomd library. + + number_of_particles ('int'): + Number of particles to be created. + + position (list of ['float','float','float'], optional): + Initial positions of the particles. If not given, particles are created in random positions. Defaults to None. + + fix ('bool', optional): + Controls if the particle motion is frozen in the integrator, it is used to create rigid objects. Defaults to False. + Returns: - created_pid_list('list' of 'int'): List with the ids of the particles created into 'espresso_system'. + ('list' of 'int'): + List with the ids of the particles created into 'espresso_system'. 
""" if number_of_particles <=0: return [] @@ -1069,22 +1166,19 @@ def create_protein(self, name, number_of_proteins, espresso_system, topology_dic The ESPResSo simulation system where the protein molecules will be created. topology_dict (dict): - Dictionary defining the internal structure of the protein. - Expected format: - { - "ResidueName1": { - "initial_pos": np.ndarray, - "chain_id": int, - "radius": float - }, - "ResidueName2": { ... }, + Dictionary defining the internal structure of the protein. Expected format: + {"ResidueName1": {"initial_pos": np.ndarray, + "chain_id": int, + "radius": float}, + "ResidueName2": { ... }, ... } The '"initial_pos"' entry is required and represents the residue’s reference coordinates before shifting to the protein's center-of-mass. Returns: - (list of int): List of the molecule_id of the Protein instances created into ESPResSo. + ('list' of 'int'): + List of the molecule_id of the Protein instances created into ESPResSo. Notes: - Particles are created using 'create_particle()' with 'fix=True', @@ -1278,20 +1372,23 @@ def define_bond(self, bond_type, bond_parameters, particle_pairs): Defines bond templates for each particle pair in 'particle_pairs' in the pyMBE database. Args: - bond_type('str'): label to identify the potential to model the bond. - bond_parameters('dict'): parameters of the potential of the bond. - particle_pairs('lst'): list of the 'names' of the 'particles' to be bonded. + bond_type ('str'): + label to identify the potential to model the bond. - Note: - Currently, only HARMONIC and FENE bonds are supported. + bond_parameters ('dict'): + parameters of the potential of the bond. + + particle_pairs ('lst'): + list of the 'names' of the 'particles' to be bonded. - For a HARMONIC bond the dictionary must contain the following parameters: + Notes: + -Currently, only HARMONIC and FENE bonds are supported. 
+ - For a HARMONIC bond the dictionary must contain the following parameters: - k ('pint.Quantity') : Magnitude of the bond. It should have units of energy/length**2 using the 'pmb.units' UnitRegistry. - r_0 ('pint.Quantity') : Equilibrium bond length. It should have units of length using the 'pmb.units' UnitRegistry. - - For a FENE bond the dictionary must contain the same parameters as for a HARMONIC bond and: + - For a FENE bond the dictionary must contain the same parameters as for a HARMONIC bond and: - d_r_max ('pint.Quantity'): Maximal stretching length for FENE. It should have units of length using the 'pmb.units' UnitRegistry. Default 'None'. """ @@ -1327,10 +1424,13 @@ def define_default_bond(self, bond_type, bond_parameters): Defines a bond template as a "default" template in the pyMBE database. Args: - bond_type('str'): label to identify the potential to model the bond. - bond_parameters('dict'): parameters of the potential of the bond. + bond_type ('str'): + label to identify the potential to model the bond. + + bond_parameters ('dict'): + parameters of the potential of the bond. - Note: + Notes: - Currently, only harmonic and FENE bonds are supported. """ self._check_bond_inputs(bond_parameters=bond_parameters, @@ -1353,9 +1453,14 @@ def define_hydrogel(self, name, node_map, chain_map): Defines a hydrogel template in the pyMBE database. Args: - name('str'): Unique label that identifies the 'hydrogel'. - node_map('list of dict'): [{"particle_name": , "lattice_index": }, ... ] - chain_map('list of dict'): [{"node_start": , "node_end": , "residue_list": , ... ] + name ('str'): + Unique label that identifies the 'hydrogel'. + + node_map ('list of dict'): + [{"particle_name": , "lattice_index": }, ... ] + + chain_map ('list of dict'): + [{"node_start": , "node_end": , "residue_list": , ... ] """ # Sanity tests @@ -1395,8 +1500,11 @@ def define_molecule(self, name, residue_list): Defines a molecule template in the pyMBE database. 
Args: - name('str'): Unique label that identifies the 'molecule'. - residue_list('list' of 'str'): List of the 'name's of the 'residue's in the sequence of the 'molecule'. + name('str'): + Unique label that identifies the 'molecule'. + + residue_list ('list' of 'str'): + List of the 'name's of the 'residue's in the sequence of the 'molecule'. """ tpl = MoleculeTemplate(name=name, residue_list=residue_list) @@ -1407,10 +1515,17 @@ def define_monoprototic_acidbase_reaction(self, particle_name, pka, acidity, met Defines an acid-base reaction for a monoprototic particle in the pyMBE database. Args: - particle_name (str): Unique label that identifies the particle template. - pka (float): pka-value of the acid or base. - acidity (str): Identifies whether if the particle is 'acidic' or 'basic'. - metadata (dict, optional): Additional information to be stored in the reaction. Defaults to None. + particle_name ('str'): + Unique label that identifies the particle template. + + pka ('float'): + pka-value of the acid or base. + + acidity ('str'): + Identifies whether if the particle is 'acidic' or 'basic'. + + metadata ('dict', optional): + Additional information to be stored in the reaction. Defaults to None. """ supported_acidities = ["acidic", "basic"] if acidity not in supported_acidities: @@ -1436,8 +1551,11 @@ def define_monoprototic_particle_states(self, particle_name, acidity): Defines particle states for a monoprotonic particle template including the charges in each of its possible states. Args: - particle_name('str'): Unique label that identifies the particle template. - acidity('str'): Identifies whether the particle is 'acidic' or 'basic'. + particle_name ('str'): + Unique label that identifies the particle template. + + acidity ('str'): + Identifies whether the particle is 'acidic' or 'basic'. 
""" acidity_valid_keys = ['acidic', 'basic'] if not pd.isna(acidity): @@ -1458,16 +1576,31 @@ def define_particle(self, name, sigma, epsilon, z=0, acidity=pd.NA, pka=pd.NA, Defines a particle template in the pyMBE database. Args: - name('str'): Unique label that identifies this particle type. - sigma('pint.Quantity'): Sigma parameter used to set up Lennard-Jones interactions for this particle type. - epsilon('pint.Quantity'): Epsilon parameter used to setup Lennard-Jones interactions for this particle tipe. - z('int', optional): Permanent charge number of this particle type. Defaults to 0. - acidity('str', optional): Identifies whether if the particle is 'acidic' or 'basic', used to setup constant pH simulations. Defaults to pd.NA. - pka('float', optional): If 'particle' is an acid or a base, it defines its pka-value. Defaults to pd.NA. - cutoff('pint.Quantity', optional): Cutoff parameter used to set up Lennard-Jones interactions for this particle type. Defaults to pd.NA. - offset('pint.Quantity', optional): Offset parameter used to set up Lennard-Jones interactions for this particle type. Defaults to pd.NA. + name('str'): + Unique label that identifies this particle type. + + sigma('pint.Quantity'): + Sigma parameter used to set up Lennard-Jones interactions for this particle type. + + epsilon('pint.Quantity'): + Epsilon parameter used to setup Lennard-Jones interactions for this particle tipe. + + z('int', optional): + Permanent charge number of this particle type. Defaults to 0. + + acidity('str', optional): + Identifies whether if the particle is 'acidic' or 'basic', used to setup constant pH simulations. Defaults to pd.NA. + + pka('float', optional): + If 'particle' is an acid or a base, it defines its pka-value. Defaults to pd.NA. + + cutoff('pint.Quantity', optional): + Cutoff parameter used to set up Lennard-Jones interactions for this particle type. Defaults to pd.NA. 
+ + offset('pint.Quantity', optional): + Offset parameter used to set up Lennard-Jones interactions for this particle type. Defaults to pd.NA. - Note: + Notes: - 'sigma', 'cutoff' and 'offset' must have a dimensitonality of '[length]' and should be defined using pmb.units. - 'epsilon' must have a dimensitonality of '[energy]' and should be defined using pmb.units. - 'cutoff' defaults to '2**(1./6.) reduced_length'. @@ -1538,9 +1671,14 @@ def define_peptide(self, name, sequence, model): Defines a peptide template in the pyMBE database. Args: - name ('str'): Unique label that identifies the peptide. - sequence ('str'): Sequence of the peptide. - model ('str'): Model name. Currently only models with 1 bead '1beadAA' or with 2 beads '2beadAA' per amino acid are supported. + name ('str'): + Unique label that identifies the peptide. + + sequence ('str'): + Sequence of the peptide. + + model ('str'): + Model name. Currently only models with 1 bead '1beadAA' or with 2 beads '2beadAA' per amino acid are supported. """ valid_keys = ['1beadAA','2beadAA'] if model not in valid_keys: @@ -1558,11 +1696,16 @@ def define_protein(self, name, sequence, model): Defines a protein template in the pyMBE database. Args: - name ('str'): Unique label that identifies the protein. - sequence ('str'): Sequence of the protein. - model ('string'): Model name. Currently only models with 1 bead '1beadAA' or with 2 beads '2beadAA' per amino acid are supported. + name ('str'): + Unique label that identifies the protein. + + sequence ('str'): + Sequence of the protein. - Note: + model ('string'): + Model name. Currently only models with 1 bead '1beadAA' or with 2 beads '2beadAA' per amino acid are supported. + + Notes: - Currently, only 'lj_setup_mode="wca"' is supported. This corresponds to setting up the WCA potential. """ valid_model_keys = ['1beadAA','2beadAA'] @@ -1581,15 +1724,19 @@ def define_residue(self, name, central_bead, side_chains): Defines a residue template in the pyMBE database. 
Args: - name('str'): Unique label that identifies the residue. - central_bead('str'): 'name' of the 'particle' to be placed as central_bead of the residue. - side_chains('list' of 'str'): List of 'name's of the pmb_objects to be placed as side_chains of the residue. Currently, only pyMBE objects of type 'particle' or 'residue' are supported. + name ('str'): + Unique label that identifies the residue. + + central_bead ('str'): + 'name' of the 'particle' to be placed as central_bead of the residue. + + side_chains('list' of 'str'): + List of 'name's of the pmb_objects to be placed as side_chains of the residue. Currently, only pyMBE objects of type 'particle' or 'residue' are supported. """ tpl = ResidueTemplate(name=name, central_bead=central_bead, side_chains=side_chains) self.db._register_template(tpl) - return def delete_instances_in_system(self, instance_id, pmb_type, espresso_system): """ @@ -1597,10 +1744,14 @@ def delete_instances_in_system(self, instance_id, pmb_type, espresso_system): Related assembly, molecule, residue, particles and bond instances will also be deleted from the pyMBE dataframe. Args: - instance_id (int): id of the assembly to be deleted. - pmb_type (str): the instance type to be deleted. - espresso_system (espressomd.system.System): Instance of a system class from espressomd library. + instance_id ('int'): + id of the assembly to be deleted. + pmb_type ('str'): + the instance type to be deleted. + + espresso_system ('espressomd.system.System'): + Instance of a system class from espressomd library. """ if pmb_type == "particle": instance_identifier = "particle_id" @@ -1623,11 +1774,6 @@ def determine_reservoir_concentrations(self, pH_res, c_salt_res, activity_coeffi """ Determines ionic concentrations in the reservoir at fixed pH and salt concentration. 
- This method computes the equilibrium concentrations of H⁺, OH⁻, Na⁺, and Cl⁻ - ions in a reservoir by solving a coupled, nonlinear system of equations that - includes water autodissociation and non-ideal activity effects. The solution - is obtained via a self-consistent iterative procedure. - Args: pH_res ('float'): Target pH value in the reservoir. @@ -1652,7 +1798,7 @@ def determine_reservoir_concentrations(self, pH_res, c_salt_res, activity_coeffi - cNa_res ('pint.Quantity'): Concentration of Na⁺ ions. - cCl_res ('pint.Quantity'): Concentration of Cl⁻ ions. - Notes: + Notess: - The algorithm enforces electroneutrality in the reservoir. - Water autodissociation is included via the equilibrium constant 'Kw'. - Non-ideal effects enter through activity coefficients depending on @@ -1736,7 +1882,7 @@ def enable_motion_of_rigid_object(self, instance_id, pmb_type, espresso_system): espresso_system ('espressomd.system.System'): ESPResSo system in which the rigid object is defined. - Notes: + Notess: - This method requires ESPResSo to be compiled with the following features enabled: - '"VIRTUAL_SITES_RELATIVE"' @@ -1789,7 +1935,7 @@ def generate_coordinates_outside_sphere(self, center, radius, max_dist, n_sample List of coordinates lying outside the inner sphere and inside the outer sphere. - Notes: + Notess: - Points are uniformly sampled inside a sphere of radius 'max_dist' centered at 'center' and only those with a distance greater than or equal to 'radius' from the center are retained. """ @@ -1928,11 +2074,11 @@ def get_charge_number_map(self): Construct a mapping from ESPResSo particle types to their charge numbers. Returns: - dict[int, float]: + 'dict[int, float]': Dictionary mapping ESPResSo particle types to charge numbers, ''{es_type: z}''. - Notes: + Notess: - The mapping is built from particle *states*, not instances. - If multiple templates define states with the same ''es_type'', the last encountered definition will overwrite previous ones. 
@@ -1948,41 +2094,6 @@ def get_charge_number_map(self): charge_number_map[state.es_type] = state.z return charge_number_map - - def _get_espresso_bond_instance(self, bond_template, espresso_system, use_default_bond=False): - """ - Retrieve or create a bond instance in an ESPResSo system for a given pair of particle names. - - This method checks whether a bond instance already exists in the database for the - specified particle pair. If it exists, it retrieves the corresponding ESPResSo bond - instance. Otherwise, it creates a new ESPResSo bond instance using the bond template. - - Args: - bond_template (BondTemplate): BondTemplate object from the pyMBE database. - espresso_system: An ESPResSo system object where the bond will be added or retrieved. - use_default_bond (bool, optional): If True, use a default bond template when no - specific template exists for the particle pair. Defaults to False. - - Returns: - (espressomd.interactions.BondedInteraction): The ESPResSo bond instance object. - - Raises: - KeyError: If no bond template is found for the particle pair and 'use_default_bond' is False. - - Note: - When a new bond instance is created, it is not added to the ESPResSo system. - """ - - if bond_template.name in self.db.espresso_bond_instances.keys(): - bond_inst = self.db.espresso_bond_instances[bond_template.name] - else: - # Create an instance of the bond - bond_inst = self._create_espresso_bond_instance(bond_type=bond_template.bond_type, - bond_parameters=bond_template.get_parameters(self.units)) - self.db.espresso_bond_instances[bond_template.name]= bond_inst - espresso_system.bonded_inter.add(bond_inst) - return bond_inst - def get_instances_df(self, pmb_type): """ Returns a dataframe with all instances of type 'pmb_type' in the pyMBE database. 
@@ -2006,13 +2117,17 @@ def get_lj_parameters(self, particle_name1, particle_name2, combining_rule='Lore particle_name1 ('str'): label of the type of the first particle type - particle_name2 ('str'): label of the type of the second particle type - combining_rule ('string', optional): combining rule used to calculate 'sigma' and 'epsilon' for the potential betwen a pair of particles. Defaults to 'Lorentz-Berthelot'. + particle_name2 ('str'): + label of the type of the second particle type + + combining_rule ('string', optional): + combining rule used to calculate 'sigma' and 'epsilon' for the potential betwen a pair of particles. Defaults to 'Lorentz-Berthelot'. Returns: - {"epsilon": epsilon_value, "sigma": sigma_value, "offset": offset_value, "cutoff": cutoff_value} + ('dict'): + {"epsilon": epsilon_value, "sigma": sigma_value, "offset": offset_value, "cutoff": cutoff_value} - Note: + Notes: - Currently, the only 'combining_rule' supported is Lorentz-Berthelot. - If the sigma value of 'particle_name1' or 'particle_name2' is 0, the function will return an empty dictionary. No LJ interactions are set up for particles with sigma = 0. """ @@ -2040,19 +2155,22 @@ def get_lj_parameters(self, particle_name1, particle_name2, combining_rule='Lore def get_particle_id_map(self, object_name): """ Collect all particle IDs associated with an object of given name in the - pyMBE database. Works for particles, residues, molecules, proteins, - peptides, and assemblies. - - Relies in the internal method Manager.get_particle_id_map, see method for the detailed code. + pyMBE database. Args: - object_name (str): Name of the object. + object_name ('str'): + Name of the object. 
Returns: - dict: {"all": [particle_ids], - "residue_map": {residue_id: [particle_ids]}, - "molecule_map": {molecule_id: [particle_ids]}, - "assembly_map": {assembly_id: [particle_ids]},} + ('dict'): + {"all": [particle_ids], + "residue_map": {residue_id: [particle_ids]}, + "molecule_map": {molecule_id: [particle_ids]}, + "assembly_map": {assembly_id: [particle_ids]},} + + Notess: + - Works for all supported pyMBE templates. + - Relies in the internal method Manager.get_particle_id_map, see method for the detailed code. """ return self.db.get_particle_id_map(object_name=object_name) @@ -2061,10 +2179,11 @@ def get_particle_pka(self, particle_name): Retrieve the pKa value associated with a particle from the pyMBE database. Args: - particle_name (str): Name of the particle template. + particle_name ('str'): + Name of the particle template. Returns: - float or None: + ('float' or 'None'): - The pKa value if the particle participates in a single acid/base reaction - None if the particle is inert (no acid/base reaction) """ @@ -2088,10 +2207,11 @@ def get_pka_set(self): Retrieve the pKa set for all titratable particles in the pyMBE database. Returns: - dict: Dictionary of the form: + ('dict'): + Dictionary of the form: {"particle_name": {"pka_value": float, "acidity": "acidic" | "basic"}} - Note: + Notes: - If a particle participates in multiple acid/base reactions, an error is raised. """ pka_set = {} @@ -2116,7 +2236,7 @@ def get_pka_set(self): return pka_set def get_radius_map(self, dimensionless=True): - ''' + """ Gets the effective radius of each particle defined in the pyMBE database. Args: @@ -2129,7 +2249,7 @@ def get_radius_map(self, dimensionless=True): Notes: - The radius corresponds to (sigma+offset)/2 - ''' + """ if "particle" not in self.db._templates: return {} result = {} @@ -2156,7 +2276,8 @@ def get_reduced_units(self): Returns the current set of reduced units defined in pyMBE. 
Returns: - reduced_units_text('str'): text with information about the current set of reduced units. + reduced_units_text ('str'): + text with information about the current set of reduced units. """ unit_length=self.units.Quantity(1,'reduced_length') @@ -2189,11 +2310,9 @@ def get_type_map(self): Return the mapping of ESPResSo types for all particle states defined in the pyMBE database. Returns: - dict[str, int]: - A dictionary mapping each particle state to its corresponding ESPResSo type:{ - state_name: es_type, - ... - } + 'dict[str, int]': + A dictionary mapping each particle state to its corresponding ESPResSo type: + {state_name: es_type, ...} """ return self.db.get_es_types_map() @@ -2203,7 +2322,8 @@ def initialize_lattice_builder(self, diamond_lattice): Initialize the lattice builder with the DiamondLattice object. Args: - diamond_lattice('DiamondLattice'): DiamondLattice object from the 'lib/lattice' module to be used in the LatticeBuilder. + diamond_lattice ('DiamondLattice'): + DiamondLattice object from the 'lib/lattice' module to be used in the LatticeBuilder. """ from .lib.lattice import LatticeBuilder, DiamondLattice if not isinstance(diamond_lattice, DiamondLattice): @@ -2217,13 +2337,17 @@ def load_database(self, folder, format='csv'): Loads a pyMBE database stored in 'folder'. Args: - folder (str or Path): Path to the folder where the pyMBE database was stored. - format (str, optional): Format of the database to be loaded. Defaults to 'csv'. + folder ('str' or 'Path'): + Path to the folder where the pyMBE database was stored. + + format ('str', optional): + Format of the database to be loaded. Defaults to 'csv'. Return: - (dict): metadata with additional information about the source of the information in the database. + ('dict'): + metadata with additional information about the source of the information in the database. - Note: + Notes: - The folder must contain the files generated by 'pmb.save_database()'. 
- Currently, only 'csv' format is supported. """ @@ -2242,18 +2366,15 @@ def load_pka_set(self, filename): to existing particle templates. Args: - filename ('str'): Path to a JSON file containing the pKa set. - Expected format: - { - "metadata": {...}, - "data": { - "A": {"acidity": "acidic", "pka_value": 4.5}, - "B": {"acidity": "basic", "pka_value": 9.8} - } - } + filename ('str'): + Path to a JSON file containing the pKa set. Expected format: + {"metadata": {...}, + "data": {"A": {"acidity": "acidic", "pka_value": 4.5}, + "B": {"acidity": "basic", "pka_value": 9.8}}} Returns: - (dict): Dictionary with bibliographic metadata about the original work were the pKa set was determined. + ('dict'): + Dictionary with bibliographic metadata about the original work were the pKa set was determined. Notes: - This method is designed for monoprotic acids and bases only. @@ -2276,16 +2397,9 @@ def propose_unused_type(self): """ Propose an unused ESPResSo particle type. - This method scans the full 'type_map' produced by 'get_type_map()', - which contains all particle templates and their associated state 'es_type'. - It extracts all integer 'es_type' values and returns the next available - integer type, ensuring no collisions with existing ones. - Returns: - int: The next available integer ESPResSo type. Returns ''0'' if no - integer types are currently defined. - - + ('int'): + The next available integer ESPResSo type. Returns ''0'' if no integer types are currently defined. """ type_map = self.get_type_map() # Flatten all es_type values across all particles and states @@ -2304,15 +2418,14 @@ def read_protein_vtf(self, filename, unit_length=None): Args: filename ('str'): Path to the VTF file. + unit_length ('Pint.Quantity'): Unit of length for coordinates (pyMBE UnitRegistry). Defaults to Angstrom. Returns: - topology_dict ('dict'): - Particle topology. - - sequence ('str'): - One-letter amino-acid sequence (including n/c ends). + ('tuple'): + ('dict'): Particle topology. 
+ ('str'): One-letter amino-acid sequence (including n/c ends). """ logging.info(f"Loading protein coarse-grain model file: {filename}") if unit_length is None: @@ -2659,7 +2772,7 @@ def setup_grxmc_reactions(self, pH_res, c_salt_res, proton_name, hydroxide_name, 'pint.Quantity': Ionic strength of the reservoir (useful for calculating partition coefficients). - Notes: + Notess: - This implementation uses the original formulation of the grand-reaction method by Landsgesell et al. [1]. [1] Landsgesell, J., Hebbeker, P., Rud, O., Lunkad, R., Košovan, P., & Holm, C. (2020). Grand-reaction method for simulations of ionization equilibria coupled to ion partitioning. Macromolecules, 53(8), 3007-3020. From f85e0d3da0023158f5176c02e2d2f1b92992c3f8 Mon Sep 17 00:00:00 2001 From: pmblanco Date: Wed, 28 Jan 2026 16:19:15 +0100 Subject: [PATCH 37/55] clean up repo, fix bug in setup of rigid object --- demo.py | 193 ----------- pyMBE/lib/handy_functions.py | 2 +- pyMBE/pyMBE.py | 3 +- test.py | 396 ----------------------- testsuite/globular_protein_unit_tests.py | 7 +- testsuite/hydrogel_builder.py | 337 ++++++------------- 6 files changed, 109 insertions(+), 829 deletions(-) delete mode 100644 demo.py delete mode 100644 test.py diff --git a/demo.py b/demo.py deleted file mode 100644 index a603247..0000000 --- a/demo.py +++ /dev/null @@ -1,193 +0,0 @@ -import pyMBE -import espressomd -from pyMBE.lib.lattice import DiamondLattice - -# Setup -pmb = pyMBE.pymbe_library(seed=42) -units = pmb.units -espresso_system = espressomd.System(box_l=[10, 10, 10]) - -# Define some particle templates -pmb.define_particle(name="Z", - sigma=3.5 * units.reduced_length, - cutoff=4 * units.reduced_length, - offset=0 * units.reduced_length, - epsilon=0.2 * units.reduced_energy, - acidity="acidic", - pka=4.25) - -pmb.define_particle(name="X", - sigma=3.5 * units.reduced_length, - cutoff=4 * units.reduced_length, - offset=0 * units.reduced_length, - epsilon=0.2 * units.reduced_energy, - z=1) - 
-print("\n=== Particle Templates DataFrame ===") -print(pmb.get_templates_df(pmb_type="particle")) - -# Access some data in the database -tpl_particle_Z = pmb.db.get_template(name="Z", pmb_type="particle") -tpl_particle_X = pmb.db.get_template(name="X", pmb_type="particle") - - - -# PintQuantity usage example -print("\n=== PintQuantity Usage Example ===") -print(f"PintQuantity class stored in the pyMBE database: {tpl_particle_Z.sigma}") -# Convert to Pint Quantity -sigma_Z = tpl_particle_Z.sigma.to_quantity(units) -print(f"Converted sigma_Z: {sigma_Z} ({sigma_Z.to('reduced_length')})") -# Operate with Pint Quantity -sigma_X = tpl_particle_X.sigma.to_quantity(units) -print(sigma_Z+sigma_X) - -# Setup LJ interactions -pmb.setup_lj_interactions(espresso_system=espresso_system) -print("\n=== LJ Templates DataFrame ===") -print(pmb.get_templates_df(pmb_type="lj")) - - - -# Create instances of particles -pmb.create_particle(name="Z", - espresso_system=espresso_system, - number_of_particles=3) -pmb.create_particle(name="X", - espresso_system=espresso_system, - number_of_particles=1) - -print("\n=== Particle Instances DataFrame ===") -print(pmb.get_instances_df(pmb_type="particle")) - - -# Delete instances of particles 0-2 -for i in range(3): - pmb.delete_instances_in_system(espresso_system=espresso_system, - pmb_type="particle", - instance_id=i) - -print("\n=== Particle Instances DataFrame After Deletion ===") -print(pmb.get_instances_df(pmb_type="particle")) - -pmb.delete_instances_in_system(espresso_system=espresso_system, - pmb_type="particle", - instance_id=3) - -# Create residue -## Define residues and bonds -pmb.define_residue(name="R1", - central_bead="Z", - side_chains=["X","Z"]) -parameters = {"k": 100.0 * units.reduced_energy / (units.reduced_length**2), - "r_0": 1.0 * units.reduced_length} -pmb.define_bond(bond_type="harmonic", - bond_parameters=parameters, - particle_pairs=[["Z","Z"], - ["Z","X"], - ["X","X"]]) - -print("\n=== Residue Templates DataFrame ===") 
-print(pmb.get_templates_df(pmb_type="residue")) -print("\n=== Bond Templates DataFrame ===") -print(pmb.get_templates_df(pmb_type="bond")) - - -# Create residue instance -pmb.create_residue(name="R1", - espresso_system=espresso_system) - - -print("\n=== Particle Instances DataFrame ===") -print(pmb.get_instances_df(pmb_type="particle")) -print("\n=== Residue Instances DataFrame ===") -print(pmb.get_instances_df(pmb_type="residue")) -print("\n=== Bond Instances DataFrame ===") -print(pmb.get_instances_df(pmb_type="bond")) - -# Save database -pmb.save_database("demo_csv") - -### Now create a new pyMBE instance with another set of reduced units -pmb2 = pyMBE.pymbe_library(seed=24) -pmb2.set_reduced_units(unit_length=0.6*pmb2.units.nanometer,) - -pmb2.load_database("demo_csv") -print("\n=== Original Particle Templates DataFrame ===") -print(pmb.get_templates_df(pmb_type="particle")) -print("\n=== Loaded Particle Templates DataFrame ===") -print(pmb2.get_templates_df(pmb_type="particle")) - -# Access some data in the database -tpl_particle_Z = pmb.db.get_template(name="Z", - pmb_type="particle") -tpl_particle_Z_loaded = pmb2.db.get_template(name="Z", - pmb_type="particle") - -print("\n=== PintQuantity Usage Example After Loading Database ===") -original_sigma_Z = tpl_particle_Z.sigma.to_quantity(pmb.units) -loaded_sigma_Z = tpl_particle_Z_loaded.sigma.to_quantity(pmb2.units) -print(f"Original sigma_Z: {original_sigma_Z.to('nanometer')} {original_sigma_Z.to('reduced_length')}") -print(f"Loaded sigma_Z: {loaded_sigma_Z.to('nanometer')} {loaded_sigma_Z.to('reduced_length')}") - -# Delete the residue before proceding to the last example -pmb.delete_instances_in_system(espresso_system=espresso_system, - pmb_type="residue", - instance_id=0) -print("\n=== Particle Instances DataFrame After Deletion ===") -print(pmb.get_instances_df(pmb_type="particle")) -print("\n=== Residue Instances DataFrame After Deletion ===") -print(pmb.get_instances_df(pmb_type="residue")) 
-print("\n=== Bond Instances DataFrame After Deletion ===") -print(pmb.get_instances_df(pmb_type="bond")) - -# Final example: let's create a hydrogel -## First define a molecule for the chains of the hydrogel -pmb.define_molecule(name="M1", - residue_list=["R1"]*1) -diamond_lattice = DiamondLattice(4, 3.5 * units.reduced_length) -lattice_builder = pmb.initialize_lattice_builder(diamond_lattice) - -# Setting up node topology --> Nodes are particles of type "X" -indices = diamond_lattice.indices -node_topology = [] - -for index in range(len(indices)): - node_topology.append({"particle_name": "X", - "lattice_index": indices[index]}) - -# Setting up chain topology --> Chains are molecules of type "M1" -node_labels = lattice_builder.node_labels -chain_labels = lattice_builder.chain_labels -reverse_node_labels = {v: k for k, v in node_labels.items()} -chain_topology = [] - -for chain_data in chain_labels.items(): - node_label_pair = chain_data[0] - node_label_s, node_label_e = [int(x) for x in node_label_pair.strip("()").split(",")] - chain_topology.append({'node_start': reverse_node_labels[node_label_s], - 'node_end': reverse_node_labels[node_label_e], - 'molecule_name': "M1"}) - -pmb.define_hydrogel("my_hydrogel", - node_topology, - chain_topology) - -print("\n=== Molecule Templates DataFrame ===") -print(pmb.get_templates_df(pmb_type="molecule")) -print("\n=== Hydrogel Templates DataFrame ===") -print(pmb.get_templates_df(pmb_type="hydrogel")) - -pmb.create_hydrogel(name="my_hydrogel", - espresso_system=espresso_system) -print("\n=== Particle Instances DataFrame After Hydrogel Creation ===") -print(pmb.get_instances_df(pmb_type="particle")) -print("\n=== Residue Instances DataFrame After Hydrogel Creation ===") -print(pmb.get_instances_df(pmb_type="residue")) -print("\n=== Bond Instances DataFrame After Hydrogel Creation ===") -print(pmb.get_instances_df(pmb_type="bond")) -print("\n=== Molecule Instances DataFrame After Hydrogel Creation ===") 
-print(pmb.get_instances_df(pmb_type="molecule")) -print("\n=== Hydrogel Instances DataFrame After Hydrogel Creation ===") -print(pmb.get_instances_df(pmb_type="hydrogel")) -pmb.save_database("demo_csv") \ No newline at end of file diff --git a/pyMBE/lib/handy_functions.py b/pyMBE/lib/handy_functions.py index 0eda6ef..88545e9 100644 --- a/pyMBE/lib/handy_functions.py +++ b/pyMBE/lib/handy_functions.py @@ -1,5 +1,5 @@ # -# Copyright (C) 2024-2025 pyMBE-dev team +# Copyright (C) 2024-2026 pyMBE-dev team # # This file is part of pyMBE. # diff --git a/pyMBE/pyMBE.py b/pyMBE/pyMBE.py index 15871df..5f9adba 100644 --- a/pyMBE/pyMBE.py +++ b/pyMBE/pyMBE.py @@ -1897,7 +1897,7 @@ def enable_motion_of_rigid_object(self, instance_id, pmb_type, espresso_system): inst = self.db.get_instance(pmb_type=pmb_type, instance_id=instance_id) label = self._get_label_id_map(pmb_type=pmb_type) - particle_ids_list = self.get_particle_id_map(object_name=inst.name)[label] + particle_ids_list = self.get_particle_id_map(object_name=inst.name)[label][instance_id] center_of_mass = self.calculate_center_of_mass (instance_id=instance_id, espresso_system=espresso_system, pmb_type=pmb_type) @@ -1905,7 +1905,6 @@ def enable_motion_of_rigid_object(self, instance_id, pmb_type, espresso_system): rotation=[True,True,True], type=self.propose_unused_type()) rigid_object_center.mass = len(particle_ids_list) - momI = 0 for pid in particle_ids_list: momI += np.power(np.linalg.norm(center_of_mass - espresso_system.part.by_id(pid).pos), 2) rigid_object_center.rinertia = np.ones(3) * momI diff --git a/test.py b/test.py deleted file mode 100644 index 36d94a5..0000000 --- a/test.py +++ /dev/null @@ -1,396 +0,0 @@ - -import pyMBE -from pyMBE.storage.manager import Manager -from pyMBE.storage.reactions.reaction import Reaction, ReactionParticipant -from pyMBE.storage.pint_quantity import PintQuantity - -from pyMBE.storage.templates.lj import LJInteractionTemplate - -from pyMBE.lib.lattice import DiamondLattice 
-import importlib.resources - -import pyMBE.storage.io as io - -import pint -import scipy -import espressomd - -import pyMBE.lib.handy_functions as hf - -def main(): - - units = pint.UnitRegistry() - unit_length= 0.355*units.nm - temperature = 298.15 * units.K - kB=scipy.constants.k * units.J / units.K - kT=temperature*kB - units.define(f'reduced_energy = {kT} ') - units.define(f'reduced_length = {unit_length}') - espresso_system=espressomd.System (box_l = [10]*3) - db = Manager(units=units) - - # ============================================================ - # 1. CREATE PARTICLE TEMPLATES + STATES - # ============================================================ - - pmb = pyMBE.pymbe_library(seed=42) - units = pmb.units - - path_to_pka=pmb.root / "parameters" / "pka_sets" / "Nozaki1967.json" - pmb.load_pka_set(filename=path_to_pka) - - pmb.define_particle(name="Z", - sigma=3.5 * units.reduced_length, - cutoff=4 * units.reduced_length, - offset=0 * units.reduced_length, - epsilon=0.2 * units.reduced_energy, - acidity="acidic", - pka=4.25) - - pmb.define_particle(name="X", - sigma=3.5 * units.reduced_length, - cutoff=4 * units.reduced_length, - offset=0 * units.reduced_length, - epsilon=0.2 * units.reduced_energy, - z=1) - - pmb.define_particle(name="Anion", - sigma=3.5 * units.reduced_length, - cutoff=4 * units.reduced_length, - offset=0 * units.reduced_length, - epsilon=0.2 * units.reduced_energy, - z=-1) - print(pmb.db._get_reactions_df()) - print(pmb.db._get_templates_df(pmb_type="particle")) - print(pmb.get_templates_df(pmb_type="particle_state")) - print(pmb.get_radius_map(dimensionless=False)) - - print("\n=== Setup LJ interactions ===") - pmb.setup_lj_interactions(espresso_system=espresso_system) - print(pmb.db._get_templates_df(pmb_type="lj")) - - pmb.define_residue(name="R1", central_bead="Z", side_chains=["X","Z"]) - pmb.define_residue(name="R2", central_bead="Z", side_chains=["X","R1"]) - - print("\n=== Residue Templates DataFrame ===") - 
print(pmb.db._get_templates_df(pmb_type="residue")) - - pmb.define_molecule(name="M1", residue_list=["R1","R2"]*2) - print("\n=== Molecule Templates DataFrame ===") - print(pmb.db._get_templates_df(pmb_type="molecule")) - - - print("\n=== Hydrogel Templates DataFrame ===") - diamond_lattice = DiamondLattice(4, 3.5 * units.reduced_length) - lattice_builder = pmb.initialize_lattice_builder(diamond_lattice) - - # Setting up node topology - indices = diamond_lattice.indices - node_topology = [] - - for index in range(len(indices)): - node_topology.append({"particle_name": "X", - "lattice_index": indices[index]}) - # Setting up chain topology - node_labels = lattice_builder.node_labels - chain_labels = lattice_builder.chain_labels - reverse_node_labels = {v: k for k, v in node_labels.items()} - chain_topology = [] - - for chain_data in chain_labels.items(): - node_label_pair = chain_data[0] - node_label_s, node_label_e = [int(x) for x in node_label_pair.strip("()").split(",")] - chain_topology.append({'node_start':reverse_node_labels[node_label_s], - 'node_end': reverse_node_labels[node_label_e], - 'molecule_name':"M1"}) - - pmb.define_hydrogel("my_hydrogel", node_topology, chain_topology) - print(pmb.db._get_templates_df(pmb_type="hydrogel")) - - print("\n=== Molecule Templates DataFrame ===") - print(pmb.db._get_templates_df(pmb_type="molecule")) - - tpl = LJInteractionTemplate(state1 = "A", - state2 = "AH", - sigma = PintQuantity.from_quantity(q=1.0 * units.reduced_length, expected_dimension="length", ureg=units), - cutoff = PintQuantity.from_quantity(q=1.2 * units.reduced_length, expected_dimension="length", ureg=units), - offset = PintQuantity.from_quantity(q=0 * units.reduced_length, expected_dimension="length", ureg=units), - epsilon = PintQuantity.from_quantity(q=1.0 * units.reduced_energy, expected_dimension="energy", ureg=units), - shift = "auto" - ) - db._register_template(tpl) - print(db._get_templates_df(pmb_type="lj")) - - print("\n=== Particle Templates 
DataFrame ===") - print(pmb.db._get_templates_df(pmb_type="particle")) - print(pmb.db._get_reactions_df()) - - # Update reaction - """ - pmb.db._update_reaction_participant(reaction_name="AH <-> A", - particle_name="H", - state_name="H", - coefficient=1) - print(pmb.db._get_reactions_df()) - """ - - - - parameters = {"k": 100.0 * units.reduced_energy / (units.reduced_length**2), - "r_0": 1.0 * units.reduced_length} - - - pmb.define_bond(bond_type="harmonic", - bond_parameters=parameters, - particle_pairs=[["Z","Z"], - ["Z","X"], - ["X","X"]]) - - pmb.define_default_bond(bond_type="harmonic", - bond_parameters=parameters) - - print("\n=== Bond Templates DataFrame ===") - print(pmb.db._get_templates_df(pmb_type="bond")) - - print("\n=== Peptide Templates DataFrame ===") - pmb.define_peptide(name="Peptide1", - model="1beadAA", - sequence="KKKKDDDD") - - print(pmb.db._get_templates_df(pmb_type="peptide")) - - print("\n=== Protein Templates DataFrame ===") - path = importlib.resources.files(pyMBE) / "parameters" / "globular_proteins" / f"1beb.vtf", - - topology_dict = pmb.read_protein_vtf (filename=path[0]) - - # Define AA particles and residues - hf.define_protein_AA_particles(topology_dict=topology_dict, - pmb=pmb) - - hf.define_protein_AA_residues(topology_dict=topology_dict, - model="2beadAA", - pmb=pmb) - - print(pmb.db._get_templates_df(pmb_type="particle")) - print(pmb.db._get_templates_df(pmb_type="residue")) - pmb.define_protein(name="1beb", - model="2beadAA", - sequence="KKKKKK") - - - print(db._get_templates_df(pmb_type="protein")) - - # ============================================================ - # 2. 
CREATE INSTANCES (optional for testing) - # ============================================================ - pmb.create_particle(name="Z", - espresso_system=espresso_system, - number_of_particles=3) - pmb.create_particle(name="X", - espresso_system=espresso_system, - number_of_particles=1) - - - print("\n=== Particle Instances DataFrame ===") - print(pmb.db._get_instances_df(pmb_type="particle")) - - pmb.create_residue(name="R1", - espresso_system=espresso_system) - pmb.create_residue(name="R2", - espresso_system=espresso_system) - - print("\n=== Residue Instances DataFrame ===") - print(pmb.db._get_instances_df(pmb_type="particle")) - print(pmb.db._get_instances_df(pmb_type="residue")) - print(pmb.db._get_instances_df(pmb_type="bond")) - exit() - pmb.create_molecule(name="M1", - number_of_molecules=2, - espresso_system=espresso_system) - - print("\n=== Molecule Instances DataFrame ===") - - print(pmb.db._get_instances_df(pmb_type="molecule")) - - print("\n=== Peptide Instances DataFrame ===") - pmb.create_molecule(name="Peptide1", - number_of_molecules=1, - espresso_system=espresso_system, - use_default_bond=True) - - pmb.create_counterions(object_name="M1", - cation_name="X", - anion_name="Anion", - espresso_system=espresso_system) - pmb.create_added_salt(espresso_system=espresso_system, - cation_name="X", - anion_name="Anion", - c_salt=0.1*pmb.units.M) - print(pmb.db._get_instances_df(pmb_type="particle")) - print(pmb.db._get_instances_df(pmb_type="peptide")) - - - print("\n=== Bond Instances DataFrame ===") - print(pmb.db._get_instances_df(pmb_type="bond")) - - - print("\n=== Protein Instances DataFrame ===") - pmb.create_protein(name="1beb", - number_of_proteins=1, - espresso_system=espresso_system, - topology_dict=topology_dict) - - print(pmb.db._get_instances_df(pmb_type="protein")) - - print("\n=== Hydrogel Instances DataFrame ===") - pmb.create_hydrogel(name="my_hydrogel", - espresso_system=espresso_system) - 
print(pmb.db._get_instances_df(pmb_type="hydrogel")) - - # ============================================================ - # 3. DEFINE A REACTION: HA <-> A- + H+ - # ============================================================ - - rx = Reaction( - name="acid_dissociation", - pK=4.75, - reaction_type="acid/base", - participants=[ - ReactionParticipant(particle_name="A", state_name="HA", coefficient=-1), - ReactionParticipant(particle_name="A", state_name="A-", coefficient=+1), - ReactionParticipant(particle_name="H", state_name="H+", coefficient=+1), - ], - ) - - db._register_reaction(rx) - - # ============================================================ - # 4. PRINT DATAFRAMES - # ============================================================ - - print("\n=== Instances DataFrame ===") - print(db._get_instances_df(pmb_type="particle")) - - print("\n=== Reactions DataFrame ===") - print(db._get_reactions_df()) - - # ------------------------- - # Now create a different registry with different reduced unit definitions - # and re-create a DFManager with that registry. The DB still stores SI values, - # so conversions are consistent. 
- # ------------------------- - ureg2 = pint.UnitRegistry() - # define different reduced units (different numeric size) - unit_length2 = 0.2 * ureg2.nanometer - temperature2 = 310.0 * ureg2.kelvin - kB2 = scipy.constants.k * ureg2.joule / ureg2.kelvin - kT2 = temperature2 * kB2 - ureg2.define(f"reduced_length = {unit_length2}") - ureg2.define(f"reduced_energy = {kT2}") - - # create a new DFManager that uses the same stored templates but different ureg - # For this demo we will copy the stored templates (in real use you would re-load from serialized storage) - db2 = Manager(units=ureg2) - # re-insert templates by transferring stored representation (simulate loading) - for ptype, tdict in db._templates.items(): - for tname, t in tdict.items(): - db2._register_template(t) - - print("\nTemplates shown with registry 2 (different reduced units):") - print(db2._get_templates_df("particle")) - - io._save_database_csv(pmb.db, folder="test_db_csv") - - db3 = Manager(units=ureg2) - - io._load_database_csv(db3, folder="test_db_csv") - print("\nLoaded DB3 Templates DataFrame:") - print(db3._get_templates_df("particle")) - print(db3._get_templates_df("residue")) - print(db3._get_templates_df("molecule")) - print(db3._get_templates_df("bond")) - print(db3._get_templates_df("peptide")) - print(db3._get_templates_df("protein")) - print(db3._get_templates_df("hydrogel")) - print(db3._get_templates_df("lj")) - exit() - print("\nLoaded DB3 Instances DataFrame:") - print(db3._get_instances_df("particle")) - print(db3._get_instances_df("residue")) - print(db3._get_instances_df("molecule")) - print(db3._get_instances_df("bond")) - print(db3._get_instances_df("peptide")) - print(db3._get_instances_df("protein")) - print(db3._get_instances_df("hydrogel")) - print("\nLoaded DB3 Reactions DataFrame:") - print(db3._get_reactions_df()) - - pmb.delete_instances_in_system(instance_id=0, - pmb_type="hydrogel", - espresso_system=espresso_system) - - print("instances in database after deleting the 
hydrogel") - print(pmb.db._get_instances_df("particle")) - print(pmb.db._get_instances_df("residue")) - print(pmb.db._get_instances_df("molecule")) - print(pmb.db._get_instances_df("bond")) - print(pmb.db._get_instances_df("peptide")) - print(pmb.db._get_instances_df("protein")) - print(pmb.db._get_instances_df("hydrogel")) - - pmb.delete_instances_in_system(instance_id=3, - pmb_type="protein", - espresso_system=espresso_system) - - print("instances in database after deleting the protein") - print(pmb.db._get_instances_df("particle")) - print(pmb.db._get_instances_df("residue")) - print(pmb.db._get_instances_df("molecule")) - print(pmb.db._get_instances_df("bond")) - print(pmb.db._get_instances_df("peptide")) - print(pmb.db._get_instances_df("protein")) - print(pmb.db._get_instances_df("hydrogel")) - - pmb.delete_instances_in_system(instance_id=1, - pmb_type="molecule", - espresso_system=espresso_system) - - pmb.delete_instances_in_system(instance_id=0, - pmb_type="molecule", - espresso_system=espresso_system) - - pmb.delete_instances_in_system(instance_id=1, - pmb_type="residue", - espresso_system=espresso_system) - - pmb.delete_instances_in_system(instance_id=0, - pmb_type="residue", - espresso_system=espresso_system) - - print("instances in database after deleting residues and molecules") - print(pmb.db._get_instances_df("particle")) - print(pmb.db._get_instances_df("residue")) - print(pmb.db._get_instances_df("molecule")) - print(pmb.db._get_instances_df("bond")) - print(pmb.db._get_instances_df("peptide")) - print(pmb.db._get_instances_df("protein")) - print(pmb.db._get_instances_df("hydrogel")) - pmb.delete_instances_in_system(instance_id=2, - pmb_type="peptide", - espresso_system=espresso_system) - - print("instances in database after deleting peptides") - print(pmb.db._get_instances_df("particle")) - print(pmb.db._get_instances_df("residue")) - print(pmb.db._get_instances_df("molecule")) - print(pmb.db._get_instances_df("bond")) - 
print(pmb.db._get_instances_df("peptide")) - print(pmb.db._get_instances_df("protein")) - print(pmb.db._get_instances_df("hydrogel")) - - - -if __name__ == "__main__": - main() - diff --git a/testsuite/globular_protein_unit_tests.py b/testsuite/globular_protein_unit_tests.py index f35cef0..1949a81 100644 --- a/testsuite/globular_protein_unit_tests.py +++ b/testsuite/globular_protein_unit_tests.py @@ -157,11 +157,12 @@ def custom_deserializer(dct): pmb_type="protein") momI = 0 + center_of_mass = pmb.calculate_center_of_mass(instance_id=molecule_id, + pmb_type="protein", + espresso_system=espresso_system) for p in espresso_system.part: - center_of_mass = pmb.calculate_center_of_mass(instance_id=molecule_id, - pmb_type="protein", - espresso_system=espresso_system) if p.mass > 1: + print("hola") rigid_object_id = p.id rigid_object_mass = espresso_system.part.by_id(rigid_object_id).mass rigid_object_rotation = espresso_system.part.by_id(rigid_object_id).rotation diff --git a/testsuite/hydrogel_builder.py b/testsuite/hydrogel_builder.py index d911658..b50d692 100644 --- a/testsuite/hydrogel_builder.py +++ b/testsuite/hydrogel_builder.py @@ -1,5 +1,5 @@ # -# Copyright (C) 2024-2025 pyMBE-dev team +# Copyright (C) 2024-2026 pyMBE-dev team # # This file is part of pyMBE. 
# @@ -30,11 +30,6 @@ sigma=0.355*pmb.units.nm, epsilon=1*pmb.units('reduced_energy')) -CounterIon = "counter_ion" -pmb.define_particle(name=CounterIon, - sigma=0.5*pmb.units.nm, - epsilon=1.5*pmb.units("reduced_energy")) - # define monomers BeadType1 = "C" pmb.define_particle(name=BeadType1, @@ -60,7 +55,9 @@ ) molecule_name = 'alternating_residue' -pmb.define_molecule(name=molecule_name, residue_list = [Res1, Res2, Res1, Res2, Res1, Res2]) +mpc=8 +residue_list = [Res1]*(mpc//2) + [Res2]*(mpc//2) +pmb.define_molecule(name=molecule_name, residue_list = residue_list) # define bond parameters generic_harmonic_constant = 400 * pmb.units('reduced_energy / reduced_length**2') @@ -71,21 +68,15 @@ pmb.define_bond(bond_type = 'harmonic', bond_parameters = HARMONIC_parameters, particle_pairs = [[BeadType1, BeadType1], [BeadType1, BeadType2], - [BeadType2, BeadType2]]) -pmb.define_bond(bond_type = 'harmonic', - bond_parameters = HARMONIC_parameters, particle_pairs = [[NodeType, BeadType1], + [BeadType2, BeadType2], + [NodeType, BeadType1], [NodeType, BeadType2]]) -mpc=8 diamond_lattice = DiamondLattice(mpc, generic_bond_length) box_l = diamond_lattice.box_l espresso_system = espressomd.System(box_l = [box_l]*3) lattice_builder = pmb.initialize_lattice_builder(diamond_lattice) -pmb.create_particle(name=CounterIon, - espresso_system=espresso_system, - number_of_particles=1, - position=[[np.random.uniform(0,box_l)]*3]) pmb.create_molecule(name=molecule_name, number_of_molecules=1, @@ -107,7 +98,7 @@ reverse_node_labels = {v: k for k, v in node_labels.items()} connectivity_with_labels = {(reverse_node_labels[i], reverse_node_labels[j]) for i, j in connectivity} chain_topology = [] -residue_list = [Res1]*(mpc//2) + [Res2]*(mpc//2) + for node_s, node_e in connectivity_with_labels: chain_topology.append({'node_start':node_s, 'node_end': node_e, @@ -118,241 +109,119 @@ # Creating hydrogel hydrogel_id= pmb.create_hydrogel(hydrogel_name, espresso_system) -hydrogel_inst = 
pmb.db.get_instance(pmb_type="hydrogel", - instance_id=hydrogel_id) - -################################################ - -def compare_node_maps(map1, map2): - # Ensure lengths are the same - np.testing.assert_equal(len(map1), len(map2)) - - # Sort lists by lattice_index to ensure correct comparison - map1_sorted = sorted(map1, key=lambda x: tuple(x["lattice_index"])) - map2_sorted = sorted(map2, key=lambda x: tuple(x["lattice_index"])) - - # Compare each node's details - for node1, node2 in zip(map1_sorted, map2_sorted): - np.testing.assert_equal(node1["particle_name"], - node2["particle_name"]) - np.testing.assert_equal(node1["lattice_index"], - node2["lattice_index"]) - - return - -def parse_string_to_array(string): - """ - Convert a string representation of a list (e.g., '[3 1 3]') into a numpy array. - """ - string = string.strip("[]") # Remove brackets - elements = map(int, string.split()) # Split by spaces and convert to integers - return np.array(list(elements)) - -def compare_chain_maps(chain_topology_1, chain_topology_2): - """ - Compare two chain topology maps by checking if they have the same set of edges with corresponding residue lists. 
- """ - np.testing.assert_equal(len(chain_topology_1), len(chain_topology_2)) - - # Convert string coordinates to arrays and sort lists by (node_start, node_end) - def preprocess_chain(chain_topology): - processed = [] - for edge in chain_topology: - processed.append({ - 'node_start': parse_string_to_array(edge['node_start']), - 'node_end': parse_string_to_array(edge['node_end']), - 'residue_list': edge['residue_list'] # Keep as is - }) - return sorted(processed, key=lambda x: (x['node_start'].tolist(), x['node_end'].tolist())) - - chain_topology_1 = preprocess_chain(chain_topology_1) - chain_topology_2 = preprocess_chain(chain_topology_2) - - # Compare edges one by one - for edge1, edge2 in zip(chain_topology_1, chain_topology_2): - np.testing.assert_equal(edge1['node_start'].tolist(), - edge2['node_start'].tolist()) - np.testing.assert_equal(edge1['node_end'].tolist(), - edge2['node_end'].tolist()) - # Check if the residue lists are the same - np.testing.assert_equal(edge1['residue_list'], - edge2['residue_list']) +hydrogel_tpl = pmb.db.get_template(pmb_type="hydrogel", + name=hydrogel_name) +hydrogel_inst = pmb.db.get_instance(pmb_type="hydrogel", + instance_id=hydrogel_id) - return # All edges match class Test(ut.TestCase): - - def test_format_node(self): - assert pmb.format_node([1, 2, 3]) == "[1 2 3]" - assert pmb.format_node([4, 5, 6]) == "[4 5 6]" - def test_hydrogel_info(self): - assert hydrogel_inst.name == hydrogel_name - - def test_node_positions(self): - # Search for nodes of the hydrogel - particle_ids_in_hydrogel = pmb.get_particle_id_map(object_name=hydrogel_name)["all"] - # TODO: this need to be fixed - for _, node_id in hydrogel_info["nodes"].items(): - node_pos = espresso_system.part.by_id(int(node_id[0])).pos - node_name_in_espresso = pmb.df[(pmb.df["pmb_type"] == "particle") & (pmb.df["particle_id"] == node_id[0])]["name"].values[0] - node_label = node_labels[pmb.format_node(list((node_pos*(4/lattice_builder.box_l)).astype(int)))] - node_data = 
node_topology[node_label] - node_name = node_data["particle_name"] - # Assert node's name and position are correctly set - np.testing.assert_equal(node_name_in_espresso, node_name) - np.testing.assert_allclose(np.copy(node_pos), np.array(node_data["lattice_index"]) * 0.25 * diamond_lattice.box_l, atol=1e-7) - - def test_chain_placement_and_connectivity(self): - for molecule_id, molecule_data in hydrogel_info["chains"].items(): - # Ensure that chain's node_start and node_end are correctly set - node_start = molecule_data["node_start"] - node_end = molecule_data["node_end"] - chain_name_in_espresso = pmb.df[(pmb.df["pmb_type"] == "molecule") & (pmb.df["molecule_id"] == molecule_id)]["name"].values[0] - # Assert chain's node_start and node_end - np.testing.assert_equal(chain_name_in_espresso, f"chain_{node_start}_{node_end}") - # Check if chain is connected in the espresso system (e.g., check bond or distance between node_start and node_end) - node_start_id = hydrogel_info["nodes"][node_start][0] - node_end_id = hydrogel_info["nodes"][node_end][0] - start_pos = espresso_system.part.by_id(int(node_start_id)).pos - end_pos = espresso_system.part.by_id(int(node_end_id)).pos - vec_between_nodes = end_pos - start_pos - # Ensure that the chain is connected (check distance, should be within acceptable bond length range) - vec_between_nodes = vec_between_nodes - diamond_lattice.box_l * np.round(vec_between_nodes / diamond_lattice.box_l) - distance_between_nodes = np.linalg.norm(vec_between_nodes) - np.testing.assert_allclose(distance_between_nodes, (diamond_lattice.mpc+1)*generic_bond_length.magnitude, atol=0.0000001) - - def test_all_residue_placement(self): - def get_residue_list(chain_topology, node_start, node_end): - node_start_array = parse_string_to_array(node_start) - node_end_array = parse_string_to_array(node_end) - for edge in chain_topology: - if (np.array_equal(parse_string_to_array(edge['node_start']), node_start_array) and - 
np.array_equal(parse_string_to_array(edge['node_end']), node_end_array)): - return edge['residue_list'] - - for _, chain_data in hydrogel_info["chains"].items(): - residue_in_chain = chain_data.copy() - node_start = residue_in_chain.pop("node_start") - node_end = residue_in_chain.pop("node_end") - node_start_label = lattice_builder.node_labels[node_start] - node_end_label = lattice_builder.node_labels[node_end] - vec_between_nodes = (np.array([float(x) for x in node_end.strip('[]').split()]) - - np.array([float(x) for x in node_start.strip('[]').split()])) * 0.25 * lattice_builder.box_l - vec_between_nodes = vec_between_nodes - lattice_builder.box_l * np.round(vec_between_nodes / lattice_builder.box_l) - backbone_vector = vec_between_nodes / (diamond_lattice.mpc + 1) + def test_hydrogel_template_storage(self): + """ + Unit test that checks that the hydrogel input information + (node_map and chain_map) is correctly stored in the pyMBE database. + """ + + hydrogel_tpl = pmb.db.get_template(pmb_type="hydrogel", + name=hydrogel_name) + # --- Test node_map storage --- + self.assertEqual(len(hydrogel_tpl.node_map), + len(node_topology)) + # Convert both representations to comparable sets + expected_nodes = {(node["particle_name"], tuple(node["lattice_index"])) for node in node_topology} + stored_nodes = {(node.particle_name, tuple(node.lattice_index)) for node in hydrogel_tpl.node_map} + self.assertSetEqual(stored_nodes, + expected_nodes, + "Stored hydrogel node_map does not match input definition") + # --- Test chain_map storage --- + self.assertEqual(len(hydrogel_tpl.chain_map), len(chain_topology)) + expected_chains = {(chain["node_start"], chain["node_end"], chain["molecule_name"]) for chain in chain_topology} + stored_chains = {(chain.node_start, chain.node_end, chain.molecule_name) for chain in hydrogel_tpl.chain_map} + self.assertSetEqual(stored_chains, expected_chains, "Stored hydrogel chain_map does not match input definition") - for (res_id, res_data) in 
residue_in_chain.items(): - central_bead_id = res_data["central_bead_id"] - - # Get the position of the central bead from the espresso system - central_bead_pos = espresso_system.part.by_id(central_bead_id).pos + def test_hydrogel_instance_info(self): + """ + Unit test to check that hydrogel instance store information properly + """ + self.assertEqual(hydrogel_inst.name, hydrogel_name) + self.assertEqual(hydrogel_inst.assembly_id, hydrogel_id) - # Calculate the expected position of the residue's central bead - residue_index = list(residue_in_chain.keys()) .index(res_id) - expected_position = np.array([float(x) for x in node_start.strip('[]').split()]) * 0.25 * diamond_lattice.box_l + (residue_index + 1) * backbone_vector - - # Validate that the central bead's position matches the expected position - np.testing.assert_allclose(np.copy(central_bead_pos), expected_position, atol=1e-7) - expected_node_start = reverse_node_labels[node_start_label] - expected_node_end = reverse_node_labels[node_end_label] - expected_res_name = get_residue_list(chain_topology, expected_node_start, expected_node_end)[residue_index] - residue_name = pmb.df[(pmb.df["pmb_type"]=="residue") & (pmb.df["residue_id"]==res_id)]["name"].values[0] - np.testing.assert_equal(node_start, expected_node_start) - np.testing.assert_equal(node_end, expected_node_end) - np.testing.assert_equal(residue_name, expected_res_name) + def test_node_positions(self): + """ + Unit test that checks that nodes are created in the right position + """ + hydrogel_tpl = pmb.db.get_template(pmb_type="hydrogel", name=hydrogel_name) + # Get all particles belonging to this hydrogel + particle_ids = pmb.db._find_instance_ids_by_attribute(pmb_type="particle", + attribute="assembly_id", + value=hydrogel_id) + node_particles = {pid: pmb.db.get_instance("particle", pid) for pid in particle_ids if pmb.db.get_instance("particle", pid).name == NodeType} + self.assertEqual(len(node_particles), len(hydrogel_tpl.node_map)) + for node_tpl 
in hydrogel_tpl.node_map: + node_index = np.array(node_tpl.lattice_index) + expected_pos = node_index * 0.25 * diamond_lattice.box_l + found = False + for pid, inst in node_particles.items(): + pos = espresso_system.part.by_id(pid).pos + if np.allclose(pos, expected_pos, atol=1e-7): + self.assertEqual(inst.name, node_tpl.particle_name) + found = True + break + + self.assertTrue(found, f"Node at {node_index} not found") + + def test_chain_creation(self): + """ + Unit test that checks that the chains are created as defined in the hydrogel template in the database. + """ + hydrogel_tpl = pmb.db.get_template(pmb_type="hydrogel", name=hydrogel_name) + molecule_ids = pmb.db._find_instance_ids_by_attribute(pmb_type="molecule", + attribute="assembly_id", + value=hydrogel_id) + self.assertEqual(len(molecule_ids), len(hydrogel_tpl.chain_map)) + for chain_tpl in hydrogel_tpl.chain_map: + expected_name = chain_tpl.molecule_name + found = False + for mol_id in molecule_ids: + mol = pmb.db.get_instance("molecule", mol_id) + if mol.name == expected_name: + found = True + break + self.assertTrue(found, f"Chain {expected_name} not found") + + def test_chain_length(self): + """ + Unit test to test that chains are created in the right position + """ + molecule_ids = pmb.db._find_instance_ids_by_attribute(pmb_type="molecule", + attribute="assembly_id", + value=hydrogel_id) + expected = (diamond_lattice.mpc - 1) * generic_bond_length.m_as("reduced_length") + for mol_id in molecule_ids: + particle_ids = pmb.db._find_instance_ids_by_attribute(pmb_type="particle", + attribute="molecule_id", + value=mol_id) + positions = np.array([espresso_system.part.by_id(pid).pos for pid in particle_ids]) + contour = np.sum(np.linalg.norm(np.diff(positions, axis=0), axis=1)) + np.testing.assert_allclose(contour, expected, atol=1e-7) + def test_exceptions(self): - print("*** Unit Test: check that only non-negative values of monomers per chain are allowed ***") + """ + Unit tests for the sanity tests + 
""" + # check that only non-negative values of monomers per chain are allowed np.testing.assert_raises(ValueError, DiamondLattice, 0, generic_bond_length) np.testing.assert_raises(ValueError, DiamondLattice, "invalid", generic_bond_length) np.testing.assert_raises(ValueError, DiamondLattice, -5, generic_bond_length) - print("*** Unit Test passed ***") - print("*** Unit test: check that any objects are other than DiamondLattice passed to initialize_lattice_builder raises a TypeError ***") + # check that any objects are other than DiamondLattice passed to initialize_lattice_builder raises a TypeError np.testing.assert_raises(TypeError, pmb.initialize_lattice_builder, None) - print("*** Unit test passed ***") # Check exceptions when the node and chain maps are incomplete incomplete_node_map = [{"particle_name": NodeType, "lattice_index": [0, 0, 0]},{"particle_name": NodeType, "lattice_index": [1, 1, 1]}] incomplete_chain_map = [{"node_start": "[0 0 0]", "node_end":"[1 1 1]" , "residue_list": residue_list}] np.testing.assert_raises(ValueError, pmb.define_hydrogel, "test_hydrogel", incomplete_node_map, chain_topology) np.testing.assert_raises(ValueError, pmb.define_hydrogel, "test_hydrogel", node_topology, incomplete_chain_map) - # Check that two hydrogels with the same name can be defined in the dataframe - pmb.define_hydrogel(hydrogel_name,node_topology, chain_topology) - hydrogel_count = len(pmb.df[pmb.df["name"] == hydrogel_name]) - assert hydrogel_count == 2, f"Hydrogel '{hydrogel_name}' should be redefined." 
- assert hydrogel_name in pmb.df["name"].values - assert pmb.df.loc[pmb.df["name"] == hydrogel_name, "pmb_type"].values[0] == "hydrogel" - - def test_hydrogel_definitions_in_db(self): - # Verify node_map and chain_map are correctly added - compare_node_maps(pmb.df.loc[pmb.df["name"] == hydrogel_name, "node_map"].values[0], node_topology) - compare_chain_maps(pmb.df.loc[pmb.df["name"] == hydrogel_name, "chain_map"].values[0], chain_topology) - for chain_id in chain_topology: - molecule_name = f"chain_{chain_id['node_start']}_{chain_id['node_end']}" - assert molecule_name in pmb.df["name"].values - #####-- Invalid hydrogel name --##### - # Test if create_hydrogel raises an exception when provided with invalid data - print("*** Unit Test: Check invalid inputs for create_hydrogel ***") - with self.assertLogs(level='WARNING') as cm: - pmb.create_hydrogel("invalid_hydrogel", espresso_system) - self.assertEqual(cm.output, ["WARNING:root:Hydrogel with name 'invalid_hydrogel' is not defined in the DataFrame, no hydrogel will be created."]) - print("*** Invalid Input Test Passed ***") - # Check if the molecules (chains) are correctly stored in the hydrogel data - for ((molecule_id, molecule_data),_) in zip(hydrogel_info["chains"].items(),chain_topology): - molecule_name_in_espresso = pmb.df[(pmb.df["pmb_type"] == "molecule") & (pmb.df["molecule_id"] == molecule_id)]["name"].values[0] - np.testing.assert_equal(molecule_name_in_espresso, f"chain_{molecule_data['node_start']}_{molecule_data['node_end']}") - - print("*** Checking if the ends of an arbitrarly chosen chain is connected to node_start and node_end ***") - - molecule = hydrogel_info["chains"][1] - Res_node_start = list(molecule.values())[0] - Res_node_end = list(molecule.values())[-3] - central_bead_near_node_start = Res_node_start["central_bead_id"] - central_bead_near_node_end = Res_node_end["central_bead_id"] - - node_ids = [] - for indice in node_labels.keys(): - index_pos = np.array(list(int(x) for x in 
indice.strip('[]').split()))*0.25*lattice_builder.box_l - node_id = espresso_system.part.select(lambda p: (p.pos == index_pos).all()).id[0] - node_ids.append(node_id) - - bead_ids_in_random_molecule = [i for i in range(central_bead_near_node_start, central_bead_near_node_end+1)] - particle_ids = pmb.df["particle_id"].fillna(-1).to_numpy() - particle_ids2 = pmb.df["particle_id2"].fillna(-1).to_numpy() - - mask = np.isin(particle_ids, node_ids) & np.isin(particle_ids2, bead_ids_in_random_molecule) - filtered_df = pmb.df[mask] - - # Extract scalar values for central_bead_node_start and central_bead_node_end - central_bead_node_start = filtered_df[filtered_df["particle_id2"] == central_bead_near_node_start]["particle_id"].iloc[0] - central_bead_node_end = filtered_df[filtered_df["particle_id2"] == central_bead_near_node_end]["particle_id"].iloc[0] - - bond_name_node_start = filtered_df[ - (filtered_df["particle_id"] == central_bead_node_start) & - (filtered_df["particle_id2"] == central_bead_near_node_start) - ]["name"].iloc[0] - - bond_name_node_end = filtered_df[ - (filtered_df["particle_id"] == central_bead_node_end) & - (filtered_df["particle_id2"] == central_bead_near_node_end) - ]["name"].iloc[0] - - all_not_na = filtered_df['bond_object'].notna().all() - - assert all_not_na, "Bond object is not defined near nodes" - - central_bead_name_near_node_start = pmb.df[pmb.df["particle_id"]==central_bead_near_node_start]["name"].values[0] - central_bead_name_near_node_end = pmb.df[pmb.df["particle_id"]==central_bead_near_node_end]["name"].values[0] - - if central_bead_name_near_node_start == BeadType1: - possible_bond_names = [NodeType+"-"+BeadType1, BeadType1+"-"+NodeType] - assert bond_name_node_start in possible_bond_names - - if central_bead_name_near_node_end == BeadType2: - possible_bond_names = [NodeType+"-"+BeadType2, BeadType2+"-"+NodeType] - assert bond_name_node_end in possible_bond_names - - print("*** Unit Test passed ***") if __name__ == "__main__": 
ut.main() From c753860de32097d26e2eb67c6c16875fcf18257d Mon Sep 17 00:00:00 2001 From: pmblanco Date: Wed, 28 Jan 2026 18:06:04 +0100 Subject: [PATCH 38/55] increase coverage --- pyMBE/pyMBE.py | 1 + testsuite/globular_protein_unit_tests.py | 149 ++++++++++++++++++++++- testsuite/henderson_hasselbalch_tests.py | 12 +- testsuite/hydrogel_builder.py | 3 +- 4 files changed, 161 insertions(+), 4 deletions(-) diff --git a/pyMBE/pyMBE.py b/pyMBE/pyMBE.py index 5f9adba..113a0c9 100644 --- a/pyMBE/pyMBE.py +++ b/pyMBE/pyMBE.py @@ -1905,6 +1905,7 @@ def enable_motion_of_rigid_object(self, instance_id, pmb_type, espresso_system): rotation=[True,True,True], type=self.propose_unused_type()) rigid_object_center.mass = len(particle_ids_list) + momI = 0 for pid in particle_ids_list: momI += np.power(np.linalg.norm(center_of_mass - espresso_system.part.by_id(pid).pos), 2) rigid_object_center.rinertia = np.ones(3) * momI diff --git a/testsuite/globular_protein_unit_tests.py b/testsuite/globular_protein_unit_tests.py index 1949a81..8feece0 100644 --- a/testsuite/globular_protein_unit_tests.py +++ b/testsuite/globular_protein_unit_tests.py @@ -59,6 +59,16 @@ def custom_deserializer(dct): residue_list = hf.define_protein_AA_residues(sequence=sequence, model=protein_model, pmb=pmb) + # Sanity test for unvalid lj_setups + input_params = {"topology_dict": topology_dict, + "pmb": pmb, + "pka_set": {}, + "lj_setup_mode": "random"} + self.assertRaises(ValueError, + hf.define_protein_AA_particles, + **input_params,) + + # Define a residue for the metal ion pmb.define_residue(name="AA-Ca", central_bead="Ca", @@ -162,7 +172,6 @@ def custom_deserializer(dct): espresso_system=espresso_system) for p in espresso_system.part: if p.mass > 1: - print("hola") rigid_object_id = p.id rigid_object_mass = espresso_system.part.by_id(rigid_object_id).mass rigid_object_rotation = espresso_system.part.by_id(rigid_object_id).rotation @@ -177,6 +186,70 @@ def custom_deserializer(dct): momI += 
np.power(np.linalg.norm(center_of_mass - espresso_system.part.by_id(pid).pos), 2) rinertia = np.ones(3) * momI np.testing.assert_array_almost_equal(rinertia, rigid_object_intertia) + + def test_define_peptide_1beadAA(self): + """ + Test that define_peptide_AA_residues correctly defines + residue templates for a short peptide using the 1beadAA model. + """ + pmb2 = pyMBE.pymbe_library(seed=123) + # Define particles needed by the residues + for bead in ["A", "G", "L"]: + pmb2.define_particle(name=bead, + sigma=0.355 * pmb2.units.nm, + epsilon=1 * pmb2.units("reduced_energy")) + sequence = ["A", "G", "L", "A"] # include repetition on purpose + model = "1beadAA" + hf.define_peptide_AA_residues(sequence=sequence, + model=model, + pmb=pmb2) + # Expected residue template names + expected_residues = {"AA-A", "AA-G", "AA-L"} + # Retrieve all residue templates from the DB + stored_residues = {tpl.name for tpl in pmb2.db.get_templates(pmb_type="residue").values()} + self.assertSetEqual(stored_residues, + expected_residues, + "Residue templates stored in DB do not match expected 1beadAA residues") + # Check residue definitions + for res_name in expected_residues: + residue = pmb2.db.get_template(pmb_type="residue", + name=res_name) + aa = res_name.split("-")[1] + self.assertEqual(residue.central_bead, + aa, + f"Central bead for {res_name} should be '{aa}'") + self.assertEqual(residue.side_chains, + [], + f"Residue {res_name} should have no side chains in 1beadAA model") + + def test_define_peptide_residues_2beadAA(self): + """ + Test residue definition for the 2beadAA model: + - standard residues use CA + side-chain + - G, c, n are single-bead residues + - residues are defined only once + """ + pmb2 = pyMBE.pymbe_library(seed=123) + sequence = ["A", "G", "L", "c", "n", "A"] + hf.define_peptide_AA_residues(sequence, + model="2beadAA", + pmb=pmb2) + # Expected residue templates + expected = {"AA-A": {"central_bead": "CA", "side_chains": ["A"]}, + "AA-L": {"central_bead": "CA", 
"side_chains": ["L"]}, + "AA-G": {"central_bead": "G", "side_chains": []}, + "AA-c": {"central_bead": "c", "side_chains": []}, + "AA-n": {"central_bead": "n", "side_chains": []},} + for resname, props in expected.items(): + tpl = pmb2.db.get_template(pmb_type="residue", name=resname) + self.assertEqual(tpl.central_bead, props["central_bead"]) + self.assertEqual(tpl.side_chains, props["side_chains"]) + # Ensure residues were defined only once + residue_templates = pmb2.db.get_templates(pmb_type="residue").values() + residue_names = [tpl.name for tpl in residue_templates] + self.assertEqual(len(residue_names), len(set(residue_names))) + self.assertEqual(set(residue_names), set(expected.keys())) + def test_protein_parser(self): """ Unit tests for protein_sequence_parser @@ -294,6 +367,7 @@ def test_define_protein_AA_residues(self): desired=output, verbose=True) test_pmb.db.delete_templates(pmb_type="residue") + def test_define_peptide_sanity(self): """ @@ -313,5 +387,78 @@ def test_define_peptide_sanity(self): pmb.define_peptide, **input_parameters) +class TestGetResiduesFromTopologyDict(ut.TestCase): + + def test_1beadAA_basic(self): + """ + Single-bead-per-residue model: + residue name is taken directly from the bead prefix. + """ + topology = {"A1": {}, + "G2": {}, + "L3": {}} + residues = hf.get_residues_from_topology_dict(topology, model="1beadAA") + self.assertEqual(len(residues), 3) + self.assertEqual(residues["1"]["resname"], "A") + self.assertEqual(residues["2"]["resname"], "G") + self.assertEqual(residues["3"]["resname"], "L") + self.assertEqual(residues["1"]["beads"], ["A1"]) + self.assertEqual(residues["2"]["beads"], ["G2"]) + self.assertEqual(residues["3"]["beads"], ["L3"]) + + def test_2beadAA_basic(self): + """ + Two-bead-per-residue model: + CA beads are ignored when determining residue name. 
+ """ + topology = {"CA1": {}, + "L1": {}, + "CA2": {}, + "V2": {}} + residues = hf.get_residues_from_topology_dict(topology, model="2beadAA") + self.assertEqual(len(residues), 2) + self.assertEqual(residues["1"]["resname"], "L") + self.assertEqual(residues["2"]["resname"], "V") + self.assertCountEqual(residues["1"]["beads"], ["CA1", "L1"]) + self.assertCountEqual(residues["2"]["beads"], ["CA2", "V2"]) + + def test_2beadAA_excludes_CA(self): + """ + CA beads must not overwrite the residue name. + """ + topology = {"L1": {}, + "CA1": {}} + residues = hf.get_residues_from_topology_dict(topology, model="2beadAA") + self.assertEqual(residues["1"]["resname"], "L") + + def test_2beadAA_only_CA_is_glycine(self): + """ + Residues containing only CA beads are assigned glycine ('G'). + """ + topology = {"CA1": {}, + "CA2": {}} + residues = hf.get_residues_from_topology_dict(topology, model="2beadAA") + self.assertEqual(residues["1"]["resname"], "G") + self.assertEqual(residues["2"]["resname"], "G") + self.assertEqual(residues["1"]["beads"], ["CA1"]) + self.assertEqual(residues["2"]["beads"], ["CA2"]) + + def test_invalid_model_raises(self): + """ + Unknown protein model must raise ValueError. + """ + topology = {"A1": {}} + with self.assertRaises(ValueError): + hf.get_residues_from_topology_dict(topology, model="3beadAA") + + def test_invalid_bead_id_raises(self): + """ + Bead identifiers without a numeric residue index must raise ValueError. 
+ """ + topology = {"CA": {}, # no index + "L1": {}} + with self.assertRaises(ValueError): + hf.get_residues_from_topology_dict(topology, model="1beadAA") + if __name__ == "__main__": ut.main() \ No newline at end of file diff --git a/testsuite/henderson_hasselbalch_tests.py b/testsuite/henderson_hasselbalch_tests.py index c5b77bf..74bd0c2 100644 --- a/testsuite/henderson_hasselbalch_tests.py +++ b/testsuite/henderson_hasselbalch_tests.py @@ -178,6 +178,16 @@ def test(self): np.testing.assert_allclose(Z_HH_1, HH_Donnan_dict["charges_dict"]["peptide_1"]) np.testing.assert_allclose(Z_HH_2, HH_Donnan_dict["charges_dict"]["peptide_2"]) - + with self.subTest(msg="Trigger sanity test in calculate_HH"): + params = {"template_name": "peptide_1", + "pH_list": [2], + "pka_set":{"D":{"pka_value":3, + "acidity":"random"}, + "H":{"pka_value":7, + "acidity":"random"}}} + self.assertRaises(ValueError, + pmb.calculate_HH, + **params) + if __name__ == "__main__": ut.main() diff --git a/testsuite/hydrogel_builder.py b/testsuite/hydrogel_builder.py index b50d692..ce7df0d 100644 --- a/testsuite/hydrogel_builder.py +++ b/testsuite/hydrogel_builder.py @@ -145,8 +145,7 @@ def test_hydrogel_instance_info(self): Unit test to check that hydrogel instance store information properly """ self.assertEqual(hydrogel_inst.name, hydrogel_name) - self.assertEqual(hydrogel_inst.assembly_id, hydrogel_id) - + self.assertEqual(hydrogel_inst.assembly_id, hydrogel_id) def test_node_positions(self): """ From eb5e12679669ae283bf48263a19050d7815cea00 Mon Sep 17 00:00:00 2001 From: pmblanco Date: Thu, 29 Jan 2026 16:40:04 +0100 Subject: [PATCH 39/55] improve coverage, clean docs --- pyMBE/pyMBE.py | 72 ++-- pyMBE/storage/instances/bond.py | 38 +- pyMBE/storage/instances/hydrogel.py | 32 +- pyMBE/storage/instances/molecule.py | 35 +- pyMBE/storage/instances/particle.py | 54 ++- pyMBE/storage/instances/peptide.py | 39 +-- pyMBE/storage/instances/protein.py | 38 +- pyMBE/storage/instances/residue.py | 43 +-- 
pyMBE/storage/io.py | 23 -- pyMBE/storage/pint_quantity.py | 68 ++-- pyMBE/storage/templates/bond.py | 61 ++-- pyMBE/storage/templates/hydrogel.py | 42 ++- pyMBE/storage/templates/lj.py | 73 ++-- pyMBE/storage/templates/molecule.py | 13 +- pyMBE/storage/templates/particle.py | 63 ++-- pyMBE/storage/templates/peptide.py | 21 +- pyMBE/storage/templates/protein.py | 21 +- pyMBE/storage/templates/residue.py | 2 +- testsuite/CTestTestfile.cmake | 1 + testsuite/bond_tests.py | 72 ++-- testsuite/charge_number_map_tests.py | 2 + testsuite/create_molecule_position_test.py | 1 + .../define_and_create_molecules_unit_tests.py | 26 +- testsuite/globular_protein_unit_tests.py | 36 +- testsuite/lattice_builder.py | 328 +++++++++++------- testsuite/reaction_methods_unit_tests.py | 54 +++ testsuite/set_particle_acidity_test.py | 145 +++++++- testsuite/test_io_database.py | 168 ++++++++- 28 files changed, 978 insertions(+), 593 deletions(-) diff --git a/pyMBE/pyMBE.py b/pyMBE/pyMBE.py index 113a0c9..218d921 100644 --- a/pyMBE/pyMBE.py +++ b/pyMBE/pyMBE.py @@ -263,7 +263,9 @@ def _create_hydrogel_chain(self, hydrogel_chain, nodes, espresso_system, use_def if node_start != node_end or residue_list == residue_list[::-1]: ValueError(f"Aborted creation of hydrogel chain between '{node_start}' and '{node_end}' because pyMBE could not resolve a unique topology for that chain") if reverse: - residue_list = residue_list[::-1] + reverse_residue_order=True + else: + reverse_residue_order=False start_node_id = nodes[node_start_label]["id"] end_node_id = nodes[node_end_label]["id"] # Finding a backbone vector between node_start and node_end @@ -292,7 +294,8 @@ def _create_hydrogel_chain(self, hydrogel_chain, nodes, espresso_system, use_def espresso_system=espresso_system, list_of_first_residue_positions=[first_bead_pos.tolist()],#Start at the first node backbone_vector=np.array(backbone_vector)/l0, - use_default_bond=use_default_bond)[0] + use_default_bond=use_default_bond, + 
reverse_residue_order=reverse_residue_order)[0] # Bond chain to the hydrogel nodes chain_pids = self.db._find_instance_ids_by_attribute(pmb_type="particle", attribute="molecule_id", @@ -721,8 +724,6 @@ def center_object_in_simulation_box(self, instance_id, espresso_system, pmb_type Notes: - Works for both cubic and non-cubic simulation boxes. """ - if pmb_type not in self.db._molecule_like_types: - raise ValueError(f"Input pmb_type = {pmb_type} not supported, supported pyMBE types are: {self.db._molecule_like_types}.") inst = self.db.get_instance(instance_id=instance_id, pmb_type=pmb_type) center_of_mass = self.calculate_center_of_mass(instance_id=instance_id, @@ -955,9 +956,9 @@ def create_hydrogel(self, name, espresso_system, use_default_bond=False): assembly_id=assembly_id)) return assembly_id - def create_molecule(self, name, number_of_molecules, espresso_system, list_of_first_residue_positions=None, backbone_vector=None, use_default_bond=False): + def create_molecule(self, name, number_of_molecules, espresso_system, list_of_first_residue_positions=None, backbone_vector=None, use_default_bond=False, reverse_residue_order = False): """ - Creates 'number_of_molecules' molecule of type 'name' into 'espresso_system'. + Creates instances of a given molecule template name into ESPResSo. Args: name ('str'): @@ -978,6 +979,9 @@ def create_molecule(self, name, number_of_molecules, espresso_system, list_of_fi use_default_bond('bool', optional): Controls if a bond of type 'default' is used to bond particles with undefined bonds in the pyMBE database. + reverse_residue_order('bool', optional): + Creates residues in reverse sequential order than the one defined in the molecule template. Defaults to False. + Returns: ('list' of 'int'): List with the 'molecule_id' of the pyMBE molecule instances created into 'espresso_system'. 
@@ -1009,7 +1013,10 @@ def create_molecule(self, name, number_of_molecules, espresso_system, list_of_fi first_residue = True molecule_tpl = self.db.get_template(pmb_type=pmb_type, name=name) - residue_list = molecule_tpl.residue_list + if reverse_residue_order: + residue_list = molecule_tpl.residue_list[::-1] + else: + residue_list = molecule_tpl.residue_list pos_index = 0 molecule_ids = [] for _ in range(number_of_molecules): @@ -1414,9 +1421,8 @@ def define_bond(self, bond_type, bond_parameters, particle_pairs): tpl._make_name() if tpl.name in bond_names: raise RuntimeError(f"Bond {tpl.name} has already been defined, please check the list of particle pairs") - else: - self.db._register_template(tpl) - bond_names.append(tpl.name) + bond_names.append(tpl.name) + self.db._register_template(tpl) def define_default_bond(self, bond_type, bond_parameters): @@ -1462,24 +1468,18 @@ def define_hydrogel(self, name, node_map, chain_map): chain_map ('list of dict'): [{"node_start": , "node_end": , "residue_list": , ... 
] """ - # Sanity tests - node_indices = {tuple(entry['lattice_index']) for entry in node_map} - + node_indices = {tuple(entry['lattice_index']) for entry in node_map} chain_map_connectivity = set() for entry in chain_map: start = self.lattice_builder.node_labels[entry['node_start']] end = self.lattice_builder.node_labels[entry['node_end']] chain_map_connectivity.add((start,end)) - if self.lattice_builder.lattice.connectivity != chain_map_connectivity: raise ValueError("Incomplete hydrogel: A diamond lattice must contain correct 16 lattice index pairs") - - diamond_indices = {tuple(row) for row in self.lattice_builder.lattice.indices} if node_indices != diamond_indices: raise ValueError(f"Incomplete hydrogel: A diamond lattice must contain exactly 8 lattice indices, {diamond_indices} ") - # Register information in the pyMBE database nodes=[] for entry in node_map: @@ -2089,8 +2089,6 @@ def get_charge_number_map(self): particle_templates = self.db.get_templates("particle") for tpl in particle_templates.values(): for state in self.db.get_particle_states_templates(particle_name=tpl.name).values(): - if state.es_type is None: - continue charge_number_map[state.es_type] = state.z return charge_number_map @@ -2174,34 +2172,6 @@ def get_particle_id_map(self, object_name): """ return self.db.get_particle_id_map(object_name=object_name) - def get_particle_pka(self, particle_name): - """ - Retrieve the pKa value associated with a particle from the pyMBE database. - - Args: - particle_name ('str'): - Name of the particle template. 
- - Returns: - ('float' or 'None'): - - The pKa value if the particle participates in a single acid/base reaction - - None if the particle is inert (no acid/base reaction) - """ - acid_base_reactions = [] - for reaction in self.db._reactions.values(): - if reaction.reaction_type != "acid/base": - continue - for participant in reaction.participants: - if participant.particle_name == particle_name: - acid_base_reactions.append(reaction) - break - if len(acid_base_reactions) == 0: - return None - if len(acid_base_reactions) > 1: - raise ValueError(f"Multiple acid/base reactions found for particle '{particle_name}'. " - "Ambiguous pKa.") - return acid_base_reactions[0].pK - def get_pka_set(self): """ Retrieve the pKa set for all titratable particles in the pyMBE database. @@ -2215,10 +2185,10 @@ def get_pka_set(self): - If a particle participates in multiple acid/base reactions, an error is raised. """ pka_set = {} + supported_reactions = ["monoprotic_acid", + "monoprotic_base"] for reaction in self.db._reactions.values(): - if "monoprotic" not in reaction.reaction_type: - continue - if reaction.pK is None: + if reaction.reaction_type not in supported_reactions: continue # Identify involved particle(s) particle_names = {participant.particle_name for participant in reaction.participants} @@ -2230,8 +2200,6 @@ def get_pka_set(self): acidity = "acidic" elif reaction.reaction_type == "monoprotic_base": acidity = "basic" - else: - raise ValueError(f"Cannot infer acidity for particle '{particle_name}' from reaction type: {reaction.reaction_type}") pka_set[particle_name]["acidity"] = acidity return pka_set diff --git a/pyMBE/storage/instances/bond.py b/pyMBE/storage/instances/bond.py index 18ca829..0a981c7 100644 --- a/pyMBE/storage/instances/bond.py +++ b/pyMBE/storage/instances/bond.py @@ -24,29 +24,21 @@ class BondInstance(PMBBaseModel): """ Instance representation of a bond between two particles. 
- A ``BondInstance`` links two particle instances using a specified - bond template. This class stores only lightweight, serializable - identifiers (not Espresso objects or interaction handles), ensuring - that the object can be safely persisted, exported, and reloaded from - CSV or other storage formats. - Attributes: - pmb_type (str): + pmb_type ('str'): Fixed identifier set to ``"bond"`` for all bond instances. - bond_id (int): + + bond_id ('int'): Unique non-negative integer identifying this bond instance. - name (str): + + name ('str'): Name of the bond template from which this instance was created. - particle_id1 (int): + + particle_id1 ('int'): ID of the first particle involved in the bond. - particle_id2 (int): - ID of the second particle involved in the bond. - es_id (int): - Unique non-negative integer identifying this bond instance. - Validators: - validate_bond_id: - Ensures that ``bond_id`` is a non-negative integer. + particle_id2 ('int'): + ID of the second particle involved in the bond. 
Notes: - ``particle_id1`` and ``particle_id2`` must correspond to @@ -61,8 +53,10 @@ class BondInstance(PMBBaseModel): particle_id1: int particle_id2: int - @field_validator("bond_id") - def validate_bond_id(cls, bid): - if bid < 0: - raise ValueError("bond_id must be a non-negative integer.") - return bid \ No newline at end of file + @field_validator("bond_id", "particle_id1", "particle_id2") + @classmethod + def validate_non_negative_int(cls, value, info): + if value < 0: + raise ValueError(f"{info.field_name} must be a non-negative integer.") + return value + diff --git a/pyMBE/storage/instances/hydrogel.py b/pyMBE/storage/instances/hydrogel.py index ca101a5..e03f173 100644 --- a/pyMBE/storage/instances/hydrogel.py +++ b/pyMBE/storage/instances/hydrogel.py @@ -20,39 +20,33 @@ from typing import List from pydantic import Field from ..base_type import PMBBaseModel +from pydantic import field_validator + class HydrogelInstance(PMBBaseModel): """ Persistent instance representation of a hydrogel object. - A ``HydrogelInstance`` stores the high-level composition of a - hydrogel in terms of the constituent polymer chain molecules. - Each hydrogel is assigned a unique integer ID and has a human-readable - name, along with a list of molecule identifiers referencing previously - registered molecule instances. - - This class is intentionally lightweight and fully serializable. - It does **not** store simulation-engine internal objects - (such as lattice builders, Espresso handles, network topologies, etc.). - These are expected to be constructed externally at run time. - Attributes: - pmb_type (str): - Fixed string identifier for this instance type. Always - ``"hydrogel"``. - assembly_id (int): + pmb_type ('str'): + Fixed string identifier for this instance type. Always ``"hydrogel"``. + + assembly_id ('int'): Unique non-negative integer identifying this hydrogel instance. - name (str): + + name ('str'): Human-readable name for the hydrogel (e.g., ``"HG_001"``). 
Notes: - This class represents the *instance* level (what specific hydrogel exists in the system), not a template describing generic hydrogel types. - - The integrity of ``molecule_ids`` (e.g., references to existing - molecule instances) should be validated in the database layer - during creation or update and not inside this class. """ pmb_type: str = Field(default="hydrogel", frozen=True) assembly_id: int name: str + @field_validator("assembly_id") + def validate_bond_id(cls, aid): + if aid < 0: + raise ValueError("assembly_id must be a non-negative integer.") + return aid \ No newline at end of file diff --git a/pyMBE/storage/instances/molecule.py b/pyMBE/storage/instances/molecule.py index 7fe49ab..66fe0a9 100644 --- a/pyMBE/storage/instances/molecule.py +++ b/pyMBE/storage/instances/molecule.py @@ -25,35 +25,22 @@ class MoleculeInstance(PMBBaseModel): """ Persistent instance representation of a molecule. - A ``MoleculeInstance`` links a concrete molecule in the system to a - molecule template (through its ``name``) and assigns it a unique - integer identifier. Molecule instances typically serve as containers - for ordered lists of residue instances, which are managed in the - database layer outside of this class. + Attributes: + pmb_type ('str'): + Fixed string identifying this object as a molecule instance. Always ``"molecule"``. + + name ('str'): + Name of the molecule **template** from which this instance was created. This must correspond to an existing ``MoleculeTemplate`` in the database. - This class is intentionally minimal and fully serializable. It stores - no engine-specific data or structural objects. + molecule_id ('int'): + Unique non-negative integer identifying this molecule instance within the database. - Attributes: - pmb_type (str): - Fixed string identifying this object as a molecule instance. - Always ``"molecule"``. - name (str): - Name of the molecule **template** from which this instance - was created. 
This must correspond to an existing - ``MoleculeTemplate`` in the database. - molecule_id (int): - Unique non-negative integer identifying this molecule - instance within the database. assembly_id (int | None): - Identifier of the super-parent assembly (e.g. hydrogel) to which this residue belongs. - ``None`` indicates that the residue is not assigned to any assembly. + Identifier of the super-parent assembly (e.g. hydrogel) to which this residue belongs. ``None`` indicates that the residue is not assigned to any assembly. Notes: - - Validation of whether ``name`` corresponds to a registered - molecule template is performed at the database level. - - Structural or connectivity information (e.g., residue ordering) - is maintained outside this class in the instance registry. + - Validation of whether ``name`` corresponds to a registered molecule template is performed at the database level. + - Structural or connectivity information (e.g., residue ordering) is maintained outside this class in the instance registry. """ pmb_type: str = "molecule" diff --git a/pyMBE/storage/instances/particle.py b/pyMBE/storage/instances/particle.py index 3dc55c3..8fd04e0 100644 --- a/pyMBE/storage/instances/particle.py +++ b/pyMBE/storage/instances/particle.py @@ -25,43 +25,31 @@ class ParticleInstance(PMBBaseModel): """ Concrete instance of a particle placed in the simulation. - ``ParticleInstance`` represents a single particle created from a - ``ParticleTemplate`` and placed within the system. - Each instance has a unique integer identifier, an initial chemical - state (e.g., ``"A-"`` or ``"HA"``), and optional associations to a - residue and/or molecule instance. - Attributes: - pmb_type (str): - Fixed string identifying this object as a particle instance. - Always ``"particle"``. - name (str): + pmb_type ('str'): + Fixed string identifying this object as a particle instance. Always ``"particle"``. + + name ('str'): Name of the particle template from which this instance is derived. 
- particle_id (int): - Unique non-negative integer identifying the particle within - the database. Assigned sequentially by the database manager. - initial_state (str): - Name of the particle state at creation time. Must correspond - to one of the allowed states defined in the originating - ``ParticleTemplate``. State transitions are handled at the - simulation level, not here. - residue_id (int | None): - Optional identifier of the ``ResidueInstance`` this particle - belongs to. Particles that are not part of a residue should - leave this field as ``None``. - molecule_id (int | None): - Optional identifier of the ``MoleculeInstance`` this particle - belongs to. Particles not belonging to any molecule should - keep this as ``None``. - assembly_id (int | None): - Identifier of the super-parent assembly (e.g. hydrogel) to which this residue belongs. - ``None`` indicates that the residue is not assigned to any assembly. + + particle_id ('int'): + Unique non-negative integer identifying the particle within the database. Assigned sequentially by the database manager. + + initial_state ('str'): + Name of the particle state at creation time. + + residue_id ('int' | 'None'): + Optional identifier of the ``ResidueInstance`` this particle belongs to. Particles that are not part of a residue should leave this field as ``None``. + + molecule_id ('int' | 'None'): + Optional identifier of the ``MoleculeInstance`` this particle belongs to. Particles not belonging to any molecule should keep this as ``None``. + + assembly_id ('int' | 'None'): + Identifier of the super-parent assembly (e.g. hydrogel) to which this particle instance belongs. ``None`` indicates that the particle is not assigned to any assembly. Notes: - - ``initial_state`` is stored as a plain string to ensure clean - serialization and avoid engine-specific objects. - - Connectivity, bonding, and spatial ordering are external to - this class and handled by the database or simulation backend. 
+ - ``initial_state`` is stored as a plain string to ensure clean serialization and avoid engine-specific objects. + - Connectivity, bonding, and spatial ordering are external to this class and handled by the database or simulation backend. """ pmb_type: str = "particle" name: str diff --git a/pyMBE/storage/instances/peptide.py b/pyMBE/storage/instances/peptide.py index f4d23a0..fa8c4c3 100644 --- a/pyMBE/storage/instances/peptide.py +++ b/pyMBE/storage/instances/peptide.py @@ -25,36 +25,23 @@ class PeptideInstance(PMBBaseModel): """ Instance of a peptide molecule placed in the simulation. - ``PeptideInstance`` represents a concrete occurrence of a peptide, - created from a peptide-related template (e.g., a sequence or residue - list defined elsewhere in the database). Each instance corresponds to - one full peptide chain and is identified by a unique ``molecule_id``. - Attributes: - pmb_type (str): - Fixed string identifying this object as a peptide instance. - Always ``"peptide"``. - name (str): - Name of the peptide template from which this instance was - created. This typically corresponds to a user-defined - peptide type or sequence label. - molecule_id (int): - Unique non-negative integer identifying this peptide within - the database. Assigned sequentially by the database manager - when the instance is created. - assembly_id (int | None): - Identifier of the super-parent assembly (e.g. hydrogel) to which this residue belongs. - ``None`` indicates that the residue is not assigned to any assembly. + pmb_type ('str'): + Fixed string identifying this object as a peptide instance. Always ``"peptide"``. + + name ('str'): + Name of the peptide template from which this instance was created. + + molecule_id ('int'): + Unique non-negative integer identifying this peptide within the database. + + assembly_id ('int' | 'None'): + Identifier of the super-parent assembly (e.g. hydrogel) to which this residue belongs. 
``None`` indicates that the residue is not assigned to any assembly. Notes: - - This class only tracks the identity of the peptide instance. - Residues and particles belonging to the peptide reference this - instance through their ``molecule_id`` fields. - - Connectivity (ordering of residues), spatial arrangement, - and bonding interactions are managed separately by the - database or simulation engine. + - This class only tracks the identity of the peptide instance. Residues and particles belonging to the peptide reference this instance through their ``molecule_id`` fields. + - Connectivity (ordering of residues), spatial arrangement, and bonding interactions are managed separately by the database or simulation engine. """ - pmb_type: str = "peptide" name: str # molecule template name molecule_id: int diff --git a/pyMBE/storage/instances/protein.py b/pyMBE/storage/instances/protein.py index 5e7566b..73a79d2 100644 --- a/pyMBE/storage/instances/protein.py +++ b/pyMBE/storage/instances/protein.py @@ -25,33 +25,23 @@ class ProteinInstance(PMBBaseModel): """ Instance of a protein molecule placed in the simulation. - ``ProteinInstance`` represents a concrete protein object created - from a protein template defined in the database. Each instance - corresponds to one full protein chain and is uniquely identified - by its ``molecule_id``. - Attributes: - pmb_type (str): - Fixed string identifying this object as a protein instance. - Always ``"protein"``. - name (str): - Name of the protein template from which this instance was - created. This usually corresponds to a user-defined or - imported protein type or sequence identifier. - molecule_id (int): - Unique non-negative integer identifying this protein within - the database. Assigned by the database manager upon creation. - assembly_id (int | None): - Identifier of the super-parent assembly (e.g. hydrogel) to which this residue belongs. - ``None`` indicates that the residue is not assigned to any assembly. 
+ pmb_type ('str'): + Fixed string identifying this object as a protein instance. Always ``"protein"``. + + name ('str'): + Name of the protein template from which this instance was created. + + molecule_id ('int'): + Unique non-negative integer identifying this protein within the database. + + assembly_id ('int' | 'None'): + Identifier of the super-parent assembly (e.g. hydrogel) to which this residue belongs. ``None`` indicates that the residue is not assigned to any assembly. Notes: - - A ``ProteinInstance`` only records the identity of the protein - and its template association. - - Residues and particles that belong to the protein reference - this instance through their ``molecule_id`` values. - - The structural connectivity (residue sequence, domains) is - handled at the template level or by the builder modules. + - A ``ProteinInstance`` only records the identity of the protein and its template association. + - Residues and particles that belong to the protein reference this instance through their ``molecule_id`` values. + - The structural connectivity (residue sequence, domains) is handled at the template level or by the builder modules. """ pmb_type: str = "protein" name: str # molecule template name diff --git a/pyMBE/storage/instances/residue.py b/pyMBE/storage/instances/residue.py index 8379c76..97e3be5 100644 --- a/pyMBE/storage/instances/residue.py +++ b/pyMBE/storage/instances/residue.py @@ -25,44 +25,33 @@ class ResidueInstance(PMBBaseModel): """ Instance of a residue placed within a molecule during a simulation. - ``ResidueInstance`` represents a concrete occurrence of a residue - derived from a residue template. Each instance is uniquely indexed - by ``residue_id`` and may optionally belong to a parent molecule, - such as a peptide, protein, or generic molecule. - Attributes: - pmb_type (str): - Fixed string identifying this object as a residue instance. - Always ``"residue"``. 
- name (str): + pmb_type ('str'): + Fixed string identifying this object as a residue instance. Always ``"residue"``. + + name ('str'): Name of the residue template from which this instance is derived. - residue_id (int): - Unique non-negative integer identifying this residue instance - within the database. - molecule_id (int | None): - Identifier of the parent molecule to which this residue belongs. - ``None`` indicates that the residue is not assigned to any molecule. - assembly_id (int | None): - Identifier of the super-parent assembly (e.g. hydrogel) to which this residue belongs. - ``None`` indicates that the residue is not assigned to any assembly. + + residue_id ('int'): + Unique non-negative integer identifying this residue instance within the database. + + molecule_id ('int' | 'None'): + Identifier of the parent molecule to which this residue belongs. ``None`` indicates that the residue is not assigned to any molecule. + + assembly_id ('int' | 'None'): + Identifier of the super-parent assembly (e.g. hydrogel) to which this residue belongs. ``None`` indicates that the residue is not assigned to any assembly. Notes: - - ``ResidueInstance`` does not itself store particle-level - information; instead, particles reference the residue via - ``residue_id``. - - Residues may be standalone (e.g., in coarse systems) or part of - polymers, proteins, peptides, or hydrogels. - - The sequence ordering and topology of residues are encoded at the - molecule instance/template level, not here. + - ``ResidueInstance`` does not itself store particle-level information; instead, particles reference the residue via ``residue_id``. + - Residues may be standalone (e.g., in coarse systems) or part of polymers, proteins, peptides, or hydrogels. + - The sequence ordering and topology of residues are encoded at the molecule instance/template level, not here. 
""" - pmb_type: str = "residue" name: str # residue template name residue_id: int molecule_id: int | None = None assembly_id: int | None = None - @field_validator("residue_id") def validate_residue_id(cls, rid): if rid < 0: diff --git a/pyMBE/storage/io.py b/pyMBE/storage/io.py index 917d6c8..37b77e3 100644 --- a/pyMBE/storage/io.py +++ b/pyMBE/storage/io.py @@ -348,29 +348,6 @@ def _load_database_csv(db, folder): metadata = {} return metadata -def _load_reaction_set(path): - """ - Loads a set of reactions from a JSON file. - - Args: - path (str): Path to the JSON file containing reaction data. - - Returns: - dict[str, Reaction]: Dictionary mapping reaction names to Reaction objects. - """ - with open(path, "r") as f: - data = json.load(f) - reactions = {} - for name, rdata in data["data"].items(): - participants = [ReactionParticipant(**p) for p in rdata["participants"]] - reaction = Reaction(name=name, - participants=participants, - constant=rdata["constant"], - reaction_type=rdata.get("reaction_type", "acid_base"), - metadata=rdata.get("metadata")) - reactions[name] = reaction - return reactions - def _save_database_csv(db, folder): """ Saves the database content into CSV files in a folder. 
diff --git a/pyMBE/storage/pint_quantity.py b/pyMBE/storage/pint_quantity.py index 40d8edc..4da1d1e 100644 --- a/pyMBE/storage/pint_quantity.py +++ b/pyMBE/storage/pint_quantity.py @@ -18,31 +18,30 @@ # from dataclasses import dataclass -from typing import Any -from pint import UnitRegistry, Quantity - +import pint # dimension -> representative unit used to check dimensionality -_DIMENSION_REPRESENTATIVE = { - "length": "nm", - "energy": "meV", - "energy/length**2": "meV/nm**2", - "dimensionless": "dimensionless", - # extend as needed -} - +_DIMENSION_REPRESENTATIVE = {"length": "nm", + "energy": "meV", + "energy/length**2": "meV/nm**2", + "dimensionless": "dimensionless",} # extend as needed @dataclass class PintQuantity: """ Internal representation of a Pint quantity for pyMBE storage. - Stores the magnitude and units of a quantity in a base/SI-like format - along with its logical physical dimension. - Attributes: - magnitude (float): Numeric value of the quantity in the stored units. - units (str): String representation of the units (e.g., "nm", "meV", "meV/nm**2"). - dimension (str): Logical dimension of the quantity, e.g., "length", "energy", etc. + magnitude ('float'): + Numeric value of the quantity in the stored units. + + units ('str'): + String representation of the units (e.g., "nm", "meV", "meV/nm**2"). + + dimension ('str'): + Logical dimension of the quantity, e.g., "length", "energy", etc. + + Notes: + - Stores the magnitude and units of a quantity in a base/SI-like format along with its logical physical dimension. """ magnitude: float @@ -55,18 +54,20 @@ def from_quantity(cls, q, expected_dimension, ureg): Create a PintQuantity from a Pint Quantity, validating its dimension. Args: - q (Quantity): Pint Quantity to store. - expected_dimension (str): Expected logical dimension ("length", "energy", etc.). - ureg (UnitRegistry): Pint UnitRegistry used for unit conversion. + q ('pint.Quantity'): + Pint Quantity to store. 
- Returns: - PintQuantity: Internal representation in SI-like units. + expected_dimension ('str'): + Expected logical dimension ("length", "energy", etc.). - Raises: - TypeError: If `q` is not a pint.Quantity. - ValueError: If the quantity does not match the expected dimension. + ureg ('pint.UnitRegistry'): + Pint UnitRegistry used for unit conversion. + + Returns: + 'PintQuantity': + Internal pyMBE representation in SI units. """ - if not isinstance(q, Quantity): + if not isinstance(q, pint.Quantity): raise TypeError("from_quantity expects a pint.Quantity") # Build a representative unit for the dimension using the provided registry @@ -104,10 +105,12 @@ def to_quantity(self, ureg): Convert the stored PintQuantity back into a Pint Quantity. Args: - ureg (UnitRegistry): Pint UnitRegistry used to construct the Quantity. + ureg ('pint.UnitRegistry'): + Pint UnitRegistry used to construct the Quantity. Returns: - Quantity: Pint Quantity with the stored magnitude and units. + 'pint.Quantity': + Pint Quantity with the stored magnitude and units. """ return self.magnitude * ureg(self.units) @@ -116,7 +119,8 @@ def to_dict(self): Serialize the PintQuantity to a dictionary. Returns: - dict: Dictionary with keys "magnitude", "units", and "dimension". + 'dict': + Dictionary with keys "magnitude", "units", and "dimension". """ return {"magnitude": self.magnitude, "units": self.units, "dimension": self.dimension} @@ -126,10 +130,12 @@ def from_dict(cls, d): Deserialize a PintQuantity from a dictionary. Args: - d (dict): Dictionary containing "magnitude", "units", and "dimension". + d ('dict'): + Dictionary containing "magnitude", "units", and "dimension". Returns: - PintQuantity: Reconstructed PintQuantity object. + 'PintQuantity': + Reconstructed PintQuantity object.
""" return cls(magnitude=d["magnitude"], units=d["units"], dimension=d["dimension"]) diff --git a/pyMBE/storage/templates/bond.py b/pyMBE/storage/templates/bond.py index ce7e3dd..84c6afb 100644 --- a/pyMBE/storage/templates/bond.py +++ b/pyMBE/storage/templates/bond.py @@ -20,25 +20,27 @@ from typing import Dict, Literal from ..base_type import PMBBaseModel from ..pint_quantity import PintQuantity -from pydantic import Field, model_validator - +from pydantic import Field class BondTemplate(PMBBaseModel): """ - Template defining a bond in a pyMBE simulation. + Template defining a bond in the pyMBE database. Attributes: - pmb_type (Literal["bond"]): Fixed type identifier for this template. Always "bond". - name (str): Unique name of the bond template, e.g., "HARMONIC_default". - bond_type (str): Type of bond potential. Examples: "HARMONIC", "FENE". - parameters (Dict[str, PintQuantity]): Dictionary of bond parameters. - Common keys: - - "k": Force constant (energy / distance^2) - - "r0": Equilibrium bond length - - "d_r_max": Maximum bond extension (for FENE) + pmb_type ('Literal["bond"]'): + Fixed type identifier for this template. Always "bond". + + name ('str'): + Unique name of the bond template, e.g., "HARMONIC_default". + + bond_type ('str'): + Type of bond potential. Examples: "HARMONIC", "FENE". + + parameters ('Dict[str, PintQuantity]'): + Dictionary of bond parameters. Notes: - Values are stored as PintQuantity objects for unit-aware calculations. + - Values of the parameters are stored as PintQuantity objects for unit-aware calculations. """ pmb_type: Literal["bond"] = "bond" name: str = Field(default="default") @@ -52,21 +54,22 @@ def make_bond_key(cls, pn1, pn2): """Return a canonical name for a bond between two particle names. Args: - pn1 (str): Name of the first particle. - pn2 (str): Name of the second particle. + pn1 ('str'): + Name of the first particle. + + pn2 ('str'): + Name of the second particle. Returns: - str: Canonical bond name, e.g. "A-B". 
+ ('str'): + Canonical bond name, e.g. "A-B". """ return "-".join(sorted([pn1, pn2])) def _make_name(self): """Create canonical name using particle names.""" if not self.particle_name1 or not self.particle_name2: - raise RuntimeError( - "Cannot generate bond name: particle_name1 or particle_name2 missing." - ) - + raise RuntimeError("Cannot generate bond name: particle_name1 or particle_name2 missing.") self.name = self.make_bond_key(self.particle_name1, self.particle_name2) def get_parameters(self, ureg): @@ -74,24 +77,12 @@ def get_parameters(self, ureg): Retrieve the bond parameters as Pint `Quantity` objects. Args: - ureg (pint.UnitRegistry) : Pint unit registry used to reconstruct physical quantities from storage. + ureg ('pint.UnitRegistry'): + Pint unit registry used to reconstruct physical quantities from storage. Returns: - Dict[str, pint.Quantity]: - A dictionary mapping parameter names to their corresponding - unit-aware Pint quantities. - - Example: - >>> bt = BondTemplate( - ... bond_type="harmonic", - ... particle_name1="A", - ... particle_name2="B", - ... parameters={"k": PintQuantity("100 kJ/mol/nm^2"), - ... "r0": PintQuantity("0.3 nm")} - ... ) - >>> bt.get_parameters() - {'k': , - 'r0': } + 'Dict[str, pint.Quantity]': + A dictionary mapping parameter names to their corresponding unit-aware Pint quantities. """ pint_parameters={} for parameter in self.parameters.keys(): diff --git a/pyMBE/storage/templates/hydrogel.py b/pyMBE/storage/templates/hydrogel.py index b4ed3c0..340e1df 100644 --- a/pyMBE/storage/templates/hydrogel.py +++ b/pyMBE/storage/templates/hydrogel.py @@ -26,8 +26,11 @@ class HydrogelNode(BaseModel): Represents a node in a hydrogel network. Attributes: - particle_name (str): Name of the particle at this node. - lattice_index (List[int]): 3D lattice position of the node. Must be a list of length 3. + particle_name ('str'): + Name of the particle at this node. + + lattice_index ('List[int]'): + 3D lattice position of the node. 
Must be a list of length 3. """ particle_name: str lattice_index: List[int] # must be length 3 @@ -37,9 +40,14 @@ class HydrogelChain(BaseModel): Represents a polymer chain between two hydrogel nodes. Attributes: - molecule_name (str): Name of the molecule representing the polymer chain. - node_start (str): Name of the starting node. - node_end (str): Name of the ending node. + molecule_name ('str'): + Name of the molecule representing the polymer chain. + + node_start ('str'): + Name of the starting node. + + node_end ('str'): + Name of the ending node. """ molecule_name: str node_start: str @@ -47,20 +55,22 @@ class HydrogelChain(BaseModel): class HydrogelTemplate(PMBBaseModel): """ - Template defining a hydrogel network in pyMBE. - - A hydrogel template consists of nodes (particles at specific lattice positions) - and polymer chains connecting those nodes. + Template defining a hydrogel network in the pyMBE database. Attributes: - pmb_type (str): Fixed type identifier for this template. Always "hydrogel". - name (str): Unique name of the hydrogel template. - node_map (List[HydrogelNode]): List of nodes defining the hydrogel lattice. - chain_map (List[HydrogelChain]): List of polymer chains connecting nodes. + pmb_type ('str'): + Fixed type identifier for this template. Always "hydrogel". + + name ('str'): + Unique name of the hydrogel template. + + node_map ('List[HydrogelNode]'): + List of nodes defining the hydrogel lattice. + + chain_map ('List[HydrogelChain]'): + List of polymer chains connecting nodes. 
""" pmb_type: str = Field(default="hydrogel", frozen=True) name: str - node_map: List[HydrogelNode] = Field(default_factory=list) - chain_map: List[HydrogelChain] = Field(default_factory=list) - + chain_map: List[HydrogelChain] = Field(default_factory=list) \ No newline at end of file diff --git a/pyMBE/storage/templates/lj.py b/pyMBE/storage/templates/lj.py index 1108388..c8c34d2 100644 --- a/pyMBE/storage/templates/lj.py +++ b/pyMBE/storage/templates/lj.py @@ -24,80 +24,49 @@ class LJInteractionTemplate(BaseModel): """ Template representing the Lennard–Jones (LJ) interaction parameters - between two particle *states*. - - The template **always generates the interaction name automatically** - from the two provided state names, ensuring standardized naming and - preventing inconsistencies between different LJ entries. - - The LJ parameters stored here correspond to the *final effective* - values after applying the combining rule (e.g., Lorentz–Berthelot). - This allows users to inspect, validate, or export the exact values - that will be passed to the simulation engine. + between two particle *states* stored in the pyMBE database. Attributes: - pmb_type (str): + pmb_type ('str'): Fixed identifier for the template type. Always ``"lj"``. - state1 (str): + + state1 ('str'): Name of the first particle state in the pair. - state2 (str): + + state2 ('str'): Name of the second particle state in the pair. - sigma (PintQuantity): - Lennard–Jones σ parameter (distance scale) after applying - the combining rule. - epsilon (PintQuantity): + + sigma ('PintQuantity'): + Lennard–Jones σ parameter (distance scale) after applying the combining rule. + + epsilon ('PintQuantity'): Lennard–Jones ε parameter (energy scale) after combining. - cutoff (PintQuantity): + + cutoff ('PintQuantity'): Cutoff radius for the interaction. - offset (PintQuantity): - Offset applied to the potential (ESPResSo parameter). - shift (str | PintQuantity): - Shift applied at the cutoff. 
May be ``"auto"`` or a PintQuantity value. - name (str): - Auto-generated unique identifier for the interaction, built from - ``state1`` and ``state2`` in alphabetical order. Cannot be set - manually by the user. - Notes: - - The order of ``state1`` and ``state2`` does **not** matter. - The name is always generated as ``"min(state1, state2)-max(state1, state2)"``. + offset ('PintQuantity'): + Offset applied to the potential (ESPResSo parameter). - Examples: - Creating an LJ interaction: + shift ('str | PintQuantity'): + Shift applied at the cutoff. May be ``"auto"`` or a PintQuantity value. - >>> LJInteractionTemplate( - ... state1="HA", - ... state2="A-", - ... sigma=sigma, - ... epsilon=epsilon, - ... cutoff=cutoff, - ... offset=offset, - ... shift="auto", - ... ) - - Interaction between ``"L"`` and ``"W"`` results in: + name ('str'): + Auto-generated unique identifier for the interaction, built from ``state1`` and ``state2`` in alphabetical order. Cannot be set manually by the user. - >>> LJInteractionTemplate( - ... state1="W", - ... state2="L", - ... ... - ... ).name - 'L-W' + Notes: + - The order of ``state1`` and ``state2`` does **not** matter. The name is always generated as ``"min(state1, state2)-max(state1, state2)"``. """ - pmb_type: str = "lj" name: str = Field(default="", description="Automatically generated name") - state1: str state2: str - sigma: PintQuantity epsilon: PintQuantity cutoff: PintQuantity offset: PintQuantity shift: str | float - @classmethod def _make_name(cls, state1: str, state2: str) -> str: """Create a canonical name from two states.""" diff --git a/pyMBE/storage/templates/molecule.py b/pyMBE/storage/templates/molecule.py index 8fe5479..37280ea 100644 --- a/pyMBE/storage/templates/molecule.py +++ b/pyMBE/storage/templates/molecule.py @@ -22,12 +22,17 @@ class MoleculeTemplate(PMBBaseModel): """ - Template defining a molecule in pyMBE. + Template defining a molecule in the pyMBE database. 
Attributes: - pmb_type (str): Fixed type identifier for this template. Always "molecule". - name (str): Unique name of the molecule template. - residue_list (List[str]): Ordered list of residue names that make up the molecule. + pmb_type ('str'): + Fixed type identifier for this template. Always "molecule". + + name ('str'): + Unique name of the molecule template. + + residue_list ('List[str]'): + Ordered list of residue names that make up the molecule. """ pmb_type: str = Field(default="molecule", frozen=True) name: str diff --git a/pyMBE/storage/templates/particle.py b/pyMBE/storage/templates/particle.py index 6470cdf..111e16b 100644 --- a/pyMBE/storage/templates/particle.py +++ b/pyMBE/storage/templates/particle.py @@ -17,9 +17,8 @@ # along with this program. If not, see . # -from typing import Dict, Literal, Optional -from pydantic import Field, field_validator - +from typing import Literal, Optional +from pydantic import Field from ..base_type import PMBBaseModel from ..pint_quantity import PintQuantity @@ -28,10 +27,17 @@ class ParticleStateTemplate(PMBBaseModel): Represents a single state of a particle in pyMBE. Attributes: - pmb_type (Literal["particle_state"]): Fixed type identifier. Always "particle_state". - name (str): Name of the particle state, e.g., "HA", "A-", "H+". - z (int): Charge of the particle in this state. - es_type (float): Identifier for the state used in Espresso simulations. + pmb_type ('Literal["particle_state"]'): + Fixed type identifier. Always "particle_state". + + name ('str'): + Name of the particle state, e.g., "HA", "A-", "H+". + + z ('int'): + Charge of the particle in this state. + + es_type ('float'): + Identifier for the state used in Espresso simulations. """ pmb_type: Literal["particle_state"] = "particle_state" particle_name: str @@ -41,20 +47,30 @@ class ParticleStateTemplate(PMBBaseModel): class ParticleTemplate(PMBBaseModel): """ - Template describing a particle type, including interaction parameters and allowed states. 
+ Template describing a particle in the pyMBE database. Attributes: - pmb_type (str): Fixed type identifier. Always "particle". - sigma (PintQuantity): Particle diameter or size parameter. - epsilon (PintQuantity): Depth of the LJ potential well (interaction strength). - cutoff (PintQuantity): Cutoff distance for the LJ potential. - offset (PintQuantity): Offset distance for the LJ potential. - states (Dict[str, ParticleState]): Dictionary of allowed particle states. - Keys are state names, values are ParticleState instances. - initial_state (Optional[str]): Name of the default particle state. - If not provided explicitly, the first added state becomes the initial state. - """ + pmb_type ('str'): + Fixed type identifier. Always "particle". + + sigma ('PintQuantity'): + Particle diameter or size parameter. + epsilon ('PintQuantity'): + Depth of the LJ potential well (interaction strength). + + cutoff ('PintQuantity'): + Cutoff distance for the LJ potential. + + offset ('PintQuantity'): + Offset distance for the LJ potential. + + states ('Dict[str, ParticleState]'): + Dictionary of allowed particle states. Keys are state names, values are ParticleState instances. + + initial_state ('Optional[str]'): + Name of the default particle state. If not provided explicitly, the first added state becomes the initial state. + """ pmb_type: str = Field(default="particle", frozen=True) name : str sigma: PintQuantity @@ -68,17 +84,12 @@ def get_lj_parameters(self, ureg): Retrieve the Lennard-Jones interaction parameters for the particle template. Args: - ureg (pint.UnitRegistry) : Pint unit registry used to reconstruct physical quantities from storage. + ureg ('pint.UnitRegistry'): + Pint unit registry used to reconstruct physical quantities from storage. Returns: - Dict[str, pint.Quantity]: + 'Dict[str, pint.Quantity]': A dictionary containing the following LJ parameters: sigma, epsilon, cutoff, offset. - - Example: - >>> tpl = ParticleTemplate(...) 
- >>> params = tpl.get_lj_parameters() - >>> params["sigma"] - """ return {"sigma": self.sigma.to_quantity(ureg), "epsilon": self.epsilon.to_quantity(ureg), diff --git a/pyMBE/storage/templates/peptide.py b/pyMBE/storage/templates/peptide.py index 60caace..36ef81d 100644 --- a/pyMBE/storage/templates/peptide.py +++ b/pyMBE/storage/templates/peptide.py @@ -22,14 +22,23 @@ class PeptideTemplate(PMBBaseModel): """ - Template defining a peptide in a pyMBE simulation. + Template defining a peptide in the pyMBE database. Attributes: - pmb_type (str): Fixed type identifier. Always "peptide". - name (str): Unique name of the peptide template. - model (str): Name or type of the model used for this peptide. - residue_list (List[str]): Ordered list of residue names that make up the peptide. - sequence (List[str]): Ordered sequence of residues representing the peptide's structure. + pmb_type ('str'): + Fixed type identifier. Always "peptide". + + name ('str'): + Unique name of the peptide template. + + model ('str'): + Name or type of the model used for this peptide. + + residue_list ('List[str]'): + Ordered list of residue names that make up the peptide. + + sequence ('List[str]'): + Ordered sequence of residues representing the peptide's structure. """ pmb_type: str = Field(default="peptide", frozen=True) name: str diff --git a/pyMBE/storage/templates/protein.py b/pyMBE/storage/templates/protein.py index 6953edb..6284f79 100644 --- a/pyMBE/storage/templates/protein.py +++ b/pyMBE/storage/templates/protein.py @@ -22,14 +22,23 @@ class ProteinTemplate(PMBBaseModel): """ - Template defining a protein in a pyMBE simulation. + Template defining a protein in the pyMBE database. Attributes: - pmb_type (str): Fixed type identifier. Always "protein". - name (str): Unique name of the protein template. - model (str): Name or type of the model used for this protein. - residue_list (List[str]): Ordered list of residue names that compose the protein. 
- sequence (List[str]): Ordered sequence of residues representing the protein's structure. + pmb_type ('str'): + Fixed type identifier. Always "protein". + + name ('str'): + Unique name of the protein template. + + model ('str'): + Name or type of the model used for this protein. + + residue_list ('List[str]'): + Ordered list of residue names that compose the protein. + + sequence ('List[str]'): + Ordered sequence of residues representing the protein's structure. """ pmb_type: str = Field(default="protein", frozen=True) name: str diff --git a/pyMBE/storage/templates/residue.py b/pyMBE/storage/templates/residue.py index 1b14617..032e3e0 100644 --- a/pyMBE/storage/templates/residue.py +++ b/pyMBE/storage/templates/residue.py @@ -22,7 +22,7 @@ class ResidueTemplate(PMBBaseModel): """ - Template defining a residue in a pyMBE simulation. + Template defining a residue in the pyMBE database. Attributes: pmb_type (str): Fixed type identifier. Always "residue". diff --git a/testsuite/CTestTestfile.cmake b/testsuite/CTestTestfile.cmake index d344f88..aa66fb3 100644 --- a/testsuite/CTestTestfile.cmake +++ b/testsuite/CTestTestfile.cmake @@ -75,3 +75,4 @@ pymbe_add_test(PATH determine_reservoir_concentrations_unit_test.py) pymbe_add_test(PATH globular_protein_unit_tests.py) pymbe_add_test(PATH lattice_builder.py) pymbe_add_test(PATH hydrogel_builder.py) +pymbe_add_test(PATH database_unit_tests.py) diff --git a/testsuite/bond_tests.py b/testsuite/bond_tests.py index ed32ab4..bd3b2f8 100644 --- a/testsuite/bond_tests.py +++ b/testsuite/bond_tests.py @@ -25,25 +25,27 @@ # Create an instance of pyMBE library espresso_system=espressomd.System (box_l = [10]*3) -pmb = pyMBE.pymbe_library(seed=42) -pmb.define_particle(name='A', + + +class Test(ut.TestCase): + def define_templates(self, pmb): + pmb.define_particle(name='A', z=0, sigma=0.4*pmb.units.nm, epsilon=1*pmb.units('reduced_energy')) -pmb.define_particle(name='B', - z=0, - sigma=0.4*pmb.units.nm, - 
epsilon=1*pmb.units('reduced_energy')) + pmb.define_particle(name='B', + z=0, + sigma=0.4*pmb.units.nm, + epsilon=1*pmb.units('reduced_energy')) -harmonic_params = {'r_0' : 0.4 * pmb.units.nm, - 'k' : 400 * pmb.units('reduced_energy / reduced_length**2')} + self.harmonic_params = {'r_0' : 0.4 * pmb.units.nm, + 'k' : 400 * pmb.units('reduced_energy / reduced_length**2')} -FENE_params = {'r_0' : 0.4 * pmb.units.nm, - 'k' : 400 * pmb.units('reduced_energy / reduced_length**2'), - 'd_r_max': 0.8 * pmb.units.nm} + self.FENE_params = {'r_0' : 0.4 * pmb.units.nm, + 'k' : 400 * pmb.units('reduced_energy / reduced_length**2'), + 'd_r_max': 0.8 * pmb.units.nm} -class Test(ut.TestCase): def get_bond_object(self, particle_id_pair): """ @@ -79,10 +81,12 @@ def test_bond_setup(self): """ Unit test to check the setup of bonds in pyMBE """ + pmb = pyMBE.pymbe_library(seed=42) + self.define_templates(pmb) #Define bond # check particle bond pmb.define_bond(bond_type = "harmonic", - bond_parameters = harmonic_params, + bond_parameters = self.harmonic_params, particle_pairs = [['A', 'A']]) # Create two particles pids = pmb.create_particle(name="A", @@ -97,7 +101,7 @@ def test_bond_setup(self): bond_object = self.get_bond_object(particle_id_pair=pids) self.check_bond_setup(bond_object=bond_object, - input_parameters=harmonic_params, + input_parameters=self.harmonic_params, bond_type="harmonic") # Clean-up database for inst_id in pids: @@ -120,7 +124,7 @@ def test_bond_setup(self): # Test that the bond is properly setup when there is a default bond pmb.define_default_bond(bond_type = "harmonic", - bond_parameters = harmonic_params) + bond_parameters = self.harmonic_params) pmb.create_bond(particle_id1=pid_B[0], particle_id2=pid_A[0], @@ -141,7 +145,7 @@ def test_bond_setup(self): # Test setup of FENE bonds pmb.define_bond(bond_type = "FENE", - bond_parameters = FENE_params, + bond_parameters = self.FENE_params, particle_pairs = [['A', 'A']]) # Create two particles pids = 
pmb.create_particle(name="A", @@ -156,7 +160,7 @@ def test_bond_setup(self): bond_object = self.get_bond_object(particle_id_pair=pids) self.check_bond_setup(bond_object=bond_object, - input_parameters=FENE_params, + input_parameters=self.FENE_params, bond_type="FENE") # Clean-up database for inst_id in pids: @@ -180,7 +184,7 @@ def test_bond_setup(self): # Test that the FENE bond is properly setup when there is a default bond pmb.define_default_bond(bond_type = "harmonic", - bond_parameters = harmonic_params) + bond_parameters = self.harmonic_params) pmb.create_bond(particle_id1=pid_B[0], particle_id2=pid_A[0], @@ -202,7 +206,7 @@ def test_bond_setup(self): # Test setup of the default bond pmb.define_default_bond(bond_type = "harmonic", - bond_parameters = harmonic_params) + bond_parameters = self.harmonic_params) pids = pmb.create_particle(name="A", espresso_system=espresso_system, @@ -216,7 +220,7 @@ def test_bond_setup(self): bond_object = self.get_bond_object(particle_id_pair=pids) self.check_bond_setup(bond_object=bond_object, - input_parameters=harmonic_params, + input_parameters=self.harmonic_params, bond_type="harmonic") # Clean-up database for inst_id in pids: @@ -257,11 +261,13 @@ def test_bond_setup(self): pmb.db.delete_templates(pmb_type="bond") def test_bond_raised_exceptions(self): + pmb = pyMBE.pymbe_library(seed=42) + self.define_templates(pmb) for callback in [pmb.define_bond, pmb.define_default_bond]: with self.subTest(msg=f'using method {callback.__qualname__}()'): - self.check_bond_exceptions(callback) + self.check_bond_exceptions(callback,pmb) - def check_bond_exceptions(self, callback): + def check_bond_exceptions(self, callback, pmb): # check exceptions for unknown bond types bond_type = 'Quartic' bond = {'r_0' : 0.4 * pmb.units.nm, @@ -315,6 +321,26 @@ def check_bond_exceptions(self, callback): np.testing.assert_raises(ValueError, callback, **input_parameters) - + # test that redefining a bond produces a RuntimeError + if callback ==
pmb.define_bond: + test = {"bond_type":"FENE", + "bond_parameters":self.FENE_params, + "particle_pairs":[["Y","Y"],["Y","Y"]]} + + np.testing.assert_raises(RuntimeError, + pmb.define_bond, + **test) + + def test_sanity_get_bond_template(self): + """ + tests the sanity test for "get_bond_template" + """ + pmb = pyMBE.pymbe_library(51) + inputs = {"particle_name1": "A", + "particle_name2": "A"} + np.testing.assert_raises(ValueError, + pmb.get_bond_template, + **inputs) + if __name__ == '__main__': ut.main() diff --git a/testsuite/charge_number_map_tests.py b/testsuite/charge_number_map_tests.py index 6f9ddd9..eb0551a 100644 --- a/testsuite/charge_number_map_tests.py +++ b/testsuite/charge_number_map_tests.py @@ -69,6 +69,8 @@ def test_basic_particle(self): 1) self.assertEqual(charge_map[type_map["B"]], 0) + + if __name__ == '__main__': ut.main() \ No newline at end of file diff --git a/testsuite/create_molecule_position_test.py b/testsuite/create_molecule_position_test.py index 8b3e210..f6edb52 100644 --- a/testsuite/create_molecule_position_test.py +++ b/testsuite/create_molecule_position_test.py @@ -153,6 +153,7 @@ def test_sanity_center_object_in_simulation_box(self): self.assertRaises(ValueError, pmb.center_object_in_simulation_box, **input_parameters) + if __name__ == "__main__": diff --git a/testsuite/define_and_create_molecules_unit_tests.py b/testsuite/define_and_create_molecules_unit_tests.py index 6c3ad2c..fc8699e 100644 --- a/testsuite/define_and_create_molecules_unit_tests.py +++ b/testsuite/define_and_create_molecules_unit_tests.py @@ -444,6 +444,24 @@ def test_create_and_delete_particles(self): # There should be only 8 particles (from the remaining M2 molecule) self.assertEqual(first=len(pmb.get_instances_df(pmb_type="particle")), second=8) + + def test_set_particle_initial_state(self): + """ + Unit tests for set_particle_initial_state + """ + pmb = pyMBE.pymbe_library(23) + pmb.define_particle(name="A", + sigma=1*pmb.units.nm, + 
epsilon=1*pmb.units.reduced_energy, + z=1) + state_1 = pmb.db.get_template(pmb_type="particle", name="A").initial_state + pmb.set_particle_initial_state(particle_name="A", + state_name="random") + state_2 = pmb.db.get_template(pmb_type="particle", name="A").initial_state + self.assertIsNot(state_1, + state_2) + self.assertEqual(state_2, + "random") def test_get_radius_map(self): """ @@ -468,7 +486,13 @@ def test_get_radius_map(self): self.assertEqual(first=isinstance(pmb.get_radius_map()[0],float), second=True) self.assertEqual(first=pmb.get_radius_map(dimensionless=False)[0].dimensionality, - second=pmb.units.nm.dimensionality) + second=pmb.units.nm.dimensionality) + + # Test the sanity test + pmb2 = pyMBE.pymbe_library(24) + empty_map = pmb2.get_radius_map() + self.assertEqual(empty_map, + {}) if __name__ == "__main__": ut.main() \ No newline at end of file diff --git a/testsuite/globular_protein_unit_tests.py b/testsuite/globular_protein_unit_tests.py index 8feece0..39488c5 100644 --- a/testsuite/globular_protein_unit_tests.py +++ b/testsuite/globular_protein_unit_tests.py @@ -18,8 +18,8 @@ import numpy as np import espressomd import unittest as ut - -import re +import tempfile +import os import json import pathlib import pyMBE @@ -386,6 +386,38 @@ def test_define_peptide_sanity(self): np.testing.assert_raises(ValueError, pmb.define_peptide, **input_parameters) + + def test_read_protein_vtf_unknown_residue(self): + pmb = pyMBE.pymbe_library(23) + vtf_content = """\ +atom 1 name CA resname XXX resid 1 chain A radius 1.0 +1 0.0 0.0 0.0 +""" + with tempfile.NamedTemporaryFile("w", delete=False) as f: + f.write(vtf_content) + filename = f.name + try: + with self.assertRaisesRegex(ValueError,"Unknown residue name 'XXX' in VTF file"): + pmb.read_protein_vtf(filename) + finally: + os.remove(filename) + + def test_read_protein_vtf_duplicate_particle_label(self): + pmb = pyMBE.pymbe_library(23) + vtf_content = """\ +atom 1 name CA resname ALA resid 1 chain A radius 1.0 
+atom 2 name CA resname ALA resid 1 chain A radius 1.0 +1 0.0 0.0 0.0 +2 1.0 0.0 0.0 +""" + with tempfile.NamedTemporaryFile("w", delete=False) as f: + f.write(vtf_content) + filename = f.name + try: + with self.assertRaisesRegex(ValueError,"Duplicate particle label 'CA1'"): + pmb.read_protein_vtf(filename) + finally: + os.remove(filename) class TestGetResiduesFromTopologyDict(ut.TestCase): diff --git a/testsuite/lattice_builder.py b/testsuite/lattice_builder.py index 551ce7e..04e8037 100644 --- a/testsuite/lattice_builder.py +++ b/testsuite/lattice_builder.py @@ -1,5 +1,5 @@ # -# Copyright (C) 2024-2025 pyMBE-dev team +# Copyright (C) 2024-2026 pyMBE-dev team # # This file is part of pyMBE. # @@ -23,12 +23,21 @@ import matplotlib.pyplot as plt import pyMBE import espressomd +import pint matplotlib.use("Agg") # use a non-graphic backend -pmb = pyMBE.pymbe_library(seed=42) + +units = pint.UnitRegistry() + mpc = 4 -bond_l = 0.355 * pmb.units.nm +bond_l = 0.355 * units.nm + + + +diamond = pyMBE.lib.lattice.DiamondLattice(mpc, bond_l) +espresso_system = espressomd.System(box_l=[diamond.box_l] * 3) + # Define node particle NodeType1 = "node_type1" @@ -44,135 +53,126 @@ Res2 = "res_2" Res3 = "res_3" -# Defining bonds in the hydrogel for all different pairs -generic_harmonic_constant = 400 * pmb.units('reduced_energy / reduced_length**2') -generic_bond_l = 0.355*pmb.units.nm -HARMONIC_parameters = {'r_0' : generic_bond_l, - 'k' : generic_harmonic_constant} + +def define_templates(pmb): + # Defining bonds in the hydrogel for all different pairs + generic_harmonic_constant = 400 * pmb.units('reduced_energy / reduced_length**2') + generic_bond_l = 0.355*pmb.units.nm + HARMONIC_parameters = {'r_0' : generic_bond_l, + 'k' : generic_harmonic_constant} + pmb.define_particle(name=NodeType1, + sigma=0.355*pmb.units.nm, + epsilon=1*pmb.units('reduced_energy')) + pmb.define_particle(name=NodeType2, + sigma=0.355*pmb.units.nm, + epsilon=1*pmb.units('reduced_energy')) + 
pmb.define_particle(name=BeadType1, + sigma=0.355*pmb.units.nm, + epsilon=1*pmb.units('reduced_energy')) + pmb.define_particle(name=BeadType2, + sigma=0.355*pmb.units.nm, + epsilon=1*pmb.units('reduced_energy')) + pmb.define_particle(name=BeadType3, + sigma=0.355*pmb.units.nm, + epsilon=1*pmb.units('reduced_energy')) + pmb.define_residue(name=Res1, + central_bead=BeadType1, + side_chains=[]) + pmb.define_residue(name=Res2, + central_bead=BeadType2, + side_chains=[]) + pmb.define_residue(name=Res3, + central_bead=BeadType3, + side_chains=[]) + pmb.define_bond(bond_type = 'harmonic', + bond_parameters = HARMONIC_parameters, particle_pairs = [[BeadType1, BeadType1], + [BeadType1, BeadType2], + [BeadType1, BeadType3], + [BeadType2, BeadType2], + [BeadType2, BeadType3], + [BeadType3, BeadType3], + [BeadType1, NodeType1], + [BeadType1, NodeType2], + [BeadType2, NodeType1], + [BeadType2, NodeType2], + [BeadType3, NodeType1], + [BeadType3, NodeType2]]) + + class Test(ut.TestCase): - colormap = { - "default_linker":"green", - "default_monomer":"blue", - Res3: "red", - NodeType2: "orange", - NodeType1: "cyan", - Res1: "yellow", - Res2: "magenta" - } + colormap = {"default_linker":"green", + "default_monomer":"blue", + Res3: "red", + NodeType2: "orange", + NodeType1: "cyan", + Res1: "yellow", + Res2: "magenta"} - @classmethod - def setUpClass(cls): - pmb.define_particle(name=NodeType1, sigma=0.355*pmb.units.nm, epsilon=1*pmb.units('reduced_energy')) - pmb.define_particle(name=NodeType2, sigma=0.355*pmb.units.nm, epsilon=1*pmb.units('reduced_energy')) - pmb.define_particle(name=BeadType1, sigma=0.355*pmb.units.nm, epsilon=1*pmb.units('reduced_energy')) - pmb.define_particle(name=BeadType2, sigma=0.355*pmb.units.nm, epsilon=1*pmb.units('reduced_energy')) - pmb.define_particle(name=BeadType3, sigma=0.355*pmb.units.nm, epsilon=1*pmb.units('reduced_energy')) - pmb.define_residue( - name=Res1, - central_bead=BeadType1, - side_chains=[] - ) - pmb.define_residue( - name=Res2, - 
central_bead=BeadType2, - side_chains=[] - ) - pmb.define_residue( - name=Res3, - central_bead=BeadType3, - side_chains=[] + def test_lattice_setup(self): + """ + Unit tests for the lattice builder module + """ + pmb = pyMBE.pymbe_library(42) + define_templates(pmb=pmb) + # --- Invalid low-level operations --- + with self.assertRaises(ValueError): + pmb._create_hydrogel_node("[1 1 1]", NodeType1, espresso_system) + + with self.assertRaises(ValueError): + pmb._create_hydrogel_chain( + "[0 0 0]", "[1 1 1]", + {0: [0, 0, 0], 1: diamond.box_l / 4.0 * np.ones(3)}, + espresso_system, ) - pmb.define_bond(bond_type = 'harmonic', - bond_parameters = HARMONIC_parameters, particle_pairs = [[BeadType1, BeadType1], - [BeadType1, BeadType2], - [BeadType1, BeadType3], - [BeadType2, BeadType2], - [BeadType2, BeadType3], - [BeadType3, BeadType3], - [BeadType1, NodeType1], - [BeadType1, NodeType2], - [BeadType2, NodeType1], - [BeadType2, NodeType2], - [BeadType3, NodeType1], - [BeadType3, NodeType2]]) - - def test_lattice_setup(self): - diamond = pyMBE.lib.lattice.DiamondLattice(mpc, bond_l) - espresso_system = espressomd.System(box_l = [diamond.box_l]*3) - np.testing.assert_raises(ValueError, - pmb._create_hydrogel_node, - "[1 1 1]", - NodeType1, - espresso_system) - np.testing.assert_raises(ValueError, - pmb._create_hydrogel_chain, - "[0 0 0]", "[1 1 1]", - {0:[0,0,0],1:diamond.box_l/4.0*np.ones(3)}, - espresso_system) + + # --- Lattice initialization --- lattice = pmb.initialize_lattice_builder(diamond) - sequence = [Res3, Res1, Res2, Res1] - # build default structure + assert len(lattice.nodes) == len(diamond.indices) - assert len(lattice.chains) == 0 - - # this function need some work - lattice.set_chain(node_start="[0 0 0]", node_end="[1 1 1]", - sequence=sequence) - np.testing.assert_equal(actual = lattice.get_chain("[0 0 0]", "[1 1 1]"), desired = sequence, verbose=True) - lattice.set_chain(node_start="[1 1 1]", node_end="[0 0 0]", - sequence=sequence) - 
np.testing.assert_equal(actual = lattice.get_chain("[1 1 1]", "[0 0 0]"), desired = sequence, verbose=True) - np.testing.assert_raises(RuntimeError, lattice.get_chain, "[1 1 1]", "[2 2 0]") + assert len(lattice.chains) == 0 + + # --- Default chains --- lattice.add_default_chains(mpc=2) - assert len(lattice.chains) == len(diamond.connectivity) + assert len(lattice.chains) == len(diamond.connectivity) - # define custom nodes + # --- Default node types --- assert lattice.get_node("[1 1 1]") == "default_linker" assert lattice.get_node("[0 0 0]") == "default_linker" - # Change default node type + + # --- Custom node assignment --- lattice.set_node(node="[1 1 1]", residue=NodeType1) lattice.set_node(node="[0 0 0]", residue=NodeType2) - np.testing.assert_equal(actual = lattice.get_node("[1 1 1]"), - desired = NodeType1, - verbose=True) - - np.testing.assert_equal(actual = lattice.get_node("[1 1 1]"), - desired = NodeType1, - verbose=True) - np.testing.assert_equal(actual = lattice.get_node("[0 0 0]"), - desired = NodeType2, - verbose=True) - np.testing.assert_equal(actual = lattice.get_node("[2 2 0]"), - desired = "default_linker", - verbose=True) - np.testing.assert_equal(actual = lattice.get_node("[3 1 3]"), - desired = "default_linker", - verbose=True) - - np.testing.assert_equal(actual = lattice.get_chain("[1 1 1]", "[0 0 0]"), - desired = sequence, - verbose=True) - np.testing.assert_equal(actual = lattice.get_chain("[0 0 0]", "[1 1 1]"), - desired = sequence[::-1], - verbose=True) - + + np.testing.assert_equal(lattice.get_node("[1 1 1]"), NodeType1) + np.testing.assert_equal(lattice.get_node("[0 0 0]"), NodeType2) + + # untouched nodes remain default + np.testing.assert_equal(lattice.get_node("[2 2 0]"), "default_linker") + np.testing.assert_equal(lattice.get_node("[3 1 3]"), "default_linker") + + # --- Colormap --- lattice.set_colormap(self.colormap) for index, (label, color) in enumerate(self.colormap.items()): - np.testing.assert_equal(actual = 
lattice.get_monomer_color(label),desired = color, verbose=True) - np.testing.assert_equal(actual = lattice.get_monomer_color_index(label),desired = index, verbose=True) + np.testing.assert_equal(lattice.get_monomer_color(label), color) + np.testing.assert_equal(lattice.get_monomer_color_index(label), index) - # Test invalid operations - with self.assertRaisesRegex(RuntimeError, "monomer 'unknown' has no associated color in the colormap"): + # --- Invalid colormap access --- + with self.assertRaisesRegex( + RuntimeError, "monomer 'unknown' has no associated color" + ): lattice.get_monomer_color("unknown") + with self.assertRaises(AssertionError): lattice.set_colormap("red") - # Test node operations - with self.assertRaisesRegex(AssertionError, r"node '\[0 5 13\]' doesn't exist in a diamond lattice"): + # --- Invalid node access --- + with self.assertRaisesRegex( + AssertionError, r"node '\[0 5 13\]' doesn't exist in a diamond lattice" + ): lattice.get_node("[0 5 13]") - # Test plot + # --- Plot smoke tests --- fig = plt.figure(figsize=(12, 12)) ax = fig.add_subplot(projection="3d", computed_zorder=False) lattice.draw_lattice(ax) @@ -181,19 +181,109 @@ def test_lattice_setup(self): fig = plt.figure(figsize=(12, 12)) ax = fig.add_subplot(projection="3d", computed_zorder=False) - lattice.set_colormap(self.colormap) lattice.draw_lattice(ax) lattice.draw_simulation_box(ax) ax.legend() plt.close(fig) - # Test edge case with strict mode deactivated - diamond_test = pyMBE.lib.lattice.DiamondLattice(mpc, bond_l) - lattice_test = pmb.initialize_lattice_builder(diamond_test) - lattice_test.strict = False - key, reverse = lattice_test._get_node_vector_pair("[1 1 1]", "[3 3 1]") - assert not reverse, "Expected reverse to be False in non-strict mode" - np.testing.assert_equal(actual=key, desired=(1,5)) + # Clean espresso system + espresso_system.part.clear() + + pmb2 = pyMBE.pymbe_library(23) + define_templates(pmb=pmb2) + diamond2 = pyMBE.lib.lattice.DiamondLattice(mpc, bond_l) 
+ lattice = pmb2.initialize_lattice_builder(diamond2) + + sequence = [Res3, Res1, Res2, Res1] + node_a = "[0 0 0]" + node_b = "[1 1 1]" + + # 1. Define chain in forward direction + lattice.set_chain( + node_start=node_a, + node_end=node_b, + sequence=sequence + ) + + np.testing.assert_equal( + actual=lattice.get_chain(node_a, node_b), + desired=sequence, + verbose=True + ) + + # 2. Define chain explicitly in reverse direction + lattice.set_chain( + node_start=node_b, + node_end=node_a, + sequence=sequence + ) + + np.testing.assert_equal( + actual=lattice.get_chain(node_b, node_a), + desired=sequence, + verbose=True + ) + + # 3. Geometry-safe reversal: + # forward lookup returns reversed sequence + np.testing.assert_equal( + actual=lattice.get_chain(node_a, node_b), + desired=sequence[::-1], + verbose=True + ) + + # 4. Invalid chain lookup + with self.assertRaises(RuntimeError): + lattice.get_chain("[1 1 1]", "[2 2 0]") + + # 5. Non-strict mode reverse detection + lattice.strict = False + key, reverse = lattice._get_node_vector_pair("[1 1 1]", "[3 3 1]") + + assert not reverse, "Expected reverse=False in non-strict mode" + np.testing.assert_equal(actual=key, desired=(1, 5)) + + # 6. 
Coverage for reverse branch in _create_hydrogel_chain + # -------------------------------------------------------- + + + # Register molecule template and default_linker particle + pmb2.define_molecule(name="test_chain", + residue_list=sequence) + pmb2.define_particle(name="default_linker", + sigma=1*pmb2.units.reduced_length, + epsilon=1*pmb2.units.reduced_energy) + pmb2.define_default_bond(bond_type="harmonic", + bond_parameters={"r_0": 1*pmb2.units.reduced_length, + "k": 400 * pmb2.units('reduced_energy / reduced_length**2')}) + + # Define nodes dictionary as expected by _create_hydrogel_chain + nodes = {} + id = 0 + for label, index in lattice.node_labels.items(): + nodes[label] = {"name": lattice.get_node(label), + "pos": lattice.lattice.indices[index], + "id": id} + id +=1 + from pyMBE.storage.templates.hydrogel import HydrogelChain + # Define hydrogel chain template (reverse geometry) + hydrogel_chain = HydrogelChain(node_start=node_b, # reversed on purpose + node_end=node_a, + molecule_name="test_chain") + mol_id = pmb2._create_hydrogel_chain(hydrogel_chain=hydrogel_chain, + nodes=nodes, + espresso_system=espresso_system, + use_default_bond=True) + # Extract created particle IDs + chain_pids = pmb2.db._find_instance_ids_by_attribute(pmb_type="particle", + attribute="molecule_id", + value=mol_id) + # Extract residue sequence from particle instances + created_residues_id = [pmb2.db.get_instance("particle", pid).residue_id for pid in chain_pids] + created_residues = [pmb2.db.get_instance("residue", rid).name for rid in created_residues_id] + # Reverse branch MUST reverse the residue list + np.testing.assert_equal(actual=created_residues, desired=sequence[::-1], verbose=True) + if __name__ == "__main__": ut.main() diff --git a/testsuite/reaction_methods_unit_tests.py b/testsuite/reaction_methods_unit_tests.py index 8084efc..e573271 100644 --- a/testsuite/reaction_methods_unit_tests.py +++ b/testsuite/reaction_methods_unit_tests.py @@ -22,6 +22,9 @@ import 
espressomd import unittest as ut + + + def reaction_method_test_template(parameters): # Create an instance of the pyMBE library @@ -342,5 +345,56 @@ def test_grxmc_unified_setup(self): parameters["z_H"] = -1 reaction_method_test_template(parameters) + def test_mixed_setup(self): + """ + Unit test to check that setting up a reaction different than acid/base + does not break the setup of the cpH method + """ + + pmb = pyMBE.pymbe_library(23) + # Define the acidic particle + pmb.define_particle( + name = "A", + acidity = "acidic", + pka = 4, + sigma = 1*pmb.units('reduced_length'), + epsilon = 1*pmb.units('reduced_energy')) + + # Define the ions + pmb.define_particle( + name="Na", + z=1, + sigma = 1*pmb.units('reduced_length'), + epsilon = 1*pmb.units('reduced_energy')) + + pmb.define_particle( + name="Cl", + z=-1, + sigma = 1*pmb.units('reduced_length'), + epsilon = 1*pmb.units('reduced_energy')) + + input_parameters = {"c_salt_res":1 * pmb.units.mol/ pmb.units.L, + "salt_cation_name": "Na", + "salt_anion_name": "Cl", + "activity_coefficient": lambda x: 1.0} + + # Add the reactions using pyMBE + pmb.setup_gcmc(**input_parameters) + pmb.setup_cpH(counter_ion="Na", + constant_pH=7) + cpH_setup = {"pK": 4, + "reaction_type": "monoprotic_acid"} + gcmc_setup = {"pK": 3.1391280768992047, + "reaction_type": "ion_insertion"} + for reaction in pmb.db.get_reactions(): + if reaction.simulation_method == "cpH": + test_setup = cpH_setup.copy() + elif reaction.simulation_method == "GCMC": + test_setup = gcmc_setup.copy() + for key in test_setup.keys(): + self.assertAlmostEqual(test_setup[key], + getattr(reaction,key)) + + if __name__ == "__main__": ut.main() \ No newline at end of file diff --git a/testsuite/set_particle_acidity_test.py b/testsuite/set_particle_acidity_test.py index 28b8538..6adfdc2 100644 --- a/testsuite/set_particle_acidity_test.py +++ b/testsuite/set_particle_acidity_test.py @@ -1,5 +1,5 @@ # -# Copyright (C) 2024-2025 pyMBE-dev team +# Copyright (C) 2024-2026 
pyMBE-dev team # # This file is part of pyMBE. # @@ -21,9 +21,15 @@ import pandas as pd import pyMBE import unittest as ut - +from pyMBE.storage.reactions.reaction import Reaction, ReactionParticipant # Create an instance of pyMBE library -pmb = pyMBE.pymbe_library(seed=42) + +participants = [ReactionParticipant(particle_name="A", + state_name="HA", + coefficient=1), + ReactionParticipant(particle_name="A", + state_name="A", + coefficient=1)] class Test(ut.TestCase): @@ -31,6 +37,7 @@ def test_inert_particles_setup(self): """ Test that an inert particle is correctly set up in the pyMBE database. """ + pmb = pyMBE.pymbe_library(seed=42) input_parameters={"name":"I", "acidity": pd.NA, "pka": pd.NA, @@ -50,6 +57,7 @@ def test_acidic_particles_setup(self): """ Test that an acidic particle is correctly set up in the pyMBE database. """ + pmb = pyMBE.pymbe_library(seed=42) input_parameters={"name":"A", "acidity": "acidic", "pka":4, @@ -73,36 +81,137 @@ def test_basic_particles_setup(self): """ Test that a basic particle is correctly set up in the pyMBE database. 
""" + pmb = pyMBE.pymbe_library(seed=42) input_parameters={"name":"B", - "acidity": "basic", - "pka":9, - "sigma": 1.0*pmb.units.reduced_length, - "epsilon": 1.0*pmb.units.reduced_energy} + "acidity": "basic", + "pka":9, + "sigma": 1.0*pmb.units.reduced_length, + "epsilon": 1.0*pmb.units.reduced_energy} pmb.define_particle(**input_parameters) protonated_state = pmb.db.get_template(name="BH", - pmb_type="particle_state") + pmb_type="particle_state") deprotonated_state = pmb.db.get_template(name="B", - pmb_type="particle_state") + pmb_type="particle_state") - self.assertEqual(protonated_state.name, "BH") - self.assertEqual(protonated_state.z, 1) - self.assertEqual(deprotonated_state.name, "B") - self.assertEqual(deprotonated_state.z, 0) - self.assertNotEqual(protonated_state.es_type, deprotonated_state.es_type) - pmb.db.delete_template(name="B", pmb_type="particle") - pmb.db.delete_template(name="BH", pmb_type="particle_state") - pmb.db.delete_template(name="B", pmb_type="particle_state") + self.assertEqual(protonated_state.name, + "BH") + self.assertEqual(protonated_state.z, + 1) + self.assertEqual(deprotonated_state.name, + "B") + self.assertEqual(deprotonated_state.z, + 0) + self.assertNotEqual(protonated_state.es_type, + deprotonated_state.es_type) + pmb.db.delete_template(name="B", + pmb_type="particle") + pmb.db.delete_template(name="BH", + pmb_type="particle_state") + pmb.db.delete_template(name="B", + pmb_type="particle_state") def test_sanity_acidity(self): """ Unit tests to check that define_monoprototic_acidbase_reaction raises ValueErrors when expected. 
""" + pmb = pyMBE.pymbe_library(seed=42) # Check that define_monoprototic_acidbase_reaction raises a ValueError if a non-supported acidity is provided input_parametersA={"particle_name":"A", "acidity": "random", "pka":4,} - self.assertRaises(ValueError, pmb.define_monoprototic_acidbase_reaction,**input_parametersA) + self.assertRaises(ValueError, + pmb.define_monoprototic_acidbase_reaction, + **input_parametersA) + # Check that define_monoprototic_particle_states raises a ValueError if a non-supported acidity is provided + input_parametersA={"particle_name":"A", + "acidity": "random",} + self.assertRaises(ValueError, + pmb.define_monoprototic_particle_states, + **input_parametersA) + + def test_get_pka_set_empty(self): + """ + Unit test to check that get_pka_set() returns an empty dict if no reactions have been defined + """ + pmb = pyMBE.pymbe_library(seed=42) + pka_set = pmb.get_pka_set() + self.assertEqual(pka_set, + {}) + + def test_get_pka_set_monoprotic_acid(self): + """ + Unit test to check that get_pka_set() returns the right output for a monoprotic acid + """ + pmb = pyMBE.pymbe_library(seed=42) + reaction = Reaction(reaction_type="monoprotic_acid", + pK=4.5, + particle_name="A", + participants=participants) + pmb.db._reactions["r1"] = reaction + pka_set = pmb.get_pka_set() + expected = {"A": {"pka_value": 4.5, + "acidity": "acidic"}} + self.assertEqual(pka_set, expected) + + def test_get_pka_set_monoprotic_base(self): + """ + Unit test to check that get_pka_set() returns the right output for a monoprotic base + """ + pmb = pyMBE.pymbe_library(seed=42) + reaction = Reaction(reaction_type="monoprotic_base", + pK=9.2, + particle_name="A", + participants=participants) + pmb.db._reactions["r1"] = reaction + pka_set = pmb.get_pka_set() + expected = {"A": {"pka_value": 9.2, + "acidity": "basic"}} + self.assertEqual(pka_set, expected) + + def test_get_pka_set_unsupported_reaction_skipped(self): + """ + Unit test to check that get_pka_set() ignores unsupported 
reactions + """ + pmb = pyMBE.pymbe_library(seed=42) + supported = Reaction(reaction_type="monoprotic_acid", + pK=5.0, + particle_name="A", + participants=participants) + unsupported = Reaction(reaction_type="redox", + pK=1.0, + particle_name="X", + participants=participants) + + pmb.db._reactions["r1"] = supported + pmb.db._reactions["r2"] = unsupported + + pka_set = pmb.get_pka_set() + + self.assertEqual(len(pka_set), 1) + self.assertIn("A", pka_set) + self.assertNotIn("X", pka_set) + + + def test_get_pka_set_duplicate_particle_raises(self): + """ + Checks the sanity test for particles involved in multiple reactions + """ + pmb = pyMBE.pymbe_library(seed=42) + r1 = Reaction(reaction_type="monoprotic_acid", + pK=4.0, + particle_name="A", + participants=participants) + r2 = Reaction(reaction_type="monoprotic_base", + pK=9.0, + particle_name="A", + participants=participants) + + pmb.db._reactions["r1"] = r1 + pmb.db._reactions["r2"] = r2 + + with self.assertRaisesRegex(ValueError, "Multiple acid/base reactions found for particle 'A'"): + pmb.get_pka_set() if __name__ == "__main__": ut.main() diff --git a/testsuite/test_io_database.py b/testsuite/test_io_database.py index 1cedef3..991a7ab 100644 --- a/testsuite/test_io_database.py +++ b/testsuite/test_io_database.py @@ -1,5 +1,5 @@ # -# Copyright (C) 2024-2026 pyMBE-dev team +# Copyright (C) 2026 pyMBE-dev team # # This file is part of pyMBE. 
# @@ -19,12 +19,16 @@ import tempfile import espressomd import pandas as pd -import numpy as np import unittest as ut +import json +import os import pyMBE from pyMBE.lib.lattice import DiamondLattice import pyMBE.lib.handy_functions as hf - +from pyMBE.storage.io import _decode +from pyMBE.storage.io import _encode +from pyMBE.storage.pint_quantity import PintQuantity +from pyMBE.storage.instances.bond import BondInstance espresso_system=espressomd.System (box_l = [100]*3) @@ -417,8 +421,166 @@ def test_io_instances(self): pmb.delete_instances_in_system(espresso_system=espresso_system, instance_id=protid, pmb_type="protein") + + def test_database_io_exceptions(self): + """ + Unit test to check exceptions in the io of the pyMBE database + """ + pmb = pyMBE.pymbe_library(51) + inputs = {"folder": "test", + "format": "random"} + self.assertRaises(ValueError, + pmb.load_database, + **inputs) + self.assertRaises(ValueError, + pmb.save_database, + **inputs) + + def test_decode_edge_cases(self): + """ + Tests the edge cases in the IO decoder of the database + """ + self.assertIsNone(_decode(None)) + self.assertIsNone(_decode(float("nan"))) + self.assertIsNone(_decode("")) + self.assertIsNone(_decode("nan")) + + # malformed JSON → fallback to raw string + self.assertEqual(_decode("{not:json}"), "{not:json}") + + # already-native types + self.assertEqual(_decode({"a": 1}), {"a": 1}) + self.assertEqual(_decode([1, 2]), [1, 2]) + self.assertEqual(_decode(3), 3) + + value = 3.14159 + result = _decode(value) + + self.assertIsInstance(result, float) + self.assertEqual(result, value) + + value = (1, 2, 3) # tuple is not dict, list, int, bool, float, or str + result = _decode(value) + self.assertIsNone(result) + + def test_encode_edge_cases(self): + """ + Tests the edge cases in the IO encoder of the database + """ + self.assertEqual(_encode(None), "") + + pq = PintQuantity(magnitude=3.0, units="nm", dimension="length") + encoded = _encode(pq) + self.assertIsInstance(encoded, str) 
+ self.assertIn("magnitude", encoded) + + class Dummy: + def __str__(self): + return "dummy" + + self.assertEqual(_encode(Dummy()), json.dumps("dummy")) + + def test_load_empty_database_folder(self): + """ + Tests that an empty folder does not populate the pyMBE database + """ + new_pmb = pyMBE.pymbe_library(2) + with tempfile.TemporaryDirectory() as tmp: + new_pmb.load_database(tmp) + # database should remain empty + self.assertEqual(len(new_pmb.db._templates), 0) + self.assertEqual(len(new_pmb.db._instances), 0) + self.assertEqual(len(new_pmb.db._reactions), 0) + + def test_partial_database_files(self): + """ + Test that the database does not break if a file is missing + """ + pmb = pyMBE.pymbe_library(1) + pmb.define_particle(name="X", + sigma=1*pmb.units.reduced_length, + epsilon=1*pmb.units.reduced_energy) + with tempfile.TemporaryDirectory() as tmp: + pmb.save_database(tmp) + # manually delete one CSV + os.remove(os.path.join(tmp, "templates_particle.csv")) + new_pmb = pyMBE.pymbe_library(2) + new_pmb.load_database(tmp) + # particle templates missing, but no crash + self.assertTrue(new_pmb.get_templates_df("particle").empty) + + def test_metadata_roundtrip(self): + """ + Test that covers: + - happy path of metadata reading + - return value of load_database + """ + pmb = pyMBE.pymbe_library(1) + with tempfile.TemporaryDirectory() as tmp: + pmb.save_database(tmp) + meta = {"creator": "test", "version": 1} + with open(os.path.join(tmp, "metadata.json"), "w") as f: + json.dump(meta, f) + new_pmb = pyMBE.pymbe_library(2) + loaded_meta = new_pmb.load_database(tmp) + self.assertEqual(loaded_meta, meta) + + def test_invalid_metadata_file(self): + """ + Covers handling of broken metadata files + """ + pmb = pyMBE.pymbe_library(1) + + with tempfile.TemporaryDirectory() as tmp: + pmb.save_database(tmp) + with open(os.path.join(tmp, "metadata.json"), "w") as f: + f.write("not json") + + new_pmb = pyMBE.pymbe_library(2) + meta = new_pmb.load_database(tmp) + + 
self.assertEqual(meta, {}) + + def test_default_bond_particle_names(self): + """ + Test io for default bonds + """ + pmb = pyMBE.pymbe_library(1) + pmb.define_default_bond(bond_type="FENE", bond_parameters={'r_0' : 0.5 * pmb.units.nm, + 'k' : 500 * pmb.units('reduced_energy / reduced_length**2'), + 'd_r_max': 0.5 * pmb.units.nm}) + + with tempfile.TemporaryDirectory() as tmp: + pmb.save_database(tmp) + new_pmb = pyMBE.pymbe_library(2) + new_pmb.load_database(tmp) + + df = new_pmb.get_templates_df("bond") + self.assertTrue(df["particle_name1"].isna().any()) + + def test_non_sequential_instance_ids(self): + """ + Tests that IDs are not implicitly re-indexed. + """ + pmb = pyMBE.pymbe_library(1) + pmb.db._instances["bond"] = {10: BondInstance(name="b", bond_id=10, particle_id1=1, particle_id2=2), + 42: BondInstance(name="b", bond_id=42, particle_id1=3, particle_id2=4)} + with tempfile.TemporaryDirectory() as tmp: + pmb.save_database(tmp) + new_pmb = pyMBE.pymbe_library(2) + new_pmb.load_database(tmp) + self.assertSetEqual(set(new_pmb.db._instances["bond"].keys()), {10, 42},) + + def test_load_database_missing_folder(self): + """ + Tests that that the io raises an error if the folder does not exist + """ + pmb = pyMBE.pymbe_library(1) + with self.assertRaises(FileNotFoundError): + pmb.load_database("does_not_exist") + if __name__ == '__main__': ut.main() From 1879159b62f5b6bd1a2aa46aeb2b071ef43cfc18 Mon Sep 17 00:00:00 2001 From: pmblanco Date: Fri, 30 Jan 2026 09:29:50 +0100 Subject: [PATCH 40/55] add missing test --- testsuite/database_unit_tests.py | 138 +++++++++++++++++++++++++++++++ 1 file changed, 138 insertions(+) create mode 100644 testsuite/database_unit_tests.py diff --git a/testsuite/database_unit_tests.py b/testsuite/database_unit_tests.py new file mode 100644 index 0000000..fa0c513 --- /dev/null +++ b/testsuite/database_unit_tests.py @@ -0,0 +1,138 @@ +# +# Copyright (C) 2026 pyMBE-dev team +# +# This file is part of pyMBE. 
+# +# pyMBE is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# pyMBE is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . + +import unittest as ut +import pyMBE +from pyMBE.storage.instances.particle import ParticleInstance +from pyMBE.storage.instances.residue import ResidueInstance +from pyMBE.storage.instances.molecule import MoleculeInstance +from pyMBE.storage.instances.peptide import PeptideInstance +from pyMBE.storage.instances.protein import ProteinInstance +from pyMBE.storage.instances.bond import BondInstance +from pyMBE.storage.instances.hydrogel import HydrogelInstance +from pyMBE.storage.templates.bond import BondTemplate +from pyMBE.storage.pint_quantity import PintQuantity +import pint + +class Test(ut.TestCase): + def test_sanity_db(self): + """ + Sanity tests for the pyMBE database + """ + pmb = pyMBE.pymbe_library(23) + pmb.define_molecule(name ="test", + residue_list=[]) + pmb.define_peptide(name="test", + sequence="", + model="1beadAA") + inputs = {"name": "test", + "allowed_types": {"molecule", "peptide"}} + self.assertRaises(ValueError, + pmb._get_template_type, + **inputs) + + def test_instance_id_validators(self): + """ + Tests that negative values of instances raise a ValueError in the pyMBE database + """ + inputs = {"name":"A", + "particle_id":-1, + "initial_state":"A"} + self.assertRaises(ValueError, + ParticleInstance, + **inputs) + inputs = {"name":"A", + "residue_id":-1} + self.assertRaises(ValueError, + ResidueInstance, + **inputs) + inputs = {"name":"A", + 
"molecule_id":-1} + self.assertRaises(ValueError, + MoleculeInstance, + **inputs) + inputs = {"name":"A", + "molecule_id":-1} + self.assertRaises(ValueError, + PeptideInstance, + **inputs) + inputs = {"name":"A", + "molecule_id":-1} + self.assertRaises(ValueError, + ProteinInstance, + **inputs) + inputs = {"name":"A", + "assembly_id":-1} + self.assertRaises(ValueError, + HydrogelInstance, + **inputs) + inputs = {"name":"A", + "bond_id":-1, + "particle_id1":1, + "particle_id2":2} + self.assertRaises(ValueError, + BondInstance, + **inputs) + inputs = {"name":"A", + "bond_id":1, + "particle_id1":-1, + "particle_id2":2} + self.assertRaises(ValueError, + BondInstance, + **inputs) + inputs = {"name":"A", + "bond_id":1, + "particle_id1":1, + "particle_id2":-2} + self.assertRaises(ValueError, + BondInstance, + **inputs) + + def test_make_name_bond_template(self): + inputs = {"bond_type": "harmonic", + "parameters": {"r": PintQuantity(magnitude=1, + units="nm", + dimension="length")}} + bond_tpl = BondTemplate(**inputs) + self.assertRaises(RuntimeError, + bond_tpl._make_name) + + def test_exceptions_pint_quantity(self): + units = pint.UnitRegistry() + inputs = {"q":1, + "expected_dimension": "length", + "ureg": units} + self.assertRaises(TypeError, + PintQuantity.from_quantity, + **inputs) + inputs = {"q":1*units.nm, + "expected_dimension": "unknown", + "ureg": units} + self.assertRaises(ValueError, + PintQuantity.from_quantity, + **inputs) + inputs = {"q":1*units.nm**2, + "expected_dimension": "length", + "ureg": units} + self.assertRaises(ValueError, + PintQuantity.from_quantity, + **inputs) + + +if __name__ == '__main__': + ut.main() \ No newline at end of file From af7e7601a6ecadab79587c545da5b0c9033018b4 Mon Sep 17 00:00:00 2001 From: pmblanco Date: Fri, 30 Jan 2026 13:04:32 +0100 Subject: [PATCH 41/55] increase coverage manager io, fix bug in lj --- pyMBE/pyMBE.py | 8 +- pyMBE/storage/pint_quantity.py | 6 +- pyMBE/storage/templates/lj.py | 2 +- 
testsuite/database_unit_tests.py | 58 +++++++++- testsuite/test_io_database.py | 193 ++++++++++++++++++++++++++++++- 5 files changed, 257 insertions(+), 10 deletions(-) diff --git a/pyMBE/pyMBE.py b/pyMBE/pyMBE.py index 218d921..559436b 100644 --- a/pyMBE/pyMBE.py +++ b/pyMBE/pyMBE.py @@ -3173,6 +3173,10 @@ def setup_lj_interactions(self, espresso_system, shift_potential=True, combining from itertools import combinations_with_replacement particle_templates = self.db.get_templates("particle") shift = "auto" if shift_potential else 0 + if shift == "auto": + shift_tpl = shift + else: + shift_tpl = PintQuantity(magnitude=shift*self.units.reduced_length,units=self.units,dimension="length") # Get all particle states registered in pyMBE state_entries = [] for tpl in particle_templates.values(): @@ -3194,6 +3198,7 @@ def setup_lj_interactions(self, espresso_system, shift_potential=True, combining cutoff=lj_parameters["cutoff"].to("reduced_length").magnitude, offset=lj_parameters["offset"].to("reduced_length").magnitude, shift=shift) + lj_template = LJInteractionTemplate(state1=state1.name, state2=state2.name, sigma=PintQuantity.from_quantity(q=lj_parameters["sigma"], @@ -3208,5 +3213,6 @@ def setup_lj_interactions(self, espresso_system, shift_potential=True, combining offset=PintQuantity.from_quantity(q=lj_parameters["offset"], expected_dimension="length", ureg=self.units), - shift=shift) + shift=shift_tpl) self.db._register_template(lj_template) + diff --git a/pyMBE/storage/pint_quantity.py b/pyMBE/storage/pint_quantity.py index 4da1d1e..3491dc8 100644 --- a/pyMBE/storage/pint_quantity.py +++ b/pyMBE/storage/pint_quantity.py @@ -81,17 +81,13 @@ def from_quantity(cls, q, expected_dimension, ureg): if not q.check(rep_unit): raise ValueError(f"Quantity {q} does not have expected dimension '{expected_dimension}'") except Exception as e: - # If check fails because registries differ, try converting via string (best-effort) + # If check fails because registries differ raise # 
Use the dimension representative unit rep_unit_str = _DIMENSION_REPRESENTATIVE[expected_dimension] rep_unit = ureg(rep_unit_str) - # Validate dimensionality - if not q.check(rep_unit): - raise ValueError(f"Quantity {q} does not match expected dimension '{expected_dimension}'") - # Convert to the representative SI unit q_base = q.to(rep_unit) diff --git a/pyMBE/storage/templates/lj.py b/pyMBE/storage/templates/lj.py index c8c34d2..9c5c4f8 100644 --- a/pyMBE/storage/templates/lj.py +++ b/pyMBE/storage/templates/lj.py @@ -65,7 +65,7 @@ class LJInteractionTemplate(BaseModel): epsilon: PintQuantity cutoff: PintQuantity offset: PintQuantity - shift: str | float + shift: str | PintQuantity @classmethod def _make_name(cls, state1: str, state2: str) -> str: diff --git a/testsuite/database_unit_tests.py b/testsuite/database_unit_tests.py index fa0c513..3409b18 100644 --- a/testsuite/database_unit_tests.py +++ b/testsuite/database_unit_tests.py @@ -27,6 +27,7 @@ from pyMBE.storage.instances.hydrogel import HydrogelInstance from pyMBE.storage.templates.bond import BondTemplate from pyMBE.storage.pint_quantity import PintQuantity +from pyMBE.storage.reactions.reaction import Reaction, ReactionParticipant import pint class Test(ut.TestCase): @@ -133,6 +134,61 @@ def test_exceptions_pint_quantity(self): PintQuantity.from_quantity, **inputs) - + def test_exceptions_reaction_template(self): + """ + Tests sanity of the Reaction template + """ + # Reactions with less than 2 participants trigger a value error + inputs = {"participants":[ReactionParticipant(particle_name="A", + state_name="A", + coefficient=1)], + "pK":1, + "reaction_type":"test"} + + self.assertRaises(ValueError, + Reaction, + **inputs) + # Reactions with a participant with a 0 stechiometric coeff. 
trigger a value error + inputs = {"participants":[ReactionParticipant(particle_name="A", + state_name="A", + coefficient=0), + ReactionParticipant(particle_name="B", + state_name="B", + coefficient=1)], + "pK":1, + "reaction_type":"test"} + + self.assertRaises(ValueError, + Reaction, + **inputs) + # Reactions with a participant with a 0 stechiometric coeff. triggers a ValueError + inputs = {"participants":[ReactionParticipant(particle_name="A", + state_name="A", + coefficient=0), + ReactionParticipant(particle_name="B", + state_name="B", + coefficient=1)], + "pK":1, + "reaction_type":"test"} + + self.assertRaises(ValueError, + Reaction, + **inputs) + # Adding a new participant with a 0 stechiometric coeff. triggers a ValueError + react_tpl =Reaction(participants=[ReactionParticipant(particle_name="A", + state_name="A", + coefficient=-1), + ReactionParticipant(particle_name="B", + state_name="B", + coefficient=1)], + pK=1, + reaction_type="test") + inputs={"particle_name": "C", + "state_name":"C", + "coefficient":0} + self.assertRaises(ValueError, + react_tpl.add_participant, + **inputs) + if __name__ == '__main__': ut.main() \ No newline at end of file diff --git a/testsuite/test_io_database.py b/testsuite/test_io_database.py index 991a7ab..bb99577 100644 --- a/testsuite/test_io_database.py +++ b/testsuite/test_io_database.py @@ -25,15 +25,202 @@ import pyMBE from pyMBE.lib.lattice import DiamondLattice import pyMBE.lib.handy_functions as hf -from pyMBE.storage.io import _decode -from pyMBE.storage.io import _encode +from pyMBE.storage.io import _decode, _encode, _load_database_csv, _save_database_csv from pyMBE.storage.pint_quantity import PintQuantity from pyMBE.storage.instances.bond import BondInstance +from pyMBE.storage.templates.bond import BondTemplate +from pathlib import Path +import csv + espresso_system=espressomd.System (box_l = [100]*3) +class DummyDB: + def __init__(self): + self._templates = {} + self._instances = {} + self._reactions = {} + class 
Test(ut.TestCase): + def test_instance_fallback_model_dump_failure(self): + class BadInstance: + name = "bad_inst" + def model_dump(self): + raise RuntimeError("boom") + db = DummyDB() + db._templates = {} + db._instances["weird"] = {"x": BadInstance()} + db._reactions = {} + with tempfile.TemporaryDirectory() as tmp: + _save_database_csv(db, tmp) + + text = Path(tmp, "instances_weird.csv").read_text() + self.assertIn("bad_inst", text) + + def test_template_fallback_model_dump_failure(self): + class BadTemplate: + name = "bad" + def model_dump(self): + raise RuntimeError("boom") + db = DummyDB() + db._templates["weird"] = {"bad": BadTemplate()} + db._instances = {} + db._reactions = {} + with tempfile.TemporaryDirectory() as tmp: + _save_database_csv(db, tmp) + text = Path(tmp, "templates_weird.csv").read_text() + self.assertIn("bad", text) + + def test_bond_scalar_parameter_serialization(self): + """ + Tests the bond serilization + """ + db = DummyDB() + bond = BondTemplate(name="b1", + bond_type="harmonic", + particle_name1=None, + particle_name2=None, + parameters={"k": PintQuantity(magnitude=24, + units="kilojoule / units.nm**2", + dimension="energy/length**2")}) + db._templates["bond"] = {"b1": bond} + db._instances = {} + db._reactions = {} + with tempfile.TemporaryDirectory() as tmp: + _save_database_csv(db, tmp) + text = Path(tmp, "templates_bond.csv").read_text() + self.assertIn('""k"":{""magnitude"":24,""units"":""kilojoule / units.nm**2"",""dimension"":""energy/length**2""}', text) + + def test_invalid_metadata_json(self): + """ + Tests that invalid metadata files in the database are ignored + """ + with tempfile.TemporaryDirectory() as tmp: + folder = Path(tmp) + (folder / "metadata.json").write_text('"this is a string"') + db = DummyDB() + metadata = _load_database_csv(db, folder) + self.assertEqual(metadata, {}) + + def test_lj_shift_as_pint_quantity(self): + """ + Tests a LJ shift as a pint quantity + """ + sigma = {"magnitude": 1.0, + "units": "nm", 
+ "dimension": "[length]"} + cutoff = {"magnitude": 1.0, + "units": "nm", + "dimension": "[length]"} + offset = {"magnitude": 1.0, + "units": "nm", + "dimension": "[length]"} + epsilon = {"magnitude": 1.0, + "units": "J", + "dimension": "[energy]"} + shift = {"magnitude": 1.0, + "units": "nm", + "dimension": "[length]"} + + with tempfile.TemporaryDirectory() as tmp: + folder = Path(tmp) + with open(folder / "templates_lj.csv", "w", newline="") as f: + writer = csv.writer(f) + writer.writerow( + ["name", "state1", "state2", "sigma", "epsilon", "cutoff", "offset", "shift"] + ) + writer.writerow([ + "lj1", "A", "B", + json.dumps(sigma), + json.dumps(epsilon), + json.dumps(cutoff), + json.dumps(offset), + json.dumps(shift), + ]) + + + db = DummyDB() + _load_database_csv(db, folder) + + lj = db._templates["lj"]["A-B"] + self.assertIsInstance(lj.shift, PintQuantity) + + def test_bond_template_scalar_parameter(self): + """ + Tests Bond template with non-PintQuantity parameter + """ + with tempfile.TemporaryDirectory() as tmp: + folder = Path(tmp) + (folder / "templates_bond.csv").write_text( + "name,bond_type,parameters\n" + 'b1,harmonic,"{""k"":{""magnitude"":24,""units"":""kilojoule / nm**2"",""dimension"":""energy/length**2""}}"\n' + ) + + db = DummyDB() + _load_database_csv(db, folder) + + bond = db._templates["bond"]["b1"] + self.assertEqual(bond.parameters["k"].magnitude, 24) + + + def test_lists_string_coerced_to_list(self): + """ + Tests that string lists returned by the encoder are parsed back to list properly + """ + with tempfile.TemporaryDirectory() as tmp: + folder = Path(tmp) + + (folder / "templates_residue.csv").write_text( + "name,central_bead,side_chains\n" + "RES1,BB,XYZ\n" + ) + + db = DummyDB() + _load_database_csv(db, folder) + + tpl = db._templates["residue"]["RES1"] + self.assertEqual(tpl.side_chains, ["X", "Y", "Z"]) + + with tempfile.TemporaryDirectory() as tmp: + folder = Path(tmp) + (folder / 
"templates_molecule.csv").write_text("name,residue_list\n" + "MOL1,ABC\n") + + db = DummyDB() + _load_database_csv(db, folder) + + tpl = db._templates["molecule"]["MOL1"] + self.assertEqual(tpl.residue_list, ["A", "B", "C"]) + + with tempfile.TemporaryDirectory() as tmp: + folder = Path(tmp) + + (folder / "templates_peptide.csv").write_text( + "name,model,residue_list,sequence\n" + "PEP1,CG,XYZ,XYZ\n" + ) + + db = DummyDB() + _load_database_csv(db, folder) + + tpl = db._templates["peptide"]["PEP1"] + self.assertEqual(tpl.residue_list, ["X", "Y", "Z"]) + + with tempfile.TemporaryDirectory() as tmp: + folder = Path(tmp) + + (folder / "templates_protein.csv").write_text( + "name,model,residue_list,sequence\n" + "PROT1,CG,DEF,DEF\n" + ) + + db = DummyDB() + _load_database_csv(db, folder) + + tpl = db._templates["protein"]["PROT1"] + self.assertEqual(tpl.residue_list, ["D", "E", "F"]) + def test_io_particles_and_particle_states_templates(self): """ Checks that information in the pyMBE database about @@ -581,6 +768,8 @@ def test_load_database_missing_folder(self): with self.assertRaises(FileNotFoundError): pmb.load_database("does_not_exist") + + if __name__ == '__main__': ut.main() From ea0799af5ef23a995f57eaae57b2a5d01e147e04 Mon Sep 17 00:00:00 2001 From: Pablo Date: Wed, 4 Feb 2026 09:38:12 +0100 Subject: [PATCH 42/55] remove dead ends in database io --- pyMBE/storage/io.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/pyMBE/storage/io.py b/pyMBE/storage/io.py index 37b77e3..a316cd6 100644 --- a/pyMBE/storage/io.py +++ b/pyMBE/storage/io.py @@ -212,8 +212,6 @@ def _load_database_csv(db, folder): # if v is a dict, assume PintQuantity dict if isinstance(v, dict) and {"magnitude", "units", "dimension"}.issubset(v.keys()): parameters[k] = PintQuantity.from_dict(v) - else: - parameters[k] = v tpl = BondTemplate(name=row["name"], bond_type=row.get("bond_type", ""), particle_name1=None if particle_name1 == "" else particle_name1, @@ -398,9 +396,6 @@ def 
_save_database_csv(db, folder): for k, v in tpl.parameters.items(): if isinstance(v, PintQuantity): params_serial[k] = v.to_dict() - else: - # assume scalar serializable - params_serial[k] = v rows.append({"name": tpl.name, "particle_name1": tpl.particle_name1, "particle_name2": tpl.particle_name2, From 7aca4bb9cf569b24e88bf7852cd520c2de418299 Mon Sep 17 00:00:00 2001 From: Pablo Date: Wed, 4 Feb 2026 10:09:33 +0100 Subject: [PATCH 43/55] improve docs --- pyMBE/storage/manager.py | 305 +++++++++++++++++----------------- testsuite/test_io_database.py | 26 +-- 2 files changed, 159 insertions(+), 172 deletions(-) diff --git a/pyMBE/storage/manager.py b/pyMBE/storage/manager.py index 22dec53..c835c93 100644 --- a/pyMBE/storage/manager.py +++ b/pyMBE/storage/manager.py @@ -1,5 +1,5 @@ # -# Copyright (C) 2025 pyMBE-dev team +# Copyright (C) 2026 pyMBE-dev team # # This file is part of pyMBE. # @@ -44,26 +44,31 @@ class Manager: """ The canonical database manager for pyMBE. - This class stores all templates, instances, and reactions in structured, - explicit dictionaries. + Attributes: + + _units ('pint.UnitRegistry'): + Pint unit registry used to reconstruct physical quantities from storage. + + _templates ('dict[str, dict[str, TemplateType]]'): + Templates indexed by type and name. + + _instances ('dict[str, dict[int, InstanceType]]'): + Instances indexed by type and id. - All I/O operations (CSV/JSON save/load) operate through DFManager. + _reactions ('dict[str, Reaction]'): + Chemical reactions keyed by reaction name. - Attributes - ---------- - ureg : UnitRegistry - Pint unit registry used to reconstruct physical quantities from storage. + _molecule_like_types ('list'): + List of pyMBE object types that belong to the 'molecule' category in the pyMBE hierarchy. - templates : dict[str, dict[str, TemplateType]] - Templates indexed by type and name. 
- Example: templates["particle"]["A"] → ParticleTemplate + _assembly_like_types ('list'): + List of pyMBE object types that belong to the 'assembly' category in the pyMBE hierarchy. - instances : dict[str, dict[int, InstanceType]] - Instances indexed by type and id. - Example: instances["particle"][5] → ParticleInstance + _pmb_types ('list'): + List of all supported pyMBE object types. - reactions : dict[str, Reaction] - Chemical reactions keyed by reaction name. + espresso_bond_instances ('dict[int,espressomd.interactions.BondedInteraction]'): + List of active instances of bonded interactions from ESPResSo. """ def __init__(self,units): @@ -71,13 +76,13 @@ def __init__(self,units): Initialize an empty structured database. Args: - ureg (UnitRegistry): Pint unit registry used to rebuild quantities. + units ('pint.UnitRegistry'): + Pint unit registry used to reconstruct physical quantities from storage. """ self._units = units self._templates: Dict[str, Dict[str, TemplateType]] = {} self._instances: Dict[str, Dict[int, InstanceType]] = {} self._reactions: Dict[str, Reaction] = {} - self._molecule_like_types = ["molecule", "peptide", "protein"] @@ -91,18 +96,18 @@ def _collect_particle_templates(self, name, pmb_type): template in the hierarchy. Args: - name (str): + name ('str'): Name of the current template being processed. - pmb_type (str): + + pmb_type ('str'): Type of the current template. Returns: - set[str]: + ('set[str]'): Set of particle template names reachable from the current template. Notes: - - Particle state templates are resolved to their parent particle - template. + - Particle state templates are resolved to their parent particle template. """ counts = defaultdict(int) if pmb_type == "particle": @@ -138,7 +143,8 @@ def _delete_bonds_of_particle(self, pid): Delete all bond instances involving a given particle instance. Args: - pid (int): The particle ID whose associated bonds should be deleted. 
+ pid ('int'): + The particle ID whose associated bonds should be deleted. Notes: - If no `"bond"` instances are present in the database, the method @@ -174,12 +180,18 @@ def _find_instance_ids_by_attribute(self, pmb_type, attribute, value): matches the requested value. Args: - pmb_type (str): The pyMBE type to search within. - attribute (str): The attribute name to match on (e.g. "residue_id", "molecule_id"). - value: The attribute value to match. + pmb_type ('str'): + The pyMBE type to search within. + + attribute ('str'): + The attribute name to match on (e.g. "residue_id", "molecule_id"). + + value ('Any'): + The attribute value to match. Returns: - List[int]: IDs of matching instances. + ('List[int]'): + IDs of matching instances. """ if pmb_type not in self._instances: return [] @@ -194,26 +206,18 @@ def _find_instance_ids_by_name(self, pmb_type, name): Return the IDs of all instances of a given pyMBE type that use a specific template name. - This method inspects the instance registry stored under - ``self._instances[pmb_type]`` and collects all instance identifiers - whose ``instance.name`` matches the provided template name. - Args: - pmb_type (str): + pmb_type ('str'): The instance category to search within. - name (str): + name ('str'): The template name associated with the instances of interest. Returns: - list[int]: + 'list[int]': A list of instance IDs whose underlying template name matches ``name``. The list is empty if no such instances exist. - Examples: - >>> db._find_instance_ids_by_name("particle", "A") - [0, 3, 7][] - Notes: - Only exact name matches are considered. - This method does not validate whether the corresponding template @@ -235,21 +239,14 @@ def _find_template_types(self, name): with a given name. Args: - name (str): + name ('str'): The template name to search for. Returns: - list[str]: + ('list[str]'): A list of PMB types (e.g., ``["particle", "residue"]``) in which a template named ``name`` exists. 
The list is empty if no such template is found. - - Examples: - >>> db._find_template_types("A") - ["particle"] - - >>> db._find_template_types("nonexistent") - [] """ found = [] for pmb_type, group in self._templates.items(): @@ -263,12 +260,12 @@ def _get_instances_df(self, pmb_type): Returns a DataFrame containing all instance objects of a given pyMBE type. Args: - pmb_type (str): + pmb_type ('str'): The instance type to query. Must be a key in `self._instances`, such as `"particle"` or `"residue"`. Returns: - pandas.DataFrame: + ('pandas.DataFrame'): A DataFrame where each row corresponds to one registered instance of the specified PMB type. If no instances exist, an empty DataFrame is returned. @@ -319,7 +316,7 @@ def _get_reactions_df(self): Returns a DataFrame summarizing all registered chemical reactions. Returns: - pandas.DataFrame: + ('pandas.DataFrame'): A DataFrame where each row corresponds to one reaction. Notes: @@ -347,12 +344,12 @@ def _get_templates_df(self, pmb_type): Returns a DataFrame containing all template definitions of a PMB type. Args: - pmb_type (str): + pmb_type ('str'): The template type to query, e.g. `"particle"`, `"residue"`, `"molecule"`. Returns: - pandas.DataFrame: + ('pandas.DataFrame'): A DataFrame representing all templates of the given type. Particle templates expand to multiple rows, one per state. Empty DataFrame if no templates for that type exist. @@ -407,23 +404,16 @@ def _has_instance(self, pmb_type, instance_id): Check whether an instance with a given ID exists under a specific pyMBE type. Args: - pmb_type (str): + pmb_type ('str'): The instance category to search in. - instance_id (int): + instance_id ('int'): The unique identifier of the instance. Returns: - bool: + ('bool'): ``True`` if the instance exists in the given category, ``False`` otherwise. 
- - Examples: - >>> db._has_instance("particle", 3) - True - - >>> db._has_instance("nonexistent_type", 5) - ValueError """ if pmb_type not in self._instances: raise ValueError(f"Instance type '{pmb_type}' not found in the database.") @@ -435,14 +425,15 @@ def _has_template(self, pmb_type, name): Check whether a template with a given name exists within a specific pyMBE type. Args: - pmb_type (str): + pmb_type ('str'): The template category to search in (e.g. ``"particle"``, ``"bond"``, ``"molecule"``, ``"lj"``, etc.). - name (str): + + name ('str'): The template name to check for. Returns: - bool: + ('bool'): ``True`` if a template named ``name`` exists under ``pmb_type``; ``False`` otherwise. """ @@ -496,7 +487,8 @@ def _register_reaction(self, reaction): Register a chemical or physical reaction. Args: - reaction (Reaction): Reaction object. + reaction ('Reaction'): + Reaction template from the pyMBE database. """ if reaction.name in self._reactions: raise ValueError(f"Reaction '{reaction.name}' already exists.") @@ -508,7 +500,8 @@ def _register_template(self, template): Register a template. Args: - template: Any template object conforming to the pyMBE template models. + template ('TemplateType'): + Any template object conforming to the pyMBE template models. """ pmb_type = getattr(template, "pmb_type", None) @@ -546,13 +539,16 @@ def _update_instance(self, instance_id, pmb_type, attribute, value): ensuring database consistency. Args: - instance_id (Hashable): + instance_id ('int'): Unique identifier of the instance to update. - pmb_type (str): + + pmb_type ('str'): Instance category, such as ``"particle"`` or ``"residue"``. - attribute (str): + + attribute ('str'): Name of the field to update. - value (Any): + + value ('Any'): New value to assign to the specified attribute. 
Notes: @@ -588,26 +584,29 @@ def _propagate_id(self, root_type, root_id, attribute, value): Recursively updates an attribute (e.g., molecule_id, assembly_id) on an instance and all of its hierarchical descendants. - Supported relationships: - assembly → molecules → residues → particles - molecule → residues → particles - residue → particles - particle → (nothing) - Args: - root_type (str): + root_type ('str'): One of {"assembly", "molecule", "residue", "particle"}. - root_id (int): + + root_id ('int'): Instance ID of the root object to update. - attribute (str): + + attribute ('str'): The attribute to update (e.g., "molecule_id", "assembly_id"). - value: + + value ('Any'): The new value to assign. Returns: - list[int]: + ('list[int]'): A flat list of all instance IDs updated (including root). + Notes: + - Supported relationships: + assembly → molecules → residues → particles + molecule → residues → particles + residue → particles + particle → (nothing) """ updated = [] # Map each type to its own identity attribute @@ -668,18 +667,17 @@ def _update_reaction_participant(self, reaction_name, particle_name, state_name, Append a new participant to an existing reaction in the database. Args: - reaction_name (str): + reaction_name ('str'): Name of the reaction to be updated. - particle_name (str): + + particle_name ('str'): Name of the particle template participating in the reaction. - state_name (str): + + state_name ('str'): Specific state of the particle (e.g., protonation or charge state). - coefficient (int): - Stoichiometric coefficient for the new participant: - - ``coefficient < 0`` → reactant - - ``coefficient > 0`` → product - Zero is not allowed. + coefficient ('int'): + Stoichiometric coefficient for the new participant. 
""" if reaction_name not in self._reactions: raise ValueError(f"Reaction '{reaction_name}' not found in the pyMBE database.") @@ -694,12 +692,9 @@ def _propose_instance_id(self, pmb_type): """ Propose the next available id for a new TypeInstance. - If no instances of the given pmb_type exist, the proposed - identifier is ``0``. Otherwise, the next available integer after the - current maximum is returned. - Returns: - int: A non-negative integer that is not already used in the pyMBE database. + ('int'): + A non-negative integer that is not already used in the pyMBE database. Notes: - The method does not fill gaps; it always returns ``max + 1``. @@ -721,24 +716,25 @@ def delete_instance(self, pmb_type, instance_id, cascade=False): """ Delete an instance from the pyMBE database. - Supports cascade deletion through the hierarchy: - assembly → molecules → residues → particles → bonds - molecule → residues → particles → bonds - residue → particles → bonds - particle → bonds - bond → nothing - Args: - pmb_type (str): + pmb_type ('str'): Category of the instance (particle, residue, molecule, peptide, protein, hydrogel, bond). - instance_id (int): + + instance_id ('int'): Unique identifier of the instance. - cascade (bool): + + cascade ('bool'): If True, automatically delete dependent objects. + Notes: + - Supports cascade deletion through the hierarchy: + assembly → molecules → residues → particles → bonds + molecule → residues → particles → bonds + residue → particles → bonds + particle → bonds + bond → nothing """ - # ---- Basic checks ---- if pmb_type not in self._instances: raise ValueError(f"Instance type '{pmb_type}' not found.") @@ -834,10 +830,11 @@ def delete_instances(self, pmb_type, cascade=False): Remove all instances registered for a given pyMBE type. Args: - pmb_type (str): + pmb_type ('str'): Instance category (e.g. ``"particle"``, ``"residue"``, ``"molecule"``, ``"protein"``, ``"hydrogel"``). 
- cascade (bool): + + cascade ('bool'): If True, dependent objects are removed according to the pyMBE hierarchy rules. If False, deletion is forbidden when dependencies exist. @@ -862,7 +859,8 @@ def delete_reaction(self, reaction_name): Delete a reaction template from the pyMBE database. Args: - reaction_name (str): label identifying the reaction template in the database. + reaction_name ('str'): + label identifying the reaction template in the database. """ if reaction_name not in self._reactions: raise ValueError(f"Reaction '{reaction_name}' not found in the pyMBE database.") @@ -881,28 +879,28 @@ def delete_template(self, pmb_type, name): Delete a template from the pyMBE database. Args: - pmb_type (str): The template category. - name (str): The name of the template to delete. + pmb_type ('str'): + The template category. + + name ('str'): + The name of the template to delete. """ # Check template exists if pmb_type not in self._templates: raise ValueError(f"Template type '{pmb_type}' not found.") if name not in self._templates[pmb_type]: raise ValueError(f"Template '{name}' not found in type '{pmb_type}'.") - # Check if any instance depends on this template if pmb_type in self._instances: for inst in self._instances[pmb_type].values(): if getattr(inst, "name", None) == name: raise ValueError(f"Cannot delete template '{name}' from '{pmb_type}': Instance with ID {getattr(inst, pmb_type + '_id')} depends on it.") - # Delete del self._templates[pmb_type][name] # if it is a bond template delete also stored espresso bond instances if pmb_type == "bond": if name in self.espresso_bond_instances.keys(): del self.espresso_bond_instances[name] - # Delete empty groups if not self._templates[pmb_type]: del self._templates[pmb_type] @@ -912,7 +910,7 @@ def delete_templates(self, pmb_type): Remove all templates registered in the pyMBE database for a given pyMBE type. Args: - pmb_type (str): + pmb_type ('str'): Template category (e.g. 
``"particle"``, ``"residue"``, ``"molecule"``, ``"hydrogel"``). @@ -931,18 +929,16 @@ def get_instance(self, pmb_type, instance_id): """ Retrieve a stored instance by type and instance_id. - Looks up an instance within the internal instance registry - (`self._instances`) using its pyMBE type (e.g., "particle", "residue", - "bond", ...) and its unique id. If the instance does not exist, - a `ValueError` is raised. - Args: - pmb_type (str): The instance pyMBE category. - name (str): The unique name of the template to retrieve. + pmb_type ('str'): + The instance pyMBE category. + + name ('str'): + The unique name of the template to retrieve. Returns: - InstanceType: The stored InstanceTemplate instance corresponding to the - provided type and name. + ('InstanceType'): + The stored InstanceTemplate instance corresponding to the provided type and name. """ if instance_id not in self._instances[pmb_type]: @@ -955,11 +951,11 @@ def get_instances(self, pmb_type): Return all instances registered for a given pyMBE type. Args: - pmb_type (str): + pmb_type ('str'): The pyMBE type (e.g. 'particle', 'residue', 'molecule', 'hydrogel'). Returns: - dict: + ('dict'): Mapping {instance_id: instance_object}. Returns an empty dict if no instances exist for the given type. """ @@ -970,10 +966,12 @@ def get_reaction(self, name): Retrieve a reaction stored in the pyMBE database by name. Args: - name ('str'): The unique id of the reaction to retrieve. + name ('str'): + The unique id of the reaction to retrieve. Returns: - 'Reaction': The stored reaction instance corresponding to the provided name. + 'Reaction': + The stored reaction instance corresponding to the provided name. """ if name not in self._reactions[name]: @@ -986,7 +984,8 @@ def get_reactions(self): Retrieve all reactions stored in the pyMBE database. Returns: - 'list of Reaction': List with all stored reaction instances. + ('list of Reaction'): + List with all stored reaction templates in the pyMBE database. 
""" return list(self._reactions.values()) @@ -996,18 +995,21 @@ def get_particle_templates_under(self, template_name, pmb_type=None, return_coun template by traversing the template hierarchy downward. Args: - template_name (str): + template_name ('str'): Name of the starting template. - pmb_type (str, optional): + + pmb_type ('str', optional): Type of the starting template. If not provided, the type is inferred from the database. In this case, the template name must be unique across all template types. - return_counts (bool, optional): + + return_counts ('bool', optional): If False (default), returns a set of unique particle template names. If True, returns a dictionary mapping particle template names to the number of times they appear in the hierarchy. + Returns: - set[str] or dict[str, int]: + ('set[str]' or 'dict[str, int]'): - If `return_counts=False`: unique particle template names - If `return_counts=True`: particle template multiplicities @@ -1032,19 +1034,16 @@ def get_template(self, pmb_type, name): """ Retrieve a stored template by type and name. - Looks up a template within the internal template registry - (`self._templates`) using its pyMBE type (e.g., "particle", "residue", - "bond", ...) and its unique name. If the template does not exist, - a `ValueError` is raised. - Args: - pmb_type (str): The template pyMBE category. - name (str): The unique id of the template to retrieve. + pmb_type ('str'): + The template pyMBE category. - Returns: - TemplateType: The stored template instance corresponding to the - provided type and name. + name ('str'): + The unique id of the template to retrieve. + Returns: + ('TemplateType'): + The stored template instance corresponding to the provided type and name. """ if pmb_type not in self._templates: raise ValueError(f"There are no {pmb_type} templates defined in the database") @@ -1059,11 +1058,11 @@ def get_templates(self, pmb_type): Return all templates registered for a given pyMBE type. 
Args: - pmb_type (str): + pmb_type ('str'): The pyMBE type (e.g. 'particle', 'residue', 'molecule', 'hydrogel'). Returns: - dict: + ('dict'): Mapping {template_name: template_instance}. Returns an empty dict if no templates exist for the given type. """ @@ -1075,7 +1074,7 @@ def get_es_types_map(self): defined for each state. Returns: - dict[str, int]: + ('dict[str, int]'): A dictionary mapping each particle state to its corresponding ESPResSo type. """ @@ -1093,13 +1092,15 @@ def get_particle_id_map(self, object_name): peptides, and assemblies. Args: - object_name (str): Name of the object. + object_name ('str'): + Name of the pyMBE object. Returns: - dict: {"all": [particle_ids], - "residue_map": {residue_id: [particle_ids]}, - "molecule_map": {molecule_id: [particle_ids]}, - "assembly_map": {assembly_id: [particle_ids]},} + ('dict'): + {"all": [particle_ids], + "residue_map": {residue_id: [particle_ids]}, + "molecule_map": {molecule_id: [particle_ids]}, + "assembly_map": {assembly_id: [particle_ids]},} """ # --- Determine object type by searching in the DB ------------------------ object_type = None diff --git a/testsuite/test_io_database.py b/testsuite/test_io_database.py index bb99577..e99a6b5 100644 --- a/testsuite/test_io_database.py +++ b/testsuite/test_io_database.py @@ -44,6 +44,9 @@ def __init__(self): class Test(ut.TestCase): def test_instance_fallback_model_dump_failure(self): + """ + Tests database behavior in failure of instance model dump + """ class BadInstance: name = "bad_inst" def model_dump(self): @@ -59,6 +62,9 @@ def model_dump(self): self.assertIn("bad_inst", text) def test_template_fallback_model_dump_failure(self): + """ + Tests database behavior in failure of template model dump + """ class BadTemplate: name = "bad" def model_dump(self): @@ -72,26 +78,6 @@ def model_dump(self): text = Path(tmp, "templates_weird.csv").read_text() self.assertIn("bad", text) - def test_bond_scalar_parameter_serialization(self): - """ - Tests the bond 
serilization - """ - db = DummyDB() - bond = BondTemplate(name="b1", - bond_type="harmonic", - particle_name1=None, - particle_name2=None, - parameters={"k": PintQuantity(magnitude=24, - units="kilojoule / units.nm**2", - dimension="energy/length**2")}) - db._templates["bond"] = {"b1": bond} - db._instances = {} - db._reactions = {} - with tempfile.TemporaryDirectory() as tmp: - _save_database_csv(db, tmp) - text = Path(tmp, "templates_bond.csv").read_text() - self.assertIn('""k"":{""magnitude"":24,""units"":""kilojoule / units.nm**2"",""dimension"":""energy/length**2""}', text) - def test_invalid_metadata_json(self): """ Tests that invalid metadata files in the database are ignored From 7af71fab303cd1131ce94d30a5950cd4ecefa921 Mon Sep 17 00:00:00 2001 From: Pablo Date: Wed, 4 Feb 2026 14:08:38 +0100 Subject: [PATCH 44/55] increase coverage of the database manager --- pyMBE/pyMBE.py | 4 +- pyMBE/storage/manager.py | 140 ++++++++---------------------- testsuite/database_unit_tests.py | 143 +++++++++++++++++++++++++++++++ testsuite/lj_tests.py | 5 +- 4 files changed, 186 insertions(+), 106 deletions(-) diff --git a/pyMBE/pyMBE.py b/pyMBE/pyMBE.py index 559436b..6af0c8e 100644 --- a/pyMBE/pyMBE.py +++ b/pyMBE/pyMBE.py @@ -3176,7 +3176,9 @@ def setup_lj_interactions(self, espresso_system, shift_potential=True, combining if shift == "auto": shift_tpl = shift else: - shift_tpl = PintQuantity(magnitude=shift*self.units.reduced_length,units=self.units,dimension="length") + shift_tpl = PintQuantity.from_quantity(q=shift*self.units.reduced_length, + expected_dimension="length", + ureg=self.units) # Get all particle states registered in pyMBE state_entries = [] for tpl in particle_templates.values(): diff --git a/pyMBE/storage/manager.py b/pyMBE/storage/manager.py index c835c93..422470a 100644 --- a/pyMBE/storage/manager.py +++ b/pyMBE/storage/manager.py @@ -36,6 +36,7 @@ from pyMBE.storage.templates.hydrogel import HydrogelTemplate from pyMBE.storage.instances.hydrogel 
import HydrogelInstance from pyMBE.storage.templates.lj import LJInteractionTemplate +from pyMBE.storage.pint_quantity import PintQuantity TemplateType = Any # union of template classes (ParticleTemplate, ResidueTemplate, ...) InstanceType = Any # union of instance classes (ParticleInstance, ResidueInstance, ...) @@ -92,22 +93,30 @@ def __init__(self,units): def _collect_particle_templates(self, name, pmb_type): """ - Recursively collects particle template names reachable from a given - template in the hierarchy. + Recursively collect particle template names reachable from a given + template in the hierarchy, accounting for their multiplicity. Args: name ('str'): - Name of the current template being processed. + Name of the template being processed. pmb_type ('str'): - Type of the current template. + Type of the template. Returns: - ('set[str]'): - Set of particle template names reachable from the current template. + ('collections.defaultdict[str, int]'): + A mapping from particle template names to their occurrence counts + in the hierarchy reachable from the given template. Notes: - - Particle state templates are resolved to their parent particle template. + - If ``pmb_type == "particle"``, the particle itself is counted once. + - If ``pmb_type == "particle_state"``, the state is resolved to its + parent particle template, which is counted once. + - Residue templates contribute their central bead and all side-chain + particles. + - Molecule-like templates contribute the particles from all residues + in their ``residue_list``. + - Unsupported ``pmb_type`` values raise ``NotImplementedError``. 
""" counts = defaultdict(int) if pmb_type == "particle": @@ -154,26 +163,12 @@ def _delete_bonds_of_particle(self, pid): """ if "bond" not in self._instances: return - bonds_to_delete = [ - b_id for b_id, b in list(self._instances["bond"].items()) - if b.particle_id1 == pid or b.particle_id2 == pid - ] + bonds_to_delete = [b_id for b_id, b in list(self._instances["bond"].items()) if b.particle_id1 == pid or b.particle_id2 == pid] for b_id in bonds_to_delete: del self._instances["bond"][b_id] if "bond" in self._instances and not self._instances["bond"]: del self._instances["bond"] - if "bond" not in self._instances: - return - bonds_to_delete = [ - b_id for b_id, b in list(self._instances["bond"].items()) - if b.particle_id1 == pid or b.particle_id2 == pid - ] - for b_id in bonds_to_delete: - del self._instances["bond"][b_id] - if "bond" in self._instances and not self._instances["bond"]: - del self._instances["bond"] - def _find_instance_ids_by_attribute(self, pmb_type, attribute, value): """ Return a list of instance IDs for a given pmb_type where a given attribute @@ -214,7 +209,7 @@ def _find_instance_ids_by_name(self, pmb_type, name): The template name associated with the instances of interest. Returns: - 'list[int]': + ('list[int]'): A list of instance IDs whose underlying template name matches ``name``. The list is empty if no such instances exist. 
@@ -225,12 +220,10 @@ def _find_instance_ids_by_name(self, pmb_type, name): """ if pmb_type not in self._instances: return [] - result = [] for iid, inst in self._instances[pmb_type].items(): if hasattr(inst, "name") and inst.name == name: result.append(iid) - return result def _find_template_types(self, name): @@ -281,31 +274,24 @@ def _get_instances_df(self, pmb_type): return pd.DataFrame(rows) for inst in self._instances[pmb_type].values(): if pmb_type == "particle": - rows.append({ - "pmb_type": pmb_type, - "name": inst.name, - "particle_id": inst.particle_id, - "initial_state": inst.initial_state, - "residue_id": int(inst.residue_id) if inst.residue_id is not None else pd.NA, - "molecule_id": int(inst.molecule_id) if inst.molecule_id is not None else pd.NA, - "assembly_id": int(inst.assembly_id) if inst.assembly_id is not None else pd.NA - }) + rows.append({"pmb_type": pmb_type, + "name": inst.name, + "particle_id": inst.particle_id, + "initial_state": inst.initial_state, + "residue_id": int(inst.residue_id) if inst.residue_id is not None else pd.NA, + "molecule_id": int(inst.molecule_id) if inst.molecule_id is not None else pd.NA, + "assembly_id": int(inst.assembly_id) if inst.assembly_id is not None else pd.NA }) elif pmb_type == "residue": - rows.append({ - "pmb_type": pmb_type, - "name": inst.name, - "residue_id": inst.residue_id, - "molecule_id": int(inst.molecule_id) if inst.molecule_id is not None else pd.NA, - "assembly_id": int(inst.assembly_id) if inst.assembly_id is not None else pd.NA - }) + rows.append({"pmb_type": pmb_type, + "name": inst.name, + "residue_id": inst.residue_id, + "molecule_id": int(inst.molecule_id) if inst.molecule_id is not None else pd.NA, + "assembly_id": int(inst.assembly_id) if inst.assembly_id is not None else pd.NA}) elif pmb_type in ["molecule","peptide","protein"]: - rows.append({ - "pmb_type": pmb_type, - "name": inst.name, - "molecule_id": inst.molecule_id, - "assembly_id": int(inst.assembly_id) if inst.assembly_id is 
not None else pd.NA - }) - + rows.append({"pmb_type": pmb_type, + "name": inst.name, + "molecule_id": inst.molecule_id, + "assembly_id": int(inst.assembly_id) if inst.assembly_id is not None else pd.NA}) else: # Generic representation for other types rows.append(inst.model_dump()) @@ -327,10 +313,7 @@ def _get_reactions_df(self): """ rows = [] for r in self._reactions.values(): - stoich = { - f"{p.state_name}": p.coefficient - for p in r.participants - } + stoich = {f"{p.state_name}": p.coefficient for p in r.participants} rows.append({"reaction": r.name, "stoichiometry": stoich, "pK": r.pK, @@ -372,7 +355,7 @@ def _get_templates_df(self, pmb_type): "initial_state": tpl.initial_state}) elif pmb_type == "lj": shift = tpl.shift - if isinstance(shift, dict) and {"magnitude", "units", "dimension"}.issubset(shift.keys()): + if isinstance(shift, PintQuantity): shift = tpl.shift.to_quantity(self._units) rows.append({"pmb_type": tpl.pmb_type, "name": tpl.name, @@ -383,7 +366,6 @@ def _get_templates_df(self, pmb_type): "cutoff": tpl.cutoff.to_quantity(self._units), "offset": tpl.offset.to_quantity(self._units), "shift": shift}) - elif pmb_type == "bond": parameters = {} for key in tpl.parameters.keys(): @@ -417,7 +399,6 @@ def _has_instance(self, pmb_type, instance_id): """ if pmb_type not in self._instances: raise ValueError(f"Instance type '{pmb_type}' not found in the database.") - return instance_id in self._instances[pmb_type] def _has_template(self, pmb_type, name): @@ -479,7 +460,6 @@ def _register_instance(self, instance): # validate template exists if instance.name not in self._templates.get(pmb_type, {}): raise ValueError(f"Template '{instance.name}' not found for type '{pmb_type}'") - self._instances[pmb_type][iid] = instance def _register_reaction(self, reaction): @@ -505,27 +485,6 @@ def _register_template(self, template): """ pmb_type = getattr(template, "pmb_type", None) - if pmb_type is None: - # infer from class - if isinstance(template, ParticleTemplate): 
- pmb_type = "particle" - elif isinstance(template, ResidueTemplate): - pmb_type = "residue" - elif isinstance(template, MoleculeTemplate): - pmb_type = "molecule" - elif isinstance(template, PeptideTemplate): - pmb_type = "peptide" - elif isinstance(template, ProteinTemplate): - pmb_type = "protein" - elif isinstance(template, HydrogelTemplate): - pmb_type = "hydrogel" - elif isinstance(template, BondTemplate): - pmb_type = "bond" - elif isinstance(template, LJInteractionTemplate): - pmb_type = "lj" - else: - raise TypeError("Unknown template type; set attribute pmb_type or use supported templates") - self._templates.setdefault(pmb_type, {}) if template.name in self._templates[pmb_type]: raise ValueError(f"Template '{template.name}' exists in '{pmb_type}'") @@ -660,33 +619,6 @@ def _propagate_id(self, root_type, root_id, attribute, value): value=value,) updated.append(("particle", pid)) return updated - - - def _update_reaction_participant(self, reaction_name, particle_name, state_name, coefficient): - """ - Append a new participant to an existing reaction in the database. - - Args: - reaction_name ('str'): - Name of the reaction to be updated. - - particle_name ('str'): - Name of the particle template participating in the reaction. - - state_name ('str'): - Specific state of the particle (e.g., protonation or charge state). - - coefficient ('int'): - Stoichiometric coefficient for the new participant. 
- """ - if reaction_name not in self._reactions: - raise ValueError(f"Reaction '{reaction_name}' not found in the pyMBE database.") - - rxn = self._reactions[reaction_name].add_participant(particle_name=particle_name, - state_name=state_name, - coefficient=coefficient) - self._register_reaction(rxn) - self._reactions.pop(reaction_name) def _propose_instance_id(self, pmb_type): """ diff --git a/testsuite/database_unit_tests.py b/testsuite/database_unit_tests.py index 3409b18..98cc3da 100644 --- a/testsuite/database_unit_tests.py +++ b/testsuite/database_unit_tests.py @@ -18,6 +18,7 @@ import unittest as ut import pyMBE +import espressomd from pyMBE.storage.instances.particle import ParticleInstance from pyMBE.storage.instances.residue import ResidueInstance from pyMBE.storage.instances.molecule import MoleculeInstance @@ -29,8 +30,150 @@ from pyMBE.storage.pint_quantity import PintQuantity from pyMBE.storage.reactions.reaction import Reaction, ReactionParticipant import pint +espresso_system=espressomd.System(box_l = [10]*3) class Test(ut.TestCase): + + def test_find_instance_ids(self): + """ + Sanity test for `_find_instance_ids_by_attribute` + and `_find_instance_ids_by_name` + """ + pmb = pyMBE.pymbe_library(23) + pmb.define_particle(name="A", + sigma=1*pmb.units.nm, + epsilon=1*pmb.units.reduced_energy, + pka=9, + acidity="acidic") + pmb.define_particle(name="B", + sigma=1*pmb.units.nm, + epsilon=1*pmb.units.reduced_energy) + pmb.define_residue(name="R1", + central_bead="A", + side_chains=["B"]) + bond_type = 'harmonic' + bond = {'r_0' : 0.4*pmb.units.nm, + 'k' : 400 * pmb.units('reduced_energy / reduced_length**2')} + + pmb.define_default_bond(bond_type = bond_type, + bond_parameters = bond) + pmb.define_molecule(name="M1", + residue_list=["R1"]*2) + pmb.create_molecule(name="M1", + espresso_system=espresso_system, + number_of_molecules=1, + use_default_bond=True) + instance_ids_r1 = pmb.db._find_instance_ids_by_attribute(pmb_type="particle", + 
attribute="residue_id", + value=0) + instance_ids_r2 = pmb.db._find_instance_ids_by_attribute(pmb_type="particle", + attribute="residue_id", + value=1) + self.assertEqual(instance_ids_r1, + [0,1]) + self.assertEqual(instance_ids_r2, + [2,3]) + instance_ids_m1 = pmb.db._find_instance_ids_by_attribute(pmb_type="particle", + attribute="molecule_id", + value=0) + self.assertEqual(instance_ids_m1, + [0,1,2,3]) + instance_ids_by_name_A = pmb.db._find_instance_ids_by_name(pmb_type="particle", + name="A") + instance_ids_by_name_B = pmb.db._find_instance_ids_by_name(pmb_type="particle", + name="B") + self.assertEqual(instance_ids_by_name_A, + [0,2]) + self.assertEqual(instance_ids_by_name_B, + [1,3]) + # Sanity test, no ids are returned if the instance does not exist + instance_ids_test = pmb.db._find_instance_ids_by_name(pmb_type="peptide", + name="B") + self.assertEqual(instance_ids_test, + []) + + # Check that the pyMBE database finds a specific instance + self.assertEqual(pmb.db._has_instance(pmb_type="particle", + instance_id=3), + True) + self.assertEqual(pmb.db._has_instance(pmb_type="particle", + instance_id=4), + False) + # Sanity test, unexisting pyMBE type + inputs = {"pmb_type": "unknown", + "instance_id": 0} + self.assertRaises(ValueError, + pmb.db._has_instance, + **inputs) + + def test_count_templates(self): + """ + Sanity test for `_collect_particle_templates` + """ + pmb = pyMBE.pymbe_library(23) + pmb.define_particle(name="A", + sigma=1*pmb.units.nm, + epsilon=1*pmb.units.reduced_energy, + pka=9, + acidity="acidic") + pmb.define_particle(name="B", + sigma=1*pmb.units.nm, + epsilon=1*pmb.units.reduced_energy) + pmb.define_residue(name="R1", + central_bead="A", + side_chains=["B"]) + pmb.define_molecule(name="M1", + residue_list=["R1"]*2) + A_states = pmb.db._collect_particle_templates(name="A", + pmb_type="particle_state") + self.assertEqual(A_states, + {"A":1}) + AH_states = pmb.db._collect_particle_templates(name="AH", + pmb_type="particle_state") + 
self.assertEqual(AH_states, + {"A":1}) + A_particles = pmb.db._collect_particle_templates(name="A", + pmb_type="particle") + B_particles = pmb.db._collect_particle_templates(name="B", + pmb_type="particle") + self.assertEqual(A_particles, + {"A":1}) + self.assertEqual(B_particles, + {"B":1}) + R1_counts = pmb.db._collect_particle_templates(name="R1", + pmb_type="residue") + self.assertEqual(R1_counts, + {"A":1, + "B":1}) + M1_counts = pmb.db._collect_particle_templates(name="M1", + pmb_type="molecule") + self.assertEqual(M1_counts, + {"A":2, + "B":2}) + inputs={"name": "test", + "pmb_type": "unknown"} + self.assertRaises(NotImplementedError, + pmb.db._collect_particle_templates, + **inputs) + # Sanity test for unknown types in _has_template + inputs = {"pmb_type": "unknown", + "name": "A"} + self.assertRaises(ValueError, + pmb.db._has_template, + **inputs) + # Sanity tests for get_particle_templates_under + templates_R1 = pmb.db.get_particle_templates_under(template_name="R1") + self.assertEqual(templates_R1, + {"A","B"}) + # Sanity tests, raise ValueError when pmb_type cannot be safely infered + pmb.define_residue(name="A", + central_bead="A", + side_chains=["B"]) + inputs = {"template_name": "A"} + self.assertRaises(ValueError, + pmb.db.get_particle_templates_under, + **inputs) + def test_sanity_db(self): """ Sanity tests for the pyMBE database diff --git a/testsuite/lj_tests.py b/testsuite/lj_tests.py index fe4f787..35f89e6 100644 --- a/testsuite/lj_tests.py +++ b/testsuite/lj_tests.py @@ -155,7 +155,10 @@ def test_lj_interaction_setup(self): for parameter_key in ["sigma","offset","cutoff"]: ref_lj_parameters[parameter_key]=(A_input_parameters[parameter_key]+B_input_parameters[parameter_key])/2 ref_lj_parameters["epsilon"]=np.sqrt(A_input_parameters["epsilon"]*B_input_parameters["epsilon"]) - + lj_templates = pmb.db.get_templates(pmb_type="lj") + lj_df = pmb.db._get_templates_df(pmb_type="lj") + self.assertEqual(lj_df[lj_df.name == 
"A-A"]["shift"].values[0].m_as("nanometer"), + 0) for label in labels: lj_template = lj_templates[label] for parameter_key in ["sigma","offset","cutoff"]: From 3ef0e93a3c93d1acea56e39e6cb7c5ecc4450f7a Mon Sep 17 00:00:00 2001 From: pmblanco Date: Fri, 6 Feb 2026 11:26:24 +0100 Subject: [PATCH 45/55] reach full coverage --- pyMBE/pyMBE.py | 5 +- pyMBE/storage/manager.py | 150 ++++++++++--------------------- testsuite/database_unit_tests.py | 146 ++++++++++++++++++++++++++++++ 3 files changed, 194 insertions(+), 107 deletions(-) diff --git a/pyMBE/pyMBE.py b/pyMBE/pyMBE.py index 6af0c8e..b4ff9f1 100644 --- a/pyMBE/pyMBE.py +++ b/pyMBE/pyMBE.py @@ -81,7 +81,7 @@ class pymbe_library(): units ('pint.UnitRegistry'): Pint unit registry used for unit-aware calculations. - lattice_builder: + lattice_builder ('pyMBE.lib.lattice.LatticeBuilder'): Optional lattice builder object (initialized as ''None''). root ('importlib.resources.abc.Traversable'): @@ -1767,8 +1767,7 @@ def delete_instances_in_system(self, instance_id, pmb_type, espresso_system): self._delete_particles_from_espresso(particle_ids=particle_ids, espresso_system=espresso_system) self.db.delete_instance(pmb_type=pmb_type, - instance_id=instance_id, - cascade=True) + instance_id=instance_id) def determine_reservoir_concentrations(self, pH_res, c_salt_res, activity_coefficient_monovalent_pair, max_number_sc_runs=200): """ diff --git a/pyMBE/storage/manager.py b/pyMBE/storage/manager.py index 422470a..f333784 100644 --- a/pyMBE/storage/manager.py +++ b/pyMBE/storage/manager.py @@ -520,22 +520,18 @@ def _update_instance(self, instance_id, pmb_type, attribute, value): using ``model_copy(update=...)`` to maintain immutability and avoid partial mutations of internal state. 
""" - if instance_id not in self._instances[pmb_type]: - raise ValueError(f"Instance '{instance_id}' not found for type '{pmb_type}' in the pyMBE database.") - + raise ValueError(f"Instance '{instance_id}' not found for type '{pmb_type}' in the pyMBE database.") if pmb_type == "particle": allowed = ["initial_state", "residue_id", "molecule_id", "assembly_id"] elif pmb_type == "residue": allowed = ["molecule_id", "assembly_id"] - elif pmb_type == "molecule": + elif pmb_type in self._molecule_like_types: allowed = ["assembly_id"] else: allowed = [None] # No attributes allowed for other types - if attribute not in allowed: raise ValueError(f"Attribute '{attribute}' not allowed for {pmb_type}. Allowed attributes: {allowed}") - self._instances[pmb_type][instance_id] = self._instances[pmb_type][instance_id].model_copy(update={attribute: value}) def _propagate_id(self, root_type, root_id, attribute, value): @@ -569,14 +565,12 @@ def _propagate_id(self, root_type, root_id, attribute, value): """ updated = [] # Map each type to its own identity attribute - self_id_attribute = { - "hydrogel": "assembly_id", - "molecule": "molecule_id", - "peptide": "molecule_id", - "protein": "molecule_id", - "residue": "residue_id", - "particle": "particle_id", - } + self_id_attribute = {"hydrogel": "assembly_id", + "molecule": "molecule_id", + "peptide": "molecule_id", + "protein": "molecule_id", + "residue": "residue_id", + "particle": "particle_id",} assembly_types = ["hydrogel"] molecule_types = ["molecule", "peptide", "protein"] # 1) Update ROOT (unless attribute corresponds to its own ID) @@ -644,7 +638,7 @@ def _propose_instance_id(self, pmb_type): used_ids = list(self._instances[pmb_type].keys()) return max(used_ids) + 1 - def delete_instance(self, pmb_type, instance_id, cascade=False): + def delete_instance(self, pmb_type, instance_id): """ Delete an instance from the pyMBE database. 
@@ -656,11 +650,8 @@ def delete_instance(self, pmb_type, instance_id, cascade=False): instance_id ('int'): Unique identifier of the instance. - cascade ('bool'): - If True, automatically delete dependent objects. - Notes: - - Supports cascade deletion through the hierarchy: + - It applies cascade deletion through the hierarchy: assembly → molecules → residues → particles → bonds molecule → residues → particles → bonds residue → particles → bonds @@ -674,90 +665,47 @@ def delete_instance(self, pmb_type, instance_id, cascade=False): raise ValueError(f"Instance ID '{instance_id}' not found in '{pmb_type}'.") inst = self._instances[pmb_type][instance_id] # =============== CASCADE DELETION ========================= - if cascade: - # --- Delete children of ASSEMBLY-like objects --- - if pmb_type in self._assembly_like_types: - for mtype in self._molecule_like_types: - mids = self._find_instance_ids_by_attribute(pmb_type=mtype, - attribute="assembly_id", - value=instance_id,) - for mid in mids: - self.delete_instance(pmb_type=mtype, - instance_id=mid, - cascade=True) - # delete particles inside the assembly *even if they have no residue/molecule* (e.g. nodes) - pids = self._find_instance_ids_by_attribute(pmb_type="particle", + # --- Delete children of ASSEMBLY-like objects --- + if pmb_type in self._assembly_like_types: + for mtype in self._molecule_like_types: + mids = self._find_instance_ids_by_attribute(pmb_type=mtype, attribute="assembly_id", + value=instance_id,) + for mid in mids: + self.delete_instance(pmb_type=mtype, + instance_id=mid) + # delete particles inside the assembly *even if they have no residue/molecule* (e.g. 
nodes) + pids = self._find_instance_ids_by_attribute(pmb_type="particle", + attribute="assembly_id", + value=instance_id) + for pid in pids: + self.delete_instance(pmb_type="particle", + instance_id=pid) + # --- Delete children of MOLECULE-like objects --- + if pmb_type in self._molecule_like_types: + residues = self._find_instance_ids_by_attribute(pmb_type="residue", + attribute="molecule_id", + value=instance_id,) + for rid in residues: + self.delete_instance(pmb_type="residue", + instance_id=rid) + # --- Delete children of RESIDUE --- + if pmb_type == "residue": + particles = self._find_instance_ids_by_attribute(pmb_type="particle", + attribute="residue_id", value=instance_id) - for pid in pids: - self.delete_instance(pmb_type="particle", - instance_id=pid, - cascade=True) - # --- Delete children of MOLECULE-like objects --- - if pmb_type in self._molecule_like_types: - residues = self._find_instance_ids_by_attribute(pmb_type="residue", - attribute="molecule_id", - value=instance_id,) - for rid in residues: - self.delete_instance(pmb_type="residue", - instance_id=rid, - cascade=True) - - # --- Delete children of RESIDUE --- - if pmb_type == "residue": - particles = self._find_instance_ids_by_attribute(pmb_type="particle", - attribute="residue_id", - value=instance_id,) - for pid in particles: - self.delete_instance(pmb_type="particle", - instance_id=pid, - cascade=True) - - # --- Delete children of PARTICLE (only bonds) --- - if pmb_type == "particle": - self._delete_bonds_of_particle(instance_id) - - # =============== NON-CASCADE (SAFE DELETE) ================ - else: - # ---- ASSEMBLY-like: forbid deletion if molecules belong to it ---- - if pmb_type in self._assembly_like_types: - for mtype in self._molecule_like_types: - mids = self._find_instance_ids_by_attribute(pmb_type=mtype, - attribute="assembly_id", - value=instance_id,) - if mids: - raise ValueError(f"{pmb_type} {instance_id} contains {mtype} instances {mids}. 
Use cascade=True to delete.") - # ---- MOLECULE-like: check residues ---- - if pmb_type in self._molecule_like_types: - residues = self._find_instance_ids_by_attribute(pmb_type="residue", - attribute="molecule_id", - value=instance_id,) - if residues: - raise ValueError(f"{pmb_type} {instance_id} has residues {residues}. Use cascade=True to delete.") - # ---- RESIDUE: check particles ---- - if pmb_type == "residue": - particles = self._find_instance_ids_by_attribute(pmb_type="particle", - attribute="residue_id", - value=instance_id) - if particles: - raise ValueError(f"Residue {instance_id} contains particles {particles}. Use cascade=True.") - # ---- PARTICLE: check bonds and belonging ---- - if pmb_type == "particle": - if inst.residue_id is not None: - raise ValueError(f"Particle {instance_id} belongs to residue {inst.residue_id}. Use cascade=True.") - if inst.molecule_id is not None: - raise ValueError(f"Particle {instance_id} belongs to molecule {inst.molecule_id}. Use cascade=True.") - if inst.assembly_id is not None: - raise ValueError(f"Particle {instance_id} belongs to assembly {inst.assembly_id}. "f"Use cascade=True.") - bonds = [b_id for b_id, b in self._instances.get("bond", {}).items() if b.particle_id1 == instance_id or b.particle_id2 == instance_id] - if bonds: - raise ValueError(f"Particle {instance_id} participates in bonds {bonds}. Use cascade=True.") + for pid in particles: + self.delete_instance(pmb_type="particle", + instance_id=pid) + # --- Delete children of PARTICLE (only bonds) --- + if pmb_type == "particle": + self._delete_bonds_of_particle(instance_id) # =============== FINAL DELETION STEP ====================== del self._instances[pmb_type][instance_id] if not self._instances[pmb_type]: del self._instances[pmb_type] - def delete_instances(self, pmb_type, cascade=False): + def delete_instances(self, pmb_type): """ Remove all instances registered for a given pyMBE type. 
@@ -766,11 +714,6 @@ def delete_instances(self, pmb_type, cascade=False): Instance category (e.g. ``"particle"``, ``"residue"``, ``"molecule"``, ``"protein"``, ``"hydrogel"``). - cascade ('bool'): - If True, dependent objects are removed according to the - pyMBE hierarchy rules. If False, deletion is forbidden when - dependencies exist. - Notes: - Deletion order is deterministic and safe. - If no instances exist for the given type, the method is a no-op. @@ -783,8 +726,7 @@ def delete_instances(self, pmb_type, cascade=False): for instance_id in instance_ids: self.delete_instance(pmb_type=pmb_type, - instance_id=instance_id, - cascade=cascade) + instance_id=instance_id) def delete_reaction(self, reaction_name): """ @@ -906,7 +848,7 @@ def get_reaction(self, name): The stored reaction instance corresponding to the provided name. """ - if name not in self._reactions[name]: + if name not in self._reactions: raise ValueError(f"Reaction '{name}' not found in the pyMBE database.") else: return self._reactions[name] diff --git a/testsuite/database_unit_tests.py b/testsuite/database_unit_tests.py index 98cc3da..fb77d41 100644 --- a/testsuite/database_unit_tests.py +++ b/testsuite/database_unit_tests.py @@ -19,6 +19,7 @@ import unittest as ut import pyMBE import espressomd +from pyMBE.storage.templates.hydrogel import HydrogelTemplate from pyMBE.storage.instances.particle import ParticleInstance from pyMBE.storage.instances.residue import ResidueInstance from pyMBE.storage.instances.molecule import MoleculeInstance @@ -34,6 +35,151 @@ class Test(ut.TestCase): + + def test_sanity_database_methods(self): + """ + Sanity tests for exceptions in: + _register_instance + _update_instance + get_instances + delete_instance + delete_instances + delete_templates + _register_reaction + get_reaction + delete_reaction + """ + pmb = pyMBE.pymbe_library(23) + # Unit tests for _register_instance + class DummyInstance(): + pass + + inputs = {"instance": DummyInstance()} + 
self.assertRaises(TypeError, + pmb.db._register_instance, + **inputs) + pmb.define_particle(name="A", + sigma=1*pmb.units.nm, + epsilon=1*pmb.units.reduced_energy, + pka=9, + acidity="acidic") + part_inst = ParticleInstance(name="A", + particle_id=0, + initial_state="A") + pmb.db._register_instance(part_inst) + inputs = {"instance": part_inst} + self.assertRaises(ValueError, + pmb.db._register_instance, + **inputs) + templateless_part_inst = ParticleInstance(name="B", + particle_id=1, + initial_state="B") + inputs = {"instance": templateless_part_inst} + self.assertRaises(ValueError, + pmb.db._register_instance, + **inputs) + # Unit test for get_instances + self.assertEqual(pmb.db._instances["particle"], + pmb.db.get_instances(pmb_type="particle")) + + # Unit tests for _update_instance + inputs = {"instance_id": 2, + "pmb_type": "particle", + "attribute": "particle_id", + "value": 0} + self.assertRaises(ValueError, + pmb.db._update_instance, + **inputs) + + pmb.db._register_template(HydrogelTemplate(name="test", + node_map=[], + chain_map=[])) + + pmb.db._register_instance(HydrogelInstance(name="test", + assembly_id=0)) + inputs = {"instance_id": 0, + "pmb_type": "hydrogel", + "attribute": "assembly_id", + "value": 1} + self.assertRaises(ValueError, + pmb.db._update_instance, + **inputs) + + # Unit test for _register_reaction + inputs = {"participants":[ReactionParticipant(particle_name="A", + state_name="A", + coefficient=-1), + ReactionParticipant(particle_name="B", + state_name="B", + coefficient=1)], + "pK":1, + "reaction_type":"test"} + reaction = Reaction(**inputs) + inputs = {"reaction": reaction} + pmb.db._register_reaction(reaction) + self.assertRaises(ValueError, + pmb.db._register_reaction, + **inputs) + # Unit tests for get_reaction: + ## Test that one gets back the right reaction + self.assertEqual(reaction, + pmb.db.get_reaction(name=reaction.name)) + ## Sanity test, giving an unknown reaction name triggers a ValueError + inputs = {"name" : "test"} + 
self.assertRaises(ValueError, + pmb.db.get_reaction, + **inputs) + # Sanity test for delete_reaction + inputs = {"reaction_name": "test"} + self.assertRaises(ValueError, + pmb.db.delete_reaction, + **inputs) + + # Sanity Unit test for delete_instance + inputs = {"pmb_type": "molecule", + "instance_id": 0} + self.assertRaises(ValueError, + pmb.db.delete_instance, + **inputs) + inputs = {"pmb_type": "particle", + "instance_id": 3} + self.assertRaises(ValueError, + pmb.db.delete_instance, + **inputs) + # Sanity tests for delete_template + ## Triggers a ValueError because no molecule template has been defined + inputs = {"pmb_type": "molecule", + "name": "test"} + self.assertRaises(ValueError, + pmb.db.delete_template, + **inputs) + ## Triggers a ValueError because no particle of this name has been defined + inputs = {"pmb_type": "particle", + "name": "test"} + self.assertRaises(ValueError, + pmb.db.delete_template, + **inputs) + ## Triggers a ValueError because particle instances of this template have been created + inputs = {"pmb_type": "particle", + "name": "A"} + self.assertRaises(ValueError, + pmb.db.delete_template, + **inputs) + # Unit tests for delete_instances + ## Trying to delete instances from an empty category does nothing + previous_instances = pmb.db._instances.copy() + pmb.db.delete_instances(pmb_type="molecule") + self.assertEqual(previous_instances, + pmb.db._instances) + + ## Calling the function deletes all instances of a given pmb_type + part_inst = ParticleInstance(name="A", + particle_id=1, + initial_state="A") + pmb.db._register_instance(part_inst) + pmb.db.delete_instances(pmb_type="particle") + assert "particle" not in pmb.db._instances.keys() + def test_find_instance_ids(self): """ Sanity test for `_find_instance_ids_by_attribute` From 302e8232bff27a5aa77da1aae0f8b29c646034e3 Mon Sep 17 00:00:00 2001 From: pmblanco Date: Fri, 6 Feb 2026 12:14:47 +0100 Subject: [PATCH 46/55] solve issues detected by pylint --- pyMBE/lib/handy_functions.py | 
412 ++++++++++-------- pyMBE/pyMBE.py | 50 +-- samples/Beyer2024/create_paper_data.py | 22 +- samples/Beyer2024/globular_protein.py | 39 +- samples/Beyer2024/peptide.py | 16 +- .../weak_polyelectrolyte_dialysis.py | 13 +- samples/branched_polyampholyte.py | 69 ++- samples/peptide_cpH.py | 21 +- samples/peptide_mixture_grxmc_ideal.py | 19 +- samples/plot_branched_polyampholyte.py | 4 +- samples/plot_peptide_cpH.py | 6 +- .../define_and_create_molecules_unit_tests.py | 4 +- testsuite/set_particle_acidity_test.py | 1 - testsuite/test_io_database.py | 3 +- 14 files changed, 344 insertions(+), 335 deletions(-) diff --git a/pyMBE/lib/handy_functions.py b/pyMBE/lib/handy_functions.py index 88545e9..75441fd 100644 --- a/pyMBE/lib/handy_functions.py +++ b/pyMBE/lib/handy_functions.py @@ -23,17 +23,35 @@ def calculate_initial_bond_length(bond_parameters, bond_type, lj_parameters): """ - Calculates the initial bond length that is used when setting up molecules, - based on the minimum of the sum of bonded and short-range (LJ) interactions. - + Calculate an initial bond length for molecule setup. + Args: - bond_object(`espressomd.interactions.BondedInteractions`): instance of a bond object from espressomd library - bond_type(`str`): label identifying the used bonded potential - epsilon(`pint.Quantity`): LJ epsilon of the interaction between the particles - sigma(`pint.Quantity`): LJ sigma of the interaction between the particles - cutoff(`pint.Quantity`): cutoff-radius of the LJ interaction - offset(`pint.Quantity`): offset of the LJ interaction - """ + bond_parameters ('dict'): + Parameters defining the bonded interaction (e.g. equilibrium + distance, force constant), as required by the selected + ``bond_type``. + + bond_type ('str'): + Label identifying the bonded potential used to connect the + particles (e.g. ``"harmonic"``). + + lj_parameters ('dict'): + Parameters of the Lennard-Jones interaction between the bonded + particles. 
Expected entries include ``epsilon``, ``sigma``, + ``cutoff``, and optionally ``offset``, typically given as + ``pint.Quantity`` objects. + + Returns: + ('pint.Quantity'): + Initial bond length resulting from the minimum of the bonded + and Lennard-Jones interactions. + + Notes: + - This function is intended for geometry initialization and does not + affect the interaction parameters used during the simulation. + - The exact interpretation of ``bond_parameters`` depends on + ``bond_type``. + """ def truncated_lj_potential(x, epsilon, sigma, cutoff,offset): if x>cutoff: return 0.0 @@ -59,10 +77,12 @@ def check_aminoacid_key(key): Checks if `key` corresponds to a valid aminoacid letter code. Args: - key(`str`): key to be checked. + key (`str`): + key to be checked. Returns: - `bool`: True if `key` is a valid aminoacid letter code, False otherwise. + (`bool`): + True if `key` is a valid aminoacid letter code, False otherwise. """ valid_AA_keys=['V', #'VAL' 'I', #'ILE' @@ -97,10 +117,12 @@ def check_if_metal_ion(key): Checks if `key` corresponds to a label of a supported metal ion. Args: - key(`str`): key to be checked + key(`str`): + key to be checked Returns: - (`bool`): True if `key` is a supported metal ion, False otherwise. + (`bool`): + True if `key` is a supported metal ion, False otherwise. """ if key in get_metal_ions_charge_number_map().keys(): return True @@ -112,43 +134,19 @@ def define_protein_AA_particles(topology_dict, pmb, pka_set, lj_setup_mode="wca Defines particle templates in pyMBE for all unique residue/atom types appearing in a protein topology dictionary. - The Lennard-Jones parameters (σ, ε, offset) are generated according to the - selected setup mode (currently only the WCA scheme is supported). - - Metal ions are automatically assigned their correct valence charge. - Args: - topology_dict (dict): + topology_dict ('dict'): Dictionary defining the structure of a protein. 
- Keys must be residue/particle identifiers such as `"ALA1"`, `"LYS2"`, - `"ZN3"`, etc., where the alphabetical prefix encodes the residue/ - particle type. - - Each entry must contain: - - `"radius"` (float): Effective radius of the bead, used to - compute the Lennard-Jones offset. - - Example: - { - "ALA1": {"radius": 0.5, ...}, - "GLY2": {"radius": 0.4, ...}, - "ZN3": {"radius": 0.2, ...}, - } - - pmb (pyMBE.pymbe_library): + + pmb ('pyMBE.pymbe_library'): Instance of the pyMBE library. - Dictionary of the form: - {"particle_name": {"pka_value": float, - "acidity": "acidic" | "basic"}} + pka_set ('dict'): + Set of pka_values for the protein aminoacids and their corresponding acidities - lj_setup_mode (str, optional): + lj_setup_mode ('str', optional): Determines how Lennard-Jones parameters are assigned. Defaults to `"wca"`. - Raises: - ValueError: - If `lj_setup_mode` is not supported. - Notes: - Particle names are extracted by stripping trailing digits (e.g., `"ALA1"` → `"ALA"`). @@ -159,7 +157,7 @@ def define_protein_AA_particles(topology_dict, pmb, pka_set, lj_setup_mode="wca """ valid_lj_setups = ["wca"] if lj_setup_mode not in valid_lj_setups: - raise ValueError('Invalid key for the lj setup, supported setup modes are {valid_lj_setups}') + raise ValueError('Invalid key for the lj setup, supported setup modes are {valid_lj_setups}') if lj_setup_mode == "wca": sigma = 1*pmb.units.Quantity("reduced_length") epsilon = 1*pmb.units.Quantity("reduced_energy") @@ -191,30 +189,19 @@ def define_protein_AA_residues(sequence, model, pmb): Define residue templates in the pyMBE database for a protein topology dict. Args: - topology_dict (dict): - Dictionary defining the internal structure of the protein. - Expected format: - { - "ResidueName1": { - "initial_pos": np.ndarray, - "chain_id": int, - "radius": float - }, - "ResidueName2": { ... }, - ... 
- } - The `"initial_pos"` entry is required and represents the residue’s - reference coordinates before shifting to the protein's center-of-mass. - - model (str): + sequence ('str'): + Protein sequence, following the one letter amino acid convention. + + model ('str'): Coarse-grained representation to use. Supported options: - `"1beadAA"` - `"2beadAA"` - pmb (pyMBE.pymbe_library): + pmb ('pyMBE.pymbe_library'): Instance of the pyMBE library. Return: - (list of str): List of the defined residue names + ('list of str'): + List of the defined residue names Notes: - Supported models: @@ -251,16 +238,16 @@ def define_peptide_AA_residues(sequence,model, pmb): Define residue templates in the pyMBE database for a given model. Args: - sequence (list of str): + sequence ('list of str'): Ordered amino-acid sequence of the peptide or protein. Each element must be a residue identifier compatible with the selected model. - model (str): + model ('str'): Coarse-grained representation to use. Supported options: - `"1beadAA"` - `"2beadAA"` - pmb (pyMBE.pymbe_library): + pmb ('pyMBE.pymbe_library'): Instance of the pyMBE library. Notes: @@ -302,10 +289,10 @@ def do_reaction(algorithm, steps): ESPResSo versions. Args: - algorithm: + algorithm ('espressomd.reaction_methods'): ESPResSo reaction algorithm object (e.g. constant pH, reaction ensemble, or similar). - steps (int): + steps ('int'): Number of reaction steps to perform. Notes: @@ -325,18 +312,14 @@ def get_number_of_particles(espresso_system, ptype): """ Returns the number of particles of a given ESPResSo particle type. - This function provides a compatibility wrapper around - `espresso_system.number_of_particles`, which has a different calling - signature depending on the ESPResSo version. - Args: - espresso_system (espressomd.system.System): + espresso_system ('espressomd.system.System'): ESPResSo system object from which the particle count is queried. - ptype (int): + ptype ('int'): ESPResSo particle type identifier. 
Returns: - int: + ('int'): Number of particles in `espresso_system` with particle type `ptype`. Notes: @@ -361,18 +344,19 @@ def get_residues_from_topology_dict(topology_dict, model): Groups beads from a topology dictionary into residues and assigns residue names. Args: - topology_dict (dict): + topology_dict ('dict'): Dictionary describing the molecular topology, where keys are bead identifiers (e.g. "CA12", "SC12") that encode both residue type and residue index. - model (str): + + model ('str'): Protein model identifier. Supported values are: - `"1beadAA"`: single-bead-per-amino-acid model. - `"2beadAA"`: two-bead-per-amino-acid model, where CA beads are excluded from residue name assignment. Returns: - dict: + ('dict'): Dictionary mapping residue indices (as strings) to residue data: { resid: { @@ -424,100 +408,102 @@ def get_metal_ions_charge_number_map(): Gets a map with the charge numbers of all the metal ions supported. Returns: - metal_charge_number_map(dict): Has the structure {"metal_name": metal_charge_number} + ('dict'): + Has the structure {"metal_name": metal_charge_number} """ metal_charge_number_map = {"Ca": 2} return metal_charge_number_map def protein_sequence_parser(sequence): - ''' - Parses `sequence` to the one letter code for amino acids. - - Args: - sequence(`str` or `lst`): Sequence of the amino acid. + """ + Parses `sequence` to the one letter code for amino acids. + + Args: + sequence(`str` or `lst`): + Sequence of the amino acid. - Returns: - clean_sequence(`lst`): `sequence` using the one letter code. 
- - Note: - - Accepted formats for `sequence` are: - - `lst` with one letter or three letter code of each aminoacid in each element - - `str` with the sequence using the one letter code - - `str` with the squence using the three letter code, each aminoacid must be separated by a hyphen "-" - - ''' - # Aminoacid key - keys={"ALA": "A", - "ARG": "R", - "ASN": "N", - "ASP": "D", - "CYS": "C", - "GLU": "E", - "GLN": "Q", - "GLY": "G", - "HIS": "H", - "ILE": "I", - "LEU": "L", - "LYS": "K", - "MET": "M", - "PHE": "F", - "PRO": "P", - "SER": "S", - "THR": "T", - "TRP": "W", - "TYR": "Y", - "VAL": "V", - "PSER": "J", - "PTHR": "U", - "PTyr": "Z", - "NH2": "n", - "COOH": "c"} - clean_sequence=[] - if isinstance(sequence, str): - if sequence.find("-") != -1: - splited_sequence=sequence.split("-") - for residue in splited_sequence: - if len(residue) == 1: - if residue in keys.values(): - residue_ok=residue - else: - if residue.upper() in keys.values(): - residue_ok=residue.upper() - else: - raise ValueError("Unknown one letter code for a residue given: ", residue, " please review the input sequence") - clean_sequence.append(residue_ok) - else: - if residue in keys.keys(): - clean_sequence.append(keys[residue]) - else: - if residue.upper() in keys.keys(): - clean_sequence.append(keys[residue.upper()]) - else: - raise ValueError("Unknown code for a residue: ", residue, " please review the input sequence") - else: - for residue in sequence: + Returns: + (`lst`): ` + sequence` using the one letter code. 
+ + Notes: + - Accepted formats for `sequence` are: + - `lst` with one letter or three letter code of each aminoacid in each element + - `str` with the sequence using the one letter code + - `str` with the squence using the three letter code, each aminoacid must be separated by a hyphen "-" + """ + # Aminoacid key + keys={"ALA": "A", + "ARG": "R", + "ASN": "N", + "ASP": "D", + "CYS": "C", + "GLU": "E", + "GLN": "Q", + "GLY": "G", + "HIS": "H", + "ILE": "I", + "LEU": "L", + "LYS": "K", + "MET": "M", + "PHE": "F", + "PRO": "P", + "SER": "S", + "THR": "T", + "TRP": "W", + "TYR": "Y", + "VAL": "V", + "PSER": "J", + "PTHR": "U", + "PTyr": "Z", + "NH2": "n", + "COOH": "c"} + clean_sequence=[] + if isinstance(sequence, str): + if sequence.find("-") != -1: + splited_sequence=sequence.split("-") + for residue in splited_sequence: + if len(residue) == 1: if residue in keys.values(): residue_ok=residue else: if residue.upper() in keys.values(): residue_ok=residue.upper() else: - raise ValueError("Unknown one letter code for a residue: ", residue, " please review the input sequence") + raise ValueError("Unknown one letter code for a residue given: ", residue, " please review the input sequence") clean_sequence.append(residue_ok) - if isinstance(sequence, list): + else: + if residue in keys.keys(): + clean_sequence.append(keys[residue]) + else: + if residue.upper() in keys.keys(): + clean_sequence.append(keys[residue.upper()]) + else: + raise ValueError("Unknown code for a residue: ", residue, " please review the input sequence") + else: for residue in sequence: if residue in keys.values(): residue_ok=residue else: if residue.upper() in keys.values(): residue_ok=residue.upper() - elif (residue.upper() in keys.keys()): - residue_ok= keys[residue.upper()] else: - raise ValueError("Unknown code for a residue: ", residue, " please review the input sequence") + raise ValueError("Unknown one letter code for a residue: ", residue, " please review the input sequence") 
clean_sequence.append(residue_ok) - return clean_sequence + if isinstance(sequence, list): + for residue in sequence: + if residue in keys.values(): + residue_ok=residue + else: + if residue.upper() in keys.values(): + residue_ok=residue.upper() + elif (residue.upper() in keys.keys()): + residue_ok= keys[residue.upper()] + else: + raise ValueError("Unknown code for a residue: ", residue, " please review the input sequence") + clean_sequence.append(residue_ok) + return clean_sequence def relax_espresso_system(espresso_system, seed, gamma=1e-3, Nsteps_steepest_descent=5000, max_displacement=0.01, Nsteps_iter_relax=500): @@ -532,19 +518,31 @@ def relax_espresso_system(espresso_system, seed, gamma=1e-3, Nsteps_steepest_des If you experience crashes or unexpected behavior, please consider using your own relaxation procedure. Args: - espresso_system (`espressomd.system.System`): system object of espressomd library. - seed (`int`): Seed for the random number generator for the thermostat. - gamma (`float`, optional): Starting damping constant for Langevin dynamics. Defaults to 1e-3 reduced time**-1. - Nsteps_steepest_descent (`int`, optional): Total number of steps for steepest descent minimization. Defaults to 5000. - max_displacement (`float`, optional): Maximum particle displacement allowed during minimization. Defaults to 0.01 reduced length. - Nsteps_iter_relax (`int`, optional): Number of steps per iteration for Langevin dynamics relaxation. Defaults to 500. + espresso_system (`espressomd.system.System`): + system object of espressomd library. + + seed (`int`): + Seed for the random number generator for the thermostat. + + gamma (`float`, optional): + Starting damping constant for Langevin dynamics. Defaults to 1e-3 reduced time**-1. + + Nsteps_steepest_descent (`int`, optional): + Total number of steps for steepest descent minimization. Defaults to 5000. + + max_displacement (`float`, optional): + Maximum particle displacement allowed during minimization. 
Defaults to 0.01 reduced length. + + Nsteps_iter_relax (`int`, optional): + Number of steps per iteration for Langevin dynamics relaxation. Defaults to 500. Return: - (`float`): minimum distance between particles in the system after the relaxation + (`float`): + minimum distance between particles in the system after the relaxation - Note: - The thermostat is turned off by the end of the procedure. - Make sure the system is initialized properly before calling this function. + Notes: + - The thermostat is turned off by the end of the procedure. + - Make sure the system is initialized properly before calling this function. """ # Sanity checks if gamma <= 0: @@ -562,12 +560,10 @@ def relax_espresso_system(espresso_system, seed, gamma=1e-3, Nsteps_steepest_des espresso_system.integrator.run(Nsteps_steepest_descent) logging.debug("*** Finished steepest descent minimization ***") logging.debug("*** Starting Langevin Dynamics relaxation ***") - espresso_system.integrator.set_vv() espresso_system.thermostat.set_langevin(kT=1., gamma=gamma, seed=seed) espresso_system.integrator.run(Nsteps_iter_relax) espresso_system.thermostat.turn_off() - logging.debug("*** Finished Langevin Dynamics relaxation ***") logging.info(f"*** Minimum particle distance after relaxation: {espresso_system.analysis.min_dist()} ***") logging.debug("*** Relaxation finished ***") @@ -578,17 +574,38 @@ def setup_langevin_dynamics(espresso_system, kT, seed,time_step=1e-2, gamma=1, t Sets up Langevin Dynamics for an ESPResSo simulation system. Args: - espresso_system (`espressomd.system.System`): system object of espressomd library. - kT (`pint.Quantity`): Target temperature in reduced energy units. - seed (`int`): Seed for the random number generator for the thermostat. - time_step (`float`, optional): Integration time step. Defaults to 1e-2. - gamma (`float`, optional): Damping coefficient for the Langevin thermostat. Defaults to 1. - tune_skin (`bool`, optional): Whether to optimize the skin parameter. 
Defaults to True. - min_skin (`float`, optional): Minimum skin value for optimization. Defaults to 1. - max_skin (`float`, optional): Maximum skin value for optimization. Defaults to None, which is handled by setting its value to box length / 2. - tolerance (`float`, optional): Tolerance for skin optimization. Defaults to 1e-3. - int_steps (`int`, optional): Number of integration steps for tuning. Defaults to 200. - adjust_max_skin (`bool`, optional): Whether to adjust the maximum skin value during tuning. Defaults to True. + espresso_system (`espressomd.system.System`): + system object of espressomd library. + + kT (`pint.Quantity`): + Target temperature in reduced energy units. + + seed (`int`): + Seed for the random number generator for the thermostat. + + time_step (`float`, optional): + Integration time step. Defaults to 1e-2. + + gamma (`float`, optional): + Damping coefficient for the Langevin thermostat. Defaults to 1. + + tune_skin (`bool`, optional): + Whether to optimize the skin parameter. Defaults to True. + + min_skin (`float`, optional): + Minimum skin value for optimization. Defaults to 1. + + max_skin (`float`, optional): + Maximum skin value for optimization. Defaults to None, which is handled by setting its value to box length / 2. + + tolerance (`float`, optional): + Tolerance for skin optimization. Defaults to 1e-3. + + int_steps (`int`, optional): + Number of integration steps for tuning. Defaults to 200. + + adjust_max_skin (`bool`, optional): + Whether to adjust the maximum skin value during tuning. Defaults to True. """ if not isinstance(seed, int): raise TypeError("seed must be an integer.") @@ -620,21 +637,40 @@ def setup_electrostatic_interactions(units, espresso_system, kT, c_salt=None, so Sets up electrostatic interactions in an ESPResSo system. Args: - units(`pint.UnitRegistry`): Unit registry for handling physical units. - espresso_system(`espressomd.system.System`): system object of espressomd library. 
- kT(`pint.Quantity`): Thermal energy. - c_salt(`pint.Quantity`): Added salt concentration. If provided, the program outputs the debye screening length. It is a mandatory parameter for the Debye-Hückel method. - solvent_permittivity (`float`): Solvent relative permittivity. Defaults to 78.5, correspoding to its value in water at 298.15 K. - method(`str`): Method for computing electrostatic interactions. Defaults to "p3m". - tune_p3m(`bool`): If True, tunes P3M parameters for efficiency. Defaults to True. - accuracy(`float`): Desired accuracy for electrostatics. Defaults to 1e-3. - params(`dict`): Additional parameters for the electrostatic method. For P3M, it can include 'mesh', 'alpha', 'cao' and `r_cut`. For Debye-Hückel, it can include 'r_cut'. - verbose(`bool`): If True, enables verbose output for P3M tuning. Defaults to False. - - Note: - `c_salt` is a mandatory argument for setting up the Debye-Hückel electrostatic potential. - The calculated Bjerrum length is ouput to the log. If `c_salt` is provided, the calculated Debye screening length is also output to the log. - Currently, the only supported electrostatic methods are P3M ("p3m") and Debye-Hückel ("dh"). + units (`pint.UnitRegistry`): + Unit registry for handling physical units. + + espresso_system (`espressomd.system.System`): + system object of espressomd library. + + kT (`pint.Quantity`): + Thermal energy. + + c_salt (`pint.Quantity`): + Added salt concentration. If provided, the program outputs the debye screening length. It is a mandatory parameter for the Debye-Hückel method. + + solvent_permittivity (`float`): + Solvent relative permittivity. Defaults to 78.5, correspoding to its value in water at 298.15 K. + + method (`str`): + Method for computing electrostatic interactions. Defaults to "p3m". + + tune_p3m (`bool`): + If True, tunes P3M parameters for efficiency. Defaults to True. + + accuracy (`float`): + Desired accuracy for electrostatics. Defaults to 1e-3. 
+ + params (`dict`): + Additional parameters for the electrostatic method. For P3M, it can include 'mesh', 'alpha', 'cao' and `r_cut`. For Debye-Hückel, it can include 'r_cut'. + + verbose (`bool`): + If True, enables verbose output for P3M tuning. Defaults to False. + + Notes: + - `c_salt` is a mandatory argument for setting up the Debye-Hückel electrostatic potential. + - The calculated Bjerrum length is ouput to the log. If `c_salt` is provided, the calculated Debye screening length is also output to the log. + - Currently, the only supported electrostatic methods are P3M ("p3m") and Debye-Hückel ("dh"). """ import espressomd.electrostatics import espressomd.version diff --git a/pyMBE/pyMBE.py b/pyMBE/pyMBE.py index b4ff9f1..7da6225 100644 --- a/pyMBE/pyMBE.py +++ b/pyMBE/pyMBE.py @@ -344,34 +344,32 @@ def _create_hydrogel_node(self, node_index, node_name, espresso_system): self.lattice_builder.nodes[key] = node_name return node_position.tolist(), p_id[0] - def _get_espresso_bond_instance(self, bond_template, espresso_system, use_default_bond=False): - """ - Retrieve or create a bond instance in an ESPResSo system for a given pair of particle names. + def _get_espresso_bond_instance(self, bond_template, espresso_system): + """ + Retrieve or create a bond instance in an ESPResSo system for a given pair of particle names. - Args: - bond_template ('BondTemplate'): - BondTemplate object from the pyMBE database. - espresso_system ('espressomd.system.System'): - An ESPResSo system object where the bond will be added or retrieved. - use_default_bond (bool, optional): If True, use a default bond template when no - specific template exists for the particle pair. Defaults to False. + Args: + bond_template ('BondTemplate'): + BondTemplate object from the pyMBE database. + espresso_system ('espressomd.system.System'): + An ESPResSo system object where the bond will be added or retrieved. 
- Returns: - ('espressomd.interactions.BondedInteraction'): - The ESPResSo bond instance object. + Returns: + ('espressomd.interactions.BondedInteraction'): + The ESPResSo bond instance object. - Notes: - When a new bond instance is created, it is not added to the ESPResSo system. - """ - if bond_template.name in self.db.espresso_bond_instances.keys(): - bond_inst = self.db.espresso_bond_instances[bond_template.name] - else: - # Create an instance of the bond - bond_inst = self._create_espresso_bond_instance(bond_type=bond_template.bond_type, - bond_parameters=bond_template.get_parameters(self.units)) - self.db.espresso_bond_instances[bond_template.name]= bond_inst - espresso_system.bonded_inter.add(bond_inst) - return bond_inst + Notes: + When a new bond instance is created, it is not added to the ESPResSo system. + """ + if bond_template.name in self.db.espresso_bond_instances.keys(): + bond_inst = self.db.espresso_bond_instances[bond_template.name] + else: + # Create an instance of the bond + bond_inst = self._create_espresso_bond_instance(bond_type=bond_template.bond_type, + bond_parameters=bond_template.get_parameters(self.units)) + self.db.espresso_bond_instances[bond_template.name]= bond_inst + espresso_system.bonded_inter.add(bond_inst) + return bond_inst def _get_label_id_map(self, pmb_type): """ @@ -2372,7 +2370,7 @@ def propose_unused_type(self): # Flatten all es_type values across all particles and states all_types = [] for es_type in type_map.values(): - all_types.append(es_type) + all_types.append(es_type) # If no es_types exist, start at 0 if not all_types: return 0 diff --git a/samples/Beyer2024/create_paper_data.py b/samples/Beyer2024/create_paper_data.py index 088405b..767aa1e 100644 --- a/samples/Beyer2024/create_paper_data.py +++ b/samples/Beyer2024/create_paper_data.py @@ -1,5 +1,5 @@ # -# Copyright (C) 2024 pyMBE-dev team +# Copyright (C) 2024-2026 pyMBE-dev team # # This file is part of pyMBE. 
# @@ -158,8 +158,8 @@ pka_path=pmb.root / "parameters" / "pka_sets" / "Nozaki1967.json" pmb.load_pka_set (filename=pka_path) if fig_label == "7c": - par_path=pmb.root / "parameters" / "peptides" / "Blanco2021.json" - pmb.load_interaction_parameters(par_path) + par_path=pmb.root / "parameters" / "peptides" / "Blanco2021" + pmb.load_database(par_path) # Load ref data ref_data=analysis.read_csv_file(path=Path(__file__).parent / "data" / fig_data[fig_label]) @@ -172,7 +172,7 @@ model="1beadAA") pH_range_HH = np.linspace(2, 12, num=1000) - Z_HH = pmb.calculate_HH(molecule_name=sequence, + Z_HH = pmb.calculate_HH(template_name=sequence, pH_list=pH_range_HH) # Plot HH @@ -189,15 +189,15 @@ protein_pdb = '1beb' path_to_cg=pmb.root / "parameters" / "globular_proteins" / f"{protein_pdb}.vtf" - topology_dict = pmb.read_protein_vtf_in_df (filename=path_to_cg) - - pmb.define_protein (name=protein_pdb, - topology_dict=topology_dict, - model = '2beadAA') + topology_dict, sequence = pmb.read_protein_vtf(filename=path_to_cg) + + pmb.define_protein(name=protein_pdb, + sequence=sequence, + model = '2beadAA') pH_range_HH = np.linspace(2, 7, num=1000) - Z_HH = pmb.calculate_HH(molecule_name=protein_pdb, + Z_HH = pmb.calculate_HH(template_name=protein_pdb, pH_list=pH_range_HH) # Plot HH @@ -214,7 +214,7 @@ pmb.define_molecule(name='polyacid', residue_list=['rA']) pH_range = np.linspace(1.0, 13.0, num=1000) - Z_HH = pmb.calculate_HH(molecule_name='polyacid', pH_list=pH_range) + Z_HH = pmb.calculate_HH(template_name='polyacid', pH_list=pH_range) alpha_HH = np.abs(np.asarray(Z_HH)) HH_Donnan_charge_dict = pmb.calculate_HH_Donnan( diff --git a/samples/Beyer2024/globular_protein.py b/samples/Beyer2024/globular_protein.py index d821d1d..bc5e769 100644 --- a/samples/Beyer2024/globular_protein.py +++ b/samples/Beyer2024/globular_protein.py @@ -1,5 +1,5 @@ # -# Copyright (C) 2024 pyMBE-dev team +# Copyright (C) 2024-2026 pyMBE-dev team # # This file is part of pyMBE. 
# @@ -57,28 +57,23 @@ action="store_true", default=False, help='Activates the motion of the protein') - parser.add_argument('--ideal', action="store_true", default=False, help='Sets up an ideal system without steric and electrostatic interactions ') - parser.add_argument('--mode', type=str, default= "short-run", choices=["short-run","long-run", "test"], help='sets for how long the simulation runs') - parser.add_argument('--output', type=Path, required= False, default=Path(__file__).parent / "time_series" / "globular_protein", help='output directory') - parser.add_argument('--no_verbose', action='store_false', help="Switch to deactivate verbose",default=True) - args = parser.parse_args () mode=args.mode verbose=args.no_verbose @@ -143,10 +138,10 @@ espresso_system.time_step=dt espresso_system.cell_system.skin=0.4 #Reads the VTF file of the protein model -topology_dict = pmb.read_protein_vtf_in_df (filename=args.path_to_cg) +topology_dict, sequence = pmb.read_protein_vtf (filename=args.path_to_cg) #Defines the protein in the pmb.df pmb.define_protein (name=protein_name, - topology_dict=topology_dict, + sequence=sequence, model = '2beadAA', lj_setup_mode = "wca") @@ -178,12 +173,14 @@ #Here we activate the motion of the protein if args.move_protein: - pmb.enable_motion_of_rigid_object(espresso_system=espresso_system, - name=protein_name) + pmb.enable_motion_of_rigid_object(instance_id=0, + pmb_type="protein", + espresso_system=espresso_system) # Here we put the protein on the center of the simulation box protein_id = pmb.df.loc[pmb.df['name']==protein_name].molecule_id.values[0] -pmb.center_molecule_in_simulation_box (molecule_id=protein_id, +pmb.center_object_in_simulation_box(instance_id=protein_id, + pmb_type="protein", espresso_system=espresso_system) if not args.ideal: @@ -197,11 +194,10 @@ dist = np.linalg.norm(dist) if dist > protein_radius: protein_radius = dist - - # Create counter-ions protein_net_charge = 
pmb.calculate_net_charge(espresso_system=espresso_system, - molecule_name=protein_name, + object_name=protein_name, + pmb_type="protein", dimensionless=True)["mean"] ## Get coordinates outside the volume occupied by the protein @@ -292,8 +288,7 @@ Z_sim=[] particle_id_list = pmb.df.loc[~pmb.df['molecule_id'].isna()].particle_id.dropna().to_list() -#Save the pyMBE dataframe in a CSV file -pmb.write_pmb_df (filename='df.csv') +pmb.save_database (folder=data_path/"database") #Here we start the main loop over the Nsamples @@ -304,8 +299,9 @@ time_series[label]=[] charge_dict=pmb.calculate_net_charge (espresso_system=espresso_system, - molecule_name=protein_name, - dimensionless=True) + object_name=protein_name, + pmb_type="protein", + dimensionless=True) net_charge_residues = charge_dict ['residues'] net_charge_amino_save = {} @@ -323,8 +319,9 @@ espresso_system.integrator.run (steps = integ_steps) do_reaction(cpH, steps=total_ionisable_groups) charge_dict=pmb.calculate_net_charge (espresso_system=espresso_system, - molecule_name=protein_name, - dimensionless=True) + object_name=protein_name, + pmb_type="protein", + dimensionless=True) charge_residues = charge_dict['residues'] charge_residues_per_type={} @@ -352,8 +349,6 @@ charge_amino = np.mean(charge_residues_per_type[label]) time_series[label].append(charge_amino) - - data_path.mkdir(parents=True, exist_ok=True) time_series=pd.DataFrame(time_series) diff --git a/samples/Beyer2024/peptide.py b/samples/Beyer2024/peptide.py index cde5d20..e767baa 100644 --- a/samples/Beyer2024/peptide.py +++ b/samples/Beyer2024/peptide.py @@ -1,5 +1,5 @@ # -# Copyright (C) 2024 pyMBE-dev team +# Copyright (C) 2024-2026 pyMBE-dev team # # This file is part of pyMBE. 
# @@ -77,9 +77,9 @@ raise ValueError(f"ERROR: the only valid peptide sequence for this test script are {valid_sequences}") if sequence in Lunkad_test_sequences: - path_to_interactions=pmb.root / "parameters" / "peptides" / "Lunkad2021.json" + path_to_interactions=pmb.root / "parameters" / "peptides" / "Lunkad2021" path_to_pka=pmb.root / "parameters" / "pka_sets" / "CRC1991.json" - pmb.load_interaction_parameters(filename=path_to_interactions) + pmb.load_database(folder=path_to_interactions) pmb.load_pka_set(filename=path_to_pka) model = '2beadAA' # Model with 2 beads per each aminoacid N_peptide_chains = 4 @@ -91,7 +91,7 @@ elif sequence in Blanco_test_sequence: pmb.set_reduced_units(unit_length=0.4*pmb.units.nm) - pmb.load_interaction_parameters (pmb.root / "parameters" / "peptides" / "Blanco2021.json") + pmb.load_database (pmb.root / "parameters" / "peptides" / "Blanco2021") pmb.load_pka_set (pmb.root / "parameters" / "pka_sets" / "Nozaki1967.json") model = '1beadAA' N_peptide_chains = 1 @@ -148,8 +148,7 @@ espresso_system=espressomd.System (box_l = [L.to('reduced_length').magnitude]*3) espresso_system.time_step=dt espresso_system.cell_system.skin=0.4 -# Add all bonds to espresso system -pmb.add_bonds_to_espresso(espresso_system=espresso_system) + # Create your molecules into the espresso system pmb.create_molecule(name=sequence, @@ -219,8 +218,9 @@ do_reaction(cpH, steps=len(sequence)) # Sample observables charge_dict=pmb.calculate_net_charge(espresso_system=espresso_system, - molecule_name=sequence, - dimensionless=True) + object_name=sequence, + pmb_type="peptide", + dimensionless=True) Rg = espresso_system.analysis.calc_rg(chain_start=0, number_of_chains=N_peptide_chains, diff --git a/samples/Beyer2024/weak_polyelectrolyte_dialysis.py b/samples/Beyer2024/weak_polyelectrolyte_dialysis.py index 833e2ed..10d9af8 100644 --- a/samples/Beyer2024/weak_polyelectrolyte_dialysis.py +++ b/samples/Beyer2024/weak_polyelectrolyte_dialysis.py @@ -1,5 +1,5 @@ # -# Copyright 
(C) 2024 pyMBE-dev team +# Copyright (C) 2024-2026 pyMBE-dev team # # This file is part of pyMBE. # @@ -174,9 +174,6 @@ espresso_system.cell_system.skin=0.4 if verbose: print("Created espresso object") - -# Add all bonds to espresso system -pmb.add_bonds_to_espresso(espresso_system=espresso_system) if verbose: print("Added bonds") @@ -289,11 +286,15 @@ time_series["time"].append(espresso_system.time) # Measure degree of ionization - charge_dict=pmb.calculate_net_charge(espresso_system=espresso_system, molecule_name=polyacid_name, dimensionless=True) + charge_dict=pmb.calculate_net_charge(espresso_system=espresso_system, + object_name=polyacid_name, + pmb_type="molecule", + dimensionless=True) time_series["alpha"].append(np.abs(charge_dict["mean"])/Chain_length) data_path = args.output -data_path.mkdir(parents=True, exist_ok=True) +data_path.mkdir(parents=True, + exist_ok=True) time_series=pd.DataFrame(time_series) filename=analysis.built_output_name(input_dict=inputs) diff --git a/samples/branched_polyampholyte.py b/samples/branched_polyampholyte.py index 49a3b15..23ac891 100644 --- a/samples/branched_polyampholyte.py +++ b/samples/branched_polyampholyte.py @@ -1,5 +1,5 @@ # -# Copyright (C) 2024 pyMBE-dev team +# Copyright (C) 2024-2026 pyMBE-dev team # # This file is part of pyMBE. 
# @@ -81,43 +81,37 @@ # Define different particles # Inert particle -pmb.define_particle( - name = "I", - z = 0, - sigma = 1*pmb.units('reduced_length'), - epsilon = 1*pmb.units('reduced_energy')) +pmb.define_particle(name = "I", + z = 0, + sigma = 1*pmb.units('reduced_length'), + epsilon = 1*pmb.units('reduced_energy')) # Acidic particle -pmb.define_particle( - name = "A", - acidity = "acidic", - pka = 4, - sigma = 1*pmb.units('reduced_length'), - epsilon = 1*pmb.units('reduced_energy')) +pmb.define_particle(name = "A", + acidity = "acidic", + pka = 4, + sigma = 1*pmb.units('reduced_length'), + epsilon = 1*pmb.units('reduced_energy')) # Basic particle -pmb.define_particle( - name = "B", - acidity = "basic", - pka = 9, - sigma = 1*pmb.units('reduced_length'), - epsilon = 1*pmb.units('reduced_energy')) +pmb.define_particle(name = "B", + acidity = "basic", + pka = 9, + sigma = 1*pmb.units('reduced_length'), + epsilon = 1*pmb.units('reduced_energy')) # Define different residues -pmb.define_residue( - name = "Res_1", - central_bead = "I", - side_chains = ["A","B"]) +pmb.define_residue(name = "Res_1", + central_bead = "I", + side_chains = ["A","B"]) -pmb.define_residue( - name = "Res_2", - central_bead = "I", - side_chains = ["Res_1"]) +pmb.define_residue(name = "Res_2", + central_bead = "I", + side_chains = ["Res_1"]) # Define the molecule -pmb.define_molecule( - name = "polyampholyte", - residue_list = 2*["Res_1"] + ["Res_2"] + 2*["Res_1"] + 2*["Res_2"]) +pmb.define_molecule(name = "polyampholyte", + residue_list = 2*["Res_1"] + ["Res_2"] + 2*["Res_1"] + 2*["Res_2"]) # Define bonds bond_type = 'harmonic' @@ -125,9 +119,7 @@ generic_harmonic_constant = 400 * pmb.units('reduced_energy / reduced_length**2') harmonic_bond = {'r_0' : generic_bond_length, - 'k' : generic_harmonic_constant, - } - + 'k' : generic_harmonic_constant} pmb.define_default_bond(bond_type = bond_type, bond_parameters = harmonic_bond) @@ -154,8 +146,6 @@ espresso_system=espressomd.System(box_l = 
[L.to('reduced_length').magnitude]*3) espresso_system.time_step=dt espresso_system.cell_system.skin=0.4 -# Add all bonds to espresso system -pmb.add_bonds_to_espresso(espresso_system=espresso_system) # Create your molecules into the espresso system pmb.create_molecule(name="polyampholyte", @@ -227,8 +217,8 @@ tune_skin=False) espresso_system.cell_system.skin=0.4 -#Save the pyMBE dataframe in a CSV file -pmb.write_pmb_df (filename='df.csv') +#Save the pyMBE database +pmb.save_database (folder=args.output / 'database') # Main loop for performing simulations at different pH-values time_series={} @@ -242,9 +232,9 @@ do_reaction(cpH, steps=total_ionisable_groups) # Get polyampholyte net charge charge_dict=pmb.calculate_net_charge(espresso_system=espresso_system, - molecule_name="polyampholyte", + object_name="polyampholyte", + pmb_type="molecule", dimensionless=True) - time_series["time"].append(espresso_system.time) time_series["charge"].append(charge_dict["mean"]) if step % N_samples_print == 0: @@ -252,10 +242,9 @@ with open(frames_path / f"trajectory{N_frame}.vtf", mode='w+t') as coordinates: vtf.writevsf(espresso_system, coordinates) vtf.writevcf(espresso_system, coordinates) - # Store time series data_path=args.output data_path.mkdir(parents=True, exist_ok=True) time_series=pd.DataFrame(time_series) filename=built_output_name(input_dict={"pH":pH_value}) -time_series.to_csv(data_path / f"{filename}_time_series.csv", index=False) +time_series.to_csv(data_path / f"{filename}_time_series.csv", index=False) \ No newline at end of file diff --git a/samples/peptide_cpH.py b/samples/peptide_cpH.py index 1461c4e..3209644 100644 --- a/samples/peptide_cpH.py +++ b/samples/peptide_cpH.py @@ -1,5 +1,5 @@ # -# Copyright (C) 2024 pyMBE-dev team +# Copyright (C) 2024-2026 pyMBE-dev team # # This file is part of pyMBE. # @@ -94,9 +94,9 @@ # Load peptide parametrization from Lunkad, R. et al. Molecular Systems Design & Engineering (2021), 6(2), 122-131. 
-path_to_interactions=pmb.root / "parameters" / "peptides" / "Lunkad2021.json" +path_to_interactions=pmb.root / "parameters" / "peptides" / "Lunkad2021" path_to_pka=pmb.root / "parameters" / "pka_sets" / "Hass2015.json" -pmb.load_interaction_parameters (filename=path_to_interactions) +pmb.load_database(folder=path_to_interactions) pmb.load_pka_set (path_to_pka) generic_bond_length=0.4 * pmb.units.nm @@ -129,8 +129,6 @@ espresso_system=espressomd.System (box_l = [L.to('reduced_length').magnitude]*3) espresso_system.time_step=dt espresso_system.cell_system.skin=0.4 -# Add all bonds to espresso system -pmb.add_bonds_to_espresso(espresso_system=espresso_system) # Create your molecules into the espresso system pmb.create_molecule(name=peptide_name, @@ -209,8 +207,8 @@ # for this example, we use a hard-coded skin value; In general it should be optimized by tuning espresso_system.cell_system.skin=0.4 -#Save the pyMBE dataframe in a CSV file -pmb.write_pmb_df(filename='df.csv') +#Save the pyMBE database +pmb.save_database(folder=args.output/'database') # Initialize the time series with arbitrary values at time = 0 time_series={} # for convenience, here we save the whole time series in a python dictionary @@ -221,16 +219,15 @@ # Main loop for performing simulations at different pH-values N_frame=0 for sample in tqdm.trange(N_samples): - # LD sampling of the configuration space espresso_system.integrator.run(steps=MD_steps_per_sample) # cpH sampling of the reaction space do_reaction(cpH, steps=total_ionisable_groups) # rule of thumb: one reaction step per titratable group (on average) - # Get peptide net charge charge_dict=pmb.calculate_net_charge(espresso_system=espresso_system, - molecule_name=peptide_name, - dimensionless=True) + object_name=peptide_name, + pmb_type="peptide", + dimensionless=True) time_series["time"].append(espresso_system.time) time_series["charge"].append(charge_dict["mean"]) if sample % N_samples_print == 0: @@ -240,12 +237,10 @@ 
vtf.writevcf(espresso_system, coordinates) # Store time series - data_path=args.output data_path.mkdir(parents=True, exist_ok=True) time_series=pd.DataFrame(time_series) filename=built_output_name(input_dict={"sequence":sequence,"pH":pH_value}) - time_series.to_csv(data_path / f"{filename}_time_series.csv", index=False) diff --git a/samples/peptide_mixture_grxmc_ideal.py b/samples/peptide_mixture_grxmc_ideal.py index cc96316..06915c0 100644 --- a/samples/peptide_mixture_grxmc_ideal.py +++ b/samples/peptide_mixture_grxmc_ideal.py @@ -1,5 +1,5 @@ # -# Copyright (C) 2024 pyMBE-dev team +# Copyright (C) 2024-2026 pyMBE-dev team # # This file is part of pyMBE. # @@ -169,9 +169,6 @@ # Create an instance of an espresso system espresso_system=espressomd.System (box_l = [L.to('reduced_length').magnitude]*3) -# Add all bonds to espresso system -pmb.add_bonds_to_espresso(espresso_system=espresso_system) - # Create your molecules into the espresso system pmb.create_molecule(name=peptide1, number_of_molecules=N_peptide1_chains, @@ -265,8 +262,8 @@ espresso_system.thermostat.set_langevin(kT=pmb.kT.to('reduced_energy').magnitude, gamma=0.1, seed=LANGEVIN_SEED) espresso_system.cell_system.skin=0.4 -#Save the pyMBE dataframe in a CSV file -pmb.write_pmb_df (filename='df.csv') +#Save the pyMBE database +pmb.save_database(folder=args.output / 'database') time_series={} for label in ["time","charge_peptide1","charge_peptide2","num_plus","xi_plus"]: time_series[label]=[] @@ -279,11 +276,13 @@ time_series["time"].append(espresso_system.time) # Get net charge of peptide1 and peptide2 charge_dict_peptide1=pmb.calculate_net_charge(espresso_system=espresso_system, - molecule_name=peptide1, - dimensionless=True) + object_name=peptide1, + pmb_type="peptide", + dimensionless=True) charge_dict_peptide2=pmb.calculate_net_charge(espresso_system=espresso_system, - molecule_name=peptide2, - dimensionless=True) + object_name=peptide2, + pmb_type="peptide", + dimensionless=True) 
time_series["charge_peptide1"].append(charge_dict_peptide1["mean"]) time_series["charge_peptide2"].append(charge_dict_peptide2["mean"]) if args.mode == 'standard': diff --git a/samples/plot_branched_polyampholyte.py b/samples/plot_branched_polyampholyte.py index 65bd4c6..6dd8fd0 100644 --- a/samples/plot_branched_polyampholyte.py +++ b/samples/plot_branched_polyampholyte.py @@ -1,5 +1,5 @@ # -# Copyright (C) 2024 pyMBE-dev team +# Copyright (C) 2024-2026 pyMBE-dev team # # This file is part of pyMBE. # @@ -85,7 +85,7 @@ pH_range_HH = np.linspace(2, 12, num=100) elif args.mode == "store_HH": pH_range_HH = [3.5,4.5,8.5,9.5] -Z_HH = pmb.calculate_HH(molecule_name="polyampholyte", +Z_HH = pmb.calculate_HH(template_name="polyampholyte", pH_list=pH_range_HH) if args.mode == "plot": diff --git a/samples/plot_peptide_cpH.py b/samples/plot_peptide_cpH.py index 46943ea..22ac3ac 100644 --- a/samples/plot_peptide_cpH.py +++ b/samples/plot_peptide_cpH.py @@ -1,5 +1,5 @@ # -# Copyright (C) 2024 pyMBE-dev team +# Copyright (C) 2024-2026 pyMBE-dev team # # This file is part of pyMBE. # @@ -66,8 +66,8 @@ pH_range_HH = np.linspace(2, 12, num=100) elif args.mode == "store_HH": pH_range_HH = [2,4,5,6] -Z_HH = pmb.calculate_HH(molecule_name=peptide, - pH_list=pH_range_HH) +Z_HH = pmb.calculate_HH(template_name=peptide, + pH_list=pH_range_HH) if args.mode == "plot": # Read the analyzed data produced with peptide_mixture_grxmc_ideal diff --git a/testsuite/define_and_create_molecules_unit_tests.py b/testsuite/define_and_create_molecules_unit_tests.py index fc8699e..274e40e 100644 --- a/testsuite/define_and_create_molecules_unit_tests.py +++ b/testsuite/define_and_create_molecules_unit_tests.py @@ -1,5 +1,5 @@ # -# Copyright (C) 2024-2025 pyMBE-dev team +# Copyright (C) 2024-2026 pyMBE-dev team # # This file is part of pyMBE. 
# @@ -19,7 +19,6 @@ # Import pyMBE and other libraries import pyMBE import numpy as np -import pandas as pd import espressomd import unittest as ut @@ -218,7 +217,6 @@ def test_create_and_delete_particles(self): bonded_in_espresso = False for pid in bonded_pair: for bond in espresso_system.part.by_id(pid).bonds[:]: - bond_object = bond[0] partner_id = bond[1] if partner_id in bonded_pair: bonded_in_espresso=True diff --git a/testsuite/set_particle_acidity_test.py b/testsuite/set_particle_acidity_test.py index 6adfdc2..2ce9c8e 100644 --- a/testsuite/set_particle_acidity_test.py +++ b/testsuite/set_particle_acidity_test.py @@ -17,7 +17,6 @@ # along with this program. If not, see . # Import pyMBE and other libraries -import numpy as np import pandas as pd import pyMBE import unittest as ut diff --git a/testsuite/test_io_database.py b/testsuite/test_io_database.py index e99a6b5..4efe2cd 100644 --- a/testsuite/test_io_database.py +++ b/testsuite/test_io_database.py @@ -28,7 +28,6 @@ from pyMBE.storage.io import _decode, _encode, _load_database_csv, _save_database_csv from pyMBE.storage.pint_quantity import PintQuantity from pyMBE.storage.instances.bond import BondInstance -from pyMBE.storage.templates.bond import BondTemplate from pathlib import Path import csv @@ -558,7 +557,7 @@ def test_io_instances(self): pmb.db.delete_templates(pmb_type="residue") pmb.db.delete_reactions() # Test instances of a protein (tests protein, residue and particle instances) - path_to_protein_structure = pmb.root / "parameters" / "globular_proteins" / f"1beb.vtf", + path_to_protein_structure = pmb.root / "parameters" / "globular_proteins" / "1beb.vtf" topology_dict, sequence = pmb.read_protein_vtf (filename=path_to_protein_structure[0]) pmb.load_pka_set(filename=path_to_pka) # Define AA particles and residues From 9fd511ffbf201a380a770dd6ea7cbb0d16235916 Mon Sep 17 00:00:00 2001 From: pmblanco Date: Fri, 6 Feb 2026 12:20:05 +0100 Subject: [PATCH 47/55] fix bug --- pyMBE/pyMBE.py | 3 +-- 1 
file changed, 1 insertion(+), 2 deletions(-) diff --git a/pyMBE/pyMBE.py b/pyMBE/pyMBE.py index 7da6225..a050985 100644 --- a/pyMBE/pyMBE.py +++ b/pyMBE/pyMBE.py @@ -822,8 +822,7 @@ def create_bond(self, particle_id1, particle_id2, espresso_system, use_default_b particle_name2=particle_inst_2.name, use_default_bond=use_default_bond) bond_inst = self._get_espresso_bond_instance(bond_template=bond_tpl, - espresso_system=espresso_system, - use_default_bond=use_default_bond) + espresso_system=espresso_system) espresso_system.part.by_id(particle_id1).add_bond((bond_inst, particle_id2)) bond_id = self.db._propose_instance_id(pmb_type="bond") pmb_bond_instance = BondInstance(bond_id=bond_id, From 281ea6f0be48001def24d6c9e5b3113f585f5642 Mon Sep 17 00:00:00 2001 From: pmblanco Date: Fri, 6 Feb 2026 12:30:02 +0100 Subject: [PATCH 48/55] solve dependency issues on the CI --- pyMBE/storage/manager.py | 4 ++-- requirements.txt | 1 + 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/pyMBE/storage/manager.py b/pyMBE/storage/manager.py index f333784..dc1cc51 100644 --- a/pyMBE/storage/manager.py +++ b/pyMBE/storage/manager.py @@ -807,8 +807,8 @@ def get_instance(self, pmb_type, instance_id): pmb_type ('str'): The instance pyMBE category. - name ('str'): - The unique name of the template to retrieve. 
+ instance_id ('int'): + The unique id identifying the given instance Returns: ('InstanceType'): diff --git a/requirements.txt b/requirements.txt index ec349a9..d2ac082 100644 --- a/requirements.txt +++ b/requirements.txt @@ -6,6 +6,7 @@ biopandas==0.5.1.dev0 scipy>=1.8.0 matplotlib>=3.5.1 pydantic>=2.12.5 +typing-extensions>=4.10 # soft dependencies to run the samples tqdm>=4.57.0 # soft dependencies to run the testsuite From 40694734e915e89b573d2117669bc74aa4eeacf7 Mon Sep 17 00:00:00 2001 From: pmblanco Date: Fri, 6 Feb 2026 12:37:46 +0100 Subject: [PATCH 49/55] fix new bug in unit test --- samples/branched_polyampholyte.py | 11 +++++++++++ testsuite/test_io_database.py | 2 +- 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/samples/branched_polyampholyte.py b/samples/branched_polyampholyte.py index 23ac891..df5b04f 100644 --- a/samples/branched_polyampholyte.py +++ b/samples/branched_polyampholyte.py @@ -163,6 +163,17 @@ c_salt=c_salt) #List of ionisable groups +# collect ionisable particles* +acidbase_templates = [] +for state_name, state_tpl in pmb.db._templates["particle"].items(): + if state_tpl.acidity in ["basic","acidic"]: + acidbase_templates+=pmb.db.get_particle_templates_under(template_name=state_tpl,pmb_type="particle_state") + +print(acidbase_templates) +total_ionisable_groups = len(list_ionisable_groups) + + + basic_groups = pmb.df.loc[(~pmb.df['particle_id'].isna()) & (pmb.df['acidity']=='basic')].name.to_list() acidic_groups = pmb.df.loc[(~pmb.df['particle_id'].isna()) & (pmb.df['acidity']=='acidic')].name.to_list() list_ionisable_groups = basic_groups + acidic_groups diff --git a/testsuite/test_io_database.py b/testsuite/test_io_database.py index 4efe2cd..2bf1cd7 100644 --- a/testsuite/test_io_database.py +++ b/testsuite/test_io_database.py @@ -558,7 +558,7 @@ def test_io_instances(self): pmb.db.delete_reactions() # Test instances of a protein (tests protein, residue and particle instances) path_to_protein_structure = pmb.root / 
"parameters" / "globular_proteins" / "1beb.vtf" - topology_dict, sequence = pmb.read_protein_vtf (filename=path_to_protein_structure[0]) + topology_dict, sequence = pmb.read_protein_vtf (filename=path_to_protein_structure) pmb.load_pka_set(filename=path_to_pka) # Define AA particles and residues hf.define_protein_AA_particles(topology_dict=topology_dict, From d2c9f9b04fb70a1eb122813886fd3e22d166d9ac Mon Sep 17 00:00:00 2001 From: pmblanco Date: Fri, 6 Feb 2026 18:14:08 +0100 Subject: [PATCH 50/55] lower requierements for pydantic to comply with current EESSI --- pyMBE/storage/instances/bond.py | 9 +- pyMBE/storage/instances/hydrogel.py | 9 +- pyMBE/storage/instances/molecule.py | 8 +- pyMBE/storage/instances/particle.py | 11 +- pyMBE/storage/instances/peptide.py | 7 +- pyMBE/storage/instances/protein.py | 7 +- pyMBE/storage/instances/residue.py | 10 +- pyMBE/storage/io.py | 14 +-- pyMBE/storage/manager.py | 68 +++++++------ pyMBE/storage/reactions/reaction.py | 150 +++++++++++++++++++++------- pyMBE/storage/templates/hydrogel.py | 12 ++- pyMBE/storage/templates/lj.py | 46 +++++++-- requirements.txt | 3 +- samples/branched_polyampholyte.py | 26 ++--- testsuite/database_unit_tests.py | 16 ++- testsuite/test_io_database.py | 6 +- 16 files changed, 260 insertions(+), 142 deletions(-) diff --git a/pyMBE/storage/instances/bond.py b/pyMBE/storage/instances/bond.py index 0a981c7..65320fc 100644 --- a/pyMBE/storage/instances/bond.py +++ b/pyMBE/storage/instances/bond.py @@ -18,7 +18,7 @@ # from pyMBE.storage.base_type import PMBBaseModel -from pydantic import field_validator +from pydantic import validator class BondInstance(PMBBaseModel): """ @@ -53,10 +53,9 @@ class BondInstance(PMBBaseModel): particle_id1: int particle_id2: int - @field_validator("bond_id", "particle_id1", "particle_id2") - @classmethod - def validate_non_negative_int(cls, value, info): + @validator("bond_id", "particle_id1", "particle_id2") + def validate_non_negative_int(cls, value, field): if value < 
0: - raise ValueError(f"{info.field_name} must be a non-negative integer.") + raise ValueError(f"{field.name} must be a non-negative integer.") return value diff --git a/pyMBE/storage/instances/hydrogel.py b/pyMBE/storage/instances/hydrogel.py index e03f173..ea48168 100644 --- a/pyMBE/storage/instances/hydrogel.py +++ b/pyMBE/storage/instances/hydrogel.py @@ -17,11 +17,8 @@ # along with this program. If not, see . # -from typing import List -from pydantic import Field from ..base_type import PMBBaseModel -from pydantic import field_validator - +from pydantic import validator class HydrogelInstance(PMBBaseModel): """ @@ -42,10 +39,10 @@ class HydrogelInstance(PMBBaseModel): hydrogel exists in the system), not a template describing generic hydrogel types. """ - pmb_type: str = Field(default="hydrogel", frozen=True) + pmb_type: str = "hydrogel" assembly_id: int name: str - @field_validator("assembly_id") + @validator("assembly_id") def validate_bond_id(cls, aid): if aid < 0: raise ValueError("assembly_id must be a non-negative integer.") diff --git a/pyMBE/storage/instances/molecule.py b/pyMBE/storage/instances/molecule.py index 66fe0a9..2eabef7 100644 --- a/pyMBE/storage/instances/molecule.py +++ b/pyMBE/storage/instances/molecule.py @@ -18,8 +18,8 @@ # from pyMBE.storage.base_type import PMBBaseModel -from pydantic import field_validator - +from pydantic import validator +from typing import Optional class MoleculeInstance(PMBBaseModel): """ @@ -46,9 +46,9 @@ class MoleculeInstance(PMBBaseModel): pmb_type: str = "molecule" name: str # molecule template name molecule_id: int - assembly_id: int | None = None + assembly_id: Optional[int] = None - @field_validator("molecule_id") + @validator("molecule_id") def validate_residue_id(cls, mid): if mid < 0: raise ValueError("molecule_id must be a non-negative integer.") diff --git a/pyMBE/storage/instances/particle.py b/pyMBE/storage/instances/particle.py index 8fd04e0..2819ba3 100644 --- a/pyMBE/storage/instances/particle.py 
+++ b/pyMBE/storage/instances/particle.py @@ -17,7 +17,8 @@ # along with this program. If not, see . # -from pydantic import field_validator +from typing import Optional +from pydantic import validator from ..base_type import PMBBaseModel @@ -55,11 +56,11 @@ class ParticleInstance(PMBBaseModel): name: str particle_id: int initial_state: str - residue_id: int | None = None - molecule_id: int | None = None - assembly_id: int | None = None + residue_id: Optional[int] = None + molecule_id: Optional[int] = None + assembly_id: Optional[int] = None - @field_validator("particle_id") + @validator("particle_id") def validate_particle_id(cls, pid): if pid < 0: raise ValueError("particle_id must be a non-negative integer.") diff --git a/pyMBE/storage/instances/peptide.py b/pyMBE/storage/instances/peptide.py index fa8c4c3..7566476 100644 --- a/pyMBE/storage/instances/peptide.py +++ b/pyMBE/storage/instances/peptide.py @@ -18,7 +18,8 @@ # from pyMBE.storage.base_type import PMBBaseModel -from pydantic import field_validator +from pydantic import validator +from typing import Optional class PeptideInstance(PMBBaseModel): @@ -45,9 +46,9 @@ class PeptideInstance(PMBBaseModel): pmb_type: str = "peptide" name: str # molecule template name molecule_id: int - assembly_id: int | None = None + assembly_id: Optional[int] = None - @field_validator("molecule_id") + @validator("molecule_id") def validate_residue_id(cls, mid): if mid < 0: raise ValueError("molecule_id must be a non-negative integer.") diff --git a/pyMBE/storage/instances/protein.py b/pyMBE/storage/instances/protein.py index 73a79d2..454f8a3 100644 --- a/pyMBE/storage/instances/protein.py +++ b/pyMBE/storage/instances/protein.py @@ -18,7 +18,8 @@ # from pyMBE.storage.base_type import PMBBaseModel -from pydantic import field_validator +from pydantic import validator +from typing import Optional class ProteinInstance(PMBBaseModel): @@ -46,9 +47,9 @@ class ProteinInstance(PMBBaseModel): pmb_type: str = "protein" name: str # 
molecule template name molecule_id: int - assembly_id: int | None = None + assembly_id: Optional[int] = None - @field_validator("molecule_id") + @validator("molecule_id") def validate_residue_id(cls, mid): if mid < 0: raise ValueError("molecule_id must be a non-negative integer.") diff --git a/pyMBE/storage/instances/residue.py b/pyMBE/storage/instances/residue.py index 97e3be5..7a15644 100644 --- a/pyMBE/storage/instances/residue.py +++ b/pyMBE/storage/instances/residue.py @@ -18,8 +18,8 @@ # from pyMBE.storage.base_type import PMBBaseModel -from pydantic import field_validator - +from pydantic import validator +from typing import Optional class ResidueInstance(PMBBaseModel): """ @@ -49,10 +49,10 @@ class ResidueInstance(PMBBaseModel): pmb_type: str = "residue" name: str # residue template name residue_id: int - molecule_id: int | None = None - assembly_id: int | None = None + molecule_id: Optional[int] = None + assembly_id: Optional[int] = None - @field_validator("residue_id") + @validator("residue_id") def validate_residue_id(cls, rid): if rid < 0: raise ValueError("residue_id must be a non-negative integer.") diff --git a/pyMBE/storage/io.py b/pyMBE/storage/io.py index a316cd6..7082652 100644 --- a/pyMBE/storage/io.py +++ b/pyMBE/storage/io.py @@ -404,8 +404,8 @@ def _save_database_csv(db, folder): # HYDROGEL TEMPLATE elif pmb_type == "hydrogel" and isinstance(tpl, HydrogelTemplate): rows.append({"name": tpl.name, - "node_map": _encode([node.model_dump() for node in tpl.node_map]), - "chain_map": _encode([chain.model_dump() for chain in tpl.chain_map])}) + "node_map": _encode([node.dict() for node in tpl.node_map]), + "chain_map": _encode([chain.dict() for chain in tpl.chain_map])}) # LJ TEMPLATE elif pmb_type == "lj" and isinstance(tpl, LJInteractionTemplate): rows.append({"name": tpl.name, @@ -417,9 +417,9 @@ def _save_database_csv(db, folder): "offset": _encode(tpl.offset), "shift": _encode(tpl.shift)}) else: - # Generic fallback: try model_dump() + # 
Generic fallback: try dict() try: - rows.append(tpl.model_dump()) + rows.append(tpl.dict()) except Exception: rows.append({"name": getattr(tpl, "name", None)}) @@ -470,9 +470,9 @@ def _save_database_csv(db, folder): "name": inst.name, "assembly_id": int(inst.assembly_id)}) else: - # fallback to model_dump + # fallback to dict try: - rows.append(inst.model_dump()) + rows.append(inst.dict()) except Exception: rows.append({"name": getattr(inst, "name", None)}) @@ -483,7 +483,7 @@ def _save_database_csv(db, folder): rows = [] for rx in db._reactions.values(): rows.append({"name": rx.name, - "participants": _encode([p.model_dump() for p in rx.participants]), + "participants": _encode([p.dict() for p in rx.participants]), "pK": rx.pK if hasattr(rx, "pK") else None, "reaction_type": rx.reaction_type, "metadata": _encode(rx.metadata) if getattr(rx, "metadata", None) is not None else ""}) diff --git a/pyMBE/storage/manager.py b/pyMBE/storage/manager.py index dc1cc51..25bb351 100644 --- a/pyMBE/storage/manager.py +++ b/pyMBE/storage/manager.py @@ -196,35 +196,7 @@ def _find_instance_ids_by_attribute(self, pmb_type, attribute, value): results.append(inst_id) return results - def _find_instance_ids_by_name(self, pmb_type, name): - """ - Return the IDs of all instances of a given pyMBE type that use a - specific template name. - - Args: - pmb_type ('str'): - The instance category to search within. - - name ('str'): - The template name associated with the instances of interest. - - Returns: - ('list[int]'): - A list of instance IDs whose underlying template name matches - ``name``. The list is empty if no such instances exist. - - Notes: - - Only exact name matches are considered. - - This method does not validate whether the corresponding template - actually exists; it only inspects registered *instances*. 
- """ - if pmb_type not in self._instances: - return [] - result = [] - for iid, inst in self._instances[pmb_type].items(): - if hasattr(inst, "name") and inst.name == name: - result.append(iid) - return result + def _find_template_types(self, name): """ @@ -294,7 +266,7 @@ def _get_instances_df(self, pmb_type): "assembly_id": int(inst.assembly_id) if inst.assembly_id is not None else pd.NA}) else: # Generic representation for other types - rows.append(inst.model_dump()) + rows.append(inst.dict()) return pd.DataFrame(rows) def _get_reactions_df(self): @@ -378,7 +350,7 @@ def _get_templates_df(self, pmb_type): "parameters": parameters}) else: # Generic representation for other types - rows.append(tpl.model_dump()) + rows.append(tpl.dict()) return pd.DataFrame(rows) def _has_instance(self, pmb_type, instance_id): @@ -517,7 +489,7 @@ def _update_instance(self, instance_id, pmb_type, attribute, value): * ``molecule``: ``assembly_id`` * All other types: no attribute updates allowed. - The method replaces the instance with a new Pydantic model - using ``model_copy(update=...)`` to maintain immutability and + using ``copy(update=...)`` to maintain immutability and avoid partial mutations of internal state. """ if instance_id not in self._instances[pmb_type]: @@ -532,7 +504,7 @@ def _update_instance(self, instance_id, pmb_type, attribute, value): allowed = [None] # No attributes allowed for other types if attribute not in allowed: raise ValueError(f"Attribute '{attribute}' not allowed for {pmb_type}. 
Allowed attributes: {allowed}") - self._instances[pmb_type][instance_id] = self._instances[pmb_type][instance_id].model_copy(update={attribute: value}) + self._instances[pmb_type][instance_id] = self._instances[pmb_type][instance_id].copy(update={attribute: value}) def _propagate_id(self, root_type, root_id, attribute, value): """ @@ -799,6 +771,36 @@ def delete_templates(self, pmb_type): self.delete_template(pmb_type=pmb_type, name=template) + def find_instance_ids_by_name(self, pmb_type, name): + """ + Return the IDs of all instances of a given pyMBE type that use a + specific template name. + + Args: + pmb_type ('str'): + The instance category to search within. + + name ('str'): + The template name associated with the instances of interest. + + Returns: + ('list[int]'): + A list of instance IDs whose underlying template name matches + ``name``. The list is empty if no such instances exist. + + Notes: + - Only exact name matches are considered. + - This method does not validate whether the corresponding template + actually exists; it only inspects registered *instances*. + """ + if pmb_type not in self._instances: + return [] + result = [] + for iid, inst in self._instances[pmb_type].items(): + if hasattr(inst, "name") and inst.name == name: + result.append(iid) + return result + def get_instance(self, pmb_type, instance_id): """ Retrieve a stored instance by type and instance_id. 
diff --git a/pyMBE/storage/reactions/reaction.py b/pyMBE/storage/reactions/reaction.py index 325108a..fe33f69 100644 --- a/pyMBE/storage/reactions/reaction.py +++ b/pyMBE/storage/reactions/reaction.py @@ -18,8 +18,7 @@ # from typing import List, Dict, Optional -from pydantic import BaseModel, Field, field_validator, model_validator - +from pydantic import BaseModel, validator, root_validator class ReactionParticipant(BaseModel): """ @@ -82,18 +81,74 @@ class Reaction(BaseModel): simulation_method: Optional[str] = None name: Optional[str] = None - @model_validator(mode="after") - def generate_name(self): - """Automatically generate reaction name from participants.""" + @validator("participants") + def at_least_two_participants(cls, v): + """ + Ensures that the reaction contains at least two participants. + + Args: + v ('List[ReactionParticipant]'): + List of reaction participants. + + Returns: + ('List[ReactionParticipant]'): + The validated list of participants. + + Raises: + ValueError: + If fewer than two participants are provided. + """ + if len(v) < 2: + raise ValueError("A reaction must have at least 2 participants.") + return v + + @validator("participants") + def no_zero_coeff(cls, v): + """ + Ensures that no participant has a zero stoichiometric coefficient. + + Args: + v ('List[ReactionParticipant]'): + List of reaction participants. + + Returns: + ('List[ReactionParticipant]'): + The validated list of participants. + + Raises: + ValueError: + If any participant has a coefficient equal to zero. + """ + for p in v: + if p.coefficient == 0: + raise ValueError( + f"Participant {p.state_name} has coefficient 0." + ) + return v + + @root_validator + def generate_name(cls, values): + """ + Automatically generates a reaction name from the participants. + + The name is constructed by separating reactants and products + based on the sign of their stoichiometric coefficients and + joining them with a reversible reaction symbol. 
+ + Returns: + ('dict'): + Updated model values including the generated reaction name. + """ + participants = values.get("participants", []) + reactants = [] products = [] - for p in self.participants: - species = f"{p.state_name}" + for p in participants: if p.coefficient < 0: - reactants.append(species) + reactants.append(p.state_name) else: - products.append(species) + products.append(p.state_name) reactants = sorted(reactants) products = sorted(products) @@ -101,50 +156,73 @@ def generate_name(self): left = " + ".join(reactants) right = " + ".join(products) - # reversible reaction symbol - self.name = f"{left} <-> {right}" - return self + values["name"] = f"{left} <-> {right}" + return values - @field_validator("participants") - def at_least_two_participants(cls, v): - if len(v) < 2: - raise ValueError("A reaction must have at least 2 participants.") - return v - - @field_validator("participants") - def no_zero_coeff(cls, v): - for p in v: - if p.coefficient == 0: - raise ValueError(f"Participant {p.state_name} has coefficient 0.") - return v + # ------------------------------------------------------------------ + # Public API + # ------------------------------------------------------------------ def add_participant(self, particle_name, state_name, coefficient): """ - Add a new reaction participant to the reaction. + Adds a new participant to the reaction. Args: particle_name ('str'): - Name of the particle participating in the reaction. + Name of the particle template. + state_name ('str'): - Specific state of the particle. + Name of the particle state. + coefficient ('int'): - Stoichiometric coefficient for the participant. + Stoichiometric coefficient of the participant. + Must be non-zero. + Raises: + ValueError: + If the coefficient is zero. 
""" if coefficient == 0: raise ValueError("Stoichiometric coefficient cannot be zero.") - new_participant = ReactionParticipant(particle_name=particle_name, - state_name=state_name, - coefficient=coefficient) + + new_participant = ReactionParticipant( + particle_name=particle_name, + state_name=state_name, + coefficient=coefficient, + ) self.participants.append(new_participant) - self.generate_name() - - + + # Explicitly regenerate name after mutation + self.name = self._generate_name_from_participants() + + def _generate_name_from_participants(self): + """ + Generates a reaction name from the current list of participants. + + Returns: + ('str'): + Reaction name in the format ``A + B <-> C + D``. + """ + reactants = [] + products = [] + + for p in self.participants: + if p.coefficient < 0: + reactants.append(p.state_name) + else: + products.append(p.state_name) + + reactants.sort() + products.sort() + + return f"{' + '.join(reactants)} <-> {' + '.join(products)}" + def add_simulation_method(self, simulation_method): """ - Adds which simulation is used to simulate the reaction + Sets the simulation method used to model the reaction. Args: - simulation_method ('str'): label of the simulation method + simulation_method ('str'): + Label identifying the simulation method. """ self.simulation_method = simulation_method \ No newline at end of file diff --git a/pyMBE/storage/templates/hydrogel.py b/pyMBE/storage/templates/hydrogel.py index 340e1df..6bbdb98 100644 --- a/pyMBE/storage/templates/hydrogel.py +++ b/pyMBE/storage/templates/hydrogel.py @@ -18,7 +18,7 @@ # from typing import List -from pydantic import Field, BaseModel +from pydantic import Field, BaseModel, validator from ..base_type import PMBBaseModel class HydrogelNode(BaseModel): @@ -34,6 +34,14 @@ class HydrogelNode(BaseModel): """ particle_name: str lattice_index: List[int] # must be length 3 + @validator("lattice_index", pre=True) + def coerce_lattice_index(cls, v): + # Accept tuple, list, numpy array, etc. 
+ try: + v = list(v) + except TypeError: + raise ValueError("lattice_index must be an iterable of 3 integers") + return v class HydrogelChain(BaseModel): """ @@ -52,7 +60,7 @@ class HydrogelChain(BaseModel): molecule_name: str node_start: str node_end: str - + class HydrogelTemplate(PMBBaseModel): """ Template defining a hydrogel network in the pyMBE database. diff --git a/pyMBE/storage/templates/lj.py b/pyMBE/storage/templates/lj.py index 9c5c4f8..ccf6841 100644 --- a/pyMBE/storage/templates/lj.py +++ b/pyMBE/storage/templates/lj.py @@ -17,7 +17,7 @@ # along with this program. If not, see . # -from pydantic import BaseModel, Field, model_validator +from pydantic import BaseModel, Field, root_validator from ..pint_quantity import PintQuantity @@ -69,12 +69,44 @@ class LJInteractionTemplate(BaseModel): @classmethod def _make_name(cls, state1: str, state2: str) -> str: - """Create a canonical name from two states.""" + """ + Creates a canonical interaction name from two particle states. + + Args: + state1 ('str'): + Name of the first particle state. + + state2 ('str'): + Name of the second particle state. + + Returns: + ('str'): + Canonical interaction name in the form ``"A-B"``, + where ``A`` and ``B`` are sorted alphabetically. + """ s1, s2 = sorted([state1, state2]) return f"{s1}-{s2}" - @model_validator(mode="after") - def _auto_generate_name(self): - """Enforce standardized automatic name.""" - object.__setattr__(self, "name", self._make_name(self.state1, self.state2)) - return self + # ------------------------------------------------------------------ + # Validators + # ------------------------------------------------------------------ + + @root_validator + def _auto_generate_name(cls, values): + """ + Automatically generates and enforces a standardized interaction name. + + The name is derived from ``state1`` and ``state2`` and overrides + any manually provided value. + + Returns: + ('dict'): + Updated model values with the generated ``name`` field. 
+ """ + state1 = values.get("state1") + state2 = values.get("state2") + + if state1 is not None and state2 is not None: + values["name"] = cls._make_name(state1, state2) + + return values diff --git a/requirements.txt b/requirements.txt index d2ac082..427f169 100644 --- a/requirements.txt +++ b/requirements.txt @@ -5,8 +5,7 @@ pint-pandas>=0.3 biopandas==0.5.1.dev0 scipy>=1.8.0 matplotlib>=3.5.1 -pydantic>=2.12.5 -typing-extensions>=4.10 +pydantic<2.0.0 # soft dependencies to run the samples tqdm>=4.57.0 # soft dependencies to run the testsuite diff --git a/samples/branched_polyampholyte.py b/samples/branched_polyampholyte.py index df5b04f..04e004d 100644 --- a/samples/branched_polyampholyte.py +++ b/samples/branched_polyampholyte.py @@ -164,29 +164,21 @@ #List of ionisable groups # collect ionisable particles* -acidbase_templates = [] -for state_name, state_tpl in pmb.db._templates["particle"].items(): - if state_tpl.acidity in ["basic","acidic"]: - acidbase_templates+=pmb.db.get_particle_templates_under(template_name=state_tpl,pmb_type="particle_state") - -print(acidbase_templates) -total_ionisable_groups = len(list_ionisable_groups) - - - -basic_groups = pmb.df.loc[(~pmb.df['particle_id'].isna()) & (pmb.df['acidity']=='basic')].name.to_list() -acidic_groups = pmb.df.loc[(~pmb.df['particle_id'].isna()) & (pmb.df['acidity']=='acidic')].name.to_list() -list_ionisable_groups = basic_groups + acidic_groups -total_ionisable_groups = len(list_ionisable_groups) +acidbase_particles = ["A","B"] +acid_base_ids = [] +for name in acidbase_particles: + acid_base_ids+=pmb.db.find_instance_ids_by_name(pmb_type="particle", + name=name) +total_ionisable_groups = len(acid_base_ids) if verbose: print(f"The box length of your system is {L.to('reduced_length')}, {L.to('nm')}") print(f"The polyampholyte concentration in your system is {calculated_polyampholyte_concentration.to('mol/L')} with {N_polyampholyte_chains} molecules") - print(f"The ionisable groups in your polyampholyte are 
{list_ionisable_groups}") -cpH, labels = pmb.setup_cpH(counter_ion=cation_name, constant_pH=pH_value) +cpH = pmb.setup_cpH(counter_ion=cation_name, constant_pH=pH_value) if verbose: - print(f"The acid-base reaction has been successfully set up for {labels}") + print("The acid-base reaction has been successfully set up for:") + print(pmb.get_reactions_df()) # Setup espresso to track the ionization of the acid/basic groups type_map = pmb.get_type_map() diff --git a/testsuite/database_unit_tests.py b/testsuite/database_unit_tests.py index fb77d41..8ff0426 100644 --- a/testsuite/database_unit_tests.py +++ b/testsuite/database_unit_tests.py @@ -28,6 +28,7 @@ from pyMBE.storage.instances.bond import BondInstance from pyMBE.storage.instances.hydrogel import HydrogelInstance from pyMBE.storage.templates.bond import BondTemplate +from pyMBE.storage.templates.hydrogel import HydrogelNode from pyMBE.storage.pint_quantity import PintQuantity from pyMBE.storage.reactions.reaction import Reaction, ReactionParticipant import pint @@ -35,6 +36,15 @@ class Test(ut.TestCase): + def test_sanity_hydrogel_node_template(self): + """ + Sanity test for HydrogelNode template validator + """ + inputs={"particle_name": "A", + "lattice_index": 1} + self.assertRaises(ValueError, + HydrogelNode, + **inputs) def test_sanity_database_methods(self): """ @@ -224,16 +234,16 @@ def test_find_instance_ids(self): value=0) self.assertEqual(instance_ids_m1, [0,1,2,3]) - instance_ids_by_name_A = pmb.db._find_instance_ids_by_name(pmb_type="particle", + instance_ids_by_name_A = pmb.db.find_instance_ids_by_name(pmb_type="particle", name="A") - instance_ids_by_name_B = pmb.db._find_instance_ids_by_name(pmb_type="particle", + instance_ids_by_name_B = pmb.db.find_instance_ids_by_name(pmb_type="particle", name="B") self.assertEqual(instance_ids_by_name_A, [0,2]) self.assertEqual(instance_ids_by_name_B, [1,3]) # Sanity test, no ids are returned if the instance does not exist - instance_ids_test = 
pmb.db._find_instance_ids_by_name(pmb_type="peptide", + instance_ids_test = pmb.db.find_instance_ids_by_name(pmb_type="peptide", name="B") self.assertEqual(instance_ids_test, []) diff --git a/testsuite/test_io_database.py b/testsuite/test_io_database.py index 2bf1cd7..7c03c52 100644 --- a/testsuite/test_io_database.py +++ b/testsuite/test_io_database.py @@ -48,8 +48,7 @@ def test_instance_fallback_model_dump_failure(self): """ class BadInstance: name = "bad_inst" - def model_dump(self): - raise RuntimeError("boom") + db = DummyDB() db._templates = {} db._instances["weird"] = {"x": BadInstance()} @@ -66,8 +65,7 @@ def test_template_fallback_model_dump_failure(self): """ class BadTemplate: name = "bad" - def model_dump(self): - raise RuntimeError("boom") + db = DummyDB() db._templates["weird"] = {"bad": BadTemplate()} db._instances = {} From 8ccef184ca52aa67d1e71350056af04e5ef06ff5 Mon Sep 17 00:00:00 2001 From: pmblanco Date: Mon, 9 Feb 2026 12:10:01 +0100 Subject: [PATCH 51/55] fix hydrogel matplotlib representation, fix bug in collect_particle_templates with nested residues, continue refactoring samples --- pyMBE/lib/lattice.py | 125 +++++++++++++++++-------- pyMBE/storage/manager.py | 25 +++-- samples/build_hydrogel.py | 54 ++++++----- samples/plot_branched_polyampholyte.py | 6 ++ testsuite/database_unit_tests.py | 8 ++ testsuite/hydrogel_tests.py | 2 +- testsuite/lattice_builder.py | 89 +++++++++++------- 7 files changed, 200 insertions(+), 109 deletions(-) diff --git a/pyMBE/lib/lattice.py b/pyMBE/lib/lattice.py index bdbbf2f..f7fa54a 100644 --- a/pyMBE/lib/lattice.py +++ b/pyMBE/lib/lattice.py @@ -102,45 +102,93 @@ def add_default_chains(self, mpc): if key not in self.chains: self.chains[key] = mpc * ["default_monomer"] - def draw_lattice(self, ax): + def draw_lattice(self, ax, pmb): """ - Draw the lattice in an `Axes3D `_ canvas. + Draw the hydrogel created in a 3D Matplotlib canvas. + Args: - ax: Axes. 
+ ax (`mpl_toolkits.mplot3d.axes3d.Axes3D`): + A Matplotlib 3D axes instance + + pmb (`pyMBE.pymbe_library`): + Instance of the pyMBE library. + + Notes: + - Periodic images of lattice nodes are drawn within a 4×4×4 box. + - Chain geometry is constructed by linear interpolation between + start and end nodes, accounting for periodic boundary conditions. """ - kwargs_node_labels = {"zdir": (1., 1., 1.), "horizontalalignment": "left", "verticalalignment": "bottom", **self.kwargs_node_labels} - kwargs_bonds = {"linestyle": "-", "marker": None, "color": "gray", **self.kwargs_bonds} + import itertools + import numpy as np + + kwargs_node_labels = {"zdir": (1., 1., 1.), + "horizontalalignment": "left", + "verticalalignment": "bottom", + **self.kwargs_node_labels} + kwargs_bonds = {"linestyle": "-", + "marker": None, + "color": "gray", + **self.kwargs_bonds} kwargs_monomers = {**self.kwargs_monomers} scatter_data = {} - # gather monomers at lattice nodes + # ------------------------------------------------------------------ + # Draw lattice nodes (including periodic images) + # ------------------------------------------------------------------ for node_label, node_type in self.nodes.items(): node_id = self.node_labels[node_label] for image_box in itertools.product((0, 4), repeat=3): image_indices = self.lattice.indices[node_id] + np.array(image_box) if np.max(image_indices) <= 4: image_label = str([int(x) for x in image_indices]).replace(",", "") - ax.text(*image_indices + np.array([-0.15, 0., 0.]), image_label, **kwargs_node_labels) - if node_type not in scatter_data: - scatter_data[node_type] = [] - scatter_data[node_type].append(image_indices) - # gather monomers from the chains - for (start_node, end_node), sequence in self.chains.items(): - node_connection_vec = (self.lattice.indices[end_node, :] - self.lattice.indices[start_node, :]) / 4. 
+ ax.text(*(image_indices + np.array([-0.15, 0., 0.])), + image_label, + **kwargs_node_labels) + scatter_data.setdefault(node_type, []).append(image_indices) + # ------------------------------------------------------------------ + # Draw chains + # ------------------------------------------------------------------ + for chain in self.chains: + start_node = chain["node_start"] + end_node = chain["node_end"] + start_id = self.node_labels[start_node] + end_id = self.node_labels[end_node] + start_pos = self.lattice.indices[start_id] + end_pos = self.lattice.indices[end_id] + # Periodic-aware connection vector + node_connection_vec = (end_pos - start_pos) / 4.0 node_connection_vec -= np.rint(node_connection_vec) - node_connection_vec *= 4. - bond_vector = node_connection_vec / (len(sequence) + 1) - for j in range(len(sequence) + 1): - pos = self.lattice.indices[start_node, :] - vec = np.vstack((pos + (j + 0) * bond_vector, - pos + (j + 1) * bond_vector)) - ax.plot(vec[:, 0], vec[:, 1], zs=vec[:, 2], zorder=1, **kwargs_bonds) - # draw bonds - for j, node_type in enumerate(sequence): - pos = self.lattice.indices[start_node, :] + (j + 1) * bond_vector - if node_type not in scatter_data: - scatter_data[node_type] = [] - scatter_data[node_type].append(pos) - # draw monomers + node_connection_vec *= 4.0 + mol_tpl = pmb.db.get_template(name=chain["molecule_name"], + pmb_type="molecule") + residue_list = mol_tpl.residue_list + n_res = len(residue_list) + bond_vector = node_connection_vec / (n_res + 1) + prev_pos = start_pos + for i, res_name in enumerate(residue_list): + pos = start_pos + (i + 1) * bond_vector + # Draw bond + vec = np.vstack((prev_pos, pos)) + ax.plot(vec[:, 0], + vec[:, 1], + zs=vec[:, 2], + zorder=1, + **kwargs_bonds) + # Resolve particles from residue + counts = pmb.db._collect_particle_templates(name=res_name, + pmb_type="residue") + for particle_name in counts: + scatter_data.setdefault(particle_name, []).append(pos) + prev_pos = pos + # Final bond to end 
node + vec = np.vstack((prev_pos, end_pos)) + ax.plot(vec[:, 0], + vec[:, 1], + zs=vec[:, 2], + zorder=1, + **kwargs_bonds) + # ------------------------------------------------------------------ + # Draw monomers (nodes + chain particles) + # ------------------------------------------------------------------ resolution = (16, 8) self.sphere = self._make_sphere(radius=0.1, resolution=resolution) node_types = scatter_data.keys() @@ -148,19 +196,16 @@ def draw_lattice(self, ax): node_types = sorted(node_types, key=lambda x: self.get_monomer_color(x)) for node_type in node_types: if self.colormap: - color = self.colormap[node_type] - kwargs_monomers["c"] = color - - node_positions = scatter_data[node_type] - pos = np.array(node_positions) - # plotting nodes and monomers - ax_data = ax.scatter(pos[:,0], pos[:,1], pos[:,2], edgecolor="none", - zorder=2, label=node_type, s=12**2, **kwargs_monomers) - color = ax_data.get_facecolors()[0] - facecolors = np.tile(color, resolution).reshape((*resolution, len(color))) - for x, y, z in node_positions: - ax.plot_surface(x + self.sphere[0], y + self.sphere[1], z + self.sphere[2], zorder=3, - shade=False, facecolors=facecolors) + kwargs_monomers["c"] = self.colormap[node_type] + node_positions = np.array(scatter_data[node_type]) + ax.scatter(node_positions[:, 0], + node_positions[:, 1], + node_positions[:, 2], + edgecolor="none", + zorder=2, + label=node_type, + s=12**2, + **kwargs_monomers) def draw_simulation_box(self, ax): """ diff --git a/pyMBE/storage/manager.py b/pyMBE/storage/manager.py index 25bb351..d18e22a 100644 --- a/pyMBE/storage/manager.py +++ b/pyMBE/storage/manager.py @@ -116,22 +116,30 @@ def _collect_particle_templates(self, name, pmb_type): particles. - Molecule-like templates contribute the particles from all residues in their ``residue_list``. - - Unsupported ``pmb_type`` values raise ``NotImplementedError``. 
""" counts = defaultdict(int) if pmb_type == "particle": counts[name] += 1 return counts if pmb_type == "particle_state": - particle_name = self.get_template(name=name, pmb_type=pmb_type).particle_name + particle_name = self.get_template(name=name,pmb_type=pmb_type).particle_name counts[particle_name] += 1 return counts if pmb_type == "residue": - tpl = self.get_template(name=name, - pmb_type=pmb_type) - for pname in [tpl.central_bead] + tpl.side_chains: - sub = self._collect_particle_templates(name=pname, - pmb_type="particle") + tpl = self.get_template(name=name, pmb_type="residue") + # central bead is always a particle + sub = self._collect_particle_templates(name=tpl.central_bead, + pmb_type="particle") + for k, v in sub.items(): + counts[k] += v + # side chains can be particles OR residues + for sc_name in tpl.side_chains: + if sc_name in self._templates.get("particle", {}): + sc_type = "particle" + elif sc_name in self._templates.get("residue", {}): + sc_type = "residue" + sub = self._collect_particle_templates(name=sc_name, + pmb_type=sc_type) for k, v in sub.items(): counts[k] += v return counts @@ -139,14 +147,13 @@ def _collect_particle_templates(self, name, pmb_type): tpl = self.get_template(name=name, pmb_type=pmb_type) for res_name in tpl.residue_list: - sub = self._collect_particle_templates(name=res_name, + sub = self._collect_particle_templates(name=res_name, pmb_type="residue") for k, v in sub.items(): counts[k] += v return counts raise NotImplementedError(f"Method not implemented for pmb_type='{pmb_type}'") - def _delete_bonds_of_particle(self, pid): """ Delete all bond instances involving a given particle instance. diff --git a/samples/build_hydrogel.py b/samples/build_hydrogel.py index a17db31..b5ab00a 100644 --- a/samples/build_hydrogel.py +++ b/samples/build_hydrogel.py @@ -1,5 +1,5 @@ # -# Copyright (C) 2024-2025 pyMBE-dev team +# Copyright (C) 2024-2026 pyMBE-dev team # # This file is part of pyMBE. 
# @@ -27,28 +27,35 @@ mpc = 40 # Define node particle NodeType = "node_type" -pmb.define_particle(name=NodeType, sigma=0.355*pmb.units.nm, epsilon=1*pmb.units('reduced_energy')) +pmb.define_particle(name=NodeType, + sigma=0.355*pmb.units.nm, + epsilon=1*pmb.units('reduced_energy')) # define monomers BeadType1 = "C" -pmb.define_particle(name=BeadType1, sigma=0.355*pmb.units.nm, epsilon=1*pmb.units('reduced_energy')) +pmb.define_particle(name=BeadType1, + sigma=0.355*pmb.units.nm, + epsilon=1*pmb.units('reduced_energy')) BeadType2 = "M" -pmb.define_particle(name=BeadType2, sigma=0.355*pmb.units.nm, epsilon=1*pmb.units('reduced_energy')) +pmb.define_particle(name=BeadType2, + sigma=0.355*pmb.units.nm, + epsilon=1*pmb.units('reduced_energy')) Res1 = "res_1" -pmb.define_residue( - name=Res1, # Name of the residue - central_bead=BeadType1, # Define the central bead name - side_chains=[] # Assuming no side chains for the monomer -) +pmb.define_residue(name=Res1, # Name of the residue + central_bead=BeadType1, # Define the central bead name + side_chains=[]) # Assuming no side chains for the monomer + Res2 = "res_2" -pmb.define_residue( - name=Res2, # Name of the residue - central_bead=BeadType2, # Define the central bead name - side_chains=[] # Assuming no side chains for the monomer -) +pmb.define_residue(name=Res2, # Name of the residue + central_bead=BeadType2, # Define the central bead name + side_chains=[]) # Assuming no side chains for the monomer + residue_list = [Res1]*(mpc//2) + [Res2]*(mpc//2) +pmb.define_molecule(name="hydrogel_chain", + residue_list=residue_list) + # Defining bonds in the hydrogel for all different pairs generic_harmonic_constant = 400 * pmb.units('reduced_energy / reduced_length**2') @@ -56,15 +63,14 @@ HARMONIC_parameters = {'r_0' : generic_bond_l, 'k' : generic_harmonic_constant} pmb.define_bond(bond_type = 'harmonic', - bond_parameters = HARMONIC_parameters, particle_pairs = [[BeadType1, BeadType1], - [BeadType1, BeadType2], - [BeadType2, 
BeadType2], - [NodeType, BeadType1], - [NodeType, BeadType2]]) + bond_parameters = HARMONIC_parameters, particle_pairs = [[BeadType1, BeadType1], + [BeadType1, BeadType2], + [BeadType2, BeadType2], + [NodeType, BeadType1], + [NodeType, BeadType2]]) # Provide mpc and bond_l to Diamond Lattice diamond_lattice = DiamondLattice(mpc, generic_bond_l) espresso_system = espressomd.System(box_l = [diamond_lattice.box_l]*3) -pmb.add_bonds_to_espresso(espresso_system = espresso_system) lattice_builder = pmb.initialize_lattice_builder(diamond_lattice) @@ -85,16 +91,18 @@ for node_s, node_e in connectivity_with_labels: chain_topology.append({'node_start':node_s, - 'node_end': node_e, - 'residue_list':residue_list}) + 'node_end': node_e, + 'molecule_name':"hydrogel_chain"}) +lattice_builder.chains = chain_topology pmb.define_hydrogel("my_hydrogel",node_topology, chain_topology) hydrogel_info = pmb.create_hydrogel("my_hydrogel", espresso_system) fig = plt.figure() ax = fig.add_subplot(111,projection="3d") -lattice_builder.draw_lattice(ax) +lattice_builder.draw_lattice(ax=ax, + pmb=pmb) lattice_builder.draw_simulation_box(ax) plt.legend(fontsize=12) plt.show() diff --git a/samples/plot_branched_polyampholyte.py b/samples/plot_branched_polyampholyte.py index 6dd8fd0..f3d5543 100644 --- a/samples/plot_branched_polyampholyte.py +++ b/samples/plot_branched_polyampholyte.py @@ -64,6 +64,12 @@ sigma = 1*pmb.units('reduced_length'), epsilon = 1*pmb.units('reduced_energy')) +# Inert particle +pmb.define_particle(name = "I", + z = 0, + sigma = 1*pmb.units('reduced_length'), + epsilon = 1*pmb.units('reduced_energy')) + # Define different residues pmb.define_residue( name = "Res_1", diff --git a/testsuite/database_unit_tests.py b/testsuite/database_unit_tests.py index 8ff0426..0285994 100644 --- a/testsuite/database_unit_tests.py +++ b/testsuite/database_unit_tests.py @@ -278,6 +278,9 @@ def test_count_templates(self): pmb.define_residue(name="R1", central_bead="A", side_chains=["B"]) + 
pmb.define_residue(name="R2", + central_bead="A", + side_chains=["R1"]) pmb.define_molecule(name="M1", residue_list=["R1"]*2) A_states = pmb.db._collect_particle_templates(name="A", @@ -301,6 +304,11 @@ def test_count_templates(self): self.assertEqual(R1_counts, {"A":1, "B":1}) + R2_counts = pmb.db._collect_particle_templates(name="R2", + pmb_type="residue") + self.assertEqual(R2_counts, + {"A":2, + "B":1}) M1_counts = pmb.db._collect_particle_templates(name="M1", pmb_type="molecule") self.assertEqual(M1_counts, diff --git a/testsuite/hydrogel_tests.py b/testsuite/hydrogel_tests.py index 230a52a..8a92c58 100644 --- a/testsuite/hydrogel_tests.py +++ b/testsuite/hydrogel_tests.py @@ -1,5 +1,5 @@ # -# Copyright (C) 2025 pyMBE-dev team +# Copyright (C) 2025-2026 pyMBE-dev team # # This file is part of pyMBE. # diff --git a/testsuite/lattice_builder.py b/testsuite/lattice_builder.py index 04e8037..b84fdeb 100644 --- a/testsuite/lattice_builder.py +++ b/testsuite/lattice_builder.py @@ -103,6 +103,9 @@ def define_templates(pmb): class Test(ut.TestCase): colormap = {"default_linker":"green", "default_monomer":"blue", + BeadType1: "pink", + BeadType2: "purple", + BeadType3: "black", Res3: "red", NodeType2: "orange", NodeType1: "cyan", @@ -150,42 +153,7 @@ def test_lattice_setup(self): # untouched nodes remain default np.testing.assert_equal(lattice.get_node("[2 2 0]"), "default_linker") np.testing.assert_equal(lattice.get_node("[3 1 3]"), "default_linker") - - # --- Colormap --- - lattice.set_colormap(self.colormap) - for index, (label, color) in enumerate(self.colormap.items()): - np.testing.assert_equal(lattice.get_monomer_color(label), color) - np.testing.assert_equal(lattice.get_monomer_color_index(label), index) - - # --- Invalid colormap access --- - with self.assertRaisesRegex( - RuntimeError, "monomer 'unknown' has no associated color" - ): - lattice.get_monomer_color("unknown") - - with self.assertRaises(AssertionError): - lattice.set_colormap("red") - - # --- 
Invalid node access --- - with self.assertRaisesRegex( - AssertionError, r"node '\[0 5 13\]' doesn't exist in a diamond lattice" - ): - lattice.get_node("[0 5 13]") - - # --- Plot smoke tests --- - fig = plt.figure(figsize=(12, 12)) - ax = fig.add_subplot(projection="3d", computed_zorder=False) - lattice.draw_lattice(ax) - lattice.draw_simulation_box(ax) - plt.close(fig) - - fig = plt.figure(figsize=(12, 12)) - ax = fig.add_subplot(projection="3d", computed_zorder=False) - lattice.draw_lattice(ax) - lattice.draw_simulation_box(ax) - ax.legend() - plt.close(fig) - + # Clean espresso system espresso_system.part.clear() @@ -283,6 +251,55 @@ def test_lattice_setup(self): created_residues = [pmb2.db.get_instance("residue", rid).name for rid in created_residues_id] # Reverse branch MUST reverse the residue list np.testing.assert_equal(actual=created_residues, desired=sequence[::-1], verbose=True) + + def test_plot(self): + pmb = pyMBE.pymbe_library(seed=42) + diamond = pyMBE.lib.lattice.DiamondLattice(mpc, bond_l) + lattice = pmb.initialize_lattice_builder(diamond) + define_templates(pmb) + pmb.define_molecule(name="test", + residue_list=[Res1]) + # Setting up chain topology + connectivity = diamond.connectivity + node_labels = lattice.node_labels + reverse_node_labels = {v: k for k, v in node_labels.items()} + connectivity_with_labels = {(reverse_node_labels[i], reverse_node_labels[j]) for i, j in connectivity} + chain_topology = [] + + for node_s, node_e in connectivity_with_labels: + chain_topology.append({'node_start':node_s, + 'node_end': node_e, + 'molecule_name':"test"}) + # --- Colormap --- + lattice.set_colormap(self.colormap) + for index, (label, color) in enumerate(self.colormap.items()): + np.testing.assert_equal(lattice.get_monomer_color(label), color) + np.testing.assert_equal(lattice.get_monomer_color_index(label), index) + + # --- Invalid colormap access --- + with self.assertRaisesRegex( + RuntimeError, "monomer 'unknown' has no associated color" + ): + 
lattice.get_monomer_color("unknown") + + with self.assertRaises(AssertionError): + lattice.set_colormap("red") + + # --- Invalid node access --- + with self.assertRaisesRegex( + AssertionError, r"node '\[0 5 13\]' doesn't exist in a diamond lattice" + ): + lattice.get_node("[0 5 13]") + + # --- Plot smoke tests --- + fig = plt.figure(figsize=(12, 12)) + ax = fig.add_subplot(projection="3d", computed_zorder=False) + lattice.chains= chain_topology + lattice.draw_lattice(ax, + pmb=pmb) + lattice.draw_simulation_box(ax) + plt.close(fig) + if __name__ == "__main__": From 8abf3ee2f68e5e623c8d68f42d1fbdd3ca7cbb03 Mon Sep 17 00:00:00 2001 From: pmblanco Date: Mon, 9 Feb 2026 12:39:20 +0100 Subject: [PATCH 52/55] fix general samples --- pyMBE/lib/handy_functions.py | 13 ++-- samples/branched_polyampholyte.py | 7 +-- samples/peptide_cpH.py | 42 +++++++++---- samples/peptide_mixture_grxmc_ideal.py | 70 ++++++++++++++------- samples/plot_peptide_cpH.py | 14 +++-- samples/plot_peptide_mixture_grxmc_ideal.py | 13 +++- 6 files changed, 109 insertions(+), 50 deletions(-) diff --git a/pyMBE/lib/handy_functions.py b/pyMBE/lib/handy_functions.py index 75441fd..33da68b 100644 --- a/pyMBE/lib/handy_functions.py +++ b/pyMBE/lib/handy_functions.py @@ -260,7 +260,6 @@ def define_peptide_AA_residues(sequence,model, pmb): - Residue names are constructed as `"AA-"`, e.g., `"AA-A"`, `"AA-L"`. 
""" - defined_residues = [] for residue_name in sequence: if model == '1beadAA': central_bead = residue_name @@ -273,11 +272,15 @@ def define_peptide_AA_residues(sequence,model, pmb): central_bead = 'CA' side_chains = [residue_name] residue_name='AA-'+residue_name - if residue_name not in defined_residues: + if "residue" in pmb.db._templates: + if residue_name not in pmb.db._templates["residue"]: + pmb.define_residue(name = residue_name, + central_bead = central_bead, + side_chains = side_chains) + else: pmb.define_residue(name = residue_name, - central_bead = central_bead, - side_chains = side_chains) - defined_residues.append(residue_name) + central_bead = central_bead, + side_chains = side_chains) def do_reaction(algorithm, steps): """ diff --git a/samples/branched_polyampholyte.py b/samples/branched_polyampholyte.py index 04e004d..5478be7 100644 --- a/samples/branched_polyampholyte.py +++ b/samples/branched_polyampholyte.py @@ -162,11 +162,10 @@ anion_name=anion_name, c_salt=c_salt) -#List of ionisable groups -# collect ionisable particles* -acidbase_particles = ["A","B"] +# count acid/base particles +pka_set = pmb.get_pka_set() acid_base_ids = [] -for name in acidbase_particles: +for name in pka_set.keys(): acid_base_ids+=pmb.db.find_instance_ids_by_name(pmb_type="particle", name=name) total_ionisable_groups = len(acid_base_ids) diff --git a/samples/peptide_cpH.py b/samples/peptide_cpH.py index 3209644..8182867 100644 --- a/samples/peptide_cpH.py +++ b/samples/peptide_cpH.py @@ -29,10 +29,7 @@ pmb = pyMBE.pymbe_library(seed=42) # Load some functions from the handy_scripts library for convenience -from pyMBE.lib.handy_functions import setup_electrostatic_interactions -from pyMBE.lib.handy_functions import relax_espresso_system -from pyMBE.lib.handy_functions import setup_langevin_dynamics -from pyMBE.lib.handy_functions import do_reaction +from pyMBE.lib.handy_functions import setup_electrostatic_interactions, relax_espresso_system, setup_langevin_dynamics, 
do_reaction, define_peptide_AA_residues from pyMBE.lib.analysis import built_output_name parser = argparse.ArgumentParser(description='Sample script to run the pre-made peptide models with pyMBE') @@ -97,7 +94,13 @@ path_to_interactions=pmb.root / "parameters" / "peptides" / "Lunkad2021" path_to_pka=pmb.root / "parameters" / "pka_sets" / "Hass2015.json" pmb.load_database(folder=path_to_interactions) -pmb.load_pka_set (path_to_pka) +pmb.load_pka_set(path_to_pka) + +# Define acid/base particle states +pka_set = pmb.get_pka_set() +for particle_name in pka_set.keys(): + pmb.define_monoprototic_particle_states(particle_name=particle_name, + acidity=pka_set[particle_name]["acidity"]) generic_bond_length=0.4 * pmb.units.nm generic_harmonic_constant = 400 * pmb.units('reduced_energy / reduced_length**2') @@ -110,8 +113,11 @@ bond_parameters = HARMONIC_parameters) -# Defines the peptide in the pyMBE data frame +# Defines the peptide in the pyMBE database peptide_name = 'generic_peptide' +define_peptide_AA_residues(sequence=sequence, + model="2beadAA", + pmb=pmb) pmb.define_peptide (name=peptide_name, sequence=sequence, model=model) @@ -125,6 +131,8 @@ sigma=0.35*pmb.units.nm, epsilon=1*pmb.units('reduced_energy')) + + # Create an instance of an espresso system espresso_system=espressomd.System (box_l = [L.to('reduced_length').magnitude]*3) espresso_system.time_step=dt @@ -152,20 +160,28 @@ vtf.writevsf(espresso_system, coordinates) vtf.writevcf(espresso_system, coordinates) -#List of ionisable groups -basic_groups = pmb.df.loc[(~pmb.df['particle_id'].isna()) & (pmb.df['acidity']=='basic')].name.to_list() -acidic_groups = pmb.df.loc[(~pmb.df['particle_id'].isna()) & (pmb.df['acidity']=='acidic')].name.to_list() -list_ionisable_groups = basic_groups + acidic_groups -total_ionisable_groups = len(list_ionisable_groups) +# count acid/base particles +pka_set = pmb.get_pka_set() +acid_base_ids = [] +list_ionisable_groups = [] +for name in pka_set.keys(): + part_ids = 
pmb.db.find_instance_ids_by_name(pmb_type="particle", + name=name) + if part_ids: + acid_base_ids+=part_ids + list_ionisable_groups+=[name] +total_ionisable_groups = len(acid_base_ids) if verbose: print(f"The box length of your system is {L.to('reduced_length')} {L.to('nm')}") print(f"The peptide concentration in your system is {calculated_peptide_concentration.to('mol/L')} with {N_peptide_chains} peptides") print(f"The ionisable groups in your peptide are {list_ionisable_groups}") -cpH, labels = pmb.setup_cpH(counter_ion=cation_name, constant_pH=pH_value) +cpH = pmb.setup_cpH(counter_ion=cation_name, + constant_pH=pH_value) if verbose: - print(f"The acid-base reaction has been successfully setup for {labels}") + print("The acid-base reaction has been successfully set up for:") + print(pmb.get_reactions_df()) # Setup espresso to track the ionization of the acid/basic groups in peptide type_map =pmb.get_type_map() diff --git a/samples/peptide_mixture_grxmc_ideal.py b/samples/peptide_mixture_grxmc_ideal.py index 06915c0..deb35d3 100644 --- a/samples/peptide_mixture_grxmc_ideal.py +++ b/samples/peptide_mixture_grxmc_ideal.py @@ -24,7 +24,7 @@ from espressomd.io.writer import vtf import pyMBE from pyMBE.lib.analysis import built_output_name -from pyMBE.lib.handy_functions import do_reaction +from pyMBE.lib.handy_functions import do_reaction, define_peptide_AA_residues # Create an instance of pyMBE library pmb = pyMBE.pymbe_library(seed=42) @@ -95,11 +95,27 @@ # Note that this parameterization only includes some of the natural aminoacids # For the other aminoacids the user needs to use a parametrization including all the aminoacids in the peptide sequence path_to_pka=pmb.root / "parameters" / "pka_sets" / "Hass2015.json" -path_to_interactions=pmb.root / "parameters" / "peptides" / "Lunkad2021.json" - -pmb.load_interaction_parameters(filename=path_to_interactions) +path_to_interactions=pmb.root / "parameters" / "peptides" / "Lunkad2021" + 
+pmb.load_database(folder=path_to_interactions) +# define templates for the c and n ends +pmb.define_particle(name="n", + sigma=1*pmb.units.reduced_length, + epsilon=1*pmb.units.reduced_energy, + acidity="basic") +pmb.define_particle(name="c", + sigma=1*pmb.units.reduced_length, + epsilon=1*pmb.units.reduced_energy, + acidity="acidic") pmb.load_pka_set(path_to_pka) +# Define acid/base particle states +pka_set = pmb.get_pka_set() +for particle_name in pka_set.keys(): + if particle_name not in ["c", "n"]: # Avoid redefing the ends + pmb.define_monoprototic_particle_states(particle_name=particle_name, + acidity=pka_set[particle_name]["acidity"]) + # Defines the bonds bond_type = 'harmonic' generic_bond_length=0.4 * pmb.units.nm @@ -120,6 +136,12 @@ pmb.define_peptide (name=peptide2, sequence=sequence2, model=model) +define_peptide_AA_residues(sequence=sequence1, + model=model, + pmb=pmb) +define_peptide_AA_residues(sequence=sequence2, + model=model, + pmb=pmb) # Solution parameters c_salt=5e-3 * pmb.units.mol/ pmb.units.L @@ -212,31 +234,35 @@ vtf.writevsf(espresso_system, coordinates) vtf.writevcf(espresso_system, coordinates) -#List of ionisable groups -basic_groups = pmb.df.loc[(~pmb.df['particle_id'].isna()) & (pmb.df['acidity']=='basic')].name.to_list() -acidic_groups = pmb.df.loc[(~pmb.df['particle_id'].isna()) & (pmb.df['acidity']=='acidic')].name.to_list() -list_ionisable_groups = basic_groups + acidic_groups -total_ionisable_groups = len (list_ionisable_groups) +# count acid/base particles +pka_set = pmb.get_pka_set() +acid_base_ids = [] +for name in pka_set.keys(): + acid_base_ids+=pmb.db.find_instance_ids_by_name(pmb_type="particle", + name=name) +total_ionisable_groups = len(acid_base_ids) + # Get peptide net charge if verbose: print("The box length of your system is", L.to('reduced_length'), L.to('nm')) if args.mode == 'standard': - grxmc, sucessful_reactions_labels, ionic_strength_res = pmb.setup_grxmc_reactions(pH_res=pH_value, - c_salt_res=c_salt, - 
proton_name=proton_name, - hydroxide_name=hydroxide_name, - salt_cation_name=sodium_name, - salt_anion_name=chloride_name, - activity_coefficient=lambda x: 1.0) + grxmc, ionic_strength_res = pmb.setup_grxmc_reactions(pH_res=pH_value, + c_salt_res=c_salt, + proton_name=proton_name, + hydroxide_name=hydroxide_name, + salt_cation_name=sodium_name, + salt_anion_name=chloride_name, + activity_coefficient=lambda x: 1.0) elif args.mode == 'unified': - grxmc, sucessful_reactions_labels, ionic_strength_res = pmb.setup_grxmc_unified(pH_res=pH_value, - c_salt_res=c_salt, - cation_name=cation_name, - anion_name=anion_name, - activity_coefficient=lambda x: 1.0) + grxmc, ionic_strength_res = pmb.setup_grxmc_unified(pH_res=pH_value, + c_salt_res=c_salt, + cation_name=cation_name, + anion_name=anion_name, + activity_coefficient=lambda x: 1.0) if verbose: - print('The acid-base reaction has been sucessfully setup for ', sucessful_reactions_labels) + print("The acid-base reaction has been successfully set up for:") + print(pmb.get_reactions_df()) # Setup espresso to track the ionization of the acid/basic groups in peptide type_map =pmb.get_type_map() diff --git a/samples/plot_peptide_cpH.py b/samples/plot_peptide_cpH.py index 22ac3ac..b8e1ba3 100644 --- a/samples/plot_peptide_cpH.py +++ b/samples/plot_peptide_cpH.py @@ -23,6 +23,7 @@ import argparse import pathlib import pandas as pd +import pyMBE.lib.handy_functions as hf # Create an instance of pyMBE library import pyMBE pmb = pyMBE.pymbe_library(seed=42) @@ -52,15 +53,18 @@ # Define peptide parameters sequence = args.sequence -# Define the peptide in the pyMBE dataframe and load the pka set +# Define the peptide in the pyMBE database and load the pka set # This is necessary to calculate the analytical solution from the Henderson-Hasselbach equation peptide = 'generic_peptide' -pmb.define_peptide (name=peptide, - sequence=sequence, - model="1beadAA") # not really relevant for plotting +model="1beadAA" # not really relevant for 
plotting +pmb.define_peptide(name=peptide, + sequence=sequence, + model=model) path_to_pka=pmb.root / "parameters" / "pka_sets" / "Hass2015.json" pmb.load_pka_set(path_to_pka) - +hf.define_peptide_AA_residues(sequence=sequence, + model=model, + pmb=pmb) # Calculate the ideal titration curve of the peptide with Henderson-Hasselbach equation if args.mode == "plot": pH_range_HH = np.linspace(2, 12, num=100) diff --git a/samples/plot_peptide_mixture_grxmc_ideal.py b/samples/plot_peptide_mixture_grxmc_ideal.py index 81afc25..54fd693 100644 --- a/samples/plot_peptide_mixture_grxmc_ideal.py +++ b/samples/plot_peptide_mixture_grxmc_ideal.py @@ -1,5 +1,5 @@ # -# Copyright (C) 2024 pyMBE-dev team +# Copyright (C) 2024-2026 pyMBE-dev team # # This file is part of pyMBE. # @@ -22,6 +22,7 @@ import pandas as pd import argparse import pathlib +from pyMBE.lib.handy_functions import define_peptide_AA_residues parser = argparse.ArgumentParser(description='Plots the titration data from peptide.py and the corresponding analytical solution.') parser.add_argument('--sequence1', @@ -65,6 +66,8 @@ # Create an instance of pyMBE library pmb = pyMBE.pymbe_library(seed=42) c_salt=args.csalt * pmb.units.mol/ pmb.units.L +model = "1beadAA" + # Define peptide parameters sequence1 = args.sequence1 @@ -75,6 +78,13 @@ # Define the peptides in the pyMBE data frame and load the pka set # This is necesary to calculate the analytical solution from the Henderson-Hasselbach equation + +define_peptide_AA_residues(sequence=sequence1, + model=model, + pmb=pmb) +define_peptide_AA_residues(sequence=sequence2, + model=model, + pmb=pmb) peptide1 = 'generic_peptide1' pmb.define_peptide (name=peptide1, sequence=sequence1, @@ -86,6 +96,7 @@ path_to_pka=pmb.root / "parameters" / "pka_sets" / "Hass2015.json" pmb.load_pka_set(path_to_pka) + # Calculate the ideal titration curve of the peptide with Henderson-Hasselbach equation if args.mode == "plot": pH_range_HH = np.linspace(2, 12, num=100) From 
2d6817a8589bfc77e2902f0687c47dfb5a7767c4 Mon Sep 17 00:00:00 2001 From: pmblanco Date: Mon, 9 Feb 2026 17:47:02 +0100 Subject: [PATCH 53/55] all functional tests run now, started fixing the tutorial --- pyMBE/lib/lattice.py | 2 +- .../Blanco2021/templates_particle.csv | 14 +- .../Blanco2021/templates_particle_state.csv | 6 +- .../Lunkad2021/templates_particle.csv | 10 +- pyMBE/pyMBE.py | 9 +- pyMBE/storage/manager.py | 1 + samples/Beyer2024/create_paper_data.py | 40 +- samples/Beyer2024/globular_protein.py | 131 +- samples/Beyer2024/peptide.py | 21 +- .../weak_polyelectrolyte_dialysis.py | 33 +- samples/peptide_cpH.py | 1 + samples/salt_solution_gcmc.py | 20 +- samples/weak_polyacid_hydrogel_grxmc.py | 52 +- testsuite/globular_protein_unit_tests.py | 3 +- testsuite/hydrogel_builder.py | 14 +- testsuite/hydrogel_tests.py | 6 +- tutorials/lattice_builder.ipynb | 142 -- tutorials/pyMBE_tutorial.ipynb | 1762 ++++++++++++++++- 18 files changed, 1863 insertions(+), 404 deletions(-) delete mode 100644 tutorials/lattice_builder.ipynb diff --git a/pyMBE/lib/lattice.py b/pyMBE/lib/lattice.py index f7fa54a..6b19a25 100644 --- a/pyMBE/lib/lattice.py +++ b/pyMBE/lib/lattice.py @@ -333,5 +333,5 @@ def __init__(self,mpc,bond_l): raise ValueError("mpc must be a non-zero positive integer.") self.mpc = mpc self.bond_l = bond_l - self.box_l = (self.mpc+1)*self.bond_l.magnitude / (np.sqrt(3)*0.25) + self.box_l = (self.mpc+2)*self.bond_l.magnitude / (np.sqrt(3)*0.25) diff --git a/pyMBE/parameters/peptides/Blanco2021/templates_particle.csv b/pyMBE/parameters/peptides/Blanco2021/templates_particle.csv index 3a55738..97eb588 100644 --- a/pyMBE/parameters/peptides/Blanco2021/templates_particle.csv +++ b/pyMBE/parameters/peptides/Blanco2021/templates_particle.csv @@ -1,13 +1,13 @@ name,sigma,epsilon,cutoff,offset,initial_state 
-D,"{""magnitude"":0.4,""units"":""nm"",""dimension"":""length""}","{""magnitude"":25.69257912108585,""units"":""meV"",""dimension"":""energy""}","{""magnitude"":0.3984740271498274,""units"":""nm"",""dimension"":""length""}","{""magnitude"":0.0,""units"":""nm"",""dimension"":""length""}",D -E,"{""magnitude"":0.4,""units"":""nm"",""dimension"":""length""}","{""magnitude"":25.69257912108585,""units"":""meV"",""dimension"":""energy""}","{""magnitude"":0.3984740271498274,""units"":""nm"",""dimension"":""length""}","{""magnitude"":0.0,""units"":""nm"",""dimension"":""length""}",E -n,"{""magnitude"":0.4,""units"":""nm"",""dimension"":""length""}","{""magnitude"":25.69257912108585,""units"":""meV"",""dimension"":""energy""}","{""magnitude"":0.3984740271498274,""units"":""nm"",""dimension"":""length""}","{""magnitude"":0.0,""units"":""nm"",""dimension"":""length""}",n +D,"{""magnitude"":0.4,""units"":""nm"",""dimension"":""length""}","{""magnitude"":25.69257912108585,""units"":""meV"",""dimension"":""energy""}","{""magnitude"":0.3984740271498274,""units"":""nm"",""dimension"":""length""}","{""magnitude"":0.0,""units"":""nm"",""dimension"":""length""}",DH +E,"{""magnitude"":0.4,""units"":""nm"",""dimension"":""length""}","{""magnitude"":25.69257912108585,""units"":""meV"",""dimension"":""energy""}","{""magnitude"":0.3984740271498274,""units"":""nm"",""dimension"":""length""}","{""magnitude"":0.0,""units"":""nm"",""dimension"":""length""}",EH +n,"{""magnitude"":0.4,""units"":""nm"",""dimension"":""length""}","{""magnitude"":25.69257912108585,""units"":""meV"",""dimension"":""energy""}","{""magnitude"":0.3984740271498274,""units"":""nm"",""dimension"":""length""}","{""magnitude"":0.0,""units"":""nm"",""dimension"":""length""}",nH 
S,"{""magnitude"":0.4,""units"":""nm"",""dimension"":""length""}","{""magnitude"":25.69257912108585,""units"":""meV"",""dimension"":""energy""}","{""magnitude"":0.3984740271498274,""units"":""nm"",""dimension"":""length""}","{""magnitude"":0.0,""units"":""nm"",""dimension"":""length""}",S -H,"{""magnitude"":0.4,""units"":""nm"",""dimension"":""length""}","{""magnitude"":25.69257912108585,""units"":""meV"",""dimension"":""energy""}","{""magnitude"":0.3984740271498274,""units"":""nm"",""dimension"":""length""}","{""magnitude"":0.0,""units"":""nm"",""dimension"":""length""}",H +H,"{""magnitude"":0.4,""units"":""nm"",""dimension"":""length""}","{""magnitude"":25.69257912108585,""units"":""meV"",""dimension"":""energy""}","{""magnitude"":0.3984740271498274,""units"":""nm"",""dimension"":""length""}","{""magnitude"":0.0,""units"":""nm"",""dimension"":""length""}",HH A,"{""magnitude"":0.4,""units"":""nm"",""dimension"":""length""}","{""magnitude"":25.69257912108585,""units"":""meV"",""dimension"":""energy""}","{""magnitude"":0.3984740271498274,""units"":""nm"",""dimension"":""length""}","{""magnitude"":0.0,""units"":""nm"",""dimension"":""length""}",A -K,"{""magnitude"":0.4,""units"":""nm"",""dimension"":""length""}","{""magnitude"":25.69257912108585,""units"":""meV"",""dimension"":""energy""}","{""magnitude"":0.3984740271498274,""units"":""nm"",""dimension"":""length""}","{""magnitude"":0.0,""units"":""nm"",""dimension"":""length""}",K +K,"{""magnitude"":0.4,""units"":""nm"",""dimension"":""length""}","{""magnitude"":25.69257912108585,""units"":""meV"",""dimension"":""energy""}","{""magnitude"":0.3984740271498274,""units"":""nm"",""dimension"":""length""}","{""magnitude"":0.0,""units"":""nm"",""dimension"":""length""}",KH 
Y,"{""magnitude"":0.4,""units"":""nm"",""dimension"":""length""}","{""magnitude"":25.69257912108585,""units"":""meV"",""dimension"":""energy""}","{""magnitude"":0.3984740271498274,""units"":""nm"",""dimension"":""length""}","{""magnitude"":0.0,""units"":""nm"",""dimension"":""length""}",Y -R,"{""magnitude"":0.4,""units"":""nm"",""dimension"":""length""}","{""magnitude"":25.69257912108585,""units"":""meV"",""dimension"":""energy""}","{""magnitude"":0.3984740271498274,""units"":""nm"",""dimension"":""length""}","{""magnitude"":0.0,""units"":""nm"",""dimension"":""length""}",R +R,"{""magnitude"":0.4,""units"":""nm"",""dimension"":""length""}","{""magnitude"":25.69257912108585,""units"":""meV"",""dimension"":""energy""}","{""magnitude"":0.3984740271498274,""units"":""nm"",""dimension"":""length""}","{""magnitude"":0.0,""units"":""nm"",""dimension"":""length""}",RH G,"{""magnitude"":0.4,""units"":""nm"",""dimension"":""length""}","{""magnitude"":25.69257912108585,""units"":""meV"",""dimension"":""energy""}","{""magnitude"":0.3984740271498274,""units"":""nm"",""dimension"":""length""}","{""magnitude"":0.0,""units"":""nm"",""dimension"":""length""}",G F,"{""magnitude"":0.4,""units"":""nm"",""dimension"":""length""}","{""magnitude"":25.69257912108585,""units"":""meV"",""dimension"":""energy""}","{""magnitude"":0.3984740271498274,""units"":""nm"",""dimension"":""length""}","{""magnitude"":0.0,""units"":""nm"",""dimension"":""length""}",F -c,"{""magnitude"":0.4,""units"":""nm"",""dimension"":""length""}","{""magnitude"":25.69257912108585,""units"":""meV"",""dimension"":""energy""}","{""magnitude"":0.3984740271498274,""units"":""nm"",""dimension"":""length""}","{""magnitude"":0.0,""units"":""nm"",""dimension"":""length""}",c 
+c,"{""magnitude"":0.4,""units"":""nm"",""dimension"":""length""}","{""magnitude"":25.69257912108585,""units"":""meV"",""dimension"":""energy""}","{""magnitude"":0.3984740271498274,""units"":""nm"",""dimension"":""length""}","{""magnitude"":0.0,""units"":""nm"",""dimension"":""length""}",cH diff --git a/pyMBE/parameters/peptides/Blanco2021/templates_particle_state.csv b/pyMBE/parameters/peptides/Blanco2021/templates_particle_state.csv index 935c01f..d054506 100644 --- a/pyMBE/parameters/peptides/Blanco2021/templates_particle_state.csv +++ b/pyMBE/parameters/peptides/Blanco2021/templates_particle_state.csv @@ -1,7 +1,5 @@ pmb_type,name,particle_name,z,es_type particle_state,S,S,0,0 particle_state,A,A,0,1 -particle_state,K,K,0,2 -particle_state,G,G,0,3 -particle_state,F,F,0,4 - +particle_state,G,G,0,2 +particle_state,F,F,0,3 \ No newline at end of file diff --git a/pyMBE/parameters/peptides/Lunkad2021/templates_particle.csv b/pyMBE/parameters/peptides/Lunkad2021/templates_particle.csv index f8407a0..793efb8 100644 --- a/pyMBE/parameters/peptides/Lunkad2021/templates_particle.csv +++ b/pyMBE/parameters/peptides/Lunkad2021/templates_particle.csv @@ -1,7 +1,7 @@ name,sigma,epsilon,cutoff,offset,initial_state CA,"{""magnitude"":0.35,""units"":""nm"",""dimension"":""length""}","{""magnitude"":25.69257912108585,""units"":""meV"",""dimension"":""energy""}","{""magnitude"":0.3984740271498274,""units"":""nm"",""dimension"":""length""}","{""magnitude"":0.0,""units"":""nm"",""dimension"":""length""}",CA -D,"{""magnitude"":0.35,""units"":""nm"",""dimension"":""length""}","{""magnitude"":25.69257912108585,""units"":""meV"",""dimension"":""energy""}","{""magnitude"":0.3984740271498274,""units"":""nm"",""dimension"":""length""}","{""magnitude"":0.0,""units"":""nm"",""dimension"":""length""}",D 
-E,"{""magnitude"":0.35,""units"":""nm"",""dimension"":""length""}","{""magnitude"":25.69257912108585,""units"":""meV"",""dimension"":""energy""}","{""magnitude"":0.3984740271498274,""units"":""nm"",""dimension"":""length""}","{""magnitude"":0.0,""units"":""nm"",""dimension"":""length""}",E -H,"{""magnitude"":0.35,""units"":""nm"",""dimension"":""length""}","{""magnitude"":25.69257912108585,""units"":""meV"",""dimension"":""energy""}","{""magnitude"":0.3984740271498274,""units"":""nm"",""dimension"":""length""}","{""magnitude"":0.0,""units"":""nm"",""dimension"":""length""}",H -Y,"{""magnitude"":0.35,""units"":""nm"",""dimension"":""length""}","{""magnitude"":25.69257912108585,""units"":""meV"",""dimension"":""energy""}","{""magnitude"":0.3984740271498274,""units"":""nm"",""dimension"":""length""}","{""magnitude"":0.0,""units"":""nm"",""dimension"":""length""}",Y -K,"{""magnitude"":0.35,""units"":""nm"",""dimension"":""length""}","{""magnitude"":25.69257912108585,""units"":""meV"",""dimension"":""energy""}","{""magnitude"":0.3984740271498274,""units"":""nm"",""dimension"":""length""}","{""magnitude"":0.0,""units"":""nm"",""dimension"":""length""}",K +D,"{""magnitude"":0.35,""units"":""nm"",""dimension"":""length""}","{""magnitude"":25.69257912108585,""units"":""meV"",""dimension"":""energy""}","{""magnitude"":0.3984740271498274,""units"":""nm"",""dimension"":""length""}","{""magnitude"":0.0,""units"":""nm"",""dimension"":""length""}",DH +E,"{""magnitude"":0.35,""units"":""nm"",""dimension"":""length""}","{""magnitude"":25.69257912108585,""units"":""meV"",""dimension"":""energy""}","{""magnitude"":0.3984740271498274,""units"":""nm"",""dimension"":""length""}","{""magnitude"":0.0,""units"":""nm"",""dimension"":""length""}",EH 
+H,"{""magnitude"":0.35,""units"":""nm"",""dimension"":""length""}","{""magnitude"":25.69257912108585,""units"":""meV"",""dimension"":""energy""}","{""magnitude"":0.3984740271498274,""units"":""nm"",""dimension"":""length""}","{""magnitude"":0.0,""units"":""nm"",""dimension"":""length""}",HH +Y,"{""magnitude"":0.35,""units"":""nm"",""dimension"":""length""}","{""magnitude"":25.69257912108585,""units"":""meV"",""dimension"":""energy""}","{""magnitude"":0.3984740271498274,""units"":""nm"",""dimension"":""length""}","{""magnitude"":0.0,""units"":""nm"",""dimension"":""length""}",YH +K,"{""magnitude"":0.35,""units"":""nm"",""dimension"":""length""}","{""magnitude"":25.69257912108585,""units"":""meV"",""dimension"":""energy""}","{""magnitude"":0.3984740271498274,""units"":""nm"",""dimension"":""length""}","{""magnitude"":0.0,""units"":""nm"",""dimension"":""length""}",KH diff --git a/pyMBE/pyMBE.py b/pyMBE/pyMBE.py index a050985..3c99957 100644 --- a/pyMBE/pyMBE.py +++ b/pyMBE/pyMBE.py @@ -269,10 +269,11 @@ def _create_hydrogel_chain(self, hydrogel_chain, nodes, espresso_system, use_def start_node_id = nodes[node_start_label]["id"] end_node_id = nodes[node_end_label]["id"] # Finding a backbone vector between node_start and node_end - vec_between_nodes = np.array(nodes[node_start_label]["pos"]) - np.array(nodes[node_end_label]["pos"]) + vec_between_nodes = np.array(nodes[node_end_label]["pos"]) - np.array(nodes[node_start_label]["pos"]) vec_between_nodes = vec_between_nodes - self.lattice_builder.box_l * np.round(vec_between_nodes/self.lattice_builder.box_l) - backbone_vector = np.array((vec_between_nodes/(self.lattice_builder.mpc + 1))) - backbone_vector = backbone_vector / np.linalg.norm(backbone_vector) + backbone_vector = vec_between_nodes / np.linalg.norm(vec_between_nodes) + if reverse_residue_order: + vec_between_nodes *= -1.0 # Calculate the start position of the chain chain_residues = self.db.get_template(pmb_type="molecule", name=molecule_name).residue_list @@ 
-292,7 +293,7 @@ def _create_hydrogel_chain(self, hydrogel_chain, nodes, espresso_system, use_def mol_id = self.create_molecule(name=molecule_name, # Use the name defined earlier number_of_molecules=1, # Creating one chain espresso_system=espresso_system, - list_of_first_residue_positions=[first_bead_pos.tolist()],#Start at the first node + list_of_first_residue_positions=[first_bead_pos.tolist()], #Start at the first node backbone_vector=np.array(backbone_vector)/l0, use_default_bond=use_default_bond, reverse_residue_order=reverse_residue_order)[0] diff --git a/pyMBE/storage/manager.py b/pyMBE/storage/manager.py index d18e22a..9411ec0 100644 --- a/pyMBE/storage/manager.py +++ b/pyMBE/storage/manager.py @@ -997,6 +997,7 @@ def get_particle_id_map(self, object_name): if object_type is None: raise ValueError(f"No object named '{object_name}' found in database.") + # Maps to return id_list = [] residue_map = {} diff --git a/samples/Beyer2024/create_paper_data.py b/samples/Beyer2024/create_paper_data.py index 767aa1e..47b13f7 100644 --- a/samples/Beyer2024/create_paper_data.py +++ b/samples/Beyer2024/create_paper_data.py @@ -18,12 +18,13 @@ # Import pyMBE and other libraries import pyMBE -from pyMBE.lib import analysis +from pyMBE.lib import analysis, handy_functions from pathlib import Path import sys import numpy as np import argparse import subprocess +import pandas as pd # Create an instance of pyMBE library pmb = pyMBE.pymbe_library(seed=42) @@ -49,7 +50,7 @@ parser.add_argument('--plot', action='store_true', help="Switch to plot the data") args = parser.parse_args() -samples_path = Path(__file__).parent +samples_path = Path(pmb.root).parent / "samples" # Inputs fig_label=args.fig_label @@ -59,6 +60,16 @@ ## Peptide plots (Fig. 
7) labels_fig7=["7a", "7b", "7c"] +if fig_label in labels_fig7: + time_series_folder_path=samples_path / "Beyer2024" / "time_series" / "peptides" + +if fig_label in labels_fig8: + time_series_folder_path=samples_path / "Beyer2024" / "time_series" / "globular_protein" + +if fig_label == "9": + time_series_folder_path=samples_path / "Beyer2024" / "time_series" / "grxmc" + + if fig_label in labels_fig7: script_path=samples_path / "Beyer2024" / "peptide.py" if fig_label == "7a": @@ -71,7 +82,7 @@ raise RuntimeError() pH_range = np.linspace(2, 12, num=21) for pH in pH_range: - run_command=[sys.executable, script_path, "--sequence", sequence, "--pH", str(pH), "--mode", mode] + run_command=[sys.executable, script_path, "--sequence", sequence, "--pH", str(pH), "--mode", mode, "--output", time_series_folder_path] print(subprocess.list2cmdline(run_command)) subprocess.check_output(run_command) @@ -87,7 +98,7 @@ protein_pdb=pdb_codes[fig_label] path_to_cg = pmb.root / "parameters" / "globular_proteins" / f"{protein_pdb}.vtf" for pH in pH_range: - run_command=run_command_common + ["--pH", str(pH),"--pdb", protein_pdb, "--path_to_cg", str(path_to_cg)] + run_command=run_command_common + ["--pH", str(pH),"--pdb", protein_pdb, "--path_to_cg", str(path_to_cg), "--output", time_series_folder_path] print(subprocess.list2cmdline(run_command)) subprocess.check_output(run_command) @@ -97,19 +108,11 @@ pH_range = np.linspace(1, 13, num=13) c_salt_res = 0.01 * pmb.units.mol/pmb.units.L for pH in pH_range: - run_command=[sys.executable, script_path, "--c_salt_res", str(0.01), "--c_mon_sys", str(0.435), "--pH_res", str(pH), "--pKa_value", str(4.0), "--mode", mode] + run_command=[sys.executable, script_path, "--c_salt_res", str(0.01), "--c_mon_sys", str(0.435), "--pH_res", str(pH), "--pKa_value", str(4.0), "--mode", mode, "--output", time_series_folder_path] print(subprocess.list2cmdline(run_command)) subprocess.check_output(run_command) # Analyze all time series -if fig_label in 
labels_fig7: - time_series_folder_path=samples_path / "Beyer2024" / "time_series" / "peptides" - -if fig_label in labels_fig8: - time_series_folder_path=samples_path / "Beyer2024" / "time_series" / "globular_protein" - -if fig_label == "9": - time_series_folder_path=samples_path / "Beyer2024" / "time_series" / "grxmc" data=analysis.analyze_time_series(path_to_datafolder=time_series_folder_path) @@ -124,9 +127,8 @@ import matplotlib.pyplot as plt import matplotlib as mpl - plt.rc('text', usetex=True) - plt.rc('text.latex', preamble=r"\usepackage{mathptmx}") - plt.rcParams["font.family"] = "serif" + plt.rcParams['text.usetex'] = False + plt.rcParams['font.family'] = 'serif' plt.tight_layout() mpl.rc('axes', linewidth=1) mpl.rcParams['lines.markersize'] = 5 @@ -162,11 +164,13 @@ pmb.load_database(par_path) # Load ref data - ref_data=analysis.read_csv_file(path=Path(__file__).parent / "data" / fig_data[fig_label]) + ref_data=pd.read_csv(filepath_or_buffer=Path(pmb.root).parent / "testsuite" / "data"/ fig_data[fig_label]) # Calculate and plot Henderson-Hasselbalch (HH) if fig_label in labels_fig7: - + handy_functions.define_peptide_AA_residues(sequence=sequence, + model="1beadAA", + pmb=pmb) pmb.define_peptide (name=sequence, sequence=sequence, model="1beadAA") diff --git a/samples/Beyer2024/globular_protein.py b/samples/Beyer2024/globular_protein.py index bc5e769..f1adcef 100644 --- a/samples/Beyer2024/globular_protein.py +++ b/samples/Beyer2024/globular_protein.py @@ -28,10 +28,7 @@ pmb = pyMBE.pymbe_library(seed=42) #Import functions from handy_functions script -from pyMBE.lib.handy_functions import setup_electrostatic_interactions -from pyMBE.lib.handy_functions import relax_espresso_system -from pyMBE.lib.handy_functions import setup_langevin_dynamics -from pyMBE.lib.handy_functions import do_reaction +from pyMBE.lib.handy_functions import setup_electrostatic_interactions, relax_espresso_system, setup_langevin_dynamics, do_reaction, define_protein_AA_particles, 
define_protein_AA_residues from pyMBE.lib import analysis # Here you can adjust the width of the panda columns displayed when running the code pd.options.display.max_colwidth = 10 @@ -79,7 +76,7 @@ verbose=args.no_verbose protein_name = args.pdb pH_value = args.pH - +model = '2beadAA' inputs={"pH": args.pH, "pdb": args.pdb} @@ -139,13 +136,30 @@ espresso_system.cell_system.skin=0.4 #Reads the VTF file of the protein model topology_dict, sequence = pmb.read_protein_vtf (filename=args.path_to_cg) -#Defines the protein in the pmb.df -pmb.define_protein (name=protein_name, - sequence=sequence, - model = '2beadAA', - lj_setup_mode = "wca") +# Here we upload the pka set from the reference_parameters folder +path_to_pka=pmb.root / "parameters" / "pka_sets" / "Nozaki1967.json" +pmb.load_pka_set(filename=path_to_pka) +pka_set = pmb.get_pka_set() + +#Defines the protein in the pyMBE database +define_protein_AA_particles(topology_dict=topology_dict, + pmb=pmb, + pka_set=pka_set) +residue_list = define_protein_AA_residues(sequence=sequence, + model=model, + pmb=pmb) + +# Define a residue for the metal ion +if args.pdb == "1f6s": + pmb.define_residue(name="AA-Ca", + central_bead="Ca", + side_chains=[]) + +pmb.define_protein(name=protein_name, + sequence=sequence, + model = model) -# Here we define the solution particles in the pmb.df +# Here we define the solution particles in the pyMBE database cation_name = 'Na' anion_name = 'Cl' @@ -161,24 +175,18 @@ epsilon=epsilon, offset=ion_size-sigma) -# Here we upload the pka set from the reference_parameters folder -path_to_pka=pmb.root / "parameters" / "pka_sets" / "Nozaki1967.json" -pmb.load_pka_set(filename=path_to_pka) - #We create the protein in espresso -pmb.create_protein(name=protein_name, - number_of_proteins=1, - espresso_system=espresso_system, - topology_dict=topology_dict) - +protein_id = pmb.create_protein(name=protein_name, + number_of_proteins=1, + espresso_system=espresso_system, + topology_dict=topology_dict)[0] 
#Here we activate the motion of the protein if args.move_protein: - pmb.enable_motion_of_rigid_object(instance_id=0, + pmb.enable_motion_of_rigid_object(instance_id=protein_id, pmb_type="protein", espresso_system=espresso_system) # Here we put the protein on the center of the simulation box -protein_id = pmb.df.loc[pmb.df['name']==protein_name].molecule_id.values[0] pmb.center_object_in_simulation_box(instance_id=protein_id, pmb_type="protein", espresso_system=espresso_system) @@ -199,7 +207,6 @@ object_name=protein_name, pmb_type="protein", dimensionless=True)["mean"] - ## Get coordinates outside the volume occupied by the protein counter_ion_coords=pmb.generate_coordinates_outside_sphere(center=protein_center, radius=protein_radius, @@ -235,10 +242,15 @@ position=added_salt_ions_coords[N_ions:]) #Here we calculated the ionisable groups -basic_groups = pmb.df.loc[(~pmb.df['particle_id'].isna()) & (pmb.df['acidity']=='basic')].name.to_list() -acidic_groups = pmb.df.loc[(~pmb.df['particle_id'].isna()) & (pmb.df['acidity']=='acidic')].name.to_list() -list_ionisable_groups = basic_groups + acidic_groups -total_ionisable_groups = len (list_ionisable_groups) +acid_base_ids = [] +list_ionisable_groups = [] +for name in pka_set.keys(): + part_ids = pmb.db.find_instance_ids_by_name(pmb_type="particle", + name=name) + if part_ids: + acid_base_ids+=part_ids + list_ionisable_groups+=[name] +total_ionisable_groups = len(acid_base_ids) if verbose: print(f"The box length of the system is {Box_L.to('reduced_length')} {Box_L.to('nm')}") @@ -246,10 +258,11 @@ print(f"The total amount of ionisable groups is {total_ionisable_groups}") #Setup of the reactions in espresso -cpH, labels = pmb.setup_cpH(counter_ion=cation_name, - constant_pH= pH_value) +cpH = pmb.setup_cpH(counter_ion=cation_name, + constant_pH= pH_value) if verbose: - print(f"The acid-base reaction has been sucessfully setup for {labels}") + print("The acid-base reaction has been successfully set up for:") + 
print(pmb.get_reactions_df()) type_map = pmb.get_type_map() types = list (type_map.values()) @@ -286,7 +299,6 @@ net_charge_list = [] Z_sim=[] -particle_id_list = pmb.df.loc[~pmb.df['molecule_id'].isna()].particle_id.dropna().to_list() pmb.save_database (folder=data_path/"database") @@ -298,57 +310,36 @@ for label in labels_obs: time_series[label]=[] -charge_dict=pmb.calculate_net_charge (espresso_system=espresso_system, - object_name=protein_name, - pmb_type="protein", - dimensionless=True) - -net_charge_residues = charge_dict ['residues'] -net_charge_amino_save = {} -AA_label_list=[] -for amino in net_charge_residues.keys(): - amino_part_row=pmb.df[(pmb.df['residue_id']== amino) & ((pmb.df['acidity'] == "acidic") | (pmb.df['acidity'] == "basic"))] - if not amino_part_row.empty: - label = f'charge_{amino_part_row["name"].values[0]}' - if label not in AA_label_list: - AA_label_list.append(label) - net_charge_amino_save[label] = [] - time_series[label] = [] +AA_label_list = [] +for amino in list_ionisable_groups: + label = f'AA-{amino}' + time_series[f"charge_{label}"] = [] + AA_label_list.append(label) for step in tqdm.trange(N_samples, disable=not verbose): espresso_system.integrator.run (steps = integ_steps) do_reaction(cpH, steps=total_ionisable_groups) - charge_dict=pmb.calculate_net_charge (espresso_system=espresso_system, - object_name=protein_name, - pmb_type="protein", - dimensionless=True) - charge_residues = charge_dict['residues'] - charge_residues_per_type={} - + protein_net_charge = pmb.calculate_net_charge(espresso_system=espresso_system, + object_name=protein_name, + pmb_type="protein", + dimensionless=True)["mean"] + # Store observables + time_series["time"].append(espresso_system.time) + time_series["charge"].append(protein_net_charge) + charge_residues_per_type = {} for label in AA_label_list: charge_residues_per_type[label]=[] - - for amino in charge_residues.keys(): - amino_part_row=pmb.df[(pmb.df['residue_id']== amino) & ((pmb.df['acidity'] == 
"acidic") | (pmb.df['acidity'] == "basic"))] - if not amino_part_row.empty: - label = f'charge_{amino_part_row["name"].values[0]}' - if label in AA_label_list: - charge_residues_per_type[label].append(charge_residues[amino]) - + charge_res=pmb.calculate_net_charge (espresso_system=espresso_system, + object_name=label, + pmb_type="residue", + dimensionless=True)["mean"] + time_series[f"charge_{label}"].append(charge_res) if step % stride_traj == 0 : n_frame +=1 with open(frames_path / f"trajectory{n_frame}.vtf", mode='w+t') as coordinates: vtf.writevsf(espresso_system, coordinates) vtf.writevcf(espresso_system, coordinates) - # Store observables - time_series["time"].append(espresso_system.time) - time_series["charge"].append(charge_dict["mean"]) - - for label in AA_label_list: - charge_amino = np.mean(charge_residues_per_type[label]) - time_series[label].append(charge_amino) - data_path.mkdir(parents=True, exist_ok=True) time_series=pd.DataFrame(time_series) diff --git a/samples/Beyer2024/peptide.py b/samples/Beyer2024/peptide.py index e767baa..6dd1c8f 100644 --- a/samples/Beyer2024/peptide.py +++ b/samples/Beyer2024/peptide.py @@ -27,7 +27,7 @@ import pyMBE from pyMBE.lib import analysis from pyMBE.lib import handy_functions as hf -from pyMBE.lib.handy_functions import do_reaction +from pyMBE.lib.handy_functions import do_reaction, define_peptide_AA_residues # Create an instance of pyMBE library pmb = pyMBE.pymbe_library(seed=42) @@ -80,7 +80,7 @@ path_to_interactions=pmb.root / "parameters" / "peptides" / "Lunkad2021" path_to_pka=pmb.root / "parameters" / "pka_sets" / "CRC1991.json" pmb.load_database(folder=path_to_interactions) - pmb.load_pka_set(filename=path_to_pka) + pmb.load_pka_set(filename=path_to_pka) model = '2beadAA' # Model with 2 beads per each aminoacid N_peptide_chains = 4 sigma=1*pmb.units.Quantity("reduced_length") @@ -102,7 +102,13 @@ chain_length=len(sequence) pep_concentration = 5.56e-4 *pmb.units.mol/pmb.units.L - +pka_set = pmb.get_pka_set() 
+for particle_name in pka_set.keys(): + pmb.define_monoprototic_particle_states(particle_name=particle_name, + acidity=pka_set[particle_name]["acidity"]) +define_peptide_AA_residues(sequence=sequence, + model=model, + pmb=pmb) # Simulation parameters if mode == "short-run": Nsamples = 1000 @@ -154,7 +160,6 @@ pmb.create_molecule(name=sequence, number_of_molecules=N_peptide_chains, espresso_system=espresso_system) - # Create counterions for the peptide chains pmb.create_counterions(object_name=sequence, cation_name=cation_name, @@ -166,12 +171,13 @@ anion_name=anion_name, c_salt=c_salt) -cpH, labels = pmb.setup_cpH(counter_ion=cation_name, - constant_pH=pH) +cpH = pmb.setup_cpH(counter_ion=cation_name, + constant_pH=pH) if verbose: print(f"The box length of your system is {L.to('reduced_length')} = {L.to('nm')}") - print(f"The acid-base reaction has been successfully setup for {labels}") + print("The acid-base reaction has been successfully set up for:") + print(pmb.get_reactions_df()) # Setup espresso to track the ionization of the acid/basic groups in peptide type_map =pmb.get_type_map() @@ -221,7 +227,6 @@ object_name=sequence, pmb_type="peptide", dimensionless=True) - Rg = espresso_system.analysis.calc_rg(chain_start=0, number_of_chains=N_peptide_chains, chain_length=chain_length) diff --git a/samples/Beyer2024/weak_polyelectrolyte_dialysis.py b/samples/Beyer2024/weak_polyelectrolyte_dialysis.py index 10d9af8..71dc201 100644 --- a/samples/Beyer2024/weak_polyelectrolyte_dialysis.py +++ b/samples/Beyer2024/weak_polyelectrolyte_dialysis.py @@ -121,10 +121,11 @@ bond_type = 'FENE' fene_spring_constant = 30 * pmb.units('reduced_energy / reduced_length**2') fene_r_max = 1.5 * pmb.units('reduced_length') +fene_r0 = 0 * pmb.units('reduced_length') -fene_bond = {'k' : fene_spring_constant, - 'd_r_max': fene_r_max, - } +fene_bond = {'r_0': fene_r0, + 'k' : fene_spring_constant, + 'd_r_max': fene_r_max} pmb.define_bond(bond_type = bond_type, bond_parameters = fene_bond, @@ 
-172,10 +173,6 @@ espresso_system = espressomd.System(box_l = [L.to('reduced_length').magnitude]*3) espresso_system.time_step=dt espresso_system.cell_system.skin=0.4 -if verbose: - print("Created espresso object") -if verbose: - print("Added bonds") # Create molecules and ions in the espresso system pmb.create_molecule(name=polyacid_name, @@ -202,16 +199,16 @@ activity_coefficient_monovalent_pair = lambda x: np.exp(excess_chemical_potential_interpolated(x.to('1/(reduced_length**3 * N_A)').magnitude)) if verbose: print("Setting up reactions...") -grxmc, labels, ionic_strength_res = pmb.setup_grxmc_reactions(pH_res=pH_res, - c_salt_res=c_salt_res, - proton_name=proton_name, - hydroxide_name=hydroxide_name, - salt_cation_name=sodium_name, - salt_anion_name=chloride_name, - activity_coefficient=activity_coefficient_monovalent_pair, - pka_set=pka_set) +grxmc, ionic_strength_res = pmb.setup_grxmc_reactions(pH_res=pH_res, + c_salt_res=c_salt_res, + proton_name=proton_name, + hydroxide_name=hydroxide_name, + salt_cation_name=sodium_name, + salt_anion_name=chloride_name, + activity_coefficient=activity_coefficient_monovalent_pair) if verbose: - print('The acid-base reaction has been sucessfully set up for ', labels) + print("The acid-base reaction has been successfully set up for:") + print(pmb.get_reactions_df()) # Setup espresso to track the ionization of the acid groups type_map = pmb.get_type_map() @@ -264,10 +261,8 @@ espresso_system.integrator.run(steps=1000) do_reaction(grxmc, steps=100) - # Main loop print("Started production run.") - labels_obs=["time", "alpha"] time_series={} @@ -281,10 +276,8 @@ for i in tqdm.trange(N_production_loops, disable=not verbose): espresso_system.integrator.run(steps=1000) do_reaction(grxmc, steps=100) - # Measure time time_series["time"].append(espresso_system.time) - # Measure degree of ionization charge_dict=pmb.calculate_net_charge(espresso_system=espresso_system, object_name=polyacid_name, diff --git a/samples/peptide_cpH.py 
b/samples/peptide_cpH.py index 8182867..5bd6ed4 100644 --- a/samples/peptide_cpH.py +++ b/samples/peptide_cpH.py @@ -102,6 +102,7 @@ pmb.define_monoprototic_particle_states(particle_name=particle_name, acidity=pka_set[particle_name]["acidity"]) + generic_bond_length=0.4 * pmb.units.nm generic_harmonic_constant = 400 * pmb.units('reduced_energy / reduced_length**2') diff --git a/samples/salt_solution_gcmc.py b/samples/salt_solution_gcmc.py index a6766f4..704053c 100644 --- a/samples/salt_solution_gcmc.py +++ b/samples/salt_solution_gcmc.py @@ -78,8 +78,14 @@ # Define salt cation_name = 'Na' anion_name = 'Cl' -pmb.define_particle(name=cation_name, z=1, sigma=0.355*pmb.units.nm, epsilon=1*pmb.units('reduced_energy')) -pmb.define_particle(name=anion_name, z=-1, sigma=0.355*pmb.units.nm, epsilon=1*pmb.units('reduced_energy')) +pmb.define_particle(name=cation_name, + z=1, + sigma=0.355*pmb.units.nm, + epsilon=1*pmb.units('reduced_energy')) +pmb.define_particle(name=anion_name, + z=-1, + sigma=0.355*pmb.units.nm, + epsilon=1*pmb.units('reduced_energy')) # System parameters c_salt_res = args.c_salt_res * pmb.units.mol/ pmb.units.L @@ -124,12 +130,12 @@ type_map = pmb.get_type_map() types = list (type_map.values()) espresso_system.setup_type_map(type_list = types) -print(type_map) # Setup the non-interacting type for speeding up the sampling of the reactions non_interacting_type = max(type_map.values())+1 RE.set_non_interacting_type(type=non_interacting_type) -print(f'The non interacting type is set to {non_interacting_type}') +if verbose: + print(f'The non interacting type is set to {non_interacting_type}') espresso_system.time_step = dt # for this example, we use a hard-coded skin value; In general it should be optimized by tuning @@ -202,9 +208,7 @@ time_series=pd.DataFrame(time_series) filename=analysis.built_output_name(input_dict=inputs) - time_series.to_csv(data_path / f"{filename}_time_series.csv", index=False) -particle_id_list = 
pmb.df.loc[~pmb.df['molecule_id'].isna()].particle_id.dropna().to_list() -#Save the pyMBE dataframe in a CSV file -pmb.write_pmb_df(filename=data_path / "df.csv") +#Save the pyMBE database in a CSV file +pmb.save_database(folder=data_path / "database") diff --git a/samples/weak_polyacid_hydrogel_grxmc.py b/samples/weak_polyacid_hydrogel_grxmc.py index 70bc15d..2e883b8 100644 --- a/samples/weak_polyacid_hydrogel_grxmc.py +++ b/samples/weak_polyacid_hydrogel_grxmc.py @@ -18,6 +18,7 @@ # import espressomd +from espressomd.io.writer import vtf from pathlib import Path import numpy as np import pandas as pd @@ -77,6 +78,8 @@ args = parser.parse_args() mode=args.mode +data_path = args.output +data_path.mkdir(parents=True, exist_ok=True) c_salt_res = args.csalt_res * pmb.units.mol/ pmb.units.L solvent_permittivity = 78.9 seed=42 @@ -96,6 +99,11 @@ central_bead=BeadType, side_chains=[]) +residue_list = ["Res"]*args.mpc +molecule_name = "hydrogel_chain" +pmb.define_molecule(name=molecule_name, + residue_list=residue_list) + bond_type = 'FENE' bond_length = 0.966 * pmb.units("reduced_length") fene_spring_constant = 30 * pmb.units('reduced_energy / reduced_length**2') @@ -135,7 +143,6 @@ diamond_lattice = DiamondLattice(args.mpc, bond_length) espresso_system = espressomd.System(box_l = [diamond_lattice.box_l]*3) -pmb.add_bonds_to_espresso(espresso_system = espresso_system) lattice_builder = pmb.initialize_lattice_builder(diamond_lattice) # Setting up node topology @@ -147,19 +154,13 @@ "lattice_index": indices[index]}) # Setting up chain topology -node_labels = lattice_builder.node_labels -chain_labels = lattice_builder.chain_labels -reverse_node_labels = {v: k for k, v in node_labels.items()} chain_topology = [] -residue_list = ["Res"]*args.mpc - -for chain_data in chain_labels.items(): - chain_label = chain_data[1] - node_label_pair = chain_data[0] - node_label_s, node_label_e = [int(x) for x in node_label_pair.strip("()").split(",")] - 
chain_topology.append({'node_start':reverse_node_labels[node_label_s], - 'node_end': reverse_node_labels[node_label_e], - 'residue_list':residue_list}) +for node_conectivity in diamond_lattice.connectivity: + node_start = str(diamond_lattice.indices[node_conectivity[0]]) + node_end = str(diamond_lattice.indices[node_conectivity[1]]) + chain_topology.append({'node_start':node_start, + 'node_end': node_end, + 'molecule_name':molecule_name}) pmb.define_hydrogel("my_hydrogel", node_topology, chain_topology) hydrogel_info = pmb.create_hydrogel("my_hydrogel", espresso_system) @@ -176,6 +177,13 @@ dt = 0.01 # Timestep espresso_system.time_step = dt pmb.setup_lj_interactions(espresso_system=espresso_system) +#Save the initial state +n_frame = 0 +frames_dir = data_path / "frames" +frames_dir.mkdir(parents=True, exist_ok=True) +with open(frames_dir / f"trajectory{n_frame}.vtf", mode='w+t') as coordinates: + vtf.writevsf(espresso_system, coordinates) + vtf.writevcf(espresso_system, coordinates) print("*** Relaxing the system... 
***") relax_espresso_system(espresso_system=espresso_system, @@ -220,14 +228,13 @@ pka_set = {BeadType: {"pka_value": args.pKa, "acidity": "acidic"}} -grxmc, labels, ionic_strength_res = pmb.setup_grxmc_reactions(pH_res=args.pH_res, - c_salt_res=c_salt_res, - proton_name=proton_name, - hydroxide_name=hydroxide_name, - salt_cation_name=sodium_name, - salt_anion_name=chloride_name, - activity_coefficient=activity_coefficient_monovalent_pair, - pka_set=pka_set) +grxmc, ionic_strength_res = pmb.setup_grxmc_reactions(pH_res=args.pH_res, + c_salt_res=c_salt_res, + proton_name=proton_name, + hydroxide_name=hydroxide_name, + salt_cation_name=sodium_name, + salt_anion_name=chloride_name, + activity_coefficient=activity_coefficient_monovalent_pair) # Setup espresso to track the ionization of the acid groups type_map = pmb.get_type_map() @@ -289,8 +296,7 @@ "pH": args.pH_res, "pKa": args.pKa} -data_path = args.output -data_path.mkdir(parents=True, exist_ok=True) + time_series=pd.DataFrame(time_series) filename=analysis.built_output_name(input_dict=inputs) diff --git a/testsuite/globular_protein_unit_tests.py b/testsuite/globular_protein_unit_tests.py index 39488c5..79d4cd0 100644 --- a/testsuite/globular_protein_unit_tests.py +++ b/testsuite/globular_protein_unit_tests.py @@ -1,5 +1,5 @@ # -# Copyright (C) 2024-2025 pyMBE-dev team +# Copyright (C) 2024-2026 pyMBE-dev team # # This file is part of pyMBE. # @@ -15,6 +15,7 @@ # # You should have received a copy of the GNU General Public License # along with this program. If not, see . 
+ import numpy as np import espressomd import unittest as ut diff --git a/testsuite/hydrogel_builder.py b/testsuite/hydrogel_builder.py index ce7df0d..a83d502 100644 --- a/testsuite/hydrogel_builder.py +++ b/testsuite/hydrogel_builder.py @@ -93,16 +93,14 @@ "lattice_index": indices[index]}) # Setting up chain topology -connectivity = diamond_lattice.connectivity -node_labels = lattice_builder.node_labels -reverse_node_labels = {v: k for k, v in node_labels.items()} -connectivity_with_labels = {(reverse_node_labels[i], reverse_node_labels[j]) for i, j in connectivity} chain_topology = [] - -for node_s, node_e in connectivity_with_labels: - chain_topology.append({'node_start':node_s, - 'node_end': node_e, +for node_conectivity in diamond_lattice.connectivity: + node_start = str(diamond_lattice.indices[node_conectivity[0]]) + node_end = str(diamond_lattice.indices[node_conectivity[1]]) + chain_topology.append({'node_start':node_start, + 'node_end': node_end, 'molecule_name':molecule_name}) + ####################################################### hydrogel_name="my_hydrogel" pmb.define_hydrogel(hydrogel_name,node_topology, chain_topology) diff --git a/testsuite/hydrogel_tests.py b/testsuite/hydrogel_tests.py index 8a92c58..e3d0891 100644 --- a/testsuite/hydrogel_tests.py +++ b/testsuite/hydrogel_tests.py @@ -109,8 +109,8 @@ def test_pressure(self): ] with multiprocessing.Pool(processes=2) as pool: results = dict(pool.starmap(run_simulation, [(tc, "pressure") for tc in test_cases])) - rtol = 0.4 # Relative tolerance - + rtol = 1 # Relative tolerance + atol= 1 # Absolute tolerance data_ref = pd.read_csv(root / "testsuite" / "data" / "Landsgesell2022a.csv") # Compare pressure values @@ -140,7 +140,7 @@ def test_pressure(self): test_pressure_value = test_pressure.iloc[0] # or test_pressure.item() test_pressure = pmb.units.Quantity(test_pressure_value, "reduced_energy/reduced_length**3") p_sys_minus_p_res = test_pressure.m_as("bar") - p_res.m_as("bar") - 
np.testing.assert_allclose(p_sys_minus_p_res, pressure_ref, rtol=rtol, atol=1e-5) + np.testing.assert_allclose(p_sys_minus_p_res, pressure_ref, rtol=rtol, atol=atol) def test_titration(self): test_cases = [ diff --git a/tutorials/lattice_builder.ipynb b/tutorials/lattice_builder.ipynb deleted file mode 100644 index 0e29bc6..0000000 --- a/tutorials/lattice_builder.ipynb +++ /dev/null @@ -1,142 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "id": "3e601096-4d3a-467c-b2e2-428526c3d1da", - "metadata": {}, - "source": [ - "# Lattice builder\n", - "\n", - "Show the basic functionality of the lattice builder by creating a hydrogel network based on the diamond lattice." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "3ce04200-db14-4503-b729-3bb43dc06e59", - "metadata": {}, - "outputs": [], - "source": [ - "import matplotlib.pyplot as plt\n", - "%matplotlib inline\n", - "%matplotlib ipympl" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "a5773586-fb40-4991-b2ce-feb9a651e311", - "metadata": {}, - "outputs": [], - "source": [ - "import lib.lattice\n", - "import matplotlib\n", - "import logging\n", - "import sys\n", - "import packaging.specifiers\n", - "plt.rcParams.update({'font.size': 14})\n", - "logging.basicConfig(level=logging.INFO, stream=sys.stdout)\n", - "mpl_has_aspect_ratio = packaging.specifiers.SpecifierSet(\">=3.7.0\").contains(matplotlib.__version__)\n", - "if not mpl_has_aspect_ratio:\n", - " logging.warning(f\"matplotlib version {matplotlib.__version__} is too old for Axes3D; you won't be able to set the Axes3D aspect ratio to 1:1:1\")" - ] - }, - { - "cell_type": "markdown", - "id": "a2cdacc1-49ba-45e7-a057-c107b2b356b9", - "metadata": {}, - "source": [ - "We will define a diamond lattice with default chains,\n", - "then overwrite one of the default chains with a custom sequence of residues.\n", - "This sequence is asymmetric and requires specifying the direction of the chain.\n", - "We will also 
override the type of one node residue." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "fc6824ac-aaae-4dc7-ae05-509c2d1370a3", - "metadata": {}, - "outputs": [], - "source": [ - "# create a custom lattice\n", - "lattice = lib.lattice.LatticeBuilder(lib.lattice.DiamondLattice)\n", - "lattice.add_default_chains(mpc=1)\n", - "lattice.set_node(node=\"[1 1 1]\", residue=\"silicon\")\n", - "lattice.set_chain(node_start=\"[0 0 0]\", node_end=\"[1 1 1]\",\n", - " sequence=[\"nitrogen\", \"carbon\", \"oxygen\", \"carbon\"])" - ] - }, - { - "cell_type": "markdown", - "id": "f7a85abc-ef75-40c5-bb11-cfd8bcd95daf", - "metadata": {}, - "source": [ - "We will now plot the hydrogel in an interactive 3D viewer.\n", - "We could use the default matplotlib color wheel, but since we have a lot of residue types,\n", - "we will instead define our own color wheel.\n", - "The order of the keys in the dictionary will determine the order of the residues in the matplotlib legend." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "57093b09-ec0a-4c39-876a-e3cebfa0f03b", - "metadata": {}, - "outputs": [], - "source": [ - "# custom discrete color wheel\n", - "colormap = {\n", - " \"default_linker\": \"C0\",\n", - " \"default_monomer\": \"C1\",\n", - " \"silicon\": \"grey\",\n", - " \"carbon\": \"green\",\n", - " \"oxygen\": \"C3\",\n", - " \"nitrogen\": \"purple\",\n", - "}" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "d9098e29-de2b-4be2-b6a6-66e22a4c94dc", - "metadata": {}, - "outputs": [], - "source": [ - "# plot in an interactive 3D viewer\n", - "fig = plt.figure(figsize=(12, 12))\n", - "ax = fig.add_subplot(projection=\"3d\", computed_zorder=False)\n", - "lattice.set_colormap(colormap)\n", - "lattice.draw_lattice(ax)\n", - "lattice.draw_simulation_box(ax)\n", - "ax.set_axis_off()\n", - "ax.set_proj_type(\"ortho\")\n", - "ax.legend()\n", - "if mpl_has_aspect_ratio:\n", - " ax.set_aspect(\"equal\", adjustable=\"box\")\n", 
- "plt.show()" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.10.12" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/tutorials/pyMBE_tutorial.ipynb b/tutorials/pyMBE_tutorial.ipynb index c638831..2d2ffef 100644 --- a/tutorials/pyMBE_tutorial.ipynb +++ b/tutorials/pyMBE_tutorial.ipynb @@ -42,7 +42,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 1, "metadata": {}, "outputs": [], "source": [ @@ -61,7 +61,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 2, "metadata": {}, "outputs": [], "source": [ @@ -83,9 +83,21 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Current set of reduced units:\n", + "0.355 nanometer = 1 reduced_length\n", + "4.1164e-21 joule = 1 reduced_energy\n", + "1.6022e-19 coulomb = 1 reduced_charge\n", + "Temperature: 298.15 kelvin\n" + ] + } + ], "source": [ "reduced_unit_set = pmb.get_reduced_units()\n", "print(reduced_unit_set)" @@ -100,9 +112,19 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "WARNING:pint.util:Redefining 'reduced_energy' ()\n", + "WARNING:pint.util:Redefining 'reduced_length' ()\n", + "WARNING:pint.util:Redefining 'reduced_charge' ()\n" + ] + } + ], "source": [ "pmb.set_reduced_units(unit_length = 0.5*pmb.units.nm, \n", " unit_charge = 5*pmb.units.e)\n", @@ -125,9 +147,17 @@ }, { "cell_type": 
"code", - "execution_count": null, - "metadata": {}, - "outputs": [], + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "The side of the simulation box is 7.5 nanometer = 15.0 reduced_length\n" + ] + } + ], "source": [ "Box_L = 7.5*pmb.units.nm\n", "\n", @@ -152,7 +182,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 6, "metadata": {}, "outputs": [], "source": [ @@ -167,16 +197,74 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "The properties of the particles are stored in a pandas Dataframe (df). For displaying all the information of the particles one can use the following command:" + "pyMBE stores the properties of each different particle type or particle \"template\" on its internal database. To check all particle templates defined in the pyMBE database, one can query the pyMBE database manager:" ] }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "pmb.filter_df(pmb_type = 'particle')" + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
pmb_typenamesigmaepsiloncutoffoffsetinitial_state
0particleNa0.35 nanometer25.69257912108585 millielectron_volt0.5612310241546865 nanometer0.0 nanometerNa
\n", + "
" + ], + "text/plain": [ + " pmb_type name sigma epsilon \\\n", + "0 particle Na 0.35 nanometer 25.69257912108585 millielectron_volt \n", + "\n", + " cutoff offset initial_state \n", + "0 0.5612310241546865 nanometer 0.0 nanometer Na " + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pmb.get_templates_df(pmb_type = 'particle')" ] }, { @@ -188,9 +276,20 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19]" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "N_cations = 20\n", "pmb.create_particle(name = cation_name,\n", @@ -202,16 +301,302 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "Let's take a look at the new set of particles..." + "pyMBE keeps track of each instance of a particle template created into the ESPResSo system. To check all particle instances created, one can query the pyMBE database manager:" ] }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "pmb.filter_df(pmb_type = 'particle')" + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
pmb_typenameparticle_idinitial_stateresidue_idmolecule_idassembly_id
0particleNa0Na<NA><NA><NA>
1particleNa1Na<NA><NA><NA>
2particleNa2Na<NA><NA><NA>
3particleNa3Na<NA><NA><NA>
4particleNa4Na<NA><NA><NA>
5particleNa5Na<NA><NA><NA>
6particleNa6Na<NA><NA><NA>
7particleNa7Na<NA><NA><NA>
8particleNa8Na<NA><NA><NA>
9particleNa9Na<NA><NA><NA>
10particleNa10Na<NA><NA><NA>
11particleNa11Na<NA><NA><NA>
12particleNa12Na<NA><NA><NA>
13particleNa13Na<NA><NA><NA>
14particleNa14Na<NA><NA><NA>
15particleNa15Na<NA><NA><NA>
16particleNa16Na<NA><NA><NA>
17particleNa17Na<NA><NA><NA>
18particleNa18Na<NA><NA><NA>
19particleNa19Na<NA><NA><NA>
\n", + "
" + ], + "text/plain": [ + " pmb_type name particle_id initial_state residue_id molecule_id \\\n", + "0 particle Na 0 Na \n", + "1 particle Na 1 Na \n", + "2 particle Na 2 Na \n", + "3 particle Na 3 Na \n", + "4 particle Na 4 Na \n", + "5 particle Na 5 Na \n", + "6 particle Na 6 Na \n", + "7 particle Na 7 Na \n", + "8 particle Na 8 Na \n", + "9 particle Na 9 Na \n", + "10 particle Na 10 Na \n", + "11 particle Na 11 Na \n", + "12 particle Na 12 Na \n", + "13 particle Na 13 Na \n", + "14 particle Na 14 Na \n", + "15 particle Na 15 Na \n", + "16 particle Na 16 Na \n", + "17 particle Na 17 Na \n", + "18 particle Na 18 Na \n", + "19 particle Na 19 Na \n", + "\n", + " assembly_id \n", + "0 \n", + "1 \n", + "2 \n", + "3 \n", + "4 \n", + "5 \n", + "6 \n", + "7 \n", + "8 \n", + "9 \n", + "10 \n", + "11 \n", + "12 \n", + "13 \n", + "14 \n", + "15 \n", + "16 \n", + "17 \n", + "18 \n", + "19 " + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pmb.get_instances_df(pmb_type = 'particle')" ] }, { @@ -223,7 +608,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 10, "metadata": {}, "outputs": [], "source": [ @@ -260,19 +645,21 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "To delete an pyMBE object from the system we can use the following command:" + "To delete any instance of a particle created with pyMBE from the ESPResSo system we can use the following command:" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 11, "metadata": {}, "outputs": [], "source": [ "# First search for the ids of the particles to delete\n", "particle_id_map = pmb.get_particle_id_map(object_name=cation_name)\n", + "# This will delete all particles that we created before\n", "for pid in particle_id_map[\"all\"]:\n", - " pmb.delete_particle_in_system(particle_id=pid, \n", + " pmb.delete_instances_in_system(instance_id=pid, \n", + " pmb_type=\"particle\",\n", " espresso_system = 
espresso_system)" ] }, @@ -280,16 +667,55 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "Now the df should be empty." + "If we query again the pyMBE database for particle instances in the ESPResSo system, we can observe that all particles have been deleted" ] }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "pmb.filter_df(pmb_type = 'particle')" + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
\n", + "
" + ], + "text/plain": [ + "Empty DataFrame\n", + "Columns: []\n", + "Index: []" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pmb.get_instances_df(pmb_type = 'particle')" ] }, { @@ -313,12 +739,12 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "To set up such polymer with pyMBE first one has to define the different particles in the monomer." + "To set up such polymer with pyMBE first one has to define templates for the different particles in the monomer." ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 13, "metadata": {}, "outputs": [], "source": [ @@ -346,12 +772,12 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "Then, one defines the structure of the residue of the polymer. A residue is composed by a `central_bead` where one or various `side_chains` are attached. Each side chain can contain one particle or other residues. " + "Then, one defines templates for the polymer residues. A residue is composed by a `central_bead` where one or various `side_chains` are attached. Each side chain can contain one particle or other residues. " ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 14, "metadata": {}, "outputs": [], "source": [ @@ -366,12 +792,77 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "Once done, one has to define a bond for each different type of bond in the polymer. For simplicity, in this tutorial we assume that all bonds are equal and we set-up all bonds using a harmonic potential with the following arbitrary parameters." + "We can check the residue templates we defined in the pyMBE database:" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 15, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
pmb_typenamecentral_beadside_chains
0residuePDha_monBB-PDha[COOH-PDha, NH3-PDha]
\n", + "
" + ], + "text/plain": [ + " pmb_type name central_bead side_chains\n", + "0 residue PDha_mon BB-PDha [COOH-PDha, NH3-PDha]" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pmb.get_templates_df(pmb_type = 'residue')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Once done, one has to define templates for bonds between each pair of different particle templates in the polymer. For simplicity, in this tutorial we assume that all bonds are equal and we set-up all bonds using a harmonic potential with the following arbitrary parameters." + ] + }, + { + "cell_type": "code", + "execution_count": 16, "metadata": {}, "outputs": [], "source": [ @@ -380,16 +871,114 @@ "generic_harmonic_constant = 400 * pmb.units('reduced_energy / reduced_length**2')\n", "\n", "harmonic_bond = {'r_0' : generic_bond_lenght,\n", - " 'k' : generic_harmonic_constant\n", - " }\n", + " 'k' : generic_harmonic_constant}\n", "\n", "pmb.define_bond(bond_type = bond_type,\n", " bond_parameters = harmonic_bond,\n", " particle_pairs = [[PDha_backbone_bead, PDha_backbone_bead],\n", " [PDha_backbone_bead, PDha_carboxyl_bead],\n", - " [PDha_backbone_bead, PDha_amine_bead]])\n", - "\n", - "pmb.add_bonds_to_espresso(espresso_system = espresso_system)\n" + " [PDha_backbone_bead, PDha_amine_bead]])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "All bond templates we defined are stored in the pyMBE database:" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/usr/bin/eog: symbol lookup error: /snap/core20/current/lib/x86_64-linux-gnu/libpthread.so.0: undefined symbol: __libc_pthread_init, version GLIBC_PRIVATE\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
pmb_typenamebond_typeparticle_name1particle_name2parameters
0bondBB-PDha-BB-PDhaharmonicBB-PDhaBB-PDha{'r_0': 0.4 nanometer, 'k': 41108.12659373736 ...
1bondBB-PDha-COOH-PDhaharmonicBB-PDhaCOOH-PDha{'r_0': 0.4 nanometer, 'k': 41108.12659373736 ...
2bondBB-PDha-NH3-PDhaharmonicBB-PDhaNH3-PDha{'r_0': 0.4 nanometer, 'k': 41108.12659373736 ...
\n", + "
" + ], + "text/plain": [ + " pmb_type name bond_type particle_name1 particle_name2 \\\n", + "0 bond BB-PDha-BB-PDha harmonic BB-PDha BB-PDha \n", + "1 bond BB-PDha-COOH-PDha harmonic BB-PDha COOH-PDha \n", + "2 bond BB-PDha-NH3-PDha harmonic BB-PDha NH3-PDha \n", + "\n", + " parameters \n", + "0 {'r_0': 0.4 nanometer, 'k': 41108.12659373736 ... \n", + "1 {'r_0': 0.4 nanometer, 'k': 41108.12659373736 ... \n", + "2 {'r_0': 0.4 nanometer, 'k': 41108.12659373736 ... " + ] + }, + "execution_count": 17, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pmb.get_templates_df(pmb_type = 'bond')" ] }, { @@ -403,12 +992,12 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "Finally, one can use the residues to define the polymer sequence given by the argument `residue_list`. One needs to add one residue in `residue_list` per each residue in the polymer chain. For instance a decamer should be created as follows:" + "Finally, one defines a molecule template. A molecule template is defined by a linear sequence of residues, `residue_list`. For instance a decamer should be created as follows:" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 18, "metadata": {}, "outputs": [], "source": [ @@ -423,37 +1012,1032 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "After defining the polymer, we are ready to create one PdHa polymer in the center of the simulation box." + "All defined molecule templates can be consulted in the pyMBE database" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
pmb_typenameresidue_list
0moleculePDha[PDha_mon, PDha_mon, PDha_mon, PDha_mon, PDha_...
\n", + "
" + ], + "text/plain": [ + " pmb_type name residue_list\n", + "0 molecule PDha [PDha_mon, PDha_mon, PDha_mon, PDha_mon, PDha_..." + ] + }, + "execution_count": 19, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pmb.get_templates_df(pmb_type = 'molecule')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "After defining a template for the polymer, we are ready to create one PdHa polymer in the center of the simulation box." ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "[0]" + ] + }, + "execution_count": 20, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "N_polymers = 1\n", "\n", - "pmb.create_molecule(name = PDha_polymer, \n", - " number_of_molecules = N_polymers,\n", - " espresso_system = espresso_system, \n", - " list_of_first_residue_positions = [[Box_L.to('reduced_length').magnitude/2]*3]) " + "molecule_ids = pmb.create_molecule(name = PDha_polymer, \n", + " number_of_molecules = N_polymers,\n", + " espresso_system = espresso_system, \n", + " list_of_first_residue_positions = [[Box_L.to('reduced_length').magnitude/2]*3]) " ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "We can always track our particles..." + "All instances of particles, residues, bonds and molecules created into ESPResSo are bookkept in the pyMBE database:" ] }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "pmb.filter_df(pmb_type = 'particle')" + "execution_count": 21, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
pmb_typenameparticle_idinitial_stateresidue_idmolecule_idassembly_id
0particleBB-PDha0BB-PDha00<NA>
1particleCOOH-PDha1COOH-PDha00<NA>
2particleNH3-PDha2NH3-PDha00<NA>
3particleBB-PDha3BB-PDha10<NA>
4particleCOOH-PDha4COOH-PDha10<NA>
5particleNH3-PDha5NH3-PDha10<NA>
6particleBB-PDha6BB-PDha20<NA>
7particleCOOH-PDha7COOH-PDha20<NA>
8particleNH3-PDha8NH3-PDha20<NA>
9particleBB-PDha9BB-PDha30<NA>
10particleCOOH-PDha10COOH-PDha30<NA>
11particleNH3-PDha11NH3-PDha30<NA>
12particleBB-PDha12BB-PDha40<NA>
13particleCOOH-PDha13COOH-PDha40<NA>
14particleNH3-PDha14NH3-PDha40<NA>
15particleBB-PDha15BB-PDha50<NA>
16particleCOOH-PDha16COOH-PDha50<NA>
17particleNH3-PDha17NH3-PDha50<NA>
18particleBB-PDha18BB-PDha60<NA>
19particleCOOH-PDha19COOH-PDha60<NA>
20particleNH3-PDha20NH3-PDha60<NA>
21particleBB-PDha21BB-PDha70<NA>
22particleCOOH-PDha22COOH-PDha70<NA>
23particleNH3-PDha23NH3-PDha70<NA>
24particleBB-PDha24BB-PDha80<NA>
25particleCOOH-PDha25COOH-PDha80<NA>
26particleNH3-PDha26NH3-PDha80<NA>
27particleBB-PDha27BB-PDha90<NA>
28particleCOOH-PDha28COOH-PDha90<NA>
29particleNH3-PDha29NH3-PDha90<NA>
\n", + "
" + ], + "text/plain": [ + " pmb_type name particle_id initial_state residue_id molecule_id \\\n", + "0 particle BB-PDha 0 BB-PDha 0 0 \n", + "1 particle COOH-PDha 1 COOH-PDha 0 0 \n", + "2 particle NH3-PDha 2 NH3-PDha 0 0 \n", + "3 particle BB-PDha 3 BB-PDha 1 0 \n", + "4 particle COOH-PDha 4 COOH-PDha 1 0 \n", + "5 particle NH3-PDha 5 NH3-PDha 1 0 \n", + "6 particle BB-PDha 6 BB-PDha 2 0 \n", + "7 particle COOH-PDha 7 COOH-PDha 2 0 \n", + "8 particle NH3-PDha 8 NH3-PDha 2 0 \n", + "9 particle BB-PDha 9 BB-PDha 3 0 \n", + "10 particle COOH-PDha 10 COOH-PDha 3 0 \n", + "11 particle NH3-PDha 11 NH3-PDha 3 0 \n", + "12 particle BB-PDha 12 BB-PDha 4 0 \n", + "13 particle COOH-PDha 13 COOH-PDha 4 0 \n", + "14 particle NH3-PDha 14 NH3-PDha 4 0 \n", + "15 particle BB-PDha 15 BB-PDha 5 0 \n", + "16 particle COOH-PDha 16 COOH-PDha 5 0 \n", + "17 particle NH3-PDha 17 NH3-PDha 5 0 \n", + "18 particle BB-PDha 18 BB-PDha 6 0 \n", + "19 particle COOH-PDha 19 COOH-PDha 6 0 \n", + "20 particle NH3-PDha 20 NH3-PDha 6 0 \n", + "21 particle BB-PDha 21 BB-PDha 7 0 \n", + "22 particle COOH-PDha 22 COOH-PDha 7 0 \n", + "23 particle NH3-PDha 23 NH3-PDha 7 0 \n", + "24 particle BB-PDha 24 BB-PDha 8 0 \n", + "25 particle COOH-PDha 25 COOH-PDha 8 0 \n", + "26 particle NH3-PDha 26 NH3-PDha 8 0 \n", + "27 particle BB-PDha 27 BB-PDha 9 0 \n", + "28 particle COOH-PDha 28 COOH-PDha 9 0 \n", + "29 particle NH3-PDha 29 NH3-PDha 9 0 \n", + "\n", + " assembly_id \n", + "0 \n", + "1 \n", + "2 \n", + "3 \n", + "4 \n", + "5 \n", + "6 \n", + "7 \n", + "8 \n", + "9 \n", + "10 \n", + "11 \n", + "12 \n", + "13 \n", + "14 \n", + "15 \n", + "16 \n", + "17 \n", + "18 \n", + "19 \n", + "20 \n", + "21 \n", + "22 \n", + "23 \n", + "24 \n", + "25 \n", + "26 \n", + "27 \n", + "28 \n", + "29 " + ] + }, + "execution_count": 21, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Check particle instances\n", + "pmb.get_instances_df(pmb_type = 'particle')" + ] + }, + { + "cell_type": 
"code", + "execution_count": 22, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
pmb_typenameresidue_idmolecule_idassembly_id
0residuePDha_mon00<NA>
1residuePDha_mon10<NA>
2residuePDha_mon20<NA>
3residuePDha_mon30<NA>
4residuePDha_mon40<NA>
5residuePDha_mon50<NA>
6residuePDha_mon60<NA>
7residuePDha_mon70<NA>
8residuePDha_mon80<NA>
9residuePDha_mon90<NA>
\n", + "
" + ], + "text/plain": [ + " pmb_type name residue_id molecule_id assembly_id\n", + "0 residue PDha_mon 0 0 \n", + "1 residue PDha_mon 1 0 \n", + "2 residue PDha_mon 2 0 \n", + "3 residue PDha_mon 3 0 \n", + "4 residue PDha_mon 4 0 \n", + "5 residue PDha_mon 5 0 \n", + "6 residue PDha_mon 6 0 \n", + "7 residue PDha_mon 7 0 \n", + "8 residue PDha_mon 8 0 \n", + "9 residue PDha_mon 9 0 " + ] + }, + "execution_count": 22, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Check residue instances\n", + "pmb.get_instances_df(pmb_type = 'residue')" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
pmb_typenamebond_idparticle_id1particle_id2
0bondBB-PDha-COOH-PDha001
1bondBB-PDha-NH3-PDha102
2bondBB-PDha-COOH-PDha234
3bondBB-PDha-NH3-PDha335
4bondBB-PDha-BB-PDha403
5bondBB-PDha-COOH-PDha567
6bondBB-PDha-NH3-PDha668
7bondBB-PDha-BB-PDha736
8bondBB-PDha-COOH-PDha8910
9bondBB-PDha-NH3-PDha9911
10bondBB-PDha-BB-PDha1069
11bondBB-PDha-COOH-PDha111213
12bondBB-PDha-NH3-PDha121214
13bondBB-PDha-BB-PDha13912
14bondBB-PDha-COOH-PDha141516
15bondBB-PDha-NH3-PDha151517
16bondBB-PDha-BB-PDha161215
17bondBB-PDha-COOH-PDha171819
18bondBB-PDha-NH3-PDha181820
19bondBB-PDha-BB-PDha191518
20bondBB-PDha-COOH-PDha202122
21bondBB-PDha-NH3-PDha212123
22bondBB-PDha-BB-PDha221821
23bondBB-PDha-COOH-PDha232425
24bondBB-PDha-NH3-PDha242426
25bondBB-PDha-BB-PDha252124
26bondBB-PDha-COOH-PDha262728
27bondBB-PDha-NH3-PDha272729
28bondBB-PDha-BB-PDha282427
\n", + "
" + ], + "text/plain": [ + " pmb_type name bond_id particle_id1 particle_id2\n", + "0 bond BB-PDha-COOH-PDha 0 0 1\n", + "1 bond BB-PDha-NH3-PDha 1 0 2\n", + "2 bond BB-PDha-COOH-PDha 2 3 4\n", + "3 bond BB-PDha-NH3-PDha 3 3 5\n", + "4 bond BB-PDha-BB-PDha 4 0 3\n", + "5 bond BB-PDha-COOH-PDha 5 6 7\n", + "6 bond BB-PDha-NH3-PDha 6 6 8\n", + "7 bond BB-PDha-BB-PDha 7 3 6\n", + "8 bond BB-PDha-COOH-PDha 8 9 10\n", + "9 bond BB-PDha-NH3-PDha 9 9 11\n", + "10 bond BB-PDha-BB-PDha 10 6 9\n", + "11 bond BB-PDha-COOH-PDha 11 12 13\n", + "12 bond BB-PDha-NH3-PDha 12 12 14\n", + "13 bond BB-PDha-BB-PDha 13 9 12\n", + "14 bond BB-PDha-COOH-PDha 14 15 16\n", + "15 bond BB-PDha-NH3-PDha 15 15 17\n", + "16 bond BB-PDha-BB-PDha 16 12 15\n", + "17 bond BB-PDha-COOH-PDha 17 18 19\n", + "18 bond BB-PDha-NH3-PDha 18 18 20\n", + "19 bond BB-PDha-BB-PDha 19 15 18\n", + "20 bond BB-PDha-COOH-PDha 20 21 22\n", + "21 bond BB-PDha-NH3-PDha 21 21 23\n", + "22 bond BB-PDha-BB-PDha 22 18 21\n", + "23 bond BB-PDha-COOH-PDha 23 24 25\n", + "24 bond BB-PDha-NH3-PDha 24 24 26\n", + "25 bond BB-PDha-BB-PDha 25 21 24\n", + "26 bond BB-PDha-COOH-PDha 26 27 28\n", + "27 bond BB-PDha-NH3-PDha 27 27 29\n", + "28 bond BB-PDha-BB-PDha 28 24 27" + ] + }, + "execution_count": 23, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Check bond instances\n", + "pmb.get_instances_df(pmb_type = 'bond')" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
pmb_typenamemolecule_idassembly_id
0moleculePDha0<NA>
\n", + "
" + ], + "text/plain": [ + " pmb_type name molecule_id assembly_id\n", + "0 molecule PDha 0 " + ] + }, + "execution_count": 24, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Check molecule instances\n", + "pmb.get_instances_df(pmb_type = 'molecule')" ] }, { @@ -465,7 +2049,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 25, "metadata": {}, "outputs": [], "source": [ @@ -480,18 +2064,39 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "Delete the particles and check that our df is empty." + "Delete the particles and check that there are no particle instances in the pyMBE database" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, - "outputs": [], - "source": [ - "pmb.delete_molecule_in_system(molecule_id=0, \n", + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/usr/bin/eog: symbol lookup error: /snap/core20/current/lib/x86_64-linux-gnu/libpthread.so.0: undefined symbol: __libc_pthread_init, version GLIBC_PRIVATE\n" + ] + }, + { + "ename": "AttributeError", + "evalue": "'pymbe_library' object has no attribute 'delete_molecule_in_system'", + "output_type": "error", + "traceback": [ + "\u001b[31m---------------------------------------------------------------------------\u001b[39m", + "\u001b[31mAttributeError\u001b[39m Traceback (most recent call last)", + "\u001b[36mCell\u001b[39m\u001b[36m \u001b[39m\u001b[32mIn[26]\u001b[39m\u001b[32m, line 1\u001b[39m\n\u001b[32m----> \u001b[39m\u001b[32m1\u001b[39m \u001b[43mpmb\u001b[49m\u001b[43m.\u001b[49m\u001b[43mdelete_molecule_in_system\u001b[49m(molecule_id=\u001b[32m0\u001b[39m, \n\u001b[32m 2\u001b[39m espresso_system = espresso_system)\n\u001b[32m 3\u001b[39m pmb.filter_df(pmb_type = \u001b[33m'\u001b[39m\u001b[33mparticle\u001b[39m\u001b[33m'\u001b[39m)\n", + "\u001b[31mAttributeError\u001b[39m: 'pymbe_library' object has no attribute 'delete_molecule_in_system'" + ] + } + ], + "source": [ + 
"pmb.delete_instances_in_system(instance_id=molecule_ids[0],\n", + " pmb_type=\"molecule\", \n", " espresso_system = espresso_system)\n", - "pmb.filter_df(pmb_type = 'particle')" + "# Check particle instances\n", + "pmb.get_instances_df(pmb_type = 'particle')" ] }, { @@ -514,7 +2119,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "One can start by defining each different bead of the PDAGA." + "One can start by defining templates for each different bead of the PDAGA." ] }, { @@ -553,7 +2158,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "The next step is to define the two different residues: \n", + "The next step is to define the two different residue templates: \n", "1. The side chain: two carboxyl beads attached to the cyclic amine bead." ] }, @@ -593,7 +2198,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "Then, we need to set the bonds between the particles in a similar way as for the case of the simple polymer." + "Then, we need to define bond templates in a similar way as for the case of the simple polymer." 
] }, { @@ -615,9 +2220,7 @@ " particle_pairs = [[PDAGA_backbone_bead, PDAGA_backbone_bead],\n", " [PDAGA_backbone_bead, PDAGA_cyclic_amine_bead],\n", " [PDAGA_alpha_carboxyl_bead, PDAGA_cyclic_amine_bead],\n", - " [PDAGA_beta_carboxyl_bead, PDAGA_cyclic_amine_bead]])\n", - "\n", - "pmb.add_bonds_to_espresso(espresso_system = espresso_system)" + " [PDAGA_beta_carboxyl_bead, PDAGA_cyclic_amine_bead]])" ] }, { @@ -1112,9 +2715,9 @@ "metadata": { "hide_input": false, "kernelspec": { - "display_name": "pyMBE", + "display_name": "pymbe", "language": "python", - "name": "pymbe" + "name": "python3" }, "language_info": { "codemirror_mode": { @@ -1126,12 +2729,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.12" - }, - "vscode": { - "interpreter": { - "hash": "916dbcbb3f70747c44a77c7bcd40155683ae19c65e1c03b4aa3499c5328201f1" - } + "version": "3.12.3" } }, "nbformat": 4, From 1aa5c5d2fe1022b721dc9221569233217d96915e Mon Sep 17 00:00:00 2001 From: pmblanco Date: Tue, 10 Feb 2026 09:08:25 +0100 Subject: [PATCH 54/55] clean up tutorial --- tutorials/pyMBE_tutorial.ipynb | 1454 ++++++++++++++++++++++++++------ 1 file changed, 1177 insertions(+), 277 deletions(-) diff --git a/tutorials/pyMBE_tutorial.ipynb b/tutorials/pyMBE_tutorial.ipynb index 2d2ffef..82b43d0 100644 --- a/tutorials/pyMBE_tutorial.ipynb +++ b/tutorials/pyMBE_tutorial.ipynb @@ -1080,20 +1080,9 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 20, "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "[0]" - ] - }, - "execution_count": 20, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "N_polymers = 1\n", "\n", @@ -2069,26 +2058,46 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 26, "metadata": {}, "outputs": [ { - "name": "stderr", - "output_type": "stream", - "text": [ - "/usr/bin/eog: symbol lookup error: 
/snap/core20/current/lib/x86_64-linux-gnu/libpthread.so.0: undefined symbol: __libc_pthread_init, version GLIBC_PRIVATE\n" - ] - }, - { - "ename": "AttributeError", - "evalue": "'pymbe_library' object has no attribute 'delete_molecule_in_system'", - "output_type": "error", - "traceback": [ - "\u001b[31m---------------------------------------------------------------------------\u001b[39m", - "\u001b[31mAttributeError\u001b[39m Traceback (most recent call last)", - "\u001b[36mCell\u001b[39m\u001b[36m \u001b[39m\u001b[32mIn[26]\u001b[39m\u001b[32m, line 1\u001b[39m\n\u001b[32m----> \u001b[39m\u001b[32m1\u001b[39m \u001b[43mpmb\u001b[49m\u001b[43m.\u001b[49m\u001b[43mdelete_molecule_in_system\u001b[49m(molecule_id=\u001b[32m0\u001b[39m, \n\u001b[32m 2\u001b[39m espresso_system = espresso_system)\n\u001b[32m 3\u001b[39m pmb.filter_df(pmb_type = \u001b[33m'\u001b[39m\u001b[33mparticle\u001b[39m\u001b[33m'\u001b[39m)\n", - "\u001b[31mAttributeError\u001b[39m: 'pymbe_library' object has no attribute 'delete_molecule_in_system'" - ] + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
\n", + "
" + ], + "text/plain": [ + "Empty DataFrame\n", + "Columns: []\n", + "Index: []" + ] + }, + "execution_count": 26, + "metadata": {}, + "output_type": "execute_result" } ], "source": [ @@ -2124,7 +2133,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 27, "metadata": {}, "outputs": [], "source": [ @@ -2164,7 +2173,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 28, "metadata": {}, "outputs": [], "source": [ @@ -2184,7 +2193,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 29, "metadata": {}, "outputs": [], "source": [ @@ -2203,7 +2212,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 30, "metadata": {}, "outputs": [], "source": [ @@ -2234,7 +2243,15 @@ "cell_type": "code", "execution_count": null, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/usr/bin/eog: symbol lookup error: /snap/core20/current/lib/x86_64-linux-gnu/libpthread.so.0: undefined symbol: __libc_pthread_init, version GLIBC_PRIVATE\n" + ] + } + ], "source": [ "PDAGA_polymer = 'PDAGA'\n", "N_monomers = 8\n", @@ -2252,16 +2269,16 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 32, "metadata": {}, "outputs": [], "source": [ "N_polymers = 1\n", "\n", - "pmb.create_molecule(name = PDAGA_polymer,\n", - " number_of_molecules= N_polymers,\n", - " espresso_system = espresso_system,\n", - " list_of_first_residue_positions = [[Box_L.to('reduced_length').magnitude/2]*3])" + "mol_ids = pmb.create_molecule(name = PDAGA_polymer,\n", + " number_of_molecules= N_polymers,\n", + " espresso_system = espresso_system,\n", + " list_of_first_residue_positions = [[Box_L.to('reduced_length').magnitude/2]*3])" ] }, { @@ -2273,7 +2290,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 33, "metadata": {}, "outputs": [], "source": [ @@ -2288,20 +2305,60 @@ "cell_type": "markdown", "metadata": 
{}, "source": [ - "Delete the particles and check that our df is empty." + "Delete the particles and check that the pyMBE database is empty." ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 34, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
\n", + "
" + ], + "text/plain": [ + "Empty DataFrame\n", + "Columns: []\n", + "Index: []" + ] + }, + "execution_count": 34, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "# pyMBE always assigns a molecule id of 0 to the first created molecule\n", "\n", - "pmb.delete_molecule_in_system(molecule_id=0,\n", - " espresso_system=espresso_system)\n", - "pmb.filter_df(pmb_type = 'particle')" + "for mol_id in mol_ids:\n", + " pmb.delete_instances_in_system(instance_id=mol_id,\n", + " pmb_type=\"molecule\",\n", + " espresso_system=espresso_system)\n", + "pmb.get_instances_df(pmb_type = 'particle')" ] }, { @@ -2324,32 +2381,31 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "Define the bond between the backbone particle of PDha and the backbone particle of PDAGA" + "Define the bond template between the backbone particle of PDha and the backbone particle of PDAGA" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 35, "metadata": {}, "outputs": [], "source": [ "\n", "pmb.define_bond(bond_type = bond_type,\n", " bond_parameters = harmonic_bond,\n", - " particle_pairs = [[PDha_backbone_bead, PDAGA_backbone_bead]])\n", - "pmb.add_bonds_to_espresso(espresso_system = espresso_system)" + " particle_pairs = [[PDha_backbone_bead, PDAGA_backbone_bead]])" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "Define the di-block polymer molecule using Python list comprehension methods" + "Define a molecule template for the di-block polymer molecule using Python list comprehension methods" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 36, "metadata": {}, "outputs": [], "source": [ @@ -2370,165 +2426,612 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "N_polymers = 1\n", - "\n", - "pmb.create_molecule(name = diblock_polymer,\n", - " number_of_molecules= N_polymers,\n", - " espresso_system = espresso_system,\n", - " 
list_of_first_residue_positions = [[Box_L.to('reduced_length').magnitude/2]*3]) " - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Now, let us see our di-block PDha-PDAGA molecule." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "picture_name = 'diblock_system.png'\n", - "create_snapshot_of_espresso_system(espresso_system = espresso_system, \n", - " filename = picture_name)\n", - "img = Image.open(picture_name)\n", - "img.show()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Delete the particles and check that our df is empty." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "pmb.delete_molecule_in_system(molecule_id=0, \n", - " espresso_system = espresso_system)\n", - "pmb.filter_df(pmb_type = 'particle')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Practice by creating a custom polyampholyte chain " - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Polyampholytes are polymers containing both acidic and basic groups on the same molecule, one example of a branched polyampholyte is depicted in the figure below.\n", - "\n", - "" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We will create the polyampholyte chain in the figure, starting by defining two different residues, 'Res_1' and 'Res_2'. The polyampholyte chain is then defined by following residue_list:\n", - "\n", - "residue_list = 2*[\"Res_1\"] + [\"Res_2\"] + 2*[\"Res_1\"] + 2*[\"Res_2\"]\n", - "\n", - "### Tasks to do:\n", - "\n", - "1. Define each different bead in the residues using \"pmb.define_particle\". There are 3 different particles, an inert particle, an acidic particle with pKa = 4, and a basic particle with pKa = 9.\n", - "2. Define residues using \"pmf.define_residue\". 
\"Res_1\" consists of an inert particle as central bead and acidic and basic particles as side chain. \"Res_2\" consists of an inert particle as central bead and \"Res_1\" as side chain. \n", - "3. Define the branched polyampholyte chain using \"pmb.define_molecule\" with the above \"residue_list.\" \n", - "4. Create the branched polyampholyte into the ESPResSo system.\n", - "5. Visualize your creation.\n", - "6. Delete the molecule and check that your df is empty." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### 1. Define each different bead of Res_1 and Res_2." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### 2. Define the residues Res_1 and Res_2." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### 3. Define the diblock polyampholyte molecule. " - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### 4. Create the diblock polyampholyte chain into the ESPResSo system." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### 5. Visualize your creation." - ] - }, - { - "cell_type": "code", - "execution_count": null, + "execution_count": 37, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/usr/bin/eog: symbol lookup error: /snap/core20/current/lib/x86_64-linux-gnu/libpthread.so.0: undefined symbol: __libc_pthread_init, version GLIBC_PRIVATE\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
pmb_typenameparticle_idinitial_stateresidue_idmolecule_idassembly_id
0particleBB-PDha0BB-PDha00<NA>
1particleCOOH-PDha1COOH-PDha00<NA>
2particleNH3-PDha2NH3-PDha00<NA>
3particleBB-PDha3BB-PDha10<NA>
4particleCOOH-PDha4COOH-PDha10<NA>
5particleNH3-PDha5NH3-PDha10<NA>
6particleBB-PDha6BB-PDha20<NA>
7particleCOOH-PDha7COOH-PDha20<NA>
8particleNH3-PDha8NH3-PDha20<NA>
9particleBB-PDha9BB-PDha30<NA>
10particleCOOH-PDha10COOH-PDha30<NA>
11particleNH3-PDha11NH3-PDha30<NA>
12particleBB-PDAGA12BB-PDAGA40<NA>
13particleNH3-PDAGA13NH3-PDAGA40<NA>
14particleaCOOH-PDAGA14aCOOH-PDAGA40<NA>
15particlebCOOH-PDAGA15bCOOH-PDAGA40<NA>
16particleBB-PDAGA16BB-PDAGA50<NA>
17particleNH3-PDAGA17NH3-PDAGA50<NA>
18particleaCOOH-PDAGA18aCOOH-PDAGA50<NA>
19particlebCOOH-PDAGA19bCOOH-PDAGA50<NA>
20particleBB-PDAGA20BB-PDAGA60<NA>
21particleNH3-PDAGA21NH3-PDAGA60<NA>
22particleaCOOH-PDAGA22aCOOH-PDAGA60<NA>
23particlebCOOH-PDAGA23bCOOH-PDAGA60<NA>
24particleBB-PDAGA24BB-PDAGA70<NA>
25particleNH3-PDAGA25NH3-PDAGA70<NA>
26particleaCOOH-PDAGA26aCOOH-PDAGA70<NA>
27particlebCOOH-PDAGA27bCOOH-PDAGA70<NA>
\n", + "
" + ], + "text/plain": [ + " pmb_type name particle_id initial_state residue_id molecule_id \\\n", + "0 particle BB-PDha 0 BB-PDha 0 0 \n", + "1 particle COOH-PDha 1 COOH-PDha 0 0 \n", + "2 particle NH3-PDha 2 NH3-PDha 0 0 \n", + "3 particle BB-PDha 3 BB-PDha 1 0 \n", + "4 particle COOH-PDha 4 COOH-PDha 1 0 \n", + "5 particle NH3-PDha 5 NH3-PDha 1 0 \n", + "6 particle BB-PDha 6 BB-PDha 2 0 \n", + "7 particle COOH-PDha 7 COOH-PDha 2 0 \n", + "8 particle NH3-PDha 8 NH3-PDha 2 0 \n", + "9 particle BB-PDha 9 BB-PDha 3 0 \n", + "10 particle COOH-PDha 10 COOH-PDha 3 0 \n", + "11 particle NH3-PDha 11 NH3-PDha 3 0 \n", + "12 particle BB-PDAGA 12 BB-PDAGA 4 0 \n", + "13 particle NH3-PDAGA 13 NH3-PDAGA 4 0 \n", + "14 particle aCOOH-PDAGA 14 aCOOH-PDAGA 4 0 \n", + "15 particle bCOOH-PDAGA 15 bCOOH-PDAGA 4 0 \n", + "16 particle BB-PDAGA 16 BB-PDAGA 5 0 \n", + "17 particle NH3-PDAGA 17 NH3-PDAGA 5 0 \n", + "18 particle aCOOH-PDAGA 18 aCOOH-PDAGA 5 0 \n", + "19 particle bCOOH-PDAGA 19 bCOOH-PDAGA 5 0 \n", + "20 particle BB-PDAGA 20 BB-PDAGA 6 0 \n", + "21 particle NH3-PDAGA 21 NH3-PDAGA 6 0 \n", + "22 particle aCOOH-PDAGA 22 aCOOH-PDAGA 6 0 \n", + "23 particle bCOOH-PDAGA 23 bCOOH-PDAGA 6 0 \n", + "24 particle BB-PDAGA 24 BB-PDAGA 7 0 \n", + "25 particle NH3-PDAGA 25 NH3-PDAGA 7 0 \n", + "26 particle aCOOH-PDAGA 26 aCOOH-PDAGA 7 0 \n", + "27 particle bCOOH-PDAGA 27 bCOOH-PDAGA 7 0 \n", + "\n", + " assembly_id \n", + "0 \n", + "1 \n", + "2 \n", + "3 \n", + "4 \n", + "5 \n", + "6 \n", + "7 \n", + "8 \n", + "9 \n", + "10 \n", + "11 \n", + "12 \n", + "13 \n", + "14 \n", + "15 \n", + "16 \n", + "17 \n", + "18 \n", + "19 \n", + "20 \n", + "21 \n", + "22 \n", + "23 \n", + "24 \n", + "25 \n", + "26 \n", + "27 " + ] + }, + "execution_count": 37, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "N_polymers = 1\n", + "\n", + "mol_ids = pmb.create_molecule(name = diblock_polymer,\n", + " number_of_molecules= N_polymers,\n", + " espresso_system = 
espresso_system,\n", + " list_of_first_residue_positions = [[Box_L.to('reduced_length').magnitude/2]*3]) \n", + "# See the particle instances you have created\n", + "pmb.get_instances_df(pmb_type=\"particle\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Now, let us see our di-block PDha-PDAGA molecule." + ] + }, + { + "cell_type": "code", + "execution_count": 38, + "metadata": {}, + "outputs": [], + "source": [ + "picture_name = 'diblock_system.png'\n", + "create_snapshot_of_espresso_system(espresso_system = espresso_system, \n", + " filename = picture_name)\n", + "img = Image.open(picture_name)\n", + "img.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Delete the particles and check that our df is empty." + ] + }, + { + "cell_type": "code", + "execution_count": 39, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
\n", + "
" + ], + "text/plain": [ + "Empty DataFrame\n", + "Columns: []\n", + "Index: []" + ] + }, + "execution_count": 39, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "for mol_id in mol_ids:\n", + " pmb.delete_instances_in_system(instance_id=0, \n", + " pmb_type=\"molecule\",\n", + " espresso_system = espresso_system)\n", + "pmb.get_instances_df(pmb_type=\"particle\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Practice by creating a custom polyampholyte chain " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Polyampholytes are polymers containing both acidic and basic groups on the same molecule, one example of a branched polyampholyte is depicted in the figure below.\n", + "\n", + "" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We will create the polyampholyte chain in the figure, starting by defining two different residues, 'Res_1' and 'Res_2'. The polyampholyte chain is then defined by following residue_list:\n", + "\n", + "residue_list = 2*[\"Res_1\"] + [\"Res_2\"] + 2*[\"Res_1\"] + 2*[\"Res_2\"]\n", + "\n", + "### Tasks to do:\n", + "\n", + "1. Define particle templates for each different bead in the residues using \"pmb.define_particle\". There are 3 different particles, an inert particle, an acidic particle with pKa = 4, and a basic particle with pKa = 9.\n", + "2. Define residue templates using \"pmb.define_residue\". \"Res_1\" consists of an inert particle as central bead and acidic and basic particles as side chain. \"Res_2\" consists of an inert particle as central bead and \"Res_1\" as side chain.\n", + "3. Define bond templates for each pair of particle templates. \n", + "4. Define a molecule template for the branched polyampholyte chain using \"pmb.define_molecule\" with the above \"residue_list.\" \n", + "5. Create the branched polyampholyte into the ESPResSo system.\n", + "6. Visualize your creation.\n", + "7. 
Delete the molecule and check that the pyMBE database is empty." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### 1. Define particle templates for each different bead of Res_1 and Res_2." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### 2. Define the residue templates for Res_1 and Res_2." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### 3. Define bond templates for each pair of particle templates. " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### 4. Define a molecule template for the branched polyampholyte. " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### 5. Create the branched polyampholyte chain into the ESPResSo system." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### 6. Visualize your creation." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], "source": [] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "#### 6. Delete the molecule and check that your df is empty." + "#### 7. Delete the molecule and check that the pyMBE database is empty." ] }, { @@ -2542,99 +3045,516 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "Refer to the sample script \"branched_polyampholyte.py\" in the samples folder for a complete solution of this exercise."
+ "Refer to the sample script \"branched_polyampholyte.py\" in the samples folder for a complete solution of this exercise." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## How to create peptides " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "pyMBE includes built-on functions to facilitate the setting up of coarse-grained models for peptides from their aminoacid sequence. Currently, there are two different coarse-grained models implemented: \n", + "\n", + "* `1beadAA`, where the aminoacid is represented by one single bead.\n", + "* `2beadAA`, where the aminoacid is represented by two beads (backbone and side-chain). \n", + "\n", + "We provide reference parameters in the folder (`pyMBE/parameters`) which can be loaded into pyMBE. The peptide sequence should be provided as a `str` composed either by the list of the one letter code or the list of the three letter code of the corresponding aminoacids. For example, the two possible ways to provide the peptide Cysteine$_3$ - Glutamic acid$_2$ - Histidine$_4$ - Valine are:\n", + "\n", + "* one letter code: 'CCCEEHHHHV'\n", + "* three letter code: 'CYS-CYS-CYS-GLU-GLU-HIS-HIS-HIS-HIS-VAL'" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Let's set up the peptide Lysine$_5$ - Glutamic acid$_5$ using a two beads coarse-grained model." + ] + }, + { + "cell_type": "code", + "execution_count": 40, + "metadata": {}, + "outputs": [], + "source": [ + "N_peptide = 1\n", + "sequence = \"KKKKKEEEEE\"\n", + "model = '2beadAA'" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We can use the peptide parametrization reported by Lunkad et al. [2], which is provided in the reference folder. This parametrization includes information about the particles (i.e. their Lennard-Jones parameters) and their bonding potentials (harmonic bonds)." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
pmb_typenamesigmaepsiloncutoffoffsetinitial_state
0particleCA0.35 nanometer25.69257912108585 millielectron_volt0.3984740271498274 nanometer0.0 nanometerCA
1particleD0.35 nanometer25.69257912108585 millielectron_volt0.3984740271498274 nanometer0.0 nanometerDH
2particleE0.35 nanometer25.69257912108585 millielectron_volt0.3984740271498274 nanometer0.0 nanometerEH
3particleH0.35 nanometer25.69257912108585 millielectron_volt0.3984740271498274 nanometer0.0 nanometerHH
4particleY0.35 nanometer25.69257912108585 millielectron_volt0.3984740271498274 nanometer0.0 nanometerYH
5particleK0.35 nanometer25.69257912108585 millielectron_volt0.3984740271498274 nanometer0.0 nanometerKH
\n", + "
" + ], + "text/plain": [ + " pmb_type name sigma epsilon \\\n", + "0 particle CA 0.35 nanometer 25.69257912108585 millielectron_volt \n", + "1 particle D 0.35 nanometer 25.69257912108585 millielectron_volt \n", + "2 particle E 0.35 nanometer 25.69257912108585 millielectron_volt \n", + "3 particle H 0.35 nanometer 25.69257912108585 millielectron_volt \n", + "4 particle Y 0.35 nanometer 25.69257912108585 millielectron_volt \n", + "5 particle K 0.35 nanometer 25.69257912108585 millielectron_volt \n", + "\n", + " cutoff offset initial_state \n", + "0 0.3984740271498274 nanometer 0.0 nanometer CA \n", + "1 0.3984740271498274 nanometer 0.0 nanometer DH \n", + "2 0.3984740271498274 nanometer 0.0 nanometer EH \n", + "3 0.3984740271498274 nanometer 0.0 nanometer HH \n", + "4 0.3984740271498274 nanometer 0.0 nanometer YH \n", + "5 0.3984740271498274 nanometer 0.0 nanometer KH " + ] + }, + "execution_count": 41, + "metadata": {}, + "output_type": "execute_result" + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/usr/bin/eog: symbol lookup error: /snap/core20/current/lib/x86_64-linux-gnu/libpthread.so.0: undefined symbol: __libc_pthread_init, version GLIBC_PRIVATE\n" + ] + } + ], + "source": [ + "path_to_interactions=pmb.root / \"parameters\" / \"peptides\" / \"Lunkad2021\"\n", + "path_to_pka=pmb.root / \"parameters\" / \"pka_sets\" / \"Hass2015.json\"\n", + "pmb.load_database(folder=path_to_interactions) \n", + "pmb.get_templates_df(pmb_type=\"particle\")\n", + "pmb.get_templates_df(pmb_type=\"bond\")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "## How to create peptides " + "Additionally, we can load one of the reference sets of pKa values for amino acids that we provide in pyMBE" ] }, { - "cell_type": "markdown", + "cell_type": "code", + "execution_count": 42, "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
reactionstoichiometrypKreaction_typemetadatasimulation_method
0DH <-> D{'DH': -1, 'D': 1}4.0monoprotic_acid{'summary': 'pKa-values of Hass et al.', 'sour...None
1EH <-> E{'EH': -1, 'E': 1}4.4monoprotic_acid{'summary': 'pKa-values of Hass et al.', 'sour...None
2YH <-> Y{'YH': -1, 'Y': 1}9.6monoprotic_acid{'summary': 'pKa-values of Hass et al.', 'sour...None
3CH <-> C{'CH': -1, 'C': 1}8.3monoprotic_acid{'summary': 'pKa-values of Hass et al.', 'sour...None
4HH <-> H{'HH': -1, 'H': 1}6.8monoprotic_base{'summary': 'pKa-values of Hass et al.', 'sour...None
5KH <-> K{'KH': -1, 'K': 1}10.4monoprotic_base{'summary': 'pKa-values of Hass et al.', 'sour...None
6RH <-> R{'RH': -1, 'R': 1}13.5monoprotic_base{'summary': 'pKa-values of Hass et al.', 'sour...None
7nH <-> n{'nH': -1, 'n': 1}8.0monoprotic_base{'summary': 'pKa-values of Hass et al.', 'sour...None
8cH <-> c{'cH': -1, 'c': 1}3.6monoprotic_acid{'summary': 'pKa-values of Hass et al.', 'sour...None
\n", + "
" + ], + "text/plain": [ + " reaction stoichiometry pK reaction_type \\\n", + "0 DH <-> D {'DH': -1, 'D': 1} 4.0 monoprotic_acid \n", + "1 EH <-> E {'EH': -1, 'E': 1} 4.4 monoprotic_acid \n", + "2 YH <-> Y {'YH': -1, 'Y': 1} 9.6 monoprotic_acid \n", + "3 CH <-> C {'CH': -1, 'C': 1} 8.3 monoprotic_acid \n", + "4 HH <-> H {'HH': -1, 'H': 1} 6.8 monoprotic_base \n", + "5 KH <-> K {'KH': -1, 'K': 1} 10.4 monoprotic_base \n", + "6 RH <-> R {'RH': -1, 'R': 1} 13.5 monoprotic_base \n", + "7 nH <-> n {'nH': -1, 'n': 1} 8.0 monoprotic_base \n", + "8 cH <-> c {'cH': -1, 'c': 1} 3.6 monoprotic_acid \n", + "\n", + " metadata simulation_method \n", + "0 {'summary': 'pKa-values of Hass et al.', 'sour... None \n", + "1 {'summary': 'pKa-values of Hass et al.', 'sour... None \n", + "2 {'summary': 'pKa-values of Hass et al.', 'sour... None \n", + "3 {'summary': 'pKa-values of Hass et al.', 'sour... None \n", + "4 {'summary': 'pKa-values of Hass et al.', 'sour... None \n", + "5 {'summary': 'pKa-values of Hass et al.', 'sour... None \n", + "6 {'summary': 'pKa-values of Hass et al.', 'sour... None \n", + "7 {'summary': 'pKa-values of Hass et al.', 'sour... None \n", + "8 {'summary': 'pKa-values of Hass et al.', 'sour... None " + ] + }, + "execution_count": 42, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "pyMBE includes built-on functions to facilitate the setting up of coarse-grained models for peptides from their aminoacid sequence. Currently, there are two different coarse-grained models implemented: \n", - "\n", - "* `1beadAA`, where the aminoacid is represented by one single bead.\n", - "* `2beadAA`, where the aminoacid is represented by two beads (backbone and side-chain). \n", - "\n", - "We provide reference parameters in the folder (`parameters`) which can be loaded into pyMBE. The peptide sequence should be provided as a str composed either by the list of the one letter code or the list of the three letter code of the corresponding aminoacids. 
For example, the two possible ways to provide the peptide Cysteine$_3$ - Glutamic acid$_2$ - Histidine$_4$ - Valine are:\n", - "\n", - "* one letter code: 'CCCEEHHHHV'\n", - "* three letter code: 'CYS-CYS-CYS-GLU-GLU-HIS-HIS-HIS-HIS-VAL'" + "pmb.load_pka_set(path_to_pka)\n", + "# Check the loaded pKa set\n", + "pmb.get_reactions_df()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "Let's set up the peptide Lysine$_5$ - Glutamic acid$_5$ using a two beads coarse-grained model." + "Since monoprotic acid/base particles can be in two possible states, protonated and deprotonated, we need to define templates for those particle states" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{'D': {'pka_value': 4.0, 'acidity': 'acidic'}, 'E': {'pka_value': 4.4, 'acidity': 'acidic'}, 'Y': {'pka_value': 9.6, 'acidity': 'acidic'}, 'C': {'pka_value': 8.3, 'acidity': 'acidic'}, 'H': {'pka_value': 6.8, 'acidity': 'basic'}, 'K': {'pka_value': 10.4, 'acidity': 'basic'}, 'R': {'pka_value': 13.5, 'acidity': 'basic'}, 'n': {'pka_value': 8.0, 'acidity': 'basic'}, 'c': {'pka_value': 3.6, 'acidity': 'acidic'}}\n" + ] + }, + { + "ename": "ValueError", + "evalue": "Acidity {'pka_value': 4.0, 'acidity': 'acidic'} provided for particle name D is not supported. 
Valid keys are: ['acidic', 'basic']", + "output_type": "error", + "traceback": [ + "\u001b[31m---------------------------------------------------------------------------\u001b[39m", + "\u001b[31mValueError\u001b[39m Traceback (most recent call last)", + "\u001b[36mCell\u001b[39m\u001b[36m \u001b[39m\u001b[32mIn[45]\u001b[39m\u001b[32m, line 7\u001b[39m\n\u001b[32m 5\u001b[39m \u001b[38;5;66;03m# define templates for the different particle states of monoprotic acid an basic groups:\u001b[39;00m\n\u001b[32m 6\u001b[39m \u001b[38;5;28;01mfor\u001b[39;00m acidbase_particle \u001b[38;5;129;01min\u001b[39;00m pka_set.keys():\n\u001b[32m----> \u001b[39m\u001b[32m7\u001b[39m \u001b[43mpmb\u001b[49m\u001b[43m.\u001b[49m\u001b[43mdefine_monoprototic_particle_states\u001b[49m\u001b[43m(\u001b[49m\u001b[43mparticle_name\u001b[49m\u001b[43m=\u001b[49m\u001b[43macidbase_particle\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 8\u001b[39m \u001b[43m \u001b[49m\u001b[43macidity\u001b[49m\u001b[43m=\u001b[49m\u001b[43mpka_set\u001b[49m\u001b[43m[\u001b[49m\u001b[43macidbase_particle\u001b[49m\u001b[43m]\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 9\u001b[39m pmb.get_templates_df(pmb_type=\u001b[33m\"\u001b[39m\u001b[33mparticle_state\u001b[39m\u001b[33m\"\u001b[39m)\n", + "\u001b[36mFile \u001b[39m\u001b[32m~/Desktop/pyMBE_pmb/pyMBE/pyMBE.py:1561\u001b[39m, in \u001b[36mpymbe_library.define_monoprototic_particle_states\u001b[39m\u001b[34m(self, particle_name, acidity)\u001b[39m\n\u001b[32m 1559\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m pd.isna(acidity):\n\u001b[32m 1560\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m acidity \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;129;01min\u001b[39;00m acidity_valid_keys:\n\u001b[32m-> \u001b[39m\u001b[32m1561\u001b[39m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\u001b[33mf\u001b[39m\u001b[33m\"\u001b[39m\u001b[33mAcidity 
\u001b[39m\u001b[38;5;132;01m{\u001b[39;00macidity\u001b[38;5;132;01m}\u001b[39;00m\u001b[33m provided for particle name \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mparticle_name\u001b[38;5;132;01m}\u001b[39;00m\u001b[33m is not supported. Valid keys are: \u001b[39m\u001b[38;5;132;01m{\u001b[39;00macidity_valid_keys\u001b[38;5;132;01m}\u001b[39;00m\u001b[33m\"\u001b[39m)\n\u001b[32m 1562\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m acidity == \u001b[33m\"\u001b[39m\u001b[33macidic\u001b[39m\u001b[33m\"\u001b[39m:\n\u001b[32m 1563\u001b[39m states = [{\u001b[33m\"\u001b[39m\u001b[33mname\u001b[39m\u001b[33m\"\u001b[39m: \u001b[33mf\u001b[39m\u001b[33m\"\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mparticle_name\u001b[38;5;132;01m}\u001b[39;00m\u001b[33mH\u001b[39m\u001b[33m\"\u001b[39m, \u001b[33m\"\u001b[39m\u001b[33mz\u001b[39m\u001b[33m\"\u001b[39m: \u001b[32m0\u001b[39m}, \n\u001b[32m 1564\u001b[39m {\u001b[33m\"\u001b[39m\u001b[33mname\u001b[39m\u001b[33m\"\u001b[39m: \u001b[33mf\u001b[39m\u001b[33m\"\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mparticle_name\u001b[38;5;132;01m}\u001b[39;00m\u001b[33m\"\u001b[39m, \u001b[33m\"\u001b[39m\u001b[33mz\u001b[39m\u001b[33m\"\u001b[39m: -\u001b[32m1\u001b[39m}]\n", + "\u001b[31mValueError\u001b[39m: Acidity {'pka_value': 4.0, 'acidity': 'acidic'} provided for particle name D is not supported. 
Valid keys are: ['acidic', 'basic']" + ] + } + ], "source": [ - "N_peptide = 1\n", - "sequence = \"KKKKKEEEEE\"\n", - "model = '2beadAA'" + "# Get the pKa set stored in pyMBE\n", + "pka_set = pmb.get_pka_set()\n", + "# Check the pka_set\n", + "print(pka_set)\n", + "# define templates for the different particle states of monoprotic acid and basic groups:\n", + "for acidbase_particle in pka_set.keys():\n", + " pmb.define_monoprototic_particle_states(particle_name=acidbase_particle,\n", + " acidity=pka_set[acidbase_particle][\"acidity\"])\n", + "pmb.get_templates_df(pmb_type=\"particle_state\")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "We can use the peptide parametrization reported by Lunkad et al., which is provided in the reference folder. After loading the parameters we should add the bonds to the ESPResSo system. " - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "path_to_interactions=pmb.root / \"parameters\" / \"peptides\" / \"Lunkad2021.json\"\n", - "path_to_pka=pmb.root / \"parameters\" / \"pka_sets\" / \"Hass2015.json\"\n", - "pmb.load_interaction_parameters(filename=path_to_interactions) \n", - "pmb.load_pka_set (path_to_pka)\n", - "pmb.add_bonds_to_espresso(espresso_system = espresso_system)" + "The above functions define templates for particles and particle states. Before creating a peptide molecule, we also need to define templates for the aminoacid residues and the peptide molecule." ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 43, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
pmb_typenamemodelresidue_listsequence
0peptideKKKKKEEEEE2beadAA[AA-K, AA-K, AA-K, AA-K, AA-K, AA-E, AA-E, AA-...KKKKKEEEEE
\n", + "
" + ], + "text/plain": [ + " pmb_type name model \\\n", + "0 peptide KKKKKEEEEE 2beadAA \n", + "\n", + " residue_list sequence \n", + "0 [AA-K, AA-K, AA-K, AA-K, AA-K, AA-E, AA-E, AA-... KKKKKEEEEE " + ] + }, + "execution_count": 43, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "print(pmb.df)" + "from pyMBE.lib.handy_functions import define_peptide_AA_residues\n", + "\n", + "# This is a convinience function that defines residue templates\n", + "# for aminoacids based on some pre-defined models\n", + "define_peptide_AA_residues(sequence=sequence,\n", + " model=model,\n", + " pmb=pmb)\n", + "pmb.define_peptide(name = sequence, \n", + " sequence = sequence, \n", + " model = model)\n", + "pmb.get_templates_df(pmb_type=\"residue\")\n", + "pmb.get_templates_df(pmb_type=\"peptide\")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "Now, we can define our peptide and create it into the ESPResSo system. " + "Now, we can create instances of our peptide template into the ESPResSo system. 
" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 44, "metadata": {}, - "outputs": [], + "outputs": [ + { + "ename": "ValueError", + "evalue": "Template 'KH' not found in type 'particle_state'.", + "output_type": "error", + "traceback": [ + "\u001b[31m---------------------------------------------------------------------------\u001b[39m", + "\u001b[31mValueError\u001b[39m Traceback (most recent call last)", + "\u001b[36mCell\u001b[39m\u001b[36m \u001b[39m\u001b[32mIn[44]\u001b[39m\u001b[32m, line 1\u001b[39m\n\u001b[32m----> \u001b[39m\u001b[32m1\u001b[39m mol_ids = \u001b[43mpmb\u001b[49m\u001b[43m.\u001b[49m\u001b[43mcreate_molecule\u001b[49m\u001b[43m(\u001b[49m\u001b[43mname\u001b[49m\u001b[43m \u001b[49m\u001b[43m=\u001b[49m\u001b[43m \u001b[49m\u001b[43msequence\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 2\u001b[39m \u001b[43m \u001b[49m\u001b[43mnumber_of_molecules\u001b[49m\u001b[43m=\u001b[49m\u001b[43m \u001b[49m\u001b[43mN_peptide\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 3\u001b[39m \u001b[43m \u001b[49m\u001b[43mespresso_system\u001b[49m\u001b[43m \u001b[49m\u001b[43m=\u001b[49m\u001b[43m \u001b[49m\u001b[43mespresso_system\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 4\u001b[39m \u001b[43m \u001b[49m\u001b[43mlist_of_first_residue_positions\u001b[49m\u001b[43m \u001b[49m\u001b[43m=\u001b[49m\u001b[43m \u001b[49m\u001b[43m[\u001b[49m\u001b[43m[\u001b[49m\u001b[43mBox_L\u001b[49m\u001b[43m.\u001b[49m\u001b[43mto\u001b[49m\u001b[43m(\u001b[49m\u001b[33;43m'\u001b[39;49m\u001b[33;43mreduced_length\u001b[39;49m\u001b[33;43m'\u001b[39;49m\u001b[43m)\u001b[49m\u001b[43m.\u001b[49m\u001b[43mmagnitude\u001b[49m\u001b[43m/\u001b[49m\u001b[32;43m2\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m*\u001b[49m\u001b[32;43m3\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 5\u001b[39m pmb.get_instances_df(pmb_type=\u001b[33m\"\u001b[39m\u001b[33mpeptide\u001b[39m\u001b[33m\"\u001b[39m)\n", + "\u001b[36mFile 
\u001b[39m\u001b[32m~/Desktop/pyMBE_pmb/pyMBE/pyMBE.py:1030\u001b[39m, in \u001b[36mpymbe_library.create_molecule\u001b[39m\u001b[34m(self, name, number_of_molecules, espresso_system, list_of_first_residue_positions, backbone_vector, use_default_bond, reverse_residue_order)\u001b[39m\n\u001b[32m 1027\u001b[39m \u001b[38;5;28;01mfor\u001b[39;00m item \u001b[38;5;129;01min\u001b[39;00m list_of_first_residue_positions:\n\u001b[32m 1028\u001b[39m central_bead_pos = [np.array(list_of_first_residue_positions[pos_index])]\n\u001b[32m-> \u001b[39m\u001b[32m1030\u001b[39m residue_id = \u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43mcreate_residue\u001b[49m\u001b[43m(\u001b[49m\u001b[43mname\u001b[49m\u001b[43m=\u001b[49m\u001b[43mresidue\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 1031\u001b[39m \u001b[43m \u001b[49m\u001b[43mespresso_system\u001b[49m\u001b[43m=\u001b[49m\u001b[43mespresso_system\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\n\u001b[32m 1032\u001b[39m \u001b[43m \u001b[49m\u001b[43mcentral_bead_position\u001b[49m\u001b[43m=\u001b[49m\u001b[43mcentral_bead_pos\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\n\u001b[32m 1033\u001b[39m \u001b[43m \u001b[49m\u001b[43muse_default_bond\u001b[49m\u001b[43m=\u001b[49m\u001b[43m \u001b[49m\u001b[43muse_default_bond\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\n\u001b[32m 1034\u001b[39m \u001b[43m \u001b[49m\u001b[43mbackbone_vector\u001b[49m\u001b[43m=\u001b[49m\u001b[43mbackbone_vector\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 1036\u001b[39m \u001b[38;5;66;03m# Add molecule_id to the residue instance and all particles associated\u001b[39;00m\n\u001b[32m 1037\u001b[39m \u001b[38;5;28mself\u001b[39m.db._propagate_id(root_type=\u001b[33m\"\u001b[39m\u001b[33mresidue\u001b[39m\u001b[33m\"\u001b[39m, \n\u001b[32m 1038\u001b[39m root_id=residue_id,\n\u001b[32m 1039\u001b[39m attribute=\u001b[33m\"\u001b[39m\u001b[33mmolecule_id\u001b[39m\u001b[33m\"\u001b[39m, \n\u001b[32m 
1040\u001b[39m value=molecule_id)\n", + "\u001b[36mFile \u001b[39m\u001b[32m~/Desktop/pyMBE_pmb/pyMBE/pyMBE.py:1319\u001b[39m, in \u001b[36mpymbe_library.create_residue\u001b[39m\u001b[34m(self, name, espresso_system, central_bead_position, use_default_bond, backbone_vector)\u001b[39m\n\u001b[32m 1315\u001b[39m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[32m 1316\u001b[39m bead_position=central_bead_position+\u001b[38;5;28mself\u001b[39m.generate_trial_perpendicular_vector(vector=np.array(backbone_vector),\n\u001b[32m 1317\u001b[39m magnitude=l0)\n\u001b[32m-> \u001b[39m\u001b[32m1319\u001b[39m side_bead_id = \u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43mcreate_particle\u001b[49m\u001b[43m(\u001b[49m\u001b[43mname\u001b[49m\u001b[43m=\u001b[49m\u001b[43mside_chain_name\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\n\u001b[32m 1320\u001b[39m \u001b[43m \u001b[49m\u001b[43mespresso_system\u001b[49m\u001b[43m=\u001b[49m\u001b[43mespresso_system\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 1321\u001b[39m \u001b[43m \u001b[49m\u001b[43mposition\u001b[49m\u001b[43m=\u001b[49m\u001b[43m[\u001b[49m\u001b[43mbead_position\u001b[49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\n\u001b[32m 1322\u001b[39m \u001b[43m \u001b[49m\u001b[43mnumber_of_particles\u001b[49m\u001b[43m=\u001b[49m\u001b[32;43m1\u001b[39;49m\u001b[43m)\u001b[49m[\u001b[32m0\u001b[39m]\n\u001b[32m 1323\u001b[39m side_chain_beads_ids.append(side_bead_id)\n\u001b[32m 1324\u001b[39m \u001b[38;5;28mself\u001b[39m.db._update_instance(pmb_type=\u001b[33m\"\u001b[39m\u001b[33mparticle\u001b[39m\u001b[33m\"\u001b[39m,\n\u001b[32m 1325\u001b[39m instance_id=side_bead_id,\n\u001b[32m 1326\u001b[39m attribute=\u001b[33m\"\u001b[39m\u001b[33mresidue_id\u001b[39m\u001b[33m\"\u001b[39m,\n\u001b[32m 1327\u001b[39m value=residue_id)\n", + "\u001b[36mFile \u001b[39m\u001b[32m~/Desktop/pyMBE_pmb/pyMBE/pyMBE.py:1133\u001b[39m, in 
\u001b[36mpymbe_library.create_particle\u001b[39m\u001b[34m(self, name, espresso_system, number_of_particles, position, fix)\u001b[39m\n\u001b[32m 1129\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m []\n\u001b[32m 1131\u001b[39m part_tpl = \u001b[38;5;28mself\u001b[39m.db.get_template(pmb_type=\u001b[33m\"\u001b[39m\u001b[33mparticle\u001b[39m\u001b[33m\"\u001b[39m,\n\u001b[32m 1132\u001b[39m name=name)\n\u001b[32m-> \u001b[39m\u001b[32m1133\u001b[39m part_state = \u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43mdb\u001b[49m\u001b[43m.\u001b[49m\u001b[43mget_template\u001b[49m\u001b[43m(\u001b[49m\u001b[43mpmb_type\u001b[49m\u001b[43m=\u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43mparticle_state\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[32m 1134\u001b[39m \u001b[43m \u001b[49m\u001b[43mname\u001b[49m\u001b[43m=\u001b[49m\u001b[43mpart_tpl\u001b[49m\u001b[43m.\u001b[49m\u001b[43minitial_state\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 1135\u001b[39m z = part_state.z\n\u001b[32m 1136\u001b[39m es_type = part_state.es_type\n", + "\u001b[36mFile \u001b[39m\u001b[32m~/Desktop/pyMBE_pmb/pyMBE/storage/manager.py:935\u001b[39m, in \u001b[36mManager.get_template\u001b[39m\u001b[34m(self, pmb_type, name)\u001b[39m\n\u001b[32m 932\u001b[39m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\u001b[33mf\u001b[39m\u001b[33m\"\u001b[39m\u001b[33mThere are no \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mpmb_type\u001b[38;5;132;01m}\u001b[39;00m\u001b[33m templates defined in the database\u001b[39m\u001b[33m\"\u001b[39m)\n\u001b[32m 934\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m name \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mself\u001b[39m._templates[pmb_type]:\n\u001b[32m--> \u001b[39m\u001b[32m935\u001b[39m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\u001b[33mf\u001b[39m\u001b[33m\"\u001b[39m\u001b[33mTemplate 
\u001b[39m\u001b[33m'\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mname\u001b[38;5;132;01m}\u001b[39;00m\u001b[33m'\u001b[39m\u001b[33m not found in type \u001b[39m\u001b[33m'\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mpmb_type\u001b[38;5;132;01m}\u001b[39;00m\u001b[33m'\u001b[39m\u001b[33m.\u001b[39m\u001b[33m\"\u001b[39m)\n\u001b[32m 936\u001b[39m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[32m 937\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m._templates[pmb_type][name]\n", + "\u001b[31mValueError\u001b[39m: Template 'KH' not found in type 'particle_state'." + ] + } + ], "source": [ - "pmb.define_peptide(name = sequence, \n", - " sequence = sequence, \n", - " model = model)\n", "\n", - "pmb.create_molecule(name = sequence,\n", - " number_of_molecules= N_peptide,\n", - " espresso_system = espresso_system,\n", - " list_of_first_residue_positions = [[Box_L.to('reduced_length').magnitude/2]*3])" + "mol_ids = pmb.create_molecule(name = sequence,\n", + " number_of_molecules= N_peptide,\n", + " espresso_system = espresso_system,\n", + " list_of_first_residue_positions = [[Box_L.to('reduced_length').magnitude/2]*3])\n", + "pmb.get_instances_df(pmb_type=\"peptide\")" ] }, { @@ -2661,24 +3581,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "Delete the particles and check that our df is empty." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "pmb.delete_molecule_in_system(molecule_id=0, \n", - " espresso_system = espresso_system)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Finally, the setup using the three letter code. pyMBE automatically detects and transforms into the one-letter code using its own protein sequence parser." 
+ "Delete the particles and check that the pyMBE database is empty" ] }, { @@ -2687,14 +3590,10 @@ "metadata": {}, "outputs": [], "source": [ - "sequence = 'LYS-LYS-LYS-LYS-LYS-GLU-GLU-GLU-GLU-GLU'\n", - "\n", - "pmb.define_peptide(name = sequence, \n", - " sequence = sequence, \n", - " model = model)\n", - "\n", - "print('one letter code', pmb.protein_sequence_parser(sequence=sequence))\n", - "print('defined peptide sequence ', sequence)" + "for mol_id in mol_ids:\n", + " pmb.delete_instances_in_system(instance_id=0,\n", + " pmb_type=\"peptide\", \n", + " espresso_system = espresso_system)" ] }, { @@ -2708,7 +3607,8 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "Lunkad, R. et al. Molecular Systems Design & Engineering (2021), 6(2), 122-131." + "[1] Beyer, D., Torres, P. B., Pineda, S. P., Narambuena, C. F., Grad, J. N., Košovan, P., & Blanco, P. M. (2024). pyMBE: The Python-based molecule builder for ESPResSo. The Journal of Chemical Physics, 161(2).\n", + "[2] Lunkad, R., Murmiliuk, A., Hebbeker, P., Boublík, M., Tošner, Z., Štěpánek, M., & Košovan, P. (2021). Quantitative prediction of charge regulation in oligopeptides. Molecular Systems Design & Engineering, 6(2), 122-131." ] } ], From 3147a11b243b15fd47ed056b8d00507e5a8225f2 Mon Sep 17 00:00:00 2001 From: pmblanco Date: Tue, 10 Feb 2026 12:36:25 +0100 Subject: [PATCH 55/55] update changelog --- CHANGELOG.md | 27 +++++++++++++++++++++++---- 1 file changed, 23 insertions(+), 4 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index e7ec95d..ed3a12e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,10 +7,29 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] -### Changed -- Methods that interact directly with the pyMBE dataframe are now private and stored in a dedicated module in `storage/df_management`. These methods also have been refactored to be stateless methods, i.e. 
making it impossible for them to change behavior during the pyMBE object lifetime or for the user to change the pyMBE dataframe unless explicitely calling them. This includes the methods: `add_bond_in_df`, `add_value_to_df`, `assign_molecule_id`, `check_if_df_cell_has_a_value`, `check_if_name_is_defined_in_df`, `check_if_multiple_pmb_types_for_name`, `clean_df_row`, `clean_ids_in_df_row`, `copy_df_entry`, `create_variable_with_units`, `convert_columns_to_original_format`, `convert_str_to_bond_object`, `delete_entries_in_df`, `find_bond_key`, `setup_df`. (#145) -- `define_particle_entry_in_df` is now a private method in pyMBE, as it is a convenience method for internal use. (#145) -- The custom `NumpyEncoder` is now a private class in the private module `storage/df_management` because it is only internally used in pyMBE for serialization/deserialization. (#145) +### Added +- Introduced a canonical pyMBE database backend replacing the previous monolithic Pandas DataFrame storage approach. This lays the foundation for more robust, extensible, and normalized data handling across pyMBE. (#147) +- Added support to define reaction templates in the pyMBE database. (#147) +- Utility functions to cast information about templates and instances in the pyMBE database into pandas dataframes: `pmb.get_templates_df`, `pmb.get_instances_df` and `pmb.get_reactions_df`. (#147) +- Utility functions to load and save the new database via the pyMBE API, `pmb.save_database` and `pmb.load_database`. (#147) +- Added functions to define particle states: `pmb.define_particle_states` and `pmb.define_monoprototic_particle_states`. (#147) +- Added utility functions in `lib/handy_functions` to define residue and particle templates for amino acids in peptides and proteins: `define_protein_AA_particles`, `define_protein_AA_residues` and `define_peptide_AA_residues`.
(#147) + +### Changed +- Refactored core modules to use the new database schema based on templates and instances for particles, residues, molecules, hydrogels, proteins and peptides. (#147) +- Particle states now are independent templates and are now disentangled from particle templates. (#147) +- pKa values are now stored as part of chemical reactions and are no longer an attribute of particle templates. (#147) +- Amino acid residue templates are no longer defined internally in `define_peptide` and `define_protein`. Those definitions are now exposed to the user. (#147) +- Molecule templates now need to be defined to be used as templates for hydrogel chains in hydrogels. (#147) + +### Fixed +- Utility methods `get_particle_id_map`, `calculate_HH`, `calculate_net_charge`, `center_object_in_simulation_box` now support all template types in pyMBE, including hydrogels. Some of these methods have been renamed to expose this change in behavior directly in the API. + +### Removed +- Methods that interact directly with the pyMBE dataframe. These methods have been replaced by private methods that instead interact with the new canonical pyMBE database (in `pyMBE/storage/manager`). This includes the methods: `add_bond_in_df`, `add_value_to_df`, `assign_molecule_id`, `check_if_df_cell_has_a_value`, `check_if_name_is_defined_in_df`, `check_if_multiple_pmb_types_for_name`, `clean_df_row`, `clean_ids_in_df_row`, `copy_df_entry`, `create_variable_with_units`, `convert_columns_to_original_format`, `convert_str_to_bond_object`, `delete_entries_in_df`, `find_bond_key`, `setup_df`, `define_particle_entry_in_df`, custom `NumpyEncoder`. (#145, #147) +- Method `add_bonds_to_espresso` has been removed from the API. pyMBE now adds bonds internally to ESPResSo when molecule instances are created into ESPResSo. (#147) +- Tutorial `lattice_builder.ipynb` has been removed because its content is redundant with sample script `build_hydrogel.py`. (#147) + + ## [1.0.0] - 2025-10-08