Skip to content
26 changes: 22 additions & 4 deletions src/openfe_analysis/reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,17 +13,18 @@

def _determine_iteration_dt(dataset) -> float:
"""
Find out the timestep between each frame in the trajectory.
Determine the time increment between successive iterations
in a MultiStateReporter trajectory.
Parameters
----------
dataset : nc.Dataset
Dataset holding the MultiStateReporter generated NetCDF file.
NetCDF dataset produced by ``openmmtools.multistate.MultiStateReporter``.
Returns
-------
float
The timestep in units of picoseconds.
The time between successive iterations, in picoseconds.
Raises
------
Expand All @@ -35,7 +36,8 @@ def _determine_iteration_dt(dataset) -> float:
-----
This assumes an MCMC move which serializes in a manner similar
to `openmmtools.mcmc.LangevinDynamicsMove`, i.e. it must have
both a `timestep` and `n_steps` defined.
both a `timestep` and `n_steps` defined, such that
dt_iteration = n_steps * timestep
"""
# Deserialize the MCMC move information for the 0th entry.
mcmc_move_data = yaml.load(
Expand Down Expand Up @@ -149,16 +151,31 @@ def index_method(self) -> str:

@staticmethod
def parse_n_atoms(filename, **kwargs) -> int:
"""
Determine the number of atoms stored in a MultiStateReporter NetCDF file.
Parameters
----------
filename : path-like
Path to the NetCDF file.
Returns
-------
int
Number of atoms in the system.
"""
with nc.Dataset(filename) as ds:
n_atoms = ds.dimensions["atom"].size
return n_atoms

def _read_next_timestep(self, ts=None) -> Timestep:
# Advance the trajectory by one frame.
if (self._frame_index + 1) >= len(self):
raise EOFError
return self._read_frame(self._frame_index + 1)

def _read_frame(self, frame: int) -> Timestep:
# Read a single trajectory frame.
self._frame_index = frame

frame = self._frames[self._frame_index]
Expand Down Expand Up @@ -197,6 +214,7 @@ def _read_frame(self, frame: int) -> Timestep:

@property
def dt(self) -> float:
# Time difference between successive trajectory frames.
return self._dt

def _reopen(self):
Expand Down
90 changes: 69 additions & 21 deletions src/openfe_analysis/rmsd.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,22 +16,48 @@


def make_Universe(top: pathlib.Path, trj: nc.Dataset, state: int) -> mda.Universe:
"""Makes a Universe and applies some transformations
"""
Construct an MDAnalysis Universe from a MultiState NetCDF trajectory
and apply standard analysis transformations.

The Universe is created using the custom ``FEReader`` to extract a
single state from a multistate simulation.

Parameters
----------
top : pathlib.Path or Topology
Path to a topology file (e.g. PDB) or an already-loaded MDAnalysis
topology object.
trj : nc.Dataset
Open NetCDF dataset produced by
``openmmtools.multistate.MultiStateReporter``.
state : int
Thermodynamic state index to extract from the multistate trajectory.

Returns
-------
MDAnalysis.Universe
A Universe with trajectory transformations applied.

Notes
-----
Identifies two AtomGroups:
- protein, defined as having standard amino acid names, then filtered
down to CA
- ligand, defined as resname UNK

Then applies some transformations.
Depending on whether a protein is present, a sequence of trajectory
transformations is applied:

If a protein is present:
- prevents the protein from jumping between periodic images
- moves the ligand to the image closest to the protein
- aligns the entire system to minimise the protein RMSD
(class:`NoJump`)
- moves the ligand to the image closest to the protein (:class:`Minimiser`)
- aligns the entire system to minimise the protein RMSD (:class:`Aligner`)

If only a ligand:
If only a ligand is present:
- prevents the ligand from jumping between periodic images
- Aligns the ligand to minimize its RMSD
Comment on lines 44 to +60

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Details on the method should go under Notes I think.

"""
u = mda.Universe(
top,
Expand Down Expand Up @@ -76,23 +102,39 @@ def make_Universe(top: pathlib.Path, trj: nc.Dataset, state: int) -> mda.Univers
def gather_rms_data(
pdb_topology: pathlib.Path, dataset: pathlib.Path, skip: Optional[int] = None
) -> dict[str, list[float]]:
"""Generate structural analysis of RBFE simulation
"""
Compute structural RMSD-based metrics for a multistate BFE simulation.

Parameters
----------
pdb_topology : pathlib.Path
path to pdb topology
Path to the PDB file defining system topology.
dataset : pathlib.Path
path to nc trajectory
Path to the NetCDF trajectory file produced by a multistate simulation.
skip : int, optional
step at which to progress through the trajectory. by default, selects a
step that produces roughly 500 frames of analysis per replicate

Produces, for each lambda state:
- 1D protein RMSD timeseries 'protein_RMSD'
- ligand RMSD timeseries
- ligand COM motion 'ligand_wander'
- 2D protein RMSD plot
Frame stride for analysis. If ``None``, a stride is chosen such that
approximately 500 frames are analyzed per state.

Returns
-------
dict[str, list]
Dictionary containing per-state analysis results with keys:
``protein_RMSD``, ``ligand_RMSD``, ``ligand_wander``,
``protein_2D_RMSD``, and ``time(ps)``.

Notes
-----
For each thermodynamic state (lambda), this function:
- Loads the trajectory using ``FEReader``
- Applies standard PBC-handling and alignment transformations
- Computes protein and ligand structural metrics over time

The following analyses are produced per state:
- 1D protein CA RMSD time series
- 1D ligand RMSD time series
- Ligand center-of-mass displacement from its initial position
(``ligand_wander``)
- Flattened 2D protein RMSD matrix (pairwise RMSD between frames)
"""
output = {
"protein_RMSD": [],
Expand Down Expand Up @@ -187,19 +229,25 @@ def gather_rms_data(


def twoD_RMSD(positions, w: Optional[npt.NDArray]) -> list[float]:
"""2 dimensions RMSD
"""
Compute a flattened 2D RMSD matrix from a trajectory.

For all unique frame pairs ``(i, j)`` with ``i < j``, this function
computes the RMSD between atomic coordinates after optimal alignment.

Parameters
----------
positions : np.ndarray
the protein positions for the entire trajectory
Atomic coordinates for all frames in the trajectory.
w : np.ndarray, optional
weights array
Per-atom weights to use in the RMSD calculation. If ``None``,
all atoms are weighted equally.

Returns
-------
rmsd_matrix : list
a flattened version of the 2d
list of float
Flattened list of RMSD values corresponding to all frame pairs
``(i, j)`` with ``i < j``.
"""
nframes, _, _ = positions.shape

Expand Down
47 changes: 34 additions & 13 deletions src/openfe_analysis/transformations.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,11 +14,26 @@


class NoJump(TransformationBase):
"""Stops an AtomGroup from moving more than half a box length between frames

This transformation prevents an AtomGroup "teleporting" across the box
border between two subsequent frames. This then simplifies the calculation
of motion over time.
"""
Prevent an AtomGroup from jumping between periodic images.
This transformation removes large apparent
COM displacements caused by periodic boundary conditions.

Parameters
----------
ag : MDAnalysis.AtomGroup
AtomGroup whose center-of-mass motion should be made continuous.

Notes
-----
- This transformation assumes an orthorhombic unit cell.
- Only translations are applied; no rotations or scaling.
- The correction is based on center-of-mass motion and is therefore
most appropriate for compact groups (e.g. proteins, ligands).
- Must be applied before any alignment transformations to avoid
mixing reference frames.
- Is intended to be applied before analyses that rely on smooth
time evolution (e.g. RMSD, COM motion).
"""

ag: mda.AtomGroup
Expand Down Expand Up @@ -49,12 +64,13 @@ class ClosestImageShift(TransformationBase):
"""
PBC-safe transformation that shifts one or more target AtomGroups
so that their COM is in the closest image relative to a reference AtomGroup.
Works for any box type (triclinic or orthorhombic).

CAVEAT:
This Transformation requires the AtomGroups to be unwrapped!
CAVEAT: This Transformation requires the AtomGroups to be unwrapped!

Inspired from:
Notes
-----
- Works for any box type (triclinic or orthorhombic).
- Inspired from:
https://github.com/wolberlab/OpenMMDL/blob/main/openmmdl/openmmdl_simulation/scripts/post_md_conversions.py
"""

Expand All @@ -75,10 +91,15 @@ def _transform(self, ts):


class Aligner(TransformationBase):
"""On-the-fly transformation to align a trajectory to minimise RMSD

centers all coordinates onto origin
rotates **entire universe** to minimise rmsd relative to **ref_ag**
"""
Align a trajectory to a reference AtomGroup by minimizing RMSD.

Notes
-----
Performs an on-the-fly least-squares alignment
of the entire universe to a reference AtomGroup.
At each frame, the coordinates are translated and rotated to minimize the
RMSD of the atoms relative to their positions in the reference.
"""

ref_pos: npt.NDArray
Expand Down
18 changes: 12 additions & 6 deletions src/openfe_analysis/utils/multistate.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,11 @@ def _determine_position_indices(dataset: nc.Dataset) -> NDArray:
indices : NDArray[int]
An ordered array of iteration indices which hold positions.

Raises
------
ValueError
If positions are not written at a consistent interval.

Note
----
This assumes that the indices are equally spaced by a given
Expand Down Expand Up @@ -55,11 +60,12 @@ def _determine_position_indices(dataset: nc.Dataset) -> NDArray:


def _state_to_replica(dataset: nc.Dataset, state_num: int, frame_num: int) -> int:
"""Convert a state index to replica index at a given Dataset frame
"""
Map a thermodynamic state index to the corresponding replica index.

Parameters
----------
dataset : netCDF4.Dataset
dataset : nc.Dataset
Dataset containing the MultiState reporter generated NetCDF file
with information about all the frames and replica in the system.
state_num : int
Expand All @@ -86,7 +92,7 @@ def _replica_positions_at_frame(

Parameters
----------
dataset : netCDF4.Dataset
dataset : nc.Dataset
Dataset containing the MultiState information.
replica_index : int
Replica index to extract positions for.
Expand Down Expand Up @@ -125,7 +131,7 @@ def _create_new_dataset(filename: Path, n_atoms: int, title: str) -> nc.Dataset:

Returns
-------
netCDF4.Dataset
nc.Dataset
AMBER Conventions compliant NetCDF dataset to store information
contained in MultiState reporter generated NetCDF file.
"""
Expand Down Expand Up @@ -170,13 +176,13 @@ def _get_unitcell(
dataset: nc.Dataset, replica_index: int, frame_num: int
) -> Optional[Tuple[unit.Quantity]]:
"""
Helper method to extract a unit cell from the stored
Helper method to extract unit cell dimensions from the stored
box vectors in a MultiState reporter generated NetCDF file
at a given state and Dataset frame.

Parameters
----------
dataset : netCDF4.Dataset
dataset : nc.Dataset
Dataset of MultiState reporter generated NetCDF file.
replica_index : int
Replica for which to get the unit cell for.
Expand Down
Loading