Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ dependencies = [
"loguru>=0.7.3",
"mdanalysis>=2.9.0",
"pydantic>=2.11.3",
"pydantic-settings>=2.12.0",
"streamlit>=1.50.0",
]

Expand Down
7 changes: 5 additions & 2 deletions src/grodecoder/cli/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
from ..main import main as grodecoder_main
from .args import Arguments as CliArgs
from .args import CoordinatesFile, StructureFile
from ..settings import get_settings
from ..logging import setup_logging


Expand All @@ -11,7 +12,6 @@
@click.argument("coordinates_file", type=CoordinatesFile, required=False)
@click.option(
"--bond-threshold",
default=5.0,
type=float,
help="Threshold for interchain bond detection (default: 5 Å)",
)
Expand All @@ -29,12 +29,15 @@ def cli(**kwargs):
args = CliArgs(
structure_file=kwargs["structure_file"],
coordinates_file=kwargs["coordinates_file"],
bond_threshold=kwargs["bond_threshold"],
no_atom_ids=kwargs["no_atom_ids"],
print_to_stdout=kwargs["stdout"],
)

get_settings().debug = kwargs["verbose"]

logfile = args.get_log_filename()
setup_logging(logfile, kwargs["verbose"])
setup_logging(logfile)
grodecoder_main(args)


Expand Down
4 changes: 2 additions & 2 deletions src/grodecoder/cli/args.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,14 +59,14 @@ class Arguments:
Attrs:
structure_file (Path): Path to the structure file.
coordinates_file (Path): Path to the coordinates file.
bond_threshold (float): Threshold for interchain bond detection.
bond_threshold (float | None): Threshold for interchain bond detection.
no_atom_ids (bool): If True, use compact serialization (no atom indices).
print_to_stdout (bool): Whether to output results to stdout.
"""

structure_file: StructureFile
coordinates_file: CoordinatesFile | None = None
bond_threshold: float = 5.0
bond_threshold: float | None = None
no_atom_ids: bool = True
print_to_stdout: bool = False

Expand Down
31 changes: 24 additions & 7 deletions src/grodecoder/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,26 +7,43 @@
from .io import read_universe
from .models import Decoded
from .toputils import guess_resolution
from .settings import get_settings


def _now() -> str:
"""Returns the current date and time formatted string."""
return datetime.now().strftime("%Y-%m-%d %H:%M:%S")


def decode(universe: UniverseLike) -> Decoded:
    """Decodes the universe into an inventory of segments.

    The resolution is guessed first, using the resolution detection distance
    cutoff read from the settings. The chain detection distance cutoff is then
    taken from the settings when it is set, or guessed from the resolution
    otherwise, and passed to ``identify`` as the bond threshold.

    Args:
        universe: the system to decode.

    Returns:
        The decoded inventory together with the guessed resolution.
    """
    settings = get_settings()

    resolution = guess_resolution(universe, cutoff_distance=settings.resolution_detection.distance_cutoff)
    logger.info(f"Guessed resolution: {resolution}")

    # Guesses the chain detection distance cutoff if not provided by the user.
    # The settings object fetched above is reused instead of being looked up again.
    chain_detection_settings = settings.chain_detection

    if chain_detection_settings.distance_cutoff.is_set():
        value = chain_detection_settings.distance_cutoff.get()
        logger.debug(f"chain detection: using user-defined value: {value:.2f}")
    else:
        logger.debug("chain detection: guessing distance cutoff based on resolution")
        chain_detection_settings.distance_cutoff.guess(resolution)

    # Read back after the optional guess so both branches use the same accessor.
    distance_cutoff = chain_detection_settings.distance_cutoff.get()

    return Decoded(
        inventory=identify(universe, bond_threshold=distance_cutoff),
        resolution=resolution,
    )


def decode_structure(structure_path: PathLike, coordinates_path: PathLike | None = None) -> Decoded:
    """Loads a universe from the given file(s) and returns its decoded inventory.

    Args:
        structure_path: path to the structure file.
        coordinates_path: optional path to a coordinates file.
    """
    system = read_universe(structure_path, coordinates_path)
    atoms = system.atoms
    assert atoms is not None  # required by type checker for some reason
    n_atoms = len(atoms)
    logger.debug(f"Universe has {n_atoms:,d} atoms")
    return decode(system)
29 changes: 25 additions & 4 deletions src/grodecoder/identifier.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,9 @@ def identify_small_molecule(
residue = SmallMolecule(
atoms=selection, description=definition.description, molecular_type=molecular_type
)
logger.debug(
f"identified small molecule {residue.description}: {len(selection.residues)} residues, {len(selection.atoms)} atoms"
)
counts.append(residue)
return counts

Expand Down Expand Up @@ -66,18 +69,25 @@ def _select_protein(universe: UniverseLike) -> AtomGroup:
selection_str = f"resname {' '.join(protein_residue_names)}"

# Exclude methanol residues from the selection.
logger.debug("excluding possible methanol residues (MET) from protein")
methanol = _find_methanol(universe)
if methanol:
selection_str += f" and not index {' '.join(map(str, methanol))}"

return universe.select_atoms(selection_str)
logger.debug("selecting protein")
protein = universe.select_atoms(selection_str)
logger.debug("selecting protein - done")
return protein


def _select_nucleic(universe: UniverseLike) -> AtomGroup:
    """Returns the atoms whose residue name is a known nucleotide name."""
    resnames = " ".join(DB.get_nucleotide_names())
    query = f"resname {resnames}"

    logger.debug("selecting nucleic")
    selected = universe.select_atoms(query)
    logger.debug("selecting nucleic - done")

    return selected


def _iter_chains(atoms: AtomGroup, bond_threshold: float = 5.0) -> Iterator[AtomGroup]:
Expand All @@ -87,8 +97,14 @@ def _iter_chains(atoms: AtomGroup, bond_threshold: float = 5.0) -> Iterator[Atom
"""
if len(atoms) == 0:
return
segments = toputils.detect_chains(atoms, cutoff=bond_threshold)
logger.debug(f"detecting segments using cutoff distance {bond_threshold:.2f}")
segments = toputils.detect_chains(atoms, cutoff_distance=bond_threshold)

n_seg_str = f"{len(segments)} segment" + "s" if len(segments) > 1 else ""
logger.debug(f"detecting segments - done: found {n_seg_str}")

for start, end in segments:
logger.debug(f"yielding segment containing residues {start} to {end}")
yield atoms.residues[start : end + 1].atoms


Expand Down Expand Up @@ -156,6 +172,7 @@ def _log_identified_molecules(molecules: list[SmallMolecule], label: str) -> Non

def _identify(universe: UniverseLike, bond_threshold: float = 5.0) -> Inventory:
"""Identifies the molecules in the universe."""
logger.debug("Residu identification: start")

# Ensure the universe is an AtomGroup.
universe = universe.select_atoms("all")
Expand Down Expand Up @@ -211,8 +228,12 @@ def _identify(universe: UniverseLike, bond_threshold: float = 5.0) -> Inventory:
)
unknown_molecules.append(molecule)

return Inventory(
logger.debug("Creating inventory")
inventory = Inventory(
segments=protein + nucleic,
small_molecules=ions + solvents + lipids + others + unknown_molecules,
total_number_of_atoms=total_number_of_atoms,
)

logger.debug("Residu identification: end")
return inventory
12 changes: 11 additions & 1 deletion src/grodecoder/logging.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,13 +6,22 @@

from loguru import logger

from .settings import get_settings

def setup_logging(logfile: Path, debug: bool = False):

def setup_logging(logfile: Path):
"""Sets up logging configuration."""
debug = get_settings().debug

fmt = "<green>{time:YYYY-MM-DD HH:mm:ss}</green> <level>{level}: {message}</level>"
level = "DEBUG" if debug else "INFO"

logger.remove()

# Screen logger.
logger.add(sys.stderr, level=level, format=fmt, colorize=True)

# File logger
logger.add(logfile, level=level, format=fmt, colorize=False, mode="w")

# Sets up loguru to capture warnings (typically MDAnalysis warnings)
Expand All @@ -24,6 +33,7 @@ def showwarning(message, *args, **kwargs):

def is_logging_debug() -> bool:
    """Returns True if at least one logging handler is set to level DEBUG.

    The check is a membership test of ``"DEBUG"`` in the value returned by
    ``get_logging_level()``. Nothing is printed here, so calling this function
    does not write to standard output.
    """
    return "DEBUG" in get_logging_level()


Expand Down
14 changes: 10 additions & 4 deletions src/grodecoder/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
from .databases import get_database_version
from .models import GrodecoderRunOutput
from .version import get_version
from .settings import get_settings

if TYPE_CHECKING:
from .cli.args import Arguments as CliArgs
Expand All @@ -27,22 +28,27 @@ def main(args: "CliArgs"):
structure_path = args.structure_file.path
coordinates_path = args.coordinates_file.path if args.coordinates_file else None

# Storing cli arguments into settings.
settings = get_settings()
settings.chain_detection.distance_cutoff = args.bond_threshold
settings.output.atom_ids = not args.no_atom_ids

logger.info(f"Processing structure file: {structure_path}")

# Decoding.
decoded = decode_structure(
structure_path, coordinates_path=coordinates_path, bond_threshold=args.bond_threshold
)
decoded = decode_structure(structure_path, coordinates_path=coordinates_path)

output = GrodecoderRunOutput(
decoded=decoded,
structure_file_checksum=_get_checksum(structure_path),
database_version=get_database_version(),
grodecoder_version=get_version(),
input_settings=settings,
)

# Serialization.
serialization_mode = "compact" if args.no_atom_ids else "full"
logger.debug("Creating json output")
serialization_mode = "full" if settings.output.atom_ids else "compact"

# Updates run time as late as possible.
output_json = output.model_dump(context={"serialization_mode": serialization_mode})
Expand Down
4 changes: 3 additions & 1 deletion src/grodecoder/models.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
from __future__ import annotations
from pydantic import model_validator

from enum import StrEnum
from typing import Protocol
Expand All @@ -14,9 +13,11 @@
computed_field,
field_serializer,
model_serializer,
model_validator,
)

from . import toputils
from .settings import Settings


class MolecularResolution(StrEnum):
Expand Down Expand Up @@ -194,6 +195,7 @@ class GrodecoderRunOutput(BaseModel):
structure_file_checksum: str
database_version: str
grodecoder_version: str
input_settings: Settings


# =========================================================================================================
Expand Down
Loading