Module deeporigin.src

Sub-modules

deeporigin.src.client
deeporigin.src.constants
deeporigin.src.docking
deeporigin.src.pocket_finder
deeporigin.src.progress_view
deeporigin.src.properties
deeporigin.src.structures
deeporigin.src.utilities

Functions

def create_bounding_box(ligand, padding=0.0, output_file=None, around_ligand=False, box_size=20.0)
Expand source code
def create_bounding_box(ligand, padding=0.0, output_file=None, around_ligand=False, box_size=20.0):
    """
    Compute a bounding box for a ligand and optionally save it as a structure file.

    Args:
        ligand (Structure): Object exposing its 3D coordinates via ``ligand.coordinates``.
        padding (float, optional): Passed to ``calculate_bounding_box`` when
            ``around_ligand`` is True. Defaults to 0.0.
        output_file (str, optional): When truthy, the box atoms are built and written
            to this path. Defaults to None.
        around_ligand (bool, optional): If True the box is computed with
            ``calculate_bounding_box(coordinates, padding)``; otherwise with
            ``calculate_fixed_bounding_box(coordinates, box_size)``. Defaults to False.
        box_size (float, optional): Passed to ``calculate_fixed_bounding_box`` when
            ``around_ligand`` is False. Defaults to 20.0.

    Returns:
        dict: Mapping with the keys ``min_coords``, ``max_coords``, ``dimensions`` and
        ``center`` (the four values returned by the selected helper, in that order).
        When ``output_file`` is given, the key ``atom_array`` additionally holds the
        object produced by ``create_bounding_box_atoms``.
    """
    coords = ligand.coordinates

    # Pick the box strategy: fitted to the ligand (with padding) or fixed-size.
    if around_ligand:
        box = calculate_bounding_box(coords, padding)
    else:
        box = calculate_fixed_bounding_box(coords, box_size)
    lower, upper, extents, midpoint = box

    summary = {
        'min_coords': lower,
        'max_coords': upper,
        'dimensions': extents,
        'center': midpoint
    }

    # Nothing to write: return the numeric description only.
    if not output_file:
        return summary

    box_atoms = create_bounding_box_atoms(lower, upper, extents)
    struc.io.save_structure(output_file, box_atoms)
    summary['atom_array'] = box_atoms
    print(f"Bounding box atoms saved to {output_file}")
    return summary

Creates a bounding box around a molecular structure with specified parameters.

Args

ligand : Structure
A molecular structure object containing 3D coordinates
padding : float, optional
Additional space to add around the ligand's dimensions. Defaults to 0.0.
output_file : str, optional
Path to save the bounding box as a structure file. Defaults to None.
around_ligand : bool, optional
If True, creates a box that fits around ligand with padding. If False, creates a fixed size box. Defaults to False.
box_size : float, optional
Size of the fixed bounding box when around_ligand is False. Defaults to 20.0.

Returns

dict
A dictionary containing: - min_coords (numpy.ndarray): Minimum x,y,z coordinates - max_coords (numpy.ndarray): Maximum x,y,z coordinates - dimensions (numpy.ndarray): Box dimensions - center (numpy.ndarray): Box center coordinates - atom_array (AtomArray): Structure array of box vertices (only if output_file is specified)
def read_block(block_type, block_content)
Expand source code
def read_block(block_type, block_content):
    """Dispatch a molecular structure block to the reader for its file format.

    Args:
        block_type (FileFormat): Format of the block. MOL2, PDB, PDBQT and XYZ are
            the formats handled here.
        block_content (str): The content of the molecular structure block to be read.

    Returns:
        The value returned by the selected reader (``read_mol2_block``,
        ``read_pdb_pdbqt_block`` or ``read_xyz_block``). Per the original
        documentation this is a ``(name, atom_types, coordinates)`` tuple; the exact
        shapes are defined by those readers.

    Raises:
        Exception: If block_type is not one of the supported formats.

    See Also:
        read_mol2_block
        read_pdb_pdbqt_block
        read_xyz_block

    Examples:
        >>> content = "@<TRIPOS>MOLECULE\\n..."
        >>> name, atoms, coords = read_block(FileFormat.MOL2, content)
    """
    if block_type == FileFormat.MOL2:
        return read_mol2_block(block_content)

    # PDB and PDBQT share one reader, which needs to know which of the two it got.
    if block_type in (FileFormat.PDB, FileFormat.PDBQT):
        return read_pdb_pdbqt_block(block_type, block_content)

    if block_type == FileFormat.XYZ:
        return read_xyz_block(block_content)

    raise Exception(f"Invalid file format {block_type}")

Read a molecular structure block and return its contents based on file format.

This function acts as a dispatcher to specific readers based on the input file format.

Args

block_type : FileFormat
The format of the molecular structure block (MOL2, PDB, PDBQT, or XYZ)
block_content : str
The content of the molecular structure block to be read

Returns

tuple
A tuple containing: - name (str): The molecule name/identifier - atom_types (list or numpy.ndarray): List/array of atomic symbols - coordinates (numpy.ndarray): 3xN array of atom coordinates where N is number of atoms

Raises

Exception
If block_type is not one of the supported formats

See Also: read_mol2_block, read_pdb_pdbqt_block, read_xyz_block

Examples

>>> content = "@<TRIPOS>MOLECULE\n..."
>>> name, atoms, coords = read_block(FileFormat.MOL2, content)

Classes

class DockingReport (results: List[DockingResult],
pocket_data)
Expand source code
class DockingReport:
    """
    A class to handle and report docking results.

    This class provides functionality to manage docking results, generate reports,
    save results to files, and visualize protein-ligand complexes.

    Attributes:
        results (List[DockingResult]): A list of docking results.
        pocket_data: Data about the binding pocket used in docking. ``save`` reads its
            ``box_center`` and ``box_size`` attributes.

    Methods:
        _to_dataframe(include_props=None): Converts docking results to a pandas DataFrame.
        _repr_html_(): Returns HTML representation of the docking report.
        generate_custom_report(include_props=False): Generates a custom HTML report with specified properties.
        save(save_dir=None, safe=True): Saves docking results to SDF file with properties.
        visualize(protein_path=None, protein_format=None, sdf_file_path=None,
                 crystal_ligand_path=None, crystal_ligand_format=None):
            Visualizes the docking results in 3D.

    Examples:
        >>> report = DockingReport(results, pocket_data)
        >>> report.save()  # Saves results to an SDF file
        >>> report.visualize()  # Shows 3D visualization of results
    """
    def __init__(self, results: List[DockingResult], pocket_data):
        self.results = results
        self.pocket_data = pocket_data

    def _to_dataframe(self, include_props=None):
        """
        Converts docking results to a pandas DataFrame.

        Args:
            include_props (optional): Used as a flag. When truthy, every property of a
                successful result's top ligand whose key does not contain ``"smiles"``
                is added as an extra column.

        Returns:
            pd.DataFrame: One row per docking result with these columns by default:
            - Image: value of ``ligand.mol._draw()`` for the top ligand
            - SMILES: SMILES string stored on the result
            - Ranking Score: Docking ranking score (rounded to 3 decimal places)
            - Binding Energy: Binding energy value (rounded to 3 decimal places)
            - Path To Docked Pose: File path to the docked ligand pose
            Rows for results that are unsuccessful or have no top ligand keep ``None``
            in every column except SMILES. The DataFrame is sorted by Ranking Score in
            descending order. With no results, an empty DataFrame carrying the default
            columns is returned.
        """
        base_columns = ["Image", "SMILES", "Ranking Score", "Binding Energy", "Path To Docked Pose"]

        data = []
        for result in self.results:
            property_dict = {
                "Image": None,
                "SMILES": result.smiles,
                "Ranking Score": None,
                "Binding Energy": None,
                "Path To Docked Pose": None,
            }

            if result.top_ligand and result.successful:
                ligand = result.top_ligand

                mol_props = ligand.properties

                # Missing scores fall back to 0.0.
                energy_score = float(mol_props.get("Binding Energy", "0.0"))
                ranking_score = float(mol_props.get("Ranking Score", "0.0"))
                property_dict["Image"] = ligand.mol._draw()
                property_dict["SMILES"] = result.smiles
                property_dict["Ranking Score"] = round(ranking_score, 3)
                property_dict["Binding Energy"] = round(energy_score, 3)
                property_dict["Path To Docked Pose"] = ligand.file_path

                if include_props:
                    for prop in mol_props:
                        if "smiles" not in prop:
                            property_dict[prop] = mol_props.get(prop, None)
            data.append(property_dict)

        # An empty record list yields a frame without columns, which would make the
        # sort below fail with KeyError; give the empty frame its default columns.
        df = pd.DataFrame(data) if data else pd.DataFrame(columns=base_columns)
        df = df.sort_values(by="Ranking Score", ascending=False).reset_index(drop=True)
        return df

    def _repr_html_(self):
        """Return the HTML of the default report table, styled with 3-digit precision."""
        df = self._to_dataframe().style.format(precision=3)
        return df._repr_html_()

    def generate_custom_report(self, include_props=False):
        """
        Generate a custom HTML report from the data.

        This method converts the internal data to a styled pandas DataFrame and returns it as HTML.
        The resulting DataFrame is formatted with 3 decimal places precision.

        Args:
            include_props (bool, optional): Whether to include properties in the report. Defaults to False.

        Returns:
            HTML: A styled HTML representation of the data with 3 decimal places precision.

        Example:
            >>> report = obj.generate_custom_report(include_props=True)
            >>> display(report)  # In Jupyter notebook
        """
        df = self._to_dataframe(include_props).style.format(precision=3)
        return HTML(df._repr_html_())

    def __str__(self):
        return f"DockingReport:\n  Number of DockingResults: {len(self.results)}"

    def __repr__(self):
        return self.__str__()

    def save(self, save_dir=None, safe=True):
        """
        Save docking results to files in a timestamped sub-directory.

        Args:
            save_dir (str or Path, optional): Parent directory for the report directory.
                If falsy, the END_USER_HOME environment variable is used, falling back
                to the current directory.
            safe (bool, optional): If True and the SDF file already exists, it is handed
                to ``move_file_with_extension``; otherwise ``remove_file`` is called on
                the path before writing. Defaults to True.

        Returns:
            str or None: Path to the created directory containing saved files if successful,
            None if no top ligands exist.

        Files Created:
            - docking_report_top_ligands.sdf: Contains the top ligands with their properties
            - {protein_name}.pdb: Protein structure of the first result (failures are
              logged, not raised)
            - bounding_box.pdb: File containing the docking box information

        Notes:
            The saved SDF file includes:
            - Molecule structure
            - Molecule name (if available)
            - SMILES string (if available)
            - All existing molecular properties
            - All additional properties from docking results
        """

        top_ligands = [result.top_ligand for result in self.results if result.top_ligand]

        if not top_ligands:
            return None

        report_dir_name = f"docking_report_{datetime.now().strftime('%m-%d-%Y|%H:%M:%S')}"
        if not save_dir:
            save_dir_path = Path(os.getenv("END_USER_HOME", ".")) / report_dir_name
        else:
            save_dir_path = Path(save_dir) / report_dir_name

        save_dir_path.mkdir(parents=True, exist_ok=True)
        sdf_file_path = save_dir_path / "docking_report_top_ligands.sdf"
        if safe and sdf_file_path.exists():
            move_file_with_extension(str(sdf_file_path), "sdf")
        else:
            remove_file(str(sdf_file_path))

        writer = Chem.SDWriter(str(sdf_file_path))
        try:
            writer.SetKekulize(False)

            for ligand in top_ligands:
                mol = ligand.mol.m  # RDKit molecule

                properties = ligand.properties
                existing_properties = ligand.mol.m.GetPropsAsDict()
                if ligand.name:
                    mol.SetProp("_Name", ligand.name)
                if ligand.mol.smiles:
                    mol.SetProp("_SMILES", ligand.mol.smiles)

                # Re-set the molecule's own properties as strings, then layer the
                # ligand-level properties on top (same names are overwritten).
                for prop_name, prop_value in existing_properties.items():
                    mol.SetProp(prop_name, str(prop_value))

                for prop_name, prop_value in properties.items():
                    mol.SetProp(prop_name, str(prop_value))

                writer.write(mol)
        finally:
            # Always release the file handle, even if a molecule fails to serialize.
            writer.close()

        # Writing the protein is best-effort: a failure is logged and the save continues.
        try:
            self.results[0].protein.write_to_file(str(save_dir_path / f"{self.results[0].protein.name}.pdb"))
        except Exception as e:
            DEFAULT_LOGGER.log_error(f"Failed to write protein to file: {e}")

        save_bounding_box(
            self.pocket_data.box_center, self.pocket_data.box_size, output_file=str(save_dir_path / "bounding_box.pdb")
        )
        return str(save_dir_path)

    @jupyter_visualization
    def visualize(
        self,
        protein_path=None,
        protein_format=None,
        sdf_file_path=None,
        crystal_ligand_path=None,
        crystal_ligand_format=None,
    ):
        """
        Visualizes the docking report by rendering the merged structures of
        protein and ligands.

        Args:
            protein_path (str, optional): Path to the protein file. If None, the file
                path and block type of the first result's protein are used.
            protein_format (str, optional): Format of the protein file (e.g., pdb).
            sdf_file_path (str, optional): Path to the ligand file in SDF format. If
                None, the report is first saved under ``/tmp`` and that SDF is used.
            crystal_ligand_path (str, optional): Path to a crystal ligand file. Only
                used when ``crystal_ligand_format`` is also given.
            crystal_ligand_format (str, optional): Format of the crystal ligand file.

        Raises:
            ValueError: If `protein_path` is provided without `protein_format`, if the
                report has no top ligands to save for visualization, or if there are
                no results to take the protein from.

        Returns:
            The HTML content produced by ``DockingViewer.render_with_seperate_crystal``
            (post-processed by the ``jupyter_visualization`` decorator).
        """
        # Validate arguments before touching the filesystem.
        if protein_path is not None and protein_format is None:
            raise ValueError("Please provide the protein format along with the protein path.")

        if sdf_file_path is None:
            saved_dir = self.save(save_dir="/tmp")
            # save() returns None when no result has a top ligand; fail with a clear
            # message instead of letting Path(None) raise a TypeError.
            if saved_dir is None:
                raise ValueError("No top ligands found to visualize.")
            sdf_file_path = str(Path(saved_dir) / "docking_report_top_ligands.sdf")

        if protein_path is None:
            if not self.results:
                raise ValueError("No results found to extract protein information from.")
            protein_path = str(self.results[0].protein.file_path)
            protein_format = self.results[0].protein.block_type

        viewer = DockingViewer()

        crystal_data = None
        if crystal_ligand_path and crystal_ligand_format:
            crystal_data = {"raw": str(crystal_ligand_path), "format": crystal_ligand_format}

        html_content = viewer.render_with_seperate_crystal(
            protein_data=protein_path,
            protein_format=protein_format,
            ligands_data=[sdf_file_path],
            ligand_format="sdf",
            crystal_data=crystal_data,
        )

        return html_content

A class to handle and report docking results.

This class provides functionality to manage docking results, generate reports, save results to files, and visualize protein-ligand complexes.

Attributes

results : List[DockingResult]
A list of docking results.
pocket_data
Data about the binding pocket used in docking.

Methods

_to_dataframe(include_props=None): Converts docking results to a pandas DataFrame. _repr_html_(): Returns HTML representation of the docking report. generate_custom_report(include_props=False): Generates a custom HTML report with specified properties. save(save_dir=None, safe=True): Saves docking results to SDF file with properties. visualize(protein_path=None, protein_format=None, sdf_file_path=None, crystal_ligand_path=None, crystal_ligand_format=None): Visualizes the docking results in 3D.

Examples

>>> report = DockingReport(results, pocket_data)
>>> report.save()  # Saves results to an SDF file
>>> report.visualize()  # Shows 3D visualization of results

Methods

def generate_custom_report(self, include_props=False)
Expand source code
def generate_custom_report(self, include_props=False):
    """
    Build the report table and return it wrapped as an HTML object.

    The table comes from ``self._to_dataframe(include_props)`` and is styled with a
    precision of 3 before being rendered to HTML.

    Args:
        include_props (bool, optional): Forwarded to ``_to_dataframe``; whether to
            include properties in the report. Defaults to False.

    Returns:
        HTML: The styled table's HTML markup wrapped in ``HTML``.

    Example:
        >>> report = obj.generate_custom_report(include_props=True)
        >>> display(report)  # In Jupyter notebook
    """
    table = self._to_dataframe(include_props)
    styled_table = table.style.format(precision=3)
    markup = styled_table._repr_html_()
    return HTML(markup)

Generate a custom HTML report from the data.

This method converts the internal data to a styled pandas DataFrame and returns it as HTML. The resulting DataFrame is formatted with 3 decimal places precision.

Args

include_props : bool, optional
Whether to include properties in the report. Defaults to False.

Returns

HTML
A styled HTML representation of the data with 3 decimal places precision.

Example

>>> report = obj.generate_custom_report(include_props=True)
>>> display(report)  # In Jupyter notebook
def save(self, save_dir=None, safe=True)
Expand source code
def save(self, save_dir=None, safe=True):
    """
    Save docking results to files in a timestamped sub-directory.

    Args:
        save_dir (str or Path, optional): Parent directory for the report directory.
            If falsy, the END_USER_HOME environment variable is used, falling back
            to the current directory.
        safe (bool, optional): If True and the SDF file already exists, it is handed
            to ``move_file_with_extension``; otherwise ``remove_file`` is called on
            the path before writing. Defaults to True.

    Returns:
        str or None: Path to the created directory containing saved files if successful,
        None if no top ligands exist.

    Files Created:
        - docking_report_top_ligands.sdf: Contains the top ligands with their properties
        - {protein_name}.pdb: Protein structure of the first result (failures are
          logged, not raised)
        - bounding_box.pdb: File containing the docking box information

    Notes:
        The saved SDF file includes:
        - Molecule structure
        - Molecule name (if available)
        - SMILES string (if available)
        - All existing molecular properties
        - All additional properties from docking results
    """

    top_ligands = [result.top_ligand for result in self.results if result.top_ligand]

    if not top_ligands:
        return None

    report_dir_name = f"docking_report_{datetime.now().strftime('%m-%d-%Y|%H:%M:%S')}"
    if not save_dir:
        save_dir_path = Path(os.getenv("END_USER_HOME", ".")) / report_dir_name
    else:
        save_dir_path = Path(save_dir) / report_dir_name

    save_dir_path.mkdir(parents=True, exist_ok=True)
    sdf_file_path = save_dir_path / "docking_report_top_ligands.sdf"
    if safe and sdf_file_path.exists():
        move_file_with_extension(str(sdf_file_path), "sdf")
    else:
        remove_file(str(sdf_file_path))

    writer = Chem.SDWriter(str(sdf_file_path))
    try:
        writer.SetKekulize(False)

        for ligand in top_ligands:
            mol = ligand.mol.m  # RDKit molecule

            properties = ligand.properties
            existing_properties = ligand.mol.m.GetPropsAsDict()
            if ligand.name:
                mol.SetProp("_Name", ligand.name)
            if ligand.mol.smiles:
                mol.SetProp("_SMILES", ligand.mol.smiles)

            # Re-set the molecule's own properties as strings, then layer the
            # ligand-level properties on top (same names are overwritten).
            for prop_name, prop_value in existing_properties.items():
                mol.SetProp(prop_name, str(prop_value))

            for prop_name, prop_value in properties.items():
                mol.SetProp(prop_name, str(prop_value))

            writer.write(mol)
    finally:
        # Always release the file handle, even if a molecule fails to serialize.
        writer.close()

    # Writing the protein is best-effort: a failure is logged and the save continues.
    try:
        self.results[0].protein.write_to_file(str(save_dir_path / f"{self.results[0].protein.name}.pdb"))
    except Exception as e:
        DEFAULT_LOGGER.log_error(f"Failed to write protein to file: {e}")

    save_bounding_box(
        self.pocket_data.box_center, self.pocket_data.box_size, output_file=str(save_dir_path / "bounding_box.pdb")
    )
    return str(save_dir_path)

Save docking results to files in a specified directory.

Args

save_dir : str or Path, optional
Directory path where the results will be saved.
If None, creates a directory in END_USER_HOME or current directory.
safe : bool, optional
If True, moves existing files with same name instead of overwriting them. Defaults to True.

Returns

str or None
Path to the created directory containing saved files if successful, None if no top ligands exist.

Files Created: - docking_report_top_ligands.sdf: Contains the top scoring ligands with their properties - {protein_name}.pdb: Protein structure file - bounding_box.pdb: File containing the docking box information

Notes

The saved SDF file includes: - Molecule structure - Molecule name (if available) - SMILES string (if available) - All existing molecular properties - All additional properties from docking results

def visualize(*args, **kwargs)
Expand source code
def wrapper(*args, **kwargs):
    # Call the wrapped callable with the caller's arguments to obtain its
    # visualization, then pass that to JupyterViewer.visualize and return the result.
    rendered = func(*args, **kwargs)
    return JupyterViewer.visualize(rendered)
class DockingResult (protein: Protein,
smiles: str | None = None,
file_path: str | None = None,
successful: bool | None = True)
Expand source code
class DockingResult:
    """
    A class representing the results of a molecular docking operation.

    This class stores and manages the results of a molecular docking simulation, including
    the protein target, docked ligands, and associated metadata. It provides methods for
    analyzing and visualizing the docking results.

    Attributes:
        protein (Protein): The protein target used in the docking.
        ligands (List[Ligand]): List of docked ligand poses.
        rmsds (Optional[List[float]]): RMSD values compared to crystal structure if calculated.
        top_ligand (Optional[Ligand]): The highest scoring ligand pose (not set by this
            class itself; starts as None).
        smiles (Optional[str]): SMILES string representation of the ligand.
        successful (bool): Whether the docking operation was successful.
        file_path (Optional[str]): Path to the docking results file.

    Methods:
        add_ligand(ligand: Ligand): Add a docked ligand pose to results.
        calculate_rmsds_from_crystal(crystal_ligand: Union[Ligand, str]): Calculate RMSD values against crystal structure.
        _to_sdf(safe=True, sdf_file_path=None): Export docking results to SDF file.
        _to_dataframe(): Convert results to a styled pandas DataFrame.
        visualize(crystal_ligand_path=None, crystal_ligand_format=None): Visualize docking results.
        analyze(index: Optional[int] = None): Generate detailed analysis of docking poses.

    Examples:
        >>> result = DockingResult(protein, smiles="CC(=O)OC1=CC=CC=C1C(=O)O")
        >>> result.add_ligand(docked_pose)
        >>> result.analyze()
    """
    def __init__(
        self,
        protein: Protein,
        smiles: Optional[str] = None,
        file_path: Optional[str] = None,
        successful: Optional[bool] = True,
    ):
        self.protein = protein
        self.ligands: List[Ligand] = []
        self.rmsds: Optional[List[float]] = None
        self.top_ligand: Optional[Ligand] = None
        self.smiles = smiles
        self.successful = successful
        self.file_path = file_path

    def add_ligand(self, ligand: Ligand):
        """
        Add a ligand to the list of ligands.

        Args:
            ligand (Ligand): The ligand object to be added to the list.

        Returns:
            None
        """
        self.ligands.append(ligand)

    def calculate_rmsds_from_crystal(self, crystal_ligand: Ligand | str):
        """
        Calculate RMSD values between the docked poses file and a crystal structure.

        Runs the ``obrms`` command line tool on ``self.file_path`` and the crystal
        ligand's file, parses the last whitespace-separated token of every non-blank
        output line as a float, stores the list on ``self.rmsds`` and returns it.

        Args:
            crystal_ligand (Union[Ligand, str]): Either a Ligand object or a file path
                (string or path-like) of the crystal structure to compare against.

        Returns:
            list[float]: The parsed RMSD values, one per non-blank output line.

        Raises:
            SystemError: If RMSD calculation fails for any reason, including ``obrms``
                being unavailable, exiting with a non-zero status, or printing output
                that cannot be parsed.

        Example:
            >>> result = DockingResult(protein, file_path="path/to/docked_poses.sdf")
            >>> rmsds = result.calculate_rmsds_from_crystal("path/to/crystal.pdb")
        """
        if isinstance(crystal_ligand, (str, os.PathLike)):
            crystal_ligand = Ligand(file_path=str(crystal_ligand))

        try:
            # check=True turns a failing obrms run into an exception instead of an
            # empty stdout that would silently produce an empty RMSD list.
            completed = subprocess.run(
                ["obrms", self.file_path, crystal_ligand.file_path],
                capture_output=True,
                text=True,
                check=True,
            )
            self.rmsds = [float(line.split()[-1]) for line in completed.stdout.split("\n") if line.strip()]
            return self.rmsds
        except Exception as e:
            raise SystemError(f"Failed to calculate RMSD values: {e}") from e

    def _to_sdf(self, safe=True, sdf_file_path=None):
        """
        Convert ligands to SDF file format.

        This method writes ligand molecules and their properties to a Structure-Data File (SDF).
        Each ligand's properties are stored as SDF tags in the output file.

        Args:
            safe (bool, optional): If True and the target file exists, it is handed to
                ``move_file_with_extension``; otherwise ``remove_file`` is called on the
                path before writing. Defaults to True.
            sdf_file_path (str, optional): Custom path for the output SDF file.
                If falsy, the file is named from the protein name and SMILES and placed
                next to ``self.file_path``, or in the system temp directory when
                ``self.file_path`` is not set. Defaults to None.

        Returns:
            str or None: Path to the created SDF file, or None if no ligands exist.

        Example:
            result._to_sdf(safe=True, sdf_file_path="output.sdf")
        """
        if not self.ligands:
            return None

        if not sdf_file_path:
            # dirname() must only be applied to the results *file*; applying it to the
            # temp directory itself would point at that directory's parent.
            target_dir = os.path.dirname(self.file_path) if self.file_path else tempfile.gettempdir()
            sdf_file_path = os.path.join(
                target_dir,
                f"{self.protein.name}_docking_result_{self.smiles}.sdf",
            )

        if safe and os.path.isfile(sdf_file_path):
            move_file_with_extension(sdf_file_path, "sdf")
        else:
            remove_file(sdf_file_path)

        writer = Chem.SDWriter(sdf_file_path)
        try:
            writer.SetKekulize(False)

            for ligand in self.ligands:
                mol = ligand.mol.m  # RDKit molecule from Ligand

                properties = ligand.properties
                for prop_name, prop_value in properties.items():
                    mol.SetProp(prop_name, str(prop_value))

                writer.write(mol)
        finally:
            # Always release the file handle, even if a molecule fails to serialize.
            writer.close()

        return sdf_file_path

    def _to_dataframe(self):
        """
        Converts ligand data to a formatted pandas DataFrame.

        Creates a DataFrame containing pose rankings, scores, binding energies, and file paths
        for each ligand. The data is sorted by Pose Score in descending order.

        Returns:
            pandas.io.formats.style.Styler: A styled DataFrame with the following columns:
                - Ligand Pose Rank ID: 1-based position of the pose in ``self.ligands``
                  (assigned before sorting)
                - Pose Score: Rounded to 3 decimal places
                - Binding Energy: Rounded to 3 decimal places
                - Path To Docked Pose: File path to the docked ligand pose

        Note:
            - Missing Binding Energy or Pose Score values default to 0.0
            - All numeric values are formatted to 3 decimal places in the output
            - With no ligands, an empty table carrying the columns above is returned
        """
        columns = ["Ligand Pose Rank ID", "Pose Score", "Binding Energy", "Path To Docked Pose"]

        data = []
        for idx, ligand in enumerate(self.ligands):
            mol_props = ligand.properties

            energy_score = float(mol_props.get("Binding Energy", "0.0"))
            rscore = float(mol_props.get("Pose Score", "0.0"))

            data.append(
                {
                    "Ligand Pose Rank ID": idx + 1,
                    "Pose Score": round(rscore, 3),
                    "Binding Energy": round(energy_score, 3),
                    "Path To Docked Pose": ligand.file_path,
                }
            )
        # An empty record list yields a frame without columns, which would make the
        # sort below fail with KeyError; give the empty frame its columns.
        df = pd.DataFrame(data) if data else pd.DataFrame(columns=columns)
        # Sort by Pose Score descending
        df = df.sort_values(by="Pose Score", ascending=False).reset_index(drop=True)
        return df.style.format(precision=3)

    def _repr_html_(self):
        """Return the HTML of the styled pose table."""
        df = self._to_dataframe()
        return df._repr_html_()

    def __str__(self):
        return (
            f"DockingResult:\n  Number of Ligands: {len(self.ligands)}\n"
            f"  SMILES: {self.smiles if self.smiles else 'Not provided'}\n"
            f"  File Path: {self.file_path if self.file_path else 'Not provided'}"
        )

    def __repr__(self):
        return self.__str__()

    @jupyter_visualization
    def visualize(self, crystal_ligand_path=None, crystal_ligand_format=None):
        """
        Visualize docking results with an optional crystal ligand overlay.

        Args:
            crystal_ligand_path (str, optional): File path to the crystal ligand structure.
                Only used when ``crystal_ligand_format`` is also given.
            crystal_ligand_format (str, optional): Format of the crystal ligand file (e.g., 'pdb', 'mol2').

        Returns:
            The output of ``DockingViewer.render_with_seperate_crystal`` (post-processed
            by the ``jupyter_visualization`` decorator).

        Note:
            The ligand file at ``self.file_path`` is always passed with the format
            "sdf". The protein is passed with its own ``block_type``.
        """

        visualization_format = "sdf"
        crystal_data = None
        if crystal_ligand_format and crystal_ligand_path:
            crystal_data = {"raw": str(crystal_ligand_path), "format": crystal_ligand_format}

        return DockingViewer().render_with_seperate_crystal(
            protein_data=str(self.protein.file_path),
            protein_format=self.protein.block_type,
            ligands_data=[str(self.file_path)],
            ligand_format=visualization_format,
            crystal_data=crystal_data,
        )

    def analyze(self, index: Optional[int] = None):
        """
        Analyze protein-ligand interactions using interaction fingerprinting.

        The protein (a deep copy) and all ligand poses are written to a temporary
        directory, loaded back through RDKit, and fed to a ``plf.Fingerprint``. Either a
        single pose (specified by index) or all poses are analyzed.

        Args:
            index (Optional[int]): The index of the specific ligand pose to analyze.
                If None, analyzes all poses. Defaults to None.

        Returns:
            - If index is None: the fingerprint as a DataFrame indexed by "Pose"
              (a barcode plot is also drawn).
            - If index is specified: the value returned by ``fp.plot_lignetwork`` for
              that pose.

        Raises:
            ValueError: If no ligands or protein are found to analyze
            IndexError: If the provided ligand index is out of range

        Notes:
            - Creates temporary files for processing; they are removed on exit
            - Sets custom Van der Waals radii for Fe, H, and O atoms
            - Uses RDKit for molecular operations
        """
        if not self.ligands:
            raise ValueError("No ligands found to analyze.")

        # Work on a copy so that re-pointing file_path below does not alter self.protein.
        protein = deepcopy(self.protein)
        if not protein:
            raise ValueError("No protein found to analyze.")

        fp = plf.Fingerprint()
        with tempfile.TemporaryDirectory() as temp_dir:
            protein_file = os.path.join(temp_dir, f"{protein.name}.pdb")

            protein.write_to_file(protein_file)
            protein.file_path = protein_file

            sdf_file_path = self._to_sdf(sdf_file_path=os.path.join(temp_dir, f"{protein.name}_docking_result.sdf"))

            # NOTE(review): `v` is not used afterwards; presumably `vdwradii` is state
            # shared with the fingerprint's VdW contact detection - confirm.
            v = VdWContact()
            v.vdwradii["Fe"] = 2.0
            v.vdwradii["H"] = 1.05
            v.vdwradii["O"] = 1.48

            rdkit_prot = Chem.MolFromPDBFile(protein_file, False, False)
            protein_mol = plf.Molecule(rdkit_prot)
            pose_iterable = plf.sdf_supplier(str(sdf_file_path))
            # Swap in a supplier that skips sanitization when reading the poses.
            sdf_supp = Chem.SDMolSupplier(str(sdf_file_path), sanitize=False)
            pose_iterable._suppl = sdf_supp

            if index is not None:
                if index < 0 or index >= len(sdf_supp):
                    raise IndexError("Ligand index out of range.")

                single_ligand_iterable = pose_iterable[index]
                fp.run_from_iterable([single_ligand_iterable], protein_mol)

                result = fp.plot_lignetwork(single_ligand_iterable)
            else:
                fp.run_from_iterable(pose_iterable, protein_mol)
                fp.plot_barcode(xlabel="Pose")

                result = fp.to_dataframe(index_col="Pose")

        return result

A class representing the results of a molecular docking operation.

This class stores and manages the results of a molecular docking simulation, including the protein target, docked ligands, and associated metadata. It provides methods for analyzing and visualizing the docking results.

Attributes

protein : Protein
The protein target used in the docking.
ligands : List[Ligand]
List of docked ligand poses.
rmsds : Optional[List[float]]
RMSD values compared to crystal structure if calculated.
top_ligand : Optional[Ligand]
The highest scoring ligand pose.
smiles : Optional[str]
SMILES string representation of the ligand.
successful : bool
Whether the docking operation was successful.
file_path : Optional[str]
Path to the docking results file.

Methods

add_ligand(ligand: Ligand): Add a docked ligand pose to results. calculate_rmsds_from_crystal(crystal_ligand: Union[Ligand, str]): Calculate RMSD values against crystal structure. _to_sdf(safe=True, sdf_file_path=None): Export docking results to SDF file. _to_dataframe(): Convert results to pandas DataFrame. visualize(crystal_ligand_path=None, crystal_ligand_format=None): Visualize docking results. analyze(index: Optional[int] = None): Generate detailed analysis of docking poses.

Examples

>>> result = DockingResult(protein, smiles="CC(=O)OC1=CC=CC=C1C(=O)O")
>>> result.add_ligand(docked_pose)
>>> result.analyze()

Methods

def add_ligand(self,
ligand: Ligand)
Expand source code
def add_ligand(self, ligand: Ligand):
    """
    Register one more docked pose on this result.

    Args:
        ligand (Ligand): The pose to store; it is placed at the end of ``self.ligands``.

    Returns:
        None
    """
    poses = self.ligands
    poses.append(ligand)

Add a ligand to the list of ligands.

Args

ligand : Ligand
The ligand object to be added to the list.

Returns

None

def analyze(self, index: int | None = None)
Expand source code
def analyze(self, index: Optional[int] = None):
    """
    Analyze protein-ligand interactions by running a ``plf.Fingerprint`` over the docked poses.

    The protein and all stored poses are written to a temporary directory, re-read
    through RDKit, and fed to the fingerprint. Either a single pose (selected by
    ``index``) or every pose is analyzed.

    Args:
        index (Optional[int]): The index of the specific ligand pose to analyze.
            If None, analyzes all poses. Defaults to None.

    Returns:
        - If index is None: the result of ``fp.to_dataframe(index_col="Pose")``
          (a barcode plot is also produced via ``fp.plot_barcode`` as a side effect).
        - If index is specified: the result of ``fp.plot_lignetwork`` for that pose.

    Raises:
        ValueError: If no ligands or protein are found to analyze
        IndexError: If the provided ligand index is out of range

    Notes:
        - Creates temporary files for processing; they are removed when the method returns
        - Sets custom Van der Waals radii for Fe, H, and O atoms on a ``VdWContact`` instance
        - Uses RDKit for molecular operations
        - NOTE(review): the previous docstring called the fingerprinting library
          "PLIPy"/"PLIP"; the code uses the ``plf`` alias (presumably ProLIF) - confirm.
    """
    if not self.ligands:
        raise ValueError("No ligands found to analyze.")

    # Work on a copy so that rewriting ``file_path`` below does not touch self.protein.
    protein = deepcopy(self.protein)
    if not protein:
        raise ValueError("No protein found to analyze.")

    fp = plf.Fingerprint()
    with tempfile.TemporaryDirectory() as temp_dir:
        protein_file = os.path.join(temp_dir, f"{protein.name}.pdb")

        protein.write_to_file(protein_file)
        protein.file_path = protein_file

        # Dump every stored pose into one SDF file inside the temporary directory.
        sdf_file_path = self._to_sdf(sdf_file_path=os.path.join(temp_dir, f"{protein.name}_docking_result.sdf"))

        # NOTE(review): ``v`` is not referenced again in this method; confirm that
        # these radius overrides actually reach the fingerprint computed by ``fp``.
        v = VdWContact()
        v.vdwradii["Fe"] = 2.0
        v.vdwradii["H"] = 1.05
        v.vdwradii["O"] = 1.48

        # The two positional False values are RDKit's ``sanitize`` and ``removeHs``.
        rdkit_prot = Chem.MolFromPDBFile(protein_file, False, False)
        protein_mol = plf.Molecule(rdkit_prot)
        pose_iterable = plf.sdf_supplier(str(sdf_file_path))
        # Replace the supplier's private RDKit reader with one that skips sanitization.
        sdf_supp = Chem.SDMolSupplier(str(sdf_file_path), sanitize=False)
        pose_iterable._suppl = sdf_supp

        if index is not None:
            # Negative indices are rejected rather than interpreted from the end.
            if index < 0 or index >= len(sdf_supp):
                raise IndexError("Ligand index out of range.")

            single_ligand_iterable = pose_iterable[index]
            fp.run_from_iterable([single_ligand_iterable], protein_mol)

            result = fp.plot_lignetwork(single_ligand_iterable)
        else:
            fp.run_from_iterable(pose_iterable, protein_mol)
            fp.plot_barcode(xlabel="Pose")

            result = fp.to_dataframe(index_col="Pose")

    return result

Analyze protein-ligand interactions using PLIPy fingerprinting.

This method analyzes the interactions between a protein and its ligands using the PLIP (Protein-Ligand Interaction Profiler) fingerprinting approach. It can analyze either a single ligand pose (specified by index) or all ligand poses.

Args

index : Optional[int]
The index of the specific ligand pose to analyze. If None, analyzes all poses. Defaults to None.

Returns

Union[pd.DataFrame, Dict]
  • If index is None: Returns a DataFrame containing interaction fingerprints for all poses
  • If index is specified: Returns a dictionary containing the ligand network plot data

Raises

ValueError
If no ligands or protein are found to analyze
IndexError
If the provided ligand index is out of range

Notes

  • Creates temporary files for processing
  • Sets custom Van der Waals radii for Fe, H, and O atoms
  • Uses RDKit for molecular operations
  • Uses PLIPy for fingerprint generation
def calculate_rmsds_from_crystal(self,
crystal_ligand: Ligand | str)
Expand source code
def calculate_rmsds_from_crystal(self, crystal_ligand: Ligand | str):
    """
    Calculate RMSD values between the docked poses file and a crystal structure.

    Runs the external ``obrms`` command line tool on ``self.file_path`` and the
    crystal ligand file, parses the last whitespace-separated token of every
    non-blank output line as a float, and stores the values on ``self.rmsds``.

    Args:
        crystal_ligand (Union[Ligand, str]): Either a Ligand object or a file path string
            representing the crystal structure to compare against.

    Returns:
        list[float]: The RMSD values reported by ``obrms``, in output order.

    Raises:
        SystemError: If RMSD calculation fails for any reason, including ``obrms``
            being unavailable, exiting with a non-zero status, or printing output
            that cannot be parsed. The original exception is chained as the cause.

    Example:
        >>> result = DockingResult(protein, smiles="CC(=O)OC1=CC=CC=C1C(=O)O")
        >>> rmsds = result.calculate_rmsds_from_crystal("path/to/crystal.pdb")
    """
    if isinstance(crystal_ligand, str):
        crystal_ligand = Ligand(file_path=crystal_ligand)

    try:
        # check=True turns a failed obrms run into an exception instead of
        # silently yielding an empty RMSD list.
        completed = subprocess.run(
            ["obrms", self.file_path, crystal_ligand.file_path],
            capture_output=True,
            text=True,
            check=True,
        )
        # Skip blank/whitespace-only lines so split()[-1] never hits an empty list.
        rmsds = [float(line.split()[-1]) for line in completed.stdout.splitlines() if line.strip()]
    except Exception as e:
        raise SystemError(f"Failed to calculate RMSD values: {e}") from e

    self.rmsds = rmsds
    return rmsds

Calculate RMSD values between this ligand and a crystal structure.

This method computes Root Mean Square Deviation (RMSD) values between the current ligand and a reference crystal structure using the 'obrms' command line tool.

Args

crystal_ligand : Union[Ligand, str]
Either a Ligand object or a file path string representing the crystal structure to compare against.

Returns

list[float]
A list of RMSD values computed between the current ligand and the crystal structure.

Raises

SystemError
If RMSD calculation fails for any reason.

Example

>>> ligand = Ligand("path/to/ligand.pdb")
>>> rmsds = ligand.calculate_rmsds_from_crystal("path/to/crystal.pdb")
def visualize(*args, **kwargs)
Expand source code
def wrapper(*args, **kwargs):
    # Call the wrapped function and hand its return value to the Jupyter viewer.
    return JupyterViewer.visualize(func(*args, **kwargs))
class Ligand (identifier: str = '',
file_path: str = '',
smiles: str = '',
block_type: str = '',
block_content: str = '',
name: str = '',
seed: int = None,
xref_protein='',
xref_ins_code: str = '',
xref_residue_id: str = '',
xref_protein_chain_id: str = '',
save_to_file: bool = False,
properties: dict = None)
Expand source code
class Ligand:
    def __init__(
        self,
        identifier: str = "",
        file_path: str = "",
        smiles: str = "",
        block_type: str = "",
        block_content: str = "",
        name: str = "",
        seed: Optional[int] = None,
        xref_protein="",
        xref_ins_code: str = "",
        xref_residue_id: str = "",
        xref_protein_chain_id: str = "",
        save_to_file: bool = False,
        properties: Optional[dict] = None,
    ):
        """
        Initialize a Ligand object.

        This constructor creates a Ligand object from exactly one input source and
        records basic metadata about the resulting molecule.

        Args:
            identifier (str, optional): Name or identifier of the molecule. Defaults to "".
            file_path (str, optional): Path to input file containing molecule data. Defaults to "".
            smiles (str, optional): SMILES string representation of molecule. Defaults to "".
            block_type (str, optional): Type of molecular block content (e.g. "mol", "sdf"). Defaults to "".
            block_content (str, optional): Content of molecular block. Defaults to "".
            name (str, optional): Name for the molecule. Defaults to "".
            seed (int, optional): Seed forwarded to ``Molecule.from_smiles_or_name`` when
                building from ``identifier``. Defaults to None.
            xref_protein (str, optional): Cross-reference to protein. Defaults to "".
            xref_ins_code (str, optional): Cross-reference insertion code. Defaults to "".
            xref_residue_id (str, optional): Cross-reference residue ID. Defaults to "".
            xref_protein_chain_id (str, optional): Cross-reference protein chain ID. Defaults to "".
            save_to_file (bool, optional): Whether to save molecule to file (SDF). Defaults to False.
            properties (dict, optional): Additional properties for the molecule; the mapping
                is copied, not stored by reference. Defaults to None.

        Raises:
            ValueError: If not exactly one input source is provided (identifier, file_path, smiles, or block_content).
            ValueError: If block_type is not provided when initializing from block_content.
            ValueError: If molecule creation fails.

        Notes:
            - Only one input source (identifier, file_path, smiles, or block_content) should be provided
            - Logs a warning when the molecule has fewer than 5 heavy atoms
            - Properties carried by the molecule override same-named entries in ``properties``
            - Sets ``available_for_docking`` to False when the molecule contains boron
        """
        self.file_path = file_path
        self.identifier = identifier
        self.protonated_smiles = None
        self.block_type = block_type.lower()
        self.block_content = block_content
        self.name = name
        self.mol = None
        # Shallow copy so later set_property calls never mutate the caller's dict.
        self.properties = dict(properties) if properties else {}
        self.hac = 0
        self.xref_protein = xref_protein
        self.xref_ins_code = xref_ins_code
        self.xref_residue_id = xref_residue_id
        self.xref_protein_chain_id = xref_protein_chain_id

        sources_provided = sum(bool(x) for x in (identifier, file_path, smiles, block_content))
        if sources_provided != 1:
            raise ValueError("Please provide exactly one of identifier, file_path, smiles, or block_content.")

        if block_content:
            if not self.block_type:
                raise ValueError("block_type must be provided when initializing from block_content.")

            self.mol = mol_from_block(self.block_type, self.block_content)
            DEFAULT_LOGGER.log_info("Initialized Ligand from block content.")
        elif identifier:
            self.mol = Molecule.from_smiles_or_name(name=identifier, add_coords=True, seed=seed)
        elif file_path:
            self.mol = self._initialize_from_file(file_path)
        else:
            # Exactly one source is truthy (checked above), so this must be SMILES.
            self.mol = mol_from_smiles(smiles)
            self.block_type = "mol"
            self.block_content = self.mol.molblock()
            DEFAULT_LOGGER.log_info("Initialized Ligand from SMILES string.")

        if self.mol is None:
            raise ValueError("Failed to create molecule.")

        # Prefer the molecule's own name, then the caller's, then a placeholder.
        self.name = self.mol.name or self.name or "Unknown_Ligand"
        directory = Path(self.get_directory())
        if self.name == "Unknown_Ligand":
            # Number the placeholder after the files already present in the directory.
            num = len(list(directory.glob(f"{self.name}*")))
            self.name = f"{self.name}_{num + 1}"

        self.hac = self.mol.m.GetNumHeavyAtoms()
        if self.hac < 5:
            DEFAULT_LOGGER.log_warning("Ligand has less than 5 heavy atoms.")

        self.properties.update(self.mol.m.GetPropsAsDict())

        self.available_for_docking = not self.mol.contains_boron
        if save_to_file:
            self.write_to_file(output_format="sdf")

    @property
    def coordinates(self):
        """
        Ligand coordinates as a numpy array.

        Returns:
            np.ndarray: A float32 array built from ``self.mol.coords()``.
        """
        raw_coords = self.mol.coords()
        return np.array(raw_coords, dtype=np.float32)

    @property
    def atom_types(self):
        """
        Atom types of the ligand, as reported by the underlying molecule.

        Returns:
            The value of ``self.mol.species()`` (documented upstream as a list of
            atomic species strings such as ['C', 'H', 'O', 'N']).
        """
        species = self.mol.species()
        return species

    def _initialize_from_file(self, file_path: str) -> Molecule:
        """
        Build a Molecule from a structure file on disk.

        Args:
            file_path (str): Location of the molecular structure file.

        Returns:
            Molecule: The object returned by ``mol_from_file`` for this file.

        Raises:
            FileNotFoundError: If nothing exists at ``file_path``.
            Exception: Any error raised while loading is logged and re-raised unchanged.

        Notes:
            - ``self.block_type`` is set to the lower-cased file extension without the dot.
            - ``self.file_path`` is replaced by a ``Path`` for the file.
        """
        source = Path(file_path)
        if not source.exists():
            raise FileNotFoundError(f"The file {file_path} does not exist.")

        suffix = source.suffix.lower().lstrip(".")
        self.block_type = suffix
        self.file_path = source

        try:
            loaded = mol_from_file(suffix, str(source))
            DEFAULT_LOGGER.log_info(f"Initialized Ligand from file {file_path}.")
            return loaded
        except Exception as err:
            DEFAULT_LOGGER.log_error(f"Failed to initialize Ligand from file {file_path}: {str(err)}")
            raise

    def set_property(self, prop_name: str, prop_value):
        """
        Store a property on the ligand and mirror it onto the RDKit molecule.

        Args:
            prop_name (str): Key under which the property is stored.
            prop_value: Value to store. The properties dict keeps it as given;
                the RDKit molecule receives ``str(prop_value)``.

        Note:
            The dict is updated first, then the molecule, then an INFO message is logged.
        """
        self.properties[prop_name] = prop_value
        rd_mol = self.mol.m
        rd_mol.SetProp(prop_name, str(prop_value))
        DEFAULT_LOGGER.log_info(f"Set property '{prop_name}' to '{prop_value}'.")

    def get_property(self, prop_name: str):
        """
        Look up a property, first in the local dict and then on the molecule.

        Args:
            prop_name (str): Name of the property to look up.

        Returns:
            Any: The stored value, or None when neither the properties dict nor the
            molecule has it.

        Notes:
            A dict entry whose value is None is treated as missing. A value found on
            the molecule (via HasProp/GetProp) is copied into the properties dict
            before being returned. Every outcome is logged at INFO level.
        """
        cached = self.properties.get(prop_name)
        if cached is not None:
            DEFAULT_LOGGER.log_info(f"Retrieved property '{prop_name}' from properties dictionary: '{cached}'.")
            return cached

        if not self.mol.m.HasProp(prop_name):
            DEFAULT_LOGGER.log_info(f"Property '{prop_name}' not found.")
            return None

        fetched = self.mol.m.GetProp(prop_name)
        self.properties[prop_name] = fetched
        DEFAULT_LOGGER.log_info(f"Retrieved property '{prop_name}' from molecule: '{fetched}'.")
        return fetched

    def write_to_file(self, output_path: str = "", output_format: str = ""):
        """
        Write the ligand structure to a file in the specified format.

        Supported formats are PDB, MOL, and SDF. Before writing, the ligand name,
        SMILES and every entry of ``self.properties`` are pushed onto the RDKit
        molecule via ``set_property`` (values are stored as strings).

        Args:
            output_path (str, optional): The path where the file should be written. If not provided,
                the file is written in the ligand's directory as ``<name>.<format>``.
            output_format (str, optional): The desired output format ('pdb', 'mol' or 'sdf',
                with or without a leading dot, case-insensitive). If not provided, it is
                inferred from the output_path extension.

        Raises:
            ValueError: If neither output_path nor output_format is provided, or if the
                resulting format is not one of the supported ones.
            Exception: Any error during writing is logged and re-raised.

        Note:
            - If the output format doesn't match the file extension, a warning is logged and
              the specified output format is used; the file name itself is not changed.
            - If the path has no extension, the specified output format is used silently.
            - Properties are written in the following format:
                - PDB: As REMARK lines before the PDB block
                - MOL: As ``>  <name>`` blocks after the molecule
                - SDF: As SD fields
        """
        try:
            if output_format == "" and output_path == "":
                raise ValueError("Please provide either output_path or output_format.")

            # Normalize the requested format to a lower-case, dot-prefixed suffix.
            requested = output_format.strip().lower()
            if requested and not requested.startswith("."):
                requested = f".{requested}"

            if not output_path:
                # Using the normalized suffix avoids names like "ligand..sdf".
                output_path = str(Path(self.get_directory()) / f"{self.name}{requested}")

            path = Path(output_path)
            extension = path.suffix.lower()
            if requested and extension != requested:
                if extension:
                    DEFAULT_LOGGER.log_warning(
                        "Output format does not match the file extension. Writing to provided output format."
                    )
                # Also covers extension-less paths, which previously failed as unsupported.
                extension = requested

            if self.name:
                self.set_property("_Name", self.name)
            if self.mol.smiles:
                self.set_property("_SMILES", self.mol.smiles)
            # Iterate over a snapshot because set_property writes back into the dict.
            for prop_name, prop_value in list(self.properties.items()):
                self.set_property(prop_name, str(prop_value))

            if extension == ".pdb":
                pdb_block = Chem.MolToPDBBlock(self.mol.m)
                remark_lines = "".join(
                    f"REMARK   {prop_name}: {prop_value}\n"
                    for prop_name, prop_value in self.mol.m.GetPropsAsDict().items()
                )
                path.write_text(remark_lines + pdb_block)
            elif extension == ".sdf":
                writer = Chem.SDWriter(str(path))
                try:
                    writer.SetKekulize(False)
                    writer.write(self.mol.m)
                finally:
                    # Always release the file handle, even if writing fails.
                    writer.close()
            elif extension == ".mol":
                mol_block = Chem.MolToMolBlock(self.mol.m)
                prop_lines = "".join(
                    f">  <{prop_name}>\n{prop_value}\n\n"
                    for prop_name, prop_value in self.mol.m.GetPropsAsDict().items()
                )
                path.write_text(mol_block + "\n" + prop_lines)
            else:
                raise ValueError(
                    f"Unsupported file extension '{extension}'. Supported extensions are '.pdb', '.mol', '.sdf'."
                )

            DEFAULT_LOGGER.log_info(f"Ligand structure written to {output_path}.")
        except Exception as e:
            DEFAULT_LOGGER.log_error(f"Failed to write structure to file {output_path}: {str(e)}")
            raise

    def get_center(self) -> Optional[List[float]]:
        """
        Compute the mean position of the ligand's coordinates.

        Returns:
            Optional[List[float]]: The per-axis mean of ``self.coordinates`` as a
                                  list of Python floats, or None (with a warning
                                  logged) when ``self.coordinates`` is None.

        Example:
            >>> ligand.get_center()
            [1.234, -2.345, 3.456]
        """
        if self.coordinates is None:
            DEFAULT_LOGGER.log_warning("Coordinates are not available for this ligand.")
            return None

        centroid = self.coordinates.mean(axis=0)
        DEFAULT_LOGGER.log_info(f"Calculated center coordinates: {centroid.tolist()}")
        return list(map(float, centroid.tolist()))

    def draw(self):
        """
        Produce a drawing of the ligand by delegating to the underlying molecule.

        Returns:
            Whatever ``self.mol.draw()`` returns (documented upstream as the 2D
            structural image of the ligand).
        """
        rendering = self.mol.draw()
        return rendering

    @jupyter_visualization
    def visualize(self) -> str:
        """
        Render this ligand with ``MoleculeViewer``.

        The ligand is first written to ``<system temp dir>/<name>_visualize.sdf``
        and that file is handed to the viewer.

        Returns:
            str: The HTML returned by ``MoleculeViewer.render_ligand``.

        Raises:
            Exception: Any failure is logged and re-raised unchanged.
        """
        try:
            sdf_path = Path(tempfile.gettempdir()) / f"{self.name}_visualize.sdf"
            self.write_to_file(str(sdf_path))

            molecule_viewer = MoleculeViewer(str(sdf_path), format="sdf")
            config = molecule_viewer.get_ligand_visualization_config()
            return molecule_viewer.render_ligand(ligand_config=config)
        except Exception as err:
            DEFAULT_LOGGER.log_error(f"Visualization failed: {str(err)}")
            raise

    @classmethod
    def create_ligands_from_sdf(cls, file_path: str) -> List["Ligand"]:
        """
        Build one Ligand per readable record of an SDF file.

        Args:
            file_path (str): Path to the SDF file containing molecular structures.

        Returns:
            List[Ligand]: The ligands that could be created, in file order. The list
                is empty when no record could be converted or reading the file failed.

        Raises:
            FileNotFoundError: If the specified file path does not exist.

        Example:
            >>> ligands = Ligand.create_ligands_from_sdf("molecules.sdf")
            >>> print(len(ligands))  # Number of successfully parsed molecules

        Notes:
            - Records RDKit cannot parse are skipped with a warning (indices are 1-based)
            - A record whose Ligand construction raises is logged as an error and skipped
            - SD properties of each record are passed to the Ligand as ``properties``
            - Errors while reading the file as a whole are logged, not raised
        """
        sdf_path = Path(file_path)
        if not sdf_path.exists():
            raise FileNotFoundError(f"The file '{file_path}' does not exist.")

        collected = []
        try:
            supplier = Chem.SDMolSupplier(str(sdf_path))
            for position, record in enumerate(supplier, start=1):
                try:
                    if record is None:
                        DEFAULT_LOGGER.log_warning(f"Skipping molecule at index {position} due to parsing error.")
                        continue
                    collected.append(
                        Ligand(
                            block_type="sdf",
                            block_content=Chem.MolToMolBlock(record),
                            properties=record.GetPropsAsDict(),
                        )
                    )
                except Exception as err:
                    DEFAULT_LOGGER.log_error(
                        f"Failed to create Ligand from SDF file molecule_idx = '{position}': {str(err)}"
                    )
            DEFAULT_LOGGER.log_info(f"Created {len(collected)} Ligand instances from SDF file '{file_path}'.")
        except Exception as err:
            DEFAULT_LOGGER.log_error(f"Failed to create Ligands from SDF file '{file_path}': {str(err)}")

        return collected

    @classmethod
    def create_ligands_from_csv(cls, file_path: str) -> List["Ligand"]:
        """
        Build Ligand instances from the rows of a CSV file.

        The CSV must have a column whose stripped, lower-cased header is 'smiles'.
        Every other column is attached to the ligand as a property when its value
        is not missing.

        Args:
            file_path (str): Path to the CSV file containing ligand data.

        Returns:
            List[Ligand]: The ligands that could be created, in row order.

        Raises:
            FileNotFoundError: If the specified file does not exist.

        Notes:
            - An empty file, a parse failure, a missing 'smiles' column or any other
              file-level error is logged and whatever was collected so far is
              returned; these errors are not propagated.
            - Rows with missing or invalid SMILES strings are skipped with a warning.
            - An error while processing a single row is logged and the row is skipped.

        Example CSV format:
            smiles,name,molecular_weight
            CC(=O)O,acetic acid,60.052
            CCO,ethanol,46.068
        """
        csv_path = Path(file_path)
        if not csv_path.exists():
            raise FileNotFoundError(f"The file '{file_path}' does not exist.")

        collected = []
        try:
            frame = pd.read_csv(file_path)
            lowered_headers = [header.strip().lower() for header in frame.columns]

            if "smiles" not in lowered_headers:
                raise ValueError("CSV file must contain a 'smiles' column.")

            # Map the normalized header back to the column's original spelling.
            smiles_col = frame.columns[lowered_headers.index("smiles")]
            property_columns = [header for header in frame.columns if header != smiles_col]

            for idx, row in frame.iterrows():
                try:
                    smiles = row[smiles_col]
                    if pd.isna(smiles):
                        DEFAULT_LOGGER.log_warning(f"Skipping row {idx + 1}: SMILES value is missing.")
                        continue
                    if Chem.MolFromSmiles(smiles) is None:
                        DEFAULT_LOGGER.log_warning(f"Skipping row {idx + 1}: Invalid SMILES '{smiles}'.")
                        continue
                    new_ligand = Ligand(smiles=smiles)
                    for header in property_columns:
                        cell = row[header]
                        if pd.notna(cell):
                            new_ligand.set_property(header, cell)
                    collected.append(new_ligand)
                except Exception as err:
                    DEFAULT_LOGGER.log_error(f"Failed to create Ligand from CSV file row {idx + 1}: {str(err)}")

            DEFAULT_LOGGER.log_info(f"Created {len(collected)} Ligand instances from CSV file '{file_path}'.")

        except pd.errors.EmptyDataError:
            DEFAULT_LOGGER.log_error(f"The CSV file '{file_path}' is empty.")
        except pd.errors.ParserError as err:
            DEFAULT_LOGGER.log_error(f"Error parsing CSV file '{file_path}': {str(err)}")
        except Exception as err:
            DEFAULT_LOGGER.log_error(f"Failed to create Ligands from CSV file '{file_path}': {str(err)}")

        return collected

    @classmethod
    def create_ligands_from_file(cls, file_path: str, file_type: str) -> List["Ligand"]:
        """
        Load ligands from a file, choosing the reader by file type.

        Args:
            file_path (str): Path to the input file containing ligand data.
            file_type (str): Either 'sdf' or 'csv' (case-insensitive).

        Returns:
            List[Ligand]: The result of ``create_ligands_from_sdf`` or
                ``create_ligands_from_csv`` for the given file.

        Raises:
            ValueError: If the file_type is not supported ('sdf' or 'csv').

        Examples:
            >>> ligands = Ligand.create_ligands_from_file("compounds.sdf", "sdf")
            >>> ligands = Ligand.create_ligands_from_file("compounds.csv", "csv")
        """
        file_type = file_type.lower()

        if file_type == "sdf":
            return cls.create_ligands_from_sdf(file_path)
        if file_type == "csv":
            return cls.create_ligands_from_csv(file_path)

        raise ValueError(f"Unsupported file format '{file_type}'. Only 'sdf' and 'csv' are supported.")

    @classmethod
    def convert_to_sdf(cls, block_content: str, block_type: str):
        """
        Converts molecular block content to a molblock string.

        The block is parsed with ``mol_from_block`` (sanitized, hydrogens kept),
        written once through an RDKit ``SDWriter`` to a scratch file, and the
        molecule's molblock is returned. The scratch file lives in a private
        temporary directory that is removed before returning.

        Args:
            block_content (str): The string content of the molecular block to convert
            block_type (str): The type of molecular block (e.g. 'MOL', 'SDF', etc.)

        Returns:
            str: The value of ``molecule.molblock()`` if successful
            None: If conversion fails (the error is logged, not raised)

        Examples:
            >>> sdf_block = Ligand.convert_to_sdf("molecular block content", "MOL")
            >>> if sdf_block:
            ...     # Process the SDF block
            ... else:
            ...     # Handle conversion failure
        """
        try:
            molecule = mol_from_block(block_type, block_content, sanitize=True, remove_hs=False)
            # A private temporary directory replaces the deprecated, race-prone
            # tempfile.mktemp and guarantees the scratch file is cleaned up.
            with tempfile.TemporaryDirectory() as scratch_dir:
                writer = Chem.SDWriter(str(Path(scratch_dir) / "ligand.sdf"))
                try:
                    writer.write(molecule.m)
                finally:
                    writer.close()

            return molecule.molblock()
        except Exception as e:
            DEFAULT_LOGGER.log_error(f"Failed to convert ligand block content to SDF: {str(e)}")
            return None

    @classmethod
    def fetch_smiles_from_pdb_api(cls, res_name: str) -> Optional[str]:
        """
        Retrieves the SMILES string representation of a ligand from the PDB API.

        Queries ``https://data.rcsb.org/rest/v1/core/chemcomp/<RES_NAME>`` and reads
        ``rcsb_chem_comp_descriptor.smilesstereo`` from the JSON response.

        Args:
            res_name (str): The residue name/identifier of the ligand to query
                (upper-cased before it is placed in the URL).

        Returns:
            Optional[str]: The stereochemical SMILES string of the ligand if found,
            otherwise None. Failures (non-200 status, missing SMILES, network
            errors, timeouts) are logged and reported as None rather than raised.

        Example:
            >>> smiles = Ligand.fetch_smiles_from_pdb_api("ATP")
            >>> print(smiles)
            'NC1=C2N=CN(C(O)C3OC(COP(O)(=O)OP(O)(=O)OP(O)(O)=O)C(O)C3O)C2=NC=N1'
        """
        try:
            query_url = f"https://data.rcsb.org/rest/v1/core/chemcomp/{res_name.upper()}"
            # Without a timeout, requests can block forever on an unresponsive server.
            response = requests.get(query_url, timeout=30)
            if response.status_code != 200:
                raise ValueError(f"Failed to retrieve data for ligand '{res_name}' from PDB API.")
            data = response.json()
            smiles = data.get("rcsb_chem_comp_descriptor", {}).get("smilesstereo")
            if not smiles:
                raise ValueError(f"SMILES not found for ligand '{res_name}'.")
            return smiles
        except Exception as e:
            DEFAULT_LOGGER.log_error(f"Failed to fetch SMILES from PDB API: {str(e)}")

        return None

    @classmethod
    @jupyter_visualization
    def visualize_ligands_from_sdf(cls, file_path: str):
        """
        Render the ligands stored in an SDF file with ``MoleculeViewer``.

        Args:
            file_path (str): The path to the SDF file.

        Returns:
            The HTML returned by ``MoleculeViewer.render_ligand``.

        Raises:
            Exception: Any error raised by the viewer is logged and re-raised unchanged.
        """
        try:
            molecule_viewer = MoleculeViewer(str(file_path), format="sdf")
            config = molecule_viewer.get_ligand_visualization_config()
            return molecule_viewer.render_ligand(ligand_config=config)
        except Exception as err:
            DEFAULT_LOGGER.log_error(f"Visualization failed: {str(err)}")
            raise

    @classmethod
    @jupyter_visualization
    def visualize_ligands(cls, ligands: List["Ligand"]):
        """
        Render several ligands in a single viewer.

        Each ligand is written to a scratch SDF file with ``write_to_file``. The
        file contents are read back, concatenated in input order and handed to
        ``MoleculeViewer`` as one SDF payload.

        Args:
            ligands (List[Ligand]): The ligand objects to visualize.

        Returns:
            The value produced by ``MoleculeViewer.render_ligand``; the
            ``jupyter_visualization`` decorator receives it.

        Raises:
            Exception: Any error raised while writing, reading or rendering is
                logged with ``DEFAULT_LOGGER.log_error`` and re-raised unchanged.
        """
        try:
            sdf_blocks = []
            # A private temporary directory is deleted when the ``with`` block
            # exits, so no descriptor or scratch file outlives this call.
            with tempfile.TemporaryDirectory() as scratch_dir:
                scratch_file = str(Path(scratch_dir) / "ligand.sdf")
                for ligand in ligands:
                    ligand.write_to_file(output_format="sdf", output_path=scratch_file)

                    with open(scratch_file, "r") as fd:
                        sdf_blocks.append(fd.read())

            viewer = MoleculeViewer(data="".join(sdf_blocks), format="sdf")
            ligand_config = viewer.get_ligand_visualization_config()
            html = viewer.render_ligand(ligand_config=ligand_config)

            return html
        except Exception as e:
            DEFAULT_LOGGER.log_error(f"Visualization failed: {str(e)}")
            raise

    def _repr_html_(self) -> str:
        """
        Return the HTML representation of the object for Jupyter Notebook.

        Delegates to ``self.visualize()``. If that raises, a warning is logged
        and the plain-text ``__str__`` output is returned instead, so displaying
        a ligand never fails.

        Returns:
            str: The HTML content, or the text summary when visualization fails.
        """
        try:
            # No stdout output here: a repr hook should only return the markup.
            return self.visualize()
        except Exception as e:
            DEFAULT_LOGGER.log_warning(f"Failed to generate HTML representation: {str(e)}")
            return self.__str__()

    def __str__(self) -> str:
        """
        Build a multi-line, human-readable summary of the ligand.

        The summary always lists name, SMILES and heavy-atom count. Properties and
        protein cross-reference fields are appended only when ``self.properties``
        / ``self.xref_protein`` are truthy.

        Returns:
            str: The summary text, prefixed with ``"Ligand:"``.
        """
        pieces = [f"Name: {self.name}\nSMILES: {self.mol.smiles}\nHeavy Atoms: {self.hac}\n"]

        if self.properties:
            pieces.append("Properties:\n")
            pieces.extend(f"  {key}: {val}\n" for key, val in self.properties.items())

        if self.xref_protein:
            pieces.append(f"Cross-reference Protein Chain ID: {self.xref_protein_chain_id}\n")
            pieces.append(f"Cross-reference Residue ID: {self.xref_residue_id}\n")
            pieces.append(f"Cross-reference Insertion Code: {self.xref_ins_code}\n")

        summary = "".join(pieces)
        return f"Ligand:\n  {summary}"

    def __repr__(self) -> str:
        """Return the same text summary as ``__str__``."""
        return str(self)

    @staticmethod
    def get_directory() -> str:
        """
        Return the ``ligands`` directory under ``WORKING_DIR``, creating it if needed.

        Missing parent directories are created as well; an already existing
        directory is not an error.

        Returns:
            str: The path to the ligands directory.
        """
        target_dir = Path(WORKING_DIR, "ligands")
        target_dir.mkdir(exist_ok=True, parents=True)
        return str(target_dir)

    def admet_properties(self) -> str:
        """
        Predict ADMET properties for the ligand and store them on it.

        ``predict_properties`` is called with the ligand's SMILES and the first
        element of its result is used. Every entry except the ``"smiles"`` key is
        copied onto the ligand through ``set_property``.

        Returns:
            The first element returned by ``predict_properties`` (a mapping, since
            its ``items()`` are iterated) on success. If anything raises, the error
            is logged and the literal string
            ``"Failed to predict ADMET properties."`` is returned instead.
            NOTE(review): the ``-> str`` annotation only matches the failure case.
        """
        try:
            predicted = predict_properties(smiles=self.mol.smiles)[0]
            for prop_name, prop_value in predicted.items():
                if prop_name != "smiles":
                    self.set_property(prop_name, prop_value)
            return predicted
        except Exception as err:
            DEFAULT_LOGGER.log_error(f"Failed to predict ADMET properties: {str(err)}")
            return "Failed to predict ADMET properties."

    def protonate(self, pH: float = 7.4, filter_percentage: float = 1):
        """
        Protonates the ligand molecule at a given pH value.

        Calls the module-level ``protonate`` helper with the ligand's SMILES. When
        it returns a non-empty result, that value is stored in
        ``self.protonated_smiles`` and as the ``"ProtonatedSMILES"`` property.

        Args:
            pH (float, optional): The pH value at which to protonate the molecule. Defaults to 7.4.
            filter_percentage (float, optional): The filtering threshold for protonation states,
                forwarded unchanged to the helper. Defaults to 1.

        Returns:
            self: The ligand instance, on success and on failure alike, so calls
            can be chained.

        Notes:
            Exceptions are not propagated: a failure is logged with
            ``DEFAULT_LOGGER.log_error`` and the ligand is returned unmodified.

        Example:
            >>> ligand.protonate(pH=7.0, filter_percentage=0.8)
        """
        try:
            smiles = protonate(
                pH=pH,
                smiles=self.mol.smiles,
                filter_percentage=filter_percentage,
            )
            if smiles:
                self.protonated_smiles = smiles
                self.set_property("ProtonatedSMILES", smiles)
        except Exception as e:
            DEFAULT_LOGGER.log_error(f"Failed to protonate the ligand molecule: {str(e)}")

        # Returned outside the except branch so the success path yields the
        # instance too, not None.
        return self

    def update_coordinates(self, coords: np.ndarray):
        """
        Overwrite the positions of the ligand's conformer with ``coords``.

        Args:
            coords (np.ndarray): New coordinates. The first dimension must equal the
                atom count of the conformer or of the hydrogen-stripped molecule.

        Raises:
            ValueError: If the molecule has no conformer.
            ValueError: If the number of rows matches neither atom count.

        Notes:
            - The array is cast to float64 and written to the existing conformer.
            - An info message is logged after the update.
            - NOTE(review): when only the hydrogen-free count matches, the array is
              still passed to the full conformer's ``SetPositions`` - confirm this
              is the intended behaviour.
        """
        rd_mol = self.mol.m
        if rd_mol.GetNumConformers() == 0:
            raise ValueError("Ligand molecule has no conformers to update.")

        full_conformer = rd_mol.GetConformer()
        heavy_conformer = Chem.RemoveHs(rd_mol).GetConformer()

        n_rows = coords.shape[0]
        if n_rows != full_conformer.GetNumAtoms() and n_rows != heavy_conformer.GetNumAtoms():
            raise ValueError("Number of ligand atoms does not match the conformer's atom count.")

        full_conformer.SetPositions(coords.astype(np.float64))
        DEFAULT_LOGGER.log_info("Ligand coordinates has been inplaced updated.")

    @classmethod
    def protonate_molecules(cls, ligands):
        """
        Protonate a collection of ligands, showing a progress bar.

        String entries are treated as SMILES and turned into ``Ligand`` objects
        first. ``protonate()`` is then called on every ligand whose
        ``protonated_smiles`` is still falsy.

        Args:
            ligands (List[Union[str, Ligand]]): SMILES strings and/or Ligand objects.

        Returns:
            List[Ligand]: The processed ligands in input order. An entry is left
            out when building the Ligand from SMILES raises, or when the
            protonation step raises.

        Notes:
            Exceptions for individual entries are logged and do not abort the run.

        Example:
            >>> smiles_list = ['CC(=O)O', 'CN1C=NC=C1']
            >>> protonated_ligands = Ligand.protonate_molecules(smiles_list)
        """
        processed = []

        for entry in tqdm(ligands, desc="Protonating Molecules"):
            if isinstance(entry, str):
                try:
                    entry = Ligand(smiles=entry)
                except Exception as err:
                    DEFAULT_LOGGER.log_error(f"Failed to create Ligand from SMILES: {str(err)}")
                    continue

            try:
                if not entry.protonated_smiles:
                    entry.protonate()
            except Exception as err:
                DEFAULT_LOGGER.log_error(f"Failed to protonate the ligand molecule: {str(err)}")
                continue

            processed.append(entry)

        return processed

Initialize a Ligand object.

This constructor creates a Ligand object from various input sources and validates the molecular structure.

Args

identifier : str, optional
Name or identifier of the molecule. Defaults to "".
file_path : str, optional
Path to input file containing molecule data. Defaults to "".
smiles : str, optional
SMILES string representation of molecule. Defaults to "".
block_type : str, optional
Type of molecular block content (e.g. "mol", "sdf"). Defaults to "".
block_content : str, optional
Content of molecular block. Defaults to "".
name : str, optional
Name for the molecule. Defaults to "".
seed : int, optional
Random seed for coordinate generation. Defaults to None.
xref_protein : str, optional
Cross-reference to protein. Defaults to "".
xref_ins_code : str, optional
Cross-reference insertion code. Defaults to "".
xref_residue_id : str, optional
Cross-reference residue ID. Defaults to "".
xref_protein_chain_id : str, optional
Cross-reference protein chain ID. Defaults to "".
save_to_file : bool, optional
Whether to save molecule to file. Defaults to False.
properties : dict, optional
Additional properties for the molecule. Defaults to None.

Raises

ValueError
If not exactly one input source is provided (identifier, file_path, smiles, or block_content).
ValueError
If block_type is not provided when initializing from block_content.
ValueError
If molecule creation fails.

Notes

  • Only one input source (identifier, file_path, smiles, or block_content) should be provided
  • Automatically generates coordinates if needed
  • Performs validation checks including heavy atom count
  • Can optionally save the molecule to file
  • Stores various properties including cross-references to protein structure

Static methods

def convert_to_sdf(block_content: str, block_type: str)

Converts molecular block content to SDF format.

This class method takes a molecular block content and its type, attempts to convert it to an RDKit molecule object, and returns the molecule in SDF molblock format.

Args

block_content : str
The string content of the molecular block to convert
block_type : str
The type of molecular block (e.g. 'MOL', 'SDF', etc.)

Returns

str
The converted molecule in SDF molblock format if successful
None
If conversion fails

Raises

Exception
Handles any exceptions during conversion and returns None after logging error

Examples

>>> sdf_block = LigandStructure.convert_to_sdf("molecular block content", "MOL")
>>> if sdf_block:
...     # Process the SDF block
... else:
...     # Handle conversion failure
def create_ligands_from_csv(file_path: str) ‑> List[Ligand]

Creates Ligand instances from a CSV file containing SMILES strings and optional additional properties.

This class method reads a CSV file and creates Ligand objects from each row. The CSV file must contain a 'smiles' column (case-insensitive). Additional columns are treated as properties of the ligand.

Args

file_path : str
Path to the CSV file containing ligand data.

Returns

List[Ligand]
A list of created Ligand instances.

Raises

FileNotFoundError
If the specified file does not exist.
ValueError
If the CSV file does not contain a 'smiles' column.
pd.errors.EmptyDataError
If the CSV file is empty.
pd.errors.ParserError
If there are issues parsing the CSV file.

Notes

  • Rows with missing or invalid SMILES strings are skipped with a warning.
  • All column names are normalized (stripped and converted to lowercase) for comparison.
  • Non-SMILES columns are added as properties to the Ligand instances.
  • Any errors during processing of individual rows are logged but don't stop the overall process.

Example CSV format:

    smiles,name,molecular_weight
    CC(=O)O,acetic acid,60.052
    CCO,ethanol,46.068

def create_ligands_from_file(file_path: str, file_type: str) ‑> List[Ligand]

Creates a list of Ligand objects from a file.

Args

file_path : str
Path to the input file containing ligand data.
file_type : str
Type of the input file. Supported types are 'sdf' and 'csv'.

Returns

List[Ligand]
A list of Ligand objects created from the file data.

Raises

ValueError
If the file_type is not supported ('sdf' or 'csv').

Examples

>>> ligands = Ligand.create_ligands_from_file("compounds.sdf", "sdf")
>>> ligands = Ligand.create_ligands_from_file("compounds.csv", "csv")
def create_ligands_from_sdf(file_path: str) ‑> List[Ligand]

Creates a list of Ligand objects from an SDF file.

This class method reads molecules from an SDF file and converts each valid molecule into a Ligand instance. It handles potential parsing errors and logs relevant information.

Args

file_path : str
Path to the SDF file containing molecular structures.

Returns

List[Ligand]
A list of Ligand objects created from the SDF file. Returns an empty list if no valid molecules are found or in case of errors.

Raises

FileNotFoundError
If the specified file path does not exist.

Example

>>> ligands = Ligand.create_ligands_from_sdf("molecules.sdf")
>>> print(len(ligands))  # Number of successfully parsed molecules

Notes

  • Molecules that fail to parse will be skipped and logged as warnings
  • Properties from the SDF file are preserved and stored in the Ligand objects
  • Progress and errors are tracked through the DEFAULT_LOGGER
def fetch_smiles_from_pdb_api(res_name: str) ‑> str

Retrieves the SMILES string representation of a ligand from the PDB API.

This class method queries the RCSB PDB REST API to fetch the stereochemical SMILES notation for a given ligand residue name. If the API request fails or the SMILES data is not found, appropriate errors are logged.

Args

res_name : str
The residue name/identifier of the ligand to query.

Returns

str
The stereochemical SMILES string of the ligand if found.
None
If the API request fails or SMILES data is not available.

Raises

ValueError
If the API request fails or SMILES data is not found for the given ligand.

Example

>>> smiles = Ligand.fetch_smiles_from_pdb_api("ATP")
>>> print(smiles)
'NC1=C2N=CN(C(O)C3OC(COP(O)(=O)OP(O)(=O)OP(O)(O)=O)C(O)C3O)C2=NC=N1'
def get_directory() ‑> str
Expand source code
@staticmethod
def get_directory() -> str:
    """
    Return the ``ligands`` directory under ``WORKING_DIR``, creating it if needed.

    Missing parent directories are created as well; an already existing
    directory is not an error.

    Returns:
        str: The path to the ligands directory.
    """
    target_dir = Path(WORKING_DIR, "ligands")
    target_dir.mkdir(exist_ok=True, parents=True)
    return str(target_dir)

Returns the path of the "ligands" directory under the working directory, creating it (and any missing parents) if it does not exist.

Args:

Returns

str
The path to the ligands directory.
def protonate_molecules(ligands)

Protonates a list of ligands by adding hydrogens at physiological pH. This class method processes a list of ligands, either as SMILES strings or Ligand objects, and returns a list of protonated Ligand objects. It handles the protonation of each ligand while managing potential errors during SMILES parsing or protonation.

Args

ligands : List[Union[str, Ligand]]
A list containing either SMILES strings or Ligand objects to be protonated.

Returns

List[Ligand]
A list of successfully protonated Ligand objects. Failed ligands are excluded from the output list.

Raises

None
Exceptions during processing individual ligands are caught and logged.

Example

>>> smiles_list = ['CC(=O)O', 'CN1C=NC=C1']
>>> protonated_ligands = Ligand.protonate_molecules(smiles_list)
def visualize_ligands(*args, **kwargs)
def visualize_ligands_from_sdf(*args, **kwargs)

Instance variables

prop atom_types
Expand source code
@property
def atom_types(self):
    """
    Atom types of the ligand, as reported by the underlying molecule.

    Returns:
        The result of ``self.mol.species()`` - presumably a list of unique
        element symbols such as ['C', 'H', 'O', 'N']; verify against the
        molecule wrapper.
    """
    molecule = self.mol
    return molecule.species()

Returns a list of unique atom types present in the ligand molecule.

Returns

list
A list of strings representing unique atomic species (e.g. ['C', 'H', 'O', 'N'])
prop coordinates
Expand source code
@property
def coordinates(self):
    """
    Ligand coordinates as a float32 numpy array.

    A new array is built from ``self.mol.coords()`` on every access.

    Returns:
        np.ndarray: The coordinates converted to ``np.float32``.
    """
    raw_coords = self.mol.coords()
    return np.array(raw_coords, dtype=np.float32)

Returns a numpy array of ligand coordinates.

Returns

np.ndarray
A numpy array of float32 containing the 3D coordinates of all atoms in the ligand.

Methods

def admet_properties(self) ‑> str
Expand source code
def admet_properties(self) -> str:
    """
    Predict ADMET properties for the ligand and store them on it.

    ``predict_properties`` is called with the ligand's SMILES and the first
    element of its result is used. Every entry except the ``"smiles"`` key is
    copied onto the ligand through ``set_property``.

    Returns:
        The first element returned by ``predict_properties`` (a mapping, since
        its ``items()`` are iterated) on success. If anything raises, the error
        is logged and the literal string
        ``"Failed to predict ADMET properties."`` is returned instead.
        NOTE(review): the ``-> str`` annotation only matches the failure case.
    """
    try:
        predicted = predict_properties(smiles=self.mol.smiles)[0]
        for prop_name, prop_value in predicted.items():
            if prop_name != "smiles":
                self.set_property(prop_name, prop_value)
        return predicted
    except Exception as err:
        DEFAULT_LOGGER.log_error(f"Failed to predict ADMET properties: {str(err)}")
        return "Failed to predict ADMET properties."

Predict ADMET properties for the ligand.

Returns

str
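On success, the predicted ADMET properties as returned by the predictor (a mapping of property names to values, not a string); on failure, the message "Failed to predict ADMET properties.".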
def draw(self)
Expand source code
def draw(self):
    """
    Draw the ligand by delegating to the underlying molecule object.

    Returns:
        Whatever ``self.mol.draw()`` returns - presumably a 2D depiction image;
        verify against the molecule wrapper.
    """
    molecule = self.mol
    return molecule.draw()

Draws a visual representation of the ligand molecule.

Returns

Image
The 2D structural representation of the ligand molecule.
def get_center(self) ‑> List[float] | None
Expand source code
def get_center(self) -> Optional[List[float]]:
    """
    Calculate the center coordinates of the ligand.

    The center is the mean of the ligand coordinates along axis 0.

    Returns:
        Optional[List[float]]: The center coordinates as a list of floats [x, y, z],
                              or None if no coordinates are available (the
                              coordinates are None or an empty array).

    Example:
        >>> ligand.get_center()
        [1.234, -2.345, 3.456]
    """
    # Read the attribute once so the same array is used for both the
    # availability check and the mean.
    coords = self.coordinates
    # An empty array has no meaningful mean (it would yield NaN), so treat it
    # like missing coordinates.
    if coords is None or coords.size == 0:
        DEFAULT_LOGGER.log_warning("Coordinates are not available for this ligand.")
        return None
    center = coords.mean(axis=0).tolist()
    DEFAULT_LOGGER.log_info(f"Calculated center coordinates: {center}")
    return [float(x) for x in center]

Calculate the center coordinates of the ligand.

Returns

Optional[List[float]]
The center coordinates as a list of floats [x, y, z] or None if coordinates are not available.

Example

>>> ligand.get_center()
[1.234, -2.345, 3.456]
def get_property(self, prop_name: str)
Expand source code
def get_property(self, prop_name: str):
    """
    Look up a property by name, first in the cache and then on the molecule.

    Args:
        prop_name (str): The name of the property to retrieve.

    Returns:
        Any: The property value, or None when it is found in neither place.

    Notes:
        ``self.properties`` is consulted first; a stored value of None counts as
        a miss. On a miss the molecule is queried with ``HasProp``/``GetProp``.
        A value found there is copied into ``self.properties`` before it is
        returned. Every outcome is logged at info level.
    """
    cached = self.properties.get(prop_name)
    if cached is not None:
        DEFAULT_LOGGER.log_info(f"Retrieved property '{prop_name}' from properties dictionary: '{cached}'.")
        return cached

    if not self.mol.m.HasProp(prop_name):
        DEFAULT_LOGGER.log_info(f"Property '{prop_name}' not found.")
        return None

    fetched = self.mol.m.GetProp(prop_name)
    self.properties[prop_name] = fetched
    DEFAULT_LOGGER.log_info(f"Retrieved property '{prop_name}' from molecule: '{fetched}'.")
    return fetched

Retrieve a property value from the ligand object.

This method attempts to get the property value first from the properties dictionary, and if not found there, tries to retrieve it from the molecule object.

Args

prop_name : str
The name of the property to retrieve.

Returns

Any
The value of the property if found, None otherwise.

Notes

The method first checks the internal properties dictionary. If the property is not found there, it checks the molecule object using RDKit's HasProp/GetProp. If found in the molecule object, the value is also cached in the properties dictionary for future use.

def protonate(self, pH: float = 7.4, filter_percentage: float = 1)
Expand source code
def protonate(self, pH: float = 7.4, filter_percentage: float = 1):
    """
    Protonates the ligand molecule at a given pH value.

    Calls the module-level ``protonate`` helper with the ligand's SMILES. When
    it returns a non-empty result, that value is stored in
    ``self.protonated_smiles`` and as the ``"ProtonatedSMILES"`` property.

    Args:
        pH (float, optional): The pH value at which to protonate the molecule. Defaults to 7.4.
        filter_percentage (float, optional): The filtering threshold for protonation states,
            forwarded unchanged to the helper. Defaults to 1.

    Returns:
        self: The ligand instance, on success and on failure alike, so calls
        can be chained.

    Notes:
        Exceptions are not propagated: a failure is logged with
        ``DEFAULT_LOGGER.log_error`` and the ligand is returned unmodified.

    Example:
        >>> ligand.protonate(pH=7.0, filter_percentage=0.8)
    """
    try:
        smiles = protonate(
            pH=pH,
            smiles=self.mol.smiles,
            filter_percentage=filter_percentage,
        )
        if smiles:
            self.protonated_smiles = smiles
            self.set_property("ProtonatedSMILES", smiles)
    except Exception as e:
        DEFAULT_LOGGER.log_error(f"Failed to protonate the ligand molecule: {str(e)}")

    # Returned outside the except branch so the success path yields the
    # instance too, not None.
    return self

Protonates the ligand molecule at a given pH value.

This method attempts to generate a protonated version of the molecule using a pH-dependent protonation algorithm. If successful, it stores the protonated SMILES string and sets it as a property of the molecule.

Args

pH : float, optional
The pH value at which to protonate the molecule. Defaults to 7.4.
filter_percentage : float, optional
The filtering threshold for protonation states. Value between 0 and 1. Defaults to 1.

Returns

self
Returns the ligand instance, allowing for method chaining.

Raises

Exception
If protonation fails, the error is logged and the original instance is returned.

Example

>>> ligand.protonate(pH=7.0, filter_percentage=0.8)
def set_property(self, prop_name: str, prop_value)
Expand source code
def set_property(self, prop_name: str, prop_value):
    """
    Store a property on the ligand and mirror it onto the underlying molecule.

    Args:
        prop_name (str): Name of the property to set.
        prop_value: Value to store. ``self.properties`` keeps it as given; the
            molecule receives ``str(prop_value)``.

    Note:
        The assignment is logged at info level.
    """
    self.properties[prop_name] = prop_value
    text_value = str(prop_value)
    self.mol.m.SetProp(prop_name, text_value)
    DEFAULT_LOGGER.log_info(f"Set property '{prop_name}' to '{prop_value}'.")

Set a property for both the ligand properties dictionary and the underlying RDKit molecule.

Args

prop_name : str
Name of the property to set.
prop_value
Value to set for the property. Will be converted to string for RDKit molecule.

Note

The property is set both in the properties dict and RDKit molecule. The property value is logged at INFO level.

def update_coordinates(self, coords: numpy.ndarray)
Expand source code
def update_coordinates(self, coords: np.ndarray):
    """
    Overwrite the positions of the ligand's conformer with ``coords``.

    Args:
        coords (np.ndarray): New coordinates. The first dimension must equal the
            atom count of the conformer or of the hydrogen-stripped molecule.

    Raises:
        ValueError: If the molecule has no conformer.
        ValueError: If the number of rows matches neither atom count.

    Notes:
        - The array is cast to float64 and written to the existing conformer.
        - An info message is logged after the update.
        - NOTE(review): when only the hydrogen-free count matches, the array is
          still passed to the full conformer's ``SetPositions`` - confirm this
          is the intended behaviour.
    """
    rd_mol = self.mol.m
    if rd_mol.GetNumConformers() == 0:
        raise ValueError("Ligand molecule has no conformers to update.")

    full_conformer = rd_mol.GetConformer()
    heavy_conformer = Chem.RemoveHs(rd_mol).GetConformer()

    n_rows = coords.shape[0]
    if n_rows != full_conformer.GetNumAtoms() and n_rows != heavy_conformer.GetNumAtoms():
        raise ValueError("Number of ligand atoms does not match the conformer's atom count.")

    full_conformer.SetPositions(coords.astype(np.float64))
    DEFAULT_LOGGER.log_info("Ligand coordinates has been inplaced updated.")

Updates the 3D coordinates of the ligand molecule's conformer.

Args

coords : np.ndarray
Array of new 3D coordinates for the ligand atoms. Must match the number of atoms in either the full molecule or molecule without hydrogens.

Raises

ValueError
If the ligand molecule has no conformers to update.
ValueError
If the number of coordinates doesn't match the number of atoms in the molecule (either with or without hydrogens).

Notes

  • The coordinates are updated in-place on the existing conformer
  • The input coordinates are converted to float64 type
  • A success message is logged after updating
def visualize(*args, **kwargs)
Expand source code
def wrapper(*args, **kwargs):
    """Call the wrapped function and pass its result to ``JupyterViewer.visualize``."""
    # ``func`` is not defined in this excerpt - presumably the function received
    # by the enclosing decorator.
    rendered = func(*args, **kwargs)
    return JupyterViewer.visualize(rendered)
def write_to_file(self, output_path: str = '', output_format: str = '')
Expand source code
def write_to_file(self, output_path: str = "", output_format: str = ""):
    """
    Write the ligand structure to a file in the specified format.

    Supported formats are PDB, MOL and SDF. Before writing, the ligand name,
    SMILES and all entries of ``self.properties`` are pushed onto the molecule
    through ``set_property`` so that they end up in the output.

    Args:
        output_path (str, optional): The path where the file should be written. If not provided,
            the file is written to the ligand directory as ``<name>.<format>``.
        output_format (str, optional): The desired output format ('pdb', 'mol' or 'sdf', with or
            without a leading dot, case-insensitive). If not provided, it is inferred from the
            extension of ``output_path``.

    Raises:
        ValueError: If neither output_path nor output_format is provided, or if the resolved
            format is not one of '.pdb', '.mol', '.sdf'.
        Exception: Any error during writing is logged and re-raised.

    Note:
        - If the path has an extension that differs from the requested format, a warning is
          logged and the requested format is used. A path without an extension is written in
          the requested format without a warning.
        - Properties are written in the following format:
            - PDB: As REMARK lines before the PDB block
            - MOL: As ``>  <name>`` property blocks after the molecule
            - SDF: By ``Chem.SDWriter``
        - Property values in ``self.properties`` are converted to strings as a side effect.
    """
    try:
        if output_format == "" and output_path == "":
            raise ValueError("Please provide either output_path or output_format.")

        # Normalise once to ".ext" in lower case so "sdf", ".sdf" and "SDF"
        # are all treated alike.
        if output_format:
            output_format = f".{output_format.lstrip('.').lower()}"

        if not output_path:
            # output_format already carries its dot, so none is added here.
            output_path = str(Path(self.get_directory()) / f"{self.name}{output_format}")

        path = Path(output_path)
        extension = path.suffix.lower()
        if not output_format:
            output_format = extension

        if extension != output_format:
            # Warn only when the path really carries a conflicting suffix; a
            # suffix-less path just takes the requested format.
            if extension:
                DEFAULT_LOGGER.log_warning(
                    "Output format does not match the file extension. Writing to provided output format."
                )
            extension = output_format

        if self.name:
            self.set_property("_Name", self.name)
        if self.mol.smiles:
            self.set_property("_SMILES", self.mol.smiles)
        if self.properties:
            # Iterate over a snapshot: set_property writes back into
            # self.properties while we loop.
            for prop_name, prop_value in list(self.properties.items()):
                self.set_property(prop_name, str(prop_value))

        if extension == ".pdb":
            pdb_block = Chem.MolToPDBBlock(self.mol.m)
            remark_lines = "".join(
                f"REMARK   {prop_name}: {prop_value}\n"
                for prop_name, prop_value in self.mol.m.GetPropsAsDict().items()
            )
            path.write_text(remark_lines + pdb_block)
        elif extension == ".sdf":
            writer = Chem.SDWriter(str(path))
            try:
                writer.SetKekulize(False)
                writer.write(self.mol.m)
            finally:
                # Close the writer even when writing fails.
                writer.close()
        elif extension == ".mol":
            mol_block = Chem.MolToMolBlock(self.mol.m)
            prop_lines = "".join(
                f">  <{prop_name}>\n{prop_value}\n\n"
                for prop_name, prop_value in self.mol.m.GetPropsAsDict().items()
            )
            path.write_text(mol_block + "\n" + prop_lines)
        else:
            raise ValueError(
                f"Unsupported file extension '{extension}'. Supported extensions are '.pdb', '.mol', '.sdf'."
            )

        DEFAULT_LOGGER.log_info(f"Ligand structure written to {output_path}.")
    except Exception as e:
        DEFAULT_LOGGER.log_error(f"Failed to write structure to file {output_path}: {str(e)}")
        raise

Write the ligand structure to a file in the specified format.

This method writes the molecular structure and its properties to a file in the specified format. Supported formats are PDB, MOL, and SDF. Properties are included in the output file according to the format-specific conventions.

Args

output_path : str, optional
The path where the file should be written. If not provided, the file will be written in the ligand's directory with the name and format extension.
output_format : str, optional
The desired output format ('.pdb', '.mol', or '.sdf'). If not provided, it will be inferred from the output_path extension.

Raises

ValueError
If neither output_path nor output_format is provided, or if an unsupported file extension is specified.
Exception
If any error occurs during the file writing process.

Note

  • If the output format doesn't match the file extension, a warning will be logged and the specified output format will be used.
  • Properties are written in the following format:
    • PDB: As REMARK lines
    • MOL: As property blocks after the molecule
    • SDF: As SD fields
class Logger (log_level, log_dest)
Expand source code
class Logger:
    """
    Small depth-indented logger that writes to a file or an open stream.

    Each emitted entry is written as ``<indent>: <message>`` where the indent is
    two spaces per depth level. Writes are serialized with a lock; changes to
    ``depth`` are not.

    Args:
        log_level (str): Name of a ``LogLevel`` member. Messages whose level value
            is lower than this level's value are dropped.
        log_dest (str | file-like | None): A path (opened in append mode), an
            already-open writable stream, or a falsy value to use ``sys.stdout``.

    Raises:
        ValueError: If ``log_level`` is not the name of a ``LogLevel`` member.
    """

    def __init__(self, log_level, log_dest):
        self.level = self.str_to_loglevel(log_level)
        if isinstance(log_dest, str):
            self.file = open(log_dest, "a")
        elif log_dest:
            self.file = log_dest
        else:
            self.file = sys.stdout
        self.lock = threading.Lock()
        self.depth = 0

    def log(self, level, message, depth=None):
        """
        Write ``message`` if ``level`` is at or above the configured level.

        Args:
            level: Object with a ``value`` attribute (a ``LogLevel`` member).
            message: Text to write.
            depth (int, optional): Indentation depth for this entry; defaults to
                the logger's current depth.
        """
        if self.level.value <= level.value:
            if depth is None:
                depth = self.depth

            indent = "  " * depth  # Using two spaces per depth level for indentation
            log_entry = f"{indent}: {message}"
            with self.lock:
                print(log_entry, file=self.file)
                self.file.flush()

    def log_info(self, message, depth=None):
        """Log ``message`` at INFO level."""
        self.log(LogLevel.INFO, message, depth)

    def log_warning(self, message, depth=None):
        """Log ``message`` at WARNING level."""
        self.log(LogLevel.WARNING, message, depth)

    def log_error(self, message, depth=None):
        """Log ``message`` at ERROR level."""
        self.log(LogLevel.ERROR, message, depth)

    def log_debug(self, message, depth=None):
        """Log ``message`` at DEBUG level."""
        self.log(LogLevel.DEBUG, message, depth)

    def str_to_loglevel(self, level_str):
        """
        Look up the ``LogLevel`` member named ``level_str``.

        Raises:
            ValueError: If no member with that name exists.
        """
        try:
            return LogLevel[level_str]
        except KeyError as err:
            raise ValueError(f"Invalid log level: {level_str}") from err

    def close(self):
        """
        Close the log destination.

        The interpreter's standard streams are only flushed, never closed:
        closing ``sys.stdout`` would break every later ``print`` in the process.
        """
        standard_streams = (sys.stdout, sys.stderr, sys.__stdout__, sys.__stderr__)
        if any(self.file is stream for stream in standard_streams):
            self.file.flush()
            return
        self.file.close()

    def add_depth(self):
        """Increase the indentation depth by one."""
        self.depth += 1

    def sub_depth(self):
        """Decrease the indentation depth by one, never going below zero."""
        if self.depth > 0:
            self.depth -= 1

    def get_current_date(self):
        """Return the current local time formatted as ``YYYY-MM-DD HH:MM:SS``."""
        cur_date = datetime.datetime.now()
        return cur_date.strftime("%Y-%m-%d %H:%M:%S")

    def get_state_info(self):
        """Return ``date=<current timestamp>``."""
        d = self.get_current_date()
        return f"date={d}"

Methods

def add_depth(self)
Expand source code
def add_depth(self):
    """Increase the indentation depth used for subsequent log entries by one."""
    self.depth = self.depth + 1
def close(self)
Expand source code
def close(self):
    """
    Close the log destination.

    The interpreter's standard streams are only flushed, never closed:
    closing ``sys.stdout`` would break every later ``print`` in the process.
    """
    standard_streams = (sys.stdout, sys.stderr, sys.__stdout__, sys.__stderr__)
    if any(self.file is stream for stream in standard_streams):
        self.file.flush()
        return
    self.file.close()
def get_current_date(self)
Expand source code
def get_current_date(self):
    """Return the current local time formatted as ``YYYY-MM-DD HH:MM:SS``."""
    timestamp_format = "%Y-%m-%d %H:%M:%S"
    return datetime.datetime.now().strftime(timestamp_format)
def get_state_info(self)
Expand source code
def get_state_info(self):
    """Return a ``date=<timestamp>`` string built from ``get_current_date()``."""
    return f"date={self.get_current_date()}"
def log(self, level, message, depth=None)
Expand source code
def log(self, level, message, depth=None):
    """
    Write ``message`` to the log destination if ``level`` passes the threshold.

    Args:
        level: Object exposing a ``value`` attribute; the entry is emitted only
            when the logger's own level value does not exceed it.
        message: Text to write.
        depth (int, optional): Indentation depth for this entry. Defaults to the
            logger's current depth.
    """
    if self.level.value > level.value:
        return

    effective_depth = self.depth if depth is None else depth
    # Two spaces of indentation per depth level, then the ": " separator.
    line = "  " * effective_depth + f": {message}"
    with self.lock:
        print(line, file=self.file)
        self.file.flush()
def log_debug(self, message, depth=None)
Expand source code
def log_debug(self, message, depth=None):
    """Log ``message`` at DEBUG level; ``depth`` optionally overrides the current indentation."""
    severity = LogLevel.DEBUG
    self.log(severity, message, depth)
def log_error(self, message, depth=None)
Expand source code
def log_error(self, message, depth=None):
    """Log ``message`` at ERROR level; ``depth`` optionally overrides the current indentation."""
    severity = LogLevel.ERROR
    self.log(severity, message, depth)
def log_info(self, message, depth=None)
Expand source code
def log_info(self, message, depth=None):
    """Log ``message`` at INFO level; ``depth`` optionally overrides the current indentation."""
    severity = LogLevel.INFO
    self.log(severity, message, depth)
def log_warning(self, message, depth=None)
Expand source code
def log_warning(self, message, depth=None):
    """Log ``message`` at WARNING level; ``depth`` optionally overrides the current indentation."""
    severity = LogLevel.WARNING
    self.log(severity, message, depth)
def str_to_loglevel(self, level_str)
Expand source code
def str_to_loglevel(self, level_str):
    """
    Look up the ``LogLevel`` member named ``level_str``.

    Raises:
        ValueError: If no member with that name exists. The original
            ``KeyError`` is attached as the cause.
    """
    try:
        return LogLevel[level_str]
    except KeyError as err:
        raise ValueError(f"Invalid log level: {level_str}") from err
def sub_depth(self)
Expand source code
def sub_depth(self):
    """Decrease the indentation depth by one, never going below zero."""
    if self.depth <= 0:
        return
    self.depth = self.depth - 1
class MolPropsReport (results)
Expand source code
class MolPropsReport:
    """
    A class for handling and displaying molecular property calculation results.

    This class processes molecular property calculation results and can convert them
    into a formatted pandas Styler for display, particularly useful in Jupyter notebooks.

    Args:
        results (list): A list of dictionaries containing molecular property calculation results.
                       Each dictionary must contain a 'smiles' key and additional property keys.

    Attributes:
        results (list): The stored results from molecular property calculations.

    Methods:
        _to_dataframe(): Converts the results into a formatted pandas Styler.
        _repr_html_(): Returns HTML representation of the data for display in Jupyter notebooks.

    Examples:
        >>> results = [{"smiles": "CC", "property1": 0.5, "property2": 1.0}]
        >>> report = MolPropsReport(results)
        >>> df = report._to_dataframe()
    """

    def __init__(self, results):
        self.results = results

    def _collect_columns(self):
        """
        Build the column-name -> values mapping used for the table.

        The 'SMILES' column comes first, followed by every other key in the order
        it is first seen across all results. A result that lacks a key contributes
        ``None`` for that column. An empty ``results`` list yields only an empty
        'SMILES' column instead of raising.

        Returns:
            dict: Mapping of column name to a list with one entry per result.
        """
        property_names = []
        seen = {"smiles"}
        for result in self.results:
            for key in result:
                if key not in seen:
                    seen.add(key)
                    property_names.append(key)

        columns = {"SMILES": [result["smiles"] for result in self.results]}
        for name in property_names:
            columns[name] = [result.get(name) for result in self.results]
        return columns

    def _to_dataframe(self):
        """
        Converts the results data into a styled pandas table.

        Returns:
            pandas.io.formats.style.Styler: A Styler wrapping a DataFrame of SMILES
            strings and associated data, with numerical values formatted to
            3 decimal places.
        """
        df = pd.DataFrame(self._collect_columns()).style.set_properties().format(precision=3)
        return df

    def _repr_html_(self):
        """Return the HTML rendering of the styled table (Jupyter display hook)."""
        df = self._to_dataframe()
        return df._repr_html_()

A class for handling and displaying molecular property calculation results.

This class processes molecular property calculation results and can convert them into a formatted pandas DataFrame for display, particularly useful in Jupyter notebooks.

Args

results : list
A list of dictionaries containing molecular property calculation results. Each dictionary must contain a 'smiles' key and additional property keys.

Attributes

results : list
The stored results from molecular property calculations.

Methods

_to_dataframe(): Converts the results into a formatted pandas DataFrame. _repr_html_(): Returns HTML representation of the data for display in Jupyter notebooks.

Examples

>>> results = [{"smiles": "CC", "property1": 0.5, "property2": 1.0}]
>>> report = MolPropsReport(results)
>>> df = report._to_dataframe()
class PainsReport (results)
Expand source code
class PainsReport:
    """
    A class for generating and displaying PAINS (Pan-Assay Interference Compounds) analysis reports.

    Args:
        results (list): A list of dictionaries containing PAINS analysis results. Each dictionary
            should contain:
            - smiles (str): SMILES string representation of the molecule
            - PAINS (list or None): List of PAINS pattern SMARTS strings that match the molecule
                                   or None if no matches found

    Methods:
        get_html_of_molecule(result): Generates HTML img tag with highlighted PAINS matches
        _to_dataframe(): Converts results to pandas DataFrame
        _repr_html_(): Returns HTML representation for Jupyter display

    Examples:
        >>> results = [{'smiles': 'CC(=O)Oc1ccccc1C(=O)O', 'PAINS': ['[O,S]-[CH2]-[CH2]-[O,S]']}]
        >>> report = PainsReport(results)
        >>> print(report)
        PainsReport:
          Number of results: 1

    Notes:
        Requires RDKit for molecular operations and visualization.
        Implements Jupyter notebook display protocol via _repr_html_.
    """

    def __init__(self, results):
        self.results = results

    def get_html_of_molecule(self, result):
        """
        Render one molecule as an inline PNG ``<img>`` tag with PAINS atoms highlighted.

        For each SMARTS pattern listed under ``result["PAINS"]``, the atoms of the
        first substructure match are highlighted. Patterns that cannot be parsed
        or that do not match the molecule are skipped.

        Args:
            result (dict): Dictionary with 'smiles' and 'PAINS' keys.

        Returns:
            str: An HTML ``<img>`` tag embedding the base64-encoded PNG.
        """
        molecule = Chem.MolFromSmiles(result["smiles"])
        all_matches = []
        for smarts in result["PAINS"] or []:
            pattern = Chem.MolFromSmarts(smarts)
            if pattern is None:
                continue
            atom_matches = molecule.GetSubstructMatches(pattern)
            # An unmatched pattern yields an empty tuple; indexing it would raise.
            if atom_matches:
                all_matches.extend(atom_matches[0])

        Draw.DrawingOptions.atomHighlightsAreCircles = True
        Draw.DrawingOptions.atomHighlightColors = {i: (1, 0, 0) for i in set(all_matches)}

        img = Draw.MolToImage(molecule, size=(200, 100), highlightAtoms=all_matches)

        buffer = BytesIO()
        img.save(buffer, format="PNG")
        img_str = base64.b64encode(buffer.getvalue()).decode("utf-8")
        html = '<img src="data:image/png;base64,{0}">'.format(img_str)
        return html

    def _to_dataframe(self):
        """
        Convert the results into a pandas DataFrame.

        Returns:
            pd.DataFrame: A DataFrame with the following columns:
                - SMILES: SMILES string of each molecule
                - Molecule Image: HTML representation of each molecule
                - SMARTS patterns of PAINS: PAINS patterns recorded for each molecule
        """
        return pd.DataFrame.from_dict(
            {
                "SMILES": [result["smiles"] for result in self.results],
                "Molecule Image": [self.get_html_of_molecule(result) for result in self.results],
                "SMARTS patterns of PAINS": [result["PAINS"] for result in self.results],
            }
        )

    def _repr_html_(self):
        """Return the table as HTML with the image tags left unescaped (Jupyter display hook)."""
        df = self._to_dataframe()
        return df.to_html(escape=False)

    def __str__(self):
        return f"PainsReport:\n  Number of results: {len(self.results)}"

    def __repr__(self):
        return self.__str__()

A class for generating and displaying PAINS (Pan-Assay Interference Compounds) analysis reports.

Args

results : list
A list of dictionaries containing PAINS analysis results. Each dictionary should contain: - smiles (str): SMILES string representation of the molecule - PAINS (list or None): List of PAINS pattern SMARTS strings that match the molecule or None if no matches found

Methods

get_html_of_molecule(result): Generates HTML img tag with highlighted PAINS matches. _to_dataframe(): Converts results to pandas DataFrame. _repr_html_(): Returns HTML representation for Jupyter display.

Examples

>>> results = [{'smiles': 'CC(=O)Oc1ccccc1C(=O)O', 'PAINS': ['[O,S]-[CH2]-[CH2]-[O,S]']}]
>>> report = PainsReport(results)
>>> print(report)
DockingReport:
  Number of DockingResults: 1

Notes

Requires RDKit for molecular operations and visualization. Implements Jupyter notebook display protocol via _repr_html_.

Methods

def get_html_of_molecule(self, result)
Expand source code
def get_html_of_molecule(self, result):
    """
    Render one molecule as an inline PNG ``<img>`` tag with PAINS atoms highlighted.

    For each SMARTS pattern listed under ``result["PAINS"]``, the atoms of the
    first substructure match are highlighted. Patterns that cannot be parsed
    or that do not match the molecule are skipped.

    Args:
        result (dict): Dictionary with 'smiles' and 'PAINS' keys.

    Returns:
        str: An HTML ``<img>`` tag embedding the base64-encoded PNG.
    """
    molecule = Chem.MolFromSmiles(result["smiles"])
    all_matches = []
    for smarts in result["PAINS"] or []:
        pattern = Chem.MolFromSmarts(smarts)
        if pattern is None:
            continue
        atom_matches = molecule.GetSubstructMatches(pattern)
        # An unmatched pattern yields an empty tuple; indexing it would raise.
        if atom_matches:
            all_matches.extend(atom_matches[0])

    Draw.DrawingOptions.atomHighlightsAreCircles = True
    Draw.DrawingOptions.atomHighlightColors = {i: (1, 0, 0) for i in set(all_matches)}

    img = Draw.MolToImage(molecule, size=(200, 100), highlightAtoms=all_matches)

    buffer = BytesIO()
    img.save(buffer, format="PNG")
    img_str = base64.b64encode(buffer.getvalue()).decode("utf-8")
    html = '<img src="data:image/png;base64,{0}">'.format(img_str)
    return html
class Pocket (file_path: str = '',
block_type: str = '',
block_content: str = '',
color='red',
name=None,
index: int | None = 0,
props: dict | None = None)
Expand source code
class Pocket:
    """
    A class representing a molecular pocket structure with various properties and methods for manipulation.
    This class handles loading, visualization, and analysis of molecular pocket structures,
    primarily dealing with PDB files. It supports initialization from either a file path
    or direct content block.

    Attributes:
        color (str): Color representation of the pocket (default: "red").
        index (int): Index identifier for the pocket (default: 0).
        props (dict): Dictionary containing pocket properties.
        name (str): Name identifier for the pocket.
        file_path (Path): Path to the pocket file.
        structure (AtomArray): Biotite structure object representing the pocket.
        coordinates (ndarray): Numpy array of atomic coordinates.
        block_type (str): Type of structure block (e.g., "pdb").
        block_content (str): Content of the structure block.

    Args:
        file_path (str, optional): Path to the pocket structure file.
        block_type (str, optional): Type of structure block.
        block_content (str, optional): Content of the structure block.
        color (str, optional): Color for visualization (default: "red").
        name (str, optional): Name identifier for the pocket.
        index (int, optional): Index identifier (default: 0).
        props (dict, optional): Dictionary of pocket properties.

    Raises:
        ValueError: If the structure cannot be loaded.
        ValueError: If neither or both file_path and block_content are provided.
        ValueError: If the block_type is not supported (only "pdb" is supported).
        FileNotFoundError: If the specified file_path doesn't exist.

    Examples:
        # Create from file
        pocket = Pocket(file_path="path/to/pocket.pdb", name="Pocket1")

        # Create from content
        pocket = Pocket(block_content="...", block_type="pdb", name="Pocket2")

        # Visualize pocket
        pocket.visualize()

        # Get pocket center
        center = pocket.get_center()
    """

    def __init__(
        self,
        file_path: str = "",
        block_type: str = "",
        block_content: str = "",
        color="red",
        name=None,
        index: Optional[int] = 0,
        props: Optional[dict] = None,
    ):
        self.color = color
        self.index = index
        self.props = props

        self.name = name
        self.file_path = None
        self.structure = None
        self.coordinates = None

        # When no block_type is given, fall back to the file extension.
        file_path_obj = Path(file_path).absolute() if file_path else None
        extension = file_path_obj.suffix.lower() if file_path_obj else ""
        if not block_type and extension:
            block_type = extension.lstrip(".")  # Remove the leading dot
        self.block_type = block_type.lower()
        self.block_content = block_content

        sources_provided = sum(bool(x) for x in [file_path, block_content])
        if sources_provided != 1:
            raise ValueError("Please provide exactly one of file_path or block_content.")

        from_block = False
        try:
            if file_path:
                self.file_path = Path(file_path).absolute()
                if not self.file_path.exists():
                    raise FileNotFoundError(f"The file {self.file_path} does not exist.")

                if not self.block_type:
                    self.block_type = self.file_path.suffix.lstrip(".").lower()

                self.block_content = self.file_path.read_text()

                # Keep a copy of the source file inside the pockets directory.
                pocket_file_dir = self.get_directory()
                if str(pocket_file_dir) != str(self.file_path.parent):
                    try:
                        destination = Path(pocket_file_dir) / self.file_path.name
                        shutil.copy2(self.file_path, destination)
                        self.file_path = destination
                    except Exception as e:
                        DEFAULT_LOGGER.log_error(f"Failed to copy file to destination: {str(e)}")
                        raise
            elif block_content:
                self.block_content = block_content
                if not self.block_type:
                    raise ValueError("block_type must be provided when initializing with block_content.")
                from_block = True
                pocket_file_dir = self.get_directory()

            if self.block_content:
                if self.block_type not in ["pdb"]:
                    raise ValueError(f"Only pdb file formats are supported (given {self.block_type})")
                self.structure = self.load_structure_from_block(self.block_content, self.block_type)

            if self.structure is None:
                raise ValueError("Structure could not be loaded.")

            # A stack holds several models; only the first one is kept.
            if isinstance(self.structure, AtomArrayStack):
                self.structure = self.structure[0]

            DEFAULT_LOGGER.log_info(
                f"Loaded structure from {self.file_path if self.file_path else 'block content'}. Selected structure index: {0}"
            )

            if self.name is None:
                if self.file_path:
                    self.name = self.file_path.stem
                else:
                    # Number unnamed pockets by counting files that already use the default prefix.
                    self.name = "Unknown_Pocket"
                    directory = Path(pocket_file_dir)
                    num = len(list(directory.glob(f"{self.name}*")))
                    self.name = f"{self.name}_{num + 1}"

            self.coordinates = self.structure.coord

            if from_block:
                # Block-initialized pockets are written out so that file_path is set.
                directory = Path(pocket_file_dir)
                self.file_path = directory / f"{self.name}.{self.block_type}"
                self.write_to_file(self.file_path)

        except Exception as e:
            DEFAULT_LOGGER.log_error(f"Failed to initialize pocket: {str(e)}")
            raise

    def load_structure_from_block(self, block_content: str, block_type: str):
        """
        Load molecular structure from a text block content.

        Args:
            block_content (str): Text content containing the structure data
            block_type (str): Format of the structure data (currently only "pdb" supported)

        Returns:
            The object returned by ``PDBFile.get_structure()`` for the parsed block.

        Raises:
            ValueError: If block_type is not supported
        """
        if block_type == "pdb":
            pdb_file = PDBFile.read(io.StringIO(block_content))
            structure = pdb_file.get_structure()
        else:
            raise ValueError(f"Unsupported block type: {block_type}")
        return structure

    @staticmethod
    def load_structure(structure_file_path: str):
        """
        Load a structure from a PDB file.

        Args:
            structure_file_path (str): Path to the PDB file containing the structure.

        Returns:
            The object returned by ``PDBFile.get_structure()`` for the file.
        """

        structure_file = PDBFile.read(structure_file_path)
        structure = structure_file.get_structure()
        return structure

    def write_to_file(self, output_path: str, output_format: str = "pdb"):
        """
        Write the current structure to a file in the specified format.
        If the output path does not end in ".pdb", the structure is first written to a
        temporary PDB file and then converted to ``output_format``.

        Args:
            output_path (str): Path where the structure file should be written.
            output_format (str, optional): Format of the output file. Defaults to "pdb".

        Note:
            Failures are logged via DEFAULT_LOGGER and are not re-raised.

        Example:
            >>> pocket.write_to_file("structure.pdb")
            >>> pocket.write_to_file("structure.mol2", output_format="mol2")
        """

        def write_to_pdb_file(structure, output_path):
            pdb_file = PDBFile()
            pdb_file.set_structure(structure)
            pdb_file.write(output_path)

        try:
            path = Path(output_path)
            if not path.parent.exists():
                path.parent.mkdir(parents=True, exist_ok=True)

            if path.suffix.lower() != ".pdb":
                with tempfile.NamedTemporaryFile(delete=True) as temp:
                    write_to_pdb_file(self.structure, temp.name)
                    convert_file("pdb", temp.name, output_format, output_path)
            else:
                write_to_pdb_file(self.structure, output_path)
            DEFAULT_LOGGER.log_info(f"Current structure written to {output_path}.")

        except Exception as e:
            # NOTE(review): the error is swallowed here, so callers cannot tell the
            # write failed. Confirm whether this best-effort behavior is intended.
            DEFAULT_LOGGER.log_error(f"Failed to write structure to file {output_path}: {str(e)}")

    @jupyter_visualization
    def visualize(self):
        """
        Visualizes the protein pocket in 3D using ProteinViewer.

        Returns:
            The rendered 3D visualization of the protein pocket

        Notes:
            - Uses the pocket's file path and name for visualization
            - Applies color coding based on the pocket's index
            - Resets the index to 0 if it is None or exceeds the available colors
            - Prints the pocket name and properties as a legend line
        """
        pocket_paths = [str(self.file_path)]
        pocket_names = [f"Name: {self.name} | {self.pocket_props()}"]

        viewer = ProteinViewer("", format="pdb")

        pocket_config = viewer.get_pocket_visualization_config()
        available_colors = pocket_config.surface_colors
        # index is Optional[int]; None cannot be compared with an int, so treat it as out of bounds.
        if self.index is None or self.index >= len(available_colors):
            DEFAULT_LOGGER.log_warning(f"Index {self.index} is out of bounds for surface_colors. Resetting to 0.")
            self.index = 0  # Default to the first color if out of bounds

        pocket_config.surface_colors = [available_colors[self.index]]
        print(
            "\n|\n".join(
                colored("■", pocket_config.surface_colors[0]) + " " + pocket_name for pocket_name in pocket_names
            )
        )

        return viewer.render_protein_with_pockets(pocket_paths=pocket_paths, pocket_config=pocket_config)

    def pocket_props(self):
        """
        Formats the properties of a protein pocket into a single string line.

        Returns:
            str: A formatted string containing pocket properties in the format:
                 'Volume: {value}ų | Drugability score: {value}'
                 Returns empty string if no properties are available.
        """
        properties_line = ""
        if self.props:
            properties_line = (
                f"Volume: {self.props.get('volume', 'N/A')}ų | "
                f"Drugability score: {self.props.get('drugability_score', 'N/A')}"
            )
        return properties_line

    def __repr__(self):
        return self.__str__()

    def __str__(self):
        properties_line = ""
        if self.props:
            # The trailing newline keeps "Block type" on its own line.
            properties_line = (
                f"  Volume: {self.props.get('volume', 'N/A')}ų, "
                f"Total SASA: {self.props.get('total_SASA', 'N/A')}, "
                f"Polar SASA: {self.props.get('polar_SASA', 'N/A')}, "
                f"Polar/Apolar SASA ratio: {self.props.get('polar_apolar_SASA_ratio', 'N/A')}, "
                f"Hydrophobicity: {self.props.get('hydrophobicity', 'N/A')}, "
                f"Polarity: {self.props.get('polarity', 'N/A')}, "
                f"Drugability score: {self.props.get('drugability_score', 'N/A')}\n"
            )

        return (
            f"Pocket:\n  Name: {self.name}\n{properties_line}  Block type: {self.block_type}\n"
            "Available Fields: {block_type, block_content, file_path, name, coordinates}"
        )

    def get_center(self) -> Optional[List[float]]:
        """
        Calculate and return the center coordinates of the pocket.

        This method computes the arithmetic mean of all coordinates along axis 0.

        Returns:
            Optional[List[float]]: The mean coordinates as a list of floats.
                                  Returns None if coordinates are not available.
        """
        if self.coordinates is None:
            DEFAULT_LOGGER.log_warning("Coordinates are not available for this Pocket.")
            return None
        center = self.coordinates.mean(axis=0)
        DEFAULT_LOGGER.log_info(f"Calculated center coordinates: {center.tolist()}")
        return [float(x) for x in center.tolist()]

    @staticmethod
    def get_directory() -> str:
        """
        Returns the base directory path for storing pocket-related data.

        This method creates (if not exists) and returns the path to a 'pockets' directory
        under the working directory. The directory is created with parent directories if needed.

        Returns:
            str: Path to the pockets base directory as a string
        """
        pockets_base_dir = Path(WORKING_DIR) / "pockets"
        pockets_base_dir.mkdir(parents=True, exist_ok=True)

        return str(pockets_base_dir)

    def update_coordinates(self, coords: np.ndarray):
        """
        Updates the coordinates of the pocket structure.

        Args:
            coords (np.ndarray): New coordinates to update the pocket structure with.

        Updates:
            - self.structure.coord: Updates the coordinates in the structure object
            - self.coordinates: Updates the local coordinates attribute

        Note:
            Both attributes are rebound to the same ``coords`` object and the action is logged.
        """
        self.structure.coord = coords
        self.coordinates = coords
        DEFAULT_LOGGER.log_info("Pocket coordinates has been inplaced updated.")

A class representing a molecular pocket structure with various properties and methods for manipulation. This class handles loading, visualization, and analysis of molecular pocket structures, primarily dealing with PDB files. It supports initialization from either a file path or direct content block.

Attributes

color : str
Color representation of the pocket (default: "red").
index : int
Index identifier for the pocket (default: 0).
props : dict
Dictionary containing pocket properties.
name : str
Name identifier for the pocket.
file_path : Path
Path to the pocket file.
structure : AtomArray
Biotite structure object representing the pocket.
coordinates : ndarray
Numpy array of atomic coordinates.
block_type : str
Type of structure block (e.g., "pdb").
block_content : str
Content of the structure block.

Args

file_path : str, optional
Path to the pocket structure file.
block_type : str, optional
Type of structure block.
block_content : str, optional
Content of the structure block.
color : str, optional
Color for visualization (default: "red").
name : str, optional
Name identifier for the pocket.
index : int, optional
Index identifier (default: 0).
props : dict, optional
Dictionary of pocket properties.

Raises

ValueError
If the structure cannot be loaded.
ValueError
If neither or both file_path and block_content are provided.
ValueError
If the block_type is not supported (only "pdb" is supported).
FileNotFoundError
If the specified file_path doesn't exist.

Examples

Create from file

pocket = Pocket(file_path="path/to/pocket.pdb", name="Pocket1")

Create from content

pocket = Pocket(block_content="…", block_type="pdb", name="Pocket2")

Visualize pocket

pocket.visualize()

Get pocket center

center = pocket.get_center()

Static methods

def get_directory() ‑> str
Expand source code
@staticmethod
def get_directory() -> str:
    """
    Return the directory used for storing pocket files, creating it if needed.

    The directory is the 'pockets' folder under WORKING_DIR; missing parent
    directories are created as well.

    Returns:
        str: Path to the pockets directory as a string
    """
    target = Path(WORKING_DIR).joinpath("pockets")
    target.mkdir(parents=True, exist_ok=True)
    return str(target)

Returns the base directory path for storing pocket-related data.

This method creates (if not exists) and returns the path to a 'pockets' directory under the working directory. The directory is created with parent directories if needed.

Returns

str
Absolute path to the pockets base directory as a string
def load_structure(structure_file_path: str)
Expand source code
@staticmethod
def load_structure(structure_file_path: str):
    """
    Read a PDB file and return its structure.

    Args:
        structure_file_path (str): Path to the PDB file to read.

    Returns:
        The object returned by ``PDBFile.get_structure()`` for that file.
    """
    return PDBFile.read(structure_file_path).get_structure()

Load a protein structure from a PDB file.

Args

structure_file_path : str
Path to the PDB file containing the protein structure.

Returns

Structure
The loaded protein structure object.

Raises

FileNotFoundError
If the specified PDB file does not exist.
ValueError
If the PDB file is invalid or cannot be parsed.

Methods

def get_center(self) ‑> List[float] | None
Expand source code
def get_center(self) -> Optional[List[float]]:
    """
    Return the mean of the pocket's coordinates.

    The mean is taken along axis 0 of ``self.coordinates`` and logged at info level.

    Returns:
        Optional[List[float]]: The mean coordinates as a list of floats, or None
                              (after logging a warning) when no coordinates are set.
    """
    coords = self.coordinates
    if coords is not None:
        centroid = coords.mean(axis=0).tolist()
        DEFAULT_LOGGER.log_info(f"Calculated center coordinates: {centroid}")
        return [float(value) for value in centroid]

    DEFAULT_LOGGER.log_warning("Coordinates are not available for this Pocket.")
    return None

Calculate and return the center coordinates of the pocket.

This method computes the arithmetic mean of all coordinates in the pocket to determine its center point.

Returns

Optional[List[float]]
A list containing the x, y, z coordinates of the pocket's center. Returns None if coordinates are not available.
def load_structure_from_block(self, block_content: str, block_type: str)
Expand source code
def load_structure_from_block(self, block_content: str, block_type: str):
    """
    Build a structure from in-memory text instead of a file on disk.

    Args:
        block_content (str): Text holding the structure data.
        block_type (str): Format of ``block_content``. Only "pdb" is accepted.

    Returns:
        The object returned by ``PDBFile.get_structure()`` for the parsed text.

    Raises:
        ValueError: If ``block_type`` is anything other than "pdb".
    """
    # Reject unsupported formats before doing any parsing work.
    if block_type != "pdb":
        raise ValueError(f"Unsupported block type: {block_type}")

    # The text is wrapped in a StringIO so it can be read like a file.
    text_stream = io.StringIO(block_content)
    return PDBFile.read(text_stream).get_structure()

Load molecular structure from a text block content.

This method creates a Structure object from a text block containing structural data in a supported format.

Args

block_content : str
Text content containing the structure data
block_type : str
Format of the structure data (currently only "pdb" supported)

Returns

Structure
A Structure object representing the molecular structure

Raises

ValueError
If block_type is not supported
def pocket_props(self)
Expand source code
def pocket_props(self):
    """
    Format the pocket's headline properties as a single display line.

    Returns:
        str: ``'Volume: {value}Å³ | Drugability score: {value}'``, where each
            value is read from ``self.props`` and falls back to ``'N/A'`` when
            the key is missing. An empty string is returned when
            ``self.props`` is empty or None.
    """
    if not self.props:
        return ""

    # The unit suffix is "Å³"; it used to be emitted as the mis-encoded "ų".
    volume = self.props.get("volume", "N/A")
    score = self.props.get("drugability_score", "N/A")
    return f"Volume: {volume}Å³ | Drugability score: {score}"

Formats the properties of a protein pocket into a single string line.

Returns

str
A formatted string containing pocket properties in the format: 'Volume: {value}Å³ | Drugability score: {value}'. Returns an empty string if no properties are available.
def update_coordinates(self, coords: numpy.ndarray)
Expand source code
def update_coordinates(self, coords: np.ndarray):
    """
    Replace the pocket's coordinates with ``coords``.

    Args:
        coords (np.ndarray): Coordinate array to store. The same object is
            handed to both ``self.structure.coord`` and ``self.coordinates``;
            no copy is made here.

    Note:
        Nothing is returned. The update is reported at info level through
        ``DEFAULT_LOGGER``.
    """
    # Chained assignment binds targets left to right: the structure first,
    # then the cached ``coordinates`` attribute.
    self.structure.coord = self.coordinates = coords
    DEFAULT_LOGGER.log_info("Pocket coordinates has been inplaced updated.")

Updates the coordinates of the pocket structure.

Args

coords : np.ndarray
New coordinates to update the pocket structure with. Should be a numpy array containing the coordinate data.

Updates

  • self.structure.coord: Updates the coordinates in the structure object
  • self.coordinates: Updates the local coordinates attribute

Note

This method performs an in-place update of the coordinates and logs the action.

def visualize(*args, **kwargs)
Expand source code
def wrapper(*args, **kwargs):
    # `func` is not defined in this snippet; it is resolved from the enclosing
    # scope (presumably the decorated callable -- confirm in the decorator).
    # All positional and keyword arguments are forwarded unchanged.
    html_visualization = func(*args, **kwargs)
    # Hand the produced value to JupyterViewer.visualize and return its result.
    return JupyterViewer.visualize(html_visualization)
def write_to_file(self, output_path: str, output_format: str = 'pdb')
Expand source code
def write_to_file(self, output_path: str, output_format: str = "pdb"):
    """
    Save ``self.structure`` to ``output_path``.

    Missing parent directories are created first. The branch taken depends on
    the suffix of ``output_path`` (compared case-insensitively), not on
    ``output_format``:

    * ``.pdb`` suffix: the structure is written directly as a PDB file.
    * any other suffix: the structure is written to a temporary PDB file,
      which is then passed to ``convert_file`` together with
      ``output_format`` and ``output_path``.

    Args:
        output_path (str): Destination path of the structure file.
        output_format (str, optional): Target format handed to
            ``convert_file`` when the suffix is not ``.pdb``. Defaults to "pdb".

    Note:
        No exception propagates to the caller: any ``Exception`` raised while
        writing is caught and reported through ``DEFAULT_LOGGER.log_error``.

    Example:
        >>> pocket.write_to_file("structure.pdb")
        >>> pocket.write_to_file("structure.mol2", output_format="mol2")
    """

    def dump_pdb(atoms, destination):
        # Serialize `atoms` as PDB text at `destination`.
        writer = PDBFile()
        writer.set_structure(atoms)
        writer.write(destination)

    try:
        target = Path(output_path)
        parent_dir = target.parent
        if not parent_dir.exists():
            parent_dir.mkdir(parents=True, exist_ok=True)

        if target.suffix.lower() == ".pdb":
            dump_pdb(self.structure, output_path)
        else:
            # The scratch file is deleted automatically when the context exits.
            with tempfile.NamedTemporaryFile(delete=True) as scratch:
                dump_pdb(self.structure, scratch.name)
                convert_file("pdb", scratch.name, output_format, output_path)
        DEFAULT_LOGGER.log_info(f"Current structure written to {output_path}.")

    except Exception as e:
        DEFAULT_LOGGER.log_error(f"Failed to write structure to file {output_path}: {str(e)}")

Write the current structure to a file in the specified format. This method writes the current structure to a file, with support for different output formats. If the output format is not PDB, it first writes to a temporary PDB file and then converts to the desired format.

Args

output_path : str
Path where the structure file should be written.
output_format : str, optional
Format of the output file. Defaults to "pdb".

Raises

Exception
If writing to file fails, the error is logged via DEFAULT_LOGGER.

Example

>>> pocket.write_to_file("structure.pdb")
>>> pocket.write_to_file("structure.mol2", output_format="mol2")
class PocketData (pocket: List | Pocket | Ligand,
xref_protein: Protein,
box_size: List | None = None,
fit_box: bool | None = None,
padding: int | None = 2)
Expand source code
class PocketData:
    """
    Docking box built around a binding pocket of a reference protein.

    The pocket is supplied either as an ``[x, y, z]`` centre or as a
    Pocket/Ligand object. The instance exposes the resulting box centre, size
    and min/max corners, plus the optional aligner used by ``transform`` and
    ``inverse_transform``.
    """

    def __init__(
        self,
        pocket: List | Pocket | Ligand,
        xref_protein: Protein,
        box_size: Optional[List] = None,
        fit_box: Optional[bool] = None,
        padding: Optional[int] = 2,
    ):
        """
        Initialize a PocketData object.
        This class handles the creation and manipulation of pocket boxes for protein structures.
        It supports both direct coordinate input and structure-based pocket definitions.

        Args:
            pocket (List | Pocket | Ligand): Either a list of 3D coordinates [x,y,z] or a Pocket/Ligand object.
                The value is deep-copied, so the caller's object is not modified.
            xref_protein (Protein): Reference protein structure
            box_size (List, optional): Custom box dimensions [x,y,z] in Angstroms. Defaults to None
            fit_box (bool, optional): Whether to fit box to pocket dimensions. Defaults to None
            padding (int, optional): Padding around pocket when fitting box. Defaults to 2

        Raises:
            ValueError: If both box_size and fit_box are specified
            ValueError: If pocket is a list but doesn't contain 3 numeric elements
            ValueError: If attempting to fit box to coordinate list
            ValueError: If pocket is invalid type

        Note:
            - If neither box_size nor fit_box specified, defaults to 24Å cubic box
            - When using Pocket/Ligand objects, PCA alignment is automatically performed
              and the stored pocket copy holds the aligned coordinates
        """
        if box_size and fit_box:
            raise ValueError("Cannot specify both box_size and [fit_box and padding]")

        if not box_size and not fit_box:
            DEFAULT_LOGGER.log_warning("No box size or fit box specified. Defaulting to 24Å box.")
            box_size = [24, 24, 24]

        # Work on a private copy: the Pocket/Ligand branch below rewrites its coordinates.
        self.__pocket = deepcopy(pocket)
        if isinstance(self.__pocket, list):
            if len(self.__pocket) != 3:
                raise ValueError("If pocket is a list, it must have 3 elements (x, y, z)")

            for i in self.__pocket:
                if not isinstance(i, (int, float, np.int32, np.int64, np.float32, np.float64)):
                    raise ValueError("All elements in pocket must be integers or floats")

            # A bare centre point has no shape to align against.
            self.__aligner = None

            if fit_box:
                raise ValueError("Cannot fit box to list of coordinates")

        elif isinstance(pocket, (Pocket, Ligand)):
            # The aligner must exist before transform() is called: transform()
            # reads it through the `aligner` property.
            self.__aligner = StructureAligner()
            self.__aligner.calculate_pca(self.__pocket.coordinates)

            pocket_transformed_coords = self.transform(self.__pocket.coordinates)
            self.__pocket.update_coordinates(pocket_transformed_coords)
        else:
            raise ValueError("Invalid pocket type")

        self.__padding = padding
        self.__fit_box = fit_box
        self.__xref_protein = xref_protein
        # The centre is taken after alignment, so for Pocket/Ligand inputs it is
        # expressed in the aligned frame.
        self.__box_center = self._get_center(self.__pocket)
        self.__box_min_coords, self.__box_max_coords, self.__box_size = self.calculate_box_params(box_size)

    @classmethod
    def create_from_residues(
        cls,
        xref_protein: Protein,
        residue_ids: List[str],
        box_size: Optional[List] = None,
        fit_box: Optional[bool] = None,
        padding: Optional[int] = 2,
    ):
        """
        Creates a pocket data instance from a list of residue IDs.

        The box centre is obtained from ``xref_protein.get_center_by_residues``
        and passed to the constructor as the pocket.

        Args:
            xref_protein (Protein): The reference protein structure.
            residue_ids (List[str]): List of residue IDs that define the pocket.
            box_size (Optional[List], optional): Custom box dimensions. Defaults to None.
            fit_box (Optional[bool], optional): Whether to fit the box to the pocket. Defaults to None.
            padding (Optional[int], optional): Padding size around the pocket. Defaults to 2.

        Returns:
            PocketData: A new pocket data instance centered on the specified residues.

        Raises:
            ValueError: Propagated from the constructor when its arguments are rejected.
                A warning reported by the residue lookup is only logged.
        """
        center, warning, _ = xref_protein.get_center_by_residues(residue_ids)
        if warning:
            DEFAULT_LOGGER.log_warning(warning)

        return cls(pocket=center, xref_protein=xref_protein, box_size=box_size, fit_box=fit_box, padding=padding)

    @property
    def pocket(self):
        """
        Gets the pocket data.

        Returns:
            The deep copy of the pocket made by the constructor: a list of
            coordinates, or a Pocket/Ligand whose coordinates were replaced by
            their aligned values.
        """
        return self.__pocket

    @property
    def xref_protein(self):
        """
        Get the reference protein of this pocket.

        Returns:
            Protein: The ``xref_protein`` object passed to the constructor.
        """
        return self.__xref_protein

    @property
    def padding(self):
        """
        Gets the padding value used for this pocket data.

        Returns:
            The ``padding`` value passed to the constructor (2 by default). It
            is forwarded to ``create_bounding_box`` when ``fit_box`` is truthy.
        """
        return self.__padding

    @property
    def fit_box(self):
        """
        Gets the fit_box value used for this pocket data.

        Returns:
            Optional[bool]: The ``fit_box`` value passed to the constructor.
        """
        return self.__fit_box

    @property
    def box_size(self):
        """
        Returns the size of the box.

        Returns:
            The box dimensions: the ``box_size`` list given to the constructor
            (``[24, 24, 24]`` when neither ``box_size`` nor ``fit_box`` was
            given), or the result of ``calculate_box_dimensions`` when the box
            was fitted to the pocket.
        """
        return self.__box_size

    @property
    def box_center(self):
        """
        Get the center coordinates of the binding site box.

        Returns:
            The value produced by ``_get_center``: a list of floats [x, y, z]
            for a coordinate-list pocket, otherwise the result of the
            pocket's ``get_center()``.
        """
        return self.__box_center

    @property
    def aligner(self):
        """
        Get the aligner instance associated with this pocket data.

        Returns:
            The ``StructureAligner`` created for Pocket/Ligand pockets, or None
            for coordinate-list pockets.
        """
        return self.__aligner

    @property
    def box_min_coords(self):
        """
        Returns the minimum coordinates of the box.

        Returns:
            The minimum corner computed by ``calculate_box_params`` (a list
            when the box was fitted to the pocket).
        """
        return self.__box_min_coords

    @property
    def box_max_coords(self):
        """
        Gets the maximum coordinates of the box defining the pocket.

        Returns:
            The maximum corner computed by ``calculate_box_params`` (a list
            when the box was fitted to the pocket).
        """
        return self.__box_max_coords

    def _get_center(self, pocket: List[float] | Pocket | Ligand):
        """
        Get the center coordinates of a pocket or ligand.

        Args:
            pocket (List[float] | Pocket | Ligand): Input pocket data. Can be:
                - List[float]: Direct coordinates [x, y, z]
                - Pocket: Pocket object with get_center() method
                - Ligand: Ligand object with get_center() method

        Returns:
            For a list, the same values converted to floats; otherwise whatever
            ``pocket.get_center()`` returns.

        Raises:
            ValueError: If input pocket type is not supported
        """
        if isinstance(pocket, list):
            pocket = [float(x) for x in pocket]
        elif isinstance(pocket, (Pocket, Ligand)):
            pocket = pocket.get_center()
        else:
            raise ValueError("Invalid pocket type.")

        return pocket

    def transform(self, coords: np.ndarray) -> np.ndarray:
        """
        Transform coordinates using the aligner if available.

        Args:
            coords (np.ndarray): Input coordinates to be transformed.

        Returns:
            np.ndarray: Result of ``aligner.align_structure(coords)`` if an aligner exists,
                the original ``coords`` object otherwise.
        """
        if self.aligner is None:
            return coords

        return self.aligner.align_structure(coords)

    def inverse_transform(self, coords: np.ndarray) -> np.ndarray:
        """
        Inverse transform coordinates from the PCA-aligned space.

        Args:
            coords: Coordinates to inverse transform.

        Returns:
            Result of ``aligner.restore_structure(coords)`` if an aligner exists,
            the original ``coords`` object otherwise.
        """
        if self.aligner is None:
            return coords

        return self.aligner.restore_structure(coords)

    def match_protein(self, protein: Protein) -> bool:
        """
        Verify that the protein matches the xref protein.

        Args:
            protein: Protein to check.

        Returns:
            bool: True when ``protein`` compares equal to ``xref_protein``.

        Raises:
            ValueError: If ``protein`` compares unequal to ``xref_protein``.
        """
        if protein != self.xref_protein:
            raise ValueError("Provided protein does not match xref protein.")

        # Return an explicit True so the result agrees with the ``-> bool`` annotation.
        return True

    def calculate_box_params(self, box_size: Optional[List] = None):
        """
        Compute the box corners and size.

        When ``fit_box`` is truthy the corners come from ``create_bounding_box``
        applied to the pocket with the configured padding, and the size is
        derived from those corners. Otherwise the corners are derived from
        ``box_center`` and the given ``box_size``.

        Args:
            box_size (Optional[List], optional): Box dimensions used when
                ``fit_box`` is falsy; ignored otherwise. Defaults to None.

        Returns:
            tuple: ``(box_min_coords, box_max_coords, box_size)``.
        """
        if self.fit_box:
            result = create_bounding_box(self.pocket, padding=self.padding, around_ligand=True)
            box_min_coords, box_max_coords = list(result["min_coords"]), list(result["max_coords"])
            box_size = calculate_box_dimensions(box_min_coords, box_max_coords)
        else:
            box_min_coords, box_max_coords = calculate_box_min_max(box_center=self.box_center, box_dimensions=box_size)

        return box_min_coords, box_max_coords, box_size

    def from_xyz(self):
        """
        Determines if the pocket data is in XYZ format.

        Returns:
            bool: True if the pocket data is a list (XYZ format), False if it's a Ligand or Pocket object

        Raises:
            ValueError: If the pocket type is invalid
        """
        if isinstance(self.pocket, (Ligand, Pocket)):
            return False
        elif isinstance(self.pocket, list):
            return True
        else:
            raise ValueError("Invalid pocket type")

    @jupyter_visualization
    def show_box(self, protein: Protein = None, raise_for_protein_mismatch: bool = True) -> str:
        """
        Generates an HTML visualization of the protein structure with a bounding box representation of the pocket.
        This method creates a visual representation using MolStar viewer, showing either:
        1. The protein structure with a ligand and its bounding box (for non-XYZ derived pockets)
        2. The protein structure with just the bounding box (for XYZ derived pockets)

        Args:
            protein (Protein, optional): The protein structure to visualize. If None, uses the cross-referenced protein.
                Defaults to None.
            raise_for_protein_mismatch (bool, optional): Whether to raise an error if the protein doesn't match
                the pocket's reference protein. Defaults to True.

        Returns:
            str: HTML string containing the MolStar viewer visualization.

        Raises:
            ValueError: If raise_for_protein_mismatch is True and the provided protein
                doesn't match the pocket's reference protein.
        """
        if protein is None:
            protein = self.xref_protein

        if raise_for_protein_mismatch:
            self.match_protein(protein)

        # Copy before moving the protein into the pocket's frame so the caller's object is untouched.
        protein = deepcopy(protein)
        aligned_protein_coords = self.transform(protein.structure.coord)
        protein.update_coordinates(aligned_protein_coords)

        if not self.from_xyz():
            with tempfile.TemporaryDirectory() as temp_dir:
                tmp_protein_file = Path(temp_dir) / "protein.pdb"
                tmp_structure_file = Path(temp_dir) / "structure.sdf"

                protein.write_to_file(str(tmp_protein_file))
                self.pocket.write_to_file(str(tmp_structure_file), output_format="sdf")

                # Rendering happens inside the `with` block, while the temporary files still exist.
                html = DockingMolstarViewer().render_ligand_with_bounding_box(
                    protein_data=str(tmp_protein_file),
                    protein_format="pdb",
                    ligand_data=str(tmp_structure_file),
                    ligand_format="sdf",
                    box={"min": self.box_min_coords, "max": self.box_max_coords},
                )
        else:
            protein_file_path = str(protein.file_path)
            protein_format = getattr(protein, "block_type", "pdb")
            html = DockingMolstarViewer().render_bounding_box(
                protein_data=protein_file_path,
                protein_format=protein_format,
                box_center=self.box_center,
                box_size=self.box_size,
            )

        return html

Initialize a PocketData object. This class handles the creation and manipulation of pocket boxes for protein structures. It supports both direct coordinate input and structure-based pocket definitions.

Args

pocket : List | Pocket | Ligand
Either a list of 3D coordinates [x,y,z] or a Pocket/Ligand object
xref_protein : Protein
Reference protein structure
box_size : List, optional
Custom box dimensions [x,y,z] in Angstroms. Defaults to None
fit_box : bool, optional
Whether to fit box to pocket dimensions. Defaults to None
padding : int, optional
Padding around pocket when fitting box. Defaults to 2

Raises

ValueError
If both box_size and fit_box are specified
ValueError
If pocket is a list but doesn't contain 3 numeric elements
ValueError
If attempting to fit box to coordinate list
ValueError
If pocket is invalid type

Note

  • If neither box_size nor fit_box specified, defaults to 24Å cubic box
  • When using Pocket/Ligand objects, PCA alignment is automatically performed

Static methods

def create_from_residues(xref_protein: Protein,
residue_ids: List[str],
box_size: List | None = None,
fit_box: bool | None = None,
padding: int | None = 2)

Creates a pocket data instance from a list of residue IDs.

Args

xref_protein : Protein
The reference protein structure.
residue_ids : List[str]
List of residue IDs that define the pocket.
box_size : Optional[List], optional
Custom box dimensions. Defaults to None.
fit_box : Optional[bool], optional
Whether to fit the box to the pocket. Defaults to None.
padding : Optional[int], optional
Padding size around the pocket. Defaults to 2.

Returns

PocketData
A new pocket data instance centered on the specified residues.

Raises

None explicitly, but may log warnings if issues are found with residue selection.

Instance variables

prop aligner
Expand source code
@property
def aligner(self):
    """
    Get the aligner instance associated with this pocket data.

    Returns:
        The ``StructureAligner`` created by the constructor for Pocket/Ligand
        pockets, or None when the pocket was given as a coordinate list.
    """
    return self.__aligner

Get the aligner instance associated with this pocket data.

Returns

object
The aligner object used for structural alignment.
prop box_center
Expand source code
@property
def box_center(self):
    """
    Get the center coordinates of the binding site box.

    Returns:
        The value the constructor obtained from ``_get_center``: a list of
        floats [x, y, z] for a coordinate-list pocket, otherwise the result of
        the pocket's ``get_center()``.
    """
    return self.__box_center

Get the center coordinates of the binding site box.

Returns

numpy.ndarray
The 3D coordinates [x, y, z] representing the center of the binding site box.
prop box_max_coords
Expand source code
@property
def box_max_coords(self):
    """
    Gets the maximum coordinates of the box defining the pocket.

    Returns:
        The maximum corner produced by ``calculate_box_params``: a list when
        the box was fitted to the pocket, otherwise the second value returned
        by ``calculate_box_min_max``.
    """
    return self.__box_max_coords

Gets the maximum coordinates of the box defining the pocket.

Returns

tuple
A 3D coordinate tuple (x, y, z) representing the maximum bounds of the pocket box.
prop box_min_coords
Expand source code
@property
def box_min_coords(self):
    """
    Returns the minimum coordinates of the box.

    Returns:
        The minimum corner produced by ``calculate_box_params``: a list when
        the box was fitted to the pocket, otherwise the first value returned
        by ``calculate_box_min_max``.
    """
    return self.__box_min_coords

Returns the minimum coordinates of the box.

Returns

list
A list containing the minimum x, y, z coordinates of the box.
prop box_size
Expand source code
@property
def box_size(self):
    """
    Returns the size of the box.

    Returns:
        The box dimensions: the ``box_size`` list given to the constructor
        (``[24, 24, 24]`` when neither ``box_size`` nor ``fit_box`` was given),
        or the result of ``calculate_box_dimensions`` when the box was fitted
        to the pocket.
    """
    return self.__box_size

Returns the size of the box.

Returns

float
The size of the box representing the pocket's bounding box dimensions.
prop fit_box
Expand source code
@property
def fit_box(self):
    """
    Gets the fit_box value used for this pocket data.

    Returns:
        Optional[bool]: The ``fit_box`` value passed to the constructor
        (None when it was not given).
    """
    return self.__fit_box

Gets the fit_box value used for this pocket data.

Returns

bool
Whether the box is fitted to the pocket dimensions
prop padding
Expand source code
@property
def padding(self):
    """
    Gets the padding value used for this pocket data.

    Returns:
        The ``padding`` value passed to the constructor (2 by default). It is
        forwarded to ``create_bounding_box`` when the box is fitted to the
        pocket.
    """
    return self.__padding

Gets the padding value used for this pocket data.

Returns

float
The padding value used for spatial calculations around the pocket
prop pocket
Expand source code
@property
def pocket(self):
    """
    Gets the pocket data.

    Returns:
        The deep copy of the pocket made by the constructor: a list of
        coordinates, or a Pocket/Ligand whose coordinates were replaced by
        their aligned values.
    """
    return self.__pocket

Gets the pocket data.

Returns

The pocket data stored in this instance.

prop xref_protein
Expand source code
@property
def xref_protein(self):
    """
    Get the reference protein of this pocket.

    Returns:
        Protein: The ``xref_protein`` object passed to the constructor.
    """
    return self.__xref_protein

Get the cross-reference protein identifier.

Returns

str
The cross-reference protein identifier associated with this pocket.

Methods

def calculate_box_params(self, box_size: List | None = None)
Expand source code
def calculate_box_params(self, box_size: Optional[List] = None):
    """
    Compute the box corners and size.

    When ``self.fit_box`` is falsy the corners are derived from
    ``self.box_center`` and ``box_size``, and ``box_size`` is returned
    unchanged. Otherwise the corners come from ``create_bounding_box`` applied
    to the pocket with the configured padding, and the size is derived from
    those corners.

    Args:
        box_size (Optional[List], optional): Box dimensions used when
            ``self.fit_box`` is falsy; ignored otherwise. Defaults to None.

    Returns:
        tuple: ``(box_min_coords, box_max_coords, box_size)``.
    """
    if not self.fit_box:
        lower, upper = calculate_box_min_max(box_center=self.box_center, box_dimensions=box_size)
        return lower, upper, box_size

    bounds = create_bounding_box(self.pocket, padding=self.padding, around_ligand=True)
    lower = list(bounds["min_coords"])
    upper = list(bounds["max_coords"])
    return lower, upper, calculate_box_dimensions(lower, upper)
def from_xyz(self)
Expand source code
def from_xyz(self):
    """
    Report whether the pocket was supplied as bare XYZ coordinates.

    Returns:
        bool: False when the pocket is a Ligand or Pocket object, True when it
            is a list.

    Raises:
        ValueError: If the pocket is neither of those.
    """
    pocket = self.pocket
    if isinstance(pocket, (Ligand, Pocket)):
        return False
    if isinstance(pocket, list):
        return True
    raise ValueError("Invalid pocket type")

Determines if the pocket data is in XYZ format.

Returns

bool
True if the pocket data is a list (XYZ format), False if it's a Ligand or Pocket object

Raises

ValueError
If the pocket type is invalid
def inverse_transform(self, coords: numpy.ndarray) ‑> numpy.ndarray
Expand source code
def inverse_transform(self, coords: np.ndarray) -> np.ndarray:
    """
    Map coordinates back out of the aligned frame.

    Args:
        coords: Coordinates to inverse transform.

    Returns:
        The result of ``aligner.restore_structure(coords)``, or the ``coords``
        object itself when no aligner is set.
    """
    aligner = self.aligner
    return coords if aligner is None else aligner.restore_structure(coords)

Inverse transform coordinates from the PCA-aligned space.

Args

coords
Coordinates to inverse transform.

Returns

Inverse transformed coordinates.

def match_protein(self,
protein: Protein) ‑> bool
Expand source code
def match_protein(self, protein: Protein) -> bool:
    """
    Verify that the protein matches the xref protein.

    Args:
        protein: Protein to check.

    Returns:
        bool: True when ``protein`` compares equal to ``self.xref_protein``.

    Raises:
        ValueError: If ``protein`` compares unequal to ``self.xref_protein``.
    """
    if protein != self.xref_protein:
        raise ValueError("Provided protein does not match xref protein.")

    # Return an explicit True so the result agrees with the ``-> bool`` annotation.
    return True

Check if the protein matches the xref protein.

Args

protein
Protein to check.

Returns

True if the proteins match, False otherwise.

def show_box(*args, **kwargs)
Expand source code
def wrapper(*args, **kwargs):
    # `func` is not defined in this snippet; it is resolved from the enclosing
    # scope (presumably the decorated callable -- confirm in the decorator).
    # All positional and keyword arguments are forwarded unchanged.
    html_visualization = func(*args, **kwargs)
    # Hand the produced value to JupyterViewer.visualize and return its result.
    return JupyterViewer.visualize(html_visualization)
def transform(self, coords: numpy.ndarray) ‑> numpy.ndarray
Expand source code
def transform(self, coords: np.ndarray) -> np.ndarray:
    """
    Move coordinates into the aligned frame when an aligner is set.

    Args:
        coords (np.ndarray): Input coordinates to be transformed.

    Returns:
        np.ndarray: The result of ``aligner.align_structure(coords)``, or the
            ``coords`` object itself when no aligner is set.
    """
    aligner = self.aligner
    return coords if aligner is None else aligner.align_structure(coords)

Transform coordinates using the aligner if available.

Args

coords : np.ndarray
Input coordinates to be transformed.

Returns

np.ndarray
Transformed coordinates if aligner exists, original coordinates otherwise.
class PocketFinderReport (protein, csv_file_path='')
Expand source code
class PocketFinderReport:
    """
    PocketFinderReport class for managing protein pocket analysis results.

    A class to handle collection and reporting of protein pocket properties including drugability scores,
    volumes, surface areas and other physicochemical properties.

    Attributes:
        protein: The protein object associated with this report
        file_path (str): Path to save the CSV report file
        pockets (list): List of pocket objects containing analysis results

    Methods:
        add_pocket(pocket): Add a pocket object to the report
        _to_dataframe(): Convert pocket data to pandas DataFrame
        _repr_html_(): Generate HTML representation of the report
        save_props(): Save pocket properties to CSV file

    Example:
        report = PocketFinderReport(protein_obj, "output.csv")
        report.add_pocket(pocket_obj)
        report.save_props()
    """

    # Column order of the report table. Passing it to the DataFrame constructor
    # keeps the header (and the sort key) present even when there are no rows.
    _COLUMNS = [
        "Pocket ID",
        "Color",
        "Drugability Score",
        "Volume",
        "Total SASA",
        "Polar SASA",
        "Polar/Apolar SASA Ratio",
        "Hydrophobicity",
        "Polarity",
    ]

    def __init__(self, protein, csv_file_path=""):
        self.protein = protein
        self.file_path = csv_file_path
        self.pockets = []

    def add_pocket(self, pocket):
        """
        Add a pocket to the collection of pockets.

        Args:
            pocket: A pocket object to be added to the pockets list.
        """
        self.pockets.append(pocket)

    def _to_dataframe(self):
        """
        Build the report table from the collected pockets.

        Pockets whose ``props`` is empty or None are left out, but still
        consume a "Pocket ID" (IDs are 1-based positions in ``self.pockets``).
        Missing property keys default to 0.

        Returns:
            pandas.DataFrame: One row per pocket with props, sorted by
                "Drugability Score" in descending order with a fresh index.
                An empty report yields an empty frame that still has all
                columns.
        """
        data = []
        for idx, pocket in enumerate(self.pockets):
            props = pocket.props
            if props:
                data.append(
                    {
                        "Pocket ID": idx + 1,
                        "Color": pocket.color,
                        "Drugability Score": props.get("drugability_score", 0),
                        "Volume": props.get("volume", 0),
                        "Total SASA": props.get("total_SASA", 0),
                        "Polar SASA": props.get("polar_SASA", 0),
                        "Polar/Apolar SASA Ratio": props.get("polar_apolar_SASA_ratio", 0),
                        "Hydrophobicity": props.get("hydrophobicity", 0),
                        "Polarity": props.get("polarity", 0),
                    }
                )

        # Explicit columns: without them an empty `data` produces a frame with
        # no columns and the sort below fails with KeyError.
        df = pd.DataFrame(data, columns=self._COLUMNS)
        # Sort by Drugability Score descending
        df = df.sort_values(by="Drugability Score", ascending=False).reset_index(drop=True)
        return df

    def _repr_html_(self):
        """Return the report table as HTML, with numbers formatted to 3 digits of precision."""
        df = self._to_dataframe()
        return df.style.format(precision=3)._repr_html_()

    def save_props(self):
        """
        Saves the properties of the report to a CSV file.

        This method converts the internal data structure to a pandas DataFrame and saves it
        to the file path specified in self.file_path attribute. The DataFrame is saved
        without the index column.

        Returns:
            None
        """
        df = self._to_dataframe()
        df.to_csv(self.file_path, index=False)

PocketFinderReport class for managing protein pocket analysis results.

A class to handle collection and reporting of protein pocket properties including drugability scores, volumes, surface areas and other physicochemical properties.

Attributes

protein
The protein object associated with this report
file_path : str
Path to save the CSV report file
pockets : list
List of pocket objects containing analysis results

Methods

  • add_pocket(pocket): Add a pocket object to the report
  • _to_dataframe(): Convert pocket data to pandas DataFrame
  • _repr_html_(): Generate HTML representation of the report
  • save_props(): Save pocket properties to CSV file

Example

report = PocketFinderReport(protein_obj, "output.csv")
report.add_pocket(pocket_obj)
report.save_props()

Methods

def add_pocket(self, pocket)
Expand source code
def add_pocket(self, pocket):
    """
    Append a pocket to ``self.pockets``.

    The pocket is stored as given (no copy, no validation) and nothing is
    returned.

    Args:
        pocket: A pocket object to be added to the pockets list.
    """
    self.pockets.append(pocket)

Add a pocket to the collection of pockets.

Args

pocket
A pocket object to be added to the pockets list.
def save_props(self)
Expand source code
def save_props(self):
    """
    Write the report table to ``self.file_path`` as CSV.

    The table is produced by ``self._to_dataframe()`` and written without the
    index column.

    Returns:
        None
    """
    self._to_dataframe().to_csv(self.file_path, index=False)

Saves the properties of the report to a CSV file.

This method converts the internal data structure to a pandas DataFrame and saves it to the file path specified in self.file_path attribute. The DataFrame is saved without the index column.

Returns

None

class Protein (pdb_id: str = '',
file_path: str = '',
struct_ind: int = 0,
block_type: str = '',
block_content: str = '')
Expand source code
class Protein:
    def __init__(
        self, pdb_id: str = "", file_path: str = "", struct_ind: int = 0, block_type: str = "", block_content: str = ""
    ):
        """
        Initialize a Protein object from various input sources.

        This constructor can create a Protein object from a PDB ID, file path, or direct content block.
        It handles structure loading, file management, and basic protein information setup.

        Args:
            pdb_id (str, optional): PDB identifier to download and load protein structure. Defaults to "".
            file_path (str, optional): Path to a local protein structure file. Defaults to "".
            struct_ind (int, optional): Index of the structure to select if multiple structures exist. Defaults to 0.
            block_type (str, optional): File format type (e.g., "pdb", "pdbqt"). Required if using block_content. Defaults to "".
            block_content (str, optional): Direct string content of a protein structure file. Defaults to "".

        Raises:
            ValueError: If not exactly one source (pdb_id, file_path, or block_content) is provided.
            ValueError: If block_type is not provided when using block_content.
            ValueError: If file format is not supported (only pdb/pdbqt are supported).
            ValueError: If structure cannot be loaded.
            FileNotFoundError: If the specified file_path does not exist.

        Attributes:
            pdb_id (str): PDB identifier of the protein.
            file_path (Path): Absolute path to the protein structure file.
            struct_ind (int): Index of the selected structure.
            name (str): Name of the protein structure.
            structure: Loaded protein structure object.
            atom_types: Types of atoms in the structure.
            info (dict): Additional protein information.
            block_type (str): Type of structure file format.
            block_content (str): Content of the structure file.
        """
        self.pdb_id = None
        self.file_path = None
        self.struct_ind = struct_ind
        self.name = None
        self.structure = None
        self.atom_types = None
        self.info = None

        file_path_obj = Path(file_path) if file_path else None
        extension = file_path_obj.suffix.lower() if file_path_obj else ""
        if not block_type and extension:
            block_type = extension.lstrip(".")  # Remove the leading dot
        self.block_type = block_type.lower()
        self.block_content = block_content

        sources_provided = sum(bool(x) for x in [pdb_id, file_path, block_content])
        if sources_provided != 1:
            raise ValueError("Please provide exactly one of pdb_id, file_path, or block_content.")

        from_block = False
        try:
            if pdb_id:
                self.file_path = Path(self.download_protein_by_pdb_id(pdb_id)).absolute()
                if not self.block_type:
                    self.block_type = self.file_path.suffix.lstrip(".").lower()

                self.info = get_protein_info_dict(pdb_id)
                self.block_content = self.file_path.read_text()

            elif file_path:
                self.file_path = Path(file_path).absolute()
                if not self.file_path.exists():
                    raise FileNotFoundError(f"The file {self.file_path} does not exist.")

                if not self.block_type:
                    self.block_type = self.file_path.suffix.lstrip(".").lower()

                self.block_content = self.file_path.read_text()

                if not pdb_id and not self.info:
                    try:
                        protein_file_dir = self.get_directory()
                        if protein_file_dir != str(self.file_path.parent):
                            destination = Path(protein_file_dir) / self.file_path.name
                            shutil.copy2(self.file_path, destination)

                            self.file_path = destination

                    except Exception as e:
                        DEFAULT_LOGGER.log_error(f"Failed to copy file to destination: {str(e)}")
                        raise
            elif block_content:
                self.block_content = block_content
                if not self.block_type:
                    raise ValueError("block_type must be provided when initializing with block_content.")
                from_block = True

            if self.block_content:
                if self.block_type not in ["pdb", "pdbqt"]:
                    raise ValueError(f"Only pdb/pdbqt file formats are supported (given {self.block_type})")
                self.structure = self.load_structure_from_block(self.block_content, self.block_type)

            if self.structure is None:
                raise ValueError("Structure could not be loaded.")

            DEFAULT_LOGGER.log_info(
                f"Loaded structure from {self.file_path}. Selected structure index: {self.struct_ind}"
            )
            self.structure = self.select_structure(self.struct_ind)

            if self.name is None:
                if self.file_path:
                    self.name = self.file_path.stem
                else:
                    self.name = "Unknown_Structure"
                    protein_file_dir = self.get_directory()
                    directory = Path(protein_file_dir)
                    num = len(list(directory.glob(f"{self.name}*")))
                    self.name = f"{self.name}_{num + 1}"

            self.atom_types = self.structure.atom_name

            if from_block:
                protein_file_dir = self.get_directory()
                directory = Path(protein_file_dir)

                self.file_path = directory / f"{self.name}.{self.block_type}"
                self.write_to_file(self.file_path)

        except Exception as e:
            DEFAULT_LOGGER.log_error(f"Failed to initialize Protein: {str(e)}")
            raise

    @property
    def coordinates(self):
        """
        Gets the atomic coordinates of the protein structure.

        Returns:
            numpy.ndarray: A numpy array containing the 3D coordinates of all atoms in the structure.
            The array has shape (n_atoms, 3) where each row represents the x, y, z coordinates of an atom.
        """
        return self.structure.coord

    def prepare(self, model_loops: bool = False, pdb_id: str = "") -> "Protein":
        """
        Prepare the protein structure and return the result as a new Protein.

        Args:
            model_loops (bool, optional): Whether to model missing loops in the structure.
                Requires a PDB ID (argument or ``self.pdb_id``) if True. Defaults to False.
            pdb_id (str, optional): PDB ID of the protein structure. Falls back to ``self.pdb_id``
                when empty. Defaults to empty string.

        Returns:
            Protein: A new Protein instance loaded from the prepared structure file. Its ``pdb_id``
            is the ID used for this preparation (the explicit argument, else ``self.pdb_id``).

        Raises:
            ValueError: If model_loops is True but no PDB ID is available.
            Exception: If the preparation response contains no prepared protein content.

        Notes:
            This method:
            - Collects metal and cofactor residue names via ``extract_metals_and_cofactors`` and
              keeps only the metals listed in ``METALS``
            - Sends the structure through the module-level ``prepare`` pipeline
            - Writes the prepared structure next to the source file with a '_prep.pdb' suffix
            - Creates and returns a new Protein instance from that file
        """
        pdb_id = pdb_id if pdb_id else self.pdb_id
        if model_loops and not pdb_id:
            raise ValueError("PDB ID must be provided to model loops.")

        metal_resnames, cofactor_resnames = self.extract_metals_and_cofactors()
        metals_to_keep = [resname for resname in metal_resnames if resname.upper() in METALS]

        response = prepare(
            protein_path=self.file_path,
            protein_pdb_id=pdb_id,
            protein_extension=self.block_type,
            metal_resnames=metals_to_keep,
            cofactor_resnames=cofactor_resnames,
            model_loops=model_loops,
        )
        if not response["prepared_protein_content"]:
            raise Exception("Failed to prepare protein.")

        # The prepared file lives next to the source file: "<stem>_prep.pdb".
        source_path = Path(self.file_path)
        new_file_name = source_path.parent / f"{source_path.stem}_prep.pdb"

        # Parse the returned PDB text first, then write it out under the final name.
        intermediate_protein = Protein(block_content=response["prepared_protein_content"], block_type="pdb")
        intermediate_protein.write_to_file(str(new_file_name))

        protein = Protein(file_path=new_file_name)
        # Carry over the ID actually used for preparation, so an explicitly passed pdb_id
        # is not lost on the returned object.
        protein.pdb_id = pdb_id

        return protein

    def load_structure_from_block(self, block_content: str, block_type: str):
        """
        Load a molecular structure from a text block.

        This method reads a structure from a string content block in either PDB or PDBQT format
        and returns a Structure object.

        Args:
            block_content (str): String containing the structure data in PDB/PDBQT format
            block_type (str): Format type of the block content ('pdb' or 'pdbqt')

        Returns:
            Structure: A Structure object representing the molecular structure

        Raises:
            ValueError: If the block_type is not supported (must be 'pdb' or 'pdbqt')

        Examples:
            >>> protein = Protein()
            >>> pdb_content = "ATOM      1  N   ASN A   1      27.961  28.064  39.573  1.00 23.02           N"
            >>> structure = protein.load_structure_from_block(pdb_content, "pdb")
        """
        if block_type in ["pdb", "pdbqt"]:
            pdb_file = PDBFile.read(io.StringIO(block_content))
            structure = pdb_file.get_structure()
        else:
            raise ValueError(f"Unsupported block type: {block_type}")
        return structure

    @staticmethod
    def download_protein_by_pdb_id(pdb_id: str, save_dir: str = "") -> str:
        """
        Download the PDB-format file for a PDB ID, reusing an existing local copy if present.

        Args:
            pdb_id (str): PDB ID of the structure; it is lower-cased before use.
            save_dir (str, optional): Directory where the file is stored. When empty,
                ``Protein.get_directory()`` is used. The directory is created if missing.
                Defaults to "".

        Returns:
            str: Path of the local file, ``<save_dir>/<pdb_id>.pdb``.

        Raises:
            Exception: Any error raised by the download is logged and re-raised.

        Example:
            >>> file_path = Protein.download_protein_by_pdb_id("1abc", "/path/to/save/")
            >>> print(file_path)
            /path/to/save/1abc.pdb
        """
        target_dir = Path(Protein.get_directory() if save_dir == "" else save_dir)
        pdb_id = pdb_id.lower()
        target_dir.mkdir(parents=True, exist_ok=True)
        file_path = target_dir / f"{pdb_id}.pdb"

        # A cached copy short-circuits the download entirely.
        if file_path.exists():
            DEFAULT_LOGGER.log_info(f"PDB file {file_path} already exists. Skipping download.")
            return str(file_path)

        try:
            fetch(pdb_id, "pdb", target_dir)
            DEFAULT_LOGGER.log_info(f"Downloaded PDB {pdb_id} to {file_path}.")
        except Exception as e:
            DEFAULT_LOGGER.log_error(f"Failed to download PDB {pdb_id}: {str(e)}")
            raise

        return str(file_path)

    @staticmethod
    def load_structure(structure_file_path: str):
        """
        Read a structure from a PDB file on disk.

        Args:
            structure_file_path (str): Path to the PDB structure file.

        Returns:
            The object returned by ``PDBFile.get_structure()`` for that file.

        Raises:
            Any error raised by ``PDBFile.read`` or ``get_structure`` propagates unchanged
            (e.g. for a missing or unparsable file).
        """
        return PDBFile.read(structure_file_path).get_structure()

    def select_structure(self, index: int):
        """
        Selects a specific structure from the list of available structures.

        Args:
            index (int): The index of the structure to select.

        Returns:
            The selected structure at the specified index.

        Raises:
            ValueError: If the index is out of bounds (negative or >= length of structures).
        """
        if index < 0 or index >= len(self.structure):
            raise ValueError(f"Invalid structure index {index}. Total structures: {len(self.structure)}")

        return self.structure[index]

    def _filter_hetatm_records(self, exclude_water: bool = True, keep_resnames: Optional[List[str]] = None):
        """
        Filter HETATM records from the structure based on specified criteria.

        This method filters heterogeneous atom (HETATM) records from the structure,
        with options to exclude water molecules and keep only specific residue names.

        Args:
            exclude_water (bool, optional): Whether to exclude water molecules from the results.
                Removes 'HOH' and 'WAT' residues if True. Defaults to True.
            keep_resnames (List[str], optional): List of residue names to keep in the results.
                If provided, only residues with these names will be kept. Case-insensitive.
                Defaults to None.

        Returns:
            AtomArray: Filtered HETATM records as an AtomArray containing only the
                specified residues.
        """
        hetatm_records = self.structure[self.structure.hetero]
        res_names_upper = np.char.upper(hetatm_records.res_name)

        if exclude_water:
            water_residue_names = ["HOH", "WAT"]
            water_residue_names_upper = [name.upper() for name in water_residue_names]
            hetatm_records = hetatm_records[~np.isin(res_names_upper, water_residue_names_upper)]
            res_names_upper = np.char.upper(hetatm_records.res_name)

        if keep_resnames:
            keep_resnames_upper = [name.upper() for name in keep_resnames]
            hetatm_records = hetatm_records[np.isin(res_names_upper, keep_resnames_upper)]

        return hetatm_records

    def _filter_chain_records(self, chain_ids: Optional[List[str]] = None):
        """
        Filter structure records based on specified chain IDs.

        Args:
            chain_ids (Optional[List[str]]): List of chain IDs to filter by. If None or contains "ALL",
            returns all chains.

        Returns:
            Structure: Filtered structure records containing only specified chains.
        """

        if chain_ids is None or "ALL" in chain_ids:
            return self.structure
        else:
            return self.structure[np.isin(self.structure.chain_id, chain_ids)]

    def list_chain_names(self) -> List[str]:
        """
        Retrieves a list of unique chain identifiers from the protein structure.

        Returns:
            List[str]: A list of unique chain IDs present in the protein structure. Each chain ID is
                      typically a single character that identifies a specific polypeptide chain.
        """
        chain_records = self._filter_chain_records()
        chain_ids = np.unique(chain_records.chain_id)
        return list(chain_ids)

    def list_hetero_names(self, exclude_water=True) -> List[str]:
        """
        Returns a list of unique hetero atom residue names from the structure.

        Args:
            exclude_water (bool): If True, excludes water molecules (HOH) from the returned list.
            Defaults to True.

        Returns:
            List[str]: A list of unique hetero residue names found in the structure.
            Common examples include small molecules, ions, and modified amino acids.
        """
        hetatm_records = self._filter_hetatm_records(exclude_water=exclude_water)
        ligand_res_names = np.unique(hetatm_records.res_name)
        return list(ligand_res_names)

    def select_chain(self, chain_id: str) -> Optional["Protein"]:
        """
        Selects a specific chain from the protein structure and returns a new Protein object.

        Args:
            chain_id (str): The identifier of the chain to be selected.

        Returns:
            Optional[Protein]: A new Protein object containing only the selected chain.
                Returns None if the chain is not found.

        Raises:
            ValueError: If the specified chain_id is not found in the protein structure.

        Example:
            >>> protein = Protein("1abc.pdb")
            >>> chain_a = protein.select_chain("A")
        """
        chain_records = self._filter_chain_records(chain_ids=[chain_id])
        if len(chain_records) > 0:
            return self._create_new_protein_with_structure(chain_records, suffix=f"_chain_{chain_id}")
        else:
            raise ValueError(f"Chain {chain_id} not found.")

    def select_chains(self, chain_ids: List[str]) -> "Protein":
        """
        Select specific chains from the protein structure and create a new Protein object.

        Args:
            chain_ids (List[str]): A list of chain identifiers to select from the protein structure.

        Returns:
            Protein: A new Protein object containing only the selected chains.

        Raises:
            ValueError: If no chains are found for the provided chain IDs.

        Example:
            >>> protein.select_chains(['A', 'B'])
            # Returns a new Protein object with only chains A and B
        """
        chain_records = self._filter_chain_records(chain_ids=chain_ids)
        if len(chain_records) == 0:
            raise ValueError(f"No chains found for the provided chain IDs: {chain_ids}")
        return self._create_new_protein_with_structure(chain_records, suffix=f"_chains_{'_'.join(chain_ids)}")

    def select_ligand(self, res_name: str) -> "Ligand":
        """
        Selects and processes ligands from the protein structure based on the residue name.

        This method identifies ligand atoms in the structure, attempts to fetch their SMILES
        representation, and creates Ligand objects for each unique residue occurrence.

        Args:
            res_name (str): The residue name of the ligand to select.

        Returns:
            List[Ligand]: A list of Ligand objects, each representing a unique instance
            of the specified ligand in the structure.

        Raises:
            ValueError: If the specified residue name is not found in the structure's
                       hetero residues, or if no atoms are found for the specified ligand.

        Notes:
            - The method first attempts to fetch SMILES from PDB API
            - If SMILES fetch fails, it attempts to use OpenBabel for SMILES extraction
            - Each Ligand object contains the atomic coordinates and chemical structure information
            - Bond orders are assigned using SMILES when available

        Example:
            >>> protein = Protein("1abc.pdb")
            >>> ligands = protein.select_ligand("ATP")
            >>> print(len(ligands))  # Number of ATP molecules in structure
        """
        hetero_names = self.list_hetero_names()
        if res_name not in hetero_names:
            raise ValueError(f"Residue {res_name} not found. Available ligands are: {hetero_names}")

        ligand_atoms = self.structure[(self.structure.res_name == res_name) & self.structure.hetero]
        if len(ligand_atoms) == 0:
            raise ValueError(f"No atoms found for ligand {res_name}.")

        try:
            smiles = Ligand.fetch_smiles_from_pdb_api(res_name)
            DEFAULT_LOGGER.log_warning(f"SMILES for {res_name}: {smiles}")
        except Exception:
            DEFAULT_LOGGER.log_warning(f"Failed to fetch SMILES for {res_name}.")
            smiles = None

        chain_ids = ligand_atoms.chain_id
        res_ids = ligand_atoms.res_id
        ins_codes = ligand_atoms.ins_code

        residue_tuples = list(zip(chain_ids, res_ids, ins_codes))
        unique_residue_tuples = list(set(residue_tuples))

        ligands = []
        for chain_id, res_id, ins_code in unique_residue_tuples:
            mask = (
                (ligand_atoms.chain_id == chain_id)
                & (ligand_atoms.res_id == res_id)
                & (ligand_atoms.ins_code == ins_code)
            )

            ligand_group = ligand_atoms[mask]

            pdb_file = PDBFile()
            pdb_file.set_structure(ligand_group)
            pdb_block = io.StringIO()
            pdb_file.write(pdb_block)
            block_content, block_type = pdb_block.getvalue(), "pdb"
            if not smiles:
                DEFAULT_LOGGER.log_warning(f"PROCEEDING WITH OPEN BABEL TO EXTRACT SMILES FOR {res_name}")
                try:
                    block_content, block_type = convert_block("pdb", block_content, "sdf"), "sdf"
                except Exception as _:
                    DEFAULT_LOGGER.log_error(f"Failed to convert block to SDF. Please provide smiles manually.")
                    return

            ligand = Ligand(
                block_content=block_content,
                block_type=block_type,
                name=res_name,
                xref_protein=self,
                xref_ins_code=ins_code,
                xref_residue_id=res_id,
                xref_protein_chain_id=chain_id,
            )

            if smiles:
                ligand.mol.assign_bond_order_from_smiles(smiles)

            ligands.append(ligand)

        return ligands

    def select_ligands(self, res_names: List[str]) -> List["Ligand"]:
        """
        Selects and returns a list of ligands based on their residue names.

        Args:
            res_names (List[str]): A list of residue names to select ligands for.
                If None, all heterogeneous residue names will be used.

        Returns:
            List["Ligand"]: A list of Ligand objects matching the specified residue names.
                Returns an empty list if no matching ligands are found.

        Note:
            If a residue name is not found, a warning will be logged and the selection will continue
            with the remaining residue names.
        """
        if res_names is None:
            res_names = self.list_hetero_names()

        ligands = []
        for res_name in res_names:
            try:
                ligand = self.select_ligand(res_name)
                ligands.extend(ligand)
            except ValueError as e:
                DEFAULT_LOGGER.log_warning(str(e))
        return ligands

    def remove_hetatm(self, keep_resnames: Optional[List[str]] = None, remove_metals: Optional[List[str]] = None):
        """
        Remove HETATM records from the protein structure while allowing specific residues and metals to be kept.

        Args:
            keep_resnames (Optional[List[str]]): List of residue names to keep in the structure despite being HETATM records.
            Names are case-insensitive. Defaults to None.
            remove_metals (Optional[List[str]]): List of metal names to remove from the structure.
            By default, all metals are kept. Names are case-insensitive. Defaults to None.

        Returns:
            Protein: A new Protein object containing the filtered structure with the suffix "_no_hetatm"

        Examples:
            >>> protein.remove_hetatm(keep_resnames=['NAG', 'BMA'], remove_metals=['ZN', 'MG'])
            >>> protein.remove_hetatm()  # Removes all HETATM except metals
            >>> protein.remove_hetatm(keep_resnames=['HOH'])  # Keeps water molecules
        """

        metals = METALS
        if remove_metals:
            exclude_metals_upper = [metal.upper() for metal in remove_metals]
            metals = list(set(METALS) - set(exclude_metals_upper))

        if not metals and not keep_resnames:
            filtered_structure = self.structure[~self.structure.hetero]
        else:
            keep_resnames_upper = [res.upper() for res in keep_resnames] if keep_resnames else []
            keep_resnames_upper.extend(metals)
            keep_resnames_set = list(set(keep_resnames_upper))

            hetatm_to_keep = self._filter_hetatm_records(keep_resnames=keep_resnames_set)
            hetatm_indices_to_keep = np.isin(self.structure.res_id, hetatm_to_keep.res_id)
            filtered_structure = self.structure[~self.structure.hetero | hetatm_indices_to_keep]

        return self._create_new_protein_with_structure(filtered_structure, suffix="_no_hetatm")

    def remove_resnames(self, exclude_resnames: Optional[List[str]] = None) -> "Protein":
        """
        Remove residues from the protein structure based on their residue names.

        Args:
            exclude_resnames (List[str], optional): List of residue names to exclude from the structure.
            If None, returns a copy of the original structure.

        Returns:
            Protein: A new Protein instance with specified residues removed.
            The new instance has '_resnames_removed' suffix added to its name.

        Examples:
            >>> protein.remove_resnames(['ALA', 'GLY'])  # removes all alanine and glycine residues
            >>> protein.remove_resnames()  # returns a copy of the protein
        """
        if exclude_resnames is not None:
            b_resn = np.isin(self.structure.res_name, exclude_resnames)
            filtered_structure = self.structure[~b_resn]
        else:
            filtered_structure = self.structure.copy()
        return self._create_new_protein_with_structure(filtered_structure, suffix="_resnames_removed")

    def remove_water(self) -> "Protein":
        """
        Build a new Protein without the atoms selected by ``filter_solvent``.

        Returns:
            Protein: The result of ``_create_new_protein_with_structure`` for the atoms not
            flagged by ``filter_solvent``, created with the suffix '_no_water'.
        """
        solvent_mask = filter_solvent(self.structure)
        without_solvent = self.structure[~solvent_mask]
        return self._create_new_protein_with_structure(without_solvent, suffix="_no_water")

    def extract_metals_and_cofactors(self) -> Tuple[List[str], List[str]]:
        """
        Classify the non-water HETATM residues of the structure as metals or cofactors.

        Hetero atoms are grouped per residue by (chain ID, residue ID, insertion code). A residue
        counts as a metal when every one of its atoms has an element symbol from the local
        ``metal_elements`` set; otherwise it counts as a cofactor.

        Returns:
            Tuple[List[str], List[str]]: Two lists of upper-cased, stripped residue names:
                - residue names classified as metals
                - residue names classified as cofactors
            Each list has unique entries; their order is not defined.

        Notes:
            - Residues named exactly 'HOH' or 'WAT' are excluded before classification
            - The residue name of a group is taken from its first atom
            - Both lists are logged with DEFAULT_LOGGER
        """
        hetero_atoms = self.structure[self.structure.hetero]
        is_water = np.isin(hetero_atoms.res_name, ["HOH", "WAT"])
        hetero_atoms = hetero_atoms[~is_water]

        metal_elements = {
            "AC", "AG", "AL", "AM", "AS", "AU", "B", "BA", "BE", "BH", "BI", "BK", "CA", "CD", "CE", "CF",
            "CM", "CN", "CS", "CU", "DB", "DS", "DY", "ER", "ES", "EU", "FE", "FM", "FR", "GA", "GD", "GE",
            "HF", "HG", "HO", "HS", "K", "LA", "LI", "LR", "LU", "MD", "MG", "MN", "MO", "MT", "NA", "NB",
            "ND", "NI", "NO", "NP", "OS", "PA", "TA", "PM", "PO", "PR", "PT", "PU", "RA", "RB", "RE", "RF",
            "RG", "RH", "RU", "SB", "SC", "SG", "SI", "SM", "SN", "SR", "TB", "TC", "TE", "TH", "TI", "TL",
            "TM", "U", "V", "W", "YB", "ZN", "ZR", "CO", "CR", "IN", "IR", "PB", "PD",
        }

        # Collect the atoms of each residue occurrence under one key.
        atoms_by_residue = defaultdict(list)
        for atom in hetero_atoms:
            atoms_by_residue[(atom.chain_id, atom.res_id, atom.ins_code)].append(atom)

        metal_names = set()
        cofactor_names = set()
        for residue_atoms in atoms_by_residue.values():
            res_name = residue_atoms[0].res_name.strip().upper()
            has_non_metal = any(atom.element.strip().upper() not in metal_elements for atom in residue_atoms)
            bucket = cofactor_names if has_non_metal else metal_names
            bucket.add(res_name)

        metal_resnames = list(metal_names)
        cofactor_resnames = list(cofactor_names)

        DEFAULT_LOGGER.log_info(f"Identified metal residues: {metal_resnames}")
        DEFAULT_LOGGER.log_info(f"Identified cofactor residues: {cofactor_resnames}")

        return metal_resnames, cofactor_resnames

    def _create_new_protein_with_structure(self, new_structure, suffix: str = "_modified") -> "Protein":
        """
        Write a modified structure to a new PDB file and load it as a new Protein.

        Args:
            new_structure: The modified protein structure to be saved.
            suffix (str, optional): Suffix to append to the base filename. Defaults to "_modified".

        Returns:
            Protein: A new Protein instance loaded from the written file.

        Raises:
            Exception: Any error raised while writing the file or constructing the new Protein
                is logged and re-raised.

        Notes:
            - If the target file already exists, it is removed and rewritten
            - If this protein has no file path, the file is created in the system temp directory
              under the base name "modified_structure"
            - Otherwise the new file sits next to the original, named "<stem><suffix>.pdb"
        """
        source_path = self.file_path
        base_name = source_path.stem if source_path else "modified_structure"
        parent_dir = source_path.parent if source_path else Path(tempfile.gettempdir())
        new_file_path = parent_dir / f"{base_name}{suffix}.pdb"

        if new_file_path.exists():
            DEFAULT_LOGGER.log_warning(f"File {new_file_path} already exists. Overwriting.")
            os.remove(new_file_path)

        try:
            pdb_file = PDBFile()
            pdb_file.set_structure(new_structure)
            pdb_file.write(str(new_file_path))

            DEFAULT_LOGGER.log_info(f"Created new file with modified structure at {new_file_path}")

            return Protein(file_path=str(new_file_path))
        except Exception as e:
            DEFAULT_LOGGER.log_error(f"Failed to create new Protein with modified structure: {str(e)}")
            # Propagate instead of silently returning None, which callers would treat as a Protein.
            raise

    def write_to_file(self, file_path: str):
        """
        Write the current protein structure to a file in PDB format.

        Args:
            file_path (str): The path where the PDB file should be written.

        Raises:
            Exception: Any error raised while writing is logged and re-raised, so callers
                never continue with a file that was not written.

        Example:
            >>> protein.write_to_file("/path/to/output.pdb")
        """
        try:
            pdb_file = PDBFile()
            pdb_file.set_structure(self.structure)
            pdb_file.write(file_path)
            DEFAULT_LOGGER.log_info(f"Current structure written to {file_path}.")
        except Exception as e:
            DEFAULT_LOGGER.log_error(f"Failed to write structure to file {file_path}: {str(e)}")
            raise

    @jupyter_visualization
    def visualize(self) -> str:
        """
        Render the protein with ProteinViewer and return the resulting HTML.

        The current structure is written to a uniquely named PDB file in the system temp
        directory, which is then handed to ``ProteinViewer``.

        Returns:
            str: HTML returned by ``ProteinViewer.render_protein``. If anything fails, an HTML
                 paragraph containing the error message is returned instead.

        Raises:
            Nothing from the body: every exception is caught, logged, and turned into the error
            HTML described above.

        Example:
            >>> protein = Protein("1abc")
            >>> html = protein.visualize()
            >>> # html contains visualization that can be displayed in browser
        """
        try:
            token = uuid.uuid4().hex
            temp_dir = Path(tempfile.gettempdir())
            temp_pdb_path = str(temp_dir / f"{self.name}_visualize_{token}.pdb")
            self.write_to_file(temp_pdb_path)

            viewer = ProteinViewer(temp_pdb_path)
            config = viewer.get_protein_visualization_config()
            return viewer.render_protein(protein_config=config)
        except Exception as e:
            DEFAULT_LOGGER.log_error(f"Visualization failed: {str(e)}")
            return f"<p>Visualization failed: {str(e)}</p>"

    def _repr_html_(self) -> str:
        """
        Generate an HTML representation of the protein for display in Jupyter notebooks.

        Returns:
            str: HTML string containing either protein info or a 3D visualization.
                 Falls back to string representation if visualization fails.
        """
        try:
            if self.info:
                return generate_html_output(self.info)
            return self.visualize()
        except Exception as e:
            DEFAULT_LOGGER.log_warning(f"Failed to generate HTML representation: {str(e)}")
            return self.__str__()

    def __str__(self):
        info_str = f"Name: {self.name}\nFile Path: {self.file_path}\n"
        if self.info:
            info_str += f"Info: {self.info}\n"
        return f"Protein:\n  {info_str}"

    @staticmethod
    def get_directory() -> str:
        """
        Return the base directory used for protein files, creating it if needed.

        The directory is ``<WORKING_DIR>/proteins``; missing parent directories
        are created as well.

        Returns:
            str: The proteins directory path as a string.
        """
        target = Path(WORKING_DIR).joinpath("proteins")
        target.mkdir(parents=True, exist_ok=True)
        return str(target)

    def update_coordinates(self, coords: np.ndarray):
        """
        Replace the coordinates of the protein structure in place.

        Assigns ``coords`` to ``self.structure.coord`` and logs the update. No new
        Protein object is created and nothing is written to disk by this method.

        Args:
            coords (np.ndarray): New coordinates to assign. This method performs no
                shape check of its own.
                NOTE(review): presumably the structure object validates the shape
                on assignment — confirm.

        Returns:
            None
        """
        self.structure.coord = coords
        DEFAULT_LOGGER.log_info("Protein coordinates has been inplaced updated.")

    def get_center_by_residues(self, residues: List[int]) -> Tuple[list, str, object]:
        """
        Compute the centroid of the atoms in the given residues and render them highlighted.

        Args:
            residues (List[int]): 1-3 integer residue IDs (Python or NumPy integers).

        Returns:
            tuple: ``(center, warning, view)`` where
                - center (list): centroid coordinates [x, y, z]
                - warning (str): message naming requested IDs that are absent from
                  the structure, or an empty string if all were found
                - view: the result of ``JupyterViewer.visualize`` for the rendered
                  HTML (an empty string is passed when the render contains no
                  "ATOM" text)

        Raises:
            ValueError: If the number of residues is not between 1 and 3
            ValueError: If any residue ID is not an integer
            ValueError: If no atoms are found for the specified residue IDs

        Notes:
            Residues are matched on residue ID only, so an ID that occurs in
            several chains selects the atoms from all of them.

        Examples:
            >>> center, warning, viewer = protein.get_center_by_residues([1, 2, 3])
            >>> print(center)  # [x, y, z]
            >>> print(warning)  # Empty string or warning about missing residues
        """
        if not (1 <= len(residues) <= 3):
            raise ValueError(f"Invalid number of residue IDs: provide 1-3 residue IDs, got {len(residues)}")

        for res_id in residues:
            # NumPy integers (e.g. IDs taken from structure.res_id) are valid too.
            if not isinstance(res_id, (int, np.integer)):
                raise ValueError(f"Residue IDs must be integers. Got: {res_id}")

        # Normalize to plain ints so downstream consumers never see NumPy scalars.
        residue_ids = [int(res_id) for res_id in residues]

        mask = np.isin(self.structure.res_id, residue_ids)
        pocket_atoms = self.structure[mask]
        if len(pocket_atoms) == 0:
            raise ValueError(f"No atoms found for the specified residue IDs: {residues}")

        warning = ""
        missing_residue_ids = set(residue_ids) - set(pocket_atoms.res_id)
        if missing_residue_ids:
            warning = f"Residue IDs {missing_residue_ids} not found in the structure"

        center = centroid(pocket_atoms)

        # The PDB file only needs to exist while the viewer renders it.
        with tempfile.TemporaryDirectory() as temp_dir:
            protein_format = "pdb"
            protein_path = os.path.join(temp_dir, "protein.pdb")
            self.write_to_file(protein_path)

            docking_viewer = DockingViewer()
            html = docking_viewer.render_highligh_residues(
                protein_data=protein_path, protein_format=protein_format, residue_ids=residue_ids
            )

            # A render without any ATOM text is treated as empty.
            if "ATOM" not in html:
                html = ""

        return list(center), warning, JupyterViewer.visualize(html)

Initialize a Protein object from various input sources.

This constructor can create a Protein object from a PDB ID, file path, or direct content block. It handles structure loading, file management, and basic protein information setup.

Args

pdb_id : str, optional
PDB identifier to download and load protein structure. Defaults to "".
file_path : str, optional
Path to a local protein structure file. Defaults to "".
struct_ind : int, optional
Index of the structure to select if multiple structures exist. Defaults to 0.
block_type : str, optional
File format type (e.g., "pdb", "pdbqt"). Required if using block_content. Defaults to "".
block_content : str, optional
Direct string content of a protein structure file. Defaults to "".

Raises

ValueError
If not exactly one source (pdb_id, file_path, or block_content) is provided.
ValueError
If block_type is not provided when using block_content.
ValueError
If file format is not supported (only pdb/pdbqt are supported).
ValueError
If structure cannot be loaded.
FileNotFoundError
If the specified file_path does not exist.

Attributes

pdb_id : str
PDB identifier of the protein.
file_path : Path
Absolute path to the protein structure file.
struct_ind : int
Index of the selected structure.
name : str
Name of the protein structure.
structure
Loaded protein structure object.
atom_types
Types of atoms in the structure.
info : dict
Additional protein information.
block_type : str
Type of structure file format.
block_content : str
Content of the structure file.

Static methods

def download_protein_by_pdb_id(pdb_id: str, save_dir: str = '') ‑> str
Expand source code
@staticmethod
def download_protein_by_pdb_id(pdb_id: str, save_dir: str = "") -> str:
    """
    Fetch a structure from the PDB as a ``.pdb`` file, reusing an existing download.

    The PDB ID is lower-cased and the file is stored as ``<save_dir>/<pdb_id>.pdb``.
    When that file already exists nothing is downloaded.

    Args:
        pdb_id (str): The PDB ID of the protein structure.
        save_dir (str, optional): Directory in which to store the file. It is
            created if missing. When empty, ``Protein.get_directory()`` is used.
            Defaults to "".

    Returns:
        str: The path of the PDB file as a string.

    Raises:
        Exception: Whatever the download raised; it is logged and re-raised unchanged.

    Example:
        >>> file_path = Protein.download_protein_by_pdb_id("1abc", "/path/to/save/")
        >>> print(file_path)
        /path/to/save/1abc.pdb
    """
    if save_dir == "":
        save_dir = Protein.get_directory()

    normalized_id = pdb_id.lower()
    target_dir = Path(save_dir)
    target_dir.mkdir(parents=True, exist_ok=True)

    destination = target_dir / f"{normalized_id}.pdb"
    if destination.exists():
        DEFAULT_LOGGER.log_info(f"PDB file {destination} already exists. Skipping download.")
        return str(destination)

    try:
        fetch(normalized_id, "pdb", target_dir)
        DEFAULT_LOGGER.log_info(f"Downloaded PDB {normalized_id} to {destination}.")
    except Exception as exc:
        DEFAULT_LOGGER.log_error(f"Failed to download PDB {normalized_id}: {str(exc)}")
        raise

    return str(destination)

Downloads a protein structure file from the PDB database.

This function retrieves a protein structure file in PDB format from the Protein Data Bank using the provided PDB ID. If the file already exists in the specified directory, it skips the download.

Args

pdb_id : str
The 4-character PDB ID of the protein structure.
save_dir : str, optional
Directory path where the PDB file will be saved. If not provided, uses the default protein directory. Defaults to "".

Returns

str
The full path to the downloaded PDB file.

Raises

Exception
If the download fails for any reason (e.g., invalid PDB ID, network issues, etc.).

Example

>>> file_path = download_protein_by_pdb_id("1abc", "/path/to/save/")
>>> print(file_path)
/path/to/save/1abc.pdb
def get_directory() ‑> str
Expand source code
@staticmethod
def get_directory() -> str:
    """
    Return the base directory used for protein files, creating it if needed.

    The directory is ``<WORKING_DIR>/proteins``; missing parent directories
    are created as well.

    Returns:
        str: The proteins directory path as a string.
    """
    target = Path(WORKING_DIR).joinpath("proteins")
    target.mkdir(parents=True, exist_ok=True)
    return str(target)

Returns the path to the base proteins directory.

Creates the directory if it doesn't exist, using the WORKING_DIR constant as the root.

Returns

str
The absolute path to the proteins directory as a string
def load_structure(structure_file_path: str)
Expand source code
@staticmethod
def load_structure(structure_file_path: str):
    """
    Read a PDB file and return the structure it contains.

    Args:
        structure_file_path (str): Path to the PDB structure file.

    Returns:
        The object returned by ``PDBFile.get_structure()`` for that file.

    Raises:
        Any exception raised by ``PDBFile.read`` or ``get_structure`` propagates
        unchanged (presumably a missing file or unparsable content — confirm the
        exact exception types against the PDBFile implementation in use).
    """
    return PDBFile.read(structure_file_path).get_structure()

Load a protein structure from a PDB file.

Args

structure_file_path : str
Path to the PDB structure file.

Returns

Structure
A Structure object representing the protein structure.

Raises

FileNotFoundError
If the specified PDB file does not exist.
PDBParseError
If the PDB file cannot be properly parsed.

Instance variables

prop coordinates
Expand source code
@property
def coordinates(self):
    """
    Atomic coordinates of the protein structure.

    Returns:
        The ``coord`` attribute of ``self.structure``, returned as-is (no copy
        is made here). Presumably a numpy array of shape (n_atoms, 3) holding
        x, y, z per atom — confirm against the structure type in use.
    """
    structure = self.structure
    return structure.coord

Gets the atomic coordinates of the protein structure.

Returns

numpy.ndarray
A numpy array containing the 3D coordinates of all atoms in the structure.

The array has shape (n_atoms, 3) where each row represents the x, y, z coordinates of an atom.

Methods

def extract_metals_and_cofactors(self) ‑> Tuple[List[str], List[str]]
Expand source code
def extract_metals_and_cofactors(self) -> Tuple[List[str], List[str]]:
    """
    Split the non-water hetero residues of the structure into metal ions and cofactors.

    Hetero atoms are grouped per residue (chain ID, residue ID, insertion code).
    A residue whose atoms are all in the metal element set is reported as a
    metal; every other hetero residue is reported as a cofactor.

    Returns:
        Tuple[List[str], List[str]]: ``(metal_resnames, cofactor_resnames)``.
            Each list holds unique, upper-cased residue names; the order is not
            defined because the names are collected in sets.

    Notes:
        - Residues named HOH or WAT are excluded before classification
        - Both result lists are logged using DEFAULT_LOGGER
    """
    hetero_atoms = self.structure[self.structure.hetero]
    is_water = np.isin(hetero_atoms.res_name, ["HOH", "WAT"])
    hetero_atoms = hetero_atoms[~is_water]

    metal_elements = {
        "AC", "AG", "AL", "AM", "AS", "AU", "B", "BA", "BE", "BH", "BI", "BK", "CA", "CD", "CE", "CF",
        "CM", "CN", "CS", "CU", "DB", "DS", "DY", "ER", "ES", "EU", "FE", "FM", "FR", "GA", "GD", "GE",
        "HF", "HG", "HO", "HS", "K", "LA", "LI", "LR", "LU", "MD", "MG", "MN", "MO", "MT", "NA", "NB",
        "ND", "NI", "NO", "NP", "OS", "PA", "TA", "PM", "PO", "PR", "PT", "PU", "RA", "RB", "RE", "RF",
        "RG", "RH", "RU", "SB", "SC", "SG", "SI", "SM", "SN", "SR", "TB", "TC", "TE", "TH", "TI", "TL",
        "TM", "U", "V", "W", "YB", "ZN", "ZR", "CO", "CR", "IN", "IR", "PB", "PD",
    }

    # One bucket of atoms per residue instance.
    atoms_by_residue = defaultdict(list)
    for atom in hetero_atoms:
        atoms_by_residue[(atom.chain_id, atom.res_id, atom.ins_code)].append(atom)

    metal_names = set()
    cofactor_names = set()
    for members in atoms_by_residue.values():
        name = members[0].res_name.strip().upper()
        only_metal_atoms = all(member.element.strip().upper() in metal_elements for member in members)
        bucket = metal_names if only_metal_atoms else cofactor_names
        bucket.add(name)

    metal_resnames = list(metal_names)
    cofactor_resnames = list(cofactor_names)

    DEFAULT_LOGGER.log_info(f"Identified metal residues: {metal_resnames}")
    DEFAULT_LOGGER.log_info(f"Identified cofactor residues: {cofactor_resnames}")

    return metal_resnames, cofactor_resnames

Extracts metal ions and cofactor molecules from the protein structure by analyzing HETATM records.

This method processes the structure's heterogeneous atoms (HETATM records), excluding water molecules, and categorizes them into metal ions and cofactors based on their elemental composition.

Returns

Tuple[List[str], List[str]]
A tuple of two lists: the first contains the residue names of the identified metal ions, and the second contains the residue names of the identified cofactors.

Notes

  • Water molecules (HOH, WAT) are excluded from the analysis
  • Metal ions are identified by checking if all atoms in a residue are metal elements
  • Any non-metal heterogeneous molecule is classified as a cofactor
  • The results are logged using DEFAULT_LOGGER
def get_center_by_residues(self, residues: List[str]) ‑> numpy.ndarray
Expand source code
def get_center_by_residues(self, residues: List[int]) -> Tuple[list, str, object]:
    """
    Compute the centroid of the atoms in the given residues and render them highlighted.

    Args:
        residues (List[int]): 1-3 integer residue IDs (Python or NumPy integers).

    Returns:
        tuple: ``(center, warning, view)`` where
            - center (list): centroid coordinates [x, y, z]
            - warning (str): message naming requested IDs that are absent from
              the structure, or an empty string if all were found
            - view: the result of ``JupyterViewer.visualize`` for the rendered
              HTML (an empty string is passed when the render contains no
              "ATOM" text)

    Raises:
        ValueError: If the number of residues is not between 1 and 3
        ValueError: If any residue ID is not an integer
        ValueError: If no atoms are found for the specified residue IDs

    Notes:
        Residues are matched on residue ID only, so an ID that occurs in
        several chains selects the atoms from all of them.

    Examples:
        >>> center, warning, viewer = protein.get_center_by_residues([1, 2, 3])
        >>> print(center)  # [x, y, z]
        >>> print(warning)  # Empty string or warning about missing residues
    """
    if not (1 <= len(residues) <= 3):
        raise ValueError(f"Invalid number of residue IDs: provide 1-3 residue IDs, got {len(residues)}")

    for res_id in residues:
        # NumPy integers (e.g. IDs taken from structure.res_id) are valid too.
        if not isinstance(res_id, (int, np.integer)):
            raise ValueError(f"Residue IDs must be integers. Got: {res_id}")

    # Normalize to plain ints so downstream consumers never see NumPy scalars.
    residue_ids = [int(res_id) for res_id in residues]

    mask = np.isin(self.structure.res_id, residue_ids)
    pocket_atoms = self.structure[mask]
    if len(pocket_atoms) == 0:
        raise ValueError(f"No atoms found for the specified residue IDs: {residues}")

    warning = ""
    missing_residue_ids = set(residue_ids) - set(pocket_atoms.res_id)
    if missing_residue_ids:
        warning = f"Residue IDs {missing_residue_ids} not found in the structure"

    center = centroid(pocket_atoms)

    # The PDB file only needs to exist while the viewer renders it.
    with tempfile.TemporaryDirectory() as temp_dir:
        protein_format = "pdb"
        protein_path = os.path.join(temp_dir, "protein.pdb")
        self.write_to_file(protein_path)

        docking_viewer = DockingViewer()
        html = docking_viewer.render_highligh_residues(
            protein_data=protein_path, protein_format=protein_format, residue_ids=residue_ids
        )

        # A render without any ATOM text is treated as empty.
        if "ATOM" not in html:
            html = ""

    return list(center), warning, JupyterViewer.visualize(html)

Calculate the geometric center (centroid) of the specified residues and visualize them. This method computes the centroid of the atoms belonging to the specified residues and provides a visualization of these residues in the protein structure.

Args

residues : List[str]
A list of 1-3 residue IDs to analyze.

Returns

tuple
A tuple containing three items: a list with the coordinates of the center point [x, y, z]; a str warning message if any residues were not found (an empty string if all were found); and an IPython.display.HTML object with an interactive 3D visualization of the protein with the residues highlighted.

Raises

ValueError
If the number of residues is not between 1 and 3
ValueError
If any residue ID is not an integer
ValueError
If no atoms are found for the specified residue IDs

Examples

>>> center, warning, viewer = protein.get_center_by_residues([1, 2, 3])
>>> print(center)  # [x, y, z]
>>> print(warning)  # Empty string or warning about missing residues
def list_chain_names(self) ‑> List[str]
Expand source code
def list_chain_names(self) -> List[str]:
    """
    List the distinct chain identifiers found in the protein structure.

    Returns:
        List[str]: The unique chain IDs of the records returned by
            ``_filter_chain_records()``, in the sorted order produced by
            ``np.unique``.
    """
    return list(np.unique(self._filter_chain_records().chain_id))

Retrieves a list of unique chain identifiers from the protein structure.

Returns

List[str]
A list of unique chain IDs present in the protein structure. Each chain ID is typically a single character that identifies a specific polypeptide chain.
def list_hetero_names(self, exclude_water=True) ‑> List[str]
Expand source code
def list_hetero_names(self, exclude_water=True) -> List[str]:
    """
    List the distinct residue names of the hetero atoms in the structure.

    Args:
        exclude_water (bool): Forwarded unchanged to ``_filter_hetatm_records``;
            when True, water is expected to be left out of the result.
            Defaults to True.

    Returns:
        List[str]: The unique hetero residue names, in the sorted order
            produced by ``np.unique``.
    """
    selected = self._filter_hetatm_records(exclude_water=exclude_water)
    return list(np.unique(selected.res_name))

Returns a list of unique hetero atom residue names from the structure.

Args

exclude_water : bool
If True, excludes water molecules (HOH) from the returned list.

Defaults to True.

Returns

List[str]
A list of unique hetero residue names found in the structure.

Common examples include small molecules, ions, and modified amino acids.

def load_structure_from_block(self, block_content: str, block_type: str)
Expand source code
def load_structure_from_block(self, block_content: str, block_type: str):
    """
    Parse a structure from in-memory text instead of a file.

    Both accepted formats are read with ``PDBFile`` from a ``StringIO`` wrapper
    around the text.

    Args:
        block_content (str): Structure data in PDB/PDBQT format
        block_type (str): Format of ``block_content``; must be 'pdb' or 'pdbqt'

    Returns:
        The object returned by ``PDBFile.get_structure()`` for the parsed text

    Raises:
        ValueError: If ``block_type`` is neither 'pdb' nor 'pdbqt'

    Examples:
        >>> pdb_content = "ATOM      1  N   ASN A   1      27.961  28.064  39.573  1.00 23.02           N"
        >>> structure = protein.load_structure_from_block(pdb_content, "pdb")
    """
    # Reject unknown formats before touching the parser.
    if block_type not in ("pdb", "pdbqt"):
        raise ValueError(f"Unsupported block type: {block_type}")

    return PDBFile.read(io.StringIO(block_content)).get_structure()

Load a molecular structure from a text block.

This method reads a structure from a string content block in either PDB or PDBQT format and returns a Structure object.

Args

block_content : str
String containing the structure data in PDB/PDBQT format
block_type : str
Format type of the block content ('pdb' or 'pdbqt')

Returns

Structure
A Structure object representing the molecular structure

Raises

ValueError
If the block_type is not supported (must be 'pdb' or 'pdbqt')

Examples

>>> protein = Protein()
>>> pdb_content = "ATOM      1  N   ASN A   1      27.961  28.064  39.573  1.00 23.02           N"
>>> structure = protein.load_structure_from_block(pdb_content, "pdb")
def prepare(self, model_loops: bool = False, pdb_id: str = '') ‑> Protein
Expand source code
def prepare(self, model_loops: bool = False, pdb_id: str = "") -> "Protein":
    """
    Prepares the protein structure by processing metals, cofactors and optionally modeling missing loops.

    Args:
        model_loops (bool, optional): Whether to model missing loops in the structure.
            Requires a PDB ID (argument or ``self.pdb_id``) if True. Defaults to False.
        pdb_id (str, optional): PDB ID of the protein structure. Falls back to
            ``self.pdb_id`` when empty. Defaults to empty string.

    Returns:
        Protein: A new Protein instance loaded from the prepared structure file.
            Its ``pdb_id`` is copied from this instance.

    Raises:
        ValueError: If model_loops is True but no PDB ID is available.
        Exception: If the preparation response contains no prepared protein content.

    Notes:
        This method:
        - Extracts metal ions and cofactors and keeps only metals listed in METALS
        - Sends the structure through the preparation pipeline
        - Writes the prepared structure to '<original stem>_prep.pdb' next to the
          original file, or to 'modified_structure_prep.pdb' in the system temp
          directory when this protein has no file path
        - Creates and returns a new Protein instance from that file
    """
    pdb_id = pdb_id if pdb_id else self.pdb_id
    if model_loops and not pdb_id:
        raise ValueError("PDB ID must be provided to model loops.")

    metal_resnames, cofactor_resnames = self.extract_metals_and_cofactors()
    metals_to_keep = [resname for resname in metal_resnames if resname.upper() in METALS]

    response = prepare(
        protein_path=self.file_path,
        protein_pdb_id=pdb_id,
        protein_extension=self.block_type,
        metal_resnames=metals_to_keep,
        cofactor_resnames=cofactor_resnames,
        model_loops=model_loops,
    )
    if not response["prepared_protein_content"]:
        raise Exception("Failed to prepare protein.")

    # Resolve directory and base name together so a missing file path falls
    # back consistently instead of failing inside Path(None).
    if self.file_path:
        source_path = Path(self.file_path)
        protein_dir = source_path.parent
        base_name = source_path.stem
    else:
        protein_dir = Path(tempfile.gettempdir())
        base_name = "modified_structure"
    new_file_path = protein_dir / f"{base_name}_prep.pdb"

    intermediate_protein = Protein(block_content=response["prepared_protein_content"], block_type="pdb")
    intermediate_protein.write_to_file(str(new_file_path))

    # Pass the path as str, matching how other methods construct Protein objects.
    protein = Protein(file_path=str(new_file_path))
    protein.pdb_id = self.pdb_id

    return protein

Prepares the protein structure by processing metals, cofactors and optionally modeling missing loops.

Args

model_loops : bool, optional
Whether to model missing loops in the structure. Requires a valid PDB ID if True. Defaults to False.
pdb_id : str, optional
PDB ID of the protein structure. Required if model_loops=True. Defaults to empty string.

Returns

Protein
A new Protein instance with the prepared structure.

Raises

ValueError
If model_loops is True but no PDB ID is provided.
Exception
If protein preparation fails.

Notes

This method extracts and filters metal ions and cofactors from the structure, processes the structure through the preparation pipeline, writes the prepared structure to a new file with the '_prep.pdb' suffix, and creates and returns a new Protein instance with the prepared structure.

def remove_hetatm(self,
keep_resnames: List[str] | None = None,
remove_metals: List[str] | None = None)
Expand source code
def remove_hetatm(
    self, keep_resnames: Optional[List[str]] = None, remove_metals: Optional[List[str]] = None
) -> Optional["Protein"]:
    """
    Remove HETATM records from the protein structure while allowing specific residues and metals to be kept.

    Args:
        keep_resnames (Optional[List[str]]): List of residue names to keep in the structure despite being HETATM records.
            Names are case-insensitive. Defaults to None.
        remove_metals (Optional[List[str]]): List of metal names to remove from the structure.
            By default, all metals in METALS are kept. Names are case-insensitive. Defaults to None.

    Returns:
        Protein: The object returned by ``_create_new_protein_with_structure`` for
            the filtered structure, created with the suffix "_no_hetatm".

    Notes:
        Kept hetero residues are matched on (chain ID, residue ID, insertion code,
        residue name). Matching on the residue ID alone would also retain
        unrelated hetero residues (e.g. waters or ligands in another chain) that
        merely share a residue number with a kept residue.

    Examples:
        >>> protein.remove_hetatm(keep_resnames=['NAG', 'BMA'], remove_metals=['ZN', 'MG'])
        >>> protein.remove_hetatm()  # Removes all HETATM except metals
        >>> protein.remove_hetatm(keep_resnames=['HOH'])  # Keeps water molecules
    """
    metals = METALS
    if remove_metals:
        excluded_metals = {metal.upper() for metal in remove_metals}
        metals = list(set(METALS) - excluded_metals)

    structure = self.structure
    if not metals and not keep_resnames:
        filtered_structure = structure[~structure.hetero]
    else:
        names_to_keep = {res.upper() for res in keep_resnames} if keep_resnames else set()
        names_to_keep.update(metals)

        hetatm_to_keep = self._filter_hetatm_records(keep_resnames=list(names_to_keep))

        # Identify each kept residue instance unambiguously.
        kept_residues = set(
            zip(
                hetatm_to_keep.chain_id,
                hetatm_to_keep.res_id,
                hetatm_to_keep.ins_code,
                hetatm_to_keep.res_name,
            )
        )
        keep_mask = np.fromiter(
            (
                residue_key in kept_residues
                for residue_key in zip(structure.chain_id, structure.res_id, structure.ins_code, structure.res_name)
            ),
            dtype=bool,
        )
        filtered_structure = structure[~structure.hetero | keep_mask]

    return self._create_new_protein_with_structure(filtered_structure, suffix="_no_hetatm")

Remove HETATM records from the protein structure while allowing specific residues and metals to be kept.

Args

keep_resnames : Optional[List[str]]
List of residue names to keep in the structure despite being HETATM records.
Names are case-insensitive. Defaults to None.
remove_metals : Optional[List[str]]
List of metal names to remove from the structure.

By default, all metals are kept. Names are case-insensitive. Defaults to None.

Returns

Protein
A new Protein object containing the filtered structure with the suffix "_no_hetatm"

Examples

>>> protein.remove_hetatm(keep_resnames=['NAG', 'BMA'], remove_metals=['ZN', 'MG'])
>>> protein.remove_hetatm()  # Removes all HETATM except metals
>>> protein.remove_hetatm(keep_resnames=['HOH'])  # Keeps water molecules
def remove_resnames(self, exclude_resnames: List[str] | None = None) ‑> Protein
Expand source code
def remove_resnames(self, exclude_resnames: Optional[List[str]] = None) -> "Protein":
    """
    Build a new protein without the residues whose names are listed.

    Args:
        exclude_resnames (List[str], optional): Residue names to drop. Matching is
            an exact comparison against the structure's residue names.
            If None, a copy of the full structure is used.

    Returns:
        Protein: The object returned by ``_create_new_protein_with_structure`` for
            the remaining atoms, created with the suffix '_resnames_removed'.

    Examples:
        >>> protein.remove_resnames(['ALA', 'GLY'])  # removes all alanine and glycine residues
        >>> protein.remove_resnames()  # returns a copy of the protein
    """
    if exclude_resnames is None:
        remaining = self.structure.copy()
    else:
        drop_mask = np.isin(self.structure.res_name, exclude_resnames)
        remaining = self.structure[~drop_mask]
    return self._create_new_protein_with_structure(remaining, suffix="_resnames_removed")

Remove residues from the protein structure based on their residue names.

Args

exclude_resnames : List[str], optional
List of residue names to exclude from the structure.

If None, returns a copy of the original structure.

Returns

Protein
A new Protein instance with specified residues removed.

The new instance has '_resnames_removed' suffix added to its name.

Examples

>>> protein.remove_resnames(['ALA', 'GLY'])  # removes all alanine and glycine residues
>>> protein.remove_resnames()  # returns a copy of the protein
def remove_water(self) ‑> Protein
Expand source code
def remove_water(self) -> "Protein":
    """
    Build a new protein without the atoms flagged as solvent.

    Every atom selected by ``filter_solvent`` is dropped, so the result excludes
    whatever that filter treats as solvent (water included).

    Returns:
        Protein: The object returned by ``_create_new_protein_with_structure`` for
            the remaining atoms, created with the suffix '_no_water'.
    """
    solvent_mask = filter_solvent(self.structure)
    without_solvent = self.structure[~solvent_mask]
    return self._create_new_protein_with_structure(without_solvent, suffix="_no_water")

Removes water molecules from the protein structure.

This method filters out solvent molecules (including water) from the protein structure using a predefined solvent filter.

Returns

Protein
A new Protein instance with water molecules removed. The new instance will have the suffix '_no_water' appended to its name.
def select_chain(self, chain_id: str) ‑> Protein | None
Expand source code
def select_chain(self, chain_id: str) -> Optional["Protein"]:
    """
    Build a new Protein restricted to a single chain.

    Args:
        chain_id (str): Identifier of the chain to extract.

    Returns:
        Optional[Protein]: A new Protein holding only the requested chain, created
            with the suffix '_chain_<chain_id>'. A missing chain raises instead of
            returning None.

    Raises:
        ValueError: If no records match ``chain_id``.

    Example:
        >>> protein = Protein("1abc.pdb")
        >>> chain_a = protein.select_chain("A")
    """
    matching_records = self._filter_chain_records(chain_ids=[chain_id])

    # Guard clause: an empty selection means the chain does not exist.
    if len(matching_records) == 0:
        raise ValueError(f"Chain {chain_id} not found.")

    name_suffix = f"_chain_{chain_id}"
    return self._create_new_protein_with_structure(matching_records, suffix=name_suffix)

Selects a specific chain from the protein structure and returns a new Protein object.

Args

chain_id : str
The identifier of the chain to be selected.

Returns

Optional[Protein]
A new Protein object containing only the selected chain. A chain that is not found raises ValueError (see Raises) rather than returning None.

Raises

ValueError
If the specified chain_id is not found in the protein structure.

Example

>>> protein = Protein("1abc.pdb")
>>> chain_a = protein.select_chain("A")
def select_chains(self, chain_ids: List[str]) ‑> Protein
Expand source code
def select_chains(self, chain_ids: List[str]) -> "Protein":
    """
    Build a new Protein restricted to the given set of chains.

    Args:
        chain_ids (List[str]): Identifiers of the chains to keep.

    Returns:
        Protein: A new Protein holding only the matching chains, created with the
            suffix '_chains_' followed by the requested IDs joined by underscores.

    Raises:
        ValueError: If none of the requested chain IDs match any record.

    Example:
        >>> protein.select_chains(['A', 'B'])
        # Returns a new Protein object with only chains A and B
    """
    selected = self._filter_chain_records(chain_ids=chain_ids)

    if len(selected) > 0:
        name_suffix = "_chains_" + "_".join(chain_ids)
        return self._create_new_protein_with_structure(selected, suffix=name_suffix)

    # Nothing matched any of the requested IDs.
    raise ValueError(f"No chains found for the provided chain IDs: {chain_ids}")

Select specific chains from the protein structure and create a new Protein object.

Args

chain_ids : List[str]
A list of chain identifiers to select from the protein structure.

Returns

Protein
A new Protein object containing only the selected chains.

Raises

ValueError
If no chains are found for the provided chain IDs.

Example

>>> protein.select_chains(['A', 'B'])
# Returns a new Protein object with only chains A and B
def select_ligand(self, res_name: str) ‑> Ligand
Expand source code
def select_ligand(self, res_name: str) -> List["Ligand"]:
    """
    Select every occurrence of a ligand (by residue name) and wrap each in a Ligand.

    The hetero atoms carrying ``res_name`` are grouped by
    ``(chain_id, res_id, ins_code)``; each group is written to a PDB block and
    turned into one Ligand object.

    Args:
        res_name (str): The residue name of the ligand to select.

    Returns:
        List[Ligand]: One Ligand per distinct residue occurrence, in the order the
        occurrences first appear in the structure. Occurrences whose block could
        not be converted to SDF (see Notes) are skipped, so the list may be empty.

    Raises:
        ValueError: If ``res_name`` is not among the structure's hetero residue
            names, or if no atoms are found for the specified ligand.

    Notes:
        - A SMILES string is first requested via ``Ligand.fetch_smiles_from_pdb_api``;
          any failure there is logged and treated as "no SMILES available".
        - Without SMILES, the PDB block is converted to SDF through
          ``convert_block`` (logged as the Open Babel path). If that conversion
          fails for an occurrence, an error is logged and that occurrence is skipped.
        - With SMILES, bond orders are assigned from it on the created ligand.

    Example:
        >>> protein = Protein("1abc.pdb")
        >>> ligands = protein.select_ligand("ATP")
        >>> print(len(ligands))  # Number of ATP molecules in structure
    """
    hetero_names = self.list_hetero_names()
    if res_name not in hetero_names:
        raise ValueError(f"Residue {res_name} not found. Available ligands are: {hetero_names}")

    ligand_atoms = self.structure[(self.structure.res_name == res_name) & self.structure.hetero]
    if len(ligand_atoms) == 0:
        raise ValueError(f"No atoms found for ligand {res_name}.")

    # Best effort: a failed lookup is not fatal, the SDF fallback below takes over.
    try:
        smiles = Ligand.fetch_smiles_from_pdb_api(res_name)
    except Exception:
        DEFAULT_LOGGER.log_warning(f"Failed to fetch SMILES for {res_name}.")
        smiles = None
    else:
        # A successful fetch is informational, not a warning.
        DEFAULT_LOGGER.log_info(f"SMILES for {res_name}: {smiles}")

    # dict.fromkeys de-duplicates while keeping first-seen order, so the returned
    # list is deterministic (a plain set() would not guarantee any order).
    unique_residue_tuples = list(
        dict.fromkeys(zip(ligand_atoms.chain_id, ligand_atoms.res_id, ligand_atoms.ins_code))
    )

    ligands = []
    for chain_id, res_id, ins_code in unique_residue_tuples:
        mask = (
            (ligand_atoms.chain_id == chain_id)
            & (ligand_atoms.res_id == res_id)
            & (ligand_atoms.ins_code == ins_code)
        )
        ligand_group = ligand_atoms[mask]

        # Serialize just this occurrence into an in-memory PDB block.
        pdb_file = PDBFile()
        pdb_file.set_structure(ligand_group)
        pdb_block = io.StringIO()
        pdb_file.write(pdb_block)
        block_content, block_type = pdb_block.getvalue(), "pdb"

        if not smiles:
            DEFAULT_LOGGER.log_warning(f"PROCEEDING WITH OPEN BABEL TO EXTRACT SMILES FOR {res_name}")
            try:
                block_content, block_type = convert_block("pdb", block_content, "sdf"), "sdf"
            except Exception:
                DEFAULT_LOGGER.log_error("Failed to convert block to SDF. Please provide smiles manually.")
                # Skip only this occurrence; returning None here would break the
                # documented list contract and crash callers that extend() the result.
                continue

        ligand = Ligand(
            block_content=block_content,
            block_type=block_type,
            name=res_name,
            xref_protein=self,
            xref_ins_code=ins_code,
            xref_residue_id=res_id,
            xref_protein_chain_id=chain_id,
        )

        if smiles:
            ligand.mol.assign_bond_order_from_smiles(smiles)

        ligands.append(ligand)

    return ligands

Selects and processes ligands from the protein structure based on the residue name.

This method identifies ligand atoms in the structure, attempts to fetch their SMILES representation, and creates Ligand objects for each unique residue occurrence.

Args

res_name : str
The residue name of the ligand to select.

Returns

List[Ligand]
A list of Ligand objects, each representing a unique instance of the specified ligand in the structure.

Raises

ValueError
If the specified residue name is not found in the structure's hetero residues, or if no atoms are found for the specified ligand.

Notes

  • The method first attempts to fetch SMILES from PDB API
  • If SMILES fetch fails, it attempts to use OpenBabel for SMILES extraction
  • Each Ligand object contains the atomic coordinates and chemical structure information
  • Bond orders are assigned using SMILES when available

Example

>>> protein = Protein("1abc.pdb")
>>> ligands = protein.select_ligand("ATP")
>>> print(len(ligands))  # Number of ATP molecules in structure
def select_ligands(self, res_names: List[str]) ‑> List[Ligand]
Expand source code
def select_ligands(self, res_names: Optional[List[str]] = None) -> List["Ligand"]:
    """
    Selects and returns a list of ligands based on their residue names.

    Args:
        res_names (Optional[List[str]]): Residue names to select ligands for.
            If None (the default), all names from ``list_hetero_names()`` are used.

    Returns:
        List["Ligand"]: All Ligand objects found for the requested residue names,
            concatenated in the order the names were given. Returns an empty list
            if no matching ligands are found.

    Note:
        If ``select_ligand`` raises ValueError for a residue name, the message is
        logged as a warning and the selection continues with the remaining names.
        A residue name for which ``select_ligand`` yields nothing (None or an
        empty list) contributes no ligands.
    """
    if res_names is None:
        res_names = self.list_hetero_names()

    ligands = []
    for res_name in res_names:
        try:
            found = self.select_ligand(res_name)
        except ValueError as e:
            DEFAULT_LOGGER.log_warning(str(e))
            continue
        # select_ligand may come back empty-handed (None); extend(None) would raise.
        if found:
            ligands.extend(found)
    return ligands

Selects and returns a list of ligands based on their residue names.

Args

res_names : List[str]
A list of residue names to select ligands for. If None, all heterogeneous residue names will be used.

Returns

List["Ligand"]: A list of Ligand objects matching the specified residue names. Returns an empty list if no matching ligands are found.

Note

If a residue name is not found, a warning will be logged and the selection will continue with the remaining residue names.

def select_structure(self, index: int)
Expand source code
def select_structure(self, index: int):
    """
    Return the element of ``self.structure`` at the given position.

    Args:
        index (int): Zero-based position to fetch. Negative indices are rejected.

    Returns:
        Whatever ``self.structure[index]`` evaluates to.

    Raises:
        ValueError: If ``index`` is negative or not smaller than ``len(self.structure)``.
    """
    total = len(self.structure)
    out_of_bounds = index < 0 or index >= total
    if out_of_bounds:
        raise ValueError(f"Invalid structure index {index}. Total structures: {total}")

    return self.structure[index]

Selects a specific structure from the list of available structures.

Args

index : int
The index of the structure to select.

Returns

The selected structure at the specified index.

Raises

ValueError
If the index is out of bounds (negative or >= length of structures).
def update_coordinates(self, coords: numpy.ndarray)
Expand source code
def update_coordinates(self, coords: np.ndarray):
    """
    Overwrite the coordinates of the protein structure in place.

    Args:
        coords (np.ndarray): Replacement coordinates, assigned to
            ``self.structure.coord``. No shape check is performed here; the
            caller is expected to pass an array matching the existing coordinates.

    Returns:
        None

    Notes:
        The existing structure object is mutated (no copy is made) and an info
        message is logged afterwards.
    """
    target_structure = self.structure
    target_structure.coord = coords
    DEFAULT_LOGGER.log_info("Protein coordinates has been inplaced updated.")

Updates the coordinates of the protein structure in-place.

Args

coords : np.ndarray
New coordinates to be assigned to the protein structure. Should match the shape of the existing coordinates.

Returns

None

Notes

This method modifies the protein structure coordinates directly and logs the update.

def visualize(*args, **kwargs)
Expand source code
def wrapper(*args, **kwargs):
    # Call the wrapped function first; its return value is what gets visualized.
    rendered = func(*args, **kwargs)
    # Hand the result to JupyterViewer.visualize and return whatever that produces.
    return JupyterViewer.visualize(rendered)
def write_to_file(self, file_path: str)
Expand source code
def write_to_file(self, file_path: str):
    """
    Write the current protein structure to ``file_path`` in PDB format.

    Args:
        file_path (str): Destination path for the PDB file.

    Returns:
        None

    Notes:
        Failures are not propagated to the caller: any exception raised while
        building or writing the file is caught and reported through the logger's
        error channel. On success an info message is logged instead.

    Example:
        >>> protein.write_to_file("/path/to/output.pdb")
    """
    try:
        writer = PDBFile()
        writer.set_structure(self.structure)
        writer.write(file_path)
        DEFAULT_LOGGER.log_info(f"Current structure written to {file_path}.")
    except Exception as err:
        # Best-effort write: log the failure rather than raising.
        DEFAULT_LOGGER.log_error(f"Failed to write structure to file {file_path}: {err!s}")

Write the protein structure to a PDB file.

This method writes the current protein structure to a specified file path in PDB format.

Args

file_path : str
The path where the PDB file should be written.

Raises

Exception
If there is an error writing the structure to the file.

Example

>>> protein.write_to_file("/path/to/output.pdb")
class ProtonationReport (results)
Expand source code
class ProtonationReport:
    """
    Notebook-friendly view over a list of protonation results.

    On construction the raw results are split into two attributes:
        - ``results``: deep copies of the input items with 'html_metadata' removed
          from each item's 'protonation' dict
        - ``html_metadata``: dict mapping each item's SMILES to its 'html_metadata'
          (None when the item carried none)
    """

    def __init__(self, results):
        self.results, self.html_metadata = self.split_results(results)

    def split_results(self, results):
        """
        Splits results by separating HTML metadata from the main data.

        Args:
            results (list): List of dictionaries containing 'smiles' and
                'protonation' keys. The 'protonation' dict may contain 'html_metadata'.

        Returns:
            tuple: A 2-tuple containing:
                - list: Deep copies of the input items with 'html_metadata' removed
                  from their protonation data (the input itself is left untouched)
                - dict: Mapping of SMILES strings to their corresponding HTML
                  metadata, or None for items without any
        """
        list_without_html = []
        smiles_to_html_dict = {}

        for item in results:
            # Work on a copy so the caller's data keeps its metadata.
            new_item = copy.deepcopy(item)
            new_item["protonation"].pop("html_metadata", None)
            list_without_html.append(new_item)

            smiles = item["smiles"]
            smiles_to_html_dict[smiles] = item["protonation"].get("html_metadata")

        return list_without_html, smiles_to_html_dict

    def show_plots(self):
        """
        Display a heading and the concentration-curve plot for each molecule.

        Molecules whose metadata is None (no 'html_metadata' in the input) have
        nothing to plot and are skipped.
        """
        for smi, html_meta in self.html_metadata.items():
            if html_meta is None:
                continue
            centered_html = f"<center><h2>{smi}</h2></center>"
            display(HTML(centered_html))
            self.plot_concentration_curves(html_meta, plot=True)

    def smiles_to_img_html(self, smiles):
        """
        Render a SMILES string as a single-line SVG string (200x150).

        Args:
            smiles (str): SMILES of the molecule to draw.

        Returns:
            str: The SVG markup with all newline characters removed.
        """
        mol = Chem.MolFromSmiles(smiles)
        AllChem.Compute2DCoords(mol)
        mol.GetConformer().SetId(1)

        # Newlines are stripped so the markup can sit inside an HTML table cell.
        return Draw.MolToSVG(mol, width=200, height=150).replace("\n", "")

    def plot_concentration_curves(self, html_meta, plot=False):
        """
        Plot concentration curves for different molecular species across a pH range.

        Args:
            html_meta (tuple): A 5-item sequence, unpacked in this order:
                - fractions (list): Matrix of fraction values; it is transposed
                  before plotting so that each row becomes one species' curve
                - smiles_list (list): SMILES notation for each species
                - concentration_values (list): Concentration value for each species
                - pH_range (list): pH values used as the x-axis
                - pH (float): Current pH value
            plot (bool, optional): If True, also displays the figure via
                ``fig.show()``. Defaults to False.

        Returns:
            plotly.graph_objects.Figure: The figure holding the concentration curves.
            The y-axis shows fraction percentage (0-100%), the x-axis pH (0-14).
            Each curve is labeled with species SMILES and concentration at current pH.
        """
        fractions, smiles_list, concentration_values, pH_range, pH = html_meta
        fractions = np.transpose(np.array(fractions))

        fig = sp.make_subplots(rows=1, cols=1)
        for i, fraction in enumerate(fractions):
            fig.add_trace(
                go.Scatter(
                    x=pH_range,
                    y=fraction * 100,  # fractions are scaled to percent
                    mode="lines",
                    showlegend=True,
                    name=f"Fraction of {smiles_list[i]} at pH={pH:.1f} is {round(concentration_values[i], 2)}(%)",
                ),
                row=1,
                col=1,
            )
        fig.update_layout(
            xaxis=dict(title="pH", range=[0, 14]),
            yaxis=dict(title="Fraction (%)", range=[0, 100]),
            hovermode="closest",
        )
        if plot:
            fig.show()
        # Hand the figure back so callers can customize or save it.
        return fig

    def _to_dataframe(self):
        """
        Converts the protonation results to a pandas DataFrame.

        The frame is the concatenation of one sub-frame per result, keyed by the
        result's SMILES (index level named 'SMILES'), with the columns:
            - 'protonated SMILES': The SMILES strings of protonated species
            - 'Concentration %': Concentrations rounded to 2 decimal places
            - 'Molecule Image': SVG markup from ``smiles_to_img_html``

        Returns:
            pd.DataFrame: The table described above. When there are no results an
            empty DataFrame with the same three columns is returned.
        """
        columns = ["protonated SMILES", "Concentration %", "Molecule Image"]
        data_dict = {}

        for result in self.results:
            smiles = result["smiles"]
            smiles_list = result["protonation"]["smiles_list"]
            concentration_list = result["protonation"]["concentration_list"]
            rounded_concentration = [round(value, 2) for value in concentration_list]
            data_dict[smiles] = pd.DataFrame(
                {"protonated SMILES": smiles_list, "Concentration %": rounded_concentration}
            )

        # pd.concat raises on an empty collection, so short-circuit that case.
        if not data_dict:
            return pd.DataFrame(columns=columns)

        df = pd.concat(data_dict.values(), keys=data_dict.keys(), names=["SMILES"])
        df["Molecule Image"] = df["protonated SMILES"].apply(self.smiles_to_img_html)
        return df

    def _repr_html_(self):
        """Return the results table as HTML, with the SVG markup left unescaped."""
        df = self._to_dataframe()
        return df.to_html(escape=False)

Methods

def plot_concentration_curves(self, html_meta, plot=False)
Expand source code
def plot_concentration_curves(self, html_meta, plot=False):
    """
    Plot concentration curves for different molecular species across a pH range.

    Args:
        html_meta (tuple): A 5-item sequence, unpacked in this order:
            - fractions (list): Matrix of fraction values; it is transposed
              before plotting so that each row becomes one species' curve
            - smiles_list (list): SMILES notation for each species
            - concentration_values (list): Concentration value for each species
            - pH_range (list): pH values used as the x-axis
            - pH (float): Current pH value
        plot (bool, optional): If True, also displays the figure via
            ``fig.show()``. Defaults to False.

    Returns:
        plotly.graph_objects.Figure: The figure holding the concentration curves.
        The y-axis shows fraction percentage (0-100%), the x-axis pH (0-14).
        Each curve is labeled with species SMILES and concentration at current pH.
    """
    fractions, smiles_list, concentration_values, pH_range, pH = html_meta
    fractions = np.transpose(np.array(fractions))

    fig = sp.make_subplots(rows=1, cols=1)
    for i, fraction in enumerate(fractions):
        fig.add_trace(
            go.Scatter(
                x=pH_range,
                y=fraction * 100,  # fractions are scaled to percent
                mode="lines",
                showlegend=True,
                name=f"Fraction of {smiles_list[i]} at pH={pH:.1f} is {round(concentration_values[i], 2)}(%)",
            ),
            row=1,
            col=1,
        )
    fig.update_layout(
        xaxis=dict(title="pH", range=[0, 14]),
        yaxis=dict(title="Fraction (%)", range=[0, 100]),
        hovermode="closest",
    )
    if plot:
        fig.show()
    # Previously the figure was built and then dropped; return it as documented.
    return fig

Plot concentration curves for different molecular species across a pH range.

Args

html_meta : tuple
A tuple containing:
  • fractions (list): Matrix of fraction values for each species
  • smiles_list (list): List of SMILES notations for each species
  • concentration_values (list): Concentration values for each species
  • pH_range (list): Range of pH values for x-axis
  • pH (float): Current pH value

plot : bool, optional
If True, displays the plot immediately. Defaults to False.

Returns

plotly.graph_objects.Figure
A plotly figure object containing the concentration curves.

The plot shows fraction percentage (0-100%) on y-axis vs pH (0-14) on x-axis. Each curve is labeled with species SMILES and concentration at current pH.

def show_plots(self)
Expand source code
def show_plots(self):
    """
    Display a heading and the concentration-curve plot for each molecule.

    Iterates over ``self.html_metadata`` (SMILES -> plot metadata). For every
    entry that has metadata, a centered heading with the SMILES is displayed and
    ``plot_concentration_curves`` is called with ``plot=True``. Entries whose
    metadata is None have nothing to plot and are skipped.
    """
    for smi, html_meta in self.html_metadata.items():
        if html_meta is None:
            # No metadata was supplied for this molecule; unpacking None would fail.
            continue
        centered_html = f"<center><h2>{smi}</h2></center>"
        display(HTML(centered_html))
        self.plot_concentration_curves(html_meta, plot=True)
def smiles_to_img_html(self, smiles)
Expand source code
def smiles_to_img_html(self, smiles):
    """
    Render a SMILES string as a single-line SVG string (200x150).

    Args:
        smiles (str): SMILES of the molecule to draw.

    Returns:
        str: The SVG markup with all newline characters removed.
    """
    molecule = Chem.MolFromSmiles(smiles)
    AllChem.Compute2DCoords(molecule)

    conformer = molecule.GetConformer()
    conformer.SetId(1)

    svg_markup = Draw.MolToSVG(molecule, width=200, height=150)
    # Collapse the markup onto one line.
    return svg_markup.replace("\n", "")
def split_results(self, results)
Expand source code
def split_results(self, results):
    """
    Separate the HTML metadata from the rest of each result item.

    Args:
        results (list): Dictionaries with 'smiles' and 'protonation' keys; the
            'protonation' dict may carry an 'html_metadata' entry.

    Returns:
        tuple: A 2-tuple containing:
            - list: Deep copies of the input items with 'html_metadata' removed
              from their protonation data (the input itself is not modified)
            - dict: Mapping of each item's SMILES to its HTML metadata, or None
              when the item had none
    """
    cleaned_results = []
    metadata_by_smiles = {}

    for entry in results:
        # Copy first so the metadata can be dropped without touching the input.
        stripped = copy.deepcopy(entry)
        stripped["protonation"].pop("html_metadata", None)
        cleaned_results.append(stripped)

        key = entry["smiles"]
        metadata_by_smiles[key] = entry["protonation"].get("html_metadata")

    return cleaned_results, metadata_by_smiles

Splits results by separating HTML metadata from the main data.

This method processes a list of result items, removing HTML metadata from the protonation data while preserving it in a separate dictionary mapped by SMILES.

Args

results : list
List of dictionaries containing molecular data with 'smiles' and 'protonation' keys. The 'protonation' may contain 'html_metadata'.

Returns

tuple
A 2-tuple containing:
  • list: Copy of input results with HTML metadata removed from protonation data
  • dict: Mapping of SMILES strings to their corresponding HTML metadata