Module deeporigin.src.structures.ligand

Classes

class Ligand (identifier: str = '',
file_path: str = '',
smiles: str = '',
block_type: str = '',
block_content: str = '',
name: str = '',
seed: int = None,
xref_protein='',
xref_ins_code: str = '',
xref_residue_id: str = '',
xref_protein_chain_id: str = '',
save_to_file: bool = False,
properties: dict = None)
Expand source code
class Ligand:
    def __init__(
        self,
        identifier: str = "",
        file_path: str = "",
        smiles: str = "",
        block_type: str = "",
        block_content: str = "",
        name: str = "",
        seed: int = None,
        xref_protein="",
        xref_ins_code: str = "",
        xref_residue_id: str = "",
        xref_protein_chain_id: str = "",
        save_to_file: bool = False,
        properties: dict = None,
    ):
        """
        Create a Ligand from exactly one molecular source.

        The molecule is built from a raw block, an identifier, a file, or a SMILES string.
        Afterwards the ligand is named, its heavy atoms are counted, and the properties carried
        by the molecule are merged into ``self.properties``.

        Args:
            identifier (str, optional): Value passed as ``name`` to ``Molecule.from_smiles_or_name``
                (together with ``add_coords=True`` and ``seed``). Defaults to "".
            file_path (str, optional): Path of a structure file to load. Defaults to "".
            smiles (str, optional): SMILES string to build the molecule from. Defaults to "".
            block_type (str, optional): Format of ``block_content`` (e.g. "mol", "sdf"); stored
                lower-cased. Defaults to "".
            block_content (str, optional): Raw molecular block. Defaults to "".
            name (str, optional): Fallback name, used only when the molecule has no name of its
                own. Defaults to "".
            seed (int, optional): Seed forwarded on the ``identifier`` path. Defaults to None.
            xref_protein (str, optional): Cross-reference to a protein. Defaults to "".
            xref_ins_code (str, optional): Cross-reference insertion code. Defaults to "".
            xref_residue_id (str, optional): Cross-reference residue ID. Defaults to "".
            xref_protein_chain_id (str, optional): Cross-reference protein chain ID. Defaults to "".
            save_to_file (bool, optional): When True, the ligand is written as SDF through
                ``write_to_file``. Defaults to False.
            properties (dict, optional): Initial properties; the mapping is copied. Defaults to None.

        Raises:
            ValueError: If the number of non-empty sources among identifier, file_path, smiles
                and block_content is not exactly one.
            ValueError: If ``block_content`` is given without ``block_type``.
            ValueError: If the selected builder returns None.

        Notes:
            - Any exception raised by the selected builder propagates unchanged.
            - Name precedence: the molecule's own name, then ``name``, then "Unknown_Ligand"
              followed by a counter derived from matching entries in the ligand directory.
            - ``get_directory()`` is always called, so the ligand directory is created even
              when nothing is saved.
            - Fewer than 5 heavy atoms only produces a warning.
            - ``available_for_docking`` is the negation of ``self.mol.contains_boron``.
        """
        # Cross-reference data is stored verbatim.
        self.xref_protein = xref_protein
        self.xref_ins_code = xref_ins_code
        self.xref_residue_id = xref_residue_id
        self.xref_protein_chain_id = xref_protein_chain_id

        # Source-related state and defaults.
        self.identifier = identifier
        self.file_path = file_path
        self.block_type = block_type.lower()
        self.block_content = block_content
        self.name = name
        self.protonated_smiles = None
        self.mol = None
        self.hac = 0
        # Copy so that later mutation never leaks back into the caller's dict.
        self.properties = dict(properties.items()) if properties else dict()

        given_sources = [src for src in (identifier, file_path, smiles, block_content) if src]
        if len(given_sources) != 1:
            raise ValueError("Please provide exactly one of identifier, file_path, smiles, or block_content.")

        # Exactly one source is truthy at this point, so exactly one branch below runs.
        if block_content:
            if not self.block_type:
                raise ValueError("block_type must be provided when initializing from block_content.")
            self.mol = mol_from_block(self.block_type, self.block_content)
            DEFAULT_LOGGER.log_info("Initialized Ligand from block content.")
        elif identifier:
            self.mol = Molecule.from_smiles_or_name(name=identifier, add_coords=True, seed=seed)
        elif file_path:
            self.mol = self._initialize_from_file(file_path)
        elif smiles:
            self.mol = mol_from_smiles(smiles)
            self.block_type = "mol"
            self.block_content = self.mol.molblock()
            DEFAULT_LOGGER.log_info("Initialized Ligand from SMILES string.")

        if self.mol is None:
            raise ValueError("Failed to create molecule.")

        # The molecule's own name wins over the caller-supplied one.
        self.name = self.mol.name or self.name or "Unknown_Ligand"
        ligand_dir = Path(self.get_directory())
        if self.name == "Unknown_Ligand":
            already_present = sum(1 for _ in ligand_dir.glob("Unknown_Ligand*"))
            self.name = f"Unknown_Ligand_{already_present + 1}"

        rd_mol = self.mol.m
        self.hac = rd_mol.GetNumHeavyAtoms()
        if self.hac < 5:
            DEFAULT_LOGGER.log_warning("Ligand has less than 5 heavy atoms.")

        # Properties stored on the molecule override same-named caller properties.
        self.properties.update(rd_mol.GetPropsAsDict())

        self.available_for_docking = not self.mol.contains_boron
        if save_to_file:
            self.write_to_file(output_format="sdf")

    @property
    def coordinates(self):
        """
        Returns a numpy array of ligand coordinates.

        Returns:
            np.ndarray: A numpy array of float32 containing the 3D coordinates of all atoms in the ligand.
        """
        return np.array(self.mol.coords(), dtype=np.float32)

    @property
    def atom_types(self):
        """
        Returns a list of unique atom types present in the ligand molecule.

        Returns:
            list: A list of strings representing unique atomic species (e.g. ['C', 'H', 'O', 'N'])
        """
        return self.mol.species()

    def _initialize_from_file(self, file_path: str) -> Molecule:
        """
        Load a Molecule from a structure file and record where it came from.

        Args:
            file_path (str): Path to the molecular structure file.

        Returns:
            Molecule: The object returned by ``mol_from_file``.

        Raises:
            FileNotFoundError: If ``file_path`` does not exist.
            Exception: Any error raised by ``mol_from_file`` is logged and re-raised.

        Notes:
            - ``self.block_type`` is set to the lower-cased file extension without its dot.
            - ``self.file_path`` is replaced by a ``Path`` object.
            - Both attributes are updated before parsing, so they stay set even if parsing fails.
        """
        source = Path(file_path)
        if not source.exists():
            raise FileNotFoundError(f"The file {file_path} does not exist.")

        suffix = source.suffix.lower()
        file_kind = suffix.lstrip(".")
        self.file_path = source
        self.block_type = file_kind

        try:
            loaded = mol_from_file(file_kind, str(source))
        except Exception as e:
            DEFAULT_LOGGER.log_error(f"Failed to initialize Ligand from file {file_path}: {str(e)}")
            raise
        DEFAULT_LOGGER.log_info(f"Initialized Ligand from file {file_path}.")
        return loaded

    def set_property(self, prop_name: str, prop_value):
        """
        Store a property on the ligand and mirror it onto the underlying molecule.

        Args:
            prop_name (str): Name of the property.
            prop_value: Value to store. It is kept as-is in ``self.properties`` and written to
                the molecule as ``str(prop_value)``.

        Note:
            The dictionary is updated first, so it keeps the new value even if writing to the
            molecule raises. The assignment is logged at INFO level.
        """
        text_value = str(prop_value)
        self.properties[prop_name] = prop_value
        rd_mol = self.mol.m
        rd_mol.SetProp(prop_name, text_value)
        DEFAULT_LOGGER.log_info(f"Set property '{prop_name}' to '{prop_value}'.")

    def get_property(self, prop_name: str):
        """
        Look up a property, first in the ligand's dictionary and then on the molecule.

        Args:
            prop_name (str): The name of the property to retrieve.

        Returns:
            Any: The stored value, or None when the property is found in neither place.

        Notes:
            - A dictionary entry whose value is None is treated as missing and the molecule
              is consulted instead.
            - A value found on the molecule (via ``HasProp``/``GetProp``) is cached in
              ``self.properties`` before being returned.
            - Every outcome is logged at INFO level.
        """
        cached = self.properties.get(prop_name)
        if cached is not None:
            DEFAULT_LOGGER.log_info(f"Retrieved property '{prop_name}' from properties dictionary: '{cached}'.")
            return cached

        rd_mol = self.mol.m
        if not rd_mol.HasProp(prop_name):
            DEFAULT_LOGGER.log_info(f"Property '{prop_name}' not found.")
            return None

        from_mol = rd_mol.GetProp(prop_name)
        self.properties[prop_name] = from_mol
        DEFAULT_LOGGER.log_info(f"Retrieved property '{prop_name}' from molecule: '{from_mol}'.")
        return from_mol

    def write_to_file(self, output_path: str = "", output_format: str = ""):
        """
        Write the ligand and its properties to a PDB, MOL, or SDF file.

        Args:
            output_path (str, optional): Destination file. When empty, the file is written to
                ``<ligand directory>/<name><format>``.
            output_format (str, optional): Target format, with or without the leading dot and
                in any letter case ("sdf", ".sdf", "SDF"). When empty it is inferred from the
                extension of ``output_path``.

        Raises:
            ValueError: If neither argument is provided, or if the resolved format is not one
                of '.pdb', '.mol', '.sdf'.
            Exception: Any other error raised while writing is logged and re-raised.

        Note:
            - An explicit ``output_format`` always selects the writer. If ``output_path`` has a
              different extension a warning is logged; if it has no extension at all the
              format is used silently.
            - Before writing, the name, the SMILES and every entry of ``self.properties`` are
              pushed onto the molecule through ``set_property``; as a side effect the values
              held in ``self.properties`` become strings.
            - Properties are emitted as follows:
                - PDB: ``REMARK`` lines placed ahead of the PDB block
                - MOL: ``>  <name>`` data items appended after the mol block
                - SDF: whatever ``Chem.SDWriter`` writes for the molecule (kekulization disabled)
            - NOTE(review): the PDB/MOL branches read ``GetPropsAsDict()`` with default
              arguments; confirm whether underscore-prefixed properties are meant to appear.
        """
        try:
            if output_format == "" and output_path == "":
                raise ValueError("Please provide either output_path or output_format.")

            # Normalise the requested format once: lower-case, with a single leading dot.
            requested = output_format.strip().lower()
            if requested and not requested.startswith("."):
                requested = f".{requested}"

            if not output_path:
                # Built from the normalised format so ".sdf" does not yield "name..sdf".
                output_path = str(Path(self.get_directory()) / f"{self.name}{requested}")

            path = Path(output_path)
            path_extension = path.suffix.lower()

            if not requested:
                extension = path_extension
            else:
                if path_extension and path_extension != requested:
                    DEFAULT_LOGGER.log_warning(
                        "Output format does not match the file extension. Writing to provided output format."
                    )
                # The explicit format wins, including when the path has no extension.
                extension = requested

            if self.name:
                self.set_property("_Name", self.name)
            if self.mol.smiles:
                self.set_property("_SMILES", self.mol.smiles)
            # Iterate over a snapshot because set_property writes back into self.properties.
            for prop_name, prop_value in list(self.properties.items()):
                self.set_property(prop_name, str(prop_value))

            if extension == ".pdb":
                remark_lines = "".join(
                    f"REMARK   {prop_name}: {prop_value}\n"
                    for prop_name, prop_value in self.mol.m.GetPropsAsDict().items()
                )
                path.write_text(remark_lines + Chem.MolToPDBBlock(self.mol.m))
            elif extension == ".sdf":
                writer = Chem.SDWriter(str(path))
                try:
                    writer.SetKekulize(False)
                    writer.write(self.mol.m)
                finally:
                    # Always release the file handle, even if writing fails.
                    writer.close()
            elif extension == ".mol":
                mol_block = Chem.MolToMolBlock(self.mol.m)
                prop_lines = "".join(
                    f">  <{prop_name}>\n{prop_value}\n\n"
                    for prop_name, prop_value in self.mol.m.GetPropsAsDict().items()
                )
                path.write_text(mol_block + "\n" + prop_lines)
            else:
                raise ValueError(
                    f"Unsupported file extension '{extension}'. Supported extensions are '.pdb', '.mol', '.sdf'."
                )

            DEFAULT_LOGGER.log_info(f"Ligand structure written to {output_path}.")
        except Exception as e:
            DEFAULT_LOGGER.log_error(f"Failed to write structure to file {output_path}: {str(e)}")
            raise

    def get_center(self) -> Optional[List[float]]:
        """
        Calculate the center (per-axis mean) of the ligand coordinates.

        Returns:
            Optional[List[float]]: The center as a list of floats, one per coordinate column,
                or None when no usable coordinates are available (missing, empty, or not a
                2-D array of points).

        Example:
            >>> ligand.get_center()
            [1.234, -2.345, 3.456]
        """
        # Read the property once: every access rebuilds the numpy array.
        coords = self.coordinates
        # The property always returns an array, so "unavailable" shows up as a
        # non-2-D or empty array rather than as None.
        if coords is None or coords.ndim != 2 or coords.shape[0] == 0:
            DEFAULT_LOGGER.log_warning("Coordinates are not available for this ligand.")
            return None

        center = coords.mean(axis=0).tolist()
        DEFAULT_LOGGER.log_info(f"Calculated center coordinates: {center}")
        return [float(x) for x in center]

    def draw(self):
        """
        Draws a visual representation of the ligand molecule.

        Returns:
            Image: The 2D structural representation of the ligand molecule.
        """
        return self.mol.draw()

    @jupyter_visualization
    def visualize(self) -> str:
        """
        Render this ligand with MoleculeViewer.

        The ligand is first written to ``<system temp dir>/<name>_visualize.sdf`` and the
        viewer is then pointed at that file. The file is not removed afterwards.

        Returns:
            str: The value returned by ``viewer.render_ligand`` (before any processing done
                by the ``jupyter_visualization`` decorator).

        Raises:
            Exception: Any failure is logged and re-raised.
        """
        try:
            sdf_location = str(Path(tempfile.gettempdir()).joinpath(f"{self.name}_visualize.sdf"))
            self.write_to_file(sdf_location)

            viewer = MoleculeViewer(sdf_location, format="sdf")
            view_config = viewer.get_ligand_visualization_config()
            return viewer.render_ligand(ligand_config=view_config)
        except Exception as e:
            DEFAULT_LOGGER.log_error(f"Visualization failed: {str(e)}")
            raise

    @classmethod
    def create_ligands_from_sdf(cls, file_path: str) -> List["Ligand"]:
        """
        Build one Ligand per readable record of an SDF file.

        Args:
            file_path (str): Path to the SDF file.

        Returns:
            List[Ligand]: The ligands that could be created, in file order. The list is empty
                when nothing could be read or when reading the file failed.

        Raises:
            FileNotFoundError: If ``file_path`` does not exist.

        Example:
            >>> ligands = Ligand.create_ligands_from_sdf("molecules.sdf")
            >>> print(len(ligands))  # Number of successfully parsed molecules

        Notes:
            - Records the supplier yields as None are skipped with a warning.
            - A record that fails during Ligand construction is logged as an error and skipped.
            - Each record is converted to a mol block and passed to Ligand together with the
              record's properties (``GetPropsAsDict()``).
            - Record numbers in log messages start at 1.
        """
        sdf_path = Path(file_path)
        if not sdf_path.exists():
            raise FileNotFoundError(f"The file '{file_path}' does not exist.")

        ligands = []
        try:
            supplier = Chem.SDMolSupplier(str(sdf_path))
            for position, record in enumerate(supplier, start=1):
                try:
                    if record is None:
                        DEFAULT_LOGGER.log_warning(f"Skipping molecule at index {position} due to parsing error.")
                        continue
                    ligands.append(
                        Ligand(
                            block_type="sdf",
                            block_content=Chem.MolToMolBlock(record),
                            properties=record.GetPropsAsDict(),
                        )
                    )
                except Exception as e:
                    DEFAULT_LOGGER.log_error(
                        f"Failed to create Ligand from SDF file molecule_idx = '{position}': {str(e)}"
                    )
            DEFAULT_LOGGER.log_info(f"Created {len(ligands)} Ligand instances from SDF file '{file_path}'.")
        except Exception as e:
            DEFAULT_LOGGER.log_error(f"Failed to create Ligands from SDF file '{file_path}': {str(e)}")

        return ligands

    @classmethod
    def create_ligands_from_csv(cls, file_path: str) -> List["Ligand"]:
        """
        Build Ligand instances from the rows of a CSV file.

        The file must contain a 'smiles' column (matched after stripping whitespace and
        lower-casing the header). Every other column is attached to the ligand as a property
        under its original header text.

        Args:
            file_path (str): Path to the CSV file containing ligand data.

        Returns:
            List[Ligand]: The ligands that could be created. The list is empty when the file
                is empty, cannot be parsed, or lacks a 'smiles' column.

        Raises:
            FileNotFoundError: If ``file_path`` does not exist. This is the only error that
                reaches the caller; every other failure is logged and swallowed.

        Notes:
            - Rows whose SMILES cell is missing, or is rejected by ``Chem.MolFromSmiles``,
              are skipped with a warning.
            - Property cells that are NaN/missing are not set.
            - An error while handling one row is logged and the remaining rows are still
              processed.
            - Row numbers in log messages are the DataFrame index plus one.

        Example CSV format:
            smiles,name,molecular_weight
            CC(=O)O,acetic acid,60.052
            CCO,ethanol,46.068
        """
        csv_path = Path(file_path)
        if not csv_path.exists():
            raise FileNotFoundError(f"The file '{file_path}' does not exist.")

        ligands = []
        try:
            table = pd.read_csv(file_path)
            lowered_headers = [header.strip().lower() for header in table.columns]

            if "smiles" not in lowered_headers:
                raise ValueError("CSV file must contain a 'smiles' column.")

            # First header that normalises to "smiles" is the structure column.
            smiles_col = table.columns[lowered_headers.index("smiles")]
            property_columns = [header for header in table.columns if header != smiles_col]

            for idx, row in table.iterrows():
                try:
                    smiles = row[smiles_col]
                    if pd.isna(smiles):
                        DEFAULT_LOGGER.log_warning(f"Skipping row {idx + 1}: SMILES value is missing.")
                        continue
                    # Cheap validity check before paying for a full Ligand.
                    if Chem.MolFromSmiles(smiles) is None:
                        DEFAULT_LOGGER.log_warning(f"Skipping row {idx + 1}: Invalid SMILES '{smiles}'.")
                        continue

                    ligand = Ligand(smiles=smiles)
                    for header in property_columns:
                        cell = row[header]
                        if pd.notna(cell):
                            ligand.set_property(header, cell)
                    ligands.append(ligand)
                except Exception as e:
                    DEFAULT_LOGGER.log_error(f"Failed to create Ligand from CSV file row {idx + 1}: {str(e)}")

            DEFAULT_LOGGER.log_info(f"Created {len(ligands)} Ligand instances from CSV file '{file_path}'.")

        except pd.errors.EmptyDataError:
            DEFAULT_LOGGER.log_error(f"The CSV file '{file_path}' is empty.")
        except pd.errors.ParserError as e:
            DEFAULT_LOGGER.log_error(f"Error parsing CSV file '{file_path}': {str(e)}")
        except Exception as e:
            DEFAULT_LOGGER.log_error(f"Failed to create Ligands from CSV file '{file_path}': {str(e)}")

        return ligands

    @classmethod
    def create_ligands_from_file(cls, file_path: str, file_type: str) -> List["Ligand"]:
        """
        Creates a list of Ligand objects from a file.

        Args:
            file_path (str): Path to the input file containing ligand data.
            file_type (str): Type of the input file. Supported types are 'sdf' and 'csv'.

        Returns:
            List[Ligand]: A list of Ligand objects created from the file data.

        Raises:
            ValueError: If the file_type is not supported ('sdf' or 'csv').

        Examples:
            >>> ligands = Ligand.create_ligands_from_file("compounds.sdf", "sdf")
            >>> ligands = Ligand.create_ligands_from_file("compounds.csv", "csv")
        """
        supported_types = ["sdf", "csv"]
        file_type = file_type.lower()

        if file_type not in supported_types:
            raise ValueError(f"Unsupported file format '{file_type}'. Only 'sdf' and 'csv' are supported.")

        if file_type == "sdf":
            return cls.create_ligands_from_sdf(file_path)
        elif file_type == "csv":
            return cls.create_ligands_from_csv(file_path)
        else:
            raise ValueError(f"Unsupported file format '{file_type}'. Only 'sdf' and 'csv' are supported.")

    @classmethod
    def convert_to_sdf(cls, block_content: str, block_type: str):
        """
        Convert molecular block content to a mol block, checking that it can be written as SDF.

        The block is parsed with ``mol_from_block`` (``sanitize=True``, ``remove_hs=False``),
        written once through ``Chem.SDWriter`` into a scratch directory that is removed again,
        and the molecule's ``molblock()`` is returned.

        Args:
            block_content (str): The string content of the molecular block to convert
            block_type (str): The type of molecular block (e.g. 'MOL', 'SDF', etc.)

        Returns:
            str: The value of ``molecule.molblock()`` if parsing and writing succeed
            None: If any step fails (the error is logged, not raised)

        Examples:
            >>> sdf_block = Ligand.convert_to_sdf("molecular block content", "MOL")
            >>> if sdf_block:
            ...     # Process the SDF block
            ... else:
            ...     # Handle conversion failure
        """
        try:
            molecule = mol_from_block(block_type, block_content, sanitize=True, remove_hs=False)

            # The SDF output itself is not needed; writing it only confirms the molecule is
            # serialisable. A TemporaryDirectory avoids the mktemp() name race and guarantees
            # the scratch file is deleted.
            with tempfile.TemporaryDirectory() as scratch_dir:
                writer = Chem.SDWriter(str(Path(scratch_dir) / "converted.sdf"))
                try:
                    writer.write(molecule.m)
                finally:
                    writer.close()

            return molecule.molblock()
        except Exception as e:
            DEFAULT_LOGGER.log_error(f"Failed to convert ligand block content to SDF: {str(e)}")
            return None

    @classmethod
    def fetch_smiles_from_pdb_api(cls, res_name: str) -> Optional[str]:
        """
        Retrieve the SMILES string of a ligand from the RCSB PDB REST API.

        The chemical-component endpoint is queried with the upper-cased residue name and the
        ``smilesstereo`` entry of ``rcsb_chem_comp_descriptor`` is returned.

        Args:
            res_name (str): The residue name/identifier of the ligand to query.

        Returns:
            Optional[str]: The SMILES string if found; None if the request fails, times out,
                returns a non-200 status, or carries no SMILES. Failures are logged, not raised.

        Example:
            >>> smiles = Ligand.fetch_smiles_from_pdb_api("ATP")
            >>> print(smiles)
            'NC1=C2N=CN(C(O)C3OC(COP(O)(=O)OP(O)(=O)OP(O)(O)=O)C(O)C3O)C2=NC=N1'
        """
        try:
            query_url = f"https://data.rcsb.org/rest/v1/core/chemcomp/{res_name.upper()}"
            # Without a timeout a stalled connection would block the caller indefinitely.
            response = requests.get(query_url, timeout=30)
            if response.status_code != 200:
                raise ValueError(f"Failed to retrieve data for ligand '{res_name}' from PDB API.")
            data = response.json()
            smiles = data.get("rcsb_chem_comp_descriptor", {}).get("smilesstereo")
            if not smiles:
                raise ValueError(f"SMILES not found for ligand '{res_name}'.")
            return smiles
        except Exception as e:
            # Covers the ValueErrors above as well as network and JSON errors.
            DEFAULT_LOGGER.log_error(f"Failed to fetch SMILES from PDB API: {str(e)}")

        return None

    @classmethod
    @jupyter_visualization
    def visualize_ligands_from_sdf(cls, file_path: str):
        """
        Render the ligands stored in an SDF file with MoleculeViewer.

        Args:
            file_path (str): The path to the SDF file; it is handed to MoleculeViewer as a
                string without being checked here.

        Returns:
            The value returned by ``viewer.render_ligand`` (before any processing done by the
            ``jupyter_visualization`` decorator).

        Raises:
            Exception: Any error raised by the viewer is logged and re-raised.
        """
        try:
            viewer = MoleculeViewer(str(file_path), format="sdf")
            view_config = viewer.get_ligand_visualization_config()
            return viewer.render_ligand(ligand_config=view_config)
        except Exception as e:
            DEFAULT_LOGGER.log_error(f"Visualization failed: {str(e)}")
            raise

    @classmethod
    @jupyter_visualization
    def visualize_ligands(cls, ligands: List["Ligand"]):
        """
        Render several ligands together with MoleculeViewer.

        Each ligand is written as SDF to a scratch file, the file contents are concatenated,
        and the combined SDF text is handed to the viewer. The scratch directory is removed
        before rendering.

        Args:
            ligands (List[Ligand]): The ligand objects to visualize.

        Returns:
            The value returned by ``viewer.render_ligand`` (before any processing done by the
            ``jupyter_visualization`` decorator).

        Raises:
            Exception: Any error while writing, reading or rendering is logged and re-raised.
        """
        try:
            sdf_chunks = []
            # A temporary directory cleans up after itself; mkstemp() left an open file
            # descriptor and two stray files behind.
            with tempfile.TemporaryDirectory() as scratch_dir:
                scratch_file = str(Path(scratch_dir) / "ligand.sdf")
                for ligand in ligands:
                    ligand.write_to_file(output_format="sdf", output_path=scratch_file)
                    with open(scratch_file, "r") as fd:
                        sdf_chunks.append(fd.read())

            sdf_data = "".join(sdf_chunks)
            viewer = MoleculeViewer(data=sdf_data, format="sdf")
            ligand_config = viewer.get_ligand_visualization_config()
            html = viewer.render_ligand(ligand_config=ligand_config)

            return html
        except Exception as e:
            DEFAULT_LOGGER.log_error(f"Visualization failed: {str(e)}")
            raise

    def _repr_html_(self) -> str:
        """
        Return the HTML representation of the object for Jupyter Notebook.

        Returns:
            str: The result of ``self.visualize()``; if that raises, a warning is logged and
                the plain-text ``__str__`` output is returned instead.
        """
        try:
            # No stdout output here: a representation hook should only return its value.
            return self.visualize()
        except Exception as e:
            DEFAULT_LOGGER.log_warning(f"Failed to generate HTML representation: {str(e)}")
            return self.__str__()

    def __str__(self) -> str:
        info_str = f"Name: {self.name}\nSMILES: {self.mol.smiles}\nHeavy Atoms: {self.hac}\n"
        if self.properties:
            info_str += "Properties:\n"
            for prop_name, prop_value in self.properties.items():
                info_str += f"  {prop_name}: {prop_value}\n"

        if self.xref_protein:
            info_str += f"Cross-reference Protein Chain ID: {self.xref_protein_chain_id}\n"
            info_str += f"Cross-reference Residue ID: {self.xref_residue_id}\n"
            info_str += f"Cross-reference Insertion Code: {self.xref_ins_code}\n"

        return f"Ligand:\n  {info_str}"

    def __repr__(self) -> str:
        return self.__str__()

    @staticmethod
    def get_directory() -> str:
        """
        Return the directory used for ligand files, creating it if necessary.

        The directory is the "ligands" folder directly under ``WORKING_DIR``; missing parent
        directories are created as well.

        Returns:
            str: The path of the ligands directory.
        """
        target = Path(WORKING_DIR).joinpath("ligands")
        target.mkdir(parents=True, exist_ok=True)
        return str(target)

    def admet_properties(self) -> str:
        """
        Predict ADMET properties for the ligand and store them as ligand properties.

        The first element returned by ``predict_properties`` for the ligand's SMILES is used;
        every entry except the "smiles" key is stored through ``set_property``.

        Returns:
            On success, the prediction mapping itself (including its "smiles" entry) - note
            that this is not a string despite the annotation. On failure, the error is logged
            and the string "Failed to predict ADMET properties." is returned.
        """
        try:
            predictions = predict_properties(smiles=self.mol.smiles)[0]
            for prop_key, prop_val in predictions.items():
                if prop_key != "smiles":
                    self.set_property(prop_key, prop_val)
            return predictions
        except Exception as e:
            DEFAULT_LOGGER.log_error(f"Failed to predict ADMET properties: {str(e)}")
            return "Failed to predict ADMET properties."

    def protonate(self, pH: float = 7.4, filter_percentage: float = 1):
        """
        Protonate the ligand at a given pH and record the resulting SMILES.

        The module-level ``protonate`` function is called with the ligand's SMILES. A
        non-empty result is stored in ``self.protonated_smiles`` and as the
        "ProtonatedSMILES" property; an empty result leaves the ligand untouched.

        Args:
            pH (float, optional): The pH value at which to protonate the molecule. Defaults to 7.4.
            filter_percentage (float, optional): The filtering threshold forwarded to the
                protonation routine. Defaults to 1.

        Returns:
            self: The ligand instance, on success as well as on failure, so calls can be chained.

        Note:
            Protonation errors are logged and not raised.

        Example:
            >>> ligand.protonate(pH=7.0, filter_percentage=0.8)
        """
        try:
            smiles = protonate(
                pH=pH,
                smiles=self.mol.smiles,
                filter_percentage=filter_percentage,
            )
            if smiles:
                self.protonated_smiles = smiles
                self.set_property("ProtonatedSMILES", smiles)
        except Exception as e:
            DEFAULT_LOGGER.log_error(f"Failed to protonate the ligand molecule: {str(e)}")

        # Returned on every path; previously only the error path returned the instance.
        return self

    def update_coordinates(self, coords: np.ndarray):
        """
        Overwrite the positions of the ligand's conformer in place.

        Args:
            coords (np.ndarray): New coordinates; its first dimension must equal the atom
                count of the conformer, or of the conformer obtained after ``Chem.RemoveHs``.

        Raises:
            ValueError: If the molecule has no conformer.
            ValueError: If the number of rows in ``coords`` matches neither atom count.

        Notes:
            - ``coords`` is converted to float64 before being passed to ``SetPositions``.
            - A log message is emitted after the update.
            - NOTE(review): when only the hydrogen-stripped count matches, the array is still
              applied to the full conformer - confirm that ``SetPositions`` accepts this.
        """
        rd_mol = self.mol.m
        if rd_mol.GetNumConformers() == 0:
            raise ValueError("Ligand molecule has no conformers to update.")

        conformer = rd_mol.GetConformer()
        heavy_conformer = Chem.RemoveHs(rd_mol).GetConformer()

        n_given = coords.shape[0]
        matches_full = n_given == conformer.GetNumAtoms()
        if not matches_full and n_given != heavy_conformer.GetNumAtoms():
            raise ValueError("Number of ligand atoms does not match the conformer's atom count.")

        new_positions = coords.astype(np.float64)
        conformer.SetPositions(new_positions)
        DEFAULT_LOGGER.log_info("Ligand coordinates has been inplaced updated.")

    @classmethod
    def protonate_molecules(cls, ligands):
        """
        Protonate a collection of ligands given as SMILES strings or Ligand objects.

        Args:
            ligands (List[Union[str, Ligand]]): SMILES strings and/or Ligand objects.

        Returns:
            List[Ligand]: The processed ligands in input order. Strings are replaced by newly
                built Ligand objects; entries that could not be built or whose protonation
                step raised are left out.

        Notes:
            - ``protonate()`` is called with its defaults, and only for ligands whose
              ``protonated_smiles`` is still empty.
            - Errors for individual entries are logged and never raised.
            - Progress is shown with a tqdm bar labelled "Protonating Molecules".

        Example:
            >>> smiles_list = ['CC(=O)O', 'CN1C=NC=C1']
            >>> protonated_ligands = Ligand.protonate_molecules(smiles_list)
        """
        processed = []

        for entry in tqdm(ligands, desc="Protonating Molecules"):
            if isinstance(entry, str):
                try:
                    entry = Ligand(smiles=entry)
                except Exception as e:
                    DEFAULT_LOGGER.log_error(f"Failed to create Ligand from SMILES: {str(e)}")
                    continue

            try:
                already_done = bool(entry.protonated_smiles)
                if not already_done:
                    entry.protonate()
            except Exception as e:
                DEFAULT_LOGGER.log_error(f"Failed to protonate the ligand molecule: {str(e)}")
                continue

            processed.append(entry)

        return processed

Initialize a Ligand object.

This constructor creates a Ligand object from various input sources and validates the molecular structure.

Args

identifier : str, optional
Name or identifier of the molecule. Defaults to "".
file_path : str, optional
Path to input file containing molecule data. Defaults to "".
smiles : str, optional
SMILES string representation of molecule. Defaults to "".
block_type : str, optional
Type of molecular block content (e.g. "mol", "sdf"). Defaults to "".
block_content : str, optional
Content of molecular block. Defaults to "".
name : str, optional
Name for the molecule. Defaults to "".
seed : int, optional
Random seed for coordinate generation. Defaults to None.
xref_protein : str, optional
Cross-reference to protein. Defaults to "".
xref_ins_code : str, optional
Cross-reference insertion code. Defaults to "".
xref_residue_id : str, optional
Cross-reference residue ID. Defaults to "".
xref_protein_chain_id : str, optional
Cross-reference protein chain ID. Defaults to "".
save_to_file : bool, optional
Whether to save molecule to file. Defaults to False.
properties : dict, optional
Additional properties for the molecule. Defaults to None.

Raises

ValueError
If not exactly one input source is provided (identifier, file_path, smiles, or block_content).
ValueError
If block_type is not provided when initializing from block_content.
ValueError
If molecule creation fails.

Notes

  • Only one input source (identifier, file_path, smiles, or block_content) should be provided
  • Automatically generates coordinates if needed
  • Performs validation checks including heavy atom count
  • Can optionally save the molecule to file
  • Stores various properties including cross-references to protein structure

Static methods

def convert_to_sdf(block_content: str, block_type: str)

Converts molecular block content to SDF format.

This class method takes a molecular block content and its type, attempts to convert it to an RDKit molecule object, and returns the molecule in SDF molblock format.

Args

block_content : str
The string content of the molecular block to convert
block_type : str
The type of molecular block (e.g. 'MOL', 'SDF', etc.)

Returns

str
The converted molecule in SDF molblock format if successful
None
If conversion fails

Raises

Exception
Handles any exceptions during conversion and returns None after logging error

Examples

>>> sdf_block = Ligand.convert_to_sdf("molecular block content", "MOL")
>>> if sdf_block:
...     # Process the SDF block
... else:
...     # Handle conversion failure
def create_ligands_from_csv(file_path: str) ‑> List[Ligand]

Creates Ligand instances from a CSV file containing SMILES strings and optional additional properties.

This class method reads a CSV file and creates Ligand objects from each row. The CSV file must contain a 'smiles' column (case-insensitive). Additional columns are treated as properties of the ligand.

Args

file_path : str
Path to the CSV file containing ligand data.

Returns

List[Ligand]
A list of created Ligand instances.

Raises

FileNotFoundError
If the specified file does not exist.
ValueError
If the CSV file does not contain a 'smiles' column.
pd.errors.EmptyDataError
If the CSV file is empty.
pd.errors.ParserError
If there are issues parsing the CSV file.

Notes

  • Rows with missing or invalid SMILES strings are skipped with a warning.
  • All column names are normalized (stripped and converted to lowercase) for comparison.
  • Non-SMILES columns are added as properties to the Ligand instances.
  • Any errors during processing of individual rows are logged but don't stop the overall process.

Example CSV format:

    smiles,name,molecular_weight
    CC(=O)O,acetic acid,60.052
    CCO,ethanol,46.068

def create_ligands_from_file(file_path: str, file_type: str) ‑> List[Ligand]

Creates a list of Ligand objects from a file.

Args

file_path : str
Path to the input file containing ligand data.
file_type : str
Type of the input file. Supported types are 'sdf' and 'csv'.

Returns

List[Ligand]
A list of Ligand objects created from the file data.

Raises

ValueError
If the file_type is not supported ('sdf' or 'csv').

Examples

>>> ligands = Ligand.create_ligands_from_file("compounds.sdf", "sdf")
>>> ligands = Ligand.create_ligands_from_file("compounds.csv", "csv")
def create_ligands_from_sdf(file_path: str) ‑> List[Ligand]

Creates a list of Ligand objects from an SDF file.

This class method reads molecules from an SDF file and converts each valid molecule into a Ligand instance. It handles potential parsing errors and logs relevant information.

Args

file_path : str
Path to the SDF file containing molecular structures.

Returns

List[Ligand]
A list of Ligand objects created from the SDF file. Returns an empty list if no valid molecules are found or in case of errors.

Raises

FileNotFoundError
If the specified file path does not exist.

Example

>>> ligands = Ligand.create_ligands_from_sdf("molecules.sdf")
>>> print(len(ligands))  # Number of successfully parsed molecules

Notes

  • Molecules that fail to parse will be skipped and logged as warnings
  • Properties from the SDF file are preserved and stored in the Ligand objects
  • Progress and errors are tracked through the DEFAULT_LOGGER
def fetch_smiles_from_pdb_api(res_name: str) ‑> str

Retrieves the SMILES string representation of a ligand from the PDB API.

This class method queries the RCSB PDB REST API to fetch the stereochemical SMILES notation for a given ligand residue name. If the API request fails or the SMILES data is not found, appropriate errors are logged.

Args

res_name : str
The residue name/identifier of the ligand to query.

Returns

str
The stereochemical SMILES string of the ligand if found.
None
If the API request fails or SMILES data is not available.

Raises

ValueError
If the API request fails or SMILES data is not found for the given ligand.

Example

>>> smiles = Ligand.fetch_smiles_from_pdb_api("ATP")
>>> print(smiles)
'NC1=C2N=CN(C(O)C3OC(COP(O)(=O)OP(O)(=O)OP(O)(O)=O)C(O)C3O)C2=NC=N1'
def get_directory() ‑> str
Expand source code
@staticmethod
def get_directory() -> str:
    """
    Return the base directory used for ligand files, creating it if needed.

    The directory is the ``ligands`` sub-folder of ``WORKING_DIR``. Missing
    parent folders are created as well, and an already existing directory is
    not an error.

    Returns:
        str: The path to the ligands directory.
    """
    target_dir = Path(WORKING_DIR, "ligands")
    # parents/exist_ok make the call safe to repeat on every invocation.
    target_dir.mkdir(parents=True, exist_ok=True)
    return str(target_dir)

Returns the base directory used for ligand files, creating it if it does not exist.

Args:

Returns

str
The path to the ligands directory.
def protonate_molecules(ligands)

Protonates a list of ligands by adding hydrogens at physiological pH. This class method processes a list of ligands, either as SMILES strings or Ligand objects, and returns a list of protonated Ligand objects. It handles the protonation of each ligand while managing potential errors during SMILES parsing or protonation.

Args

ligands : List[Union[str, Ligand]]
A list containing either SMILES strings or Ligand objects to be protonated.

Returns

List[Ligand]
A list of successfully protonated Ligand objects. Failed ligands are excluded from the output list.

Raises

None
Exceptions during processing individual ligands are caught and logged.

Example

>>> smiles_list = ['CC(=O)O', 'CN1C=NC=C1']
>>> protonated_ligands = Ligand.protonate_molecules(smiles_list)
def visualize_ligands(*args, **kwargs)
def visualize_ligands_from_sdf(*args, **kwargs)

Instance variables

prop atom_types
Expand source code
@property
def atom_types(self):
    """
    Atom types reported by the underlying molecule.

    Returns:
        list: Whatever ``self.mol.species()`` returns; per the original
            documentation, the unique atomic species as strings
            (e.g. ['C', 'H', 'O', 'N']).
    """
    # Delegates entirely to the wrapped molecule object.
    species = self.mol.species()
    return species

Returns a list of unique atom types present in the ligand molecule.

Returns

list
A list of strings representing unique atomic species (e.g. ['C', 'H', 'O', 'N'])
prop coordinates
Expand source code
@property
def coordinates(self):
    """
    Atomic coordinates of the ligand as a NumPy array.

    Returns:
        np.ndarray: A new float32 array built from ``self.mol.coords()``
            on every access.
    """
    raw_coords = self.mol.coords()
    # np.array (not np.asarray) so the caller always receives a fresh copy.
    return np.array(raw_coords, dtype=np.float32)

Returns a numpy array of ligand coordinates.

Returns

np.ndarray
A numpy array of float32 containing the 3D coordinates of all atoms in the ligand.

Methods

def admet_properties(self) ‑> str
Expand source code
def admet_properties(self) -> "dict | str":
    """
    Predict ADMET properties for the ligand and store them on the ligand.

    The ligand's SMILES is passed to ``predict_properties`` and the first
    entry of the result is used. Every key of that entry except ``"smiles"``
    is stored on the ligand through ``set_property``.

    Returns:
        dict | str: The mapping of predicted properties (including its
            ``"smiles"`` entry) on success, or the string
            ``"Failed to predict ADMET properties."`` if anything goes wrong.
            The exact mapping type is whatever ``predict_properties`` yields
            (presumably a plain dict).

    Note:
        Failures are logged and reported through the returned string rather
        than raised, so callers should check the type of the result.
    """
    try:
        props = predict_properties(smiles=self.mol.smiles)[0]
        for key, value in props.items():
            # The input SMILES is echoed back; it is not a predicted property.
            if key != "smiles":
                self.set_property(key, value)

        return props
    except Exception as e:
        DEFAULT_LOGGER.log_error(f"Failed to predict ADMET properties: {str(e)}")
        return "Failed to predict ADMET properties."

Predict ADMET properties for the ligand.

Returns

dict | str
The mapping of predicted ADMET properties on success, or the string "Failed to predict ADMET properties." if prediction fails.
def draw(self)
Expand source code
def draw(self):
    """
    Render the ligand by delegating to the underlying molecule's ``draw``.

    Returns:
        Image: Whatever ``self.mol.draw()`` returns; per the original
            documentation, a 2D structural depiction of the ligand.
    """
    depiction = self.mol.draw()
    return depiction

Draws a visual representation of the ligand molecule.

Returns

Image
The 2D structural representation of the ligand molecule.
def get_center(self) ‑> List[float] | None
Expand source code
def get_center(self) -> Optional[List[float]]:
    """
    Calculate the center of the ligand as the mean of its atom coordinates.

    Returns:
        Optional[List[float]]: The center coordinates as a list of floats [x, y, z],
            or None if no coordinates are available (missing or empty).

    Example:
        >>> ligand.get_center()
        [1.234, -2.345, 3.456]
    """
    # `coordinates` builds a new array on every access, so read it only once.
    coords = self.coordinates
    # An empty array would average to a NaN scalar and break the list
    # conversion below, so treat it the same as missing coordinates.
    if coords is None or coords.size == 0:
        DEFAULT_LOGGER.log_warning("Coordinates are not available for this ligand.")
        return None

    center = coords.mean(axis=0).tolist()
    DEFAULT_LOGGER.log_info(f"Calculated center coordinates: {center}")
    return [float(x) for x in center]

Calculate the center coordinates of the ligand.

Returns

Optional[List[float]]
The center coordinates as a list of floats [x, y, z] or None if coordinates are not available.

Example

>>> ligand.get_center()
[1.234, -2.345, 3.456]
def get_property(self, prop_name: str)
Expand source code
def get_property(self, prop_name: str):
    """
    Look up a property of the ligand by name.

    The lookup order is:

    1. the ligand's own ``properties`` dictionary;
    2. the underlying molecule object (``self.mol.m``) via ``HasProp`` / ``GetProp``.

    A value found on the molecule is copied into the ``properties`` dictionary
    so that later lookups are served from there.

    Args:
        prop_name (str): The name of the property to retrieve.

    Returns:
        Any: The property value, or None when it is found in neither place.

    Notes:
        A dictionary entry whose value is None is treated as missing, so the
        molecule object is consulted in that case as well. Every outcome is
        logged at INFO level.
    """
    cached = self.properties.get(prop_name)
    if cached is not None:
        DEFAULT_LOGGER.log_info(f"Retrieved property '{prop_name}' from properties dictionary: '{cached}'.")
        return cached

    molecule = self.mol.m
    if not molecule.HasProp(prop_name):
        DEFAULT_LOGGER.log_info(f"Property '{prop_name}' not found.")
        return None

    from_molecule = molecule.GetProp(prop_name)
    # Cache the value so the next lookup hits the dictionary directly.
    self.properties[prop_name] = from_molecule
    DEFAULT_LOGGER.log_info(f"Retrieved property '{prop_name}' from molecule: '{from_molecule}'.")
    return from_molecule

Retrieve a property value from the ligand object.

This method attempts to get the property value first from the properties dictionary, and if not found there, tries to retrieve it from the molecule object.

Args

prop_name : str
The name of the property to retrieve.

Returns

Any
The value of the property if found, None otherwise.

Notes

The method first checks the internal properties dictionary. If the property is not found there, it checks the molecule object using RDKit's HasProp/GetProp. If found in the molecule object, the value is also cached in the properties dictionary for future use.

def protonate(self, pH: float = 7.4, filter_percentage: float = 1)
Expand source code
def protonate(self, pH: float = 7.4, filter_percentage: float = 1):
    """
    Protonates the ligand molecule at a given pH value.

    The ligand's SMILES is passed to the module-level ``protonate`` function.
    If that returns a non-empty result, it is stored in ``self.protonated_smiles``
    and recorded as the ``"ProtonatedSMILES"`` property.

    Args:
        pH (float, optional): The pH value at which to protonate the molecule. Defaults to 7.4.
        filter_percentage (float, optional): The filtering threshold for protonation states.
            Value between 0 and 1. Defaults to 1.

    Returns:
        self: The ligand instance, on success as well as on failure, so calls
            can be chained.

    Note:
        No exception is propagated. Any error raised during protonation is
        logged and the instance is returned unchanged.

    Example:
        >>> ligand.protonate(pH=7.0, filter_percentage=0.8)
    """
    try:
        # Inside a method body this name resolves to the module-level
        # `protonate` function, not to this method.
        smiles = protonate(
            pH=pH,
            smiles=self.mol.smiles,
            filter_percentage=filter_percentage,
        )
        if smiles:
            self.protonated_smiles = smiles
            self.set_property("ProtonatedSMILES", smiles)
    except Exception as e:
        DEFAULT_LOGGER.log_error(f"Failed to protonate the ligand molecule: {str(e)}")

    # Return the instance on every path; previously only the failure path
    # returned it, so a successful call yielded None and broke chaining.
    return self

Protonates the ligand molecule at a given pH value.

This method attempts to generate a protonated version of the molecule using a pH-dependent protonation algorithm. If successful, it stores the protonated SMILES string and sets it as a property of the molecule.

Args

pH : float, optional
The pH value at which to protonate the molecule. Defaults to 7.4.
filter_percentage : float, optional
The filtering threshold for protonation states. Value between 0 and 1. Defaults to 1.

Returns

self
Returns the ligand instance, allowing for method chaining.

Raises

None
Exceptions raised during protonation are caught and logged; the instance is left unchanged.

Example

>>> ligand.protonate(pH=7.0, filter_percentage=0.8)
def set_property(self, prop_name: str, prop_value)
Expand source code
def set_property(self, prop_name: str, prop_value):
    """
    Record a property on the ligand and mirror it onto the wrapped molecule.

    Args:
        prop_name (str): Name of the property to set.
        prop_value: Value to store. The ``properties`` dictionary keeps the value
            as given; the molecule object receives ``str(prop_value)``.

    Note:
        The dictionary is updated before the molecule, and the assignment is
        logged at INFO level afterwards.
    """
    self.properties[prop_name] = prop_value

    molecule = self.mol.m
    # The molecule-side property is stored as text, hence the str() conversion.
    molecule.SetProp(prop_name, str(prop_value))

    DEFAULT_LOGGER.log_info(f"Set property '{prop_name}' to '{prop_value}'.")

Set a property for both the ligand properties dictionary and the underlying RDKit molecule.

Args

prop_name : str
Name of the property to set.
prop_value
Value to set for the property. Will be converted to string for RDKit molecule.

Note

The property is set both in the properties dict and RDKit molecule. The property value is logged at INFO level.

def update_coordinates(self, coords: numpy.ndarray)
Expand source code
def update_coordinates(self, coords: np.ndarray):
    """
    Overwrite the positions stored in the ligand's conformer.

    Args:
        coords (np.ndarray): New coordinates for the ligand atoms. The first
            dimension must equal the atom count of the conformer, either of the
            molecule as stored or of the molecule after ``Chem.RemoveHs``.

    Raises:
        ValueError: If the ligand molecule has no conformers to update.
        ValueError: If the number of coordinate rows matches neither atom count.

    Notes:
        - The existing conformer is modified in place
        - The coordinates are cast to float64 before being applied
        - A message is logged at INFO level after the update
        - NOTE(review): when only the hydrogen-free count matches, the array is
          still applied to the full conformer; confirm that ``SetPositions``
          handles fewer rows than atoms as intended.
    """
    molecule = self.mol.m
    if not molecule.GetNumConformers():
        raise ValueError("Ligand molecule has no conformers to update.")

    full_conformer = molecule.GetConformer()
    heavy_conformer = Chem.RemoveHs(molecule).GetConformer()

    n_rows = coords.shape[0]
    # Accept either the full atom count or the hydrogen-stripped atom count.
    if n_rows != full_conformer.GetNumAtoms() and n_rows != heavy_conformer.GetNumAtoms():
        raise ValueError("Number of ligand atoms does not match the conformer's atom count.")

    full_conformer.SetPositions(coords.astype(np.float64))
    DEFAULT_LOGGER.log_info("Ligand coordinates has been inplaced updated.")

Updates the 3D coordinates of the ligand molecule's conformer.

Args

coords : np.ndarray
Array of new 3D coordinates for the ligand atoms. Must match the number of atoms in either the full molecule or molecule without hydrogens.

Raises

ValueError
If the ligand molecule has no conformers to update.
ValueError
If the number of coordinates doesn't match the number of atoms in the molecule (either with or without hydrogens).

Notes

  • The coordinates are updated in-place on the existing conformer
  • The input coordinates are converted to float64 type
  • A success message is logged after updating
def visualize(*args, **kwargs)
Expand source code
def wrapper(*args, **kwargs):
    # Call the wrapped function with the caller's arguments unchanged; its
    # result is presumably HTML markup, going by the original variable name.
    rendered = func(*args, **kwargs)
    # Hand the result to the Jupyter viewer and return whatever it produces.
    return JupyterViewer.visualize(rendered)
def write_to_file(self, output_path: str = '', output_format: str = '')
Expand source code
def write_to_file(self, output_path: str = "", output_format: str = ""):
    """
    Write the ligand structure to a file in the specified format.

    This method writes the molecular structure and its properties to a file in the specified format.
    Supported formats are PDB, MOL, and SDF.

    Args:
        output_path (str, optional): The path where the file should be written. If not provided,
            the file is written to the ligands directory as ``<name><format>``.
        output_format (str, optional): The desired output format ('pdb', 'mol' or 'sdf', with or
            without a leading dot, case-insensitive). If not provided, it is inferred from the
            extension of output_path.

    Raises:
        ValueError: If neither output_path nor output_format is provided, or if the resulting
            format is not one of '.pdb', '.mol', '.sdf'.
        Exception: Any error raised while writing is logged and re-raised.

    Note:
        - If the output format doesn't match the file extension, a warning is logged and
          the specified output format is used; the file is still written to output_path as given.
        - If output_path has no extension, the specified output format is used without a warning.
        - Before writing, the name ("_Name"), SMILES ("_SMILES") and every entry of
          ``self.properties`` are set on the molecule through ``set_property``; entries of
          ``self.properties`` are passed as strings.
        - Properties are written in the following format:
            - PDB: As REMARK lines before the PDB block
            - MOL: As ">  <name>" blocks after the mol block
            - SDF: Written by RDKit's SDWriter
    """
    try:
        if output_format == "" and output_path == "":
            raise ValueError("Please provide either output_path or output_format.")

        # Normalise the format once, up front, so that "sdf", ".sdf" and "SDF"
        # are all treated alike, including when the default path is built.
        if output_format:
            output_format = output_format.lower()
            if not output_format.startswith("."):
                output_format = f".{output_format}"

        if not output_path:
            # output_format already carries its leading dot here.
            output_path = str(Path(self.get_directory()) / f"{self.name}{output_format}")

        path = Path(output_path)
        extension = path.suffix.lower()
        if not output_format:
            output_format = extension
        elif extension != output_format:
            # Only warn about a real mismatch; a path without any extension
            # simply takes the explicitly requested format.
            if extension:
                DEFAULT_LOGGER.log_warning(
                    "Output format does not match the file extension. Writing to provided output format."
                )
            extension = output_format

        if self.name:
            self.set_property("_Name", self.name)
        if self.mol.smiles:
            self.set_property("_SMILES", self.mol.smiles)
        if self.properties:
            # Iterate over a snapshot: set_property writes back into self.properties.
            for prop_name, prop_value in list(self.properties.items()):
                self.set_property(prop_name, str(prop_value))

        if extension == ".pdb":
            pdb_block = Chem.MolToPDBBlock(self.mol.m)
            remark_lines = "".join(
                f"REMARK   {prop_name}: {prop_value}\n"
                for prop_name, prop_value in self.mol.m.GetPropsAsDict().items()
            )
            path.write_text(remark_lines + pdb_block)
        elif extension == ".sdf":
            writer = Chem.SDWriter(str(path))
            try:
                writer.SetKekulize(False)
                writer.write(self.mol.m)
            finally:
                # Always release the file handle, even if writing fails.
                writer.close()
        elif extension == ".mol":
            mol_block = Chem.MolToMolBlock(self.mol.m)
            prop_lines = "".join(
                f">  <{prop_name}>\n{prop_value}\n\n"
                for prop_name, prop_value in self.mol.m.GetPropsAsDict().items()
            )
            path.write_text(mol_block + "\n" + prop_lines)
        else:
            raise ValueError(
                f"Unsupported file extension '{extension}'. Supported extensions are '.pdb', '.mol', '.sdf'."
            )

        DEFAULT_LOGGER.log_info(f"Ligand structure written to {output_path}.")
    except Exception as e:
        DEFAULT_LOGGER.log_error(f"Failed to write structure to file {output_path}: {str(e)}")
        raise

Write the ligand structure to a file in the specified format.

This method writes the molecular structure and its properties to a file in the specified format. Supported formats are PDB, MOL, and SDF. Properties are included in the output file according to the format-specific conventions.

Args

output_path : str, optional
The path where the file should be written. If not provided, the file will be written in the ligand's directory with the name and format extension.
output_format : str, optional
The desired output format ('.pdb', '.mol', or '.sdf'). If not provided, it will be inferred from the output_path extension.

Raises

ValueError
If neither output_path nor output_format is provided, or if an unsupported file extension is specified.
Exception
If any error occurs during the file writing process.

Note

  • If the output format doesn't match the file extension, a warning will be logged and the specified output format will be used.
  • Properties are written in the following format:
    • PDB: As REMARK lines
    • MOL: As property blocks after the molecule
    • SDF: As SD fields