Module deeporigin.src.structures.ligand
Classes
class Ligand (identifier: str = '',
file_path: str = '',
smiles: str = '',
block_type: str = '',
block_content: str = '',
name: str = '',
seed: int = None,
xref_protein='',
xref_ins_code: str = '',
xref_residue_id: str = '',
xref_protein_chain_id: str = '',
save_to_file: bool = False,
properties: dict = None)-
Expand source code
class Ligand:
    """
    A small-molecule ligand wrapping a project ``Molecule`` (``self.mol``) whose
    ``.m`` attribute is the underlying RDKit molecule.

    A ligand is created from exactly one source (identifier, file, SMILES string
    or molecular block) and carries a free-form ``properties`` dictionary plus
    optional cross-references to a protein structure.
    """

    def __init__(
        self,
        identifier: str = "",
        file_path: str = "",
        smiles: str = "",
        block_type: str = "",
        block_content: str = "",
        name: str = "",
        seed: Optional[int] = None,
        xref_protein="",
        xref_ins_code: str = "",
        xref_residue_id: str = "",
        xref_protein_chain_id: str = "",
        save_to_file: bool = False,
        properties: Optional[dict] = None,
    ):
        """
        Initialize a Ligand object.

        This constructor creates a Ligand object from exactly one input source and
        records basic information about the resulting molecule.

        Args:
            identifier (str, optional): Name or identifier of the molecule. Defaults to "".
            file_path (str, optional): Path to input file containing molecule data. Defaults to "".
            smiles (str, optional): SMILES string representation of molecule. Defaults to "".
            block_type (str, optional): Type of molecular block content (e.g. "mol", "sdf"). Defaults to "".
            block_content (str, optional): Content of molecular block. Defaults to "".
            name (str, optional): Fallback name, used when the molecule itself has no name. Defaults to "".
            seed (int, optional): Random seed for coordinate generation (identifier source only). Defaults to None.
            xref_protein (str, optional): Cross-reference to protein. Defaults to "".
            xref_ins_code (str, optional): Cross-reference insertion code. Defaults to "".
            xref_residue_id (str, optional): Cross-reference residue ID. Defaults to "".
            xref_protein_chain_id (str, optional): Cross-reference protein chain ID. Defaults to "".
            save_to_file (bool, optional): Whether to write the molecule as SDF into the ligands
                directory. Defaults to False.
            properties (dict, optional): Additional properties for the molecule. Copied, never
                mutated. Defaults to None.

        Raises:
            ValueError: If not exactly one input source is provided (identifier, file_path, smiles,
                or block_content).
            ValueError: If block_type is not provided when initializing from block_content.
            ValueError: If molecule creation fails.
            FileNotFoundError: If file_path does not exist.

        Notes:
            - A warning is logged when the molecule has fewer than 5 heavy atoms.
            - Properties found on the molecule override same-named entries of ``properties``.
            - Unnamed molecules are called "Unknown_Ligand_<n>", where n is derived from the
              number of matching entries already present in the ligands directory.
        """
        self.file_path = file_path
        self.identifier = identifier
        self.protonated_smiles = None
        self.block_type = block_type.lower()
        self.block_content = block_content
        self.name = name
        self.mol = None
        # Shallow copy so that later set_property() calls never touch the caller's dict.
        self.properties = dict(properties) if properties else {}
        self.hac = 0
        self.xref_protein = xref_protein
        self.xref_ins_code = xref_ins_code
        self.xref_residue_id = xref_residue_id
        self.xref_protein_chain_id = xref_protein_chain_id

        sources_provided = sum(bool(x) for x in (identifier, file_path, smiles, block_content))
        if sources_provided != 1:
            raise ValueError("Please provide exactly one of identifier, file_path, smiles, or block_content.")

        if block_content:
            if not self.block_type:
                raise ValueError("block_type must be provided when initializing from block_content.")
            self.mol = mol_from_block(self.block_type, self.block_content)
            DEFAULT_LOGGER.log_info("Initialized Ligand from block content.")
        elif identifier:
            self.mol = Molecule.from_smiles_or_name(name=identifier, add_coords=True, seed=seed)
        elif file_path:
            self.mol = self._initialize_from_file(file_path)
        else:
            # Exactly one source is set and it is none of the above, so it is `smiles`.
            self.mol = mol_from_smiles(smiles)
            self.block_type = "mol"
            self.block_content = self.mol.molblock()
            DEFAULT_LOGGER.log_info("Initialized Ligand from SMILES string.")

        if self.mol is None:
            raise ValueError("Failed to create molecule.")

        # Name precedence: name stored on the molecule, then the `name` argument, then a placeholder.
        self.name = self.mol.name or self.name or "Unknown_Ligand"
        directory = Path(self.get_directory())
        if self.name == "Unknown_Ligand":
            num = sum(1 for _ in directory.glob(f"{self.name}*"))
            self.name = f"{self.name}_{num + 1}"

        self.hac = self.mol.m.GetNumHeavyAtoms()
        if self.hac < 5:
            DEFAULT_LOGGER.log_warning("Ligand has less than 5 heavy atoms.")

        self.properties.update(self.mol.m.GetPropsAsDict())
        self.available_for_docking = not self.mol.contains_boron

        if save_to_file:
            self.write_to_file(output_format="sdf")

    @property
    def coordinates(self):
        """
        Returns a numpy array of ligand coordinates.

        Returns:
            np.ndarray: A float32 array built from ``self.mol.coords()``.
        """
        return np.array(self.mol.coords(), dtype=np.float32)

    @property
    def atom_types(self):
        """
        Returns the atom types reported by the underlying molecule.

        Returns:
            The result of ``self.mol.species()`` (e.g. ['C', 'H', 'O', 'N']).
        """
        return self.mol.species()

    def _initialize_from_file(self, file_path: str) -> Molecule:
        """
        Initialize a Molecule object from a file.

        The lower-cased file extension (without the dot) is stored as ``self.block_type``
        and passed to ``mol_from_file``; ``self.file_path`` is replaced by a ``Path``.

        Args:
            file_path (str): Path to the molecular structure file.

        Returns:
            Molecule: A Molecule object initialized from the file.

        Raises:
            FileNotFoundError: If the specified file does not exist.
            Exception: Any error raised by ``mol_from_file`` is logged and re-raised.
        """
        path = Path(file_path)
        if not path.exists():
            raise FileNotFoundError(f"The file {file_path} does not exist.")

        extension = path.suffix.lower().lstrip(".")
        self.block_type = extension
        self.file_path = path

        try:
            molecule = mol_from_file(extension, str(path))
            DEFAULT_LOGGER.log_info(f"Initialized Ligand from file {file_path}.")
            return molecule
        except Exception as e:
            DEFAULT_LOGGER.log_error(f"Failed to initialize Ligand from file {file_path}: {str(e)}")
            raise

    def set_property(self, prop_name: str, prop_value):
        """
        Set a property on both the properties dictionary and the underlying RDKit molecule.

        Args:
            prop_name (str): Name of the property to set.
            prop_value: Value to set. Stored as-is in ``self.properties`` and as ``str(prop_value)``
                on the RDKit molecule.
        """
        self.properties[prop_name] = prop_value
        self.mol.m.SetProp(prop_name, str(prop_value))
        DEFAULT_LOGGER.log_info(f"Set property '{prop_name}' to '{prop_value}'.")

    def get_property(self, prop_name: str):
        """
        Retrieve a property value from the ligand object.

        The properties dictionary is consulted first; if the value is missing (or None) the
        RDKit molecule is queried and a hit is cached in the dictionary.

        Args:
            prop_name (str): The name of the property to retrieve.

        Returns:
            Any: The value of the property if found, None otherwise.
        """
        value = self.properties.get(prop_name)
        if value is not None:
            DEFAULT_LOGGER.log_info(f"Retrieved property '{prop_name}' from properties dictionary: '{value}'.")
            return value

        if self.mol.m.HasProp(prop_name):
            value = self.mol.m.GetProp(prop_name)
            self.properties[prop_name] = value
            DEFAULT_LOGGER.log_info(f"Retrieved property '{prop_name}' from molecule: '{value}'.")
            return value

        DEFAULT_LOGGER.log_info(f"Property '{prop_name}' not found.")
        return None

    def write_to_file(self, output_path: str = "", output_format: str = ""):
        """
        Write the ligand structure to a file in the specified format.

        Supported formats are PDB, MOL, and SDF. Before writing, the ligand name, SMILES and
        all entries of ``self.properties`` are copied onto the RDKit molecule.

        Args:
            output_path (str, optional): The path where the file should be written. If not
                provided, the file is written to the ligands directory as
                ``<name>.<output_format>``.
            output_format (str, optional): The desired output format ('pdb', 'mol' or 'sdf';
                a leading dot and any letter case are accepted). If not provided, it is
                inferred from the output_path extension.

        Raises:
            ValueError: If neither output_path nor output_format is provided, or if the
                resulting format is not supported.
            Exception: Any error during writing is logged and re-raised.

        Note:
            - If the output format doesn't match the file extension, a warning is logged and
              the specified output format is used.
            - Properties are written in the following format:
                - PDB: As REMARK lines
                - MOL: As property blocks after the molecule
                - SDF: As SD fields
        """
        try:
            if output_format == "" and output_path == "":
                raise ValueError("Please provide either output_path or output_format.")

            # Normalise so that "sdf", ".sdf" and "SDF" are all treated alike and the
            # default file name never ends up with a doubled dot.
            requested = output_format.strip().lower().lstrip(".")

            if not output_path:
                output_path = str(Path(self.get_directory()) / f"{self.name}.{requested}")

            path = Path(output_path)
            suffix = path.suffix.lower()
            extension = f".{requested}" if requested else suffix

            if suffix and suffix != extension:
                DEFAULT_LOGGER.log_warning(
                    "Output format does not match the file extension. Writing to provided output format."
                )

            if extension not in (".pdb", ".mol", ".sdf"):
                raise ValueError(
                    f"Unsupported file extension '{extension}'. Supported extensions are '.pdb', '.mol', '.sdf'."
                )

            if self.name:
                self.set_property("_Name", self.name)
            if self.mol.smiles:
                self.set_property("_SMILES", self.mol.smiles)

            # Iterate over a snapshot; set_property() stringifies only the RDKit copy, so the
            # values kept in self.properties retain their original types.
            for prop_name, prop_value in list(self.properties.items()):
                self.set_property(prop_name, prop_value)

            if extension == ".pdb":
                remark_lines = "".join(
                    f"REMARK {prop_name}: {prop_value}\n"
                    for prop_name, prop_value in self.mol.m.GetPropsAsDict().items()
                )
                path.write_text(remark_lines + Chem.MolToPDBBlock(self.mol.m))
            elif extension == ".sdf":
                writer = Chem.SDWriter(str(path))
                try:
                    writer.SetKekulize(False)
                    writer.write(self.mol.m)
                finally:
                    # Always release the file handle, even when writing fails.
                    writer.close()
            else:  # ".mol"
                prop_lines = "".join(
                    f"> <{prop_name}>\n{prop_value}\n\n"
                    for prop_name, prop_value in self.mol.m.GetPropsAsDict().items()
                )
                path.write_text(Chem.MolToMolBlock(self.mol.m) + "\n" + prop_lines)

            DEFAULT_LOGGER.log_info(f"Ligand structure written to {output_path}.")
        except Exception as e:
            DEFAULT_LOGGER.log_error(f"Failed to write structure to file {output_path}: {str(e)}")
            raise

    def get_center(self) -> Optional[List[float]]:
        """
        Calculate the center coordinates of the ligand.

        Returns:
            Optional[List[float]]: The mean of the atom coordinates as a list of floats
            [x, y, z], or None if coordinates are not available.

        Example:
            >>> ligand.get_center()
            [1.234, -2.345, 3.456]
        """
        # The property rebuilds the array on every access, so read it once.
        coords = self.coordinates
        if coords is None:
            DEFAULT_LOGGER.log_warning("Coordinates are not available for this ligand.")
            return None

        center = coords.mean(axis=0)
        DEFAULT_LOGGER.log_info(f"Calculated center coordinates: {center.tolist()}")
        return [float(x) for x in center.tolist()]

    def draw(self):
        """
        Draws a visual representation of the ligand molecule.

        Returns:
            The result of ``self.mol.draw()``.
        """
        return self.mol.draw()

    @jupyter_visualization
    def visualize(self) -> str:
        """
        Render the ligand with MoleculeViewer.

        The ligand is first written as ``<name>_visualize.sdf`` into the system temporary
        directory and the viewer is pointed at that file.

        Returns:
            str: The HTML produced by the viewer.

        Raises:
            Exception: Any failure is logged and re-raised.
        """
        try:
            # NOTE(review): the temporary file is left in place; it is unclear from here
            # whether the viewer still needs it after rendering.
            temp_file = Path(tempfile.gettempdir()) / f"{self.name}_visualize.sdf"
            self.write_to_file(str(temp_file))

            viewer = MoleculeViewer(str(temp_file), format="sdf")
            ligand_config = viewer.get_ligand_visualization_config()
            html = viewer.render_ligand(ligand_config=ligand_config)
            return html
        except Exception as e:
            DEFAULT_LOGGER.log_error(f"Visualization failed: {str(e)}")
            raise

    @classmethod
    def create_ligands_from_sdf(cls, file_path: str) -> List["Ligand"]:
        """
        Creates a list of Ligand objects from an SDF file.

        Each record that RDKit can parse is converted to a mol block and turned into a
        ligand carrying the record's SD properties.

        Args:
            file_path (str): Path to the SDF file containing molecular structures.

        Returns:
            List[Ligand]: The ligands created from the file. Empty if no valid molecules are
            found or if reading fails.

        Raises:
            FileNotFoundError: If the specified file path does not exist.

        Example:
            >>> ligands = Ligand.create_ligands_from_sdf("molecules.sdf")
            >>> print(len(ligands))  # Number of successfully parsed molecules

        Notes:
            - Records that fail to parse are skipped with a warning.
            - Errors while building an individual ligand are logged and do not stop the loop.
        """
        path = Path(file_path)
        if not path.exists():
            raise FileNotFoundError(f"The file '{file_path}' does not exist.")

        ligands = []
        try:
            suppl = Chem.SDMolSupplier(str(path))
            for idx, mol in enumerate(suppl, start=1):
                try:
                    if mol is None:
                        DEFAULT_LOGGER.log_warning(f"Skipping molecule at index {idx} due to parsing error.")
                        continue

                    mol_block = Chem.MolToMolBlock(mol)
                    # cls (not Ligand) so that subclasses get instances of themselves.
                    ligand = cls(block_type="sdf", block_content=mol_block, properties=mol.GetPropsAsDict())
                    ligands.append(ligand)
                except Exception as e:
                    DEFAULT_LOGGER.log_error(f"Failed to create Ligand from SDF file molecule_idx = '{idx}': {str(e)}")

            DEFAULT_LOGGER.log_info(f"Created {len(ligands)} Ligand instances from SDF file '{file_path}'.")
        except Exception as e:
            DEFAULT_LOGGER.log_error(f"Failed to create Ligands from SDF file '{file_path}': {str(e)}")

        return ligands

    @classmethod
    def create_ligands_from_csv(cls, file_path: str) -> List["Ligand"]:
        """
        Creates Ligand instances from a CSV file containing SMILES strings and optional
        additional properties.

        The CSV file must contain a 'smiles' column (matched case-insensitively after
        stripping whitespace). Every other non-missing cell is set as a ligand property.

        Args:
            file_path (str): Path to the CSV file containing ligand data.

        Returns:
            List[Ligand]: A list of created Ligand instances. Empty when the file is empty,
            cannot be parsed, or lacks a 'smiles' column (these conditions are logged, not
            raised).

        Raises:
            FileNotFoundError: If the specified file does not exist.

        Notes:
            - Rows with missing or invalid SMILES strings are skipped with a warning.
            - Any errors during processing of individual rows are logged but don't stop the
              overall process.

        Example CSV format:
            smiles,name,molecular_weight
            CC(=O)O,acetic acid,60.052
            CCO,ethanol,46.068
        """
        path = Path(file_path)
        if not path.exists():
            raise FileNotFoundError(f"The file '{file_path}' does not exist.")

        ligands = []
        try:
            df = pd.read_csv(file_path)
            normalized_columns = [col.strip().lower() for col in df.columns]

            if "smiles" not in normalized_columns:
                raise ValueError("CSV file must contain a 'smiles' column.")

            smiles_col = df.columns[normalized_columns.index("smiles")]
            other_columns = [col for col in df.columns if col != smiles_col]

            for idx, row in df.iterrows():
                try:
                    smiles = row[smiles_col]
                    if pd.isna(smiles):
                        DEFAULT_LOGGER.log_warning(f"Skipping row {idx + 1}: SMILES value is missing.")
                        continue

                    mol = Chem.MolFromSmiles(smiles)
                    if mol is None:
                        DEFAULT_LOGGER.log_warning(f"Skipping row {idx + 1}: Invalid SMILES '{smiles}'.")
                        continue

                    # cls (not Ligand) so that subclasses get instances of themselves.
                    ligand = cls(smiles=smiles)
                    for col in other_columns:
                        value = row[col]
                        if pd.notna(value):
                            ligand.set_property(col, value)

                    ligands.append(ligand)
                except Exception as e:
                    DEFAULT_LOGGER.log_error(f"Failed to create Ligand from CSV file row {idx + 1}: {str(e)}")

            DEFAULT_LOGGER.log_info(f"Created {len(ligands)} Ligand instances from CSV file '{file_path}'.")
        except pd.errors.EmptyDataError:
            DEFAULT_LOGGER.log_error(f"The CSV file '{file_path}' is empty.")
        except pd.errors.ParserError as e:
            DEFAULT_LOGGER.log_error(f"Error parsing CSV file '{file_path}': {str(e)}")
        except Exception as e:
            DEFAULT_LOGGER.log_error(f"Failed to create Ligands from CSV file '{file_path}': {str(e)}")

        return ligands

    @classmethod
    def create_ligands_from_file(cls, file_path: str, file_type: str) -> List["Ligand"]:
        """
        Creates a list of Ligand objects from a file.

        Args:
            file_path (str): Path to the input file containing ligand data.
            file_type (str): Type of the input file, 'sdf' or 'csv' (case-insensitive).

        Returns:
            List[Ligand]: A list of Ligand objects created from the file data.

        Raises:
            ValueError: If the file_type is not supported ('sdf' or 'csv').

        Examples:
            >>> ligands = Ligand.create_ligands_from_file("compounds.sdf", "sdf")
            >>> ligands = Ligand.create_ligands_from_file("compounds.csv", "csv")
        """
        file_type = file_type.lower()
        if file_type == "sdf":
            return cls.create_ligands_from_sdf(file_path)
        if file_type == "csv":
            return cls.create_ligands_from_csv(file_path)
        raise ValueError(f"Unsupported file format '{file_type}'. Only 'sdf' and 'csv' are supported.")

    @classmethod
    def convert_to_sdf(cls, block_content: str, block_type: str):
        """
        Converts molecular block content to SDF format.

        The block is parsed with ``mol_from_block`` (sanitized, hydrogens kept), written once
        through an RDKit ``SDWriter`` and then returned as a mol block.

        Args:
            block_content (str): The string content of the molecular block to convert
            block_type (str): The type of molecular block (e.g. 'MOL', 'SDF', etc.)

        Returns:
            str: The converted molecule as a mol block if successful
            None: If conversion fails (the error is logged, not raised)

        Examples:
            >>> sdf_block = Ligand.convert_to_sdf("molecular block content", "MOL")
            >>> if sdf_block:
            ...     # Process the SDF block
            ... else:
            ...     # Handle conversion failure
        """
        try:
            molecule = mol_from_block(block_type, block_content, sanitize=True, remove_hs=False)
            # The written file is never read back; the write is kept as in the original
            # (presumably as a writability check). A private temporary directory replaces the
            # race-prone tempfile.mktemp() and guarantees the scratch file is removed.
            with tempfile.TemporaryDirectory() as tmp_dir:
                writer = Chem.SDWriter(str(Path(tmp_dir) / "converted.sdf"))
                try:
                    writer.write(molecule.m)
                finally:
                    writer.close()
            return molecule.molblock()
        except Exception as e:
            DEFAULT_LOGGER.log_error(f"Failed to convert ligand block content to SDF: {str(e)}")
            return None

    @classmethod
    def fetch_smiles_from_pdb_api(cls, res_name: str) -> Optional[str]:
        """
        Retrieves the SMILES string representation of a ligand from the PDB API.

        Queries the RCSB PDB REST API for the chemical component ``res_name`` (upper-cased)
        and returns its ``smilesstereo`` descriptor.

        Args:
            res_name (str): The residue name/identifier of the ligand to query.

        Returns:
            str: The stereochemical SMILES string of the ligand if found.
            None: If the request fails, times out, or no SMILES is available. The reason is
            logged; no exception is propagated.

        Example:
            >>> smiles = Ligand.fetch_smiles_from_pdb_api("ATP")
            >>> print(smiles)
            'NC1=C2N=CN(C(O)C3OC(COP(O)(=O)OP(O)(=O)OP(O)(O)=O)C(O)C3O)C2=NC=N1'
        """
        try:
            query_url = f"https://data.rcsb.org/rest/v1/core/chemcomp/{res_name.upper()}"
            # Without a timeout an unresponsive server would block the caller indefinitely.
            response = requests.get(query_url, timeout=30)
            if response.status_code != 200:
                raise ValueError(f"Failed to retrieve data for ligand '{res_name}' from PDB API.")

            data = response.json()
            smiles = data.get("rcsb_chem_comp_descriptor", {}).get("smilesstereo")
            if not smiles:
                raise ValueError(f"SMILES not found for ligand '{res_name}'.")

            return smiles
        except Exception as e:
            DEFAULT_LOGGER.log_error(f"Failed to fetch SMILES from PDB API: {str(e)}")
            return None

    @classmethod
    @jupyter_visualization
    def visualize_ligands_from_sdf(cls, file_path: str):
        """
        Visualize ligands from an SDF file.

        Args:
            file_path (str): The path to the SDF file.

        Returns:
            The HTML produced by the viewer.

        Raises:
            Exception: Any failure is logged and re-raised.
        """
        try:
            viewer = MoleculeViewer(str(file_path), format="sdf")
            ligand_config = viewer.get_ligand_visualization_config()
            html = viewer.render_ligand(ligand_config=ligand_config)
            return html
        except Exception as e:
            DEFAULT_LOGGER.log_error(f"Visualization failed: {str(e)}")
            raise

    @classmethod
    @jupyter_visualization
    def visualize_ligands(cls, ligands: List["Ligand"]):
        """
        Visualize ligands.

        Every ligand is written as SDF to a scratch file, the file contents are concatenated
        and the combined SDF text is handed to the viewer.

        Args:
            ligands (List[Ligand]): The list of ligand objects to visualize.

        Returns:
            The HTML produced by the viewer.

        Raises:
            Exception: Any failure is logged and re-raised.
        """
        try:
            sdf_blocks = []
            # The scratch file is only needed until its text has been read back, so keep it
            # in a temporary directory that is removed on exit (mkstemp leaked a descriptor
            # and left two files behind).
            with tempfile.TemporaryDirectory() as tmp_dir:
                scratch = Path(tmp_dir) / "ligand.sdf"
                for ligand in ligands:
                    ligand.write_to_file(output_format="sdf", output_path=str(scratch))
                    sdf_blocks.append(scratch.read_text())

            sdf_data = "".join(sdf_blocks)

            viewer = MoleculeViewer(data=sdf_data, format="sdf")
            ligand_config = viewer.get_ligand_visualization_config()
            html = viewer.render_ligand(ligand_config=ligand_config)
            return html
        except Exception as e:
            DEFAULT_LOGGER.log_error(f"Visualization failed: {str(e)}")
            raise

    def _repr_html_(self) -> str:
        """
        Return the HTML representation of the object for Jupyter Notebook.

        Returns:
            str: The HTML content, or the plain-text description if visualization fails.
        """
        try:
            return self.visualize()
        except Exception as e:
            DEFAULT_LOGGER.log_warning(f"Failed to generate HTML representation: {str(e)}")
            return self.__str__()

    def __str__(self) -> str:
        """Return a multi-line summary: name, SMILES, heavy-atom count, properties, cross-references."""
        parts = [f"Name: {self.name}\nSMILES: {self.mol.smiles}\nHeavy Atoms: {self.hac}\n"]

        if self.properties:
            parts.append("Properties:\n")
            parts.extend(f" {prop_name}: {prop_value}\n" for prop_name, prop_value in self.properties.items())

        if self.xref_protein:
            parts.append(f"Cross-reference Protein Chain ID: {self.xref_protein_chain_id}\n")
            parts.append(f"Cross-reference Residue ID: {self.xref_residue_id}\n")
            parts.append(f"Cross-reference Insertion Code: {self.xref_ins_code}\n")

        info_str = "".join(parts)
        return f"Ligand:\n {info_str}"

    def __repr__(self) -> str:
        """Same text as ``__str__``."""
        return self.__str__()

    @staticmethod
    def get_directory() -> str:
        """
        Return the base directory for ligand files, creating it if necessary.

        Returns:
            str: The path of the ``ligands`` folder under ``WORKING_DIR``.
        """
        ligands_base_dir = Path(WORKING_DIR) / "ligands"
        ligands_base_dir.mkdir(parents=True, exist_ok=True)
        return str(ligands_base_dir)

    def admet_properties(self):
        """
        Predict ADMET properties for the ligand.

        The first prediction returned by ``predict_properties`` for the ligand's SMILES is
        used; every entry except "smiles" is also stored via ``set_property``.

        Returns:
            The mapping of predicted properties on success, or the string
            "Failed to predict ADMET properties." if prediction fails (the error is logged).
        """
        try:
            props = predict_properties(smiles=self.mol.smiles)[0]
            for key, value in props.items():
                if key == "smiles":
                    continue
                self.set_property(key, value)
            return props
        except Exception as e:
            DEFAULT_LOGGER.log_error(f"Failed to predict ADMET properties: {str(e)}")
            return "Failed to predict ADMET properties."

    def protonate(self, pH: float = 7.4, filter_percentage: float = 1):
        """
        Protonates the ligand molecule at a given pH value.

        On success the protonated SMILES is stored in ``self.protonated_smiles`` and as the
        "ProtonatedSMILES" property.

        Args:
            pH (float, optional): The pH value at which to protonate the molecule. Defaults to 7.4.
            filter_percentage (float, optional): The filtering threshold for protonation states.
                Defaults to 1.

        Returns:
            self: Returns the ligand instance, allowing for method chaining. Failures are
            logged and not raised, so check ``protonated_smiles`` to see whether it worked.

        Example:
            >>> ligand.protonate(pH=7.0, filter_percentage=0.8)
        """
        try:
            smiles = protonate(
                pH=pH,
                smiles=self.mol.smiles,
                filter_percentage=filter_percentage,
            )
            if smiles:
                self.protonated_smiles = smiles
                self.set_property("ProtonatedSMILES", smiles)
        except Exception as e:
            DEFAULT_LOGGER.log_error(f"Failed to protonate the ligand molecule: {str(e)}")

        return self

    def update_coordinates(self, coords: np.ndarray):
        """
        Updates the 3D coordinates of the ligand molecule's conformer.

        Args:
            coords (np.ndarray): Array of new 3D coordinates for the ligand atoms. Its first
                dimension must equal the atom count of the molecule, with or without hydrogens.

        Raises:
            ValueError: If the ligand molecule has no conformers to update.
            ValueError: If the number of coordinates matches neither atom count.

        Notes:
            - The coordinates are written in place to the existing conformer as float64.
        """
        if self.mol.m.GetNumConformers() == 0:
            raise ValueError("Ligand molecule has no conformers to update.")

        conformer = self.mol.m.GetConformer()
        mol_without_hs = Chem.RemoveHs(self.mol.m)
        conformer_no_hs = mol_without_hs.GetConformer()

        n_coords = coords.shape[0]
        if n_coords != conformer.GetNumAtoms() and n_coords != conformer_no_hs.GetNumAtoms():
            raise ValueError("Number of ligand atoms does not match the conformer's atom count.")

        # NOTE(review): when only the hydrogen-free count matches, the positions are still
        # written to the full conformer — confirm this is the intended behaviour.
        conformer.SetPositions(coords.astype(np.float64))
        DEFAULT_LOGGER.log_info("Ligand coordinates has been inplaced updated.")

    @classmethod
    def protonate_molecules(cls, ligands):
        """
        Protonates a list of ligands using the default settings of ``protonate()``.

        Args:
            ligands (List[Union[str, Ligand]]): SMILES strings and/or Ligand objects.

        Returns:
            List[Ligand]: The processed ligands. SMILES strings that cannot be turned into a
            ligand are skipped. Because ``protonate()`` logs its own failures instead of
            raising, a ligand whose protonation failed is still returned, with
            ``protonated_smiles`` left unset.

        Example:
            >>> smiles_list = ['CC(=O)O', 'CN1C=NC=C1']
            >>> protonated_ligands = Ligand.protonate_molecules(smiles_list)
        """
        mols = []
        for ligand in tqdm(ligands, desc="Protonating Molecules"):
            if isinstance(ligand, str):
                try:
                    # cls (not Ligand) so that subclasses get instances of themselves.
                    ligand = cls(smiles=ligand)
                except Exception as e:
                    DEFAULT_LOGGER.log_error(f"Failed to create Ligand from SMILES: {str(e)}")
                    continue
            try:
                # Skip ligands that already carry a protonated SMILES.
                if not ligand.protonated_smiles:
                    ligand.protonate()
            except Exception as e:
                DEFAULT_LOGGER.log_error(f"Failed to protonate the ligand molecule: {str(e)}")
                continue

            mols.append(ligand)

        return mols
Initialize a Ligand object.
This constructor creates a Ligand object from various input sources and validates the molecular structure.
Args
identifier
:str
, optional- Name or identifier of the molecule. Defaults to "".
file_path
:str
, optional- Path to input file containing molecule data. Defaults to "".
smiles
:str
, optional- SMILES string representation of molecule. Defaults to "".
block_type
:str
, optional- Type of molecular block content (e.g. "mol", "sdf"). Defaults to "".
block_content
:str
, optional- Content of molecular block. Defaults to "".
name
:str
, optional- Name for the molecule. Defaults to "".
seed
:int
, optional- Random seed for coordinate generation. Defaults to None.
xref_protein
:str
, optional- Cross-reference to protein. Defaults to "".
xref_ins_code
:str
, optional- Cross-reference insertion code. Defaults to "".
xref_residue_id
:str
, optional- Cross-reference residue ID. Defaults to "".
xref_protein_chain_id
:str
, optional- Cross-reference protein chain ID. Defaults to "".
save_to_file
:bool
, optional- Whether to save molecule to file. Defaults to False.
properties
:dict
, optional- Additional properties for the molecule. Defaults to None.
Raises
ValueError
- If not exactly one input source is provided (identifier, file_path, smiles, or block_content).
ValueError
- If block_type is not provided when initializing from block_content.
ValueError
- If molecule creation fails.
Notes
- Only one input source (identifier, file_path, smiles, or block_content) should be provided
- Automatically generates coordinates if needed
- Performs validation checks including heavy atom count
- Can optionally save the molecule to file
- Stores various properties including cross-references to protein structure
Static methods
def convert_to_sdf(block_content: str, block_type: str)
-
Converts molecular block content to SDF format.
This class method takes a molecular block content and its type, attempts to convert it to an RDKit molecule object, and returns the molecule in SDF molblock format.
Args
block_content
:str
- The string content of the molecular block to convert
block_type
:str
- The type of molecular block (e.g. 'MOL', 'SDF', etc.)
Returns
str
- The converted molecule in SDF molblock format if successful
None
- If conversion fails
Raises
Exception
- Not propagated to the caller: any exception raised during conversion is caught and logged, and None is returned.
Examples
>>> sdf_block = Ligand.convert_to_sdf("molecular block content", "MOL") >>> if sdf_block: ... # Process the SDF block ... else: ... # Handle conversion failure
def create_ligands_from_csv(file_path: str) ‑> List[Ligand]
-
Creates Ligand instances from a CSV file containing SMILES strings and optional additional properties.
This class method reads a CSV file and creates Ligand objects from each row. The CSV file must contain a 'smiles' column (case-insensitive). Additional columns are treated as properties of the ligand.
Args
file_path
:str
- Path to the CSV file containing ligand data.
Returns
List[Ligand]
- A list of created Ligand instances.
Raises
FileNotFoundError
- If the specified file does not exist.
ValueError
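- If the CSV file does not contain a 'smiles' column. This error is raised inside the method's own error handling, so it is logged and an empty list is returned rather than propagated to the caller.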
pd.errors.EmptyDataError
- If the CSV file is empty. The error is caught and logged, and an empty list is returned.
pd.errors.ParserError
- If there are issues parsing the CSV file. The error is caught and logged, and an empty list is returned.
Notes
- Rows with missing or invalid SMILES strings are skipped with a warning.
- All column names are normalized (stripped and converted to lowercase) for comparison.
- Non-SMILES columns are added as properties to the Ligand instances.
- Any errors during processing of individual rows are logged but don't stop the overall process.
Example CSV format: smiles,name,molecular_weight CC(=O)O,acetic acid,60.052 CCO,ethanol,46.068
def create_ligands_from_file(file_path: str, file_type: str) ‑> List[Ligand]
-
Creates a list of Ligand objects from a file.
Args
file_path
:str
- Path to the input file containing ligand data.
file_type
:str
- Type of the input file. Supported types are 'sdf' and 'csv'.
Returns
List[Ligand]
- A list of Ligand objects created from the file data.
Raises
ValueError
- If the file_type is not supported ('sdf' or 'csv').
Examples
>>> ligands = Ligand.create_ligands_from_file("compounds.sdf", "sdf") >>> ligands = Ligand.create_ligands_from_file("compounds.csv", "csv")
def create_ligands_from_sdf(file_path: str) ‑> List[Ligand]
-
Creates a list of Ligand objects from an SDF file.
This class method reads molecules from an SDF file and converts each valid molecule into a Ligand instance. It handles potential parsing errors and logs relevant information.
Args
file_path
:str
- Path to the SDF file containing molecular structures.
Returns
List[Ligand]
- A list of Ligand objects created from the SDF file. Returns an empty list if no valid molecules are found or in case of errors.
Raises
FileNotFoundError
- If the specified file path does not exist.
Example
>>> ligands = Ligand.create_ligands_from_sdf("molecules.sdf") >>> print(len(ligands)) # Number of successfully parsed molecules
Notes
- Molecules that fail to parse will be skipped and logged as warnings
- Properties from the SDF file are preserved and stored in the Ligand objects
- Progress and errors are tracked through the DEFAULT_LOGGER
def fetch_smiles_from_pdb_api(res_name: str) ‑> str
-
Retrieves the SMILES string representation of a ligand from the PDB API.
This class method queries the RCSB PDB REST API to fetch the stereochemical SMILES notation for a given ligand residue name. If the API request fails or the SMILES data is not found, appropriate errors are logged.
Args
res_name
:str
- The residue name/identifier of the ligand to query.
Returns
str
- The stereochemical SMILES string of the ligand if found.
None
- If the API request fails or SMILES data is not available.
Raises
ValueError
- If the API request fails or SMILES data is not found for the given ligand.
Example
>>> smiles = Ligand.fetch_smiles_from_pdb_api("ATP")
>>> print(smiles)
NC1=C2N=CN(C(O)C3OC(COP(O)(=O)OP(O)(=O)OP(O)(O)=O)C(O)C3O)C2=NC=N1
def get_directory() ‑> str
-
Expand source code
@staticmethod
def get_directory() -> str:
    """
    Return the base directory used for ligand files, creating it if needed.

    The directory is the "ligands" folder directly under WORKING_DIR. Missing
    parent directories are created as well, and an already existing directory
    is not treated as an error.

    Returns:
        str: The path to the ligands directory.
    """
    target_dir = Path(WORKING_DIR).joinpath("ligands")
    # exist_ok=True makes repeated calls harmless.
    target_dir.mkdir(parents=True, exist_ok=True)
    return str(target_dir)
Generates and ensures the existence of the base directory for ligands.
Args:
Returns
str
- The path to the ligands directory.
def protonate_molecules(ligands)
-
Protonates a list of ligands by adding hydrogens at physiological pH. This class method processes a list of ligands, either as SMILES strings or Ligand objects, and returns a list of protonated Ligand objects. It handles the protonation of each ligand while managing potential errors during SMILES parsing or protonation.
Args
ligands
:List[Union[str, Ligand]]
- A list containing either SMILES strings or Ligand objects to be protonated.
Returns
List[Ligand]
- A list of successfully protonated Ligand objects. Failed ligands are excluded from the output list.
Raises
None
- Exceptions during processing individual ligands are caught and logged.
Example
>>> smiles_list = ['CC(=O)O', 'CN1C=NC=C1']
>>> protonated_ligands = Ligand.protonate_molecules(smiles_list)
def visualize_ligands(*args, **kwargs)
def visualize_ligands_from_sdf(*args, **kwargs)
Instance variables
prop atom_types
-
Expand source code
@property
def atom_types(self):
    """
    Atom types present in the ligand molecule.

    The value is whatever ``self.mol.species()`` reports; it is passed through
    untouched.

    Returns:
        list: Presumably the unique atomic species as strings
            (e.g. ['C', 'H', 'O', 'N']) -- the exact shape is defined by the
            underlying molecule object.
    """
    species = self.mol.species()
    return species
Returns a list of unique atom types present in the ligand molecule.
Returns
list
- A list of strings representing unique atomic species (e.g. ['C', 'H', 'O', 'N'])
prop coordinates
-
Expand source code
@property
def coordinates(self):
    """
    Ligand coordinates as a freshly built numpy array.

    Every access converts the result of ``self.mol.coords()`` into a new
    float32 array, so the returned array is independent of earlier results.

    Returns:
        np.ndarray: float32 array holding the coordinates reported by the
            underlying molecule (3D atom positions per the original docs).
    """
    raw_coords = self.mol.coords()
    return np.array(raw_coords, dtype=np.float32)
Returns a numpy array of ligand coordinates.
Returns
np.ndarray
- A numpy array of float32 containing the 3D coordinates of all atoms in the ligand.
Methods
def admet_properties(self) ‑> str
-
Expand source code
def admet_properties(self) -> str:
    """
    Predict ADMET properties for the ligand and store them on the ligand.

    The ligand's SMILES is sent to ``predict_properties`` and the first entry
    of its result is used. Every key of that entry except "smiles" is recorded
    through ``set_property``.

    Returns:
        On success, the first entry returned by ``predict_properties`` (a
        mapping of property name to value). On any failure, the string
        "Failed to predict ADMET properties." after the error has been logged.

    Note:
        NOTE(review): the ``-> str`` annotation only describes the failure
        path; the success path returns the mapping itself. Confirm which of
        the two callers rely on before tightening the annotation.
    """
    try:
        predicted = predict_properties(smiles=self.mol.smiles)[0]
        for prop_key, prop_val in predicted.items():
            # The SMILES echo is input, not a predicted property.
            if prop_key != "smiles":
                self.set_property(prop_key, prop_val)
        return predicted
    except Exception as e:
        DEFAULT_LOGGER.log_error(f"Failed to predict ADMET properties: {str(e)}")
        return "Failed to predict ADMET properties."
Predict ADMET properties for the ligand.
Returns
str
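- On success, the predicted ADMET properties as returned by the predictor (a mapping of property name to value); on failure, the string "Failed to predict ADMET properties."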
def draw(self)
-
Expand source code
def draw(self):
    """
    Render the ligand molecule.

    Delegates to ``self.mol.draw()`` and returns its result unchanged.

    Returns:
        Image: The drawing produced by the underlying molecule object
            (a 2D structural depiction per the original docs).
    """
    rendering = self.mol.draw()
    return rendering
Draws a visual representation of the ligand molecule.
Returns
Image
- The 2D structural representation of the ligand molecule.
def get_center(self) ‑> List[float] | None
-
Expand source code
def get_center(self) -> Optional[List[float]]:
    """
    Calculate the center coordinates of the ligand.

    The center is the arithmetic mean of the ligand coordinates along the
    first axis (one value per coordinate column).

    Returns:
        Optional[List[float]]: The center as a list of Python floats
            [x, y, z], or None if no coordinates are available (missing or
            empty coordinate array).

    Example:
        >>> ligand.get_center()
        [1.234, -2.345, 3.456]
    """
    # ``coordinates`` is a property that rebuilds the array on every access,
    # so read it exactly once.
    coords = self.coordinates

    # An empty array would make mean() produce NaN and break the float
    # conversion below; treat it like missing coordinates instead.
    if coords is None or coords.size == 0:
        DEFAULT_LOGGER.log_warning("Coordinates are not available for this ligand.")
        return None

    center = [float(x) for x in coords.mean(axis=0).tolist()]
    DEFAULT_LOGGER.log_info(f"Calculated center coordinates: {center}")
    return center
Calculate the center coordinates of the ligand.
Returns
Optional[List[float]]
- The center coordinates as a list of floats [x, y, z] or None if coordinates are not available.
Example
>>> ligand.get_center() [1.234, -2.345, 3.456]
def get_property(self, prop_name: str)
-
Expand source code
def get_property(self, prop_name: str):
    """
    Look up a property of the ligand.

    Lookup order:
        1. The ligand's own ``properties`` dictionary.
        2. The wrapped molecule object (``self.mol.m``) via HasProp/GetProp.
           A hit here is copied into ``properties`` so the next lookup is
           served from the dictionary.

    Args:
        prop_name (str): The name of the property to retrieve.

    Returns:
        Any: The property value, or None when neither source has it. A value
            of None stored in the dictionary is treated as "not cached" and
            falls through to the molecule lookup.
    """
    cached = self.properties.get(prop_name)
    if cached is not None:
        DEFAULT_LOGGER.log_info(f"Retrieved property '{prop_name}' from properties dictionary: '{cached}'.")
        return cached

    rd_mol = self.mol.m
    if not rd_mol.HasProp(prop_name):
        DEFAULT_LOGGER.log_info(f"Property '{prop_name}' not found.")
        return None

    fetched = rd_mol.GetProp(prop_name)
    # Cache the molecule-level value for future lookups.
    self.properties[prop_name] = fetched
    DEFAULT_LOGGER.log_info(f"Retrieved property '{prop_name}' from molecule: '{fetched}'.")
    return fetched
Retrieve a property value from the ligand object.
This method attempts to get the property value first from the properties dictionary, and if not found there, tries to retrieve it from the molecule object.
Args
prop_name
:str
- The name of the property to retrieve.
Returns
Any
- The value of the property if found, None otherwise.
Notes
The method first checks the internal properties dictionary. If the property is not found there, it checks the molecule object using RDKit's HasProp/GetProp. If found in the molecule object, the value is also cached in the properties dictionary for future use.
def protonate(self, pH: float = 7.4, filter_percentage: float = 1)
-
Expand source code
def protonate(self, pH: float = 7.4, filter_percentage: float = 1):
    """
    Protonate the ligand molecule at the given pH.

    Calls the module-level ``protonate`` helper with the ligand's SMILES. When
    it returns a non-empty result, that result is stored on
    ``self.protonated_smiles`` and recorded as the "ProtonatedSMILES"
    property. An empty result leaves the ligand untouched.

    Args:
        pH (float, optional): The pH value at which to protonate the molecule.
            Defaults to 7.4.
        filter_percentage (float, optional): Filtering threshold forwarded to
            the protonation helper (documented as a value between 0 and 1).
            Defaults to 1.

    Returns:
        self: The ligand instance, so calls can be chained.

    Note:
        No exception escapes this method: any failure is logged and the
        instance is returned as-is.

    Example:
        >>> ligand.protonate(pH=7.0, filter_percentage=0.8)
    """
    try:
        protonated = protonate(
            smiles=self.mol.smiles,
            pH=pH,
            filter_percentage=filter_percentage,
        )
        if protonated:
            self.protonated_smiles = protonated
            self.set_property("ProtonatedSMILES", protonated)
    except Exception as e:
        DEFAULT_LOGGER.log_error(f"Failed to protonate the ligand molecule: {str(e)}")

    return self
Protonates the ligand molecule at a given pH value.
This method attempts to generate a protonated version of the molecule using a pH-dependent protonation algorithm. If successful, it stores the protonated SMILES string and sets it as a property of the molecule.
Args
pH
:float
, optional- The pH value at which to protonate the molecule. Defaults to 7.4.
filter_percentage
:float
, optional- The filtering threshold for protonation states. Value between 0 and 1. Defaults to 1.
Returns
self
- Returns the ligand instance, allowing for method chaining.
Raises
Exception
- Not propagated to the caller: if protonation fails, the error is caught and logged, and the instance is returned unchanged.
Example
>>> ligand.protonate(pH=7.0, filter_percentage=0.8)
def set_property(self, prop_name: str, prop_value)
-
Expand source code
def set_property(self, prop_name: str, prop_value):
    """
    Record a property on the ligand and mirror it onto the wrapped molecule.

    Args:
        prop_name (str): Name of the property to set.
        prop_value: Value to store. The properties dictionary keeps it as-is;
            the molecule object receives ``str(prop_value)``.

    Note:
        The dictionary is updated first, then the molecule, and finally the
        new value is logged at INFO level.
    """
    self.properties[prop_name] = prop_value

    rd_mol = self.mol.m
    rd_mol.SetProp(prop_name, str(prop_value))

    DEFAULT_LOGGER.log_info(f"Set property '{prop_name}' to '{prop_value}'.")
Set a property for both the ligand properties dictionary and the underlying RDKit molecule.
Args
prop_name
:str
- Name of the property to set.
prop_value
- Value to set for the property. Will be converted to string for RDKit molecule.
Note
The property is set both in the properties dict and RDKit molecule. The property value is logged at INFO level.
def update_coordinates(self, coords: numpy.ndarray)
-
Expand source code
def update_coordinates(self, coords: np.ndarray):
    """
    Overwrite the positions of the ligand's conformer in place.

    Args:
        coords (np.ndarray): New coordinates; ``coords.shape[0]`` must equal
            the atom count of the conformer, either for the full molecule or
            for the molecule after ``Chem.RemoveHs``.

    Raises:
        ValueError: If the ligand molecule has no conformers to update.
        ValueError: If the number of coordinate rows matches neither the full
            nor the hydrogen-stripped atom count.

    Notes:
        - The coordinates are cast to float64 before being applied.
        - A message is logged after the update.
        - NOTE(review): when only the hydrogen-stripped count matches, the
          array is still applied to the conformer of the full molecule.
          Confirm that this is the intended behaviour.
    """
    rd_mol = self.mol.m
    if rd_mol.GetNumConformers() == 0:
        raise ValueError("Ligand molecule has no conformers to update.")

    conformer = rd_mol.GetConformer()
    stripped_conformer = Chem.RemoveHs(rd_mol).GetConformer()

    accepted_counts = (conformer.GetNumAtoms(), stripped_conformer.GetNumAtoms())
    if coords.shape[0] not in accepted_counts:
        raise ValueError("Number of ligand atoms does not match the conformer's atom count.")

    conformer.SetPositions(coords.astype(np.float64))
    DEFAULT_LOGGER.log_info("Ligand coordinates has been inplaced updated.")
Updates the 3D coordinates of the ligand molecule's conformer.
Args
coords
:np.ndarray
- Array of new 3D coordinates for the ligand atoms. Must match the number of atoms in either the full molecule or molecule without hydrogens.
Raises
ValueError
- If the ligand molecule has no conformers to update.
ValueError
- If the number of coordinates doesn't match the number of atoms in the molecule (either with or without hydrogens).
Notes
- The coordinates are updated in-place on the existing conformer
- The input coordinates are converted to float64 type
- A success message is logged after updating
def visualize(*args, **kwargs)
-
Expand source code
def wrapper(*args, **kwargs):
    # Call the wrapped function with the caller's arguments and pass its
    # result (presumably HTML markup, given the original variable name) to
    # JupyterViewer.visualize, returning whatever the viewer returns.
    return JupyterViewer.visualize(func(*args, **kwargs))
def write_to_file(self, output_path: str = '', output_format: str = '')
-
Expand source code
def write_to_file(self, output_path: str = "", output_format: str = ""):
    """
    Write the ligand structure to a file in the specified format.

    The molecule and its properties are written as PDB, MOL or SDF. Before
    writing, the ligand name ("_Name"), its SMILES ("_SMILES") and every entry
    of ``self.properties`` (stringified) are set on the molecule through
    ``set_property``.

    Args:
        output_path (str, optional): The path where the file should be
            written. If not provided, the file is written to the ligand
            directory as ``<name>.<format>``.
        output_format (str, optional): The desired output format: 'pdb',
            'mol' or 'sdf', with or without a leading dot, in any letter
            case. If not provided, it is inferred from the extension of
            ``output_path``.

    Raises:
        ValueError: If neither output_path nor output_format is provided, or
            if the resulting format is not one of '.pdb', '.mol', '.sdf'.
        Exception: Any error raised while writing is logged and re-raised.

    Note:
        - If the output format doesn't match the file extension, a warning is
          logged and the specified output format is used.
        - If ``output_path`` has no extension, the specified output format is
          used.
        - Properties are written in the following format:
            - PDB: As REMARK lines
            - MOL: As property blocks after the molecule
            - SDF: As SD fields
    """
    try:
        if output_format == "" and output_path == "":
            raise ValueError("Please provide either output_path or output_format.")

        # Normalise the requested format once: lower-case, exactly one
        # leading dot. This keeps the default file name from ending in
        # "name..sdf" and lets "SDF" / ".SDF" be accepted.
        if output_format:
            output_format = "." + output_format.strip().lstrip(".").lower()

        if not output_path:
            output_path = str(Path(self.get_directory()) / f"{self.name}{output_format}")

        path = Path(output_path)
        extension = path.suffix.lower()

        if not output_format:
            output_format = extension
        elif extension and extension != output_format:
            DEFAULT_LOGGER.log_warning(
                "Output format does not match the file extension. Writing to provided output format."
            )
        # The requested (or inferred) format always decides how we write,
        # including when the path carries no extension at all.
        extension = output_format

        # Reject unsupported formats before touching the molecule's
        # properties.
        if extension not in (".pdb", ".mol", ".sdf"):
            raise ValueError(
                f"Unsupported file extension '{extension}'. Supported extensions are '.pdb', '.mol', '.sdf'."
            )

        if self.name:
            self.set_property("_Name", self.name)
        if self.mol.smiles:
            self.set_property("_SMILES", self.mol.smiles)
        if self.properties:
            # Iterate over a snapshot: set_property writes back into
            # self.properties while we loop.
            for prop_name, prop_value in list(self.properties.items()):
                self.set_property(prop_name, str(prop_value))

        if extension == ".pdb":
            remark_lines = "".join(
                f"REMARK {prop_name}: {prop_value}\n"
                for prop_name, prop_value in self.mol.m.GetPropsAsDict().items()
            )
            path.write_text(remark_lines + Chem.MolToPDBBlock(self.mol.m))
        elif extension == ".sdf":
            writer = Chem.SDWriter(str(path))
            try:
                writer.SetKekulize(False)
                writer.write(self.mol.m)
            finally:
                # Always release the file handle, even if write() fails.
                writer.close()
        else:  # ".mol"
            prop_lines = "".join(
                f"> <{prop_name}>\n{prop_value}\n\n"
                for prop_name, prop_value in self.mol.m.GetPropsAsDict().items()
            )
            path.write_text(Chem.MolToMolBlock(self.mol.m) + "\n" + prop_lines)

        DEFAULT_LOGGER.log_info(f"Ligand structure written to {output_path}.")
    except Exception as e:
        DEFAULT_LOGGER.log_error(f"Failed to write structure to file {output_path}: {str(e)}")
        raise
Write the ligand structure to a file in the specified format.
This method writes the molecular structure and its properties to a file in the specified format. Supported formats are PDB, MOL, and SDF. Properties are included in the output file according to the format-specific conventions.
Args
output_path
:str
, optional- The path where the file should be written. If not provided, the file will be written in the ligand's directory with the name and format extension.
output_format
:str
, optional- The desired output format ('.pdb', '.mol', or '.sdf'). If not provided, it will be inferred from the output_path extension.
Raises
ValueError
- If neither output_path nor output_format is provided, or if an unsupported file extension is specified.
Exception
- If any error occurs during the file writing process.
Note
- If the output format doesn't match the file extension, a warning will be logged and the specified output format will be used.
- Properties are written in the following format:
- PDB: As REMARK lines
- MOL: As property blocks after the molecule
- SDF: As SD fields