Module deeporigin.src.structures.reports
Classes
class DockingReport (results: List[DockingResult],
pocket_data)-
Expand source code
class DockingReport:
    """
    A class to handle and report docking results.

    This class provides functionality to manage docking results, generate
    reports, save results to files, and visualize protein-ligand complexes.

    Attributes:
        results (List[DockingResult]): A list of docking results.
        pocket_data: Data about the binding pocket used in docking. ``save``
            reads its ``box_center`` and ``box_size`` attributes.

    Methods:
        _to_dataframe(include_props=None): Converts docking results to a pandas DataFrame.
        _repr_html_(): Returns HTML representation of the docking report.
        generate_custom_report(include_props=False): Generates a custom HTML report.
        save(save_dir=None, safe=True): Saves docking results to SDF file with properties.
        visualize(protein_path=None, protein_format=None, sdf_file_path=None,
            crystal_ligand_path=None, crystal_ligand_format=None): Visualizes
            the docking results in 3D.

    Examples:
        >>> report = DockingReport(results, pocket_data)
        >>> report.save()  # Saves results to an SDF file
        >>> report.visualize()  # Shows 3D visualization of results
    """

    def __init__(self, results: List[DockingResult], pocket_data):
        self.results = results
        self.pocket_data = pocket_data

    def _to_dataframe(self, include_props=None):
        """
        Converts docking results to a pandas DataFrame.

        Args:
            include_props (optional): Used as a flag. When truthy, every entry
                of the top ligand's ``properties`` whose key does not contain
                "smiles" is copied into the row as an extra column.

        Returns:
            pd.DataFrame: One row per docking result with these default columns:
                - Image: 2D drawing of the top ligand (``ligand.mol._draw()``)
                - SMILES: SMILES string of the result
                - Ranking Score: rounded to 3 decimal places
                - Binding Energy: rounded to 3 decimal places
                - Path To Docked Pose: file path of the top ligand
            Results without a top ligand, or flagged unsuccessful, keep None in
            every column except SMILES. Rows are sorted by Ranking Score in
            descending order. With no results at all, an empty DataFrame that
            has the default columns is returned.
        """
        default_columns = ["Image", "SMILES", "Ranking Score", "Binding Energy", "Path To Docked Pose"]
        data = []
        for result in self.results:
            property_dict = {
                "Image": None,
                "SMILES": result.smiles,
                "Ranking Score": None,
                "Binding Energy": None,
                "Path To Docked Pose": None,
            }
            if result.top_ligand and result.successful:
                ligand = result.top_ligand
                mol_props = ligand.properties
                energy_score = float(mol_props.get("Binding Energy", "0.0"))
                ranking_score = float(mol_props.get("Ranking Score", "0.0"))
                property_dict["Image"] = ligand.mol._draw()
                property_dict["Ranking Score"] = round(ranking_score, 3)
                property_dict["Binding Energy"] = round(energy_score, 3)
                property_dict["Path To Docked Pose"] = ligand.file_path
                if include_props:
                    for prop in mol_props:
                        if "smiles" not in prop:
                            property_dict[prop] = mol_props.get(prop, None)
            data.append(property_dict)

        if not data:
            # An empty frame has no "Ranking Score" column, so sorting it would
            # raise KeyError; return the expected schema instead.
            return pd.DataFrame(columns=default_columns)

        df = pd.DataFrame(data)
        return df.sort_values(by="Ranking Score", ascending=False).reset_index(drop=True)

    def _repr_html_(self):
        """Return the report table as HTML, formatted with precision 3."""
        df = self._to_dataframe().style.format(precision=3)
        return df._repr_html_()

    def generate_custom_report(self, include_props=False):
        """
        Generate a custom HTML report from the data.

        Args:
            include_props (bool, optional): Whether to include the ligand
                properties as extra columns. Defaults to False.

        Returns:
            HTML: The styled table (precision 3) wrapped in an ``HTML`` object.

        Example:
            >>> report = obj.generate_custom_report(include_props=True)
            >>> display(report)  # In Jupyter notebook
        """
        df = self._to_dataframe(include_props).style.format(precision=3)
        return HTML(df._repr_html_())

    def __str__(self):
        return f"DockingReport:\n Number of DockingResults: {len(self.results)}"

    def __repr__(self):
        return self.__str__()

    def save(self, save_dir=None, safe=True):
        """
        Save docking results to files in a new timestamped directory.

        Args:
            save_dir (str or Path, optional): Parent directory for the new
                ``docking_report_<timestamp>`` directory. If not given, the
                END_USER_HOME environment variable is used, falling back to
                the current directory.
            safe (bool, optional): If True and the SDF file already exists, it
                is passed to ``move_file_with_extension`` instead of being
                removed. Defaults to True.

        Returns:
            str or None: Path of the created directory, or None if no result
            has a top ligand.

        Files Created:
            - docking_report_top_ligands.sdf: the top ligand of every result
            - {protein_name}.pdb: protein of the first result (best effort;
              a failure is logged, not raised)
            - bounding_box.pdb: written by ``save_bounding_box``
        """
        top_ligands = [result.top_ligand for result in self.results if result.top_ligand]
        if not top_ligands:
            return None

        timestamp = datetime.now().strftime("%m-%d-%Y|%H:%M:%S")
        base_dir = Path(save_dir) if save_dir else Path(os.getenv("END_USER_HOME", "."))
        save_dir_path = base_dir / f"docking_report_{timestamp}"
        save_dir_path.mkdir(parents=True, exist_ok=True)

        sdf_file_path = save_dir_path / "docking_report_top_ligands.sdf"
        if safe and sdf_file_path.exists():
            move_file_with_extension(str(sdf_file_path), "sdf")
        else:
            remove_file(str(sdf_file_path))

        writer = Chem.SDWriter(str(sdf_file_path))
        try:
            writer.SetKekulize(False)
            for ligand in top_ligands:
                mol = ligand.mol.m  # RDKit molecule
                # Snapshot the molecule's current properties before adding new ones.
                existing_properties = mol.GetPropsAsDict()
                if ligand.name:
                    mol.SetProp("_Name", ligand.name)
                if ligand.mol.smiles:
                    mol.SetProp("_SMILES", ligand.mol.smiles)
                for prop_name, prop_value in existing_properties.items():
                    mol.SetProp(prop_name, str(prop_value))
                for prop_name, prop_value in ligand.properties.items():
                    mol.SetProp(prop_name, str(prop_value))
                writer.write(mol)
        finally:
            # Close the writer even when a write fails so the file handle is released.
            writer.close()

        try:
            self.results[0].protein.write_to_file(str(save_dir_path / f"{self.results[0].protein.name}.pdb"))
        except Exception as e:
            # Deliberately best effort: the ligand SDF is still useful without the protein.
            DEFAULT_LOGGER.log_error(f"Failed to write protein to file: {e}")

        save_bounding_box(
            self.pocket_data.box_center,
            self.pocket_data.box_size,
            output_file=str(save_dir_path / "bounding_box.pdb"),
        )
        return str(save_dir_path)

    @jupyter_visualization
    def visualize(
        self,
        protein_path=None,
        protein_format=None,
        sdf_file_path=None,
        crystal_ligand_path=None,
        crystal_ligand_format=None,
    ):
        """
        Visualizes the docking report by rendering protein and ligands together.

        Args:
            protein_path (str, optional): Path to the protein file. Defaults to
                the file of the first result's protein.
            protein_format (str, optional): Format of the protein file (e.g., pdb).
                Required when ``protein_path`` is given.
            sdf_file_path (str, optional): Path to the ligand file in SDF format.
                If omitted, the report is saved under the system temporary
                directory and the resulting SDF file is used.
            crystal_ligand_path (str, optional): Path to a crystal ligand file.
            crystal_ligand_format (str, optional): Format of the crystal ligand
                file. The crystal ligand is only passed on when both the path
                and the format are given.

        Raises:
            ValueError: If there is nothing to visualize (no top ligands, or no
                results to take the protein from), or if `protein_path` is
                provided without `protein_format`.

        Returns:
            The value returned by ``DockingViewer.render_with_seperate_crystal``.
        """
        if sdf_file_path is None:
            saved_dir = self.save(save_dir=tempfile.gettempdir())
            if saved_dir is None:
                # save() returns None when no result has a top ligand.
                raise ValueError("No docked ligands found to visualize.")
            sdf_file_path = str(Path(saved_dir) / "docking_report_top_ligands.sdf")

        if protein_path is not None and protein_format is None:
            raise ValueError("Please provide the protein format along with the protein path.")

        if protein_path is None:
            if not self.results:
                raise ValueError("No results found to extract protein information from.")
            protein_path = str(self.results[0].protein.file_path)
            protein_format = self.results[0].protein.block_type

        viewer = DockingViewer()
        crystal_data = None
        if crystal_ligand_path and crystal_ligand_format:
            crystal_data = {"raw": str(crystal_ligand_path), "format": crystal_ligand_format}

        html_content = viewer.render_with_seperate_crystal(
            protein_data=protein_path,
            protein_format=protein_format,
            ligands_data=[sdf_file_path],
            ligand_format="sdf",
            crystal_data=crystal_data,
        )
        return html_content
A class to handle and report docking results.
This class provides functionality to manage docking results, generate reports, save results to files, and visualize protein-ligand complexes.
Attributes
results
:List[DockingResult]
- A list of docking results.
pocket_data
- Data about the binding pocket used in docking.
Methods
_to_dataframe(include_props=None): Converts docking results to a pandas DataFrame. _repr_html_(): Returns HTML representation of the docking report. generate_custom_report(include_props=False): Generates a custom HTML report with specified properties. save(save_dir=None, safe=True): Saves docking results to SDF file with properties. visualize(protein_path=None, protein_format=None, sdf_file_path=None, crystal_ligand_path=None, crystal_ligand_format=None): Visualizes the docking results in 3D.
Examples
>>> report = DockingReport(results, pocket_data) >>> report.save() # Saves results to an SDF file >>> report.visualize() # Shows 3D visualization of results
Methods
def generate_custom_report(self, include_props=False)
-
Expand source code
def generate_custom_report(self, include_props=False):
    """
    Build the report table and return it wrapped for notebook display.

    Args:
        include_props (bool, optional): Forwarded to ``_to_dataframe``; when
            truthy the ligand properties are added as columns. Defaults to False.

    Returns:
        HTML: The styled table, formatted with precision 3.

    Example:
        >>> report = obj.generate_custom_report(include_props=True)
        >>> display(report)  # In Jupyter notebook
    """
    table = self._to_dataframe(include_props)
    styled = table.style.format(precision=3)
    markup = styled._repr_html_()
    return HTML(markup)
Generate a custom HTML report from the data.
This method converts the internal data to a styled pandas DataFrame and returns it as HTML. The resulting DataFrame is formatted with 3 decimal places precision.
Args
include_props
:bool
, optional- Whether to include properties in the report. Defaults to False.
Returns
HTML
- A styled HTML representation of the data with 3 decimal places precision.
Example
>>> report = obj.generate_custom_report(include_props=True) >>> display(report) # In Jupyter notebook
def save(self, save_dir=None, safe=True)
-
Expand source code
def save(self, save_dir=None, safe=True):
    """
    Save docking results to files in a new timestamped directory.

    Args:
        save_dir (str or Path, optional): Parent directory for the new
            ``docking_report_<timestamp>`` directory. If not given, the
            END_USER_HOME environment variable is used, falling back to the
            current directory.
        safe (bool, optional): If True and the SDF file already exists, it is
            passed to ``move_file_with_extension`` instead of being removed.
            Defaults to True.

    Returns:
        str or None: Path of the created directory, or None if no result has a
        top ligand.

    Files Created:
        - docking_report_top_ligands.sdf: the top ligand of every result
        - {protein_name}.pdb: protein of the first result (best effort; a
          failure is logged, not raised)
        - bounding_box.pdb: written by ``save_bounding_box``
    """
    top_ligands = [result.top_ligand for result in self.results if result.top_ligand]
    if not top_ligands:
        return None

    timestamp = datetime.now().strftime("%m-%d-%Y|%H:%M:%S")
    base_dir = Path(save_dir) if save_dir else Path(os.getenv("END_USER_HOME", "."))
    save_dir_path = base_dir / f"docking_report_{timestamp}"
    save_dir_path.mkdir(parents=True, exist_ok=True)

    sdf_file_path = save_dir_path / "docking_report_top_ligands.sdf"
    if safe and sdf_file_path.exists():
        move_file_with_extension(str(sdf_file_path), "sdf")
    else:
        remove_file(str(sdf_file_path))

    writer = Chem.SDWriter(str(sdf_file_path))
    try:
        writer.SetKekulize(False)
        for ligand in top_ligands:
            mol = ligand.mol.m  # RDKit molecule
            # Snapshot the molecule's current properties before adding new ones.
            existing_properties = mol.GetPropsAsDict()
            if ligand.name:
                mol.SetProp("_Name", ligand.name)
            if ligand.mol.smiles:
                mol.SetProp("_SMILES", ligand.mol.smiles)
            for prop_name, prop_value in existing_properties.items():
                mol.SetProp(prop_name, str(prop_value))
            for prop_name, prop_value in ligand.properties.items():
                mol.SetProp(prop_name, str(prop_value))
            writer.write(mol)
    finally:
        # Close the writer even when a write fails so the file handle is released.
        writer.close()

    try:
        self.results[0].protein.write_to_file(str(save_dir_path / f"{self.results[0].protein.name}.pdb"))
    except Exception as e:
        # Deliberately best effort: the ligand SDF is still useful without the protein.
        DEFAULT_LOGGER.log_error(f"Failed to write protein to file: {e}")

    save_bounding_box(
        self.pocket_data.box_center,
        self.pocket_data.box_size,
        output_file=str(save_dir_path / "bounding_box.pdb"),
    )
    return str(save_dir_path)
Save docking results to files in a specified directory.
Args
save_dir
:str
or Path
, optional- Directory path where the results will be saved.
- If None, creates a directory in END_USER_HOME or current directory.
safe
:bool
, optional- If True, moves existing files with same name instead of
overwriting them. Defaults to True.
Returns
str
or None
- Path to the created directory containing saved files if successful,
None if no top ligands exist. Files Created: - docking_report_top_ligands.sdf: Contains the top scoring ligands with their properties - {protein_name}.pdb: Protein structure file - bounding_box.pdb: File containing the docking box information
Notes
The saved SDF file includes: - Molecule structure - Molecule name (if available) - SMILES string (if available) - All existing molecular properties - All additional properties from docking results
def visualize(*args, **kwargs)
-
Expand source code
def wrapper(*args, **kwargs):
    # Call the wrapped function with the caller's arguments and pass its
    # output straight to JupyterViewer.visualize, returning that result.
    return JupyterViewer.visualize(func(*args, **kwargs))
class DockingResult (protein: Protein,
smiles: str | None = None,
file_path: str | None = None,
successful: bool | None = True)-
Expand source code
class DockingResult:
    """
    A class representing the results of a molecular docking operation.

    This class stores the protein target, the docked ligand poses and the
    associated metadata, and provides methods for exporting, tabulating,
    visualizing and analyzing them.

    Attributes:
        protein (Protein): The protein target used in the docking.
        ligands (List[Ligand]): List of docked ligand poses.
        rmsds (Optional[List[float]]): RMSD values compared to crystal structure if calculated.
        top_ligand (Optional[Ligand]): The highest scoring ligand pose.
        smiles (Optional[str]): SMILES string representation of the ligand.
        successful (bool): Whether the docking operation was successful.
        file_path (Optional[str]): Path to the docking results file.

    Methods:
        add_ligand(ligand: Ligand): Add a docked ligand pose to results.
        calculate_rmsds_from_crystal(crystal_ligand: Union[Ligand, str]): Calculate
            RMSD values against crystal structure.
        _to_sdf(safe=True, sdf_file_path=None): Export docking results to SDF file.
        _to_dataframe(): Convert results to a styled pandas DataFrame.
        visualize(crystal_ligand_path=None, crystal_ligand_format=None): Visualize docking results.
        analyze(index: Optional[int] = None): Generate interaction analysis of docking poses.

    Examples:
        >>> result = DockingResult(protein, smiles="CC(=O)OC1=CC=CC=C1C(=O)O")
        >>> result.add_ligand(docked_pose)
        >>> result.analyze()
    """

    def __init__(
        self,
        protein: Protein,
        smiles: Optional[str] = None,
        file_path: Optional[str] = None,
        successful: Optional[bool] = True,
    ):
        self.protein = protein
        self.ligands: List[Ligand] = []
        self.rmsds: Optional[List[float]] = None
        self.top_ligand: Optional[Ligand] = None
        self.smiles = smiles
        self.successful = successful
        self.file_path = file_path

    def add_ligand(self, ligand: Ligand):
        """
        Add a ligand to the list of ligands.

        Args:
            ligand (Ligand): The ligand object to be added to the list.

        Returns:
            None
        """
        self.ligands.append(ligand)

    def calculate_rmsds_from_crystal(self, crystal_ligand: Ligand | str):
        """
        Calculate RMSD values between this result's docked poses and a crystal structure.

        Runs the 'obrms' command line tool on ``self.file_path`` and the
        crystal ligand file, and parses the last whitespace-separated token of
        every non-empty output line as a float.

        Args:
            crystal_ligand (Union[Ligand, str]): Either a Ligand object or a
                file path string for the crystal structure to compare against.

        Returns:
            list[float]: The parsed RMSD values (also stored in ``self.rmsds``).

        Raises:
            SystemError: If RMSD calculation fails for any reason, including a
                non-zero exit status of 'obrms'.

        Example:
            >>> result = DockingResult(protein, file_path="path/to/docked_poses.sdf")
            >>> rmsds = result.calculate_rmsds_from_crystal("path/to/crystal.pdb")
        """
        if isinstance(crystal_ligand, str):
            crystal_ligand = Ligand(file_path=str(crystal_ligand))

        try:
            # check=True turns a failing obrms run into an error instead of an
            # empty RMSD list, matching the documented SystemError contract.
            result = subprocess.run(
                ["obrms", self.file_path, crystal_ligand.file_path],
                capture_output=True,
                text=True,
                check=True,
            )
            self.rmsds = [float(rmsd.split()[-1]) for rmsd in result.stdout.split("\n") if rmsd]
            return self.rmsds
        except Exception as e:
            raise SystemError(f"Failed to calculate RMSD values: {e}") from e

    def _to_sdf(self, safe=True, sdf_file_path=None):
        """
        Write the ligands and their properties to an SDF file.

        Each entry of a ligand's ``properties`` is stored as an SDF tag.

        Args:
            safe (bool, optional): If True and the target file exists, it is
                passed to ``move_file_with_extension`` instead of being
                removed. Defaults to True.
            sdf_file_path (str, optional): Custom path for the output SDF file.
                If None, the file is named from the protein name and SMILES and
                placed next to ``self.file_path``, or in the system temporary
                directory when no file path is set. Defaults to None.

        Returns:
            str or None: Path to the created SDF file, or None if no ligands exist.

        Example:
            result._to_sdf(safe=True, sdf_file_path="output.sdf")
        """
        if not self.ligands:
            return None

        if not sdf_file_path:
            # Only take dirname of a real file path; dirname of the temp
            # directory itself would point at its parent.
            output_dir = os.path.dirname(self.file_path) if self.file_path else tempfile.gettempdir()
            sdf_file_path = os.path.join(
                output_dir,
                f"{self.protein.name}_docking_result_{self.smiles}.sdf",
            )

        if safe and os.path.isfile(sdf_file_path):
            move_file_with_extension(sdf_file_path, "sdf")
        else:
            remove_file(sdf_file_path)

        writer = Chem.SDWriter(sdf_file_path)
        try:
            writer.SetKekulize(False)
            for ligand in self.ligands:
                mol = ligand.mol.m  # RDKit molecule from Ligand
                for prop_name, prop_value in ligand.properties.items():
                    mol.SetProp(prop_name, str(prop_value))
                writer.write(mol)
        finally:
            # Close the writer even when a write fails so the file handle is released.
            writer.close()

        return sdf_file_path

    def _to_dataframe(self):
        """
        Converts ligand data to a styled pandas DataFrame.

        Returns:
            pandas.io.formats.style.Styler: A styled table (precision 3),
            sorted by Pose Score in descending order, with the columns:
                - Ligand Pose Rank ID: 1-based position of the pose in ``self.ligands``
                - Pose Score: rounded to 3 decimal places
                - Binding Energy: rounded to 3 decimal places
                - Path To Docked Pose: file path to the docked ligand pose

        Note:
            - Missing Binding Energy or Pose Score values default to 0.0
            - With no ligands, an empty table with the same columns is returned
        """
        columns = ["Ligand Pose Rank ID", "Pose Score", "Binding Energy", "Path To Docked Pose"]
        data = []
        for idx, ligand in enumerate(self.ligands):
            mol_props = ligand.properties
            energy_score = float(mol_props.get("Binding Energy", "0.0"))
            rscore = float(mol_props.get("Pose Score", "0.0"))
            data.append(
                {
                    "Ligand Pose Rank ID": idx + 1,
                    "Pose Score": round(rscore, 3),
                    "Binding Energy": round(energy_score, 3),
                    "Path To Docked Pose": ligand.file_path,
                }
            )

        # Passing the columns explicitly keeps "Pose Score" present for the
        # sort below even when there are no ligands.
        df = pd.DataFrame(data, columns=columns)
        df = df.sort_values(by="Pose Score", ascending=False).reset_index(drop=True)
        return df.style.format(precision=3)

    def _repr_html_(self):
        """Return the styled pose table as HTML."""
        df = self._to_dataframe()
        return df._repr_html_()

    def __str__(self):
        return (
            f"DockingResult:\n Number of Ligands: {len(self.ligands)}\n"
            f" SMILES: {self.smiles if self.smiles else 'Not provided'}\n"
            f" File Path: {self.file_path if self.file_path else 'Not provided'}"
        )

    def __repr__(self):
        return self.__str__()

    @jupyter_visualization
    def visualize(self, crystal_ligand_path=None, crystal_ligand_format=None):
        """
        Visualize docking results with an optional crystal ligand overlay.

        Args:
            crystal_ligand_path (str, optional): File path to the crystal ligand structure.
            crystal_ligand_format (str, optional): Format of the crystal ligand
                file (e.g., 'pdb', 'mol2'). The crystal ligand is only passed
                on when both the path and the format are given.

        Returns:
            The value returned by ``DockingViewer.render_with_seperate_crystal``.

        Note:
            The ligand file (``self.file_path``) is always passed with the
            "sdf" format; the protein uses its own ``block_type``.
        """
        visualization_format = "sdf"
        crystal_data = None
        if crystal_ligand_format and crystal_ligand_path:
            crystal_data = {"raw": str(crystal_ligand_path), "format": crystal_ligand_format}

        return DockingViewer().render_with_seperate_crystal(
            protein_data=str(self.protein.file_path),
            protein_format=self.protein.block_type,
            ligands_data=[str(self.file_path)],
            ligand_format=visualization_format,
            crystal_data=crystal_data,
        )

    def analyze(self, index: Optional[int] = None):
        """
        Analyze protein-ligand interactions with the ``plf`` fingerprinting module.

        NOTE(review): ``plf`` looks like ProLIF, although earlier documentation
        called it PLIPy; confirm against the module imports.

        Args:
            index (Optional[int]): The index of the specific ligand pose to
                analyze. If None, analyzes all poses. Defaults to None.

        Returns:
            - If index is None: the DataFrame from ``fp.to_dataframe(index_col="Pose")``
              (a barcode plot is also produced via ``fp.plot_barcode``).
            - If index is specified: the value returned by ``fp.plot_lignetwork``
              for that pose.

        Raises:
            ValueError: If no ligands or protein are found to analyze
            IndexError: If the provided ligand index is out of range

        Notes:
            - Works on a deep copy of the protein inside a temporary directory
            - Sets Fe, H and O radii on a ``VdWContact`` instance
            - Reads the poses with an unsanitized RDKit SDMolSupplier
        """
        if not self.ligands:
            raise ValueError("No ligands found to analyze.")

        protein = deepcopy(self.protein)
        if not protein:
            raise ValueError("No protein found to analyze.")

        fp = plf.Fingerprint()
        with tempfile.TemporaryDirectory() as temp_dir:
            protein_file = os.path.join(temp_dir, f"{protein.name}.pdb")
            protein.write_to_file(protein_file)
            protein.file_path = protein_file
            sdf_file_path = self._to_sdf(sdf_file_path=os.path.join(temp_dir, f"{protein.name}_docking_result.sdf"))

            v = VdWContact()
            v.vdwradii["Fe"] = 2.0
            v.vdwradii["H"] = 1.05
            v.vdwradii["O"] = 1.48

            rdkit_prot = Chem.MolFromPDBFile(protein_file, False, False)
            protein_mol = plf.Molecule(rdkit_prot)
            pose_iterable = plf.sdf_supplier(str(sdf_file_path))
            # Swap in a supplier that skips sanitization when reading poses.
            sdf_supp = Chem.SDMolSupplier(str(sdf_file_path), sanitize=False)
            pose_iterable._suppl = sdf_supp

            if index is not None:
                if index < 0 or index >= len(sdf_supp):
                    raise IndexError("Ligand index out of range.")
                single_ligand_iterable = pose_iterable[index]
                fp.run_from_iterable([single_ligand_iterable], protein_mol)
                result = fp.plot_lignetwork(single_ligand_iterable)
            else:
                fp.run_from_iterable(pose_iterable, protein_mol)
                fp.plot_barcode(xlabel="Pose")
                result = fp.to_dataframe(index_col="Pose")

        return result
A class representing the results of a molecular docking operation.
This class stores and manages the results of a molecular docking simulation, including the protein target, docked ligands, and associated metadata. It provides methods for analyzing and visualizing the docking results.
Attributes
protein
:Protein
- The protein target used in the docking.
ligands
:List[Ligand]
- List of docked ligand poses.
rmsds
:Optional[List[float]]
- RMSD values compared to crystal structure if calculated.
top_ligand
:Optional[Ligand]
- The highest scoring ligand pose.
smiles
:Optional[str]
- SMILES string representation of the ligand.
successful
:bool
- Whether the docking operation was successful.
file_path
:Optional[str]
- Path to the docking results file.
Methods
add_ligand(ligand: Ligand): Add a docked ligand pose to results. calculate_rmsds_from_crystal(crystal_ligand: Union[Ligand, str]): Calculate RMSD values against crystal structure. _to_sdf(safe=True, sdf_file_path=None): Export docking results to SDF file. _to_dataframe(): Convert results to pandas DataFrame. visualize(crystal_ligand_path=None, crystal_ligand_format=None): Visualize docking results. analyze(index: Optional[int] = None): Generate detailed analysis of docking poses.
Examples
>>> result = DockingResult(protein, smiles="CC(=O)OC1=CC=CC=C1C(=O)O") >>> result.add_ligand(docked_pose) >>> result.analyze()
Methods
def add_ligand(self,
ligand: Ligand)-
Expand source code
def add_ligand(self, ligand: Ligand):
    """
    Append one docked pose to this result's ligand list.

    Args:
        ligand (Ligand): The ligand object to store.

    Returns:
        None
    """
    poses = self.ligands
    poses.append(ligand)
Add a ligand to the list of ligands.
Args
ligand
:Ligand
- The ligand object to be added to the list.
Returns
None
def analyze(self, index: int | None = None)
-
Expand source code
def analyze(self, index: Optional[int] = None):
    """
    Run interaction fingerprinting for the docked poses against the protein.

    The protein (a deep copy) and the poses are written to a temporary
    directory, loaded through RDKit, and fed to a ``plf.Fingerprint``.

    Args:
        index (Optional[int]): Pose to analyze. When None, every pose is
            analyzed. Defaults to None.

    Returns:
        - If index is None: the DataFrame from ``fp.to_dataframe(index_col="Pose")``
          (a barcode plot is also produced via ``fp.plot_barcode``).
        - If index is specified: the value returned by ``fp.plot_lignetwork``
          for that pose.

    Raises:
        ValueError: If no ligands or protein are found to analyze
        IndexError: If the provided ligand index is out of range
    """
    if not self.ligands:
        raise ValueError("No ligands found to analyze.")

    protein = deepcopy(self.protein)
    if not protein:
        raise ValueError("No protein found to analyze.")

    fp = plf.Fingerprint()
    with tempfile.TemporaryDirectory() as workdir:
        pdb_path = os.path.join(workdir, f"{protein.name}.pdb")
        protein.write_to_file(pdb_path)
        protein.file_path = pdb_path

        poses_target = os.path.join(workdir, f"{protein.name}_docking_result.sdf")
        poses_path = str(self._to_sdf(sdf_file_path=poses_target))

        vdw_contact = VdWContact()
        for element, radius in (("Fe", 2.0), ("H", 1.05), ("O", 1.48)):
            vdw_contact.vdwradii[element] = radius

        protein_mol = plf.Molecule(Chem.MolFromPDBFile(pdb_path, False, False))
        poses = plf.sdf_supplier(poses_path)
        # Swap in a supplier that skips sanitization when reading poses.
        raw_supplier = Chem.SDMolSupplier(poses_path, sanitize=False)
        poses._suppl = raw_supplier

        if index is None:
            fp.run_from_iterable(poses, protein_mol)
            fp.plot_barcode(xlabel="Pose")
            result = fp.to_dataframe(index_col="Pose")
        else:
            if not 0 <= index < len(raw_supplier):
                raise IndexError("Ligand index out of range.")
            selected_pose = poses[index]
            fp.run_from_iterable([selected_pose], protein_mol)
            result = fp.plot_lignetwork(selected_pose)

    return result
Analyze protein-ligand interactions using PLIPy fingerprinting.
This method analyzes the interactions between a protein and its ligands using the PLIP (Protein-Ligand Interaction Profiler) fingerprinting approach. It can analyze either a single ligand pose (specified by index) or all ligand poses.
Args
index
:Optional[int]
- The index of the specific ligand pose to analyze. If None, analyzes all poses. Defaults to None.
Returns
Union[pd.DataFrame, Dict]
-
- If index is None: Returns a DataFrame containing interaction fingerprints for all poses
- If index is specified: Returns a dictionary containing the ligand network plot data
Raises
ValueError
- If no ligands or protein are found to analyze
IndexError
- If the provided ligand index is out of range
Notes
- Creates temporary files for processing
- Sets custom Van der Waals radii for Fe, H, and O atoms
- Uses RDKit for molecular operations
- Uses PLIPy for fingerprint generation
def calculate_rmsds_from_crystal(self,
crystal_ligand: Ligand | str)-
Expand source code
def calculate_rmsds_from_crystal(self, crystal_ligand: Ligand | str):
    """
    Calculate RMSD values between this result's docked poses and a crystal structure.

    Runs the 'obrms' command line tool on ``self.file_path`` and the crystal
    ligand file, and parses the last whitespace-separated token of every
    non-empty output line as a float.

    Args:
        crystal_ligand (Union[Ligand, str]): Either a Ligand object or a file
            path string for the crystal structure to compare against.

    Returns:
        list[float]: The parsed RMSD values (also stored in ``self.rmsds``).

    Raises:
        SystemError: If RMSD calculation fails for any reason, including a
            non-zero exit status of 'obrms'.

    Example:
        >>> result = DockingResult(protein, file_path="path/to/docked_poses.sdf")
        >>> rmsds = result.calculate_rmsds_from_crystal("path/to/crystal.pdb")
    """
    if isinstance(crystal_ligand, str):
        crystal_ligand = Ligand(file_path=str(crystal_ligand))

    try:
        # check=True turns a failing obrms run into an error instead of an
        # empty RMSD list, matching the documented SystemError contract.
        result = subprocess.run(
            ["obrms", self.file_path, crystal_ligand.file_path],
            capture_output=True,
            text=True,
            check=True,
        )
        self.rmsds = [float(rmsd.split()[-1]) for rmsd in result.stdout.split("\n") if rmsd]
        return self.rmsds
    except Exception as e:
        # Chain the original exception so the root cause stays in the traceback.
        raise SystemError(f"Failed to calculate RMSD values: {e}") from e
Calculate RMSD values between the docked poses in this result's file and a crystal structure.
This method computes Root Mean Square Deviation (RMSD) values between the poses stored at this result's file path and a reference crystal structure using the 'obrms' command line tool.
Args
crystal_ligand
:Union[Ligand, str]
- Either a Ligand object or a file path string representing the crystal structure to compare against.
Returns
list[float]
- A list of RMSD values computed between the current ligand and the crystal structure.
Raises
SystemError
- If RMSD calculation fails for any reason.
Example
>>> result = DockingResult(protein, file_path="path/to/docked_poses.sdf") >>> rmsds = result.calculate_rmsds_from_crystal("path/to/crystal.pdb")
def visualize(*args, **kwargs)
-
Expand source code
def wrapper(*args, **kwargs):
    # Call the wrapped function with the caller's arguments and pass its
    # output straight to JupyterViewer.visualize, returning that result.
    return JupyterViewer.visualize(func(*args, **kwargs))
class MolPropsReport (results)
-
Expand source code
class MolPropsReport: """ A class for handling and displaying molecular property calculation results. This class processes molecular property calculation results and can convert them into a formatted pandas DataFrame for display, particularly useful in Jupyter notebooks. Args: results (list): A list of dictionaries containing molecular property calculation results. Each dictionary must contain a 'smiles' key and additional property keys. Attributes: results (list): The stored results from molecular property calculations. Methods: _to_dataframe(): Converts the results into a formatted pandas DataFrame. _repr_html_(): Returns HTML representation of the data for display in Jupyter notebooks. Examples: >>> results = [{"smiles": "CC", "property1": 0.5, "property2": 1.0}] >>> report = MolPropsReport(results) >>> df = report._to_dataframe() """ def __init__(self, results): self.results = results def _to_dataframe(self): """ Converts the results data into a pandas DataFrame. Extracts SMILES strings and other result properties into separate lists, then combines them into a DataFrame with formatted numerical precision. Returns: pandas.DataFrame: A styled DataFrame containing SMILES strings and associated data with numerical values formatted to 3 decimal places. """ smiles = [] data = {k: [] for k in self.results[0].keys() if k != "smiles"} for result in self.results: smiles.append(result["smiles"]) for k in data: data[k].append(result.get(k)) data = { "SMILES": smiles, **data, } df = pd.DataFrame(data).style.set_properties().format(precision=3) return df def _repr_html_(self): df = self._to_dataframe() return df._repr_html_()
A class for handling and displaying molecular property calculation results.
This class processes molecular property calculation results and can convert them into a formatted pandas DataFrame for display, particularly useful in Jupyter notebooks.
Args
results
:list
- A list of dictionaries containing molecular property calculation results. Each dictionary must contain a 'smiles' key and additional property keys.
Attributes
results
:list
- The stored results from molecular property calculations.
Methods
_to_dataframe(): Converts the results into a formatted pandas DataFrame.
_repr_html_(): Returns HTML representation of the data for display in Jupyter notebooks.
Examples
>>> results = [{"smiles": "CC", "property1": 0.5, "property2": 1.0}]
>>> report = MolPropsReport(results)
>>> df = report._to_dataframe()
class PainsReport (results)
-
Expand source code
class PainsReport:
    """
    A class for generating and displaying PAINS (Pan-Assay Interference Compounds)
    analysis reports.

    Args:
        results (list): A list of dictionaries containing PAINS analysis results.
            Each dictionary should contain:
            - smiles (str): SMILES string representation of the molecule
            - PAINS (list or None): List of PAINS pattern SMARTS strings that
              match the molecule or None if no matches found

    Methods:
        get_html_of_molecule(result): Generates HTML img tag with highlighted PAINS matches
        _to_dataframe(): Converts results to pandas DataFrame
        _repr_html_(): Returns HTML representation for Jupyter display

    Examples:
        >>> results = [{'smiles': 'CC(=O)Oc1ccccc1C(=O)O', 'PAINS': ['[O,S]-[CH2]-[CH2]-[O,S]']}]
        >>> report = PainsReport(results)
        >>> print(report)
        PainsReport:
          Number of results: 1

    Notes:
        Requires RDKit for molecular operations and visualization.
        Implements Jupyter notebook display protocol via _repr_html_.
    """

    def __init__(self, results):
        self.results = results

    def get_html_of_molecule(self, result):
        """
        Render one molecule as an inline PNG ``<img>`` tag with PAINS atoms highlighted.

        For each SMARTS pattern listed under ``result["PAINS"]``, the atoms of
        the first substructure match are highlighted. Patterns that cannot be
        parsed or that do not match the molecule are skipped.

        Args:
            result (dict): Dictionary with a 'smiles' string and a 'PAINS' entry
                (list of SMARTS strings, or None).

        Returns:
            str: An HTML ``<img>`` tag embedding the base64-encoded PNG.

        Raises:
            ValueError: If the SMILES string cannot be parsed by RDKit.
        """
        molecule = Chem.MolFromSmiles(result["smiles"])
        if molecule is None:
            raise ValueError(f"Could not parse SMILES: {result['smiles']!r}")

        all_matches = []
        for smarts in result["PAINS"] or []:
            pattern = Chem.MolFromSmarts(smarts)
            if pattern is None:
                continue
            atom_matches = molecule.GetSubstructMatches(pattern)
            # Guard against patterns with no match; indexing [0] would raise.
            if atom_matches:
                all_matches.extend(atom_matches[0])

        # NOTE(review): these are module-wide RDKit drawing options, so this
        # affects other drawings made in the same process.
        Draw.DrawingOptions.atomHighlightsAreCircles = True
        Draw.DrawingOptions.atomHighlightColors = {i: (1, 0, 0) for i in set(all_matches)}

        img = Draw.MolToImage(molecule, size=(200, 100), highlightAtoms=all_matches)
        buffer = BytesIO()
        img.save(buffer, format="PNG")
        img_str = base64.b64encode(buffer.getvalue()).decode("utf-8")
        return '<img src="data:image/png;base64,{0}">'.format(img_str)

    def _to_dataframe(self):
        """
        Convert the results into a pandas DataFrame.

        Returns:
            pd.DataFrame: A DataFrame with the following columns:
                - SMILES: SMILES string of each molecule
                - Molecule Image: HTML ``<img>`` tag for each molecule
                - SMARTS patterns of PAINS: the 'PAINS' entry of each result
        """
        return pd.DataFrame.from_dict(
            {
                "SMILES": [result["smiles"] for result in self.results],
                "Molecule Image": [self.get_html_of_molecule(result) for result in self.results],
                "SMARTS patterns of PAINS": [result["PAINS"] for result in self.results],
            }
        )

    def _repr_html_(self):
        """Return the table as HTML; escaping is off so the image tags render."""
        return self._to_dataframe().to_html(escape=False)

    def __str__(self):
        # The previous text said "DockingReport", copied from another class.
        return f"PainsReport:\n  Number of results: {len(self.results)}"

    def __repr__(self):
        return self.__str__()
A class for generating and displaying PAINS (Pan-Assay Interference Compounds) analysis reports.
Args
results
:list
- A list of dictionaries containing PAINS analysis results. Each dictionary should contain: - smiles (str): SMILES string representation of the molecule - PAINS (list or None): List of PAINS pattern SMARTS strings that match the molecule or None if no matches found
Methods
get_html_of_molecule(result): Generates HTML img tag with highlighted PAINS matches
_to_dataframe(): Converts results to pandas DataFrame
_repr_html_(): Returns HTML representation for Jupyter display
Examples
>>> results = [{'smiles': 'CC(=O)Oc1ccccc1C(=O)O', 'PAINS': ['[O,S]-[CH2]-[CH2]-[O,S]']}] >>> report = PainsReport(results) >>> print(report) DockingReport: Number of DockingResults: 1
Notes
Requires RDKit for molecular operations and visualization. Implements Jupyter notebook display protocol via _repr_html_.
Methods
def get_html_of_molecule(self, result)
-
Expand source code
def get_html_of_molecule(self, result):
    """
    Render one molecule as an inline PNG ``<img>`` tag with PAINS atoms highlighted.

    For each SMARTS pattern listed under ``result["PAINS"]``, the atoms of the
    first substructure match are highlighted. Patterns that cannot be parsed or
    that do not match the molecule are skipped.

    Args:
        result (dict): Dictionary with a 'smiles' string and a 'PAINS' entry
            (list of SMARTS strings, or None).

    Returns:
        str: An HTML ``<img>`` tag embedding the base64-encoded PNG.

    Raises:
        ValueError: If the SMILES string cannot be parsed by RDKit.
    """
    molecule = Chem.MolFromSmiles(result["smiles"])
    if molecule is None:
        raise ValueError(f"Could not parse SMILES: {result['smiles']!r}")

    all_matches = []
    for smarts in result["PAINS"] or []:
        pattern = Chem.MolFromSmarts(smarts)
        if pattern is None:
            continue
        atom_matches = molecule.GetSubstructMatches(pattern)
        # Guard against patterns with no match; indexing [0] would raise.
        if atom_matches:
            all_matches.extend(atom_matches[0])

    # NOTE(review): these are module-wide RDKit drawing options, so this
    # affects other drawings made in the same process.
    Draw.DrawingOptions.atomHighlightsAreCircles = True
    Draw.DrawingOptions.atomHighlightColors = {i: (1, 0, 0) for i in set(all_matches)}

    img = Draw.MolToImage(molecule, size=(200, 100), highlightAtoms=all_matches)
    buffer = BytesIO()
    img.save(buffer, format="PNG")
    img_str = base64.b64encode(buffer.getvalue()).decode("utf-8")
    return '<img src="data:image/png;base64,{0}">'.format(img_str)
class PocketFinderReport (protein, csv_file_path='')
-
Expand source code
class PocketFinderReport:
    """
    PocketFinderReport class for managing protein pocket analysis results.

    A class to handle collection and reporting of protein pocket properties
    including drugability scores, volumes, surface areas and other
    physicochemical properties.

    Attributes:
        protein: The protein object associated with this report
        file_path (str): Path to save the CSV report file
        pockets (list): List of pocket objects containing analysis results

    Methods:
        add_pocket(pocket): Add a pocket object to the report
        _to_dataframe(): Convert pocket data to pandas DataFrame
        _repr_html_(): Generate HTML representation of the report
        save_props(): Save pocket properties to CSV file

    Example:
        report = PocketFinderReport(protein_obj, "output.csv")
        report.add_pocket(pocket_obj)
        report.save_props()
    """

    # Output column name -> key looked up in ``pocket.props``.
    _PROP_COLUMNS = {
        "Drugability Score": "drugability_score",
        "Volume": "volume",
        "Total SASA": "total_SASA",
        "Polar SASA": "polar_SASA",
        "Polar/Apolar SASA Ratio": "polar_apolar_SASA_ratio",
        "Hydrophobicity": "hydrophobicity",
        "Polarity": "polarity",
    }

    def __init__(self, protein, csv_file_path=""):
        self.protein = protein
        self.file_path = csv_file_path
        self.pockets = []

    def add_pocket(self, pocket):
        """
        Add a pocket to the collection of pockets.

        Args:
            pocket: A pocket object to be added to the pockets list.
        """
        self.pockets.append(pocket)

    def _to_dataframe(self):
        """
        Convert the collected pockets into a pandas DataFrame.

        Pockets whose ``props`` is empty or None are skipped. "Pocket ID" is the
        1-based position of the pocket in ``self.pockets``. Missing properties
        default to 0.

        Returns:
            pd.DataFrame: One row per pocket with props, sorted by
            "Drugability Score" in descending order. When no pocket has props,
            an empty DataFrame with the same columns is returned.
        """
        columns = ["Pocket ID", "Color", *self._PROP_COLUMNS]
        rows = []
        for idx, pocket in enumerate(self.pockets, start=1):
            props = pocket.props
            if not props:
                continue
            row = {"Pocket ID": idx, "Color": pocket.color}
            for column, key in self._PROP_COLUMNS.items():
                row[column] = props.get(key, 0)
            rows.append(row)

        # Passing the columns explicitly keeps the sort key present even when
        # there are no rows; otherwise sort_values raises KeyError.
        df = pd.DataFrame(rows, columns=columns)
        return df.sort_values(by="Drugability Score", ascending=False).reset_index(drop=True)

    def _repr_html_(self):
        """Return the table as HTML with numbers formatted to 3 decimal places."""
        return self._to_dataframe().style.format(precision=3)._repr_html_()

    def save_props(self):
        """
        Saves the properties of the report to a CSV file.

        Converts the pocket data to a pandas DataFrame and writes it to the path
        stored in ``self.file_path``, without the index column.

        Returns:
            None
        """
        df = self._to_dataframe()
        df.to_csv(self.file_path, index=False)
PocketFinderReport class for managing protein pocket analysis results.
A class to handle collection and reporting of protein pocket properties including drugability scores, volumes, surface areas and other physicochemical properties.
Attributes
protein
- The protein object associated with this report
file_path
:str
- Path to save the CSV report file
pockets
:list
- List of pocket objects containing analysis results
Methods
add_pocket(pocket): Add a pocket object to the report
_to_dataframe(): Convert pocket data to pandas DataFrame
_repr_html_(): Generate HTML representation of the report
save_props(): Save pocket properties to CSV file
Example
report = PocketFinderReport(protein_obj, "output.csv")
report.add_pocket(pocket_obj)
report.save_props()
Methods
def add_pocket(self, pocket)
-
Expand source code
def add_pocket(self, pocket): """ Add a pocket to the collection of pockets. Args: pocket: A pocket object to be added to the pockets list. """ self.pockets.append(pocket)
Add a pocket to the collection of pockets.
Args
pocket
- A pocket object to be added to the pockets list.
def save_props(self)
-
Expand source code
def save_props(self):
    """
    Write the report table to a CSV file.

    Builds the DataFrame via ``self._to_dataframe()`` and writes it to the path
    held in ``self.file_path``, leaving out the index column.

    Returns:
        None
    """
    table = self._to_dataframe()
    destination = self.file_path
    table.to_csv(destination, index=False)
Saves the properties of the report to a CSV file.
This method converts the internal data structure to a pandas DataFrame and saves it to the file path specified in self.file_path attribute. The DataFrame is saved without the index column.
Returns
None
class ProtonationReport (results)
-
Expand source code
class ProtonationReport:
    """
    Report over protonation results, with a tabular view and per-molecule plots.

    Attributes:
        results (list): Deep copies of the input items with 'html_metadata'
            removed from their 'protonation' entry.
        html_metadata (dict): Mapping of each input SMILES to its
            'html_metadata' value (None when the item had none).
    """

    def __init__(self, results):
        self.results, self.html_metadata = self.split_results(results)

    def split_results(self, results):
        """
        Splits results by separating HTML metadata from the main data.

        Args:
            results (list): List of dictionaries containing molecular data with
                'smiles' and 'protonation' keys. The 'protonation' may contain
                'html_metadata'.

        Returns:
            tuple: A 2-tuple containing:
                - list: Deep copy of input results with HTML metadata removed
                  from protonation data
                - dict: Mapping of SMILES strings to their corresponding HTML
                  metadata (None when absent)
        """
        list_without_html = []
        smiles_to_html_dict = {}
        for item in results:
            new_item = copy.deepcopy(item)
            new_item["protonation"].pop("html_metadata", None)
            list_without_html.append(new_item)
            smiles_to_html_dict[item["smiles"]] = item["protonation"].get("html_metadata")
        return list_without_html, smiles_to_html_dict

    def show_plots(self):
        """
        Display a heading and the concentration-curve plot for every molecule.

        Molecules without plot metadata (``None``) are skipped, since there is
        nothing to unpack and plot for them.
        """
        for smi, html_meta in self.html_metadata.items():
            if html_meta is None:
                continue
            display(HTML(f"<center><h2>{smi}</h2></center>"))
            self.plot_concentration_curves(html_meta, plot=True)

    def smiles_to_img_html(self, smiles):
        """Return an SVG string (newlines removed) drawn from the given SMILES."""
        mol = Chem.MolFromSmiles(smiles)
        AllChem.Compute2DCoords(mol)
        mol.GetConformer().SetId(1)
        return Draw.MolToSVG(mol, width=200, height=150).replace("\n", "")

    def plot_concentration_curves(self, html_meta, plot=False):
        """
        Plot concentration curves for different molecular species across a pH range.

        Args:
            html_meta (tuple): A tuple containing:
                - fractions (list): Matrix of fraction values for each species
                - smiles_list (list): List of SMILES notations for each species
                - concentration_values (list): Concentration values for each species
                - pH_range (list): Range of pH values for x-axis
                - pH (float): Current pH value
            plot (bool, optional): If True, displays the plot immediately.
                Defaults to False.

        Returns:
            plotly.graph_objects.Figure: The figure containing the concentration
            curves, with fraction percentage (0-100%) on the y-axis and pH
            (0-14) on the x-axis. Each curve is labeled with the species SMILES
            and its concentration at the current pH.
        """
        fractions, smiles_list, concentration_values, pH_range, pH = html_meta
        # Transpose so that iterating yields one curve per species.
        fractions = np.transpose(np.array(fractions))

        fig = sp.make_subplots(rows=1, cols=1)
        for i, fraction in enumerate(fractions):
            fig.add_trace(
                go.Scatter(
                    x=pH_range,
                    y=fraction * 100,
                    mode="lines",
                    showlegend=True,
                    name=f"Fraction of {smiles_list[i]} at pH={pH:.1f} is {round(concentration_values[i], 2)}(%)",
                ),
                row=1,
                col=1,
            )

        fig.update_layout(
            xaxis=dict(title="pH", range=[0, 14]),
            yaxis=dict(title="Fraction (%)", range=[0, 100]),
            hovermode="closest",
        )

        if plot:
            fig.show()
        # Return the figure as documented; it was previously built and then dropped.
        return fig

    def _to_dataframe(self):
        """
        Converts the protonation results to a pandas DataFrame.

        Builds one sub-table per result and concatenates them under a top index
        level named "SMILES" holding the original SMILES string.

        Columns:
            - 'protonated SMILES': The SMILES strings of protonated species
            - 'Concentration %': Concentrations rounded to 2 decimal places
            - 'Molecule Image': SVG markup for each protonated species

        Returns:
            pd.DataFrame: The combined table. When there are no results, an
            empty DataFrame with the three columns above (and a plain index) is
            returned instead of raising from ``pd.concat``.
        """
        data_dict = {}
        for result in self.results:
            protonation = result["protonation"]
            data_dict[result["smiles"]] = pd.DataFrame(
                {
                    "protonated SMILES": protonation["smiles_list"],
                    "Concentration %": [round(value, 2) for value in protonation["concentration_list"]],
                }
            )

        if not data_dict:
            return pd.DataFrame(columns=["protonated SMILES", "Concentration %", "Molecule Image"])

        df = pd.concat(data_dict.values(), keys=data_dict.keys(), names=["SMILES"])
        df["Molecule Image"] = df["protonated SMILES"].apply(self.smiles_to_img_html)
        return df

    def _repr_html_(self):
        """Return the table as HTML; escaping is off so the SVG markup renders."""
        return self._to_dataframe().to_html(escape=False)
Methods
def plot_concentration_curves(self, html_meta, plot=False)
-
Expand source code
def plot_concentration_curves(self, html_meta, plot=False):
    """
    Plot concentration curves for different molecular species across a pH range.

    Args:
        html_meta (tuple): A tuple containing:
            - fractions (list): Matrix of fraction values for each species
            - smiles_list (list): List of SMILES notations for each species
            - concentration_values (list): Concentration values for each species
            - pH_range (list): Range of pH values for x-axis
            - pH (float): Current pH value
        plot (bool, optional): If True, displays the plot immediately.
            Defaults to False.

    Returns:
        plotly.graph_objects.Figure: The figure containing the concentration
        curves, with fraction percentage (0-100%) on the y-axis and pH (0-14)
        on the x-axis. Each curve is labeled with the species SMILES and its
        concentration at the current pH.
    """
    fractions, smiles_list, concentration_values, pH_range, pH = html_meta
    # Transpose so that iterating yields one curve per species.
    fractions = np.transpose(np.array(fractions))

    fig = sp.make_subplots(rows=1, cols=1)
    for i, fraction in enumerate(fractions):
        fig.add_trace(
            go.Scatter(
                x=pH_range,
                y=fraction * 100,
                mode="lines",
                showlegend=True,
                name=f"Fraction of {smiles_list[i]} at pH={pH:.1f} is {round(concentration_values[i], 2)}(%)",
            ),
            row=1,
            col=1,
        )

    fig.update_layout(
        xaxis=dict(title="pH", range=[0, 14]),
        yaxis=dict(title="Fraction (%)", range=[0, 100]),
        hovermode="closest",
    )

    if plot:
        fig.show()
    # Return the figure as documented; it was previously built and then dropped.
    return fig
Plot concentration curves for different molecular species across a pH range.
Args
html_meta
:tuple
- A tuple containing:
- fractions (list): Matrix of fraction values for each species
- smiles_list (list): List of SMILES notations for each species
- concentration_values (list): Concentration values for each species
- pH_range (list): Range of pH values for x-axis
- pH (float): Current pH value
plot
:bool, optional
- If True, displays the plot immediately. Defaults to False.
Returns
plotly.graph_objects.Figure
- A plotly figure object containing the concentration curves.
The plot shows fraction percentage (0-100%) on y-axis vs pH (0-14) on x-axis. Each curve is labeled with species SMILES and concentration at current pH.
def show_plots(self)
-
Expand source code
def show_plots(self): for smi, html_meta in self.html_metadata.items(): centered_html = f"<center><h2>{smi}</h2>" display(HTML(centered_html)) self.plot_concentration_curves(html_meta, plot=True)
def smiles_to_img_html(self, smiles)
-
Expand source code
def smiles_to_img_html(self, smiles):
    """
    Build SVG markup for the molecule described by a SMILES string.

    Args:
        smiles (str): SMILES string to draw.

    Returns:
        str: The 200x150 SVG text with all newline characters removed.
    """
    molecule = Chem.MolFromSmiles(smiles)
    AllChem.Compute2DCoords(molecule)
    conformer = molecule.GetConformer()
    conformer.SetId(1)
    svg_markup = Draw.MolToSVG(molecule, width=200, height=150)
    return svg_markup.replace("\n", "")
def split_results(self, results)
-
Expand source code
def split_results(self, results):
    """
    Separate the HTML metadata from the rest of each result item.

    Every item is deep-copied and the copy loses the 'html_metadata' key of its
    'protonation' entry, if present. The input items are left untouched. The
    metadata is collected in a dictionary keyed by the item's SMILES.

    Args:
        results (list): Dictionaries with 'smiles' and 'protonation' keys, where
            'protonation' may hold 'html_metadata'.

    Returns:
        tuple: A 2-tuple of:
            - list: the copied items without HTML metadata
            - dict: SMILES string -> HTML metadata (None when the item had none)
    """
    stripped_items = []
    metadata_by_smiles = {}
    for entry in results:
        clone = copy.deepcopy(entry)
        clone["protonation"].pop("html_metadata", None)
        stripped_items.append(clone)
        metadata_by_smiles[entry["smiles"]] = entry["protonation"].get("html_metadata")
    return stripped_items, metadata_by_smiles
Splits results by separating HTML metadata from the main data.
This method processes a list of result items, removing HTML metadata from the protonation data while preserving it in a separate dictionary mapped by SMILES.
Args
results
:list
- List of dictionaries containing molecular data with 'smiles' and 'protonation' keys. The 'protonation' may contain 'html_metadata'.
Returns
tuple
- A 2-tuple containing: - list: Copy of input results with HTML metadata removed from protonation data - dict: Mapping of SMILES strings to their corresponding HTML metadata