Module deeporigin.src.structures.reports

Classes

class DockingReport (results: List[DockingResult],
pocket_data)
Expand source code
class DockingReport:
    """
    Collect docking results and report, save, or visualize them.

    The report wraps a list of docking results together with the pocket data used
    for docking. It can tabulate the top ligand of every result, write those
    ligands (plus the protein and the docking box) to disk, and render the
    protein-ligand complex in 3D.

    Attributes:
        results (List[DockingResult]): A list of docking results.
        pocket_data: Data about the binding pocket used in docking. ``save`` reads
            its ``box_center`` and ``box_size`` attributes.

    Methods:
        _to_dataframe(include_props=None): Converts docking results to a pandas DataFrame.
        _repr_html_(): Returns HTML representation of the docking report.
        generate_custom_report(include_props=False): Generates a custom HTML report,
            optionally including every ligand property.
        save(save_dir=None, safe=True): Saves docking results to SDF file with properties.
        visualize(protein_path=None, protein_format=None, sdf_file_path=None,
                 crystal_ligand_path=None, crystal_ligand_format=None):
            Visualizes the docking results in 3D.

    Examples:
        >>> report = DockingReport(results, pocket_data)
        >>> report.save()  # Saves results to an SDF file
        >>> report.visualize()  # Shows 3D visualization of results
    """

    # Columns present in every row of the report table, in display order.
    _BASE_COLUMNS = (
        "Image",
        "SMILES",
        "Ranking Score",
        "Binding Energy",
        "Path To Docked Pose",
    )

    def __init__(self, results: List[DockingResult], pocket_data):
        self.results = results
        self.pocket_data = pocket_data

    def _to_dataframe(self, include_props=None):
        """
        Converts docking results to a pandas DataFrame.

        Args:
            include_props (optional): Truthy flag. When set, every entry of the top
                ligand's ``properties`` whose name does not contain ``"smiles"`` is
                added as an extra column. Properties that share a name with one of
                the default columns do not replace the default (rounded) value.

        Returns:
            pd.DataFrame: One row per docking result with these columns:
            - Image: result of ``ligand.mol._draw()`` for the top ligand
            - SMILES: SMILES string stored on the result
            - Ranking Score: Docking ranking score (rounded to 3 decimal places)
            - Binding Energy: Binding energy value (rounded to 3 decimal places)
            - Path To Docked Pose: File path to the docked ligand pose
            Results without a top ligand, or flagged unsuccessful, keep ``None`` in
            every column except SMILES. Rows are sorted by Ranking Score in
            descending order (rows without a score sort last). An empty result
            list yields an empty DataFrame that still carries the default columns.
        """
        rows = []
        for result in self.results:
            row = dict.fromkeys(self._BASE_COLUMNS)
            row["SMILES"] = result.smiles

            if result.top_ligand and result.successful:
                ligand = result.top_ligand
                mol_props = ligand.properties

                # Missing scores fall back to 0.0.
                energy_score = float(mol_props.get("Binding Energy", "0.0"))
                ranking_score = float(mol_props.get("Ranking Score", "0.0"))
                row["Image"] = ligand.mol._draw()
                row["Ranking Score"] = round(ranking_score, 3)
                row["Binding Energy"] = round(energy_score, 3)
                row["Path To Docked Pose"] = ligand.file_path

                if include_props:
                    for prop, value in mol_props.items():
                        # Do not let raw property values clobber the numeric,
                        # rounded default columns: the sort below relies on
                        # "Ranking Score" holding floats, not strings.
                        if "smiles" not in prop and prop not in row:
                            row[prop] = value
            rows.append(row)

        if rows:
            df = pd.DataFrame(rows)
        else:
            # pd.DataFrame([]) has no columns, which would make sort_values
            # raise KeyError; keep the default columns for an empty report.
            df = pd.DataFrame(columns=list(self._BASE_COLUMNS))
        return df.sort_values(by="Ranking Score", ascending=False).reset_index(drop=True)

    def _repr_html_(self):
        """Return the report table as HTML (3-decimal precision) for notebook display."""
        styled = self._to_dataframe().style.format(precision=3)
        return styled._repr_html_()

    def generate_custom_report(self, include_props=False):
        """
        Generate a custom HTML report from the data.

        This method converts the internal data to a styled pandas DataFrame and returns it as HTML.
        The resulting DataFrame is formatted with 3 decimal places precision.

        Args:
            include_props (bool, optional): Whether to include the ligand properties
                as additional columns. Defaults to False.

        Returns:
            HTML: A styled HTML representation of the data with 3 decimal places precision.

        Example:
            >>> report = obj.generate_custom_report(include_props=True)
            >>> display(report)  # In Jupyter notebook
        """
        styled = self._to_dataframe(include_props).style.format(precision=3)
        return HTML(styled._repr_html_())

    def __str__(self):
        return f"DockingReport:\n  Number of DockingResults: {len(self.results)}"

    def __repr__(self):
        return self.__str__()

    def save(self, save_dir=None, safe=True):
        """
        Save docking results to files in a new timestamped directory.

        Args:
            save_dir (str or Path, optional): Parent directory for the output. If not
                given, the ``END_USER_HOME`` environment variable is used, falling
                back to the current directory.
            safe (bool, optional): If True and the SDF file already exists, it is
                handed to ``move_file_with_extension`` instead of being removed.
                Defaults to True.

        Returns:
            str or None: Path to the created ``docking_report_<timestamp>`` directory,
            or None if no result has a top ligand (nothing is written in that case).

        Files Created:
            - docking_report_top_ligands.sdf: Contains the top scoring ligands with their properties
            - {protein_name}.pdb: Protein structure file of the first result
            - bounding_box.pdb: File containing the docking box information

        Notes:
            The saved SDF file includes:
            - Molecule structure
            - Molecule name (if available)
            - SMILES string (if available)
            - All existing molecular properties
            - All additional properties from docking results
            Properties are set on the ligands' RDKit molecules in place. A failure to
            write the protein file is logged and does not abort the save.
        """
        top_ligands = [result.top_ligand for result in self.results if result.top_ligand]
        if not top_ligands:
            return None

        # Resolve the parent directory first so the timestamped name is built once.
        base_dir = Path(save_dir) if save_dir else Path(os.getenv("END_USER_HOME", "."))
        save_dir_path = base_dir / f"docking_report_{datetime.now().strftime('%m-%d-%Y|%H:%M:%S')}"

        save_dir_path.mkdir(parents=True, exist_ok=True)
        sdf_file_path = save_dir_path / "docking_report_top_ligands.sdf"
        if safe and sdf_file_path.exists():
            move_file_with_extension(str(sdf_file_path), "sdf")
        else:
            remove_file(str(sdf_file_path))

        writer = Chem.SDWriter(str(sdf_file_path))
        try:
            writer.SetKekulize(False)

            for ligand in top_ligands:
                mol = ligand.mol.m  # RDKit molecule

                properties = ligand.properties
                existing_properties = mol.GetPropsAsDict()
                if ligand.name:
                    mol.SetProp("_Name", ligand.name)
                if ligand.mol.smiles:
                    mol.SetProp("_SMILES", ligand.mol.smiles)

                # SetProp stores strings, so every value is stringified.
                for prop_name, prop_value in existing_properties.items():
                    mol.SetProp(prop_name, str(prop_value))

                for prop_name, prop_value in properties.items():
                    mol.SetProp(prop_name, str(prop_value))

                writer.write(mol)
        finally:
            # Close (and flush) the writer even if a ligand fails to serialize.
            writer.close()

        try:
            self.results[0].protein.write_to_file(str(save_dir_path / f"{self.results[0].protein.name}.pdb"))
        except Exception as e:
            # Best effort: the ligands are already on disk, so only log.
            DEFAULT_LOGGER.log_error(f"Failed to write protein to file: {e}")

        save_bounding_box(
            self.pocket_data.box_center, self.pocket_data.box_size, output_file=str(save_dir_path / "bounding_box.pdb")
        )
        return str(save_dir_path)

    @jupyter_visualization
    def visualize(
        self,
        protein_path=None,
        protein_format=None,
        sdf_file_path=None,
        crystal_ligand_path=None,
        crystal_ligand_format=None,
    ):
        """
        Visualizes the docking report by rendering the merged structures of
        protein and ligands.

        Args:
            protein_path (str, optional): Path to the protein file. Defaults to the
                protein file of the first result.
            protein_format (str, optional): Format of the protein file (e.g., pdb).
                Required when ``protein_path`` is given.
            sdf_file_path (str, optional): Path to the ligand file in SDF format. If
                omitted, the report is first saved under the system temporary
                directory and the resulting SDF file is used.
            crystal_ligand_path (str, optional): Path to a crystal ligand to overlay.
            crystal_ligand_format (str, optional): Format of the crystal ligand file.
                The crystal ligand is only passed on when both path and format are given.

        Raises:
            ValueError: If `protein_path` is provided without `protein_format`, if
                the protein must be taken from the results but there are none, or
                if no SDF path is given and there are no top ligands to save.

        Returns:
            The value returned by ``DockingViewer.render_with_seperate_crystal``;
            the ``jupyter_visualization`` decorator receives it for display.
        """
        # Validate arguments before touching the file system.
        if protein_path is not None and protein_format is None:
            raise ValueError("Please provide the protein format along with the protein path.")

        if protein_path is None:
            if not self.results:
                raise ValueError("No results found to extract protein information from.")
            protein_path = str(self.results[0].protein.file_path)
            protein_format = self.results[0].protein.block_type

        if sdf_file_path is None:
            saved_dir = self.save(save_dir=tempfile.gettempdir())
            if saved_dir is None:
                # save() returns None when no result has a top ligand.
                raise ValueError("No docked ligands found to visualize.")
            sdf_file_path = str(Path(saved_dir) / "docking_report_top_ligands.sdf")

        viewer = DockingViewer()

        crystal_data = None
        if crystal_ligand_path and crystal_ligand_format:
            crystal_data = {"raw": str(crystal_ligand_path), "format": crystal_ligand_format}

        html_content = viewer.render_with_seperate_crystal(
            protein_data=protein_path,
            protein_format=protein_format,
            ligands_data=[sdf_file_path],
            ligand_format="sdf",
            crystal_data=crystal_data,
        )

        return html_content

A class to handle and report docking results.

This class provides functionality to manage docking results, generate reports, save results to files, and visualize protein-ligand complexes.

Attributes

results : List[DockingResult]
A list of docking results.
pocket_data
Data about the binding pocket used in docking.

Methods

_to_dataframe(include_props=None): Converts docking results to a pandas DataFrame. _repr_html_(): Returns HTML representation of the docking report. generate_custom_report(include_props=False): Generates a custom HTML report with specified properties. save(save_dir=None, safe=True): Saves docking results to SDF file with properties. visualize(protein_path=None, protein_format=None, sdf_file_path=None, crystal_ligand_path=None, crystal_ligand_format=None): Visualizes the docking results in 3D.

Examples

>>> report = DockingReport(results, pocket_data)
>>> report.save()  # Saves results to an SDF file
>>> report.visualize()  # Shows 3D visualization of results

Methods

def generate_custom_report(self, include_props=False)
Expand source code
def generate_custom_report(self, include_props=False):
    """
    Build an HTML report of the docking results.

    The results table is obtained from ``_to_dataframe``, styled so numbers are
    shown with 3 decimal places, and wrapped in an ``HTML`` object.

    Args:
        include_props (bool, optional): Forwarded to ``_to_dataframe`` to request
            the additional property columns. Defaults to False.

    Returns:
        HTML: The styled table rendered as HTML.

    Example:
        >>> report = obj.generate_custom_report(include_props=True)
        >>> display(report)  # In Jupyter notebook
    """
    table = self._to_dataframe(include_props)
    styled_table = table.style.format(precision=3)
    markup = styled_table._repr_html_()
    return HTML(markup)

Generate a custom HTML report from the data.

This method converts the internal data to a styled pandas DataFrame and returns it as HTML. The resulting DataFrame is formatted with 3 decimal places precision.

Args

include_props : bool, optional
Whether to include properties in the report. Defaults to False.

Returns

HTML
A styled HTML representation of the data with 3 decimal places precision.

Example

>>> report = obj.generate_custom_report(include_props=True)
>>> display(report)  # In Jupyter notebook
def save(self, save_dir=None, safe=True)
Expand source code
def save(self, save_dir=None, safe=True):
    """
    Save docking results to files in a new timestamped directory.

    Args:
        save_dir (str or Path, optional): Parent directory for the output. If not
            given, the ``END_USER_HOME`` environment variable is used, falling
            back to the current directory.
        safe (bool, optional): If True and the SDF file already exists, it is
            handed to ``move_file_with_extension`` instead of being removed.
            Defaults to True.

    Returns:
        str or None: Path to the created ``docking_report_<timestamp>`` directory,
        or None if no result has a top ligand (nothing is written in that case).

    Files Created:
        - docking_report_top_ligands.sdf: Contains the top scoring ligands with their properties
        - {protein_name}.pdb: Protein structure file of the first result
        - bounding_box.pdb: File containing the docking box information

    Notes:
        The saved SDF file includes:
        - Molecule structure
        - Molecule name (if available)
        - SMILES string (if available)
        - All existing molecular properties
        - All additional properties from docking results
        Properties are set on the ligands' RDKit molecules in place. A failure to
        write the protein file is logged and does not abort the save.
    """
    top_ligands = [result.top_ligand for result in self.results if result.top_ligand]
    if not top_ligands:
        return None

    # Resolve the parent directory first so the timestamped name is built once.
    base_dir = Path(save_dir) if save_dir else Path(os.getenv("END_USER_HOME", "."))
    save_dir_path = base_dir / f"docking_report_{datetime.now().strftime('%m-%d-%Y|%H:%M:%S')}"

    save_dir_path.mkdir(parents=True, exist_ok=True)
    sdf_file_path = save_dir_path / "docking_report_top_ligands.sdf"
    if safe and sdf_file_path.exists():
        move_file_with_extension(str(sdf_file_path), "sdf")
    else:
        remove_file(str(sdf_file_path))

    writer = Chem.SDWriter(str(sdf_file_path))
    try:
        writer.SetKekulize(False)

        for ligand in top_ligands:
            mol = ligand.mol.m  # RDKit molecule

            properties = ligand.properties
            existing_properties = mol.GetPropsAsDict()
            if ligand.name:
                mol.SetProp("_Name", ligand.name)
            if ligand.mol.smiles:
                mol.SetProp("_SMILES", ligand.mol.smiles)

            # SetProp stores strings, so every value is stringified.
            for prop_name, prop_value in existing_properties.items():
                mol.SetProp(prop_name, str(prop_value))

            for prop_name, prop_value in properties.items():
                mol.SetProp(prop_name, str(prop_value))

            writer.write(mol)
    finally:
        # Close (and flush) the writer even if a ligand fails to serialize.
        writer.close()

    try:
        self.results[0].protein.write_to_file(str(save_dir_path / f"{self.results[0].protein.name}.pdb"))
    except Exception as e:
        # Best effort: the ligands are already on disk, so only log.
        DEFAULT_LOGGER.log_error(f"Failed to write protein to file: {e}")

    save_bounding_box(
        self.pocket_data.box_center, self.pocket_data.box_size, output_file=str(save_dir_path / "bounding_box.pdb")
    )
    return str(save_dir_path)

Save docking results to files in a specified directory.

Args

save_dir : str or Path, optional
Directory path where the results will be saved.
If None, creates a directory in END_USER_HOME or current directory.
safe : bool, optional
If True, moves existing files with the same name instead of overwriting them. Defaults to True.

Returns

str or None
Path to the created directory containing saved files if successful, None if no top ligands exist.

Files Created

  • docking_report_top_ligands.sdf: Contains the top scoring ligands with their properties
  • {protein_name}.pdb: Protein structure file
  • bounding_box.pdb: File containing the docking box information

Notes

The saved SDF file includes:

  • Molecule structure
  • Molecule name (if available)
  • SMILES string (if available)
  • All existing molecular properties
  • All additional properties from docking results

def visualize(*args, **kwargs)
Expand source code
def wrapper(*args, **kwargs):
    # Run the wrapped function with the caller's arguments, then hand whatever
    # it returns to JupyterViewer.visualize and return that result.
    return JupyterViewer.visualize(func(*args, **kwargs))
class DockingResult (protein: Protein,
smiles: str | None = None,
file_path: str | None = None,
successful: bool | None = True)
Expand source code
class DockingResult:
    """
    A class representing the results of a molecular docking operation.

    This class stores and manages the results of a molecular docking simulation, including
    the protein target, docked ligands, and associated metadata. It provides methods for
    analyzing and visualizing the docking results.

    Attributes:
        protein (Protein): The protein target used in the docking.
        ligands (List[Ligand]): List of docked ligand poses.
        rmsds (Optional[List[float]]): RMSD values compared to crystal structure if calculated.
        top_ligand (Optional[Ligand]): The highest scoring ligand pose. Starts as None;
            nothing in this class assigns it.
        smiles (Optional[str]): SMILES string representation of the ligand.
        successful (bool): Whether the docking operation was successful.
        file_path (Optional[str]): Path to the docking results file.

    Methods:
        add_ligand(ligand: Ligand): Add a docked ligand pose to results.
        calculate_rmsds_from_crystal(crystal_ligand: Union[Ligand, str]): Calculate RMSD values against crystal structure.
        _to_sdf(safe=True, sdf_file_path=None): Export docking results to SDF file.
        _to_dataframe(): Convert results to a styled pandas table.
        visualize(crystal_ligand_path=None, crystal_ligand_format=None): Visualize docking results.
        analyze(index: Optional[int] = None): Generate detailed analysis of docking poses.

    Examples:
        >>> result = DockingResult(protein, smiles="CC(=O)OC1=CC=CC=C1C(=O)O")
        >>> result.add_ligand(docked_pose)
        >>> result.analyze()
    """

    def __init__(
        self,
        protein: Protein,
        smiles: Optional[str] = None,
        file_path: Optional[str] = None,
        successful: Optional[bool] = True,
    ):
        self.protein = protein
        self.ligands: List[Ligand] = []
        self.rmsds: Optional[List[float]] = None
        self.top_ligand: Optional[Ligand] = None
        self.smiles = smiles
        self.successful = successful
        self.file_path = file_path

    def add_ligand(self, ligand: Ligand):
        """
        Add a ligand to the list of ligands.

        Args:
            ligand (Ligand): The ligand object to be added to the list.

        Returns:
            None
        """
        self.ligands.append(ligand)

    def calculate_rmsds_from_crystal(self, crystal_ligand: Ligand | str):
        """
        Calculate RMSD values between the docked poses and a crystal structure.

        Runs the 'obrms' command line tool on this result's ``file_path`` and the
        crystal ligand's file, parses the last whitespace-separated token of every
        non-blank output line as a float, and stores the list in ``self.rmsds``.

        Args:
            crystal_ligand (Union[Ligand, str]): Either a Ligand object or a file path
                (string or path-like) of the crystal structure to compare against.

        Returns:
            list[float]: The parsed RMSD values, one per output line of 'obrms'.

        Raises:
            SystemError: If RMSD calculation fails for any reason, including a
                missing 'obrms' executable, a non-zero exit status, or output
                that cannot be parsed.

        Example:
            >>> result = DockingResult(protein, file_path="path/to/docked_poses.sdf")
            >>> rmsds = result.calculate_rmsds_from_crystal("path/to/crystal.pdb")
        """
        if isinstance(crystal_ligand, (str, os.PathLike)):
            crystal_ligand = Ligand(file_path=str(crystal_ligand))

        try:
            # check=True turns a failing obrms run into an exception instead of
            # silently yielding an empty RMSD list from its empty stdout.
            completed = subprocess.run(
                ["obrms", self.file_path, crystal_ligand.file_path],
                capture_output=True,
                text=True,
                check=True,
            )
            self.rmsds = [float(line.split()[-1]) for line in completed.stdout.splitlines() if line.strip()]
            return self.rmsds
        except Exception as e:
            raise SystemError(f"Failed to calculate RMSD values: {e}") from e

    def _to_sdf(self, safe=True, sdf_file_path=None):
        """
        Convert ligands to SDF file format.

        This method writes ligand molecules and their properties to a Structure-Data File (SDF).
        Each ligand's properties are stored as SDF tags in the output file. The
        properties are set on the ligands' RDKit molecules in place.

        Args:
            safe (bool, optional): If True and the target file exists, it is handed to
                ``move_file_with_extension`` instead of being removed.
                Defaults to True.
            sdf_file_path (str, optional): Custom path for the output SDF file.
                If None, the file is named after the protein name and SMILES and
                placed next to ``self.file_path``, or in the system temporary
                directory when no ``file_path`` is set. Defaults to None.

        Returns:
            str or None: Path to the created SDF file, or None if no ligands exist.

        Example:
            result._to_sdf(safe=True, sdf_file_path="output.sdf")
        """
        if not self.ligands:
            return None

        if not sdf_file_path:
            # Use the temp directory itself as fallback; taking dirname() of it
            # would point at its parent (typically the filesystem root).
            output_dir = os.path.dirname(self.file_path) if self.file_path else tempfile.gettempdir()
            # SMILES use "/" and "\" for bond stereochemistry; in a file name they
            # would be interpreted as path separators.
            smiles_tag = str(self.smiles).replace("/", "_").replace("\\", "_")
            sdf_file_path = os.path.join(
                output_dir,
                f"{self.protein.name}_docking_result_{smiles_tag}.sdf",
            )

        if safe and os.path.isfile(sdf_file_path):
            move_file_with_extension(sdf_file_path, "sdf")
        else:
            remove_file(sdf_file_path)

        writer = Chem.SDWriter(sdf_file_path)
        try:
            writer.SetKekulize(False)

            for ligand in self.ligands:
                mol = ligand.mol.m  # RDKit molecule from Ligand

                for prop_name, prop_value in ligand.properties.items():
                    mol.SetProp(prop_name, str(prop_value))

                writer.write(mol)
        finally:
            # Close (and flush) the writer even if a ligand fails to serialize.
            writer.close()

        return sdf_file_path

    def _to_dataframe(self):
        """
        Converts ligand data to a formatted pandas table.

        Creates a table containing pose rankings, scores, binding energies, and file paths
        for each ligand. The data is sorted by Pose Score in descending order.

        Returns:
            pandas.io.formats.style.Styler: A styled DataFrame with the following columns:
                - Ligand Pose Rank ID: Position of the pose in ``self.ligands`` (1-based)
                - Pose Score: Rounded to 3 decimal places
                - Binding Energy: Rounded to 3 decimal places
                - Path To Docked Pose: File path to the docked ligand pose

        Note:
            - Missing Binding Energy or Pose Score values default to 0.0
            - All numeric values are formatted to 3 decimal places in the output
            - With no ligands the table is empty but keeps the columns above
        """
        columns = ["Ligand Pose Rank ID", "Pose Score", "Binding Energy", "Path To Docked Pose"]
        rows = []
        for rank, ligand in enumerate(self.ligands, start=1):
            mol_props = ligand.properties

            energy_score = float(mol_props.get("Binding Energy", "0.0"))
            pose_score = float(mol_props.get("Pose Score", "0.0"))

            rows.append(
                {
                    "Ligand Pose Rank ID": rank,
                    "Pose Score": round(pose_score, 3),
                    "Binding Energy": round(energy_score, 3),
                    "Path To Docked Pose": ligand.file_path,
                }
            )
        # Passing the columns explicitly keeps them on an empty frame, so the
        # sort below cannot raise KeyError when there are no ligands.
        df = pd.DataFrame(rows, columns=columns)
        # Sort by Pose Score descending
        df = df.sort_values(by="Pose Score", ascending=False).reset_index(drop=True)
        return df.style.format(precision=3)

    def _repr_html_(self):
        """Return the styled pose table as HTML for notebook display."""
        styled = self._to_dataframe()
        return styled._repr_html_()

    def __str__(self):
        return (
            f"DockingResult:\n  Number of Ligands: {len(self.ligands)}\n"
            f"  SMILES: {self.smiles if self.smiles else 'Not provided'}\n"
            f"  File Path: {self.file_path if self.file_path else 'Not provided'}"
        )

    def __repr__(self):
        return self.__str__()

    @jupyter_visualization
    def visualize(self, crystal_ligand_path=None, crystal_ligand_format=None):
        """
        Visualize docking results with an optional crystal ligand overlay.

        Args:
            crystal_ligand_path (str, optional): File path to the crystal ligand structure.
            crystal_ligand_format (str, optional): Format of the crystal ligand file (e.g., 'pdb', 'mol2').
                The crystal ligand is only passed on when both path and format are given.

        Returns:
            The value returned by ``DockingViewer.render_with_seperate_crystal``;
            the ``jupyter_visualization`` decorator receives it for display.

        Note:
            The ligands are read from ``self.file_path`` and are always declared
            as SDF to the viewer. The protein is passed with its own
            ``block_type`` as format.
        """

        visualization_format = "sdf"
        crystal_data = None
        if crystal_ligand_format and crystal_ligand_path:
            crystal_data = {"raw": str(crystal_ligand_path), "format": crystal_ligand_format}

        return DockingViewer().render_with_seperate_crystal(
            protein_data=str(self.protein.file_path),
            protein_format=self.protein.block_type,
            ligands_data=[str(self.file_path)],
            ligand_format=visualization_format,
            crystal_data=crystal_data,
        )

    def analyze(self, index: Optional[int] = None):
        """
        Analyze protein-ligand interactions using interaction fingerprinting.

        The protein (a deep copy) and all ligand poses are written to a temporary
        directory, and a fingerprint is computed either for a single pose
        (specified by index) or for all poses.

        Args:
            index (Optional[int]): The zero-based index of the specific ligand pose to
                analyze. If None, analyzes all poses. Defaults to None.

        Returns:
            - If index is None: the result of ``fp.to_dataframe(index_col="Pose")``,
              after a barcode plot has been drawn
            - If index is specified: the result of ``fp.plot_lignetwork`` for that pose

        Raises:
            ValueError: If no ligands or protein are found to analyze
            IndexError: If the provided ligand index is negative or out of range

        Notes:
            - Creates temporary files for processing
            - Sets custom Van der Waals radii for Fe, H, and O atoms
            - Reads the poses with RDKit without sanitization
        """
        if not self.ligands:
            raise ValueError("No ligands found to analyze.")

        # Work on a copy so that pointing file_path at the temp file below does
        # not modify self.protein.
        protein = deepcopy(self.protein)
        if not protein:
            raise ValueError("No protein found to analyze.")

        fp = plf.Fingerprint()
        with tempfile.TemporaryDirectory() as temp_dir:
            protein_file = os.path.join(temp_dir, f"{protein.name}.pdb")

            protein.write_to_file(protein_file)
            protein.file_path = protein_file

            sdf_file_path = self._to_sdf(sdf_file_path=os.path.join(temp_dir, f"{protein.name}_docking_result.sdf"))

            # NOTE(review): `v` is not passed to `fp`; this presumably relies on
            # `vdwradii` being a table shared with the fingerprint's own
            # VdWContact interaction - confirm against the prolif version in use.
            v = VdWContact()
            v.vdwradii["Fe"] = 2.0
            v.vdwradii["H"] = 1.05
            v.vdwradii["O"] = 1.48

            rdkit_prot = Chem.MolFromPDBFile(protein_file, False, False)
            protein_mol = plf.Molecule(rdkit_prot)
            pose_iterable = plf.sdf_supplier(str(sdf_file_path))
            # Swap in a supplier that skips sanitization when reading the poses.
            sdf_supp = Chem.SDMolSupplier(str(sdf_file_path), sanitize=False)
            pose_iterable._suppl = sdf_supp

            if index is not None:
                if index < 0 or index >= len(sdf_supp):
                    raise IndexError("Ligand index out of range.")

                single_ligand_iterable = pose_iterable[index]
                fp.run_from_iterable([single_ligand_iterable], protein_mol)

                result = fp.plot_lignetwork(single_ligand_iterable)
            else:
                fp.run_from_iterable(pose_iterable, protein_mol)
                fp.plot_barcode(xlabel="Pose")

                result = fp.to_dataframe(index_col="Pose")

        return result

A class representing the results of a molecular docking operation.

This class stores and manages the results of a molecular docking simulation, including the protein target, docked ligands, and associated metadata. It provides methods for analyzing and visualizing the docking results.

Attributes

protein : Protein
The protein target used in the docking.
ligands : List[Ligand]
List of docked ligand poses.
rmsds : Optional[List[float]]
RMSD values compared to crystal structure if calculated.
top_ligand : Optional[Ligand]
The highest scoring ligand pose.
smiles : Optional[str]
SMILES string representation of the ligand.
successful : bool
Whether the docking operation was successful.
file_path : Optional[str]
Path to the docking results file.

Methods

add_ligand(ligand: Ligand): Add a docked ligand pose to results. calculate_rmsds_from_crystal(crystal_ligand: Union[Ligand, str]): Calculate RMSD values against crystal structure. _to_sdf(safe=True, sdf_file_path=None): Export docking results to SDF file. _to_dataframe(): Convert results to pandas DataFrame. visualize(crystal_ligand_path=None, crystal_ligand_format=None): Visualize docking results. analyze(index: Optional[int] = None): Generate detailed analysis of docking poses.

Examples

>>> result = DockingResult(protein, smiles="CC(=O)OC1=CC=CC=C1C(=O)O")
>>> result.add_ligand(docked_pose)
>>> result.analyze()

Methods

def add_ligand(self,
ligand: Ligand)
Expand source code
def add_ligand(self, ligand: Ligand):
    """
    Register one more docked pose on this result.

    Args:
        ligand (Ligand): The pose to append to the end of ``self.ligands``.

    Returns:
        None
    """
    poses = self.ligands
    poses.append(ligand)

Add a ligand to the list of ligands.

Args

ligand : Ligand
The ligand object to be added to the list.

Returns

None

def analyze(self, index: int | None = None)
Expand source code
def analyze(self, index: Optional[int] = None):
    """
    Compute protein-ligand interaction fingerprints for the docked poses.

    A deep copy of the protein and all ligand poses are written to a temporary
    directory. The fingerprint is then computed either for one pose (when
    ``index`` is given) or for every pose.

    Args:
        index (Optional[int]): Zero-based index of the pose to analyze.
            If None, all poses are analyzed. Defaults to None.

    Returns:
        - If index is None: the result of ``to_dataframe(index_col="Pose")`` on the
          fingerprint, after a barcode plot has been drawn
        - If index is specified: the result of ``plot_lignetwork`` for that pose

    Raises:
        ValueError: If no ligands or protein are found to analyze
        IndexError: If the provided ligand index is negative or out of range

    Notes:
        - Creates temporary files for processing
        - Sets custom Van der Waals radii for Fe, H, and O atoms
        - Reads the poses with RDKit without sanitization
    """
    if not self.ligands:
        raise ValueError("No ligands found to analyze.")

    # A copy is used so that redirecting file_path below leaves self.protein alone.
    protein_copy = deepcopy(self.protein)
    if not protein_copy:
        raise ValueError("No protein found to analyze.")

    fingerprint = plf.Fingerprint()
    with tempfile.TemporaryDirectory() as workdir:
        pdb_path = os.path.join(workdir, f"{protein_copy.name}.pdb")
        protein_copy.write_to_file(pdb_path)
        protein_copy.file_path = pdb_path

        poses_path = str(
            self._to_sdf(sdf_file_path=os.path.join(workdir, f"{protein_copy.name}_docking_result.sdf"))
        )

        # Override selected Van der Waals radii on a VdWContact instance.
        vdw_contact = VdWContact()
        for element, radius in (("Fe", 2.0), ("H", 1.05), ("O", 1.48)):
            vdw_contact.vdwradii[element] = radius

        receptor = plf.Molecule(Chem.MolFromPDBFile(pdb_path, False, False))
        poses = plf.sdf_supplier(poses_path)
        # Swap in a supplier that skips sanitization when reading the poses.
        raw_supplier = Chem.SDMolSupplier(poses_path, sanitize=False)
        poses._suppl = raw_supplier

        if index is None:
            fingerprint.run_from_iterable(poses, receptor)
            fingerprint.plot_barcode(xlabel="Pose")
            return fingerprint.to_dataframe(index_col="Pose")

        if not 0 <= index < len(raw_supplier):
            raise IndexError("Ligand index out of range.")

        selected_pose = poses[index]
        fingerprint.run_from_iterable([selected_pose], receptor)
        return fingerprint.plot_lignetwork(selected_pose)

Analyze protein-ligand interactions using PLIPy fingerprinting.

This method analyzes the interactions between a protein and its ligands using the PLIP (Protein-Ligand Interaction Profiler) fingerprinting approach. It can analyze either a single ligand pose (specified by index) or all ligand poses.

Args

index : Optional[int]
The index of the specific ligand pose to analyze. If None, analyzes all poses. Defaults to None.

Returns

Union[pd.DataFrame, Dict]
  • If index is None: Returns a DataFrame containing interaction fingerprints for all poses
  • If index is specified: Returns a dictionary containing the ligand network plot data

Raises

ValueError
If no ligands or protein are found to analyze
IndexError
If the provided ligand index is out of range

Notes

  • Creates temporary files for processing
  • Sets custom Van der Waals radii for Fe, H, and O atoms
  • Uses RDKit for molecular operations
  • Uses PLIPy for fingerprint generation
def calculate_rmsds_from_crystal(self,
crystal_ligand: Ligand | str)
Expand source code
def calculate_rmsds_from_crystal(self, crystal_ligand: Ligand | str):
    """
    Calculate RMSD values between this ligand and a crystal structure.

    Runs the 'obrms' command line tool on ``self.file_path`` and the crystal
    ligand's ``file_path``, then parses the last whitespace-separated token of
    every non-blank line of its standard output as a float. The parsed values
    are stored on ``self.rmsds`` and returned.

    Args:
        crystal_ligand (Union[Ligand, str]): Either a Ligand object or a file path string
            representing the crystal structure to compare against. A string is wrapped
            in ``Ligand(file_path=...)``.

    Returns:
        list[float]: One RMSD value per non-blank output line of 'obrms'.

    Raises:
        SystemError: If 'obrms' cannot be started, exits with a non-zero status,
            or prints output that cannot be parsed. The underlying exception is
            attached as ``__cause__``.

    Example:
        >>> ligand = Ligand(file_path="path/to/ligand.pdb")
        >>> rmsds = ligand.calculate_rmsds_from_crystal("path/to/crystal.pdb")
    """
    if isinstance(crystal_ligand, str):
        crystal_ligand = Ligand(file_path=str(crystal_ligand))

    try:
        result = subprocess.run(
            ["obrms", self.file_path, crystal_ligand.file_path],
            capture_output=True,
            text=True,
        )
        # A failed run used to produce an empty stdout and therefore a silent
        # empty list; surface it as an error instead.
        if result.returncode != 0:
            raise RuntimeError(
                f"obrms exited with status {result.returncode}: {result.stderr.strip()}"
            )
        # The RMSD is the last token on each line; skip blank/whitespace-only lines.
        rmsds = [
            float(line.split()[-1])
            for line in result.stdout.splitlines()
            if line.strip()
        ]
    except Exception as e:
        raise SystemError(f"Failed to calculate RMSD values: {e}") from e

    self.rmsds = rmsds
    return rmsds

Calculate RMSD values between this ligand and a crystal structure.

This method computes Root Mean Square Deviation (RMSD) values between the current ligand and a reference crystal structure using the 'obrms' command line tool.

Args

crystal_ligand : Union[Ligand, str]
Either a Ligand object or a file path string representing the crystal structure to compare against.

Returns

list[float]
A list of RMSD values computed between the current ligand and the crystal structure.

Raises

SystemError
If RMSD calculation fails for any reason.

Example

>>> ligand = Ligand("path/to/ligand.pdb") 
>>> rmsds = ligand.calculate_rmsds_from_crystal("path/to/crystal.pdb")
def visualize(*args, **kwargs)
Expand source code
def wrapper(*args, **kwargs):
    # Call the wrapped function to obtain its HTML visualization, then hand
    # that result to the Jupyter viewer and return whatever the viewer returns.
    return JupyterViewer.visualize(func(*args, **kwargs))
class MolPropsReport (results)
Expand source code
class MolPropsReport:
    """
    A class for handling and displaying molecular property calculation results.

    This class processes molecular property calculation results and can convert them
    into a formatted pandas DataFrame for display, particularly useful in Jupyter notebooks.

    Args:
        results (list): A list of dictionaries containing molecular property calculation results.
                       Each dictionary must contain a 'smiles' key and additional property keys.

    Attributes:
        results (list): The stored results from molecular property calculations.

    Methods:
        _to_dataframe(): Converts the results into a formatted pandas DataFrame.
        _repr_html_(): Returns HTML representation of the data for display in Jupyter notebooks.

    Examples:
        >>> results = [{"smiles": "CC", "property1": 0.5, "property2": 1.0}]
        >>> report = MolPropsReport(results)
        >>> df = report._to_dataframe()
    """

    def __init__(self, results):
        self.results = results

    def _collect_columns(self):
        """
        Build the column-name -> list-of-values mapping behind the DataFrame.

        The "SMILES" column comes first, followed by every other key found in
        any result, in first-seen order. A result that lacks a given key
        contributes ``None`` for that column. An empty ``results`` list yields
        only an empty "SMILES" column.

        Returns:
            dict: Mapping of column name to a list with one entry per result.
        """
        # dict.fromkeys gives an ordered, de-duplicated set of property names
        # across all results, not just the first one.
        property_names = dict.fromkeys(
            key for result in self.results for key in result if key != "smiles"
        )

        columns = {"SMILES": [result["smiles"] for result in self.results]}
        for name in property_names:
            columns[name] = [result.get(name) for result in self.results]
        return columns

    def _to_dataframe(self):
        """
        Converts the results data into a styled pandas DataFrame.

        Returns:
            pandas.io.formats.style.Styler: The ``DataFrame.style`` object for a frame
            holding the SMILES strings and associated properties, with numerical
            values formatted to 3 decimal places.
        """
        return pd.DataFrame(self._collect_columns()).style.format(precision=3)

    def _repr_html_(self):
        """Return the HTML rendering of the styled DataFrame (Jupyter display hook)."""
        return self._to_dataframe()._repr_html_()

A class for handling and displaying molecular property calculation results.

This class processes molecular property calculation results and can convert them into a formatted pandas DataFrame for display, particularly useful in Jupyter notebooks.

Args

results : list
A list of dictionaries containing molecular property calculation results. Each dictionary must contain a 'smiles' key and additional property keys.

Attributes

results : list
The stored results from molecular property calculations.

Methods

_to_dataframe(): Converts the results into a formatted pandas DataFrame.
_repr_html_(): Returns HTML representation of the data for display in Jupyter notebooks.

Examples

>>> results = [{"smiles": "CC", "property1": 0.5, "property2": 1.0}]
>>> report = MolPropsReport(results)
>>> df = report._to_dataframe()
class PainsReport (results)
Expand source code
class PainsReport:
    """
    A class for generating and displaying PAINS (Pan-Assay Interference Compounds) analysis reports.

    Args:
        results (list): A list of dictionaries containing PAINS analysis results. Each dictionary 
            should contain:
            - smiles (str): SMILES string representation of the molecule
            - PAINS (list or None): List of PAINS pattern SMARTS strings that match the molecule
                                   or None if no matches found

    Methods:
        get_html_of_molecule(result): Generates HTML img tag with highlighted PAINS matches
        _to_dataframe(): Converts results to pandas DataFrame
        _repr_html_(): Returns HTML representation for Jupyter display

    Examples:
        >>> results = [{'smiles': 'CC(=O)Oc1ccccc1C(=O)O', 'PAINS': ['[O,S]-[CH2]-[CH2]-[O,S]']}]
        >>> report = PainsReport(results)
        >>> print(report)
        PainsReport:
          Number of results: 1

    Notes:
        Requires RDKit for molecular operations and visualization.
        Implements Jupyter notebook display protocol via _repr_html_.
    """

    def __init__(self, results):
        self.results = results

    def get_html_of_molecule(self, result):
        """
        Render one molecule as an inline PNG ``<img>`` tag with PAINS atoms highlighted.

        For every SMARTS string in ``result["PAINS"]`` the atoms of the first
        substructure match are highlighted. A pattern that does not occur in
        the molecule contributes no atoms.

        Args:
            result (dict): Entry with a 'smiles' string and a 'PAINS' list (or None).

        Returns:
            str: An ``<img>`` tag whose source is a base64-encoded PNG data URI.

        Raises:
            ValueError: If the SMILES string cannot be parsed.
        """
        molecule = Chem.MolFromSmiles(result["smiles"])
        if molecule is None:
            raise ValueError(f"Could not parse SMILES: {result['smiles']!r}")

        all_matches = []
        if result["PAINS"] is not None:
            for smarts in result["PAINS"]:
                atom_matches = molecule.GetSubstructMatches(Chem.MolFromSmarts(smarts))
                # No match returns an empty tuple; indexing it would raise IndexError.
                if atom_matches:
                    all_matches.extend(atom_matches[0])

        # NOTE: these are class-level drawing options, so the assignment affects
        # later drawings made through the same options object as well.
        Draw.DrawingOptions.atomHighlightsAreCircles = True
        Draw.DrawingOptions.atomHighlightColors = {i: (1, 0, 0) for i in set(all_matches)}

        img = Draw.MolToImage(molecule, size=(200, 100), highlightAtoms=all_matches)

        buffer = BytesIO()
        img.save(buffer, format="PNG")
        img_str = base64.b64encode(buffer.getvalue()).decode("utf-8")
        return '<img src="data:image/png;base64,{0}">'.format(img_str)

    def _to_dataframe(self):
        """
        Convert the results into a pandas DataFrame.

        Returns:
            pd.DataFrame: A DataFrame with the following columns:
                - SMILES: SMILES string of each molecule
                - Molecule Image: HTML ``<img>`` tag for each molecule
                - SMARTS patterns of PAINS: The 'PAINS' entry of each result
        """
        return pd.DataFrame.from_dict(
            {
                "SMILES": [result["smiles"] for result in self.results],
                "Molecule Image": [self.get_html_of_molecule(result) for result in self.results],
                "SMARTS patterns of PAINS": [result["PAINS"] for result in self.results],
            }
        )

    def _repr_html_(self):
        """Return the DataFrame as HTML; escaping is disabled so the <img> tags render."""
        return self._to_dataframe().to_html(escape=False)

    def __str__(self):
        return f"PainsReport:\n  Number of results: {len(self.results)}"

    def __repr__(self):
        return self.__str__()

A class for generating and displaying PAINS (Pan-Assay Interference Compounds) analysis reports.

Args

results : list
A list of dictionaries containing PAINS analysis results. Each dictionary should contain:
  • smiles (str): SMILES string representation of the molecule
  • PAINS (list or None): List of PAINS pattern SMARTS strings that match the molecule, or None if no matches found

Methods

get_html_of_molecule(result): Generates HTML img tag with highlighted PAINS matches
_to_dataframe(): Converts results to pandas DataFrame
_repr_html_(): Returns HTML representation for Jupyter display

Examples

>>> results = [{'smiles': 'CC(=O)Oc1ccccc1C(=O)O', 'PAINS': ['[O,S]-[CH2]-[CH2]-[O,S]']}]
>>> report = PainsReport(results)
>>> print(report)
DockingReport:
  Number of DockingResults: 1

Notes

Requires RDKit for molecular operations and visualization. Implements Jupyter notebook display protocol via _repr_html_.

Methods

def get_html_of_molecule(self, result)
Expand source code
def get_html_of_molecule(self, result):
    """
    Render one molecule as an inline PNG ``<img>`` tag with PAINS atoms highlighted.

    For every SMARTS string in ``result["PAINS"]`` the atoms of the first
    substructure match are highlighted. A pattern that does not occur in the
    molecule contributes no atoms.

    Args:
        result (dict): Entry with a 'smiles' string and a 'PAINS' list (or None).

    Returns:
        str: An ``<img>`` tag whose source is a base64-encoded PNG data URI.

    Raises:
        ValueError: If the SMILES string cannot be parsed.
    """
    molecule = Chem.MolFromSmiles(result["smiles"])
    if molecule is None:
        raise ValueError(f"Could not parse SMILES: {result['smiles']!r}")

    all_matches = []
    if result["PAINS"] is not None:
        for smarts in result["PAINS"]:
            atom_matches = molecule.GetSubstructMatches(Chem.MolFromSmarts(smarts))
            # No match returns an empty tuple; indexing it would raise IndexError.
            if atom_matches:
                all_matches.extend(atom_matches[0])

    # NOTE: these are class-level drawing options, so the assignment affects
    # later drawings made through the same options object as well.
    Draw.DrawingOptions.atomHighlightsAreCircles = True
    Draw.DrawingOptions.atomHighlightColors = {i: (1, 0, 0) for i in set(all_matches)}

    img = Draw.MolToImage(molecule, size=(200, 100), highlightAtoms=all_matches)

    buffer = BytesIO()
    img.save(buffer, format="PNG")
    img_str = base64.b64encode(buffer.getvalue()).decode("utf-8")
    return '<img src="data:image/png;base64,{0}">'.format(img_str)
class PocketFinderReport (protein, csv_file_path='')
Expand source code
class PocketFinderReport:
    """
    PocketFinderReport class for managing protein pocket analysis results.

    A class to handle collection and reporting of protein pocket properties including drugability scores,
    volumes, surface areas and other physicochemical properties.

    Attributes:
        protein: The protein object associated with this report
        file_path (str): Path to save the CSV report file
        pockets (list): List of pocket objects containing analysis results

    Methods:
        add_pocket(pocket): Add a pocket object to the report
        _to_dataframe(): Convert pocket data to pandas DataFrame
        _repr_html_(): Generate HTML representation of the report
        save_props(): Save pocket properties to CSV file

    Example:
        report = PocketFinderReport(protein_obj, "output.csv")
        report.add_pocket(pocket_obj)
        report.save_props()
    """
    def __init__(self, protein, csv_file_path=""):
        self.protein = protein
        self.file_path = csv_file_path
        self.pockets = []

    def add_pocket(self, pocket):
        """
        Add a pocket to the collection of pockets.

        Args:
            pocket: A pocket object to be added to the pockets list.
        """
        self.pockets.append(pocket)

    def _to_dataframe(self):
        """
        Convert the collected pockets into a pandas DataFrame.

        Pockets whose ``props`` is empty are left out. "Pocket ID" is the
        1-based position of the pocket in ``self.pockets`` (assigned before
        sorting). Missing properties default to 0. Rows are sorted by
        "Drugability Score" in descending order.

        Returns:
            pd.DataFrame: One row per pocket with props. When there are no such
            pockets the frame is empty but still has all columns.
        """
        columns = [
            "Pocket ID",
            "Color",
            "Drugability Score",
            "Volume",
            "Total SASA",
            "Polar SASA",
            "Polar/Apolar SASA Ratio",
            "Hydrophobicity",
            "Polarity",
        ]

        data = []
        for idx, pocket in enumerate(self.pockets):
            props = pocket.props
            if not props:
                continue
            data.append(
                {
                    "Pocket ID": idx + 1,
                    "Color": pocket.color,
                    "Drugability Score": props.get("drugability_score", 0),
                    "Volume": props.get("volume", 0),
                    "Total SASA": props.get("total_SASA", 0),
                    "Polar SASA": props.get("polar_SASA", 0),
                    "Polar/Apolar SASA Ratio": props.get("polar_apolar_SASA_ratio", 0),
                    "Hydrophobicity": props.get("hydrophobicity", 0),
                    "Polarity": props.get("polarity", 0),
                }
            )

        # Passing the columns explicitly keeps "Drugability Score" present even
        # with no rows, so the sort below cannot fail with KeyError.
        df = pd.DataFrame(data, columns=columns)
        # Sort by drugability score, best first.
        df = df.sort_values(by="Drugability Score", ascending=False).reset_index(drop=True)
        return df

    def _repr_html_(self):
        """Return the HTML rendering of the table with 3-decimal number formatting."""
        df = self._to_dataframe()
        return df.style.format(precision=3)._repr_html_()

    def save_props(self):
        """
        Saves the properties of the report to a CSV file.

        This method converts the internal data structure to a pandas DataFrame and saves it
        to the file path specified in self.file_path attribute. The DataFrame is saved
        without the index column.

        Returns:
            None
        """
        df = self._to_dataframe()
        df.to_csv(self.file_path, index=False)

PocketFinderReport class for managing protein pocket analysis results.

A class to handle collection and reporting of protein pocket properties including drugability scores, volumes, surface areas and other physicochemical properties.

Attributes

protein
The protein object associated with this report
file_path : str
Path to save the CSV report file
pockets : list
List of pocket objects containing analysis results

Methods

add_pocket(pocket): Add a pocket object to the report
_to_dataframe(): Convert pocket data to pandas DataFrame
_repr_html_(): Generate HTML representation of the report
save_props(): Save pocket properties to CSV file

Example

report = PocketFinderReport(protein_obj, "output.csv")
report.add_pocket(pocket_obj)
report.save_props()

Methods

def add_pocket(self, pocket)
Expand source code
def add_pocket(self, pocket):
    """
    Register a pocket with this report.

    The pocket is appended to the end of ``self.pockets``.

    Args:
        pocket: The pocket object to store.
    """
    collected = self.pockets
    collected.append(pocket)

Add a pocket to the collection of pockets.

Args

pocket
A pocket object to be added to the pockets list.
def save_props(self)
Expand source code
def save_props(self):
    """
    Write the report table to a CSV file.

    The DataFrame returned by ``self._to_dataframe()`` is written to the
    location stored in ``self.file_path``; the index column is omitted.

    Returns:
        None
    """
    self._to_dataframe().to_csv(self.file_path, index=False)

Saves the properties of the report to a CSV file.

This method converts the internal data structure to a pandas DataFrame and saves it to the file path specified in self.file_path attribute. The DataFrame is saved without the index column.

Returns

None

class ProtonationReport (results)
Expand source code
class ProtonationReport:
    """
    Report over protonation results.

    On construction the incoming results are split into ``self.results``
    (deep copies with any 'html_metadata' removed from the 'protonation'
    entry) and ``self.html_metadata`` (SMILES -> that removed metadata, or
    None when a result had none).
    """

    def __init__(self, results):
        self.results, self.html_metadata = self.split_results(results)

    def split_results(self, results):
        """
        Splits results by separating HTML metadata from the main data.

        This method processes a list of result items, removing HTML metadata from the
        protonation data while preserving it in a separate dictionary mapped by SMILES.

        Args:
            results (list): List of dictionaries containing molecular data with 'smiles' 
                and 'protonation' keys. The 'protonation' may contain 'html_metadata'.

        Returns:
            tuple: A 2-tuple containing:
                - list: Copy of input results with HTML metadata removed from protonation data
                - dict: Mapping of SMILES strings to their corresponding HTML metadata
                  (None when an item carries no metadata)
        """
        list_without_html = []
        smiles_to_html_dict = {}

        for item in results:
            # Work on a deep copy so the caller's data keeps its metadata.
            new_item = copy.deepcopy(item)
            new_item["protonation"].pop("html_metadata", None)
            list_without_html.append(new_item)

            smiles_to_html_dict[item["smiles"]] = item["protonation"].get("html_metadata")

        return list_without_html, smiles_to_html_dict

    def show_plots(self):
        """
        Display a heading and the concentration-curve plot for every molecule.

        Molecules whose metadata is None (no 'html_metadata' in the input)
        are skipped, since there is nothing to plot for them.
        """
        for smi, html_meta in self.html_metadata.items():
            if html_meta is None:
                continue
            centered_html = f"<center><h2>{smi}</h2></center>"
            display(HTML(centered_html))
            self.plot_concentration_curves(html_meta, plot=True)

    def smiles_to_img_html(self, smiles):
        """
        Return an SVG drawing (newlines removed) of the molecule given as SMILES.

        Args:
            smiles (str): SMILES string of the molecule to draw.

        Returns:
            str: Single-line SVG markup, 200 wide and 150 high.
        """
        mol = Chem.MolFromSmiles(smiles)
        AllChem.Compute2DCoords(mol)
        mol.GetConformer().SetId(1)

        return Draw.MolToSVG(mol, width=200, height=150).replace("\n", "")

    def plot_concentration_curves(self, html_meta, plot=False):
        """
        Plot concentration curves for different molecular species across a pH range.

        Args:
            html_meta (tuple): A tuple containing:
            - fractions (list): Matrix of fraction values; it is transposed before
              plotting so that each row of the transposed matrix is one curve
            - smiles_list (list): List of SMILES notations for each species  
            - concentration_values (list): Concentration values for each species
            - pH_range (list): Range of pH values for x-axis
            - pH (float): Current pH value
            plot (bool, optional): If True, displays the plot immediately. Defaults to False.

        Returns:
            plotly.graph_objects.Figure: A plotly figure object containing the concentration curves.
            The plot shows fraction percentage (0-100%) on y-axis vs pH (0-14) on x-axis.
            Each curve is labeled with species SMILES and concentration at current pH.
        """
        fractions, smiles_list, concentration_values, pH_range, pH = html_meta
        fractions = np.transpose(np.array(fractions))

        fig = sp.make_subplots(rows=1, cols=1)
        for i, fraction in enumerate(fractions):
            fig.add_trace(
                go.Scatter(
                    x=pH_range,
                    y=fraction * 100,
                    mode="lines",
                    showlegend=True,
                    name=f"Fraction of {smiles_list[i]} at pH={pH:.1f} is {round(concentration_values[i], 2)}(%)",
                ),
                row=1,
                col=1,
            )
        fig.update_layout(
            xaxis=dict(title="pH", range=[0, 14]),
            yaxis=dict(title="Fraction (%)", range=[0, 100]),
            hovermode="closest",
        )
        if plot:
            fig.show()
        # The figure was documented as the return value but never returned.
        return fig

    def _to_dataframe(self):
        """
        Converts the protonation results to a pandas DataFrame.

        This method processes the protonation results stored in self.results and creates a DataFrame 
        with the following structure:
        - Multi-index with SMILES as the top level
        - Columns:
            - 'protonated SMILES': The SMILES strings of protonated species
            - 'Concentration %': Rounded concentration percentages (to 2 decimal places)
            - 'Molecule Image': HTML representation of molecular structures

        Returns:
            pd.DataFrame: A DataFrame containing the protonation results with molecular structure 
            visualizations. The DataFrame has a multi-index structure where the top level 
            is the original SMILES string. With no results, an empty DataFrame with the
            three columns (and a plain index) is returned.
        """
        data_dict = {}

        for result in self.results:
            smiles = result["smiles"]
            smiles_list = result["protonation"]["smiles_list"]
            concentration_list = result["protonation"]["concentration_list"]
            rounded_concentration = [round(value, 2) for value in concentration_list]
            data_dict[smiles] = pd.DataFrame(
                {"protonated SMILES": smiles_list, "Concentration %": rounded_concentration}
            )

        if not data_dict:
            # pd.concat raises ValueError when given nothing to concatenate.
            return pd.DataFrame(
                columns=["protonated SMILES", "Concentration %", "Molecule Image"]
            )

        df = pd.concat(data_dict.values(), keys=data_dict.keys(), names=["SMILES"])
        df["Molecule Image"] = df["protonated SMILES"].apply(self.smiles_to_img_html)
        return df

    def _repr_html_(self):
        """Return the DataFrame as HTML; escaping is disabled so the SVG images render."""
        df = self._to_dataframe()
        return df.to_html(escape=False)

Methods

def plot_concentration_curves(self, html_meta, plot=False)
Expand source code
def plot_concentration_curves(self, html_meta, plot=False):
    """
    Plot concentration curves for different molecular species across a pH range.

    Args:
        html_meta (tuple): A tuple containing:
        - fractions (list): Matrix of fraction values; it is transposed before
          plotting so that each row of the transposed matrix is one curve
        - smiles_list (list): List of SMILES notations for each species  
        - concentration_values (list): Concentration values for each species
        - pH_range (list): Range of pH values for x-axis
        - pH (float): Current pH value
        plot (bool, optional): If True, displays the plot immediately. Defaults to False.

    Returns:
        plotly.graph_objects.Figure: A plotly figure object containing the concentration curves.
        The plot shows fraction percentage (0-100%) on y-axis vs pH (0-14) on x-axis.
        Each curve is labeled with species SMILES and concentration at current pH.
    """
    fractions, smiles_list, concentration_values, pH_range, pH = html_meta
    fractions = np.transpose(np.array(fractions))

    fig = sp.make_subplots(rows=1, cols=1)
    for i, fraction in enumerate(fractions):
        fig.add_trace(
            go.Scatter(
                x=pH_range,
                y=fraction * 100,
                mode="lines",
                showlegend=True,
                name=f"Fraction of {smiles_list[i]} at pH={pH:.1f} is {round(concentration_values[i], 2)}(%)",
            ),
            row=1,
            col=1,
        )
    fig.update_layout(
        xaxis=dict(title="pH", range=[0, 14]),
        yaxis=dict(title="Fraction (%)", range=[0, 100]),
        hovermode="closest",
    )
    if plot:
        fig.show()
    # The figure was documented as the return value but never returned.
    return fig

Plot concentration curves for different molecular species across a pH range.

Args

html_meta : tuple
A tuple containing:
  • fractions (list): Matrix of fraction values for each species
  • smiles_list (list): List of SMILES notations for each species
  • concentration_values (list): Concentration values for each species
  • pH_range (list): Range of pH values for x-axis
  • pH (float): Current pH value
plot : bool, optional
If True, displays the plot immediately. Defaults to False.

Returns

plotly.graph_objects.Figure
A plotly figure object containing the concentration curves.

The plot shows fraction percentage (0-100%) on y-axis vs pH (0-14) on x-axis. Each curve is labeled with species SMILES and concentration at current pH.

def show_plots(self)
Expand source code
def show_plots(self):
    """
    Display a heading and the concentration-curve plot for every molecule.

    Iterates over ``self.html_metadata`` (SMILES -> plot metadata). Entries
    whose metadata is None are skipped, since there is nothing to plot for
    them and unpacking None would fail.
    """
    for smi, html_meta in self.html_metadata.items():
        if html_meta is None:
            continue
        centered_html = f"<center><h2>{smi}</h2></center>"
        display(HTML(centered_html))
        self.plot_concentration_curves(html_meta, plot=True)
def smiles_to_img_html(self, smiles)
Expand source code
def smiles_to_img_html(self, smiles):
    """
    Return an SVG drawing of the molecule given as SMILES, with newlines removed.

    Args:
        smiles (str): SMILES string of the molecule to draw.

    Returns:
        str: Single-line SVG markup, 200 wide and 150 high.
    """
    molecule = Chem.MolFromSmiles(smiles)
    AllChem.Compute2DCoords(molecule)
    conformer = molecule.GetConformer()
    conformer.SetId(1)

    svg_markup = Draw.MolToSVG(molecule, width=200, height=150)
    return svg_markup.replace("\n", "")
def split_results(self, results)
Expand source code
def split_results(self, results):
    """
    Separate the HTML metadata from the protonation results.

    Each item is deep-copied and the copy's 'protonation' entry loses its
    'html_metadata' key (if present). The metadata object of the original
    item is collected in a dictionary keyed by the item's SMILES string.
    The input list and its items are left untouched.

    Args:
        results (list): Dictionaries with 'smiles' and 'protonation' keys;
            'protonation' may contain 'html_metadata'.

    Returns:
        tuple: A 2-tuple of
            - list: The copied items without 'html_metadata'
            - dict: SMILES string -> HTML metadata (None when an item has none)
    """
    cleaned_items = []
    metadata_by_smiles = {}

    for entry in results:
        duplicate = copy.deepcopy(entry)
        duplicate["protonation"].pop("html_metadata", None)
        cleaned_items.append(duplicate)

        metadata_by_smiles[entry["smiles"]] = entry["protonation"].get("html_metadata")

    return cleaned_items, metadata_by_smiles

Splits results by separating HTML metadata from the main data.

This method processes a list of result items, removing HTML metadata from the protonation data while preserving it in a separate dictionary mapped by SMILES.

Args

results : list
List of dictionaries containing molecular data with 'smiles' and 'protonation' keys. The 'protonation' may contain 'html_metadata'.

Returns

tuple
A 2-tuple containing:
  • list: Copy of input results with HTML metadata removed from protonation data
  • dict: Mapping of SMILES strings to their corresponding HTML metadata