Module `deeporigin.src.structures.reports`

Classes

class DockingReport (results: List[DockingResult], pocket_data)

Expand source code

class DockingReport:
    """
    Collect docking results and report, save, or visualize them.

    Attributes:
        results (List[DockingResult]): A list of docking results.
        pocket_data: Data about the binding pocket used in docking. ``save``
            reads its ``box_center`` and ``box_size`` attributes.

    Methods:
        _to_dataframe(include_props=None): Converts docking results to a pandas DataFrame.
        _repr_html_(): Returns HTML representation of the docking report.
        generate_custom_report(include_props=False): Generates an HTML report, optionally with ligand properties.
        save(save_dir=None, safe=True): Saves top ligands, protein and bounding box to a new directory.
        visualize(protein_path=None, protein_format=None, sdf_file_path=None,
                 crystal_ligand_path=None, crystal_ligand_format=None):
            Visualizes the docking results in 3D.

    Examples:
        >>> report = DockingReport(results, pocket_data)
        >>> report.save()  # Saves results to an SDF file
        >>> report.visualize()  # Shows 3D visualization of results
    """

    # Columns present in every report table, in display order.
    _DEFAULT_COLUMNS = (
        "Image",
        "SMILES",
        "Ranking Score",
        "Binding Energy",
        "Path To Docked Pose",
    )

    def __init__(self, results: List[DockingResult], pocket_data):
        self.results = results
        self.pocket_data = pocket_data

    def _to_dataframe(self, include_props=None):
        """
        Converts docking results to a pandas DataFrame.

        Args:
            include_props (optional): Treated as a flag. When truthy, every
                property of the top ligand whose name does not contain
                "smiles" is added as an extra column. Properties that share a
                name with a default column are skipped so the rounded numeric
                values are not replaced by raw property values.

        Returns:
            pd.DataFrame: One row per docking result with these columns:
            - Image: value returned by the top ligand's ``mol._draw()``
            - SMILES: SMILES string of the result
            - Ranking Score: "Ranking Score" property rounded to 3 decimals
            - Binding Energy: "Binding Energy" property rounded to 3 decimals
            - Path To Docked Pose: ``file_path`` of the top ligand
            Results without a top ligand, or flagged unsuccessful, keep None
            in every column except SMILES. Rows are sorted by Ranking Score in
            descending order. An empty ``results`` list yields an empty
            DataFrame that still carries the default columns.
        """
        rows = []
        for result in self.results:
            row = dict.fromkeys(self._DEFAULT_COLUMNS)
            row["SMILES"] = result.smiles

            if result.top_ligand and result.successful:
                ligand = result.top_ligand
                mol_props = ligand.properties

                # Missing scores default to 0.0; float() also accepts string values.
                energy_score = float(mol_props.get("Binding Energy", "0.0"))
                ranking_score = float(mol_props.get("Ranking Score", "0.0"))
                row["Image"] = ligand.mol._draw()
                row["Ranking Score"] = round(ranking_score, 3)
                row["Binding Energy"] = round(energy_score, 3)
                row["Path To Docked Pose"] = ligand.file_path

                if include_props:
                    for prop in mol_props:
                        # Do not clobber the normalized default columns with raw values.
                        if "smiles" not in prop and prop not in row:
                            row[prop] = mol_props.get(prop, None)
            rows.append(row)

        if not rows:
            # A frame built from [] has no columns, so sorting it would raise KeyError.
            return pd.DataFrame(columns=list(self._DEFAULT_COLUMNS))

        df = pd.DataFrame(rows)
        return df.sort_values(by="Ranking Score", ascending=False).reset_index(drop=True)

    def _repr_html_(self):
        """Return the report table as HTML, formatted with 3 decimal places."""
        styled = self._to_dataframe().style.format(precision=3)
        return styled._repr_html_()

    def generate_custom_report(self, include_props=False):
        """
        Generate a custom HTML report from the data.

        The results are converted with ``_to_dataframe`` and styled with
        3 decimal places precision.

        Args:
            include_props (bool, optional): Whether to include ligand properties
                as extra columns. Defaults to False.

        Returns:
            HTML: The styled table wrapped in an ``HTML`` object.

        Example:
            >>> report = obj.generate_custom_report(include_props=True)
            >>> display(report)  # In Jupyter notebook
        """
        styled = self._to_dataframe(include_props).style.format(precision=3)
        return HTML(styled._repr_html_())

    def __str__(self):
        return f"DockingReport:\n  Number of DockingResults: {len(self.results)}"

    def __repr__(self):
        return self.__str__()

    def save(self, save_dir=None, safe=True):
        """
        Save docking results to files in a new timestamped directory.

        Args:
            save_dir (str or Path, optional): Parent directory for the report
                directory. If not given, the END_USER_HOME environment variable
                is used, falling back to the current directory.
            safe (bool, optional): If True and the SDF file already exists, it is
                handed to ``move_file_with_extension`` instead of being removed.
                Defaults to True.

        Returns:
            str or None: Path to the created directory containing saved files,
            or None if no result has a top ligand.

        Files Created:
            - docking_report_top_ligands.sdf: Top ligand of every result, with properties
            - {protein_name}.pdb: Protein of the first result (failure is logged, not raised)
            - bounding_box.pdb: Docking box written by ``save_bounding_box``

        Notes:
            Properties are set on each ligand's RDKit molecule in place before
            it is written:
            - Molecule name (if available)
            - SMILES string (if available)
            - All existing molecular properties, stored as strings
            - All ligand properties from the docking results, stored as strings
        """
        top_ligands = [result.top_ligand for result in self.results if result.top_ligand]
        if not top_ligands:
            return None

        base_dir = Path(save_dir) if save_dir else Path(os.getenv("END_USER_HOME", "."))
        # NOTE(review): '|' and ':' are not permitted in Windows path components.
        timestamp = datetime.now().strftime("%m-%d-%Y|%H:%M:%S")
        save_dir_path = base_dir / f"docking_report_{timestamp}"

        save_dir_path.mkdir(parents=True, exist_ok=True)
        sdf_file_path = save_dir_path / "docking_report_top_ligands.sdf"
        if safe and sdf_file_path.exists():
            move_file_with_extension(str(sdf_file_path), "sdf")
        else:
            remove_file(str(sdf_file_path))

        writer = Chem.SDWriter(str(sdf_file_path))
        try:
            writer.SetKekulize(False)

            for ligand in top_ligands:
                mol = ligand.mol.m  # RDKit molecule

                # Snapshot the current properties before adding name/SMILES.
                existing_properties = mol.GetPropsAsDict()
                if ligand.name:
                    mol.SetProp("_Name", ligand.name)
                if ligand.mol.smiles:
                    mol.SetProp("_SMILES", ligand.mol.smiles)

                for prop_name, prop_value in existing_properties.items():
                    mol.SetProp(prop_name, str(prop_value))

                for prop_name, prop_value in ligand.properties.items():
                    mol.SetProp(prop_name, str(prop_value))

                writer.write(mol)
        finally:
            # Always release the file handle, even if a molecule fails to write.
            writer.close()

        # Best effort: a protein that cannot be written must not discard the saved ligands.
        try:
            self.results[0].protein.write_to_file(str(save_dir_path / f"{self.results[0].protein.name}.pdb"))
        except Exception as e:
            DEFAULT_LOGGER.log_error(f"Failed to write protein to file: {e}")

        save_bounding_box(
            self.pocket_data.box_center, self.pocket_data.box_size, output_file=str(save_dir_path / "bounding_box.pdb")
        )
        return str(save_dir_path)

    @jupyter_visualization
    def visualize(
        self,
        protein_path=None,
        protein_format=None,
        sdf_file_path=None,
        crystal_ligand_path=None,
        crystal_ligand_format=None,
    ):
        """
        Visualizes the docking report by rendering the protein together with
        the top ligands.

        Args:
            protein_path (str, optional): Path to the protein file. Defaults to
                the ``file_path`` of the first result's protein.
            protein_format (str, optional): Format of the protein file (e.g., pdb).
                Required when ``protein_path`` is given; otherwise taken from the
                first result's protein ``block_type``.
            sdf_file_path (str, optional): Path to the ligand file in SDF format.
                If omitted, ``save(save_dir="/tmp")`` is called and its SDF is used.
            crystal_ligand_path (str, optional): Path to a crystal ligand file.
            crystal_ligand_format (str, optional): Format of the crystal ligand
                file. The crystal ligand is passed on only when both the path and
                the format are given.

        Raises:
            ValueError: If `protein_path` is provided without `protein_format`,
                if there are no results to take the protein from, or if no SDF
                path is given and there are no top ligands to save.

        Returns:
            The output of ``DockingViewer.render_with_seperate_crystal``, as
            post-processed by the ``jupyter_visualization`` decorator.
        """
        # Validate arguments first so that nothing is written to disk on bad input.
        if protein_path is not None and protein_format is None:
            raise ValueError("Please provide the protein format along with the protein path.")

        if protein_path is None:
            if not self.results:
                raise ValueError("No results found to extract protein information from.")
            protein_path = str(self.results[0].protein.file_path)
            protein_format = self.results[0].protein.block_type

        if sdf_file_path is None:
            saved_dir = self.save(save_dir="/tmp")
            if saved_dir is None:
                # save() returns None when no result has a top ligand.
                raise ValueError("No top ligands found to visualize.")
            sdf_file_path = str(Path(saved_dir) / "docking_report_top_ligands.sdf")

        viewer = DockingViewer()

        crystal_data = None
        if crystal_ligand_path and crystal_ligand_format:
            crystal_data = {"raw": str(crystal_ligand_path), "format": crystal_ligand_format}

        html_content = viewer.render_with_seperate_crystal(
            protein_data=protein_path,
            protein_format=protein_format,
            ligands_data=[sdf_file_path],
            ligand_format="sdf",
            crystal_data=crystal_data,
        )

        return html_content

A class to handle and report docking results.

This class provides functionality to manage docking results, generate reports, save results to files, and visualize protein-ligand complexes.

Attributes

results : List[DockingResult]: A list of docking results.
pocket_data: Data about the binding pocket used in docking.

Methods

_to_dataframe(include_props=None): Converts docking results to a pandas DataFrame.
_repr_html_(): Returns HTML representation of the docking report.
generate_custom_report(include_props=False): Generates a custom HTML report with specified properties.
save(save_dir=None, safe=True): Saves docking results to SDF file with properties.
visualize(protein_path=None, protein_format=None, sdf_file_path=None, crystal_ligand_path=None, crystal_ligand_format=None): Visualizes the docking results in 3D.

Examples

>>> report = DockingReport(results, pocket_data)
>>> report.save()  # Saves results to an SDF file
>>> report.visualize()  # Shows 3D visualization of results

Methods

def generate_custom_report(self, include_props=False)

Expand source code

def generate_custom_report(self, include_props=False):
    """
    Build an HTML report table from the docking results.

    The results are turned into a DataFrame via ``_to_dataframe``, styled
    with 3 decimal places precision, and wrapped in an ``HTML`` object.

    Args:
        include_props (bool, optional): Forwarded to ``_to_dataframe`` to request
            ligand properties as extra columns. Defaults to False.

    Returns:
        HTML: The styled table rendered as HTML.

    Example:
        >>> report = obj.generate_custom_report(include_props=True)
        >>> display(report)  # In Jupyter notebook
    """
    table = self._to_dataframe(include_props)
    styled_table = table.style.format(precision=3)
    markup = styled_table._repr_html_()
    return HTML(markup)

Generate a custom HTML report from the data.

This method converts the internal data to a styled pandas DataFrame and returns it as HTML. The resulting DataFrame is formatted with 3 decimal places precision.

Args

include_props : bool, optional: Whether to include properties in the report. Defaults to False.

Returns

HTML: A styled HTML representation of the data with 3 decimal places precision.

Example

>>> report = obj.generate_custom_report(include_props=True)
>>> display(report)  # In Jupyter notebook

def save(self, save_dir=None, safe=True)

Expand source code

def save(self, save_dir=None, safe=True):
    """
    Save docking results to files in a new timestamped directory.

    Args:
        save_dir (str or Path, optional): Parent directory for the report
            directory. If not given, the END_USER_HOME environment variable
            is used, falling back to the current directory.
        safe (bool, optional): If True and the SDF file already exists, it is
            handed to ``move_file_with_extension`` instead of being removed.
            Defaults to True.

    Returns:
        str or None: Path to the created directory containing saved files,
        or None if no result has a top ligand.

    Files Created:
        - docking_report_top_ligands.sdf: Top ligand of every result, with properties
        - {protein_name}.pdb: Protein of the first result (failure is logged, not raised)
        - bounding_box.pdb: Docking box written by ``save_bounding_box``

    Notes:
        Properties are set on each ligand's RDKit molecule in place before
        it is written:
        - Molecule name (if available)
        - SMILES string (if available)
        - All existing molecular properties, stored as strings
        - All ligand properties from the docking results, stored as strings
    """
    top_ligands = [result.top_ligand for result in self.results if result.top_ligand]
    if not top_ligands:
        return None

    base_dir = Path(save_dir) if save_dir else Path(os.getenv("END_USER_HOME", "."))
    # NOTE(review): '|' and ':' are not permitted in Windows path components.
    timestamp = datetime.now().strftime("%m-%d-%Y|%H:%M:%S")
    save_dir_path = base_dir / f"docking_report_{timestamp}"

    save_dir_path.mkdir(parents=True, exist_ok=True)
    sdf_file_path = save_dir_path / "docking_report_top_ligands.sdf"
    if safe and sdf_file_path.exists():
        move_file_with_extension(str(sdf_file_path), "sdf")
    else:
        remove_file(str(sdf_file_path))

    writer = Chem.SDWriter(str(sdf_file_path))
    try:
        writer.SetKekulize(False)

        for ligand in top_ligands:
            mol = ligand.mol.m  # RDKit molecule

            # Snapshot the current properties before adding name/SMILES.
            existing_properties = mol.GetPropsAsDict()
            if ligand.name:
                mol.SetProp("_Name", ligand.name)
            if ligand.mol.smiles:
                mol.SetProp("_SMILES", ligand.mol.smiles)

            for prop_name, prop_value in existing_properties.items():
                mol.SetProp(prop_name, str(prop_value))

            for prop_name, prop_value in ligand.properties.items():
                mol.SetProp(prop_name, str(prop_value))

            writer.write(mol)
    finally:
        # Always release the file handle, even if a molecule fails to write.
        writer.close()

    # Best effort: a protein that cannot be written must not discard the saved ligands.
    try:
        self.results[0].protein.write_to_file(str(save_dir_path / f"{self.results[0].protein.name}.pdb"))
    except Exception as e:
        DEFAULT_LOGGER.log_error(f"Failed to write protein to file: {e}")

    save_bounding_box(
        self.pocket_data.box_center, self.pocket_data.box_size, output_file=str(save_dir_path / "bounding_box.pdb")
    )
    return str(save_dir_path)

Save docking results to files in a specified directory.

Args

save_dir : str or Path, optional: Directory path where the results will be saved. If None, creates a directory in END_USER_HOME or the current directory.
safe : bool, optional: If True, moves existing files with the same name instead of overwriting them. Defaults to True.

Returns

str or None: Path to the created directory containing saved files if successful, None if no top ligands exist.

Files Created

- docking_report_top_ligands.sdf: Contains the top scoring ligands with their properties
- {protein_name}.pdb: Protein structure file
- bounding_box.pdb: File containing the docking box information

Notes

The saved SDF file includes:

- Molecule structure
- Molecule name (if available)
- SMILES string (if available)
- All existing molecular properties
- All additional properties from docking results

def visualize(*args, **kwargs)

Expand source code

def wrapper(*args, **kwargs):
    # Call the wrapped function, then hand its output to the Jupyter viewer.
    return JupyterViewer.visualize(func(*args, **kwargs))

class DockingResult (protein: Protein, smiles: str | None = None, file_path: str | None = None, successful: bool | None = True)

Expand source code

class DockingResult:
    """
    A class representing the results of a molecular docking operation.

    This class stores the protein target, the docked ligand poses, and
    associated metadata, and provides methods for exporting, analyzing and
    visualizing them.

    Attributes:
        protein (Protein): The protein target used in the docking.
        ligands (List[Ligand]): List of docked ligand poses.
        rmsds (Optional[List[float]]): RMSD values compared to crystal structure if calculated.
        top_ligand (Optional[Ligand]): The top ligand pose; starts as None and is set externally.
        smiles (Optional[str]): SMILES string representation of the ligand.
        successful (bool): Whether the docking operation was successful.
        file_path (Optional[str]): Path to the docking results file.

    Methods:
        add_ligand(ligand: Ligand): Add a docked ligand pose to results.
        calculate_rmsds_from_crystal(crystal_ligand: Union[Ligand, str]): Calculate RMSD values against crystal structure.
        _to_sdf(safe=True, sdf_file_path=None): Export docking results to SDF file.
        _to_dataframe(): Convert results to a styled pandas DataFrame.
        visualize(crystal_ligand_path=None, crystal_ligand_format=None): Visualize docking results.
        analyze(index: Optional[int] = None): Generate interaction analysis of docking poses.

    Examples:
        >>> result = DockingResult(protein, smiles="CC(=O)OC1=CC=CC=C1C(=O)O")
        >>> result.add_ligand(docked_pose)
        >>> result.analyze()
    """

    def __init__(
        self,
        protein: Protein,
        smiles: Optional[str] = None,
        file_path: Optional[str] = None,
        successful: Optional[bool] = True,
    ):
        self.protein = protein
        self.ligands: List[Ligand] = []
        self.rmsds: Optional[List[float]] = None
        self.top_ligand: Optional[Ligand] = None
        self.smiles = smiles
        self.successful = successful
        self.file_path = file_path

    def add_ligand(self, ligand: Ligand):
        """
        Add a ligand to the list of ligands.

        Args:
            ligand (Ligand): The ligand object to be added to the list.

        Returns:
            None
        """
        self.ligands.append(ligand)

    def calculate_rmsds_from_crystal(self, crystal_ligand: Ligand | str):
        """
        Calculate RMSD values between the docked poses file and a crystal structure.

        Runs the 'obrms' command line tool on ``self.file_path`` and the crystal
        ligand file, and parses the last whitespace-separated token of every
        non-blank output line as an RMSD value. The values are also stored in
        ``self.rmsds``.

        Args:
            crystal_ligand (Union[Ligand, str]): Either a Ligand object or a file path string
                representing the crystal structure to compare against.

        Returns:
            list[float]: RMSD values, one per non-blank line of the obrms output.

        Raises:
            SystemError: If obrms cannot be run, exits with a non-zero status,
                or its output cannot be parsed.

        Example:
            >>> result = DockingResult(protein, file_path="path/to/docked_poses.sdf")
            >>> rmsds = result.calculate_rmsds_from_crystal("path/to/crystal.pdb")
        """
        if isinstance(crystal_ligand, str):
            crystal_ligand = Ligand(file_path=str(crystal_ligand))

        try:
            completed = subprocess.run(
                ["obrms", self.file_path, crystal_ligand.file_path], capture_output=True, text=True
            )
            # Without this check a failed run would silently produce an empty list.
            if completed.returncode != 0:
                raise RuntimeError(f"obrms exited with status {completed.returncode}: {completed.stderr.strip()}")
            self.rmsds = [float(line.split()[-1]) for line in completed.stdout.splitlines() if line.strip()]
            return self.rmsds
        except Exception as e:
            raise SystemError(f"Failed to calculate RMSD values: {e}") from e

    def _to_sdf(self, safe=True, sdf_file_path=None):
        """
        Write all ligand poses to an SDF file.

        Each ligand's properties are set on its RDKit molecule (in place, as
        strings) so they are stored as SDF tags in the output file.

        Args:
            safe (bool, optional): If True and the target file exists, it is handed
                to ``move_file_with_extension`` instead of being removed.
                Defaults to True.
            sdf_file_path (str, optional): Custom path for the output SDF file.
                If None, the file is named from the protein name and SMILES and
                placed next to ``self.file_path``, or in the system temp
                directory when ``self.file_path`` is not set. Defaults to None.

        Returns:
            str or None: Path to the created SDF file, or None if no ligands exist.

        Example:
            result._to_sdf(safe=True, sdf_file_path="output.sdf")
        """
        if not self.ligands:
            return None

        if not sdf_file_path:
            # dirname() is applied only to the results file; applying it to the temp
            # directory would point at the temp directory's parent.
            output_dir = os.path.dirname(self.file_path) if self.file_path else tempfile.gettempdir()
            # NOTE(review): SMILES may contain '/' or '\\', which act as path separators here.
            sdf_file_path = os.path.join(output_dir, f"{self.protein.name}_docking_result_{self.smiles}.sdf")

        if safe and os.path.isfile(sdf_file_path):
            move_file_with_extension(sdf_file_path, "sdf")
        else:
            remove_file(sdf_file_path)

        writer = Chem.SDWriter(sdf_file_path)
        try:
            writer.SetKekulize(False)

            for ligand in self.ligands:
                mol = ligand.mol.m  # RDKit molecule from Ligand

                for prop_name, prop_value in ligand.properties.items():
                    mol.SetProp(prop_name, str(prop_value))

                writer.write(mol)
        finally:
            # Always release the file handle, even if a molecule fails to write.
            writer.close()

        return sdf_file_path

    def _to_dataframe(self):
        """
        Converts ligand data to a styled pandas DataFrame.

        Builds one row per ligand pose and sorts the rows by Pose Score in
        descending order. Rank IDs follow the order of ``self.ligands`` and are
        assigned before sorting.

        Returns:
            pandas.io.formats.style.Styler: A styled DataFrame with the following columns:
                - Ligand Pose Rank ID: 1-based position of the pose in ``self.ligands``
                - Pose Score: Rounded to 3 decimal places
                - Binding Energy: Rounded to 3 decimal places
                - Path To Docked Pose: File path to the docked ligand pose

        Note:
            - Missing Binding Energy or Pose Score values default to 0.0
            - With no ligands, an empty table with the same columns is returned
        """
        columns = ["Ligand Pose Rank ID", "Pose Score", "Binding Energy", "Path To Docked Pose"]
        data = []
        for idx, ligand in enumerate(self.ligands):
            mol_props = ligand.properties

            energy_score = float(mol_props.get("Binding Energy", "0.0"))
            rscore = float(mol_props.get("Pose Score", "0.0"))

            data.append(
                {
                    "Ligand Pose Rank ID": idx + 1,
                    "Pose Score": round(rscore, 3),
                    "Binding Energy": round(energy_score, 3),
                    "Path To Docked Pose": ligand.file_path,
                }
            )
        # Explicit columns keep the sort below valid when there are no ligands.
        df = pd.DataFrame(data, columns=columns)
        # Sort by Pose Score descending
        df = df.sort_values(by="Pose Score", ascending=False).reset_index(drop=True)
        return df.style.format(precision=3)

    def _repr_html_(self):
        """Return the styled pose table as HTML."""
        df = self._to_dataframe()
        return df._repr_html_()

    def __str__(self):
        return (
            f"DockingResult:\n  Number of Ligands: {len(self.ligands)}\n"
            f"  SMILES: {self.smiles if self.smiles else 'Not provided'}\n"
            f"  File Path: {self.file_path if self.file_path else 'Not provided'}"
        )

    def __repr__(self):
        return self.__str__()

    @jupyter_visualization
    def visualize(self, crystal_ligand_path=None, crystal_ligand_format=None):
        """
        Visualize docking results with an optional crystal ligand overlay.

        Args:
            crystal_ligand_path (str, optional): File path to the crystal ligand structure.
            crystal_ligand_format (str, optional): Format of the crystal ligand file (e.g., 'pdb', 'mol2').
                The crystal ligand is passed on only when both arguments are given.

        Returns:
            The output of ``DockingViewer.render_with_seperate_crystal``, as
            post-processed by the ``jupyter_visualization`` decorator.

        Note:
            ``self.file_path`` is passed as the ligand data with the format "sdf".
            The protein is passed with its own ``block_type`` as format.
        """

        visualization_format = "sdf"
        crystal_data = None
        if crystal_ligand_format and crystal_ligand_path:
            crystal_data = {"raw": str(crystal_ligand_path), "format": crystal_ligand_format}

        return DockingViewer().render_with_seperate_crystal(
            protein_data=str(self.protein.file_path),
            protein_format=self.protein.block_type,
            ligands_data=[str(self.file_path)],
            ligand_format=visualization_format,
            crystal_data=crystal_data,
        )

    def analyze(self, index: Optional[int] = None):
        """
        Analyze protein-ligand interactions with a ``plf`` fingerprint.

        The protein and all ligand poses are written to a temporary directory,
        loaded back, and passed to ``plf.Fingerprint``. Either a single pose
        (selected by index) or all poses are analyzed.

        Args:
            index (Optional[int]): The zero-based index of the pose to analyze.
                If None, analyzes all poses. Defaults to None.

        Returns:
            - If index is None: the result of ``fp.to_dataframe(index_col="Pose")``
              (a barcode plot is also drawn via ``fp.plot_barcode``)
            - If index is specified: the result of ``fp.plot_lignetwork`` for that pose

        Raises:
            ValueError: If no ligands or protein are found to analyze
            IndexError: If the provided ligand index is negative or out of range

        Notes:
            - Works on a deep copy of the protein, so ``self.protein`` is not modified
            - Temporary files are removed when the method returns
        """
        if not self.ligands:
            raise ValueError("No ligands found to analyze.")

        protein = deepcopy(self.protein)
        if not protein:
            raise ValueError("No protein found to analyze.")

        fp = plf.Fingerprint()
        with tempfile.TemporaryDirectory() as temp_dir:
            protein_file = os.path.join(temp_dir, f"{protein.name}.pdb")

            protein.write_to_file(protein_file)
            protein.file_path = protein_file

            sdf_file_path = self._to_sdf(sdf_file_path=os.path.join(temp_dir, f"{protein.name}_docking_result.sdf"))

            # NOTE(review): `v` is never passed to `fp`; these overrides only matter if
            # `vdwradii` is state shared with the fingerprint's own VdWContact — confirm.
            v = VdWContact()
            v.vdwradii["Fe"] = 2.0
            v.vdwradii["H"] = 1.05
            v.vdwradii["O"] = 1.48

            # Positional False, False: RDKit's sanitize and removeHs flags.
            rdkit_prot = Chem.MolFromPDBFile(protein_file, False, False)
            protein_mol = plf.Molecule(rdkit_prot)
            pose_iterable = plf.sdf_supplier(str(sdf_file_path))
            # Swap in a non-sanitizing supplier via the private `_suppl` attribute.
            sdf_supp = Chem.SDMolSupplier(str(sdf_file_path), sanitize=False)
            pose_iterable._suppl = sdf_supp

            if index is not None:
                if index < 0 or index >= len(sdf_supp):
                    raise IndexError("Ligand index out of range.")

                single_ligand_iterable = pose_iterable[index]
                fp.run_from_iterable([single_ligand_iterable], protein_mol)

                result = fp.plot_lignetwork(single_ligand_iterable)
            else:
                fp.run_from_iterable(pose_iterable, protein_mol)
                fp.plot_barcode(xlabel="Pose")

                result = fp.to_dataframe(index_col="Pose")

        return result

A class representing the results of a molecular docking operation.

This class stores and manages the results of a molecular docking simulation, including the protein target, docked ligands, and associated metadata. It provides methods for analyzing and visualizing the docking results.

Attributes

protein : Protein: The protein target used in the docking.
ligands : List[Ligand]: List of docked ligand poses.
rmsds : Optional[List[float]]: RMSD values compared to crystal structure if calculated.
top_ligand : Optional[Ligand]: The highest scoring ligand pose.
smiles : Optional[str]: SMILES string representation of the ligand.
successful : bool: Whether the docking operation was successful.
file_path : Optional[str]: Path to the docking results file.

Methods

add_ligand(ligand: Ligand): Add a docked ligand pose to results. calculate_rmsds_from_crystal(crystal_ligand: Union[Ligand, str]): Calculate RMSD values against crystal structure. _to_sdf(safe=True, sdf_file_path=None): Export docking results to SDF file. _to_dataframe(): Convert results to pandas DataFrame. visualize(crystal_ligand_path=None, crystal_ligand_format=None): Visualize docking results. analyze(index: Optional[int] = None): Generate detailed analysis of docking poses.

Examples

>>> result = DockingResult(protein, smiles="CC(=O)OC1=CC=CC=C1C(=O)O")
>>> result.add_ligand(docked_pose)
>>> result.analyze()

Methods

def add_ligand(self, ligand: Ligand)

Expand source code

def add_ligand(self, ligand: Ligand):
    """
    Append a docked ligand pose to this result's ``ligands`` list.

    Args:
        ligand (Ligand): The ligand object to store.

    Returns:
        None
    """
    poses = self.ligands
    poses.append(ligand)

Add a ligand to the list of ligands.

Args

ligand : Ligand: The ligand object to be added to the list.

Returns

None

def analyze(self, index: int | None = None)

Expand source code

def analyze(self, index: Optional[int] = None):
    """
    Analyze protein-ligand interactions with a ``plf`` fingerprint.

    The protein and all ligand poses are written to a temporary directory,
    loaded back, and passed to ``plf.Fingerprint``. Either a single pose
    (selected by index) or all poses are analyzed.

    Args:
        index (Optional[int]): The zero-based index of the pose to analyze.
            If None, analyzes all poses. Defaults to None.

    Returns:
        - If index is None: the result of ``fp.to_dataframe(index_col="Pose")``
          (a barcode plot is also drawn via ``fp.plot_barcode``)
        - If index is specified: the result of ``fp.plot_lignetwork`` for that pose

    Raises:
        ValueError: If no ligands or protein are found to analyze
        IndexError: If the provided ligand index is negative or out of range

    Notes:
        - Works on a deep copy of the protein, so ``self.protein`` is not modified
        - Temporary files are removed when the method returns
    """
    if not self.ligands:
        raise ValueError("No ligands found to analyze.")

    protein = deepcopy(self.protein)
    if not protein:
        raise ValueError("No protein found to analyze.")

    fp = plf.Fingerprint()
    with tempfile.TemporaryDirectory() as temp_dir:
        protein_file = os.path.join(temp_dir, f"{protein.name}.pdb")

        protein.write_to_file(protein_file)
        protein.file_path = protein_file

        sdf_file_path = self._to_sdf(sdf_file_path=os.path.join(temp_dir, f"{protein.name}_docking_result.sdf"))

        # NOTE(review): `v` is never passed to `fp`; these overrides only matter if
        # `vdwradii` is state shared with the fingerprint's own VdWContact — confirm.
        v = VdWContact()
        v.vdwradii["Fe"] = 2.0
        v.vdwradii["H"] = 1.05
        v.vdwradii["O"] = 1.48

        # Positional False, False: RDKit's sanitize and removeHs flags.
        rdkit_prot = Chem.MolFromPDBFile(protein_file, False, False)
        protein_mol = plf.Molecule(rdkit_prot)
        pose_iterable = plf.sdf_supplier(str(sdf_file_path))
        # Swap in a non-sanitizing supplier via the private `_suppl` attribute.
        sdf_supp = Chem.SDMolSupplier(str(sdf_file_path), sanitize=False)
        pose_iterable._suppl = sdf_supp

        if index is not None:
            if index < 0 or index >= len(sdf_supp):
                raise IndexError("Ligand index out of range.")

            single_ligand_iterable = pose_iterable[index]
            fp.run_from_iterable([single_ligand_iterable], protein_mol)

            result = fp.plot_lignetwork(single_ligand_iterable)
        else:
            fp.run_from_iterable(pose_iterable, protein_mol)
            fp.plot_barcode(xlabel="Pose")

            result = fp.to_dataframe(index_col="Pose")

    return result

Analyze protein-ligand interactions using PLIPy fingerprinting.

This method analyzes the interactions between a protein and its ligands using the PLIP (Protein-Ligand Interaction Profiler) fingerprinting approach. It can analyze either a single ligand pose (specified by index) or all ligand poses.

Args

index : Optional[int]: The index of the specific ligand pose to analyze. If None, analyzes all poses. Defaults to None.

Returns

Union[pd.DataFrame, Dict]

If index is None: Returns a DataFrame containing interaction fingerprints for all poses
If index is specified: Returns a dictionary containing the ligand network plot data

Raises

ValueError: If no ligands or protein are found to analyze
IndexError: If the provided ligand index is out of range

Notes

Creates temporary files for processing
Sets custom Van der Waals radii for Fe, H, and O atoms
Uses RDKit for molecular operations
Uses PLIPy for fingerprint generation

def calculate_rmsds_from_crystal(self, crystal_ligand: Ligand | str)

Expand source code

def calculate_rmsds_from_crystal(self, crystal_ligand: Ligand | str):
    """
    Calculate RMSD values between this ligand and a crystal structure.

    Runs the external 'obrms' command line tool on ``self.file_path`` and the
    crystal ligand's file, parses the last whitespace-separated token of every
    non-blank output line as a float, stores the resulting list on
    ``self.rmsds`` and returns it.

    Args:
        crystal_ligand (Union[Ligand, str]): Either a Ligand object or a file path string
            representing the crystal structure to compare against.

    Returns:
        list[float]: A list of RMSD values computed between the current ligand and the crystal structure.

    Raises:
        SystemError: If 'obrms' cannot be started, exits with a non-zero status,
            or prints output that cannot be parsed as RMSD values.

    Example:
        >>> ligand = Ligand("path/to/ligand.pdb") 
        >>> rmsds = ligand.calculate_rmsds_from_crystal("path/to/crystal.pdb")
    """
    if isinstance(crystal_ligand, str):
        crystal_ligand = Ligand(file_path=str(crystal_ligand))

    try:
        # check=True turns a failing obrms run into an exception instead of
        # silently yielding an empty RMSD list from its empty stdout.
        result = subprocess.run(
            ["obrms", self.file_path, crystal_ligand.file_path],
            capture_output=True,
            text=True,
            check=True,
        )
        # The RMSD is the last token on each line; blank lines carry no value.
        rmsds = [float(line.split()[-1]) for line in result.stdout.splitlines() if line.strip()]
    except subprocess.CalledProcessError as e:
        # Prefer the tool's own error output over the generic exit-status message.
        detail = (e.stderr or "").strip() or e
        raise SystemError(f"Failed to calculate RMSD values: {detail}") from e
    except Exception as e:
        raise SystemError(f"Failed to calculate RMSD values: {e}") from e

    self.rmsds = rmsds
    return rmsds

Calculate RMSD values between this ligand and a crystal structure.

This method computes Root Mean Square Deviation (RMSD) values between the current ligand and a reference crystal structure using the 'obrms' command line tool.

Args

crystal_ligand : Union[Ligand, str]: Either a Ligand object or a file path string representing the crystal structure to compare against.

Returns

list[float]: A list of RMSD values computed between the current ligand and the crystal structure.

Raises

SystemError: If RMSD calculation fails for any reason.

Example

>>> ligand = Ligand("path/to/ligand.pdb") 
>>> rmsds = ligand.calculate_rmsds_from_crystal("path/to/crystal.pdb")

def visualize(*args, **kwargs)

Expand source code

def wrapper(*args, **kwargs):
    # Forward every argument to the wrapped callable and pass whatever it
    # returns straight to JupyterViewer.visualize, returning that result.
    return JupyterViewer.visualize(func(*args, **kwargs))

class MolPropsReport (results)

Expand source code

class MolPropsReport:
    """
    A class for handling and displaying molecular property calculation results.

    This class processes molecular property calculation results and can convert them
    into a formatted pandas DataFrame for display, particularly useful in Jupyter notebooks.

    Args:
        results (list): A list of dictionaries containing molecular property calculation results.
                       Each dictionary must contain a 'smiles' key and additional property keys.

    Attributes:
        results (list): The stored results from molecular property calculations.

    Methods:
        _to_dataframe(): Converts the results into a formatted pandas DataFrame.
        _repr_html_(): Returns HTML representation of the data for display in Jupyter notebooks.

    Examples:
        >>> results = [{"smiles": "CC", "property1": 0.5, "property2": 1.0}]
        >>> report = MolPropsReport(results)
        >>> df = report._to_dataframe()
    """

    def __init__(self, results):
        self.results = results

    def _to_dataframe(self):
        """
        Converts the results data into a pandas DataFrame.

        Extracts SMILES strings and other result properties into separate lists,
        then combines them into a DataFrame with formatted numerical precision.

        Returns:
            pandas.DataFrame: A styled DataFrame containing SMILES strings and associated data
            with numerical values formatted to 3 decimal places.
        """
        smiles = []

        data = {k: [] for k in self.results[0].keys() if k != "smiles"}

        for result in self.results:
            smiles.append(result["smiles"])

            for k in data:
                data[k].append(result.get(k))

        data = {
            "SMILES": smiles,
            **data,
        }

        df = pd.DataFrame(data).style.set_properties().format(precision=3)
        return df

    def _repr_html_(self):
        df = self._to_dataframe()
        return df._repr_html_()

A class for handling and displaying molecular property calculation results.

This class processes molecular property calculation results and can convert them into a formatted pandas DataFrame for display, particularly useful in Jupyter notebooks.

Args

results : list: A list of dictionaries containing molecular property calculation results. Each dictionary must contain a 'smiles' key and additional property keys.

Attributes

results : list: The stored results from molecular property calculations.

Methods

_to_dataframe(): Converts the results into a formatted pandas DataFrame.
_repr_html_(): Returns HTML representation of the data for display in Jupyter notebooks.

Examples

>>> results = [{"smiles": "CC", "property1": 0.5, "property2": 1.0}]
>>> report = MolPropsReport(results)
>>> df = report._to_dataframe()

class PainsReport (results)

Expand source code

class PainsReport:
    """
    A class for generating and displaying PAINS (Pan-Assay Interference Compounds) analysis reports.

    Args:
        results (list): A list of dictionaries containing PAINS analysis results. Each dictionary 
            should contain:
            - smiles (str): SMILES string representation of the molecule
            - PAINS (list or None): List of PAINS pattern SMARTS strings that match the molecule
                                   or None if no matches found

    Methods:
        get_html_of_molecule(result): Generates HTML img tag with highlighted PAINS matches
        _to_dataframe(): Converts results to pandas DataFrame
        _repr_html_(): Returns HTML representation for Jupyter display

    Examples:
        >>> results = [{'smiles': 'CC(=O)Oc1ccccc1C(=O)O', 'PAINS': ['[O,S]-[CH2]-[CH2]-[O,S]']}]
        >>> report = PainsReport(results)
        >>> print(report)
        PainsReport:
          Number of results: 1

    Notes:
        Requires RDKit for molecular operations and visualization.
        Implements Jupyter notebook display protocol via _repr_html_.
    """

    def __init__(self, results):
        self.results = results

    def get_html_of_molecule(self, result):
        """
        Build an HTML ``<img>`` tag showing the molecule with PAINS atoms highlighted.

        For every SMARTS pattern listed under ``result["PAINS"]`` the atoms of the
        first substructure match are highlighted. Patterns that cannot be parsed or
        that do not match the molecule are ignored.

        Args:
            result (dict): A result entry with a 'smiles' key and a 'PAINS' key
                (list of SMARTS strings or None).

        Returns:
            str: An ``<img>`` tag embedding the drawing as a base64-encoded PNG.

        Raises:
            ValueError: If the SMILES string cannot be parsed into a molecule.
        """
        molecule = Chem.MolFromSmiles(result["smiles"])
        if molecule is None:
            raise ValueError(f"Could not parse SMILES: {result['smiles']!r}")

        all_matches = []
        for smarts in result["PAINS"] or []:
            pattern = Chem.MolFromSmarts(smarts)
            if pattern is None:
                # Unparsable SMARTS: nothing to highlight for this pattern.
                continue
            atom_matches = molecule.GetSubstructMatches(pattern)
            # A listed pattern may still have no match; indexing [0] blindly would raise.
            if atom_matches:
                all_matches.extend(atom_matches[0])

        Draw.DrawingOptions.atomHighlightsAreCircles = True
        Draw.DrawingOptions.atomHighlightColors = {i: (1, 0, 0) for i in set(all_matches)}

        img = Draw.MolToImage(molecule, size=(200, 100), highlightAtoms=all_matches)

        buffer = BytesIO()
        img.save(buffer, format="PNG")
        img_str = base64.b64encode(buffer.getvalue()).decode("utf-8")
        html = '<img src="data:image/png;base64,{0}">'.format(img_str)
        return html

    def _to_dataframe(self):
        """
        Convert the results into a pandas DataFrame.

        This method processes the stored results and converts them into a structured DataFrame
        containing SMILES strings, HTML representations of molecules, and PAINS pattern information.

        Returns:
            pd.DataFrame: A DataFrame with the following columns:
                - SMILES: List of SMILES strings for each molecule
                - Molecule Image: HTML representations of the molecules
                - SMARTS patterns of PAINS: List of PAINS patterns found in each molecule
        """
        all_smiles_list = []
        all_molecule_html_list = []
        PAINS_pattern_list = []

        for result in self.results:
            all_smiles_list.append(result["smiles"])
            all_molecule_html_list.append(self.get_html_of_molecule(result))
            PAINS_pattern_list.append(result["PAINS"])

        return pd.DataFrame.from_dict(
            {
                "SMILES": all_smiles_list,
                "Molecule Image": all_molecule_html_list,
                "SMARTS patterns of PAINS": PAINS_pattern_list,
            }
        )

    def _repr_html_(self):
        """Return the table as HTML; escaping is off so the embedded <img> tags render."""
        df = self._to_dataframe()
        return df.to_html(escape=False)

    def __str__(self):
        return f"PainsReport:\n  Number of results: {len(self.results)}"

    def __repr__(self):
        return self.__str__()

A class for generating and displaying PAINS (Pan-Assay Interference Compounds) analysis reports.

Args

results : list: A list of dictionaries containing PAINS analysis results. Each dictionary should contain: - smiles (str): SMILES string representation of the molecule - PAINS (list or None): List of PAINS pattern SMARTS strings that match the molecule or None if no matches found

Methods

get_html_of_molecule(result): Generates HTML img tag with highlighted PAINS matches.
_to_dataframe(): Converts results to pandas DataFrame.
_repr_html_(): Returns HTML representation for Jupyter display.

Examples

>>> results = [{'smiles': 'CC(=O)Oc1ccccc1C(=O)O', 'PAINS': ['[O,S]-[CH2]-[CH2]-[O,S]']}]
>>> report = PainsReport(results)
>>> print(report)
DockingReport:
  Number of DockingResults: 1

Notes

Requires RDKit for molecular operations and visualization. Implements Jupyter notebook display protocol via _repr_html_.

Methods

def get_html_of_molecule(self, result)

Expand source code

def get_html_of_molecule(self, result):
    """
    Build an HTML ``<img>`` tag showing the molecule with PAINS atoms highlighted.

    For every SMARTS pattern listed under ``result["PAINS"]`` the atoms of the
    first substructure match are highlighted. Patterns that cannot be parsed or
    that do not match the molecule are ignored.

    Args:
        result (dict): A result entry with a 'smiles' key and a 'PAINS' key
            (list of SMARTS strings or None).

    Returns:
        str: An ``<img>`` tag embedding the drawing as a base64-encoded PNG.

    Raises:
        ValueError: If the SMILES string cannot be parsed into a molecule.
    """
    molecule = Chem.MolFromSmiles(result["smiles"])
    if molecule is None:
        raise ValueError(f"Could not parse SMILES: {result['smiles']!r}")

    all_matches = []
    for smarts in result["PAINS"] or []:
        pattern = Chem.MolFromSmarts(smarts)
        if pattern is None:
            # Unparsable SMARTS: nothing to highlight for this pattern.
            continue
        atom_matches = molecule.GetSubstructMatches(pattern)
        # A listed pattern may still have no match; indexing [0] blindly would raise.
        if atom_matches:
            all_matches.extend(atom_matches[0])

    Draw.DrawingOptions.atomHighlightsAreCircles = True
    Draw.DrawingOptions.atomHighlightColors = {i: (1, 0, 0) for i in set(all_matches)}

    img = Draw.MolToImage(molecule, size=(200, 100), highlightAtoms=all_matches)

    buffer = BytesIO()
    img.save(buffer, format="PNG")
    img_str = base64.b64encode(buffer.getvalue()).decode("utf-8")
    html = '<img src="data:image/png;base64,{0}">'.format(img_str)
    return html

class PocketFinderReport (protein, csv_file_path='')

Expand source code

class PocketFinderReport:
    """
    PocketFinderReport class for managing protein pocket analysis results.

    A class to handle collection and reporting of protein pocket properties including drugability scores,
    volumes, surface areas and other physicochemical properties.

    Attributes:
        protein: The protein object associated with this report
        file_path (str): Path to save the CSV report file
        pockets (list): List of pocket objects containing analysis results

    Methods:
        add_pocket(pocket): Add a pocket object to the report
        _to_dataframe(): Convert pocket data to pandas DataFrame
        _repr_html_(): Generate HTML representation of the report
        save_props(): Save pocket properties to CSV file

    Example:
        report = PocketFinderReport(protein_obj, "output.csv")
        report.add_pocket(pocket_obj)
        report.save_props()
    """

    # Column order of the report table; also used when there are no rows to infer it from.
    _COLUMNS = [
        "Pocket ID",
        "Color",
        "Drugability Score",
        "Volume",
        "Total SASA",
        "Polar SASA",
        "Polar/Apolar SASA Ratio",
        "Hydrophobicity",
        "Polarity",
    ]

    def __init__(self, protein, csv_file_path=""):
        self.protein = protein
        self.file_path = csv_file_path
        self.pockets = []

    def add_pocket(self, pocket):
        """
        Add a pocket to the collection of pockets.

        Args:
            pocket: A pocket object to be added to the pockets list.
        """
        self.pockets.append(pocket)

    def _to_dataframe(self):
        """
        Convert the collected pockets into a pandas DataFrame.

        Pockets whose ``props`` is empty are left out. "Pocket ID" is the 1-based
        position of the pocket in ``self.pockets``; properties missing from
        ``props`` default to 0. Rows are sorted by "Drugability Score", highest first.

        Returns:
            pd.DataFrame: One row per pocket with properties. When no pocket
            qualifies, an empty DataFrame that still carries all report columns.
        """
        data = []
        for idx, pocket in enumerate(self.pockets):
            props = pocket.props
            if props:
                data.append(
                    {
                        "Pocket ID": idx + 1,
                        "Color": pocket.color,
                        "Drugability Score": props.get("drugability_score", 0),
                        "Volume": props.get("volume", 0),
                        "Total SASA": props.get("total_SASA", 0),
                        "Polar SASA": props.get("polar_SASA", 0),
                        "Polar/Apolar SASA Ratio": props.get("polar_apolar_SASA_ratio", 0),
                        "Hydrophobicity": props.get("hydrophobicity", 0),
                        "Polarity": props.get("polarity", 0),
                    }
                )

        # Passing the columns explicitly keeps the sort below valid for an empty report.
        df = pd.DataFrame(data, columns=self._COLUMNS)
        # Sort by Drugability Score descending
        df = df.sort_values(by="Drugability Score", ascending=False).reset_index(drop=True)
        return df

    def _repr_html_(self):
        """Return the report table as HTML with 3-digit precision (Jupyter display hook)."""
        df = self._to_dataframe()
        return df.style.format(precision=3)._repr_html_()

    def save_props(self):
        """
        Saves the properties of the report to a CSV file.

        This method converts the internal data structure to a pandas DataFrame and saves it
        to the file path specified in self.file_path attribute. The DataFrame is saved
        without the index column.

        Returns:
            None
        """
        df = self._to_dataframe()
        df.to_csv(self.file_path, index=False)

PocketFinderReport class for managing protein pocket analysis results.

A class to handle collection and reporting of protein pocket properties including drugability scores, volumes, surface areas and other physicochemical properties.

Attributes

protein: The protein object associated with this report
file_path : str: Path to save the CSV report file
pockets : list: List of pocket objects containing analysis results

Methods

add_pocket(pocket): Add a pocket object to the report.
_to_dataframe(): Convert pocket data to pandas DataFrame.
_repr_html_(): Generate HTML representation of the report.
save_props(): Save pocket properties to CSV file.

Example

report = PocketFinderReport(protein_obj, "output.csv") report.add_pocket(pocket_obj) report.save_props()

Methods

def add_pocket(self, pocket)

Expand source code

def add_pocket(self, pocket):
    """
    Register one more pocket with this report.

    Args:
        pocket: The pocket object to append to ``self.pockets``.
    """
    collected = self.pockets
    collected.append(pocket)

Add a pocket to the collection of pockets.

Args

pocket: A pocket object to be added to the pockets list.

def save_props(self)

Expand source code

def save_props(self):
    """
    Write the report table to a CSV file.

    The table produced by ``self._to_dataframe()`` is written to the location
    held in ``self.file_path``; the DataFrame index is not included in the file.

    Returns:
        None
    """
    self._to_dataframe().to_csv(self.file_path, index=False)

Saves the properties of the report to a CSV file.

This method converts the internal data structure to a pandas DataFrame and saves it to the file path specified in self.file_path attribute. The DataFrame is saved without the index column.

Returns

None

class ProtonationReport (results)

Expand source code

class ProtonationReport:
    """
    A class for handling and displaying protonation calculation results.

    On construction the incoming results are split into the tabular data
    (``self.results``) and the per-molecule plotting metadata
    (``self.html_metadata``, keyed by SMILES).

    Args:
        results (list): List of dictionaries with 'smiles' and 'protonation' keys.
            The 'protonation' entry may contain 'html_metadata'.

    Methods:
        split_results(results): Separates HTML metadata from the main data
        show_plots(): Displays the concentration curves of every molecule
        smiles_to_img_html(smiles): Renders a SMILES string as inline SVG markup
        plot_concentration_curves(html_meta, plot=False): Builds the concentration figure
        _to_dataframe(): Converts results to pandas DataFrame
        _repr_html_(): Returns HTML representation for Jupyter display
    """

    def __init__(self, results):
        self.results, self.html_metadata = self.split_results(results)

    def split_results(self, results):
        """
        Splits results by separating HTML metadata from the main data.

        This method processes a list of result items, removing HTML metadata from the
        protonation data while preserving it in a separate dictionary mapped by SMILES.

        Args:
            results (list): List of dictionaries containing molecular data with 'smiles' 
                and 'protonation' keys. The 'protonation' may contain 'html_metadata'.

        Returns:
            tuple: A 2-tuple containing:
                - list: Copy of input results with HTML metadata removed from protonation data
                - dict: Mapping of SMILES strings to their corresponding HTML metadata
                  (None for items that carry no metadata)
        """
        list_without_html = []
        smiles_to_html_dict = {}

        for item in results:
            # Work on a deep copy so the caller's data keeps its metadata.
            new_item = copy.deepcopy(item)
            new_item["protonation"].pop("html_metadata", None)
            list_without_html.append(new_item)

            smiles_to_html_dict[item["smiles"]] = item["protonation"].get("html_metadata")

        return list_without_html, smiles_to_html_dict

    def show_plots(self):
        """
        Display a heading and the concentration curves for every molecule.

        Molecules that came without 'html_metadata' have nothing to plot and are skipped.
        """
        for smi, html_meta in self.html_metadata.items():
            if html_meta is None:
                continue
            centered_html = f"<center><h2>{smi}</h2></center>"
            display(HTML(centered_html))
            self.plot_concentration_curves(html_meta, plot=True)

    def smiles_to_img_html(self, smiles):
        """
        Render a SMILES string as single-line drawing markup.

        Args:
            smiles (str): SMILES string of the molecule to draw.

        Returns:
            str: The output of ``Draw.MolToSVG`` (200x150) with newlines removed.
        """
        mol = Chem.MolFromSmiles(smiles)
        AllChem.Compute2DCoords(mol)
        mol.GetConformer().SetId(1)

        return Draw.MolToSVG(mol, width=200, height=150).replace("\n", "")

    def plot_concentration_curves(self, html_meta, plot=False):
        """
        Plot concentration curves for different molecular species across a pH range.

        Args:
            html_meta (tuple): A tuple containing:
            - fractions (list): Matrix of fraction values for each species
            - smiles_list (list): List of SMILES notations for each species  
            - concentration_values (list): Concentration values for each species
            - pH_range (list): Range of pH values for x-axis
            - pH (float): Current pH value
            plot (bool, optional): If True, displays the plot immediately. Defaults to False.

        Returns:
            plotly.graph_objects.Figure: A plotly figure object containing the concentration curves.
            The plot shows fraction percentage (0-100%) on y-axis vs pH (0-14) on x-axis.
            Each curve is labeled with species SMILES and concentration at current pH.
        """
        fractions, smiles_list, concentration_values, pH_range, pH = html_meta
        # Transpose so that iterating yields one curve per species.
        fractions = np.transpose(np.array(fractions))

        fig = sp.make_subplots(rows=1, cols=1)
        for i, fraction in enumerate(fractions):
            fig.add_trace(
                go.Scatter(
                    x=pH_range,
                    y=fraction * 100,
                    mode="lines",
                    showlegend=True,
                    name=f"Fraction of {smiles_list[i]} at pH={pH:.1f} is {round(concentration_values[i], 2)}(%)",
                ),
                row=1,
                col=1,
            )
        fig.update_layout(
            xaxis=dict(title="pH", range=[0, 14]),
            yaxis=dict(title="Fraction (%)", range=[0, 100]),
            hovermode="closest",
        )
        if plot:
            fig.show()
        return fig

    def _to_dataframe(self):
        """
        Converts the protonation results to a pandas DataFrame.

        This method processes the protonation results stored in self.results and creates a DataFrame 
        with the following structure:
        - Multi-index with SMILES as the top level
        - Columns:
            - 'protonated SMILES': The SMILES strings of protonated species
            - 'Concentration %': Rounded concentration percentages (to 2 decimal places)
            - 'Molecule Image': HTML representation of molecular structures

        Returns:
            pd.DataFrame: A DataFrame containing the protonation results with molecular structure 
            visualizations. The DataFrame has a multi-index structure where the top level 
            is the original SMILES string. With no results, an empty DataFrame with the
            same columns is returned.
        """
        data_dict = {}

        for result in self.results:
            smiles = result["smiles"]
            smiles_list = result["protonation"]["smiles_list"]
            concentration_list = result["protonation"]["concentration_list"]
            rounded_concentration = [round(value, 2) for value in concentration_list]
            data_dict[smiles] = pd.DataFrame(
                {"protonated SMILES": smiles_list, "Concentration %": rounded_concentration}
            )

        # pd.concat refuses an empty collection, so build the empty table directly.
        if not data_dict:
            return pd.DataFrame(
                columns=["protonated SMILES", "Concentration %", "Molecule Image"],
                index=pd.MultiIndex.from_arrays([[], []], names=["SMILES", None]),
            )

        df = pd.concat(data_dict.values(), keys=data_dict.keys(), names=["SMILES"])
        df["Molecule Image"] = df["protonated SMILES"].apply(self.smiles_to_img_html)
        return df

    def _repr_html_(self):
        """Return the table as HTML; escaping is off so the embedded drawings render."""
        df = self._to_dataframe()
        return df.to_html(escape=False)

Methods

def plot_concentration_curves(self, html_meta, plot=False)

Expand source code

def plot_concentration_curves(self, html_meta, plot=False):
    """
    Plot concentration curves for different molecular species across a pH range.

    Args:
        html_meta (tuple): A tuple containing:
        - fractions (list): Matrix of fraction values for each species
        - smiles_list (list): List of SMILES notations for each species  
        - concentration_values (list): Concentration values for each species
        - pH_range (list): Range of pH values for x-axis
        - pH (float): Current pH value
        plot (bool, optional): If True, displays the plot immediately. Defaults to False.

    Returns:
        plotly.graph_objects.Figure: A plotly figure object containing the concentration curves.
        The plot shows fraction percentage (0-100%) on y-axis vs pH (0-14) on x-axis.
        Each curve is labeled with species SMILES and concentration at current pH.
    """
    fractions, smiles_list, concentration_values, pH_range, pH = html_meta
    # Transpose so that iterating yields one curve per species.
    fractions = np.transpose(np.array(fractions))

    fig = sp.make_subplots(rows=1, cols=1)
    for i, fraction in enumerate(fractions):
        fig.add_trace(
            go.Scatter(
                x=pH_range,
                y=fraction * 100,
                mode="lines",
                showlegend=True,
                name=f"Fraction of {smiles_list[i]} at pH={pH:.1f} is {round(concentration_values[i], 2)}(%)",
            ),
            row=1,
            col=1,
        )
    fig.update_layout(
        xaxis=dict(title="pH", range=[0, 14]),
        yaxis=dict(title="Fraction (%)", range=[0, 100]),
        hovermode="closest",
    )
    if plot:
        fig.show()
    # The documented contract is to hand the figure back; previously nothing was returned.
    return fig

Plot concentration curves for different molecular species across a pH range.

Args

html_meta : tuple: A tuple containing:

fractions (list): Matrix of fraction values for each species
smiles_list (list): List of SMILES notations for each species
concentration_values (list): Concentration values for each species
pH_range (list): Range of pH values for x-axis
pH (float): Current pH value

plot : bool, optional

If True, displays the plot immediately. Defaults to False.

Returns

plotly.graph_objects.Figure: A plotly figure object containing the concentration curves.

The plot shows fraction percentage (0-100%) on y-axis vs pH (0-14) on x-axis. Each curve is labeled with species SMILES and concentration at current pH.

def show_plots(self)

Expand source code

def show_plots(self):
    """
    Display a heading and the concentration curves for every molecule.

    Iterates over ``self.html_metadata`` (SMILES -> plotting metadata). Entries
    whose metadata is None have nothing to plot and are skipped.
    """
    for smi, html_meta in self.html_metadata.items():
        if html_meta is None:
            # No metadata was supplied for this molecule; unpacking it would fail.
            continue
        centered_html = f"<center><h2>{smi}</h2></center>"
        display(HTML(centered_html))
        self.plot_concentration_curves(html_meta, plot=True)

def smiles_to_img_html(self, smiles)

Expand source code

def smiles_to_img_html(self, smiles):
    """
    Turn a SMILES string into single-line drawing markup.

    The molecule is parsed, given 2D coordinates, its conformer id is set to 1,
    and the text returned by ``Draw.MolToSVG`` (200 wide, 150 high) is handed
    back with all newline characters stripped.
    """
    molecule = Chem.MolFromSmiles(smiles)
    AllChem.Compute2DCoords(molecule)

    conformer = molecule.GetConformer()
    conformer.SetId(1)

    markup = Draw.MolToSVG(molecule, width=200, height=150)
    return markup.replace("\n", "")

def split_results(self, results)

Expand source code

def split_results(self, results):
    """
    Separate the plotting metadata from the tabular protonation data.

    Each input item is deep-copied and the copy loses its
    ``["protonation"]["html_metadata"]`` entry (if present); the metadata of the
    original item is collected in a dictionary keyed by the item's SMILES.
    The input list itself is left untouched.

    Args:
        results (list): List of dictionaries containing molecular data with 'smiles' 
            and 'protonation' keys. The 'protonation' may contain 'html_metadata'.

    Returns:
        tuple: A 2-tuple containing:
            - list: Copies of the input items without HTML metadata
            - dict: Mapping of SMILES strings to their HTML metadata (None when absent)
    """
    stripped_items, metadata_by_smiles = [], {}

    for entry in results:
        clone = copy.deepcopy(entry)
        clone["protonation"].pop("html_metadata", None)
        stripped_items.append(clone)

        metadata_by_smiles[entry["smiles"]] = entry["protonation"].get("html_metadata")

    return stripped_items, metadata_by_smiles

Splits results by separating HTML metadata from the main data.

This method processes a list of result items, removing HTML metadata from the protonation data while preserving it in a separate dictionary mapped by SMILES.

Args

results : list: List of dictionaries containing molecular data with 'smiles' and 'protonation' keys. The 'protonation' may contain 'html_metadata'.

Returns

tuple: A 2-tuple containing: - list: Copy of input results with HTML metadata removed from protonation data - dict: Mapping of SMILES strings to their corresponding HTML metadata