Module deeporigin.src.docking

Classes

class DockingClient
Expand source code
class DockingClient(Client):
    """
    A client class for handling molecular docking operations.
    This class provides functionality for preparing and executing molecular docking simulations,
    managing protein-ligand interactions, and processing docking results. It extends the base
    Client class and integrates with molecular property calculations.

    Attributes:
        props_client (MolecularPropertiesClient): Client for molecular property calculations

    Methods:
        get_protein_data_object: Prepares protein data for docking
        _protonate_smiles: Protonates SMILES strings in batches
        update_progress_bar: Updates progress visualization during docking
        docking_request: Sends docking request to server
        get_results_dir: Creates/retrieves results directory
        stop_docking_request: Stops ongoing docking request
        wait_for_docking_response: Polls server for docking completion
        process_docking_request: Manages complete docking request lifecycle
        prepare: Prepares protein and ligand structures
        ensure_float_list: Validates pocket and box size inputs
        process_docking_response: Processes docking results
        _process_individual_ligands: Handles individual ligand processing
        _get_solution_score: Retrieves specific scores from solutions
        _select_top_ligand_by_energy: Selects best ligand by binding energy
        _align_protein_for_docking: Aligns protein structure for docking
        dock: Main method for executing molecular docking

    Example:
        >>> client = DockingClient()
        >>> result = client.dock(
        ...     protein="protein.pdb",
        ...     ligands=["CC(=O)O", "C1=CC=CC=C1"],
        ...     pocket_data=pocket_info
        ... )

    Notes:
        - Supports both synchronous and asynchronous docking operations
        - Handles multiple input formats for proteins and ligands
        - Provides progress tracking and visualization
        - Implements automatic protein preparation and ligand protonation
    """
    def __init__(self) -> None:
        """Initialize the base client and attach a molecular-properties client."""
        super().__init__()
        # Used by _protonate_smiles to protonate ligand SMILES.
        self.props_client = MolecularPropertiesClient()

    def get_protein_data_object(self, protein: Protein, pocket: Union[str, Pocket, List[float]], box_size: List[float]) -> dict:
        """
        Build the protein/pocket payload that is sent with a docking request.

        The protein is serialized through ``protein.write_to_file`` into a scratch
        ``.pdb`` file, the file's text is read back, and the scratch file is
        removed again before returning (also when writing or reading fails).

        Args:
            protein (Protein): Protein to serialize. Must provide ``write_to_file(path)``
                and a ``block_type`` attribute.
            pocket (Union[str, Pocket, List[float]]): Either a list of pocket-center
                coordinates, a Pocket object (its ``block_content`` is used), or a path
                to a pocket file (its text is read).
            box_size (List[float]): Docking box dimensions. Only stored in the payload
                when ``pocket`` is given as a list of coordinates.

        Returns:
            dict: A dictionary containing:
                - 'extension': ``protein.block_type``
                - 'content': Text of the serialized protein structure
                - 'pocket_center': The coordinate list (only if pocket is a list)
                - 'box_size': Box dimensions (only if pocket is a list)
                - 'pocket_content': Pocket file/object content (only if pocket is not a list)

        Raises:
            OSError: If the scratch file or the pocket file cannot be read.
        """
        scratch = tempfile.NamedTemporaryFile(delete=False, suffix=".pdb")
        scratch_path = Path(scratch.name)
        try:
            # Close our own handle first: the protein writer reopens the path by name,
            # which fails on platforms that forbid a second open of a held file.
            scratch.close()
            protein.write_to_file(scratch.name)
            block_content = scratch_path.read_text()
        finally:
            # delete=False means nobody else cleans this up; without the unlink every
            # call would leave a stray .pdb file in the temp directory.
            try:
                scratch_path.unlink()
            except FileNotFoundError:
                pass

        obj = {"extension": protein.block_type, "content": block_content}

        if isinstance(pocket, list):
            obj["pocket_center"] = pocket
            obj["box_size"] = box_size
        else:
            # Pocket from file or Pocket object.
            pocket_content = pocket.block_content if isinstance(pocket, Pocket) else Path(pocket).read_text()
            obj["pocket_content"] = pocket_content

        return obj

    def _protonate_smiles(self, smiles_list: List[str], properties: Dict[str, Dict], batch_size: int = 100) -> Tuple[List[str], Dict[str, Dict]]:
        """Protonate SMILES strings batch by batch via the properties client.

        Each batch produced by ``chunker`` is sent to ``self.props_client.protonate``.
        For every returned entry, the first item of ``entry["protonation"]["smiles_list"]``
        is taken as the protonated form, and the properties of the original SMILES
        (or an empty dict) are re-keyed under it.

        Args:
            smiles_list (List[str]): SMILES strings to protonate.
            properties (Dict[str, Dict]): Properties keyed by the original SMILES.
            batch_size (int, optional): Number of SMILES per protonation call. Defaults to 100.

        Returns:
            Tuple[List[str], Dict[str, Dict]]: A tuple containing:
                - The protonated SMILES collected so far
                - A dictionary mapping each protonated SMILES to the original's properties

        Note:
            No exception escapes this method. An entry without the expected
            protonation structure is logged and skipped; any other failure is
            logged and stops processing, returning whatever was collected up to
            that point.
        """
        DEFAULT_LOGGER.log_warning("Protonating SMILES in batches.")
        collected: List[str] = []
        remapped_properties: Dict[str, Dict] = {}

        try:
            for smiles_batch in chunker(smiles_list, batch_size):
                for entry in self.props_client.protonate(smiles_batch, html_output=False):
                    source_smiles = entry["smiles"]
                    try:
                        # Only the first protonation state is used.
                        first_state = entry["protonation"]["smiles_list"][0]
                        remapped_properties[first_state] = properties.get(source_smiles, {})
                        collected.append(first_state)
                    except (KeyError, IndexError) as lookup_error:
                        DEFAULT_LOGGER.log_error(f"Failed to extract protonated SMILES: {str(lookup_error)}")
                # Running total, reported once per finished batch.
                DEFAULT_LOGGER.log_warning(f"Protonated {len(collected)} out of {len(smiles_list)} ligands.")
        except Exception as failure:
            DEFAULT_LOGGER.log_error(f"Failed to protonate SMILES: {str(failure)}")

        return collected, remapped_properties

    def update_progress_bar(self, logger: Logger, progress: ProgressView, body: dict) -> None:
        """Push a status payload into the progress view and redraw it.

        Args:
            logger (Logger): Logger that receives an info message when the percentage grows.
            progress (ProgressView): Progress view to update and display.
            body (dict): Status payload. Keys read:
                - description (str): Status text, defaults to ""
                - percentage: Progress value, converted with ``int``; defaults to 0
                - pose (optional): Ligand content forwarded to the view

        Returns:
            None

        Notes:
            - A "Completed" description is forwarded as-is together with the percentage;
              no pose is passed in that case.
            - Otherwise the description is only forwarded when it differs from
              ``progress.description`` (``None`` is passed when unchanged).
            - An info message is logged when the percentage exceeds ``progress.bar.n``.
            - The view is displayed after every update.
        """
        status_text = body.get("description", "")
        percent_done = int(body.get("percentage", 0))
        pose = body.get("pose")

        if status_text != "Completed":
            # Pass None for an unchanged description so the view keeps its current text.
            desc_update = None if status_text == progress.description else status_text

            if percent_done > progress.bar.n:
                logger.log_info("Docking progress updated.")

            progress.update(new_desc=desc_update, new_percentage=percent_done, new_ligand_content=pose)
        else:
            progress.update(new_desc=status_text, new_percentage=percent_done)

        progress.display()

    def docking_request(self, logger: Logger, protein_data: dict, smiles_list: List[str]) -> Tuple[Optional[dict], bool]:
        """
        Send a docking request to the server with protein data and SMILES list.

        The payload ``{"protein_data": ..., "smiles_list": ...}`` is posted to the
        ``docking`` endpoint and the decoded JSON body of the response is returned.

        Args:
            logger (Logger): Logger object to record execution information
            protein_data (dict): Dictionary containing the protein information and parameters
            smiles_list (List[str]): List of SMILES strings representing molecules to dock

        Returns:
            Tuple[Optional[dict], bool]: A tuple containing:
                - The decoded JSON response body if successful, None if failed
                - A boolean indicating success (True) or failure (False)

        Note:
            No exception escapes this method. Any error while posting or decoding is
            logged and reported as ``(None, False)``. When a response object was
            obtained and its status code is 503, the worker-availability message is
            logged to the console; otherwise the generic warning is logged.
        """
        logger.log_info("Preparing and sending docking request.")
        body = {"protein_data": protein_data, "smiles_list": smiles_list}

        # Bound up front so the error handler can inspect it without probing locals().
        response = None
        try:
            response = self.post_request(endpoint="docking", logger=logger, data=body)
            return response.json(), True
        except Exception as e:
            console_logger = Logger("INFO", None)
            # getattr keeps the handler itself from raising when no response object
            # was obtained (post_request raised, or handed back None).
            if getattr(response, "status_code", None) == 503:
                console_logger.log_error(WARNING_NUMBER_OF_AVAILABLE_WORKERS)
            else:
                console_logger.log_warning(WARNING_MESSAGE)
            logger.log_error(f"Unexpected error during docking request: {str(e)}")
            return None, False

    def get_results_dir(self, result_dir_name: str) -> Tuple[Path, bool]:
        """
        Return the results directory below the end user's home, creating it if needed.

        The home directory is taken from the ``END_USER_HOME`` environment variable,
        falling back to ``/home/bench-user``.

        Args:
            result_dir_name (str): Name of the results directory to create/retrieve

        Returns:
            Tuple[Path, bool]: A tuple containing:
                - Path: Path object pointing to the results directory
                - bool: True if the directory was created by this call, False if it already existed
        """
        home_dir = Path(os.getenv("END_USER_HOME", "/home/bench-user"))
        results_dir = home_dir / result_dir_name

        if results_dir.is_dir():
            return results_dir, False

        results_dir.mkdir(parents=True, exist_ok=True)
        return results_dir, True

    def stop_docking_request(self, request_info: dict) -> None:
        """
        Stops a docking request with the specified request ID.

        This method sends a POST request to ``docking/<request_id>/stop`` and logs
        the server's ``msg`` field, or a default message when the body has none.

        Args:
            request_info (dict): A dictionary containing the request information.
                                Must contain 'request_id' key.

        Returns:
            None

        Raises:
            No exceptions are raised for a bad response; an error is logged if:
            - request_id is missing from request_info
            - No response object is returned for the stop request
            - The stop request fails (non-200 response)
            Exceptions raised by ``post_request`` itself are not caught here.

        Example:
            >>> docking.stop_docking_request({"request_id": "123"})
        """
        request_id = request_info.get("request_id", "")
        if not request_id:
            DEFAULT_LOGGER.log_error("Missing 'request_id' in request_info.")
            return

        DEFAULT_LOGGER.log_info("Stopping docking request.")
        response = self.post_request(endpoint=f"docking/{request_id}/stop", logger=DEFAULT_LOGGER, data={})
        if response is None:
            # Without a response object there is no status or body to inspect.
            DEFAULT_LOGGER.log_error("Failed to stop docking request.")
            return

        # The body is only used for an optional "msg"; an empty, non-JSON or
        # non-object body must not turn a stop attempt into a crash.
        try:
            data = response.json() if response.text else {}
        except ValueError:
            data = {}
        if not isinstance(data, dict):
            data = {}

        if response.status_code == 200:
            DEFAULT_LOGGER.log_info(data.get("msg", "Docking request stopped."))
        else:
            DEFAULT_LOGGER.log_error(data.get("msg", "Failed to stop docking request."))
    def wait_for_docking_response(self, logger: Logger, request_id: str, progress_view: ProgressView, interval: float = 0.5) -> Tuple[Optional[dict], bool]:
        """
        Poll the server until a docking request finishes or fails.

        Each iteration sleeps for ``interval`` seconds and then queries
        ``docking/<request_id>``. The loop has no timeout: it ends only when a
        terminal status is received or no response object is returned.

        Args:
            logger (Logger): Logger instance for recording the process.
            request_id (str): The unique identifier for the docking request.
            progress_view (ProgressView): Progress view instance for updating the progress bar.
            interval (float, optional): Seconds to sleep before each poll. Defaults to 0.5.

        Returns:
            Tuple[Optional[dict], bool]: A tuple containing:
                - The decoded JSON body of the final response if successful, None if failed
                - A boolean indicating success (True) or failure (False)

        Note:
            Handling per HTTP status code:
            - 200: Finished; the progress bar is set to "Completed"/100 and the body is returned
            - 202: Still running; the body is forwarded to the progress bar and polling continues
            - 429: Logged together with the response text, reported as failure
            - Other: Logged as unexpected, reported as failure
        """
        console_logger = Logger("INFO", None)
        logger.log_info("Waiting for docking to complete.")
        status_endpoint = f"docking/{request_id}"

        while True:
            time.sleep(interval)
            response = self.get_request(endpoint=status_endpoint, logger=logger)
            if response is None:
                logger.log_error("No response while waiting.")
                return None, False

            status = response.status_code

            if status == 202:
                # Still in progress: refresh the bar and poll again.
                self.update_progress_bar(logger, progress_view, response.json())
                continue

            if status == 200:
                data = response.json()
                logger.log_info("Docking request successfully completed.")
                self.update_progress_bar(logger, progress_view, {"description": "Completed", "percentage": 100})
                return data, True

            # Every remaining status is terminal and warns on the console first.
            console_logger.log_warning(WARNING_MESSAGE)
            if status == 429:
                logger.log_error(f"Failed with status_code = {response.status_code}, message: {response.text}")
            else:
                logger.log_error("Unexpected docking request status code.")
            return None, False

    def process_docking_request(self, logger: Logger, protein: Protein, pocket: Union[str, Pocket, List[float]], smiles_list: List[str], box_size: List[float], results_dir: str, wait_until_complete: bool = True, progress_only: bool = False) -> Tuple[Optional[dict], bool, Optional[Path]]:
        """
        Submit a docking request and optionally wait for its results.

        The method resolves the results directory, builds the protein payload, sends
        the request, creates a run folder named
        ``<protein.name>_run_<n>_<timestamp>`` and then either polls until the
        request finishes or returns immediately with the request information.

        Args:
            logger (Logger): Logger instance for tracking the docking process
            protein (Protein): Protein object containing the target structure information
            pocket (Union[str, Pocket, List[float]]): Binding pocket, given as a pocket file path,
                a Pocket object, or a list of center coordinates
            smiles_list (List[str]): List of SMILES strings representing the ligands to dock
            box_size (List[float]): Dimensions of the docking box
            results_dir (str): Name of the results directory (resolved via ``get_results_dir``)
            wait_until_complete (bool, optional): If True, poll until docking completes. Defaults to True
            progress_only (bool, optional): Forwarded to the progress view. Defaults to False

        Returns:
            Tuple[Optional[dict], bool, Optional[Path]]: A tuple containing:
                - The docking response when waiting, or the request-info dictionary
                  (``request_id``, ``run_folder_path``, ``protein_file_path``) when not
                  waiting; None on failure
                - Success status boolean
                - Path to the run folder (or None if failed)

        Raises:
            None explicitly, but may raise exceptions from underlying operations

        Example:
            response, success, path = processor.process_docking_request(
                logger,
                protein,
                pocket_coords,
                ["C1=CC=CC=C1", "CC(=O)O"],
                [20, 20, 20],
                "results",
            )
        """
        logger.log_info("Starting docking process.")
        results_path, is_new = self.get_results_dir(results_dir)
        # The run number continues from however many entries the directory already holds.
        if is_new:
            items_count = 0
        else:
            items_count = len(list(results_path.iterdir()))

        protein_data = self.get_protein_data_object(protein, pocket, box_size)

        request_id, started = self.docking_request(logger, protein_data, smiles_list)
        if not started:
            logger.log_error("Failed to initiate docking request.")
            return None, False, None

        if not request_id:
            logger.log_error("Missing request_id in docking response.")
            return None, False, None

        run_name = f"{protein.name}_run_{items_count + 1}_{datetime.now().strftime('%Y%m%d%H%M%S')}"
        folder_path = results_path / run_name
        folder_path.mkdir(parents=True, exist_ok=True)

        info = {
            "request_id": request_id,
            "run_folder_path": str(folder_path),
            "protein_file_path": str(protein.file_path),
        }

        DEFAULT_LOGGER.log_warning(f"Docking request initiated. Request Info: {info}")

        if not wait_until_complete:
            logger.log_info(
                f"Docking request submitted. Use request ID {request_id} to check status/results at {folder_path}."
            )
            return info, True, folder_path

        # The viewer only receives a pocket center when the pocket was given as coordinates.
        viewer = DockingViewer(
            protein_content=protein_data["content"],
            protein_format=protein_data["extension"],
            pocket_center=pocket if isinstance(pocket, list) else None,
            dim=20,
        )
        progress_view = ProgressView(
            desc="",
            progress_only=progress_only,
            docking_viewer=viewer,
        )

        response, completed = self.wait_for_docking_response(logger, request_id, progress_view)
        if completed:
            return response, True, folder_path

        logger.log_error("Docking failed during waiting.")
        return None, False, None

    def prepare(self, pdb_id: str = "", pdb_file_path: str = "", protein: Protein = None, chains: Optional[Union[List[str], str]] = None, ligand: Optional[Ligand] = None, ligand_res_name: Optional[str] = None, remove_metals: Optional[List[str]] = None, keep_resnames: Optional[List[str]] = None, model_loops: bool = False) -> dict:
        """Prepares protein and ligand structures for molecular docking.

        This method resolves the protein source, optionally restricts it to the
        requested chains, picks the ligand (given directly or extracted by residue
        name), strips hetero atoms, prepares the protein and writes the prepared
        protein and the ligand into a ``prepared_for_<ligand>`` folder next to the
        protein file.

        Args:
            pdb_id (str, optional): PDB ID of the protein structure. Defaults to "".
            pdb_file_path (str, optional): Path to PDB file. Defaults to "".
            protein (Protein, optional): Protein object. Defaults to None.
            chains (Union[List[str], str], optional): Chain ID(s) to select from protein. Defaults to None.
            ligand (Ligand, optional): Ligand object. Defaults to None.
            ligand_res_name (str, optional): Residue name of ligand to extract. Defaults to None.
            remove_metals (List[str], optional): Forwarded to ``remove_hetatm``. Defaults to None.
            keep_resnames (List[str], optional): Forwarded to ``remove_hetatm``. Defaults to None.
            model_loops (bool, optional): Whether to model missing loops. Defaults to False.

        Returns:
            dict: Dictionary containing:
                - ligand_res_name: The ``ligand_res_name`` argument as passed in
                - raw_protein_path: File path of the resolved protein
                - prepared_protein_path: Path to the prepared protein structure
                - prepared_ligand_path: Path to the prepared ligand structure

            When preparation stops early (invalid chains, ligand residue not found,
            ligand too small), ``prepared_protein_path`` is the file path of the
            hetero-atom-stripped protein and ``prepared_ligand_path`` is "". When
            protein preparation itself fails, both are "".

        Raises:
            ValueError: If no protein source is provided (protein, pdb_file_path, or pdb_id)
            ValueError: If neither ligand nor ligand_res_name is provided
            ValueError: If multiple copies of specified ligand are found

        Notes:
            - Priority for protein source: protein object > pdb_file_path > pdb_id
            - A given ``pdb_id`` is written onto the protein object that is used,
              including a caller-supplied ``protein``
            - Ligands with fewer than 5 heavy atoms are rejected
            - Loop modeling is switched off when no PDB ID is available
        """
        data = {"ligand_res_name": ligand_res_name}

        if protein is not None:
            modified_protein = protein
            if pdb_id:
                modified_protein.pdb_id = pdb_id
            if pdb_file_path:
                DEFAULT_LOGGER.log_warning(
                    "Both protein object and pdb_file_path are provided. Using provided protein object."
                )
        elif pdb_file_path:
            modified_protein = Protein(file_path=pdb_file_path)
            if pdb_id:
                modified_protein.pdb_id = pdb_id
                DEFAULT_LOGGER.log_info("pdb_id provided alongside pdb_file_path. Using file path as primary source.")
        else:
            if not pdb_id:
                raise ValueError("You must provide at least one of: protein, pdb_file_path, or pdb_id.")
            modified_protein = Protein(pdb_id=pdb_id)

        # Fall back to whatever ID the protein itself carries (needed for loop modeling).
        if not pdb_id:
            pdb_id = modified_protein.pdb_id

        data["raw_protein_path"] = modified_protein.file_path

        if chains:
            chains = [chains] if isinstance(chains, str) else chains
            existing_chains = modified_protein.list_chain_names()
            invalid_chains = [c for c in chains if c not in existing_chains]
            if invalid_chains:
                modified_protein = modified_protein.remove_hetatm(
                    keep_resnames=keep_resnames, remove_metals=remove_metals
                )
                data["prepared_protein_path"] = modified_protein.file_path
                data["prepared_ligand_path"] = ""
                DEFAULT_LOGGER.log_warning(
                    f"Invalid chains {invalid_chains}. Available: {existing_chains}."
                )
                return data
            modified_protein = modified_protein.select_chains(chain_ids=chains)

        hetero_names = modified_protein.list_hetero_names()
        if not ligand and not ligand_res_name:
            raise ValueError(
                f"Must provide ligand or ligand_res_name. Available ligands: {hetero_names}"
            )

        if not ligand:
            if ligand_res_name not in hetero_names:
                modified_protein = modified_protein.remove_hetatm(
                    keep_resnames=keep_resnames, remove_metals=remove_metals
                )
                data["prepared_protein_path"] = modified_protein.file_path
                data["prepared_ligand_path"] = ""
                DEFAULT_LOGGER.log_error(
                    f"Ligand {ligand_res_name} not found. Available: {hetero_names}."
                )
                return data

            # One lookup is enough: repeating the identical call cannot change the count.
            ligands = modified_protein.select_ligand(ligand_res_name)
            if len(ligands) > 1:
                raise ValueError(
                    f"Multiple ligands {ligand_res_name} found. Extract manually."
                )
            ligand = ligands[0]

        # The ligand has been picked by now, so hetero atoms can be stripped from the protein.
        modified_protein = modified_protein.remove_hetatm(
            keep_resnames=keep_resnames, remove_metals=remove_metals
        )

        if ligand and ligand.mol.m.GetNumHeavyAtoms() < 5:
            data["prepared_protein_path"] = modified_protein.file_path
            data["prepared_ligand_path"] = ""
            DEFAULT_LOGGER.log_warning("Ligand too small (<5 heavy atoms).")
            return data

        ligand_res_name = ligand_res_name if ligand_res_name else ligand.name
        temp_dir = modified_protein.file_path.parent / f"prepared_for_{ligand_res_name}"
        temp_dir.mkdir(parents=True, exist_ok=True)

        protein_output_file = temp_dir / f"{modified_protein.name}_aligned.pdb"
        ligand_output_file = temp_dir / f"{ligand_res_name}_aligned.pdb" if ligand else None

        try:
            DEFAULT_LOGGER.log_info("Preparing protein structure.")
            if model_loops and not pdb_id:
                DEFAULT_LOGGER.log_error("Cannot model loops without a PDB ID.")
                model_loops = False

            modified_protein = modified_protein.prepare(model_loops=model_loops, pdb_id=pdb_id)
            modified_protein.write_to_file(protein_output_file)
        except Exception as e:
            DEFAULT_LOGGER.log_error(f"Failed to prepare protein: {str(e)}")
            # Every other return path sets both keys; keep the result shape uniform so
            # callers indexing them get an empty path instead of a KeyError.
            data["prepared_protein_path"] = ""
            data["prepared_ligand_path"] = ""
            return data

        if ligand:
            ligand.write_to_file(ligand_output_file)
            data["prepared_protein_path"] = str(protein_output_file)
            data["prepared_ligand_path"] = str(ligand_output_file)
        else:
            data["prepared_protein_path"] = str(modified_protein.file_path)
            data["prepared_ligand_path"] = ""

        return data


    def ensure_float_list(self, pocket: Union[str, Pocket, List[float]], box_size: Optional[List[float]] = None) -> Tuple[List[float], List[float]]:
        """Normalize the pocket input and the box size into lists of floats.

        Args:
            pocket (Union[str, Pocket, List[float]]): The pocket input, which can be:
                - A list of coordinates; each item is converted with ``float``
                - A Pocket object; ``coordinates.mean(axis=1)`` is used
                - A string path, loaded as ``Pocket(file_path=...)`` and handled
                  like a Pocket object
            box_size (Optional[List[float]], optional): Box dimensions; each item is
                converted with ``float``. Defaults to [20.0, 20.0, 20.0] when None.

        Returns:
            Tuple[List[float], List[float]]: A tuple containing:
                - Processed pocket coordinates as a list
                - Box size as a list of floats

        Raises:
            ValueError: If the pocket input is not a list, Pocket, or str.
        """
        if box_size is None:
            normalized_box = [20.0, 20.0, 20.0]
        else:
            normalized_box = [float(value) for value in box_size]

        if isinstance(pocket, list):
            return [float(value) for value in pocket], normalized_box

        if isinstance(pocket, Pocket):
            pocket_source = pocket
        elif isinstance(pocket, str):
            pocket_source = Pocket(file_path=pocket)
        else:
            raise ValueError("Invalid pocket type.")

        # NOTE(review): mean(axis=1) yields one value per row, so this presumably
        # expects coordinates laid out with one row per spatial axis — confirm
        # against the Pocket class.
        center = pocket_source.coordinates.mean(axis=1).tolist()
        return center, normalized_box

    def process_docking_response(self, response: dict, protein: Protein, run_folder_path: Path, pocket_data: PocketData, properties: Dict[str, Dict] = None) -> Optional[DockingReport]:
        """
        Turn a docking response into a DockingReport and write the solutions to disk.

        The response is iterated report by report. Each report is read through its
        ``SMILES``, ``status``, ``out_content``, ``ranking_score`` and ``solutions``
        keys. Reports whose status is not "Success" are recorded as unsuccessful
        results; successful ones get their own ``smiles_<n>`` folder with the
        processed solutions.

        Args:
            response (dict): The docking response; iterated as a sequence of per-SMILES reports.
            protein (Protein): The protein object used in the docking.
            run_folder_path (Path): Path to the folder where docking results will be saved.
            pocket_data (PocketData): Data about the protein pocket used for docking.
            properties (Dict[str, Dict], optional): Additional properties keyed by SMILES.
                Defaults to None, which is treated as an empty dictionary.

        Returns:
            Optional[DockingReport]: A DockingReport built from the collected results and
                ``pocket_data``.

        Note:
            - An exception while handling one report is logged (to the file logger and
              the console) and that report is left out of the results; the remaining
              reports are still processed
            - Creates a separate folder for each successful SMILES under run_folder_path
            - Saves the combined solutions as ``candidate_solutions.sdf``
            - Counts a report as successful before its solutions are processed
            - Selects the top ligand by energy when none was set during processing
        """
        if properties is None:
            properties = {}

        logger = Logger("INFO", os.getenv("LOG_BIOSIM_CLIENT"))
        logger.log_info("Parsing docking results.")
        results = []
        successful_count = 0

        for position, report in enumerate(response, start=1):
            try:
                index_str = f"{position}-th SMILES"
                logger.log_info(f"Processing {index_str}.")

                smiles = report.get("SMILES", "")
                status = report.get("status", "")
                out_content = report.get("out_content", "")
                ranking_score = report.get("ranking_score")
                solutions = report.get("solutions", [])
                props = properties.get(smiles, {})

                succeeded = status == "Success"
                docking_result = DockingResult(protein, smiles=smiles, successful=succeeded)
                if not docking_result.successful:
                    logger.log_info(f"{index_str} unsuccessful.")
                    results.append(docking_result)
                    continue

                successful_count += 1
                out_folder_path = run_folder_path / f"smiles_{position}"
                out_folder_path.mkdir(parents=True, exist_ok=True)

                logger.log_info(f"Creating SMILES folder at {out_folder_path}.")
                sdf_content = self._process_individual_ligands(
                    docking_result=docking_result,
                    out_folder_path=out_folder_path,
                    out_content=out_content,
                    solutions=solutions,
                    props=props,
                    ranking_score=ranking_score,
                    pocket_data=pocket_data,
                    logger=logger,
                )

                out_file_path = out_folder_path / "candidate_solutions.sdf"
                with out_file_path.open("w") as sdf_file:
                    sdf_file.write(sdf_content)
                logger.log_info(f"Complete solution saved at {out_file_path}.")

                # Fall back to the energy-based pick when processing set no top ligand.
                if not docking_result.top_ligand and docking_result.ligands:
                    docking_result.top_ligand = self._select_top_ligand_by_energy(docking_result.ligands)

                docking_result.file_path = str(out_file_path)
                results.append(docking_result)
                logger.log_info(f"{index_str} processing completed.")
            except Exception as e:
                msg = f"Docking response processing failed {position}-th SMILES - {str(e)}"
                logger.log_error(msg)
                Logger("INFO", None).log_error(msg)

        DEFAULT_LOGGER.log_info(f"Docking completed for {successful_count} ligands.")
        logger.log_info("Docking request completed.")
        return DockingReport(results, pocket_data)

    def _process_individual_ligands(self, docking_result: DockingResult, out_folder_path: Path, out_content: str, solutions: list, props: dict, ranking_score: Optional[float], pocket_data: Optional[PocketData], logger: Logger) -> str:
        """
        Split raw docking output into conformations and persist each one as its own SDF file.

        Every non-blank chunk of ``out_content`` (chunks are delimited by "$$$$" followed by a
        newline) is written to disk, wrapped in a Ligand, annotated with scores, optionally mapped
        back through ``pocket_data``, rewritten from the Ligand, reloaded from disk, and appended
        to ``docking_result.ligands``.

        Args:
            docking_result (DockingResult): Result object whose ``ligands`` list (and possibly
                ``top_ligand``) is updated in place
            out_folder_path (Path): Folder that receives the "candidate_solution_X.sdf" files
            out_content (str): Raw docking output containing the conformation chunks
            solutions (list): Per-conformation score dictionaries, indexed like the chunks
            props (dict): Properties handed to every Ligand that is created
            ranking_score (Optional[float]): When not None, stored as "Ranking Score" on the first
                conformation only; that conformation also becomes ``top_ligand``
            pocket_data (Optional[PocketData]): When truthy and ``from_xyz()`` is falsy, ligand
                coordinates are passed through ``inverse_transform``
            logger (Logger): Logger that receives one message per saved file and per ligand
        Returns:
            str: Concatenation of the text of every file after it was rewritten by the Ligand
        Note:
            - "Pose Score" comes from the "rscore" entry, "Binding Energy" from "energy_score"
            - The file is written twice: first with the raw chunk, then by ``Ligand.write_to_file``
        """
        conformations = [chunk for chunk in out_content.split("$$$$\n") if chunk.strip()]
        rewritten = []

        for position, chunk in enumerate(conformations, start=1):
            solution_index = position - 1
            file_path = out_folder_path / f"candidate_solution_{position}.sdf"
            file_path.write_text(chunk)
            logger.log_info(f"{position}-th conformation saved at {file_path}.")

            pose_score = self._get_solution_score(solutions, solution_index, "rscore")
            binding_energy = self._get_solution_score(solutions, solution_index, "energy_score")

            # NOTE(review): the same `props` dict is handed to every Ligand; if Ligand keeps it by
            # reference, set_property calls would be shared across conformations - confirm.
            ligand = Ligand(block_type="sdf", name=f"conformation_{position}", block_content=chunk, properties=props)
            ligand.set_property("Pose Score", pose_score)
            ligand.set_property("Binding Energy", binding_energy)

            # Only the first conformation sees a non-None ranking score (it is cleared below).
            is_ranked = ranking_score is not None
            if is_ranked:
                ligand.set_property("Ranking Score", ranking_score)

            if pocket_data and not pocket_data.from_xyz():
                ligand.update_coordinates(pocket_data.inverse_transform(ligand.coordinates))

            ligand.write_to_file(str(file_path))

            # Load the ligand back from the file that was just written and keep that file's text.
            reloaded = Ligand(file_path=file_path)
            rewritten.append(file_path.read_text())

            docking_result.ligands.append(reloaded)
            logger.log_info(f"{position}-th ligand object added.")

            if is_ranked:
                docking_result.top_ligand = reloaded
                ranking_score = None

        return "".join(rewritten)

    def _get_solution_score(self, solutions: list, index: int, key: str) -> Optional[float]:
        """
        Retrieves and processes a specific score from a solution at the given index.

        Args:
            solutions (list): List of solution dictionaries containing score information.
            index (int): Index of the solution to retrieve the score from.
            key (str): Key identifying the type of score to retrieve from the solution.

        Returns:
            Optional[float]: The score value if found, processed for 'rscore' to be between 0.05 and 0.95.
                            Returns None if index is out of range or score not found.

        Notes:
            For 'rscore' key, the value is clamped between 0.05 and 0.95.
            For other keys, the raw score value is returned as is.
        """
        if index < len(solutions):
            score = solutions[index].get(key)
            if key == "rscore" and score is not None:
                return min(max(score, 0.05), 0.95)
            return score
        return None

    def _select_top_ligand_by_energy(self, ligands: List[Ligand]) -> Optional[Ligand]:
        """
        Pick the ligand whose "Binding Energy" property is smallest.

        Args:
            ligands (List[Ligand]): Candidate ligands.

        Returns:
            Optional[Ligand]: The candidate with the minimum "Binding Energy", or None when no
                candidate carries that property.

        Notes:
            - Candidates whose "Binding Energy" is None are ignored
            - On ties the earliest candidate in ``ligands`` wins
        """

        def binding_energy(candidate):
            return candidate.get_property("Binding Energy")

        scored = [candidate for candidate in ligands if binding_energy(candidate) is not None]
        return min(scored, key=binding_energy, default=None)

    def _align_protein_for_docking(self, protein: Protein, pocket_data: PocketData) -> Protein:
        """
        Return a copy of ``protein`` whose coordinates were passed through ``pocket_data.transform``.

        Args:
            protein (Protein): Structure to align; it is deep-copied before anything is changed.
            pocket_data (PocketData): Supplies the ``transform`` applied to the coordinates.

        Returns:
            Protein: The deep copy, updated with the transformed coordinates.

        Note:
            All reads and updates go through the copy, never through the ``protein`` argument.
        """
        aligned = deepcopy(protein)
        aligned.update_coordinates(pocket_data.transform(aligned.structure.coord))
        return aligned

    def dock(self, protein: Union[str, Protein], ligands: List[Union[str, Ligand]], pocket_data: PocketData, protonate_ligands: bool = True, results_dir: str = "docking_results", raise_for_protein_mismatch: bool = True, progress_only: bool = False) -> Optional[Union[DockingReport, Tuple[dict, Dict[str, Dict]]]]:
        """
        Dock a set of ligands against a protein using the supplied pocket definition.

        The method loads the protein if needed, optionally checks it against the pocket,
        aligns it unless the pocket comes from XYZ, collects one SMILES string per usable
        ligand, submits the docking request, and turns the response into a report.
        Args:
            protein (Union[str, Protein]): Protein object, or a value passed to ``Protein(file_path=...)``
            ligands (List[Union[str, Ligand]]): Ligand objects or SMILES strings
            pocket_data (PocketData): Provides ``box_center``, ``box_size`` and the alignment transform
            protonate_ligands (bool, optional): Run ``Ligand.protonate_molecules`` on the ligands first. Defaults to True
            results_dir (str, optional): Results directory forwarded to the docking request. Defaults to "docking_results"
            raise_for_protein_mismatch (bool, optional): Call ``pocket_data.match_protein`` before docking. Defaults to True
            progress_only (bool, optional): Forwarded to the docking request; forced on for more than 5 SMILES. Defaults to False
        Returns:
            Optional[Union[DockingReport, Tuple[dict, Dict[str, Dict]]]]:
                - The value returned by ``process_docking_response`` on success
                - None if no ligand is usable or the request did not complete
                - The tuple form in the annotation is not constructed in this method
        Raises:
            ValueError: If ``pocket_data`` is falsy
        Notes:
            - Ligands whose ``available_for_docking`` is falsy are skipped
            - A ligand's ``protonated_smiles`` is preferred over ``mol.smiles`` when truthy
            - ``properties`` is keyed by SMILES, so a repeated SMILES keeps the last ligand's properties
        """
        logger = Logger("INFO", os.getenv("LOG_BIOSIM_CLIENT"))
        logger.log_info("Executing docking request.")

        if not pocket_data:
            raise ValueError("No pocket data provided.")

        receptor = protein if isinstance(protein, Protein) else Protein(file_path=protein)

        if raise_for_protein_mismatch:
            pocket_data.match_protein(receptor)

        box_center = pocket_data.box_center
        box_size = pocket_data.box_size

        # The request is sent with the aligned copy unless the pocket came from XYZ;
        # the report below is always built with the unaligned receptor.
        if pocket_data.from_xyz():
            docking_protein = receptor
        else:
            docking_protein = self._align_protein_for_docking(receptor, pocket_data)

        DEFAULT_LOGGER.log_info("Preprocessing ligands for docking.")

        smiles_list: List[str] = []
        properties: Dict[str, Dict] = {}

        candidates = Ligand.protonate_molecules(ligands) if protonate_ligands else ligands

        for entry in candidates:
            candidate = Ligand(smiles=entry) if isinstance(entry, str) else entry
            if not candidate.available_for_docking:
                continue

            base_smiles = candidate.mol.smiles
            smiles = candidate.protonated_smiles if candidate.protonated_smiles else base_smiles

            smiles_list.append(smiles)
            properties[smiles] = candidate.properties

        if not smiles_list:
            DEFAULT_LOGGER.log_warning("No valid ligands after preprocessing.")
            return None

        DEFAULT_LOGGER.log_info(f"Passing {len(smiles_list)}/{len(candidates)} ligands to docking.")

        # More than five SMILES always switches the request to progress-only mode.
        if len(smiles_list) > 5:
            progress_only = True

        response, completed, run_folder_path = self.process_docking_request(
            logger=logger,
            protein=docking_protein,
            pocket=box_center,
            smiles_list=smiles_list,
            box_size=box_size,
            results_dir=results_dir,
            wait_until_complete=True,
            progress_only=progress_only,
        )

        if not completed:
            logger.log_error("Docking terminated prematurely.")
            return None

        report = self.process_docking_response(
            protein=receptor,
            response=response,
            properties=properties,
            pocket_data=pocket_data,
            run_folder_path=run_folder_path,
        )

        if report is None:
            DEFAULT_LOGGER.log_error("Docking failed.")
        return report

A client class for handling molecular docking operations. This class provides functionality for preparing and executing molecular docking simulations, managing protein-ligand interactions, and processing docking results. It extends the base Client class and integrates with molecular property calculations.

Attributes

props_client : MolecularPropertiesClient
Client for molecular property calculations

Methods

  • get_protein_data_object: Prepares protein data for docking
  • _protonate_smiles: Protonates SMILES strings in batches
  • update_progress_bar: Updates progress visualization during docking
  • docking_request: Sends docking request to server
  • get_results_dir: Creates/retrieves results directory
  • stop_docking_request: Stops ongoing docking request
  • wait_for_docking_response: Polls server for docking completion
  • process_docking_request: Manages complete docking request lifecycle
  • prepare: Prepares protein and ligand structures
  • ensure_float_list: Validates pocket and box size inputs
  • process_docking_response: Processes docking results
  • _process_individual_ligands: Handles individual ligand processing
  • _get_solution_score: Retrieves specific scores from solutions
  • _select_top_ligand_by_energy: Selects best ligand by binding energy
  • _align_protein_for_docking: Aligns protein structure for docking
  • dock: Main method for executing molecular docking

>>> client = DockingClient()
>>> result = client.dock(
...     protein="protein.pdb",
...     ligands=["CC(=O)O", "C1=CC=CC=C1"],
...     pocket_data=pocket_info
... )
  • Supports both synchronous and asynchronous docking operations
  • Handles multiple input formats for proteins and ligands
  • Provides progress tracking and visualization
  • Implements automatic protein preparation and ligand protonation

Ancestors

Methods

def dock(self,
protein: str | Protein,
ligands: List[str | Ligand],
pocket_data: PocketData,
protonate_ligands: bool = True,
results_dir: str = 'docking_results',
raise_for_protein_mismatch: bool = True,
progress_only: bool = False) ‑> DockingReport | Tuple[dict, Dict[str, Dict]] | None
Expand source code
def dock(self, protein: Union[str, Protein], ligands: List[Union[str, Ligand]], pocket_data: PocketData, protonate_ligands: bool = True, results_dir: str = "docking_results", raise_for_protein_mismatch: bool = True, progress_only: bool = False) -> Optional[Union[DockingReport, Tuple[dict, Dict[str, Dict]]]]:
    """
    Dock a set of ligands against a protein using the supplied pocket definition.

    The method loads the protein if needed, optionally checks it against the pocket,
    aligns it unless the pocket comes from XYZ, collects one SMILES string per usable
    ligand, submits the docking request, and turns the response into a report.
    Args:
        protein (Union[str, Protein]): Protein object, or a value passed to ``Protein(file_path=...)``
        ligands (List[Union[str, Ligand]]): Ligand objects or SMILES strings
        pocket_data (PocketData): Provides ``box_center``, ``box_size`` and the alignment transform
        protonate_ligands (bool, optional): Run ``Ligand.protonate_molecules`` on the ligands first. Defaults to True
        results_dir (str, optional): Results directory forwarded to the docking request. Defaults to "docking_results"
        raise_for_protein_mismatch (bool, optional): Call ``pocket_data.match_protein`` before docking. Defaults to True
        progress_only (bool, optional): Forwarded to the docking request; forced on for more than 5 SMILES. Defaults to False
    Returns:
        Optional[Union[DockingReport, Tuple[dict, Dict[str, Dict]]]]:
            - The value returned by ``process_docking_response`` on success
            - None if no ligand is usable or the request did not complete
            - The tuple form in the annotation is not constructed in this method
    Raises:
        ValueError: If ``pocket_data`` is falsy
    Notes:
        - Ligands whose ``available_for_docking`` is falsy are skipped
        - A ligand's ``protonated_smiles`` is preferred over ``mol.smiles`` when truthy
        - ``properties`` is keyed by SMILES, so a repeated SMILES keeps the last ligand's properties
    """
    logger = Logger("INFO", os.getenv("LOG_BIOSIM_CLIENT"))
    logger.log_info("Executing docking request.")

    if not pocket_data:
        raise ValueError("No pocket data provided.")

    receptor = protein if isinstance(protein, Protein) else Protein(file_path=protein)

    if raise_for_protein_mismatch:
        pocket_data.match_protein(receptor)

    box_center = pocket_data.box_center
    box_size = pocket_data.box_size

    # The request is sent with the aligned copy unless the pocket came from XYZ;
    # the report below is always built with the unaligned receptor.
    if pocket_data.from_xyz():
        docking_protein = receptor
    else:
        docking_protein = self._align_protein_for_docking(receptor, pocket_data)

    DEFAULT_LOGGER.log_info("Preprocessing ligands for docking.")

    smiles_list: List[str] = []
    properties: Dict[str, Dict] = {}

    candidates = Ligand.protonate_molecules(ligands) if protonate_ligands else ligands

    for entry in candidates:
        candidate = Ligand(smiles=entry) if isinstance(entry, str) else entry
        if not candidate.available_for_docking:
            continue

        base_smiles = candidate.mol.smiles
        smiles = candidate.protonated_smiles if candidate.protonated_smiles else base_smiles

        smiles_list.append(smiles)
        properties[smiles] = candidate.properties

    if not smiles_list:
        DEFAULT_LOGGER.log_warning("No valid ligands after preprocessing.")
        return None

    DEFAULT_LOGGER.log_info(f"Passing {len(smiles_list)}/{len(candidates)} ligands to docking.")

    # More than five SMILES always switches the request to progress-only mode.
    if len(smiles_list) > 5:
        progress_only = True

    response, completed, run_folder_path = self.process_docking_request(
        logger=logger,
        protein=docking_protein,
        pocket=box_center,
        smiles_list=smiles_list,
        box_size=box_size,
        results_dir=results_dir,
        wait_until_complete=True,
        progress_only=progress_only,
    )

    if not completed:
        logger.log_error("Docking terminated prematurely.")
        return None

    report = self.process_docking_response(
        protein=receptor,
        response=response,
        properties=properties,
        pocket_data=pocket_data,
        run_folder_path=run_folder_path,
    )

    if report is None:
        DEFAULT_LOGGER.log_error("Docking failed.")
    return report

Perform molecular docking of ligands to a protein target. This method handles the complete docking workflow including protein preparation, ligand preprocessing, and docking execution.

Args

protein : Union[str, Protein]
The protein target, either as a file path or Protein object
ligands : List[Union[str, Ligand]]
List of ligands to dock, either as SMILES strings or Ligand objects
pocket_data : PocketData
Data describing the binding pocket location and dimensions
protonate_ligands : bool, optional
Whether to protonate ligands before docking. Defaults to True
results_dir : str, optional
Directory to store docking results. Defaults to "docking_results"
raise_for_protein_mismatch : bool, optional
Whether to raise error if pocket protein doesn't match. Defaults to True
progress_only : bool, optional
If True, returns only progress information. Defaults to False

Returns

Optional[Union[DockingReport, Tuple[dict, Dict[str, Dict]]]]
  • DockingReport object containing docking results
  • None if docking fails or no valid ligands
  • Tuple of (response, properties) if progress_only=True

Raises

ValueError
If no pocket data is provided
ProteinMismatchError
If pocket protein doesn't match input protein and raise_for_protein_mismatch=True

Notes

  • Automatically protonates ligands if protonate_ligands=True
  • Switches to progress_only mode if more than 5 ligands are provided
  • Handles both file paths and object inputs for proteins and ligands
def docking_request(self,
logger: Logger,
protein_data: dict,
smiles_list: List[str]) ‑> Tuple[dict | None, bool]
Expand source code
def docking_request(self, logger: Logger, protein_data: dict, smiles_list: List[str]) -> Tuple[Optional[dict], bool]:
    """
    Send a docking request to the server with protein data and SMILES list.

    The request body is ``{"protein_data": ..., "smiles_list": ...}`` and is posted to the
    "docking" endpoint; the JSON-decoded response is returned on success.

    Args:
        logger (Logger): Logger object to record execution information
        protein_data (dict): Dictionary containing the protein information and parameters
        smiles_list (List[str]): List of SMILES strings representing molecules to dock

    Returns:
        Tuple[Optional[dict], bool]: A tuple containing:
            - The decoded response data if successful, None if failed
            - A boolean indicating success (True) or failure (False)

    Notes:
        No exception escapes this method: any error raised while posting or decoding is
        logged and reported as ``(None, False)``. A response with status code 503 is
        reported on the console as an error, everything else as a warning.
    """
    logger.log_info("Preparing and sending docking request.")
    body = {"protein_data": protein_data, "smiles_list": smiles_list}

    # Bound before the try so the handler can tell "the request never returned" apart from
    # "a response arrived but could not be used" without inspecting locals().
    response = None
    try:
        response = self.post_request(endpoint="docking", logger=logger, data=body)
        data = response.json()

        return data, True
    except Exception as e:
        console_logger = Logger("INFO", None)
        # getattr keeps the handler itself from raising when no usable response object exists.
        if getattr(response, "status_code", None) == 503:
            console_logger.log_error(WARNING_NUMBER_OF_AVAILABLE_WORKERS)
        else:
            console_logger.log_warning(WARNING_MESSAGE)
        logger.log_error(f"Unexpected error during docking request: {str(e)}")
        return None, False

Send a docking request to the server with protein data and SMILES list.

This method prepares and sends a docking request to process molecular docking between a protein and a list of SMILES molecules.

Args

logger : Logger
Logger object to record execution information
protein_data : dict
Dictionary containing the protein information and parameters
smiles_list : List[str]
List of SMILES strings representing molecules to dock

Returns

Tuple[Optional[dict], bool]
A tuple containing: (1) the response data as a dictionary if successful, or None if the request failed; (2) a boolean indicating success (True) or failure (False).

Raises

Exception
Not propagated to the caller: any error raised during the request is caught and logged, and (None, False) is returned instead.
def ensure_float_list(self,
pocket: str | Pocket | List[float],
box_size: List[float] | None = None) ‑> Tuple[List[float], List[float]]
Expand source code
def ensure_float_list(self, pocket: Union[str, Pocket, List[float]], box_size: Optional[List[float]] = None) -> Tuple[List[float], List[float]]:
    """Normalise a pocket specification and a box size into plain lists.

    The box size is converted first, then the pocket is reduced to a list according to its type.

    Args:
        pocket (Union[str, Pocket, List[float]]): One of:
            - A list, whose items are converted with ``float``
            - A Pocket object, reduced via ``coordinates.mean(axis=1).tolist()``
            - A string, used as ``Pocket(file_path=...)`` and then reduced the same way
        box_size (Optional[List[float]], optional): Box dimensions; each item is converted
            with ``float``. Defaults to [20.0, 20.0, 20.0] when None.

    Returns:
        Tuple[List[float], List[float]]: A tuple containing:
            - The pocket as a list
            - The box size as a list of floats

    Raises:
        ValueError: If the pocket input is not a list, Pocket or string.
    """
    box_size = [20.0, 20.0, 20.0] if box_size is None else [float(side) for side in box_size]

    if isinstance(pocket, list):
        return [float(value) for value in pocket], box_size

    if isinstance(pocket, Pocket):
        pocket_obj = pocket
    elif isinstance(pocket, str):
        pocket_obj = Pocket(file_path=pocket)
    else:
        raise ValueError("Invalid pocket type.")

    # NOTE(review): axis=1 averages along the second axis; if `coordinates` is laid out as
    # (n_points, 3) this gives one value per point rather than an (x, y, z) centre - confirm.
    return pocket_obj.coordinates.mean(axis=1).tolist(), box_size

Convert pocket input and box size to lists of floats.

This function processes the pocket input (which can be a string path, Pocket object, or list of coordinates) and box size to ensure they are properly formatted as lists of floats.

Args

pocket : Union[str, Pocket, List[float]]
The pocket input, which can be a string path to a pocket file, a Pocket object, or a list of float coordinates.
box_size : Optional[List[float]], optional
Box dimensions as a list of 3 floats. Defaults to [20.0, 20.0, 20.0].

Returns

Tuple[List[float], List[float]]
A tuple containing: (1) the processed pocket coordinates as a list of floats; (2) the box size as a list of floats.

Raises

ValueError
If the pocket input is not a valid type.
def get_protein_data_object(self,
protein: Protein,
pocket: str | Pocket | List[float],
box_size: List[float]) ‑> dict
Expand source code
def get_protein_data_object(self, protein: Protein, pocket: Union[str, Pocket, List[float]], box_size: List[float]) -> dict:
    """
    Prepares protein data for docking by creating a dictionary containing protein information and pocket specifications.

    Args:
        protein (Protein): A Protein object containing the protein structure data.
        pocket (Union[str, Pocket, List[float]]): Either a path to pocket file, Pocket object, or list of coordinates
            specifying the pocket center.
        box_size (List[float]): Dimensions of the docking box. Only used when pocket is given as a list.

    Returns:
        dict: A dictionary containing:
            - 'extension': ``protein.block_type``
            - 'content': Text of the protein structure as written by ``protein.write_to_file``
            - 'pocket_center': The pocket list (only if pocket is a list)
            - 'box_size': Box dimensions (only if pocket is a list)
            - 'pocket_content': ``pocket.block_content`` for a Pocket object, otherwise the text of the
              file at the given path (only if pocket is not a list)

    Note:
        The protein structure is written to a ".pdb" file inside a temporary directory to extract its
        content; the directory and file are removed before the method returns, so nothing is left on disk.
    """
    # A TemporaryDirectory is cleaned up on exit, unlike NamedTemporaryFile(delete=False),
    # which left one stray file behind for every call.
    with tempfile.TemporaryDirectory() as scratch_dir:
        scratch_file = Path(scratch_dir) / "protein.pdb"
        protein.write_to_file(str(scratch_file))
        block_content = scratch_file.read_text()

    obj = {"extension": protein.block_type, "content": block_content}

    if isinstance(pocket, list):
        obj["pocket_center"] = pocket
        obj["box_size"] = box_size
    else:
        # Pocket from file or Pocket object.
        pocket_content = pocket.block_content if isinstance(pocket, Pocket) else Path(pocket).read_text()
        obj["pocket_content"] = pocket_content

    return obj

Prepares protein data for docking by creating a dictionary containing protein information and pocket specifications.

Args

protein : Protein
A Protein object containing the protein structure data.
pocket : Union[str, Pocket, List[float]]
Either a path to pocket file, Pocket object, or list of coordinates specifying the pocket center.
box_size : List[float]
A list of three floats specifying the dimensions of the docking box in angstroms. Only required when pocket is specified as coordinates.

Returns

dict
A dictionary containing: 'extension' (file extension/type of the protein structure); 'content' (content of the protein structure file); 'pocket_center' (list of coordinates, if the pocket is specified as coordinates); 'box_size' (box dimensions, if the pocket is specified as coordinates); 'pocket_content' (content of the pocket file, if the pocket is specified as a file or Pocket object).

Raises

None

Note

The protein structure is temporarily written to a file to extract its content. The pocket can be specified either as coordinates with box size or as a file/object containing pocket information.

def get_results_dir(self, result_dir_name: str) ‑> Tuple[pathlib.Path, bool]
Expand source code
def get_results_dir(self, result_dir_name: str) -> Tuple[Path, bool]:
    """
    Return the results directory for ``result_dir_name``, creating it when it is missing.

    The directory lives under the path in the END_USER_HOME environment variable,
    falling back to "/home/bench-user" when that variable is unset.

    Args:
        result_dir_name (str): Name of the results directory to create/retrieve

    Returns:
        Tuple[Path, bool]: A tuple containing:
            - Path: Location of the results directory
            - bool: True if this call created the directory, False if it already existed
    """
    base_dir = Path(os.getenv("END_USER_HOME", "/home/bench-user"))
    target = base_dir / result_dir_name

    already_present = target.is_dir()
    if not already_present:
        target.mkdir(parents=True, exist_ok=True)

    return target, not already_present

Creates or retrieves a results directory under the user's home directory.

Args

result_dir_name : str
Name of the results directory to create/retrieve

Returns

Tuple[Path, bool]
A tuple containing: (1) a Path object pointing to the results directory; (2) a bool that is True if the directory was newly created and False if it already existed.
def prepare(self,
pdb_id: str = '',
pdb_file_path: str = '',
protein: Protein = None,
chains: List[str] | str | None = None,
ligand: Ligand | None = None,
ligand_res_name: str | None = None,
remove_metals: List[str] | None = None,
keep_resnames: List[str] | None = None,
model_loops: bool = False) ‑> dict
Expand source code
def prepare(
    self,
    pdb_id: str = "",
    pdb_file_path: str = "",
    protein: Optional[Protein] = None,
    chains: Optional[Union[List[str], str]] = None,
    ligand: Optional[Ligand] = None,
    ligand_res_name: Optional[str] = None,
    remove_metals: Optional[List[str]] = None,
    keep_resnames: Optional[List[str]] = None,
    model_loops: bool = False,
) -> dict:
    """Prepares protein and ligand structures for molecular docking.

    This method resolves the protein source, optionally restricts it to the
    requested chains, obtains the ligand (either the one passed in or the one
    extracted from the structure by residue name), strips hetero atoms, runs
    protein preparation and writes the prepared protein and ligand to a
    ``prepared_for_<ligand_res_name>`` directory next to the protein file.

    Args:
        pdb_id (str, optional): PDB ID of the protein structure. Defaults to "".
        pdb_file_path (str, optional): Path to PDB file. Defaults to "".
        protein (Protein, optional): Protein object. Defaults to None.
        chains (Union[List[str], str], optional): Chain ID(s) to select from protein. Defaults to None.
        ligand (Ligand, optional): Ligand object. Defaults to None.
        ligand_res_name (str, optional): Residue name of ligand to extract. Defaults to None.
        remove_metals (List[str], optional): List of metal atoms to remove. Defaults to None.
        keep_resnames (List[str], optional): List of residue names to keep. Defaults to None.
        model_loops (bool, optional): Whether to model missing loops. Defaults to False.

    Returns:
        dict: Dictionary containing:
            - ligand_res_name: The ``ligand_res_name`` argument as passed in
            - raw_protein_path: ``file_path`` of the resolved protein
            - prepared_protein_path: Path to prepared protein structure
            - prepared_ligand_path: Path to prepared ligand structure, or ""
              when no ligand file was produced

        When the requested chains are invalid, the ligand residue is not found,
        or the ligand is too small, the method logs the problem and returns
        early with ``prepared_protein_path`` pointing at the hetero-atom-stripped
        protein and ``prepared_ligand_path`` set to "". When protein preparation
        itself fails, both prepared paths are "".

    Raises:
        ValueError: If no protein source is provided (protein, pdb_file_path, or pdb_id)
        ValueError: If neither ligand nor ligand_res_name is provided
        ValueError: If multiple copies of specified ligand are found

    Notes:
        - Priority for protein source: protein object > pdb_file_path > pdb_id
        - Ligands with fewer than 5 heavy atoms are rejected
        - Loop modeling requires valid PDB ID
        - When ``protein`` and ``pdb_id`` are both given, ``pdb_id`` is assigned
          onto the passed protein object
    """
    data = {"ligand_res_name": ligand_res_name}

    # Resolve the protein source: explicit object first, then file, then PDB ID.
    if protein is not None:
        modified_protein = protein
        if pdb_id:
            modified_protein.pdb_id = pdb_id
        if pdb_file_path:
            DEFAULT_LOGGER.log_warning(
                "Both protein object and pdb_file_path are provided. Using provided protein object."
            )
    elif pdb_file_path:
        modified_protein = Protein(file_path=pdb_file_path)
        if pdb_id:
            modified_protein.pdb_id = pdb_id
            DEFAULT_LOGGER.log_info("pdb_id provided alongside pdb_file_path. Using file path as primary source.")
    else:
        if not pdb_id:
            raise ValueError("You must provide at least one of: protein, pdb_file_path, or pdb_id.")
        modified_protein = Protein(pdb_id=pdb_id)

    # Fall back to whatever ID the protein carries; needed later for loop modeling.
    if not pdb_id:
        pdb_id = modified_protein.pdb_id

    data["raw_protein_path"] = modified_protein.file_path

    if chains:
        chains = [chains] if isinstance(chains, str) else chains
        existing_chains = modified_protein.list_chain_names()
        invalid_chains = [c for c in chains if c not in existing_chains]
        if invalid_chains:
            modified_protein = modified_protein.remove_hetatm(
                keep_resnames=keep_resnames, remove_metals=remove_metals
            )
            data["prepared_protein_path"] = modified_protein.file_path
            data["prepared_ligand_path"] = ""
            DEFAULT_LOGGER.log_warning(
                f"Invalid chains {invalid_chains}. Available: {existing_chains}."
            )
            return data
        modified_protein = modified_protein.select_chains(chain_ids=chains)

    hetero_names = modified_protein.list_hetero_names()
    if not ligand and not ligand_res_name:
        raise ValueError(
            f"Must provide ligand or ligand_res_name. Available ligands: {hetero_names}"
        )

    if not ligand:
        if ligand_res_name not in hetero_names:
            modified_protein = modified_protein.remove_hetatm(
                keep_resnames=keep_resnames, remove_metals=remove_metals
            )
            data["prepared_protein_path"] = modified_protein.file_path
            data["prepared_ligand_path"] = ""
            DEFAULT_LOGGER.log_error(
                f"Ligand {ligand_res_name} not found. Available: {hetero_names}."
            )
            return data

        # One lookup is enough: repeating the same query cannot change the
        # number of matches.
        ligands = modified_protein.select_ligand(ligand_res_name)
        if len(ligands) > 1:
            raise ValueError(
                f"Multiple ligands {ligand_res_name} found. Extract manually."
            )
        ligand = ligands[0]

    # The ligand has been extracted (or supplied), so hetero atoms can go now.
    modified_protein = modified_protein.remove_hetatm(
        keep_resnames=keep_resnames, remove_metals=remove_metals
    )

    if ligand and ligand.mol.m.GetNumHeavyAtoms() < 5:
        data["prepared_protein_path"] = modified_protein.file_path
        data["prepared_ligand_path"] = ""
        DEFAULT_LOGGER.log_warning("Ligand too small (<5 heavy atoms).")
        return data

    ligand_res_name = ligand_res_name if ligand_res_name else ligand.name
    temp_dir = modified_protein.file_path.parent / f"prepared_for_{ligand_res_name}"
    temp_dir.mkdir(parents=True, exist_ok=True)

    protein_output_file = temp_dir / f"{modified_protein.name}_aligned.pdb"
    ligand_output_file = temp_dir / f"{ligand_res_name}_aligned.pdb" if ligand else None

    try:
        DEFAULT_LOGGER.log_info("Preparing protein structure.")
        if model_loops and not pdb_id:
            # Degrade gracefully: prepare without loops instead of aborting.
            DEFAULT_LOGGER.log_error("Cannot model loops without a PDB ID.")
            model_loops = False

        modified_protein = modified_protein.prepare(model_loops=model_loops, pdb_id=pdb_id)
        modified_protein.write_to_file(protein_output_file)
    except Exception as e:
        DEFAULT_LOGGER.log_error(f"Failed to prepare protein: {str(e)}")
        # Keep the result schema identical to every other early exit so
        # callers can index the prepared paths without a KeyError.
        data["prepared_protein_path"] = ""
        data["prepared_ligand_path"] = ""
        return data

    if ligand:
        ligand.write_to_file(ligand_output_file)
        data["prepared_protein_path"] = str(protein_output_file)
        data["prepared_ligand_path"] = str(ligand_output_file)
    else:
        data["prepared_protein_path"] = str(modified_protein.file_path)
        data["prepared_ligand_path"] = ""

    return data

Prepares protein and ligand structures for molecular docking. This method processes protein and ligand structures, handling various input formats and performing necessary preparations for docking simulations.

Args

pdb_id : str, optional
PDB ID of the protein structure. Defaults to "".
pdb_file_path : str, optional
Path to PDB file. Defaults to "".
protein : Protein, optional
Protein object. Defaults to None.
chains : Union[List[str], str], optional
Chain ID(s) to select from protein. Defaults to None.
ligand : Ligand, optional
Ligand object. Defaults to None.
ligand_res_name : str, optional
Residue name of ligand to extract. Defaults to None.
remove_metals : List[str], optional
List of metal atoms to remove. Defaults to None.
keep_resnames : List[str], optional
List of residue names to keep. Defaults to None.
model_loops : bool, optional
Whether to model missing loops. Defaults to False.

Returns

dict
Dictionary containing: - ligand_res_name (str): Name of the ligand residue - raw_protein_path (str): Path to original protein structure - prepared_protein_path (str): Path to prepared protein structure - prepared_ligand_path (str): Path to prepared ligand structure

Raises

ValueError
If no protein source is provided (protein, pdb_file_path, or pdb_id)
ValueError
If neither ligand nor ligand_res_name is provided
ValueError
If multiple copies of specified ligand are found

Notes

  • Priority for protein source: protein object > pdb_file_path > pdb_id
  • Ligands with fewer than 5 heavy atoms are rejected
  • Loop modeling requires valid PDB ID
def process_docking_request(self,
logger: Logger,
protein: Protein,
pocket: str | Pocket | List[float],
smiles_list: List[str],
box_size: List[float],
results_dir: str,
wait_until_complete: bool = True,
progress_only: bool = False) ‑> Tuple[dict | None, bool, pathlib.Path | None]
Expand source code
def process_docking_request(
    self,
    logger: Logger,
    protein: Protein,
    pocket: Union[str, Pocket, List[float]],
    smiles_list: List[str],
    box_size: List[float],
    results_dir: str,
    wait_until_complete: bool = True,
    progress_only: bool = False,
) -> Tuple[Optional[dict], bool, Optional[Path]]:
    """
    Submit a docking request and, optionally, block until it finishes.

    The method resolves the results directory, builds the protein payload,
    sends the docking request, creates a per-run folder and then either waits
    for the server response or returns the request bookkeeping immediately.

    Args:
        logger (Logger): Logger instance for tracking the docking process
        protein (Protein): Protein object containing the target structure information
        pocket (Union[str, Pocket, List[float]]): Binding pocket information, either as a string identifier,
            Pocket object, or list of coordinates [x, y, z]
        smiles_list (List[str]): List of SMILES strings representing the ligands to dock
        box_size (List[float]): Dimensions of the docking box [x, y, z]
        results_dir (str): Directory name passed to ``get_results_dir``
        wait_until_complete (bool, optional): If True, wait for docking completion. Defaults to True
        progress_only (bool, optional): Forwarded to ``ProgressView``. Defaults to False

    Returns:
        Tuple[Optional[dict], bool, Optional[Path]]: A tuple containing:
            - The docking response when waiting, the request info dictionary
              (``request_id``, ``run_folder_path``, ``protein_file_path``) when
              not waiting, or None on failure
            - Success status boolean
            - Path to the run folder (or None if failed)

    Raises:
        None explicitly, but may raise exceptions from underlying operations

    Example:
        response, success, path = processor.process_docking_request(
            logger,
            protein,
            pocket_coords,
            ["C1=CC=CC=C1", "CC(=O)O"],
            [20, 20, 20],
            "results",
        )
    """
    failure = (None, False, None)

    logger.log_info("Starting docking process.")
    results_path, is_new = self.get_results_dir(results_dir)
    # A directory that was just created has nothing in it, so skip the listing.
    items_count = 0 if is_new else len(list(results_path.iterdir()))

    protein_data = self.get_protein_data_object(protein, pocket, box_size)
    request_id, started = self.docking_request(logger, protein_data, smiles_list)

    if not started:
        logger.log_error("Failed to initiate docking request.")
        return failure
    if not request_id:
        logger.log_error("Missing request_id in docking response.")
        return failure

    # Run folders are numbered after the existing entries and timestamped.
    folder_name = f"{protein.name}_run_{items_count + 1}_{datetime.now().strftime('%Y%m%d%H%M%S')}"
    folder_path = results_path / folder_name
    folder_path.mkdir(parents=True, exist_ok=True)

    info = {
        "request_id": request_id,
        "run_folder_path": str(folder_path),
        "protein_file_path": str(protein.file_path),
    }
    DEFAULT_LOGGER.log_warning(f"Docking request initiated. Request Info: {info}")

    if not wait_until_complete:
        logger.log_info(
            f"Docking request submitted. Use request ID {request_id} to check status/results at {folder_path}."
        )
        return info, True, folder_path

    # Only a coordinate list can be handed to the viewer as a pocket center.
    viewer = DockingViewer(
        protein_content=protein_data["content"],
        protein_format=protein_data["extension"],
        pocket_center=pocket if isinstance(pocket, list) else None,
        dim=20,
    )
    progress_view = ProgressView(
        desc="",
        progress_only=progress_only,
        docking_viewer=viewer,
    )

    response, completed = self.wait_for_docking_response(logger, request_id, progress_view)
    if completed:
        return response, True, folder_path

    logger.log_error("Docking failed during waiting.")
    return failure

Process a molecular docking request and manage its execution.

This method handles the full lifecycle of a docking request, including setting up directories, sending the request, and optionally waiting for results.

Args

logger : Logger
Logger instance for tracking the docking process
protein : Protein
Protein object containing the target structure information
pocket : Union[str, Pocket, List[float]]
Binding pocket information, either as a string identifier, Pocket object, or list of coordinates [x, y, z]
smiles_list : List[str]
List of SMILES strings representing the ligands to dock
box_size : List[float]
Dimensions of the docking box [x, y, z]
results_dir : str
Directory path where docking results will be stored
wait_until_complete : bool, optional
If True, wait for docking completion. Defaults to True
progress_only : bool, optional
If True, show only progress without visualization. Defaults to False

Returns

Tuple[Optional[dict], bool, Optional[Path]]
A tuple containing: - Response data dictionary (or None if failed) - Success status boolean - Path to results folder (or None if failed)

Raises

None explicitly, but may raise exceptions from underlying operations

Example

response, success, path = processor.process_docking_request( logger, protein, pocket_coords, ["C1=CC=CC=C1", "CC(=O)O"], [20, 20, 20], "results" )

def process_docking_response(self,
response: dict,
protein: Protein,
run_folder_path: pathlib.Path,
pocket_data: PocketData,
properties: Dict[str, Dict] = None) ‑> DockingReport | None
Expand source code
def process_docking_response(
    self,
    response: dict,
    protein: Protein,
    run_folder_path: Path,
    pocket_data: PocketData,
    properties: Dict[str, Dict] = None,
) -> Optional[DockingReport]:
    """
    Turn a docking response into a DockingReport and write the solutions to disk.

    Each entry of ``response`` is handled independently. Entries whose status is
    not "Success" are recorded as unsuccessful results. Successful entries get
    their own ``smiles_<n>`` folder under ``run_folder_path`` containing a
    ``candidate_solutions.sdf`` file.

    Args:
        response (dict): The docking response. It is iterated with ``enumerate``
            and every item is read with ``.get``.
            NOTE(review): this looks like a sequence of per-SMILES dictionaries
            rather than a single dict - confirm against the server payload.
        protein (Protein): The protein object used in the docking.
        run_folder_path (Path): Path to the folder where docking results will be saved.
        pocket_data (PocketData): Data about the protein pocket used for docking.
        properties (Dict[str, Dict], optional): Additional properties keyed by SMILES.
            Defaults to None, which is treated as an empty mapping.

    Returns:
        Optional[DockingReport]: A DockingReport built from the collected results
            and ``pocket_data``.

    Note:
        - Errors raised while handling one entry are logged and that entry is
          skipped; the remaining entries are still processed
        - Creates a separate folder for each successful SMILES compound under run_folder_path
        - Saves docking solutions in SDF format
        - Tracks successful vs unsuccessful docking attempts
        - Selects top ligand based on energy when not explicitly determined
    """
    properties = {} if properties is None else properties

    logger = Logger("INFO", os.getenv("LOG_BIOSIM_CLIENT"))
    logger.log_info("Parsing docking results.")

    successful_count = 0
    results = []

    for i, report in enumerate(response):
        try:
            index_str = f"{i + 1}-th SMILES"
            logger.log_info(f"Processing {index_str}.")

            # Pull everything out of the report up front.
            smiles = report.get("SMILES", "")
            report_status = report.get("status", "")
            raw_output = report.get("out_content", "")
            score = report.get("ranking_score")
            candidate_solutions = report.get("solutions", [])
            smiles_props = properties.get(smiles, {})

            docking_result = DockingResult(
                protein,
                smiles=smiles,
                successful=(report_status == "Success"),
            )
            if not docking_result.successful:
                logger.log_info(f"{index_str} unsuccessful.")
                results.append(docking_result)
                continue

            successful_count += 1
            out_folder_path = run_folder_path / f"smiles_{i + 1}"
            out_folder_path.mkdir(parents=True, exist_ok=True)
            logger.log_info(f"Creating SMILES folder at {out_folder_path}.")

            sdf_content = self._process_individual_ligands(
                props=smiles_props,
                logger=logger,
                solutions=candidate_solutions,
                pocket_data=pocket_data,
                out_content=raw_output,
                ranking_score=score,
                docking_result=docking_result,
                out_folder_path=out_folder_path,
            )

            out_file_path = out_folder_path / "candidate_solutions.sdf"
            with out_file_path.open("w") as sdf_file:
                sdf_file.write(sdf_content)
            logger.log_info(f"Complete solution saved at {out_file_path}.")

            # Fall back to the lowest-energy pose when no top ligand was chosen.
            if not docking_result.top_ligand and docking_result.ligands:
                docking_result.top_ligand = self._select_top_ligand_by_energy(docking_result.ligands)

            docking_result.file_path = str(out_file_path)
            results.append(docking_result)
            logger.log_info(f"{index_str} processing completed.")
        except Exception as e:
            # Best effort: report the failure to both loggers and move on.
            msg = f"Docking response processing failed {i + 1}-th SMILES - {str(e)}"
            logger.log_error(msg)
            Logger("INFO", None).log_error(msg)

    DEFAULT_LOGGER.log_info(f"Docking completed for {successful_count} ligands.")
    logger.log_info("Docking request completed.")
    return DockingReport(results, pocket_data)

Process the docking response and generate a docking report. This method processes the response from a docking operation, creating necessary output folders and files for each SMILES compound, and generates a comprehensive docking report.

Args

response : dict
The docking response dictionary containing results for multiple SMILES compounds.
protein : Protein
The protein object used in the docking.
run_folder_path : Path
Path to the folder where docking results will be saved.
pocket_data : PocketData
Data about the protein pocket used for docking.
properties : Dict[str, Dict], optional
Additional properties for each SMILES compound. Defaults to None.

Returns

Optional[DockingReport]
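A DockingReport object containing the collected docking results and pocket data. Errors on individual SMILES entries are logged and those entries are skipped, so a report is still returned when some entries could not be processed.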

Raises

Exception
If there's an error processing individual SMILES results. These exceptions are caught and logged, allowing the process to continue with remaining compounds.

Note

  • Creates a separate folder for each SMILES compound under run_folder_path
  • Saves docking solutions in SDF format
  • Tracks successful vs unsuccessful docking attempts
  • Selects top ligand based on energy when not explicitly determined
def stop_docking_request(self, request_info: dict) ‑> None
Expand source code
def stop_docking_request(self, request_info: dict) -> None:
    """
    Stops a docking request with the specified request ID.

    This method sends a POST request to ``docking/<request_id>/stop`` and logs
    the outcome. The message logged is the ``msg`` field of the JSON reply when
    one is present, otherwise a generic success/failure message.

    Args:
        request_info (dict): A dictionary containing the request information.
                            Must contain 'request_id' key.

    Returns:
        None

    Raises:
        No explicit exceptions are raised, but logs error messages if:
        - request_id is missing from request_info
        - No response object is returned by the POST call
        - The stop request fails (non-200 response)

    Example:
        >>> docking.stop_docking_request({"request_id": "123"})
    """
    request_id = request_info.get("request_id", "")
    if not request_id:
        DEFAULT_LOGGER.log_error("Missing 'request_id' in request_info.")
        return

    DEFAULT_LOGGER.log_info("Stopping docking request.")
    response = self.post_request(endpoint=f"docking/{request_id}/stop", logger=DEFAULT_LOGGER, data={})
    if response is None:
        # Same guard the polling code applies to its HTTP calls; without it a
        # failed request surfaces as an AttributeError.
        DEFAULT_LOGGER.log_error("No response received while stopping docking request.")
        return

    # Decode defensively: an empty or non-JSON body (e.g. a proxy error page)
    # must fall back to the default message instead of raising.
    data = {}
    if response.text:
        try:
            parsed = response.json()
        except ValueError:
            # JSON decoding errors are ValueError subclasses.
            parsed = None
        if isinstance(parsed, dict):
            data = parsed

    if response.status_code == 200:
        DEFAULT_LOGGER.log_info(data.get("msg", "Docking request stopped."))
    else:
        DEFAULT_LOGGER.log_error(data.get("msg", "Failed to stop docking request."))

Stops a docking request with the specified request ID.

This method sends a POST request to stop an ongoing docking request. It requires the request ID to identify which docking request to stop.

Args

request_info : dict
A dictionary containing the request information. Must contain 'request_id' key.

Returns

None

Raises

No explicit exceptions are raised, but logs error messages if: - request_id is missing from request_info - The stop request fails (non-200 response)

Example

>>> docking.stop_docking_request({"request_id": "123"})
def update_progress_bar(self,
logger: Logger,
progress: ProgressView,
body: dict) ‑> None
Expand source code
def update_progress_bar(self, logger: Logger, progress: ProgressView, body: dict) -> None:
    """Refreshes the docking progress view from a status payload.

    Args:
        logger (Logger): Logger object for recording progress updates.
        progress (ProgressView): Progress view object to be updated.
        body (dict): Dictionary containing progress information with keys:
            - description (str): Description of current docking status
            - percentage: Current progress percentage; converted with ``int``
              and treated as 0 when the key is absent
            - pose (str, optional): Ligand content/pose information

    Returns:
        None

    Notes:
        - When description is "Completed" the view is updated with the
          description and percentage only (no pose) and displayed
        - Otherwise the description is passed on only if it differs from the
          view's current description; ``None`` is sent when unchanged
        - Logs info message when percentage exceeds the bar's current value
        - The view is displayed after every update
    """
    status_text = body.get("description", "")
    percent_done = int(body.get("percentage", 0))
    pose_content = body.get("pose")

    if status_text != "Completed":
        # Send the description only when it actually changed.
        desc_change = None if status_text == progress.description else status_text

        if percent_done > progress.bar.n:
            logger.log_info("Docking progress updated.")

        progress.update(
            new_desc=desc_change,
            new_percentage=percent_done,
            new_ligand_content=pose_content,
        )
    else:
        progress.update(new_desc=status_text, new_percentage=percent_done)

    progress.display()

Updates the progress bar for docking operations based on received status information.

Args

logger : Logger
Logger object for recording progress updates.
progress : ProgressView
Progress view object to be updated.
body : dict
Dictionary containing progress information with keys: - description (str): Description of current docking status - percentage (int): Current progress percentage (0-100) - pose (str, optional): Ligand content/pose information

Returns

None

Notes

  • Completes and displays final progress when description is "Completed"
  • Only updates description if changed from current progress description
  • Logs info message when percentage increases
  • Updates and displays progress bar with new information
def wait_for_docking_response(self,
logger: Logger,
request_id: str,
progress_view: ProgressView,
interval: float = 0.5) ‑> Tuple[dict | None, bool]
Expand source code
def wait_for_docking_response(
    self,
    logger: Logger,
    request_id: str,
    progress_view: ProgressView,
    interval: float = 0.5,
) -> Tuple[Optional[dict], bool]:
    """
    Polls the server until the docking request finishes or fails.

    Every iteration sleeps for ``interval`` seconds, queries
    ``docking/<request_id>`` and acts on the HTTP status code. The loop has no
    timeout of its own: it keeps polling for as long as the server answers 202.

    Args:
        logger (Logger): Logger instance for recording the process.
        request_id (str): The unique identifier for the docking request.
        progress_view (ProgressView): Progress view instance for updating the progress bar.
        interval (float, optional): Time interval between polling requests in seconds. Defaults to 0.5.

    Returns:
        Tuple[Optional[dict], bool]: A tuple containing:
            - The decoded JSON body of the 200 response, or None on failure
            - A boolean indicating success (True) or failure (False)

    Note:
        The method handles different HTTP status codes:
        - 200: Success, the progress bar is set to "Completed"/100 and the body is returned
        - 202: In progress, the body is forwarded to the progress bar and polling continues
        - 429: Warning on the console logger, error with status and body text, failure
        - Other: Warning on the console logger, generic error, failure
        A missing response object (None) is logged and also ends in failure.
    """
    console_logger = Logger("INFO", None)
    logger.log_info("Waiting for docking to complete.")

    while True:
        time.sleep(interval)
        response = self.get_request(endpoint=f"docking/{request_id}", logger=logger)
        if response is None:
            logger.log_error("No response while waiting.")
            return None, False

        status = response.status_code

        if status == 202:
            # Still running: refresh the bar and poll again.
            self.update_progress_bar(logger, progress_view, response.json())
            continue

        if status == 200:
            data = response.json()
            logger.log_info("Docking request successfully completed.")
            self.update_progress_bar(logger, progress_view, {"description": "Completed", "percentage": 100})
            return data, True

        # Every remaining status is terminal; only the log detail differs.
        console_logger.log_warning(WARNING_MESSAGE)
        if status == 429:
            logger.log_error(f"Failed with status_code = {response.status_code}, message: {response.text}")
        else:
            logger.log_error("Unexpected docking request status code.")
        return None, False

Waits for a docking request to complete by polling the server at regular intervals.

This method continuously checks the status of a docking request until it either completes successfully or fails. It updates a progress bar during the waiting period.

Args

logger : Logger
Logger instance for recording the process.
request_id : str
The unique identifier for the docking request.
progress_view : ProgressView
Progress view instance for updating the progress bar.
interval : float, optional
Time interval between polling requests in seconds. Defaults to 0.5.

Returns

Tuple[Optional[dict], bool]
A tuple containing: - The response data as a dictionary if successful, None if failed - A boolean indicating success (True) or failure (False)

Note

The method handles different HTTP status codes: - 200: Success, docking completed - 202: In progress, updates progress bar - 429: Rate limit exceeded - Other: Unexpected error

Inherited members