# Original Copyright 2021 DeepMind Technologies Limited
# Modifications Copyright 2022 Amazon.com, Inc. or its affiliates. All Rights Reserved.
# SPDX-License-Identifier: Apache-2.0

"""Script to identify templates for AlphaFold"""
import os
import shutil
import time
from time import strftime, gmtime
from resource import getrusage, RUSAGE_SELF
import json
from absl import app
from absl import flags
from absl import logging

from alphafold.data.tools import hhsearch
from alphafold.data.tools import hmmsearch
from new_pipelines import TemplateSearchPipeline

# Set the absl logging verbosity for this script to INFO.
logging.set_verbosity(logging.INFO)

# --- Input / output locations -------------------------------------------------
flags.DEFINE_string(
    name="msa_path",
    default=None,
    help="Path to a .sto file containing MSA hits.",
)
flags.DEFINE_string(
    name="output_dir",
    default=None,
    help="Path to a directory that will store the results.",
)

# --- External tool binaries ---------------------------------------------------
# Each default is whatever ``shutil.which`` resolves on the current PATH, which
# is ``None`` when the executable cannot be found.
flags.DEFINE_string(
    name="hhsearch_binary_path",
    default=shutil.which("hhsearch"),
    help="Path to the HHsearch executable.",
)
flags.DEFINE_string(
    name="hmmsearch_binary_path",
    default=shutil.which("hmmsearch"),
    help="Path to the hmmsearch executable.",
)
flags.DEFINE_string(
    name="hmmbuild_binary_path",
    default=shutil.which("hmmbuild"),
    help="Path to the hmmbuild executable.",
)

# --- Search configuration -----------------------------------------------------
flags.DEFINE_string(
    name="database_path",
    default="/database",
    help="Path to the PDB70 and pdb_seqres databases.",
)
flags.DEFINE_enum(
    name="model_preset",
    default="monomer",
    enum_values=["monomer", "monomer_casp14", "monomer_ptm", "multimer"],
    help=(
        "Choose preset model configuration - the monomer model, the monomer "
        "model with extra ensembling, monomer model with pTM head, or "
        "multimer model"
    ),
)
flags.DEFINE_integer(
    name="cpu",
    default=4,
    help="Number of cpus to use for search",
)

# Module-wide handle used to read the flag values defined above.
FLAGS = flags.FLAGS


def search_templates(
    msa_path: str, output_dir: str, data_pipeline: TemplateSearchPipeline
) -> None:
    """Run the template search for one MSA and write run metrics.

    Calls ``data_pipeline.process`` and then writes ``metrics.json`` into
    ``output_dir``. The metrics contain the process name, start and end
    timestamps (GMT), the wall-clock duration of the search, and the current
    values of the ``model_preset`` and ``cpu`` flags.

    Args:
        msa_path: Path to the MSA file forwarded to ``data_pipeline.process``.
        output_dir: Directory forwarded to ``data_pipeline.process`` and used
            for ``metrics.json``. It is created if it does not exist.
        data_pipeline: Pipeline object whose ``process`` method performs the
            search; its return value is logged as the location of the
            template hits.
    """
    timestamp_format = "%d %b %Y %H:%M:%S +0000"
    metrics = {
        "process": "Template Search",
        "start_time": strftime(timestamp_format, gmtime()),
        "timings": {},
    }
    logging.info("Searching for templates")
    # exist_ok avoids the check-then-create race of a separate existence test.
    os.makedirs(output_dir, exist_ok=True)

    # Search for templates
    t_0 = time.time()
    pdb_hits_out_path = data_pipeline.process(msa_path=msa_path, output_dir=output_dir)
    logging.info("Template hits written to %s", pdb_hits_out_path)
    process_time = time.time() - t_0
    metrics["timings"]["total"] = round(process_time, 3)

    logging.info("Template search completed in %s seconds.", process_time)
    metrics.update(
        {
            "model_preset": FLAGS.model_preset,
            "cpu": FLAGS.cpu,
            "end_time": strftime(timestamp_format, gmtime()),
            # Peak-memory reporting is currently disabled:
            # "peak_reserved_memory_gb": round(getrusage(RUSAGE_SELF).ru_maxrss / 1000000, 3),
        }
    )
    # Write next to the pipeline output, using the directory this function was
    # given rather than the global flag, so the two locations cannot diverge.
    metrics_output_path = os.path.join(output_dir, "metrics.json")
    with open(metrics_output_path, "w") as f:
        json.dump(metrics, f)

def main(argv):
    """Build the template searcher for the selected preset and run the search.

    For a preset whose name contains ``"multimer"`` the searcher is
    ``hmmsearch.Hmmsearch`` over ``pdb_seqres.txt``; for every other preset it
    is ``hhsearch.HHSearch`` over ``pdb70``. Both database locations are
    resolved inside ``--database_path``.

    Args:
        argv: Positional command-line arguments passed by ``app.run``; unused.

    Raises:
        ValueError: If a binary needed by the selected preset has no path.
    """
    del argv  # Unused.

    run_multimer_system = "multimer" in FLAGS.model_preset

    # Validate only the executables that the selected searcher is given, so a
    # run is not rejected because of a missing tool it would never use.
    if run_multimer_system:
        required_tools = ("hmmsearch", "hmmbuild")
    else:
        required_tools = ("hhsearch",)

    for tool_name in required_tools:
        if not FLAGS[f"{tool_name}_binary_path"].value:
            raise ValueError(
                f'Could not find path to the "{tool_name}" binary. Make '
                "sure it is installed on your system."
            )

    if run_multimer_system:
        template_searcher = hmmsearch.Hmmsearch(
            binary_path=FLAGS.hmmsearch_binary_path,
            hmmbuild_binary_path=FLAGS.hmmbuild_binary_path,
            database_path=os.path.join(FLAGS.database_path, "pdb_seqres.txt"),
        )
    else:
        template_searcher = hhsearch.HHSearch(
            binary_path=FLAGS.hhsearch_binary_path,
            databases=[os.path.join(FLAGS.database_path, "pdb70")],
        )

    data_pipeline = TemplateSearchPipeline(
        template_searcher=template_searcher,
        cpu=FLAGS.cpu,
    )

    search_templates(
        msa_path=FLAGS.msa_path,
        output_dir=FLAGS.output_dir,
        data_pipeline=data_pipeline,
    )


if __name__ == "__main__":
    # Neither flag has a usable default, so both must be supplied by the caller.
    flags.mark_flag_as_required("msa_path")
    flags.mark_flag_as_required("output_dir")

    app.run(main)