import re
import os
from openfold.data import data_pipeline
import argparse
#from openfold.utils.tensor_utils import (
#    tensor_tree_map,
#)

def precompute_alignments(alignment_dir, no_cpus, tmp_fasta_path):
    """Run the alignment tools for the first record of a FASTA file.

    The header line of the first record in ``tmp_fasta_path`` is used as the
    tag, and results are written to ``<alignment_dir>/<tag>``. If that
    directory already exists, nothing is done.

    Args:
        alignment_dir: Root directory under which the per-tag output
            directory is created.
        no_cpus: Forwarded to ``data_pipeline.AlignmentRunner`` as
            ``no_cpus``.
        tmp_fasta_path: Path of the FASTA file to read; it is also the file
            handed to ``AlignmentRunner.run``.

    Raises:
        ValueError: If the file contains no record with a non-empty header.
        OSError: If the FASTA file cannot be opened.
    """
    with open(tmp_fasta_path, "r") as fp:
        data = fp.read()

    # Split the file on '>' and each record into (header, remainder). The
    # flattened list alternates header/sequence; its first entry is whatever
    # preceded the first '>' (normally the empty string) and is dropped.
    # NOTE(review): the tag derivation is deliberately left unchanged so
    # directory names stay the same - presumably other tooling derives the
    # tag the same way; confirm before altering it.
    fields = [
        part.replace('\n', '')
        for record in data.split('>')
        for part in record.strip().split('\n', 1)
    ][1:]
    tags = fields[::2]

    # Fail loudly rather than with a bare IndexError (no records) or by
    # silently targeting alignment_dir itself (empty header).
    if not tags or not tags[0]:
        raise ValueError(
            f"No FASTA record with a non-empty header found in {tmp_fasta_path!r}"
        )
    tag = tags[0]

    local_alignment_dir = os.path.join(alignment_dir, tag)
    if os.path.exists(local_alignment_dir):
        # An existing directory is treated as "already computed".
        # NOTE: the directory is created before the tools run, so a run that
        # fails part-way will also be skipped on the next call.
        return

    os.makedirs(local_alignment_dir, exist_ok=True)

    # Tool binaries and sequence databases are fixed to this cluster's layout.
    alignment_runner = data_pipeline.AlignmentRunner(
        jackhmmer_binary_path='/usr/bin/jackhmmer',
        hhblits_binary_path='/usr/bin/hhblits',
        hhsearch_binary_path='/usr/bin/hhsearch',
        uniref90_database_path='/fsx-shared/openfold/data/uniref90/uniref90.fasta',
        mgnify_database_path='/fsx-shared/openfold/data/mgnify/mgy_clusters_2018_12.fa',
        bfd_database_path='/fsx-shared/openfold/data/bfd/bfd_metaclust_clu_complete_id30_c90_final_seq.sorted_opt',
        uniclust30_database_path='/fsx-shared/openfold/data/uniclust30/uniclust30_2018_08/uniclust30_2018_08',
        pdb70_database_path='/fsx-shared/openfold/data/pdb70/pdb70',
        no_cpus=no_cpus,
    )
    alignment_runner.run(tmp_fasta_path, local_alignment_dir)

if __name__ == '__main__':
    # Command-line entry point: precompute alignments for a single FASTA file.
    parser = argparse.ArgumentParser(
        description="Precompute alignments for the first record of one FASTA file"
    )

    parser.add_argument(
        "--cpus", type=int, default=4,
        help="""Number of CPUs with which to run alignment tools"""
    )

    # Required: without a path there is nothing to align, and the function
    # would otherwise fail later when trying to open None.
    parser.add_argument(
        "--one_file_path", type=str, required=True,
        help="""Path to one FASTA file"""
    )

    # Optional override of the output root; the default is the location that
    # was previously hard-coded here.
    parser.add_argument(
        "--alignment_dir", type=str, default='/fsx-shared/openfold/cameo',
        help="""Directory under which the per-sequence alignment directory is created"""
    )

    args = parser.parse_args()
    precompute_alignments(
        args.alignment_dir,
        no_cpus=args.cpus,
        tmp_fasta_path=args.one_file_path,
    )