# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
# SPDX-License-Identifier: Apache-2.0

from Bio.SeqIO.FastaIO import FastaIterator
import os


def list_files_in_dir(dir, extension=".txt"):
    paths = []
    for filename in os.listdir(dir):
        full_path = os.path.abspath(os.path.join(dir, filename))
        if filename.endswith(extension):
            paths.append(full_path)
    paths.sort()
    return paths


def extract_seqs_from_dir(dir, extension=".fa"):
    file_list = list_files_in_dir(dir, extension)
    sequences = []
    for file in file_list:
        with open(file, "r") as f:
            sequences.extend([str(record.seq) for record in FastaIterator(f)])
    return sequences