In [None]:
import boto3
import sagemaker
# SageMaker SDK session; its default bucket is where later cells upload the input.
sess = sagemaker.Session()
default_bucket = sess.default_bucket()

# Low-level boto3 SageMaker client, used further down to start the pipeline execution.
client = boto3.client("sagemaker")

In [None]:
#User inputs for pipeline run 
fasta_file = 'T1030.fasta' #Default pipeline execution name will drop .fasta
!mkdir ./sequence_input/
!curl 'https://www.predictioncenter.org/casp14/target.cgi?target=T1030&view=sequence' > ./sequence_input/T1030.fasta 

In [None]:
# Upload the local FASTA file to the default bucket and derive the names the
# pipeline execution needs.
s3 = boto3.resource('s3')
fastaKey = f'sequence_input/{fasta_file}'

# Read from the path derived from `fasta_file` (not a hard-coded name) and use a
# context manager so the file handle is closed once the upload has finished.
with open(f'./sequence_input/{fasta_file}', 'rb') as AminoAcidSeq:
    response = s3.Bucket(default_bucket).put_object(Key=fastaKey, Body=AminoAcidSeq)

fasta_s3_uri = f's3://{default_bucket}/{fastaKey}'

# Everything before the first '.' becomes the execution display name.
# split() returns the whole name when there is no '.', whereas slicing with
# find() would use -1 in that case and silently chop the last character.
experiment_name = fasta_file.split('.', 1)[0]

In [None]:
# Start one execution of the 'ProteinFoldWorkflow' pipeline for the uploaded
# FASTA file. All parameter values are passed as strings.
# NOTE(review): the parameter names below presumably have to match the
# parameters declared on the pipeline definition — verify against the pipeline.
response = client.start_pipeline_execution(
    PipelineName='ProteinFoldWorkflow',
    PipelineExecutionDisplayName=experiment_name,
    PipelineExecutionDescription=f'This pipeline was executed via boto3 and is running an inference for {experiment_name}',
    PipelineParameters=[
        {
            'Name': 'FastaFileName',
            'Value': fasta_file,
        },
        {
            'Name': 'FastaInputS3URI',
            'Value': fasta_s3_uri,
        },
        {
            'Name': 'db_preset',
            'Value': 'full_dbs',
        },
        {
            # Zero-padded YYYY-MM-DD; the previous value '2020-05-1' denoted the
            # same date but was not in the standard ISO form.
            'Name': 'MaxTemplateDate',
            'Value': '2020-05-01',
        },
        {
            'Name': 'ModelPreset',
            'Value': 'monomer',
        },
        {
            # NOTE(review): presumably only relevant for multimer presets — confirm.
            'Name': 'NumMultimerPredictionsPerModel',
            'Value': '5',
        },
        {
            'Name': 'InferenceInstanceType',
            'Value': 'ml.g5.4xlarge',
        },
        {
            'Name': 'MSAInstanceType',
            'Value': 'ml.m5.4xlarge',
        },
    ],
    # Cap on how many pipeline steps may run at the same time.
    ParallelismConfiguration={
        'MaxParallelExecutionSteps': 2
    }
)
print(response)