a @d @svddlZddlZddlmZddlmZddlZddlZddlZddlZddl Z ddl Z ddl Z ddl Z ddl Z ddlZddlmZddlmZddlZddlZddlZddlZddlZddlZddlmZmZddlmZdd idd idd id d id d idd idd idd idd idd id ZGd d d eZGdddeZ gdZ!dZ"ddZ#GdddZ$Gdddej%Z&dS)N)yaspin)Spinners)MultiDataModel)Model)ListOptional) PredictorZTS_DEFAULT_WORKERS_PER_MODEL1ZMMS_DEFAULT_WORKERS_PER_MODEL) z!huggingface-text2text-flan-t5-xxlz&huggingface-text2text-flan-t5-xxl-fp16z*huggingface-text2text-flan-t5-xxl-bnb-int8z huggingface-text2text-flan-t5-xlz#huggingface-text2text-flan-t5-largez#huggingface-text2text-flan-ul2-bf16z%huggingface-text2text-bigscience-t0ppz*huggingface-text2text-bigscience-t0pp-fp16z.huggingface-text2text-bigscience-t0pp-bnb-int8z8huggingface-textgeneration2-gpt-neoxt-chat-base-20b-fp16c@seZdZddZdddZdS)utilscCsdSNselfr r ;/home/ec2-user/SageMaker/ezsm-ray-FM/ezsmdeploy/__init__.py__init__*szutils.__init__task == text2textcCs0ddlm}||d}tdtd|dS)Nr)list_jumpstart_models)filterz)List of foundation models in Jumpstart:  )Z"sagemaker.jumpstart.notebook_utilsrprintjoin)rZ filter_valuerZtext_generation_modelsr r rlist_foundation_models,s  zutils.list_foundation_modelsN)r)__name__ __module__ __qualname__rrr r r rr )sr c@seZdZd3d d Zd d Zd dZd4ejdddZddZ ddZ ddZ ddZ ddZ ddZdd Zd!d"Zd#d$Zd%d&Zd'd(Zd)d*Zd5d-d.Zd/d0Zd1d2ZdS)6DeployNFTd*c!Csgd|_dgdgddgdgd|_||_||_||_||_| |_| |_||_| |_ ||_ d|_ ||_ ||_ ||_||_||_||_td d }i|_t|d d :}t|}|D]}t|d |j|d<qWdn1s0Y|jdkrd|_d|_n|jt|jvs(d|jvr|jdkrd|jvrr|jdkrdd|_d|_|jdkn d|_d|_n4|jdd ddvrd|_d|_n d|_d|_ntdt|jdt|t kr|g|_!d|_njt|tkr||_!d|_nN|dkr>tdd}Wdn1s$0Ydg|_!d|_ntd|j s|js|dddkrztd|dn||_"|j"}t|.}t#$|%}dd|j&D} Wdn1s0Yd| vrd | vrtd!|| dkrt'j()|_(n| |_(|dkr&t*+,|_-n@t|t krR||_-|.dkrftd"ntd#t t|| dkr~|j(/|_0n| |_0||_1|dkr||jvr||_2|j||_1n.|dkr||jvr|j s|jstd$||_3||_|4dS)%N) tensorflowpytorchmxnetsklearn huggingfacer!Ztorchr#Zgluonr$)r!r"r#r$F ezsmdeployz data/cost.csvrmoderrTlocal local_gpu.)pg!Please choose an instance type inz, or choose local for local testing. Don't pass in any instance or pass in None if you want to automatically choose an instance type.tmpmodelwzmodel must be a single serialized file (like 'model.pkl') or a list of files ([model.pkl, model2.pkl]). If you are downloading a model in the script or packaging with the container, pass in model = Nonepyz9please provide a valid python script with .py extension. z is invalidcSsg|]}t|tjr|jqSr ) isinstanceast FunctionDefname).0nr r r z#Deploy.__init__..Z load_modelpredictzplease implement a load_model(modelpath) that returns a loaded model, and predict(inputdata) function that returns a prediction in yourzplease enter a name with lower case letters; we will be using this name for s3 bucket prefixes, model names, ECR repository names etc. that have various restrictionszKenter string for a name or don't pass in a name; type of name passed in is zIf requirements=None, please provide a value for framework; choice should be one of 'tensorflow','pytorch','mxnet','sklearn')5Z frameworklistZframeworkinstallswaitbudgetinstance_count instance_typeimagedockerfilepatheiprefixmonitordeployedautoscaletargetfoundation_modelfoundation_model_versionhuggingface_modelhuggingface_model_taskhuggingface_model_quantize pkg_resourcesresource_filenamecostdictopencsvreaderfloatgpu multimodellistkeyssplit ValueErrortypestrmodelscriptr5parsereadbody sagemakersessionSession shortuuiduuidlowerr7islowerZdefault_bucketbucket requirements framework autoscaledeploy)!rr\r]rjrir7rkrGr=rhrDrbrArBr@r?r>rCrErHrIrJrKrLZcostpathinfilerRrowsfpfilenamefilenode functionsr r rr4s   6       0    &zDeploy.__init__c Cs"|jdkrtdddlm}m}|jd|_d|ji}|jdurL|j|d<d|jvs`d|jvrtd|d <d |jvrtd |d <n2d |jvrtd |d <nd |jvrtd|d <|j dvr|j |d<n|j dkrntd|j t }d|j }}||d|||dddd|_ dS)Nz.Please enter a valid instance type, not [None]r)HuggingFaceModelget_huggingface_llm_image_uriZ HF_MODEL_IDZHF_TASKr.r-rZ SM_NUM_GPUSZ12xZ8xZ16x)Z bitsandbytesZgptqZHF_MODEL_QUANTIZEzFhuggingface_model_quantize needs to be one of bitsandbytes, gptq, not z hf-model-r%z4.26z1.13Zpy39) image_urienvroler7Ztransformers_versionZpytorch_version py_version)r@rYZsagemaker.huggingfacertrur\rKjsondumpsrLraget_execution_roler7sagemakermodel)rrtruZhubaws_role endpoint_namer7r r rdeploy_huggingface_models@          zDeploy.deploy_huggingface_modelc Csddlm}m}m}m}|jd|_|j|j|jdd}|jdkrH||_tjj ddd|j|j|d}tjj |j|jdd}tjj |j|jdd}t } d|j } |jt vrtjj ||| tjj| t |jd |_n2|d tjj |||d | tjj| |d d |_dS) Nr) image_urisinstance_types model_uris script_uris inference)model_id model_versionscope)regionrjZ image_scoperrr@)rrZ script_scope)rrZ model_scopemodel-)rx model_datarz predictor_clsr7rysrcz inference.py)rx source_dirrZ entry_pointrzrr7Zsagemaker_session)rarrrrr\Zretrieve_defaultrIr@retriever~r7_model_env_variable_mapr predictorrr makedir_safeget_sagemaker_session) rrrrrr@Zdeploy_image_uriZdeploy_source_uriZ model_urirrr r rdeploy_foundation_model2sV         zDeploy.deploy_foundation_modelr)returncCs6tjdtjd}tjj|d}tjj||d}|S)zReturn the SageMaker session.ra)Z service_name region_name)local_download_dir)sagemaker_clientsettings)boto3clientrcrrasession_settingsZSessionSettingsrb)rrrrrbr r rrrs zDeploy.get_sagemaker_sessionc CsPi|_|jdkrtdd}t|dd}t|}|D]`}t|ddt|d|j|d|j|dt|ddt|df|j|d<q8Wdn1s0Y| n|jt |j vs|jdvr6|jdvr.|j|j|_ |j dkr4d d d d d dd}|j ||j |_ nd|_ ntdt |j ddS)Nr&data/instancetypes.csvr'r(rrr*r+gQ?gQ?g(\?gp= ף?gp= ף?gp= ף?)zml.eia2.mediumz ml.eia2.largezml.eia2.xlargez ml.eia.mediumz ml.eia.largez ml.eia.xlarger/z$, or choose local for local testing.) instancedictr@rMrNrPrQrRrSrOchoose_instance_typerVrW costperhourrCrY)rZinstancetypepathrmrRrnZeicostsr r rprocess_instance_typesB   (.    zDeploy.process_instance_typec CsJ|jdkrd|jd}n|jd|jd}||j|}tdd|_d}d}t|j D]l}|j|d}|j|d }|j|d }|j d kr||kr||kr|}|}q`||kr`||j kr`|}qq`|dkr|j d krt d t |j d t |dn*|dkr2|j d kr2t dt |d||_ |j|j |_dS)Nrezsmdeploy/model-//ezsmdeploy/model-r&rrrrrrz9Could not find an instance that satisfies your budget of z8 per hour and can host your models with a total size of z, Gb. Please choose a higher budget per hour.z3You may be using large models with a total size of z` Gb. Please choose a high memory GPU instance and launch without multiple models (if applicable))rDr7get_sizerhrMrNZinstancetypespathrVrrWr>rYr[r@rOr) rtmppathsizeZchoseninstanceZmincostinstanceZ memperworkerZcostZ costpermemr r rrsT   zDeploy.choose_instance_typecCs|j||dSr )r add_model)rZs3pathZ relativepathr r rrszDeploy.add_modelcCs|js2td|j|jd|jttjjd|_ n`t d|jd |jd dddd|jttjjd|_ |jD]}| |dqzd|_dS) Nrr)r7rrxrzrr)r7Zmodel_data_prefixrxrzrzserving/F)rUrr7 modelpathrArar~rrrrrrXrrC)rpathr r r create_models$   zDeploy.create_modelc Cs|jrPddlm}tdkr,d|j|j}nd|j|j|j}|dd|d}nd}|jrd |jvrhd nd}|j j |j |jt j jd |j||jd |_ nl|jrd |jvrd nd}|j j |j |jd |j||jdd|_ n,|j j |j |j|jd|jd|j|dd|_ d|j|_dS)Nr)DataCaptureConfigr's3://{}/ezsmdeploy/model-{}/datacapturez*s3://{}/{}/ezsmdeploy/model-{}/datacaptureTr)Zenable_captureZsampling_percentageZdestination_s3_urip3zezsm-foundation-endpoint-)initial_instance_countr@rr volume_sizer=zezsm-hf-endpoint-i,)rr@rrr=&container_startup_health_check_timeoutzezsm-endpoint-F)rr@Zaccelerator_typerZupdate_endpointr=data_capture_configr)rEZsagemaker.model_monitorrrDformatrhr7rHr@rrlr?rarrr=rJrCr)rrZtmps3urirrr r r deploy_models\     zDeploy.deploy_modelcCs>td}||}d}|jj|dD]}||j}q&|dS)Ns3g)ZPrefixgA)rresourceZBucketobjectsrr)rrhrrZ my_bucketZ total_sizeobjr r rrPs    zDeploy.get_sizecCsdd}|jdkrd}n |jd}g|_|jD]4}|j|jjd||j||jd|d7}q*dS)Nrrrrmodel{}.tar.gzrrhZ key_prefix) rDrr\appendrbZ upload_datarrhr7)rirr7r r r upload_modelZs   zDeploy.upload_modelc Csd}|jD]r}d|vrd|vr|jjd||ddd|ddddttd |d  }| d |Wdn1s0Yd |}nd|vr8d|vr8| d |t |d |ttd |d  }| d |Wdn1s$0Yd |}td|d}d|vrd|j |ddn | |||d7}q dS)Nrztar.gzrz./downloads/{}rrrz./downloads/{}/*.tar.gzrz./extractedmodel/{}/zextractedmodel/{}/z./downloads/{}/rzw:gzr,)arcname)r\rbZ download_datarrXrtarfilerPglob extractallrshutilcopyaddclose)rrr7tarr r r tar_modelks:  . 0   zDeploy.tar_modelc Csvzt|Wn Yn0ztj|s4t|Wn:typ}z"|jdkr\t|jWYd}~n d}~00dS)N) rrmtreeosrexistsmakedirsOSErrorerrnor)r directoryerrr r rrs   zDeploy.makedir_safecCs|dt|jtkrBtj|jr6t|jdq|jdnDt|jt kr~t dd}t dd|j}| || ntddS)Nrzsrc/requirements.txtz does not exist!r1cSs|dS)Nrr )xr r rr;z,Deploy.handle_requirements..zUpass in a path/to/requirements.txt or a list of requirements ['scikit-learn',...,...])rrZrir[rrrrrrVrPmap writelinesrrY)rfl1r r rhandle_requirementss     zDeploy.handle_requirementscCsd}tdd(}tj||j|dd}Wdn1s<0Ytddd}td dd}d |||j|_ tj d st d qd |_dS)Nz=chmod +x src/build-docker.sh & sudo ./src/build-docker.sh {}zsrc/dockeroutput.txtr1T)stdoutshellz9aws sts get-caller-identity --query Account --output textrrzaws configure get regionz/{}.dkr.ecr.{}.amazonaws.com/ezsmdeploy-image-{} src/done.txtrzPlease see src/dockeroutput.txt)rP subprocessPopenrr7rpopenr_rXrArrtimesleepZ dockeroutput)rcmdrr-Zacctrr r r build_dockers  6  zDeploy.build_dockerc Cstdj|jd}|d}|ddd}td}|jdd||d d d d }|jd |jdd||d d|jddiddddd}||_ dS)Nra) EndpointNamerZProductionVariantsrZ VariantNamezapplication-autoscalingzendpoint/{}/variant/{}z&sagemaker:variant:DesiredInstanceCountr )ServiceNamespace ResourceIdScalableDimensionZ MinCapacityZ MaxCapacityzscaling-policy-{}ZTargetTrackingScalingZPredefinedMetricTypeZ&SageMakerVariantInvocationsPerInstanceiXF)Z TargetValueZPredefinedMetricSpecificationZScaleOutCooldownZScaleInCooldownZDisableScaleIn)Z PolicyNamerrrZ PolicyTypeZ(TargetTrackingScalingPolicyConfiguration) rrZdescribe_endpointrZregister_scalable_targetrZput_scaling_policyr7rGZscalingresponse)rresponseZin1Zin2rr r rautoscale_endpoints8     zDeploy.autoscale_endpointrc Cs|jr|dkrtd|jrtdd}t|dtj}t t j ddd<}| | ttj|d||jrtd d $} t|jd d | Wdn1s0Yn>td d $} t|jdd | Wdn1s0Yt|td dd|||} t| } tjdsJtdq.td| | ttj|d|Wdn1s0YntddS)NzXsince this is a multimodel endpoint, please pass in a target model that you wish to testr&zdata/smlocust.pyzsrc/smlocust.pygreenrcolortextz | Starting test with Locustzsrc/locustdata.txtr1z model1.tar.gz)r target_modelzsrc/testdata.pwbzklocust -f src/smlocust.py --no-web -c {} -r {} --run-time {}s --csv=src/locuststats; touch src/testdone.txtzsrc/testdone.txtrz9 | Done! Please see the src folder for locuststats* filesz-Deploy model to endpoint first before testing)rUrYrFrMrNrrdatetimenowrrpointhidewriter[showrPr|dumprpicklerrsystemrrrrremove) rZ input_datarZ usercountZ hatchrateZ timeoutsecspath1startspoutfilerr-r r rtestsZ    $  "  *z Deploy.testc Cstj}ttjddd}|js|jsztdWn Yn0| | |j dgkr| t tj|dn| t tj|d|||| | t tj|d||jdkrt tj|d }|d n|t tj|d }| | ||t|jd | | t tj|d ||jdkr|jdkrtdd|_n&|jdkr|jdkrtdd|_t|jd| | t tj|d||jrdtdd}tdd}tdd}t|dt|dt|dd|_ntdd}tdd}tdd}tdd}tdd}tdd} t|d t|d!t|d"t|d#t|d$t| d|jr:|jdkr:d|_| | t tj|d%|| | t tj|d&||jdkr| t tj|d'|| | t tj|d(||n,|jr|n|jr| nt!d)| | t tj|d*|j"||#| | t tj|d+||j$r|j"d,vr|%| | t tj|d-|n@|j$r|j"d,vr| | t tj|d.||j&r,| | t tj|d/d0'|j(|j)||j*+t tj|d1d|_,z6t-.d2t-.d t-.d3t-.d4t-.dWn Yn0|j/WdS1s0YdS)5Nrrrzsrc/r0z^ | No model was passed. Assuming you are downloading a model in the script or in the containerz | compressed model(s)z7 | uploaded model tarball(s) ; check returned modelpathz# | no additional requirements foundrz | added requirements filezsrc/transformscript.pyz | added source fileTr&zdata/DockerfileFzdata/Dockerfile_flaskzsrc/Dockerfilez | added Dockerfilezdata/model_handler.pyzdata/dockerd-entrypoint.pyzdata/build-docker.shzsrc/model_handler.pyzsrc/dockerd-entrypoint.pyzsrc/build-docker.shzdata/nginx.confzdata/predictor.pyz data/servez data/trainz data/wsgi.pyzsrc/nginx.confzsrc/predictor.pyz src/servez src/trainz src/wsgi.pyz^ | Setting Elastic Inference to None since you selected a GPU instancez' | added model_handler and docker utilsz | building docker containerz | built docker containerzbr)zzz <|endoftext|>z#Current Date: {} Current Time: {} cCs$tD]}||d}q|d}|S)Nrr)MEANINGLESS_WORDSreplacestrip)rwordr r rclean_response.s rc@sHeZdZddZddZddZddZd d Zd d Ze d dZ dS) ConversationcCs2td}td}||_||_t|||_dS)Nz%Y-%m-%dz%H:%M:%S %p %Z)rstrftime _human_id_bot_id PRE_PROMPTr_prompt)rhuman_idbot_idZcur_datecur_timer r rr5s   zConversation.__init__cCs |j|jd|d7_dS)N: r)rr)rcontextr r rpush_context_turn=szConversation.push_context_turncCs6|j|jd|d7_|j|jd7_dS)Nrr:)rrr)rqueryr r rpush_human_turnAszConversation.push_human_turncCsL|j|v}||jdd}t|}|s4|d7}|j|d7_dS)Nr!rz...r)rrXrr)rrZ has_finishedZbot_turnr r rpush_model_responseEs  z Conversation.push_model_responsecCs<|jd}|jd}td|d|d|j}|dS)Nr!(|z)\W?r)rrrerXr)rZ human_tagZbot_tagZturnsr r r get_last_turnOs  zConversation.get_last_turncCs|jSr )rr r r rget_raw_promptUszConversation.get_raw_promptcCs |t_dSr )rr)clsvaluer r rfrom_raw_promptXszConversation.from_raw_promptN) rrrrr r#r$r(r) classmethodr,r r r rr4s rcspeZdZdZdZdZdZdeee e edfdd Z d d Z d d Z d dZddZddZddZZS)rzjEzSMdeploy Openchatkit shell - Type /help or /? to list commands. For example, type /quit to exit shell. z>>> zzN)rr  cmd_queuec s<t||_||_||_|jg|jd<|dur8||_dS)NZstopping_criteria)superrrr\payload_kwargsrcmdqueue)rrr r.kwargs __class__r rrgs zOpenChatKitShell.__init__cCst|j|j|_dSr rrr conversationr r r rprelooppszOpenChatKitShell.preloopcCs"|dr|ddnd|}|S)Nrrzsay ) startswith)rlinecommandr r rprecmdsszOpenChatKitShell.precmdcCs|j||j}d|jvrXd|i|j}|j|}|dddt|d}nTd|jvr|ddid}|j|}|ddd }|ddt||}nd }|j |t |j dS) Nz neoxt-chatZ text_inputsrZgenerated_textz-Chatr r)inputs parametersz z1I don't recognize the output from this chat model) r6r#r)r\r0rr<lenrfindr$rr()rargpromptpayloadroutputlastr r rdo_sayws        zOpenChatKitShell.do_saycCst|j|j|_dSr r5rr@r r rdo_resetszOpenChatKitShell.do_resetcCstd|jddS)NzHyperparameters: r)rr0rFr r rdo_hyperparameterssz#OpenChatKitShell.do_hyperparameterscCsdS)NTr rFr r rdo_quitszOpenChatKitShell.do_quit)N)rrrintrorArrrr[rrrr7r;rErGrHrI __classcell__r r r3rr^s r)'rardrZyaspin.spinnersrrrrr'rrrrrMrZsagemaker.multidatamodelrZsagemaker.modelrr5rQr|rrtypingrrZsagemaker.predictorrrobjectr rrrrrCmdrr r r rs`      {*