o hdi @sddlZddlZddlmZddlmZddlZddlZddlZddlZddl Z ddl Z ddl Z ddl Z ddl Z ddlZddlmZddlmZddlmZddlZddlZddlZddlZddlZddlZddlmZmZddlmZd d id d id d id d id d id d id d id d id d id d id ZGd dde Z!Gddde Z"gdZ#dZ$ddZ%GdddZ&Gdddej'Z(dS)N)yaspin)Spinners)MultiDataModel)ServerlessInferenceConfig)Model)ListOptional) PredictorZTS_DEFAULT_WORKERS_PER_MODEL1ZMMS_DEFAULT_WORKERS_PER_MODEL) z!huggingface-text2text-flan-t5-xxlz&huggingface-text2text-flan-t5-xxl-fp16z*huggingface-text2text-flan-t5-xxl-bnb-int8z huggingface-text2text-flan-t5-xlz#huggingface-text2text-flan-t5-largez#huggingface-text2text-flan-ul2-bf16z%huggingface-text2text-bigscience-t0ppz*huggingface-text2text-bigscience-t0pp-fp16z.huggingface-text2text-bigscience-t0pp-bnb-int8z8huggingface-textgeneration2-gpt-neoxt-chat-base-20b-fp16c@seZdZddZdddZdS)utilscCsdSNselfr r Q/home/ec2-user/SageMaker/easy-amazon-sagemaker-deployments/ezsmdeploy/__init__.py__init__+zutils.__init__task == text2textcCs0ddlm}||d}tdtd|dS)Nr)list_jumpstart_models)filterz)List of foundation models in Jumpstart:  )Z"sagemaker.jumpstart.notebook_utilsrprintjoin)rZ filter_valuerZtext_generation_modelsr r rlist_foundation_models-s  zutils.list_foundation_modelsN)r)__name__ __module__ __qualname__rrr r r rr *sr c@seZdZ                       d4d d Zd dZddZd5dejfddZddZ ddZ ddZ ddZ ddZ dd Zd!d"Zd#d$Zd%d&Zd'd(Zd)d*Zd+d,Z -d6d.d/Zd0d1Zd2d3ZdS)7DeployNF Td*c$Csgd|_dgdgddgdgd|_| |_||_||_||_||_||_||_| |_ ||_ d|_ ||_ ||_ ||_||_||_||_|rLt| | d |_nd|_||_td d }i|_t|d d }t|}|D] }t|d|j|d<qiWdn1swY|jdkrd|_d|_nV|jt|jvsd|jvr|jdkrd|jvr|jdkrd|_d|_|jdkn,d|_d|_n%|j ddddvrd|_d|_nd|_d|_n t!dt|jdt"|t#kr|g|_$d|_n5t"|tkr||_$d|_n'|dkr'tdd}Wdn 1swYdg|_$d|_nt!d|j s~|js~|dddkrDt!d|d||_%|j%} t| }!t&'|!(}"dd|"j)D}#Wdn 1siwYd |#vr~d!|#vr~t!d"||dkrt*j+,|_+n||_+|dkrt-./|_0n t"|t#kr||_0|1dkrt!d#n t!d$t#t"|| dkr|j+2|_3n| |_3||_4|dkr||jvr||_5|j||_4n|dkr||jvr|j s|jst!d%||_6| |_|7dS)&N) tensorflowpytorchmxnetsklearn huggingfacer%Ztorchr'Zgluonr()r%r&r'r(F)Zmemory_size_in_mbZmax_concurrency ezsmdeployz data/cost.csvrmoder"rTlocal local_gpu.)pg!Please choose an instance type inz, or choose local for local testing. Don't pass in any instance or pass in None if you want to automatically choose an instance type.tmpmodelwzmodel must be a single serialized file (like 'model.pkl') or a list of files ([model.pkl, model2.pkl]). If you are downloading a model in the script or packaging with the container, pass in model = Nonepyz9please provide a valid python script with .py extension. z is invalidcSsg|] }t|tjr|jqSr ) isinstanceast FunctionDefname).0nr r r sz#Deploy.__init__..Z load_modelpredictzplease implement a load_model(modelpath) that returns a loaded model, and predict(inputdata) function that returns a prediction in yourzplease enter a name with lower case letters; we will be using this name for s3 bucket prefixes, model names, ECR repository names etc. that have various restrictionszKenter string for a name or don't pass in a name; type of name passed in is zIf requirements=None, please provide a value for framework; choice should be one of 'tensorflow','pytorch','mxnet','sklearn')8Z frameworklistZframeworkinstallswaitbudgetinstance_count instance_typeimagedockerfilepatheiprefixmonitordeployedautoscaletargetfoundation_modelfoundation_model_versionhuggingface_modelhuggingface_model_task serverlessrserverless_confighuggingface_model_quantize pkg_resourcesresource_filenamecostdictopencsvreaderfloatgpu multimodellistkeyssplit ValueErrortypestrmodelscriptr9parsereadbody sagemakersessionSession shortuuiduuidlowerr;islowerZdefault_bucketbucket requirements framework autoscaledeploy)$rrarbrornr;rprJrOZserverless_memoryZserverless_concurrencyr@rmrGrgrDrErCrBrArFrHrKrLrMrNrQZcostpathinfilerWrowsfpfilenamefilenode functionsr r rr5s                   & zDeploy.__init__c Cs|jdkr |js tdddlm}m}|jd|_d|ji}|jdur)|j|d<|js1z+t d}|j |j dgdd d}d |vrVt |d d dd |d <n Wnty}zt|ddtdt d|d <d|jvrt d|d <d|jvrt d|d <nd|jvrt d|d <nsd|jvrt d|d <nfd|jvrt d|d <d|jvrt d|d <nMd|jvrt d|d <n@d|jvrt d|d <d|jvrt d|d <n'd|jvrt d|d <nd|jvrt d|d <n d|jvr t d|d <WYd}~nd}~ww|jdvr"|j|d<n|jdkr)ntd |jt}d!|j}} |jsX|jrX|jrX||d"|||d#d$d%d&|_dS|||d#d$d%d'|_dS)(Nz.Please enter a valid instance type, not [None]r)HuggingFaceModelget_huggingface_llm_image_uriZ HF_MODEL_IDZHF_TASKZec2zml.) InstanceTypesr{ZGpuInfoZGpusZCountZ SM_NUM_GPUSz ... )endzNTrying fallback to figure out number of GPUs in the instance type you chose - r2r"Z12x24x48xp2Z8x16xp3p4)Z bitsandbytesZgptqZHF_MODEL_QUANTIZEzFhuggingface_model_quantize needs to be one of bitsandbytes, gptq, not z hf-model-r)z4.26z1.13Zpy39) image_urienvroler;transformers_versionpytorch_version py_version)rrrrr)rCrOr^Zsagemaker.huggingfaceryrzrarNboto3clientZdescribe_instance_typesstripjsondumps ExceptionrrQrfget_execution_roler;rKrMsagemakermodel) rryrzZhubZ ec2_clientrespeaws_role endpoint_namer;r r rdeploy_huggingface_model s                       zDeploy.deploy_huggingface_modelc Csddlm}m}m}m}|jd|_|j|j|jdd}|jdkr$||_tjj ddd|j|j|d}tjj |j|jdd}tjj |j|jdd}t } d|j } |jt vrjtjj ||| tjj| t |jd |_dSdd lm} | |j|j| d |_dS) Nr) image_urisinstance_types model_uris script_uris inference)model_id model_versionscope)regionroZ image_scoperrrC)rrZ script_scope)rrZ model_scopemodel-)r model_datar predictor_clsr;r)JumpStartModel)rrr)rfrrrrraZretrieve_defaultrLrCretrieverr;_model_env_variable_mapr predictorr rZsagemaker.jumpstart.modelr) rrrrrrCZdeploy_image_uriZdeploy_source_uriZ model_urirrrr r rdeploy_foundation_modeldsJ         zDeploy.deploy_foundation_modelsrcreturncCs6tjdtjd}tjj|d}tjj||d}|S)zReturn the SageMaker session.rf)Z service_name region_name)local_download_dir)sagemaker_clientsettings)rrrhrrfsession_settingsZSessionSettingsrg)rrrrrgr r rget_sagemaker_sessions zDeploy.get_sagemaker_sessionc CsPi|_|jdkrbtdd}t|dd@}t|}|D]0}t|ddt|d|j|d|j|dt|ddt|df|j|d<qWdn1sWwY| dS|jt |j vsq|jdvr|jdvr|j|j|_ |j dkrd d d d d dd}|j ||j |_ dSdSd|_ dStdt |j d)Nr*data/instancetypes.csvr+r,r"rr.r/gQ?gQ?g(\?gp= ף?gp= ף?gp= ף?)zml.eia2.mediumz ml.eia2.largezml.eia2.xlargez ml.eia.mediumz ml.eia.largez ml.eia.xlarger3z$, or choose local for local testing.) instancedictrCrRrSrUrVrWrXrTchoose_instance_typer[r\ costperhourrFr^)rZinstancetypepathrrrWrsZeicostsr r rprocess_instance_typesF   (      zDeploy.process_instance_typec Cs@|jdkr d|jd}n |jd|jd}||j|}tdd|_d}d}t|j D]6}|j|d}|j|d }|j|d }|j d krY||krX||krX|}|}q0||krf||j krf|}nq0|dkr|j d krt d t |j d t |d|dkr|j d krt dt |d||_ |j|j |_dS)Nr!ezsmdeploy/model-//ezsmdeploy/model-r*rrrr"rr#z9Could not find an instance that satisfies your budget of z8 per hour and can host your models with a total size of z, Gb. Please choose a higher budget per hour.z3You may be using large models with a total size of z` Gb. Please choose a high memory GPU instance and launch without multiple models (if applicable))rGr;get_sizermrRrSZinstancetypespathr[rr\rAr^r`rCrTr) rtmppathsizeZchoseninstanceZmincostinstanceZ memperworkerZcostZ costpermemr r rrsX   zDeploy.choose_instance_typecCs|j||dSr )r add_model)rZs3pathZ relativepathr r rrszDeploy.add_modelcCs|jstd|j|jd|jttjjd|_ dSt d|jd |jd dddd|jttjjd|_ |jD]}| |dq>d|_dS) Nrr)r;rrrrr)r;Zmodel_data_prefixrrrzserving/F)rZrr; modelpathrDrfrrr rrrr]rrF)rpathr r r create_model s$     zDeploy.create_modelc Cs|jr(ddlm}tdkrd|j|j}n d|j|j|j}|dd|d}nd}|jdurWd}d |jvsMd |jvsMd |jvsMd |jvsMd |jvsM|jrOd}d|jvrVd}nd}|jrr|j sr|j j |j |jd|j||j d|_nN|jr|j r|j j |j |jd|j||j dd|_n4|j r|js|j j |j |jd|j||j|j d|_n|j j |j |j|jd|jd|j |||jdd |_d|j|_dS)Nr)DataCaptureConfigr!'s3://{}/ezsmdeploy/model-{}/datacapturez*s3://{}/{}/ezsmdeploy/model-{}/datacaptureTr#)Zenable_captureZsampling_percentageZdestination_s3_urirrrr~rZg5zezsm-foundation-endpoint-)initial_instance_countrCr volume_sizer@zezsm-hf-endpoint-i,)rrCrrr@&container_startup_health_check_timeout)rrCrrserverless_inference_configr@zezsm-endpoint-F) rrCZaccelerator_typerZupdate_endpointr@rdata_capture_configrr)rHZsagemaker.model_monitorrrGformatrmr;rCrKrMrrqrBr@rrPrFr)rrZtmps3urirrr r r deploy_model=s|   8       zDeploy.deploy_modelcCs>td}||}d}|jj|dD]}||j}q|dS)Ns3g)ZPrefixgA)rresourceZBucketobjectsrr)rrmrrZ my_bucketZ total_sizeobjr r rrs   zDeploy.get_sizecCsdd}|jdkr d}n|jd}g|_|jD]}|j|jjd||j||jd|d7}qdS)Nr"r!rrmodel{}.tar.gzrrm key_prefix) rGrraappendrgZ upload_datarrmr;)rirr;r r r upload_models    zDeploy.upload_modelc Cszd}|jD]}d|vrUd|vrU|jjd||ddd|ddddttd |d }| d |Wdn1sJwYd |}nCd|vrd|vr| d |t |d |ttd |d }| d |Wdn1swYd |}td|d}d|vr|j |ddn| |||d7}qdS)Nr"ztar.gzrz./downloads/{}rrrz./downloads/{}/*.tar.gzrz./extractedmodel/{}/zextractedmodel/{}/z./downloads/{}/rzw:gzr0)arcname)rargZ download_datarr]rtarfilerUglob extractall makedir_safeshutilcopyaddclose)rrr;tarr r r tar_models@      zDeploy.tar_modelc Csxzt|WnYztj|st|WdSWdSty;}z|jdkr0t|jWYd}~dSd}~ww)N) rrmtreeosrexistsmakedirsOSErrorerrnor)r directoryerrr r rrs   zDeploy.makedir_safecCs|dt|jtkr!tj|jrt|jddS|jdt|jt kr@t dd}t dd|j}| || dStd)Nrzsrc/requirements.txtz does not exist!r5cSs|dS)Nrr )xr r rsz,Deploy.handle_requirements..zUpass in a path/to/requirements.txt or a list of requirements ['scikit-learn',...,...])rr_rnr`rrrrrr[rUmap writelinesrr^)rfl1r r rhandle_requirementss     zDeploy.handle_requirementscCsd}tdd}tj||j|dd}Wdn1swYtddd}td dd}d |||j|_ tj d sUt d tj d rJd |_dS)Nz=chmod +x src/build-docker.sh & sudo ./src/build-docker.sh {}zsrc/dockeroutput.txtr5T)stdoutshellz9aws sts get-caller-identity --query Account --output textrrzaws configure get regionz/{}.dkr.ecr.{}.amazonaws.com/ezsmdeploy-image-{} src/done.txtrzPlease see src/dockeroutput.txt)rU subprocessPopenrr;rpopenrdr]rDrrtimesleepZ dockeroutput)rcmdrr1Zacctrr r r build_dockers"    zDeploy.build_dockerc Cstdj|jd}|d}|ddd}td}|jdd||d d d d }|jd |jdd||d d|jddiddddd}||_ dS)Nrf) EndpointNamerZProductionVariantsrZ VariantNamezapplication-autoscalingzendpoint/{}/variant/{}z&sagemaker:variant:DesiredInstanceCountr"r )ServiceNamespace ResourceIdScalableDimensionZ MinCapacityZ MaxCapacityzscaling-policy-{}ZTargetTrackingScalingZPredefinedMetricTypeZ&SageMakerVariantInvocationsPerInstanceiXF)Z TargetValueZPredefinedMetricSpecificationZScaleOutCooldownZScaleInCooldownZDisableScaleIn)Z PolicyNamerrrZ PolicyTypeZ(TargetTrackingScalingPolicyConfiguration) rrZdescribe_endpointrZregister_scalable_targetrZput_scaling_policyr;rJZscalingresponse)rresponseZin1Zin2rr r rautoscale_endpoints8      zDeploy.autoscale_endpointc Cs|jr |dkr td|jrtdd}t|dtj}t t j ddd}| | ttj|d||jratd d } t|jd d | Wdn1s[wYntd d } t|jdd | Wdn1s{wYt|td dd|||} t| } tjdstdtjdrtd| | ttj|d|WddS1swYdStd)NzXsince this is a multimodel endpoint, please pass in a target model that you wish to testr*zdata/smlocust.pyzsrc/smlocust.pygreenr!colortextz | Starting test with Locustzsrc/locustdata.txtr5z model1.tar.gz)r target_modelzsrc/testdata.pwbzklocust -f src/smlocust.py --no-web -c {} -r {} --run-time {}s --csv=src/locuststats; touch src/testdone.txtzsrc/testdone.txtrz9 | Done! Please see the src folder for locuststats* filesz-Deploy model to endpoint first before testing)rZr^rIrRrSrrdatetimenowrrpointhidewriter`showrUrdumprpicklerrsystemrrrrremove) rZ input_datar Z usercountZ hatchrateZ timeoutsecspath1startspoutfilerr1r r rtest>sd           ",z Deploy.testc Cstj}ttjddd}|js|jsztdWnY| | |j dgkr@| t tj|dn| t tj|d|||| | t tj|d||jdkrt tj|d }|d n|t tj|d }| | ||t|jd | | t tj|d ||jdkr|jdkrtdd|_n|jdkr|jdkrtdd|_t|jd| | t tj|d||jr+tdd}tdd}tdd}t|dt|dt|dd|_nktdd}tdd}tdd}tdd}tdd}tdd} t|d t|d!t|d"t|d#t|d$t| d|jr|jdkrd|_| | t tj|d%|| | t tj|d&||jdkr| t tj|d'|| | t tj|d(||n|jr|js|n |jr| nt!d)| |j"s| t tj|d*|j#n| t tj|d+||$| | t tj|d,||j%rd|j#d-vrd|&| | t tj|d.|n |j%r|j#d-vr| | t tj|d/||j'r| | t tj|d0d1(|j)|j*||j+,t tj|d2d|_-zt./d3t./d t./d4t./d5t./dWnY|j0WdS1swYdS)6Nrr!rzsrc/r4z^ | No model was passed. Assuming you are downloading a model in the script or in the containerz | compressed model(s)z7 | uploaded model tarball(s) ; check returned modelpathz# | no additional requirements foundrz | added requirements filezsrc/transformscript.pyz | added source fileTr*zdata/DockerfileFzdata/Dockerfile_flaskzsrc/Dockerfilez | added Dockerfilezdata/model_handler.pyzdata/dockerd-entrypoint.pyzdata/build-docker.shzsrc/model_handler.pyzsrc/dockerd-entrypoint.pyzsrc/build-docker.shzdata/nginx.confzdata/predictor.pyz data/servez data/trainz data/wsgi.pyzsrc/nginx.confzsrc/predictor.pyz src/servez src/trainz src/wsgi.pyz^ | Setting Elastic Inference to None since you selected a GPU instancez' | added model_handler and docker utilsz | building docker containerz | built docker containerz ir)zzz <|endoftext|>z#Current Date: {} Current Time: {} cCs$tD]}||d}q|d}|S)Nr!r)MEANINGLESS_WORDSreplacer)rwordr r rclean_response{s r'c@sHeZdZddZddZddZddZd d Zd d Ze d dZ dS) ConversationcCs2td}td}||_||_t|||_dS)Nz%Y-%m-%dz%H:%M:%S %p %Z)rstrftime _human_id_bot_id PRE_PROMPTr_prompt)rhuman_idbot_idZcur_datecur_timer r rrs  zConversation.__init__cCs |j|jd|d7_dS)N: r)r-r*)rcontextr r rpush_context_turns zConversation.push_context_turncCs6|j|jd|d7_|j|jd7_dS)Nr1r:)r-r*r+)rqueryr r rpush_human_turnszConversation.push_human_turncCsL|j|v}||jdd}t|}|s|d7}|j|d7_dS)Nr4rz...r)r*r]r'r-)rrZ has_finishedZbot_turnr r rpush_model_responses z Conversation.push_model_responsecCs<|jd}|jd}td|d|d|j}|dS)Nr4(|z)\W?r)r*r+rer]r-)rZ human_tagZbot_tagZturnsr r r get_last_turns  zConversation.get_last_turncCs|jSr )r-rr r rget_raw_promptszConversation.get_raw_promptcCs |t_dSr )rr-)clsvaluer r rfrom_raw_prompts zConversation.from_raw_promptN) rrrrr3r6r7r;r< classmethodr?r r r rr(s r(c steZdZdZdZdZdZddedede e effd d Z d d Z d dZ ddZddZddZddZZS)r!zjEzSMdeploy Openchatkit shell - Type /help or /? to list commands. For example, type /quit to exit shell. z>>> zzNrr cmd_queuec s@t||_||_||_|jg|jd<|dur||_dSdS)NZstopping_criteria)superrrrapayload_kwargsr.cmdqueue)rrrrAkwargs __class__r rrs  zOpenChatKitShell.__init__cCt|j|j|_dSr r(r.r/ conversationrr r rpreloopzOpenChatKitShell.preloopcCs&|dr |dd}|Sd|}|S)Nrr"zsay ) startswith)rlinecommandr r rprecmdszOpenChatKitShell.precmdcCs|j||j}d|jvr,d|i|j}|j|}|dddt|d}n/d|jvs6d|jvrY|ddid }|j|}|ddd }|ddt||}nd }|j |t |j dS) Nz neoxt-chatZ text_inputsrZgenerated_textz-Chatr#r r#)inputs parametersz z1I don't recognize the output from this chat model) rJr6r<rarCrr?lenrfindr7rr;)rargpromptpayloadroutputlastr r rdo_says      zOpenChatKitShell.do_saycCrHr rIrrUr r rdo_resetrLzOpenChatKitShell.do_resetcCstd|jddS)NzHyperparameters: r)rrCr[r r rdo_hyperparameterssz#OpenChatKitShell.do_hyperparameterscCsdS)NTr r[r r rdo_quitrzOpenChatKitShell.do_quitr )rrrintrorVr.r/r r`rrrrKrPrZr\r]r^ __classcell__r r rFrr!s$ r!))rfrirZyaspin.spinnersrrr rr:rrrrrRrZsagemaker.multidatamodelrZsagemaker.serverlessrZsagemaker.modelrr9rVrrrtypingrrZsagemaker.predictorr robjectr rr$r,r'r(Cmdr!r r r rsf       H*