### SageMaker Stable Diffusion Quick Kit - Inference 部署
   [SageMaker Stable Diffusion Quick Kit](https://github.com/aws-samples/sagemaker-stablediffusion-quick-kit) 提供了一组开箱即用的代码和配置文件，可以帮助客户在亚马逊云上使用 Amazon SageMaker、Lambda、CloudFront 快速构建 Stable Diffusion AI 绘图服务。
   
   ![架构](https://raw.githubusercontent.com/aws-samples/sagemaker-stablediffusion-quick-kit/main/images/architecture.png)


#### 前提条件
1. 亚马逊云账号
2. 建议使用ml.g4dn.xlarge/ml.g5.xlarge

### Notebook部署步骤
1. 升级boto3, sagemaker python sdk
2. 部署AIGC推理服务
    * 配置模型参数
    * 配置异步推理
    * 部署SageMaker Endpoint 
3. 测试模型
4. 配置推理服务弹性伸缩策略(可选)
5. 清除资源


### 1. 升级boto3, sagemaker python sdk

In [None]:
# Upgrade boto3 and the SageMaker Python SDK with pip (notebook shell command)
!pip install --upgrade boto3 sagemaker

In [None]:
import time
import boto3
import sagemaker
# AWS account id (via STS) and region of the current boto3 session
caller_identity = boto3.client('sts').get_caller_identity()
account_id = caller_identity.get('Account')
region_name = boto3.session.Session().region_name

# SageMaker session, its default S3 bucket, and the execution role
sagemaker_session = sagemaker.Session()
bucket = sagemaker_session.default_bucket()
role = sagemaker.get_execution_role()

# Echo the resolved role and bucket
for label, value in (("role", role), ("bucket", bucket)):
    print(f"{label}: {value}")

### 2. 部署AIGC推理服务

#### 2.1 配置模型参数
   * model_name: 支持 Huggingface diffusers models 结构，
       * 可以直接使用Huggingface的名字:Linaqruf/anything-v3.0
       * s3://sagemaker-us-east-1-123456789011/dreambooth/trained_models/model.tar.gz
       * 目前不支持.ckpt(single check point format),请使用转换脚本转换为diffusers格式
   * model_args:  diffusers StableDiffusionPipeline init arguments
   * framework_version: PyTorch版本
   * py_version: python版本
   * model_environment: 推理环境变量

In [None]:

# Other Hugging Face models that can be selected instead of the active one:
#model_name = 'andite/anything-v4.0' # default: high-quality, highly detailed anime style
#model_name = 'Envvi/Inkpunk-Diffusion' # Inkpunk style, prompt keyword: nvinkpunk
#model_name = 'nousr/robo-diffusion-2-base' # cool-looking robots, prompt keyword: nousr robot
#model_name = 'prompthero/openjourney' # Openjourney style, prompt keyword: mdjrny-v4 style
#model_name = 'dreamlike-art/dreamlike-photoreal-2.0' # photorealistic style, prompt keyword: photo
#model_name = 'runwayml/stable-diffusion-inpainting'
#model_name = 'danbrown/RPG-v4' # RPG (role-playing game) characters
model_name = 'runwayml/stable-diffusion-v1-5' # standard Stable Diffusion 1.5

# Example of loading an SD WebUI LoRA model:
# upload the corresponding LoRA model to an S3 bucket in your own account first.
#lora_model = f's3://{bucket}/fakemonPokMonLORA/fakemonPokMonLORA_v10Beta.safetensors'

# Framework and Python versions passed to PyTorchModel
framework_version = '1.10'
py_version = 'py38'

# Environment variables handed to the inference container
model_environment = dict(
    SAGEMAKER_MODEL_SERVER_TIMEOUT='600',
    SAGEMAKER_MODEL_SERVER_WORKERS='1',
    model_name=model_name,
    #lora_model=lora_model, # uncomment to enable LoRA
    s3_bucket=bucket,
)

#### 2.2 创建dummy model_data 文件(真正的模型使用inference.py进行加载), 为SageMaker Endpoint 创建 PyTorchModel 

In [None]:
# Create an empty placeholder file to put into the model archive
!touch dummy
# Bundle the placeholder and the logo image into model.tar.gz
!tar czvf model.tar.gz dummy sagemaker-logo-small.png
# S3 prefix for the assets, and the full S3 URI of the archive once uploaded
assets_dir = 's3://{0}/{1}/assets/'.format(bucket, 'stablediffusion')
model_data = 's3://{0}/{1}/assets/model.tar.gz'.format(bucket, 'stablediffusion')
# Upload the archive to the assets prefix, then remove the local temporary files
!aws s3 cp model.tar.gz $assets_dir
!rm -f dummy model.tar.gz

In [None]:
from sagemaker.pytorch.model import PyTorchModel

# Arguments for the PyTorch model: the archive uploaded to S3 as model_data,
# the inference code from ./code/ with inference.py as entry point, and the
# environment variables configured above.
pytorch_model_kwargs = dict(
    name=None,
    model_data=model_data,
    entry_point='inference.py',
    source_dir="./code/",
    role=role,
    framework_version=framework_version,
    py_version=py_version,
    env=model_environment,
)

model = PyTorchModel(**pytorch_model_kwargs)

#### 2.3 配置异步推理、设置推理使用的实例类型


In [None]:
from sagemaker.async_inference import AsyncInferenceConfig
import uuid

# Endpoint name: fixed prefix plus a random UUID
endpoint_name = f'AIGC-Quick-Kit-{uuid.uuid4()}'
instance_type = 'ml.g5.xlarge'
instance_count = 1

# Asynchronous inference results are written under this S3 prefix
async_output_path = f's3://{bucket}/stablediffusion/asyncinvoke/out/'
async_config = AsyncInferenceConfig(output_path=async_output_path)

print(f'endpoint_name: {endpoint_name}')

#### 2.4 部署SageMaker Endpoint

In [None]:
from sagemaker.serializers import JSONSerializer
from sagemaker.deserializers import JSONDeserializer


# Deployment options: endpoint name, instance type/count, the async inference
# configuration, and JSON (de)serialization for requests and responses.
deploy_options = dict(
    endpoint_name=endpoint_name,
    instance_type=instance_type,
    initial_instance_count=instance_count,
    async_inference_config=async_config,
    serializer=JSONSerializer(),
    deserializer=JSONDeserializer(),
    #wait=False
)

async_predictor = model.deploy(**deploy_options)


#### 2.5 编写异步推理调用辅助方法(适用于Notebook)
 * get_bucket_and_key, read s3 object
 * draw_image, download image from s3 and draw it in notebook
 * async_predict_fn 


In [None]:
import json
import io
from PIL import Image
import traceback
import time
from sagemaker.async_inference.waiter_config import WaiterConfig


s3_resource = boto3.resource('s3')

def get_bucket_and_key(s3uri):
    """Split an ``s3://bucket/key`` URI into its bucket and key parts.

    Args:
        s3uri: S3 URI string starting with ``s3://``.

    Returns:
        A ``(bucket, key)`` tuple. ``key`` is an empty string when the URI
        names only a bucket (``s3://bucket`` or ``s3://bucket/``).

    Raises:
        ValueError: If ``s3uri`` does not start with ``s3://``.
    """
    scheme = 's3://'
    if not s3uri.startswith(scheme):
        raise ValueError(f"Not an S3 URI: {s3uri!r}")
    # partition() copes with a missing '/', where find() would return -1 and
    # the slices would drop the bucket's last character and return the whole
    # URI as the key.
    bucket, _, key = s3uri[len(scheme):].partition('/')
    return bucket, key

def draw_image(response):
    """Download the images referenced by an async inference result and show them.

    The object at ``response.output_path`` is read from S3 and parsed as JSON;
    its ``'result'`` entry is iterated as a list of S3 URIs, each of which is
    downloaded and opened with PIL.

    Args:
        response: Object exposing an ``output_path`` attribute holding the
            S3 URI of the inference output.

    Any exception is caught; its traceback and message are printed instead of
    being propagated.
    """
    try:
        result_bucket, result_key = get_bucket_and_key(response.output_path)
        raw_result = s3_resource.Object(result_bucket, result_key).get()['Body'].read()
        predictions = json.loads(raw_result.decode('utf-8'))['result']
        print(predictions)
        for image_uri in predictions:
            image_bucket, image_key = get_bucket_and_key(image_uri)
            image_data = s3_resource.Object(image_bucket, image_key).get()['Body'].read()
            Image.open(io.BytesIO(image_data)).show()
    except Exception as e:
        traceback.print_exc()
        print(e)


def async_predict_fn(predictor,inputs):
    """Submit an asynchronous inference request, wait for it, and show the images.

    Args:
        predictor: Object providing ``predict_async``.
        inputs: Request payload passed to ``predict_async``.

    Prints the response object, its output path, and the time spent waiting
    for and drawing the result. Returns nothing.
    """
    async_response = predictor.predict_async(inputs)

    print(f"Response object: {async_response}")
    print(f"Response output path: {async_response.output_path}")
    print("Start Polling to get response:")

    started_at = time.time()

    # Poll up to 100 times, waiting 10 seconds between attempts
    waiter = WaiterConfig(max_attempts=100, delay=10)
    async_response.get_result(waiter)
    draw_image(async_response)

    elapsed = time.time() - started_at
    print(f"Time taken: {elapsed}s")

### 3. 测试
#### 3.1 txt2img 文本到图片推理

In [None]:
# AIGC Quick Kit txt2img: text-to-image request payload
inputs_txt2img = dict(
    prompt="a photo of an astronaut riding a horse on mars",
    negative_prompt="",
    steps=20,
    sampler="euler_a",
    seed=52362,
    height=512,
    width=512,
    count=2,
)

# Run the request and report the total wall-clock time of the call
start = time.time()
async_predict_fn(async_predictor, inputs_txt2img)
print(f"Time taken: {time.time() - start}s")

#### 3.3 img2img 图片到图片推理
  
 * 原始图片 :![](https://raw.githubusercontent.com/CompVis/stable-diffusion/main/assets/stable-samples/img2img/sketch-mountains-input.jpg)

In [None]:
# AIGC Quick Kit img2img: image-to-image request payload
inputs_img2img = dict(
    prompt="A fantasy landscape, trending on artstation",
    negative_prompt="",
    steps=20,
    sampler="euler_a",
    seed=43768,
    height=512,
    width=512,
    count=2,
    input_image="https://raw.githubusercontent.com/CompVis/stable-diffusion/main/assets/stable-samples/img2img/sketch-mountains-input.jpg",
    #input_image="https://img.getimg.ai/inputs/img-Rj5vsMBFWrshn7cFwneVI.png"
)

async_predict_fn(async_predictor, inputs_img2img)

#### 3.2 LoRA  测试
测试LoRA 模型

In [None]:
# Prompt and negative prompt for the LoRA test
prompt = "pokemon,fire, a red wolf with blue eyes"
negative_prompt = "nsfw, human, 1boy, 1girl,watermark, (worst quality, low quality:1.4), ( jpeg artifacts:1.4), (depth of field, bokeh, blurry, film grain, chromatic aberration, lens flare:1.0), greyscale, monochrome, dusty sunbeams, trembling, motion lines, motion blur, emphasis lines, text, title, logo, signature,"

# Text-to-image request payload built from the prompts above
inputs_txt2img = dict(
    prompt=prompt,
    negative_prompt=negative_prompt,
    steps=20,
    sampler="euler_a",
    seed=52362,
    height=512,
    width=512,
    count=2,
)

# Run the request and report the total wall-clock time of the call
start = time.time()
async_predict_fn(async_predictor, inputs_txt2img)
print(f"Time taken: {time.time() - start}s")


### 4. 配置推理服务弹性伸缩策略(可选)

In [None]:
# application-autoscaling client
asg_client = boto3.client("application-autoscaling")

# This is the format in which application autoscaling references the endpoint
resource_id = f"endpoint/{async_predictor.endpoint_name}/variant/AllTraffic"

# Register the endpoint variant as a scalable target with 1 to 2 instances.
# NOTE(review): MinCapacity is 1, so this configuration does not scale to zero.
response = asg_client.register_scalable_target(
    ServiceNamespace="sagemaker",
    ResourceId=resource_id,
    ScalableDimension="sagemaker:variant:DesiredInstanceCount",
    MinCapacity=1,
    MaxCapacity=2,
)

# Target-tracking policy driven by the average ApproximateBacklogSizePerInstance
# metric of this endpoint, with a target value of 2.0.
response = asg_client.put_scaling_policy(
    PolicyName=f'Request-ScalingPolicy-{async_predictor.endpoint_name}',
    ServiceNamespace="sagemaker",
    ResourceId=resource_id,
    ScalableDimension="sagemaker:variant:DesiredInstanceCount",
    PolicyType="TargetTrackingScaling",
    TargetTrackingScalingPolicyConfiguration={
        "TargetValue": 2.0,
        "CustomizedMetricSpecification": {
            "MetricName": "ApproximateBacklogSizePerInstance",
            "Namespace": "AWS/SageMaker",
            "Dimensions": [{"Name": "EndpointName", "Value": async_predictor.endpoint_name}],
            "Statistic": "Average",
        },
        "ScaleInCooldown": 600, # cooldown, in seconds, applied to scale-in activity
        "ScaleOutCooldown": 300 # cooldown, in seconds, applied to scale-out activity
    },
)

#### 通过并发推理，测试伸缩策略

In [None]:
import time
import random

# Start of the wall-clock measurement for the whole batch
start = time.time()

# Collects the objects returned by predict_async, one per submitted request
outputs=[]

def build_prompts_with_random_seed():
    """Return a txt2img request payload whose seed is freshly randomized.

    Every field is fixed except ``seed``, which is drawn uniformly from the
    inclusive range 52362..99999999 on each call.
    """
    seed = random.randint(52362, 99999999)
    payload = {
        "prompt": "a photo of an astronaut riding a horse on mars",
        "negative_prompt": "",
        "steps": 50,
        "sampler": "ddim",
        "seed": seed,
        "height": 512,
        "width": 512,
        "count": 2,
    }
    return payload

# Submit 10 asynchronous requests, each with a freshly randomized seed
outputs.extend(
    async_predictor.predict_async(build_prompts_with_random_seed())
    for _ in range(10)
)

# Wait for each request in submission order and collect its result
results = [
    pending.get_result(WaiterConfig(max_attempts=600)) for pending in outputs
]

print(f"Time taken: {time.time() - start}s")
print(results)

#### 绘制推理结果


In [None]:
# Download and display every generated image from the batch results.
# Dedicated names are used here because this cell runs at notebook-global
# scope: assigning to `bucket` would overwrite the default SageMaker bucket
# configured earlier, and assigning to `bytes` would hide the built-in type
# for every cell executed afterwards.
for result in results:
    for image_uri in result["result"]:
        image_bucket, image_key = get_bucket_and_key(image_uri)
        image_data = s3_resource.Object(image_bucket, image_key).get()['Body'].read()
        Image.open(io.BytesIO(image_data)).show()


In [None]:
# Remove the endpoint variant's registration as a scalable target
response = asg_client.deregister_scalable_target(
    ResourceId=resource_id,
    ServiceNamespace="sagemaker",
    ScalableDimension="sagemaker:variant:DesiredInstanceCount",
)


### 5. 清除资源

In [None]:
# Delete the SageMaker endpoint created by this notebook
async_predictor.delete_endpoint()