# 1. 初始化环境

初始化环境会做以下几件事情:

1. 获取当前 region_name/account_id/role 等信息
2. 写一个 get_bucket_and_key 函数用来解析 bucket 和 key

In [None]:
import sagemaker
import json
import io
from PIL import Image
import boto3

# Obtain the SageMaker execution context: session, its default bucket, and the execution role.
sagemaker_session = sagemaker.Session()
bucket = sagemaker_session.default_bucket()
role = sagemaker.get_execution_role()

# AWS account id of the calling identity (via STS) and the region of the default boto3 session.
account_id = boto3.client('sts').get_caller_identity().get('Account')
region_name = boto3.session.Session().region_name

# Shared S3 resource handle; later cells use it to read inference results.
s3_resource = boto3.resource('s3')

def get_bucket_and_key(s3uri):
    """Split an S3 URI of the form ``s3://bucket/key`` into ``(bucket, key)``.

    Args:
        s3uri: S3 URI string, e.g. ``"s3://my-bucket/outputs/result.out"``.

    Returns:
        A ``(bucket, key)`` tuple. ``key`` is the empty string when the URI
        names only a bucket (``"s3://my-bucket"`` or ``"s3://my-bucket/"``).

    Raises:
        ValueError: If ``s3uri`` does not start with the ``s3://`` scheme.
    """
    scheme = "s3://"
    # Compare case-insensitively so URIs written as "S3://..." keep working.
    if s3uri[:len(scheme)].lower() != scheme:
        raise ValueError(f"Not an S3 URI: {s3uri!r}")
    # partition() yields an empty key when there is no '/' after the bucket,
    # instead of slicing with the -1 that str.find() returns on a miss.
    bucket, _, key = s3uri[len(scheme):].partition("/")
    return bucket, key

# Echo the detected account and region so the user can confirm the notebook targets the intended environment.
print("account_id:", account_id)
print("region_name:", region_name)

# 2. 定义模型

1. 这里的模型指的是 Sagemaker Inference Model,在后面用于创建 Endpoint
2. 这里会实例化一个 model,主要设置了两个参数:
   1. model_data:用于推理的各种 stable diffusion 模型,即之前打包上传的 model.tar.gz 文件(参考 README.md)
   2. image_uri:stable-diffusion-webui 的代码,即之前 build 并 push 到 ECR 的用于 Inference 的容器镜像(参考 README.md)
3. 此时只会实例化一个 model,不会在 Sagemaker Inference Model 中创建模型(在 deploy 的时候才会创建)

In [None]:
from sagemaker.model import Model
from sagemaker.predictor import Predictor

# Environment variables passed to the inference container.
model_environment = {
    # Model server timeout (presumably in seconds -- confirm against the container's serving code).
    'SAGEMAKER_MODEL_SERVER_TIMEOUT': '1200',
    # [Need to Define] Default checkpoint: change only the file name to one of the
    # files inside your packaged model; the directory path must stay as is.
    'ckpt': '/opt/ml/model/Stable-diffusion/YOUR_MODEL.safetensors',
}

# Instantiate a SageMaker SDK Model; it is deployed in a later cell.
model = Model(
    name="YOUR_MODEL",  # [Need to Define] Name you want to give the model
    model_data="s3://YOUR_BUCKET/YOUR_model.tar.gz",  # [Need to Define] S3 URI of the tarball you uploaded
    role=role,  # Reuse the execution role resolved in the initialization cell
    image_uri='YOUR_ECR_ADDRESS',  # [Need to Define] URI of the image you built and pushed to ECR
    env=model_environment,
    predictor_cls=Predictor,
)

# 3. 创建部署 Endpoint

## 3.1 配置异步推理 Config

In [None]:
from sagemaker.async_inference import AsyncInferenceConfig # Configuration class for asynchronous inference
outputs = "s3://YOUR_BUCKET/outputs/" # [Need to Define] S3 location where asynchronous inference results are stored
async_config = AsyncInferenceConfig(output_path=outputs)

# AsyncInferenceConfig can also set the maximum number of concurrent invocations per instance (max_concurrent_invocations_per_instance) and SNS notifications for completed inferences; see https://sagemaker.readthedocs.io/en/stable/overview.html

## 3.2 部署异步推理节点

In [None]:
# Deploy the SageMaker Inference Endpoint as an asynchronous endpoint.
predictor = model.deploy(
    endpoint_name="YOUR_ENDPOINT_NAME",  # [Need to Define] Name of the endpoint to create
    instance_type='ml.g5.2xlarge',  # [Need to Define] Instance type; ml.g5.2xlarge is a reasonable first choice and can be changed later
    initial_instance_count=1,  # [Need to Define] Number of instances; 1 is recommended at first, autoscaling can be configured later
    async_inference_config=async_config,
)

# Deployment takes a while; progress is visible in the SageMaker console under Inference --> Endpoints.
# The endpoint logs can also be opened to confirm that the deployment is healthy.
# Once deployment completes, `predictor` is an AsyncPredictor object.

## 3.3 异步推理节点部署完成

上面代码中 model.deploy 返回的 predictor 是一个 AsyncPredictor 对象,可以使用它的 predict_async 方法进行异步推理

# 4. 测试 Endpoint

## 4.1 使用标准的 stable-diffusion-webui api 调用 Sagemaker Inference Endpoint 进行测试

In [None]:
from sagemaker.serializers import JSONSerializer
from sagemaker.deserializers import JSONDeserializer

# Send the request payload as JSON and configure a JSON deserializer for the response.
predictor.serializer = JSONSerializer()
predictor.deserializer = JSONDeserializer()

# `inputs` is the request payload; it follows the same format as the open-source stable-diffusion-webui API.
inputs = {
 'task': 'text-to-image',
 # NOTE(review): the model file name is hard-coded; presumably it must match a file
 # inside your packaged model.tar.gz -- confirm and change it to your own model.
 'model': 'rpg.safetensors',
 # Parameters of the text-to-image request.
 'txt2img_payload': {
 'enable_hr': False,
 'denoising_strength': 0.7,
 'firstphase_width': 0,
 'firstphase_height': 0,
 'prompt': 'beautiful (swedish:1.3) , Full body armor made of demon (demon armor:1.3), 1 man, (insanely detailed:1.5), light particle, (mist on the floor:1.3), ((solo)), (highest quality, Alessandro Casagrande, Greg Rutkowski, Sally Mann, concept art, 4k), (colorful), (high sharpness), ((detailed pupils)), red eyes, ((painting:1.1)), (digital painting:1.1), detailed face and eyes,Masterpiece, best quality, highly detailed photo:1, 8k, detailed face,photorealistic, dark and gloomy, fog, thunder background, By jeremy mann, by sandra chevrier, by maciej kuciara,((male demon)), sharp, ((perfect body)), realistic, real shadow, 3d, ((squatting warrior)), ((dark and gloomy castle background:1.3)), thunder sky, thunder, (by Michelangelo), king pose:1,1, side view:1,1',
 'negative_prompt': '(bad art, low detail, pencil drawing:1.6), (plain background, grainy, low quality, mutated hands and fingers:1.5), (watermark, thin lines:1.3), (deformed, signature:1.2), (big nipples, blurry, ugly, bad anatomy, extra limbs, undersaturated, low resolution), disfigured, deformations, out of frame, amputee, bad proportions, extra limb, missing limbs, distortion, floating limbs, out of frame, poorly drawn face, poorly drawn hands, text, malformed, error, missing fingers, cropped, jpeg artifacts, teeth, unsharp',
 'styles': ['None', 'None'],
 'seed': -1.0,
 'subseed': -1.0,
 'subseed_strength': 0,
 'seed_resize_from_h': 0,
 'seed_resize_from_w': 0,
 'sampler_index': 'DPM++ 2S a Karras',
 'batch_size': 1,
 'n_iter': 1,
 'steps': 35,
 'cfg_scale': 7,
 'width': 512,
 'height': 512,
 'restore_faces': True,
 'tiling': False,
 'eta': 1,
 's_churn': 0,
 's_tmax': None,
 's_tmin': 0,
 's_noise': 1,
 'override_settings': {},
 'script_args': [0, False, False, False, "", 1, "", 0, "", True, False, False]}
}

# Submit the request to the SageMaker Inference Endpoint for asynchronous inference.
prediction = predictor.predict_async(inputs)

from sagemaker.async_inference.waiter_config import WaiterConfig
print(f"Response output path: {prediction.output_path}") # S3 location where the asynchronous inference result is stored
print("Start Polling to get response:") # Begin polling until the inference result is available

import time

# The timer starts after the request was submitted, so it measures the wait for the result only.
start = time.time()

config = WaiterConfig(
 max_attempts=100, # Number of polling attempts
 delay=10 # Interval between two attempts
 )

# Waits for the result according to `config`; the returned value is not used here,
# the next cell reads the result from S3 instead.
prediction.get_result(config)

print(f"Time taken: {time.time() - start}s") # Time spent waiting for the inference
print("Inference finished") # The inference result is now available

## 4.2 展示推理结果的图片

In [None]:
import traceback
from PIL import Image
import base64
from datetime import datetime

try:
    # Locate the asynchronous inference result in S3 and download it.
    bucket, key = get_bucket_and_key(prediction.output_path)
    obj = s3_resource.Object(bucket, key)
    body = obj.get()['Body'].read().decode('utf-8')
    # The result is a JSON document whose `images` field is a list;
    # each item is one image encoded as base64.
    for encoded_image in json.loads(body)['images']:
        image = Image.open(io.BytesIO(base64.b64decode(encoded_image)))
        display(image)  # Render the image with the notebook's built-in display
except Exception as e:
    # Tutorial cell: report the failure instead of aborting the notebook run.
    traceback.print_exc()
    print(e)

# 至此,Sagemaker Inference Endpoint 部署过程已全部完成,后续的更新和删除参考下面代码


---

# 5. 使用已有的 Inference Endpoint 进行异步推理

## 5.1 使用 Sagemaker SDK 进行推理

使用 Sagemaker SDK 进行推理比较直观:
1. 实例化一个 AsyncPredictor 对象
2. 调用 AsyncPredictor 的 predict_async 方法进行异步推理
3. 注意,在调用 predict_async 的时候需要指定一个 S3 地址作为 inputs payload 存放的地址

In [None]:
# Import everything this section needs so it can run against an existing endpoint
# without first executing the model-definition and test cells above.
from sagemaker.deserializers import JSONDeserializer
from sagemaker.predictor import Predictor
from sagemaker.predictor_async import AsyncPredictor
from sagemaker.serializers import JSONSerializer

# Wrap a Predictor for the existing endpoint in an AsyncPredictor.
async_predictor = AsyncPredictor(Predictor(endpoint_name="YOUR_ENDPOINT"))  # [Need to Define] Replace with the name of the endpoint you created

#=============== The code below is copied from sections 4.1 & 4.2 ===============#

async_predictor.serializer = JSONSerializer()
async_predictor.deserializer = JSONDeserializer()

# `inputs` is the request payload; it follows the same format as the open-source stable-diffusion-webui API.
inputs = {
 'task': 'text-to-image',
 # NOTE(review): the model file name is hard-coded; presumably it must match a file
 # inside your packaged model.tar.gz -- confirm and change it to your own model.
 'model': 'rpg.safetensors',
 # Parameters of the text-to-image request.
 'txt2img_payload': {
 'enable_hr': False,
 'denoising_strength': 0.7,
 'firstphase_width': 0,
 'firstphase_height': 0,
 'prompt': 'beautiful (swedish:1.3) , Full body armor made of demon (demon armor:1.3), 1 man, (insanely detailed:1.5), light particle, (mist on the floor:1.3), ((solo)), (highest quality, Alessandro Casagrande, Greg Rutkowski, Sally Mann, concept art, 4k), (colorful), (high sharpness), ((detailed pupils)), red eyes, ((painting:1.1)), (digital painting:1.1), detailed face and eyes,Masterpiece, best quality, highly detailed photo:1, 8k, detailed face,photorealistic, dark and gloomy, fog, thunder background, By jeremy mann, by sandra chevrier, by maciej kuciara,((male demon)), sharp, ((perfect body)), realistic, real shadow, 3d, ((squatting warrior)), ((dark and gloomy castle background:1.3)), thunder sky, thunder, (by Michelangelo), king pose:1,1, side view:1,1',
 'negative_prompt': '(bad art, low detail, pencil drawing:1.6), (plain background, grainy, low quality, mutated hands and fingers:1.5), (watermark, thin lines:1.3), (deformed, signature:1.2), (big nipples, blurry, ugly, bad anatomy, extra limbs, undersaturated, low resolution), disfigured, deformations, out of frame, amputee, bad proportions, extra limb, missing limbs, distortion, floating limbs, out of frame, poorly drawn face, poorly drawn hands, text, malformed, error, missing fingers, cropped, jpeg artifacts, teeth, unsharp',
 'styles': ['None', 'None'],
 'seed': -1.0,
 'subseed': -1.0,
 'subseed_strength': 0,
 'seed_resize_from_h': 0,
 'seed_resize_from_w': 0,
 'sampler_index': 'DPM++ 2S a Karras',
 'batch_size': 1,
 'n_iter': 1,
 'steps': 35,
 'cfg_scale': 7,
 'width': 512,
 'height': 512,
 'restore_faces': True,
 'tiling': False,
 'eta': 1,
 's_churn': 0,
 's_tmax': None,
 's_tmin': 0,
 's_noise': 1,
 'override_settings': {},
 'script_args': [0, False, False, False, "", 1, "", 0, "", True, False, False]}
}

# Submit the request for asynchronous inference; `input_path` is the S3 location where the payload is stored.
prediction = async_predictor.predict_async(inputs, input_path="s3://YOUR_BUCKET/") # [Need to Define] S3 location for the input payload

from sagemaker.async_inference.waiter_config import WaiterConfig
print(f"Response output path: {prediction.output_path}") # S3 location where the asynchronous inference result is stored
print("Start Polling to get response:") # Begin polling until the inference result is available

import time

# The timer starts after the request was submitted, so it measures the wait for the result only.
start = time.time()

config = WaiterConfig(
 max_attempts=100, # Number of polling attempts
 delay=10 # Interval between two attempts
 )

# Waits for the result according to `config`; the returned value is not used here,
# the code below reads the result from S3 instead.
prediction.get_result(config)

print(f"Time taken: {time.time() - start}s") # Time spent waiting for the inference
print("Inference finished") # The inference result is now available

# Imported here so this section also works when the section 4.2 cell was not run.
import base64
import traceback

try:
    # Locate the asynchronous inference result in S3 and download it.
    bucket, key = get_bucket_and_key(prediction.output_path)
    obj = s3_resource.Object(bucket, key)
    body = obj.get()['Body'].read().decode('utf-8')
    # The result is a JSON document whose `images` field is a list;
    # each item is one image encoded as base64.
    for encoded_image in json.loads(body)['images']:
        image = Image.open(io.BytesIO(base64.b64decode(encoded_image)))
        display(image)  # Render the image with the notebook's built-in display
except Exception as e:
    # Tutorial cell: report the failure instead of aborting the notebook run.
    traceback.print_exc()
    print(e)

## 5.2 用 Boto3 调用异步推理节点

在进行 stable-diffusion Serverless API 部署的过程中,如果 API 只需要进行异步推理而不涉及 Sagemaker 其他操作,推荐使用 Boto3 来实现:
1. 代码依然很直观
2. 不需要安装 Sagemaker SDK 依赖,通常安装 Sagemaker SDK 依赖后的代码包大小会超过限制

In [None]:
# Create a boto3 client for the sagemaker-runtime service.
sagemaker_client = boto3.client("sagemaker-runtime")
# Request payload; same content as the `inputs` dictionary used in the SDK-based cells.
payload = {
 'task': 'text-to-image',
 # NOTE(review): the model file name is hard-coded; presumably it must match a file
 # inside your packaged model.tar.gz -- confirm and change it to your own model.
 'model': 'rpg.safetensors',
 # Parameters of the text-to-image request.
 'txt2img_payload': {
 'enable_hr': False,
 'denoising_strength': 0.7,
 'firstphase_width': 0,
 'firstphase_height': 0,
 'prompt': 'beautiful (swedish:1.3) , Full body armor made of demon (demon armor:1.3), 1 man, (insanely detailed:1.5), light particle, (mist on the floor:1.3), ((solo)), (highest quality, Alessandro Casagrande, Greg Rutkowski, Sally Mann, concept art, 4k), (colorful), (high sharpness), ((detailed pupils)), red eyes, ((painting:1.1)), (digital painting:1.1), detailed face and eyes,Masterpiece, best quality, highly detailed photo:1, 8k, detailed face,photorealistic, dark and gloomy, fog, thunder background, By jeremy mann, by sandra chevrier, by maciej kuciara,((male demon)), sharp, ((perfect body)), realistic, real shadow, 3d, ((squatting warrior)), ((dark and gloomy castle background:1.3)), thunder sky, thunder, (by Michelangelo), king pose:1,1, side view:1,1',
 'negative_prompt': '(bad art, low detail, pencil drawing:1.6), (plain background, grainy, low quality, mutated hands and fingers:1.5), (watermark, thin lines:1.3), (deformed, signature:1.2), (big nipples, blurry, ugly, bad anatomy, extra limbs, undersaturated, low resolution), disfigured, deformations, out of frame, amputee, bad proportions, extra limb, missing limbs, distortion, floating limbs, out of frame, poorly drawn face, poorly drawn hands, text, malformed, error, missing fingers, cropped, jpeg artifacts, teeth, unsharp',
 'styles': ['None', 'None'],
 'seed': -1.0,
 'subseed': -1.0,
 'subseed_strength': 0,
 'seed_resize_from_h': 0,
 'seed_resize_from_w': 0,
 'sampler_index': 'DPM++ 2S a Karras',
 'batch_size': 1,
 'n_iter': 1,
 'steps': 35,
 'cfg_scale': 7,
 'width': 512,
 'height': 512,
 'restore_faces': True,
 'tiling': False,
 'eta': 1,
 's_churn': 0,
 's_tmax': None,
 's_tmin': 0,
 's_noise': 1,
 'override_settings': {},
 'script_args': [0, False, False, False, "", 1, "", 0, "", True, False, False]}
}

import uuid

# Unique id that identifies this invocation; also used as the name of the input object.
inference_id = str(uuid.uuid4())

S3_InputBucket = "YOUR_BUCKET"  # [Need to Define] Bucket that receives the input payload
# Build the object key once so the uploaded object and the InputLocation cannot drift apart.
input_key = f"inputs/{inference_id}"

# Write the payload to S3; its location is then handed to invoke_endpoint_async.
s3_resource = boto3.resource("s3")
s3_object = s3_resource.Object(S3_InputBucket, input_key)
# str.encode() already returns bytes, so no extra bytes() wrapper is needed.
s3_object.put(Body=json.dumps(payload).encode('UTF-8'))
input_location = f"s3://{S3_InputBucket}/{input_key}"

# Invoke SageMaker asynchronous inference through boto3.
response = sagemaker_client.invoke_endpoint_async(
    EndpointName="YOUR_ENDPOINT",  # [Need to Define] Name of the endpoint you created
    ContentType='application/json',
    Accept="application/json;jpeg",
    InputLocation=input_location,
)

# S3 location where the asynchronous inference result is stored.
output_location = response["OutputLocation"]
print(output_location)

In [None]:
# The asynchronous inference may not have finished yet.
# Re-run this cell to check whether the result has been generated.
# base64/traceback are imported here so this section works without running the section 4.2 cell.
import base64
import traceback

from botocore.exceptions import ClientError

try:
    # Locate the asynchronous inference result in S3 and download it.
    bucket, key = get_bucket_and_key(output_location)
    obj = s3_resource.Object(bucket, key)
    body = obj.get()['Body'].read().decode('utf-8')
    # The result is a JSON document whose `images` field is a list;
    # each item is one image encoded as base64.
    for encoded_image in json.loads(body)['images']:
        image = Image.open(io.BytesIO(base64.b64decode(encoded_image)))
        display(image)  # Render the image with the notebook's built-in display
except ClientError as e:
    if e.response['Error']['Code'] == 'NoSuchKey':
        # The output object does not exist yet: the inference has not produced a result.
        print("Results not generated yet.")
    else:
        traceback.print_exc()
        print(e)

# 6. 更新 Inference Endpoint

In [30]:
# Attach to the existing endpoint and update it to the given instance type and instance count.
predictor = Predictor(endpoint_name="YOUR_ENDPOINT") # [Need to Define] Replace with the name of the endpoint you created
predictor.update_endpoint(instance_type="ml.g4dn.2xlarge", initial_instance_count=1)

# The update may take some time; progress is visible in the SageMaker console under Inference --> Endpoints.

---

# 7. 删除 Inference Endpoint

In [31]:
# Optional: uncomment the two lines below to delete the SageMaker model as well.
# [Need to Define] Replace YOUR_MODEL with the model name you defined.
# NOTE(review): untested as written -- confirm that Model can be constructed from the name alone.
#model = Model(name="YOUR_MODEL")
#model.delete_model()

# Attach to the existing endpoint and delete it.
predictor = Predictor(endpoint_name="YOUR_ENDPOINT") # [Need to Define] Replace with the name of the endpoint you created
predictor.delete_endpoint()