## SageMaker Stable diffusion Quick Kit 
 [SageMaker Stable Diffusion Quick Kit](https://github.com/aws-samples/sagemaker-stablediffusion-quick-kit) is an asset that helps customers launch Stable Diffusion model services on Amazon SageMaker or Amazon EKS.
 
 ![architecture](https://raw.githubusercontent.com/aws-samples/sagemaker-stablediffusion-quick-kit/main/images/architecture.png)

#### Prerequisites
1. An Amazon Web Services account
2. An ml.g4dn.xlarge or ml.g5.xlarge instance is preferred


#### Notebook Step
1. Upgrade boto3 and the SageMaker Python SDK
2. Deploy AIGC inference service with SageMaker Endpoint service
 * configure model parameters
 * configure async inference
 * deploy SageMaker Endpoint
3. Test inference
4. SageMaker endpoint AutoScaling config (optional)
5. Clean up resources

### 1. Upgrade boto3 and the SageMaker Python SDK

In [None]:
# Upgrade boto3 and the SageMaker Python SDK via pip.
!pip install --upgrade boto3 sagemaker

In [None]:
import boto3
import sagemaker
# AWS account id of the caller (looked up through STS) and the region of the
# default boto3 session.
account_id = boto3.client('sts').get_caller_identity().get('Account')
region_name = boto3.session.Session().region_name

# SageMaker session, its default S3 bucket, and the execution role; `bucket`
# and `role` are reused by later cells (model upload, model creation, output path).
sagemaker_session = sagemaker.Session()
bucket = sagemaker_session.default_bucket()
role = sagemaker.get_execution_role()

# Echo the resolved role and bucket so they can be checked before deploying.
print(f'execution role: {role}')
print(f'default bucket: {bucket}')

### 2. Deploy AIGC inference service with SageMaker Endpoint service 

#### 2.1 setup model name and arguments 
 * model_name: Hugging Face diffusers model (the single-checkpoint format is not supported)
 * model_args: diffusers StableDiffusionPipeline init arguments
 * framework_version: PyTorch version
 * py_version: Python version (3.8)
 * model_environment: inference container environment variables

In [None]:

# Framework and Python versions handed to PyTorchModel when the model is created.
framework_version = '1.10'
py_version = 'py38'

# Environment variables passed to the inference container through PyTorchModel(env=...).
# NOTE(review): how each key is consumed is decided by the container / inference.py,
# which is not visible here — confirm units and semantics there.
model_environment = {
 # presumably the model-server timeout in seconds — TODO confirm
 'SAGEMAKER_MODEL_SERVER_TIMEOUT':'600', 
 'SAGEMAKER_MODEL_SERVER_WORKERS': '1', 
 'model_name':'Linaqruf/anything-v3.0', #huggingface model name 
 # default bucket of the SageMaker session created earlier in the notebook
 's3_bucket': bucket
}

#### 2.2 Create a dummy model_data file and a PyTorchModel for the SageMaker Endpoint

In [None]:
!touch dummy
!tar czvf model.tar.gz dummy sagemaker-logo-small.png
assets_dir = 's3://{0}/{1}/assets/'.format(bucket, 'stablediffusion')
model_data = 's3://{0}/{1}/assets/model.tar.gz'.format(bucket, 'stablediffusion')
!aws s3 cp model.tar.gz $assets_dir
!rm -f dummy model.tar.gz

In [None]:
from sagemaker.pytorch.model import PyTorchModel

# Define the SageMaker PyTorch model: the uploaded archive as model_data, the
# inference script from ./code/, and the environment variables configured above.
model = PyTorchModel(
 # no explicit model name is given (presumably the SDK generates one — confirm)
 name = None,
 model_data = model_data,
 # entry script, resolved relative to source_dir
 entry_point = 'inference.py',
 source_dir = "./code/",
 role = role,
 framework_version = framework_version, 
 py_version = py_version,
 env = model_environment
)

#### 2.3 Configure the async inference output path, instance type, and endpoint name


In [None]:
from sagemaker.async_inference import AsyncInferenceConfig
import uuid

# Endpoint name: a fixed prefix followed by a random UUID4 so each deployment is unique.
endpoint_name = f'AIGC-Quick-Kit-{uuid.uuid4()}'
instance_type = 'ml.g4dn.xlarge'
instance_count = 1
# Async inference results are written under this prefix of the default bucket.
async_config = AsyncInferenceConfig(
    output_path=f's3://{bucket}/stablediffusion/asyncinvoke/out/'
)

print(f'endpoint_name: {endpoint_name}')

#### 2.4 Deploy SageMaker Endpoint

In [None]:
from sagemaker.serializers import JSONSerializer
from sagemaker.deserializers import JSONDeserializer


# Deploy the model as an asynchronous endpoint using the name, instance type,
# instance count and async config defined in the previous cell. Requests are
# serialized as JSON and responses are deserialized from JSON.
async_predictor = model.deploy(
 endpoint_name = endpoint_name,
 instance_type = instance_type, 
 initial_instance_count = instance_count,
 async_inference_config = async_config,
 serializer = JSONSerializer(),
 deserializer = JSONDeserializer()
)


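#### 2.5 Create async inference helper functions
 * get_bucket_and_key: split an S3 URI into its bucket and key
 * draw_image: download the generated images from S3 and display them in the notebook
 * async_predict_fn: submit an async request, wait for the result, and draw it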


In [None]:
import json
import io
from PIL import Image
import traceback
import time
from sagemaker.async_inference.waiter_config import WaiterConfig


# Shared boto3 S3 resource used by the cells and helpers in this notebook to read objects.
s3_resource = boto3.resource('s3')

def get_bucket_and_key(s3uri):
    """Split an S3 URI of the form ``s3://bucket/key`` into bucket and key.

    The first five characters (the ``s3://`` scheme) are skipped without
    validation. Everything up to the next ``/`` is the bucket name and the
    remainder is the object key.

    Args:
        s3uri: S3 URI string, e.g. ``s3://my-bucket/path/to/object``.

    Returns:
        A ``(bucket, key)`` tuple of strings. ``key`` is the empty string
        when the URI names only a bucket (``s3://my-bucket`` or
        ``s3://my-bucket/``).
    """
    # partition() yields an empty key when no '/' follows the bucket, rather
    # than the negative index str.find() would feed into the slices.
    bucket_name, _, key = s3uri[5:].partition('/')
    return bucket_name, key

def draw_image(response):
    """Show every image referenced by an async inference response.

    Reads the JSON document stored at ``response.output_path`` in S3, prints
    the list found under its ``"result"`` key, then downloads each entry
    (treated as an S3 URI) and opens it with PIL's ``Image.show()``.

    Any exception is caught: the traceback and the exception are printed and
    nothing is re-raised, so a failed download does not abort the notebook cell.

    Args:
        response: object exposing an ``output_path`` attribute holding the
            S3 URI of the inference output.
    """
    try:
        out_bucket, out_key = get_bucket_and_key(response.output_path)
        raw_output = s3_resource.Object(out_bucket, out_key).get()['Body'].read()
        predictions = json.loads(raw_output.decode('utf-8'))['result']
        print(predictions)
        for image_uri in predictions:
            img_bucket, img_key = get_bucket_and_key(image_uri)
            payload = s3_resource.Object(img_bucket, img_key).get()['Body'].read()
            Image.open(io.BytesIO(payload)).show()
    except Exception as e:
        traceback.print_exc()
        print(e)


def async_predict_fn(predictor, inputs, max_attempts=100, delay=10):
    """Submit an async inference request, wait for its result and draw it.

    Args:
        predictor: async predictor exposing ``predict_async``.
        inputs: request payload passed unchanged to ``predict_async``.
        max_attempts: number of polling attempts before giving up
            (default 100, the value previously hard-coded).
        delay: seconds to wait between polling attempts
            (default 10, the value previously hard-coded).

    Returns:
        None. Progress, the output path and the elapsed time are printed, and
        the generated images are shown through ``draw_image``.

    Raises:
        Whatever ``predict_async`` or ``get_result`` raise (for example when
        polling is exhausted); the exact exception types are defined by the
        SageMaker SDK.
    """
    response = predictor.predict_async(inputs)

    print(f"Response object: {response}")
    print(f"Response output path: {response.output_path}")
    print("Start Polling to get response:")

    # The timer starts after submission, so it measures the wait for the
    # result plus the time spent downloading and drawing the images.
    start = time.time()
    waiter = WaiterConfig(max_attempts=max_attempts, delay=delay)

    # The returned payload is not used here; draw_image re-reads the output from S3.
    response.get_result(waiter)
    draw_image(response)

    print(f"Time taken: {time.time() - start}s")

### 3. Testing
 3.1 txt2img inference

In [None]:
# AIGC Quick Kit txt2img: request payload for text-to-image generation.
# NOTE(review): the meaning of each key is defined by the endpoint's inference
# script, which is not visible here — confirm there before changing values.
inputs_txt2img = {
 "prompt": "a photo of an astronaut riding a horse on mars",
 "negative_prompt":"",
 "steps":20,
 "sampler":"euler_a",
 "seed": 31252362,
 "height": 512, 
 "width": 512,
 "count":2

}


# Submit the request, poll until the result is ready, then display the images.
async_predict_fn(async_predictor,inputs_txt2img)



 3.2 img2img inference
 
 * Original image: ![](https://raw.githubusercontent.com/CompVis/stable-diffusion/main/assets/stable-samples/img2img/sketch-mountains-input.jpg)

In [None]:
# AIGC Quick Kit img2img: same payload shape as the txt2img request plus an
# "input_image" URL pointing at the source picture.
# NOTE(review): the meaning of each key is defined by the endpoint's inference
# script, which is not visible here — confirm there before changing values.

inputs_img2img = {
 "prompt": "A fantasy landscape, trending on artstation",
 "negative_prompt":"",
 "steps":20,
 "sampler":"euler_a",
 "seed":43768,
 "height": 512, 
 "width": 512,
 "count":2,
 "input_image":"https://raw.githubusercontent.com/CompVis/stable-diffusion/main/assets/stable-samples/img2img/sketch-mountains-input.jpg"

}

# Submit the request, poll until the result is ready, then display the images.
async_predict_fn(async_predictor,inputs_img2img)

### 4. SageMaker endpoint AutoScaling Config (Optional)

In [None]:
# Application Auto Scaling client used to register the endpoint and attach a policy
asg_client = boto3.client("application-autoscaling")

# This is the format in which application autoscaling references the endpoint
resource_id = f"endpoint/{async_predictor.endpoint_name}/variant/AllTraffic"

# Register the endpoint variant as a scalable target with 1 to 2 instances.
# With MinCapacity=1 the endpoint does NOT scale down to zero instances.
response = asg_client.register_scalable_target(
 ServiceNamespace="sagemaker",
 ResourceId=resource_id,
 ScalableDimension="sagemaker:variant:DesiredInstanceCount",
 MinCapacity=1,
 MaxCapacity=2,
)

# Target-tracking policy driven by the async backlog per instance: the average
# of ApproximateBacklogSizePerInstance for this endpoint is tracked against a
# target value of 2.0.
response = asg_client.put_scaling_policy(
 PolicyName=f'Request-ScalingPolicy-{async_predictor.endpoint_name}',
 ServiceNamespace="sagemaker",
 ResourceId=resource_id,
 ScalableDimension="sagemaker:variant:DesiredInstanceCount",
 PolicyType="TargetTrackingScaling",
 TargetTrackingScalingPolicyConfiguration={
 "TargetValue": 2.0,
 "CustomizedMetricSpecification": {
 "MetricName": "ApproximateBacklogSizePerInstance",
 "Namespace": "AWS/SageMaker",
 "Dimensions": [{"Name": "EndpointName", "Value": async_predictor.endpoint_name}],
 "Statistic": "Average",
 },
 "ScaleInCooldown": 600, # scale-in cooldown in seconds (the floor stays at MinCapacity)
 "ScaleOutCooldown": 300 # scale-out cooldown in seconds
 },
)

In [None]:
import time

# Small load test: queue several identical txt2img requests, wait for all of
# them, and report the total wall-clock time.
start = time.time()

inputs_txt2img = {
    "prompt": "a photo of an astronaut riding a horse on mars",
    "negative_prompt": "",
    "steps": 20,
    "sampler": "euler_a",
    "seed": 52362,
    "height": 512,
    "width": 512,
    "count": 2,
}

# send 10 requests
outputs = [async_predictor.predict_async(inputs_txt2img) for _ in range(10)]

# wait for each pending response in submission order and collect its result
results = [
    pending.get_result(WaiterConfig(max_attempts=600))
    for pending in outputs
]

print(f"Time taken: {time.time() - start}s")
print(results)

### draw result image

In [None]:
# Download and display every image referenced by the batch results.
# Dedicated variable names are used so this cell does not overwrite the
# notebook-level `bucket` (the default S3 bucket) or shadow the builtin `bytes`.
for result in results:
    for image_uri in result["result"]:
        image_bucket, image_key = get_bucket_and_key(image_uri)
        image_obj = s3_resource.Object(image_bucket, image_key)
        image_bytes = image_obj.get()['Body'].read()
        image = Image.open(io.BytesIO(image_bytes))
        image.show()

In [None]:
# Deregister the endpoint variant from Application Auto Scaling, using the
# same resource id and scalable dimension it was registered with.
response = asg_client.deregister_scalable_target(
 ServiceNamespace='sagemaker',
 ResourceId=resource_id,
 ScalableDimension='sagemaker:variant:DesiredInstanceCount'
)


### 5. Clear resource 

In [None]:
# Delete the deployed endpoint.
# NOTE(review): only delete_endpoint() is called here; the SageMaker model object
# and the files uploaded to S3 are not removed by this cell — confirm whether
# they should be cleaned up as well.
async_predictor.delete_endpoint()

# 