---
title: "01_SageMakerProcessing"
output: rmarkdown::github_document
knit: (function(inputFile, encoding) { rmarkdown::render(inputFile, encoding = encoding, output_dir = "rendered") })
---

Clear the workspace

```{r}
# Remove every object from the current environment so the notebook starts
# from a clean slate.
rm(list = ls())
```

## SageMaker Processing

## Reinstall numpy to work around a conflict between reticulate and RStudio v4.x due to BLAS library dependency
## The reinstall process takes 5 minutes.
## https://github.com/rstudio/reticulate/issues/1257

```{bash}
# Locate the interpreter that `python` resolves to on PATH and rebuild numpy
# from source (--no-binary) for that interpreter.
python_path=$(which python)
echo "$python_path"
# Quote the path so an interpreter location containing spaces is not split.
sudo --set-home "$python_path" -m pip install --no-user --force-reinstall --no-binary numpy numpy
```

## Imports

```{r}
suppressWarnings(library(reticulate))

# Point reticulate at the same interpreter the bash chunk above reinstalled
# numpy for.
path_to_python <- system("which python", intern = TRUE)

# `which` prints nothing when no interpreter is found, which leaves a
# zero-length vector; fail here with a clear message instead of passing it on
# to use_python().
if (length(path_to_python) == 0L) {
  stop("No `python` executable was found on PATH.", call. = FALSE)
}

use_python(path_to_python)
sagemaker <- import("sagemaker")
```

```{r}
# Session-level settings taken from the SageMaker Python SDK.
role <- sagemaker$get_execution_role()
session <- sagemaker$Session()
s3_output <- session$default_bucket()
s3_prefix <- "R-in-Processing"
account_id <- session$account_id()
region <- session$boto_region_name
```

```{r, results='hide'}
# Assemble the image URI as
# <account_id>.dkr.ecr.<region>.amazonaws.com/sagemaker-r-processing:1.0
container_uri <- paste(
  account_id,
  "dkr.ecr",
  region,
  "amazonaws.com/sagemaker-r-processing:1.0",
  sep = "."
)
print(container_uri)
```

```{r}
# Configure a ScriptProcessor that runs the script with `Rscript` inside the
# container image built above, on a single ml.c5.xlarge instance.
processor <- sagemaker$processing$ScriptProcessor(
  image_uri = container_uri,
  command = list("Rscript"),
  role = role,
  instance_count = 1L,
  instance_type = "ml.c5.xlarge"
)

# Suffix the job name with the current Unix timestamp (whole seconds) so that
# repeated runs get distinct names.
processor$run(
  code = "/home/sagemaker-user/rstudio-on-sagemaker-workshop/02_AdvancedSageMaker/preprocessing.R",
  job_name = paste(
    "r-processing",
    as.integer(as.numeric(Sys.time())),
    sep = "-"
  )
)
```