# Preprocess the abalone CSV: one-hot encode `sex`, reorder the columns, and
# write random train / validation / test splits (roughly 70% / 15% / 15%).
#
# NOTE(review): the /opt/ml/processing paths look like SageMaker Processing
# container mounts -- confirm against the job configuration.
# NOTE(review): no RNG seed is set, so the split differs between runs.

library(readr)
library(dplyr)
library(ggplot2)
library(forcats)

input_dir <- "/opt/ml/processing/input/"
output_dir <- "/opt/ml/processing/output/"

# Locate the input CSV(s); fail with a clear message when none are present
# rather than letting read_csv() error on an empty path vector.
filename <- Sys.glob(paste0(input_dir, "*.csv"))
if (length(filename) == 0) {
  stop("No CSV file found in ", input_dir, call. = FALSE)
}

abalone <- read_csv(filename)

# Replace the categorical `sex` column with three 0/1 integer indicators.
# A comparison against NA yields NA, exactly as the ifelse() form would.
abalone <- abalone %>%
  mutate(
    female = as.integer(sex == "F"),
    male = as.integer(sex == "M"),
    infant = as.integer(sex == "I")
  ) %>%
  select(-sex)

# Put `rings` and the indicator columns first, followed by the measurements.
# The ranges rely on the column order of the input file.
abalone <- abalone %>%
  select(rings:infant, length:shell_weight)

# Tag every row with a temporary unique id so the splits can be separated by
# row identity. Joining on all data columns (the default when `by` is
# omitted) would also drop holdout rows that merely share values with a
# sampled row.
abalone <- abalone %>%
  mutate(.row_id = row_number())

# 70% of the rows go to training; the remainder is halved into test and
# validation.
abalone_train <- abalone %>%
  sample_frac(size = 0.7)
abalone_holdout <- anti_join(abalone, abalone_train, by = ".row_id")

abalone_test <- abalone_holdout %>%
  sample_frac(size = 0.5)
abalone_valid <- anti_join(abalone_holdout, abalone_test, by = ".row_id")

# Ensure the per-split output directories exist before writing.
for (split_name in c("train", "valid", "test")) {
  dir.create(
    paste0(output_dir, split_name),
    showWarnings = FALSE,
    recursive = TRUE
  )
}

# Drop the helper id so the written files contain only the original columns.
write_csv(
  select(abalone_train, -all_of(".row_id")),
  paste0(output_dir, "train/abalone_train.csv")
)
write_csv(
  select(abalone_valid, -all_of(".row_id")),
  paste0(output_dir, "valid/abalone_valid.csv")
)
write_csv(
  select(abalone_test, -all_of(".row_id")),
  paste0(output_dir, "test/abalone_test.csv")
)