# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"). You # may not use this file except in compliance with the License. A copy of # the License is located at # # http://aws.amazon.com/apache2.0/ # # or in the "license" file accompanying this file. This file is # distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF # ANY KIND, either express or implied. See the License for the specific # language governing permissions and limitations under the License. from __future__ import absolute_import import os import pandas as pd from sklearn.externals import joblib from sklearn.linear_model import LogisticRegression if __name__ == "__main__": training_data_directory = "/opt/ml/input/data/train" train_features_data = os.path.join(training_data_directory, "train_features.csv") train_labels_data = os.path.join(training_data_directory, "train_labels.csv") X_train = pd.read_csv(train_features_data, header=None) y_train = pd.read_csv(train_labels_data, header=None) model = LogisticRegression(class_weight="balanced", solver="lbfgs") model.fit(X_train, y_train) model_output_directory = os.path.join("/opt/ml/model", "model.joblib") joblib.dump(model, model_output_directory)