{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Import libraries"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import io\n",
    "import json\n",
    "\n",
    "import boto3\n",
    "import pandas as pd"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Install the file system interface for S3 (`s3fs`)\n",
    "\n",
    "`%pip` is used instead of `!pip` so that the package is installed into the environment of the running kernel."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "%pip install -q s3fs"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Configure the input location\n",
    "\n",
    "Set the S3 location of the input file before running the remaining cells:\n",
    "\n",
    "- `bucket_name`: the name of your S3 bucket\n",
    "- `in_key_name`: the object key of the input file, in the form `prefix/path/file`"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Placeholders: replace both values with your own bucket and object key.\n",
    "bucket_name = ''\n",
    "in_key_name = '//'"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Read the file content from S3\n",
    "\n",
    "The object is downloaded in full and decoded as UTF-8. The following cells expect it to contain one JSON object per line, each with a `text` field."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "s3 = boto3.resource('s3')\n",
    "obj = s3.Object(bucket_name, in_key_name)\n",
    "text = obj.get()['Body'].read().decode('utf-8')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Call the Comprehend `detect_sentiment` API\n",
    "\n",
    "Each record's `text` is sent to Amazon Comprehend (as English, `LanguageCode='en'`) and the returned label is stored under a new `Sentiment` key. Blank lines are skipped, and records with a missing or empty `text` are kept with `Sentiment` set to `None` instead of stopping the run. One API request is made per record."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "client = boto3.client('comprehend')\n",
    "lst = []\n",
    "# Split on the newline character only: str.splitlines() also breaks on characters such as\n",
    "# U+2028/U+2029, which may appear unescaped inside a JSON string and would cut a record in two.\n",
    "for line in text.split('\\n'):\n",
    "    line = line.strip()\n",
    "    if not line:\n",
    "        continue  # skip blank lines, which json.loads would reject\n",
    "    line_dict = json.loads(line)\n",
    "    message = line_dict.get('text')\n",
    "    if message:\n",
    "        line_dict['Sentiment'] = client.detect_sentiment(Text=message, LanguageCode='en')['Sentiment']\n",
    "    else:\n",
    "        # Nothing to score (detect_sentiment requires a non-empty Text); keep the record unscored.\n",
    "        line_dict['Sentiment'] = None\n",
    "    lst.append(json.dumps(line_dict))\n",
    "joined_lines = '\\n'.join(lst)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Load the scored records into a DataFrame"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Wrap the JSON Lines text in StringIO: passing a literal JSON string to read_json is deprecated\n",
    "# in recent pandas releases, and a file-like object is never mistaken for a path or URL.\n",
    "slack_df = pd.read_json(io.StringIO(joined_lines), lines=True)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Inspect the results"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "slack_df.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "slack_df[['text', 'Sentiment']]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Sentiment distribution\n",
    "\n",
    "Count the records per sentiment label, then show the same counts as a pie chart. Records without a `Sentiment` value are not included in the groups."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "slack_df[['text', 'Sentiment']].groupby(['Sentiment']).agg(['count'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# The trailing semicolon hides the Axes repr so that only the chart is displayed.\n",
    "slack_df[['text', 'Sentiment']].groupby(['Sentiment']).agg(['count']).plot(kind='pie', subplots=True);"
   ]
  }
 ],
 "metadata": {
  "instance_type": "ml.t3.medium",
  "kernelspec": {
   "display_name": "Python 3 (Data Science)",
   "language": "python",
   "name": "python3__SAGEMAKER_INTERNAL__arn:aws:sagemaker:us-west-2:236514542706:image/datascience-1.0"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}