# How to use the Cohort Modeler Data Generator

This notebook is currently the mechanism for generating data for the Cohort Modeler Sample Dataset. This is not how the data will be generated long term: we are working with the Neptune team to get a sample data set uploaded, which will lower the burden on customers. 

To use this notebook, run through every cell. **The only modification you currently need to make is in the S3 upload cell: set your own S3 bucket name as the place to upload the CSVs generated through this process, and ensure you have the required IAM credentials to do so.**

## Establish libraries

Import libraries and seed any random functions (to make this re-creatable)

In [None]:
!pip install faker
!pip install uuid
!pip install boto3

In [None]:
import csv
import random
from faker import Faker
import numpy as np
import uuid

# One seed value shared by every seeded random source used in this notebook,
# so the seeded draws repeat from run to run.
myseed = 1234

# Seed Faker, the standard-library `random` module and NumPy, in that order.
for seedSource in (Faker.seed, random.seed, np.random.seed):
    seedSource(myseed)

### Setting Player Cohort Size
userSize = 1000      # number of player vertices to generate
campaignSize = 10    # number of campaign vertices to generate


## Create fake users

In [None]:
# Build one 'player' vertex per user. Keys follow the '<name>:<Type>' column
# header convention, with '~id' / '~label' as the vertex id and label.

# One shared Faker instance: the `.unique` proxy remembers the values it has
# already returned per instance, so constructing a new Faker() for every call
# never actually enforced unique player names.
fake = Faker()

usersList = []
for _ in range(userSize):
    playerId = fake.unique.user_name()
    # Roughly 98% of players are active.
    status = 'Active' if random.random() < 0.98 else 'Inactive'
    joinedDate = fake.date_between(start_date='-5y')
    # lastPlayed can never be earlier than the join date.
    lastPlayed = fake.date_between(start_date=joinedDate)

    newUser = {
        # NOTE: uuid4() is not driven by the seeded generators, so ids differ
        # between runs even though the other attributes are seeded.
        '~id': str(uuid.uuid4()),
        '~label': 'player',
        'playerId:String': playerId,
        'status:String': status,
        'joinedDate:Date': joinedDate,
        'lastPlayed:Date': lastPlayed,
        # Attributes wrapped in abs() are forced non-negative; reputation and
        # mischief may be negative.
        'ea_reputation:Int': int(np.random.normal(0, 25)),
        'ea_altruism:Int': int(abs(np.random.normal(0, 10))),
        'ea_duty:Int': int(abs(np.random.normal(0, 3))),
        'ea_mischief:Int': int(np.random.normal(0, 1.5)),
        'ea_malice:Int': int(abs(np.random.normal(0, 1))),
        'ea_atrisk:Int': int(abs(np.random.normal(0, 1))),
        'stat_totalSkinPurchases:Int': int(abs(np.random.normal(15, 5))),
        # TODO: add more non-toxicity related options
        'stat_totalCurrencyPurchases:Int': int(abs(np.random.normal(1000, 200))),
        'stat_idleMinutes:Int': int(abs(np.random.normal(360, 120))),
        'stat_marketingEmailClickThroughs:Int': int(abs(np.random.normal(15, 4))),
        # Timestamps are drawn around 1621279571 with a spread of 1296000
        # (15 days, if the values are epoch seconds).
        'stat_lastChatTimestamp:Int': int(abs(np.random.normal(1621279571, 1296000))),
        'stat_lastGameSession:Int': int(abs(np.random.normal(1621279571, 1296000))),
        'stat_lastGameSessionLength:Int': int(abs(np.random.normal(180, 60))),
        'stat_longestGameSessionLength:Int': int(abs(np.random.normal(480, 120))),
        'stat_shortestGameSessionLength:Int': int(abs(np.random.normal(10, 5))),
        'stat_medianGameSessionLength:Int': int(abs(np.random.normal(60, 10))),
    }

    usersList.append(newUser)

# Show one generated player as a sanity check.
print(usersList[0])

## Create fake campaigns

In [None]:
# Build the 'campaign' vertices, and remember per campaign how many emails
# were sent and opened so the edge-generation cell can size its samples.

# Constructing Faker is loop-invariant work, so do it once.
fake = Faker()

campaignList = []
emailOpenedCampaign = []   # opened-email count, one entry per campaign
emailSentCampaign = []     # sent-email count, one entry per campaign

for _ in range(campaignSize):
    newCampaign = {
        '~id': str(uuid.uuid4()),
        '~label': 'campaign',
        'name:String': fake.text(max_nb_chars=20),
        'stat_totalEmailOpened:Int': int(abs(np.random.normal(25, 1))),
        # Standard deviation 0: every campaign sends exactly 100 messages.
        'stat_messagesSent:Int': int(abs(np.random.normal(100, 0))),
        'stat_messagesDelivered:Int': int(abs(np.random.normal(60, 2))),
        'stat_dailyActive:Int': int(abs(np.random.normal(60, 5))),
        'stat_newPlayers:Int': int(abs(np.random.normal(5, 1))),
    }

    emailOpenedCampaign.append(newCampaign['stat_totalEmailOpened:Int'])
    emailSentCampaign.append(newCampaign['stat_messagesSent:Int'])
    campaignList.append(newCampaign)
 

## Create player actions

In [None]:
# Every kind of player action that becomes an 'action' vertex in the graph.
actionTypes = [
    'action_chat',
    'action_sharepii',
    'action_partyjoin',
    'action_randomheal',
    'action_grief',
    'action_badname',
    'action_harass',
    'action_stalk',
    'action_badlanguage',
    'action_endorse',
    'action_report',
    'action_badimage',
]

# One vertex per action type; the action name doubles as the vertex id.
actionList = [
    {'~id': actionName, '~label': 'action'}
    for actionName in actionTypes
]

print(actionList)

## Create Player Edges to Actions (engagedIn)

The statistics likely need to change for bad activities... right now the following code randomizes equally between good and bad actions.

In [None]:
# Build 'EngagedIn' edges from each player to a random set of distinct actions.
engageList = []

# A player can engage in each action at most once, so the number of edges per
# player can never exceed the number of distinct actions. Without this cap the
# rejection loop below would spin forever whenever the drawn count is larger.
maxEngage = len({action['~id'] for action in actionList})

for user in usersList:
    # abs() already guarantees a non-negative count.
    numEngage = min(abs(int(np.random.normal(2, 3))), maxEngage)
    uniqueEngage = set()   # action ids already linked to this player
    for _ in range(numEngage):
        # Redraw until we hit an action this player is not yet linked to.
        randEngage = random.choice(actionList)['~id']
        while randEngage in uniqueEngage:
            randEngage = random.choice(actionList)['~id']
        uniqueEngage.add(randEngage)

        engageList.append({
            '~from': user['~id'],
            '~to': randEngage,
            '~label': 'EngagedIn',
            'iterations:Int': abs(int(np.random.normal(800, 2000))),
        })

## Create player interactions

In [None]:
# Build 'Interactions' edges from each player to a random set of other players.
interList = []

# A player can be linked to every *other* player at most once. Capping the
# count keeps the rejection loop below from spinning forever when the cohort
# is smaller than the number of interactions drawn.
maxFriends = max(len(usersList) - 1, 0)

for user in usersList:
    numInter = int(np.random.normal(20, 8))
    numInter = min(max(numInter, 0), maxFriends)
    # Seeded with the player's own id so nobody interacts with themselves;
    # a set keeps the membership test O(1).
    uniqueFriends = {user['~id']}
    for _ in range(numInter):
        # Redraw until we hit a player not yet linked to this one.
        randUser = random.choice(usersList)['~id']
        while randUser in uniqueFriends:
            randUser = random.choice(usersList)['~id']
        uniqueFriends.add(randUser)

        interList.append({
            '~from': user['~id'],
            '~to': randUser,
            '~label': 'Interactions',
            'action_chat:Int': abs(int(np.random.normal(2000, 2000))),
            'action_sharepii:Int': abs(int(np.random.normal(1, 1))),
            'action_partyjoin:Int': abs(int(np.random.normal(4, 5))),
            'action_randomheal:Int': abs(int(np.random.normal(0.4, 1.4))),
            'action_endorse:Int': abs(int(np.random.normal(3, 6))),
            'action_report:Int': abs(int(np.random.normal(0.2, 0.5))),
        })

## Create campaign interactions

In [None]:
# Build the campaign <-> player edges:
#   * 'MarketingInteractions': campaign -> every player the email was sent to
#   * 'CustomerMarketingInteractions': player -> campaign, for the subset of
#     recipients that opened the email
campaignInteractionList = []
campaignBidirectionalInteractionList = []
campUserInteractionList = []   # per campaign: usersList indices of recipients
campBidirectionalList = []     # per campaign: usersList indices of openers

for x in range(campaignSize):
    # Cannot send to more players than exist.
    sentCount = min(emailSentCampaign[x], len(usersList))
    campUserInteractionList.append(random.sample(range(len(usersList)), sentCount))

for x, recipients in enumerate(campUserInteractionList):
    # Openers are drawn from the recipients themselves. Previously positions
    # within the recipient list were sampled and then used directly as
    # usersList indices, which attached the "opened" edges to the first players
    # in usersList instead of to players who actually received the email.
    openedCount = min(emailOpenedCampaign[x], len(recipients))
    campBidirectionalList.append(random.sample(recipients, openedCount))

for x in range(campaignSize):
    campaignId = campaignList[x]['~id']
    for userIndex in campUserInteractionList[x]:
        campaignInteractionList.append({
            '~from': campaignId,
            '~to': usersList[userIndex]['~id'],
            '~label': 'MarketingInteractions',
        })
    for userIndex in campBidirectionalList[x]:
        campaignBidirectionalInteractionList.append({
            '~from': usersList[userIndex]['~id'],
            '~to': campaignId,
            '~label': 'CustomerMarketingInteractions',
            'campaign_login:Int': random.randrange(20),
            'campaign_emailOpened:Int': random.randrange(1, 5),
            'campaign_linkClicked:Int': random.randrange(0, 2),
        })

print()

## Export lists to CSVs

In [None]:
# Write every generated vertex/edge list to its own CSV file in the current
# directory. toExport and filenames are parallel lists.
toExport = [usersList, actionList, interList, engageList, campaignInteractionList, campaignList, campaignBidirectionalInteractionList]
filenames = ['user_vertices','action_vertices','interaction_edges','engagement_edges', 'campaign_edges', 'campaign_vertices', 'campaign_bidirectional_edges']

for filename, currentList in zip(filenames, toExport):
    # newline='' is required by the csv module so that it alone controls line
    # endings (otherwise extra '\r' characters appear on some platforms).
    # The with-statement closes the file; no explicit close() is needed.
    with open('./' + filename + '.csv', 'w', newline='') as csvFile:
        # The header row is taken from the keys of the first record.
        # NOTE(review): with QUOTE_NONE and escapechar=' ', a value containing
        # a comma is written with a space before the comma instead of being
        # quoted - confirm the downstream loader tolerates this.
        writer = csv.DictWriter(csvFile, escapechar=' ', quoting=csv.QUOTE_NONE, fieldnames=currentList[0].keys())
        writer.writeheader()
        writer.writerows(currentList)

## Upload CSVs to S3


### **To upload, you will need to add the S3 bucket you want to upload to, and the objects in that bucket, to the IAM policy attached to the notebook.**

In [None]:
import logging
import boto3
from botocore.exceptions import ClientError

def upload_file(file_name, bucket, object_name=None):
    """Upload a local file to an S3 bucket.

    :param file_name: Path of the local file to upload.
    :param bucket: Name of the destination S3 bucket.
    :param object_name: S3 object key; defaults to ``file_name`` when None.
    :return: True if the upload succeeded, False if S3 rejected it (the error
        is logged).
    """
    # Local import so this function works with the cell's existing imports.
    from boto3.exceptions import S3UploadFailedError

    # If S3 object_name was not specified, use file_name
    if object_name is None:
        object_name = file_name

    # Upload the file
    s3_client = boto3.client('s3')
    try:
        s3_client.upload_file(file_name, bucket, object_name)
    except (ClientError, S3UploadFailedError) as e:
        # The managed transfer re-raises client errors (missing bucket, access
        # denied, ...) as S3UploadFailedError, so catch both.
        logging.error(e)
        return False
    return True

# Destination bucket for the generated CSV files.
bucket_name = "cohort-modeler" #this needs to be updated to your bucket

# The CSV files written by the export cell, uploaded under the same names.
filenames_csv = [
    'user_vertices.csv',
    'action_vertices.csv',
    'interaction_edges.csv',
    'engagement_edges.csv',
    'campaign_edges.csv',
    'campaign_vertices.csv',
    'campaign_bidirectional_edges.csv',
]

# Upload each file in turn; upload_file logs any failure itself.
for csvName in filenames_csv:
    upload_file(csvName, bucket_name)


## What to do next
Move over to the Cohort Modeler Sample Notebook, which contains a cell for loading the generated data into Neptune.