{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "import sys\n", "import json\n", "import os\n", "import random\n", "import hashlib\n", "import warnings\n", "import urllib.request\n", "import time\n", "\n", "import warnings\n", "\n", "warnings.filterwarnings(action='ignore')\n", "random.seed(47)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### import gremlin_python packages" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "from gremlin_python import statics\n", "from gremlin_python.structure.graph import Graph\n", "from gremlin_python.process.graph_traversal import __\n", "from gremlin_python.process.strategies import *\n", "from gremlin_python.process.traversal import T, P, Operator, Scope, Column, Order\n", "from gremlin_python.process.anonymous_traversal import traversal\n", "from gremlin_python.driver.driver_remote_connection import DriverRemoteConnection" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Neptune endpoint 설정" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": [ "NEPTUNE_ENDPOINT = 'Your-Neptune-Cluster-Endpoint'\n", "NEPTUNE_PORT = 8182" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [], "source": [ "def graph_traversal(neptune_endpoint=None, neptune_port=NEPTUNE_PORT, show_endpoint=True, connection=None):\n", " def _remote_connection(neptune_endpoint=None, neptune_port=None, show_endpoint=True):\n", " neptune_gremlin_endpoint = '{protocol}://{neptune_endpoint}:{neptune_port}/{suffix}'.format(protocol='wss',\n", " neptune_endpoint=neptune_endpoint, neptune_port=neptune_port, suffix='gremlin')\n", "\n", " if show_endpoint:\n", " print('gremlin: {}'.format(neptune_gremlin_endpoint))\n", " retry_count = 0\n", " while True:\n", " try:\n", " return DriverRemoteConnection(neptune_gremlin_endpoint, 'g')\n", " except HTTPError as ex:\n", " exc_info = sys.exc_info()\n", " if retry_count < 3:\n", " retry_count += 1\n", " print('Connection timeout. Retrying...')\n", " else:\n", " raise exc_info[0].with_traceback(exc_info[1], exc_info[2])\n", "\n", " if connection is None:\n", " connection = _remote_connection(neptune_endpoint, neptune_port, show_endpoint)\n", " return traversal().withRemote(connection)" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [], "source": [ "def clear_graph(neptune_endpoint=None, neptune_port=NEPTUNE_PORT, batch_size=200, edge_batch_size=None, vertex_batch_size=None):\n", " if edge_batch_size is None:\n", " edge_batch_size = batch_size\n", " if vertex_batch_size is None:\n", " vertex_batch_size = batch_size\n", " g = graph_traversal(neptune_endpoint, neptune_port, False)\n", " has_edges = True\n", " edge_count = None\n", " while has_edges:\n", " if edge_count is None:\n", " print('clearing property graph data [edge_batch_size={}, edge_count=Unknown]...'.format(edge_batch_size))\n", " else:\n", " print('clearing property graph data [edge_batch_size={}, edge_count={}]...'.format(edge_batch_size, edge_count))\n", " g.E().limit(edge_batch_size).drop().toList()\n", " edge_count = g.E().count().next()\n", " has_edges = (edge_count > 0)\n", " has_vertices = True\n", " vertex_count = None\n", " while has_vertices:\n", " if vertex_count is None:\n", " print('clearing property graph data [vertex_batch_size={}, vertex_count=Unknown]...'.format(vertex_batch_size))\n", " else:\n", " print('clearing property graph data [vertex_batch_size={}, vertex_count={}]...'.format(vertex_batch_size, vertex_count))\n", " g.V().limit(vertex_batch_size).drop().toList()\n", " vertex_count = g.V().count().next()\n", " has_vertices = (vertex_count > 0)" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [], "source": [ "def get_person(g, person_id):\n", " person = g.V(person_id).limit(1).toList()\n", " return None if not person else person[-1]" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [], "source": [ "def upsert_person(g, person):\n", " person_vertex = get_person(g, person['id'])\n", " elem = g.addV('person').property(T.id, person['id']).next() if not person_vertex else g.V(person_vertex).next()\n", " for k in ('id', 'name', 'email', 'phone_number', 'company', 'job_title'):\n", " g.V(elem).property(k, person[k]).next()\n", " g.V(elem).property('_name', person['name'].lower()).next()\n", "\n", " _from_person_id = hashlib.md5(person['owner'].encode('utf-8')).hexdigest()[:8]\n", " _to_person_id = person['id']\n", " if _from_person_id != _to_person_id:\n", " from_person_vertex = get_person(g, _from_person_id)\n", " to_person_vertex = get_person(g, _to_person_id)\n", " weight = 1.0\n", " if g.V(from_person_vertex).outE('knows').filter(__.inV().is_(to_person_vertex)).toList():\n", " print('Updating relationship')\n", " g.V(from_person_vertex).outE('knows').filter(__.inV().is_(to_person_vertex)).property('weight', weight).next()\n", " else:\n", " print('Creating relationship')\n", " g.V(from_person_vertex).addE('knows').to(to_person_vertex).property('weight', weight).next()" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [], "source": [ "def _print_all_vertices(g):\n", " import pprint\n", " all_persons = [{**node.__dict__, **properties} for node in g.V()\n", " for properties in g.V(node).valueMap()]\n", " pprint.pprint(all_persons)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Neptune 데이터베이스 초기화¶" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [], "source": [ "neptune_endpoint, neptune_port = (NEPTUNE_ENDPOINT, NEPTUNE_PORT)" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [], "source": [ "#clear_graph(neptune_endpoint)" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "gremlin: ws://octember-bizcard.cluster-cnrh6fettief.us-east-1.neptune.amazonaws.com:8182/gremlin\n" ] } ], "source": [ "g = graph_traversal(neptune_endpoint, neptune_port)" ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "[{'_name': ['edy kim'],\n", " 'company': ['aws'],\n", " 'email': ['edy@amazon.com'],\n", " 'id': ['f75f761c'],\n", " 'job_title': ['Specialist Solutions Architect'],\n", " 'label': 'person',\n", " 'name': ['Edy Kim'],\n", " 'phone_number': ['(+82 10) 231 5467 ']},\n", " {'_name': ['pororo kim'],\n", " 'company': ['aws'],\n", " 'email': ['pororo@amazon.com'],\n", " 'id': ['a8f9f7c7'],\n", " 'job_title': ['SA Manager'],\n", " 'label': 'person',\n", " 'name': ['Pororo Kim'],\n", " 'phone_number': ['(+82 10)321 6547 ']},\n", " {'_name': ['poby kim'],\n", " 'company': ['aws'],\n", " 'email': ['poby@amazon.com'],\n", " 'id': ['6f371694'],\n", " 'job_title': ['Solutions Architect'],\n", " 'label': 'person',\n", " 'name': ['Poby Kim'],\n", " 'phone_number': ['(+82 10) 312 4567 ']},\n", " {'_name': ['rody park'],\n", " 'company': ['aws'],\n", " 'email': ['rody@amazon.com'],\n", " 'id': ['0679a6d8'],\n", " 'job_title': ['Solutions Architect'],\n", " 'label': 'person',\n", " 'name': ['Rody Park'],\n", " 'phone_number': ['(+82 10) 513 6754 ']},\n", " {'_name': ['crong lee'],\n", " 'company': ['aws'],\n", " 'email': ['crong@amazon.com'],\n", " 'id': ['1ee4a9f1'],\n", " 'job_title': ['Associate Solutions Architect'],\n", " 'label': 'person',\n", " 'name': ['Crong Lee'],\n", " 'phone_number': ['(+82 10)231 7546 ']},\n", " {'_name': ['harry jang'],\n", " 'company': ['aws'],\n", " 'email': ['harry@amazon.com'],\n", " 'id': ['3b87c97d'],\n", " 'job_title': ['Partner Solutions Architect'],\n", " 'label': 'person',\n", " 'name': ['Harry Jang'],\n", " 'phone_number': ['(+82 10) 213 4754 ']}]\n" ] } ], "source": [ "_print_all_vertices(g)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### 인맥 관계도 그리기" ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [], "source": [ "import matplotlib.pyplot as plt\n", "import networkx as nx\n", "\n", "plt.rcdefaults()" ] }, { "cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [ { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "dg = nx.DiGraph()\n", "\n", "path_list = g.V().out().path().by('name').toList()\n", "for e in path_list:\n", " dg.add_edge(e[0], e[1])\n", "nx.draw(dg, pos=nx.circular_layout(dg), with_labels=True, node_size=1200, width=2, font_weight='bold')" ] }, { "cell_type": "code", "execution_count": 15, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "{'company': ['aws'],\n", " 'email': ['harry@amazon.com'],\n", " 'job_title': ['Partner Solutions Architect'],\n", " 'name': ['Harry Jang'],\n", " 'phone_number': ['(+82 10) 213 4754 '],\n", " 'score': 3.0}\n", "{'company': ['aws'],\n", " 'email': ['crong@amazon.com'],\n", " 'job_title': ['Associate Solutions Architect'],\n", " 'name': ['Crong Lee'],\n", " 'phone_number': ['(+82 10)231 7546 '],\n", " 'score': 3.0}\n" ] } ], "source": [ "import pprint\n", "\n", "user = 'Poby Kim'.lower()\n", "recommendations = (g.V().hasLabel('person').has('_name', user).as_('person')\n", " .both('knows').aggregate('friends')\n", " .both('knows').where(P.neq('person')).where(P.without('friends'))\n", " .groupCount().by('id')\n", " .order(Scope.local)\n", " .by(Column.values, Order.decr)\n", " .next())\n", "\n", "res = []\n", "for key, score in recommendations.items():\n", " value = {k: v for k, v in g.V(key).valueMap().next().items() if not (k == 'id' or k.startswith('_'))}\n", " value['score'] = float(score)\n", " res.append(value)\n", "\n", "for e in res:\n", " pprint.pprint(e)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.7.7" } }, "nbformat": 4, "nbformat_minor": 4 }