{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "94dca4f5",
   "metadata": {},
   "source": [
    "## Prepare ground-truth binding sites data obtained from BioLiP\n",
    "\n",
    "Link to the original data source: https://zhanggroup.org/BioLiP/download.html"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "id": "0e6d908e",
   "metadata": {},
   "outputs": [],
   "source": [
    "import pickle\n",
    "import os\n",
    "\n",
    "# Directory holding the raw BioLiP dump and the processed output.\n",
    "DATA_DIR = './binding_sites_data'\n",
    "\n",
    "# Ligand code (read from column index 4) -> label used as top-level key of `data`.\n",
    "LIGAND_TO_MF = {'ATP': 'ATP binding', 'GTP': 'GTP binding', 'HEM': 'heme binding'}\n",
    "\n",
    "\n",
    "def parse_biolip_line(line):\n",
    "    \"\"\"Convert one tab-separated row into a binding-site record.\n",
    "\n",
    "    Args:\n",
    "        line: One raw text line of the BioLiP file.\n",
    "\n",
    "    Returns:\n",
    "        None when the row has fewer than nine columns or when column\n",
    "        index 4 is not a key of LIGAND_TO_MF. Otherwise a dict with\n",
    "        'name' (first two columns joined by '-' and upper-cased),\n",
    "        'mf_term' (the mapped label) and 'sites' (list of ints).\n",
    "\n",
    "    Raises:\n",
    "        ValueError: if a token in column index 8 is not one leading\n",
    "            character followed by an integer.\n",
    "    \"\"\"\n",
    "    fields = line.split('\\t')  # split once instead of once per column access\n",
    "    if len(fields) <= 8 or fields[4] not in LIGAND_TO_MF:\n",
    "        return None\n",
    "    # Each token is one leading character followed by an integer; the\n",
    "    # character is dropped and 1 is subtracted from the integer.\n",
    "    # Presumably tokens are residue letter + 1-based position, giving\n",
    "    # 0-based indices -- TODO confirm against the BioLiP column description.\n",
    "    # split() without an argument ignores repeated/trailing whitespace, which\n",
    "    # with split(' ') would produce empty tokens and make int('') raise.\n",
    "    sites = [int(token[1:]) - 1 for token in fields[8].split()]\n",
    "    return {\n",
    "        'name': '-'.join(fields[:2]).upper(),\n",
    "        'mf_term': LIGAND_TO_MF[fields[4]],\n",
    "        'sites': sites,\n",
    "    }\n",
    "\n",
    "\n",
    "# One sub-dict per label, keyed by record name.\n",
    "data = {mf_term: {} for mf_term in LIGAND_TO_MF.values()}\n",
    "\n",
    "with open(os.path.join(DATA_DIR, 'BioLiP_2013-03-6.txt'), 'r') as f:\n",
    "    for line in f:\n",
    "        record = parse_biolip_line(line)\n",
    "        if record is None:\n",
    "            continue\n",
    "        # NOTE(review): a later row with the same name and ligand replaces the\n",
    "        # earlier one here. If the file can list several binding sites for one\n",
    "        # name, confirm whether the site lists should be merged instead.\n",
    "        data[record['mf_term']][record['name']] = record"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "id": "75c26907",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Persist the parsed records; `os`, `pickle`, `DATA_DIR` and `data` are\n",
    "# defined in the cell above.\n",
    "with open(os.path.join(DATA_DIR, 'processed.pickle'), 'wb') as f:\n",
    "    pickle.dump(data, f)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "6fec188a",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "conda_pytorch_latest_p36",
   "language": "python",
   "name": "conda_pytorch_latest_p36"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.13"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}