Source code for missense_kinase_toolkit.databases.cli.extract_kinase_annotations

#!/usr/bin/env python

import argparse

import pandas as pd

from missense_kinase_toolkit.databases import config, io_utils, scrapers, klifs

[docs] def parsearg_utils(): parser = argparse.ArgumentParser( description="Get kinase annotations from KinHub and KLIFS databases." ) parser.add_argument( "--outDir", type=str, help="Required: Output directory path (str)", ) parser.add_argument( "--requestsCache", type=str, default="requests_cache", help="Optional: Requests cache; default: `requests_cache` (str)", ) parser.add_argument( "--csvKinhub", type=str, default="kinhub.csv", help="Optional: CSV file in outDir that contains Kinhub kinase info; default: `kinhub.csv` (str)", ) parser.add_argument( "--csvKLIFS", type=str, default="klifs.csv", help="Optional: CSV file in outDir that contains KLIFS kinase info; default: `klifs.csv` (str)", ) # TODO: add logging functionality return parser
[docs] def main(): args = parsearg_utils().parse_args() config.set_output_dir(args.outDir) config.set_request_cache(args.requestsCache) # get KinHub list of kinases df_kinhub = scrapers.kinhub() io_utils.save_dataframe_to_csv(df_kinhub, args.csvKinhub) # get KLIFS annotations list_kinase_hgnc = df_kinhub["HGNC Name"].to_list() dict_kinase_info = {} for kinase in list_kinase_hgnc: dict_kinase_info[kinase] = klifs.KinaseInfo(kinase)._kinase_info df_klifs = pd.DataFrame(dict_kinase_info).T df_klifs = df_klifs.rename_axis("HGNC Name").reset_index() io_utils.save_dataframe_to_csv(df_klifs, args.csvKLIFS)