Source code for missense_kinase_toolkit.databases.cli.transform_cbioportal

#!/usr/bin/env python

import argparse

from missense_kinase_toolkit.databases import config, io_utils


def parsearg_utils() -> argparse.ArgumentParser:
    """Build the command-line parser for the cBioPortal transform script.

    Returns
    -------
    argparse.ArgumentParser
        Parser exposing ``--mutationTypes``, ``--outDir`` (mandatory),
        ``--requestsCache``, ``--listCols`` and ``--csvRef``. All values are
        parsed as plain strings; comma-separated options are split later by
        the caller.
    """
    parser = argparse.ArgumentParser(
        description="Concatenate, remove duplicates, and extract genes and mutation types of interest from cBioPortal data."
    )

    parser.add_argument(
        "--mutationTypes",
        type=str,
        help="Optional: Mutation type(s) to extract, separated by commas; default: `Missense_Mutation` (str)",
        default="Missense_Mutation",
    )

    # The help text has always advertised this option as required; enforce it
    # so a missing value fails at parse time with a usage message instead of
    # propagating ``None`` into the output-directory configuration.
    parser.add_argument(
        "--outDir",
        type=str,
        required=True,
        help="Required: Output directory path (str)",
    )

    parser.add_argument(
        "--requestsCache",
        type=str,
        default="requests_cache",
        help="Optional: Requests cache; default: requests_cache (str)",
    )

    parser.add_argument(
        "--listCols",
        type=str,
        default="HGNC Name, UniprotID",
        help="Optional: List of columns to merge separated by comma with the first as the merge on column of HGNC gene names; default: `HGNC Name, UniprotID` (str)",
    )

    parser.add_argument(
        "--csvRef",
        type=str,
        default="kinhub.csv",
        help="Optional: CSV file in outDir that contains the reference columns given in --listCols; default: `kinhub.csv` (str)",
    )

    # TODO: add logging functionality
    return parser
def main():
    """Filter concatenated cBioPortal mutations and annotate them from a reference CSV.

    Steps, in order:

    1. Parse the command line and register the output directory and the
       requests cache with ``config``.
    2. Concatenate every file matching ``*_mutations.csv`` and load the
       reference CSV named by ``--csvRef``.
    3. Keep rows whose ``mutationType`` is one of ``--mutationTypes`` and whose
       ``hugoGeneSymbol`` appears in the first ``--listCols`` column of the
       reference table.
    4. Left-merge the ``--listCols`` columns of the reference table onto the
       remaining rows, drop the now-redundant merge key column, and save the
       result as ``transformed_mutations.csv``.
    """
    cli = parsearg_utils().parse_args()

    config.set_output_dir(cli.outDir)
    config.set_request_cache(cli.requestsCache)

    wanted_mutations = io_utils.convert_str2list(cli.mutationTypes)
    ref_columns = io_utils.convert_str2list(cli.listCols)
    # By convention the first requested column holds the HGNC gene names and
    # doubles as the merge key on the reference side.
    key_column = ref_columns[0]

    mutations = io_utils.concatenate_csv_files_with_glob("*_mutations.csv")
    reference = io_utils.load_csv_to_dataframe(cli.csvRef)
    reference_genes = reference[key_column].to_list()

    # First pass: restrict to the requested mutation types.
    is_wanted_type = mutations["mutationType"].isin(wanted_mutations)
    filtered = mutations.loc[is_wanted_type].reset_index(drop=True)

    # Second pass: restrict to genes present in the reference table.
    is_reference_gene = filtered["hugoGeneSymbol"].isin(reference_genes)
    filtered = filtered.loc[is_reference_gene].reset_index(drop=True)

    annotated = filtered.merge(
        reference[ref_columns],
        how="left",
        left_on="hugoGeneSymbol",
        right_on=key_column,
    )
    # The merge key duplicates ``hugoGeneSymbol``; remove it from the output.
    annotated = annotated.drop(columns=[key_column])

    io_utils.save_dataframe_to_csv(annotated, "transformed_mutations.csv")