diff --git a/Allen Brain Atlas/Human M1 10x (RNA-Sequencing)/Human M1 10x.ipynb b/Allen Brain Atlas/Human M1 10x (RNA-Sequencing)/Human M1 10x.ipynb
new file mode 100644
index 0000000..454fbc2
--- /dev/null
+++ b/Allen Brain Atlas/Human M1 10x (RNA-Sequencing)/Human M1 10x.ipynb
@@ -0,0 +1,1044 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import os\n",
+    "\n",
+    "import numpy as np\n",
+    "import pandas as pd"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Data source: https://portal.brain-map.org/atlases-and-data/rnaseq/human-m1-10x"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Folder that holds the input CSV and receives the generated .gmt files.\n",
+    "# Set HUMAN_M1_10X_DIR to run on another machine; the default is the original location.\n",
+    "# os.path.join(..., \"\") guarantees the trailing separator that `filepath + name` relies on.\n",
+    "filepath = os.path.join(\n",
+    "    os.environ.get(\n",
+    "        \"HUMAN_M1_10X_DIR\",\n",
+    "        \"/Volumes/Edrive/minji/python_projects/RNA-seq/Brian Roth Human M1 10x/\",\n",
+    "    ),\n",
+    "    \"\",\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "filename = \"trimmed_means.csv\""
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "df = pd.read_csv(filepath+filename, index_col=0)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "(50281, 127)"
+      ]
+     },
+     "execution_count": 5,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "df.shape"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Inh L1 PAX6 CHRFAM7AInh L1-3 VIP CBLN1Exc L3 LAMP5 CARM1P1Inh L1-3 SST FAM20AInh L1-6 LAMP5 AARDInh L1-6 LAMP5 CA1Exc L6 THEMIS SLNInh L2 VIP SLC6A16Exc L5 FEZF2 NREP-AS1Inh L3-5 VIP IGDCC3...Exc L5-6 FEZF2 SH2D1BInh L2-5 PVALB RPH3ALExc L3 THEMIS ENPEPExc L6 FEZF2 PROKR2Inh L1-2 VIP HTR3AExc L5-6 FEZF2 OR1L8Exc L2 LINC00507 GLRA3Inh L3-5 SST OR5AH1PExc L2-3 RORB RTKN2Exc L5 RORB MED8
feature
DDX11L10.0000000.0000000.0000000.0000000.000.0000000.0000000.0000000.0000000.000000...0.000000.0000000.0000000.0000000.0000.0000000.0000000.0000000.0000000.000000
WASH7P0.0000000.0000000.0000000.0000000.000.0000000.0000000.0000000.0000000.000000...0.000000.0000000.0000000.0000000.0000.0000000.0000000.0000000.0000000.000000
MIR6859-10.0000000.0000000.0000000.0000000.000.0000000.0000000.0000000.0000000.000000...0.000000.0000000.0000000.0000000.0000.0000000.0000000.0000000.0000000.000000
MIR1302-20.0000000.0000000.0000000.0000000.000.0000000.0000000.0000000.0000000.000000...0.000000.0000000.0000000.0000000.0000.0000000.0000000.0000000.0000000.000000
FAM138A0.0000000.0000000.0000000.0000000.000.0000000.0000000.0000000.0000000.000000...0.000000.0000000.0000000.0000000.0000.0000000.0000000.0000000.0000000.000000
..................................................................
ND60.0000000.0000000.0000000.0000000.000.0000000.0000000.0000000.0000000.000000...0.000000.0000000.0000000.0000000.0000.0000000.0000000.0000000.0000000.000000
TRNE0.0000000.0000000.0000000.0000000.000.0000000.0000000.0000000.0000000.000000...0.000000.0000000.0000000.0000000.0000.0000000.0000000.0000000.0000000.000000
CYTB7.9330857.9703886.9027294.8098137.778.2318898.1073188.3581256.6616529.009897...8.024757.3608827.2929379.7278387.4428.0067187.0270748.4871077.7319348.679226
TRNT0.0000000.0000000.0000000.0000000.000.0000000.0000000.0000000.0000000.000000...0.000000.0000000.0000000.0000000.0000.0000000.0000000.0000000.0000000.000000
TRNP0.0000000.0000000.0000000.0000000.000.0000000.0000000.0000000.0000000.000000...0.000000.0000000.0000000.0000000.0000.0000000.0000000.0000000.0000000.000000
\n", + "

50281 rows × 127 columns

\n", + "
" + ], + "text/plain": [ + " Inh L1 PAX6 CHRFAM7A Inh L1-3 VIP CBLN1 Exc L3 LAMP5 CARM1P1 \\\n", + "feature \n", + "DDX11L1 0.000000 0.000000 0.000000 \n", + "WASH7P 0.000000 0.000000 0.000000 \n", + "MIR6859-1 0.000000 0.000000 0.000000 \n", + "MIR1302-2 0.000000 0.000000 0.000000 \n", + "FAM138A 0.000000 0.000000 0.000000 \n", + "... ... ... ... \n", + "ND6 0.000000 0.000000 0.000000 \n", + "TRNE 0.000000 0.000000 0.000000 \n", + "CYTB 7.933085 7.970388 6.902729 \n", + "TRNT 0.000000 0.000000 0.000000 \n", + "TRNP 0.000000 0.000000 0.000000 \n", + "\n", + " Inh L1-3 SST FAM20A Inh L1-6 LAMP5 AARD Inh L1-6 LAMP5 CA1 \\\n", + "feature \n", + "DDX11L1 0.000000 0.00 0.000000 \n", + "WASH7P 0.000000 0.00 0.000000 \n", + "MIR6859-1 0.000000 0.00 0.000000 \n", + "MIR1302-2 0.000000 0.00 0.000000 \n", + "FAM138A 0.000000 0.00 0.000000 \n", + "... ... ... ... \n", + "ND6 0.000000 0.00 0.000000 \n", + "TRNE 0.000000 0.00 0.000000 \n", + "CYTB 4.809813 7.77 8.231889 \n", + "TRNT 0.000000 0.00 0.000000 \n", + "TRNP 0.000000 0.00 0.000000 \n", + "\n", + " Exc L6 THEMIS SLN Inh L2 VIP SLC6A16 Exc L5 FEZF2 NREP-AS1 \\\n", + "feature \n", + "DDX11L1 0.000000 0.000000 0.000000 \n", + "WASH7P 0.000000 0.000000 0.000000 \n", + "MIR6859-1 0.000000 0.000000 0.000000 \n", + "MIR1302-2 0.000000 0.000000 0.000000 \n", + "FAM138A 0.000000 0.000000 0.000000 \n", + "... ... ... ... \n", + "ND6 0.000000 0.000000 0.000000 \n", + "TRNE 0.000000 0.000000 0.000000 \n", + "CYTB 8.107318 8.358125 6.661652 \n", + "TRNT 0.000000 0.000000 0.000000 \n", + "TRNP 0.000000 0.000000 0.000000 \n", + "\n", + " Inh L3-5 VIP IGDCC3 ... Exc L5-6 FEZF2 SH2D1B \\\n", + "feature ... \n", + "DDX11L1 0.000000 ... 0.00000 \n", + "WASH7P 0.000000 ... 0.00000 \n", + "MIR6859-1 0.000000 ... 0.00000 \n", + "MIR1302-2 0.000000 ... 0.00000 \n", + "FAM138A 0.000000 ... 0.00000 \n", + "... ... ... ... \n", + "ND6 0.000000 ... 0.00000 \n", + "TRNE 0.000000 ... 0.00000 \n", + "CYTB 9.009897 ... 
8.02475 \n", + "TRNT 0.000000 ... 0.00000 \n", + "TRNP 0.000000 ... 0.00000 \n", + "\n", + " Inh L2-5 PVALB RPH3AL Exc L3 THEMIS ENPEP Exc L6 FEZF2 PROKR2 \\\n", + "feature \n", + "DDX11L1 0.000000 0.000000 0.000000 \n", + "WASH7P 0.000000 0.000000 0.000000 \n", + "MIR6859-1 0.000000 0.000000 0.000000 \n", + "MIR1302-2 0.000000 0.000000 0.000000 \n", + "FAM138A 0.000000 0.000000 0.000000 \n", + "... ... ... ... \n", + "ND6 0.000000 0.000000 0.000000 \n", + "TRNE 0.000000 0.000000 0.000000 \n", + "CYTB 7.360882 7.292937 9.727838 \n", + "TRNT 0.000000 0.000000 0.000000 \n", + "TRNP 0.000000 0.000000 0.000000 \n", + "\n", + " Inh L1-2 VIP HTR3A Exc L5-6 FEZF2 OR1L8 Exc L2 LINC00507 GLRA3 \\\n", + "feature \n", + "DDX11L1 0.000 0.000000 0.000000 \n", + "WASH7P 0.000 0.000000 0.000000 \n", + "MIR6859-1 0.000 0.000000 0.000000 \n", + "MIR1302-2 0.000 0.000000 0.000000 \n", + "FAM138A 0.000 0.000000 0.000000 \n", + "... ... ... ... \n", + "ND6 0.000 0.000000 0.000000 \n", + "TRNE 0.000 0.000000 0.000000 \n", + "CYTB 7.442 8.006718 7.027074 \n", + "TRNT 0.000 0.000000 0.000000 \n", + "TRNP 0.000 0.000000 0.000000 \n", + "\n", + " Inh L3-5 SST OR5AH1P Exc L2-3 RORB RTKN2 Exc L5 RORB MED8 \n", + "feature \n", + "DDX11L1 0.000000 0.000000 0.000000 \n", + "WASH7P 0.000000 0.000000 0.000000 \n", + "MIR6859-1 0.000000 0.000000 0.000000 \n", + "MIR1302-2 0.000000 0.000000 0.000000 \n", + "FAM138A 0.000000 0.000000 0.000000 \n", + "... ... ... ... 
\n",
+       "ND6 0.000000 0.000000 0.000000 \n",
+       "TRNE 0.000000 0.000000 0.000000 \n",
+       "CYTB 8.487107 7.731934 8.679226 \n",
+       "TRNT 0.000000 0.000000 0.000000 \n",
+       "TRNP 0.000000 0.000000 0.000000 \n",
+       "\n",
+       "[50281 rows x 127 columns]"
+      ]
+     },
+     "execution_count": 6,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "df"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "For each gene (row), sum the values across all columns and keep only the genes whose total is greater than 5 and that are non-zero in at least 5 columns."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Keep genes whose summed value over all columns exceeds 5.\n",
+    "df = df[df.sum(axis=1) > 5]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Keep genes with a non-zero value in at least 5 columns (summing booleans counts the True entries).\n",
+    "df = df[(df != 0).sum(axis=1) >= 5]"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "For each gene, compute a Z-score using only its non-zero values: zeros are replaced with NaN so that the mean and standard deviation ignore them. The Z-score is the value minus the gene's mean, divided by the gene's standard deviation. The frame is transposed first, so `znorm_df` has one row per original column and one column per gene."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Build a new frame rather than assigning into the filtered one in place.\n",
+    "df = df.mask(df == 0, np.nan)\n",
+    "transposed_df = df.T\n",
+    "# mean() and std() work column-wise and skip NaN, i.e. per gene over its non-zero values.\n",
+    "znorm_df = (transposed_df - transposed_df.mean()) / transposed_df.std()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 10,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
featureLINC01128NOC2LHES4AGRNC1orf159SDF4UBE2J2ACAP3CPSF3LDVL1...ND2COX1COX2ATP6COX3ND3ND4LND4ND5CYTB
Inh L1 PAX6 CHRFAM7ANaNNaN-1.038358NaNNaN-1.729148NaN-1.118255NaNNaN...-0.6042510.3908610.2529770.4254580.317008-0.416324NaN0.279025-0.7249090.243941
Inh L1-3 VIP CBLN1NaNNaNNaNNaNNaN-1.259180NaN-0.836926-0.878536NaN...-0.3468410.3231660.4396220.6479680.388296-0.157575NaN0.501555-0.3248910.265685
Exc L3 LAMP5 CARM1P10.365220-0.125027NaN1.4310280.5021220.7391470.4136390.2643630.842442-0.575511...-0.877274-0.410960-0.572095-0.291868-0.515956-1.021173NaN-0.489011-0.992155-0.356634
Inh L1-3 SST FAM20ANaNNaNNaNNaNNaN-1.703526NaN-0.774428NaNNaN...-1.0210700.0627570.046087-1.3387040.015415-1.111847NaN-1.397286-1.213790-1.576555
Inh L1-6 LAMP5 AARDNaNNaN0.634147NaN-0.8467890.095190-1.116835-0.728181-0.553518NaN...-0.2062740.2504350.0055140.2622520.209883-0.337692NaN0.255757-0.5332700.148882
..................................................................
Exc L5-6 FEZF2 OR1L81.2920270.6198540.6562051.6238181.6699051.9346522.1449101.9334821.1521650.524931...-0.1812380.177361-0.0135190.2115580.1562861.025051NaN0.2503000.2468750.286861
Exc L2 LINC00507 GLRA3-0.116144NaNNaN1.1250840.3022940.132063-0.7819382.2828030.617923-0.080213...-0.658028-0.264215-0.453672-0.130844-0.451933-0.867175NaN-0.307704-1.013937-0.284155
Inh L3-5 SST OR5AH1PNaNNaN-0.456191NaN-0.8366180.057329NaN-0.711716-0.973062NaN...-0.0471560.5715820.6262190.7096700.5194510.101882NaN0.6574560.0158840.566870
Exc L2-3 RORB RTKN20.807610-0.369788-0.2146262.6199610.430462-0.056443-0.2413560.7884840.258513-0.129655...-0.201011-0.067697-0.1825930.177000-0.177196-0.365131NaN-0.050814-0.3273520.126694
Exc L5 RORB MED80.8723870.360330-0.115846NaN0.1880930.382969-1.039625-0.1669320.606149NaN...1.4159640.5791750.5816920.6649060.7612061.411444NaN0.6977970.7218770.678853
\n", + "

127 rows × 10986 columns

\n", + "
" + ], + "text/plain": [ + "feature LINC01128 NOC2L HES4 AGRN C1orf159 \\\n", + "Inh L1 PAX6 CHRFAM7A NaN NaN -1.038358 NaN NaN \n", + "Inh L1-3 VIP CBLN1 NaN NaN NaN NaN NaN \n", + "Exc L3 LAMP5 CARM1P1 0.365220 -0.125027 NaN 1.431028 0.502122 \n", + "Inh L1-3 SST FAM20A NaN NaN NaN NaN NaN \n", + "Inh L1-6 LAMP5 AARD NaN NaN 0.634147 NaN -0.846789 \n", + "... ... ... ... ... ... \n", + "Exc L5-6 FEZF2 OR1L8 1.292027 0.619854 0.656205 1.623818 1.669905 \n", + "Exc L2 LINC00507 GLRA3 -0.116144 NaN NaN 1.125084 0.302294 \n", + "Inh L3-5 SST OR5AH1P NaN NaN -0.456191 NaN -0.836618 \n", + "Exc L2-3 RORB RTKN2 0.807610 -0.369788 -0.214626 2.619961 0.430462 \n", + "Exc L5 RORB MED8 0.872387 0.360330 -0.115846 NaN 0.188093 \n", + "\n", + "feature SDF4 UBE2J2 ACAP3 CPSF3L DVL1 ... \\\n", + "Inh L1 PAX6 CHRFAM7A -1.729148 NaN -1.118255 NaN NaN ... \n", + "Inh L1-3 VIP CBLN1 -1.259180 NaN -0.836926 -0.878536 NaN ... \n", + "Exc L3 LAMP5 CARM1P1 0.739147 0.413639 0.264363 0.842442 -0.575511 ... \n", + "Inh L1-3 SST FAM20A -1.703526 NaN -0.774428 NaN NaN ... \n", + "Inh L1-6 LAMP5 AARD 0.095190 -1.116835 -0.728181 -0.553518 NaN ... \n", + "... ... ... ... ... ... ... \n", + "Exc L5-6 FEZF2 OR1L8 1.934652 2.144910 1.933482 1.152165 0.524931 ... \n", + "Exc L2 LINC00507 GLRA3 0.132063 -0.781938 2.282803 0.617923 -0.080213 ... \n", + "Inh L3-5 SST OR5AH1P 0.057329 NaN -0.711716 -0.973062 NaN ... \n", + "Exc L2-3 RORB RTKN2 -0.056443 -0.241356 0.788484 0.258513 -0.129655 ... \n", + "Exc L5 RORB MED8 0.382969 -1.039625 -0.166932 0.606149 NaN ... \n", + "\n", + "feature ND2 COX1 COX2 ATP6 COX3 \\\n", + "Inh L1 PAX6 CHRFAM7A -0.604251 0.390861 0.252977 0.425458 0.317008 \n", + "Inh L1-3 VIP CBLN1 -0.346841 0.323166 0.439622 0.647968 0.388296 \n", + "Exc L3 LAMP5 CARM1P1 -0.877274 -0.410960 -0.572095 -0.291868 -0.515956 \n", + "Inh L1-3 SST FAM20A -1.021070 0.062757 0.046087 -1.338704 0.015415 \n", + "Inh L1-6 LAMP5 AARD -0.206274 0.250435 0.005514 0.262252 0.209883 \n", + "... ... 
... ... ... ... \n", + "Exc L5-6 FEZF2 OR1L8 -0.181238 0.177361 -0.013519 0.211558 0.156286 \n", + "Exc L2 LINC00507 GLRA3 -0.658028 -0.264215 -0.453672 -0.130844 -0.451933 \n", + "Inh L3-5 SST OR5AH1P -0.047156 0.571582 0.626219 0.709670 0.519451 \n", + "Exc L2-3 RORB RTKN2 -0.201011 -0.067697 -0.182593 0.177000 -0.177196 \n", + "Exc L5 RORB MED8 1.415964 0.579175 0.581692 0.664906 0.761206 \n", + "\n", + "feature ND3 ND4L ND4 ND5 CYTB \n", + "Inh L1 PAX6 CHRFAM7A -0.416324 NaN 0.279025 -0.724909 0.243941 \n", + "Inh L1-3 VIP CBLN1 -0.157575 NaN 0.501555 -0.324891 0.265685 \n", + "Exc L3 LAMP5 CARM1P1 -1.021173 NaN -0.489011 -0.992155 -0.356634 \n", + "Inh L1-3 SST FAM20A -1.111847 NaN -1.397286 -1.213790 -1.576555 \n", + "Inh L1-6 LAMP5 AARD -0.337692 NaN 0.255757 -0.533270 0.148882 \n", + "... ... ... ... ... ... \n", + "Exc L5-6 FEZF2 OR1L8 1.025051 NaN 0.250300 0.246875 0.286861 \n", + "Exc L2 LINC00507 GLRA3 -0.867175 NaN -0.307704 -1.013937 -0.284155 \n", + "Inh L3-5 SST OR5AH1P 0.101882 NaN 0.657456 0.015884 0.566870 \n", + "Exc L2-3 RORB RTKN2 -0.365131 NaN -0.050814 -0.327352 0.126694 \n", + "Exc L5 RORB MED8 1.411444 NaN 0.697797 0.721877 0.678853 \n", + "\n", + "[127 rows x 10986 columns]" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "znorm_df" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "For each column, collect the up genes (positive Z-scores) that are greater than 1.95 (p-value < 0.05) put these gene sets in a GMT file with the column label followed by the tab separated genes in each row." 
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Each row of `znorm_df` becomes one line of a GMT file: the row label, an empty description field, then the selected genes, all tab-separated. Rows with no selected genes are skipped. Up genes have Z > 1.95 and down genes have Z < -1.95."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "Z_CUTOFF = 1.95  # same cutoff for up (Z > cutoff) and down (Z < -cutoff) genes\n",
+    "\n",
+    "\n",
+    "def write_gene_sets(gmt_path, zscores, select):\n",
+    "    \"\"\"Write one GMT line per row of `zscores`; return how many lines were written.\n",
+    "\n",
+    "    A line holds the row label, an empty description field and the names of the\n",
+    "    columns for which `select(row)` is True, separated by tabs. Rows for which\n",
+    "    nothing is selected are skipped, so the file never contains a label-only line.\n",
+    "    \"\"\"\n",
+    "    n_written = 0\n",
+    "    with open(gmt_path, \"w\") as f:\n",
+    "        for label in zscores.index:\n",
+    "            row = zscores.loc[label]\n",
+    "            genes = list(row[select(row)].index)\n",
+    "            if not genes:\n",
+    "                continue\n",
+    "            # The with-block flushes and closes the file, so no per-line flush is needed.\n",
+    "            f.write(\"\\t\".join([label, \"\", *genes]) + \"\\n\")\n",
+    "            n_written += 1\n",
+    "    return n_written"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "write_gene_sets(filepath+\"Human M1 10x upgenes.gmt\", znorm_df, lambda z: z > Z_CUTOFF)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "write_gene_sets(filepath+\"Human M1 10x downgenes.gmt\", znorm_df, lambda z: z < -Z_CUTOFF)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Sanity check on the files written above: after stripping, every line must split on the\n",
+    "# empty description field (\"\\t\\t\") into exactly two parts, the label and the gene list.\n",
+    "# Any line that does not is printed.\n",
+    "for gmt_name in (\"Human M1 10x upgenes.gmt\", \"Human M1 10x downgenes.gmt\"):\n",
+    "    with open(filepath+gmt_name, \"r\") as f:\n",
+    "        for line in f:\n",
+    "            s = line.strip().split(\"\\t\\t\")\n",
+    "            if not len(s) == 2:\n",
+    "                print(line)"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "py38",
+   "language": "python",
+   "name": "py38"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.8.3"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}