diff --git a/Allen Brain Atlas/Human M1 10x (RNA-Sequencing)/Human M1 10x.ipynb b/Allen Brain Atlas/Human M1 10x (RNA-Sequencing)/Human M1 10x.ipynb new file mode 100644 index 0000000..454fbc2 --- /dev/null +++ b/Allen Brain Atlas/Human M1 10x (RNA-Sequencing)/Human M1 10x.ipynb @@ -0,0 +1,1044 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import numpy as np" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "https://portal.brain-map.org/atlases-and-data/rnaseq/human-m1-10x" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "filepath = \"/Volumes/Edrive/minji/python_projects/RNA-seq/Brian Roth Human M1 10x/\"" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "filename = \"trimmed_means.csv\"" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "df = pd.read_csv(filepath+filename, index_col=0)" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(50281, 127)" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.shape" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
| \n", + " | Inh L1 PAX6 CHRFAM7A | \n", + "Inh L1-3 VIP CBLN1 | \n", + "Exc L3 LAMP5 CARM1P1 | \n", + "Inh L1-3 SST FAM20A | \n", + "Inh L1-6 LAMP5 AARD | \n", + "Inh L1-6 LAMP5 CA1 | \n", + "Exc L6 THEMIS SLN | \n", + "Inh L2 VIP SLC6A16 | \n", + "Exc L5 FEZF2 NREP-AS1 | \n", + "Inh L3-5 VIP IGDCC3 | \n", + "... | \n", + "Exc L5-6 FEZF2 SH2D1B | \n", + "Inh L2-5 PVALB RPH3AL | \n", + "Exc L3 THEMIS ENPEP | \n", + "Exc L6 FEZF2 PROKR2 | \n", + "Inh L1-2 VIP HTR3A | \n", + "Exc L5-6 FEZF2 OR1L8 | \n", + "Exc L2 LINC00507 GLRA3 | \n", + "Inh L3-5 SST OR5AH1P | \n", + "Exc L2-3 RORB RTKN2 | \n", + "Exc L5 RORB MED8 | \n", + "
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| feature | \n", + "\n", + " | \n", + " | \n", + " | \n", + " | \n", + " | \n", + " | \n", + " | \n", + " | \n", + " | \n", + " | \n", + " | \n", + " | \n", + " | \n", + " | \n", + " | \n", + " | \n", + " | \n", + " | \n", + " | \n", + " | \n", + " |
| DDX11L1 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.00 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "... | \n", + "0.00000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "
| WASH7P | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.00 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "... | \n", + "0.00000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "
| MIR6859-1 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.00 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "... | \n", + "0.00000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "
| MIR1302-2 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.00 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "... | \n", + "0.00000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "
| FAM138A | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.00 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "... | \n", + "0.00000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "
| ... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "
| ND6 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.00 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "... | \n", + "0.00000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "
| TRNE | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.00 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "... | \n", + "0.00000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "
| CYTB | \n", + "7.933085 | \n", + "7.970388 | \n", + "6.902729 | \n", + "4.809813 | \n", + "7.77 | \n", + "8.231889 | \n", + "8.107318 | \n", + "8.358125 | \n", + "6.661652 | \n", + "9.009897 | \n", + "... | \n", + "8.02475 | \n", + "7.360882 | \n", + "7.292937 | \n", + "9.727838 | \n", + "7.442 | \n", + "8.006718 | \n", + "7.027074 | \n", + "8.487107 | \n", + "7.731934 | \n", + "8.679226 | \n", + "
| TRNT | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.00 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "... | \n", + "0.00000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "
| TRNP | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.00 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "... | \n", + "0.00000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "
50281 rows × 127 columns
\n", + "| feature | \n", + "LINC01128 | \n", + "NOC2L | \n", + "HES4 | \n", + "AGRN | \n", + "C1orf159 | \n", + "SDF4 | \n", + "UBE2J2 | \n", + "ACAP3 | \n", + "CPSF3L | \n", + "DVL1 | \n", + "... | \n", + "ND2 | \n", + "COX1 | \n", + "COX2 | \n", + "ATP6 | \n", + "COX3 | \n", + "ND3 | \n", + "ND4L | \n", + "ND4 | \n", + "ND5 | \n", + "CYTB | \n", + "
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| Inh L1 PAX6 CHRFAM7A | \n", + "NaN | \n", + "NaN | \n", + "-1.038358 | \n", + "NaN | \n", + "NaN | \n", + "-1.729148 | \n", + "NaN | \n", + "-1.118255 | \n", + "NaN | \n", + "NaN | \n", + "... | \n", + "-0.604251 | \n", + "0.390861 | \n", + "0.252977 | \n", + "0.425458 | \n", + "0.317008 | \n", + "-0.416324 | \n", + "NaN | \n", + "0.279025 | \n", + "-0.724909 | \n", + "0.243941 | \n", + "
| Inh L1-3 VIP CBLN1 | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "-1.259180 | \n", + "NaN | \n", + "-0.836926 | \n", + "-0.878536 | \n", + "NaN | \n", + "... | \n", + "-0.346841 | \n", + "0.323166 | \n", + "0.439622 | \n", + "0.647968 | \n", + "0.388296 | \n", + "-0.157575 | \n", + "NaN | \n", + "0.501555 | \n", + "-0.324891 | \n", + "0.265685 | \n", + "
| Exc L3 LAMP5 CARM1P1 | \n", + "0.365220 | \n", + "-0.125027 | \n", + "NaN | \n", + "1.431028 | \n", + "0.502122 | \n", + "0.739147 | \n", + "0.413639 | \n", + "0.264363 | \n", + "0.842442 | \n", + "-0.575511 | \n", + "... | \n", + "-0.877274 | \n", + "-0.410960 | \n", + "-0.572095 | \n", + "-0.291868 | \n", + "-0.515956 | \n", + "-1.021173 | \n", + "NaN | \n", + "-0.489011 | \n", + "-0.992155 | \n", + "-0.356634 | \n", + "
| Inh L1-3 SST FAM20A | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "-1.703526 | \n", + "NaN | \n", + "-0.774428 | \n", + "NaN | \n", + "NaN | \n", + "... | \n", + "-1.021070 | \n", + "0.062757 | \n", + "0.046087 | \n", + "-1.338704 | \n", + "0.015415 | \n", + "-1.111847 | \n", + "NaN | \n", + "-1.397286 | \n", + "-1.213790 | \n", + "-1.576555 | \n", + "
| Inh L1-6 LAMP5 AARD | \n", + "NaN | \n", + "NaN | \n", + "0.634147 | \n", + "NaN | \n", + "-0.846789 | \n", + "0.095190 | \n", + "-1.116835 | \n", + "-0.728181 | \n", + "-0.553518 | \n", + "NaN | \n", + "... | \n", + "-0.206274 | \n", + "0.250435 | \n", + "0.005514 | \n", + "0.262252 | \n", + "0.209883 | \n", + "-0.337692 | \n", + "NaN | \n", + "0.255757 | \n", + "-0.533270 | \n", + "0.148882 | \n", + "
| ... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "
| Exc L5-6 FEZF2 OR1L8 | \n", + "1.292027 | \n", + "0.619854 | \n", + "0.656205 | \n", + "1.623818 | \n", + "1.669905 | \n", + "1.934652 | \n", + "2.144910 | \n", + "1.933482 | \n", + "1.152165 | \n", + "0.524931 | \n", + "... | \n", + "-0.181238 | \n", + "0.177361 | \n", + "-0.013519 | \n", + "0.211558 | \n", + "0.156286 | \n", + "1.025051 | \n", + "NaN | \n", + "0.250300 | \n", + "0.246875 | \n", + "0.286861 | \n", + "
| Exc L2 LINC00507 GLRA3 | \n", + "-0.116144 | \n", + "NaN | \n", + "NaN | \n", + "1.125084 | \n", + "0.302294 | \n", + "0.132063 | \n", + "-0.781938 | \n", + "2.282803 | \n", + "0.617923 | \n", + "-0.080213 | \n", + "... | \n", + "-0.658028 | \n", + "-0.264215 | \n", + "-0.453672 | \n", + "-0.130844 | \n", + "-0.451933 | \n", + "-0.867175 | \n", + "NaN | \n", + "-0.307704 | \n", + "-1.013937 | \n", + "-0.284155 | \n", + "
| Inh L3-5 SST OR5AH1P | \n", + "NaN | \n", + "NaN | \n", + "-0.456191 | \n", + "NaN | \n", + "-0.836618 | \n", + "0.057329 | \n", + "NaN | \n", + "-0.711716 | \n", + "-0.973062 | \n", + "NaN | \n", + "... | \n", + "-0.047156 | \n", + "0.571582 | \n", + "0.626219 | \n", + "0.709670 | \n", + "0.519451 | \n", + "0.101882 | \n", + "NaN | \n", + "0.657456 | \n", + "0.015884 | \n", + "0.566870 | \n", + "
| Exc L2-3 RORB RTKN2 | \n", + "0.807610 | \n", + "-0.369788 | \n", + "-0.214626 | \n", + "2.619961 | \n", + "0.430462 | \n", + "-0.056443 | \n", + "-0.241356 | \n", + "0.788484 | \n", + "0.258513 | \n", + "-0.129655 | \n", + "... | \n", + "-0.201011 | \n", + "-0.067697 | \n", + "-0.182593 | \n", + "0.177000 | \n", + "-0.177196 | \n", + "-0.365131 | \n", + "NaN | \n", + "-0.050814 | \n", + "-0.327352 | \n", + "0.126694 | \n", + "
| Exc L5 RORB MED8 | \n", + "0.872387 | \n", + "0.360330 | \n", + "-0.115846 | \n", + "NaN | \n", + "0.188093 | \n", + "0.382969 | \n", + "-1.039625 | \n", + "-0.166932 | \n", + "0.606149 | \n", + "NaN | \n", + "... | \n", + "1.415964 | \n", + "0.579175 | \n", + "0.581692 | \n", + "0.664906 | \n", + "0.761206 | \n", + "1.411444 | \n", + "NaN | \n", + "0.697797 | \n", + "0.721877 | \n", + "0.678853 | \n", + "
127 rows × 10986 columns
\n", + "