Keyerror when processing pandas dataframe
For a pathway pi, the CNA data of associated genes were extracted from the CNV matrix (C), producing an intermediate matrix B∈Rn×ri, where ri is the number of genes involved in the pathway pi. That is, the matrix B consists of samples in rows and genes for a given pathway in columns. Using principal component analysis (PCA), the matrix B was decomposed into uncorrelated components, yielding Gpi∈Rn×q, where q is the number of principal components (PCs).
import pandas as pd
import numpy as np
from sklearn.decomposition import PCA
from sklearn.preprocessing import LabelEncoder
import csv
def get_kegg_pathways():
kegg_pathways = []
with open(directory + "hsa.txt", newline="") as keggfile:
kegg = pd.read_csv(keggfile, sep="\t")
for row in kegg:
#for row in kegg.itertuples():
kegg_pathways.append(row)
return kegg_pathways
def main():
# Pathway info
kegg = get_kegg_pathways()
# q : Number of Principal Components (PCs)
# C : CNV matrix
# G = mRNA expression matrix
# M : DNA methylation matrix
q = 5
C = []
G = []
M = []
# Process common data (denoted as matrix B)
cna_sample_index = {}
process_common = True
if process_common:
for i, p in enumerate(kegg):
genes = {}
first = True
for s in p:
if first:
first = False
else:
if s!= "NA":
genes[s] = 1
# Loop through each sample
B = []
pathways = []
for s in ld:
B.append([])
pathways.append(cna_sample_index[p])
Bi = 0
for index, row in cna.df.itertuples():
if row[0].upper() in genes:
Bi2 = Bi
for c in pathways:
B[Bi2].append(cna.df.iloc[index, c])
Bi2 = Bi2 + 1
pca_cna = cna.fit()
pca_cna.fit(B)
Traceback:
File "/home/melissachua/main.py", line 208, in <module>
main()
File "/home/melissachua/main.py", line 165, in main
pathways.append(cna_sample_index[p])
KeyError: 'hsa00010_Glycolysis_/_Gluconeogenesis'
kegg
table:
0 | 1 | |
---|---|---|
0 | hsa00010_Glycolysis_/_Gluconeogenesis | NaN |
1 | hsa00020_Citrate_cycle_(TCA_cycle) | NaN |
2 | hsa00030_Pentose_phosphate_pathway | NaN |
cna
table:
Hugo_Symbol | TCGA-02-0001-01 | TCGA-02-0001-02 | TCGA-02-0001-03 | |
---|---|---|---|---|
0 | 0.001 | 0.002 | 0.003 | 0.004 |
1 | 0.005 | 0.006 | 0.007 | 0.008 |
How many English words
do you know?
do you know?
Test your English vocabulary size, and measure
how many words do you know
Online Test
how many words do you know
Powered by Examplum