KeyError when processing a pandas DataFrame

For a pathway $p_i$, the CNA data of the associated genes were extracted from the CNV matrix ($C$), producing an intermediate matrix $B \in \mathbb{R}^{n \times r_i}$, where $r_i$ is the number of genes involved in the pathway $p_i$. That is, the matrix $B$ consists of samples in rows and the genes of a given pathway in columns. Using principal component analysis (PCA), the matrix $B$ was decomposed into uncorrelated components, yielding $G_{p_i} \in \mathbb{R}^{n \times q}$, where $q$ is the number of principal components (PCs).

import pandas as pd
import numpy as np
from sklearn.decomposition import PCA
from sklearn.preprocessing import LabelEncoder
import csv

def get_kegg_pathways():
    """Read the KEGG pathway file and return one tuple per line.

    The file ``directory + "hsa.txt"`` is parsed as tab-separated text with
    no header line. Every field is kept as the raw string found in the file,
    so a literal ``"NA"`` stays the string ``"NA"`` and can be filtered by
    the caller. Blank lines are skipped.

    Returns:
        list[tuple[str, ...]]: the fields of each non-empty line, in file
        order. Presumably the first field is the pathway identifier and the
        remaining fields are gene symbols -- confirm against the file.

    Raises:
        OSError: if the file cannot be opened.
    """
    # NOTE: `directory` is not defined in this function; it is expected to be
    # a module-level path prefix ending in a path separator.
    with open(directory + "hsa.txt", newline="") as keggfile:
        # Iterating a DataFrame (`for row in df`) yields column labels rather
        # than rows, and read_csv's default header handling would swallow the
        # first pathway line. csv.reader yields every line as a list of raw
        # strings and tolerates lines with differing field counts.
        reader = csv.reader(keggfile, delimiter="\t")
        # Tuples (not lists) so that a row remains hashable for callers.
        return [tuple(fields) for fields in reader if fields]

def main():
    """Assemble a per-pathway CNA matrix ``B`` and fit a model on it.

    For every entry returned by ``get_kegg_pathways()`` this collects the
    entry's items after the first (skipping the string "NA") as a gene set,
    builds a list-of-lists ``B`` with one inner list per element of ``ld``,
    fills it from ``cna.df`` for rows whose first field is in the gene set,
    and finally calls ``fit`` on the result.

    NOTE(review): ``ld`` and ``cna`` are not defined in the visible code;
    they are presumably module-level objects (``cna`` exposing a pandas
    DataFrame as ``cna.df``) -- confirm before relying on this function.
    """

    # Pathway info
    kegg = get_kegg_pathways()

    # q : number of principal components (PCs)
    # C : CNV matrix
    # G : mRNA expression matrix
    # M : DNA methylation matrix
    # NOTE(review): none of q, C, G, M is read again in the visible code.
    q = 5
    C = []
    G = []
    M = []

    # Process common data (denoted as matrix B)
    # NOTE(review): this dict starts empty and nothing visible here fills it,
    # so any lookup below raises KeyError.
    cna_sample_index = {}
    process_common = True
    if process_common:
        for i, p in enumerate(kegg):
            # Collect every item of p except the first one, ignoring "NA";
            # the dict is used only for membership tests further down.
            genes = {}
            first = True
            for s in p:
                if first:
                    first = False
                else:
                    if s!= "NA":
                        genes[s] = 1
            # Loop through each sample: one (initially empty) row of B per
            # element of ld, plus one looked-up index per element.
            B = []
            pathways = []
            for s in ld:
                B.append([])
                # NOTE(review): this is the line reported in the traceback.
                # The lookup key is the pathway entry p, not the loop
                # variable s; presumably a per-sample key was intended --
                # TODO confirm.
                pathways.append(cna_sample_index[p])

            # NOTE(review): Bi is never incremented in the visible code, so
            # Bi2 restarts at 0 for every matching row.
            Bi = 0
            # NOTE(review): DataFrame.itertuples() yields one tuple per row
            # (index first), not (index, row) pairs; this unpacking looks
            # like it expects iterrows() -- TODO confirm.
            for index, row in cna.df.itertuples():
                if row[0].upper() in genes:
                    Bi2 = Bi
                    for c in pathways:
                        # Append the value at (row position, column c) to
                        # consecutive rows of B.
                        B[Bi2].append(cna.df.iloc[index, c])
                        Bi2 = Bi2 + 1

            # NOTE(review): fit() is called on cna without data and its
            # result is fitted again on B; presumably a PCA with q components
            # was intended here -- TODO confirm.
            pca_cna = cna.fit()
            pca_cna.fit(B)

Traceback:

  File "/home/melissachua/main.py", line 208, in <module>
    main()
  File "/home/melissachua/main.py", line 165, in main
    pathways.append(cna_sample_index[p])
KeyError: 'hsa00010_Glycolysis_/_Gluconeogenesis'

kegg table:

0 1
0 hsa00010_Glycolysis_/_Gluconeogenesis NaN
1 hsa00020_Citrate_cycle_(TCA_cycle) NaN
2 hsa00030_Pentose_phosphate_pathway NaN

cna table:

Hugo_Symbol TCGA-02-0001-01 TCGA-02-0001-02 TCGA-02-0001-03
0 0.001 0.002 0.003 0.004
1 0.005 0.006 0.007 0.008
How many English words
do you know?
Test your English vocabulary size, and measure
how many words do you know
Online Test
Powered by Examplum