Need to convert the output below into a DataFrame

Hi, I want the weights of the edges to be printed in a list. Here is the code I have so far.

import pandas as pd

# Read the data.
data = pd.read_csv('HEPARTWO10k.csv')
# NOTE: new_data is an alias of data, not a copy -- every edit below is
# also visible through `data`.
new_data = data
# Data cleaning: keep only the last two characters of each 'age' string.
new_data['age'] = new_data['age'].str[-2:]

# Getting into structure. This empty model is only a placeholder; `sm` is
# rebound to the model returned by from_pandas() further down.
from causalnex.structure import StructureModel
sm = StructureModel()

# Label the variables in numeric format.
from sklearn.preprocessing import LabelEncoder

# The loop below needs the names of the columns that are not numeric yet;
# without this definition it fails with NameError. Derive them from the
# frame's dtypes ('age' is still a string column after the slicing above,
# so it is included).
non_numeric_columns = list(new_data.select_dtypes(exclude='number').columns)

le = LabelEncoder()
for col in non_numeric_columns:
    # fit_transform re-fits the encoder on each column, so every column
    # gets its own integer coding.
    new_data[col] = le.fit_transform(new_data[col])

# Preview of the encoded frame (only displayed in an interactive session).
new_data.head(5)

from causalnex.structure.notears import from_pandas
from IPython.display import Image
from causalnex.plots import plot_structure, NODE_STYLE, EDGE_STYLE
# Build the structure model from the fully numeric frame and print every
# edge together with its attribute dict.
sm = from_pandas(new_data)
print(sm.edges(data=True))

Here is the output of edges and weight

[('alcoholism', 'vh_amn', {'origin': 'learned', 'weight': 8.883337828718184e-07}), ('alcoholism', 'hepatotoxic', {'origin': 'learned', 'weight': 1.6581288502950121e-07}), ('alcoholism', 'THepatitis', {'origin': 'learned', 'weight': 1.1071852773841471e-08}), ('alcoholism', 'hospital', {'origin': 'learned', 'weight': 0.0777471917583987}), ('alcoholism', 'surgery', {'origin': 'learned', 'weight': 0.03881834897358515}), ('alcoholism', 'gallstones', {'origin': 'learned', 'weight': 1.8670393846294617e-06}), ('alcoholism', 'choledocholithotomy', {'origin': 'learned', 'weight': 1.2189514108947425e-05}), ('alcoholism', 'injections', {'origin': 'learned', 'weight': 4.883777956499646e-07}), ('alcoholism', 'transfusion', {'origin': 'learned', 'weight': 3.063116834538042e-06}), ('alcoholism', 'ChHepatitis', {'origin': 'learned', 'weight': 4.5859350413506023e-07}), ('alcoholism', 'sex', {'origin': 'learned', 'weight': 0.05102990887956666}), ('alcoholism', 'age', {'origin': 'learned', 'weight': 0.0004804154365110523}), ('alcoholism', 'PBC', {'origin': 'learned', 'weight': 0.02460148399819424}), ('alcoholism', 'fibrosis', {'origin': 'learned', 'weight': 2.0249489928432656e-07}), ('alcoholism', 'diabetes', {'origin': 'learned', 'weight': 9.898756038280852e-07}), ('alcoholism', 'obesity', {'origin': 'learned', 'weight': 7.974093153968598e-07}), ('alcoholism', 'Steatosis', {'origin': 'learned', 'weight': 1.6665573140806262e-09}), ('alcoholism', 'Cirrhosis', {'origin': 'learned', 'weight': -5.847800482627274e-07})

I want this output to be stored in a DataFrame with the columns edge1, edge2, origin, weight

I want output like this:

  • edge1 edge2 origin weight

1 answer

  • answered 2020-12-01 23:38 Dani Mesejo

    If your data is a list of tuples, like the one in the listing below, you could do:

    import pandas as pd

    # Sample edge list: each entry is (first node, second node, attribute dict),
    # the same shape that print(sm.edges(data=True)) shows in the question.
    lst = [('alcoholism', 'vh_amn', {'origin': 'learned', 'weight': 8.883337828718184e-07}),
           ('alcoholism', 'hepatotoxic', {'origin': 'learned', 'weight': 1.6581288502950121e-07}),
           ('alcoholism', 'THepatitis', {'origin': 'learned', 'weight': 1.1071852773841471e-08}),
           ('alcoholism', 'hospital', {'origin': 'learned', 'weight': 0.0777471917583987}),
           ('alcoholism', 'surgery', {'origin': 'learned', 'weight': 0.03881834897358515}),
           ('alcoholism', 'gallstones', {'origin': 'learned', 'weight': 1.8670393846294617e-06}),
           ('alcoholism', 'choledocholithotomy', {'origin': 'learned', 'weight': 1.2189514108947425e-05}),
           ('alcoholism', 'injections', {'origin': 'learned', 'weight': 4.883777956499646e-07}),
           ('alcoholism', 'transfusion', {'origin': 'learned', 'weight': 3.063116834538042e-06}),
           ('alcoholism', 'ChHepatitis', {'origin': 'learned', 'weight': 4.5859350413506023e-07}),
           ('alcoholism', 'sex', {'origin': 'learned', 'weight': 0.05102990887956666}),
           ('alcoholism', 'age', {'origin': 'learned', 'weight': 0.0004804154365110523}),
           ('alcoholism', 'PBC', {'origin': 'learned', 'weight': 0.02460148399819424}),
           ('alcoholism', 'fibrosis', {'origin': 'learned', 'weight': 2.0249489928432656e-07}),
           ('alcoholism', 'diabetes', {'origin': 'learned', 'weight': 9.898756038280852e-07}),
           ('alcoholism', 'obesity', {'origin': 'learned', 'weight': 7.974093153968598e-07}),
           ('alcoholism', 'Steatosis', {'origin': 'learned', 'weight': 1.6665573140806262e-09}),
           ('alcoholism', 'Cirrhosis', {'origin': 'learned', 'weight': -5.847800482627274e-07})]

    # Flatten every triple into one record: the two node names come first,
    # then the attribute dict is unpacked so its keys ('origin', 'weight')
    # become columns of their own.
    records = [
        {"edge1": source, "edge2": target, **attrs}
        for source, target, attrs in lst
    ]

    # One row per edge; column order follows the key order of the records.
    df = pd.DataFrame.from_records(data=records)
    print(df)
    

    Output

             edge1                edge2   origin        weight
    0   alcoholism               vh_amn  learned  8.883338e-07
    1   alcoholism          hepatotoxic  learned  1.658129e-07
    2   alcoholism           THepatitis  learned  1.107185e-08
    3   alcoholism             hospital  learned  7.774719e-02
    4   alcoholism              surgery  learned  3.881835e-02
    5   alcoholism           gallstones  learned  1.867039e-06
    6   alcoholism  choledocholithotomy  learned  1.218951e-05
    7   alcoholism           injections  learned  4.883778e-07
    8   alcoholism          transfusion  learned  3.063117e-06
    9   alcoholism          ChHepatitis  learned  4.585935e-07
    10  alcoholism                  sex  learned  5.102991e-02
    11  alcoholism                  age  learned  4.804154e-04
    12  alcoholism                  PBC  learned  2.460148e-02
    13  alcoholism             fibrosis  learned  2.024949e-07
    14  alcoholism             diabetes  learned  9.898756e-07
    15  alcoholism              obesity  learned  7.974093e-07
    16  alcoholism            Steatosis  learned  1.666557e-09
    17  alcoholism            Cirrhosis  learned -5.847800e-07