hovertool displaying ??? for bokeh boxplot

dataset: https://github.com/rashida048/Datasets/blob/master/StudentsPerformance.csv

I am trying to implement the hovertool function to display the value for the "Total" (sum of the 3 score columns) on each outlier. but when I hover over each value only ??? is displayed. I have also tried moving "source" inside the body of the function, as well as defining '@tot' in the hovertool as '@df.Total' and vice versa but still no success. where have I gone wrong? image attached.

enter image description here

source=ColumnDataSource(data=dict(df,tot=df.Total))

def box_plot(df, vals, label, ylabel=None,xlabel=None,title=None):
 
    # Group Data frame
    df_gb = df.groupby(label)
    # Get the categories
    cats = list(df_gb.groups.keys())

    # Compute quartiles for each group
    q1 = df_gb[vals].quantile(q=0.25)
    q2 = df_gb[vals].quantile(q=0.5)
    q3 = df_gb[vals].quantile(q=0.75)
                       
    # Compute interquartile region and upper and lower bounds for outliers
    iqr = q3 - q1
    upper_cutoff = q3 + 1.5*iqr
    lower_cutoff = q1 - 1.5*iqr

    # Find the outliers for each category
    def outliers(group):
        cat = group.name
        outlier_inds = (group[vals] > upper_cutoff[cat]) \
                                     | (group[vals] < lower_cutoff[cat])
        return group[vals][outlier_inds]

    # Apply outlier finder
    out = df_gb.apply(outliers).dropna()

    # Points of outliers for plotting
    outx = []
    outy = []
    for cat in cats:
        # only add outliers if they exist
        if cat in out and not out[cat].empty:
            for value in out[cat]:
                outx.append(cat)
                outy.append(value) 
                
    # If outliers, shrink whiskers to smallest and largest non-outlier
    qmin = df_gb[vals].min()
    qmax = df_gb[vals].max()
    upper = [min([x,y]) for (x,y) in zip(qmax, upper_cutoff)]
    lower = [max([x,y]) for (x,y) in zip(qmin, lower_cutoff)]

    cats = [str(i) for i in cats]
# Build figure
    p = figure(sizing_mode='stretch_width', x_range=cats,height=300,toolbar_location=None)
    p.xgrid.grid_line_color = None
    p.ygrid.grid_line_width = 2
    p.yaxis.axis_label = ylabel
    p.xaxis.axis_label = xlabel
    p.title=title
    p.y_range.start=0
    p.title.align = 'center'
    
    # stems
    p.segment(cats, upper, cats, q3, line_width=2, line_color="black")
    p.segment(cats, lower, cats, q1, line_width=2, line_color="black")

    # boxes
    p.rect(cats, (q3 + q1)/2, 0.5, q3 - q1, fill_color=['#a50f15', '#de2d26', '#fb6a4a', '#fcae91', '#fee5d9'], 
           alpha=0.7, line_width=2, line_color="black")

    # median (almost-0 height rects simpler than segments)
    p.rect(cats, q2, 0.5, 0.01, line_color="black", line_width=2)

    # whiskers (almost-0 height rects simpler than segments)
    p.rect(cats, lower, 0.2, 0.01, line_color="black")
    p.rect(cats, upper, 0.2, 0.01, line_color="black")

    # outliers
    p.circle(outx, outy, size=6, color="black")
    
    p.add_tools(HoverTool(tooltips=[('Total','@tot')]))

    return p

p = box_plot(df, 'Total', 'race/ethnicity', ylabel='Total spread',xlabel='Race/Ethnicity',title='BoxPlot')
show(p)

1 answer

  • answered 2022-05-04 18:00 mosc9575

    The problem is, that you never pass a source to the figure object, but the HoverTool is looking for data in the tot column. There is none, and every time bokeh doesn't find any data, it shows ???.

    See the minimal example below, how it could work.

    from bokeh.plotting import figure, show, output_notebook
    from bokeh.models import HoverTool, ColumnDataSource
    output_notebook()
    
    source = ColumnDataSource(dict(
        x=list(range(5)),
        y=list(range(5)),
        tot=list('ABCDE')
    ))
    
    p = figure(width=300, height=300)
    p.circle(x='x', y='y', source=source)
    p.add_tools(HoverTool(tooltips=[('Total','@tot')]))
    show(p)
    

    simple HoverTool

    Back to your code and how to make a HoverTool work. Since you only want a hover information for the outliers, you can create a outliers_source right befor you call p.figure().

    outliers_source= ColumnDataSource(dict(
        outx=outx,
        outy=outy,
        tot=list(range(len(outx)))
    ))
    

    and then change the line below:

    # old
    # p.circle(outx, outy, size=6, color="black")
    p.circle('outx', 'outy', size=6, color="black", source=outliers_source)
    

    The changes maybe look quite small, but the information for the circles are now coming from a complete different source. Internally a lot has changed.

    Now your HoverTool should show some numbers. You have to implement the logic of your information, because I didn't understand it.

How many English words
do you know?
Test your English vocabulary size, and measure
how many words do you know
Online Test
Powered by Examplum