Skip to content

API Documentation

add_errorbar_loc_on_posthoc(posthoc_df, bar_coords, overwrite_num_loc=True)

merges posthoc df with newly created errorbar span detection

Source code in plot_posthoc_test\plot_stat_annotate.py
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
def add_errorbar_loc_on_posthoc(posthoc_df, bar_coords, overwrite_num_loc= True):
    """
    Merge the posthoc comparison dataframe with the errorbar line matched to each compared group.

    For every comparison row, get_child_df_row is used to find the row(s) of bar_coords
    matching group 1 and group 2. The matched 'child_x'/'child_y' columns are renamed to
    'g_1_child_x'/'g_1_child_y' (and 'g_2_...') and left-merged back onto posthoc_df using
    the four location columns as keys.

    Parameters:
    posthoc_df (pandas.DataFrame): Comparison dataframe holding the columns 'g1_num_loc', 'g2_num_loc', 'g1_cat_loc' and 'g2_cat_loc'.
    bar_coords (pandas.DataFrame): Line coordinates in the format accepted by get_child_df_row.
    overwrite_num_loc (bool, optional): If True, replace g1_num_loc/g2_num_loc with the maximum of the matched 'g_1_child_y'/'g_2_child_y' values. Defaults to True.

    Returns:
    pandas.DataFrame: posthoc_df merged with the matched errorbar coordinates.

    Raises:
    AssertionError: If no line in bar_coords matches group 1 or group 2 of at least one comparison row.
    ValueError: Raised by pandas.concat when posthoc_df has no rows (nothing to concatenate).
    """
    merge_keys = ['g1_num_loc', 'g2_num_loc', 'g1_cat_loc', 'g2_cat_loc']

    def _match_group(group_num, cat_loc, num_loc):
        # Look up the line for one group, tag it with the merge keys and prefix its coordinate columns.
        matched = get_child_df_row(cat_loc, num_loc, bar_coords)
        matched = matched.assign(**{f'g{group_num}_cat_loc': cat_loc, f'g{group_num}_num_loc': num_loc})
        return matched.rename({col: "_".join([f"g_{group_num}", col]) for col in ['child_x', 'child_y']}, axis = 1)

    error_rows = []
    success_rows = []
    for row in posthoc_df.itertuples():
        g1_row_match = _match_group(1, row.g1_cat_loc, row.g1_num_loc)
        g2_row_match = _match_group(2, row.g2_cat_loc, row.g2_num_loc)
        if g1_row_match.empty or g2_row_match.empty:
            # Record the failure and do not let the unmatched row reach the merge table.
            error_rows.append(row.Index)
            continue
        # Keep only the group-2 columns that group 1 does not already provide, then place both side by side.
        g2_only_cols = g2_row_match.columns.difference(g1_row_match.columns)
        success_rows.append(pd.concat([g1_row_match.reset_index(drop=True),
                                       g2_row_match[g2_only_cols].reset_index(drop=True)], axis = 1))
    # Explicit raise (same exception type as the former assert) so the check also runs under `python -O`,
    # and before any concatenation/merge is attempted.
    if error_rows:
        raise AssertionError(f" len {error_rows} of error rows list")
    ebar_loc = pd.concat(success_rows)
    posthoc_df = posthoc_df.merge(ebar_loc, how = 'left', on = merge_keys)
    if overwrite_num_loc:
        # Move the numerical location of each group to the top of its matched line.
        posthoc_df.loc[:, 'g1_num_loc'] = posthoc_df.apply(lambda x: x['g_1_child_y'].max(),axis = 1)
        posthoc_df.loc[:, 'g2_num_loc'] = posthoc_df.apply(lambda x: x['g_2_child_y'].max(),axis = 1)
    return posthoc_df

get_ax_children_types(ax_obj)

Return a list giving the type of each child of the matplotlib Axes object.

Source code in plot_posthoc_test\plot_stat_annotate.py
22
23
24
def get_ax_children_types(ax_obj):
    ''' Return a list with the type of every child returned by ax_obj.get_children(), in the same order.'''
    children = ax_obj.get_children()
    return list(map(type, children))

get_child_df_row(hue_cat_loc, hue_num_loc, bar_coords)

Non-vectorized function relying on vectorized subfunctions.

Source code in plot_posthoc_test\plot_stat_annotate.py
82
83
84
85
86
87
88
89
90
91
def get_child_df_row(hue_cat_loc,hue_num_loc, bar_coords):
    ''' Return the rows of bar_coords whose line matches the given categorical/numerical location.

    A row is kept when all three conditions hold:
      * its 'y_is_2_elem' flag is set,
      * hue_cat_loc, rounded to 2 decimals, is one of the values in its 'child_x',
      * hue_num_loc lies between the min and max of its 'child_y' (see is_val_between_range_min_max).
    The bookkeeping columns 'index', 'next_collection_index' and 'child_index' are dropped from the result.
    '''
    two_point_mask = bar_coords.y_is_2_elem

    def _contains_cat_loc(bar_row):
        # membership test against the x values stored for this line
        return np.round(hue_cat_loc,decimals = 2) in bar_row.child_x

    def _spans_num_loc(bar_row):
        return is_val_between_range_min_max(hue_num_loc, bar_row.child_y)

    # both masks are built row by row over the whole frame before being combined
    cat_mask = bar_coords.apply(_contains_cat_loc, axis = 1)
    span_mask = bar_coords.apply(_spans_num_loc, axis = 1)
    matching_rows = bar_coords[span_mask & two_point_mask & cat_mask]
    return matching_rows.drop(['index','next_collection_index', 'child_index'],axis = 1)

get_hue_errorbar_loc_dict(ax_input, hue_order)

Get a dictionary of the data errorbars at each level of the hue variable.

Parameters: ax_input (matplotlib.axes.Axes): The input axis object. hue_order (list): The order of the hue in a list.

Returns: list of dict: One dictionary per axis collection, with keys 'hue' and 'data_locs', holding the hue errorbar locations.

Source code in plot_posthoc_test\plot_stat_annotate.py
461
462
463
464
465
466
467
468
469
470
471
472
473
474
def get_hue_errorbar_loc_dict(ax_input, hue_order):
    """ 
    Collect the offsets of every collection on the axis, labelled with its hue level.

    The i-th entry of ax_input.collections is paired with hue_order[i].

    Parameters:
    ax_input (matplotlib.axes.Axes): The input axis object.
    hue_order (list): The order of the hue in a list.

    Returns:
    list of dict: One dictionary per collection, with keys 'hue' (the hue level) and
    'data_locs' (the `.data` of the collection's get_offsets() result).
    """
    hue_point_loc_dict = []
    for position, collection in enumerate(ax_input.collections):
        entry = {'hue': hue_order[position], 'data_locs': collection.get_offsets().data}
        hue_point_loc_dict.append(entry)
    return hue_point_loc_dict

get_hue_loc_on_axis(hue_loc_df, posthoc_df, detect_error_bar=False)

Add numerical and categorical axis locations to the posthoc comparison dataframe. Main function creating/label numeric loc on axis NEW (2.6.25)- add detect errorbar to automatically detect errorbar, and move point marked for symbol loc if so Parameters: hue_loc_df (pandas.DataFrame): DataFrame with hue locations. posthoc_df (pandas.DataFrame): DataFrame with posthoc comparisons.

Returns: pandas.DataFrame: Updated DataFrame with axis locations.

Source code in plot_posthoc_test\plot_stat_annotate.py
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
def get_hue_loc_on_axis(hue_loc_df, posthoc_df, detect_error_bar = False): 
    """ 
    Add numerical and categorical axis locations to the posthoc comparison dataframe.

    The columns are added to posthoc_df in place, and the same dataframe is returned:
      hue_group_1/2_locs : the hue_loc_df entry looked up by the 'group_1'/'group_2' name
      g1/2_num_loc       : position of group 1/2 on the numerical axis (usually y), column 1 of the point array
      g1/2_cat_loc       : position of group 1/2 on the categorical axis (usually x), column 0 of the point array
      max_group_loc_val  : row-wise max of g1_num_loc and g2_num_loc
    The point used for each group is selected with the first element of 'group_1/2_order_pos'.

    Parameters:
    hue_loc_df (pandas.DataFrame): DataFrame with hue locations, indexed by hue level.
    posthoc_df (pandas.DataFrame): DataFrame with posthoc comparisons.
    detect_error_bar (bool, optional): Accepted for interface compatibility; not read by this function.

    Returns:
    pandas.DataFrame: Updated DataFrame with axis locations.
    """
    group_numbers = (1, 2)

    # look up the point array of each compared group by its hue name
    for group_num in group_numbers:
        name_col = f'group_{group_num}'
        posthoc_df[f'hue_group_{group_num}_locs'] = posthoc_df.apply(
            lambda comparison: hue_loc_df.loc[comparison[name_col]], axis = 1)

    # column 1 of the point array = numerical axis, column 0 = categorical axis
    for axis_label, coord_col in (('num', 1), ('cat', 0)):
        for group_num in group_numbers:
            locs_col = f'hue_group_{group_num}_locs'
            pos_col = f'group_{group_num}_order_pos'
            posthoc_df[f'g{group_num}_{axis_label}_loc'] = posthoc_df.apply(
                lambda comparison: comparison[locs_col][comparison[pos_col][0], coord_col], axis = 1)

    # highest of the two numerical positions in each comparison
    posthoc_df['max_group_loc_val'] = posthoc_df[['g1_num_loc', 'g2_num_loc']].max(axis = 1)
    return posthoc_df 

get_hue_point_loc_df(ax_input, hue_order)

Get a DataFrame of the datapoints at each level of the hue variable.

Parameters: ax_input (matplotlib.axes.Axes): The input axis object. hue_order (list): The order of the hue.

Returns: pandas.DataFrame: DataFrame with hue point locations.

Source code in plot_posthoc_test\plot_stat_annotate.py
447
448
449
450
451
452
453
454
455
456
457
458
459
def get_hue_point_loc_df(ax_input, hue_order):
    """ 
    Build a DataFrame of the datapoints at each level of the hue variable.

    Thin wrapper that turns the records produced by get_hue_point_loc_dict into a
    DataFrame indexed by the 'hue' key.

    Parameters:
    ax_input (matplotlib.axes.Axes): The input axis object.
    hue_order (list): The order of the hue.

    Returns:
    pandas.DataFrame: DataFrame with hue point locations, indexed by 'hue'.
    """
    per_hue_records = get_hue_point_loc_dict(ax_input, hue_order)  # one record of point locations per hue level
    locs_frame = pd.DataFrame.from_dict(per_hue_records)
    return locs_frame.set_index('hue')

get_hue_point_loc_dict(ax_input, hue_order)

Get a dictionary of the datapoints at each level of the hue variable.

Parameters: ax_input (matplotlib.axes.Axes): The input axis object. hue_order (list): The order of the hue.

Returns: list of dict: One dictionary per axis collection, with keys 'hue' and 'data_locs', holding the hue point locations.

Source code in plot_posthoc_test\plot_stat_annotate.py
477
478
479
480
481
482
483
484
485
486
487
488
489
def get_hue_point_loc_dict(ax_input, hue_order):
    """ 
    Collect the datapoint offsets of every collection on the axis, labelled with its hue level.

    The i-th entry of ax_input.collections is paired with hue_order[i].

    Parameters:
    ax_input (matplotlib.axes.Axes): The input axis object.
    hue_order (list): The order of the hue.

    Returns:
    list of dict: One dictionary per collection, with keys 'hue' (the hue level) and
    'data_locs' (the `.data` of the collection's get_offsets() result).
    """
    hue_point_loc_dict = []
    for position, collection in enumerate(ax_input.collections):
        offsets = collection.get_offsets()
        hue_point_loc_dict.append({'hue': hue_order[position], 'data_locs': offsets.data})
    return hue_point_loc_dict

get_pair_stat_test_result(test_name, ax_category_level, group_order, group_1_name, group_2_name, data_group_1_values, data_group_2_values, ax_var_is_hue=False)

Run statistical test on data groups.

Parameters: test_name (str): The name of the test. ax_category_level (str): The axis category level. group_order (iterable): The order of the groups. group_1_name (str): The name of the first group. group_2_name (str): The name of the second group. data_group_1_values (numpy.ndarray): Values of the first group. data_group_2_values (numpy.ndarray): Values of the second group. ax_var_is_hue (bool, optional): Whether the axis variable is the hue. Defaults to False.

Returns: dict: Dictionary with the statistical test results.

Source code in plot_posthoc_test\plot_stat_annotate.py
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
def get_pair_stat_test_result(test_name, ax_category_level,group_order, group_1_name, group_2_name, data_group_1_values,data_group_2_values,ax_var_is_hue = False):
    """ 
    Run the requested statistical test on two groups of values and pack the outcome in a dict.

    Parameters:
    test_name (str): The name of the test. Handled values: 'MWU', 'bootstrap_sdev_overlap',
        '2_sample_t_test' (run with equal_var=False), 'permutation_test' (10000 passed to the helper) and 'custom'.
    ax_category_level (str): The axis category level.
    group_order (iterable): The order of the groups; searched to record the position of each group.
    group_1_name (str): The name of the first group.
    group_2_name (str): The name of the second group.
    data_group_1_values (numpy.ndarray): Values of the first group.
    data_group_2_values (numpy.ndarray): Values of the second group.
    ax_var_is_hue (bool, optional): Whether the axis variable is the hue. Defaults to False.

    Returns:
    dict: Dictionary with the compared names, the `.shape` of each value array ('group_1_n'/'group_2_n'),
    the order positions, per-group mean and sem, the test name, the rounded test result and the p-value
    (second element of the test result).

    NOTE(review): for 'custom' (or any unrecognised test_name) the test result stays an empty list,
    so reading the p-value from index 1 raises IndexError.
    """
    if test_name == 'bootstrap_sdev_overlap':
        bootstrap_out = test_group_mean_separation(data_group_1_values, data_group_2_values)
        # the bootstrap helper supplies its own summary; its '*_std' entries are stored under the '*_sem' keys
        group_mean_dict = {'group_1_mean': bootstrap_out['group_1_mean'],
                           'group_1_sem': bootstrap_out['group_1_std'],
                           'group_2_mean': bootstrap_out['group_2_mean'],
                           'group_2_sem': bootstrap_out['group_2_std']}
        stat_result = [bootstrap_out['mean_diff_more_than_sdevs'], bootstrap_out['pseudo_pvalue']]
    else:
        if test_name == 'custom':
            print('custom test ran')
            stat_result = []
        elif test_name == 'MWU':
            stat_result = stats.mannwhitneyu(data_group_1_values, data_group_2_values)
        elif test_name == '2_sample_t_test':
            # equal_var = False -> Welch's test for unequal variances
            stat_result = stats.ttest_ind(data_group_1_values, data_group_2_values, equal_var = False)
        elif test_name == 'permutation_test':
            stat_result = run_permutation_test_on_diff_of_vector_means(data_group_1_values, data_group_2_values, 10000)
        else:
            stat_result = []
        # nan-aware mean and standard error of each group
        group_mean_dict = {'group_1_mean': np.nanmean(data_group_1_values),
                           'group_1_sem': scipy.stats.sem(data_group_1_values, nan_policy = 'omit'),
                           'group_2_mean': np.nanmean(data_group_2_values),
                           'group_2_sem': scipy.stats.sem(data_group_2_values, nan_policy = 'omit')}

    # When the axis ticks are the hue groups, each group is located by its own name;
    # otherwise both groups are located by the axis category they are compared within.
    if ax_var_is_hue:
        lookup_1, lookup_2 = group_1_name, group_2_name
    else:
        lookup_1 = lookup_2 = ax_category_level
    group_pos = {'group_1_order_pos': get_match_index_in_iterable(group_order, lookup_1),
                 'group_2_order_pos': get_match_index_in_iterable(group_order, lookup_2)}

    result_dict = {'category_compared_within': ax_category_level,
                   'group_1': group_1_name,
                   'group_2': group_2_name,
                   'group_1_n': data_group_1_values.shape,
                   'group_2_n': data_group_2_values.shape}
    result_dict.update(group_pos)
    result_dict.update(group_mean_dict)
    result_dict['test_name'] = test_name
    result_dict['stat_result'] = np.round(stat_result, 5)
    result_dict['pvalue'] = stat_result[1]
    return result_dict

get_sig_bar_x_vals(comparison_tuple)

Get the x-values for the significance bar.

Parameters: comparison_tuple (namedtuple): Tuple with comparison information.

Returns: list: List of x-values for the significance bar.

Source code in plot_posthoc_test\plot_stat_annotate.py
505
506
507
508
509
510
511
512
513
514
515
516
517
def get_sig_bar_x_vals(comparison_tuple):
    """ 
    Build the four x-coordinates of a significance bar.

    The bar is described by four points: two at the categorical position of group 1
    followed by two at the categorical position of group 2.

    Parameters:
    comparison_tuple (namedtuple): Comparison row exposing g1_cat_loc and g2_cat_loc.

    Returns:
    list: [g1_cat_loc, g1_cat_loc, g2_cat_loc, g2_cat_loc].
    """
    left_x = comparison_tuple.g1_cat_loc
    right_x = comparison_tuple.g2_cat_loc
    return [left_x, left_x, right_x, right_x]

get_sig_bar_y_vals(bottom_val=None, line_height=1.01)

Get the y-values for the significance bar. Parameters: bottom_val (float, optional): The bottom value for the bar. Defaults to 0.95. line_height (float, optional): The height of the line. Defaults to 1.01.

Returns: list: List of y-values for the significance bar.

Source code in plot_posthoc_test\plot_stat_annotate.py
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
def get_sig_bar_y_vals(bottom_val = None, line_height= 1.01):
    """ 
    Build the four y-coordinates of a significance bar.

    The bar starts and ends at bottom_val; its two middle points sit at bottom_val * line_height.

    Parameters:
    bottom_val (float, optional): The bottom value for the bar. Defaults to 0.95 when None.
    line_height (float, optional): Multiplier applied to bottom_val for the top of the bar. Defaults to 1.01.

    Returns:
    list: [bottom, bottom * line_height, bottom * line_height, bottom].
    """
    # only None triggers the default; an explicit 0 is kept as given
    base_y = 0.95 if bottom_val is None else bottom_val
    return [base_y, base_y * line_height, base_y * line_height, base_y]

get_x_ticks_as_df(ticklabel_obj)

Get a DataFrame of the x-tick labels and their positions.

Parameters: ticklabel_obj (list): List of tick label objects.

Returns: pandas.DataFrame: DataFrame with x-tick labels and positions.

Source code in plot_posthoc_test\plot_stat_annotate.py
491
492
493
494
495
496
497
498
499
500
501
502
def get_x_ticks_as_df(ticklabel_obj):
    """ 
    Tabulate tick labels: one row per label with its text and its position.

    Parameters:
    ticklabel_obj (list): List of tick label objects exposing get_text() and get_position().

    Returns:
    pandas.DataFrame: DataFrame with columns 'tick_text' and 'tick_pos'.
    """
    tick_records = []
    for label in ticklabel_obj:
        tick_records.append({'tick_text': label.get_text(), 'tick_pos': label.get_position()})
    return pd.DataFrame.from_records(tick_records)

is_val_between_range_min_max(value, range_array)

Simple function: check whether a value lies strictly between the minimum and maximum of a range array.

Source code in plot_posthoc_test\plot_stat_annotate.py
66
67
68
69
70
def is_val_between_range_min_max(value, range_array):
    ''' Return whether value lies strictly between the min and the max of range_array (both bounds excluded).'''
    upper_bound = np.max(range_array)
    lower_bound = np.min(range_array)
    return (value < upper_bound) & (value > lower_bound)

main_run_posthoc_tests_and_get_hue_loc_df(ax_input, plot_params, plot_obj, preset_comparisons, hue_var=None, test_name=None, hue_order=None, ax_var_is_hue=False, detect_error_bar=False)

Run posthoc tests on all axis ticks, get hue levels for each axis tick, and join this to the dataframe produced.

Parameters: ax_input (matplotlib.axes.Axes): The input axis object. plot_params (dict): Dictionary containing plot parameters. plot_obj (seaborn.axisgrid.FacetGrid): The plot object. preset_comparisons (list): List of preset comparisons. hue_var (str, optional): The hue variable. Defaults to None. test_name (str, optional): The name of the test. Defaults to None. hue_order (list, optional): The order of the hue. Defaults to None. ax_var_is_hue (bool, optional): Whether the axis variable is the hue. Defaults to False.

Returns: pandas.DataFrame: DataFrame with posthoc test results and hue locations.

Source code in plot_posthoc_test\plot_stat_annotate.py
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
def main_run_posthoc_tests_and_get_hue_loc_df(ax_input, plot_params, plot_obj, preset_comparisons,
                                               hue_var= None, test_name = None, hue_order = None, ax_var_is_hue=False,detect_error_bar = False):
    """ 
    Run the posthoc tests for every axis tick and attach the axis location of each compared group.

    Steps: run run_posthoc_tests_on_all_ax_ticks on plot_params['data'], read the point locations of
    each hue level from ax_input, add the numerical/categorical locations to the comparison dataframe,
    and, when detect_error_bar is set, merge in the errorbar line coordinates found among the children
    of plot_obj.

    Parameters:
    ax_input (matplotlib.axes.Axes): The input axis object.
    plot_params (dict): Dictionary containing plot parameters ('data', 'x', 'y', 'hue', 'hue_order', 'order' are read).
    plot_obj (seaborn.axisgrid.FacetGrid): The plot object.
    preset_comparisons (list): List of preset comparisons.
    hue_var (str, optional): The hue variable. Defaults to plot_params['hue'].
    test_name (str, optional): The name of the test; forwarded unchanged (None included). Defaults to None.
    hue_order (list, optional): The order of the hue. Defaults to plot_params['hue_order'].
    ax_var_is_hue (bool, optional): Whether the axis variable is the hue. Defaults to False.
    detect_error_bar (bool, optional): Whether to add errorbar locations to the result. Defaults to False.

    Returns:
    pandas.DataFrame: DataFrame with posthoc test results and hue locations.
    """
    # anything left unset falls back to the plot parameters
    hue_var = plot_params['hue'] if hue_var is None else hue_var
    hue_order = plot_params['hue_order'] if hue_order is None else hue_order

    # order of the compared groups inside a collection: the hue order when comparing
    # across hue groups, the x-category order when hues are tiled over x categories
    order_key = 'hue_order' if ax_var_is_hue else 'order'
    group_order = plot_params[order_key]

    # dataframe describing every posthoc comparison
    comparisons = run_posthoc_tests_on_all_ax_ticks(
        plot_params['data'],
        plot_obj = plot_obj,
        comparison_list = preset_comparisons,
        ax_grouping_col = plot_params['x'],
        group_order = group_order,
        hue_col_name = hue_var,
        value_col_name = plot_params['y'],
        test_name = test_name,
        ax_var_is_hue = ax_var_is_hue,
    )
    # point locations per hue level, then the position of each compared group on both axes
    point_locs = get_hue_point_loc_df(ax_input, hue_order)
    comparisons = get_hue_loc_on_axis(point_locs, comparisons)

    # when the axis category is the hue itself, every comparison is within the single x variable
    if ax_var_is_hue:
        comparisons['category_compared_within'] = plot_params['x']

    if not detect_error_bar:
        return comparisons

    print('Error bar detected, moving bounds')
    ax_childs = plot_obj.get_children()
    # positions of the path-collection children among the children of plot_obj
    ax_child_points_index = []
    for position, child_type in enumerate(get_ax_children_types(plot_obj)):
        if child_type is path_collection_type:
            ax_child_points_index.append(position)
    bar_coords = return_ax_child_line_coor(ax_childs, ax_child_points_index)
    return add_errorbar_loc_on_posthoc(comparisons, bar_coords)

plot_sig_bars_w_comp_df(ax_input, sig_comp_df, direction_to_plot=None)

Plot significance bars with comparison dataframe.

Parameters: ax_input (matplotlib.axes.Axes): The input axis object. sig_comp_df (pandas.DataFrame): DataFrame with significance comparisons. direction_to_plot (str, optional): Direction to plot ('top_down', 'bottom_up'). Defaults to 'bottom_up'. TO- given parameters, plot vertical lines between centers of datapoints of interest (pre-sorted), with significance star (pre-calculated)

Source code in plot_posthoc_test\plot_stat_annotate.py
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
def plot_sig_bars_w_comp_df(ax_input, sig_comp_df, direction_to_plot = None):
    """ 
    Plot significance bars with comparison dataframe.

    For every unique category in sig_comp_df.category_compared_within, one bar and one significance
    annotation are drawn per comparison row. x positions are data coordinates (g1_cat_loc/g2_cat_loc),
    y positions are axes-fraction coordinates starting at 0.95. When the annotation drawn for the
    previous comparison reaches the proposed bar height, the bar is moved above that annotation.

    Parameters:
    ax_input (matplotlib.axes.Axes): The input axis object.
    sig_comp_df (pandas.DataFrame): DataFrame with significance comparisons; the columns
        'category_compared_within', 'g1_cat_loc', 'g2_cat_loc' and 'pvalue' are read.
    direction_to_plot (str, optional): Direction to plot ('top_down', 'bottom_up'). Defaults to 'bottom_up'.
        The value is not otherwise used by this function.

    Returns:
    None. The bars and annotations are added to ax_input.
    """
    ## plotting params
    if direction_to_plot is None:
        direction_to_plot = 'bottom_up'

    line_height = 1.01
    offset_constant = 0.025  # linear amount (axes fraction) added above the previous annotation
    star_space_to_line = offset_constant/5
    # x coordinates of this transform are data coordinates, y coordinates are axes fractions
    trans = matplotlib.transforms.blended_transform_factory(x_transform = ax_input.transData,
                                                            y_transform = ax_input.transAxes)
    ## main loop over categorical ticks
    # .unique() so that each category is drawn once, not once per comparison row it contains
    for cat in sig_comp_df.category_compared_within.unique():
        # bbox of the previous annotation in axes coordinates: [[lower_x, lower_y], [upper_x, upper_y]]
        top_bbox = np.array([[0, 0],[0, 0]])
        for comp in sig_comp_df.loc[sig_comp_df.category_compared_within == cat,:].itertuples():
            x_vals = get_sig_bar_x_vals(comp)
            y_vals = get_sig_bar_y_vals(0.95, line_height)
            # compare against the TOP edge of the previous annotation ([1,1]), the same edge the
            # bar is moved above when an overlap is found
            line_overlap = (top_bbox[1,1] >= y_vals[0])
            if line_overlap:
                y_vals = get_sig_bar_y_vals(top_bbox[1,1]+offset_constant, line_height)
            text_x = (x_vals[0]+ x_vals[2])*.5
            text_y = y_vals[1] + star_space_to_line
            # draw the bar, then the significance annotation centred above it
            ax_input.plot(x_vals, y_vals, lw=annotator_default['line_width'], color = 'black', transform = trans, clip_on = False)
            star_annot = ax_input.annotate(convert_pvalue_to_asterisks(comp.pvalue), xy = (text_x, text_y), xycoords = ('data', 'axes fraction'),
                            ha='center', va='baseline', fontsize = 'small',)
            # window extent is in display units; convert it to axes coordinates and remember it
            # so the next comparison can be checked against it
            top_bbox = ax_input.transAxes.inverted().transform(star_annot.get_window_extent())

plot_sig_bars_w_comp_df_tight(ax_input, sig_comp_df, direction_to_plot=None, tight=None, tight_offset=None, offset_constant=None, debug=None)

Plot significance bars with comparison dataframe, using a tight layout. TO- given parameters, plot vertical lines between centers of datapoints of interest (pre-sorted), with significance star (pre-calculated) Parameters ax_input (matplotlib.axes.Axes): The input axis object. sig_comp_df (pandas.DataFrame): DataFrame with significance comparisons. direction_to_plot (str, optional): Direction to plot ('top_down', 'bottom_up'). Defaults to 'bottom_up'. tight (bool, optional): Whether to plot bars right above their corresponding values. Defaults to True. tight_offset (float, optional): Offset for tight layout. Defaults to 0.075. offset_constant (float, optional): Constant for offset. Defaults to 0.0225. debug (bool, optional): Whether to print debug information. Defaults to None.

Source code in plot_posthoc_test\plot_stat_annotate.py
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
def plot_sig_bars_w_comp_df_tight(ax_input, sig_comp_df, direction_to_plot = None, tight = None, tight_offset = None, offset_constant=None, debug = None):
    """ 
    Plot significance bars with comparison dataframe, using a tight layout.

    For every unique category in sig_comp_df.category_compared_within, one bar and one significance
    annotation are drawn per comparison row. x positions are data coordinates, y positions are
    axes-fraction coordinates. With tight=True the first bar of a category starts tight_offset above
    the highest g1_num_loc/g2_num_loc of that category; otherwise it starts at 0.8. When the
    annotation drawn for the previous comparison reaches the proposed bar height, the bar is moved
    offset_constant above that annotation.

    Parameters:
    ax_input (matplotlib.axes.Axes): The input axis object.
    sig_comp_df (pandas.DataFrame): DataFrame with significance comparisons; the columns
        'category_compared_within', 'g1_cat_loc', 'g2_cat_loc', 'pvalue' and (when tight)
        'g1_num_loc', 'g2_num_loc' are read.
    direction_to_plot (str, optional): Direction to plot ('top_down', 'bottom_up'). Defaults to 'bottom_up'.
        The value is not otherwise used by this function.
    tight (bool, optional): Whether to plot bars right above their corresponding values. Defaults to True.
    tight_offset (float, optional): Offset for tight layout, in axes fraction. Defaults to 0.075.
    offset_constant (float, optional): Constant for offset, in axes fraction. Defaults to 0.0225.
    debug (bool, optional): Whether to print debug information. Defaults to None.

    Returns:
    None. The bars and annotations are added to ax_input.
    """
    # axes fraction -> display -> data, and its inverse (data -> axes fraction)
    transform_ax_to_data = ax_input.transAxes + ax_input.transData.inverted()
    transform_data_to_ax = transform_ax_to_data.inverted()
    # default values
    if direction_to_plot is None:
        direction_to_plot = 'bottom_up'
    # Base start height (axes fraction). Defined unconditionally so that passing an explicit
    # direction_to_plot together with tight=False no longer leaves it unassigned.
    line_start_y_pos = 0.8
    if tight is None:
        tight = True  # plot bars right above the datapoints
    if tight_offset is None:
        tight_offset = 0.075  # axes fraction between the point of interest and the bar
    line_height = 1.00  # multiplier for the top of the bar; 1.00 gives a flat line
    if offset_constant is None:
        offset_constant = 0.0225  # linear amount to add, in axes fraction

    star_space_to_line = offset_constant*0.1
    # x coordinates of this transform are data coordinates, y coordinates are axes fractions
    trans = matplotlib.transforms.blended_transform_factory(x_transform = ax_input.transData,
                                                            y_transform = ax_input.transAxes)
    ## main loop over categorical ticks, bottom up approach
    for cat in sig_comp_df.category_compared_within.unique():
        # bbox of the previous annotation in axes coordinates: [[lower_x, lower_y], [upper_x, upper_y]]
        top_bbox = np.array([[0, 0],[0, 0]])
        sig_comp_category = sig_comp_df.loc[sig_comp_df.category_compared_within == cat,:]
        if tight:
            # highest numerical position among the groups compared within this category
            max_numeric_ax_value = sig_comp_category.loc[:, ['g1_num_loc','g2_num_loc']].max().values.max()
            # data -> axes fraction, then lift by tight_offset
            line_start_y_pos = transform_data_to_ax.transform((0,max_numeric_ax_value))[1]+tight_offset
            # debug output is emitted here, once the values it reports have been computed
            if debug:
                print(f'tight format, max_numeric_ax_value = {max_numeric_ax_value}.  start y val  = {line_start_y_pos}')
        for comp in sig_comp_category.itertuples():
            x_vals = get_sig_bar_x_vals(comp)
            y_vals = get_sig_bar_y_vals(line_start_y_pos, line_height)
            # compare the top edge ([1,1]) of the previous annotation with the proposed bar height
            line_overlap = (top_bbox[1,1] >= y_vals[0])
            if debug:
                # report the same value that the overlap check above uses
                print(f"line overlap = ({top_bbox[1,1]} >= {y_vals[0]}")
                print(f"line x_vals, y_vals: {x_vals, y_vals}")
            if line_overlap:
                # move the current bar above the previous annotation
                y_vals = get_sig_bar_y_vals(top_bbox[1,1]+offset_constant, line_height)

            text_x = (x_vals[0]+ x_vals[2])*.5
            text_y = y_vals[1] + star_space_to_line  # separation between bar and annotation, in axes fraction
            # draw the bar, then the significance annotation centred above it
            ax_input.plot(x_vals, y_vals, lw=annotator_default['line_width'], color = 'black', transform = trans, clip_on = False)
            star_annot = ax_input.annotate(convert_pvalue_to_asterisks(comp.pvalue), xy = (text_x, text_y), xycoords = ('data', 'axes fraction'),
                            ha='center', va='baseline', fontsize = 'x-small',fontweight = 'light')
            # window extent is in display units; convert it to axes coordinates and remember it
            # so the next comparison can be checked against it
            top_bbox = ax_input.transAxes.inverted().transform(star_annot.get_window_extent())

return_ax_child_line_coor(ax_childs, ax_child_points_index)

Given a set of indices of the ax child objects between which to query (e.g. path collections index), find fully nonnan yvals in the lines (corresponding to the actual real vertical errorbar) and return coords

Source code in plot_posthoc_test\plot_stat_annotate.py
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
def return_ax_child_line_coor(ax_childs, ax_child_points_index, drop_nan_row=True):
    """Collect x/y data of the ax children lying before each delimiter index.

    For every entry ``i`` of ``ax_child_points_index`` the children in
    ``ax_childs[start:i]`` are inspected, where ``start`` is 0 for the first
    entry and ``previous_entry + 1`` afterwards (the delimiter children
    themselves are never inspected).

    Parameters:
    ax_childs (sequence): Children of an axes object. Every child inside a
        queried range must provide ``get_xdata()`` and ``get_ydata()``
        (presumably matplotlib Line2D objects -- confirm against caller).
    ax_child_points_index (iterable of int): Delimiter positions in
        ``ax_childs`` (e.g. the path collection indices).
    drop_nan_row (bool, optional): When True (default, the previous
        hard-coded behaviour) rows whose y data contains any NaN are removed,
        as those are not real lines.

    Returns:
    pandas.DataFrame: One row per inspected child with the columns
        ``index`` (position inside its range), ``child_index`` (position in
        ``ax_childs``), ``child_x`` (x data rounded to 2 decimals),
        ``child_y`` (raw y data), ``next_collection_index`` (the delimiter
        closing the range), ``y_is_nonnan`` and ``y_is_2_elem``.
        The frame is empty (but keeps these columns) when nothing is found.
    """
    columns = ['index', 'child_index', 'child_x', 'child_y', 'next_collection_index']
    records = []
    range_start = 0
    for collection_index in ax_child_points_index:
        # look back from the current delimiter to just after the previous one
        for offset, line in enumerate(ax_childs[range_start:collection_index]):
            records.append({
                'index': offset,
                'child_index': range_start + offset,
                'child_x': np.round(line.get_xdata(), decimals=2),
                'child_y': line.get_ydata(),
                'next_collection_index': collection_index,
            })
        range_start = collection_index + 1

    # explicit columns keep the schema stable even when no child was collected
    coords = pd.DataFrame(records, columns=columns)
    # explicit bool dtype so the mask stays a valid boolean indexer when empty
    coords['y_is_nonnan'] = np.array(
        [not np.any(np.isnan(y)) for y in coords['child_y']], dtype=bool)
    coords['y_is_2_elem'] = np.array(
        [np.size(y) == 2 for y in coords['child_y']], dtype=bool)
    if drop_nan_row:
        # rows with NaN y values are placeholders, not drawn error bars
        coords = coords[coords['y_is_nonnan']]
    return coords

run_posthoc_test_on_tick_hue_groups(ax_tick_data, hue_group_1, hue_group_2, ax_category_level, group_order, hue_col_name, value_cols_name, test_name=None, ax_var_is_hue=False)

Run posthoc test on tick hue groups. Use existing dataframe filtered by the current axis levels, and perform stats on the hue groups.

Parameters: ax_tick_data (pandas.DataFrame): The axis tick corresponding dataframe. hue_group_1 (str): The first hue group. hue_group_2 (str): The second hue group. ax_category_level (str): The axis category level. group_order (iterable): The order of the groups. hue_col_name (str): The hue column name. value_cols_name (str): The value column name. test_name (str, optional): The name of the test. Defaults to 'MWU'. ax_var_is_hue (bool, optional): Whether the axis variable is the hue. Defaults to False.

Returns: dict: Dictionary with the posthoc test results.

Source code in plot_posthoc_test\plot_stat_annotate.py
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
def run_posthoc_test_on_tick_hue_groups(ax_tick_data, hue_group_1, hue_group_2, ax_category_level,group_order,
                                         hue_col_name, value_cols_name,test_name = None,ax_var_is_hue = False):
    """
    Compare two hue groups of one axis tick with a pairwise statistical test.

    The rows of ``ax_tick_data`` belonging to each hue group are selected, their
    value column is extracted, and both value arrays are handed to
    ``get_pair_stat_test_result`` together with the descriptive arguments.

    Parameters:
    ax_tick_data (pandas.DataFrame): Data already restricted to the axis tick of interest.
    hue_group_1 (str): Level of ``hue_col_name`` forming the first group.
    hue_group_2 (str): Level of ``hue_col_name`` forming the second group.
    ax_category_level (str): Axis category the comparison belongs to (passed through).
    group_order (iterable): Order of the groups (passed through).
    hue_col_name (str): Name of the column holding the hue levels.
    value_cols_name (str): Name of the column holding the measured values.
    test_name (str, optional): Name of the test; ``None`` is replaced by 'MWU'.
    ax_var_is_hue (bool, optional): Whether the axis variable is the hue (passed through). Defaults to False.

    Returns:
    The result of ``get_pair_stat_test_result`` (documented by the original
    author as a dict with the posthoc test results).
    """
    test_name = 'MWU' if test_name is None else test_name

    def _values_of(hue_level):
        # rows of this tick that belong to the requested hue level
        in_group = ax_tick_data[hue_col_name] == hue_level
        return ax_tick_data.loc[in_group, value_cols_name].values

    first_values = _values_of(hue_group_1)
    second_values = _values_of(hue_group_2)
    return get_pair_stat_test_result(test_name, ax_category_level, group_order,
                                     hue_group_1, hue_group_2,
                                     first_values, second_values, ax_var_is_hue)

run_posthoc_tests_on_all_ax_ticks(plot_data, plot_obj, comparison_list, ax_grouping_col, group_order, hue_col_name, value_col_name, test_name=None, ax_var_is_hue=False)

Run posthoc tests on all axis ticks.

Parameters: plot_data (pandas.DataFrame): The plot data. plot_obj (seaborn.axisgrid.FacetGrid): The plot object. comparison_list (list): List of comparisons. ax_grouping_col (str): The column name for axis grouping. group_order (iterable): The order of the groups. hue_col_name (str): The hue column name. value_col_name (str): The value column name. test_name (str, optional): The name of the test. Defaults to 'MWU'. ax_var_is_hue (bool, optional): Whether the axis variable is the hue. Defaults to False.

Returns: pandas.DataFrame: DataFrame with posthoc test results.
Source code in plot_posthoc_test\plot_stat_annotate.py
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
def run_posthoc_tests_on_all_ax_ticks(plot_data, plot_obj, comparison_list, ax_grouping_col, group_order, hue_col_name, value_col_name,
                                      test_name = None, ax_var_is_hue = False):
    """
    Run every requested pairwise posthoc comparison and tabulate the results.

    When ``ax_var_is_hue`` is true the comparisons run once on the whole of
    ``plot_data``. Otherwise they run separately for each unique level of
    ``ax_grouping_col``, and the result table is merged with the x tick labels
    of ``plot_obj``.

    Parameters:
    plot_data (pandas.DataFrame): The plotted data.
    plot_obj: Plot object; only ``get_xticklabels()`` is called on it, and only when ``ax_var_is_hue`` is false.
    comparison_list (list): Pairs of hue levels to compare, e.g. (WT VEH, HET VEH).
    ax_grouping_col (str): Column whose levels define the axis ticks.
    group_order (iterable): Order of the groups (passed through).
    hue_col_name (str): Name of the hue column.
    value_col_name (str): Name of the value column.
    test_name (str, optional): Name of the test; ``None`` is replaced by 'MWU'.
    ax_var_is_hue (bool, optional): Whether the axis variable is the hue. Defaults to False.

    Returns:
    pandas.DataFrame: One row per comparison; when ``ax_var_is_hue`` is false
    the tick label columns from ``get_x_ticks_as_df`` are merged in on
    'category_compared_within' == 'tick_text'.
    """
    test_name = 'MWU' if test_name is None else test_name

    if ax_var_is_hue:
        # axis levels are the hue levels: compare on the unfiltered data
        print("With axis variable == Hue variable:")
        results = [
            run_posthoc_test_on_tick_hue_groups(plot_data, pair[0], pair[1], pair, group_order,
                                                hue_col_name, value_col_name,
                                                test_name = test_name, ax_var_is_hue = ax_var_is_hue)
            for pair in comparison_list
        ]
    else:
        # compare hue levels within each axis category separately
        results = []
        for level in plot_data[ax_grouping_col].unique():
            tick_rows = plot_data.loc[plot_data[ax_grouping_col] == level, :]
            results.extend(
                run_posthoc_test_on_tick_hue_groups(tick_rows, pair[0], pair[1], level, group_order,
                                                    hue_col_name, value_col_name, test_name = test_name)
                for pair in comparison_list
            )

    stat_table = pd.DataFrame.from_records(results)
    if ax_var_is_hue:
        return stat_table
    # attach the x tick labels and their positions to the stat results
    tick_table = get_x_ticks_as_df(plot_obj.get_xticklabels())
    return stat_table.merge(tick_table, left_on = 'category_compared_within', right_on = 'tick_text')