@@ -171,7 +171,7 @@ def plot_bubble_plot_mean_distances(distances_df, primary_vars, comparison_vars,
171171
172172
173173
174- def plot_custom_scatter (data , primary_vars , comparison_vars , fig_size , bubble_size , file_save , sort_by_difference , compare_distribution_metric , statistical_test ):
174+ def plot_custom_scatter (data , primary_vars , comparison_vars , fig_size , bubble_size ,sort_by_difference , compare_distribution_metric , statistical_test , save_path ):
175175 # Set plot style and font
176176 sns .set_style ("white" )
177177 plt .rcParams ['font.family' ] = 'Arial'
@@ -356,27 +356,46 @@ def plot_custom_scatter(data, primary_vars, comparison_vars, fig_size, bubble_si
356356 plt .legend (handles = legend_elements , loc = 'lower right' )
357357 sns .despine ()
358358 # Save the plot
359- if file_save :
360- plt .savefig (f" { file_save } _scatterplot_hallmarks.pdf" , dpi = 300 ,bbox_inches = 'tight' )
359+ if save_path is not None :
360+ plt .savefig (save_path , dpi = 300 ,bbox_inches = 'tight' )
361361 plt .show ()
362362 if statistical_test :
363363 return results_df
364364
def plot_bar_plot_distance(distances, primary_variables, comparison_variables, fig_size=(3, 3), save_path="barplot.pdf"):
    """Draw one box plot of ``min_distance`` per comparison variable.

    The rows of *distances* are restricted to the requested primary and
    comparison variables.  When the filtered table has a ``hotspot_number``
    column, values are first collapsed to the median ``min_distance`` per
    (``batch``, ``primary_variable``, ``hotspot_number``) and, if at least two
    primary variables were given, the first two are compared with
    ``ttest_ind``; the resulting p-value is appended to the plot title.
    Without that column the raw ``min_distance`` values are plotted.

    Args:
        distances: DataFrame with at least the columns ``primary_variable``,
            ``comparison_variable`` and ``min_distance`` (plus ``batch`` when
            ``hotspot_number`` is present).
        primary_variables: Values of ``primary_variable`` to keep; they form
            the x axis of every plot.
        comparison_variables: Values of ``comparison_variable``; one figure
            is produced for each.
        fig_size: Figure size passed to ``plt.subplots``.
        save_path: Where to save the figure.  ``None`` disables saving.  With
            more than one comparison variable the variable name is inserted
            before the file extension (``barplot_<variable>.pdf``) so that
            the figures no longer overwrite each other.
    """
    # Keep only the rows for the requested primary/comparison variables.
    filtered_df = distances[
        distances['primary_variable'].isin(primary_variables)
        & distances['comparison_variable'].isin(comparison_variables)
    ]
    per_hotspot = 'hotspot_number' in filtered_df.columns
    n_plots = len(comparison_variables)

    for comparison_variable in comparison_variables:
        plot_df = filtered_df[filtered_df['comparison_variable'] == comparison_variable]
        title = comparison_variable

        if per_hotspot:
            # One value per hotspot: the median distance within each
            # (batch, primary_variable, hotspot_number) group.
            plot_df = (
                plot_df
                .groupby(['batch', 'primary_variable', 'hotspot_number'])
                .min_distance.median()
                .reset_index()
            )
            # The t-test compares the first two primary variables only; skip
            # it (instead of raising IndexError) when fewer were supplied.
            if len(primary_variables) >= 2:
                first = plot_df[plot_df['primary_variable'] == primary_variables[0]]['min_distance']
                second = plot_df[plot_df['primary_variable'] == primary_variables[1]]['min_distance']
                _, p_val = ttest_ind(first, second)
                title = comparison_variable + " p-value: {:.3}".format(p_val)

        fig, ax = plt.subplots(figsize=fig_size)
        sns.boxplot(data=plot_df,
                    x='primary_variable', y='min_distance', ax=ax, palette='viridis')
        ax.set_title(title)
        ax.set_xticklabels(ax.get_xticklabels(), rotation=90)
        if save_path is not None:
            plt.savefig(_bar_plot_save_path(save_path, comparison_variable, n_plots))
        plt.show()


def _bar_plot_save_path(save_path, comparison_variable, n_plots):
    """Return the file name under which the plot for *comparison_variable* is saved."""
    # A single plot keeps the caller's path untouched.
    if n_plots <= 1:
        return save_path
    import os

    root, ext = os.path.splitext(save_path)
    return f"{root}_{comparison_variable}{ext}"
397+
398+
380399
381400# Create a DataFrame for heatmap with states as rows and signatures as columns; helper function
382401def create_heatmap_data (mean_scores , states_to_loop_through , signatures ):
@@ -490,25 +509,66 @@ def calculate_signature_differences(anndata_breast, gene_signatures, states):
490509 })
491510 return pd .DataFrame (results )
492511
def plot_signature_boxplot(anndata_breast, hotspot_variable, signature, fig_size=(5, 5), save_path=None):
    """Box-plot a signature score for two hotspot groups.

    Rows of ``anndata_breast.obs`` are assigned to a group when the
    corresponding hotspot column is not NaN.  If ``obs`` contains a
    ``<hotspot>_number`` column for *both* groups, the score is averaged per
    hotspot number, the two sets of means are compared with ``ttest_ind`` and
    the p-value is shown in the title.  Otherwise the per-row scores are
    plotted directly.

    Args:
        anndata_breast: Object exposing an ``obs`` DataFrame (presumably an
            AnnData object -- confirm against callers).
        hotspot_variable: Sequence whose first two entries name the ``obs``
            columns of the two groups to compare.
        signature: Name of the ``obs`` column holding the score to plot.
        fig_size: Figure size passed to ``plt.figure``.
        save_path: File to save the figure to; nothing is saved when falsy.
    """
    obs = anndata_breast.obs
    hot_col, cold_col = hotspot_variable[0], hotspot_variable[1]
    hot_number_col = hot_col + "_number"
    cold_number_col = cold_col + "_number"

    # Rows belonging to each group: hotspot column is not NaN.
    hot_rows = obs[~obs[hot_col].isna()]
    cold_rows = obs[~obs[cold_col].isna()]

    # Both number columns are needed for the per-hotspot aggregation; checking
    # only the first one would raise KeyError when the second is missing.
    if hot_number_col in obs.columns and cold_number_col in obs.columns:
        print("Averaging signature score per hotspot")
        # Mean signature score per hotspot number within each group.
        hot_means = hot_rows.groupby(hot_number_col)[signature].mean().reset_index()
        cold_means = cold_rows.groupby(cold_number_col)[signature].mean().reset_index()

        p_value = ttest_ind(hot_means[signature], cold_means[signature])[1]

        # Label each row with its group and stack the two tables.
        hot_means['Hotspot'] = hot_col
        cold_means['Hotspot'] = cold_col
        plot_data = pd.concat([hot_means, cold_means], ignore_index=True)

        y_column = signature
        title = f'{signature} Mean per Hotspot Number (p-value: {p_value:.2f})'
        y_label = 'Mean Response Score'
    else:
        # No per-hotspot numbering available: plot the per-row scores.
        wide = pd.DataFrame({
            hot_col: hot_rows[signature],
            cold_col: cold_rows[signature],
        })
        # Long format for seaborn: one row per (group, score).
        plot_data = wide.melt(var_name='Hotspot', value_name='Response to Checkpoint Score')

        y_column = 'Response to Checkpoint Score'
        title = f'Response to Checkpoint Genes based on {signature}'
        y_label = 'Response to Checkpoint Score'

    plt.figure(figsize=fig_size)
    sns.boxplot(x='Hotspot', y=y_column, data=plot_data, showfliers=False)
    plt.title(title)
    plt.ylabel(y_label)
    if save_path:
        plt.savefig(save_path, dpi=300)
    plt.show()
512572
513573#helper function
514574def plot_bubble_chart (data , states , fig_size , bubble_size ):
0 commit comments