Source code for bmorph.evaluation.plotting

import numpy as np
import xarray as xr
import pandas as pd
from typing import List
import matplotlib as mpl
import matplotlib.pyplot as plt
import scipy
import probscale

import networkx as nx
import graphviz as gv
import pygraphviz as pgv
import probscale
from networkx.drawing.nx_agraph import graphviz_layout

from bmorph.evaluation.constants import colors99p99
from bmorph.evaluation import descriptive_statistics as dst

from statsmodels.distributions.empirical_distribution import ECDF

#*****************************************************************************************
# Plotting Helper Functions:
#      custom_legend
#      calc_water_year
#      find_index_water_year
#      determine_row_col
#      log10_1p
#      scatter_series_axes
#*****************************************************************************************

[docs]def custom_legend(names:List, colors=colors99p99): """Creates a list of patches to be passed in as `handles` for the plt.legends function. Parameters ---------- names : list Legend names. colors : list A list of the colors corresponding to `names`. Returns ------- handles Handle parameter for matplotlib.legend. """ legend_elements = list() for i,name in enumerate(names): legend_elements.append(mpl.patches.Patch(facecolor = colors[i], label = name)) return legend_elements
[docs]def calc_water_year(df: pd.DataFrame): """Calculates the water year. Parameters ---------- df : pandas.DataFrame Flow timeseries with a DataTimeIndex. Returns ------- pandas.DataFrame.index A pandas.DataFrame index grouped by water year. """ return df.index.year + (df.index.month >= 10).astype(int)
[docs]def find_index_water_year(data: pd.DataFrame) -> np.int: """ Finds the index of the first hydrologic year. Parameters ---------- data : pd.DataFrame Flow timeseries with a DateTime index. Returns ------- int Index of the first hydrologic year. """ water_year = pd.Timestamp(0) i = 0 while water_year == pd.Timestamp(0): date = data.index[i] if date.month == 10 and date.day == 1: water_year = date else: i = i + 1 return i
[docs]def determine_row_col(n:int, pref_rows = True): """Determines rows and columns for rectangular subplots Calculates a rectangular subplot layout that contains at least n subplots, some may need to be turned off in plotting. If a square configuration is possible, then a square configuration will be proposed. This helps automate the process of plotting a variable number of subplots. Parameters ---------- n : int Total number of plots. pref_rows : boolean If True, and only a rectangular arrangment is possible, then put the longer dimension in n_rows. If False, then it is placed in the n_columns. Returns ------- n_rows : int Number of rows for matplotlib.subplot. n_columns : int Number of columns for matplotlib.subplot. """ if n < 0: raise Exception("Please enter a positive n") # use square root to test because we want a square # arrangment sqrt_n = np.sqrt(n) int_sqrt_n = int(sqrt_n) if sqrt_n == float(int_sqrt_n): return int_sqrt_n, int_sqrt_n elif int_sqrt_n*(int_sqrt_n+1)>=n: # see if a rectangular orientation would work # eg. sqrt(12) = 3.464, int(3.464) = 3 # 3*(3+1) = 3*4 = 12 if pref_rows: return int_sqrt_n+1, int_sqrt_n else: return int_sqrt_n, int_sqrt_n+1 else: # since sqrt(n)*sqrt(n) = n, # (sqrt(n)+1)*sqrt(n) < n, # then (sqrt(n)+1)^2 > n return int_sqrt_n+1, int_sqrt_n+1
[docs]def log10_1p(x: np.ndarray): """Return the log10 of one plus the input array, element-wise. Parameters ---------- x : numpy.ndarray An array of values greater than -1. If values are less than or equal to -1, then a domain error will occur in computing the logarithm. Returns -------- y : numpy.ndarray Array of the values having the log10(element+1) computer. """ y = np.nan*x for i, element in enumerate(x): y[i] = np.log10(element + 1) return y
[docs]def scatter_series_axes(data_x, data_y, label: str, color: str, alpha: float, ax = None) -> plt.axes: """Creates a scatter axis for plotting. Parameters ---------- data_x : array-like Data for the x series. data_y : array-like Data for the y series. label : str Name for the axes. color : str Color for the markers. alpha : float Transparency for the markers. Returns ------- matplotlib.axes """ if ax is None: fig, ax = plt.subplots() ax.scatter(data_x, data_y, label = label, color = color, alpha = alpha) return ax
#***************************************************************************************** # General Bias Correction Summary Statistics: # pbias_sites # diff_maxflow_sites # pbias_plotter # diff_maxflow_plotter # site_diff_scatter # stat_corrections_scatter2D # anomaly_scatter2D # rmseFraePlot #*****************************************************************************************
[docs]def pbias_sites(observed: pd.DataFrame, predicted: pd.DataFrame): """Calculates percent bias on a hydrologic year and site-by-site basis. Parameters ---------- observed : pandas.DataFrame Dataframe containing all observations. predicted : pandas.DataFrame Dataframe containing all predictions. Returns ------- pandas.DataFrame Dataframe contain the percent bias computed. """ #finds the start of the first hydraulic year i = find_index_water_year(observed) water_year_start = observed.index[i] #counts the number of hydraylic years water_years = 0 while i < len(observed): date = observed.index[i] if date.month == 9 and date.day == 30: water_years = water_years + 1 i = i + 1 pbias_site_df = pd.DataFrame(columns = observed.columns, index = pd.Series(range(0, water_years))) pbias_current_year = pd.DataFrame(columns = observed.columns) for i in range(0, water_years): #establish end of hydraulic year water_year_end = water_year_start + pd.Timedelta(364.25, unit = 'd') #need to truncate datetimes since time indicies do not align O = observed.loc[water_year_start : water_year_end] P = predicted.loc[water_year_start : water_year_end] O.index = O.index.floor('d') P.index = P.index.floor('d') pbias_current_year = dst.pbias(O, P) #puts the computations for the hydraulic year into our bigger dataframe pbias_site_df.iloc[i] = pbias_current_year.iloc[0].T #set up next hydraulic year water_year_start = water_year_start + pd.Timedelta(365.25, unit = 'd') return pbias_site_df
[docs]def diff_maxflow_sites(observed: pd.DataFrame, predicted: pd.DataFrame): """Calculates difference in maximum flows on a hydrologic year and site-by-site basis. Parameters ---------- observed : pandas.DataFrame Dataframe containing all observations. predicted : pandas.DataFrame Dataframe containing all predictions. Returns ------- pandas.DataFrame DataFrame containing the difference in maximum flows. """ #finds the start of the first hydraulic year i = find_index_water_year(observed) water_year_start = observed.index[i] #counts the number of hydraylic years water_years = 0 while i < len(observed): date = observed.index[i] if date.month == 9 and date.day == 30: water_years = water_years + 1 i = i + 1 diff_maxflow_sites_df = pd.DataFrame(columns = observed.columns, index = pd.Series(range(0,water_years))) diff_maxflow_current_year = pd.DataFrame(columns = observed.columns) for i in range(0,water_years): #establish end of hydraulic year water_year_end = water_year_start + pd.Timedelta(364.25, unit = 'd') #need to truncate datetimes since time indicies do not align O = observed.loc[water_year_start : water_year_end] P = predicted.loc[water_year_start : water_year_end] O.index = O.index.floor('d') P.index = P.index.floor('d') diff_maxflow_current_year = P.max().to_frame().T - O.max().to_frame().T #puts the computations for the hydraulic year into our bigger dataframe diff_maxflow_sites_df.iloc[i] = diff_maxflow_current_year.iloc[0].T #set up next hydraulic year water_year_start = water_year_start + pd.Timedelta(365.25, unit = 'd') return diff_maxflow_sites_df
[docs]def pbias_plotter(observed: pd.DataFrame, names: list, colors: list, *models: pd.DataFrame): """Plots box plots of numerous models grouped by site. Parameters ---------- observed : pandas.Dataframe Dataframe containing observations. names : list List of the model names. colors: list List of colors to be plotted. *models : List[pandas.DataFrame] Any number of pandas.DataFrame objects to be evaluated. Returns ------- matplotlib.figure, matplotlib.axes """ num_models = len(models) sites = observed.columns pbias_models = list() #runs each model through pbias_sites for model in models: pbias_models.append(pbias_sites(observed, model)) fig = plt.figure() ax = plt.axes(xlabel = 'Sites', ylabel = 'Percent Bias') position = 1 for site in sites: df = pd.DataFrame(index = pbias_models[0].index) #fill out the dataframe for a single site with each model's percent bias i = 0 for model in pbias_models: entry = f"{site}:{names[i]}" df[entry] = model[site] i = i + 1 boxplots = plt.boxplot(df.T, positions = np.arange(position, position + num_models), patch_artist=True) for patch, color in zip(boxplots['boxes'], colors): patch.set_facecolor(color) position = position + num_models + 1 tick_location = list() start_tick = int(np.ceil(len(models) / 2)) tick_spacing = len(models) + 1 for j in range(0, len(sites)): tick_location.append(start_tick + j * tick_spacing) ax.set(xticks = tick_location, xticklabels = sites) plt.xticks(rotation = 90) ax.legend(handles=custom_legend(names, colors), loc='upper left') return fig, ax
[docs]def diff_maxflow_plotter(observed: pd.DataFrame, names: list, colors: list, *models: pd.DataFrame): """Plots box plots of numerous models grouped by site. Parameters ---------- observed : pandas.Dataframe a dataframe containing observations names : list List of the model names. colors: list List of colors to be plotted. *models : List[pandas.DataFrame] Any number of pandas.DataFrame objects to be evaluated. Returns ------- matplotlib.figure, matplotlib.axes """ num_models = len(models) sites = observed.columns diff_maxflow_models = list() #runs each model through pbSites for model in models: diff_maxflow_models.append(diff_maxflow_sites(observed, model)) fig = plt.figure() ax = plt.axes(xlabel = 'Sites', ylabel = 'Difference in Max Flow') position = 1 for site in sites: df = pd.DataFrame(index = diff_maxflow_models[0].index) #fill out the dataframe for a single site with each model's percent bias for i, model in enumerate(diff_maxflow_models): entry = f"{site}:{names[i]}" df[entry] = model[site] bp = plt.boxplot(df.T, positions = np.arange(position, position + num_models), patch_artist = True) for patch, color in zip(bp['boxes'], colors): patch.set_facecolor(color) position = position + num_models + 1 #follow plot style: https://stackoverflow.com/questions/16592222/matplotlib-group-boxplots tick_location = list() start_tick = int(np.ceil(len(models) / 2)) tick_spacing = len(models) + 1 for j in range(0, len(sites)): tick_location.append(start_tick + j * tick_spacing) ax.set(xticks = tick_location, xticklabels = sites) plt.xticks(rotation = 45) plt.title('Yearly Difference in Max Flow due to Correction') ax.legend(handles = custom_legend(names, colors), loc = 'upper left') return fig, ax
[docs]def site_diff_scatter(predictions: dict, raw_key: str, model_keys: list, compare: dict, compare_key: str, site: str, colors = colors99p99): """Creates a scatter plot of Raw-BC versus some measure. Parameters ---------- predictions : dict Expects {'Prediction Names' : Prediction pandas.DataFrame}. 'Prediction Names' will be printed in the legend. raw_key : str The key for the predictions dictionary that directs to the raw data that each model will be subtracting. model_keys : list A list of dictoionary keys pertaining to the correction models that are wanting to be plotted. compare : dict Expecting {'Measure name' : measure pandas.DataFrame}. These are what is being plotted against on the horizontal-axis. compare_key : str The dictionary key for the measure desired in the compare dictionary. 'compare_key' will be printed on the horizontal axis. site : str A single site designiation to be examined in the plot. This will be listed as the title of the plot. colors : List[str], optional Colors as strings to be plotted from. Returns ------- matplotlib.figure, matplotlib.axes """ #retreiving DataFrames and establishing data to be plotted raw = predictions[raw_key] raw = raw.loc[:, site] Y = list() for model_key in model_keys: predict = predictions[model_key] Y.append(raw - predict.loc[:, site]) X = compare[compare_key] X = X.loc[:, site] fig,ax = plt.subplots() for i,y in enumerate(Y): scatter_series_axes(X, y, model_keys[i], color = colors[i], alpha = 0.05, ax = ax) plt.xlabel(compare_key) plt.ylabel('Raw-BC') plt.title(site) plt.axhline(0) plt.legend(handles = custom_legend(model_keys, colors)) return fig, ax
[docs]def stat_corrections_scatter2D(computations: dict, baseline_key: str, cor_keys: list, uncor_key: str, sites = [], multi = True, colors = colors99p99): """Creates a scatter plot of the flow before/after corrections relative to observations. Parameters ---------- computations : dict Expecting {"Correction Name": correction pandas.DataFrame}. baseline_key : str Contains the dictionary key for the `computations` dictionary that accesses what baseline the corrections should be compared to. This is typically observations. cor_keys : list Dictionary keys accessing the correction DataFrames in `computations`. These will be printed in the legend. uncor_key : str The dictionary key that accesses the uncorrected data in `computations`. sites : list Site(s) to be compared in the plot, can have a size of 1. If `multi` is set to False and this is not changed to a single site, then the first value in the list will be chosen. multi : boolean, optional Determines whether the plot uses data from multiple sites or a single site, defaults as True. colors : List[str], optional Colors as strings to be plotted from. Plotting colors are different for each correction DataFrame, but same across sites for a singular correction. An error will be thrown if there are more `cor_keys` then `colors`. Returns ------- matplotlib.figure, matplotlib.axes """ #retreiving DataFrames and establishing data to be plotted datum = computations[baseline_key] X = datum - computations[uncor_key] #we need to make the values replacable by the sites, #hence making the max the min and the min the max xmax = X.min().min() xmin = X.max().max() ymax = 0 ymin = 100 #thrown in are Smax and Smin to determine what the max #and min values overall are so that the axi may be #appropriately scaled fig,ax = plt.subplots() for i, cor_key in enumerate(cor_keys): Y = datum - computations[cor_key] if multi == True: for site in sites: x = X.loc[:, site] y = Y.loc[:, site] xmax_site = x.max() xmin_site = x.min() ymax_site = y.max() ymin_site = y.min() if xmax_site > xmax: xmax = xmax_site if xmin_site < xmin: xmin = xmin_site if ymax_site > ymax: ymax = ymax_site if ymin_site < ymin: ymin = ymin_site scatter_series_axes(x, y, site, colors[i], 0.05, ax) else: #meaning site should be set to a singular value #double check that this was actually changed, otherwise picks first value site = sites if isinstance(sites, list): site = sites[0] x = X.loc[:, site] y = Y.loc[:, site] xmax_site = x.max() xmin_site = x.min() ymax_site = y.max() ymin_site = y.min() if xmax_site > xmax: xmax = xmax_site if xmin_site < xmin: xmin = xmin_site if ymax_site > ymax: ymax = ymax_site if ymin_site < ymin: ymin = ymin_site scatter_series_axes(x, y, site, colors[i], 0.05, ax) #Sets up labels based on whether one or more sites were plotted plt.xlabel(f'{baseline_key}-Uncorrected') plt.ylabel(f'{baseline_key}-Corrected') if multi==True: plt.title("Statistical Corrections") else: plt.title(f'Statistical Corrections: {sites}') minlin = xmin maxlin = xmax if ymin < minlin: minlin = ymin if ymax > maxlin: maxlin = ymax minlin = minlin * 0.9 maxlin = maxlin * 1.1 plt.plot([minlin, maxlin], [minlin, maxlin]) plt.legend(handles = custom_legend(cor_keys, colors)) return fig, ax
[docs]def anomaly_scatter2D(computations: dict, baseline_key: str, vert_key: str, horz_key: str, sites = [], multi = True, colors = colors99p99, show_legend = True): """Plots two correction models against each other after Raw is subracted from each. Parameters ---------- computations : dict Expecting {"Correction Name": correction pandas.DataFrame}. baseline_key : str Dictionary key for the `computations` dictionary that accesses what baseline the corrections should be compared to. This is typically observations. vert_key : str Dictionary key for the `computations` dictionary that accesses the model to be plotted on the vertical axis. horz_key : str Dictionary key for the `computations` dictionary that accesses the model to be plotted on the horizontal axis. sites : list Site(s) to be compared in the plot, can have a size of 1. If `multi` is set to False and this is not changed to a single site, then the first value in the list will be chosen. multi : boolean, optional Whether the plot uses data from multiple sites or a single site. colors : list, optional Colors as strings to be plotted from. Plotting colors are different for each correction DataFrame, but same across sites for a singular correction. An error will be thrown if there are more `cor_keys` then colors. show_legend : boolean, optional Whether or not to display the legend, defaults as True. """ #retreiving DataFrames and establishing data to be plotted datum = computations[baseline_key] X = datum - computations[horz_key] Y = datum - computations[vert_key] i = 0 fig, ax = plt.subplots() if multi == True: for site in sites: x = X.loc[:, site] y = Y.loc[:, site] scatter_series_axes(x, y, site, colors[i], 0.05, ax) i = i + 1 if i >= len(colors): #recycles colors if all exhausted i = 0 else: #meaning site should be set to a singular value #double check that this was actually changed, otherwise picks first value site = sites if type(sites) == list: site = sites[0] X = X.loc[:, site] Y = Y.loc[:, site] scatter_series_axes(X, Y, site, colors[i], 0.05, ax) plt.xlabel(f'{baseline_key}-{horz_key}') plt.ylabel(f'{baseline_key}-{vert_key}') plt.title("Statistical Correction Anomolies") plt.axhline(0) plt.axvline(0) if show_legend: plt.legend(handles = custom_legend(sites, colors))
[docs]def rmseFracPlot(data_dict: dict, obs_key: str, sim_keys: list, sites = [], multi = True, colors = colors99p99): """Root mean square values calculated by including descending values one-by-one. Parameters ---------- data_dict : dict Expecting {"Data Name": data pandas.DataFrame}. obs_key : str Dictionary key for the `computations` dictionary that accesses the observations to be used as true in calculating root mean squares. sim_keys : list Dictionary keys accessing the simulated DataFrames in `computations`, used in predictions in calculating root mean squares. sites : list Site(s) to be compared in the plot, can have a size of 1. If `multi` is set to False and this is not changed to a single site, then the first value in the list will be chosen. multi : boolean, optional Whether the plot uses data from multiple sites or a single site. colors : list, optional Colors as strings to be plotted from. Plotting colors are different for each correction DataFrame, but same across sites for a singular correction. An error will be thrown if there are more `sim_keys` then colors. """ #retrieving data and flooring time stamps observations = data_dict[obs_key] observations.index = observations.index.floor('d') color_num = 0 for sim_key in sim_keys: predictions = data_dict[sim_key] predictions.index = predictions.index.floor('d') N = len(predictions.index) rmse_tot = dst.rmse(observations, predictions) rmse_n = pd.DataFrame(index = np.arange(0, N)) errors = predictions - observations square_errors = errors.pow(2) if multi == True: #constructs a dataframe where each column is independently sorted for site in sites: square_errors_site = square_errors.loc[:, site].sort_values(ascending = False) rmse_n[site]=square_errors_site.values #performs cumulative mean calcualtion for site in sites: vals = rmse_n[site].values mat = np.vstack([vals,] * N).T nantri = np.tri(N) nantri[nantri == 0] = np.nan mat_mean = np.nanmean(mat * nantri, axis = 1) mat_rmse = np.power(mat_mean, 0.5) rmse_n[site] = np.divide(mat_rmse, rmse_tot[site].values) else: site = sites if type(sites) == list: site = sites[0] square_errors_site = square_errors.loc[:, site].sort_values(ascending = False) rmse_n[site] = square_errors_site.values vals = rmse_n[site].values mat = np.vstack([vals,] * N).T nantri = np.tri(N) nantri[nantri == 0] = np.nan mat_mean = np.nanmean(mat * nantri, axis=1) mat_rmse = np.power(mat_mean, 0.5) rmse_n[site] = np.divide(mat_rmse, rmse_tot[site].values) rmse_n.index = rmse_n.index / N plt.plot(rmse_n, color = colors[color_num], alpha = 0.5) color_num = color_num + 1 plt.xlabel('n/N') plt.xscale('log') plt.ylabel('RMSE_Cumulative/RMSE_total') plt.yscale('log') if multi == True: plt.title('RMSE Distribution in Descending Sort') else: plt.title(f'RMSE Distribution in Descending Sort: {site}') plt.axhline(1) plt.legend(handles = custom_legend(sim_keys , colors))
#***************************************************************************************** # SimpleRiverNetwork Plots and Related NetworkX Functions: # find_upstream # find_all_upstream # create_adj_mat # create_nxgraph # organize_nxgraph # color_code_nxgraph # draw_dataset #*****************************************************************************************
[docs]def find_upstream(topo: xr.Dataset, segID: int, return_segs: list = []): """Finds what river segment is directly upstream from the xarray.Dataset. Parameters ---------- topo : xarray.Dataset Contains river network topography. Expecting each river segment's immeditate downstream river segment is designated by 'Tosegment'/ segID : int Current river segment identification number. return_segs : list River segment identification numbers upstream from `segID`. This defaults as an empty list to be filled by the method. """ upsegs = np.argwhere((topo['Tosegment'] == segID).values).flatten() upsegIDs = topo['seg_id'][upsegs].values return_segs += list(upsegs)
[docs]def find_all_upstream(topo: xr.Dataset, segID: int, return_segs: list = []) -> np.ndarray: """Finds all upstream river segments for a given river segment from the xarray.Dataset. Parameters ---------- topo : xarray.Dataset segID : int return_segs : list Returns ------- numpy.ndarray """ upsegs = np.argwhere((topo['Tosegment'] == segID).values).flatten() upsegIDs = topo['seg_id'][upsegs].values return_segs += list(upsegs) for upsegID in upsegIDs: find_all_upstream(topo, upsegID, return_segs = return_segs) return np.unique(return_segs).flatten()
[docs]def create_adj_mat(topo: xr.Dataset) -> np.ndarray: """Forms the adjacency matrix for the graph of the topography. Note that this is independent of whatever the segments are called, it is a purely a map of the relative object locations. Parameters ---------- topo : xarray.Dataset Describes the topograph of the river network. Returns -------- numpy.ndarray An adjacency matrix describing the river network. """ #creates the empty adjacency matrix N = topo.dims['seg'] adj_mat = np.zeros(shape=(N, N), dtype = int) #builds adjacency matrix based on what segements are upstream i = 0 for ID in topo['seg_id'].values: adj = list() find_upstream(topo, ID, adj) for dex in adj: adj_mat[i, dex] += 1 i += 1 return adj_mat
[docs]def create_nxgraph(adj_mat: np.ndarray) -> nx.Graph: """Creates a NetworkX Graph object given an adjacency matrix. Parameters ---------- adj_mat : numpy.ndarray Adjacency matrix describing the river network. Returns ------- networkx.graph NetworkX Graph of respective nodes. """ topog = nx.from_numpy_matrix(adj_mat) return topog
[docs]def organize_nxgraph(topo: nx.Graph): """Orders the node positions into a hierarchical structure. Based on the "dot" layout and given topography. Parameters ---------- topo : xarray.Dataset Contains river segment identifications and relationships. Returns ------- networkx.positions """ pos = nx.drawing.nx_agraph.graphviz_layout(topo, prog = 'dot') return pos
[docs]def color_code_nxgraph(graph: nx.graph, measure: pd.Series, cmap=mpl.cm.get_cmap('coolwarm_r'), vmin=None, vmax=None)-> dict: """Creates a dictionary mapping of nodes to color values. Parameters ---------- graph : networkx.graph Graph to be color coded measure : pandas.Series Contains river segment ID's as the index and desired measures as values. cmap : matplotlib.colors.LinearSegmentedColormap, optional Colormap to be used for coloring the SimpleRiverNewtork plot. This defaults as 'coolwarm_r'. vmin: float, optional Minimum value for coloring vmax: float, optional Maximum value for coloring Returns ------- dict Dictionary of {i:color} where i is the index of the river segment. """ if np.where(measure < 0)[0].size == 0: # meaning we have only positive values and do not need to establish # zero at the center of the color bar segs = measure.index minimum = 0 #set to zero to preserve the coloring of the scale maximum = measure.max() color_vals = (measure.values) / (maximum) color_bar = plt.cm.ScalarMappable(cmap=cmap, norm=plt.Normalize(vmin = minimum, vmax = maximum)) color_dict = {f'{seg}' : mpl.colors.to_hex(cmap(i)) for i, seg in zip(color_vals, segs)} return color_dict, color_bar else: segs = measure.index #determine colorbar range if vmin is None and vmax is None: extreme = np.max(np.abs([np.min(measure), np.max(measure)])) vmin = -extreme vmax = extreme elif vmin is None: vmin = measure.min() elif vmax is None: vmax = measure.max() norm = mpl.colors.Normalize(vmin=vmin, vmax=vmax) color_bar = plt.cm.ScalarMappable(cmap=cmap, norm=norm) color_dict = {f'{seg}': mpl.colors.to_hex(cmap(norm(i))) for i, seg in zip(measure.values, segs)} return color_dict, color_bar
[docs]def draw_dataset(topo: xr.Dataset, color_measure: pd.Series, cmap = mpl.cm.get_cmap('coolwarm_r')): """"Plots the river network through networkx. Draws a networkx graph from a topological xrarray.Dataset and color codes it based on a pandas.Series. Parameters ---------- topo : xarray.Dataset Contains river segment identifications and relationships. color_measure : pandas.Series Indicies are concurrent with the number of segs in topo. Typically this contains statistical information about the flows that will be color coded by least to greatest value. cmap : matplotlib.colors.LinearSegmentedColormap, optional Colormap to be used for coloring the SimpleRiverNewtork plot. This defaults as 'coolwarm_r'. """ topo_adj_mat = create_adj_mat(topo) topo_graph = create_nxgraph(topo_adj_mat) topo_positions = organize_nxgraph(topo_graph) topo_color_dict, topo_color_cbar = color_code_nxgraph(topo_graph, color_measure, cmap) topo_nodecolors = [topo_color_dict[f'{node}'] for node in topo_graph.nodes()] nx.draw_networkx(topo_graph, topo_positions, node_size = 200, font_size = 8, font_weight = 'bold', node_shape = 's', linewidths = 2, font_color = 'w', node_color = topo_nodecolors) plt.colorbar(topo_color_cbar)
#***************************************************************************************** # BMORPH Summary Statistics: # plot_reduced_flows # plot_spearman_rank_difference # correction_scatter # pbias_diff_hist # plot_residual_overlay # norm_change_annual_flow # pbias_compare_hist # compare_PDF # compare_CDF # spearman_diff_boxplots_annual # kl_divergence_annual_compare # spearman_diff_boxplots_annual_compare # compare_CDF_all # compare_mean_grouped_CPD #*****************************************************************************************
[docs]def plot_reduced_flows(flow_dataset: xr.Dataset, plot_sites: list, reduce_func=np.mean, interval = "day", statistic_label='Mean', units_label = r'$(m^3/s)$', title_label=f'Annual Mean Flows', raw_var = 'IRFroutedRunoff', raw_name = 'Mizuroute Raw', ref_var = 'upstream_ref_flow', ref_name = 'upstream_ref_flow', bc_vars = list(), bc_names = list(), fontsize_title = 24, fontsize_legend = 20, fontsize_subplot = 20, fontsize_tick = 20, fontcolor = 'black', figsize_width = 20, figsize_height = 12, plot_colors = ['grey', 'black', 'blue', 'red'], return_reduced_flows = False): """Creates a series of subplots plotting statistical day of year flows per gauge site. Parameters ---------- flow_dataset : xarray.Dataset Contatains raw, reference, and bias corrected flows. plot_sites : list Sites to be plotted. reduce_func : function, optional A function to apply to flows grouped by `interval`, defaults as np.mean. interval : str, optional What time interval annual `reduce_func` should be computed on. Currently supported is `day` for dayofyear (default), `week` for weekofyear, and `month` for monthly. statistic_label : str, optional Label for the statistic representing the `reduce_func`, defaults as 'Mean' to fit `reduce_func` as np.mean. units_label : str, optional Label for the units of flow, defaults as r`$(m^3/s)$`. title_label : str Lable for the figure title representing the reduce_func, defaults as f'Annual Mean Flows' to fit `reduce_func` as np.mean. raw_var : str, optional The string to access the raw flows in `flow_dataset`, defaults as 'IRFroutedRunoff'. raw_name : str, optional Label for the raw flows in the legend, defaults as 'Mizuroute Raw'. ref_var : str, optional The string to access the reference flows in `flow_dataset`, defaults as 'upstream_ref_flow'. ref_name : str, optional Label for the reference flows in the legend, defaults as 'upstream_ref_flow'. bc_vars : list The strings to access the bias corrected flows in `flow_dataset`. bc_names : list Labels for the bias corrected flows in the legend, expected in the same order as `bc_vars`. plot_colors : list, optional Colors to be plotted for `raw_var`, `ref_var`, `bc_vars` respectively. Defaults as ['grey', 'black', 'blue', 'red']. return_reduced_flows : boolean, optional If True, returns the reduced flows as calculated for plotting, defaults as False. This is typically used for debugging purposes. fontsize_title : int, optional Font size of the plot title, defaults as 80. fontsize_legend : int, optional Font size of the plot legend, defaults as 68. fontsize_subplot : int, optional Font size for the subplots, defaults as 60. fontsize_tick : int, optional Font size of the ticks, defaults 45. fontcolor : str, optional Color of the font, defaults as 'black' figsize_width : int, optional Width of the figure, defaults as 70. figusize_height : int, optional Height of the figure, defaults as 30. Returns ------- xarray.Dataset or (matplotlib.figure, matplotlib.axes) If `return_reduced_flows` is False, matplotlib.figure and matplotlib.axes, otherwise the reduced_flows are returned as the xarray.Dataset. """ if len(bc_vars) == 0: raise Exception("Please enter a non-zero number strings in bc_vars to be used") if len(bc_vars) != len(bc_names): raise Exception("Please have the same number of entries in bc_names as bc_names") if len(plot_colors) < 2 + len(bc_vars): raise Exception(f"Please enter at least {2 + len(bc_vars)} colors in plot_colors") interval = interval.lower() interval_name = "Day of Year" if interval == 'day': interval_name = "Day" raw_flow = flow_dataset[raw_var].groupby( flow_dataset['time'].dt.dayofyear).reduce(reduce_func) reference_flow = flow_dataset[ref_var].groupby( flow_dataset['time'].dt.dayofyear).reduce(reduce_func) bc_flows = list() for bc_var in bc_vars: bc_flows.append(flow_dataset[bc_var].groupby(flow_dataset['time'].dt.dayofyear).reduce(reduce_func)) time = raw_flow['dayofyear'].values elif interval == 'week': interval_name = "Week of Year" raw_flow = flow_dataset[raw_var].groupby( flow_dataset['time'].dt.isocalendar().week).reduce(reduce_func) reference_flow = flow_dataset[ref_var].groupby( flow_dataset['time'].dt.isocalendar().week).reduce(reduce_func) bc_flows = list() for bc_var in bc_vars: bc_flows.append(flow_dataset[bc_var].groupby(flow_dataset['time'].dt.isocalendar().week).reduce(reduce_func)) time = raw_flow['week'].values elif interval == 'month': interval_name = "Month" raw_flow = flow_dataset[raw_var].groupby( flow_dataset['time'].dt.month).reduce(reduce_func) reference_flow = flow_dataset[ref_var].groupby( flow_dataset['time'].dt.month).reduce(reduce_func) bc_flows = list() for bc_var in bc_vars: bc_flows.append(flow_dataset[bc_var].groupby(flow_dataset['time'].dt.month).reduce(reduce_func)) time = raw_flow['month'].values else: raise Exception("Please use 'day', 'week', or 'month' for interval.") outlet_names = flow_dataset['seg'].values raw_flow_df = pd.DataFrame(data = raw_flow.values, index=time, columns = outlet_names) reference_flow_df = pd.DataFrame(data = reference_flow.values, index=time, columns = outlet_names) bc_flow_dfs = list() for bc_flow in bc_flows: bc_flow_dfs.append(pd.DataFrame(data = bc_flow.values, index = time, columns = outlet_names)) plot_names = [raw_name, ref_name] plot_names.extend(bc_names) mpl.rcParams['figure.figsize'] = (figsize_width, figsize_height) n_rows, n_cols = determine_row_col(len(plot_sites)) fig, axs = plt.subplots(n_rows, n_cols) for site, ax in zip(plot_sites, axs.ravel()): ax.plot(raw_flow_df[site], color = plot_colors[0], alpha = 0.8, lw = 4) ax.plot(reference_flow_df[site], color = plot_colors[1], alpha = 0.8, lw = 4) for i, bc_flow_df in enumerate(bc_flow_dfs): ax.plot(bc_flow_df[site], color = plot_colors[2+i], lw = 4, alpha = 0.8) ax.set_title(site, fontsize = fontsize_subplot, color = fontcolor) plt.setp(ax.spines.values(), color = fontcolor) ax.tick_params(axis = 'both', colors = fontcolor, labelsize = fontsize_tick) if len(plot_sites) < n_rows * n_cols: for ax_index in np.arange(len(plot_sites), n_rows * n_cols): axs.ravel().tolist()[ax_index].axis('off') fig.text(0.4, -0.02, interval_name, fontsize = fontsize_title, ha = 'center') fig.text(-0.02, 0.5, f"{statistic_label} {interval_name} Flow {units_label}", fontsize = fontsize_title, va = 'center', rotation = 'vertical') plt.subplots_adjust(wspace = 0.1, hspace = 0.3, left = 0.05, right = 0.8, top = 0.95) fig.legend(plot_names, fontsize = fontsize_legend, loc = 'center right'); if return_reduced_flows: reduced_flows = xr.Dataset(coords={'site': plot_sites, 'time': time}) reduced_flows[raw_var] = xr.DataArray(data = raw_flow_df.values, dims = ('time', 'site') ).transpose() reduced_flows[ref_var] = xr.DataArray(data = reference_flow_df.values, dims = ('time', 'site') ).transpose() for bc_var, bc_flow_df in zip(bc_vars, bc_flow_dfs): reduced_flows[bc_var] = xr.DataArray(data = bc_flow_df.values, dims = ('time', 'site') ).transpose() return reduced_flows return fig, ax
[docs]def plot_spearman_rank_difference(flow_dataset:xr.Dataset, gauge_sites: list, start_year: str, end_year: str, relative_locations_triu: pd.DataFrame, basin_map_png, cmap = mpl.cm.get_cmap('coolwarm_r'), blank_plot_color = 'w', fontcolor = 'black', fontsize_title=60, fontsize_tick = 30, fontsize_label = 45): """Creates a site-to-site rank correlation difference comparison plot with a map of the basin. Parameters ---------- flow_dataset : xarray.Dataset Contains raw as 'raw_flow' and bias corrected as 'bias_corrected_total_flow' flow values, times of the flow values, and the names of the sites where those flow values exist gauge_sites : list Gauge sites to be plotted. start_year : str String formatted as 'yyyy-mm-dd' to start rank correlation window. end_year : str String formatted as 'yyyy-mm-dd' to end rank correlation window. relative_locations_triu : pandas.DataFrame Denotes which sites are connected with a '1' and has the lower triangle set to '0'. basin_map_png : png file The basin map with site values marked. cmap : matplotlib.colors.LinearSegmentedColormap, optional Colormap to be used for coloring the SimpleRiverNewtork plot. This defaults as 'coolwarm_r'. blank_plot_color : str, optional Color to set the lower extremes in `cmap` should be to keep extreme values from skewing the color map and hiding values. This is defaulted as 'w' for white. It should appear that this color matches the background plot color to make it appear as if no value is plotted here. font_color : str, optional Color of the font, defaulted as 'black'. fontsize_title : int, optional Font size of the title, defaults as 60. fontsize_tick : int, optional Font size of the ticks, defaults as 30. fontsize_label : int, optional Font size of the labels, defaults as 45. """ mpl.rcParams['figure.figsize'] = (40, 20) cmap.set_under(color = blank_plot_color) fig, ax = plt.subplots() if int(end_year[:4]) - int(start_year[:4]) == 1: fig.suptitle(f'WY{start_year[:4]}: 'r'$r_{s}(Q_{raw}) - r_{s}(Q_{bc})$', fontsize = fontsize_title, color = fontcolor, x = 0.6, y = 0.95) elif int(end_year[:4])-int(start_year[:4]) > 1: end_WY = int(end_year[:4])-1 fig.suptitle(f'WY{start_year[:4]} to WY{end_WY}: 'r'$r_{s}(Q_{raw}) - r_{s}(Q_{bc})$', fontsize = fontsize_title, color = fontcolor, x = 0.6, y = 0.95) else: raise Exception('Please check end_year is later than start_year') time_span = flow_dataset['time'].values outlet_names = flow_dataset['outlet'].values raw_flow = pd.DataFrame(data = np.transpose(flow_dataset['raw_flow'].values), index = time_span, columns = outlet_names) ref_flow = pd.DataFrame(data = flow_dataset['reference_flow'].values, index = time_span, columns = outlet_names) bc_flow = pd.DataFrame(data=np.transpose(flow_dataset['bias_corrected_total_flow'].values), index = time_span, columns = outlet_names) raw_flow_spearman = filter_rank_corr(raw_flow.loc[start_year : end_year].corr(method = 'spearman'), rel_loc = relative_locations_triu) bc_flow_spearman = filter_rank_corr(bc_flow.loc[start_year : end_year].corr(method = 'spearman'), rel_loc = relative_locations_triu) raw_minus_bc_spearman = raw_flow_spearman - bc_flow_spearman vmin = np.min(raw_minus_bc_spearman.min()) vmax = np.max(raw_minus_bc_spearman.max()) vextreme = vmax if np.abs(vmin) > vextreme: vextreme = np.abs(vmin) # -10 is used to ensure that the squares not to be plotted are marked as such by # the cmap's under values vunder = np.abs(vmin) * -10 im = ax.imshow(raw_minus_bc_spearman.fillna(vunder), vmin = -vextreme, vmax = vextreme, cmap = cmap) plt.setp(ax.spines.values(), color = fontcolor) ax.tick_params(axis='both', colors = fontcolor) tick_tot = len(gauge_sites) ax.set_xticks(np.arange(0, tick_tot, 1.0)) ax.set_yticks(np.arange(0, tick_tot, 1.0)) ax.set_ylim(tick_tot - 0.5, -0.5) ax.set_xticklabels(gauge_sites, rotation = 'vertical', fontsize = fontsize_label) ax.set_yticklabels(gauge_sites, fontsize = fontsize_tick); cb = fig.colorbar(im, pad = 0.01) cb.ax.yaxis.set_tick_params(color = fontcolor, labelcolor = fontcolor, labelsize = fontsize_tick) cb.outline.set_edgecolor(None) fig.text(0.6, -0.02, 'Site Abbreviation', fontsize = fontsize_label, ha = 'center') fig.text(0.32, 0.4, 'Site Abbreviation', fontsize = fontsize_label, va = 'center', rotation = 'vertical') newax = fig.add_axes([0.295, 0.4, 0.49, 0.49], anchor = 'NE', zorder = 2) newax.imshow(basin_map_png) newax.axis('off') newax.tick_params(axis = 'both') newax.set_xticks([]) newax.set_yticks([]) plt.tight_layout plt.show()
[docs]def correction_scatter(site_dict:dict, raw_flow:pd.DataFrame, ref_flow:pd.DataFrame, bc_flow:pd.DataFrame, colors:list, title='Flow Residuals', fontsize_title=80, fontsize_legend=68, fontsize_subplot=60, fontsize_tick=45, fontcolor='black', pos_cone_guide=False, neg_cone_guide=False): """Difference from reference flows before and after correction. Plots differences between the raw and reference flows on the horizontal and differences between the bias corrected and refrerence on the vertical. This compares corrections needed before and after the bias correction method is applied. Parameters ---------- site_dict : dict Expects {subgroup name: list of segments in subgroup} how sites are to be seperated. raw_flow : pandas.DataFrame Contains flows before correction. ref_flow : pandas.DataFrame Contains the reference flows to compare `raw_flow` and `bc_flow`. bc_flow : pandas.DataFrame Contains flows after correction. colors : list Colors to be plotted for each site in `site_dict`. title : str, optional Title label for the plot, defaults as 'Flow Residuals'. fontsize_title : int, optional Fontsize of the title, defaults as 80. fontsize_legend : int, optional Fontsize of the legend, defaults as 68. fontsize_subplot : int, optional Fontsize of the subplots, defaults as 60. fontsize_tick : int, optional Fontsize of the ticks, defaults as 45. fontcolor : str, optional Color of the font, defaults as 'black'. pos_cone_guide : boolean, optional If True, plots a postive 1:1 line through the origin for reference. neg_cone_guide : boolean, optional If True, plots a negative 1:1 line through the origin for reference. """ num_plots = len(site_dict.keys()) n_rows, n_cols = determine_row_col(num_plots) mpl.rcParams['figure.figsize']=(60,40) fig,axs = plt.subplots(nrows=n_rows, ncols=n_cols) plt.suptitle(title, fontsize= fontsize_title, color=fontcolor, y=1.05) ax_list = axs.ravel().tolist() before_bc = ref_flow-raw_flow after_bc = ref_flow-bc_flow for i, site_group_key in enumerate(site_dict.keys()): site_group = site_dict[site_group_key] group_before_bc = before_bc.loc[:, site_group] group_after_bc = after_bc.loc[:, site_group] scatter_series_axes(group_before_bc, group_after_bc, label=site_group_key, alpha=0.05, color=colors[i], ax=ax_list[i]) ax_list[i].set_title(site_group_key, fontsize=fontsize_subplot) plt.setp(ax_list[i].spines.values(), color=fontcolor) ax_list[i].tick_params(axis='both', colors=fontcolor, labelsize=fontsize_tick) # add horizontal axis at 0 line and hide plots that are not in use for i, ax in enumerate(axs.ravel()): if i < num_plots: bottom, top = ax.get_ylim() left, right = ax.get_xlim() ref_line_max = np.max([bottom, top, left, right]) ref_line_min = np.min([bottom, top, left, right]) ref_line_ext = ref_line_max if np.abs(ref_line_min) > ref_line_ext: ref_line_ext = np.abs(ref_line_min) ax.plot([-ref_line_ext, ref_line_ext], [0,0], color='k', linestyle='--') if pos_cone_guide and neg_cone_guide: ax.plot([-ref_line_ext, ref_line_ext], [-ref_line_ext, ref_line_max], color='k', linestyle='--') ax.plot([-ref_line_ext, ref_line_ext], [ref_line_ext, -ref_line_max], color='k', linestyle='--') elif pos_cone_guide: ax.plot([0, ref_line_ext], [0, ref_line_ext], color='k', linestyle='--') ax.plot([0, ref_line_ext], [0, -ref_line_ext], color='k', linestyle='--') elif neg_cone_guide: ax.plot([0, -ref_line_ext], [0, ref_line_ext], color='k', linestyle='--') ax.plot([0, -ref_line_ext], [0, -ref_line_ext], color='k', linestyle='--') else: ax.axis('off') fig.text(0.5, -0.04, r'$Q_{ref} - Q_{raw} \quad (m^3/s)$', ha='center', va = 'bottom', fontsize=fontsize_title, color=fontcolor); fig.text(-0.02, 0.5, r'$Q_{ref} - Q_{bc} \quad (m^3/s)$', va='center', rotation = 'vertical', fontsize=fontsize_title, color=fontcolor); plt.tight_layout() plt.show
[docs]def compare_correction_scatter(flow_dataset: xr.Dataset, plot_sites:list, raw_var= "raw", raw_name = "Mizuroute Raw", ref_var="ref", ref_name = "Reference", bc_vars = list(), bc_names = list(), plot_colors = ['blue', 'purple','orange','red'], title='Absolute Error in Flow'r'$(m^3/s)$', fontsize_title=80, fontsize_legend=68, alpha = 0.05, fontsize_subplot=60, fontsize_tick=45, fontcolor='black', pos_cone_guide=False, neg_cone_guide=False, symmetry=True): """Difference from reference flows before and after correction. Plots differences between the raw and reference flows on the horizontal and differences between the bias corrected and refrerence on the vertical. This compares corrections needed before and after the bias correction method is applied. Parameters ---------- flow_dataset : xarray.Dataset contains raw, reference, and bias corrected flows. plot_sites : list Sites to be plotted, expected as the `seg` coordinate in `flow_dataset`. raw_var : str, optional The string to access the raw flows in `flow_dataset`, defaults as `raw`. raw_name : str, optional Label for the raw flows in the legend, defaults as 'Mizuroute Raw'. ref_var : str, optional The string to access the reference flows in `ref`, defaults as 'upstream_ref_flow'. ref_name : str, optional Label for the reference flows in the legend, defaults as 'Reference'. bc_vars : list The strings to access the bias corrected flows in `flow_dataset`. bc_names : list Labels for the bias corrected flows in the legend, expected in the same order as `bc_vars`. plot_colors : list, optional Colors to be plotted for each site in `plot sites`. Defaults as ['blue', 'purple', 'orange, 'red']. fontsize_title : int, optional Fontsize of the title, defaults as 80. fontsize_legend : int, optional Fontsize of the legend, defaults as 68. fontsize_subplot : int, optional Fontsize of the subplots, defaults as 60. fontsize_tick : int, optional Fontsize of the ticks, defaults as 45. fontcolor : str, optional Color of the font, defaults as 'black'. pos_cone_guide : boolean, optional If True, plots a postive 1:1 line through the origin for reference. neg_cone_guide : boolean, optional If True, plots a negative 1:1 line through the origin for reference. symmetry : boolean, optional If True, the plot axis are symmetrical about the origin (default). If False, plotting limits will minimize empty space while not losing any data. """ if len(bc_vars) == 0: raise Exception("Please enter a non-zero number strings in bc_vars to be used") if len(bc_vars) != len(bc_names): raise Exception("Please have the same number of entries in bc_names as bc_names") if len(plot_colors) < len(bc_vars): raise Exception(f"Please enter at least {len(bc_vars)} colors in plot_colors") num_plots = len(plot_sites) n_rows, n_cols = determine_row_col(num_plots) mpl.rcParams['figure.figsize']=(60,40) fig,axs = plt.subplots(nrows=n_rows, ncols=n_cols) plt.suptitle(title, fontsize= fontsize_title, color=fontcolor, y=1.05) ax_list = axs.ravel().tolist() plot_flows = flow_dataset.sel(seg=plot_sites).copy() plot_flows["before_bc"] = plot_flows[ref_var]-plot_flows[raw_var] for bc_var in bc_vars: plot_flows[f"after_{bc_var}"] = plot_flows[ref_var]-plot_flows[bc_var] #we need to figure out which plots have the most spread to make certain we hide the fewest spread = list() for bc_var in bc_vars: spread.append(plot_flows[f"after_{bc_var}"].values.max()-plot_flows[f"after_{bc_var}"].values.min()) sorted_spread = np.flip(np.sort(spread.copy())) spread_ranks = [spread.index(val) for val in sorted_spread] for i, site in enumerate(plot_sites): before_bc = plot_flows["before_bc"].sel(seg=site).values for j in spread_ranks: after_bc = plot_flows[f"after_{bc_vars[j]}"].sel(seg=site).values scatter_series_axes(before_bc, after_bc, label=bc_names[j], alpha=alpha, color=plot_colors[j], ax=ax_list[i]) ax_list[i].set_title(site, fontsize=fontsize_subplot) plt.setp(ax_list[i].spines.values(), color=fontcolor) ax_list[i].tick_params(axis='both', colors=fontcolor, labelsize=fontsize_tick) # add horizontal axis at 0 line and hide plots that are not in use for i, ax in enumerate(axs.ravel()): if i == len(axs.ravel())-1: ax.axis('off') ax.legend(handles=custom_legend(names = bc_names, colors=plot_colors), fontsize = fontsize_legend, loc='center') if i < num_plots: bottom, top = ax.get_ylim() left, right = ax.get_xlim() ref_line_max = np.max([bottom, top, left, right]) ref_line_min = np.min([bottom, top, left, right]) ref_line_ext = ref_line_max if np.abs(ref_line_min) > ref_line_ext: ref_line_ext = np.abs(ref_line_min) ax.plot([-ref_line_ext, ref_line_ext], [0,0], color='k', linestyle='--') if pos_cone_guide and neg_cone_guide: ax.plot([-ref_line_ext, ref_line_ext], [-ref_line_ext, ref_line_max], color='k', linestyle='--') ax.plot([-ref_line_ext, ref_line_ext], [ref_line_ext, -ref_line_max], color='k', linestyle='--') elif pos_cone_guide: ax.plot([0, ref_line_ext], [0, ref_line_ext], color='k', linestyle='--') ax.plot([0, ref_line_ext], [0, -ref_line_ext], color='k', linestyle='--') elif neg_cone_guide: ax.plot([0, -ref_line_ext], [0, ref_line_ext], color='k', linestyle='--') ax.plot([0, -ref_line_ext], [0, -ref_line_ext], color='k', linestyle='--') if not symmetry: ax.set_xlim(left=left,right=right) ax.set_ylim(top=top,bottom=bottom) else: ax.axis('off') fig.text(0.5, -0.04, r'$Q_{ref} - Q_{raw} \quad (m^3/s)$', ha='center', va = 'bottom', fontsize=fontsize_title, color=fontcolor); fig.text(-0.02, 0.5, r'$Q_{ref} - Q_{bc} \quad (m^3/s)$', va='center', rotation = 'vertical', fontsize=fontsize_title, color=fontcolor); plt.tight_layout() plt.show
[docs]def pbias_diff_hist(sites:list, colors:list, raw_flow:pd.DataFrame, ref_flow:pd.DataFrame, bc_flow: pd.DataFrame, grouper=pd.Grouper(freq='M'), total_bins=None, title_freq='Monthly', fontsize_title=90, fontsize_subplot_title=60, fontsize_tick=40, fontsize_labels=84): """Histograms of differences in percent bias before/after bias correction. Creates a number of histogram subplots by each given site that plot the difference in percent bias before and after bias correction. Parameters ---------- sites : list Sites that are the columns of the flow DataFrames, `raw_flow`, `ref_flow`, and `bc_flow`. colors : list Colors to plot the sites with, (do not have to be different), that are used in the same order as the list of sites. raw_flow : pandas.DataFrame Contains flows before bias correction. ref_flow : pandas.DataFrame Contains reference flows for comparison as true values. bc_flow : pandas.DataFrame Contains flows after bias correction. grouper : pandas.TimeGrouper, optional How flows should be grouped for bias correction. This defaults as monthly. total_bins : int, optional Number of bins to use in the histogram plots. If none specified, defaults to the floored square root of the number of pbias difference values. title_freq : str, optional An adjective description of the frequency with which the flows are grouped, should align with `grouper`, although there is no check to verify this. This defaults as 'Monthly'. fontsize_title : int, optional Font size of the title. Defaults as 90. fontsize_subplot_title : int, optional Font size of the subplots. Defaults as 60. fontsize_tick : int, optional Font size of the ticks. Defaults as 40. fontsize_labels : int, optional Font size of the labels. Defaults as 84. """ if len(sites) != len(colors): raise Exception('Please enter the same number of colors as sites') mpl.rcParams['figure.figsize']=(60, 40) n_rows,n_cols = determine_row_col(len(sites)) bc_m_pbias = pbias_by_index( observe=ref_flow.groupby(grouper).sum(), predict=bc_flow.groupby(grouper).sum()) raw_m_pbias = pbias_by_index( observe=ref_flow.groupby(grouper).sum(), predict=raw_flow.groupby(grouper).sum()) bc_pbias_impact = bc_m_pbias-raw_m_pbias if type(total_bins) is type(None): total_bins=int(np.sqrt(len(bc_pbias_impact.index))) fig, axs = plt.subplots(n_rows, n_cols) axs_list = axs.ravel().tolist() plt.suptitle(f"Change in Percent Bias from Bias Correction on a {title_freq} Basis", fontsize=fontsize_title, y=1.05) i=0 for site in sites: ax = axs_list[i] site_bc_pbias_impact=bc_pbias_impact[site] impact_extreme = np.max(np.abs(site_bc_pbias_impact)) bin_step = (2*impact_extreme/total_bins) bin_range = np.arange(-impact_extreme, impact_extreme+bin_step, bin_step) site_bc_pbias_impact.plot.hist(ax=ax, bins=bin_range, color=colors[i]) ax.vlines(0, 0, ax.get_ylim()[1], color='k', linestyle='--', lw=4) ax.set_xlim(left=-150, right=150) ax.set_title(site,fontsize=fontsize_subplot_title) ax.set_ylabel("") ax.tick_params(axis='both', labelsize=fontsize_tick) i+=1 while i < len(axs_list): axs_list[i].axis('off') i+=1 fig.text(0.5, -0.04, r'$(PBias_{BC})-(PBias_{raw}) \quad (\%)$', ha='center', va = 'bottom', fontsize=fontsize_labels); fig.text(-0.02, 0.5, 'Frequencey', va='center', rotation = 'vertical', fontsize=fontsize_labels); plt.tight_layout()
[docs]def plot_residual_overlay(flows: pd.DataFrame, upstream_sites: list, downstream_site: str, start_year: int, end_year: int, ax=None, fontsize_title=40, fontsize_labels=60, fontsize_tick= 30, linecolor='k', alpha=0.3): """Plots flow upstream/downstream residuals overlayed across one year span. Plots residuals from each hydrologic year on top of each other with a refence line at zero flow. Residuals are calculated as downstream flows - sum(upstream flows). Parameters ---------- flows : pandas.DataFrame All flows to be used in plotting. upstream_sites : list Strings of the site names stored in `flows` to aggregate. downstream_sites : str Name of the downstream site stored in flows that will have the `upstream_sites` subtracted from it. start_year : int The starting year to plot. end_year : int The year to conclude on. ax : matplotlib.axes, optional Axes to plot on. If none is specified, a new one is created. fontsize_title : int, optional Font size of the title. fontsize_labels : int, optional Font size of the labels. fontsize_tick : int, optional Font size of the ticks. linecolor : str, optional Color of the lines plotted. Defaults as 'k' for black. alpha : float, optional Transparency of the lines plotted. Defaults as 0.3 to help see how residuals line up across many years. Returns ------- matplotlib.axes """ mpl.rcParams['figure.figsize']=(20,20) if type(ax) is type(None): fig,ax = plt.subplots() start_date='-10-01' end_date='-09-30' year = start_year upstream_site_string = "" residual_flow = pd.DataFrame(index=flows.index, columns=['Residuals']) residual_flow['Residuals'] = flows[downstream_site].values for upstream_site in upstream_sites: residual_flow['Residuals'] -= flows[upstream_site].values upstream_site_string += f'{upstream_site}, ' upstream_site_string = upstream_site_string[:len(upstream_site_string)-2] while year < end_year: values = residual_flow[f'{str(year)}{start_date}':f'{str(year+1)}{end_date}'].values doy = np.arange(1, len(values)+1, 1) ax.plot(doy, values, color=linecolor, alpha=alpha) year += 1 ax.plot([1,366],[0,0], color='k', linestyle='--', lw=4) ax.set_xlim(left=1, right=366) ax.set_ylabel(r"$Q_{downstream}-\sum{Q_{upstream}}$"" "r"$(m^3/s)$", fontsize=fontsize_labels); ax.set_xlabel("Day of Year", fontsize=fontsize_labels); ax.set_title(f"Upstream: {upstream_site_string} | Downstream: {downstream_site}", fontsize=fontsize_title, y=1.04); ax.tick_params(axis='both', labelsize=fontsize_tick) return ax
[docs]def norm_change_annual_flow(sites: list, before_bc: pd.DataFrame, after_bc: pd.DataFrame, colors = list, fontsize_title=60, fontsize_labels=40, fontsize_tick= 30): """Normalized change in annual flow volume. Plots a series of subplots containing bar charts that depict the differnece in normalized annual flow volume due to bias correction. Parameters ---------- sites : list String names of all the sites to be plotted, matching sites contained in the DataFrames `before_bc` and `after_bc`. before_bc : pandas.DataFrame Contains flows, (not aggregated), before bias correction is applied. after_bc : pandas.DataFrame Contains flows, (not aggregated), after bias correction is applied. colors : list Ccolors to be used for each site's subplot in the same order as sites, (does not have to be unique). fontsize_title : int, optional Font size of the title. Defaults as 60. fontsize_labels : int, optional Font size of the labels. Defaults as 40. fontsize_tick : int, optional Font size of the ticks. Defaults as 30. """ mpl.rcParams['figure.figsize']=(30,20) WY_grouper = calc_water_year(before_bc) after_bc_annual = after_bc.groupby(WY_grouper).sum() before_bc_annual = before_bc.groupby(WY_grouper).sum() after_bc_annual, before_bc_annual = dst.normalize_pair(data=after_bc_annual, norming_data=before_bc_annual) diff_annual = after_bc_annual - before_bc_annual max_mag = np.abs(np.max(diff_annual.max())) min_mag = np.abs(np.min(diff_annual.min())) extreme_mag = np.max([max_mag, min_mag]) n_rows, n_cols = determine_row_col(len(sites)) fig, axs = plt.subplots(n_rows, n_cols) plt.suptitle("Change in Annual Flow Volume from Bias Correction", fontsize=fontsize_title, y=1.05) axs_list = axs.ravel().tolist() i=0 for site in sites: ax = axs_list[i] ax.bar(diff_annual.index, diff_annual[site].values, color=color_list[i]) ax.set_title(site, fontsize=fontsize_tick) ax.set_ylim(top = extreme_mag, bottom=-extreme_mag) ax.tick_params(axis='both', labelsize=fontsize_tick) i += 1 while i < len(axs_list): axs_list[i].axis('off') i += 1 fig.text(0.5, -0.04, 'Hydrologic Year', ha='center', va = 'bottom', fontsize=fontsize_labels); fig.text(-0.04, 0.5, "Normalized Change in Annual Flow Volume", va='center', rotation = 'vertical', fontsize=fontsize_labels); plt.tight_layout()
[docs]def pbias_compare_hist(sites: list, raw_flow: pd.DataFrame, ref_flow: pd.DataFrame, bc_flow: pd.DataFrame, grouper=pd.Grouper(freq='Y'), total_bins=None, title_freq='Yearly', fontsize_title=90, fontsize_subplot_title=60, fontsize_tick=40,fontsize_labels=84, x_extreme=150): """Histograms comparing percent bias before/after bias correction. Creates a number of histogram subplots by each given sites that plot percent bias both before and after bias correction. Parameters ---------- sites : list Sites corresponding to the columns of the flow DataFrames, `raw_flow`, `ref_flow`, and `bc_flow`, to be plotted. raw_flow : pandas.DataFrame Flows before bias correction. ref_flow : pandas.DataFrame Reference flows for comparison as true values. bc_flow : pandas.DataFrame Flows after bias correction. grouper : pandas.TimeGrouper How flows should be grouped for bias correction, defaults as yearly. total_bins : int, optional Number of bins to use in the histogram plots. if none specified, defaults to the floored square root of the number of pbias difference values. title_freq : str, optional An adjective description of the frequency with which the flows are grouped corresponding to `grouper`. Defaults as 'Yearly'. fontsize_title : int, optional Font size of the title, defaulting as 90. fontsize_subplot_title : int, optional Font size of the subplot title, defaulting as 60. fontsize_ticler : int, optional Font size of the ticks, defaulting as 40. fontsize_labels : int, optional Font size of the labels, defaulting as 84. x_extreme : float, optional Greatest magnitude on the horizontal axis to specify the range, defaulting as 150, which results in a range of (-150, 150). This is useful if desiring to zoom in closer to the origin and exclued outlying percent biases. """ mpl.rcParams['figure.figsize']=(60,40) n_rows,n_cols = determine_row_col(len(sites)) bc_m_pbias = dst.pbias_by_index( observe=ref_flow.groupby(grouper).sum(), predict=bc_flow.groupby(grouper).sum()) raw_m_pbias = dst.pbias_by_index( observe=ref_flow.groupby(grouper).sum(), predict=raw_flow.groupby(grouper).sum()) if type(total_bins)==type(None): total_bins=int(np.sqrt(len(bc_m_pbias.index))) fig, axs = plt.subplots(n_rows,n_cols) axs_list = axs.ravel().tolist() plt.suptitle(f"Change in Percent Bias from Bias Correction on a {title_freq} Basis", fontsize=fontsize_title,y=1.05) i=0 for site in sites: ax = axs_list[i] before_pbias=raw_m_pbias[site] after_pbias=bc_m_pbias[site] before_extreme = np.max(np.abs(before_pbias)) after_extreme = np.max(np.abs(after_pbias)) extreme=np.max([before_extreme,after_extreme]) bin_step = (2*extreme/total_bins) bin_range = np.arange(-extreme,extreme+bin_step,bin_step) before_pbias.plot.hist(ax=ax,bins=bin_range,color='red',edgecolor='black',alpha=0.5) after_pbias.plot.hist(ax=ax,bins=bin_range,color='blue',edgecolor='black',alpha=0.5) ax.vlines(0,0,ax.get_ylim()[1], color='k',linestyle='--',lw=4) x_extreme = np.abs(x_extreme) ax.set_xlim(left=-x_extreme,right=x_extreme) ax.set_title(site,fontsize=fontsize_subplot_title) ax.set_ylabel("") ax.tick_params(axis='both',labelsize=fontsize_tick) i+=1 while i < len(axs_list): axs_list[i].axis('off') i+=1 fig.text(0.5, -0.04, r'$PBias_{monthly} \quad (\%)$', ha='center', va = 'bottom', fontsize=fontsize_labels); fig.text(-0.02, 0.5, 'Frequencey', va='center', rotation = 'vertical', fontsize=fontsize_labels); plt.legend(handles=custom_legend(['Before BC','After BC', 'Overlap'],['red','blue','purple']), loc='lower right',fontsize=fontsize_labels) plt.tight_layout()
[docs]def compare_PDF(flow_dataset:xr.Dataset, gauge_sites = list, raw_var='raw_flow', ref_var='reference_flow', bc_var='bias_corrected_total_flow', raw_name='Mizuroute Raw', ref_name='NRNI Reference', bc_name='BMORPH BC', fontsize_title=40, fontsize_labels=30, fontsize_tick= 20): """Compare probability distribution functions. Plots the PDF's of the raw, reference, and bias corrected flows for each gauge site. Parameters ---------- flow_dataset : xarray.Dataset Contatains raw, reference, and bias corrected flows. gauges_sites : list Gauge sites to be plotted as used in the `flow_dataset`. raw_var : str, optional The string to access the raw flows in `flow_dataset` for flows before bias correction. Defaults as 'raw_flow'. ref_var : str, optional The string to access the reference flows in `flow_dataset` for true flows. Defaults as 'reference_flow'. bc_var : str, optional The string to access the bias corrected flows in flow_dataset for flows after bias correction. Defaults as 'bias_corrected_total_flow'. raw_name : str, optional Label for the raw flows before bias correction. Defaults as 'Mizuroute Raw'. ref_name : str, optional Label for the reference flows. Defaults as 'NRNI Reference'. bc_name : str, optional Label for the bias corrected flows after bias correction. Defaults as 'BMORPH BC'. fontsize_title : int, optional Fontsize of the title. Defaults as 40. fontsize_labels : int, optional Fontsize of the lables. Defaults as 30. fontsize_tick : int, optional Fontsize of the ticks. Defaults as 20. """ n_rows, n_cols = determine_row_col(len(gauge_sites)) fig, axes = plt.subplots(n_rows, n_cols, figsize=(20, 20), sharex=False, sharey=False) axes = axes.flatten() fig.suptitle("Probability Distribution Functions", y=1.01,x=0.4, fontsize=fontsize_title) for i, site in enumerate(gauge_sites): cmp = flow_dataset.sel(outlet=site) ax=axes[i] sns.kdeplot(np.log10(cmp[raw_var].values), ax=ax, color='grey', legend=False, label=raw_name) sns.kdeplot(np.log10(cmp[ref_var].values), ax=ax, color='black', legend=False, label=ref_name) sns.kdeplot(np.log10(cmp[bc_var].values), ax=ax, color='red', legend=False, label=bc_name) ax.set_title(site, fontsize=fontsize_labels) ax.tick_params(axis='both', labelsize=fontsize_tick) # relabel axis in standard base 10 labels = ax.get_xticks() ax.set_xticklabels(labels=["$" + str(10**j) + "$" for j in labels], rotation=30) axes[-1].axis('off') axes[i].legend(bbox_to_anchor=(1.1, 0.8), fontsize=fontsize_tick) fig.text(0.4, 0.04, r'Q [$m^3/s$]', ha='center', fontsize=fontsize_labels) fig.text(-0.04, 0.5, r'Density', va='center', rotation='vertical', fontsize=fontsize_labels) plt.subplots_adjust(wspace=0.3, hspace= 0.45, left = 0.05, right = 0.8, top = 0.95)
[docs]def compare_CDF(flow_dataset: xr.Dataset, plot_sites: list, raw_var: str, raw_name: str, ref_var: str, ref_name: str, bc_vars: list, bc_names: list, plot_colors: list, logit_scale = True, logarithm_base = '10', units = r'Q [$m^3/s$]', markers = ['o', 'x', '*', '*'], figsize = (20,20), sharex = False, sharey = True, fontsize_title = 40, fontsize_labels = 30, fontsize_tick = 20, markersize = 1, alpha = 0.3): """Compare probability distribution functions on a logit scale. Plots the CDF's of the raw, reference, and bias corrected flows. Parameters ---------- flow_dataset : xarray.Dataset Contatains raw, reference, and bias corrected flows. gauges_sites : list Gauge sites to be plotted contained in `flow_dataset`. raw_var : str Accesses the raw flows in `flow_dataset` for flows before bias correction. raw_name : str Label for the raw flows in the legend, corresponding to `raw_var`. ref_var : str Accesses the reference flows in `flow_dataset` for true flows. ref_name : str Label for the reference flows in the legend, corresponding to `ref_var`. bc_vars : list Accesses the bias corrected flows in `flow_dataset`, where each element accesses its own bias corrected flows. Can be a size of 1. bc_names : list Label for the bias corrected flows in the legend for each entry in `bc_var`, assumed to be in the same order. Can be a size of 1. logit_scale : boolean, optional Whether to plot the vertical scale on a logit axis (True) or not (False). Defaults as True. logarithm_base : str, optional The logarthimic base to use for the horizontal scale. Only the following are currently supported: '10' to use a log10 horizontal scale (default) 'e' to use a natural log horizontal scale units : str, optional The horizontal axis's label for units, defaults as r'Q [$m^3/s$]'. plot_colors : list, optional Colors to be plotted for the flows corresponding to `raw_var`, `ref_var`, and `bc_var`, defaulting as ['grey', 'black', 'blue', 'red'], assuming there are two entries in `bc_var`. markers : list, optional Markers to be plotted for the flows corresponding to `raw_var`, `ref_var`, and `bc_var`, defaulting as ['o', 'x', '*', '*'], assuming there are two entries in `bc_var`. figsize : tuple, optional Figure size following matplotlib notation, defaults as (20, 20). sharex : boolean or str, optional Whether horizontal axis should be shared amongst subplots, defaulting as False. sharey : boolean or str, optional Whether vertical axis should be shared amongst subplots, defaulting as True. fontsize_title : int, optional Font size of the title, defaults as 40. fontsize_labels : int, optional Font size of the labels, defaults as 30. fontsize_tick : int, optional Font size of the ticks, defaults as 20. markersize : float, optional Size of the markers plotted, defaults as 1. alpha : float, optional Transparancy of the markers plotted, defaults as 0.3 to help see where markers clump together. Returns ------- matplotlib.figure, matplotlib.axes """ if len(bc_vars) == 0: raise Exception("Please enter a non-zero number strings in bc_vars to be used") if len(bc_vars) != len(bc_names): raise Exception("Please have the same number of entries in bc_names as bc_names") if len(plot_colors) < len(bc_vars): raise Exception(f"Please enter at least {len(bc_vars)} colors in plot_colors") if logarithm_base == '10': log_func = log10_1p elif logarithm_base == 'e': log_func = np.log1p else: raise Exception("Please enter logarithm_base as '10' or 'e'") n_rows, n_cols = determine_row_col(len(plot_sites)) fig, axes = plt.subplots(n_rows, n_cols, figsize = figsize, sharex = sharex, sharey = sharey) axes = axes.flatten() fig.suptitle("Cumulative Distribution Functions", y = 1.01, x = 0.4, fontsize = fontsize_title) for i, site in enumerate(plot_sites): ax = axes[i] cmp = flow_dataset.sel(seg = site) raw = ECDF(log_func(cmp[raw_var].values)) ref = ECDF(log_func(cmp[ref_var].values)) cors = list() for bc_var in bc_vars: cors.append(ECDF(log_func(cmp[bc_var].values))) linewidth = markersize / 2 ax.plot(raw.x, raw.y, color = plot_colors[0], label = raw_name, lw = linewidth, linestyle = '--', marker = 'o', markersize = markersize, alpha = alpha) ax.plot(ref.x, ref.y, color = plot_colors[1], label = ref_name, lw = linewidth, linestyle = '--', marker = 'x', markersize = markersize, alpha = alpha) for j, cor in enumerate(cors): ax.plot(cor.x, cor.y, color = plot_colors[2+j], label = bc_names[j], lw = linewidth, linestyle = '--', marker = '*', markersize = markersize, alpha = alpha) ax.set_title(site, fontsize = fontsize_labels) ax.tick_params(axis = 'both', labelsize = fontsize_tick) # relabel axis to account for log_func xlabels = ax.get_xticks() if logarithm_base == '10': ax.set_xticklabels(labels = ["$" + "{:0.0f}".format(10**k-1) + "$" for k in xlabels], rotation = 30) elif logarithm_base == 'e': ax.set_xticklabels(labels = ["$" + "{:0.2f}".format(exp(k)-1) + "$" for k in xlabels], rotation=30) if logit_scale: ax.set_yscale('logit') ax.minorticks_off() ax.yaxis.set_major_formatter(mpl.ticker.LogitFormatter()) ylabels = ax.get_yticks() new_ylabels = list() for k, ylabel in enumerate(ylabels): if k % 3 == 0: new_ylabels.append(ylabel) ax.set_yticks(ticks = new_ylabels) axes[-1].axis('off') axes[i].legend(bbox_to_anchor = (1.1, 0.8), fontsize = fontsize_tick) fig.text(0.4, 0.04, units, ha = 'center', fontsize = fontsize_labels) fig.text(-0.04, 0.5, r'Non-exceedence probability', va = 'center', rotation = 'vertical', fontsize = fontsize_labels) plt.subplots_adjust(hspace = 0.35, left = 0.05, right = 0.8, top = 0.95) return fig, axes
[docs]def spearman_diff_boxplots_annual(raw_flows: pd.DataFrame, bc_flows: pd.DataFrame, site_pairings, fontsize_title=40, fontsize_tick=30, fontsize_labels=40, subtitle = None, median_plot_color = 'red'): """Annual difference in spearman rank as boxplots. Creates box plots for each stide pairing determing the difference in spearman rank for each year between the raw and bias corrected data. Parameters ---------- raw_flows : pandas.DataFrame Raw flows before bias correction with sites in the columns and time in the index. bc_flows : pandas.DataFrame Bias corrected flows with sites in the columns and time in the index. site_pairings : List[List[str]] List of list of string site pairs e.g. [['downstream_name','upstream_name'],...]. This is used to organize which sites should be paired together for computing the spearman rank difference. fontsize_title : int, optional Font size of the title, defaults as 40. fontsize_tick : int, optional Font size of the ticks, defaults as 30. fontsize_labels : int, optional Font size of the labels, defaults as 40. subtitle : str, optional Subtitle to include after "Annual Chnage in Speraman Rank: ". If no subtitle is is specified, none is included and only the title is plotted. median_plot_color : str Color to plot the boxplot's median as, defaults as 'red'. """ if np.where(raw_flows.index != bc_flows.index)[0].size != 0: raise Exception('Please ensure raw_flows and bc_flows have the same index') WY_grouper = calc_water_year(raw_flows) annual_spearman_difference = pd.DataFrame(index=np.arange(WY_grouper[0], WY_grouper[-1],1), columns=[str(pairing) for pairing in site_pairings]) for WY in annual_spearman_difference.index: raw_flow_WY = raw_flows[f"{WY}-10-01":f"{WY+1}-09-30"] bc_flow_WY = bc_flows[f"{WY}-10-01":f"{WY+1}-09-30"] for site_pairing in site_pairings: downstream = site_pairing[0] upstream = site_pairing[1] downstream_raw = raw_flow_WY[downstream] upstream_raw = raw_flow_WY[upstream] downstream_bc = bc_flow_WY[downstream] upstream_bc = bc_flow_WY[upstream] raw_spearman, raw_pvalue = scipy.stats.spearmanr(a=downstream_raw.values,b=upstream_raw.values) bc_spearman, bc_pvalue = scipy.stats.spearmanr(a=downstream_bc.values,b=upstream_bc.values) annual_spearman_difference.loc[WY][str(site_pairing)] = raw_spearman-bc_spearman fig, ax = plt.subplots(figsize=(20,20)) if isinstance(subtitle, type(None)): plt.suptitle('Annual Change in Spearman Rank', fontsize=fontsize_title, y=1.05) else: assert isinstance(subtitle, str) plt.suptitle(f'Annual Change in Spearman Rank: {subtitle}', fontsize = fontsize_title, y = 1.05) max_vert = np.max(annual_spearman_difference.max().values)*1.1 min_vert = np.min(annual_spearman_difference.min().values)*1.1 ax.axhline(y=0, color='black', linestyle='--') ax.boxplot([annual_spearman_difference[site_pairing].values for site_pairing in annual_spearman_difference.columns], labels=[f'{site_pairing[0]},\n{site_pairing[1]}' for site_pairing in site_pairings], medianprops={'color':median_plot_color, 'lw':4}, boxprops={'lw':4}, whiskerprops={'lw':4}, capprops={'lw':4}) ax.set_ylim(top=max_vert, bottom=min_vert) ax.tick_params(axis='both', labelsize=fontsize_tick) fig.text(0.5, -0.04, "Gauge Site Pairs: Downstream, Upstream", ha='center', va = 'bottom', fontsize=fontsize_labels); fig.text(-0.04, 0.5, r'$r_s(Q_{raw}^{up}, Q_{raw}^{down}) - r_s(Q_{bc}^{up}, Q_{bc}^{down})$', va='center', rotation = 'vertical', fontsize=fontsize_labels); plt.tight_layout()
[docs]def kl_divergence_annual_compare(flow_dataset: xr.Dataset, sites: list, raw_var: str, raw_name: str, ref_var: str, ref_name: str, bc_vars: list, bc_names: list, plot_colors: list, title = "Annual KL Diveregence Before/After Bias Correction", fontsize_title = 40, fontsize_tick = 30, fontsize_labels = 40, fontsize_legend = 30, showfliers = False, sharex = True, sharey = 'row', TINY_VAL = 1e-6, figsize = (30,20), show_y_grid = True): """Kullback-Liebler Divergence compared before and after bias correction as boxplots. Plots the KL divergence for each year per site as KL(P_{ref} || P_{raw}) and KL( P_{ref} || P_{bc}). Parameters ---------- flow_dataset : xarray.Dataset Contains raw (uncorrected), reference (true), and bias corrected flows. sites : list Contains all the sites to be plotted as included in `flow_dataset`, (note that if the number of sites to be plotted is square or rectangular, the last site will not be plotted to save room for the legend). raw_var : str Accesses the raw flows in `flow_dataset`. raw_name : str Label for the raw flows in the legend and horizontal labels corresponding to `raw_var`. ref_var : str Accesses the reference flows in `flow_dataset`. ref_name : str Label the reference flows in the legend and horizontal labels corresponding to `ref_var`. bc_vars : list String(s) to access the bias corrected flows in `flow_dataset`. bc_names : list Label(s) for the bias corrected flows in the legend and horizontal labels, corresponding to each element in `bc_vars`. plot_colors : list Colors to be plotted for the raw and the bias corrected flows, respectively. title : str, optional Title to be plotted, defaults as "Annual KL Diveregence Before/After Bias Correction". fontsize_title : int, optional Font size of the title, defaults as 40. fontsize_tick : int, optional Font size of the ticks, defaults as 30. fontsize_labels : int, optional Font size of the labels, defaults as 40. fontsize_legend : int, optional Font size of the legend text, defaults as 30. showfliers : boolean, optional Whether to include fliers in the boxplots, defaults as False. sharex : boolean or str, optional Whether the horizontal axis is shared, defaults as True. sharey : boolean or str, optional Whether the vertical axis is shared, defaults as 'row' to share the vertical axis in the same row. TINY_VAL : float, optional Used to ensure there are no zero values in the data because zero values cause unsual behavior in calculating the KL Divergence. Defaults as 1E-6. figsize : tuple, optional Figure size following maptlotlib connventions, defaults as (30,20). show_y_grid : boolean, optional Whether to plot y grid lines, defaults as True. Returns ------- maptlotlib.figure, matplotlib.axes """ raw_flows = flow_dataset[raw_var].to_pandas() ref_flows = flow_dataset[ref_var].to_pandas() bc_flows = list() for bc_var in bc_vars: bc_flows.append(flow_dataset[bc_var].to_pandas()) WY_grouper = calc_water_year(raw_flows) WY_array = np.arange(WY_grouper[0], WY_grouper[-1], 1) n_rows, n_cols = determine_row_col(len(sites)) fig, axs = plt.subplots(n_rows, n_cols, figsize = figsize, sharex = sharex, sharey = sharey) axs_list = axs.ravel().tolist() kldiv_refraw_annual = pd.DataFrame(index = WY_array, columns = sites) kldiv_refbc_annuals = list() for bc_var in bc_vars: kldiv_refbc_annuals.append(pd.DataFrame(index = WY_array, columns = sites)) plt.suptitle(title, fontsize = fontsize_title, y = 1.05) for WY in WY_array: raw_flow_WY = raw_flows[f"{WY}-10-01":f"{WY+1}-09-30"] ref_flow_WY = ref_flows[f"{WY}-10-01":f"{WY+1}-09-30"] bc_flow_WYs = list() for bc_flow in bc_flows: bc_flow_WYs.append(bc_flow[f"{WY}-10-01":f"{WY+1}-09-30"]) total_bins = int(np.sqrt(len(raw_flow_WY.index))) for site in sites: ref_WY_site_vals = ref_flow_WY[site].values raw_WY_site_vals = raw_flow_WY[site].values bc_WY_site_vals = list() for bc_flow_WY in bc_flow_WYs: bc_WY_site_vals.append(bc_flow_WY[site].values) ref_WY_site_pdf, ref_WY_site_edges = np.histogram(ref_WY_site_vals, bins = total_bins, density = True) raw_WY_site_pdf = np.histogram(raw_WY_site_vals, bins = ref_WY_site_edges, density = True)[0] bc_WY_site_pdfs = list() for bc_WY_site_val in bc_WY_site_vals: bc_WY_site_pdf = np.histogram(bc_WY_site_val, bins = ref_WY_site_edges, density = True)[0] bc_WY_site_pdf[bc_WY_site_pdf == 0] = TINY_VAL bc_WY_site_pdfs.append(bc_WY_site_pdf) ref_WY_site_pdf[ref_WY_site_pdf == 0] = TINY_VAL raw_WY_site_pdf[raw_WY_site_pdf == 0] = TINY_VAL kldiv_refraw_annual.loc[WY][site] = scipy.stats.entropy(pk = raw_WY_site_pdf, qk = ref_WY_site_pdf) for i, (kldiv_refbc_annual, bc_WY_site_pdf) in enumerate(zip(kldiv_refbc_annuals, bc_WY_site_pdfs)): kldiv_refbc_annual.loc[WY][site] = scipy.stats.entropy(pk = bc_WY_site_pdf, qk = ref_WY_site_pdf) kldiv_refbc_annuals[i] = kldiv_refbc_annual plot_labels = [raw_name] plot_labels.extend(bc_names) for i, site in enumerate(sites): ax=axs_list[i] plot_vals = [kldiv_refraw_annual[site].values] plot_vals.extend([kldiv_refbc_annual[site].values for kldiv_refbc_annual in kldiv_refbc_annuals]) box_dict = ax.boxplot(plot_vals, patch_artist = True, showfliers = showfliers, widths = 0.8, notch = True) for item in ['boxes', 'fliers', 'medians', 'means']: for sub_item, color in zip(box_dict[item], plot_colors): plt.setp(sub_item, color = color) ax.set_title(site, fontsize = fontsize_labels) ax.set_xticks(np.arange(1, len(plot_labels)+1)) ax.set_xticklabels(plot_labels, fontsize = fontsize_tick, rotation = 45) ax.tick_params(axis = 'both', labelsize = fontsize_tick) if show_y_grid: ax.grid(which = 'major', axis = 'y', alpha = 0.5) # gets rid of any spare axes i += 1 while i < len(axs_list): axs_list[i].axis('off') i += 1 # ensures last axes is off to make room for the legend axs_list[-1].axis('off') fig.text(-0.04, 0.5, "Annual KL Divergence", va = 'center', rotation = 'vertical', fontsize = fontsize_labels); fig.text(0.5, -0.04, r'$KL(P_{' + f'{ref_name}' + r'} || P_{scenario})$', va = 'bottom', ha = 'center', fontsize = fontsize_labels); axs_list[-1].legend(handles=custom_legend(names=plot_labels, colors = plot_colors), fontsize = fontsize_legend, loc = 'center') plt.tight_layout() return fig, axs
[docs]def spearman_diff_boxplots_annual_compare(flow_dataset: xr.Dataset, site_pairings, raw_var: str, bc_vars: list, bc_names: list, plot_colors: list, showfliers = True, fontsize_title=40, fontsize_tick=25, fontsize_labels=30, figsize = (20,20), sharey = 'row'): """Annual difference in spearman rank as boxplots. Creates box plots for each site pairing determining the difference in spearman rank for each year between the raw and the bias corrected data. Parameters ---------- flow_dataset : xarray.Dataset Contains raw (uncorrected), reference (true), and bias corrected flows. site_pairings : List[List[str]] List of list of string site pairs e.g. [['downstream_name','upstream_name'],...]. This is used to organize which sites should be paired together for computing the spearman rank difference. raw_var : str Accesses the raw (uncorrected) flows in `flow_dataset`. bc_vars : list Strings to access the bias corrected flows in `flow_dataset`. bc_names : list Labels for the bias corrected flows from `flow_dataset`, corresponding to each element in `bc_vars` in the same order. plot_colors : list Colors that are in the same order as the `bc_vars` and `bc_names` to be used in plotting. showfliers : boolean, optional Whether to show fliers on the boxplots, defaults as True. fontsize_title : int, optional Font size of the title, defaults as 40. fontsize_tick : int, optional Font size of the ticks, defaults as 25. fontsize_lables : int, optional Font size of the labels, defaults as 30. figsize : tuple Figure size following matplotlib connventions, defaults as (20, 20). sharey : boolean or str, optional Whether or how the vertical axis are to be shared, defaults as 'row' to have vertical axis in the same row shared. Returns ------- matplotlib.figure, matplotlib.axes """ if len(bc_vars) == 0: raise Exception("Please enter a non-zero number strings in bc_vars to be used") if len(bc_vars) != len(bc_names): raise Exception("Please have the same number of entries in bc_names as bc_names") if len(plot_colors) < len(bc_vars): raise Exception(f"Please enter at least {len(bc_vars)} colors in plot_colors") raw_flows = flow_dataset[raw_var].to_pandas() bc_flows = list() for bc_var in bc_vars: bc_flows.append(flow_dataset[bc_var].to_pandas()) WY_grouper = calc_water_year(raw_flows) WY_index = np.arange(WY_grouper[0], WY_grouper[-1], 1) annual_spearman_differences = list() for bc_var in bc_vars: annual_spearman_differences.append(pd.DataFrame(index=WY_index, columns=[str(pairing) for pairing in site_pairings])) for WY in WY_index: raw_flow_WY = raw_flows[f"{WY}-10-01":f"{WY+1}-09-30"] bc_flow_WY_list= list() for bc_flow in bc_flows: bc_flow_WY_list.append(bc_flow[f"{WY}-10-01":f"{WY+1}-09-30"]) for site_pairing in site_pairings: downstream = site_pairing[0] upstream = site_pairing[1] downstream_raw = raw_flow_WY[downstream] upstream_raw = raw_flow_WY[upstream] downstream_bcs = list() upstream_bcs = list() for bc_flow_WY in bc_flow_WY_list: downstream_bcs.append(bc_flow_WY[downstream]) upstream_bcs.append(bc_flow_WY[upstream]) raw_spearman, raw_pvalue = scipy.stats.spearmanr(a=downstream_raw.values, b=upstream_raw.values) for i, (downstream_bc, upstream_bc, annual_spearman_difference) in enumerate( zip(downstream_bcs, upstream_bcs, annual_spearman_differences)): bc_spearman, bc_pvalue = scipy.stats.spearmanr(a=downstream_bc.values, b=upstream_bc.values) annual_spearman_difference.loc[WY][str(site_pairing)] = raw_spearman-bc_spearman annual_spearman_differences[i] = annual_spearman_difference n_rows, n_cols = determine_row_col(len(site_pairings)) fig, axs = plt.subplots(n_rows, n_cols, figsize=figsize, sharex = True, sharey = sharey) axs_list = axs.ravel().tolist() #rewrites wrt subplots for site_pairings fig.suptitle('Annual Change in Spearman Rank', fontsize=fontsize_title, y=1.05) max_vert = np.max([df.values for df in annual_spearman_differences])*1.1 min_vert = np.min([df.values for df in annual_spearman_differences]) min_vert = np.min([min_vert*1.1, min_vert*0.9]) for i, site_pairing in enumerate(site_pairings): ax = axs_list[i] ax.axhline(y=0, color='black', linestyle='--') ax.set_title(f"{site_pairing[0]}, {site_pairing[1]}", fontsize = fontsize_labels) box_dict = ax.boxplot([annual_spearman_difference[str(site_pairing)].values for annual_spearman_difference in annual_spearman_differences], patch_artist = True, showfliers = showfliers, widths = 0.8, notch = True) ax.set_ylim(top=max_vert, bottom=min_vert) ax.set_xticklabels(bc_names, fontsize = fontsize_tick) ax.tick_params(axis='both', labelsize=fontsize_tick) for item in ['boxes', 'fliers', 'medians', 'means']: for sub_item, color in zip(box_dict[item], plot_colors): plt.setp(sub_item, color = color) fig.text(0.5, -0.04, "Bias Correction Scenario", ha='center', va = 'bottom', fontsize=fontsize_labels); fig.text(-0.04, 0.5, r'$r_s(Q_{raw}^{up}, Q_{raw}^{down}) - r_s(Q_{bc}^{up}, Q_{bc}^{down})$', va='center', rotation = 'vertical', fontsize=fontsize_labels); plt.tight_layout() return fig, axs
[docs]def compare_CDF_all(flow_dataset:xr.Dataset, plot_sites: list, raw_var: str, raw_name: str, ref_var: str, ref_name: str, bc_vars: list, bc_names: list, plot_colors: list, logit_scale = True, logarithm_base = '10', units = r'Q [$m^3/s$]', figsize = (20,20), fontsize_title = 40, fontsize_labels = 40, fontsize_tick = 40, markersize = 1, alpha = 0.3): """Compare probability distribution functions as a summary statistic. Plots the CDF's of the raw, reference, and bias corrected flows with data from all sites in plot_sites combined for a summary statistic. Parameters ---------- flow_dataset : xarray.Dataset Contains raw (uncorrected), reference (true), and bias corrected flows. plot_sites : list Gauge sites to be plotted. raw_var : str Accesses the raw (uncorrected) flows in `flow_dataset`. raw_name : str Label for the raw flows in the legend, corresponding to `raw_var`. ref_var : str Accesses the reference (true) flows in `flow_dataset`. ref_name : str Label for the reference flows in the legend, corresponding to `ref_var`. bc_vars : list Accesses the bias corrected flows in `flow_dataset`. Can be a size of 1. bc_names : list Label for the bias corrected flows in the legend, corresponding to `bc_var`. Can be a size of 1. plot_colors : list, optional Colors to be plotted for the flows corresponding to `raw_var`, `ref_var`, and `bc_var`, defaulting as ['grey', 'black', 'blue', 'red'], assuming there are two entries in `bc_var`. logit_scale : True, optional Whether to plot the vertical scale on a logit axis (True) or not (False). Defaults as True. logarithm_base : str, optional The logarthimic base to use for the horizontal scale. Only the following are currently supported: '10' to use a log10 horizontal scale (default) 'e' to use a natural log horizontal scale units : str, optional The horizontal axis's label for units, defaults as r'Q [$m^3/s$]'. figsize : tuple, optional Figure size following matplotlib connventions, defaults as (20, 20). fontsize_title : int, optional Font size of the title, defaults as 40. fontsize_labels : int, optional Font size of the labels, defaults as 40. fontsize_tick : int, optional Font size of the ticks, defaults as 40. markersize : float, optional Size of the markers plotted, defaults as 1. Linewidth is half of this value. alpha : float, optional Transparancy of the lines and markers, defaults as 0.3. Returns ------- matplotlib.figure, matplotlib.axes """ if len(bc_vars) == 0: raise Exception("Please enter a non-zero number strings in bc_vars to be used") if len(bc_vars) != len(bc_names): raise Exception("Please have the same number of entries in bc_names as bc_names") if len(plot_colors) < len(bc_vars): raise Exception(f"Please enter at least {len(bc_vars)} colors in plot_colors") if logarithm_base == '10': log_func = log10_1p elif logarithm_base == 'e': log_func = np.log1p else: raise Exception("Please enter logarithm_base as '10' or 'e'") fig, ax = plt.subplots(figsize = figsize) fig.suptitle("Cumulative Distribution Function", y = 1.01, x = 0.4, fontsize = fontsize_title) cmp = flow_dataset.sel(seg = plot_sites) raw = ECDF(log_func(cmp[raw_var].values.flatten())) ref = ECDF(log_func(cmp[ref_var].values.flatten())) cors = list() for bc_var in bc_vars: cors.append(ECDF(log_func(cmp[bc_var].values.flatten()))) linewidth = markersize / 2 ax.plot(raw.x, raw.y, color = plot_colors[0], label = raw_name, lw = linewidth, linestyle = '--', marker = 'o', markersize = markersize, alpha = alpha) ax.plot(ref.x, ref.y, color = plot_colors[1], label = ref_name, lw = linewidth, linestyle = '--', marker = 'x', markersize = markersize, alpha = alpha) for j, cor in enumerate(cors): ax.plot(cor.x, cor.y, color = plot_colors[2+j], label = bc_names[j], lw = linewidth, linestyle = '--', marker = '*', markersize = markersize, alpha = alpha) ax.tick_params(axis = 'both', labelsize = fontsize_tick) # relabel axis to account for log_func xlabels = ax.get_xticks() if logarithm_base == '10': ax.set_xticklabels(labels = ["$" + "{:0.0f}".format(10**k-1) + "$" for k in xlabels]) elif logarithm_base == 'e': ax.set_xticklabels(labels = ["$" + "{:0.2f}".format(exp(k)-1) + "$" for k in xlabels]) if logit_scale: ax.set_yscale('logit') ax.minorticks_off() ax.yaxis.set_major_formatter(mpl.ticker.LogitFormatter()) ylabels = ax.get_yticks() new_ylabels = list() for k, ylabel in enumerate(ylabels): if k % 3 == 0: new_ylabels.append(ylabel) ax.set_yticks(ticks = new_ylabels) plot_labels = [raw_name, ref_name] plot_labels.extend(bc_names) ax.legend(handles = custom_legend(plot_labels, plot_colors), fontsize = fontsize_tick) fig.text(0.4, 0.04, units, ha = 'center', fontsize = fontsize_labels) fig.text(-0.04, 0.5, r'Non-exceedence probability', va = 'center', rotation = 'vertical', fontsize = fontsize_labels) plt.subplots_adjust(hspace = 0.35, left = 0.05, right = 0.8, top = 0.95) return fig, axes
[docs]def compare_mean_grouped_CPD(flow_dataset: xr.Dataset, plot_sites: list, grouper_func, raw_var: str, raw_name: str, ref_var: str, ref_name: str, bc_vars: list, bc_names: list, plot_colors: list, subset_month = None, units = r'Mean Annual Flow [$m^3/s$]', figsize = (20,20), sharex = False, sharey = False, pp_kws = dict(postype='cunnane'), fontsize_title = 80, fontsize_legend = 68, fontsize_subplot = 60, fontsize_tick = 45, fontsize_labels = 80, linestyles = ['-','-','-'], markers = ['.','.','.'], markersize = 30, alpha = 1, legend_bbox_to_anchor = (1, 1), fig = None, axes = None, start_ax_index = 0, tot_plots = None ): """ Cumulative Probability Distributions plots the CPD's of the raw, reference, and bias corrected flows on a probability axis ---- flow_dataset : xarray.Dataset Contatains raw, reference, and bias corrected flows. plot_sites : list A list of sites to be plotted. grouper_func Function to group a pandas.DataFrame index by to calculate the mean of the grouped values. raw_var : str The string to access the raw flows in flow_dataset. raw_name : str Label for the raw flows in the legend. ref_var : str The string to access the reference flows in flow_dataset. ref_name : str Label for the reference flows in the legend. bc_vars : list List of strings to access the bias corrected flows in flow_dataset. bc_names : list List of labels for the bias corrected flows in the legend. plot_colors : list Contains the colors to be plotted for `raw_var`, `ref_var`, and `bc_vars`, respectively. subset_month: int, optional The integer date of a month to subset out for plotting, (ex: if you want to subset out January, enter 1). Defaults as None to avoid subsetting and use all the data in the year. units : str, optional Vertical axis's label for units, defaults as r'Mean Annual Flow [$m^3/s$]'. pp_kws : dict, optional Plotting position computation as specified by https://matplotlib.org/mpl-probscale/tutorial/closer_look_at_plot_pos.html. Defaults as dict(postype='cunnane') for cunnane plotting positions. fontsize_title : int, optional Font size for the plot title, defaults as 80. fontsize_legend : int, optional Font size for the plot legend, defaults as 68. fontsize_subplot : int, optional Font size for the plot subplot text, default as 60. fontsize_tick : int, optional Font size for the plot ticks, defaults as 45. fontsize_labels : int, optional Font size for the horizontal and vertical axis labels, defaults as 80. linestyles : list, optional Linestyles for ploting `raw_var`, `ref_var`, and `bc_vars`, respectively. Defaults as ['-','-','-'], expecting one of each. markers : list, optional Markers for ploting `raw_var`, `ref_var`, and `bc_vars`, respectively. Defaults as ['.','.','.'], expecting one of each. markersize : int, optional Size of the markers for plotting, defaults as 30. alpha : float, optional Alpha transparency value for plotting, where 1 is opaque and 0 is transparent. legend_bbox_to_anchor : tuple, optional Box that is used to position the legend to the final axes. Defaults as (1,1). Modify this is the legend does not plot where you desire it to be. fig : matplotlib.figure, optional matplotlib figure object to plot on, defaulting as None and creating a new object unless otherwise specified. axes : matplotlib.axes, optional Array-like of matplotlib axes objet to plot multiple plots on, defaulting as None and creating a new object unless otherwise specified. start_ax_index : int, optional If the plots should not be plotted starting at the first ax in axes, specifiy the index that plotting should begin on. Defaults as None, assuming plotting should begin from the first ax. tot_plots : int, optional If more plotting is to be done than with the total data to be provided, describe how many total plots there should be. Defalts as None, assuming plotting should begin form the first ax. """ if len(bc_vars) == 0: raise Exception("Please enter a non-zero number strings in bc_vars to be used") if len(bc_vars) != len(bc_names): raise Exception("Please have the same number of entries in bc_names as bc_names") if len(plot_colors) < 2 + len(bc_vars): raise Exception(f"Please enter at least {2+len(bc_vars)} colors in plot_colors") if len(linestyles) < 2 + len(bc_vars): raise Exception(f"Please enter at least {2+len(bc_vars)} styles in linestyles") if len(markers) < 2 + len(bc_vars): raise Exception(f"Please enter at least {2+len(bc_vars)} markers in markers") if not tot_plots: tot_plots = len(plot_sites) n_rows, n_cols = determine_row_col(tot_plots) if fig is None or axes is None: fig, axes = plt.subplots(n_rows, n_cols, figsize = figsize, sharex = sharex, sharey = sharey) axes = axes.flatten() time = flow_dataset['time'].values raw_flow_df = pd.DataFrame(data = flow_dataset[raw_var].sel(seg=plot_sites).values, index=time, columns = plot_sites) ref_flow_df = pd.DataFrame(data = flow_dataset[ref_var].sel(seg=plot_sites).values, index=time, columns = plot_sites) bc_flow_dfs = list() for bc_var in bc_vars: bc_flow_df = pd.DataFrame(data = flow_dataset[bc_var].sel(seg=plot_sites).values, index = time, columns = plot_sites) bc_flow_dfs.append(bc_flow_df) if not isinstance(subset_month, type(None)): raw_flow_df = raw_flow_df[raw_flow_df.index.month == subset_month] ref_flow_df = ref_flow_df[ref_flow_df.index.month == subset_month] for i, bc_flow_df in enumerate(bc_flow_dfs): bc_flow_dfs[i] = bc_flow_df[bc_flow_df.index.month == subset_month] WY_grouper = grouper_func(raw_flow_df) raw_flow_annual = raw_flow_df.groupby(WY_grouper).mean() ref_flow_annual = ref_flow_df.groupby(WY_grouper).mean() bc_flow_annuals = list() for bc_flow_df in bc_flow_dfs: bc_flow_annual = bc_flow_df.groupby(WY_grouper).mean() bc_flow_annuals.append(bc_flow_annual) for i, site in enumerate(plot_sites): ax = axes[i+start_ax_index] raw = raw_flow_annual[site].values ref = ref_flow_annual[site].values y_max_raw = scipy.stats.scoreatpercentile(raw, 99) y_max_ref = scipy.stats.scoreatpercentile(ref, 99) y_min_raw = scipy.stats.scoreatpercentile(raw, 1) y_min_ref = scipy.stats.scoreatpercentile(ref, 1) y_max = np.max([y_max_raw, y_max_ref])*1.1 y_min = np.min([y_min_raw, y_min_ref])*0.9 cors = list() for bc_flow_annual in bc_flow_annuals: bc = bc_flow_annual[site].values cors.append(bc) y_max_bc = scipy.stats.scoreatpercentile(bc, 99) y_min_bc = scipy.stats.scoreatpercentile(bc, 1) if y_max_bc > y_max: y_max = y_max_bc if y_min_bc < y_min: y_min = y_min_bc common_opts = dict( plottype='prob', probax='x' ) probscale.probplot(raw, ax=ax, pp_kws=pp_kws, scatter_kws=dict(linestyle=linestyles[0], marker=markers[0], markersize = markersize, alpha=alpha, color = plot_colors[0], label=raw_name), **common_opts) probscale.probplot(ref, ax=ax, pp_kws=pp_kws, scatter_kws=dict(linestyle=linestyles[1], marker=markers[1], markersize = markersize*1.5, alpha=alpha, color = plot_colors[1], label=ref_name), **common_opts) for j, cor in enumerate(cors): probscale.probplot(cor, ax=ax, pp_kws=pp_kws, scatter_kws=dict(linestyle=linestyles[2+j], marker=markers[2+j], markersize = markersize, alpha=alpha, color = plot_colors[2+j], label=bc_names[j]), **common_opts) ax.set_title(site, fontsize = fontsize_subplot) ax.tick_params(axis = 'both', labelsize = fontsize_tick) ax.set_ylim([y_min, y_max]) ax.set_xlim(left=1, right=99) ax.set_xticks([1, 10, 20, 50, 80, 90, 95, 99]) plt.setp(ax.get_xticklabels(), Rotation=45) axes[-1].axis('off') plot_names = [raw_name, ref_name] plot_names.extend(bc_names) axes[i].legend(handles=custom_legend(names = plot_names, colors=plot_colors), bbox_to_anchor = legend_bbox_to_anchor, fontsize = fontsize_legend) fig.text(0.4, 0.04, 'Cumulative Percentile', ha = 'center', fontsize = fontsize_labels) fig.text(-0.01, 0.5, units, va = 'center', rotation = 'vertical', fontsize = fontsize_labels) plt.subplots_adjust(hspace = 0.35, left = 0.05, right = 0.8, top = 0.95) return fig, axes