Weighted Pattern/Spatial Correlation in Python

NCL (the NCAR Command Language) has a useful built-in function, pattern_cor, for calculating the weighted/unweighted and centered/uncentered pattern correlation. I've been trying to recreate this function in Python with no success.
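
For reference, the centered, weighted version I'm trying to reproduce is, as I understand NCL's pattern_cor, the area-weighted Pearson correlation

$$
r = \frac{\sum_i w_i (x_i - \bar{x}_w)(y_i - \bar{y}_w)}
{\sqrt{\sum_i w_i (x_i - \bar{x}_w)^2}\,\sqrt{\sum_i w_i (y_i - \bar{y}_w)^2}},
\qquad
\bar{x}_w = \frac{\sum_i w_i x_i}{\sum_i w_i},
$$

where the sums run over all grid points, w_i = cos(lat_i), and the uncentered version simply skips subtracting the weighted means.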

Has anyone written a Python function to calculate a weighted pattern correlation, or have any thoughts on how to fix mine?
Below is my most recent attempt. It runs without errors, but it returns a perfect pattern correlation between my datasets, which can't be right. For simplicity I've only included the part of the function that calculates the weighted pattern correlation. For now the function only handles the centered pattern correlation; I'd add the uncentered option once the weighting works.

Thank you in advance!

import numpy as np

# [read in data]

# Get spatial correlation (using the Pearson correlation coefficient) between a model
# simulation and an observational dataset
def get_spatial_correlation(model_path, obs_path, variable, time_slice, lat_slice, lon_slice, season=None, mask=None):

    # [code to read in data, with options for seasonal subsetting or masking]
    
    # Calculate weights
    weights = np.cos(np.deg2rad(obs_data.lat))
    
    # Calculate weighted means
    obs_weighted_mean = (obs_climatology * weights).sum(dim=('lat', 'lon')) / weights.sum()
    model_weighted_mean = (model_climatology * weights).sum(dim=('lat', 'lon')) / weights.sum()
    
    # Calculate centered arrays
    obs_centered = obs_climatology - obs_weighted_mean
    model_centered = model_climatology - model_weighted_mean
    
    # Calculate weighted standard deviations
    obs_weighted_std = np.sqrt(((obs_centered * weights) ** 2).sum(dim=('lat', 'lon')) / weights.sum())
    model_weighted_std = np.sqrt(((model_centered * weights) ** 2).sum(dim=('lat', 'lon')) / weights.sum())
    
    # Calculate weighted covariance
    weighted_cov = ((obs_centered * weights) * (model_centered * weights)).sum(dim=('lat', 'lon')) / weights.sum()
    
    # Calculate weighted correlation coefficient
    pattern_cor = weighted_cov / (obs_weighted_std * model_weighted_std)
    pattern_cor = pattern_cor.round(2)
    return pattern_cor
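
Looking at it again, one thing I'm suspicious of: weights.sum() only sums over lat, while the numerators sum over both lat and lon, so the weighted means come out a factor of len(lon) too large. Both centered fields are then dominated by large negative offsets, which I think is what pushes the correlation to 1. The weights also end up applied twice (effectively squared) in the standard deviations and covariance. If that's right, a corrected manual sketch (assuming obs_climatology and model_climatology are 2-D lat/lon DataArrays) would be:

# Weighted means via xarray's built-in .weighted(), which normalizes by the full weight sum
weights = np.cos(np.deg2rad(obs_climatology.lat))
obs_mean = obs_climatology.weighted(weights).mean(dim=('lat', 'lon'))
model_mean = model_climatology.weighted(weights).mean(dim=('lat', 'lon'))

# Centered (anomaly) fields
obs_c = obs_climatology - obs_mean
model_c = model_climatology - model_mean

# Apply the weights once in the covariance and variances;
# the common weight-sum factors cancel in the final ratio
cov = (weights * obs_c * model_c).sum(dim=('lat', 'lon'))
var_obs = (weights * obs_c ** 2).sum(dim=('lat', 'lon'))
var_model = (weights * model_c ** 2).sum(dim=('lat', 'lon'))

pattern_cor = cov / np.sqrt(var_obs * var_model)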

Hi @r.isphording. You might find the xskillscore package useful. It includes functions for computing various weighted correlations of xarray objects, e.g. xskillscore.pearson_r (see the xskillscore documentation).
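
For a pair of 2-D (lat, lon) DataArrays, usage looks something like the sketch below (the fields here are made up; the main point is that the weights need to be broadcastable against the inputs):

import numpy as np
import xarray as xr
import xskillscore as xs

# Hypothetical fields on a regular lat/lon grid
lat = np.arange(-87.5, 90, 2.5)
lon = np.arange(0, 360, 2.5)
rng = np.random.default_rng(0)
obs = xr.DataArray(rng.standard_normal((lat.size, lon.size)),
                   coords={'lat': lat, 'lon': lon}, dims=['lat', 'lon'])
model = obs + 0.5 * xr.DataArray(rng.standard_normal((lat.size, lon.size)),
                                 coords={'lat': lat, 'lon': lon}, dims=['lat', 'lon'])

# 2-D cos(lat) area weights aligned with the fields
weights = np.cos(np.deg2rad(obs.lat)) * xr.ones_like(obs)

# Weighted pattern correlation over the spatial dimensions
r = xs.pearson_r(model, obs, dim=['lat', 'lon'], weights=weights, skipna=True)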


Thank you so much @dougiesquire! I wasn’t aware of that package, and it is exactly what I needed.
I'll post my working function below in case it's useful to anyone in the future.

# Import modules, read in data, etc.
import numpy as np
import xarray as xr
import xskillscore as xs

# Get spatial correlation (using the Pearson correlation coefficient) between a model simulation
# and an observational dataset; the default is to calculate the centered correlation coefficient
def get_spatial_correlation(model_path, obs_path, variable, time_slice, lat_slice, lon_slice, season=None, iscale=None, mask=None, centered=True):

    # Get data
    # Include SPI scale in data extraction if a scale is provided by the user
    if iscale is None:
        # Get observational dataset
        obs_data = get_data_from_file(obs_path, variable, time_slice, lat_slice, lon_slice)

        # Get model dataset
        model_data = get_data_from_file(model_path, variable, time_slice, lat_slice, lon_slice)
    
    else:

        # Get observational dataset
        obs_data = get_data_from_file(obs_path, variable, time_slice, lat_slice, lon_slice, iscale)
    
        # Get model dataset
        model_data = get_data_from_file(model_path, variable, time_slice, lat_slice, lon_slice, iscale)
        
    # Apply mask to both datasets if one is provided
    if mask is not None:
        model_data = model_data.where(mask == 1)
        obs_data = obs_data.where(mask == 1)
    
    if season is None:
        
        # Calculate Annual climatology datasets
        obs_climatology = obs_data.mean(dim='time')
        model_climatology = model_data.mean(dim='time')
    
    else:
        
        # Calculate seasonal climatology datasets; the season is defined by the user as a list of month numbers
        obs_climatology = obs_data.sel(time=obs_data.time.dt.month.isin(season)).mean(dim='time')
        model_climatology = model_data.sel(time=model_data.time.dt.month.isin(season)).mean(dim='time')
    
    # Calculate cos(latitude) area weights
    weights = np.cos(np.deg2rad(obs_data.lat))
    weights.name = "weights"
    
    # Broadcast the weights to 2-D (lat, lon) for input into the correlation function
    weights2d_xr = weights.broadcast_like(obs_climatology)
    
    # Calculate weighted means
    obs_weighted_mean = obs_climatology.weighted(weights).mean()
    model_weighted_mean = model_climatology.weighted(weights).mean()
    
    # Check whether the user wants the centered or uncentered correlation coefficient
    if centered:
        
        # Calculate centered arrays (anomalies) and assign as input datasets
        obs_input = obs_climatology - obs_weighted_mean
        model_input = model_climatology - model_weighted_mean
        
    else:
        
        # Assign the raw climatologies as input datasets
        obs_input = obs_climatology
        model_input = model_climatology
        
    # Calculate Weighted Spatial (i.e. Pattern) Correlation and reduce to 2 decimals
    pattern_cor_i = xs.pearson_r(model_input, obs_input, dim=['lat', 'lon'], weights=weights2d_xr, skipna=True)
    pattern_cor = pattern_cor_i.astype(float).round(2)
    
    return pattern_cor
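
Usage then looks something like this (the file names, variable, and slices are just placeholders):

pattern_cor = get_spatial_correlation(
    'model_precip.nc', 'obs_precip.nc', 'pr',
    time_slice=slice('1981-01', '2010-12'),
    lat_slice=slice(-60, 60),
    lon_slice=slice(0, 360),
    season=[12, 1, 2],  # DJF
    centered=True,
)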