ENH: implement pd.Series.corr(method="distance")

def _double_centered_distances(values):
    """Return the double-centred pairwise absolute-distance matrix of a 1-D array."""
    dist = np.abs(values[:, None] - values[None, :])
    row_mean = dist.mean(axis=1, keepdims=True)
    col_mean = dist.mean(axis=0, keepdims=True)
    # A[j, k] = a[j, k] - mean(row j) - mean(column k) + grand mean
    return dist - row_mean - col_mean + dist.mean()


# self and other can be assumed to be aligned already
def nandistcorr(self, other):
    """
    Compute the sample distance correlation between two 1-D sequences.

    Observations are paired positionally. A pair is dropped when either
    member is NaN, so only pairwise-complete observations contribute.

    Parameters
    ----------
    self, other : array-like of numbers
        Equal-length, already aligned sequences. Anything accepted by
        ``np.asarray(..., dtype=float)`` works (lists, ndarrays, Series).

    Returns
    -------
    float
        Distance correlation in ``[0, 1]``. ``nan`` is returned when fewer
        than two complete pairs remain or when either input has zero
        distance variance (all remaining values identical).

    Notes
    -----
    Memory use is O(n**2) because the full pairwise distance matrices are
    materialised.
    """
    # Work on positional float arrays: ``[]`` on a Series is label-based and
    # would pick the wrong rows (or raise) for a non-default index.
    x = np.asarray(self, dtype=float)
    y = np.asarray(other, dtype=float)

    complete = ~(np.isnan(x) | np.isnan(y))
    x = x[complete]
    y = y[complete]

    if len(x) < 2:
        return np.nan

    A = _double_centered_distances(x)
    B = _double_centered_distances(y)

    # Squared sample distance covariance / variances are the means of the
    # element-wise products over all n**2 entries.
    dvar_product = (A * A).mean() * (B * B).mean()
    if not dvar_product > 0:
        return np.nan

    # Theoretically non-negative; clip tiny negative values caused by rounding
    # so that the square root below cannot produce NaN.
    dcov_sq = max(float((A * B).mean()), 0.0)

    return np.sqrt(dcov_sq / np.sqrt(dvar_product))

ENH: implement pd.Series.corr(method="distance")

Issue Analytics

Top GitHub Comments

Top Results From Across the Web

Top Related Medium Post

Top Related StackOverflow Question

Troubleshoot Live Code

Top Related Reddit Thread

Top Related Hackernoon Post

Top Related Tweet

Top Related Dev.to Post

Top Related Hashnode Post

BUG: pandas.core.ops.dispatch_to_extension_op fails with UnboundLocalError

API: astype mechanism for extension arrays