在 NumPy 中(无需导入其他模块),是否有办法实现与 scipy.stats.mode 相同的功能,即沿指定轴获取 ndarray 中出现次数最多的值?例如:
import numpy as np
from scipy.stats import mode
# Three stacked 4x5 layers; the first two are identical, so along axis 0
# every position has one value that occurs twice.
a = np.array([[[0, 1, 2, 3, 4],
               [5, 6, 7, 8, 9],
               [10, 11, 12, 13, 14],
               [15, 16, 17, 18, 19]],
              [[0, 1, 2, 3, 4],
               [5, 6, 7, 8, 9],
               [10, 11, 12, 13, 14],
               [15, 16, 17, 18, 19]],
              [[40, 40, 42, 43, 44],
               [45, 46, 47, 48, 49],
               [50, 51, 52, 53, 54],
               [55, 56, 57, 58, 59]]])

# The array is named `a`; the original call passed the undefined name `data`.
# The result is bound to its own names so the imported `mode` function is not
# overwritten and stays callable afterwards.
mode_result = mode(a, axis=0)
# Element 0 of the result holds the most common values, element 1 the counts.
most_common = mode_result[0]
print(most_common)
>>>[ 0, 1, 2, 3, 4],
[ 5, 6, 7, 8, 9],
[10, 11, 12, 13, 14],
[15, 16, 17, 18, 19]
scipy.stats.mode 函数是用下面这段代码定义的,它只依赖于 NumPy:
def mode(a, axis=0):
    """Return the most frequent value along ``axis`` and how often it occurs.

    Parameters
    ----------
    a : array_like
        Input data; converted with ``np.asarray`` so nested lists work too.
    axis : int, optional
        Axis along which values are counted. Defaults to 0.

    Returns
    -------
    (most_frequent, counts) : tuple of ndarray
        Both arrays have the shape of ``a`` with ``axis`` reduced to length 1
        and are floating point, because the accumulators start as
        ``np.zeros``. When several values tie, the smallest one is kept.
        If ``a`` contains no elements, both arrays are all zeros.
    """
    a = np.asarray(a)
    scores = np.unique(np.ravel(a))  # every distinct value, in sorted order
    testshape = list(a.shape)
    testshape[axis] = 1
    oldmostfreq = np.zeros(testshape)
    oldcounts = np.zeros(testshape)
    for score in scores:
        template = (a == score)
        # Occurrences of `score` along `axis`, kept broadcastable against
        # the accumulators.
        counts = np.expand_dims(np.sum(template, axis), axis)
        # Strict ">" means an earlier (smaller) value wins a tie.
        oldmostfreq = np.where(counts > oldcounts, score, oldmostfreq)
        oldcounts = np.maximum(counts, oldcounts)
    # Return the accumulator rather than a loop-local name so that an input
    # with no distinct values does not raise UnboundLocalError.
    return oldmostfreq, oldcounts
来源:https://github.com/scipy/scipy/blob/master/scipy/stats/stats.py#L609
如果您知道没有很多不同的值(相对于输入“itemArray”的大小),那么这样的方法可能会很有效:
# Distinct values of itemArray (sorted by np.unique) as a plain Python list.
uniqueValues = np.unique(itemArray).tolist()
# One pass over itemArray per distinct value; cheap only when there are few
# distinct values relative to the size of itemArray.
uniqueCounts = [np.count_nonzero(itemArray == uv) for uv in uniqueValues]
modeIdx = uniqueCounts.index(max(uniqueCounts))
# modeIdx is a position in uniqueValues, not in itemArray, so the mode must
# be looked up in uniqueValues.
mode = uniqueValues[modeIdx]
# All counts as a map
valueToCountMap = dict(zip(uniqueValues, uniqueCounts))
这对我有用:
def mode(list_array_series):
    """Return the most frequent value(s) in a collection of numbers.

    Accepts anything that supports ``len()`` and can be passed to
    ``np.unique`` (lists, pandas Series / DataFrame columns, NumPy arrays).

    Returns
    -------
    list or None
        Every value that shares the highest occurrence count, in the sorted
        order produced by ``np.unique``, as native Python scalars.
        ``None`` when the input is empty or when no value occurs more than
        once (there is no mode in either case).
    """
    # len() rather than truthiness: bool() of a multi-element NumPy array or
    # pandas Series raises.
    if len(list_array_series) == 0:
        return None
    # np.unique returns a tuple of two aligned arrays: values and counts.
    unique_values, counts = np.unique(list_array_series, return_counts=True)
    maximum_count = counts.max()
    if maximum_count == 1:  # every value is distinct
        return None
    # .tolist() yields plain Python numbers instead of NumPy scalar objects.
    return unique_values[counts == maximum_count].tolist()
# The same 'tonnage' data passed to mode() in five different container forms.

# 1. DataFrame column selected with []
as_column = df['tonnage']
print(mode(as_column))

# 2. Series selected from the DataFrame with .loc
as_loc_series = df.loc[:, 'tonnage']
print(mode(as_loc_series))

# 3. Plain Python list
as_list = df['tonnage'].to_list()
print(mode(as_list))

# 4. Explicitly constructed pandas Series
as_series = pd.Series(df['tonnage'])
print(mode(as_series))

# 5. NumPy array
as_array = np.array(df['tonnage'])
print(mode(as_array))