如果我理解正确，则无需逐列遍历。该解决方案将每个分组中偏离组均值超过三倍组标准差的值替换为 NaN。
def replace(group, stds):
    """Return a copy of ``group`` with outliers replaced by NaN.

    A value counts as an outlier when its absolute deviation from the mean of
    ``group`` is strictly greater than ``stds`` times the standard deviation
    of ``group``. Values that are already NaN stay NaN, and a group whose
    standard deviation is NaN (e.g. a single element) is returned unmasked
    because the comparison is then False everywhere.

    Args:
        group: A pandas Series or DataFrame of numeric values, e.g. one group
            produced by a groupby.
        stds: Number of standard deviations beyond which a value is masked.

    Returns:
        A new object with the same shape and index as ``group``; ``group``
        itself is not modified.
    """
    deviation = np.abs(group - group.mean())
    threshold = stds * group.std()
    # ``mask`` builds a new object instead of assigning into ``group``: the
    # caller's data stays intact, and integer input is upcast to float rather
    # than having an incompatible NaN written into it in place.
    return group.mask(deviation > threshold)
# `df` is your DataFrame and `group_column` is the label of the column to
# group by. Every column except the grouping column is overwritten with the
# per-group result of `replace`, using a cutoff of 3 standard deviations.
df.loc[:, df.columns != group_column] = (
    df.groupby(group_column)
    .transform(lambda column: replace(column, 3))
)