我终于找到了答案。您需要在每次迭代之间重新打乱（shuffle）训练数据，因为在实例化模型时设置 shuffle=True 并不会在使用 partial_fit 时打乱数据（该参数仅对 fit 生效）。注意：如果 sklearn.linear_model.SGDClassifier 的文档页面上能注明这一点，将会很有帮助。
import random

import numpy
from sklearn.linear_model import SGDClassifier
# Train an SGD classifier incrementally over several epochs, reshuffling the
# training data before every epoch.  ``X``, ``Y`` and ``batches`` are expected
# to be provided by the surrounding code.
# NOTE(review): scikit-learn >= 1.1 spells this loss 'log_loss' ('log' was
# removed in 1.3) -- confirm the installed version before changing it.
clf2 = SGDClassifier(loss='log')  # shuffle=True is useless here: partial_fit does not shuffle

# random.shuffle needs a mutable sequence; a bare range object cannot be
# shuffled in place on Python 3, so materialise the indices as a list.
shuffledRange = list(range(len(X)))
n_iter = 5
# The set of labels is the same for every batch, so compute it only once.
classes = numpy.unique(Y)

for n in range(n_iter):
    # New random order for this epoch, applied identically to samples and labels.
    random.shuffle(shuffledRange)
    shuffledX = [X[i] for i in shuffledRange]
    shuffledY = [Y[i] for i in shuffledRange]
    for batch in batches(range(len(shuffledX)), 10000):
        # Assumes each batch is a run of consecutive indices, so the slice
        # from its first to its last element (inclusive) covers the batch
        # -- TODO confirm against batches().
        start, stop = batch[0], batch[-1] + 1
        clf2.partial_fit(shuffledX[start:stop], shuffledY[start:stop], classes=classes)