1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36
# Combine the features and the labels into a single frame so that row
# slices keep every sample's label attached to it.
# NOTE(review): `pd`, `x` and `y` are defined outside this excerpt.
datas = pd.DataFrame(x)
datas['y'] = y
datas  # bare expression: only renders when run as a notebook cell

# Build a deliberately imbalanced subset from the rows at positions 50-99
# and 100-109.
# NOTE(review): presumably these two slices hold different labels (a 50:10
# imbalance) -- confirm against how `x`/`y` are loaded.
dfs = pd.concat([datas.iloc[50:100], datas.iloc[100:110]])
dfs  # notebook display

# Number of samples per label in the subset.
dfs.groupby('y').size()
# Over-sampling: pads the data, but is prone to over-fitting; it must only
# ever be applied to the training set.
# (English rendering of the note kept verbatim in the string below.)
''' 过采样: 填充数据,但容易过拟合,只能在训练集进行操作! '''

from collections import Counter

from imblearn.over_sampling import RandomOverSampler

# Separate the features from the label column of the imbalanced subset.
x = dfs.drop('y', axis=1)
y = dfs['y']

# A fixed seed keeps the resampling reproducible between runs.
ros = RandomOverSampler(random_state=0)
# `fit_resample` is the supported entry point; the older `fit_sample` alias
# was deprecated in imbalanced-learn 0.4 and is absent from current
# releases, where calling it raises AttributeError.
x_, y_ = ros.fit_resample(x, y)

# Per-label sample counts after over-sampling (notebook display).
Counter(y_)
# Under-sampling: removes data, so it is prone to under-fitting; likewise it
# must only ever be applied to the training set.
# (English rendering of the note kept verbatim in the string below.)
''' 欠采样: 删除数据,容易欠拟合,同样,只能在训练集中操作。 '''

from imblearn.under_sampling import RandomUnderSampler

# Separate the features from the label column of the imbalanced subset.
x = dfs.drop('y', axis=1)
y = dfs['y']

# A fixed seed keeps the resampling reproducible between runs.
rus = RandomUnderSampler(random_state=0)
# `fit_resample` is the supported entry point; the older `fit_sample` alias
# was deprecated in imbalanced-learn 0.4 and is absent from current
# releases, where calling it raises AttributeError.
x_, y_ = rus.fit_resample(x, y)

# Per-label sample counts after under-sampling (notebook display).
Counter(y_)
|