基于GAN提高非平衡COVID-19死亡率预测模型准确性( 五 )

模型比较：将原始数据划分为训练集和测试集之后，把GAN生成的数据添加到训练集中，以便与基本模型比较性能。模型性能在由原始（真实）数据划分出的测试集上进行评估。
# Model comparison script.
#
# Trains an AdaBoost ensemble (random-forest base learner) on the original
# training split augmented with GAN-generated rows, then reports recall,
# precision, F1 and accuracy on the untouched original test split.
#
# NOTE(review): `df` (original data) and `df_generated_data` (GAN output) are
# not defined in this snippet; presumably they are built in an earlier part of
# the article. Both must contain a 'death' column, which is used as the target.
import datetime as dt

import numpy as np
import pandas as pd
import sklearn
from scipy import stats
from sklearn import preprocessing
from sklearn.ensemble import AdaBoostClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import balanced_accuracy_score as bas
from sklearn.metrics import confusion_matrix as cm
from sklearn.metrics import f1_score as fs
from sklearn.metrics import log_loss
from sklearn.metrics import precision_score as ps
from sklearn.metrics import recall_score as rs
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import train_test_split

# Only the non-default hyper-parameters are spelled out. The original call
# echoed every default, including `min_impurity_split=None` and
# `max_features='auto'`, which newer scikit-learn releases no longer accept.
# For classifiers 'auto' meant 'sqrt', so behaviour is unchanged.
rf = RandomForestClassifier(
    n_estimators=100,
    max_depth=2,
    min_samples_leaf=2,
    max_features='sqrt',
)

# Keyword arguments: newer scikit-learn rejects positional hyper-parameters.
# `algorithm` is left at the library default; the original passed 'SAMME.R',
# which was the default where available and is rejected by recent releases
# (TODO confirm against the installed scikit-learn version).
classifier = AdaBoostClassifier(
    rf,
    n_estimators=50,
    learning_rate=0.01,
    random_state=10,
)

# Separate the target variable from the features in the generated data.
X1 = df_generated_data.loc[:, df_generated_data.columns != 'death']
Y1 = df_generated_data['death']

# Separate the target variable from the features in the original data.
X = df.loc[:, df.columns != 'death']
Y = df['death']

# Split the original data; the test part stays free of generated rows.
X_train, X_test, Y_train, Y_test = train_test_split(
    X, Y, test_size=0.2, random_state=0
)

# Append the generated rows to the training split only.
# pd.concat replaces DataFrame.append / Series.append (removed in pandas 2.0).
X_train1 = pd.concat([X_train, X1], sort=False)
Y_train1 = pd.concat([Y_train, Y1])

# scikit-learn expects a 1-D target; the original passed an (n, 1) column.
classifier.fit(X_train1, np.ravel(Y_train1))

pred = np.array(classifier.predict(X_test))

recall = rs(Y_test, pred)
precision = ps(Y_test, pred)
# The original bound this to `r1` but printed `f1`, raising NameError.
f1 = fs(Y_test, pred)
ma = classifier.score(X_test, Y_test)  # mean accuracy on the test split

print('*** Evaluation metrics for test dataset ***\n')
print('Recall Score: ', recall)
print('Precision Score: ', precision)
print('F1 Score: ', f1)
print('Accuracy: ', ma)

# Results reported by the article (Table 3):
#
#   Metric      Base model score   Score with augmented generated data
#   Recall      0.75               0.83
#   Precision   1                  1
#   F1          0.86               0.9
#   Accuracy    0.9                0.95
资料来源:表3基本模型指标

结论（基于GAN提高非平衡COVID-19死亡率预测模型准确性）：与基本模型相比，所提出的模型给出了更加准确和可靠的结果，表明基于GAN的过采样克服了不平衡数据的局限性，并适当地扩充了少数类。