python sklearn常用分类算法模型的调用
本文实例为大家分享了 python sklearn 分类算法模型调用的具体代码，供大家参考，具体内容如下：
实现对 'NB', 'KNN', 'LR', 'RF', 'DT', 'SVM', 'SVMCV', 'GBDT' 模型的简单调用。
"""Train and evaluate several scikit-learn classifiers on one CSV dataset.

The CSV file must contain a ``label`` column; every other column is used as a
feature.  The first 90% of the rows are used for training and the remaining
rows for testing.  For each classifier the script prints the training time
and the precision, recall and accuracy measured on the test rows.

scikit-learn is imported lazily inside the functions that need it, so the
module itself can be imported with only pandas installed.
"""
import pickle
import time

import pandas as pd


# Multinomial Naive Bayes Classifier
def naive_bayes_classifier(train_x, train_y):
    """Fit and return a ``MultinomialNB`` model (``alpha=0.01``)."""
    from sklearn.naive_bayes import MultinomialNB

    model = MultinomialNB(alpha=0.01)
    model.fit(train_x, train_y)
    return model


# KNN Classifier
def knn_classifier(train_x, train_y):
    """Fit and return a ``KNeighborsClassifier`` with default settings."""
    from sklearn.neighbors import KNeighborsClassifier

    model = KNeighborsClassifier()
    model.fit(train_x, train_y)
    return model


# Logistic Regression Classifier
def logistic_regression_classifier(train_x, train_y):
    """Fit and return an L2-penalised ``LogisticRegression`` model."""
    from sklearn.linear_model import LogisticRegression

    model = LogisticRegression(penalty='l2')
    model.fit(train_x, train_y)
    return model


# Random Forest Classifier
def random_forest_classifier(train_x, train_y):
    """Fit and return a ``RandomForestClassifier`` with 8 trees."""
    from sklearn.ensemble import RandomForestClassifier

    model = RandomForestClassifier(n_estimators=8)
    model.fit(train_x, train_y)
    return model


# Decision Tree Classifier
def decision_tree_classifier(train_x, train_y):
    """Fit and return a ``DecisionTreeClassifier`` with default settings."""
    from sklearn import tree

    model = tree.DecisionTreeClassifier()
    model.fit(train_x, train_y)
    return model


# GBDT (Gradient Boosting Decision Tree) Classifier
def gradient_boosting_classifier(train_x, train_y):
    """Fit and return a ``GradientBoostingClassifier`` with 200 estimators."""
    from sklearn.ensemble import GradientBoostingClassifier

    model = GradientBoostingClassifier(n_estimators=200)
    model.fit(train_x, train_y)
    return model


# SVM Classifier
def svm_classifier(train_x, train_y):
    """Fit and return an RBF-kernel ``SVC`` with ``probability=True``."""
    from sklearn.svm import SVC

    model = SVC(kernel='rbf', probability=True)
    model.fit(train_x, train_y)
    return model


# SVM Classifier using cross validation
def svm_cross_validation(train_x, train_y):
    """Grid-search ``C`` and ``gamma`` for an RBF ``SVC`` and return the fit.

    The parameters of the best estimator found by the grid search are
    printed, then a fresh ``SVC`` is built from the best ``C`` and ``gamma``
    and fitted on the given training data.
    """
    # GridSearchCV lives in sklearn.model_selection; the old
    # sklearn.grid_search module no longer exists in current scikit-learn.
    from sklearn.model_selection import GridSearchCV
    from sklearn.svm import SVC

    model = SVC(kernel='rbf', probability=True)
    param_grid = {
        'C': [1e-3, 1e-2, 1e-1, 1, 10, 100, 1000],
        'gamma': [0.001, 0.0001],
    }
    grid_search = GridSearchCV(model, param_grid, n_jobs=1, verbose=1)
    grid_search.fit(train_x, train_y)
    best_parameters = grid_search.best_estimator_.get_params()
    for para, val in best_parameters.items():
        print(para, val)
    model = SVC(
        kernel='rbf',
        C=best_parameters['C'],
        gamma=best_parameters['gamma'],
        probability=True,
    )
    model.fit(train_x, train_y)
    return model


def read_data(data_file):
    """Load a CSV file and split it 90/10 into train and test parts.

    Args:
        data_file: Anything ``pandas.read_csv`` accepts (path or file-like
            object).  The data must contain a ``label`` column.

    Returns:
        ``(train_x, train_y, test_x, test_y)``: the features and labels of
        the first 90% of the rows, followed by those of the remaining rows.
        The rows are split in file order; no shuffling is done.
    """
    data = pd.read_csv(data_file)
    split_at = int(len(data) * 0.9)
    train = data[:split_at]
    test = data[split_at:]
    train_y = train['label']
    train_x = train.drop('label', axis=1)
    test_y = test['label']
    test_x = test.drop('label', axis=1)
    return train_x, train_y, test_x, test_y


def main(data_file="H:\\Research\\data\\trainCG.csv", model_save_file=None):
    """Train every classifier on ``data_file`` and print its test metrics.

    Args:
        data_file: CSV file passed to ``read_data``.
        model_save_file: Optional path.  When given, a dict mapping the
            classifier key to its fitted model is pickled to this file.
            Only unpickle such a file if it comes from a trusted source.
    """
    from sklearn import metrics

    test_classifiers = ['NB', 'KNN', 'LR', 'RF', 'DT', 'SVM', 'SVMCV', 'GBDT']
    classifiers = {
        'NB': naive_bayes_classifier,
        'KNN': knn_classifier,
        'LR': logistic_regression_classifier,
        'RF': random_forest_classifier,
        'DT': decision_tree_classifier,
        'SVM': svm_classifier,
        'SVMCV': svm_cross_validation,
        'GBDT': gradient_boosting_classifier,
    }
    model_save = {}

    print('reading training and testing data...')
    train_x, train_y, test_x, test_y = read_data(data_file)

    for classifier in test_classifiers:
        print('******************* %s ********************' % classifier)
        start_time = time.time()
        model = classifiers[classifier](train_x, train_y)
        print('training took %fs!' % (time.time() - start_time))
        predict = model.predict(test_x)
        if model_save_file is not None:
            model_save[classifier] = model
        # NOTE(review): precision_score/recall_score are called with their
        # defaults, which presumably expect binary labels -- confirm against
        # the dataset before using this on multi-class data.
        precision = metrics.precision_score(test_y, predict)
        recall = metrics.recall_score(test_y, predict)
        print('precision: %.2f%%, recall: %.2f%%'
              % (100 * precision, 100 * recall))
        accuracy = metrics.accuracy_score(test_y, predict)
        print('accuracy: %.2f%%' % (100 * accuracy))

    if model_save_file is not None:
        # Use a context manager so the file is flushed and closed.
        with open(model_save_file, 'wb') as out_file:
            pickle.dump(model_save, out_file)


if __name__ == '__main__':
    main()
以上就是本文的全部内容,希望对大家的学习有所帮助,也希望大家多多支持毛票票。