# Ensemble learning methods in Python.
# Ensemble learning combines several models to improve overall performance.
# Main approaches covered here: Voting, Bagging, Boosting, Stacking.

# 1. Imports
import numpy as np
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.metrics import accuracy_score
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import (
    VotingClassifier,
    BaggingClassifier,
    AdaBoostClassifier,
    StackingClassifier,
)

# 2. Load the data and hold out 30% of it as a test set.
cancer = load_breast_cancer()
X, y = cancer.data, cancer.target
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)

# Base models shared by the voting and stacking ensembles below.
# SVC is built with probability=True because both soft voting and
# stack_method="predict_proba" call predict_proba on every base model.
base_models = [
    ("lr", LogisticRegression(max_iter=5000, random_state=42)),
    ("dt", DecisionTreeClassifier(max_depth=5, random_state=42)),
    ("svm", SVC(kernel="rbf", probability=True, random_state=42)),
    ("knn", KNeighborsClassifier(n_neighbors=7)),
]

# 3. VotingClassifier — voting ensemble (hard vs. soft voting).
voting_hard = VotingClassifier(estimators=base_models, voting="hard")
voting_soft = VotingClassifier(estimators=base_models, voting="soft")
voting_hard.fit(X_train, y_train)
voting_soft.fit(X_train, y_train)

print("=== 投票集成 ===")
print(f"硬投票: {accuracy_score(y_test, voting_hard.predict(X_test)):.4f}")
print(f"软投票: {accuracy_score(y_test, voting_soft.predict(X_test)):.4f}")

# 4. Stand-alone performance of each base model (5-fold CV on the
#    training split, so the test set stays untouched).
print("\n基模型独立表现 (5折CV):")
for name, model in base_models:
    scores = cross_val_score(model, X_train, y_train, cv=5, scoring="accuracy")
    print(f"  {name}: {scores.mean():.4f}")

# 5. Bagging — bootstrap aggregating.
# Each of the 50 trees is fit on 80% of the samples and 80% of the features.
bagging = BaggingClassifier(
    estimator=DecisionTreeClassifier(random_state=42),
    n_estimators=50,
    max_samples=0.8,
    max_features=0.8,
    random_state=42,
    n_jobs=-1,
)
bagging.fit(X_train, y_train)

# A single unpruned tree serves as the baseline for the bagged ensemble.
dt_single = DecisionTreeClassifier(random_state=42)
dt_single.fit(X_train, y_train)

print("\n=== Bagging ===")
print(f"单棵决策树: {dt_single.score(X_test, y_test):.4f}")
print(f"Bagging (50棵): {bagging.score(X_test, y_test):.4f}")

# 6. AdaBoost — adaptive boosting over depth-1 trees (decision stumps).
adaboost = AdaBoostClassifier(
    estimator=DecisionTreeClassifier(max_depth=1),
    n_estimators=100,
    learning_rate=0.1,
    random_state=42,
)
adaboost.fit(X_train, y_train)

print("\n=== AdaBoost ===")
print(f"准确率: {adaboost.score(X_test, y_test):.4f}")

# 7. Stacking — a logistic-regression meta-model is trained on the base
#    models' cross-validated predict_proba outputs.
stacking = StackingClassifier(
    estimators=base_models,
    final_estimator=LogisticRegression(max_iter=5000),
    cv=5,
    stack_method="predict_proba",
)
stacking.fit(X_train, y_train)

print("\n=== Stacking ===")
print(f"准确率: {stacking.score(X_test, y_test):.4f}")

# 8. Side-by-side comparison of all ensemble methods on the test set.
print("\n=== 集成方法总对比 ===")
print(f"软投票: {accuracy_score(y_test, voting_soft.predict(X_test)):.4f}")
print(f"Bagging: {bagging.score(X_test, y_test):.4f}")
print(f"AdaBoost: {adaboost.score(X_test, y_test):.4f}")
print(f"Stacking: {stacking.score(X_test, y_test):.4f}")

# 9. Summary of the ensemble principles
# Voting:   several models vote; simple and effective
# Bagging:  parallel training; reduces variance
# Boosting: sequential training; reduces bias
# Stacking: a meta-model fuses the base models' outputs

# 10. When to use which
# Voting:   works well when the base models differ substantially
# Bagging:  high-variance base models (deep decision trees)
# Boosting: high-bias base models (shallow decision trees)
# Stacking: when chasing the best possible performance