Python kodu yardımı, lütfen

Bir Python kodum var, ancak onu test ettiğimde yeterince iyi sonuçlar alamıyorum. Belki kötü bir veri seti kullanmamdan kaynaklanıyor, ama veri setimi değiştiremiyorum. Yardımcı olursanız sevinirim.
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.impute import SimpleImputer
from sklearn.ensemble import RandomForestClassifier, VotingClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.neural_network import MLPClassifier
from xgboost import XGBClassifier
from lightgbm import LGBMClassifier
from catboost import CatBoostClassifier
from sklearn.feature_selection import RFE
from sklearn.metrics import precision_score, f1_score, recall_score
from sklearn.model_selection import cross_val_score
import optuna
import joblib
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.callbacks import EarlyStopping # Early stopping import edilmesi

Veri Setini Yükle

df = pd.read_excel(“C:\Users\qwerty\Desktop\hepsi\rawdata\rawdata.xlsx”)

Sayısal Olmayan Sütunların Etiketlenmesi

label_encoders = {}
for col in df.select_dtypes(include=[‘object’]).columns:
le = LabelEncoder()
df[col] = le.fit_transform(df[col])
label_encoders[col] = le

Eksik Değerlerin İşlenmesi

imputer = SimpleImputer(strategy=‘mean’)
df_imputed = pd.DataFrame(imputer.fit_transform(df), columns=df.columns)

Aykırı Değerlerin İşlenmesi

for col in df_imputed.select_dtypes(include=[np.number]).columns:
q75, q25 = np.percentile(df_imputed[col], [75, 25])
iqr = q75 - q25
upper_bound = q75 + (1.5 * iqr)
lower_bound = q25 - (1.5 * iqr)
df_imputed[col] = np.where(df_imputed[col] > upper_bound, upper_bound, df_imputed[col])
df_imputed[col] = np.where(df_imputed[col] < lower_bound, lower_bound, df_imputed[col])

Veriyi Ayırma

X = df_imputed.iloc[:, :-2] # Tüm kolonlar (son iki kolon hariç)
y1 = df_imputed.iloc[:, -2].astype(int) # 1. hedef değişken
y2 = df_imputed.iloc[:, -1].astype(int) # 2. hedef değişken

StratifiedShuffleSplit ile Veriyi Bölme

X_train, X_test, y1_train, y1_test = train_test_split(X, y1, test_size=0.3, random_state=42)
y2_train, y2_test = y2.iloc[y1_train.index], y2.iloc[y1_test.index]

Ölçekleme

scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

Özellik Seçimi (RFE)

estimator = RandomForestClassifier()
selector = RFE(estimator, n_features_to_select=9, step=1)
X_train_selected = selector.fit_transform(X_train_scaled, y1_train)
X_test_selected = selector.transform(X_test_scaled)

Keras modeli oluşturma

def create_keras_model(num_layers, units, learning_rate):
model = keras.Sequential()
for _ in range(num_layers):
model.add(layers.Dense(units, activation=‘relu’))
model.add(layers.Dropout(0.2)) # Dropout ekleyin
model.add(layers.Dense(1, activation=‘sigmoid’))
optimizer = keras.optimizers.Adam(learning_rate=learning_rate)
model.compile(optimizer=optimizer, loss=‘binary_crossentropy’, metrics=[‘accuracy’])
return model

Hiperparametre Optimizasyonu

performance_data = # Performans verilerini saklamak için bir liste oluştur

def objective(trial, y_train):
model_name = trial.suggest_categorical(“model”, [“rf”, “knn”, “dt”, “mlp”, “xgb”, “lgbm”, “catboost”, “keras”])

if model_name == "rf":
    n_estimators = trial.suggest_int("n_estimators", 50, 300)
    max_depth = trial.suggest_int("max_depth", 2, 50)
    model = RandomForestClassifier(n_estimators=n_estimators, max_depth=max_depth)
elif model_name == "knn":
    n_neighbors = trial.suggest_int("n_neighbors", 2, 20)
    model = KNeighborsClassifier(n_neighbors=n_neighbors)
elif model_name == "dt":
    max_depth = trial.suggest_int("max_depth", 2, 50)
    model = DecisionTreeClassifier(max_depth=max_depth)
elif model_name == "mlp":
    hidden_layer_sizes = trial.suggest_int("hidden_layer_sizes", 50, 300)
    alpha = trial.suggest_float("alpha", 1e-5, 1e-1)
    model = MLPClassifier(hidden_layer_sizes=(hidden_layer_sizes,), alpha=alpha, max_iter=1000)
elif model_name == "xgb":
    n_estimators = trial.suggest_int("n_estimators", 50, 300)
    learning_rate = trial.suggest_float("learning_rate", 0.01, 0.3)
    max_depth = trial.suggest_int("max_depth", 2, 50)
    model = XGBClassifier(n_estimators=n_estimators, learning_rate=learning_rate, max_depth=max_depth,
                          use_label_encoder=False)
elif model_name == "lgbm":
    n_estimators = trial.suggest_int("n_estimators", 50, 300)
    learning_rate = trial.suggest_float("learning_rate", 0.01, 0.3)
    num_leaves = trial.suggest_int("num_leaves", 2, 256)
    model = LGBMClassifier(n_estimators=n_estimators, learning_rate=learning_rate, num_leaves=num_leaves)
elif model_name == "catboost":
    n_estimators = trial.suggest_int("n_estimators", 50, 300)
    learning_rate = trial.suggest_float("learning_rate", 0.01, 0.3)
    depth = trial.suggest_int("depth", 2, 16)
    model = CatBoostClassifier(n_estimators=n_estimators, learning_rate=learning_rate, depth=depth, verbose=0)
elif model_name == "keras":
    num_layers = trial.suggest_int("num_layers", 1, 5)
    units = trial.suggest_int("units", 32, 128)
    learning_rate = trial.suggest_float("learning_rate", 1e-5, 1e-2)
    model = create_keras_model(num_layers, units, learning_rate)
    model.fit(X_train_selected, y_train, epochs=50, batch_size=32, verbose=0)
    score = model.evaluate(X_train_selected, y_train, verbose=0)[1]
    performance_data.append({"trial": len(performance_data) + 1, "model": model_name, "score": score})
    return score

score = cross_val_score(model, X_train_selected, y_train, cv=5, scoring="accuracy").mean()

# Performans verilerini kaydet
performance_data.append({"trial": len(performance_data) + 1, "model": model_name, "score": score})

return score

y1 için en iyi parametreleri bul

study_y1 = optuna.create_study(direction=“maximize”)
study_y1.optimize(lambda trial: objective(trial, y1_train), n_trials=150)
best_params_y1 = study_y1.best_params

y2 için en iyi parametreleri bul

study_y2 = optuna.create_study(direction=“maximize”)
study_y2.optimize(lambda trial: objective(trial, y2_train), n_trials=150)
best_params_y2 = study_y2.best_params

En İyi Modelleri Eğit

def train_best_model(best_params, X_train, y_train):
if best_params[“model”] == “keras”:
model = create_keras_model(best_params[“num_layers”], best_params[“units”], best_params[“learning_rate”])

    # Early Stopping Callbacks ekledik
    early_stopping = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)
    model.fit(X_train, y_train, epochs=50, batch_size=32, verbose=1, validation_split=0.2,
              callbacks=[early_stopping])
else:
    model_name = best_params["model"]
    if model_name == "rf":
        model = RandomForestClassifier(n_estimators=best_params["n_estimators"], max_depth=best_params["max_depth"])
    elif model_name == "knn":
        model = KNeighborsClassifier(n_neighbors=best_params["n_neighbors"])
    elif model_name == "dt":
        model = DecisionTreeClassifier(max_depth=best_params["max_depth"])
    elif model_name == "mlp":
        model = MLPClassifier(hidden_layer_sizes=(best_params["hidden_layer_sizes"],), alpha=best_params["alpha"],
                              max_iter=1000)
    elif model_name == "xgb":
        model = XGBClassifier(n_estimators=best_params["n_estimators"], learning_rate=best_params["learning_rate"],
                              max_depth=best_params["max_depth"], use_label_encoder=False)
    elif model_name == "lgbm":
        model = LGBMClassifier(n_estimators=best_params["n_estimators"], learning_rate=best_params["learning_rate"],
                               num_leaves=best_params["num_leaves"])
    elif model_name == "catboost":
        model = CatBoostClassifier(n_estimators=best_params["n_estimators"],
                                   learning_rate=best_params["learning_rate"],
                                   depth=best_params["depth"], verbose=0)


    model.fit(X_train, y_train)

return model

model_y1 = train_best_model(best_params_y1, X_train_selected, y1_train)
model_y2 = train_best_model(best_params_y2, X_train_selected, y2_train)

Stacking Modeli Ekleyelim

StackingClassifier için en iyi modelleri seçelim

base_learners_y1 = [
(“rf”, RandomForestClassifier(n_estimators=100, max_depth=15)),
(“knn”, KNeighborsClassifier(n_neighbors=5)),
(“dt”, DecisionTreeClassifier(max_depth=15)),
(“mlp”, MLPClassifier(hidden_layer_sizes=(100,), max_iter=1000)),
(“xgb”, XGBClassifier(n_estimators=100, max_depth=5)),
(“lgbm”, LGBMClassifier(n_estimators=100, max_depth=5)),
(“catboost”, CatBoostClassifier(iterations=100, depth=5, learning_rate=0.05))
]

base_learners_y2 = base_learners_y1 # Y2 için aynı base learners’ı kullanalım

stacking_model_y1 = VotingClassifier(estimators=base_learners_y1, voting=‘soft’)
stacking_model_y2 = VotingClassifier(estimators=base_learners_y2, voting=‘soft’)

stacking_model_y1.fit(X_train_selected, y1_train)
stacking_model_y2.fit(X_train_selected, y2_train)

Tahminleri Al

def evaluate_model(model, X_test, y_test):
# Eğer model bir VotingClassifier ise
if isinstance(model, VotingClassifier):
# Tüm model tahminlerini al (olasılık tahminleri)
y_pred_prob_list = [estimator.predict_proba(X_test) for estimator in model.estimators_]

    # Olasılıkları 2D forma sok
    y_pred_prob = np.array(y_pred_prob_list).T  # (n_models, n_samples, n_classes)

    # Olasılıklar üzerinden her örnek için en yüksek olasılığa sahip sınıfı seç
    y_pred = np.argmax(y_pred_prob.mean(axis=0), axis=1)

else:
    # Diğer modeller için normal tahmin
    y_pred = model.predict(X_test)

precision = precision_score(y_test, y_pred, average='weighted')
recall = recall_score(y_test, y_pred, average='weighted')
f1 = f1_score(y_test, y_pred, average='weighted')

return precision, recall, f1

y1 Performans Değerlendirmesi

precision_y1, recall_y1, f1_y1 = evaluate_model(stacking_model_y1, X_test_selected, y1_test)
print(f"y1 için Precision: {precision_y1}“)
print(f"y1 için Recall: {recall_y1}”)
print(f"y1 için F1 Skoru: {f1_y1}")

y2 Performans Değerlendirmesi

precision_y2, recall_y2, f1_y2 = evaluate_model(stacking_model_y2, X_test_selected, y2_test)
print(f"y2 için Precision: {precision_y2}“)
print(f"y2 için Recall: {recall_y2}”)
print(f"y2 için F1 Skoru: {f1_y2}")

Performans Metriklerini Kaydet

performance_metrics = {
“y1”: {“Precision”: precision_y1, “Recall”: recall_y1, “F1”: f1_y1},
“y2”: {“Precision”: precision_y2, “Recall”: recall_y2, “F1”: f1_y2},
}

Metrikleri bir dosyaya kaydet

with open(“C:\Users\qwerty\Desktop\hepsi\rawdata\performance_metrics_c.txt”, “w”) as f:
for target, metrics in performance_metrics.items():
f.write(f"{target} için:\n")
for metric, value in metrics.items():
f.write(f"{metric}: {value}\n")
f.write(“\n”)

Model Kaydetme

joblib.dump(stacking_model_y1, ‘C:\Users\qwerty\Desktop\hepsi\rawdata\stacking_model_y1_c.pkl’)
joblib.dump(stacking_model_y2, ‘C:\Users\qwerty\Desktop\hepsi\rawdata\stacking_model_y2_c.pkl’)
joblib.dump(scaler, ‘C:\Users\qwerty\Desktop\hepsi\rawdata\scaler03072024_c.pkl’)
joblib.dump(imputer, ‘C:\Users\qwerty\Desktop\hepsi\rawdata\imputer03072024_c.pkl’)
joblib.dump(label_encoders, ‘C:\Users\qwerty\Desktop\hepsi\rawdata\label_encoders03072024_c.pkl’)
joblib.dump(selector, ‘C:\Users\qwerty\Desktop\hepsi\rawdata\selector03072024_c.pkl’)

Performans verilerini bir DataFrame’e çevir ve Excel’e yaz

performance_df = pd.DataFrame(performance_data)
performance_df.to_excel(“C:\Users\qwerty\Desktop\hepsi\rawdata\performance_trials.xlsx”, index=False)

Doğru ve Yanlış Tahminleri Belirleme

y1_predictions = stacking_model_y1.predict(X_test_selected).ravel()
y2_predictions = stacking_model_y2.predict(X_test_selected).ravel()

Boyutları kontrol et

print(“y1_test boyutu:”, y1_test.shape)
print(“y1_predictions boyutu:”, y1_predictions.shape)
print(“y2_test boyutu:”, y2_test.shape)
print(“y2_predictions boyutu:”, y2_predictions.shape)

Sonuçları DataFrame’e ekle

results_df = pd.DataFrame({
‘True_iy’: y1_test.values,
‘Predicted_iy’: y1_predictions,
‘True_ms’: y2_test.values,
‘Predicted_ms’: y2_predictions
})

Doğru ve yanlış tahminleri işaretle

results_df[‘Correct_iy’] = results_df[‘True_iy’] == results_df[‘Predicted_iy’]
results_df[‘Correct_ms’] = results_df[‘True_ms’] == results_df[‘Predicted_ms’]

Sonuçları Excel dosyasına kaydet

results_df.to_excel(“C:\Users\qwerty\Desktop\hepsi\rawdata\predictions_results_c.xlsx”, index=False)
print(“Tahmin sonuçları başarıyla kaydedildi.”)