DEV Community

SavvyShivam
SavvyShivam

Posted on

ML troubleshooting

# Cloud-driven loan default predictor using machine learning
#
# End-to-end notebook script: load the cleaned loan data from S3, one-hot
# encode the categorical `purpose` column, hold out a test set, balance the
# training data by oversampling the minority class, fit a random forest, and
# print a classification report for the held-out data.

# Task 1. Launch an Amazon SageMaker notebook instance
# Task 2. Upload the notebook into Jupyter

# Task 3. Data loading
import boto3
import numpy as np
import pandas as pd
import sagemaker
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split
from sklearn.utils import resample

# IAM role attached to the notebook instance (not used further in this script).
role = sagemaker.get_execution_role()

bucket_name = "loan-data"
folder_name = "loan_cleaned_data"
file_name = "loan_cleaned_data.csv"
s3_url = f"s3://{bucket_name}/{folder_name}/{file_name}"

# NOTE(review): reading an s3:// URL with pandas presumably relies on the
# optional `s3fs` package being installed in the kernel -- confirm.
df = pd.read_csv(s3_url)

# Task 4. Feature engineering (one-hot encoding)
df_encoded = pd.get_dummies(df, columns=['purpose'], dtype=int)

# Task 5. Data preprocessing (handling imbalanced data)
class_counts = df_encoded['not_fully_paid'].value_counts()

# Hold out the test set BEFORE oversampling. Resampling with replacement
# duplicates minority rows; if the split happened afterwards, copies of the
# same row would land in both train and test and inflate the reported scores.
# Stratifying keeps the original class ratio in both partitions.
train_df, test_df = train_test_split(
    df_encoded,
    test_size=0.4,
    random_state=42,
    stratify=df_encoded['not_fully_paid'],
)

majority_class = train_df[train_df['not_fully_paid'] == 0]
minority_class = train_df[train_df['not_fully_paid'] == 1]

# Upsample the minority class (training rows only) to the majority class size.
minority_unsample = resample(
    minority_class,
    replace=True,
    n_samples=len(majority_class),
    random_state=42,
)
df_balanced = pd.concat([majority_class, minority_unsample])

# Task 6. Model training
# `sl_no` is dropped together with the target so it is not used as a feature.
X = df_balanced.drop(columns=['sl_no', 'not_fully_paid'])
y = df_balanced['not_fully_paid']

# Train on the balanced data; evaluate on the untouched hold-out set.
X_train, y_train = X, y
X_test = test_df.drop(columns=['sl_no', 'not_fully_paid'])
y_test = test_df['not_fully_paid']

rf = RandomForestClassifier(random_state=42)
rf.fit(X_train, y_train)

# Task 7. Model evaluation
y_pred = rf.predict(X_test)
print(classification_report(y_test, y_pred))
Enter fullscreen mode Exit fullscreen mode

Top comments (0)