Skip to content

Commit d359cc4

Browse files
authored
Add files via upload
1 parent 1ade42f commit d359cc4

File tree

5 files changed

+180
-0
lines changed

5 files changed

+180
-0
lines changed

Salary_Data.csv

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
YearsExperience,Salary
2+
1.1,39343
3+
1.3,46205
4+
1.5,37731
5+
2,43525
6+
2.2,39891
7+
2.9,56642
8+
3,60150
9+
3.2,54445
10+
3.2,64445
11+
3.7,57189
12+
3.9,63218
13+
4,55794
14+
4,56957
15+
4.1,57081
16+
4.5,61111
17+
4.9,67938
18+
5.1,66029
19+
5.3,83088
20+
5.9,81363
21+
6,93940
22+
6.8,91738
23+
7.1,98273
24+
7.9,101302
25+
8.2,113812
26+
8.7,109431
27+
9,105582
28+
9.5,116969
29+
9.6,112635
30+
10.3,122391
31+
10.5,121872

app.py

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
"""Streamlit front end for the salary-prediction linear regression model.

Loads the pickled regressor that ships next to this script and predicts a
salary from the number of years of experience entered by the user.
"""
import pickle
from pathlib import Path

import numpy as np
import streamlit as st

# Resolve the model relative to this script rather than a machine-specific
# absolute path, so the app works from any checkout of the repository.
MODEL_PATH = Path(__file__).resolve().parent / "linear_regression_model.pkl"

# Load the saved model; the context manager closes the file handle promptly.
# NOTE: unpickling can execute arbitrary code -- only load model files that
# come from a trusted source.
with open(MODEL_PATH, "rb") as model_file:
    model = pickle.load(model_file)

# Set the title of the Streamlit app
st.title("Salary Prediction App ")

# Add a brief description
st.write("This app predicts the salary based on years of experience using a simple linear regression model.")

# Add input widget for user to enter years of experience
years_experience = st.number_input("Enter Years of Experience:", min_value=0.0, max_value=50.0, value=1.0, step=0.5)

# When the button is clicked, make predictions
if st.button("Predict Salary"):
    # The model expects a 2D array of shape (n_samples, n_features),
    # so wrap the single scalar input accordingly.
    experience_input = np.array([[years_experience]])
    prediction = model.predict(experience_input)

    # Display the result
    st.success(f"The predicted salary for {years_experience} years of experience is: ${prediction[0]:,.2f}")

# Display information about the model
st.write("The model was trained using a dataset of salaries and years of experience.built model by prakash senapati")

linear_regression_model.pkl

435 Bytes
Binary file not shown.

regression.py

Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,66 @@
"""Train, evaluate and pickle a simple linear regression of salary on experience.

Reads ``Salary_Data.csv`` from the directory containing this script, fits a
``LinearRegression`` on an 80/20 train/test split, plots both splits, prints
R^2 and MSE for each split, and writes the fitted model to
``linear_regression_model.pkl`` in the current working directory.
"""
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
import os
import pickle

# Locate the data next to this script instead of using a machine-specific
# absolute path. ``__file__`` is undefined when the code is run cell-by-cell
# in an interactive session, so fall back to the working directory there.
try:
    BASE_DIR = os.path.dirname(os.path.abspath(__file__))
except NameError:
    BASE_DIR = os.getcwd()

# Load the dataset
dataset = pd.read_csv(os.path.join(BASE_DIR, "Salary_Data.csv"))

# Split the data into independent and dependent variables.
# The target is always the LAST column, mirroring "all but the last" for X,
# so the target can never also appear among the features.
X = dataset.iloc[:, :-1].values
y = dataset.iloc[:, -1].values

# Split the dataset into training and testing sets (80-20%)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20, random_state=0)

# Train the model
regressor = LinearRegression()
regressor.fit(X_train, y_train)

# Predict the test set
y_pred = regressor.predict(X_test)

# Visualize the training set
plt.scatter(X_train, y_train, color='red')
plt.plot(X_train, regressor.predict(X_train), color='blue')
plt.title('Salary vs Experience (Training set)')
plt.xlabel('Years of Experience')
plt.ylabel('Salary')
plt.show()

# Visualize the test set. The fitted line is drawn from the training inputs
# on purpose: it is the same line regardless of which x values trace it.
plt.scatter(X_test, y_test, color='red')
plt.plot(X_train, regressor.predict(X_train), color='blue')
plt.title('Salary vs Experience (Test set)')
plt.xlabel('Years of Experience')
plt.ylabel('Salary')
plt.show()

# Predict salary for 12 and 20 years of experience using the trained model
y_12 = regressor.predict([[12]])
y_20 = regressor.predict([[20]])
print(f"Predicted salary for 12 years of experience: ${y_12[0]:,.2f}")
print(f"Predicted salary for 20 years of experience: ${y_20[0]:,.2f}")

# Check model performance. ``score`` returns the coefficient of
# determination (R^2), so name the results accordingly.
train_r2 = regressor.score(X_train, y_train)
test_r2 = regressor.score(X_test, y_test)
train_mse = mean_squared_error(y_train, regressor.predict(X_train))
test_mse = mean_squared_error(y_test, y_pred)

print(f"Training Score (R^2): {train_r2:.2f}")
print(f"Testing Score (R^2): {test_r2:.2f}")
print(f"Training MSE: {train_mse:.2f}")
print(f"Test MSE: {test_mse:.2f}")

# Save the trained model to disk (relative to the current working directory)
filename = 'linear_regression_model.pkl'
with open(filename, 'wb') as file:
    pickle.dump(regressor, file)
print("Model has been pickled and saved as linear_regression_model.pkl")

# Show where the file actually landed, since the path above is CWD-relative
print("Full path:", os.path.abspath(filename))

spyder.py

Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
"""Exploratory linear regression of salary on years of experience.

Reads ``Salary_Data.csv`` from the directory containing this script, fits a
``LinearRegression`` on an 80/20 train/test split, plots both splits, and
prints the fitted coefficients plus an actual-vs-predicted comparison table.
"""
# Import necessary libraries
from pathlib import Path

import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression

# Locate the data next to this script instead of using a machine-specific
# absolute path. ``__file__`` is undefined when the code is run cell-by-cell
# in an interactive session, so fall back to the working directory there.
try:
    BASE_DIR = Path(__file__).resolve().parent
except NameError:
    BASE_DIR = Path.cwd()

# Load the dataset
dataset = pd.read_csv(BASE_DIR / "Salary_Data.csv")

# Check the shape of the dataset
print("Dataset Shape:", dataset.shape)  # (30, 2)

# Feature selection (independent variable X and dependent variable y)
x = dataset.iloc[:, :-1]  # Years of experience (Independent variable)
y = dataset.iloc[:, -1]   # Salary (Dependent variable)

# Split the dataset into training and testing sets (80% training, 20% testing)
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.20, random_state=0)

# ``x`` is a DataFrame slice, so it is already 2D (n_samples, n_features).
# Convert to plain arrays without reshaping: a reshape(-1, 1) would flatten
# several feature columns into one and break the sample count.
x_train = x_train.values
x_test = x_test.values

# Fit the Linear Regression model to the training set
# (y_train is the target variable and needs no reshaping)
regressor = LinearRegression()
regressor.fit(x_train, y_train)

# Predicting the results for the test set
y_pred = regressor.predict(x_test)

# Visualizing the Training set results
plt.scatter(x_train, y_train, color='red')  # Real salary data (training)
plt.plot(x_train, regressor.predict(x_train), color='blue')  # Predicted regression line
plt.title('Salary vs Experience (Training set)')
plt.xlabel('Years of Experience')
plt.ylabel('Salary')
plt.show()

# Visualizing the Test set results
plt.scatter(x_test, y_test, color='red')  # Real salary data (testing)
plt.plot(x_train, regressor.predict(x_train), color='blue')  # Regression line from training set
plt.title('Salary vs Experience (Test set)')
plt.xlabel('Years of Experience')
plt.ylabel('Salary')
plt.show()

# Optional: Output the coefficients of the linear model
print(f"Intercept: {regressor.intercept_}")
print(f"Coefficient: {regressor.coef_}")

# Compare predicted and actual salaries from the test set
comparison = pd.DataFrame({'Actual': y_test, 'Predicted': y_pred})
print(comparison)

0 commit comments

Comments
 (0)