Skip to content

Commit 1eb74e9

Browse files
committed
Add bosthon housing
0 parents commit 1eb74e9

File tree

7 files changed

+13307
-0
lines changed

7 files changed

+13307
-0
lines changed

.gitignore

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
.idea/
2+
.ipynb_checkpoints/
3+
.vscode/
4+
.venv/

README.md

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
# ML Aide tutorial
2+
3+
## Setup
4+
5+
To run this tutorial we use [pyenv](https://github.com/pyenv/pyenv) and
6+
[virtualenv](https://virtualenv.pypa.io/en/latest/) to create a virtual
7+
environment. All dependencies will be installed in the virtual environment.
8+
9+
1. Create virtual environment
10+
```
11+
virtualenv venv
12+
```
13+
14+
2. Activate virtual environment
15+
```
16+
source venv/bin/activate
17+
```
18+
19+
3. Install dependencies
20+
```
21+
pip install -r requirements.txt
22+
```
23+
24+
## Start
25+
```
26+
cd boston-house-pricing
27+
python app.py
28+
```

boston-house-pricing/app.py

Lines changed: 132 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,132 @@
1+
from mlaide import client
2+
from mlaide.model import ArtifactRef
3+
import pandas as pd
4+
from sklearn import metrics
5+
from sklearn.linear_model import Lasso, LinearRegression
6+
from sklearn.model_selection import cross_val_score, train_test_split
7+
from sklearn.pipeline import Pipeline
8+
from sklearn.preprocessing import StandardScaler
9+
import numpy as np
10+
11+
# create connection
12+
options = client.MvcOptions(
13+
mvc_server_url='http://localhost:8881/api/v1',
14+
# NOTE(review): this API key is committed in plain text; consider loading it
# from an environment variable or a local, untracked config file instead.
api_key='NTIxYmUxOWMtNTJkMi00NDQ0LTljYjUtMTU1ZWZhMDFjYWFmOuKCqDdBPMKw4oKjwrjCsOKCsHTigrFj'
15+
)
16+
mlaide_client = client.MvcClient(project_key='usa-housing', options=options)
17+
18+
# start tracking
19+
run_data_preparation = mlaide_client.start_new_run(experiment_key='linear-regression', run_name='data preparation')
20+
21+
# read data
22+
housing_data = pd.read_csv('data/housing.csv')
23+
24+
# add dataset as artifact
25+
artifact = run_data_preparation.create_artifact(name="USA housing dataset", artifact_type="dataset", metadata={})
26+
run_data_preparation.add_artifact_file(artifact, 'data/housing.csv')
27+
run_data_preparation.set_completed_status()
28+
29+
30+
# create run with a reference to the dataset artifact
31+
artifact_ref = ArtifactRef(name="USA housing dataset", version=1)
32+
run_pipeline_setup = mlaide_client.start_new_run(experiment_key='linear-regression',
33+
run_name='pipeline setup',
34+
used_artifacts=[artifact_ref])
35+
36+
X = housing_data[['Avg. Area Income', 'Avg. Area House Age', 'Avg. Area Number of Rooms',
37+
'Avg. Area Number of Bedrooms', 'Area Population']]
38+
y = housing_data['Price']
39+
40+
test_size=0.3
41+
random_state=42
42+
43+
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_size, random_state=random_state)
44+
45+
run_pipeline_setup.log_parameter('test_size', test_size)
46+
run_pipeline_setup.log_parameter('random_state', random_state)
47+
48+
# create pipeline with standard scaler and store the pipeline in ML Aide
49+
pipeline = Pipeline([
50+
('std_scalar', StandardScaler())
51+
])
52+
53+
X_train = pipeline.fit_transform(X_train)
54+
X_test = pipeline.transform(X_test)
55+
56+
run_pipeline_setup.log_model(pipeline, model_name="pipeline")
57+
run_pipeline_setup.set_completed_status()
58+
59+
# Linear Regression
60+
dataset_artifact_ref = ArtifactRef(name="USA housing dataset", version=1)
61+
pipeline_artifact_ref = ArtifactRef(name="pipeline", version=1)
62+
run_linear_regression = mlaide_client.start_new_run(experiment_key='linear-regression',
63+
run_name='linear regression',
64+
used_artifacts=[dataset_artifact_ref, pipeline_artifact_ref])
65+
66+
# `normalize` was deprecated in scikit-learn 1.0 and removed in 1.2, where
# passing it raises TypeError. The features are already standardized by the
# StandardScaler pipeline above, so the plain estimator is used.
lin_reg = LinearRegression()
67+
lin_reg.fit(X_train,y_train)
68+
69+
run_linear_regression.log_model(lin_reg, 'linear regression')
70+
71+
test_pred = lin_reg.predict(X_test)
72+
train_pred = lin_reg.predict(X_train)
73+
74+
mae = metrics.mean_absolute_error(y_test, test_pred)
75+
mse = metrics.mean_squared_error(y_test, test_pred)
76+
# RMSE is the square root of the MSE computed just above; reuse it instead
# of recomputing the metric.
rmse = np.sqrt(mse)
77+
r2 = metrics.r2_score(y_test, test_pred)
78+
cross_validation = cross_val_score(LinearRegression(), X, y, cv=10).mean()
79+
80+
run_linear_regression.log_metric('mae', mae)
81+
run_linear_regression.log_metric('mse', mse)
82+
run_linear_regression.log_metric('rmse', rmse)
83+
run_linear_regression.log_metric('r2', r2)
84+
run_linear_regression.log_metric('cross validation', cross_validation)
85+
86+
run_linear_regression.set_completed_status()
87+
88+
# Lasso Regression
89+
dataset_artifact_ref = ArtifactRef(name="USA housing dataset", version=1)
90+
pipeline_artifact_ref = ArtifactRef(name="pipeline", version=1)
91+
run_lasso = mlaide_client.start_new_run(experiment_key='lasso-regression',
92+
run_name='lasso regression',
93+
used_artifacts=[dataset_artifact_ref, pipeline_artifact_ref])
94+
95+
alpha = 0.1
96+
precompute = True
97+
positive = True
98+
selection = 'random'
99+
random_state = 42
100+
101+
run_lasso.log_parameter('alpha', alpha)
102+
run_lasso.log_parameter('precompute', precompute)
103+
run_lasso.log_parameter('positive', positive)
104+
run_lasso.log_parameter('selection', selection)
105+
run_lasso.log_parameter('random state', random_state)
106+
107+
model = Lasso(alpha=alpha,
108+
precompute=precompute,
109+
positive=positive,
110+
selection=selection,
111+
random_state=random_state)
112+
model.fit(X_train, y_train)
113+
114+
run_lasso.log_model(model, 'lasso')
115+
116+
test_pred = model.predict(X_test)
117+
train_pred = model.predict(X_train)
118+
119+
mae = metrics.mean_absolute_error(y_test, test_pred)
120+
mse = metrics.mean_squared_error(y_test, test_pred)
121+
# RMSE is the square root of the MSE computed just above; reuse it instead
# of recomputing the metric.
rmse = np.sqrt(mse)
122+
r2 = metrics.r2_score(y_test, test_pred)
123+
# Cross-validate the same configuration that is trained and logged for this
# run. A default Lasso() would use alpha=1.0 and ignore the logged parameters.
# The scaler is part of the estimator so it is re-fitted inside every fold,
# mirroring how the training data was standardized.
cv_estimator = Pipeline([
    ('std_scalar', StandardScaler()),
    ('lasso', Lasso(alpha=alpha,
                    precompute=precompute,
                    positive=positive,
                    selection=selection,
                    random_state=random_state)),
])
cross_validation = cross_val_score(cv_estimator, X, y, cv=10).mean()
124+
125+
run_lasso.log_metric('mae', mae)
126+
run_lasso.log_metric('mse', mse)
127+
run_lasso.log_metric('rmse', rmse)
128+
run_lasso.log_metric('r2', r2)
129+
run_lasso.log_metric('cross validation', cross_validation)
130+
131+
run_lasso.set_completed_status()
132+

boston-house-pricing/boston-house-pricing.ipynb

Lines changed: 429 additions & 0 deletions
Large diffs are not rendered by default.

0 commit comments

Comments
 (0)