6565 "cell_type" : " markdown" ,
6666 "metadata" : {},
6767 "source" : [
68- " #### Let's train CatBoost on clear data and take a look at the quality: "
68+ " #### Let's train CatBoost on clear data and take a look at the quality. We set a small learning rate to avoid overfitting when we start removing noisy objects. "
6969 ]
7070 },
7171 {
7777 "name" : " stdout" ,
7878 "output_type" : " stream" ,
7979 "text" : [
80- " 0.2157984851490331 \n "
80+ " 0.22947301323494568 \n "
8181 ]
8282 }
8383 ],
8484 "source" : [
85- " cb = CatBoost({'iterations': 100, 'verbose': False, 'random_seed': 42})\n " ,
85+ " cb = CatBoost({'iterations': 100, 'verbose': False, 'random_seed': 42, 'learning_rate': 0.001 })\n " ,
8686 " cb.fit(train_pool);\n " ,
8787 " print(cb.eval_metrics(validation_pool, ['RMSE'])['RMSE'][-1])"
8888 ]
124124 "name" : " stdout" ,
125125 "output_type" : " stream" ,
126126 "text" : [
127- " 0.25915746122622113 \n "
127+ " 0.24770929523786442 \n "
128128 ]
129129 }
130130 ],
168168 },
169169 {
170170 "cell_type" : " code" ,
171- "execution_count" : 8 ,
171+ "execution_count" : 7 ,
172172 "metadata" : {},
173173 "outputs" : [
174174 {
175175 "name" : " stdout" ,
176176 "output_type" : " stream" ,
177177 "text" : [
178- " RMSE on validation datset when 0 harmful objects from train are dropped: 0.25915746122622113 \n " ,
179- " RMSE on validation datset when 250 harmful objects from train are dropped: 0.25601149050939825 \n " ,
180- " RMSE on validation datset when 500 harmful objects from train are dropped: 0.25158044983631966 \n " ,
181- " RMSE on validation datset when 750 harmful objects from train are dropped: 0.24570533776587475 \n " ,
182- " RMSE on validation datset when 1000 harmful objects from train are dropped: 0.24171376432589384 \n " ,
183- " RMSE on validation datset when 1250 harmful objects from train are dropped: 0.23716221792112202 \n " ,
184- " RMSE on validation datset when 1500 harmful objects from train are dropped: 0.23352830055657348 \n " ,
185- " RMSE on validation datset when 1750 harmful objects from train are dropped: 0.23035731488436903 \n " ,
186- " RMSE on validation datset when 2000 harmful objects from train are dropped: 0.2275943109556251 \n "
178+ " RMSE on validation datset when 0 harmful objects from train are dropped: 0.24770929523786442 \n " ,
179+ " RMSE on validation datset when 250 harmful objects from train are dropped: 0.2447175042288005 \n " ,
180+ " RMSE on validation datset when 500 harmful objects from train are dropped: 0.24225895802476696 \n " ,
181+ " RMSE on validation datset when 750 harmful objects from train are dropped: 0.23953255257505965 \n " ,
182+ " RMSE on validation datset when 1000 harmful objects from train are dropped: 0.23730021406692955 \n " ,
183+ " RMSE on validation datset when 1250 harmful objects from train are dropped: 0.23571326583727906 \n " ,
184+ " RMSE on validation datset when 1500 harmful objects from train are dropped: 0.23414087500696676 \n " ,
185+ " RMSE on validation datset when 1750 harmful objects from train are dropped: 0.23269021852578387 \n " ,
186+ " RMSE on validation datset when 2000 harmful objects from train are dropped: 0.231598588484771 \n "
187187 ]
188188 }
189189 ],
210210 " \n " ,
211211 " ||RMSE on the validation dataset|\n " ,
212212 " |-|-|\n " ,
213- " |Clear train dataset: | 0.215798485149 |\n " ,
214- " |Noisy train dataset: | 0.259157461226 |\n " ,
215- " |Purified train dataset: | 0.227594310956 |"
213+ " |Clear train dataset: | 0.22947301323494568 |\n " ,
214+ " |Noisy train dataset: | 0.24770929523786442 |\n " ,
215+ " |Purified train dataset: | 0.231598588484771 |"
216216 ]
217217 },
218218 {
244244 },
245245 "nbformat" : 4 ,
246246 "nbformat_minor" : 1
247- }
247+ }
0 commit comments