|
31 | 31 | "source": [ |
32 | 32 | "import catboost\n", |
33 | 33 | "from catboost import datasets\n", |
34 | | - "from catboost.utils import create_cd\n", |
35 | 34 | "import os\n", |
36 | 35 | "import numpy as np\n", |
37 | 36 | "\n", |
|
40 | 39 | "pool = catboost.Pool(\n", |
41 | 40 | " data=X[:1000], # top 1000 documents are enough for this example\n", |
42 | 41 | " label=Y[:1000],\n", |
43 | | - " feature_names=dict(enumerate(X))\n", |
| 42 | + " feature_names=list(X.columns)\n", |
44 | 43 | ")" |
45 | 44 | ] |
46 | 45 | }, |
|
130 | 129 | "name": "stdout", |
131 | 130 | "output_type": "stream", |
132 | 131 | "text": [ |
133 | | - "Git info:\n", |
134 | | - " Commit: 573374100b7b767846cd30e914cece73cfec17a5\n", |
135 | | - " Author: 'format:Stanislav Kirillov <staskirillov@gmail.com>'\n", |
136 | | - "\n", |
137 | | - " Summary: version v0.10.3 ref:a691b211b401900433abee0a8972132e97b4cbec\n", |
| 132 | + "Svn info:\n", |
| 133 | + " URL: svn+ssh://arcadia.yandex.ru/arc/trunk/arcadia\n", |
| 134 | + " Last Changed Rev: 5437739\n", |
| 135 | + " Last Changed Author: dkvasov\n", |
| 136 | + " Last Changed Date: 2019-08-08T13:43:02.471284Z\n", |
138 | 137 | "\n", |
139 | 138 | "Other info:\n", |
140 | | - " Build by: go\n", |
141 | | - " Top src dir: /var/lib/go-agent/pipelines/BuildMaster/catboost.git\n", |
142 | | - " Top build dir: /nvme/.ya/build/build_root/ap4q/000eb9\n", |
143 | | - " Hostname: ip-172-31-39-161\n", |
| 139 | + " Build by: eermishkina\n", |
| 140 | + " Top src dir: /place/home/eermishkina/trunc/arcadia\n", |
| 141 | + " Top build dir: /home/eermishkina/.ya/build\n", |
| 142 | + " Hostname: su57.search.yandex.net\n", |
144 | 143 | " Host information: \n", |
145 | | - " Linux ip-172-31-39-161 4.4.0-1065-aws #75-Ubuntu SMP Fri Aug 10 11:14:32 UTC 2018 x86_64 x86_64 x86_64 GNU/Linux\n", |
| 144 | + " Linux su57.search.yandex.net 4.4.88-42 #1 SMP Mon Sep 18 14:33:37 UTC 2017 x86_64\n", |
| 145 | + "\n", |
146 | 146 | " \n" |
147 | 147 | ] |
148 | 148 | } |
|
174 | 174 | { |
175 | 175 | "data": { |
176 | 176 | "text/plain": [ |
177 | | - "[u'cat_features_hash', u'ctrs', u'float_features', u'categorical_features']" |
| 177 | + "[u'float_features']" |
178 | 178 | ] |
179 | 179 | }, |
180 | 180 | "execution_count": 6, |
|
221 | 221 | { |
222 | 222 | "data": { |
223 | 223 | "text/plain": [ |
224 | | - "{u'borders': [8.5],\n", |
| 224 | + "{u'borders': [83.5],\n", |
225 | 225 | " u'feature_index': 0,\n", |
226 | 226 | " u'flat_feature_index': 0,\n", |
227 | 227 | " u'has_nans': False,\n", |
|
281 | 281 | "text": [ |
282 | 282 | "{\n", |
283 | 283 | " \"leaf_values\": [\n", |
284 | | - " -0.3465735863, \n", |
285 | | - " 0, \n", |
286 | | - " -0.5493061443, \n", |
287 | | - " 0.4236489302\n", |
| 284 | + " 0.022173912547853145, \n", |
| 285 | + " 0.017826086558078085, \n", |
| 286 | + " 0.011304347573415556, \n", |
| 287 | + " -0.02565217333967221, \n", |
| 288 | + " -0.02565217333967221, \n", |
| 289 | + " 0.07652173742004059, \n", |
| 290 | + " -0.016956521360122625, \n", |
| 291 | + " -0.019130434355010134, \n", |
| 292 | + " -0.02130434734989765, \n", |
| 293 | + " -0.019130434355010148, \n", |
| 294 | + " 0.02407969585645712, \n", |
| 295 | + " 0.027495255552409406, \n", |
| 296 | + " 0.0035863376807432745, \n", |
| 297 | + " -0.026869069608165402, \n", |
| 298 | + " -0.028292219481478875, \n", |
| 299 | + " 0.07398772840759553, \n", |
| 300 | + " -0.004233128739738201, \n", |
| 301 | + " -0.012975459832675437, \n", |
| 302 | + " -0.0286196312621424, \n", |
| 303 | + " -0.02815950857304045\n", |
288 | 304 | " ], \n", |
289 | 305 | " \"splits\": [\n", |
290 | 306 | " {\n", |
291 | | - " \"split_index\": 13, \n", |
292 | | - " \"float_feature_index\": 88, \n", |
293 | | - " \"border_id\": 4.976368427, \n", |
| 307 | + " \"split_index\": 4, \n", |
| 308 | + " \"float_feature_index\": 16, \n", |
| 309 | + " \"border\": 10.407758712768555, \n", |
294 | 310 | " \"split_type\": \"FloatFeature\"\n", |
295 | 311 | " }, \n", |
296 | 312 | " {\n", |
297 | | - " \"split_index\": 1, \n", |
298 | | - " \"float_feature_index\": 6, \n", |
299 | | - " \"border_id\": 0.07142850012, \n", |
| 313 | + " \"split_index\": 3, \n", |
| 314 | + " \"float_feature_index\": 13, \n", |
| 315 | + " \"border\": 3.5, \n", |
300 | 316 | " \"split_type\": \"FloatFeature\"\n", |
301 | 317 | " }\n", |
302 | 318 | " ], \n", |
303 | 319 | " \"leaf_weights\": [\n", |
304 | | - " 72, \n", |
305 | | - " 0, \n", |
306 | | - " 388, \n", |
307 | | - " 540\n", |
| 320 | + " 123, \n", |
| 321 | + " 54, \n", |
| 322 | + " 512, \n", |
| 323 | + " 311\n", |
308 | 324 | " ]\n", |
309 | 325 | "}\n" |
310 | 326 | ] |
|
356 | 372 | { |
357 | 373 | "data": { |
358 | 374 | "text/plain": [ |
359 | | - "{u'borders': [8.5],\n", |
| 375 | + "{u'borders': [83.5],\n", |
360 | 376 | " u'feature_index': 0,\n", |
361 | 377 | " u'flat_feature_index': 0,\n", |
362 | 378 | " u'has_nans': False,\n", |
|
417 | 433 | { |
418 | 434 | "data": { |
419 | 435 | "text/plain": [ |
420 | | - "[{u'border_id': 4.976368427,\n", |
421 | | - " u'float_feature_index': 88,\n", |
422 | | - " u'split_index': 13,\n", |
| 436 | + "[{u'border': 10.407758712768555,\n", |
| 437 | + " u'float_feature_index': 16,\n", |
| 438 | + " u'split_index': 4,\n", |
423 | 439 | " u'split_type': u'FloatFeature'},\n", |
424 | | - " {u'border_id': 0.07142850012,\n", |
425 | | - " u'float_feature_index': 6,\n", |
426 | | - " u'split_index': 1,\n", |
| 440 | + " {u'border': 3.5,\n", |
| 441 | + " u'float_feature_index': 13,\n", |
| 442 | + " u'split_index': 3,\n", |
427 | 443 | " u'split_type': u'FloatFeature'}]" |
428 | 444 | ] |
429 | 445 | }, |
|
445 | 461 | { |
446 | 462 | "data": { |
447 | 463 | "text/plain": [ |
448 | | - "[{'border_id': 4.976368427,\n", |
449 | | - " 'flat_feature_index': 88,\n", |
450 | | - " 'float_feature_index': 88,\n", |
451 | | - " 'split_index': 13,\n", |
| 464 | + "[{'border_id': 10.407758712768555,\n", |
| 465 | + " 'flat_feature_index': 16,\n", |
| 466 | + " 'float_feature_index': 16,\n", |
| 467 | + " 'split_index': 4,\n", |
452 | 468 | " 'split_type': 'FloatFeature'},\n", |
453 | | - " {'border_id': 0.07142850012,\n", |
454 | | - " 'flat_feature_index': 6,\n", |
455 | | - " 'float_feature_index': 6,\n", |
456 | | - " 'split_index': 1,\n", |
| 469 | + " {'border_id': 3.5,\n", |
| 470 | + " 'flat_feature_index': 13,\n", |
| 471 | + " 'float_feature_index': 13,\n", |
| 472 | + " 'split_index': 3,\n", |
457 | 473 | " 'split_type': 'FloatFeature'}]" |
458 | 474 | ] |
459 | 475 | }, |
|
489 | 505 | { |
490 | 506 | "data": { |
491 | 507 | "text/plain": [ |
492 | | - "{u'leaf_values': [0.04693548282,\n", |
493 | | - " -0.0212903221,\n", |
494 | | - " -0.02564516072,\n", |
| 508 | + "{u'leaf_values': [0.05084745649058943,\n", |
| 509 | + " -0.025423728245294732,\n", |
| 510 | + " -0.025423728245294732,\n", |
| 511 | + " -0.02526315733006127,\n", |
| 512 | + " 0.04578947266073611,\n", |
| 513 | + " -0.020526315330674765,\n", |
495 | 514 | " 0,\n", |
496 | 515 | " 0,\n", |
497 | 516 | " 0,\n", |
498 | | - " -0.02499999944,\n", |
499 | | - " 0.04499999899,\n", |
500 | | - " -0.01999999955,\n", |
501 | | - " -0.02557376992,\n", |
502 | | - " -0.0181967209,\n", |
503 | | - " 0.04377049082],\n", |
504 | | - " u'leaf_weights': [53, 0, 45, 52],\n", |
505 | | - " u'splits': [{u'border_id': 1.549999952,\n", |
| 517 | + " -0.025573769920184966,\n", |
| 518 | + " -0.018196720904746992,\n", |
| 519 | + " 0.04377049082493207],\n", |
| 520 | + " u'leaf_weights': [50, 48, 0, 52],\n", |
| 521 | + " u'splits': [{u'border': 0.800000011920929,\n", |
506 | 522 | " u'float_feature_index': 3,\n", |
507 | | - " u'split_index': 66,\n", |
| 523 | + " u'split_index': 58,\n", |
508 | 524 | " u'split_type': u'FloatFeature'},\n", |
509 | | - " {u'border_id': 3.400000095,\n", |
510 | | - " u'float_feature_index': 2,\n", |
511 | | - " u'split_index': 37,\n", |
| 525 | + " {u'border': 1.5499999523162842,\n", |
| 526 | + " u'float_feature_index': 3,\n", |
| 527 | + " u'split_index': 64,\n", |
512 | 528 | " u'split_type': u'FloatFeature'}]}" |
513 | 529 | ] |
514 | 530 | }, |
|
562 | 578 | { |
563 | 579 | "data": { |
564 | 580 | "text/plain": [ |
565 | | - "array([-0.96672324, 2.03299365, -0.79820129])" |
| 581 | + "array([[ 0.46872039, 0.02717889, -0.02942634, -0.23159877, -0.23487417],\n", |
| 582 | + " [ 0.16342239, 0.23999561, 0.08782089, -0.24248687, -0.24875202],\n", |
| 583 | + " [ 0.33317769, 0.15813378, -0.0085402 , -0.24050062, -0.24227064]])" |
566 | 584 | ] |
567 | 585 | }, |
568 | 586 | "execution_count": 15, |
|
576 | 594 | }, |
577 | 595 | { |
578 | 596 | "cell_type": "code", |
579 | | - "execution_count": 21, |
| 597 | + "execution_count": 16, |
580 | 598 | "metadata": {}, |
581 | 599 | "outputs": [ |
582 | 600 | { |
583 | 601 | "data": { |
584 | 602 | "text/plain": [ |
585 | | - "array([-1.09331328, 1.22121605, -0.69495837])" |
| 603 | + "array([[ 0.27934996, 0.00999965, -0.04288901, -0.12313168, -0.12332892],\n", |
| 604 | + " [ 0.08037036, 0.13138144, 0.04135186, -0.124843 , -0.12826065],\n", |
| 605 | + " [ 0.19021373, 0.07576716, -0.01227257, -0.12547529, -0.12823304]])" |
586 | 606 | ] |
587 | 607 | }, |
588 | | - "execution_count": 21, |
| 608 | + "execution_count": 16, |
589 | 609 | "metadata": {}, |
590 | 610 | "output_type": "execute_result" |
591 | 611 | } |
|
599 | 619 | }, |
600 | 620 | { |
601 | 621 | "cell_type": "code", |
602 | | - "execution_count": 20, |
| 622 | + "execution_count": 17, |
603 | 623 | "metadata": {}, |
604 | 624 | "outputs": [ |
605 | 625 | { |
606 | 626 | "data": { |
607 | 627 | "text/plain": [ |
608 | | - "array([ 0.12659004, 0.8117776 , -0.10324292])" |
| 628 | + "array([[ 0.27934996, 0.00999965, -0.04288901, -0.12313168, -0.12332892],\n", |
| 629 | + " [ 0.08037036, 0.13138144, 0.04135186, -0.124843 , -0.12826065],\n", |
| 630 | + " [ 0.19021373, 0.07576716, -0.01227257, -0.12547529, -0.12823304]])" |
609 | 631 | ] |
610 | 632 | }, |
611 | | - "execution_count": 20, |
| 633 | + "execution_count": 17, |
612 | 634 | "metadata": {}, |
613 | 635 | "output_type": "execute_result" |
614 | 636 | } |
|
0 commit comments