|
35 | 35 | }, |
36 | 36 | { |
37 | 37 | "cell_type": "code", |
38 | | - "execution_count": 1, |
| 38 | + "execution_count": null, |
39 | 39 | "metadata": {}, |
40 | | - "outputs": [ |
41 | | - { |
42 | | - "name": "stdout", |
43 | | - "output_type": "stream", |
44 | | - "text": [ |
45 | | - "\n", |
46 | | - "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m23.2.1\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m23.3\u001b[0m\n", |
47 | | - "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpython3.11 -m pip install --upgrade pip\u001b[0m\n" |
48 | | - ] |
49 | | - } |
50 | | - ], |
| 40 | + "outputs": [], |
51 | 41 | "source": [ |
52 | 42 | "!pip install elasticsearch -qU" |
53 | 43 | ] |
|
82 | 72 | }, |
83 | 73 | { |
84 | 74 | "cell_type": "code", |
85 | | - "execution_count": 27, |
| 75 | + "execution_count": null, |
86 | 76 | "metadata": {}, |
87 | | - "outputs": [ |
88 | | - { |
89 | | - "name": "stdout", |
90 | | - "output_type": "stream", |
91 | | - "text": [ |
92 | | - "{'name': 'instance-0000000001', 'cluster_name': '1646af1463a8461e8bc3a33f317f8cf1', 'cluster_uuid': 'FF7uKiNRT6SejAcx2qDL-w', 'version': {'number': '8.11.0', 'build_flavor': 'default', 'build_type': 'docker', 'build_hash': '454cd35d33aafc161b4f7238d63777e71814d834', 'build_date': '2023-10-16T22:04:47.763088486Z', 'build_snapshot': False, 'lucene_version': '9.8.0', 'minimum_wire_compatibility_version': '7.17.0', 'minimum_index_compatibility_version': '7.0.0'}, 'tagline': 'You Know, for Search'}\n" |
93 | | - ] |
94 | | - } |
95 | | - ], |
| 77 | + "outputs": [], |
96 | 78 | "source": [ |
97 | 79 | "# Found in the 'Manage Deployment' page\n", |
98 | 80 | "CLOUD_ID = getpass.getpass('Enter Elastic Cloud ID: ')\n", |
|
246 | 228 | "source": [ |
247 | 229 | "# Upgrade index `movies` to use ELSER model\n", |
248 | 230 | "\n", |
249 | | - "we are ready to re-index `movies` to a new index with the ELSER model `.elser_model_2`. As a first step, we have to create new ingestion pipeline and a index to use ELSER model. \n", |
| 231 | + "We are ready to re-index `movies` to a new index with the ELSER model `.elser_model_2`. As a first step, we have to create a new ingestion pipeline and a new index that use the ELSER model. \n", |
250 | 232 | "\n", |
251 | 233 | "# Create a new pipeline with ELSER \n", |
252 | 234 | "Let's create a new ingestion pipeline with ELSER model `.elser_model_2`. " |
|
283 | 265 | "source": [ |
284 | 266 | "# Create an index with mappings\n", |
285 | 267 | "\n", |
286 | | - "Next, create an index with [`text_expansion`](https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-text-expansion-query.html) query supporting ELSER model and [`rank_features`](https://www.elastic.co/guide/en/elasticsearch/reference/current/rank-features.html) to work with our vectors. \n", |
287 | | - "\n" |
| 268 | + "Next, create an index with the required mappings for ELSER. " |
288 | 269 | ] |
289 | 270 | }, |
290 | 271 | { |
|
307 | 288 | " }\n", |
308 | 289 | " },\n", |
309 | 290 | " \"plot_embedding\": { \n", |
310 | | - " \"type\": \"rank_features\" \n", |
| 291 | + " \"type\": \"sparse_vector\" \n", |
311 | 292 | " }\n", |
312 | 293 | " }\n", |
313 | 294 | " }\n", |
314 | 295 | ")" |
315 | 296 | ] |
316 | 297 | }, |
| 298 | + { |
| 299 | + "cell_type": "markdown", |
| 300 | + "metadata": {}, |
| 301 | + "source": [ |
| 302 | + "**Note:**\n", |
| 303 | + "- `plot_embedding` is the name of the field that contains the generated tokens; it has the type [`sparse_vector`](https://www.elastic.co/guide/en/elasticsearch/reference/master/sparse-vector.html) \n", |
| 304 | + "- `plot` is the name of the field from which the [`sparse_vector`](https://www.elastic.co/guide/en/elasticsearch/reference/master/sparse-vector.html) tokens are created. " |
| 305 | + ] |
| 306 | + }, |
317 | 307 | { |
318 | 308 | "cell_type": "markdown", |
319 | 309 | "metadata": {}, |
|
341 | 331 | "cell_type": "markdown", |
342 | 332 | "metadata": {}, |
343 | 333 | "source": [ |
344 | | - "Once reindex is complete, inspect any document in the index `elser-movies` and notice that the document has a additional field `plot_embedding` with terms that we will be using in to search in our `text_expansion` query. \n", |
| 334 | + "Once the reindex is complete, inspect any document in the index `elser-movies` and notice that the document has an additional field `plot_embedding` with terms that we will be using in the `text_expansion` query. \n", |
345 | 335 | " " |
346 | 336 | ] |
347 | 337 | }, |
|
356 | 346 | }, |
357 | 347 | { |
358 | 348 | "cell_type": "code", |
359 | | - "execution_count": 10, |
| 349 | + "execution_count": 7, |
360 | 350 | "metadata": {}, |
361 | 351 | "outputs": [ |
362 | 352 | { |
|
473 | 463 | " }\n", |
474 | 464 | " },\n", |
475 | 465 | " \"plot_embedding\": {\n", |
476 | | - " \"type\": \"rank_features\"\n", |
| 466 | + " \"type\": \"sparse_vector\"\n", |
477 | 467 | " },\n", |
478 | 468 | " }\n", |
479 | 469 | " }\n", |
|
493 | 483 | }, |
494 | 484 | { |
495 | 485 | "cell_type": "code", |
496 | | - "execution_count": 30, |
| 486 | + "execution_count": null, |
497 | 487 | "metadata": {}, |
498 | | - "outputs": [ |
499 | | - { |
500 | | - "data": { |
501 | | - "text/plain": [ |
502 | | - "ObjectApiResponse({'took': 2271, 'timed_out': False, 'total': 12, 'updated': 0, 'created': 12, 'deleted': 0, 'batches': 1, 'version_conflicts': 0, 'noops': 0, 'retries': {'bulk': 0, 'search': 0}, 'throttled_millis': 0, 'requests_per_second': -1.0, 'throttled_until_millis': 0, 'failures': []})" |
503 | | - ] |
504 | | - }, |
505 | | - "execution_count": 30, |
506 | | - "metadata": {}, |
507 | | - "output_type": "execute_result" |
508 | | - } |
509 | | - ], |
| 488 | + "outputs": [], |
510 | 489 | "source": [ |
511 | 490 | "client.reindex(source={\n", |
512 | | - " \"index\": \"elser-example-movies\", # replace with your index name\n", |
| 491 | + " \"index\": \"my-index\", # replace with your index name\n", |
513 | 492 | " \"_source\": {\n", |
514 | | - " \"excludes\": [\"ml\"] # replace with the field-name from your index, that has previously generated tokens\n", |
| 493 | + " \"excludes\": [\"my-tokens-field\"] # replace with the field-name from your index, that has previously generated tokens\n", |
515 | 494 | " }}, \n", |
516 | 495 | " dest={\n", |
517 | 496 | " \"index\": \"elser-upgrade-index-demo\",\n", |
|
530 | 509 | }, |
531 | 510 | { |
532 | 511 | "cell_type": "code", |
533 | | - "execution_count": 31, |
| 512 | + "execution_count": 21, |
534 | 513 | "metadata": {}, |
535 | 514 | "outputs": [ |
536 | 515 | { |
|
594 | 573 | }, |
595 | 574 | { |
596 | 575 | "cell_type": "code", |
597 | | - "execution_count": 14, |
| 576 | + "execution_count": 8, |
598 | 577 | "metadata": {}, |
599 | 578 | "outputs": [ |
600 | 579 | { |
|
603 | 582 | "ObjectApiResponse({'blogs': {'aliases': {}, 'mappings': {'properties': {'text_embedding': {'properties': {'is_truncated': {'type': 'boolean'}, 'model_id': {'type': 'text', 'fields': {'keyword': {'type': 'keyword', 'ignore_above': 256}}}, 'predicted_value': {'type': 'dense_vector', 'dims': 384, 'index': True, 'similarity': 'l2_norm'}}}, 'title': {'type': 'text', 'fields': {'keyword': {'type': 'keyword', 'ignore_above': 256}}}}}, 'settings': {'index': {'routing': {'allocation': {'include': {'_tier_preference': 'data_content'}}}, 'number_of_shards': '1', 'blocks': {'read_only_allow_delete': 'false'}, 'provided_name': 'blogs', 'default_pipeline': 'vectorize_blogs', 'creation_date': '1697651466693', 'number_of_replicas': '1', 'uuid': 'JWkPyTphQ2GV0sLadHWjjw', 'version': {'created': '8500003'}}}}})" |
604 | 583 | ] |
605 | 584 | }, |
606 | | - "execution_count": 14, |
| 585 | + "execution_count": 8, |
607 | 586 | "metadata": {}, |
608 | 587 | "output_type": "execute_result" |
609 | 588 | } |
|
682 | 661 | " }\n", |
683 | 662 | " },\n", |
684 | 663 | " \"title_embedding\": {\n", |
685 | | - " \"type\": \"rank_features\"\n", |
| 664 | + " \"type\": \"sparse_vector\"\n", |
686 | 665 | " },\n", |
687 | 666 | " }\n", |
688 | 667 | " }\n", |
|
725 | 704 | }, |
726 | 705 | { |
727 | 706 | "cell_type": "code", |
728 | | - "execution_count": 24, |
| 707 | + "execution_count": 11, |
729 | 708 | "metadata": {}, |
730 | 709 | "outputs": [ |
731 | 710 | { |
|
0 commit comments