|
12 | 12 | }, |
13 | 13 | { |
14 | 14 | "cell_type": "code", |
15 | | - "execution_count": 2, |
| 15 | + "execution_count": 1, |
16 | 16 | "id": "aac9563e", |
17 | 17 | "metadata": {}, |
18 | 18 | "outputs": [], |
|
25 | 25 | }, |
26 | 26 | { |
27 | 27 | "cell_type": "code", |
28 | | - "execution_count": 3, |
| 28 | + "execution_count": 2, |
29 | 29 | "id": "a3c3999a", |
30 | 30 | "metadata": {}, |
31 | 31 | "outputs": [], |
|
41 | 41 | }, |
42 | 42 | { |
43 | 43 | "cell_type": "code", |
44 | | - "execution_count": 6, |
| 44 | + "execution_count": 3, |
45 | 45 | "id": "5eabdb75", |
46 | 46 | "metadata": {}, |
47 | 47 | "outputs": [ |
|
63 | 63 | }, |
64 | 64 | { |
65 | 65 | "cell_type": "code", |
66 | | - "execution_count": 7, |
| 66 | + "execution_count": 4, |
67 | 67 | "id": "4b172de8", |
68 | 68 | "metadata": {}, |
69 | 69 | "outputs": [ |
|
89 | 89 | "print(docs[0].page_content)" |
90 | 90 | ] |
91 | 91 | }, |
| 92 | + { |
| 93 | + "cell_type": "markdown", |
| 94 | + "id": "8061454b", |
| 95 | + "metadata": {}, |
| 96 | + "source": [ |
| 97 | + "## Persistence\n", |
| 98 | + "\n", |
| 99 | + "The steps below cover how to persist a ChromaDB instance." |
| 100 | + ] |
| 101 | + }, |
| 102 | + { |
| 103 | + "cell_type": "markdown", |
| 104 | + "id": "2b76db26", |
| 105 | + "metadata": {}, |
| 106 | + "source": [ |
| 107 | + "### Initialize PersistedChromaDB\n", |
| 108 | + "Create embeddings for each chunk and insert into the Chroma vector database. The persist_directory argument tells ChromaDB where to store the database when it's persisted.\n", |
| 109 | + "\n" |
| 110 | + ] |
| 111 | + }, |
| 112 | + { |
| 113 | + "cell_type": "code", |
| 114 | + "execution_count": 6, |
| 115 | + "id": "cdb86e0d", |
| 116 | + "metadata": {}, |
| 117 | + "outputs": [ |
| 118 | + { |
| 119 | + "name": "stdout", |
| 120 | + "output_type": "stream", |
| 121 | + "text": [ |
| 122 | + "Running Chroma using direct local API.\n", |
| 123 | + "No existing DB found in db, skipping load\n", |
| 124 | + "No existing DB found in db, skipping load\n" |
| 125 | + ] |
| 126 | + } |
| 127 | + ], |
| 128 | + "source": [ |
| 129 | + "# Embed and store the texts\n", |
| 130 | + "# Supplying a persist_directory will store the embeddings on disk\n", |
| 131 | + "persist_directory = 'db'\n", |
| 132 | + "\n", |
| 133 | + "embedding = OpenAIEmbeddings()\n", |
| 134 | + "vectordb = Chroma.from_documents(documents=docs, embedding=embedding, persist_directory=persist_directory)" |
| 135 | + ] |
| 136 | + }, |
| 137 | + { |
| 138 | + "cell_type": "markdown", |
| 139 | + "id": "f568a322", |
| 140 | + "metadata": {}, |
| 141 | + "source": [ |
| 142 | + "### Persist the Database\n", |
| 143 | + "In a notebook, we should call persist() to ensure the embeddings are written to disk. This isn't necessary in a script - the database will be automatically persisted when the client object is destroyed." |
| 144 | + ] |
| 145 | + }, |
| 146 | + { |
| 147 | + "cell_type": "code", |
| 148 | + "execution_count": 8, |
| 149 | + "id": "74b08cb4", |
| 150 | + "metadata": {}, |
| 151 | + "outputs": [ |
| 152 | + { |
| 153 | + "name": "stdout", |
| 154 | + "output_type": "stream", |
| 155 | + "text": [ |
| 156 | + "Persisting DB to disk, putting it in the save folder db\n", |
| 157 | + "PersistentDuckDB del, about to run persist\n", |
| 158 | + "Persisting DB to disk, putting it in the save folder db\n" |
| 159 | + ] |
| 160 | + } |
| 161 | + ], |
| 162 | + "source": [ |
| 163 | + "vectordb.persist()\n", |
| 164 | + "vectordb = None" |
| 165 | + ] |
| 166 | + }, |
| 167 | + { |
| 168 | + "cell_type": "markdown", |
| 169 | + "id": "cc9ed900", |
| 170 | + "metadata": {}, |
| 171 | + "source": [ |
| 172 | + "### Load the Database from disk, and create the chain\n", |
| 173 | + "Be sure to pass the same persist_directory and embedding_function as you did when you instantiated the database. Initialize the chain we will use for question answering." |
| 174 | + ] |
| 175 | + }, |
| 176 | + { |
| 177 | + "cell_type": "code", |
| 178 | + "execution_count": 10, |
| 179 | + "id": "31fecfe9", |
| 180 | + "metadata": {}, |
| 181 | + "outputs": [ |
| 182 | + { |
| 183 | + "name": "stdout", |
| 184 | + "output_type": "stream", |
| 185 | + "text": [ |
| 186 | + "Running Chroma using direct local API.\n", |
| 187 | + "loaded in 4 embeddings\n", |
| 188 | + "loaded in 1 collections\n" |
| 189 | + ] |
| 190 | + } |
| 191 | + ], |
| 192 | + "source": [ |
| 193 | + "# Now we can load the persisted database from disk, and use it as normal. \n", |
| 194 | + "vectordb = Chroma(persist_directory=persist_directory, embedding_function=embedding)\n" |
| 195 | + ] |
| 196 | + }, |
92 | 197 | { |
93 | 198 | "cell_type": "code", |
94 | 199 | "execution_count": null, |
95 | | - "id": "a359ed74", |
| 200 | + "id": "4dde7a0d", |
96 | 201 | "metadata": {}, |
97 | 202 | "outputs": [], |
98 | 203 | "source": [] |
|
0 commit comments