pgvector support for Python
Supports Django, SQLAlchemy, SQLModel, Psycopg 3, Psycopg 2, asyncpg, and Peewee
Run:
pip install pgvector
And follow the instructions for your database library:
- Django
- SQLAlchemy
- SQLModel
- Psycopg 3
- Psycopg 2
- asyncpg
- Peewee
Or check out some examples:
- Embeddings with OpenAI
- Sentence embeddings with SentenceTransformers
- Hybrid search with SentenceTransformers (Reciprocal Rank Fusion)
- Hybrid search with SentenceTransformers (cross-encoder)
- Image search with PyTorch
- Implicit feedback recommendations with Implicit
- Explicit feedback recommendations with Surprise
- Recommendations with LightFM
- Horizontal scaling with Citus
Django
Create a migration to enable the extension
from pgvector.django import VectorExtension class Migration(migrations.Migration): operations = [ VectorExtension() ]
Add a vector field to your model
from pgvector.django import VectorField class Item(models.Model): embedding = VectorField(dimensions=3)
Insert a vector
item = Item(embedding=[1, 2, 3]) item.save()
Get the nearest neighbors to a vector
from pgvector.django import L2Distance Item.objects.order_by(L2Distance('embedding', [3, 1, 2]))[:5]
Also supports MaxInnerProduct and CosineDistance
Get the distance
Item.objects.annotate(distance=L2Distance('embedding', [3, 1, 2]))
Get items within a certain distance
Item.objects.alias(distance=L2Distance('embedding', [3, 1, 2])).filter(distance__lt=5)
Average vectors
from django.db.models import Avg Item.objects.aggregate(Avg('embedding'))
Also supports Sum
Add an approximate index
from pgvector.django import HnswIndex, IvfflatIndex class Item(models.Model): class Meta: indexes = [ HnswIndex( name='my_index', fields=['embedding'], m=16, ef_construction=64, opclasses=['vector_l2_ops'] ), # or IvfflatIndex( name='my_index', fields=['embedding'], lists=100, opclasses=['vector_l2_ops'] ) ]
Use vector_ip_ops for inner product and vector_cosine_ops for cosine distance
SQLAlchemy
Enable the extension
session.execute(text('CREATE EXTENSION IF NOT EXISTS vector'))
Add a vector column
from pgvector.sqlalchemy import Vector class Item(Base): __tablename__ = "items" id: Mapped[int] = mapped_column(primary_key=True) embedding = mapped_column(Vector(3))
Insert a vector
item = Item(embedding=[1, 2, 3]) session.add(item) session.commit()
Get the nearest neighbors to a vector
session.scalars(select(Item).order_by(Item.embedding.l2_distance([3, 1, 2])).limit(5))
Also supports max_inner_product and cosine_distance
Get the distance
session.scalars(select(Item.embedding.l2_distance([3, 1, 2])))
Get items within a certain distance
session.scalars(select(Item).filter(Item.embedding.l2_distance([3, 1, 2]) < 5))
Average vectors
from sqlalchemy.sql import func session.scalars(select(func.avg(Item.embedding))).first()
Also supports sum
Add an approximate index
index = Index( 'my_index', Item.embedding, postgresql_using='hnsw', postgresql_with={'m': 16, 'ef_construction': 64}, postgresql_ops={'embedding': 'vector_l2_ops'} ) # or index = Index( 'my_index', Item.embedding, postgresql_using='ivfflat', postgresql_with={'lists': 100}, postgresql_ops={'embedding': 'vector_l2_ops'} ) index.create(engine)
Use vector_ip_ops for inner product and vector_cosine_ops for cosine distance
SQLModel
Enable the extension
session.exec(text('CREATE EXTENSION IF NOT EXISTS vector'))
Add a vector column
from pgvector.sqlalchemy import Vector from sqlalchemy import Column class Item(SQLModel, table=True): embedding: List[float] = Field(sa_column=Column(Vector(3)))
Insert a vector
item = Item(embedding=[1, 2, 3]) session.add(item) session.commit()
Get the nearest neighbors to a vector
session.exec(select(Item).order_by(Item.embedding.l2_distance([3, 1, 2])).limit(5))
Also supports max_inner_product and cosine_distance
Get the distance
session.exec(select(Item.embedding.l2_distance([3, 1, 2])))
Get items within a certain distance
session.exec(select(Item).filter(Item.embedding.l2_distance([3, 1, 2]) < 5))
Average vectors
from sqlalchemy.sql import func session.exec(select(func.avg(Item.embedding))).first()
Also supports sum
Add an approximate index
from sqlalchemy import Index index = Index( 'my_index', Item.embedding, postgresql_using='hnsw', postgresql_with={'m': 16, 'ef_construction': 64}, postgresql_ops={'embedding': 'vector_l2_ops'} ) # or index = Index( 'my_index', Item.embedding, postgresql_using='ivfflat', postgresql_with={'lists': 100}, postgresql_ops={'embedding': 'vector_l2_ops'} ) index.create(engine)
Use vector_ip_ops for inner product and vector_cosine_ops for cosine distance
Psycopg 3
Enable the extension
conn.execute('CREATE EXTENSION IF NOT EXISTS vector')
Register the vector type with your connection
from pgvector.psycopg import register_vector register_vector(conn)
For async connections, use
from pgvector.psycopg import register_vector_async await register_vector_async(conn)
Create a table
conn.execute('CREATE TABLE items (id bigserial PRIMARY KEY, embedding vector(3))')
Insert a vector
embedding = np.array([1, 2, 3]) conn.execute('INSERT INTO items (embedding) VALUES (%s)', (embedding,))
Get the nearest neighbors to a vector
conn.execute('SELECT * FROM items ORDER BY embedding <-> %s LIMIT 5', (embedding,)).fetchall()
Add an approximate index
conn.execute('CREATE INDEX ON items USING hnsw (embedding vector_l2_ops)') # or conn.execute('CREATE INDEX ON items USING ivfflat (embedding vector_l2_ops) WITH (lists = 100)')
Use vector_ip_ops for inner product and vector_cosine_ops for cosine distance
Psycopg 2
Enable the extension
cur = conn.cursor() cur.execute('CREATE EXTENSION IF NOT EXISTS vector')
Register the vector type with your connection or cursor
from pgvector.psycopg2 import register_vector register_vector(conn)
Create a table
cur.execute('CREATE TABLE items (id bigserial PRIMARY KEY, embedding vector(3))')
Insert a vector
embedding = np.array([1, 2, 3]) cur.execute('INSERT INTO items (embedding) VALUES (%s)', (embedding,))
Get the nearest neighbors to a vector
cur.execute('SELECT * FROM items ORDER BY embedding <-> %s LIMIT 5', (embedding,)) cur.fetchall()
Add an approximate index
cur.execute('CREATE INDEX ON items USING hnsw (embedding vector_l2_ops)') # or cur.execute('CREATE INDEX ON items USING ivfflat (embedding vector_l2_ops) WITH (lists = 100)')
Use vector_ip_ops for inner product and vector_cosine_ops for cosine distance
asyncpg
Enable the extension
await conn.execute('CREATE EXTENSION IF NOT EXISTS vector')
Register the vector type with your connection
from pgvector.asyncpg import register_vector await register_vector(conn)
or your pool
async def init(conn): await register_vector(conn) pool = await asyncpg.create_pool(..., init=init)
Create a table
await conn.execute('CREATE TABLE items (id bigserial PRIMARY KEY, embedding vector(3))')
Insert a vector
embedding = np.array([1, 2, 3]) await conn.execute('INSERT INTO items (embedding) VALUES ($1)', embedding)
Get the nearest neighbors to a vector
await conn.fetch('SELECT * FROM items ORDER BY embedding <-> $1 LIMIT 5', embedding)
Add an approximate index
await conn.execute('CREATE INDEX ON items USING hnsw (embedding vector_l2_ops)') # or await conn.execute('CREATE INDEX ON items USING ivfflat (embedding vector_l2_ops) WITH (lists = 100)')
Use vector_ip_ops for inner product and vector_cosine_ops for cosine distance
Peewee
Add a vector column
from pgvector.peewee import VectorField class Item(BaseModel): embedding = VectorField(dimensions=3)
Insert a vector
item = Item.create(embedding=[1, 2, 3])
Get the nearest neighbors to a vector
Item.select().order_by(Item.embedding.l2_distance([3, 1, 2])).limit(5)
Also supports max_inner_product and cosine_distance
Get the distance
Item.select(Item.embedding.l2_distance([3, 1, 2]).alias('distance'))
Get items within a certain distance
Item.select().where(Item.embedding.l2_distance([3, 1, 2]) < 5)
Average vectors
from peewee import fn Item.select(fn.avg(Item.embedding)).scalar()
Also supports sum
Add an approximate index
Item.add_index('embedding vector_l2_ops', using='hnsw')
Use vector_ip_ops for inner product and vector_cosine_ops for cosine distance
View the changelog
Everyone is encouraged to help improve this project. Here are a few ways you can help:
- Report bugs
- Fix bugs and submit pull requests
- Write, clarify, or fix documentation
- Suggest or add new features
To get started with development:
git clone https://github.com/pgvector/pgvector-python.git cd pgvector-python pip install -r requirements.txt createdb pgvector_python_test pytest