Keyword search is like asking a librarian, “Do you have a book with the word spaceship?”
Semantic search is like asking, “Got anything that feels like Interstellar but with fewer tears?”
That second one is where vector search shines: it retrieves by meaning, not exact words. MongoDB supports this via the $vectorSearch aggregation stage, purpose-built for semantic search over stored embeddings.
Keyword search is great until your user types “lappy” instead of “laptop.” Then it’s just guesswork with confidence.
Zero-shot here means: you didn’t train a custom model for your domain. You just:
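Ingest time: embed each document with a pretrained embedding model and store the vector alongside the document.
Query time: embed the user’s query with the same model and let $vectorSearch return the nearest stored vectors.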
Create a vector index (the query code below expects it to be named articles_vector_index) on the embedding field, with numDimensions equal to the output size of your embedding model.
{
"fields": [
{
"type": "vector",
"path": "embedding",
"numDimensions": 1024,
"similarity": "cosine"
},
{ "type": "filter", "path": "category" }
]
}
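Below is an example you can replicate with Voyage AI. Note that voyage-3-large returns 1024-dimensional vectors by default, so the index’s numDimensions must be 1024 for this example (use 512 if you switch to voyage-3-lite).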
from pymongo import MongoClient
import voyageai
import os
# 1) Voyage AI client. The API key is read from the VOYAGE_API_KEY environment
#    variable; a missing variable raises KeyError as soon as the module runs.
vo = voyageai.Client(api_key=os.environ["VOYAGE_API_KEY"])
def embed(text):
    """Embed one piece of text as a document and return its vector.

    The text is sent to the module-level Voyage AI client ``vo`` as a
    single-item batch with ``input_type="document"``; the first (and only)
    embedding of the response is returned.
    """
    batch = [text]
    response = vo.embed(
        texts=batch,
        model="voyage-3-large",  # or voyage-3-lite
        input_type="document",
    )
    first_vector = response.embeddings[0]
    return first_vector
# MongoDB client shared by the whole script; the connection string is read
# from the MONGODB_URI environment variable (KeyError if it is not set).
client = MongoClient(os.environ["MONGODB_URI"])
def ingest():
    """Embed the sample articles and store them in ``demo.articles``.

    For every sample document the title, body and category are joined with
    newlines, embedded with ``embed`` and the resulting vector is stored
    under the ``embedding`` field next to the original fields.

    All documents are embedded first and then written with a single
    ``insert_many`` call, so a failure while embedding leaves the collection
    untouched instead of partially populated.

    The module-level ``client`` is intentionally left open: this function
    does not own it, and closing it here would make any later use of the
    client fail. Close the client once, when the program is done with it.
    """
    col = client["demo"]["articles"]
    docs = [
        {
            "title": "MongoDB Vector Search 101",
            "body": "Learn semantic search with embeddings and $vectorSearch.",
            "category": "mongodb",
        },
        {
            "title": "RAG for busy engineers",
            "body": "Retrieval-Augmented Generation without losing your weekend.",
            "category": "genai",
        },
    ]
    # Build new dicts rather than mutating the sample documents in place.
    enriched = [
        {
            **d,
            "embedding": embed(f"{d['title']}\n{d['body']}\n{d['category']}"),
        }
        for d in docs
    ]
    # One round trip to the server instead of one insert per document.
    col.insert_many(enriched)
    print("Ingest complete ✅")
if __name__ == "__main__":
    # Release the shared MongoDB client even when ingestion fails part-way;
    # a second close() on an already-closed client is harmless.
    try:
        ingest()
    finally:
        client.close()
MongoDB’s $vectorSearch is an aggregation stage that performs semantic similarity search over indexed embeddings and can return a relevance score via $meta: "vectorSearchScore".
from pymongo import MongoClient
import voyageai
import os
# Voyage AI client; the API key is read from the VOYAGE_API_KEY environment
# variable (KeyError if it is not set).
vo = voyageai.Client(api_key=os.environ["VOYAGE_API_KEY"])
# MongoDB client shared by the whole script; the connection string is read
# from the MONGODB_URI environment variable.
client = MongoClient(os.environ["MONGODB_URI"])
def embed(text):
    """Embed one search query and return its vector.

    The text is sent to the module-level Voyage AI client ``vo`` as a
    single-item batch with ``input_type="query"``; the first (and only)
    embedding of the response is returned.
    """
    batch = [text]
    response = vo.embed(
        texts=batch,
        model="voyage-3-large",  # or voyage-3-lite
        input_type="query",
    )
    first_vector = response.embeddings[0]
    return first_vector
def semantic_search(query, limit=5, category=None, num_candidates=100):
    """Run a semantic search over ``demo.articles`` and return the matches.

    Args:
        query: Free-text search query; it is embedded with ``embed``.
        limit: Maximum number of documents to return.
        category: When truthy, only documents whose ``category`` equals this
            value are considered. ``None`` (the default) disables the filter.
        num_candidates: Number of nearest-neighbour candidates the
            ``$vectorSearch`` stage examines before keeping the best
            ``limit``. Raised to ``limit`` automatically when smaller.

    Returns:
        A list of dicts with ``title``, ``category`` and ``score`` (the
        ``vectorSearchScore`` metadata of the match).

    The module-level ``client`` is intentionally left open so the function
    can be called repeatedly; closing it here would make every call after
    the first one fail. Close the client once at program shutdown.
    """
    col = client["demo"]["articles"]
    query_vector = embed(query)

    vector_stage = {
        "index": "articles_vector_index",
        "path": "embedding",
        "queryVector": query_vector,
        # $vectorSearch rejects a limit larger than numCandidates, so never
        # let the candidate pool drop below the requested limit.
        "numCandidates": max(num_candidates, limit),
        "limit": limit,
    }
    if category:
        vector_stage["filter"] = {"category": {"$eq": category}}

    pipeline_agg = [
        {"$vectorSearch": vector_stage},
        {
            "$project": {
                "_id": 0,
                "title": 1,
                "category": 1,
                "score": {"$meta": "vectorSearchScore"},
            }
        },
    ]
    return list(col.aggregate(pipeline_agg))
if __name__ == "__main__":
    # Release the shared MongoDB client even when the search fails; a second
    # close() on an already-closed client is harmless.
    try:
        results = semantic_search(
            "how to do meaning-based search in mongodb",
            limit=3,
        )
        print(results)
    finally:
        client.close()
Why those knobs matter: