How to Build a RAG Pipeline with LlamaParse and Snowflake Cortex

See how leading teams deploy agents at scale. Find a stop near you.

Snowflake for Developers/Guides/How to Build a RAG Pipeline with LlamaParse and Snowflake Cortex

Partner Solution

How to Build a RAG Pipeline with LlamaParse and Snowflake Cortex

Josh Reini

Overview

Quickly build a RAG pipeline to query unstructured documents like PDFs, slide decks, and manuals. Use cases include Q&A over contracts, financial reports, and compliance docs, internal copilots for onboarding and support, and retrieval over technical specs or architecture guides, all powered by Snowflake Cortex and LlamaParse.

This guide walks you through building an end-to-end RAG workflow using LlamaParse, a genAI-native parser from LlamaIndex, and Snowflake Cortex, which provides built-in tools for text splitting, hybrid search, and LLM-powered generation.

LlamaParse is designed for LLM workflows and offers:

Accurate table extraction
Natural language prompts for structured output
JSON and image extraction modes
Support for 10+ file types (PDF, PPTX, DOCX, HTML, XML, etc.)
Multilingual support

It ensures clean, structured data, ready for downstream tasks like RAG, semantic search, and agents.

Together, LlamaParse and Cortex offer a seamless path from raw documents to intelligent, production-ready RAG workflows, all within your Snowflake environment.

This solution was created, tested, and verified by a member of the Snowflake Partner Network and meets compatibility requirements with Snowflake instances as of the date of publication.

Code Example

from llama_cloud_services import LlamaParse

# Configure the LlamaParse client used to parse the source document.
parser = LlamaParse(num_workers=4, verbose=True, language="en")

# Parse the 10-K PDF from the local working directory.
result = parser.parse("./snowflake_2025_10k.pdf")

# Collect the parsed output as markdown documents, without per-page splitting.
markdown_documents = result.get_markdown_documents(split_by_page=False)

import pandas as pd

# Fields that matter only to vector/RAG helpers – we don't need them here.
_INTERNAL_KEYS_TO_SKIP = {
    "excluded_embed_metadata_keys",
    "excluded_llm_metadata_keys",
    "relationships",
    "metadata_template",
    "metadata_separator",
    "text_template",
    "class_name",
}


def documents_to_dataframe(documents) -> pd.DataFrame:
    """Convert LlamaIndex documents to a tidy pandas DataFrame.

    Each document is dumped to a dict via ``model_dump(exclude_none=True)``.
    Helper fields listed in ``_INTERNAL_KEYS_TO_SKIP`` are dropped, the
    ``metadata`` mapping is flattened into top-level columns, and the nested
    ``text_resource`` is replaced by a plain ``text`` column.

    Args:
        documents: Iterable of objects exposing ``model_dump(exclude_none=True)``.

    Returns:
        A DataFrame with one row per document; empty when ``documents`` is empty.
    """
    rows = []

    for doc in documents:
        record = doc.model_dump(exclude_none=True)

        for key in _INTERNAL_KEYS_TO_SKIP:
            record.pop(key, None)

        # Pull out & flatten metadata so each metadata key becomes a column.
        meta = record.pop("metadata", {})
        record.update(meta)

        # Extract raw text; the resource may be a dict or an object with `.text`.
        text_resource = record.pop("text_resource", None)
        if text_resource is not None:
            if isinstance(text_resource, dict):
                record["text"] = text_resource.get("text")
            else:
                record["text"] = getattr(text_resource, "text", None)

        rows.append(record)

    return pd.DataFrame(rows)

from snowflake.snowpark import Session

import os

# Flatten the parsed markdown documents into a DataFrame so they can be
# uploaded; without this step `documents_df` is undefined below.
documents_df = documents_to_dataframe(markdown_documents)

# Connection settings are read from the environment so no credentials are
# embedded in the script. os.getenv returns None for any unset variable.
connection_parameters = {
    "account": os.getenv("SNOWFLAKE_ACCOUNT"),
    "user": os.getenv("SNOWFLAKE_USER"),
    "password": os.getenv("SNOWFLAKE_PASSWORD"),
    "role": os.getenv("SNOWFLAKE_ROLE"),
    "warehouse": os.getenv("SNOWFLAKE_WAREHOUSE"),
    "database": os.getenv("SNOWFLAKE_DATABASE"),
    "schema": os.getenv("SNOWFLAKE_SCHEMA"),
}

# Open the session and persist the documents, replacing any existing table.
session = Session.builder.configs(connection_parameters).create()
snowpark_df = session.create_dataframe(documents_df)
snowpark_df.write.mode("overwrite").save_as_table("snowflake_10k")

# Chunk the uploaded documents inside Snowflake. The statement (re)creates
# SNOWFLAKE_10K_MARKDOWN_CHUNKS with one row per element returned by
# SNOWFLAKE.CORTEX.SPLIT_TEXT_RECURSIVE_CHARACTER applied to the "text" column.
# 'markdown' is the format argument; 512 and 128 are passed as the chunk size
# and overlap arguments of that function.
# NOTE(review): assumes SNOWFLAKE_10K exposes an ID column and case-sensitive
# "file_name" / "text" columns — confirm against the table written earlier.
split_text_sql = """
CREATE OR REPLACE TABLE SNOWFLAKE_10K_MARKDOWN_CHUNKS AS
SELECT
ID,
"file_name" as FILE_NAME,
c.value::string as TEXT
FROM
SNOWFLAKE_10K,
LATERAL FLATTEN(input => SNOWFLAKE.CORTEX.SPLIT_TEXT_RECURSIVE_CHARACTER(
"text",
'markdown',
512,
128
)) c;
"""

# Execute the statement; .collect() forces it to run.
session.sql(split_text_sql).collect()

# Use the warehouse the session is already connected with instead of a
# hard-coded warehouse name that only exists in one account.
search_warehouse = session.get_current_warehouse()
if not search_warehouse:
    raise ValueError(
        "No active warehouse on the session; set SNOWFLAKE_WAREHOUSE before "
        "creating the Cortex Search Service."
    )

# Index the TEXT column of the chunk table, exposing ID and FILE_NAME as
# attributes. The chunk table is referenced unqualified so it resolves in the
# same database/schema where it was created by the session.
create_search_service_sql = f"""
CREATE OR REPLACE CORTEX SEARCH SERVICE SNOWFLAKE_10K_SEARCH_SERVICE
ON TEXT
ATTRIBUTES ID, FILE_NAME
WAREHOUSE = {search_warehouse}
TARGET_LAG = '1 hour'
AS (
SELECT
ID,
FILE_NAME,
TEXT
FROM SNOWFLAKE_10K_MARKDOWN_CHUNKS
);
"""

# Execute the statement; .collect() forces it to run.
session.sql(create_search_service_sql).collect()

Now that the Cortex Search Service is created, we can create a Python class to retrieve relevant chunks from the service.

from snowflake.core import Root
from typing import List
from snowflake.snowpark.session import Session

class CortexSearchRetriever:
    """Retrieve matching text chunks from a Cortex Search Service.

    Args:
        snowpark_session: Snowpark session used to reach the service.
        limit_to_retrieve: Maximum number of chunks requested per query.
        database: Database that holds the search service.
        schema: Schema that holds the search service.
        service_name: Name of the Cortex Search Service to query.
    """

    def __init__(
        self,
        snowpark_session: "Session",
        limit_to_retrieve: int = 4,
        *,
        database: str = "SEC_10KS",
        schema: str = "PUBLIC",
        service_name: str = "SNOWFLAKE_10K_SEARCH_SERVICE",
    ):
        self._snowpark_session = snowpark_session
        self._limit_to_retrieve = limit_to_retrieve
        self._database = database
        self._schema = schema
        self._service_name = service_name

    def retrieve(self, query: str) -> List[str]:
        """Return the ``text`` field of each search result for ``query``.

        Returns an empty list when the service reports no results.
        """
        root = Root(self._snowpark_session)
        search_service = (
            root.databases[self._database]
            .schemas[self._schema]
            .cortex_search_services[self._service_name]
        )
        resp = search_service.search(
            query=query,
            columns=["text"],
            limit=self._limit_to_retrieve,
        )
        # `results` may be empty or missing; normalise both to an empty list.
        if not resp.results:
            return []
        return [hit["text"] for hit in resp.results]

# Try the retriever on a sample question, asking for up to five chunks.
retriever = CortexSearchRetriever(
    snowpark_session=session,
    limit_to_retrieve=5,
)
retrieved_context = retriever.retrieve(
    query="What was the total revenue (in billions) for Snowflake in FY 2024? How much of that was product revenue?",
)
# Bare expression: shows the retrieved chunks when run as a notebook cell.
retrieved_context

from snowflake.cortex import complete

class RAG:
    """Minimal retrieve-then-generate pipeline.

    Args:
        session: Snowpark session used for both retrieval and completion.
        model: Name of the model passed to ``complete``.
        limit_to_retrieve: Maximum number of chunks fetched per question.
    """

    def __init__(
        self,
        session,
        *,
        model: str = "claude-4-sonnet",
        limit_to_retrieve: int = 10,
    ):
        self.session = session
        self.model = model
        self.retriever = CortexSearchRetriever(
            snowpark_session=self.session,
            limit_to_retrieve=limit_to_retrieve,
        )

    def retrieve_context(self, query: str) -> list:
        """Return the chunks the retriever finds for ``query``."""
        return self.retriever.retrieve(query)

    @staticmethod
    def _build_prompt(query: str, context_str) -> str:
        """Render the prompt text from the question and its context."""
        # Join chunks as plain text; interpolating the list directly would
        # embed its Python repr (brackets, quotes, escaped newlines).
        if isinstance(context_str, str):
            context = context_str
        else:
            context = "\n\n".join(str(chunk) for chunk in context_str)
        return (
            "\n"
            "You are an expert assistant extracting information from context provided.\n\n"
            "Answer the question concisely, yet completely. Only use the information provided.\n\n"
            f"Context: {context}\n\n"
            f"Question:\n{query}\nAnswer:\n"
        )

    def generate_completion(self, query: str, context_str: list) -> str:
        """Ask the model to answer ``query`` using only ``context_str``.

        ``context_str`` may be a list of chunks or an already-joined string.
        """
        prompt = self._build_prompt(query, context_str)
        return complete(self.model, prompt, session=self.session)

    def query(self, query: str) -> str:
        """Retrieve context for ``query`` and return the generated answer."""
        context_str = self.retrieve_context(query)
        return self.generate_completion(query, context_str)

# Run the full pipeline end to end on a sample question and print the answer.
rag = RAG(session)
response = rag.query(
    query="What was the total revenue (in billions) for Snowflake in FY 2024? How much of that was product revenue?",
)
print(response)

Get Started

Updated 2026-04-29

This content is provided as is and is not maintained on an ongoing basis. It may be out of date with current Snowflake instances.