How to Build a RAG Pipeline with LlamaParse and Snowflake Cortex
Overview
Quickly build a RAG pipeline to query unstructured documents like PDFs, slide decks, and manuals. Use cases include Q&A over contracts, financial reports, and compliance docs; internal copilots for onboarding and support; and retrieval over technical specs or architecture guides — all powered by Snowflake Cortex and LlamaParse.
This guide walks you through building an end-to-end RAG workflow using LlamaParse, a genAI-native parser from LlamaIndex, and Snowflake Cortex, which provides built-in tools for text splitting, hybrid search, and LLM-powered generation.
LlamaParse is designed for LLM workflows and offers:
- Accurate table extraction
- Natural language prompts for structured output
- JSON and image extraction modes
- Support for 10+ file types (PDF, PPTX, DOCX, HTML, XML, etc.)
- Multilingual support
It ensures clean, structured data, ready for downstream tasks like RAG, semantic search, and agents.
Together, LlamaParse and Cortex offer a seamless path from raw documents to intelligent, production-ready RAG workflows, all within your Snowflake environment.
This solution was created, tested, and verified by a member of the Snowflake Partner Network and meets compatibility requirements with Snowflake instances as of the date of publication.
Code Example
import os

import pandas as pd
from llama_cloud_services import LlamaParse
from snowflake.snowpark import Session

# --- Step 1: parse the source PDF with LlamaParse ---------------------------
# NOTE(review): no api_key is passed here; presumably LlamaParse picks up its
# credentials from the environment — confirm before running.
parser = LlamaParse(
    num_workers=4,
    verbose=True,
    language="en",
)

result = parser.parse("./snowflake_2025_10k.pdf")

# Get markdown documents (one document for the whole file, not one per page)
markdown_documents = result.get_markdown_documents(split_by_page=False)

# --- Step 2: flatten the parsed documents into a DataFrame ------------------
# fields that matter only to vector/RAG helpers – we don't need them here
_INTERNAL_KEYS_TO_SKIP = {
    "excluded_embed_metadata_keys",
    "excluded_llm_metadata_keys",
    "relationships",
    "metadata_template",
    "metadata_separator",
    "text_template",
    "class_name",
}


def documents_to_dataframe(documents):
    """Convert a list of LlamaIndex documents to a tidy pandas DataFrame.

    Vector-store helper fields that aren't needed for retrieval are omitted,
    each document's ``metadata`` dict is flattened into top-level columns,
    and the raw text is lifted out of ``text_resource`` into a ``text`` column.

    Args:
        documents: Iterable of objects exposing ``model_dump(exclude_none=True)``.

    Returns:
        A ``pandas.DataFrame`` with one row per document.
    """
    rows = []
    for doc in documents:
        d = doc.model_dump(exclude_none=True)
        for k in _INTERNAL_KEYS_TO_SKIP:
            d.pop(k, None)

        # Pull out & flatten metadata
        meta = d.pop("metadata", {})
        d.update(meta)

        # Extract raw text; the resource may be a plain dict or an object
        t_res = d.pop("text_resource", None)
        if t_res is not None:
            d["text"] = (
                t_res.get("text")
                if isinstance(t_res, dict)
                else getattr(t_res, "text", None)
            )

        rows.append(d)

    return pd.DataFrame(rows)


# The DataFrame must be built before it is uploaded below.
documents_df = documents_to_dataframe(markdown_documents)

# --- Step 3: connect to Snowflake and upload the documents ------------------
# All connection settings are read from environment variables.
connection_parameters = {
    "account": os.getenv("SNOWFLAKE_ACCOUNT"),
    "user": os.getenv("SNOWFLAKE_USER"),
    "password": os.getenv("SNOWFLAKE_PASSWORD"),
    "role": os.getenv("SNOWFLAKE_ROLE"),
    "warehouse": os.getenv("SNOWFLAKE_WAREHOUSE"),
    "database": os.getenv("SNOWFLAKE_DATABASE"),
    "schema": os.getenv("SNOWFLAKE_SCHEMA"),
}

session = Session.builder.configs(connection_parameters).create()

snowpark_df = session.create_dataframe(documents_df)
snowpark_df.write.mode("overwrite").save_as_table("snowflake_10k")

# --- Step 4: split the markdown into chunks inside Snowflake ----------------
# SPLIT_TEXT_RECURSIVE_CHARACTER is called with format 'markdown', a chunk
# size of 512 and an overlap of 128; FLATTEN yields one row per chunk.
split_text_sql = """
CREATE OR REPLACE TABLE SNOWFLAKE_10K_MARKDOWN_CHUNKS AS
SELECT
    ID,
    "file_name" as FILE_NAME,
    c.value::string as TEXT
FROM
    SNOWFLAKE_10K,
    LATERAL FLATTEN(input => SNOWFLAKE.CORTEX.SPLIT_TEXT_RECURSIVE_CHARACTER(
        "text",
        'markdown',
        512,
        128
    )) c;
"""

session.sql(split_text_sql).collect()
from typing import List

from snowflake.core import Root
from snowflake.cortex import complete
from snowflake.snowpark.session import Session

# --- Step 5: create the Cortex Search Service over the chunk table ----------
# NOTE(review): the warehouse name `S` and the `SEC_10KS.PUBLIC` location are
# hard-coded here, while the session's warehouse/database/schema come from
# environment variables — make sure they refer to the same objects.
create_search_service_sql = """
CREATE OR REPLACE CORTEX SEARCH SERVICE SNOWFLAKE_10K_SEARCH_SERVICE
    ON TEXT
    ATTRIBUTES ID, FILE_NAME
    WAREHOUSE = S
    TARGET_LAG = '1 hour'
AS (
    SELECT
        ID,
        FILE_NAME,
        TEXT
    FROM SEC_10KS.PUBLIC.SNOWFLAKE_10K_MARKDOWN_CHUNKS
);
"""

session.sql(create_search_service_sql).collect()


# Now that the Cortex Search Service is created, we can create a Python class
# to retrieve relevant chunks from the service.
class CortexSearchRetriever:
    """Retrieve the most relevant text chunks from the Cortex Search Service."""

    def __init__(self, snowpark_session: Session, limit_to_retrieve: int = 4):
        """Store the session and the maximum number of chunks to return.

        Args:
            snowpark_session: An open Snowpark session.
            limit_to_retrieve: Maximum number of results requested per query.
        """
        self._snowpark_session = snowpark_session
        self._limit_to_retrieve = limit_to_retrieve

    def retrieve(self, query: str) -> List[str]:
        """Return the ``text`` field of each search hit for *query*.

        Returns an empty list when the service reports no results.
        """
        root = Root(self._snowpark_session)
        search_service = (
            root.databases["SEC_10KS"]
            .schemas["PUBLIC"]
            .cortex_search_services["SNOWFLAKE_10K_SEARCH_SERVICE"]
        )
        resp = search_service.search(
            query=query,
            columns=["text"],
            limit=self._limit_to_retrieve,
        )
        return [curr["text"] for curr in resp.results] if resp.results else []


retriever = CortexSearchRetriever(snowpark_session=session, limit_to_retrieve=5)
retrieved_context = retriever.retrieve(
    "What was the total revenue (in billions) for Snowflake in FY 2024? "
    "How much of that was product revenue?"
)
# In a notebook, a bare name as the last expression displays its value.
retrieved_context


class RAG:
    """Minimal retrieve-then-generate pipeline on top of Cortex Search."""

    def __init__(self, session):
        """Keep the session and build a retriever that fetches up to 10 chunks."""
        self.session = session
        self.retriever = CortexSearchRetriever(
            snowpark_session=self.session, limit_to_retrieve=10
        )

    def retrieve_context(self, query: str) -> list:
        """Return the list of text chunks retrieved for *query*."""
        return self.retriever.retrieve(query)

    def generate_completion(self, query: str, context_str: list) -> str:
        """Ask the LLM to answer *query* using only the supplied context.

        Args:
            query: The user's question.
            context_str: Retrieved chunks (a list of strings). A single
                pre-joined string is also accepted.

        Returns:
            The value returned by ``snowflake.cortex.complete``.
        """
        # Join the chunks into plain text. Interpolating the list directly
        # would embed a Python list repr (brackets, quotes, escaped newlines)
        # in the prompt instead of the chunk text itself.
        if isinstance(context_str, str):
            context_text = context_str
        else:
            context_text = "\n\n".join(str(chunk) for chunk in context_str)

        prompt = (
            "You are an expert assistant extracting information from context provided.\n"
            "Answer the question concisely, yet completely. "
            "Only use the information provided.\n"
            f"Context: {context_text}\n"
            f"Question:\n{query}\nAnswer:\n"
        )
        response = complete("claude-4-sonnet", prompt, session=self.session)
        return response

    def query(self, query: str) -> str:
        """Retrieve context for *query* and return the generated answer."""
        context_str = self.retrieve_context(query)
        return self.generate_completion(query, context_str)


rag = RAG(session)

response = rag.query(
    "What was the total revenue (in billions) for Snowflake in FY 2024? "
    "How much of that was product revenue?"
)
print(response)
Get Started
This content is provided as is and is not maintained on an ongoing basis. It may be out of date with current Snowflake instances.