[{"data":1,"prerenderedAt":1076},["ShallowReactive",2],{"blog-rag-full-stack-guide":3},{"id":4,"title":5,"body":6,"category":1062,"date":1063,"description":16,"extension":1064,"meta":1065,"navigation":1066,"path":1067,"seo":1068,"stem":1069,"tags":1070,"__hash__":1075},"blog\u002Fblog\u002Frag-full-stack-guide.md","RAG 全链路深度解析：从 Chunking 到生产落地的工程实践",{"type":7,"value":8,"toc":1030},"minimark",[9,13,17,20,25,28,91,94,100,104,107,179,182,187,191,194,199,210,215,221,224,229,233,236,240,336,343,347,363,370,375,379,382,385,453,456,477,480,485,489,492,495,566,580,583,676,679,684,688,692,695,699,702,706,709,712,739,742,747,751,754,760,763,783,786,789,794,798,801,804,810,816,822,828,833,837,840,843,912,915,968,983,986,991,994,997,1016,1023],[10,11,5],"h1",{"id":12},"rag-全链路深度解析从-chunking-到生产落地的工程实践",[14,15,16],"p",{},"大模型很强，但这个\"强\"是有边界的。你的知识截止在训练的那一天，你无法访问公司的内部文档，还时不时编造一些听起来很合理但完全是虚构的东西。这就是为什么我们需要 RAG（Retrieval-Augmented Generation）——给大模型配一个外挂知识库，让它每次回答前先去查资料。",[14,18,19],{},"RAG 的技术栈并不复杂，但要把每个环节都做到工程可用，里面隐藏着大量的权衡与取舍。本文从原理到工程，系统梳理 RAG 全链路的技术细节。",[21,22,24],"h2",{"id":23},"_1-为什么需要-rag","1. 为什么需要 RAG",[14,26,27],{},"LLM 天生有三个硬伤，而 RAG 恰好是它们的解药。",[29,30,31,47],"table",{},[32,33,34],"thead",{},[35,36,37,41,44],"tr",{},[38,39,40],"th",{},"问题",[38,42,43],{},"说明",[38,45,46],{},"RAG 怎么解决",[48,49,50,65,78],"tbody",{},[35,51,52,59,62],{},[53,54,55],"td",{},[56,57,58],"strong",{},"知识截止",[53,60,61],{},"模型训练数据有时间限制",[53,63,64],{},"检索实时\u002F最新数据",[35,66,67,72,75],{},[53,68,69],{},[56,70,71],{},"幻觉",[53,73,74],{},"编造不存在的事实",[53,76,77],{},"基于真实检索结果回答",[35,79,80,85,88],{},[53,81,82],{},[56,83,84],{},"领域知识不足",[53,86,87],{},"对内部文档、专业知识了解有限",[53,89,90],{},"接入私有知识库",[14,92,93],{},"但这三个问题并不是在所有场景下都需要解决。如果你想用 LLM 做数学推理、代码生成或创意写作，RAG 的意义不大——这些任务不需要外部知识，反而需要模型自身的推理能力。同样，如果是风格统一、术语固定的场景（比如客服话术模板），用 Fine-tuning 固化比每次检索更高效。",[14,95,96,99],{},[56,97,98],{},"小结："," RAG 解决的是\"事实性知识\"的问题，不是所有问题。选不选 RAG，取决于你的场景是否需要、以及能否获取到可信的外部信息。",[21,101,103],{"id":102},"_2-rag-还是-fine-tuning一个架构选择题","2. 
RAG 还是 Fine-tuning：一个架构选择题",[14,105,106],{},"这是面试里最高频的问题之一，但在工程实践中也是一个真实存在的选型困境。",[29,108,109,122],{},[32,110,111],{},[35,112,113,116,119],{},[38,114,115],{},"维度",[38,117,118],{},"RAG",[38,120,121],{},"Fine-tuning",[48,123,124,135,146,157,168],{},[35,125,126,129,132],{},[53,127,128],{},"更新成本",[53,130,131],{},"低，改数据库即可",[53,133,134],{},"高，需重新训练",[35,136,137,140,143],{},[53,138,139],{},"知识范围",[53,141,142],{},"大，可到 TB 级",[53,144,145],{},"受模型容量限制",[35,147,148,151,154],{},[53,149,150],{},"适合什么",[53,152,153],{},"事实、文档、可枚举知识",[53,155,156],{},"风格、任务范式、领域语言",[35,158,159,162,165],{},[53,160,161],{},"可解释性",[53,163,164],{},"高，有引用可追溯",[53,166,167],{},"低，黑盒",[35,169,170,173,176],{},[53,171,172],{},"延迟",[53,174,175],{},"多一步检索，延迟略高",[53,177,178],{},"纯生成，延迟更低",[14,180,181],{},"关键判断标准：如果知识需要频繁更新、或者知识量很大，RAG 是更好的选择。如果是想教会模型某种写作风格或推理模式，Fine-tuning 更合适。当然，两者可以结合——先用 RAG 召回事实，再用 Fine-tuned 模型以特定风格组织回答。",[14,183,184,186],{},[56,185,98],{}," RAG 管事实，Fine-tuning 管风格。这不是二选一，而是可以根据场景组合使用的两种工具。",[21,188,190],{"id":189},"_3-rag-全链路概览","3. RAG 全链路概览",[14,192,193],{},"RAG 系统分为离线（数据准备）和在线（检索生成）两个阶段。",[14,195,196],{},[56,197,198],{},"离线阶段：",[200,201,206],"pre",{"className":202,"code":204,"language":205},[203],"language-text","原始文档 → 清洗 → 切分（chunking）→ 向量化（embedding）→ 存入向量库\n","text",[207,208,204],"code",{"__ignoreMap":209},"",[14,211,212],{},[56,213,214],{},"在线阶段：",[200,216,219],{"className":217,"code":218,"language":205},[203],"用户问题 → 查询改写 → 向量化 → 向量检索 + 关键词检索\n                                    ↓\n                                 重排（rerank）\n                                    ↓\n                           Top-K 文档 → 拼接 prompt → LLM 生成\n",[207,220,218],{"__ignoreMap":209},[14,222,223],{},"离线阶段做一次、在线阶段每次请求都做。离线决定知识的上限，在线决定响应的质量。",[14,225,226,228],{},[56,227,98],{}," 理解清楚这两个阶段的职责划分，是搭建 RAG 系统的第一步。离线做得好，在线阶段才能有好的素材可用。",[21,230,232],{"id":231},"_4-数据基石chunking-切分策略","4. 
数据基石：Chunking 切分策略",[14,234,235],{},"文档切分是整个 RAG 系统里\"看似简单但影响深远\"的一步。切得太粗，相关信息在长文本里被稀释，检索不准；切得太细，单块信息太少，LLM 拿不到完整上下文。",[237,238,239],"h3",{"id":239},"主流切分策略",[29,241,242,254],{},[32,243,244],{},[35,245,246,249,251],{},[38,247,248],{},"策略",[38,250,43],{},[38,252,253],{},"适用场景",[48,255,256,269,282,295,310,323],{},[35,257,258,263,266],{},[53,259,260],{},[56,261,262],{},"固定字符切分",[53,264,265],{},"每 N 个字符一刀切",[53,267,268],{},"粗糙原型，不推荐生产",[35,270,271,276,279],{},[53,272,273],{},[56,274,275],{},"按段落",[53,277,278],{},"按空行切",[53,280,281],{},"结构化文档",[35,283,284,289,292],{},[53,285,286],{},[56,287,288],{},"按句子",[53,290,291],{},"NLP 工具切句",[53,293,294],{},"问答型文档",[35,296,297,302,305],{},[53,298,299],{},[56,300,301],{},"递归切分（Recursive）",[53,303,304],{},"先按大边界切，超长再按小边界",[53,306,307],{},[56,308,309],{},"默认推荐",[35,311,312,317,320],{},[53,313,314],{},[56,315,316],{},"语义切分（Semantic）",[53,318,319],{},"用 Embedding 相似度找断点",[53,321,322],{},"高质量要求场景",[35,324,325,330,333],{},[53,326,327],{},[56,328,329],{},"按 Markdown 结构",[53,331,332],{},"按标题层级切",[53,334,335],{},"技术文档、Wiki",[14,337,338,339,342],{},"生产环境中最常用的是",[56,340,341],{},"递归切分","：先尝试按段落切，如果段落太长再按句子切，句子还长就按固定长度截断。这样能在语义完整性和长度控制之间取得良好的平衡。",[237,344,346],{"id":345},"关键参数chunk_size-与-chunk_overlap","关键参数：chunk_size 与 chunk_overlap",[348,349,350,357],"ul",{},[351,352,353,356],"li",{},[56,354,355],{},"chunk_size","：200-1500 token。短查询、精准问答用 300-500；需要长篇上下文理解的用 1000+。",[351,358,359,362],{},[56,360,361],{},"chunk_overlap","：10-20% of chunk_size。避免相关信息刚好被切到边界上，代价是存储和检索成本略有增加。",[14,364,365,366,369],{},"有一个小技巧值得单独说：",[56,367,368],{},"Parent-Child Chunking","。把文档切成两级，小 chunk 用于检索（更精准），命中后返回对应的大 chunk 喂给 LLM（信息更完整）。这样既保证了召回精度，又保证了生成质量。",[14,371,372,374],{},[56,373,98],{}," 没有\"最好\"的 chunk 大小，只有\"最适合当前文档和查询\"的大小。parent-child 是兼顾检索精度和上下文完整性的实用方案。",[21,376,378],{"id":377},"_5-语义编码embedding-模型选型","5. 
语义编码：Embedding 模型选型",[14,380,381],{},"Chunk 准备好了，下一步是把文字转换成向量。Embedding 模型的选择直接影响检索质量。",[237,383,384],{"id":384},"常用模型对比",[29,386,387,399],{},[32,388,389],{},[35,390,391,394,396],{},[38,392,393],{},"模型",[38,395,115],{},[38,397,398],{},"特点",[48,400,401,414,427,440],{},[35,402,403,408,411],{},[53,404,405],{},[56,406,407],{},"text-embedding-3-small",[53,409,410],{},"1536",[53,412,413],{},"效果均衡，成本低",[35,415,416,421,424],{},[53,417,418],{},[56,419,420],{},"text-embedding-3-large",[53,422,423],{},"3072",[53,425,426],{},"效果强，成本适中",[35,428,429,434,437],{},[53,430,431],{},[56,432,433],{},"BGE (BAAI\u002Fbge-*)",[53,435,436],{},"768\u002F1024",[53,438,439],{},"开源，中文支持好",[35,441,442,447,450],{},[53,443,444],{},[56,445,446],{},"M3E",[53,448,449],{},"768",[53,451,452],{},"中文领先的开源模型",[14,454,455],{},"选型时关注三个点：",[457,458,459,465,471],"ol",{},[351,460,461,464],{},[56,462,463],{},"语言覆盖","：中文场景 BGE \u002F M3E 往往优于 OpenAI 的模型",[351,466,467,470],{},[56,468,469],{},"领域适配","：通用模型在医疗、法律、代码等专业领域可能不够，可以针对领域做 fine-tune",[351,472,473,476],{},[56,474,475],{},"维度与成本","：高维度效果更好但存储和计算成本更高。Matryoshka 式嵌入（可降维）是兼顾两者的新趋势",[14,478,479],{},"工程上还需要注意：批量处理能降低 5-10 倍 API 成本；相同文本重复嵌入浪费钱，加一层哈希缓存；换 Embedding 模型需要重建索引，保留旧索引直到新索引验证完毕。",[14,481,482,484],{},[56,483,98],{}," Embedding 模型是 RAG 的\"翻译官\"，把人类语言翻译成计算机能检索的向量。选一个好翻译官，比后面花太多功夫调优检索策略更重要。",[21,486,488],{"id":487},"_6-向量存储索引算法与数据库选型","6. 
向量存储：索引算法与数据库选型",[14,490,491],{},"数据量大了以后，暴力搜索太慢——遍历 1000 万条向量显然不现实。这时就需要 ANN（Approximate Nearest Neighbor）近似最近邻算法。",[237,493,494],{"id":494},"索引算法对比",[29,496,497,509],{},[32,498,499],{},[35,500,501,504,507],{},[38,502,503],{},"算法",[38,505,506],{},"原理",[38,508,398],{},[48,510,511,524,537,553],{},[35,512,513,518,521],{},[53,514,515],{},[56,516,517],{},"Flat（暴力）",[53,519,520],{},"遍历所有向量",[53,522,523],{},"100% 精度，慢",[35,525,526,531,534],{},[53,527,528],{},[56,529,530],{},"IVF",[53,532,533],{},"聚类 + 桶内搜索",[53,535,536],{},"快，精度中等",[35,538,539,544,547],{},[53,540,541],{},[56,542,543],{},"HNSW",[53,545,546],{},"分层图结构",[53,548,549,552],{},[56,550,551],{},"最常用","，快且精度高",[35,554,555,560,563],{},[53,556,557],{},[56,558,559],{},"PQ",[53,561,562],{},"向量压缩",[53,564,565],{},"省存储，精度略降",[14,567,568,571,572,575,576,579],{},[56,569,570],{},"HNSW 是工业界的首选。"," 它的核心思想类似跳表（skip list）：从稀疏的高层图快速定位到目标区域，再到密集的低层精细搜索。关键参数是 ",[207,573,574],{},"M","（每个节点的连接数）和 ",[207,577,578],{},"efSearch","（检索时搜索宽度）。",[237,581,582],{"id":582},"向量数据库选型",[29,584,585,597],{},[32,586,587],{},[35,588,589,592,594],{},[38,590,591],{},"数据库",[38,593,398],{},[38,595,596],{},"适用规模",[48,598,599,612,625,637,650,663],{},[35,600,601,606,609],{},[53,602,603],{},[56,604,605],{},"Pinecone",[53,607,608],{},"托管 SaaS，免运维",[53,610,611],{},"中小到大型",[35,613,614,619,622],{},[53,615,616],{},[56,617,618],{},"Weaviate",[53,620,621],{},"功能全，支持混合检索",[53,623,624],{},"中型",[35,626,627,632,635],{},[53,628,629],{},[56,630,631],{},"Qdrant",[53,633,634],{},"性能好，Rust 实现",[53,636,611],{},[35,638,639,644,647],{},[53,640,641],{},[56,642,643],{},"Milvus",[53,645,646],{},"大规模、高性能",[53,648,649],{},"亿级+",[35,651,652,657,660],{},[53,653,654],{},[56,655,656],{},"Chroma",[53,658,659],{},"轻量，嵌入式",[53,661,662],{},"原型\u002F小型",[35,664,665,670,673],{},[53,666,667],{},[56,668,669],{},"pgvector",[53,671,672],{},"PostgreSQL 扩展",[53,674,675],{},"中小型，统一管理",[14,677,678],{},"规模估算：1000 万条 chunk、1536 维 float32 需要约 60GB 存储，用 PQ 压缩可以降到约 6GB，精度损失 5% 
以内。",[14,680,681,683],{},[56,682,98],{}," HNSW + Qdrant 是目前中小规模场景的黄金组合。大规模场景可以考虑 Milvus 搭配 PQ 压缩。",[21,685,687],{"id":686},"_7-检索策略从单路到混合","7. 检索策略：从单路到混合",[237,689,691],{"id":690},"向量检索dense-retrieval","向量检索（Dense Retrieval）",[14,693,694],{},"用 Embedding 把 query 转成向量，在向量索引里找最近的 K 个。能捕捉语义——\"笔记本电脑\"和\"笔记本\"是相近的。但对关键词精确匹配不敏感，\"iPhone 15\"和\"iPhone 14\"在向量空间里可能很接近。",[237,696,698],{"id":697},"关键词检索sparse-retrieval","关键词检索（Sparse Retrieval）",[14,700,701],{},"BM25 是经典方案——按词频和逆文档频率打分。优点是对关键词、专有名词、数字、代码能做精准匹配。缺点是不理解同义词。",[237,703,705],{"id":704},"混合检索hybrid-retrieval","混合检索（Hybrid Retrieval）",[14,707,708],{},"生产级的 RAG 系统基本都用混合检索——两条路并行，结果合并。",[14,710,711],{},"合并方法：",[348,713,714,724,733],{},[351,715,716,719,720,723],{},[56,717,718],{},"RRF（Reciprocal Rank Fusion）","：",[207,721,722],{},"score = Σ 1\u002F(k + rank_i)","，最常用，不需要分数归一化",[351,725,726,719,729,732],{},[56,727,728],{},"加权分数",[207,730,731],{},"final = α · vec_score + (1-α) · bm25_score","，需要归一化",[351,734,735,738],{},[56,736,737],{},"Rerank","：两路 top-N 合并后统一重排",[14,740,741],{},"Top-K 的选择也有讲究。太小（K=3）召回率低，太大（K=20+）会稀释上下文且成本高。推荐策略是粗召回 K=20-50，经过 rerank 后取 top 5-10。",[14,743,744,746],{},[56,745,98],{}," 纯向量检索就像只靠感觉找东西，纯关键词检索就像只靠目录找东西。混合检索把两者结合起来，才是生产级的做法。",[21,748,750],{"id":749},"_8-rerank粗召回后的精过滤","8. 
Rerank：粗召回后的精过滤",[14,752,753],{},"向量检索用的双塔模型（query 和 doc 分别编码，算相似度），速度快但精度有限。Rerank 用交叉编码器——把 query 和 doc 拼在一起喂给模型，直接输出相关度分数，精度更高但慢得多。",[200,755,758],{"className":756,"code":757,"language":205},[203],"阶段 1：向量检索 → top-50（快但粗）\n阶段 2：Rerank → top-5（慢但准）\n",[207,759,757],{"__ignoreMap":209},[14,761,762],{},"常用 Reranker：",[348,764,765,771,777],{},[351,766,767,770],{},[56,768,769],{},"Cohere Rerank","：API，效果好，多语言",[351,772,773,776],{},[56,774,775],{},"BGE Reranker","：开源，中文强",[351,778,779,782],{},[56,780,781],{},"LLM as Reranker","：用 GPT-4 打分，效果最好但最贵",[14,784,785],{},"工业界的经验：加 Rerank 可以把精确率再提 10-30%，尤其在 top-3 指标上提升明显。代价是增加 100-500ms 的延迟。",[14,787,788],{},"当然，并不是所有场景都需要 Rerank。如果检索数据量小（几百条）、精度要求不高、或者有严格的延迟预算，可以跳过 Rerank。",[14,790,791,793],{},[56,792,98],{}," Rerank 是用时间来换精度的经典策略。粗召回在前，精筛选在后，两阶段配合才能兼顾速度和准确率。",[21,795,797],{"id":796},"_9-query-改写让问题更精准","9. Query 改写：让问题更精准",[14,799,800],{},"用户的问题往往不太讲究——简短、含代词、缺上下文。直接拿原问题去检索效果可能很差。Query 改写就是解决这个问题的。",[237,802,803],{"id":803},"几种常见方法",[14,805,806,809],{},[56,807,808],{},"Query Expansion \u002F Rewriting："," 让 LLM 把模糊的问题补全。例如\"它怎么用？\"改写为\"LangChain 的 RunnableSequence 怎么使用？\"",[14,811,812,815],{},[56,813,814],{},"HyDE（Hypothetical Document Embeddings）："," 一个很有意思的技巧——让 LLM 先假装回答这个问题，生成一段假想答案，然后用这段假想答案去检索。原理是：在向量空间里，\"答案与相关文档的距离\"通常比\"问题与相关文档的距离\"更近。",[14,817,818,821],{},[56,819,820],{},"Step-Back Prompting："," 问题太具体时，先抽象成更宏观的问题再检索。例如\"梅西 1987 年 6 月 24 日出生那天是星期几？\"先退回一步查\"梅西出生在哪一天\"，查到出生日再推算。",[14,823,824,827],{},[56,825,826],{},"问题分解（Decomposition）："," 复杂问题拆成子问题分别检索。例如\"比较 LangChain 和 LlamaIndex 在 RAG 上的优劣\"拆成三个子问题分别检索后再综合回答。",[14,829,830,832],{},[56,831,98],{}," Query 改写是最容易被忽视的优化点。用户提问随意，但检索需要精准。把随意变成精准，是 Rerank 之前最值得投入的优化之一。",[21,834,836],{"id":835},"_10-评估体系衡量-rag-好坏的标尺","10. 
评估体系：衡量 RAG 好坏的标尺",[14,838,839],{},"没有评估就没有优化。RAG 的评估分两个层面：",[237,841,842],{"id":842},"检索层面",[29,844,845,858],{},[32,846,847],{},[35,848,849,852,855],{},[38,850,851],{},"指标",[38,853,854],{},"关心什么",[38,856,857],{},"一句话理解",[48,859,860,873,886,899],{},[35,861,862,867,870],{},[53,863,864],{},[56,865,866],{},"Recall@K",[53,868,869],{},"有没有漏",[53,871,872],{},"相关文档召回到多少",[35,874,875,880,883],{},[53,876,877],{},[56,878,879],{},"Precision@K",[53,881,882],{},"有没有错",[53,884,885],{},"召回的结果里多少是相关的",[35,887,888,893,896],{},[53,889,890],{},[56,891,892],{},"MRR",[53,894,895],{},"第一次对有多快",[53,897,898],{},"第一个正确答案排第几",[35,900,901,906,909],{},[53,902,903],{},[56,904,905],{},"NDCG@K",[53,907,908],{},"排序质量",[53,910,911],{},"相关的是否排在前面",[237,913,914],{"id":914},"生成层面",[29,916,917,926],{},[32,918,919],{},[35,920,921,923],{},[38,922,851],{},[38,924,925],{},"评估什么",[48,927,928,938,948,958],{},[35,929,930,935],{},[53,931,932],{},[56,933,934],{},"Faithfulness",[53,936,937],{},"答案是否基于检索内容，没有幻觉",[35,939,940,945],{},[53,941,942],{},[56,943,944],{},"Answer Relevance",[53,946,947],{},"答案是否回答用户的问题",[35,949,950,955],{},[53,951,952],{},[56,953,954],{},"Context Precision",[53,956,957],{},"检索回来的内容里有多少真正用上了",[35,959,960,965],{},[53,961,962],{},[56,963,964],{},"Context Recall",[53,966,967],{},"生成好答案所需的信息是否都检索到了",[14,969,970,971,974,975,978,979,982],{},"常用评估框架：",[56,972,973],{},"RAGAS"," 专为 RAG 设计，开箱即用，覆盖上面四个生成层面指标。",[56,976,977],{},"LangSmith"," 和 ",[56,980,981],{},"Langfuse"," 则提供了更完整的追踪和评估平台。",[14,984,985],{},"评估时一个比较实际的做法是：构建一套标准问答对并标注好相关文档，然后用 LLM-as-judge 打分，加人工抽检。这样可以定期跑指标，跟踪系统的退化或改进。",[14,987,988,990],{},[56,989,98],{}," 没有评估的 RAG 系统就像没有仪表盘的汽车——你在开车，但不知道速度、油量和方向。分层评估、定期运行、追踪趋势，是持续优化 RAG 系统的基础设施。",[21,992,993],{"id":993},"总结与思考",[14,995,996],{},"RAG 本质上是给大模型配了一个\"外挂大脑\"。但把这个外挂做好，需要理解从数据准备到检索生成的每一个环节：",[348,998,999,1005,1010],{},[351,1000,1001,1004],{},[56,1002,1003],{},"数据层面","：Chunk 怎么切、Embedding 用什么模型、向量索引怎么建",[351,1006,1007,1009],{},[56,1008,842],{},"：混合检索召回、Rerank 精筛、Query 
改写补全",[351,1011,1012,1015],{},[56,1013,1014],{},"评估层面","：分层指标追踪、持续迭代优化",[14,1017,1018,1019,1022],{},"最容易被忽略的一点是：",[56,1020,1021],{},"RAG 是个系统工程，不是加一个向量数据库就完事了。"," 每个环节的调优都会影响最终效果，而真正生产可用的 RAG 系统需要把这些环节串起来，配合可观测性、A\u002FB 测试和持续评估，才能稳定迭代。",[14,1024,1025],{},[1026,1027,1029],"a",{"href":1028},"\u002Fblog\u002F","返回博客列表",{"title":209,"searchDepth":1031,"depth":1031,"links":1032},2,[1033,1034,1035,1036,1041,1044,1048,1053,1054,1057,1061],{"id":23,"depth":1031,"text":24},{"id":102,"depth":1031,"text":103},{"id":189,"depth":1031,"text":190},{"id":231,"depth":1031,"text":232,"children":1037},[1038,1040],{"id":239,"depth":1039,"text":239},3,{"id":345,"depth":1039,"text":346},{"id":377,"depth":1031,"text":378,"children":1042},[1043],{"id":384,"depth":1039,"text":384},{"id":487,"depth":1031,"text":488,"children":1045},[1046,1047],{"id":494,"depth":1039,"text":494},{"id":582,"depth":1039,"text":582},{"id":686,"depth":1031,"text":687,"children":1049},[1050,1051,1052],{"id":690,"depth":1039,"text":691},{"id":697,"depth":1039,"text":698},{"id":704,"depth":1039,"text":705},{"id":749,"depth":1031,"text":750},{"id":796,"depth":1031,"text":797,"children":1055},[1056],{"id":803,"depth":1039,"text":803},{"id":835,"depth":1031,"text":836,"children":1058},[1059,1060],{"id":842,"depth":1039,"text":842},{"id":914,"depth":1039,"text":914},{"id":993,"depth":1031,"text":993},"AI\u002FLLM","2026-05-03","md",{},true,"\u002Fblog\u002Frag-full-stack-guide",{"title":5,"description":16},"blog\u002Frag-full-stack-guide",[118,1071,1072,1073,1074],"检索增强生成","Embedding","向量数据库","大模型","qacc_qLpSyg6vmA15Sh72au5sT1ibVVI7V9dqu0K2WM",1779959652908]