外观
LlamaIndex 集成
LlamaIndex 专注于数据索引和检索,可以将梓享搜索作为自定义工具集成到 Agent 中。
场景 1:企业知识库增强
构建既能查询内部文档,又能获取外部最新信息的智能系统:
from llama_index.core import VectorStoreIndex, SimpleDirectoryReader, Settings
from llama_index.core.tools import QueryEngineTool, FunctionTool, ToolMetadata
from llama_index.agent.openai import OpenAIAgent
from llama_index.llms.openai import OpenAI
import requests
# 1. Load the internal documents and build a vector index over them.
documents = SimpleDirectoryReader("./company_docs").load_data()
internal_index = VectorStoreIndex.from_documents(documents)
internal_query_engine = internal_index.as_query_engine(similarity_top_k=3)

# Expose the internal knowledge base to the agent as a query-engine tool.
internal_tool = QueryEngineTool(
    query_engine=internal_query_engine,
    metadata=ToolMetadata(
        name="internal_knowledge",
        description="查询公司内部文档、规章制度、产品手册、技术文档等固定知识",
    ),
)
# 2. Create the internet search tool.
def search_internet(query: str) -> str:
    """Search the internet for up-to-date information.

    Args:
        query: Search keywords sent to the search API.

    Returns:
        The hits joined by blank lines, each rendered as title, content and
        source URL. Returns "搜索失败" when the API reports a non-zero
        ``code``, and a "搜索错误: ..." message when the request itself or
        the parsing of its response fails, so the calling agent always gets
        a string back instead of an exception.
    """
    url = "https://search.aiserver.cloud/v1/api"
    headers = {
        "Authorization": "Bearer YOUR-API-KEY",
        "Content-Type": "application/json",
    }
    payload = {"engine": "china", "query": query, "max_results": 5}

    try:
        # requests has no default timeout; without one a stalled server
        # would block the agent indefinitely.
        response = requests.post(url, headers=headers, json=payload, timeout=10)
        result = response.json()
        if result.get("code") != 0:
            return "搜索失败"
        return "\n\n".join(
            f"标题: {item['title']}\n内容: {item['content']}\n来源: {item['url']}"
            for item in result["data"]["results"]
        )
    except (
        requests.RequestException,  # connection errors, timeouts, bad JSON body
        ValueError,  # JSON decoding errors on older requests versions
        KeyError,  # response missing an expected field
        TypeError,
        AttributeError,  # response JSON is not shaped like a dict
    ) as exc:
        return f"搜索错误: {exc}"
external_tool = FunctionTool.from_defaults(
    fn=search_internet,
    name="internet_search",
    description="搜索互联网最新信息:行业动态、技术趋势、竞品分析、市场资讯",
)

# 3. Create the hybrid-retrieval agent that can use both tools.
llm = OpenAI(model="gpt-4")
agent = OpenAIAgent.from_tools(
    tools=[internal_tool, external_tool],
    llm=llm,
    system_prompt="""你是一个企业智能助手。
当用户询问:
- 公司内部信息(制度、流程、产品)→ 使用 internal_knowledge
- 行业最新动态、技术趋势 → 使用 internet_search
- 需要对比内外部信息时 → 同时使用两个工具
回答时请注明信息来源。""",
    verbose=True,
)

# Usage examples.
print(agent.chat("我们公司的技术栈是什么?"))  # looks up internal documents
print(agent.chat("最近AI行业有什么新技术?"))  # searches the internet
print(agent.chat("对比我们的技术栈和行业主流方案")) # 混合检索
场景 2:智能研报生成系统
结合历史数据分析和最新资讯,生成行业研究报告:
from llama_index.core import VectorStoreIndex, Document
from llama_index.core.tools import FunctionTool
from llama_index.agent.openai import OpenAIAgent
# 1. Index the historical research reports.
historical_reports = SimpleDirectoryReader("./reports").load_data()
report_index = VectorStoreIndex.from_documents(historical_reports)

# Expose the report index to the agent as a query-engine tool.
historical_tool = QueryEngineTool(
    query_engine=report_index.as_query_engine(),
    metadata=ToolMetadata(
        name="historical_reports",
        description="查询历史行业研报、数据分析、趋势报告",
    ),
)
# 2. Real-time news search.
def search_latest_news(query: str) -> str:
    """Search for the latest industry news on a topic.

    The query is suffixed with " 最新资讯" before being sent, and up to 10
    results are requested.

    Args:
        query: Topic keywords.

    Returns:
        The hits joined by blank lines, each rendered as a bracketed title,
        content and source URL. Returns "未找到相关资讯" when the API
        reports a non-zero ``code``, and a "搜索错误: ..." message when the
        request itself or the parsing of its response fails, so the calling
        agent always gets a string back instead of an exception.
    """
    url = "https://search.aiserver.cloud/v1/api"
    headers = {
        "Authorization": "Bearer YOUR-API-KEY",
        "Content-Type": "application/json",
    }
    payload = {"engine": "china", "query": f"{query} 最新资讯", "max_results": 10}

    try:
        # requests has no default timeout; without one a stalled server
        # would block the agent indefinitely.
        response = requests.post(url, headers=headers, json=payload, timeout=10)
        result = response.json()
        if result.get("code") != 0:
            return "未找到相关资讯"
        return "\n\n".join(
            f"【{item['title']}】\n{item['content']}\n来源: {item['url']}"
            for item in result["data"]["results"]
        )
    except (
        requests.RequestException,  # connection errors, timeouts, bad JSON body
        ValueError,  # JSON decoding errors on older requests versions
        KeyError,  # response missing an expected field
        TypeError,
        AttributeError,  # response JSON is not shaped like a dict
    ) as exc:
        return f"搜索错误: {exc}"
news_tool = FunctionTool.from_defaults(
    fn=search_latest_news,
    name="latest_news",
    description="获取最新行业新闻、公司动态、政策发布、市场变化",
)

# 3. Create the report-writing agent over the historical and news tools.
analyst_agent = OpenAIAgent.from_tools(
    tools=[historical_tool, news_tool],
    llm=OpenAI(model="gpt-4"),
    system_prompt="""你是一个行业分析师。
生成研报时:
1. 使用 historical_reports 了解历史趋势和数据
2. 使用 latest_news 获取最新动态和变化
3. 结合两者生成全面的分析报告
报告应包含:历史背景、当前状况、最新变化、未来趋势。""",
    verbose=True,
)

# Generate a research report.
response = analyst_agent.chat("请生成一份关于AI大模型行业的分析报告")
print(response)
快速开始
安装依赖
pip install llama-index llama-index-llms-openai requests
基础示例
from llama_index.core.tools import FunctionTool
from llama_index.agent.openai import OpenAIAgent
from llama_index.llms.openai import OpenAI
import requests
def search_internet(query: str) -> str:
    """Search the internet for real-time information.

    Args:
        query: Search keywords sent to the search API.

    Returns:
        The hits joined by blank lines, each rendered as title, content and
        source URL. Returns "搜索失败" when the API reports a non-zero
        ``code``, and a "搜索错误: ..." message when the request itself or
        the parsing of its response fails, so the calling agent always gets
        a string back instead of an exception.
    """
    url = "https://search.aiserver.cloud/v1/api"
    headers = {
        "Authorization": "Bearer YOUR-API-KEY",
        "Content-Type": "application/json",
    }
    payload = {
        "engine": "china",
        "query": query,
        "max_results": 5,
    }

    try:
        # requests has no default timeout; without one a stalled server
        # would block the agent indefinitely.
        response = requests.post(url, headers=headers, json=payload, timeout=10)
        result = response.json()
        if result.get("code") != 0:
            return "搜索失败"
        return "\n\n".join(
            f"{item['title']}\n{item['content']}\n来源: {item['url']}"
            for item in result["data"]["results"]
        )
    except (
        requests.RequestException,  # connection errors, timeouts, bad JSON body
        ValueError,  # JSON decoding errors on older requests versions
        KeyError,  # response missing an expected field
        TypeError,
        AttributeError,  # response JSON is not shaped like a dict
    ) as exc:
        return f"搜索错误: {exc}"
# Wrap the search function as an agent tool.
search_tool = FunctionTool.from_defaults(
    fn=search_internet,
    name="search",
    description="搜索互联网获取最新信息",
)

# Create the agent with the search tool attached.
llm = OpenAI(model="gpt-4")
agent = OpenAIAgent.from_tools(
    tools=[search_tool],
    llm=llm,
    verbose=True,
)

# Ask a question through the agent.
response = agent.chat("帮我查一下2024年AI行业最新动态")
print(response)
高级配置
支持双引擎
from llama_index.core.tools import FunctionTool
from typing import Literal
def search_with_engine(
    query: str,
    engine: Literal["china", "global"] = "china",
) -> str:
    """Search the internet with a selectable search engine.

    Args:
        query: Search keywords.
        engine: Which engine to use: "china" for Chinese-language search,
            "global" for worldwide search.

    Returns:
        The hits as a numbered list (title, summary, link) joined by blank
        lines; "搜索失败: <msg>" when the API reports a non-zero ``code``;
        or "搜索错误: <error>" when any exception is raised along the way.
    """
    url = "https://search.aiserver.cloud/v1/api"
    headers = {
        "Authorization": "Bearer YOUR-API-KEY",
        "Content-Type": "application/json",
    }
    payload = {
        "engine": engine,
        "query": query,
        "max_results": 5,
    }

    # Any failure (network, JSON decoding, unexpected response shape) is
    # reported back to the agent as text rather than raised.
    try:
        reply = requests.post(url, headers=headers, json=payload, timeout=10).json()
        if reply.get("code") != 0:
            return f"搜索失败: {reply.get('msg')}"
        return "\n\n".join(
            f"[{rank}] {hit['title']}\n"
            f"摘要: {hit['content']}\n"
            f"链接: {hit['url']}"
            for rank, hit in enumerate(reply["data"]["results"], 1)
        )
    except Exception as err:
        return f"搜索错误: {err!s}"
# 创建工具
search_tool = FunctionTool.from_defaults(
fn=search_with_engine,
name="zixiang_search",
description="搜索互联网获取实时信息。支持中文搜索(engine=china)和全球搜索(engine=global)"
)
与 RAG 结合
混合检索
from llama_index.core import VectorStoreIndex, SimpleDirectoryReader
from llama_index.core.tools import QueryEngineTool, ToolMetadata
# Create the local knowledge-base retrieval tool.
documents = SimpleDirectoryReader("./data").load_data()
index = VectorStoreIndex.from_documents(documents)
query_engine = index.as_query_engine()
local_search_tool = QueryEngineTool(
    query_engine=query_engine,
    metadata=ToolMetadata(
        name="local_search",
        description="搜索本地知识库中的信息",
    ),
)

# Create the internet search tool.
internet_search_tool = FunctionTool.from_defaults(
    fn=search_internet,
    name="internet_search",
    description="搜索互联网最新信息",
)

# Create an agent that combines both retrieval capabilities.
agent = OpenAIAgent.from_tools(
    tools=[local_search_tool, internet_search_tool],
    llm=OpenAI(model="gpt-4"),
    verbose=True,
)
# Agent 会自动选择合适的工具
response = agent.chat("最新的AI政策是什么?")
结构化输出
使用 Pydantic 模型
from pydantic import BaseModel, Field
from typing import List
class SearchResult(BaseModel):
    """One search hit: its title, link and summary text."""

    title: str = Field(description="标题")
    url: str = Field(description="链接")
    content: str = Field(description="摘要")
class SearchResponse(BaseModel):
    """A full search response: the list of hits plus the engine used."""

    results: List[SearchResult] = Field(description="搜索结果列表")
    engine: str = Field(description="使用的引擎")
def search_structured(query: str) -> SearchResponse:
    """Run a search and return the hits as a structured ``SearchResponse``.

    Args:
        query: Search keywords sent to the search API.

    Returns:
        A ``SearchResponse`` with one ``SearchResult`` per hit. When the
        request fails, the body is not a JSON object, or the API reports a
        non-zero ``code``, an empty response (no results, empty engine
        string) is returned instead.
    """
    url = "https://search.aiserver.cloud/v1/api"
    headers = {
        "Authorization": "Bearer YOUR-API-KEY",
        "Content-Type": "application/json",
    }
    engine = "china"
    payload = {
        "engine": engine,
        "query": query,
        "max_results": 5,
    }

    try:
        # requests has no default timeout; without one a stalled server
        # would block the caller indefinitely.
        response = requests.post(url, headers=headers, json=payload, timeout=10)
        result = response.json()
    except (requests.RequestException, ValueError):
        # Transport failure or a non-JSON body: same empty answer as an
        # API-level failure.
        return SearchResponse(results=[], engine="")

    if not isinstance(result, dict) or result.get("code") != 0:
        return SearchResponse(results=[], engine="")

    hits = (result.get("data") or {}).get("results") or []
    return SearchResponse(
        results=[
            SearchResult(
                title=hit.get("title", ""),
                url=hit.get("url", ""),
                content=hit.get("content", ""),
            )
            for hit in hits
        ],
        # NOTE(review): the previous code read result["engine"]
        # unconditionally; confirm whether the API echoes the engine at the
        # top level. Until then, fall back to the engine that was requested
        # instead of raising KeyError.
        engine=result.get("engine", engine),
    )
# 创建工具
structured_search_tool = FunctionTool.from_defaults(
fn=search_structured,
name="structured_search",
description="搜索互联网,返回结构化结果"
)
异步调用
import asyncio
import aiohttp
from llama_index.core.tools import AsyncBaseTool
class ZixiangSearchTool(AsyncBaseTool):
    """Search tool that queries the search HTTP API asynchronously via aiohttp."""

    # NOTE(review): the base class annotates call/acall as returning a
    # ToolOutput, while this example returns plain strings — confirm the
    # agent in use accepts that before relying on it.

    def call(self, query: str, engine: str = "china") -> str:
        """Synchronous entry point that runs ``acall`` to completion.

        ``AsyncBaseTool`` declares ``call`` as abstract, so the class cannot
        be instantiated unless it is implemented.
        """
        # asyncio.run starts a fresh event loop, so this must not be invoked
        # from code already running inside a loop — await ``acall`` there.
        return asyncio.run(self.acall(query, engine))

    async def acall(self, query: str, engine: str = "china") -> str:
        """Call the search API asynchronously.

        Args:
            query: Search keywords.
            engine: Engine name forwarded to the API; defaults to "china".

        Returns:
            The hits joined by blank lines, each rendered as title and
            content, or "搜索失败" when the API reports a non-zero ``code``.
        """
        url = "https://search.aiserver.cloud/v1/api"
        headers = {
            "Authorization": "Bearer YOUR-API-KEY",
            "Content-Type": "application/json",
        }
        payload = {
            "engine": engine,
            "query": query,
            "max_results": 5,
        }
        # Bound the whole request to 10 seconds, matching the timeout the
        # synchronous search_with_engine example uses.
        timeout = aiohttp.ClientTimeout(total=10)
        async with aiohttp.ClientSession(timeout=timeout) as session:
            async with session.post(url, headers=headers, json=payload) as response:
                result = await response.json()

        if result.get("code") != 0:
            return "搜索失败"
        return "\n\n".join(
            f"{item['title']}\n{item['content']}"
            for item in result["data"]["results"]
        )

    @property
    def metadata(self):
        """Tool name and description presented to the agent."""
        return ToolMetadata(
            name="async_search",
            description="异步搜索互联网信息",
        )
# Using the async tool.
async def main():
    """Build an agent around the async search tool and run one async chat turn."""
    search_agent = OpenAIAgent.from_tools(
        tools=[ZixiangSearchTool()],
        llm=OpenAI(model="gpt-4"),
    )
    reply = await search_agent.achat("最新AI新闻")
    print(reply)
asyncio.run(main())
相关资源
| 资源 | 链接 |
|---|---|
| LlamaIndex 官方文档 | https://docs.llamaindex.ai/ |
| API 完整文档 | Web Search API |
