按語義相似性搜索文件
具備批次處理能力的語義搜尋端點
Initiate a search operation with a query text of up to 400 words and receive the most semantically similar responses from the stored knowledge. For question-answering, convert your question into an ideal answer and submit it to receive similar real answers.
Up to 400 words sentence for which you wish to find semantically similar chunks of knowledge.
Number of semantically similar chunks of text to return. Use 'n=3' for quick, highly relevant results and 'n=10' or higher for more comprehensive information. If you do not receive enough information, consider trying again with a larger 'n' value.
Start of the time range for documents to be searched, in ISO 8601 format.
End of the time range for documents to be searched, in ISO 8601 format.
Successful retrieval of documents
Bad request
Unauthorized
Not found
Internal server error
GET /api/v1/documents/search/ HTTP/1.1
Host: api.rememberizer.ai
Accept: */*
{
"data_sources": [
{
"name": "text",
"documents": 1
}
],
"matched_chunks": [
{
"document": {
"id": 18,
"document_id": "text",
"name": "text",
"type": "text",
"path": "text",
"url": "text",
"size": 1,
"created_time": "2025-11-07T13:02:16.112Z",
"modified_time": "2025-11-07T13:02:16.112Z",
"indexed_on": "2025-11-07T13:02:16.112Z",
"integration": {
"id": 1,
"integration_type": "text"
}
},
"matched_content": "text",
"distance": 1
}
]
}

範例請求
curl -X GET \
"https://api.rememberizer.ai/api/v1/documents/search/?q=如何%20將%20Rememberizer%20整合%20到%20自訂%20應用程式&n=5&from=2023-01-01T00:00:00Z&to=2023-12-31T23:59:59Z" \
-H "Authorization: Bearer YOUR_JWT_TOKEN"const searchDocuments = async (query, numResults = 5, from = null, to = null) => {
const url = new URL('https://api.rememberizer.ai/api/v1/documents/search/');
url.searchParams.append('q', query);
url.searchParams.append('n', numResults);
if (from) {
url.searchParams.append('from', from);
}
if (to) {
url.searchParams.append('to', to);
}
const response = await fetch(url.toString(), {
method: 'GET',
headers: {
'Authorization': 'Bearer YOUR_JWT_TOKEN'
}
});
const data = await response.json();
console.log(data);
};
searchDocuments('如何將 Rememberizer 整合到自訂應用程式', 5);import requests
def search_documents(query, num_results=5, from_date=None, to_date=None):
headers = {
"Authorization": "Bearer YOUR_JWT_TOKEN"
}
params = {
"q": query,
"n": num_results
}
if from_date:
params["from"] = from_date
if to_date:
params["to"] = to_date
response = requests.get(
"https://api.rememberizer.ai/api/v1/documents/search/",
headers=headers,
params=params
)
data = response.json()
print(data)
search_documents("如何將 Rememberizer 整合到自訂應用程式", 5)require 'net/http'
require 'uri'
require 'json'
# Print the chunks of stored knowledge most semantically similar to +query+.
# Optional ISO 8601 +from_date+/+to_date+ restrict the search time range.
def search_documents(query, num_results = 5, from_date = nil, to_date = nil)
  uri = URI('https://api.rememberizer.ai/api/v1/documents/search/')
  search_params = { q: query, n: num_results }
  search_params[:from] = from_date if from_date
  search_params[:to] = to_date if to_date
  uri.query = URI.encode_www_form(search_params)

  # Block form opens the TLS connection and closes it when done.
  response = Net::HTTP.start(uri.host, uri.port, use_ssl: true) do |http|
    get = Net::HTTP::Get.new(uri)
    get['Authorization'] = 'Bearer YOUR_JWT_TOKEN'
    http.request(get)
  end

  puts JSON.parse(response.body)
end

search_documents("如何將 Rememberizer 整合到自訂應用程式", 5)

查詢參數
q
字串
必填。 搜尋查詢文本(最多 400 字)。
n
整數
要返回的結果數量。預設:3。使用較高的值(例如,10)以獲得更全面的結果。
from
字串
要搜尋的文件的時間範圍開始,使用 ISO 8601 格式。
to
字串
要搜尋的文件的時間範圍結束,使用 ISO 8601 格式。
prev_chunks
整數
包含的前幾個片段數量以提供上下文。預設:2。
next_chunks
整數
包含的後幾個片段數量以提供上下文。預設:2。
回應格式
{
"data_sources": [
{
"name": "Google Drive",
"documents": 3
},
{
"name": "Slack",
"documents": 2
}
],
"matched_chunks": [
{
"document": {
"id": 12345,
"document_id": "1aBcD2efGhIjK3lMnOpQrStUvWxYz",
"name": "Rememberizer API 文件.pdf",
"type": "application/pdf",
"path": "/Documents/Rememberizer/API 文件.pdf",
"url": "https://drive.google.com/file/d/1aBcD2efGhIjK3lMnOpQrStUvWxYz/view",
"size": 250000,
"created_time": "2023-05-10T14:30:00Z",
"modified_time": "2023-06-15T09:45:00Z",
"indexed_on": "2023-06-15T10:30:00Z",
"integration": {
"id": 101,
"integration_type": "google_drive"
}
},
"matched_content": "要將 Rememberizer 與自定義應用程式整合,您可以使用 OAuth2 認證流程來授權您的應用程式訪問用戶的 Rememberizer 數據。一旦獲得授權,您的應用程式可以使用 Rememberizer API 來搜尋文件、檢索內容等。",
"distance": 0.123
},
// ... 更多匹配的片段
],
"message": "搜尋成功完成",
"code": "success"
}

搜尋優化技巧
用於問題回答
在尋找問題的答案時,嘗試將查詢表述為理想答案。例如:
而不是: "什麼是向量嵌入?" 嘗試: "向量嵌入是一種將文本轉換為高維空間中的數值向量的技術。"
調整結果數量
從 n=3 開始,以獲得快速且高相關性的結果
增加到 n=10 或更高,以獲取更全面的信息
如果搜索返回的信息不足,請嘗試增加 n 參數
基於時間的篩選
使用 from 和 to 參數來專注於特定時間範圍內的文件:
最近的文件:將 from 設定為最近的日期
歷史分析:指定特定的日期範圍
排除過時的信息:設置適當的 to 日期
批次操作
為了有效處理大量的搜尋查詢,Rememberizer 支援批次操作以優化性能並減少 API 呼叫的開銷。
批次搜尋
import requests
import time
import json
from concurrent.futures import ThreadPoolExecutor
def batch_search_documents(queries, num_results=5, batch_size=10):
"""
執行多個查詢的批次搜尋
參數:
queries: 搜尋查詢字串的列表
num_results: 每個查詢返回的結果數量
batch_size: 同時處理的查詢數量
返回:
每個查詢的搜尋結果列表
"""
headers = {
"Authorization": "Bearer YOUR_JWT_TOKEN",
"Content-Type": "application/json"
}
results = []
# 批次處理查詢
for i in range(0, len(queries), batch_size):
batch = queries[i:i+batch_size]
# 創建一個線程池以並行發送請求
with ThreadPoolExecutor(max_workers=batch_size) as executor:
futures = []
for query in batch:
params = {
"q": query,
"n": num_results
}
future = executor.submit(
requests.get,
"https://api.rememberizer.ai/api/v1/documents/search/",
headers=headers,
params=params
)
futures.append(future)
# 收集完成的結果
for future in futures:
response = future.result()
results.append(response.json())
# 速率限制 - 批次之間暫停以避免 API 限制
if i + batch_size < len(queries):
time.sleep(1)
return results
# 示例用法
queries = [
"如何使用 OAuth 與 Rememberizer",
"向量數據庫配置選項",
"語義搜索的最佳實踐",
# 根據需要添加更多查詢
]
results = batch_search_documents(queries, num_results=3, batch_size=5)/**
* 使用多個查詢執行批量搜索
*
* @param {string[]} queries - 搜索查詢字符串列表
* @param {number} numResults - 每個查詢返回的結果數量
* @param {number} batchSize - 同時處理的查詢數量
* @param {number} delayBetweenBatches - 批次之間等待的毫秒數
* @returns {Promise<Array>} - 每個查詢的搜索結果列表
*/
async function batchSearchDocuments(queries, numResults = 5, batchSize = 10, delayBetweenBatches = 1000) {
const results = [];
// 批量處理查詢
for (let i = 0; i < queries.length; i += batchSize) {
const batch = queries.slice(i, i + batchSize);
// 創建一個承諾的數組以進行並發請求
const batchPromises = batch.map(query => {
const url = new URL('https://api.rememberizer.ai/api/v1/documents/search/');
url.searchParams.append('q', query);
url.searchParams.append('n', numResults);
return fetch(url.toString(), {
method: 'GET',
headers: {
'Authorization': 'Bearer YOUR_JWT_TOKEN'
}
}).then(response => response.json());
});
// 等待批次中的所有請求完成
const batchResults = await Promise.all(batchPromises);
results.push(...batchResults);
// 速率限制 - 在批次之間暫停以避免 API 限流
if (i + batchSize < queries.length) {
await new Promise(resolve => setTimeout(resolve, delayBetweenBatches));
}
}
return results;
}
// 示例用法
const queries = [
"如何使用 OAuth 與 Rememberizer",
"向量數據庫配置選項",
"語義搜索的最佳實踐",
// 根據需要添加更多查詢
];
batchSearchDocuments(queries, 3, 5)
.then(results => console.log(results))
.catch(error => console.error('批量搜索錯誤:', error));require 'net/http'
require 'uri'
require 'json'
require 'concurrent'
# Run multiple search queries in parallel batches.
#
# @param queries [Array<String>] list of search query strings
# @param num_results [Integer] number of results to return per query
# @param batch_size [Integer] number of queries processed concurrently
# @param delay_between_batches [Float] seconds to wait between batches
# @return [Array] one parsed JSON response per query, in query order
def batch_search_documents(queries, num_results = 5, batch_size = 10, delay_between_batches = 1.0)
  results = []
  batch_count = (queries.length / batch_size.to_f).ceil
  # Process the queries batch by batch
  queries.each_slice(batch_size).with_index do |batch, batch_index|
    # Thread pool for this batch's concurrent requests
    pool = Concurrent::FixedThreadPool.new(batch_size)
    futures = batch.map do |query|
      Concurrent::Future.execute(executor: pool) do
        uri = URI('https://api.rememberizer.ai/api/v1/documents/search/')
        uri.query = URI.encode_www_form(q: query, n: num_results)
        request = Net::HTTP::Get.new(uri)
        request['Authorization'] = 'Bearer YOUR_JWT_TOKEN'
        http = Net::HTTP.new(uri.host, uri.port)
        http.use_ssl = true
        JSON.parse(http.request(request).body)
      end
    end
    # Collect results from every thread (Future#value blocks until complete)
    results.concat(futures.map(&:value))
    # BUG FIX: the original called `pool.shutdown` once, after this loop, where
    # the block-local `pool` is out of scope (NameError at runtime) and every
    # pool created in earlier iterations leaked its threads. Shut each pool
    # down as soon as its batch completes.
    pool.shutdown
    pool.wait_for_termination
    # Rate limiting - pause between batches to avoid API throttling
    sleep(delay_between_batches) if batch_index < batch_count - 1
  end
  results
end
# Example usage
queries = [
  "如何使用 OAuth 與 Rememberizer",
  "向量資料庫配置選項",
  "語義搜索的最佳實踐",
  # Add more queries as needed
]
results = batch_search_documents(queries, 3, 5)
puts results

性能考量
在實施批次操作時,考慮以下最佳實踐:
最佳批次大小:從 5-10 個查詢的批次大小開始,根據應用程式的性能特徵進行調整。
速率限制:在批次之間加入延遲,以防止 API 限流。一個好的起始點是在批次之間等待 1 秒。
錯誤處理:實施穩健的錯誤處理,以管理批次中的失敗請求。
資源管理:監控客戶端資源使用情況,特別是在大型批次大小的情況下,以防止過度的記憶體消耗。
回應處理:在可能的情況下,異步處理批次結果,以改善用戶體驗。
對於高流量應用程式,考慮實施佇列系統,以有效管理大量的搜尋請求。
此端點提供強大的語義搜尋功能,涵蓋整個知識庫。它使用向量嵌入根據意義而非精確關鍵字匹配來查找內容。
Last updated