第一个文档数据库不提供对 count() 的支持,他们提供了一个存储过程来获取我在下面的存储过程中使用的计数
function usp_GetInfinityDataView(param) {
var context = getContext();
var response = context.getResponse();
var collection = context.getCollection();
var collectionLink = collection.getSelfLink();
var Rount = 0;
count("SELECT i.id FROM infinity i",null);
var query = {
query: 'SELECT * FROM infinity i'
};
getNodes(param["ContinuationToken"],param["PageSize"]);
function getNodes(continuationToken,intPageSize) {
// Tune the pageSize to fit your dataset.
var requestOptions = {
continuation: continuationToken,
pageSize: intPageSize
};
var accepted = collection.queryDocuments(collectionLink, query, requestOptions,
function(err, documentsRead, responseOptions) {
response.setBody({
"ResponseContinuation": responseOptions.continuation,
"Count": Rount,
"ViewData": documentsRead
});
});
}
function count(filterQuery, continuationToken) {
var collection = getContext().getCollection();
var maxResult = 99999999999; // MAX number of docs to process in one batch, when reached, return to client/request continuation.
// intentionally set low to demonstrate the concept. This can be much higher. Try experimenting.
// We've had it in to the high thousands before seeing the stored proceudre timing out.
// The number of documents counted.
var result = 0;
tryQuery(continuationToken);
// Helper method to check for max result and call query.
function tryQuery(nextContinuationToken) {
var responseOptions = { continuation: nextContinuationToken, pageSize : maxResult };
// In case the server is running this script for long time/near timeout, it would return false,
// in this case we set the response to current continuation token,
// and the client will run this script again starting from this continuation.
// When the client calls this script 1st time, is passes empty continuation token.
if (result >= maxResult || !query(responseOptions)) {
setBody(nextContinuationToken);
}
}
function query(responseOptions) {
// For empty query string, use readDocuments rather than queryDocuments -- it's faster as doesn't need to process the query.
return (filterQuery && filterQuery.length) ?
collection.queryDocuments(collection.getSelfLink(), filterQuery, responseOptions, onReadDocuments) :
collection.readDocuments(collection.getSelfLink(), responseOptions, onReadDocuments);
}
// This is callback is called from collection.queryDocuments/readDocuments.
function onReadDocuments(err, docFeed, responseOptions) {
if (err) {
throw 'Error while reading document: ' + err;
}
// Increment the number of documents counted so far.
result += docFeed.length;
// If there is continuation, call query again with it,
// otherwise we are done, in which case set continuation to null.
if (responseOptions.continuation) {
tryQuery(responseOptions.continuation);
} else {
setBody(null);
}
}
// Set response body: use an object the client is expecting (2 properties: result and continuationToken).
function setBody(continuationToken) {
Rount = result;
}
}
}
问题是,每次我从 c# web api 调用这个过程时,它都会给我不同的结果,而不是实际的计数(比如我的结果集计数是 17491,但返回 17020 或 17202,有时返回总数)。当没有结果较少,即在另一个集合上。我尝试将两个过程分开,但仍然相同。
存储过程在超过一定限制(至少时间)时被强制终止。这就是您链接的原始示例在正文中返回继续标记的原因。您的上述版本已删除该内容,但您需要恢复它。这样,如果第一次调用存储过程返回的结果包含继续标记,您就知道要再次调用它。最终计数将是对存储过程的所有调用的总和。
您可能还想将 maxResult 降低到 1000 或 10,000 之类的值。这为您提供了更精细的粒度,根据我的经验,实际上比拥有大量数字更快地得到答案。
我不太喜欢示例计数存储过程的编写方式。 Here 是我的等价物。主要区别在于,它返回的对象的形状与其接受的形状相同,并且每次都来回传递重新启动已终止的存储过程所需的所有状态。我已经在here写过有关这种编写存储过程的方法。 documentdb-utils(这些链接所在的位置)包含 Azure 提供的 node.js SDK 的包装器,该包装器将自动继续调用提前终止的存储过程,直到它们实际完成。完全公开,我是 documentdb-utils 的作者。
您还可以查看我在 Omex Document DB Github 存储库中编写的 Count 存储过程: https://github.com/microsoft/Omex/blob/master/src/DocumentDb/Scripts/Count.js