在 documentdb 中获得不同的计数结果

Question

第一个文档数据库不提供对 count() 的支持，他们提供了一个存储过程来获取我在下面的存储过程中使用的计数

function usp_GetInfinityDataView(param) {
  var context = getContext();
  var response = context.getResponse();
  var collection = context.getCollection();
  var collectionLink = collection.getSelfLink();
  var Rount = 0;
  count("SELECT i.id FROM infinity i",null); 
  
  var query = {
    query: 'SELECT * FROM infinity i'
  };

  getNodes(param["ContinuationToken"],param["PageSize"]);

  function getNodes(continuationToken,intPageSize) {
    // Tune the pageSize to fit your dataset.
    var requestOptions = {
      continuation: continuationToken,
      pageSize: intPageSize
    };

    var accepted = collection.queryDocuments(collectionLink, query, requestOptions,
      function(err, documentsRead, responseOptions) {
        response.setBody({
              "ResponseContinuation": responseOptions.continuation,
              "Count": Rount,
              "ViewData": documentsRead
            });
      });
  }
  
  function count(filterQuery, continuationToken) {
    var collection = getContext().getCollection();
    var maxResult = 99999999999; // MAX number of docs to process in one batch, when reached, return to client/request continuation. 
                        // intentionally set low to demonstrate the concept. This can be much higher. Try experimenting.
                        // We've had it in to the high thousands before seeing the stored proceudre timing out.

    // The number of documents counted.
    var result = 0;

    tryQuery(continuationToken);

    // Helper method to check for max result and call query.
    function tryQuery(nextContinuationToken) {
        var responseOptions = { continuation: nextContinuationToken, pageSize : maxResult };

        // In case the server is running this script for long time/near timeout, it would return false,
        // in this case we set the response to current continuation token, 
        // and the client will run this script again starting from this continuation.
        // When the client calls this script 1st time, is passes empty continuation token.
        if (result >= maxResult || !query(responseOptions)) {
            setBody(nextContinuationToken);
        }
    }

    function query(responseOptions) {
        // For empty query string, use readDocuments rather than queryDocuments -- it's faster as doesn't need to process the query.
        return (filterQuery && filterQuery.length) ?
            collection.queryDocuments(collection.getSelfLink(), filterQuery, responseOptions, onReadDocuments) :
            collection.readDocuments(collection.getSelfLink(), responseOptions, onReadDocuments);
    }

    // This is callback is called from collection.queryDocuments/readDocuments.
    function onReadDocuments(err, docFeed, responseOptions) {
        if (err) {
            throw 'Error while reading document: ' + err;
        }

        // Increment the number of documents counted so far.
        result += docFeed.length;

        // If there is continuation, call query again with it, 
        // otherwise we are done, in which case set continuation to null.
        if (responseOptions.continuation) {
            tryQuery(responseOptions.continuation);
        } else {
            setBody(null);
        }
    }

    // Set response body: use an object the client is expecting (2 properties: result and continuationToken).
    function setBody(continuationToken) {
        Rount = result;
    }
}
}

问题是，每次我从 c# web api 调用这个过程时，它都会给我不同的结果，而不是实际的计数（比如我的结果集计数是 17491，但返回 17020 或 17202，有时返回总数）。当没有结果较少，即在另一个集合上。我尝试将两个过程分开，但仍然相同。

Answer 1

存储过程在超过一定限制（至少时间）时被强制终止。这就是您链接的原始示例在正文中返回继续标记的原因。您的上述版本已删除该内容，但您需要恢复它。这样，如果第一次调用存储过程返回的结果包含继续标记，您就知道要再次调用它。最终计数将是对存储过程的所有调用的总和。

您可能还想将 maxResult 降低到 1000 或 10,000 之类的值。这为您提供了更精细的粒度，根据我的经验，实际上比拥有大量数字更快地得到答案。

我不太喜欢示例计数存储过程的编写方式。 Here 是我的等价物。主要区别在于，它返回的对象的形状与其接受的形状相同，并且每次都来回传递重新启动已终止的存储过程所需的所有状态。我已经在here写过有关这种编写存储过程的方法。 documentdb-utils（这些链接所在的位置）包含 Azure 提供的 node.js SDK 的包装器，该包装器将自动继续调用提前终止的存储过程，直到它们实际完成。完全公开，我是 documentdb-utils 的作者。

Answer 2

您还可以查看我在 Omex Document DB Github 存储库中编写的 Count 存储过程： https://github.com/microsoft/Omex/blob/master/src/DocumentDb/Scripts/Count.js

在 documentdb 中获得不同的计数结果

问题描述投票：0回答：2

2个回答

最新问题

在 documentdb 中获得不同的计数结果

问题描述 投票：0回答：2

2个回答

最新问题

问题描述投票：0回答：2