Query iterator per feed range
See original GitHub issue.
Is your feature request related to a problem? Please describe. With the v2 SDK we could pass the PartitionKeyRangeId in the FeedOptions when querying (CreateDocumentQuery). We used this feature to create a web API that would allow clients to read data (backed by a query) in parallel by first reading the shards (partition key ranges) and then executing an iterator per shard (partition key range). This doesn’t seem to be possible with the v3 SDK.
Describe the solution you’d like: We would like the ability to specify a feed range when querying so that the iterator only returns results from the specified range. The internal property FeedRange on QueryRequestOptions might already cover this functionality in part.
Describe alternatives you’ve considered: Parallel execution is supported by the SDK, but this doesn’t work for remote clients that use our web API without having direct access to Cosmos DB. The change feed pull model does support this with e.g. ChangeFeedStartFrom.Beginning(range), but we need a filtered data set instead of all data.
Additional context: I cooked up something to get us started. Hopefully it clarifies what we’re trying to do:
/// <summary>
/// Extension overloads of <c>GetItemQueryIterator</c> that accept an optional feed range.
/// When a feed range is supplied, the container's own iterator is wrapped in a
/// <see cref="SingleRangeFeedIterator{T}"/>; otherwise the call is forwarded unchanged.
/// </summary>
public static class ContainerExtensions
{
    /// <summary>
    /// This method creates a query for items under a container in an Azure Cosmos database using a SQL statement with parameterized values.
    /// For more information on preparing SQL statements with parameterized values, please see <see cref="QueryDefinition"/>.
    /// </summary>
    /// <typeparam name="T">The type of the items returned by the query.</typeparam>
    /// <param name="container">The container to create the query for.</param>
    /// <param name="queryDefinition">The Cosmos SQL query definition.</param>
    /// <param name="continuationToken">(Optional) The continuation token in the Azure Cosmos DB service.</param>
    /// <param name="requestOptions">(Optional) The options for the item query request.</param>
    /// <param name="feedRange">(Optional) Feed range to limit query to.</param>
    /// <returns>An iterator to go through the items.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="container"/> is null.</exception>
    /// <exception cref="ArgumentException">
    /// A feed range was supplied and <paramref name="continuationToken"/> is blank or refers to a different range.
    /// </exception>
    public static FeedIterator<T> GetItemQueryIterator<T>(this Container container,
        QueryDefinition queryDefinition, string continuationToken = null, QueryRequestOptions requestOptions = null,
        FeedRange feedRange = null)
    {
        if (container == null)
        {
            throw new ArgumentNullException(nameof(container));
        }

        if (feedRange == null)
        {
            return container.GetItemQueryIterator<T>(queryDefinition, continuationToken, requestOptions);
        }

        // The token must be resolved first: the same value seeds the inner query and is validated by the wrapper.
        string rangeToken = ResolveRangeToken<T>(continuationToken, feedRange);
        return WrapInSingleRange(
            container.GetItemQueryIterator<T>(queryDefinition, rangeToken, requestOptions),
            feedRange,
            rangeToken);
    }

    /// <summary>
    /// This method creates a query for items under a container in an Azure Cosmos database using a SQL statement given as text.
    /// </summary>
    /// <typeparam name="T">The type of the items returned by the query.</typeparam>
    /// <param name="container">The container to create the query for.</param>
    /// <param name="queryText">(Optional) The Cosmos SQL query text.</param>
    /// <param name="continuationToken">(Optional) The continuation token in the Azure Cosmos DB service.</param>
    /// <param name="requestOptions">(Optional) The options for the item query request.</param>
    /// <param name="feedRange">(Optional) Feed range to limit query to.</param>
    /// <returns>An iterator to go through the items.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="container"/> is null.</exception>
    /// <exception cref="ArgumentException">
    /// A feed range was supplied and <paramref name="continuationToken"/> is blank or refers to a different range.
    /// </exception>
    public static FeedIterator<T> GetItemQueryIterator<T>(this Container container,
        string queryText = null, string continuationToken = null, QueryRequestOptions requestOptions = null,
        FeedRange feedRange = null)
    {
        if (container == null)
        {
            throw new ArgumentNullException(nameof(container));
        }

        if (feedRange == null)
        {
            return container.GetItemQueryIterator<T>(queryText, continuationToken, requestOptions);
        }

        // The token must be resolved first: the same value seeds the inner query and is validated by the wrapper.
        string rangeToken = ResolveRangeToken<T>(continuationToken, feedRange);
        return WrapInSingleRange(
            container.GetItemQueryIterator<T>(queryText, rangeToken, requestOptions),
            feedRange,
            rangeToken);
    }

    /// <summary>
    /// Returns the caller's continuation token, or a "beginning of range" token for
    /// <paramref name="feedRange"/> when the caller did not supply one.
    /// </summary>
    private static string ResolveRangeToken<T>(string continuationToken, FeedRange feedRange)
    {
        return continuationToken ?? SingleRangeFeedIterator<T>.CreateBeginningOfRangeContinuationToken(feedRange);
    }

    /// <summary>
    /// Wraps <paramref name="innerIterator"/> in a <see cref="SingleRangeFeedIterator{T}"/>, disposing the inner
    /// iterator if the wrapper's constructor rejects the arguments.
    /// </summary>
    private static FeedIterator<T> WrapInSingleRange<T>(FeedIterator<T> innerIterator, FeedRange feedRange, string continuationToken)
    {
        try
        {
            return new SingleRangeFeedIterator<T>(innerIterator, feedRange, continuationToken);
        }
        catch
        {
            // The wrapper never took ownership, so release the inner iterator before propagating the failure.
            innerIterator?.Dispose();
            throw;
        }
    }
}
/// <summary>
/// Iterates a feed without switching ranges.
/// This iterator wraps another iterator and reports that no more results are available as soon as the
/// continuation token returned with a page no longer refers to the configured feed range.
/// </summary>
/// <typeparam name="T">The type of the items returned by the wrapped iterator.</typeparam>
public class SingleRangeFeedIterator<T> : FeedIterator<T>
{
    private readonly FeedIterator<T> _innerIterator;

    // Becomes false once a page's continuation token leaves the range; while null, the inner iterator decides.
    private bool? _hasMoreResults;

    /// <summary>
    /// Gets whether more pages are available within <see cref="FeedRange"/>.
    /// </summary>
    public override bool HasMoreResults => _hasMoreResults ?? _innerIterator.HasMoreResults;

    /// <summary>
    /// Gets the feed range this iterator is restricted to.
    /// </summary>
    public FeedRange FeedRange { get; }

    /// <summary>
    /// Creates an iterator that stops once <paramref name="innerIterator"/> moves past <paramref name="feedRange"/>.
    /// </summary>
    /// <param name="innerIterator">The iterator that performs the actual reads; disposed together with this instance.</param>
    /// <param name="feedRange">The feed range results must stay within.</param>
    /// <param name="continuationToken">The continuation token the inner iterator was started with.</param>
    /// <exception cref="ArgumentException">
    /// <paramref name="continuationToken"/> is null or whitespace, or refers to a different range than <paramref name="feedRange"/>.
    /// </exception>
    /// <exception cref="ArgumentNullException"><paramref name="innerIterator"/> or <paramref name="feedRange"/> is null.</exception>
    public SingleRangeFeedIterator(FeedIterator<T> innerIterator, FeedRange feedRange, string continuationToken)
    {
        if (string.IsNullOrWhiteSpace(continuationToken))
        {
            throw new ArgumentException($"'{nameof(continuationToken)}' cannot be null or whitespace.", nameof(continuationToken));
        }

        if (innerIterator is null)
        {
            throw new ArgumentNullException(nameof(innerIterator));
        }

        if (feedRange is null)
        {
            throw new ArgumentNullException(nameof(feedRange));
        }

        _innerIterator = innerIterator;
        FeedRange = feedRange;

        // Validate range in continuation token.
        // NOTE(review): this relies on FeedRange.Equals comparing range values rather than references — confirm against the SDK.
        var continuationRange = ExtractRange(continuationToken);
        if (continuationRange != null && !FeedRange.Equals(continuationRange))
        {
            throw new ArgumentException(
                "This continuation token does not match the provided feed range for this iterator.",
                nameof(continuationToken));
        }
    }

    /// <summary>
    /// Reads the next page from the inner iterator and marks this iterator as exhausted when the
    /// page's continuation token points at a different range.
    /// </summary>
    /// <param name="cancellationToken">(Optional) Token used to cancel the read.</param>
    /// <returns>The page returned by the inner iterator, unmodified.</returns>
    public override async Task<FeedResponse<T>> ReadNextAsync(CancellationToken cancellationToken = default)
    {
        var response = await _innerIterator.ReadNextAsync(cancellationToken).ConfigureAwait(false);

        // A blank token leaves the decision to the inner iterator's own HasMoreResults.
        if (!string.IsNullOrWhiteSpace(response.ContinuationToken))
        {
            var nextRange = ExtractRange(response.ContinuationToken);
            if (nextRange == null || !FeedRange.Equals(nextRange))
            {
                _hasMoreResults = false;
            }
        }

        return response;
    }

    /// <summary>
    /// Disposes the wrapped iterator when called from <c>Dispose()</c>.
    /// </summary>
    /// <param name="disposing">true when called from <c>Dispose()</c>; false when called from a finalizer.</param>
    protected override void Dispose(bool disposing)
    {
        if (disposing)
        {
            _innerIterator.Dispose();
        }

        base.Dispose(disposing);
    }

    #region Static helpers

    /// <summary>
    /// Builds a continuation token with a null inner token and the min/max of <paramref name="feedRange"/>,
    /// so that a query can be started at the beginning of that range.
    /// </summary>
    /// <param name="feedRange">The feed range to start from; its JSON form must contain a "Range" object with "min" and "max".</param>
    /// <returns>A JSON array containing a single token/range entry.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="feedRange"/> is null.</exception>
    /// <exception cref="ArgumentException">The JSON form of <paramref name="feedRange"/> has no "Range" object with "min" and "max".</exception>
    public static string CreateBeginningOfRangeContinuationToken(FeedRange feedRange)
    {
        if (feedRange is null)
        {
            throw new ArgumentNullException(nameof(feedRange));
        }

        var jRange = JObject.Parse(feedRange.ToJsonString())["Range"] as JObject;
        var min = jRange?["min"];
        var max = jRange?["max"];
        if (min == null || max == null)
        {
            // Fail with a descriptive error instead of a NullReferenceException for feed ranges without min/max bounds.
            throw new ArgumentException(
                "The feed range must serialize to JSON containing a 'Range' object with 'min' and 'max'.",
                nameof(feedRange));
        }

        return JsonConvert.SerializeObject(new[]
        {
            new
            {
                token = (string)null,
                range = new
                {
                    min = min.ToString(),
                    max = max.ToString()
                }
            }
        });
    }

    /// <summary>
    /// Parses the feed range out of a continuation token that is a JSON array with exactly one entry.
    /// </summary>
    /// <param name="continuationToken">The continuation token to inspect.</param>
    /// <returns>The parsed feed range, or null when the token is null or whitespace.</returns>
    private static FeedRange ExtractRange(string continuationToken)
    {
        if (string.IsNullOrWhiteSpace(continuationToken))
        {
            return null;
        }

        var jContinuation = JArray.Parse(continuationToken).Single();

        // Patch casing: the token stores the range under "range", while FeedRange.FromJsonString is given "Range".
        jContinuation["Range"] = jContinuation["range"];
        return FeedRange.FromJsonString(jContinuation.ToString());
    }

    #endregion
}
Issue Analytics
- State:
- Created 2 years ago
- Comments:5 (4 by maintainers)
Top GitHub Comments
This exists in the preview package, you can call GetQueryIterator with a FeedRange. It is not GA.
This has been GAed in 3.26.0