Constant memory usage growth in a service consuming from and producing to Kafka
Description
We have a group of 2 service instances listening to 6 topics (4 partitions each), and each service produces messages to another topic. The average consumption rate is ~1M messages per hour, aggregated across the 2 instances. The average message size is ~100-200 bytes, rarely 300 bytes (a serialized DTO). Each service creates 2 Consumer instances, since we subscribe to events from 2 broker clusters (5 topic subscriptions on the first consumer instance and 1 on the second). The producer is idle most of the time, currently sending only a few messages per day.
The code is as follows:
=========================== Configuration ==============================
var consumer_1 = new Consumer(new Dictionary<string, object> // consumer_2 is configured the same way
{
    { "group.id", appSettings.KafkaConsumerGroup },
    { "bootstrap.servers", string.Join(",", appSettings.KafkaBootstrapServers_1) },
    { "queued.max.messages.kbytes", appSettings.TopicPartitionMaxQueueKb }, // 131072 KB = 128 MB
    { "queued.min.messages", appSettings.TopicPartitionMinQueueMessages }, // 10000
    { "fetch.message.max.bytes", appSettings.TopicPartitionMaxFetchBytes } // 131072 bytes = 128 KB
});
var producerConfig = new KafkaPushNotificationProducerConfig(new Dictionary<string, object>
{
    { "bootstrap.servers", string.Join(",", appSettings.KafkaBootstrapServers_3) }
});
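As a back-of-the-envelope check (assuming, as the librdkafka configuration docs of this era describe, that "queued.max.messages.kbytes" applies per partition), these settings permit the native prefetch queues alone to hold several GB:

// Hypothetical sanity check, not part of the original service: upper bound on
// librdkafka prefetch buffers if queued.max.messages.kbytes is per partition.
const int TopicCount = 6;
const int PartitionsPerTopic = 4;
const long QueuedMaxKbytes = 131072; // 128 MB, as configured above

long worstCaseBytes = TopicCount * PartitionsPerTopic * QueuedMaxKbytes * 1024;
Console.WriteLine($"Worst-case prefetch memory: {worstCaseBytes / (1024.0 * 1024 * 1024):F1} GB");
// 6 topics * 4 partitions * 128 MB = 3 GB per consumer, before GC overhead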
===================== Consumer extensions ===============================
public static class KafkaConsumerExtensions
{
    private static readonly ILogger Log = Serilog.Log.Logger;

    public static void AddKafkaSubscription<TKafkaNotification, TNotification>(
        this Consumer consumer,
        IMediator mediator,
        string topic,
        IDeserializer<TKafkaNotification> deserializer,
        bool enableInternalLogging)
        where TKafkaNotification : class
        where TNotification : INotification
    {
        consumer.OnMessage += async (sender, message) =>
        {
            try
            {
                Log.Information($"[Confluent.Kafka.Consumer.OnMessage] Message received for topic {topic}{Environment.NewLine}" +
                                $"Key bytes: {message.Key?.Length ?? 0}; Value bytes: {message.Value?.Length ?? 0}");
                if (message.Topic == topic) // disregard messages from other topics
                {
                    var model = deserializer.Deserialize(message.Value);
                    var request = model.MapTo<TNotification>();
                    await mediator.Publish(request).ConfigureAwait(false);
                }
            }
            catch (Exception ex)
            {
                Log.Error(
                    ex,
                    $"[Confluent.Kafka.Consumer.OnMessage] Exception happened while processing Kafka subscription delegate for topic: {topic} and model: {typeof(TKafkaNotification).Name}.");
            }
        };

        consumer.OnPartitionsAssigned += (sender, list) =>
        {
            Log.Debug($"[Confluent.Kafka.Consumer.OnPartitionsAssigned] #{list.Count} partitions were assigned for topic: {topic} and model: {typeof(TKafkaNotification).Name}.");
            consumer.Assign(list);
        };

        consumer.OnPartitionsRevoked += (sender, list) =>
        {
            Log.Debug($"[Confluent.Kafka.Consumer.OnPartitionsRevoked] #{list.Count} partitions were revoked for topic: {topic} and model: {typeof(TKafkaNotification).Name}.");
            consumer.Unassign();
        };

        consumer.OnError += (_, error)
            => Log.Debug($"[Confluent.Kafka.Consumer.OnError] Kafka error. Error: {error}. Topic: {topic} and model: {typeof(TKafkaNotification).Name}");

        consumer.OnConsumeError += (sender, code)
            => Log.Debug($"[Confluent.Kafka.Consumer.OnConsumeError] Kafka consumer error. Code: {code}. Topic: {topic} and model: {typeof(TKafkaNotification).Name}");

        if (enableInternalLogging)
        {
            consumer.OnLog += (sender, message) =>
                Log.Information(
                    $"[Confluent.Kafka.Consumer.OnLog] Level: {message.Level}; Name: {message.Name}; Facility: {message.Facility}; Message: {message.Message}");
        }

        Log.Debug($"[Confluent.Kafka.Consumer] Added Kafka subscription for topic: {topic} and model: {typeof(TKafkaNotification).Name}.");
    }
    public static void Start(this Consumer consumer, int pollingIntervalMs)
    {
        while (true)
        {
            try
            {
                consumer.Poll(TimeSpan.FromMilliseconds(pollingIntervalMs));
            }
            catch (Exception ex)
            {
                // Pass the exception as the first argument so Serilog records it.
                Log.Error(ex, "[Confluent.Kafka.Consumer] Kafka polling error.");
            }
        }
    }
}
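As an aside, the Start loop above has no exit path. A minimal variant with cooperative shutdown (the CancellationToken parameter is my addition for illustration, not part of the original code) would look like:

    // Sketch only: same poll loop, but with an exit path for clean shutdown.
    public static void Start(this Consumer consumer, int pollingIntervalMs, CancellationToken ct)
    {
        while (!ct.IsCancellationRequested)
        {
            try
            {
                consumer.Poll(TimeSpan.FromMilliseconds(pollingIntervalMs));
            }
            catch (Exception ex)
            {
                Log.Error(ex, "[Confluent.Kafka.Consumer] Kafka polling error.");
            }
        }
    }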
========================= Consumer startup ==============================
_consumer_1.Subscribe(
    new List<string>
    {
        _appSettings.KafkaTopic_1_1,
        _appSettings.KafkaTopic_1_2,
        _appSettings.KafkaTopic_1_3,
        _appSettings.KafkaTopic_1_4,
        _appSettings.KafkaTopic_1_5
    });

_consumer_2.Subscribe(
    new List<string>
    {
        _appSettings.KafkaTopic_2_1
    });
// Task.Run (a static method, not an instance method on the stored task) starts each poll loop.
_consumerTask_1 = Task.Run(() => _consumer_1.Start(_appSettings.KafkaPollingIntervalMs));
_consumerTask_2 = Task.Run(() => _consumer_2.Start(_appSettings.KafkaPollingIntervalMs));
============================== Producer ==============================
public class SendMessageManager : ISendMessageManager
{
    private static readonly ILogger Log = Serilog.Log.ForContext<SendMessageManager>();
    private readonly AppSettings _settings;
    private readonly IKafkaProducerConfig _producerConfig;

    public SendMessageManager(IKafkaProducerConfig producerConfig, AppSettings settings)
    {
        _settings = settings;
        _producerConfig = producerConfig;
    }

    public async Task SendMessage(SendMessageRequest request)
    {
        try
        {
            var settings = new AvroSerializerSettings { UsePosixTime = true };
            var avroSerializer = AvroSerializer.Create<SendMessageRequest>(settings);
            using (var stream = new MemoryStream())
            {
                avroSerializer.Serialize(stream, request);
                var payload = stream.ToArray();
                using (var producer = new Producer(_producerConfig.Instance))
                {
                    // Subscribe to OnLog before producing so internal log events are not missed.
                    if (_settings.EnableInternalLogging.HasValue && _settings.EnableInternalLogging.Value)
                    {
                        producer.OnLog += (sender, message) =>
                            Log.Debug(
                                $"[Confluent.Kafka.Producer] Level: {message.Level}; Name: {message.Name}; Facility: {message.Facility}; Message: {message.Message}");
                    }

                    var report = await producer.ProduceAsync(_settings.KafkaSendMessageTopic, null, payload);
                    Log.Debug($"[{nameof(SendMessage)}] 'SendMessage' request to Kafka succeeded.{Environment.NewLine}" +
                              $"SendMessageRequest: {JsonConvert.SerializeObject(request)}{Environment.NewLine}" +
                              $"Delivery report: Topic = {_settings.KafkaSendMessageTopic}; Partition = {report.Partition}; Offset = {report.Offset}");
                }
            }
        }
        catch (Exception e)
        {
            Log.Error(
                e,
                $"[{nameof(SendMessage)}] 'SendMessage' request to Kafka failed.{Environment.NewLine}" +
                $"SendMessageRequest: {JsonConvert.SerializeObject(request)}");
            throw;
        }
    }
}
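Worth noting for anyone reading along: a Producer wraps a librdkafka handle that is expensive to create and intended to be long-lived, so constructing and disposing one per SendMessage call is wasteful, and unflushed native buffers can inflate committed memory. A minimal sketch of the long-lived alternative (the class name, IDisposable wiring, and singleton registration are assumptions on my part):

// Sketch: reuse one Producer for the lifetime of the manager (register as a singleton).
public class SingletonSendMessageManager : IDisposable
{
    private readonly Producer _producer; // created once, disposed on shutdown

    public SingletonSendMessageManager(IKafkaProducerConfig producerConfig)
    {
        _producer = new Producer(producerConfig.Instance);
    }

    public Task<Message> SendAsync(string topic, byte[] payload)
        => _producer.ProduceAsync(topic, null, payload);

    public void Dispose() => _producer.Dispose();
}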
Service memory usage reached ~5 GB within about half a day and keeps growing until OOM. It rises at ~1 MB per second, sometimes plateaus for a while, and then starts growing again. After some investigation I lowered several settings roughly tenfold each, per the wiki explanation: "queued.max.messages.kbytes", "queued.min.messages" and "fetch.message.max.bytes", since I assumed message consumption was the most likely source of the leak. This may have slowed the growth somewhat but didn't stop it. Attaching the DebugDiag tool to the process didn't reveal any obvious memory leak, except that Committed Virtual Memory is high and roughly equal to RAM usage. So I'm currently struggling to identify the culprit and thought you might be able to point me in the right direction.
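To narrow down whether the memory sits in librdkafka's native queues or on the managed heap, one option (a sketch, assuming the 0.11.x OnStatistics event, which delivers librdkafka's statistics JSON) is to enable periodic statistics and watch the per-partition fetch queue sizes:

// Assumption: "statistics.interval.ms" is added to the consumer configuration,
// e.g. { "statistics.interval.ms", 60000 }, to make librdkafka emit stats.
consumer.OnStatistics += (sender, json) =>
    // The JSON payload contains per-partition fetchq_cnt / fetchq_size fields,
    // which show how much data the native prefetch queues are holding.
    Log.Information($"[Confluent.Kafka.Consumer.OnStatistics] {json}");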
How to reproduce
All conditions are pretty much in the description.
Checklist
Please provide the following information:
- [x] Confluent.Kafka nuget version: 0.11.5
- [ ] Apache Kafka version:
- [x] Client configuration: see description
- [x] Operating system: Windows Server 2012 R2
- [ ] Provide logs (with "debug": "..." in the configuration as necessary): I will turn them on and update
- [ ] Provide broker log excerpts
- [x] Critical issue
Top GitHub Comments
@mhowlett, that seems to be true; the issue is not related to the Kafka client. Thanks for looking into it.
this looks unrelated to Confluent.Kafka to me - I can't think of any reason for the large number of Int32 arrays, and we don't make use of the ASP.NET Caching functionality.