Stuck on an issue?

Lightrun Answers was designed to reduce the constant googling that comes with debugging 3rd party libraries. It collects links to all the places you might be looking at while hunting down a tough bug.

And, if you’re still stuck at the end, we’re happy to hop on a call to see how we can help out.

Missing records after compaction and checkpointing.

See original GitHub issue

Hi,

I’m experimenting with FASTER KV as state storage for Trill. I’m seeing an odd issue: when I run the following test, it fails because the number of records stored on disk is less than the number of records inserted.

In this case I have intentionally set the index size to a small number in order to demonstrate the issue I’m having.

Can you please explain the behaviour I’m seeing here?

The test result:

Message: 
Assert.Equal() Failure
Expected: 70000
Actual:   37386

  Stack Trace: 
StreamableStateTests.StreamableState_Benchmark() line 51
--- End of stack trace from previous location ---

  Standard Output: 

1073ms / 1.07s: Initiating store

1106ms / 1.11s: Upserting 70000 records

34ms / 0.03s: Checkpointing records before compaction

1312ms / 1.31s: Compacted until 1680064

8ms / 0.01s: Checkpointing records after compaction

6ms / 0.01s: Checkpointing index

6450ms / 6.45s: Initiating store

475ms / 0.48s: Querying 37386 records

/// <summary>
/// Wraps a <c>FasterKV&lt;long, long&gt;</c> store whose log file and checkpoints
/// are kept under a single directory, and exposes upsert, delete, scan,
/// checkpoint and compaction operations on it.
/// </summary>
internal class StreamableState : IDisposable
{
    private const string LogFileName = "hlog.log";
    private readonly IDevice _logDevice;
    private readonly ICheckpointManager _checkpointManager;
    private readonly FasterKV<long, long> _fasterKV;
    private bool _disposedValue;

    /// <summary>
    /// Opens the store rooted at <paramref name="directory"/>, creating the
    /// directory if needed. The store is constructed with
    /// <c>tryRecoverLatest: true</c>.
    /// </summary>
    /// <param name="directory">Directory holding the log file and the "checkpoints" sub-directory.</param>
    public StreamableState(string directory)
    {
        // CreateDirectory is a no-op when the directory already exists.
        Directory.CreateDirectory(directory);

        _logDevice = Devices.CreateLogDevice(Path.Join(directory, LogFileName));

        _checkpointManager = new DeviceLogCommitCheckpointManager(
            new LocalStorageNamedDeviceFactory(),
            new DefaultCheckpointNamingScheme(Path.Join(directory, "checkpoints")));

        _fasterKV = new FasterKV<long, long>(
            1L << 2, // deliberately tiny index size, used to reproduce the reported issue
            new LogSettings
            {
                LogDevice = _logDevice
            },
            new CheckpointSettings
            {
                CheckpointManager = _checkpointManager
            },
            tryRecoverLatest: true);
    }

    /// <summary>Upserts a single key/value pair using a short-lived session.</summary>
    public async Task UpsertAsync(long key, long value)
    {
        using var session = NewSession();
        await session.UpsertAsync(ref key, ref value);
    }

    /// <summary>Deletes a single key using a short-lived session.</summary>
    public async Task RemoveAsync(long key)
    {
        using var session = NewSession();
        await session.DeleteAsync(key);
    }

    /// <summary>
    /// Lazily yields the value of every record returned by the session iterator.
    /// The session and iterator are released when enumeration completes or the
    /// enumerator is disposed.
    /// </summary>
    public IEnumerable<long> Query()
    {
        using var session = NewSession();
        using var iter = session.Iterate();

        // The record info is not needed here; only the value is surfaced.
        while (iter.GetNext(out _))
        {
            yield return iter.GetValue();
        }
    }

    /// <summary>Takes a fold-over hybrid log checkpoint.</summary>
    public async Task CheckpointAsync()
    {
        await _fasterKV.TakeHybridLogCheckpointAsync(CheckpointType.FoldOver);
    }

    /// <summary>Takes an index checkpoint.</summary>
    public async Task CheckpointIndexAsync()
    {
        await _fasterKV.TakeIndexCheckpointAsync();
    }

    /// <summary>
    /// Compacts the log up to the store's current <c>SafeReadOnlyAddress</c>.
    /// </summary>
    /// <returns>The value returned by <c>session.Compact</c>.</returns>
    public long Compact()
    {
        using var session = NewSession();
        return session.Compact(_fasterKV.Log.SafeReadOnlyAddress);
    }

    /// <inheritdoc />
    public void Dispose()
    {
        Dispose(disposing: true);
        GC.SuppressFinalize(this);
    }

    /// <summary>
    /// Releases the store and the resources it was built on. Safe to call more than once.
    /// </summary>
    /// <param name="disposing"><c>true</c> when called from <see cref="Dispose()"/>.</param>
    protected virtual void Dispose(bool disposing)
    {
        if (_disposedValue)
        {
            return;
        }

        if (disposing)
        {
            // Tear down in reverse order of construction: the store was built on
            // top of the checkpoint manager and the log device, so it must be
            // disposed before the resources it depends on.
            _fasterKV.Dispose();
            _checkpointManager.Dispose();
            _logDevice.Dispose();
        }

        _disposedValue = true;
    }

    // Every operation opens its own session with the default SimpleFunctions callbacks.
    private ClientSession<long, long, long, long, Empty, IFunctions<long, long, long, long, Empty>> NewSession() =>
        _fasterKV.NewSession(new SimpleFunctions<long, long>());
}

/// <summary>
/// Round-trip test for <c>StreamableState</c>: writes records, checkpoints and
/// compacts, reopens the store and checks that every record is still returned.
/// </summary>
public class StreamableStateTests
{
    private readonly ITestOutputHelper _testOutputHelper;

    public StreamableStateTests(ITestOutputHelper testOutputHelper)
    {
        _testOutputHelper = testOutputHelper;
    }

    /// <summary>
    /// Upserts <c>count</c> records, checkpoints before and after compaction,
    /// checkpoints the index, then reopens the store and asserts that a full
    /// scan returns <c>count</c> records.
    /// </summary>
    [Fact]
    public async Task StreamableState_Benchmark()
    {
        const int count = 70_000;
        int actualCount = 0;

        var directory = Path.Join(Path.GetDirectoryName(Assembly.GetExecutingAssembly().Location), "faster");

        try
        {
            using (var state = await BenchmarkOperation(_ => "Initiating store", () => Task.FromResult(new StreamableState(directory))))
            {
                var values = Enumerable.Range(0, count).ToList();

                await BenchmarkOperation($"Upserting {values.Count} records", async () =>
                {
                    foreach (var value in values)
                    {
                        await state.UpsertAsync(value, value);
                    }
                });

                await BenchmarkOperation("Checkpointing records before compaction", () => state.CheckpointAsync());
                await BenchmarkOperation(compacted => $"Compacted until {compacted}", () => Task.FromResult(state.Compact()));
                await BenchmarkOperation("Checkpointing records after compaction", () => state.CheckpointAsync());
                await BenchmarkOperation("Checkpointing index", () => state.CheckpointIndexAsync());
            }

            // Reopen the store from disk; StreamableState asks the store to recover the latest checkpoint.
            using (var state = await BenchmarkOperation(_ => "Initiating store", () => Task.FromResult(new StreamableState(directory))))
            {
                var all = await BenchmarkOperation(records => $"Querying {records.Count} records", () => Task.FromResult(state.Query().ToList()));
                actualCount = all.Count;
            }
        }
        finally
        {
            // Always clean up, even when a step above throws: leftover log and
            // checkpoint files would otherwise be recovered by the next run and
            // skew its result. The existence check keeps a failure that happened
            // before the directory was created from being masked.
            if (Directory.Exists(directory))
            {
                Directory.Delete(directory, true);
            }
        }

        Assert.Equal(count, actualCount);
    }

    /// <summary>
    /// Runs <paramref name="operation"/>, then writes its elapsed time and
    /// <paramref name="description"/> to the test output.
    /// </summary>
    private async Task BenchmarkOperation(string description, Func<Task> operation)
    {
        // Time only the operation itself, not the logging below.
        var stopWatch = Stopwatch.StartNew();
        await operation();
        stopWatch.Stop();

        _testOutputHelper.WriteLine(string.Empty);
        _testOutputHelper.WriteLine($"{stopWatch.ElapsedMilliseconds}ms / {stopWatch.Elapsed.TotalSeconds:0.00}s: {description}");
    }

    /// <summary>
    /// Runs <paramref name="operation"/>, writes its elapsed time together with a
    /// description built from the result, and returns the result.
    /// </summary>
    private async Task<T> BenchmarkOperation<T>(Func<T, string> description, Func<Task<T>> operation)
    {
        // Time only the operation itself, not the description callback or logging.
        var stopWatch = Stopwatch.StartNew();
        var result = await operation();
        stopWatch.Stop();

        _testOutputHelper.WriteLine(string.Empty);
        _testOutputHelper.WriteLine($"{stopWatch.ElapsedMilliseconds}ms / {stopWatch.Elapsed.TotalSeconds:0.00}s: {description(result)}");

        return result;
    }
}