diff --git a/.github/workflows/go.yaml b/.github/workflows/go.yaml index ddbf53a..d1ef402 100644 --- a/.github/workflows/go.yaml +++ b/.github/workflows/go.yaml @@ -28,7 +28,7 @@ jobs: name: test strategy: matrix: - go: ["1.19.x", "1.22.x", "1.23.x"] + go: ["1.22.x", "1.23.x"] runs-on: ubuntu-latest steps: - name: Setup Go @@ -44,13 +44,12 @@ jobs: - name: Upload coverage to codecov.io uses: codecov/codecov-action@v3 - test-os: - name: test-os + test-race-1: + name: test-race-1 strategy: matrix: - go: ["1.19.x", "1.22.x", "1.23.x"] - os: [macos-latest, windows-latest, ubuntu-latest] - runs-on: ${{ matrix.os }} + go: ["1.23.x"] + runs-on: ubuntu-latest steps: - name: Setup Go with: @@ -60,17 +59,13 @@ jobs: - uses: actions/checkout@v2 - name: Test - run: | - go test ./... -run=TestPersistOS - go test ./... -run=TestHybridCacheGetSetNoRace - go test ./... -run=TestNvmResize + run: go test ./... -run=TestCacheRace_GetSetDeleteExpire -count=1 -race - build-os: - name: build-os + test-race-2: + name: test-race-2 strategy: matrix: - go: ["1.19.x", "1.22.x", "1.23.x"] - os: [darwin, windows, freebsd, solaris, illumos, openbsd, plan9] + go: ["1.23.x"] runs-on: ubuntu-latest steps: - name: Setup Go @@ -80,5 +75,24 @@ jobs: - uses: actions/checkout@v2 - - name: Build - run: GOOS=${{ matrix.os }} CGO_ENABLED=0 go build + - name: Test + run: go test ./... -run=TestCacheRace_ -count=1 + + test-os: + name: test-os + strategy: + matrix: + go: ["1.22.x", "1.23.x"] + os: [macos-latest, windows-latest, ubuntu-latest] + runs-on: ${{ matrix.os }} + steps: + - name: Setup Go + with: + go-version: ${{ matrix.go }} + uses: actions/setup-go@v2 + + - uses: actions/checkout@v2 + + - name: Test + run: | + go test ./... -run=TestPersistOS diff --git a/Makefile b/Makefile index 293e33f..73714c3 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,10 @@ -.PHONY: test testx lint bench cover +.PHONY: test test-race testx lint bench cover test: - go test ./... -race + go test -skip=TestCacheRace_ ./... + +test-race: + go test ./... -run=TestCacheRace_ -count=1 -race testx: go test ./... -v -failfast @@ -10,5 +13,5 @@ lint: golangci-lint run cover: - go test -race -timeout 2000s -coverprofile=cover.out -coverpkg=./... ./... + go test -timeout 2000s -coverprofile=cover.out -coverpkg=./... -skip=TestCacheRace_ ./... go tool cover -html=cover.out -o cover.html diff --git a/README.md b/README.md index c5375c4..9229091 100644 --- a/README.md +++ b/README.md @@ -20,7 +20,7 @@ High performance in-memory & hybrid cache inspired by [Caffeine](https://github. - [Benchmarks](#benchmarks) * [throughput](#throughput) * [hit ratios](#hit-ratios) -- [Hybrid Cache(Experimental)](#hybrid-cacheexperimental) +- [Secondary Cache(Experimental)](#secondary-cacheexperimental) - [Support](#support) ## Requirements @@ -246,92 +246,31 @@ BenchmarkCache/zipf_ristretto_reads=0%,writes=100%-8 2028530 670.6 ![hit ratios](benchmarks/results/oltp.png) -## Hybrid Cache(Experimental) +## Secondary Cache(Experimental) -HybridCache feature enables Theine to extend the DRAM cache to NVM. With HybridCache, Theine can seamlessly move Items stored in cache across DRAM and NVM as they are accessed. Using HybridCache, you can shrink your DRAM footprint of the cache and replace it with NVM like Flash. This can also enable you to achieve large cache capacities for the same or relatively lower power and dollar cost. 
+SecondaryCache is the interface for caching data on a secondary tier, which can be a non-volatile media or alternate forms of caching such as compressed data. The purpose of the secondary cache is to support other ways of caching the object, such as persistent or compressed data. It can be viewed as an extension of Theine’s current in-memory cache. -#### Design -Hybrid Cache is inspired by CacheLib's HybridCache. See [introduction](https://cachelib.org/docs/Cache_Library_User_Guides/HybridCache) and [architecture](https://cachelib.org/docs/Cache_Library_Architecture_Guide/hybrid_cache) from CacheLib's guide. +Currently, the SecondaryCache interface has one implementation inspired by CacheLib's Hybrid Cache. -When you use HybridCache, items allocated in the cache can live on NVM or DRAM based on how they are accessed. Irrespective of where they are, **when you access them, you always get them to be in DRAM**. - -Items start their lifetime on DRAM. As an item becomes cold it gets evicted from DRAM when the cache is full. Theine spills it to a cache on the NVM device. Upon subsequent access through `Get()`, if the item is not in DRAM, theine looks it up in the HybridCache and if found, moves it to DRAM. When the HybridCache gets filled up, subsequent insertions into the HybridCache from DRAM will throw away colder items from HybridCache. - -Same as CacheLib, Theine hybrid cache also has **BigHash** and **Block Cache**, it's highly recommended to read the CacheLib architecture design before using hybrid cache, here is a simple introduction of these 2 engines(just copy from CacheLib): - -- **BigHash** is effectively a giant fixed-bucket hash map on the device. To read or write, the entire bucket is read (in case of write, updated and written back). Bloom filter used to reduce number of IO. When bucket is full, items evicted in FIFO manner. You don't pay any RAM price here (except Bloom filter, which is 2GB for 1TB BigHash, tunable). -- **Block Cache**, on the other hand, divides device into equally sized regions (16MB, tunable) and fills a region with items of same size class, or, in case of log-mode fills regions sequentially with items of different size. Sometimes we call log-mode “stack alloc”. BC stores compact index in memory: key hash to offset. We do not store full key in memory and if collision happens (super rare), old item will look like evicted. In your calculations, use 12 bytes overhead per item to estimate RAM usage. For example, if your average item size is 4KB and cache size is 500GB you'll need around 1.4GB of memory. - -#### Using Hybrid Cache - -To use HybridCache, you need to create a nvm cache with NvmBuilder. NewNvmBuilder require 2 params, first is cache file name, second is cache size in bytes. Theine will use direct I/O to read/write file. - -```go -nvm, err := theine.NewNvmBuilder[int, int]("cache", 150<<20).[settings...].Build() -``` - -Then enable hybrid mode in your Theine builder. ```go -client, err := theine.NewBuilder[int, int](100).Hybrid(nvm).Build() +type SecondaryCache[K comparable, V any] interface { + Get(key K) (value V, cost int64, expire int64, ok bool, err error) + Set(key K, value V, cost int64, expire int64) error + Delete(key K) error + HandleAsyncError(err error) +} ``` -#### NVM Builder Settings - -All settings are optional, unless marked as "Required". - -* **[Common]** `BlockSize` default 4096 - - Device block size in bytes (minimum IO granularity). 
-* **[Common]** `KeySerializer` default JsonSerializer - - KeySerializer is used to marshal/unmarshal between your key type and bytes. - ```go - type Serializer[T any] interface { - Marshal(v T) ([]byte, error) - Unmarshal(raw []byte, v *T) error - } - ``` -* **[Common]** `ValueSerializer` default JsonSerializer +If you plan to use a remote cache or database, such as Redis, as a secondary cache, keep in mind that the in-memory cache remains the primary source of truth. Evicted entries from memory are sent to the secondary cache. This approach differs from most tiered cache systems, where the remote cache is treated as the primary source of truth and is written to first. - ValueSerializer is used to marshal/unmarshal between your value type and bytes. Same interface as KeySerializer. -* **[Common]** `ErrorHandler` default do nothing - - Theine evicts entries to Nvm asynchronously, so errors will be handled by this error handler. -* **[BlockCache]** `RegionSize` default 16 << 20 (16 MB) - - Region size in bytes. -* **[BlockCache]** `CleanRegionSize` default 3 - - How many regions do we reserve for future writes. Set this to be equivalent to your per-second write rate. It should ensure your writes will not have to retry to wait for a region reclamation to finish. -* **[BigHash]** `BucketSize` defalut 4 << 10 (4 KB) - - Bucket size in bytes. -* **[BigHash]** `BigHashPct` default 10 - - Percentage of space to reserve for BigHash. Set the percentage > 0 to enable BigHash. The remaining part is for BlockCache. The value has to be in the range of [0, 100]. Set to 100 will disable block cache. -* **[BigHash]** `BigHashMaxItemSize` default (bucketSize - 80) - - Maximum size of a small item to be stored in BigHash. Must be less than (bucket size - 80). -* **[BigHash]** `BucketBfSize` default 8 bytes - - Bloom filter size, bytes per bucket. - -#### Hybrid Mode Settings - -After you call `Hybrid(...)` in a cache builder. Theine will convert current builder to hybrid builder. Hybrid builder has several settings. - -* `Workers` defalut 2 - - Theine evicts entries in a separate policy goroutinue, but insert to NVM can be done parallel. To make this work, Theine send evicted entries to workers, and worker will sync data to NVM cache. This setting controls how many workers are used to sync data. - -* `AdmProbability` defalut 1 - - This is an admission policy for endurance and performance reason. When entries are evicted from DRAM cache, this policy will be used to control the insertion percentage. A value of 1 means that all entries evicted from DRAM will be inserted into NVM. Values should be in the range of [0, 1]. +#### Secondary Cache Implementations +NVM: https://github.com/Yiling-J/theine-nvm #### Limitations - Cache Persistence is not currently supported, but it may be added in the future. You can still use the Persistence API in a hybrid-enabled cache, but only the DRAM part of the cache will be saved or loaded. - The removal listener will only receive REMOVED events, which are generated when an entry is explicitly removed by calling the Delete API. - No Range/Len API. + ## Support Feel free to open an issue or ask question in discussions. 
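To make the new `SecondaryCache` interface introduced in the README hunk above concrete, here is a minimal, illustrative sketch of a map-backed implementation. This is not code from the PR: the `MapSecondary` name and its internals are hypothetical (the updated tests below use an internal `SimpleMapSecondary` helper), and a real secondary tier would normally write to flash, compressed memory, or a remote store such as Redis rather than a plain map.

```go
package secondary

import (
	"log"
	"sync"
)

// record mirrors the metadata Theine passes through the SecondaryCache interface.
type record[V any] struct {
	value  V
	cost   int64
	expire int64
}

// MapSecondary is a hypothetical SecondaryCache implementation backed by a
// mutex-guarded map, useful only to show the shape of the interface.
type MapSecondary[K comparable, V any] struct {
	mu   sync.RWMutex
	data map[K]record[V]
}

func NewMapSecondary[K comparable, V any]() *MapSecondary[K, V] {
	return &MapSecondary[K, V]{data: map[K]record[V]{}}
}

// Get returns the stored value along with the cost and expire metadata that
// were provided when the entry was handed to the secondary tier.
func (m *MapSecondary[K, V]) Get(key K) (value V, cost int64, expire int64, ok bool, err error) {
	m.mu.RLock()
	defer m.mu.RUnlock()
	r, found := m.data[key]
	if !found {
		return value, 0, 0, false, nil
	}
	return r.value, r.cost, r.expire, true, nil
}

// Set stores an entry evicted from the in-memory cache.
func (m *MapSecondary[K, V]) Set(key K, value V, cost int64, expire int64) error {
	m.mu.Lock()
	defer m.mu.Unlock()
	m.data[key] = record[V]{value: value, cost: cost, expire: expire}
	return nil
}

// Delete removes an entry from the secondary tier.
func (m *MapSecondary[K, V]) Delete(key K) error {
	m.mu.Lock()
	defer m.mu.Unlock()
	delete(m.data, key)
	return nil
}

// HandleAsyncError receives errors from asynchronous writes to the secondary
// tier, since entries are moved there off the caller's goroutine.
func (m *MapSecondary[K, V]) HandleAsyncError(err error) {
	if err != nil {
		log.Println("secondary cache error:", err)
	}
}
```

As the updated `builder_test.go` below shows, an implementation like this is wired in through the hybrid builder, e.g. `theine.NewBuilder[int, int](100).Hybrid(secondary).Workers(1).Build()`.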
diff --git a/builder_test.go b/builder_test.go index cd3d046..bcf1f6e 100644 --- a/builder_test.go +++ b/builder_test.go @@ -2,11 +2,11 @@ package theine_test import ( "context" - "os" "reflect" "testing" "github.com/Yiling-J/theine-go" + "github.com/Yiling-J/theine-go/internal" "github.com/stretchr/testify/require" ) @@ -36,18 +36,16 @@ func TestBuilder(t *testing.T) { // hybrid cache _, err = builder.Hybrid(nil).Build() require.Error(t, err) - nvm, err := theine.NewNvmBuilder[int, int]("afoo", 500<<10).RegionSize(5 << 10).KeySerializer(&IntSerializer{}).ValueSerializer(&IntSerializer{}).BucketBfSize(16).Build() - defer os.Remove("afoo") - require.Nil(t, err) - _, err = builder.Hybrid(nvm).Workers(0).Build() + secondary := internal.NewSimpleMapSecondary[int, int]() + _, err = builder.Hybrid(secondary).Workers(0).Build() require.Error(t, err) - builderH := builder.Hybrid(nvm).Workers(1).AdmProbability(0.8) + builderH := builder.Hybrid(secondary).Workers(1).AdmProbability(0.8) cacheH, err := builderH.Build() require.Nil(t, err) require.Equal(t, reflect.TypeOf(&theine.HybridCache[int, int]{}), reflect.TypeOf(cacheH)) // loading + hybrid - builderLH := builderL.Hybrid(nvm) + builderLH := builderL.Hybrid(secondary) cacheLH, err := builderLH.Build() require.Nil(t, err) require.Equal(t, reflect.TypeOf(&theine.HybridLoadingCache[int, int]{}), reflect.TypeOf(cacheLH)) @@ -61,10 +59,3 @@ func TestBuilder(t *testing.T) { require.Nil(t, err) require.Equal(t, reflect.TypeOf(&theine.HybridLoadingCache[int, int]{}), reflect.TypeOf(cacheLH)) } - -func TestNvmBuilder(t *testing.T) { - _, err := theine.NewNvmBuilder[int, int]("afoo", 100<<10).BlockSize(512).BucketSize(4 << 10).RegionSize(20 << 10).CleanRegionSize(3).KeySerializer(&IntSerializer{}).ValueSerializer(&IntSerializer{}).BigHashPct(20).Build() - defer os.Remove("afoo") - require.Nil(t, err) - -} diff --git a/cache_race_test.go b/cache_race_test.go new file mode 100644 index 0000000..8f5a661 --- /dev/null +++ b/cache_race_test.go @@ -0,0 +1,139 @@ +package theine + +import ( + "math/rand" + "sync" + "testing" + "time" + + "github.com/Yiling-J/theine-go/internal" + "github.com/stretchr/testify/require" +) + +func keyGen() []uint64 { + keys := []uint64{} + r := rand.New(rand.NewSource(0)) + z := rand.NewZipf(r, 1.01, 9.0, 200000) + for i := 0; i < 2<<16; i++ { + keys = append(keys, z.Uint64()) + } + return keys +} + +func TestCacheRace_GetSet(t *testing.T) { + for _, size := range []int{500, 2000, 10000, 50000} { + builder := NewBuilder[uint64, uint64](int64(size)) + builder.RemovalListener(func(key, value uint64, reason RemoveReason) {}) + client, err := builder.Build() + require.Nil(t, err) + var wg sync.WaitGroup + keys := keyGen() + + for i := 1; i <= 20; i++ { + wg.Add(1) + go func() { + defer wg.Done() + rd := rand.Intn(2 << 16) + for i := 0; i < 100000; i++ { + keyGet := keys[(i+rd)&(2<<16-1)] + keyUpdate := keys[(i+3*rd)&(2<<16-1)] + + v, ok := client.Get(keyGet) + if ok && v != keyGet { + panic(keyGet) + } + if !ok { + client.SetWithTTL(keyGet, keyGet, 1, 0) + } + + client.SetWithTTL(keyUpdate, keyUpdate, int64(i%10+1), 0) + } + }() + } + wg.Wait() + client.store.Wait() + + require.True( + t, client.Len() < size+internal.WriteBufferSize, + ) + + di := client.store.DebugInfo() + + require.Equal(t, client.Len(), int(di.TotalCount())) + require.True(t, di.TotalWeight() <= int64(size+size/10)) + require.Equal(t, di.ProbationWeight, di.ProbationWeightField) + require.Equal(t, di.ProtectedWeight, di.ProtectedWeightField) + + for i := 0; i < 
len(di.QueueWeight); i++ { + require.Equal(t, di.QueueWeight[i], di.QueueWeightField[i]) + } + + client.store.RangeEntry(func(entry *internal.Entry[uint64, uint64]) { + require.Equal(t, entry.Weight(), entry.PolicyWeight(), entry.Position()) + }) + + client.Close() + } +} + +func TestCacheRace_GetSetDeleteExpire(t *testing.T) { + for _, size := range []int{500, 2000, 10000, 50000} { + builder := NewBuilder[uint64, uint64](int64(size)) + builder.RemovalListener(func(key, value uint64, reason RemoveReason) {}) + client, err := builder.Build() + require.Nil(t, err) + var wg sync.WaitGroup + keys := keyGen() + + for i := 1; i <= 20; i++ { + wg.Add(1) + go func() { + defer wg.Done() + rd := rand.Intn(2 << 16) + for i := 0; i < 100000; i++ { + key := keys[(i+rd)&(2<<16-1)] + v, ok := client.Get(key) + if ok && v != key { + panic(key) + } + if i%3 == 0 { + client.SetWithTTL(key, key, int64(i%10+1), time.Second*time.Duration(i%25+5)) + } + if i%5 == 0 { + client.Delete(key) + } + if i%5000 == 0 { + client.Range(func(key, value uint64) bool { + return true + }) + } + } + }() + } + wg.Wait() + + client.store.Wait() + + require.True( + t, client.Len() < size+internal.WriteBufferSize, + ) + + di := client.store.DebugInfo() + + require.Equal(t, client.Len(), int(di.TotalCount())) + require.True(t, di.TotalWeight() <= int64(size+size/10)) + require.Equal(t, di.ProbationWeight, di.ProbationWeightField) + require.Equal(t, di.ProtectedWeight, di.ProtectedWeightField) + + for i := 0; i < len(di.QueueWeight); i++ { + require.Equal(t, di.QueueWeight[i], di.QueueWeightField[i]) + } + + client.store.RangeEntry(func(entry *internal.Entry[uint64, uint64]) { + require.Equal(t, entry.Weight(), entry.PolicyWeight(), entry.Position()) + }) + + client.Close() + + } +} diff --git a/cache_test.go b/cache_test.go index 0a09188..38112e9 100644 --- a/cache_test.go +++ b/cache_test.go @@ -67,7 +67,7 @@ func TestCache_SetParallel(t *testing.T) { } func TestCache_GetSetGetDeleteGet(t *testing.T) { - client, err := theine.NewBuilder[string, string](1000).Build() + client, err := theine.NewBuilder[string, string](50000).Build() require.Nil(t, err) for i := 0; i < 20000; i++ { key := fmt.Sprintf("key:%d", rand.Intn(3000)) @@ -164,51 +164,6 @@ func TestCache_SetWithTTLAutoExpire(t *testing.T) { client.Close() } -func TestCache_GetSetDeleteNoRace(t *testing.T) { - for _, size := range []int{500, 2000, 10000, 50000} { - builder := theine.NewBuilder[string, string](int64(size)) - builder.RemovalListener(func(key, value string, reason theine.RemoveReason) {}) - client, err := builder.Build() - require.Nil(t, err) - var wg sync.WaitGroup - keys := []string{} - for i := 0; i < 100000; i++ { - keys = append(keys, fmt.Sprintf("%d", rand.Intn(1000000))) - } - for i := 1; i <= 20; i++ { - wg.Add(1) - go func() { - defer wg.Done() - for i := 0; i < 100000; i++ { - key := keys[i] - v, ok := client.Get(key) - if ok && v != key { - panic(key) - } - if i%3 == 0 { - client.SetWithTTL(key, key, int64(i%10+1), time.Second*time.Duration(i%25+5)) - } - if i%5 == 0 { - client.Delete(key) - } - if i%5000 == 0 { - client.Range(func(key, value string) bool { - return true - }) - } - } - }() - } - wg.Wait() - time.Sleep(300 * time.Millisecond) - - require.True( - t, client.Len() < size+internal.WriteBufferSize, - ) - client.Close() - } -} - func TestCache_Cost(t *testing.T) { client, err := theine.NewBuilder[string, string](500).Build() require.Nil(t, err) @@ -257,12 +212,16 @@ func TestCache_CostUpdate(t *testing.T) { time.Sleep(time.Second) 
require.True(t, client.Len() <= 25 && client.Len() >= 24) require.True(t, client.EstimatedSize() <= 500 && client.EstimatedSize() >= 480) + // update cost success := client.Set("key:15", "", 200) require.True(t, success) time.Sleep(time.Second) - require.True(t, client.Len() <= 16 && client.Len() >= 15) + require.True( + t, client.Len() <= 16 && client.Len() >= 15, + fmt.Sprintf("length too large %d", client.Len()), + ) require.True(t, client.EstimatedSize() <= 500 && client.EstimatedSize() >= 480) } diff --git a/go.mod b/go.mod index 71f9cdb..c3dbe61 100644 --- a/go.mod +++ b/go.mod @@ -1,6 +1,6 @@ module github.com/Yiling-J/theine-go -go 1.19 +go 1.20 require ( github.com/cespare/xxhash/v2 v2.1.1 diff --git a/hybrid_cache_test.go b/hybrid_cache_test.go deleted file mode 100644 index 56fd3c8..0000000 --- a/hybrid_cache_test.go +++ /dev/null @@ -1,440 +0,0 @@ -package theine_test - -import ( - "bytes" - "context" - "encoding/binary" - "math/rand" - "os" - "runtime" - "strconv" - "sync" - "sync/atomic" - "testing" - "time" - - "github.com/Yiling-J/theine-go" - "github.com/stretchr/testify/require" -) - -type ByteSerializer struct{} - -func (s *ByteSerializer) Marshal(i []byte) ([]byte, error) { - return i, nil -} - -func (s *ByteSerializer) Unmarshal(raw []byte, v *[]byte) error { - *v = make([]byte, len(raw)) - copy(*v, raw) - return nil -} - -type IntSerializer struct{} - -func (s *IntSerializer) Marshal(i int) ([]byte, error) { - buff := bytes.NewBuffer(make([]byte, 0)) - err := binary.Write(buff, binary.BigEndian, uint64(i)) - if err != nil { - return nil, err - } - return buff.Bytes(), nil -} - -func (s *IntSerializer) Unmarshal(raw []byte, v *int) error { - num := binary.BigEndian.Uint64(raw) - *v = int(num) - return nil -} - -func TestHybridCacheBighashOnly(t *testing.T) { - nvm, err := theine.NewNvmBuilder[int, []byte]("afoo", 150<<20).BigHashPct(100).ErrorHandler(func(err error) {}).Build() - require.Nil(t, err) - defer os.Remove("afoo") - client, err := theine.NewBuilder[int, []byte](100).Hybrid(nvm).Workers(8).Build() - require.Nil(t, err) - for i := 0; i < 1000; i++ { - success := client.Set(i, []byte(strconv.Itoa(i)), 1) - require.True(t, success) - } - time.Sleep(50 * time.Millisecond) - - for i := 0; i < 1000; i++ { - value, success, err := client.Get(i) - require.Nil(t, err) - require.True(t, success) - require.Equal(t, strconv.Itoa(i), string(value)) - } -} - -func TestHybridCacheBlockCacheOnly(t *testing.T) { - nvm, err := theine.NewNvmBuilder[int, []byte]("afoo", 150<<20).BigHashPct(0).KeySerializer(&IntSerializer{}).ValueSerializer(&ByteSerializer{}).ErrorHandler(func(err error) {}).Build() - require.Nil(t, err) - defer os.Remove("afoo") - client, err := theine.NewBuilder[int, []byte](100).Hybrid(nvm).Workers(8).Build() - require.Nil(t, err) - s := &IntSerializer{} - for i := 0; i < 1000; i++ { - base, err := s.Marshal(i) - require.Nil(t, err) - value := make([]byte, 40<<10) - copy(value, base) - success := client.Set(i, value, 1) - require.True(t, success) - } - time.Sleep(50 * time.Millisecond) - - for i := 0; i < 1000; i++ { - value, success, err := client.Get(i) - require.Nil(t, err) - require.True(t, success) - expected, err := s.Marshal(i) - require.Nil(t, err) - require.Equal(t, expected, value[:8]) - } -} - -func TestHybridCacheGetSetBlockCacheOnly(t *testing.T) { - nvm, err := theine.NewNvmBuilder[int, []byte]("afoo", 40<<20).RegionSize(4 << 20).BigHashPct(0).KeySerializer(&IntSerializer{}).ValueSerializer(&ByteSerializer{}).ErrorHandler(func(err error) 
{}).Build() - require.Nil(t, err) - defer os.Remove("afoo") - client, err := theine.NewBuilder[int, []byte](100).Hybrid(nvm).Workers(8).Build() - require.Nil(t, err) - s := &IntSerializer{} - var wg sync.WaitGroup - for i := 1; i <= 20; i++ { - wg.Add(1) - go func() { - defer wg.Done() - q := rand.Intn(100) - for i := q; i < 3000+q; i++ { - base, err := s.Marshal(i) - require.Nil(t, err) - value, success, err := client.Get(i) - require.Nil(t, err) - if !success { - value := make([]byte, 40<<10) - copy(value, base) - success := client.Set(i, value, 1) - require.True(t, success) - } else { - expected, err := s.Marshal(i) - require.Nil(t, err) - require.Equal(t, expected, value[:8]) - } - } - }() - } - wg.Wait() -} - -func TestHybridCacheGetSetGetDeleteGet(t *testing.T) { - nvm, err := theine.NewNvmBuilder[int, []byte]("afoo", 150<<20).BigHashPct(30).KeySerializer(&IntSerializer{}).ValueSerializer(&ByteSerializer{}).ErrorHandler(func(err error) {}).Build() - require.Nil(t, err) - defer os.Remove("afoo") - client, err := theine.NewBuilder[int, []byte](100).Hybrid(nvm).Workers(8).AdmProbability(1).Build() - require.Nil(t, err) - s := &IntSerializer{} - for i := 0; i < 1000; i++ { - var value []byte - base, err := s.Marshal(i) - require.Nil(t, err) - if i < 600 { - value = base - } else { - value = make([]byte, 4200) - copy(value, base) - } - _, ok, _ := client.Get(i) - require.False(t, ok) - ok = client.Set(i, value, 1) - require.True(t, ok) - v, ok, _ := client.Get(i) - require.True(t, ok) - require.Equal(t, value, v) - err = client.Delete(i) - require.Nil(t, err) - _, ok, _ = client.Get(i) - require.False(t, ok) - } -} - -func TestHybridCacheMix(t *testing.T) { - nvm, err := theine.NewNvmBuilder[int, []byte]("afoo", 150<<20).BigHashPct(30).KeySerializer(&IntSerializer{}).ValueSerializer(&ByteSerializer{}).ErrorHandler(func(err error) {}).Build() - require.Nil(t, err) - defer os.Remove("afoo") - client, err := theine.NewBuilder[int, []byte](100).Hybrid(nvm).Workers(8).AdmProbability(1).Build() - require.Nil(t, err) - s := &IntSerializer{} - for i := 0; i < 1000; i++ { - var value []byte - base, err := s.Marshal(i) - require.Nil(t, err) - if i < 600 { - value = base - } else { - value = make([]byte, 4200) - copy(value, base) - } - success := client.Set(i, value, 1) - require.Nil(t, err) - require.True(t, success) - } - time.Sleep(50 * time.Millisecond) - - for i := 0; i < 1000; i++ { - value, success, err := client.Get(i) - require.Nil(t, err) - require.True(t, success) - expected, err := s.Marshal(i) - require.Nil(t, err) - require.Equal(t, expected, value[:8]) - } -} - -func TestHybridCacheMixTTL(t *testing.T) { - nvm, err := theine.NewNvmBuilder[int, []byte]("afoo", 150<<20).BigHashPct(30).KeySerializer(&IntSerializer{}).ValueSerializer(&ByteSerializer{}).ErrorHandler(func(err error) {}).Build() - require.Nil(t, err) - defer os.Remove("afoo") - client, err := theine.NewBuilder[int, []byte](100).Hybrid(nvm).Workers(8).AdmProbability(1).Build() - require.Nil(t, err) - s := &IntSerializer{} - for i := 0; i < 1000; i++ { - var value []byte - base, err := s.Marshal(i) - require.Nil(t, err) - if i < 600 { - value = base - } else { - value = make([]byte, 4200) - copy(value, base) - } - success := client.SetWithTTL(i, value, 1, time.Second) - require.Nil(t, err) - require.True(t, success) - } - time.Sleep(50 * time.Millisecond) - for i := 0; i < 1000; i++ { - _, success, err := client.Get(i) - require.Nil(t, err) - require.True(t, success) - } - time.Sleep(2 * time.Second) - - for i := 0; i < 1000; 
i++ { - _, success, err := client.Get(i) - require.Nil(t, err) - require.False(t, success) - } -} - -func TestHybridCacheMixProb(t *testing.T) { - nvm, err := theine.NewNvmBuilder[int, []byte]("afoo", 150<<20).BigHashPct(30).KeySerializer(&IntSerializer{}).ValueSerializer(&ByteSerializer{}).ErrorHandler(func(err error) {}).Build() - require.Nil(t, err) - defer os.Remove("afoo") - client, err := theine.NewBuilder[int, []byte](100).Hybrid(nvm).Workers(8).AdmProbability(0.5).Build() - require.Nil(t, err) - s := &IntSerializer{} - for i := 0; i < 1000; i++ { - var value []byte - base, err := s.Marshal(i) - require.Nil(t, err) - if i < 600 { - value = base - } else { - value = make([]byte, 4200) - copy(value, base) - } - success := client.Set(i, value, 1) - require.Nil(t, err) - require.True(t, success) - } - time.Sleep(50 * time.Millisecond) - - counter := 0 - for i := 0; i < 1000; i++ { - _, success, err := client.Get(i) - require.Nil(t, err) - if success { - counter += 1 - } - } - require.True(t, counter < 600) -} - -func TestHybridCacheMixPersist(t *testing.T) { - nvm, err := theine.NewNvmBuilder[int, []byte]("afoo", 150<<20).BigHashPct(30).KeySerializer(&IntSerializer{}).ValueSerializer(&ByteSerializer{}).ErrorHandler(func(err error) {}).Build() - require.Nil(t, err) - defer os.Remove("afoo") - client, err := theine.NewBuilder[int, []byte](100).Hybrid(nvm).Workers(8).AdmProbability(1).Build() - require.Nil(t, err) - s := &IntSerializer{} - for i := 0; i < 1000; i++ { - var value []byte - base, err := s.Marshal(i) - require.Nil(t, err) - if i < 600 { - value = base - } else { - value = make([]byte, 4200) - copy(value, base) - } - success := client.Set(i, value, 1) - require.Nil(t, err) - require.True(t, success) - } - time.Sleep(50 * time.Millisecond) - f, err := os.Create("ptest") - defer os.Remove("ptest") - require.Nil(t, err) - err = client.SaveCache(0, f) - require.Nil(t, err) - f.Close() - - f, err = os.Open("ptest") - require.Nil(t, err) - - nvm2, err := theine.NewNvmBuilder[int, []byte]("afoo", 150<<20).BigHashPct(30).KeySerializer(&IntSerializer{}).ValueSerializer(&ByteSerializer{}).ErrorHandler(func(err error) {}).Build() - require.Nil(t, err) - new, err := theine.NewBuilder[int, []byte](100).Hybrid(nvm2).Workers(8).AdmProbability(1).Build() - require.Nil(t, err) - err = new.LoadCache(0, f) - require.Nil(t, err) - f.Close() - - get := 0 - for i := 0; i < 1000; i++ { - value, success, err := new.Get(i) - require.Nil(t, err) - if success { - get += 1 - expected, err := s.Marshal(i) - require.Nil(t, err) - require.Equal(t, expected, value[:8]) - } - } - require.True(t, get == 100) -} - -func TestHybridCacheErrorHandler(t *testing.T) { - var errCounter atomic.Uint32 - nvm, err := theine.NewNvmBuilder[int, []byte]("afoo", 150<<20).BigHashPct(100).KeySerializer(&IntSerializer{}).ValueSerializer(&ByteSerializer{}).ErrorHandler(func(err error) { - errCounter.Add(1) - }).Build() - require.Nil(t, err) - client, err := theine.NewBuilder[int, []byte](100).Hybrid(nvm).Workers(8).Build() - require.Nil(t, err) - err = os.Truncate("afoo", 1) - require.Nil(t, err) - defer os.Remove("afoo") - for i := 0; i < 1000; i++ { - success := client.Set(i, []byte(strconv.Itoa(i)), 1) - require.Nil(t, err) - require.True(t, success) - } - require.True(t, errCounter.Load() > 0) - -} - -func TestHybridCacheGetSetNoRace(t *testing.T) { - nvm, err := theine.NewNvmBuilder[int, []byte]("afoo", 1000<<20).KeySerializer(&IntSerializer{}).ValueSerializer(&ByteSerializer{}).ErrorHandler(func(err error) {}).Build() - 
require.Nil(t, err) - defer os.Remove("afoo") - client, err := theine.NewBuilder[int, []byte](100).Hybrid(nvm).Workers(8).Build() - require.Nil(t, err) - var wg sync.WaitGroup - for i := 1; i <= runtime.GOMAXPROCS(0)*2; i++ { - wg.Add(1) - go func() { - defer wg.Done() - s := &IntSerializer{} - for i := 0; i < 20000; i++ { - key := i - v, ok, err := client.Get(key) - if err != nil { - panic(err) - } - if !ok { - base, err := s.Marshal(key) - if err != nil { - panic(err) - } - if i%2 == 0 { - value := make([]byte, 1<<10) - copy(value, base) - _ = client.Set(key, value, 1) - } else { - value := make([]byte, 120<<10) - copy(value, base) - _ = client.Set(key, value, 1) - } - if i%5 == 0 { - err := client.Delete(key) - if err != nil { - panic(err) - } - } - } else { - expected, err := s.Marshal(key) - if err != nil { - panic(err) - } - if !bytes.Equal(v[:8], expected) { - panic("value mismatch") - } - } - } - }() - } - wg.Wait() - time.Sleep(500 * time.Millisecond) - client.Close() -} - -func TestHybridLoadingCache(t *testing.T) { - nvm, err := theine.NewNvmBuilder[int, []byte]("afoo", 150<<20).BigHashPct(30).KeySerializer(&IntSerializer{}).ValueSerializer(&ByteSerializer{}).ErrorHandler(func(err error) {}).Build() - require.Nil(t, err) - defer os.Remove("afoo") - s := &IntSerializer{} - client, err := theine.NewBuilder[int, []byte](100).Hybrid(nvm).Workers(8).AdmProbability(1). - Loading(func(ctx context.Context, key int) (theine.Loaded[[]byte], error) { - var value []byte - base, err := s.Marshal(key) - require.Nil(t, err) - if key < 600 { - value = base - } else { - value = make([]byte, 4200) - copy(value, base) - } - return theine.Loaded[[]byte]{Value: value, Cost: 1, TTL: 0}, nil - }).Build() - require.Nil(t, err) - - for i := 0; i < 1000; i++ { - value, err := client.Get(context.TODO(), i) - require.Nil(t, err) - expected, err := s.Marshal(i) - require.Nil(t, err) - require.Equal(t, expected, value[:8]) - } - - for i := 0; i < 1000; i++ { - value, err := client.Get(context.TODO(), i) - require.Nil(t, err) - expected, err := s.Marshal(i) - require.Nil(t, err) - require.Equal(t, expected, value[:8]) - } - - success := client.Set(999, []byte{1}, 1) - require.True(t, success) - _, err = client.Get(context.TODO(), 999) - require.Nil(t, err) - err = client.Delete(999) - require.Nil(t, err) - _, err = client.Get(context.TODO(), 999) - require.Nil(t, err) - success = client.SetWithTTL(999, []byte{}, 1, 5*time.Second) - require.True(t, success) - -} diff --git a/internal/alloc/alloc.go b/internal/alloc/alloc.go deleted file mode 100644 index 941ef75..0000000 --- a/internal/alloc/alloc.go +++ /dev/null @@ -1,100 +0,0 @@ -package alloc - -import ( - "bytes" - "sync" - "sync/atomic" - - "github.com/Yiling-J/theine-go/internal/nvm/directio" -) - -func alignDown(num int, alignment int) int { - return num - num%alignment -} - -func alignUp(num int, alignment int) int { - return alignDown(num+alignment-1, alignment) -} - -type Allocator struct { - pool sync.Pool - itemPool sync.Pool - blockSize int - bucketSize int - regionSize int - mu sync.Mutex - current *BufferItem -} - -type BufferItem struct { - pool *sync.Pool - buffer *bytes.Buffer - full atomic.Bool - count atomic.Int32 -} - -type AllocItem struct { - buffer *BufferItem - allocator *Allocator - Data []byte -} - -func (a *AllocItem) Deallocate() { - if a != nil { - new := a.buffer.count.Add(-1) - if new == 0 && a.buffer.full.Load() { - a.buffer.pool.Put(a.buffer) - } - a.allocator.itemPool.Put(a) - } -} - -func NewAllocator(bucketSize int, 
regionSize int, blockSize int) *Allocator { - a := &Allocator{ - pool: sync.Pool{New: func() any { - return &BufferItem{ - buffer: bytes.NewBuffer(directio.AlignedBlock(int(regionSize))), - } - }}, - itemPool: sync.Pool{New: func() any { - return &AllocItem{} - }}, - blockSize: blockSize, - bucketSize: bucketSize, - regionSize: regionSize, - } - a.current = a.pool.Get().(*BufferItem) - a.current.pool = &a.pool - return a -} - -func (a *Allocator) Allocate(size int) *AllocItem { - item := a.itemPool.Get().(*AllocItem) - a.mu.Lock() - current := a.current - size = alignUp(size, a.blockSize) - if current.buffer.Len() < size { - if current.count.Load() == 0 { - // reuse directly - current.buffer.Reset() - current.buffer = bytes.NewBuffer(current.buffer.Bytes()[:current.buffer.Cap()]) - - } else { - // put back to pool in item dealloc callback - current.full.Store(true) - current = a.pool.Get().(*BufferItem) - a.current = current - current.buffer.Reset() - current.buffer = bytes.NewBuffer(current.buffer.Bytes()[:current.buffer.Cap()]) - current.pool = &a.pool - current.count.Store(0) - current.full.Store(false) - } - } - item.Data = current.buffer.Next(size) - item.buffer = current - item.allocator = a - current.count.Add(1) - a.mu.Unlock() - return item -} diff --git a/internal/entry.go b/internal/entry.go index 581ade3..e37c971 100644 --- a/internal/entry.go +++ b/internal/entry.go @@ -1,6 +1,8 @@ package internal -import "sync/atomic" +import ( + "sync/atomic" +) const ( NEW int8 = iota @@ -14,15 +16,17 @@ type ReadBufItem[K comparable, V any] struct { hash uint64 } type WriteBufItem[K comparable, V any] struct { - entry *Entry[K, V] - cost int64 - code int8 - rechedule bool - fromNVM bool + entry *Entry[K, V] + costChange int64 + code int8 + rechedule bool + fromNVM bool + hash uint64 } type QueueItem[K comparable, V any] struct { entry *Entry[K, V] + hash uint64 fromNVM bool } @@ -34,22 +38,24 @@ type MetaData[K comparable, V any] struct { } type Entry[K comparable, V any] struct { - key K - value V - meta MetaData[K, V] - cost int64 - expire atomic.Int64 - frequency atomic.Int32 - queued uint8 - flag Flag + key K + value V + meta MetaData[K, V] + weight atomic.Int64 + policyWeight int64 + expire atomic.Int64 + queueIndex atomic.Int32 // -1: queue to main, -2 new entry, -3: removed, -4: main, >=0: index + flag Flag } +// used in test only func NewEntry[K comparable, V any](key K, value V, cost int64, expire int64) *Entry[K, V] { entry := &Entry[K, V]{ key: key, value: value, } - entry.cost = cost + entry.weight.Store(cost) + entry.policyWeight = cost if expire > 0 { entry.expire.Store(expire) } @@ -130,23 +136,24 @@ func (e *Entry[K, V]) setNext(entry *Entry[K, V], listType uint8) { func (e *Entry[K, V]) pentry() *Pentry[K, V] { return &Pentry[K, V]{ - Key: e.key, - Value: e.value, - Cost: e.cost, - Expire: e.expire.Load(), - Frequency: e.frequency.Load(), - Removed: e.flag.IsRemoved(), + Key: e.key, + Value: e.value, + Weight: e.weight.Load(), + PolicyWeight: e.policyWeight, + Expire: e.expire.Load(), + Flag: e.flag, } } // entry for persistence type Pentry[K comparable, V any] struct { - Key K - Value V - Cost int64 - Expire int64 - Frequency int32 - Removed bool + Key K + Value V + Weight int64 + PolicyWeight int64 + Expire int64 + Frequency int32 + Flag Flag } func (e *Pentry[K, V]) entry() *Entry[K, V] { @@ -154,9 +161,30 @@ func (e *Pentry[K, V]) entry() *Entry[K, V] { key: e.Key, value: e.Value, } - en.cost = e.Cost - en.frequency.Store(e.Frequency) + en.weight.Store(e.Weight) 
en.expire.Store(e.Expire) - en.flag.SetRemoved(e.Removed) + en.flag = e.Flag + en.policyWeight = e.PolicyWeight return en } + +func (e *Entry[K, V]) Weight() int64 { + return e.weight.Load() +} + +func (e *Entry[K, V]) PolicyWeight() int64 { + return e.policyWeight +} + +func (e *Entry[K, V]) Position() string { + if e.queueIndex.Load() == -3 { + return "REMOVED" + } + if e.queueIndex.Load() >= 0 { + return "QUEUE" + } + if e.meta.prev != nil { + return "MAIN" + } + return "UNKNOWN" +} diff --git a/internal/list.go b/internal/list.go index 90ce52e..adb7b66 100644 --- a/internal/list.go +++ b/internal/list.go @@ -58,6 +58,12 @@ func (l *List[K, V]) display() string { return strings.Join(s, "/") } +func (l *List[K, V]) rangef(fn func(*Entry[K, V])) { + for e := l.Front(); e != nil; e = e.Next(l.listType) { + fn(e) + } +} + func (l *List[K, V]) displayReverse() string { var s []string for e := l.Back(); e != nil; e = e.Prev(l.listType) { @@ -97,12 +103,13 @@ func (l *List[K, V]) insert(e, at *Entry[K, V]) *Entry[K, V] { e.flag.SetProbation(true) } } + e.setPrev(at, l.listType) e.setNext(at.next(l.listType), l.listType) e.prev(l.listType).setNext(e, l.listType) e.next(l.listType).setPrev(e, l.listType) if l.bounded { - l.len.Add(e.cost) + l.len.Add(e.policyWeight) // l.len += int(e.cost.Load()) l.count += 1 } @@ -130,7 +137,7 @@ func (l *List[K, V]) remove(e *Entry[K, V]) { e.flag.SetProtected(false) } if l.bounded { - l.len.Add(-e.cost) + l.len.Add(-e.policyWeight) l.count -= 1 } } diff --git a/internal/list_test.go b/internal/list_test.go index e5b52de..df6d950 100644 --- a/internal/list_test.go +++ b/internal/list_test.go @@ -7,7 +7,7 @@ import ( "github.com/stretchr/testify/require" ) -func TestList(t *testing.T) { +func TestList_PushPop(t *testing.T) { l := NewList[string, string](5, LIST_PROBATION) require.Equal(t, uint(5), l.capacity) require.Equal(t, LIST_PROBATION, l.listType) @@ -55,7 +55,7 @@ func TestList(t *testing.T) { } -func TestListCountCost(t *testing.T) { +func TestList_CountCost(t *testing.T) { l := NewList[string, string](100, LIST_PROBATION) require.Equal(t, uint(100), l.capacity) require.Equal(t, LIST_PROBATION, l.listType) @@ -73,7 +73,7 @@ func TestListCountCost(t *testing.T) { require.Equal(t, 2, l.count) } -func TestWheelList(t *testing.T) { +func TestList_WheelList(t *testing.T) { l := NewList[string, string](5, WHEEL_LIST) require.Equal(t, uint(5), l.capacity) require.Equal(t, WHEEL_LIST, l.listType) diff --git a/internal/nvm/bighash.go b/internal/nvm/bighash.go deleted file mode 100644 index ad6c8b4..0000000 --- a/internal/nvm/bighash.go +++ /dev/null @@ -1,348 +0,0 @@ -package nvm - -import ( - "bytes" - "os" - "sync" - "time" - "unsafe" - - "github.com/Yiling-J/theine-go/internal/alloc" - "github.com/Yiling-J/theine-go/internal/bf" - "github.com/Yiling-J/theine-go/internal/clock" - "github.com/Yiling-J/theine-go/internal/nvm/serializers" - "github.com/zeebo/xxh3" -) - -type BucketHeader struct { - checksum uint64 - generationTime uint64 - capacity uint64 - size uint64 - endOffset uint64 -} - -// func (h *BucketHeader) remainingCapacity() uint64 { -// return h.capacity - h.endOffset -// } - -type BucketEntry struct { - keySize uint64 - valueSize uint64 - cost int64 - expire int64 - hash uint64 -} - -type Bucket struct { - mu sync.RWMutex // used to lock bloomfilter - Bloomfilter *bf.Bloomfilter -} - -// used in persisit data -type BucketP struct { - Index uint64 - Bloomfilter *bf.Bloomfilter -} - -type BigHash struct { - CacheSize uint64 - numBuckets uint64 - 
headerSize uint64 - entrySize uint64 - BucketSize uint64 - GenerationTime uint64 - buckets []*Bucket - Clock *clock.Clock - headerSerializer serializers.Serializer[BucketHeader] - entrySerializer serializers.Serializer[BucketEntry] - file *os.File - allocator *alloc.Allocator -} - -func NewBigHash(cacheSize uint64, bucketSize uint64, bfSize uint32, allocator *alloc.Allocator) *BigHash { - - b := &BigHash{ - headerSize: uint64(unsafe.Sizeof(BucketHeader{})), - entrySize: uint64(unsafe.Sizeof(BucketEntry{})), - CacheSize: uint64(cacheSize), - BucketSize: bucketSize, - GenerationTime: uint64(time.Now().UnixNano()), - numBuckets: uint64(cacheSize / bucketSize), - buckets: []*Bucket{}, - Clock: &clock.Clock{Start: time.Now().UTC()}, - headerSerializer: serializers.NewMemorySerializer[BucketHeader](), - entrySerializer: serializers.NewMemorySerializer[BucketEntry](), - allocator: allocator, - } - for i := 0; i < int(cacheSize/bucketSize); i++ { - b.buckets = append(b.buckets, &Bucket{Bloomfilter: bf.NewWithSize(bfSize)}) - } - return b -} - -// load bucket data to bytes -func (h *BigHash) loadBucketData(index int) (*alloc.AllocItem, *BucketHeader, error) { - alloc := h.allocator.Allocate(int(h.BucketSize)) - offset := index * int(h.BucketSize) - _, err := h.file.ReadAt(alloc.Data, int64(offset)) - if err != nil { - return alloc, nil, err - } - var header BucketHeader - err = h.headerSerializer.Unmarshal(alloc.Data[:h.headerSize], &header) - if err != nil { - return alloc, nil, err - } - var checksumMatch bool - if header.endOffset >= h.headerSize && header.endOffset <= h.BucketSize { - checksum := xxh3.Hash(alloc.Data[h.headerSize:header.endOffset]) - checksumMatch = (checksum == header.checksum) - } - if !checksumMatch || header.generationTime != h.GenerationTime { - header = BucketHeader{ - checksum: xxh3.Hash(alloc.Data[h.headerSize:h.headerSize]), - generationTime: h.GenerationTime, - capacity: h.BucketSize, - size: 0, - endOffset: h.headerSize, - } - hb, err := h.headerSerializer.Marshal(header) - if err != nil { - return alloc, nil, err - } - _ = copy(alloc.Data[:h.headerSize], hb) - - } - return alloc, &header, nil -} - -func (h *BigHash) saveBucketData(index int, data []byte) error { - _, err := h.file.WriteAt(data, int64(index*int(h.BucketSize))) - if err != nil { - return err - } - cached := make([]byte, len(data)) - _ = copy(cached, data) - return nil -} - -// delete from bucket -func (h *BigHash) deleteFromBucket(keyh uint64, keyb []byte) error { - index := keyh % h.numBuckets - bucket := h.buckets[index] - // hold lock until write back to file - bucket.mu.Lock() - defer bucket.mu.Unlock() - alloc, header, err := h.loadBucketData(int(index)) - defer alloc.Deallocate() - if err != nil { - return err - } - bucketBytes := alloc.Data - // init header - if header.capacity == 0 { - header.capacity = uint64(h.BucketSize) - header.endOffset = h.headerSize - } - bucket.Bloomfilter.Reset() - // init new buffer - newAlloc := h.allocator.Allocate(int(h.BucketSize)) - defer newAlloc.Deallocate() - block := newAlloc.Data - offset := int(h.headerSize) - - now := h.Clock.NowNano() - // write existing entries to new block, stop when no space left in block - offsetOld := int(h.headerSize) - count := 0 - for i := 0; i < int(header.size); i++ { - // nead to check entry meta first and remove duplicate/expired entries - var entry BucketEntry - err := h.entrySerializer.Unmarshal( - bucketBytes[offsetOld:offsetOld+int(h.entrySize)], - &entry, - ) - if err != nil { - return err - } - total := entry.keySize + 
entry.valueSize + h.entrySize - if entry.hash == keyh || (entry.expire > 0 && entry.expire <= now) { - offsetOld += int(total) - continue - } - // not enough space, stop loop - if total > h.BucketSize-uint64(offset) { - break - } - n := copy(block[offset:], bucketBytes[offsetOld:offsetOld+int(total)]) - offset += n - offsetOld += n - count += 1 - bucket.Bloomfilter.Insert(entry.hash) - } - header.endOffset = uint64(offset) - header.checksum = xxh3.Hash(block[h.headerSize:header.endOffset]) - header.size = uint64(count) - hb, err := h.headerSerializer.Marshal(*header) - if err != nil { - return err - } - // add header - _ = copy(block[0:], hb) - return h.saveBucketData(int(index), block) -} - -// insert new key to bucket -func (h *BigHash) addToBucket(keyh uint64, keyb []byte, value []byte, cost int64, expire int64) error { - index := keyh % h.numBuckets - bucket := h.buckets[index] - // hold lock until write back to file - bucket.mu.Lock() - defer bucket.mu.Unlock() - alloc, header, err := h.loadBucketData(int(index)) - defer alloc.Deallocate() - if err != nil { - return err - } - bucketBytes := alloc.Data - // init header - if header.capacity == 0 { - header.capacity = uint64(h.BucketSize) - header.endOffset = h.headerSize - } - bucket.Bloomfilter.Reset() - // init new buffer - allocNew := h.allocator.Allocate(int(h.BucketSize)) - defer allocNew.Deallocate() - block := allocNew.Data - offset := int(h.headerSize) - // write new entry first - bn, err := h.entrySerializer.Marshal(BucketEntry{ - keySize: uint64(len(keyb)), - valueSize: uint64(len(value)), - cost: cost, - expire: expire, - hash: keyh, - }) - if err != nil { - return err - } - n := copy(block[offset:], bn) - offset += n - n = copy(block[offset:], keyb) - offset += n - n = copy(block[offset:], value) - offset += n - bucket.Bloomfilter.Insert(keyh) - - now := h.Clock.NowNano() - // write existing entries to new block, stop when no space left in block - offsetOld := int(h.headerSize) - count := 1 // new entry - for i := 0; i < int(header.size); i++ { - // nead to check entry meta first and remove duplicate/expired entries - var entry BucketEntry - err := h.entrySerializer.Unmarshal( - bucketBytes[offsetOld:offsetOld+int(h.entrySize)], - &entry, - ) - if err != nil { - return err - } - total := entry.keySize + entry.valueSize + h.entrySize - if entry.hash == keyh || (entry.expire > 0 && entry.expire <= now) { - offsetOld += int(total) - continue - } - // not enough space, stop loop - if total > h.BucketSize-uint64(offset) { - break - } - n = copy(block[offset:], bucketBytes[offsetOld:offsetOld+int(total)]) - offset += n - offsetOld += n - count += 1 - bucket.Bloomfilter.Insert(entry.hash) - } - header.endOffset = uint64(offset) - header.checksum = xxh3.Hash(block[h.headerSize:header.endOffset]) - header.size = uint64(count) - hb, err := h.headerSerializer.Marshal(*header) - if err != nil { - return err - } - // add header - _ = copy(block[0:], hb) - err = h.saveBucketData(int(index), block) - return err -} - -func (h *BigHash) Insert(key []byte, value []byte, cost int64, expire int64) error { - kh := xxh3.Hash(key) - return h.addToBucket(kh, key, value, cost, expire) -} - -func (h *BigHash) Delete(key []byte) error { - kh := xxh3.Hash(key) - return h.deleteFromBucket(kh, key) -} - -func (h *BigHash) getFromBucket(keyh uint64, key []byte) (entry BucketEntry, item *alloc.AllocItem, ok bool, err error) { - index := keyh % h.numBuckets - bucket := h.buckets[index] - bucket.mu.RLock() - if !bucket.Bloomfilter.Exist(keyh) { - 
bucket.mu.RUnlock() - return entry, nil, ok, err - } - alloc, header, err := h.loadBucketData(int(index)) - bucket.mu.RUnlock() - if err != nil { - return entry, alloc, ok, err - } - if header.size == 0 { - return entry, alloc, false, nil - } - bucketData := alloc.Data - - // empty bucket - if header.size == 0 { - return entry, alloc, false, err - } - offset := int(h.headerSize) - for i := 0; i < int(header.size); i++ { - err := h.entrySerializer.Unmarshal( - bucketData[offset:offset+int(h.entrySize)], - &entry, - ) - if err != nil { - return entry, alloc, ok, err - } - offset += int(h.entrySize) - if entry.hash == keyh && bytes.Equal(bucketData[offset:offset+int(entry.keySize)], key) { - offset += int(entry.keySize) - alloc.Data = alloc.Data[offset : offset+int(entry.valueSize)] - if entry.expire > 0 && entry.expire <= h.Clock.NowNano() { - return entry, alloc, false, nil - } - return entry, alloc, true, nil - - } else { - offset += int(entry.keySize + entry.valueSize) - } - } - return entry, alloc, ok, err -} - -func (h *BigHash) Lookup(key []byte) (item *alloc.AllocItem, cost int64, expire int64, ok bool, err error) { - kh := xxh3.Hash(key) - entry, item, ok, err := h.getFromBucket(kh, key) - if err != nil { - return nil, cost, expire, ok, err - } - if ok { - return item, entry.cost, entry.expire, true, nil - } - return nil, cost, expire, false, nil -} diff --git a/internal/nvm/bighash_test.go b/internal/nvm/bighash_test.go deleted file mode 100644 index 32b8d72..0000000 --- a/internal/nvm/bighash_test.go +++ /dev/null @@ -1,46 +0,0 @@ -package nvm - -import ( - "os" - "strconv" - "testing" - "time" - - "github.com/Yiling-J/theine-go/internal/alloc" - "github.com/Yiling-J/theine-go/internal/nvm/directio" - "github.com/Yiling-J/theine-go/internal/nvm/preallocate" - "github.com/stretchr/testify/require" -) - -func TestBigHash(t *testing.T) { - f, err := directio.OpenFile("bfoo", os.O_RDWR|os.O_CREATE, 0666) - require.Nil(t, err) - defer os.Remove("bfoo") - err = f.Truncate(4096 * 50) - require.Nil(t, err) - err = preallocate.Preallocate(f, 4096*50, true) - require.Nil(t, err) - bh := NewBigHash(4096*50, 4096, 8, alloc.NewAllocator(4096, 16<<20, 4096)) - require.Equal(t, 64, int(bh.buckets[0].Bloomfilter.M)) - bh.file = f - for i := 0; i < 100; i++ { - key := []byte(strconv.Itoa(i)) - err := bh.Insert(key, key, 1, 0) - require.Nil(t, err) - } - for i := 0; i < 100; i++ { - key := []byte(strconv.Itoa(i)) - v, _, _, _, err := bh.Lookup(key) - require.Nil(t, err) - require.Equal(t, key, v.Data) - } - - // test expire - key := []byte(strconv.Itoa(500)) - err = bh.Insert(key, key, 1, bh.Clock.ExpireNano(10*time.Millisecond)) - require.Nil(t, err) - time.Sleep(30 * time.Millisecond) - _, _, _, ok, err := bh.Lookup(key) - require.Nil(t, err) - require.False(t, ok) -} diff --git a/internal/nvm/blockcache.go b/internal/nvm/blockcache.go deleted file mode 100644 index e37bc87..0000000 --- a/internal/nvm/blockcache.go +++ /dev/null @@ -1,223 +0,0 @@ -package nvm - -import ( - "bytes" - "errors" - "sync" - "time" - "unsafe" - - "github.com/Yiling-J/theine-go/internal/alloc" - "github.com/Yiling-J/theine-go/internal/clock" - "github.com/Yiling-J/theine-go/internal/nvm/serializers" - "github.com/zeebo/xxh3" -) - -const ( - alignSize = 512 - readBufSize = 64 -) - -func spread(h uint64) uint64 { - h ^= h >> 17 - h *= 0xed5ad4bb - h ^= h >> 11 - h *= 0xac4c1b51 - h ^= h >> 15 - return h -} - -func align(n int) int { - l := n % alignSize - if l != 0 { - return n + alignSize - l - } - return n -} - -type 
BlockInfo struct { - address uint32 - sizeHint uint32 - removed bool -} - -type BlockInfoP struct { - KeyHash uint64 - Address uint32 - SizeHint uint32 -} - -type BlockEntry struct { - keySize uint64 - valueSize uint64 - cost int64 - expire int64 - checksum uint64 -} - -type BlockCache struct { - Offset uint64 - mu *sync.RWMutex - Clock *clock.Clock - CacheSize uint64 - entrySize uint64 - RegionSize uint32 - index map[uint64]*BlockInfo - entrySerializer serializers.Serializer[BlockEntry] - regionManager *RegionManager -} - -func NewBlockCache(cacheSize int, regionSize int, cleanRegionSize uint32, offset uint64, allocator *alloc.Allocator, errHandler func(err error)) *BlockCache { - regionSize = align(regionSize) - regionCount := cacheSize / regionSize - b := &BlockCache{ - Offset: offset, - mu: &sync.RWMutex{}, - entrySerializer: serializers.NewMemorySerializer[BlockEntry](), - CacheSize: uint64(cacheSize), - RegionSize: uint32(regionSize), - entrySize: uint64(unsafe.Sizeof(BlockEntry{})), - index: make(map[uint64]*BlockInfo, cacheSize/regionSize), - Clock: &clock.Clock{Start: time.Now().UTC()}, - } - if errHandler == nil { - errHandler = func(err error) {} - } - b.regionManager = NewRegionManager( - offset, uint32(regionSize), uint32(regionCount), cleanRegionSize, b.removeRegion, - allocator, errHandler, - ) - - return b - -} - -func (c *BlockCache) realAddress(address uint32) (uint64, uint64) { - base := uint64(address) * uint64(alignSize) - return base / uint64(c.RegionSize), base % uint64(c.RegionSize) -} - -func (c *BlockCache) Lookup(key []byte) (item *alloc.AllocItem, cost int64, expire int64, ok bool, err error) { - kh := xxh3.Hash(key) - c.mu.RLock() - index, ok := c.index[kh] - if !ok { - c.mu.RUnlock() - return nil, cost, expire, false, nil - } - c.mu.RUnlock() - - rid, offset := c.realAddress(index.address) - item, err = c.regionManager.GetData( - index, rid, offset, uint64(index.sizeHint)*alignSize, - ) - if err != nil { - return item, cost, expire, false, err - } - if item == nil { - return item, cost, expire, false, nil - } - var entry BlockEntry - err = c.entrySerializer.Unmarshal(item.Data[:c.entrySize], &entry) - if err != nil { - return item, cost, expire, false, err - } - checksum := xxh3.Hash(item.Data[c.entrySize : c.entrySize+entry.keySize+entry.valueSize]) - if checksum != entry.checksum { - return item, cost, expire, false, errors.New("checksum mismatch") - } - - if entry.expire > 0 && entry.expire <= c.Clock.NowNano() { - return item, cost, expire, false, err - } - - if !bytes.Equal(key, item.Data[c.entrySize:c.entrySize+entry.keySize]) { - return item, cost, expire, false, err - } - offset = c.entrySize + entry.keySize - item.Data = item.Data[offset : offset+entry.valueSize] - return item, entry.cost, entry.expire, true, err - -} - -func (c *BlockCache) Insert(key []byte, value []byte, cost int64, expire int64) error { - kh := xxh3.Hash(key) - header := BlockEntry{ - keySize: uint64(len(key)), - valueSize: uint64(len(value)), - cost: cost, - expire: expire, - } - size := int(c.entrySize) + len(key) + len(value) - res := size % alignSize - if res != 0 { - size += (alignSize - res) - } - rid, offset, buffer, cb, err := c.regionManager.Allocate(size) - if err != nil { - return err - } - - // esacpe - _, err = buffer.Write(make([]byte, c.entrySize)) - if err != nil { - return err - } - _, err = buffer.Write(key) - if err != nil { - return err - } - _, err = buffer.Write(value) - if err != nil { - return err - } - b := buffer.Bytes() - header.checksum = 
xxh3.Hash(b[int(c.entrySize):]) - hb, err := c.entrySerializer.Marshal(header) - if err != nil { - return err - } - _ = copy(b[:], hb) - cb() - c.mu.Lock() - c.index[kh] = &BlockInfo{ - sizeHint: uint32(size / alignSize), - address: uint32((uint64(rid)*uint64(c.RegionSize) + offset) / alignSize), - } - c.mu.Unlock() - return nil -} - -func (c *BlockCache) removeRegion(data []byte, endOffset uint64) error { - offset := 0 - for offset < int(endOffset) { - var entry BlockEntry - err := c.entrySerializer.Unmarshal(data[offset:offset+int(c.entrySize)], &entry) - if err != nil { - return err - } - offset += int(c.entrySize) - checksum := xxh3.Hash(data[offset : offset+int(entry.keySize+entry.valueSize)]) - if checksum != entry.checksum { - return errors.New("checksum mismatch") - } - keyh := xxh3.Hash(data[offset : offset+int(entry.keySize)]) - c.mu.Lock() - i, ok := c.index[keyh] - if ok { - i.removed = true - delete(c.index, keyh) - } - c.mu.Unlock() - offset += int(entry.keySize + entry.valueSize) - offset = align(offset) - } - return nil -} - -func (c *BlockCache) Delete(key []byte) { - kh := xxh3.Hash(key) - c.mu.Lock() - delete(c.index, kh) - c.mu.Unlock() -} diff --git a/internal/nvm/blockcache_test.go b/internal/nvm/blockcache_test.go deleted file mode 100644 index d17e3a8..0000000 --- a/internal/nvm/blockcache_test.go +++ /dev/null @@ -1,120 +0,0 @@ -package nvm - -import ( - "bytes" - "encoding/binary" - "math/rand" - "os" - "strconv" - "sync" - "testing" - "time" - - "github.com/Yiling-J/theine-go/internal/alloc" - "github.com/Yiling-J/theine-go/internal/nvm/directio" - "github.com/Yiling-J/theine-go/internal/nvm/preallocate" - "github.com/stretchr/testify/require" -) - -func TestBlockCacheSimple(t *testing.T) { - bc := NewBlockCache(500<<10, 20<<10, 3, 0, alloc.NewAllocator(0, 20<<10, 4096), nil) - - f, err := os.OpenFile("bfoo", os.O_RDWR|os.O_CREATE, 0666) - require.Nil(t, err) - defer os.Remove("bfoo") - err = f.Truncate(int64(bc.CacheSize)) - require.Nil(t, err) - err = preallocate.Preallocate(f, int64(bc.CacheSize), true) - require.Nil(t, err) - bc.regionManager.file = f - - for i := 0; i < 10; i++ { - key := []byte(strconv.Itoa(i)) - value := make([]byte, 10<<10+i) - err := bc.Insert(key, value, 1, 0) - require.Nil(t, err) - } - for i := 0; i < 10; i++ { - key := []byte(strconv.Itoa(i)) - v, _, _, _, err := bc.Lookup(key) - require.Nil(t, err) - require.Equal(t, 10<<10+i, len(v.Data)) - } - - // expire test - key := []byte(strconv.Itoa(500)) - value := make([]byte, 10<<10+5) - err = bc.Insert(key, value, 1, bc.Clock.ExpireNano(10*time.Millisecond)) - require.Nil(t, err) - time.Sleep(30 * time.Millisecond) - _, _, _, ok, err := bc.Lookup(key) - require.Nil(t, err) - require.False(t, ok) -} - -type IntSerializer struct{} - -func (s *IntSerializer) Marshal(i int) ([]byte, error) { - buff := bytes.NewBuffer(make([]byte, 0)) - err := binary.Write(buff, binary.BigEndian, uint64(i)) - if err != nil { - return nil, err - } - return buff.Bytes(), nil -} - -func (s *IntSerializer) Unmarshal(raw []byte, v *int) error { - num := binary.BigEndian.Uint64(raw) - *v = int(num) - return nil -} - -func TestBlockCacheParallel(t *testing.T) { - bc := NewBlockCache(5000<<10, 50<<10, 3, 0, alloc.NewAllocator(0, 50<<10, 4096), nil) - - f, err := os.OpenFile("bfoo", os.O_RDWR|os.O_CREATE, 0666) - require.Nil(t, err) - err = f.Truncate(int64(bc.CacheSize)) - require.Nil(t, err) - err = preallocate.Preallocate(f, int64(bc.CacheSize), true) - require.Nil(t, err) - f.Close() - f, err = 
directio.OpenFile("bfoo", os.O_RDWR, 0666) - require.Nil(t, err) - defer os.Remove("bfoo") - bc.regionManager.file = f - - var wg sync.WaitGroup - for i := 1; i <= 20; i++ { - wg.Add(1) - go func() { - defer wg.Done() - s := &IntSerializer{} - for i := 0; i < 200; i++ { - n := rand.Intn(200) - key, err := s.Marshal(n) - require.Nil(t, err) - v, _, _, ok, err := bc.Lookup(key) - if err != nil { - panic(err) - } - if ok { - expected, err := s.Marshal(n) - require.Nil(t, err) - require.Equal(t, expected, v.Data[:8]) - } else { - base, err := s.Marshal(n) - require.Nil(t, err) - value := make([]byte, 10<<10) - copy(value, base) - err = bc.Insert(key, value, 1, 0) - if err != nil { - panic(err) - } - } - - } - }() - } - wg.Wait() -} diff --git a/internal/nvm/directio/.gitignore b/internal/nvm/directio/.gitignore deleted file mode 100644 index 5236e1e..0000000 --- a/internal/nvm/directio/.gitignore +++ /dev/null @@ -1,2 +0,0 @@ -*~ - diff --git a/internal/nvm/directio/COPYING b/internal/nvm/directio/COPYING deleted file mode 100644 index 8c27c67..0000000 --- a/internal/nvm/directio/COPYING +++ /dev/null @@ -1,20 +0,0 @@ -Copyright (C) 2012 by Nick Craig-Wood http://www.craig-wood.com/nick/ - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. - diff --git a/internal/nvm/directio/README.md b/internal/nvm/directio/README.md deleted file mode 100644 index c740b90..0000000 --- a/internal/nvm/directio/README.md +++ /dev/null @@ -1,58 +0,0 @@ -DirectIO -======== - -This is library for the Go language to enable use of Direct IO under -all supported OSes of Go (except openbsd and plan9). - -Direct IO does IO to and from disk without buffering data in the OS. -It is useful when you are reading or writing lots of data you don't -want to fill the OS cache up with. - -See here for package docs - - http://godoc.org/github.com/ncw/directio - -Install -------- - -Directio is a Go library and installs in the usual way - - go get github.com/ncw/directio - -Usage ------ - -Instead of using os.OpenFile use directio.OpenFile - - in, err := directio.OpenFile(file, os.O_RDONLY, 0666) - -And when reading or writing blocks, make sure you do them in chunks of -directio.BlockSize using memory allocated by directio.AlignedBlock - - block := directio.AlignedBlock(directio.BlockSize) - _, err := io.ReadFull(in, block) - -License -------- - -This is free software under the terms of MIT the license (check the -COPYING file included in this package). 
- -Contact and support -------------------- - -The project website is at: - -- https://github.com/ncw/directio - -There you can file bug reports, ask for help or contribute patches. - -Authors -------- - -- Nick Craig-Wood - -Contributors ------------- - -- Pavel Odintsov diff --git a/internal/nvm/directio/direct_io.go b/internal/nvm/directio/direct_io.go deleted file mode 100644 index 42ad06c..0000000 --- a/internal/nvm/directio/direct_io.go +++ /dev/null @@ -1,57 +0,0 @@ -// This is library for the Go language to enable use of Direct IO under -// all supported OSes of Go. -// -// Direct IO does IO to and from disk without buffering data in the OS. -// It is useful when you are reading or writing lots of data you don't -// want to fill the OS cache up with. -// -// Instead of using os.OpenFile use directio.OpenFile -// -// in, err := directio.OpenFile(file, os.O_RDONLY, 0666) -// -// And when reading or writing blocks, make sure you do them in chunks of -// directio.BlockSize using memory allocated by directio.AlignedBlock -// -// block := directio.AlignedBlock(BlockSize) -// _, err := io.ReadFull(in, block) -package directio - -import ( - "log" - "unsafe" -) - -// alignment returns alignment of the block in memory -// with reference to AlignSize -// -// Can't check alignment of a zero sized block as &block[0] is invalid -func alignment(block []byte, AlignSize int) int { - return int(uintptr(unsafe.Pointer(&block[0])) & uintptr(AlignSize-1)) -} - -// IsAligned checks wether passed byte slice is aligned -func IsAligned(block []byte) bool { - return alignment(block, AlignSize) == 0 -} - -// AlignedBlock returns []byte of size BlockSize aligned to a multiple -// of AlignSize in memory (must be power of two) -func AlignedBlock(BlockSize int) []byte { - block := make([]byte, BlockSize+AlignSize) - if AlignSize == 0 { - return block - } - a := alignment(block, AlignSize) - offset := 0 - if a != 0 { - offset = AlignSize - a - } - block = block[offset : offset+BlockSize] - // Can't check alignment of a zero sized block - if BlockSize != 0 { - if !IsAligned(block) { - log.Fatal("Failed to align block") - } - } - return block -} diff --git a/internal/nvm/directio/direct_io_darwin.go b/internal/nvm/directio/direct_io_darwin.go deleted file mode 100644 index dd5da54..0000000 --- a/internal/nvm/directio/direct_io_darwin.go +++ /dev/null @@ -1,36 +0,0 @@ -// Direct IO for darwin - -package directio - -import ( - "fmt" - "os" - "syscall" -) - -const ( - // OSX doesn't need any alignment - AlignSize = 0 - - // Minimum block size - BlockSize = 4096 -) - -func OpenFile(name string, flag int, perm os.FileMode) (file *os.File, err error) { - file, err = os.OpenFile(name, flag, perm) - if err != nil { - return - } - - // Set F_NOCACHE to avoid caching - // F_NOCACHE Turns data caching off/on. A non-zero value in arg turns data caching off. A value - // of zero in arg turns data caching on. 
- _, _, e1 := syscall.Syscall(syscall.SYS_FCNTL, uintptr(file.Fd()), syscall.F_NOCACHE, 1) - if e1 != 0 { - err = fmt.Errorf("Failed to set F_NOCACHE: %s", e1) - file.Close() - file = nil - } - - return -} diff --git a/internal/nvm/directio/direct_io_simple.go b/internal/nvm/directio/direct_io_simple.go deleted file mode 100644 index 6fef538..0000000 --- a/internal/nvm/directio/direct_io_simple.go +++ /dev/null @@ -1,20 +0,0 @@ -// +build solaris illumos plan9 openbsd - -package directio - -import ( - "os" -) - -const ( - // Size to align the buffer to - AlignSize = 4096 - - // Minimum block size - BlockSize = 4096 -) - -// OpenFile just call os.OpenFile with same params -func OpenFile(name string, flag int, perm os.FileMode) (file *os.File, err error) { - return os.OpenFile(name, flag, perm) -} diff --git a/internal/nvm/directio/direct_io_test.go b/internal/nvm/directio/direct_io_test.go deleted file mode 100644 index 338c1db..0000000 --- a/internal/nvm/directio/direct_io_test.go +++ /dev/null @@ -1,71 +0,0 @@ -package directio_test - -import ( - "bytes" - "io" - "os" - "testing" - - "github.com/Yiling-J/theine-go/internal/nvm/directio" -) - -func TestDirectIo(t *testing.T) { - // Make a temporary file name - fd, err := os.CreateTemp("", "direct_io_test") - if err != nil { - t.Fatal("Failed to make temp file", err) - } - path := fd.Name() - fd.Close() - - // starting block - block1 := directio.AlignedBlock(directio.BlockSize) - for i := 0; i < len(block1); i++ { - block1[i] = 'A' - } - - // Write the file - out, err := directio.OpenFile(path, os.O_CREATE|os.O_WRONLY, 0666) - if err != nil { - t.Fatal("Failed to directio.OpenFile for read", err) - } - _, err = out.Write(block1) - if err != nil { - t.Fatal("Failed to write", err) - } - err = out.Close() - if err != nil { - t.Fatal("Failed to close writer", err) - } - - // Read the file - block2 := directio.AlignedBlock(directio.BlockSize) - in, err := directio.OpenFile(path, os.O_RDONLY, 0666) - if err != nil { - t.Fatal("Failed to directio.OpenFile for write", err) - } - _, err = io.ReadFull(in, block2) - if err != nil { - t.Fatal("Failed to read", err) - } - err = in.Close() - if err != nil { - t.Fatal("Failed to close reader", err) - } - - // Tidy - err = os.Remove(path) - if err != nil { - t.Fatal("Failed to remove temp file", path, err) - } - - // Compare - if !bytes.Equal(block1, block2) { - t.Fatal("Read not the same as written") - } -} - -func TestZeroSizedBlock(t *testing.T) { - // This should not panic! 
- directio.AlignedBlock(0) -} diff --git a/internal/nvm/directio/direct_io_unix.go b/internal/nvm/directio/direct_io_unix.go deleted file mode 100644 index 9b8981d..0000000 --- a/internal/nvm/directio/direct_io_unix.go +++ /dev/null @@ -1,23 +0,0 @@ -// Direct IO for Unix - -// +build !windows,!darwin,!openbsd,!plan9,!solaris,!illumos - -package directio - -import ( - "os" - "syscall" -) - -const ( - // Size to align the buffer to - AlignSize = 4096 - - // Minimum block size - BlockSize = 4096 -) - -// OpenFile is a modified version of os.OpenFile which sets O_DIRECT -func OpenFile(name string, flag int, perm os.FileMode) (file *os.File, err error) { - return os.OpenFile(name, syscall.O_DIRECT|flag, perm) -} diff --git a/internal/nvm/directio/direct_io_windows.go b/internal/nvm/directio/direct_io_windows.go deleted file mode 100644 index 49508b8..0000000 --- a/internal/nvm/directio/direct_io_windows.go +++ /dev/null @@ -1,92 +0,0 @@ -// Direct IO for windows - -package directio - -import ( - "os" - "syscall" - "unicode/utf16" -) - -const ( - // Size to align the buffer to - AlignSize = 4096 - - // Minimum block size - BlockSize = 4096 - - // Extra flags for windows - FILE_FLAG_NO_BUFFERING = 0x20000000 - FILE_FLAG_WRITE_THROUGH = 0x80000000 -) - -// utf16FromString returns the UTF-16 encoding of the UTF-8 string -// s, with a terminating NUL added. If s contains a NUL byte at any -// location, it returns (nil, EINVAL). -// -// FIXME copied from go source -func utf16FromString(s string) ([]uint16, error) { - for i := 0; i < len(s); i++ { - if s[i] == 0 { - return nil, syscall.EINVAL - } - } - return utf16.Encode([]rune(s + "\x00")), nil -} - -// OpenFile is a modified version of os.OpenFile which sets the -// passes the following flags to windows CreateFile. -// -// The FILE_FLAG_NO_BUFFERING takes this concept one step further and -// eliminates all read-ahead file buffering and disk caching as well, -// so that all reads are guaranteed to come from the file and not from -// any system buffer or disk cache. When using FILE_FLAG_NO_BUFFERING, -// disk reads and writes must be done on sector boundaries, and buffer -// addresses must be aligned on disk sector boundaries in memory. 
-// -// FIXME copied from go source then modified -func OpenFile(path string, mode int, perm os.FileMode) (file *os.File, err error) { - if len(path) == 0 { - return nil, &os.PathError{"open", path, syscall.ERROR_FILE_NOT_FOUND} - } - pathp, err := utf16FromString(path) - if err != nil { - return nil, &os.PathError{"open", path, err} - } - var access uint32 - switch mode & (os.O_RDONLY | os.O_WRONLY | os.O_RDWR) { - case os.O_RDONLY: - access = syscall.GENERIC_READ - case os.O_WRONLY: - access = syscall.GENERIC_WRITE - case os.O_RDWR: - access = syscall.GENERIC_READ | syscall.GENERIC_WRITE - } - if mode&syscall.O_CREAT != 0 { - access |= syscall.GENERIC_WRITE - } - if mode&os.O_APPEND != 0 { - access &^= syscall.GENERIC_WRITE - access |= syscall.FILE_APPEND_DATA - } - sharemode := uint32(syscall.FILE_SHARE_READ | syscall.FILE_SHARE_WRITE) - var sa *syscall.SecurityAttributes - var createmode uint32 - switch { - case mode&(syscall.O_CREAT|os.O_EXCL) == (syscall.O_CREAT | os.O_EXCL): - createmode = syscall.CREATE_NEW - case mode&(syscall.O_CREAT|os.O_TRUNC) == (syscall.O_CREAT | os.O_TRUNC): - createmode = syscall.CREATE_ALWAYS - case mode&syscall.O_CREAT == syscall.O_CREAT: - createmode = syscall.OPEN_ALWAYS - case mode&os.O_TRUNC == os.O_TRUNC: - createmode = syscall.TRUNCATE_EXISTING - default: - createmode = syscall.OPEN_EXISTING - } - h, e := syscall.CreateFile(&pathp[0], access, sharemode, sa, createmode, syscall.FILE_ATTRIBUTE_NORMAL|FILE_FLAG_NO_BUFFERING|FILE_FLAG_WRITE_THROUGH, 0) - if e != nil { - return nil, &os.PathError{"open", path, e} - } - return os.NewFile(uintptr(h), path), nil -} diff --git a/internal/nvm/nvm.go b/internal/nvm/nvm.go deleted file mode 100644 index 6129fa9..0000000 --- a/internal/nvm/nvm.go +++ /dev/null @@ -1,216 +0,0 @@ -package nvm - -import ( - "errors" - "os" - "unsafe" - - "github.com/Yiling-J/theine-go/internal" - "github.com/Yiling-J/theine-go/internal/alloc" - "github.com/Yiling-J/theine-go/internal/clock" - "github.com/Yiling-J/theine-go/internal/nvm/directio" - "github.com/Yiling-J/theine-go/internal/nvm/preallocate" -) - -type NvmStore[K comparable, V any] struct { - file *os.File - bighash *BigHash - blockcache *BlockCache - keySerializer internal.Serializer[K] - valueSerializer internal.Serializer[V] - errorHandler func(err error) - bigHashMaxEntrySize int - blockCacheMaxEntrySize int -} - -const ( - BigHashMetaBlock uint8 = iota - BigHashMetaBucketBlock - BlockCacheMetaBlock - BlockCacheMetaIndexBlock - DataBlock -) - -func alignDown(num int, alignment int) int { - return num - num%alignment -} - -func alignUp(num int, alignment int) int { - return alignDown(num+alignment-1, alignment) -} - -func NewNvmStore[K comparable, V any]( - file string, blockSize int, cacheSize int, bucketSize int, regionSize int, - cleanRegionSize int, sizePct uint8, bigHashMaxEntrySize int, bfSize int, errorHandler func(err error), - keySerializer internal.Serializer[K], - valueSerializer internal.Serializer[V], -) (*NvmStore[K, V], error) { - if sizePct > 100 { - return nil, errors.New("sizePct larger than 100") - } - - f, err := os.OpenFile(file, os.O_RDWR|os.O_CREATE, 0666) - if err != nil { - return nil, err - } - fs, err := f.Stat() - if err != nil { - return nil, err - } - allocSize := int64(cacheSize) - fs.Size() - if allocSize < 0 { - err = f.Truncate(int64(cacheSize)) - if err != nil { - return nil, err - } - } - if allocSize > 0 { - err = preallocate.Preallocate(f, allocSize, false) - if err != nil { - return nil, err - } - err = 
f.Truncate(int64(cacheSize)) - if err != nil { - return nil, err - } - } - err = f.Close() - if err != nil { - return nil, err - } - f, err = directio.OpenFile(file, os.O_RDWR, 0666) - if err != nil { - return nil, err - } - store := &NvmStore[K, V]{ - file: f, keySerializer: keySerializer, valueSerializer: valueSerializer, - errorHandler: errorHandler, - } - if store.errorHandler == nil { - store.errorHandler = func(err error) {} - } - bhSize := cacheSize * int(sizePct) / 100 - bcSize := cacheSize - bhSize - bucketSize = alignUp(bucketSize, blockSize) - regionSize = alignUp(regionSize, blockSize) - allocator := alloc.NewAllocator(bucketSize, regionSize, blockSize) - if bhSize > 0 { - bhSize = alignDown(bhSize, blockSize) - bh := NewBigHash(uint64(bhSize), uint64(bucketSize), uint32(bfSize), allocator) - bh.file = f - store.bighash = bh - } - if bcSize > 0 { - bcSize = alignDown(bcSize, blockSize) - bc := NewBlockCache( - bcSize, regionSize, uint32(cleanRegionSize), uint64(bhSize), allocator, errorHandler, - ) - bc.regionManager.file = f - store.blockcache = bc - } - - max := bucketSize - int(unsafe.Sizeof(BucketHeader{})) + int(unsafe.Sizeof(BucketEntry{})) - if bigHashMaxEntrySize > 0 { - if bigHashMaxEntrySize >= max { - return nil, errors.New("bigHashMaxEntrySize too large") - } - store.bigHashMaxEntrySize = bigHashMaxEntrySize - } else { - store.bigHashMaxEntrySize = max - } - store.blockCacheMaxEntrySize = align(regionSize) - return store, nil -} - -func (n *NvmStore[K, V]) SetClock(clock *clock.Clock) { - if n.bighash != nil { - n.bighash.Clock = clock - } - if n.blockcache != nil { - n.blockcache.Clock = clock - } - -} - -func (n *NvmStore[K, V]) Get(key K) (value V, cost int64, expire int64, ok bool, err error) { - kb, err := n.keySerializer.Marshal(key) - if err != nil { - return value, cost, expire, false, err - } - alloc, cost, expire, ok, err := n.get(kb) - if alloc != nil { - defer alloc.Deallocate() - } - if err != nil { - return value, cost, expire, false, err - } - if !ok { - return value, cost, expire, false, &internal.NotFound{} - } - err = n.valueSerializer.Unmarshal(alloc.Data, &value) - if err != nil { - return value, cost, expire, false, err - } - return value, cost, expire, true, nil -} - -func (n *NvmStore[K, V]) get(key []byte) (item *alloc.AllocItem, cost int64, expire int64, ok bool, err error) { - if n.bighash != nil { - item, cost, expire, ok, err = n.bighash.Lookup(key) - if err != nil || ok { - return - } - } - if n.blockcache != nil { - return n.blockcache.Lookup(key) - } - return item, cost, expire, ok, err -} - -func (n *NvmStore[K, V]) Set(key K, value V, cost int64, expire int64) error { - kb, err := n.keySerializer.Marshal(key) - if err != nil { - return err - } - vb, err := n.valueSerializer.Marshal(value) - if err != nil { - return err - } - return n.set(kb, vb, cost, expire) -} - -func (n *NvmStore[K, V]) set(key []byte, value []byte, cost int64, expire int64) error { - if n.bighash != nil && len(key)+len(value) <= n.bigHashMaxEntrySize { - return n.bighash.Insert(key, value, cost, expire) - } - if n.blockcache != nil && len(key)+len(value) <= n.blockCacheMaxEntrySize { - return n.blockcache.Insert(key, value, cost, expire) - } - return nil -} - -func (n *NvmStore[K, V]) Delete(key K) error { - kb, err := n.keySerializer.Marshal(key) - if err != nil { - return err - } - return n.delete(kb) -} - -func (n *NvmStore[K, V]) delete(key []byte) error { - if n.bighash != nil { - err := n.bighash.Delete(key) - if err != nil { - return err - } - } - if 
n.blockcache != nil { - n.blockcache.Delete(key) - return nil - } - return nil -} - -func (n *NvmStore[K, V]) HandleAsyncError(err error) { - n.errorHandler(err) -} diff --git a/internal/nvm/nvm_test.go b/internal/nvm/nvm_test.go deleted file mode 100644 index ccd3673..0000000 --- a/internal/nvm/nvm_test.go +++ /dev/null @@ -1,131 +0,0 @@ -package nvm - -import ( - "bytes" - "encoding/binary" - "errors" - "os" - "testing" - - "github.com/stretchr/testify/require" -) - -func TestNvmSetup(t *testing.T) { - // block size is 16k - // bucket size will align to 16k - // bighash raw size is 25357910, align to 25346048 - // block cache raw size is 101431640, align to 101416960 - store, err := NewNvmStore[int, int]( - "bfoo", 16<<10, 126789550, 4<<10, 4<<20, 3, 20, 500, 8, func(err error) {}, - nil, nil, - ) - require.Nil(t, err) - defer os.Remove("bfoo") - require.Equal(t, 16<<10, int(store.bighash.BucketSize)) - require.Equal(t, 25346048, int(store.bighash.CacheSize)) - require.Equal(t, 1547, int(store.bighash.numBuckets)) - - require.Equal(t, 101416960, int(store.blockcache.CacheSize)) - require.Equal(t, 4<<20, int(store.blockcache.RegionSize)) - require.Equal(t, 24, int(store.blockcache.regionManager.regionCount)) - require.Equal(t, 500, store.bigHashMaxEntrySize) - require.Equal(t, 64, int(store.bighash.buckets[0].Bloomfilter.M)) - - // no bighash - store, err = NewNvmStore[int, int]( - "bfoo", 16<<10, 126789550, 4<<10, 4<<20, 3, 0, 0, 8, func(err error) {}, - nil, nil, - ) - require.Nil(t, err) - require.Nil(t, store.bighash) - require.Equal(t, 126779392, int(store.blockcache.CacheSize)) - - // no block cache - store, err = NewNvmStore[int, int]( - "bfoo", 16<<10, 126789550, 4<<10, 4<<20, 3, 100, 0, 8, func(err error) {}, - nil, nil, - ) - require.Nil(t, err) - require.Nil(t, store.blockcache) - require.Equal(t, 126779392, int(store.bighash.CacheSize)) - - // tool large bighahsh max size - _, err = NewNvmStore[int, int]( - "bfoo", 16<<10, 126789550, 4<<10, 4<<20, 3, 20, 10<<50, 8, func(err error) {}, - nil, nil, - ) - require.NotNil(t, err) -} - -type ByteSerializer struct{} - -func (s *ByteSerializer) Marshal(i []byte) ([]byte, error) { - return i, nil -} - -func (s *ByteSerializer) Unmarshal(raw []byte, v *[]byte) error { - *v = make([]byte, len(raw)) - copy(*v, raw) - return nil -} - -func TestNvmResize(t *testing.T) { - defer os.Remove("bfoo") - for _, size := range []int{30 << 20, 100 << 20, 50 << 20} { - store, err := NewNvmStore[int, []byte]( - "bfoo", 512, size, 4<<10, 100<<10, 3, 20, 0, 8, func(err error) { - require.Nil(t, err) - }, - &IntSerializer{}, &ByteSerializer{}, - ) - require.Nil(t, err) - - // insert to soc - for i := 0; i < 5000; i++ { - err = store.Set(i, make([]byte, 1<<10), 1, 0) - require.Nil(t, err) - } - // insert to loc - for i := 0; i < 5000; i++ { - err = store.Set(i, make([]byte, 20<<10), 1, 0) - require.Nil(t, err) - } - - } - -} - -type IntSerializerE struct{} - -func (s *IntSerializerE) Marshal(i int) ([]byte, error) { - buff := bytes.NewBuffer(make([]byte, 0)) - err := binary.Write(buff, binary.BigEndian, uint64(i)) - if err != nil { - return nil, err - } - return buff.Bytes(), errors.New("e") -} - -func (s *IntSerializerE) Unmarshal(raw []byte, v *int) error { - num := binary.BigEndian.Uint64(raw) - *v = int(num) - return errors.New("e") -} - -func TestNvmSerializerError(t *testing.T) { - defer os.Remove("bfoo") - errCount := 0 - store, err := NewNvmStore[int, int]( - "bfoo", 512, 1000<<10, 4<<10, 5<<10, 3, 20, 0, 8, func(err error) { - errCount += 1 - }, - 
&IntSerializerE{}, &IntSerializerE{}, - ) - require.Nil(t, err) - - err = store.Set(1, 1, 1, 0) - require.NotNil(t, err) - - _, _, _, _, err = store.Get(1) - require.NotNil(t, err) -} diff --git a/internal/nvm/persist_test.go b/internal/nvm/persist_test.go deleted file mode 100644 index 5e4fcd1..0000000 --- a/internal/nvm/persist_test.go +++ /dev/null @@ -1 +0,0 @@ -package nvm diff --git a/internal/nvm/preallocate/preallocate.go b/internal/nvm/preallocate/preallocate.go deleted file mode 100644 index ce39adf..0000000 --- a/internal/nvm/preallocate/preallocate.go +++ /dev/null @@ -1,54 +0,0 @@ -// Copyright 2015 The etcd Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package preallocate - -import ( - "io" - "os" -) - -// Preallocate tries to allocate the space for given -// file. This operation is only supported on linux by a -// few filesystems (btrfs, ext4, etc.). -// If the operation is unsupported, no error will be returned. -// Otherwise, the error encountered will be returned. -func Preallocate(f *os.File, sizeInBytes int64, extendFile bool) error { - if sizeInBytes == 0 { - // fallocate will return EINVAL if length is 0; skip - return nil - } - if extendFile { - return preallocExtend(f, sizeInBytes) - } - return preallocFixed(f, sizeInBytes) -} - -func preallocExtendTrunc(f *os.File, sizeInBytes int64) error { - curOff, err := f.Seek(0, io.SeekCurrent) - if err != nil { - return err - } - size, err := f.Seek(sizeInBytes, io.SeekEnd) - if err != nil { - return err - } - if _, err = f.Seek(curOff, io.SeekStart); err != nil { - return err - } - if sizeInBytes > size { - return nil - } - return f.Truncate(sizeInBytes) -} diff --git a/internal/nvm/preallocate/preallocate_darwin.go b/internal/nvm/preallocate/preallocate_darwin.go deleted file mode 100644 index 5158d15..0000000 --- a/internal/nvm/preallocate/preallocate_darwin.go +++ /dev/null @@ -1,46 +0,0 @@ -// Copyright 2015 The etcd Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
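Editor's note: the doc comment above describes the contract of the removed Preallocate helper. A minimal sketch of a call site, for illustration only (the file name and size are placeholders, and the package is internal to this repository):

package main

import (
	"os"

	"github.com/Yiling-J/theine-go/internal/nvm/preallocate"
)

func main() {
	f, err := os.OpenFile("cache", os.O_RDWR|os.O_CREATE, 0666)
	if err != nil {
		panic(err)
	}
	defer f.Close()

	// Reserve 64 MiB up front. extendFile=true also extends the file size;
	// on filesystems without fallocate support the helper falls back to
	// seek/truncate (or is a no-op), per the doc comment above.
	if err := preallocate.Preallocate(f, 64<<20, true); err != nil {
		panic(err)
	}
}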
- -package preallocate - -import ( - "os" - - "golang.org/x/sys/unix" -) - -func preallocExtend(f *os.File, sizeInBytes int64) error { - if err := preallocFixed(f, sizeInBytes); err != nil { - return err - } - return preallocExtendTrunc(f, sizeInBytes) -} - -func preallocFixed(f *os.File, sizeInBytes int64) error { - fstore := &unix.Fstore_t{ - Flags: unix.F_ALLOCATECONTIG, - Posmode: unix.F_PEOFPOSMODE, - Length: sizeInBytes, - } - err := unix.FcntlFstore(f.Fd(), unix.F_PREALLOCATE, fstore) - if err != nil { - fstore.Flags = unix.F_ALLOCATEALL - err = unix.FcntlFstore(f.Fd(), unix.F_PREALLOCATE, fstore) - } - if err == nil || err == unix.ENOTSUP { - - return nil - } - return err -} diff --git a/internal/nvm/preallocate/preallocate_linux.go b/internal/nvm/preallocate/preallocate_linux.go deleted file mode 100644 index 318de4b..0000000 --- a/internal/nvm/preallocate/preallocate_linux.go +++ /dev/null @@ -1,48 +0,0 @@ -// Copyright 2015 The etcd Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package preallocate - -import ( - "os" - "syscall" -) - - -func preallocExtend(f *os.File, sizeInBytes int64) error { - // use mode = 0 to change size - err := syscall.Fallocate(int(f.Fd()), 0, 0, sizeInBytes) - if err != nil { - errno, ok := err.(syscall.Errno) - // not supported; fallback - // fallocate EINTRs frequently in some environments; fallback - if ok && (errno == syscall.ENOTSUP || errno == syscall.EINTR) { - return preallocExtendTrunc(f, sizeInBytes) - } - } - return err -} - -func preallocFixed(f *os.File, sizeInBytes int64) error { - // use mode = 1 to keep size; see FALLOC_FL_KEEP_SIZE - err := syscall.Fallocate(int(f.Fd()), 1, 0, sizeInBytes) - if err != nil { - errno, ok := err.(syscall.Errno) - // treat not supported as nil error - if ok && errno == syscall.ENOTSUP { - return nil - } - } - return err -} diff --git a/internal/nvm/preallocate/preallocate_other.go b/internal/nvm/preallocate/preallocate_other.go deleted file mode 100644 index 04b89cf..0000000 --- a/internal/nvm/preallocate/preallocate_other.go +++ /dev/null @@ -1,26 +0,0 @@ -// Copyright 2015 The etcd Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -//go:build !linux && !darwin -// +build !linux,!darwin - -package preallocate - -import "os" - -func preallocExtend(f *os.File, sizeInBytes int64) error { - return preallocExtendTrunc(f, sizeInBytes) -} - -func preallocFixed(f *os.File, sizeInBytes int64) error { return nil } diff --git a/internal/nvm/preallocate/preallocate_test.go b/internal/nvm/preallocate/preallocate_test.go deleted file mode 100644 index 3be0869..0000000 --- a/internal/nvm/preallocate/preallocate_test.go +++ /dev/null @@ -1,72 +0,0 @@ -// Copyright 2015 The etcd Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package preallocate - -import ( - "os" - "testing" -) - -func TestPreallocateExtend(t *testing.T) { - pf := func(f *os.File, sz int64) error { return Preallocate(f, sz, true) } - tf := func(t *testing.T, f *os.File) { testPreallocateExtend(t, f, pf) } - runPreallocTest(t, tf) -} - -func TestPreallocateExtendTrunc(t *testing.T) { - tf := func(t *testing.T, f *os.File) { testPreallocateExtend(t, f, preallocExtendTrunc) } - runPreallocTest(t, tf) -} - -func testPreallocateExtend(t *testing.T, f *os.File, pf func(*os.File, int64) error) { - size := int64(64 * 1000) - if err := pf(f, size); err != nil { - t.Fatal(err) - } - - stat, err := f.Stat() - if err != nil { - t.Fatal(err) - } - if stat.Size() != size { - t.Errorf("size = %d, want %d", stat.Size(), size) - } -} - -func TestPreallocateFixed(t *testing.T) { runPreallocTest(t, testPreallocateFixed) } -func testPreallocateFixed(t *testing.T, f *os.File) { - size := int64(64 * 1000) - if err := Preallocate(f, size, false); err != nil { - t.Fatal(err) - } - - stat, err := f.Stat() - if err != nil { - t.Fatal(err) - } - if stat.Size() != 0 { - t.Errorf("size = %d, want %d", stat.Size(), 0) - } -} - -func runPreallocTest(t *testing.T, test func(*testing.T, *os.File)) { - p := t.TempDir() - - f, err := os.CreateTemp(p, "") - if err != nil { - t.Fatal(err) - } - test(t, f) -} diff --git a/internal/nvm/region_manager.go b/internal/nvm/region_manager.go deleted file mode 100644 index 0d19ba2..0000000 --- a/internal/nvm/region_manager.go +++ /dev/null @@ -1,233 +0,0 @@ -package nvm - -import ( - "bytes" - "math/rand" - "os" - "sync" - "sync/atomic" - - "github.com/Yiling-J/theine-go/internal" - "github.com/Yiling-J/theine-go/internal/alloc" - "github.com/Yiling-J/theine-go/internal/nvm/directio" -) - -type Region struct { - EndOffset uint64 - buffer *bytes.Buffer - clean bool - lock *sync.RWMutex -} - -type RegionManager struct { - file *os.File - offset uint64 - mu sync.RWMutex - sketchMu sync.RWMutex - active uint32 - regionSize uint32 - regionCount uint32 - cleanRegionSize uint32 - cleanRegionCount atomic.Int32 - cleanRegionDone atomic.Bool - regions map[uint32]*Region - sketch *internal.CountMinSketch - bufferPool sync.Pool - removeRegion func(data []byte, endOffset uint64) error - readChan chan uint32 - allocator *alloc.Allocator - flushChan chan uint32 - cleanChan chan uint32 - errorHandler func(err error) -} - -func NewRegionManager(offset 
uint64, regionSize, regionCount, cleanRegionSize uint32, removeFunc func(data []byte, endOffset uint64) error, allocator *alloc.Allocator, errHandler func(err error)) *RegionManager { - rm := &RegionManager{ - offset: offset, - regionSize: regionSize, - regionCount: regionCount, - cleanRegionSize: cleanRegionSize, - bufferPool: sync.Pool{New: func() any { - return bytes.NewBuffer(directio.AlignedBlock(int(regionSize))) - }}, - readChan: make(chan uint32, 128), - removeRegion: removeFunc, - regions: make(map[uint32]*Region, regionCount), - sketch: internal.NewCountMinSketch(), - allocator: allocator, - flushChan: make(chan uint32, 3), - cleanChan: make(chan uint32, 3), - errorHandler: errHandler, - } - for i := 0; i < int(regionCount); i++ { - // region 0 is the first active region, so clean should be false - rm.regions[uint32(i)] = &Region{EndOffset: 0, clean: i != 0, lock: &sync.RWMutex{}} - } - rm.cleanRegionCount.Store(int32(regionCount)) - rm.attachBuffer(rm.regions[0]) - rm.sketch.EnsureCapacity(uint(regionSize)) - go rm.readQ() - for i := 0; i < int(cleanRegionSize); i++ { - go rm.flushAndClean() - } - return rm -} - -func (m *RegionManager) GetData(index *BlockInfo, rid uint64, offset uint64, size uint64) (*alloc.AllocItem, error) { - region := m.regions[uint32(rid)] - region.lock.RLock() - if index.removed { - region.lock.RUnlock() - return nil, nil - } - item := m.allocator.Allocate(int(size)) - if region.buffer != nil { - _ = copy(item.Data, region.buffer.Bytes()[offset:offset+size]) - } else { - _, err := m.file.ReadAt(item.Data, int64(m.offset+rid*uint64(m.regionSize)+offset)) - if err != nil { - region.lock.RUnlock() - return item, err - } - } - region.lock.RUnlock() - m.readChan <- uint32(rid) - return item, nil -} - -func (m *RegionManager) Allocate(size int) (uint32, uint64, *bytes.Buffer, func(), error) { - m.mu.Lock() - region := m.regions[m.active] - allocatedRegion := m.active - if m.regionSize-uint32(region.EndOffset) < uint32(size) { - full := m.active - m.flushChan <- full - clean := <-m.cleanChan - m.active = clean - allocatedRegion = clean - region = m.regions[m.active] - // reset offset and remove clean mark - region.EndOffset = 0 - region.clean = false - m.attachBuffer(region) - } - offset := region.EndOffset - b := region.buffer.Bytes()[region.EndOffset : region.EndOffset+uint64(size)] - region.EndOffset = region.EndOffset + uint64(size) - buffer := bytes.NewBuffer(b) - buffer.Reset() - region.lock.Lock() - m.mu.Unlock() - callback := func() { - region.lock.Unlock() - } - return allocatedRegion, offset, buffer, callback, nil -} - -// reclaim should have rlock from caller -func (m *RegionManager) reclaim() (uint32, error) { - victim := m.victim() - buffer := m.bufferPool.Get().(*bytes.Buffer) - buffer.Reset() - data := buffer.Bytes()[:m.regionSize] - _, err := m.file.ReadAt(data, int64(m.offset+uint64(victim)*uint64(m.regionSize))) - if err != nil { - return victim, err - } - region := m.regions[victim] - region.lock.Lock() - err = m.removeRegion(data, region.EndOffset) - m.bufferPool.Put(buffer) - region.clean = true - region.lock.Unlock() - return victim, err -} - -func (m *RegionManager) flushSync(rid uint32) error { - region := m.regions[rid] - region.lock.Lock() - defer region.lock.Unlock() - b := region.buffer.Bytes()[:m.regionSize] - _, err := m.file.WriteAt(b, int64(m.offset+uint64(rid)*uint64(m.regionSize))) - if err != nil { - return err - } - m.detachBuffer(region) - return nil -} - -// flush and clean always come together -// because flush means 
current buffer is full and need a new clean buffer -func (m *RegionManager) flushAndClean() { - for rid := range m.flushChan { - err := m.flushSync(rid) - if err != nil { - m.errorHandler(err) - continue - } - var clean uint32 - if !m.cleanRegionDone.Load() { - new := m.cleanRegionCount.Add(-1) - if new >= int32(m.cleanRegionSize) { - clean = uint32(new) - m.cleanChan <- clean - continue - } else { - m.cleanRegionDone.Store(true) - } - } - clean, err = m.reclaim() - if err != nil { - m.errorHandler(err) - continue - } - m.cleanChan <- clean - } -} - -func (m *RegionManager) attachBuffer(region *Region) { - region.buffer = m.bufferPool.Get().(*bytes.Buffer) - region.buffer.Reset() -} -func (m *RegionManager) detachBuffer(region *Region) { - buffer := region.buffer - region.buffer = nil - m.bufferPool.Put(buffer) -} - -func (m *RegionManager) victim() uint32 { - counter := 0 - var new uint32 - var fq uint - m.sketchMu.RLock() - for { - rid := uint32(rand.Intn(int(m.regionCount))) - // skip if already clean or buffer not nil - rg := m.regions[rid] - rg.lock.RLock() - if rg.clean || rg.buffer != nil { - rg.lock.RUnlock() - continue - } - rg.lock.RUnlock() - fqn := m.sketch.Estimate(spread(uint64(rid))) - if new == 0 || fqn < fq { - fq = fqn - new = rid - } - counter += 1 - if counter == 5 { - break - } - } - m.sketchMu.RUnlock() - return new -} - -func (m *RegionManager) readQ() { - for rid := range m.readChan { - m.sketchMu.Lock() - m.sketch.Add(spread(uint64(rid))) - m.sketchMu.Unlock() - } -} diff --git a/internal/nvm/serializers/memory.go b/internal/nvm/serializers/memory.go deleted file mode 100644 index db92177..0000000 --- a/internal/nvm/serializers/memory.go +++ /dev/null @@ -1,44 +0,0 @@ -package serializers - -import "unsafe" - -type MemorySerializer[V any] struct { - Size int - Str bool -} - -func NewMemorySerializer[V any]() *MemorySerializer[V] { - var v V - serializer := &MemorySerializer[V]{Size: int(unsafe.Sizeof(v))} - switch ((interface{})(v)).(type) { - case string: - serializer.Str = true - default: - serializer.Size = int(unsafe.Sizeof(v)) - } - return serializer -} - -func (s *MemorySerializer[V]) Marshal(v V) ([]byte, error) { - if s.Str { - return []byte(*(*string)(unsafe.Pointer(&v))), nil - } - return *(*[]byte)(unsafe.Pointer(&struct { - data unsafe.Pointer - len int - }{unsafe.Pointer(&v), s.Size})), nil -} - -func (s *MemorySerializer[V]) Unmarshal(raw []byte, v *V) error { - if s.Str { - s := string(raw) - *v = *(*V)(unsafe.Pointer(&s)) - return nil - } - m := *(*struct { - data unsafe.Pointer - len int - })(unsafe.Pointer(&raw)) - *v = *(*V)(m.data) - return nil -} diff --git a/internal/nvm/serializers/memory_test.go b/internal/nvm/serializers/memory_test.go deleted file mode 100644 index 373f8da..0000000 --- a/internal/nvm/serializers/memory_test.go +++ /dev/null @@ -1,33 +0,0 @@ -package serializers - -import ( - "testing" - - "github.com/stretchr/testify/require" -) - -func TestMemorySerializer(t *testing.T) { - s := NewMemorySerializer[int]() - b, err := s.Marshal(12) - require.Nil(t, err) - var q int - err = s.Unmarshal(b, &q) - require.Nil(t, err) - require.Equal(t, 12, q) - - s1 := NewMemorySerializer[string]() - b, err = s1.Marshal("foo") - require.Nil(t, err) - var q1 string - err = s1.Unmarshal(b, &q1) - require.Nil(t, err) - require.Equal(t, "foo", q1) - - s3 := NewMemorySerializer[[]byte]() - b, err = s3.Marshal([]byte{1, 3, 4, 1}) - require.Nil(t, err) - var q3 []byte - err = s3.Unmarshal(b, &q3) - require.Nil(t, err) - require.Equal(t, []byte{1, 3, 
4, 1}, q3) -} diff --git a/internal/nvm/serializers/serializers.go b/internal/nvm/serializers/serializers.go deleted file mode 100644 index c6c47cc..0000000 --- a/internal/nvm/serializers/serializers.go +++ /dev/null @@ -1,6 +0,0 @@ -package serializers - -type Serializer[T any] interface { - Marshal(v T) ([]byte, error) - Unmarshal(raw []byte, v *T) error -} diff --git a/internal/persistence_test.go b/internal/persistence_test.go index 2f4e5ca..897b7b6 100644 --- a/internal/persistence_test.go +++ b/internal/persistence_test.go @@ -10,7 +10,7 @@ import ( "github.com/stretchr/testify/require" ) -func TestStorePersistence(t *testing.T) { +func TestStorePersistence_Simple(t *testing.T) { store := NewStore[int, int](1000, false, nil, nil, nil, 0, 0, nil) for _, q := range store.queue.qs { q.size = 0 @@ -48,9 +48,10 @@ func TestStorePersistence(t *testing.T) { key: i, value: i, } - entry.frequency.Store(int32(i)) - entry.cost = 1 + store.policy.sketch.Addn(store.hasher.hash(entry.key), 10) + entry.weight.Store(1) store.shards[0].mu.Lock() + entry.queueIndex.Store(-2) store.setEntry(123, store.shards[0], 1, entry, false) _, index := store.index(i) store.shards[index].mu.Lock() @@ -108,7 +109,7 @@ func TestStorePersistence(t *testing.T) { } -func TestStorePersistenceTTL(t *testing.T) { +func TestStorePersistence_TTL(t *testing.T) { store := NewStore[int, int](1000, false, nil, nil, nil, 0, 0, nil) for i := 0; i < 10; i++ { _ = store.Set(i, i, 1, 2*time.Second) @@ -157,7 +158,7 @@ func TestStorePersistenceTTL(t *testing.T) { } } -func TestStorePersistenceResize(t *testing.T) { +func TestStorePersistence_Resize(t *testing.T) { store := NewStore[int, int](1000, false, nil, nil, nil, 0, 0, nil) for i := 0; i < 1000; i++ { _ = store.Set(i, i, 1, 0) diff --git a/internal/policy_bench_test.go b/internal/policy_bench_test.go index 24870d5..4b31193 100644 --- a/internal/policy_bench_test.go +++ b/internal/policy_bench_test.go @@ -15,14 +15,15 @@ func BenchmarkPolicy_Read(b *testing.B) { ritems := []ReadBufItem[uint64, bool]{} for i := 0; i < 100000; i++ { k := z.Uint64() + e := &Entry[uint64, bool]{ + key: k, + value: true, + } + e.weight.Store(1) witems = append(witems, WriteBufItem[uint64, bool]{ - entry: &Entry[uint64, bool]{ - key: k, - value: true, - cost: 1, - }, - cost: 1, - code: NEW, + entry: e, + costChange: 0, + code: NEW, }) } for _, wi := range witems { @@ -46,14 +47,16 @@ func BenchmarkPolicy_Write(b *testing.B) { b.ResetTimer() for i := 0; i < b.N; i++ { + e := &Entry[uint64, bool]{ + key: uint64(i), + value: true, + } + e.weight.Store(1) + e.policyWeight = 1 store.sinkWrite(WriteBufItem[uint64, bool]{ - entry: &Entry[uint64, bool]{ - key: uint64(i), - value: true, - cost: 1, - }, - cost: 1, - code: NEW, + entry: e, + costChange: 0, + code: NEW, }) } diff --git a/internal/policy_flag.go b/internal/policy_flag.go index 6590dd3..6436ad4 100644 --- a/internal/policy_flag.go +++ b/internal/policy_flag.go @@ -6,68 +6,81 @@ package internal // Bit 1: Indicates if this entry is a root of linked list. // Bit 2: Indicates if this entry is on probation. // Bit 3: Indicates if this entry is protected. -// Bit 4: Indicates if this entry is removed. +// Bit 4: Indicates if this entry is removed from main(SLRU). // Bit 5: Indicates if this entry is from NVM. +// Bit 6: Indicates if this entry is deleted by API. 
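//
// Editor's note (not part of this change): the setters below map these bits to
// the following masks on Flags, counting "Bit 1" as the least significant bit:
//
//	root       1 << 0 (0x01)
//	probation  1 << 1 (0x02)
//	protected  1 << 2 (0x04)
//	removed    1 << 3 (0x08)
//	from NVM   1 << 4 (0x10)
//	deleted    1 << 5 (0x20)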
type Flag struct { - flags int8 + Flags int8 } func (f *Flag) SetRoot(isRoot bool) { if isRoot { - f.flags |= (1 << 0) // Set bit 1 (root) + f.Flags |= (1 << 0) // Set bit 1 (root) } else { - f.flags &^= (1 << 0) // Clear bit 1 (root) + f.Flags &^= (1 << 0) // Clear bit 1 (root) } } func (f *Flag) SetProbation(isProbation bool) { if isProbation { - f.flags |= (1 << 1) // Set bit 2 (probation) + f.Flags |= (1 << 1) // Set bit 2 (probation) } else { - f.flags &^= (1 << 1) // Clear bit 2 (probation) + f.Flags &^= (1 << 1) // Clear bit 2 (probation) } } func (f *Flag) SetProtected(isProtected bool) { if isProtected { - f.flags |= (1 << 2) // Set bit 3 (protected) + f.Flags |= (1 << 2) // Set bit 3 (protected) } else { - f.flags &^= (1 << 2) // Clear bit 3 (protected) + f.Flags &^= (1 << 2) // Clear bit 3 (protected) } } func (f *Flag) SetRemoved(isRemoved bool) { if isRemoved { - f.flags |= (1 << 3) // Set bit 4 (removed) + f.Flags |= (1 << 3) // Set bit 4 (removed) } else { - f.flags &^= (1 << 3) // Clear bit 4 (removed) + f.Flags &^= (1 << 3) // Clear bit 4 (removed) } } func (f *Flag) SetFromNVM(isFromNVM bool) { if isFromNVM { - f.flags |= (1 << 4) // Set bit 5 (from NVM) + f.Flags |= (1 << 4) // Set bit 5 (from NVM) } else { - f.flags &^= (1 << 4) // Clear bit 5 (from NVM) + f.Flags &^= (1 << 4) // Clear bit 5 (from NVM) + } +} + +func (f *Flag) SetDeleted(isDeleted bool) { + if isDeleted { + f.Flags |= (1 << 5) // Set bit 6 (deleted) + } else { + f.Flags &^= (1 << 5) // Clear bit 6 (deleted) } } func (f *Flag) IsRoot() bool { - return (f.flags & (1 << 0)) != 0 + return (f.Flags & (1 << 0)) != 0 } func (f *Flag) IsProbation() bool { - return (f.flags & (1 << 1)) != 0 + return (f.Flags & (1 << 1)) != 0 } func (f *Flag) IsProtected() bool { - return (f.flags & (1 << 2)) != 0 + return (f.Flags & (1 << 2)) != 0 } func (f *Flag) IsRemoved() bool { - return (f.flags & (1 << 3)) != 0 + return (f.Flags & (1 << 3)) != 0 } func (f *Flag) IsFromNVM() bool { - return (f.flags & (1 << 4)) != 0 + return (f.Flags & (1 << 4)) != 0 +} + +func (f *Flag) IsDeleted() bool { + return (f.Flags & (1 << 5)) != 0 } diff --git a/internal/policy_flag_test.go b/internal/policy_flag_test.go index 1988925..7221a80 100644 --- a/internal/policy_flag_test.go +++ b/internal/policy_flag_test.go @@ -87,7 +87,7 @@ func TestFlag_CombinedFlags(t *testing.T) { } // reset - f.flags = 0 + f.Flags = 0 if f.IsRoot() { t.Error("Expected root flag to be false, got true") diff --git a/internal/queue.go b/internal/queue.go index da2a2dd..19f1017 100644 --- a/internal/queue.go +++ b/internal/queue.go @@ -12,18 +12,32 @@ type StripedQueue[K comparable, V any] struct { thresholdLoad func() int32 sendCallback func(item QueueItem[K, V]) removeCallback func(item QueueItem[K, V]) + sketch *CountMinSketch + qpool *sync.Pool } -func NewStripedQueue[K comparable, V any](queueCount int, queueSize int, thresholdLoad func() int32) *StripedQueue[K, V] { +func NewStripedQueue[K comparable, V any](queueCount int, queueSize int, sketch *CountMinSketch, thresholdLoad func() int32) *StripedQueue[K, V] { sq := &StripedQueue[K, V]{ qs: make([]*Queue[K, V], 0), count: queueCount, thresholdLoad: thresholdLoad, + sketch: sketch, + qpool: &sync.Pool{ + New: func() any { + return &qslice[K, V]{ + send: make([]QueueItem[K, V], 0, 2), + removed: make([]QueueItem[K, V], 0, 2), + } + }, + }, } for i := 0; i < queueCount; i++ { sq.qs = append(sq.qs, &Queue[K, V]{ - deque: deque.New[QueueItem[K, V]](8), - size: queueSize, + deque: deque.New[QueueItem[K, V]](8), + 
size: queueSize, + index: int32(i), + sketch: sketch, + qpool: sq.qpool, }) } return sq @@ -34,73 +48,135 @@ func (s *StripedQueue[K, V]) Push(hash uint64, entry *Entry[K, V], cost int64, f q.push(hash, entry, cost, fromNVM, s.thresholdLoad(), s.sendCallback, s.removeCallback) } -func (s *StripedQueue[K, V]) UpdateCost(hash uint64, entry *Entry[K, V], cost int64) bool { +func (s *StripedQueue[K, V]) PushSimple(hash uint64, entry *Entry[K, V]) { q := s.qs[hash&uint64(s.count-1)] + q.len += int(entry.policyWeight) + q.deque.PushFront(QueueItem[K, V]{entry: entry, fromNVM: entry.flag.IsFromNVM(), hash: hash}) +} + +func (s *StripedQueue[K, V]) UpdateCost(key K, hash uint64, entry *Entry[K, V], costChange int64) bool { + q := s.qs[hash&uint64(s.count-1)] + q.mu.Lock() defer q.mu.Unlock() - if entry.queued == 0 { - entry.cost = cost - return true - } - if entry.queued == 1 { - costChange := cost - entry.cost - entry.cost = cost + + index := entry.queueIndex.Load() + + switch index { + + case q.index: + entry.policyWeight += costChange q.len += int(costChange) return true + case -1: + // entry is moving from queue to main + // also send update event to main + return false + case -2: + // there are two types of race conditions here: + // - Create/update race for the same entry: Since both create and update use `+=` on the policy weight, the result is consistent. + // - Evict/update race for different entries: When an entry is evicted and reused from the sync pool, the policy weight should not be updated in this case. + // The race detector may flag this line, but it's safe to ignore. + if entry.key == key { + entry.policyWeight += costChange + } + return true + case -3: + // entry is removed from queue + // still return true here because entry is not on main yet. + // So don't send event to main. 
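	//
	// Editor's note (not part of this change): the queueIndex sentinels used
	// across this diff, as far as the surrounding code shows:
	//	>= 0  entry currently sits on the striped queue with that index
	//	-1    entry is being moved from the queue to the main (SLRU) policy
	//	-2    entry was created but has not been pushed onto a queue yet
	//	-3    entry was deleted or dropped by the queue and will not reach main
	//	-4    entry has been handed off to the main policy (set when sinkWrite
	//	      processes the NEW event)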
+ return true + default: + return false + } +} + +func (s *StripedQueue[K, V]) Delete(hash uint64, entry *Entry[K, V]) bool { + q := s.qs[hash&uint64(s.count-1)] + q.mu.Lock() + defer q.mu.Unlock() + + index := entry.queueIndex.Load() + + switch index { + case q.index: + entry.queueIndex.Store(-3) + return true + case -2: + entry.queueIndex.Store(-3) + return true + default: + return false } - return false +} + +type qslice[K comparable, V any] struct { + send []QueueItem[K, V] + removed []QueueItem[K, V] } type Queue[K comparable, V any] struct { - deque *deque.Deque[QueueItem[K, V]] - len int - size int - mu sync.Mutex + index int32 + deque *deque.Deque[QueueItem[K, V]] + len int + size int + mu sync.Mutex + sketch *CountMinSketch + qpool *sync.Pool } -func (q *Queue[K, V]) push(hash uint64, entry *Entry[K, V], cost int64, fromNVM bool, threshold int32, sendCallback func(item QueueItem[K, V]), removeCallback func(item QueueItem[K, V])) { +func (q *Queue[K, V]) push(hash uint64, entry *Entry[K, V], costChange int64, fromNVM bool, threshold int32, sendCallback func(item QueueItem[K, V]), removeCallback func(item QueueItem[K, V])) { q.mu.Lock() - // new entry cost should be -1, - // not -1 means already updated and cost param is stale - if entry.cost == -1 { - entry.cost = cost + + if i := entry.queueIndex.Load(); i == -3 { + q.mu.Unlock() + return + } + + success := entry.queueIndex.CompareAndSwap(-2, q.index) + if !success { + return } - entry.queued = 1 + // += here because of possible create/update race + entry.policyWeight += costChange - q.len += int(entry.cost) - q.deque.PushFront(QueueItem[K, V]{entry: entry, fromNVM: fromNVM}) + q.len += int(entry.policyWeight) + q.deque.PushFront(QueueItem[K, V]{entry: entry, fromNVM: fromNVM, hash: hash}) if q.len <= q.size { q.mu.Unlock() return } - // send to slru - send := make([]QueueItem[K, V], 0, 2) - // removed because frequency < slru tail frequency - removed := make([]QueueItem[K, V], 0, 2) + qs := q.qpool.Get().(*qslice[K, V]) for q.len > q.size { evicted := q.deque.PopBack() - evicted.entry.queued = 2 - q.len -= int(evicted.entry.cost) + q.len -= int(evicted.entry.policyWeight) + + if evicted.entry.queueIndex.Load() == -3 { + continue + } - count := evicted.entry.frequency.Load() - if count == -1 { - send = append(send, evicted) + count := q.sketch.Estimate(evicted.hash) + var index int32 = -1 + if int32(count) >= threshold { + qs.send = append(qs.send, evicted) } else { - if int32(count) >= threshold { - send = append(send, evicted) - } else { - removed = append(removed, evicted) - } + index = -3 + qs.removed = append(qs.removed, evicted) } + + evicted.entry.queueIndex.CompareAndSwap(q.index, index) } q.mu.Unlock() - for _, item := range send { + for _, item := range qs.send { sendCallback(item) } - for _, item := range removed { + for _, item := range qs.removed { removeCallback(item) } + qs.send = qs.send[:0] + qs.removed = qs.removed[:0] + q.qpool.Put(qs) } diff --git a/internal/queue_test.go b/internal/queue_test.go index 534d9fa..8fae80b 100644 --- a/internal/queue_test.go +++ b/internal/queue_test.go @@ -7,15 +7,16 @@ import ( ) func TestQueue_UpdateCost(t *testing.T) { - q := NewStripedQueue[int, int](1, 10, func() int32 { return -1 }) - entry := &Entry[int, int]{cost: -1} - entry.queued = 1 + q := NewStripedQueue[int, int](1, 10, NewCountMinSketch(), func() int32 { return -1 }) + entry := &Entry[int, int]{key: 1} + entry.weight.Store(1) + entry.queueIndex.Store(-2) q.Push(20, entry, 1, false) require.Equal(t, 1, q.qs[0].len) - 
q.UpdateCost(20, entry, 5) + q.UpdateCost(1, 20, entry, 4) require.Equal(t, 5, q.qs[0].len) - q.UpdateCost(20, entry, 3) + q.UpdateCost(1, 20, entry, -2) require.Equal(t, 3, q.qs[0].len) } diff --git a/internal/secondary_cache.go b/internal/secondary_cache.go index 60b8618..90cc5f7 100644 --- a/internal/secondary_cache.go +++ b/internal/secondary_cache.go @@ -1,6 +1,10 @@ package internal import ( + "errors" + "sync" + "sync/atomic" + "github.com/Yiling-J/theine-go/internal/clock" ) @@ -19,6 +23,68 @@ type SecondaryCache[K comparable, V any] interface { Get(key K) (value V, cost int64, expire int64, ok bool, err error) Set(key K, value V, cost int64, expire int64) error Delete(key K) error - SetClock(clock *clock.Clock) HandleAsyncError(err error) } + +// used in test only +type SimpleMapSecondary[K comparable, V any] struct { + m map[K]*Entry[K, V] + ErrCounter atomic.Uint64 + mu sync.Mutex + ErrMode bool +} + +func NewSimpleMapSecondary[K comparable, V any]() *SimpleMapSecondary[K, V] { + return &SimpleMapSecondary[K, V]{ + m: make(map[K]*Entry[K, V]), + } +} + +func (s *SimpleMapSecondary[K, V]) Get(key K) (value V, cost int64, expire int64, ok bool, err error) { + s.mu.Lock() + defer s.mu.Unlock() + + e, ok := s.m[key] + if !ok { + return + } + return e.value, e.weight.Load(), e.expire.Load(), true, nil +} + +func (s *SimpleMapSecondary[K, V]) Set(key K, value V, cost int64, expire int64) error { + s.mu.Lock() + defer s.mu.Unlock() + + if s.ErrMode { + return errors.New("err") + } + + e := &Entry[K, V]{ + value: value, + } + e.weight.Store(cost) + + s.m[key] = e + s.m[key].expire.Store(expire) + return nil +} + +func (s *SimpleMapSecondary[K, V]) Delete(key K) error { + s.mu.Lock() + defer s.mu.Unlock() + + if _, ok := s.m[key]; !ok { + return nil + } + delete(s.m, key) + return nil +} + +func (s *SimpleMapSecondary[K, V]) SetClock(clock *clock.Clock) { +} + +func (s *SimpleMapSecondary[K, V]) HandleAsyncError(err error) { + if err != nil { + s.ErrCounter.Add(1) + } +} diff --git a/internal/sketch.go b/internal/sketch.go index 484338a..0dbd573 100644 --- a/internal/sketch.go +++ b/internal/sketch.go @@ -1,18 +1,52 @@ package internal +import "sync/atomic" + type CountMinSketch struct { + Table []atomic.Uint64 + Additions uint + SampleSize uint + BlockMask uint + mu *RBMutex +} + +// sketch for persistence +type CountMinSketchPersist struct { Table []uint64 Additions uint SampleSize uint BlockMask uint } +func (s *CountMinSketchPersist) CountMinSketch() *CountMinSketch { + p := &CountMinSketch{ + Additions: s.Additions, SampleSize: s.SampleSize, BlockMask: s.BlockMask, + Table: make([]atomic.Uint64, len(s.Table)), + mu: NewRBMutex(), + } + for i := 0; i < len(s.Table); i++ { + p.Table[i].Store(s.Table[i]) + } + return p +} + func NewCountMinSketch() *CountMinSketch { - new := &CountMinSketch{} + new := &CountMinSketch{mu: NewRBMutex()} new.EnsureCapacity(16) return new } +func (s *CountMinSketch) CountMinSketchPersist() *CountMinSketchPersist { + p := &CountMinSketchPersist{ + Additions: s.Additions, SampleSize: s.SampleSize, BlockMask: s.BlockMask, + Table: make([]uint64, 0, len(s.Table)), + } + for i := 0; i < len(s.Table); i++ { + p.Table = append(p.Table, s.Table[i].Load()) + } + return p +} + // indexOf return table index and counter index together func (s *CountMinSketch) indexOf(h uint64, block uint64, offset uint8) (uint, uint) { counterHash := h + uint64(1+offset)*(h>>32) @@ -23,8 +57,9 @@ func (s *CountMinSketch) indexOf(h uint64, block uint64, offset uint8) (uint, ui func (s 
*CountMinSketch) inc(index uint, offset uint) bool { mask := uint64(0xF << offset) - if s.Table[index]&mask != mask { - s.Table[index] += 1 << offset + v := s.Table[index].Load() + if v&mask != mask { + s.Table[index].Store(v + 1<> 1 + v := s.Table[i].Load() + s.Table[i].Store(v >> 1) } s.Additions = s.Additions >> 1 } func (s *CountMinSketch) count(h uint64, block uint64, offset uint8) uint { index, off := s.indexOf(h, block, offset) - count := (s.Table[index] >> off) & 0xF + count := (s.Table[index].Load() >> off) & 0xF return uint(count) } @@ -76,12 +132,14 @@ func min(a, b uint) uint { func (s *CountMinSketch) Estimate(h uint64) uint { hn := spread(h) + t := s.mu.RLock() block := (hn & uint64(s.BlockMask)) << 3 hc := rehash(h) m := min(s.count(hc, block, 0), 100) m = min(s.count(hc, block, 1), m) m = min(s.count(hc, block, 2), m) m = min(s.count(hc, block, 3), m) + s.mu.RUnlock(t) return m } @@ -93,10 +151,12 @@ func (s *CountMinSketch) EnsureCapacity(size uint) { size = 16 } newSize := next2Power(size) - s.Table = make([]uint64, newSize) + s.mu.Lock() + s.Table = make([]atomic.Uint64, newSize) s.SampleSize = 10 * size s.BlockMask = uint((len(s.Table) >> 3) - 1) s.Additions = 0 + s.mu.Unlock() } func spread(h uint64) uint64 { diff --git a/internal/sketch_test.go b/internal/sketch_test.go index 24aeed1..091a817 100644 --- a/internal/sketch_test.go +++ b/internal/sketch_test.go @@ -11,13 +11,13 @@ import ( "github.com/zeebo/xxh3" ) -func TestEnsureCapacity(t *testing.T) { +func TestSketch_EnsureCapacity(t *testing.T) { sketch := NewCountMinSketch() sketch.EnsureCapacity(1) require.Equal(t, 16, len(sketch.Table)) } -func TestSketch(t *testing.T) { +func TestSketch_Basic(t *testing.T) { sketch := NewCountMinSketch() sketch.EnsureCapacity(100) require.Equal(t, 128, len(sketch.Table)) diff --git a/internal/slru.go b/internal/slru.go index 4077b46..7e363c8 100644 --- a/internal/slru.go +++ b/internal/slru.go @@ -15,12 +15,7 @@ func NewSlru[K comparable, V any](size uint) *Slru[K, V] { } func (s *Slru[K, V]) insert(entry *Entry[K, V]) *Entry[K, V] { - var evicted *Entry[K, V] - if s.probation.Len()+s.protected.Len() >= int(s.maxsize) { - evicted = s.probation.PopTail() - } - s.probation.PushFront(entry) - return evicted + return s.probation.PushFront(entry) } func (s *Slru[K, V]) victim() *Entry[K, V] { diff --git a/internal/store.go b/internal/store.go index cbbc742..743fb3f 100644 --- a/internal/store.go +++ b/internal/store.go @@ -5,6 +5,7 @@ import ( "context" "encoding/gob" "errors" + "fmt" "io" "math/rand" "runtime" @@ -191,13 +192,30 @@ func NewStore[K comparable, V any]( s.shards = append(s.shards, NewShard[K, V](doorkeeper)) } s.queue = NewStripedQueue[K, V]( - queueCount, queueSize, func() int32 { return s.policy.threshold.Load() }, + queueCount, queueSize, s.policy.sketch, func() int32 { return s.policy.threshold.Load() }, ) s.queue.sendCallback = func(item QueueItem[K, V]) { - s.writeChan <- WriteBufItem[K, V]{entry: item.entry, code: NEW, fromNVM: item.fromNVM} + s.writeChan <- WriteBufItem[K, V]{ + entry: item.entry, code: NEW, fromNVM: item.fromNVM, + } } s.queue.removeCallback = func(item QueueItem[K, V]) { - s.writeChan <- WriteBufItem[K, V]{entry: item.entry, code: EVICTE} + entry := item.entry + _, index := s.index(entry.key) + shard := s.shards[index] + shard.mu.Lock() + deleted := shard.delete(entry) + shard.mu.Unlock() + if deleted { + k, v := entry.key, entry.value + if s.removalListener != nil { + s.removalListener(k, v, EVICTED) + } + s.postDelete(entry) + } + } + 
s.policy.removeCallback = func(entry *Entry[K, V]) { + s.removeEntry(entry, EVICTED) } s.ctx, s.cancel = context.WithCancel(context.Background()) @@ -207,7 +225,6 @@ func NewStore[K comparable, V any]( go s.maintenance() if s.secondaryCache != nil { s.secondaryCacheBuf = make(chan SecondaryCacheItem[K, V], 256) - s.secondaryCache.SetClock(s.timerwheel.clock) for i := 0; i < workers; i++ { go s.processSecondary() } @@ -282,17 +299,31 @@ func (s *Store[K, V]) GetWithSecodary(key K) (value V, ok bool, err error) { } value, err, _ = shard.vgroup.Do(key, func() (v V, err error) { + // load and store should be atomic + shard.mu.Lock() v, cost, expire, ok, err := s.secondaryCache.Get(key) if err != nil { + shard.mu.Unlock() return v, err } if !ok { + shard.mu.Unlock() return v, &NotFound{} } + if expire <= s.timerwheel.clock.NowNano() { + err = s.secondaryCache.Delete(key) + if err == nil { + err = &NotFound{} + } + shard.mu.Unlock() + return v, err + } + // insert to cache - _, _, _ = s.setInternal(key, v, cost, expire, true) + _, _, _ = s.setShard(shard, h, key, v, cost, expire, true) return v, err }) + var notFound *NotFound if errors.As(err, ¬Found) { return value, false, nil @@ -309,30 +340,32 @@ func (s *Store[K, V]) setEntry(hash uint64, shard *Shard[K, V], cost int64, entr s.queue.Push(hash, entry, cost, fromNVM) } -func (s *Store[K, V]) setInternal(key K, value V, cost int64, expire int64, nvmClean bool) (*Shard[K, V], *Entry[K, V], bool) { - h, index := s.index(key) - shard := s.shards[index] - shard.mu.Lock() +func (s *Store[K, V]) setShard(shard *Shard[K, V], hash uint64, key K, value V, cost int64, expire int64, nvmClean bool) (*Shard[K, V], *Entry[K, V], bool) { exist, ok := shard.get(key) if ok { exist.value = value + old := exist.weight.Swap(cost) + + // create/update events order might change due to race, + // send cost change in event and apply them to entry policy weight + // so different order still works. + costChange := cost - old + shard.mu.Unlock() + var reschedule bool - queued := s.queue.UpdateCost(h, exist, cost) + queued := s.queue.UpdateCost(key, hash, exist, costChange) if expire > 0 { old := exist.expire.Swap(expire) if old != expire { reschedule = true } } - // on update, unlock shard lock until queue update, - // this is because when update/delete race, the deleted - // entry might already been reused if mutex not exist. 
- shard.mu.Unlock() if !queued { s.writeChan <- WriteBufItem[K, V]{ - entry: exist, code: UPDATE, cost: cost, rechedule: reschedule, + entry: exist, code: UPDATE, costChange: costChange, rechedule: reschedule, + hash: hash, } } return shard, exist, true @@ -343,7 +376,7 @@ func (s *Store[K, V]) setInternal(key K, value V, cost int64, expire int64, nvmC shard.dookeeper.Reset() shard.counter = 0 } - hit := shard.dookeeper.Insert(h) + hit := shard.dookeeper.Insert(hash) if !hit { shard.counter += 1 shard.mu.Unlock() @@ -351,17 +384,32 @@ } } entry := s.entryPool.Get().(*Entry[K, V]) - entry.frequency.Store(-1) + if entry.key == key { + // put it back and allocate a fresh entry manually, + // because reusing an entry for the same key might cause a race condition + s.entryPool.Put(entry) + entry = &Entry[K, V]{} + } entry.key = key entry.value = value entry.expire.Store(expire) - entry.queued = 0 // 0: map, 1: queue, 2: queue->slru - entry.cost = -1 - s.setEntry(h, shard, cost, entry, nvmClean) + entry.weight.Store(cost) + entry.policyWeight = 0 + entry.queueIndex.Store(-2) + s.setEntry(hash, shard, cost, entry, nvmClean) return shard, entry, true } +func (s *Store[K, V]) setInternal(key K, value V, cost int64, expire int64, nvmClean bool) (*Shard[K, V], *Entry[K, V], bool) { + h, index := s.index(key) + shard := s.shards[index] + shard.mu.Lock() + + return s.setShard(shard, h, key, value, cost, expire, nvmClean) + +} + func (s *Store[K, V]) Set(key K, value V, cost int64, ttl time.Duration) bool { if cost == 0 { cost = s.cost(value) @@ -383,7 +431,7 @@ type dequeKV[K comparable, V any] struct { } func (s *Store[K, V]) Delete(key K) { - _, index := s.index(key) + h, index := s.index(key) shard := s.shards[index] shard.mu.Lock() entry, ok := shard.get(key) @@ -392,7 +440,9 @@ } shard.mu.Unlock() if ok { - s.writeChan <- WriteBufItem[K, V]{entry: entry, code: REMOVE} + if !s.queue.Delete(h, entry) { + s.writeChan <- WriteBufItem[K, V]{entry: entry, code: REMOVE} + } } } @@ -500,6 +550,7 @@ func (s *Store[K, V]) removeEntry(entry *Entry[K, V], reason RemoveReason) { // already removed from shard map case REMOVED: + entry.flag.SetDeleted(true) kv := s.kvBuilder(entry) _ = s.removalCallback(kv, reason) } @@ -518,6 +569,30 @@ func (s *Store[K, V]) sinkWrite(item WriteBufItem[K, V]) (tailUpdate bool) { if entry == nil { return } + + // an entry removed explicitly via the API will not be reused by the sync pool, + // so all events except the REMOVE one can be ignored. + if entry.flag.IsDeleted() { + return + } + if item.code == REMOVE { + entry.flag.SetDeleted(true) + } + + if entry.queueIndex.Load() == -1 && item.code == UPDATE { + + // Double-check the key hash, in case the sequence is: + // entry is evicted -> reused -> added to queue -> added back to main. + // If the entry is reused but still associated with the same key, + // it could lead to a race condition. 
+ hh := s.hasher.hash(entry.key) + if hh != item.hash { + return + } + entry.policyWeight += item.costChange + return + } + if item.fromNVM { entry.flag.SetFromNVM(item.fromNVM) } @@ -531,6 +606,7 @@ func (s *Store[K, V]) sinkWrite(item WriteBufItem[K, V]) (tailUpdate bool) { // lock free because store API never read/modify entry metadata switch item.code { case NEW: + entry.queueIndex.Store(-4) entry.flag.SetRemoved(false) if expire := entry.expire.Load(); expire != 0 { @@ -546,10 +622,9 @@ func (s *Store[K, V]) sinkWrite(item WriteBufItem[K, V]) (tailUpdate bool) { s.removeEntry(evicted, EVICTED) tailUpdate = true } - removed := s.policy.EvictEntries() - for _, e := range removed { + t := s.policy.EvictEntries() + if t { tailUpdate = true - s.removeEntry(e, EVICTED) } case REMOVE: @@ -563,19 +638,16 @@ func (s *Store[K, V]) sinkWrite(item WriteBufItem[K, V]) (tailUpdate bool) { // create/update race if entry.meta.prev == nil { - entry.cost = item.cost return } - if item.cost != entry.cost { - costChange := item.cost - entry.cost - entry.cost = item.cost - s.policy.UpdateCost(entry, costChange) - removed := s.policy.EvictEntries() - for _, e := range removed { - tailUpdate = true - s.removeEntry(e, EVICTED) + if item.costChange != 0 { + hh := s.hasher.hash(entry.key) + if hh != item.hash { + return } + s.policy.UpdateCost(entry, item.costChange) + tailUpdate = s.policy.EvictEntries() } } item.entry = nil @@ -627,13 +699,8 @@ func (s *Store[K, V]) maintenance() { } }() - // Continuously receive the first item from the buffered channel. - // Then, attempt to retrieve up to 127 more items from the channel in a non-blocking manner - // to batch process them together. This reduces contention by minimizing the number of - // times the mutex lock is acquired for processing the buffer. - // If the channel is closed during the select, exit the loop. - // After collecting up to 127 items (or fewer if no more are available), lock the mutex, - // process the batch with drainWrite(), and then release the lock. + // continuously receive the first item from the buffered channel. + // avoid a busy loop while still processing data in batches. 
for first := range s.writeChan { s.writeBuffer = append(s.writeBuffer, first) loop: @@ -674,6 +741,17 @@ func (s *Store[K, V]) Range(f func(key K, value V) bool) { } } +// used in test +func (s *Store[K, V]) RangeEntry(f func(entry *Entry[K, V])) { + for _, shard := range s.shards { + tk := shard.mu.RLock() + for _, entry := range shard.hashmap { + f(entry) + } + shard.mu.RUnlock(tk) + } +} + func (s *Store[K, V]) Stats() Stats { return newStats(s.policy.hits.Value(), s.policy.misses.Value()) } @@ -698,7 +776,7 @@ func (s *Store[K, V]) getReadBufferIdx() int { type StoreMeta struct { Version uint64 StartNano int64 - Sketch *CountMinSketch + Sketch *CountMinSketchPersist } func (m *StoreMeta) Persist(writer io.Writer, blockEncoder *gob.Encoder) error { @@ -743,7 +821,7 @@ func (s *Store[K, V]) Persist(version uint64, writer io.Writer) error { meta := &StoreMeta{ Version: version, StartNano: s.timerwheel.clock.Start.UnixNano(), - Sketch: s.policy.sketch, + Sketch: s.policy.sketch.CountMinSketchPersist(), } err := meta.Persist(writer, blockEncoder) if err != nil { @@ -794,7 +872,7 @@ func (s *Store[K, V]) processSecondary() { if exist { err := s.secondaryCache.Set( item.entry.key, item.entry.value, - item.entry.cost, item.entry.expire.Load(), + item.entry.weight.Load(), item.entry.expire.Load(), ) item.shard.mu.RUnlock(tk) if err != nil { @@ -815,6 +893,20 @@ func (s *Store[K, V]) processSecondary() { } } +// wait write chan, used in test +func (s *Store[K, V]) Wait() { + for len(s.writeChan) != 0 { + runtime.Gosched() + } + for i := 0; i < 15; i++ { + s.mlock.Lock() + _ = 1 + s.mlock.Unlock() + runtime.Gosched() + } + time.Sleep(200 * time.Millisecond) +} + func (s *Store[K, V]) Recover(version uint64, reader io.Reader) error { blockDecoder := gob.NewDecoder(reader) block := &DataBlock[any]{} @@ -842,7 +934,7 @@ func (s *Store[K, V]) Recover(version uint64, reader io.Reader) error { break } switch block.Type { - case 1: + case 1: // metadata metaDecoder := gob.NewDecoder(reader) m := &StoreMeta{} err = metaDecoder.Decode(m) @@ -852,9 +944,9 @@ func (s *Store[K, V]) Recover(version uint64, reader io.Reader) error { if m.Version != version { return VersionMismatch } - s.policy.sketch = m.Sketch + s.policy.sketch = m.Sketch.CountMinSketch() s.timerwheel.clock.SetStart(m.StartNano) - case 2: + case 2: // main-protected entryDecoder := gob.NewDecoder(reader) for { pentry := &Pentry[K, V]{} @@ -876,7 +968,7 @@ func (s *Store[K, V]) Recover(version uint64, reader io.Reader) error { s.insertSimple(entry) } } - case 3: + case 3: // main-probation entryDecoder := gob.NewDecoder(reader) for { pentry := &Pentry[K, V]{} @@ -899,7 +991,7 @@ func (s *Store[K, V]) Recover(version uint64, reader io.Reader) error { s.insertSimple(entry) } } - case 4: + case 4: // queue entryDecoder := gob.NewDecoder(reader) for { pentry := &Pentry[K, V]{} @@ -915,16 +1007,104 @@ func (s *Store[K, V]) Recover(version uint64, reader io.Reader) error { continue } entry := pentry.entry() - h, index := s.index(entry.key) - shard := s.shards[index] - shard.mu.Lock() - s.setEntry(h, shard, pentry.Cost, entry, entry.flag.IsFromNVM()) + entry.queueIndex.Store(-2) + h, _ := s.index(entry.key) + s.queue.PushSimple(h, entry) + s.insertSimple(entry) + } } } return nil } +type debugInfo struct { + QueueWeight []int64 + QueueWeightField []int64 + QueueCount int64 + ProbationWeight int64 + ProbationWeightField int64 + ProbationCount int64 + ProtectedWeight int64 + ProtectedWeightField int64 + ProtectedCount int64 +} + +func (i debugInfo) 
String() string { + final := "" + final += fmt.Sprintf("total items in queues %d\n", i.QueueCount) + final += fmt.Sprintf("sum of weight of each queue %v\n", i.QueueWeight) + final += fmt.Sprintf("total items in probation list %d\n", i.ProbationCount) + final += fmt.Sprintf("sum of weight of probation list %d\n", i.ProbationWeight) + final += fmt.Sprintf("total items in protected list %d\n", i.ProtectedCount) + final += fmt.Sprintf("sum of weight of protected list %d\n", i.ProtectedWeight) + final += fmt.Sprintf("total items %d\n", i.QueueCount+i.ProbationCount+i.ProtectedCount) + return final +} + +func (i debugInfo) TotalCount() int64 { + return i.QueueCount + i.ProbationCount + i.ProtectedCount +} + +func (i debugInfo) TotalWeight() int64 { + var tw int64 + for _, v := range i.QueueWeight { + tw += v + } + tw += i.ProbationWeight + tw += i.ProtectedWeight + return tw +} + +// used for tests only +func (s *Store[K, V]) DebugInfo() debugInfo { + qs := []int64{} + qsf := []int64{} + var qc int64 + for _, q := range s.queue.qs { + var qsum int64 + for i := 0; i < q.deque.Len(); i++ { + e := q.deque.At(i) + qsum += e.entry.policyWeight + // if entry is removed, don't update queue entry count + // because we will compare this to hashmap entry count + if e.entry.queueIndex.Load() < 0 { + continue + } + qc += 1 + } + qs = append(qs, qsum) + qsf = append(qsf, int64(q.len)) + } + + var probationSum int64 + var probationCount int64 + s.policy.slru.probation.rangef(func(e *Entry[K, V]) { + probationSum += e.policyWeight + probationCount += 1 + }) + + var protectedSum int64 + var protectedCount int64 + s.policy.slru.protected.rangef(func(e *Entry[K, V]) { + protectedCount += 1 + protectedSum += e.policyWeight + }) + + return debugInfo{ + QueueWeight: qs, + QueueWeightField: qsf, + QueueCount: qc, + ProbationWeight: probationSum, + ProbationWeightField: int64(s.policy.slru.probation.Len()), + ProbationCount: probationCount, + ProtectedWeight: protectedSum, + ProtectedWeightField: int64(s.policy.slru.protected.Len()), + ProtectedCount: protectedCount, + } + +} + type Loaded[V any] struct { Value V Cost int64 @@ -952,21 +1132,36 @@ func (s *LoadingStore[K, V]) Get(ctx context.Context, key K) (V, error) { v, ok := s.getFromShard(key, h, shard) if !ok { loaded, err, _ := shard.group.Do(key, func() (Loaded[V], error) { + // load and store should be atomic + shard.mu.Lock() + // first try to get from the secondary cache if s.secondaryCache != nil { vs, cost, expire, ok, err := s.secondaryCache.Get(key) var notFound *NotFound if err != nil && !errors.As(err, &notFound) { + shard.mu.Unlock() return Loaded[V]{}, err } if ok { - _, _, _ = s.setInternal(key, vs, cost, expire, true) + _, _, _ = s.setShard(shard, h, key, vs, cost, expire, true) return Loaded[V]{Value: vs}, nil } } + loaded, err := s.loader(ctx, key) + var expire int64 + if loaded.TTL != 0 { + expire = s.timerwheel.clock.ExpireNano(loaded.TTL) + } + if loaded.Cost == 0 { + loaded.Cost = s.cost(loaded.Value) + } + if err == nil { - s.Set(key, loaded.Value, loaded.Cost, loaded.TTL) + s.setShard(shard, h, key, loaded.Value, loaded.Cost, expire, false) + } else { + shard.mu.Unlock() } return loaded, err }) diff --git a/internal/store_test.go b/internal/store_test.go index cb02389..52e872b 100644 --- a/internal/store_test.go +++ b/internal/store_test.go @@ -8,7 +8,7 @@ import ( "github.com/stretchr/testify/require" ) -func TestStore_DequeExpire(t *testing.T) { +func TestStore_QueueExpire(t *testing.T) { store := NewStore[int, int](5000, false, nil, nil, nil, 0, 
0, nil) defer store.Close() @@ -25,7 +25,8 @@ func TestStore_DequeExpire(t *testing.T) { for i := 0; i < 50; i++ { entry := &Entry[int, int]{key: i} entry.expire.Store(expire) - entry.cost = 1 + entry.weight.Store(1) + entry.queueIndex.Store(-2) store.shards[0].mu.Lock() store.setEntry(123, store.shards[0], 1, entry, false) _, index := store.index(i) @@ -40,7 +41,7 @@ func TestStore_DequeExpire(t *testing.T) { mu.Unlock() } -func TestStore_ProcessDeque(t *testing.T) { +func TestStore_ProcessQueue(t *testing.T) { store := NewStore[int, int](20000, false, nil, nil, nil, 0, 0, nil) defer store.Close() @@ -61,7 +62,8 @@ func TestStore_ProcessDeque(t *testing.T) { for i := 0; i < 5; i++ { entry := &Entry[int, int]{key: i} - entry.cost = 1 + entry.weight.Store(1) + entry.queueIndex.Store(-2) store.shards[0].mu.Lock() store.setEntry(h, store.shards[0], 1, entry, false) _, index := store.index(i) @@ -81,17 +83,18 @@ func TestStore_ProcessDeque(t *testing.T) { time.Sleep(1 * time.Second) // test evicted callback, cost less than threshold will be evicted immediately + store.policy.threshold.Store(100) for i := 10; i < 15; i++ { entry := &Entry[int, int]{key: i} - entry.cost = 1 - - store.shards[0].mu.Lock() - store.policy.threshold.Store(100) - store.setEntry(h, store.shards[0], 1, entry, false) + entry.weight.Store(1) + entry.queueIndex.Store(-2) _, index := store.index(i) store.shards[index].mu.Lock() store.shards[index].hashmap[i] = entry store.shards[index].mu.Unlock() + + store.shards[0].mu.Lock() + store.setEntry(h, store.shards[0], 1, entry, false) } time.Sleep(2 * time.Second) @@ -100,7 +103,7 @@ func TestStore_ProcessDeque(t *testing.T) { require.Equal(t, 5, len(evicted)) } -func TestStore_RemoveDeque(t *testing.T) { +func TestStore_RemoveQueue(t *testing.T) { store := NewStore[int, int](20000, false, nil, nil, nil, 0, 0, nil) defer store.Close() h, index := store.index(123) @@ -114,7 +117,8 @@ func TestStore_RemoveDeque(t *testing.T) { q.size = 10 q.len = 10 entryNew := &Entry[int, int]{key: 1} - entryNew.cost = 1 + entryNew.weight.Store(1) + entryNew.queueIndex.Store(-2) store.queue.Push(h, entryNew, 1, false) shard.hashmap[1] = entryNew // delete key @@ -175,6 +179,7 @@ func TestStore_GetExpire(t *testing.T) { key: 123, value: 123, } + entry.queueIndex.Store(-2) entry.expire.Store(fakeNow) store.shards[i].hashmap[123] = entry diff --git a/internal/tlfu.go b/internal/tlfu.go index 17af025..9185f64 100644 --- a/internal/tlfu.go +++ b/internal/tlfu.go @@ -5,19 +5,20 @@ import ( ) type TinyLfu[K comparable, V any] struct { - slru *Slru[K, V] - sketch *CountMinSketch - hasher *Hasher[K] - size uint - counter uint - misses *UnsignedCounter - hits *UnsignedCounter - hitsPrev uint64 - missesPrev uint64 - hr float32 - threshold atomic.Int32 - lruFactor uint8 - step int8 + slru *Slru[K, V] + sketch *CountMinSketch + hasher *Hasher[K] + size uint + counter uint + misses *UnsignedCounter + hits *UnsignedCounter + hitsPrev uint64 + missesPrev uint64 + hr float32 + threshold atomic.Int32 + lruFactor uint8 + step int8 + removeCallback func(entry *Entry[K, V]) } func NewTinyLfu[K comparable, V any](size uint, hasher *Hasher[K]) *TinyLfu[K, V] { @@ -95,10 +96,7 @@ func (t *TinyLfu[K, V]) Set(entry *Entry[K, V]) *Entry[K, V] { } if entry.meta.prev == nil { if victim := t.slru.victim(); victim != nil { - freq := int(entry.frequency.Load()) - if freq == -1 { - freq = int(t.sketch.Estimate(t.hasher.hash(entry.key))) - } + freq := int(t.sketch.Estimate(t.hasher.hash(entry.key))) evictedCount := uint(freq) + 
uint(t.lruFactor) victimCount := t.sketch.Estimate(t.hasher.hash(victim.key)) if evictedCount <= uint(victimCount) { @@ -135,8 +133,6 @@ func (t *TinyLfu[K, V]) Access(item ReadBufItem[K, V]) { if tail { t.UpdateThreshold() } - } else { - entry.frequency.Store(int32(t.sketch.Estimate(item.hash))) } } else { reset := t.sketch.Add(item.hash) @@ -151,27 +147,29 @@ func (t *TinyLfu[K, V]) Remove(entry *Entry[K, V]) { } func (t *TinyLfu[K, V]) UpdateCost(entry *Entry[K, V], delta int64) { + entry.policyWeight += delta t.slru.updateCost(entry, delta) } -func (t *TinyLfu[K, V]) EvictEntries() []*Entry[K, V] { - removed := []*Entry[K, V]{} +func (t *TinyLfu[K, V]) EvictEntries() (evicted bool) { for t.slru.probation.Len()+t.slru.protected.Len() > int(t.slru.maxsize) { entry := t.slru.probation.PopTail() if entry == nil { break } - removed = append(removed, entry) + evicted = true + t.removeCallback(entry) } for t.slru.probation.Len()+t.slru.protected.Len() > int(t.slru.maxsize) { entry := t.slru.protected.PopTail() if entry == nil { break } - removed = append(removed, entry) + evicted = true + t.removeCallback(entry) } - return removed + return } func (t *TinyLfu[K, V]) UpdateThreshold() { diff --git a/internal/tlfu_test.go b/internal/tlfu_test.go index f563507..32f7208 100644 --- a/internal/tlfu_test.go +++ b/internal/tlfu_test.go @@ -8,7 +8,7 @@ import ( "github.com/stretchr/testify/require" ) -func TestTlfu(t *testing.T) { +func TestTlfu_Basic(t *testing.T) { hasher := NewHasher[string](nil) tlfu := NewTinyLfu[string, string](1000, hasher) require.Equal(t, uint(1000), tlfu.slru.probation.capacity) @@ -80,13 +80,17 @@ func TestTlfu(t *testing.T) { } -func TestEvictEntries(t *testing.T) { +func TestTlfu_EvictEntries(t *testing.T) { hasher := NewHasher[string](nil) tlfu := NewTinyLfu[string, string](500, hasher) require.Equal(t, uint(500), tlfu.slru.probation.capacity) require.Equal(t, uint(400), tlfu.slru.protected.capacity) require.Equal(t, 0, int(tlfu.slru.probation.len.Load())) require.Equal(t, 0, int(tlfu.slru.protected.len.Load())) + em := []*Entry[string, string]{} + tlfu.removeCallback = func(entry *Entry[string, string]) { + em = append(em, entry) + } for i := 0; i < 500; i++ { tlfu.Set(NewEntry(fmt.Sprintf("%d:1", i), "", 1, 0)) @@ -94,47 +98,53 @@ func TestEvictEntries(t *testing.T) { require.Equal(t, 500, int(tlfu.slru.probation.len.Load())) require.Equal(t, 0, int(tlfu.slru.protected.len.Load())) new := NewEntry("l:10", "", 10, 0) - new.frequency.Store(10) + tlfu.sketch.Addn(hasher.hash(new.key), 10) tlfu.Set(new) require.Equal(t, 509, int(tlfu.slru.probation.len.Load())) require.Equal(t, 0, int(tlfu.slru.protected.len.Load())) // 2. 
probation length is 509, so remove 9 entries from probation - removed := tlfu.EvictEntries() - for _, rm := range removed { + tlfu.EvictEntries() + for _, rm := range em { require.True(t, strings.HasSuffix(rm.key, ":1")) } - require.Equal(t, 9, len(removed)) + require.Equal(t, 9, len(em)) require.Equal(t, 500, int(tlfu.slru.probation.len.Load())) require.Equal(t, 0, int(tlfu.slru.protected.len.Load())) + // reset evicted list + em = []*Entry[string, string]{} // put l:450 to probation, this will remove 1 entry, probation len is 949 now // remove 449 entries from probation new = NewEntry("l:450", "", 450, 0) - new.frequency.Store(10) + tlfu.sketch.Addn(hasher.hash(new.key), 10) tlfu.Set(new) - removed = tlfu.EvictEntries() - require.Equal(t, 449, len(removed)) + tlfu.EvictEntries() + require.Equal(t, 449, len(em)) require.Equal(t, 500, int(tlfu.slru.probation.len.Load())) require.Equal(t, 0, int(tlfu.slru.protected.len.Load())) + // reset evicted list + em = []*Entry[string, string]{} // put l:460 to probation, this will remove 1 entry, probation len is 959 now // remove all entries except the new l:460 one new = NewEntry("l:460", "", 460, 0) - new.frequency.Store(10) + tlfu.sketch.Addn(hasher.hash(new.key), 10) tlfu.Set(new) - removed = tlfu.EvictEntries() - require.Equal(t, 41, len(removed)) + tlfu.EvictEntries() + require.Equal(t, 41, len(em)) require.Equal(t, 460, int(tlfu.slru.probation.len.Load())) require.Equal(t, 0, int(tlfu.slru.protected.len.Load())) + // reset evicted list + em = []*Entry[string, string]{} // access tlfu.Access(ReadBufItem[string, string]{entry: new}) require.Equal(t, 0, int(tlfu.slru.probation.len.Load())) require.Equal(t, 460, int(tlfu.slru.protected.len.Load())) - new.cost = 600 + new.weight.Store(600) tlfu.UpdateCost(new, 140) - removed = tlfu.EvictEntries() - require.Equal(t, 1, len(removed)) + tlfu.EvictEntries() + require.Equal(t, 1, len(em)) require.Equal(t, 0, int(tlfu.slru.probation.len.Load())) require.Equal(t, 0, int(tlfu.slru.protected.len.Load())) diff --git a/nvm.go b/nvm.go deleted file mode 100644 index 37f2aad..0000000 --- a/nvm.go +++ /dev/null @@ -1,121 +0,0 @@ -package theine - -import ( - "encoding/json" - - "github.com/Yiling-J/theine-go/internal/nvm" -) - -type JsonSerializer[T any] struct{} - -func (s *JsonSerializer[T]) Marshal(v T) ([]byte, error) { - return json.Marshal(v) -} - -func (s *JsonSerializer[T]) Unmarshal(raw []byte, v *T) error { - return json.Unmarshal(raw, v) -} - -type NvmBuilder[K comparable, V any] struct { - file string - cacheSize int - blockSize int - bucketSize int - regionSize int - maxItemSize int - cleanRegionSize int - bhPct int - bfSize int - errorHandler func(err error) - keySerializer Serializer[K] - valueSerializer Serializer[V] -} - -func NewNvmBuilder[K comparable, V any](file string, cacheSize int) *NvmBuilder[K, V] { - return &NvmBuilder[K, V]{ - file: file, - cacheSize: cacheSize, - blockSize: 4096, - regionSize: 16 << 20, // 16mb - cleanRegionSize: 3, - bucketSize: 4 << 10, // 4kb - bhPct: 10, // 10% - bfSize: 8, // 8 bytes bloomfilter - errorHandler: func(err error) {}, - } -} - -// Device block size in bytes (minimum IO granularity). -func (b *NvmBuilder[K, V]) BlockSize(size int) *NvmBuilder[K, V] { - b.blockSize = size - return b -} - -// Block cache Region size in bytes. -func (b *NvmBuilder[K, V]) RegionSize(size int) *NvmBuilder[K, V] { - b.regionSize = size - return b -} - -// Big hash bucket size in bytes. 
-func (b *NvmBuilder[K, V]) BucketSize(size int) *NvmBuilder[K, V] { - b.bucketSize = size - return b -} - -// Percentage of space to reserve for BigHash. Set the percentage > 0 to enable BigHash. -// Set percentage to 100 to disable block cache. -func (b *NvmBuilder[K, V]) BigHashPct(pct int) *NvmBuilder[K, V] { - b.bhPct = pct - return b -} - -// Maximum size of a small item to be stored in BigHash. Must be less than the bucket size. -func (b *NvmBuilder[K, V]) BigHashMaxItemSize(size int) *NvmBuilder[K, V] { - b.maxItemSize = size - return b -} - -// Block cache clean region size. -func (b *NvmBuilder[K, V]) CleanRegionSize(size int) *NvmBuilder[K, V] { - b.cleanRegionSize = size - return b -} - -// Nvm cache error handler. -func (b *NvmBuilder[K, V]) ErrorHandler(fn func(err error)) *NvmBuilder[K, V] { - b.errorHandler = fn - return b -} - -// Nvm cache key serializer. -func (b *NvmBuilder[K, V]) KeySerializer(s Serializer[K]) *NvmBuilder[K, V] { - b.keySerializer = s - return b -} - -// Nvm cache value serializer. -func (b *NvmBuilder[K, V]) ValueSerializer(s Serializer[V]) *NvmBuilder[K, V] { - b.valueSerializer = s - return b -} - -func (b *NvmBuilder[K, V]) BucketBfSize(size int) *NvmBuilder[K, V] { - b.bfSize = size - return b -} - -// Build cache. -func (b *NvmBuilder[K, V]) Build() (*nvm.NvmStore[K, V], error) { - if b.keySerializer == nil { - b.keySerializer = &JsonSerializer[K]{} - } - if b.valueSerializer == nil { - b.valueSerializer = &JsonSerializer[V]{} - } - return nvm.NewNvmStore[K, V]( - b.file, b.blockSize, b.cacheSize, b.bucketSize, - b.regionSize, b.cleanRegionSize, uint8(b.bhPct), b.maxItemSize, b.bfSize, b.errorHandler, - b.keySerializer, b.valueSerializer, - ) -} diff --git a/persistence_test.go b/persistence_test.go index 5c70cd3..1256e63 100644 --- a/persistence_test.go +++ b/persistence_test.go @@ -11,7 +11,7 @@ import ( "github.com/stretchr/testify/require" ) -func TestPersistBasic(t *testing.T) { +func TestPersist_Basic(t *testing.T) { client, err := theine.NewBuilder[int, int](100).Build() require.Nil(t, err) for i := 0; i < 1000; i++ { @@ -43,7 +43,7 @@ func TestPersistBasic(t *testing.T) { } -func TestLoadingPersistBasic(t *testing.T) { +func TestPersist_LoadingBasic(t *testing.T) { client, err := theine.NewBuilder[int, int](100).BuildWithLoader(func(ctx context.Context, key int) (theine.Loaded[int], error) { return theine.Loaded[int]{Value: key, Cost: 1, TTL: 0}, nil }) @@ -79,7 +79,7 @@ func TestLoadingPersistBasic(t *testing.T) { } -func TestVersionMismatch(t *testing.T) { +func TestPersist_TestVersionMismatch(t *testing.T) { client, err := theine.NewBuilder[int, int](100).Build() require.Nil(t, err) f, err := os.Create("ptest") @@ -97,7 +97,7 @@ func TestVersionMismatch(t *testing.T) { require.Equal(t, theine.VersionMismatch, err) } -func TestChecksumMismatch(t *testing.T) { +func TestPersist_TestChecksumMismatch(t *testing.T) { client, err := theine.NewBuilder[int, int](100).Build() require.Nil(t, err) f, err := os.Create("ptest") @@ -126,7 +126,7 @@ type PStruct struct { Data []byte } -func TestPersistLarge(t *testing.T) { +func TestPersist_Large(t *testing.T) { client, err := theine.NewBuilder[int, PStruct](100000).Build() require.Nil(t, err) for i := 0; i < 100000; i++ { @@ -165,7 +165,7 @@ func TestPersistLarge(t *testing.T) { } } -func TestPersistOS(t *testing.T) { +func TestPersist_OS(t *testing.T) { f, err := os.Open("otest") require.Nil(t, err) client, err := theine.NewBuilder[int, int](100).Build() diff --git a/run/main.go b/run/main.go new 
file mode 100644 index 0000000..af3fad5 --- /dev/null +++ b/run/main.go @@ -0,0 +1,111 @@ +package main + +import ( + "encoding/binary" + "fmt" + "log" + "math/rand" + "net/http" + "sync" + "time" + + _ "net/http/pprof" + + "github.com/Yiling-J/theine-go" + // "github.com/go-echarts/statsview" +) + +// A simple infinite loop script to monitor heap and GC status during concurrent get/set/update operations. +// Install github.com/go-echarts/statsview, uncomment the relevant code, +// then start the script and visit http://localhost:18066/debug/statsview to view the results. + +const CACHE_SIZE = 500000 + +func keyGen() []uint64 { + keys := []uint64{} + r := rand.New(rand.NewSource(0)) + z := rand.NewZipf(r, 1.01, 9.0, CACHE_SIZE*100) + for i := 0; i < 2<<23; i++ { + keys = append(keys, z.Uint64()) + } + return keys +} + +type v128 struct { + _v [128]byte +} + +// GetU64 retrieves the first 8 bytes of _v as a uint64 +func (v *v128) GetU64() uint64 { + return binary.LittleEndian.Uint64(v._v[:8]) +} + +// SetU64 sets the first 8 bytes of _v to the value of the provided uint64 +func (v *v128) SetU64(val uint64) { + binary.LittleEndian.PutUint64(v._v[:8], val) +} + +func NewV128(val uint64) v128 { + var v v128 + v.SetU64(val) + return v +} + +func main() { + go func() { + log.Println(http.ListenAndServe("localhost:6060", nil)) + }() + + // http://localhost:18066/debug/statsview + // mgr := statsview.New() + // go func() { _ = mgr.Start() }() + + builder := theine.NewBuilder[uint64, v128](int64(CACHE_SIZE)) + builder.RemovalListener(func(key uint64, value v128, reason theine.RemoveReason) {}) + client, err := builder.Build() + if err != nil { + panic("client build failed") + } + var wg sync.WaitGroup + keys := keyGen() + + for i := 0; i < CACHE_SIZE; i++ { + client.SetWithTTL( + uint64(i), NewV128(uint64(i)), 1, 500*time.Second, + ) + } + + fmt.Println("==== start ====") + for i := 1; i <= 6; i++ { + wg.Add(1) + go func() { + defer wg.Done() + rd := rand.Intn(2 << 16) + i := 0 + for { + keyGet := keys[(i+rd)&(2<<23-1)] + keyUpdate := keys[(i+3*rd)&(2<<23-1)] + + v, ok := client.Get(keyGet) + if ok && v.GetU64() != keyGet { + panic(keyGet) + } + if !ok { + client.SetWithTTL( + keyGet, NewV128(keyGet), 1, time.Second*time.Duration(i%255+10), + ) + } + + client.SetWithTTL( + keyUpdate, NewV128(keyUpdate), int64(keyUpdate&7+1), + time.Second*time.Duration(i&63+30), + ) + i++ + } + }() + } + wg.Wait() + client.Close() + // mgr.Stop() + +} diff --git a/secondary_cache_test.go b/secondary_cache_test.go new file mode 100644 index 0000000..81d91b4 --- /dev/null +++ b/secondary_cache_test.go @@ -0,0 +1,141 @@ +package theine_test + +import ( + "context" + "runtime" + "sync" + "testing" + "time" + + "github.com/Yiling-J/theine-go" + "github.com/Yiling-J/theine-go/internal" + "github.com/stretchr/testify/require" +) + +func TestSecondaryCache_GetSetGetDeleteGet(t *testing.T) { + secondary := internal.NewSimpleMapSecondary[int, int]() + client, err := theine.NewBuilder[int, int](50000).Hybrid(secondary).Workers(8).AdmProbability(1).Build() + require.Nil(t, err) + for i := 0; i < 1000; i++ { + _, ok, _ := client.Get(i) + require.False(t, ok) + ok = client.Set(i, i, 1) + require.True(t, ok) + v, ok, _ := client.Get(i) + require.True(t, ok) + require.Equal(t, i, v) + err = client.Delete(i) + require.Nil(t, err) + _, ok, _ = client.Get(i) + require.False(t, ok) + } +} + +func TestSecondaryCache_AdmProb(t *testing.T) { + secondary := internal.NewSimpleMapSecondary[int, int]() + client, err := theine.NewBuilder[int, 
int](100).Hybrid(secondary).Workers(8).AdmProbability(0.5).Build() + require.Nil(t, err) + for i := 0; i < 1000; i++ { + success := client.Set(i, i, 1) + require.Nil(t, err) + require.True(t, success) + } + time.Sleep(50 * time.Millisecond) + + counter := 0 + for i := 0; i < 1000; i++ { + _, success, err := client.Get(i) + require.Nil(t, err) + if success { + counter += 1 + } + } + require.True(t, counter < 600) +} + +func TestSecondaryCache_ErrorHandler(t *testing.T) { + secondary := internal.NewSimpleMapSecondary[int, int]() + secondary.ErrMode = true + client, err := theine.NewBuilder[int, int](100).Hybrid(secondary).Workers(8).AdmProbability(1).Build() + require.Nil(t, err) + + for i := 0; i < 1000; i++ { + success := client.Set(i, i, 1) + require.Nil(t, err) + require.True(t, success) + } + + require.True(t, secondary.ErrCounter.Load() > 0) + +} + +func TestSecondaryCache_GetSetNoRace(t *testing.T) { + secondary := internal.NewSimpleMapSecondary[int, int]() + client, err := theine.NewBuilder[int, int](100).Hybrid(secondary).Workers(8).AdmProbability(1).Build() + require.Nil(t, err) + var wg sync.WaitGroup + for i := 1; i <= runtime.GOMAXPROCS(0)*2; i++ { + wg.Add(1) + go func() { + defer wg.Done() + for i := 0; i < 20000; i++ { + key := i + v, ok, err := client.Get(key) + if err != nil { + panic(err) + } + if !ok { + if i%2 == 0 { + _ = client.Set(key, i, 1) + } + if i%5 == 0 { + err := client.Delete(key) + if err != nil { + panic(err) + } + } + } else { + if i != v { + panic("value mismatch") + } + } + } + }() + } + wg.Wait() + time.Sleep(500 * time.Millisecond) + client.Close() +} + +func TestSecondaryCache_LoadingCache(t *testing.T) { + secondary := internal.NewSimpleMapSecondary[int, int]() + client, err := theine.NewBuilder[int, int](100).Hybrid(secondary).Workers(8).AdmProbability(1). + Loading(func(ctx context.Context, key int) (theine.Loaded[int], error) { + return theine.Loaded[int]{Value: key, Cost: 1, TTL: 0}, nil + }).Build() + require.Nil(t, err) + + for i := 0; i < 1000; i++ { + value, err := client.Get(context.TODO(), i) + require.Nil(t, err) + require.Equal(t, i, value) + } + + for i := 0; i < 1000; i++ { + value, err := client.Get(context.TODO(), i) + require.Nil(t, err) + require.Equal(t, i, value) + } + + success := client.Set(999, 999, 1) + require.True(t, success) + _, err = client.Get(context.TODO(), 999) + require.Nil(t, err) + err = client.Delete(999) + require.Nil(t, err) + _, err = client.Get(context.TODO(), 999) + require.Nil(t, err) + success = client.SetWithTTL(999, 999, 1, 5*time.Second) + require.True(t, success) + +}
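
The tests above drive the secondary tier through an in-memory implementation (`internal.NewSimpleMapSecondary`). For readers who want to plug in their own tier, the following is a minimal sketch of a map-backed implementation. It is illustrative only: the package and type names are made up, it is not the `SimpleMapSecondary` shipped in `internal`, and the method shapes (`Get` returning value/cost/expire/ok/err, `Set` taking a cost and an expire timestamp, `Delete`, plus an async error hook mirrored by the `ErrMode`/`ErrCounter` fields the tests poke at) are inferred from the store code and tests in this diff.

```go
package example

import (
	"errors"
	"sync"
	"sync/atomic"
)

// secondaryItem holds a value together with the cost and expire timestamp
// the store passes through on Set and expects back from Get.
type secondaryItem[V any] struct {
	value  V
	cost   int64
	expire int64
}

// mapSecondary is an illustrative, mutex-guarded, map-backed secondary tier.
type mapSecondary[K comparable, V any] struct {
	mu   sync.RWMutex
	data map[K]secondaryItem[V]

	// ErrMode forces Set to fail and ErrCounter counts async errors,
	// modeled after the fields the error-handler test inspects.
	ErrMode    bool
	ErrCounter atomic.Uint64
}

func newMapSecondary[K comparable, V any]() *mapSecondary[K, V] {
	return &mapSecondary[K, V]{data: make(map[K]secondaryItem[V])}
}

// Get returns the cached value together with its cost and expire timestamp.
func (s *mapSecondary[K, V]) Get(key K) (value V, cost int64, expire int64, ok bool, err error) {
	s.mu.RLock()
	defer s.mu.RUnlock()
	item, found := s.data[key]
	if !found {
		return value, 0, 0, false, nil
	}
	return item.value, item.cost, item.expire, true, nil
}

// Set stores the value; in ErrMode it fails so the error path can be exercised.
func (s *mapSecondary[K, V]) Set(key K, value V, cost int64, expire int64) error {
	if s.ErrMode {
		return errors.New("secondary cache set failed")
	}
	s.mu.Lock()
	defer s.mu.Unlock()
	s.data[key] = secondaryItem[V]{value: value, cost: cost, expire: expire}
	return nil
}

// Delete removes the key from the secondary tier.
func (s *mapSecondary[K, V]) Delete(key K) error {
	s.mu.Lock()
	defer s.mu.Unlock()
	delete(s.data, key)
	return nil
}

// HandleAsyncError is invoked for errors from background writes.
func (s *mapSecondary[K, V]) HandleAsyncError(err error) {
	if err != nil {
		s.ErrCounter.Add(1)
	}
}
```

An implementation like this would be wired in the same way the tests do, via `NewBuilder(...).Hybrid(secondary)`, with failures from background writes surfacing through `HandleAsyncError` rather than through the caller's `Set`.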