Package: github.com/romanodesouza/cachebox
Category: repository package
Version: 0.1.1
Repository: https://github.com/romanodesouza/cachebox.git
Documentation: pkg.go.dev

# Packages

No description provided by the author
No description provided by the author

# README

cachebox

Godoc Build Status Go Report Card codecov license

A caching library to handle group and individual caches.

There are only two hard things in Computer Science: cache invalidation and naming things.

cachebox implements namespace versioning based on timestamps with nano precision over recyclable keys to make it easier to invalidate groups of keys without polluting the keyspace.

install

go get github.com/romanodesouza/cachebox

usage

package main

import (
	"context"
	"os"
	"time"

	"github.com/bradfitz/gomemcache/memcache"
	"github.com/romanodesouza/cachebox"
	"github.com/romanodesouza/cachebox/storage/memcached"
)

func main() {
	client := memcache.New(os.Getenv("MEMCACHED_HOST"))
	store := memcached.NewGoMemcache(client)
	cache := cachebox.NewCache(store)
	ctx := context.Background()

	// Example inputs; in real code these come from the caller.
	key := "key"
	keys := []string{"key1", "key2"}

	// Get
	reply, err := cache.Get(ctx, key)
	replies, err := cache.GetMulti(ctx, keys)

	// Set
	err = cache.Set(ctx, cachebox.Item{
		Key:   "key",
		Value: []byte("ok"),
		TTL:   time.Hour,
	})
	err = cache.SetMulti(ctx, []cachebox.Item{
		{
			Key:   "key1",
			Value: []byte("ok1"),
			TTL:   time.Hour,
		},
		{
			Key:   "key2",
			Value: []byte("ok2"),
			TTL:   time.Hour,
		},
	})

	// Delete
	err = cache.Delete(ctx, key)
	err = cache.DeleteMulti(ctx, keys)

	// Namespacing (when any of these namespace keys get invalidated, key is also invalid)
	ns := cache.Namespace("ns:key1", "ns:key2")
	reply, err = ns.Get(ctx, key)
	err = ns.Set(ctx, cachebox.Item{
		Key:   "key",
		Value: []byte("ok"),
		TTL:   time.Hour,
	})

	// Serialization (i is any value cachebox.Marshal understands)
	var i []int64
	b, err := cachebox.Marshal(i)

	// Deserialization
	err = cachebox.Unmarshal(b, &i)

	// Cache miss check
	err = cachebox.Unmarshal(b, &i)
	if err == cachebox.ErrMiss {
		// ...
	}

	_, _, _ = reply, replies, err // keep the example compilable
}

storage

Built-in support for: memcached (via gomemcache) and Redis (via redigo).

You can provide your own by implementing the Storage interface:

// Storage is the contract a cache backend must satisfy to be used by cachebox.
type Storage interface {
	// MGet fetches the values for the given keys, one reply per key.
	MGet(ctx context.Context, keys ...string) ([][]byte, error)
	// Set stores the given items.
	Set(ctx context.Context, items ...Item) error
	// Delete removes the given keys.
	Delete(ctx context.Context, keys ...string) error
}

multi storage support

store := storage.NewMultiStorage(memcached.NewGoMemcache(client), redis.NewRedigo(pool))
// Will try to fetch keys from memcached first
cache := cachebox.NewCache(store)

bypass

You can bypass reading only, or both reading and writing.

// Skip all get calls, useful to cache recomputed data
ctx := cachebox.WithBypass(parent, cachebox.BypassReading)

// Skip everything, useful to debug underlying layers
ctx := cachebox.WithBypass(parent, cachebox.BypassReadWriting)

stampede prevention

Avoid a high overload when a key expires and many concurrent calls try to recompute it at the same time. cachebox uses I/O contention with a pessimistic lock, so when a key expires only the first call recomputes it while the others wait for it to finish, or until the context times out.

Read more about cache stampede on Wikipedia.

cache := cachebox.NewCache(store, cachebox.WithKeyLock())

msgp compatibility

You can use the great msgp to serialize/deserialize items.

cachebox.Marshal(i) // uses msgp as long as i implements its interface
cachebox.Unmarshal(b, &i) // uses msgp as long as *i implements its interface

gzip

Too big values? Enable gzip compression.

cache := cachebox.NewCache(store, cachebox.WithGzipCompression(level))

instrumentation

The built-in storage adapters accept interfaces, so you can wrap their clients to gather metrics and/or do tracing, for example.

// InstrumentedGoMemcacheClient decorates a gomemcache client with hit/miss
// metrics collection. It embeds *memcache.Client, so every method other than
// Get is delegated to the underlying client unchanged.
type InstrumentedGoMemcacheClient struct {
	*memcache.Client
	stats *mystats.Collector // receives Hit/Miss events from Get
}

// NewInstrumentedGoMemcacheClient wraps client so that cache hits and misses
// observed by Get are reported to stats.
func NewInstrumentedGoMemcacheClient(client *memcache.Client, stats *mystats.Collector) *InstrumentedGoMemcacheClient {
	instrumented := &InstrumentedGoMemcacheClient{Client: client}
	instrumented.stats = stats

	return instrumented
}

// Get fetches key through the embedded gomemcache client, recording a hit
// metric on success and a miss metric on memcache.ErrCacheMiss. Any other
// error passes through uncounted. The item and error are returned untouched.
func (i *InstrumentedGoMemcacheClient) Get(key string) (*memcache.Item, error) {
	item, err := i.Client.Get(key)

	// gofmt: case clauses align with the switch keyword, not one level deeper.
	switch {
	case err == nil:
		i.stats.Hit(key)
	case err == memcache.ErrCacheMiss:
		i.stats.Miss(key)
	}

	return item, err
}

client := NewInstrumentedGoMemcacheClient(NewGoMemcache(), NewStatsCollector())
store := memcached.NewGoMemcache(client)
cache := cachebox.NewCache(store)

Worth saying that when OpenTelemetry gets stable, cachebox will support it.

key-based versioning

Ok, cool, but I still prefer key-based versioning so I can visualize better my keyspace.

cache := cachebox.NewCache(store, cachebox.WithKeyBasedExpiration())

Now you will be able to see namespaced keys with the cachebox:v[timestamp]: prefix.

example

// CacheRepository decorates a Repository with cachebox-based caching; on any
// cache failure it logs the error and falls back to the wrapped repo.
type CacheRepository struct {
	cache  *cachebox.Cache
	logger *myapp.Logger // fixed typo: was *myapp.Loggger (three g's)
	repo   Repository
}

// FindAll loads every entity: it resolves the complete ID list first, then
// fetches the entities for those IDs.
func (c *CacheRepository) FindAll(ctx context.Context) ([]*Entity, error) {
	allIDs, err := c.FindIDs(ctx)
	if err != nil {
		return nil, err
	}

	return c.FindByIDs(ctx, allIDs)
}

// FindIDs returns the user IDs, trying the namespaced group cache first and
// falling back to the underlying repository on a miss or cache error.
func (c *CacheRepository) FindIDs(ctx context.Context) ([]int64, error) {
	// Group caching retrieves a key namespaced by one or many namespace keys.
	// If the namespace version is newer than key's version, it considers it as cache miss.
	nskeys := []string{"ns:users"}
	// NOTE(review): includeInactive is a free variable in this example — in
	// real code it would presumably be a parameter or field; confirm its origin.
	if includeInactive {
		nskeys = append(nskeys, "ns:inactiveusers")
	}

	ns := c.cache.Namespace(nskeys...)

	key := "users"
	reply, err := ns.Get(ctx, key)
	if err != nil {
		// Something went wrong with the cache: log it and fall back to the next layer.
		c.logger.Error(errors.Wrap(err, "could not retrieve ids from cache"))
		return c.repo.FindIDs(ctx)
	}

	var ids []int64

	if err := cachebox.Unmarshal(reply, &ids); err != nil {
		if err != cachebox.ErrMiss {
			// A real deserialization failure (not just a miss) is logged before recomputing.
			c.logger.Error(errors.Wrap(err, "could not deserialize ids"))
		}

		// Shadow err so the unmarshal error above is not clobbered below.
		var err error

		ids, err = c.repo.FindIDs(ctx)
		if err != nil {
			return nil, err
		}

		var b []byte
		b, err = cachebox.Marshal(&ids)
		if err != nil {
			err = errors.Wrap(err, "could not serialize ids")
			c.logger.Error(err)
			return nil, err
		}

		// Best effort: a failed Set is only logged; the freshly computed ids
		// are still returned to the caller.
		err = ns.Set(ctx, cachebox.Item{
			Key: key,
			Value: b,
			TTL: time.Hour,
		})

		if err != nil {
			c.logger.Error(errors.Wrap(err, "could not cache ids"))
		}
	}

	return ids, nil
}

// FindByIDs returns the entities for the given ids, serving each one from the
// individual cache when possible and falling back to the repository for the
// missing ones, which are then cached for next time.
//
// Individual caching consists in retrieving many items (from a database, for
// example) and caching them one by one; this is effective when you have a
// high number of shared items.
func (c *CacheRepository) FindByIDs(ctx context.Context, ids []int64) ([]*Entity, error) {
	keys := make([]string, len(ids))
	for i, id := range ids {
		keys[i] = fmt.Sprintf("prefix_%d", id)
	}

	reply, err := c.cache.GetMulti(ctx, keys)
	if err != nil {
		// Something went wrong with the cache: log it and fall back to the next layer.
		c.logger.Error(errors.Wrap(err, "could not retrieve entities from cache"))
		return c.repo.FindByIDs(ctx, ids)
	}

	entities := make([]*Entity, len(keys))
	// Inverted index (entity ID -> position) to look up missing items later on.
	idx := make(map[int64]int)

	for i, b := range reply {
		// Decode into a fresh value: unmarshalling into entities[i] directly
		// would hand cachebox a nil *Entity.
		var e Entity
		if err := cachebox.Unmarshal(b, &e); err != nil {
			idx[ids[i]] = i

			if err != cachebox.ErrMiss {
				// Not a cache miss, so log the error.
				c.logger.Error(errors.Wrap(err, "could not deserialize item"))
			}

			continue
		}

		entities[i] = &e
	}

	// Go to the next layer only if some items are missing.
	if len(idx) > 0 {
		missingIDs := make([]int64, 0, len(idx))
		for id := range idx {
			missingIDs = append(missingIDs, id)
		}

		found, err := c.repo.FindByIDs(ctx, missingIDs)
		if err != nil {
			c.logger.Error(err)
			return entities, err
		}

		items := make([]cachebox.Item, 0, len(found))

		for _, entity := range found {
			i := idx[entity.ID]

			// Place the found object in the list.
			entities[i] = entity

			// Serialize; on failure skip caching this entity rather than
			// storing a nil value under its key.
			b, err := cachebox.Marshal(entity)
			if err != nil {
				c.logger.Error(errors.Wrap(err, "could not serialize entity"))
				continue
			}

			items = append(items, cachebox.Item{
				Key:   keys[i],
				Value: b,
				TTL:   time.Hour,
			})
		}

		// Best effort: a failed SetMulti is only logged.
		if err := c.cache.SetMulti(ctx, items); err != nil {
			c.logger.Error(errors.Wrap(err, "could not cache entities"))
		}
	}

	return entities, nil
}

invalidation

// Invalidate a namespace key to invalidate all related groups of keys
cache.Delete(ctx, "ns:key1")

// When invalidating an individual item, also invalidate the namespaces it belongs to
cache.DeleteMulti(ctx, "user_1", "ns:users", "ns:inactiveusers")

// You could even recompute the individual cache item before invalidating the namespaces
ctx := cachebox.WithBypass(parent, cachebox.BypassReading)
_, _ = FindByIDs(ctx, []int64{1})
cache.DeleteMulti(ctx, "ns:users", "ns:inactiveusers")

benchmarks

cachebox adds almost no overhead over raw storage clients.

goos: linux
goarch: amd64
pkg: github.com/romanodesouza/cachebox/integration
BenchmarkGoMemcache/gomemcache:get-4         	   10000	    109752 ns/op	     208 B/op	       9 allocs/op
BenchmarkGoMemcache/cachebox:get-4           	   10000	    109818 ns/op	     256 B/op	      11 allocs/op
BenchmarkGoMemcache/gomemcache:set-4         	   10000	    103729 ns/op	     112 B/op	       5 allocs/op
BenchmarkGoMemcache/cachebox:set-4           	   10000	    104124 ns/op	     160 B/op	       6 allocs/op
BenchmarkGoMemcache/gomemcache:getmulti-4    	 1000000	      1585 ns/op	     222 B/op	       2 allocs/op
BenchmarkGoMemcache/cachebox:getmulti-4      	 1233427	      2302 ns/op	     225 B/op	       2 allocs/op
BenchmarkGoMemcache/gomemcache:setmulti-4    	    5626	    204885 ns/op	     128 B/op	       7 allocs/op
BenchmarkGoMemcache/cachebox:setmulti-4      	    4981	    245714 ns/op	     366 B/op	       7 allocs/op
BenchmarkGoMemcache/gomemcache:delete-4      	    6922	    189450 ns/op	      16 B/op	       1 allocs/op
BenchmarkGoMemcache/cachebox:delete-4        	    6546	    187937 ns/op	      32 B/op	       2 allocs/op
BenchmarkGoMemcache/gomemcache:deletemulti-4 	    6109	    184948 ns/op	      32 B/op	       3 allocs/op
BenchmarkGoMemcache/cachebox:deletemulti-4   	 2760835	      1006 ns/op	     122 B/op	       2 allocs/op
BenchmarkRedigo/redigo:get-4                 	    1015	   1170355 ns/op	   10016 B/op	      42 allocs/op
BenchmarkRedigo/cachebox:get-4               	     903	   1183235 ns/op	   10063 B/op	      44 allocs/op
BenchmarkRedigo/redigo:set-4                 	     870	   1221171 ns/op	   10162 B/op	      44 allocs/op
BenchmarkRedigo/cachebox:set-4               	     914	   1217707 ns/op	   10257 B/op	      47 allocs/op
BenchmarkRedigo/redigo:getmulti-4            	  963810	      1216 ns/op	     171 B/op	       3 allocs/op
BenchmarkRedigo/cachebox:getmulti-4          	 1047338	      1065 ns/op	     180 B/op	       3 allocs/op
BenchmarkRedigo/redigo:setmulti-4            	  573801	      2225 ns/op	     208 B/op	       5 allocs/op
BenchmarkRedigo/cachebox:setmulti-4          	  196122	      7917 ns/op	     518 B/op	       8 allocs/op
BenchmarkRedigo/redigo:delete-4              	    1141	   1027026 ns/op	    9976 B/op	      40 allocs/op
BenchmarkRedigo/cachebox:delete-4            	     889	   1225767 ns/op	    9992 B/op	      41 allocs/op
BenchmarkRedigo/redigo:deletemulti-4         	 3459343	       639 ns/op	     137 B/op	       3 allocs/op
BenchmarkRedigo/cachebox:deletemulti-4       	 2788777	       752 ns/op	     153 B/op	       3 allocs/op

TODO

  • Add OpenTelemetry support