I'm trying to build a model where there will me multiple reads of an entire collection and rare additions and modifications to it.
I thought I might use the ConcurrentBag in .NET as I've read the documentation and it's supposed to be good for concurrent reads and writes.
The code would look like this:
public class Cache
{
ConcurrentBag<string> cache = new ConcurrentBag<string>();
// this method gets called frequently
public IEnumerable<string> GetAllEntries()
{
return cache.ToList();
}
// this method gets rarely called
public void Add(string newEntry)
{
// add to concurrentBag
}
public void Remove(string entryToRemove)
{
// remove from concurrent bag
}
}
However, I've decompiled the ConcurrentBag class and on theGetEnumerator there's always a lock taken, which means any call to GetAllEntries will lock the entire collection and it will not perform.
I'm thinking to get around this and code it in this manner instead, using a list.
public class Cache
{
private object guard = new object();
IList<string> cache = new List<string>();
// this method gets called frequently
public IEnumerable<string> GetAllEntries()
{
var currentCache = cache;
return currentCache;
}
// this method gets rarely called
public void Add(string newEntry)
{
lock (guard)
{
cache.Add(newEntry);
}
}
public void Remove(string entryToRemove)
{
lock (guard)
{
cache.Remove(entryToRemove);
}
}
}
Since the Add and Remove are rarely called I don't care too much about locking the access to the list there. On Get I might get a stale version of the list, but again I don't care, it will be fine for the next request.
Is the second implementation a good way to go?
EDIT
I've run a quick performance test and the results are the following:
Setup: populated the in memory collection with 10000 strings.
Action: GetAllEntries concurrently 50000 times.
Result:
00:00:35.2393871 to finish operation using ConcurrentBag (first implementation)
00:00:00.0036959 to finish operation using normal list (second implementation)
Code below:
class Program
{
static void Main(string[] args)
{
// warmup caches and stopwatch
var cacheWitBag = new CacheWithBag();
var cacheWitList = new CacheWithList();
cacheWitBag.Add("abc");
cacheWitBag.GetAllEntries();
cacheWitList.Add("abc");
cacheWitList.GetAllEntries();
var sw = new Stopwatch();
// warmup stowtach as well
sw.Start();
// initialize caches (rare writes so no real reason to measure here
for (int i =0; i < 50000; i++)
{
cacheWitBag.Add(new Guid().ToString());
cacheWitList.Add(new Guid().ToString());
}
sw.Stop();
// measure
var program = new Program();
sw.Start();
program.Run(cacheWitBag).Wait();
sw.Stop();
Console.WriteLine(sw.Elapsed);
sw.Restart();
program.Run2(cacheWitList).Wait();
sw.Stop();
Console.WriteLine(sw.Elapsed);
}
public async Task Run(CacheWithBag cache1)
{
List<Task> tasks = new List<Task>();
for (int i = 0; i < 10000; i++)
{
tasks.Add(Task.Run(() => cache1.GetAllEntries()));
}
await Task.WhenAll(tasks);
}
public async Task Run2(CacheWithList cache)
{
List<Task> tasks = new List<Task>();
for (int i = 0; i < 10000; i++)
{
tasks.Add(Task.Run(() => cache.GetAllEntries()));
}
await Task.WhenAll(tasks);
}
public class CacheWithBag
{
ConcurrentBag<string> cache = new ConcurrentBag<string>();
// this method gets called frequently
public IEnumerable<string> GetAllEntries()
{
return cache.ToList();
}
// this method gets rarely called
public void Add(string newEntry)
{
cache.Add(newEntry);
}
}
public class CacheWithList
{
private object guard = new object();
IList<string> cache = new List<string>();
// this method gets called frequently
public IEnumerable<string> GetAllEntries()
{
var currentCache = cache;
return currentCache;
}
// this method gets rarely called
public void Add(string newEntry)
{
lock (guard)
{
cache.Add(newEntry);
}
}
public void Remove(string entryToRemove)
{
lock (guard)
{
cache.Remove(entryToRemove);
}
}
}
}
}