From a8214dda06f02444237deb335bcd76b32f7a20fa Mon Sep 17 00:00:00 2001 From: Mathieu Stefani Date: Thu, 25 Jul 2013 16:57:56 -0400 Subject: [PATCH 01/12] Introduced a boolean flag to GcLock to control defer execution --- gc/gc_lock.cc | 19 ++++++++++++------- gc/gc_lock.h | 29 ++++++++++++++++------------- 2 files changed, 28 insertions(+), 20 deletions(-) diff --git a/gc/gc_lock.cc b/gc/gc_lock.cc index 8ab02c68..502b05c9 100644 --- a/gc/gc_lock.cc +++ b/gc/gc_lock.cc @@ -343,7 +343,7 @@ GcLockBase:: bool GcLockBase:: -updateData(Data & oldValue, Data & newValue) +updateData(Data & oldValue, Data & newValue, bool runDefer /* = true */) { bool wake; try { @@ -373,7 +373,9 @@ updateData(Data & oldValue, Data & newValue) // anything that was waiting for it to be visible and run any // deferred handlers. futex_wake(data->visibleEpoch); - runDefers(); + if (runDefer) { + runDefers(); + } } return true; @@ -429,7 +431,7 @@ checkDefers() void GcLockBase:: -enterCS(ThreadGcInfoEntry * entry) +enterCS(ThreadGcInfoEntry * entry, bool runDefer) { if (!entry) entry = &getEntry(); @@ -471,24 +473,27 @@ enterCS(ThreadGcInfoEntry * entry) entry->inEpoch = newValue.epoch & 1; - if (updateData(current, newValue)) break; + if (updateData(current, newValue, runDefer)) break; } } void GcLockBase:: -exitCS(ThreadGcInfoEntry * entry) +exitCS(ThreadGcInfoEntry * entry, bool runDefer /* = true */) { if (!entry) entry = &getEntry(); if (entry->inEpoch == -1) throw ML::Exception("not in a CS"); + ExcCheck(entry->inEpoch == 0 || entry->inEpoch == 1, + "Invalid inEpoch"); // Fast path - if (__sync_fetch_and_add(data->in + (entry->inEpoch & 1), -1) > 1) { + if (__sync_fetch_and_add(data->in + entry->inEpoch, -1) > 1) { entry->inEpoch = -1; return; } + // Slow path; an epoch may have come to an end @@ -499,7 +504,7 @@ exitCS(ThreadGcInfoEntry * entry) //newValue.addIn(entry->inEpoch, -1); - if (updateData(current, newValue)) break; + if (updateData(current, newValue, runDefer)) break; } entry->inEpoch = -1; diff --git a/gc/gc_lock.h b/gc/gc_lock.h index cce9b39f..33b499eb 100644 --- a/gc/gc_lock.h +++ b/gc/gc_lock.h @@ -23,7 +23,6 @@ namespace Datacratic { - /*****************************************************************************/ /* GC LOCK BASE */ /*****************************************************************************/ @@ -120,7 +119,7 @@ struct GcLockBase : public boost::noncopyable { Data* data; Deferred * deferred; ///< Deferred workloads (hidden structure) - + /** Update with the new value after first checking that the current value is the same as the old value. Returns true if it succeeded; otherwise oldValue is updated with the new old @@ -131,7 +130,7 @@ struct GcLockBase : public boost::noncopyable { on that value, and will run any deferred handlers registered for that value. */ - bool updateData(Data & oldValue, Data & newValue); + bool updateData(Data & oldValue, Data & newValue, bool runDefer = true); /** Executes any available deferred work. 
*/ void runDefers(); @@ -141,8 +140,8 @@ struct GcLockBase : public boost::noncopyable { */ std::vector checkDefers(); - void enterCS(ThreadGcInfoEntry * entry = 0); - void exitCS(ThreadGcInfoEntry * entry = 0); + void enterCS(ThreadGcInfoEntry * entry = 0, bool runDefer = true); + void exitCS(ThreadGcInfoEntry * entry = 0, bool runDefer = true); void enterCSExclusive(ThreadGcInfoEntry * entry = 0); void exitCSExclusive(ThreadGcInfoEntry * entry = 0); @@ -169,12 +168,13 @@ struct GcLockBase : public boost::noncopyable { /** Permanently deletes any resources associated with this lock. */ virtual void unlink() = 0; - void lockShared(GcInfo::PerThreadInfo * info = 0) + void lockShared(GcInfo::PerThreadInfo * info = 0, + bool runDefer = true) { ThreadGcInfoEntry & entry = getEntry(info); if (!entry.readLocked && !entry.writeLocked) - enterCS(&entry); + enterCS(&entry, runDefer); ++entry.readLocked; @@ -187,7 +187,8 @@ struct GcLockBase : public boost::noncopyable { #endif } - void unlockShared(GcInfo::PerThreadInfo * info = 0) + void unlockShared(GcInfo::PerThreadInfo * info = 0, + bool runDefer = true) { ThreadGcInfoEntry & entry = getEntry(info); @@ -195,7 +196,7 @@ struct GcLockBase : public boost::noncopyable { throw ML::Exception("bad read lock nesting"); --entry.readLocked; if (!entry.readLocked && !entry.writeLocked) - exitCS(&entry); + exitCS(&entry, runDefer); #if GC_LOCK_DEBUG using namespace std; @@ -265,18 +266,20 @@ struct GcLockBase : public boost::noncopyable { } struct SharedGuard { - SharedGuard(GcLockBase & lock) - : lock(lock) + SharedGuard(GcLockBase & lock, bool runDefer = true) + : lock(lock), + runDefer_(runDefer) { - lock.lockShared(); + lock.lockShared(0, runDefer_); } ~SharedGuard() { - lock.unlockShared(); + lock.unlockShared(0, runDefer_); } GcLockBase & lock; + const bool runDefer_; }; struct ExclusiveGuard { From 90f23f487a351c92401b1cc3de42a09734862163 Mon Sep 17 00:00:00 2001 From: Mathieu Stefani Date: Thu, 25 Jul 2013 17:00:37 -0400 Subject: [PATCH 02/12] Removed unused RCU source files from directory tree --- gc/gc.cc | 326 ------------------------------------------ gc/gc.h | 122 ---------------- gc/gc.mk | 4 +- gc/rcu_lock.cc | 16 --- gc/rcu_lock.h | 248 -------------------------------- gc/testing/gc_test.cc | 30 +--- 6 files changed, 2 insertions(+), 744 deletions(-) delete mode 100644 gc/gc.cc delete mode 100644 gc/gc.h delete mode 100644 gc/rcu_lock.cc delete mode 100644 gc/rcu_lock.h diff --git a/gc/gc.cc b/gc/gc.cc deleted file mode 100644 index 94ee14c2..00000000 --- a/gc/gc.cc +++ /dev/null @@ -1,326 +0,0 @@ -/* gc.cc - Jeremy Barnes, 26 September 2011 - Copyright (c) 2011 Datacratic. All rights reserved. - -*/ - -#include "gc.h" -#include -#include -#include -#include "jml/utils/exc_assert.h" -#include "jml/arch/atomic_ops.h" -#include "jml/arch/rwlock.h" -#include "jml/arch/thread_specific.h" - -using namespace std; - - -namespace Datacratic { -namespace MMap { - - -struct doInit { - doInit() - { - rcu_init(); - } -} init; - -void registerThread() -{ - rcu_register_thread(); - rcu_defer_register_thread(); -} - -void unregisterThread() -{ - rcu_defer_unregister_thread(); - rcu_unregister_thread(); -} - -/** If true, then we are in exclusive mode which means that only one thing - can hold a lock at once. */ -static volatile bool exclusiveMode = false; - -/** The mutex for exclusive mode. */ -//static boost::shared_mutex exclusionMutex; -static ML::RWLock exclusionMutex; - -#if 0 - -/** Thread-specific data for the gc functionality. 
Boost's thread specific data - class has internal locking which makes it unsuitable. We use gcc's support - for thread specific data (which is much faster), with Boost's stuff there - simply to make sure that everything gets cleaned up when each thread exits. -*/ - -struct ThreadData; - -struct ThreadDataDeleter { - ThreadDataDeleter(ThreadData * data) - : data(data) - { - } - - ThreadData * data; - - ~ThreadDataDeleter(); -}; - -boost::thread_specific_ptr threadDataDeleter; - -#endif - -struct ThreadData { - - static int numInRead; - - ThreadData() - : readLocked(0), writeLocked(0) - { - lockedExclusive = false; - registerThread(); - initialized = true; - threadNum = random(); - - //threadDataDeleter.reset(new ThreadDataDeleter(this)); - - //cerr << "initialized thread " << threadNum << " at " << this << endl; - } - - ~ThreadData() - { - //cerr << "destroyThread " << threadNum << " at " << this << endl; - - if (writeLocked) { - cerr << "warning: thread exiting with write lock held" << endl; - } - - if (readLocked) { - cerr << "warning: thread exiting with read lock held" << endl; - } - - if (initialized) - unregisterThread(); - initialized = false; - } - - void readLock() - { - //cerr << "readLock" << " " << this << " readLocked " << readLocked << " writeLocked " - // << writeLocked << " excl " << exclusiveMode << " lexcl " << lockedExclusive - // << endl; - while (!readLocked && !writeLocked) { - lockedExclusive = exclusiveMode; - - if (lockedExclusive) - exclusionMutex.lock_shared(); - - rcu_read_lock(); // this also does a memory barrier... - - // Avoid racing with the update of the exlusive lock... - // TODO: this needs to be well tested - if (lockedExclusive != exclusiveMode) { - rcu_read_unlock(); - continue; - } - - //ML::atomic_inc(numInRead); - - break; - } - - ++readLocked; - - } - - bool isReadLocked() - { - return readLocked || writeLocked; - } - - void readUnlock() - { - //cerr << "readUnlock" << " " << this << " readLocked " << readLocked << " writeLocked " - // << writeLocked << " excl " << exclusiveMode << " lexcl " << lockedExclusive - // << endl; - - if (readLocked <= 0) - throw ML::Exception("bad read lock nesting"); - --readLocked; - if (!readLocked && !writeLocked) { - if (lockedExclusive) exclusionMutex.unlock_shared(); - //ML::atomic_dec(numInRead); - rcu_read_unlock(); - } - } - - void writeLock() - { - //cerr << "writeLock" << " " << this << " readLocked " << readLocked << " writeLocked " - // << writeLocked << " excl " << exclusiveMode << " lexcl " << lockedExclusive - // << endl; - if (readLocked) - throw ML::Exception("can't acquire write lock with read lock held"); - - if (!writeLocked) { - exclusionMutex.lock(); - //cerr << "entering exclusive mode numInRead = " << numInRead << endl; - ExcAssert(!exclusiveMode); - exclusiveMode = true; - //ML::memory_barrier(); - synchronize_rcu(); // wait for all readers to stop and block on lock - - //ExcAssertEqual(numInRead, 0); - } - - ++writeLocked; - } - - void writeUnlock() - { - //cerr << "writeUnlock" << " " << this << " readLocked " << readLocked << " writeLocked " - // << writeLocked << " excl " << exclusiveMode << " lexcl " << lockedExclusive - // << endl; - - if (writeLocked <= 0) - throw ML::Exception("bad write lock nesting"); - --writeLocked; - if (!writeLocked) { - exclusiveMode = false; - exclusionMutex.unlock(); - } - } - - bool isWriteLocked() - { - return writeLocked; - } - - void stopTheWorld() - { - writeLock(); - rcu_defer_barrier(); - } - - void restartTheWorld() - { - writeUnlock(); - } - - bool 
initialized; - int readLocked; - bool lockedExclusive; - int writeLocked; - int threadNum; - - pthread_key_t tssKey; -}; - -int ThreadData::numInRead = 0; - -#if 0 -ThreadDataDeleter:: -~ThreadDataDeleter() -{ - delete data; -} - -// Will be leaked... - __thread ThreadData * threadData = 0; - - -#endif - -ML::Thread_Specific threadData; - -ThreadData & getThreadData() -{ - return *threadData; - -#if 0 - if (!threadData.get()) - threadData.reset(new ThreadData()); - return *threadData; -#endif -} - -int getThreadNum() -{ - return getThreadData().threadNum; -} - -void readLock() -{ - getThreadData().readLock(); -} - -void readUnlock() -{ - getThreadData().readUnlock(); -} - -bool isReadLocked() -{ - return getThreadData().isReadLocked(); -} - -void waitForGC() -{ - rcu_defer_barrier(); -} - -// These are stubs at the moment but will have to become something better... - -void writeLock() -{ - getThreadData().writeLock(); -} - -void writeUnlock() -{ - getThreadData().writeUnlock(); -} - -bool isWriteLocked() -{ - return getThreadData().isWriteLocked(); -} - -void stopTheWorld() -{ - getThreadData().stopTheWorld(); -} - -void restartTheWorld() -{ - getThreadData().restartTheWorld(); -} - -void doDeferredGc(void * workBlock) -{ - boost::function * work - = reinterpret_cast *>(workBlock); - try { - (*work)(); - } catch (...) { - delete work; - throw; - } - - delete work; -} - -void deferGC(const boost::function & work) -{ - defer_rcu(&doDeferredGc, - new boost::function(work)); -} - - -// boost::lock_guard lock(m); - - -} // namespace MMap -} // namespace Datacratic diff --git a/gc/gc.h b/gc/gc.h deleted file mode 100644 index 6698eea8..00000000 --- a/gc/gc.h +++ /dev/null @@ -1,122 +0,0 @@ -/* gc.h -*- C++ -*- - Jeremy Barnes, 26 September 2011 - Copyright (c) 2011 Datacratic. All rights reserved. - - Garbage collection basics. -*/ - -#ifndef __mmap__gc_h__ -#define __mmap__gc_h__ - -#include "jml/arch/exception.h" -#include -#include "jml/compiler/compiler.h" - -namespace Datacratic { -namespace MMap { - -int getThreadNum() JML_PURE_FN; - -#if 0 - - -void readLock(); -void readUnlock(); -bool isReadLocked(); - -struct ReadGuard { - ReadGuard() - { - readLock(); - } - - ~ReadGuard() - { - readUnlock(); - } -}; - -struct FancyReadGuard { - - FancyReadGuard(bool doLock = true) - : locked_(false) - { - if (doLock) lock(); - } - - ~FancyReadGuard() - { - if (locked_) unlock(); - } - - void unlock() - { - if (!locked_) - throw ML::Exception("attempt to unlock unlocked FancyReadGuard"); - readUnlock(); - locked_ = false; - } - - void lock() - { - if (locked_) - throw ML::Exception("attempt to lock locked FancyReadGuard"); - readLock(); - locked_ = true; - } - - bool locked() const { return locked_; } - - bool locked_; -}; - -void writeLock(); -void writeUnlock(); -bool isWriteLocked(); - -struct WriteGuard { - WriteGuard() - { - writeLock(); - } - - ~WriteGuard() - { - writeUnlock(); - } -}; - -// Stop the whole world, including dealing with all deferred updates -void stopTheWorld(); -void restartTheWorld(); - - - -struct StopTheWorldGuard { - StopTheWorldGuard() - { - stopTheWorld(); - } - - ~StopTheWorldGuard() - { - restartTheWorld(); - } -}; - -/** Wait until everything that's pending for GC has been run. Runs like a - barrier. -*/ -void waitForGC(); - -/** Do some deferred work to garbage collect. 
*/ -void deferGC(const boost::function & work); - -#endif - -} // namespace MMap -} // namespace Datacratic - -#endif /* __mmap__gc_h__ */ - - diff --git a/gc/gc.mk b/gc/gc.mk index d341bbd5..d8f768f4 100644 --- a/gc/gc.mk +++ b/gc/gc.mk @@ -8,9 +8,7 @@ LIBGC_SOURCES := \ - gc_lock.cc \ - rcu_lock.cc \ - gc.cc + gc_lock.cc $(eval $(call library,gc,$(LIBGC_SOURCES),arch utils urcu)) diff --git a/gc/rcu_lock.cc b/gc/rcu_lock.cc deleted file mode 100644 index 87c78378..00000000 --- a/gc/rcu_lock.cc +++ /dev/null @@ -1,16 +0,0 @@ -/* rcu_lock.cc -*- C++ -*- - Jeremy Barnes, 20 November 2011 - Copyright (c) 2011 Datacratic. All rights reserved. - - Garbage collection lock using userspace RCU. -*/ -#include "rcu_lock.h" - -namespace Datacratic { - - -int RcuLock::currentIndex = 0; -ML::Thread_Specific RcuLock::gcInfo; ///< Thread-specific bookkeeping - - -} // namespace Datacratic diff --git a/gc/rcu_lock.h b/gc/rcu_lock.h deleted file mode 100644 index f29515a0..00000000 --- a/gc/rcu_lock.h +++ /dev/null @@ -1,248 +0,0 @@ -/* rcu_lock.h -*- C++ -*- - Jeremy Barnes, 20 November 2011 - Copyright (c) 2011 Datacratic. All rights reserved. - - Garbage collection lock using userspace RCU. -*/ - -#ifndef __mmap__rcu_lock_h__ -#define __mmap__rcu_lock_h__ - -#include -#include -#include -#include "jml/compiler/compiler.h" -#include "jml/utils/exc_assert.h" -#include "jml/arch/rwlock.h" -#include "jml/arch/thread_specific.h" - -namespace Datacratic { - -struct RcuLock { - - /// A thread's bookkeeping info about each GC area - struct ThreadGcInfoEntry { - ThreadGcInfoEntry() - : readLocked(0), lockedExclusive(false), writeLocked(0) - { - } - - int readLocked; - bool lockedExclusive; - int writeLocked; - }; - - /// A thread's overall bookkeeping information - struct ThreadGcInfo { - ThreadGcInfo() - { - static int threadInfoNum = 0; - threadNum = __sync_fetch_and_add(&threadInfoNum, 1); - rcu_register_thread(); - rcu_defer_register_thread(); - } - - ~ThreadGcInfo() - { - rcu_unregister_thread(); - rcu_defer_unregister_thread(); - } - - int threadNum; - - std::vector info; - - ThreadGcInfoEntry & operator [] (int index) - { - ExcAssertGreaterEqual(index, 0); - if (info.size() <= index) - info.resize(index + 1); - return info[index]; - } - }; - - static ML::Thread_Specific gcInfo; ///< Thread-specific bookkeeping - bool exclusiveMode; ///< If true we fall back to a RW mutex - ML::RWLock exclusionMutex; ///< The RW mutex for exclusive mode - int index; ///< Which global thread number we are - - void enterCS() - { - rcu_read_lock(); - } - - void exitCS() - { - rcu_read_unlock(); - } - - int myEpoch() const - { - return -1; - } - - int currentEpoch() const - { - return -1; - } - - JML_ALWAYS_INLINE ThreadGcInfoEntry & getEntry() const - { - ThreadGcInfo & info = *gcInfo; - return info[index]; - } - - static int currentIndex; - - RcuLock() - : exclusiveMode(false), - index(currentIndex + 1) - { - } - - void lockShared() - { - ThreadGcInfoEntry & entry = getEntry(); - - //cerr << "lockShared: readLocked " << entry.readLocked - // << " writeLocked: " << entry.writeLocked - // << " exclusive " << exclusiveMode << endl; - - while (!entry.readLocked && !entry.writeLocked) { - entry.lockedExclusive = exclusiveMode; - - // If something else wanted an exclusive lock then we are in - // exclusive mode and we have to acquire the RW lock beore we - // can continue - if (JML_UNLIKELY(entry.lockedExclusive)) - exclusionMutex.lock_shared(); - - //cerr << "entering" << endl; - enterCS(); - - // Avoid racing with the update of 
the exlusive lock... - // TODO: this needs to be well tested - if (entry.lockedExclusive != exclusiveMode) { - //cerr << "reexiting" << endl; - exitCS(); - continue; - } - - break; - } - - ++entry.readLocked; - } - - void unlockShared() - { - ThreadGcInfoEntry & entry = getEntry(); - - if (entry.readLocked <= 0) - throw ML::Exception("bad read lock nesting"); - --entry.readLocked; - if (!entry.readLocked && !entry.writeLocked) { - if (entry.lockedExclusive) exclusionMutex.unlock_shared(); - exitCS(); - } - } - - bool isLockedShared() - { - ThreadGcInfoEntry & entry = getEntry(); - return entry.readLocked || entry.writeLocked; - } - - void lockExclusive() - { - ThreadGcInfoEntry & entry = getEntry(); - - if (entry.readLocked) - throw ML::Exception("can't acquire write lock with read lock held"); - - if (!entry.writeLocked) { - exclusionMutex.lock(); - //cerr << "entering exclusive mode numInRead = " << numInRead << endl; - ExcAssert(!exclusiveMode); - exclusiveMode = true; - //ML::memory_barrier(); - visibleBarrier(); - - //ExcAssertEqual(numInRead, 0); - } - - ++entry.writeLocked; - } - - void unlockExclusive() - { - ThreadGcInfoEntry & entry = getEntry(); - - if (entry.writeLocked <= 0) - throw ML::Exception("bad write lock nesting"); - --entry.writeLocked; - if (!entry.writeLocked) { - exclusiveMode = false; - exclusionMutex.unlock(); - } - } - - bool isLockedExclusive() - { - ThreadGcInfoEntry & entry = getEntry(); - return entry.writeLocked; - } - - void visibleBarrier() - { - synchronize_rcu(); - } - - void deferBarrier() - { - rcu_defer_barrier(); - } - - static void callFn(void * arg) - { - boost::function * fn - = reinterpret_cast *>(arg); - try { - (*fn)(); - } catch (...) { - delete fn; - throw; - } - delete fn; - } - - void defer(boost::function work) - { - defer(callFn, new boost::function(work)); - } - - void defer(void (work) (void *), void * arg) - { - getEntry(); // make sure thread is registered - defer_rcu(work, arg); - } - - void defer(void (work) (void *, void *), void * arg1, void * arg2) - { - defer([=] () { work(arg1, arg2); }); - } - - void defer(void (work) (void *, void *, void *), void * arg1, void * arg2, void * arg3) - { - defer([=] () { work(arg1, arg2, arg3); }); - } - - void dump() - { - } -}; - -} // namespace Datacratic - - -#endif /* __mmap__rcu_lock_h__ */ diff --git a/gc/testing/gc_test.cc b/gc/testing/gc_test.cc index d72ae394..426fc6d7 100644 --- a/gc/testing/gc_test.cc +++ b/gc/testing/gc_test.cc @@ -9,7 +9,6 @@ #define BOOST_TEST_DYN_LINK #include "soa/gc/gc_lock.h" -#include "soa/gc/rcu_lock.h" #include "jml/utils/string_functions.h" #include "jml/utils/exc_assert.h" #include "jml/utils/guard.h" @@ -21,6 +20,7 @@ #include #include #include +#include #include #include @@ -35,12 +35,6 @@ namespace Datacratic { extern int32_t gcLockStartingEpoch; }; -struct doInit { - doInit() - { - rcu_init(); - } -} init; #if 1 @@ -642,17 +636,6 @@ BOOST_AUTO_TEST_CASE ( test_gc_sync_many_threads ) test.run(boost::bind(&TestBase::allocThreadSync, &test, _1)); } -BOOST_AUTO_TEST_CASE ( test_rcu_sync ) -{ - cerr << "testing synchronized RCU" << endl; - - int nthreads = 2; - int nblocks = 2; - - TestBase test(nthreads, nblocks); - test.run(boost::bind(&TestBase::allocThreadSync, &test, _1)); -} - BOOST_AUTO_TEST_CASE ( test_gc_deferred ) { cerr << "testing deferred GcLock" << endl; @@ -664,17 +647,6 @@ BOOST_AUTO_TEST_CASE ( test_gc_deferred ) test.run(boost::bind(&TestBase::allocThreadDefer, &test, _1)); } -BOOST_AUTO_TEST_CASE ( test_rcu_deferred ) -{ - cerr << 
"testing deferred RCU" << endl; - - int nthreads = 2; - int nblocks = 2; - - TestBase test(nthreads, nblocks); - test.run(boost::bind(&TestBase::allocThreadDefer, &test, _1)); -} - struct SharedGcLockProxy : public SharedGcLock { static const char* name; From 2d011f02dd3c7e49ebc4340aa50810689629d7db Mon Sep 17 00:00:00 2001 From: Mathieu Stefani Date: Fri, 26 Jul 2013 11:32:08 -0400 Subject: [PATCH 03/12] Replaced rcu_dereference_sym call by plain std::atomic. rcu_dereference_sym basically calls CMM_ACCESS_ONCE which essentially is the same macro as the ACCESS_ONCE kernel one. This macro ensures that the compiler won't make any optimization and won't load the given value twice. std::atomic should do the trick here. --- gc/testing/gc_test.cc | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/gc/testing/gc_test.cc b/gc/testing/gc_test.cc index 426fc6d7..b7afcbbe 100644 --- a/gc/testing/gc_test.cc +++ b/gc/testing/gc_test.cc @@ -20,7 +20,7 @@ #include #include #include -#include +#include #include #include @@ -367,7 +367,7 @@ struct TestBase { { for (unsigned i = 0; i < nthreads; ++i) { allBlocks[i] = new int *[nblocks]; - std::fill(allBlocks[i], allBlocks[i] + nblocks, (int *)0); + std::fill(allBlocks[i].load(), allBlocks[i].load() + nblocks, (int *)0); } } @@ -389,7 +389,7 @@ struct TestBase { here by another thread should always refer to exactly the same value. */ - vector allBlocks; + vector> allBlocks; void checkVisible(int threadNum, unsigned long long start) { @@ -400,7 +400,7 @@ struct TestBase { for (unsigned i = 0; i < nthreads; ++i) { for (unsigned j = 0; j < nblocks; ++j) { //int * val = allBlocks[i][j]; - int * val = (int *)(rcu_dereference_sym(allBlocks[i][j])); + int * val = allBlocks[i].load()[j]; if (val) { int atVal = *val; if (atVal != i) { From 467cce629d3f56dc08ace65083cef58d3e83a54e Mon Sep 17 00:00:00 2001 From: Mathieu Stefani Date: Fri, 2 Aug 2013 16:44:34 -0400 Subject: [PATCH 04/12] Implemented speculative Gc. The speculative unlock version does not guarantee that once it's done, the Gc is unlocked. --- gc/gc_lock.cc | 2 ++ gc/gc_lock.h | 79 ++++++++++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 80 insertions(+), 1 deletion(-) diff --git a/gc/gc_lock.cc b/gc/gc_lock.cc index 502b05c9..0e9f172e 100644 --- a/gc/gc_lock.cc +++ b/gc/gc_lock.cc @@ -34,6 +34,8 @@ namespace Datacratic { */ int32_t gcLockStartingEpoch = 0; +int32_t SpeculativeThreshold = 5; + /** A safe comparaison of epochs that deals with potential overflows. \todo So many possible bit twiddling hacks... Must resist... 
*/ diff --git a/gc/gc_lock.h b/gc/gc_lock.h index 33b499eb..14cf14df 100644 --- a/gc/gc_lock.h +++ b/gc/gc_lock.h @@ -15,6 +15,7 @@ #include "jml/arch/atomic_ops.h" #include "jml/arch/thread_specific.h" #include +#include #if GC_LOCK_DEBUG # include @@ -22,7 +23,7 @@ namespace Datacratic { - +extern int32_t SpeculativeThreshold; /*****************************************************************************/ /* GC LOCK BASE */ /*****************************************************************************/ @@ -39,6 +40,7 @@ struct GcLockBase : public boost::noncopyable { { } + int inEpoch; // 0, 1, -1 = not in int readLocked; int writeLocked; @@ -46,11 +48,30 @@ struct GcLockBase : public boost::noncopyable { std::string print() const; }; + struct SpeculativeEntry { + SpeculativeEntry() : + specLock(0), + specUnlock(0) + { + } + + ~SpeculativeEntry() { + } + + int specLock; + int specUnlock; + }; + typedef ML::ThreadSpecificInstanceInfo GcInfo; typedef typename GcInfo::PerThreadInfo ThreadGcInfo; + typedef ML::ThreadSpecificInstanceInfo + SpeculativeGcInfo; + typedef typename SpeculativeGcInfo::PerThreadInfo ThreadSpeculativeGcInfo; + GcInfo gcInfo; + SpeculativeGcInfo specGcInfo; struct Data { Data(); @@ -161,6 +182,12 @@ struct GcLockBase : public boost::noncopyable { return *gcInfo.get(info); } + JML_ALWAYS_INLINE SpeculativeEntry & + getSpecEntry(SpeculativeGcInfo::PerThreadInfo * info = 0) const + { + return *specGcInfo.get(info); + } + GcLockBase(); virtual ~GcLockBase(); @@ -207,6 +234,40 @@ struct GcLockBase : public boost::noncopyable { #endif } + void lockSpeculative(SpeculativeGcInfo::PerThreadInfo * info = 0) + { + SpeculativeEntry & entry = getSpecEntry(info); + if (entry.specLock == 0 && entry.specUnlock == 0) { + lockShared(0, false); + } + ++entry.specLock; + } + + void unlockSpeculative(SpeculativeGcInfo::PerThreadInfo * info = 0) + { + SpeculativeEntry & entry = getSpecEntry(info); + if (entry.specLock == 0) { + throw ML::Exception("Should lock before unlock"); + } + + --entry.specLock; + if (!entry.specLock) { + if (++entry.specUnlock == SpeculativeThreshold) { + unlockShared(0, false); + entry.specUnlock = 0; + } + } + } + + void forceUnlock(SpeculativeGcInfo::PerThreadInfo * info = 0) { + SpeculativeEntry & entry = getSpecEntry(info); + ExcAssertEqual(entry.specLock, 0); + if (!entry.specLock && entry.specUnlock) { + unlockShared(0, true); + entry.specUnlock = 0; + } + } + int isLockedShared(GcInfo::PerThreadInfo * info = 0) const { ThreadGcInfoEntry & entry = getEntry(info); @@ -297,6 +358,22 @@ struct GcLockBase : public boost::noncopyable { GcLockBase & lock; }; + struct SpeculativeGuard { + SpeculativeGuard(GcLockBase &lock) : + lock(lock) + { + lock.lockSpeculative(); + } + + ~SpeculativeGuard() + { + lock.unlockSpeculative(); + } + + GcLockBase & lock; + }; + + /** Wait until everything that's currently visible is no longer accessible. 
From 1b57d9e0f2a4aa382e9030404a52112314b80fd4 Mon Sep 17 00:00:00 2001 From: Mathieu Stefani Date: Wed, 7 Aug 2013 16:48:39 -0400 Subject: [PATCH 05/12] Refactored the thread-local storage (TLS) bookkeeping --- gc/gc_lock.cc | 4 - gc/gc_lock.h | 188 ++++++++++++++++++++++++++---------------- gc/testing/gc_test.cc | 8 +- 3 files changed, 122 insertions(+), 78 deletions(-) diff --git a/gc/gc_lock.cc b/gc/gc_lock.cc index 0e9f172e..75479382 100644 --- a/gc/gc_lock.cc +++ b/gc/gc_lock.cc @@ -483,8 +483,6 @@ void GcLockBase:: exitCS(ThreadGcInfoEntry * entry, bool runDefer /* = true */) { - if (!entry) entry = &getEntry(); - if (entry->inEpoch == -1) throw ML::Exception("not in a CS"); @@ -516,8 +514,6 @@ void GcLockBase:: enterCSExclusive(ThreadGcInfoEntry * entry) { - if (!entry) entry = &getEntry(); - ExcAssertEqual(entry->inEpoch, -1); Data current = *data, newValue; diff --git a/gc/gc_lock.h b/gc/gc_lock.h index 14cf14df..6cae26de 100644 --- a/gc/gc_lock.h +++ b/gc/gc_lock.h @@ -36,42 +36,121 @@ struct GcLockBase : public boost::noncopyable { /// A thread's bookkeeping info about each GC area struct ThreadGcInfoEntry { ThreadGcInfoEntry() - : inEpoch(-1), readLocked(0), writeLocked(0) + : inEpoch(-1), readLocked(0), writeLocked(0), + specLocked(0), specUnlocked(0), + owner(0) { } + ~ThreadGcInfoEntry() { + using namespace std; + /* We are not in a speculative critical section; check whether + * the Gc has been left locked + */ + if (!specLocked && !specUnlocked && (readLocked || writeLocked)) + cerr << "Thread died but GcLock is still locked" << endl; + + /* We are in a speculative CS but the Gc has not been unlocked, + * so force the unlock + */ + else if (!specLocked && specUnlocked) { + // unlockShared(true); + } + + } + int inEpoch; // 0, 1, -1 = not in int readLocked; int writeLocked; - std::string print() const; - }; + int specLocked; + int specUnlocked; + + GcLockBase *owner; + + void init(const GcLockBase * const self) { + if (!owner) + owner = const_cast<GcLockBase *>(self); + } + + + void lockShared(bool runDefer) { + if (!readLocked && !writeLocked) + owner->enterCS(this, runDefer); + + ++readLocked; + } + + void unlockShared(bool runDefer) { + if (readLocked <= 0) + throw ML::Exception("Bad read lock nesting"); + + --readLocked; + if (!readLocked && !writeLocked) + owner->exitCS(this, runDefer); - } + bool isLockedShared() { + return readLocked + writeLocked; } + void lockExclusive() { + if (!writeLocked) + owner->enterCSExclusive(this); + + ++writeLocked; + } + + void unlockExclusive() { + if (writeLocked <= 0) + throw ML::Exception("Bad write lock nesting"); + + --writeLocked; + if (!writeLocked) + owner->exitCSExclusive(this); + } + + void lockSpeculative() { + if (!specLocked && !specUnlocked) + lockShared(false); + + ++specLocked; + } + + void unlockSpeculative() { + if (!specLocked) + throw ML::Exception("Bad speculative lock nesting"); + + --specLocked; + if (!specLocked) { + if (++specUnlocked == SpeculativeThreshold) { + unlockShared(false); + specUnlocked = 0; + } + } + } + + void forceUnlock() { + ExcCheckEqual(specLocked, 0, "Bad forceUnlock call"); + + if (specUnlocked) { + unlockShared(true); + specUnlocked = 0; + } + } + + + std::string print() const; }; + typedef ML::ThreadSpecificInstanceInfo GcInfo; typedef typename GcInfo::PerThreadInfo ThreadGcInfo; - typedef ML::ThreadSpecificInstanceInfo - SpeculativeGcInfo; - typedef typename SpeculativeGcInfo::PerThreadInfo
ThreadSpeculativeGcInfo; - GcInfo gcInfo; - SpeculativeGcInfo specGcInfo; struct Data { Data(); @@ -179,13 +258,11 @@ struct GcLockBase : public boost::noncopyable { JML_ALWAYS_INLINE ThreadGcInfoEntry & getEntry(GcInfo::PerThreadInfo * info = 0) const { - return *gcInfo.get(info); - } + ThreadGcInfoEntry *entry = gcInfo.get(info); + entry->init(this); + return *entry; - JML_ALWAYS_INLINE SpeculativeEntry & - getSpecEntry(SpeculativeGcInfo::PerThreadInfo * info = 0) const - { - return *specGcInfo.get(info); + //return *gcInfo.get(info); } GcLockBase(); @@ -200,10 +277,7 @@ struct GcLockBase : public boost::noncopyable { { ThreadGcInfoEntry & entry = getEntry(info); - if (!entry.readLocked && !entry.writeLocked) - enterCS(&entry, runDefer); - - ++entry.readLocked; + entry.lockShared(runDefer); #if GC_LOCK_DEBUG using namespace std; @@ -219,11 +293,7 @@ struct GcLockBase : public boost::noncopyable { { ThreadGcInfoEntry & entry = getEntry(info); - if (entry.readLocked <= 0) - throw ML::Exception("bad read lock nesting"); - --entry.readLocked; - if (!entry.readLocked && !entry.writeLocked) - exitCS(&entry, runDefer); + entry.unlockShared(runDefer); #if GC_LOCK_DEBUG using namespace std; @@ -234,49 +304,37 @@ struct GcLockBase : public boost::noncopyable { #endif } - void lockSpeculative(SpeculativeGcInfo::PerThreadInfo * info = 0) + void lockSpeculative(GcInfo::PerThreadInfo * info = 0) { - SpeculativeEntry & entry = getSpecEntry(info); - if (entry.specLock == 0 && entry.specUnlock == 0) { - lockShared(0, false); - } - ++entry.specLock; + ThreadGcInfoEntry & entry = getEntry(info); + + entry.lockSpeculative(); } - void unlockSpeculative(SpeculativeGcInfo::PerThreadInfo * info = 0) + void unlockSpeculative(GcInfo::PerThreadInfo * info = 0) { - SpeculativeEntry & entry = getSpecEntry(info); - if (entry.specLock == 0) { - throw ML::Exception("Should lock before unlock"); - } + ThreadGcInfoEntry & entry = getEntry(info); - --entry.specLock; - if (!entry.specLock) { - if (++entry.specUnlock == SpeculativeThreshold) { - unlockShared(0, false); - entry.specUnlock = 0; - } - } + entry.unlockSpeculative(); } - void forceUnlock(SpeculativeGcInfo::PerThreadInfo * info = 0) { - SpeculativeEntry & entry = getSpecEntry(info); - ExcAssertEqual(entry.specLock, 0); - if (!entry.specLock && entry.specUnlock) { - unlockShared(0, true); - entry.specUnlock = 0; - } + void forceUnlock(GcInfo::PerThreadInfo * info = 0) { + ThreadGcInfoEntry & entry = getEntry(info); + + entry.forceUnlock(); } int isLockedShared(GcInfo::PerThreadInfo * info = 0) const { ThreadGcInfoEntry & entry = getEntry(info); - return entry.readLocked + entry.writeLocked; + + return entry.isLockedShared(); } int lockedInEpoch(GcInfo::PerThreadInfo * info = 0) const { ThreadGcInfoEntry & entry = getEntry(info); + return entry.inEpoch; } @@ -284,14 +342,7 @@ struct GcLockBase : public boost::noncopyable { { ThreadGcInfoEntry & entry = getEntry(info); - if (entry.readLocked) - throw ML::Exception("can't acquire write lock with read lock held"); - - if (!entry.writeLocked) - enterCSExclusive(&entry); - - ++entry.writeLocked; - + entry.lockExclusive(); #if GC_LOCK_DEBUG using namespace std; cerr << "lockExclusive " @@ -305,11 +356,7 @@ struct GcLockBase : public boost::noncopyable { { ThreadGcInfoEntry & entry = getEntry(info); - if (entry.writeLocked <= 0) - throw ML::Exception("bad write lock nesting"); - --entry.writeLocked; - if (!entry.writeLocked) - exitCSExclusive(&entry); + entry.unlockExclusive(); #if GC_LOCK_DEBUG using namespace std; @@ 
-323,6 +370,7 @@ struct GcLockBase : public boost::noncopyable { int isLockedExclusive(GcInfo::PerThreadInfo * info = 0) const { ThreadGcInfoEntry & entry = getEntry(info); + return entry.writeLocked; } diff --git a/gc/testing/gc_test.cc b/gc/testing/gc_test.cc index b7afcbbe..d2134223 100644 --- a/gc/testing/gc_test.cc +++ b/gc/testing/gc_test.cc @@ -394,8 +394,8 @@ struct TestBase { void checkVisible(int threadNum, unsigned long long start) { // We're reading from someone else's pointers, so we need to lock here - gc.enterCS(); - //gc.lockShared(); + //gc.enterCS(); + gc.lockShared(); for (unsigned i = 0; i < nthreads; ++i) { for (unsigned j = 0; j < nblocks; ++j) { @@ -415,8 +415,8 @@ struct TestBase { } } - gc.exitCS(); - //gc.unlockShared(); + //gc.exitCS(); + gc.unlockShared(); } void doReadThread(int threadNum) From c6cfcd66b90487dc467e152a7c707a0b5259fa3a Mon Sep 17 00:00:00 2001 From: Mathieu Stefani Date: Fri, 16 Aug 2013 17:01:08 -0400 Subject: [PATCH 06/12] GcLock: can now dissociate read-side from write-side critical sections. This allows us to block writes when needed --- gc/gc_lock.cc | 5 +- gc/gc_lock.h | 210 ++++++++++++++++++++++++++++++++++++------ gc/rcu_protected.h | 6 +- gc/testing/gc_test.cc | 140 +++++++++++++++++++++++++++- 4 files changed, 325 insertions(+), 36 deletions(-) diff --git a/gc/gc_lock.cc b/gc/gc_lock.cc index 667ce9bb..5521fcaa 100644 --- a/gc/gc_lock.cc +++ b/gc/gc_lock.cc @@ -242,7 +242,8 @@ print() const inline GcLockBase::Data:: Data() : - bits(0), bits2(0) + bits(0), bits2(0), + writeLock(0) { epoch = gcLockStartingEpoch; // makes it easier to test overflows. visibleEpoch = epoch; @@ -253,6 +254,7 @@ Data(const Data & other) { //ML::ticks(); q = other.q; + writeLock = other.writeLock; //ML::ticks(); } @@ -262,6 +264,7 @@ operator = (const Data & other) { //ML::ticks(); this->q = other.q; + this->writeLock = other.writeLock; //ML::ticks(); return *this; } diff --git a/gc/gc_lock.h b/gc/gc_lock.h index 46aa33fe..07ab1da6 100644 --- a/gc/gc_lock.h +++ b/gc/gc_lock.h @@ -48,7 +48,7 @@ struct GcLockBase : public boost::noncopyable { /// A thread's bookkeeping info about each GC area struct ThreadGcInfoEntry { ThreadGcInfoEntry() - : inEpoch(-1), readLocked(0), writeLocked(0), + : inEpoch(-1), readLocked(0), writeLocked(0), exclusiveLocked(0), owner(0) { } @@ -65,6 +65,7 @@ struct GcLockBase : public boost::noncopyable { int inEpoch; // 0, 1, -1 = not in int readLocked; int writeLocked; + int exclusiveLocked; GcLockBase *owner; @@ -72,42 +73,44 @@ struct GcLockBase : public boost::noncopyable { if (!owner) owner = const_cast(self); } - - void lockShared(RunDefer runDefer) { - if (!readLocked && !writeLocked) + void enterShared(RunDefer runDefer) { + if (!readLocked && !exclusiveLocked) owner->enterCS(this, runDefer); ++readLocked; } - void unlockShared(RunDefer runDefer) { + void exitShared(RunDefer runDefer) { if (readLocked <= 0) throw ML::Exception("Bad read lock nesting"); --readLocked; - if (!readLocked && !writeLocked) + if (!readLocked && !exclusiveLocked) owner->exitCS(this, runDefer); } bool isLockedShared() { - return readLocked + writeLocked; + return readLocked + exclusiveLocked; } void lockExclusive() { - if (!writeLocked) + if (readLocked || writeLocked) + throw ML::Exception("Can not lock exclusive while holding " + "read or write lock"); + if (!exclusiveLocked) owner->enterCSExclusive(this); - ++writeLocked; + ++exclusiveLocked; } void unlockExclusive() { - if (writeLocked <= 0) - throw ML::Exception("Bad write lock nesting"); + if 
(exclusiveLocked <= 0) + throw ML::Exception("Bad exclusive lock nesting"); - --writeLocked; - if (!writeLocked) + --exclusiveLocked; + if (!exclusiveLocked) owner->exitCSExclusive(this); } @@ -120,6 +123,10 @@ struct GcLockBase : public boost::noncopyable { GcInfo; typedef typename GcInfo::PerThreadInfo ThreadGcInfo; + typedef uint64_t Word; + static constexpr size_t Bits = sizeof(Word) * CHAR_BIT; + static constexpr Word StopBitMask = (1ULL << (Bits - 1)); + struct Data { Data(); Data(const Data & other); @@ -133,7 +140,7 @@ int32_t epoch; ///< Current epoch number (could be smaller). int16_t in[2]; ///< How many threads in each epoch int32_t visibleEpoch;///< Lowest epoch number that's visible - int32_t exclusive; ///< Mutex value to lock exclusively + int32_t exclusive; ///< Mutex value for exclusive lock }; struct { uint64_t bits; @@ -144,6 +151,8 @@ }; } JML_ALIGNED(16); + volatile Word writeLock; + int16_t inCurrent() const { return in[epoch & 1]; } int16_t inOld() const { return in[(epoch - 1)&1]; } @@ -205,8 +214,6 @@ ThreadGcInfoEntry *entry = gcInfo.get(info); entry->init(this); return *entry; - - //return *gcInfo.get(info); } GcLockBase(); @@ -216,43 +223,92 @@ /** Permanently deletes any resources associated with this lock. */ virtual void unlink() = 0; - void lockShared(GcInfo::PerThreadInfo * info = 0, + void enterShared(GcInfo::PerThreadInfo * info = 0, RunDefer runDefer = RD_YES) { ThreadGcInfoEntry & entry = getEntry(info); - entry.lockShared(runDefer); + entry.enterShared(runDefer); #if GC_LOCK_DEBUG using namespace std; cerr << "enterShared " << this << " index " << index << ": now " << entry.print() << " data " << data->print() << endl; #endif } - void unlockShared(GcInfo::PerThreadInfo * info = 0, - RunDefer runDefer = RD_YES) + void exitShared(GcInfo::PerThreadInfo * info = 0, + RunDefer runDefer = RD_YES) { ThreadGcInfoEntry & entry = getEntry(info); - entry.unlockShared(runDefer); + entry.exitShared(runDefer); #if GC_LOCK_DEBUG using namespace std; cerr << "exitShared " << this << " index " << index << ": now " << entry.print() << " data " << data->print() << endl; #endif } + void enterWriteShared(GcInfo::PerThreadInfo * info = 0, + RunDefer runDefer = RD_YES) + { + Word oldVal, newVal; + for (;;) { + auto writeLock = data->writeLock; + // We are write locked, continue spinning + if ((writeLock >> (Bits - 1)) == 1) + continue; + + oldVal = data->writeLock; + newVal = oldVal + 1; + + // If the CAS fails, it most likely means someone set + // the stop bit in the meantime, so we retry.
+ if (ML::cmp_xchg(data->writeLock, oldVal, newVal)) + break; + } + + + enterShared(info, runDefer); + } + + void exitWriteShared(GcInfo::PerThreadInfo * info = 0, + RunDefer runDefer = RD_YES) + { + ExcAssertGreater(data->writeLock, 0); + ML::atomic_dec(data->writeLock); + + exitShared(info, runDefer); + } + + void enterReadShared(GcInfo::PerThreadInfo * info = 0, + RunDefer runDefer = RD_YES) + { + ThreadGcInfoEntry & entry = getEntry(info); + + entry.enterShared(runDefer); + } + void exitReadShared(GcInfo::PerThreadInfo * info = 0, + RunDefer runDefer = RD_YES) + { + exitShared(info, runDefer); + } + int isLockedShared(GcInfo::PerThreadInfo * info = 0) const { ThreadGcInfoEntry & entry = getEntry(info); - return entry.isLockedShared(); + return entry.isLockedShared() || isLockedWrite(); + } + + bool isLockedWrite() const { + return getEntry().writeLocked; } int lockedInEpoch(GcInfo::PerThreadInfo * info = 0) const @@ -295,7 +351,56 @@ { ThreadGcInfoEntry & entry = getEntry(info); - return entry.writeLocked; + return entry.exclusiveLocked; + } + + void lockWrite() + { + ThreadGcInfoEntry &entry = getEntry(); + if (!entry.writeLocked) { + Word oldValue, newValue; + for (;;) { + if ((data->writeLock >> (Bits - 1)) == 1) + continue; + + oldValue = data->writeLock; + newValue = oldValue | StopBitMask; + if (ML::cmp_xchg(data->writeLock, oldValue, newValue)) + break; + + } + + // Stop bit must be set + ExcAssertEqual(data->writeLock >> (Bits - 1), 1); + + // At this point, we stopped all the upcoming writes. However, + // ongoing writes might still be executing. Issuing a writeBarrier + // will wait for all writes to finish before continuing + writeBarrier(); + + // No writes must be ongoing + ExcAssertEqual((data->writeLock & ~StopBitMask), 0); + } + ++entry.writeLocked; + } + + void writeBarrier() { + // Busy-waiting for all writes to finish + while ((data->writeLock & ~StopBitMask) > 0) { } + } + + + void unlockWrite(GcInfo::PerThreadInfo * info = 0) + { + ThreadGcInfoEntry &entry = getEntry(); + --entry.writeLocked; + if (!entry.writeLocked) { + Word oldValue = data->writeLock; + Word newValue = oldValue & ~StopBitMask; + if (!ML::cmp_xchg(data->writeLock, oldValue, newValue)) { + } + + } } @@ -308,13 +413,13 @@ doLock_(doLock) { if (doLock_) - lock.lockShared(0, runDefer_); + lock.enterShared(0, runDefer_); } ~SharedGuard() { if (doLock_) - lock.unlockShared(0, runDefer_); + lock.exitShared(0, runDefer_); } GcLockBase & lock; @@ -322,6 +427,56 @@ const DoLock doLock_; ///< Do we really lock?
}; + + struct ReadSharedGuard { + ReadSharedGuard(GcLockBase & lock, RunDefer runDefer = RD_YES) + : lock(lock), + runDefer(runDefer) + { + lock.enterReadShared(0, runDefer); + } + + ~ReadSharedGuard() + { + lock.exitReadShared(0, runDefer); + } + + GcLockBase & lock; + const RunDefer runDefer; + }; + + struct WriteSharedGuard { + WriteSharedGuard(GcLockBase & lock, RunDefer runDefer = RD_YES) + : lock(lock), + runDefer(runDefer) + { + lock.enterWriteShared(0, runDefer); + } + + ~WriteSharedGuard() + { + lock.exitWriteShared(0, runDefer); + } + + GcLockBase & lock; + const RunDefer runDefer; + }; + + struct WriteLockGuard { + WriteLockGuard(GcLockBase & lock) + : lock(lock) + { + lock.lockWrite(); + } + + ~WriteLockGuard() + { + lock.unlockWrite(); + } + + GcLockBase & lock; + }; + struct ExclusiveGuard { ExclusiveGuard(GcLockBase & lock) : lock(lock) @@ -394,7 +549,6 @@ struct GcLockBase : public boost::noncopyable { void dump(); - protected: Data* data; diff --git a/gc/rcu_protected.h b/gc/rcu_protected.h index b4f379ab..698179da 100644 --- a/gc/rcu_protected.h +++ b/gc/rcu_protected.h @@ -20,7 +20,7 @@ struct RcuLocked { : ptr(ptr), lock(lock) { if (lock) - lock->lockShared(); + lock->enterShared(); } /// Transfer from another lock @@ -37,7 +37,7 @@ struct RcuLocked { : ptr(ptr), lock(other.lock) { if (lock) - lock->lockShared(); + lock->enterShared(); } template @@ -72,7 +72,7 @@ struct RcuLocked { void unlock() { if (lock) { - lock->unlockShared(); + lock->exitShared(); lock = 0; } } diff --git a/gc/testing/gc_test.cc b/gc/testing/gc_test.cc index d2134223..bee7ba84 100644 --- a/gc/testing/gc_test.cc +++ b/gc/testing/gc_test.cc @@ -21,6 +21,8 @@ #include #include #include +#include +#include #include #include @@ -41,7 +43,7 @@ extern int32_t gcLockStartingEpoch; }; BOOST_AUTO_TEST_CASE ( test_gc ) { GcLock gc; - gc.lockShared(); + gc.enterShared(); BOOST_CHECK(gc.isLockedShared()); @@ -55,7 +57,7 @@ cerr << endl << "after defer" << endl; gc.dump(); - gc.unlockShared(); + gc.exitShared(); cerr << endl << "after unlock shared" << endl; gc.dump(); @@ -248,6 +250,136 @@ BOOST_AUTO_TEST_CASE(test_mutual_exclusion) } +BOOST_AUTO_TEST_CASE(test_write_exclusion) +{ + cerr << "Testing write exclusion" << endl; + + GcLock lock; + + std::atomic<bool> finished { false }; + + std::atomic<size_t> errors { 0 }; + std::atomic<size_t> numWrite { 0 }; + std::atomic<size_t> numRead { 0 }; + std::atomic<size_t> numLockWrite { 0 }; + + auto doWriteSharedThread = [&]() { + while (!finished.load()) { + GcLock::WriteSharedGuard guard(lock); + + numWrite.fetch_add(1); + if (numWrite.load() > 0 && numLockWrite.load() > 0) { + cerr << "at least one write when locked write" << endl; + errors.fetch_add(1); + } + + numWrite.fetch_sub(1); + } + }; + + auto doLockWriteThread = [&]() { + GcLock::WriteLockGuard guard(lock); + numLockWrite.fetch_add(1); + + while (!finished.load()) { + + if (numWrite.load() > 0) { + cerr << "write after locked write" << endl; + errors.fetch_add(1); + } + + + } + + numLockWrite.fetch_sub(1); + }; + + auto doReadSharedThread = [&]() { + while (!finished.load()) { + GcLock::ReadSharedGuard guard(lock); + + numRead.fetch_add(1); + this_thread::sleep_for(chrono::milliseconds(10)); + } + }; + + { + size_t writeThreads = 4; + cerr << "single write lock, multi writes" << endl; + + boost::thread_group group; + for (size_t i = 0; i < writeThreads; ++i) { + group.create_thread(doWriteSharedThread); + } + + this_thread::sleep_for(chrono::milliseconds(500)); + + group.create_thread(doLockWriteThread); +
this_thread::sleep_for(chrono::seconds(1)); + + finished.store(true); + group.join_all(); + + BOOST_CHECK_EQUAL(errors.load(), 0); + } + + { + size_t writeThreads = 16; + size_t writeLockThreads = 8; + cerr << "Multi write lock, multi writes" << endl; + + finished.store(false); + boost::thread_group group; + for (size_t i = 0; i < writeThreads; ++i) { + group.create_thread(doWriteSharedThread); + } + + this_thread::sleep_for(chrono::milliseconds(500)); + + for (size_t i = 0; i < writeLockThreads; ++i) { + group.create_thread(doLockWriteThread); + } + + this_thread::sleep_for(chrono::seconds(1)); + + finished.store(true); + group.join_all(); + + BOOST_CHECK_EQUAL(errors.load(), 0); + } + + + { + size_t writeThreads = 4; + size_t readThreads = 8; + cerr << "mixed reads and write lock" << endl; + + finished.store(false); + boost::thread_group group; + for (size_t i = 0; i < writeThreads; ++i) { + group.create_thread(doWriteSharedThread); + } + for (size_t i = 0; i < readThreads; ++i) { + group.create_thread(doReadSharedThread); + } + + this_thread::sleep_for(chrono::milliseconds(200)); + group.create_thread(doLockWriteThread); + + this_thread::sleep_for(chrono::seconds(1)); + finished.store(true); + group.join_all(); + BOOST_CHECK_EQUAL(errors.load(), 0); + + const size_t minimumReads = 100 * readThreads; + cout << numRead.load() << " total reads" << endl; + BOOST_CHECK_GE(numRead.load(), minimumReads); + } + + +} + #endif #define USE_MALLOC 1 @@ -395,7 +527,7 @@ struct TestBase { { // We're reading from someone else's pointers, so we need to lock here //gc.enterCS(); - gc.lockShared(); + gc.enterShared(); for (unsigned i = 0; i < nthreads; ++i) { for (unsigned j = 0; j < nblocks; ++j) { @@ -416,7 +548,7 @@ struct TestBase { } //gc.exitCS(); - gc.unlockShared(); + gc.exitShared(); } void doReadThread(int threadNum) From 03cd5e22cd59084a7f22ace3b1ceb93b0c264bdb Mon Sep 17 00:00:00 2001 From: Mathieu Stefani Date: Thu, 22 Aug 2013 14:24:22 -0400 Subject: [PATCH 07/12] Made some refinement to the GcLock. 
Also added a guard against infinite spinning --- gc/gc_lock.h | 138 +++++++++++++++++++++++++++--------- gc/testing/gc_test.cc | 14 ++--- 2 files changed, 96 insertions(+), 56 deletions(-) diff --git a/gc/gc_lock.h b/gc/gc_lock.h index 07ab1da6..ba0231dc 100644 --- a/gc/gc_lock.h +++ b/gc/gc_lock.h @@ -10,6 +10,7 @@ #define __mmap__gc_lock_h__ #define GC_LOCK_DEBUG 0 +#define GC_LOCK_SPIN_DEBUG 0 #include "jml/utils/exc_assert.h" #include "jml/arch/atomic_ops.h" #include "jml/arch/thread_specific.h" #include @@ -21,6 +22,30 @@ # include #endif +#include <chrono> +#include "jml/arch/backtrace.h" + +#if GC_LOCK_SPIN_DEBUG + +# define GCLOCK_SPINCHECK_DECL \ + std::chrono::time_point<std::chrono::system_clock> __start, __end; \ + __start = std::chrono::system_clock::now(); + +# define GCLOCK_SPINCHECK \ + do { \ + __end = std::chrono::system_clock::now(); \ + std::chrono::duration<double> elapsed = __end - __start; \ + if (elapsed > std::chrono::seconds(10)) { \ + throw ML::Exception("GCLOCK_SPINCHECK: spent more than 10" \ + " seconds spinning"); \ + } \ + } while (0) +#else +# define GCLOCK_SPINCHECK_DECL +# define GCLOCK_SPINCHECK ((void) 0) +#endif + + namespace Datacratic { /*****************************************************************************/ @@ -48,7 +74,7 @@ struct GcLockBase : public boost::noncopyable { /// A thread's bookkeeping info about each GC area struct ThreadGcInfoEntry { ThreadGcInfoEntry() : inEpoch(-1), readLocked(0), writeLocked(0), exclusiveLocked(0), + writeEntered(0), owner(0) { } @@ -66,6 +92,7 @@ int readLocked; int writeLocked; int exclusiveLocked; + int writeEntered; GcLockBase *owner; @@ -258,46 +285,66 @@ void enterWriteShared(GcInfo::PerThreadInfo * info = 0, RunDefer runDefer = RD_YES) { - Word oldVal, newVal; - for (;;) { - auto writeLock = data->writeLock; - // We are write locked, continue spinning - if ((writeLock >> (Bits - 1)) == 1) - continue; - - oldVal = data->writeLock; - newVal = oldVal + 1; - - // If the CAS fails, it surely means someone in - // the meantime set the stop bit, we then retry. + ThreadGcInfoEntry & entry = getEntry(info); + if (!entry.writeEntered) { + Word oldVal, newVal; + GCLOCK_SPINCHECK_DECL for (;;) { + auto writeLock = data->writeLock; + // We are write locked, continue spinning + GCLOCK_SPINCHECK; + + if ((writeLock >> (Bits - 1)) == 1) { + sched_yield(); + continue; + } + + oldVal = data->writeLock; + + // When our thread reads the current writeLock, it might + // have been locked by another thread spinning + // right after having been unlocked. + // Thus we could end up in a really weird situation, + // where the linearization point marked by the CAS + // would only be reached after the writeBarrier + // in the locking thread. + // + // To prevent this, we check again if the stop bit + // is set. + if ((oldVal >> (Bits - 1)) == 1) { + sched_yield(); + continue; + } newVal = oldVal + 1; + + + // If the CAS fails, someone else in the meantime + // must have entered a CS and incremented the counter + // behind our back.
We then retry + if (ML::cmp_xchg(data->writeLock, oldVal, newVal)) + break; + } + - exitShared(info, runDefer); + enterShared(info, runDefer); + } + ++entry.writeEntered; } - void enterReadShared(GcInfo::PerThreadInfo * info = 0, - RunDefer runDefer = RD_YES) + void exitWriteShared(GcInfo::PerThreadInfo * info = 0, + RunDefer runDefer = RD_YES) { ThreadGcInfoEntry & entry = getEntry(info); + ExcAssertGreater(entry.writeEntered, 0); + --entry.writeEntered; + if (!entry.writeEntered) { + ExcAssertGreater(data->writeLock, 0); + ML::atomic_dec(data->writeLock); - entry.enterShared(runDefer); - } - void exitReadShared(GcInfo::PerThreadInfo * info = 0, - RunDefer runDefer = RD_YES) - { - exitShared(info, runDefer); + exitShared(info, runDefer); + } } int isLockedShared(GcInfo::PerThreadInfo * info = 0) const @@ -359,11 +406,20 @@ struct GcLockBase : public boost::noncopyable { ThreadGcInfoEntry &entry = getEntry(); if (!entry.writeLocked) { Word oldValue, newValue; + GCLOCK_SPINCHECK_DECL for (;;) { + GCLOCK_SPINCHECK; if ((data->writeLock >> (Bits - 1)) == 1) continue; oldValue = data->writeLock; + + // See enterWriteShared for the reason of this + // double-check. + // TODO: is this really needed ? + if ((oldValue >> (Bits - 1)) == 1) + continue; + newValue = oldValue | StopBitMask; if (ML::cmp_xchg(data->writeLock, oldValue, newValue)) break; @@ -428,23 +484,6 @@ struct GcLockBase : public boost::noncopyable { }; - struct ReadSharedGuard { - ReadSharedGuard(GcLockBase & lock, RunDefer runDefer = RD_YES) - : lock(lock), - runDefer(runDefer) - { - lock.enterReadShared(0, runDefer); - } - - ~ReadSharedGuard() - { - lock.exitReadShared(0, runDefer); - } - - GcLockBase & lock; - const RunDefer runDefer; - }; - struct WriteSharedGuard { WriteSharedGuard(GcLockBase & lock, RunDefer runDefer = RD_YES) : lock(lock), @@ -638,5 +677,6 @@ struct SharedGcLock : public GcLockBase } // namespace Datacratic + #endif /* __mmap__gc_lock_h__ */ diff --git a/gc/testing/gc_test.cc b/gc/testing/gc_test.cc index bee7ba84..d62d2ba8 100644 --- a/gc/testing/gc_test.cc +++ b/gc/testing/gc_test.cc @@ -263,7 +263,7 @@ BOOST_AUTO_TEST_CASE(test_write_exclusion) std::atomic numRead { 0 }; std::atomic numLockWrite { 0 }; - auto doWriteSharedThread = [&]() { + auto doWriteThread = [&]() { while (!finished.load()) { GcLock::WriteSharedGuard guard(lock); @@ -294,9 +294,9 @@ BOOST_AUTO_TEST_CASE(test_write_exclusion) numLockWrite.fetch_sub(1); }; - auto doReadSharedThread = [&]() { + auto doReadThread = [&]() { while (!finished.load()) { - GcLock::ReadSharedGuard guard(lock); + GcLock::SharedGuard guard(lock); numRead.fetch_add(1); this_thread::sleep_for(chrono::milliseconds(10)); @@ -309,7 +309,7 @@ BOOST_AUTO_TEST_CASE(test_write_exclusion) boost::thread_group group; for (size_t i = 0; i < writeThreads; ++i) { - group.create_thread(doWriteSharedThread); + group.create_thread(doWriteThread); } this_thread::sleep_for(chrono::milliseconds(500)); @@ -332,7 +332,7 @@ BOOST_AUTO_TEST_CASE(test_write_exclusion) finished.store(false); boost::thread_group group; for (size_t i = 0; i < writeThreads; ++i) { - group.create_thread(doWriteSharedThread); + group.create_thread(doWriteThread); } this_thread::sleep_for(chrono::milliseconds(500)); @@ -358,10 +358,10 @@ BOOST_AUTO_TEST_CASE(test_write_exclusion) finished.store(false); boost::thread_group group; for (size_t i = 0; i < writeThreads; ++i) { - group.create_thread(doWriteSharedThread); + group.create_thread(doWriteThread); } for (size_t i = 0; i < readThreads; ++i) { - 
group.create_thread(doReadSharedThread); + group.create_thread(doReadThread); } this_thread::sleep_for(chrono::milliseconds(200)); group.create_thread(doLockWriteThread); From e76a74a59898dbdfd0b800a301e702e6cf453ee6 Mon Sep 17 00:00:00 2001 From: Mathieu Stefani Date: Mon, 26 Aug 2013 09:29:16 -0400 Subject: [PATCH 08/12] Calling ML::do_abort before throwing in GCLOCK_SPINCHECK macro. This is useful if we can't use catch throw in gdb --- gc/gc_lock.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/gc/gc_lock.h b/gc/gc_lock.h index ba0231dc..7a12cdc9 100644 --- a/gc/gc_lock.h +++ b/gc/gc_lock.h @@ -13,6 +13,7 @@ #define GC_LOCK_SPIN_DEBUG 0 #include "jml/utils/exc_assert.h" +#include "jml/utils/abort.h" #include "jml/arch/atomic_ops.h" #include "jml/arch/thread_specific.h" #include @@ -23,7 +24,6 @@ #endif #include <chrono> -#include "jml/arch/backtrace.h" #if GC_LOCK_SPIN_DEBUG @@ -36,6 +36,7 @@ __end = std::chrono::system_clock::now(); \ std::chrono::duration<double> elapsed = __end - __start; \ if (elapsed > std::chrono::seconds(10)) { \ + ML::do_abort(); \ throw ML::Exception("GCLOCK_SPINCHECK: spent more than 10" \ " seconds spinning"); \ } \ From 10b19619e2c870b676b4235c96a0004dfd0173fe Mon Sep 17 00:00:00 2001 From: Mathieu Stefani Date: Mon, 26 Aug 2013 09:36:56 -0400 Subject: [PATCH 09/12] Using AND mask rather than SHR to check the stop bit --- gc/gc_lock.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/gc/gc_lock.h b/gc/gc_lock.h index 7a12cdc9..e1717fab 100644 --- a/gc/gc_lock.h +++ b/gc/gc_lock.h @@ -296,7 +296,7 @@ struct GcLockBase : public boost::noncopyable { // We are write locked, continue spinning GCLOCK_SPINCHECK; - if ((writeLock >> (Bits - 1)) == 1) { + if (writeLock & StopBitMask) { sched_yield(); continue; } @@ -313,7 +313,7 @@ // To prevent this, we check again if the stop bit // is set. - if ((oldVal >> (Bits - 1)) == 1) { + if (oldVal & StopBitMask) { sched_yield(); continue; } @@ -410,7 +410,7 @@ GCLOCK_SPINCHECK; - if ((data->writeLock >> (Bits - 1)) == 1) + if (data->writeLock & StopBitMask) continue; oldValue = data->writeLock; // See enterWriteShared for the reason of this // double-check. // TODO: is this really needed ? - if ((oldValue >> (Bits - 1)) == 1) + if (oldValue & StopBitMask) continue; newValue = oldValue | StopBitMask; @@ -428,7 +428,7 @@ } // Stop bit must be set - ExcAssertEqual(data->writeLock >> (Bits - 1), 1); + ExcAssertEqual((data->writeLock & StopBitMask), StopBitMask); // At this point, we stopped all the upcoming writes. However, // ongoing writes might still be executing.
From 91d65428c75662c5975bf3bfd4e564c87681acea Mon Sep 17 00:00:00 2001
From: Mathieu Stefani
Date: Mon, 26 Aug 2013 09:50:59 -0400
Subject: [PATCH 10/12] Throwing an exception when the CAS fails in unlockWrite

---
 gc/gc_lock.h | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/gc/gc_lock.h b/gc/gc_lock.h
index e1717fab..758f4a99 100644
--- a/gc/gc_lock.h
+++ b/gc/gc_lock.h
@@ -454,7 +454,9 @@ struct GcLockBase : public boost::noncopyable {
         if (!entry.writeLocked) {
             Word oldValue = data->writeLock;
             Word newValue = oldValue & ~StopBitMask;
+
             if (!ML::cmp_xchg(data->writeLock, oldValue, newValue)) {
+                throw ML::Exception("Failed to unlockWrite");
             }
         }
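The exception added here encodes an invariant rather than a recoverable failure: once lockWrite holds the stop bit and the in-flight writers have drained, no other thread should be modifying writeLock, so the unlock CAS cannot legitimately fail. A rough standalone model of the unlock path under that assumption (hypothetical names, std::atomic standing in for ML::cmp_xchg):

    #include <atomic>
    #include <cstdint>
    #include <stdexcept>

    using Word = uint64_t;                              // assumed word type
    constexpr Word StopBitMask = Word(1) << 63;         // assumed bit layout

    void unlockWrite(std::atomic<Word>& writeLock)
    {
        Word oldValue = writeLock.load();
        Word newValue = oldValue & ~StopBitMask;  // drop the stop bit, keep the rest
        // Nobody else should touch the word while we hold the stop bit,
        // so a failed CAS can only mean the invariant was broken.
        if (!writeLock.compare_exchange_strong(oldValue, newValue))
            throw std::runtime_error("Failed to unlockWrite");
    }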
From 150205a5c39f45fe59c173691fd4788b8c793566 Mon Sep 17 00:00:00 2001
From: Mathieu Stefani
Date: Mon, 26 Aug 2013 10:29:21 -0400
Subject: [PATCH 11/12] Moved write-side critical section implementation into
 the source file

---
 gc/gc_lock.cc | 58 +++++++++++++++++++++++++++++++++++++++++++++++++++
 gc/gc_lock.h  | 56 +++++++------------------------------------------
 2 files changed, 65 insertions(+), 49 deletions(-)

diff --git a/gc/gc_lock.cc b/gc/gc_lock.cc
index 5521fcaa..91ebcbe0 100644
--- a/gc/gc_lock.cc
+++ b/gc/gc_lock.cc
@@ -628,6 +628,64 @@ exitCSExclusive(ThreadGcInfoEntry * entry)
     entry->inEpoch = -1;
 }
 
+void GcLockBase::
+enterCSWrite(GcInfo::PerThreadInfo * info,
+             RunDefer runDefer)
+{
+    Word oldVal, newVal;
+    GCLOCK_SPINCHECK_DECL
+
+    for (;;) {
+        auto writeLock = data->writeLock;
+        // We are write locked, continue spinning
+        GCLOCK_SPINCHECK;
+
+        if (writeLock & StopBitMask) {
+            sched_yield();
+            continue;
+        }
+
+        oldVal = data->writeLock;
+
+        // When our thread reads the current writeLock, it might
+        // have been locked by another thread spinning
+        // right after having been unlocked.
+        // Thus we could end up in a really weird situation,
+        // where the linearization point marked by the CAS
+        // would only be reached after the writeBarrier
+        // in the locking thread.
+        //
+        // To prevent this, we check again if the stop bit
+        // is set.
+        if (oldVal & StopBitMask) {
+            sched_yield();
+            continue;
+        }
+
+        newVal = oldVal + 1;
+
+        // If the CAS fails, someone else in the meantime
+        // must have entered a CS and incremented the counter
+        // behind our back. We then retry.
+        if (ML::cmp_xchg(data->writeLock, oldVal, newVal))
+            break;
+    }
+
+    enterShared(info, runDefer);
+}
+
+void GcLockBase::
+exitCSWrite(GcInfo::PerThreadInfo * info,
+            RunDefer runDefer)
+{
+    ExcAssertGreater(data->writeLock, 0);
+    ML::atomic_dec(data->writeLock);
+
+    exitShared(info, runDefer);
+}
+
 void GcLockBase::
 visibleBarrier()
diff --git a/gc/gc_lock.h b/gc/gc_lock.h
index 758f4a99..fd8d2504 100644
--- a/gc/gc_lock.h
+++ b/gc/gc_lock.h
@@ -223,6 +223,8 @@ struct GcLockBase : public boost::noncopyable {
 
     void enterCS(ThreadGcInfoEntry * entry = 0, RunDefer runDefer = RD_YES);
     void exitCS(ThreadGcInfoEntry * entry = 0, RunDefer runDefer = RD_YES);
+    void enterCSWrite(GcInfo::PerThreadInfo * info = 0, RunDefer runDefer = RD_YES);
+    void exitCSWrite(GcInfo::PerThreadInfo * info = 0, RunDefer runDefer = RD_YES);
     void enterCSExclusive(ThreadGcInfoEntry * entry = 0);
     void exitCSExclusive(ThreadGcInfoEntry * entry = 0);
@@ -287,50 +289,9 @@ struct GcLockBase : public boost::noncopyable {
                           RunDefer runDefer = RD_YES)
     {
         ThreadGcInfoEntry & entry = getEntry(info);
-        if (!entry.writeEntered) {
-            Word oldVal, newVal;
-            GCLOCK_SPINCHECK_DECL
-
-            for (;;) {
-                auto writeLock = data->writeLock;
-                // We are write locked, continue spinning
-                GCLOCK_SPINCHECK;
-
-                if (writeLock & StopBitMask) {
-                    sched_yield();
-                    continue;
-                }
-
-                oldVal = data->writeLock;
-
-                // When our thread reads the current writeLock, it might
-                // have been locked by another thread spinning
-                // right after having been unlocked.
-                // Thus we could end up in a really weird situation,
-                // where the linearization point marked by the CAS
-                // would only be reached after the writeBarrier
-                // in the locking thread.
-                //
-                // To prevent this, we check again if the stop bit
-                // is set.
-                if (oldVal & StopBitMask) {
-                    sched_yield();
-                    continue;
-                }
-
-                newVal = oldVal + 1;
-
+        if (!entry.writeEntered)
+            enterCSWrite(info, runDefer);
 
-                // If the CAS fails, someone else in the meantime
-                // must have entered a CS and incremented the counter
-                // behind our back. We then retry.
-                if (ML::cmp_xchg(data->writeLock, oldVal, newVal))
-                    break;
-            }
-
-
-            enterShared(info, runDefer);
-        }
         ++entry.writeEntered;
     }
 
@@ -339,13 +300,10 @@ struct GcLockBase : public boost::noncopyable {
     {
         ThreadGcInfoEntry & entry = getEntry(info);
         ExcAssertGreater(entry.writeEntered, 0);
-        --entry.writeEntered;
-        if (!entry.writeEntered) {
-            ExcAssertGreater(data->writeLock, 0);
-            ML::atomic_dec(data->writeLock);
 
-            exitShared(info, runDefer);
-        }
+        --entry.writeEntered;
+        if (!entry.writeEntered)
+            exitCSWrite(info, runDefer);
     }
 
     int isLockedShared(GcInfo::PerThreadInfo * info = 0) const
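This patch is a pure code move, but the shape it leaves behind is worth noting: the header keeps only the cheap, inlinable reentrancy bookkeeping, while the spin loop lives out of line in gc_lock.cc. A simplified model of that split, with hypothetical free functions standing in for the real members:

    #include <atomic>
    #include <cstdint>

    std::atomic<uint64_t> writeLock { 0 };   // shared lock word (model only)
    thread_local int writeEntered = 0;       // per-thread nesting depth

    // Slow path: in the real code, this spin loop lives in gc_lock.cc.
    void enterCSWrite() { ++writeLock; /* spin/stop-bit logic elided */ }
    void exitCSWrite()  { --writeLock; }

    // Fast path: the header-side wrappers only touch thread-local state
    // unless this is the outermost enter/exit.
    inline void enterWriteShared()
    {
        if (!writeEntered)
            enterCSWrite();
        ++writeEntered;
    }

    inline void exitWriteShared()
    {
        if (--writeEntered == 0)
            exitCSWrite();
    }

One payoff shows up immediately: the spin loop can now change (as the next patch does) without the change rippling through every translation unit that includes the header.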
From df3176db0ba9f4ab9657d3ec176de1a214aef8aa Mon Sep 17 00:00:00 2001
From: Mathieu Stefani
Date: Mon, 26 Aug 2013 13:14:04 -0400
Subject: [PATCH 12/12] Removed the double-check

We don't need it if we check after reading the old value.
---
 gc/gc_lock.cc | 18 +-----------------
 gc/gc_lock.h  |  6 +-----
 2 files changed, 2 insertions(+), 22 deletions(-)

diff --git a/gc/gc_lock.cc b/gc/gc_lock.cc
index 91ebcbe0..a8c9df87 100644
--- a/gc/gc_lock.cc
+++ b/gc/gc_lock.cc
@@ -636,27 +636,11 @@ enterCSWrite(GcInfo::PerThreadInfo * info,
     GCLOCK_SPINCHECK_DECL
 
     for (;;) {
-        auto writeLock = data->writeLock;
-        // We are write locked, continue spinning
         GCLOCK_SPINCHECK;
-
-        if (writeLock & StopBitMask) {
-            sched_yield();
-            continue;
-        }
 
         oldVal = data->writeLock;
 
-        // When our thread reads the current writeLock, it might
-        // have been locked by another thread spinning
-        // right after having been unlocked.
-        // Thus we could end up in a really weird situation,
-        // where the linearization point marked by the CAS
-        // would only be reached after the writeBarrier
-        // in the locking thread.
-        //
-        // To prevent this, we check again if the stop bit
-        // is set.
+        // Stop bit is set, meaning that it's locked.
         if (oldVal & StopBitMask) {
             sched_yield();
             continue;
         }
diff --git a/gc/gc_lock.h b/gc/gc_lock.h
index fd8d2504..dc848087 100644
--- a/gc/gc_lock.h
+++ b/gc/gc_lock.h
@@ -368,14 +368,10 @@ struct GcLockBase : public boost::noncopyable {
             GCLOCK_SPINCHECK_DECL
             for (;;) {
                 GCLOCK_SPINCHECK;
-                if (data->writeLock & StopBitMask)
-                    continue;
                 oldValue = data->writeLock;
 
-                // See enterWriteShared for the reason for this
-                // double-check.
-                // TODO: is this really needed?
+                // Stop bit is set, meaning that it's already locked.
                 if (oldValue & StopBitMask)
                     continue;