diff --git a/README.md b/README.md
index 9e3f6cf..fdcc67d 100644
--- a/README.md
+++ b/README.md
@@ -35,3 +35,16 @@ locally. You can run the included examples directly after the build:
 ```bash
 python3 test.py
 ```
+
+### Benchmarking
+
+To measure how the sort implementation scales with different thread counts, run
+the benchmark script:
+
+```bash
+python3 benchmark_sort.py
+```
+
+The script generates a random list of 100 integers and times the sort using
+1, 2, 4 and 8 threads. Each run executes roughly 200k engine iterations, enough
+to fully sort the list, and then prints the elapsed time for that thread count.
diff --git a/benchmark_sort.py b/benchmark_sort.py
new file mode 100644
index 0000000..1ce2eda
--- /dev/null
+++ b/benchmark_sort.py
@@ -0,0 +1,50 @@
+import os
+import random
+import time
+import fraglets
+
+SORT_FILE = os.path.join(os.path.dirname(__file__), 'sort.fra')  # sort.fra in this script's own directory
+
+
+def load_sort_core():
+    lines = []
+    with open(SORT_FILE) as fh:
+        for line in fh:
+            line = line.strip()
+            if not line or line.startswith('#'):
+                continue
+            # skip the example sort invocation
+            if line.startswith('[sort '):
+                continue
+            lines.append(line)
+    return lines
+
+
+CORE_LINES = load_sort_core()  # read once at import time; reused by build_sort_lines
+
+
+def build_sort_lines(nums):
+    return CORE_LINES + ["[sort " + " ".join(map(str, nums)) + "]"]
+
+
+def run_once(nums, threads):
+    """Run the sort once and return (duration, sorted_nums)."""
+    f = fraglets.fraglets()
+    for line in build_sort_lines(nums):
+        f.parse(line)
+    start = time.perf_counter()
+    # Roughly tuned iteration count: larger lists require more
+    # iterations to complete. We found 200k iterations gives reliable
+    # results when sorting 100 numbers across thread counts.
+    f.run_threads(200000, 10000, threads, True)
+    dur = time.perf_counter() - start
+    sorted_nums = list(map(int, f.get_sorted()))
+    return dur, sorted_nums
+
+
+if __name__ == '__main__':
+    nums = [random.randint(-1000, 1000) for _ in range(100)]
+    for t in [1, 2, 4, 8]:
+        dur, out = run_once(nums, t)
+        assert out == sorted(nums)
+        print(f'Threads: {t}, time: {dur:.3f}s')
diff --git a/fraglets.cpp b/fraglets.cpp
index ef01c77..8fdf810 100644
--- a/fraglets.cpp
+++ b/fraglets.cpp
@@ -674,28 +674,24 @@ struct PropensityArgs {
     std::vector<symbol>* keys;
     size_t start;
     size_t end;
+    propMap local_prop;
+    double local_wt;
 };
 
 void* fraglets::propensity_thread(void* arg){
     PropensityArgs* args = static_cast<PropensityArgs*>(arg);
-    double local_wt = 0;
-    propMap local_prop;
+    args->local_wt = 0;  // partial total is kept in *args instead of a local; reset before accumulating
+    args->local_prop.clear();  // start this call from an empty per-args map
     for(size_t i=args->start;i<args->end;i++){
         symbol key = (*args->keys)[i];
         std::size_t m = args->self->active.multk(key);
         std::size_t p = args->self->passive.multk(key);
         std::size_t w = m*p;
         if(w>0){
-            local_prop[key] = w;
+            args->local_prop[key] = w;  // record only keys whose weight m*p is non-zero
         }
-        local_wt += w;
-    }
-    pthread_mutex_lock(&args->self->prop_mutex);
-    for(auto &kv : local_prop){
-        args->self->prop[kv.first] = kv.second;
+        args->local_wt += w;  // sum of weights over keys[start, end); no write to self, so no mutex here
     }
-    args->self->wt += local_wt;
-    pthread_mutex_unlock(&args->self->prop_mutex);
     return NULL;
 }
 
@@ -729,6 +725,12 @@ double fraglets::propensity_parallel(int nthreads){
             pthread_join(threads[i], NULL);
         }
     }
+    for(int i=0;i<nthreads;i++){
+        for(auto &kv : args[i].local_prop){
+            this->prop[kv.first] = kv.second;
+        }
+        this->wt += args[i].local_wt;
+    }
     if (this->wt <= 0){
         this->idle = true;
     }
diff --git a/fraglets.py b/fraglets.py
index 3f005dd..d462fa7 100644
--- a/fraglets.py
+++ b/fraglets.py
@@ -27,6 +27,10 @@ def run(self, iter,size,quiet=False):
         cFraglets.run(self.cfraglets,iter,size,quiet)
         self.iter = cFraglets.getIter(self.cfraglets)
 
+    def run_threads(self, iter, size, threads, quiet=False):  # same shape as run(), plus a thread count
+        cFraglets.run_threads(self.cfraglets, iter, size, threads, quiet)
+        self.iter = cFraglets.getIter(self.cfraglets)  # refresh self.iter from the extension after the run
+
     def parse(self, line):
         cFraglets.parse(self.cfraglets,line)
 
diff --git a/fragletsToPy.cpp b/fragletsToPy.cpp
index dba690c..753916f 100644
--- a/fragletsToPy.cpp
+++ b/fragletsToPy.cpp
@@ -71,6 +71,24 @@ PyObject* run(PyObject* self, PyObject* args)
     return Py_BuildValue("");
 }
 
+// Python entry point: run_threads(capsule, iter, size, threads, quiet) -> None.
+// Returns NULL with a Python exception set when the arguments are invalid.
+PyObject* run_threads(PyObject* self, PyObject* args)
+{
+    PyObject* fragletsCapsule_;
+    int iter_;
+    int size_;
+    int threads_;
+    unsigned char quiet_ = 0; // the "b" format unit stores an unsigned char, not a bool
+    if (!PyArg_ParseTuple(args, "Oiiib", &fragletsCapsule_, &iter_, &size_, &threads_, &quiet_))
+        return NULL; // bad arguments: propagate the exception PyArg_ParseTuple set
+    fraglets* frag = (fraglets*)PyCapsule_GetPointer(fragletsCapsule_, "fragletsPtr");
+    if (frag == NULL)
+        return NULL; // not a valid "fragletsPtr" capsule: exception already set
+    frag->run_parallel(iter_, size_, threads_, quiet_ != 0);
+    return Py_BuildValue("");
+}
+
 PyObject* getIter(PyObject* self, PyObject* args)
 {
     PyObject* fragletsCapsule_;
@@ -162,6 +180,8 @@ static PyMethodDef fragletsFunctions[] =
      "Create `fraglets` object"},
     {"run",run, METH_VARARGS,
     "runs vessel"},
+    {"run_threads",run_threads, METH_VARARGS,
+    "runs vessel with specified thread count"},
     {"parse",parse, METH_VARARGS,
     "parses string and injects mol"},
     {"getUnimolTags",getUnimolTags,METH_VARARGS,