diff --git a/T.c b/T.c
index a33d1e90b67afe9e4acc6feb05ad71458a1c5d37..13bf25b324019a8de5d69395448f7d69721e9fb3 100644
--- a/T.c
+++ b/T.c
@@ -54,9 +54,16 @@ static void *T_send_thread(void *_)
 {
   while (1) {
     usleep(5000);
+    __sync_synchronize();
     while (T_cache[T_busylist_head].busy) {
-      char *b = T_cache[T_busylist_head].buffer;
-      int l = T_cache[T_busylist_head].length;
+      char *b;
+      int l;
+      /* TODO: verify which of these memory barriers are really needed;
+       * when in doubt, one is placed here too
+       */
+      __sync_synchronize();
+      b = T_cache[T_busylist_head].buffer;
+      l = T_cache[T_busylist_head].length;
       while (l) {
         int done = write(T_socket, b, l);
         if (done <= 0) {
@@ -123,7 +130,7 @@ void T_connect_to_tracer(char *addr, int port)
 
 #ifdef T_USE_SHARED_MEMORY
   /* setup shared memory */
-  T_shm_fd = shm_open(T_SHM_FILENAME, O_RDWR, 0666);
+  T_shm_fd = shm_open(T_SHM_FILENAME, O_RDWR /*| O_SYNC*/, 0666);
   shm_unlink(T_SHM_FILENAME);
   if (T_shm_fd == -1) { perror(T_SHM_FILENAME); abort(); }
   T_cache = mmap(NULL, T_CACHE_SIZE * sizeof(T_cache_t),
diff --git a/T.h b/T.h
index 405594f2d4b2b22dd5338c5737a3d494b8b3d81e..6797213d311818097ef7db008797c8f772687906 100644
--- a/T.h
+++ b/T.h
@@ -118,16 +118,18 @@ extern T_cache_t *T_cache;
 #ifdef T_USE_SHARED_MEMORY
 
 #define T_SEND() \
-  T_cache[T_LOCAL_slot].busy = 1; \
   T_cache[T_LOCAL_slot].length = T_LOCAL_size; \
+  __sync_synchronize(); \
+  T_cache[T_LOCAL_slot].busy = 1; \
   T_send(T_LOCAL_buf, T_LOCAL_size)
 
 #else /* T_USE_SHARED_MEMORY */
 
 /* when not using shared memory, wait for send to finish */
 #define T_SEND() \
-  T_cache[T_LOCAL_slot].busy = 1; \
   T_cache[T_LOCAL_slot].length = T_LOCAL_size; \
+  __sync_synchronize(); \
+  T_cache[T_LOCAL_slot].busy = 1; \
   T_send(T_LOCAL_buf, T_LOCAL_size); \
   while (T_cache[T_LOCAL_slot].busy) usleep(1*1000)
 
diff --git a/tracer/main.c b/tracer/main.c
index 18ed8087cc5519ffa8fd924f2e8e465fdfb6433e..cf8f8aac859df410407e9acb21ff93fdbfc3bf54 100644
--- a/tracer/main.c
+++ b/tracer/main.c
@@ -356,7 +356,8 @@ void wait_message(void)
 
 void init_shm(void)
 {
-  int s = shm_open(T_SHM_FILENAME, O_RDWR | O_CREAT, 0666);
+  int i;
+  int s = shm_open(T_SHM_FILENAME, O_RDWR | O_CREAT /*| O_SYNC*/, 0666);
   if (s == -1) { perror(T_SHM_FILENAME); abort(); }
   if (ftruncate(s, T_CACHE_SIZE * sizeof(T_cache_t)))
     { perror(T_SHM_FILENAME); abort(); }
@@ -365,6 +366,12 @@ void init_shm(void)
   if (T_cache == NULL)
     { perror(T_SHM_FILENAME); abort(); }
   close(s);
+
+  /* fill the memory with garbage to catch some potential problems
+   * (think multiprocessor sync issues, memory barriers, etc.)
+   */
+  memset(T_cache, 0x55, T_CACHE_SIZE * sizeof(T_cache_t));
+  for (i = 0; i < T_CACHE_SIZE; i++) T_cache[i].busy = 0;
 }
 
 #endif /* T_USE_SHARED_MEMORY */
@@ -555,6 +562,7 @@ no_init_message:
   while (1) {
 #ifdef T_USE_SHARED_MEMORY
     wait_message();
+    __sync_synchronize();
 #endif
 
 #ifdef T_USE_SHARED_MEMORY