diff --git a/openair1/PHY/TOOLS/time_meas.c b/openair1/PHY/TOOLS/time_meas.c
index 19e9d64d1f024ce3609670771909ec4c04558061..31a613d03297700b9beaeb662a91b487cae50afe 100644
--- a/openair1/PHY/TOOLS/time_meas.c
+++ b/openair1/PHY/TOOLS/time_meas.c
@@ -83,9 +83,8 @@ void print_meas(time_stats_t *ts, const char* name, time_stats_t * total_exec_ti
       //printf("%20s: total: %10.3f ms, average: %10.3f us (%10d trials)\n", name, ts->diff/cpu_freq_GHz/1000000.0, ts->diff/ts->trials/cpu_freq_GHz/1000.0, ts->trials);
 
       if ((total_exec_time == NULL) || (sf_exec_time== NULL)) {
-        fprintf(stderr, "%25s:  %15.3f ms ;  %15.3f us; %15d;\n",
+        fprintf(stderr, "%25s:  %15.3f us; %15d;\n",
                 name,
-                (ts->diff/cpu_freq_GHz/1000000.0),
                 (ts->diff/ts->trials/cpu_freq_GHz/1000.0),
                 ts->trials);
       } else {
diff --git a/openair1/PHY/defs.h b/openair1/PHY/defs.h
index b4b6247104293f12e02e14aee6f462b8b8a22f2d..ccdb6d7a069d33db7eed38392642d48b31aa4d92 100644
--- a/openair1/PHY/defs.h
+++ b/openair1/PHY/defs.h
@@ -740,6 +740,8 @@ typedef struct RU_t_s{
   void (*eNB_top)(struct PHY_VARS_eNB_s *eNB, int frame_rx, int subframe_rx, char *string);
   /// Timing statistics
   time_stats_t ofdm_demod_stats;
+  /// Timing statistics (TX)
+  time_stats_t ofdm_mod_stats;
   /// RX and TX buffers for precoder output
   RU_COMMON            common;
   /// beamforming weight vectors per eNB
@@ -755,6 +757,8 @@ typedef struct RU_t_s{
   openair0_timestamp   ts_offset;
   /// process scheduling variables
   RU_proc_t            proc;
+  /// stats thread pthread descriptor
+  pthread_t            ru_stats_thread;
 } RU_t;
 
 
diff --git a/openair1/SCHED/ru_procedures.c b/openair1/SCHED/ru_procedures.c
index dd33d1a185b14ae3f6886cbbd5df09294998661f..75f71e107f3fc934e459e1ba9e4f012dd860f2f3 100644
--- a/openair1/SCHED/ru_procedures.c
+++ b/openair1/SCHED/ru_procedures.c
@@ -83,6 +83,8 @@ void feptx_ofdm(RU_t *ru) {
       ((subframe_select(fp,subframe)==SF_S))) {
     //    LOG_D(HW,"Frame %d: Generating slot %d\n",frame,next_slot);
 
+    start_meas(&ru->ofdm_mod_stats);
+
     for (aa=0; aa<ru->nb_tx; aa++) {
       if (fp->Ncp == EXTENDED) {
         PHY_ofdm_mod(&ru->common.txdataF_BF[aa][0],
@@ -185,6 +187,7 @@ void feptx_ofdm(RU_t *ru) {
          ru->common.txdata[aa][tx_offset] = 0x00000000;
        }
      }
+     stop_meas(&ru->ofdm_mod_stats);
      LOG_D(PHY,"feptx_ofdm (TXPATH): frame %d, subframe %d: txp (time %p) %d dB, txp (freq) %d dB\n",
 	   ru->proc.frame_tx,subframe,txdata,dB_fixed(signal_energy((int32_t*)txdata,fp->samples_per_tti)),
 	   dB_fixed(signal_energy_nodc(ru->common.txdataF_BF[aa],2*slot_sizeF)));
@@ -265,6 +268,9 @@ static void *fep_thread(void *param) {
 
   RU_t *ru = (RU_t *)param;
   RU_proc_t *proc  = &ru->proc;
+
+  thread_top_init("fep_thread",0,870000,1000000,1000000);
+
   while (!oai_exit) {
 
     if (wait_on_condition(&proc->mutex_fep,&proc->cond_fep,&proc->instance_cnt_fep,"fep thread")<0) break;  
diff --git a/targets/RT/USER/lte-ru.c b/targets/RT/USER/lte-ru.c
index ad3ea8121c6fe1e78cfc6ac690794ce0fd22fa34..e15568e4c4b3fda5bf8221427f9d992d97b8ae6c 100644
--- a/targets/RT/USER/lte-ru.c
+++ b/targets/RT/USER/lte-ru.c
@@ -1323,6 +1323,21 @@ int setup_RU_buffers(RU_t *ru) {
   return(0);
 }
 
+/* RU stats thread: every second, if opp_enabled==1, print FEP RX/TX timing. */
+static void* ru_stats_thread(void* param) {
+  RU_t *ru = (RU_t*)param;
+
+  wait_sync("ru_stats_thread");
+  while (!oai_exit) {
+     sleep(1);
+     if (opp_enabled == 1) {
+       if (ru->feprx) print_meas(&ru->ofdm_demod_stats,"feprx",NULL,NULL);
+       if (ru->feptx_ofdm) print_meas(&ru->ofdm_mod_stats,"feptx_ofdm",NULL,NULL);
+     }
+  }
+  return NULL; /* a void* start routine must not fall off the end */
+}
+
 static void* ru_thread( void* param ) {
 
   static int ru_thread_status;
@@ -1462,6 +1477,7 @@ static void* ru_thread( void* param ) {
     if ((ru->fh_north_asynch_in == NULL) && (ru->fh_south_out)) ru->fh_south_out(ru);
  
     if (ru->fh_north_out) ru->fh_north_out(ru);
+
   }
   
 
@@ -1563,7 +1579,7 @@ int start_rf(RU_t *ru) {
 }
 
 extern void fep_full(RU_t *ru);
-extern void fep_full_2thread(RU_t *ru);
+extern void ru_fep_full_2thread(RU_t *ru);
 extern void feptx_ofdm(RU_t *ru);
 extern void feptx_prec(RU_t *ru);
 
@@ -1648,7 +1664,9 @@ void init_RU_proc(RU_t *ru) {
     pthread_setname_np( proc->pthread_FH, name );
     
   }
-  
+
+  init_fep_thread(ru,NULL); 
+  if (opp_enabled == 1) pthread_create(&ru->ru_stats_thread,NULL,ru_stats_thread,(void*)ru); 
   
 }
 
@@ -1902,7 +1920,7 @@ void init_RU(char *rf_config_file) {
 	ru->fh_north_out          = fh_if4p5_north_out;       // send_IF4p5 on reception
 	ru->fh_south_out          = tx_rf;                    // send output to RF
 	ru->fh_north_asynch_in    = fh_if4p5_north_asynch_in; // TX packets come asynchronously
-	ru->feprx                 = fep_full;                 // RX DFTs
+	ru->feprx                 = (get_nprocs()<=2) ? fep_full :fep_full;                 // RX DFTs
 	ru->feptx_ofdm            = feptx_ofdm;               // this is fep with idft only (no precoding in RRU)
 	ru->feptx_prec            = NULL;
 	ru->start_if              = start_if;                 // need to start the if interface for if4p5
@@ -1920,7 +1938,7 @@ void init_RU(char *rf_config_file) {
       }
       else if (ru->function == eNodeB_3GPP) {  
 	ru->do_prach             = 0;                       // no prach processing in RU            
-	ru->feprx                = fep_full;                // RX DFTs
+	ru->feprx                = (get_nprocs()<=2) ? fep_full : ru_fep_full_2thread;                // RX DFTs
 	ru->feptx_ofdm           = feptx_ofdm;              // this is fep with idft and precoding
 	ru->feptx_prec           = feptx_prec;              // this is fep with idft and precoding
 	ru->fh_north_in          = NULL;                    // no incoming fronthaul from north
@@ -1948,7 +1966,7 @@ void init_RU(char *rf_config_file) {
 
     case REMOTE_IF5: // the remote unit is IF5 RRU
       ru->do_prach               = 0;
-      ru->feprx                  = fep_full;                   // this is frequency-shift + DFTs
+      ru->feprx                  = (get_nprocs()<=2) ? fep_full : fep_full;                   // this is frequency-shift + DFTs
       ru->feptx_prec             = feptx_prec;                 // need to do transmit Precoding + IDFTs 
       ru->feptx_ofdm             = feptx_ofdm;                 // need to do transmit Precoding + IDFTs 
       if (ru->if_timing == synch_to_other) {