diff --git a/openair1/PHY/CODING/lte_rate_matching.c b/openair1/PHY/CODING/lte_rate_matching.c
index 96663bd8a6f8829b23a73c05700371ed0ef5f1a9..483cda3f939d21f938ee808605ceedc46c9b745a 100644
--- a/openair1/PHY/CODING/lte_rate_matching.c
+++ b/openair1/PHY/CODING/lte_rate_matching.c
@@ -53,7 +53,7 @@ uint32_t sub_block_interleaving_turbo(uint32_t D, uint8_t *d,uint8_t *w)
 {
 
   uint32_t RTC = (D>>5), ND, ND3;
-  uint32_t row,col,Kpi,index;
+  uint32_t row,col,Kpi;
   uint32_t index3,k,k2;
 #ifdef RM_DEBUG
   uint32_t nulled=0;
@@ -84,7 +84,6 @@ uint32_t sub_block_interleaving_turbo(uint32_t D, uint8_t *d,uint8_t *w)
 #ifdef RM_DEBUG
     printf("Col %d\n",col);
 #endif
-    index = bitrev[col];
     index3 = bitrev_x3[col];//3*index;
 
     for (row=0; row<RTC; row++) {
@@ -108,10 +107,7 @@ uint32_t sub_block_interleaving_turbo(uint32_t D, uint8_t *d,uint8_t *w)
 
 #endif
       index3+=96;
-      index+=32;
-      k++;
-      k2++;
-      k2++;
+      k++;k2+=2;
     }
   }
 
diff --git a/openair1/PHY/LTE_TRANSPORT/dlsch_llr_computation.c b/openair1/PHY/LTE_TRANSPORT/dlsch_llr_computation.c
index f9f6002970ec433d7c962340c28a9954d379bcd3..950422abb8c8370094a674e8071eb1beb57e47d2 100644
--- a/openair1/PHY/LTE_TRANSPORT/dlsch_llr_computation.c
+++ b/openair1/PHY/LTE_TRANSPORT/dlsch_llr_computation.c
@@ -861,7 +861,7 @@ void dlsch_64qam_llr(LTE_DL_FRAME_PARMS *frame_parms,
 
   len_mod4 =len&3;
   len2=len>>2;  // length in quad words (4 REs)
-  len2+=(len_mod4?0:1);
+  len2+=((len_mod4==0)?0:1);
 
   for (i=0; i<len2; i++) {
 
diff --git a/openair1/PHY/LTE_TRANSPORT/dlsch_modulation.c b/openair1/PHY/LTE_TRANSPORT/dlsch_modulation.c
index 3b9c58ca1bdd03ee55ca7606dadcb665014916a6..7f733a0046bc5c4b73988d5aaa2dd30d0a13932a 100644
--- a/openair1/PHY/LTE_TRANSPORT/dlsch_modulation.c
+++ b/openair1/PHY/LTE_TRANSPORT/dlsch_modulation.c
@@ -136,6 +136,334 @@ void layer1prec2A(int32_t *antenna0_sample, int32_t *antenna1_sample, uint8_t pr
   ((int16_t *)antenna1_sample)[1] = (int16_t)((((int16_t *)antenna1_sample)[1]*ONE_OVER_SQRT2_Q15)>>15);  */
 }
 
+uint32_t FOUR[2]={0,4}; /* FOUR[b] == 4*b for b in {0,1}; added to the 64QAM table index in the *_64QAM_siso functions below. NOTE(review): mutable, external linkage -- could be static const if no other file references it, TODO confirm. */
+uint32_t TWO[2]={0,2}; /* TWO[b] == 2*b for b in {0,1}; added to the 16QAM and 64QAM table indices below. Indexing with any value other than 0 or 1 reads out of bounds. */
+
+int allocate_REs_in_RB_no_pilots_16QAM_siso(LTE_DL_FRAME_PARMS *frame_parms,
+					    mod_sym_t **txdataF,
+					    uint32_t *jj,
+					    uint32_t *jj2,
+					    uint16_t re_offset,
+					    uint32_t symbol_offset,
+					    LTE_DL_eNB_HARQ_t *dlsch0_harq,
+					    LTE_DL_eNB_HARQ_t *dlsch1_harq,
+					    uint8_t pilots,
+					    int16_t amp,
+					    uint8_t precoder_index,
+					    int16_t *qam_table_s0,
+					    int16_t *qam_table_s1,
+					    uint32_t *re_allocated,
+					    uint8_t skip_dc,
+					    uint8_t skip_half,
+					    int *P1_SHIFT,
+					    int *P2_SHIFT)
+{
+  /* 16QAM no-pilots variant: writes 12 entries of txdataF[0], consuming 4 entries of dlsch0_harq->e (from e[*jj]) per RE. */
+  /* Returns 0 after adding 12 to *re_allocated and 48 to *jj. */
+  uint8_t *x0             = dlsch0_harq->e;
+  uint32_t qam16_table_offset_re = 0;
+  uint32_t qam16_table_offset_im = 0;
+  /* Not referenced in this body: jj2, dlsch1_harq, pilots, amp, precoder_index, qam_table_s1, skip_half, P1_SHIFT, P2_SHIFT. */
+  uint32_t tti_offset;
+  uint8_t re;
+  uint8_t *x0p;
+  /* NOTE(review): skip_half is ignored here -- confirm callers never need a half-RB allocation on this path. */
+  if (skip_dc == 0) {
+    for (x0p=&x0[*jj],tti_offset=symbol_offset+re_offset,re=0; 
+	 re<12; 
+	 re++,x0p+=4,tti_offset++) {
+      /* index into qam_table_s0 is TWO[e0]+e2 (first int16) and TWO[e1]+e3 (second int16); assumes every e entry is 0 or 1 -- TODO confirm */
+      qam16_table_offset_re=TWO[x0p[0]];
+      qam16_table_offset_im=TWO[x0p[1]];
+      qam16_table_offset_re+=x0p[2];
+      qam16_table_offset_im+=x0p[3];
+      ((int16_t *)&txdataF[0][tti_offset])[0]=qam_table_s0[qam16_table_offset_re];
+      ((int16_t *)&txdataF[0][tti_offset])[1]=qam_table_s0[qam16_table_offset_im];
+    }
+  }
+  else {
+    for (x0p=&x0[*jj],tti_offset=symbol_offset+re_offset,re=0; 
+	 re<6; 
+	 re++,x0p+=4,tti_offset++) {
+      /* first six REs, written from symbol_offset+re_offset upwards */
+      qam16_table_offset_re=TWO[x0p[0]];
+      qam16_table_offset_im=TWO[x0p[1]];
+      qam16_table_offset_re+=x0p[2];
+      qam16_table_offset_im+=x0p[3];
+      ((int16_t *)&txdataF[0][tti_offset])[0]=qam_table_s0[qam16_table_offset_re];
+      ((int16_t *)&txdataF[0][tti_offset])[1]=qam_table_s0[qam16_table_offset_im];
+    }
+    /* re and x0p continue from the loop above; only tti_offset is re-initialised (presumably wrapping past the DC carrier -- TODO confirm) */
+    for (tti_offset=symbol_offset+re_offset-frame_parms->ofdm_symbol_size+7; 
+	 re<12; 
+	 re++,x0p+=4,tti_offset++) {
+      /* remaining six REs (re = 6..11) */
+      qam16_table_offset_re=TWO[x0p[0]];
+      qam16_table_offset_im=TWO[x0p[1]];
+      qam16_table_offset_re+=x0p[2];
+      qam16_table_offset_im+=x0p[3];
+      ((int16_t *)&txdataF[0][tti_offset])[0]=qam_table_s0[qam16_table_offset_re];
+      ((int16_t *)&txdataF[0][tti_offset])[1]=qam_table_s0[qam16_table_offset_im];
+    }
+  }
+  *re_allocated = *re_allocated + 12;
+  *jj=*jj + 48;
+  /* 48 = 12 REs x 4 entries of e each */
+    return(0);
+}
+
+int allocate_REs_in_RB_pilots_16QAM_siso(LTE_DL_FRAME_PARMS *frame_parms,
+					 mod_sym_t **txdataF,
+					 uint32_t *jj,
+					 uint32_t *jj2,
+					 uint16_t re_offset,
+					 uint32_t symbol_offset,
+					 LTE_DL_eNB_HARQ_t *dlsch0_harq,
+					 LTE_DL_eNB_HARQ_t *dlsch1_harq,
+					 uint8_t pilots,
+					 int16_t amp,
+					 uint8_t precoder_index,
+					 int16_t *qam_table_s0,
+					 int16_t *qam_table_s1,
+					 uint32_t *re_allocated,
+					 uint8_t skip_dc,
+					 uint8_t skip_half,
+					 int *P1_SHIFT,
+					 int *P2_SHIFT)
+{
+  /* 16QAM pilots variant: walks the RB with the step table P1_SHIFT, so positions the table steps over are never written. */
+  /* Starts at re=P1_SHIFT[0]; after each write tti_offset and re both advance by P1_SHIFT[re+1] (index can reach 12, so P1_SHIFT needs 13 entries). */
+  uint8_t *x0             = dlsch0_harq->e;
+  uint32_t qam16_table_offset_re = 0;
+  uint32_t qam16_table_offset_im = 0;
+  /* Returns 0 after adding 10 to *re_allocated and 40 to *jj regardless of the iteration count -- assumes P1_SHIFT skips exactly two positions, TODO confirm. */
+  uint32_t tti_offset;
+  uint8_t re;
+  uint8_t *x0p;
+  /* Not referenced in this body: jj2, dlsch1_harq, pilots, amp, precoder_index, qam_table_s1, skip_half, P2_SHIFT. */
+  /* NOTE(review): the visible caller fills P1_SHIFT only when mode1_flag==1 but selects this function on mimo_mode==SISO -- confirm the two always coincide. */
+  if (skip_dc == 0) {
+    //    printf("pilots: P1_SHIFT[0] %d\n",P1_SHIFT[0]);
+    for (x0p=&x0[*jj],tti_offset=symbol_offset+re_offset+P1_SHIFT[0],re=P1_SHIFT[0]; 
+	 re<12; 
+	 x0p+=4) {
+      /* index into qam_table_s0 is TWO[e0]+e2 (first int16) and TWO[e1]+e3 (second int16); assumes every e entry is 0 or 1 -- TODO confirm */
+      qam16_table_offset_re=TWO[x0p[0]];
+      qam16_table_offset_im=TWO[x0p[1]];
+      qam16_table_offset_re+=x0p[2];
+      qam16_table_offset_im+=x0p[3];
+      ((int16_t *)&txdataF[0][tti_offset])[0]=qam_table_s0[qam16_table_offset_re];
+      ((int16_t *)&txdataF[0][tti_offset])[1]=qam_table_s0[qam16_table_offset_im];
+      //      printf("pilots: re %d, tti_offset %d, P1_SHIFT %d\n",re,tti_offset,P1_SHIFT[re+1]);
+      tti_offset+=P1_SHIFT[re+1];
+      re+=P1_SHIFT[re+1];
+    }
+  }
+  else {
+    for (x0p=&x0[*jj],tti_offset=symbol_offset+re_offset+P1_SHIFT[0],re=P1_SHIFT[0]; 
+	 re<6; 
+	 x0p+=4) {
+      /* first part of the RB (re < 6), written from symbol_offset+re_offset upwards */
+      qam16_table_offset_re=TWO[x0p[0]];
+      qam16_table_offset_im=TWO[x0p[1]];
+      qam16_table_offset_re+=x0p[2];
+      qam16_table_offset_im+=x0p[3];
+      ((int16_t *)&txdataF[0][tti_offset])[0]=qam_table_s0[qam16_table_offset_re];
+      ((int16_t *)&txdataF[0][tti_offset])[1]=qam_table_s0[qam16_table_offset_im];
+      tti_offset+=P1_SHIFT[re+1];
+      re+=P1_SHIFT[re+1];
+    }
+    /* re (6 or 7 here, depending on the last step) and x0p continue from the loop above; only tti_offset is re-initialised, using P1_SHIFT[6] */
+    for (tti_offset=symbol_offset+re_offset-frame_parms->ofdm_symbol_size+6+P1_SHIFT[6]; 
+	 re<12; 
+	 x0p+=4) {
+      /* rest of the RB (re up to 11) */
+      qam16_table_offset_re=TWO[x0p[0]];
+      qam16_table_offset_im=TWO[x0p[1]];
+      qam16_table_offset_re+=x0p[2];
+      qam16_table_offset_im+=x0p[3];
+      ((int16_t *)&txdataF[0][tti_offset])[0]=qam_table_s0[qam16_table_offset_re];
+      ((int16_t *)&txdataF[0][tti_offset])[1]=qam_table_s0[qam16_table_offset_im];
+      tti_offset+=P1_SHIFT[re+1];
+      re+=P1_SHIFT[re+1];
+    }
+  }
+  *re_allocated = *re_allocated + 10;
+  *jj=*jj + 40;
+  /* 40 = 10 REs x 4 entries of e each */
+  return(0);
+}
+
+int allocate_REs_in_RB_no_pilots_64QAM_siso(LTE_DL_FRAME_PARMS *frame_parms,
+					    mod_sym_t **txdataF,
+					    uint32_t *jj,
+					    uint32_t *jj2,
+					    uint16_t re_offset,
+					    uint32_t symbol_offset,
+					    LTE_DL_eNB_HARQ_t *dlsch0_harq,
+					    LTE_DL_eNB_HARQ_t *dlsch1_harq,
+					    uint8_t pilots,
+					    int16_t amp,
+					    uint8_t precoder_index,
+					    int16_t *qam_table_s0,
+					    int16_t *qam_table_s1,
+					    uint32_t *re_allocated,
+					    uint8_t skip_dc,
+					    uint8_t skip_half,
+					    int *P1_SHIFT,
+					    int *P2_SHIFT)
+{
+  /* 64QAM no-pilots variant: writes 12 entries of txdataF[0], consuming 6 entries of dlsch0_harq->e (from e[*jj]) per RE. */
+  /* Returns 0 after adding 12 to *re_allocated and 72 to *jj. */
+  uint8_t *x0             = dlsch0_harq->e;
+  uint32_t qam64_table_offset_re = 0;
+  uint32_t qam64_table_offset_im = 0;
+  /* Not referenced in this body: jj2, dlsch1_harq, pilots, amp, precoder_index, qam_table_s1, skip_half, P1_SHIFT, P2_SHIFT. */
+  uint32_t tti_offset;
+  uint8_t re;
+  uint8_t *x0p;
+  /* NOTE(review): skip_half is ignored here -- confirm callers never need a half-RB allocation on this path. */
+  if (skip_dc == 0) {
+
+    /* all 12 REs at consecutive positions starting at symbol_offset+re_offset */
+    for (x0p=&x0[*jj],tti_offset=symbol_offset+re_offset,re=0; 
+	 re<12; 
+	 re++,x0p+=6,tti_offset++) {
+      /* index into qam_table_s0 is FOUR[e0]+TWO[e2]+e4 (first int16) and FOUR[e1]+TWO[e3]+e5 (second int16); assumes every e entry is 0 or 1 -- TODO confirm */
+      qam64_table_offset_re=FOUR[x0p[0]];
+      qam64_table_offset_im=FOUR[x0p[1]];
+      qam64_table_offset_re+=TWO[x0p[2]];
+      qam64_table_offset_im+=TWO[x0p[3]];
+      qam64_table_offset_re+=x0p[4];
+      qam64_table_offset_im+=x0p[5];
+      ((int16_t *)&txdataF[0][tti_offset])[0]=qam_table_s0[qam64_table_offset_re];
+      ((int16_t *)&txdataF[0][tti_offset])[1]=qam_table_s0[qam64_table_offset_im];
+    }
+  }
+  else {
+    for (x0p=&x0[*jj],tti_offset=symbol_offset+re_offset,re=0; 
+	 re<6; 
+	 re++,x0p+=6,tti_offset++) {
+      /* first six REs, written from symbol_offset+re_offset upwards */
+      qam64_table_offset_re=FOUR[x0p[0]];
+      qam64_table_offset_im=FOUR[x0p[1]];
+      qam64_table_offset_re+=TWO[x0p[2]];
+      qam64_table_offset_im+=TWO[x0p[3]];
+      qam64_table_offset_re+=x0p[4];
+      qam64_table_offset_im+=x0p[5];
+      ((int16_t *)&txdataF[0][tti_offset])[0]=qam_table_s0[qam64_table_offset_re];
+      ((int16_t *)&txdataF[0][tti_offset])[1]=qam_table_s0[qam64_table_offset_im];
+    }
+    /* re and x0p continue from the loop above; only tti_offset is re-initialised (presumably wrapping past the DC carrier -- TODO confirm) */
+    for (tti_offset=symbol_offset+re_offset-frame_parms->ofdm_symbol_size+7; 
+	 re<12; 
+	 re++,x0p+=6,tti_offset++) {
+      /* remaining six REs (re = 6..11) */
+      qam64_table_offset_re=FOUR[x0p[0]];
+      qam64_table_offset_im=FOUR[x0p[1]];
+      qam64_table_offset_re+=TWO[x0p[2]];
+      qam64_table_offset_im+=TWO[x0p[3]];
+      qam64_table_offset_re+=x0p[4];
+      qam64_table_offset_im+=x0p[5];
+      ((int16_t *)&txdataF[0][tti_offset])[0]=qam_table_s0[qam64_table_offset_re];
+      ((int16_t *)&txdataF[0][tti_offset])[1]=qam_table_s0[qam64_table_offset_im];
+    }
+  }
+  /* 72 = 12 REs x 6 entries of e each */
+  *re_allocated = *re_allocated + 12;
+  *jj=*jj + 72;
+    
+  return(0);
+}
+
+int allocate_REs_in_RB_pilots_64QAM_siso(LTE_DL_FRAME_PARMS *frame_parms,
+					 mod_sym_t **txdataF,
+					 uint32_t *jj,
+					 uint32_t *jj2,
+					 uint16_t re_offset,
+					 uint32_t symbol_offset,
+					 LTE_DL_eNB_HARQ_t *dlsch0_harq,
+					 LTE_DL_eNB_HARQ_t *dlsch1_harq,
+					 uint8_t pilots,
+					 int16_t amp,
+					 uint8_t precoder_index,
+					 int16_t *qam_table_s0,
+					 int16_t *qam_table_s1,
+					 uint32_t *re_allocated,
+					 uint8_t skip_dc,
+					 uint8_t skip_half,
+					 int *P1_SHIFT,
+					 int *P2_SHIFT)
+{
+  /* 64QAM pilots variant: walks the RB with the step table P1_SHIFT, so positions the table steps over are never written. */
+  /* Starts at re=P1_SHIFT[0]; after each write tti_offset and re both advance by P1_SHIFT[re+1] (index can reach 12, so P1_SHIFT needs 13 entries). */
+  uint8_t *x0             = dlsch0_harq->e;
+  uint32_t qam64_table_offset_re = 0;
+  uint32_t qam64_table_offset_im = 0;
+  /* Returns 0 after adding 10 to *re_allocated and 60 to *jj regardless of the iteration count -- assumes P1_SHIFT skips exactly two positions, TODO confirm. */
+  uint32_t tti_offset;
+  uint8_t re;
+  uint8_t *x0p;
+  /* Not referenced in this body: jj2, dlsch1_harq, pilots, amp, precoder_index, qam_table_s1, skip_half, P2_SHIFT. */
+  /* NOTE(review): the visible caller fills P1_SHIFT only when mode1_flag==1 but selects this function on mimo_mode==SISO -- confirm the two always coincide. */
+  if (skip_dc == 0) {
+    //    printf("pilots: P1_SHIFT[0] %d\n",P1_SHIFT[0]);
+    for (x0p=&x0[*jj],tti_offset=symbol_offset+re_offset+P1_SHIFT[0],re=P1_SHIFT[0]; 
+	 re<12; 
+	 x0p+=6) {
+      /* index into qam_table_s0 is FOUR[e0]+TWO[e2]+e4 (first int16) and FOUR[e1]+TWO[e3]+e5 (second int16); assumes every e entry is 0 or 1 -- TODO confirm */
+      qam64_table_offset_re=FOUR[x0p[0]];
+      qam64_table_offset_im=FOUR[x0p[1]];
+      qam64_table_offset_re+=TWO[x0p[2]];
+      qam64_table_offset_im+=TWO[x0p[3]];
+      qam64_table_offset_re+=x0p[4];
+      qam64_table_offset_im+=x0p[5];
+      ((int16_t *)&txdataF[0][tti_offset])[0]=qam_table_s0[qam64_table_offset_re];
+      ((int16_t *)&txdataF[0][tti_offset])[1]=qam_table_s0[qam64_table_offset_im];
+      //      printf("pilots: re %d, tti_offset %d, P1_SHIFT %d\n",re,tti_offset,P1_SHIFT[re+1]);
+      tti_offset+=P1_SHIFT[re+1];
+      re+=P1_SHIFT[re+1];
+    }
+  }
+  else {
+    for (x0p=&x0[*jj],tti_offset=symbol_offset+re_offset+P1_SHIFT[0],re=P1_SHIFT[0]; 
+	 re<6; 
+	 x0p+=6) {
+      /* first part of the RB (re < 6), written from symbol_offset+re_offset upwards */
+      qam64_table_offset_re=FOUR[x0p[0]];
+      qam64_table_offset_im=FOUR[x0p[1]];
+      qam64_table_offset_re+=TWO[x0p[2]];
+      qam64_table_offset_im+=TWO[x0p[3]];
+      qam64_table_offset_re+=x0p[4];
+      qam64_table_offset_im+=x0p[5];
+      ((int16_t *)&txdataF[0][tti_offset])[0]=qam_table_s0[qam64_table_offset_re];
+      ((int16_t *)&txdataF[0][tti_offset])[1]=qam_table_s0[qam64_table_offset_im];
+      tti_offset+=P1_SHIFT[re+1];
+      re+=P1_SHIFT[re+1];
+    }
+    /* re (6 or 7 here, depending on the last step) and x0p continue from the loop above; only tti_offset is re-initialised, using P1_SHIFT[6] */
+    for (tti_offset=symbol_offset+re_offset-frame_parms->ofdm_symbol_size+6+P1_SHIFT[6]; 
+	 re<12; 
+	 x0p+=6) {
+      /* rest of the RB (re up to 11) */
+      qam64_table_offset_re=FOUR[x0p[0]];
+      qam64_table_offset_im=FOUR[x0p[1]];
+      qam64_table_offset_re+=TWO[x0p[2]];
+      qam64_table_offset_im+=TWO[x0p[3]];
+      qam64_table_offset_re+=x0p[4];
+      qam64_table_offset_im+=x0p[5];
+      ((int16_t *)&txdataF[0][tti_offset])[0]=qam_table_s0[qam64_table_offset_re];
+      ((int16_t *)&txdataF[0][tti_offset])[1]=qam_table_s0[qam64_table_offset_im];
+      tti_offset+=P1_SHIFT[re+1];
+      re+=P1_SHIFT[re+1];
+    }
+  }
+  *re_allocated = *re_allocated + 10;
+  *jj=*jj + 60;
+  /* 60 = 10 REs x 6 entries of e each */
+  return(0);
+}
+
 int allocate_REs_in_RB(LTE_DL_FRAME_PARMS *frame_parms,
                        mod_sym_t **txdataF,
                        uint32_t *jj,
@@ -151,7 +479,9 @@ int allocate_REs_in_RB(LTE_DL_FRAME_PARMS *frame_parms,
                        int16_t *qam_table_s1,
                        uint32_t *re_allocated,
                        uint8_t skip_dc,
-                       uint8_t skip_half)
+                       uint8_t skip_half,
+		       int *P1_SHIFT,
+		       int *P2_SHIFT)
 {
 
 
@@ -209,27 +539,6 @@ int allocate_REs_in_RB(LTE_DL_FRAME_PARMS *frame_parms,
 
   }
 
-  /*
-  switch (mod_order) {
-  case 2:
-    // QPSK single stream
-
-    break;
-  case 4:
-    //16QAM Single stream
-    gain_lin_16QAM1 = (int16_t)(((int32_t)amp*QAM16_n1)>>15);
-    gain_lin_16QAM2 = (int16_t)(((int32_t)amp*QAM16_n2)>>15);
-
-    break;
-
-  case 6:
-    //64QAM Single stream
-    break;
-  default:
-    break;
-  }
-  */
-
 #ifdef DEBUG_DLSCH_MODULATION
   printf("allocate_re (mod %d): symbol_offset %d re_offset %d (%d,%d), jj %d -> %d,%d\n",mod_order0,symbol_offset,re_offset,skip_dc,skip_half,*jj, x0[*jj], x0[1+*jj]);
 #endif
@@ -242,6 +551,7 @@ int allocate_REs_in_RB(LTE_DL_FRAME_PARMS *frame_parms,
   else if (skip_half==2)
     first_re=6;
 
+
   for (re=first_re; re<last_re; re++) {
 
 
@@ -350,8 +660,8 @@ int allocate_REs_in_RB(LTE_DL_FRAME_PARMS *frame_parms,
           *jj=*jj+1;
 
           for (aa=0; aa<frame_parms->nb_antennas_tx; aa++) {
-            ((int16_t *)&txdataF[aa][tti_offset])[0]+=qam_table_s0[qam64_table_offset_re];//(int16_t)(((int32_t)amp*qam64_table[qam64_table_offset_re])>>15);
-            ((int16_t *)&txdataF[aa][tti_offset])[1]+=qam_table_s0[qam64_table_offset_im];//(int16_t)(((int32_t)amp*qam64_table[qam64_table_offset_im])>>15);
+            ((int16_t *)&txdataF[aa][tti_offset])[0]+=qam_table_s0[qam64_table_offset_re];
+            ((int16_t *)&txdataF[aa][tti_offset])[1]+=qam_table_s0[qam64_table_offset_im];
           }
 
           break;
@@ -970,7 +1280,7 @@ int allocate_REs_in_RB(LTE_DL_FRAME_PARMS *frame_parms,
           }
         }
       } else if (mimo_mode>=TM9_10) {
-        msg("allocate_REs_in_RB() [dlsch.c] : ERROR, unknown mimo_mode %d\n",mimo_mode);
+        printf("allocate_REs_in_RB() [dlsch.c] : ERROR, unknown mimo_mode %d\n",mimo_mode);
         return(-1);
       }
     }
@@ -1191,7 +1501,7 @@ int dlsch_modulation(mod_sym_t **txdataF,
   uint8_t harq_pid = dlsch0->current_harq_pid;
   LTE_DL_eNB_HARQ_t *dlsch0_harq = dlsch0->harq_processes[harq_pid];
   LTE_DL_eNB_HARQ_t *dlsch1_harq; //= dlsch1->harq_processes[harq_pid];
-  uint32_t i,jj,jj2,re_allocated,symbol_offset;
+  uint32_t i,i2,jj,jj2,re_allocated,symbol_offset;
   uint16_t l,rb,re_offset;
   uint32_t rb_alloc_ind;
   uint32_t *rb_alloc = dlsch0_harq->rb_alloc;
@@ -1202,7 +1512,28 @@ int dlsch_modulation(mod_sym_t **txdataF,
   int16_t amp_rho_a, amp_rho_b;
   int16_t qam16_table_a0[4],qam64_table_a0[8],qam16_table_b0[4],qam64_table_b0[8];
   int16_t qam16_table_a1[4],qam64_table_a1[8],qam16_table_b1[4],qam64_table_b1[8];
-  int16_t *qam_table_s0,*qam_table_s1;
+  int16_t *qam_table_s0=NULL,*qam_table_s1=NULL;
+  int (*allocate_REs)(LTE_DL_FRAME_PARMS *,
+		      mod_sym_t **,
+		      uint32_t*,
+		      uint32_t*,
+		      uint16_t,
+		      uint32_t,
+		      LTE_DL_eNB_HARQ_t *,
+		      LTE_DL_eNB_HARQ_t *,
+		      uint8_t,
+		      int16_t,
+		      uint8_t,
+		      int16_t *,
+		      int16_t *,
+		      uint32_t *,
+		      uint8_t,
+		      uint8_t,
+		      int *,
+		      int *);
+  int P1_SHIFT[13],P2_SHIFT[13];
+  int offset,nushiftmod3;
+
 #ifdef DEBUG_DLSCH_MODULATION
   uint8_t Nl0 = dlsch0_harq->Nl;
   uint8_t Nl1;
@@ -1255,7 +1586,7 @@ int dlsch_modulation(mod_sym_t **txdataF,
   for (l=num_pdcch_symbols; l<nsymb; l++) {
 
 #ifdef DEBUG_DLSCH_MODULATION
-    msg("Generating DLSCH (harq_pid %d,mimo %d, pmi_alloc0 %llx, mod0 %d, mod1 %d, rb_alloc[0] %d) in %d\n",
+    printf("Generating DLSCH (harq_pid %d,mimo %d, pmi_alloc0 %llx, mod0 %d, mod1 %d, rb_alloc[0] %d) in %d\n",
         harq_pid,
         dlsch0_harq->mimo_mode,
         pmi2hex_2Ar1(dlsch0_harq->pmi_alloc),
@@ -1281,6 +1612,32 @@ int dlsch_modulation(mod_sym_t **txdataF,
         pilots=0;
     }
 
+    offset = (pilots==2)?3:0;
+    nushiftmod3 = frame_parms->nushift%3;
+
+    if (pilots>0) {  // compute pilot arrays, could be done statically if performance suffers
+      if (frame_parms->mode1_flag == 1) {
+	//	printf("l %d, nushift %d, offset %d\n",l,frame_parms->nushift,offset);
+	for (i=0,i2=0;i<12;i++) {
+	  if ((i!=(frame_parms->nushift+offset)) && (i!=((frame_parms->nushift+6+offset)%12)))
+	    P1_SHIFT[i2++]=1;
+	  else
+	    P1_SHIFT[i2++]=2;
+	}
+	P1_SHIFT[0]--;
+      }
+      else {
+	for (i=0,i2=0;i<12;i++) {
+	  if ((i!=nushiftmod3) && (i!=nushiftmod3+6) && (i!=nushiftmod3+3) && (i!=nushiftmod3+9))
+	    P2_SHIFT[i2++]=1;
+	  else
+	    P2_SHIFT[i2++]=2;
+	}
+	P2_SHIFT[0]--;
+      }
+    }
+    P1_SHIFT[12]=1;P2_SHIFT[12]=1;
+
     re_offset = frame_parms->first_carrier_offset;
     symbol_offset = (uint32_t)frame_parms->ofdm_symbol_size*(l+(subframe_offset*nsymb));
 
@@ -1421,50 +1778,100 @@ int dlsch_modulation(mod_sym_t **txdataF,
       }
 
       if (dlsch0_harq->Nlayers>1) {
-        msg("Nlayers %d: re_offset %d, symbol %d offset %d\n",dlsch0_harq->Nlayers,re_offset,l,symbol_offset);
+        printf("Nlayers %d: re_offset %d, symbol %d offset %d\n",dlsch0_harq->Nlayers,re_offset,l,symbol_offset);
         return(-1);
       }
 
       if (dlsch1) {
         if (dlsch1_harq->Nlayers>1) {
-          msg("Nlayers %d: re_offset %d, symbol %d offset %d\n",dlsch0_harq->Nlayers,re_offset,l,symbol_offset);
+          printf("Nlayers %d: re_offset %d, symbol %d offset %d\n",dlsch0_harq->Nlayers,re_offset,l,symbol_offset);
           return(-1);
         }
       }
 
-      if (mod_order0 == 4)
-        qam_table_s0 = ((pilots) ? qam16_table_b0 : qam16_table_a0);
-      else if (mod_order0 == 6)
-        qam_table_s0 = ((pilots) ? qam64_table_b0 : qam64_table_a0);
-      else
-        qam_table_s0 = NULL;
+      allocate_REs = allocate_REs_in_RB;
+
+      switch (mod_order0) {
+      case 2:
+	qam_table_s0 = NULL;
+	break;
+      case 4:
+	if (pilots) {
+	  qam_table_s0 = qam16_table_b0; 
+	  allocate_REs = (dlsch0->harq_processes[harq_pid]->mimo_mode == SISO) ? 
+	    allocate_REs_in_RB_pilots_16QAM_siso :
+	    allocate_REs_in_RB;
+	}
+	else {
+	  qam_table_s0 = qam16_table_a0;
+	  allocate_REs = (dlsch0->harq_processes[harq_pid]->mimo_mode == SISO) ? 
+	    allocate_REs_in_RB_no_pilots_16QAM_siso :
+	    allocate_REs_in_RB;
+	  
+	}
+	break;
+
+      case 6:
+	if (pilots) {
+	  qam_table_s0 = qam64_table_b0; 
+	  allocate_REs = (dlsch0->harq_processes[harq_pid]->mimo_mode == SISO) ? 
+	                 allocate_REs_in_RB_pilots_64QAM_siso :
+	                 allocate_REs_in_RB;
+	}
+	else {
+	  qam_table_s0 = qam64_table_a0;
+	  allocate_REs = (dlsch0->harq_processes[harq_pid]->mimo_mode == SISO) ? 
+	                 allocate_REs_in_RB_no_pilots_64QAM_siso :
+	                 allocate_REs_in_RB;
+	}
+	break;
+      
+      }
 
-      if (mod_order1 == 4)
-        qam_table_s1 = ((pilots) ? qam16_table_b1 : qam16_table_a1);
-      else if (mod_order1 == 6)
-        qam_table_s1 = ((pilots) ? qam64_table_b1 : qam64_table_a1);
-      else
-        qam_table_s1 = NULL;
+      switch (mod_order1) {
+      case 2:
+	qam_table_s1 = NULL;
+	allocate_REs = allocate_REs_in_RB;
+	break;
+      case 4:
+	if (pilots) {
+	  qam_table_s1 = qam16_table_b1; 
+	}
+	else {
+	  qam_table_s1 = qam16_table_a1;
+	}
+	break;
+      case 6:
+	if (pilots) {
+	  qam_table_s1 = qam64_table_b1; 
+	}
+	else {
+	  qam_table_s1 = qam64_table_a1;
+	}
+	break;
+      
+      }
 
       if (rb_alloc_ind > 0) {
-        //    printf("Allocated rb %d/symbol %d, skip_half %d, subframe_offset %d, symbol_offset %d, re_offset %d, jj %d\n",rb,l,skip_half,subframe_offset,symbol_offset,re_offset,jj);
-        allocate_REs_in_RB(frame_parms,
-                           txdataF,
-                           &jj,
-                           &jj2,
-                           re_offset,
-                           symbol_offset,
-                           dlsch0->harq_processes[harq_pid],
-                           (dlsch1==NULL) ? NULL : dlsch1->harq_processes[harq_pid],
-                           pilots,
-                           ((pilots) ? amp_rho_b : amp_rho_a),
-                           get_pmi(frame_parms->N_RB_DL,dlsch0->harq_processes[harq_pid],rb),
-                           qam_table_s0,
-                           qam_table_s1,
-                           &re_allocated,
-                           skip_dc,
-                           skip_half);
-
+	//	printf("Allocated rb %d/symbol %d, skip_half %d, subframe_offset %d, symbol_offset %d, re_offset %d, jj %d\n",rb,l,skip_half,subframe_offset,symbol_offset,re_offset,jj);
+	allocate_REs(frame_parms,
+		     txdataF,
+		     &jj,
+		     &jj2,
+		     re_offset,
+		     symbol_offset,
+		     dlsch0->harq_processes[harq_pid],
+		     (dlsch1==NULL) ? NULL : dlsch1->harq_processes[harq_pid],
+		     pilots,
+		     ((pilots) ? amp_rho_b : amp_rho_a),
+		     get_pmi(frame_parms->N_RB_DL,dlsch0->harq_processes[harq_pid],rb),
+		     qam_table_s0,
+		     qam_table_s1,
+		     &re_allocated,
+		     skip_dc,
+		     skip_half,
+		     P1_SHIFT,
+		     P2_SHIFT);
       }
 
       re_offset+=12; // go to next RB
@@ -1484,7 +1891,7 @@ int dlsch_modulation(mod_sym_t **txdataF,
 
 
 #ifdef DEBUG_DLSCH_MODULATION
-  msg("generate_dlsch : jj = %d,re_allocated = %d (G %d)\n",jj,re_allocated,get_G(frame_parms,dlsch0_harq->nb_rb,dlsch0_harq->rb_alloc,mod_order0,Nl0,2,0,subframe_offset));
+  printf("generate_dlsch : jj = %d,re_allocated = %d (G %d)\n",jj,re_allocated,get_G(frame_parms,dlsch0_harq->nb_rb,dlsch0_harq->rb_alloc,mod_order0,Nl0,2,0,subframe_offset));
 #endif
 
   VCD_SIGNAL_DUMPER_DUMP_FUNCTION_BY_NAME(VCD_SIGNAL_DUMPER_FUNCTIONS_ENB_DLSCH_MODULATION, VCD_FUNCTION_OUT);
@@ -1583,7 +1990,7 @@ int mch_modulation(mod_sym_t **txdataF,
 
 
 #ifdef DEBUG_DLSCH_MODULATION
-  msg("generate_dlsch(MCH) : jj = %d,re_allocated = %d (G %d)\n",jj,re_allocated,get_G(frame_parms,dlsch->harq_processes[0]->nb_rb,dlsch->harq_processes[0]->rb_alloc,mod_order,1,2,0,subframe_offset));
+  printf("generate_dlsch(MCH) : jj = %d,re_allocated = %d (G %d)\n",jj,re_allocated,get_G(frame_parms,dlsch->harq_processes[0]->nb_rb,dlsch->harq_processes[0]->rb_alloc,mod_order,1,2,0,subframe_offset));
 #endif
 
   return (re_allocated);
diff --git a/openair1/PHY/LTE_TRANSPORT/dlsch_scrambling.c b/openair1/PHY/LTE_TRANSPORT/dlsch_scrambling.c
index dd971748cb80e8e31de0cb778b2d462b2a75dfa1..a5dd4bf76470ed30542892307ea4fad2fdd49455 100644
--- a/openair1/PHY/LTE_TRANSPORT/dlsch_scrambling.c
+++ b/openair1/PHY/LTE_TRANSPORT/dlsch_scrambling.c
@@ -56,10 +56,11 @@ void dlsch_scrambling(LTE_DL_FRAME_PARMS *frame_parms,
                       uint8_t Ns)
 {
 
-  int i,j,k=0;
+  int i;
   //  uint8_t reset;
   uint32_t x1, x2, s=0;
-  uint8_t *e=dlsch->harq_processes[dlsch->current_harq_pid]->e;
+  uint8_t *dlsch_e=dlsch->harq_processes[dlsch->current_harq_pid]->e;
+  uint8_t *e=dlsch_e;
 
   VCD_SIGNAL_DUMPER_DUMP_FUNCTION_BY_NAME(VCD_SIGNAL_DUMPER_FUNCTIONS_ENB_DLSCH_SCRAMBLING, VCD_FUNCTION_IN);
 
@@ -78,24 +79,56 @@ void dlsch_scrambling(LTE_DL_FRAME_PARMS *frame_parms,
 
   for (i=0; i<(1+(G>>5)); i++) {
 
-    for (j=0; j<32; j++,k++) {
 #ifdef DEBUG_SCRAMBLING
-      printf("scrambling %d : %d => ",k,e[k]);
+    printf("scrambling %d : %d => ",i<<5,e[0]);
 #endif
-      e[k] = (e[k]&1) ^ ((s>>j)&1);
+    /* Unrolled: XOR the 32 entries at e[0..31] with bits 0..31 of the scrambling word s. The debug prints show only the first entry of each word (absolute index i<<5), since the per-bit counter k no longer exists. */
+    e[0] = (e[0]&1) ^ (s&1);
+    e[1] = (e[1]&1) ^ ((s>>1)&1);
+    e[2] = (e[2]&1) ^ ((s>>2)&1);
+    e[3] = (e[3]&1) ^ ((s>>3)&1);
+    e[4] = (e[4]&1) ^ ((s>>4)&1);
+    e[5] = (e[5]&1) ^ ((s>>5)&1);
+    e[6] = (e[6]&1) ^ ((s>>6)&1);
+    e[7] = (e[7]&1) ^ ((s>>7)&1);
+    e[8] = (e[8]&1) ^ ((s>>8)&1);
+    e[9] = (e[9]&1) ^ ((s>>9)&1);
+    e[10] = (e[10]&1) ^ ((s>>10)&1);
+    e[11] = (e[11]&1) ^ ((s>>11)&1);
+    e[12] = (e[12]&1) ^ ((s>>12)&1);
+    e[13] = (e[13]&1) ^ ((s>>13)&1);
+    e[14] = (e[14]&1) ^ ((s>>14)&1);
+    e[15] = (e[15]&1) ^ ((s>>15)&1);
+    e[16] = (e[16]&1) ^ ((s>>16)&1);
+    e[17] = (e[17]&1) ^ ((s>>17)&1);
+    e[18] = (e[18]&1) ^ ((s>>18)&1);
+    e[19] = (e[19]&1) ^ ((s>>19)&1);
+    e[20] = (e[20]&1) ^ ((s>>20)&1);
+    e[21] = (e[21]&1) ^ ((s>>21)&1);
+    e[22] = (e[22]&1) ^ ((s>>22)&1);
+    e[23] = (e[23]&1) ^ ((s>>23)&1);
+    e[24] = (e[24]&1) ^ ((s>>24)&1);
+    e[25] = (e[25]&1) ^ ((s>>25)&1);
+    e[26] = (e[26]&1) ^ ((s>>26)&1);
+    e[27] = (e[27]&1) ^ ((s>>27)&1);
+    e[28] = (e[28]&1) ^ ((s>>28)&1);
+    e[29] = (e[29]&1) ^ ((s>>29)&1);
+    e[30] = (e[30]&1) ^ ((s>>30)&1);
+    e[31] = (e[31]&1) ^ ((s>>31)&1);
+
 #ifdef DEBUG_SCRAMBLING
-      printf("%d\n",e[k]);
+    printf("%d\n",e[0]);
 #endif
-    }
-
+
+    /* fetch the next 32-bit scrambling word, then move e to the next 32 entries */
     s = lte_gold_generic(&x1, &x2, 0);
+    e += 32;
   }
 
   VCD_SIGNAL_DUMPER_DUMP_FUNCTION_BY_NAME(VCD_SIGNAL_DUMPER_FUNCTIONS_ENB_DLSCH_SCRAMBLING, VCD_FUNCTION_OUT);
 
 }
 
-
 void dlsch_unscrambling(LTE_DL_FRAME_PARMS *frame_parms,
                         int mbsfn_flag,
                         LTE_UE_DLSCH_t *dlsch,
diff --git a/openair1/PHY/LTE_TRANSPORT/proto.h b/openair1/PHY/LTE_TRANSPORT/proto.h
index 9afea790b327c6efd15a53949a979f74243c8f7d..ab1119020c2397e8d1ea22e22ebacd0f0beaa5e4 100644
--- a/openair1/PHY/LTE_TRANSPORT/proto.h
+++ b/openair1/PHY/LTE_TRANSPORT/proto.h
@@ -183,7 +183,9 @@ int32_t allocate_REs_in_RB(LTE_DL_FRAME_PARMS *frame_parms,
                            int16_t *qam_table_s1,
                            uint32_t *re_allocated,
                            uint8_t skip_dc,
-                           uint8_t skip_half);
+                           uint8_t skip_half,
+			   int *P1_SHIFT,
+			   int *P2_SHIFT);
 
 
 /** \fn int32_t dlsch_modulation(mod_sym_t **txdataF,
diff --git a/openair1/PHY/TOOLS/cmult_vv.c b/openair1/PHY/TOOLS/cmult_vv.c
index f970d332248f47fd2109dc71515a8ce99a11ce27..ec6aacc440252af575e123b120156063d51c9401 100755
--- a/openair1/PHY/TOOLS/cmult_vv.c
+++ b/openair1/PHY/TOOLS/cmult_vv.c
@@ -85,7 +85,7 @@ int mult_cpx_conj_vector(int16_t *x1,
 
   // we compute 4 cpx multiply for each loop
   for(i=0; i<(N>>2); i++) {
-  #if defined(__x86_64__) || defined(__i386__)
+#if defined(__x86_64__) || defined(__i386__)
     tmp_re = _mm_madd_epi16(*x1_128,*x2_128);
     tmp_im = _mm_shufflelo_epi16(*x1_128,_MM_SHUFFLE(2,3,0,1));
     tmp_im = _mm_shufflehi_epi16(tmp_im,_MM_SHUFFLE(2,3,0,1));
diff --git a/openair1/PHY/TOOLS/lte_dfts.c b/openair1/PHY/TOOLS/lte_dfts.c
index b010f93aabeebb614535686ee1ca350a89d0eb4b..e2c23c8538b839c3499b8648860863e4ef4cce4d 100644
--- a/openair1/PHY/TOOLS/lte_dfts.c
+++ b/openair1/PHY/TOOLS/lte_dfts.c
@@ -105,7 +105,6 @@ static inline void cmacc(__m128i a,__m128i b, __m128i *re32, __m128i *im32)
 
 
 
-
 static inline void cmult(__m128i a,__m128i b, __m128i *re32, __m128i *im32) __attribute__((always_inline));
 
 static inline void cmult(__m128i a,__m128i b, __m128i *re32, __m128i *im32)
@@ -1754,7 +1753,8 @@ int16_t tw64c[96] __attribute__((aligned(16))) = { 0,32767,3212,32609,6393,32137
 #define simdshort_q15_t __m64
 #define shiftright_int16(a,shift) _mm_srai_epi16(a,shift)
 #define set1_int16(a) _mm_set1_epi16(a);
-#define mulhi_int16(a,b) _mm_slli_epi16(_mm_mulhi_epi16(a,b),1);
+//#define mulhi_int16(a,b) _mm_slli_epi16(_mm_mulhi_epi16(a,b),1);
+#define mulhi_int16(a,b) _mm_mulhrs_epi16 (a,b)
 #elif defined(__arm__)
 #define simd_q15_t int16x8_t
 #define simdshort_q15_t int16x4_t
diff --git a/openair1/SIMULATION/LTE_PHY/dlsim.c b/openair1/SIMULATION/LTE_PHY/dlsim.c
index 6f2c1f1d224494c93e78de898fc9792098a6951a..58cf77568a8ff6b1527a9485836381d232785e93 100644
--- a/openair1/SIMULATION/LTE_PHY/dlsim.c
+++ b/openair1/SIMULATION/LTE_PHY/dlsim.c
@@ -215,7 +215,7 @@ int main(int argc, char **argv)
   // void *data;
   // int ii;
   //  int bler;
-  double blerr[4],uncoded_ber;//,avg_ber;
+  double blerr[4],uncoded_ber,avg_ber;
   short *uncoded_ber_bit=NULL;
   uint8_t N_RB_DL=25,osf=1;
   frame_t frame_type = FDD;
@@ -2623,7 +2623,7 @@ PMI_FEEDBACK:
                                               &PHY_vars_eNB->lte_frame_parms,
                                               num_pdcch_symbols,
                                               PHY_vars_eNB->dlsch_eNB[k][0],
-                                              PHY_vars_eNB->dlsch_eNB[k][1]);	      
+                                              (transmission_mode==3)||(transmission_mode==4) ? PHY_vars_eNB->dlsch_eNB[k][1] : NULL);	      
               /* avoid gcc warnings */
               (void)re_allocated;
 
@@ -3292,7 +3292,7 @@ PMI_FEEDBACK:
             PHY_vars_UE->dlsch_ue[0][cw]->harq_processes[PHY_vars_UE->dlsch_ue[0][cw]->current_harq_pid]->G = coded_bits_per_codeword;
 
 
-            /*
+            
             // calculate uncoded BLER
             uncoded_ber=0;
             for (i=0;i<coded_bits_per_codeword;i++)
@@ -3308,7 +3308,7 @@ PMI_FEEDBACK:
 
             if (n_frames==1)
               write_output("uncoded_ber_bit.m","uncoded_ber_bit",uncoded_ber_bit,coded_bits_per_codeword,1,0);
-            */
+            
 
             start_meas(&PHY_vars_UE->dlsch_unscrambling_stats);
             dlsch_unscrambling(&PHY_vars_UE->lte_frame_parms,