From 1e08469ed8fe8232133be1f2df53fa17ee81cb06 Mon Sep 17 00:00:00 2001
From: Raymond Knopp <raymond.knopp@eurecom.fr>
Date: Mon, 14 Dec 2015 01:51:26 -0800
Subject: [PATCH] conversion of some sse4 code to avx2 for turbo encoder.  No
 noticeable improvement (output expansion for encoder doesn't benefit much
 because of unpack instruction difference on AVX2.

maxh adjustement in dlsch_demodulation (to be checked further for other mcs)

exit criterion in dlsim to allow estimation of lower BLER.
---
 cmake_targets/CMakeLists.txt                  |   2 +-
 openair1/PHY/CODING/3gpplte_sse.c             | 205 ++++++++++++++++--
 .../PHY/LTE_TRANSPORT/dlsch_demodulation.c    |   2 +-
 openair1/SIMULATION/LTE_PHY/dlsim.c           |   2 +-
 4 files changed, 186 insertions(+), 25 deletions(-)

diff --git a/cmake_targets/CMakeLists.txt b/cmake_targets/CMakeLists.txt
index d8fd96353d..8ba602c25d 100644
--- a/cmake_targets/CMakeLists.txt
+++ b/cmake_targets/CMakeLists.txt
@@ -134,7 +134,7 @@ else (CMAKE_SYSTEM_PROCESSOR STREQUAL "armv7l")
     set(C_FLAGS_PROCESSOR "${C_FLAGS_PROCESSOR} -mavx2")
   endif()
   if (CPUINFO MATCHES "sse4_2")
-    set(C_FLAGS_PROCESSOR "${C_FLAGS_PROCESSOR} -mavx2 -msse4.2")
+    set(C_FLAGS_PROCESSOR "${C_FLAGS_PROCESSOR} -msse4.2")
   endif()
   if (CPUINFO MATCHES "sse4_1")
     set(C_FLAGS_PROCESSOR "${C_FLAGS_PROCESSOR} -msse4.1")
diff --git a/openair1/PHY/CODING/3gpplte_sse.c b/openair1/PHY/CODING/3gpplte_sse.c
index 41c0f65dc3..3dda77e4ee 100755
--- a/openair1/PHY/CODING/3gpplte_sse.c
+++ b/openair1/PHY/CODING/3gpplte_sse.c
@@ -37,6 +37,7 @@
 #include "extern_3GPPinterleaver.h"
 #else
 #include "vars.h"
+#include <stdint.h>
 #endif
 #include <stdio.h>
 #include <string.h>
@@ -48,6 +49,7 @@
 #define print_shorts(s,x) printf("%s %x,%x,%x,%x,%x,%x,%x,%x\n",s,(x)[0],(x)[1],(x)[2],(x)[3],(x)[4],(x)[5],(x)[6],(x)[7])
 #define print_ints(s,x) printf("%s %x %x %x %x\n",s,(x)[0],(x)[1],(x)[2],(x)[3])
 
+#define print_bytes2(s,x) printf("%s %x,%x,%x,%x,%x,%x,%x,%x,%x,%x,%x,%x,%x,%x,%x,%x,%x,%x,%x,%x,%x,%x,%x,%x,%x,%x,%x,%x,%x,%x,%x,%x\n",s,(x)[0],(x)[1],(x)[2],(x)[3],(x)[4],(x)[5],(x)[6],(x)[7],(x)[8],(x)[9],(x)[10],(x)[11],(x)[12],(x)[13],(x)[14],(x)[15],(x)[16],(x)[17],(x)[18],(x)[19],(x)[20],(x)[21],(x)[22],(x)[23],(x)[24],(x)[25],(x)[26],(x)[27],(x)[28],(x)[29],(x)[30],(x)[31])
 
 //#define DEBUG_TURBO_ENCODER 1
 #define CALLGRIND 1
@@ -143,9 +145,10 @@ void treillis_table_init(void)
 char interleave_compact_byte(short * base_interleaver,unsigned char * input, unsigned char * output, int n)
 {
 
-  char expandInput[768*8] __attribute__((aligned(16)));
+  char expandInput[768*8] __attribute__((aligned(32)));
   int i,loop=n>>4;
 #if defined(__x86_64__) || defined(__i386__)
+#ifndef __AVX2__
   __m128i *i_128=(__m128i *)input, *o_128=(__m128i*)expandInput;
   __m128i tmp1, tmp2, tmp3, tmp4;
   __m128i BIT_MASK = _mm_set_epi8(  0b00000001,
@@ -164,6 +167,43 @@ char interleave_compact_byte(short * base_interleaver,unsigned char * input, uns
                                     0b00100000,
                                     0b01000000,
                                     0b10000000);
+
+#else
+  __m256i *i_256=(__m256i *)input, *o_256=(__m256i*)expandInput;
+  __m256i tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
+  __m256i BIT_MASK = _mm256_set_epi8(  0b00000001,
+				       0b00000010,
+				       0b00000100,
+				       0b00001000,
+				       0b00010000,
+				       0b00100000,
+				       0b01000000,
+				       0b10000000,
+				       0b00000001,
+				       0b00000010,
+				       0b00000100,
+				       0b00001000,
+				       0b00010000,
+				       0b00100000,
+				       0b01000000,
+				       0b10000000,
+				       0b00000001,
+				       0b00000010,
+				       0b00000100,
+				       0b00001000,
+				       0b00010000,
+				       0b00100000,
+				       0b01000000,
+				       0b10000000,
+				       0b00000001,
+				       0b00000010,
+				       0b00000100,
+				       0b00001000,
+				       0b00010000,
+				       0b00100000,
+				       0b01000000,
+				       0b10000000);
+#endif
 #elif defined(__arm__)
   uint8x16_t *i_128=(uint8x16_t *)input, *o_128=(uint8x16_t *)expandInput;
   uint8x16_t tmp1,tmp2;
@@ -187,46 +227,123 @@ char interleave_compact_byte(short * base_interleaver,unsigned char * input, uns
                                     0b00000010,
                                     0b00000001};
 #endif
+ 
+#ifndef __AVX2__
   if ((n&15) > 0)
     loop++;
-
+#else
+  loop=n>>5;
+  if ((n&31) > 0)
+    loop++;
+#endif
   for (i=0; i<loop ; i++ ) {
     /* int cur_byte=i<<3; */
     /* for (b=0;b<8;b++) */
     /*   expandInput[cur_byte+b] = (input[i]&(1<<(7-b)))>>(7-b); */
 
 #if defined(__x86_64__) || defined(__i386__)
-    tmp1=_mm_load_si128(i_128++);
-    tmp2=_mm_unpacklo_epi8(tmp1,tmp1);
-    tmp3=_mm_unpacklo_epi16(tmp2,tmp2);
-    tmp4=_mm_unpacklo_epi32(tmp3,tmp3);
+#ifndef __AVX2__
+    tmp1=_mm_load_si128(i_128++);       // tmp1 = B0,B1,...,B15
+    tmp2=_mm_unpacklo_epi8(tmp1,tmp1);  // tmp2 = B0,B0,B1,B1,...,B7,B7
+    tmp3=_mm_unpacklo_epi16(tmp2,tmp2); // tmp3 = B0,B0,B0,B0,B1,B1,B1,B1,B2,B2,B2,B2,B3,B3,B3,B3
+    tmp4=_mm_unpacklo_epi32(tmp3,tmp3); // tmp4 - B0,B0,B0,B0,B0,B0,B0,B0,B1,B1,B1,B1,B1,B1,B1,B1
     *o_128++=_mm_cmpeq_epi8(_mm_and_si128(tmp4,BIT_MASK),BIT_MASK);
 
-    tmp4=_mm_unpackhi_epi32(tmp3,tmp3);
+    tmp4=_mm_unpackhi_epi32(tmp3,tmp3); // tmp4 - B2,B2,B2,B2,B2,B2,B2,B2,B3,B3,B3,B3,B3,B3,B3,B3
     *o_128++=_mm_cmpeq_epi8(_mm_and_si128(tmp4,BIT_MASK),BIT_MASK);;
 
-    tmp3=_mm_unpackhi_epi16(tmp2,tmp2);
-    tmp4=_mm_unpacklo_epi32(tmp3,tmp3);
+    tmp3=_mm_unpackhi_epi16(tmp2,tmp2); // tmp3 = B4,B4,B4,B4,B5,B5,B5,B5,B6,B6,B6,B6,B7,B7,B7,B7
+    tmp4=_mm_unpacklo_epi32(tmp3,tmp3); // tmp4 - B4,B4,B4,B4,B4,B4,B4,B4,B5,B5,B5,B5,B5,B5,B5,B5
     *o_128++=_mm_cmpeq_epi8(_mm_and_si128(tmp4,BIT_MASK),BIT_MASK);;
 
-    tmp4=_mm_unpackhi_epi32(tmp3,tmp3);
+    tmp4=_mm_unpackhi_epi32(tmp3,tmp3); // tmp4 - B6,B6,B6,B6,B6,B6,B6,B6,B7,B7,B7,B7,B7,B7,B7,B7
     *o_128++=_mm_cmpeq_epi8(_mm_and_si128(tmp4,BIT_MASK),BIT_MASK);;
 
-    tmp2=_mm_unpackhi_epi8(tmp1,tmp1);
-    tmp3=_mm_unpacklo_epi16(tmp2,tmp2);
-    tmp4=_mm_unpacklo_epi32(tmp3,tmp3);
+    tmp2=_mm_unpackhi_epi8(tmp1,tmp1);  // tmp2 = B8,B8,B9,B9,...,B15,B15
+    tmp3=_mm_unpacklo_epi16(tmp2,tmp2); // tmp3 = B8,B8,B8,B8,B9,B9,B9,B9,B10,B10,B10,B10,B11,B11,B11,B11
+    tmp4=_mm_unpacklo_epi32(tmp3,tmp3); // tmp4 = B8,B8,B8,B8,B8,B8,B8,B8,B9,B9,B9,B9,B9,B9,B9,B9
     *o_128++=_mm_cmpeq_epi8(_mm_and_si128(tmp4,BIT_MASK),BIT_MASK);;
 
-    tmp4=_mm_unpackhi_epi32(tmp3,tmp3);
+    tmp4=_mm_unpackhi_epi32(tmp3,tmp3); // tmp4 = B10,B10,B10,B10,B10,B10,B10,B10,B11,B11,B11,B11,B11,B11,B11,B11
     *o_128++=_mm_cmpeq_epi8(_mm_and_si128(tmp4,BIT_MASK),BIT_MASK);;
 
-    tmp3=_mm_unpackhi_epi16(tmp2,tmp2);
-    tmp4=_mm_unpacklo_epi32(tmp3,tmp3);
+    tmp3=_mm_unpackhi_epi16(tmp2,tmp2); // tmp3 = B12,B12,B12,B12,B13,B13,B13,B13,B14,B14,B14,B14,B15,B15,B15,B15
+    tmp4=_mm_unpacklo_epi32(tmp3,tmp3); // tmp4 = B12,B12,B12,B12,B12,B12,B12,B12,B13,B13,B13,B13,B13,B13,B13,B13
     *o_128++=_mm_cmpeq_epi8(_mm_and_si128(tmp4,BIT_MASK),BIT_MASK);;
 
-    tmp4=_mm_unpackhi_epi32(tmp3,tmp3);
+    tmp4=_mm_unpackhi_epi32(tmp3,tmp3); // tmp4 = B14,B14,B14,B14,B14,B14,B14,B14,B15,B15,B15,B15,B15,B15,B15,B15
     *o_128++=_mm_cmpeq_epi8(_mm_and_si128(tmp4,BIT_MASK),BIT_MASK);;
-
+#else
+    tmp1=_mm256_load_si256(i_256++);       // tmp1 = B0,B1,...,B15,...,B31
+    //print_bytes2("in",(uint8_t*)&tmp1);
+    tmp2=_mm256_unpacklo_epi8(tmp1,tmp1);  // tmp2 = B0,B0,B1,B1,...,B7,B7,B16,B16,B17,B17,...,B23,B23
+    tmp3=_mm256_unpacklo_epi16(tmp2,tmp2); // tmp3 = B0,B0,B0,B0,B1,B1,B1,B1,B2,B2,B2,B2,B3,B3,B3,B3,B16,B16,B16,B16,...,B19,B19,B19,B19
+    tmp4=_mm256_unpacklo_epi32(tmp3,tmp3); // tmp4 - B0,B0,B0,B0,B0,B0,B0,B0,B1,B1,B1,B1,B1,B1,B1,B1,B16,B16...,B17..,B17
+    tmp5=_mm256_unpackhi_epi32(tmp3,tmp3); // tmp5 - B2,B2,B2,B2,B2,B2,B2,B2,B3,B3,B3,B3,B3,B3,B3,B3,B18...,B18,B19,...,B19
+    tmp6=_mm256_insertf128_si256(tmp4,_mm256_extracti128_si256(tmp5,0),1);  // tmp6 = B0 B1 B2 B3
+    tmp7=_mm256_insertf128_si256(tmp5,_mm256_extracti128_si256(tmp4,1),0);  // tmp7 = B16 B17 B18 B19
+    //print_bytes2("tmp2",(uint8_t*)&tmp2);
+    //print_bytes2("tmp3",(uint8_t*)&tmp3);
+    //print_bytes2("tmp4",(uint8_t*)&tmp4);
+    //print_bytes2("tmp5",(uint8_t*)&tmp4);
+    //print_bytes2("tmp6",(uint8_t*)&tmp6);
+    //print_bytes2("tmp7",(uint8_t*)&tmp7);
+    o_256[0]=_mm256_cmpeq_epi8(_mm256_and_si256(tmp6,BIT_MASK),BIT_MASK);
+    //print_bytes2("out",(uint8_t*)o_256);
+    o_256[4]=_mm256_cmpeq_epi8(_mm256_and_si256(tmp7,BIT_MASK),BIT_MASK);;
+    //print_bytes2("out",(uint8_t*)(o_256+4));
+
+    tmp3=_mm256_unpackhi_epi16(tmp2,tmp2); // tmp3 = B4,B4,B4,B4,B5,B5,B5,B5,B6,B6,B6,B6,B7,B7,B7,B7,B20,B20,B20,B20,...,B23,B23,B23,B23
+    tmp4=_mm256_unpacklo_epi32(tmp3,tmp3); // tmp4 - B4,B4,B4,B4,B4,B4,B4,B4,B5,B5,B5,B5,B5,B5,B5,B5,B20,B20...,B21..,B21
+    tmp5=_mm256_unpackhi_epi32(tmp3,tmp3); // tmp5 - B6,B6,B6,B6,B6,B6,B6,B6,B7,B7,B7,B7,B7,B7,B7,B7,B22...,B22,B23,...,B23
+    tmp6=_mm256_insertf128_si256(tmp4,_mm256_extracti128_si256(tmp5,0),1);  // tmp6 = B4 B5 B6 B7
+    tmp7=_mm256_insertf128_si256(tmp5,_mm256_extracti128_si256(tmp4,1),0);  // tmp7 = B20 B21 B22 B23
+    //print_bytes2("tmp2",(uint8_t*)&tmp2);
+    //print_bytes2("tmp3",(uint8_t*)&tmp3);
+    //print_bytes2("tmp4",(uint8_t*)&tmp4);
+    //print_bytes2("tmp5",(uint8_t*)&tmp4);
+    //print_bytes2("tmp6",(uint8_t*)&tmp6);
+    //print_bytes2("tmp7",(uint8_t*)&tmp7);
+    o_256[1]=_mm256_cmpeq_epi8(_mm256_and_si256(tmp6,BIT_MASK),BIT_MASK);
+    //print_bytes2("out",(uint8_t*)(o_256+1));
+    o_256[5]=_mm256_cmpeq_epi8(_mm256_and_si256(tmp7,BIT_MASK),BIT_MASK);;
+    //print_bytes2("out",(uint8_t*)(o_256+4));
+
+    tmp2=_mm256_unpackhi_epi8(tmp1,tmp1);  // tmp2 = B8 B9 B10 B11 B12 B13 B14 B15 B25 B26 B27 B28 B29 B30 B31
+    tmp3=_mm256_unpacklo_epi16(tmp2,tmp2); // tmp3 = B8,B9,B10,B11,B26,B27,B28,B29
+    tmp4=_mm256_unpacklo_epi32(tmp3,tmp3); // tmp4 - B8,B9,B26,B27
+    tmp5=_mm256_unpackhi_epi32(tmp3,tmp3); // tmp5 - B10,B11,B28,B29
+    tmp6=_mm256_insertf128_si256(tmp4,_mm256_extracti128_si256(tmp5,0),1);  // tmp6 = B8 B9 B10 B11
+    tmp7=_mm256_insertf128_si256(tmp5,_mm256_extracti128_si256(tmp4,1),0);  // tmp7 = B26 B27 B28 B29
+    //print_bytes2("tmp2",(uint8_t*)&tmp2);
+    //print_bytes2("tmp3",(uint8_t*)&tmp3);
+    //print_bytes2("tmp4",(uint8_t*)&tmp4);
+    //print_bytes2("tmp5",(uint8_t*)&tmp4);
+    //print_bytes2("tmp6",(uint8_t*)&tmp6);
+    //print_bytes2("tmp7",(uint8_t*)&tmp7);
+    o_256[2]=_mm256_cmpeq_epi8(_mm256_and_si256(tmp6,BIT_MASK),BIT_MASK);
+    //print_bytes2("out",(uint8_t*)(o_256+2));
+    o_256[6]=_mm256_cmpeq_epi8(_mm256_and_si256(tmp7,BIT_MASK),BIT_MASK);;
+    //print_bytes2("out",(uint8_t*)(o_256+4));
+
+    tmp3=_mm256_unpackhi_epi16(tmp2,tmp2); // tmp3 = B12 B13 B14 B15 B28 B29 B30 B31
+    tmp4=_mm256_unpacklo_epi32(tmp3,tmp3); // tmp4 = B12 B13 B28 B29
+    tmp5=_mm256_unpackhi_epi32(tmp3,tmp3); // tmp5 = B14 B15 B30 B31 
+    tmp6=_mm256_insertf128_si256(tmp4,_mm256_extracti128_si256(tmp5,0),1);  // tmp6 = B12 B13 B14 B15 
+    tmp7=_mm256_insertf128_si256(tmp5,_mm256_extracti128_si256(tmp4,1),0);  // tmp7 = B28 B29 B30 B31
+    //print_bytes2("tmp2",(uint8_t*)&tmp2);
+    //print_bytes2("tmp3",(uint8_t*)&tmp3);
+    //print_bytes2("tmp4",(uint8_t*)&tmp4);
+    //print_bytes2("tmp5",(uint8_t*)&tmp4);
+    //print_bytes2("tmp6",(uint8_t*)&tmp6);
+    //print_bytes2("tmp7",(uint8_t*)&tmp7);
+    o_256[3]=_mm256_cmpeq_epi8(_mm256_and_si256(tmp6,BIT_MASK),BIT_MASK);
+    //print_bytes2("out",(uint8_t*)(o_256+3));
+    o_256[7]=_mm256_cmpeq_epi8(_mm256_and_si256(tmp7,BIT_MASK),BIT_MASK);;
+    //print_bytes2("out",(uint8_t*)(o_256+7));
+
+    o_256+=8;
+#endif
 #elif defined(__arm__)
     tmp1=vld1q_u8((uint8_t*)i_128);
     //print_bytes("tmp1:",(uint8_t*)&tmp1);
@@ -305,8 +422,13 @@ char interleave_compact_byte(short * base_interleaver,unsigned char * input, uns
 
   short * ptr_intl=base_interleaver;
 #if defined(__x86_64) || defined(__i386__)
+#ifndef __AVX2__
   __m128i tmp;
- uint16_t *systematic2_ptr=(unsigned short *) output;
+ uint16_t *systematic2_ptr=(uint16_t *) output;
+#else
+  __m256i tmp;
+ uint32_t *systematic2_ptr=(uint32_t *) output;
+#endif
 #elif defined(__arm__)
   uint8x16_t tmp;
   const uint8_t __attribute__ ((aligned (16))) _Powers[16]= 
@@ -316,11 +438,15 @@ char interleave_compact_byte(short * base_interleaver,unsigned char * input, uns
   uint8x16_t Powers= vld1q_u8(_Powers);
   uint8_t *systematic2_ptr=(uint8_t *) output;
 #endif
+#ifndef __AVX2
   int input_length_words=n>>1;
-
+#else
+  int input_length_words=n>>2;
+#endif
   for ( i=0; i<  input_length_words ; i ++ ) {
 
 #if defined(__x86_64__) || defined(__i386__)
+#ifndef __AVX2__
     tmp=_mm_insert_epi8(tmp,expandInput[*ptr_intl++],7);
     tmp=_mm_insert_epi8(tmp,expandInput[*ptr_intl++],6);
     tmp=_mm_insert_epi8(tmp,expandInput[*ptr_intl++],5);
@@ -338,6 +464,41 @@ char interleave_compact_byte(short * base_interleaver,unsigned char * input, uns
     tmp=_mm_insert_epi8(tmp,expandInput[*ptr_intl++],8+1);
     tmp=_mm_insert_epi8(tmp,expandInput[*ptr_intl++],8+0);
     *systematic2_ptr++=(unsigned short)_mm_movemask_epi8(tmp);
+#else
+    tmp=_mm256_insert_epi8(tmp,expandInput[*ptr_intl++],7);
+    tmp=_mm256_insert_epi8(tmp,expandInput[*ptr_intl++],6);
+    tmp=_mm256_insert_epi8(tmp,expandInput[*ptr_intl++],5);
+    tmp=_mm256_insert_epi8(tmp,expandInput[*ptr_intl++],4);
+    tmp=_mm256_insert_epi8(tmp,expandInput[*ptr_intl++],3);
+    tmp=_mm256_insert_epi8(tmp,expandInput[*ptr_intl++],2);
+    tmp=_mm256_insert_epi8(tmp,expandInput[*ptr_intl++],1);
+    tmp=_mm256_insert_epi8(tmp,expandInput[*ptr_intl++],0);
+    tmp=_mm256_insert_epi8(tmp,expandInput[*ptr_intl++],8+7);
+    tmp=_mm256_insert_epi8(tmp,expandInput[*ptr_intl++],8+6);
+    tmp=_mm256_insert_epi8(tmp,expandInput[*ptr_intl++],8+5);
+    tmp=_mm256_insert_epi8(tmp,expandInput[*ptr_intl++],8+4);
+    tmp=_mm256_insert_epi8(tmp,expandInput[*ptr_intl++],8+3);
+    tmp=_mm256_insert_epi8(tmp,expandInput[*ptr_intl++],8+2);
+    tmp=_mm256_insert_epi8(tmp,expandInput[*ptr_intl++],8+1);
+    tmp=_mm256_insert_epi8(tmp,expandInput[*ptr_intl++],8+0);
+    tmp=_mm256_insert_epi8(tmp,expandInput[*ptr_intl++],16+7);
+    tmp=_mm256_insert_epi8(tmp,expandInput[*ptr_intl++],16+6);
+    tmp=_mm256_insert_epi8(tmp,expandInput[*ptr_intl++],16+5);
+    tmp=_mm256_insert_epi8(tmp,expandInput[*ptr_intl++],16+4);
+    tmp=_mm256_insert_epi8(tmp,expandInput[*ptr_intl++],16+3);
+    tmp=_mm256_insert_epi8(tmp,expandInput[*ptr_intl++],16+2);
+    tmp=_mm256_insert_epi8(tmp,expandInput[*ptr_intl++],16+1);
+    tmp=_mm256_insert_epi8(tmp,expandInput[*ptr_intl++],24+0);
+    tmp=_mm256_insert_epi8(tmp,expandInput[*ptr_intl++],24+7);
+    tmp=_mm256_insert_epi8(tmp,expandInput[*ptr_intl++],24+6);
+    tmp=_mm256_insert_epi8(tmp,expandInput[*ptr_intl++],24+5);
+    tmp=_mm256_insert_epi8(tmp,expandInput[*ptr_intl++],24+4);
+    tmp=_mm256_insert_epi8(tmp,expandInput[*ptr_intl++],24+3);
+    tmp=_mm256_insert_epi8(tmp,expandInput[*ptr_intl++],24+2);
+    tmp=_mm256_insert_epi8(tmp,expandInput[*ptr_intl++],24+1);
+    tmp=_mm256_insert_epi8(tmp,expandInput[*ptr_intl++],24+0);
+    *systematic2_ptr++=(unsigned int)_mm256_movemask_epi8(tmp);
+#endif
 #elif defined(__arm__)
     tmp=vsetq_lane_u8(expandInput[*ptr_intl++],tmp,7);
     tmp=vsetq_lane_u8(expandInput[*ptr_intl++],tmp,6);
@@ -485,7 +646,7 @@ void threegpplte_turbo_encoder(unsigned char *input,
 int main(int argc,char **argv)
 {
 
-  unsigned char input[INPUT_LENGTH+16],state,state2;
+  unsigned char input[INPUT_LENGTH+32],state,state2;
   unsigned char output[12+(3*(INPUT_LENGTH<<3))],x,z;
   int i;
   unsigned char out;
@@ -510,7 +671,7 @@ int main(int argc,char **argv)
   memset((void*)input,0,INPUT_LENGTH+16);
   for (i=0; i<INPUT_LENGTH; i++) {
     input[i] = i*219;
-    printf("Input %d : %x\n",i,input[i]);
+    printf("Input %d : %d\n",i,input[i]);
   }
 
   threegpplte_turbo_encoder(&input[0],
diff --git a/openair1/PHY/LTE_TRANSPORT/dlsch_demodulation.c b/openair1/PHY/LTE_TRANSPORT/dlsch_demodulation.c
index de9acefb6e..ad464b16ba 100644
--- a/openair1/PHY/LTE_TRANSPORT/dlsch_demodulation.c
+++ b/openair1/PHY/LTE_TRANSPORT/dlsch_demodulation.c
@@ -281,7 +281,7 @@ int rx_pdsch(PHY_VARS_UE *phy_vars_ue,
     //  avgs = cmax(avgs,avg[(aarx<<1)+aatx]);
 
 
-    lte_ue_pdsch_vars[eNB_id]->log2_maxh = (log2_approx(avgs)/2);
+    lte_ue_pdsch_vars[eNB_id]->log2_maxh = (log2_approx(avgs)/2)+3;
     // + log2_approx(frame_parms->nb_antennas_tx_eNB-1) //-1 because log2_approx counts the number of bits
     //      + log2_approx(frame_parms->nb_antennas_rx-1);
 
diff --git a/openair1/SIMULATION/LTE_PHY/dlsim.c b/openair1/SIMULATION/LTE_PHY/dlsim.c
index 1190c9523f..ef7f640e50 100644
--- a/openair1/SIMULATION/LTE_PHY/dlsim.c
+++ b/openair1/SIMULATION/LTE_PHY/dlsim.c
@@ -4033,7 +4033,7 @@ PMI_FEEDBACK:
         printf("[continue] effective rate : %f  (%2.1f%%,%f)): increase snr \n",rate*effective_rate, 100*effective_rate, rate);
       }
 
-      if (((double)errs[0]/(round_trials[0]))<1e-2)
+      if (((double)errs[0]/(round_trials[0]))<(10.0/n_frames))
         break;
     }// SNR
 
-- 
GitLab