From 8a9ef14e4f04f4dd2032cacecc925be219a5444e Mon Sep 17 00:00:00 2001
From: Florian Kaltenberger <florian.kaltenberger@eurecom.fr>
Date: Sun, 8 Jan 2017 14:52:29 +0100
Subject: [PATCH] reduced the max number of antenna ports to 6 to save memory
 (we are not using antenna port >5 anyway); mult_cpx_conj_vector can now also
 do multiply-add; beam_precoding now uses mult_cpx_conj_vector

Conflicts:
	openair1/PHY/MODULATION/beamforming.c
---
 openair1/PHY/INIT/lte_init.c                  |  4 +-
 .../lte_ul_channel_estimation.c               |  9 ++--
 openair1/PHY/MODULATION/beamforming.c         | 43 ++++++++++---------
 openair1/PHY/TOOLS/cmult_vv.c                 | 16 +++++--
 openair1/PHY/TOOLS/defs.h                     |  4 +-
 openair1/PHY/impl_defs_lte.h                  |  2 +-
 openair1/PHY/impl_defs_top.h                  |  2 +-
 7 files changed, 48 insertions(+), 32 deletions(-)

diff --git a/openair1/PHY/INIT/lte_init.c b/openair1/PHY/INIT/lte_init.c
index 12bd06bb81..accc54af54 100644
--- a/openair1/PHY/INIT/lte_init.c
+++ b/openair1/PHY/INIT/lte_init.c
@@ -1304,7 +1304,7 @@ int phy_init_lte_eNB(PHY_VARS_eNB *eNB,
       common_vars->txdataF[eNB_id] = (int32_t **)malloc16(NB_ANTENNA_PORTS_ENB*sizeof(int32_t*));
       common_vars->txdataF_BF[eNB_id] = (int32_t **)malloc16(fp->nb_antennas_tx*sizeof(int32_t*));
 
-      for (i=0; i<14; i++) {
+      for (i=0; i<NB_ANTENNA_PORTS_ENB; i++) {
         common_vars->txdataF[eNB_id][i] = (int32_t*)malloc16_clear(fp->ofdm_symbol_size*fp->symbols_per_tti*10*sizeof(int32_t) );
 #ifdef DEBUG_PHY
         msg("[openair][LTE_PHY][INIT] lte_common_vars->txdataF[%d][%d] = %p (%d bytes)\n",
@@ -1326,7 +1326,7 @@ int phy_init_lte_eNB(PHY_VARS_eNB *eNB,
 #endif
       }
       
-      for (i=0; i<14; i++) { // 14 is the total number of antenna ports
+      for (i=0; i<NB_ANTENNA_PORTS_ENB; i++) { 
         common_vars->beam_weights[eNB_id][i] = (int32_t **)malloc16_clear(fp->nb_antennas_tx*sizeof(int32_t*));
         for (j=0; j<fp->nb_antennas_tx; j++) {
           common_vars->beam_weights[eNB_id][i][j] = (int32_t *)malloc16_clear(fp->ofdm_symbol_size*sizeof(int32_t));
diff --git a/openair1/PHY/LTE_ESTIMATION/lte_ul_channel_estimation.c b/openair1/PHY/LTE_ESTIMATION/lte_ul_channel_estimation.c
index d2bc2e27c0..4fef74dd1c 100644
--- a/openair1/PHY/LTE_ESTIMATION/lte_ul_channel_estimation.c
+++ b/openair1/PHY/LTE_ESTIMATION/lte_ul_channel_estimation.c
@@ -720,10 +720,11 @@ int32_t lte_srs_channel_estimation(LTE_DL_FRAME_PARMS *frame_parms,
       //write_output("eNB_srs.m","srs_eNB",common_vars->srs,(frame_parms->ofdm_symbol_size),1,1);
 
       mult_cpx_conj_vector((int16_t*) &common_vars->rxdataF[eNB_id][aa][2*frame_parms->ofdm_symbol_size*symbol],
-                      (int16_t*) srs_vars->srs,
-                      (int16_t*) srs_vars->srs_ch_estimates[eNB_id][aa],
-                      frame_parms->ofdm_symbol_size,
-                      15);
+			   (int16_t*) srs_vars->srs,
+			   (int16_t*) srs_vars->srs_ch_estimates[eNB_id][aa],
+			   frame_parms->ofdm_symbol_size,
+			   15,
+			   0);
 
       //msg("SRS channel estimation cmult out\n");
 #ifdef USER_MODE
diff --git a/openair1/PHY/MODULATION/beamforming.c b/openair1/PHY/MODULATION/beamforming.c
index 6557a47ee7..331c4e2421 100644
--- a/openair1/PHY/MODULATION/beamforming.c
+++ b/openair1/PHY/MODULATION/beamforming.c
@@ -64,27 +64,30 @@ int beam_precoding(int32_t **txdataF,
   memset(txdataF_BF[aa],0,sizeof(int32_t)*(frame_parms->ofdm_symbol_size));
 
   for (p=0; p<14; p++) {
-    //if (p==0 || p==1 || p==5 || p>7)
-    //  mult_cpx_conj_vector((int16_t*)txdataF[p], (int16_t*)beam_weights[p][aa], (int16_t*)txdataF_BF[aa], frame_parms->ofdm_symbol_size, 15);
-    for (re=0;re<frame_parms->ofdm_symbol_size;re++) {
-      if ((p==0 || p==1 || p==5 || p>=7) && txdataF[p][slot_offset_F+symbol*frame_parms->ofdm_symbol_size+re]!=0) {
-        ((int16_t*)&txdataF_BF[aa][re])[0] += (int16_t)((((int16_t*)&txdataF[p][slot_offset_F+symbol*frame_parms->ofdm_symbol_size+re])[0]*((int16_t*)&beam_weights[p][aa][re])[0])>>15);
-        ((int16_t*)&txdataF_BF[aa][re])[0] -= (int16_t)((((int16_t*)&txdataF[p][slot_offset_F+symbol*frame_parms->ofdm_symbol_size+re])[1]*((int16_t*)&beam_weights[p][aa][re])[1])>>15);
-        ((int16_t*)&txdataF_BF[aa][re])[1] += (int16_t)((((int16_t*)&txdataF[p][slot_offset_F+symbol*frame_parms->ofdm_symbol_size+re])[0]*((int16_t*)&beam_weights[p][aa][re])[1])>>15);
-        ((int16_t*)&txdataF_BF[aa][re])[1] += (int16_t)((((int16_t*)&txdataF[p][slot_offset_F+symbol*frame_parms->ofdm_symbol_size+re])[1]*((int16_t*)&beam_weights[p][aa][re])[0])>>15);
+    if (p==0 || p==1 || p==5) {
+      mult_cpx_conj_vector((int16_t*)beam_weights[p][aa], (int16_t*)&txdataF[p][slot_offset_F+symbol*frame_parms->ofdm_symbol_size], (int16_t*)txdataF_BF[aa], frame_parms->ofdm_symbol_size, 15, 1);
+      //multadd_cpx_vector((int16_t*)txdataF[p],(int16_t*)beam_weights[p][aa], (int16_t*)txdataF_BF[aa], 0, frame_parms->ofdm_symbol_size, 15);
 
-	  /*
-          printf("beamforming.c:txdataF[%d][%d]=%d+j%d, beam_weights[%d][%d][%d]=%d+j%d,txdata_BF[%d][%d]=%d+j%d\n",
-                 p,slot_offset_F+symbol*frame_parms->ofdm_symbol_size+re,
-                 ((int16_t*)&txdataF[p][slot_offset_F+symbol*frame_parms->ofdm_symbol_size+re])[0],
-                 ((int16_t*)&txdataF[p][slot_offset_F+symbol*frame_parms->ofdm_symbol_size+re])[1],
-                 p,aa,re,
-                 ((int16_t*)&beam_weights[p][aa][re])[0],((int16_t*)&beam_weights[p][aa][re])[1],
-                 aa,re,
-                 ((int16_t*)&txdataF_BF[aa][re])[0],
-                 ((int16_t*)&txdataF_BF[aa][re])[1]); 
-	  */
-      } 
+      /*
+      for (re=0;re<frame_parms->ofdm_symbol_size;re++) {
+        if (txdataF[p][slot_offset_F+symbol*frame_parms->ofdm_symbol_size+re]!=0) {
+          ((int16_t*)&txdataF_BF[aa][re])[0] += (int16_t)((((int16_t*)&txdataF[p][slot_offset_F+symbol*frame_parms->ofdm_symbol_size+re])[0]*((int16_t*)&beam_weights[p][aa][re])[0])>>15);
+          ((int16_t*)&txdataF_BF[aa][re])[0] -= (int16_t)((((int16_t*)&txdataF[p][slot_offset_F+symbol*frame_parms->ofdm_symbol_size+re])[1]*((int16_t*)&beam_weights[p][aa][re])[1])>>15);
+          ((int16_t*)&txdataF_BF[aa][re])[1] += (int16_t)((((int16_t*)&txdataF[p][slot_offset_F+symbol*frame_parms->ofdm_symbol_size+re])[0]*((int16_t*)&beam_weights[p][aa][re])[1])>>15);
+          ((int16_t*)&txdataF_BF[aa][re])[1] += (int16_t)((((int16_t*)&txdataF[p][slot_offset_F+symbol*frame_parms->ofdm_symbol_size+re])[1]*((int16_t*)&beam_weights[p][aa][re])[0])>>15);
+
+            printf("beamforming.c:txdataF[%d][%d]=%d+j%d, beam_weights[%d][%d][%d]=%d+j%d,txdata_BF[%d][%d]=%d+j%d\n",
+                   p,slot_offset_F+symbol*frame_parms->ofdm_symbol_size+re,
+                   ((int16_t*)&txdataF[p][slot_offset_F+symbol*frame_parms->ofdm_symbol_size+re])[0],
+                   ((int16_t*)&txdataF[p][slot_offset_F+symbol*frame_parms->ofdm_symbol_size+re])[1],
+                   p,aa,re,
+                   ((int16_t*)&beam_weights[p][aa][re])[0],((int16_t*)&beam_weights[p][aa][re])[1],
+                   aa,re,
+                   ((int16_t*)&txdataF_BF[aa][re])[0],
+                   ((int16_t*)&txdataF_BF[aa][re])[1]);
+        }
+      }
+      */ 
     }
   }
   return 0;
diff --git a/openair1/PHY/TOOLS/cmult_vv.c b/openair1/PHY/TOOLS/cmult_vv.c
index cefa972061..39bfe547e7 100644
--- a/openair1/PHY/TOOLS/cmult_vv.c
+++ b/openair1/PHY/TOOLS/cmult_vv.c
@@ -41,7 +41,8 @@ int mult_cpx_conj_vector(int16_t *x1,
                          int16_t *x2,
                          int16_t *y,
                          uint32_t N,
-                         int output_shift)
+                         int output_shift,
+			 int madd)
 {
   // Multiply elementwise the complex conjugate of x1 with x2. 
   // x1       - input 1    in the format  |Re0 Im0 Re1 Im1|,......,|Re(N-2)  Im(N-2) Re(N-1) Im(N-1)|
@@ -55,6 +56,8 @@ int mult_cpx_conj_vector(int16_t *x1,
   // N        - the size f the vectors (this function does N cpx mpy. WARNING: N>=4;
   //
   // output_shift  - shift to be applied to generate output
+  //
+  // madd - if non-zero, the product is added to y instead of overwriting it
 
   uint32_t i;                 // loop counter
 
@@ -88,7 +91,11 @@ int mult_cpx_conj_vector(int16_t *x1,
     tmp_im = _mm_srai_epi32(tmp_im,output_shift);
     tmpy0  = _mm_unpacklo_epi32(tmp_re,tmp_im);
     tmpy1  = _mm_unpackhi_epi32(tmp_re,tmp_im);
-    *y_128 = _mm_packs_epi32(tmpy0,tmpy1);
+    if (madd==0) 
+      *y_128 = _mm_packs_epi32(tmpy0,tmpy1);
+    else
+      *y_128 += _mm_packs_epi32(tmpy0,tmpy1);
+
 #elif defined(__arm__)
 
     tmp_re  = vmull_s16(((simdshort_q15_t *)x1_128)[0], ((simdshort_q15_t*)x2_128)[0]);
@@ -110,7 +117,10 @@ int mult_cpx_conj_vector(int16_t *x1,
     tmp_re = vqshlq_s32(tmp_re,shift);
     tmp_im = vqshlq_s32(tmp_im,shift);
     tmpy   = vzip_s16(vmovn_s32(tmp_re),vmovn_s32(tmp_im));
-    *y_128 = vcombine_s16(tmpy.val[0],tmpy.val[1]);
+    if (madd==0)
+      *y_128 = vcombine_s16(tmpy.val[0],tmpy.val[1]);
+    else
+      *y_128 += vcombine_s16(tmpy.val[0],tmpy.val[1]);
 #endif
     x1_128++;
     x2_128++;
diff --git a/openair1/PHY/TOOLS/defs.h b/openair1/PHY/TOOLS/defs.h
index 453efc028a..273822c598 100644
--- a/openair1/PHY/TOOLS/defs.h
+++ b/openair1/PHY/TOOLS/defs.h
@@ -116,13 +116,15 @@ int rotate_cpx_vector(int16_t *x,
   @param y        - output     in the format  |Re0 Im0 Re1 Im1|,......,|Re(N-2)  Im(N-2) Re(N-1) Im(N-1)|
   @param N        - the size f the vectors (this function does N cpx mpy. WARNING: N>=4;
   @param output_shift  - shift to be applied to generate output
+  @param madd - if non-zero, the result is added to the output y instead of overwriting it
 */
 
 int mult_cpx_conj_vector(int16_t *x1,
                          int16_t *x2,
                          int16_t *y,
                          uint32_t N,
-                         int output_shift);
+                         int output_shift,
+			 int madd);
 
 // lte_dfts.c
 void init_fft(uint16_t size,
diff --git a/openair1/PHY/impl_defs_lte.h b/openair1/PHY/impl_defs_lte.h
index 6635573779..98d6e06f76 100644
--- a/openair1/PHY/impl_defs_lte.h
+++ b/openair1/PHY/impl_defs_lte.h
@@ -546,7 +546,7 @@ typedef struct {
   uint8_t nb_antennas_tx;
   /// Number of Receive antennas in node
   uint8_t nb_antennas_rx;
-  /// Number of Logical transmit antenna ports in eNodeB
+  /// Number of common transmit antenna ports in eNodeB (1 or 2)
   uint8_t nb_antenna_ports_eNB;
   /// PRACH_CONFIG
   PRACH_CONFIG_COMMON prach_config_common;
diff --git a/openair1/PHY/impl_defs_top.h b/openair1/PHY/impl_defs_top.h
index 79e684215c..dd09749fbf 100644
--- a/openair1/PHY/impl_defs_top.h
+++ b/openair1/PHY/impl_defs_top.h
@@ -177,7 +177,7 @@
 #define DMA_BLKS_PER_SLOT    (SLOT_LENGTH_BYTES/2048)                    // Number of DMA blocks per slot
 #define SLOT_TIME_NS         (SLOT_LENGTH_SAMPLES*(1e3)/7.68)            // slot time in ns
 
-#define NB_ANTENNA_PORTS_ENB  14                                         // total number of eNB antenna ports
+#define NB_ANTENNA_PORTS_ENB  6                                         // total number of eNB antenna ports
 
 #ifdef EXMIMO
 #define TARGET_RX_POWER 55    // Target digital power for the AGC
-- 
GitLab