diff --git a/cmake_targets/CMakeLists.txt b/cmake_targets/CMakeLists.txt index e4d3acf4b1c976540a95e6cbd033bfa26b9d9ecd..0bf63cfb1ac626240f429314bf0ca312c72e56b2 100644 --- a/cmake_targets/CMakeLists.txt +++ b/cmake_targets/CMakeLists.txt @@ -1034,6 +1034,7 @@ set(PHY_SRC ${OPENAIR1_DIR}/PHY/LTE_TRANSPORT/dlsch_modulation.c ${OPENAIR1_DIR}/PHY/LTE_TRANSPORT/dlsch_demodulation.c ${OPENAIR1_DIR}/PHY/LTE_TRANSPORT/dlsch_llr_computation.c + ${OPENAIR1_DIR}/PHY/LTE_TRANSPORT/dlsch_llr_computation_avx2.c ${OPENAIR1_DIR}/PHY/LTE_TRANSPORT/power_control.c ${OPENAIR1_DIR}/PHY/LTE_TRANSPORT/dlsch_decoding.c ${OPENAIR1_DIR}/PHY/LTE_TRANSPORT/dlsch_scrambling.c diff --git a/openair1/PHY/INIT/lte_init.c b/openair1/PHY/INIT/lte_init.c index d0556a0235c042a9805cd595af26a51a2ae1cbc2..de83506bb31aed698f0c41fa95ff49468c204955 100755 --- a/openair1/PHY/INIT/lte_init.c +++ b/openair1/PHY/INIT/lte_init.c @@ -948,6 +948,11 @@ void phy_config_dedicated_ue(uint8_t Mod_id,int CC_id,uint8_t eNB_id, // fill cqi parameters for periodic CQI reporting get_cqipmiri_params(phy_vars_ue,eNB_id); + // disable MIB SIB decoding once we are on connected mode + LOG_I(PHY,"Disabling SIB MIB decoding \n"); + phy_vars_ue->decode_SIB = 0; + phy_vars_ue->decode_MIB = 0; + } void phy_config_cba_rnti (module_id_t Mod_id,int CC_id,eNB_flag_t eNB_flag, uint8_t index, rnti_t cba_rnti, uint8_t cba_group_id, uint8_t num_active_cba_groups) @@ -1306,6 +1311,10 @@ int phy_init_lte_ue(PHY_VARS_UE *ue, ue->high_speed_flag = 1; ue->ch_est_alpha = 24576; + // enable MIB/SIB decoding by default + ue->decode_MIB = 1; + ue->decode_SIB = 1; + init_prach_tables(839); diff --git a/openair1/PHY/LTE_ESTIMATION/lte_dl_bf_channel_estimation.c b/openair1/PHY/LTE_ESTIMATION/lte_dl_bf_channel_estimation.c index 02b5102ae544a16b04e527ac9aed30fc1fc84009..7e112342ce15980c0e199cc930c6428ea8347db7 100644 --- a/openair1/PHY/LTE_ESTIMATION/lte_dl_bf_channel_estimation.c +++ b/openair1/PHY/LTE_ESTIMATION/lte_dl_bf_channel_estimation.c @@ 
-51,10 +51,10 @@ int lte_dl_bf_channel_estimation(PHY_VARS_UE *phy_vars_ue, int uespec_pilot[300]; LTE_DL_FRAME_PARMS *frame_parms = &phy_vars_ue->frame_parms; - LTE_UE_DLSCH_t **dlsch_ue = phy_vars_ue->dlsch[eNB_id]; + LTE_UE_DLSCH_t **dlsch_ue = phy_vars_ue->dlsch[(Ns>>1)&0x1][eNB_id]; LTE_DL_UE_HARQ_t *dlsch0_harq; - harq_pid = dlsch_ue[0]->current_harq_pid; + harq_pid = dlsch_ue[0]->current_harq_pid; dlsch0_harq = dlsch_ue[0]->harq_processes[harq_pid]; if (((frame_parms->Ncp == NORMAL) && (symbol>=7)) || diff --git a/openair1/PHY/LTE_TRANSPORT/dci.c b/openair1/PHY/LTE_TRANSPORT/dci.c index 49fe90bf15c0f58668d8e12f010aa4b46a2fe842..af25e33ff74dbc70ea7d4d46a6cdfcbf41fad1a1 100644 --- a/openair1/PHY/LTE_TRANSPORT/dci.c +++ b/openair1/PHY/LTE_TRANSPORT/dci.c @@ -2893,15 +2893,15 @@ void dci_decoding_procedure0(LTE_UE_PDCCH **pdcch_vars, break; case 2: - *CCEmap|=(0x03<<(CCEind&0x1f)); + *CCEmap|=(1<<(CCEind&0x1f)); break; case 4: - *CCEmap|=(0x0f<<(CCEind&0x1f)); + *CCEmap|=(1<<(CCEind&0x1f)); break; case 8: - *CCEmap|=(0xff<<(CCEind&0x1f)); + *CCEmap|=(1<<(CCEind&0x1f)); break; } @@ -3149,7 +3149,7 @@ uint16_t dci_decoding_procedure(PHY_VARS_UE *ue, eNB_id, frame_parms, mi, - SI_RNTI, + ((ue->decode_SIB == 1) ? SI_RNTI : 0) , ra_rnti, P_RNTI, 2, @@ -3177,7 +3177,7 @@ uint16_t dci_decoding_procedure(PHY_VARS_UE *ue, eNB_id, frame_parms, mi, - SI_RNTI, + ((ue->decode_SIB == 1) ? SI_RNTI : 0), ra_rnti, P_RNTI, 2, @@ -3209,7 +3209,7 @@ uint16_t dci_decoding_procedure(PHY_VARS_UE *ue, eNB_id, frame_parms, mi, - SI_RNTI, + ((ue->decode_SIB == 1) ? SI_RNTI : 0), P_RNTI, ra_rnti, 3, @@ -3237,7 +3237,7 @@ uint16_t dci_decoding_procedure(PHY_VARS_UE *ue, eNB_id, frame_parms, mi, - SI_RNTI, + ((ue->decode_SIB == 1) ? SI_RNTI : 0), ra_rnti, P_RNTI, 3, @@ -3271,7 +3271,7 @@ uint16_t dci_decoding_procedure(PHY_VARS_UE *ue, eNB_id, frame_parms, mi, - SI_RNTI, + ((ue->decode_SIB == 1) ? 
SI_RNTI : 0), ra_rnti, P_RNTI, 0, @@ -3300,7 +3300,7 @@ uint16_t dci_decoding_procedure(PHY_VARS_UE *ue, eNB_id, frame_parms, mi, - SI_RNTI, + ((ue->decode_SIB == 1) ? SI_RNTI : 0), ra_rnti, P_RNTI, 1, @@ -3333,7 +3333,7 @@ uint16_t dci_decoding_procedure(PHY_VARS_UE *ue, eNB_id, frame_parms, mi, - SI_RNTI, + ((ue->decode_SIB == 1) ? SI_RNTI : 0), ra_rnti, P_RNTI, 2, @@ -3362,7 +3362,7 @@ uint16_t dci_decoding_procedure(PHY_VARS_UE *ue, eNB_id, frame_parms, mi, - SI_RNTI, + ((ue->decode_SIB == 1) ? SI_RNTI : 0), ra_rnti, P_RNTI, 3, @@ -3395,7 +3395,7 @@ uint16_t dci_decoding_procedure(PHY_VARS_UE *ue, eNB_id, frame_parms, mi, - SI_RNTI, + ((ue->decode_SIB == 1) ? SI_RNTI : 0), ra_rnti, P_RNTI, 0, @@ -3426,7 +3426,7 @@ uint16_t dci_decoding_procedure(PHY_VARS_UE *ue, eNB_id, frame_parms, mi, - SI_RNTI, + ((ue->decode_SIB == 1) ? SI_RNTI : 0), ra_rnti, P_RNTI, 1, @@ -3458,7 +3458,7 @@ uint16_t dci_decoding_procedure(PHY_VARS_UE *ue, eNB_id, frame_parms, mi, - SI_RNTI, + ((ue->decode_SIB == 1) ? SI_RNTI : 0), ra_rnti, P_RNTI, 2, @@ -3490,7 +3490,7 @@ uint16_t dci_decoding_procedure(PHY_VARS_UE *ue, eNB_id, frame_parms, mi, - SI_RNTI, + ((ue->decode_SIB == 1) ? SI_RNTI : 0), ra_rnti, P_RNTI, 3, @@ -3518,16 +3518,18 @@ uint16_t dci_decoding_procedure(PHY_VARS_UE *ue, } else if (tmode == 3) { + LOG_D(PHY," Now check UE_SPEC format 2A_2A search aggregation 1\n"); // Now check UE_SPEC format 2A_2A search spaces at aggregation 1 + old_dci_cnt=dci_cnt; dci_decoding_procedure0(pdcch_vars,0,mode, subframe, dci_alloc, eNB_id, frame_parms, mi, - SI_RNTI, + ((ue->decode_SIB == 1) ? 
SI_RNTI : 0), ra_rnti, - P_RNTI, + P_RNTI, 0, format1A, format1A, @@ -3542,21 +3544,25 @@ uint16_t dci_decoding_procedure(PHY_VARS_UE *ue, &CCEmap1, &CCEmap2); + LOG_D(PHY," format 2A_2A search CCEmap0 %x, format0_found %d, format_c_found %d \n", CCEmap0, format0_found, format_c_found); if ((CCEmap0==0xffff)|| ((format0_found==1)&&(format_c_found==1))) return(dci_cnt); + LOG_D(PHY," format 2A_2A search dci_cnt %d, old_dci_cn t%d \n", dci_cnt, old_dci_cnt); if (dci_cnt>old_dci_cnt) return(dci_cnt); // Now check UE_SPEC format 2 search spaces at aggregation 2 + LOG_D(PHY," Now check UE_SPEC format 2A_2A search aggregation 2\n"); + old_dci_cnt=dci_cnt; dci_decoding_procedure0(pdcch_vars,0,mode, subframe, dci_alloc, eNB_id, frame_parms, mi, - SI_RNTI, + ((ue->decode_SIB == 1) ? SI_RNTI : 0), ra_rnti, P_RNTI, 1, @@ -3577,19 +3583,22 @@ uint16_t dci_decoding_procedure(PHY_VARS_UE *ue, ((format0_found==1)&&(format_c_found==1))) return(dci_cnt); + LOG_D(PHY," format 2A_2A search dci_cnt %d, old_dci_cn t%d \n", dci_cnt, old_dci_cnt); if (dci_cnt>old_dci_cnt) return(dci_cnt); // Now check UE_SPEC format 2_2A search spaces at aggregation 4 + LOG_D(PHY," Now check UE_SPEC format 2_2A search spaces at aggregation 4 \n"); + old_dci_cnt=dci_cnt; dci_decoding_procedure0(pdcch_vars,0,mode, subframe, dci_alloc, eNB_id, frame_parms, mi, - SI_RNTI, + ((ue->decode_SIB == 1) ? 
SI_RNTI : 0), ra_rnti, - P_RNTI, + P_RNTI, 2, format1A, format1A, @@ -3608,18 +3617,21 @@ uint16_t dci_decoding_procedure(PHY_VARS_UE *ue, ((format0_found==1)&&(format_c_found==1))) return(dci_cnt); + LOG_D(PHY," format 2A_2A search dci_cnt %d, old_dci_cn t%d \n", dci_cnt, old_dci_cnt); if (dci_cnt>old_dci_cnt) return(dci_cnt); //#ifdef ALL_AGGREGATION // Now check UE_SPEC format 2_2A search spaces at aggregation 8 + LOG_D(PHY," Now check UE_SPEC format 2_2A search spaces at aggregation 8 \n"); + old_dci_cnt=dci_cnt; dci_decoding_procedure0(pdcch_vars,0,mode, subframe, dci_alloc, eNB_id, frame_parms, mi, - SI_RNTI, + ((ue->decode_SIB == 1) ? SI_RNTI : 0), ra_rnti, P_RNTI, 3, @@ -3636,16 +3648,24 @@ uint16_t dci_decoding_procedure(PHY_VARS_UE *ue, &CCEmap1, &CCEmap2); //#endif + if ((CCEmap0==0xffff)|| + ((format0_found==1)&&(format_c_found==1))) + return(dci_cnt); + + LOG_D(PHY," format 2A_2A search dci_cnt %d, old_dci_cn t%d \n", dci_cnt, old_dci_cnt); + if (dci_cnt>old_dci_cnt) + return(dci_cnt); } else if (tmode == 4) { // Now check UE_SPEC format 2_2A search spaces at aggregation 1 + old_dci_cnt=dci_cnt; dci_decoding_procedure0(pdcch_vars,0,mode, subframe, dci_alloc, eNB_id, frame_parms, mi, - SI_RNTI, + ((ue->decode_SIB == 1) ? SI_RNTI : 0), ra_rnti, P_RNTI, 0, @@ -3670,13 +3690,14 @@ uint16_t dci_decoding_procedure(PHY_VARS_UE *ue, return(dci_cnt); // Now check UE_SPEC format 2 search spaces at aggregation 2 + old_dci_cnt=dci_cnt; dci_decoding_procedure0(pdcch_vars,0,mode, subframe, dci_alloc, eNB_id, frame_parms, mi, - SI_RNTI, + ((ue->decode_SIB == 1) ? SI_RNTI : 0), ra_rnti, P_RNTI, 1, @@ -3701,13 +3722,14 @@ uint16_t dci_decoding_procedure(PHY_VARS_UE *ue, return(dci_cnt); // Now check UE_SPEC format 2_2A search spaces at aggregation 4 + old_dci_cnt=dci_cnt; dci_decoding_procedure0(pdcch_vars,0,mode, subframe, dci_alloc, eNB_id, frame_parms, mi, - SI_RNTI, + ((ue->decode_SIB == 1) ? 
SI_RNTI : 0), ra_rnti, P_RNTI, 2, @@ -3733,13 +3755,14 @@ uint16_t dci_decoding_procedure(PHY_VARS_UE *ue, //#ifdef ALL_AGGREGATION // Now check UE_SPEC format 2_2A search spaces at aggregation 8 + old_dci_cnt=dci_cnt; dci_decoding_procedure0(pdcch_vars,0,mode, subframe, dci_alloc, eNB_id, frame_parms, mi, - SI_RNTI, + ((ue->decode_SIB == 1) ? SI_RNTI : 0), ra_rnti, P_RNTI, 3, @@ -3762,13 +3785,14 @@ uint16_t dci_decoding_procedure(PHY_VARS_UE *ue, #ifdef DEBUG_DCI_DECODING LOG_I(PHY," MU-MIMO check UE_SPEC format 1E_2A_M10PRB\n"); #endif + old_dci_cnt=dci_cnt; dci_decoding_procedure0(pdcch_vars,0,mode, subframe, dci_alloc, eNB_id, frame_parms, mi, - SI_RNTI, + ((ue->decode_SIB == 1) ? SI_RNTI : 0), ra_rnti, P_RNTI, 0, @@ -3794,13 +3818,14 @@ uint16_t dci_decoding_procedure(PHY_VARS_UE *ue, return(dci_cnt); // Now check UE_SPEC format 1E_2A_M10PRB search spaces aggregation 2 + old_dci_cnt=dci_cnt; dci_decoding_procedure0(pdcch_vars,0,mode, subframe, dci_alloc, eNB_id, frame_parms, mi, - SI_RNTI, + ((ue->decode_SIB == 1) ? SI_RNTI : 0), ra_rnti, P_RNTI, 1, @@ -3825,13 +3850,14 @@ uint16_t dci_decoding_procedure(PHY_VARS_UE *ue, return(dci_cnt); // Now check UE_SPEC format 1E_2A_M10PRB search spaces aggregation 4 + old_dci_cnt=dci_cnt; dci_decoding_procedure0(pdcch_vars,0,mode, subframe, dci_alloc, eNB_id, frame_parms, mi, - SI_RNTI, + ((ue->decode_SIB == 1) ? SI_RNTI : 0), ra_rnti, P_RNTI, 2, @@ -3858,13 +3884,14 @@ uint16_t dci_decoding_procedure(PHY_VARS_UE *ue, //#ifdef ALL_AGGREGATION // Now check UE_SPEC format 1E_2A_M10PRB search spaces at aggregation 8 + old_dci_cnt=dci_cnt; dci_decoding_procedure0(pdcch_vars,0,mode, subframe, dci_alloc, eNB_id, frame_parms, mi, - SI_RNTI, + ((ue->decode_SIB == 1) ? 
SI_RNTI : 0), ra_rnti, P_RNTI, 3, diff --git a/openair1/PHY/LTE_TRANSPORT/dci_tools.c b/openair1/PHY/LTE_TRANSPORT/dci_tools.c index 9f4749cdc60a69f8bf3d7281a4a533891d49dcae..c4e379b45ab1b42c1db7b9622f67402be0f0b737 100644 --- a/openair1/PHY/LTE_TRANSPORT/dci_tools.c +++ b/openair1/PHY/LTE_TRANSPORT/dci_tools.c @@ -4763,22 +4763,26 @@ int check_dci_format1_1a_coherency(DCI_format_t dci_format, uint16_t si_rnti, uint16_t ra_rnti, uint16_t p_rnti, + uint32_t frame, + uint8_t subframe, DCI_INFO_EXTRACTED_t *pdci_info_extarcted, LTE_DL_UE_HARQ_t *pdlsch0_harq) { uint8_t harq_pid = pdci_info_extarcted->harq_pid; uint32_t rballoc = pdci_info_extarcted->rballoc; uint8_t mcs1 = pdci_info_extarcted->mcs1; - uint8_t rv1 = pdci_info_extarcted->rv1; - uint8_t ndi1 = pdci_info_extarcted->ndi1; uint8_t TPC = pdci_info_extarcted->TPC; uint8_t rah = pdci_info_extarcted->rah; +#ifdef DEBUG_DCI + uint8_t rv1 = pdci_info_extarcted->rv1; + uint8_t ndi1 = pdci_info_extarcted->ndi1; +#endif uint8_t NPRB = 0; long long int RIV_max = 0; #ifdef DEBUG_DCI - LOG_I(PHY,"[DCI-FORMAT-1-1A] dci_format %d\n", dci_format); + LOG_I(PHY,"[DCI-FORMAT-1-1A] AbsSubframe %d.%d dci_format %d\n", frame, subframe, dci_format); LOG_I(PHY,"[DCI-FORMAT-1-1A] rnti %x\n", rnti); LOG_I(PHY,"[DCI-FORMAT-1-1A] harq_pid %d\n", harq_pid); LOG_I(PHY,"[DCI-FORMAT-1-1A] rah %d\n", rah); @@ -4983,24 +4987,30 @@ int check_dci_format2_2a_coherency(DCI_format_t dci_format, uint8_t rv2 = pdci_info_extarcted->rv2; uint8_t harq_pid = pdci_info_extarcted->harq_pid; uint32_t rballoc = pdci_info_extarcted->rballoc; + +#ifdef DEBUG_DCI uint8_t ndi1 = pdci_info_extarcted->ndi1; uint8_t ndi2 = pdci_info_extarcted->ndi2; +#endif uint8_t NPRB = 0; long long RIV_max = 0; +#ifdef DEBUG_DCI LOG_I(PHY, "extarcted dci - dci_format %d \n", dci_format); + LOG_I(PHY, "extarcted dci - rnti %d \n", rnti); LOG_I(PHY, "extarcted dci - rah %d \n", rah); LOG_I(PHY, "extarcted dci - mcs1 %d \n", mcs1); LOG_I(PHY, "extarcted dci - mcs2 %d \n", 
mcs2); LOG_I(PHY, "extarcted dci - rv1 %d \n", rv1); LOG_I(PHY, "extarcted dci - rv2 %d \n", rv2); - LOG_I(PHY, "extarcted dci - ndi1 %d \n", ndi1); - LOG_I(PHY, "extarcted dci - ndi2 %d \n", ndi2); + //LOG_I(PHY, "extarcted dci - ndi1 %d \n", ndi1); + //LOG_I(PHY, "extarcted dci - ndi2 %d \n", ndi2); LOG_I(PHY, "extarcted dci - rballoc %x \n", rballoc); - LOG_I(PHY, "extarcted dci - harq pif %d \n", harq_pid); + LOG_I(PHY, "extarcted dci - harq pid %d \n", harq_pid); LOG_I(PHY, "extarcted dci - round0 %d \n", pdlsch0_harq->round); LOG_I(PHY, "extarcted dci - round1 %d \n", pdlsch1_harq->round); +#endif // I- check dci content minimum coherency if(harq_pid >8) @@ -5043,6 +5053,21 @@ int check_dci_format2_2a_coherency(DCI_format_t dci_format, }*/ + if((pdlsch0_harq->round == 0) && (rv1 > 0)) + { + // DCI false detection + LOG_I(PHY,"bad rv1\n"); + return(0); + } + + if((pdlsch1_harq->round == 0) && (rv2 > 0)) + { + // DCI false detection + LOG_I(PHY,"bad rv2\n"); + return(0); + } + + switch (N_RB_DL) { case 6: if (rah == 0) @@ -5606,6 +5631,7 @@ void compute_precoding_info_format2A(uint8_t tpmi, void prepare_dl_decoding_format2_2A(DCI_format_t dci_format, DCI_INFO_EXTRACTED_t *pdci_info_extarcted, LTE_DL_FRAME_PARMS *frame_parms, + uint16_t rnti, uint8_t subframe, LTE_DL_UE_HARQ_t *dlsch0_harq, LTE_DL_UE_HARQ_t *dlsch1_harq, @@ -5663,15 +5689,17 @@ void prepare_dl_decoding_format2_2A(DCI_format_t dci_format, dlsch1_harq->dl_power_off = 1; pdlsch0->current_harq_pid = harq_pid; - pdlsch0->harq_ack[subframe].harq_id = harq_pid; + pdlsch0->harq_ack[subframe].harq_id = harq_pid; pdlsch1->current_harq_pid = harq_pid; - pdlsch1->harq_ack[subframe].harq_id = harq_pid; + pdlsch1->harq_ack[subframe].harq_id = harq_pid; // assume two CW are active dlsch0_harq->status = ACTIVE; dlsch1_harq->status = ACTIVE; pdlsch0->active = 1; pdlsch1->active = 1; + pdlsch0->rnti = rnti; + pdlsch1->rnti = rnti; if (TB0_active && TB1_active && tbswap==1) { @@ -5682,20 +5710,16 @@ void 
prepare_dl_decoding_format2_2A(DCI_format_t dci_format, if (TB0_active==0) { dlsch0_harq->status = SCH_IDLE; pdlsch0->active = 0; -#ifdef DEBUG_HARQ + #ifdef DEBUG_HARQ printf("[DCI UE]: TB0 is deactivated, retransmit TB1 transmit in TM6\n"); -#endif + #endif } if (TB1_active==0) { dlsch1_harq->status = SCH_IDLE; pdlsch1->active = 0; -#ifdef DEBUG_HARQ - printf("[DCI UE]: TB1 is deactivated, retransmit TB0 transmit in TM6\n"); -#endif } - #ifdef DEBUG_HARQ printf("[DCI UE]: dlsch0_harq status %d , dlsch1_harq status %d\n", dlsch0_harq->status, dlsch1_harq->status); #endif @@ -5727,6 +5751,9 @@ void prepare_dl_decoding_format2_2A(DCI_format_t dci_format, dlsch1_harq->rb_alloc_odd[3] = dlsch0_harq->rb_alloc_odd[3]; dlsch1_harq->nb_rb = dlsch0_harq->nb_rb; + + //dlsch0_harq->Nl = 1; + //dlsch1_harq->Nl = 1; } } else if ((TB0_active == 0) && (TB1_active == 1)){ @@ -5774,38 +5801,57 @@ void prepare_dl_decoding_format2_2A(DCI_format_t dci_format, if ((ndi1!=dlsch0_harq->DCINdi) || (dlsch0_harq->first_tx==1)) { dlsch0_harq->round = 0; + //LOG_I(PHY,"[UE] DLSCH: New Data Indicator CW0 subframe %d (pid %d, round %d)\n", + // subframe,harq_pid,dlsch0_harq->round); if ( dlsch0_harq->first_tx==1) { LOG_D(PHY,"Format 2 DCI First TX0: Clearing flag\n"); dlsch0_harq->first_tx = 0; } }else{ if(dlsch0_harq->round == 0) { +#if 0 // skip pdsch decoding and report ack dlsch0_harq->status = SCH_IDLE; pdlsch0->active = 0; pdlsch0->harq_ack[subframe].ack = 1; pdlsch0->harq_ack[subframe].harq_id = harq_pid; pdlsch0->harq_ack[subframe].send_harq_status = 1; +#endif } } - dlsch0_harq->TBS = TBStable[get_I_TBS(dlsch0_harq->mcs)][dlsch0_harq->nb_rb-1]; - if(dlsch0_harq->Nl == 2) - dlsch0_harq->TBS = TBStable[get_I_TBS(dlsch0_harq->mcs)][(dlsch0_harq->nb_rb<<1)-1]; - if (mcs1 <= 28) + // if Imcs in [29..31] TBS is assumed to be as determined from DCI transported in the latest + // PDCCH for the same trasport block using Imcs in [0 .. 
28] + if(dlsch0_harq->mcs <= 28) + { + dlsch0_harq->TBS = TBStable[get_I_TBS(dlsch0_harq->mcs)][dlsch0_harq->nb_rb-1]; + LOG_D(PHY,"[UE] DLSCH: New TBS CW0 subframe %d (pid %d, round %d) TBS %d \n", + subframe,harq_pid,dlsch0_harq->round, dlsch0_harq->TBS); + } + else + { + LOG_D(PHY,"[UE] DLSCH: Keep the same TBS CW0 subframe %d (pid %d, round %d) TBS %d \n", + subframe,harq_pid,dlsch0_harq->round, dlsch0_harq->TBS); + } + //if(dlsch0_harq->Nl == 2) + //dlsch0_harq->TBS = TBStable[get_I_TBS(dlsch0_harq->mcs)][(dlsch0_harq->nb_rb<<1)-1]; + if (mcs1 <= 28) dlsch0_harq->Qm = get_Qm(mcs1); - else if (mcs1<=31) + else if (mcs1<=31) dlsch0_harq->Qm = (mcs1-28)<<1; } if (TB1_active) { if ((ndi2!=dlsch1_harq->DCINdi) || (dlsch1_harq->first_tx==1)) { dlsch1_harq->round = 0; + //LOG_I(PHY,"[UE] DLSCH: New Data Indicator CW1 subframe %d (pid %d, round %d)\n", + // subframe,harq_pid,dlsch0_harq->round); if (dlsch1_harq->first_tx==1) { LOG_D(PHY,"Format 2 DCI First TX1: Clearing flag\n"); dlsch1_harq->first_tx = 0; } }else{ +#if 0 if(dlsch1_harq->round == 0) { // skip pdsch decoding and report ack dlsch1_harq->status = SCH_IDLE; @@ -5814,15 +5860,25 @@ void prepare_dl_decoding_format2_2A(DCI_format_t dci_format, pdlsch1->harq_ack[subframe].harq_id = harq_pid; pdlsch1->harq_ack[subframe].send_harq_status = 1; } +#endif } - dlsch1_harq->TBS = TBStable[get_I_TBS(dlsch1_harq->mcs)][dlsch1_harq->nb_rb-1]; - if(dlsch0_harq->Nl == 2) - dlsch0_harq->TBS = TBStable[get_I_TBS(dlsch0_harq->mcs)][(dlsch0_harq->nb_rb<<1)-1]; - - if (mcs2 <= 28) + // if Imcs in [29..31] TBS is assumed to be as determined from DCI transported in the latest + // PDCCH for the same trasport block using Imcs in [0 .. 
28] + if(dlsch1_harq->mcs <= 28) + { + dlsch1_harq->TBS = TBStable[get_I_TBS(dlsch1_harq->mcs)][dlsch1_harq->nb_rb-1]; + LOG_D(PHY,"[UE] DLSCH: New TBS CW1 subframe %d (pid %d, round %d) TBS %d \n", + subframe,harq_pid,dlsch1_harq->round, dlsch1_harq->TBS); + } + else + { + LOG_D(PHY,"[UE] DLSCH: Keep the same TBS CW1 subframe %d (pid %d, round %d) TBS %d \n", + subframe,harq_pid,dlsch1_harq->round, dlsch1_harq->TBS); + } + if (mcs2 <= 28) dlsch1_harq->Qm = get_Qm(mcs2); - else if (mcs1<=31) + else if (mcs1<=31) dlsch1_harq->Qm = (mcs2-28)<<1; } @@ -5920,7 +5976,7 @@ int generate_ue_dlsch_params_from_dci(int frame, tc_rnti, si_rnti, ra_rnti, - p_rnti, + p_rnti,frame,subframe, &dci_info_extarcted, dlsch0_harq); if(status == 0) @@ -6020,7 +6076,7 @@ int generate_ue_dlsch_params_from_dci(int frame, tc_rnti, si_rnti, ra_rnti, - p_rnti, + p_rnti,frame,subframe, &dci_info_extarcted, dlsch0_harq); if(status == 0) @@ -6047,7 +6103,7 @@ int generate_ue_dlsch_params_from_dci(int frame, case format2: { // extract dci infomation - LOG_I(PHY,"[DCI-format2] extract dci infomation \n"); + //LOG_I(PHY,"[DCI-format2] AbsSubframe %d.%d extract dci infomation \n", frame, subframe); extract_dci2_info(frame_parms->N_RB_DL, frame_type, frame_parms->nb_antenna_ports_eNB, @@ -6069,7 +6125,7 @@ int generate_ue_dlsch_params_from_dci(int frame, dlsch0_harq = dlsch0->harq_processes[harq_pid]; dlsch1_harq = dlsch1->harq_processes[harq_pid]; - LOG_I(PHY,"[DCI-format2] check dci content \n"); + //LOG_I(PHY,"[DCI-format2] check dci content \n"); status = check_dci_format2_2a_coherency(format2, frame_parms->N_RB_DL, &dci_info_extarcted, @@ -6083,10 +6139,11 @@ int generate_ue_dlsch_params_from_dci(int frame, return(-1); // dci is correct ==> update internal structure and prepare dl decoding - LOG_I(PHY,"[DCI-format2] update internal structure and prepare dl decoding \n"); + //LOG_I(PHY,"[DCI-format2] update internal structure and prepare dl decoding \n"); prepare_dl_decoding_format2_2A(format2, 
&dci_info_extarcted, frame_parms, + rnti, subframe, dlsch0_harq, dlsch1_harq, @@ -6099,7 +6156,7 @@ int generate_ue_dlsch_params_from_dci(int frame, case format2A: { // extract dci infomation - LOG_I(PHY,"[DCI-format2A] extract dci infomation \n"); + //LOG_I(PHY,"[DCI-format2] AbsSubframe %d.%d extract dci infomation \n", frame%1024, subframe); extract_dci2A_info(frame_parms->N_RB_DL, frame_type, frame_parms->nb_antenna_ports_eNB, @@ -6107,10 +6164,10 @@ int generate_ue_dlsch_params_from_dci(int frame, &dci_info_extarcted); // check dci content - LOG_I(PHY,"[DCI-format2A] check dci content \n"); - LOG_I(PHY,"[DCI-format2A] tb_swap %d harq_pid %d\n", dci_info_extarcted.tb_swap, dci_info_extarcted.harq_pid); - dlsch[0]->active = 0; - dlsch[1]->active = 0; + //LOG_I(PHY,"[DCI-format2A] check dci content \n"); + //LOG_I(PHY,"[DCI-format2A] tb_swap %d harq_pid %d\n", dci_info_extarcted.tb_swap, dci_info_extarcted.harq_pid); + //dlsch[0]->active = 0; + //dlsch[1]->active = 0; if (dci_info_extarcted.tb_swap == 0) { dlsch0 = dlsch[0]; @@ -6122,7 +6179,7 @@ int generate_ue_dlsch_params_from_dci(int frame, dlsch0_harq = dlsch0->harq_processes[dci_info_extarcted.harq_pid]; dlsch1_harq = dlsch1->harq_processes[dci_info_extarcted.harq_pid]; - LOG_I(PHY,"[DCI-format2A] check dci content \n"); + //LOG_I(PHY,"[DCI-format2A] check dci content \n"); status = check_dci_format2_2a_coherency(format2A, frame_parms->N_RB_DL, &dci_info_extarcted, @@ -6136,10 +6193,11 @@ int generate_ue_dlsch_params_from_dci(int frame, return(-1); // dci is correct ==> update internal structure and prepare dl decoding - LOG_I(PHY,"[DCI-format2A] update internal structure and prepare dl decoding \n"); + //LOG_I(PHY,"[DCI-format2A] update internal structure and prepare dl decoding \n"); prepare_dl_decoding_format2_2A(format2A, &dci_info_extarcted, frame_parms, + rnti, subframe, dlsch0_harq, dlsch1_harq, @@ -7142,7 +7200,7 @@ int generate_ue_ulsch_params_from_dci(void *dci_pdu, uint8_t transmission_mode = 
ue->transmission_mode[eNB_id]; ANFBmode_t AckNackFBMode; LTE_UE_ULSCH_t *ulsch = ue->ulsch[eNB_id]; - LTE_UE_DLSCH_t **dlsch = ue->dlsch[0]; + LTE_UE_DLSCH_t **dlsch = ue->dlsch[subframe&0x1][0]; PHY_MEASUREMENTS *meas = &ue->measurements; LTE_DL_FRAME_PARMS *frame_parms = &ue->frame_parms; // uint32_t current_dlsch_cqi = ue->current_dlsch_cqi[eNB_id]; @@ -7947,7 +8005,7 @@ int generate_ue_ulsch_params_from_dci(void *dci_pdu, if (frame_parms->frame_type == FDD) { int dl_subframe = (subframe<4) ? (subframe+6) : (subframe-4); - if (ue->dlsch[eNB_id][0]->harq_ack[dl_subframe].send_harq_status>0) { // we have downlink transmission + if (ue->dlsch[dl_subframe&0x1][eNB_id][0]->harq_ack[dl_subframe].send_harq_status>0) { // we have downlink transmission ulsch->harq_processes[harq_pid]->O_ACK = 1; } else { ulsch->harq_processes[harq_pid]->O_ACK = 0; diff --git a/openair1/PHY/LTE_TRANSPORT/defs.h b/openair1/PHY/LTE_TRANSPORT/defs.h index 3b13c01c52ccf0b2b02e69772aa38ec61487d3e5..f6293bf1b2d73689d6769a1270bfe0ad53fc418b 100755 --- a/openair1/PHY/LTE_TRANSPORT/defs.h +++ b/openair1/PHY/LTE_TRANSPORT/defs.h @@ -708,7 +708,7 @@ typedef struct { int16_t sqrt_rho_a; /// amplitude of PDSCH (compared to RS) in symbols containing pilots int16_t sqrt_rho_b; - /// Current HARQ process id + /// Current HARQ process id threadRx Odd and threadRx Even uint8_t current_harq_pid; /// Current subband antenna selection uint32_t antenna_alloc; diff --git a/openair1/PHY/LTE_TRANSPORT/dlsch_decoding.c b/openair1/PHY/LTE_TRANSPORT/dlsch_decoding.c index 04794e3ffcba4fcf6540f31cffe594416774b23f..d683decc9f57230760206b91795621eaa585de8e 100644 --- a/openair1/PHY/LTE_TRANSPORT/dlsch_decoding.c +++ b/openair1/PHY/LTE_TRANSPORT/dlsch_decoding.c @@ -38,6 +38,7 @@ #include "SIMULATION/TOOLS/defs.h" //#define DEBUG_DLSCH_DECODING +extern double cpuf; void free_ue_dlsch(LTE_UE_DLSCH_t *dlsch) { @@ -270,7 +271,7 @@ uint32_t dlsch_decoding(PHY_VARS_UE *phy_vars_ue, return(max_turbo_iterations); }*/ - 
/*harq_pid = dlsch->current_harq_pid; + /*harq_pid = dlsch->current_harq_pid[subframe&0x1]; if (harq_pid >= 8) { printf("dlsch_decoding.c: Illegal harq_pid %d\n",harq_pid); return(max_turbo_iterations); @@ -341,6 +342,8 @@ uint32_t dlsch_decoding(PHY_VARS_UE *phy_vars_ue, printf("Segmentation: C %d, Cminus %d, Kminus %d, Kplus %d\n",harq_process->C,harq_process->Cminus,harq_process->Kminus,harq_process->Kplus); #endif + opp_enabled=1; + for (r=0; r<harq_process->C; r++) { @@ -376,7 +379,7 @@ uint32_t dlsch_decoding(PHY_VARS_UE *phy_vars_ue, (r==0) ? harq_process->F : 0); #ifdef DEBUG_DLSCH_DECODING - LOG_I(PHY,"HARQ_PID %d Rate Matching Segment %d (coded bits %d,unpunctured/repeated bits %d, TBS %d, mod_order %d, nb_rb %d, Nl %d, rv %d, round %d)...\n", + LOG_D(PHY,"HARQ_PID %d Rate Matching Segment %d (coded bits %d,unpunctured/repeated bits %d, TBS %d, mod_order %d, nb_rb %d, Nl %d, rv %d, round %d)...\n", harq_pid,r, G, Kr*3, harq_process->TBS, @@ -459,6 +462,10 @@ uint32_t dlsch_decoding(PHY_VARS_UE *phy_vars_ue, #if 1 if (err_flag == 0) { + LOG_D(PHY, "turbo algo Kr=%d cb_cnt=%d C=%d nbRB=%d TBSInput=%d TBSHarq=%d TBSplus24=%d mcs=%d Qm=%d RIV=%d round=%d maxIter %d\n", + Kr,r,harq_process->C,harq_process->nb_rb,A,harq_process->TBS, + harq_process->B,harq_process->mcs,harq_process->Qm,harq_process->rvidx,harq_process->round,dlsch->max_turbo_iterations); + if (llr8_flag) { AssertFatal (Kr >= 256, "turbo algo issue Kr=%d cb_cnt=%d C=%d nbRB=%d TBSInput=%d TBSHarq=%d TBSplus24=%d mcs=%d Qm=%d RIV=%d round=%d\n", Kr,r,harq_process->C,harq_process->nb_rb,A,harq_process->TBS,harq_process->B,harq_process->mcs,harq_process->Qm,harq_process->rvidx,harq_process->round); @@ -605,6 +612,13 @@ uint32_t dlsch_decoding(PHY_VARS_UE *phy_vars_ue, &phy_vars_ue->dlsch_tc_intl2_stats); //(is_crnti==0)?harq_pid:harq_pid+1); stop_meas(dlsch_turbo_decoding_stats); + /*printf("Segmentation: C %d r %d, dlsch_rate_unmatching_stats %5.3f dlsch_deinterleaving_stats %5.3f 
dlsch_turbo_decoding_stats %5.3f \n", + harq_process->C, + r, + dlsch_rate_unmatching_stats->p_time/(cpuf*1000.0), + dlsch_deinterleaving_stats->p_time/(cpuf*1000.0), + dlsch_turbo_decoding_stats->p_time/(cpuf*1000.0));*/ + } } } @@ -636,25 +650,28 @@ uint32_t dlsch_decoding(PHY_VARS_UE *phy_vars_ue, harq_process->round++; - if(is_crnti) - { - LOG_D(PHY,"[UE %d] DLSCH: Setting NACK for subframe %d (pid %d, round %d, TBS %d)\n",phy_vars_ue->Mod_id,subframe,harq_pid,harq_process->round,harq_process->TBS); - } // printf("Rate: [UE %d] DLSCH: Setting NACK for subframe %d (pid %d, round %d)\n",phy_vars_ue->Mod_id,subframe,harq_pid,harq_process->round); if (harq_process->round >= dlsch->Mdlharq) { harq_process->status = SCH_IDLE; + harq_process->round = 0; + } + if(is_crnti) + { + LOG_D(PHY,"[UE %d] DLSCH: Setting NACK for subframe %d (pid %d, pid status %d, round %d/Max %d, TBS %d)\n", + phy_vars_ue->Mod_id,subframe,harq_pid,harq_process->status,harq_process->round,dlsch->Mdlharq,harq_process->TBS); } return((1+dlsch->max_turbo_iterations)); } else { - LOG_D(PHY,"[UE %d] DLSCH: Setting ACK for SFN/SF %d/%d (pid %d, round %d, subframe %d)\n", - phy_vars_ue->Mod_id, frame_rx_prev, subframe_rx_prev, harq_pid, harq_process->round, subframe); harq_process->status = SCH_IDLE; harq_process->round = 0; dlsch->harq_ack[subframe].ack = 1; dlsch->harq_ack[subframe].harq_id = harq_pid; dlsch->harq_ack[subframe].send_harq_status = 1; + LOG_D(PHY,"[UE %d] DLSCH: Setting ACK for SFN/SF %d/%d (pid %d, pid status %d, round %d, subframe %d)\n", + phy_vars_ue->Mod_id, frame_rx_prev, subframe_rx_prev, harq_pid, harq_process->status, harq_process->round, subframe); + if(is_crnti) { LOG_D(PHY,"[UE %d] DLSCH: Setting ACK for subframe %d (pid %d, round %d, TBS %d)\n",phy_vars_ue->Mod_id,subframe,harq_pid,harq_process->round,harq_process->TBS); @@ -908,7 +925,7 @@ uint32_t dlsch_decoding_emul(PHY_VARS_UE *phy_vars_ue, break; case PDSCH: // TB0 - dlsch_ue = phy_vars_ue->dlsch[eNB_id][0]; + 
dlsch_ue = phy_vars_ue->dlsch[subframe&0x1][eNB_id][0]; harq_pid = dlsch_ue->current_harq_pid; ue_id= (uint32_t)find_ue((int16_t)phy_vars_ue->pdcch_vars[(uint32_t)eNB_id]->crnti,PHY_vars_eNB_g[eNB_id2][CC_id]); DevAssert( ue_id != (uint32_t)-1 ); @@ -954,7 +971,7 @@ uint32_t dlsch_decoding_emul(PHY_VARS_UE *phy_vars_ue, break; case PDSCH1: { // TB1 - dlsch_ue = phy_vars_ue->dlsch[eNB_id][1]; + dlsch_ue = phy_vars_ue->dlsch[subframe&0x1][eNB_id][1]; harq_pid = dlsch_ue->current_harq_pid; int8_t UE_id = find_ue( phy_vars_ue->pdcch_vars[eNB_id]->crnti, PHY_vars_eNB_g[eNB_id2][CC_id] ); DevAssert( UE_id != -1 ); @@ -1008,7 +1025,7 @@ uint32_t dlsch_decoding_emul(PHY_VARS_UE *phy_vars_ue, break; default: - dlsch_ue = phy_vars_ue->dlsch[eNB_id][0]; + dlsch_ue = phy_vars_ue->dlsch[subframe&0x1][eNB_id][0]; LOG_E(PHY,"dlsch_decoding_emul: FATAL, unknown DLSCH_id %d\n",dlsch_id); dlsch_ue->last_iteration_cnt = 1+dlsch_ue->max_turbo_iterations; return(1+dlsch_ue->max_turbo_iterations); diff --git a/openair1/PHY/LTE_TRANSPORT/dlsch_demodulation.c b/openair1/PHY/LTE_TRANSPORT/dlsch_demodulation.c index 601eacc6205143468931e31b6526a58dfba962c9..72b486c04202891f9d600fad11a185d918dfcb43 100644 --- a/openair1/PHY/LTE_TRANSPORT/dlsch_demodulation.c +++ b/openair1/PHY/LTE_TRANSPORT/dlsch_demodulation.c @@ -42,6 +42,7 @@ #define NOCYGWIN_STATIC #endif +extern int16_t dlsch_demod_shift; //#define DEBUG_HARQ //#undef LOG_D @@ -138,16 +139,18 @@ int rx_pdsch(PHY_VARS_UE *ue, case PDSCH: pdsch_vars = &ue->pdsch_vars[subframe&0x1][eNB_id]; - dlsch = ue->dlsch[eNB_id]; + dlsch = ue->dlsch[subframe&0x1][eNB_id]; + LOG_D(PHY,"AbsSubframe %d.%d / Sym %d harq_pid %d, harq status %d.%d \n", + frame,subframe,symbol,harq_pid, + dlsch[0]->harq_processes[harq_pid]->status, + dlsch[1]->harq_processes[harq_pid]->status); + if ((dlsch[0]->harq_processes[harq_pid]->status == ACTIVE) && (dlsch[1]->harq_processes[harq_pid]->status == ACTIVE)){ codeword_TB0 = dlsch[0]->harq_processes[harq_pid]->codeword; 
codeword_TB1 = dlsch[1]->harq_processes[harq_pid]->codeword; dlsch0_harq = dlsch[codeword_TB0]->harq_processes[harq_pid]; dlsch1_harq = dlsch[codeword_TB1]->harq_processes[harq_pid]; -#ifdef DEBUG_HARQ - printf("I am assuming both CW active\n"); -#endif } else if ((dlsch[0]->harq_processes[harq_pid]->status == ACTIVE) && (dlsch[1]->harq_processes[harq_pid]->status != ACTIVE) ) { @@ -159,8 +162,8 @@ int rx_pdsch(PHY_VARS_UE *ue, else if ((dlsch[0]->harq_processes[harq_pid]->status != ACTIVE) && (dlsch[1]->harq_processes[harq_pid]->status == ACTIVE) ){ codeword_TB1 = dlsch[1]->harq_processes[harq_pid]->codeword; - dlsch0_harq = dlsch[1]->harq_processes[harq_pid]; - dlsch1_harq = NULL; + dlsch0_harq = dlsch[1]->harq_processes[harq_pid]; + dlsch1_harq = NULL; codeword_TB0 = -1; } else { @@ -336,7 +339,7 @@ int rx_pdsch(PHY_VARS_UE *ue, LOG_W(PHY,"dlsch_demodulation: beamforming mode not supported yet.\n"); } - // printf("nb_rb = %d, eNB_id %d\n",nb_rb,eNB_id); + //printf("nb_rb = %d, eNB_id %d\n",nb_rb,eNB_id); if (nb_rb==0) { LOG_D(PHY,"dlsch_demodulation.c: nb_rb=0\n"); return(-1); @@ -357,12 +360,18 @@ int rx_pdsch(PHY_VARS_UE *ue, symbol, nb_rb); - if ((dlsch0_harq->mimo_mode<DUALSTREAM_UNIFORM_PRECODING1) && (rx_type==rx_IC_single_stream) && (eNB_id_i==ue->n_connected_eNB) && (dlsch0_harq->dl_power_off==0)) // TM5 two-user + if ((dlsch0_harq->mimo_mode<DUALSTREAM_UNIFORM_PRECODING1) && + (rx_type==rx_IC_single_stream) && + (eNB_id_i==ue->n_connected_eNB) && + (dlsch0_harq->dl_power_off==0) + ) // TM5 two-user + { dlsch_scale_channel(pdsch_vars[eNB_id_i]->dl_ch_estimates_ext, frame_parms, dlsch, symbol, nb_rb); + } if (first_symbol_flag==1) { if (beamforming_mode==0){ @@ -392,21 +401,23 @@ int rx_pdsch(PHY_VARS_UE *ue, nb_rb, dlsch0_harq->mimo_mode); + LOG_D(PHY,"Channel Level TM34 avg_0 %d, avg_1 %d, rx_type %d, rx_standard %d, interf_unaw_shift %d \n", avg_0[0], + avg_1[0], rx_type, rx_standard, interf_unaw_shift); if (rx_type>rx_standard) { - avg_0[0] = 
(log2_approx(avg_0[0])/2) -13 + interf_unaw_shift; - avg_1[0] = (log2_approx(avg_1[0])/2) -13 + interf_unaw_shift; + avg_0[0] = (log2_approx(avg_0[0])/2) + dlsch_demod_shift;// + 2 ;//+ 4; + avg_1[0] = (log2_approx(avg_1[0])/2) + dlsch_demod_shift;// + 2 ;//+ 4; pdsch_vars[eNB_id]->log2_maxh0 = cmax(avg_0[0],0); pdsch_vars[eNB_id]->log2_maxh1 = cmax(avg_1[0],0); - //printf("TM4 I-A log2_maxh0 = %d\n", lte_ue_pdsch_vars[eNB_id]->log2_maxh0); - //printf("TM4 I-A log2_maxh1 = %d\n", lte_ue_pdsch_vars[eNB_id]->log2_maxh1); + //printf("TM4 I-A log2_maxh0 = %d\n", pdsch_vars[eNB_id]->log2_maxh0); + //printf("TM4 I-A log2_maxh1 = %d\n", pdsch_vars[eNB_id]->log2_maxh1); } else { avg_0[0] = (log2_approx(avg_0[0])/2) - 13 + interf_unaw_shift; avg_1[0] = (log2_approx(avg_1[0])/2) - 13 + interf_unaw_shift; pdsch_vars[eNB_id]->log2_maxh0 = cmax(avg_0[0],0); pdsch_vars[eNB_id]->log2_maxh1 = cmax(avg_1[0],0); - //printf("TM4 I-UA log2_maxh0 = %d\n", lte_ue_pdsch_vars[eNB_id]->log2_maxh0); - //printf("TM4 I-UA log2_maxh1 = %d\n", lte_ue_pdsch_vars[eNB_id]->log2_maxh1); + //printf("TM4 I-UA log2_maxh0 = %d\n", pdsch_vars[eNB_id]->log2_maxh0); + //printf("TM4 I-UA log2_maxh1 = %d\n", pdsch_vars[eNB_id]->log2_maxh1); } } else if (dlsch0_harq->mimo_mode<DUALSTREAM_UNIFORM_PRECODING1) {// single-layer precoding (TM5, TM6) @@ -448,8 +459,11 @@ int rx_pdsch(PHY_VARS_UE *ue, symbol, nb_rb); #ifdef DEBUG_PHY - LOG_D(PHY,"[DLSCH] log2_maxh = %d (%d,%d)\n",pdsch_vars[eNB_id]->log2_maxh,avg[0],avgs); - LOG_D(PHY,"[DLSCH] mimo_mode = %d\n", dlsch0_harq->mimo_mode); + LOG_I(PHY,"[DLSCH] log2_maxh = %d [log2_maxh0 %d log2_maxh1 %d] (%d,%d)\n",pdsch_vars[eNB_id]->log2_maxh, + pdsch_vars[eNB_id]->log2_maxh0, + pdsch_vars[eNB_id]->log2_maxh1, + avg[0],avgs); + LOG_I(PHY,"[DLSCH] mimo_mode = %d\n", dlsch0_harq->mimo_mode); #endif } @@ -1023,6 +1037,44 @@ int rx_pdsch(PHY_VARS_UE *ue, } } +// Please keep it: useful for debugging +#if 0 + if( (symbol == 13) && (dlsch0_harq->mimo_mode == 2) ) + { + 
LOG_E(PHY,"Dump Phy Chan Est \n"); + if(subframe&0x1) + { +#if 1 + //write_output("rxdataF0.m" , "rxdataF0", &common_vars->common_vars_rx_data_per_thread[subframe&0x1].rxdataF[0][0],14*frame_parms->ofdm_symbol_size,1,1); + //write_output("rxdataF1.m" , "rxdataF1", &common_vars->common_vars_rx_data_per_thread[subframe&0x1].rxdataF[0][0],14*frame_parms->ofdm_symbol_size,1,1); + //write_output("dl_ch_estimates00.m", "dl_ch_estimates00", &common_vars->common_vars_rx_data_per_thread[subframe&0x1].dl_ch_estimates[eNB_id][0][0],14*frame_parms->ofdm_symbol_size,1,1); + //write_output("dl_ch_estimates01.m", "dl_ch_estimates01", &common_vars->common_vars_rx_data_per_thread[subframe&0x1].dl_ch_estimates[eNB_id][1][0],14*frame_parms->ofdm_symbol_size,1,1); + //write_output("dl_ch_estimates10.m", "dl_ch_estimates10", &common_vars->common_vars_rx_data_per_thread[subframe&0x1].dl_ch_estimates[eNB_id][2][0],14*frame_parms->ofdm_symbol_size,1,1); + //write_output("dl_ch_estimates11.m", "dl_ch_estimates11", &common_vars->common_vars_rx_data_per_thread[subframe&0x1].dl_ch_estimates[eNB_id][3][0],14*frame_parms->ofdm_symbol_size,1,1); + + + //write_output("rxdataF_ext00.m" , "rxdataF_ext00", &pdsch_vars[eNB_id]->rxdataF_ext[0][0],14*frame_parms->N_RB_DL*12,1,1); + //write_output("rxdataF_ext01.m" , "rxdataF_ext01", &pdsch_vars[eNB_id]->rxdataF_ext[1][0],14*frame_parms->N_RB_DL*12,1,1); + //write_output("rxdataF_ext10.m" , "rxdataF_ext10", &pdsch_vars[eNB_id]->rxdataF_ext[2][0],14*frame_parms->N_RB_DL*12,1,1); + //write_output("rxdataF_ext11.m" , "rxdataF_ext11", &pdsch_vars[eNB_id]->rxdataF_ext[3][0],14*frame_parms->N_RB_DL*12,1,1); + write_output("dl_ch_estimates_ext00.m", "dl_ch_estimates_ext00", &pdsch_vars[eNB_id]->dl_ch_estimates_ext[0][0],14*frame_parms->N_RB_DL*12,1,1); + write_output("dl_ch_estimates_ext01.m", "dl_ch_estimates_ext01", &pdsch_vars[eNB_id]->dl_ch_estimates_ext[1][0],14*frame_parms->N_RB_DL*12,1,1); + write_output("dl_ch_estimates_ext10.m", 
"dl_ch_estimates_ext10", &pdsch_vars[eNB_id]->dl_ch_estimates_ext[2][0],14*frame_parms->N_RB_DL*12,1,1); + write_output("dl_ch_estimates_ext11.m", "dl_ch_estimates_ext11", &pdsch_vars[eNB_id]->dl_ch_estimates_ext[3][0],14*frame_parms->N_RB_DL*12,1,1); + write_output("rxdataF_comp00.m","rxdataF_comp00", &pdsch_vars[eNB_id]->rxdataF_comp0[0][0],14*frame_parms->N_RB_DL*12,1,1); + write_output("rxdataF_comp01.m","rxdataF_comp01", &pdsch_vars[eNB_id]->rxdataF_comp0[1][0],14*frame_parms->N_RB_DL*12,1,1); + write_output("rxdataF_comp10.m","rxdataF_comp10", &pdsch_vars[eNB_id]->rxdataF_comp1[harq_pid][round][0][0],14*frame_parms->N_RB_DL*12,1,1); + write_output("rxdataF_comp11.m","rxdataF_comp11", &pdsch_vars[eNB_id]->rxdataF_comp1[harq_pid][round][1][0],14*frame_parms->N_RB_DL*12,1,1); +#endif + write_output("llr0.m","llr0", &pdsch_vars[eNB_id]->llr[0][0],(14*nb_rb*12*dlsch1_harq->Qm) - 4*(nb_rb*4*dlsch1_harq->Qm),1,0); + write_output("llr1.m","llr1", &pdsch_vars[eNB_id]->llr[1][0],(14*nb_rb*12*dlsch1_harq->Qm) - 4*(nb_rb*4*dlsch1_harq->Qm),1,0); + + + AssertFatal(0," "); + } + + } +#endif #if T_TRACER T(T_UE_PHY_PDSCH_IQ, T_INT(eNB_id), T_INT(ue->Mod_id), T_INT(frame%1024), @@ -1609,9 +1661,7 @@ void prec2A_TM3_128(__m128i *ch0,__m128i *ch1) { __m128i tmp0,tmp1; - // sqrt(2) is already taken into account in computation sqrt_rho_a, sqrt_rho_b, - //so divide by 2 is replaced by divide by sqrt(2). 
- + //_mm_mulhi_epi16 // print_shorts("prec2A_TM3 ch0 (before):",ch0); // print_shorts("prec2A_TM3 ch1 (before):",ch1); @@ -1622,17 +1672,22 @@ void prec2A_TM3_128(__m128i *ch0,__m128i *ch1) { ch0[0] = _mm_adds_epi16(ch0[0],tmp1); ch1[0] = _mm_subs_epi16(tmp0,tmp1); - - // print_shorts("prec2A_TM3 ch0 (mid):",&tmp0); - // print_shorts("prec2A_TM3 ch1 (mid):",ch1); - ch0[0] = _mm_mulhi_epi16(ch0[0],amp); ch0[0] = _mm_slli_epi16(ch0[0],1); + ch1[0] = _mm_mulhi_epi16(ch1[0],amp); ch1[0] = _mm_slli_epi16(ch1[0],1); - // ch0[0] = _mm_srai_epi16(ch0[0],1); - // ch1[0] = _mm_srai_epi16(ch1[0],1); + // print_shorts("prec2A_TM3 ch0 (mid):",&tmp0); + // print_shorts("prec2A_TM3 ch1 (mid):",ch1); + + //ch0[0] = _mm_mulhi_epi16(ch0[0],amp); + //ch0[0] = _mm_slli_epi16(ch0[0],1); + //ch1[0] = _mm_mulhi_epi16(ch1[0],amp); + //ch1[0] = _mm_slli_epi16(ch1[0],1); + + //ch0[0] = _mm_srai_epi16(ch0[0],1); + //ch1[0] = _mm_srai_epi16(ch1[0],1); // print_shorts("prec2A_TM3 ch0 (after):",ch0); // print_shorts("prec2A_TM3 ch1 (after):",ch1); diff --git a/openair1/PHY/LTE_TRANSPORT/dlsch_llr_computation.c b/openair1/PHY/LTE_TRANSPORT/dlsch_llr_computation.c index 0b73e2b02390a060e3774838e42fa4ab7cfa1533..b08836762cdbbf4111cc9f32b1460c891718d752 100644 --- a/openair1/PHY/LTE_TRANSPORT/dlsch_llr_computation.c +++ b/openair1/PHY/LTE_TRANSPORT/dlsch_llr_computation.c @@ -8831,6 +8831,37 @@ int dlsch_64qam_64qam_llr(LTE_DL_FRAME_PARMS *frame_parms, len = (nb_rb*12) - pbch_pss_sss_adjust; } +#ifdef __AVX2__ + + // Round length up to multiple of 16 words + uint32_t len256i = ((len+16)>>4)*16; + int32_t *rxF_256i = (int32_t*) malloc16_clear(len256i*4); + int32_t *rxF_i_256i = (int32_t*) malloc16_clear(len256i*4); + int32_t *ch_mag_256i = (int32_t*) malloc16_clear(len256i*4); + int32_t *ch_mag_i_256i = (int32_t*) malloc16_clear(len256i*4); + int32_t *rho_256i = (int32_t*) malloc16_clear(len256i*4); + + memcpy(rxF_256i, rxF, len*4); + memcpy(rxF_i_256i, rxF_i, len*4); + memcpy(ch_mag_256i, ch_mag, 
len*4); + memcpy(ch_mag_i_256i, ch_mag_i, len*4); + memcpy(rho_256i, rho, len*4); + + qam64_qam64_avx2((int32_t *)rxF_256i, + (int32_t *)rxF_i_256i, + (int32_t *)ch_mag_256i, + (int32_t *)ch_mag_i_256i, + (int16_t *)llr16, + (int32_t *) rho_256i, + len); + /* Release the scratch copies. The size argument is the byte count requested from malloc16_clear() (len256i*4), not sizeof(pointer). */ + free16(rxF_256i, len256i*4); + free16(rxF_i_256i, len256i*4); + free16(ch_mag_256i, len256i*4); + free16(ch_mag_i_256i, len256i*4); + free16(rho_256i, len256i*4); + +#else qam64_qam64((short *)rxF, (short *)rxF_i, (short *)ch_mag, @@ -8838,6 +8869,7 @@ int dlsch_64qam_64qam_llr(LTE_DL_FRAME_PARMS *frame_parms, (short *)llr16, (short *)rho, len); +#endif llr16 += (6*len); *llr16p = (short *)llr16; diff --git a/openair1/PHY/LTE_TRANSPORT/dlsch_llr_computation_avx2.c b/openair1/PHY/LTE_TRANSPORT/dlsch_llr_computation_avx2.c new file mode 100644 index 0000000000000000000000000000000000000000..cda5ad0f55ee8afbe37bf4d3536cea17fb3ed575 --- /dev/null +++ b/openair1/PHY/LTE_TRANSPORT/dlsch_llr_computation_avx2.c @@ -0,0 +1,4034 @@ + /* + * Licensed to the OpenAirInterface (OAI) Software Alliance under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The OpenAirInterface Software Alliance licenses this file to You under + * the OAI Public License, Version 1.0 (the "License"); you may not use this file + * except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.openairinterface.org/?page_id=698 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ *------------------------------------------------------------------------------- + * For more information about the OpenAirInterface (OAI) Software Alliance: + * contact@openairinterface.org + */ + +/*! \file PHY/LTE_TRANSPORT/dlsch_llr_computation_avx2.c + * \brief Top-level routines for LLR computation of the PDSCH physical channel from 36-211, V8.6 2009-03 + * \author R. Knopp, F. Kaltenberger, A. Bhamri, S. Aubert, S. Wagner, X Jiang + * \date 2011 + * \version 0.1 + * \company Eurecom + * \email: knopp@eurecom.fr,florian.kaltenberger@eurecom.fr,ankit.bhamri@eurecom.fr,sebastien.aubert@eurecom.fr, sebastian.wagner@eurecom.fr + * \note + * \warning + */ + +#include "PHY/defs.h" +#include "PHY/TOOLS/defs.h" +#include "PHY/extern.h" +#include "defs.h" +#include "extern.h" +#include "PHY/sse_intrin.h" + +int16_t ones256[16] __attribute__ ((aligned(32))) = {0xffff,0xffff,0xffff,0xffff,0xffff,0xffff,0xffff,0xffff,0xffff,0xffff,0xffff,0xffff,0xffff,0xffff,0xffff,0xffff}; + +static __m256i rho_rpi __attribute__ ((aligned(32))); +static __m256i rho_rmi __attribute__ ((aligned(32))); +static __m256i rho_rpi_1_1 __attribute__ ((aligned(32))); +static __m256i rho_rpi_1_3 __attribute__ ((aligned(32))); +static __m256i rho_rpi_1_5 __attribute__ ((aligned(32))); +static __m256i rho_rpi_1_7 __attribute__ ((aligned(32))); +static __m256i rho_rpi_3_1 __attribute__ ((aligned(32))); +static __m256i rho_rpi_3_3 __attribute__ ((aligned(32))); +static __m256i rho_rpi_3_5 __attribute__ ((aligned(32))); +static __m256i rho_rpi_3_7 __attribute__ ((aligned(32))); +static __m256i rho_rpi_5_1 __attribute__ ((aligned(32))); +static __m256i rho_rpi_5_3 __attribute__ ((aligned(32))); +static __m256i rho_rpi_5_5 __attribute__ ((aligned(32))); +static __m256i rho_rpi_5_7 __attribute__ ((aligned(32))); +static __m256i rho_rpi_7_1 __attribute__ ((aligned(32))); +static __m256i rho_rpi_7_3 __attribute__ ((aligned(32))); +static __m256i rho_rpi_7_5 __attribute__ ((aligned(32))); +static __m256i
rho_rpi_7_7 __attribute__ ((aligned(32))); +static __m256i rho_rmi_1_1 __attribute__ ((aligned(32))); +static __m256i rho_rmi_1_3 __attribute__ ((aligned(32))); +static __m256i rho_rmi_1_5 __attribute__ ((aligned(32))); +static __m256i rho_rmi_1_7 __attribute__ ((aligned(32))); +static __m256i rho_rmi_3_1 __attribute__ ((aligned(32))); +static __m256i rho_rmi_3_3 __attribute__ ((aligned(32))); +static __m256i rho_rmi_3_5 __attribute__ ((aligned(32))); +static __m256i rho_rmi_3_7 __attribute__ ((aligned(32))); +static __m256i rho_rmi_5_1 __attribute__ ((aligned(32))); +static __m256i rho_rmi_5_3 __attribute__ ((aligned(32))); +static __m256i rho_rmi_5_5 __attribute__ ((aligned(32))); +static __m256i rho_rmi_5_7 __attribute__ ((aligned(32))); +static __m256i rho_rmi_7_1 __attribute__ ((aligned(32))); +static __m256i rho_rmi_7_3 __attribute__ ((aligned(32))); +static __m256i rho_rmi_7_5 __attribute__ ((aligned(32))); +static __m256i rho_rmi_7_7 __attribute__ ((aligned(32))); + +static __m256i psi_r_m7_m7 __attribute__ ((aligned(32))); +static __m256i psi_r_m7_m5 __attribute__ ((aligned(32))); +static __m256i psi_r_m7_m3 __attribute__ ((aligned(32))); +static __m256i psi_r_m7_m1 __attribute__ ((aligned(32))); +static __m256i psi_r_m7_p1 __attribute__ ((aligned(32))); +static __m256i psi_r_m7_p3 __attribute__ ((aligned(32))); +static __m256i psi_r_m7_p5 __attribute__ ((aligned(32))); +static __m256i psi_r_m7_p7 __attribute__ ((aligned(32))); +static __m256i psi_r_m5_m7 __attribute__ ((aligned(32))); +static __m256i psi_r_m5_m5 __attribute__ ((aligned(32))); +static __m256i psi_r_m5_m3 __attribute__ ((aligned(32))); +static __m256i psi_r_m5_m1 __attribute__ ((aligned(32))); +static __m256i psi_r_m5_p1 __attribute__ ((aligned(32))); +static __m256i psi_r_m5_p3 __attribute__ ((aligned(32))); +static __m256i psi_r_m5_p5 __attribute__ ((aligned(32))); +static __m256i psi_r_m5_p7 __attribute__ ((aligned(32))); +static __m256i psi_r_m3_m7 __attribute__ ((aligned(32))); +static 
__m256i psi_r_m3_m5 __attribute__ ((aligned(32))); +static __m256i psi_r_m3_m3 __attribute__ ((aligned(32))); +static __m256i psi_r_m3_m1 __attribute__ ((aligned(32))); +static __m256i psi_r_m3_p1 __attribute__ ((aligned(32))); +static __m256i psi_r_m3_p3 __attribute__ ((aligned(32))); +static __m256i psi_r_m3_p5 __attribute__ ((aligned(32))); +static __m256i psi_r_m3_p7 __attribute__ ((aligned(32))); +static __m256i psi_r_m1_m7 __attribute__ ((aligned(32))); +static __m256i psi_r_m1_m5 __attribute__ ((aligned(32))); +static __m256i psi_r_m1_m3 __attribute__ ((aligned(32))); +static __m256i psi_r_m1_m1 __attribute__ ((aligned(32))); +static __m256i psi_r_m1_p1 __attribute__ ((aligned(32))); +static __m256i psi_r_m1_p3 __attribute__ ((aligned(32))); +static __m256i psi_r_m1_p5 __attribute__ ((aligned(32))); +static __m256i psi_r_m1_p7 __attribute__ ((aligned(32))); +static __m256i psi_r_p1_m7 __attribute__ ((aligned(32))); +static __m256i psi_r_p1_m5 __attribute__ ((aligned(32))); +static __m256i psi_r_p1_m3 __attribute__ ((aligned(32))); +static __m256i psi_r_p1_m1 __attribute__ ((aligned(32))); +static __m256i psi_r_p1_p1 __attribute__ ((aligned(32))); +static __m256i psi_r_p1_p3 __attribute__ ((aligned(32))); +static __m256i psi_r_p1_p5 __attribute__ ((aligned(32))); +static __m256i psi_r_p1_p7 __attribute__ ((aligned(32))); +static __m256i psi_r_p3_m7 __attribute__ ((aligned(32))); +static __m256i psi_r_p3_m5 __attribute__ ((aligned(32))); +static __m256i psi_r_p3_m3 __attribute__ ((aligned(32))); +static __m256i psi_r_p3_m1 __attribute__ ((aligned(32))); +static __m256i psi_r_p3_p1 __attribute__ ((aligned(32))); +static __m256i psi_r_p3_p3 __attribute__ ((aligned(32))); +static __m256i psi_r_p3_p5 __attribute__ ((aligned(32))); +static __m256i psi_r_p3_p7 __attribute__ ((aligned(32))); +static __m256i psi_r_p5_m7 __attribute__ ((aligned(32))); +static __m256i psi_r_p5_m5 __attribute__ ((aligned(32))); +static __m256i psi_r_p5_m3 __attribute__ ((aligned(32))); 
+static __m256i psi_r_p5_m1 __attribute__ ((aligned(32))); +static __m256i psi_r_p5_p1 __attribute__ ((aligned(32))); +static __m256i psi_r_p5_p3 __attribute__ ((aligned(32))); +static __m256i psi_r_p5_p5 __attribute__ ((aligned(32))); +static __m256i psi_r_p5_p7 __attribute__ ((aligned(32))); +static __m256i psi_r_p7_m7 __attribute__ ((aligned(32))); +static __m256i psi_r_p7_m5 __attribute__ ((aligned(32))); +static __m256i psi_r_p7_m3 __attribute__ ((aligned(32))); +static __m256i psi_r_p7_m1 __attribute__ ((aligned(32))); +static __m256i psi_r_p7_p1 __attribute__ ((aligned(32))); +static __m256i psi_r_p7_p3 __attribute__ ((aligned(32))); +static __m256i psi_r_p7_p5 __attribute__ ((aligned(32))); +static __m256i psi_r_p7_p7 __attribute__ ((aligned(32))); + +static __m256i psi_i_m7_m7 __attribute__ ((aligned(32))); +static __m256i psi_i_m7_m5 __attribute__ ((aligned(32))); +static __m256i psi_i_m7_m3 __attribute__ ((aligned(32))); +static __m256i psi_i_m7_m1 __attribute__ ((aligned(32))); +static __m256i psi_i_m7_p1 __attribute__ ((aligned(32))); +static __m256i psi_i_m7_p3 __attribute__ ((aligned(32))); +static __m256i psi_i_m7_p5 __attribute__ ((aligned(32))); +static __m256i psi_i_m7_p7 __attribute__ ((aligned(32))); +static __m256i psi_i_m5_m7 __attribute__ ((aligned(32))); +static __m256i psi_i_m5_m5 __attribute__ ((aligned(32))); +static __m256i psi_i_m5_m3 __attribute__ ((aligned(32))); +static __m256i psi_i_m5_m1 __attribute__ ((aligned(32))); +static __m256i psi_i_m5_p1 __attribute__ ((aligned(32))); +static __m256i psi_i_m5_p3 __attribute__ ((aligned(32))); +static __m256i psi_i_m5_p5 __attribute__ ((aligned(32))); +static __m256i psi_i_m5_p7 __attribute__ ((aligned(32))); +static __m256i psi_i_m3_m7 __attribute__ ((aligned(32))); +static __m256i psi_i_m3_m5 __attribute__ ((aligned(32))); +static __m256i psi_i_m3_m3 __attribute__ ((aligned(32))); +static __m256i psi_i_m3_m1 __attribute__ ((aligned(32))); +static __m256i psi_i_m3_p1 __attribute__ 
((aligned(32))); +static __m256i psi_i_m3_p3 __attribute__ ((aligned(32))); +static __m256i psi_i_m3_p5 __attribute__ ((aligned(32))); +static __m256i psi_i_m3_p7 __attribute__ ((aligned(32))); +static __m256i psi_i_m1_m7 __attribute__ ((aligned(32))); +static __m256i psi_i_m1_m5 __attribute__ ((aligned(32))); +static __m256i psi_i_m1_m3 __attribute__ ((aligned(32))); +static __m256i psi_i_m1_m1 __attribute__ ((aligned(32))); +static __m256i psi_i_m1_p1 __attribute__ ((aligned(32))); +static __m256i psi_i_m1_p3 __attribute__ ((aligned(32))); +static __m256i psi_i_m1_p5 __attribute__ ((aligned(32))); +static __m256i psi_i_m1_p7 __attribute__ ((aligned(32))); +static __m256i psi_i_p1_m7 __attribute__ ((aligned(32))); +static __m256i psi_i_p1_m5 __attribute__ ((aligned(32))); +static __m256i psi_i_p1_m3 __attribute__ ((aligned(32))); +static __m256i psi_i_p1_m1 __attribute__ ((aligned(32))); +static __m256i psi_i_p1_p1 __attribute__ ((aligned(32))); +static __m256i psi_i_p1_p3 __attribute__ ((aligned(32))); +static __m256i psi_i_p1_p5 __attribute__ ((aligned(32))); +static __m256i psi_i_p1_p7 __attribute__ ((aligned(32))); +static __m256i psi_i_p3_m7 __attribute__ ((aligned(32))); +static __m256i psi_i_p3_m5 __attribute__ ((aligned(32))); +static __m256i psi_i_p3_m3 __attribute__ ((aligned(32))); +static __m256i psi_i_p3_m1 __attribute__ ((aligned(32))); +static __m256i psi_i_p3_p1 __attribute__ ((aligned(32))); +static __m256i psi_i_p3_p3 __attribute__ ((aligned(32))); +static __m256i psi_i_p3_p5 __attribute__ ((aligned(32))); +static __m256i psi_i_p3_p7 __attribute__ ((aligned(32))); +static __m256i psi_i_p5_m7 __attribute__ ((aligned(32))); +static __m256i psi_i_p5_m5 __attribute__ ((aligned(32))); +static __m256i psi_i_p5_m3 __attribute__ ((aligned(32))); +static __m256i psi_i_p5_m1 __attribute__ ((aligned(32))); +static __m256i psi_i_p5_p1 __attribute__ ((aligned(32))); +static __m256i psi_i_p5_p3 __attribute__ ((aligned(32))); +static __m256i psi_i_p5_p5 
__attribute__ ((aligned(32))); +static __m256i psi_i_p5_p7 __attribute__ ((aligned(32))); +static __m256i psi_i_p7_m7 __attribute__ ((aligned(32))); +static __m256i psi_i_p7_m5 __attribute__ ((aligned(32))); +static __m256i psi_i_p7_m3 __attribute__ ((aligned(32))); +static __m256i psi_i_p7_m1 __attribute__ ((aligned(32))); +static __m256i psi_i_p7_p1 __attribute__ ((aligned(32))); +static __m256i psi_i_p7_p3 __attribute__ ((aligned(32))); +static __m256i psi_i_p7_p5 __attribute__ ((aligned(32))); +static __m256i psi_i_p7_p7 __attribute__ ((aligned(32))); + +static __m256i a_r_m7_m7 __attribute__ ((aligned(32))); +static __m256i a_r_m7_m5 __attribute__ ((aligned(32))); +static __m256i a_r_m7_m3 __attribute__ ((aligned(32))); +static __m256i a_r_m7_m1 __attribute__ ((aligned(32))); +static __m256i a_r_m7_p1 __attribute__ ((aligned(32))); +static __m256i a_r_m7_p3 __attribute__ ((aligned(32))); +static __m256i a_r_m7_p5 __attribute__ ((aligned(32))); +static __m256i a_r_m7_p7 __attribute__ ((aligned(32))); +static __m256i a_r_m5_m7 __attribute__ ((aligned(32))); +static __m256i a_r_m5_m5 __attribute__ ((aligned(32))); +static __m256i a_r_m5_m3 __attribute__ ((aligned(32))); +static __m256i a_r_m5_m1 __attribute__ ((aligned(32))); +static __m256i a_r_m5_p1 __attribute__ ((aligned(32))); +static __m256i a_r_m5_p3 __attribute__ ((aligned(32))); +static __m256i a_r_m5_p5 __attribute__ ((aligned(32))); +static __m256i a_r_m5_p7 __attribute__ ((aligned(32))); +static __m256i a_r_m3_m7 __attribute__ ((aligned(32))); +static __m256i a_r_m3_m5 __attribute__ ((aligned(32))); +static __m256i a_r_m3_m3 __attribute__ ((aligned(32))); +static __m256i a_r_m3_m1 __attribute__ ((aligned(32))); +static __m256i a_r_m3_p1 __attribute__ ((aligned(32))); +static __m256i a_r_m3_p3 __attribute__ ((aligned(32))); +static __m256i a_r_m3_p5 __attribute__ ((aligned(32))); +static __m256i a_r_m3_p7 __attribute__ ((aligned(32))); +static __m256i a_r_m1_m7 __attribute__ ((aligned(32))); +static 
__m256i a_r_m1_m5 __attribute__ ((aligned(32))); +static __m256i a_r_m1_m3 __attribute__ ((aligned(32))); +static __m256i a_r_m1_m1 __attribute__ ((aligned(32))); +static __m256i a_r_m1_p1 __attribute__ ((aligned(32))); +static __m256i a_r_m1_p3 __attribute__ ((aligned(32))); +static __m256i a_r_m1_p5 __attribute__ ((aligned(32))); +static __m256i a_r_m1_p7 __attribute__ ((aligned(32))); +static __m256i a_r_p1_m7 __attribute__ ((aligned(32))); +static __m256i a_r_p1_m5 __attribute__ ((aligned(32))); +static __m256i a_r_p1_m3 __attribute__ ((aligned(32))); +static __m256i a_r_p1_m1 __attribute__ ((aligned(32))); +static __m256i a_r_p1_p1 __attribute__ ((aligned(32))); +static __m256i a_r_p1_p3 __attribute__ ((aligned(32))); +static __m256i a_r_p1_p5 __attribute__ ((aligned(32))); +static __m256i a_r_p1_p7 __attribute__ ((aligned(32))); +static __m256i a_r_p3_m7 __attribute__ ((aligned(32))); +static __m256i a_r_p3_m5 __attribute__ ((aligned(32))); +static __m256i a_r_p3_m3 __attribute__ ((aligned(32))); +static __m256i a_r_p3_m1 __attribute__ ((aligned(32))); +static __m256i a_r_p3_p1 __attribute__ ((aligned(32))); +static __m256i a_r_p3_p3 __attribute__ ((aligned(32))); +static __m256i a_r_p3_p5 __attribute__ ((aligned(32))); +static __m256i a_r_p3_p7 __attribute__ ((aligned(32))); +static __m256i a_r_p5_m7 __attribute__ ((aligned(32))); +static __m256i a_r_p5_m5 __attribute__ ((aligned(32))); +static __m256i a_r_p5_m3 __attribute__ ((aligned(32))); +static __m256i a_r_p5_m1 __attribute__ ((aligned(32))); +static __m256i a_r_p5_p1 __attribute__ ((aligned(32))); +static __m256i a_r_p5_p3 __attribute__ ((aligned(32))); +static __m256i a_r_p5_p5 __attribute__ ((aligned(32))); +static __m256i a_r_p5_p7 __attribute__ ((aligned(32))); +static __m256i a_r_p7_m7 __attribute__ ((aligned(32))); +static __m256i a_r_p7_m5 __attribute__ ((aligned(32))); +static __m256i a_r_p7_m3 __attribute__ ((aligned(32))); +static __m256i a_r_p7_m1 __attribute__ ((aligned(32))); +static 
__m256i a_r_p7_p1 __attribute__ ((aligned(32))); +static __m256i a_r_p7_p3 __attribute__ ((aligned(32))); +static __m256i a_r_p7_p5 __attribute__ ((aligned(32))); +static __m256i a_r_p7_p7 __attribute__ ((aligned(32))); + +static __m256i a_i_m7_m7 __attribute__ ((aligned(32))); +static __m256i a_i_m7_m5 __attribute__ ((aligned(32))); +static __m256i a_i_m7_m3 __attribute__ ((aligned(32))); +static __m256i a_i_m7_m1 __attribute__ ((aligned(32))); +static __m256i a_i_m7_p1 __attribute__ ((aligned(32))); +static __m256i a_i_m7_p3 __attribute__ ((aligned(32))); +static __m256i a_i_m7_p5 __attribute__ ((aligned(32))); +static __m256i a_i_m7_p7 __attribute__ ((aligned(32))); +static __m256i a_i_m5_m7 __attribute__ ((aligned(32))); +static __m256i a_i_m5_m5 __attribute__ ((aligned(32))); +static __m256i a_i_m5_m3 __attribute__ ((aligned(32))); +static __m256i a_i_m5_m1 __attribute__ ((aligned(32))); +static __m256i a_i_m5_p1 __attribute__ ((aligned(32))); +static __m256i a_i_m5_p3 __attribute__ ((aligned(32))); +static __m256i a_i_m5_p5 __attribute__ ((aligned(32))); +static __m256i a_i_m5_p7 __attribute__ ((aligned(32))); +static __m256i a_i_m3_m7 __attribute__ ((aligned(32))); +static __m256i a_i_m3_m5 __attribute__ ((aligned(32))); +static __m256i a_i_m3_m3 __attribute__ ((aligned(32))); +static __m256i a_i_m3_m1 __attribute__ ((aligned(32))); +static __m256i a_i_m3_p1 __attribute__ ((aligned(32))); +static __m256i a_i_m3_p3 __attribute__ ((aligned(32))); +static __m256i a_i_m3_p5 __attribute__ ((aligned(32))); +static __m256i a_i_m3_p7 __attribute__ ((aligned(32))); +static __m256i a_i_m1_m7 __attribute__ ((aligned(32))); +static __m256i a_i_m1_m5 __attribute__ ((aligned(32))); +static __m256i a_i_m1_m3 __attribute__ ((aligned(32))); +static __m256i a_i_m1_m1 __attribute__ ((aligned(32))); +static __m256i a_i_m1_p1 __attribute__ ((aligned(32))); +static __m256i a_i_m1_p3 __attribute__ ((aligned(32))); +static __m256i a_i_m1_p5 __attribute__ ((aligned(32))); +static 
__m256i a_i_m1_p7 __attribute__ ((aligned(32))); +static __m256i a_i_p1_m7 __attribute__ ((aligned(32))); +static __m256i a_i_p1_m5 __attribute__ ((aligned(32))); +static __m256i a_i_p1_m3 __attribute__ ((aligned(32))); +static __m256i a_i_p1_m1 __attribute__ ((aligned(32))); +static __m256i a_i_p1_p1 __attribute__ ((aligned(32))); +static __m256i a_i_p1_p3 __attribute__ ((aligned(32))); +static __m256i a_i_p1_p5 __attribute__ ((aligned(32))); +static __m256i a_i_p1_p7 __attribute__ ((aligned(32))); +static __m256i a_i_p3_m7 __attribute__ ((aligned(32))); +static __m256i a_i_p3_m5 __attribute__ ((aligned(32))); +static __m256i a_i_p3_m3 __attribute__ ((aligned(32))); +static __m256i a_i_p3_m1 __attribute__ ((aligned(32))); +static __m256i a_i_p3_p1 __attribute__ ((aligned(32))); +static __m256i a_i_p3_p3 __attribute__ ((aligned(32))); +static __m256i a_i_p3_p5 __attribute__ ((aligned(32))); +static __m256i a_i_p3_p7 __attribute__ ((aligned(32))); +static __m256i a_i_p5_m7 __attribute__ ((aligned(32))); +static __m256i a_i_p5_m5 __attribute__ ((aligned(32))); +static __m256i a_i_p5_m3 __attribute__ ((aligned(32))); +static __m256i a_i_p5_m1 __attribute__ ((aligned(32))); +static __m256i a_i_p5_p1 __attribute__ ((aligned(32))); +static __m256i a_i_p5_p3 __attribute__ ((aligned(32))); +static __m256i a_i_p5_p5 __attribute__ ((aligned(32))); +static __m256i a_i_p5_p7 __attribute__ ((aligned(32))); +static __m256i a_i_p7_m7 __attribute__ ((aligned(32))); +static __m256i a_i_p7_m5 __attribute__ ((aligned(32))); +static __m256i a_i_p7_m3 __attribute__ ((aligned(32))); +static __m256i a_i_p7_m1 __attribute__ ((aligned(32))); +static __m256i a_i_p7_p1 __attribute__ ((aligned(32))); +static __m256i a_i_p7_p3 __attribute__ ((aligned(32))); +static __m256i a_i_p7_p5 __attribute__ ((aligned(32))); +static __m256i a_i_p7_p7 __attribute__ ((aligned(32))); + +static __m256i psi_a_m7_m7 __attribute__ ((aligned(32))); +static __m256i psi_a_m7_m5 __attribute__ ((aligned(32))); 
+static __m256i psi_a_m7_m3 __attribute__ ((aligned(32))); +static __m256i psi_a_m7_m1 __attribute__ ((aligned(32))); +static __m256i psi_a_m7_p1 __attribute__ ((aligned(32))); +static __m256i psi_a_m7_p3 __attribute__ ((aligned(32))); +static __m256i psi_a_m7_p5 __attribute__ ((aligned(32))); +static __m256i psi_a_m7_p7 __attribute__ ((aligned(32))); +static __m256i psi_a_m5_m7 __attribute__ ((aligned(32))); +static __m256i psi_a_m5_m5 __attribute__ ((aligned(32))); +static __m256i psi_a_m5_m3 __attribute__ ((aligned(32))); +static __m256i psi_a_m5_m1 __attribute__ ((aligned(32))); +static __m256i psi_a_m5_p1 __attribute__ ((aligned(32))); +static __m256i psi_a_m5_p3 __attribute__ ((aligned(32))); +static __m256i psi_a_m5_p5 __attribute__ ((aligned(32))); +static __m256i psi_a_m5_p7 __attribute__ ((aligned(32))); +static __m256i psi_a_m3_m7 __attribute__ ((aligned(32))); +static __m256i psi_a_m3_m5 __attribute__ ((aligned(32))); +static __m256i psi_a_m3_m3 __attribute__ ((aligned(32))); +static __m256i psi_a_m3_m1 __attribute__ ((aligned(32))); +static __m256i psi_a_m3_p1 __attribute__ ((aligned(32))); +static __m256i psi_a_m3_p3 __attribute__ ((aligned(32))); +static __m256i psi_a_m3_p5 __attribute__ ((aligned(32))); +static __m256i psi_a_m3_p7 __attribute__ ((aligned(32))); +static __m256i psi_a_m1_m7 __attribute__ ((aligned(32))); +static __m256i psi_a_m1_m5 __attribute__ ((aligned(32))); +static __m256i psi_a_m1_m3 __attribute__ ((aligned(32))); +static __m256i psi_a_m1_m1 __attribute__ ((aligned(32))); +static __m256i psi_a_m1_p1 __attribute__ ((aligned(32))); +static __m256i psi_a_m1_p3 __attribute__ ((aligned(32))); +static __m256i psi_a_m1_p5 __attribute__ ((aligned(32))); +static __m256i psi_a_m1_p7 __attribute__ ((aligned(32))); +static __m256i psi_a_p1_m7 __attribute__ ((aligned(32))); +static __m256i psi_a_p1_m5 __attribute__ ((aligned(32))); +static __m256i psi_a_p1_m3 __attribute__ ((aligned(32))); +static __m256i psi_a_p1_m1 __attribute__ 
((aligned(32))); +static __m256i psi_a_p1_p1 __attribute__ ((aligned(32))); +static __m256i psi_a_p1_p3 __attribute__ ((aligned(32))); +static __m256i psi_a_p1_p5 __attribute__ ((aligned(32))); +static __m256i psi_a_p1_p7 __attribute__ ((aligned(32))); +static __m256i psi_a_p3_m7 __attribute__ ((aligned(32))); +static __m256i psi_a_p3_m5 __attribute__ ((aligned(32))); +static __m256i psi_a_p3_m3 __attribute__ ((aligned(32))); +static __m256i psi_a_p3_m1 __attribute__ ((aligned(32))); +static __m256i psi_a_p3_p1 __attribute__ ((aligned(32))); +static __m256i psi_a_p3_p3 __attribute__ ((aligned(32))); +static __m256i psi_a_p3_p5 __attribute__ ((aligned(32))); +static __m256i psi_a_p3_p7 __attribute__ ((aligned(32))); +static __m256i psi_a_p5_m7 __attribute__ ((aligned(32))); +static __m256i psi_a_p5_m5 __attribute__ ((aligned(32))); +static __m256i psi_a_p5_m3 __attribute__ ((aligned(32))); +static __m256i psi_a_p5_m1 __attribute__ ((aligned(32))); +static __m256i psi_a_p5_p1 __attribute__ ((aligned(32))); +static __m256i psi_a_p5_p3 __attribute__ ((aligned(32))); +static __m256i psi_a_p5_p5 __attribute__ ((aligned(32))); +static __m256i psi_a_p5_p7 __attribute__ ((aligned(32))); +static __m256i psi_a_p7_m7 __attribute__ ((aligned(32))); +static __m256i psi_a_p7_m5 __attribute__ ((aligned(32))); +static __m256i psi_a_p7_m3 __attribute__ ((aligned(32))); +static __m256i psi_a_p7_m1 __attribute__ ((aligned(32))); +static __m256i psi_a_p7_p1 __attribute__ ((aligned(32))); +static __m256i psi_a_p7_p3 __attribute__ ((aligned(32))); +static __m256i psi_a_p7_p5 __attribute__ ((aligned(32))); +static __m256i psi_a_p7_p7 __attribute__ ((aligned(32))); + +static __m256i a_sq_m7_m7 __attribute__ ((aligned(32))); +static __m256i a_sq_m7_m5 __attribute__ ((aligned(32))); +static __m256i a_sq_m7_m3 __attribute__ ((aligned(32))); +static __m256i a_sq_m7_m1 __attribute__ ((aligned(32))); +static __m256i a_sq_m7_p1 __attribute__ ((aligned(32))); +static __m256i a_sq_m7_p3 
__attribute__ ((aligned(32))); +static __m256i a_sq_m7_p5 __attribute__ ((aligned(32))); +static __m256i a_sq_m7_p7 __attribute__ ((aligned(32))); +static __m256i a_sq_m5_m7 __attribute__ ((aligned(32))); +static __m256i a_sq_m5_m5 __attribute__ ((aligned(32))); +static __m256i a_sq_m5_m3 __attribute__ ((aligned(32))); +static __m256i a_sq_m5_m1 __attribute__ ((aligned(32))); +static __m256i a_sq_m5_p1 __attribute__ ((aligned(32))); +static __m256i a_sq_m5_p3 __attribute__ ((aligned(32))); +static __m256i a_sq_m5_p5 __attribute__ ((aligned(32))); +static __m256i a_sq_m5_p7 __attribute__ ((aligned(32))); +static __m256i a_sq_m3_m7 __attribute__ ((aligned(32))); +static __m256i a_sq_m3_m5 __attribute__ ((aligned(32))); +static __m256i a_sq_m3_m3 __attribute__ ((aligned(32))); +static __m256i a_sq_m3_m1 __attribute__ ((aligned(32))); +static __m256i a_sq_m3_p1 __attribute__ ((aligned(32))); +static __m256i a_sq_m3_p3 __attribute__ ((aligned(32))); +static __m256i a_sq_m3_p5 __attribute__ ((aligned(32))); +static __m256i a_sq_m3_p7 __attribute__ ((aligned(32))); +static __m256i a_sq_m1_m7 __attribute__ ((aligned(32))); +static __m256i a_sq_m1_m5 __attribute__ ((aligned(32))); +static __m256i a_sq_m1_m3 __attribute__ ((aligned(32))); +static __m256i a_sq_m1_m1 __attribute__ ((aligned(32))); +static __m256i a_sq_m1_p1 __attribute__ ((aligned(32))); +static __m256i a_sq_m1_p3 __attribute__ ((aligned(32))); +static __m256i a_sq_m1_p5 __attribute__ ((aligned(32))); +static __m256i a_sq_m1_p7 __attribute__ ((aligned(32))); +static __m256i a_sq_p1_m7 __attribute__ ((aligned(32))); +static __m256i a_sq_p1_m5 __attribute__ ((aligned(32))); +static __m256i a_sq_p1_m3 __attribute__ ((aligned(32))); +static __m256i a_sq_p1_m1 __attribute__ ((aligned(32))); +static __m256i a_sq_p1_p1 __attribute__ ((aligned(32))); +static __m256i a_sq_p1_p3 __attribute__ ((aligned(32))); +static __m256i a_sq_p1_p5 __attribute__ ((aligned(32))); +static __m256i a_sq_p1_p7 __attribute__ 
((aligned(32))); +static __m256i a_sq_p3_m7 __attribute__ ((aligned(32))); +static __m256i a_sq_p3_m5 __attribute__ ((aligned(32))); +static __m256i a_sq_p3_m3 __attribute__ ((aligned(32))); +static __m256i a_sq_p3_m1 __attribute__ ((aligned(32))); +static __m256i a_sq_p3_p1 __attribute__ ((aligned(32))); +static __m256i a_sq_p3_p3 __attribute__ ((aligned(32))); +static __m256i a_sq_p3_p5 __attribute__ ((aligned(32))); +static __m256i a_sq_p3_p7 __attribute__ ((aligned(32))); +static __m256i a_sq_p5_m7 __attribute__ ((aligned(32))); +static __m256i a_sq_p5_m5 __attribute__ ((aligned(32))); +static __m256i a_sq_p5_m3 __attribute__ ((aligned(32))); +static __m256i a_sq_p5_m1 __attribute__ ((aligned(32))); +static __m256i a_sq_p5_p1 __attribute__ ((aligned(32))); +static __m256i a_sq_p5_p3 __attribute__ ((aligned(32))); +static __m256i a_sq_p5_p5 __attribute__ ((aligned(32))); +static __m256i a_sq_p5_p7 __attribute__ ((aligned(32))); +static __m256i a_sq_p7_m7 __attribute__ ((aligned(32))); +static __m256i a_sq_p7_m5 __attribute__ ((aligned(32))); +static __m256i a_sq_p7_m3 __attribute__ ((aligned(32))); +static __m256i a_sq_p7_m1 __attribute__ ((aligned(32))); +static __m256i a_sq_p7_p1 __attribute__ ((aligned(32))); +static __m256i a_sq_p7_p3 __attribute__ ((aligned(32))); +static __m256i a_sq_p7_p5 __attribute__ ((aligned(32))); +static __m256i a_sq_p7_p7 __attribute__ ((aligned(32))); + +static __m256i bit_met_m7_m7 __attribute__ ((aligned(32))); +static __m256i bit_met_m7_m5 __attribute__ ((aligned(32))); +static __m256i bit_met_m7_m3 __attribute__ ((aligned(32))); +static __m256i bit_met_m7_m1 __attribute__ ((aligned(32))); +static __m256i bit_met_m7_p1 __attribute__ ((aligned(32))); +static __m256i bit_met_m7_p3 __attribute__ ((aligned(32))); +static __m256i bit_met_m7_p5 __attribute__ ((aligned(32))); +static __m256i bit_met_m7_p7 __attribute__ ((aligned(32))); +static __m256i bit_met_m5_m7 __attribute__ ((aligned(32))); +static __m256i bit_met_m5_m5 
__attribute__ ((aligned(32))); +static __m256i bit_met_m5_m3 __attribute__ ((aligned(32))); +static __m256i bit_met_m5_m1 __attribute__ ((aligned(32))); +static __m256i bit_met_m5_p1 __attribute__ ((aligned(32))); +static __m256i bit_met_m5_p3 __attribute__ ((aligned(32))); +static __m256i bit_met_m5_p5 __attribute__ ((aligned(32))); +static __m256i bit_met_m5_p7 __attribute__ ((aligned(32))); +static __m256i bit_met_m3_m7 __attribute__ ((aligned(32))); +static __m256i bit_met_m3_m5 __attribute__ ((aligned(32))); +static __m256i bit_met_m3_m3 __attribute__ ((aligned(32))); +static __m256i bit_met_m3_m1 __attribute__ ((aligned(32))); +static __m256i bit_met_m3_p1 __attribute__ ((aligned(32))); +static __m256i bit_met_m3_p3 __attribute__ ((aligned(32))); +static __m256i bit_met_m3_p5 __attribute__ ((aligned(32))); +static __m256i bit_met_m3_p7 __attribute__ ((aligned(32))); +static __m256i bit_met_m1_m7 __attribute__ ((aligned(32))); +static __m256i bit_met_m1_m5 __attribute__ ((aligned(32))); +static __m256i bit_met_m1_m3 __attribute__ ((aligned(32))); +static __m256i bit_met_m1_m1 __attribute__ ((aligned(32))); +static __m256i bit_met_m1_p1 __attribute__ ((aligned(32))); +static __m256i bit_met_m1_p3 __attribute__ ((aligned(32))); +static __m256i bit_met_m1_p5 __attribute__ ((aligned(32))); +static __m256i bit_met_m1_p7 __attribute__ ((aligned(32))); +static __m256i bit_met_p1_m7 __attribute__ ((aligned(32))); +static __m256i bit_met_p1_m5 __attribute__ ((aligned(32))); +static __m256i bit_met_p1_m3 __attribute__ ((aligned(32))); +static __m256i bit_met_p1_m1 __attribute__ ((aligned(32))); +static __m256i bit_met_p1_p1 __attribute__ ((aligned(32))); +static __m256i bit_met_p1_p3 __attribute__ ((aligned(32))); +static __m256i bit_met_p1_p5 __attribute__ ((aligned(32))); +static __m256i bit_met_p1_p7 __attribute__ ((aligned(32))); +static __m256i bit_met_p3_m7 __attribute__ ((aligned(32))); +static __m256i bit_met_p3_m5 __attribute__ ((aligned(32))); +static __m256i 
bit_met_p3_m3 __attribute__ ((aligned(32))); +static __m256i bit_met_p3_m1 __attribute__ ((aligned(32))); +static __m256i bit_met_p3_p1 __attribute__ ((aligned(32))); +static __m256i bit_met_p3_p3 __attribute__ ((aligned(32))); +static __m256i bit_met_p3_p5 __attribute__ ((aligned(32))); +static __m256i bit_met_p3_p7 __attribute__ ((aligned(32))); +static __m256i bit_met_p5_m7 __attribute__ ((aligned(32))); +static __m256i bit_met_p5_m5 __attribute__ ((aligned(32))); +static __m256i bit_met_p5_m3 __attribute__ ((aligned(32))); +static __m256i bit_met_p5_m1 __attribute__ ((aligned(32))); +static __m256i bit_met_p5_p1 __attribute__ ((aligned(32))); +static __m256i bit_met_p5_p3 __attribute__ ((aligned(32))); +static __m256i bit_met_p5_p5 __attribute__ ((aligned(32))); +static __m256i bit_met_p5_p7 __attribute__ ((aligned(32))); +static __m256i bit_met_p7_m7 __attribute__ ((aligned(32))); +static __m256i bit_met_p7_m5 __attribute__ ((aligned(32))); +static __m256i bit_met_p7_m3 __attribute__ ((aligned(32))); +static __m256i bit_met_p7_m1 __attribute__ ((aligned(32))); +static __m256i bit_met_p7_p1 __attribute__ ((aligned(32))); +static __m256i bit_met_p7_p3 __attribute__ ((aligned(32))); +static __m256i bit_met_p7_p5 __attribute__ ((aligned(32))); +static __m256i bit_met_p7_p7 __attribute__ ((aligned(32))); + +static __m256i y0_p_1_1 __attribute__ ((aligned(32))); +static __m256i y0_p_1_3 __attribute__ ((aligned(32))); +static __m256i y0_p_1_5 __attribute__ ((aligned(32))); +static __m256i y0_p_1_7 __attribute__ ((aligned(32))); +static __m256i y0_p_3_1 __attribute__ ((aligned(32))); +static __m256i y0_p_3_3 __attribute__ ((aligned(32))); +static __m256i y0_p_3_5 __attribute__ ((aligned(32))); +static __m256i y0_p_3_7 __attribute__ ((aligned(32))); +static __m256i y0_p_5_1 __attribute__ ((aligned(32))); +static __m256i y0_p_5_3 __attribute__ ((aligned(32))); +static __m256i y0_p_5_5 __attribute__ ((aligned(32))); +static __m256i y0_p_5_7 __attribute__ ((aligned(32))); 
+static __m256i y0_p_7_1 __attribute__ ((aligned(32))); +static __m256i y0_p_7_3 __attribute__ ((aligned(32))); +static __m256i y0_p_7_5 __attribute__ ((aligned(32))); +static __m256i y0_p_7_7 __attribute__ ((aligned(32))); +static __m256i y0_m_1_1 __attribute__ ((aligned(32))); +static __m256i y0_m_1_3 __attribute__ ((aligned(32))); +static __m256i y0_m_1_5 __attribute__ ((aligned(32))); +static __m256i y0_m_1_7 __attribute__ ((aligned(32))); +static __m256i y0_m_3_1 __attribute__ ((aligned(32))); +static __m256i y0_m_3_3 __attribute__ ((aligned(32))); +static __m256i y0_m_3_5 __attribute__ ((aligned(32))); +static __m256i y0_m_3_7 __attribute__ ((aligned(32))); +static __m256i y0_m_5_1 __attribute__ ((aligned(32))); +static __m256i y0_m_5_3 __attribute__ ((aligned(32))); +static __m256i y0_m_5_5 __attribute__ ((aligned(32))); +static __m256i y0_m_5_7 __attribute__ ((aligned(32))); +static __m256i y0_m_7_1 __attribute__ ((aligned(32))); +static __m256i y0_m_7_3 __attribute__ ((aligned(32))); +static __m256i y0_m_7_5 __attribute__ ((aligned(32))); +static __m256i y0_m_7_7 __attribute__ ((aligned(32))); + +static __m256i xmm0 __attribute__ ((aligned(32))); +static __m256i xmm1 __attribute__ ((aligned(32))); +static __m256i xmm2 __attribute__ ((aligned(32))); +static __m256i xmm3 __attribute__ ((aligned(32))); +static __m256i xmm4 __attribute__ ((aligned(32))); +static __m256i xmm5 __attribute__ ((aligned(32))); +static __m256i xmm6 __attribute__ ((aligned(32))); +static __m256i xmm7 __attribute__ ((aligned(32))); +static __m256i xmm8 __attribute__ ((aligned(32))); + +static __m256i y0r __attribute__ ((aligned(32))); +static __m256i y0i __attribute__ ((aligned(32))); +static __m256i y1r __attribute__ ((aligned(32))); +static __m256i y1i __attribute__ ((aligned(32))); +static __m256i y2r __attribute__ ((aligned(32))); +static __m256i y2i __attribute__ ((aligned(32))); + +static __m256i logmax_num_re0 __attribute__ ((aligned(32))); +static __m256i logmax_den_re0 
__attribute__ ((aligned(32))); + +static __m256i tmp_result __attribute__ ((aligned(32))); +static __m256i tmp_result2 __attribute__ ((aligned(32))); +static __m256i tmp_result3 __attribute__ ((aligned(32))); +static __m256i tmp_result4 __attribute__ ((aligned(32))); + +//============================================================================================== +// Auxiliary macros +// Every macro in this section expands to several statements and overwrites the scratch vectors tmp_result and tmp_result2 (the 64-QAM interference macro also tmp_result3 and tmp_result4), so invoke it only as a full statement inside a braced block. + +// Select the interference magnitude per 16-bit element: int_mag = c1 where int_ch_mag > psi (signed compare), c2 elsewhere. +// The compare mask is inverted by XOR with ones256, which is not defined in this section (presumably all-ones - TODO confirm). +#define interference_abs_epi16(psi,int_ch_mag,int_mag,c1,c2) tmp_result = _mm256_cmpgt_epi16(int_ch_mag,psi); tmp_result2 = _mm256_xor_si256(tmp_result,(*(__m256i*)&ones256[0])); tmp_result = _mm256_and_si256(tmp_result,c1); tmp_result2 = _mm256_and_si256(tmp_result2,c2); int_mag = _mm256_or_si256(tmp_result,tmp_result2); + +// Select the interference magnitude per 16-bit element for 64-QAM: +// a = c1 where psi < int_ch_mag, c3 where int_ch_mag <= psi < int_two_ch_mag, c5 where int_two_ch_mag <= psi <= int_three_ch_mag, c7 where psi > int_three_ch_mag. +// The four cases are disjoint only if int_ch_mag <= int_two_ch_mag <= int_three_ch_mag; the macro does not check this. +// Original note: tmp_result = ones in shorts corresponding to interval 2<=x<=4, tmp_result2 interval < 2, tmp_result3 interval 4<x<6 and tmp_result4 interval x>6 +#define interference_abs_64qam_epi16(psi,int_ch_mag,int_two_ch_mag,int_three_ch_mag,a,c1,c3,c5,c7) tmp_result = _mm256_cmpgt_epi16(int_two_ch_mag,psi); tmp_result3 = _mm256_xor_si256(tmp_result,(*(__m256i*)&ones256[0])); tmp_result2 = _mm256_cmpgt_epi16(int_ch_mag,psi); tmp_result = _mm256_xor_si256(tmp_result,tmp_result2); tmp_result4 = _mm256_cmpgt_epi16(psi,int_three_ch_mag); tmp_result3 = _mm256_xor_si256(tmp_result3,tmp_result4); tmp_result = _mm256_and_si256(tmp_result,c3); tmp_result2 = _mm256_and_si256(tmp_result2,c1); tmp_result3 = _mm256_and_si256(tmp_result3,c5); tmp_result4 = _mm256_and_si256(tmp_result4,c7); tmp_result = _mm256_or_si256(tmp_result,tmp_result2); tmp_result3 = _mm256_or_si256(tmp_result3,tmp_result4); a = _mm256_or_si256(tmp_result,tmp_result3); + +// Calculates psi_a = psi_r*a_r + psi_i*a_i. +// Each product keeps the high 16 bits of the 32-bit result, shifted left by one; the final sum saturates. +#define prodsum_psi_a_epi16(psi_r,a_r,psi_i,a_i,psi_a) tmp_result = _mm256_mulhi_epi16(psi_r,a_r); tmp_result = _mm256_slli_epi16(tmp_result,1); tmp_result2 = _mm256_mulhi_epi16(psi_i,a_i); tmp_result2 =
_mm256_slli_epi16(tmp_result2,1); psi_a = _mm256_adds_epi16(tmp_result,tmp_result2); + +// Calculates a_sq = int_ch_mag*(a_r^2 + a_i^2)*scale_factor. +// Each of the three multiplies keeps the high 16 bits and is followed by a left shift of one; the two halves are added with saturation. +#define square_a_epi16(a_r,a_i,int_ch_mag,scale_factor,a_sq) tmp_result = _mm256_mulhi_epi16(a_r,a_r); tmp_result = _mm256_slli_epi16(tmp_result,1); tmp_result = _mm256_mulhi_epi16(tmp_result,scale_factor); tmp_result = _mm256_slli_epi16(tmp_result,1); tmp_result = _mm256_mulhi_epi16(tmp_result,int_ch_mag); tmp_result = _mm256_slli_epi16(tmp_result,1); tmp_result2 = _mm256_mulhi_epi16(a_i,a_i); tmp_result2 = _mm256_slli_epi16(tmp_result2,1); tmp_result2 = _mm256_mulhi_epi16(tmp_result2,scale_factor); tmp_result2 = _mm256_slli_epi16(tmp_result2,1); tmp_result2 = _mm256_mulhi_epi16(tmp_result2,int_ch_mag); tmp_result2 = _mm256_slli_epi16(tmp_result2,1); a_sq = _mm256_adds_epi16(tmp_result,tmp_result2); + +// Calculates a_sq = int_ch_mag*(a_r^2 + a_i^2)*scale_factor for 64-QAM. +// Same sequence as square_a_epi16, except that the product with scale_factor is shifted left by 3 instead of 1. +#define square_a_64qam_epi16(a_r,a_i,int_ch_mag,scale_factor,a_sq) tmp_result = _mm256_mulhi_epi16(a_r,a_r); tmp_result = _mm256_slli_epi16(tmp_result,1); tmp_result = _mm256_mulhi_epi16(tmp_result,scale_factor); tmp_result = _mm256_slli_epi16(tmp_result,3); tmp_result = _mm256_mulhi_epi16(tmp_result,int_ch_mag); tmp_result = _mm256_slli_epi16(tmp_result,1); tmp_result2 = _mm256_mulhi_epi16(a_i,a_i); tmp_result2 = _mm256_slli_epi16(tmp_result2,1); tmp_result2 = _mm256_mulhi_epi16(tmp_result2,scale_factor); tmp_result2 = _mm256_slli_epi16(tmp_result2,3); tmp_result2 = _mm256_mulhi_epi16(tmp_result2,int_ch_mag); tmp_result2 = _mm256_slli_epi16(tmp_result2,1); a_sq = _mm256_adds_epi16(tmp_result,tmp_result2); + +// Deinterleaves two vectors of sixteen 16-bit values each. +// Within every 128-bit lane the even-indexed elements are gathered ahead of the odd-indexed ones; the even halves of in0 and in1 are then merged into *out_re and the odd halves into *out_im, with in0's elements first. +// Per the layout comments in the body, the inputs are expected to hold interleaved [Re Im] pairs (in0 = samples 0..7, in1 = samples 8..15). +// out_re / out_im: destinations for the 16 real parts and the 16 imaginary parts; both are written unconditionally. +void seperate_real_imag_parts(__m256i *out_re, + __m256i *out_im, + __m256i in0, + __m256i in1) +{ + __m256i tmp0; + __m256i tmp1; + + in0 = _mm256_shufflelo_epi16(in0,0xd8); // 0xd8 == _MM_SHUFFLE(3,1,2,0): picks elements 0,2,1,3 + in0 = _mm256_shufflehi_epi16(in0,0xd8); // 0xd8 == _MM_SHUFFLE(3,1,2,0): picks elements 0,2,1,3 + in0 = _mm256_shuffle_epi32(in0,0xd8); // 0xd8 == _MM_SHUFFLE(3,1,2,0): picks elements 0,2,1,3 + + in1 =
_mm256_shufflelo_epi16(in1,0xd8); // 0xd8 == _MM_SHUFFLE(3,1,2,0): picks elements 0,2,1,3 + in1 = _mm256_shufflehi_epi16(in1,0xd8); // 0xd8 == _MM_SHUFFLE(3,1,2,0): picks elements 0,2,1,3 + in1 = _mm256_shuffle_epi32(in1,0xd8); // 0xd8 == _MM_SHUFFLE(3,1,2,0): picks elements 0,2,1,3 + + //in0 = [Re(0,1,2,3) Im(0,1,2,3) Re(4,5,6,7) Im(4,5,6,7)] + //in1 = [Re(8,9,10,11) Im(8,9,10,11) Re(12,13,14,15) Im(12,13,14,15)] + + tmp0 = _mm256_unpacklo_epi64(in0, in1); + //tmp0 = [Re(0,1,2,3) Re(8,9,10,11) Re(4,5,6,7) Re(12,13,14,15)] + tmp0 = _mm256_permute4x64_epi64(tmp0,0xd8); // swap the middle 64-bit blocks: real parts now in sample order 0..15 + + tmp1 = _mm256_unpackhi_epi64(in0, in1); + //tmp1 = [Im(0,1,2,3) Im(8,9,10,11) Im(4,5,6,7) Im(12,13,14,15)] + tmp1 = _mm256_permute4x64_epi64(tmp1,0xd8); // swap the middle 64-bit blocks: imaginary parts now in sample order 0..15 + + *out_re = tmp0; + *out_im = tmp1; +} + +void qam64_qam16_avx2(short *stream0_in, + short *stream1_in, + short *ch_mag, + short *ch_mag_i, + short *stream0_out, + short *rho01, + int length + ) +{ + + /* + Author: S. Wagner + Date: 31-07-12 + + Input: + stream0_in: MF filter for 1st stream, i.e., y0=h0'*y + stream1_in: MF filter for 2nd stream, i.e., y1=h1'*y + ch_mag: 4*h0/sqrt(42), [Re0 Im0 Re1 Im1] s.t. Im0=Re0, Im1=Re1, etc + ch_mag_i: 4*h1/sqrt(42), [Re0 Im0 Re1 Im1] s.t.
Im0=Re0, Im1=Re1, etc + rho01: Channel cross correlation, i.e., h1'*h0 + + Output: + stream0_out: output LLRs for 1st stream + */ + +#if defined(__x86_64__) || defined(__i386__) + + __m256i *rho01_256i = (__m256i *)rho01; + __m256i *stream0_256i_in = (__m256i *)stream0_in; + __m256i *stream1_256i_in = (__m256i *)stream1_in; + __m256i *ch_mag_256i = (__m256i *)ch_mag; + __m256i *ch_mag_256i_i = (__m256i *)ch_mag_i; + + __m256i ONE_OVER_SQRT_42 = _mm256_broadcastw_epi16(_mm_set1_epi16(10112)); // round(1/sqrt(42)*2^16) + __m256i THREE_OVER_SQRT_42 = _mm256_broadcastw_epi16(_mm_set1_epi16(30337)); // round(3/sqrt(42)*2^16) + __m256i FIVE_OVER_SQRT_42 = _mm256_broadcastw_epi16(_mm_set1_epi16(25281)); // round(5/sqrt(42)*2^15) + __m256i SEVEN_OVER_SQRT_42 = _mm256_broadcastw_epi16(_mm_set1_epi16(17697)); // round(5/sqrt(42)*2^15) + __m256i FORTYNINE_OVER_FOUR_SQRT_42 = _mm256_broadcastw_epi16(_mm_set1_epi16(30969)); // round(49/(4*sqrt(42))*2^14), Q2.14 + __m256i THIRTYSEVEN_OVER_FOUR_SQRT_42 = _mm256_broadcastw_epi16(_mm_set1_epi16(23385)); // round(37/(4*sqrt(42))*2^14), Q2.14 + __m256i TWENTYFIVE_OVER_FOUR_SQRT_42 = _mm256_broadcastw_epi16(_mm_set1_epi16(31601)); // round(25/(4*sqrt(42))*2^15) + __m256i TWENTYNINE_OVER_FOUR_SQRT_42 = _mm256_broadcastw_epi16(_mm_set1_epi16(18329)); // round(29/(4*sqrt(42))*2^15), Q2.14 + __m256i SEVENTEEN_OVER_FOUR_SQRT_42 = _mm256_broadcastw_epi16(_mm_set1_epi16(21489)); // round(17/(4*sqrt(42))*2^15) + __m256i NINE_OVER_FOUR_SQRT_42 = _mm256_broadcastw_epi16(_mm_set1_epi16(11376)); // round(9/(4*sqrt(42))*2^15) + __m256i THIRTEEN_OVER_FOUR_SQRT_42 = _mm256_broadcastw_epi16(_mm_set1_epi16(16433)); // round(13/(4*sqrt(42))*2^15) + __m256i FIVE_OVER_FOUR_SQRT_42 = _mm256_broadcastw_epi16(_mm_set1_epi16(6320)); // round(5/(4*sqrt(42))*2^15) + __m256i ONE_OVER_FOUR_SQRT_42 = _mm256_broadcastw_epi16(_mm_set1_epi16(1264)); // round(1/(4*sqrt(42))*2^15) + __m256i ONE_OVER_SQRT_10_Q15 = _mm256_broadcastw_epi16(_mm_set1_epi16(10362)); // 
round(1/sqrt(10)*2^15) + __m256i THREE_OVER_SQRT_10 = _mm256_broadcastw_epi16(_mm_set1_epi16(31086)); // round(3/sqrt(10)*2^15) + __m256i SQRT_10_OVER_FOUR = _mm256_broadcastw_epi16(_mm_set1_epi16(25905)); // round(sqrt(10)/4*2^15) + + + __m256i ch_mag_int; + __m256i ch_mag_des; + __m256i ch_mag_98_over_42_with_sigma2; + __m256i ch_mag_74_over_42_with_sigma2; + __m256i ch_mag_58_over_42_with_sigma2; + __m256i ch_mag_50_over_42_with_sigma2; + __m256i ch_mag_34_over_42_with_sigma2; + __m256i ch_mag_18_over_42_with_sigma2; + __m256i ch_mag_26_over_42_with_sigma2; + __m256i ch_mag_10_over_42_with_sigma2; + __m256i ch_mag_2_over_42_with_sigma2; + __m256i y0r_one_over_sqrt_21; + __m256i y0r_three_over_sqrt_21; + __m256i y0r_five_over_sqrt_21; + __m256i y0r_seven_over_sqrt_21; + __m256i y0i_one_over_sqrt_21; + __m256i y0i_three_over_sqrt_21; + __m256i y0i_five_over_sqrt_21; + __m256i y0i_seven_over_sqrt_21; + +#elif defined(__arm__) + +#endif + int i,j; + uint32_t len256 = (length)>>3; + + for (i=0; i<len256; i+=2) { + +#if defined(__x86_64__) || defined(__i386__) + // Get rho + /* + xmm0 = rho01_128i[i]; + xmm1 = rho01_128i[i+1]; + xmm0 = _mm_shufflelo_epi16(xmm0,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm0 = _mm_shufflehi_epi16(xmm0,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm0 = _mm_shuffle_epi32(xmm0,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm1 = _mm_shufflelo_epi16(xmm1,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm1 = _mm_shufflehi_epi16(xmm1,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm1 = _mm_shuffle_epi32(xmm1,0xd8); //_MM_SHUFFLE(0,2,1,3)); + //xmm0 = [Re(0,1) Re(2,3) Im(0,1) Im(2,3)] + //xmm1 = [Re(4,5) Re(6,7) Im(4,5) Im(6,7)] + xmm2 = _mm_unpacklo_epi64(xmm0,xmm1); // Re(rho) + xmm3 = _mm_unpackhi_epi64(xmm0,xmm1); // Im(rho) + */ + seperate_real_imag_parts(&xmm2, &xmm3, rho01_256i[i], rho01_256i[i+1]); + + rho_rpi = _mm256_adds_epi16(xmm2,xmm3); // rho = Re(rho) + Im(rho) + rho_rmi = _mm256_subs_epi16(xmm2,xmm3); // rho* = Re(rho) - Im(rho) + + // Compute the different rhos + rho_rpi_1_1 = 
_mm256_mulhi_epi16(rho_rpi, ONE_OVER_SQRT_42); + rho_rmi_1_1 = _mm256_mulhi_epi16(rho_rmi, ONE_OVER_SQRT_42); + rho_rpi_3_3 = _mm256_mulhi_epi16(rho_rpi, THREE_OVER_SQRT_42); + rho_rmi_3_3 = _mm256_mulhi_epi16(rho_rmi, THREE_OVER_SQRT_42); + rho_rpi_5_5 = _mm256_mulhi_epi16(rho_rpi, FIVE_OVER_SQRT_42); + rho_rmi_5_5 = _mm256_mulhi_epi16(rho_rmi, FIVE_OVER_SQRT_42); + rho_rpi_7_7 = _mm256_mulhi_epi16(rho_rpi, SEVEN_OVER_SQRT_42); + rho_rmi_7_7 = _mm256_mulhi_epi16(rho_rmi, SEVEN_OVER_SQRT_42); + + rho_rpi_5_5 = _mm256_slli_epi16(rho_rpi_5_5, 1); + rho_rmi_5_5 = _mm256_slli_epi16(rho_rmi_5_5, 1); + rho_rpi_7_7 = _mm256_slli_epi16(rho_rpi_7_7, 2); + rho_rmi_7_7 = _mm256_slli_epi16(rho_rmi_7_7, 2); + + xmm4 = _mm256_mulhi_epi16(xmm2, ONE_OVER_SQRT_42); + xmm5 = _mm256_mulhi_epi16(xmm3, ONE_OVER_SQRT_42); + xmm6 = _mm256_mulhi_epi16(xmm3, THREE_OVER_SQRT_42); + xmm7 = _mm256_mulhi_epi16(xmm3, FIVE_OVER_SQRT_42); + xmm8 = _mm256_mulhi_epi16(xmm3, SEVEN_OVER_SQRT_42); + xmm7 = _mm256_slli_epi16(xmm7, 1); + xmm8 = _mm256_slli_epi16(xmm8, 2); + + rho_rpi_1_3 = _mm256_adds_epi16(xmm4, xmm6); + rho_rmi_1_3 = _mm256_subs_epi16(xmm4, xmm6); + rho_rpi_1_5 = _mm256_adds_epi16(xmm4, xmm7); + rho_rmi_1_5 = _mm256_subs_epi16(xmm4, xmm7); + rho_rpi_1_7 = _mm256_adds_epi16(xmm4, xmm8); + rho_rmi_1_7 = _mm256_subs_epi16(xmm4, xmm8); + + xmm4 = _mm256_mulhi_epi16(xmm2, THREE_OVER_SQRT_42); + rho_rpi_3_1 = _mm256_adds_epi16(xmm4, xmm5); + rho_rmi_3_1 = _mm256_subs_epi16(xmm4, xmm5); + rho_rpi_3_5 = _mm256_adds_epi16(xmm4, xmm7); + rho_rmi_3_5 = _mm256_subs_epi16(xmm4, xmm7); + rho_rpi_3_7 = _mm256_adds_epi16(xmm4, xmm8); + rho_rmi_3_7 = _mm256_subs_epi16(xmm4, xmm8); + + xmm4 = _mm256_mulhi_epi16(xmm2, FIVE_OVER_SQRT_42); + xmm4 = _mm256_slli_epi16(xmm4, 1); + rho_rpi_5_1 = _mm256_adds_epi16(xmm4, xmm5); + rho_rmi_5_1 = _mm256_subs_epi16(xmm4, xmm5); + rho_rpi_5_3 = _mm256_adds_epi16(xmm4, xmm6); + rho_rmi_5_3 = _mm256_subs_epi16(xmm4, xmm6); + rho_rpi_5_7 = _mm256_adds_epi16(xmm4, 
xmm8); + rho_rmi_5_7 = _mm256_subs_epi16(xmm4, xmm8); + + xmm4 = _mm256_mulhi_epi16(xmm2, SEVEN_OVER_SQRT_42); + xmm4 = _mm256_slli_epi16(xmm4, 2); + rho_rpi_7_1 = _mm256_adds_epi16(xmm4, xmm5); + rho_rmi_7_1 = _mm256_subs_epi16(xmm4, xmm5); + rho_rpi_7_3 = _mm256_adds_epi16(xmm4, xmm6); + rho_rmi_7_3 = _mm256_subs_epi16(xmm4, xmm6); + rho_rpi_7_5 = _mm256_adds_epi16(xmm4, xmm7); + rho_rmi_7_5 = _mm256_subs_epi16(xmm4, xmm7); + + // Rearrange interfering MF output + /* + xmm0 = stream1_128i_in[i]; + xmm1 = stream1_128i_in[i+1]; + xmm0 = _mm256_shufflelo_epi16(xmm0,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm0 = _mm256_shufflehi_epi16(xmm0,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm0 = _mm256_shuffle_epi32(xmm0,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm1 = _mm256_shufflelo_epi16(xmm1,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm1 = _mm256_shufflehi_epi16(xmm1,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm1 = _mm256_shuffle_epi32(xmm1,0xd8); //_MM_SHUFFLE(0,2,1,3)); + //xmm0 = [Re(0,1) Re(2,3) Im(0,1) Im(2,3)] + //xmm1 = [Re(4,5) Re(6,7) Im(4,5) Im(6,7)] + y1r = _mm256_unpacklo_epi64(xmm0,xmm1); //[y1r(1),y1r(2),y1r(3),y1r(4)] + y1i = _mm256_unpackhi_epi64(xmm0,xmm1); //[y1i(1),y1i(2),y1i(3),y1i(4)] + */ + + seperate_real_imag_parts(&y1r, &y1i, stream1_256i_in[i], stream1_256i_in[i+1]); + + // Psi_r calculation from rho_rpi or rho_rmi + xmm0 = _mm256_broadcastw_epi16(_mm_set1_epi16(0));// ZERO for abs_pi16 + xmm2 = _mm256_subs_epi16(rho_rpi_7_7, y1r); + psi_r_p7_p7 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rpi_7_5, y1r); + psi_r_p7_p5 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rpi_7_3, y1r); + psi_r_p7_p3 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rpi_7_1, y1r); + psi_r_p7_p1 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rmi_7_1, y1r); + psi_r_p7_m1 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rmi_7_3, y1r); + psi_r_p7_m3 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rmi_7_5, y1r); + psi_r_p7_m5 = _mm256_abs_epi16(xmm2); + 
xmm2 = _mm256_subs_epi16(rho_rmi_7_7, y1r); + psi_r_p7_m7 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rpi_5_7, y1r); + psi_r_p5_p7 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rpi_5_5, y1r); + psi_r_p5_p5 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rpi_5_3, y1r); + psi_r_p5_p3 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rpi_5_1, y1r); + psi_r_p5_p1 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rmi_5_1, y1r); + psi_r_p5_m1 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rmi_5_3, y1r); + psi_r_p5_m3 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rmi_5_5, y1r); + psi_r_p5_m5 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rmi_5_7, y1r); + psi_r_p5_m7 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rpi_3_7, y1r); + psi_r_p3_p7 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rpi_3_5, y1r); + psi_r_p3_p5 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rpi_3_3, y1r); + psi_r_p3_p3 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rpi_3_1, y1r); + psi_r_p3_p1 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rmi_3_1, y1r); + psi_r_p3_m1 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rmi_3_3, y1r); + psi_r_p3_m3 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rmi_3_5, y1r); + psi_r_p3_m5 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rmi_3_7, y1r); + psi_r_p3_m7 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rpi_1_7, y1r); + psi_r_p1_p7 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rpi_1_5, y1r); + psi_r_p1_p5 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rpi_1_3, y1r); + psi_r_p1_p3 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rpi_1_1, y1r); + psi_r_p1_p1 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rmi_1_1, y1r); + psi_r_p1_m1 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rmi_1_3, y1r); + psi_r_p1_m3 = _mm256_abs_epi16(xmm2); + xmm2 = 
_mm256_subs_epi16(rho_rmi_1_5, y1r); + psi_r_p1_m5 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rmi_1_7, y1r); + psi_r_p1_m7 = _mm256_abs_epi16(xmm2); + + xmm2 = _mm256_adds_epi16(rho_rmi_1_7, y1r); + psi_r_m1_p7 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rmi_1_5, y1r); + psi_r_m1_p5 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rmi_1_3, y1r); + psi_r_m1_p3 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rmi_1_1, y1r); + psi_r_m1_p1 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rpi_1_1, y1r); + psi_r_m1_m1 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rpi_1_3, y1r); + psi_r_m1_m3 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rpi_1_5, y1r); + psi_r_m1_m5 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rpi_1_7, y1r); + psi_r_m1_m7 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rmi_3_7, y1r); + psi_r_m3_p7 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rmi_3_5, y1r); + psi_r_m3_p5 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rmi_3_3, y1r); + psi_r_m3_p3 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rmi_3_1, y1r); + psi_r_m3_p1 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rpi_3_1, y1r); + psi_r_m3_m1 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rpi_3_3, y1r); + psi_r_m3_m3 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rpi_3_5, y1r); + psi_r_m3_m5 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rpi_3_7, y1r); + psi_r_m3_m7 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rmi_5_7, y1r); + psi_r_m5_p7 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rmi_5_5, y1r); + psi_r_m5_p5 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rmi_5_3, y1r); + psi_r_m5_p3 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rmi_5_1, y1r); + psi_r_m5_p1 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rpi_5_1, y1r); + psi_r_m5_m1 = _mm256_abs_epi16(xmm2); + xmm2 = 
_mm256_adds_epi16(rho_rpi_5_3, y1r); + psi_r_m5_m3 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rpi_5_5, y1r); + psi_r_m5_m5 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rpi_5_7, y1r); + psi_r_m5_m7 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rmi_7_7, y1r); + psi_r_m7_p7 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rmi_7_5, y1r); + psi_r_m7_p5 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rmi_7_3, y1r); + psi_r_m7_p3 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rmi_7_1, y1r); + psi_r_m7_p1 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rpi_7_1, y1r); + psi_r_m7_m1 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rpi_7_3, y1r); + psi_r_m7_m3 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rpi_7_5, y1r); + psi_r_m7_m5 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rpi_7_7, y1r); + psi_r_m7_m7 = _mm256_abs_epi16(xmm2); + + // Psi_i calculation from rho_rpi or rho_rmi + xmm2 = _mm256_subs_epi16(rho_rmi_7_7, y1i); + psi_i_p7_p7 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rmi_5_7, y1i); + psi_i_p7_p5 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rmi_3_7, y1i); + psi_i_p7_p3 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rmi_1_7, y1i); + psi_i_p7_p1 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rpi_1_7, y1i); + psi_i_p7_m1 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rpi_3_7, y1i); + psi_i_p7_m3 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rpi_5_7, y1i); + psi_i_p7_m5 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rpi_7_7, y1i); + psi_i_p7_m7 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rmi_7_5, y1i); + psi_i_p5_p7 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rmi_5_5, y1i); + psi_i_p5_p5 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rmi_3_5, y1i); + psi_i_p5_p3 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rmi_1_5, y1i); + psi_i_p5_p1 = 
_mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rpi_1_5, y1i); + psi_i_p5_m1 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rpi_3_5, y1i); + psi_i_p5_m3 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rpi_5_5, y1i); + psi_i_p5_m5 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rpi_7_5, y1i); + psi_i_p5_m7 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rmi_7_3, y1i); + psi_i_p3_p7 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rmi_5_3, y1i); + psi_i_p3_p5 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rmi_3_3, y1i); + psi_i_p3_p3 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rmi_1_3, y1i); + psi_i_p3_p1 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rpi_1_3, y1i); + psi_i_p3_m1 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rpi_3_3, y1i); + psi_i_p3_m3 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rpi_5_3, y1i); + psi_i_p3_m5 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rpi_7_3, y1i); + psi_i_p3_m7 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rmi_7_1, y1i); + psi_i_p1_p7 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rmi_5_1, y1i); + psi_i_p1_p5 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rmi_3_1, y1i); + psi_i_p1_p3 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rmi_1_1, y1i); + psi_i_p1_p1 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rpi_1_1, y1i); + psi_i_p1_m1 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rpi_3_1, y1i); + psi_i_p1_m3 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rpi_5_1, y1i); + psi_i_p1_m5 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rpi_7_1, y1i); + psi_i_p1_m7 = _mm256_abs_epi16(xmm2); + + xmm2 = _mm256_subs_epi16(rho_rpi_7_1, y1i); + psi_i_m1_p7 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rpi_5_1, y1i); + psi_i_m1_p5 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rpi_3_1, y1i); + psi_i_m1_p3 = 
_mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rpi_1_1, y1i); + psi_i_m1_p1 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rmi_1_1, y1i); + psi_i_m1_m1 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rmi_3_1, y1i); + psi_i_m1_m3 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rmi_5_1, y1i); + psi_i_m1_m5 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rmi_7_1, y1i); + psi_i_m1_m7 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rpi_7_3, y1i); + psi_i_m3_p7 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rpi_5_3, y1i); + psi_i_m3_p5 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rpi_3_3, y1i); + psi_i_m3_p3 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rpi_1_3, y1i); + psi_i_m3_p1 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rmi_1_3, y1i); + psi_i_m3_m1 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rmi_3_3, y1i); + psi_i_m3_m3 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rmi_5_3, y1i); + psi_i_m3_m5 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rmi_7_3, y1i); + psi_i_m3_m7 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rpi_7_5, y1i); + psi_i_m5_p7 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rpi_5_5, y1i); + psi_i_m5_p5 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rpi_3_5, y1i); + psi_i_m5_p3 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rpi_1_5, y1i); + psi_i_m5_p1 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rmi_1_5, y1i); + psi_i_m5_m1 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rmi_3_5, y1i); + psi_i_m5_m3 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rmi_5_5, y1i); + psi_i_m5_m5 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rmi_7_5, y1i); + psi_i_m5_m7 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rpi_7_7, y1i); + psi_i_m7_p7 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rpi_5_7, y1i); + psi_i_m7_p5 = 
_mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rpi_3_7, y1i); + psi_i_m7_p3 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rpi_1_7, y1i); + psi_i_m7_p1 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rmi_1_7, y1i); + psi_i_m7_m1 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rmi_3_7, y1i); + psi_i_m7_m3 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rmi_5_7, y1i); + psi_i_m7_m5 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rmi_7_7, y1i); + psi_i_m7_m7 = _mm256_abs_epi16(xmm2); + +/* + // Rearrange desired MF output + xmm0 = stream0_128i_in[i]; + xmm1 = stream0_128i_in[i+1]; + xmm0 = _mm256_shufflelo_epi16(xmm0,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm0 = _mm256_shufflehi_epi16(xmm0,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm0 = _mm256_shuffle_epi32(xmm0,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm1 = _mm256_shufflelo_epi16(xmm1,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm1 = _mm256_shufflehi_epi16(xmm1,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm1 = _mm256_shuffle_epi32(xmm1,0xd8); //_MM_SHUFFLE(0,2,1,3)); + //xmm0 = [Re(0,1) Re(2,3) Im(0,1) Im(2,3)] + //xmm1 = [Re(4,5) Re(6,7) Im(4,5) Im(6,7)] + y0r = _mm256_unpacklo_epi64(xmm0,xmm1); // = [y0r(1),y0r(2),y0r(3),y0r(4)] + y0i = _mm256_unpackhi_epi64(xmm0,xmm1); +*/ + seperate_real_imag_parts(&y0r, &y0i, stream0_256i_in[i], stream0_256i_in[i+1]); + + /* + // Rearrange desired channel magnitudes + xmm2 = ch_mag_128i[i]; // = [|h|^2(1),|h|^2(1),|h|^2(2),|h|^2(2)]*(2/sqrt(10)) + xmm3 = ch_mag_128i[i+1]; // = [|h|^2(3),|h|^2(3),|h|^2(4),|h|^2(4)]*(2/sqrt(10)) + xmm2 = _mm256_shufflelo_epi16(xmm2,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm2 = _mm256_shufflehi_epi16(xmm2,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm2 = _mm256_shuffle_epi32(xmm2,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm3 = _mm256_shufflelo_epi16(xmm3,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm3 = _mm256_shufflehi_epi16(xmm3,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm3 = _mm256_shuffle_epi32(xmm3,0xd8); //_MM_SHUFFLE(0,2,1,3)); + ch_mag_des = 
_mm256_unpacklo_epi64(xmm2,xmm3); + */ + + seperate_real_imag_parts(&ch_mag_des, &xmm2, ch_mag_256i[i], ch_mag_256i[i+1]); + + // Rearrange interfering channel magnitudes + /* + xmm2 = ch_mag_128i_i[i]; + xmm3 = ch_mag_128i_i[i+1]; + xmm2 = _mm256_shufflelo_epi16(xmm2,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm2 = _mm256_shufflehi_epi16(xmm2,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm2 = _mm256_shuffle_epi32(xmm2,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm3 = _mm256_shufflelo_epi16(xmm3,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm3 = _mm256_shufflehi_epi16(xmm3,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm3 = _mm256_shuffle_epi32(xmm3,0xd8); //_MM_SHUFFLE(0,2,1,3)); + ch_mag_int = _mm256_unpacklo_epi64(xmm2,xmm3); + */ + + seperate_real_imag_parts(&ch_mag_int, &xmm2, ch_mag_256i_i[i], ch_mag_256i_i[i+1]); + + y0r_one_over_sqrt_21 = _mm256_mulhi_epi16(y0r, ONE_OVER_SQRT_42); + y0r_three_over_sqrt_21 = _mm256_mulhi_epi16(y0r, THREE_OVER_SQRT_42); + y0r_five_over_sqrt_21 = _mm256_mulhi_epi16(y0r, FIVE_OVER_SQRT_42); + y0r_five_over_sqrt_21 = _mm256_slli_epi16(y0r_five_over_sqrt_21, 1); + y0r_seven_over_sqrt_21 = _mm256_mulhi_epi16(y0r, SEVEN_OVER_SQRT_42); + y0r_seven_over_sqrt_21 = _mm256_slli_epi16(y0r_seven_over_sqrt_21, 2); // Q2.14 + + y0i_one_over_sqrt_21 = _mm256_mulhi_epi16(y0i, ONE_OVER_SQRT_42); + y0i_three_over_sqrt_21 = _mm256_mulhi_epi16(y0i, THREE_OVER_SQRT_42); + y0i_five_over_sqrt_21 = _mm256_mulhi_epi16(y0i, FIVE_OVER_SQRT_42); + y0i_five_over_sqrt_21 = _mm256_slli_epi16(y0i_five_over_sqrt_21, 1); + y0i_seven_over_sqrt_21 = _mm256_mulhi_epi16(y0i, SEVEN_OVER_SQRT_42); + y0i_seven_over_sqrt_21 = _mm256_slli_epi16(y0i_seven_over_sqrt_21, 2); // Q2.14 + + y0_p_7_1 = _mm256_adds_epi16(y0r_seven_over_sqrt_21, y0i_one_over_sqrt_21); + y0_p_7_3 = _mm256_adds_epi16(y0r_seven_over_sqrt_21, y0i_three_over_sqrt_21); + y0_p_7_5 = _mm256_adds_epi16(y0r_seven_over_sqrt_21, y0i_five_over_sqrt_21); + y0_p_7_7 = _mm256_adds_epi16(y0r_seven_over_sqrt_21, y0i_seven_over_sqrt_21); + y0_p_5_1 = 
_mm256_adds_epi16(y0r_five_over_sqrt_21, y0i_one_over_sqrt_21); + y0_p_5_3 = _mm256_adds_epi16(y0r_five_over_sqrt_21, y0i_three_over_sqrt_21); + y0_p_5_5 = _mm256_adds_epi16(y0r_five_over_sqrt_21, y0i_five_over_sqrt_21); + y0_p_5_7 = _mm256_adds_epi16(y0r_five_over_sqrt_21, y0i_seven_over_sqrt_21); + y0_p_3_1 = _mm256_adds_epi16(y0r_three_over_sqrt_21, y0i_one_over_sqrt_21); + y0_p_3_3 = _mm256_adds_epi16(y0r_three_over_sqrt_21, y0i_three_over_sqrt_21); + y0_p_3_5 = _mm256_adds_epi16(y0r_three_over_sqrt_21, y0i_five_over_sqrt_21); + y0_p_3_7 = _mm256_adds_epi16(y0r_three_over_sqrt_21, y0i_seven_over_sqrt_21); + y0_p_1_1 = _mm256_adds_epi16(y0r_one_over_sqrt_21, y0i_one_over_sqrt_21); + y0_p_1_3 = _mm256_adds_epi16(y0r_one_over_sqrt_21, y0i_three_over_sqrt_21); + y0_p_1_5 = _mm256_adds_epi16(y0r_one_over_sqrt_21, y0i_five_over_sqrt_21); + y0_p_1_7 = _mm256_adds_epi16(y0r_one_over_sqrt_21, y0i_seven_over_sqrt_21); + + y0_m_1_1 = _mm256_subs_epi16(y0r_one_over_sqrt_21, y0i_one_over_sqrt_21); + y0_m_1_3 = _mm256_subs_epi16(y0r_one_over_sqrt_21, y0i_three_over_sqrt_21); + y0_m_1_5 = _mm256_subs_epi16(y0r_one_over_sqrt_21, y0i_five_over_sqrt_21); + y0_m_1_7 = _mm256_subs_epi16(y0r_one_over_sqrt_21, y0i_seven_over_sqrt_21); + y0_m_3_1 = _mm256_subs_epi16(y0r_three_over_sqrt_21, y0i_one_over_sqrt_21); + y0_m_3_3 = _mm256_subs_epi16(y0r_three_over_sqrt_21, y0i_three_over_sqrt_21); + y0_m_3_5 = _mm256_subs_epi16(y0r_three_over_sqrt_21, y0i_five_over_sqrt_21); + y0_m_3_7 = _mm256_subs_epi16(y0r_three_over_sqrt_21, y0i_seven_over_sqrt_21); + y0_m_5_1 = _mm256_subs_epi16(y0r_five_over_sqrt_21, y0i_one_over_sqrt_21); + y0_m_5_3 = _mm256_subs_epi16(y0r_five_over_sqrt_21, y0i_three_over_sqrt_21); + y0_m_5_5 = _mm256_subs_epi16(y0r_five_over_sqrt_21, y0i_five_over_sqrt_21); + y0_m_5_7 = _mm256_subs_epi16(y0r_five_over_sqrt_21, y0i_seven_over_sqrt_21); + y0_m_7_1 = _mm256_subs_epi16(y0r_seven_over_sqrt_21, y0i_one_over_sqrt_21); + y0_m_7_3 = _mm256_subs_epi16(y0r_seven_over_sqrt_21, 
y0i_three_over_sqrt_21); + y0_m_7_5 = _mm256_subs_epi16(y0r_seven_over_sqrt_21, y0i_five_over_sqrt_21); + y0_m_7_7 = _mm256_subs_epi16(y0r_seven_over_sqrt_21, y0i_seven_over_sqrt_21); + + interference_abs_epi16(psi_r_p7_p7, ch_mag_int, a_r_p7_p7, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_r_p7_p5, ch_mag_int, a_r_p7_p5, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_r_p7_p3, ch_mag_int, a_r_p7_p3, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_r_p7_p1, ch_mag_int, a_r_p7_p1, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_r_p7_m1, ch_mag_int, a_r_p7_m1, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_r_p7_m3, ch_mag_int, a_r_p7_m3, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_r_p7_m5, ch_mag_int, a_r_p7_m5, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_r_p7_m7, ch_mag_int, a_r_p7_m7, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_r_p5_p7, ch_mag_int, a_r_p5_p7, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_r_p5_p5, ch_mag_int, a_r_p5_p5, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_r_p5_p3, ch_mag_int, a_r_p5_p3, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_r_p5_p1, ch_mag_int, a_r_p5_p1, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_r_p5_m1, ch_mag_int, a_r_p5_m1, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_r_p5_m3, ch_mag_int, a_r_p5_m3, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_r_p5_m5, ch_mag_int, a_r_p5_m5, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_r_p5_m7, ch_mag_int, a_r_p5_m7, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_r_p3_p7, ch_mag_int, a_r_p3_p7, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_r_p3_p5, 
ch_mag_int, a_r_p3_p5, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_r_p3_p3, ch_mag_int, a_r_p3_p3, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_r_p3_p1, ch_mag_int, a_r_p3_p1, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_r_p3_m1, ch_mag_int, a_r_p3_m1, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_r_p3_m3, ch_mag_int, a_r_p3_m3, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_r_p3_m5, ch_mag_int, a_r_p3_m5, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_r_p3_m7, ch_mag_int, a_r_p3_m7, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_r_p1_p7, ch_mag_int, a_r_p1_p7, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_r_p1_p5, ch_mag_int, a_r_p1_p5, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_r_p1_p3, ch_mag_int, a_r_p1_p3, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_r_p1_p1, ch_mag_int, a_r_p1_p1, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_r_p1_m1, ch_mag_int, a_r_p1_m1, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_r_p1_m3, ch_mag_int, a_r_p1_m3, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_r_p1_m5, ch_mag_int, a_r_p1_m5, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_r_p1_m7, ch_mag_int, a_r_p1_m7, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_r_m1_p7, ch_mag_int, a_r_m1_p7, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_r_m1_p5, ch_mag_int, a_r_m1_p5, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_r_m1_p3, ch_mag_int, a_r_m1_p3, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_r_m1_p1, ch_mag_int, a_r_m1_p1, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_r_m1_m1, ch_mag_int, a_r_m1_m1, 
ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_r_m1_m3, ch_mag_int, a_r_m1_m3, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_r_m1_m5, ch_mag_int, a_r_m1_m5, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_r_m1_m7, ch_mag_int, a_r_m1_m7, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_r_m3_p7, ch_mag_int, a_r_m3_p7, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_r_m3_p5, ch_mag_int, a_r_m3_p5, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_r_m3_p3, ch_mag_int, a_r_m3_p3, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_r_m3_p1, ch_mag_int, a_r_m3_p1, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_r_m3_m1, ch_mag_int, a_r_m3_m1, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_r_m3_m3, ch_mag_int, a_r_m3_m3, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_r_m3_m5, ch_mag_int, a_r_m3_m5, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_r_m3_m7, ch_mag_int, a_r_m3_m7, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_r_m5_p7, ch_mag_int, a_r_m5_p7, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_r_m5_p5, ch_mag_int, a_r_m5_p5, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_r_m5_p3, ch_mag_int, a_r_m5_p3, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_r_m5_p1, ch_mag_int, a_r_m5_p1, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_r_m5_m1, ch_mag_int, a_r_m5_m1, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_r_m5_m3, ch_mag_int, a_r_m5_m3, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_r_m5_m5, ch_mag_int, a_r_m5_m5, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_r_m5_m7, ch_mag_int, a_r_m5_m7, ONE_OVER_SQRT_10_Q15, 
THREE_OVER_SQRT_10); + interference_abs_epi16(psi_r_m7_p7, ch_mag_int, a_r_m7_p7, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_r_m7_p5, ch_mag_int, a_r_m7_p5, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_r_m7_p3, ch_mag_int, a_r_m7_p3, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_r_m7_p1, ch_mag_int, a_r_m7_p1, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_r_m7_m1, ch_mag_int, a_r_m7_m1, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_r_m7_m3, ch_mag_int, a_r_m7_m3, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_r_m7_m5, ch_mag_int, a_r_m7_m5, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_r_m7_m7, ch_mag_int, a_r_m7_m7, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + + interference_abs_epi16(psi_i_p7_p7, ch_mag_int, a_i_p7_p7, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_i_p7_p5, ch_mag_int, a_i_p7_p5, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_i_p7_p3, ch_mag_int, a_i_p7_p3, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_i_p7_p1, ch_mag_int, a_i_p7_p1, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_i_p7_m1, ch_mag_int, a_i_p7_m1, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_i_p7_m3, ch_mag_int, a_i_p7_m3, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_i_p7_m5, ch_mag_int, a_i_p7_m5, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_i_p7_m7, ch_mag_int, a_i_p7_m7, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_i_p5_p7, ch_mag_int, a_i_p5_p7, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_i_p5_p5, ch_mag_int, a_i_p5_p5, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_i_p5_p3, ch_mag_int, a_i_p5_p3, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); 
+ interference_abs_epi16(psi_i_p5_p1, ch_mag_int, a_i_p5_p1, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_i_p5_m1, ch_mag_int, a_i_p5_m1, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_i_p5_m3, ch_mag_int, a_i_p5_m3, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_i_p5_m5, ch_mag_int, a_i_p5_m5, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_i_p5_m7, ch_mag_int, a_i_p5_m7, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_i_p3_p7, ch_mag_int, a_i_p3_p7, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_i_p3_p5, ch_mag_int, a_i_p3_p5, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_i_p3_p3, ch_mag_int, a_i_p3_p3, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_i_p3_p1, ch_mag_int, a_i_p3_p1, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_i_p3_m1, ch_mag_int, a_i_p3_m1, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_i_p3_m3, ch_mag_int, a_i_p3_m3, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_i_p3_m5, ch_mag_int, a_i_p3_m5, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_i_p3_m7, ch_mag_int, a_i_p3_m7, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_i_p1_p7, ch_mag_int, a_i_p1_p7, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_i_p1_p5, ch_mag_int, a_i_p1_p5, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_i_p1_p3, ch_mag_int, a_i_p1_p3, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_i_p1_p1, ch_mag_int, a_i_p1_p1, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_i_p1_m1, ch_mag_int, a_i_p1_m1, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_i_p1_m3, ch_mag_int, a_i_p1_m3, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + 
interference_abs_epi16(psi_i_p1_m5, ch_mag_int, a_i_p1_m5, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_i_p1_m7, ch_mag_int, a_i_p1_m7, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_i_m1_p7, ch_mag_int, a_i_m1_p7, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_i_m1_p5, ch_mag_int, a_i_m1_p5, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_i_m1_p3, ch_mag_int, a_i_m1_p3, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_i_m1_p1, ch_mag_int, a_i_m1_p1, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_i_m1_m1, ch_mag_int, a_i_m1_m1, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_i_m1_m3, ch_mag_int, a_i_m1_m3, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_i_m1_m5, ch_mag_int, a_i_m1_m5, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_i_m1_m7, ch_mag_int, a_i_m1_m7, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_i_m3_p7, ch_mag_int, a_i_m3_p7, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_i_m3_p5, ch_mag_int, a_i_m3_p5, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_i_m3_p3, ch_mag_int, a_i_m3_p3, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_i_m3_p1, ch_mag_int, a_i_m3_p1, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_i_m3_m1, ch_mag_int, a_i_m3_m1, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_i_m3_m3, ch_mag_int, a_i_m3_m3, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_i_m3_m5, ch_mag_int, a_i_m3_m5, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_i_m3_m7, ch_mag_int, a_i_m3_m7, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_i_m5_p7, ch_mag_int, a_i_m5_p7, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + 
interference_abs_epi16(psi_i_m5_p5, ch_mag_int, a_i_m5_p5, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_i_m5_p3, ch_mag_int, a_i_m5_p3, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_i_m5_p1, ch_mag_int, a_i_m5_p1, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_i_m5_m1, ch_mag_int, a_i_m5_m1, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_i_m5_m3, ch_mag_int, a_i_m5_m3, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_i_m5_m5, ch_mag_int, a_i_m5_m5, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_i_m5_m7, ch_mag_int, a_i_m5_m7, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_i_m7_p7, ch_mag_int, a_i_m7_p7, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_i_m7_p5, ch_mag_int, a_i_m7_p5, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_i_m7_p3, ch_mag_int, a_i_m7_p3, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_i_m7_p1, ch_mag_int, a_i_m7_p1, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_i_m7_m1, ch_mag_int, a_i_m7_m1, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_i_m7_m3, ch_mag_int, a_i_m7_m3, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_i_m7_m5, ch_mag_int, a_i_m7_m5, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_i_m7_m7, ch_mag_int, a_i_m7_m7, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + + // Calculation of a group of two terms in the bit metric involving product of psi and interference + prodsum_psi_a_epi16(psi_r_p7_p7, a_r_p7_p7, psi_i_p7_p7, a_i_p7_p7, psi_a_p7_p7); + prodsum_psi_a_epi16(psi_r_p7_p5, a_r_p7_p5, psi_i_p7_p5, a_i_p7_p5, psi_a_p7_p5); + prodsum_psi_a_epi16(psi_r_p7_p3, a_r_p7_p3, psi_i_p7_p3, a_i_p7_p3, psi_a_p7_p3); + prodsum_psi_a_epi16(psi_r_p7_p1, a_r_p7_p1, psi_i_p7_p1, a_i_p7_p1, psi_a_p7_p1); + 
prodsum_psi_a_epi16(psi_r_p7_m1, a_r_p7_m1, psi_i_p7_m1, a_i_p7_m1, psi_a_p7_m1); + prodsum_psi_a_epi16(psi_r_p7_m3, a_r_p7_m3, psi_i_p7_m3, a_i_p7_m3, psi_a_p7_m3); + prodsum_psi_a_epi16(psi_r_p7_m5, a_r_p7_m5, psi_i_p7_m5, a_i_p7_m5, psi_a_p7_m5); + prodsum_psi_a_epi16(psi_r_p7_m7, a_r_p7_m7, psi_i_p7_m7, a_i_p7_m7, psi_a_p7_m7); + prodsum_psi_a_epi16(psi_r_p5_p7, a_r_p5_p7, psi_i_p5_p7, a_i_p5_p7, psi_a_p5_p7); + prodsum_psi_a_epi16(psi_r_p5_p5, a_r_p5_p5, psi_i_p5_p5, a_i_p5_p5, psi_a_p5_p5); + prodsum_psi_a_epi16(psi_r_p5_p3, a_r_p5_p3, psi_i_p5_p3, a_i_p5_p3, psi_a_p5_p3); + prodsum_psi_a_epi16(psi_r_p5_p1, a_r_p5_p1, psi_i_p5_p1, a_i_p5_p1, psi_a_p5_p1); + prodsum_psi_a_epi16(psi_r_p5_m1, a_r_p5_m1, psi_i_p5_m1, a_i_p5_m1, psi_a_p5_m1); + prodsum_psi_a_epi16(psi_r_p5_m3, a_r_p5_m3, psi_i_p5_m3, a_i_p5_m3, psi_a_p5_m3); + prodsum_psi_a_epi16(psi_r_p5_m5, a_r_p5_m5, psi_i_p5_m5, a_i_p5_m5, psi_a_p5_m5); + prodsum_psi_a_epi16(psi_r_p5_m7, a_r_p5_m7, psi_i_p5_m7, a_i_p5_m7, psi_a_p5_m7); + prodsum_psi_a_epi16(psi_r_p3_p7, a_r_p3_p7, psi_i_p3_p7, a_i_p3_p7, psi_a_p3_p7); + prodsum_psi_a_epi16(psi_r_p3_p5, a_r_p3_p5, psi_i_p3_p5, a_i_p3_p5, psi_a_p3_p5); + prodsum_psi_a_epi16(psi_r_p3_p3, a_r_p3_p3, psi_i_p3_p3, a_i_p3_p3, psi_a_p3_p3); + prodsum_psi_a_epi16(psi_r_p3_p1, a_r_p3_p1, psi_i_p3_p1, a_i_p3_p1, psi_a_p3_p1); + prodsum_psi_a_epi16(psi_r_p3_m1, a_r_p3_m1, psi_i_p3_m1, a_i_p3_m1, psi_a_p3_m1); + prodsum_psi_a_epi16(psi_r_p3_m3, a_r_p3_m3, psi_i_p3_m3, a_i_p3_m3, psi_a_p3_m3); + prodsum_psi_a_epi16(psi_r_p3_m5, a_r_p3_m5, psi_i_p3_m5, a_i_p3_m5, psi_a_p3_m5); + prodsum_psi_a_epi16(psi_r_p3_m7, a_r_p3_m7, psi_i_p3_m7, a_i_p3_m7, psi_a_p3_m7); + prodsum_psi_a_epi16(psi_r_p1_p7, a_r_p1_p7, psi_i_p1_p7, a_i_p1_p7, psi_a_p1_p7); + prodsum_psi_a_epi16(psi_r_p1_p5, a_r_p1_p5, psi_i_p1_p5, a_i_p1_p5, psi_a_p1_p5); + prodsum_psi_a_epi16(psi_r_p1_p3, a_r_p1_p3, psi_i_p1_p3, a_i_p1_p3, psi_a_p1_p3); + prodsum_psi_a_epi16(psi_r_p1_p1, a_r_p1_p1, psi_i_p1_p1, a_i_p1_p1, 
psi_a_p1_p1); + prodsum_psi_a_epi16(psi_r_p1_m1, a_r_p1_m1, psi_i_p1_m1, a_i_p1_m1, psi_a_p1_m1); + prodsum_psi_a_epi16(psi_r_p1_m3, a_r_p1_m3, psi_i_p1_m3, a_i_p1_m3, psi_a_p1_m3); + prodsum_psi_a_epi16(psi_r_p1_m5, a_r_p1_m5, psi_i_p1_m5, a_i_p1_m5, psi_a_p1_m5); + prodsum_psi_a_epi16(psi_r_p1_m7, a_r_p1_m7, psi_i_p1_m7, a_i_p1_m7, psi_a_p1_m7); + prodsum_psi_a_epi16(psi_r_m1_p7, a_r_m1_p7, psi_i_m1_p7, a_i_m1_p7, psi_a_m1_p7); + prodsum_psi_a_epi16(psi_r_m1_p5, a_r_m1_p5, psi_i_m1_p5, a_i_m1_p5, psi_a_m1_p5); + prodsum_psi_a_epi16(psi_r_m1_p3, a_r_m1_p3, psi_i_m1_p3, a_i_m1_p3, psi_a_m1_p3); + prodsum_psi_a_epi16(psi_r_m1_p1, a_r_m1_p1, psi_i_m1_p1, a_i_m1_p1, psi_a_m1_p1); + prodsum_psi_a_epi16(psi_r_m1_m1, a_r_m1_m1, psi_i_m1_m1, a_i_m1_m1, psi_a_m1_m1); + prodsum_psi_a_epi16(psi_r_m1_m3, a_r_m1_m3, psi_i_m1_m3, a_i_m1_m3, psi_a_m1_m3); + prodsum_psi_a_epi16(psi_r_m1_m5, a_r_m1_m5, psi_i_m1_m5, a_i_m1_m5, psi_a_m1_m5); + prodsum_psi_a_epi16(psi_r_m1_m7, a_r_m1_m7, psi_i_m1_m7, a_i_m1_m7, psi_a_m1_m7); + prodsum_psi_a_epi16(psi_r_m3_p7, a_r_m3_p7, psi_i_m3_p7, a_i_m3_p7, psi_a_m3_p7); + prodsum_psi_a_epi16(psi_r_m3_p5, a_r_m3_p5, psi_i_m3_p5, a_i_m3_p5, psi_a_m3_p5); + prodsum_psi_a_epi16(psi_r_m3_p3, a_r_m3_p3, psi_i_m3_p3, a_i_m3_p3, psi_a_m3_p3); + prodsum_psi_a_epi16(psi_r_m3_p1, a_r_m3_p1, psi_i_m3_p1, a_i_m3_p1, psi_a_m3_p1); + prodsum_psi_a_epi16(psi_r_m3_m1, a_r_m3_m1, psi_i_m3_m1, a_i_m3_m1, psi_a_m3_m1); + prodsum_psi_a_epi16(psi_r_m3_m3, a_r_m3_m3, psi_i_m3_m3, a_i_m3_m3, psi_a_m3_m3); + prodsum_psi_a_epi16(psi_r_m3_m5, a_r_m3_m5, psi_i_m3_m5, a_i_m3_m5, psi_a_m3_m5); + prodsum_psi_a_epi16(psi_r_m3_m7, a_r_m3_m7, psi_i_m3_m7, a_i_m3_m7, psi_a_m3_m7); + prodsum_psi_a_epi16(psi_r_m5_p7, a_r_m5_p7, psi_i_m5_p7, a_i_m5_p7, psi_a_m5_p7); + prodsum_psi_a_epi16(psi_r_m5_p5, a_r_m5_p5, psi_i_m5_p5, a_i_m5_p5, psi_a_m5_p5); + prodsum_psi_a_epi16(psi_r_m5_p3, a_r_m5_p3, psi_i_m5_p3, a_i_m5_p3, psi_a_m5_p3); + prodsum_psi_a_epi16(psi_r_m5_p1, a_r_m5_p1, 
psi_i_m5_p1, a_i_m5_p1, psi_a_m5_p1); + prodsum_psi_a_epi16(psi_r_m5_m1, a_r_m5_m1, psi_i_m5_m1, a_i_m5_m1, psi_a_m5_m1); + prodsum_psi_a_epi16(psi_r_m5_m3, a_r_m5_m3, psi_i_m5_m3, a_i_m5_m3, psi_a_m5_m3); + prodsum_psi_a_epi16(psi_r_m5_m5, a_r_m5_m5, psi_i_m5_m5, a_i_m5_m5, psi_a_m5_m5); + prodsum_psi_a_epi16(psi_r_m5_m7, a_r_m5_m7, psi_i_m5_m7, a_i_m5_m7, psi_a_m5_m7); + prodsum_psi_a_epi16(psi_r_m7_p7, a_r_m7_p7, psi_i_m7_p7, a_i_m7_p7, psi_a_m7_p7); + prodsum_psi_a_epi16(psi_r_m7_p5, a_r_m7_p5, psi_i_m7_p5, a_i_m7_p5, psi_a_m7_p5); + prodsum_psi_a_epi16(psi_r_m7_p3, a_r_m7_p3, psi_i_m7_p3, a_i_m7_p3, psi_a_m7_p3); + prodsum_psi_a_epi16(psi_r_m7_p1, a_r_m7_p1, psi_i_m7_p1, a_i_m7_p1, psi_a_m7_p1); + prodsum_psi_a_epi16(psi_r_m7_m1, a_r_m7_m1, psi_i_m7_m1, a_i_m7_m1, psi_a_m7_m1); + prodsum_psi_a_epi16(psi_r_m7_m3, a_r_m7_m3, psi_i_m7_m3, a_i_m7_m3, psi_a_m7_m3); + prodsum_psi_a_epi16(psi_r_m7_m5, a_r_m7_m5, psi_i_m7_m5, a_i_m7_m5, psi_a_m7_m5); + prodsum_psi_a_epi16(psi_r_m7_m7, a_r_m7_m7, psi_i_m7_m7, a_i_m7_m7, psi_a_m7_m7); + + // Calculation of a group of two terms in the bit metric involving squares of interference + square_a_epi16(a_r_p7_p7, a_i_p7_p7, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_p7_p7); + square_a_epi16(a_r_p7_p5, a_i_p7_p5, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_p7_p5); + square_a_epi16(a_r_p7_p3, a_i_p7_p3, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_p7_p3); + square_a_epi16(a_r_p7_p1, a_i_p7_p1, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_p7_p1); + square_a_epi16(a_r_p7_m1, a_i_p7_m1, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_p7_m1); + square_a_epi16(a_r_p7_m3, a_i_p7_m3, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_p7_m3); + square_a_epi16(a_r_p7_m5, a_i_p7_m5, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_p7_m5); + square_a_epi16(a_r_p7_m7, a_i_p7_m7, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_p7_m7); + square_a_epi16(a_r_p5_p7, a_i_p5_p7, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_p5_p7); + square_a_epi16(a_r_p5_p5, a_i_p5_p5, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_p5_p5); + square_a_epi16(a_r_p5_p3, 
a_i_p5_p3, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_p5_p3); + square_a_epi16(a_r_p5_p1, a_i_p5_p1, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_p5_p1); + square_a_epi16(a_r_p5_m1, a_i_p5_m1, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_p5_m1); + square_a_epi16(a_r_p5_m3, a_i_p5_m3, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_p5_m3); + square_a_epi16(a_r_p5_m5, a_i_p5_m5, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_p5_m5); + square_a_epi16(a_r_p5_m7, a_i_p5_m7, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_p5_m7); + square_a_epi16(a_r_p3_p7, a_i_p3_p7, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_p3_p7); + square_a_epi16(a_r_p3_p5, a_i_p3_p5, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_p3_p5); + square_a_epi16(a_r_p3_p3, a_i_p3_p3, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_p3_p3); + square_a_epi16(a_r_p3_p1, a_i_p3_p1, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_p3_p1); + square_a_epi16(a_r_p3_m1, a_i_p3_m1, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_p3_m1); + square_a_epi16(a_r_p3_m3, a_i_p3_m3, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_p3_m3); + square_a_epi16(a_r_p3_m5, a_i_p3_m5, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_p3_m5); + square_a_epi16(a_r_p3_m7, a_i_p3_m7, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_p3_m7); + square_a_epi16(a_r_p1_p7, a_i_p1_p7, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_p1_p7); + square_a_epi16(a_r_p1_p5, a_i_p1_p5, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_p1_p5); + square_a_epi16(a_r_p1_p3, a_i_p1_p3, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_p1_p3); + square_a_epi16(a_r_p1_p1, a_i_p1_p1, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_p1_p1); + square_a_epi16(a_r_p1_m1, a_i_p1_m1, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_p1_m1); + square_a_epi16(a_r_p1_m3, a_i_p1_m3, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_p1_m3); + square_a_epi16(a_r_p1_m5, a_i_p1_m5, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_p1_m5); + square_a_epi16(a_r_p1_m7, a_i_p1_m7, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_p1_m7); + square_a_epi16(a_r_m1_p7, a_i_m1_p7, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_m1_p7); + square_a_epi16(a_r_m1_p5, a_i_m1_p5, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_m1_p5); + square_a_epi16(a_r_m1_p3, 
a_i_m1_p3, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_m1_p3); + square_a_epi16(a_r_m1_p1, a_i_m1_p1, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_m1_p1); + square_a_epi16(a_r_m1_m1, a_i_m1_m1, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_m1_m1); + square_a_epi16(a_r_m1_m3, a_i_m1_m3, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_m1_m3); + square_a_epi16(a_r_m1_m5, a_i_m1_m5, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_m1_m5); + square_a_epi16(a_r_m1_m7, a_i_m1_m7, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_m1_m7); + square_a_epi16(a_r_m3_p7, a_i_m3_p7, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_m3_p7); + square_a_epi16(a_r_m3_p5, a_i_m3_p5, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_m3_p5); + square_a_epi16(a_r_m3_p3, a_i_m3_p3, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_m3_p3); + square_a_epi16(a_r_m3_p1, a_i_m3_p1, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_m3_p1); + square_a_epi16(a_r_m3_m1, a_i_m3_m1, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_m3_m1); + square_a_epi16(a_r_m3_m3, a_i_m3_m3, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_m3_m3); + square_a_epi16(a_r_m3_m5, a_i_m3_m5, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_m3_m5); + square_a_epi16(a_r_m3_m7, a_i_m3_m7, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_m3_m7); + square_a_epi16(a_r_m5_p7, a_i_m5_p7, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_m5_p7); + square_a_epi16(a_r_m5_p5, a_i_m5_p5, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_m5_p5); + square_a_epi16(a_r_m5_p3, a_i_m5_p3, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_m5_p3); + square_a_epi16(a_r_m5_p1, a_i_m5_p1, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_m5_p1); + square_a_epi16(a_r_m5_m1, a_i_m5_m1, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_m5_m1); + square_a_epi16(a_r_m5_m3, a_i_m5_m3, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_m5_m3); + square_a_epi16(a_r_m5_m5, a_i_m5_m5, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_m5_m5); + square_a_epi16(a_r_m5_m7, a_i_m5_m7, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_m5_m7); + square_a_epi16(a_r_m7_p7, a_i_m7_p7, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_m7_p7); + square_a_epi16(a_r_m7_p5, a_i_m7_p5, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_m7_p5); + square_a_epi16(a_r_m7_p3, 
a_i_m7_p3, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_m7_p3); + square_a_epi16(a_r_m7_p1, a_i_m7_p1, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_m7_p1); + square_a_epi16(a_r_m7_m1, a_i_m7_m1, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_m7_m1); + square_a_epi16(a_r_m7_m3, a_i_m7_m3, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_m7_m3); + square_a_epi16(a_r_m7_m5, a_i_m7_m5, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_m7_m5); + square_a_epi16(a_r_m7_m7, a_i_m7_m7, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_m7_m7); + + // Computing different multiples of ||h0||^2 + // x=1, y=1 + ch_mag_2_over_42_with_sigma2 = _mm256_mulhi_epi16(ch_mag_des,ONE_OVER_FOUR_SQRT_42); + ch_mag_2_over_42_with_sigma2 = _mm256_slli_epi16(ch_mag_2_over_42_with_sigma2,1); + // x=1, y=3 + ch_mag_10_over_42_with_sigma2 = _mm256_mulhi_epi16(ch_mag_des,FIVE_OVER_FOUR_SQRT_42); + ch_mag_10_over_42_with_sigma2 = _mm256_slli_epi16(ch_mag_10_over_42_with_sigma2,1); + // x=1, y=5 + ch_mag_26_over_42_with_sigma2 = _mm256_mulhi_epi16(ch_mag_des,THIRTEEN_OVER_FOUR_SQRT_42); + ch_mag_26_over_42_with_sigma2 = _mm256_slli_epi16(ch_mag_26_over_42_with_sigma2,1); + // x=1, y=7 + ch_mag_50_over_42_with_sigma2 = _mm256_mulhi_epi16(ch_mag_des,TWENTYFIVE_OVER_FOUR_SQRT_42); + ch_mag_50_over_42_with_sigma2 = _mm256_slli_epi16(ch_mag_50_over_42_with_sigma2,1); + // x=3, y=3 + ch_mag_18_over_42_with_sigma2 = _mm256_mulhi_epi16(ch_mag_des,NINE_OVER_FOUR_SQRT_42); + ch_mag_18_over_42_with_sigma2 = _mm256_slli_epi16(ch_mag_18_over_42_with_sigma2,1); + // x=3, y=5 + ch_mag_34_over_42_with_sigma2 = _mm256_mulhi_epi16(ch_mag_des,SEVENTEEN_OVER_FOUR_SQRT_42); + ch_mag_34_over_42_with_sigma2 = _mm256_slli_epi16(ch_mag_34_over_42_with_sigma2,1); + // x=3, y=7 + ch_mag_58_over_42_with_sigma2 = _mm256_mulhi_epi16(ch_mag_des,TWENTYNINE_OVER_FOUR_SQRT_42); + ch_mag_58_over_42_with_sigma2 = _mm256_slli_epi16(ch_mag_58_over_42_with_sigma2,2); + // x=5, y=5 + ch_mag_50_over_42_with_sigma2 = _mm256_mulhi_epi16(ch_mag_des,TWENTYFIVE_OVER_FOUR_SQRT_42); + ch_mag_50_over_42_with_sigma2
= _mm256_slli_epi16(ch_mag_50_over_42_with_sigma2,1); + // x=5, y=7 + ch_mag_74_over_42_with_sigma2 = _mm256_mulhi_epi16(ch_mag_des,THIRTYSEVEN_OVER_FOUR_SQRT_42); + ch_mag_74_over_42_with_sigma2 = _mm256_slli_epi16(ch_mag_74_over_42_with_sigma2,2); + // x=7, y=7 + ch_mag_98_over_42_with_sigma2 = _mm256_mulhi_epi16(ch_mag_des,FORTYNINE_OVER_FOUR_SQRT_42); + ch_mag_98_over_42_with_sigma2 = _mm256_slli_epi16(ch_mag_98_over_42_with_sigma2,2); + + // Computing Metrics + xmm0 = _mm256_subs_epi16(psi_a_p7_p7, a_sq_p7_p7); + xmm1 = _mm256_adds_epi16(xmm0, y0_p_7_7); + bit_met_p7_p7 = _mm256_subs_epi16(xmm1, ch_mag_98_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_p7_p5, a_sq_p7_p5); + xmm1 = _mm256_adds_epi16(xmm0, y0_p_7_5); + bit_met_p7_p5 = _mm256_subs_epi16(xmm1, ch_mag_74_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_p7_p3, a_sq_p7_p3); + xmm1 = _mm256_adds_epi16(xmm0, y0_p_7_3); + bit_met_p7_p3 = _mm256_subs_epi16(xmm1, ch_mag_58_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_p7_p1, a_sq_p7_p1); + xmm1 = _mm256_adds_epi16(xmm0, y0_p_7_1); + bit_met_p7_p1 = _mm256_subs_epi16(xmm1, ch_mag_50_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_p7_m1, a_sq_p7_m1); + xmm1 = _mm256_adds_epi16(xmm0, y0_m_7_1); + bit_met_p7_m1 = _mm256_subs_epi16(xmm1, ch_mag_50_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_p7_m3, a_sq_p7_m3); + xmm1 = _mm256_adds_epi16(xmm0, y0_m_7_3); + bit_met_p7_m3 = _mm256_subs_epi16(xmm1, ch_mag_58_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_p7_m5, a_sq_p7_m5); + xmm1 = _mm256_adds_epi16(xmm0, y0_m_7_5); + bit_met_p7_m5 = _mm256_subs_epi16(xmm1, ch_mag_74_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_p7_m7, a_sq_p7_m7); + xmm1 = _mm256_adds_epi16(xmm0, y0_m_7_7); + bit_met_p7_m7 = _mm256_subs_epi16(xmm1, ch_mag_98_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_p5_p7, a_sq_p5_p7); + xmm1 = _mm256_adds_epi16(xmm0, y0_p_5_7); + bit_met_p5_p7 = _mm256_subs_epi16(xmm1, 
ch_mag_74_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_p5_p5, a_sq_p5_p5); + xmm1 = _mm256_adds_epi16(xmm0, y0_p_5_5); + bit_met_p5_p5 = _mm256_subs_epi16(xmm1, ch_mag_50_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_p5_p3, a_sq_p5_p3); + xmm1 = _mm256_adds_epi16(xmm0, y0_p_5_3); + bit_met_p5_p3 = _mm256_subs_epi16(xmm1, ch_mag_34_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_p5_p1, a_sq_p5_p1); + xmm1 = _mm256_adds_epi16(xmm0, y0_p_5_1); + bit_met_p5_p1 = _mm256_subs_epi16(xmm1, ch_mag_26_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_p5_m1, a_sq_p5_m1); + xmm1 = _mm256_adds_epi16(xmm0, y0_m_5_1); + bit_met_p5_m1 = _mm256_subs_epi16(xmm1, ch_mag_26_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_p5_m3, a_sq_p5_m3); + xmm1 = _mm256_adds_epi16(xmm0, y0_m_5_3); + bit_met_p5_m3 = _mm256_subs_epi16(xmm1, ch_mag_34_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_p5_m5, a_sq_p5_m5); + xmm1 = _mm256_adds_epi16(xmm0, y0_m_5_5); + bit_met_p5_m5 = _mm256_subs_epi16(xmm1, ch_mag_50_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_p5_m7, a_sq_p5_m7); + xmm1 = _mm256_adds_epi16(xmm0, y0_m_5_7); + bit_met_p5_m7 = _mm256_subs_epi16(xmm1, ch_mag_74_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_p3_p7, a_sq_p3_p7); + xmm1 = _mm256_adds_epi16(xmm0, y0_p_3_7); + bit_met_p3_p7 = _mm256_subs_epi16(xmm1, ch_mag_58_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_p3_p5, a_sq_p3_p5); + xmm1 = _mm256_adds_epi16(xmm0, y0_p_3_5); + bit_met_p3_p5 = _mm256_subs_epi16(xmm1, ch_mag_34_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_p3_p3, a_sq_p3_p3); + xmm1 = _mm256_adds_epi16(xmm0, y0_p_3_3); + bit_met_p3_p3 = _mm256_subs_epi16(xmm1, ch_mag_18_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_p3_p1, a_sq_p3_p1); + xmm1 = _mm256_adds_epi16(xmm0, y0_p_3_1); + bit_met_p3_p1 = _mm256_subs_epi16(xmm1, ch_mag_10_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_p3_m1, a_sq_p3_m1); + xmm1 = _mm256_adds_epi16(xmm0, 
y0_m_3_1); + bit_met_p3_m1 = _mm256_subs_epi16(xmm1, ch_mag_10_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_p3_m3, a_sq_p3_m3); + xmm1 = _mm256_adds_epi16(xmm0, y0_m_3_3); + bit_met_p3_m3 = _mm256_subs_epi16(xmm1, ch_mag_18_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_p3_m5, a_sq_p3_m5); + xmm1 = _mm256_adds_epi16(xmm0, y0_m_3_5); + bit_met_p3_m5 = _mm256_subs_epi16(xmm1, ch_mag_34_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_p3_m7, a_sq_p3_m7); + xmm1 = _mm256_adds_epi16(xmm0, y0_m_3_7); + bit_met_p3_m7 = _mm256_subs_epi16(xmm1, ch_mag_58_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_p1_p7, a_sq_p1_p7); + xmm1 = _mm256_adds_epi16(xmm0, y0_p_1_7); + bit_met_p1_p7 = _mm256_subs_epi16(xmm1, ch_mag_50_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_p1_p5, a_sq_p1_p5); + xmm1 = _mm256_adds_epi16(xmm0, y0_p_1_5); + bit_met_p1_p5 = _mm256_subs_epi16(xmm1, ch_mag_26_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_p1_p3, a_sq_p1_p3); + xmm1 = _mm256_adds_epi16(xmm0, y0_p_1_3); + bit_met_p1_p3 = _mm256_subs_epi16(xmm1, ch_mag_10_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_p1_p1, a_sq_p1_p1); + xmm1 = _mm256_adds_epi16(xmm0, y0_p_1_1); + bit_met_p1_p1 = _mm256_subs_epi16(xmm1, ch_mag_2_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_p1_m1, a_sq_p1_m1); + xmm1 = _mm256_adds_epi16(xmm0, y0_m_1_1); + bit_met_p1_m1 = _mm256_subs_epi16(xmm1, ch_mag_2_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_p1_m3, a_sq_p1_m3); + xmm1 = _mm256_adds_epi16(xmm0, y0_m_1_3); + bit_met_p1_m3 = _mm256_subs_epi16(xmm1, ch_mag_10_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_p1_m5, a_sq_p1_m5); + xmm1 = _mm256_adds_epi16(xmm0, y0_m_1_5); + bit_met_p1_m5 = _mm256_subs_epi16(xmm1, ch_mag_26_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_p1_m7, a_sq_p1_m7); + xmm1 = _mm256_adds_epi16(xmm0, y0_m_1_7); + bit_met_p1_m7 = _mm256_subs_epi16(xmm1, ch_mag_50_over_42_with_sigma2); + + xmm0 = 
_mm256_subs_epi16(psi_a_m1_p7, a_sq_m1_p7); + xmm1 = _mm256_subs_epi16(xmm0, y0_m_1_7); + bit_met_m1_p7 = _mm256_subs_epi16(xmm1, ch_mag_50_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_m1_p5, a_sq_m1_p5); + xmm1 = _mm256_subs_epi16(xmm0, y0_m_1_5); + bit_met_m1_p5 = _mm256_subs_epi16(xmm1, ch_mag_26_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_m1_p3, a_sq_m1_p3); + xmm1 = _mm256_subs_epi16(xmm0, y0_m_1_3); + bit_met_m1_p3 = _mm256_subs_epi16(xmm1, ch_mag_10_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_m1_p1, a_sq_m1_p1); + xmm1 = _mm256_subs_epi16(xmm0, y0_m_1_1); + bit_met_m1_p1 = _mm256_subs_epi16(xmm1, ch_mag_2_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_m1_m1, a_sq_m1_m1); + xmm1 = _mm256_subs_epi16(xmm0, y0_p_1_1); + bit_met_m1_m1 = _mm256_subs_epi16(xmm1, ch_mag_2_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_m1_m3, a_sq_m1_m3); + xmm1 = _mm256_subs_epi16(xmm0, y0_p_1_3); + bit_met_m1_m3 = _mm256_subs_epi16(xmm1, ch_mag_10_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_m1_m5, a_sq_m1_m5); + xmm1 = _mm256_subs_epi16(xmm0, y0_p_1_5); + bit_met_m1_m5 = _mm256_subs_epi16(xmm1, ch_mag_26_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_m1_m7, a_sq_m1_m7); + xmm1 = _mm256_subs_epi16(xmm0, y0_p_1_7); + bit_met_m1_m7 = _mm256_subs_epi16(xmm1, ch_mag_50_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_m3_p7, a_sq_m3_p7); + xmm1 = _mm256_subs_epi16(xmm0, y0_m_3_7); + bit_met_m3_p7 = _mm256_subs_epi16(xmm1, ch_mag_58_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_m3_p5, a_sq_m3_p5); + xmm1 = _mm256_subs_epi16(xmm0, y0_m_3_5); + bit_met_m3_p5 = _mm256_subs_epi16(xmm1, ch_mag_34_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_m3_p3, a_sq_m3_p3); + xmm1 = _mm256_subs_epi16(xmm0, y0_m_3_3); + bit_met_m3_p3 = _mm256_subs_epi16(xmm1, ch_mag_18_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_m3_p1, a_sq_m3_p1); + xmm1 = _mm256_subs_epi16(xmm0, y0_m_3_1); + bit_met_m3_p1 = 
_mm256_subs_epi16(xmm1, ch_mag_10_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_m3_m1, a_sq_m3_m1); + xmm1 = _mm256_subs_epi16(xmm0, y0_p_3_1); + bit_met_m3_m1 = _mm256_subs_epi16(xmm1, ch_mag_10_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_m3_m3, a_sq_m3_m3); + xmm1 = _mm256_subs_epi16(xmm0, y0_p_3_3); + bit_met_m3_m3 = _mm256_subs_epi16(xmm1, ch_mag_18_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_m3_m5, a_sq_m3_m5); + xmm1 = _mm256_subs_epi16(xmm0, y0_p_3_5); + bit_met_m3_m5 = _mm256_subs_epi16(xmm1, ch_mag_34_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_m3_m7, a_sq_m3_m7); + xmm1 = _mm256_subs_epi16(xmm0, y0_p_3_7); + bit_met_m3_m7 = _mm256_subs_epi16(xmm1, ch_mag_58_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_m5_p7, a_sq_m5_p7); + xmm1 = _mm256_subs_epi16(xmm0, y0_m_5_7); + bit_met_m5_p7 = _mm256_subs_epi16(xmm1, ch_mag_74_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_m5_p5, a_sq_m5_p5); + xmm1 = _mm256_subs_epi16(xmm0, y0_m_5_5); + bit_met_m5_p5 = _mm256_subs_epi16(xmm1, ch_mag_50_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_m5_p3, a_sq_m5_p3); + xmm1 = _mm256_subs_epi16(xmm0, y0_m_5_3); + bit_met_m5_p3 = _mm256_subs_epi16(xmm1, ch_mag_34_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_m5_p1, a_sq_m5_p1); + xmm1 = _mm256_subs_epi16(xmm0, y0_m_5_1); + bit_met_m5_p1 = _mm256_subs_epi16(xmm1, ch_mag_26_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_m5_m1, a_sq_m5_m1); + xmm1 = _mm256_subs_epi16(xmm0, y0_p_5_1); + bit_met_m5_m1 = _mm256_subs_epi16(xmm1, ch_mag_26_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_m5_m3, a_sq_m5_m3); + xmm1 = _mm256_subs_epi16(xmm0, y0_p_5_3); + bit_met_m5_m3 = _mm256_subs_epi16(xmm1, ch_mag_34_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_m5_m5, a_sq_m5_m5); + xmm1 = _mm256_subs_epi16(xmm0, y0_p_5_5); + bit_met_m5_m5 = _mm256_subs_epi16(xmm1, ch_mag_50_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_m5_m7, a_sq_m5_m7); + xmm1 = 
_mm256_subs_epi16(xmm0, y0_p_5_7); + bit_met_m5_m7 = _mm256_subs_epi16(xmm1, ch_mag_74_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_m7_p7, a_sq_m7_p7); + xmm1 = _mm256_subs_epi16(xmm0, y0_m_7_7); + bit_met_m7_p7 = _mm256_subs_epi16(xmm1, ch_mag_98_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_m7_p5, a_sq_m7_p5); + xmm1 = _mm256_subs_epi16(xmm0, y0_m_7_5); + bit_met_m7_p5 = _mm256_subs_epi16(xmm1, ch_mag_74_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_m7_p3, a_sq_m7_p3); + xmm1 = _mm256_subs_epi16(xmm0, y0_m_7_3); + bit_met_m7_p3 = _mm256_subs_epi16(xmm1, ch_mag_58_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_m7_p1, a_sq_m7_p1); + xmm1 = _mm256_subs_epi16(xmm0, y0_m_7_1); + bit_met_m7_p1 = _mm256_subs_epi16(xmm1, ch_mag_50_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_m7_m1, a_sq_m7_m1); + xmm1 = _mm256_subs_epi16(xmm0, y0_p_7_1); + bit_met_m7_m1 = _mm256_subs_epi16(xmm1, ch_mag_50_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_m7_m3, a_sq_m7_m3); + xmm1 = _mm256_subs_epi16(xmm0, y0_p_7_3); + bit_met_m7_m3 = _mm256_subs_epi16(xmm1, ch_mag_58_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_m7_m5, a_sq_m7_m5); + xmm1 = _mm256_subs_epi16(xmm0, y0_p_7_5); + bit_met_m7_m5 = _mm256_subs_epi16(xmm1, ch_mag_74_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_m7_m7, a_sq_m7_m7); + xmm1 = _mm256_subs_epi16(xmm0, y0_p_7_7); + bit_met_m7_m7 = _mm256_subs_epi16(xmm1, ch_mag_98_over_42_with_sigma2); + + // Detection for 1st bit (LTE mapping) + // bit = 1 + xmm0 = _mm256_max_epi16(bit_met_m7_p7, bit_met_m7_p5); + xmm1 = _mm256_max_epi16(bit_met_m7_p3, bit_met_m7_p1); + xmm2 = _mm256_max_epi16(bit_met_m7_m1, bit_met_m7_m3); + xmm3 = _mm256_max_epi16(bit_met_m7_m5, bit_met_m7_m7); + xmm4 = _mm256_max_epi16(xmm0, xmm1); + xmm5 = _mm256_max_epi16(xmm2, xmm3); + logmax_den_re0 = _mm256_max_epi16(xmm4, xmm5); + xmm0 = _mm256_max_epi16(bit_met_m5_p7, bit_met_m5_p5); + xmm1 = _mm256_max_epi16(bit_met_m5_p3, 
bit_met_m5_p1); + xmm2 = _mm256_max_epi16(bit_met_m5_m1, bit_met_m5_m3); + xmm3 = _mm256_max_epi16(bit_met_m5_m5, bit_met_m5_m7); + xmm4 = _mm256_max_epi16(xmm0, xmm1); + xmm5 = _mm256_max_epi16(xmm2, xmm3); + logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm4); + logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm5); + xmm0 = _mm256_max_epi16(bit_met_m3_p7, bit_met_m3_p5); + xmm1 = _mm256_max_epi16(bit_met_m3_p3, bit_met_m3_p1); + xmm2 = _mm256_max_epi16(bit_met_m3_m1, bit_met_m3_m3); + xmm3 = _mm256_max_epi16(bit_met_m3_m5, bit_met_m3_m7); + xmm4 = _mm256_max_epi16(xmm0, xmm1); + xmm5 = _mm256_max_epi16(xmm2, xmm3); + logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm4); + logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm5); + xmm0 = _mm256_max_epi16(bit_met_m1_p7, bit_met_m1_p5); + xmm1 = _mm256_max_epi16(bit_met_m1_p3, bit_met_m1_p1); + xmm2 = _mm256_max_epi16(bit_met_m1_m1, bit_met_m1_m3); + xmm3 = _mm256_max_epi16(bit_met_m1_m5, bit_met_m1_m7); + xmm4 = _mm256_max_epi16(xmm0, xmm1); + xmm5 = _mm256_max_epi16(xmm2, xmm3); + logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm4); + logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm5); + + // bit = 0 + xmm0 = _mm256_max_epi16(bit_met_p7_p7, bit_met_p7_p5); + xmm1 = _mm256_max_epi16(bit_met_p7_p3, bit_met_p7_p1); + xmm2 = _mm256_max_epi16(bit_met_p7_m1, bit_met_p7_m3); + xmm3 = _mm256_max_epi16(bit_met_p7_m5, bit_met_p7_m7); + xmm4 = _mm256_max_epi16(xmm0, xmm1); + xmm5 = _mm256_max_epi16(xmm2, xmm3); + logmax_num_re0 = _mm256_max_epi16(xmm4, xmm5); + xmm0 = _mm256_max_epi16(bit_met_p5_p7, bit_met_p5_p5); + xmm1 = _mm256_max_epi16(bit_met_p5_p3, bit_met_p5_p1); + xmm2 = _mm256_max_epi16(bit_met_p5_m1, bit_met_p5_m3); + xmm3 = _mm256_max_epi16(bit_met_p5_m5, bit_met_p5_m7); + xmm4 = _mm256_max_epi16(xmm0, xmm1); + xmm5 = _mm256_max_epi16(xmm2, xmm3); + logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm4); + logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm5); + xmm0 = 
_mm256_max_epi16(bit_met_p3_p7, bit_met_p3_p5); + xmm1 = _mm256_max_epi16(bit_met_p3_p3, bit_met_p3_p1); + xmm2 = _mm256_max_epi16(bit_met_p3_m1, bit_met_p3_m3); + xmm3 = _mm256_max_epi16(bit_met_p3_m5, bit_met_p3_m7); + xmm4 = _mm256_max_epi16(xmm0, xmm1); + xmm5 = _mm256_max_epi16(xmm2, xmm3); + logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm4); + logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm5); + xmm0 = _mm256_max_epi16(bit_met_p1_p7, bit_met_p1_p5); + xmm1 = _mm256_max_epi16(bit_met_p1_p3, bit_met_p1_p1); + xmm2 = _mm256_max_epi16(bit_met_p1_m1, bit_met_p1_m3); + xmm3 = _mm256_max_epi16(bit_met_p1_m5, bit_met_p1_m7); + xmm4 = _mm256_max_epi16(xmm0, xmm1); + xmm5 = _mm256_max_epi16(xmm2, xmm3); + logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm4); + logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm5); + + y0r = _mm256_subs_epi16(logmax_num_re0, logmax_den_re0); + + // Detection for 2nd bit (LTE mapping) + // bit = 1 + xmm0 = _mm256_max_epi16(bit_met_p7_m1, bit_met_p5_m1); + xmm1 = _mm256_max_epi16(bit_met_p3_m1, bit_met_p1_m1); + xmm2 = _mm256_max_epi16(bit_met_m1_m1, bit_met_m3_m1); + xmm3 = _mm256_max_epi16(bit_met_m5_m1, bit_met_m7_m1); + xmm4 = _mm256_max_epi16(xmm0, xmm1); + xmm5 = _mm256_max_epi16(xmm2, xmm3); + logmax_den_re0 = _mm256_max_epi16(xmm4, xmm5); + xmm0 = _mm256_max_epi16(bit_met_p7_m3, bit_met_p5_m3); + xmm1 = _mm256_max_epi16(bit_met_p3_m3, bit_met_p1_m3); + xmm2 = _mm256_max_epi16(bit_met_m1_m3, bit_met_m3_m3); + xmm3 = _mm256_max_epi16(bit_met_m5_m3, bit_met_m7_m3); + xmm4 = _mm256_max_epi16(xmm0, xmm1); + xmm5 = _mm256_max_epi16(xmm2, xmm3); + logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm4); + logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm5); + xmm0 = _mm256_max_epi16(bit_met_p7_m5, bit_met_p5_m5); + xmm1 = _mm256_max_epi16(bit_met_p3_m5, bit_met_p1_m5); + xmm2 = _mm256_max_epi16(bit_met_m1_m5, bit_met_m3_m5); + xmm3 = _mm256_max_epi16(bit_met_m5_m5, bit_met_m7_m5); + xmm4 = _mm256_max_epi16(xmm0, 
xmm1); + xmm5 = _mm256_max_epi16(xmm2, xmm3); + logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm4); + logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm5); + xmm0 = _mm256_max_epi16(bit_met_p7_m7, bit_met_p5_m7); + xmm1 = _mm256_max_epi16(bit_met_p3_m7, bit_met_p1_m7); + xmm2 = _mm256_max_epi16(bit_met_m1_m7, bit_met_m3_m7); + xmm3 = _mm256_max_epi16(bit_met_m5_m7, bit_met_m7_m7); + xmm4 = _mm256_max_epi16(xmm0, xmm1); + xmm5 = _mm256_max_epi16(xmm2, xmm3); + logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm4); + logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm5); + + // bit = 0 + xmm0 = _mm256_max_epi16(bit_met_p7_p1, bit_met_p5_p1); + xmm1 = _mm256_max_epi16(bit_met_p3_p1, bit_met_p1_p1); + xmm2 = _mm256_max_epi16(bit_met_m1_p1, bit_met_m3_p1); + xmm3 = _mm256_max_epi16(bit_met_m5_p1, bit_met_m7_p1); + xmm4 = _mm256_max_epi16(xmm0, xmm1); + xmm5 = _mm256_max_epi16(xmm2, xmm3); + logmax_num_re0 = _mm256_max_epi16(xmm4, xmm5); + xmm0 = _mm256_max_epi16(bit_met_p7_p3, bit_met_p5_p3); + xmm1 = _mm256_max_epi16(bit_met_p3_p3, bit_met_p1_p3); + xmm2 = _mm256_max_epi16(bit_met_m1_p3, bit_met_m3_p3); + xmm3 = _mm256_max_epi16(bit_met_m5_p3, bit_met_m7_p3); + xmm4 = _mm256_max_epi16(xmm0, xmm1); + xmm5 = _mm256_max_epi16(xmm2, xmm3); + logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm4); + logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm5); + xmm0 = _mm256_max_epi16(bit_met_p7_p5, bit_met_p5_p5); + xmm1 = _mm256_max_epi16(bit_met_p3_p5, bit_met_p1_p5); + xmm2 = _mm256_max_epi16(bit_met_m1_p5, bit_met_m3_p5); + xmm3 = _mm256_max_epi16(bit_met_m5_p5, bit_met_m7_p5); + xmm4 = _mm256_max_epi16(xmm0, xmm1); + xmm5 = _mm256_max_epi16(xmm2, xmm3); + logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm4); + logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm5); + xmm0 = _mm256_max_epi16(bit_met_p7_p7, bit_met_p5_p7); + xmm1 = _mm256_max_epi16(bit_met_p3_p7, bit_met_p1_p7); + xmm2 = _mm256_max_epi16(bit_met_m1_p7, bit_met_m3_p7); + xmm3 = 
_mm256_max_epi16(bit_met_m5_p7, bit_met_m7_p7); + xmm4 = _mm256_max_epi16(xmm0, xmm1); + xmm5 = _mm256_max_epi16(xmm2, xmm3); + logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm4); + logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm5); + + y1r = _mm256_subs_epi16(logmax_num_re0, logmax_den_re0); + + // Detection for 3rd bit (LTE mapping) + xmm0 = _mm256_max_epi16(bit_met_m7_m7, bit_met_m7_m5); + xmm1 = _mm256_max_epi16(bit_met_m7_m3, bit_met_m7_m1); + xmm2 = _mm256_max_epi16(bit_met_m7_p1, bit_met_m7_p3); + xmm3 = _mm256_max_epi16(bit_met_m7_p5, bit_met_m7_p7); + xmm4 = _mm256_max_epi16(xmm0, xmm1); + xmm5 = _mm256_max_epi16(xmm2, xmm3); + logmax_den_re0 = _mm256_max_epi16(xmm4, xmm5); + xmm0 = _mm256_max_epi16(bit_met_m5_m7, bit_met_m5_m5); + xmm1 = _mm256_max_epi16(bit_met_m5_m3, bit_met_m5_m1); + xmm2 = _mm256_max_epi16(bit_met_m5_p1, bit_met_m5_p3); + xmm3 = _mm256_max_epi16(bit_met_m5_p5, bit_met_m5_p7); + xmm4 = _mm256_max_epi16(xmm0, xmm1); + xmm5 = _mm256_max_epi16(xmm2, xmm3); + logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm4); + logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm5); + xmm0 = _mm256_max_epi16(bit_met_p5_m7, bit_met_p5_m5); + xmm1 = _mm256_max_epi16(bit_met_p5_m3, bit_met_p5_m1); + xmm2 = _mm256_max_epi16(bit_met_p5_p1, bit_met_p5_p3); + xmm3 = _mm256_max_epi16(bit_met_p5_p5, bit_met_p5_p7); + xmm4 = _mm256_max_epi16(xmm0, xmm1); + xmm5 = _mm256_max_epi16(xmm2, xmm3); + logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm4); + logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm5); + xmm0 = _mm256_max_epi16(bit_met_p7_m7, bit_met_p7_m5); + xmm1 = _mm256_max_epi16(bit_met_p7_m3, bit_met_p7_m1); + xmm2 = _mm256_max_epi16(bit_met_p7_p1, bit_met_p7_p3); + xmm3 = _mm256_max_epi16(bit_met_p7_p5, bit_met_p7_p7); + xmm4 = _mm256_max_epi16(xmm0, xmm1); + xmm5 = _mm256_max_epi16(xmm2, xmm3); + logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm4); + logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm5); + + xmm0 = 
_mm256_max_epi16(bit_met_m3_m7, bit_met_m3_m5); + xmm1 = _mm256_max_epi16(bit_met_m3_m3, bit_met_m3_m1); + xmm2 = _mm256_max_epi16(bit_met_m3_p1, bit_met_m3_p3); + xmm3 = _mm256_max_epi16(bit_met_m3_p5, bit_met_m3_p7); + xmm4 = _mm256_max_epi16(xmm0, xmm1); + xmm5 = _mm256_max_epi16(xmm2, xmm3); + logmax_num_re0 = _mm256_max_epi16(xmm4, xmm5); + xmm0 = _mm256_max_epi16(bit_met_m1_m7, bit_met_m1_m5); + xmm1 = _mm256_max_epi16(bit_met_m1_m3, bit_met_m1_m1); + xmm2 = _mm256_max_epi16(bit_met_m1_p1, bit_met_m1_p3); + xmm3 = _mm256_max_epi16(bit_met_m1_p5, bit_met_m1_p7); + xmm4 = _mm256_max_epi16(xmm0, xmm1); + xmm5 = _mm256_max_epi16(xmm2, xmm3); + logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm4); + logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm5); + xmm0 = _mm256_max_epi16(bit_met_p1_m7, bit_met_p1_m5); + xmm1 = _mm256_max_epi16(bit_met_p1_m3, bit_met_p1_m1); + xmm2 = _mm256_max_epi16(bit_met_p1_p1, bit_met_p1_p3); + xmm3 = _mm256_max_epi16(bit_met_p1_p5, bit_met_p1_p7); + xmm4 = _mm256_max_epi16(xmm0, xmm1); + xmm5 = _mm256_max_epi16(xmm2, xmm3); + logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm4); + logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm5); + xmm0 = _mm256_max_epi16(bit_met_p3_m7, bit_met_p3_m5); + xmm1 = _mm256_max_epi16(bit_met_p3_m3, bit_met_p3_m1); + xmm2 = _mm256_max_epi16(bit_met_p3_p1, bit_met_p3_p3); + xmm3 = _mm256_max_epi16(bit_met_p3_p5, bit_met_p3_p7); + xmm4 = _mm256_max_epi16(xmm0, xmm1); + xmm5 = _mm256_max_epi16(xmm2, xmm3); + logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm4); + logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm5); + + y2r = _mm256_subs_epi16(logmax_num_re0, logmax_den_re0); + + // Detection for 4th bit (LTE mapping) + xmm0 = _mm256_max_epi16(bit_met_p7_p7, bit_met_p5_p7); + xmm1 = _mm256_max_epi16(bit_met_p3_p7, bit_met_p1_p7); + xmm2 = _mm256_max_epi16(bit_met_m1_p7, bit_met_m3_p7); + xmm3 = _mm256_max_epi16(bit_met_m5_p7, bit_met_m7_p7); + xmm4 = _mm256_max_epi16(xmm0, xmm1); + xmm5 = 
_mm256_max_epi16(xmm2, xmm3); + logmax_den_re0 = _mm256_max_epi16(xmm4, xmm5); + xmm0 = _mm256_max_epi16(bit_met_p7_p5, bit_met_p5_p5); + xmm1 = _mm256_max_epi16(bit_met_p3_p5, bit_met_p1_p5); + xmm2 = _mm256_max_epi16(bit_met_m1_p5, bit_met_m3_p5); + xmm3 = _mm256_max_epi16(bit_met_m5_p5, bit_met_m7_p5); /* _p5 group must span all eight first-index levels p7..m7 */ + xmm4 = _mm256_max_epi16(xmm0, xmm1); + xmm5 = _mm256_max_epi16(xmm2, xmm3); + logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm4); + logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm5); + xmm0 = _mm256_max_epi16(bit_met_p7_m5, bit_met_p5_m5); + xmm1 = _mm256_max_epi16(bit_met_p3_m5, bit_met_p1_m5); + xmm2 = _mm256_max_epi16(bit_met_m1_m5, bit_met_m3_m5); + xmm3 = _mm256_max_epi16(bit_met_m5_m5, bit_met_m7_m5); + xmm4 = _mm256_max_epi16(xmm0, xmm1); + xmm5 = _mm256_max_epi16(xmm2, xmm3); + logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm4); + logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm5); + xmm0 = _mm256_max_epi16(bit_met_p7_m7, bit_met_p5_m7); + xmm1 = _mm256_max_epi16(bit_met_p3_m7, bit_met_p1_m7); + xmm2 = _mm256_max_epi16(bit_met_m1_m7, bit_met_m3_m7); + xmm3 = _mm256_max_epi16(bit_met_m5_m7, bit_met_m7_m7); + xmm4 = _mm256_max_epi16(xmm0, xmm1); + xmm5 = _mm256_max_epi16(xmm2, xmm3); + logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm4); + logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm5); + + xmm0 = _mm256_max_epi16(bit_met_p7_m1, bit_met_p5_m1); + xmm1 = _mm256_max_epi16(bit_met_p3_m1, bit_met_p1_m1); + xmm2 = _mm256_max_epi16(bit_met_m1_m1, bit_met_m3_m1); + xmm3 = _mm256_max_epi16(bit_met_m5_m1, bit_met_m7_m1); + xmm4 = _mm256_max_epi16(xmm0, xmm1); + xmm5 = _mm256_max_epi16(xmm2, xmm3); + logmax_num_re0 = _mm256_max_epi16(xmm4, xmm5); + xmm0 = _mm256_max_epi16(bit_met_p7_m3, bit_met_p5_m3); + xmm1 = _mm256_max_epi16(bit_met_p3_m3, bit_met_p1_m3); + xmm2 = _mm256_max_epi16(bit_met_m1_m3, bit_met_m3_m3); + xmm3 = _mm256_max_epi16(bit_met_m5_m3, bit_met_m7_m3); + xmm4 = _mm256_max_epi16(xmm0, xmm1); + xmm5 =
_mm256_max_epi16(xmm2, xmm3); + logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm4); + logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm5); + xmm0 = _mm256_max_epi16(bit_met_p7_p1, bit_met_p5_p1); + xmm1 = _mm256_max_epi16(bit_met_p3_p1, bit_met_p1_p1); + xmm2 = _mm256_max_epi16(bit_met_m1_p1, bit_met_m3_p1); + xmm3 = _mm256_max_epi16(bit_met_m5_p1, bit_met_m7_p1); + xmm4 = _mm256_max_epi16(xmm0, xmm1); + xmm5 = _mm256_max_epi16(xmm2, xmm3); + logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm4); + logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm5); + xmm0 = _mm256_max_epi16(bit_met_p7_p3, bit_met_p5_p3); + xmm1 = _mm256_max_epi16(bit_met_p3_p3, bit_met_p1_p3); + xmm2 = _mm256_max_epi16(bit_met_m1_p3, bit_met_m3_p3); + xmm3 = _mm256_max_epi16(bit_met_m5_p3, bit_met_m7_p3); + xmm4 = _mm256_max_epi16(xmm0, xmm1); + xmm5 = _mm256_max_epi16(xmm2, xmm3); + logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm4); + logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm5); + + y0i = _mm256_subs_epi16(logmax_num_re0, logmax_den_re0); + + + // Detection for 5th bit (LTE mapping) + xmm0 = _mm256_max_epi16(bit_met_m7_m7, bit_met_m7_m5); + xmm1 = _mm256_max_epi16(bit_met_m7_m3, bit_met_m7_m1); + xmm2 = _mm256_max_epi16(bit_met_m7_p1, bit_met_m7_p3); + xmm3 = _mm256_max_epi16(bit_met_m7_p5, bit_met_m7_p7); + xmm4 = _mm256_max_epi16(xmm0, xmm1); + xmm5 = _mm256_max_epi16(xmm2, xmm3); + logmax_den_re0 = _mm256_max_epi16(xmm4, xmm5); + xmm0 = _mm256_max_epi16(bit_met_m1_m7, bit_met_m1_m5); + xmm1 = _mm256_max_epi16(bit_met_m1_m3, bit_met_m1_m1); + xmm2 = _mm256_max_epi16(bit_met_m1_p1, bit_met_m1_p3); + xmm3 = _mm256_max_epi16(bit_met_m1_p5, bit_met_m1_p7); + xmm4 = _mm256_max_epi16(xmm0, xmm1); + xmm5 = _mm256_max_epi16(xmm2, xmm3); + logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm4); + logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm5); + xmm0 = _mm256_max_epi16(bit_met_p1_m7, bit_met_p1_m5); + xmm1 = _mm256_max_epi16(bit_met_p1_m3, bit_met_p1_m1); + 
xmm2 = _mm256_max_epi16(bit_met_p1_p1, bit_met_p1_p3); + xmm3 = _mm256_max_epi16(bit_met_p1_p5, bit_met_p1_p7); + xmm4 = _mm256_max_epi16(xmm0, xmm1); + xmm5 = _mm256_max_epi16(xmm2, xmm3); + logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm4); + logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm5); + xmm0 = _mm256_max_epi16(bit_met_p7_m7, bit_met_p7_m5); + xmm1 = _mm256_max_epi16(bit_met_p7_m3, bit_met_p7_m1); + xmm2 = _mm256_max_epi16(bit_met_p7_p1, bit_met_p7_p3); + xmm3 = _mm256_max_epi16(bit_met_p7_p5, bit_met_p7_p7); + xmm4 = _mm256_max_epi16(xmm0, xmm1); + xmm5 = _mm256_max_epi16(xmm2, xmm3); + logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm4); + logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm5); + + xmm0 = _mm256_max_epi16(bit_met_m5_m7, bit_met_m5_m5); + xmm1 = _mm256_max_epi16(bit_met_m5_m3, bit_met_m5_m1); + xmm2 = _mm256_max_epi16(bit_met_m5_p1, bit_met_m5_p3); + xmm3 = _mm256_max_epi16(bit_met_m5_p5, bit_met_m5_p7); + xmm4 = _mm256_max_epi16(xmm0, xmm1); + xmm5 = _mm256_max_epi16(xmm2, xmm3); + logmax_num_re0 = _mm256_max_epi16(xmm4, xmm5); + xmm0 = _mm256_max_epi16(bit_met_m3_m7, bit_met_m3_m5); + xmm1 = _mm256_max_epi16(bit_met_m3_m3, bit_met_m3_m1); + xmm2 = _mm256_max_epi16(bit_met_m3_p1, bit_met_m3_p3); + xmm3 = _mm256_max_epi16(bit_met_m3_p5, bit_met_m3_p7); + xmm4 = _mm256_max_epi16(xmm0, xmm1); + xmm5 = _mm256_max_epi16(xmm2, xmm3); + logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm4); + logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm5); + xmm0 = _mm256_max_epi16(bit_met_p3_m7, bit_met_p3_m5); + xmm1 = _mm256_max_epi16(bit_met_p3_m3, bit_met_p3_m1); + xmm2 = _mm256_max_epi16(bit_met_p3_p1, bit_met_p3_p3); + xmm3 = _mm256_max_epi16(bit_met_p3_p5, bit_met_p3_p7); + xmm4 = _mm256_max_epi16(xmm0, xmm1); + xmm5 = _mm256_max_epi16(xmm2, xmm3); + logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm4); + logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm5); + xmm0 = _mm256_max_epi16(bit_met_p5_m7, bit_met_p5_m5); + 
xmm1 = _mm256_max_epi16(bit_met_p5_m3, bit_met_p5_m1); + xmm2 = _mm256_max_epi16(bit_met_p5_p1, bit_met_p5_p3); + xmm3 = _mm256_max_epi16(bit_met_p5_p5, bit_met_p5_p7); + xmm4 = _mm256_max_epi16(xmm0, xmm1); + xmm5 = _mm256_max_epi16(xmm2, xmm3); + logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm4); + logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm5); + + y1i = _mm256_subs_epi16(logmax_num_re0, logmax_den_re0); + + // Detection for 6th bit (LTE mapping) + xmm0 = _mm256_max_epi16(bit_met_p7_p7, bit_met_p5_p7); + xmm1 = _mm256_max_epi16(bit_met_p3_p7, bit_met_p1_p7); + xmm2 = _mm256_max_epi16(bit_met_m1_p7, bit_met_m3_p7); + xmm3 = _mm256_max_epi16(bit_met_m5_p7, bit_met_m7_p7); + xmm4 = _mm256_max_epi16(xmm0, xmm1); + xmm5 = _mm256_max_epi16(xmm2, xmm3); + logmax_den_re0 = _mm256_max_epi16(xmm4, xmm5); + xmm0 = _mm256_max_epi16(bit_met_p7_p1, bit_met_p5_p1); + xmm1 = _mm256_max_epi16(bit_met_p3_p1, bit_met_p1_p1); + xmm2 = _mm256_max_epi16(bit_met_m1_p1, bit_met_m3_p1); + xmm3 = _mm256_max_epi16(bit_met_m5_p1, bit_met_m7_p1); /* _p1 group must span all eight first-index levels p7..m7 */ + xmm4 = _mm256_max_epi16(xmm0, xmm1); + xmm5 = _mm256_max_epi16(xmm2, xmm3); + logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm4); + logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm5); + xmm0 = _mm256_max_epi16(bit_met_p7_m1, bit_met_p5_m1); + xmm1 = _mm256_max_epi16(bit_met_p3_m1, bit_met_p1_m1); + xmm2 = _mm256_max_epi16(bit_met_m1_m1, bit_met_m3_m1); + xmm3 = _mm256_max_epi16(bit_met_m5_m1, bit_met_m7_m1); + xmm4 = _mm256_max_epi16(xmm0, xmm1); + xmm5 = _mm256_max_epi16(xmm2, xmm3); + logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm4); + logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm5); + xmm0 = _mm256_max_epi16(bit_met_p7_m7, bit_met_p5_m7); + xmm1 = _mm256_max_epi16(bit_met_p3_m7, bit_met_p1_m7); + xmm2 = _mm256_max_epi16(bit_met_m1_m7, bit_met_m3_m7); + xmm3 = _mm256_max_epi16(bit_met_m5_m7, bit_met_m7_m7); + xmm4 = _mm256_max_epi16(xmm0, xmm1); + xmm5 = _mm256_max_epi16(xmm2, xmm3); + logmax_den_re0 =
_mm256_max_epi16(logmax_den_re0, xmm4); + logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm5); + + xmm0 = _mm256_max_epi16(bit_met_p7_m5, bit_met_p5_m5); + xmm1 = _mm256_max_epi16(bit_met_p3_m5, bit_met_p1_m5); + xmm2 = _mm256_max_epi16(bit_met_m1_m5, bit_met_m3_m5); + xmm3 = _mm256_max_epi16(bit_met_m5_m5, bit_met_m7_m5); + xmm4 = _mm256_max_epi16(xmm0, xmm1); + xmm5 = _mm256_max_epi16(xmm2, xmm3); + logmax_num_re0 = _mm256_max_epi16(xmm4, xmm5); + xmm0 = _mm256_max_epi16(bit_met_p7_m3, bit_met_p5_m3); + xmm1 = _mm256_max_epi16(bit_met_p3_m3, bit_met_p1_m3); + xmm2 = _mm256_max_epi16(bit_met_m1_m3, bit_met_m3_m3); + xmm3 = _mm256_max_epi16(bit_met_m5_m3, bit_met_m7_m3); + xmm4 = _mm256_max_epi16(xmm0, xmm1); + xmm5 = _mm256_max_epi16(xmm2, xmm3); + logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm4); + logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm5); + xmm0 = _mm256_max_epi16(bit_met_p7_p3, bit_met_p5_p3); + xmm1 = _mm256_max_epi16(bit_met_p3_p3, bit_met_p1_p3); + xmm2 = _mm256_max_epi16(bit_met_m1_p3, bit_met_m3_p3); + xmm3 = _mm256_max_epi16(bit_met_m5_p3, bit_met_m7_p3); + xmm4 = _mm256_max_epi16(xmm0, xmm1); + xmm5 = _mm256_max_epi16(xmm2, xmm3); + logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm4); + logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm5); + xmm0 = _mm256_max_epi16(bit_met_p7_p5, bit_met_p5_p5); + xmm1 = _mm256_max_epi16(bit_met_p3_p5, bit_met_p1_p5); + xmm2 = _mm256_max_epi16(bit_met_m1_p5, bit_met_m3_p5); + xmm3 = _mm256_max_epi16(bit_met_m5_p5, bit_met_m7_p5); + xmm4 = _mm256_max_epi16(xmm0, xmm1); + xmm5 = _mm256_max_epi16(xmm2, xmm3); + logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm4); + logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm5); + + y2i = _mm256_subs_epi16(logmax_num_re0, logmax_den_re0); + + // map to output stream, difficult to do in SIMD since we have 6 16bit LLRs + // RE 1 + j = 48*i; + stream0_out[j + 0] = ((short *)&y0r)[0]; + stream0_out[j + 1] = ((short *)&y1r)[0]; + stream0_out[j + 2] = 
((short *)&y2r)[0]; + stream0_out[j + 3] = ((short *)&y0i)[0]; + stream0_out[j + 4] = ((short *)&y1i)[0]; + stream0_out[j + 5] = ((short *)&y2i)[0]; + // RE 2 + stream0_out[j + 6] = ((short *)&y0r)[1]; + stream0_out[j + 7] = ((short *)&y1r)[1]; + stream0_out[j + 8] = ((short *)&y2r)[1]; + stream0_out[j + 9] = ((short *)&y0i)[1]; + stream0_out[j + 10] = ((short *)&y1i)[1]; + stream0_out[j + 11] = ((short *)&y2i)[1]; + // RE 3 + stream0_out[j + 12] = ((short *)&y0r)[2]; + stream0_out[j + 13] = ((short *)&y1r)[2]; + stream0_out[j + 14] = ((short *)&y2r)[2]; + stream0_out[j + 15] = ((short *)&y0i)[2]; + stream0_out[j + 16] = ((short *)&y1i)[2]; + stream0_out[j + 17] = ((short *)&y2i)[2]; + // RE 4 + stream0_out[j + 18] = ((short *)&y0r)[3]; + stream0_out[j + 19] = ((short *)&y1r)[3]; + stream0_out[j + 20] = ((short *)&y2r)[3]; + stream0_out[j + 21] = ((short *)&y0i)[3]; + stream0_out[j + 22] = ((short *)&y1i)[3]; + stream0_out[j + 23] = ((short *)&y2i)[3]; + // RE 5 + stream0_out[j + 24] = ((short *)&y0r)[4]; + stream0_out[j + 25] = ((short *)&y1r)[4]; + stream0_out[j + 26] = ((short *)&y2r)[4]; + stream0_out[j + 27] = ((short *)&y0i)[4]; + stream0_out[j + 28] = ((short *)&y1i)[4]; + stream0_out[j + 29] = ((short *)&y2i)[4]; + // RE 6 + stream0_out[j + 30] = ((short *)&y0r)[5]; + stream0_out[j + 31] = ((short *)&y1r)[5]; + stream0_out[j + 32] = ((short *)&y2r)[5]; + stream0_out[j + 33] = ((short *)&y0i)[5]; + stream0_out[j + 34] = ((short *)&y1i)[5]; + stream0_out[j + 35] = ((short *)&y2i)[5]; + // RE 7 + stream0_out[j + 36] = ((short *)&y0r)[6]; + stream0_out[j + 37] = ((short *)&y1r)[6]; + stream0_out[j + 38] = ((short *)&y2r)[6]; + stream0_out[j + 39] = ((short *)&y0i)[6]; + stream0_out[j + 40] = ((short *)&y1i)[6]; + stream0_out[j + 41] = ((short *)&y2i)[6]; + // RE 8 + stream0_out[j + 42] = ((short *)&y0r)[7]; + stream0_out[j + 43] = ((short *)&y1r)[7]; + stream0_out[j + 44] = ((short *)&y2r)[7]; + stream0_out[j + 45] = ((short *)&y0i)[7]; + stream0_out[j + 46] = 
((short *)&y1i)[7]; + stream0_out[j + 47] = ((short *)&y2i)[7]; + + // RE 9 + stream0_out[j + 48] = ((short *)&y0r)[8]; + stream0_out[j + 49] = ((short *)&y1r)[8]; + stream0_out[j + 50] = ((short *)&y2r)[8]; + stream0_out[j + 51] = ((short *)&y0i)[8]; + stream0_out[j + 52] = ((short *)&y1i)[8]; + stream0_out[j + 53] = ((short *)&y2i)[8]; + // RE 10 + stream0_out[j + 54] = ((short *)&y0r)[9]; + stream0_out[j + 55] = ((short *)&y1r)[9]; + stream0_out[j + 56] = ((short *)&y2r)[9]; + stream0_out[j + 57] = ((short *)&y0i)[9]; + stream0_out[j + 58] = ((short *)&y1i)[9]; + stream0_out[j + 59] = ((short *)&y2i)[9]; + // RE 11 + stream0_out[j + 60] = ((short *)&y0r)[10]; + stream0_out[j + 61] = ((short *)&y1r)[10]; + stream0_out[j + 62] = ((short *)&y2r)[10]; + stream0_out[j + 63] = ((short *)&y0i)[10]; + stream0_out[j + 64] = ((short *)&y1i)[10]; + stream0_out[j + 65] = ((short *)&y2i)[10]; + // RE 12 + stream0_out[j + 66] = ((short *)&y0r)[11]; + stream0_out[j + 67] = ((short *)&y1r)[11]; + stream0_out[j + 68] = ((short *)&y2r)[11]; + stream0_out[j + 69] = ((short *)&y0i)[11]; + stream0_out[j + 70] = ((short *)&y1i)[11]; + stream0_out[j + 71] = ((short *)&y2i)[11]; + // RE 13 + stream0_out[j + 72] = ((short *)&y0r)[12]; + stream0_out[j + 73] = ((short *)&y1r)[12]; + stream0_out[j + 74] = ((short *)&y2r)[12]; + stream0_out[j + 75] = ((short *)&y0i)[12]; + stream0_out[j + 76] = ((short *)&y1i)[12]; + stream0_out[j + 77] = ((short *)&y2i)[12]; + // RE 14 + stream0_out[j + 78] = ((short *)&y0r)[13]; + stream0_out[j + 79] = ((short *)&y1r)[13]; + stream0_out[j + 80] = ((short *)&y2r)[13]; + stream0_out[j + 81] = ((short *)&y0i)[13]; + stream0_out[j + 82] = ((short *)&y1i)[13]; + stream0_out[j + 83] = ((short *)&y2i)[13]; + // RE 15 + stream0_out[j + 84] = ((short *)&y0r)[14]; + stream0_out[j + 85] = ((short *)&y1r)[14]; + stream0_out[j + 86] = ((short *)&y2r)[14]; + stream0_out[j + 87] = ((short *)&y0i)[14]; + stream0_out[j + 88] = ((short *)&y1i)[14]; + stream0_out[j + 89] = 
((short *)&y2i)[14]; + // RE 16 + stream0_out[j + 90] = ((short *)&y0r)[15]; + stream0_out[j + 91] = ((short *)&y1r)[15]; + stream0_out[j + 92] = ((short *)&y2r)[15]; + stream0_out[j + 93] = ((short *)&y0i)[15]; + stream0_out[j + 94] = ((short *)&y1i)[15]; + stream0_out[j + 95] = ((short *)&y2i)[15]; + +#elif defined(__arm__) + +#endif + } + +#if defined(__x86_64__) || defined(__i386__) + _mm_empty(); + _m_empty(); +#endif + +} + +void qam64_qam64_avx2(int32_t *stream0_in, + int32_t *stream1_in, + int32_t *ch_mag, + int32_t *ch_mag_i, + int16_t *stream0_out, + int32_t *rho01, + int length + ) +{ + + /* + Author: S. Wagner + Date: 28-02-17 + + Input: + stream0_in: MF filter for 1st stream, i.e., y0=h0'*y + stream1_in: MF filter for 2nd stream, i.e., y1=h1'*y + ch_mag: 4*h0/sqrt(42), [Re0 Im0 Re1 Im1] s.t. Im0=Re0, Im1=Re1, etc + ch_mag_i: 4*h1/sqrt(42), [Re0 Im0 Re1 Im1] s.t. Im0=Re0, Im1=Re1, etc + rho01: Channel cross correlation, i.e., h1'*h0 + + Output: + stream0_out: output LLRs for 1st stream + */ + +#if defined(__x86_64__) || defined(__i386__) + + __m256i *rho01_256i = (__m256i *)rho01; + __m256i *stream0_256i_in = (__m256i *)stream0_in; + __m256i *stream1_256i_in = (__m256i *)stream1_in; + __m256i *ch_mag_256i = (__m256i *)ch_mag; + __m256i *ch_mag_256i_i = (__m256i *)ch_mag_i; + + __m256i ONE_OVER_SQRT_42 = _mm256_broadcastw_epi16(_mm_set1_epi16(10112)); // round(1/sqrt(42)*2^16) + __m256i THREE_OVER_SQRT_42 = _mm256_broadcastw_epi16(_mm_set1_epi16(30337)); // round(3/sqrt(42)*2^16) + __m256i FIVE_OVER_SQRT_42 = _mm256_broadcastw_epi16(_mm_set1_epi16(25281)); // round(5/sqrt(42)*2^15) + __m256i SEVEN_OVER_SQRT_42 = _mm256_broadcastw_epi16(_mm_set1_epi16(17697)); // round(7/sqrt(42)*2^14) Q2.14 + __m256i ONE_OVER_SQRT_2 = _mm256_broadcastw_epi16(_mm_set1_epi16(23170)); // round(1/sqrt(2)*2^15) + __m256i ONE_OVER_SQRT_2_42 = _mm256_broadcastw_epi16(_mm_set1_epi16(3575)); // round(1/sqrt(2*42)*2^15) + __m256i THREE_OVER_SQRT_2_42 = 
_mm256_broadcastw_epi16(_mm_set1_epi16(10726)); // round(3/sqrt(2*42)*2^15) + __m256i FIVE_OVER_SQRT_2_42 = _mm256_broadcastw_epi16(_mm_set1_epi16(17876)); // round(5/sqrt(2*42)*2^15) + __m256i SEVEN_OVER_SQRT_2_42 = _mm256_broadcastw_epi16(_mm_set1_epi16(25027)); // round(7/sqrt(2*42)*2^15) + __m256i FORTYNINE_OVER_FOUR_SQRT_42 = _mm256_broadcastw_epi16(_mm_set1_epi16(30969)); // round(49/(4*sqrt(42))*2^14), Q2.14 + __m256i THIRTYSEVEN_OVER_FOUR_SQRT_42 = _mm256_broadcastw_epi16(_mm_set1_epi16(23385)); // round(37/(4*sqrt(42))*2^14), Q2.14 + __m256i TWENTYFIVE_OVER_FOUR_SQRT_42 = _mm256_broadcastw_epi16(_mm_set1_epi16(31601)); // round(25/(4*sqrt(42))*2^15) + __m256i TWENTYNINE_OVER_FOUR_SQRT_42 = _mm256_broadcastw_epi16(_mm_set1_epi16(18329)); // round(29/(4*sqrt(42))*2^15), Q2.14 + __m256i SEVENTEEN_OVER_FOUR_SQRT_42 = _mm256_broadcastw_epi16(_mm_set1_epi16(21489)); // round(17/(4*sqrt(42))*2^15) + __m256i NINE_OVER_FOUR_SQRT_42 = _mm256_broadcastw_epi16(_mm_set1_epi16(11376)); // round(9/(4*sqrt(42))*2^15) + __m256i THIRTEEN_OVER_FOUR_SQRT_42 = _mm256_broadcastw_epi16(_mm_set1_epi16(16433)); // round(13/(4*sqrt(42))*2^15) + __m256i FIVE_OVER_FOUR_SQRT_42 = _mm256_broadcastw_epi16(_mm_set1_epi16(6320)); // round(5/(4*sqrt(42))*2^15) + __m256i ONE_OVER_FOUR_SQRT_42 = _mm256_broadcastw_epi16(_mm_set1_epi16(1264)); // round(1/(4*sqrt(42))*2^15) + __m256i SQRT_42_OVER_FOUR = _mm256_broadcastw_epi16(_mm_set1_epi16(13272)); // round(sqrt(42)/4*2^13), Q3.12 + + __m256i ch_mag_des; + __m256i ch_mag_int; + __m256i ch_mag_98_over_42_with_sigma2; + __m256i ch_mag_74_over_42_with_sigma2; + __m256i ch_mag_58_over_42_with_sigma2; + __m256i ch_mag_50_over_42_with_sigma2; + __m256i ch_mag_34_over_42_with_sigma2; + __m256i ch_mag_18_over_42_with_sigma2; + __m256i ch_mag_26_over_42_with_sigma2; + __m256i ch_mag_10_over_42_with_sigma2; + __m256i ch_mag_2_over_42_with_sigma2; + __m256i y0r_one_over_sqrt_21; + __m256i y0r_three_over_sqrt_21; + __m256i y0r_five_over_sqrt_21; + 
__m256i y0r_seven_over_sqrt_21; + __m256i y0i_one_over_sqrt_21; + __m256i y0i_three_over_sqrt_21; + __m256i y0i_five_over_sqrt_21; + __m256i y0i_seven_over_sqrt_21; + __m256i ch_mag_int_with_sigma2; + __m256i two_ch_mag_int_with_sigma2; + __m256i three_ch_mag_int_with_sigma2; +#elif defined(__arm__) + +#endif + + int i,j; + uint32_t len256 = (length)>>3; + + for (i=0; i<len256; i+=2) { + +#if defined(__x86_64__) || defined(__i386__) + + // Get rho + /* + xmm0 = rho01_256i[i]; + xmm1 = rho01_256i[i+1]; + xmm0 = _mm256_shufflelo_epi16(xmm0,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm0 = _mm256_shufflehi_epi16(xmm0,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm0 = _mm256_shuffle_epi32(xmm0,0xd8); //_MM_SHUFFLE(0,2,1,3)); + + xmm1 = _mm256_shufflelo_epi16(xmm1,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm1 = _mm256_shufflehi_epi16(xmm1,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm1 = _mm256_shuffle_epi32(xmm1,0xd8); //_MM_SHUFFLE(0,2,1,3)); + + //xmm0 = [Re(0,1,2,3) Im(0,1,2,3) Re(4,5,6,7) Im(4,5,6,7)] + //xmm0 = [Re(8,9,10,11) Im(8,9,10,11) Re(12,13,14,15) Im(12,13,14,15)] + + xmm2 = _mm256_unpacklo_epi64(xmm0, xmm1); + //xmm2 = [Re(0,1,2,3) Re(8,9,10,11) Re(4,5,6,7) Re(12,13,14,15)] + xmm2 = _mm256_permute4x64_epi64(xmm2,0xd8); // Re(rho) + + xmm3 = _mm256_unpackhi_epi64(xmm0, xmm1); + //xmm3 = [Im(0,1,2,3) Im(8,9,10,11) Im(4,5,6,7) Im(12,13,14,15)] + xmm3 = _mm256_permute4x64_epi64(xmm3,0xd8); // Im(rho) + */ + + seperate_real_imag_parts(&xmm2, &xmm3, rho01_256i[i], rho01_256i[i+1]); + + rho_rpi = _mm256_adds_epi16(xmm2,xmm3); // rho = Re(rho) + Im(rho) + rho_rmi = _mm256_subs_epi16(xmm2,xmm3); // rho* = Re(rho) - Im(rho) + + // Compute the different rhos + rho_rpi_1_1 = _mm256_mulhi_epi16(rho_rpi, ONE_OVER_SQRT_42); + rho_rmi_1_1 = _mm256_mulhi_epi16(rho_rmi, ONE_OVER_SQRT_42); + rho_rpi_3_3 = _mm256_mulhi_epi16(rho_rpi, THREE_OVER_SQRT_42); + rho_rmi_3_3 = _mm256_mulhi_epi16(rho_rmi, THREE_OVER_SQRT_42); + rho_rpi_5_5 = _mm256_mulhi_epi16(rho_rpi, FIVE_OVER_SQRT_42); + rho_rmi_5_5 = 
_mm256_mulhi_epi16(rho_rmi, FIVE_OVER_SQRT_42); + rho_rpi_7_7 = _mm256_mulhi_epi16(rho_rpi, SEVEN_OVER_SQRT_42); + rho_rmi_7_7 = _mm256_mulhi_epi16(rho_rmi, SEVEN_OVER_SQRT_42); + + rho_rpi_5_5 = _mm256_slli_epi16(rho_rpi_5_5, 1); + rho_rmi_5_5 = _mm256_slli_epi16(rho_rmi_5_5, 1); + rho_rpi_7_7 = _mm256_slli_epi16(rho_rpi_7_7, 2); + rho_rmi_7_7 = _mm256_slli_epi16(rho_rmi_7_7, 2); + + xmm4 = _mm256_mulhi_epi16(xmm2, ONE_OVER_SQRT_42); + xmm5 = _mm256_mulhi_epi16(xmm3, ONE_OVER_SQRT_42); + xmm6 = _mm256_mulhi_epi16(xmm3, THREE_OVER_SQRT_42); + xmm7 = _mm256_mulhi_epi16(xmm3, FIVE_OVER_SQRT_42); + xmm8 = _mm256_mulhi_epi16(xmm3, SEVEN_OVER_SQRT_42); + xmm7 = _mm256_slli_epi16(xmm7, 1); + xmm8 = _mm256_slli_epi16(xmm8, 2); + + rho_rpi_1_3 = _mm256_adds_epi16(xmm4, xmm6); + rho_rmi_1_3 = _mm256_subs_epi16(xmm4, xmm6); + rho_rpi_1_5 = _mm256_adds_epi16(xmm4, xmm7); + rho_rmi_1_5 = _mm256_subs_epi16(xmm4, xmm7); + rho_rpi_1_7 = _mm256_adds_epi16(xmm4, xmm8); + rho_rmi_1_7 = _mm256_subs_epi16(xmm4, xmm8); + + xmm4 = _mm256_mulhi_epi16(xmm2, THREE_OVER_SQRT_42); + rho_rpi_3_1 = _mm256_adds_epi16(xmm4, xmm5); + rho_rmi_3_1 = _mm256_subs_epi16(xmm4, xmm5); + rho_rpi_3_5 = _mm256_adds_epi16(xmm4, xmm7); + rho_rmi_3_5 = _mm256_subs_epi16(xmm4, xmm7); + rho_rpi_3_7 = _mm256_adds_epi16(xmm4, xmm8); + rho_rmi_3_7 = _mm256_subs_epi16(xmm4, xmm8); + + xmm4 = _mm256_mulhi_epi16(xmm2, FIVE_OVER_SQRT_42); + xmm4 = _mm256_slli_epi16(xmm4, 1); + rho_rpi_5_1 = _mm256_adds_epi16(xmm4, xmm5); + rho_rmi_5_1 = _mm256_subs_epi16(xmm4, xmm5); + rho_rpi_5_3 = _mm256_adds_epi16(xmm4, xmm6); + rho_rmi_5_3 = _mm256_subs_epi16(xmm4, xmm6); + rho_rpi_5_7 = _mm256_adds_epi16(xmm4, xmm8); + rho_rmi_5_7 = _mm256_subs_epi16(xmm4, xmm8); + + xmm4 = _mm256_mulhi_epi16(xmm2, SEVEN_OVER_SQRT_42); + xmm4 = _mm256_slli_epi16(xmm4, 2); + rho_rpi_7_1 = _mm256_adds_epi16(xmm4, xmm5); + rho_rmi_7_1 = _mm256_subs_epi16(xmm4, xmm5); + rho_rpi_7_3 = _mm256_adds_epi16(xmm4, xmm6); + rho_rmi_7_3 = 
_mm256_subs_epi16(xmm4, xmm6); + rho_rpi_7_5 = _mm256_adds_epi16(xmm4, xmm7); + rho_rmi_7_5 = _mm256_subs_epi16(xmm4, xmm7); + + // Rearrange interfering MF output + /* + xmm0 = stream1_256i_in[i]; + xmm1 = stream1_256i_in[i+1]; + xmm0 = _mm256_shufflelo_epi16(xmm0,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm0 = _mm256_shufflehi_epi16(xmm0,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm0 = _mm256_shuffle_epi32(xmm0,0xd8); //_MM_SHUFFLE(0,2,1,3)); + + xmm1 = _mm256_shufflelo_epi16(xmm1,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm1 = _mm256_shufflehi_epi16(xmm1,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm1 = _mm256_shuffle_epi32(xmm1,0xd8); //_MM_SHUFFLE(0,2,1,3)); + + y1r = _mm256_unpacklo_epi64(xmm0, xmm1); + y1r = _mm256_permute4x64_epi64(y1r,0xd8); // Re(y1) + + y1i = _mm256_unpackhi_epi64(xmm0, xmm1); + y1i = _mm256_permute4x64_epi64(y1i,0xd8); // Im(y1) + */ + + seperate_real_imag_parts(&y1r, &y1i, stream1_256i_in[i], stream1_256i_in[i+1]); + + // Psi_r calculation from rho_rpi or rho_rmi + xmm0 = _mm256_broadcastw_epi16(_mm_set1_epi16(0));// ZERO for abs_pi16 + xmm2 = _mm256_subs_epi16(rho_rpi_7_7, y1r); + + psi_r_p7_p7 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rpi_7_5, y1r); + psi_r_p7_p5 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rpi_7_3, y1r); + psi_r_p7_p3 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rpi_7_1, y1r); + psi_r_p7_p1 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rmi_7_1, y1r); + psi_r_p7_m1 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rmi_7_3, y1r); + psi_r_p7_m3 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rmi_7_5, y1r); + psi_r_p7_m5 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rmi_7_7, y1r); + psi_r_p7_m7 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rpi_5_7, y1r); + psi_r_p5_p7 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rpi_5_5, y1r); + psi_r_p5_p5 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rpi_5_3, y1r); + psi_r_p5_p3 = _mm256_abs_epi16(xmm2); + 
xmm2 = _mm256_subs_epi16(rho_rpi_5_1, y1r); + psi_r_p5_p1 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rmi_5_1, y1r); + psi_r_p5_m1 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rmi_5_3, y1r); + psi_r_p5_m3 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rmi_5_5, y1r); + psi_r_p5_m5 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rmi_5_7, y1r); + psi_r_p5_m7 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rpi_3_7, y1r); + psi_r_p3_p7 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rpi_3_5, y1r); + psi_r_p3_p5 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rpi_3_3, y1r); + psi_r_p3_p3 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rpi_3_1, y1r); + psi_r_p3_p1 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rmi_3_1, y1r); + psi_r_p3_m1 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rmi_3_3, y1r); + psi_r_p3_m3 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rmi_3_5, y1r); + psi_r_p3_m5 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rmi_3_7, y1r); + psi_r_p3_m7 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rpi_1_7, y1r); + psi_r_p1_p7 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rpi_1_5, y1r); + psi_r_p1_p5 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rpi_1_3, y1r); + psi_r_p1_p3 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rpi_1_1, y1r); + psi_r_p1_p1 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rmi_1_1, y1r); + psi_r_p1_m1 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rmi_1_3, y1r); + psi_r_p1_m3 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rmi_1_5, y1r); + psi_r_p1_m5 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rmi_1_7, y1r); + psi_r_p1_m7 = _mm256_abs_epi16(xmm2); + + xmm2 = _mm256_adds_epi16(rho_rmi_1_7, y1r); + psi_r_m1_p7 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rmi_1_5, y1r); + psi_r_m1_p5 = _mm256_abs_epi16(xmm2); + xmm2 = 
_mm256_adds_epi16(rho_rmi_1_3, y1r); + psi_r_m1_p3 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rmi_1_1, y1r); + psi_r_m1_p1 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rpi_1_1, y1r); + psi_r_m1_m1 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rpi_1_3, y1r); + psi_r_m1_m3 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rpi_1_5, y1r); + psi_r_m1_m5 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rpi_1_7, y1r); + psi_r_m1_m7 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rmi_3_7, y1r); + psi_r_m3_p7 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rmi_3_5, y1r); + psi_r_m3_p5 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rmi_3_3, y1r); + psi_r_m3_p3 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rmi_3_1, y1r); + psi_r_m3_p1 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rpi_3_1, y1r); + psi_r_m3_m1 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rpi_3_3, y1r); + psi_r_m3_m3 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rpi_3_5, y1r); + psi_r_m3_m5 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rpi_3_7, y1r); + psi_r_m3_m7 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rmi_5_7, y1r); + psi_r_m5_p7 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rmi_5_5, y1r); + psi_r_m5_p5 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rmi_5_3, y1r); + psi_r_m5_p3 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rmi_5_1, y1r); + psi_r_m5_p1 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rpi_5_1, y1r); + psi_r_m5_m1 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rpi_5_3, y1r); + psi_r_m5_m3 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rpi_5_5, y1r); + psi_r_m5_m5 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rpi_5_7, y1r); + psi_r_m5_m7 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rmi_7_7, y1r); + psi_r_m7_p7 = _mm256_abs_epi16(xmm2); + xmm2 = 
_mm256_adds_epi16(rho_rmi_7_5, y1r); + psi_r_m7_p5 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rmi_7_3, y1r); + psi_r_m7_p3 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rmi_7_1, y1r); + psi_r_m7_p1 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rpi_7_1, y1r); + psi_r_m7_m1 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rpi_7_3, y1r); + psi_r_m7_m3 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rpi_7_5, y1r); + psi_r_m7_m5 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rpi_7_7, y1r); + psi_r_m7_m7 = _mm256_abs_epi16(xmm2); + + // Psi_i calculation from rho_rpi or rho_rmi + xmm2 = _mm256_subs_epi16(rho_rmi_7_7, y1i); + psi_i_p7_p7 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rmi_5_7, y1i); + psi_i_p7_p5 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rmi_3_7, y1i); + psi_i_p7_p3 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rmi_1_7, y1i); + psi_i_p7_p1 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rpi_1_7, y1i); + psi_i_p7_m1 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rpi_3_7, y1i); + psi_i_p7_m3 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rpi_5_7, y1i); + psi_i_p7_m5 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rpi_7_7, y1i); + psi_i_p7_m7 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rmi_7_5, y1i); + psi_i_p5_p7 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rmi_5_5, y1i); + psi_i_p5_p5 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rmi_3_5, y1i); + psi_i_p5_p3 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rmi_1_5, y1i); + psi_i_p5_p1 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rpi_1_5, y1i); + psi_i_p5_m1 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rpi_3_5, y1i); + psi_i_p5_m3 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rpi_5_5, y1i); + psi_i_p5_m5 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rpi_7_5, y1i); + psi_i_p5_m7 = 
_mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rmi_7_3, y1i); + psi_i_p3_p7 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rmi_5_3, y1i); + psi_i_p3_p5 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rmi_3_3, y1i); + psi_i_p3_p3 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rmi_1_3, y1i); + psi_i_p3_p1 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rpi_1_3, y1i); + psi_i_p3_m1 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rpi_3_3, y1i); + psi_i_p3_m3 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rpi_5_3, y1i); + psi_i_p3_m5 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rpi_7_3, y1i); + psi_i_p3_m7 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rmi_7_1, y1i); + psi_i_p1_p7 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rmi_5_1, y1i); + psi_i_p1_p5 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rmi_3_1, y1i); + psi_i_p1_p3 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rmi_1_1, y1i); + psi_i_p1_p1 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rpi_1_1, y1i); + psi_i_p1_m1 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rpi_3_1, y1i); + psi_i_p1_m3 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rpi_5_1, y1i); + psi_i_p1_m5 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rpi_7_1, y1i); + psi_i_p1_m7 = _mm256_abs_epi16(xmm2); + + xmm2 = _mm256_subs_epi16(rho_rpi_7_1, y1i); + psi_i_m1_p7 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rpi_5_1, y1i); + psi_i_m1_p5 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rpi_3_1, y1i); + psi_i_m1_p3 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rpi_1_1, y1i); + psi_i_m1_p1 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rmi_1_1, y1i); + psi_i_m1_m1 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rmi_3_1, y1i); + psi_i_m1_m3 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rmi_5_1, y1i); + psi_i_m1_m5 = 
_mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rmi_7_1, y1i); + psi_i_m1_m7 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rpi_7_3, y1i); + psi_i_m3_p7 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rpi_5_3, y1i); + psi_i_m3_p5 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rpi_3_3, y1i); + psi_i_m3_p3 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rpi_1_3, y1i); + psi_i_m3_p1 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rmi_1_3, y1i); + psi_i_m3_m1 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rmi_3_3, y1i); + psi_i_m3_m3 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rmi_5_3, y1i); + psi_i_m3_m5 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rmi_7_3, y1i); + psi_i_m3_m7 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rpi_7_5, y1i); + psi_i_m5_p7 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rpi_5_5, y1i); + psi_i_m5_p5 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rpi_3_5, y1i); + psi_i_m5_p3 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rpi_1_5, y1i); + psi_i_m5_p1 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rmi_1_5, y1i); + psi_i_m5_m1 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rmi_3_5, y1i); + psi_i_m5_m3 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rmi_5_5, y1i); + psi_i_m5_m5 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rmi_7_5, y1i); + psi_i_m5_m7 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rpi_7_7, y1i); + psi_i_m7_p7 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rpi_5_7, y1i); + psi_i_m7_p5 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rpi_3_7, y1i); + psi_i_m7_p3 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rpi_1_7, y1i); + psi_i_m7_p1 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rmi_1_7, y1i); + psi_i_m7_m1 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rmi_3_7, y1i); + psi_i_m7_m3 = 
_mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rmi_5_7, y1i); + psi_i_m7_m5 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rmi_7_7, y1i); + psi_i_m7_m7 = _mm256_abs_epi16(xmm2); + + /* + // Rearrange desired MF output + xmm0 = stream0_256i_in[i]; + xmm1 = stream0_256i_in[i+1]; + xmm0 = _mm_shufflelo_epi16(xmm0,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm0 = _mm_shufflehi_epi16(xmm0,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm0 = _mm_shuffle_epi32(xmm0,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm1 = _mm_shufflelo_epi16(xmm1,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm1 = _mm_shufflehi_epi16(xmm1,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm1 = _mm_shuffle_epi32(xmm1,0xd8); //_MM_SHUFFLE(0,2,1,3)); + //xmm0 = [Re(0,1) Re(2,3) Im(0,1) Im(2,3)] + //xmm1 = [Re(4,5) Re(6,7) Im(4,5) Im(6,7)] + y0r = _mm_unpacklo_epi64(xmm0,xmm1); // = [y0r(1),y0r(2),y0r(3),y0r(4)] + y0i = _mm_unpackhi_epi64(xmm0,xmm1); + */ + seperate_real_imag_parts(&y0r, &y0i, stream0_256i_in[i], stream0_256i_in[i+1]); + + // Rearrange desired channel magnitudes + // [|h|^2(1),|h|^2(1),|h|^2(2),|h|^2(2),...,,|h|^2(7),|h|^2(7)]*(2/sqrt(10)) + /* + xmm2 = ch_mag_256i[i]; + xmm3 = ch_mag_256i[i+1]; + xmm2 = _mm_shufflelo_epi16(xmm2,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm2 = _mm_shufflehi_epi16(xmm2,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm2 = _mm_shuffle_epi32(xmm2,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm3 = _mm_shufflelo_epi16(xmm3,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm3 = _mm_shufflehi_epi16(xmm3,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm3 = _mm_shuffle_epi32(xmm3,0xd8); //_MM_SHUFFLE(0,2,1,3)); + ch_mag_des = _mm_unpacklo_epi64(xmm2,xmm3); + */ + // xmm2 is dummy variable that contains the same values as ch_mag_des + seperate_real_imag_parts(&ch_mag_des, &xmm2, ch_mag_256i[i], ch_mag_256i[i+1]); + + + // Rearrange interfering channel magnitudes + /* + xmm2 = ch_mag_256i_i[i]; + xmm3 = ch_mag_256i_i[i+1]; + xmm2 = _mm_shufflelo_epi16(xmm2,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm2 = _mm_shufflehi_epi16(xmm2,0xd8); //_MM_SHUFFLE(0,2,1,3)); + 
xmm2 = _mm_shuffle_epi32(xmm2,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm3 = _mm_shufflelo_epi16(xmm3,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm3 = _mm_shufflehi_epi16(xmm3,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm3 = _mm_shuffle_epi32(xmm3,0xd8); //_MM_SHUFFLE(0,2,1,3)); + ch_mag_int = _mm_unpacklo_epi64(xmm2,xmm3); + */ + seperate_real_imag_parts(&ch_mag_int, &xmm2, ch_mag_256i_i[i], ch_mag_256i_i[i+1]); + + y0r_one_over_sqrt_21 = _mm256_mulhi_epi16(y0r, ONE_OVER_SQRT_42); + y0r_three_over_sqrt_21 = _mm256_mulhi_epi16(y0r, THREE_OVER_SQRT_42); + y0r_five_over_sqrt_21 = _mm256_mulhi_epi16(y0r, FIVE_OVER_SQRT_42); + y0r_five_over_sqrt_21 = _mm256_slli_epi16(y0r_five_over_sqrt_21, 1); + y0r_seven_over_sqrt_21 = _mm256_mulhi_epi16(y0r, SEVEN_OVER_SQRT_42); + y0r_seven_over_sqrt_21 = _mm256_slli_epi16(y0r_seven_over_sqrt_21, 2); // Q2.14 + + y0i_one_over_sqrt_21 = _mm256_mulhi_epi16(y0i, ONE_OVER_SQRT_42); + y0i_three_over_sqrt_21 = _mm256_mulhi_epi16(y0i, THREE_OVER_SQRT_42); + y0i_five_over_sqrt_21 = _mm256_mulhi_epi16(y0i, FIVE_OVER_SQRT_42); + y0i_five_over_sqrt_21 = _mm256_slli_epi16(y0i_five_over_sqrt_21, 1); + y0i_seven_over_sqrt_21 = _mm256_mulhi_epi16(y0i, SEVEN_OVER_SQRT_42); + y0i_seven_over_sqrt_21 = _mm256_slli_epi16(y0i_seven_over_sqrt_21, 2); // Q2.14 + + + y0_p_7_1 = _mm256_adds_epi16(y0r_seven_over_sqrt_21, y0i_one_over_sqrt_21); + y0_p_7_3 = _mm256_adds_epi16(y0r_seven_over_sqrt_21, y0i_three_over_sqrt_21); + y0_p_7_5 = _mm256_adds_epi16(y0r_seven_over_sqrt_21, y0i_five_over_sqrt_21); + y0_p_7_7 = _mm256_adds_epi16(y0r_seven_over_sqrt_21, y0i_seven_over_sqrt_21); + y0_p_5_1 = _mm256_adds_epi16(y0r_five_over_sqrt_21, y0i_one_over_sqrt_21); + y0_p_5_3 = _mm256_adds_epi16(y0r_five_over_sqrt_21, y0i_three_over_sqrt_21); + y0_p_5_5 = _mm256_adds_epi16(y0r_five_over_sqrt_21, y0i_five_over_sqrt_21); + y0_p_5_7 = _mm256_adds_epi16(y0r_five_over_sqrt_21, y0i_seven_over_sqrt_21); + y0_p_3_1 = _mm256_adds_epi16(y0r_three_over_sqrt_21, y0i_one_over_sqrt_21); + y0_p_3_3 = 
_mm256_adds_epi16(y0r_three_over_sqrt_21, y0i_three_over_sqrt_21); + y0_p_3_5 = _mm256_adds_epi16(y0r_three_over_sqrt_21, y0i_five_over_sqrt_21); + y0_p_3_7 = _mm256_adds_epi16(y0r_three_over_sqrt_21, y0i_seven_over_sqrt_21); + y0_p_1_1 = _mm256_adds_epi16(y0r_one_over_sqrt_21, y0i_one_over_sqrt_21); + y0_p_1_3 = _mm256_adds_epi16(y0r_one_over_sqrt_21, y0i_three_over_sqrt_21); + y0_p_1_5 = _mm256_adds_epi16(y0r_one_over_sqrt_21, y0i_five_over_sqrt_21); + y0_p_1_7 = _mm256_adds_epi16(y0r_one_over_sqrt_21, y0i_seven_over_sqrt_21); + + y0_m_1_1 = _mm256_subs_epi16(y0r_one_over_sqrt_21, y0i_one_over_sqrt_21); + y0_m_1_3 = _mm256_subs_epi16(y0r_one_over_sqrt_21, y0i_three_over_sqrt_21); + y0_m_1_5 = _mm256_subs_epi16(y0r_one_over_sqrt_21, y0i_five_over_sqrt_21); + y0_m_1_7 = _mm256_subs_epi16(y0r_one_over_sqrt_21, y0i_seven_over_sqrt_21); + y0_m_3_1 = _mm256_subs_epi16(y0r_three_over_sqrt_21, y0i_one_over_sqrt_21); + y0_m_3_3 = _mm256_subs_epi16(y0r_three_over_sqrt_21, y0i_three_over_sqrt_21); + y0_m_3_5 = _mm256_subs_epi16(y0r_three_over_sqrt_21, y0i_five_over_sqrt_21); + y0_m_3_7 = _mm256_subs_epi16(y0r_three_over_sqrt_21, y0i_seven_over_sqrt_21); + y0_m_5_1 = _mm256_subs_epi16(y0r_five_over_sqrt_21, y0i_one_over_sqrt_21); + y0_m_5_3 = _mm256_subs_epi16(y0r_five_over_sqrt_21, y0i_three_over_sqrt_21); + y0_m_5_5 = _mm256_subs_epi16(y0r_five_over_sqrt_21, y0i_five_over_sqrt_21); + y0_m_5_7 = _mm256_subs_epi16(y0r_five_over_sqrt_21, y0i_seven_over_sqrt_21); + y0_m_7_1 = _mm256_subs_epi16(y0r_seven_over_sqrt_21, y0i_one_over_sqrt_21); + y0_m_7_3 = _mm256_subs_epi16(y0r_seven_over_sqrt_21, y0i_three_over_sqrt_21); + y0_m_7_5 = _mm256_subs_epi16(y0r_seven_over_sqrt_21, y0i_five_over_sqrt_21); + y0_m_7_7 = _mm256_subs_epi16(y0r_seven_over_sqrt_21, y0i_seven_over_sqrt_21); + + // Detection of interference term + ch_mag_int_with_sigma2 = _mm256_srai_epi16(ch_mag_int, 1); // *2 + two_ch_mag_int_with_sigma2 = ch_mag_int; // *4 + three_ch_mag_int_with_sigma2 = 
_mm256_adds_epi16(ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2); // *6 + + interference_abs_64qam_epi16(psi_r_p7_p7, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_p7_p7, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_r_p7_p5, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_p7_p5, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_r_p7_p3, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_p7_p3, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_r_p7_p1, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_p7_p1, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_r_p7_m1, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_p7_m1, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_r_p7_m3, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_p7_m3, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_r_p7_m5, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_p7_m5, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_r_p7_m7, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_p7_m7, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_r_p5_p7, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, 
three_ch_mag_int_with_sigma2, a_r_p5_p7, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_r_p5_p5, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_p5_p5, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_r_p5_p3, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_p5_p3, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_r_p5_p1, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_p5_p1, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_r_p5_m1, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_p5_m1, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_r_p5_m3, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_p5_m3, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_r_p5_m5, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_p5_m5, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_r_p5_m7, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_p5_m7, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_r_p3_p7, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_p3_p7, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_r_p3_p5, ch_mag_int_with_sigma2, 
two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_p3_p5, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_r_p3_p3, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_p3_p3, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_r_p3_p1, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_p3_p1, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_r_p3_m1, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_p3_m1, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_r_p3_m3, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_p3_m3, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_r_p3_m5, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_p3_m5, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_r_p3_m7, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_p3_m7, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_r_p1_p7, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_p1_p7, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_r_p1_p5, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_p1_p5, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + 
interference_abs_64qam_epi16(psi_r_p1_p3, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_p1_p3, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_r_p1_p1, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_p1_p1, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_r_p1_m1, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_p1_m1, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_r_p1_m3, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_p1_m3, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_r_p1_m5, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_p1_m5, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_r_p1_m7, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_p1_m7, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_r_m1_p7, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_m1_p7, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_r_m1_p5, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_m1_p5, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_r_m1_p3, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_m1_p3, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + 
SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_r_m1_p1, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_m1_p1, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_r_m1_m1, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_m1_m1, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_r_m1_m3, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_m1_m3, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_r_m1_m5, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_m1_m5, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_r_m1_m7, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_m1_m7, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_r_m3_p7, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_m3_p7, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_r_m3_p5, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_m3_p5, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_r_m3_p3, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_m3_p3, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_r_m3_p1, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_m3_p1, ONE_OVER_SQRT_2_42, 
THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_r_m3_m1, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_m3_m1, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_r_m3_m3, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_m3_m3, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_r_m3_m5, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_m3_m5, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_r_m3_m7, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_m3_m7, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_r_m5_p7, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_m5_p7, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_r_m5_p5, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_m5_p5, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_r_m5_p3, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_m5_p3, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_r_m5_p1, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_m5_p1, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_r_m5_m1, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, 
a_r_m5_m1, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_r_m5_m3, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_m5_m3, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_r_m5_m5, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_m5_m5, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_r_m5_m7, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_m5_m7, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_r_m7_p7, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_m7_p7, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_r_m7_p5, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_m7_p5, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_r_m7_p3, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_m7_p3, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_r_m7_p1, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_m7_p1, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_r_m7_m1, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_m7_m1, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_r_m7_m3, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, 
three_ch_mag_int_with_sigma2, a_r_m7_m3, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_r_m7_m5, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_m7_m5, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_r_m7_m7, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_m7_m7, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + + interference_abs_64qam_epi16(psi_i_p7_p7, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_p7_p7, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_i_p7_p5, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_p7_p5, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_i_p7_p3, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_p7_p3, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_i_p7_p1, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_p7_p1, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_i_p7_m1, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_p7_m1, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_i_p7_m3, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_p7_m3, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_i_p7_m5, ch_mag_int_with_sigma2, 
two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_p7_m5, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_i_p7_m7, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_p7_m7, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_i_p5_p7, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_p5_p7, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_i_p5_p5, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_p5_p5, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_i_p5_p3, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_p5_p3, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_i_p5_p1, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_p5_p1, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_i_p5_m1, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_p5_m1, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_i_p5_m3, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_p5_m3, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_i_p5_m5, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_p5_m5, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + 
interference_abs_64qam_epi16(psi_i_p5_m7, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_p5_m7, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_i_p3_p7, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_p3_p7, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_i_p3_p5, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_p3_p5, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_i_p3_p3, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_p3_p3, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_i_p3_p1, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_p3_p1, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_i_p3_m1, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_p3_m1, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_i_p3_m3, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_p3_m3, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_i_p3_m5, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_p3_m5, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_i_p3_m7, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_p3_m7, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + 
SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_i_p1_p7, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_p1_p7, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_i_p1_p5, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_p1_p5, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_i_p1_p3, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_p1_p3, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_i_p1_p1, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_p1_p1, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_i_p1_m1, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_p1_m1, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_i_p1_m3, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_p1_m3, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_i_p1_m5, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_p1_m5, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_i_p1_m7, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_p1_m7, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_i_m1_p7, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_m1_p7, ONE_OVER_SQRT_2_42, 
THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_i_m1_p5, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_m1_p5, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_i_m1_p3, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_m1_p3, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_i_m1_p1, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_m1_p1, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_i_m1_m1, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_m1_m1, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_i_m1_m3, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_m1_m3, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_i_m1_m5, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_m1_m5, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_i_m1_m7, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_m1_m7, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_i_m3_p7, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_m3_p7, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_i_m3_p5, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, 
a_i_m3_p5, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_i_m3_p3, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_m3_p3, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_i_m3_p1, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_m3_p1, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_i_m3_m1, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_m3_m1, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_i_m3_m3, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_m3_m3, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_i_m3_m5, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_m3_m5, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_i_m3_m7, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_m3_m7, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_i_m5_p7, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_m5_p7, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_i_m5_p5, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_m5_p5, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_i_m5_p3, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, 
three_ch_mag_int_with_sigma2, a_i_m5_p3, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_i_m5_p1, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_m5_p1, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_i_m5_m1, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_m5_m1, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_i_m5_m3, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_m5_m3, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_i_m5_m5, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_m5_m5, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_i_m5_m7, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_m5_m7, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_i_m7_p7, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_m7_p7, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_i_m7_p5, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_m7_p5, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_i_m7_p3, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_m7_p3, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_i_m7_p1, ch_mag_int_with_sigma2, 
two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_m7_p1, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_i_m7_m1, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_m7_m1, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_i_m7_m3, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_m7_m3, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_i_m7_m5, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_m7_m5, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_i_m7_m7, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_m7_m7, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + + // Calculation of a group of two terms in the bit metric involving product of psi and interference + prodsum_psi_a_epi16(psi_r_p7_p7, a_r_p7_p7, psi_i_p7_p7, a_i_p7_p7, psi_a_p7_p7); + prodsum_psi_a_epi16(psi_r_p7_p5, a_r_p7_p5, psi_i_p7_p5, a_i_p7_p5, psi_a_p7_p5); + prodsum_psi_a_epi16(psi_r_p7_p3, a_r_p7_p3, psi_i_p7_p3, a_i_p7_p3, psi_a_p7_p3); + prodsum_psi_a_epi16(psi_r_p7_p1, a_r_p7_p1, psi_i_p7_p1, a_i_p7_p1, psi_a_p7_p1); + prodsum_psi_a_epi16(psi_r_p7_m1, a_r_p7_m1, psi_i_p7_m1, a_i_p7_m1, psi_a_p7_m1); + prodsum_psi_a_epi16(psi_r_p7_m3, a_r_p7_m3, psi_i_p7_m3, a_i_p7_m3, psi_a_p7_m3); + prodsum_psi_a_epi16(psi_r_p7_m5, a_r_p7_m5, psi_i_p7_m5, a_i_p7_m5, psi_a_p7_m5); + prodsum_psi_a_epi16(psi_r_p7_m7, a_r_p7_m7, psi_i_p7_m7, a_i_p7_m7, psi_a_p7_m7); + prodsum_psi_a_epi16(psi_r_p5_p7, a_r_p5_p7, psi_i_p5_p7, a_i_p5_p7, psi_a_p5_p7); + prodsum_psi_a_epi16(psi_r_p5_p5, a_r_p5_p5, psi_i_p5_p5, a_i_p5_p5, psi_a_p5_p5); 
+ prodsum_psi_a_epi16(psi_r_p5_p3, a_r_p5_p3, psi_i_p5_p3, a_i_p5_p3, psi_a_p5_p3); + prodsum_psi_a_epi16(psi_r_p5_p1, a_r_p5_p1, psi_i_p5_p1, a_i_p5_p1, psi_a_p5_p1); + prodsum_psi_a_epi16(psi_r_p5_m1, a_r_p5_m1, psi_i_p5_m1, a_i_p5_m1, psi_a_p5_m1); + prodsum_psi_a_epi16(psi_r_p5_m3, a_r_p5_m3, psi_i_p5_m3, a_i_p5_m3, psi_a_p5_m3); + prodsum_psi_a_epi16(psi_r_p5_m5, a_r_p5_m5, psi_i_p5_m5, a_i_p5_m5, psi_a_p5_m5); + prodsum_psi_a_epi16(psi_r_p5_m7, a_r_p5_m7, psi_i_p5_m7, a_i_p5_m7, psi_a_p5_m7); + prodsum_psi_a_epi16(psi_r_p3_p7, a_r_p3_p7, psi_i_p3_p7, a_i_p3_p7, psi_a_p3_p7); + prodsum_psi_a_epi16(psi_r_p3_p5, a_r_p3_p5, psi_i_p3_p5, a_i_p3_p5, psi_a_p3_p5); + prodsum_psi_a_epi16(psi_r_p3_p3, a_r_p3_p3, psi_i_p3_p3, a_i_p3_p3, psi_a_p3_p3); + prodsum_psi_a_epi16(psi_r_p3_p1, a_r_p3_p1, psi_i_p3_p1, a_i_p3_p1, psi_a_p3_p1); + prodsum_psi_a_epi16(psi_r_p3_m1, a_r_p3_m1, psi_i_p3_m1, a_i_p3_m1, psi_a_p3_m1); + prodsum_psi_a_epi16(psi_r_p3_m3, a_r_p3_m3, psi_i_p3_m3, a_i_p3_m3, psi_a_p3_m3); + prodsum_psi_a_epi16(psi_r_p3_m5, a_r_p3_m5, psi_i_p3_m5, a_i_p3_m5, psi_a_p3_m5); + prodsum_psi_a_epi16(psi_r_p3_m7, a_r_p3_m7, psi_i_p3_m7, a_i_p3_m7, psi_a_p3_m7); + prodsum_psi_a_epi16(psi_r_p1_p7, a_r_p1_p7, psi_i_p1_p7, a_i_p1_p7, psi_a_p1_p7); + prodsum_psi_a_epi16(psi_r_p1_p5, a_r_p1_p5, psi_i_p1_p5, a_i_p1_p5, psi_a_p1_p5); + prodsum_psi_a_epi16(psi_r_p1_p3, a_r_p1_p3, psi_i_p1_p3, a_i_p1_p3, psi_a_p1_p3); + prodsum_psi_a_epi16(psi_r_p1_p1, a_r_p1_p1, psi_i_p1_p1, a_i_p1_p1, psi_a_p1_p1); + prodsum_psi_a_epi16(psi_r_p1_m1, a_r_p1_m1, psi_i_p1_m1, a_i_p1_m1, psi_a_p1_m1); + prodsum_psi_a_epi16(psi_r_p1_m3, a_r_p1_m3, psi_i_p1_m3, a_i_p1_m3, psi_a_p1_m3); + prodsum_psi_a_epi16(psi_r_p1_m5, a_r_p1_m5, psi_i_p1_m5, a_i_p1_m5, psi_a_p1_m5); + prodsum_psi_a_epi16(psi_r_p1_m7, a_r_p1_m7, psi_i_p1_m7, a_i_p1_m7, psi_a_p1_m7); + prodsum_psi_a_epi16(psi_r_m1_p7, a_r_m1_p7, psi_i_m1_p7, a_i_m1_p7, psi_a_m1_p7); + prodsum_psi_a_epi16(psi_r_m1_p5, a_r_m1_p5, psi_i_m1_p5, 
a_i_m1_p5, psi_a_m1_p5); + prodsum_psi_a_epi16(psi_r_m1_p3, a_r_m1_p3, psi_i_m1_p3, a_i_m1_p3, psi_a_m1_p3); + prodsum_psi_a_epi16(psi_r_m1_p1, a_r_m1_p1, psi_i_m1_p1, a_i_m1_p1, psi_a_m1_p1); + prodsum_psi_a_epi16(psi_r_m1_m1, a_r_m1_m1, psi_i_m1_m1, a_i_m1_m1, psi_a_m1_m1); + prodsum_psi_a_epi16(psi_r_m1_m3, a_r_m1_m3, psi_i_m1_m3, a_i_m1_m3, psi_a_m1_m3); + prodsum_psi_a_epi16(psi_r_m1_m5, a_r_m1_m5, psi_i_m1_m5, a_i_m1_m5, psi_a_m1_m5); + prodsum_psi_a_epi16(psi_r_m1_m7, a_r_m1_m7, psi_i_m1_m7, a_i_m1_m7, psi_a_m1_m7); + prodsum_psi_a_epi16(psi_r_m3_p7, a_r_m3_p7, psi_i_m3_p7, a_i_m3_p7, psi_a_m3_p7); + prodsum_psi_a_epi16(psi_r_m3_p5, a_r_m3_p5, psi_i_m3_p5, a_i_m3_p5, psi_a_m3_p5); + prodsum_psi_a_epi16(psi_r_m3_p3, a_r_m3_p3, psi_i_m3_p3, a_i_m3_p3, psi_a_m3_p3); + prodsum_psi_a_epi16(psi_r_m3_p1, a_r_m3_p1, psi_i_m3_p1, a_i_m3_p1, psi_a_m3_p1); + prodsum_psi_a_epi16(psi_r_m3_m1, a_r_m3_m1, psi_i_m3_m1, a_i_m3_m1, psi_a_m3_m1); + prodsum_psi_a_epi16(psi_r_m3_m3, a_r_m3_m3, psi_i_m3_m3, a_i_m3_m3, psi_a_m3_m3); + prodsum_psi_a_epi16(psi_r_m3_m5, a_r_m3_m5, psi_i_m3_m5, a_i_m3_m5, psi_a_m3_m5); + prodsum_psi_a_epi16(psi_r_m3_m7, a_r_m3_m7, psi_i_m3_m7, a_i_m3_m7, psi_a_m3_m7); + prodsum_psi_a_epi16(psi_r_m5_p7, a_r_m5_p7, psi_i_m5_p7, a_i_m5_p7, psi_a_m5_p7); + prodsum_psi_a_epi16(psi_r_m5_p5, a_r_m5_p5, psi_i_m5_p5, a_i_m5_p5, psi_a_m5_p5); + prodsum_psi_a_epi16(psi_r_m5_p3, a_r_m5_p3, psi_i_m5_p3, a_i_m5_p3, psi_a_m5_p3); + prodsum_psi_a_epi16(psi_r_m5_p1, a_r_m5_p1, psi_i_m5_p1, a_i_m5_p1, psi_a_m5_p1); + prodsum_psi_a_epi16(psi_r_m5_m1, a_r_m5_m1, psi_i_m5_m1, a_i_m5_m1, psi_a_m5_m1); + prodsum_psi_a_epi16(psi_r_m5_m3, a_r_m5_m3, psi_i_m5_m3, a_i_m5_m3, psi_a_m5_m3); + prodsum_psi_a_epi16(psi_r_m5_m5, a_r_m5_m5, psi_i_m5_m5, a_i_m5_m5, psi_a_m5_m5); + prodsum_psi_a_epi16(psi_r_m5_m7, a_r_m5_m7, psi_i_m5_m7, a_i_m5_m7, psi_a_m5_m7); + prodsum_psi_a_epi16(psi_r_m7_p7, a_r_m7_p7, psi_i_m7_p7, a_i_m7_p7, psi_a_m7_p7); + prodsum_psi_a_epi16(psi_r_m7_p5, 
a_r_m7_p5, psi_i_m7_p5, a_i_m7_p5, psi_a_m7_p5); + prodsum_psi_a_epi16(psi_r_m7_p3, a_r_m7_p3, psi_i_m7_p3, a_i_m7_p3, psi_a_m7_p3); + prodsum_psi_a_epi16(psi_r_m7_p1, a_r_m7_p1, psi_i_m7_p1, a_i_m7_p1, psi_a_m7_p1); + prodsum_psi_a_epi16(psi_r_m7_m1, a_r_m7_m1, psi_i_m7_m1, a_i_m7_m1, psi_a_m7_m1); + prodsum_psi_a_epi16(psi_r_m7_m3, a_r_m7_m3, psi_i_m7_m3, a_i_m7_m3, psi_a_m7_m3); + prodsum_psi_a_epi16(psi_r_m7_m5, a_r_m7_m5, psi_i_m7_m5, a_i_m7_m5, psi_a_m7_m5); + prodsum_psi_a_epi16(psi_r_m7_m7, a_r_m7_m7, psi_i_m7_m7, a_i_m7_m7, psi_a_m7_m7); + + // Multiply by sqrt(2) + psi_a_p7_p7 = _mm256_mulhi_epi16(psi_a_p7_p7, ONE_OVER_SQRT_2); + psi_a_p7_p7 = _mm256_slli_epi16(psi_a_p7_p7, 2); + psi_a_p7_p5 = _mm256_mulhi_epi16(psi_a_p7_p5, ONE_OVER_SQRT_2); + psi_a_p7_p5 = _mm256_slli_epi16(psi_a_p7_p5, 2); + psi_a_p7_p3 = _mm256_mulhi_epi16(psi_a_p7_p3, ONE_OVER_SQRT_2); + psi_a_p7_p3 = _mm256_slli_epi16(psi_a_p7_p3, 2); + psi_a_p7_p1 = _mm256_mulhi_epi16(psi_a_p7_p1, ONE_OVER_SQRT_2); + psi_a_p7_p1 = _mm256_slli_epi16(psi_a_p7_p1, 2); + psi_a_p7_m1 = _mm256_mulhi_epi16(psi_a_p7_m1, ONE_OVER_SQRT_2); + psi_a_p7_m1 = _mm256_slli_epi16(psi_a_p7_m1, 2); + psi_a_p7_m3 = _mm256_mulhi_epi16(psi_a_p7_m3, ONE_OVER_SQRT_2); + psi_a_p7_m3 = _mm256_slli_epi16(psi_a_p7_m3, 2); + psi_a_p7_m5 = _mm256_mulhi_epi16(psi_a_p7_m5, ONE_OVER_SQRT_2); + psi_a_p7_m5 = _mm256_slli_epi16(psi_a_p7_m5, 2); + psi_a_p7_m7 = _mm256_mulhi_epi16(psi_a_p7_m7, ONE_OVER_SQRT_2); + psi_a_p7_m7 = _mm256_slli_epi16(psi_a_p7_m7, 2); + psi_a_p5_p7 = _mm256_mulhi_epi16(psi_a_p5_p7, ONE_OVER_SQRT_2); + psi_a_p5_p7 = _mm256_slli_epi16(psi_a_p5_p7, 2); + psi_a_p5_p5 = _mm256_mulhi_epi16(psi_a_p5_p5, ONE_OVER_SQRT_2); + psi_a_p5_p5 = _mm256_slli_epi16(psi_a_p5_p5, 2); + psi_a_p5_p3 = _mm256_mulhi_epi16(psi_a_p5_p3, ONE_OVER_SQRT_2); + psi_a_p5_p3 = _mm256_slli_epi16(psi_a_p5_p3, 2); + psi_a_p5_p1 = _mm256_mulhi_epi16(psi_a_p5_p1, ONE_OVER_SQRT_2); + psi_a_p5_p1 = _mm256_slli_epi16(psi_a_p5_p1, 2); + psi_a_p5_m1 = 
_mm256_mulhi_epi16(psi_a_p5_m1, ONE_OVER_SQRT_2); + psi_a_p5_m1 = _mm256_slli_epi16(psi_a_p5_m1, 2); + psi_a_p5_m3 = _mm256_mulhi_epi16(psi_a_p5_m3, ONE_OVER_SQRT_2); + psi_a_p5_m3 = _mm256_slli_epi16(psi_a_p5_m3, 2); + psi_a_p5_m5 = _mm256_mulhi_epi16(psi_a_p5_m5, ONE_OVER_SQRT_2); + psi_a_p5_m5 = _mm256_slli_epi16(psi_a_p5_m5, 2); + psi_a_p5_m7 = _mm256_mulhi_epi16(psi_a_p5_m7, ONE_OVER_SQRT_2); + psi_a_p5_m7 = _mm256_slli_epi16(psi_a_p5_m7, 2); + psi_a_p3_p7 = _mm256_mulhi_epi16(psi_a_p3_p7, ONE_OVER_SQRT_2); + psi_a_p3_p7 = _mm256_slli_epi16(psi_a_p3_p7, 2); + psi_a_p3_p5 = _mm256_mulhi_epi16(psi_a_p3_p5, ONE_OVER_SQRT_2); + psi_a_p3_p5 = _mm256_slli_epi16(psi_a_p3_p5, 2); + psi_a_p3_p3 = _mm256_mulhi_epi16(psi_a_p3_p3, ONE_OVER_SQRT_2); + psi_a_p3_p3 = _mm256_slli_epi16(psi_a_p3_p3, 2); + psi_a_p3_p1 = _mm256_mulhi_epi16(psi_a_p3_p1, ONE_OVER_SQRT_2); + psi_a_p3_p1 = _mm256_slli_epi16(psi_a_p3_p1, 2); + psi_a_p3_m1 = _mm256_mulhi_epi16(psi_a_p3_m1, ONE_OVER_SQRT_2); + psi_a_p3_m1 = _mm256_slli_epi16(psi_a_p3_m1, 2); + psi_a_p3_m3 = _mm256_mulhi_epi16(psi_a_p3_m3, ONE_OVER_SQRT_2); + psi_a_p3_m3 = _mm256_slli_epi16(psi_a_p3_m3, 2); + psi_a_p3_m5 = _mm256_mulhi_epi16(psi_a_p3_m5, ONE_OVER_SQRT_2); + psi_a_p3_m5 = _mm256_slli_epi16(psi_a_p3_m5, 2); + psi_a_p3_m7 = _mm256_mulhi_epi16(psi_a_p3_m7, ONE_OVER_SQRT_2); + psi_a_p3_m7 = _mm256_slli_epi16(psi_a_p3_m7, 2); + psi_a_p1_p7 = _mm256_mulhi_epi16(psi_a_p1_p7, ONE_OVER_SQRT_2); + psi_a_p1_p7 = _mm256_slli_epi16(psi_a_p1_p7, 2); + psi_a_p1_p5 = _mm256_mulhi_epi16(psi_a_p1_p5, ONE_OVER_SQRT_2); + psi_a_p1_p5 = _mm256_slli_epi16(psi_a_p1_p5, 2); + psi_a_p1_p3 = _mm256_mulhi_epi16(psi_a_p1_p3, ONE_OVER_SQRT_2); + psi_a_p1_p3 = _mm256_slli_epi16(psi_a_p1_p3, 2); + psi_a_p1_p1 = _mm256_mulhi_epi16(psi_a_p1_p1, ONE_OVER_SQRT_2); + psi_a_p1_p1 = _mm256_slli_epi16(psi_a_p1_p1, 2); + psi_a_p1_m1 = _mm256_mulhi_epi16(psi_a_p1_m1, ONE_OVER_SQRT_2); + psi_a_p1_m1 = _mm256_slli_epi16(psi_a_p1_m1, 2); + psi_a_p1_m3 = 
_mm256_mulhi_epi16(psi_a_p1_m3, ONE_OVER_SQRT_2); + psi_a_p1_m3 = _mm256_slli_epi16(psi_a_p1_m3, 2); + psi_a_p1_m5 = _mm256_mulhi_epi16(psi_a_p1_m5, ONE_OVER_SQRT_2); + psi_a_p1_m5 = _mm256_slli_epi16(psi_a_p1_m5, 2); + psi_a_p1_m7 = _mm256_mulhi_epi16(psi_a_p1_m7, ONE_OVER_SQRT_2); + psi_a_p1_m7 = _mm256_slli_epi16(psi_a_p1_m7, 2); + psi_a_m1_p7 = _mm256_mulhi_epi16(psi_a_m1_p7, ONE_OVER_SQRT_2); + psi_a_m1_p7 = _mm256_slli_epi16(psi_a_m1_p7, 2); + psi_a_m1_p5 = _mm256_mulhi_epi16(psi_a_m1_p5, ONE_OVER_SQRT_2); + psi_a_m1_p5 = _mm256_slli_epi16(psi_a_m1_p5, 2); + psi_a_m1_p3 = _mm256_mulhi_epi16(psi_a_m1_p3, ONE_OVER_SQRT_2); + psi_a_m1_p3 = _mm256_slli_epi16(psi_a_m1_p3, 2); + psi_a_m1_p1 = _mm256_mulhi_epi16(psi_a_m1_p1, ONE_OVER_SQRT_2); + psi_a_m1_p1 = _mm256_slli_epi16(psi_a_m1_p1, 2); + psi_a_m1_m1 = _mm256_mulhi_epi16(psi_a_m1_m1, ONE_OVER_SQRT_2); + psi_a_m1_m1 = _mm256_slli_epi16(psi_a_m1_m1, 2); + psi_a_m1_m3 = _mm256_mulhi_epi16(psi_a_m1_m3, ONE_OVER_SQRT_2); + psi_a_m1_m3 = _mm256_slli_epi16(psi_a_m1_m3, 2); + psi_a_m1_m5 = _mm256_mulhi_epi16(psi_a_m1_m5, ONE_OVER_SQRT_2); + psi_a_m1_m5 = _mm256_slli_epi16(psi_a_m1_m5, 2); + psi_a_m1_m7 = _mm256_mulhi_epi16(psi_a_m1_m7, ONE_OVER_SQRT_2); + psi_a_m1_m7 = _mm256_slli_epi16(psi_a_m1_m7, 2); + psi_a_m3_p7 = _mm256_mulhi_epi16(psi_a_m3_p7, ONE_OVER_SQRT_2); + psi_a_m3_p7 = _mm256_slli_epi16(psi_a_m3_p7, 2); + psi_a_m3_p5 = _mm256_mulhi_epi16(psi_a_m3_p5, ONE_OVER_SQRT_2); + psi_a_m3_p5 = _mm256_slli_epi16(psi_a_m3_p5, 2); + psi_a_m3_p3 = _mm256_mulhi_epi16(psi_a_m3_p3, ONE_OVER_SQRT_2); + psi_a_m3_p3 = _mm256_slli_epi16(psi_a_m3_p3, 2); + psi_a_m3_p1 = _mm256_mulhi_epi16(psi_a_m3_p1, ONE_OVER_SQRT_2); + psi_a_m3_p1 = _mm256_slli_epi16(psi_a_m3_p1, 2); + psi_a_m3_m1 = _mm256_mulhi_epi16(psi_a_m3_m1, ONE_OVER_SQRT_2); + psi_a_m3_m1 = _mm256_slli_epi16(psi_a_m3_m1, 2); + psi_a_m3_m3 = _mm256_mulhi_epi16(psi_a_m3_m3, ONE_OVER_SQRT_2); + psi_a_m3_m3 = _mm256_slli_epi16(psi_a_m3_m3, 2); + psi_a_m3_m5 = 
_mm256_mulhi_epi16(psi_a_m3_m5, ONE_OVER_SQRT_2); + psi_a_m3_m5 = _mm256_slli_epi16(psi_a_m3_m5, 2); + psi_a_m3_m7 = _mm256_mulhi_epi16(psi_a_m3_m7, ONE_OVER_SQRT_2); + psi_a_m3_m7 = _mm256_slli_epi16(psi_a_m3_m7, 2); + psi_a_m5_p7 = _mm256_mulhi_epi16(psi_a_m5_p7, ONE_OVER_SQRT_2); + psi_a_m5_p7 = _mm256_slli_epi16(psi_a_m5_p7, 2); + psi_a_m5_p5 = _mm256_mulhi_epi16(psi_a_m5_p5, ONE_OVER_SQRT_2); + psi_a_m5_p5 = _mm256_slli_epi16(psi_a_m5_p5, 2); + psi_a_m5_p3 = _mm256_mulhi_epi16(psi_a_m5_p3, ONE_OVER_SQRT_2); + psi_a_m5_p3 = _mm256_slli_epi16(psi_a_m5_p3, 2); + psi_a_m5_p1 = _mm256_mulhi_epi16(psi_a_m5_p1, ONE_OVER_SQRT_2); + psi_a_m5_p1 = _mm256_slli_epi16(psi_a_m5_p1, 2); + psi_a_m5_m1 = _mm256_mulhi_epi16(psi_a_m5_m1, ONE_OVER_SQRT_2); + psi_a_m5_m1 = _mm256_slli_epi16(psi_a_m5_m1, 2); + psi_a_m5_m3 = _mm256_mulhi_epi16(psi_a_m5_m3, ONE_OVER_SQRT_2); + psi_a_m5_m3 = _mm256_slli_epi16(psi_a_m5_m3, 2); + psi_a_m5_m5 = _mm256_mulhi_epi16(psi_a_m5_m5, ONE_OVER_SQRT_2); + psi_a_m5_m5 = _mm256_slli_epi16(psi_a_m5_m5, 2); + psi_a_m5_m7 = _mm256_mulhi_epi16(psi_a_m5_m7, ONE_OVER_SQRT_2); + psi_a_m5_m7 = _mm256_slli_epi16(psi_a_m5_m7, 2); + psi_a_m7_p7 = _mm256_mulhi_epi16(psi_a_m7_p7, ONE_OVER_SQRT_2); + psi_a_m7_p7 = _mm256_slli_epi16(psi_a_m7_p7, 2); + psi_a_m7_p5 = _mm256_mulhi_epi16(psi_a_m7_p5, ONE_OVER_SQRT_2); + psi_a_m7_p5 = _mm256_slli_epi16(psi_a_m7_p5, 2); + psi_a_m7_p3 = _mm256_mulhi_epi16(psi_a_m7_p3, ONE_OVER_SQRT_2); + psi_a_m7_p3 = _mm256_slli_epi16(psi_a_m7_p3, 2); + psi_a_m7_p1 = _mm256_mulhi_epi16(psi_a_m7_p1, ONE_OVER_SQRT_2); + psi_a_m7_p1 = _mm256_slli_epi16(psi_a_m7_p1, 2); + psi_a_m7_m1 = _mm256_mulhi_epi16(psi_a_m7_m1, ONE_OVER_SQRT_2); + psi_a_m7_m1 = _mm256_slli_epi16(psi_a_m7_m1, 2); + psi_a_m7_m3 = _mm256_mulhi_epi16(psi_a_m7_m3, ONE_OVER_SQRT_2); + psi_a_m7_m3 = _mm256_slli_epi16(psi_a_m7_m3, 2); + psi_a_m7_m5 = _mm256_mulhi_epi16(psi_a_m7_m5, ONE_OVER_SQRT_2); + psi_a_m7_m5 = _mm256_slli_epi16(psi_a_m7_m5, 2); + psi_a_m7_m7 = 
_mm256_mulhi_epi16(psi_a_m7_m7, ONE_OVER_SQRT_2); + psi_a_m7_m7 = _mm256_slli_epi16(psi_a_m7_m7, 2); + + // Calculation of a group of two terms in the bit metric involving squares of interference + square_a_64qam_epi16(a_r_p7_p7, a_i_p7_p7, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_p7_p7); + square_a_64qam_epi16(a_r_p7_p5, a_i_p7_p5, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_p7_p5); + square_a_64qam_epi16(a_r_p7_p3, a_i_p7_p3, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_p7_p3); + square_a_64qam_epi16(a_r_p7_p1, a_i_p7_p1, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_p7_p1); + square_a_64qam_epi16(a_r_p7_m1, a_i_p7_m1, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_p7_m1); + square_a_64qam_epi16(a_r_p7_m3, a_i_p7_m3, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_p7_m3); + square_a_64qam_epi16(a_r_p7_m5, a_i_p7_m5, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_p7_m5); + square_a_64qam_epi16(a_r_p7_m7, a_i_p7_m7, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_p7_m7); + square_a_64qam_epi16(a_r_p5_p7, a_i_p5_p7, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_p5_p7); + square_a_64qam_epi16(a_r_p5_p5, a_i_p5_p5, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_p5_p5); + square_a_64qam_epi16(a_r_p5_p3, a_i_p5_p3, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_p5_p3); + square_a_64qam_epi16(a_r_p5_p1, a_i_p5_p1, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_p5_p1); + square_a_64qam_epi16(a_r_p5_m1, a_i_p5_m1, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_p5_m1); + square_a_64qam_epi16(a_r_p5_m3, a_i_p5_m3, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_p5_m3); + square_a_64qam_epi16(a_r_p5_m5, a_i_p5_m5, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_p5_m5); + square_a_64qam_epi16(a_r_p5_m7, a_i_p5_m7, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_p5_m7); + square_a_64qam_epi16(a_r_p3_p7, a_i_p3_p7, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_p3_p7); + square_a_64qam_epi16(a_r_p3_p5, a_i_p3_p5, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_p3_p5); + square_a_64qam_epi16(a_r_p3_p3, a_i_p3_p3, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_p3_p3); + square_a_64qam_epi16(a_r_p3_p1, a_i_p3_p1, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_p3_p1); + 
square_a_64qam_epi16(a_r_p3_m1, a_i_p3_m1, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_p3_m1); + square_a_64qam_epi16(a_r_p3_m3, a_i_p3_m3, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_p3_m3); + square_a_64qam_epi16(a_r_p3_m5, a_i_p3_m5, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_p3_m5); + square_a_64qam_epi16(a_r_p3_m7, a_i_p3_m7, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_p3_m7); + square_a_64qam_epi16(a_r_p1_p7, a_i_p1_p7, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_p1_p7); + square_a_64qam_epi16(a_r_p1_p5, a_i_p1_p5, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_p1_p5); + square_a_64qam_epi16(a_r_p1_p3, a_i_p1_p3, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_p1_p3); + square_a_64qam_epi16(a_r_p1_p1, a_i_p1_p1, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_p1_p1); + square_a_64qam_epi16(a_r_p1_m1, a_i_p1_m1, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_p1_m1); + square_a_64qam_epi16(a_r_p1_m3, a_i_p1_m3, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_p1_m3); + square_a_64qam_epi16(a_r_p1_m5, a_i_p1_m5, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_p1_m5); + square_a_64qam_epi16(a_r_p1_m7, a_i_p1_m7, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_p1_m7); + square_a_64qam_epi16(a_r_m1_p7, a_i_m1_p7, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_m1_p7); + square_a_64qam_epi16(a_r_m1_p5, a_i_m1_p5, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_m1_p5); + square_a_64qam_epi16(a_r_m1_p3, a_i_m1_p3, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_m1_p3); + square_a_64qam_epi16(a_r_m1_p1, a_i_m1_p1, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_m1_p1); + square_a_64qam_epi16(a_r_m1_m1, a_i_m1_m1, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_m1_m1); + square_a_64qam_epi16(a_r_m1_m3, a_i_m1_m3, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_m1_m3); + square_a_64qam_epi16(a_r_m1_m5, a_i_m1_m5, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_m1_m5); + square_a_64qam_epi16(a_r_m1_m7, a_i_m1_m7, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_m1_m7); + square_a_64qam_epi16(a_r_m3_p7, a_i_m3_p7, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_m3_p7); + square_a_64qam_epi16(a_r_m3_p5, a_i_m3_p5, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_m3_p5); + square_a_64qam_epi16(a_r_m3_p3, 
a_i_m3_p3, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_m3_p3); + square_a_64qam_epi16(a_r_m3_p1, a_i_m3_p1, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_m3_p1); + square_a_64qam_epi16(a_r_m3_m1, a_i_m3_m1, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_m3_m1); + square_a_64qam_epi16(a_r_m3_m3, a_i_m3_m3, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_m3_m3); + square_a_64qam_epi16(a_r_m3_m5, a_i_m3_m5, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_m3_m5); + square_a_64qam_epi16(a_r_m3_m7, a_i_m3_m7, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_m3_m7); + square_a_64qam_epi16(a_r_m5_p7, a_i_m5_p7, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_m5_p7); + square_a_64qam_epi16(a_r_m5_p5, a_i_m5_p5, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_m5_p5); + square_a_64qam_epi16(a_r_m5_p3, a_i_m5_p3, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_m5_p3); + square_a_64qam_epi16(a_r_m5_p1, a_i_m5_p1, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_m5_p1); + square_a_64qam_epi16(a_r_m5_m1, a_i_m5_m1, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_m5_m1); + square_a_64qam_epi16(a_r_m5_m3, a_i_m5_m3, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_m5_m3); + square_a_64qam_epi16(a_r_m5_m5, a_i_m5_m5, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_m5_m5); + square_a_64qam_epi16(a_r_m5_m7, a_i_m5_m7, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_m5_m7); + square_a_64qam_epi16(a_r_m7_p7, a_i_m7_p7, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_m7_p7); + square_a_64qam_epi16(a_r_m7_p5, a_i_m7_p5, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_m7_p5); + square_a_64qam_epi16(a_r_m7_p3, a_i_m7_p3, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_m7_p3); + square_a_64qam_epi16(a_r_m7_p1, a_i_m7_p1, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_m7_p1); + square_a_64qam_epi16(a_r_m7_m1, a_i_m7_m1, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_m7_m1); + square_a_64qam_epi16(a_r_m7_m3, a_i_m7_m3, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_m7_m3); + square_a_64qam_epi16(a_r_m7_m5, a_i_m7_m5, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_m7_m5); + square_a_64qam_epi16(a_r_m7_m7, a_i_m7_m7, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_m7_m7); + + // Computing different multiples of ||h0||^2 + // x=1, y=1 + 
ch_mag_2_over_42_with_sigma2 = _mm256_mulhi_epi16(ch_mag_des,ONE_OVER_FOUR_SQRT_42); + ch_mag_2_over_42_with_sigma2 = _mm256_slli_epi16(ch_mag_2_over_42_with_sigma2,1); + // x=1, y=3 + ch_mag_10_over_42_with_sigma2 = _mm256_mulhi_epi16(ch_mag_des,FIVE_OVER_FOUR_SQRT_42); + ch_mag_10_over_42_with_sigma2 = _mm256_slli_epi16(ch_mag_10_over_42_with_sigma2,1); + // x=1, y=5 + ch_mag_26_over_42_with_sigma2 = _mm256_mulhi_epi16(ch_mag_des,THIRTEEN_OVER_FOUR_SQRT_42); + ch_mag_26_over_42_with_sigma2 = _mm256_slli_epi16(ch_mag_26_over_42_with_sigma2,1); + // x=1, y=7 + ch_mag_50_over_42_with_sigma2 = _mm256_mulhi_epi16(ch_mag_des,TWENTYFIVE_OVER_FOUR_SQRT_42); + ch_mag_50_over_42_with_sigma2 = _mm256_slli_epi16(ch_mag_50_over_42_with_sigma2,1); + // x=3, y=3 + ch_mag_18_over_42_with_sigma2 = _mm256_mulhi_epi16(ch_mag_des,NINE_OVER_FOUR_SQRT_42); + ch_mag_18_over_42_with_sigma2 = _mm256_slli_epi16(ch_mag_18_over_42_with_sigma2,1); + // x=3, y=5 + ch_mag_34_over_42_with_sigma2 = _mm256_mulhi_epi16(ch_mag_des,SEVENTEEN_OVER_FOUR_SQRT_42); + ch_mag_34_over_42_with_sigma2 = _mm256_slli_epi16(ch_mag_34_over_42_with_sigma2,1); + // x=3, y=7 -- NOTE(review): left shift is 2 here (and for 74/42, 98/42 below) but 1 for the terms above; presumably these constants are stored at a different fixed-point scale, confirm against their definitions + ch_mag_58_over_42_with_sigma2 = _mm256_mulhi_epi16(ch_mag_des,TWENTYNINE_OVER_FOUR_SQRT_42); + ch_mag_58_over_42_with_sigma2 = _mm256_slli_epi16(ch_mag_58_over_42_with_sigma2,2); + // x=5, y=5 -- same expression as the x=1, y=7 case above (1+49 = 25+25 = 50), so this recomputes an identical value + ch_mag_50_over_42_with_sigma2 = _mm256_mulhi_epi16(ch_mag_des,TWENTYFIVE_OVER_FOUR_SQRT_42); + ch_mag_50_over_42_with_sigma2 = _mm256_slli_epi16(ch_mag_50_over_42_with_sigma2,1); + // x=5, y=7 + ch_mag_74_over_42_with_sigma2 = _mm256_mulhi_epi16(ch_mag_des,THIRTYSEVEN_OVER_FOUR_SQRT_42); + ch_mag_74_over_42_with_sigma2 = _mm256_slli_epi16(ch_mag_74_over_42_with_sigma2,2); + // x=7, y=7 + ch_mag_98_over_42_with_sigma2 = _mm256_mulhi_epi16(ch_mag_des,FORTYNINE_OVER_FOUR_SQRT_42); + ch_mag_98_over_42_with_sigma2 = _mm256_slli_epi16(ch_mag_98_over_42_with_sigma2,2); + + // Computing Metrics: each bit_met_A_B below is psi_a_A_B - a_sq_A_B, plus or minus a y0_* term, minus the matching ch_mag_*_over_42_with_sigma2 term (all with saturating 16-bit arithmetic) + xmm0 = _mm256_subs_epi16(psi_a_p7_p7, a_sq_p7_p7); + xmm1 = 
_mm256_adds_epi16(xmm0, y0_p_7_7); + bit_met_p7_p7 = _mm256_subs_epi16(xmm1, ch_mag_98_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_p7_p5, a_sq_p7_p5); + xmm1 = _mm256_adds_epi16(xmm0, y0_p_7_5); + bit_met_p7_p5 = _mm256_subs_epi16(xmm1, ch_mag_74_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_p7_p3, a_sq_p7_p3); + xmm1 = _mm256_adds_epi16(xmm0, y0_p_7_3); + bit_met_p7_p3 = _mm256_subs_epi16(xmm1, ch_mag_58_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_p7_p1, a_sq_p7_p1); + xmm1 = _mm256_adds_epi16(xmm0, y0_p_7_1); + bit_met_p7_p1 = _mm256_subs_epi16(xmm1, ch_mag_50_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_p7_m1, a_sq_p7_m1); + xmm1 = _mm256_adds_epi16(xmm0, y0_m_7_1); + bit_met_p7_m1 = _mm256_subs_epi16(xmm1, ch_mag_50_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_p7_m3, a_sq_p7_m3); + xmm1 = _mm256_adds_epi16(xmm0, y0_m_7_3); + bit_met_p7_m3 = _mm256_subs_epi16(xmm1, ch_mag_58_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_p7_m5, a_sq_p7_m5); + xmm1 = _mm256_adds_epi16(xmm0, y0_m_7_5); + bit_met_p7_m5 = _mm256_subs_epi16(xmm1, ch_mag_74_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_p7_m7, a_sq_p7_m7); + xmm1 = _mm256_adds_epi16(xmm0, y0_m_7_7); + bit_met_p7_m7 = _mm256_subs_epi16(xmm1, ch_mag_98_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_p5_p7, a_sq_p5_p7); + xmm1 = _mm256_adds_epi16(xmm0, y0_p_5_7); + bit_met_p5_p7 = _mm256_subs_epi16(xmm1, ch_mag_74_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_p5_p5, a_sq_p5_p5); + xmm1 = _mm256_adds_epi16(xmm0, y0_p_5_5); + bit_met_p5_p5 = _mm256_subs_epi16(xmm1, ch_mag_50_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_p5_p3, a_sq_p5_p3); + xmm1 = _mm256_adds_epi16(xmm0, y0_p_5_3); + bit_met_p5_p3 = _mm256_subs_epi16(xmm1, ch_mag_34_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_p5_p1, a_sq_p5_p1); + xmm1 = _mm256_adds_epi16(xmm0, y0_p_5_1); + bit_met_p5_p1 = _mm256_subs_epi16(xmm1, ch_mag_26_over_42_with_sigma2); + xmm0 = 
_mm256_subs_epi16(psi_a_p5_m1, a_sq_p5_m1); + xmm1 = _mm256_adds_epi16(xmm0, y0_m_5_1); + bit_met_p5_m1 = _mm256_subs_epi16(xmm1, ch_mag_26_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_p5_m3, a_sq_p5_m3); + xmm1 = _mm256_adds_epi16(xmm0, y0_m_5_3); + bit_met_p5_m3 = _mm256_subs_epi16(xmm1, ch_mag_34_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_p5_m5, a_sq_p5_m5); + xmm1 = _mm256_adds_epi16(xmm0, y0_m_5_5); + bit_met_p5_m5 = _mm256_subs_epi16(xmm1, ch_mag_50_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_p5_m7, a_sq_p5_m7); + xmm1 = _mm256_adds_epi16(xmm0, y0_m_5_7); + bit_met_p5_m7 = _mm256_subs_epi16(xmm1, ch_mag_74_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_p3_p7, a_sq_p3_p7); + xmm1 = _mm256_adds_epi16(xmm0, y0_p_3_7); + bit_met_p3_p7 = _mm256_subs_epi16(xmm1, ch_mag_58_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_p3_p5, a_sq_p3_p5); + xmm1 = _mm256_adds_epi16(xmm0, y0_p_3_5); + bit_met_p3_p5 = _mm256_subs_epi16(xmm1, ch_mag_34_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_p3_p3, a_sq_p3_p3); + xmm1 = _mm256_adds_epi16(xmm0, y0_p_3_3); + bit_met_p3_p3 = _mm256_subs_epi16(xmm1, ch_mag_18_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_p3_p1, a_sq_p3_p1); + xmm1 = _mm256_adds_epi16(xmm0, y0_p_3_1); + bit_met_p3_p1 = _mm256_subs_epi16(xmm1, ch_mag_10_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_p3_m1, a_sq_p3_m1); + xmm1 = _mm256_adds_epi16(xmm0, y0_m_3_1); + bit_met_p3_m1 = _mm256_subs_epi16(xmm1, ch_mag_10_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_p3_m3, a_sq_p3_m3); + xmm1 = _mm256_adds_epi16(xmm0, y0_m_3_3); + bit_met_p3_m3 = _mm256_subs_epi16(xmm1, ch_mag_18_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_p3_m5, a_sq_p3_m5); + xmm1 = _mm256_adds_epi16(xmm0, y0_m_3_5); + bit_met_p3_m5 = _mm256_subs_epi16(xmm1, ch_mag_34_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_p3_m7, a_sq_p3_m7); + xmm1 = _mm256_adds_epi16(xmm0, y0_m_3_7); + bit_met_p3_m7 = 
_mm256_subs_epi16(xmm1, ch_mag_58_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_p1_p7, a_sq_p1_p7); + xmm1 = _mm256_adds_epi16(xmm0, y0_p_1_7); + bit_met_p1_p7 = _mm256_subs_epi16(xmm1, ch_mag_50_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_p1_p5, a_sq_p1_p5); + xmm1 = _mm256_adds_epi16(xmm0, y0_p_1_5); + bit_met_p1_p5 = _mm256_subs_epi16(xmm1, ch_mag_26_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_p1_p3, a_sq_p1_p3); + xmm1 = _mm256_adds_epi16(xmm0, y0_p_1_3); + bit_met_p1_p3 = _mm256_subs_epi16(xmm1, ch_mag_10_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_p1_p1, a_sq_p1_p1); + xmm1 = _mm256_adds_epi16(xmm0, y0_p_1_1); + bit_met_p1_p1 = _mm256_subs_epi16(xmm1, ch_mag_2_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_p1_m1, a_sq_p1_m1); + xmm1 = _mm256_adds_epi16(xmm0, y0_m_1_1); + bit_met_p1_m1 = _mm256_subs_epi16(xmm1, ch_mag_2_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_p1_m3, a_sq_p1_m3); + xmm1 = _mm256_adds_epi16(xmm0, y0_m_1_3); + bit_met_p1_m3 = _mm256_subs_epi16(xmm1, ch_mag_10_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_p1_m5, a_sq_p1_m5); + xmm1 = _mm256_adds_epi16(xmm0, y0_m_1_5); + bit_met_p1_m5 = _mm256_subs_epi16(xmm1, ch_mag_26_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_p1_m7, a_sq_p1_m7); + xmm1 = _mm256_adds_epi16(xmm0, y0_m_1_7); + bit_met_p1_m7 = _mm256_subs_epi16(xmm1, ch_mag_50_over_42_with_sigma2); + + xmm0 = _mm256_subs_epi16(psi_a_m1_p7, a_sq_m1_p7); + xmm1 = _mm256_subs_epi16(xmm0, y0_m_1_7); + bit_met_m1_p7 = _mm256_subs_epi16(xmm1, ch_mag_50_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_m1_p5, a_sq_m1_p5); + xmm1 = _mm256_subs_epi16(xmm0, y0_m_1_5); + bit_met_m1_p5 = _mm256_subs_epi16(xmm1, ch_mag_26_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_m1_p3, a_sq_m1_p3); + xmm1 = _mm256_subs_epi16(xmm0, y0_m_1_3); + bit_met_m1_p3 = _mm256_subs_epi16(xmm1, ch_mag_10_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_m1_p1, a_sq_m1_p1); + xmm1 = 
_mm256_subs_epi16(xmm0, y0_m_1_1); + bit_met_m1_p1 = _mm256_subs_epi16(xmm1, ch_mag_2_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_m1_m1, a_sq_m1_m1); + xmm1 = _mm256_subs_epi16(xmm0, y0_p_1_1); + bit_met_m1_m1 = _mm256_subs_epi16(xmm1, ch_mag_2_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_m1_m3, a_sq_m1_m3); + xmm1 = _mm256_subs_epi16(xmm0, y0_p_1_3); + bit_met_m1_m3 = _mm256_subs_epi16(xmm1, ch_mag_10_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_m1_m5, a_sq_m1_m5); + xmm1 = _mm256_subs_epi16(xmm0, y0_p_1_5); + bit_met_m1_m5 = _mm256_subs_epi16(xmm1, ch_mag_26_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_m1_m7, a_sq_m1_m7); + xmm1 = _mm256_subs_epi16(xmm0, y0_p_1_7); + bit_met_m1_m7 = _mm256_subs_epi16(xmm1, ch_mag_50_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_m3_p7, a_sq_m3_p7); + xmm1 = _mm256_subs_epi16(xmm0, y0_m_3_7); + bit_met_m3_p7 = _mm256_subs_epi16(xmm1, ch_mag_58_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_m3_p5, a_sq_m3_p5); + xmm1 = _mm256_subs_epi16(xmm0, y0_m_3_5); + bit_met_m3_p5 = _mm256_subs_epi16(xmm1, ch_mag_34_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_m3_p3, a_sq_m3_p3); + xmm1 = _mm256_subs_epi16(xmm0, y0_m_3_3); + bit_met_m3_p3 = _mm256_subs_epi16(xmm1, ch_mag_18_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_m3_p1, a_sq_m3_p1); + xmm1 = _mm256_subs_epi16(xmm0, y0_m_3_1); + bit_met_m3_p1 = _mm256_subs_epi16(xmm1, ch_mag_10_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_m3_m1, a_sq_m3_m1); + xmm1 = _mm256_subs_epi16(xmm0, y0_p_3_1); + bit_met_m3_m1 = _mm256_subs_epi16(xmm1, ch_mag_10_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_m3_m3, a_sq_m3_m3); + xmm1 = _mm256_subs_epi16(xmm0, y0_p_3_3); + bit_met_m3_m3 = _mm256_subs_epi16(xmm1, ch_mag_18_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_m3_m5, a_sq_m3_m5); + xmm1 = _mm256_subs_epi16(xmm0, y0_p_3_5); + bit_met_m3_m5 = _mm256_subs_epi16(xmm1, ch_mag_34_over_42_with_sigma2); + xmm0 = 
_mm256_subs_epi16(psi_a_m3_m7, a_sq_m3_m7); + xmm1 = _mm256_subs_epi16(xmm0, y0_p_3_7); + bit_met_m3_m7 = _mm256_subs_epi16(xmm1, ch_mag_58_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_m5_p7, a_sq_m5_p7); + xmm1 = _mm256_subs_epi16(xmm0, y0_m_5_7); + bit_met_m5_p7 = _mm256_subs_epi16(xmm1, ch_mag_74_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_m5_p5, a_sq_m5_p5); + xmm1 = _mm256_subs_epi16(xmm0, y0_m_5_5); + bit_met_m5_p5 = _mm256_subs_epi16(xmm1, ch_mag_50_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_m5_p3, a_sq_m5_p3); + xmm1 = _mm256_subs_epi16(xmm0, y0_m_5_3); + bit_met_m5_p3 = _mm256_subs_epi16(xmm1, ch_mag_34_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_m5_p1, a_sq_m5_p1); + xmm1 = _mm256_subs_epi16(xmm0, y0_m_5_1); + bit_met_m5_p1 = _mm256_subs_epi16(xmm1, ch_mag_26_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_m5_m1, a_sq_m5_m1); + xmm1 = _mm256_subs_epi16(xmm0, y0_p_5_1); + bit_met_m5_m1 = _mm256_subs_epi16(xmm1, ch_mag_26_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_m5_m3, a_sq_m5_m3); + xmm1 = _mm256_subs_epi16(xmm0, y0_p_5_3); + bit_met_m5_m3 = _mm256_subs_epi16(xmm1, ch_mag_34_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_m5_m5, a_sq_m5_m5); + xmm1 = _mm256_subs_epi16(xmm0, y0_p_5_5); + bit_met_m5_m5 = _mm256_subs_epi16(xmm1, ch_mag_50_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_m5_m7, a_sq_m5_m7); + xmm1 = _mm256_subs_epi16(xmm0, y0_p_5_7); + bit_met_m5_m7 = _mm256_subs_epi16(xmm1, ch_mag_74_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_m7_p7, a_sq_m7_p7); + xmm1 = _mm256_subs_epi16(xmm0, y0_m_7_7); + bit_met_m7_p7 = _mm256_subs_epi16(xmm1, ch_mag_98_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_m7_p5, a_sq_m7_p5); + xmm1 = _mm256_subs_epi16(xmm0, y0_m_7_5); + bit_met_m7_p5 = _mm256_subs_epi16(xmm1, ch_mag_74_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_m7_p3, a_sq_m7_p3); + xmm1 = _mm256_subs_epi16(xmm0, y0_m_7_3); + bit_met_m7_p3 = 
_mm256_subs_epi16(xmm1, ch_mag_58_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_m7_p1, a_sq_m7_p1); + xmm1 = _mm256_subs_epi16(xmm0, y0_m_7_1); + bit_met_m7_p1 = _mm256_subs_epi16(xmm1, ch_mag_50_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_m7_m1, a_sq_m7_m1); + xmm1 = _mm256_subs_epi16(xmm0, y0_p_7_1); + bit_met_m7_m1 = _mm256_subs_epi16(xmm1, ch_mag_50_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_m7_m3, a_sq_m7_m3); + xmm1 = _mm256_subs_epi16(xmm0, y0_p_7_3); + bit_met_m7_m3 = _mm256_subs_epi16(xmm1, ch_mag_58_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_m7_m5, a_sq_m7_m5); + xmm1 = _mm256_subs_epi16(xmm0, y0_p_7_5); + bit_met_m7_m5 = _mm256_subs_epi16(xmm1, ch_mag_74_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_m7_m7, a_sq_m7_m7); + xmm1 = _mm256_subs_epi16(xmm0, y0_p_7_7); + bit_met_m7_m7 = _mm256_subs_epi16(xmm1, ch_mag_98_over_42_with_sigma2); + + // Detection for 1st bit (LTE mapping) + // bit = 1 + xmm0 = _mm256_max_epi16(bit_met_m7_p7, bit_met_m7_p5); + xmm1 = _mm256_max_epi16(bit_met_m7_p3, bit_met_m7_p1); + xmm2 = _mm256_max_epi16(bit_met_m7_m1, bit_met_m7_m3); + xmm3 = _mm256_max_epi16(bit_met_m7_m5, bit_met_m7_m7); + xmm4 = _mm256_max_epi16(xmm0, xmm1); + xmm5 = _mm256_max_epi16(xmm2, xmm3); + logmax_den_re0 = _mm256_max_epi16(xmm4, xmm5); + xmm0 = _mm256_max_epi16(bit_met_m5_p7, bit_met_m5_p5); + xmm1 = _mm256_max_epi16(bit_met_m5_p3, bit_met_m5_p1); + xmm2 = _mm256_max_epi16(bit_met_m5_m1, bit_met_m5_m3); + xmm3 = _mm256_max_epi16(bit_met_m5_m5, bit_met_m5_m7); + xmm4 = _mm256_max_epi16(xmm0, xmm1); + xmm5 = _mm256_max_epi16(xmm2, xmm3); + logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm4); + logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm5); + xmm0 = _mm256_max_epi16(bit_met_m3_p7, bit_met_m3_p5); + xmm1 = _mm256_max_epi16(bit_met_m3_p3, bit_met_m3_p1); + xmm2 = _mm256_max_epi16(bit_met_m3_m1, bit_met_m3_m3); + xmm3 = _mm256_max_epi16(bit_met_m3_m5, bit_met_m3_m7); + xmm4 = 
_mm256_max_epi16(xmm0, xmm1); + xmm5 = _mm256_max_epi16(xmm2, xmm3); + logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm4); + logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm5); + xmm0 = _mm256_max_epi16(bit_met_m1_p7, bit_met_m1_p5); + xmm1 = _mm256_max_epi16(bit_met_m1_p3, bit_met_m1_p1); + xmm2 = _mm256_max_epi16(bit_met_m1_m1, bit_met_m1_m3); + xmm3 = _mm256_max_epi16(bit_met_m1_m5, bit_met_m1_m7); + xmm4 = _mm256_max_epi16(xmm0, xmm1); + xmm5 = _mm256_max_epi16(xmm2, xmm3); + logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm4); + logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm5); + + // bit = 0 + xmm0 = _mm256_max_epi16(bit_met_p7_p7, bit_met_p7_p5); + xmm1 = _mm256_max_epi16(bit_met_p7_p3, bit_met_p7_p1); + xmm2 = _mm256_max_epi16(bit_met_p7_m1, bit_met_p7_m3); + xmm3 = _mm256_max_epi16(bit_met_p7_m5, bit_met_p7_m7); + xmm4 = _mm256_max_epi16(xmm0, xmm1); + xmm5 = _mm256_max_epi16(xmm2, xmm3); + logmax_num_re0 = _mm256_max_epi16(xmm4, xmm5); + xmm0 = _mm256_max_epi16(bit_met_p5_p7, bit_met_p5_p5); + xmm1 = _mm256_max_epi16(bit_met_p5_p3, bit_met_p5_p1); + xmm2 = _mm256_max_epi16(bit_met_p5_m1, bit_met_p5_m3); + xmm3 = _mm256_max_epi16(bit_met_p5_m5, bit_met_p5_m7); + xmm4 = _mm256_max_epi16(xmm0, xmm1); + xmm5 = _mm256_max_epi16(xmm2, xmm3); + logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm4); + logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm5); + xmm0 = _mm256_max_epi16(bit_met_p3_p7, bit_met_p3_p5); + xmm1 = _mm256_max_epi16(bit_met_p3_p3, bit_met_p3_p1); + xmm2 = _mm256_max_epi16(bit_met_p3_m1, bit_met_p3_m3); + xmm3 = _mm256_max_epi16(bit_met_p3_m5, bit_met_p3_m7); + xmm4 = _mm256_max_epi16(xmm0, xmm1); + xmm5 = _mm256_max_epi16(xmm2, xmm3); + logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm4); + logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm5); + xmm0 = _mm256_max_epi16(bit_met_p1_p7, bit_met_p1_p5); + xmm1 = _mm256_max_epi16(bit_met_p1_p3, bit_met_p1_p1); + xmm2 = _mm256_max_epi16(bit_met_p1_m1, bit_met_p1_m3); 
+ xmm3 = _mm256_max_epi16(bit_met_p1_m5, bit_met_p1_m7); + xmm4 = _mm256_max_epi16(xmm0, xmm1); + xmm5 = _mm256_max_epi16(xmm2, xmm3); + logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm4); + logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm5); + + y0r = _mm256_subs_epi16(logmax_num_re0, logmax_den_re0); + + // Detection for 2nd bit (LTE mapping) + // bit = 1 + xmm0 = _mm256_max_epi16(bit_met_p7_m1, bit_met_p5_m1); + xmm1 = _mm256_max_epi16(bit_met_p3_m1, bit_met_p1_m1); + xmm2 = _mm256_max_epi16(bit_met_m1_m1, bit_met_m3_m1); + xmm3 = _mm256_max_epi16(bit_met_m5_m1, bit_met_m7_m1); + xmm4 = _mm256_max_epi16(xmm0, xmm1); + xmm5 = _mm256_max_epi16(xmm2, xmm3); + logmax_den_re0 = _mm256_max_epi16(xmm4, xmm5); + xmm0 = _mm256_max_epi16(bit_met_p7_m3, bit_met_p5_m3); + xmm1 = _mm256_max_epi16(bit_met_p3_m3, bit_met_p1_m3); + xmm2 = _mm256_max_epi16(bit_met_m1_m3, bit_met_m3_m3); + xmm3 = _mm256_max_epi16(bit_met_m5_m3, bit_met_m7_m3); + xmm4 = _mm256_max_epi16(xmm0, xmm1); + xmm5 = _mm256_max_epi16(xmm2, xmm3); + logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm4); + logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm5); + xmm0 = _mm256_max_epi16(bit_met_p7_m5, bit_met_p5_m5); + xmm1 = _mm256_max_epi16(bit_met_p3_m5, bit_met_p1_m5); + xmm2 = _mm256_max_epi16(bit_met_m1_m5, bit_met_m3_m5); + xmm3 = _mm256_max_epi16(bit_met_m5_m5, bit_met_m7_m5); + xmm4 = _mm256_max_epi16(xmm0, xmm1); + xmm5 = _mm256_max_epi16(xmm2, xmm3); + logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm4); + logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm5); + xmm0 = _mm256_max_epi16(bit_met_p7_m7, bit_met_p5_m7); + xmm1 = _mm256_max_epi16(bit_met_p3_m7, bit_met_p1_m7); + xmm2 = _mm256_max_epi16(bit_met_m1_m7, bit_met_m3_m7); + xmm3 = _mm256_max_epi16(bit_met_m5_m7, bit_met_m7_m7); + xmm4 = _mm256_max_epi16(xmm0, xmm1); + xmm5 = _mm256_max_epi16(xmm2, xmm3); + logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm4); + logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm5); + 
+ // bit = 0 + xmm0 = _mm256_max_epi16(bit_met_p7_p1, bit_met_p5_p1); + xmm1 = _mm256_max_epi16(bit_met_p3_p1, bit_met_p1_p1); + xmm2 = _mm256_max_epi16(bit_met_m1_p1, bit_met_m3_p1); + xmm3 = _mm256_max_epi16(bit_met_m5_p1, bit_met_m7_p1); + xmm4 = _mm256_max_epi16(xmm0, xmm1); + xmm5 = _mm256_max_epi16(xmm2, xmm3); + logmax_num_re0 = _mm256_max_epi16(xmm4, xmm5); + xmm0 = _mm256_max_epi16(bit_met_p7_p3, bit_met_p5_p3); + xmm1 = _mm256_max_epi16(bit_met_p3_p3, bit_met_p1_p3); + xmm2 = _mm256_max_epi16(bit_met_m1_p3, bit_met_m3_p3); + xmm3 = _mm256_max_epi16(bit_met_m5_p3, bit_met_m7_p3); + xmm4 = _mm256_max_epi16(xmm0, xmm1); + xmm5 = _mm256_max_epi16(xmm2, xmm3); + logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm4); + logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm5); + xmm0 = _mm256_max_epi16(bit_met_p7_p5, bit_met_p5_p5); + xmm1 = _mm256_max_epi16(bit_met_p3_p5, bit_met_p1_p5); + xmm2 = _mm256_max_epi16(bit_met_m1_p5, bit_met_m3_p5); + xmm3 = _mm256_max_epi16(bit_met_m5_p5, bit_met_m7_p5); + xmm4 = _mm256_max_epi16(xmm0, xmm1); + xmm5 = _mm256_max_epi16(xmm2, xmm3); + logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm4); + logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm5); + xmm0 = _mm256_max_epi16(bit_met_p7_p7, bit_met_p5_p7); + xmm1 = _mm256_max_epi16(bit_met_p3_p7, bit_met_p1_p7); + xmm2 = _mm256_max_epi16(bit_met_m1_p7, bit_met_m3_p7); + xmm3 = _mm256_max_epi16(bit_met_m5_p7, bit_met_m7_p7); + xmm4 = _mm256_max_epi16(xmm0, xmm1); + xmm5 = _mm256_max_epi16(xmm2, xmm3); + logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm4); + logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm5); + + y1r = _mm256_subs_epi16(logmax_num_re0, logmax_den_re0); + + // Detection for 3rd bit (LTE mapping) + xmm0 = _mm256_max_epi16(bit_met_m7_m7, bit_met_m7_m5); + xmm1 = _mm256_max_epi16(bit_met_m7_m3, bit_met_m7_m1); + xmm2 = _mm256_max_epi16(bit_met_m7_p1, bit_met_m7_p3); + xmm3 = _mm256_max_epi16(bit_met_m7_p5, bit_met_m7_p7); + xmm4 = 
_mm256_max_epi16(xmm0, xmm1); + xmm5 = _mm256_max_epi16(xmm2, xmm3); + logmax_den_re0 = _mm256_max_epi16(xmm4, xmm5); + xmm0 = _mm256_max_epi16(bit_met_m5_m7, bit_met_m5_m5); + xmm1 = _mm256_max_epi16(bit_met_m5_m3, bit_met_m5_m1); + xmm2 = _mm256_max_epi16(bit_met_m5_p1, bit_met_m5_p3); + xmm3 = _mm256_max_epi16(bit_met_m5_p5, bit_met_m5_p7); + xmm4 = _mm256_max_epi16(xmm0, xmm1); + xmm5 = _mm256_max_epi16(xmm2, xmm3); + logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm4); + logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm5); + xmm0 = _mm256_max_epi16(bit_met_p5_m7, bit_met_p5_m5); + xmm1 = _mm256_max_epi16(bit_met_p5_m3, bit_met_p5_m1); + xmm2 = _mm256_max_epi16(bit_met_p5_p1, bit_met_p5_p3); + xmm3 = _mm256_max_epi16(bit_met_p5_p5, bit_met_p5_p7); + xmm4 = _mm256_max_epi16(xmm0, xmm1); + xmm5 = _mm256_max_epi16(xmm2, xmm3); + logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm4); + logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm5); + xmm0 = _mm256_max_epi16(bit_met_p7_m7, bit_met_p7_m5); + xmm1 = _mm256_max_epi16(bit_met_p7_m3, bit_met_p7_m1); + xmm2 = _mm256_max_epi16(bit_met_p7_p1, bit_met_p7_p3); + xmm3 = _mm256_max_epi16(bit_met_p7_p5, bit_met_p7_p7); + xmm4 = _mm256_max_epi16(xmm0, xmm1); + xmm5 = _mm256_max_epi16(xmm2, xmm3); + logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm4); + logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm5); + + xmm0 = _mm256_max_epi16(bit_met_m3_m7, bit_met_m3_m5); + xmm1 = _mm256_max_epi16(bit_met_m3_m3, bit_met_m3_m1); + xmm2 = _mm256_max_epi16(bit_met_m3_p1, bit_met_m3_p3); + xmm3 = _mm256_max_epi16(bit_met_m3_p5, bit_met_m3_p7); + xmm4 = _mm256_max_epi16(xmm0, xmm1); + xmm5 = _mm256_max_epi16(xmm2, xmm3); + logmax_num_re0 = _mm256_max_epi16(xmm4, xmm5); + xmm0 = _mm256_max_epi16(bit_met_m1_m7, bit_met_m1_m5); + xmm1 = _mm256_max_epi16(bit_met_m1_m3, bit_met_m1_m1); + xmm2 = _mm256_max_epi16(bit_met_m1_p1, bit_met_m1_p3); + xmm3 = _mm256_max_epi16(bit_met_m1_p5, bit_met_m1_p7); + xmm4 = 
_mm256_max_epi16(xmm0, xmm1); + xmm5 = _mm256_max_epi16(xmm2, xmm3); + logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm4); + logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm5); + xmm0 = _mm256_max_epi16(bit_met_p1_m7, bit_met_p1_m5); + xmm1 = _mm256_max_epi16(bit_met_p1_m3, bit_met_p1_m1); + xmm2 = _mm256_max_epi16(bit_met_p1_p1, bit_met_p1_p3); + xmm3 = _mm256_max_epi16(bit_met_p1_p5, bit_met_p1_p7); + xmm4 = _mm256_max_epi16(xmm0, xmm1); + xmm5 = _mm256_max_epi16(xmm2, xmm3); + logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm4); + logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm5); + xmm0 = _mm256_max_epi16(bit_met_p3_m7, bit_met_p3_m5); + xmm1 = _mm256_max_epi16(bit_met_p3_m3, bit_met_p3_m1); + xmm2 = _mm256_max_epi16(bit_met_p3_p1, bit_met_p3_p3); + xmm3 = _mm256_max_epi16(bit_met_p3_p5, bit_met_p3_p7); + xmm4 = _mm256_max_epi16(xmm0, xmm1); + xmm5 = _mm256_max_epi16(xmm2, xmm3); + logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm4); + logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm5); + + y2r = _mm256_subs_epi16(logmax_num_re0, logmax_den_re0); + + // Detection for 4th bit (LTE mapping): denominator is the max over the metrics whose second index is p7, p5, m5 or m7; each group of eight runs the first index over p7..m7 + xmm0 = _mm256_max_epi16(bit_met_p7_p7, bit_met_p5_p7); + xmm1 = _mm256_max_epi16(bit_met_p3_p7, bit_met_p1_p7); + xmm2 = _mm256_max_epi16(bit_met_m1_p7, bit_met_m3_p7); + xmm3 = _mm256_max_epi16(bit_met_m5_p7, bit_met_m7_p7); + xmm4 = _mm256_max_epi16(xmm0, xmm1); + xmm5 = _mm256_max_epi16(xmm2, xmm3); + logmax_den_re0 = _mm256_max_epi16(xmm4, xmm5); + xmm0 = _mm256_max_epi16(bit_met_p7_p5, bit_met_p5_p5); + xmm1 = _mm256_max_epi16(bit_met_p3_p5, bit_met_p1_p5); + xmm2 = _mm256_max_epi16(bit_met_m1_p5, bit_met_m3_p5); + xmm3 = _mm256_max_epi16(bit_met_m5_p5, bit_met_m7_p5); /* pair m5 with m7 so the _p5 group covers all eight first-index values; using m5_p5 twice would leave bit_met_m7_p5 out of the max */ + xmm4 = _mm256_max_epi16(xmm0, xmm1); + xmm5 = _mm256_max_epi16(xmm2, xmm3); + logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm4); + logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm5); + xmm0 = _mm256_max_epi16(bit_met_p7_m5, bit_met_p5_m5); + xmm1 = 
_mm256_max_epi16(bit_met_p3_m5, bit_met_p1_m5); + xmm2 = _mm256_max_epi16(bit_met_m1_m5, bit_met_m3_m5); + xmm3 = _mm256_max_epi16(bit_met_m5_m5, bit_met_m7_m5); + xmm4 = _mm256_max_epi16(xmm0, xmm1); + xmm5 = _mm256_max_epi16(xmm2, xmm3); + logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm4); + logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm5); + xmm0 = _mm256_max_epi16(bit_met_p7_m7, bit_met_p5_m7); + xmm1 = _mm256_max_epi16(bit_met_p3_m7, bit_met_p1_m7); + xmm2 = _mm256_max_epi16(bit_met_m1_m7, bit_met_m3_m7); + xmm3 = _mm256_max_epi16(bit_met_m5_m7, bit_met_m7_m7); + xmm4 = _mm256_max_epi16(xmm0, xmm1); + xmm5 = _mm256_max_epi16(xmm2, xmm3); + logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm4); + logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm5); + + xmm0 = _mm256_max_epi16(bit_met_p7_m1, bit_met_p5_m1); + xmm1 = _mm256_max_epi16(bit_met_p3_m1, bit_met_p1_m1); + xmm2 = _mm256_max_epi16(bit_met_m1_m1, bit_met_m3_m1); + xmm3 = _mm256_max_epi16(bit_met_m5_m1, bit_met_m7_m1); + xmm4 = _mm256_max_epi16(xmm0, xmm1); + xmm5 = _mm256_max_epi16(xmm2, xmm3); + logmax_num_re0 = _mm256_max_epi16(xmm4, xmm5); + xmm0 = _mm256_max_epi16(bit_met_p7_m3, bit_met_p5_m3); + xmm1 = _mm256_max_epi16(bit_met_p3_m3, bit_met_p1_m3); + xmm2 = _mm256_max_epi16(bit_met_m1_m3, bit_met_m3_m3); + xmm3 = _mm256_max_epi16(bit_met_m5_m3, bit_met_m7_m3); + xmm4 = _mm256_max_epi16(xmm0, xmm1); + xmm5 = _mm256_max_epi16(xmm2, xmm3); + logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm4); + logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm5); + xmm0 = _mm256_max_epi16(bit_met_p7_p1, bit_met_p5_p1); + xmm1 = _mm256_max_epi16(bit_met_p3_p1, bit_met_p1_p1); + xmm2 = _mm256_max_epi16(bit_met_m1_p1, bit_met_m3_p1); + xmm3 = _mm256_max_epi16(bit_met_m5_p1, bit_met_m7_p1); + xmm4 = _mm256_max_epi16(xmm0, xmm1); + xmm5 = _mm256_max_epi16(xmm2, xmm3); + logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm4); + logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm5); + xmm0 = 
_mm256_max_epi16(bit_met_p7_p3, bit_met_p5_p3); + xmm1 = _mm256_max_epi16(bit_met_p3_p3, bit_met_p1_p3); + xmm2 = _mm256_max_epi16(bit_met_m1_p3, bit_met_m3_p3); + xmm3 = _mm256_max_epi16(bit_met_m5_p3, bit_met_m7_p3); + xmm4 = _mm256_max_epi16(xmm0, xmm1); + xmm5 = _mm256_max_epi16(xmm2, xmm3); + logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm4); + logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm5); + + y0i = _mm256_subs_epi16(logmax_num_re0, logmax_den_re0); + + + // Detection for 5th bit (LTE mapping) + xmm0 = _mm256_max_epi16(bit_met_m7_m7, bit_met_m7_m5); + xmm1 = _mm256_max_epi16(bit_met_m7_m3, bit_met_m7_m1); + xmm2 = _mm256_max_epi16(bit_met_m7_p1, bit_met_m7_p3); + xmm3 = _mm256_max_epi16(bit_met_m7_p5, bit_met_m7_p7); + xmm4 = _mm256_max_epi16(xmm0, xmm1); + xmm5 = _mm256_max_epi16(xmm2, xmm3); + logmax_den_re0 = _mm256_max_epi16(xmm4, xmm5); + xmm0 = _mm256_max_epi16(bit_met_m1_m7, bit_met_m1_m5); + xmm1 = _mm256_max_epi16(bit_met_m1_m3, bit_met_m1_m1); + xmm2 = _mm256_max_epi16(bit_met_m1_p1, bit_met_m1_p3); + xmm3 = _mm256_max_epi16(bit_met_m1_p5, bit_met_m1_p7); + xmm4 = _mm256_max_epi16(xmm0, xmm1); + xmm5 = _mm256_max_epi16(xmm2, xmm3); + logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm4); + logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm5); + xmm0 = _mm256_max_epi16(bit_met_p1_m7, bit_met_p1_m5); + xmm1 = _mm256_max_epi16(bit_met_p1_m3, bit_met_p1_m1); + xmm2 = _mm256_max_epi16(bit_met_p1_p1, bit_met_p1_p3); + xmm3 = _mm256_max_epi16(bit_met_p1_p5, bit_met_p1_p7); + xmm4 = _mm256_max_epi16(xmm0, xmm1); + xmm5 = _mm256_max_epi16(xmm2, xmm3); + logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm4); + logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm5); + xmm0 = _mm256_max_epi16(bit_met_p7_m7, bit_met_p7_m5); + xmm1 = _mm256_max_epi16(bit_met_p7_m3, bit_met_p7_m1); + xmm2 = _mm256_max_epi16(bit_met_p7_p1, bit_met_p7_p3); + xmm3 = _mm256_max_epi16(bit_met_p7_p5, bit_met_p7_p7); + xmm4 = _mm256_max_epi16(xmm0, xmm1); + xmm5 = 
_mm256_max_epi16(xmm2, xmm3); + logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm4); + logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm5); + + xmm0 = _mm256_max_epi16(bit_met_m5_m7, bit_met_m5_m5); + xmm1 = _mm256_max_epi16(bit_met_m5_m3, bit_met_m5_m1); + xmm2 = _mm256_max_epi16(bit_met_m5_p1, bit_met_m5_p3); + xmm3 = _mm256_max_epi16(bit_met_m5_p5, bit_met_m5_p7); + xmm4 = _mm256_max_epi16(xmm0, xmm1); + xmm5 = _mm256_max_epi16(xmm2, xmm3); + logmax_num_re0 = _mm256_max_epi16(xmm4, xmm5); + xmm0 = _mm256_max_epi16(bit_met_m3_m7, bit_met_m3_m5); + xmm1 = _mm256_max_epi16(bit_met_m3_m3, bit_met_m3_m1); + xmm2 = _mm256_max_epi16(bit_met_m3_p1, bit_met_m3_p3); + xmm3 = _mm256_max_epi16(bit_met_m3_p5, bit_met_m3_p7); + xmm4 = _mm256_max_epi16(xmm0, xmm1); + xmm5 = _mm256_max_epi16(xmm2, xmm3); + logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm4); + logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm5); + xmm0 = _mm256_max_epi16(bit_met_p3_m7, bit_met_p3_m5); + xmm1 = _mm256_max_epi16(bit_met_p3_m3, bit_met_p3_m1); + xmm2 = _mm256_max_epi16(bit_met_p3_p1, bit_met_p3_p3); + xmm3 = _mm256_max_epi16(bit_met_p3_p5, bit_met_p3_p7); + xmm4 = _mm256_max_epi16(xmm0, xmm1); + xmm5 = _mm256_max_epi16(xmm2, xmm3); + logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm4); + logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm5); + xmm0 = _mm256_max_epi16(bit_met_p5_m7, bit_met_p5_m5); + xmm1 = _mm256_max_epi16(bit_met_p5_m3, bit_met_p5_m1); + xmm2 = _mm256_max_epi16(bit_met_p5_p1, bit_met_p5_p3); + xmm3 = _mm256_max_epi16(bit_met_p5_p5, bit_met_p5_p7); + xmm4 = _mm256_max_epi16(xmm0, xmm1); + xmm5 = _mm256_max_epi16(xmm2, xmm3); + logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm4); + logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm5); + + y1i = _mm256_subs_epi16(logmax_num_re0, logmax_den_re0); + + // Detection for 6th bit (LTE mapping) + xmm0 = _mm256_max_epi16(bit_met_p7_p7, bit_met_p5_p7); + xmm1 = _mm256_max_epi16(bit_met_p3_p7, bit_met_p1_p7); + 
xmm2 = _mm256_max_epi16(bit_met_m1_p7, bit_met_m3_p7); + xmm3 = _mm256_max_epi16(bit_met_m5_p7, bit_met_m7_p7); + xmm4 = _mm256_max_epi16(xmm0, xmm1); + xmm5 = _mm256_max_epi16(xmm2, xmm3); + logmax_den_re0 = _mm256_max_epi16(xmm4, xmm5); + xmm0 = _mm256_max_epi16(bit_met_p7_p1, bit_met_p5_p1); + xmm1 = _mm256_max_epi16(bit_met_p3_p1, bit_met_p1_p1); + xmm2 = _mm256_max_epi16(bit_met_m1_p1, bit_met_m3_p1); + xmm3 = _mm256_max_epi16(bit_met_m5_p1, bit_met_m7_p1); /* pair m5 with m7 so the m7_p1 metric also enters the max, like every other row */ + xmm4 = _mm256_max_epi16(xmm0, xmm1); + xmm5 = _mm256_max_epi16(xmm2, xmm3); + logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm4); + logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm5); + xmm0 = _mm256_max_epi16(bit_met_p7_m1, bit_met_p5_m1); + xmm1 = _mm256_max_epi16(bit_met_p3_m1, bit_met_p1_m1); + xmm2 = _mm256_max_epi16(bit_met_m1_m1, bit_met_m3_m1); + xmm3 = _mm256_max_epi16(bit_met_m5_m1, bit_met_m7_m1); + xmm4 = _mm256_max_epi16(xmm0, xmm1); + xmm5 = _mm256_max_epi16(xmm2, xmm3); + logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm4); + logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm5); + xmm0 = _mm256_max_epi16(bit_met_p7_m7, bit_met_p5_m7); + xmm1 = _mm256_max_epi16(bit_met_p3_m7, bit_met_p1_m7); + xmm2 = _mm256_max_epi16(bit_met_m1_m7, bit_met_m3_m7); + xmm3 = _mm256_max_epi16(bit_met_m5_m7, bit_met_m7_m7); + xmm4 = _mm256_max_epi16(xmm0, xmm1); + xmm5 = _mm256_max_epi16(xmm2, xmm3); + logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm4); + logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm5); + + xmm0 = _mm256_max_epi16(bit_met_p7_m5, bit_met_p5_m5); + xmm1 = _mm256_max_epi16(bit_met_p3_m5, bit_met_p1_m5); + xmm2 = _mm256_max_epi16(bit_met_m1_m5, bit_met_m3_m5); + xmm3 = _mm256_max_epi16(bit_met_m5_m5, bit_met_m7_m5); + xmm4 = _mm256_max_epi16(xmm0, xmm1); + xmm5 = _mm256_max_epi16(xmm2, xmm3); + logmax_num_re0 = _mm256_max_epi16(xmm4, xmm5); + xmm0 = _mm256_max_epi16(bit_met_p7_m3, bit_met_p5_m3); + xmm1 = _mm256_max_epi16(bit_met_p3_m3, bit_met_p1_m3); + xmm2 = 
_mm256_max_epi16(bit_met_m1_m3, bit_met_m3_m3); + xmm3 = _mm256_max_epi16(bit_met_m5_m3, bit_met_m7_m3); + xmm4 = _mm256_max_epi16(xmm0, xmm1); + xmm5 = _mm256_max_epi16(xmm2, xmm3); + logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm4); + logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm5); + xmm0 = _mm256_max_epi16(bit_met_p7_p3, bit_met_p5_p3); + xmm1 = _mm256_max_epi16(bit_met_p3_p3, bit_met_p1_p3); + xmm2 = _mm256_max_epi16(bit_met_m1_p3, bit_met_m3_p3); + xmm3 = _mm256_max_epi16(bit_met_m5_p3, bit_met_m7_p3); + xmm4 = _mm256_max_epi16(xmm0, xmm1); + xmm5 = _mm256_max_epi16(xmm2, xmm3); + logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm4); + logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm5); + xmm0 = _mm256_max_epi16(bit_met_p7_p5, bit_met_p5_p5); + xmm1 = _mm256_max_epi16(bit_met_p3_p5, bit_met_p1_p5); + xmm2 = _mm256_max_epi16(bit_met_m1_p5, bit_met_m3_p5); + xmm3 = _mm256_max_epi16(bit_met_m5_p5, bit_met_m7_p5); + xmm4 = _mm256_max_epi16(xmm0, xmm1); + xmm5 = _mm256_max_epi16(xmm2, xmm3); + logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm4); + logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm5); + + y2i = _mm256_subs_epi16(logmax_num_re0, logmax_den_re0); + + // map to output stream, difficult to do in SIMD since we have 6 16bit LLRs + // RE 1 + j = 48*i; + stream0_out[j + 0] = ((short *)&y0r)[0]; + stream0_out[j + 1] = ((short *)&y1r)[0]; + stream0_out[j + 2] = ((short *)&y2r)[0]; + stream0_out[j + 3] = ((short *)&y0i)[0]; + stream0_out[j + 4] = ((short *)&y1i)[0]; + stream0_out[j + 5] = ((short *)&y2i)[0]; + // RE 2 + stream0_out[j + 6] = ((short *)&y0r)[1]; + stream0_out[j + 7] = ((short *)&y1r)[1]; + stream0_out[j + 8] = ((short *)&y2r)[1]; + stream0_out[j + 9] = ((short *)&y0i)[1]; + stream0_out[j + 10] = ((short *)&y1i)[1]; + stream0_out[j + 11] = ((short *)&y2i)[1]; + // RE 3 + stream0_out[j + 12] = ((short *)&y0r)[2]; + stream0_out[j + 13] = ((short *)&y1r)[2]; + stream0_out[j + 14] = ((short *)&y2r)[2]; + stream0_out[j + 
15] = ((short *)&y0i)[2]; + stream0_out[j + 16] = ((short *)&y1i)[2]; + stream0_out[j + 17] = ((short *)&y2i)[2]; + // RE 4 + stream0_out[j + 18] = ((short *)&y0r)[3]; + stream0_out[j + 19] = ((short *)&y1r)[3]; + stream0_out[j + 20] = ((short *)&y2r)[3]; + stream0_out[j + 21] = ((short *)&y0i)[3]; + stream0_out[j + 22] = ((short *)&y1i)[3]; + stream0_out[j + 23] = ((short *)&y2i)[3]; + // RE 5 + stream0_out[j + 24] = ((short *)&y0r)[4]; + stream0_out[j + 25] = ((short *)&y1r)[4]; + stream0_out[j + 26] = ((short *)&y2r)[4]; + stream0_out[j + 27] = ((short *)&y0i)[4]; + stream0_out[j + 28] = ((short *)&y1i)[4]; + stream0_out[j + 29] = ((short *)&y2i)[4]; + // RE 6 + stream0_out[j + 30] = ((short *)&y0r)[5]; + stream0_out[j + 31] = ((short *)&y1r)[5]; + stream0_out[j + 32] = ((short *)&y2r)[5]; + stream0_out[j + 33] = ((short *)&y0i)[5]; + stream0_out[j + 34] = ((short *)&y1i)[5]; + stream0_out[j + 35] = ((short *)&y2i)[5]; + // RE 7 + stream0_out[j + 36] = ((short *)&y0r)[6]; + stream0_out[j + 37] = ((short *)&y1r)[6]; + stream0_out[j + 38] = ((short *)&y2r)[6]; + stream0_out[j + 39] = ((short *)&y0i)[6]; + stream0_out[j + 40] = ((short *)&y1i)[6]; + stream0_out[j + 41] = ((short *)&y2i)[6]; + // RE 8 + stream0_out[j + 42] = ((short *)&y0r)[7]; + stream0_out[j + 43] = ((short *)&y1r)[7]; + stream0_out[j + 44] = ((short *)&y2r)[7]; + stream0_out[j + 45] = ((short *)&y0i)[7]; + stream0_out[j + 46] = ((short *)&y1i)[7]; + stream0_out[j + 47] = ((short *)&y2i)[7]; + + // RE 9 + stream0_out[j + 48] = ((short *)&y0r)[8]; + stream0_out[j + 49] = ((short *)&y1r)[8]; + stream0_out[j + 50] = ((short *)&y2r)[8]; + stream0_out[j + 51] = ((short *)&y0i)[8]; + stream0_out[j + 52] = ((short *)&y1i)[8]; + stream0_out[j + 53] = ((short *)&y2i)[8]; + // RE 10 + stream0_out[j + 54] = ((short *)&y0r)[9]; + stream0_out[j + 55] = ((short *)&y1r)[9]; + stream0_out[j + 56] = ((short *)&y2r)[9]; + stream0_out[j + 57] = ((short *)&y0i)[9]; + stream0_out[j + 58] = ((short *)&y1i)[9]; + 
stream0_out[j + 59] = ((short *)&y2i)[9]; + // RE 11 + stream0_out[j + 60] = ((short *)&y0r)[10]; + stream0_out[j + 61] = ((short *)&y1r)[10]; + stream0_out[j + 62] = ((short *)&y2r)[10]; + stream0_out[j + 63] = ((short *)&y0i)[10]; + stream0_out[j + 64] = ((short *)&y1i)[10]; + stream0_out[j + 65] = ((short *)&y2i)[10]; + // RE 12 + stream0_out[j + 66] = ((short *)&y0r)[11]; + stream0_out[j + 67] = ((short *)&y1r)[11]; + stream0_out[j + 68] = ((short *)&y2r)[11]; + stream0_out[j + 69] = ((short *)&y0i)[11]; + stream0_out[j + 70] = ((short *)&y1i)[11]; + stream0_out[j + 71] = ((short *)&y2i)[11]; + // RE 13 + stream0_out[j + 72] = ((short *)&y0r)[12]; + stream0_out[j + 73] = ((short *)&y1r)[12]; + stream0_out[j + 74] = ((short *)&y2r)[12]; + stream0_out[j + 75] = ((short *)&y0i)[12]; + stream0_out[j + 76] = ((short *)&y1i)[12]; + stream0_out[j + 77] = ((short *)&y2i)[12]; + // RE 14 + stream0_out[j + 78] = ((short *)&y0r)[13]; + stream0_out[j + 79] = ((short *)&y1r)[13]; + stream0_out[j + 80] = ((short *)&y2r)[13]; + stream0_out[j + 81] = ((short *)&y0i)[13]; + stream0_out[j + 82] = ((short *)&y1i)[13]; + stream0_out[j + 83] = ((short *)&y2i)[13]; + // RE 15 + stream0_out[j + 84] = ((short *)&y0r)[14]; + stream0_out[j + 85] = ((short *)&y1r)[14]; + stream0_out[j + 86] = ((short *)&y2r)[14]; + stream0_out[j + 87] = ((short *)&y0i)[14]; + stream0_out[j + 88] = ((short *)&y1i)[14]; + stream0_out[j + 89] = ((short *)&y2i)[14]; + // RE 16 + stream0_out[j + 90] = ((short *)&y0r)[15]; + stream0_out[j + 91] = ((short *)&y1r)[15]; + stream0_out[j + 92] = ((short *)&y2r)[15]; + stream0_out[j + 93] = ((short *)&y0i)[15]; + stream0_out[j + 94] = ((short *)&y1i)[15]; + stream0_out[j + 95] = ((short *)&y2i)[15]; + +#elif defined(__arm__) + +#endif + + } + +#if defined(__x86_64__) || defined(__i386__) + _mm_empty(); + _m_empty(); +#endif +} diff --git a/openair1/PHY/LTE_TRANSPORT/print_stats.c b/openair1/PHY/LTE_TRANSPORT/print_stats.c index 
c8d4292e8d03c8866f6a053ce1c7a45d37b591ad..c3e40e1e72f323b108d740b7d5db815f76cf8cd1 100644 --- a/openair1/PHY/LTE_TRANSPORT/print_stats.c +++ b/openair1/PHY/LTE_TRANSPORT/print_stats.c @@ -107,9 +107,9 @@ int dump_ue_stats(PHY_VARS_UE *ue, UE_rxtx_proc_t *proc,char* buffer, int length len += sprintf(&buffer[len], "[UE PROC] Po_PUCCH = %d dBm (Po_NOMINAL_PUCCH %d dBm, g_pucch %d dB)\n", get_PL(ue->Mod_id,ue->CC_id,0)+ ue->frame_parms.ul_power_control_config_common.p0_NominalPUCCH+ - ue->dlsch[0][0]->g_pucch, + ue->dlsch[0][0][0]->g_pucch, ue->frame_parms.ul_power_control_config_common.p0_NominalPUCCH, - ue->dlsch[0][0]->g_pucch); + ue->dlsch[0][0][0]->g_pucch); } //for (eNB=0;eNB<NUMBER_OF_eNB_MAX;eNB++) { for (eNB=0; eNB<1; eNB++) { @@ -482,24 +482,24 @@ int dump_ue_stats(PHY_VARS_UE *ue, UE_rxtx_proc_t *proc,char* buffer, int length len += sprintf(&buffer[len], "[UE PROC] Mode 6 Wideband CQI eNB %d : %d dB\n",eNB,ue->measurements.precoded_cqi_dB[eNB][0]); for (harq_pid=0;harq_pid<8;harq_pid++) { - len+=sprintf(&buffer[len],"[UE PROC] eNB %d: CW 0 harq_pid %d, mcs %d:",eNB,harq_pid,ue->dlsch[0][0]->harq_processes[harq_pid]->mcs); + len+=sprintf(&buffer[len],"[UE PROC] eNB %d: CW 0 harq_pid %d, mcs %d:",eNB,harq_pid,ue->dlsch[0][0][0]->harq_processes[harq_pid]->mcs); for (round=0;round<8;round++) len+=sprintf(&buffer[len],"%d/%d ", - ue->dlsch[0][0]->harq_processes[harq_pid]->errors[round], - ue->dlsch[0][0]->harq_processes[harq_pid]->trials[round]); + ue->dlsch[0][0][0]->harq_processes[harq_pid]->errors[round], + ue->dlsch[0][0][0]->harq_processes[harq_pid]->trials[round]); len+=sprintf(&buffer[len],"\n"); } - if (ue->dlsch[0] && ue->dlsch[0][0] && ue->dlsch[0][1]) { - len += sprintf(&buffer[len], "[UE PROC] Saved PMI for DLSCH eNB %d : %jx (%p)\n",eNB,pmi2hex_2Ar1(ue->dlsch[0][0]->pmi_alloc),ue->dlsch[0][0]); + if (ue->dlsch[0][0] && ue->dlsch[0][0][0] && ue->dlsch[0][0][1]) { + len += sprintf(&buffer[len], "[UE PROC] Saved PMI for DLSCH eNB %d : %jx 
(%p)\n",eNB,pmi2hex_2Ar1(ue->dlsch[0][0][0]->pmi_alloc),ue->dlsch[0][0][0]); - len += sprintf(&buffer[len], "[UE PROC] eNB %d: dl_power_off = %d\n",eNB,ue->dlsch[0][0]->harq_processes[0]->dl_power_off); + len += sprintf(&buffer[len], "[UE PROC] eNB %d: dl_power_off = %d\n",eNB,ue->dlsch[0][0][0]->harq_processes[0]->dl_power_off); for (harq_pid=0;harq_pid<8;harq_pid++) { - len+=sprintf(&buffer[len],"[UE PROC] eNB %d: CW 1 harq_pid %d, mcs %d:",eNB,harq_pid,ue->dlsch[0][1]->harq_processes[0]->mcs); + len+=sprintf(&buffer[len],"[UE PROC] eNB %d: CW 1 harq_pid %d, mcs %d:",eNB,harq_pid,ue->dlsch[0][0][1]->harq_processes[harq_pid]->mcs); /* mcs of the process being printed, indexed like errors[]/trials[] below */ for (round=0;round<8;round++) len+=sprintf(&buffer[len],"%d/%d ", - ue->dlsch[0][1]->harq_processes[harq_pid]->errors[round], - ue->dlsch[0][1]->harq_processes[harq_pid]->trials[round]); + ue->dlsch[0][0][1]->harq_processes[harq_pid]->errors[round], + ue->dlsch[0][0][1]->harq_processes[harq_pid]->trials[round]); len+=sprintf(&buffer[len],"\n"); } } diff --git a/openair1/PHY/LTE_TRANSPORT/proto.h b/openair1/PHY/LTE_TRANSPORT/proto.h index 5fc8dea9867cee8eac0abf65256d5d0179628afc..fe7991e343ab200cf96d8d5f7a34f66a29a8fba0 100644 --- a/openair1/PHY/LTE_TRANSPORT/proto.h +++ b/openair1/PHY/LTE_TRANSPORT/proto.h @@ -704,6 +704,22 @@ void qam64_qam16(short *stream0_in, short *rho01, int length); +/** \brief This function computes the LLRs for ML (max-logsum approximation) dual-stream 64QAM/16QAM reception. 
+ @param stream0_in Input from channel compensated (MR combined) stream 0 + @param stream1_in Input from channel compensated (MR combined) stream 1 + @param ch_mag Input from scaled channel magnitude square of h0'*g0 + @param ch_mag_i Input from scaled channel magnitude square of h0'*g1 + @param stream0_out Output from LLR unit for stream0 + @param rho01 Cross-correlation between channels (MR combined) + @param length in complex channel outputs*/ +void qam64_qam16_avx2(short *stream0_in, + short *stream1_in, + short *ch_mag, + short *ch_mag_i, + short *stream0_out, + short *rho01, + int length); + /** \brief This function perform LLR computation for dual-stream (64QAM/16QAM) transmission. @param frame_parms Frame descriptor structure @param rxdataF_comp Compensated channel output @@ -746,6 +762,22 @@ void qam64_qam64(short *stream0_in, short *rho01, int length); +/** \brief This function computes the LLRs for ML (max-logsum approximation) dual-stream 64QAM/64QAM reception. + @param stream0_in Input from channel compensated (MR combined) stream 0 + @param stream1_in Input from channel compensated (MR combined) stream 1 + @param ch_mag Input from scaled channel magnitude square of h0'*g0 + @param ch_mag_i Input from scaled channel magnitude square of h0'*g1 + @param stream0_out Output from LLR unit for stream0 + @param rho01 Cross-correlation between channels (MR combined) + @param length in complex channel outputs*/ +void qam64_qam64_avx2(int32_t *stream0_in, + int32_t *stream1_in, + int32_t *ch_mag, + int32_t *ch_mag_i, + int16_t *stream0_out, + int32_t *rho01, + int length); + /** \brief This function perform LLR computation for dual-stream (64QAM/64QAM) transmission. 
@param frame_parms Frame descriptor structure @param rxdataF_comp Compensated channel output diff --git a/openair1/PHY/LTE_TRANSPORT/ulsch_coding.c b/openair1/PHY/LTE_TRANSPORT/ulsch_coding.c index 9571ea6cd52823d8409a2781bdd1c2ab5fc7c6e4..dbb7a16ac9cbb983a650b5811e9cfc594e2245e3 100644 --- a/openair1/PHY/LTE_TRANSPORT/ulsch_coding.c +++ b/openair1/PHY/LTE_TRANSPORT/ulsch_coding.c @@ -233,7 +233,7 @@ uint32_t ulsch_encoding(uint8_t *a, LTE_DL_FRAME_PARMS *frame_parms=&ue->frame_parms; PHY_MEASUREMENTS *meas = &ue->measurements; LTE_UE_ULSCH_t *ulsch=ue->ulsch[eNB_id]; - LTE_UE_DLSCH_t **dlsch = ue->dlsch[eNB_id]; + LTE_UE_DLSCH_t **dlsch = ue->dlsch[0][eNB_id]; uint16_t rnti = 0xffff; if (!ulsch) { @@ -966,7 +966,7 @@ int ulsch_encoding_emul(uint8_t *ulsch_buffer, { LTE_UE_ULSCH_t *ulsch = ue->ulsch[eNB_id]; - LTE_UE_DLSCH_t **dlsch = ue->dlsch[eNB_id]; + LTE_UE_DLSCH_t **dlsch = ue->dlsch[0][eNB_id]; PHY_MEASUREMENTS *meas = &ue->measurements; uint8_t tmode = ue->transmission_mode[eNB_id]; uint16_t rnti=ue->pdcch_vars[eNB_id]->crnti; diff --git a/openair1/PHY/LTE_TRANSPORT/ulsch_decoding.c b/openair1/PHY/LTE_TRANSPORT/ulsch_decoding.c index ca67ccdb3c9c34dbf01b7752201ebce456b34017..bb2a5b3e224652d3fc62c6f2594743d88dc30cc1 100644 --- a/openair1/PHY/LTE_TRANSPORT/ulsch_decoding.c +++ b/openair1/PHY/LTE_TRANSPORT/ulsch_decoding.c @@ -2066,7 +2066,7 @@ uint32_t ulsch_decoding_emul(PHY_VARS_eNB *eNB, eNB_rxtx_proc_t *proc, // get local ue's ack if ((UE_index >= oai_emulation.info.first_ue_local) ||(UE_index <(oai_emulation.info.first_ue_local+oai_emulation.info.nb_ue_local))) { get_ack(&eNB->frame_parms, - PHY_vars_UE_g[UE_id][CC_id]->dlsch[0][0]->harq_ack, + PHY_vars_UE_g[UE_id][CC_id]->dlsch[0][0][0]->harq_ack, subframe, eNB->ulsch[UE_index]->harq_processes[harq_pid]->o_ACK,0); } else { // get remote UEs' ack diff --git a/openair1/PHY/Makefile.inc b/openair1/PHY/Makefile.inc index 1586f353c0f623853f6149ca91f313630847b92a..90094b31f8605946ba6c18f31eb070b4ff99f484 
100644 --- a/openair1/PHY/Makefile.inc +++ b/openair1/PHY/Makefile.inc @@ -7,6 +7,7 @@ PHY_OBJS += $(TOP_DIR)/PHY/LTE_TRANSPORT/dlsch_coding.o PHY_OBJS += $(TOP_DIR)/PHY/LTE_TRANSPORT/dlsch_modulation.o PHY_OBJS += $(TOP_DIR)/PHY/LTE_TRANSPORT/dlsch_demodulation.o PHY_OBJS += $(TOP_DIR)/PHY/LTE_TRANSPORT/dlsch_llr_computation.o +PHY_OBJS += $(TOP_DIR)/PHY/LTE_TRANSPORT/dlsch_llr_computation_avx2.o PHY_OBJS += $(TOP_DIR)/PHY/LTE_TRANSPORT/power_control.o PHY_OBJS += $(TOP_DIR)/PHY/LTE_TRANSPORT/dlsch_decoding.o PHY_OBJS += $(TOP_DIR)/PHY/LTE_TRANSPORT/dlsch_scrambling.o diff --git a/openair1/PHY/TOOLS/lte_phy_scope.c b/openair1/PHY/TOOLS/lte_phy_scope.c index ca08f875190d18a8def154faadd11d18418299c2..7a533464530f337d4aeba89722164670a830be63 100644 --- a/openair1/PHY/TOOLS/lte_phy_scope.c +++ b/openair1/PHY/TOOLS/lte_phy_scope.c @@ -510,16 +510,16 @@ void phy_scope_UE(FD_lte_phy_scope_ue *form, int beamforming_mode = phy_vars_ue->transmission_mode[eNB_id]>6 ? phy_vars_ue->transmission_mode[eNB_id] : 0; - if (phy_vars_ue->dlsch[eNB_id][0]!=NULL) { - harq_pid = phy_vars_ue->dlsch[eNB_id][0]->current_harq_pid; + if (phy_vars_ue->dlsch[subframe&0x1][eNB_id][0]!=NULL) { + harq_pid = phy_vars_ue->dlsch[subframe&0x1][eNB_id][0]->current_harq_pid; if (harq_pid>=8) return; - mcs = phy_vars_ue->dlsch[eNB_id][0]->harq_processes[harq_pid]->mcs; + mcs = phy_vars_ue->dlsch[subframe&0x1][eNB_id][0]->harq_processes[harq_pid]->mcs; // Button 0 - if(!phy_vars_ue->dlsch[eNB_id][0]->harq_processes[harq_pid]->dl_power_off) { + if(!phy_vars_ue->dlsch[subframe&0x1][eNB_id][0]->harq_processes[harq_pid]->dl_power_off) { // we are in TM5 fl_show_object(form->button_0); } @@ -530,12 +530,12 @@ void phy_scope_UE(FD_lte_phy_scope_ue *form, } // coded_bits_per_codeword = frame_parms->N_RB_DL*12*get_Qm(mcs)*(frame_parms->symbols_per_tti); - if (phy_vars_ue->dlsch[eNB_id][0]!=NULL) { + if (phy_vars_ue->dlsch[subframe&0x1][eNB_id][0]!=NULL) { coded_bits_per_codeword = get_G(frame_parms, - 
phy_vars_ue->dlsch[eNB_id][0]->harq_processes[harq_pid]->nb_rb, - phy_vars_ue->dlsch[eNB_id][0]->harq_processes[harq_pid]->rb_alloc_even, + phy_vars_ue->dlsch[subframe&0x1][eNB_id][0]->harq_processes[harq_pid]->nb_rb, + phy_vars_ue->dlsch[subframe&0x1][eNB_id][0]->harq_processes[harq_pid]->rb_alloc_even, get_Qm(mcs), - phy_vars_ue->dlsch[eNB_id][0]->harq_processes[harq_pid]->Nl, + phy_vars_ue->dlsch[subframe&0x1][eNB_id][0]->harq_processes[harq_pid]->Nl, num_pdcch_symbols, frame, subframe, diff --git a/openair1/PHY/TOOLS/lte_phy_scope_tm4.c b/openair1/PHY/TOOLS/lte_phy_scope_tm4.c index 60c5ace2c5379471d7746421daad79f9a1844cfc..6807f1f6f48a6de4ee057c9c2341b80084526448 100755 --- a/openair1/PHY/TOOLS/lte_phy_scope_tm4.c +++ b/openair1/PHY/TOOLS/lte_phy_scope_tm4.c @@ -448,11 +448,11 @@ void phy_scope_UE(FD_lte_phy_scope_ue *form, int mcs1=0; unsigned char harq_pid = 0; int beamforming_mode = phy_vars_ue->transmission_mode[eNB_id]>6 ? phy_vars_ue->transmission_mode[eNB_id] : 0; - if (phy_vars_ue->dlsch[eNB_id][0]!=NULL) { - harq_pid = phy_vars_ue->dlsch[eNB_id][0]->current_harq_pid; + if (phy_vars_ue->dlsch[subframe&0x1][eNB_id][0]!=NULL) { + harq_pid = phy_vars_ue->dlsch[subframe&0x1][eNB_id][0]->current_harq_pid; if (harq_pid>=8) return; - mcs0 = phy_vars_ue->dlsch[eNB_id][0]->harq_processes[harq_pid]->mcs; + mcs0 = phy_vars_ue->dlsch[subframe&0x1][eNB_id][0]->harq_processes[harq_pid]->mcs; // Button 0 /* if(!phy_vars_ue->dlsch_ue[eNB_id][0]->harq_processes[harq_pid]->dl_power_off) { @@ -461,23 +461,23 @@ void phy_scope_UE(FD_lte_phy_scope_ue *form, } */ } - if (phy_vars_ue->dlsch[eNB_id][1]!=NULL) { - harq_pid = phy_vars_ue->dlsch[eNB_id][1]->current_harq_pid; + if (phy_vars_ue->dlsch[subframe&0x1][eNB_id][1]!=NULL) { + harq_pid = phy_vars_ue->dlsch[subframe&0x1][eNB_id][1]->current_harq_pid; if (harq_pid>=8) return; - mcs1 = phy_vars_ue->dlsch[eNB_id][1]->harq_processes[harq_pid]->mcs; + mcs1 = 
phy_vars_ue->dlsch[subframe&0x1][eNB_id][1]->harq_processes[harq_pid]->mcs; } if (phy_vars_ue->pdcch_vars[eNB_id]!=NULL) { num_pdcch_symbols = phy_vars_ue->pdcch_vars[eNB_id]->num_pdcch_symbols; } // coded_bits_per_codeword = frame_parms->N_RB_DL*12*get_Qm(mcs)*(frame_parms->symbols_per_tti); - if (phy_vars_ue->dlsch[eNB_id][0]!=NULL) { + if (phy_vars_ue->dlsch[subframe&0x1][eNB_id][0]!=NULL) { mod0 = get_Qm(mcs0); coded_bits_per_codeword0 = get_G(frame_parms, - phy_vars_ue->dlsch[eNB_id][0]->harq_processes[harq_pid]->nb_rb, - phy_vars_ue->dlsch[eNB_id][0]->harq_processes[harq_pid]->rb_alloc_even, + phy_vars_ue->dlsch[subframe&0x1][eNB_id][0]->harq_processes[harq_pid]->nb_rb, + phy_vars_ue->dlsch[subframe&0x1][eNB_id][0]->harq_processes[harq_pid]->rb_alloc_even, get_Qm(mcs0), - phy_vars_ue->dlsch[eNB_id][0]->harq_processes[harq_pid]->Nl, + phy_vars_ue->dlsch[subframe&0x1][eNB_id][0]->harq_processes[harq_pid]->Nl, num_pdcch_symbols, frame, subframe, @@ -486,13 +486,13 @@ void phy_scope_UE(FD_lte_phy_scope_ue *form, coded_bits_per_codeword0 = 0; //frame_parms->N_RB_DL*12*get_Qm(mcs)*(frame_parms->symbols_per_tti); mod0=0; } - if (phy_vars_ue->dlsch[eNB_id][1]!=NULL) { + if (phy_vars_ue->dlsch[subframe&0x1][eNB_id][1]!=NULL) { mod1 = get_Qm(mcs1); coded_bits_per_codeword1 = get_G(frame_parms, - phy_vars_ue->dlsch[eNB_id][1]->harq_processes[harq_pid]->nb_rb, - phy_vars_ue->dlsch[eNB_id][1]->harq_processes[harq_pid]->rb_alloc_even, + phy_vars_ue->dlsch[subframe&0x1][eNB_id][1]->harq_processes[harq_pid]->nb_rb, + phy_vars_ue->dlsch[subframe&0x1][eNB_id][1]->harq_processes[harq_pid]->rb_alloc_even, get_Qm(mcs1), - phy_vars_ue->dlsch[eNB_id][1]->harq_processes[harq_pid]->Nl, + phy_vars_ue->dlsch[subframe&0x1][eNB_id][1]->harq_processes[harq_pid]->Nl, num_pdcch_symbols, frame, subframe, diff --git a/openair1/PHY/defs.h b/openair1/PHY/defs.h index d70bfb3f722df0bcf526266d633b932bb534057b..5c1b1158ba45a62f9a206e45022a746a5bd23db7 100644 --- a/openair1/PHY/defs.h +++ 
b/openair1/PHY/defs.h @@ -706,7 +706,7 @@ typedef struct { LTE_DL_FRAME_PARMS frame_parms_before_ho; LTE_UE_COMMON common_vars; - LTE_UE_PDSCH *pdsch_vars[2][NUMBER_OF_CONNECTED_eNB_MAX+1]; + LTE_UE_PDSCH *pdsch_vars[2][NUMBER_OF_CONNECTED_eNB_MAX+1]; // two RxTx Threads LTE_UE_PDSCH_FLP *pdsch_vars_flp[NUMBER_OF_CONNECTED_eNB_MAX+1]; LTE_UE_PDSCH *pdsch_vars_SI[NUMBER_OF_CONNECTED_eNB_MAX+1]; LTE_UE_PDSCH *pdsch_vars_ra[NUMBER_OF_CONNECTED_eNB_MAX+1]; @@ -715,7 +715,7 @@ typedef struct { LTE_UE_PBCH *pbch_vars[NUMBER_OF_CONNECTED_eNB_MAX]; LTE_UE_PDCCH *pdcch_vars[NUMBER_OF_CONNECTED_eNB_MAX]; LTE_UE_PRACH *prach_vars[NUMBER_OF_CONNECTED_eNB_MAX]; - LTE_UE_DLSCH_t *dlsch[NUMBER_OF_CONNECTED_eNB_MAX][2]; + LTE_UE_DLSCH_t *dlsch[2][NUMBER_OF_CONNECTED_eNB_MAX][2]; // two RxTx Threads LTE_UE_ULSCH_t *ulsch[NUMBER_OF_CONNECTED_eNB_MAX]; LTE_UE_DLSCH_t *dlsch_SI[NUMBER_OF_CONNECTED_eNB_MAX]; LTE_UE_DLSCH_t *dlsch_ra[NUMBER_OF_CONNECTED_eNB_MAX]; @@ -802,6 +802,8 @@ typedef struct { uint8_t prach_cnt; uint8_t prach_PreambleIndex; // uint8_t prach_timer; + uint8_t decode_SIB; + uint8_t decode_MIB; int rx_offset; /// Timing offset int rx_offset_diff; /// Timing adjustment for ofdm symbol0 on HW USRP int timing_advance; ///timing advance signalled from eNB @@ -872,7 +874,7 @@ typedef struct { time_stats_t phy_proc; time_stats_t phy_proc_tx; - time_stats_t phy_proc_rx; + time_stats_t phy_proc_rx[2]; uint32_t use_ia_receiver; @@ -885,6 +887,10 @@ typedef struct { time_stats_t ulsch_interleaving_stats; time_stats_t ulsch_multiplexing_stats; + time_stats_t generic_stat; + time_stats_t pdsch_procedures_stat; + time_stats_t dlsch_procedures_stat; + time_stats_t ofdm_demod_stats; time_stats_t dlsch_rx_pdcch_stats; time_stats_t rx_dft_stats; diff --git a/openair1/SCHED/phy_procedures_lte_ue.c b/openair1/SCHED/phy_procedures_lte_ue.c index cd8fbdbc8eec0b3130e91eb6645f9ca69f6f8278..4a75d5d9e7aedd9fdf70e7a889747be0df93ce8d 100644 --- a/openair1/SCHED/phy_procedures_lte_ue.c +++ 
b/openair1/SCHED/phy_procedures_lte_ue.c @@ -75,6 +75,7 @@ fifo_dump_emos_UE emos_dump_UE; extern int oai_exit; +extern double cpuf; @@ -90,10 +91,10 @@ void dump_dlsch(PHY_VARS_UE *ue,UE_rxtx_proc_t *proc,uint8_t eNB_id,uint8_t subf uint8_t nsymb = (ue->frame_parms.Ncp == 0) ? 14 : 12; coded_bits_per_codeword = get_G(&ue->frame_parms, - ue->dlsch[eNB_id][0]->harq_processes[harq_pid]->nb_rb, - ue->dlsch[eNB_id][0]->harq_processes[harq_pid]->rb_alloc_even, - ue->dlsch[eNB_id][0]->harq_processes[harq_pid]->Qm, - ue->dlsch[eNB_id][0]->harq_processes[harq_pid]->Nl, + ue->dlsch[subframe&0x1][eNB_id][0]->harq_processes[harq_pid]->nb_rb, + ue->dlsch[subframe&0x1][eNB_id][0]->harq_processes[harq_pid]->rb_alloc_even, + ue->dlsch[subframe&0x1][eNB_id][0]->harq_processes[harq_pid]->Qm, + ue->dlsch[subframe&0x1][eNB_id][0]->harq_processes[harq_pid]->Nl, ue->pdcch_vars[eNB_id]->num_pdcch_symbols, proc->frame_rx, subframe, @@ -268,37 +269,39 @@ void phy_reset_ue(uint8_t Mod_id,uint8_t CC_id,uint8_t eNB_index) uint8_t i,j,k,s; PHY_VARS_UE *ue = PHY_vars_UE_g[Mod_id][CC_id]; - //[NUMBER_OF_CONNECTED_eNB_MAX][2]; - for(i=0; i<NUMBER_OF_CONNECTED_eNB_MAX; i++) { - for(j=0; j<2; j++) { - //DL HARQ - if(ue->dlsch[i][j]) { - for(k=0; k<NUMBER_OF_HARQ_PID_MAX && ue->dlsch[i][j]->harq_processes[k]; k++) { - ue->dlsch[i][j]->harq_processes[k]->status = SCH_IDLE; - for (s=0; s<10; s++) { - // reset ACK/NACK bit to DTX for all subframes s = 0..9 - ue->dlsch[i][j]->harq_ack[s].ack = 2; - ue->dlsch[i][j]->harq_ack[s].send_harq_status = 0; - ue->dlsch[i][j]->harq_ack[s].vDAI_UL = 0xff; - ue->dlsch[i][j]->harq_ack[s].vDAI_DL = 0xff; + //[NUMBER_OF_RX_THREAD=2][NUMBER_OF_CONNECTED_eNB_MAX][2]; + for(int l=0; l<2; l++) { + for(i=0; i<NUMBER_OF_CONNECTED_eNB_MAX; i++) { + for(j=0; j<2; j++) { + //DL HARQ + if(ue->dlsch[l][i][j]) { + for(k=0; k<NUMBER_OF_HARQ_PID_MAX && ue->dlsch[l][i][j]->harq_processes[k]; k++) { + ue->dlsch[l][i][j]->harq_processes[k]->status = SCH_IDLE; + for (s=0; s<10; s++) { 
+ // reset ACK/NACK bit to DTX for all subframes s = 0..9 + ue->dlsch[l][i][j]->harq_ack[s].ack = 2; + ue->dlsch[l][i][j]->harq_ack[s].send_harq_status = 0; + ue->dlsch[l][i][j]->harq_ack[s].vDAI_UL = 0xff; + ue->dlsch[l][i][j]->harq_ack[s].vDAI_DL = 0xff; + } + } + } } - } - } - } - //UL HARQ - if(ue->ulsch[i]) { - for(k=0; k<NUMBER_OF_HARQ_PID_MAX && ue->ulsch[i]->harq_processes[k]; k++) { - ue->ulsch[i]->harq_processes[k]->status = SCH_IDLE; - //Set NDIs for all UL HARQs to 0 - // ue->ulsch[i]->harq_processes[k]->Ndi = 0; + //UL HARQ + if(ue->ulsch[i]) { + for(k=0; k<NUMBER_OF_HARQ_PID_MAX && ue->ulsch[i]->harq_processes[k]; k++) { + ue->ulsch[i]->harq_processes[k]->status = SCH_IDLE; + //Set NDIs for all UL HARQs to 0 + // ue->ulsch[i]->harq_processes[k]->Ndi = 0; - } - } + } + } - // flush Msg3 buffer - ue->ulsch_Msg3_active[i] = 0; + // flush Msg3 buffer + ue->ulsch_Msg3_active[i] = 0; + } } } @@ -327,7 +330,8 @@ void ra_succeeded(uint8_t Mod_id,uint8_t CC_id,uint8_t eNB_index) for (i=0; i<8; i++) { if (PHY_vars_UE_g[Mod_id][CC_id]->ulsch[eNB_index]->harq_processes[i]) { PHY_vars_UE_g[Mod_id][CC_id]->ulsch[eNB_index]->harq_processes[i]->status=IDLE; - PHY_vars_UE_g[Mod_id][CC_id]->dlsch[eNB_index][0]->harq_processes[i]->round=0; + PHY_vars_UE_g[Mod_id][CC_id]->dlsch[0][eNB_index][0]->harq_processes[i]->round=0; + PHY_vars_UE_g[Mod_id][CC_id]->dlsch[1][eNB_index][0]->harq_processes[i]->round=0; } } @@ -638,7 +642,7 @@ void ue_compute_srs_occasion(PHY_VARS_UE *ue,UE_rxtx_proc_t *proc,uint8_t eNB_id uint8_t pucch_ack_payload[2]; if (get_ack(&ue->frame_parms, - ue->dlsch[eNB_id][0]->harq_ack, + ue->dlsch[proc->subframe_rx&0x1][eNB_id][0]->harq_ack, subframe_tx,pucch_ack_payload,0) > 0) { is_sr_an_subframe = 1; @@ -790,6 +794,7 @@ PUCCH_FMT_t get_pucch_format(lte_frame_type_t frame_type, return pucch_format2; } } + return pucch_format1a; } uint16_t get_n1_pucch(PHY_VARS_UE *ue, UE_rxtx_proc_t *proc, @@ -929,11 +934,11 @@ uint16_t get_n1_pucch(PHY_VARS_UE *ue, 
n1_pucch1 = get_Np(frame_parms->N_RB_DL,nCCE1,1) + nCCE1 + frame_parms->pucch_config_common.n1PUCCH_AN; // set ACK/NAK to values if not DTX - if (ue->dlsch[eNB_id][0]->harq_ack[(6+last_dl)%10].send_harq_status>0) // n-6 // subframe 6 is to be ACK/NAKed - harq_ack1 = ue->dlsch[eNB_id][0]->harq_ack[(6+last_dl)%10].ack; + if (ue->dlsch[proc->subframe_rx&0x1][eNB_id][0]->harq_ack[(6+last_dl)%10].send_harq_status>0) // n-6 // subframe 6 is to be ACK/NAKed + harq_ack1 = ue->dlsch[proc->subframe_rx&0x1][eNB_id][0]->harq_ack[(6+last_dl)%10].ack; - if (ue->dlsch[eNB_id][0]->harq_ack[5+last_dl].send_harq_status>0) // n-6 // subframe 5 is to be ACK/NAKed - harq_ack0 = ue->dlsch[eNB_id][0]->harq_ack[5+last_dl].ack; + if (ue->dlsch[proc->subframe_rx&0x1][eNB_id][0]->harq_ack[5+last_dl].send_harq_status>0) // n-6 // subframe 5 is to be ACK/NAKed + harq_ack0 = ue->dlsch[proc->subframe_rx&0x1][eNB_id][0]->harq_ack[5+last_dl].ack; if (harq_ack1!=2) { // n-6 // subframe 6,8,0 and maybe 5,7,9 is to be ACK/NAKed @@ -1040,10 +1045,10 @@ void ulsch_common_procedures(PHY_VARS_UE *ue, UE_rxtx_proc_t *proc, uint8_t empt int subframe_tx = proc->subframe_tx; int frame_tx = proc->frame_tx; int ulsch_start; - int overflow=0; #if defined(EXMIMO) || defined(OAI_USRP) || defined(OAI_BLADERF) || defined(OAI_LMSSDR) + int overflow=0; int k,l; - int dummy_tx_buffer[frame_parms->samples_per_tti] __attribute__((aligned(16))); + int dummy_tx_buffer[3840*4] __attribute__((aligned(16))); #endif VCD_SIGNAL_DUMPER_DUMP_FUNCTION_BY_NAME(VCD_SIGNAL_DUMPER_FUNCTIONS_PHY_PROCEDURES_UE_TX_ULSCH_COMMON,VCD_FUNCTION_IN); @@ -1069,22 +1074,18 @@ void ulsch_common_procedures(PHY_VARS_UE *ue, UE_rxtx_proc_t *proc, uint8_t empt ulsch_start = (frame_parms->samples_per_tti*subframe_tx)-ue->N_TA_offset; //-ue->timing_advance; #endif //else EXMIMO -//#if defined(EXMIMO) || defined(OAI_USRP) || defined(OAI_BLADERF) || defined(OAI_LMSSDR) +#if defined(EXMIMO) || defined(OAI_USRP) || defined(OAI_BLADERF) || 
defined(OAI_LMSSDR) if (empty_subframe) { //#if 1 overflow = ulsch_start - 9*frame_parms->samples_per_tti; for (aa=0; aa<frame_parms->nb_antennas_tx; aa++) { - if (overflow > 0) - { - memset(&ue->common_vars.txdata[aa][ulsch_start],0,4*(frame_parms->samples_per_tti-overflow)); - memset(&ue->common_vars.txdata[aa][0],0,4*overflow); - } - else - { - memset(&ue->common_vars.txdata[aa][ulsch_start],0,4*frame_parms->samples_per_tti); - } + memset(&ue->common_vars.txdata[aa][ulsch_start],0, + 4*cmin(frame_parms->samples_per_tti-overflow,frame_parms->samples_per_tti)); + + if (overflow> 0) + memset(&ue->common_vars.txdata[aa][0],0,4*overflow); } /*#else overflow = ulsch_start - 9*frame_parms->samples_per_tti; @@ -1102,7 +1103,7 @@ void ulsch_common_procedures(PHY_VARS_UE *ue, UE_rxtx_proc_t *proc, uint8_t empt #endif*/ return; } -//#endif +#endif if ((frame_tx%100) == 0) LOG_D(PHY,"[UE %d] Frame %d, subframe %d: ulsch_start = %d (rxoff %d, HW TA %d, timing advance %d, TA_offset %d\n", @@ -1336,7 +1337,8 @@ void ue_ulsch_uespec_procedures(PHY_VARS_UE *ue,UE_rxtx_proc_t *proc,uint8_t eNB uint8_t ulsch_input_buffer[5477] __attribute__ ((aligned(32))); uint8_t access_mode; uint8_t Nbundled=0; - uint8_t ack_status=0; + uint8_t ack_status_cw0=0; + uint8_t ack_status_cw1=0; VCD_SIGNAL_DUMPER_DUMP_FUNCTION_BY_NAME(VCD_SIGNAL_DUMPER_FUNCTIONS_PHY_PROCEDURES_UE_TX_ULSCH_UESPEC,VCD_FUNCTION_IN); @@ -1437,11 +1439,16 @@ void ue_ulsch_uespec_procedures(PHY_VARS_UE *ue,UE_rxtx_proc_t *proc,uint8_t eNB ue->ulsch[eNB_id]->harq_processes[harq_pid]->round = 0; } - ack_status = reset_ack(&ue->frame_parms, - ue->dlsch[eNB_id][0]->harq_ack, - subframe_tx, - ue->ulsch[eNB_id]->o_ACK,0); - Nbundled = ack_status; + ack_status_cw0 = reset_ack(&ue->frame_parms, + ue->dlsch[proc->subframe_rx&0x1][eNB_id][0]->harq_ack, + subframe_tx, + ue->ulsch[eNB_id]->o_ACK,0); + ack_status_cw1 = reset_ack(&ue->frame_parms, + ue->dlsch[proc->subframe_rx&0x1][eNB_id][1]->harq_ack, + subframe_tx, + 
ue->ulsch[eNB_id]->o_ACK,1); + + Nbundled = ack_status_cw0; first_rb = ue->ulsch[eNB_id]->harq_processes[harq_pid]->first_rb; nb_rb = ue->ulsch[eNB_id]->harq_processes[harq_pid]->nb_rb; @@ -1449,31 +1456,31 @@ void ue_ulsch_uespec_procedures(PHY_VARS_UE *ue,UE_rxtx_proc_t *proc,uint8_t eNB - if (ack_status > 0) { + if (ack_status_cw0 > 0) { // check if we received a PDSCH at subframe_tx - 4 // ==> send ACK/NACK on PUSCH - ue->ulsch[eNB_id]->harq_processes[harq_pid]->O_ACK = 1; + ue->ulsch[eNB_id]->harq_processes[harq_pid]->O_ACK = ack_status_cw0 + ack_status_cw1; #if T_TRACER if(ue->ulsch[eNB_id]->o_ACK[0]) { LOG_I(PHY,"PUSCH ACK\n"); - T(T_UE_PHY_DLSCH_UE_ACK, T_INT(eNB_id), T_INT(frame_tx%1024), T_INT(subframe_tx), T_INT(Mod_id), T_INT(ue->dlsch[eNB_id][0]->rnti), - T_INT(ue->dlsch[eNB_id][0]->current_harq_pid)); + T(T_UE_PHY_DLSCH_UE_ACK, T_INT(eNB_id), T_INT(frame_tx%1024), T_INT(subframe_tx), T_INT(Mod_id), T_INT(ue->dlsch[proc->subframe_rx&0x1][eNB_id][0]->rnti), + T_INT(ue->dlsch[proc->subframe_rx&0x1][eNB_id][0]->current_harq_pid)); } else { LOG_I(PHY,"PUSCH NACK\n"); - T(T_UE_PHY_DLSCH_UE_NACK, T_INT(eNB_id), T_INT(frame_tx%1024), T_INT(subframe_tx), T_INT(Mod_id), T_INT(ue->dlsch[eNB_id][0]->rnti), - T_INT(ue->dlsch[eNB_id][0]->current_harq_pid)); + T(T_UE_PHY_DLSCH_UE_NACK, T_INT(eNB_id), T_INT(frame_tx%1024), T_INT(subframe_tx), T_INT(Mod_id), T_INT(ue->dlsch[proc->subframe_rx&0x1][eNB_id][0]->rnti), + T_INT(ue->dlsch[proc->subframe_rx&0x1][eNB_id][0]->current_harq_pid)); } #endif - LOG_D(PHY,"[UE %d][PDSCH %x] AbsSubFrame %d.%d Generating ACK (%d,%d) for %d bits on PUSCH\n", + LOG_I(PHY,"[UE %d][PDSCH %x] AbsSubFrame %d.%d Generating ACK (%d,%d) for %d bits on PUSCH\n", Mod_id, ue->ulsch[eNB_id]->rnti, - frame_tx%1024,subframe_tx, + frame_tx,subframe_tx, ue->ulsch[eNB_id]->o_ACK[0],ue->ulsch[eNB_id]->o_ACK[1], ue->ulsch[eNB_id]->harq_processes[harq_pid]->O_ACK); } @@ -1530,6 +1537,7 @@ void ue_ulsch_uespec_procedures(PHY_VARS_UE *ue,UE_rxtx_proc_t 
*proc,uint8_t eNB mac_xface->macphy_exit("Error in ulsch_coding"); VCD_SIGNAL_DUMPER_DUMP_FUNCTION_BY_NAME(VCD_SIGNAL_DUMPER_FUNCTIONS_PHY_PROCEDURES_UE_TX, VCD_FUNCTION_OUT); stop_meas(&ue->phy_proc_tx); + //printf("------FULL TX PROC : %5.2f ------\n",ue->phy_proc_tx.p_time/(cpuf*1000.0)); return; } } @@ -1636,8 +1644,8 @@ void ue_ulsch_uespec_procedures(PHY_VARS_UE *ue,UE_rxtx_proc_t *proc,uint8_t eNB T(T_UE_PHY_PUSCH_TX_POWER, T_INT(eNB_id),T_INT(Mod_id), T_INT(frame_tx%1024), T_INT(subframe_tx),T_INT(ue->tx_power_dBm[subframe_tx]), T_INT(tx_amp),T_INT(ue->ulsch[eNB_id]->f_pusch),T_INT(get_PL(Mod_id,0,eNB_id)),T_INT(nb_rb)); #endif - LOG_D(PHY,"[UE %d][PUSCH %d] AbsSubFrame %d.%d, generating PUSCH, Po_PUSCH: %d dBm (max %d dBm), amp %d\n", - Mod_id,harq_pid,frame_tx%1024,subframe_tx,ue->tx_power_dBm[subframe_tx],ue->tx_power_max_dBm, tx_amp); + LOG_D(PHY,"[UE %d][PUSCH %d] Frame %d subframe %d, generating PUSCH, Po_PUSCH: %d dBm (max %d dBm), amp %d\n", + Mod_id,harq_pid,frame_tx,subframe_tx,ue->tx_power_dBm[subframe_tx],ue->tx_power_max_dBm, tx_amp); start_meas(&ue->ulsch_modulation_stats); ulsch_modulation(ue->common_vars.txdataF, tx_amp, @@ -1770,13 +1778,13 @@ void get_pucch_param(PHY_VARS_UE *ue, { pucch_resource[0] = get_n1_pucch(ue, proc, - ue->dlsch[eNB_id][0]->harq_ack, + ue->dlsch[proc->subframe_rx&0x1][eNB_id][0]->harq_ack, eNB_id, ack_payload, SR); pucch_payload[0] = ack_payload[0]; - //pucch_payload[1] = ack_payload[1]; - pucch_payload[1] = 1; + pucch_payload[1] = ack_payload[1]; + //pucch_payload[1] = 1; } break; @@ -1785,7 +1793,7 @@ void get_pucch_param(PHY_VARS_UE *ue, pucch_resource[0] = ue->cqi_report_config[eNB_id].CQI_ReportPeriodic.cqi_PUCCH_ResourceIndex; if(cqi_report) { - pucch_payload[0] = get_pucch2_cqi(ue,eNB_id,plength); + pucch_payload[0] = get_pucch2_cqi(ue,eNB_id,(int*)plength); } else { @@ -1806,14 +1814,12 @@ void ue_pucch_procedures(PHY_VARS_UE *ue,UE_rxtx_proc_t *proc,uint8_t eNB_id,uin uint8_t pucch_ack_payload[2]; - uint8_t 
n1_pucch,n2_pucch; + uint8_t n2_pucch; uint16_t pucch_resource; ANFBmode_t bundling_flag; PUCCH_FMT_t format; uint8_t SR_payload; - uint16_t CQI_payload; - uint16_t RI_payload; uint8_t pucch_payload[2]; uint16_t len; @@ -1824,13 +1830,11 @@ void ue_pucch_procedures(PHY_VARS_UE *ue,UE_rxtx_proc_t *proc,uint8_t eNB_id,uin int CC_id = ue->CC_id; int tx_amp; int16_t Po_PUCCH; - uint8_t ack_status=0; uint8_t ack_status_cw0=0; uint8_t ack_status_cw1=0; uint8_t nb_cw=0; uint8_t cqi_status=0; uint8_t ri_status=0; - uint8_t ack_sr_generated = 0; VCD_SIGNAL_DUMPER_DUMP_FUNCTION_BY_NAME(VCD_SIGNAL_DUMPER_FUNCTIONS_PHY_PROCEDURES_UE_TX_PUCCH,VCD_FUNCTION_IN); @@ -1891,13 +1895,13 @@ void ue_pucch_procedures(PHY_VARS_UE *ue,UE_rxtx_proc_t *proc,uint8_t eNB_id,uin } ack_status_cw0 = reset_ack(&ue->frame_parms, - ue->dlsch[eNB_id][0]->harq_ack, + ue->dlsch[proc->subframe_rx&0x1][eNB_id][0]->harq_ack, subframe_tx, pucch_ack_payload, 0); ack_status_cw1 = reset_ack(&ue->frame_parms, - ue->dlsch[eNB_id][1]->harq_ack, + ue->dlsch[proc->subframe_rx&0x1][eNB_id][1]->harq_ack, subframe_tx, pucch_ack_payload, 1); @@ -1936,11 +1940,10 @@ void ue_pucch_procedures(PHY_VARS_UE *ue,UE_rxtx_proc_t *proc,uint8_t eNB_id,uin SR_payload, cqi_status, &pucch_resource, - &pucch_payload, + (uint8_t *)&pucch_payload, &len); - LOG_D(PHY,"PUCCH feedback AbsSubframe %d.%d SR %d NbCW %d AckNack %d.%d CQI %d RI %d format %d pucch_resource %d pucch_payload %d %d \n", - frame_tx%1024, subframe_tx, SR_payload, nb_cw, pucch_ack_payload[0], pucch_ack_payload[1], cqi_status, ri_status, format, pucch_resource,pucch_payload[0],pucch_payload[1]); + LOG_D(PHY,"PUCCH feedback AbsSubframe %d.%d SR %d NbCW %d AckNack %d.%d CQI %d RI %d format %d pucch_resource %d pucch_payload %d %d \n", frame_tx, subframe_tx, SR_payload, nb_cw, pucch_ack_payload[0], pucch_ack_payload[1], cqi_status, ri_status, format, pucch_resource,pucch_payload[0],pucch_payload[1]); // Part - IV @@ -1971,13 +1974,13 @@ void 
ue_pucch_procedures(PHY_VARS_UE *ue,UE_rxtx_proc_t *proc,uint8_t eNB_id,uin #endif #if T_TRACER T(T_UE_PHY_PUCCH_TX_POWER, T_INT(eNB_id),T_INT(Mod_id), T_INT(frame_tx%1024), T_INT(subframe_tx),T_INT(ue->tx_power_dBm[subframe_tx]), - T_INT(tx_amp),T_INT(ue->dlsch[eNB_id][0]->g_pucch),T_INT(get_PL(ue->Mod_id,ue->CC_id,eNB_id))); + T_INT(tx_amp),T_INT(ue->dlsch[proc->subframe_rx&0x1][eNB_id][0]->g_pucch),T_INT(get_PL(ue->Mod_id,ue->CC_id,eNB_id))); #endif if(format == pucch_format1) { LOG_D(PHY,"[UE %d][SR %x] AbsSubframe %d.%d Generating PUCCH 1 (SR for PUSCH), an_srs_simultanous %d, shorten_pucch %d, n1_pucch %d, Po_PUCCH %d\n", Mod_id, - ue->dlsch[eNB_id][0]->rnti, + ue->dlsch[proc->subframe_rx&0x1][eNB_id][0]->rnti, frame_tx, subframe_tx, frame_parms->soundingrs_ul_config_common.ackNackSRS_SimultaneousTransmission, isShortenPucch, @@ -1989,7 +1992,7 @@ void ue_pucch_procedures(PHY_VARS_UE *ue,UE_rxtx_proc_t *proc,uint8_t eNB_id,uin if (SR_payload>0) { LOG_D(PHY,"[UE %d][SR %x] AbsSubFrame %d.%d Generating PUCCH %s payload %d,%d (with SR for PUSCH), an_srs_simultanous %d, shorten_pucch %d, n1_pucch %d, Po_PUCCH %d, amp %d\n", Mod_id, - ue->dlsch[eNB_id][0]->rnti, + ue->dlsch[proc->subframe_rx&0x1][eNB_id][0]->rnti, frame_tx % 1024, subframe_tx, (format == pucch_format1a? "1a": ( format == pucch_format1b? "1b" : "??")), @@ -2002,7 +2005,7 @@ void ue_pucch_procedures(PHY_VARS_UE *ue,UE_rxtx_proc_t *proc,uint8_t eNB_id,uin } else { LOG_D(PHY,"[UE %d][PDSCH %x] AbsSubFrame %d.%d rx_offset_diff: %d, Generating PUCCH %s, an_srs_simultanous %d, shorten_pucch %d, n1_pucch %d, b[0]=%d,b[1]=%d (SR_Payload %d), Po_PUCCH %d, amp %d\n", Mod_id, - ue->dlsch[eNB_id][0]->rnti, + ue->dlsch[proc->subframe_rx&0x1][eNB_id][0]->rnti, frame_tx%1024, subframe_tx,ue->rx_offset_diff, (format == pucch_format1a? "1a": ( format == pucch_format1b? 
"1b" : "??")), @@ -2017,13 +2020,13 @@ void ue_pucch_procedures(PHY_VARS_UE *ue,UE_rxtx_proc_t *proc,uint8_t eNB_id,uin #if T_TRACER if(pucch_payload[0]) { - T(T_UE_PHY_DLSCH_UE_ACK, T_INT(eNB_id), T_INT(frame_tx%1024), T_INT(subframe_tx), T_INT(Mod_id), T_INT(ue->dlsch[eNB_id][0]->rnti), - T_INT(ue->dlsch[eNB_id][0]->current_harq_pid)); + T(T_UE_PHY_DLSCH_UE_ACK, T_INT(eNB_id), T_INT(frame_tx%1024), T_INT(subframe_tx), T_INT(Mod_id), T_INT(ue->dlsch[proc->subframe_rx&0x1][eNB_id][0]->rnti), + T_INT(ue->dlsch[proc->subframe_rx&0x1][eNB_id][0]->current_harq_pid)); } else { - T(T_UE_PHY_DLSCH_UE_NACK, T_INT(eNB_id), T_INT(frame_tx%1024), T_INT(subframe_tx), T_INT(Mod_id), T_INT(ue->dlsch[eNB_id][0]->rnti), - T_INT(ue->dlsch[eNB_id][0]->current_harq_pid)); + T(T_UE_PHY_DLSCH_UE_NACK, T_INT(eNB_id), T_INT(frame_tx%1024), T_INT(subframe_tx), T_INT(Mod_id), T_INT(ue->dlsch[proc->subframe_rx&0x1][eNB_id][0]->rnti), + T_INT(ue->dlsch[proc->subframe_rx&0x1][eNB_id][0]->current_harq_pid)); } #endif @@ -2076,12 +2079,12 @@ void ue_pucch_procedures(PHY_VARS_UE *ue,UE_rxtx_proc_t *proc,uint8_t eNB_id,uin #endif #if T_TRACER T(T_UE_PHY_PUCCH_TX_POWER, T_INT(eNB_id),T_INT(Mod_id), T_INT(frame_tx%1024), T_INT(subframe_tx),T_INT(ue->tx_power_dBm[subframe_tx]), - T_INT(tx_amp),T_INT(ue->dlsch[eNB_id][0]->g_pucch),T_INT(get_PL(ue->Mod_id,ue->CC_id,eNB_id))); + T_INT(tx_amp),T_INT(ue->dlsch[proc->subframe_rx&0x1][eNB_id][0]->g_pucch),T_INT(get_PL(ue->Mod_id,ue->CC_id,eNB_id))); #endif LOG_D(PHY,"[UE %d][RNTI %x] AbsSubFrame %d.%d Generating PUCCH 2 (RI or CQI), n2_pucch %d, Po_PUCCH %d, isShortenPucch %d, amp %d\n", Mod_id, - ue->dlsch[eNB_id][0]->rnti, + ue->dlsch[proc->subframe_rx&0x1][eNB_id][0]->rnti, frame_tx%1024, subframe_tx, n2_pucch, Po_PUCCH, @@ -2106,13 +2109,13 @@ void ue_pucch_procedures(PHY_VARS_UE *ue,UE_rxtx_proc_t *proc,uint8_t eNB_id,uin case pucch_format2a: LOG_I(PHY,"[UE %d][RNTI %x] AbsSubFrame %d.%d Generating PUCCH 2a (RI or CQI) Ack/Nack 1bit \n", Mod_id, - 
ue->dlsch[eNB_id][0]->rnti, + ue->dlsch[proc->subframe_rx&0x1][eNB_id][0]->rnti, frame_tx%1024, subframe_tx); break; case pucch_format2b: LOG_I(PHY,"[UE %d][RNTI %x] AbsSubFrame %d.%d Generating PUCCH 2b (RI or CQI) Ack/Nack 2bits\n", Mod_id, - ue->dlsch[eNB_id][0]->rnti, + ue->dlsch[proc->subframe_rx&0x1][eNB_id][0]->rnti, frame_tx%1024, subframe_tx); break; default: @@ -2254,7 +2257,7 @@ void phy_procedures_UE_TX(PHY_VARS_UE *ue,UE_rxtx_proc_t *proc,uint8_t eNB_id,ui // reset DL ACK/NACK status reset_ack(&ue->frame_parms, - ue->dlsch[eNB_id][0]->harq_ack, + ue->dlsch[proc->subframe_rx&0x1][eNB_id][0]->harq_ack, subframe_tx, ue->ulsch[eNB_id]->o_ACK,0); @@ -2451,8 +2454,8 @@ void phy_procedures_emos_UE_RX(PHY_VARS_UE *ue,uint8_t last_slot,uint8_t eNB_id) emos_dump_UE.total_TBS_last = ue->total_TBS_last[eNB_id]; emos_dump_UE.bitrate = ue->bitrate[eNB_id]; emos_dump_UE.total_received_bits = ue->total_received_bits[eNB_id]; - emos_dump_UE.pmi_saved = ue->dlsch[eNB_id][0]->pmi_alloc; - emos_dump_UE.mcs = ue->dlsch[eNB_id][0]->harq_processes[ue->dlsch[eNB_id][0]->current_harq_pid]->mcs; + emos_dump_UE.pmi_saved = ue->dlsch[subframe&0x1][eNB_id][0]->pmi_alloc; + emos_dump_UE.mcs = ue->dlsch[subframe&0x1][eNB_id][0]->harq_processes[ue->dlsch[subframe&0x1][eNB_id][0]->current_harq_pid]->mcs; emos_dump_UE.use_ia_receiver = openair_daq_vars.use_ia_receiver; bytes = rtf_put(CHANSOUNDER_FIFO_MINOR, &emos_dump_UE, sizeof(fifo_dump_emos_UE)); @@ -2859,7 +2862,7 @@ int ue_pdcch_procedures(uint8_t eNB_id,PHY_VARS_UE *ue,UE_rxtx_proc_t *proc,uint (void *)&dci_alloc_rx[i].dci_pdu, ue->pdcch_vars[eNB_id]->crnti, dci_alloc_rx[i].format, - ue->dlsch[eNB_id], + ue->dlsch[subframe_rx&0x1][eNB_id], &ue->frame_parms, ue->pdsch_config_dedicated, SI_RNTI, @@ -2876,7 +2879,7 @@ int ue_pdcch_procedures(uint8_t eNB_id,PHY_VARS_UE *ue,UE_rxtx_proc_t *proc,uint (dci_alloc_rx[i].format == format2A) || (dci_alloc_rx[i].format == format2B)) { - ue->dlsch[eNB_id][0]->g_pucch += 
ue->dlsch[eNB_id][0]->harq_processes[ue->dlsch[eNB_id][0]->current_harq_pid]->delta_PUCCH; + ue->dlsch[subframe_rx&0x1][eNB_id][0]->g_pucch += ue->dlsch[subframe_rx&0x1][eNB_id][0]->harq_processes[ue->dlsch[subframe_rx&0x1][eNB_id][0]->current_harq_pid]->delta_PUCCH; } ue->dlsch_received[eNB_id]++; @@ -2884,7 +2887,7 @@ int ue_pdcch_procedures(uint8_t eNB_id,PHY_VARS_UE *ue,UE_rxtx_proc_t *proc,uint #ifdef DEBUG_PHY_PROC LOG_D(PHY,"[UE %d] Generated UE DLSCH C_RNTI format %d\n",ue->Mod_id,dci_alloc_rx[i].format); dump_dci(&ue->frame_parms, &dci_alloc_rx[i]); - LOG_D(PHY,"[UE %d] *********** dlsch->active in subframe %d=> %d\n",ue->Mod_id,subframe_rx,ue->dlsch[eNB_id][0]->active); + LOG_D(PHY,"[UE %d] *********** dlsch->active in subframe %d=> %d\n",ue->Mod_id,subframe_rx,ue->dlsch[subframe_rx&0x1][eNB_id][0]->active); #endif // we received a CRNTI, so we're in PUSCH @@ -3262,7 +3265,14 @@ void ue_pdsch_procedures(PHY_VARS_UE *ue, UE_rxtx_proc_t *proc, int eNB_id, PDSC dual_stream_UE = 1; eNB_id_i = ue->n_connected_eNB; i_mod = dlsch0->harq_processes[harq_pid]->Qm; - } else { + } + else if((pdsch==PDSCH) && (ue->transmission_mode[eNB_id]==3)) + { + dual_stream_UE = rx_IC_dual_stream; + eNB_id_i = eNB_id; + i_mod = 0; + } + else { dual_stream_UE = 0; eNB_id_i = eNB_id+1; i_mod = 0; @@ -3401,11 +3411,34 @@ void ue_dlsch_procedures(PHY_VARS_UE *ue, int harq_pid; int frame_rx = proc->frame_rx; int subframe_rx = proc->subframe_rx; - int ret=0; + int ret=0, ret1=0; int CC_id = ue->CC_id; LTE_UE_PDSCH *pdsch_vars; + uint8_t is_cw0_active = 0; + uint8_t is_cw1_active = 0; + + if (dlsch0==NULL) + AssertFatal(0,"dlsch0 should be defined at this level \n"); + + harq_pid = dlsch0->current_harq_pid; + is_cw0_active = dlsch0->harq_processes[harq_pid]->status; + + if(dlsch1) + is_cw1_active = dlsch1->harq_processes[harq_pid]->status; + + LOG_D(PHY,"AbsSubframe %d.%d Start Turbo Decoder for CW0 [harq_pid %d] ? 
%d \n", frame_rx%1024, subframe_rx, harq_pid, is_cw0_active); + LOG_D(PHY,"AbsSubframe %d.%d Start Turbo Decoder for CW1 [harq_pid %d] ? %d \n", frame_rx%1024, subframe_rx, harq_pid, is_cw1_active); - if (dlsch0 && (!dlsch1)) { + if(is_cw0_active && is_cw1_active) + { + dlsch0->Kmimo = 2; + dlsch1->Kmimo = 2; + } + else + { + dlsch0->Kmimo = 1; + } + if (1) { switch (pdsch) { case SI_PDSCH: pdsch_vars = ue->pdsch_vars_SI[eNB_id]; @@ -3431,8 +3464,6 @@ void ue_dlsch_procedures(PHY_VARS_UE *ue, break; } - - harq_pid = dlsch0->current_harq_pid; if (frame_rx < *dlsch_errors) *dlsch_errors=0; @@ -3449,6 +3480,7 @@ void ue_dlsch_procedures(PHY_VARS_UE *ue, if (abstraction_flag == 0) { + // start turbo decode for CW 0 dlsch0->harq_processes[harq_pid]->G = get_G(&ue->frame_parms, dlsch0->harq_processes[harq_pid]->nb_rb, dlsch0->harq_processes[harq_pid]->rb_alloc_even, @@ -3468,6 +3500,13 @@ void ue_dlsch_procedures(PHY_VARS_UE *ue, subframe_rx<<1); stop_meas(&ue->dlsch_unscrambling_stats); + //LOG_I(PHY,"start turbo decode for CW 0 --> nb_rb %d \n", dlsch0->harq_processes[harq_pid]->nb_rb); + //LOG_I(PHY,"start turbo decode for CW 0 --> rb_alloc_even %x \n", dlsch0->harq_processes[harq_pid]->rb_alloc_even); + //LOG_I(PHY,"start turbo decode for CW 0 --> Qm %d \n", dlsch0->harq_processes[harq_pid]->Qm); + //LOG_I(PHY,"start turbo decode for CW 0 --> Nl %d \n", dlsch0->harq_processes[harq_pid]->Nl); + //LOG_I(PHY,"start turbo decode for CW 0 --> G %d \n", dlsch0->harq_processes[harq_pid]->G); + //LOG_I(PHY,"start turbo decode for CW 0 --> Kmimo %d \n", dlsch0->Kmimo); + start_meas(&ue->dlsch_decoding_stats); ret = dlsch_decoding(ue, pdsch_vars->llr[0], @@ -3480,6 +3519,60 @@ void ue_dlsch_procedures(PHY_VARS_UE *ue, pdsch==PDSCH?1:0, dlsch0->harq_processes[harq_pid]->TBS>256?1:0); stop_meas(&ue->dlsch_decoding_stats); + + //printf(" --> Unscrambling for CW0 %5.3f\n", + // (ue->dlsch_unscrambling_stats.p_time)/(cpuf*1000.0)); + //printf(" --> Turbo Decoding for CW0 %5.3f\n", 
+ // (ue->dlsch_decoding_stats.p_time)/(cpuf*1000.0)); + + if(is_cw1_active) + { + // start turbo decode for CW 1 + dlsch1->harq_processes[harq_pid]->G = get_G(&ue->frame_parms, + dlsch1->harq_processes[harq_pid]->nb_rb, + dlsch1->harq_processes[harq_pid]->rb_alloc_even, + dlsch1->harq_processes[harq_pid]->Qm, + dlsch1->harq_processes[harq_pid]->Nl, + ue->pdcch_vars[eNB_id]->num_pdcch_symbols, + frame_rx, + subframe_rx, + ue->transmission_mode[eNB_id]<7?0:ue->transmission_mode[eNB_id]); + + start_meas(&ue->dlsch_unscrambling_stats); + dlsch_unscrambling(&ue->frame_parms, + 0, + dlsch1, + dlsch1->harq_processes[harq_pid]->G, + pdsch_vars->llr[1], + 1, + subframe_rx<<1); + stop_meas(&ue->dlsch_unscrambling_stats); + + //LOG_I(PHY,"start turbo decode for CW 1 --> nb_rb %d \n", dlsch1->harq_processes[harq_pid]->nb_rb); + //LOG_I(PHY,"start turbo decode for CW 1 --> rb_alloc_even %x \n", dlsch1->harq_processes[harq_pid]->rb_alloc_even); + //LOG_I(PHY,"start turbo decode for CW 1 --> Qm %d \n", dlsch1->harq_processes[harq_pid]->Qm); + //LOG_I(PHY,"start turbo decode for CW 1 --> Nl %d \n", dlsch1->harq_processes[harq_pid]->Nl); + //LOG_I(PHY,"start turbo decode for CW 1 --> G %d \n", dlsch1->harq_processes[harq_pid]->G); + //LOG_I(PHY,"start turbo decode for CW 1 --> Kmimo %d \n", dlsch1->Kmimo); + + start_meas(&ue->dlsch_decoding_stats); + ret1 = dlsch_decoding(ue, + pdsch_vars->llr[1], + &ue->frame_parms, + dlsch1, + dlsch1->harq_processes[harq_pid], + frame_rx, + subframe_rx, + harq_pid, + pdsch==PDSCH?1:0, + dlsch1->harq_processes[harq_pid]->TBS>256?1:0); + stop_meas(&ue->dlsch_decoding_stats); + + //printf(" --> Unscrambling for CW1 %5.3f\n", + // (ue->dlsch_unscrambling_stats.p_time)/(cpuf*1000.0)); + //printf(" --> Turbo Decoding for CW1 %5.3f\n", + // (ue->dlsch_decoding_stats.p_time)/(cpuf*1000.0)); + } } else { @@ -3492,12 +3585,13 @@ void ue_dlsch_procedures(PHY_VARS_UE *ue, #endif } + // Check CRC for CW 0 if (ret == (1+dlsch0->max_turbo_iterations)) { 
*dlsch_errors=*dlsch_errors+1; if(dlsch0->rnti != 0xffff) { - LOG_D(PHY,"[UE %d][PDSCH %x/%d] Frame %d subframe %d DLSCH in error (rv %d,mcs %d,TBS %d)\n", + LOG_D(PHY,"[UE %d][PDSCH %x/%d] AbsSubframe %d.%d : DLSCH CW0 in error (rv %d,mcs %d,TBS %d)\n", ue->Mod_id,dlsch0->rnti, harq_pid,frame_rx,subframe_rx, dlsch0->harq_processes[harq_pid]->rvidx, @@ -3509,7 +3603,7 @@ void ue_dlsch_procedures(PHY_VARS_UE *ue, } else { if(dlsch0->rnti != 0xffff) { - LOG_D(PHY,"[UE %d][PDSCH %x/%d] Frame %d subframe %d: Received DLSCH (rv %d,mcs %d,TBS %d)\n", + LOG_D(PHY,"[UE %d][PDSCH %x/%d] AbsSubframe %d.%d : Received DLSCH CW0 (rv %d,mcs %d,TBS %d)\n", ue->Mod_id,dlsch0->rnti, harq_pid,frame_rx,subframe_rx, dlsch0->harq_processes[harq_pid]->rvidx, @@ -3574,6 +3668,49 @@ void ue_dlsch_procedures(PHY_VARS_UE *ue, dlsch0->harq_processes[dlsch0->current_harq_pid]->TBS; } + // Check CRC for CW 1 + if(is_cw1_active) + { + if (ret1 == (1+dlsch0->max_turbo_iterations)) { + LOG_D(PHY,"[UE %d][PDSCH %x/%d] Frame %d subframe %d DLSCH CW1 in error (rv %d,mcs %d,TBS %d)\n", + ue->Mod_id,dlsch0->rnti, + harq_pid,frame_rx,subframe_rx, + dlsch0->harq_processes[harq_pid]->rvidx, + dlsch0->harq_processes[harq_pid]->mcs, + dlsch0->harq_processes[harq_pid]->TBS); + + } else { + LOG_D(PHY,"[UE %d][PDSCH %x/%d] Frame %d subframe %d: Received DLSCH CW1 (rv %d,mcs %d,TBS %d)\n", + ue->Mod_id,dlsch0->rnti, + harq_pid,frame_rx,subframe_rx, + dlsch0->harq_processes[harq_pid]->rvidx, + dlsch0->harq_processes[harq_pid]->mcs, + dlsch0->harq_processes[harq_pid]->TBS); + + + if (ue->mac_enabled == 1) { + switch (pdsch) { + case PDSCH: + if(is_cw1_active) + mac_xface->ue_send_sdu(ue->Mod_id, + CC_id, + frame_rx, + subframe_rx, + dlsch1->harq_processes[dlsch1->current_harq_pid]->b, + dlsch1->harq_processes[dlsch1->current_harq_pid]->TBS>>3, + eNB_id); + break; + case SI_PDSCH: + case P_PDSCH: + case RA_PDSCH: + case PDSCH1: + case PMCH: + AssertFatal(0,"exiting"); + break; + } + } + } + } #ifdef 
DEBUG_PHY_PROC @@ -3622,16 +3759,18 @@ int phy_procedures_UE_RX(PHY_VARS_UE *ue,UE_rxtx_proc_t *proc,uint8_t eNB_id,uin T_BUFFER(&ue->common_vars.rxdata[0][subframe_rx*ue->frame_parms.samples_per_tti], ue->frame_parms.samples_per_tti * 4)); - start_meas(&ue->phy_proc_rx); + // start timers + start_meas(&ue->phy_proc_rx[subframe_rx&0x1]); + start_meas(&ue->generic_stat); pmch_flag = is_pmch_subframe(frame_rx,subframe_rx,&ue->frame_parms) ? 1 : 0; // deactivate reception until we scan pdcch - if (ue->dlsch[eNB_id][0]) - ue->dlsch[eNB_id][0]->active = 0; - if (ue->dlsch[eNB_id][1]) - ue->dlsch[eNB_id][1]->active = 0; + if (ue->dlsch[subframe_rx&0x1][eNB_id][0]) + ue->dlsch[subframe_rx&0x1][eNB_id][0]->active = 0; + if (ue->dlsch[subframe_rx&0x1][eNB_id][1]) + ue->dlsch[subframe_rx&0x1][eNB_id][1]->active = 0; if (ue->dlsch_SI[eNB_id]) ue->dlsch_SI[eNB_id]->active = 0; @@ -3717,15 +3856,18 @@ int phy_procedures_UE_RX(PHY_VARS_UE *ue,UE_rxtx_proc_t *proc,uint8_t eNB_id,uin 0); // first slot has been processed (FFTs + Channel Estimation, PCFICH/PHICH/PDCCH) - + stop_meas(&ue->generic_stat); + //printf("[SFN %d] Slot0: FFT + Channel Estimate + PCFICH/PHICH/PDCCH %5.2f \n",subframe_rx,ue->generic_stat.p_time/(cpuf*1000.0)); + + start_meas(&ue->generic_stat); // do procedures for C-RNTI - if (ue->dlsch[eNB_id][0]->active == 1) { + if (ue->dlsch[subframe_rx&0x1][eNB_id][0]->active == 1) { VCD_SIGNAL_DUMPER_DUMP_FUNCTION_BY_NAME(VCD_SIGNAL_DUMPER_FUNCTIONS_PDSCH_PROC, VCD_FUNCTION_IN); ue_pdsch_procedures(ue, proc, eNB_id, PDSCH, - ue->dlsch[eNB_id][0], + ue->dlsch[subframe_rx&0x1][eNB_id][0], NULL, ue->pdcch_vars[eNB_id]->num_pdcch_symbols, ue->frame_parms.symbols_per_tti>>1, @@ -3809,35 +3951,50 @@ int phy_procedures_UE_RX(PHY_VARS_UE *ue,UE_rxtx_proc_t *proc,uint8_t eNB_id,uin } } // not an S-subframe + stop_meas(&ue->generic_stat); + //printf("[SFN %d] Slot1: FFT + Channel Estimate + Pdsch Proc Slot0 %5.2f \n",subframe_rx,ue->generic_stat.p_time/(cpuf*1000.0)); + // run 
pbch procedures if subframe is 0 - if (subframe_rx == 0) + if ( (subframe_rx == 0) && (ue->decode_MIB == 1)) + { ue_pbch_procedures(eNB_id,ue,proc,abstraction_flag); + } // do procedures for C-RNTI - if (ue->dlsch[eNB_id][0]->active == 1) { + if (ue->dlsch[subframe_rx&0x1][eNB_id][0]->active == 1) { VCD_SIGNAL_DUMPER_DUMP_FUNCTION_BY_NAME(VCD_SIGNAL_DUMPER_FUNCTIONS_PDSCH_PROC, VCD_FUNCTION_IN); + start_meas(&ue->pdsch_procedures_stat); ue_pdsch_procedures(ue, proc, eNB_id, PDSCH, - ue->dlsch[eNB_id][0], + ue->dlsch[subframe_rx&0x1][eNB_id][0], NULL, 1+(ue->frame_parms.symbols_per_tti>>1), ue->frame_parms.symbols_per_tti-1, abstraction_flag); + stop_meas(&ue->pdsch_procedures_stat); + + start_meas(&ue->dlsch_procedures_stat); ue_dlsch_procedures(ue, proc, eNB_id, PDSCH, - ue->dlsch[eNB_id][0], - NULL, + ue->dlsch[subframe_rx&0x1][eNB_id][0], + ue->dlsch[subframe_rx&0x1][eNB_id][1], &ue->dlsch_errors[eNB_id], mode, abstraction_flag); + stop_meas(&ue->dlsch_procedures_stat); + //printf("[SFN %d] Slot1: Pdsch Proc %5.2f\n",subframe_rx,ue->pdsch_procedures_stat.p_time/(cpuf*1000.0)); + //printf("[SFN %d] Slot0 Slot1: Dlsch Proc %5.2f\n",subframe_rx,ue->dlsch_procedures_stat.p_time/(cpuf*1000.0)); + VCD_SIGNAL_DUMPER_DUMP_FUNCTION_BY_NAME(VCD_SIGNAL_DUMPER_FUNCTIONS_PDSCH_PROC, VCD_FUNCTION_OUT); } + start_meas(&ue->generic_stat); + // do procedures for SI-RNTI if ((ue->dlsch_SI[eNB_id]) && (ue->dlsch_SI[eNB_id]->active == 1)) { ue_pdsch_procedures(ue, @@ -3932,7 +4089,8 @@ int phy_procedures_UE_RX(PHY_VARS_UE *ue,UE_rxtx_proc_t *proc,uint8_t eNB_id,uin } - + stop_meas(&ue->generic_stat); + //printf("after tubo until end of Rx %5.2f \n",ue->generic_stat.p_time/(cpuf*1000.0)); #ifdef EMOS phy_procedures_emos_UE_RX(ue,slot,eNB_id); @@ -3940,7 +4098,10 @@ int phy_procedures_UE_RX(PHY_VARS_UE *ue,UE_rxtx_proc_t *proc,uint8_t eNB_id,uin VCD_SIGNAL_DUMPER_DUMP_FUNCTION_BY_NAME(VCD_SIGNAL_DUMPER_FUNCTIONS_PHY_PROCEDURES_UE_RX, VCD_FUNCTION_OUT); - stop_meas(&ue->phy_proc_rx); 
+ stop_meas(&ue->phy_proc_rx[subframe_rx&0x1]); + + //printf("------FULL RX PROC [SFN %d]: %5.2f ------\n",subframe_rx,ue->phy_proc_rx[subframe_rx&0x1].p_time/(cpuf*1000.0)); + return (0); } diff --git a/openair1/SCHED/pucch_pc.c b/openair1/SCHED/pucch_pc.c index 5bc03bb58de4f8e0653410e68e091d05cb6780dc..ae831238622a91fef15d58e9be8d7d0f65ae28a5 100644 --- a/openair1/SCHED/pucch_pc.c +++ b/openair1/SCHED/pucch_pc.c @@ -51,7 +51,7 @@ int16_t pucch_power_cntl(PHY_VARS_UE *ue,UE_rxtx_proc_t *proc,uint8_t subframe,u Po_PUCCH = get_PL(ue->Mod_id,ue->CC_id,eNB_id)+ ue->frame_parms.ul_power_control_config_common.p0_NominalPUCCH+ - ue->dlsch[eNB_id][0]->g_pucch; + ue->dlsch[proc->subframe_rx&0x1][eNB_id][0]->g_pucch; switch (pucch_fmt) { case pucch_format1: @@ -90,19 +90,19 @@ int16_t pucch_power_cntl(PHY_VARS_UE *ue,UE_rxtx_proc_t *proc,uint8_t subframe,u if (pucch_fmt!=pucch_format1) { LOG_D(PHY,"[UE %d][PDSCH %x] AbsSubframe %d.%d: Po_PUCCH %d dBm : Po_NOMINAL_PUCCH %d dBm, PL %d dB, g_pucch %d dB\n", ue->Mod_id, - ue->dlsch[eNB_id][0]->rnti,proc->frame_tx%1024,subframe, + ue->dlsch[proc->subframe_rx&0x1][eNB_id][0]->rnti,proc->frame_tx%1024,subframe, Po_PUCCH, ue->frame_parms.ul_power_control_config_common.p0_NominalPUCCH, get_PL(ue->Mod_id,ue->CC_id,eNB_id), - ue->dlsch[eNB_id][0]->g_pucch); + ue->dlsch[proc->subframe_rx&0x1][eNB_id][0]->g_pucch); } else { LOG_D(PHY,"[UE %d][SR %x] AbsSubframe %d.%d: Po_PUCCH %d dBm : Po_NOMINAL_PUCCH %d dBm, PL %d dB g_pucch %d dB\n", ue->Mod_id, - ue->dlsch[eNB_id][0]->rnti,proc->frame_tx%1024,subframe, + ue->dlsch[proc->subframe_rx&0x1][eNB_id][0]->rnti,proc->frame_tx%1024,subframe, Po_PUCCH, ue->frame_parms.ul_power_control_config_common.p0_NominalPUCCH, get_PL(ue->Mod_id,ue->CC_id,eNB_id), - ue->dlsch[eNB_id][0]->g_pucch); + ue->dlsch[proc->subframe_rx&0x1][eNB_id][0]->g_pucch); } return(Po_PUCCH); diff --git a/openair3/NAS/TOOLS/ue_tcl_test.conf b/openair3/NAS/TOOLS/ue_tcl_test.conf new file mode 100644 index 
0000000000000000000000000000000000000000..05db6be4d12130603fec8d272bf102158d8f23a4 --- /dev/null +++ b/openair3/NAS/TOOLS/ue_tcl_test.conf @@ -0,0 +1,114 @@ +# List of known PLMNS +PLMN: { + PLMN0: { + FULLNAME="Test network"; + SHORTNAME="OAI4G"; + MNC="01"; + MCC="001"; + + }; + PLMN1: { + FULLNAME="SFR France"; + SHORTNAME="SFR"; + MNC="10"; + MCC="208"; + + }; + PLMN2: { + FULLNAME="SFR France"; + SHORTNAME="SFR"; + MNC="11"; + MCC="208"; + }; + PLMN3: { + FULLNAME="SFR France"; + SHORTNAME="SFR"; + MNC="13"; + MCC="208"; + }; + PLMN4: { + FULLNAME="OAI LTEBOX"; + SHORTNAME="OAIALU"; + MNC="93"; + MCC="208"; + }; + PLMN5: { + FULLNAME="T-Mobile USA"; + SHORTNAME="T-Mobile"; + MNC="280"; + MCC="310"; + }; + PLMN6: { + FULLNAME="FICTITIOUS USA"; + SHORTNAME="FICTITIO"; + MNC="028"; + MCC="310"; + }; + PLMN7: { + FULLNAME="Vodafone Italia"; + SHORTNAME="VODAFONE"; + MNC="10"; + MCC="222"; + }; + PLMN8: { + FULLNAME="Vodafone Spain"; + SHORTNAME="VODAFONE"; + MNC="01"; + MCC="214"; + }; + PLMN9: { + FULLNAME="Vodafone Spain"; + SHORTNAME="VODAFONE"; + MNC="06"; + MCC="214"; + }; + PLMN10: { + FULLNAME="Vodafone Germ"; + SHORTNAME="VODAFONE"; + MNC="02"; + MCC="262"; + }; + PLMN11: { + FULLNAME="Vodafone Germ"; + SHORTNAME="VODAFONE"; + MNC="04"; + MCC="262"; + }; +}; + +UE0: +{ + USER: { + IMEI="356113022094149"; + MANUFACTURER="EURECOM"; + MODEL="LTE Android PC"; + PIN="0000"; + }; + + SIM: { + MSIN="000001234"; + USIM_API_K="000102030405060708090A0B0C0D0E0F"; + OPC="C42449363BBAD02B66D16BC975D77CC1"; + MSISDN="000000000000";//"33611123456"; + }; + + # Home PLMN Selector with Access Technology + HPLMN= "00101"; + + # User controlled PLMN Selector with Access Technology + UCPLMN_LIST = (); + + # Operator PLMN List + OPLMN_LIST = ("00101", "20810", "20811", "20813", "20893", "310280", "310028"); + + # Operator controlled PLMN Selector with Access Technology + OCPLMN_LIST = ("22210", "21401", "21406", "26202", "26204"); + + # Forbidden plmns + FPLMN_LIST = (); + + # 
List of Equivalent HPLMNs +#TODO: UE does not connect if set, to be fixed in the UE +# EHPLMN_LIST= ("20811", "20813"); + EHPLMN_LIST= (); +}; diff --git a/targets/RT/USER/lte-softmodem.c b/targets/RT/USER/lte-softmodem.c index 8303d88218fbe0fbf83ab827133e3b29d355f5de..5761577259eb8365881dacd72c994a6707b788e4 100644 --- a/targets/RT/USER/lte-softmodem.c +++ b/targets/RT/USER/lte-softmodem.c @@ -151,6 +151,8 @@ uint8_t usim_test = 0; uint8_t nb_antenna_tx = 1; uint8_t nb_antenna_rx = 1; +int16_t dlsch_demod_shift = 0; + char ref[128] = "internal"; char channels[128] = "0"; @@ -635,6 +637,7 @@ static void get_options (int argc, char **argv) { LONG_OPTION_THREADIQ, LONG_OPTION_THREADODDSUBFRAME, LONG_OPTION_THREADEVENSUBFRAME, + LONG_OPTION_DEMOD_SHIFT, #if T_TRACER LONG_OPTION_T_PORT, LONG_OPTION_T_NOWAIT, @@ -670,6 +673,7 @@ static void get_options (int argc, char **argv) { {"threadIQ", required_argument, NULL, LONG_OPTION_THREADIQ}, {"threadOddSubframe", required_argument, NULL, LONG_OPTION_THREADODDSUBFRAME}, {"threadEvenSubframe", required_argument, NULL, LONG_OPTION_THREADEVENSUBFRAME}, + {"dlsch-demod-shift", required_argument, NULL, LONG_OPTION_DEMOD_SHIFT}, #if T_TRACER {"T_port", required_argument, 0, LONG_OPTION_T_PORT}, {"T_nowait", no_argument, 0, LONG_OPTION_T_NOWAIT}, @@ -800,7 +804,9 @@ static void get_options (int argc, char **argv) { case LONG_OPTION_THREADEVENSUBFRAME: threads.even=atoi(optarg); break; - + case LONG_OPTION_DEMOD_SHIFT: + dlsch_demod_shift = atof(optarg); + break; #if T_TRACER case LONG_OPTION_T_PORT: { extern int T_port; diff --git a/targets/RT/USER/lte-ue.c b/targets/RT/USER/lte-ue.c index 3b6ea9e5c9166b098675de288347bc34f6405e15..49093ed2def4f22334c7870ff358abc938ce70f3 100644 --- a/targets/RT/USER/lte-ue.c +++ b/targets/RT/USER/lte-ue.c @@ -56,6 +56,8 @@ #include "T.h" +extern double cpuf; + #define FRAME_PERIOD 100000000ULL #define DAQ_PERIOD 66667ULL #define FIFO_PRIORITY 40 @@ -538,6 +540,9 @@ static void 
*UE_thread_rxn_txnp4(void *arg) { } phy_procedures_UE_RX( UE, proc, 0, 0, UE->mode, no_relay, NULL ); } + + start_meas(&UE->generic_stat); + if (UE->mac_enabled==1) { ret = mac_xface->ue_scheduler(UE->Mod_id, @@ -567,6 +572,9 @@ static void *UE_thread_rxn_txnp4(void *arg) { UE->Mod_id, proc->frame_rx, proc->subframe_tx,txt ); } } + + stop_meas(&UE->generic_stat); + // Prepare the future Tx data if ((subframe_select( &UE->frame_parms, proc->subframe_tx) == SF_UL) || diff --git a/targets/SIMU/USER/init_lte.c b/targets/SIMU/USER/init_lte.c index 8ff274be14d14c9e618ba6d9f8b7b22709109a3a..a99ccf2c09f40866ec31acd82fc74c65886e758e 100644 --- a/targets/SIMU/USER/init_lte.c +++ b/targets/SIMU/USER/init_lte.c @@ -161,30 +161,32 @@ PHY_VARS_UE* init_lte_UE(LTE_DL_FRAME_PARMS *frame_parms, memcpy(&(PHY_vars_UE->frame_parms), frame_parms, sizeof(LTE_DL_FRAME_PARMS)); phy_init_lte_ue(PHY_vars_UE,1,abstraction_flag); - for (i=0; i<NUMBER_OF_CONNECTED_eNB_MAX; i++) { - for (j=0; j<2; j++) { - PHY_vars_UE->dlsch[i][j] = new_ue_dlsch(1,NUMBER_OF_HARQ_PID_MAX,NSOFT,MAX_TURBO_ITERATIONS,frame_parms->N_RB_DL, abstraction_flag); + for (int l=0; l<2; l++) { + for (i=0; i<NUMBER_OF_CONNECTED_eNB_MAX; i++) { + for (j=0; j<2; j++) { + PHY_vars_UE->dlsch[l][i][j] = new_ue_dlsch(1,NUMBER_OF_HARQ_PID_MAX,NSOFT,MAX_TURBO_ITERATIONS,frame_parms->N_RB_DL, abstraction_flag); - if (!PHY_vars_UE->dlsch[i][j]) { - LOG_E(PHY,"Can't get ue dlsch structures\n"); - exit(-1); - } else - LOG_D(PHY,"dlsch[%d][%d] => %p\n",UE_id,i,PHY_vars_UE->dlsch[i][j]); - } + if (!PHY_vars_UE->dlsch[l][i][j]) { + LOG_E(PHY,"Can't get ue dlsch structures\n"); + exit(-1); + } else + LOG_D(PHY,"dlsch[%d][%d] => %p\n",UE_id,i,PHY_vars_UE->dlsch[l][i][j]); + } - PHY_vars_UE->ulsch[i] = new_ue_ulsch(frame_parms->N_RB_UL, abstraction_flag); + PHY_vars_UE->ulsch[i] = new_ue_ulsch(frame_parms->N_RB_UL, abstraction_flag); - if (!PHY_vars_UE->ulsch[i]) { - LOG_E(PHY,"Can't get ue ulsch structures\n"); - exit(-1); - } + if 
(!PHY_vars_UE->ulsch[i]) { + LOG_E(PHY,"Can't get ue ulsch structures\n"); + exit(-1); + } - PHY_vars_UE->dlsch_SI[i] = new_ue_dlsch(1,1,NSOFT,MAX_TURBO_ITERATIONS,frame_parms->N_RB_DL, abstraction_flag); - PHY_vars_UE->dlsch_ra[i] = new_ue_dlsch(1,1,NSOFT,MAX_TURBO_ITERATIONS,frame_parms->N_RB_DL, abstraction_flag); + PHY_vars_UE->dlsch_SI[i] = new_ue_dlsch(1,1,NSOFT,MAX_TURBO_ITERATIONS,frame_parms->N_RB_DL, abstraction_flag); + PHY_vars_UE->dlsch_ra[i] = new_ue_dlsch(1,1,NSOFT,MAX_TURBO_ITERATIONS,frame_parms->N_RB_DL, abstraction_flag); - PHY_vars_UE->transmission_mode[i] = frame_parms->nb_antenna_ports_eNB==1 ? 1 : 2; + PHY_vars_UE->transmission_mode[i] = frame_parms->nb_antenna_ports_eNB==1 ? 1 : 2; + } } PHY_vars_UE->frame_parms.pucch_config_common.deltaPUCCH_Shift = 1;