diff --git a/cmake_targets/CMakeLists.txt b/cmake_targets/CMakeLists.txt
new file mode 100644
index 0000000000000000000000000000000000000000..89212ec3f12c214ed8ae0327d11dee80e84c020a
--- /dev/null
+++ b/cmake_targets/CMakeLists.txt
@@ -0,0 +1,2534 @@
+#/*
+# * Licensed to the OpenAirInterface (OAI) Software Alliance under one or more
+# * contributor license agreements.  See the NOTICE file distributed with
+# * this work for additional information regarding copyright ownership.
+# * The OpenAirInterface Software Alliance licenses this file to You under
+# * the OAI Public License, Version 1.1  (the "License"); you may not use this file
+# * except in compliance with the License.
+# * You may obtain a copy of the License at
+# *
+# *      http://www.openairinterface.org/?page_id=698
+# *
+# * Unless required by applicable law or agreed to in writing, software
+# * distributed under the License is distributed on an "AS IS" BASIS,
+# * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# * See the License for the specific language governing permissions and
+# * limitations under the License.
+# *-------------------------------------------------------------------------------
+# * For more information about the OpenAirInterface (OAI) Software Alliance:
+# *      contact@openairinterface.org
+# */
+
+# Author: laurent THOMAS, Lionel GAUTHIER
+
+cmake_minimum_required (VERSION 2.8)  # NOTE(review): target_include_directories is used further down; confirm that 2.8 is really the oldest supported release
+
+# Base directories, compatible with legacy OAI building; all of them derive from the OPENAIR_DIR environment variable.
+set (OPENAIR_DIR     $ENV{OPENAIR_DIR})
+if (NOT OPENAIR_DIR)
+  message(FATAL_ERROR "The OPENAIR_DIR environment variable must be set to the root of the OAI source tree")
+endif()
+set (NFAPI_DIR       ${OPENAIR_DIR}/nfapi/open-nFAPI)
+set (NFAPI_USER_DIR  ${OPENAIR_DIR}/nfapi/oai_integration)
+set (OPENAIR1_DIR    ${OPENAIR_DIR}/openair1)
+set (OPENAIR2_DIR    ${OPENAIR_DIR}/openair2)
+set (OPENAIR3_DIR    ${OPENAIR_DIR}/openair3)
+set (OPENAIR_TARGETS ${OPENAIR_DIR}/targets)
+set (OPENAIR_CMAKE   ${OPENAIR_DIR}/cmake_targets)
+set (OPENAIR_BIN_DIR ${CMAKE_CURRENT_BINARY_DIR}${CMAKE_FILES_DIRECTORY})
+
+project (OpenAirInterface)
+
+# Option helpers: OAI has numerous options, so these macros declare them uniformly.
+# add_option(name val helpstr): keep <name> when it is already defined, else use <val>;
+# store the result as a STRING cache entry and export it as -D<name>=<value>.
+macro(add_option name val helpstr)
+  if(NOT DEFINED ${name})
+    set(value ${val})
+  else()
+    set(value ${${name}})
+  endif()
+  set(${name} ${value} CACHE STRING "${helpstr}")
+  add_definitions("-D${name}=${value}")
+endmacro()
+
+macro(add_boolean_option name val helpstr)  # boolean option: -D<name> is added only when the value is true
+  if(NOT DEFINED ${name})
+    set(value ${val})
+  else()
+    set(value ${${name}})
+  endif()
+  set(${name} ${value} CACHE STRING "${helpstr}")
+  set_property(CACHE ${name} PROPERTY TYPE BOOL)  # the entry is created as STRING, then retyped to BOOL
+  if (${value})
+    add_definitions("-D${name}")
+  endif()
+endmacro()
+
+macro(add_integer_option name val helpstr)  # same steps as add_option: STRING cache entry exported as -D<name>=<value>
+  if(NOT DEFINED ${name})
+    set(value ${val})
+  else()
+    set(value ${${name}})
+  endif()
+  set(${name} ${value} CACHE STRING "${helpstr}")
+  add_definitions("-D${name}=${value}")
+endmacro()
+
+macro(add_list1_option name val helpstr)  # extra arguments (ARGN) become the STRINGS list of the cache entry
+  if(NOT DEFINED ${name})
+    set(value ${val})
+  else()
+    set(value ${${name}})
+  endif()
+  set(${name} ${value} CACHE STRING "${helpstr}")
+  set_property(CACHE ${name} PROPERTY STRINGS ${ARGN})
+  if(NOT "${value}" STREQUAL "False")  # the literal value False means: emit no definition
+    add_definitions("-D${name}=${value}")
+  endif()
+endmacro()
+
+macro(add_list2_option name val helpstr)  # like add_list1_option, but the selected value itself is the macro name
+  if(NOT DEFINED ${name})
+    set(value ${val})
+  else()
+    set(value ${${name}})
+  endif()
+  set(${name} ${value} CACHE STRING "${helpstr}")
+  set_property(CACHE ${name} PROPERTY STRINGS ${ARGN})
+  if(NOT "${value}" STREQUAL "False")
+    add_definitions("-D${value}=1")  # e.g. value Rel14 gives -DRel14=1
+  endif()
+endmacro()
+
+macro(add_list_string_option name val helpstr)  # like add_list1_option, but the value is exported as a quoted string
+  if(NOT DEFINED ${name})
+    set(value ${val})
+  else()
+    set(value ${${name}})
+  endif()
+  set(${name} ${value} CACHE STRING "${helpstr}")
+  set_property(CACHE ${name} PROPERTY STRINGS ${ARGN})
+  if(NOT "${value}" STREQUAL "False")
+    add_definitions("-D${name}=\"${value}\"")  # -D<name>="<value>"
+  endif()
+endmacro()
+####################################################
+# compilation flags
+#############################################
+# Fall back to RelWithDebInfo when no build type was requested.
+#set(CMAKE_BUILD_TYPE "Debug")
+if (CMAKE_BUILD_TYPE STREQUAL "")
+   set(CMAKE_BUILD_TYPE "RelWithDebInfo")
+endif()
+message("CMAKE_BUILD_TYPE is ${CMAKE_BUILD_TYPE}")
+add_list_string_option(CMAKE_BUILD_TYPE "RelWithDebInfo" "Choose the type of build, options are: None(CMAKE_CXX_FLAGS or CMAKE_C_FLAGS used) Debug Release RelWithDebInfo MinSizeRel." Debug Release RelWithDebInfo MinSizeRel)
+
+# Select the SIMD compiler flags: fixed NEON flags on armv7l, otherwise taken from the "flags" line of /proc/cpuinfo.
+Message("Architecture is ${CMAKE_SYSTEM_PROCESSOR}")
+if (CMAKE_SYSTEM_PROCESSOR STREQUAL "armv7l")
+  set(C_FLAGS_PROCESSOR "-gdwarf-2 -mfloat-abi=hard -mfpu=neon -lgcc -lrt")
+else()
+  if(EXISTS  "/proc/cpuinfo")
+    file(STRINGS "/proc/cpuinfo" CPUINFO REGEX flags LIMIT_COUNT 1)  # keep only the first line matching "flags"
+    if (CPUINFO MATCHES "avx2")
+      set(C_FLAGS_PROCESSOR "${C_FLAGS_PROCESSOR} -mavx2")
+      set(COMPILATION_AVX2 "True")
+    else()
+      set(COMPILATION_AVX2 "False")
+    endif()
+    if (CPUINFO MATCHES "sse4_1")
+      set(C_FLAGS_PROCESSOR "${C_FLAGS_PROCESSOR} -msse4.1")
+    endif()
+    if (CPUINFO MATCHES "ssse3")
+      set(C_FLAGS_PROCESSOR "${C_FLAGS_PROCESSOR} -mssse3")
+    endif()
+  else()
+    Message("/proc/cpuinfo does not exist. We will use manual CPU flags")
+  endif()
+endif()
+
+set(C_FLAGS_PROCESSOR " ${C_FLAGS_PROCESSOR} ${CFLAGS_PROCESSOR_USER}")  # append the user-provided flags
+Message("C_FLAGS_PROCESSOR is ${C_FLAGS_PROCESSOR}")
+# x86 builds require SSSE3 and SSE4.1: stop here when either flag is missing.
+if (CMAKE_SYSTEM_PROCESSOR MATCHES "x86")
+  if ( (NOT( C_FLAGS_PROCESSOR MATCHES "ssse3")) OR (NOT( C_FLAGS_PROCESSOR MATCHES "msse4.1")) )
+    Message(FATAL_ERROR "For x86 Architecture, you must have following flags: -mssse3 -msse4.1. The current detected flags are: ${C_FLAGS_PROCESSOR}. You can pass the flags manually in build script, for example: ./build_oai --cflags_processor \"-mssse3 -msse4.1 -mavx2\" ")
+  endif()
+endif()
+
+# C flags: the processor flags followed by the language standard, warning and code-generation options
+set(CMAKE_C_FLAGS
+  "${CMAKE_C_FLAGS} ${C_FLAGS_PROCESSOR} -std=gnu99 -Wall -Wstrict-prototypes -fno-strict-aliasing -rdynamic -funroll-loops -Wno-packed-bitfield-compat -fPIC ")
+# add autotools definitions that were maybe used! (HAVE_STDLIB_H is listed twice)
+set(CMAKE_C_FLAGS
+  "${CMAKE_C_FLAGS} -DSTDC_HEADERS=1 -DHAVE_SYS_TYPES_H=1 -DHAVE_SYS_STAT_H=1 -DHAVE_STDLIB_H=1 -DHAVE_STRING_H=1 -DHAVE_MEMORY_H=1 -DHAVE_STRINGS_H=1 -DHAVE_INTTYPES_H=1 -DHAVE_STDINT_H=1 -DHAVE_UNISTD_H=1 -DHAVE_FCNTL_H=1 -DHAVE_ARPA_INET_H=1 -DHAVE_SYS_TIME_H=1 -DHAVE_SYS_SOCKET_H=1 -DHAVE_STRERROR=1 -DHAVE_SOCKET=1 -DHAVE_MEMSET=1 -DHAVE_GETTIMEOFDAY=1 -DHAVE_STDLIB_H=1 -DHAVE_MALLOC=1 -DHAVE_LIBSCTP"
+)
+set(CMAKE_CXX_FLAGS
+  "${CMAKE_CXX_FLAGS} ${C_FLAGS_PROCESSOR} -std=c++11 "
+)  # C++ only receives the processor flags and the c++11 standard
+
+
+# Add the build directory as a runtime library search path (rpath) of the executables.
+set(CMAKE_EXE_LINKER_FLAGS  "${CMAKE_EXE_LINKER_FLAGS} -Wl,-rpath -Wl,${CMAKE_CURRENT_BINARY_DIR}")
+#########################
+# set a flag for changes in the source code
+# these changes are related to hardcoded path to include .h files
+add_definitions(-DCMAKER)
+set(CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS} -g -DMALLOC_CHECK_=3")  # NOTE(review): CMAKE_C_FLAGS is embedded again in the per-configuration flags, so it presumably reaches the compiler twice; confirm this is intended
+set(CMAKE_C_FLAGS_RELWITHDEBINFO "${CMAKE_C_FLAGS} -g -DMALLOC_CHECK_=3 -O2")
+
+
+set(GIT_BRANCH        "UNKNOWN")
+set(GIT_COMMIT_HASH   "UNKNOWN")
+set(GIT_COMMIT_DATE   "UNKNOWN")
+# These defaults are kept when git is not found; otherwise the executable located by find_package(Git) is queried.
+find_package(Git)
+if(GIT_FOUND)
+  message("git found: ${GIT_EXECUTABLE}")
+  # Get the current working branch
+  execute_process(
+    COMMAND "${GIT_EXECUTABLE}" rev-parse --abbrev-ref HEAD
+    WORKING_DIRECTORY ${CMAKE_SOURCE_DIR}
+    OUTPUT_VARIABLE GIT_BRANCH
+    OUTPUT_STRIP_TRAILING_WHITESPACE
+  )
+
+  # Get the latest abbreviated commit hash of the working branch
+  execute_process(
+    COMMAND "${GIT_EXECUTABLE}" log -1 --format=%h
+    WORKING_DIRECTORY ${CMAKE_SOURCE_DIR}
+    OUTPUT_VARIABLE GIT_COMMIT_HASH
+    OUTPUT_STRIP_TRAILING_WHITESPACE
+  )
+
+  # Get the latest commit date of the working branch
+  execute_process(
+    COMMAND "${GIT_EXECUTABLE}" log -1 --format=%cd
+    WORKING_DIRECTORY ${CMAKE_SOURCE_DIR}
+    OUTPUT_VARIABLE GIT_COMMIT_DATE
+    OUTPUT_STRIP_TRAILING_WHITESPACE
+  )
+endif()
+
+
+# Version strings passed to the compiler as string definitions; FIRMWARE_VERSION is hard-coded.
+set (FIRMWARE_VERSION "No svn information")
+add_definitions("-DFIRMWARE_VERSION=\"${FIRMWARE_VERSION}\"")
+add_definitions("-DPACKAGE_VERSION=\"Branch: ${GIT_BRANCH} Abrev. Hash: ${GIT_COMMIT_HASH} Date: ${GIT_COMMIT_DATE}\"")
+add_definitions("-DPACKAGE_BUGREPORT=\"openair4g-devel@lists.eurecom.fr\"")
+
+
+
+# Debug related options (add_boolean_option exports each enabled option as -D<NAME>)
+#########################################
+add_boolean_option(ASN_DEBUG           False "ASN1 coder/decoder Debug")
+add_boolean_option(EMIT_ASN_DEBUG      False "ASN1 coder/decoder Debug")
+add_boolean_option(MSG_PRINT           False "print debug messages")
+add_boolean_option(DISABLE_XER_PRINT   False "print XER Format")
+add_boolean_option(XER_PRINT           False "print XER Format")
+add_boolean_option(RRC_MSG_PRINT       False "print RRC messages")
+add_boolean_option(PDCP_MSG_PRINT      False "print PDCP messages to /tmp/pdcp.log")
+add_boolean_option(DEBUG_PDCP_PAYLOAD  False "print PDCP PDU to stdout")  # if true, make sure that global and PDCP log levels are trace
+add_boolean_option(DEBUG_MAC_INTERFACE False "print MAC-RLC PDU exchange to stdout") # if true, make sure that global and PDCP log levels are trace (NOTE(review): wording identical to the PDCP option above; confirm which log component applies)
+add_boolean_option(TRACE_RLC_PAYLOAD   False "print RLC PDU to stdout") # if true, make sure that global and PDCP log levels are trace (NOTE(review): wording identical to the PDCP option above; confirm which log component applies)
+add_boolean_option(TEST_OMG            False "???")
+add_boolean_option(DEBUG_OMG           False "???")
+add_boolean_option(XFORMS              False "This adds the possibility to see the signal oscilloscope")
+add_boolean_option(PRINT_STATS         False "This adds the possibility to see the status")
+add_boolean_option(T_TRACER            False "Activate the T tracer, a debugging/monitoring framework" )
+add_boolean_option(UE_AUTOTEST_TRACE   False "Activate UE autotest specific logs")
+add_boolean_option(UE_DEBUG_TRACE      False "Activate UE debug trace")
+add_boolean_option(UE_TIMING_TRACE     False "Activate UE timing trace")
+add_boolean_option(DISABLE_LOG_X       False "Deactivate all LOG_* macros")
+add_boolean_option(USRP_REC_PLAY       False "Enable USRP record playback mode")
+
+add_boolean_option(DEBUG_CONSOLE False "makes debugging easier, disables stdout/stderr buffering")
+
+add_boolean_option(ENABLE_ITTI True "ITTI is internal messaging, should remain enabled for most targets")
+set (ITTI_DIR ${OPENAIR_DIR}/common/utils/itti)
+if (${ENABLE_ITTI})
+  # ITTI_LIB and GTPU_need_ITTI are only defined when ITTI is enabled
+  set(ITTI_LIB ITTI)
+  set(GTPU_need_ITTI ${OPENAIR3_DIR}/GTPV1-U/gtpv1u_eNB.c)
+  add_library(ITTI
+    ${ITTI_DIR}/intertask_interface.h  # header listed as a dependency (this one is generated)
+    ${ITTI_DIR}/intertask_interface.c
+    ${ITTI_DIR}/intertask_interface_dump.c
+    ${ITTI_DIR}/backtrace.c
+    ${ITTI_DIR}/memory_pools.c
+    ${ITTI_DIR}/signals.c
+    ${ITTI_DIR}/timer.c
+  )
+endif()
+
+#############################
+# ASN.1 grammar C code generation & dependencies
+################################
+# A difficulty: asn1c generates C code as an unpredictable list of files,
+# so the C code is generated by asn1c once, at cmake run time.
+# Consequently, if someone modifies an ASN.1 source file in a way that creates
+# new ASN.1 objects (instead of only modifying the attributes of existing objects),
+# new C source files appear and cmake must be re-run (re-running make alone is not enough).
+#############
+set(asn1c_call "${OPENAIR_CMAKE}/tools/generate_asn1")
+set(fix_asn1c_call "${OPENAIR_CMAKE}/tools/fix_asn1")
+set(asn1_generated_dir ${OPENAIR_BIN_DIR})
+
+set(protoc_call "${OPENAIR_CMAKE}/tools/generate_protobuf")
+set(protobuf_generated_dir ${OPENAIR_BIN_DIR})
+
+# RRC
+######
+# The selected release name is also exported as a compile definition (e.g. -DRel14=1).
+add_list2_option(RRC_ASN1_VERSION "Rel14" "ASN.1 version of RRC interface" "Rel8" "Rel10" "Rel14" "CBA")
+# Grammar file per release; any value other than Rel8/Rel10/CBA (including the default Rel14) uses RRC-e30.asn.
+if (${RRC_ASN1_VERSION} STREQUAL "Rel8")
+  set (RRC_GRAMMAR ${OPENAIR2_DIR}/RRC/LITE/MESSAGES/asn1c/ASN1_files/EUTRA-RRC-Definitions-86.asn)
+elseif (${RRC_ASN1_VERSION} STREQUAL "Rel10")
+  set (RRC_GRAMMAR ${OPENAIR2_DIR}/RRC/LITE/MESSAGES/asn1c/ASN1_files/EUTRA-RRC-Definitions-a20.asn)
+elseif (${RRC_ASN1_VERSION} STREQUAL "CBA")
+  set (RRC_GRAMMAR ${OPENAIR2_DIR}/RRC/LITE/MESSAGES/asn1c/ASN1_files/EUTRA-RRC-Definitions-a20-lola.asn)
+else()
+  set (RRC_GRAMMAR ${OPENAIR2_DIR}/RRC/LITE/MESSAGES/asn1c/ASN1_files/RRC-e30.asn)
+endif()
+
+# Directory receiving the C code generated from the RRC grammar.
+set (RRC_FULL_DIR ${asn1_generated_dir}/RRC_${RRC_ASN1_VERSION})
+
+# Step 1: run the asn1c wrapper script now, at configure time.
+if(NOT EXISTS ${asn1c_call})
+  message( FATAL_ERROR "The script ${asn1c_call} must be present" )
+endif()
+
+message("calling asn1c -fcompound-names -fno-include-deps -gen-PER -no-gen-OER -no-gen-example -D ${RRC_FULL_DIR} ${RRC_GRAMMAR}")
+execute_process(
+  COMMAND ${asn1c_call} ${RRC_FULL_DIR} ${RRC_GRAMMAR} RRC
+  RESULT_VARIABLE ret
+)
+# any result other than 0 aborts the configuration
+if (NOT ${ret} STREQUAL 0)
+  message(FATAL_ERROR "${asn1c_call}: error")
+endif()
+
+# Step 2: run the fix script on the generated directory.
+if(NOT EXISTS ${fix_asn1c_call})
+  message( FATAL_ERROR "The script ${fix_asn1c_call} must be present" )
+endif()
+
+execute_process(
+  COMMAND ${fix_asn1c_call} ${RRC_FULL_DIR} RRC ${RRC_ASN1_VERSION}
+  RESULT_VARIABLE ret
+)
+if (NOT ${ret} STREQUAL 0)
+  message(FATAL_ERROR "${fix_asn1c_call}: error")
+endif()
+
+# Collect the generated files: their list is only known once the generation has run.
+file(GLOB rrc_source ${RRC_FULL_DIR}/*.c)
+file(GLOB rrc_h ${RRC_FULL_DIR}/*.h)
+list(APPEND rrc_h ${RRC_FULL_DIR}/asn1_constants.h)
+set_source_files_properties(${rrc_source} PROPERTIES COMPILE_FLAGS -w) # suppress warnings from generated code
+add_library(RRC_LIB ${rrc_h} ${rrc_source} ${OPENAIR2_DIR}/RRC/LITE/MESSAGES/asn1_msg.c
+    ${OPENAIR2_DIR}/RRC/LITE/MESSAGES/asn1_msg_NB_IoT.c)
+include_directories ("${RRC_FULL_DIR}")
+
+# add the command to generate the source code
+# Warning: if you modify ASN.1 source file to generate new C files, cmake should be re-run instead of make
+add_custom_command (
+  OUTPUT ${RRC_FULL_DIR}/asn1_constants.h
+  COMMAND ${asn1c_call}  ${RRC_FULL_DIR} ${RRC_GRAMMAR} RRC  # same three arguments as the configure-time call
+  COMMAND ${fix_asn1c_call}  ${RRC_FULL_DIR} RRC ${RRC_ASN1_VERSION}
+  DEPENDS ${RRC_GRAMMAR}
+  )
+
+
+# NR RRC
+######
+# The selected version name is exported as a compile definition (-DNR_Rel15=1 for the default).
+add_list2_option(NR_RRC_ASN1_VERSION "NR_Rel15" "ASN.1 version of NR_RRC interface")
+
+# Only NR_Rel15 selects a grammar file; NR_RRC_GRAMMAR is not set here for any other value.
+if (${NR_RRC_ASN1_VERSION} STREQUAL "NR_Rel15")
+  set (NR_RRC_GRAMMAR ${OPENAIR2_DIR}/RRC/NR/MESSAGES/asn1c/ASN1_files/NR-RRC-Definitions.asn)
+endif()
+set (NR_RRC_FULL_DIR ${asn1_generated_dir}/${NR_RRC_ASN1_VERSION})
+
+# Step 1: run the asn1c wrapper script for the NR grammar at configure time.
+if(NOT EXISTS ${asn1c_call})
+  message( FATAL_ERROR "The script ${asn1c_call} must be present" )
+endif()
+
+message("calling ASN1C_PREFIX=NR_ asn1c -fcompound-names -fno-include-deps -gen-PER -no-gen-OER -no-gen-example -D ${NR_RRC_FULL_DIR} ${NR_RRC_GRAMMAR}")
+execute_process(
+  COMMAND ${asn1c_call} ${NR_RRC_FULL_DIR} ${NR_RRC_GRAMMAR} NR_RRC
+  RESULT_VARIABLE ret
+)
+
+if (NOT "${ret}" STREQUAL "0")  # quoted: the result may be an error text containing spaces
+   message(FATAL_ERROR "${asn1c_call}: error")
+endif()
+
+# Step 2: run the fix script on the generated directory.
+if(NOT EXISTS ${fix_asn1c_call})
+  message( FATAL_ERROR "The script ${fix_asn1c_call} must be present" )
+endif()
+
+execute_process(
+  COMMAND ${fix_asn1c_call} ${NR_RRC_FULL_DIR} NR_RRC ${NR_RRC_ASN1_VERSION}
+  RESULT_VARIABLE ret
+)
+
+if (NOT "${ret}" STREQUAL "0")
+  message(FATAL_ERROR "${fix_asn1c_call}: error")
+endif()
+
+# Collect the generated NR files: their list is only known once the generation has run.
+file(GLOB nr_rrc_source ${NR_RRC_FULL_DIR}/*.c)
+file(GLOB nr_rrc_h ${NR_RRC_FULL_DIR}/*.h)
+list(APPEND nr_rrc_h ${NR_RRC_FULL_DIR}/asn1_constants.h)
+set_source_files_properties(${nr_rrc_source} PROPERTIES COMPILE_FLAGS -w) # suppress warnings from generated code
+add_library(NR_RRC_LIB ${nr_rrc_h} ${nr_rrc_source} ${OPENAIR2_DIR}/RRC/NR/MESSAGES/asn1_msg.c)
+include_directories ("${NR_RRC_FULL_DIR}")
+
+# add the command to generate the source code
+# Warning: if you modify ASN.1 source file to generate new C files, cmake should be re-run instead of make
+# The arguments mirror the configure-time calls (module NR_RRC) and the rule depends on the NR grammar.
+add_custom_command (
+  OUTPUT ${NR_RRC_FULL_DIR}/asn1_constants.h
+  COMMAND ${asn1c_call}  ${NR_RRC_FULL_DIR} ${NR_RRC_GRAMMAR} NR_RRC
+  COMMAND ${fix_asn1c_call}  ${NR_RRC_FULL_DIR} NR_RRC ${NR_RRC_ASN1_VERSION}
+  DEPENDS ${NR_RRC_GRAMMAR}
+  )
+
+# S1AP
+# Same limitation as described in RRC: unknown generated file list
+# so we generate it at cmake time
+##############
+add_list1_option(S1AP_VERSION R14 "S1AP Asn.1 grammar version" R8 R9 R10 R14)
+set(S1AP_DIR ${OPENAIR3_DIR}/S1AP)
+
+# Release directory and UPDATE_RELEASE_* definitions per version; only R14 names a grammar file, and any value other than R14/R10/R9 is handled as R8.
+if (${S1AP_VERSION} STREQUAL "R14")
+  set (ASN1RELDIR R14.4)
+  set(S1AP_ASN_FILES s1ap-14.4.0.asn1)
+  add_definitions("-DUPDATE_RELEASE_9 -DUPDATE_RELEASE_10 -DUPDATE_RELEASE_14")
+elseif (${S1AP_VERSION} STREQUAL "R10")
+  set (ASN1RELDIR R10.5)
+  add_definitions("-DUPDATE_RELEASE_9 -DUPDATE_RELEASE_10")
+elseif (${S1AP_VERSION} STREQUAL "R9")
+  set (ASN1RELDIR R9.8)
+  add_definitions("-DUPDATE_RELEASE_9")
+else()
+  set (ASN1RELDIR R8.10)
+endif()
+
+set(S1AP_ASN_DIR ${S1AP_DIR}/MESSAGES/ASN1/${ASN1RELDIR})
+set(S1AP_C_DIR ${asn1_generated_dir}/S1AP_${ASN1RELDIR})
+
+message("calling ASN1C_PREFIX=S1AP_ asn1c -fcompound-names -fno-include-deps -gen-PER -no-gen-OER -no-gen-example -D ${S1AP_C_DIR} ${S1AP_ASN_DIR}/${S1AP_ASN_FILES}")
+# Create the output directory first: several COMMANDs in one execute_process() run concurrently as a pipeline, not one after the other.
+file(MAKE_DIRECTORY ${S1AP_C_DIR})
+execute_process(COMMAND env "ASN1C_PREFIX=S1AP_" asn1c -pdu=all -fcompound-names -fno-include-deps -gen-PER -no-gen-OER -no-gen-example -D ${S1AP_C_DIR} ${S1AP_ASN_DIR}/${S1AP_ASN_FILES}
+                RESULT_VARIABLE ret)
+
+if (NOT "${ret}" STREQUAL "0")  # quoted: the result may be an error text containing spaces
+  message(FATAL_ERROR "${ret}: error")
+endif()
+
+file(GLOB S1AP_source ${S1AP_C_DIR}/*.c)
+file(GLOB s1ap_h ${S1AP_C_DIR}/*.h)
+set(s1ap_h ${s1ap_h} )
+# Rule describing how to regenerate the S1AP code from the grammar file at build time.
+add_custom_command (
+  OUTPUT ${S1AP_C_DIR}/S1AP_asn_constant.h
+  COMMAND mkdir -p ${S1AP_C_DIR}
+  COMMAND env "ASN1C_PREFIX=S1AP_" asn1c -pdu=all -fcompound-names -fno-include-deps -gen-PER -no-gen-OER -no-gen-example -D ${S1AP_C_DIR} ${S1AP_ASN_DIR}/${S1AP_ASN_FILES}
+  DEPENDS ${S1AP_ASN_DIR}/${S1AP_ASN_FILES}
+)
+# S1AP_LIB: every generated .c file found by the glob above, plus s1ap_common.c
+add_library(S1AP_LIB
+  ${S1AP_source}
+  ${S1AP_DIR}/s1ap_common.c
+  )
+
+include_directories ("${S1AP_C_DIR}")
+include_directories ("${S1AP_DIR}")
+
+add_library(S1AP_ENB  # library built from the s1ap_eNB* sources listed below
+  # ${S1AP_C_DIR}/s1ap_ies_defs.h
+  ${S1AP_DIR}/s1ap_eNB.c
+  ${S1AP_DIR}/s1ap_eNB_context_management_procedures.c
+  ${S1AP_DIR}/s1ap_eNB_encoder.c
+  ${S1AP_DIR}/s1ap_eNB_handlers.c
+  ${S1AP_DIR}/s1ap_eNB_itti_messaging.c
+  ${S1AP_DIR}/s1ap_eNB_management_procedures.c
+  ${S1AP_DIR}/s1ap_eNB_nas_procedures.c
+  ${S1AP_DIR}/s1ap_eNB_nnsf.c
+  ${S1AP_DIR}/s1ap_eNB_overload.c
+  ${S1AP_DIR}/s1ap_eNB_trace.c
+  ${S1AP_DIR}/s1ap_eNB_ue_context.c
+  ${S1AP_DIR}/s1ap_eNB_decoder.c
+  )
+
+
+
+# X2AP
+# Same limitation as described in RRC/S1AP: unknown generated file list
+# so we generate it at cmake time
+##############
+add_list1_option(X2AP_VERSION R14 "X2AP Asn.1 grammar version" R10 R11 R14)
+set(X2AP_DIR ${OPENAIR2_DIR}/X2AP)
+# No else branch: for any other version ASN1RELDIR keeps the value it already had. Only R14 names a grammar file.
+if (${X2AP_VERSION} STREQUAL "R14")
+  set (X2AP_ASN_FILES x2ap-14.5.0.asn1)
+  set (ASN1RELDIR R14.5)
+elseif (${X2AP_VERSION} STREQUAL "R11")
+  set (ASN1RELDIR R11.2)
+elseif (${X2AP_VERSION} STREQUAL "R10")
+  set (ASN1RELDIR R.UNKNOWN)
+endif()
+set(X2AP_ASN_DIR ${X2AP_DIR}/MESSAGES/ASN1/${ASN1RELDIR})
+set(X2AP_C_DIR ${asn1_generated_dir}/X2AP_${ASN1RELDIR})
+message("calling asn1c -fcompound-names -fno-include-deps -gen-PER -no-gen-OER -no-gen-example -D ${X2AP_C_DIR} ${X2AP_ASN_DIR}/${X2AP_ASN_FILES}")
+file(MAKE_DIRECTORY ${X2AP_C_DIR})  # created beforehand: several COMMANDs in one execute_process() run concurrently as a pipeline
+execute_process(COMMAND env "ASN1C_PREFIX=X2AP_" asn1c -pdu=all -fcompound-names -fno-include-deps -gen-PER -no-gen-OER -no-gen-example -D ${X2AP_C_DIR} ${X2AP_ASN_DIR}/${X2AP_ASN_FILES}
+                RESULT_VARIABLE ret)
+if (NOT "${ret}" STREQUAL "0")  # quoted: the result may be an error text containing spaces
+  message(FATAL_ERROR "asn1c failed for X2AP: ${ret}")
+endif()
+file(GLOB X2AP_source ${X2AP_C_DIR}/*.c)
+
+file(GLOB x2ap_h ${X2AP_C_DIR}/*.h)
+set(x2ap_h ${x2ap_h} )
+# Rule describing how to regenerate the X2AP code from the grammar file at build time.
+add_custom_command (
+  OUTPUT ${X2AP_C_DIR}/X2AP_asn_constant.h
+  COMMAND mkdir -p ${X2AP_C_DIR}
+  COMMAND env "ASN1C_PREFIX=X2AP_" asn1c -pdu=all -fcompound-names -fno-include-deps -gen-PER -no-gen-OER -no-gen-example -D ${X2AP_C_DIR} ${X2AP_ASN_DIR}/${X2AP_ASN_FILES}
+  DEPENDS ${X2AP_ASN_DIR}/${X2AP_ASN_FILES}
+  )
+# X2AP_LIB: every generated .c file found by the glob above, plus x2ap_common.c
+add_library(X2AP_LIB
+  ${X2AP_source}
+  ${X2AP_DIR}/x2ap_common.c
+  )
+
+include_directories ("${X2AP_C_DIR}")
+include_directories ("${X2AP_DIR}")
+
+# Hardware dependent options
+###################################
+add_list1_option(NB_ANTENNAS_RX "2" "Number of antennas in reception" "1" "2" "4")
+add_list1_option(NB_ANTENNAS_TX "4" "Number of antennas in transmission" "1" "2" "4")
+# add_list2_option exports the selected value itself as -D<value>=1 (the default None below yields -DNone=1)
+add_list2_option(RF_BOARD "EXMIMO" "RF head type" "None" "EXMIMO" "OAI_USRP" "OAI_BLADERF" "CPRIGW" "OAI_LMSSDR")
+
+add_list2_option(TRANSP_PRO "None" "Transport protocol type" "None" "ETHERNET")
+# NOKIA config enhancement: sources of the configuration module
+set (CONFIG_ROOTDIR ${OPENAIR_DIR}/common/config)
+
+set (CONFIG_SOURCES
+  ${CONFIG_ROOTDIR}/config_load_configmodule.c
+  ${CONFIG_ROOTDIR}/config_userapi.c
+  ${CONFIG_ROOTDIR}/config_cmdline.c
+)
+set (CONFIG_LIBCONFIG_SOURCES ${CONFIG_ROOTDIR}/libconfig/config_libconfig.c)
+
+# params_libconfig is built as a loadable module and linked against the config library
+add_library(params_libconfig MODULE ${CONFIG_LIBCONFIG_SOURCES})
+target_link_libraries(params_libconfig config)
+
+# shared library loader
+set (SHLIB_LOADER_SOURCES
+  ${OPENAIR_DIR}/common/utils/load_module_shlib.c
+)
+# include RF devices / transport protocols library modules
+######################################################################
+# EXMIMO device module
+include_directories("${OPENAIR_TARGETS}/ARCH/EXMIMO/USERSPACE/LIB/")
+include_directories ("${OPENAIR_TARGETS}/ARCH/EXMIMO/DEFS/")
+#set (option_HWEXMIMOLIB_lib "-l ")
+set(HWLIB_EXMIMO_SOURCE
+  ${OPENAIR_TARGETS}/ARCH/EXMIMO/USERSPACE/LIB/openair0_lib.c
+#  ${OPENAIR_TARGETS}/ARCH/EXMIMO/USERSPACE/LIB/gain_control.c
+  )
+add_library(oai_exmimodevif MODULE ${HWLIB_EXMIMO_SOURCE} )
+# USRP device module (links uhd)
+include_directories("${OPENAIR_TARGETS}/ARCH/USRP/USERSPACE/LIB/")
+set(HWLIB_USRP_SOURCE
+  ${OPENAIR_TARGETS}/ARCH/USRP/USERSPACE/LIB/usrp_lib.cpp
+  )
+add_library(oai_usrpdevif MODULE ${HWLIB_USRP_SOURCE} )
+target_link_libraries(oai_usrpdevif uhd)
+# BladeRF device module (links bladeRF)
+include_directories("${OPENAIR_TARGETS}/ARCH/BLADERF/USERSPACE/LIB/")
+set(HWLIB_BLADERF_SOURCE
+  ${OPENAIR_TARGETS}/ARCH/BLADERF/USERSPACE/LIB/bladerf_lib.c
+  )
+add_library(oai_bladerfdevif MODULE ${HWLIB_BLADERF_SOURCE} )
+target_link_libraries(oai_bladerfdevif bladeRF)
+# LMSSDR device module (links LimeSuite; its headers are taken from /usr/local/include/lime)
+include_directories("${OPENAIR_TARGETS}/ARCH/LMSSDR/USERSPACE/LIB/")
+
+set(HWLIB_LMSSDR_SOURCE
+  ${OPENAIR_TARGETS}/ARCH/LMSSDR/USERSPACE/LIB/lms_lib.cpp
+  )
+add_library(oai_lmssdrdevif MODULE ${HWLIB_LMSSDR_SOURCE} )
+target_include_directories(oai_lmssdrdevif PRIVATE /usr/local/include/lime)
+target_link_libraries(oai_lmssdrdevif LimeSuite )
+# Ethernet transport module
+include_directories("${OPENAIR_TARGETS}/ARCH/ETHERNET/USERSPACE/LIB/")
+set(TPLIB_ETHERNET_SOURCE
+  ${OPENAIR_TARGETS}/ARCH/ETHERNET/USERSPACE/LIB/ethernet_lib.c
+  ${OPENAIR_TARGETS}/ARCH/ETHERNET/USERSPACE/LIB/eth_udp.c
+  ${OPENAIR_TARGETS}/ARCH/ETHERNET/USERSPACE/LIB/eth_raw.c
+  )
+add_library(oai_eth_transpro MODULE ${TPLIB_ETHERNET_SOURCE} )
+
+include_directories("${OPENAIR_TARGETS}/ARCH/mobipass/")
+set(TPLIB_MOBIPASS_SOURCE
+  ${OPENAIR_TARGETS}/ARCH/mobipass/interface.c
+  ${OPENAIR_TARGETS}/ARCH/mobipass/mobipass.c
+  ${OPENAIR_TARGETS}/ARCH/mobipass/queues.c
+  )
+add_library(oai_mobipass MODULE ${TPLIB_MOBIPASS_SOURCE} )
+
+# Hide all functions/variables in the mobipass library.
+# Use __attribute__((__visibility__("default")))
+# in the source code to unhide a function/variable.
+# COMPILE_FLAGS is set directly: the target was just created, so there is no earlier value to preserve (same approach as oai_tcp_bridge).
+set_target_properties(oai_mobipass PROPERTIES COMPILE_FLAGS "-fvisibility=hidden")
+
+set(HWLIB_TCP_BRIDGE_SOURCE
+  ${OPENAIR_TARGETS}/ARCH/tcp_bridge/tcp_bridge.c
+  )
+add_library(oai_tcp_bridge MODULE ${HWLIB_TCP_BRIDGE_SOURCE} )
+
+# Compile the module with hidden symbol visibility. COMPILE_FLAGS is assigned directly
+# instead of being appended to a value read back with get_target_property.
+set_target_properties(oai_tcp_bridge PROPERTIES COMPILE_FLAGS "-fvisibility=hidden")
+
+##########################################################
+
+include_directories ("${OPENAIR_TARGETS}/ARCH/COMMON")
+
+##############################################################
+#    ???!!! TO BE DOCUMENTED OPTIONS !!!???
+##############################################################
+add_boolean_option(ENABLE_SECURITY         True  "Enable LTE integrity and ciphering between RRC UE and eNB")
+add_boolean_option(ENABLE_USE_MME          True  "eNB connected to MME (INTERFACE S1-C), not standalone eNB")
+add_boolean_option(NO_RRM                  True  "DO WE HAVE A RADIO RESSOURCE MANAGER: NO")
+add_boolean_option(RRC_DEFAULT_RAB_IS_AM False "set the RLC mode to AM for the default bearer")
+
+add_boolean_option(OAI_NW_DRIVER_TYPE_ETHERNET False "????")
+add_boolean_option(DEADLINE_SCHEDULER True "Use the Linux scheduler SCHED_DEADLINE: kernel >= 3.14")
+add_boolean_option(CPU_AFFINITY False "Enable CPU Affinity of threads (only valid without deadline scheduler). It is enabled only with >2 CPUs")
+# Report both settings only after they are declared, so the defaults are shown on a first configuration too.
+Message("DEADLINE_SCHEDULER flag  is ${DEADLINE_SCHEDULER}")
+Message("CPU_Affinity flag is ${CPU_AFFINITY}")
+add_boolean_option(NAS_ADDRESS_FIX False "specific to oaisim: for nasmesh driver")
+add_boolean_option(NAS_NETLINK False "useless ??? Must be True to compile nasmesh driver without rtai ????")
+add_boolean_option(OAISIM False "specific to oaisim")
+add_boolean_option(OAI_NW_DRIVER_USE_NETLINK True "????")
+
+add_boolean_option(USE_MME False "this flag is used only one time in lte-softmodem.c")
+add_list_string_option(PACKAGE_NAME "NotDefined" "As per attribute name")
+add_boolean_option(MESSAGE_CHART_GENERATOR False         "For generating sequence diagrams")
+add_boolean_option(MESSAGE_CHART_GENERATOR_RLC_MAC False "trace RLC-MAC exchanges in sequence diagrams")
+add_boolean_option(MESSAGE_CHART_GENERATOR_PHY     False "trace some PHY exchanges in sequence diagrams")
+
+########################
+# Include order (ENB_MODE decides whether the openair2 or the openair3 directories are added first)
+##########################
+add_boolean_option(ENB_MODE True "Swap the include directories between openair2 and openair3" )
+
+##########################
+# SCHEDULING/REAL-TIME/PERF options (every enabled option is exported as -D<NAME>)
+##########################
+add_boolean_option(ENABLE_USE_CPU_EXECUTION_TIME True "Add data in vcd traces: disable it if perf issues")
+add_boolean_option(ENABLE_VCD              True  "always true now, time measurements of proc calls and var displays")
+add_boolean_option(ENABLE_VCD_FIFO         True  "time measurements of proc calls and var displays sent to FIFO (one more thread)")
+add_boolean_option(LINUX                   False "used in weird memcpy() in pdcp.c ???")
+add_boolean_option(LINUX_LIST              False "used only in lists.c: either use OAI implementation of lists or Linux one (should be True, but it is False")
+add_boolean_option(LOG_NO_THREAD           True  "Disable thread for log, seems always set to true")
+add_boolean_option(OPENAIR_LTE             True "Seems legacy: keep it to true")
+
+##########################
+# PHY options (every enabled boolean option is exported as -D<NAME>; MAX_NUM_CCs as -DMAX_NUM_CCs=<value>)
+##########################
+add_boolean_option(DRIVER2013              True "only relevant for EXMIMO")
+add_boolean_option(ENABLE_NEW_MULTICAST    False "specific to oaisim")
+add_boolean_option(EXMIMO_IOT              True "????")
+add_boolean_option(LARGE_SCALE             False "specific to oaisim: defines max eNB=2 and max UE=120")
+add_boolean_option(LOCALIZATION            False "???")
+add_integer_option(MAX_NUM_CCs             1     "????")
+add_boolean_option(MU_RECEIVER             False "????")
+add_boolean_option(PHYSIM                  True  "for L1 simulators (dlsim, ulsim, ...)")
+add_boolean_option(PHY_CONTEXT             True "not clear: must remain False for dlsim")  # NOTE(review): the default is True although the help text says it must remain False for dlsim; confirm
+add_boolean_option(PHY_EMUL                False "not clear: must remain False for dlsim")
+add_boolean_option(SMBV                    False "Rohde&Schwarz SMBV100A vector signal generator")
+add_boolean_option(DEBUG_PHY               False "Enable PHY layer debugging options")
+add_boolean_option(DEBUG_PHY_PROC          False "Enable debugging of PHY layer procedures")
+add_boolean_option(DEBUG_DLSCH             False "Enable debugging of DLSCH physical layer channel")
+
+##########################
+# 802.21 options (both disabled by default)
+##########################
+add_boolean_option(ENABLE_RAL              False "ENABLE 802.21 INTERFACE")
+add_boolean_option(USE_3GPP_ADDR_AS_LINK_ADDR False "As per attribute name")
+
+##########################
+# NAS LAYER OPTIONS (all enabled by default)
+##########################
+add_boolean_option(ENABLE_NAS_UE_LOGGING   True  "????")
+add_boolean_option(NAS_BUILT_IN_UE         True  "UE NAS layer present in this executable")
+add_boolean_option(NAS_UE                  True  "NAS UE INSTANCE (<> NAS_MME)")
+
+
+##########################
+# ACCESS STRATUM LAYER2 OPTIONS (both enabled by default)
+##########################
+add_boolean_option(MAC_CONTEXT             True  "specific to oaisim")
+add_boolean_option(JUMBO_FRAME             True  "ENABLE LARGE SDU in ACCESS STRATUM (larger than common MTU)")
+
+##########################
+# RLC LAYER OPTIONS
+##########################
+add_boolean_option(OPENAIR2                True  "Access Stratum layer 2 built in executable")
+# TRACE_RLC_PAYLOAD is declared once, with the debug options; the duplicate that stood here only repeated it with a help text copied from RLC_STOP_ON_LOST_PDU.
+add_boolean_option(RLC_STOP_ON_LOST_PDU    False "Fatal assert in this case")
+
+add_boolean_option(TRACE_RLC_MUTEX         True  "TRACE for RLC, possible problem in thread scheduling")
+add_boolean_option(TRACE_RLC_AM_BO         False "TRACE for RLC AM, TO BE CHANGED IN A MORE GENERAL FLAG")
+add_boolean_option(TRACE_RLC_AM_FREE_SDU   False "TRACE for RLC AM, TO BE CHANGED IN A MORE GENERAL FLAG")
+add_boolean_option(TRACE_RLC_AM_HOLE       False "TRACE for RLC AM, TO BE CHANGED IN A MORE GENERAL FLAG")
+add_boolean_option(TRACE_RLC_AM_PDU        False "TRACE for RLC AM, TO BE CHANGED IN A MORE GENERAL FLAG")
+add_boolean_option(TRACE_RLC_AM_RESEGMENT  False "TRACE for RLC AM, TO BE CHANGED IN A MORE GENERAL FLAG")
+add_boolean_option(TRACE_RLC_AM_RX         False "TRACE for RLC AM, TO BE CHANGED IN A MORE GENERAL FLAG")
+add_boolean_option(TRACE_RLC_AM_RX_DECODE  False "TRACE for RLC AM, TO BE CHANGED IN A MORE GENERAL FLAG")
+add_boolean_option(TRACE_RLC_AM_TX         False "TRACE for RLC AM, TO BE CHANGED IN A MORE GENERAL FLAG")
+add_boolean_option(TRACE_RLC_AM_TX_STATUS  False "TRACE for RLC AM, TO BE CHANGED IN A MORE GENERAL FLAG")
+add_boolean_option(TRACE_RLC_AM_STATUS_CREATION   False "TRACE for RLC AM, TO BE CHANGED IN A MORE GENERAL FLAG")
+
+add_boolean_option(STOP_ON_IP_TRAFFIC_OVERLOAD      False "")
+add_boolean_option(TRACE_RLC_UM_DAR        False "TRACE for RLC UM, TO BE CHANGED IN A MORE GENERAL FLAG")
+add_boolean_option(TRACE_RLC_UM_DISPLAY_ASCII_DATA  False "TRACE for RLC UM, TO BE CHANGED IN A MORE GENERAL FLAG")
+add_boolean_option(TRACE_RLC_UM_PDU        False "TRACE for RLC UM, TO BE CHANGED IN A MORE GENERAL FLAG")
+add_boolean_option(TRACE_RLC_UM_RX         False "TRACE for RLC UM, TO BE CHANGED IN A MORE GENERAL FLAG")
+add_boolean_option(TRACE_RLC_UM_SEGMENT    False "TRACE for RLC UM, TO BE CHANGED IN A MORE GENERAL FLAG")
+add_boolean_option(TRACE_RLC_UM_TX_STATUS  False "TRACE for RLC UM, TO BE CHANGED IN A MORE GENERAL FLAG")
+
+
+##########################
+# PDCP LAYER OPTIONS
+##########################
+add_boolean_option(PDCP_USE_NETLINK            False "For eNB, PDCP communicate with a NETLINK socket if connected to network driver, else could use a RT-FIFO")
+add_boolean_option(PDCP_USE_NETLINK_QUEUES     False "When PDCP_USE_NETLINK is true, incoming IP packets are stored in queues")
+add_boolean_option(LINK_ENB_PDCP_TO_IP_DRIVER  False "For eNB, PDCP communicate with a IP driver")
+add_boolean_option(LINK_ENB_PDCP_TO_GTPV1U     True  "For eNB, PDCP communicate with GTP-U protocol (eNB<->S-GW)")
+
+##########################
+# RRC LAYER OPTIONS
+##########################
+add_boolean_option(RRC_DEFAULT_RAB_IS_AM       False  "Otherwise it is UM, configure params are actually set in rrc_eNB.c:rrc_eNB_generate_defaultRRCConnectionReconfiguration(...)")
+# NOTE(review): RRC_DEFAULT_RAB_IS_AM is also declared in the "TO BE DOCUMENTED OPTIONS" section; the cache entry presumably keeps the help text of that first declaration.
+
+##########################
+# S1AP LAYER OPTIONS
+##########################
+# none
+
+# add the binary tree to the search path for include files
+#######################################################
+# We will find ConfigOAI.h after generation in target directory
+include_directories("${OPENAIR_BIN_DIR}")
+# add directories to find all include files
+# the internal rule is to use generic names such as defs.h
+# but each name is made unique by adding its relative path in the include directive
+# example: #include "RRC/LITE/defs.h"
+#find_path (include_dirs_all *.h ${OPENAIR_DIR})
+#find_path (include_dirs_all *.h PATHS /usr/include NO_CMAKE_PATH)
+#include_directories("${include_dirs_all}")
+
+# Legacy exact order: with ENB_MODE the openair2 COMMON/UTIL dirs are searched before the openair3 ones, otherwise openair3 comes first
+if(ENB_MODE)
+  include_directories("${OPENAIR2_DIR}/COMMON")
+  include_directories("${OPENAIR2_DIR}/UTIL")
+  include_directories("${OPENAIR2_DIR}/UTIL/LOG")
+  include_directories("${OPENAIR3_DIR}/COMMON")
+  include_directories("${OPENAIR3_DIR}/UTILS")
+else()
+  include_directories("${OPENAIR3_DIR}/COMMON")
+  include_directories("${OPENAIR3_DIR}/UTILS")
+  include_directories("${OPENAIR2_DIR}/COMMON")
+  include_directories("${OPENAIR2_DIR}/UTIL")
+  include_directories("${OPENAIR2_DIR}/UTIL/LOG")
+endif()
+include_directories("${NFAPI_DIR}/nfapi/public_inc")
+include_directories("${NFAPI_DIR}/common/public_inc")
+include_directories("${NFAPI_DIR}/pnf/public_inc")
+include_directories("${NFAPI_DIR}/nfapi/inc")
+include_directories("${NFAPI_DIR}/sim_common/inc")
+include_directories("${NFAPI_DIR}/pnf_sim/inc")
+include_directories("${OPENAIR1_DIR}")
+include_directories("${OPENAIR2_DIR}")
+include_directories("${OPENAIR2_DIR}/LAYER2/RLC")
+include_directories("${OPENAIR2_DIR}/LAYER2/RLC/AM_v9.3.0")
+include_directories("${OPENAIR2_DIR}/LAYER2/RLC/UM_v9.3.0")
+include_directories("${OPENAIR2_DIR}/LAYER2/RLC/TM_v9.3.0")
+include_directories("${OPENAIR2_DIR}/LAYER2/PDCP_v10.1.0")
+include_directories("${OPENAIR2_DIR}/RRC/LITE/MESSAGES")
+include_directories("${OPENAIR2_DIR}/RRC/LITE")
+include_directories("${OPENAIR3_DIR}/RAL-LTE/INTERFACE-802.21/INCLUDE")
+include_directories("${OPENAIR3_DIR}/RAL-LTE/LTE_RAL_ENB/INCLUDE")
+include_directories("${OPENAIR3_DIR}/RAL-LTE/LTE_RAL_UE/INCLUDE")
+include_directories("${OPENAIR_DIR}/common/utils")
+include_directories("${OPENAIR_DIR}/common/utils/itti")
+include_directories("${OPENAIR3_DIR}/NAS/COMMON")
+include_directories("${OPENAIR3_DIR}/NAS/COMMON/API/NETWORK")
+include_directories("${OPENAIR3_DIR}/NAS/COMMON/EMM/MSG")
+include_directories("${OPENAIR3_DIR}/NAS/COMMON/ESM/MSG")
+include_directories("${OPENAIR3_DIR}/NAS/COMMON/IES")
+include_directories("${OPENAIR3_DIR}/NAS/COMMON/UTIL")
+include_directories("${OPENAIR3_DIR}/SECU")
+include_directories("${OPENAIR3_DIR}/SCTP")
+include_directories("${OPENAIR3_DIR}/S1AP")
+include_directories("${OPENAIR2_DIR}/X2AP")
+include_directories("${OPENAIR3_DIR}/UDP")
+include_directories("${OPENAIR3_DIR}/GTPV1-U")
+include_directories("${OPENAIR_DIR}/targets/COMMON")
+include_directories("${OPENAIR_DIR}/targets/ARCH/COMMON")
+include_directories("${OPENAIR_DIR}/targets/ARCH/EXMIMO/USERSPACE/LIB/")
+include_directories("${OPENAIR_DIR}/targets/ARCH/EXMIMO/DEFS")
+include_directories("${OPENAIR2_DIR}/ENB_APP")
+include_directories("${OPENAIR2_DIR}/ENB_APP/CONTROL_MODULES/MAC")
+include_directories("${OPENAIR2_DIR}/ENB_APP/CONTROL_MODULES/RRC")
+include_directories("${OPENAIR2_DIR}/ENB_APP/CONTROL_MODULES/PDCP")
+include_directories("${OPENAIR2_DIR}/UTIL/OSA")
+include_directories("${OPENAIR2_DIR}/UTIL/LFDS/liblfds6.1.1/liblfds611/inc")
+include_directories("${OPENAIR2_DIR}/UTIL/LFDS/liblfds7.0.0/liblfds700/inc")
+include_directories("${OPENAIR2_DIR}/UTIL/MEM")
+include_directories("${OPENAIR2_DIR}/UTIL/LISTS")
+include_directories("${OPENAIR2_DIR}/UTIL/FIFO")
+include_directories("${OPENAIR2_DIR}/UTIL/OCG")
+include_directories("${OPENAIR2_DIR}/UTIL/MATH")
+include_directories("${OPENAIR2_DIR}/UTIL/TIMER")
+include_directories("${OPENAIR2_DIR}/UTIL/OMG")
+include_directories("${OPENAIR2_DIR}/UTIL/OTG")
+include_directories("${OPENAIR2_DIR}/UTIL/CLI")
+include_directories("${OPENAIR2_DIR}/UTIL/OPT")
+include_directories("${OPENAIR2_DIR}/UTIL/OMV")
+include_directories("${OPENAIR2_DIR}/RRC/LITE/MESSAGES")
+include_directories("${OPENAIR3_DIR}/GTPV1-U/nw-gtpv1u/shared")
+include_directories("${OPENAIR3_DIR}/GTPV1-U/nw-gtpv1u/include")
+include_directories("${OPENAIR_DIR}")
+
+# Utilities Library
+################
+# Select the protobuf grammar version of the FLPT messages (V3 not supported yet) and map it to FLPTDIR
+add_list1_option(FLPT_VERSION V2 "FLPT MSG  protobuf  grammar version" V2 V3)
+if ("${FLPT_VERSION}" STREQUAL "V2")  # quoted: an empty value must not break the if() syntax
+  set (FLPTDIR V2)
+elseif ("${FLPT_VERSION}" STREQUAL "V3")
+  set (FLPTDIR V3)
+else()  # fail early instead of continuing with an unset FLPTDIR and a broken message directory
+  message(FATAL_ERROR "Unsupported FLPT_VERSION '${FLPT_VERSION}': expected V2 or V3")
+endif()
+set(FLPT_MSG_DIR ${OPENAIR2_DIR}/ENB_APP/MESSAGES/${FLPTDIR} )
+set(FLPT_MSG_FILES
+  ${FLPT_MSG_DIR}/header.proto
+  ${FLPT_MSG_DIR}/flexran.proto
+  ${FLPT_MSG_DIR}/stats_common.proto
+  ${FLPT_MSG_DIR}/stats_messages.proto
+  ${FLPT_MSG_DIR}/time_common.proto
+  ${FLPT_MSG_DIR}/controller_commands.proto
+  ${FLPT_MSG_DIR}/mac_primitives.proto
+  ${FLPT_MSG_DIR}/config_messages.proto
+  ${FLPT_MSG_DIR}/config_common.proto
+  ${FLPT_MSG_DIR}/control_delegation.proto
+  )
+# Output directory for the generated C files; the generator command below runs while CMake configures, not at build time
+set(FLPT_C_DIR ${protobuf_generated_dir}/${FLPTDIR})
+#message("calling protoc_call=${protoc_call} FLPT_C_DIR=${FLPT_C_DIR} FLPT_MSG_FILES=${FLPT_MSG_FILES}")
+execute_process(COMMAND ${protoc_call} ${FLPT_C_DIR} ${FLPT_MSG_DIR} ${FLPT_MSG_FILES})  # NOTE(review): exit status is not checked; presumably generates the *.pb-c.c files - confirm
+file(GLOB FLPT_source ${FLPT_C_DIR}/*.c)  # every .c file found in FLPT_C_DIR at configure time
+set(FLPT_OAI_generated
+  ${FLPT_C_DIR}/header.pb-c.c
+  ${FLPT_C_DIR}/flexran.pb-c.c
+  ${FLPT_C_DIR}/stats_common.pb-c.c
+  ${FLPT_C_DIR}/stats_messages.pb-c.c
+  ${FLPT_C_DIR}/time_common.pb-c.c
+  ${FLPT_C_DIR}/controller_commands.pb-c.c
+  ${FLPT_C_DIR}/mac_primitives.pb-c.c
+  ${FLPT_C_DIR}/config_messages.pb-c.c
+  ${FLPT_C_DIR}/config_common.pb-c.c
+  ${FLPT_C_DIR}/control_delegation.pb-c.c
+  )
+# Header glob result is kept in flpt_h; it is not passed to the add_library call below
+file(GLOB flpt_h ${FLPT_C_DIR}/*.h)
+set(flpt_h ${flpt_h} )  # re-assigns the variable to its own value
+
+add_library(FLPT_MSG
+  ${FLPT_OAI_generated}
+  ${FLPT_source}  # NOTE(review): globbed from the same directory as the explicit list above, so entries likely overlap - confirm
+  )
+set(FLPT_MSG_LIB FLPT_MSG)
+#message("prpt c dir is : ${FLPT_C_DIR}")
+include_directories (${FLPT_C_DIR})  # directory-wide: adds FLPT_C_DIR to the include search path
+
+add_library(ASYNC_IF
+  ${OPENAIR2_DIR}/UTIL/ASYNC_IF/socket_link.c
+  ${OPENAIR2_DIR}/UTIL/ASYNC_IF/link_manager.c
+  ${OPENAIR2_DIR}/UTIL/ASYNC_IF/message_queue.c
+  ${OPENAIR2_DIR}/UTIL/ASYNC_IF/ringbuffer_queue.c
+  )
+set(ASYNC_IF_LIB ASYNC_IF)
+include_directories(${OPENAIR2_DIR}/UTIL/ASYNC_IF)
+
+add_library(FLEXRAN_AGENT
+  ${OPENAIR2_DIR}/ENB_APP/flexran_agent_handler.c
+  ${OPENAIR2_DIR}/ENB_APP/flexran_agent_common.c
+  ${OPENAIR2_DIR}/ENB_APP/flexran_agent_ran_api.c
+  ${OPENAIR2_DIR}/ENB_APP/flexran_agent_timer.c
+  ${OPENAIR2_DIR}/ENB_APP/flexran_agent_common_internal.c
+  ${OPENAIR2_DIR}/ENB_APP/CONTROL_MODULES/MAC/flexran_agent_mac.c
+  ${OPENAIR2_DIR}/ENB_APP/CONTROL_MODULES/RRC/flexran_agent_rrc.c
+  ${OPENAIR2_DIR}/ENB_APP/CONTROL_MODULES/PDCP/flexran_agent_pdcp.c
+  ${OPENAIR2_DIR}/ENB_APP/flexran_agent.c
+  ${OPENAIR2_DIR}/ENB_APP/flexran_agent_task_manager.c
+  ${OPENAIR2_DIR}/ENB_APP/flexran_agent_net_comm.c
+  ${OPENAIR2_DIR}/ENB_APP/flexran_agent_async.c
+  ${OPENAIR2_DIR}/ENB_APP/CONTROL_MODULES/MAC/flexran_agent_mac_internal.c
+  )
+set(FLEXRAN_AGENT_LIB FLEXRAN_AGENT)
+#include_directories(${OPENAIR2_DIR}/ENB_APP)
+
+set(PROTOBUF_LIB "protobuf-c")
+# Locate libyaml: the header directory and the library are stored in the LIBYAML_* cache entries
+include(FindPackageHandleStandardArgs)
+find_path(LIBYAML_INCLUDE_DIR
+  NAMES yaml.h)
+find_library(LIBYAML_LIBRARIES
+  NAMES yaml libyaml)
+find_package_handle_standard_args(Yaml DEFAULT_MSG LIBYAML_LIBRARIES LIBYAML_INCLUDE_DIR)
+mark_as_advanced(LIBYAML_INCLUDE_DIR LIBYAML_LIBRARIES)
+
+#set(PROTOBUF_LIB "protobuf") #for Cpp
+
+
+add_library(HASHTABLE
+  ${OPENAIR_DIR}/common/utils/hashtable/hashtable.c
+  ${OPENAIR_DIR}/common/utils/hashtable/obj_hashtable.c
+)
+include_directories(${OPENAIR_DIR}/common/utils/hashtable)
+
+if (MESSAGE_CHART_GENERATOR)
+  add_library(MSC
+    ${OPENAIR_DIR}/common/utils/msc/msc.c
+  )
+  set(MSC_LIB MSC)
+endif()
+include_directories(${OPENAIR_DIR}/common/utils/msc)
+
+set(UTIL_SRC
+  ${OPENAIR2_DIR}/UTIL/CLI/cli.c
+  ${OPENAIR2_DIR}/UTIL/CLI/cli_cmd.c
+  ${OPENAIR2_DIR}/UTIL/CLI/cli_server.c
+  ${OPENAIR2_DIR}/UTIL/FIFO/pad_list.c
+  ${OPENAIR2_DIR}/UTIL/LISTS/list.c
+  ${OPENAIR2_DIR}/UTIL/LISTS/list2.c
+  ${OPENAIR2_DIR}/UTIL/LOG/log.c
+  ${OPENAIR2_DIR}/UTIL/LOG/vcd_signal_dumper.c
+  ${OPENAIR2_DIR}/UTIL/MATH/oml.c
+  ${OPENAIR2_DIR}/UTIL/MEM/mem_block.c
+  ${OPENAIR2_DIR}/UTIL/OCG/OCG.c
+  ${OPENAIR2_DIR}/UTIL/OCG/OCG_create_dir.c
+  ${OPENAIR2_DIR}/UTIL/OCG/OCG_detect_file.c
+  ${OPENAIR2_DIR}/UTIL/OCG/OCG_generate_report.c
+  ${OPENAIR2_DIR}/UTIL/OCG/OCG_parse_filename.c
+  ${OPENAIR2_DIR}/UTIL/OCG/OCG_parse_XML.c
+  ${OPENAIR2_DIR}/UTIL/OCG/OCG_save_XML.c
+  ${OPENAIR2_DIR}/UTIL/OMG/common.c
+  ${OPENAIR2_DIR}/UTIL/OMG/grid.c
+  ${OPENAIR2_DIR}/UTIL/OMG/job.c
+  ${OPENAIR2_DIR}/UTIL/OMG/mobility_parser.c
+  ${OPENAIR2_DIR}/UTIL/OMG/omg.c
+  #${OPENAIR2_DIR}/UTIL/OMG/omg_hashtable.c
+  ${OPENAIR2_DIR}/UTIL/OMG/rwalk.c
+  ${OPENAIR2_DIR}/UTIL/OMG/rwp.c
+  ${OPENAIR2_DIR}/UTIL/OMG/static.c
+  ${OPENAIR2_DIR}/UTIL/OMG/steadystaterwp.c
+  ${OPENAIR2_DIR}/UTIL/OMG/trace.c
+  ${OPENAIR2_DIR}/UTIL/OMG/trace_hashtable.c
+  ${OPENAIR2_DIR}/UTIL/OPT/probe.c
+  ${OPENAIR2_DIR}/UTIL/OTG/otg_tx.c
+  ${OPENAIR2_DIR}/UTIL/OTG/otg.c
+  ${OPENAIR2_DIR}/UTIL/OTG/otg_kpi.c
+  ${OPENAIR2_DIR}/UTIL/OTG/otg_models.c
+  ${OPENAIR2_DIR}/UTIL/OTG/otg_form.c
+  ${OPENAIR2_DIR}/UTIL/OTG/otg_rx.c
+  )
+add_library(UTIL ${UTIL_SRC})
+
+#set(OMG_SUMO_SRC
+#  ${OPENAIR2_DIR}/UTIL/OMG/client_traci_OMG.c
+#  ${OPENAIR2_DIR}/UTIL/OMG/id_manager.c
+#  ${OPENAIR2_DIR}/UTIL/OMG/sumo.c
+#  ${OPENAIR2_DIR}/UTIL/OMG/socket_traci_OMG.c
+#  ${OPENAIR2_DIR}/UTIL/OMG/storage_traci_OMG.c
+#  )
+#add_library(OMG_SUMO ${OMG_SUMO_SRC})
+
+set(SECU_OSA_SRC
+  ${OPENAIR2_DIR}/UTIL/OSA/osa_key_deriver.c
+  ${OPENAIR2_DIR}/UTIL/OSA/osa_rijndael.c
+  ${OPENAIR2_DIR}/UTIL/OSA/osa_snow3g.c
+  ${OPENAIR2_DIR}/UTIL/OSA/osa_stream_eea.c
+  ${OPENAIR2_DIR}/UTIL/OSA/osa_stream_eia.c
+  )
+add_library(SECU_OSA ${SECU_OSA_SRC})
+
+set(SECU_CN_SRC
+  ${OPENAIR3_DIR}/SECU/kdf.c
+  ${OPENAIR3_DIR}/SECU/rijndael.c
+  ${OPENAIR3_DIR}/SECU/snow3g.c
+  ${OPENAIR3_DIR}/SECU/key_nas_deriver.c
+  ${OPENAIR3_DIR}/SECU/nas_stream_eea1.c
+  ${OPENAIR3_DIR}/SECU/nas_stream_eia1.c
+  ${OPENAIR3_DIR}/SECU/nas_stream_eea2.c
+  ${OPENAIR3_DIR}/SECU/nas_stream_eia2.c
+  )
+add_library(SECU_CN ${SECU_CN_SRC})
+
+# Scheduler
+################################
+set(SCHED_SRC
+  ${OPENAIR1_DIR}/SCHED/fapi_l1.c
+  ${OPENAIR1_DIR}/SCHED/phy_procedures_lte_eNb.c
+  ${OPENAIR1_DIR}/SCHED/phy_procedures_lte_ue.c
+  ${OPENAIR1_DIR}/SCHED/phy_procedures_lte_common.c
+  ${OPENAIR1_DIR}/SCHED/prach_procedures.c
+  ${OPENAIR1_DIR}/SCHED/ru_procedures.c
+#  ${OPENAIR1_DIR}/SCHED/phy_mac_stub.c
+  ${OPENAIR1_DIR}/SCHED/pucch_pc.c
+  ${OPENAIR1_DIR}/SCHED/pusch_pc.c
+  ${OPENAIR1_DIR}/SCHED/srs_pc.c
+)
+add_library(SCHED_LIB ${SCHED_SRC})
+
+set(SCHED_SRC_UE
+  ${OPENAIR1_DIR}/SCHED/phy_procedures_lte_ue.c
+  ${OPENAIR1_DIR}/SCHED/phy_procedures_lte_common.c
+  ${OPENAIR1_DIR}/SCHED/ru_procedures.c
+  ${OPENAIR1_DIR}/SCHED/prach_procedures.c
+  ${OPENAIR1_DIR}/SCHED/pucch_pc.c
+  ${OPENAIR1_DIR}/SCHED/pusch_pc.c
+  ${OPENAIR1_DIR}/SCHED/srs_pc.c
+)
+add_library(SCHED_UE_LIB ${SCHED_SRC_UE})
+
+# nFAPI
+#################################
+set(NFAPI_COMMON_SRC
+  ${NFAPI_DIR}/common/src/debug.c
+)
+add_library(NFAPI_COMMON_LIB ${NFAPI_COMMON_SRC})
+
+include_directories(${NFAPI_DIR}/common/public_inc)
+
+set(NFAPI_SRC
+  ${NFAPI_DIR}/nfapi/src/nfapi.c
+  ${NFAPI_DIR}/nfapi/src/nfapi_p4.c
+  ${NFAPI_DIR}/nfapi/src/nfapi_p5.c
+  ${NFAPI_DIR}/nfapi/src/nfapi_p7.c
+)
+add_library(NFAPI_LIB ${NFAPI_SRC})
+
+include_directories(${NFAPI_DIR}/nfapi/public_inc)
+include_directories(${NFAPI_DIR}/nfapi/inc)
+
+set(NFAPI_PNF_SRC
+  ${NFAPI_DIR}/pnf/src/pnf.c
+  ${NFAPI_DIR}/pnf/src/pnf_interface.c
+  ${NFAPI_DIR}/pnf/src/pnf_p7.c
+  ${NFAPI_DIR}/pnf/src/pnf_p7_interface.c
+)
+add_library(NFAPI_PNF_LIB ${NFAPI_PNF_SRC})
+
+include_directories(${NFAPI_DIR}/pnf/public_inc)
+include_directories(${NFAPI_DIR}/pnf/inc)
+
+set(NFAPI_VNF_SRC
+  ${NFAPI_DIR}/vnf/src/vnf.c
+  ${NFAPI_DIR}/vnf/src/vnf_interface.c
+  ${NFAPI_DIR}/vnf/src/vnf_p7.c
+  ${NFAPI_DIR}/vnf/src/vnf_p7_interface.c
+)
+add_library(NFAPI_VNF_LIB ${NFAPI_VNF_SRC})
+
+include_directories(${NFAPI_DIR}/vnf/public_inc)
+include_directories(${NFAPI_DIR}/vnf/inc)
+
+# nFAPI user defined code
+#############################
+set(NFAPI_USER_SRC
+  ${NFAPI_USER_DIR}/nfapi.c
+  ${NFAPI_USER_DIR}/nfapi_pnf.c
+  ${NFAPI_USER_DIR}/nfapi_vnf.c
+)
+add_library(NFAPI_USER_LIB ${NFAPI_USER_SRC})
+include_directories(${NFAPI_USER_DIR})
+
+# Layer 1
+#############################
+set(PHY_TURBOSRC
+  ${OPENAIR1_DIR}/PHY/CODING/3gpplte_sse.c
+  ${OPENAIR1_DIR}/PHY/CODING/3gpplte.c
+  ${OPENAIR1_DIR}/PHY/CODING/3gpplte_turbo_decoder_sse_8bit.c
+  ${OPENAIR1_DIR}/PHY/CODING/3gpplte_turbo_decoder_sse_16bit.c
+  ${OPENAIR1_DIR}/PHY/CODING/3gpplte_turbo_decoder_avx2_16bit.c
+  ${OPENAIR1_DIR}/PHY/CODING/3gpplte_turbo_decoder.c
+)
+set(PHY_TURBOIF
+  ${OPENAIR1_DIR}/PHY/CODING/coding_load.c
+)
+# Turbo coder sources go into a separate MODULE library; PHY_SRC below lists only PHY_TURBOIF (coding_load.c) from this group
+add_library(coding MODULE ${PHY_TURBOSRC} )
+set(PHY_SRC
+  # depend on code generation from asn1c
+  ${RRC_FULL_DIR}/asn1_constants.h
+  # actual source
+  ${OPENAIR1_DIR}/PHY/LTE_TRANSPORT/pss.c
+  ${OPENAIR1_DIR}/PHY/LTE_TRANSPORT/sss.c
+  ${OPENAIR1_DIR}/PHY/LTE_TRANSPORT/pilots.c
+  ${OPENAIR1_DIR}/PHY/LTE_TRANSPORT/pilots_mbsfn.c
+  ${OPENAIR1_DIR}/PHY/LTE_TRANSPORT/dlsch_coding.c
+  ${OPENAIR1_DIR}/PHY/LTE_TRANSPORT/dlsch_modulation.c
+  ${OPENAIR1_DIR}/PHY/LTE_TRANSPORT/dlsch_demodulation.c
+  ${OPENAIR1_DIR}/PHY/LTE_TRANSPORT/dlsch_llr_computation.c
+  ${OPENAIR1_DIR}/PHY/LTE_TRANSPORT/power_control.c
+  ${OPENAIR1_DIR}/PHY/LTE_TRANSPORT/dlsch_decoding.c
+  ${OPENAIR1_DIR}/PHY/LTE_TRANSPORT/dlsch_scrambling.c
+  ${OPENAIR1_DIR}/PHY/LTE_TRANSPORT/dci_tools.c
+  ${OPENAIR1_DIR}/PHY/LTE_TRANSPORT/uci_tools.c
+  ${OPENAIR1_DIR}/PHY/LTE_TRANSPORT/lte_mcs.c
+  ${OPENAIR1_DIR}/PHY/LTE_TRANSPORT/pbch.c
+  ${OPENAIR1_DIR}/PHY/LTE_TRANSPORT/dci.c
+  ${OPENAIR1_DIR}/PHY/LTE_TRANSPORT/edci.c
+  ${OPENAIR1_DIR}/PHY/LTE_TRANSPORT/phich.c
+  ${OPENAIR1_DIR}/PHY/LTE_TRANSPORT/pcfich.c
+  ${OPENAIR1_DIR}/PHY/LTE_TRANSPORT/pucch.c
+  ${OPENAIR1_DIR}/PHY/LTE_TRANSPORT/prach.c
+  ${OPENAIR1_DIR}/PHY/LTE_TRANSPORT/pmch.c
+  ${OPENAIR1_DIR}/PHY/LTE_TRANSPORT/pch.c
+  ${OPENAIR1_DIR}/PHY/LTE_TRANSPORT/group_hopping.c
+  ${OPENAIR1_DIR}/PHY/LTE_TRANSPORT/srs_modulation.c
+  ${OPENAIR1_DIR}/PHY/LTE_TRANSPORT/drs_modulation.c
+  ${OPENAIR1_DIR}/PHY/LTE_TRANSPORT/ulsch_modulation.c
+  ${OPENAIR1_DIR}/PHY/LTE_TRANSPORT/ulsch_demodulation.c
+  ${OPENAIR1_DIR}/PHY/LTE_TRANSPORT/ulsch_coding.c
+  ${OPENAIR1_DIR}/PHY/LTE_TRANSPORT/ulsch_decoding.c
+  ${OPENAIR1_DIR}/PHY/LTE_TRANSPORT/rar_tools.c
+  ${OPENAIR1_DIR}/PHY/LTE_TRANSPORT/print_stats.c
+  ${OPENAIR1_DIR}/PHY/LTE_TRANSPORT/initial_sync.c
+  ${OPENAIR1_DIR}/PHY/LTE_TRANSPORT/if4_tools.c
+  ${OPENAIR1_DIR}/PHY/LTE_TRANSPORT/if5_tools.c
+  ${OPENAIR1_DIR}/PHY/MODULATION/ofdm_mod.c
+  ${OPENAIR1_DIR}/PHY/MODULATION/slot_fep.c
+  ${OPENAIR1_DIR}/PHY/MODULATION/slot_fep_mbsfn.c
+  ${OPENAIR1_DIR}/PHY/MODULATION/slot_fep_ul.c
+  ${OPENAIR1_DIR}/PHY/MODULATION/ul_7_5_kHz.c
+  ${OPENAIR1_DIR}/PHY/MODULATION/beamforming.c
+  ${OPENAIR1_DIR}/PHY/MODULATION/compute_bf_weights.c
+  ${OPENAIR1_DIR}/PHY/LTE_ESTIMATION/freq_equalization.c
+  ${OPENAIR1_DIR}/PHY/LTE_ESTIMATION/lte_sync_time.c
+  ${OPENAIR1_DIR}/PHY/LTE_ESTIMATION/lte_sync_timefreq.c
+  ${OPENAIR1_DIR}/PHY/LTE_ESTIMATION/lte_adjust_sync.c
+  ${OPENAIR1_DIR}/PHY/LTE_ESTIMATION/lte_dl_channel_estimation.c
+  ${OPENAIR1_DIR}/PHY/LTE_ESTIMATION/lte_dl_bf_channel_estimation.c
+  ${OPENAIR1_DIR}/PHY/LTE_ESTIMATION/lte_dl_mbsfn_channel_estimation.c
+  ${OPENAIR1_DIR}/PHY/LTE_ESTIMATION/lte_ul_channel_estimation.c
+  ${OPENAIR1_DIR}/PHY/LTE_ESTIMATION/lte_est_freq_offset.c
+  ${OPENAIR1_DIR}/PHY/LTE_ESTIMATION/lte_ue_measurements.c
+  ${OPENAIR1_DIR}/PHY/LTE_ESTIMATION/lte_eNB_measurements.c
+  ${OPENAIR1_DIR}/PHY/LTE_ESTIMATION/adjust_gain.c
+  ${OPENAIR1_DIR}/PHY/LTE_REFSIG/lte_dl_cell_spec.c
+  ${OPENAIR1_DIR}/PHY/LTE_REFSIG/lte_dl_uespec.c
+  ${OPENAIR1_DIR}/PHY/LTE_REFSIG/lte_gold.c
+  ${OPENAIR1_DIR}/PHY/LTE_REFSIG/lte_gold_mbsfn.c
+  ${OPENAIR1_DIR}/PHY/LTE_REFSIG/lte_dl_mbsfn.c
+  ${OPENAIR1_DIR}/PHY/LTE_REFSIG/lte_ul_ref.c
+  ${OPENAIR1_DIR}/PHY/CODING/lte_segmentation.c
+  ${OPENAIR1_DIR}/PHY/CODING/ccoding_byte.c
+  ${OPENAIR1_DIR}/PHY/CODING/ccoding_byte_lte.c
+  ${OPENAIR1_DIR}/PHY/CODING/crc_byte.c
+  ${PHY_TURBOIF}
+  ${OPENAIR1_DIR}/PHY/CODING/lte_rate_matching.c
+  ${OPENAIR1_DIR}/PHY/CODING/viterbi.c
+  ${OPENAIR1_DIR}/PHY/CODING/viterbi_lte.c
+  ${OPENAIR1_DIR}/PHY/INIT/lte_init.c
+  ${OPENAIR1_DIR}/PHY/INIT/lte_init_ru.c
+  ${OPENAIR1_DIR}/PHY/INIT/lte_init_ue.c
+  ${OPENAIR1_DIR}/PHY/INIT/init_top.c
+  ${OPENAIR1_DIR}/PHY/INIT/lte_parms.c
+  ${OPENAIR1_DIR}/PHY/INIT/lte_param_init.c
+  ${OPENAIR1_DIR}/PHY/TOOLS/file_output.c
+  ${OPENAIR1_DIR}/PHY/TOOLS/cadd_vv.c
+  ${OPENAIR1_DIR}/PHY/TOOLS/lte_dfts.c
+  ${OPENAIR1_DIR}/PHY/TOOLS/log2_approx.c
+  ${OPENAIR1_DIR}/PHY/TOOLS/cmult_sv.c
+  ${OPENAIR1_DIR}/PHY/TOOLS/cmult_vv.c
+  ${OPENAIR1_DIR}/PHY/TOOLS/cdot_prod.c
+  ${OPENAIR1_DIR}/PHY/TOOLS/signal_energy.c
+  ${OPENAIR1_DIR}/PHY/TOOLS/dB_routines.c
+  ${OPENAIR1_DIR}/PHY/TOOLS/sqrt.c
+  ${OPENAIR1_DIR}/PHY/TOOLS/time_meas.c
+  ${OPENAIR1_DIR}/PHY/TOOLS/lut.c
+  )
+
+set(PHY_SRC_UE
+  # depend on code generation from asn1c
+  ${RRC_FULL_DIR}/asn1_constants.h
+  # actual source
+  ${OPENAIR1_DIR}/PHY/LTE_TRANSPORT/pss.c
+  ${OPENAIR1_DIR}/PHY/LTE_TRANSPORT/sss.c
+  ${OPENAIR1_DIR}/PHY/LTE_TRANSPORT/pilots.c
+  ${OPENAIR1_DIR}/PHY/LTE_TRANSPORT/pilots_mbsfn.c
+  ${OPENAIR1_DIR}/PHY/LTE_TRANSPORT/dlsch_coding.c
+  ${OPENAIR1_DIR}/PHY/LTE_TRANSPORT/dlsch_modulation.c
+  ${OPENAIR1_DIR}/PHY/LTE_TRANSPORT/dlsch_demodulation.c
+  ${OPENAIR1_DIR}/PHY/LTE_TRANSPORT/dlsch_llr_computation.c
+  ${OPENAIR1_DIR}/PHY/LTE_TRANSPORT/power_control.c
+  ${OPENAIR1_DIR}/PHY/LTE_TRANSPORT/dlsch_decoding.c
+  ${OPENAIR1_DIR}/PHY/LTE_TRANSPORT/dlsch_scrambling.c
+  ${OPENAIR1_DIR}/PHY/LTE_TRANSPORT/dci_tools.c
+  ${OPENAIR1_DIR}/PHY/LTE_TRANSPORT/uci_tools.c
+  ${OPENAIR1_DIR}/PHY/LTE_TRANSPORT/lte_mcs.c
+  ${OPENAIR1_DIR}/PHY/LTE_TRANSPORT/pbch.c
+  ${OPENAIR1_DIR}/PHY/LTE_TRANSPORT/dci.c
+  ${OPENAIR1_DIR}/PHY/LTE_TRANSPORT/edci.c
+  ${OPENAIR1_DIR}/PHY/LTE_TRANSPORT/phich.c
+  ${OPENAIR1_DIR}/PHY/LTE_TRANSPORT/pcfich.c
+  ${OPENAIR1_DIR}/PHY/LTE_TRANSPORT/pucch.c
+  ${OPENAIR1_DIR}/PHY/LTE_TRANSPORT/prach.c
+  ${OPENAIR1_DIR}/PHY/LTE_TRANSPORT/pmch.c
+  ${OPENAIR1_DIR}/PHY/LTE_TRANSPORT/pch.c
+  ${OPENAIR1_DIR}/PHY/LTE_TRANSPORT/group_hopping.c
+  ${OPENAIR1_DIR}/PHY/LTE_TRANSPORT/srs_modulation.c
+  ${OPENAIR1_DIR}/PHY/LTE_TRANSPORT/drs_modulation.c
+  ${OPENAIR1_DIR}/PHY/LTE_TRANSPORT/ulsch_modulation.c
+  ${OPENAIR1_DIR}/PHY/LTE_TRANSPORT/ulsch_demodulation.c
+  ${OPENAIR1_DIR}/PHY/LTE_TRANSPORT/ulsch_coding.c
+  ${OPENAIR1_DIR}/PHY/LTE_TRANSPORT/ulsch_decoding.c
+  ${OPENAIR1_DIR}/PHY/LTE_TRANSPORT/rar_tools.c
+  ${OPENAIR1_DIR}/PHY/LTE_TRANSPORT/print_stats.c
+  ${OPENAIR1_DIR}/PHY/LTE_TRANSPORT/initial_sync.c
+  ${OPENAIR1_DIR}/PHY/LTE_TRANSPORT/if4_tools.c
+  ${OPENAIR1_DIR}/PHY/LTE_TRANSPORT/if5_tools.c
+  ${OPENAIR1_DIR}/PHY/MODULATION/ofdm_mod.c
+  ${OPENAIR1_DIR}/PHY/MODULATION/slot_fep.c
+  ${OPENAIR1_DIR}/PHY/MODULATION/slot_fep_mbsfn.c
+  ${OPENAIR1_DIR}/PHY/MODULATION/slot_fep_ul.c
+  ${OPENAIR1_DIR}/PHY/MODULATION/ul_7_5_kHz.c
+  ${OPENAIR1_DIR}/PHY/MODULATION/beamforming.c
+  ${OPENAIR1_DIR}/PHY/MODULATION/compute_bf_weights.c
+  ${OPENAIR1_DIR}/PHY/LTE_ESTIMATION/freq_equalization.c
+  ${OPENAIR1_DIR}/PHY/LTE_ESTIMATION/lte_sync_time.c
+  ${OPENAIR1_DIR}/PHY/LTE_ESTIMATION/lte_sync_timefreq.c
+  ${OPENAIR1_DIR}/PHY/LTE_ESTIMATION/lte_adjust_sync.c
+  ${OPENAIR1_DIR}/PHY/LTE_ESTIMATION/lte_dl_channel_estimation.c
+  ${OPENAIR1_DIR}/PHY/LTE_ESTIMATION/lte_dl_bf_channel_estimation.c
+  ${OPENAIR1_DIR}/PHY/LTE_ESTIMATION/lte_dl_mbsfn_channel_estimation.c
+  ${OPENAIR1_DIR}/PHY/LTE_ESTIMATION/lte_ul_channel_estimation.c
+  ${OPENAIR1_DIR}/PHY/LTE_ESTIMATION/lte_est_freq_offset.c
+  ${OPENAIR1_DIR}/PHY/LTE_ESTIMATION/lte_ue_measurements.c
+  ${OPENAIR1_DIR}/PHY/LTE_ESTIMATION/lte_eNB_measurements.c
+  ${OPENAIR1_DIR}/PHY/LTE_ESTIMATION/adjust_gain.c
+  ${OPENAIR1_DIR}/PHY/LTE_REFSIG/lte_dl_cell_spec.c
+  ${OPENAIR1_DIR}/PHY/LTE_REFSIG/lte_dl_uespec.c
+  ${OPENAIR1_DIR}/PHY/LTE_REFSIG/lte_gold.c
+  ${OPENAIR1_DIR}/PHY/LTE_REFSIG/lte_gold_mbsfn.c
+  ${OPENAIR1_DIR}/PHY/LTE_REFSIG/lte_dl_mbsfn.c
+  ${OPENAIR1_DIR}/PHY/LTE_REFSIG/lte_ul_ref.c
+  ${OPENAIR1_DIR}/PHY/CODING/lte_segmentation.c
+  ${OPENAIR1_DIR}/PHY/CODING/ccoding_byte.c
+  ${OPENAIR1_DIR}/PHY/CODING/ccoding_byte_lte.c
+  ${OPENAIR1_DIR}/PHY/CODING/3gpplte_sse.c
+  ${OPENAIR1_DIR}/PHY/CODING/crc_byte.c
+  ${PHY_TURBOIF}
+  ${OPENAIR1_DIR}/PHY/CODING/lte_rate_matching.c
+  ${OPENAIR1_DIR}/PHY/CODING/viterbi.c
+  ${OPENAIR1_DIR}/PHY/CODING/viterbi_lte.c
+  ${OPENAIR1_DIR}/PHY/INIT/lte_init_ru.c
+  ${OPENAIR1_DIR}/PHY/INIT/lte_init_ue.c
+  ${OPENAIR1_DIR}/PHY/INIT/init_top.c
+  ${OPENAIR1_DIR}/PHY/INIT/lte_parms.c
+  ${OPENAIR1_DIR}/PHY/INIT/lte_param_init.c
+  ${OPENAIR1_DIR}/PHY/TOOLS/file_output.c
+  ${OPENAIR1_DIR}/PHY/TOOLS/cadd_vv.c
+  ${OPENAIR1_DIR}/PHY/TOOLS/lte_dfts.c
+  ${OPENAIR1_DIR}/PHY/TOOLS/log2_approx.c
+  ${OPENAIR1_DIR}/PHY/TOOLS/cmult_sv.c
+  ${OPENAIR1_DIR}/PHY/TOOLS/cmult_vv.c
+  ${OPENAIR1_DIR}/PHY/TOOLS/cdot_prod.c
+  ${OPENAIR1_DIR}/PHY/TOOLS/signal_energy.c
+  ${OPENAIR1_DIR}/PHY/TOOLS/dB_routines.c
+  ${OPENAIR1_DIR}/PHY/TOOLS/sqrt.c
+  ${OPENAIR1_DIR}/PHY/TOOLS/time_meas.c
+  ${OPENAIR1_DIR}/PHY/TOOLS/lut.c
+  )
+# Optional smbv.c source. list(APPEND) keeps PHY_SRC a proper list; a single quoted "<list> <file>" string would fuse the new path with the last entry
+if (SMBV)
+  list(APPEND PHY_SRC ${OPENAIR1_DIR}/PHY/TOOLS/smbv.c)
+endif ()
+# AVX2 builds add the AVX2 LLR computation source to both PHY lists; the variable is quoted so an empty value cannot break if()
+if ("${COMPILATION_AVX2}" STREQUAL "True")
+  list(APPEND PHY_SRC ${OPENAIR1_DIR}/PHY/LTE_TRANSPORT/dlsch_llr_computation_avx2.c)
+  list(APPEND PHY_SRC_UE ${OPENAIR1_DIR}/PHY/LTE_TRANSPORT/dlsch_llr_computation_avx2.c)
+endif ()
+
+add_library(PHY ${PHY_SRC})
+add_library(PHY_UE ${PHY_SRC_UE})
+
+#Layer 2 library
+#####################
+set(MAC_DIR ${OPENAIR2_DIR}/LAYER2/MAC)
+set(PHY_INTERFACE_DIR ${OPENAIR2_DIR}/PHY_INTERFACE)
+set(RLC_DIR ${OPENAIR2_DIR}/LAYER2/RLC)
+set(RLC_UM_DIR ${OPENAIR2_DIR}/LAYER2/RLC/UM_v9.3.0)
+set(RLC_AM_DIR ${OPENAIR2_DIR}/LAYER2/RLC/AM_v9.3.0)
+set(RLC_TM_DIR ${OPENAIR2_DIR}/LAYER2/RLC/TM_v9.3.0)
+set(RRC_DIR ${OPENAIR2_DIR}/RRC/LITE)
+set(PDCP_DIR  ${OPENAIR2_DIR}/LAYER2/PDCP_v10.1.0)
+set(L2_SRC
+  ${OPENAIR2_DIR}/LAYER2/openair2_proc.c
+  ${PDCP_DIR}/pdcp.c
+  ${PDCP_DIR}/pdcp_fifo.c
+  ${PDCP_DIR}/pdcp_sequence_manager.c
+  ${PDCP_DIR}/pdcp_primitives.c
+  ${PDCP_DIR}/pdcp_util.c
+  ${PDCP_DIR}/pdcp_security.c
+  ${PDCP_DIR}/pdcp_netlink.c
+  ${RLC_AM_DIR}/rlc_am.c
+  ${RLC_AM_DIR}/rlc_am_init.c
+  ${RLC_AM_DIR}/rlc_am_timer_poll_retransmit.c
+  ${RLC_AM_DIR}/rlc_am_timer_reordering.c
+  ${RLC_AM_DIR}/rlc_am_timer_status_prohibit.c
+  ${RLC_AM_DIR}/rlc_am_segment.c
+  ${RLC_AM_DIR}/rlc_am_segments_holes.c
+  ${RLC_AM_DIR}/rlc_am_in_sdu.c
+  ${RLC_AM_DIR}/rlc_am_receiver.c
+  ${RLC_AM_DIR}/rlc_am_retransmit.c
+  ${RLC_AM_DIR}/rlc_am_windows.c
+  ${RLC_AM_DIR}/rlc_am_rx_list.c
+  ${RLC_AM_DIR}/rlc_am_reassembly.c
+  ${RLC_AM_DIR}/rlc_am_status_report.c
+  ${RLC_TM_DIR}/rlc_tm.c
+  ${RLC_TM_DIR}/rlc_tm_init.c
+  ${RLC_UM_DIR}/rlc_um.c
+  ${RLC_UM_DIR}/rlc_um_fsm.c
+  ${RLC_UM_DIR}/rlc_um_control_primitives.c
+  ${RLC_UM_DIR}/rlc_um_segment.c
+  ${RLC_UM_DIR}/rlc_um_reassembly.c
+  ${RLC_UM_DIR}/rlc_um_receiver.c
+  ${RLC_UM_DIR}/rlc_um_dar.c
+  ${RLC_DIR}/rlc_mac.c
+  ${RLC_DIR}/rlc.c
+  ${RLC_DIR}/rlc_rrc.c
+  ${RLC_DIR}/rlc_mpls.c
+  ${RRC_DIR}/rrc_UE.c
+  ${RRC_DIR}/rrc_eNB.c
+  ${RRC_DIR}/rrc_eNB_S1AP.c
+  ${RRC_DIR}/rrc_eNB_UE_context.c
+  ${RRC_DIR}/rrc_common.c
+  ${RRC_DIR}/L2_interface.c
+  ${RRC_DIR}/L2_interface_common.c
+  ${RRC_DIR}/L2_interface_ue.c
+  )
+
+set(L2_SRC_UE
+  ${PDCP_DIR}/pdcp.c
+  ${PDCP_DIR}/pdcp_fifo.c
+  ${PDCP_DIR}/pdcp_sequence_manager.c
+  ${PDCP_DIR}/pdcp_primitives.c
+  ${PDCP_DIR}/pdcp_util.c
+  ${PDCP_DIR}/pdcp_security.c
+  ${PDCP_DIR}/pdcp_netlink.c
+  ${RLC_AM_DIR}/rlc_am.c
+  ${RLC_AM_DIR}/rlc_am_init.c
+  ${RLC_AM_DIR}/rlc_am_timer_poll_retransmit.c
+  ${RLC_AM_DIR}/rlc_am_timer_reordering.c
+  ${RLC_AM_DIR}/rlc_am_timer_status_prohibit.c
+  ${RLC_AM_DIR}/rlc_am_segment.c
+  ${RLC_AM_DIR}/rlc_am_segments_holes.c
+  ${RLC_AM_DIR}/rlc_am_in_sdu.c
+  ${RLC_AM_DIR}/rlc_am_receiver.c
+  ${RLC_AM_DIR}/rlc_am_retransmit.c
+  ${RLC_AM_DIR}/rlc_am_windows.c
+  ${RLC_AM_DIR}/rlc_am_rx_list.c
+  ${RLC_AM_DIR}/rlc_am_reassembly.c
+  ${RLC_AM_DIR}/rlc_am_status_report.c
+  ${RLC_TM_DIR}/rlc_tm.c
+  ${RLC_TM_DIR}/rlc_tm_init.c
+  ${RLC_UM_DIR}/rlc_um.c
+  ${RLC_UM_DIR}/rlc_um_fsm.c
+  ${RLC_UM_DIR}/rlc_um_control_primitives.c
+  ${RLC_UM_DIR}/rlc_um_segment.c
+  ${RLC_UM_DIR}/rlc_um_reassembly.c
+  ${RLC_UM_DIR}/rlc_um_receiver.c
+  ${RLC_UM_DIR}/rlc_um_dar.c
+  ${RLC_DIR}/rlc_mac.c
+  ${RLC_DIR}/rlc.c
+  ${RLC_DIR}/rlc_rrc.c
+  ${RLC_DIR}/rlc_mpls.c
+  ${RRC_DIR}/rrc_UE.c
+  ${RRC_DIR}/rrc_common.c
+  ${RRC_DIR}/L2_interface_common.c
+  ${RRC_DIR}/L2_interface_ue.c
+  )
+
+set (MAC_SRC
+  ${PHY_INTERFACE_DIR}/IF_Module.c
+  ${MAC_DIR}/main.c
+  ${MAC_DIR}/main_ue.c
+  ${MAC_DIR}/ue_procedures.c
+  ${MAC_DIR}/ra_procedures.c
+  ${MAC_DIR}/l1_helpers.c
+  ${MAC_DIR}/rar_tools.c
+  ${MAC_DIR}/rar_tools_ue.c
+  ${MAC_DIR}/eNB_scheduler.c
+  ${MAC_DIR}/eNB_scheduler_dlsch.c
+  ${MAC_DIR}/eNB_scheduler_ulsch.c
+  ${MAC_DIR}/eNB_scheduler_mch.c
+  ${MAC_DIR}/eNB_scheduler_bch.c
+  ${MAC_DIR}/eNB_scheduler_primitives.c
+  ${MAC_DIR}/eNB_scheduler_RA.c
+  ${MAC_DIR}/pre_processor.c
+  ${MAC_DIR}/config.c
+  ${MAC_DIR}/config_ue.c
+ )
+
+set (MAC_SRC_UE
+  ${MAC_DIR}/main_ue.c
+  ${MAC_DIR}/ue_procedures.c
+  ${MAC_DIR}/ra_procedures.c
+  ${MAC_DIR}/l1_helpers.c
+  ${MAC_DIR}/rar_tools_ue.c
+  ${MAC_DIR}/config_ue.c
+ )
+
+set (ENB_APP_SRC
+  ${OPENAIR2_DIR}/ENB_APP/enb_app.c
+  ${OPENAIR2_DIR}/ENB_APP/enb_config.c
+  ${OPENAIR2_DIR}/ENB_APP/RRC_config_tools.c
+  )
+
+# Full layer 2 library: everything listed in L2_SRC, MAC_SRC and ENB_APP_SRC
+# (these lists are defined just above).
+add_library(L2 ${L2_SRC} ${MAC_SRC} ${ENB_APP_SRC})
+# Not compiled; the entry below was already commented out:
+#  ${OPENAIR2_DIR}/RRC/L2_INTERFACE/openair_rrc_L2_interface.c)
+
+# UE-only layer 2 library: the reduced lists L2_SRC_UE and MAC_SRC_UE,
+# which leave out the eNB RRC files, the eNB MAC scheduler and ENB_APP_SRC.
+add_library(L2_UE
+  ${L2_SRC_UE} ${MAC_SRC_UE})
+
+
+include_directories(${NFAPI_USER_DIR})
+
+# L3 Libs
+##########################
+# RAL library: defined only when ENABLE_RAL evaluates to true
+set(RAL_LTE_DIR ${OPENAIR3_DIR}/RAL-LTE/)  # ends with "/", so the entries below append their sub-paths directly
+if (${ENABLE_RAL})
+  set(RAL_LTE_SRC
+    ${RRC_DIR}/rrc_UE_ral.c
+    ${RRC_DIR}/rrc_eNB_ral.c
+    ${RAL_LTE_DIR}LTE_RAL_ENB/SRC/lteRALenb_action.c
+    ${RAL_LTE_DIR}LTE_RAL_ENB/SRC/lteRALenb_main.c
+    ${RAL_LTE_DIR}LTE_RAL_ENB/SRC/lteRALenb_mih_msg.c
+    ${RAL_LTE_DIR}LTE_RAL_ENB/SRC/lteRALenb_parameters.c
+    ${RAL_LTE_DIR}LTE_RAL_ENB/SRC/lteRALenb_process.c
+    ${RAL_LTE_DIR}LTE_RAL_ENB/SRC/lteRALenb_rrc_msg.c
+    ${RAL_LTE_DIR}LTE_RAL_ENB/SRC/lteRALenb_subscribe.c
+    ${RAL_LTE_DIR}LTE_RAL_ENB/SRC/lteRALenb_thresholds.c
+    ${RAL_LTE_DIR}LTE_RAL_UE/SRC/lteRALue_action.c
+    ${RAL_LTE_DIR}LTE_RAL_UE/SRC/lteRALue_main.c
+    ${RAL_LTE_DIR}LTE_RAL_UE/SRC/lteRALue_mih_msg.c
+    ${RAL_LTE_DIR}LTE_RAL_UE/SRC/lteRALue_parameters.c
+    ${RAL_LTE_DIR}LTE_RAL_UE/SRC/lteRALue_process.c
+    ${RAL_LTE_DIR}LTE_RAL_UE/SRC/lteRALue_rrc_msg.c
+    ${RAL_LTE_DIR}LTE_RAL_UE/SRC/lteRALue_subscribe.c
+    ${RAL_LTE_DIR}LTE_RAL_UE/SRC/lteRALue_thresholds.c
+    )
+  add_library(RAL ${RAL_LTE_SRC})
+  set(RAL_LIB RAL)  # assigned only inside this branch
+endif()
+
+# CN libs
+##########################
+
+add_library(CN_UTILS
+  ${OPENAIR3_DIR}/UTILS/conversions.c
+  ${OPENAIR3_DIR}/UTILS/enum_string.c
+  ${OPENAIR3_DIR}/UTILS/log.c
+  ${OPENAIR3_DIR}/UTILS/mcc_mnc_itu.c
+  )
+
+set(GTPV1U_DIR ${OPENAIR3_DIR}/GTPV1-U)
+set (GTPV1U_SRC
+  ${RRC_DIR}/rrc_eNB_GTPV1U.c
+  ${GTPV1U_DIR}/nw-gtpv1u/src/NwGtpv1uTunnelEndPoint.c
+  ${GTPV1U_DIR}/nw-gtpv1u/src/NwGtpv1uTrxn.c
+  ${GTPV1U_DIR}/nw-gtpv1u/src/NwGtpv1uMsg.c
+  ${GTPV1U_DIR}/nw-gtpv1u/src/NwGtpv1u.c
+  ${GTPV1U_DIR}/gtpv1u_teid_pool.c
+)
+add_library(GTPV1U ${GTPV1U_SRC})
+
+set(SCTP_SRC
+  ${OPENAIR3_DIR}/SCTP/sctp_common.c
+  ${OPENAIR3_DIR}/SCTP/sctp_eNB_task.c
+  ${OPENAIR3_DIR}/SCTP/sctp_eNB_itti_messaging.c
+)
+add_library(SCTP_CLIENT ${SCTP_SRC})
+
+add_library(UDP ${OPENAIR3_DIR}/UDP/udp_eNB_task.c)
+
+
+set(NAS_SRC ${OPENAIR3_DIR}/NAS/)
+set(libnas_api_OBJS
+  ${NAS_SRC}COMMON/API/NETWORK/as_message.c
+  ${NAS_SRC}COMMON/API/NETWORK/nas_message.c
+  ${NAS_SRC}COMMON/API/NETWORK/network_api.c
+  )
+
+set(libnas_emm_msg_OBJS
+  ${NAS_SRC}COMMON/EMM/MSG/AttachAccept.c
+  ${NAS_SRC}COMMON/EMM/MSG/AttachComplete.c
+  ${NAS_SRC}COMMON/EMM/MSG/AttachReject.c
+  ${NAS_SRC}COMMON/EMM/MSG/AttachRequest.c
+  ${NAS_SRC}COMMON/EMM/MSG/AuthenticationFailure.c
+  ${NAS_SRC}COMMON/EMM/MSG/AuthenticationReject.c
+  ${NAS_SRC}COMMON/EMM/MSG/AuthenticationRequest.c
+  ${NAS_SRC}COMMON/EMM/MSG/AuthenticationResponse.c
+  ${NAS_SRC}COMMON/EMM/MSG/CsServiceNotification.c
+  ${NAS_SRC}COMMON/EMM/MSG/DetachAccept.c
+  ${NAS_SRC}COMMON/EMM/MSG/DetachRequest.c
+  ${NAS_SRC}COMMON/EMM/MSG/DownlinkNasTransport.c
+  ${NAS_SRC}COMMON/EMM/MSG/EmmInformation.c
+  ${NAS_SRC}COMMON/EMM/MSG/emm_msg.c
+  ${NAS_SRC}COMMON/EMM/MSG/EmmStatus.c
+  ${NAS_SRC}COMMON/EMM/MSG/ExtendedServiceRequest.c
+  ${NAS_SRC}COMMON/EMM/MSG/GutiReallocationCommand.c
+  ${NAS_SRC}COMMON/EMM/MSG/GutiReallocationComplete.c
+  ${NAS_SRC}COMMON/EMM/MSG/IdentityRequest.c
+  ${NAS_SRC}COMMON/EMM/MSG/IdentityResponse.c
+  ${NAS_SRC}COMMON/EMM/MSG/SecurityModeCommand.c
+  ${NAS_SRC}COMMON/EMM/MSG/SecurityModeComplete.c
+  ${NAS_SRC}COMMON/EMM/MSG/SecurityModeReject.c
+  ${NAS_SRC}COMMON/EMM/MSG/ServiceReject.c
+  ${NAS_SRC}COMMON/EMM/MSG/ServiceRequest.c
+  ${NAS_SRC}COMMON/EMM/MSG/TrackingAreaUpdateAccept.c
+  ${NAS_SRC}COMMON/EMM/MSG/TrackingAreaUpdateComplete.c
+  ${NAS_SRC}COMMON/EMM/MSG/TrackingAreaUpdateReject.c
+  ${NAS_SRC}COMMON/EMM/MSG/TrackingAreaUpdateRequest.c
+  ${NAS_SRC}COMMON/EMM/MSG/UplinkNasTransport.c
+)
+
+# ESM message sources located under ${NAS_SRC}COMMON/ESM/MSG.
+# Despite the _OBJS suffix the variable holds .c source paths; it is passed
+# to add_library(LIB_NAS_UE ...) when NAS_UE is enabled.
+set(libnas_esm_msg_OBJS
+  ${NAS_SRC}COMMON/ESM/MSG/ActivateDedicatedEpsBearerContextAccept.c
+  ${NAS_SRC}COMMON/ESM/MSG/ActivateDedicatedEpsBearerContextReject.c
+  ${NAS_SRC}COMMON/ESM/MSG/ActivateDedicatedEpsBearerContextRequest.c
+  ${NAS_SRC}COMMON/ESM/MSG/ActivateDefaultEpsBearerContextAccept.c
+  ${NAS_SRC}COMMON/ESM/MSG/ActivateDefaultEpsBearerContextReject.c
+  ${NAS_SRC}COMMON/ESM/MSG/ActivateDefaultEpsBearerContextRequest.c
+  ${NAS_SRC}COMMON/ESM/MSG/BearerResourceAllocationReject.c
+  ${NAS_SRC}COMMON/ESM/MSG/BearerResourceAllocationRequest.c
+  ${NAS_SRC}COMMON/ESM/MSG/BearerResourceModificationReject.c
+  ${NAS_SRC}COMMON/ESM/MSG/BearerResourceModificationRequest.c
+  ${NAS_SRC}COMMON/ESM/MSG/DeactivateEpsBearerContextAccept.c
+  ${NAS_SRC}COMMON/ESM/MSG/DeactivateEpsBearerContextRequest.c
+  ${NAS_SRC}COMMON/ESM/MSG/EsmInformationRequest.c
+  ${NAS_SRC}COMMON/ESM/MSG/EsmInformationResponse.c
+  ${NAS_SRC}COMMON/ESM/MSG/esm_msg.c
+  ${NAS_SRC}COMMON/ESM/MSG/EsmStatus.c
+  ${NAS_SRC}COMMON/ESM/MSG/ModifyEpsBearerContextAccept.c
+  ${NAS_SRC}COMMON/ESM/MSG/ModifyEpsBearerContextReject.c
+  ${NAS_SRC}COMMON/ESM/MSG/ModifyEpsBearerContextRequest.c
+  ${NAS_SRC}COMMON/ESM/MSG/PdnConnectivityReject.c
+  ${NAS_SRC}COMMON/ESM/MSG/PdnConnectivityRequest.c
+  ${NAS_SRC}COMMON/ESM/MSG/PdnDisconnectReject.c
+  ${NAS_SRC}COMMON/ESM/MSG/PdnDisconnectRequest.c
+)
+
+# Information-element sources located under ${NAS_SRC}COMMON/IES.
+# Despite the _OBJS suffix the variable holds .c source paths; it is passed
+# to add_library(LIB_NAS_UE ...) when NAS_UE is enabled.
+set(libnas_ies_OBJS
+  ${NAS_SRC}COMMON/IES/AccessPointName.c
+  ${NAS_SRC}COMMON/IES/AdditionalUpdateResult.c
+  ${NAS_SRC}COMMON/IES/AdditionalUpdateType.c
+  ${NAS_SRC}COMMON/IES/ApnAggregateMaximumBitRate.c
+  ${NAS_SRC}COMMON/IES/AuthenticationFailureParameter.c
+  ${NAS_SRC}COMMON/IES/AuthenticationParameterAutn.c
+  ${NAS_SRC}COMMON/IES/AuthenticationParameterRand.c
+  ${NAS_SRC}COMMON/IES/AuthenticationResponseParameter.c
+  ${NAS_SRC}COMMON/IES/CipheringKeySequenceNumber.c
+  ${NAS_SRC}COMMON/IES/Cli.c
+  ${NAS_SRC}COMMON/IES/CsfbResponse.c
+  ${NAS_SRC}COMMON/IES/DaylightSavingTime.c
+  ${NAS_SRC}COMMON/IES/DetachType.c
+  ${NAS_SRC}COMMON/IES/DrxParameter.c
+  ${NAS_SRC}COMMON/IES/EmergencyNumberList.c
+  ${NAS_SRC}COMMON/IES/EmmCause.c
+  ${NAS_SRC}COMMON/IES/EpsAttachResult.c
+  ${NAS_SRC}COMMON/IES/EpsAttachType.c
+  ${NAS_SRC}COMMON/IES/EpsBearerContextStatus.c
+  ${NAS_SRC}COMMON/IES/EpsBearerIdentity.c
+  ${NAS_SRC}COMMON/IES/EpsMobileIdentity.c
+  ${NAS_SRC}COMMON/IES/EpsNetworkFeatureSupport.c
+  ${NAS_SRC}COMMON/IES/EpsQualityOfService.c
+  ${NAS_SRC}COMMON/IES/EpsUpdateResult.c
+  ${NAS_SRC}COMMON/IES/EpsUpdateType.c
+  ${NAS_SRC}COMMON/IES/EsmCause.c
+  ${NAS_SRC}COMMON/IES/EsmInformationTransferFlag.c
+  ${NAS_SRC}COMMON/IES/EsmMessageContainer.c
+  ${NAS_SRC}COMMON/IES/GprsTimer.c
+  ${NAS_SRC}COMMON/IES/GutiType.c
+  ${NAS_SRC}COMMON/IES/IdentityType2.c
+  ${NAS_SRC}COMMON/IES/ImeisvRequest.c
+  ${NAS_SRC}COMMON/IES/KsiAndSequenceNumber.c
+  ${NAS_SRC}COMMON/IES/LcsClientIdentity.c
+  ${NAS_SRC}COMMON/IES/LcsIndicator.c
+  ${NAS_SRC}COMMON/IES/LinkedEpsBearerIdentity.c
+  ${NAS_SRC}COMMON/IES/LlcServiceAccessPointIdentifier.c
+  ${NAS_SRC}COMMON/IES/LocationAreaIdentification.c
+  ${NAS_SRC}COMMON/IES/MessageType.c
+  ${NAS_SRC}COMMON/IES/MobileIdentity.c
+  ${NAS_SRC}COMMON/IES/MobileStationClassmark2.c
+  ${NAS_SRC}COMMON/IES/MobileStationClassmark3.c
+  ${NAS_SRC}COMMON/IES/MsNetworkCapability.c
+  ${NAS_SRC}COMMON/IES/MsNetworkFeatureSupport.c
+  ${NAS_SRC}COMMON/IES/NasKeySetIdentifier.c
+  ${NAS_SRC}COMMON/IES/NasMessageContainer.c
+  ${NAS_SRC}COMMON/IES/NasRequestType.c
+  ${NAS_SRC}COMMON/IES/NasSecurityAlgorithms.c
+  ${NAS_SRC}COMMON/IES/NetworkName.c
+  ${NAS_SRC}COMMON/IES/Nonce.c
+  ${NAS_SRC}COMMON/IES/PacketFlowIdentifier.c
+  ${NAS_SRC}COMMON/IES/PagingIdentity.c
+  ${NAS_SRC}COMMON/IES/PdnAddress.c
+  ${NAS_SRC}COMMON/IES/PdnType.c
+  ${NAS_SRC}COMMON/IES/PlmnList.c
+  ${NAS_SRC}COMMON/IES/ProcedureTransactionIdentity.c
+  ${NAS_SRC}COMMON/IES/ProtocolConfigurationOptions.c
+  ${NAS_SRC}COMMON/IES/ProtocolDiscriminator.c
+  ${NAS_SRC}COMMON/IES/PTmsiSignature.c
+  ${NAS_SRC}COMMON/IES/QualityOfService.c
+  ${NAS_SRC}COMMON/IES/RadioPriority.c
+  ${NAS_SRC}COMMON/IES/SecurityHeaderType.c
+  ${NAS_SRC}COMMON/IES/ServiceType.c
+  ${NAS_SRC}COMMON/IES/ShortMac.c
+  ${NAS_SRC}COMMON/IES/SsCode.c
+  ${NAS_SRC}COMMON/IES/SupportedCodecList.c
+  ${NAS_SRC}COMMON/IES/TimeZoneAndTime.c
+  ${NAS_SRC}COMMON/IES/TimeZone.c
+  ${NAS_SRC}COMMON/IES/TmsiStatus.c
+  ${NAS_SRC}COMMON/IES/TrackingAreaIdentity.c
+  ${NAS_SRC}COMMON/IES/TrackingAreaIdentityList.c
+  ${NAS_SRC}COMMON/IES/TrafficFlowAggregateDescription.c
+  ${NAS_SRC}COMMON/IES/TrafficFlowTemplate.c
+  ${NAS_SRC}COMMON/IES/TransactionIdentifier.c
+  ${NAS_SRC}COMMON/IES/UeNetworkCapability.c
+  ${NAS_SRC}COMMON/IES/UeRadioCapabilityInformationUpdateNeeded.c
+  ${NAS_SRC}COMMON/IES/UeSecurityCapability.c
+  ${NAS_SRC}COMMON/IES/VoiceDomainPreferenceAndUeUsageSetting.c
+)
+
+# Utility sources located under ${NAS_SRC}COMMON/UTIL.
+# Despite the _OBJS suffix the variable holds .c source paths; it is passed
+# to add_library(LIB_NAS_UE ...) when NAS_UE is enabled.
+set (libnas_utils_OBJS
+  ${NAS_SRC}COMMON/UTIL/device.c
+  ${NAS_SRC}COMMON/UTIL/memory.c
+  ${NAS_SRC}COMMON/UTIL/nas_log.c
+  ${NAS_SRC}COMMON/UTIL/nas_timer.c
+  ${NAS_SRC}COMMON/UTIL/socket.c
+  ${NAS_SRC}COMMON/UTIL/stty.c
+  ${NAS_SRC}COMMON/UTIL/TLVEncoder.c
+  ${NAS_SRC}COMMON/UTIL/TLVDecoder.c
+  ${NAS_SRC}COMMON/UTIL/OctetString.c
+)
+
+# UE-side NAS library. Everything below is configured only when NAS_UE is set:
+# the per-directory source lists, the LIB_NAS_UE library assembled from them,
+# the NAS_UE_LIB variable used on link lines, and the UE include directories.
+if(NAS_UE)
+  # Sources under UE/API/USER and UE/API/USIM.
+  set(libnas_ue_api_OBJS
+    ${NAS_SRC}UE/API/USER/at_command.c
+    ${NAS_SRC}UE/API/USER/at_error.c
+    ${NAS_SRC}UE/API/USER/at_response.c
+    ${NAS_SRC}UE/API/USER/user_api.c
+    ${NAS_SRC}UE/API/USER/user_indication.c
+    ${NAS_SRC}UE/API/USIM/aka_functions.c
+    ${NAS_SRC}UE/API/USIM/usim_api.c
+  )
+  # Sources under UE/EMM.
+  set(libnas_ue_emm_OBJS
+    ${NAS_SRC}UE/EMM/Attach.c
+    ${NAS_SRC}UE/EMM/Authentication.c
+    ${NAS_SRC}UE/EMM/Detach.c
+    ${NAS_SRC}UE/EMM/emm_main.c
+    ${NAS_SRC}UE/EMM/EmmStatusHdl.c
+    ${NAS_SRC}UE/EMM/Identification.c
+    ${NAS_SRC}UE/EMM/IdleMode.c
+    ${NAS_SRC}UE/EMM/LowerLayer.c
+    ${NAS_SRC}UE/EMM/SecurityModeControl.c
+    ${NAS_SRC}UE/EMM/ServiceRequestHdl.c
+    ${NAS_SRC}UE/EMM/TrackingAreaUpdate.c
+  )
+  # Sources under UE/EMM/SAP.
+  set(libnas_ue_emm_sap_OBJS
+    ${NAS_SRC}UE/EMM/SAP/emm_as.c
+    ${NAS_SRC}UE/EMM/SAP/EmmDeregisteredAttachNeeded.c
+    ${NAS_SRC}UE/EMM/SAP/EmmDeregisteredAttemptingToAttach.c
+    ${NAS_SRC}UE/EMM/SAP/EmmDeregistered.c
+    ${NAS_SRC}UE/EMM/SAP/EmmDeregisteredInitiated.c
+    ${NAS_SRC}UE/EMM/SAP/EmmDeregisteredLimitedService.c
+    ${NAS_SRC}UE/EMM/SAP/EmmDeregisteredNoCellAvailable.c
+    ${NAS_SRC}UE/EMM/SAP/EmmDeregisteredNoImsi.c
+    ${NAS_SRC}UE/EMM/SAP/EmmDeregisteredNormalService.c
+    ${NAS_SRC}UE/EMM/SAP/EmmDeregisteredPlmnSearch.c
+    ${NAS_SRC}UE/EMM/SAP/emm_esm.c
+    ${NAS_SRC}UE/EMM/SAP/emm_fsm.c
+    ${NAS_SRC}UE/EMM/SAP/EmmNull.c
+    ${NAS_SRC}UE/EMM/SAP/emm_recv.c
+    ${NAS_SRC}UE/EMM/SAP/emm_reg.c
+    ${NAS_SRC}UE/EMM/SAP/EmmRegisteredAttemptingToUpdate.c
+    ${NAS_SRC}UE/EMM/SAP/EmmRegistered.c
+    ${NAS_SRC}UE/EMM/SAP/EmmRegisteredImsiDetachInitiated.c
+    ${NAS_SRC}UE/EMM/SAP/EmmRegisteredInitiated.c
+    ${NAS_SRC}UE/EMM/SAP/EmmRegisteredLimitedService.c
+    ${NAS_SRC}UE/EMM/SAP/EmmRegisteredNoCellAvailable.c
+    ${NAS_SRC}UE/EMM/SAP/EmmRegisteredNormalService.c
+    ${NAS_SRC}UE/EMM/SAP/EmmRegisteredPlmnSearch.c
+    ${NAS_SRC}UE/EMM/SAP/EmmRegisteredUpdateNeeded.c
+    ${NAS_SRC}UE/EMM/SAP/emm_sap.c
+    ${NAS_SRC}UE/EMM/SAP/emm_send.c
+    ${NAS_SRC}UE/EMM/SAP/EmmServiceRequestInitiated.c
+    ${NAS_SRC}UE/EMM/SAP/EmmTrackingAreaUpdatingInitiated.c
+  )
+  # Sources under UE/ESM.
+  set (libnas_ue_esm_OBJS
+    ${NAS_SRC}UE/ESM/DedicatedEpsBearerContextActivation.c
+    ${NAS_SRC}UE/ESM/DefaultEpsBearerContextActivation.c
+    ${NAS_SRC}UE/ESM/EpsBearerContextDeactivation.c
+    ${NAS_SRC}UE/ESM/esm_ebr.c
+    ${NAS_SRC}UE/ESM/esm_ebr_context.c
+    ${NAS_SRC}UE/ESM/esm_ip.c
+    ${NAS_SRC}UE/ESM/esm_main.c
+    ${NAS_SRC}UE/ESM/esm_pt.c
+    ${NAS_SRC}UE/ESM/EsmStatusHdl.c
+    ${NAS_SRC}UE/ESM/PdnConnectivity.c
+    ${NAS_SRC}UE/ESM/PdnDisconnect.c
+  )
+  # Sources under UE/ESM/SAP.
+  set(libnas_ue_esm_sap_OBJS
+    ${NAS_SRC}UE/ESM/SAP/esm_recv.c
+    ${NAS_SRC}UE/ESM/SAP/esm_send.c
+    ${NAS_SRC}UE/ESM/SAP/esm_sap.c
+  )
+  # The library combines the top-level UE sources with the common lists
+  # (API, EMM/ESM messages, IES, utilities) and the UE-specific lists above.
+  add_library(LIB_NAS_UE
+    ${NAS_SRC}UE/nas_itti_messaging.c
+    ${NAS_SRC}UE/nas_network.c
+    ${NAS_SRC}UE/nas_parser.c
+    ${NAS_SRC}UE/nas_proc.c
+    ${NAS_SRC}UE/nas_user.c
+    ${libnas_api_OBJS}
+    ${libnas_ue_api_OBJS}
+    ${libnas_emm_msg_OBJS}
+    ${libnas_esm_msg_OBJS}
+    ${libnas_ies_OBJS}
+    ${libnas_utils_OBJS}
+    ${libnas_ue_emm_OBJS}
+    ${libnas_ue_emm_sap_OBJS}
+    ${libnas_ue_esm_OBJS}
+    ${libnas_ue_esm_sap_OBJS}
+  )
+  # Link lines reference the library through ${NAS_UE_LIB}, which stays empty
+  # when NAS_UE is off.
+  set(NAS_UE_LIB LIB_NAS_UE)
+
+  # Directory-scoped include paths: they apply to every target defined after
+  # this point in the directory, not only to LIB_NAS_UE.
+  include_directories(${NAS_SRC}UE)
+  include_directories(${NAS_SRC}UE/API/USER)
+  include_directories(${NAS_SRC}UE/API/USIM)
+  include_directories(${NAS_SRC}UE/EMM)
+  include_directories(${NAS_SRC}UE/EMM/SAP)
+  include_directories(${NAS_SRC}UE/ESM)
+  include_directories(${NAS_SRC}UE/ESM/SAP)
+endif()
+
+
+# NB-IoT: directory-wide definition of NUMBER_OF_UE_MAX_NB_IoT and a loadable
+# module (MODULE library) built from the NB-IoT configuration source.
+add_definitions(-DNUMBER_OF_UE_MAX_NB_IoT=16)
+set(NBIOT_SOURCES ${OPENAIR2_DIR}/ENB_APP/NB_IoT_config.c)
+add_library(NB_IoT MODULE ${NBIOT_SOURCES})
+
+# Shared-library loader source, compiled directly into each executable that
+# lists ${SHLIB_LOADER_SOURCES} among its sources.
+set(SHLIB_LOADER_SOURCES ${OPENAIR_DIR}/common/utils/load_module_shlib.c)
+
+# Build lfds from its own source code (even though it is an external library)
+# for better integration with the compilation flags and structure of cmake.
+###################################################################
+# Root of the liblfds 6.1.1 sources; note the value already ends with '/'.
+set(lfds ${OPENAIR2_DIR}/UTIL/LFDS/liblfds6.1.1/liblfds611/src/)
+# Collect every .c file of each data-structure directory. The globs are
+# evaluated at configure time only, so files added later are picked up
+# after cmake is re-run.
+file(GLOB lfds_queue ${lfds}/lfds611_queue/*.c)
+file(GLOB lfds_ring ${lfds}/lfds611_ringbuffer/*.c)
+file(GLOB lfds_slist ${lfds}/lfds611_slist/*.c)
+file(GLOB lfds_stack ${lfds}/lfds611_stack/*.c)
+file(GLOB lfds_freelist ${lfds}/lfds611_freelist/*.c)
+
+# The lfds source root is added to the include path of the whole directory.
+include_directories(${lfds})
+# LFDS = globbed data-structure sources plus explicitly listed helper files.
+add_library(LFDS
+  ${lfds_queue} ${lfds_ring} ${lfds_slist} ${lfds_stack} ${lfds_freelist}
+  ${lfds}/lfds611_liblfds/lfds611_liblfds_abstraction_test_helpers.c
+  ${lfds}/lfds611_liblfds/lfds611_liblfds_aligned_free.c
+  ${lfds}/lfds611_liblfds/lfds611_liblfds_aligned_malloc.c
+  ${lfds}/lfds611_abstraction/lfds611_abstraction_free.c
+  ${lfds}/lfds611_abstraction/lfds611_abstraction_malloc.c
+)
+
+# liblfds 7.0.0, built from its sources like the 6.1.1 version.
+# Each file(GLOB) gathers the .c files of one liblfds700 component directory.
+set(lfds7 ${OPENAIR2_DIR}/UTIL/LFDS/liblfds7.0.0/liblfds700/src/)
+file(GLOB lfds7_queue          ${lfds7}/lfds700_queue/*.c)
+file(GLOB lfds7_ring           ${lfds7}/lfds700_ringbuffer/*.c)
+file(GLOB lfds7_qbss           ${lfds7}/lfds700_queue_bounded_singleconsumer_singleproducer/*.c)
+file(GLOB lfds7_stack          ${lfds7}/lfds700_stack/*.c)
+file(GLOB lfds7_freelist       ${lfds7}/lfds700_freelist/*.c)
+file(GLOB lfds7_btree          ${lfds7}/lfds700_btree_addonly_unbalanced/*.c)
+file(GLOB lfds7_hash           ${lfds7}/lfds700_hash_addonly/*.c)
+file(GLOB lfds7_ordered_list   ${lfds7}/lfds700_list_addonly_ordered_singlylinked/*.c)
+file(GLOB lfds7_unordered_list ${lfds7}/lfds700_list_addonly_singlylinked_unordered/*.c)
+file(GLOB lfds7_misc           ${lfds7}/lfds700_misc/*.c)
+
+# Directory-wide include path for the liblfds700 sources.
+include_directories(${lfds7})
+
+# One component per line, in the same order as before.
+add_library(LFDS7
+  ${lfds7_queue}
+  ${lfds7_ring}
+  ${lfds7_qbss}
+  ${lfds7_stack}
+  ${lfds7_freelist}
+  ${lfds7_btree}
+  ${lfds7_hash}
+  ${lfds7_ordered_list}
+  ${lfds7_unordered_list}
+  ${lfds7_misc}
+)
+
+# Simulation library
+##########################
+# Channel/RF simulation tools plus the netlink initialisation source.
+add_library(SIMU
+  # SIMULATION/TOOLS
+  ${OPENAIR1_DIR}/SIMULATION/TOOLS/random_channel.c
+  ${OPENAIR1_DIR}/SIMULATION/TOOLS/rangen_double.c
+  ${OPENAIR1_DIR}/SIMULATION/TOOLS/taus.c
+  ${OPENAIR1_DIR}/SIMULATION/TOOLS/multipath_channel.c
+  ${OPENAIR1_DIR}/SIMULATION/TOOLS/abstraction.c
+  ${OPENAIR1_DIR}/SIMULATION/TOOLS/multipath_tv_channel.c
+  # SIMULATION/RF
+  ${OPENAIR1_DIR}/SIMULATION/RF/rf.c
+  ${OPENAIR1_DIR}/SIMULATION/RF/dac.c
+  ${OPENAIR1_DIR}/SIMULATION/RF/adc.c
+  # SIMULATION/ETH_TRANSPORT
+  ${OPENAIR1_DIR}/SIMULATION/ETH_TRANSPORT/netlink_init.c
+)
+
+# Library built from the SIMULATION/ETH_TRANSPORT sources
+# (emu_transport.c is intentionally left out).
+add_library(SIMU_ETH
+  ${OPENAIR1_DIR}/SIMULATION/ETH_TRANSPORT/netlink_init.c
+  ${OPENAIR1_DIR}/SIMULATION/ETH_TRANSPORT/multicast_link.c
+  ${OPENAIR1_DIR}/SIMULATION/ETH_TRANSPORT/socket.c
+  ${OPENAIR1_DIR}/SIMULATION/ETH_TRANSPORT/bypass_session_layer.c
+  #${OPENAIR1_DIR}/SIMULATION/ETH_TRANSPORT/emu_transport.c
+)
+
+# Library wrapping the EXMIMO user-space source openair0_lib.c.
+add_library(OPENAIR0_LIB ${OPENAIR_TARGETS}/ARCH/EXMIMO/USERSPACE/LIB/openair0_lib.c)
+
+# nFAPI header directories, added to the include path of every target defined
+# after this point in the directory.
+include_directories("${NFAPI_DIR}/nfapi/public_inc")
+include_directories("${NFAPI_DIR}/common/public_inc")
+include_directories("${NFAPI_DIR}/pnf/public_inc")
+include_directories("${NFAPI_DIR}/nfapi/inc")
+include_directories("${NFAPI_DIR}/sim_common/inc")
+include_directories("${NFAPI_DIR}/pnf_sim/inc")
+
+
+# Required system packages
+# They are found either with CMake's built-in modules (on Ubuntu they live in
+# /usr/share/cmake*/Modules/) or through CMake's generic interface to
+# pkg-config, which is widely used.
+###################################
+include(FindPkgConfig)
+
+# Each package below is mandatory (REQUIRED); its header directories are added
+# to the directory-wide include path.
+pkg_search_module(LIBXML2 REQUIRED libxml-2.0)
+include_directories(${LIBXML2_INCLUDE_DIRS})
+
+pkg_search_module(LIBXSLT REQUIRED libxslt)
+include_directories(${LIBXSLT_INCLUDE_DIRS})
+
+pkg_search_module(OPENSSL REQUIRED openssl)
+include_directories(${OPENSSL_INCLUDE_DIRS})
+
+pkg_search_module(CONFIG REQUIRED libconfig)
+include_directories(${CONFIG_INCLUDE_DIRS})
+
+pkg_search_module(CRYPTO REQUIRED libcrypto)
+include_directories(${CRYPTO_INCLUDE_DIRS})
+
+# Boost is located with CMake's native find module because this package is
+# not handled through pkg-config. It is only looked up for the OAI_USRP board.
+# RF_BOARD is quoted so that an empty or unset value yields a valid (false)
+# comparison instead of a malformed if() expression.
+if ("${RF_BOARD}" STREQUAL "OAI_USRP")
+  find_package(Boost REQUIRED)
+  # FindBoost publishes the header location in Boost_INCLUDE_DIRS; the
+  # previously used LIBBOOST_INCLUDE_DIR is not set by find_package(Boost).
+  include_directories(${Boost_INCLUDE_DIRS})
+endif ()
+
+# OpenPGM is optional at configure time: a missing package only produces a
+# warning, and targets that need it fail later when they are built.
+pkg_search_module(OPENPGM openpgm-5.1 openpgm-5.2)
+# Test the variable by name. Expanding it (${OPENPGM_FOUND}) leaves a bare
+# "if(NOT )" when the package is missing and the variable is empty, so the
+# not-found branch would not be selected reliably.
+if(NOT OPENPGM_FOUND)
+  message("PACKAGE openpgm-5.1 is required by binaries such as oaisim: will fail later if this target is built")
+else()
+  include_directories(${OPENPGM_INCLUDE_DIRS})
+endif()
+
+# nettle is mandatory: configuration stops when pkg-config cannot find it.
+pkg_search_module(NETTLE nettle)
+# Test the variable by name. Expanding it (${NETTLE_FOUND}) leaves a bare
+# "if(NOT )" when the package is missing and the variable is empty, so the
+# fatal error below would not be raised reliably.
+if(NOT NETTLE_FOUND)
+  message( FATAL_ERROR "PACKAGE nettle not found: some targets will fail. Run build_oai -I again!")
+else()
+  include_directories(${NETTLE_INCLUDE_DIRS})
+endif()
+
+message ("NETTLE VERSION_INSTALLED  = ${NETTLE_VERSION}")
+
+# Split the pkg-config version string into its major and minor numbers.
+# The input is quoted so that an empty version still forms a complete
+# string(REGEX REPLACE) call and is reported by the explicit check below
+# rather than by an argument-count error.
+string(REGEX REPLACE "([0-9]+).*" "\\1" NETTLE_VERSION_MAJOR "${NETTLE_VERSION}")
+string(REGEX REPLACE "[0-9]+\\.([0-9]+).*" "\\1" NETTLE_VERSION_MINOR "${NETTLE_VERSION}")
+message ("NETTLE_VERSION_MAJOR = ${NETTLE_VERSION_MAJOR}")
+message ("NETTLE_VERSION_MINOR = ${NETTLE_VERSION_MINOR}")
+
+if ("${NETTLE_VERSION_MAJOR}" STREQUAL "" OR "${NETTLE_VERSION_MINOR}" STREQUAL "")
+  message( FATAL_ERROR "The nettle version not detected properly. Try to run build_oai -I again" )
+endif()
+
+# Expose the detected version to the C sources of every target in the directory.
+add_definitions("-DNETTLE_VERSION_MAJOR=${NETTLE_VERSION_MAJOR}")
+add_definitions("-DNETTLE_VERSION_MINOR=${NETTLE_VERSION_MINOR}")
+
+# xpm is optional at configure time: a missing package only produces a warning.
+pkg_search_module(XPM xpm)
+# Test the variable by name. Expanding it (${XPM_FOUND}) leaves a bare
+# "if(NOT )" when the package is missing and the variable is empty, so the
+# warning branch would not be selected reliably.
+if(NOT XPM_FOUND)
+  message("PACKAGE xpm not found: some targets will fail")
+else()
+  include_directories(${XPM_INCLUDE_DIRS})
+endif()
+
+# Atlas is needed by some targets but is not available through pkg-config,
+# so it is detected by probing well-known file locations. The result is the
+# ATLAS_LIBRARIES list (BLAS flavour, Atlas flavour, lapack).
+if(EXISTS "/usr/include/atlas/cblas.h" OR EXISTS "/usr/include/cblas.h")
+  include_directories("/usr/include/atlas")
+  link_directories("/usr/lib64")
+  link_directories("/usr/lib64/atlas") # atlas libraries live here on CentOS 7
+
+  # BLAS flavour: "blas" on CentOS 7, "cblas" on Ubuntu
+  if(EXISTS "/usr/lib64/libblas.so" OR EXISTS "/usr/lib/libblas.so")
+    list(APPEND ATLAS_LIBRARIES blas)
+  else()
+    list(APPEND ATLAS_LIBRARIES cblas)
+  endif()
+
+  # Atlas flavour: "tatlas" on CentOS 7, "atlas" on Ubuntu
+  if(EXISTS "/usr/lib/atlas/libtatlas.so" OR EXISTS "/usr/lib64/atlas/libtatlas.so")
+    list(APPEND ATLAS_LIBRARIES tatlas)
+  else()
+    list(APPEND ATLAS_LIBRARIES atlas)
+  endif()
+
+  list(APPEND ATLAS_LIBRARIES lapack)
+
+elseif(EXISTS "/usr/include/x86_64-linux-gnu/cblas.h")
+  # Ubuntu 17.10 uses multiarch directories
+  include_directories("/usr/include/x86_64-linux-gnu")
+  link_directories("/usr/lib/x86_64-linux-gnu")
+  list(APPEND ATLAS_LIBRARIES cblas atlas lapack)
+
+else()
+  message("No Blas/Atlas libs found, some targets will fail")
+endif()
+
+# When XFORMS is enabled: add the X11 include directory, define the scope
+# sources (XFORMS_SOURCE, plus XFORMS_SOURCE_SOFTMODEM for the softmodems)
+# and the library to link (XFORMS_LIBRARIES).
+if (${XFORMS})
+  include_directories ("/usr/include/X11")
+  set(XFORMS_SOURCE ${OPENAIR1_DIR}/PHY/TOOLS/lte_phy_scope.c)
+  set(XFORMS_SOURCE_SOFTMODEM ${OPENAIR_TARGETS}/RT/USER/stats.c)
+  set(XFORMS_LIBRARIES forms)
+endif ()
+
+# Put the project's own module directory in front of any existing
+# CMAKE_MODULE_PATH entries.
+set(CMAKE_MODULE_PATH "${OPENAIR_DIR}/cmake_targets/tools/MODULES" "${CMAKE_MODULE_PATH}")
+
+# The T directory is always on the include path, whether or not the T tracer
+# is enabled, because the T macros appear in the code unconditionally.
+include_directories("${OPENAIR_DIR}/common/utils/T")
+
+# With the tracer enabled, executables compile the T sources (T_SOURCE) and
+# link the library named by T_LIB.
+if (${T_TRACER})
+  set(T_SOURCE
+    ${OPENAIR_DIR}/common/utils/T/T.c
+    ${OPENAIR_DIR}/common/utils/T/local_tracer.c
+  )
+  set(T_LIB rt)
+endif ()
+
+#Some files in the T directory are generated.
+#This rule and the following deal with it.
+#The rule declares T_IDs.h as its output and produces it by running `make`
+#inside the T directory; it is re-run when T_messages.txt changes.
+#Note that the generated header is written into the source tree.
+add_custom_command (
+  OUTPUT ${OPENAIR_DIR}/common/utils/T/T_IDs.h
+  COMMAND make
+  WORKING_DIRECTORY ${OPENAIR_DIR}/common/utils/T
+  DEPENDS ${OPENAIR_DIR}/common/utils/T/T_messages.txt
+  )
+
+#This rule is specifically needed to generate the T files
+#before anything else in a project that uses the T.
+#See the 'add_dependencies' calls further down in this file.
+#Basically we create a custom target (generate_T) that depends on
+#the generated T_IDs.h and we make other targets depend on it.
+#That forces cmake to generate the T files before building
+#those targets.
+add_custom_target (
+  generate_T
+  DEPENDS ${OPENAIR_DIR}/common/utils/T/T_IDs.h
+)
+
+# Hack on a test of asn1c version (already dirty):
+# defines ASN1_MINIMUM_VERSION=924 for every target in this directory.
+add_definitions(-DASN1_MINIMUM_VERSION=924)
+
+#################################
+# add executables for operation
+#################################
+
+# lte-softmodem: one executable implementing both the eNB and the UE
+###################################################
+
+add_executable(lte-softmodem
+  # generated headers
+  ${rrc_h}
+  ${s1ap_h}
+  ${OPENAIR_BIN_DIR}/messages_xml.h
+  # softmodem sources
+  ${OPENAIR_TARGETS}/RT/USER/rt_wrapper.c
+  ${OPENAIR_TARGETS}/RT/USER/lte-enb.c
+  ${OPENAIR_TARGETS}/RT/USER/lte-ru.c
+  ${OPENAIR_TARGETS}/RT/USER/lte-softmodem.c
+  ${OPENAIR2_DIR}/ENB_APP/NB_IoT_interface.c
+  ${OPENAIR1_DIR}/SIMULATION/TOOLS/taus.c
+  ${OPENAIR_TARGETS}/SIMU/USER/init_lte.c
+  ${OPENAIR_TARGETS}/COMMON/create_tasks.c
+  ${OPENAIR_TARGETS}/ARCH/COMMON/common_lib.c
+  ${OPENAIR1_DIR}/SIMULATION/ETH_TRANSPORT/netlink_init.c
+  ${OPENAIR3_DIR}/NAS/UE/nas_ue_task.c
+  ${OPENAIR_DIR}/common/utils/utils.c
+  ${OPENAIR_DIR}/common/utils/system.c
+  # optional / shared source lists
+  ${GTPU_need_ITTI}
+  ${XFORMS_SOURCE}
+  ${XFORMS_SOURCE_SOFTMODEM}
+  ${T_SOURCE}
+  ${CONFIG_SOURCES}
+  ${SHLIB_LOADER_SOURCES}
+)
+
+# All link items in one call, in the same order as the former separate calls.
+target_link_libraries(lte-softmodem
+  # project libraries inside one linker group
+  -Wl,--start-group
+  RRC_LIB S1AP_LIB S1AP_ENB GTPV1U SECU_CN SECU_OSA UTIL HASHTABLE SCTP_CLIENT UDP SCHED_LIB PHY LFDS L2
+  ${MSC_LIB} ${RAL_LIB} ${NAS_UE_LIB} ${ITTI_LIB} ${FLPT_MSG_LIB} ${ASYNC_IF_LIB} ${FLEXRAN_AGENT_LIB} LFDS7
+  NFAPI_COMMON_LIB NFAPI_LIB NFAPI_VNF_LIB NFAPI_PNF_LIB NFAPI_USER_LIB
+  -Wl,--end-group
+  z dl
+  # system libraries
+  ${LIBXML2_LIBRARIES}
+  pthread m ${CONFIG_LIBRARIES} rt crypt ${CRYPTO_LIBRARIES} ${OPENSSL_LIBRARIES} ${NETTLE_LIBRARIES} sctp
+  ${XFORMS_LIBRARIES} ${PROTOBUF_LIB} ${CMAKE_DL_LIBS} ${LIBYAML_LIBRARIES}
+  ${LIB_LMS_LIBRARIES}
+  ${T_LIB}
+)
+
+# lte-softmodem-nos1: eNB and UE implementation, linked without the
+# S1AP/GTPV1U/SCTP/UDP/NAS UE libraries used by lte-softmodem
+###################################################
+add_executable(lte-softmodem-nos1
+  # generated headers
+  ${rrc_h}
+  ${s1ap_h}
+  ${OPENAIR_BIN_DIR}/messages_xml.h
+  # softmodem sources
+  ${OPENAIR_TARGETS}/RT/USER/rt_wrapper.c
+  ${OPENAIR_TARGETS}/RT/USER/lte-enb.c
+  ${OPENAIR_TARGETS}/RT/USER/lte-ru.c
+  ${OPENAIR_TARGETS}/RT/USER/lte-softmodem.c
+  ${OPENAIR2_DIR}/ENB_APP/NB_IoT_interface.c
+  ${OPENAIR1_DIR}/SIMULATION/TOOLS/taus.c
+  ${OPENAIR_TARGETS}/SIMU/USER/init_lte.c
+  ${OPENAIR_TARGETS}/COMMON/create_tasks.c
+  ${OPENAIR_TARGETS}/ARCH/COMMON/common_lib.c
+  ${OPENAIR2_DIR}/RRC/NAS/nas_config.c
+  ${OPENAIR2_DIR}/RRC/NAS/rb_config.c
+  ${OPENAIR1_DIR}/SIMULATION/ETH_TRANSPORT/netlink_init.c
+  ${OPENAIR_DIR}/common/utils/system.c
+  # optional / shared source lists
+  ${XFORMS_SOURCE}
+  ${XFORMS_SOURCE_SOFTMODEM}
+  ${T_SOURCE}
+  ${CONFIG_SOURCES}
+  ${SHLIB_LOADER_SOURCES}
+)
+
+# All link items in one call, in the same order as the former separate calls.
+target_link_libraries(lte-softmodem-nos1
+  # project libraries inside one linker group
+  -Wl,--start-group
+  RRC_LIB SECU_CN SECU_OSA UTIL HASHTABLE SCHED_LIB PHY LFDS L2 ${MSC_LIB} ${RAL_LIB} ${ITTI_LIB}
+  ${MIH_LIB} ${FLPT_MSG_LIB} ${ASYNC_IF_LIB} ${FLEXRAN_AGENT_LIB} LFDS7
+  NFAPI_COMMON_LIB NFAPI_LIB NFAPI_VNF_LIB NFAPI_PNF_LIB NFAPI_USER_LIB
+  -Wl,--end-group
+  z dl
+  # system libraries
+  ${LIBXML2_LIBRARIES}
+  pthread m ${CONFIG_LIBRARIES} rt crypt ${CRYPTO_LIBRARIES} ${OPENSSL_LIBRARIES} ${NETTLE_LIBRARIES} sctp
+  ${XFORMS_LIBRARIES} ${PROTOBUF_LIB} ${CMAKE_DL_LIBS} ${LIBYAML_LIBRARIES}
+  ${LIB_LMS_LIBRARIES}
+  ${T_LIB}
+)
+
+# lte-uesoftmodem: the UE implementation
+#######################################
+
+add_executable(lte-uesoftmodem
+  # generated headers
+  ${rrc_h}
+  ${s1ap_h}
+  ${OPENAIR_BIN_DIR}/messages_xml.h
+  # UE softmodem sources
+  ${OPENAIR_TARGETS}/RT/USER/rt_wrapper.c
+  ${OPENAIR_TARGETS}/RT/USER/lte-ue.c
+  ${OPENAIR_TARGETS}/RT/USER/lte-uesoftmodem.c
+  ${OPENAIR1_DIR}/SIMULATION/TOOLS/taus.c
+  ${OPENAIR_TARGETS}/SIMU/USER/init_lte.c
+  ${OPENAIR_TARGETS}/COMMON/create_tasks_ue.c
+  ${OPENAIR_TARGETS}/ARCH/COMMON/common_lib.c
+  ${OPENAIR1_DIR}/SIMULATION/ETH_TRANSPORT/netlink_init.c
+  ${OPENAIR3_DIR}/NAS/UE/nas_ue_task.c
+  ${OPENAIR_DIR}/common/utils/utils.c
+  ${OPENAIR_DIR}/common/utils/system.c
+  # optional / shared source lists
+  ${XFORMS_SOURCE}
+  ${XFORMS_SOURCE_SOFTMODEM}
+  ${T_SOURCE}
+  ${CONFIG_SOURCES}
+  ${SHLIB_LOADER_SOURCES}
+)
+
+# All link items in one call, in the same order as the former separate calls.
+target_link_libraries(lte-uesoftmodem
+  # project libraries inside one linker group
+  -Wl,--start-group
+  RRC_LIB S1AP_LIB S1AP_ENB GTPV1U SECU_CN SECU_OSA UTIL HASHTABLE SCTP_CLIENT UDP SCHED_UE_LIB PHY_UE LFDS L2_UE
+  ${MSC_LIB} ${RAL_LIB} ${NAS_UE_LIB} ${ITTI_LIB} ${FLPT_MSG_LIB} ${ASYNC_IF_LIB} LFDS7
+  -Wl,--end-group
+  z dl
+  # system libraries
+  ${LIBXML2_LIBRARIES}
+  pthread m ${CONFIG_LIBRARIES} rt crypt ${CRYPTO_LIBRARIES} ${OPENSSL_LIBRARIES} ${NETTLE_LIBRARIES} sctp
+  ${XFORMS_LIBRARIES} ${PROTOBUF_LIB} ${CMAKE_DL_LIBS} ${LIBYAML_LIBRARIES}
+  ${LIB_LMS_LIBRARIES}
+  ${T_LIB}
+)
+
+# lte-uesoftmodem-nos1: the UE implementation, linked without the
+# S1AP/GTPV1U/SCTP/UDP/NAS UE libraries used by lte-uesoftmodem
+###################################################
+add_executable(lte-uesoftmodem-nos1
+  # generated headers
+  ${rrc_h}
+  ${s1ap_h}
+  ${OPENAIR_BIN_DIR}/messages_xml.h
+  # UE softmodem sources
+  ${OPENAIR_TARGETS}/RT/USER/rt_wrapper.c
+  ${OPENAIR_TARGETS}/RT/USER/lte-ue.c
+  ${OPENAIR_TARGETS}/RT/USER/lte-uesoftmodem.c
+  ${OPENAIR1_DIR}/SIMULATION/TOOLS/taus.c
+  ${OPENAIR_TARGETS}/SIMU/USER/init_lte.c
+  ${OPENAIR_TARGETS}/COMMON/create_tasks_ue.c
+  ${OPENAIR_TARGETS}/ARCH/COMMON/common_lib.c
+  ${OPENAIR2_DIR}/RRC/NAS/nas_config.c
+  ${OPENAIR2_DIR}/RRC/NAS/rb_config.c
+  ${OPENAIR1_DIR}/SIMULATION/ETH_TRANSPORT/netlink_init.c
+  ${OPENAIR_DIR}/common/utils/system.c
+  # optional / shared source lists
+  ${XFORMS_SOURCE}
+  ${XFORMS_SOURCE_SOFTMODEM}
+  ${T_SOURCE}
+  ${CONFIG_SOURCES}
+  ${SHLIB_LOADER_SOURCES}
+)
+
+# All link items in one call, in the same order as the former separate calls.
+target_link_libraries(lte-uesoftmodem-nos1
+  # project libraries inside one linker group
+  -Wl,--start-group
+  RRC_LIB SECU_CN SECU_OSA UTIL HASHTABLE SCHED_UE_LIB PHY_UE LFDS L2_UE ${MSC_LIB} ${RAL_LIB} ${ITTI_LIB}
+  ${MIH_LIB} ${FLPT_MSG_LIB} ${ASYNC_IF_LIB} LFDS7
+  -Wl,--end-group
+  z dl
+  # system libraries
+  ${LIBXML2_LIBRARIES}
+  pthread m ${CONFIG_LIBRARIES} rt crypt ${CRYPTO_LIBRARIES} ${OPENSSL_LIBRARIES} ${NETTLE_LIBRARIES} sctp
+  ${XFORMS_LIBRARIES} ${PROTOBUF_LIB} ${CMAKE_DL_LIBS} ${LIBYAML_LIBRARIES}
+  ${LIB_LMS_LIBRARIES}
+  ${T_LIB}
+)
+
+# USIM process
+#################
+#add_executable(usim
+#  ${OPENAIR3_DIR}/NAS/TOOLS/usim_data.c
+#  ${OPENAIR3_DIR}/NAS/USER/API/USIM/usim_api.c
+#  ${OPENAIR3_DIR}/NAS/USER/API/USIM/aka_functions.c
+#  ${OPENAIR3_DIR}/NAS/COMMON/UTIL/memory.c
+#  ${OPENAIR3_DIR}/NAS/COMMON/UTIL/nas_log.c
+#  ${OPENAIR3_DIR}/NAS/COMMON/UTIL/OctetString.c
+#  ${OPENAIR3_DIR}/NAS/COMMON/UTIL/TLVEncoder.c
+#  )
+#target_link_libraries (usim ${NAS_LIB} UTIL ${ITTI_LIB} LFDS pthread rt nettle crypto m)
+
+# ???
+#####################
+#add_executable(nvram
+#  ${OPENAIR3_DIR}/NAS/TOOLS/ue_data.c
+#  ${OPENAIR3_DIR}/NAS/COMMON/UTIL/memory.c
+#  ${OPENAIR3_DIR}/NAS/COMMON/UTIL/nas_log.c
+#  )
+#target_link_libraries (nvram LIB_NAS_UE UTIL ${ITTI_LIB} LFDS pthread rt nettle crypto m)
+
+
+####################################
+# Add executables for tests
+####################################
+
+# An all-in-one network simulator
+################
+# oaisim is built from the UE/RU real-time sources, the SIMU/USER simulator
+# sources and the NAS UE task, plus the optional XFORMS/T/config/loader lists.
+add_executable(oaisim
+  ${rrc_h}
+  ${s1ap_h}
+  ${x2ap_h}
+  ${OPENAIR_BIN_DIR}/messages_xml.h
+  ${OPENAIR_TARGETS}/RT/USER/lte-ue.c
+  ${OPENAIR_TARGETS}/RT/USER/lte-ru.c
+  ${OPENAIR_TARGETS}/RT/USER/rt_wrapper.c
+  ${OPENAIR_TARGETS}/SIMU/USER/channel_sim.c
+  ${OPENAIR_TARGETS}/SIMU/USER/init_lte.c
+  ${OPENAIR_TARGETS}/SIMU/USER/oaisim_config.c
+  ${OPENAIR_TARGETS}/SIMU/USER/sinr_sim.c
+  ${OPENAIR_TARGETS}/SIMU/USER/cor_SF_sim.c
+  ${OPENAIR_TARGETS}/SIMU/USER/oaisim_functions.c
+  ${OPENAIR_TARGETS}/SIMU/USER/event_handler.c
+  ${OPENAIR_TARGETS}/SIMU/USER/oaisim.c
+  ${OPENAIR_TARGETS}/ARCH/COMMON/common_lib.c
+  ${OPENAIR2_DIR}/RRC/NAS/nas_config.c
+  ${OPENAIR2_DIR}/RRC/NAS/rb_config.c
+  ${OPENAIR3_DIR}/NAS/UE/nas_ue_task.c
+  ${OPENAIR_DIR}/common/utils/utils.c
+  ${OPENAIR_DIR}/common/utils/system.c
+  ${OPENAIR_TARGETS}/COMMON/create_tasks_ue.c
+  ${XFORMS_SOURCE}
+  ${T_SOURCE}
+  ${CONFIG_SOURCES}
+  ${SHLIB_LOADER_SOURCES}
+)
+
+
+# The simulator headers in SIMU/USER are visible to this target only.
+target_include_directories(oaisim PUBLIC  ${OPENAIR_TARGETS}/SIMU/USER)
+# Project libraries are wrapped in one linker group. Unlike the softmodem
+# targets, LIB_NAS_UE is named directly instead of through ${NAS_UE_LIB}.
+target_link_libraries (oaisim
+  -Wl,-ldl,--start-group
+  RRC_LIB S1AP_LIB S1AP_ENB X2AP_LIB SECU_CN UTIL HASHTABLE SCTP_CLIENT UDP SCHED_UE_LIB PHY_UE LFDS L2_UE ${MSC_LIB} LIB_NAS_UE SIMU SECU_OSA ${ITTI_LIB}  ${MIH_LIB}
+  ${FLPT_MSG_LIB} ${ASYNC_IF_LIB} ${FLEXRAN_AGENT_LIB} LFDS7
+  -Wl,--end-group z dl)
+
+# System libraries, including the Atlas/BLAS list and OpenPGM.
+target_link_libraries (oaisim ${LIBXML2_LIBRARIES} ${LAPACK_LIBRARIES})
+target_link_libraries (oaisim pthread m ${CONFIG_LIBRARIES} rt crypt ${CRYPTO_LIBRARIES} ${OPENSSL_LIBRARIES}  ${NETTLE_LIBRARIES} sctp z
+  ${ATLAS_LIBRARIES} ${XFORMS_LIBRARIES} ${OPENPGM_LIBRARIES} ${PROTOBUF_LIB} ${CMAKE_DL_LIBS} ${LIBYAML_LIBRARIES})
+#Force link with forms, regardless XFORMS option
+target_link_libraries (oaisim forms)
+target_link_libraries (oaisim ${T_LIB})
+
+
+# An all-in-one network simulator (oaisim_nos1 variant)
+################
+# Same simulator sources as oaisim except that nas_ue_task.c and utils.c are
+# not compiled in; it is linked without the S1AP, SCTP, UDP and NAS UE libraries.
+add_executable(oaisim_nos1
+  ${rrc_h}
+  ${s1ap_h}
+  ${x2ap_h}
+  ${OPENAIR_BIN_DIR}/messages_xml.h
+  ${OPENAIR_TARGETS}/RT/USER/lte-ue.c
+  ${OPENAIR_TARGETS}/RT/USER/lte-ru.c
+  ${OPENAIR_TARGETS}/RT/USER/rt_wrapper.c
+  ${OPENAIR_TARGETS}/SIMU/USER/channel_sim.c
+  ${OPENAIR_TARGETS}/SIMU/USER/init_lte.c
+  ${OPENAIR_TARGETS}/SIMU/USER/oaisim_config.c
+  ${OPENAIR_TARGETS}/SIMU/USER/sinr_sim.c
+  ${OPENAIR_TARGETS}/SIMU/USER/cor_SF_sim.c
+  ${OPENAIR_TARGETS}/SIMU/USER/oaisim_functions.c
+  ${OPENAIR_TARGETS}/SIMU/USER/event_handler.c
+  ${OPENAIR_TARGETS}/SIMU/USER/oaisim.c
+  ${OPENAIR_TARGETS}/ARCH/COMMON/common_lib.c
+  ${OPENAIR2_DIR}/RRC/NAS/nas_config.c
+  ${OPENAIR2_DIR}/RRC/NAS/rb_config.c
+  ${OPENAIR_TARGETS}/COMMON/create_tasks_ue.c
+  ${OPENAIR_DIR}/common/utils/system.c
+  ${XFORMS_SOURCE}
+  ${T_SOURCE}
+  ${CONFIG_SOURCES}
+  ${SHLIB_LOADER_SOURCES}
+)
+# The simulator headers in SIMU/USER are visible to this target only.
+target_include_directories(oaisim_nos1 PUBLIC  ${OPENAIR_TARGETS}/SIMU/USER)
+# Project libraries are wrapped in one linker group.
+target_link_libraries (oaisim_nos1
+  -Wl,--start-group
+  RRC_LIB X2AP_LIB SECU_CN UTIL HASHTABLE SCHED_UE_LIB PHY_UE LFDS ${MSC_LIB} ${ITTI_LIB} SIMU L2_UE ${FLPT_MSG_LIB} ${ASYNC_IF_LIB} LFDS7
+  -Wl,--end-group z dl )
+
+# System libraries, including the Atlas/BLAS list and OpenPGM.
+target_link_libraries (oaisim_nos1 ${LIBXML2_LIBRARIES} ${LAPACK_LIBRARIES})
+target_link_libraries (oaisim_nos1 pthread m ${CONFIG_LIBRARIES} rt crypt ${CRYPTO_LIBRARIES} ${OPENSSL_LIBRARIES}  ${NETTLE_LIBRARIES}  
+  ${ATLAS_LIBRARIES} ${XFORMS_LIBRARIES} ${OPENPGM_LIBRARIES} ${PROTOBUF_LIB} ${CMAKE_DL_LIBS} ${LIBYAML_LIBRARIES})
+#Force link with forms, regardless XFORMS option
+target_link_libraries (oaisim_nos1 forms)
+
+#message("protobuflib is  ${PROTOBUF_LIB}")
+
+target_link_libraries (oaisim_nos1 ${T_LIB})
+
+
+# Unitary tests for each piece of L1: example, mbmssim is MBMS L1 simulator
+#####################################
+
+#special case for dlsim TM4, which uses its own version of the phy_scope code
+#(lte_phy_scope_tm4.c is compiled in directly instead of ${XFORMS_SOURCE})
+add_executable(dlsim_tm4
+  ${OPENAIR_BIN_DIR}/messages_xml.h
+  ${OPENAIR1_DIR}/SIMULATION/LTE_PHY/dlsim_tm4.c
+  ${OPENAIR1_DIR}/PHY/TOOLS/lte_phy_scope_tm4.c
+  ${T_SOURCE}
+  )
+# Project libraries in one linker group, followed by the system libraries.
+target_link_libraries (dlsim_tm4
+  -Wl,--start-group SIMU UTIL SCHED_LIB PHY LFDS ${ITTI_LIB} -Wl,--end-group
+  pthread m rt ${CONFIG_LIBRARIES} ${ATLAS_LIBRARIES} ${XFORMS_LIBRARIES} ${T_LIB}
+  )
+
+# One executable per L1 simulator: each is built from
+# SIMULATION/LTE_PHY/<name>.c and linked against the same set of libraries.
+foreach(phy_sim
+    dlsim dlsim_tm7 ulsim pbchsim scansim
+    mbmssim pdcchsim pucchsim prachsim syncsim)
+  add_executable(${phy_sim}
+    ${OPENAIR_BIN_DIR}/messages_xml.h
+    ${OPENAIR1_DIR}/SIMULATION/LTE_PHY/${phy_sim}.c
+    ${XFORMS_SOURCE}
+    ${T_SOURCE}
+    ${CONFIG_SOURCES}
+    ${SHLIB_LOADER_SOURCES}
+  )
+  target_link_libraries(${phy_sim}
+    # project libraries inside one linker group
+    -Wl,--start-group
+    SIMU UTIL SCHED_LIB PHY LFDS ${ITTI_LIB} LFDS7
+    -Wl,--end-group
+    # system libraries
+    pthread m rt ${CONFIG_LIBRARIES} ${ATLAS_LIBRARIES} ${XFORMS_LIBRARIES} ${T_LIB} dl
+  )
+endforeach()
+
+# EPC test tool built from TEST/EPC_TEST/generate_scenario.c; the headers it
+# depends on are listed alongside the source.
+add_executable(test_epc_generate_scenario
+  ${OPENAIR3_DIR}/TEST/EPC_TEST/generate_scenario.c
+  ${OPENAIR3_DIR}/TEST/EPC_TEST/generate_scenario.h
+  ${OPENAIR2_DIR}/ENB_APP/enb_config.h
+  ${OPENAIR2_DIR}/COMMON/commonDef.h
+  ${OPENAIR2_DIR}/COMMON/messages_def.h
+  ${OPENAIR2_DIR}/COMMON/messages_types.h
+  ${OPENAIR3_DIR}/S1AP/s1ap_eNB_defs.h
+  ${OPENAIR_BIN_DIR}/messages_xml.h
+)
+target_link_libraries(test_epc_generate_scenario
+  # project libraries inside one linker group
+  -Wl,--start-group
+  RRC_LIB S1AP_LIB S1AP_ENB X2AP_LIB GTPV1U LIB_NAS_UE SECU_CN UTIL HASHTABLE
+  SCTP_CLIENT UDP SCHED_LIB PHY LFDS ${ITTI_LIB} ${MSC_LIB} L2
+  -Wl,--end-group
+  # system libraries
+  pthread m rt crypt sctp
+  ${LIBXML2_LIBRARIES} ${LIBXSLT_LIBRARIES} ${CRYPTO_LIBRARIES}
+  ${OPENSSL_LIBRARIES} ${NETTLE_LIBRARIES} ${CONFIG_LIBRARIES}
+)
+
+# EPC test tool built from the TEST/EPC_TEST/play_scenario*.c sources; the
+# headers it depends on are listed alongside the sources.
+add_executable(test_epc_play_scenario
+  ${OPENAIR3_DIR}/TEST/EPC_TEST/play_scenario.c
+  ${OPENAIR3_DIR}/TEST/EPC_TEST/play_scenario_decode.c
+  ${OPENAIR3_DIR}/TEST/EPC_TEST/play_scenario_display.c
+  ${OPENAIR3_DIR}/TEST/EPC_TEST/play_scenario_fsm.c
+  ${OPENAIR3_DIR}/TEST/EPC_TEST/play_scenario_parse.c
+  ${OPENAIR3_DIR}/TEST/EPC_TEST/play_scenario_s1ap.c
+  ${OPENAIR3_DIR}/TEST/EPC_TEST/play_scenario_s1ap_compare_ie.c
+  ${OPENAIR3_DIR}/TEST/EPC_TEST/play_scenario_s1ap_eNB_defs.h
+  ${OPENAIR3_DIR}/TEST/EPC_TEST/play_scenario_sctp.c
+  ${OPENAIR3_DIR}/TEST/EPC_TEST/play_scenario.h
+  ${OPENAIR2_DIR}/COMMON/commonDef.h
+  ${OPENAIR2_DIR}/COMMON/messages_def.h
+  ${OPENAIR2_DIR}/COMMON/messages_types.h
+  ${OPENAIR_BIN_DIR}/messages_xml.h
+)
+# Extra include directory used by this target only (hard-coded asn1c path).
+target_include_directories(test_epc_play_scenario PUBLIC /usr/local/share/asn1c)
+target_link_libraries(test_epc_play_scenario
+  # project libraries inside one linker group
+  -Wl,--start-group
+  RRC_LIB S1AP_LIB X2AP_LIB GTPV1U LIB_NAS_UE SECU_CN UTIL HASHTABLE
+  SCTP_CLIENT UDP SCHED_LIB PHY LFDS ${ITTI_LIB} ${MSC_LIB}
+  -Wl,--end-group
+  # system libraries
+  pthread m rt crypt sctp
+  ${LIBXML2_LIBRARIES} ${LIBXSLT_LIBRARIES} ${CRYPTO_LIBRARIES}
+  ${OPENSSL_LIBRARIES} ${NETTLE_LIBRARIES} ${CONFIG_LIBRARIES}
+)
+
+
+# Unit tests for core network pieces
+#################################
+# Each entry <name> yields an executable test_<name> built from
+# ${OPENAIR3_DIR}/TEST/test_<name>.c.
+foreach(cn_test
+    s1ap
+    secu_knas_encrypt_eia1
+    secu_kenb
+    aes128_ctr_encrypt
+    aes128_ctr_decrypt
+    secu_knas_encrypt_eea2
+    secu_knas
+    secu_knas_encrypt_eea1
+    kdf
+    aes128_cmac_encrypt
+    secu_knas_encrypt_eia2)
+  add_executable(test_${cn_test} ${OPENAIR3_DIR}/TEST/test_${cn_test}.c)
+  target_link_libraries(test_${cn_test}
+    -Wl,--start-group
+    SECU_CN UTIL LFDS
+    -Wl,--end-group
+    m rt crypt ${CRYPTO_LIBRARIES} ${OPENSSL_LIBRARIES} ${NETTLE_LIBRARIES} ${CONFIG_LIBRARIES}
+  )
+endforeach()
+
+# to be added
+#../targets/TEST/PDCP/test_pdcp.c
+#../targets/TEST/PDCP/with_rlc/test_pdcp_rlc.c
+
+#ensure that the T header files are generated before targets depending on them
+#Every name is checked with if(TARGET ...) first, so entries that do not exist
+#in the current configuration are skipped silently.
+if (${T_TRACER})
+  foreach(i
+        #all "add_executable" definitions (except tests, rb_tool, updatefw)
+        #lte-uesoftmodem and lte-uesoftmodem-nos1 compile ${T_SOURCE} too and
+        #therefore need the generated T files like the other softmodems
+        lte-softmodem lte-softmodem-nos1 lte-uesoftmodem lte-uesoftmodem-nos1
+        oaisim oaisim_nos1
+        dlsim_tm4 dlsim dlsim_tm7 ulsim pbchsim scansim mbmssim
+        pdcchsim pucchsim prachsim syncsim
+        #all "add_library" definitions
+        ITTI RRC_LIB S1AP_LIB S1AP_ENB X2AP_LIB
+        oai_exmimodevif oai_usrpdevif oai_bladerfdevif oai_lmssdrdevif
+        oai_eth_transpro
+        FLPT_MSG ASYNC_IF FLEXRAN_AGENT HASHTABLE MSC UTIL OMG_SUMO SECU_OSA
+        SECU_CN SCHED_LIB PHY L2 default_sched remote_sched RAL CN_UTILS
+        GTPV1U SCTP_CLIENT UDP LIB_NAS_UE LFDS LFDS7 SIMU OPENAIR0_LIB
+        #libraries that were missing from the list
+        SIMU_ETH NB_IoT SCHED_UE_LIB PHY_UE L2_UE)
+    if (TARGET ${i})
+      add_dependencies(${i} generate_T)
+    endif()
+  endforeach()
+endif ()
+
+##################################################
+# Special cases: generated files that are not regular source code
+###############################################
+
+##################
+# ITTI symbolic debug printing requires a specific generated include file
+########################################
+
+# Build the command that dumps the ITTI message types to XML.
+# The XML tool has to parse ${ITTI_H} with the same preprocessor view as the
+# regular build, so the directory-level compile definitions and include
+# directories are replayed as -D / -I options.
+get_directory_property( DirDefs COMPILE_DEFINITIONS )
+foreach( d ${DirDefs} )
+    list(APPEND itti_compiler_options "-D${d}")
+endforeach()
+# DirDefs is reused here to hold the include directories
+get_directory_property( DirDefs INCLUDE_DIRECTORIES )
+foreach( d ${DirDefs} )
+    list(APPEND itti_compiler_options "-I${d}")
+endforeach()
+
+# castxml doesn't work with c11 (gcc 5 default)
+# force castxml and clang compilation with gnu89 standard
+# we can't use cXX standard as pthread_rwlock_t is gnu standard
+list(APPEND itti_compiler_options "-std=gnu89")
+set (ITTI_H ${ITTI_DIR}/intertask_interface_types.h)
+# gccxml is used when /usr/bin/gccxml exists, otherwise castxml is run in its
+# gccxml-compatible output mode; both write ${OPENAIR_BIN_DIR}/messages.xml
+if(EXISTS /usr/bin/gccxml)
+   set(xml_command gccxml ${itti_compiler_options} -fxml=${OPENAIR_BIN_DIR}/messages.xml ${ITTI_H})
+else()
+   set(xml_command castxml --castxml-gccxml ${itti_compiler_options} ${ITTI_H} -o ${OPENAIR_BIN_DIR}/messages.xml)
+endif()
+
+# Generate messages.xml by running ${xml_command} (gccxml or castxml) on
+# ${ITTI_H}. The parsed header itself is listed as a dependency so the XML is
+# regenerated whenever the ITTI type definitions change, not only when the
+# generated S1AP/RRC files do.
+add_custom_command (
+  OUTPUT ${OPENAIR_BIN_DIR}/messages.xml
+  COMMAND ${xml_command}
+  DEPENDS ${ITTI_H} ${S1AP_OAI_generated} ${RRC_FULL_DIR}/asn1_constants.h
+  )
+
+# Generate messages_xml.h from messages.xml with sed. The four sed expressions
+# strip the leading spaces of each line, backslash-escape the double quotes,
+# open a quote at the start of the line and close it at the end; the result
+# looks intended to be a sequence of C string literals, one per XML line.
+# "$$" is how a literal "$" (sed end-of-line anchor) is passed through make.
+# NOTE(review): the command has no VERBATIM, so the exact escaping depends on
+# how the generator and the shell process the backslashes -- confirm before
+# touching it.
+add_custom_command (
+  OUTPUT ${OPENAIR_BIN_DIR}/messages_xml.h
+  COMMAND sed -e 's/ *//'   -e 's/\"/\\\\\"/g' -e 's/^/\"/' -e 's/$$/\\\\n\"/' ${OPENAIR_BIN_DIR}/messages.xml  > ${OPENAIR_BIN_DIR}/messages_xml.h
+  DEPENDS ${OPENAIR_BIN_DIR}/messages.xml ${RRC_FULL_DIR}/asn1_constants.h
+  )
+
+################
+# Kernel modules
+###############
+# Set compiler options for kernel modules.
+# Kernel modules cannot be compiled by cmake itself: we step outside cmake and
+# use the regular Linux kernel build process, which is documented at
+# https://www.kernel.org/doc/Documentation/kbuild/modules.txt
+######################################
+
+# Collect the directory-level compile definitions and include directories into
+# module_cc_opt, a single space-separated string of -D / -I options for the
+# kernel module builds.
+get_directory_property(DirDefs COMPILE_DEFINITIONS )
+foreach( d ${DirDefs} )
+  set(module_cc_opt "${module_cc_opt} -D${d}")
+endforeach()
+get_directory_property( DirDefs INCLUDE_DIRECTORIES )
+foreach( d ${DirDefs} )
+  set(module_cc_opt "${module_cc_opt} -I${d}")
+endforeach()
+
+# The modules are built against the kernel that is running at configure time:
+# "uname -r" is evaluated once, when cmake runs.
+EXECUTE_PROCESS(COMMAND uname -r
+  OUTPUT_VARIABLE os_release
+  OUTPUT_STRIP_TRAILING_WHITESPACE)
+SET(module_build_path /lib/modules/${os_release}/build)
+
+# make_driver(<name> <dir> <source>...)
+#   Declare a custom target <name> that builds the kernel module <name>.ko by
+#   calling the kernel build system (make -C ${module_build_path} M=...).
+#     <name>    module name, also the name of the custom target
+#     <dir>     directory that holds the module sources
+#     <source>  source file names, relative to <dir>
+#   A Kbuild file is generated in ${OPENAIR_BIN_DIR}/<name> from
+#   tools/Kbuild.cmake; the template is expanded with the variables visible
+#   here (presumably name, objs and module_cc_opt -- confirm in the template).
+function(make_driver name dir)
+  # a function starts with a copy of the caller's variables: reset the
+  # accumulators so nothing set by the caller leaks into this module
+  set(src_path_list)
+  set(objs)
+  file(MAKE_DIRECTORY ${OPENAIR_BIN_DIR}/${name})
+  foreach(f  IN  ITEMS ${ARGN})
+    list(APPEND src_path_list ${dir}/${f})
+    # foo.c -> foo.o, the object name expected in the Kbuild file
+    string(REGEX REPLACE "c *$" "o" obj ${f})
+    set(objs "${objs} ${obj}")
+  endforeach()
+  CONFIGURE_FILE(${OPENAIR_CMAKE}/tools/Kbuild.cmake ${OPENAIR_BIN_DIR}/${name}/Kbuild)
+  # The sources are listed with DEPENDS: add_custom_command has no SOURCES
+  # keyword, so the former "SOURCES ..." never registered them as inputs.
+  add_custom_command(OUTPUT ${name}.ko
+    COMMAND make -C ${module_build_path} M=${OPENAIR_BIN_DIR}/${name}
+    DEPENDS ${src_path_list}
+    WORKING_DIRECTORY ${OPENAIR_BIN_DIR}/${name}
+    COMMENT "building ${name}.ko"
+    VERBATIM
+    )
+  add_custom_target(${name} DEPENDS ${name}.ko)
+endfunction()
+
+# nasmesh module
+################
+list(APPEND nasmesh_src device.c common.c ioctl.c classifier.c tool.c mesh.c)
+# these two definitions are appended to the options shared by every kernel
+# module declared after this point
+set(module_cc_opt "${module_cc_opt} -DNAS_NETLINK -DPDCP_USE_NETLINK")
+# The legacy Makefile used the NAS_NETLINK flag, whereas the other drivers use
+# OAI_NW_DRIVER_USE_NETLINK; this cmake file uses OAI_NW_DRIVER_USE_NETLINK
+# everywhere to decide whether netlink.c is compiled in.
+if (OAI_NW_DRIVER_USE_NETLINK)
+  list(APPEND nasmesh_src netlink.c)
+endif()
+make_driver(nasmesh  ${OPENAIR2_DIR}/NETWORK_DRIVER/MESH ${nasmesh_src})
+
+# user space tool for configuring MESH IP driver
+################
+# rb_tool is compiled from RB_TOOL/rb_tool.c; the driver headers it shares with
+# the MESH module are listed as sources too, and the MESH directory is added
+# to its private include path.
+add_executable(rb_tool
+  ${OPENAIR2_DIR}/NETWORK_DRIVER/MESH/constant.h
+  ${OPENAIR2_DIR}/NETWORK_DRIVER/MESH/ioctl.h
+  ${OPENAIR2_DIR}/NETWORK_DRIVER/MESH/rrc_nas_primitives.h
+  ${OPENAIR2_DIR}/NETWORK_DRIVER/MESH/RB_TOOL/rb_tool.c
+)
+target_include_directories(rb_tool PRIVATE ${OPENAIR2_DIR}/NETWORK_DRIVER/MESH/)
+
+# oai_nw_drv kernel module, built from NETWORK_DRIVER/LITE
+# NOTE(review): the purpose of this driver is not documented here
+####################
+list(APPEND oai_nw_drv_src device.c common.c ioctl.c classifier.c tool.c)
+if(OAI_NW_DRIVER_USE_NETLINK)
+  list(APPEND oai_nw_drv_src netlink.c)
+endif()
+make_driver(oai_nw_drv ${OPENAIR2_DIR}/NETWORK_DRIVER/LITE ${oai_nw_drv_src})
+
+# Exmimo board drivers
+#########################
+list(APPEND openair_rf_src module_main.c irq.c fileops.c exmimo_fw.c)
+make_driver(openair_rf ${OPENAIR_TARGETS}/ARCH/EXMIMO/DRIVER/eurecom ${openair_rf_src})
+
+# user space program built from OAI_FW_INIT/updatefw.c
+add_executable(updatefw
+  ${OPENAIR_TARGETS}/ARCH/EXMIMO/USERSPACE/OAI_FW_INIT/updatefw.c
+)
+
+# ue_ip kernel module, built from NETWORK_DRIVER/UE_IP
+# NOTE(review): the purpose of this driver is not documented here
+###############
+list(APPEND ue_ip_src device.c common.c)
+if(OAI_NW_DRIVER_USE_NETLINK)
+  list(APPEND ue_ip_src netlink.c)
+endif()
+make_driver(ue_ip ${OPENAIR2_DIR}/NETWORK_DRIVER/UE_IP ${ue_ip_src})
+
+
+# OCTAVE tools
+###############
+# Each .oct file listed in OCT_FILES is compiled by mkoctfile from the .cc
+# file of the same base name in ${OCT_DIR}; the "oarf" target builds them all.
+set(OCT_INCL -I${OPENAIR_TARGETS}/ARCH/EXMIMO/DEFS -I${OPENAIR_TARGETS}/ARCH/EXMIMO/USERSPACE/LIB -I${OPENAIR_TARGETS}/ARCH/COMMON)
+set(OCT_LIBS -L${CMAKE_CURRENT_BINARY_DIR} -lm -lOPENAIR0_LIB)
+set(OCT_FLAGS -DEXMIMO)
+set(OCT_DIR ${OPENAIR_TARGETS}/ARCH/EXMIMO/USERSPACE/OCTAVE)
+# Every output must appear exactly once: the loop below declares one custom
+# command per entry, and an output file may only be produced by a single rule.
+set(OCT_FILES
+  oarf_config_exmimo.oct
+  oarf_get_frame.oct
+  oarf_stop.oct
+  oarf_send_frame.oct
+  oarf_get_num_detected_cards.oct
+  oarf_stop_without_reset.oct
+)
+
+foreach(file IN ITEMS ${OCT_FILES})
+  # foo.oct is built from foo.cc
+  string(REGEX REPLACE "oct *$" "cc" src ${file})
+  add_custom_command(
+    OUTPUT ${file}
+    DEPENDS ${OCT_DIR}/${src} OPENAIR0_LIB
+    COMMAND mkoctfile
+    ARGS ${OCT_FLAGS} ${OCT_INCL} ${OCT_LIBS}
+    ARGS -o ${file} ${OCT_DIR}/${src}
+    COMMENT "Generating ${file}"
+    VERBATIM
+  )
+endforeach()
+
+add_custom_target(oarf
+   DEPENDS ${OCT_FILES}
+)
+
+include (${OPENAIR_DIR}/common/utils/telnetsrv/telnetsrv_CMakeLists.txt)
+
+
+
+
diff --git a/cmake_targets/tools/build_helper b/cmake_targets/tools/build_helper
new file mode 100755
index 0000000000000000000000000000000000000000..f0ae7e79c8240c34f0b196613f0e54f556601122
--- /dev/null
+++ b/cmake_targets/tools/build_helper
@@ -0,0 +1,771 @@
+#/*
+# * Licensed to the OpenAirInterface (OAI) Software Alliance under one or more
+# * contributor license agreements.  See the NOTICE file distributed with
+# * this work for additional information regarding copyright ownership.
+# * The OpenAirInterface Software Alliance licenses this file to You under
+# * the OAI Public License, Version 1.1  (the "License"); you may not use this file
+# * except in compliance with the License.
+# * You may obtain a copy of the License at
+# *
+# *      http://www.openairinterface.org/?page_id=698
+# *
+# * Unless required by applicable law or agreed to in writing, software
+# * distributed under the License is distributed on an "AS IS" BASIS,
+# * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# * See the License for the specific language governing permissions and
+# * limitations under the License.
+# *-------------------------------------------------------------------------------
+# * For more information about the OpenAirInterface (OAI) Software Alliance:
+# *      contact@openairinterface.org
+# */
+
+# file build_helper
+# brief
+# authors Laurent Thomas, Lionel GAUTHIER
+#
+#######################################
+# Distribution detection: everything below relies on /etc/os-release.
+if [ ! -f /etc/os-release ]; then
+  echo "No /etc/os-release file found. You're likely on an unsupported distro."
+  exit -1
+fi
+# ID and VERSION_ID fields of /etc/os-release, with the double quotes removed
+OS_DISTRO=$(grep "^ID=" /etc/os-release | sed "s/ID=//" | sed "s/\"//g")
+OS_RELEASE=$(grep "^VERSION_ID=" /etc/os-release | sed "s/VERSION_ID=//" | sed "s/\"//g")
+# Map the distribution to its family (OS_BASEDISTRO), its package manager
+# (INSTALLER) and the name of its cmake executable (CMAKE).
+# There is no default branch: on any other distribution these three variables
+# are left unset.
+case "$OS_DISTRO" in
+  fedora) OS_BASEDISTRO="fedora"; INSTALLER="dnf"; CMAKE="cmake" ;;
+  rhel)   OS_BASEDISTRO="fedora"; INSTALLER="yum"; CMAKE="cmake3" ;;
+  centos) OS_BASEDISTRO="fedora"; INSTALLER="yum"; CMAKE="cmake3" ;;
+  debian) OS_BASEDISTRO="debian"; INSTALLER="apt-get"; CMAKE="cmake" ;;
+  ubuntu) OS_BASEDISTRO="debian"; INSTALLER="apt-get"; CMAKE="cmake" ;;
+esac
+# first and second dot-separated fields of "uname -r"
+KERNEL_VERSION=$(uname -r | cut -d '.' -f1)
+KERNEL_MAJOR=$(uname -r | cut -d '.' -f2)
+
+# privileged commands are run through sudo, keeping the caller's environment (-E)
+SUDO='sudo -E'
+
+###############################
+## echo and  family
+###############################
+black='\E[30m'
+red='\E[31m'
+green='\E[32m'
+yellow='\E[33m'
+blue='\E[1;34m'
+magenta='\E[35m'
+cyan='\E[36m'
+white='\E[37m'
+reset_color='\E[00m'
+COLORIZE=1
+
+cecho()  {
+    # Color-echo: print a message, wrapped in a color escape sequence when
+    # COLORIZE is "1".
+    # arg1 = message (defaults to "No Message.")
+    # arg2 = color escape sequence (defaults to $green)
+    local default_msg="No Message."
+    # working variables are local so that printing a message never clobbers a
+    # caller's own $message or $color
+    local message=${1:-$default_msg}
+    local color=${2:-$green}
+    [ "$COLORIZE" = "1" ] && message="$color$message$reset_color"
+    # -e makes echo interpret the escape sequences
+    echo -e "$message"
+    return
+}
+
+echo_error()   { cecho "$*" $red          ;}
+echo_fatal()   { cecho "$*" $red; exit -1 ;}
+echo_warning() { cecho "$*" $yellow       ;}
+echo_success() { cecho "$*" $green        ;}
+echo_info()    { cecho "$*" $blue         ;}
+
+########################
+# distribution helpers #
+########################
+
+# This function return a string to identify the distribution we are running
+# If we can't check the distribution, it returns "Unknown"
+# This function return always true as exit code by design
+# Examples:
+#   ubuntu16.04
+#   debian8.5
+get_distribution_release() {
+    if [[ ! -z "$OS_DISTRO$OS_RELEASE" ]]; then
+        echo "$OS_DISTRO$OS_RELEASE"
+    else
+        echo Unknown
+    fi
+}
+
+check_supported_distribution() {
+    # Succeeds (status 0) only for the distribution releases listed below;
+    # any other value, including "Unknown", yields status 1.
+    case "$(get_distribution_release)" in
+        ubuntu17.10|ubuntu17.04|ubuntu16.04|ubuntu14.04)
+            return 0
+            ;;
+        fedora24|rhel7|centos7)
+            return 0
+            ;;
+        *)
+            return 1
+            ;;
+    esac
+}
+
+##################
+# Error handlers #
+##################
+
+handler_EXIT() {
+	local exit_code=$?
+    [ "$exit_code" -eq 0 ] || echo_error "build have failed"
+	exit $exit_code
+}
+
+trap handler_EXIT EXIT
+
+###########################
+# Cleaners
+###########################
+
+clean_kernel() {
+    $SUDO modprobe ip_tables
+    $SUDO modprobe x_tables
+    $SUDO iptables -P INPUT ACCEPT
+    $SUDO iptables -F INPUT
+    $SUDO iptables -P OUTPUT ACCEPT
+    $SUDO iptables -F OUTPUT
+    $SUDO iptables -P FORWARD ACCEPT
+    $SUDO iptables -F FORWARD
+    $SUDO iptables -t nat -F
+    $SUDO iptables -t mangle -F
+    $SUDO iptables -t filter -F
+    $SUDO iptables -t raw -F
+    echo_info "Flushed iptables"
+    $SUDO rmmod nasmesh > /dev/null 2>&1
+    $SUDO rmmod oai_nw_drv  > /dev/null 2>&1
+    $SUDO rmmod openair_rf > /dev/null 2>&1
+    $SUDO rmmod ue_ip > /dev/null 2>&1
+    echo_info "removed drivers from kernel"
+}
+
+clean_all_files() {
+ # Remove the logs and every build directory/output under cmake_targets and
+ # targets/bin.
+ set_openair_env
+ # Abort if OPENAIR_DIR is empty: the "rm -rf" paths below would otherwise be
+ # rooted at "/" (e.g. /cmake_targets/log, /targets/bin/*).
+ dir=${OPENAIR_DIR:?OPENAIR_DIR is not set, refusing to clean}/cmake_targets
+ # variables are quoted so a path containing spaces is not split; the globs
+ # stay outside the quotes so they are still expanded
+ rm -rf "$dir"/log "$OPENAIR_DIR"/targets/bin/*
+ rm -rf "$dir"/lte_build_oai "$dir"/lte-simulators/build
+ rm -rf "$dir"/oaisim_build_oai/build "$dir"/oaisim_build_oai/CMakeLists.txt
+ rm -rf "$dir"/autotests/bin "$dir"/autotests/log "$dir"/autotests/*/build
+}
+
+###################################
+# Compilers
+###################################
+
+#check_warnings:
+#    print error message if the compilation had warnings
+#argument:
+#    $1: log file
+check_warnings() {
+  #we look for 'warning:' in the compilation log file
+  #this is how gcc starts a warning
+  #this is not perfect, we may get false positive
+  warning_count=`grep "warning:" "$1"|wc -l`
+  if [ $warning_count -gt 0 ]; then
+    echo_error "WARNING: $warning_count warnings. See $1"
+  fi
+}
+
+compilations() {
+  # Build one make target and collect its product.
+  #   $1: directory under cmake_targets whose build/ sub-directory is used
+  #   $2: make target
+  #   $3: file the build is expected to produce
+  #   $4: destination the produced file is copied to
+  # Uses the globals $dlog (log directory), $REL and $VERBOSE_COMPILE.
+  local build_log=$dlog/$2.$REL.txt
+  local verbose_arg=""
+  cd $OPENAIR_DIR/cmake_targets/$1/build
+  if [ "$VERBOSE_COMPILE" == "1" ]; then
+    verbose_arg="VERBOSE=$VERBOSE_COMPILE"
+  fi
+  # errexit is suspended during the build: success is judged from the presence
+  # of the product, not from make's exit status
+  set +e
+  {
+    rm -f $3
+    make -j$(nproc) $2 $verbose_arg
+  } > $build_log 2>&1
+  set -e
+  echo_info "Log file for compilation has been written to: $dlog/$2.$REL.txt"
+  if [ ! -s $3 ] ; then
+     echo_error "$2 compilation failed"
+     exit 1
+  fi
+  cp $3 $4
+  echo_success "$2 compiled"
+  check_warnings "$dlog/$2.$REL.txt"
+}
+
+############################################
+# External packages installers
+############################################
+
+install_protobuf_from_source(){
+    # Download, build and install the protobuf-cpp 3.3.0 release tarball.
+    # The work is done in a subshell, so the caller's working directory is
+    # left alone; all output goes to the log file.
+    protobuf_install_log=$OPENAIR_DIR/cmake_targets/log/protobuf_install_log.txt
+    echo_info "\nInstalling Google Protobuf from sources. The log file for Protobuf installation is here: $protobuf_install_log "
+    (
+        cd /tmp
+        echo "Downloading protobuf"
+        # start from a clean slate: drop any earlier download or source tree
+        rm -rf /tmp/protobuf-cpp-3.3.0.tar.gz* /tmp/protobuf-3.3.0
+        wget https://github.com/google/protobuf/releases/download/v3.3.0/protobuf-cpp-3.3.0.tar.gz
+        tar -xzvf protobuf-cpp-3.3.0.tar.gz --owner $USER --group $(groups | cut -d" " -f1) --no-same-owner
+        cd protobuf-3.3.0/
+        ./configure
+        echo "Compiling protobuf"
+        make -j$(nproc)
+        $SUDO make install
+        $SUDO ldconfig
+    ) > $protobuf_install_log 2>&1
+}
+
+install_protobuf_c_from_source(){
+    # Clone protobuf-c, check out a fixed commit, build and install it.
+    # Runs in a subshell with all output sent to the log file.
+    protobuf_c_install_log=$OPENAIR_DIR/cmake_targets/log/protobuf_c_install_log.txt
+    echo_info "\nInstalling Google Protobuf_C from sources. The log file for Protobuf_C installation is here: $protobuf_c_install_log "
+    (
+        # on RHEL/CentOS pkg-config is pointed at /usr/local/lib/pkgconfig
+        case "$OS_DISTRO" in
+            rhel|centos)
+                export PKG_CONFIG_PATH=/usr/local/lib/pkgconfig
+                ;;
+        esac
+        cd /tmp
+        echo "Downloading protobuf-c"
+        rm -rf /tmp/protobuf-c
+        git clone https://github.com/protobuf-c/protobuf-c.git
+        cd protobuf-c
+        git checkout 2a46af42784abf86804d536f6e0122d47cfeea45
+        ./autogen.sh
+        ./configure
+        echo "Compiling protobuf-c"
+        make -j$(nproc)
+        $SUDO make install
+        $SUDO ldconfig
+    ) > $protobuf_c_install_log 2>&1
+}
+
+install_usrp_uhd_driver_from_source(){
+    # Clone the UHD repository, check out tag release_003_010_001_001, then
+    # build, test and install the host driver. Runs in a subshell with all
+    # output sent to the log file.
+    uhd_install_log=$OPENAIR_DIR/cmake_targets/log/uhd_install_log.txt
+    echo_info "\nInstalling UHD driver from sources. The log file for UHD driver installation is here: $uhd_install_log "
+    (
+        cd /tmp
+        echo "Downloading UHD driver"
+        rm -rf /tmp/uhd
+        git clone https://github.com/EttusResearch/uhd.git
+        cd uhd
+        git checkout tags/release_003_010_001_001
+        # out-of-source build in host/build
+        mkdir -p host/build
+        cd host/build
+        $CMAKE ../
+        echo "Compiling UHD"
+        make -j$(nproc)
+        make test
+        $SUDO make install
+        $SUDO ldconfig
+    ) > $uhd_install_log 2>&1
+}
+
+check_install_usrp_uhd_driver(){
+    # Install the UHD driver and its prerequisites with the distribution's
+    # package manager, or from source on RHEL/CentOS.
+    if [[ "$OS_DISTRO" == "ubuntu" ]]; then
+        local codename
+        # remove any previous installation first
+        $SUDO apt-get remove -y uhd || true
+        $SUDO apt-get remove libuhd-dev libuhd003 uhd-host -y
+        codename=$(lsb_release -cs)
+        $SUDO apt-add-repository --remove "deb http://files.ettus.com/binaries/uhd/repo/uhd/ubuntu/$codename $codename main"
+        # the new USRP repository
+        $SUDO add-apt-repository ppa:ettusresearch/uhd -y
+        $SUDO apt-get update
+        $SUDO apt-get -y --allow-unauthenticated install  python python-tk libboost-all-dev libusb-1.0-0-dev
+        $SUDO apt-get -y --allow-unauthenticated install libuhd-dev libuhd003 uhd-host
+    elif [[ "$OS_BASEDISTRO" == "fedora" ]]; then
+        $SUDO $INSTALLER -y install python boost libusb-devel libusbx-devel boost-devel python-mako python-docutils cmake
+        $SUDO pip install requests
+        case "$OS_DISTRO" in
+            rhel|centos)
+                # as long as EPEL does not ship UHD >= 3.10, build the driver from source
+                $SUDO $INSTALLER -y remove uhd uhd-devel uhd-firmware
+                install_usrp_uhd_driver_from_source
+                ;;
+            *)
+                $SUDO $INSTALLER -y install uhd uhd-devel uhd-firmware
+                ;;
+        esac
+    fi
+}
+
+install_usrp_uhd_driver() {
+    # Install uhd-host (Ubuntu only) and run uhd_images_downloader.
+    # arg1 (optional): value passed to "uhd_images_downloader -i"
+    if [[ "$OS_DISTRO" == "ubuntu" ]]; then
+        # We move uhd-host apart because it depends on linux kernel version
+        # On newer kernels, it fails to install
+        $SUDO apt-get -y install uhd-host
+    fi
+    # "$1" is quoted: an argument containing spaces must neither break the
+    # test nor be split into several words for the downloader
+    if [ -z "$1" ]; then
+      $SUDO uhd_images_downloader
+    else
+      $SUDO uhd_images_downloader -i "$1"
+    fi
+}
+
+install_bladerf_driver_from_source(){
+    # Clone the bladeRF repository, check out tag 2016.06, build and install
+    # the driver, then download the firmware and FPGA images into
+    # /usr/share/Nuand/bladeRF. Runs in a subshell with all output sent to the
+    # log file.
+    bladerf_install_log=$OPENAIR_DIR/cmake_targets/log/bladerf_install_log.txt
+    echo_info "\nInstalling BladeRF driver from sources. The log file for BladeRF driver installation is here: $bladerf_install_log "
+    (
+    cd /tmp
+    echo "Downloading BladeRF driver"
+    rm -rf /tmp/bladeRF
+    git clone https://github.com/Nuand/bladeRF.git
+    cd bladeRF
+    git checkout tags/2016.06
+    mkdir -p build
+    cd build
+    $CMAKE ../
+    echo "Compiling BladeRF driver"
+    make
+    $SUDO make install
+    $SUDO ldconfig
+    echo "Downloading FPGA and firmware images"
+    cd /tmp/bladeRF
+    wget https://www.nuand.com/fx3/bladeRF_fw_latest.img
+    wget https://www.nuand.com/fpga/hostedx40-latest.rbf
+    # privileged steps go through $SUDO like everywhere else in this file, so
+    # a caller that overrides SUDO is honoured here too
+    $SUDO mkdir -p /usr/share/Nuand/bladeRF
+    $SUDO mv bladeRF_fw_latest.img /usr/share/Nuand/bladeRF/bladeRF_fw.img
+    $SUDO mv hostedx40-latest.rbf /usr/share/Nuand/bladeRF/hostedx40.rbf
+    ) >& $bladerf_install_log
+}
+
+check_install_bladerf_driver(){
+    # Ubuntu: install the packaged driver, firmware and FPGA image.
+    # Fedora family: build from source. Anything else: report an error.
+    local pkg_group
+    if [[ "$OS_DISTRO" == "ubuntu" ]]; then
+        # on 14.04 the packages come from the bladerf PPA
+        if [ "$(get_distribution_release)" == "ubuntu14.04" ] ; then
+            $SUDO add-apt-repository -y ppa:bladerf/bladerf
+            $SUDO apt-get update
+        fi
+        # one apt-get run per group, in this order; $pkg_group is deliberately
+        # unquoted so that the first group expands to two package names
+        for pkg_group in "bladerf libbladerf-dev" bladerf-firmware-fx3 bladerf-fpga-hostedx40; do
+            $SUDO apt-get install -y --allow-unauthenticated $pkg_group
+        done
+    elif [[ "$OS_BASEDISTRO" == "fedora" ]]; then
+        install_bladerf_driver_from_source
+    else
+        echo_error "BladeRF Installer for OAI does not support automatic build. Install BladeRF compiling sources manually from BladeRF website"
+    fi
+}
+
+# Flash the firmware image stored at /usr/share/Nuand/bladeRF/bladeRF_fw.img
+# with bladeRF-cli.
+flash_firmware_bladerf() {
+	$SUDO bladeRF-cli --flash-firmware /usr/share/Nuand/bladeRF/bladeRF_fw.img
+}
+
+check_install_lmssdr_driver(){
+	if ( [ -d "/usr/local/include/lime" ] &&
+             [ -f "/usr/local/include/lime/LimeSuite.h" ] )
+	then
+  		echo_success "Found lmssdr drivers and tools installed from source"
+        else
+                echo_error "lmssdr support implies installing lmssdr drivers and tools" \
+                           " from sources. check:"
+                echo_info "https://open-cells.com/index.php/2017/05/10/limesdr-installation/"
+                echo_fatal "Cannot compile lmssdr device" 
+	fi
+
+
+}
+
+check_install_additional_tools (){
+  $SUDO $INSTALLER update -y
+  if [[ "$OS_DISTRO" == "ubuntu" ]]; then
+    PACKAGE_LIST="\
+	check \
+	dialog \
+	dkms \
+	gawk \
+	libboost-all-dev \
+	libpthread-stubs0-dev \
+	openvpn \
+	pkg-config \
+	python-dev  \
+	python-pexpect \
+	sshfs \
+	swig  \
+	tshark \
+	uml-utilities \
+	unzip  \
+	valgrind  \
+	vlan	  \
+	ctags \
+        ntpdate \
+        iperf3 \
+        android-tools-adb \
+	wvdial \
+        python-numpy \
+        sshpass \
+        nscd \
+        bc \
+        ntp \
+        python-scipy \
+        python-matplotlib"
+  elif [[ "$OS_DISTRO" == "rhel" ]] || [[ "$OS_DISTRO" == "centos" ]]; then
+    PACKAGE_LIST="\
+      check \
+      dialog \
+      dkms \
+      gawk \
+      boost-devel \
+      openvpn \
+      pkgconfig \
+      pexpect \
+      sshfs \
+      swig  \
+      wireshark \
+      unzip  \
+      valgrind  \
+      vconfig	  \
+      ctags \
+      ntpdate \
+      iperf3 \
+      wvdial \
+      numpy \
+      sshpass \
+      nscd \
+      python2-paramiko \
+      python-pyroute2 \
+      python-netifaces \
+      scipy \
+      python-matplotlib"
+  elif [[ "$OS_DISTRO" == "fedora" ]]; then
+    PACKAGE_LIST=" \
+      check \
+      dialog \
+      dkms \
+      gawk \
+      boost-devel \
+      openvpn \
+      pkgconfig \
+      python-pexpect \
+      sshfs \
+      swig  \
+      wireshark \
+      unzip  \
+      valgrind  \
+      vconfig	  \
+      ctags \
+      ntpdate \
+      iperf3 \
+      wvdial \
+      python-numpy \
+      sshpass \
+      nscd \
+      python2-paramiko \
+      python-pyroute2 \
+      python-netifaces \
+      python2-scipy \
+      python2-matplotlib"
+  fi
+    $SUDO $INSTALLER install -y $PACKAGE_LIST
+    
+    $SUDO rm -fr /opt/ssh
+    $SUDO GIT_SSL_NO_VERIFY=true git clone https://gitlab.eurecom.fr/oai/ssh.git /opt/ssh
+
+  #The packages below are already installed for Redhat distros (RHEL, CentOS, Fedora)
+  if [[ "$OS_DISTRO" == "ubuntu" ]]; then
+    $SUDO pip install paramiko
+    $SUDO pip install pyroute2 colorama
+    log_netiface=$OPENAIR_DIR/cmake_targets/log/netiface_install_log.txt
+    echo_info "Installing Netinterfaces package. The logfile for installation is in $log_netiface"
+    (
+    $SUDO rm -fr /tmp/netifaces-0.10.4.tar.gz /tmp/netifaces
+    wget -P /tmp  https://pypi.python.org/packages/18/fa/dd13d4910aea339c0bb87d2b3838d8fd923c11869b1f6e741dbd0ff3bc00/netifaces-0.10.4.tar.gz
+    tar -xzvf /tmp/netifaces-0.10.4.tar.gz -C /tmp
+    cd /tmp/netifaces-0.10.4
+    $SUDO python setup.py install
+    cd -
+    ) >& $log_netiface
+  fi
+}
+
+check_install_oai_software() {
+    # Install every package needed to build OAI on a supported distribution,
+    # then build asn1c from source and refresh /opt/ssh.
+    # Exits with status 1 on an unsupported distribution.
+    local specific_packages=""
+    if ! check_supported_distribution; then
+        echo_error "Your distribution $(get_distribution_release) is not supported by oai !"
+        exit 1
+    fi
+    $SUDO $INSTALLER update -y
+
+    if [[ "$OS_DISTRO" == "ubuntu" ]]; then
+        # name/target pair handed to update-alternatives once the packages
+        # are installed; ubuntu17.10 overrides both
+        local LAPACK_LIBNAME="liblapack.so"
+        local LAPACK_TARGET="/usr/lib/atlas-base/atlas/liblapack.so"
+        # packages common to every supported Ubuntu release
+        local -a ubuntu_packages=(
+            autoconf
+            automake
+            bison
+            build-essential
+            cmake
+            cmake-curses-gui
+            doxygen
+            doxygen-gui
+            texlive-latex-base
+            ethtool
+            flex
+            gdb
+            git
+            graphviz
+            gtkwave
+            guile-2.0-dev
+            iperf
+            iproute
+            iptables
+            iptables-dev
+            libatlas-base-dev
+            libblas-dev
+            libconfig8-dev
+            libffi-dev
+            libforms-bin
+            libforms-dev
+            libgcrypt11-dev
+            libgmp-dev
+            libgtk-3-dev
+            libidn2-0-dev
+            libidn11-dev
+            libmysqlclient-dev
+            liboctave-dev
+            libpgm-dev
+            libpython2.7-dev
+            libsctp1
+            libsctp-dev
+            libssl-dev
+            libtool
+            libusb-1.0-0-dev
+            libxml2
+            libxml2-dev
+            libxslt1-dev
+            mscgen
+            octave
+            octave-signal
+            openssh-client
+            openssh-server
+            openssl
+            python
+            subversion
+            xmlstarlet
+            python-pip
+            pydb
+            libyaml-dev
+            wget
+            libxpm-dev
+        )
+
+        $SUDO apt install -y software-properties-common
+        # release specific packages
+        case "$(get_distribution_release)" in
+            "ubuntu14.04")
+                specific_packages="libtasn1-3-dev gccxml libgnutls-dev libatlas-dev"
+                # For iperf3
+                $SUDO add-apt-repository "deb http://archive.ubuntu.com/ubuntu trusty-backports universe"
+                $SUDO apt-get update
+                ;;
+            "ubuntu16.04")
+                specific_packages="libtasn1-6-dev gccxml libgnutls-dev libatlas-dev"
+                ;;
+            "ubuntu17.04")
+                specific_packages="libtasn1-6-dev castxml libgnutls28-dev libatlas-dev"
+                ;;
+            "ubuntu17.10")
+                specific_packages="libtasn1-6-dev castxml libgnutls28-dev"
+                LAPACK_LIBNAME="liblapack.so-x86_64-linux-gnu"
+                LAPACK_TARGET="/usr/lib/x86_64-linux-gnu/atlas/liblapack.so"
+                ;;
+        esac
+        # $specific_packages is unquoted on purpose: one argument per package
+        $SUDO apt-get install -y $specific_packages "${ubuntu_packages[@]}"
+
+        $SUDO update-alternatives --set "$LAPACK_LIBNAME" "$LAPACK_TARGET"
+
+        $SUDO apt-get install -y nettle-dev nettle-bin
+    elif [[ "$OS_BASEDISTRO" == "fedora" ]]; then
+        # packages common to Fedora, RHEL and CentOS; $CMAKE is the cmake
+        # package name selected for the distribution
+        local -a fedora_packages=(
+            autoconf
+            automake
+            bc
+            bison
+            $CMAKE
+            doxygen
+            ethtool
+            flex
+            gdb
+            git
+            graphviz
+            gtkwave
+            guile-devel
+            iperf
+            iproute
+            iptables
+            iptables-devel
+            atlas-devel
+            blas-devel
+            libconfig-devel
+            libffi-devel
+            xforms
+            xforms-devel
+            libgcrypt-devel
+            gmp-devel
+            gtk3-devel
+            libidn2-devel
+            libidn-devel
+            mariadb-devel
+            octave-devel
+            openpgm-devel
+            lksctp-tools
+            lksctp-tools-devel
+            openssl-devel
+            libtasn1
+            libtool
+            libusb-devel
+            libxml2
+            libxml2-devel
+            libxslt-devel
+            octave
+            octave-signal
+            openssh-clients
+            openssh-server
+            openssl
+            patch
+            psmisc
+            python
+            subversion
+            xmlstarlet
+            python-pip
+            wget
+            kernel-headers
+            kernel-devel
+            nettle-devel
+            gnutls-devel
+            libXpm-devel
+            lapack
+            lapack-devel
+            blas
+            blas-devel
+            libyaml-devel
+        )
+
+        case "$OS_DISTRO" in
+            rhel|centos)
+                # the EPEL repository is added when its release package is missing
+                if ! rpm -q epel-release > /dev/null; then
+                    echo "EPEL repos not present. Installing them."
+                    $SUDO $INSTALLER install -y https://dl.fedoraproject.org/pub/epel/epel-release-latest-7.noarch.rpm
+                else
+                    echo "EPEL repos already present. Good."
+                fi
+                $SUDO $INSTALLER install -y python-epdb
+                $SUDO $INSTALLER install -y gccxml
+                ;;
+            *)
+                $SUDO $INSTALLER install -y mscgen pydb
+                # Fedora repos already contain gccxml's successor castxml.
+                $SUDO $INSTALLER install -y castxml
+                ;;
+        esac
+
+        $SUDO $INSTALLER install -y "${fedora_packages[@]}"
+    fi
+
+    install_asn1c_from_source
+    $SUDO rm -fr /opt/ssh
+    $SUDO git clone https://gist.github.com/2190472.git /opt/ssh
+}
+
+install_asn1c_from_source(){
+    # Clone asn1c (velichkov fork), check out its s1ap branch, build and
+    # install it. Runs in a subshell with all output sent to the log file.
+    asn1_install_log=$OPENAIR_DIR/cmake_targets/log/asn1c_install_log.txt
+    echo_info "\nInstalling ASN1. The log file for ASN1 installation is here: $asn1_install_log "
+    (
+        $SUDO rm -rf /tmp/asn1c
+        git clone https://github.com/velichkov/asn1c /tmp/asn1c
+        cd /tmp/asn1c
+        git checkout s1ap
+        # generate the configure script only when the checkout has none
+        [ -f configure ] || autoreconf -iv
+        ./configure
+        make -j$(nproc)
+        $SUDO make install
+        cd -
+        $SUDO ldconfig
+    ) &> $asn1_install_log
+}
+
+#################################################
+# 2. compile 
+################################################
+
+install_nas_tools() {
+  # (Re)generate the UE non-volatile data files in the current directory with
+  # the ./nvram and ./usim tools.
+  #   $1: value passed to the tools' -c option
+  #   $2: value passed to the tools' -o option
+  # A file is generated when it does not exist yet, or when the tool is newer
+  # than the existing file(s).
+  #
+  # Plain if/elif is used rather than "[ test ] && command": with the latter a
+  # false test made the function return 1 when there was nothing to
+  # regenerate, which aborts a caller running under "set -e".
+  if [ ! -f .ue.nvram0 ]; then
+    echo_success "generate .ue_emm.nvram .ue.nvram"
+    ./nvram --gen -c "$1" -o "$2"
+  elif [ ./nvram -nt .ue.nvram0 ] || [ ./nvram -nt .ue_emm.nvram0 ]; then
+    ./nvram --gen -c "$1" -o "$2"
+  fi
+
+  if [ ! -f .usim.nvram0 ]; then
+    echo_success "generate .usim.nvram"
+    ./usim --gen -c "$1" -o "$2"
+  elif [ ./usim -nt .usim.nvram0 ]; then
+    ./usim --gen -c "$1" -o "$2"
+  fi
+
+}
+
+
+################################
+# set_openair_env
+###############################
+set_openair_env(){
+    fullpath=`readlink -f $BASH_SOURCE`
+    [ -f "/.$fullpath" ] || fullpath=`readlink -f $PWD/$fullpath`
+    openair_path=${fullpath%/cmake_targets/*}
+    openair_path=${openair_path%/targets/*}
+    openair_path=${openair_path%/openair[123]/*}    
+    export OPENAIR_DIR=$openair_path
+    export OPENAIR1_DIR=$openair_path/openair1
+    export OPENAIR2_DIR=$openair_path/openair2
+    export OPENAIR3_DIR=$openair_path/openair3
+    export OPENAIR_TARGETS=$openair_path/targets
+}
+
+################################
+# Function to killall the subprocesses when Ctrl-C Key is hit
+###############################
+function handle_ctrl_c(){
+# Walk down the chain of child processes starting at this shell, record their
+# PIDs in the procid array, then kill them in reverse order (deepest first)
+# and exit.
+CURPID=$$
+ppid=$$
+arraycounter=1
+echo_info "** Trapped CTRL-C. Killing all subprocesses now..."
+echo_info "** Calling sync now..."
+sync 
+while true
+do
+        FORLOOP=FALSE
+        # Get all the child process id
+        # (third column of "ps -ef" is the parent PID, second is the PID)
+        for i in `ps -ef| awk '$3 == '$ppid' { print $2 }'`
+        do
+                if [ $i -ne $CURPID ] ; then
+                        procid[$arraycounter]=$i
+                        arraycounter=`expr $arraycounter + 1`
+                        # the next pass looks for the children of the last
+                        # child found in this pass
+                        ppid=$i
+                        FORLOOP=TRUE
+                fi
+        done
+        # no new child was found: the walk is over, start killing
+        if [ "$FORLOOP" = "FALSE" ] ; then
+           arraycounter=`expr $arraycounter - 1`
+           ## We want to kill child process id first and then parent id's
+           while [ $arraycounter -ne 0 ]
+           do  
+             echo "first we send ctrl-c to program"
+             $SUDO kill -INT "${procid[$arraycounter]}"
+             # leave the process 5 seconds to terminate on SIGINT
+             sleep 5
+             echo "Now we force kill if that didn't work"
+             $SUDO kill -9 "${procid[$arraycounter]}" >/dev/null
+             arraycounter=`expr $arraycounter - 1`
+           done
+         exit
+        fi
+done
+}
+
+
+# get from http://www.linuxjournal.com/content/validating-ip-address-bash-script
+validate_ip() {
+# Check that $1 is a dotted-quad IPv4 address: four groups of 1 to 3 decimal
+# digits, each with a value of at most 255.
+# Returns 0 when the address is valid, 1 otherwise.
+
+local  ip=$1
+local  stat=1
+local  octet
+local -a octets
+
+if [[ $ip =~ ^[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}$ ]]; then
+    # IFS is changed for the read command only: the shell-wide IFS is never
+    # touched, so nothing has to be saved and restored (restoring a previously
+    # unset IFS used to leave it set to the empty string)
+    IFS='.' read -r -a octets <<< "$ip"
+    stat=0
+    for octet in "${octets[@]}"; do
+        # 10# forces base 10: a group such as "08" would otherwise be parsed
+        # as an invalid octal number and raise an arithmetic error
+        (( 10#$octet <= 255 )) || stat=1
+    done
+fi
+
+return $stat
+}
diff --git a/cmake_targets/tools/fix_asn1 b/cmake_targets/tools/fix_asn1
new file mode 100755
index 0000000000000000000000000000000000000000..cba93f589853330ace893b7bd2049c01375f8129
--- /dev/null
+++ b/cmake_targets/tools/fix_asn1
@@ -0,0 +1,202 @@
+#!/bin/bash
+
+# Patch tables. In those arrays, each line is a triplet of consecutive elements:
+#   <file> <sha1sum of file (without line 4 which changes depending on the location of the files)> <patch to apply to file>
+# Patch paths are relative to $OPENAIR_DIR/cmake_targets/tools (see patch_file).
+
+RRC_Rel14=(
+  "SystemInformation-r8-IEs.h" 4df485c5ddf2540eca271876cdc512caa19b0890 "fix_asn1.data/RRC.rel14/SystemInformation-r8-IEs.h.diff"
+  "SystemInformation-NB-r13-IEs.h" 6d91332d5c39205819b06e5e36efe62ff8e5b33b "fix_asn1.data/RRC.rel14/SystemInformation-NB-r13-IEs.h.diff"
+)
+
+RRC_Rel10=(
+  "SystemInformation-r8-IEs.h" 603cd6615cff36ec7020692d72c0d6de7c4859cb "fix_asn1.data/RRC.rel10/SystemInformation-r8-IEs.h.diff"
+)
+
+X2AP_Rel11_2=(
+  "X2ap-CriticalityDiagnostics-IE-List.h" ae96308b37fcbcbf39da5012e42968135fc5f27b "fix_asn1.data/X2AP.rel11.2/X2ap-CriticalityDiagnostics-IE-List.h.diff"
+)
+
+# Referenced by patch_nr_rrc; declared explicitly (and empty: no patch to
+# apply) instead of relying on an unset variable expanding to length 0.
+NR_RRC_Rel15=()
+
+# tput fails (and complains on stderr) when TERM is unset or unknown, which
+# happens in non-interactive builds; in that case the colors are simply empty.
+red_color="$(tput setaf 1 2>/dev/null)"
+green_color="$(tput setaf 2 2>/dev/null)"
+reset_color="$(tput sgr0 2>/dev/null)"
+
+function error()
+{
+  echo -e "$red_color"ERROR: "$@""$reset_color"
+  exit 1
+}
+
+function check_sha1()
+{
+  local file="$1"
+  local target_sha1="$2"
+
+  if [ ! -f "$file" ]
+  then
+    error "$file: no such file"
+  fi
+
+  # we don't use the line 4 of the file
+  # it contains the location of the ASN1 grammar
+  # and this location is not the same on every
+  # installation (this is for *.h files, for *.c
+  # files it's no big deal to skip that line)
+  local computed_sha1=$(sed 4d "$file" | sha1sum | cut -f 1 -d ' ')
+
+  if [ "$target_sha1" != "$computed_sha1" ]
+  then
+    error "$file: wrong SHA1"
+  fi
+}
+
+function patch_file()
+{
+  local patch="$1"
+  local file="$2"
+
+  echo -e "$green_color""patch file $file with $OPENAIR_DIR/cmake_targets/tools/$patch""$reset_color"
+
+  patch "$file" "$OPENAIR_DIR/cmake_targets/tools/$patch"
+  if [ $? -ne 0 ]
+  then
+    error "patching of $file with $OPENAIR_DIR/cmake_targets/tools/$patch failed"
+  fi
+}
+
+function apply_patches()
+{
+  local directory="$1"
+  local array=$2
+  local len=$3       # the length could be computed locally but the way to do it is not clear to me [CROUX]
+
+  local i
+  local file
+  local sha1
+  local patch
+  local item
+
+  for (( i = 0; i < $len; i += 3 ))
+  do
+    # special bash syntax to access the array
+    item=$array[$i];       file=${!item}
+    item=$array[$((i+1))]; sha1=${!item}
+    item=$array[$((i+2))]; patch=${!item}
+    check_sha1 "$directory/$file" "$sha1"
+    patch_file "$patch" "$directory/$file"
+  done
+}
+
+function patch_rrc()
+{
+  local directory="$1"
+  local version="$2"
+
+  case "$version" in
+    Rel14 )
+      echo "patching RRC files release 14"
+      #apply_patches "$directory" RRC_Rel14 ${#RRC_Rel14[*]}
+      ;;
+    Rel10 )
+      echo "patching RRC files release 10"
+      apply_patches "$directory" RRC_Rel10 ${#RRC_Rel10[*]}
+      ;;
+    Rel8 )
+      echo "patching RRC files release 8 TODO?"
+      ;;
+    * )
+      error unknwon/unhandled RRC version \'"$version"\'
+      ;;
+  esac
+}
+
+function patch_nr_rrc()
+{
+  local directory="$1"
+  local version="$2"
+
+  case "$version" in
+    NR_Rel15 )
+      echo "patching NR_RRC files release 15"
+      apply_patches "$directory" NR_RRC_Rel15 ${#NR_RRC_Rel15[*]}
+      ;;
+    * )
+      error unknwon/unhandled NR_RRC version \'"$version"\'
+      ;;
+  esac
+}
+
+
+function patch_x2ap()
+{
+  local directory="$1"
+  local version="$2"
+
+  case "$version" in
+    R14 )
+      ;;
+    R11 )
+      echo "patching X2AP files release 11.2"
+      apply_patches "$directory" X2AP_Rel11_2 ${#X2AP_Rel11_2[*]}
+      ;;
+    * )
+      error unknwon/unhandled X2AP version \'"$version"\'
+      ;;
+  esac
+}
+
+function patch_s1ap()
+{
+  local directory="$1"
+  local version="$2"
+
+  case "$version" in
+    R14 )
+      ;;
+    R10 )
+      #nothing to do anymore (fixes went to asn1c)
+      ;;
+    * )
+      error unknwon/unhandled S1AP version \'"$version"\'
+      ;;
+  esac
+}
+
+function main()
+{
+  if [ $# -ne 3 ]
+  then
+    echo "ERROR: pass <output directory> <module> <version>"
+    exit 1
+  fi
+
+  if [ x"$OPENAIR_DIR" = x ]
+  then
+    error "the variable OPENAIR_DIR is not set"
+  fi
+
+  local directory="$1"
+  local module="$2"
+  local version="$3"
+
+  case "$module" in
+    RRC )
+      patch_rrc "$directory" "$version"
+      ;;
+    NR_RRC )
+      patch_nr_rrc "$directory" "$version"
+      ;;
+    X2AP )
+      patch_x2ap "$directory" "$version"
+      ;;
+    S1AP )
+      patch_s1ap "$directory" "$version"
+      ;;
+    * )
+      error unknown module "$module"
+      ;;
+  esac
+
+  exit 0
+}
+
+main "$@"
diff --git a/cmake_targets/tools/generate_asn1 b/cmake_targets/tools/generate_asn1
new file mode 100755
index 0000000000000000000000000000000000000000..fb38455a126a809026fb306e8663a53a4ed01c2a
--- /dev/null
+++ b/cmake_targets/tools/generate_asn1
@@ -0,0 +1,165 @@
+#!/bin/bash
+
+function main()
+{
+mkdir -p $1
+cd $1
+shift
+
+local module="$2"
+
+#if this script is called with only 2 arguments (so 1 here after the shift), it's for RRC
+#(there may be a better way...)
+if [ $# -eq 2 ]; then
+
+#asn1c does not work well with extension groups, we need the following fix:
+# replace [[ by '<name> SEQUENCE {'
+#     and ]] by '} OPTIONAL'
+#<name> is ext<N> with N starting from 1 and incremented at each new [[ ]] just
+#following another [[ ]]
+#
+#this is what the following C program does
+
+echo generate asnfix.c
+
+cat << EOF > asnfix.c
+/* transforms:
+ * '[[' to 'name SEQUENCE {'
+ * ']]' to '} OPTIONAL'
+ * name is ext1, ext2, ..., for each [[ at the same level
+ * levels are delimited by { and }
+ * -- to end of line is a comment and unprocessed
+ * nested [[ ]] not handled
+ * { and } must be balanced
+ * [[ and ]] can be whatever, every combination is valid
+ */
+#include <stdio.h>
+#include <stdlib.h>
+
+void level(int toplevel)
+{
+  int c;
+  int next_name = 1;
+
+  while (1) {
+    c = getchar();
+next:
+    if (c == EOF) { if (toplevel) break; abort(); }
+
+    if (c == '-') {
+      c = getchar();
+      if (c != '-') { putchar('-'); goto next; }
+      putchar(c); putchar(c);
+      while (1) {
+        c = getchar(); if (c == EOF) abort();
+        putchar(c);
+        if (c == '\n') break;
+      }
+      continue;
+    }
+
+    if (c == '[') {
+      c = getchar();
+      if (c != '[') { putchar('['); goto next; }
+      printf("ext%d SEQUENCE {", next_name);
+      next_name++;
+      continue;
+    }
+
+    if (c == ']') {
+      c = getchar();
+      if (c != ']') { putchar(']'); goto next; }
+      printf("} OPTIONAL");
+      continue;
+    }
+
+    putchar(c);
+    if (c == '}') { if (toplevel) abort(); break; }
+    if (c == '{') level(0);
+  }
+}
+
+int main(void)
+{
+  level(1);
+  fflush(stdout);
+  return 0;
+}
+EOF
+
+echo compile asnfix.c
+
+gcc -Wall -o asnfix asnfix.c
+
+echo run asnfix on $1
+
+./asnfix < $1 > fixed_grammar.asn
+
+rm -f asnfix asnfix.c
+
+echo done with asnfix
+
+echo running asn1c
+
+case "$module" in
+  RRC )
+  	asn1c -gen-PER -fcompound-names -no-gen-example fixed_grammar.asn 2>&1 | grep -v -- '->' | grep -v '^Compiled' |grep -v sample	
+  ;;
+  NR_RRC )
+    	export ASN1C_PREFIX=NR_
+  	asn1c -gen-PER -fcompound-names -no-gen-example fixed_grammar.asn 2>&1 | grep -v -- '->' | grep -v '^Compiled' |grep -v sample	
+  ;;
+  S1AP )
+	export ASN1C_PREFIX=S1AP_
+  	asn1c -gen-PER -fcompound-names -no-gen-example fixed_grammar.asn 2>&1 | grep -v -- '->' | grep -v '^Compiled' |grep -v sample	
+  ;;
+esac
+
+
+
+rm -f fixed_grammar.asn
+
+echo asn1c done
+
+else
+
+case "$module" in
+  RRC )
+  	asn1c -fcompound-names -fno-include-deps -gen-PER -no-gen-OER -no-gen-example $* 2>&1 | grep -v -- '->' | grep -v '^Compiled' |grep -v sample
+  ;; 
+  NR_RRC )
+    	export ASN1C_PREFIX=NR_
+  	asn1c -fcompound-names -fno-include-deps -gen-PER -no-gen-OER -no-gen-example $* 2>&1 | grep -v -- '->' | grep -v '^Compiled' |grep -v sample	
+  ;;
+  S1AP )
+    	export ASN1C_PREFIX=S1AP_
+    	asn1c -fcompound-names -fno-include-deps -gen-PER -no-gen-OER -no-gen-example $* 2>&1 | grep -v -- '->' | grep -v '^Compiled' |grep -v sample	
+  ;;
+esac
+
+fi
+
+awk ' 
+  BEGIN { 
+     print "#ifndef __ASN1_CONSTANTS_H__"
+     print "#define __ASN1_CONSTANTS_H__"
+  }  
+  /INTEGER ::=/ { 
+     gsub("INTEGER ::=","")
+     gsub("--","//")
+     gsub("-1","_minus_1")
+     gsub("-","_")
+     printf("#define %s\n",$0)
+  } 
+  /::=.*INTEGER.*[(]/ {
+     nb_fields=split($0,val,"[:=().]+");
+     gsub("-","_",val[1]);
+     printf("#define min_val_%s %s\n",val[1],val[nb_fields-2]);
+     printf("#define max_val_%s %s\n",val[1],val[nb_fields-1]);
+  }
+  END {
+     print "#endif ";
+  } ' $1  > asn1_constants.h
+}
+
+main "$@"
diff --git a/openair1/PHY/LTE_TRANSPORT/dlsch_decoding.c b/openair1/PHY/LTE_TRANSPORT/dlsch_decoding.c
new file mode 100644
index 0000000000000000000000000000000000000000..4020056e6f68043a591f4792c287202e49a76ab0
--- /dev/null
+++ b/openair1/PHY/LTE_TRANSPORT/dlsch_decoding.c
@@ -0,0 +1,748 @@
+/*
+ * Licensed to the OpenAirInterface (OAI) Software Alliance under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The OpenAirInterface Software Alliance licenses this file to You under
+ * the OAI Public License, Version 1.1  (the "License"); you may not use this file
+ * except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.openairinterface.org/?page_id=698
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *-------------------------------------------------------------------------------
+ * For more information about the OpenAirInterface (OAI) Software Alliance:
+ *      contact@openairinterface.org
+ */
+
+/*! \file PHY/LTE_TRANSPORT/dlsch_decoding.c
+* \brief Top-level routines for decoding  Turbo-coded (DLSCH) transport channels from 36-212, V8.6 2009-03
+* \author R. Knopp
+* \date 2011
+* \version 0.1
+* \company Eurecom
+* \email: knopp@eurecom.fr
+* \note
+* \warning
+*/
+
+//#include "defs.h"
+#include "PHY/defs.h"
+#include "PHY/extern.h"
+#include "PHY/CODING/extern.h"
+#include "SCHED/extern.h"
+#include "SIMULATION/TOOLS/defs.h"
+//#define DEBUG_DLSCH_DECODING
+//#define UE_DEBUG_TRACE 1
+
+
+/*!
+ * \brief Release a UE DLSCH descriptor and the buffers of its HARQ processes.
+ *
+ * For each of the first dlsch->Mdlharq HARQ processes that is allocated, the
+ * b buffer, every c[] and d[] segment buffer and the process itself are
+ * released; the descriptor is released last. Does nothing when dlsch is NULL.
+ * The pointer held by the caller is not reset.
+ *
+ * \param dlsch descriptor to release, may be NULL
+ */
+void free_ue_dlsch(LTE_UE_DLSCH_t *dlsch)
+{
+  int pid, seg;
+  LTE_DL_UE_HARQ_t *harq;
+
+  if (dlsch == NULL)
+    return;
+
+  for (pid = 0; pid < dlsch->Mdlharq; pid++) {
+    harq = dlsch->harq_processes[pid];
+
+    if (harq == NULL)
+      continue;
+
+    if (harq->b != NULL) {
+      free16(harq->b, MAX_DLSCH_PAYLOAD_BYTES);
+      harq->b = NULL;
+    }
+
+    for (seg = 0; seg < MAX_NUM_DLSCH_SEGMENTS; seg++) {
+      /* c[] is released without a NULL test, d[] only when set */
+      free16(harq->c[seg], ((seg==0)?8:0) + 3+768);
+      harq->c[seg] = NULL;
+
+      if (harq->d[seg] != NULL) {
+        free16(harq->d[seg], ((3*8*6144)+12+96)*sizeof(short));
+        harq->d[seg] = NULL;
+      }
+    }
+
+    free16(harq, sizeof(LTE_DL_UE_HARQ_t));
+    dlsch->harq_processes[pid] = NULL;
+  }
+
+  free16(dlsch, sizeof(LTE_UE_DLSCH_t));
+}
+
+/*!
+ * \brief Allocate and zero a UE DLSCH descriptor with Mdlharq HARQ processes.
+ *
+ * Buffer sizes are divided by a bandwidth factor derived from N_RB_DL
+ * (6 -> 16, 25 -> 4, 50 -> 2, any other value -> 1): the b buffer holds
+ * MAX_DLSCH_PAYLOAD_BYTES/factor bytes and MAX_NUM_DLSCH_SEGMENTS/factor
+ * c[]/d[] segment buffers are allocated. The segment buffers are only
+ * allocated when abstraction_flag is 0.
+ *
+ * \param Kmimo                stored in the descriptor
+ * \param Mdlharq              number of HARQ processes to allocate, stored in the descriptor
+ * \param Nsoft                stored in the descriptor
+ * \param max_turbo_iterations stored in the descriptor
+ * \param N_RB_DL              downlink bandwidth in resource blocks, selects the size factor
+ * \param abstraction_flag     non-zero to skip the c[]/d[] segment buffers
+ * \return the new descriptor, or NULL when an allocation failed (a message is
+ *         printed and everything allocated so far is released)
+ */
+LTE_UE_DLSCH_t *new_ue_dlsch(uint8_t Kmimo,uint8_t Mdlharq,uint32_t Nsoft,uint8_t max_turbo_iterations,uint8_t N_RB_DL, uint8_t abstraction_flag)
+{
+
+  LTE_UE_DLSCH_t *dlsch;
+  LTE_DL_UE_HARQ_t *harq;
+  uint8_t exit_flag = 0,pid,seg;
+  unsigned char bw_scaling;
+
+  if (N_RB_DL == 6)
+    bw_scaling = 16;
+  else if (N_RB_DL == 25)
+    bw_scaling = 4;
+  else if (N_RB_DL == 50)
+    bw_scaling = 2;
+  else
+    bw_scaling = 1;
+
+  dlsch = (LTE_UE_DLSCH_t *)malloc16(sizeof(LTE_UE_DLSCH_t));
+
+  if (dlsch != NULL) {
+    memset(dlsch,0,sizeof(LTE_UE_DLSCH_t));
+    dlsch->Kmimo = Kmimo;
+    dlsch->Mdlharq = Mdlharq;
+    dlsch->Nsoft = Nsoft;
+    dlsch->max_turbo_iterations = max_turbo_iterations;
+
+    for (pid=0; pid<Mdlharq; pid++) {
+      harq = (LTE_DL_UE_HARQ_t *)malloc16(sizeof(LTE_DL_UE_HARQ_t));
+      dlsch->harq_processes[pid] = harq;
+
+      if (harq == NULL) {
+        exit_flag=1;
+        continue;
+      }
+
+      memset(harq,0,sizeof(LTE_DL_UE_HARQ_t));
+      harq->first_tx=1;
+      harq->b = (uint8_t*)malloc16(MAX_DLSCH_PAYLOAD_BYTES/bw_scaling);
+
+      if (harq->b == NULL)
+        exit_flag=3;
+      else
+        memset(harq->b,0,MAX_DLSCH_PAYLOAD_BYTES/bw_scaling);
+
+      if (abstraction_flag != 0)
+        continue;
+
+      for (seg=0; seg<MAX_NUM_DLSCH_SEGMENTS/bw_scaling; seg++) {
+        /* segment 0 gets 8 extra bytes */
+        harq->c[seg] = (uint8_t*)malloc16(((seg==0)?8:0) + 3+ 768);
+
+        if (harq->c[seg] == NULL)
+          exit_flag=2;
+        else
+          memset(harq->c[seg],0,((seg==0)?8:0) + 3+ 768);
+
+        harq->d[seg] = (short*)malloc16(((3*8*6144)+12+96)*sizeof(short));
+
+        if (harq->d[seg] == NULL)
+          exit_flag=2;
+        else
+          memset(harq->d[seg],0,((3*8*6144)+12+96)*sizeof(short));
+      }
+    }
+
+    if (exit_flag==0)
+      return(dlsch);
+  }
+
+  /* failure path: the descriptor itself or one of its buffers is missing */
+  printf("new_ue_dlsch with size %zu: exit_flag = %u\n",sizeof(LTE_DL_UE_HARQ_t), exit_flag);
+  free_ue_dlsch(dlsch);
+
+  return(NULL);
+}
+
+/*!
+ * \brief Decode one DLSCH transport block from its LLRs.
+ *
+ * On the first round of a HARQ process the segmentation parameters are
+ * computed from the TBS. Each code segment then goes through rate matching,
+ * sub-block deinterleaving and turbo decoding (16-bit decoder when llr8_flag
+ * is 0, 8-bit decoder otherwise). Once a segment has failed, the remaining
+ * segments are still rate matched and deinterleaved but no longer decoded.
+ * When every segment is decoded, the segments are copied into
+ * harq_process->b.
+ *
+ * Updates dlsch->harq_ack[subframe], the round/status/trials/errors fields of
+ * harq_process and dlsch->last_iteration_cnt, and sets the global opp_enabled
+ * to 1. dlsch and phy_vars_ue are dereferenced without being checked.
+ *
+ * \param phy_vars_ue  UE context (Mod_id for the logs, turbo decoder timing stats)
+ * \param dlsch_llr    LLRs of the whole transport block
+ * \param frame_parms  frame parameters; N_RB_DL bounds the number of segments
+ * \param dlsch        DLSCH descriptor
+ * \param harq_process HARQ process to decode
+ * \param frame        frame number, only used in the logs
+ * \param subframe     subframe index (0..9), selects the harq_ack entry
+ * \param harq_pid     HARQ process id, stored in the harq_ack entry
+ * \param is_crnti     non-zero to log the ACK/NACK decision
+ * \param llr8_flag    0: 16-bit turbo decoder, otherwise 8-bit turbo decoder
+ * \return dlsch->max_turbo_iterations when an argument is rejected, a codeword
+ *         size is illegal or rate matching fails; 1+dlsch->max_turbo_iterations
+ *         when there are too many segments or a segment could not be decoded;
+ *         otherwise the value returned by the decoder for the last segment.
+ *         NOTE(review): the first group of error returns is below the
+ *         1+max_turbo_iterations failure value -- confirm how callers treat it.
+ */
+uint32_t  dlsch_decoding(PHY_VARS_UE *phy_vars_ue,
+                         short *dlsch_llr,
+                         LTE_DL_FRAME_PARMS *frame_parms,
+                         LTE_UE_DLSCH_t *dlsch,
+                         LTE_DL_UE_HARQ_t *harq_process,
+                         uint32_t frame,
+                         uint8_t subframe,
+                         uint8_t harq_pid,
+                         uint8_t is_crnti,
+                         uint8_t llr8_flag)
+{
+
+#if UE_TIMING_TRACE
+  time_stats_t *dlsch_rate_unmatching_stats=&phy_vars_ue->dlsch_rate_unmatching_stats;
+  time_stats_t *dlsch_turbo_decoding_stats=&phy_vars_ue->dlsch_turbo_decoding_stats;
+  time_stats_t *dlsch_deinterleaving_stats=&phy_vars_ue->dlsch_deinterleaving_stats;
+#endif
+  uint32_t A,E;
+  uint32_t G;
+  uint32_t ret,offset;
+  uint16_t iind;
+  short dummy_w[MAX_NUM_DLSCH_SEGMENTS][3*(6144+64)];
+  uint32_t r,r_offset=0,Kr,Kr_bytes,err_flag=0;
+  uint8_t crc_type;
+  decoder_if_t tc;
+
+  if (!dlsch_llr) {
+    printf("dlsch_decoding.c: NULL dlsch_llr pointer\n");
+    return(dlsch->max_turbo_iterations);
+  }
+
+  if (!harq_process) {
+    printf("dlsch_decoding.c: NULL harq_process pointer\n");
+    return(dlsch->max_turbo_iterations);
+  }
+
+  if (!frame_parms) {
+    printf("dlsch_decoding.c: NULL frame_parms pointer\n");
+    return(dlsch->max_turbo_iterations);
+  }
+
+  if (subframe>9) {
+    printf("dlsch_decoding.c: Illegal subframe index %d\n",subframe);
+    return(dlsch->max_turbo_iterations);
+  }
+
+  if (dlsch->harq_ack[subframe].ack != 2) {
+    LOG_D(PHY, "[UE %d] DLSCH @ SF%d : ACK bit is %d instead of DTX even before PDSCH is decoded!\n",
+        phy_vars_ue->Mod_id, subframe, dlsch->harq_ack[subframe].ack);
+  }
+
+  // select the turbo decoder implementation
+  if (llr8_flag == 0) {
+    tc = decoder16;
+  }
+  else
+  {
+    AssertFatal (harq_process->TBS >= 256 , "Mismatch flag nbRB=%d TBS=%d mcs=%d Qm=%d RIV=%d round=%d \n",
+                 harq_process->nb_rb, harq_process->TBS,harq_process->mcs,harq_process->Qm,harq_process->rvidx,harq_process->round);
+    tc = decoder8;
+  }
+
+  harq_process->trials[harq_process->round]++;
+
+  A = harq_process->TBS; //2072 for QPSK 1/3
+
+  ret = dlsch->max_turbo_iterations;
+
+  G = harq_process->G;
+
+  if (harq_process->round == 0) {
+    // This is a new packet, so compute quantities regarding segmentation
+    harq_process->B = A+24;
+    lte_segmentation(NULL,
+                     NULL,
+                     harq_process->B,
+                     &harq_process->C,
+                     &harq_process->Cplus,
+                     &harq_process->Cminus,
+                     &harq_process->Kplus,
+                     &harq_process->Kminus,
+                     &harq_process->F);
+  }
+
+  err_flag = 0;
+  r_offset = 0;
+
+  // same bandwidth factor as the one used to size the buffers in new_ue_dlsch()
+  unsigned char bw_scaling =1;
+
+  switch (frame_parms->N_RB_DL) {
+  case 6:
+    bw_scaling =16;
+    break;
+
+  case 25:
+    bw_scaling =4;
+    break;
+
+  case 50:
+    bw_scaling =2;
+    break;
+
+  default:
+    bw_scaling =1;
+    break;
+  }
+
+  if (harq_process->C > MAX_NUM_DLSCH_SEGMENTS/bw_scaling) {
+    LOG_E(PHY,"Illegal harq_process->C %d > %d\n",harq_process->C,MAX_NUM_DLSCH_SEGMENTS/bw_scaling);
+    return((1+dlsch->max_turbo_iterations));
+  }
+#ifdef DEBUG_DLSCH_DECODING
+  printf("Segmentation: C %d, Cminus %d, Kminus %d, Kplus %d\n",harq_process->C,harq_process->Cminus,harq_process->Kminus,harq_process->Kplus);
+#endif
+
+  opp_enabled=1;
+
+  for (r=0; r<harq_process->C; r++) {
+
+    // Get Turbo interleaver parameters
+    if (r<harq_process->Cminus)
+      Kr = harq_process->Kminus;
+    else
+      Kr = harq_process->Kplus;
+
+    Kr_bytes = Kr>>3;
+
+    // iind selects the (f1,f2) pair of this segment size in f1f2mat_old
+    if (Kr_bytes<=64)
+      iind = (Kr_bytes-5);
+    else if (Kr_bytes <=128)
+      iind = 59 + ((Kr_bytes-64)>>1);
+    else if (Kr_bytes <= 256)
+      iind = 91 + ((Kr_bytes-128)>>2);
+    else if (Kr_bytes <= 768)
+      iind = 123 + ((Kr_bytes-256)>>3);
+    else {
+      printf("dlsch_decoding: Illegal codeword size %d!!!\n",Kr_bytes);
+      return(dlsch->max_turbo_iterations);
+    }
+
+#ifdef DEBUG_DLSCH_DECODING
+    printf("f1 %d, f2 %d, F %d\n",f1f2mat_old[2*iind],f1f2mat_old[1+(2*iind)],(r==0) ? harq_process->F : 0);
+#endif
+
+#if UE_TIMING_TRACE
+    start_meas(dlsch_rate_unmatching_stats);
+#endif
+    memset(&dummy_w[r][0],0,3*(6144+64)*sizeof(short));
+    harq_process->RTC[r] = generate_dummy_w(4+(Kr_bytes*8),
+                                            (uint8_t*) &dummy_w[r][0],
+                                            (r==0) ? harq_process->F : 0);
+
+#ifdef DEBUG_DLSCH_DECODING
+    LOG_D(PHY,"HARQ_PID %d Rate Matching Segment %d (coded bits %d,unpunctured/repeated bits %d, TBS %d, mod_order %d, nb_rb %d, Nl %d, rv %d, round %d)...\n",
+          harq_pid,r, G,
+          Kr*3,
+          harq_process->TBS,
+          harq_process->Qm,
+          harq_process->nb_rb,
+          harq_process->Nl,
+          harq_process->rvidx,
+          harq_process->round);
+#endif
+
+#ifdef DEBUG_DLSCH_DECODING
+    printf(" in decoding dlsch->harq_processes[harq_pid]->rvidx = %d\n", dlsch->harq_processes[harq_pid]->rvidx);
+#endif
+    if (lte_rate_matching_turbo_rx(harq_process->RTC[r],
+                                   G,
+                                   harq_process->w[r],
+                                   (uint8_t*)&dummy_w[r][0],
+                                   dlsch_llr+r_offset,
+                                   harq_process->C,
+                                   dlsch->Nsoft,
+                                   dlsch->Mdlharq,
+                                   dlsch->Kmimo,
+                                   harq_process->rvidx,
+                                   (harq_process->round==0)?1:0,
+                                   harq_process->Qm,
+                                   harq_process->Nl,
+                                   r,
+                                   &E)==-1) {
+#if UE_TIMING_TRACE
+      stop_meas(dlsch_rate_unmatching_stats);
+#endif
+      LOG_E(PHY,"dlsch_decoding.c: Problem in rate_matching\n");
+      return(dlsch->max_turbo_iterations);
+    } else
+    {
+#if UE_TIMING_TRACE
+      stop_meas(dlsch_rate_unmatching_stats);
+#endif
+    }
+    // E (written by the rate matching) is how far the next segment starts in dlsch_llr
+    r_offset += E;
+
+#if UE_TIMING_TRACE
+    start_meas(dlsch_deinterleaving_stats);
+#endif
+    sub_block_deinterleaving_turbo(4+Kr,
+                                   &harq_process->d[r][96],
+                                   harq_process->w[r]);
+#if UE_TIMING_TRACE
+    stop_meas(dlsch_deinterleaving_stats);
+#endif
+
+    memset(harq_process->c[r],0,Kr_bytes);
+
+    if (harq_process->C == 1)
+      crc_type = CRC24_A;
+    else
+      crc_type = CRC24_B;
+
+    // decoding is skipped as soon as a previous segment has failed
+    if (err_flag == 0) {
+      if (llr8_flag) {
+        AssertFatal (Kr >= 256, "turbo algo issue Kr=%d cb_cnt=%d C=%d nbRB=%d TBSInput=%d TBSHarq=%d TBSplus24=%d mcs=%d Qm=%d RIV=%d round=%d\n",
+                     Kr,r,harq_process->C,harq_process->nb_rb,A,harq_process->TBS,harq_process->B,harq_process->mcs,harq_process->Qm,harq_process->rvidx,harq_process->round);
+      }
+#if UE_TIMING_TRACE
+      start_meas(dlsch_turbo_decoding_stats);
+#endif
+      LOG_D(PHY,"AbsSubframe %d.%d Start turbo segment %d/%d \n",frame%1024,subframe,r,harq_process->C-1);
+      ret = tc
+            (&harq_process->d[r][96],
+             NULL,
+             harq_process->c[r],
+             NULL,
+             Kr,
+             f1f2mat_old[iind*2],
+             f1f2mat_old[(iind*2)+1],
+             dlsch->max_turbo_iterations,
+             crc_type,
+             (r==0) ? harq_process->F : 0,
+             &phy_vars_ue->dlsch_tc_init_stats,
+             &phy_vars_ue->dlsch_tc_alpha_stats,
+             &phy_vars_ue->dlsch_tc_beta_stats,
+             &phy_vars_ue->dlsch_tc_gamma_stats,
+             &phy_vars_ue->dlsch_tc_ext_stats,
+             &phy_vars_ue->dlsch_tc_intl1_stats,
+             &phy_vars_ue->dlsch_tc_intl2_stats);
+
+#if UE_TIMING_TRACE
+      stop_meas(dlsch_turbo_decoding_stats);
+#endif
+    }
+
+    // a code segment is in error: remember it, the loop itself goes on
+    if ((err_flag == 0) && (ret>=(1+dlsch->max_turbo_iterations))) {
+      LOG_D(PHY,"AbsSubframe %d.%d CRC failed, segment %d/%d \n",frame%1024,subframe,r,harq_process->C-1);
+      err_flag = 1;
+    }
+  }
+
+  if (err_flag == 1) {
+#if UE_DEBUG_TRACE
+    LOG_I(PHY,"[UE %d] DLSCH: Setting NAK for SFN/SF %d/%d (pid %d, status %d, round %d, TBS %d, mcs %d) Kr %d r %d harq_process->round %d\n",
+        phy_vars_ue->Mod_id, frame, subframe, harq_pid,harq_process->status, harq_process->round,harq_process->TBS,harq_process->mcs,Kr,r,harq_process->round);
+#endif
+    dlsch->harq_ack[subframe].ack = 0;
+    dlsch->harq_ack[subframe].harq_id = harq_pid;
+    dlsch->harq_ack[subframe].send_harq_status = 1;
+    harq_process->errors[harq_process->round]++;
+    harq_process->round++;
+
+    // all the rounds are used: the process is released
+    if (harq_process->round >= dlsch->Mdlharq) {
+      harq_process->status = SCH_IDLE;
+      harq_process->round  = 0;
+    }
+    if(is_crnti)
+    {
+    LOG_D(PHY,"[UE %d] DLSCH: Setting NACK for subframe %d (pid %d, pid status %d, round %d/Max %d, TBS %d)\n",
+               phy_vars_ue->Mod_id,subframe,harq_pid,harq_process->status,harq_process->round,dlsch->Mdlharq,harq_process->TBS);
+    }
+
+    return((1+dlsch->max_turbo_iterations));
+  } else {
+#if UE_DEBUG_TRACE
+      LOG_I(PHY,"[UE %d] DLSCH: Setting ACK for subframe %d TBS %d mcs %d nb_rb %d\n",
+           phy_vars_ue->Mod_id,subframe,harq_process->TBS,harq_process->mcs,harq_process->nb_rb);
+#endif
+
+    harq_process->status = SCH_IDLE;
+    harq_process->round  = 0;
+    dlsch->harq_ack[subframe].ack = 1;
+    dlsch->harq_ack[subframe].harq_id = harq_pid;
+    dlsch->harq_ack[subframe].send_harq_status = 1;
+
+    if(is_crnti)
+    {
+    LOG_D(PHY,"[UE %d] DLSCH: Setting ACK for subframe %d (pid %d, round %d, TBS %d)\n",phy_vars_ue->Mod_id,subframe,harq_pid,harq_process->round,harq_process->TBS);
+    }
+  }
+
+  // Reassembly of Transport block here
+  offset = 0;
+
+  for (r=0; r<harq_process->C; r++) {
+    if (r<harq_process->Cminus)
+      Kr = harq_process->Kminus;
+    else
+      Kr = harq_process->Kplus;
+
+    Kr_bytes = Kr>>3;
+
+    // with several segments, the last 3 bytes of each one are not copied;
+    // the first F>>3 bytes of segment 0 are skipped as well
+    if (r==0) {
+      memcpy(harq_process->b,
+             &harq_process->c[0][(harq_process->F>>3)],
+             Kr_bytes - (harq_process->F>>3)- ((harq_process->C>1)?3:0));
+      offset = Kr_bytes - (harq_process->F>>3) - ((harq_process->C>1)?3:0);
+    } else {
+      memcpy(harq_process->b+offset,
+             harq_process->c[r],
+             Kr_bytes- ((harq_process->C>1)?3:0));
+      offset += (Kr_bytes - ((harq_process->C>1)?3:0));
+    }
+  }
+
+  dlsch->last_iteration_cnt = ret;
+
+  return(ret);
+}
+
diff --git a/openair1/PHY/LTE_TRANSPORT/dlsch_demodulation.c b/openair1/PHY/LTE_TRANSPORT/dlsch_demodulation.c
new file mode 100644
index 0000000000000000000000000000000000000000..6284f279d895d04590cf44ca3d792c1fcda40f47
--- /dev/null
+++ b/openair1/PHY/LTE_TRANSPORT/dlsch_demodulation.c
@@ -0,0 +1,6155 @@
+/*
+ * Licensed to the OpenAirInterface (OAI) Software Alliance under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The OpenAirInterface Software Alliance licenses this file to You under
+ * the OAI Public License, Version 1.1  (the "License"); you may not use this file
+ * except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.openairinterface.org/?page_id=698
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *-------------------------------------------------------------------------------
+ * For more information about the OpenAirInterface (OAI) Software Alliance:
+ *      contact@openairinterface.org
+ */
+
+/*! \file PHY/LTE_TRANSPORT/dlsch_demodulation.c
+ * \brief Top-level routines for demodulating the PDSCH physical channel from 36-211, V8.6 2009-03
+ * \author R. Knopp, F. Kaltenberger,A. Bhamri, S. Aubert, X. Xiang
+ * \date 2011
+ * \version 0.1
+ * \company Eurecom
+ * \email: knopp@eurecom.fr,florian.kaltenberger@eurecom.fr,ankit.bhamri@eurecom.fr,sebastien.aubert@eurecom.fr
+ * \note
+ * \warning
+ */
+//#include "PHY/defs.h"
+#include "PHY/extern.h"
+#include "SCHED/defs.h"
+#include "defs.h"
+#include "extern.h"
+#include "PHY/sse_intrin.h"
+#include "T.h"
+
+#define NOCYGWIN_STATIC
+
+/* Dynamic shifts applied in rx_pdsch() to the TM3/4 channel-level exponents (log2_maxh0/1):
+ * dlsch_demod_shift is added when rx_type > rx_standard (set as command line argument,
+ * see lte-softmodem.c, default value: 0); otherwise (interf_unaw_shift - 13) is added,
+ * so the default interf_unaw_shift of 13 leaves the exponent unchanged. */
+int16_t dlsch_demod_shift = 0;
+int16_t interf_unaw_shift = 13;
+// Uncomment DEBUG_HARQ to compile in the "[DEMOD]" / "Demod" printf traces of rx_pdsch().
+//#define DEBUG_HARQ
+// NOTE(review): DEBUG_PHY is defined, so the "#ifdef DEBUG_PHY" write_output() dumps (symbol 5) in rx_pdsch() are compiled in.
+#define DEBUG_PHY 1
+//#define DEBUG_DLSCH_DEMOD 1
+// DISABLE_LOG_X is tested with "#if" in rx_pdsch() (printf instead of LOG_I for the timing traces), so give it a value when enabling it.
+//#define DISABLE_LOG_X
+// offset_mumimo_llr_drange (defined further below) is indexed as [MCS][(i_mod>>1)-1], i.e. columns (0,1,2) <-> i_mod (2,4,6).
+// NOTE(review): offset_mumimo_llr_drange_fix is not referenced in the visible part of rx_pdsch() - confirm it is still needed.
+unsigned char offset_mumimo_llr_drange_fix=0;
+// interference-free case; tables presumably indexed by MCS (0..28) for TM4 / TM1 - TODO confirm against their users
+unsigned char interf_unaw_shift_tm4_mcs[29]={5, 3, 4, 3, 3, 2, 1, 1, 2, 0, 1, 1, 1, 1, 0, 0,
+                                             1, 1, 1, 1, 0, 2, 1, 0, 1, 0, 1, 0, 0} ;
+unsigned char interf_unaw_shift_tm1_mcs[29]={5, 5, 4, 3, 3, 3, 2, 2, 4, 4, 2, 3, 3, 3, 1, 1,
+                                             0, 1, 1, 2, 5, 4, 4, 6, 5, 1, 0, 5, 6} ; // mcs 21, 26, 28 seem to be erroneous
+// The commented-out tables below are earlier tunings of offset_mumimo_llr_drange, kept for reference only.
+/*
+//original values from Sebastien + some hand tuning
+unsigned char offset_mumimo_llr_drange[29][3]={{8,8,8},{7,7,7},{7,7,7},{7,7,7},{6,6,6},{6,6,6},{6,6,6},{5,5,5},{4,4,4},{1,2,4}, // QPSK
+{5,5,4},{5,5,5},{5,5,5},{3,3,3},{2,2,2},{2,2,2},{2,2,2}, // 16-QAM
+{2,2,1},{3,3,3},{3,3,3},{3,3,1},{2,2,2},{2,2,2},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0}}; //64-QAM
+*/
+ /*
+ //first optimization try
+ unsigned char offset_mumimo_llr_drange[29][3]={{7, 8, 7},{6, 6, 7},{6, 6, 7},{6, 6, 6},{5, 6, 6},{5, 5, 6},{5, 5, 6},{4, 5, 4},{4, 3, 4},{3, 2, 2},{6, 5, 5},{5, 4, 4},{5, 5, 4},{3, 3, 2},{2, 2, 1},{2, 1, 1},{2, 2, 2},{3, 3, 3},{3, 3, 2},{3, 3, 2},{3, 2, 1},{2, 2, 2},{2, 2, 2},{0, 0, 0},{0, 0, 0},{0, 0, 0},{0, 0, 0},{0, 0, 0}};
+ */
+ //second optimization try
+ /*
+   unsigned char offset_mumimo_llr_drange[29][3]={{5, 8, 7},{4, 6, 8},{3, 6, 7},{7, 7, 6},{4, 7, 8},{4, 7, 4},{6, 6, 6},{3, 6, 6},{3, 6, 6},{1, 3, 4},{1, 1, 0},{3, 3, 2},{3, 4, 1},{4, 0, 1},{4, 2, 2},{3, 1, 2},{2, 1, 0},{2, 1, 1},{1, 0, 1},{1, 0, 1},{0, 0, 0},{1, 0, 0},{0, 0, 0},{0, 1, 0},{1, 0, 0},{0, 0, 0},{0, 0, 0},{0, 0, 0},{0, 0, 0}};
+ */
+unsigned char offset_mumimo_llr_drange[29][3]= {{0, 6, 5},{0, 4, 5},{0, 4, 5},{0, 5, 4},{0, 5, 6},{0, 5, 3},{0, 4, 4},{0, 4, 4},{0, 3, 3},{0, 1, 2},{1, 1, 0},{1, 3, 2},{3, 4, 1},{2, 0, 0},{2, 2, 2},{1, 1, 1},{2, 1, 0},{2, 1, 1},{1, 0, 1},{1, 0, 1},{0, 0, 0},{1, 0, 0},{0, 0, 0},{0, 1, 0},{1, 0, 0},{0, 0, 0},{0, 0, 0},{0, 0, 0},{0, 0, 0}};
+
+// print_shorts is only declared here (extern); its definition is not in the visible part of this file.
+extern void print_shorts(char *s,int16_t *x);
+
+
+int rx_pdsch(PHY_VARS_UE *ue,
+             PDSCH_t type,
+             unsigned char eNB_id,
+             unsigned char eNB_id_i, //if this == ue->n_connected_eNB, we assume MU interference
+             uint32_t frame,
+             uint8_t subframe,
+             unsigned char symbol,
+             unsigned char first_symbol_flag,
+             RX_type_t rx_type,
+             unsigned char i_mod,
+             unsigned char harq_pid)
+{
+
+  LTE_UE_COMMON *common_vars  = &ue->common_vars;
+  LTE_UE_PDSCH **pdsch_vars;
+  LTE_DL_FRAME_PARMS *frame_parms    = &ue->frame_parms;
+  PHY_MEASUREMENTS *measurements = &ue->measurements;
+  LTE_UE_DLSCH_t   **dlsch;
+
+  int avg[4];
+  int avg_0[2];
+  int avg_1[2];
+
+#if UE_TIMING_TRACE
+  uint8_t slot = 0;
+#endif
+
+  unsigned char aatx,aarx;
+
+  unsigned short nb_rb = 0, round;
+  int avgs = 0, rb;
+  LTE_DL_UE_HARQ_t *dlsch0_harq,*dlsch1_harq = 0;
+
+  uint8_t beamforming_mode;
+  uint32_t *rballoc;
+
+  int32_t **rxdataF_comp_ptr;
+  int32_t **dl_ch_mag_ptr;
+  int32_t codeword_TB0 = -1;
+  int32_t codeword_TB1 = -1;
+
+
+
+  switch (type) {
+  case SI_PDSCH:
+    pdsch_vars = &ue->pdsch_vars_SI[eNB_id];
+    dlsch = &ue->dlsch_SI[eNB_id];
+    dlsch0_harq = dlsch[0]->harq_processes[harq_pid];
+    beamforming_mode  = 0;
+    break;
+
+  case RA_PDSCH:
+    pdsch_vars = &ue->pdsch_vars_ra[eNB_id];
+    dlsch = &ue->dlsch_ra[eNB_id];
+    dlsch0_harq = dlsch[0]->harq_processes[harq_pid];
+    beamforming_mode  = 0;
+    break;
+
+  case PDSCH:
+    pdsch_vars = ue->pdsch_vars[ue->current_thread_id[subframe]];
+    dlsch = ue->dlsch[ue->current_thread_id[subframe]][eNB_id];
+    //printf("status TB0 = %d, status TB1 = %d \n", dlsch[0]->harq_processes[harq_pid]->status, dlsch[1]->harq_processes[harq_pid]->status);
+    LOG_D(PHY,"AbsSubframe %d.%d / Sym %d harq_pid %d,  harq status %d.%d \n",
+                   frame,subframe,symbol,harq_pid,
+                   dlsch[0]->harq_processes[harq_pid]->status,
+                   dlsch[1]->harq_processes[harq_pid]->status);
+
+    if ((dlsch[0]->harq_processes[harq_pid]->status == ACTIVE) &&
+        (dlsch[1]->harq_processes[harq_pid]->status == ACTIVE)){
+      codeword_TB0 = dlsch[0]->harq_processes[harq_pid]->codeword;
+      codeword_TB1 = dlsch[1]->harq_processes[harq_pid]->codeword;
+      dlsch0_harq = dlsch[codeword_TB0]->harq_processes[harq_pid];
+      dlsch1_harq = dlsch[codeword_TB1]->harq_processes[harq_pid];
+#ifdef DEBUG_HARQ
+      printf("[DEMOD] I am assuming both TBs are active\n");
+#endif
+    }
+     else if ((dlsch[0]->harq_processes[harq_pid]->status == ACTIVE) &&
+              (dlsch[1]->harq_processes[harq_pid]->status != ACTIVE) ) {
+      codeword_TB0 = dlsch[0]->harq_processes[harq_pid]->codeword;
+      dlsch0_harq = dlsch[0]->harq_processes[harq_pid];
+      dlsch1_harq = NULL;
+      codeword_TB1 = -1;
+#ifdef DEBUG_HARQ
+      printf("[DEMOD] I am assuming only TB0 is active\n");
+#endif
+    }
+     else if ((dlsch[0]->harq_processes[harq_pid]->status != ACTIVE) &&
+              (dlsch[1]->harq_processes[harq_pid]->status == ACTIVE) ){
+      codeword_TB1 = dlsch[1]->harq_processes[harq_pid]->codeword;
+      dlsch0_harq  = dlsch[1]->harq_processes[harq_pid];
+      dlsch1_harq  = NULL;
+      codeword_TB0 = -1;
+#ifdef DEBUG_HARQ
+      printf("[DEMOD] I am assuming only TB1 is active, it is in cw %d\n", dlsch0_harq->codeword);
+#endif
+    }
+    else {
+      LOG_E(PHY,"[UE][FATAL] Frame %d subframe %d: no active DLSCH\n",ue->proc.proc_rxtx[0].frame_rx,subframe);
+      return(-1);
+    }
+    beamforming_mode  = ue->transmission_mode[eNB_id]<7?0:ue->transmission_mode[eNB_id];
+    break;
+
+  default:
+    LOG_E(PHY,"[UE][FATAL] Frame %d subframe %d: Unknown PDSCH format %d\n",ue->proc.proc_rxtx[0].frame_rx,subframe,type);
+    return(-1);
+    break;
+  }
+#ifdef DEBUG_HARQ
+  printf("[DEMOD] MIMO mode = %d\n", dlsch0_harq->mimo_mode);
+  printf("[DEMOD] cw for TB0 = %d, cw for TB1 = %d\n", codeword_TB0, codeword_TB1);
+#endif
+
+  DevAssert(dlsch0_harq);
+  round = dlsch0_harq->round;
+  //printf("round = %d\n", round);
+
+  if (eNB_id > 2) {
+    LOG_W(PHY,"dlsch_demodulation.c: Illegal eNB_id %d\n",eNB_id);
+    return(-1);
+  }
+
+  if (!common_vars) {
+    LOG_W(PHY,"dlsch_demodulation.c: Null common_vars\n");
+    return(-1);
+  }
+
+  if (!dlsch[0]) {
+    LOG_W(PHY,"dlsch_demodulation.c: Null dlsch_ue pointer\n");
+    return(-1);
+  }
+
+  if (!pdsch_vars) {
+    LOG_W(PHY,"dlsch_demodulation.c: Null pdsch_vars pointer\n");
+    return(-1);
+  }
+
+  if (!frame_parms) {
+    LOG_W(PHY,"dlsch_demodulation.c: Null frame_parms\n");
+    return(-1);
+  }
+
+  if (((frame_parms->Ncp == NORMAL) && (symbol>=7)) ||
+      ((frame_parms->Ncp == EXTENDED) && (symbol>=6)))
+    rballoc = dlsch0_harq->rb_alloc_odd;
+  else
+    rballoc = dlsch0_harq->rb_alloc_even;
+
+
+  if (dlsch0_harq->mimo_mode>DUALSTREAM_PUSCH_PRECODING) {
+    LOG_E(PHY,"This transmission mode is not yet supported!\n");
+    return(-1);
+  }
+
+
+  if ((dlsch0_harq->mimo_mode==LARGE_CDD) || ((dlsch0_harq->mimo_mode>=DUALSTREAM_UNIFORM_PRECODING1) && (dlsch0_harq->mimo_mode<=DUALSTREAM_PUSCH_PRECODING)))  {
+    DevAssert(dlsch1_harq);
+    if (eNB_id!=eNB_id_i) {
+      LOG_E(PHY,"TM3/TM4 requires to set eNB_id==eNB_id_i!\n");
+      return(-1);
+    }
+  }
+
+#if UE_TIMING_TRACE
+  if(symbol > ue->frame_parms.symbols_per_tti>>1)
+  {
+      slot = 1;
+  }
+#endif
+
+#ifdef DEBUG_HARQ
+  printf("Demod  dlsch0_harq->pmi_alloc %d\n",  dlsch0_harq->pmi_alloc);
+#endif
+
+  if (frame_parms->nb_antenna_ports_eNB>1 && beamforming_mode==0) {
+#ifdef DEBUG_DLSCH_MOD
+    LOG_I(PHY,"dlsch: using pmi %x (%p), rb_alloc %x\n",pmi2hex_2Ar1(dlsch0_harq->pmi_alloc),dlsch[0],dlsch0_harq->rb_alloc_even[0]);
+#endif
+
+#if UE_TIMING_TRACE
+    start_meas(&ue->generic_stat_bis[ue->current_thread_id[subframe]][slot]);
+#endif
+    nb_rb = dlsch_extract_rbs_dual(common_vars->common_vars_rx_data_per_thread[ue->current_thread_id[subframe]].rxdataF,
+                                   common_vars->common_vars_rx_data_per_thread[ue->current_thread_id[subframe]].dl_ch_estimates[eNB_id],
+                                   pdsch_vars[eNB_id]->rxdataF_ext,
+                                   pdsch_vars[eNB_id]->dl_ch_estimates_ext,
+                                   dlsch0_harq->pmi_alloc,
+                                   pdsch_vars[eNB_id]->pmi_ext,
+                                   rballoc,
+                                   symbol,
+                                   subframe,
+                                   ue->high_speed_flag,
+                                   frame_parms,
+                                   dlsch0_harq->mimo_mode);
+#ifdef DEBUG_DLSCH_MOD
+      printf("dlsch: using pmi %lx, rb_alloc %x, pmi_ext ",pmi2hex_2Ar1(dlsch0_harq->pmi_alloc),*rballoc);
+       for (rb=0;rb<nb_rb;rb++)
+          printf("%d",pdsch_vars[eNB_id]->pmi_ext[rb]);
+       printf("\n");
+#endif
+
+   if (rx_type >= rx_IC_single_stream) {
+      if (eNB_id_i<ue->n_connected_eNB) // we are in TM5
+      nb_rb = dlsch_extract_rbs_dual(common_vars->common_vars_rx_data_per_thread[ue->current_thread_id[subframe]].rxdataF,
+                                       common_vars->common_vars_rx_data_per_thread[ue->current_thread_id[subframe]].dl_ch_estimates[eNB_id_i],
+                                       pdsch_vars[eNB_id_i]->rxdataF_ext,
+                                       pdsch_vars[eNB_id_i]->dl_ch_estimates_ext,
+                                       dlsch0_harq->pmi_alloc,
+                                       pdsch_vars[eNB_id_i]->pmi_ext,
+                                       rballoc,
+                                       symbol,
+                                       subframe,
+                                       ue->high_speed_flag,
+                                       frame_parms,
+                                       dlsch0_harq->mimo_mode);
+      else
+        nb_rb = dlsch_extract_rbs_dual(common_vars->common_vars_rx_data_per_thread[ue->current_thread_id[subframe]].rxdataF,
+                                       common_vars->common_vars_rx_data_per_thread[ue->current_thread_id[subframe]].dl_ch_estimates[eNB_id],
+                                       pdsch_vars[eNB_id_i]->rxdataF_ext,
+                                       pdsch_vars[eNB_id_i]->dl_ch_estimates_ext,
+                                       dlsch0_harq->pmi_alloc,
+                                       pdsch_vars[eNB_id_i]->pmi_ext,
+                                       rballoc,
+                                       symbol,
+                                       subframe,
+                                       ue->high_speed_flag,
+                                       frame_parms,
+                                       dlsch0_harq->mimo_mode);
+    }
+  } else if (beamforming_mode==0) { //else if nb_antennas_ports_eNB==1 && beamforming_mode == 0
+    nb_rb = dlsch_extract_rbs_single(common_vars->common_vars_rx_data_per_thread[ue->current_thread_id[subframe]].rxdataF,
+                                     common_vars->common_vars_rx_data_per_thread[ue->current_thread_id[subframe]].dl_ch_estimates[eNB_id],
+                                     pdsch_vars[eNB_id]->rxdataF_ext,
+                                     pdsch_vars[eNB_id]->dl_ch_estimates_ext,
+                                     dlsch0_harq->pmi_alloc,
+                                     pdsch_vars[eNB_id]->pmi_ext,
+                                     rballoc,
+                                     symbol,
+                                     subframe,
+                                     ue->high_speed_flag,
+                                     frame_parms);
+
+   if (rx_type==rx_IC_single_stream) {
+     if (eNB_id_i<ue->n_connected_eNB)
+        nb_rb = dlsch_extract_rbs_single(common_vars->common_vars_rx_data_per_thread[ue->current_thread_id[subframe]].rxdataF,
+                                         common_vars->common_vars_rx_data_per_thread[ue->current_thread_id[subframe]].dl_ch_estimates[eNB_id_i],
+                                         pdsch_vars[eNB_id_i]->rxdataF_ext,
+                                         pdsch_vars[eNB_id_i]->dl_ch_estimates_ext,
+                                         dlsch0_harq->pmi_alloc,
+                                         pdsch_vars[eNB_id_i]->pmi_ext,
+                                         rballoc,
+                                         symbol,
+                                         subframe,
+                                         ue->high_speed_flag,
+                                         frame_parms);
+      else
+        nb_rb = dlsch_extract_rbs_single(common_vars->common_vars_rx_data_per_thread[ue->current_thread_id[subframe]].rxdataF,
+                                         common_vars->common_vars_rx_data_per_thread[ue->current_thread_id[subframe]].dl_ch_estimates[eNB_id],
+                                         pdsch_vars[eNB_id_i]->rxdataF_ext,
+                                         pdsch_vars[eNB_id_i]->dl_ch_estimates_ext,
+                                         dlsch0_harq->pmi_alloc,
+                                         pdsch_vars[eNB_id_i]->pmi_ext,
+                                         rballoc,
+                                         symbol,
+                                         subframe,
+                                         ue->high_speed_flag,
+                                         frame_parms);
+    }
+  } else if (beamforming_mode==7) { //else if beamforming_mode == 7
+    nb_rb = dlsch_extract_rbs_TM7(common_vars->common_vars_rx_data_per_thread[ue->current_thread_id[subframe]].rxdataF,
+                                  pdsch_vars[eNB_id]->dl_bf_ch_estimates,
+                                  pdsch_vars[eNB_id]->rxdataF_ext,
+                                  pdsch_vars[eNB_id]->dl_bf_ch_estimates_ext,
+                                  rballoc,
+                                  symbol,
+                                  subframe,
+                                  ue->high_speed_flag,
+                                  frame_parms);
+
+  } else if(beamforming_mode>7) {
+    LOG_W(PHY,"dlsch_demodulation: beamforming mode not supported yet.\n");
+  }
+
+  //printf("nb_rb = %d, eNB_id %d\n",nb_rb,eNB_id);
+  if (nb_rb==0) {
+    //    LOG_D(PHY,"dlsch_demodulation.c: nb_rb=0\n");
+    return(-1);
+  }
+
+
+#if UE_TIMING_TRACE
+    stop_meas(&ue->generic_stat_bis[ue->current_thread_id[subframe]][slot]);
+#if DISABLE_LOG_X
+    printf("[AbsSFN %d.%d] Slot%d Symbol %d Flag %d type %d: Pilot/Data extraction %5.2f \n",frame,subframe,slot,
+            symbol,ue->high_speed_flag,type,symbol,ue->generic_stat_bis[ue->current_thread_id[subframe]][slot].p_time/(cpuf*1000.0));
+#else
+    LOG_I(PHY, "[AbsSFN %d.%d] Slot%d Symbol %d Flag %d type %d: Pilot/Data extraction  %5.2f \n",frame,subframe,slot,symbol,
+            ue->high_speed_flag,type,ue->generic_stat_bis[ue->current_thread_id[subframe]][slot].p_time/(cpuf*1000.0));
+#endif
+#endif
+
+
+#if UE_TIMING_TRACE
+    start_meas(&ue->generic_stat_bis[ue->current_thread_id[subframe]][slot]);
+#endif
+  aatx = frame_parms->nb_antenna_ports_eNB;
+  aarx = frame_parms->nb_antennas_rx;
+
+  dlsch_scale_channel(pdsch_vars[eNB_id]->dl_ch_estimates_ext,
+                      frame_parms,
+                      dlsch,
+                      symbol,
+                      nb_rb);
+
+  if ((dlsch0_harq->mimo_mode<DUALSTREAM_UNIFORM_PRECODING1) &&
+      (rx_type==rx_IC_single_stream) &&
+      (eNB_id_i==ue->n_connected_eNB) &&
+      (dlsch0_harq->dl_power_off==0)
+     )  // TM5 two-user
+  {
+    dlsch_scale_channel(pdsch_vars[eNB_id_i]->dl_ch_estimates_ext,
+                        frame_parms,
+                        dlsch,
+                        symbol,
+                        nb_rb);
+  }
+
+#if UE_TIMING_TRACE
+    stop_meas(&ue->generic_stat_bis[ue->current_thread_id[subframe]][slot]);
+#if DISABLE_LOG_X
+    printf("[AbsSFN %d.%d] Slot%d Symbol %d: Channel Scale %5.2f \n",frame,subframe,slot,symbol,ue->generic_stat_bis[ue->current_thread_id[subframe]][slot].p_time/(cpuf*1000.0));
+#else
+    LOG_I(PHY, "[AbsSFN %d.%d] Slot%d Symbol %d: Channel Scale  %5.2f \n",frame,subframe,slot,symbol,ue->generic_stat_bis[ue->current_thread_id[subframe]][slot].p_time/(cpuf*1000.0));
+#endif
+#endif
+
+#if UE_TIMING_TRACE
+    start_meas(&ue->generic_stat_bis[ue->current_thread_id[subframe]][slot]);
+#endif
+  if (first_symbol_flag==1) {
+    if (beamforming_mode==0){
+      if (dlsch0_harq->mimo_mode<LARGE_CDD) {
+        dlsch_channel_level(pdsch_vars[eNB_id]->dl_ch_estimates_ext,
+                           frame_parms,
+                           avg,
+                           symbol,
+                           nb_rb);
+        avgs = 0;
+        for (aatx=0;aatx<frame_parms->nb_antenna_ports_eNB;aatx++)
+          for (aarx=0;aarx<frame_parms->nb_antennas_rx;aarx++)
+            avgs = cmax(avgs,avg[(aatx<<1)+aarx]);
+
+        pdsch_vars[eNB_id]->log2_maxh = (log2_approx(avgs)/2)+1;
+     }
+     else if ((dlsch0_harq->mimo_mode == LARGE_CDD) ||
+           ((dlsch0_harq->mimo_mode >=DUALSTREAM_UNIFORM_PRECODING1) &&
+            (dlsch0_harq->mimo_mode <=DUALSTREAM_PUSCH_PRECODING)))
+     {
+      dlsch_channel_level_TM34(pdsch_vars[eNB_id]->dl_ch_estimates_ext,
+                                 frame_parms,
+                                 pdsch_vars[eNB_id]->pmi_ext,
+                                 avg_0,
+                                 avg_1,
+                                 symbol,
+                                 nb_rb,
+                                 dlsch0_harq->mimo_mode);
+
+      LOG_D(PHY,"Channel Level TM34  avg_0 %d, avg_1 %d, rx_type %d, rx_standard %d, dlsch_demod_shift %d \n", avg_0[0],
+              avg_1[0], rx_type, rx_standard, dlsch_demod_shift);
+        if (rx_type>rx_standard) {
+          avg_0[0] = (log2_approx(avg_0[0])/2) + dlsch_demod_shift;// + 2 ;//+ 4;
+          avg_1[0] = (log2_approx(avg_1[0])/2) + dlsch_demod_shift;// + 2 ;//+ 4;
+          pdsch_vars[eNB_id]->log2_maxh0 = cmax(avg_0[0],0);
+          pdsch_vars[eNB_id]->log2_maxh1 = cmax(avg_1[0],0);
+         // printf("dlsch_demod_shift  %d\n", dlsch_demod_shift);
+         }
+          else {
+          avg_0[0] = (log2_approx(avg_0[0])/2) - 13 + interf_unaw_shift;
+          avg_1[0] = (log2_approx(avg_1[0])/2) - 13 + interf_unaw_shift;
+          pdsch_vars[eNB_id]->log2_maxh0 = cmax(avg_0[0],0);
+          pdsch_vars[eNB_id]->log2_maxh1 = cmax(avg_1[0],0);
+        }
+      }
+      else if (dlsch0_harq->mimo_mode<DUALSTREAM_UNIFORM_PRECODING1) {// single-layer precoding (TM5, TM6)
+        if ((rx_type==rx_IC_single_stream) && (eNB_id_i==ue->n_connected_eNB) && (dlsch0_harq->dl_power_off==0)) {
+            dlsch_channel_level_TM56(pdsch_vars[eNB_id]->dl_ch_estimates_ext,
+                                frame_parms,
+                                pdsch_vars[eNB_id]->pmi_ext,
+                                avg,
+                                symbol,
+                                nb_rb);
+            avg[0] = log2_approx(avg[0]) - 13 + offset_mumimo_llr_drange[dlsch0_harq->mcs][(i_mod>>1)-1];
+            pdsch_vars[eNB_id]->log2_maxh = cmax(avg[0],0);
+
+        }
+        else if (dlsch0_harq->dl_power_off==1) { //TM6
+
+          dlsch_channel_level(pdsch_vars[eNB_id]->dl_ch_estimates_ext,
+                                   frame_parms,
+                                   avg,
+                                   symbol,
+                                   nb_rb);
+
+          avgs = 0;
+          for (aatx=0;aatx<frame_parms->nb_antenna_ports_eNB;aatx++)
+            for (aarx=0;aarx<frame_parms->nb_antennas_rx;aarx++)
+              avgs = cmax(avgs,avg[(aatx<<1)+aarx]);
+
+          pdsch_vars[eNB_id]->log2_maxh = (log2_approx(avgs)/2) + 1;
+          pdsch_vars[eNB_id]->log2_maxh++;
+
+        }
+      }
+
+    }
+    else if (beamforming_mode==7)
+       dlsch_channel_level_TM7(pdsch_vars[eNB_id]->dl_bf_ch_estimates_ext,
+                              frame_parms,
+                              avg,
+                              symbol,
+                              nb_rb);
+#ifdef UE_DEBUG_TRACE
+    LOG_D(PHY,"[DLSCH] AbsSubframe %d.%d log2_maxh = %d [log2_maxh0 %d log2_maxh1 %d] (%d,%d)\n",
+            frame%1024,subframe, pdsch_vars[eNB_id]->log2_maxh,
+                                                 pdsch_vars[eNB_id]->log2_maxh0,
+                                                 pdsch_vars[eNB_id]->log2_maxh1,
+                                                 avg[0],avgs);
+    //LOG_D(PHY,"[DLSCH] mimo_mode = %d\n", dlsch0_harq->mimo_mode);
+#endif
+
+    //wait until pdcch is decoded
+    //proc->channel_level = 1;
+  }
+
+  /*
+  uint32_t wait = 0;
+  while(proc->channel_level == 0)
+  {
+      usleep(1);
+      wait++;
+  }
+  */
+
+#if T_TRACER
+    if (type == PDSCH)
+    {
+      T(T_UE_PHY_PDSCH_ENERGY, T_INT(eNB_id), T_INT(frame%1024), T_INT(subframe),
+                               T_INT(avg[0]), T_INT(avg[1]),     T_INT(avg[2]),   T_INT(avg[3]));
+    }
+#endif
+
+#if UE_TIMING_TRACE
+    stop_meas(&ue->generic_stat_bis[ue->current_thread_id[subframe]][slot]);
+#if DISABLE_LOG_X
+    printf("[AbsSFN %d.%d] Slot%d Symbol %d first_symbol_flag %d: Channel Level %5.2f \n",frame,subframe,slot,symbol,first_symbol_flag,ue->generic_stat_bis[ue->current_thread_id[subframe]][slot].p_time/(cpuf*1000.0));
+#else
+    LOG_I(PHY, "[AbsSFN %d.%d] Slot%d Symbol %d first_symbol_flag %d: Channel Level  %5.2f \n",frame,subframe,slot,symbol,first_symbol_flag,ue->generic_stat_bis[ue->current_thread_id[subframe]][slot].p_time/(cpuf*1000.0));
+#endif
+#endif
+
+
+#if UE_TIMING_TRACE
+    start_meas(&ue->generic_stat_bis[ue->current_thread_id[subframe]][slot]);
+#endif
+// Now channel compensation
+  if (dlsch0_harq->mimo_mode<LARGE_CDD) {
+    dlsch_channel_compensation(pdsch_vars[eNB_id]->rxdataF_ext,
+                               pdsch_vars[eNB_id]->dl_ch_estimates_ext,
+                               pdsch_vars[eNB_id]->dl_ch_mag0,
+                               pdsch_vars[eNB_id]->dl_ch_magb0,
+                               pdsch_vars[eNB_id]->rxdataF_comp0,
+                               (aatx>1) ? pdsch_vars[eNB_id]->rho : NULL,
+                               frame_parms,
+                               symbol,
+                               first_symbol_flag,
+                               dlsch0_harq->Qm,
+                               nb_rb,
+                               pdsch_vars[eNB_id]->log2_maxh,
+                               measurements); // log2_maxh+I0_shift
+ /*if (symbol == 5) {
+     write_output("rxF_comp_d.m","rxF_c_d",&pdsch_vars[eNB_id]->rxdataF_comp0[0][symbol*frame_parms->N_RB_DL*12],frame_parms->N_RB_DL*12,1,1);
+ } */
+    if ((rx_type==rx_IC_single_stream) &&
+        (eNB_id_i<ue->n_connected_eNB)) {
+         dlsch_channel_compensation(pdsch_vars[eNB_id_i]->rxdataF_ext,
+                                 pdsch_vars[eNB_id_i]->dl_ch_estimates_ext,
+                                 pdsch_vars[eNB_id_i]->dl_ch_mag0,
+                                 pdsch_vars[eNB_id_i]->dl_ch_magb0,
+                                 pdsch_vars[eNB_id_i]->rxdataF_comp0,
+                                 (aatx>1) ? pdsch_vars[eNB_id_i]->rho : NULL,
+                                 frame_parms,
+                                 symbol,
+                                 first_symbol_flag,
+                                 i_mod,
+                                 nb_rb,
+                                 pdsch_vars[eNB_id]->log2_maxh,
+                                 measurements); // log2_maxh+I0_shift
+#ifdef DEBUG_PHY
+      if (symbol == 5) {
+        write_output("rxF_comp_d.m","rxF_c_d",&pdsch_vars[eNB_id]->rxdataF_comp0[0][symbol*frame_parms->N_RB_DL*12],frame_parms->N_RB_DL*12,1,1);
+        write_output("rxF_comp_i.m","rxF_c_i",&pdsch_vars[eNB_id_i]->rxdataF_comp0[0][symbol*frame_parms->N_RB_DL*12],frame_parms->N_RB_DL*12,1,1);
+      }
+#endif
+
+      dlsch_dual_stream_correlation(frame_parms,
+                                    symbol,
+                                    nb_rb,
+                                    pdsch_vars[eNB_id]->dl_ch_estimates_ext,
+                                    pdsch_vars[eNB_id_i]->dl_ch_estimates_ext,
+                                    pdsch_vars[eNB_id]->dl_ch_rho_ext[harq_pid][round],
+                                    pdsch_vars[eNB_id]->log2_maxh);
+    }
+  } else if ((dlsch0_harq->mimo_mode == LARGE_CDD) || ((dlsch0_harq->mimo_mode >=DUALSTREAM_UNIFORM_PRECODING1) &&
+            (dlsch0_harq->mimo_mode <=DUALSTREAM_PUSCH_PRECODING))){
+      dlsch_channel_compensation_TM34(frame_parms,
+                                     pdsch_vars[eNB_id],
+                                     measurements,
+                                     eNB_id,
+                                     symbol,
+                                     dlsch0_harq->Qm,
+                                     dlsch1_harq->Qm,
+                                     harq_pid,
+                                     dlsch0_harq->round,
+                                     dlsch0_harq->mimo_mode,
+                                     nb_rb,
+                                     pdsch_vars[eNB_id]->log2_maxh0,
+                                     pdsch_vars[eNB_id]->log2_maxh1);
+  /*   if (symbol == 5) {
+     write_output("rxF_comp_d00.m","rxF_c_d00",&pdsch_vars[eNB_id]->rxdataF_comp0[0][symbol*frame_parms->N_RB_DL*12],frame_parms->N_RB_DL*12,1,1);// should be QAM
+     write_output("rxF_comp_d01.m","rxF_c_d01",&pdsch_vars[eNB_id]->rxdataF_comp0[1][symbol*frame_parms->N_RB_DL*12],frame_parms->N_RB_DL*12,1,1);//should be almost 0
+     write_output("rxF_comp_d10.m","rxF_c_d10",&pdsch_vars[eNB_id]->rxdataF_comp1[harq_pid][round][0][symbol*frame_parms->N_RB_DL*12],frame_parms->N_RB_DL*12,1,1);//should be almost 0
+     write_output("rxF_comp_d11.m","rxF_c_d11",&pdsch_vars[eNB_id]->rxdataF_comp1[harq_pid][round][1][symbol*frame_parms->N_RB_DL*12],frame_parms->N_RB_DL*12,1,1);//should be QAM
+        } */
+      // compute correlation between signal and interference channels (rho12 and rho21)
+        dlsch_dual_stream_correlation(frame_parms, // this is doing h11'*h12 and h21'*h22
+                                    symbol,
+                                    nb_rb,
+                                    pdsch_vars[eNB_id]->dl_ch_estimates_ext,
+                                    &(pdsch_vars[eNB_id]->dl_ch_estimates_ext[2]),
+                                    pdsch_vars[eNB_id]->dl_ch_rho2_ext,
+                                    pdsch_vars[eNB_id]->log2_maxh0);
+        //printf("rho stream1 =%d\n", &pdsch_vars[eNB_id]->dl_ch_rho_ext[harq_pid][round] );
+      //to be optimized (just take complex conjugate)
+      dlsch_dual_stream_correlation(frame_parms, // this is doing h12'*h11 and h22'*h21
+                                    symbol,
+                                    nb_rb,
+                                    &(pdsch_vars[eNB_id]->dl_ch_estimates_ext[2]),
+                                    pdsch_vars[eNB_id]->dl_ch_estimates_ext,
+                                    pdsch_vars[eNB_id]->dl_ch_rho_ext[harq_pid][round],
+                                    pdsch_vars[eNB_id]->log2_maxh1);
+    //  printf("rho stream2 =%d\n",&pdsch_vars[eNB_id]->dl_ch_rho2_ext );
+      //printf("TM3 log2_maxh : %d\n",pdsch_vars[eNB_id]->log2_maxh);
+  /*     if (symbol == 5) {
+     write_output("rho0_0.m","rho0_0",&pdsch_vars[eNB_id]->dl_ch_rho_ext[harq_pid][round][0][symbol*frame_parms->N_RB_DL*12],frame_parms->N_RB_DL*12,1,1);// should be QAM
+     write_output("rho2_0.m","rho2_0",&pdsch_vars[eNB_id]->dl_ch_rho2_ext[0][symbol*frame_parms->N_RB_DL*12],frame_parms->N_RB_DL*12,1,1);//should be almost 0
+     write_output("rho0_1.m.m","rho0_1",&pdsch_vars[eNB_id]->dl_ch_rho_ext[harq_pid][round][1][symbol*frame_parms->N_RB_DL*12],frame_parms->N_RB_DL*12,1,1);//should be almost 0
+     write_output("rho2_1.m","rho2_1",&pdsch_vars[eNB_id]->dl_ch_rho2_ext[1][symbol*frame_parms->N_RB_DL*12],frame_parms->N_RB_DL*12,1,1);//should be QAM
+        } */
+
+    } else if (dlsch0_harq->mimo_mode<DUALSTREAM_UNIFORM_PRECODING1) {// single-layer precoding (TM5, TM6)
+        if ((rx_type==rx_IC_single_stream) && (eNB_id_i==ue->n_connected_eNB) && (dlsch0_harq->dl_power_off==0)) {
+          dlsch_channel_compensation_TM56(pdsch_vars[eNB_id]->rxdataF_ext,
+                                      pdsch_vars[eNB_id]->dl_ch_estimates_ext,
+                                      pdsch_vars[eNB_id]->dl_ch_mag0,
+                                      pdsch_vars[eNB_id]->dl_ch_magb0,
+                                      pdsch_vars[eNB_id]->rxdataF_comp0,
+                                      pdsch_vars[eNB_id]->pmi_ext,
+                                      frame_parms,
+                                      measurements,
+                                      eNB_id,
+                                      symbol,
+                                      dlsch0_harq->Qm,
+                                      nb_rb,
+                                      pdsch_vars[eNB_id]->log2_maxh,
+                                      dlsch0_harq->dl_power_off);
+
+        for (rb=0; rb<nb_rb; rb++) {
+          switch(pdsch_vars[eNB_id]->pmi_ext[rb]) {
+          case 0:
+            pdsch_vars[eNB_id_i]->pmi_ext[rb]=1;
+            break;
+         case 1:
+            pdsch_vars[eNB_id_i]->pmi_ext[rb]=0;
+            break;
+         case 2:
+            pdsch_vars[eNB_id_i]->pmi_ext[rb]=3;
+            break;
+          case 3:
+            pdsch_vars[eNB_id_i]->pmi_ext[rb]=2;
+            break;
+          }
+       //  if (rb==0)
+        //    printf("pmi %d, pmi_i %d\n",pdsch_vars[eNB_id]->pmi_ext[rb],pdsch_vars[eNB_id_i]->pmi_ext[rb]);
+      }
+      dlsch_channel_compensation_TM56(pdsch_vars[eNB_id_i]->rxdataF_ext,
+                                      pdsch_vars[eNB_id_i]->dl_ch_estimates_ext,
+                                      pdsch_vars[eNB_id_i]->dl_ch_mag0,
+                                      pdsch_vars[eNB_id_i]->dl_ch_magb0,
+                                      pdsch_vars[eNB_id_i]->rxdataF_comp0,
+                                      pdsch_vars[eNB_id_i]->pmi_ext,
+                                      frame_parms,
+                                      measurements,
+                                      eNB_id_i,
+                                      symbol,
+                                      i_mod,
+                                      nb_rb,
+                                      pdsch_vars[eNB_id]->log2_maxh,
+                                      dlsch0_harq->dl_power_off);
+#ifdef DEBUG_PHY
+      if (symbol==5) {
+        write_output("rxF_comp_d.m","rxF_c_d",&pdsch_vars[eNB_id]->rxdataF_comp0[0][symbol*frame_parms->N_RB_DL*12],frame_parms->N_RB_DL*12,1,1);
+       write_output("rxF_comp_i.m","rxF_c_i",&pdsch_vars[eNB_id_i]->rxdataF_comp0[0][symbol*frame_parms->N_RB_DL*12],frame_parms->N_RB_DL*12,1,1);
+      }
+#endif
+      dlsch_dual_stream_correlation(frame_parms,
+                                    symbol,
+                                    nb_rb,
+                                    pdsch_vars[eNB_id]->dl_ch_estimates_ext,
+                                    pdsch_vars[eNB_id_i]->dl_ch_estimates_ext,
+                                    pdsch_vars[eNB_id]->dl_ch_rho_ext[harq_pid][round],
+                                    pdsch_vars[eNB_id]->log2_maxh);
+    }  else if (dlsch0_harq->dl_power_off==1)  {
+      dlsch_channel_compensation_TM56(pdsch_vars[eNB_id]->rxdataF_ext,
+                                      pdsch_vars[eNB_id]->dl_ch_estimates_ext,
+                                      pdsch_vars[eNB_id]->dl_ch_mag0,
+                                      pdsch_vars[eNB_id]->dl_ch_magb0,
+                                      pdsch_vars[eNB_id]->rxdataF_comp0,
+                                      pdsch_vars[eNB_id]->pmi_ext,
+                                      frame_parms,
+                                      measurements,
+                                      eNB_id,
+                                      symbol,
+                                      dlsch0_harq->Qm,
+                                      nb_rb,
+                                      pdsch_vars[eNB_id]->log2_maxh,
+                                      1);
+
+      }
+
+
+    } else if (dlsch0_harq->mimo_mode==TM7) { //TM7
+
+      dlsch_channel_compensation(pdsch_vars[eNB_id]->rxdataF_ext,
+                                 pdsch_vars[eNB_id]->dl_bf_ch_estimates_ext,
+                                 pdsch_vars[eNB_id]->dl_ch_mag0,
+                                 pdsch_vars[eNB_id]->dl_ch_magb0,
+                                 pdsch_vars[eNB_id]->rxdataF_comp0,
+                                 (aatx>1) ? pdsch_vars[eNB_id]->rho : NULL,
+                                 frame_parms,
+                                 symbol,
+                                 first_symbol_flag,
+                                 get_Qm(dlsch0_harq->mcs),
+                                 nb_rb,
+                                 //9,
+                                 pdsch_vars[eNB_id]->log2_maxh,
+                                 measurements); // log2_maxh+I0_shift
+  }
+
+#if UE_TIMING_TRACE
+    stop_meas(&ue->generic_stat_bis[ue->current_thread_id[subframe]][slot]);
+#if DISABLE_LOG_X
+    printf("[AbsSFN %d.%d] Slot%d Symbol %d log2_maxh %d channel_level %d: Channel Comp %5.2f \n",frame,subframe,slot,symbol,pdsch_vars[eNB_id]->log2_maxh,proc->channel_level,ue->generic_stat_bis[ue->current_thread_id[subframe]][slot].p_time/(cpuf*1000.0));
+#else
+    LOG_I(PHY, "[AbsSFN %d.%d] Slot%d Symbol %d log2_maxh %d channel_level %d: Channel Comp  %5.2f \n",frame,subframe,slot,symbol,pdsch_vars[eNB_id]->log2_maxh,proc->channel_level,ue->generic_stat_bis[ue->current_thread_id[subframe]][slot].p_time/(cpuf*1000.0));
+#endif
+#endif
+// MRC
+#if UE_TIMING_TRACE
+    start_meas(&ue->generic_stat_bis[ue->current_thread_id[subframe]][slot]);
+#endif
+
+   if (frame_parms->nb_antennas_rx > 1) {
+    if ((dlsch0_harq->mimo_mode == LARGE_CDD) ||
+        ((dlsch0_harq->mimo_mode >=DUALSTREAM_UNIFORM_PRECODING1) &&
+         (dlsch0_harq->mimo_mode <=DUALSTREAM_PUSCH_PRECODING))){  // TM3 or TM4
+      if (frame_parms->nb_antenna_ports_eNB == 2) {
+        dlsch_detection_mrc_TM34(frame_parms,
+                                 pdsch_vars[eNB_id],
+                                 harq_pid,
+                                 dlsch0_harq->round,
+                                 symbol,
+                                 nb_rb,
+                                 1);
+    /*   if (symbol == 5) {
+     write_output("rho0_mrc.m","rho0_0",&pdsch_vars[eNB_id]->dl_ch_rho_ext[harq_pid][round][0][symbol*frame_parms->N_RB_DL*12],frame_parms->N_RB_DL*12,1,1);// should be QAM
+     write_output("rho2_mrc.m","rho2_0",&pdsch_vars[eNB_id]->dl_ch_rho2_ext[0][symbol*frame_parms->N_RB_DL*12],frame_parms->N_RB_DL*12,1,1);//should be almost 0
+        } */
+      }
+    } else {
+      dlsch_detection_mrc(frame_parms,
+                          pdsch_vars[eNB_id]->rxdataF_comp0,
+                          pdsch_vars[eNB_id_i]->rxdataF_comp0,
+                          pdsch_vars[eNB_id]->rho,
+                          pdsch_vars[eNB_id]->dl_ch_rho_ext[harq_pid][round],
+                          pdsch_vars[eNB_id]->dl_ch_mag0,
+                          pdsch_vars[eNB_id]->dl_ch_magb0,
+                          pdsch_vars[eNB_id_i]->dl_ch_mag0,
+                          pdsch_vars[eNB_id_i]->dl_ch_magb0,
+                          symbol,
+                          nb_rb,
+                          rx_type==rx_IC_single_stream);
+    }
+  }
+  //  printf("Combining");
+  if ((dlsch0_harq->mimo_mode == SISO) ||
+      ((dlsch0_harq->mimo_mode >= UNIFORM_PRECODING11) &&
+       (dlsch0_harq->mimo_mode <= PUSCH_PRECODING0)) ||
+       (dlsch0_harq->mimo_mode == TM7)) {
+    /*
+      dlsch_siso(frame_parms,
+      pdsch_vars[eNB_id]->rxdataF_comp,
+      pdsch_vars[eNB_id_i]->rxdataF_comp,
+      symbol,
+      nb_rb);
+    */
+  } else if (dlsch0_harq->mimo_mode == ALAMOUTI) {
+    dlsch_alamouti(frame_parms,
+                   pdsch_vars[eNB_id]->rxdataF_comp0,
+                   pdsch_vars[eNB_id]->dl_ch_mag0,
+                   pdsch_vars[eNB_id]->dl_ch_magb0,
+                   symbol,
+                   nb_rb);
+  }
+
+  //    printf("LLR");
+  if ((dlsch0_harq->mimo_mode == LARGE_CDD) ||
+      ((dlsch0_harq->mimo_mode >=DUALSTREAM_UNIFORM_PRECODING1) &&
+       (dlsch0_harq->mimo_mode <=DUALSTREAM_PUSCH_PRECODING)))  {
+    rxdataF_comp_ptr = pdsch_vars[eNB_id]->rxdataF_comp1[harq_pid][round];
+    dl_ch_mag_ptr = pdsch_vars[eNB_id]->dl_ch_mag1[harq_pid][round];
+  }
+  else {
+    rxdataF_comp_ptr = pdsch_vars[eNB_id_i]->rxdataF_comp0;
+    dl_ch_mag_ptr = pdsch_vars[eNB_id_i]->dl_ch_mag0;
+    //i_mod should have been passed as a parameter
+  }
+
+#if UE_TIMING_TRACE
+    stop_meas(&ue->generic_stat_bis[ue->current_thread_id[subframe]][slot]);
+#if DISABLE_LOG_X
+    printf("[AbsSFN %d.%d] Slot%d Symbol %d: Channel Combine %5.2f \n",frame,subframe,slot,symbol,ue->generic_stat_bis[ue->current_thread_id[subframe]][slot].p_time/(cpuf*1000.0));
+#else
+    LOG_I(PHY, "[AbsSFN %d.%d] Slot%d Symbol %d: Channel Combine  %5.2f \n",frame,subframe,slot,symbol,ue->generic_stat_bis[ue->current_thread_id[subframe]][slot].p_time/(cpuf*1000.0));
+#endif
+#endif
+
+#if UE_TIMING_TRACE
+    start_meas(&ue->generic_stat_bis[ue->current_thread_id[subframe]][slot]);
+#endif
+  //printf("LLR dlsch0_harq->Qm %d rx_type %d cw0 %d cw1 %d symbol %d \n",dlsch0_harq->Qm,rx_type,codeword_TB0,codeword_TB1,symbol);
+  // compute LLRs
+  // compute the pointer where the LLRs should be filled for this OFDM symbol
+  int8_t  *pllr_symbol_cw0;
+  int8_t  *pllr_symbol_cw1;
+  uint32_t llr_offset_symbol;
+  llr_offset_symbol = pdsch_vars[eNB_id]->llr_offset[symbol];
+  pllr_symbol_cw0  = (int8_t*)pdsch_vars[eNB_id]->llr[0];
+  pllr_symbol_cw1  = (int8_t*)pdsch_vars[eNB_id]->llr[1];
+  pllr_symbol_cw0 += llr_offset_symbol;
+  pllr_symbol_cw1 += llr_offset_symbol;
+
+  LOG_I(PHY,"compute LLRs [AbsSubframe %d.%d-%d] NbRB %d Qm %d LLRs-Length %d LLR-Offset %d @LLR Buff %p @LLR Buff(symb) %p\n",
+             frame, subframe,symbol,
+             nb_rb,dlsch0_harq->Qm,
+             pdsch_vars[eNB_id]->llr_length[symbol],
+             pdsch_vars[eNB_id]->llr_offset[symbol],
+             (int16_t*)pdsch_vars[eNB_id]->llr[0],
+             pllr_symbol_cw0);
+
+  switch (dlsch0_harq->Qm) {
+  case 2 :
+    if ((rx_type==rx_standard) || (codeword_TB1 == -1)) {
+
+
+        dlsch_qpsk_llr(frame_parms,
+                       pdsch_vars[eNB_id]->rxdataF_comp0,
+                       (int16_t*)pllr_symbol_cw0,
+                       symbol,
+                       first_symbol_flag,
+                       nb_rb,
+                       adjust_G2(frame_parms,dlsch0_harq->rb_alloc_even,2,subframe,symbol),
+                       beamforming_mode);
+
+    } else if (codeword_TB0 == -1){
+
+        dlsch_qpsk_llr(frame_parms,
+                       pdsch_vars[eNB_id]->rxdataF_comp0,
+                       (int16_t*)pllr_symbol_cw1,
+                       symbol,
+                       first_symbol_flag,
+                       nb_rb,
+                       adjust_G2(frame_parms,dlsch0_harq->rb_alloc_even,2,subframe,symbol),
+                       beamforming_mode);
+    }
+      else if (rx_type >= rx_IC_single_stream) {
+        if (dlsch1_harq->Qm == 2) {
+          dlsch_qpsk_qpsk_llr(frame_parms,
+                              pdsch_vars[eNB_id]->rxdataF_comp0,
+                              rxdataF_comp_ptr,
+                              pdsch_vars[eNB_id]->dl_ch_rho2_ext,
+                              pdsch_vars[eNB_id]->llr[0],
+                              symbol,first_symbol_flag,nb_rb,
+                              adjust_G2(frame_parms,dlsch0_harq->rb_alloc_even,2,subframe,symbol),
+                              pdsch_vars[eNB_id]->llr128);
+          if (rx_type==rx_IC_dual_stream) {
+            dlsch_qpsk_qpsk_llr(frame_parms,
+                                rxdataF_comp_ptr,
+                                pdsch_vars[eNB_id]->rxdataF_comp0,
+                                pdsch_vars[eNB_id]->dl_ch_rho_ext[harq_pid][round],
+                                pdsch_vars[eNB_id]->llr[1],
+                                symbol,first_symbol_flag,nb_rb,
+                                adjust_G2(frame_parms,dlsch1_harq->rb_alloc_even,2,subframe,symbol),
+                                pdsch_vars[eNB_id]->llr128_2ndstream);
+          }
+        }
+        else if (dlsch1_harq->Qm == 4) {
+          dlsch_qpsk_16qam_llr(frame_parms,
+                               pdsch_vars[eNB_id]->rxdataF_comp0,
+                               rxdataF_comp_ptr,//i
+                               dl_ch_mag_ptr,//i
+                               pdsch_vars[eNB_id]->dl_ch_rho2_ext,
+                               pdsch_vars[eNB_id]->llr[0],
+                               symbol,first_symbol_flag,nb_rb,
+                               adjust_G2(frame_parms,dlsch0_harq->rb_alloc_even,2,subframe,symbol),
+                               pdsch_vars[eNB_id]->llr128);
+          if (rx_type==rx_IC_dual_stream) {
+            dlsch_16qam_qpsk_llr(frame_parms,
+                                 rxdataF_comp_ptr,
+                                 pdsch_vars[eNB_id]->rxdataF_comp0,//i
+                                 dl_ch_mag_ptr,
+                                 pdsch_vars[eNB_id]->dl_ch_rho_ext[harq_pid][round],
+                                 pdsch_vars[eNB_id]->llr[1],
+                                 symbol,first_symbol_flag,nb_rb,
+                                 adjust_G2(frame_parms,dlsch1_harq->rb_alloc_even,4,subframe,symbol),
+                                 pdsch_vars[eNB_id]->llr128_2ndstream);
+          }
+        }
+        else {
+          dlsch_qpsk_64qam_llr(frame_parms,
+                               pdsch_vars[eNB_id]->rxdataF_comp0,
+                               rxdataF_comp_ptr,//i
+                               dl_ch_mag_ptr,//i
+                               pdsch_vars[eNB_id]->dl_ch_rho2_ext,
+                               pdsch_vars[eNB_id]->llr[0],
+                               symbol,first_symbol_flag,nb_rb,
+                               adjust_G2(frame_parms,dlsch0_harq->rb_alloc_even,2,subframe,symbol),
+                               pdsch_vars[eNB_id]->llr128);
+          if (rx_type==rx_IC_dual_stream) {
+            dlsch_64qam_qpsk_llr(frame_parms,
+                                 rxdataF_comp_ptr,
+                                 pdsch_vars[eNB_id]->rxdataF_comp0,//i
+                                 dl_ch_mag_ptr,
+                                 pdsch_vars[eNB_id]->dl_ch_rho_ext[harq_pid][round],
+                                 pdsch_vars[eNB_id]->llr[1],
+                                 symbol,first_symbol_flag,nb_rb,
+                                 adjust_G2(frame_parms,dlsch1_harq->rb_alloc_even,6,subframe,symbol),
+                                 pdsch_vars[eNB_id]->llr128_2ndstream);
+          }
+        }
+      }
+    break;
+  case 4 :
+    if ((rx_type==rx_standard ) || (codeword_TB1 == -1)) {
+      dlsch_16qam_llr(frame_parms,
+                      pdsch_vars[eNB_id]->rxdataF_comp0,
+                      pdsch_vars[eNB_id]->llr[0],
+                      pdsch_vars[eNB_id]->dl_ch_mag0,
+                      symbol,first_symbol_flag,nb_rb,
+                      adjust_G2(frame_parms,dlsch0_harq->rb_alloc_even,4,subframe,symbol),
+                      pdsch_vars[eNB_id]->llr128,
+                      beamforming_mode);
+    } else if (codeword_TB0 == -1){
+      dlsch_16qam_llr(frame_parms,
+                      pdsch_vars[eNB_id]->rxdataF_comp0,
+                      pdsch_vars[eNB_id]->llr[1],
+                      pdsch_vars[eNB_id]->dl_ch_mag0,
+                      symbol,first_symbol_flag,nb_rb,
+                      adjust_G2(frame_parms,dlsch0_harq->rb_alloc_even,4,subframe,symbol),
+                      pdsch_vars[eNB_id]->llr128_2ndstream,
+                      beamforming_mode);
+    }
+    else if (rx_type >= rx_IC_single_stream) {
+      if (dlsch1_harq->Qm == 2) {
+        dlsch_16qam_qpsk_llr(frame_parms,
+                             pdsch_vars[eNB_id]->rxdataF_comp0,
+                             rxdataF_comp_ptr,//i
+                             pdsch_vars[eNB_id]->dl_ch_mag0,
+                             pdsch_vars[eNB_id]->dl_ch_rho2_ext,
+                             pdsch_vars[eNB_id]->llr[0],
+                             symbol,first_symbol_flag,nb_rb,
+                             adjust_G2(frame_parms,dlsch0_harq->rb_alloc_even,4,subframe,symbol),
+                             pdsch_vars[eNB_id]->llr128);
+        if (rx_type==rx_IC_dual_stream) {
+          dlsch_qpsk_16qam_llr(frame_parms,
+                               rxdataF_comp_ptr,
+                               pdsch_vars[eNB_id]->rxdataF_comp0,//i
+                               pdsch_vars[eNB_id]->dl_ch_mag0,//i
+                               pdsch_vars[eNB_id]->dl_ch_rho_ext[harq_pid][round],
+                               pdsch_vars[eNB_id]->llr[1],
+                               symbol,first_symbol_flag,nb_rb,
+                               adjust_G2(frame_parms,dlsch1_harq->rb_alloc_even,2,subframe,symbol),
+                               pdsch_vars[eNB_id]->llr128_2ndstream);
+        }
+      }
+      else if (dlsch1_harq->Qm == 4) {
+        dlsch_16qam_16qam_llr(frame_parms,
+                              pdsch_vars[eNB_id]->rxdataF_comp0,
+                              rxdataF_comp_ptr,//i
+                              pdsch_vars[eNB_id]->dl_ch_mag0,
+                              dl_ch_mag_ptr,//i
+                              pdsch_vars[eNB_id]->dl_ch_rho2_ext,
+                              pdsch_vars[eNB_id]->llr[0],
+                              symbol,first_symbol_flag,nb_rb,
+                              adjust_G2(frame_parms,dlsch0_harq->rb_alloc_even,4,subframe,symbol),
+                              pdsch_vars[eNB_id]->llr128);
+        if (rx_type==rx_IC_dual_stream) {
+          dlsch_16qam_16qam_llr(frame_parms,
+                                rxdataF_comp_ptr,
+                                pdsch_vars[eNB_id]->rxdataF_comp0,//i
+                                dl_ch_mag_ptr,
+                                pdsch_vars[eNB_id]->dl_ch_mag0,//i
+                                pdsch_vars[eNB_id]->dl_ch_rho_ext[harq_pid][round],
+                                pdsch_vars[eNB_id]->llr[1],
+                                symbol,first_symbol_flag,nb_rb,
+                                adjust_G2(frame_parms,dlsch1_harq->rb_alloc_even,4,subframe,symbol),
+                                pdsch_vars[eNB_id]->llr128_2ndstream);
+        }
+      }
+      else {
+        dlsch_16qam_64qam_llr(frame_parms,
+                              pdsch_vars[eNB_id]->rxdataF_comp0,
+                              rxdataF_comp_ptr,//i
+                              pdsch_vars[eNB_id]->dl_ch_mag0,
+                              dl_ch_mag_ptr,//i
+                              pdsch_vars[eNB_id]->dl_ch_rho2_ext,
+                              pdsch_vars[eNB_id]->llr[0],
+                              symbol,first_symbol_flag,nb_rb,
+                              adjust_G2(frame_parms,dlsch0_harq->rb_alloc_even,4,subframe,symbol),
+                              pdsch_vars[eNB_id]->llr128);
+        if (rx_type==rx_IC_dual_stream) {
+          dlsch_64qam_16qam_llr(frame_parms,
+                                rxdataF_comp_ptr,
+                                pdsch_vars[eNB_id]->rxdataF_comp0,
+                                dl_ch_mag_ptr,
+                                pdsch_vars[eNB_id]->dl_ch_mag0,
+                                pdsch_vars[eNB_id]->dl_ch_rho_ext[harq_pid][round],
+                                pdsch_vars[eNB_id]->llr[1],
+                                symbol,first_symbol_flag,nb_rb,
+                                adjust_G2(frame_parms,dlsch1_harq->rb_alloc_even,6,subframe,symbol),
+                                pdsch_vars[eNB_id]->llr128_2ndstream);
+        }
+      }
+    }
+    break;
+  case 6 :
+    if ((rx_type==rx_standard) || (codeword_TB1 == -1))  {
+      dlsch_64qam_llr(frame_parms,
+                      pdsch_vars[eNB_id]->rxdataF_comp0,
+                      (int16_t*)pllr_symbol_cw0,
+                      pdsch_vars[eNB_id]->dl_ch_mag0,
+                      pdsch_vars[eNB_id]->dl_ch_magb0,
+                      symbol,first_symbol_flag,nb_rb,
+                      adjust_G2(frame_parms,dlsch0_harq->rb_alloc_even,6,subframe,symbol),
+                      pdsch_vars[eNB_id]->llr_offset[symbol],
+                      beamforming_mode);
+    } else if (codeword_TB0 == -1){
+      dlsch_64qam_llr(frame_parms,
+                      pdsch_vars[eNB_id]->rxdataF_comp0,
+                      (int16_t*)pllr_symbol_cw1,
+                      pdsch_vars[eNB_id]->dl_ch_mag0,
+                      pdsch_vars[eNB_id]->dl_ch_magb0,
+                      symbol,first_symbol_flag,nb_rb,
+                      adjust_G2(frame_parms,dlsch0_harq->rb_alloc_even,6,subframe,symbol),
+                      pdsch_vars[eNB_id]->llr_offset[symbol],
+                      beamforming_mode);
+    }
+    else if (rx_type >= rx_IC_single_stream) {
+      if (dlsch1_harq->Qm == 2) {
+        dlsch_64qam_qpsk_llr(frame_parms,
+                             pdsch_vars[eNB_id]->rxdataF_comp0,
+                             rxdataF_comp_ptr,//i
+                             pdsch_vars[eNB_id]->dl_ch_mag0,
+                             pdsch_vars[eNB_id]->dl_ch_rho2_ext,
+                             pdsch_vars[eNB_id]->llr[0],
+                             symbol,first_symbol_flag,nb_rb,
+                             adjust_G2(frame_parms,dlsch0_harq->rb_alloc_even,6,subframe,symbol),
+                             pdsch_vars[eNB_id]->llr128);
+        if (rx_type==rx_IC_dual_stream) {
+          dlsch_qpsk_64qam_llr(frame_parms,
+                               rxdataF_comp_ptr,
+                               pdsch_vars[eNB_id]->rxdataF_comp0,//i
+                               pdsch_vars[eNB_id]->dl_ch_mag0,
+                               pdsch_vars[eNB_id]->dl_ch_rho_ext[harq_pid][round],
+                               pdsch_vars[eNB_id]->llr[1],
+                               symbol,first_symbol_flag,nb_rb,
+                               adjust_G2(frame_parms,dlsch1_harq->rb_alloc_even,2,subframe,symbol),
+                               pdsch_vars[eNB_id]->llr128_2ndstream);
+        }
+      }
+      else if (dlsch1_harq->Qm == 4) {
+        dlsch_64qam_16qam_llr(frame_parms,
+                              pdsch_vars[eNB_id]->rxdataF_comp0,
+                              rxdataF_comp_ptr,//i
+                              pdsch_vars[eNB_id]->dl_ch_mag0,
+                              dl_ch_mag_ptr,//i
+                              pdsch_vars[eNB_id]->dl_ch_rho2_ext,
+                              pdsch_vars[eNB_id]->llr[0],
+                              symbol,first_symbol_flag,nb_rb,
+                              adjust_G2(frame_parms,dlsch0_harq->rb_alloc_even,6,subframe,symbol),
+                              pdsch_vars[eNB_id]->llr128);
+        if (rx_type==rx_IC_dual_stream) {
+          dlsch_16qam_64qam_llr(frame_parms,
+                                rxdataF_comp_ptr,
+                                pdsch_vars[eNB_id]->rxdataF_comp0,//i
+                                dl_ch_mag_ptr,
+                                pdsch_vars[eNB_id]->dl_ch_mag0,//i
+                                pdsch_vars[eNB_id]->dl_ch_rho_ext[harq_pid][round],
+                                pdsch_vars[eNB_id]->llr[1],
+                                symbol,first_symbol_flag,nb_rb,
+                                adjust_G2(frame_parms,dlsch1_harq->rb_alloc_even,4,subframe,symbol),
+                                pdsch_vars[eNB_id]->llr128_2ndstream);
+        }
+      }
+      else {
+        dlsch_64qam_64qam_llr(frame_parms,
+                              pdsch_vars[eNB_id]->rxdataF_comp0,
+                              rxdataF_comp_ptr,//i
+                              pdsch_vars[eNB_id]->dl_ch_mag0,
+                              dl_ch_mag_ptr,//i
+                              pdsch_vars[eNB_id]->dl_ch_rho2_ext,
+                              (int16_t*)pllr_symbol_cw0,
+                              symbol,first_symbol_flag,nb_rb,
+                              adjust_G2(frame_parms,dlsch0_harq->rb_alloc_even,6,subframe,symbol),
+                              pdsch_vars[eNB_id]->llr_offset[symbol]);
+        if (rx_type==rx_IC_dual_stream) {
+          dlsch_64qam_64qam_llr(frame_parms,
+                                rxdataF_comp_ptr,
+                                pdsch_vars[eNB_id]->rxdataF_comp0,//i
+                                dl_ch_mag_ptr,
+                                pdsch_vars[eNB_id]->dl_ch_mag0,//i
+                                pdsch_vars[eNB_id]->dl_ch_rho_ext[harq_pid][round],
+                                (int16_t*)pllr_symbol_cw1,
+                                symbol,first_symbol_flag,nb_rb,
+                                adjust_G2(frame_parms,dlsch1_harq->rb_alloc_even,6,subframe,symbol),
+                                pdsch_vars[eNB_id]->llr_offset[symbol]);
+        }
+      }
+    }
+    break;
+  default:
+    LOG_W(PHY,"rx_dlsch.c : Unknown mod_order!!!!\n");
+    return(-1);
+    break;
+  }
+  if (dlsch1_harq) {
+  switch (get_Qm(dlsch1_harq->mcs)) {
+  case 2 :
+    if (rx_type==rx_standard) {
+        dlsch_qpsk_llr(frame_parms,
+                       pdsch_vars[eNB_id]->rxdataF_comp0,
+                       (int16_t*)pllr_symbol_cw0,
+                       symbol,first_symbol_flag,nb_rb,
+                       adjust_G2(frame_parms,dlsch0_harq->rb_alloc_even,2,subframe,symbol),
+                       beamforming_mode);
+    }
+    break;
+  case 4:
+    if (rx_type==rx_standard) {
+      dlsch_16qam_llr(frame_parms,
+                      pdsch_vars[eNB_id]->rxdataF_comp0,
+                      pdsch_vars[eNB_id]->llr[0],
+                      pdsch_vars[eNB_id]->dl_ch_mag0,
+                      symbol,first_symbol_flag,nb_rb,
+                      adjust_G2(frame_parms,dlsch0_harq->rb_alloc_even,4,subframe,symbol),
+                      pdsch_vars[eNB_id]->llr128,
+                      beamforming_mode);
+    }
+    break;
+  case 6 :
+    if (rx_type==rx_standard) {
+      dlsch_64qam_llr(frame_parms,
+                      pdsch_vars[eNB_id]->rxdataF_comp0,
+                      (int16_t*)pllr_symbol_cw0,
+                      pdsch_vars[eNB_id]->dl_ch_mag0,
+                      pdsch_vars[eNB_id]->dl_ch_magb0,
+                      symbol,first_symbol_flag,nb_rb,
+                      adjust_G2(frame_parms,dlsch0_harq->rb_alloc_even,6,subframe,symbol),
+                      pdsch_vars[eNB_id]->llr_offset[symbol],
+                      beamforming_mode);
+  }
+    break;
+  default:
+    LOG_W(PHY,"rx_dlsch.c : Unknown mod_order!!!!\n");
+    return(-1);
+    break;
+  }
+  }
+
+#if UE_TIMING_TRACE
+    stop_meas(&ue->generic_stat_bis[ue->current_thread_id[subframe]][slot]);
+#if DISABLE_LOG_X
+    printf("[AbsSFN %d.%d] Slot%d Symbol %d: LLR Computation %5.2f \n",frame,subframe,slot,symbol,ue->generic_stat_bis[ue->current_thread_id[subframe]][slot].p_time/(cpuf*1000.0));
+#else
+    LOG_I(PHY, "[AbsSFN %d.%d] Slot%d Symbol %d: LLR Computation  %5.2f \n",frame,subframe,slot,symbol,ue->generic_stat_bis[ue->current_thread_id[subframe]][slot].p_time/(cpuf*1000.0));
+#endif
+#endif
+// Please keep it: useful for debugging
+#if 0
+  if( (symbol == 13) && (subframe==0) && (dlsch0_harq->Qm == 6) /*&& (nb_rb==25)*/)
+  {
+      LOG_E(PHY,"Dump Phy Chan Est \n");
+      if(1)
+      {
+#if 1
+      write_output("rxdataF0.m"    , "rxdataF0",             &common_vars->common_vars_rx_data_per_thread[ue->current_thread_id[subframe]].rxdataF[0][0],14*frame_parms->ofdm_symbol_size,1,1);
+      //write_output("rxdataF1.m"    , "rxdataF1",             &common_vars->common_vars_rx_data_per_thread[ue->current_thread_id[subframe]].rxdataF[0][0],14*frame_parms->ofdm_symbol_size,1,1);
+      write_output("dl_ch_estimates00.m", "dl_ch_estimates00",   &common_vars->common_vars_rx_data_per_thread[ue->current_thread_id[subframe]].dl_ch_estimates[eNB_id][0][0],14*frame_parms->ofdm_symbol_size,1,1);
+      //write_output("dl_ch_estimates01.m", "dl_ch_estimates01",   &common_vars->common_vars_rx_data_per_thread[ue->current_thread_id[subframe]].dl_ch_estimates[eNB_id][1][0],14*frame_parms->ofdm_symbol_size,1,1);
+      //write_output("dl_ch_estimates10.m", "dl_ch_estimates10",   &common_vars->common_vars_rx_data_per_thread[ue->current_thread_id[subframe]].dl_ch_estimates[eNB_id][2][0],14*frame_parms->ofdm_symbol_size,1,1);
+      //write_output("dl_ch_estimates11.m", "dl_ch_estimates11",   &common_vars->common_vars_rx_data_per_thread[ue->current_thread_id[subframe]].dl_ch_estimates[eNB_id][3][0],14*frame_parms->ofdm_symbol_size,1,1);
+
+
+      //write_output("rxdataF_ext00.m"    , "rxdataF_ext00",       &pdsch_vars[eNB_id]->rxdataF_ext[0][0],14*frame_parms->N_RB_DL*12,1,1);
+      //write_output("rxdataF_ext01.m"    , "rxdataF_ext01",       &pdsch_vars[eNB_id]->rxdataF_ext[1][0],14*frame_parms->N_RB_DL*12,1,1);
+      //write_output("rxdataF_ext10.m"    , "rxdataF_ext10",       &pdsch_vars[eNB_id]->rxdataF_ext[2][0],14*frame_parms->N_RB_DL*12,1,1);
+      //write_output("rxdataF_ext11.m"    , "rxdataF_ext11",       &pdsch_vars[eNB_id]->rxdataF_ext[3][0],14*frame_parms->N_RB_DL*12,1,1);
+      write_output("dl_ch_estimates_ext00.m", "dl_ch_estimates_ext00", &pdsch_vars[eNB_id]->dl_ch_estimates_ext[0][0],14*frame_parms->N_RB_DL*12,1,1);
+      //write_output("dl_ch_estimates_ext01.m", "dl_ch_estimates_ext01", &pdsch_vars[eNB_id]->dl_ch_estimates_ext[1][0],14*frame_parms->N_RB_DL*12,1,1);
+      //write_output("dl_ch_estimates_ext10.m", "dl_ch_estimates_ext10", &pdsch_vars[eNB_id]->dl_ch_estimates_ext[2][0],14*frame_parms->N_RB_DL*12,1,1);
+      //write_output("dl_ch_estimates_ext11.m", "dl_ch_estimates_ext11", &pdsch_vars[eNB_id]->dl_ch_estimates_ext[3][0],14*frame_parms->N_RB_DL*12,1,1);
+      write_output("rxdataF_comp00.m","rxdataF_comp00",              &pdsch_vars[eNB_id]->rxdataF_comp0[0][0],14*frame_parms->N_RB_DL*12,1,1);
+      //write_output("rxdataF_comp01.m","rxdataF_comp01",              &pdsch_vars[eNB_id]->rxdataF_comp0[1][0],14*frame_parms->N_RB_DL*12,1,1);
+      //write_output("rxdataF_comp10.m","rxdataF_comp10",              &pdsch_vars[eNB_id]->rxdataF_comp1[harq_pid][round][0][0],14*frame_parms->N_RB_DL*12,1,1);
+      //write_output("rxdataF_comp11.m","rxdataF_comp11",              &pdsch_vars[eNB_id]->rxdataF_comp1[harq_pid][round][1][0],14*frame_parms->N_RB_DL*12,1,1);
+#endif
+      write_output("llr0.m","llr0",  &pdsch_vars[eNB_id]->llr[0][0],(14*nb_rb*12*dlsch1_harq->Qm) - 4*(nb_rb*4*dlsch1_harq->Qm),1,0);
+      //write_output("llr1.m","llr1",  &pdsch_vars[eNB_id]->llr[1][0],(14*nb_rb*12*dlsch1_harq->Qm) - 4*(nb_rb*4*dlsch1_harq->Qm),1,0);
+
+
+      AssertFatal(0," ");
+      }
+
+  }
+#endif
+
+  T(T_UE_PHY_PDSCH_IQ, T_INT(eNB_id), T_INT(frame%1024),
+    T_INT(subframe), T_INT(nb_rb),
+    T_INT(frame_parms->N_RB_UL), T_INT(frame_parms->symbols_per_tti),
+    T_BUFFER(&pdsch_vars[eNB_id]->rxdataF_comp0[eNB_id][0],
+             2 * /* ulsch[UE_id]->harq_processes[harq_pid]->nb_rb */ frame_parms->N_RB_UL *12*frame_parms->symbols_per_tti*2));
+
+  return 0;
+}
+
+//==============================================================================================
+// Pre-processing for LLR computation
+//==============================================================================================
+
+// Channel compensation (matched filter) for one OFDM symbol of the PDSCH.
+//
+// For every eNB antenna port aatx and every RX antenna aarx, and for each resource block:
+//   - when mod_order > 2, the squared channel magnitude (right-shifted by output_shift) is scaled
+//     by the QAM16/QAM64 amplitude constants and written to dl_ch_mag / dl_ch_magb;
+//   - the received samples are multiplied by the conjugated channel estimate, right-shifted by
+//     output_shift and written to rxdataF_comp.
+// When rho is non-NULL, the product of the conjugated estimate in dl_ch_estimates_ext[aarx] with the
+// estimate in dl_ch_estimates_ext[2+aarx] is written to rho[aarx]; if first_symbol_flag==1 its
+// signal_energy() is stored in measurements->rx_correlation[0][aarx].
+//
+// Parameters:
+//   rxdataF_ext          received samples, indexed [aarx][symbol*N_RB_DL*12 + re]
+//   dl_ch_estimates_ext  channel estimates, indexed [(aatx<<1)+aarx][symbol*N_RB_DL*12 + re]
+//   dl_ch_mag            output, first magnitude threshold, same indexing as the estimates
+//   dl_ch_magb           output, second magnitude threshold, same indexing as the estimates
+//   rxdataF_comp         output, compensated samples, same indexing as the estimates
+//   rho                  output, cross-correlation per RX antenna; may be NULL to skip it
+//   frame_parms          frame configuration (Ncp, N_RB_DL, nb_antenna_ports_eNB, nb_antennas_rx are read)
+//   symbol               OFDM symbol index used to address the buffers
+//   first_symbol_flag    1 to update measurements->rx_correlation from rho
+//   mod_order            2 skips the magnitude computation, 4 and 6 select the QAM16/QAM64 constants
+//   nb_rb                number of resource blocks to process
+//   output_shift         right shift applied to all 32-bit products
+//   measurements         only dereferenced when rho is non-NULL and first_symbol_flag==1
+//
+// NOTE(review): every int sample is processed as a pair of 16-bit values (presumably interleaved
+// real/imaginary parts) - confirm against the buffer producers.
+void dlsch_channel_compensation(int **rxdataF_ext,
+                                int **dl_ch_estimates_ext,
+                                int **dl_ch_mag,
+                                int **dl_ch_magb,
+                                int **rxdataF_comp,
+                                int **rho,
+                                LTE_DL_FRAME_PARMS *frame_parms,
+                                unsigned char symbol,
+                                uint8_t first_symbol_flag,
+                                unsigned char mod_order,
+                                unsigned short nb_rb,
+                                unsigned char output_shift,
+                                PHY_MEASUREMENTS *measurements)
+{
+
+#if defined(__x86_64__) || defined(__i386__)
+
+  unsigned short rb;
+  unsigned char aatx,aarx,symbol_mod,pilots=0;
+  __m128i *dl_ch128,*dl_ch128_2,*dl_ch_mag128,*dl_ch_mag128b,*rxdataF128,*rxdataF_comp128,*rho128;
+  __m128i mmtmpD0,mmtmpD1,mmtmpD2,mmtmpD3;
+  // zero-initialised so that a mod_order > 2 other than 4 or 6 never reads indeterminate values
+  __m128i QAM_amp128  = _mm_setzero_si128();
+  __m128i QAM_amp128b = _mm_setzero_si128();
+
+  // symbol index inside its slot
+  symbol_mod = (symbol>=(7-frame_parms->Ncp)) ? symbol-(7-frame_parms->Ncp) : symbol;
+
+  if ((symbol_mod == 0) || (symbol_mod == (4-frame_parms->Ncp))) {
+    // symbol carrying pilots
+    if (frame_parms->nb_antenna_ports_eNB==1) // 10 out of 12 so don't reduce size
+      nb_rb=1+(5*nb_rb/6);
+    else
+      pilots=1; // only 2 vectors (8 REs) are processed per RB
+  }
+
+  for (aatx=0; aatx<frame_parms->nb_antenna_ports_eNB; aatx++) {
+    if (mod_order == 4) {
+      QAM_amp128 = _mm_set1_epi16(QAM16_n1);  // 2/sqrt(10)
+      QAM_amp128b = _mm_setzero_si128();
+    } else if (mod_order == 6) {
+      QAM_amp128  = _mm_set1_epi16(QAM64_n1);
+      QAM_amp128b = _mm_set1_epi16(QAM64_n2);
+    }
+
+    for (aarx=0; aarx<frame_parms->nb_antennas_rx; aarx++) {
+
+      dl_ch128          = (__m128i *)&dl_ch_estimates_ext[(aatx<<1)+aarx][symbol*frame_parms->N_RB_DL*12];
+      dl_ch_mag128      = (__m128i *)&dl_ch_mag[(aatx<<1)+aarx][symbol*frame_parms->N_RB_DL*12];
+      dl_ch_mag128b     = (__m128i *)&dl_ch_magb[(aatx<<1)+aarx][symbol*frame_parms->N_RB_DL*12];
+      rxdataF128        = (__m128i *)&rxdataF_ext[aarx][symbol*frame_parms->N_RB_DL*12];
+      rxdataF_comp128   = (__m128i *)&rxdataF_comp[(aatx<<1)+aarx][symbol*frame_parms->N_RB_DL*12];
+
+      for (rb=0; rb<nb_rb; rb++) {
+        if (mod_order>2) {
+          // get channel amplitude if not QPSK
+          mmtmpD0 = _mm_madd_epi16(dl_ch128[0],dl_ch128[0]);
+          mmtmpD0 = _mm_srai_epi32(mmtmpD0,output_shift);
+
+          mmtmpD1 = _mm_madd_epi16(dl_ch128[1],dl_ch128[1]);
+          mmtmpD1 = _mm_srai_epi32(mmtmpD1,output_shift);
+
+          mmtmpD0 = _mm_packs_epi32(mmtmpD0,mmtmpD1);
+
+          // duplicate each magnitude so it lines up with both 16-bit halves of a sample,
+          // then scale: mulhi followed by <<1 is a Q15 multiplication
+          dl_ch_mag128[0] = _mm_unpacklo_epi16(mmtmpD0,mmtmpD0);
+          dl_ch_mag128b[0] = dl_ch_mag128[0];
+          dl_ch_mag128[0] = _mm_mulhi_epi16(dl_ch_mag128[0],QAM_amp128);
+          dl_ch_mag128[0] = _mm_slli_epi16(dl_ch_mag128[0],1);
+
+          dl_ch_mag128[1] = _mm_unpackhi_epi16(mmtmpD0,mmtmpD0);
+          dl_ch_mag128b[1] = dl_ch_mag128[1];
+          dl_ch_mag128[1] = _mm_mulhi_epi16(dl_ch_mag128[1],QAM_amp128);
+          dl_ch_mag128[1] = _mm_slli_epi16(dl_ch_mag128[1],1);
+
+          if (pilots==0) {
+            mmtmpD0 = _mm_madd_epi16(dl_ch128[2],dl_ch128[2]);
+            mmtmpD0 = _mm_srai_epi32(mmtmpD0,output_shift);
+            mmtmpD1 = _mm_packs_epi32(mmtmpD0,mmtmpD0);
+
+            dl_ch_mag128[2] = _mm_unpacklo_epi16(mmtmpD1,mmtmpD1);
+            dl_ch_mag128b[2] = dl_ch_mag128[2];
+
+            dl_ch_mag128[2] = _mm_mulhi_epi16(dl_ch_mag128[2],QAM_amp128);
+            dl_ch_mag128[2] = _mm_slli_epi16(dl_ch_mag128[2],1);
+          }
+
+          dl_ch_mag128b[0] = _mm_mulhi_epi16(dl_ch_mag128b[0],QAM_amp128b);
+          dl_ch_mag128b[0] = _mm_slli_epi16(dl_ch_mag128b[0],1);
+
+          dl_ch_mag128b[1] = _mm_mulhi_epi16(dl_ch_mag128b[1],QAM_amp128b);
+          dl_ch_mag128b[1] = _mm_slli_epi16(dl_ch_mag128b[1],1);
+
+          if (pilots==0) {
+            dl_ch_mag128b[2] = _mm_mulhi_epi16(dl_ch_mag128b[2],QAM_amp128b);
+            dl_ch_mag128b[2] = _mm_slli_epi16(dl_ch_mag128b[2],1);
+          }
+        }
+
+        // multiply by conjugated channel
+        mmtmpD0 = _mm_madd_epi16(dl_ch128[0],rxdataF128[0]);
+        // mmtmpD0 contains real part of 4 consecutive outputs (32-bit)
+        mmtmpD1 = _mm_shufflelo_epi16(dl_ch128[0],_MM_SHUFFLE(2,3,0,1));
+        mmtmpD1 = _mm_shufflehi_epi16(mmtmpD1,_MM_SHUFFLE(2,3,0,1));
+        mmtmpD1 = _mm_sign_epi16(mmtmpD1,*(__m128i*)&conjugate[0]);
+        mmtmpD1 = _mm_madd_epi16(mmtmpD1,rxdataF128[0]);
+        // mmtmpD1 contains imag part of 4 consecutive outputs (32-bit)
+        mmtmpD0 = _mm_srai_epi32(mmtmpD0,output_shift);
+        mmtmpD1 = _mm_srai_epi32(mmtmpD1,output_shift);
+        // re-interleave real and imaginary parts and saturate back to 16 bits
+        mmtmpD2 = _mm_unpacklo_epi32(mmtmpD0,mmtmpD1);
+        mmtmpD3 = _mm_unpackhi_epi32(mmtmpD0,mmtmpD1);
+        rxdataF_comp128[0] = _mm_packs_epi32(mmtmpD2,mmtmpD3);
+
+        // multiply by conjugated channel
+        mmtmpD0 = _mm_madd_epi16(dl_ch128[1],rxdataF128[1]);
+        // mmtmpD0 contains real part of 4 consecutive outputs (32-bit)
+        mmtmpD1 = _mm_shufflelo_epi16(dl_ch128[1],_MM_SHUFFLE(2,3,0,1));
+        mmtmpD1 = _mm_shufflehi_epi16(mmtmpD1,_MM_SHUFFLE(2,3,0,1));
+        mmtmpD1 = _mm_sign_epi16(mmtmpD1,*(__m128i*)conjugate);
+        mmtmpD1 = _mm_madd_epi16(mmtmpD1,rxdataF128[1]);
+        // mmtmpD1 contains imag part of 4 consecutive outputs (32-bit)
+        mmtmpD0 = _mm_srai_epi32(mmtmpD0,output_shift);
+        mmtmpD1 = _mm_srai_epi32(mmtmpD1,output_shift);
+        mmtmpD2 = _mm_unpacklo_epi32(mmtmpD0,mmtmpD1);
+        mmtmpD3 = _mm_unpackhi_epi32(mmtmpD0,mmtmpD1);
+
+        rxdataF_comp128[1] = _mm_packs_epi32(mmtmpD2,mmtmpD3);
+
+        if (pilots==0) {
+          // multiply by conjugated channel
+          mmtmpD0 = _mm_madd_epi16(dl_ch128[2],rxdataF128[2]);
+          // mmtmpD0 contains real part of 4 consecutive outputs (32-bit)
+          mmtmpD1 = _mm_shufflelo_epi16(dl_ch128[2],_MM_SHUFFLE(2,3,0,1));
+          mmtmpD1 = _mm_shufflehi_epi16(mmtmpD1,_MM_SHUFFLE(2,3,0,1));
+          mmtmpD1 = _mm_sign_epi16(mmtmpD1,*(__m128i*)conjugate);
+          mmtmpD1 = _mm_madd_epi16(mmtmpD1,rxdataF128[2]);
+          // mmtmpD1 contains imag part of 4 consecutive outputs (32-bit)
+          mmtmpD0 = _mm_srai_epi32(mmtmpD0,output_shift);
+          mmtmpD1 = _mm_srai_epi32(mmtmpD1,output_shift);
+          mmtmpD2 = _mm_unpacklo_epi32(mmtmpD0,mmtmpD1);
+          mmtmpD3 = _mm_unpackhi_epi32(mmtmpD0,mmtmpD1);
+
+          rxdataF_comp128[2] = _mm_packs_epi32(mmtmpD2,mmtmpD3);
+
+          dl_ch128+=3;
+          dl_ch_mag128+=3;
+          dl_ch_mag128b+=3;
+          rxdataF128+=3;
+          rxdataF_comp128+=3;
+        } else { // we have a smaller PDSCH in symbols with pilots so skip last group of 4 REs and increment less
+          dl_ch128+=2;
+          dl_ch_mag128+=2;
+          dl_ch_mag128b+=2;
+          rxdataF128+=2;
+          rxdataF_comp128+=2;
+        }
+
+      }
+    }
+  }
+
+  if (rho) {
+
+    for (aarx=0; aarx<frame_parms->nb_antennas_rx; aarx++) {
+      rho128        = (__m128i *)&rho[aarx][symbol*frame_parms->N_RB_DL*12];
+      dl_ch128      = (__m128i *)&dl_ch_estimates_ext[aarx][symbol*frame_parms->N_RB_DL*12];
+      dl_ch128_2    = (__m128i *)&dl_ch_estimates_ext[2+aarx][symbol*frame_parms->N_RB_DL*12];
+
+      for (rb=0; rb<nb_rb; rb++) {
+        // multiply by conjugated channel
+        mmtmpD0 = _mm_madd_epi16(dl_ch128[0],dl_ch128_2[0]);
+        // mmtmpD0 contains real part of 4 consecutive outputs (32-bit)
+        mmtmpD1 = _mm_shufflelo_epi16(dl_ch128[0],_MM_SHUFFLE(2,3,0,1));
+        mmtmpD1 = _mm_shufflehi_epi16(mmtmpD1,_MM_SHUFFLE(2,3,0,1));
+        mmtmpD1 = _mm_sign_epi16(mmtmpD1,*(__m128i*)&conjugate[0]);
+        mmtmpD1 = _mm_madd_epi16(mmtmpD1,dl_ch128_2[0]);
+        // mmtmpD1 contains imag part of 4 consecutive outputs (32-bit)
+        mmtmpD0 = _mm_srai_epi32(mmtmpD0,output_shift);
+        mmtmpD1 = _mm_srai_epi32(mmtmpD1,output_shift);
+        mmtmpD2 = _mm_unpacklo_epi32(mmtmpD0,mmtmpD1);
+        mmtmpD3 = _mm_unpackhi_epi32(mmtmpD0,mmtmpD1);
+        rho128[0] = _mm_packs_epi32(mmtmpD2,mmtmpD3);
+
+        // multiply by conjugated channel
+        mmtmpD0 = _mm_madd_epi16(dl_ch128[1],dl_ch128_2[1]);
+        // mmtmpD0 contains real part of 4 consecutive outputs (32-bit)
+        mmtmpD1 = _mm_shufflelo_epi16(dl_ch128[1],_MM_SHUFFLE(2,3,0,1));
+        mmtmpD1 = _mm_shufflehi_epi16(mmtmpD1,_MM_SHUFFLE(2,3,0,1));
+        mmtmpD1 = _mm_sign_epi16(mmtmpD1,*(__m128i*)conjugate);
+        mmtmpD1 = _mm_madd_epi16(mmtmpD1,dl_ch128_2[1]);
+        // mmtmpD1 contains imag part of 4 consecutive outputs (32-bit)
+        mmtmpD0 = _mm_srai_epi32(mmtmpD0,output_shift);
+        mmtmpD1 = _mm_srai_epi32(mmtmpD1,output_shift);
+        mmtmpD2 = _mm_unpacklo_epi32(mmtmpD0,mmtmpD1);
+        mmtmpD3 = _mm_unpackhi_epi32(mmtmpD0,mmtmpD1);
+
+        rho128[1] =_mm_packs_epi32(mmtmpD2,mmtmpD3);
+
+        // multiply by conjugated channel
+        mmtmpD0 = _mm_madd_epi16(dl_ch128[2],dl_ch128_2[2]);
+        // mmtmpD0 contains real part of 4 consecutive outputs (32-bit)
+        mmtmpD1 = _mm_shufflelo_epi16(dl_ch128[2],_MM_SHUFFLE(2,3,0,1));
+        mmtmpD1 = _mm_shufflehi_epi16(mmtmpD1,_MM_SHUFFLE(2,3,0,1));
+        mmtmpD1 = _mm_sign_epi16(mmtmpD1,*(__m128i*)conjugate);
+        mmtmpD1 = _mm_madd_epi16(mmtmpD1,dl_ch128_2[2]);
+        // mmtmpD1 contains imag part of 4 consecutive outputs (32-bit)
+        mmtmpD0 = _mm_srai_epi32(mmtmpD0,output_shift);
+        mmtmpD1 = _mm_srai_epi32(mmtmpD1,output_shift);
+        mmtmpD2 = _mm_unpacklo_epi32(mmtmpD0,mmtmpD1);
+        mmtmpD3 = _mm_unpackhi_epi32(mmtmpD0,mmtmpD1);
+
+        rho128[2] = _mm_packs_epi32(mmtmpD2,mmtmpD3);
+
+        dl_ch128+=3;
+        dl_ch128_2+=3;
+        rho128+=3;
+
+      }
+
+      if (first_symbol_flag==1) {
+        // rb equals nb_rb here, so the energy is taken over all REs processed above
+        measurements->rx_correlation[0][aarx] = signal_energy(&rho[aarx][symbol*frame_parms->N_RB_DL*12],rb*12);
+      }
+    }
+  }
+
+  _mm_empty();
+  _m_empty();
+
+#elif defined(__arm__)
+
+  unsigned short rb;
+  unsigned char aatx,aarx,symbol_mod,pilots=0;
+
+  int16x4_t *dl_ch128,*dl_ch128_2,*rxdataF128;
+  int32x4_t mmtmpD0,mmtmpD1,mmtmpD0b,mmtmpD1b;
+  int16x8_t *dl_ch_mag128,*dl_ch_mag128b,mmtmpD2,mmtmpD3,mmtmpD4;
+  // zero-initialised so that a mod_order > 2 other than 4 or 6 never reads indeterminate values
+  int16x8_t QAM_amp128  = vmovq_n_s16(0);
+  int16x8_t QAM_amp128b = vmovq_n_s16(0);
+  int16x4x2_t *rxdataF_comp128,*rho128;
+
+  int16_t conj[4]__attribute__((aligned(16))) = {1,-1,1,-1};
+  // negative count: vqshlq_s32 with a negative shift performs a right shift
+  int32x4_t output_shift128 = vmovq_n_s32(-(int32_t)output_shift);
+
+  // symbol index inside its slot
+  symbol_mod = (symbol>=(7-frame_parms->Ncp)) ? symbol-(7-frame_parms->Ncp) : symbol;
+
+  if ((symbol_mod == 0) || (symbol_mod == (4-frame_parms->Ncp))) {
+    // symbol carrying pilots
+    if (frame_parms->nb_antenna_ports_eNB==1) { // 10 out of 12 so don't reduce size
+      nb_rb=1+(5*nb_rb/6);
+    }
+    else {
+      pilots=1; // only 8 REs are processed per RB
+    }
+  }
+
+  for (aatx=0; aatx<frame_parms->nb_antenna_ports_eNB; aatx++) {
+    if (mod_order == 4) {
+      QAM_amp128  = vmovq_n_s16(QAM16_n1);  // 2/sqrt(10)
+      QAM_amp128b = vmovq_n_s16(0);
+    } else if (mod_order == 6) {
+      QAM_amp128  = vmovq_n_s16(QAM64_n1);
+      QAM_amp128b = vmovq_n_s16(QAM64_n2);
+    }
+
+    for (aarx=0; aarx<frame_parms->nb_antennas_rx; aarx++) {
+      dl_ch128          = (int16x4_t*)&dl_ch_estimates_ext[(aatx<<1)+aarx][symbol*frame_parms->N_RB_DL*12];
+      dl_ch_mag128      = (int16x8_t*)&dl_ch_mag[(aatx<<1)+aarx][symbol*frame_parms->N_RB_DL*12];
+      dl_ch_mag128b     = (int16x8_t*)&dl_ch_magb[(aatx<<1)+aarx][symbol*frame_parms->N_RB_DL*12];
+      rxdataF128        = (int16x4_t*)&rxdataF_ext[aarx][symbol*frame_parms->N_RB_DL*12];
+      rxdataF_comp128   = (int16x4x2_t*)&rxdataF_comp[(aatx<<1)+aarx][symbol*frame_parms->N_RB_DL*12];
+
+      for (rb=0; rb<nb_rb; rb++) {
+        if (mod_order>2) {
+          // get channel amplitude if not QPSK
+          mmtmpD0 = vmull_s16(dl_ch128[0], dl_ch128[0]);
+          // mmtmpD0 = [ch0*ch0,ch1*ch1,ch2*ch2,ch3*ch3];
+          mmtmpD0 = vqshlq_s32(vqaddq_s32(mmtmpD0,vrev64q_s32(mmtmpD0)),output_shift128);
+          // mmtmpD0 = [ch0*ch0 + ch1*ch1,ch0*ch0 + ch1*ch1,ch2*ch2 + ch3*ch3,ch2*ch2 + ch3*ch3]>>output_shift128 on 32-bits
+          mmtmpD1 = vmull_s16(dl_ch128[1], dl_ch128[1]);
+          mmtmpD1 = vqshlq_s32(vqaddq_s32(mmtmpD1,vrev64q_s32(mmtmpD1)),output_shift128);
+          mmtmpD2 = vcombine_s16(vmovn_s32(mmtmpD0),vmovn_s32(mmtmpD1));
+          // mmtmpD2 = same sums for 8 consecutive 16-bit lanes, >>output_shift128, narrowed to 16-bits
+          mmtmpD0 = vmull_s16(dl_ch128[2], dl_ch128[2]);
+          mmtmpD0 = vqshlq_s32(vqaddq_s32(mmtmpD0,vrev64q_s32(mmtmpD0)),output_shift128);
+          mmtmpD1 = vmull_s16(dl_ch128[3], dl_ch128[3]);
+          mmtmpD1 = vqshlq_s32(vqaddq_s32(mmtmpD1,vrev64q_s32(mmtmpD1)),output_shift128);
+          mmtmpD3 = vcombine_s16(vmovn_s32(mmtmpD0),vmovn_s32(mmtmpD1));
+
+          if (pilots==0) {
+            mmtmpD0 = vmull_s16(dl_ch128[4], dl_ch128[4]);
+            mmtmpD0 = vqshlq_s32(vqaddq_s32(mmtmpD0,vrev64q_s32(mmtmpD0)),output_shift128);
+            mmtmpD1 = vmull_s16(dl_ch128[5], dl_ch128[5]);
+            mmtmpD1 = vqshlq_s32(vqaddq_s32(mmtmpD1,vrev64q_s32(mmtmpD1)),output_shift128);
+            mmtmpD4 = vcombine_s16(vmovn_s32(mmtmpD0),vmovn_s32(mmtmpD1));
+          }
+
+          dl_ch_mag128b[0] = vqdmulhq_s16(mmtmpD2,QAM_amp128b);
+          dl_ch_mag128b[1] = vqdmulhq_s16(mmtmpD3,QAM_amp128b);
+          dl_ch_mag128[0] = vqdmulhq_s16(mmtmpD2,QAM_amp128);
+          dl_ch_mag128[1] = vqdmulhq_s16(mmtmpD3,QAM_amp128);
+
+          if (pilots==0) {
+            dl_ch_mag128b[2] = vqdmulhq_s16(mmtmpD4,QAM_amp128b);
+            dl_ch_mag128[2]  = vqdmulhq_s16(mmtmpD4,QAM_amp128);
+          }
+        }
+
+        mmtmpD0 = vmull_s16(dl_ch128[0], rxdataF128[0]);
+        //mmtmpD0 = [Re(ch[0])Re(rx[0]) Im(ch[0])Im(rx[0]) Re(ch[1])Re(rx[1]) Im(ch[1])Im(rx[1])]
+        mmtmpD1 = vmull_s16(dl_ch128[1], rxdataF128[1]);
+        //mmtmpD1 = [Re(ch[2])Re(rx[2]) Im(ch[2])Im(rx[2]) Re(ch[3])Re(rx[3]) Im(ch[3])Im(rx[3])]
+        mmtmpD0 = vcombine_s32(vpadd_s32(vget_low_s32(mmtmpD0),vget_high_s32(mmtmpD0)),
+                               vpadd_s32(vget_low_s32(mmtmpD1),vget_high_s32(mmtmpD1)));
+        //mmtmpD0 = [Re(ch[k])Re(rx[k])+Im(ch[k])Im(rx[k])] for k=0..3
+
+        mmtmpD0b = vmull_s16(vrev32_s16(vmul_s16(dl_ch128[0],*(int16x4_t*)conj)), rxdataF128[0]);
+        //mmtmpD0b = [-Im(ch[0])Re(rx[0]) Re(ch[0])Im(rx[0]) -Im(ch[1])Re(rx[1]) Re(ch[1])Im(rx[1])]
+        mmtmpD1b = vmull_s16(vrev32_s16(vmul_s16(dl_ch128[1],*(int16x4_t*)conj)), rxdataF128[1]);
+        //mmtmpD1b = [-Im(ch[2])Re(rx[2]) Re(ch[2])Im(rx[2]) -Im(ch[3])Re(rx[3]) Re(ch[3])Im(rx[3])]
+        mmtmpD1 = vcombine_s32(vpadd_s32(vget_low_s32(mmtmpD0b),vget_high_s32(mmtmpD0b)),
+                               vpadd_s32(vget_low_s32(mmtmpD1b),vget_high_s32(mmtmpD1b)));
+        //mmtmpD1 = [-Im(ch[k])Re(rx[k])+Re(ch[k])Im(rx[k])] for k=0..3
+
+        mmtmpD0 = vqshlq_s32(mmtmpD0,output_shift128);
+        mmtmpD1 = vqshlq_s32(mmtmpD1,output_shift128);
+        // narrow to 16 bits and interleave real/imaginary parts
+        rxdataF_comp128[0] = vzip_s16(vmovn_s32(mmtmpD0),vmovn_s32(mmtmpD1));
+
+        mmtmpD0 = vmull_s16(dl_ch128[2], rxdataF128[2]);
+        mmtmpD1 = vmull_s16(dl_ch128[3], rxdataF128[3]);
+        mmtmpD0 = vcombine_s32(vpadd_s32(vget_low_s32(mmtmpD0),vget_high_s32(mmtmpD0)),
+                               vpadd_s32(vget_low_s32(mmtmpD1),vget_high_s32(mmtmpD1)));
+        mmtmpD0b = vmull_s16(vrev32_s16(vmul_s16(dl_ch128[2],*(int16x4_t*)conj)), rxdataF128[2]);
+        mmtmpD1b = vmull_s16(vrev32_s16(vmul_s16(dl_ch128[3],*(int16x4_t*)conj)), rxdataF128[3]);
+        mmtmpD1 = vcombine_s32(vpadd_s32(vget_low_s32(mmtmpD0b),vget_high_s32(mmtmpD0b)),
+                               vpadd_s32(vget_low_s32(mmtmpD1b),vget_high_s32(mmtmpD1b)));
+        mmtmpD0 = vqshlq_s32(mmtmpD0,output_shift128);
+        mmtmpD1 = vqshlq_s32(mmtmpD1,output_shift128);
+        rxdataF_comp128[1] = vzip_s16(vmovn_s32(mmtmpD0),vmovn_s32(mmtmpD1));
+
+        if (pilots==0) {
+          mmtmpD0 = vmull_s16(dl_ch128[4], rxdataF128[4]);
+          mmtmpD1 = vmull_s16(dl_ch128[5], rxdataF128[5]);
+          mmtmpD0 = vcombine_s32(vpadd_s32(vget_low_s32(mmtmpD0),vget_high_s32(mmtmpD0)),
+                                 vpadd_s32(vget_low_s32(mmtmpD1),vget_high_s32(mmtmpD1)));
+
+          mmtmpD0b = vmull_s16(vrev32_s16(vmul_s16(dl_ch128[4],*(int16x4_t*)conj)), rxdataF128[4]);
+          mmtmpD1b = vmull_s16(vrev32_s16(vmul_s16(dl_ch128[5],*(int16x4_t*)conj)), rxdataF128[5]);
+          mmtmpD1 = vcombine_s32(vpadd_s32(vget_low_s32(mmtmpD0b),vget_high_s32(mmtmpD0b)),
+                                 vpadd_s32(vget_low_s32(mmtmpD1b),vget_high_s32(mmtmpD1b)));
+
+          mmtmpD0 = vqshlq_s32(mmtmpD0,output_shift128);
+          mmtmpD1 = vqshlq_s32(mmtmpD1,output_shift128);
+          rxdataF_comp128[2] = vzip_s16(vmovn_s32(mmtmpD0),vmovn_s32(mmtmpD1));
+
+          dl_ch128+=6;
+          dl_ch_mag128+=3;
+          dl_ch_mag128b+=3;
+          rxdataF128+=6;
+          rxdataF_comp128+=3;
+
+        } else { // we have a smaller PDSCH in symbols with pilots so skip last group of 4 REs and increment less
+          dl_ch128+=4;
+          dl_ch_mag128+=2;
+          dl_ch_mag128b+=2;
+          rxdataF128+=4;
+          rxdataF_comp128+=2;
+        }
+      }
+    }
+  }
+
+  if (rho) {
+    for (aarx=0; aarx<frame_parms->nb_antennas_rx; aarx++) {
+      rho128        = (int16x4x2_t*)&rho[aarx][symbol*frame_parms->N_RB_DL*12];
+      dl_ch128      = (int16x4_t*)&dl_ch_estimates_ext[aarx][symbol*frame_parms->N_RB_DL*12];
+      dl_ch128_2    = (int16x4_t*)&dl_ch_estimates_ext[2+aarx][symbol*frame_parms->N_RB_DL*12];
+
+      for (rb=0; rb<nb_rb; rb++) {
+        // REs 0..3 of the resource block
+        mmtmpD0 = vmull_s16(dl_ch128[0], dl_ch128_2[0]);
+        mmtmpD1 = vmull_s16(dl_ch128[1], dl_ch128_2[1]);
+        mmtmpD0 = vcombine_s32(vpadd_s32(vget_low_s32(mmtmpD0),vget_high_s32(mmtmpD0)),
+                               vpadd_s32(vget_low_s32(mmtmpD1),vget_high_s32(mmtmpD1)));
+        mmtmpD0b = vmull_s16(vrev32_s16(vmul_s16(dl_ch128[0],*(int16x4_t*)conj)), dl_ch128_2[0]);
+        mmtmpD1b = vmull_s16(vrev32_s16(vmul_s16(dl_ch128[1],*(int16x4_t*)conj)), dl_ch128_2[1]);
+        mmtmpD1 = vcombine_s32(vpadd_s32(vget_low_s32(mmtmpD0b),vget_high_s32(mmtmpD0b)),
+                               vpadd_s32(vget_low_s32(mmtmpD1b),vget_high_s32(mmtmpD1b)));
+
+        mmtmpD0 = vqshlq_s32(mmtmpD0,output_shift128);
+        mmtmpD1 = vqshlq_s32(mmtmpD1,output_shift128);
+        rho128[0] = vzip_s16(vmovn_s32(mmtmpD0),vmovn_s32(mmtmpD1));
+
+        // REs 4..7 of the resource block
+        mmtmpD0 = vmull_s16(dl_ch128[2], dl_ch128_2[2]);
+        mmtmpD1 = vmull_s16(dl_ch128[3], dl_ch128_2[3]);
+        mmtmpD0 = vcombine_s32(vpadd_s32(vget_low_s32(mmtmpD0),vget_high_s32(mmtmpD0)),
+                               vpadd_s32(vget_low_s32(mmtmpD1),vget_high_s32(mmtmpD1)));
+        mmtmpD0b = vmull_s16(vrev32_s16(vmul_s16(dl_ch128[2],*(int16x4_t*)conj)), dl_ch128_2[2]);
+        mmtmpD1b = vmull_s16(vrev32_s16(vmul_s16(dl_ch128[3],*(int16x4_t*)conj)), dl_ch128_2[3]);
+        mmtmpD1 = vcombine_s32(vpadd_s32(vget_low_s32(mmtmpD0b),vget_high_s32(mmtmpD0b)),
+                               vpadd_s32(vget_low_s32(mmtmpD1b),vget_high_s32(mmtmpD1b)));
+
+        mmtmpD0 = vqshlq_s32(mmtmpD0,output_shift128);
+        mmtmpD1 = vqshlq_s32(mmtmpD1,output_shift128);
+        rho128[1] = vzip_s16(vmovn_s32(mmtmpD0),vmovn_s32(mmtmpD1));
+
+        // REs 8..11 of the resource block: the real part must use vectors 4 and 5, like the
+        // imaginary part below (it previously re-used vectors 0 and 1)
+        mmtmpD0 = vmull_s16(dl_ch128[4], dl_ch128_2[4]);
+        mmtmpD1 = vmull_s16(dl_ch128[5], dl_ch128_2[5]);
+        mmtmpD0 = vcombine_s32(vpadd_s32(vget_low_s32(mmtmpD0),vget_high_s32(mmtmpD0)),
+                               vpadd_s32(vget_low_s32(mmtmpD1),vget_high_s32(mmtmpD1)));
+        mmtmpD0b = vmull_s16(vrev32_s16(vmul_s16(dl_ch128[4],*(int16x4_t*)conj)), dl_ch128_2[4]);
+        mmtmpD1b = vmull_s16(vrev32_s16(vmul_s16(dl_ch128[5],*(int16x4_t*)conj)), dl_ch128_2[5]);
+        mmtmpD1 = vcombine_s32(vpadd_s32(vget_low_s32(mmtmpD0b),vget_high_s32(mmtmpD0b)),
+                               vpadd_s32(vget_low_s32(mmtmpD1b),vget_high_s32(mmtmpD1b)));
+
+        mmtmpD0 = vqshlq_s32(mmtmpD0,output_shift128);
+        mmtmpD1 = vqshlq_s32(mmtmpD1,output_shift128);
+        rho128[2] = vzip_s16(vmovn_s32(mmtmpD0),vmovn_s32(mmtmpD1));
+
+        dl_ch128+=6;
+        dl_ch128_2+=6;
+        rho128+=3;
+      }
+
+      if (first_symbol_flag==1) {
+        // rb equals nb_rb here, so the energy is taken over all REs processed above
+        measurements->rx_correlation[0][aarx] = signal_energy(&rho[aarx][symbol*frame_parms->N_RB_DL*12],rb*12);
+      }
+    }
+  }
+#endif
+}
+
+#if defined(__x86_64__) || defined(__i386__)
+
+// Combine the two TX channel estimates of one 128-bit vector with the precoder selected by pmi,
+// then scale the result by ONE_OVER_SQRT2_Q15 (mulhi followed by <<1, i.e. a Q15 multiplication).
+//
+//   pmi 0 : ch0 + ch1              (+1 +1)
+//   pmi 1 : ch0 - ch1              (+1 -1)
+//   pmi 2 : ch0 - rotated(ch1)     (+1 +j)
+//   pmi 3 : ch0 + rotated(ch1)     (+1 -j)
+//
+// The result is stored in ch0[0]. For pmi 2 and 3, ch1[0] is overwritten with its rotated value.
+// Any other pmi leaves ch1[0] untouched and only applies the scaling to ch0[0].
+void prec2A_TM56_128(unsigned char pmi,__m128i *ch0,__m128i *ch1)
+{
+  const __m128i scale = _mm_set1_epi16(ONE_OVER_SQRT2_Q15);
+
+  if ((pmi == 2) || (pmi == 3)) {
+    // apply the sign pattern held in conjugate[], then swap the two 16-bit halves of every sample
+    __m128i rotated = _mm_sign_epi16(ch1[0],*(__m128i*)&conjugate[0]);
+    rotated = _mm_shufflelo_epi16(rotated,_MM_SHUFFLE(2,3,0,1));
+    rotated = _mm_shufflehi_epi16(rotated,_MM_SHUFFLE(2,3,0,1));
+    ch1[0]  = rotated;
+  }
+
+  // saturating combination of the two channels
+  if ((pmi == 0) || (pmi == 3)) {
+    ch0[0] = _mm_adds_epi16(ch0[0],ch1[0]);
+  } else if ((pmi == 1) || (pmi == 2)) {
+    ch0[0] = _mm_subs_epi16(ch0[0],ch1[0]);
+  }
+
+  ch0[0] = _mm_slli_epi16(_mm_mulhi_epi16(ch0[0],scale),1);
+
+  _mm_empty();
+  _m_empty();
+}
+#elif defined(__arm__)
+// Variant of prec2A_TM56_128 compiled under __arm__: combines the two TX channel estimates of one
+// 128-bit vector with the precoder selected by pmi and stores the result in ch0[0].
+//
+//   pmi 0 : ch0 + ch1              (+1 +1)
+//   pmi 1 : ch0 - ch1              (+1 -1)
+//   pmi 2 : ch0 - rotated(ch1)     (+1 +j)
+//   pmi 3 : ch0 + rotated(ch1)     (+1 -j)
+//
+// No 1/sqrt(2) scaling is applied here: sqrt(2) is already taken into account in the computation
+// of sqrt_rho_a, sqrt_rho_b. For pmi 2 and 3, ch1[0] is overwritten with its rotated value.
+void prec2A_TM56_128(unsigned char pmi,__m128i *ch0,__m128i *ch1) {
+
+  if ((pmi == 2) || (pmi == 3)) {
+    // apply the sign pattern held in conjugate[], then swap the two 16-bit halves of every sample
+    __m128i rotated = _mm_sign_epi16(ch1[0],*(__m128i*)&conjugate[0]);
+    rotated = _mm_shufflelo_epi16(rotated,_MM_SHUFFLE(2,3,0,1));
+    rotated = _mm_shufflehi_epi16(rotated,_MM_SHUFFLE(2,3,0,1));
+    ch1[0]  = rotated;
+  }
+
+  // saturating combination of the two channels; other pmi values leave ch0[0] unchanged
+  if ((pmi == 0) || (pmi == 3)) {
+    ch0[0] = _mm_adds_epi16(ch0[0],ch1[0]);
+  } else if ((pmi == 1) || (pmi == 2)) {
+    ch0[0] = _mm_subs_epi16(ch0[0],ch1[0]);
+  }
+
+  _mm_empty();
+  _m_empty();
+}
+#endif
+// precoding is stream 0 .5(1,1)  .5(1,-1) .5(1,1)  .5(1,-1)
+//              stream 1 .5(1,-1) .5(1,1)  .5(1,-1) .5(1,1)
+// store "precoded" channel for stream 0 in ch0, stream 1 in ch1
+
+// Per-lane sign pattern applied to ch1 by prec2A_TM3_128 (consecutive pairs of 16-bit lanes share a sign)
+short TM3_prec[8]__attribute__((aligned(16))) = {1,1,-1,-1,1,1,-1,-1} ;
+
+// Build the "precoded" channels of both streams for one 128-bit vector:
+//   ch0 <- (ch0 + s*ch1) scaled by ONE_OVER_SQRT2_Q15
+//   ch1 <- (ch0 - s*ch1) scaled by ONE_OVER_SQRT2_Q15
+// where s is the sign pattern TM3_prec, the additions saturate, and the scaling is a Q15
+// multiplication (mulhi followed by <<1). Both inputs are overwritten.
+void prec2A_TM3_128(__m128i *ch0,__m128i *ch1) {
+
+  const __m128i scale   = _mm_set1_epi16(ONE_OVER_SQRT2_Q15);
+  const __m128i stream0 = ch0[0];
+  const __m128i signed1 = _mm_sign_epi16(ch1[0],*(__m128i*)TM3_prec);
+
+  // sum and difference of the two channels
+  ch0[0] = _mm_adds_epi16(stream0,signed1);
+  ch1[0] = _mm_subs_epi16(stream0,signed1);
+
+  // scale both by 1/sqrt(2)
+  ch0[0] = _mm_slli_epi16(_mm_mulhi_epi16(ch0[0],scale),1);
+  ch1[0] = _mm_slli_epi16(_mm_mulhi_epi16(ch1[0],scale),1);
+
+  _mm_empty();
+  _m_empty();
+}
+
+// pmi = 0 => stream 0 (1,1), stream 1 (1,-1)
+// pmi = 1 => stream 0 (1,j), stream 1 (1,-j)
+
+// Build the precoded channels of both streams for one 128-bit vector.
+//
+//   pmi == 0 : ch0 <- ch0 + ch1,          ch1 <- ch0 - ch1            ([1 1;1 -1])
+//   pmi != 0 : ch0 <- ch0 - rotated(ch1), ch1 <- ch0 + rotated(ch1)
+//
+// rotated(ch1) is ch1 with the sign pattern of conjugate[] applied and the two 16-bit halves of
+// every sample swapped. All sums and differences saturate at 16 bits. Both results are then scaled
+// by ONE_OVER_SQRT2_Q15 (mulhi followed by <<1): sqrt(2) is already taken into account in the
+// computation of sqrt_rho_a, sqrt_rho_b, so the divide by 2 is replaced by a divide by sqrt(2).
+// Both inputs are overwritten.
+void prec2A_TM4_128(int pmi,__m128i *ch0,__m128i *ch1) {
+
+  __m128i amp;
+  amp = _mm_set1_epi16(ONE_OVER_SQRT2_Q15);
+  __m128i tmp0,tmp1;
+
+  if (pmi == 0) { //[1 1;1 -1]
+    tmp0 = ch0[0];
+    tmp1 = ch1[0];
+    ch0[0] = _mm_adds_epi16(tmp0,tmp1);
+    ch1[0] = _mm_subs_epi16(tmp0,tmp1);
+  }
+  else { //ch0+j*ch1 ch0-j*ch1
+    tmp0 = ch0[0];
+    tmp1   = _mm_sign_epi16(ch1[0],*(__m128i*)&conjugate[0]);
+    tmp1   = _mm_shufflelo_epi16(tmp1,_MM_SHUFFLE(2,3,0,1));
+    tmp1   = _mm_shufflehi_epi16(tmp1,_MM_SHUFFLE(2,3,0,1));
+    ch0[0] = _mm_subs_epi16(tmp0,tmp1);
+    // saturating add, like every other combination here: a wrapping add would flip the sign of
+    // the sample on 16-bit overflow
+    ch1[0] = _mm_adds_epi16(tmp0,tmp1);
+  }
+
+  // scale both streams by 1/sqrt(2)
+  ch0[0] = _mm_mulhi_epi16(ch0[0],amp);
+  ch0[0] = _mm_slli_epi16(ch0[0],1);
+  ch1[0] = _mm_mulhi_epi16(ch1[0],amp);
+  ch1[0] = _mm_slli_epi16(ch1[0],1);
+
+  _mm_empty();
+  _m_empty();
+}
+
+void dlsch_channel_compensation_TM56(int **rxdataF_ext,
+                                     int **dl_ch_estimates_ext,
+                                     int **dl_ch_mag,
+                                     int **dl_ch_magb,
+                                     int **rxdataF_comp,
+                                     unsigned char *pmi_ext,
+                                     LTE_DL_FRAME_PARMS *frame_parms,
+                                     PHY_MEASUREMENTS *measurements,
+                                     int eNB_id,
+                                     unsigned char symbol,
+                                     unsigned char mod_order,
+                                     unsigned short nb_rb,
+                                     unsigned char output_shift,
+                                     unsigned char dl_power_off)
+{
+
+#if defined(__x86_64__) || defined(__i386__)
+
+  unsigned short rb,Nre;
+  __m128i *dl_ch0_128,*dl_ch1_128,*dl_ch_mag128,*dl_ch_mag128b,*rxdataF128,*rxdataF_comp128;
+  unsigned char aarx=0,symbol_mod,pilots=0;
+  int precoded_signal_strength=0;
+  __m128i mmtmpD0,mmtmpD1,mmtmpD2,mmtmpD3,QAM_amp128,QAM_amp128b;
+
+  symbol_mod = (symbol>=(7-frame_parms->Ncp)) ? symbol-(7-frame_parms->Ncp) : symbol;
+
+  if ((symbol_mod == 0) || (symbol_mod == (4-frame_parms->Ncp)))
+    pilots=1;
+
+
+  //printf("comp prec: symbol %d, pilots %d\n",symbol, pilots);
+
+  if (mod_order == 4) {
+    QAM_amp128 = _mm_set1_epi16(QAM16_n1);
+    QAM_amp128b = _mm_setzero_si128();
+  } else if (mod_order == 6) {
+    QAM_amp128  = _mm_set1_epi16(QAM64_n1);
+    QAM_amp128b = _mm_set1_epi16(QAM64_n2);
+  }
+
+  for (aarx=0; aarx<frame_parms->nb_antennas_rx; aarx++) {
+
+    dl_ch0_128          = (__m128i *)&dl_ch_estimates_ext[aarx][symbol*frame_parms->N_RB_DL*12];
+    dl_ch1_128          = (__m128i *)&dl_ch_estimates_ext[2+aarx][symbol*frame_parms->N_RB_DL*12];
+
+
+    dl_ch_mag128      = (__m128i *)&dl_ch_mag[aarx][symbol*frame_parms->N_RB_DL*12];
+    dl_ch_mag128b     = (__m128i *)&dl_ch_magb[aarx][symbol*frame_parms->N_RB_DL*12];
+    rxdataF128        = (__m128i *)&rxdataF_ext[aarx][symbol*frame_parms->N_RB_DL*12];
+    rxdataF_comp128   = (__m128i *)&rxdataF_comp[aarx][symbol*frame_parms->N_RB_DL*12];
+
+
+    for (rb=0; rb<nb_rb; rb++) {
+      // combine TX channels using precoder from pmi
+#ifdef DEBUG_DLSCH_DEMOD
+      printf("mode 6 prec: rb %d, pmi->%d\n",rb,pmi_ext[rb]);
+#endif
+      prec2A_TM56_128(pmi_ext[rb],&dl_ch0_128[0],&dl_ch1_128[0]);
+      prec2A_TM56_128(pmi_ext[rb],&dl_ch0_128[1],&dl_ch1_128[1]);
+
+      if (pilots==0) {
+
+        prec2A_TM56_128(pmi_ext[rb],&dl_ch0_128[2],&dl_ch1_128[2]);
+      }
+
+      if (mod_order>2) {
+        // get channel amplitude if not QPSK
+
+        mmtmpD0 = _mm_madd_epi16(dl_ch0_128[0],dl_ch0_128[0]);
+        mmtmpD0 = _mm_srai_epi32(mmtmpD0,output_shift);
+
+        mmtmpD1 = _mm_madd_epi16(dl_ch0_128[1],dl_ch0_128[1]);
+        mmtmpD1 = _mm_srai_epi32(mmtmpD1,output_shift);
+
+        mmtmpD0 = _mm_packs_epi32(mmtmpD0,mmtmpD1);
+
+        dl_ch_mag128[0] = _mm_unpacklo_epi16(mmtmpD0,mmtmpD0);
+        dl_ch_mag128b[0] = dl_ch_mag128[0];
+        dl_ch_mag128[0] = _mm_mulhi_epi16(dl_ch_mag128[0],QAM_amp128);
+        dl_ch_mag128[0] = _mm_slli_epi16(dl_ch_mag128[0],1);
+
+
+        //print_shorts("dl_ch_mag128[0]=",&dl_ch_mag128[0]);
+
+        //print_shorts("dl_ch_mag128[0]=",&dl_ch_mag128[0]);
+
+        dl_ch_mag128[1] = _mm_unpackhi_epi16(mmtmpD0,mmtmpD0);
+        dl_ch_mag128b[1] = dl_ch_mag128[1];
+        dl_ch_mag128[1] = _mm_mulhi_epi16(dl_ch_mag128[1],QAM_amp128);
+        dl_ch_mag128[1] = _mm_slli_epi16(dl_ch_mag128[1],1);
+
+        if (pilots==0) {
+          mmtmpD0 = _mm_madd_epi16(dl_ch0_128[2],dl_ch0_128[2]);
+          mmtmpD0 = _mm_srai_epi32(mmtmpD0,output_shift);
+
+          mmtmpD1 = _mm_packs_epi32(mmtmpD0,mmtmpD0);
+
+          dl_ch_mag128[2] = _mm_unpacklo_epi16(mmtmpD1,mmtmpD1);
+          dl_ch_mag128b[2] = dl_ch_mag128[2];
+
+          dl_ch_mag128[2] = _mm_mulhi_epi16(dl_ch_mag128[2],QAM_amp128);
+          dl_ch_mag128[2] = _mm_slli_epi16(dl_ch_mag128[2],1);
+        }
+
+        dl_ch_mag128b[0] = _mm_mulhi_epi16(dl_ch_mag128b[0],QAM_amp128b);
+        dl_ch_mag128b[0] = _mm_slli_epi16(dl_ch_mag128b[0],1);
+
+        //print_shorts("dl_ch_mag128b[0]=",&dl_ch_mag128b[0]);
+
+        dl_ch_mag128b[1] = _mm_mulhi_epi16(dl_ch_mag128b[1],QAM_amp128b);
+        dl_ch_mag128b[1] = _mm_slli_epi16(dl_ch_mag128b[1],1);
+
+        if (pilots==0) {
+          dl_ch_mag128b[2] = _mm_mulhi_epi16(dl_ch_mag128b[2],QAM_amp128b);
+          dl_ch_mag128b[2] = _mm_slli_epi16(dl_ch_mag128b[2],1);
+
+        }
+      }
+
+      // MF multiply by conjugated channel
+      mmtmpD0 = _mm_madd_epi16(dl_ch0_128[0],rxdataF128[0]);
+      //        print_ints("re",&mmtmpD0);
+
+      // mmtmpD0 contains real part of 4 consecutive outputs (32-bit)
+      mmtmpD1 = _mm_shufflelo_epi16(dl_ch0_128[0],_MM_SHUFFLE(2,3,0,1));
+      mmtmpD1 = _mm_shufflehi_epi16(mmtmpD1,_MM_SHUFFLE(2,3,0,1));
+      mmtmpD1 = _mm_sign_epi16(mmtmpD1,*(__m128i*)&conjugate[0]);
+
+      //        print_ints("im",&mmtmpD1);
+      mmtmpD1 = _mm_madd_epi16(mmtmpD1,rxdataF128[0]);
+      // mmtmpD1 contains imag part of 4 consecutive outputs (32-bit)
+      mmtmpD0 = _mm_srai_epi32(mmtmpD0,output_shift);
+      //        print_ints("re(shift)",&mmtmpD0);
+      mmtmpD1 = _mm_srai_epi32(mmtmpD1,output_shift);
+      //        print_ints("im(shift)",&mmtmpD1);
+      mmtmpD2 = _mm_unpacklo_epi32(mmtmpD0,mmtmpD1);
+      mmtmpD3 = _mm_unpackhi_epi32(mmtmpD0,mmtmpD1);
+      //        print_ints("c0",&mmtmpD2);
+      //        print_ints("c1",&mmtmpD3);
+      rxdataF_comp128[0] = _mm_packs_epi32(mmtmpD2,mmtmpD3);
+      //        print_shorts("rx:",rxdataF128);
+      //        print_shorts("ch:",dl_ch128);
+      //        print_shorts("pack:",rxdataF_comp128);
+
+      // multiply by conjugated channel
+      mmtmpD0 = _mm_madd_epi16(dl_ch0_128[1],rxdataF128[1]);
+      // mmtmpD0 contains real part of 4 consecutive outputs (32-bit)
+      mmtmpD1 = _mm_shufflelo_epi16(dl_ch0_128[1],_MM_SHUFFLE(2,3,0,1));
+      mmtmpD1 = _mm_shufflehi_epi16(mmtmpD1,_MM_SHUFFLE(2,3,0,1));
+      mmtmpD1 = _mm_sign_epi16(mmtmpD1,*(__m128i*)conjugate);
+      mmtmpD1 = _mm_madd_epi16(mmtmpD1,rxdataF128[1]);
+      // mmtmpD1 contains imag part of 4 consecutive outputs (32-bit)
+      mmtmpD0 = _mm_srai_epi32(mmtmpD0,output_shift);
+      mmtmpD1 = _mm_srai_epi32(mmtmpD1,output_shift);
+      mmtmpD2 = _mm_unpacklo_epi32(mmtmpD0,mmtmpD1);
+      mmtmpD3 = _mm_unpackhi_epi32(mmtmpD0,mmtmpD1);
+
+      rxdataF_comp128[1] = _mm_packs_epi32(mmtmpD2,mmtmpD3);
+      //  print_shorts("rx:",rxdataF128+1);
+      //  print_shorts("ch:",dl_ch128+1);
+      //  print_shorts("pack:",rxdataF_comp128+1);
+
+      if (pilots==0) {
+        // multiply by conjugated channel
+        mmtmpD0 = _mm_madd_epi16(dl_ch0_128[2],rxdataF128[2]);
+        // mmtmpD0 contains real part of 4 consecutive outputs (32-bit)
+        mmtmpD1 = _mm_shufflelo_epi16(dl_ch0_128[2],_MM_SHUFFLE(2,3,0,1));
+        mmtmpD1 = _mm_shufflehi_epi16(mmtmpD1,_MM_SHUFFLE(2,3,0,1));
+        mmtmpD1 = _mm_sign_epi16(mmtmpD1,*(__m128i*)conjugate);
+        mmtmpD1 = _mm_madd_epi16(mmtmpD1,rxdataF128[2]);
+        // mmtmpD1 contains imag part of 4 consecutive outputs (32-bit)
+        mmtmpD0 = _mm_srai_epi32(mmtmpD0,output_shift);
+        mmtmpD1 = _mm_srai_epi32(mmtmpD1,output_shift);
+        mmtmpD2 = _mm_unpacklo_epi32(mmtmpD0,mmtmpD1);
+        mmtmpD3 = _mm_unpackhi_epi32(mmtmpD0,mmtmpD1);
+
+        rxdataF_comp128[2] = _mm_packs_epi32(mmtmpD2,mmtmpD3);
+        //  print_shorts("rx:",rxdataF128+2);
+        //  print_shorts("ch:",dl_ch128+2);
+        //        print_shorts("pack:",rxdataF_comp128+2);
+
+        dl_ch0_128+=3;
+        dl_ch1_128+=3;
+        dl_ch_mag128+=3;
+        dl_ch_mag128b+=3;
+        rxdataF128+=3;
+        rxdataF_comp128+=3;
+      } else {
+        dl_ch0_128+=2;
+        dl_ch1_128+=2;
+        dl_ch_mag128+=2;
+        dl_ch_mag128b+=2;
+        rxdataF128+=2;
+        rxdataF_comp128+=2;
+      }
+    }
+
+    Nre = (pilots==0) ? 12 : 8;
+
+    precoded_signal_strength += ((signal_energy_nodc(&dl_ch_estimates_ext[aarx][symbol*frame_parms->N_RB_DL*Nre],
+                                                     (nb_rb*Nre))) - (measurements->n0_power[aarx]));
+  } // rx_antennas
+
+  measurements->precoded_cqi_dB[eNB_id][0] = dB_fixed2(precoded_signal_strength,measurements->n0_power_tot);
+
+  //printf("eNB_id %d, symbol %d: precoded CQI %d dB\n",eNB_id,symbol,
+  //   measurements->precoded_cqi_dB[eNB_id][0]);
+
+#elif defined(__arm__)
+
+  uint32_t rb,Nre;
+  uint32_t aarx,symbol_mod,pilots=0;
+
+  int16x4_t *dl_ch0_128,*dl_ch1_128,*rxdataF128;
+  int16x8_t *dl_ch0_128b,*dl_ch1_128b;
+  int32x4_t mmtmpD0,mmtmpD1,mmtmpD0b,mmtmpD1b;
+  int16x8_t *dl_ch_mag128,*dl_ch_mag128b,mmtmpD2,mmtmpD3,mmtmpD4,*rxdataF_comp128;
+  int16x8_t QAM_amp128,QAM_amp128b;
+
+  int16_t conj[4]__attribute__((aligned(16))) = {1,-1,1,-1};
+  int32x4_t output_shift128 = vmovq_n_s32(-(int32_t)output_shift);
+  int32_t precoded_signal_strength=0;
+
+  symbol_mod = (symbol>=(7-frame_parms->Ncp)) ? symbol-(7-frame_parms->Ncp) : symbol;
+  if ((symbol_mod == 0) || (symbol_mod == (4-frame_parms->Ncp))) {
+    if (frame_parms->nb_antenna_ports_eNB==1) // 10 out of 12 so don't reduce size
+      { nb_rb=1+(5*nb_rb/6); }
+
+    else
+      { pilots=1; }
+  }
+
+
+  if (mod_order == 4) {
+    QAM_amp128  = vmovq_n_s16(QAM16_n1);  // 2/sqrt(10)
+    QAM_amp128b = vmovq_n_s16(0);
+
+  } else if (mod_order == 6) {
+    QAM_amp128  = vmovq_n_s16(QAM64_n1); //
+    QAM_amp128b = vmovq_n_s16(QAM64_n2);
+  }
+
+  //    printf("comp: rxdataF_comp %p, symbol %d\n",rxdataF_comp[0],symbol);
+
+  for (aarx=0; aarx<frame_parms->nb_antennas_rx; aarx++) {
+
+
+
+    dl_ch0_128          = (int16x4_t*)&dl_ch_estimates_ext[aarx][symbol*frame_parms->N_RB_DL*12];
+    dl_ch1_128          = (int16x4_t*)&dl_ch_estimates_ext[2+aarx][symbol*frame_parms->N_RB_DL*12];
+    dl_ch0_128b         = (int16x8_t*)&dl_ch_estimates_ext[aarx][symbol*frame_parms->N_RB_DL*12];
+    dl_ch1_128b         = (int16x8_t*)&dl_ch_estimates_ext[2+aarx][symbol*frame_parms->N_RB_DL*12];
+    dl_ch_mag128        = (int16x8_t*)&dl_ch_mag[aarx][symbol*frame_parms->N_RB_DL*12];
+    dl_ch_mag128b       = (int16x8_t*)&dl_ch_magb[aarx][symbol*frame_parms->N_RB_DL*12];
+    rxdataF128          = (int16x4_t*)&rxdataF_ext[aarx][symbol*frame_parms->N_RB_DL*12];
+    rxdataF_comp128     = (int16x8_t*)&rxdataF_comp[aarx][symbol*frame_parms->N_RB_DL*12];
+
+    for (rb=0; rb<nb_rb; rb++) {
+#ifdef DEBUG_DLSCH_DEMOD
+      printf("mode 6 prec: rb %d, pmi->%d\n",rb,pmi_ext[rb]);
+#endif
+      prec2A_TM56_128(pmi_ext[rb],&dl_ch0_128b[0],&dl_ch1_128b[0]);
+      prec2A_TM56_128(pmi_ext[rb],&dl_ch0_128b[1],&dl_ch1_128b[1]);
+
+      if (pilots==0) {
+        prec2A_TM56_128(pmi_ext[rb],&dl_ch0_128b[2],&dl_ch1_128b[2]);
+      }
+
+      if (mod_order>2) {
+        // get channel amplitude if not QPSK
+        mmtmpD0 = vmull_s16(dl_ch0_128[0], dl_ch0_128[0]);
+        // mmtmpD0 = [ch0*ch0,ch1*ch1,ch2*ch2,ch3*ch3];
+        mmtmpD0 = vqshlq_s32(vqaddq_s32(mmtmpD0,vrev64q_s32(mmtmpD0)),output_shift128);
+        // mmtmpD0 = [ch0*ch0 + ch1*ch1,ch0*ch0 + ch1*ch1,ch2*ch2 + ch3*ch3,ch2*ch2 + ch3*ch3]>>output_shift128 on 32-bits
+        mmtmpD1 = vmull_s16(dl_ch0_128[1], dl_ch0_128[1]);
+        mmtmpD1 = vqshlq_s32(vqaddq_s32(mmtmpD1,vrev64q_s32(mmtmpD1)),output_shift128);
+        mmtmpD2 = vcombine_s16(vmovn_s32(mmtmpD0),vmovn_s32(mmtmpD1));
+        // mmtmpD2 = [ch0*ch0 + ch1*ch1,ch0*ch0 + ch1*ch1,ch2*ch2 + ch3*ch3,ch2*ch2 + ch3*ch3,ch4*ch4 + ch5*ch5,ch4*ch4 + ch5*ch5,ch6*ch6 + ch7*ch7,ch6*ch6 + ch7*ch7]>>output_shift128 on 16-bits
+        mmtmpD0 = vmull_s16(dl_ch0_128[2], dl_ch0_128[2]);
+        mmtmpD0 = vqshlq_s32(vqaddq_s32(mmtmpD0,vrev64q_s32(mmtmpD0)),output_shift128);
+        mmtmpD1 = vmull_s16(dl_ch0_128[3], dl_ch0_128[3]);
+        mmtmpD1 = vqshlq_s32(vqaddq_s32(mmtmpD1,vrev64q_s32(mmtmpD1)),output_shift128);
+        mmtmpD3 = vcombine_s16(vmovn_s32(mmtmpD0),vmovn_s32(mmtmpD1));
+        if (pilots==0) {
+          mmtmpD0 = vmull_s16(dl_ch0_128[4], dl_ch0_128[4]);
+          mmtmpD0 = vqshlq_s32(vqaddq_s32(mmtmpD0,vrev64q_s32(mmtmpD0)),output_shift128);
+          mmtmpD1 = vmull_s16(dl_ch0_128[5], dl_ch0_128[5]);
+          mmtmpD1 = vqshlq_s32(vqaddq_s32(mmtmpD1,vrev64q_s32(mmtmpD1)),output_shift128);
+          mmtmpD4 = vcombine_s16(vmovn_s32(mmtmpD0),vmovn_s32(mmtmpD1));
+
+
+        }
+
+        dl_ch_mag128b[0] = vqdmulhq_s16(mmtmpD2,QAM_amp128b);
+        dl_ch_mag128b[1] = vqdmulhq_s16(mmtmpD3,QAM_amp128b);
+        dl_ch_mag128[0] = vqdmulhq_s16(mmtmpD2,QAM_amp128);
+        dl_ch_mag128[1] = vqdmulhq_s16(mmtmpD3,QAM_amp128);
+
+
+        if (pilots==0) {
+          dl_ch_mag128b[2] = vqdmulhq_s16(mmtmpD4,QAM_amp128b);
+          dl_ch_mag128[2]  = vqdmulhq_s16(mmtmpD4,QAM_amp128);
+        }
+      }
+      mmtmpD0 = vmull_s16(dl_ch0_128[0], rxdataF128[0]);
+      //mmtmpD0 = [Re(ch[0])Re(rx[0]) Im(ch[0])Im(rx[0]) Re(ch[1])Re(rx[1]) Im(ch[1])Im(rx[1])]
+      mmtmpD1 = vmull_s16(dl_ch0_128[1], rxdataF128[1]);
+      //mmtmpD1 = [Re(ch[2])Re(rx[2]) Im(ch[2])Im(rx[2]) Re(ch[3])Re(rx[3]) Im(ch[3])Im(rx[3])]
+      mmtmpD0 = vcombine_s32(vpadd_s32(vget_low_s32(mmtmpD0),vget_high_s32(mmtmpD0)),
+                             vpadd_s32(vget_low_s32(mmtmpD1),vget_high_s32(mmtmpD1)));
+      //mmtmpD0 = [Re(ch[0])Re(rx[0])+Im(ch[0])Im(rx[0]) Re(ch[1])Re(rx[1])+Im(ch[1])Im(rx[1]) Re(ch[2])Re(rx[2])+Im(ch[2])Im(rx[2]) Re(ch[3])Re(rx[3])+Im(ch[3])Im(rx[3])]
+
+      mmtmpD0b = vmull_s16(vrev32_s16(vmul_s16(dl_ch0_128[0],*(int16x4_t*)conj)), rxdataF128[0]);
+      //mmtmpD0b = [-Im(ch[0])Re(rx[0]) Re(ch[0])Im(rx[0]) -Im(ch[1])Re(rx[1]) Re(ch[1])Im(rx[1])]
+      mmtmpD1b = vmull_s16(vrev32_s16(vmul_s16(dl_ch0_128[1],*(int16x4_t*)conj)), rxdataF128[1]);
+      //mmtmpD1b = [-Im(ch[2])Re(rx[2]) Re(ch[2])Im(rx[2]) -Im(ch[3])Re(rx[3]) Re(ch[3])Im(rx[3])]
+      mmtmpD1 = vcombine_s32(vpadd_s32(vget_low_s32(mmtmpD0b),vget_high_s32(mmtmpD0b)),
+                             vpadd_s32(vget_low_s32(mmtmpD1b),vget_high_s32(mmtmpD1b)));
+      //mmtmpD1 = [-Im(ch[0])Re(rx[0])+Re(ch[0])Im(rx[0]) -Im(ch[1])Re(rx[1])+Re(ch[1])Im(rx[1]) -Im(ch[2])Re(rx[2])+Re(ch[2])Im(rx[2]) -Im(ch[3])Re(rx[3])+Re(ch[3])Im(rx[3])]
+
+      mmtmpD0 = vqshlq_s32(mmtmpD0,output_shift128);
+      mmtmpD1 = vqshlq_s32(mmtmpD1,output_shift128);
+      rxdataF_comp128[0] = vcombine_s16(vmovn_s32(mmtmpD0),vmovn_s32(mmtmpD1));
+
+      mmtmpD0 = vmull_s16(dl_ch0_128[2], rxdataF128[2]);
+      mmtmpD1 = vmull_s16(dl_ch0_128[3], rxdataF128[3]);
+      mmtmpD0 = vcombine_s32(vpadd_s32(vget_low_s32(mmtmpD0),vget_high_s32(mmtmpD0)),
+                             vpadd_s32(vget_low_s32(mmtmpD1),vget_high_s32(mmtmpD1)));
+
+      mmtmpD0b = vmull_s16(vrev32_s16(vmul_s16(dl_ch0_128[2],*(int16x4_t*)conj)), rxdataF128[2]);
+      mmtmpD1b = vmull_s16(vrev32_s16(vmul_s16(dl_ch0_128[3],*(int16x4_t*)conj)), rxdataF128[3]);
+      mmtmpD1 = vcombine_s32(vpadd_s32(vget_low_s32(mmtmpD0b),vget_high_s32(mmtmpD0b)),
+                             vpadd_s32(vget_low_s32(mmtmpD1b),vget_high_s32(mmtmpD1b)));
+
+      mmtmpD0 = vqshlq_s32(mmtmpD0,output_shift128);
+      mmtmpD1 = vqshlq_s32(mmtmpD1,output_shift128);
+      rxdataF_comp128[1] = vcombine_s16(vmovn_s32(mmtmpD0),vmovn_s32(mmtmpD1));
+
+      if (pilots==0) {
+        mmtmpD0 = vmull_s16(dl_ch0_128[4], rxdataF128[4]);
+        mmtmpD1 = vmull_s16(dl_ch0_128[5], rxdataF128[5]);
+        mmtmpD0 = vcombine_s32(vpadd_s32(vget_low_s32(mmtmpD0),vget_high_s32(mmtmpD0)),
+                               vpadd_s32(vget_low_s32(mmtmpD1),vget_high_s32(mmtmpD1)));
+
+        mmtmpD0b = vmull_s16(vrev32_s16(vmul_s16(dl_ch0_128[4],*(int16x4_t*)conj)), rxdataF128[4]);
+        mmtmpD1b = vmull_s16(vrev32_s16(vmul_s16(dl_ch0_128[5],*(int16x4_t*)conj)), rxdataF128[5]);
+        mmtmpD1 = vcombine_s32(vpadd_s32(vget_low_s32(mmtmpD0b),vget_high_s32(mmtmpD0b)),
+                               vpadd_s32(vget_low_s32(mmtmpD1b),vget_high_s32(mmtmpD1b)));
+
+
+        mmtmpD0 = vqshlq_s32(mmtmpD0,output_shift128);
+        mmtmpD1 = vqshlq_s32(mmtmpD1,output_shift128);
+        rxdataF_comp128[2] = vcombine_s16(vmovn_s32(mmtmpD0),vmovn_s32(mmtmpD1));
+
+
+        dl_ch0_128+=6;
+        dl_ch1_128+=6;
+        dl_ch_mag128+=3;
+        dl_ch_mag128b+=3;
+        rxdataF128+=6;
+        rxdataF_comp128+=3;
+
+      } else { // we have a smaller PDSCH in symbols with pilots so skip last group of 4 REs and increment less
+        dl_ch0_128+=4;
+        dl_ch1_128+=4;
+        dl_ch_mag128+=2;
+        dl_ch_mag128b+=2;
+        rxdataF128+=4;
+        rxdataF_comp128+=2;
+      }
+    }
+
+    Nre = (pilots==0) ? 12 : 8;
+
+
+    precoded_signal_strength += ((signal_energy_nodc(&dl_ch_estimates_ext[aarx][symbol*frame_parms->N_RB_DL*Nre],
+
+                                                     (nb_rb*Nre))) - (measurements->n0_power[aarx]));
+    // rx_antennas
+  }
+  measurements->precoded_cqi_dB[eNB_id][0] = dB_fixed2(precoded_signal_strength,measurements->n0_power_tot);
+
+  //printf("eNB_id %d, symbol %d: precoded CQI %d dB\n",eNB_id,symbol,
+  //     measurements->precoded_cqi_dB[eNB_id][0]);
+
+#endif
+  _mm_empty();
+  _m_empty();
+}
+
+void dlsch_channel_compensation_TM34(LTE_DL_FRAME_PARMS *frame_parms,
+                                    LTE_UE_PDSCH *pdsch_vars,
+                                    PHY_MEASUREMENTS *measurements,
+                                    int eNB_id,
+                                    unsigned char symbol,
+                                    unsigned char mod_order0,
+                                    unsigned char mod_order1,
+                                    int harq_pid,
+                                    int round,
+                                    MIMO_mode_t mimo_mode,
+                                    unsigned short nb_rb,
+                                    unsigned char output_shift0,
+                                    unsigned char output_shift1) {
+
+#if defined(__x86_64__) || defined(__i386__)
+
+  unsigned short rb,Nre;
+  __m128i *dl_ch0_128,*dl_ch1_128,*dl_ch_mag0_128,*dl_ch_mag1_128,*dl_ch_mag0_128b,*dl_ch_mag1_128b,*rxdataF128,*rxdataF_comp0_128,*rxdataF_comp1_128;
+  unsigned char aarx=0,symbol_mod,pilots=0;
+  int precoded_signal_strength0=0,precoded_signal_strength1=0;
+  int rx_power_correction;
+
+  int **rxdataF_ext           = pdsch_vars->rxdataF_ext;
+  int **dl_ch_estimates_ext   = pdsch_vars->dl_ch_estimates_ext;
+  int **dl_ch_mag0            = pdsch_vars->dl_ch_mag0;
+  int **dl_ch_mag1            = pdsch_vars->dl_ch_mag1[harq_pid][round];
+  int **dl_ch_magb0           = pdsch_vars->dl_ch_magb0;
+  int **dl_ch_magb1           = pdsch_vars->dl_ch_magb1[harq_pid][round];
+  int **rxdataF_comp0         = pdsch_vars->rxdataF_comp0;
+  int **rxdataF_comp1         = pdsch_vars->rxdataF_comp1[harq_pid][round];
+  unsigned char *pmi_ext      = pdsch_vars->pmi_ext;
+  __m128i mmtmpD0,mmtmpD1,mmtmpD2,mmtmpD3,QAM_amp0_128,QAM_amp0_128b,QAM_amp1_128,QAM_amp1_128b;
+
+  symbol_mod = (symbol>=(7-frame_parms->Ncp)) ? symbol-(7-frame_parms->Ncp) : symbol;
+
+  if ((symbol_mod == 0) || (symbol_mod == (4-frame_parms->Ncp)))
+    pilots=1;
+
+  rx_power_correction = 1;
+
+ // printf("comp prec: symbol %d, pilots %d\n",symbol, pilots);
+
+  if (mod_order0 == 4) {
+    QAM_amp0_128  = _mm_set1_epi16(QAM16_n1);
+    QAM_amp0_128b = _mm_setzero_si128();
+  } else if (mod_order0 == 6) {
+    QAM_amp0_128  = _mm_set1_epi16(QAM64_n1);
+    QAM_amp0_128b = _mm_set1_epi16(QAM64_n2);
+  }
+
+  if (mod_order1 == 4) {
+    QAM_amp1_128  = _mm_set1_epi16(QAM16_n1);
+    QAM_amp1_128b = _mm_setzero_si128();
+  } else if (mod_order1 == 6) {
+    QAM_amp1_128  = _mm_set1_epi16(QAM64_n1);
+    QAM_amp1_128b = _mm_set1_epi16(QAM64_n2);
+  }
+
+  for (aarx=0;aarx<frame_parms->nb_antennas_rx;aarx++) {
+
+   /* if (aarx==0) {
+      output_shift=output_shift0;
+    }
+      else {
+        output_shift=output_shift1;
+      } */
+
+     // printf("antenna %d\n", aarx);
+   // printf("symbol %d, rx antenna %d\n", symbol, aarx);
+
+    dl_ch0_128          = (__m128i *)&dl_ch_estimates_ext[aarx][symbol*frame_parms->N_RB_DL*12]; // this is h11
+    dl_ch1_128          = (__m128i *)&dl_ch_estimates_ext[2+aarx][symbol*frame_parms->N_RB_DL*12]; // this is h12
+
+
+    dl_ch_mag0_128      = (__m128i *)&dl_ch_mag0[aarx][symbol*frame_parms->N_RB_DL*12]; //responsible for x1
+    dl_ch_mag0_128b     = (__m128i *)&dl_ch_magb0[aarx][symbol*frame_parms->N_RB_DL*12];//responsible for x1
+    dl_ch_mag1_128      = (__m128i *)&dl_ch_mag1[aarx][symbol*frame_parms->N_RB_DL*12];   //responsible for x2. always coming from tx2
+    dl_ch_mag1_128b     = (__m128i *)&dl_ch_magb1[aarx][symbol*frame_parms->N_RB_DL*12];  //responsible for x2. always coming from tx2
+    rxdataF128          = (__m128i *)&rxdataF_ext[aarx][symbol*frame_parms->N_RB_DL*12]; //received signal on antenna of interest h11*x1+h12*x2
+    rxdataF_comp0_128   = (__m128i *)&rxdataF_comp0[aarx][symbol*frame_parms->N_RB_DL*12]; //result of multipl with MF x1 on antenna of interest
+    rxdataF_comp1_128   = (__m128i *)&rxdataF_comp1[aarx][symbol*frame_parms->N_RB_DL*12]; //result of multipl with MF x2 on antenna of interest
+
+    for (rb=0; rb<nb_rb; rb++) {
+
+      // combine TX channels using precoder from pmi
+      if (mimo_mode==LARGE_CDD) {
+        prec2A_TM3_128(&dl_ch0_128[0],&dl_ch1_128[0]);
+        prec2A_TM3_128(&dl_ch0_128[1],&dl_ch1_128[1]);
+
+
+        if (pilots==0) {
+          prec2A_TM3_128(&dl_ch0_128[2],&dl_ch1_128[2]);
+        }
+      }
+      else if (mimo_mode==DUALSTREAM_UNIFORM_PRECODING1) {
+        prec2A_TM4_128(0,&dl_ch0_128[0],&dl_ch1_128[0]);
+        prec2A_TM4_128(0,&dl_ch0_128[1],&dl_ch1_128[1]);
+
+        if (pilots==0) {
+          prec2A_TM4_128(0,&dl_ch0_128[2],&dl_ch1_128[2]);
+        }
+      }
+      else if (mimo_mode==DUALSTREAM_UNIFORM_PRECODINGj) {
+        prec2A_TM4_128(1,&dl_ch0_128[0],&dl_ch1_128[0]);
+        prec2A_TM4_128(1,&dl_ch0_128[1],&dl_ch1_128[1]);
+
+        if (pilots==0) {
+          prec2A_TM4_128(1,&dl_ch0_128[2],&dl_ch1_128[2]);
+        }
+      }
+
+        else if (mimo_mode==DUALSTREAM_PUSCH_PRECODING) {
+        prec2A_TM4_128(pmi_ext[rb],&dl_ch0_128[0],&dl_ch1_128[0]);
+        prec2A_TM4_128(pmi_ext[rb],&dl_ch0_128[1],&dl_ch1_128[1]);
+
+        if (pilots==0) {
+          prec2A_TM4_128(pmi_ext[rb],&dl_ch0_128[2],&dl_ch1_128[2]);
+        }
+      }
+
+
+      else {
+        LOG_E(PHY,"Unknown MIMO mode\n");
+        return;
+      }
+
+
+      if (mod_order0>2) {
+        // get channel amplitude if not QPSK
+
+        mmtmpD0 = _mm_madd_epi16(dl_ch0_128[0],dl_ch0_128[0]);
+        mmtmpD0 = _mm_srai_epi32(mmtmpD0,output_shift0);
+
+        mmtmpD1 = _mm_madd_epi16(dl_ch0_128[1],dl_ch0_128[1]);
+        mmtmpD1 = _mm_srai_epi32(mmtmpD1,output_shift0);
+
+        mmtmpD0 = _mm_packs_epi32(mmtmpD0,mmtmpD1);
+
+        dl_ch_mag0_128[0] = _mm_unpacklo_epi16(mmtmpD0,mmtmpD0);
+        dl_ch_mag0_128b[0] = dl_ch_mag0_128[0];
+        dl_ch_mag0_128[0] = _mm_mulhi_epi16(dl_ch_mag0_128[0],QAM_amp0_128);
+        dl_ch_mag0_128[0] = _mm_slli_epi16(dl_ch_mag0_128[0],1);
+
+        //  print_shorts("dl_ch_mag0_128[0]=",&dl_ch_mag0_128[0]);
+
+
+        dl_ch_mag0_128[1] = _mm_unpackhi_epi16(mmtmpD0,mmtmpD0);
+        dl_ch_mag0_128b[1] = dl_ch_mag0_128[1];
+        dl_ch_mag0_128[1] = _mm_mulhi_epi16(dl_ch_mag0_128[1],QAM_amp0_128);
+        dl_ch_mag0_128[1] = _mm_slli_epi16(dl_ch_mag0_128[1],1);
+
+        if (pilots==0) {
+          mmtmpD0 = _mm_madd_epi16(dl_ch0_128[2],dl_ch0_128[2]);
+          mmtmpD0 = _mm_srai_epi32(mmtmpD0,output_shift0);
+
+          mmtmpD1 = _mm_packs_epi32(mmtmpD0,mmtmpD0);
+
+          dl_ch_mag0_128[2] = _mm_unpacklo_epi16(mmtmpD1,mmtmpD1);
+          dl_ch_mag0_128b[2] = dl_ch_mag0_128[2];
+
+          dl_ch_mag0_128[2] = _mm_mulhi_epi16(dl_ch_mag0_128[2],QAM_amp0_128);
+          dl_ch_mag0_128[2] = _mm_slli_epi16(dl_ch_mag0_128[2],1);
+        }
+
+        dl_ch_mag0_128b[0] = _mm_mulhi_epi16(dl_ch_mag0_128b[0],QAM_amp0_128b);
+        dl_ch_mag0_128b[0] = _mm_slli_epi16(dl_ch_mag0_128b[0],1);
+
+       // print_shorts("dl_ch_mag0_128b[0]=",&dl_ch_mag0_128b[0]);
+
+        dl_ch_mag0_128b[1] = _mm_mulhi_epi16(dl_ch_mag0_128b[1],QAM_amp0_128b);
+        dl_ch_mag0_128b[1] = _mm_slli_epi16(dl_ch_mag0_128b[1],1);
+
+        if (pilots==0) {
+          dl_ch_mag0_128b[2] = _mm_mulhi_epi16(dl_ch_mag0_128b[2],QAM_amp0_128b);
+          dl_ch_mag0_128b[2] = _mm_slli_epi16(dl_ch_mag0_128b[2],1);
+        }
+      }
+
+      if (mod_order1>2) {
+        // get channel amplitude if not QPSK
+
+        mmtmpD0 = _mm_madd_epi16(dl_ch1_128[0],dl_ch1_128[0]);
+        mmtmpD0 = _mm_srai_epi32(mmtmpD0,output_shift1);
+
+        mmtmpD1 = _mm_madd_epi16(dl_ch1_128[1],dl_ch1_128[1]);
+        mmtmpD1 = _mm_srai_epi32(mmtmpD1,output_shift1);
+
+        mmtmpD0 = _mm_packs_epi32(mmtmpD0,mmtmpD1);
+
+        dl_ch_mag1_128[0] = _mm_unpacklo_epi16(mmtmpD0,mmtmpD0);
+        dl_ch_mag1_128b[0] = dl_ch_mag1_128[0];
+        dl_ch_mag1_128[0] = _mm_mulhi_epi16(dl_ch_mag1_128[0],QAM_amp1_128);
+        dl_ch_mag1_128[0] = _mm_slli_epi16(dl_ch_mag1_128[0],1);
+
+       // print_shorts("dl_ch_mag1_128[0]=",&dl_ch_mag1_128[0]);
+
+        dl_ch_mag1_128[1] = _mm_unpackhi_epi16(mmtmpD0,mmtmpD0);
+        dl_ch_mag1_128b[1] = dl_ch_mag1_128[1];
+        dl_ch_mag1_128[1] = _mm_mulhi_epi16(dl_ch_mag1_128[1],QAM_amp1_128);
+        dl_ch_mag1_128[1] = _mm_slli_epi16(dl_ch_mag1_128[1],1);
+
+        if (pilots==0) {
+          mmtmpD0 = _mm_madd_epi16(dl_ch1_128[2],dl_ch1_128[2]);
+          mmtmpD0 = _mm_srai_epi32(mmtmpD0,output_shift1);
+
+          mmtmpD1 = _mm_packs_epi32(mmtmpD0,mmtmpD0);
+
+          dl_ch_mag1_128[2] = _mm_unpacklo_epi16(mmtmpD1,mmtmpD1);
+          dl_ch_mag1_128b[2] = dl_ch_mag1_128[2];
+
+          dl_ch_mag1_128[2] = _mm_mulhi_epi16(dl_ch_mag1_128[2],QAM_amp1_128);
+          dl_ch_mag1_128[2] = _mm_slli_epi16(dl_ch_mag1_128[2],1);
+        }
+
+        dl_ch_mag1_128b[0] = _mm_mulhi_epi16(dl_ch_mag1_128b[0],QAM_amp1_128b);
+        dl_ch_mag1_128b[0] = _mm_slli_epi16(dl_ch_mag1_128b[0],1);
+
+       // print_shorts("dl_ch_mag1_128b[0]=",&dl_ch_mag1_128b[0]);
+
+        dl_ch_mag1_128b[1] = _mm_mulhi_epi16(dl_ch_mag1_128b[1],QAM_amp1_128b);
+        dl_ch_mag1_128b[1] = _mm_slli_epi16(dl_ch_mag1_128b[1],1);
+
+        if (pilots==0) {
+          dl_ch_mag1_128b[2] = _mm_mulhi_epi16(dl_ch_mag1_128b[2],QAM_amp1_128b);
+          dl_ch_mag1_128b[2] = _mm_slli_epi16(dl_ch_mag1_128b[2],1);
+        }
+      }
+
+      // layer 0
+      // MF multiply by conjugated channel
+      mmtmpD0 = _mm_madd_epi16(dl_ch0_128[0],rxdataF128[0]);
+    //  print_ints("re",&mmtmpD0);
+
+      // mmtmpD0 contains real part of 4 consecutive outputs (32-bit)
+      mmtmpD1 = _mm_shufflelo_epi16(dl_ch0_128[0],_MM_SHUFFLE(2,3,0,1));
+      mmtmpD1 = _mm_shufflehi_epi16(mmtmpD1,_MM_SHUFFLE(2,3,0,1));
+      mmtmpD1 = _mm_sign_epi16(mmtmpD1,*(__m128i*)&conjugate[0]);
+      mmtmpD1 = _mm_madd_epi16(mmtmpD1,rxdataF128[0]);
+           // print_ints("im",&mmtmpD1);
+      // mmtmpD1 contains imag part of 4 consecutive outputs (32-bit)
+      mmtmpD0 = _mm_srai_epi32(mmtmpD0,output_shift0);
+           // printf("Shift: %d\n",output_shift);
+          // print_ints("re(shift)",&mmtmpD0);
+      mmtmpD1 = _mm_srai_epi32(mmtmpD1,output_shift0);
+           // print_ints("im(shift)",&mmtmpD1);
+      mmtmpD2 = _mm_unpacklo_epi32(mmtmpD0,mmtmpD1);
+      mmtmpD3 = _mm_unpackhi_epi32(mmtmpD0,mmtmpD1);
+          //  print_ints("c0",&mmtmpD2);
+          // print_ints("c1",&mmtmpD3);
+      rxdataF_comp0_128[0] = _mm_packs_epi32(mmtmpD2,mmtmpD3);
+
+           // print_shorts("rx:",rxdataF128);
+           // print_shorts("ch:",dl_ch0_128);
+        // print_shorts("pack:",rxdataF_comp0_128);
+
+      // multiply by conjugated channel
+      mmtmpD0 = _mm_madd_epi16(dl_ch0_128[1],rxdataF128[1]);
+      // mmtmpD0 contains real part of 4 consecutive outputs (32-bit)
+      mmtmpD1 = _mm_shufflelo_epi16(dl_ch0_128[1],_MM_SHUFFLE(2,3,0,1));
+      mmtmpD1 = _mm_shufflehi_epi16(mmtmpD1,_MM_SHUFFLE(2,3,0,1));
+      mmtmpD1 = _mm_sign_epi16(mmtmpD1,*(__m128i*)conjugate);
+      mmtmpD1 = _mm_madd_epi16(mmtmpD1,rxdataF128[1]);
+      // mmtmpD1 contains imag part of 4 consecutive outputs (32-bit)
+      mmtmpD0 = _mm_srai_epi32(mmtmpD0,output_shift0);
+      mmtmpD1 = _mm_srai_epi32(mmtmpD1,output_shift0);
+      mmtmpD2 = _mm_unpacklo_epi32(mmtmpD0,mmtmpD1);
+      mmtmpD3 = _mm_unpackhi_epi32(mmtmpD0,mmtmpD1);
+
+      rxdataF_comp0_128[1] = _mm_packs_epi32(mmtmpD2,mmtmpD3);
+           //  print_shorts("rx:",rxdataF128+1);
+            //  print_shorts("ch:",dl_ch0_128+1);
+            // print_shorts("pack:",rxdataF_comp0_128+1);
+
+      if (pilots==0) {
+        // multiply by conjugated channel
+        mmtmpD0 = _mm_madd_epi16(dl_ch0_128[2],rxdataF128[2]);
+        // mmtmpD0 contains real part of 4 consecutive outputs (32-bit)
+        mmtmpD1 = _mm_shufflelo_epi16(dl_ch0_128[2],_MM_SHUFFLE(2,3,0,1));
+        mmtmpD1 = _mm_shufflehi_epi16(mmtmpD1,_MM_SHUFFLE(2,3,0,1));
+        mmtmpD1 = _mm_sign_epi16(mmtmpD1,*(__m128i*)conjugate);
+        mmtmpD1 = _mm_madd_epi16(mmtmpD1,rxdataF128[2]);
+        // mmtmpD1 contains imag part of 4 consecutive outputs (32-bit)
+        mmtmpD0 = _mm_srai_epi32(mmtmpD0,output_shift0);
+        mmtmpD1 = _mm_srai_epi32(mmtmpD1,output_shift0);
+        mmtmpD2 = _mm_unpacklo_epi32(mmtmpD0,mmtmpD1);
+        mmtmpD3 = _mm_unpackhi_epi32(mmtmpD0,mmtmpD1);
+
+        rxdataF_comp0_128[2] = _mm_packs_epi32(mmtmpD2,mmtmpD3);
+           //   print_shorts("rx:",rxdataF128+2);
+           //   print_shorts("ch:",dl_ch0_128+2);
+            //  print_shorts("pack:",rxdataF_comp0_128+2);
+
+      }
+
+
+      // layer 1
+      // MF multiply by conjugated channel
+      mmtmpD0 = _mm_madd_epi16(dl_ch1_128[0],rxdataF128[0]);
+           //  print_ints("re",&mmtmpD0);
+
+     // mmtmpD0 contains real part of 4 consecutive outputs (32-bit)
+      mmtmpD1 = _mm_shufflelo_epi16(dl_ch1_128[0],_MM_SHUFFLE(2,3,0,1));
+      mmtmpD1 = _mm_shufflehi_epi16(mmtmpD1,_MM_SHUFFLE(2,3,0,1));
+      mmtmpD1 = _mm_sign_epi16(mmtmpD1,*(__m128i*)&conjugate[0]);
+            //  print_ints("im",&mmtmpD1);
+      mmtmpD1 = _mm_madd_epi16(mmtmpD1,rxdataF128[0]);
+      // mmtmpD1 contains imag part of 4 consecutive outputs (32-bit)
+      mmtmpD0 = _mm_srai_epi32(mmtmpD0,output_shift1);
+             // print_ints("re(shift)",&mmtmpD0);
+      mmtmpD1 = _mm_srai_epi32(mmtmpD1,output_shift1);
+             // print_ints("im(shift)",&mmtmpD1);
+      mmtmpD2 = _mm_unpacklo_epi32(mmtmpD0,mmtmpD1);
+      mmtmpD3 = _mm_unpackhi_epi32(mmtmpD0,mmtmpD1);
+             // print_ints("c0",&mmtmpD2);
+             // print_ints("c1",&mmtmpD3);
+      rxdataF_comp1_128[0] = _mm_packs_epi32(mmtmpD2,mmtmpD3);
+            // print_shorts("rx:",rxdataF128);
+            //  print_shorts("ch:",dl_ch1_128);
+            // print_shorts("pack:",rxdataF_comp1_128);
+
+     // multiply by conjugated channel
+      mmtmpD0 = _mm_madd_epi16(dl_ch1_128[1],rxdataF128[1]);
+      // mmtmpD0 contains real part of 4 consecutive outputs (32-bit)
+      mmtmpD1 = _mm_shufflelo_epi16(dl_ch1_128[1],_MM_SHUFFLE(2,3,0,1));
+      mmtmpD1 = _mm_shufflehi_epi16(mmtmpD1,_MM_SHUFFLE(2,3,0,1));
+      mmtmpD1 = _mm_sign_epi16(mmtmpD1,*(__m128i*)conjugate);
+      mmtmpD1 = _mm_madd_epi16(mmtmpD1,rxdataF128[1]);
+      // mmtmpD1 contains imag part of 4 consecutive outputs (32-bit)
+      mmtmpD0 = _mm_srai_epi32(mmtmpD0,output_shift1);
+      mmtmpD1 = _mm_srai_epi32(mmtmpD1,output_shift1);
+      mmtmpD2 = _mm_unpacklo_epi32(mmtmpD0,mmtmpD1);
+      mmtmpD3 = _mm_unpackhi_epi32(mmtmpD0,mmtmpD1);
+
+      rxdataF_comp1_128[1] = _mm_packs_epi32(mmtmpD2,mmtmpD3);
+            //  print_shorts("rx:",rxdataF128+1);
+           // print_shorts("ch:",dl_ch1_128+1);
+            // print_shorts("pack:",rxdataF_comp1_128+1);
+
+      if (pilots==0) {
+        // multiply by conjugated channel
+        mmtmpD0 = _mm_madd_epi16(dl_ch1_128[2],rxdataF128[2]);
+        // mmtmpD0 contains real part of 4 consecutive outputs (32-bit)
+        mmtmpD1 = _mm_shufflelo_epi16(dl_ch1_128[2],_MM_SHUFFLE(2,3,0,1));
+        mmtmpD1 = _mm_shufflehi_epi16(mmtmpD1,_MM_SHUFFLE(2,3,0,1));
+        mmtmpD1 = _mm_sign_epi16(mmtmpD1,*(__m128i*)conjugate);
+        mmtmpD1 = _mm_madd_epi16(mmtmpD1,rxdataF128[2]);
+        // mmtmpD1 contains imag part of 4 consecutive outputs (32-bit)
+        mmtmpD0 = _mm_srai_epi32(mmtmpD0,output_shift1);
+        mmtmpD1 = _mm_srai_epi32(mmtmpD1,output_shift1);
+        mmtmpD2 = _mm_unpacklo_epi32(mmtmpD0,mmtmpD1);
+        mmtmpD3 = _mm_unpackhi_epi32(mmtmpD0,mmtmpD1);
+
+        rxdataF_comp1_128[2] = _mm_packs_epi32(mmtmpD2,mmtmpD3);
+          //   print_shorts("rx:",rxdataF128+2);
+           //  print_shorts("ch:",dl_ch1_128+2);
+             //         print_shorts("pack:",rxdataF_comp1_128+2);
+
+        dl_ch0_128+=3;
+        dl_ch1_128+=3;
+        dl_ch_mag0_128+=3;
+        dl_ch_mag1_128+=3;
+        dl_ch_mag0_128b+=3;
+        dl_ch_mag1_128b+=3;
+        rxdataF128+=3;
+        rxdataF_comp0_128+=3;
+        rxdataF_comp1_128+=3;
+      }
+      else {
+        dl_ch0_128+=2;
+        dl_ch1_128+=2;
+        dl_ch_mag0_128+=2;
+        dl_ch_mag1_128+=2;
+        dl_ch_mag0_128b+=2;
+        dl_ch_mag1_128b+=2;
+        rxdataF128+=2;
+        rxdataF_comp0_128+=2;
+        rxdataF_comp1_128+=2;
+      }
+
+    } // rb loop
+    Nre = (pilots==0) ? 12 : 8;
+
+    precoded_signal_strength0 += ((signal_energy_nodc(&dl_ch_estimates_ext[aarx][symbol*frame_parms->N_RB_DL*Nre],
+                                                        (nb_rb*Nre))*rx_power_correction) - (measurements->n0_power[aarx]));
+
+    precoded_signal_strength1 += ((signal_energy_nodc(&dl_ch_estimates_ext[aarx+2][symbol*frame_parms->N_RB_DL*Nre],
+                                                        (nb_rb*Nre))*rx_power_correction) - (measurements->n0_power[aarx]));
+  } // rx_antennas
+
+  measurements->precoded_cqi_dB[eNB_id][0] = dB_fixed2(precoded_signal_strength0,measurements->n0_power_tot);
+  measurements->precoded_cqi_dB[eNB_id][1] = dB_fixed2(precoded_signal_strength1,measurements->n0_power_tot);
+
+ // printf("eNB_id %d, symbol %d: precoded CQI %d dB\n",eNB_id,symbol,
+     //  measurements->precoded_cqi_dB[eNB_id][0]);
+
+  _mm_empty();
+  _m_empty();
+
+  #elif defined(__arm__)
+
+  unsigned short rb,Nre;
+  unsigned char aarx,symbol_mod,pilots=0;
+  int precoded_signal_strength0=0,precoded_signal_strength1=0, rx_power_correction;
+  int16x4_t *dl_ch0_128,*rxdataF128;
+  int16x4_t *dl_ch1_128;
+  int16x8_t *dl_ch0_128b,*dl_ch1_128b;
+
+  int32x4_t mmtmpD0,mmtmpD1,mmtmpD0b,mmtmpD1b;
+  int16x8_t *dl_ch_mag0_128,*dl_ch_mag0_128b,*dl_ch_mag1_128,*dl_ch_mag1_128b,mmtmpD2,mmtmpD3,mmtmpD4,*rxdataF_comp0_128,*rxdataF_comp1_128;
+  int16x8_t QAM_amp0_128,QAM_amp0_128b,QAM_amp1_128,QAM_amp1_128b;
+  int32x4_t output_shift128 = vmovq_n_s32(-(int32_t)output_shift);
+
+  int **rxdataF_ext           = pdsch_vars->rxdataF_ext;
+  int **dl_ch_estimates_ext   = pdsch_vars->dl_ch_estimates_ext;
+  int **dl_ch_mag0            = pdsch_vars->dl_ch_mag0;
+  int **dl_ch_mag1            = pdsch_vars->dl_ch_mag1[harq_pid][round];
+  int **dl_ch_magb0           = pdsch_vars->dl_ch_magb0;
+  int **dl_ch_magb1           = pdsch_vars->dl_ch_magb1[harq_pid][round];
+  int **rxdataF_comp0         = pdsch_vars->rxdataF_comp0;
+  int **rxdataF_comp1         = pdsch_vars->rxdataF_comp1[harq_pid][round];
+
+  int16_t conj[4]__attribute__((aligned(16))) = {1,-1,1,-1};
+
+  symbol_mod = (symbol>=(7-frame_parms->Ncp)) ? symbol-(7-frame_parms->Ncp) : symbol;
+
+  if ((symbol_mod == 0) || (symbol_mod == (4-frame_parms->Ncp))) {
+    if (frame_parms->nb_antenna_ports_eNB==1) // 10 out of 12 so don't reduce size
+      { nb_rb=1+(5*nb_rb/6); }
+
+    else
+      { pilots=1; }
+  }
+
+  rx_power_correction=1;
+
+  if (mod_order0 == 4) {
+    QAM_amp0_128  = vmovq_n_s16(QAM16_n1);  // 2/sqrt(10)
+    QAM_amp0_128b = vmovq_n_s16(0);
+
+  } else if (mod_order0 == 6) {
+    QAM_amp0_128  = vmovq_n_s16(QAM64_n1); //
+    QAM_amp0_128b = vmovq_n_s16(QAM64_n2);
+  }
+
+  if (mod_order1 == 4) {
+    QAM_amp1_128  = vmovq_n_s16(QAM16_n1);  // 2/sqrt(10)
+    QAM_amp1_128b = vmovq_n_s16(0);
+
+  } else if (mod_order1 == 6) {
+    QAM_amp1_128  = vmovq_n_s16(QAM64_n1); //
+    QAM_amp1_128b = vmovq_n_s16(QAM64_n2);
+  }
+
+  //    printf("comp: rxdataF_comp %p, symbol %d\n",rxdataF_comp[0],symbol);
+
+  for (aarx=0; aarx<frame_parms->nb_antennas_rx; aarx++) {
+
+
+
+    dl_ch0_128          = (int16x4_t*)&dl_ch_estimates_ext[aarx][symbol*frame_parms->N_RB_DL*12];
+    dl_ch1_128          = (int16x4_t*)&dl_ch_estimates_ext[2+aarx][symbol*frame_parms->N_RB_DL*12];
+    dl_ch0_128b          = (int16x8_t*)&dl_ch_estimates_ext[aarx][symbol*frame_parms->N_RB_DL*12];
+    dl_ch1_128b          = (int16x8_t*)&dl_ch_estimates_ext[2+aarx][symbol*frame_parms->N_RB_DL*12];
+    dl_ch_mag0_128      = (int16x8_t*)&dl_ch_mag0[aarx][symbol*frame_parms->N_RB_DL*12];
+    dl_ch_mag0_128b     = (int16x8_t*)&dl_ch_magb0[aarx][symbol*frame_parms->N_RB_DL*12];
+    dl_ch_mag1_128      = (int16x8_t*)&dl_ch_mag1[aarx][symbol*frame_parms->N_RB_DL*12];
+    dl_ch_mag1_128b     = (int16x8_t*)&dl_ch_magb1[aarx][symbol*frame_parms->N_RB_DL*12];
+    rxdataF128          = (int16x4_t*)&rxdataF_ext[aarx][symbol*frame_parms->N_RB_DL*12];
+    rxdataF_comp0_128   = (int16x8_t*)&rxdataF_comp0[aarx][symbol*frame_parms->N_RB_DL*12];
+    rxdataF_comp1_128   = (int16x8_t*)&rxdataF_comp1[aarx][symbol*frame_parms->N_RB_DL*12];
+
+    for (rb=0; rb<nb_rb; rb++) {
+      // combine TX channels using precoder from pmi
+      if (mimo_mode==LARGE_CDD) {
+        prec2A_TM3_128(&dl_ch0_128[0],&dl_ch1_128[0]);
+        prec2A_TM3_128(&dl_ch0_128[1],&dl_ch1_128[1]);
+
+
+        if (pilots==0) {
+          prec2A_TM3_128(&dl_ch0_128[2],&dl_ch1_128[2]);
+        }
+      }
+      else if (mimo_mode==DUALSTREAM_UNIFORM_PRECODING1) {
+        prec2A_TM4_128(0,&dl_ch0_128[0],&dl_ch1_128[0]);
+        prec2A_TM4_128(0,&dl_ch0_128[1],&dl_ch1_128[1]);
+
+        if (pilots==0) {
+          prec2A_TM4_128(0,&dl_ch0_128[2],&dl_ch1_128[2]);
+        }
+      }
+      else if (mimo_mode==DUALSTREAM_UNIFORM_PRECODINGj) {
+        prec2A_TM4_128(1,&dl_ch0_128[0],&dl_ch1_128[0]);
+        prec2A_TM4_128(1,&dl_ch0_128[1],&dl_ch1_128[1]);
+
+        if (pilots==0) {
+          prec2A_TM4_128(1,&dl_ch0_128[2],&dl_ch1_128[2]);
+        }
+      }
+      else {
+        LOG_E(PHY,"Unknown MIMO mode\n");
+        return;
+      }
+
+
+      if (mod_order0>2) {
+        // get channel amplitude if not QPSK
+        mmtmpD0 = vmull_s16(dl_ch0_128[0], dl_ch0_128[0]);
+        // mmtmpD0 = [ch0*ch0,ch1*ch1,ch2*ch2,ch3*ch3];
+        mmtmpD0 = vqshlq_s32(vqaddq_s32(mmtmpD0,vrev64q_s32(mmtmpD0)),output_shift128);
+        // mmtmpD0 = [ch0*ch0 + ch1*ch1,ch0*ch0 + ch1*ch1,ch2*ch2 + ch3*ch3,ch2*ch2 + ch3*ch3]>>output_shift128 on 32-bits
+        mmtmpD1 = vmull_s16(dl_ch0_128[1], dl_ch0_128[1]);
+        mmtmpD1 = vqshlq_s32(vqaddq_s32(mmtmpD1,vrev64q_s32(mmtmpD1)),output_shift128);
+        mmtmpD2 = vcombine_s16(vmovn_s32(mmtmpD0),vmovn_s32(mmtmpD1));
+        // mmtmpD2 = [ch0*ch0 + ch1*ch1,ch0*ch0 + ch1*ch1,ch2*ch2 + ch3*ch3,ch2*ch2 + ch3*ch3,ch4*ch4 + ch5*ch5,ch4*ch4 + ch5*ch5,ch6*ch6 + ch7*ch7,ch6*ch6 + ch7*ch7]>>output_shift128 on 16-bits
+        mmtmpD0 = vmull_s16(dl_ch0_128[2], dl_ch0_128[2]);
+        mmtmpD0 = vqshlq_s32(vqaddq_s32(mmtmpD0,vrev64q_s32(mmtmpD0)),output_shift128);
+        mmtmpD1 = vmull_s16(dl_ch0_128[3], dl_ch0_128[3]);
+        mmtmpD1 = vqshlq_s32(vqaddq_s32(mmtmpD1,vrev64q_s32(mmtmpD1)),output_shift128);
+        mmtmpD3 = vcombine_s16(vmovn_s32(mmtmpD0),vmovn_s32(mmtmpD1));
+
+        if (pilots==0) {
+          mmtmpD0 = vmull_s16(dl_ch0_128[4], dl_ch0_128[4]);
+          mmtmpD0 = vqshlq_s32(vqaddq_s32(mmtmpD0,vrev64q_s32(mmtmpD0)),output_shift128);
+          mmtmpD1 = vmull_s16(dl_ch0_128[5], dl_ch0_128[5]);
+          mmtmpD1 = vqshlq_s32(vqaddq_s32(mmtmpD1,vrev64q_s32(mmtmpD1)),output_shift128);
+          mmtmpD4 = vcombine_s16(vmovn_s32(mmtmpD0),vmovn_s32(mmtmpD1));
+
+
+        }
+
+        dl_ch_mag0_128b[0] = vqdmulhq_s16(mmtmpD2,QAM_amp0_128b);
+        dl_ch_mag0_128b[1] = vqdmulhq_s16(mmtmpD3,QAM_amp0_128b);
+        dl_ch_mag0_128[0] = vqdmulhq_s16(mmtmpD2,QAM_amp0_128);
+        dl_ch_mag0_128[1] = vqdmulhq_s16(mmtmpD3,QAM_amp0_128);
+
+
+        if (pilots==0) {
+          dl_ch_mag0_128b[2] = vqdmulhq_s16(mmtmpD4,QAM_amp0_128b);
+          dl_ch_mag0_128[2]  = vqdmulhq_s16(mmtmpD4,QAM_amp0_128);
+        }
+      }
+
+      if (mod_order1>2) {
+        // get channel amplitude if not QPSK
+        mmtmpD0 = vmull_s16(dl_ch1_128[0], dl_ch1_128[0]);
+        // mmtmpD0 = [ch0*ch0,ch1*ch1,ch2*ch2,ch3*ch3];
+        mmtmpD0 = vqshlq_s32(vqaddq_s32(mmtmpD0,vrev64q_s32(mmtmpD0)),output_shift128);
+        // mmtmpD0 = [ch0*ch0 + ch1*ch1,ch0*ch0 + ch1*ch1,ch2*ch2 + ch3*ch3,ch2*ch2 + ch3*ch3]>>output_shift128 on 32-bits
+        mmtmpD1 = vmull_s16(dl_ch1_128[1], dl_ch1_128[1]);
+        mmtmpD1 = vqshlq_s32(vqaddq_s32(mmtmpD1,vrev64q_s32(mmtmpD1)),output_shift128);
+        mmtmpD2 = vcombine_s16(vmovn_s32(mmtmpD0),vmovn_s32(mmtmpD1));
+        // mmtmpD2 = [ch0*ch0 + ch1*ch1,ch0*ch0 + ch1*ch1,ch2*ch2 + ch3*ch3,ch2*ch2 + ch3*ch3,ch4*ch4 + ch5*ch5,ch4*ch4 + ch5*ch5,ch6*ch6 + ch7*ch7,ch6*ch6 + ch7*ch7]>>output_shift128 on 16-bits
+        mmtmpD0 = vmull_s16(dl_ch1_128[2], dl_ch1_128[2]);
+        mmtmpD0 = vqshlq_s32(vqaddq_s32(mmtmpD0,vrev64q_s32(mmtmpD0)),output_shift128);
+        mmtmpD1 = vmull_s16(dl_ch1_128[3], dl_ch1_128[3]);
+        mmtmpD1 = vqshlq_s32(vqaddq_s32(mmtmpD1,vrev64q_s32(mmtmpD1)),output_shift128);
+        mmtmpD3 = vcombine_s16(vmovn_s32(mmtmpD0),vmovn_s32(mmtmpD1));
+
+        if (pilots==0) {
+          mmtmpD0 = vmull_s16(dl_ch1_128[4], dl_ch1_128[4]);
+          mmtmpD0 = vqshlq_s32(vqaddq_s32(mmtmpD0,vrev64q_s32(mmtmpD0)),output_shift128);
+          mmtmpD1 = vmull_s16(dl_ch1_128[5], dl_ch1_128[5]);
+          mmtmpD1 = vqshlq_s32(vqaddq_s32(mmtmpD1,vrev64q_s32(mmtmpD1)),output_shift128);
+          mmtmpD4 = vcombine_s16(vmovn_s32(mmtmpD0),vmovn_s32(mmtmpD1));
+
+
+        }
+
+        dl_ch_mag1_128b[0] = vqdmulhq_s16(mmtmpD2,QAM_amp1_128b);
+        dl_ch_mag1_128b[1] = vqdmulhq_s16(mmtmpD3,QAM_amp1_128b);
+        dl_ch_mag1_128[0] = vqdmulhq_s16(mmtmpD2,QAM_amp1_128);
+        dl_ch_mag1_128[1] = vqdmulhq_s16(mmtmpD3,QAM_amp1_128);
+
+
+        if (pilots==0) {
+          dl_ch_mag1_128b[2] = vqdmulhq_s16(mmtmpD4,QAM_amp1_128b);
+          dl_ch_mag1_128[2]  = vqdmulhq_s16(mmtmpD4,QAM_amp1_128);
+        }
+      }
+
+      mmtmpD0 = vmull_s16(dl_ch0_128[0], rxdataF128[0]);
+      //mmtmpD0 = [Re(ch[0])Re(rx[0]) Im(ch[0])Im(ch[0]) Re(ch[1])Re(rx[1]) Im(ch[1])Im(ch[1])]
+      mmtmpD1 = vmull_s16(dl_ch0_128[1], rxdataF128[1]);
+      //mmtmpD1 = [Re(ch[2])Re(rx[2]) Im(ch[2])Im(ch[2]) Re(ch[3])Re(rx[3]) Im(ch[3])Im(ch[3])]
+      mmtmpD0 = vcombine_s32(vpadd_s32(vget_low_s32(mmtmpD0),vget_high_s32(mmtmpD0)),
+                             vpadd_s32(vget_low_s32(mmtmpD1),vget_high_s32(mmtmpD1)));
+      //mmtmpD0 = [Re(ch[0])Re(rx[0])+Im(ch[0])Im(ch[0]) Re(ch[1])Re(rx[1])+Im(ch[1])Im(ch[1]) Re(ch[2])Re(rx[2])+Im(ch[2])Im(ch[2]) Re(ch[3])Re(rx[3])+Im(ch[3])Im(ch[3])]
+
+      mmtmpD0b = vmull_s16(vrev32_s16(vmul_s16(dl_ch0_128[0],*(int16x4_t*)conj)), rxdataF128[0]);
+      //mmtmpD0 = [-Im(ch[0])Re(rx[0]) Re(ch[0])Im(rx[0]) -Im(ch[1])Re(rx[1]) Re(ch[1])Im(rx[1])]
+      mmtmpD1b = vmull_s16(vrev32_s16(vmul_s16(dl_ch0_128[1],*(int16x4_t*)conj)), rxdataF128[1]);
+      //mmtmpD0 = [-Im(ch[2])Re(rx[2]) Re(ch[2])Im(rx[2]) -Im(ch[3])Re(rx[3]) Re(ch[3])Im(rx[3])]
+      mmtmpD1 = vcombine_s32(vpadd_s32(vget_low_s32(mmtmpD0b),vget_high_s32(mmtmpD0b)),
+                             vpadd_s32(vget_low_s32(mmtmpD1b),vget_high_s32(mmtmpD1b)));
+      //mmtmpD1 = [-Im(ch[0])Re(rx[0])+Re(ch[0])Im(rx[0]) -Im(ch[1])Re(rx[1])+Re(ch[1])Im(rx[1]) -Im(ch[2])Re(rx[2])+Re(ch[2])Im(rx[2]) -Im(ch[3])Re(rx[3])+Re(ch[3])Im(rx[3])]
+
+      mmtmpD0 = vqshlq_s32(mmtmpD0,output_shift128);
+      mmtmpD1 = vqshlq_s32(mmtmpD1,output_shift128);
+      rxdataF_comp0_128[0] = vcombine_s16(vmovn_s32(mmtmpD0),vmovn_s32(mmtmpD1));
+
+      mmtmpD0 = vmull_s16(dl_ch0_128[2], rxdataF128[2]);
+      mmtmpD1 = vmull_s16(dl_ch0_128[3], rxdataF128[3]);
+      mmtmpD0 = vcombine_s32(vpadd_s32(vget_low_s32(mmtmpD0),vget_high_s32(mmtmpD0)),
+                             vpadd_s32(vget_low_s32(mmtmpD1),vget_high_s32(mmtmpD1)));
+
+      mmtmpD0b = vmull_s16(vrev32_s16(vmul_s16(dl_ch0_128[2],*(int16x4_t*)conj)), rxdataF128[2]);
+      mmtmpD1b = vmull_s16(vrev32_s16(vmul_s16(dl_ch0_128[3],*(int16x4_t*)conj)), rxdataF128[3]);
+      mmtmpD1 = vcombine_s32(vpadd_s32(vget_low_s32(mmtmpD0b),vget_high_s32(mmtmpD0b)),
+                             vpadd_s32(vget_low_s32(mmtmpD1b),vget_high_s32(mmtmpD1b)));
+
+      mmtmpD0 = vqshlq_s32(mmtmpD0,output_shift128);
+      mmtmpD1 = vqshlq_s32(mmtmpD1,output_shift128);
+      rxdataF_comp0_128[1] = vcombine_s16(vmovn_s32(mmtmpD0),vmovn_s32(mmtmpD1));
+
+      // second stream
+      mmtmpD0 = vmull_s16(dl_ch1_128[0], rxdataF128[0]);
+      mmtmpD1 = vmull_s16(dl_ch1_128[1], rxdataF128[1]);
+      mmtmpD0 = vcombine_s32(vpadd_s32(vget_low_s32(mmtmpD0),vget_high_s32(mmtmpD0)),
+                             vpadd_s32(vget_low_s32(mmtmpD1),vget_high_s32(mmtmpD1)));
+      mmtmpD0b = vmull_s16(vrev32_s16(vmul_s16(dl_ch0_128[0],*(int16x4_t*)conj)), rxdataF128[0]);
+
+      mmtmpD1b = vmull_s16(vrev32_s16(vmul_s16(dl_ch0_128[1],*(int16x4_t*)conj)), rxdataF128[1]);
+      //mmtmpD0 = [-Im(ch[2])Re(rx[2]) Re(ch[2])Im(rx[2]) -Im(ch[3])Re(rx[3]) Re(ch[3])Im(rx[3])]
+      mmtmpD1 = vcombine_s32(vpadd_s32(vget_low_s32(mmtmpD0b),vget_high_s32(mmtmpD0b)),
+                             vpadd_s32(vget_low_s32(mmtmpD1b),vget_high_s32(mmtmpD1b)));
+      //mmtmpD1 = [-Im(ch[0])Re(rx[0])+Re(ch[0])Im(rx[0]) -Im(ch[1])Re(rx[1])+Re(ch[1])Im(rx[1]) -Im(ch[2])Re(rx[2])+Re(ch[2])Im(rx[2]) -Im(ch[3])Re(rx[3])+Re(ch[3])Im(rx[3])]
+
+      mmtmpD0 = vqshlq_s32(mmtmpD0,output_shift128);
+      mmtmpD1 = vqshlq_s32(mmtmpD1,output_shift128);
+      rxdataF_comp1_128[0] = vcombine_s16(vmovn_s32(mmtmpD0),vmovn_s32(mmtmpD1));
+
+      mmtmpD0 = vmull_s16(dl_ch1_128[2], rxdataF128[2]);
+      mmtmpD1 = vmull_s16(dl_ch1_128[3], rxdataF128[3]);
+      mmtmpD0 = vcombine_s32(vpadd_s32(vget_low_s32(mmtmpD0),vget_high_s32(mmtmpD0)),
+                             vpadd_s32(vget_low_s32(mmtmpD1),vget_high_s32(mmtmpD1)));
+
+      mmtmpD0b = vmull_s16(vrev32_s16(vmul_s16(dl_ch0_128[2],*(int16x4_t*)conj)), rxdataF128[2]);
+      mmtmpD1b = vmull_s16(vrev32_s16(vmul_s16(dl_ch0_128[3],*(int16x4_t*)conj)), rxdataF128[3]);
+      mmtmpD1 = vcombine_s32(vpadd_s32(vget_low_s32(mmtmpD0b),vget_high_s32(mmtmpD0b)),
+                             vpadd_s32(vget_low_s32(mmtmpD1b),vget_high_s32(mmtmpD1b)));
+
+      mmtmpD0 = vqshlq_s32(mmtmpD0,output_shift128);
+      mmtmpD1 = vqshlq_s32(mmtmpD1,output_shift128);
+      rxdataF_comp1_128[1] = vcombine_s16(vmovn_s32(mmtmpD0),vmovn_s32(mmtmpD1));
+
+      if (pilots==0) {
+        mmtmpD0 = vmull_s16(dl_ch0_128[4], rxdataF128[4]);
+        mmtmpD1 = vmull_s16(dl_ch0_128[5], rxdataF128[5]);
+        mmtmpD0 = vcombine_s32(vpadd_s32(vget_low_s32(mmtmpD0),vget_high_s32(mmtmpD0)),
+                               vpadd_s32(vget_low_s32(mmtmpD1),vget_high_s32(mmtmpD1)));
+
+        mmtmpD0b = vmull_s16(vrev32_s16(vmul_s16(dl_ch0_128[4],*(int16x4_t*)conj)), rxdataF128[4]);
+        mmtmpD1b = vmull_s16(vrev32_s16(vmul_s16(dl_ch0_128[5],*(int16x4_t*)conj)), rxdataF128[5]);
+        mmtmpD1 = vcombine_s32(vpadd_s32(vget_low_s32(mmtmpD0b),vget_high_s32(mmtmpD0b)),
+                               vpadd_s32(vget_low_s32(mmtmpD1b),vget_high_s32(mmtmpD1b)));
+
+
+        mmtmpD0 = vqshlq_s32(mmtmpD0,output_shift128);
+        mmtmpD1 = vqshlq_s32(mmtmpD1,output_shift128);
+        rxdataF_comp0_128[2] = vcombine_s16(vmovn_s32(mmtmpD0),vmovn_s32(mmtmpD1));
+        mmtmpD0 = vmull_s16(dl_ch1_128[4], rxdataF128[4]);
+        mmtmpD1 = vmull_s16(dl_ch1_128[5], rxdataF128[5]);
+        mmtmpD0 = vcombine_s32(vpadd_s32(vget_low_s32(mmtmpD0),vget_high_s32(mmtmpD0)),
+                               vpadd_s32(vget_low_s32(mmtmpD1),vget_high_s32(mmtmpD1)));
+
+        mmtmpD0b = vmull_s16(vrev32_s16(vmul_s16(dl_ch1_128[4],*(int16x4_t*)conj)), rxdataF128[4]);
+        mmtmpD1b = vmull_s16(vrev32_s16(vmul_s16(dl_ch1_128[5],*(int16x4_t*)conj)), rxdataF128[5]);
+        mmtmpD1 = vcombine_s32(vpadd_s32(vget_low_s32(mmtmpD0b),vget_high_s32(mmtmpD0b)),
+                               vpadd_s32(vget_low_s32(mmtmpD1b),vget_high_s32(mmtmpD1b)));
+
+
+        mmtmpD0 = vqshlq_s32(mmtmpD0,output_shift128);
+        mmtmpD1 = vqshlq_s32(mmtmpD1,output_shift128);
+        rxdataF_comp1_128[2] = vcombine_s16(vmovn_s32(mmtmpD0),vmovn_s32(mmtmpD1));
+      }
+    }
+
+
+
+    Nre = (pilots==0) ? 12 : 8;
+
+    // rx_antennas
+  }
+
+
+  Nre = (pilots==0) ? 12 : 8;
+
+  precoded_signal_strength0 += ((signal_energy_nodc(&dl_ch_estimates_ext[aarx][symbol*frame_parms->N_RB_DL*Nre],
+                                                        (nb_rb*Nre))*rx_power_correction) - (measurements->n0_power[aarx]));
+  precoded_signal_strength1 += ((signal_energy_nodc(&dl_ch_estimates_ext[aarx+2][symbol*frame_parms->N_RB_DL*Nre],
+                                                        (nb_rb*Nre))*rx_power_correction) - (measurements->n0_power[aarx]));
+
+  measurements->precoded_cqi_dB[eNB_id][0] = dB_fixed2(precoded_signal_strength0,measurements->n0_power_tot);
+  measurements->precoded_cqi_dB[eNB_id][1] = dB_fixed2(precoded_signal_strength1,measurements->n0_power_tot);
+
+#endif
+}
+
+
+/*
+ * Per-RE correlation between two channel estimates of one OFDM symbol.
+ *
+ * For every RX antenna the estimate dl_ch_estimates_ext[aarx] is combined,
+ * vector by vector, with a second estimate and the packed 16-bit result is
+ * stored in dl_ch_rho_ext[aarx]. The second estimate is
+ * dl_ch_estimates_ext[2+aarx] when dl_ch_estimates_ext_i is NULL (the TM3/4
+ * case), otherwise dl_ch_estimates_ext_i[aarx].
+ *
+ * frame_parms  : supplies Ncp, N_RB_DL and nb_antennas_rx
+ * symbol       : OFDM symbol; all buffers are addressed at symbol*N_RB_DL*12
+ * nb_rb        : number of resource blocks to process
+ * output_shift : arithmetic right shift applied to the 32-bit products
+ *                before they are packed to 16 bits
+ *
+ * Two 128-bit vectors are consumed per RB on pilot symbols, three otherwise.
+ * Only the x86 path is implemented; the ARM branch is empty.
+ */
+void dlsch_dual_stream_correlation(LTE_DL_FRAME_PARMS *frame_parms,
+                                   unsigned char symbol,
+                                   unsigned short nb_rb,
+                                   int **dl_ch_estimates_ext,
+                                   int **dl_ch_estimates_ext_i,
+                                   int **dl_ch_rho_ext,
+                                   unsigned char output_shift)
+{
+
+#if defined(__x86_64__)||defined(__i386__)
+
+  __m128i *ch,*ch_other,*rho;
+  __m128i re32,im32,swapped;
+  unsigned char rx_ant,sym_reduced;
+  int vec_per_rb,nb_vec,v;
+  const int sym_offset = symbol*frame_parms->N_RB_DL*12;
+
+  // symbol is reduced by (7-Ncp) once it reaches that value
+  sym_reduced = (symbol>=(7-frame_parms->Ncp)) ? symbol-(7-frame_parms->Ncp) : symbol;
+
+  // pilot symbols (reduced index 0 or 4-Ncp) carry 2 vectors per RB, the others 3
+  vec_per_rb = ((sym_reduced == 0) || (sym_reduced == (4-frame_parms->Ncp))) ? 2 : 3;
+  nb_vec     = nb_rb*vec_per_rb;
+
+  for (rx_ant=0; rx_ant<frame_parms->nb_antennas_rx; rx_ant++) {
+
+    ch = (__m128i *)&dl_ch_estimates_ext[rx_ant][sym_offset];
+
+    if (dl_ch_estimates_ext_i == NULL) // TM3/4: second stream lives in the same array
+      ch_other = (__m128i *)&dl_ch_estimates_ext[2+rx_ant][sym_offset];
+    else
+      ch_other = (__m128i *)&dl_ch_estimates_ext_i[rx_ant][sym_offset];
+
+    rho = (__m128i *)&dl_ch_rho_ext[rx_ant][sym_offset];
+
+    // vectors are handled strictly in increasing order, one store per vector
+    for (v=0; v<nb_vec; v++) {
+      // real part: pairwise 16-bit products summed into four 32-bit lanes
+      re32 = _mm_srai_epi32(_mm_madd_epi16(ch[v],ch_other[v]),output_shift);
+
+      // imaginary part: swap the two 16-bit halves of every 32-bit sample,
+      // apply the sign pattern held in conjugate[] (defined elsewhere in the
+      // file; the original comments call this multiplying by the conjugated
+      // channel), then multiply-accumulate
+      swapped = _mm_shufflelo_epi16(ch[v],_MM_SHUFFLE(2,3,0,1));
+      swapped = _mm_shufflehi_epi16(swapped,_MM_SHUFFLE(2,3,0,1));
+      swapped = _mm_sign_epi16(swapped,*(__m128i*)&conjugate[0]);
+      im32    = _mm_srai_epi32(_mm_madd_epi16(swapped,ch_other[v]),output_shift);
+
+      // interleave re/im and pack to 16 bits with signed saturation
+      rho[v] = _mm_packs_epi32(_mm_unpacklo_epi32(re32,im32),
+                               _mm_unpackhi_epi32(re32,im32));
+    }
+  }
+
+  _mm_empty();
+  _m_empty();
+
+#elif defined(__arm__)
+
+#endif
+}
+
+
+/*void dlsch_dual_stream_correlationTM34(LTE_DL_FRAME_PARMS *frame_parms,
+                                   unsigned char symbol,
+                                   unsigned short nb_rb,
+                                   int **dl_ch_estimates_ext,
+                                   int **dl_ch_estimates_ext_i,
+                                   int **dl_ch_rho_ext,
+                                   unsigned char output_shift0,
+                                   unsigned char output_shift1)
+{
+
+#if defined(__x86_64__)||defined(__i386__)
+
+  unsigned short rb;
+  __m128i *dl_ch128,*dl_ch128i,*dl_ch_rho128,mmtmpD0,mmtmpD1,mmtmpD2,mmtmpD3;
+  unsigned char aarx,symbol_mod,pilots=0;
+  int output_shift;
+
+  //    printf("dlsch_dual_stream_correlation: symbol %d\n",symbol);
+
+  symbol_mod = (symbol>=(7-frame_parms->Ncp)) ? symbol-(7-frame_parms->Ncp) : symbol;
+
+  if ((symbol_mod == 0) || (symbol_mod == (4-frame_parms->Ncp))) {
+    pilots=1;
+  }
+
+  //  printf("Dual stream correlation (%p)\n",dl_ch_estimates_ext_i);
+
+  for (aarx=0; aarx<frame_parms->nb_antennas_rx; aarx++) {
+
+       if (aarx==0) {
+      output_shift=output_shift0;
+    }
+      else {
+        output_shift=output_shift1;
+      }
+
+ //printf ("antenna %d", aarx);
+    dl_ch128          = (__m128i *)&dl_ch_estimates_ext[aarx][symbol*frame_parms->N_RB_DL*12];
+
+    if (dl_ch_estimates_ext_i == NULL) // TM3/4
+      dl_ch128i         = (__m128i *)&dl_ch_estimates_ext[2+aarx][symbol*frame_parms->N_RB_DL*12];
+    else
+      dl_ch128i         = (__m128i *)&dl_ch_estimates_ext_i[aarx][symbol*frame_parms->N_RB_DL*12];
+
+    dl_ch_rho128      = (__m128i *)&dl_ch_rho_ext[aarx][symbol*frame_parms->N_RB_DL*12];
+
+
+    for (rb=0; rb<nb_rb; rb++) {
+      // multiply by conjugated channel
+      mmtmpD0 = _mm_madd_epi16(dl_ch128[0],dl_ch128i[0]);
+      //      print_ints("re",&mmtmpD0);
+      // mmtmpD0 contains real part of 4 consecutive outputs (32-bit)
+      mmtmpD1 = _mm_shufflelo_epi16(dl_ch128[0],_MM_SHUFFLE(2,3,0,1));
+      mmtmpD1 = _mm_shufflehi_epi16(mmtmpD1,_MM_SHUFFLE(2,3,0,1));
+      mmtmpD1 = _mm_sign_epi16(mmtmpD1,*(__m128i*)&conjugate[0]);
+      mmtmpD1 = _mm_madd_epi16(mmtmpD1,dl_ch128i[0]);
+      //      print_ints("im",&mmtmpD1);
+      // mmtmpD1 contains imag part of 4 consecutive outputs (32-bit)
+      mmtmpD0 = _mm_srai_epi32(mmtmpD0,output_shift);
+      //      print_ints("re(shift)",&mmtmpD0);
+      mmtmpD1 = _mm_srai_epi32(mmtmpD1,output_shift);
+      //      print_ints("im(shift)",&mmtmpD1);
+      mmtmpD2 = _mm_unpacklo_epi32(mmtmpD0,mmtmpD1);
+      mmtmpD3 = _mm_unpackhi_epi32(mmtmpD0,mmtmpD1);
+      //      print_ints("c0",&mmtmpD2);
+      //      print_ints("c1",&mmtmpD3);
+      dl_ch_rho128[0] = _mm_packs_epi32(mmtmpD2,mmtmpD3);
+    // print_shorts("rho 0:",dl_ch_rho128);
+      // multiply by conjugated channel
+      mmtmpD0 = _mm_madd_epi16(dl_ch128[1],dl_ch128i[1]);
+      // mmtmpD0 contains real part of 4 consecutive outputs (32-bit)
+      mmtmpD1 = _mm_shufflelo_epi16(dl_ch128[1],_MM_SHUFFLE(2,3,0,1));
+      mmtmpD1 = _mm_shufflehi_epi16(mmtmpD1,_MM_SHUFFLE(2,3,0,1));
+      mmtmpD1 = _mm_sign_epi16(mmtmpD1,*(__m128i*)conjugate);
+      mmtmpD1 = _mm_madd_epi16(mmtmpD1,dl_ch128i[1]);
+      // mmtmpD1 contains imag part of 4 consecutive outputs (32-bit)
+      mmtmpD0 = _mm_srai_epi32(mmtmpD0,output_shift);
+      mmtmpD1 = _mm_srai_epi32(mmtmpD1,output_shift);
+      mmtmpD2 = _mm_unpacklo_epi32(mmtmpD0,mmtmpD1);
+      mmtmpD3 = _mm_unpackhi_epi32(mmtmpD0,mmtmpD1);
+      dl_ch_rho128[1] =_mm_packs_epi32(mmtmpD2,mmtmpD3);
+
+
+      if (pilots==0) {
+
+        // multiply by conjugated channel
+        mmtmpD0 = _mm_madd_epi16(dl_ch128[2],dl_ch128i[2]);
+        // mmtmpD0 contains real part of 4 consecutive outputs (32-bit)
+        mmtmpD1 = _mm_shufflelo_epi16(dl_ch128[2],_MM_SHUFFLE(2,3,0,1));
+        mmtmpD1 = _mm_shufflehi_epi16(mmtmpD1,_MM_SHUFFLE(2,3,0,1));
+        mmtmpD1 = _mm_sign_epi16(mmtmpD1,*(__m128i*)conjugate);
+        mmtmpD1 = _mm_madd_epi16(mmtmpD1,dl_ch128i[2]);
+        // mmtmpD1 contains imag part of 4 consecutive outputs (32-bit)
+        mmtmpD0 = _mm_srai_epi32(mmtmpD0,output_shift);
+        mmtmpD1 = _mm_srai_epi32(mmtmpD1,output_shift);
+        mmtmpD2 = _mm_unpacklo_epi32(mmtmpD0,mmtmpD1);
+        mmtmpD3 = _mm_unpackhi_epi32(mmtmpD0,mmtmpD1);
+        dl_ch_rho128[2] = _mm_packs_epi32(mmtmpD2,mmtmpD3);
+
+       dl_ch128+=3;
+        dl_ch128i+=3;
+        dl_ch_rho128+=3;
+      } else {
+
+        dl_ch128+=2;
+        dl_ch128i+=2;
+        dl_ch_rho128+=2;
+      }
+    }
+
+  }
+
+  _mm_empty();
+  _m_empty();
+
+#elif defined(__arm__)
+
+#endif
+}
+*/
+
+/*
+ * Combine the two RX-antenna branches of one OFDM symbol into the first one.
+ *
+ * Nothing is combined unless frame_parms->nb_antennas_rx > 1. When it is:
+ *  - for every eNB antenna port aatx, entries [2*aatx] and [2*aatx+1] of
+ *    rxdataF_comp, dl_ch_mag and dl_ch_magb are averaged into entry [2*aatx];
+ *  - if rho is non-NULL, rho[0] and rho[1] are averaged into rho[0];
+ *  - if dual_stream_UE == 1, entries [0] and [1] of rxdataF_comp_i, rho_i,
+ *    dl_ch_mag_i and dl_ch_magb_i are averaged into entry [0]
+ *    (these pointers are dereferenced without a NULL check).
+ *
+ * Every buffer is addressed at symbol*N_RB_DL*12 and nb_rb*3 128-bit vectors
+ * are processed. The x86 path halves each operand (arithmetic shift by 1)
+ * and then does a saturating add; the ARM path uses the halving add
+ * vhaddq_s16.
+ */
+void dlsch_detection_mrc(LTE_DL_FRAME_PARMS *frame_parms,
+                         int **rxdataF_comp,
+                         int **rxdataF_comp_i,
+                         int **rho,
+                         int **rho_i,
+                         int **dl_ch_mag,
+                         int **dl_ch_magb,
+                         int **dl_ch_mag_i,
+                         int **dl_ch_magb_i,
+                         unsigned char symbol,
+                         unsigned short nb_rb,
+                         unsigned char dual_stream_UE)
+{
+
+#if defined(__x86_64__)||defined(__i386__)
+
+  // "_a" is the branch that is kept (and overwritten), "_b" the one folded into it
+  __m128i *comp_a,*comp_b,*mag_a,*mag_b,*magb_a,*magb_b,*rho_a,*rho_b;
+  unsigned char tx_port;
+  int v;
+  const int nb_vec     = nb_rb*3;
+  const int sym_offset = symbol*frame_parms->N_RB_DL*12;
+
+  if (frame_parms->nb_antennas_rx>1) {
+
+    for (tx_port=0; tx_port<frame_parms->nb_antenna_ports_eNB; tx_port++) {
+
+      comp_a = (__m128i *)&rxdataF_comp[2*tx_port][sym_offset];
+      comp_b = (__m128i *)&rxdataF_comp[2*tx_port+1][sym_offset];
+      mag_a  = (__m128i *)&dl_ch_mag[2*tx_port][sym_offset];
+      mag_b  = (__m128i *)&dl_ch_mag[2*tx_port+1][sym_offset];
+      magb_a = (__m128i *)&dl_ch_magb[2*tx_port][sym_offset];
+      magb_b = (__m128i *)&dl_ch_magb[2*tx_port+1][sym_offset];
+
+      // matched-filter output and both magnitude buffers (for 16QAM/64QAM llr computation)
+      for (v=0; v<nb_vec; v++) {
+        comp_a[v] = _mm_adds_epi16(_mm_srai_epi16(comp_a[v],1),_mm_srai_epi16(comp_b[v],1));
+        mag_a[v]  = _mm_adds_epi16(_mm_srai_epi16(mag_a[v],1),_mm_srai_epi16(mag_b[v],1));
+        magb_a[v] = _mm_adds_epi16(_mm_srai_epi16(magb_a[v],1),_mm_srai_epi16(magb_b[v],1));
+      }
+    }
+
+    if (rho) {
+      rho_a = (__m128i *)&rho[0][sym_offset];
+      rho_b = (__m128i *)&rho[1][sym_offset];
+
+      for (v=0; v<nb_vec; v++) {
+        rho_a[v] = _mm_adds_epi16(_mm_srai_epi16(rho_a[v],1),_mm_srai_epi16(rho_b[v],1));
+      }
+    }
+
+    if (dual_stream_UE == 1) {
+      // same pointer pairs, now aimed at the "_i" buffers
+      rho_a  = (__m128i *)&rho_i[0][sym_offset];
+      rho_b  = (__m128i *)&rho_i[1][sym_offset];
+      comp_a = (__m128i *)&rxdataF_comp_i[0][sym_offset];
+      comp_b = (__m128i *)&rxdataF_comp_i[1][sym_offset];
+      mag_a  = (__m128i *)&dl_ch_mag_i[0][sym_offset];
+      mag_b  = (__m128i *)&dl_ch_mag_i[1][sym_offset];
+      magb_a = (__m128i *)&dl_ch_magb_i[0][sym_offset];
+      magb_b = (__m128i *)&dl_ch_magb_i[1][sym_offset];
+
+      for (v=0; v<nb_vec; v++) {
+        comp_a[v] = _mm_adds_epi16(_mm_srai_epi16(comp_a[v],1),_mm_srai_epi16(comp_b[v],1));
+        rho_a[v]  = _mm_adds_epi16(_mm_srai_epi16(rho_a[v],1),_mm_srai_epi16(rho_b[v],1));
+        mag_a[v]  = _mm_adds_epi16(_mm_srai_epi16(mag_a[v],1),_mm_srai_epi16(mag_b[v],1));
+        magb_a[v] = _mm_adds_epi16(_mm_srai_epi16(magb_a[v],1),_mm_srai_epi16(magb_b[v],1));
+      }
+    }
+  }
+
+  // executed whether or not anything was combined
+  _mm_empty();
+  _m_empty();
+
+#elif defined(__arm__)
+
+  // "_a" is the branch that is kept (and overwritten), "_b" the one folded into it
+  int16x8_t *comp_a,*comp_b,*mag_a,*mag_b,*magb_a,*magb_b,*rho_a,*rho_b;
+  unsigned char tx_port;
+  int v;
+  const int nb_vec     = nb_rb*3;
+  const int sym_offset = symbol*frame_parms->N_RB_DL*12;
+
+  if (frame_parms->nb_antennas_rx>1) {
+
+    for (tx_port=0; tx_port<frame_parms->nb_antenna_ports_eNB; tx_port++) {
+
+      comp_a = (int16x8_t *)&rxdataF_comp[2*tx_port][sym_offset];
+      comp_b = (int16x8_t *)&rxdataF_comp[2*tx_port+1][sym_offset];
+      mag_a  = (int16x8_t *)&dl_ch_mag[2*tx_port][sym_offset];
+      mag_b  = (int16x8_t *)&dl_ch_mag[2*tx_port+1][sym_offset];
+      magb_a = (int16x8_t *)&dl_ch_magb[2*tx_port][sym_offset];
+      magb_b = (int16x8_t *)&dl_ch_magb[2*tx_port+1][sym_offset];
+
+      // matched-filter output and both magnitude buffers (for 16QAM/64QAM llr computation)
+      for (v=0; v<nb_vec; v++) {
+        comp_a[v] = vhaddq_s16(comp_a[v],comp_b[v]);
+        mag_a[v]  = vhaddq_s16(mag_a[v],mag_b[v]);
+        magb_a[v] = vhaddq_s16(magb_a[v],magb_b[v]);
+      }
+    }
+
+    if (rho) {
+      rho_a = (int16x8_t *)&rho[0][sym_offset];
+      rho_b = (int16x8_t *)&rho[1][sym_offset];
+
+      for (v=0; v<nb_vec; v++) {
+        rho_a[v] = vhaddq_s16(rho_a[v],rho_b[v]);
+      }
+    }
+
+    if (dual_stream_UE == 1) {
+      // same pointer pairs, now aimed at the "_i" buffers
+      rho_a  = (int16x8_t *)&rho_i[0][sym_offset];
+      rho_b  = (int16x8_t *)&rho_i[1][sym_offset];
+      comp_a = (int16x8_t *)&rxdataF_comp_i[0][sym_offset];
+      comp_b = (int16x8_t *)&rxdataF_comp_i[1][sym_offset];
+      mag_a  = (int16x8_t *)&dl_ch_mag_i[0][sym_offset];
+      mag_b  = (int16x8_t *)&dl_ch_mag_i[1][sym_offset];
+      magb_a = (int16x8_t *)&dl_ch_magb_i[0][sym_offset];
+      magb_b = (int16x8_t *)&dl_ch_magb_i[1][sym_offset];
+
+      for (v=0; v<nb_vec; v++) {
+        comp_a[v] = vhaddq_s16(comp_a[v],comp_b[v]);
+        rho_a[v]  = vhaddq_s16(rho_a[v],rho_b[v]);
+        mag_a[v]  = vhaddq_s16(mag_a[v],mag_b[v]);
+        magb_a[v] = vhaddq_s16(magb_a[v],magb_b[v]);
+      }
+    }
+  }
+
+#endif
+}
+
+
+/*
+ * Two-RX-antenna combining for the TM3/4 receiver, operating on the buffers
+ * held in pdsch_vars.
+ *
+ * When frame_parms->nb_antennas_rx > 1, entries [0] and [1] of each buffer
+ * are averaged (each operand shifted right by 1, then a saturating add) and
+ * the result is written back to entry [0]:
+ *  - first stream : rxdataF_comp0, dl_ch_mag0, dl_ch_magb0
+ *  - correlation  : dl_ch_rho2_ext
+ *  - second stream (only if dual_stream_UE == 1):
+ *    rxdataF_comp1, dl_ch_rho_ext, dl_ch_mag1, dl_ch_magb1, all taken from
+ *    the [harq_pid][round] slot of pdsch_vars
+ *
+ * Every buffer is addressed at symbol*N_RB_DL*12 and nb_rb*3 128-bit vectors
+ * are processed.
+ *
+ * With a single RX antenna nothing is combined. Earlier revisions closed the
+ * antenna guard right after the first-stream loop, so the correlation and
+ * second-stream steps still folded in the entry-[1] buffers even though no
+ * second RX antenna was configured.
+ */
+void dlsch_detection_mrc_TM34(LTE_DL_FRAME_PARMS *frame_parms,
+                              LTE_UE_PDSCH *pdsch_vars,
+                              int harq_pid,
+                              int round,
+                              unsigned char symbol,
+                              unsigned short nb_rb,
+                              unsigned char dual_stream_UE) {
+
+  int i;
+  __m128i *rxdataF_comp128_0,*rxdataF_comp128_1,*rxdataF_comp128_i0,*rxdataF_comp128_i1;
+  __m128i *dl_ch_mag128_0,*dl_ch_mag128_1,*dl_ch_mag128_0b,*dl_ch_mag128_1b;
+  __m128i *rho128_0,*rho128_1,*rho128_i0,*rho128_i1;
+  __m128i *dl_ch_mag128_i0,*dl_ch_mag128_i1,*dl_ch_mag128_i0b,*dl_ch_mag128_i1b;
+
+  int **rxdataF_comp0           = pdsch_vars->rxdataF_comp0;
+  int **rxdataF_comp1           = pdsch_vars->rxdataF_comp1[harq_pid][round];
+  int **dl_ch_rho_ext           = pdsch_vars->dl_ch_rho_ext[harq_pid][round]; //for second stream
+  int **dl_ch_rho2_ext          = pdsch_vars->dl_ch_rho2_ext;
+  int **dl_ch_mag0              = pdsch_vars->dl_ch_mag0;
+  int **dl_ch_mag1              = pdsch_vars->dl_ch_mag1[harq_pid][round];
+  int **dl_ch_magb0             = pdsch_vars->dl_ch_magb0;
+  int **dl_ch_magb1             = pdsch_vars->dl_ch_magb1[harq_pid][round];
+
+  // every step below reads the entry-[1] buffers, so all of them sit under this guard
+  if (frame_parms->nb_antennas_rx>1) {
+
+    rxdataF_comp128_0   = (__m128i *)&rxdataF_comp0[0][symbol*frame_parms->N_RB_DL*12];
+    rxdataF_comp128_1   = (__m128i *)&rxdataF_comp0[1][symbol*frame_parms->N_RB_DL*12];
+    dl_ch_mag128_0      = (__m128i *)&dl_ch_mag0[0][symbol*frame_parms->N_RB_DL*12];
+    dl_ch_mag128_1      = (__m128i *)&dl_ch_mag0[1][symbol*frame_parms->N_RB_DL*12];
+    dl_ch_mag128_0b     = (__m128i *)&dl_ch_magb0[0][symbol*frame_parms->N_RB_DL*12];
+    dl_ch_mag128_1b     = (__m128i *)&dl_ch_magb0[1][symbol*frame_parms->N_RB_DL*12];
+
+    // MRC on each re of rb, both on MF output and magnitude (for 16QAM/64QAM llr computation)
+    for (i=0; i<nb_rb*3; i++) {
+      rxdataF_comp128_0[i] = _mm_adds_epi16(_mm_srai_epi16(rxdataF_comp128_0[i],1),_mm_srai_epi16(rxdataF_comp128_1[i],1));
+      dl_ch_mag128_0[i]    = _mm_adds_epi16(_mm_srai_epi16(dl_ch_mag128_0[i],1),_mm_srai_epi16(dl_ch_mag128_1[i],1));
+      dl_ch_mag128_0b[i]   = _mm_adds_epi16(_mm_srai_epi16(dl_ch_mag128_0b[i],1),_mm_srai_epi16(dl_ch_mag128_1b[i],1));
+    }
+
+    // correlation buffer; always present in pdsch_vars, hence no NULL test here
+    rho128_0 = (__m128i *) &dl_ch_rho2_ext[0][symbol*frame_parms->N_RB_DL*12];
+    rho128_1 = (__m128i *) &dl_ch_rho2_ext[1][symbol*frame_parms->N_RB_DL*12];
+
+    for (i=0; i<nb_rb*3; i++) {
+      rho128_0[i] = _mm_adds_epi16(_mm_srai_epi16(rho128_0[i],1),_mm_srai_epi16(rho128_1[i],1));
+    }
+
+    if (dual_stream_UE == 1) {
+      rho128_i0 = (__m128i *) &dl_ch_rho_ext[0][symbol*frame_parms->N_RB_DL*12];
+      rho128_i1 = (__m128i *) &dl_ch_rho_ext[1][symbol*frame_parms->N_RB_DL*12];
+      rxdataF_comp128_i0   = (__m128i *)&rxdataF_comp1[0][symbol*frame_parms->N_RB_DL*12];
+      rxdataF_comp128_i1   = (__m128i *)&rxdataF_comp1[1][symbol*frame_parms->N_RB_DL*12];
+      dl_ch_mag128_i0      = (__m128i *)&dl_ch_mag1[0][symbol*frame_parms->N_RB_DL*12];
+      dl_ch_mag128_i1      = (__m128i *)&dl_ch_mag1[1][symbol*frame_parms->N_RB_DL*12];
+      dl_ch_mag128_i0b     = (__m128i *)&dl_ch_magb1[0][symbol*frame_parms->N_RB_DL*12];
+      dl_ch_mag128_i1b     = (__m128i *)&dl_ch_magb1[1][symbol*frame_parms->N_RB_DL*12];
+
+      for (i=0; i<nb_rb*3; i++) {
+        rxdataF_comp128_i0[i] = _mm_adds_epi16(_mm_srai_epi16(rxdataF_comp128_i0[i],1),_mm_srai_epi16(rxdataF_comp128_i1[i],1));
+        rho128_i0[i]          = _mm_adds_epi16(_mm_srai_epi16(rho128_i0[i],1),_mm_srai_epi16(rho128_i1[i],1));
+
+        dl_ch_mag128_i0[i]    = _mm_adds_epi16(_mm_srai_epi16(dl_ch_mag128_i0[i],1),_mm_srai_epi16(dl_ch_mag128_i1[i],1));
+        dl_ch_mag128_i0b[i]   = _mm_adds_epi16(_mm_srai_epi16(dl_ch_mag128_i0b[i],1),_mm_srai_epi16(dl_ch_mag128_i1b[i],1));
+      }
+    }
+  }
+
+  _mm_empty();
+  _m_empty();
+}
+
+
+
+// Scale the extracted PDSCH channel estimates of one OFDM symbol, in place,
+// by an amplitude read from dlsch_ue[0].
+//
+// dl_ch_estimates_ext : channel estimates, row (aatx<<1)+aarx, starting at
+//                       offset symbol*N_RB_DL*12 inside each row
+// frame_parms         : Ncp, N_RB_DL, nb_antenna_ports_eNB and nb_antennas_rx are read
+// dlsch_ue            : only dlsch_ue[0]->sqrt_rho_a and ->sqrt_rho_b are read
+// symbol              : OFDM symbol index
+// nb_rb               : number of resource blocks to scale
+//
+// Only the x86 SSE path is implemented; the ARM branch is empty.
+void dlsch_scale_channel(int **dl_ch_estimates_ext,
+                         LTE_DL_FRAME_PARMS *frame_parms,
+                         LTE_UE_DLSCH_t **dlsch_ue,
+                         uint8_t symbol,
+                         unsigned short nb_rb)
+{
+
+#if defined(__x86_64__)||defined(__i386__)
+
+  const int symbols_per_slot = 7 - frame_parms->Ncp;
+  unsigned char symbol_mod = (symbol >= symbols_per_slot) ? symbol - symbols_per_slot : symbol;
+  unsigned char pilots = 0;
+  unsigned char aatx, aarx;
+  short rb, ch_amp;
+  int v, vecs_per_rb;
+  __m128i *ch128, gain128;
+
+  if ((symbol_mod == 0) || (symbol_mod == (4-frame_parms->Ncp))) {
+    if (frame_parms->nb_antenna_ports_eNB != 1) {
+      pilots = 1;
+    } else {
+      // single antenna port: 10 out of 12 REs are kept, so the RB count is
+      // rescaled and full 3-vector groups are still processed below
+      nb_rb = 1+(5*nb_rb/6);
+    }
+  }
+
+  // Determine the scaling amplitude based on the symbol type
+  if (pilots)
+    ch_amp = dlsch_ue[0]->sqrt_rho_b;
+  else
+    ch_amp = dlsch_ue[0]->sqrt_rho_a;
+
+  LOG_D(PHY,"Scaling PDSCH Chest in OFDM symbol %d by %d, pilots %d nb_rb %d NCP %d symbol %d\n",symbol_mod,ch_amp,pilots,nb_rb,frame_parms->Ncp,symbol);
+
+  gain128 = _mm_set1_epi16(ch_amp); // Q3.13
+
+  // two 128-bit vectors per RB when pilots are present, three otherwise
+  vecs_per_rb = pilots ? 2 : 3;
+
+  for (aatx=0; aatx<frame_parms->nb_antenna_ports_eNB; aatx++) {
+    for (aarx=0; aarx<frame_parms->nb_antennas_rx; aarx++) {
+
+      ch128 = (__m128i *)&dl_ch_estimates_ext[(aatx<<1)+aarx][symbol*frame_parms->N_RB_DL*12];
+
+      for (rb=0; rb<nb_rb; rb++) {
+        for (v=0; v<vecs_per_rb; v++) {
+          // mulhi keeps the upper 16 bits of each product; the left shift by 3
+          // brings the result back to the Q3.13 scaling of the amplitude
+          ch128[v] = _mm_slli_epi16(_mm_mulhi_epi16(ch128[v],gain128),3);
+        }
+
+        ch128 += vecs_per_rb;
+      }
+    }
+  }
+
+#elif defined(__arm__)
+
+#endif
+}
+
+
+//compute average channel_level on each (TX,RX) antenna pair
+// Compute the average channel energy of one OFDM symbol on each (TX,RX)
+// antenna pair.
+//
+// dl_ch_estimates_ext : channel estimates, row (aatx<<1)+aarx, starting at
+//                       offset symbol*N_RB_DL*12 inside each row
+// frame_parms         : Ncp, N_RB_DL, nb_antenna_ports_eNB and nb_antennas_rx are read
+// avg                 : output; avg[(aatx<<1)+aarx] receives the average
+// symbol              : OFDM symbol index
+// nb_rb               : number of resource blocks to average over; must be
+//                       non-zero because the sum is divided by a value
+//                       derived from nb_rb*nre
+void dlsch_channel_level(int **dl_ch_estimates_ext,
+                         LTE_DL_FRAME_PARMS *frame_parms,
+                         int32_t *avg,
+                         uint8_t symbol,
+                         unsigned short nb_rb)
+{
+
+#if defined(__x86_64__)||defined(__i386__)
+
+  short rb;
+  unsigned char aatx,aarx,nre=12,symbol_mod;
+  __m128i *dl_ch128, avg128D;
+
+  symbol_mod = (symbol>=(7-frame_parms->Ncp)) ? symbol-(7-frame_parms->Ncp) : symbol;
+
+  // nre is the per-RB element count used as the averaging divisor
+  if (((symbol_mod == 0) || (symbol_mod == (frame_parms->Ncp-1)))&&(frame_parms->nb_antenna_ports_eNB!=1))
+    nre=8;
+  else if (((symbol_mod == 0) || (symbol_mod == (frame_parms->Ncp-1)))&&(frame_parms->nb_antenna_ports_eNB==1))
+    nre=10;
+  else
+    nre=12;
+
+  // Split the divisor as nb_rb*nre = y * 2^x: every partial sum is shifted
+  // right by x before accumulation and the final sum is divided by y.
+  int16_t x = factor2(nb_rb*nre);
+  int16_t y = (nb_rb*nre)>>x;
+  //printf("nb_rb*nre = %d = %d * 2^(%d)\n",nb_rb*nre,y,x);
+
+  for (aatx=0; aatx<frame_parms->nb_antenna_ports_eNB; aatx++)
+    for (aarx=0; aarx<frame_parms->nb_antennas_rx; aarx++) {
+      //clear average level
+      avg128D = _mm_setzero_si128();
+
+      dl_ch128=(__m128i *)&dl_ch_estimates_ext[(aatx<<1)+aarx][symbol*frame_parms->N_RB_DL*12];
+
+      for (rb=0;rb<nb_rb;rb++) {
+        // _mm_madd_epi16 yields four 32-bit sums, so the pre-scaling shift must
+        // operate on 32-bit lanes; a 16-bit shift would treat the two halves of
+        // each sum independently and corrupt the value whenever x > 0.
+        avg128D = _mm_add_epi32(avg128D,_mm_srai_epi32(_mm_madd_epi16(dl_ch128[0],dl_ch128[0]),x));
+        avg128D = _mm_add_epi32(avg128D,_mm_srai_epi32(_mm_madd_epi16(dl_ch128[1],dl_ch128[1]),x));
+
+        if (((symbol_mod == 0) || (symbol_mod == (frame_parms->Ncp-1)))&&(frame_parms->nb_antenna_ports_eNB!=1)) {
+          // only two vectors are accumulated for this RB
+          dl_ch128+=2;
+        }
+        else {
+          avg128D = _mm_add_epi32(avg128D,_mm_srai_epi32(_mm_madd_epi16(dl_ch128[2],dl_ch128[2]),x));
+          dl_ch128+=3;
+        }
+      }
+
+      // horizontal sum of the four 32-bit lanes, then the remaining division by y
+      avg[(aatx<<1)+aarx] =(((int32_t*)&avg128D)[0] +
+                            ((int32_t*)&avg128D)[1] +
+                            ((int32_t*)&avg128D)[2] +
+                            ((int32_t*)&avg128D)[3])/y;
+      //  printf("Channel level : %d\n",avg[(aatx<<1)+aarx]);
+    }
+
+  _mm_empty();
+  _m_empty();
+
+#elif defined(__arm__)
+
+  short rb;
+  unsigned char aatx,aarx,nre=12,symbol_mod;
+  int32x4_t avg128D;
+  int16x4_t *dl_ch128;
+
+  symbol_mod = (symbol>=(7-frame_parms->Ncp)) ? symbol-(7-frame_parms->Ncp) : symbol;
+
+  for (aatx=0; aatx<frame_parms->nb_antenna_ports_eNB; aatx++)
+    for (aarx=0; aarx<frame_parms->nb_antennas_rx; aarx++) {
+      //clear average level
+      avg128D = vdupq_n_s32(0);
+
+      dl_ch128=(int16x4_t *)&dl_ch_estimates_ext[(aatx<<1)+aarx][symbol*frame_parms->N_RB_DL*12];
+
+      for (rb=0; rb<nb_rb; rb++) {
+        //  printf("rb %d : ",rb);
+        //  print_shorts("ch",&dl_ch128[0]);
+        // 64-bit vectors here: four of them are accumulated unconditionally
+        avg128D = vqaddq_s32(avg128D,vmull_s16(dl_ch128[0],dl_ch128[0]));
+        avg128D = vqaddq_s32(avg128D,vmull_s16(dl_ch128[1],dl_ch128[1]));
+        avg128D = vqaddq_s32(avg128D,vmull_s16(dl_ch128[2],dl_ch128[2]));
+        avg128D = vqaddq_s32(avg128D,vmull_s16(dl_ch128[3],dl_ch128[3]));
+
+        if (((symbol_mod == 0) || (symbol_mod == (frame_parms->Ncp-1)))&&(frame_parms->nb_antenna_ports_eNB!=1)) {
+          dl_ch128+=4;
+        } else {
+          avg128D = vqaddq_s32(avg128D,vmull_s16(dl_ch128[4],dl_ch128[4]));
+          avg128D = vqaddq_s32(avg128D,vmull_s16(dl_ch128[5],dl_ch128[5]));
+          dl_ch128+=6;
+        }
+
+        /*
+          if (rb==0) {
+          print_shorts("dl_ch128",&dl_ch128[0]);
+          print_shorts("dl_ch128",&dl_ch128[1]);
+          print_shorts("dl_ch128",&dl_ch128[2]);
+          }
+        */
+      }
+
+
+      if (((symbol_mod == 0) || (symbol_mod == (frame_parms->Ncp-1)))&&(frame_parms->nb_antenna_ports_eNB!=1))
+        nre=8;
+      else if (((symbol_mod == 0) || (symbol_mod == (frame_parms->Ncp-1)))&&(frame_parms->nb_antenna_ports_eNB==1))
+        nre=10;
+      else
+        nre=12;
+
+      avg[(aatx<<1)+aarx] = (((int32_t*)&avg128D)[0] +
+                             ((int32_t*)&avg128D)[1] +
+                             ((int32_t*)&avg128D)[2] +
+                             ((int32_t*)&avg128D)[3])/(nb_rb*nre);
+
+      //            printf("Channel level : %d\n",avg[(aatx<<1)+aarx]);
+    }
+
+
+#endif
+}
+
+//compute average channel_level of effective (precoded) channel
+// Average energy of the two precoded channels for one OFDM symbol.
+//
+// For every receive antenna the channel pair (rows aarx and 2+aarx of
+// dl_ch_estimates_ext) is loaded into temporaries, precoded according to
+// mimo_mode, and the squared magnitudes of both results are accumulated.
+// The stored estimates themselves are not modified.
+//
+// dl_ch_estimates_ext : channel estimates, offset symbol*N_RB_DL*12 in each row
+// frame_parms         : Ncp, N_RB_DL, nb_antenna_ports_eNB and nb_antennas_rx are read
+// pmi_ext             : per-RB precoder index, read only for DUALSTREAM_PUSCH_PRECODING
+// avg_0, avg_1        : outputs; entries [0] and [1] are cleared first. On return
+//                       avg_0[0] == avg_1[0] == the smaller of the two per-stream
+//                       sums over the receive antennas.
+// symbol              : OFDM symbol index
+// nb_rb               : number of resource blocks (used as divisor, must be non-zero)
+// mimo_mode           : selects the precoder; unlisted modes apply no precoding
+//
+// Only the x86 SSE path is implemented; the ARM branch is empty.
+void dlsch_channel_level_TM34(int **dl_ch_estimates_ext,
+                              LTE_DL_FRAME_PARMS *frame_parms,
+                              unsigned char *pmi_ext,
+                              int *avg_0,
+                              int *avg_1,
+                              uint8_t symbol,
+                              unsigned short nb_rb,
+                              MIMO_mode_t mimo_mode){
+
+#if defined(__x86_64__)||defined(__i386__)
+
+  const int symbols_per_slot = 7 - frame_parms->Ncp;
+  const unsigned char symbol_mod = (symbol >= symbols_per_slot) ? symbol - symbols_per_slot : symbol;
+  const int pilot_symbol = (symbol_mod == 0) || (symbol_mod == (frame_parms->Ncp-1));
+  const int short_rb = pilot_symbol && (frame_parms->nb_antenna_ports_eNB != 1);
+  const int vecs_per_rb = short_rb ? 2 : 3;
+  unsigned char nre, aarx;
+  short rb;
+  int v, lane, re_count, level_0, level_1;
+  const __m128i *ch0_128, *ch1_128;
+  __m128i eff0, eff1, energy_0, energy_1;
+
+  // per-RB element count used as the averaging divisor
+  if (short_rb)
+    nre = 8;
+  else if (pilot_symbol)
+    nre = 10;
+  else
+    nre = 12;
+
+  re_count = nb_rb*nre;
+
+  //clear average level
+  avg_0[0] = avg_0[1] = 0;
+  avg_1[0] = avg_1[1] = 0;
+
+  for (aarx=0; aarx<frame_parms->nb_antennas_rx; aarx++) {
+    ch0_128 = (const __m128i *)&dl_ch_estimates_ext[aarx][symbol*frame_parms->N_RB_DL*12];
+    ch1_128 = (const __m128i *)&dl_ch_estimates_ext[2+aarx][symbol*frame_parms->N_RB_DL*12];
+
+    energy_0 = _mm_setzero_si128();
+    energy_1 = _mm_setzero_si128();
+
+    for (rb=0; rb<nb_rb; rb++) {
+      for (v=0; v<vecs_per_rb; v++) {
+        eff0 = _mm_load_si128(&ch0_128[v]);
+        eff1 = _mm_load_si128(&ch1_128[v]);
+
+        // precode the temporaries in place
+        switch (mimo_mode) {
+          case LARGE_CDD:
+            prec2A_TM3_128(&eff0,&eff1);
+            break;
+
+          case DUALSTREAM_UNIFORM_PRECODING1:
+            prec2A_TM4_128(0,&eff0,&eff1);
+            break;
+
+          case DUALSTREAM_UNIFORM_PRECODINGj:
+            prec2A_TM4_128(1,&eff0,&eff1);
+            break;
+
+          case DUALSTREAM_PUSCH_PRECODING:
+            prec2A_TM4_128(pmi_ext[rb],&eff0,&eff1);
+            break;
+
+          default:
+            break;
+        }
+
+        energy_0 = _mm_add_epi32(energy_0,_mm_madd_epi16(eff0,eff0));
+        energy_1 = _mm_add_epi32(energy_1,_mm_madd_epi16(eff1,eff1));
+      }
+
+      ch0_128 += vecs_per_rb;
+      ch1_128 += vecs_per_rb;
+    }
+
+    // each 32-bit lane is divided separately before the lanes are summed
+    level_0 = 0;
+    level_1 = 0;
+
+    for (lane=0; lane<4; lane++) {
+      level_0 += ((int*)&energy_0)[lane]/re_count;
+      level_1 += ((int*)&energy_1)[lane]/re_count;
+    }
+
+    avg_0[aarx] = level_0;
+    avg_1[aarx] = level_1;
+  }
+
+  // sum the two receive antennas per stream, then report the weaker stream
+  // through both outputs
+  avg_0[0] += avg_0[1];
+  avg_1[0] += avg_1[1];
+  avg_0[0] = min (avg_0[0], avg_1[0]);
+  avg_1[0] = avg_0[0];
+
+  _mm_empty();
+  _m_empty();
+
+#elif defined(__arm__)
+
+#endif
+}
+
+
+
+/*void dlsch_channel_level_TM34(int **dl_ch_estimates_ext,
+                              LTE_DL_FRAME_PARMS *frame_parms,
+                              int *avg,
+                              uint8_t symbol,
+                              unsigned short nb_rb,
+                              MIMO_mode_t mimo_mode){
+
+#if defined(__x86_64__)||defined(__i386__)
+
+
+  short rb;
+  unsigned char aarx,nre=12,symbol_mod;
+  __m128i *dl_ch0_128,*dl_ch1_128, dl_ch0_128_tmp, dl_ch1_128_tmp,avg128D;
+
+  symbol_mod = (symbol>=(7-frame_parms->Ncp)) ? symbol-(7-frame_parms->Ncp) : symbol;
+
+  //clear average level
+  avg128D = _mm_setzero_si128();
+  avg[0] = 0;
+  avg[1] = 0;
+  // 5 is always a symbol with no pilots for both normal and extended prefix
+
+  if (((symbol_mod == 0) || (symbol_mod == (frame_parms->Ncp-1)))&&(frame_parms->nb_antenna_ports_eNB!=1))
+    nre=8;
+  else if (((symbol_mod == 0) || (symbol_mod == (frame_parms->Ncp-1)))&&(frame_parms->nb_antenna_ports_eNB==1))
+    nre=10;
+  else
+    nre=12;
+
+  for (aarx=0; aarx<frame_parms->nb_antennas_rx; aarx++) {
+    dl_ch0_128 = (__m128i *)&dl_ch_estimates_ext[aarx][symbol*frame_parms->N_RB_DL*12];
+    dl_ch1_128 = (__m128i *)&dl_ch_estimates_ext[2+aarx][symbol*frame_parms->N_RB_DL*12];
+
+    for (rb=0; rb<nb_rb; rb++) {
+
+      dl_ch0_128_tmp = _mm_load_si128(&dl_ch0_128[0]);
+      dl_ch1_128_tmp = _mm_load_si128(&dl_ch1_128[0]);
+
+      if (mimo_mode==LARGE_CDD)
+        prec2A_TM3_128(&dl_ch0_128_tmp,&dl_ch1_128_tmp);
+      else if (mimo_mode==DUALSTREAM_UNIFORM_PRECODING1)
+        prec2A_TM4_128(0,&dl_ch0_128_tmp,&dl_ch1_128_tmp);
+      else if (mimo_mode==DUALSTREAM_UNIFORM_PRECODINGj)
+        prec2A_TM4_128(1,&dl_ch0_128_tmp,&dl_ch1_128_tmp);
+
+      //      mmtmpD0 = _mm_madd_epi16(dl_ch0_128_tmp,dl_ch0_128_tmp);
+      avg128D = _mm_add_epi32(avg128D,_mm_madd_epi16(dl_ch0_128_tmp,dl_ch0_128_tmp));
+
+      dl_ch0_128_tmp = _mm_load_si128(&dl_ch0_128[1]);
+      dl_ch1_128_tmp = _mm_load_si128(&dl_ch1_128[1]);
+
+      if (mimo_mode==LARGE_CDD)
+        prec2A_TM3_128(&dl_ch0_128_tmp,&dl_ch1_128_tmp);
+      else if (mimo_mode==DUALSTREAM_UNIFORM_PRECODING1)
+        prec2A_TM4_128(0,&dl_ch0_128_tmp,&dl_ch1_128_tmp);
+      else if (mimo_mode==DUALSTREAM_UNIFORM_PRECODINGj)
+        prec2A_TM4_128(1,&dl_ch0_128_tmp,&dl_ch1_128_tmp);
+
+      //      mmtmpD1 = _mm_madd_epi16(dl_ch0_128_tmp,dl_ch0_128_tmp);
+      avg128D = _mm_add_epi32(avg128D,_mm_madd_epi16(dl_ch0_128_tmp,dl_ch0_128_tmp));
+
+      if (((symbol_mod == 0) || (symbol_mod == (frame_parms->Ncp-1)))&&(frame_parms->nb_antenna_ports_eNB!=1)) {
+        dl_ch0_128+=2;
+        dl_ch1_128+=2;
+      }
+      else {
+        dl_ch0_128_tmp = _mm_load_si128(&dl_ch0_128[2]);
+        dl_ch1_128_tmp = _mm_load_si128(&dl_ch1_128[2]);
+
+        if (mimo_mode==LARGE_CDD)
+          prec2A_TM3_128(&dl_ch0_128_tmp,&dl_ch1_128_tmp);
+        else if (mimo_mode==DUALSTREAM_UNIFORM_PRECODING1)
+          prec2A_TM4_128(0,&dl_ch0_128_tmp,&dl_ch1_128_tmp);
+        else if (mimo_mode==DUALSTREAM_UNIFORM_PRECODINGj)
+          prec2A_TM4_128(1,&dl_ch0_128_tmp,&dl_ch1_128_tmp);
+
+        //      mmtmpD2 = _mm_madd_epi16(dl_ch0_128_tmp,dl_ch0_128_tmp);
+        avg128D = _mm_add_epi32(avg128D,_mm_madd_epi16(dl_ch0_128_tmp,dl_ch0_128_tmp));
+
+        dl_ch0_128+=3;
+        dl_ch1_128+=3;
+      }
+    }
+
+    avg[aarx] = (((int*)&avg128D)[0])/(nb_rb*nre) +
+      (((int*)&avg128D)[1])/(nb_rb*nre) +
+      (((int*)&avg128D)[2])/(nb_rb*nre) +
+      (((int*)&avg128D)[3])/(nb_rb*nre);
+  }
+
+  // choose maximum of the 2 effective channels
+  avg[0] = cmax(avg[0],avg[1]);
+
+  _mm_empty();
+  _m_empty();
+
+#elif defined(__arm__)
+
+#endif
+}*/
+
+//compute average channel_level of effective (precoded) channel
+// Average energy of the effective (precoded) channel for one OFDM symbol.
+//
+// For every receive antenna the channel pair (rows aarx and 2+aarx of
+// dl_ch_estimates_ext) is loaded into temporaries and precoded with
+// prec2A_TM56_128 using the per-RB index pmi_ext[rb]; the squared magnitude
+// of the first output is accumulated. The stored estimates are not modified.
+//
+// dl_ch_estimates_ext : channel estimates, offset symbol*N_RB_DL*12 in each row
+// frame_parms         : Ncp, N_RB_DL, nb_antenna_ports_eNB and nb_antennas_rx are read
+// pmi_ext             : per-RB precoder index
+// avg                 : output; avg[0] and avg[1] are cleared first, avg[aarx]
+//                       receives the per-antenna average and avg[0] finally
+//                       holds the larger of avg[0] and avg[1]
+// symbol              : OFDM symbol index
+// nb_rb               : number of resource blocks (used as divisor, must be non-zero)
+//
+// Only the x86 SSE path is implemented; the ARM branch is empty.
+void dlsch_channel_level_TM56(int **dl_ch_estimates_ext,
+                              LTE_DL_FRAME_PARMS *frame_parms,
+                              unsigned char *pmi_ext,
+                              int *avg,
+                              uint8_t symbol,
+                              unsigned short nb_rb)
+{
+
+#if defined(__x86_64__)||defined(__i386__)
+
+  short rb;
+  unsigned char aarx,nre=12,symbol_mod;
+  __m128i *dl_ch0_128,*dl_ch1_128, dl_ch0_128_tmp, dl_ch1_128_tmp,avg128D;
+
+  symbol_mod = (symbol>=(7-frame_parms->Ncp)) ? symbol-(7-frame_parms->Ncp) : symbol;
+
+  //clear average level
+  avg[0] = 0;
+  avg[1] = 0;
+
+  // per-RB element count used as the averaging divisor
+  if (((symbol_mod == 0) || (symbol_mod == (frame_parms->Ncp-1)))&&(frame_parms->nb_antenna_ports_eNB!=1))
+    nre=8;
+  else if (((symbol_mod == 0) || (symbol_mod == (frame_parms->Ncp-1)))&&(frame_parms->nb_antenna_ports_eNB==1))
+    nre=10;
+  else
+    nre=12;
+
+  for (aarx=0; aarx<frame_parms->nb_antennas_rx; aarx++) {
+    dl_ch0_128 = (__m128i *)&dl_ch_estimates_ext[aarx][symbol*frame_parms->N_RB_DL*12];
+    dl_ch1_128 = (__m128i *)&dl_ch_estimates_ext[2+aarx][symbol*frame_parms->N_RB_DL*12];
+
+    // Restart the accumulator for every receive antenna. Clearing it only once
+    // before this loop would fold antenna 0's energy into avg[1], so the cmax
+    // below would always return the total instead of the larger antenna.
+    avg128D = _mm_setzero_si128();
+
+    for (rb=0; rb<nb_rb; rb++) {
+
+      dl_ch0_128_tmp = _mm_load_si128(&dl_ch0_128[0]);
+      dl_ch1_128_tmp = _mm_load_si128(&dl_ch1_128[0]);
+
+      prec2A_TM56_128(pmi_ext[rb],&dl_ch0_128_tmp,&dl_ch1_128_tmp);
+      avg128D = _mm_add_epi32(avg128D,_mm_madd_epi16(dl_ch0_128_tmp,dl_ch0_128_tmp));
+
+      dl_ch0_128_tmp = _mm_load_si128(&dl_ch0_128[1]);
+      dl_ch1_128_tmp = _mm_load_si128(&dl_ch1_128[1]);
+
+      prec2A_TM56_128(pmi_ext[rb],&dl_ch0_128_tmp,&dl_ch1_128_tmp);
+      avg128D = _mm_add_epi32(avg128D,_mm_madd_epi16(dl_ch0_128_tmp,dl_ch0_128_tmp));
+
+      if (((symbol_mod == 0) || (symbol_mod == (frame_parms->Ncp-1)))&&(frame_parms->nb_antenna_ports_eNB!=1)) {
+        // only two vectors are accumulated for this RB
+        dl_ch0_128+=2;
+        dl_ch1_128+=2;
+      }
+      else {
+        dl_ch0_128_tmp = _mm_load_si128(&dl_ch0_128[2]);
+        dl_ch1_128_tmp = _mm_load_si128(&dl_ch1_128[2]);
+
+        prec2A_TM56_128(pmi_ext[rb],&dl_ch0_128_tmp,&dl_ch1_128_tmp);
+        avg128D = _mm_add_epi32(avg128D,_mm_madd_epi16(dl_ch0_128_tmp,dl_ch0_128_tmp));
+
+        dl_ch0_128+=3;
+        dl_ch1_128+=3;
+      }
+    }
+
+    // each 32-bit lane is divided separately before the lanes are summed
+    avg[aarx] = (((int*)&avg128D)[0])/(nb_rb*nre) +
+      (((int*)&avg128D)[1])/(nb_rb*nre) +
+      (((int*)&avg128D)[2])/(nb_rb*nre) +
+      (((int*)&avg128D)[3])/(nb_rb*nre);
+  }
+
+  // choose maximum of the 2 effective channels
+  avg[0] = cmax(avg[0],avg[1]);
+
+  _mm_empty();
+  _m_empty();
+
+#elif defined(__arm__)
+
+
+#endif
+}
+
+//compute average channel_level for TM7
+// Average channel energy of one OFDM symbol on each (TX,RX) antenna pair,
+// computed from the dl_bf_ch_estimates_ext buffers.
+//
+// dl_bf_ch_estimates_ext : channel estimates, row (aatx<<1)+aarx, starting at
+//                          offset symbol*N_RB_DL*12 inside each row
+// frame_parms            : Ncp, N_RB_DL, nb_antenna_ports_eNB and nb_antennas_rx are read
+// avg                    : output; avg[(aatx<<1)+aarx] receives the average
+// symbol                 : OFDM symbol index
+// nb_rb                  : number of resource blocks (used as divisor, must be non-zero)
+//
+// Only the x86 SSE path is implemented; the ARM branch is empty.
+void dlsch_channel_level_TM7(int **dl_bf_ch_estimates_ext,
+                         LTE_DL_FRAME_PARMS *frame_parms,
+                         int *avg,
+                         uint8_t symbol,
+                         unsigned short nb_rb)
+{
+
+#if defined(__x86_64__)||defined(__i386__)
+
+  const int symbols_per_slot = 7 - frame_parms->Ncp;
+  const unsigned char symbol_mod = (symbol >= symbols_per_slot) ? symbol - symbols_per_slot : symbol;
+  const int pilot_symbol = (symbol_mod == 0) || (symbol_mod == (frame_parms->Ncp-1));
+  const int vecs_per_rb = (pilot_symbol && (frame_parms->nb_antenna_ports_eNB != 1)) ? 2 : 3;
+  unsigned char aatx, aarx, nre;
+  short rb;
+  int v;
+  const __m128i *ch128;
+  __m128i energy;
+
+  // per-RB element count used as the averaging divisor; it depends only on
+  // the symbol and the cyclic prefix setting, so it is chosen once up front
+  if (pilot_symbol)
+    nre = 10;
+  else if ((frame_parms->Ncp==0) && (symbol==3 || symbol==6 || symbol==9 || symbol==12))
+    nre = 9;
+  else if ((frame_parms->Ncp==1) && (symbol==4 || symbol==7 || symbol==9))
+    nre = 8;
+  else
+    nre = 12;
+
+  for (aatx=0; aatx<frame_parms->nb_antenna_ports_eNB; aatx++) {
+    for (aarx=0; aarx<frame_parms->nb_antennas_rx; aarx++) {
+      //clear average level
+      energy = _mm_setzero_si128();
+
+      ch128 = (const __m128i *)&dl_bf_ch_estimates_ext[(aatx<<1)+aarx][symbol*frame_parms->N_RB_DL*12];
+
+      for (rb=0; rb<nb_rb; rb++) {
+        for (v=0; v<vecs_per_rb; v++)
+          energy = _mm_add_epi32(energy,_mm_madd_epi16(ch128[v],ch128[v]));
+
+        ch128 += vecs_per_rb;
+      }
+
+      // horizontal sum of the four 32-bit lanes, then a single division
+      avg[(aatx<<1)+aarx] = (((int*)&energy)[0] +
+                             ((int*)&energy)[1] +
+                             ((int*)&energy)[2] +
+                             ((int*)&energy)[3])/(nb_rb*nre);
+    }
+  }
+
+  _mm_empty();
+  _m_empty();
+
+#elif defined(__arm__)
+
+#endif
+}
+//#define ONE_OVER_2_Q15 16384
+// Alamouti receive combining for one OFDM symbol, done in place.
+//
+// Row 2 of rxdataF_comp is combined into row 0, two resource elements at a
+// time, and rows 2 of dl_ch_mag / dl_ch_magb are added (with saturation) onto
+// rows 0. No amplitude rescaling is applied to the combined outputs here.
+//
+// frame_parms  : Ncp and N_RB_DL are read
+// rxdataF_comp : matched-filter outputs; row 0 is overwritten with the result
+// dl_ch_mag    : first magnitude buffer; row 0 is updated
+// dl_ch_magb   : second magnitude buffer; row 0 is updated
+// symbol       : OFDM symbol index
+// nb_rb        : number of resource blocks to process
+//
+// Only the x86 SSE path is implemented; the ARM branch is empty.
+void dlsch_alamouti(LTE_DL_FRAME_PARMS *frame_parms,
+                    int **rxdataF_comp,
+                    int **dl_ch_mag,
+                    int **dl_ch_magb,
+                    unsigned char symbol,
+                    unsigned short nb_rb)
+{
+
+#if defined(__x86_64__)||defined(__i386__)
+
+  const int sym_offset = (symbol*frame_parms->N_RB_DL*12);
+  const int symbols_per_slot = 7 - frame_parms->Ncp;
+  const uint8_t symbol_mod = (symbol >= symbols_per_slot) ? symbol - symbols_per_slot : symbol;
+  const uint8_t pilots = ((symbol_mod==0)||(symbol_mod==(4-frame_parms->Ncp))) ? 1 : 0;
+  // with pilots an RB carries 8 data REs (two vectors), otherwise 12 (three)
+  const unsigned char re_per_rb = (pilots==0) ? 12 : 8;
+  const int vecs_per_rb = (pilots==0) ? 3 : 2;
+  short *y0 = (short*)&rxdataF_comp[0][sym_offset];  //tx antenna 0  h0*y
+  short *y1 = (short*)&rxdataF_comp[2][sym_offset];  //tx antenna 1  h1*y
+  __m128i *mag0  = (__m128i *)&dl_ch_mag[0][sym_offset];
+  __m128i *mag1  = (__m128i *)&dl_ch_mag[2][sym_offset];
+  __m128i *mag0b = (__m128i *)&dl_ch_magb[0][sym_offset];
+  __m128i *mag1b = (__m128i *)&dl_ch_magb[2][sym_offset];
+  unsigned char rb, re;
+  int v;
+
+  for (rb=0; rb<nb_rb; rb++) {
+
+    // Alamouti RX combining: each step handles one pair of REs
+    // (four shorts: re/im of the first RE, re/im of the second)
+    for (re=0; re<re_per_rb; re+=2) {
+      y0[0] += y1[2];
+      y0[1] -= y1[3];
+
+      y0[2] -= y1[0];
+      y0[3] += y1[1];
+
+      y0 += 4;
+      y1 += 4;
+    }
+
+    // compute levels for 16QAM or 64 QAM llr unit
+    for (v=0; v<vecs_per_rb; v++) {
+      mag0[v]  = _mm_adds_epi16(mag0[v],mag1[v]);
+      mag0b[v] = _mm_adds_epi16(mag0b[v],mag1b[v]);
+    }
+
+    mag0  += vecs_per_rb;
+    mag1  += vecs_per_rb;
+    mag0b += vecs_per_rb;
+    mag1b += vecs_per_rb;
+  }
+
+  _mm_empty();
+  _m_empty();
+
+#elif defined(__arm__)
+
+#endif
+}
+
+
+//==============================================================================================
+// Extraction functions
+//==============================================================================================
+
+unsigned short dlsch_extract_rbs_single(int **rxdataF,
+                                        int **dl_ch_estimates,
+                                        int **rxdataF_ext,
+                                        int **dl_ch_estimates_ext,
+                                        unsigned short pmi,
+                                        unsigned char *pmi_ext,
+                                        unsigned int *rb_alloc,
+                                        unsigned char symbol,
+                                        unsigned char subframe,
+                                        uint32_t high_speed_flag,
+                                        LTE_DL_FRAME_PARMS *frame_parms) {
+
+
+
+  unsigned short rb,nb_rb=0;
+  unsigned char rb_alloc_ind;
+  unsigned char i,aarx,l,nsymb,skip_half=0,sss_symb,pss_symb=0;
+  int *dl_ch0,*dl_ch0_ext,*rxF,*rxF_ext;
+
+
+
+  unsigned char symbol_mod,pilots=0,j=0,poffset=0;
+
+  symbol_mod = (symbol>=(7-frame_parms->Ncp)) ? symbol-(7-frame_parms->Ncp) : symbol;
+  pilots = ((symbol_mod==0)||(symbol_mod==(4-frame_parms->Ncp))) ? 1 : 0;
+  l=symbol;
+  nsymb = (frame_parms->Ncp==NORMAL) ? 14:12;
+
+  if (frame_parms->frame_type == TDD) {  // TDD
+    sss_symb = nsymb-1;
+    pss_symb = 2;
+  } else {
+    sss_symb = (nsymb>>1)-2;
+    pss_symb = (nsymb>>1)-1;
+  }
+
+  if (symbol_mod==(4-frame_parms->Ncp))
+    poffset=3;
+
+  for (aarx=0; aarx<frame_parms->nb_antennas_rx; aarx++) {
+
+    if (high_speed_flag == 1)
+      dl_ch0     = &dl_ch_estimates[aarx][5+(symbol*(frame_parms->ofdm_symbol_size))];
+    else
+      dl_ch0     = &dl_ch_estimates[aarx][5];
+
+    dl_ch0_ext = &dl_ch_estimates_ext[aarx][symbol*(frame_parms->N_RB_DL*12)];
+
+    rxF_ext   = &rxdataF_ext[aarx][symbol*(frame_parms->N_RB_DL*12)];
+    rxF       = &rxdataF[aarx][(frame_parms->first_carrier_offset + (symbol*(frame_parms->ofdm_symbol_size)))];
+
+    if ((frame_parms->N_RB_DL&1) == 0)  // even number of RBs
+
+      for (rb=0;rb<frame_parms->N_RB_DL;rb++) {
+
+        if (rb < 32)
+          rb_alloc_ind = (rb_alloc[0]>>rb) & 1;
+        else if (rb < 64)
+          rb_alloc_ind = (rb_alloc[1]>>(rb-32)) & 1;
+        else if (rb < 96)
+          rb_alloc_ind = (rb_alloc[2]>>(rb-64)) & 1;
+        else if (rb < 100)
+          rb_alloc_ind = (rb_alloc[3]>>(rb-96)) & 1;
+        else
+          rb_alloc_ind = 0;
+
+        if (rb_alloc_ind == 1)
+          nb_rb++;
+
+        // For second half of RBs skip DC carrier
+        if (rb==(frame_parms->N_RB_DL>>1)) {
+          rxF       = &rxdataF[aarx][(1 + (symbol*(frame_parms->ofdm_symbol_size)))];
+          //dl_ch0++;
+        }
+
+        // PBCH
+        if ((subframe==0) && (rb>=((frame_parms->N_RB_DL>>1)-3)) && (rb<((frame_parms->N_RB_DL>>1)+3)) && (l>=nsymb>>1) && (l<((nsymb>>1) + 4))) {
+          rb_alloc_ind = 0;
+        }
+
+        //SSS
+        if (((subframe==0)||(subframe==5)) && (rb>=((frame_parms->N_RB_DL>>1)-3)) && (rb<((frame_parms->N_RB_DL>>1)+3)) && (l==sss_symb) ) {
+          rb_alloc_ind = 0;
+        }
+
+
+        if (frame_parms->frame_type == FDD) {
+          //PSS
+          if (((subframe==0)||(subframe==5)) && (rb>=((frame_parms->N_RB_DL>>1)-3)) && (rb<((frame_parms->N_RB_DL>>1)+3)) && (l==pss_symb) ) {
+            rb_alloc_ind = 0;
+          }
+        }
+
+        if ((frame_parms->frame_type == TDD) &&
+            (subframe==6)) { //TDD Subframe 6
+          if ((rb>=((frame_parms->N_RB_DL>>1)-3)) && (rb<((frame_parms->N_RB_DL>>1)+3)) && (l==pss_symb) ) {
+            rb_alloc_ind = 0;
+          }
+        }
+
+        if (rb_alloc_ind==1) {
+          *pmi_ext = (pmi>>((rb>>2)<<1))&3;
+          memcpy(dl_ch0_ext,dl_ch0,12*sizeof(int));
+
+          /*
+            printf("rb %d\n",rb);
+            for (i=0;i<12;i++)
+            printf("(%d %d)",((short *)dl_ch0)[i<<1],((short*)dl_ch0)[1+(i<<1)]);
+            printf("\n");
+          */
+          if (pilots==0) {
+            for (i=0; i<12; i++) {
+              rxF_ext[i]=rxF[i];
+              /*
+                printf("%d : (%d,%d)\n",(rxF+i-&rxdataF[aarx][( (symbol*(frame_parms->ofdm_symbol_size)))]),
+                ((short*)&rxF[i])[0],((short*)&rxF[i])[1]);*/
+            }
+
+            dl_ch0_ext+=12;
+            rxF_ext+=12;
+          } else {
+            j=0;
+
+            for (i=0; i<12; i++) {
+              if ((i!=(frame_parms->nushift+poffset)) &&
+                  (i!=((frame_parms->nushift+poffset+6)%12))) {
+                rxF_ext[j]=rxF[i];
+                //            printf("extract rb %d, re %d => (%d,%d)\n",rb,i,*(short *)&rxF_ext[j],*(1+(short*)&rxF_ext[j]));
+                dl_ch0_ext[j++]=dl_ch0[i];
+
+              }
+            }
+
+            dl_ch0_ext+=10;
+            rxF_ext+=10;
+          }
+
+
+        }
+
+        dl_ch0+=12;
+        rxF+=12;
+
+      }
+    else {  // Odd number of RBs
+      for (rb=0; rb<frame_parms->N_RB_DL>>1; rb++) {
+#ifdef DEBUG_DLSCH_DEMOD
+        printf("dlch_ext %d\n",dl_ch0_ext-&dl_ch_estimates_ext[aarx][0]);
+#endif
+        skip_half=0;
+
+        if (rb < 32)
+          rb_alloc_ind = (rb_alloc[0]>>rb) & 1;
+        else if (rb < 64)
+          rb_alloc_ind = (rb_alloc[1]>>(rb-32)) & 1;
+        else if (rb < 96)
+          rb_alloc_ind = (rb_alloc[2]>>(rb-64)) & 1;
+        else if (rb < 100)
+          rb_alloc_ind = (rb_alloc[3]>>(rb-96)) & 1;
+        else
+          rb_alloc_ind = 0;
+
+        if (rb_alloc_ind == 1)
+          nb_rb++;
+
+
+        // PBCH
+        if ((subframe==0) && (rb>((frame_parms->N_RB_DL>>1)-3)) && (rb<((frame_parms->N_RB_DL>>1)+3)) && (l>=(nsymb>>1)) && (l<((nsymb>>1) + 4))) {
+          rb_alloc_ind = 0;
+        }
+
+        //PBCH subframe 0, symbols nsymb>>1 ... nsymb>>1 + 3
+        if ((subframe==0) && (rb==((frame_parms->N_RB_DL>>1)-3)) && (l>=(nsymb>>1)) && (l<((nsymb>>1) + 4)))
+          skip_half=1;
+        else if ((subframe==0) && (rb==((frame_parms->N_RB_DL>>1)+3)) && (l>=(nsymb>>1)) && (l<((nsymb>>1) + 4)))
+          skip_half=2;
+
+        //SSS
+
+        if (((subframe==0)||(subframe==5)) &&
+            (rb>((frame_parms->N_RB_DL>>1)-3)) &&
+            (rb<((frame_parms->N_RB_DL>>1)+3)) &&
+            (l==sss_symb) ) {
+          rb_alloc_ind = 0;
+        }
+        //SSS
+        if (((subframe==0)||(subframe==5)) &&
+            (rb==((frame_parms->N_RB_DL>>1)-3)) &&
+            (l==sss_symb))
+          skip_half=1;
+        else if (((subframe==0)||(subframe==5)) &&
+                 (rb==((frame_parms->N_RB_DL>>1)+3)) &&
+                 (l==sss_symb))
+          skip_half=2;
+
+        //PSS in subframe 0/5 if FDD
+        if (frame_parms->frame_type == FDD) {  //FDD
+
+          if (((subframe==0)||(subframe==5)) &&
+              (rb>((frame_parms->N_RB_DL>>1)-3)) &&
+              (rb<((frame_parms->N_RB_DL>>1)+3)) &&
+              (l==pss_symb) ) {
+            rb_alloc_ind = 0;
+          }
+
+          if (((subframe==0)||(subframe==5)) && (rb==((frame_parms->N_RB_DL>>1)-3)) && (l==pss_symb))
+            skip_half=1;
+          else if (((subframe==0)||(subframe==5)) && (rb==((frame_parms->N_RB_DL>>1)+3)) && (l==pss_symb))
+            skip_half=2;
+        }
+
+        if ((frame_parms->frame_type == TDD) &&
+            (subframe==6)){  //TDD Subframe 6
+          if ((rb>((frame_parms->N_RB_DL>>1)-3)) && (rb<((frame_parms->N_RB_DL>>1)+3)) && (l==pss_symb) ) {
+            rb_alloc_ind = 0;
+          }
+          if ((rb==((frame_parms->N_RB_DL>>1)-3)) && (l==pss_symb))
+            skip_half=1;
+          else if ((rb==((frame_parms->N_RB_DL>>1)+3)) && (l==pss_symb))
+            skip_half=2;
+        }
+
+
+        if (rb_alloc_ind==1) {
+
+#ifdef DEBUG_DLSCH_DEMOD
+          printf("rb %d/symbol %d (skip_half %d)\n",rb,l,skip_half);
+#endif
+          if (pilots==0) {
+            //      printf("Extracting w/o pilots (symbol %d, rb %d, skip_half %d)\n",l,rb,skip_half);
+            if (skip_half==1) {
+              memcpy(dl_ch0_ext,dl_ch0,6*sizeof(int));
+
+              for (i=0; i<6; i++) {
+                rxF_ext[i]=rxF[i];
+#ifdef DEBUG_DLSCH_DEMOD
+                printf("extract rb %d, re %d => (%d,%d)\n",rb,i,*(short *)&rxF_ext[i],*(1+(short*)&rxF_ext[i]));
+#endif
+              }
+              dl_ch0_ext+=6;
+              rxF_ext+=6;
+            } else if (skip_half==2) {
+              memcpy(dl_ch0_ext,dl_ch0+6,6*sizeof(int));
+
+              for (i=0; i<6; i++) {
+                rxF_ext[i]=rxF[(i+6)];
+#ifdef DEBUG_DLSCH_DEMOD
+                printf("extract rb %d, re %d => (%d,%d)\n",rb,i,*(short *)&rxF_ext[i],*(1+(short*)&rxF_ext[i]));
+#endif
+              }
+              dl_ch0_ext+=6;
+              rxF_ext+=6;
+            } else {
+              memcpy(dl_ch0_ext,dl_ch0,12*sizeof(int));
+
+              for (i=0; i<12; i++) {
+                rxF_ext[i]=rxF[i];
+#ifdef DEBUG_DLSCH_DEMOD
+                printf("extract rb %d, re %d => (%d,%d)\n",rb,i,*(short *)&rxF_ext[i],*(1+(short*)&rxF_ext[i]));
+#endif
+              }
+              dl_ch0_ext+=12;
+              rxF_ext+=12;
+            }
+          } else {
+            //      printf("Extracting with pilots (symbol %d, rb %d, skip_half %d)\n",l,rb,skip_half);
+            j=0;
+
+            if (skip_half==1) {
+              for (i=0; i<6; i++) {
+                if (i!=((frame_parms->nushift+poffset)%6)) {
+                  rxF_ext[j]=rxF[i];
+#ifdef DEBUG_DLSCH_DEMOD
+                  printf("extract rb %d, re %d => (%d,%d)\n",rb,i,*(short *)&rxF_ext[j],*(1+(short*)&rxF_ext[j]));
+#endif
+                  dl_ch0_ext[j++]=dl_ch0[i];
+                }
+              }
+              rxF_ext+=5;
+              dl_ch0_ext+=5;
+            } else if (skip_half==2) {
+              for (i=0; i<6; i++) {
+                if (i!=((frame_parms->nushift+poffset)%6)) {
+                  rxF_ext[j]=rxF[(i+6)];
+#ifdef DEBUG_DLSCH_DEMOD
+                  printf("extract rb %d, re %d => (%d,%d)\n",rb,i,*(short *)&rxF_ext[j],*(1+(short*)&rxF_ext[j]));
+#endif
+                  dl_ch0_ext[j++]=dl_ch0[i+6];
+                }
+              }
+
+              dl_ch0_ext+=5;
+              rxF_ext+=5;
+            } else {
+              for (i=0; i<12; i++) {
+                if ((i!=(frame_parms->nushift+poffset)) &&
+                    (i!=((frame_parms->nushift+poffset+6)%12))) {
+                  rxF_ext[j]=rxF[i];
+#ifdef DEBUG_DLSCH_DEMOD
+                  printf("extract rb %d, re %d => (%d,%d)\n",rb,i,*(short *)&rxF_ext[j],*(1+(short*)&rxF_ext[j]));
+#endif
+                  dl_ch0_ext[j++]=dl_ch0[i];
+
+                }
+              }
+
+              dl_ch0_ext+=10;
+              rxF_ext+=10;
+            }
+          }
+        }
+        dl_ch0+=12;
+        rxF+=12;
+      } // first half loop
+
+
+      // Do middle RB (around DC)
+      if (rb < 32)
+        rb_alloc_ind = (rb_alloc[0]>>rb) & 1;
+      else if (rb < 64)
+        rb_alloc_ind = (rb_alloc[1]>>(rb-32)) & 1;
+      else if (rb < 96)
+        rb_alloc_ind = (rb_alloc[2]>>(rb-64)) & 1;
+      else if (rb < 100)
+        rb_alloc_ind = (rb_alloc[3]>>(rb-96)) & 1;
+      else
+        rb_alloc_ind = 0;
+
+
+      if (rb_alloc_ind == 1)
+        nb_rb++;
+
+      // PBCH
+
+      if ((subframe==0) &&
+          (l>=(nsymb>>1)) &&
+          (l<((nsymb>>1) + 4))) {
+        rb_alloc_ind = 0;
+      }
+
+      //SSS
+      if (((subframe==0)||(subframe==5)) && (l==sss_symb) ) {
+        rb_alloc_ind = 0;
+      }
+
+      if (frame_parms->frame_type == FDD) {
+        //PSS
+        if (((subframe==0)||(subframe==5)) && (l==pss_symb) ) {
+          rb_alloc_ind = 0;
+        }
+      }
+
+      //PSS
+      if ((frame_parms->frame_type == TDD) &&
+          (subframe==6) &&
+          (l==pss_symb) ) {
+        rb_alloc_ind = 0;
+      }
+
+
+      //  printf("dlch_ext %d\n",dl_ch0_ext-&dl_ch_estimates_ext[aarx][0]);
+      //      printf("DC rb %d (%p)\n",rb,rxF);
+      if (rb_alloc_ind==1) {
+#ifdef DEBUG_DLSCH_DEMOD
+        printf("rb %d/symbol %d (skip_half %d)\n",rb,l,skip_half);
+#endif
+        if (pilots==0) {
+          for (i=0; i<6; i++) {
+            dl_ch0_ext[i]=dl_ch0[i];
+            rxF_ext[i]=rxF[i];
+          }
+
+          rxF       = &rxdataF[aarx][((symbol*(frame_parms->ofdm_symbol_size)))];
+
+          for (; i<12; i++) {
+            dl_ch0_ext[i]=dl_ch0[i];
+            rxF_ext[i]=rxF[(1+i-6)];
+          }
+
+          dl_ch0_ext+=12;
+          rxF_ext+=12;
+        } else { // pilots==1
+          j=0;
+
+          for (i=0; i<6; i++) {
+            if (i!=((frame_parms->nushift+poffset)%6)) {
+              dl_ch0_ext[j]=dl_ch0[i];
+              rxF_ext[j++]=rxF[i];
+#ifdef DEBUG_DLSCH_DEMOD
+              printf("**extract rb %d, re %d => (%d,%d)\n",rb,i,*(short *)&rxF_ext[j-1],*(1+(short*)&rxF_ext[j-1]));
+#endif
+            }
+          }
+
+          rxF       = &rxdataF[aarx][((symbol*(frame_parms->ofdm_symbol_size)))];
+
+          for (; i<12; i++) {
+            if (i!=((frame_parms->nushift+6+poffset)%12)) {
+              dl_ch0_ext[j]=dl_ch0[i];
+              rxF_ext[j++]=rxF[(1+i-6)];
+#ifdef DEBUG_DLSCH_DEMOD
+              printf("**extract rb %d, re %d => (%d,%d)\n",rb,i,*(short *)&rxF_ext[j-1],*(1+(short*)&rxF_ext[j-1]));
+#endif
+            }
+          }
+
+          dl_ch0_ext+=10;
+          rxF_ext+=10;
+        } // symbol_mod==0
+      } // rballoc==1
+      else {
+        rxF       = &rxdataF[aarx][((symbol*(frame_parms->ofdm_symbol_size)))];
+      }
+
+      dl_ch0+=12;
+      rxF+=7;
+      rb++;
+
+      for (;rb<frame_parms->N_RB_DL;rb++) {
+        //      printf("dlch_ext %d\n",dl_ch0_ext-&dl_ch_estimates_ext[aarx][0]);
+        //      printf("rb %d (%p)\n",rb,rxF);
+        skip_half=0;
+
+        if (rb < 32)
+          rb_alloc_ind = (rb_alloc[0]>>rb) & 1;
+        else if (rb < 64)
+          rb_alloc_ind = (rb_alloc[1]>>(rb-32)) & 1;
+        else if (rb < 96)
+          rb_alloc_ind = (rb_alloc[2]>>(rb-64)) & 1;
+        else if (rb < 100)
+          rb_alloc_ind = (rb_alloc[3]>>(rb-96)) & 1;
+        else
+          rb_alloc_ind = 0;
+
+        if (rb_alloc_ind == 1)
+          nb_rb++;
+
+        // PBCH
+        if ((subframe==0) && (rb>((frame_parms->N_RB_DL>>1)-3)) && (rb<((frame_parms->N_RB_DL>>1)+3)) && (l>=nsymb>>1) && (l<((nsymb>>1) + 4))) {
+          rb_alloc_ind = 0;
+        }
+        //PBCH subframe 0, symbols nsymb>>1 ... nsymb>>1 + 3
+        if ((subframe==0) && (rb==((frame_parms->N_RB_DL>>1)-3)) && (l>=(nsymb>>1)) && (l<((nsymb>>1) + 4)))
+          skip_half=1;
+        else if ((subframe==0) && (rb==((frame_parms->N_RB_DL>>1)+3)) && (l>=(nsymb>>1)) && (l<((nsymb>>1) + 4)))
+          skip_half=2;
+
+        //SSS
+        if (((subframe==0)||(subframe==5)) && (rb>((frame_parms->N_RB_DL>>1)-3)) && (rb<((frame_parms->N_RB_DL>>1)+3)) && (l==sss_symb) ) {
+          rb_alloc_ind = 0;
+        }
+        //SSS
+        if (((subframe==0)||(subframe==5)) && (rb==((frame_parms->N_RB_DL>>1)-3)) && (l==sss_symb))
+          skip_half=1;
+        else if (((subframe==0)||(subframe==5)) && (rb==((frame_parms->N_RB_DL>>1)+3)) && (l==sss_symb))
+          skip_half=2;
+        if (frame_parms->frame_type == FDD) {
+          //PSS
+          if (((subframe==0)||(subframe==5)) && (rb>((frame_parms->N_RB_DL>>1)-3)) && (rb<((frame_parms->N_RB_DL>>1)+3)) && (l==pss_symb) ) {
+            rb_alloc_ind = 0;
+          }
+
+          //PSS
+
+          if (((subframe==0)||(subframe==5)) && (rb==((frame_parms->N_RB_DL>>1)-3)) && (l==pss_symb))
+            skip_half=1;
+          else if (((subframe==0)||(subframe==5)) && (rb==((frame_parms->N_RB_DL>>1)+3)) && (l==pss_symb))
+            skip_half=2;
+        }
+
+        if ((frame_parms->frame_type == TDD) &&
+
+            (subframe==6)) { //TDD Subframe 6
+          if ((rb>((frame_parms->N_RB_DL>>1)-3)) && (rb<((frame_parms->N_RB_DL>>1)+3)) && (l==pss_symb) ) {
+            rb_alloc_ind = 0;
+          }
+
+          if ((rb==((frame_parms->N_RB_DL>>1)-3)) && (l==pss_symb))
+            skip_half=1;
+          else if ((rb==((frame_parms->N_RB_DL>>1)+3)) && (l==pss_symb))
+            skip_half=2;
+        }
+
+        if (rb_alloc_ind==1) {
+#ifdef DEBUG_DLSCH_DEMOD
+          printf("rb %d/symbol %d (skip_half %d)\n",rb,l,skip_half);
+#endif
+          /*
+            printf("rb %d\n",rb);
+            for (i=0;i<12;i++)
+            printf("(%d %d)",((short *)dl_ch0)[i<<1],((short*)dl_ch0)[1+(i<<1)]);
+            printf("\n");
+          */
+          if (pilots==0) {
+            //      printf("Extracting w/o pilots (symbol %d, rb %d, skip_half %d)\n",l,rb,skip_half);
+            if (skip_half==1) {
+              memcpy(dl_ch0_ext,dl_ch0,6*sizeof(int));
+
+              for (i=0; i<6; i++) {
+                rxF_ext[i]=rxF[i];
+#ifdef DEBUG_DLSCH_DEMOD
+                printf("extract rb %d, re %d => (%d,%d)\n",rb,i,*(short *)&rxF_ext[i],*(1+(short*)&rxF_ext[i]));
+#endif
+              }
+              dl_ch0_ext+=6;
+              rxF_ext+=6;
+
+            } else if (skip_half==2) {
+              memcpy(dl_ch0_ext,dl_ch0+6,6*sizeof(int));
+
+              for (i=0; i<6; i++) {
+                rxF_ext[i]=rxF[(i+6)];
+#ifdef DEBUG_DLSCH_DEMOD
+                printf("extract rb %d, re %d => (%d,%d)\n",rb,i,*(short *)&rxF_ext[i],*(1+(short*)&rxF_ext[i]));
+#endif
+              }
+              dl_ch0_ext+=6;
+              rxF_ext+=6;
+
+            } else {
+              memcpy(dl_ch0_ext,dl_ch0,12*sizeof(int));
+
+              for (i=0; i<12; i++) {
+                rxF_ext[i]=rxF[i];
+#ifdef DEBUG_DLSCH_DEMOD
+                printf("extract rb %d, re %d => (%d,%d)\n",rb,i,*(short *)&rxF_ext[i],*(1+(short*)&rxF_ext[i]));
+#endif
+              }
+              dl_ch0_ext+=12;
+              rxF_ext+=12;
+            }
+          } else {
+            //      printf("Extracting with pilots (symbol %d, rb %d, skip_half %d)\n",l,rb,skip_half);
+            j=0;
+
+            if (skip_half==1) {
+              for (i=0; i<6; i++) {
+                if (i!=((frame_parms->nushift+poffset)%6)) {
+                  rxF_ext[j]=rxF[i];
+#ifdef DEBUG_DLSCH_DEMOD
+                  printf("extract rb %d, re %d => (%d,%d)\n",rb,i,*(short *)&rxF_ext[j],*(1+(short*)&rxF_ext[j]));
+#endif
+                  dl_ch0_ext[j++]=dl_ch0[i];
+                }
+              }
+
+              dl_ch0_ext+=5;
+              rxF_ext+=5;
+            } else if (skip_half==2) {
+              for (i=0; i<6; i++) {
+                if (i!=((frame_parms->nushift+poffset)%6)) {
+                  rxF_ext[j]=rxF[(i+6)];
+#ifdef DEBUG_DLSCH_DEMOD
+                  printf("extract rb %d, re %d => (%d,%d)\n",rb,i,*(short *)&rxF_ext[j],*(1+(short*)&rxF_ext[j]));
+#endif
+                  dl_ch0_ext[j++]=dl_ch0[i+6];
+                }
+              }
+
+              dl_ch0_ext+=5;
+              rxF_ext+=5;
+            } else {
+              for (i=0; i<12; i++) {
+                if ((i!=(frame_parms->nushift+poffset)) &&
+                    (i!=((frame_parms->nushift+poffset+6)%12))) {
+                  rxF_ext[j]=rxF[i];
+#ifdef DEBUG_DLSCH_DEMOD
+                  printf("extract rb %d, re %d => (%d,%d)\n",rb,i,*(short *)&rxF_ext[j],*(1+(short*)&rxF_ext[j]));
+#endif
+                  dl_ch0_ext[j++]=dl_ch0[i];
+                }
+              }
+              dl_ch0_ext+=10;
+              rxF_ext+=10;
+            }
+          } // pilots=0
+        }
+
+        dl_ch0+=12;
+        rxF+=12;
+      }
+    }
+  }
+
+
+  return(nb_rb/frame_parms->nb_antennas_rx);
+}
+
+/*
+ * Extract, for one OFDM symbol, the resource elements (REs) of the allocated
+ * PRBs together with the matching channel estimates of two transmit antenna
+ * ports.
+ *
+ * For every receive antenna aarx the function reads rxdataF[aarx] and the
+ * estimates dl_ch_estimates[aarx] / dl_ch_estimates[2+aarx], and packs the
+ * selected REs contiguously into rxdataF_ext[aarx], dl_ch_estimates_ext[aarx]
+ * and dl_ch_estimates_ext[2+aarx], starting at offset symbol*N_RB_DL*12.
+ *
+ * REs are left out in three situations:
+ *  - on symbols carrying cell-specific pilots (symbol_mod 0 and 4-Ncp) the
+ *    four REs nushift, nushift+3, nushift+6, nushift+9 (mod 12) of each PRB
+ *    are skipped, leaving 8 REs per PRB;
+ *  - PRBs of the central band are skipped on symbols carrying PBCH/SSS/PSS;
+ *    with an odd N_RB_DL only one half of the two bordering PRBs is kept;
+ *  - the DC carrier, which the middle PRB of an odd-sized band straddles.
+ *
+ * Parameters:
+ *  rxdataF             frequency-domain RX samples, [rx antenna][sample]
+ *  dl_ch_estimates     channel estimates; rows aarx and 2+aarx are read
+ *  rxdataF_ext         output: extracted RX samples
+ *  dl_ch_estimates_ext output: extracted estimates, rows aarx and 2+aarx
+ *  pmi                 PMI word, forwarded to get_pmi()
+ *  pmi_ext             output: one get_pmi() value per extracted PRB; it is
+ *                      rewritten from its start for every receive antenna
+ *  rb_alloc            allocation bitmap, PRB n is bit (n%32) of word n/32;
+ *                      PRBs >= 100 are treated as not allocated
+ *  symbol              OFDM symbol index inside the subframe
+ *  subframe            subframe index, used for the PBCH/SSS/PSS exclusion
+ *  high_speed_flag     1: read the estimates at this symbol's offset,
+ *                      otherwise read them from the start of the buffer
+ *  frame_parms         frame configuration (N_RB_DL, Ncp, nushift, ...)
+ *  mimo_mode           forwarded to get_pmi()
+ *
+ * Returns the number of PRBs set in rb_alloc (counted before the
+ * PBCH/SSS/PSS exclusion): the count accumulated over all receive antennas
+ * divided by nb_antennas_rx.
+ */
+unsigned short dlsch_extract_rbs_dual(int **rxdataF,
+                                      int **dl_ch_estimates,
+                                      int **rxdataF_ext,
+                                      int **dl_ch_estimates_ext,
+                                      unsigned short pmi,
+                                      unsigned char *pmi_ext,
+                                      unsigned int *rb_alloc,
+                                      unsigned char symbol,
+                                      unsigned char subframe,
+                                      uint32_t high_speed_flag,
+                                      LTE_DL_FRAME_PARMS *frame_parms,
+                                      MIMO_mode_t mimo_mode) {
+
+  int prb,nb_rb=0;
+  int prb_off,prb_off2;
+  int rb_alloc_ind,skip_half=0,sss_symb,pss_symb=0,nsymb,l;
+  int i,aarx;
+  int32_t *dl_ch0,*dl_ch0p,*dl_ch0_ext,*dl_ch1,*dl_ch1p,*dl_ch1_ext,*rxF,*rxF_ext;
+  int symbol_mod,pilots=0,j=0;
+  unsigned char *pmi_loc;
+  int half_rb,sym_off,ext_off,nushift;  // loop-invariant values, computed once
+  int sync_even,sync_odd;               // symbol carries PBCH/SSS/PSS in the central band
+  int half_src,n_re;                    // source offset / RE count of a (half) PRB copy
+
+  // symbol index inside its slot
+  symbol_mod = (symbol>=(7-frame_parms->Ncp)) ? symbol-(7-frame_parms->Ncp) : symbol;
+
+  // symbols that carry cell-specific pilots
+  if ((symbol_mod == 0) || (symbol_mod == (4-frame_parms->Ncp)))
+    pilots=1;
+
+  nsymb = (frame_parms->Ncp==NORMAL) ? 14:12;
+  l=symbol;
+
+  if (frame_parms->frame_type == TDD) {  // TDD
+    sss_symb = nsymb-1;
+    pss_symb = 2;
+  } else {
+    sss_symb = (nsymb>>1)-2;
+    pss_symb = (nsymb>>1)-1;
+  }
+
+  half_rb = frame_parms->N_RB_DL>>1;
+  sym_off = symbol*(frame_parms->ofdm_symbol_size);
+  ext_off = symbol*(frame_parms->N_RB_DL*12);
+  nushift = frame_parms->nushift;
+
+  // Does this symbol carry PBCH / SSS / PSS in the central PRBs?
+  // These tests do not depend on the PRB, so they are evaluated once here.
+  // NOTE(review): the even-bandwidth test covers the TDD PSS in subframe 6 only,
+  // the odd-bandwidth test covers subframes 1 and 6 -- kept as in the original,
+  // confirm whether the difference is intended.
+  sync_even = ((subframe==0) && (l>=(nsymb>>1)) && (l<((nsymb>>1) + 4))) ||                              // PBCH
+              (((subframe==0)||(subframe==5)) && (l==sss_symb)) ||                                       // SSS
+              ((frame_parms->frame_type == FDD) && ((subframe==0)||(subframe==5)) && (l==pss_symb)) ||   // PSS, FDD
+              ((frame_parms->frame_type == TDD) && (subframe==6) && (l==pss_symb));                      // PSS, TDD
+  sync_odd  = sync_even ||
+              ((frame_parms->frame_type == TDD) && (subframe==1) && (l==pss_symb));                      // PSS, TDD subframe 1
+
+  for (aarx=0; aarx<frame_parms->nb_antennas_rx; aarx++) {
+
+    // NOTE(review): the estimate buffers are read starting at index 5;
+    // presumably they hold 5 leading guard samples -- confirm with the estimator.
+    if (high_speed_flag==1) {
+      dl_ch0     = &dl_ch_estimates[aarx][5+sym_off];
+      dl_ch1     = &dl_ch_estimates[2+aarx][5+sym_off];
+    } else {
+      dl_ch0     = &dl_ch_estimates[aarx][5];
+      dl_ch1     = &dl_ch_estimates[2+aarx][5];
+    }
+
+    pmi_loc = pmi_ext;
+
+    // pointers to extracted RX signals and channel estimates
+    rxF_ext    = &rxdataF_ext[aarx][ext_off];
+    dl_ch0_ext = &dl_ch_estimates_ext[aarx][ext_off];
+    dl_ch1_ext = &dl_ch_estimates_ext[2+aarx][ext_off];
+
+    for (prb=0; prb<frame_parms->N_RB_DL; prb++) {
+      skip_half=0;
+
+      if (prb < 32)
+        rb_alloc_ind = (rb_alloc[0]>>prb) & 1;
+      else if (prb < 64)
+        rb_alloc_ind = (rb_alloc[1]>>(prb-32)) & 1;
+      else if (prb < 96)
+        rb_alloc_ind = (rb_alloc[2]>>(prb-64)) & 1;
+      else if (prb < 100)
+        rb_alloc_ind = (rb_alloc[3]>>(prb-96)) & 1;
+      else
+        rb_alloc_ind = 0;
+
+      // the returned count includes PRBs that are dropped below for PBCH/SSS/PSS
+      if (rb_alloc_ind == 1)
+        nb_rb++;
+
+      if ((frame_parms->N_RB_DL&1) == 0) {  // even number of RBs
+
+        // the 6 central PRBs are fully occupied by PBCH/SSS/PSS
+        if (sync_even && (prb>=(half_rb-3)) && (prb<(half_rb+3)))
+          rb_alloc_ind = 0;
+
+        if (rb_alloc_ind==1) {              // PRB is allocated
+
+          prb_off      = 12*prb;
+          prb_off2     = 1+(12*(prb-half_rb));   // upper half of the band starts right after DC
+          dl_ch0p    = dl_ch0+prb_off;
+          dl_ch1p    = dl_ch1+prb_off;
+
+          if (prb<half_rb) {
+            rxF      = &rxdataF[aarx][prb_off+
+                                      frame_parms->first_carrier_offset +
+                                      sym_off];
+          } else {
+            rxF      = &rxdataF[aarx][prb_off2+
+                                      sym_off];
+          }
+
+          *pmi_loc = get_pmi(frame_parms->N_RB_DL,mimo_mode,pmi,prb);
+          pmi_loc++;
+
+          if (pilots == 0) {
+            memcpy(dl_ch0_ext,dl_ch0p,12*sizeof(int));
+            memcpy(dl_ch1_ext,dl_ch1p,12*sizeof(int));
+            memcpy(rxF_ext,rxF,12*sizeof(int));
+            dl_ch0_ext +=12;
+            dl_ch1_ext +=12;
+            rxF_ext    +=12;
+          } else { // pilots==1
+            j=0;
+
+            for (i=0; i<12; i++) {
+              if ((i!=nushift) &&
+                  (i!=nushift+3) &&
+                  (i!=nushift+6) &&
+                  (i!=((nushift+9)%12))) {
+                rxF_ext[j]=rxF[i];
+                dl_ch0_ext[j]=dl_ch0p[i];
+                dl_ch1_ext[j++]=dl_ch1p[i];
+              }
+            }
+
+            dl_ch0_ext+=8;
+            dl_ch1_ext+=8;
+            rxF_ext+=8;
+          } // pilots==1
+        } // if rballoc==1
+      } else {  // Odd number of RBs
+
+        // the 5 innermost PRBs are fully occupied by PBCH/SSS/PSS
+        if (sync_odd && (prb>(half_rb-3)) && (prb<(half_rb+3)))
+          rb_alloc_ind = 0;
+
+        if (rb_alloc_ind == 1) {
+          //Check if we have to drop half a PRB due to PSS/SSS/PBCH
+          // skip_half == 0 means full PRB
+          // skip_half == 1 means first half is used (leftmost half-PRB from PSS/SSS/PBCH)
+          // skip_half == 2 means second half is used (rightmost half-PRB from PSS/SSS/PBCH)
+          skip_half=0;
+
+          if (sync_odd) {
+            if (prb==(half_rb-3))
+              skip_half=1;
+            else if (prb==(half_rb+3))
+              skip_half=2;
+          }
+
+          prb_off      = 12*prb;
+          prb_off2     = 7+(12*(prb-half_rb-1));   // 6 REs of the middle PRB + DC precede the upper PRBs
+          dl_ch0p      = dl_ch0+prb_off;
+          dl_ch1p      = dl_ch1+prb_off;
+
+          if (prb<=half_rb) {
+            rxF      = &rxdataF[aarx][prb_off+
+                                      frame_parms->first_carrier_offset +
+                                      sym_off];
+          } else {
+            rxF      = &rxdataF[aarx][prb_off2+
+                                      sym_off];
+          }
+#ifdef DEBUG_DLSCH_DEMOD
+          printf("symbol %d / rb %d: alloc %d skip_half %d (rxF %p, rxF_ext %p) prb_off (%d,%d)\n",symbol,prb,rb_alloc_ind,skip_half,rxF,rxF_ext,prb_off,prb_off2);
+#endif
+
+          *pmi_loc = get_pmi(frame_parms->N_RB_DL,mimo_mode,pmi,prb);
+          pmi_loc++;
+
+          if (prb != half_rb) { // This PRB is not around DC
+            if (pilots==0) {
+              // skip_half==2 keeps REs 6..11, otherwise the copy starts at RE 0
+              half_src = (skip_half==2) ? 6 : 0;
+              n_re     = (skip_half==0) ? 12 : 6;
+
+              memcpy(dl_ch0_ext,dl_ch0p+half_src,n_re*sizeof(int32_t));
+              memcpy(dl_ch1_ext,dl_ch1p+half_src,n_re*sizeof(int32_t));
+              memcpy(rxF_ext,rxF+half_src,n_re*sizeof(int32_t));
+#ifdef DEBUG_DLSCH_DEMOD
+              for (i=0;i<n_re;i++)
+                printf("extract rb %d, re %d => (%d,%d)\n",prb,i,*(short *)&rxF_ext[i],*(1+(short*)&rxF_ext[i]));
+#endif
+              dl_ch0_ext+=n_re;
+              dl_ch1_ext+=n_re;
+              rxF_ext+=n_re;
+            } else { // pilots=1
+              j=0;
+
+              if (skip_half==1) {
+                // first half PRB: 2 of its 6 REs are pilots
+                for (i=0; i<6; i++) {
+                  if ((i!=nushift) &&
+                      (i!=((nushift+3)%6))) {
+                    rxF_ext[j]=rxF[i];
+#ifdef DEBUG_DLSCH_DEMOD
+                    printf("(pilots,skip1)extract rb %d, re %d (%d)=> (%d,%d)\n",prb,i,j,*(short *)&rxF_ext[j],*(1+(short*)&rxF_ext[j]));
+#endif
+                    dl_ch0_ext[j]=dl_ch0p[i];
+                    dl_ch1_ext[j++]=dl_ch1p[i];
+                  }
+                }
+
+                dl_ch0_ext+=4;
+                dl_ch1_ext+=4;
+                rxF_ext+=4;
+              } else if (skip_half==2) {
+                // second half PRB: same pilot pattern, shifted by 6 REs
+                for (i=0; i<6; i++) {
+                  if ((i!=nushift) &&
+                      (i!=((nushift+3)%6))) {
+                    rxF_ext[j]=rxF[(i+6)];
+#ifdef DEBUG_DLSCH_DEMOD
+                    printf("(pilots,skip2)extract rb %d, re %d (%d) => (%d,%d)\n",prb,i,j,*(short *)&rxF_ext[j],*(1+(short*)&rxF_ext[j]));
+#endif
+                    dl_ch0_ext[j]=dl_ch0p[i+6];
+                    dl_ch1_ext[j++]=dl_ch1p[i+6];
+                  }
+                }
+
+                dl_ch0_ext+=4;
+                dl_ch1_ext+=4;
+                rxF_ext+=4;
+              } else { //skip_half==0
+                for (i=0; i<12; i++) {
+                  if ((i!=nushift) &&
+                      (i!=nushift+3) &&
+                      (i!=nushift+6) &&
+                      (i!=((nushift+9)%12))) {
+                    rxF_ext[j]=rxF[i];
+#ifdef DEBUG_DLSCH_DEMOD
+                    printf("(pilots)extract rb %d, re %d => (%d,%d)\n",prb,i,*(short *)&rxF_ext[j],*(1+(short*)&rxF_ext[j]));
+#endif
+                    dl_ch0_ext[j]  =dl_ch0p[i];
+                    dl_ch1_ext[j++]=dl_ch1p[i];
+                  }
+                }
+
+                dl_ch0_ext+=8;
+                dl_ch1_ext+=8;
+                rxF_ext+=8;
+              } //skip_half==0
+            } //pilots==1
+          } else {       // Do middle RB (around DC)
+
+            if (pilots==0) {
+              // lower 6 REs sit at the end of the band, just below DC
+              memcpy(dl_ch0_ext,dl_ch0p,6*sizeof(int32_t));
+              memcpy(dl_ch1_ext,dl_ch1p,6*sizeof(int32_t));
+              memcpy(rxF_ext,rxF,6*sizeof(int32_t));
+#ifdef DEBUG_DLSCH_DEMOD
+              for (i=0; i<6; i++) {
+                printf("extract rb %d, re %d => (%d,%d)\n",prb,i,*(short *)&rxF_ext[i],*(1+(short*)&rxF_ext[i]));
+              }
+#endif
+              rxF_ext+=6;
+              dl_ch0_ext+=6;
+              dl_ch1_ext+=6;
+              dl_ch0p+=6;
+              dl_ch1p+=6;
+
+              // upper 6 REs start at index 1 of the symbol, index 0 (DC) is skipped
+              rxF       = &rxdataF[aarx][1+sym_off];
+
+              memcpy(dl_ch0_ext,dl_ch0p,6*sizeof(int32_t));
+              memcpy(dl_ch1_ext,dl_ch1p,6*sizeof(int32_t));
+              memcpy(rxF_ext,rxF,6*sizeof(int32_t));
+#ifdef DEBUG_DLSCH_DEMOD
+              for (i=0; i<6; i++) {
+                printf("extract rb %d, re %d => (%d,%d)\n",prb,i,*(short *)&rxF_ext[i],*(1+(short*)&rxF_ext[i]));
+              }
+#endif
+              rxF_ext+=6;
+              dl_ch0_ext+=6;
+              dl_ch1_ext+=6;
+            } else { // pilots==1
+              j=0;
+
+              for (i=0; i<6; i++) {
+                if ((i!=nushift) &&
+                    (i!=((nushift+3)%6))) {
+                  dl_ch0_ext[j]=dl_ch0p[i];
+                  dl_ch1_ext[j]=dl_ch1p[i];
+                  rxF_ext[j++]=rxF[i];
+#ifdef DEBUG_DLSCH_DEMOD
+                  printf("(pilots)extract rb %d, re %d (%d) => (%d,%d)\n",prb,i,j,*(short *)&rxF[i],*(1+(short*)&rxF[i]));
+#endif
+                }
+              }
+
+              // upper 6 REs start at index 1 of the symbol, index 0 (DC) is skipped
+              rxF       = &rxdataF[aarx][1+sym_off];
+
+              for (; i<12; i++) {
+                // Pilot test on the RE index inside the half PRB (i-6), exactly as
+                // for the lower half. Comparing i against (nushift+9)%12 would wrap
+                // to 0..2 for nushift>=3, never match i in 6..11, and let one pilot
+                // through as a 9th RE of this PRB.
+                if (((i-6)!=nushift) &&
+                    ((i-6)!=((nushift+3)%6))) {
+                  dl_ch0_ext[j]=dl_ch0p[i];
+                  dl_ch1_ext[j]=dl_ch1p[i];
+                  rxF_ext[j++]=rxF[i-6];
+#ifdef DEBUG_DLSCH_DEMOD
+                  // rxF already points one sample past DC, so the extracted sample is rxF[i-6]
+                  printf("(pilots)extract rb %d, re %d (%d) => (%d,%d)\n",prb,i,j,*(short *)&rxF[i-6],*(1+(short*)&rxF[i-6]));
+#endif
+                }
+              }
+
+              dl_ch0_ext+=8;
+              dl_ch1_ext+=8;
+              rxF_ext+=8;
+            } //pilots==1
+          }  // if Middle PRB
+        } // if rballoc==1
+      } // if odd PRB
+    } // for prb
+  } // for aarx
+  return(nb_rb/frame_parms->nb_antennas_rx);
+}
+
+/*
+ * Returns 1 when resource block `rb` is flagged in the allocation bitmap
+ * (bit rb%32 of word rb/32), 0 otherwise.  RB indices >= 100 are never
+ * reported as allocated.
+ */
+static inline unsigned char dlsch_tm7_rb_allocated(const unsigned int *rb_alloc,
+                                                   unsigned short rb)
+{
+  if (rb >= 100)
+    return 0;
+
+  return (unsigned char)((rb_alloc[rb >> 5] >> (rb & 31)) & 1);
+}
+
+/*
+ * Builds a 12-bit mask for one resource block: bit i is set when resource
+ * element i (0..11) is to be extracted, and cleared when it carries a pilot.
+ *
+ *  - pilots==1        : two pilots per RB, at (nushift+poffset)%6 and 6 REs above.
+ *  - uespec_pilots==1 : UE-specific pilots starting at Nid_cell%3 + uespec_poffset,
+ *                       repeated every 4 REs when Ncp==0 and every 3 REs otherwise.
+ *
+ * The same mask is valid for every RB of the symbol, whether the RB is
+ * extracted completely, by half, or split around the DC carrier.
+ */
+static uint16_t dlsch_tm7_data_re_mask(const LTE_DL_FRAME_PARMS *frame_parms,
+                                       unsigned char pilots,
+                                       unsigned char poffset,
+                                       unsigned char uespec_pilots,
+                                       unsigned char uespec_poffset)
+{
+  uint16_t mask = 0x0FFF;
+  int k;
+
+  if (pilots == 1) {
+    const int p = (frame_parms->nushift + poffset) % 6;
+
+    mask = (uint16_t)(mask & ~((1u << p) | (1u << (p + 6))));
+  }
+
+  if (uespec_pilots == 1) {
+    const int step  = (frame_parms->Ncp == 0) ? 4 : 3;
+    const int first = (frame_parms->Nid_cell % 3) + uespec_poffset;
+
+    for (k = 0; k < 12; k += step)
+      mask = (uint16_t)(mask & ~(1u << ((first + k) % 12)));
+  }
+
+  return mask;
+}
+
+/*
+ * Copies the data resource elements of a contiguous run of `n_re` REs.
+ *
+ * rxF_src / ch_src point at the sample of RE number `first_re` of the RB
+ * (first_re is the position inside the RB, 0..11).  An RE is copied only if its
+ * bit is set in `data_re_mask`; the pilot test therefore always uses the real
+ * RE position, also when only the upper half of an RB (first_re==6) is read.
+ *
+ * Returns the number of REs written to rxF_dst / ch_dst, i.e. the amount by
+ * which the caller has to advance both output pointers.
+ */
+static inline unsigned int dlsch_tm7_copy_data_res(const int *rxF_src,
+                                                   const int *ch_src,
+                                                   int *rxF_dst,
+                                                   int *ch_dst,
+                                                   unsigned int first_re,
+                                                   unsigned int n_re,
+                                                   uint16_t data_re_mask)
+{
+  unsigned int k, n = 0;
+
+  for (k = 0; k < n_re; k++) {
+    if ((data_re_mask >> (first_re + k)) & 1) {
+      rxF_dst[n] = rxF_src[k];
+      ch_dst[n]  = ch_src[k];
+#ifdef DEBUG_DLSCH_DEMOD
+      printf("extract re %u => (%d,%d)\n",first_re+k,((short *)&rxF_dst[n])[0],((short *)&rxF_dst[n])[1]);
+#endif
+      n++;
+    }
+  }
+
+  return n;
+}
+
+/*
+ * Extracts, for one OFDM symbol, the received samples and the beamforming
+ * channel estimates of the allocated resource blocks (transmission mode 7).
+ *
+ * rxdataF                : per RX antenna frequency-domain samples, indexed by
+ *                          symbol*ofdm_symbol_size + carrier
+ * dl_bf_ch_estimates     : per RX antenna channel estimates; the estimates of
+ *                          `symbol` are used when high_speed_flag==1, those at
+ *                          offset 0 otherwise
+ * rxdataF_ext            : output samples, written from symbol*N_RB_DL*12
+ * dl_bf_ch_estimates_ext : output estimates, written from symbol*N_RB_DL*12
+ * rb_alloc               : allocation bitmap, one bit per RB (4 words)
+ * symbol, subframe       : position of the symbol being processed
+ * frame_parms            : cell/frame configuration
+ *
+ * Pilot REs are dropped.  In symbols carrying PBCH/SSS/PSS the RBs overlapping
+ * the central region are skipped; with an odd N_RB_DL only half of the two
+ * RBs at the edge of that region is skipped.  The DC carrier is never copied.
+ *
+ * Returns the number of RBs flagged in rb_alloc (counted on every RX antenna,
+ * then divided by nb_antennas_rx).  RBs skipped because of PBCH/SSS/PSS are
+ * still counted.  Terminates the process if the symbol is detected as carrying
+ * both pilot types at once.
+ */
+unsigned short dlsch_extract_rbs_TM7(int **rxdataF,
+                                     int **dl_bf_ch_estimates,
+                                     int **rxdataF_ext,
+                                     int **dl_bf_ch_estimates_ext,
+                                     unsigned int *rb_alloc,
+                                     unsigned char symbol,
+                                     unsigned char subframe,
+                                     uint32_t high_speed_flag,
+                                     LTE_DL_FRAME_PARMS *frame_parms)
+{
+
+  unsigned short rb,nb_rb=0;
+  unsigned char rb_alloc_ind;
+  unsigned char aarx,l,nsymb,sss_symb,pss_symb;
+  unsigned char symbol_mod,pilots,uespec_pilots=0,poffset=0,uespec_poffset=0;
+  unsigned char sync_symbol=0;
+  int *dl_ch0,*dl_ch0_ext,*rxF,*rxF_ext,*rxF_dc;
+  uint16_t data_re_mask;
+
+  const int n_rb_dl       = frame_parms->N_RB_DL;
+  const int odd_n_rb      = n_rb_dl & 1;
+  const int half_rb       = n_rb_dl >> 1;
+  // RBs [sync_first_rb, sync_end_rb) surround the centre of the band
+  const int sync_first_rb = half_rb - 3;
+  const int sync_end_rb   = half_rb + 3;
+
+  symbol_mod = (symbol>=(7-frame_parms->Ncp)) ? symbol-(7-frame_parms->Ncp) : symbol;
+  pilots = ((symbol_mod==0)||(symbol_mod==(4-frame_parms->Ncp))) ? 1 : 0;
+  l=symbol;
+  nsymb = (frame_parms->Ncp==NORMAL) ? 14:12;
+
+  // symbols carrying UE-specific pilots
+  if (frame_parms->Ncp==0) {
+    if (symbol==3 || symbol==6 || symbol==9 || symbol==12)
+      uespec_pilots = 1;
+  } else {
+    if (symbol==4 || symbol==7 || symbol==10)
+      uespec_pilots = 1;
+  }
+
+  if (frame_parms->frame_type == TDD) {// TDD
+    sss_symb = nsymb-1;
+    pss_symb = 2;
+  } else {
+    sss_symb = (nsymb>>1)-2;
+    pss_symb = (nsymb>>1)-1;
+  }
+
+  if (symbol_mod==(4-frame_parms->Ncp))
+    poffset=3;
+
+  if ((frame_parms->Ncp==0 && (symbol==6 ||symbol ==12)) || (frame_parms->Ncp==1 && symbol==7))
+    uespec_poffset=2;
+
+  // A symbol cannot carry both pilot types; treat it as a detection error.
+  if (pilots==1 && uespec_pilots==1) {
+    LOG_E(PHY,"dlsch_extract_rbs_TM7(dl_demodulation.c):pilot or ue spec pilot detection error\n");
+    exit(-1);
+  }
+
+  data_re_mask = dlsch_tm7_data_re_mask(frame_parms,pilots,poffset,uespec_pilots,uespec_poffset);
+
+  // Does this symbol carry PBCH, SSS or PSS in the central RBs?
+  // PBCH: subframe 0, symbols nsymb>>1 ... nsymb>>1 + 3
+  if ((subframe==0) && (l>=(nsymb>>1)) && (l<((nsymb>>1) + 4)))
+    sync_symbol = 1;
+
+  // SSS
+  if (((subframe==0)||(subframe==5)) && (l==sss_symb))
+    sync_symbol = 1;
+
+  // PSS in subframe 0/5 if FDD
+  if ((frame_parms->frame_type == FDD) && ((subframe==0)||(subframe==5)) && (l==pss_symb))
+    sync_symbol = 1;
+
+  // PSS in subframe 1/6 if TDD (applied identically for even and odd N_RB_DL)
+  if ((frame_parms->frame_type == TDD) && ((subframe==1)||(subframe==6)) && (l==pss_symb))
+    sync_symbol = 1;
+
+  for (aarx=0; aarx<frame_parms->nb_antennas_rx; aarx++) {
+
+    if (high_speed_flag == 1)
+      dl_ch0     = &dl_bf_ch_estimates[aarx][symbol*(frame_parms->ofdm_symbol_size)];
+    else
+      dl_ch0     = &dl_bf_ch_estimates[aarx][0];
+
+    dl_ch0_ext = &dl_bf_ch_estimates_ext[aarx][symbol*(frame_parms->N_RB_DL*12)];
+
+    rxF_ext    = &rxdataF_ext[aarx][symbol*(frame_parms->N_RB_DL*12)];
+    rxF        = &rxdataF[aarx][(frame_parms->first_carrier_offset + (symbol*(frame_parms->ofdm_symbol_size)))];
+    // DC carrier of this symbol; the upper half of the band starts right after it
+    rxF_dc     = &rxdataF[aarx][(symbol*(frame_parms->ofdm_symbol_size))];
+
+    for (rb=0; rb<n_rb_dl; rb++) {
+      // with an odd number of RBs the middle RB lies on both sides of DC
+      const int straddles_dc = odd_n_rb && (rb==half_rb);
+      unsigned int first_re = 0, n_re = 12, n_copied;
+
+      rb_alloc_ind = dlsch_tm7_rb_allocated(rb_alloc,rb);
+
+      // counted before the PBCH/SSS/PSS exclusion below
+      if (rb_alloc_ind == 1)
+        nb_rb++;
+
+      // Even number of RBs: for second half of RBs skip DC carrier
+      if (!odd_n_rb && (rb==half_rb))
+        rxF = rxF_dc + 1;
+
+      if (sync_symbol) {
+        if (odd_n_rb && !straddles_dc) {
+          if ((rb>sync_first_rb) && (rb<sync_end_rb)) {
+            rb_alloc_ind = 0;
+          } else if (rb==sync_first_rb) {
+            n_re = 6;                 // keep lower half only
+          } else if (rb==sync_end_rb) {
+            first_re = 6;             // keep upper half only
+            n_re = 6;
+          }
+        } else if ((rb>=sync_first_rb) && (rb<sync_end_rb)) {
+          rb_alloc_ind = 0;
+        }
+      }
+
+      if (rb_alloc_ind==1) {
+#ifdef DEBUG_DLSCH_DEMOD
+        printf("rb %d/symbol %d pilots %d, uespec_pilots %d, (first_re %u, n_re %u)\n",rb,l,pilots,uespec_pilots,first_re,n_re);
+#endif
+
+        if (straddles_dc) {
+          // lower 6 REs sit at the end of the buffer, upper 6 REs right after DC
+          n_copied  = dlsch_tm7_copy_data_res(rxF,dl_ch0,rxF_ext,dl_ch0_ext,0,6,data_re_mask);
+          n_copied += dlsch_tm7_copy_data_res(rxF_dc+1,dl_ch0+6,rxF_ext+n_copied,dl_ch0_ext+n_copied,6,6,data_re_mask);
+        } else {
+          n_copied  = dlsch_tm7_copy_data_res(rxF+first_re,dl_ch0+first_re,rxF_ext,dl_ch0_ext,first_re,n_re,data_re_mask);
+        }
+
+        dl_ch0_ext+=n_copied;
+        rxF_ext+=n_copied;
+      }
+
+      // channel estimates have no DC gap; received samples skip DC after the middle RB
+      dl_ch0+=12;
+      rxF = straddles_dc ? (rxF_dc + 7) : (rxF + 12);
+    }
+  }
+
+  _mm_empty();
+  _m_empty();
+
+  // no RX antenna: nothing was extracted, avoid dividing by zero
+  if (frame_parms->nb_antennas_rx == 0)
+    return(0);
+
+  return(nb_rb/frame_parms->nb_antennas_rx);
+}
+
+//==============================================================================================
+
+/* Write the PDSCH receive buffers of one eNB link to ".m" files through write_output().
+ * ue:       UE context; data is read from ue->pdsch_vars[ue->current_thread_id[subframe]][eNB_id]
+ * eNB_id:   index into pdsch_vars[][] and transmission_mode[]; embedded in every file/variable name
+ * subframe: selects the buffer set through ue->current_thread_id[subframe]
+ * coded_bits_per_codeword: only element [0] is read, as the number of LLR entries to dump
+ * round:    embedded in every name; with harq_pid it indexes dl_ch_rho_ext[][] and rxdataF_comp1[][]
+ * Extra buffers are dumped for transmission mode 7, a second RX antenna and a second eNB antenna port.
+ */
+void dump_dlsch2(PHY_VARS_UE *ue,uint8_t eNB_id,uint8_t subframe,unsigned int *coded_bits_per_codeword,int round,  unsigned char harq_pid)
+{
+  unsigned int nsymb = (ue->frame_parms.Ncp == 0) ? 14 : 12;
+  // Names are built with snprintf() bounded by sizeof, so a large round value truncates instead of overrunning.
+  char fname[32],vname[32];
+  int N_RB_DL=ue->frame_parms.N_RB_DL;
+
+  snprintf(fname,sizeof(fname),"dlsch%d_rxF_r%d_ext0.m",eNB_id,round);
+  snprintf(vname,sizeof(vname),"dl%d_rxF_r%d_ext0",eNB_id,round);
+  write_output(fname,vname,ue->pdsch_vars[ue->current_thread_id[subframe]][eNB_id]->rxdataF_ext[0],12*N_RB_DL*nsymb,1,1);
+
+  if (ue->frame_parms.nb_antennas_rx >1) {
+    snprintf(fname,sizeof(fname),"dlsch%d_rxF_r%d_ext1.m",eNB_id,round);
+    snprintf(vname,sizeof(vname),"dl%d_rxF_r%d_ext1",eNB_id,round);
+    write_output(fname,vname,ue->pdsch_vars[ue->current_thread_id[subframe]][eNB_id]->rxdataF_ext[1],12*N_RB_DL*nsymb,1,1);
+  }
+
+  snprintf(fname,sizeof(fname),"dlsch%d_ch_r%d_ext00.m",eNB_id,round);
+  snprintf(vname,sizeof(vname),"dl%d_ch_r%d_ext00",eNB_id,round);
+  write_output(fname,vname,ue->pdsch_vars[ue->current_thread_id[subframe]][eNB_id]->dl_ch_estimates_ext[0],12*N_RB_DL*nsymb,1,1);
+
+  if (ue->transmission_mode[eNB_id]==7){
+    snprintf(fname,sizeof(fname),"dlsch%d_bf_ch_r%d.m",eNB_id,round);
+    snprintf(vname,sizeof(vname),"dl%d_bf_ch_r%d",eNB_id,round);
+    write_output(fname,vname,ue->pdsch_vars[ue->current_thread_id[subframe]][eNB_id]->dl_bf_ch_estimates[0],512*nsymb,1,1);
+
+    snprintf(fname,sizeof(fname),"dlsch%d_bf_ch_r%d_ext00.m",eNB_id,round);
+    snprintf(vname,sizeof(vname),"dl%d_bf_ch_r%d_ext00",eNB_id,round);
+    write_output(fname,vname,ue->pdsch_vars[ue->current_thread_id[subframe]][eNB_id]->dl_bf_ch_estimates_ext[0],12*N_RB_DL*nsymb,1,1);
+  }
+
+  if (ue->frame_parms.nb_antennas_rx == 2) {
+    snprintf(fname,sizeof(fname),"dlsch%d_ch_r%d_ext01.m",eNB_id,round);
+    snprintf(vname,sizeof(vname),"dl%d_ch_r%d_ext01",eNB_id,round);
+    write_output(fname,vname,ue->pdsch_vars[ue->current_thread_id[subframe]][eNB_id]->dl_ch_estimates_ext[1],12*N_RB_DL*nsymb,1,1);
+  }
+
+  if (ue->frame_parms.nb_antenna_ports_eNB == 2) {
+    snprintf(fname,sizeof(fname),"dlsch%d_ch_r%d_ext10.m",eNB_id,round);
+    snprintf(vname,sizeof(vname),"dl%d_ch_r%d_ext10",eNB_id,round);
+    write_output(fname,vname,ue->pdsch_vars[ue->current_thread_id[subframe]][eNB_id]->dl_ch_estimates_ext[2],12*N_RB_DL*nsymb,1,1);
+
+    if (ue->frame_parms.nb_antennas_rx == 2) {
+      snprintf(fname,sizeof(fname),"dlsch%d_ch_r%d_ext11.m",eNB_id,round);
+      snprintf(vname,sizeof(vname),"dl%d_ch_r%d_ext11",eNB_id,round);
+      write_output(fname,vname,ue->pdsch_vars[ue->current_thread_id[subframe]][eNB_id]->dl_ch_estimates_ext[3],12*N_RB_DL*nsymb,1,1);
+    }
+  }
+
+  snprintf(fname,sizeof(fname),"dlsch%d_rxF_r%d_uespec0.m",eNB_id,round);
+  snprintf(vname,sizeof(vname),"dl%d_rxF_r%d_uespec0",eNB_id,round);
+  write_output(fname,vname,ue->pdsch_vars[ue->current_thread_id[subframe]][eNB_id]->rxdataF_uespec_pilots[0],12*N_RB_DL,1,1);
+
+  // NOTE(review): the two rho variable names put eNB_id after "_r" and round last, unlike the other names; kept as-is.
+  snprintf(fname,sizeof(fname),"dlsch%d_r%d_rho.m",eNB_id,round);
+  snprintf(vname,sizeof(vname),"dl_rho_r%d_%d",eNB_id,round);
+  write_output(fname,vname,ue->pdsch_vars[ue->current_thread_id[subframe]][eNB_id]->dl_ch_rho_ext[harq_pid][round][0],12*N_RB_DL*nsymb,1,1);
+
+  snprintf(fname,sizeof(fname),"dlsch%d_r%d_rho2.m",eNB_id,round);
+  snprintf(vname,sizeof(vname),"dl_rho2_r%d_%d",eNB_id,round);
+  write_output(fname,vname,ue->pdsch_vars[ue->current_thread_id[subframe]][eNB_id]->dl_ch_rho2_ext[0],12*N_RB_DL*nsymb,1,1);
+
+  snprintf(fname,sizeof(fname),"dlsch%d_rxF_r%d_comp0.m",eNB_id,round);
+  snprintf(vname,sizeof(vname),"dl%d_rxF_r%d_comp0",eNB_id,round);
+  write_output(fname,vname,ue->pdsch_vars[ue->current_thread_id[subframe]][eNB_id]->rxdataF_comp0[0],12*N_RB_DL*nsymb,1,1);
+  if (ue->frame_parms.nb_antenna_ports_eNB == 2) {
+    snprintf(fname,sizeof(fname),"dlsch%d_rxF_r%d_comp1.m",eNB_id,round);
+    snprintf(vname,sizeof(vname),"dl%d_rxF_r%d_comp1",eNB_id,round);
+    write_output(fname,vname,ue->pdsch_vars[ue->current_thread_id[subframe]][eNB_id]->rxdataF_comp1[harq_pid][round][0],12*N_RB_DL*nsymb,1,1);
+  }
+
+  snprintf(fname,sizeof(fname),"dlsch%d_rxF_r%d_llr.m",eNB_id,round);
+  snprintf(vname,sizeof(vname),"dl%d_r%d_llr",eNB_id,round);
+  write_output(fname,vname, ue->pdsch_vars[ue->current_thread_id[subframe]][eNB_id]->llr[0],coded_bits_per_codeword[0],1,0);
+  snprintf(fname,sizeof(fname),"dlsch%d_r%d_mag1.m",eNB_id,round);
+  snprintf(vname,sizeof(vname),"dl%d_r%d_mag1",eNB_id,round);
+  write_output(fname,vname,ue->pdsch_vars[ue->current_thread_id[subframe]][eNB_id]->dl_ch_mag0[0],12*N_RB_DL*nsymb,1,1);
+  snprintf(fname,sizeof(fname),"dlsch%d_r%d_mag2.m",eNB_id,round);
+  snprintf(vname,sizeof(vname),"dl%d_r%d_mag2",eNB_id,round);
+  write_output(fname,vname,ue->pdsch_vars[ue->current_thread_id[subframe]][eNB_id]->dl_ch_magb0[0],12*N_RB_DL*nsymb,1,1);
+}
+
+#ifdef DEBUG_DLSCH_DEMOD
+/*
+void print_bytes(char *s,__m128i *x)
+{
+
+  char *tempb = (char *)x;
+
+  printf("%s  : %d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d\n",s,
+         tempb[0],tempb[1],tempb[2],tempb[3],tempb[4],tempb[5],tempb[6],tempb[7],
+         tempb[8],tempb[9],tempb[10],tempb[11],tempb[12],tempb[13],tempb[14],tempb[15]
+         );
+
+}
+
+void print_shorts(char *s,__m128i *x)
+{
+
+  short *tempb = (short *)x;
+  printf("%s  : %d,%d,%d,%d,%d,%d,%d,%d\n",s,
+         tempb[0],tempb[1],tempb[2],tempb[3],tempb[4],tempb[5],tempb[6],tempb[7]);
+
+}
+
+void print_shorts2(char *s,__m64 *x)
+{
+
+  short *tempb = (short *)x;
+  printf("%s  : %d,%d,%d,%d\n",s,
+         tempb[0],tempb[1],tempb[2],tempb[3]);
+
+}
+
+void print_ints(char *s,__m128i *x)
+{
+
+  int *tempb = (int *)x;
+  printf("%s  : %d,%d,%d,%d\n",s,
+         tempb[0],tempb[1],tempb[2],tempb[3]);
+
+}*/
+#endif
diff --git a/openair1/PHY/LTE_TRANSPORT/dlsch_llr_computation.c b/openair1/PHY/LTE_TRANSPORT/dlsch_llr_computation.c
new file mode 100644
index 0000000000000000000000000000000000000000..7682045ae1307ca6a10ee83ef071091f7e28528d
--- /dev/null
+++ b/openair1/PHY/LTE_TRANSPORT/dlsch_llr_computation.c
@@ -0,0 +1,8899 @@
+/*
+ * Licensed to the OpenAirInterface (OAI) Software Alliance under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The OpenAirInterface Software Alliance licenses this file to You under
+ * the OAI Public License, Version 1.1  (the "License"); you may not use this file
+ * except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.openairinterface.org/?page_id=698
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *-------------------------------------------------------------------------------
+ * For more information about the OpenAirInterface (OAI) Software Alliance:
+ *      contact@openairinterface.org
+ */
+
+/*! \file PHY/LTE_TRANSPORT/dlsch_llr_computation.c
+ * \brief Top-level routines for LLR computation of the PDSCH physical channel from 36-211, V8.6 2009-03
+ * \author R. Knopp, F. Kaltenberger, A. Bhamri, S. Aubert, S. Wagner, X. Jiang
+ * \date 2011
+ * \version 0.1
+ * \company Eurecom
+ * \email: knopp@eurecom.fr,florian.kaltenberger@eurecom.fr,ankit.bhamri@eurecom.fr,sebastien.aubert@eurecom.fr, sebastian.wagner@eurecom.fr
+ * \note
+ * \warning
+ */
+
+#include "PHY/defs.h"
+#include "PHY/TOOLS/defs.h"
+#include "PHY/extern.h"
+#include "defs.h"
+#include "extern.h"
+#include "PHY/sse_intrin.h"
+
+//#define DEBUG_LLR_SIC
+
+
+int16_t zeros[8] __attribute__ ((aligned(16))) = {0,0,0,0,0,0,0,0}; // 16-byte aligned vector of eight int16 lanes, all zero
+int16_t ones[8] __attribute__ ((aligned(16))) = {-1,-1,-1,-1,-1,-1,-1,-1}; // every lane has all 16 bits set; -1 replaces 0xffff (65535), which is outside int16_t range
+#if defined(__x86_64__) || defined(__i386__)
+__m128i rho_rpi __attribute__ ((aligned(16))); // rho_*: file-scope (non-static) 16-byte-aligned __m128i objects, x86-only (inside the #if above): rho_rpi, rho_rmi plus one rho_rpi_<a>_<b> / rho_rmi_<a>_<b> per pair a,b in {1,3,5,7}. NOTE(review): rpi/rmi presumably mean "real plus imag"/"real minus imag" - confirm at the use sites.
+__m128i rho_rmi __attribute__ ((aligned(16)));
+__m128i rho_rpi_1_1 __attribute__ ((aligned(16)));
+__m128i rho_rpi_1_3 __attribute__ ((aligned(16)));
+__m128i rho_rpi_1_5 __attribute__ ((aligned(16)));
+__m128i rho_rpi_1_7 __attribute__ ((aligned(16)));
+__m128i rho_rpi_3_1 __attribute__ ((aligned(16)));
+__m128i rho_rpi_3_3 __attribute__ ((aligned(16)));
+__m128i rho_rpi_3_5 __attribute__ ((aligned(16)));
+__m128i rho_rpi_3_7 __attribute__ ((aligned(16)));
+__m128i rho_rpi_5_1 __attribute__ ((aligned(16)));
+__m128i rho_rpi_5_3 __attribute__ ((aligned(16)));
+__m128i rho_rpi_5_5 __attribute__ ((aligned(16)));
+__m128i rho_rpi_5_7 __attribute__ ((aligned(16)));
+__m128i rho_rpi_7_1 __attribute__ ((aligned(16)));
+__m128i rho_rpi_7_3 __attribute__ ((aligned(16)));
+__m128i rho_rpi_7_5 __attribute__ ((aligned(16)));
+__m128i rho_rpi_7_7 __attribute__ ((aligned(16)));
+__m128i rho_rmi_1_1 __attribute__ ((aligned(16)));
+__m128i rho_rmi_1_3 __attribute__ ((aligned(16)));
+__m128i rho_rmi_1_5 __attribute__ ((aligned(16)));
+__m128i rho_rmi_1_7 __attribute__ ((aligned(16)));
+__m128i rho_rmi_3_1 __attribute__ ((aligned(16)));
+__m128i rho_rmi_3_3 __attribute__ ((aligned(16)));
+__m128i rho_rmi_3_5 __attribute__ ((aligned(16)));
+__m128i rho_rmi_3_7 __attribute__ ((aligned(16)));
+__m128i rho_rmi_5_1 __attribute__ ((aligned(16)));
+__m128i rho_rmi_5_3 __attribute__ ((aligned(16)));
+__m128i rho_rmi_5_5 __attribute__ ((aligned(16)));
+__m128i rho_rmi_5_7 __attribute__ ((aligned(16)));
+__m128i rho_rmi_7_1 __attribute__ ((aligned(16)));
+__m128i rho_rmi_7_3 __attribute__ ((aligned(16)));
+__m128i rho_rmi_7_5 __attribute__ ((aligned(16)));
+__m128i rho_rmi_7_7 __attribute__ ((aligned(16)));
+// psi_r_<a>_<b>: 64 file-scope __m128i objects, one per ordered suffix pair from {m7,m5,m3,m1,p1,p3,p5,p7}; x86-only (inside the #if above). NOTE(review): m/p presumably mean minus/plus - confirm at the use sites.
+__m128i psi_r_m7_m7 __attribute__ ((aligned(16)));
+__m128i psi_r_m7_m5 __attribute__ ((aligned(16)));
+__m128i psi_r_m7_m3 __attribute__ ((aligned(16)));
+__m128i psi_r_m7_m1 __attribute__ ((aligned(16)));
+__m128i psi_r_m7_p1 __attribute__ ((aligned(16)));
+__m128i psi_r_m7_p3 __attribute__ ((aligned(16)));
+__m128i psi_r_m7_p5 __attribute__ ((aligned(16)));
+__m128i psi_r_m7_p7 __attribute__ ((aligned(16)));
+__m128i psi_r_m5_m7 __attribute__ ((aligned(16)));
+__m128i psi_r_m5_m5 __attribute__ ((aligned(16)));
+__m128i psi_r_m5_m3 __attribute__ ((aligned(16)));
+__m128i psi_r_m5_m1 __attribute__ ((aligned(16)));
+__m128i psi_r_m5_p1 __attribute__ ((aligned(16)));
+__m128i psi_r_m5_p3 __attribute__ ((aligned(16)));
+__m128i psi_r_m5_p5 __attribute__ ((aligned(16)));
+__m128i psi_r_m5_p7 __attribute__ ((aligned(16)));
+__m128i psi_r_m3_m7 __attribute__ ((aligned(16)));
+__m128i psi_r_m3_m5 __attribute__ ((aligned(16)));
+__m128i psi_r_m3_m3 __attribute__ ((aligned(16)));
+__m128i psi_r_m3_m1 __attribute__ ((aligned(16)));
+__m128i psi_r_m3_p1 __attribute__ ((aligned(16)));
+__m128i psi_r_m3_p3 __attribute__ ((aligned(16)));
+__m128i psi_r_m3_p5 __attribute__ ((aligned(16)));
+__m128i psi_r_m3_p7 __attribute__ ((aligned(16)));
+__m128i psi_r_m1_m7 __attribute__ ((aligned(16)));
+__m128i psi_r_m1_m5 __attribute__ ((aligned(16)));
+__m128i psi_r_m1_m3 __attribute__ ((aligned(16)));
+__m128i psi_r_m1_m1 __attribute__ ((aligned(16)));
+__m128i psi_r_m1_p1 __attribute__ ((aligned(16)));
+__m128i psi_r_m1_p3 __attribute__ ((aligned(16)));
+__m128i psi_r_m1_p5 __attribute__ ((aligned(16)));
+__m128i psi_r_m1_p7 __attribute__ ((aligned(16)));
+__m128i psi_r_p1_m7 __attribute__ ((aligned(16)));
+__m128i psi_r_p1_m5 __attribute__ ((aligned(16)));
+__m128i psi_r_p1_m3 __attribute__ ((aligned(16)));
+__m128i psi_r_p1_m1 __attribute__ ((aligned(16)));
+__m128i psi_r_p1_p1 __attribute__ ((aligned(16)));
+__m128i psi_r_p1_p3 __attribute__ ((aligned(16)));
+__m128i psi_r_p1_p5 __attribute__ ((aligned(16)));
+__m128i psi_r_p1_p7 __attribute__ ((aligned(16)));
+__m128i psi_r_p3_m7 __attribute__ ((aligned(16)));
+__m128i psi_r_p3_m5 __attribute__ ((aligned(16)));
+__m128i psi_r_p3_m3 __attribute__ ((aligned(16)));
+__m128i psi_r_p3_m1 __attribute__ ((aligned(16)));
+__m128i psi_r_p3_p1 __attribute__ ((aligned(16)));
+__m128i psi_r_p3_p3 __attribute__ ((aligned(16)));
+__m128i psi_r_p3_p5 __attribute__ ((aligned(16)));
+__m128i psi_r_p3_p7 __attribute__ ((aligned(16)));
+__m128i psi_r_p5_m7 __attribute__ ((aligned(16)));
+__m128i psi_r_p5_m5 __attribute__ ((aligned(16)));
+__m128i psi_r_p5_m3 __attribute__ ((aligned(16)));
+__m128i psi_r_p5_m1 __attribute__ ((aligned(16)));
+__m128i psi_r_p5_p1 __attribute__ ((aligned(16)));
+__m128i psi_r_p5_p3 __attribute__ ((aligned(16)));
+__m128i psi_r_p5_p5 __attribute__ ((aligned(16)));
+__m128i psi_r_p5_p7 __attribute__ ((aligned(16)));
+__m128i psi_r_p7_m7 __attribute__ ((aligned(16)));
+__m128i psi_r_p7_m5 __attribute__ ((aligned(16)));
+__m128i psi_r_p7_m3 __attribute__ ((aligned(16)));
+__m128i psi_r_p7_m1 __attribute__ ((aligned(16)));
+__m128i psi_r_p7_p1 __attribute__ ((aligned(16)));
+__m128i psi_r_p7_p3 __attribute__ ((aligned(16)));
+__m128i psi_r_p7_p5 __attribute__ ((aligned(16)));
+__m128i psi_r_p7_p7 __attribute__ ((aligned(16)));
+// psi_i_<a>_<b>: 64 file-scope __m128i objects, same suffix grid as psi_r_* ({m7,m5,m3,m1,p1,p3,p5,p7} squared). NOTE(review): presumably the imaginary-part counterpart of psi_r_* - confirm at the use sites.
+__m128i psi_i_m7_m7 __attribute__ ((aligned(16)));
+__m128i psi_i_m7_m5 __attribute__ ((aligned(16)));
+__m128i psi_i_m7_m3 __attribute__ ((aligned(16)));
+__m128i psi_i_m7_m1 __attribute__ ((aligned(16)));
+__m128i psi_i_m7_p1 __attribute__ ((aligned(16)));
+__m128i psi_i_m7_p3 __attribute__ ((aligned(16)));
+__m128i psi_i_m7_p5 __attribute__ ((aligned(16)));
+__m128i psi_i_m7_p7 __attribute__ ((aligned(16)));
+__m128i psi_i_m5_m7 __attribute__ ((aligned(16)));
+__m128i psi_i_m5_m5 __attribute__ ((aligned(16)));
+__m128i psi_i_m5_m3 __attribute__ ((aligned(16)));
+__m128i psi_i_m5_m1 __attribute__ ((aligned(16)));
+__m128i psi_i_m5_p1 __attribute__ ((aligned(16)));
+__m128i psi_i_m5_p3 __attribute__ ((aligned(16)));
+__m128i psi_i_m5_p5 __attribute__ ((aligned(16)));
+__m128i psi_i_m5_p7 __attribute__ ((aligned(16)));
+__m128i psi_i_m3_m7 __attribute__ ((aligned(16)));
+__m128i psi_i_m3_m5 __attribute__ ((aligned(16)));
+__m128i psi_i_m3_m3 __attribute__ ((aligned(16)));
+__m128i psi_i_m3_m1 __attribute__ ((aligned(16)));
+__m128i psi_i_m3_p1 __attribute__ ((aligned(16)));
+__m128i psi_i_m3_p3 __attribute__ ((aligned(16)));
+__m128i psi_i_m3_p5 __attribute__ ((aligned(16)));
+__m128i psi_i_m3_p7 __attribute__ ((aligned(16)));
+__m128i psi_i_m1_m7 __attribute__ ((aligned(16)));
+__m128i psi_i_m1_m5 __attribute__ ((aligned(16)));
+__m128i psi_i_m1_m3 __attribute__ ((aligned(16)));
+__m128i psi_i_m1_m1 __attribute__ ((aligned(16)));
+__m128i psi_i_m1_p1 __attribute__ ((aligned(16)));
+__m128i psi_i_m1_p3 __attribute__ ((aligned(16)));
+__m128i psi_i_m1_p5 __attribute__ ((aligned(16)));
+__m128i psi_i_m1_p7 __attribute__ ((aligned(16)));
+__m128i psi_i_p1_m7 __attribute__ ((aligned(16)));
+__m128i psi_i_p1_m5 __attribute__ ((aligned(16)));
+__m128i psi_i_p1_m3 __attribute__ ((aligned(16)));
+__m128i psi_i_p1_m1 __attribute__ ((aligned(16)));
+__m128i psi_i_p1_p1 __attribute__ ((aligned(16)));
+__m128i psi_i_p1_p3 __attribute__ ((aligned(16)));
+__m128i psi_i_p1_p5 __attribute__ ((aligned(16)));
+__m128i psi_i_p1_p7 __attribute__ ((aligned(16)));
+__m128i psi_i_p3_m7 __attribute__ ((aligned(16)));
+__m128i psi_i_p3_m5 __attribute__ ((aligned(16)));
+__m128i psi_i_p3_m3 __attribute__ ((aligned(16)));
+__m128i psi_i_p3_m1 __attribute__ ((aligned(16)));
+__m128i psi_i_p3_p1 __attribute__ ((aligned(16)));
+__m128i psi_i_p3_p3 __attribute__ ((aligned(16)));
+__m128i psi_i_p3_p5 __attribute__ ((aligned(16)));
+__m128i psi_i_p3_p7 __attribute__ ((aligned(16)));
+__m128i psi_i_p5_m7 __attribute__ ((aligned(16)));
+__m128i psi_i_p5_m5 __attribute__ ((aligned(16)));
+__m128i psi_i_p5_m3 __attribute__ ((aligned(16)));
+__m128i psi_i_p5_m1 __attribute__ ((aligned(16)));
+__m128i psi_i_p5_p1 __attribute__ ((aligned(16)));
+__m128i psi_i_p5_p3 __attribute__ ((aligned(16)));
+__m128i psi_i_p5_p5 __attribute__ ((aligned(16)));
+__m128i psi_i_p5_p7 __attribute__ ((aligned(16)));
+__m128i psi_i_p7_m7 __attribute__ ((aligned(16)));
+__m128i psi_i_p7_m5 __attribute__ ((aligned(16)));
+__m128i psi_i_p7_m3 __attribute__ ((aligned(16)));
+__m128i psi_i_p7_m1 __attribute__ ((aligned(16)));
+__m128i psi_i_p7_p1 __attribute__ ((aligned(16)));
+__m128i psi_i_p7_p3 __attribute__ ((aligned(16)));
+__m128i psi_i_p7_p5 __attribute__ ((aligned(16)));
+__m128i psi_i_p7_p7 __attribute__ ((aligned(16)));
+// a_r_<a>_<b>: 64 file-scope __m128i objects, one per ordered suffix pair from {m7,m5,m3,m1,p1,p3,p5,p7}; x86-only (inside the #if above). NOTE(review): the "_r" presumably marks a real-part quantity - confirm at the use sites.
+__m128i a_r_m7_m7 __attribute__ ((aligned(16)));
+__m128i a_r_m7_m5 __attribute__ ((aligned(16)));
+__m128i a_r_m7_m3 __attribute__ ((aligned(16)));
+__m128i a_r_m7_m1 __attribute__ ((aligned(16)));
+__m128i a_r_m7_p1 __attribute__ ((aligned(16)));
+__m128i a_r_m7_p3 __attribute__ ((aligned(16)));
+__m128i a_r_m7_p5 __attribute__ ((aligned(16)));
+__m128i a_r_m7_p7 __attribute__ ((aligned(16)));
+__m128i a_r_m5_m7 __attribute__ ((aligned(16)));
+__m128i a_r_m5_m5 __attribute__ ((aligned(16)));
+__m128i a_r_m5_m3 __attribute__ ((aligned(16)));
+__m128i a_r_m5_m1 __attribute__ ((aligned(16)));
+__m128i a_r_m5_p1 __attribute__ ((aligned(16)));
+__m128i a_r_m5_p3 __attribute__ ((aligned(16)));
+__m128i a_r_m5_p5 __attribute__ ((aligned(16)));
+__m128i a_r_m5_p7 __attribute__ ((aligned(16)));
+__m128i a_r_m3_m7 __attribute__ ((aligned(16)));
+__m128i a_r_m3_m5 __attribute__ ((aligned(16)));
+__m128i a_r_m3_m3 __attribute__ ((aligned(16)));
+__m128i a_r_m3_m1 __attribute__ ((aligned(16)));
+__m128i a_r_m3_p1 __attribute__ ((aligned(16)));
+__m128i a_r_m3_p3 __attribute__ ((aligned(16)));
+__m128i a_r_m3_p5 __attribute__ ((aligned(16)));
+__m128i a_r_m3_p7 __attribute__ ((aligned(16)));
+__m128i a_r_m1_m7 __attribute__ ((aligned(16)));
+__m128i a_r_m1_m5 __attribute__ ((aligned(16)));
+__m128i a_r_m1_m3 __attribute__ ((aligned(16)));
+__m128i a_r_m1_m1 __attribute__ ((aligned(16)));
+__m128i a_r_m1_p1 __attribute__ ((aligned(16)));
+__m128i a_r_m1_p3 __attribute__ ((aligned(16)));
+__m128i a_r_m1_p5 __attribute__ ((aligned(16)));
+__m128i a_r_m1_p7 __attribute__ ((aligned(16)));
+__m128i a_r_p1_m7 __attribute__ ((aligned(16)));
+__m128i a_r_p1_m5 __attribute__ ((aligned(16)));
+__m128i a_r_p1_m3 __attribute__ ((aligned(16)));
+__m128i a_r_p1_m1 __attribute__ ((aligned(16)));
+__m128i a_r_p1_p1 __attribute__ ((aligned(16)));
+__m128i a_r_p1_p3 __attribute__ ((aligned(16)));
+__m128i a_r_p1_p5 __attribute__ ((aligned(16)));
+__m128i a_r_p1_p7 __attribute__ ((aligned(16)));
+__m128i a_r_p3_m7 __attribute__ ((aligned(16)));
+__m128i a_r_p3_m5 __attribute__ ((aligned(16)));
+__m128i a_r_p3_m3 __attribute__ ((aligned(16)));
+__m128i a_r_p3_m1 __attribute__ ((aligned(16)));
+__m128i a_r_p3_p1 __attribute__ ((aligned(16)));
+__m128i a_r_p3_p3 __attribute__ ((aligned(16)));
+__m128i a_r_p3_p5 __attribute__ ((aligned(16)));
+__m128i a_r_p3_p7 __attribute__ ((aligned(16)));
+__m128i a_r_p5_m7 __attribute__ ((aligned(16)));
+__m128i a_r_p5_m5 __attribute__ ((aligned(16)));
+__m128i a_r_p5_m3 __attribute__ ((aligned(16)));
+__m128i a_r_p5_m1 __attribute__ ((aligned(16)));
+__m128i a_r_p5_p1 __attribute__ ((aligned(16)));
+__m128i a_r_p5_p3 __attribute__ ((aligned(16)));
+__m128i a_r_p5_p5 __attribute__ ((aligned(16)));
+__m128i a_r_p5_p7 __attribute__ ((aligned(16)));
+__m128i a_r_p7_m7 __attribute__ ((aligned(16)));
+__m128i a_r_p7_m5 __attribute__ ((aligned(16)));
+__m128i a_r_p7_m3 __attribute__ ((aligned(16)));
+__m128i a_r_p7_m1 __attribute__ ((aligned(16)));
+__m128i a_r_p7_p1 __attribute__ ((aligned(16)));
+__m128i a_r_p7_p3 __attribute__ ((aligned(16)));
+__m128i a_r_p7_p5 __attribute__ ((aligned(16)));
+__m128i a_r_p7_p7 __attribute__ ((aligned(16)));
+// a_i_<a>_<b>: 64 file-scope __m128i objects, same suffix grid as a_r_* ({m7,m5,m3,m1,p1,p3,p5,p7} squared). NOTE(review): presumably the imaginary-part counterpart of a_r_* - confirm at the use sites.
+__m128i a_i_m7_m7 __attribute__ ((aligned(16)));
+__m128i a_i_m7_m5 __attribute__ ((aligned(16)));
+__m128i a_i_m7_m3 __attribute__ ((aligned(16)));
+__m128i a_i_m7_m1 __attribute__ ((aligned(16)));
+__m128i a_i_m7_p1 __attribute__ ((aligned(16)));
+__m128i a_i_m7_p3 __attribute__ ((aligned(16)));
+__m128i a_i_m7_p5 __attribute__ ((aligned(16)));
+__m128i a_i_m7_p7 __attribute__ ((aligned(16)));
+__m128i a_i_m5_m7 __attribute__ ((aligned(16)));
+__m128i a_i_m5_m5 __attribute__ ((aligned(16)));
+__m128i a_i_m5_m3 __attribute__ ((aligned(16)));
+__m128i a_i_m5_m1 __attribute__ ((aligned(16)));
+__m128i a_i_m5_p1 __attribute__ ((aligned(16)));
+__m128i a_i_m5_p3 __attribute__ ((aligned(16)));
+__m128i a_i_m5_p5 __attribute__ ((aligned(16)));
+__m128i a_i_m5_p7 __attribute__ ((aligned(16)));
+__m128i a_i_m3_m7 __attribute__ ((aligned(16)));
+__m128i a_i_m3_m5 __attribute__ ((aligned(16)));
+__m128i a_i_m3_m3 __attribute__ ((aligned(16)));
+__m128i a_i_m3_m1 __attribute__ ((aligned(16)));
+__m128i a_i_m3_p1 __attribute__ ((aligned(16)));
+__m128i a_i_m3_p3 __attribute__ ((aligned(16)));
+__m128i a_i_m3_p5 __attribute__ ((aligned(16)));
+__m128i a_i_m3_p7 __attribute__ ((aligned(16)));
+__m128i a_i_m1_m7 __attribute__ ((aligned(16)));
+__m128i a_i_m1_m5 __attribute__ ((aligned(16)));
+__m128i a_i_m1_m3 __attribute__ ((aligned(16)));
+__m128i a_i_m1_m1 __attribute__ ((aligned(16)));
+__m128i a_i_m1_p1 __attribute__ ((aligned(16)));
+__m128i a_i_m1_p3 __attribute__ ((aligned(16)));
+__m128i a_i_m1_p5 __attribute__ ((aligned(16)));
+__m128i a_i_m1_p7 __attribute__ ((aligned(16)));
+__m128i a_i_p1_m7 __attribute__ ((aligned(16)));
+__m128i a_i_p1_m5 __attribute__ ((aligned(16)));
+__m128i a_i_p1_m3 __attribute__ ((aligned(16)));
+__m128i a_i_p1_m1 __attribute__ ((aligned(16)));
+__m128i a_i_p1_p1 __attribute__ ((aligned(16)));
+__m128i a_i_p1_p3 __attribute__ ((aligned(16)));
+__m128i a_i_p1_p5 __attribute__ ((aligned(16)));
+__m128i a_i_p1_p7 __attribute__ ((aligned(16)));
+__m128i a_i_p3_m7 __attribute__ ((aligned(16)));
+__m128i a_i_p3_m5 __attribute__ ((aligned(16)));
+__m128i a_i_p3_m3 __attribute__ ((aligned(16)));
+__m128i a_i_p3_m1 __attribute__ ((aligned(16)));
+__m128i a_i_p3_p1 __attribute__ ((aligned(16)));
+__m128i a_i_p3_p3 __attribute__ ((aligned(16)));
+__m128i a_i_p3_p5 __attribute__ ((aligned(16)));
+__m128i a_i_p3_p7 __attribute__ ((aligned(16)));
+__m128i a_i_p5_m7 __attribute__ ((aligned(16)));
+__m128i a_i_p5_m5 __attribute__ ((aligned(16)));
+__m128i a_i_p5_m3 __attribute__ ((aligned(16)));
+__m128i a_i_p5_m1 __attribute__ ((aligned(16)));
+__m128i a_i_p5_p1 __attribute__ ((aligned(16)));
+__m128i a_i_p5_p3 __attribute__ ((aligned(16)));
+__m128i a_i_p5_p5 __attribute__ ((aligned(16)));
+__m128i a_i_p5_p7 __attribute__ ((aligned(16)));
+__m128i a_i_p7_m7 __attribute__ ((aligned(16)));
+__m128i a_i_p7_m5 __attribute__ ((aligned(16)));
+__m128i a_i_p7_m3 __attribute__ ((aligned(16)));
+__m128i a_i_p7_m1 __attribute__ ((aligned(16)));
+__m128i a_i_p7_p1 __attribute__ ((aligned(16)));
+__m128i a_i_p7_p3 __attribute__ ((aligned(16)));
+__m128i a_i_p7_p5 __attribute__ ((aligned(16)));
+__m128i a_i_p7_p7 __attribute__ ((aligned(16)));
+// psi_a_<a>_<b>: 64 file-scope __m128i objects, one per ordered suffix pair from {m7,m5,m3,m1,p1,p3,p5,p7}; x86-only (inside the #if above). NOTE(review): role relative to psi_r_*/psi_i_*/a_* is not visible here - confirm at the use sites.
+__m128i psi_a_m7_m7 __attribute__ ((aligned(16)));
+__m128i psi_a_m7_m5 __attribute__ ((aligned(16)));
+__m128i psi_a_m7_m3 __attribute__ ((aligned(16)));
+__m128i psi_a_m7_m1 __attribute__ ((aligned(16)));
+__m128i psi_a_m7_p1 __attribute__ ((aligned(16)));
+__m128i psi_a_m7_p3 __attribute__ ((aligned(16)));
+__m128i psi_a_m7_p5 __attribute__ ((aligned(16)));
+__m128i psi_a_m7_p7 __attribute__ ((aligned(16)));
+__m128i psi_a_m5_m7 __attribute__ ((aligned(16)));
+__m128i psi_a_m5_m5 __attribute__ ((aligned(16)));
+__m128i psi_a_m5_m3 __attribute__ ((aligned(16)));
+__m128i psi_a_m5_m1 __attribute__ ((aligned(16)));
+__m128i psi_a_m5_p1 __attribute__ ((aligned(16)));
+__m128i psi_a_m5_p3 __attribute__ ((aligned(16)));
+__m128i psi_a_m5_p5 __attribute__ ((aligned(16)));
+__m128i psi_a_m5_p7 __attribute__ ((aligned(16)));
+__m128i psi_a_m3_m7 __attribute__ ((aligned(16)));
+__m128i psi_a_m3_m5 __attribute__ ((aligned(16)));
+__m128i psi_a_m3_m3 __attribute__ ((aligned(16)));
+__m128i psi_a_m3_m1 __attribute__ ((aligned(16)));
+__m128i psi_a_m3_p1 __attribute__ ((aligned(16)));
+__m128i psi_a_m3_p3 __attribute__ ((aligned(16)));
+__m128i psi_a_m3_p5 __attribute__ ((aligned(16)));
+__m128i psi_a_m3_p7 __attribute__ ((aligned(16)));
+__m128i psi_a_m1_m7 __attribute__ ((aligned(16)));
+__m128i psi_a_m1_m5 __attribute__ ((aligned(16)));
+__m128i psi_a_m1_m3 __attribute__ ((aligned(16)));
+__m128i psi_a_m1_m1 __attribute__ ((aligned(16)));
+__m128i psi_a_m1_p1 __attribute__ ((aligned(16)));
+__m128i psi_a_m1_p3 __attribute__ ((aligned(16)));
+__m128i psi_a_m1_p5 __attribute__ ((aligned(16)));
+__m128i psi_a_m1_p7 __attribute__ ((aligned(16)));
+__m128i psi_a_p1_m7 __attribute__ ((aligned(16)));
+__m128i psi_a_p1_m5 __attribute__ ((aligned(16)));
+__m128i psi_a_p1_m3 __attribute__ ((aligned(16)));
+__m128i psi_a_p1_m1 __attribute__ ((aligned(16)));
+__m128i psi_a_p1_p1 __attribute__ ((aligned(16)));
+__m128i psi_a_p1_p3 __attribute__ ((aligned(16)));
+__m128i psi_a_p1_p5 __attribute__ ((aligned(16)));
+__m128i psi_a_p1_p7 __attribute__ ((aligned(16)));
+__m128i psi_a_p3_m7 __attribute__ ((aligned(16)));
+__m128i psi_a_p3_m5 __attribute__ ((aligned(16)));
+__m128i psi_a_p3_m3 __attribute__ ((aligned(16)));
+__m128i psi_a_p3_m1 __attribute__ ((aligned(16)));
+__m128i psi_a_p3_p1 __attribute__ ((aligned(16)));
+__m128i psi_a_p3_p3 __attribute__ ((aligned(16)));
+__m128i psi_a_p3_p5 __attribute__ ((aligned(16)));
+__m128i psi_a_p3_p7 __attribute__ ((aligned(16)));
+__m128i psi_a_p5_m7 __attribute__ ((aligned(16)));
+__m128i psi_a_p5_m5 __attribute__ ((aligned(16)));
+__m128i psi_a_p5_m3 __attribute__ ((aligned(16)));
+__m128i psi_a_p5_m1 __attribute__ ((aligned(16)));
+__m128i psi_a_p5_p1 __attribute__ ((aligned(16)));
+__m128i psi_a_p5_p3 __attribute__ ((aligned(16)));
+__m128i psi_a_p5_p5 __attribute__ ((aligned(16)));
+__m128i psi_a_p5_p7 __attribute__ ((aligned(16)));
+__m128i psi_a_p7_m7 __attribute__ ((aligned(16)));
+__m128i psi_a_p7_m5 __attribute__ ((aligned(16)));
+__m128i psi_a_p7_m3 __attribute__ ((aligned(16)));
+__m128i psi_a_p7_m1 __attribute__ ((aligned(16)));
+__m128i psi_a_p7_p1 __attribute__ ((aligned(16)));
+__m128i psi_a_p7_p3 __attribute__ ((aligned(16)));
+__m128i psi_a_p7_p5 __attribute__ ((aligned(16)));
+__m128i psi_a_p7_p7 __attribute__ ((aligned(16)));
+// a_sq_<a>_<b>: 64 file-scope __m128i objects, one per ordered suffix pair from {m7,m5,m3,m1,p1,p3,p5,p7}; x86-only (inside the #if above). NOTE(review): "sq" presumably means a squared magnitude - confirm at the use sites.
+__m128i a_sq_m7_m7 __attribute__ ((aligned(16)));
+__m128i a_sq_m7_m5 __attribute__ ((aligned(16)));
+__m128i a_sq_m7_m3 __attribute__ ((aligned(16)));
+__m128i a_sq_m7_m1 __attribute__ ((aligned(16)));
+__m128i a_sq_m7_p1 __attribute__ ((aligned(16)));
+__m128i a_sq_m7_p3 __attribute__ ((aligned(16)));
+__m128i a_sq_m7_p5 __attribute__ ((aligned(16)));
+__m128i a_sq_m7_p7 __attribute__ ((aligned(16)));
+__m128i a_sq_m5_m7 __attribute__ ((aligned(16)));
+__m128i a_sq_m5_m5 __attribute__ ((aligned(16)));
+__m128i a_sq_m5_m3 __attribute__ ((aligned(16)));
+__m128i a_sq_m5_m1 __attribute__ ((aligned(16)));
+__m128i a_sq_m5_p1 __attribute__ ((aligned(16)));
+__m128i a_sq_m5_p3 __attribute__ ((aligned(16)));
+__m128i a_sq_m5_p5 __attribute__ ((aligned(16)));
+__m128i a_sq_m5_p7 __attribute__ ((aligned(16)));
+__m128i a_sq_m3_m7 __attribute__ ((aligned(16)));
+__m128i a_sq_m3_m5 __attribute__ ((aligned(16)));
+__m128i a_sq_m3_m3 __attribute__ ((aligned(16)));
+__m128i a_sq_m3_m1 __attribute__ ((aligned(16)));
+__m128i a_sq_m3_p1 __attribute__ ((aligned(16)));
+__m128i a_sq_m3_p3 __attribute__ ((aligned(16)));
+__m128i a_sq_m3_p5 __attribute__ ((aligned(16)));
+__m128i a_sq_m3_p7 __attribute__ ((aligned(16)));
+__m128i a_sq_m1_m7 __attribute__ ((aligned(16)));
+__m128i a_sq_m1_m5 __attribute__ ((aligned(16)));
+__m128i a_sq_m1_m3 __attribute__ ((aligned(16)));
+__m128i a_sq_m1_m1 __attribute__ ((aligned(16)));
+__m128i a_sq_m1_p1 __attribute__ ((aligned(16)));
+__m128i a_sq_m1_p3 __attribute__ ((aligned(16)));
+__m128i a_sq_m1_p5 __attribute__ ((aligned(16)));
+__m128i a_sq_m1_p7 __attribute__ ((aligned(16)));
+__m128i a_sq_p1_m7 __attribute__ ((aligned(16)));
+__m128i a_sq_p1_m5 __attribute__ ((aligned(16)));
+__m128i a_sq_p1_m3 __attribute__ ((aligned(16)));
+__m128i a_sq_p1_m1 __attribute__ ((aligned(16)));
+__m128i a_sq_p1_p1 __attribute__ ((aligned(16)));
+__m128i a_sq_p1_p3 __attribute__ ((aligned(16)));
+__m128i a_sq_p1_p5 __attribute__ ((aligned(16)));
+__m128i a_sq_p1_p7 __attribute__ ((aligned(16)));
+__m128i a_sq_p3_m7 __attribute__ ((aligned(16)));
+__m128i a_sq_p3_m5 __attribute__ ((aligned(16)));
+__m128i a_sq_p3_m3 __attribute__ ((aligned(16)));
+__m128i a_sq_p3_m1 __attribute__ ((aligned(16)));
+__m128i a_sq_p3_p1 __attribute__ ((aligned(16)));
+__m128i a_sq_p3_p3 __attribute__ ((aligned(16)));
+__m128i a_sq_p3_p5 __attribute__ ((aligned(16)));
+__m128i a_sq_p3_p7 __attribute__ ((aligned(16)));
+__m128i a_sq_p5_m7 __attribute__ ((aligned(16)));
+__m128i a_sq_p5_m5 __attribute__ ((aligned(16)));
+__m128i a_sq_p5_m3 __attribute__ ((aligned(16)));
+__m128i a_sq_p5_m1 __attribute__ ((aligned(16)));
+__m128i a_sq_p5_p1 __attribute__ ((aligned(16)));
+__m128i a_sq_p5_p3 __attribute__ ((aligned(16)));
+__m128i a_sq_p5_p5 __attribute__ ((aligned(16)));
+__m128i a_sq_p5_p7 __attribute__ ((aligned(16)));
+__m128i a_sq_p7_m7 __attribute__ ((aligned(16)));
+__m128i a_sq_p7_m5 __attribute__ ((aligned(16)));
+__m128i a_sq_p7_m3 __attribute__ ((aligned(16)));
+__m128i a_sq_p7_m1 __attribute__ ((aligned(16)));
+__m128i a_sq_p7_p1 __attribute__ ((aligned(16)));
+__m128i a_sq_p7_p3 __attribute__ ((aligned(16)));
+__m128i a_sq_p7_p5 __attribute__ ((aligned(16)));
+__m128i a_sq_p7_p7 __attribute__ ((aligned(16)));
+// bit_met_<a>_<b>: 64 file-scope __m128i objects, one per ordered suffix pair from {m7,m5,m3,m1,p1,p3,p5,p7}; x86-only (inside the #if above). NOTE(review): presumably per-candidate bit metrics for the LLR computation - confirm at the use sites.
+__m128i bit_met_m7_m7 __attribute__ ((aligned(16)));
+__m128i bit_met_m7_m5 __attribute__ ((aligned(16)));
+__m128i bit_met_m7_m3 __attribute__ ((aligned(16)));
+__m128i bit_met_m7_m1 __attribute__ ((aligned(16)));
+__m128i bit_met_m7_p1 __attribute__ ((aligned(16)));
+__m128i bit_met_m7_p3 __attribute__ ((aligned(16)));
+__m128i bit_met_m7_p5 __attribute__ ((aligned(16)));
+__m128i bit_met_m7_p7 __attribute__ ((aligned(16)));
+__m128i bit_met_m5_m7 __attribute__ ((aligned(16)));
+__m128i bit_met_m5_m5 __attribute__ ((aligned(16)));
+__m128i bit_met_m5_m3 __attribute__ ((aligned(16)));
+__m128i bit_met_m5_m1 __attribute__ ((aligned(16)));
+__m128i bit_met_m5_p1 __attribute__ ((aligned(16)));
+__m128i bit_met_m5_p3 __attribute__ ((aligned(16)));
+__m128i bit_met_m5_p5 __attribute__ ((aligned(16)));
+__m128i bit_met_m5_p7 __attribute__ ((aligned(16)));
+__m128i bit_met_m3_m7 __attribute__ ((aligned(16)));
+__m128i bit_met_m3_m5 __attribute__ ((aligned(16)));
+__m128i bit_met_m3_m3 __attribute__ ((aligned(16)));
+__m128i bit_met_m3_m1 __attribute__ ((aligned(16)));
+__m128i bit_met_m3_p1 __attribute__ ((aligned(16)));
+__m128i bit_met_m3_p3 __attribute__ ((aligned(16)));
+__m128i bit_met_m3_p5 __attribute__ ((aligned(16)));
+__m128i bit_met_m3_p7 __attribute__ ((aligned(16)));
+__m128i bit_met_m1_m7 __attribute__ ((aligned(16)));
+__m128i bit_met_m1_m5 __attribute__ ((aligned(16)));
+__m128i bit_met_m1_m3 __attribute__ ((aligned(16)));
+__m128i bit_met_m1_m1 __attribute__ ((aligned(16)));
+__m128i bit_met_m1_p1 __attribute__ ((aligned(16)));
+__m128i bit_met_m1_p3 __attribute__ ((aligned(16)));
+__m128i bit_met_m1_p5 __attribute__ ((aligned(16)));
+__m128i bit_met_m1_p7 __attribute__ ((aligned(16)));
+__m128i bit_met_p1_m7 __attribute__ ((aligned(16)));
+__m128i bit_met_p1_m5 __attribute__ ((aligned(16)));
+__m128i bit_met_p1_m3 __attribute__ ((aligned(16)));
+__m128i bit_met_p1_m1 __attribute__ ((aligned(16)));
+__m128i bit_met_p1_p1 __attribute__ ((aligned(16)));
+__m128i bit_met_p1_p3 __attribute__ ((aligned(16)));
+__m128i bit_met_p1_p5 __attribute__ ((aligned(16)));
+__m128i bit_met_p1_p7 __attribute__ ((aligned(16)));
+__m128i bit_met_p3_m7 __attribute__ ((aligned(16)));
+__m128i bit_met_p3_m5 __attribute__ ((aligned(16)));
+__m128i bit_met_p3_m3 __attribute__ ((aligned(16)));
+__m128i bit_met_p3_m1 __attribute__ ((aligned(16)));
+__m128i bit_met_p3_p1 __attribute__ ((aligned(16)));
+__m128i bit_met_p3_p3 __attribute__ ((aligned(16)));
+__m128i bit_met_p3_p5 __attribute__ ((aligned(16)));
+__m128i bit_met_p3_p7 __attribute__ ((aligned(16)));
+__m128i bit_met_p5_m7 __attribute__ ((aligned(16)));
+__m128i bit_met_p5_m5 __attribute__ ((aligned(16)));
+__m128i bit_met_p5_m3 __attribute__ ((aligned(16)));
+__m128i bit_met_p5_m1 __attribute__ ((aligned(16)));
+__m128i bit_met_p5_p1 __attribute__ ((aligned(16)));
+__m128i bit_met_p5_p3 __attribute__ ((aligned(16)));
+__m128i bit_met_p5_p5 __attribute__ ((aligned(16)));
+__m128i bit_met_p5_p7 __attribute__ ((aligned(16)));
+__m128i bit_met_p7_m7 __attribute__ ((aligned(16)));
+__m128i bit_met_p7_m5 __attribute__ ((aligned(16)));
+__m128i bit_met_p7_m3 __attribute__ ((aligned(16)));
+__m128i bit_met_p7_m1 __attribute__ ((aligned(16)));
+__m128i bit_met_p7_p1 __attribute__ ((aligned(16)));
+__m128i bit_met_p7_p3 __attribute__ ((aligned(16)));
+__m128i bit_met_p7_p5 __attribute__ ((aligned(16)));
+__m128i bit_met_p7_p7 __attribute__ ((aligned(16)));
+// y0_p_<a>_<b> / y0_m_<a>_<b>: 2x16 file-scope __m128i objects, one per pair a,b in {1,3,5,7}; x86-only (inside the #if above). NOTE(review): _p_/_m_ presumably mean plus/minus - confirm at the use sites.
+__m128i  y0_p_1_1 __attribute__ ((aligned(16)));
+__m128i  y0_p_1_3 __attribute__ ((aligned(16)));
+__m128i  y0_p_1_5 __attribute__ ((aligned(16)));
+__m128i  y0_p_1_7 __attribute__ ((aligned(16)));
+__m128i  y0_p_3_1 __attribute__ ((aligned(16)));
+__m128i  y0_p_3_3 __attribute__ ((aligned(16)));
+__m128i  y0_p_3_5 __attribute__ ((aligned(16)));
+__m128i  y0_p_3_7 __attribute__ ((aligned(16)));
+__m128i  y0_p_5_1 __attribute__ ((aligned(16)));
+__m128i  y0_p_5_3 __attribute__ ((aligned(16)));
+__m128i  y0_p_5_5 __attribute__ ((aligned(16)));
+__m128i  y0_p_5_7 __attribute__ ((aligned(16)));
+__m128i  y0_p_7_1 __attribute__ ((aligned(16)));
+__m128i  y0_p_7_3 __attribute__ ((aligned(16)));
+__m128i  y0_p_7_5 __attribute__ ((aligned(16)));
+__m128i  y0_p_7_7 __attribute__ ((aligned(16)));
+__m128i  y0_m_1_1 __attribute__ ((aligned(16)));
+__m128i  y0_m_1_3 __attribute__ ((aligned(16)));
+__m128i  y0_m_1_5 __attribute__ ((aligned(16)));
+__m128i  y0_m_1_7 __attribute__ ((aligned(16)));
+__m128i  y0_m_3_1 __attribute__ ((aligned(16)));
+__m128i  y0_m_3_3 __attribute__ ((aligned(16)));
+__m128i  y0_m_3_5 __attribute__ ((aligned(16)));
+__m128i  y0_m_3_7 __attribute__ ((aligned(16)));
+__m128i  y0_m_5_1 __attribute__ ((aligned(16)));
+__m128i  y0_m_5_3 __attribute__ ((aligned(16)));
+__m128i  y0_m_5_5 __attribute__ ((aligned(16)));
+__m128i  y0_m_5_7 __attribute__ ((aligned(16)));
+__m128i  y0_m_7_1 __attribute__ ((aligned(16)));
+__m128i  y0_m_7_3 __attribute__ ((aligned(16)));
+__m128i  y0_m_7_5 __attribute__ ((aligned(16)));
+__m128i  y0_m_7_7 __attribute__ ((aligned(16)));
+
+__m128i  xmm0 __attribute__ ((aligned(16)));
+__m128i  xmm1 __attribute__ ((aligned(16)));
+__m128i  xmm2 __attribute__ ((aligned(16)));
+__m128i  xmm3 __attribute__ ((aligned(16)));
+__m128i  xmm4 __attribute__ ((aligned(16)));
+__m128i  xmm5 __attribute__ ((aligned(16)));
+__m128i  xmm6 __attribute__ ((aligned(16)));
+__m128i  xmm7 __attribute__ ((aligned(16)));
+__m128i  xmm8 __attribute__ ((aligned(16)));
+
+__m128i  y0r __attribute__ ((aligned(16)));
+__m128i  y0i __attribute__ ((aligned(16)));
+__m128i  y1r __attribute__ ((aligned(16)));
+__m128i  y1i __attribute__ ((aligned(16)));
+__m128i  y2r __attribute__ ((aligned(16)));
+__m128i  y2i __attribute__ ((aligned(16)));
+
+__m128i  logmax_num_re0 __attribute__ ((aligned(16)));
+__m128i  logmax_num_im0 __attribute__ ((aligned(16)));
+__m128i  logmax_den_re0 __attribute__ ((aligned(16)));
+__m128i  logmax_den_im0 __attribute__ ((aligned(16)));
+__m128i  logmax_num_re1 __attribute__ ((aligned(16)));
+__m128i  logmax_num_im1 __attribute__ ((aligned(16)));
+__m128i  logmax_den_re1 __attribute__ ((aligned(16)));
+__m128i  logmax_den_im1 __attribute__ ((aligned(16)));
+
+__m128i tmp_result  __attribute__ ((aligned(16)));
+__m128i tmp_result2 __attribute__ ((aligned(16)));
+__m128i tmp_result3 __attribute__ ((aligned(16)));
+__m128i tmp_result4 __attribute__ ((aligned(16)));
+
+
+//==============================================================================================
+// Auxiliary macros
+// NOTE: every macro in this group expands to several bare statements (there is no do { } while (0) wrapper) and overwrites the file-scope temporaries tmp_result..tmp_result4; use it only as a complete statement inside a braced block.
+// calculates psi_a = psi_r*a_r + psi_i*a_i: each product is _mm_mulhi_epi16 (upper 16 bits of the 32-bit product) shifted left by 1, and the two products are combined with a saturating add. Overwrites tmp_result and tmp_result2.
+#define prodsum_psi_a_epi16(psi_r,a_r,psi_i,a_i,psi_a) tmp_result = _mm_mulhi_epi16(psi_r,a_r); tmp_result = _mm_slli_epi16(tmp_result,1); tmp_result2 = _mm_mulhi_epi16(psi_i,a_i); tmp_result2 = _mm_slli_epi16(tmp_result2,1); psi_a = _mm_adds_epi16(tmp_result,tmp_result2);
+
+// calculate interference magnitude: per 16-bit lane, int_mag = c1 where psi < int_ch_mag and c2 elsewhere. The complementary mask is built by XOR with the externally defined `ones` array (presumably all bits set -- confirm). Overwrites tmp_result and tmp_result2.
+#define interference_abs_epi16(psi,int_ch_mag,int_mag,c1,c2) tmp_result = _mm_cmplt_epi16(psi,int_ch_mag); tmp_result2 = _mm_xor_si128(tmp_result,(*(__m128i*)&ones[0])); tmp_result = _mm_and_si128(tmp_result,c1); tmp_result2 = _mm_and_si128(tmp_result2,c2); int_mag = _mm_or_si128(tmp_result,tmp_result2);
+
+// calculate interference magnitude for 64-QAM: per 16-bit lane, a = c1 where psi < int_ch_mag, c3 where int_ch_mag <= psi < int_two_ch_mag, c5 where int_two_ch_mag <= psi <= int_three_ch_mag, c7 where psi > int_three_ch_mag (assuming int_ch_mag <= int_two_ch_mag <= int_three_ch_mag and `ones` is all bits set -- confirm)
+// mask roles just before the final OR: tmp_result2 selects the lowest interval (c1), tmp_result the second (c3), tmp_result3 the third (c5) and tmp_result4 the highest (c7); all four temporaries are overwritten
+#define interference_abs_64qam_epi16(psi,int_ch_mag,int_two_ch_mag,int_three_ch_mag,a,c1,c3,c5,c7) tmp_result = _mm_cmplt_epi16(psi,int_two_ch_mag); tmp_result3 = _mm_xor_si128(tmp_result,(*(__m128i*)&ones[0])); tmp_result2 = _mm_cmplt_epi16(psi,int_ch_mag); tmp_result = _mm_xor_si128(tmp_result,tmp_result2); tmp_result4 = _mm_cmpgt_epi16(psi,int_three_ch_mag); tmp_result3 = _mm_xor_si128(tmp_result3,tmp_result4); tmp_result = _mm_and_si128(tmp_result,c3); tmp_result2 = _mm_and_si128(tmp_result2,c1); tmp_result3 = _mm_and_si128(tmp_result3,c5); tmp_result4 = _mm_and_si128(tmp_result4,c7); tmp_result = _mm_or_si128(tmp_result,tmp_result2); tmp_result3 = _mm_or_si128(tmp_result3,tmp_result4); a = _mm_or_si128(tmp_result,tmp_result3);
+
+// calculates a_sq = int_ch_mag*(a_r^2 + a_i^2)*scale_factor: each of the three multiplies per term is _mm_mulhi_epi16 followed by a left shift of 1; the a_r and a_i terms are combined with a saturating add. Overwrites tmp_result and tmp_result2.
+#define square_a_epi16(a_r,a_i,int_ch_mag,scale_factor,a_sq) tmp_result = _mm_mulhi_epi16(a_r,a_r); tmp_result = _mm_slli_epi16(tmp_result,1); tmp_result = _mm_mulhi_epi16(tmp_result,scale_factor); tmp_result = _mm_slli_epi16(tmp_result,1); tmp_result = _mm_mulhi_epi16(tmp_result,int_ch_mag); tmp_result = _mm_slli_epi16(tmp_result,1); tmp_result2 = _mm_mulhi_epi16(a_i,a_i); tmp_result2 = _mm_slli_epi16(tmp_result2,1); tmp_result2 = _mm_mulhi_epi16(tmp_result2,scale_factor); tmp_result2 = _mm_slli_epi16(tmp_result2,1); tmp_result2 = _mm_mulhi_epi16(tmp_result2,int_ch_mag); tmp_result2 = _mm_slli_epi16(tmp_result2,1); a_sq = _mm_adds_epi16(tmp_result,tmp_result2);
+
+// calculates a_sq = int_ch_mag*(a_r^2 + a_i^2)*scale_factor for 64-QAM: identical to square_a_epi16 except that the shift after the scale_factor multiply is 3 instead of 1. Overwrites tmp_result and tmp_result2.
+#define square_a_64qam_epi16(a_r,a_i,int_ch_mag,scale_factor,a_sq)  tmp_result = _mm_mulhi_epi16(a_r,a_r); tmp_result = _mm_slli_epi16(tmp_result,1); tmp_result = _mm_mulhi_epi16(tmp_result,scale_factor); tmp_result = _mm_slli_epi16(tmp_result,3); tmp_result = _mm_mulhi_epi16(tmp_result,int_ch_mag); tmp_result = _mm_slli_epi16(tmp_result,1); tmp_result2 = _mm_mulhi_epi16(a_i,a_i); tmp_result2 = _mm_slli_epi16(tmp_result2,1); tmp_result2 = _mm_mulhi_epi16(tmp_result2,scale_factor); tmp_result2 = _mm_slli_epi16(tmp_result2,3); tmp_result2 = _mm_mulhi_epi16(tmp_result2,int_ch_mag); tmp_result2 = _mm_slli_epi16(tmp_result2,1); a_sq = _mm_adds_epi16(tmp_result,tmp_result2);
+
+#elif defined(__arm__)
+
+#endif
+
+//==============================================================================================
+// SINGLE-STREAM
+//==============================================================================================
+
+//----------------------------------------------------------------------------------------------
+// QPSK
+//----------------------------------------------------------------------------------------------
+
+/*
+ * dlsch_qpsk_llr: produce the QPSK LLRs of one OFDM symbol (single stream).
+ *
+ * The compensated samples are used as LLRs without further processing: a run
+ * of 32-bit words is copied from rxdataF_comp[0], starting at word offset
+ * symbol*N_RB_DL*12, to the start of dlsch_llr.
+ *
+ * Number of words copied:
+ *   - symbol_mod == 0 or symbol_mod == 4-Ncp:
+ *       nb_rb*8  - 2*pbch_pss_sss_adjust/3   when nb_antenna_ports_eNB != 1
+ *       nb_rb*10 - 5*pbch_pss_sss_adjust/6   otherwise
+ *   - beamforming_mode 7, Ncp == 0, symbol in {3,6,9,12}:
+ *       nb_rb*9  - 3*pbch_pss_sss_adjust/4
+ *   - beamforming_mode 7, Ncp == 1, symbol in {4,7,10}:
+ *       nb_rb*8  - 2*pbch_pss_sss_adjust/3
+ *   - any other symbol:
+ *       nb_rb*12 - pbch_pss_sss_adjust
+ *
+ * first_symbol_flag is part of the interface but is not read.
+ *
+ * Returns 0 on success, or -1 (after logging an error) if dlsch_llr is NULL.
+ */
+int dlsch_qpsk_llr(LTE_DL_FRAME_PARMS *frame_parms,
+                   int32_t **rxdataF_comp,
+                   int16_t *dlsch_llr,
+                   uint8_t symbol,
+                   uint8_t first_symbol_flag,
+                   uint16_t nb_rb,
+                   uint16_t pbch_pss_sss_adjust,
+                   uint8_t beamforming_mode)
+{
+  uint32_t *src = (uint32_t*)&rxdataF_comp[0][((int32_t)symbol*frame_parms->N_RB_DL*12)];
+  uint32_t *dst = (uint32_t*)dlsch_llr;
+  uint8_t symbol_mod;
+  int on_pilot_symbol;
+  int on_bf_symbol_ncp0;
+  int on_bf_symbol_ncp1;
+  int n_words;
+  int k;
+
+  // symbol index within its slot
+  if (symbol >= (7-frame_parms->Ncp))
+    symbol_mod = symbol-(7-frame_parms->Ncp);
+  else
+    symbol_mod = symbol;
+
+  if (!dst) {
+    LOG_E(PHY,"dlsch_qpsk_llr: llr is null, symbol %d, llr32=%p\n",symbol, dst);
+    return(-1);
+  }
+
+  on_pilot_symbol   = (symbol_mod==0) || (symbol_mod==(4-frame_parms->Ncp));
+  on_bf_symbol_ncp0 = (beamforming_mode==7) && (frame_parms->Ncp==0) &&
+                      (symbol==3 || symbol==6 || symbol==9 || symbol==12);
+  on_bf_symbol_ncp1 = (beamforming_mode==7) && (frame_parms->Ncp==1) &&
+                      (symbol==4 || symbol==7 || symbol==10);
+
+  // same precedence as the symbol-type list in the header comment
+  if (on_pilot_symbol) {
+    n_words = (frame_parms->nb_antenna_ports_eNB!=1) ?
+              ((nb_rb*8) - (2*pbch_pss_sss_adjust/3)) :
+              ((nb_rb*10) - (5*pbch_pss_sss_adjust/6));
+  } else if (on_bf_symbol_ncp0) {
+    n_words = (nb_rb*9) - (3*pbch_pss_sss_adjust/4);
+  } else if (on_bf_symbol_ncp1) {
+    n_words = (nb_rb*8) - (2*pbch_pss_sss_adjust/3);
+  } else {
+    n_words = (nb_rb*12) - pbch_pss_sss_adjust;
+  }
+
+  // word-by-word copy, lowest index first
+  for (k=0; k<n_words; k++)
+    dst[k] = src[k];
+
+  return(0);
+}
+
+/*
+ * dlsch_qpsk_llr_SIC: QPSK LLRs after successive interference cancellation.
+ *
+ * For every symbol from num_pdcch_symbols up to the end of the subframe
+ * (14 symbols when Ncp == 0, otherwise 12):
+ *   1. scale the matching stretch of sic_buffer[0] by a real amplitude
+ *      (0x1fff, doubled when mod_order_0 == 6),
+ *   2. multiply the result by rho_i[0] for that symbol,
+ *   3. subtract the product from rxdataF_comp[0] IN PLACE,
+ *   4. append the 2*len int16 values of the cleaned samples to dlsch_llr.
+ *
+ * len is the RE count of the symbol, derived from nb_rb and the adjustment
+ * returned by adjust_G2(); sic_buffer is consumed contiguously (len entries
+ * per symbol), whereas rxdataF_comp and rho_i are indexed by symbol.
+ *
+ * Always returns 0.
+ */
+int32_t dlsch_qpsk_llr_SIC(LTE_DL_FRAME_PARMS *frame_parms,
+                           int32_t **rxdataF_comp,
+                           int32_t **sic_buffer,  //Q15
+                           int32_t **rho_i,
+                           short *dlsch_llr,
+                           uint8_t num_pdcch_symbols,
+                           uint16_t nb_rb,
+                           uint8_t subframe,
+                           uint16_t mod_order_0,
+                           uint32_t rb_alloc)
+{
+  // one-symbol scratch buffers
+  int16_t scaled_x0[2*frame_parms->N_RB_DL*12];   // amplitude * sic_buffer, Q13
+  int16_t interference[2*frame_parms->N_RB_DL*12]; // rho * scaled_x0
+  uint16_t *llr_out = (uint16_t*)dlsch_llr;
+  uint16_t *x0;
+  uint16_t amp;
+  uint16_t re_adjust;
+  uint8_t symbol, symbol_mod;
+  int k, n_re;
+  int re_consumed = 0;
+  const int nsymb = (frame_parms->Ncp==0) ? 14 : 12;
+
+  // The amplitude is identical on pilot and non-pilot symbols (the rho_a/rho_b
+  // scaling is already taken into account upstream); only the modulation order
+  // of the cancelled stream matters.
+  amp = 0x1fff;
+
+  if (mod_order_0==6)
+    amp = amp<<1; // to compensate for >> 1 shift in modulation
+
+  for (symbol=num_pdcch_symbols; symbol<nsymb; symbol++) {
+    uint16_t *rxF = (uint16_t*)(&rxdataF_comp[0][((int16_t)symbol*frame_parms->N_RB_DL*12)]);
+    int16_t *rho_1 = (int16_t*)(&rho_i[0][((int16_t)symbol*frame_parms->N_RB_DL*12)]);
+
+    x0 = (uint16_t*)&sic_buffer[0][((int16_t)re_consumed)];
+
+    if (symbol>=(7-frame_parms->Ncp))
+      symbol_mod = symbol-(7-frame_parms->Ncp);
+    else
+      symbol_mod = symbol;
+
+    re_adjust = adjust_G2(frame_parms,&rb_alloc,2,subframe,symbol);
+
+    if ((symbol_mod!=0) && (symbol_mod!=(4-frame_parms->Ncp))) {
+      n_re = (nb_rb*12) - re_adjust;
+    } else if (frame_parms->nb_antenna_ports_eNB!=1) {
+      n_re = (nb_rb*8) - (2*re_adjust/3);
+    } else {
+      n_re = (nb_rb*10) - (5*re_adjust/6);
+    }
+
+    // sic_buffer holds data symbols only, hence the running offset
+    re_consumed += n_re;
+
+    multadd_complex_vector_real_scalar((int16_t *)x0,
+                                       amp,
+                                       (int16_t *)scaled_x0, //this is in Q13
+                                       1,
+                                       n_re);
+
+    mult_cpx_vector((int16_t *)rho_1, //Q15
+                    (int16_t *)scaled_x0, //Q13
+                    (int16_t*)interference,
+                    n_re,
+                    13);
+
+#ifdef DEBUG_LLR_SIC
+    {
+      int dump_fmt = 13;
+
+      if (symbol==num_pdcch_symbols)
+        dump_fmt = 15;
+      else if (symbol==nsymb-1)
+        dump_fmt = 14;
+
+      write_output("rho_for_multipl.m","rho_for_m", rho_1,n_re,1,dump_fmt);
+      write_output("rho_rho_in_llr.m","rho2", interference,n_re,1,dump_fmt);
+    }
+#endif
+
+    // remove the reconstructed interference from the received samples (in place)
+    sub_cpx_vector16((int16_t *)rxF,
+                     (int16_t *)interference,
+                     (int16_t *)rxF,
+                     n_re*2);
+
+#ifdef DEBUG_LLR_SIC
+    write_output("rxFdata_comp1_after.m","rxF_a", rxF,n_re,1,1);
+    write_output("rxF_comp1.m","rxF_1_comp", rxF,n_re,1,
+                 symbol==num_pdcch_symbols ? 15 :
+                 symbol==nsymb-1 ? 14 : 13);
+#endif
+
+    // QPSK only: the cleaned samples are the LLRs
+    for (k=0; k<n_re*2; k++)
+      *llr_out++ = rxF[k];
+  }
+
+  return(0);
+}
+
+
+//----------------------------------------------------------------------------------------------
+// 16-QAM
+//----------------------------------------------------------------------------------------------
+
+/*
+ * dlsch_16qam_llr: 16-QAM LLRs for one OFDM symbol (single stream).
+ *
+ * Inputs are read from rxdataF_comp[0] and dl_ch_mag[0] at sample offset
+ * symbol*N_RB_DL*12, eight int16 lanes (4 REs) per vector. For every RE four
+ * int16 values are written: the two lanes of the compensated sample followed
+ * by the two lanes of the saturating difference ch_mag - |sample|.
+ *
+ * Output position: dlsch_llr when first_symbol_flag == 1, otherwise *llr32p.
+ * On return *llr32p points 4*len int16 past that position, where len is the
+ * RE count of the symbol (depends on symbol type, antenna ports,
+ * beamforming_mode and pbch_pss_sss_adjust).
+ *
+ * The loop works on whole vectors, i.e. ceil(len/4) of them, so when len is
+ * not a multiple of 4 up to 3 extra REs worth of values are written past the
+ * position stored in *llr32p; the next call overwrites them.
+ * NOTE(review): the output buffer therefore needs that much slack after the
+ * last symbol -- confirm with the allocation site.
+ *
+ * On x86 the file-scope scratch vector xmm0 is overwritten.
+ */
+void dlsch_16qam_llr(LTE_DL_FRAME_PARMS *frame_parms,
+                     int32_t **rxdataF_comp,
+                     int16_t *dlsch_llr,
+                     int32_t **dl_ch_mag,
+                     uint8_t symbol,
+                     uint8_t first_symbol_flag,
+                     uint16_t nb_rb,
+                     uint16_t pbch_pss_sss_adjust,
+                     int16_t **llr32p,
+                     uint8_t beamforming_mode)
+{
+
+#if defined(__x86_64__) || defined(__i386__)
+  __m128i *rxF = (__m128i*)&rxdataF_comp[0][(symbol*frame_parms->N_RB_DL*12)];
+  __m128i *ch_mag;
+  __m128i llr128[2];
+  uint32_t *llr32;
+#elif defined(__arm__)
+  int16x8_t *rxF = (int16x8_t*)&rxdataF_comp[0][(symbol*frame_parms->N_RB_DL*12)];
+  int16x8_t *ch_mag;
+  int16x8_t xmm0;
+  int16_t *llr16;
+#endif
+
+
+  int i,len;
+  unsigned char symbol_mod,len_mod4=0;
+
+
+  // pick the write position before *llr32p is advanced below
+#if defined(__x86_64__) || defined(__i386__)
+  if (first_symbol_flag==1) {
+    llr32 = (uint32_t*)dlsch_llr;
+  } else {
+    llr32 = (uint32_t*)*llr32p;
+  }
+#elif defined(__arm__)
+  if (first_symbol_flag==1) {
+    llr16 = (int16_t*)dlsch_llr;
+  } else {
+    llr16 = (int16_t*)*llr32p;
+  }
+#endif
+
+  // symbol index within its slot
+  symbol_mod = (symbol>=(7-frame_parms->Ncp)) ? symbol-(7-frame_parms->Ncp) : symbol;
+
+#if defined(__x86_64__) || defined(__i386__)
+  ch_mag = (__m128i*)&dl_ch_mag[0][(symbol*frame_parms->N_RB_DL*12)];
+#elif defined(__arm__)
+  ch_mag = (int16x8_t*)&dl_ch_mag[0][(symbol*frame_parms->N_RB_DL*12)];
+#endif
+  if ((symbol_mod==0) || (symbol_mod==(4-frame_parms->Ncp))) {
+    if (frame_parms->nb_antenna_ports_eNB!=1)
+      len = (nb_rb*8) - (2*pbch_pss_sss_adjust/3);
+    else
+      len = (nb_rb*10) - (5*pbch_pss_sss_adjust/6);
+  } else if((beamforming_mode==7) && (frame_parms->Ncp==0) && (symbol==3 || symbol==6 || symbol==9 || symbol==12)){
+      len = (nb_rb*9) - (3*pbch_pss_sss_adjust/4);
+  } else if((beamforming_mode==7) && (frame_parms->Ncp==1) && (symbol==4 || symbol==7 || symbol==10)){
+      len = (nb_rb*8) - (2*pbch_pss_sss_adjust/3);
+  } else {
+    len = (nb_rb*12) - pbch_pss_sss_adjust;
+  }
+
+  // update output pointer according to number of REs in this symbol (<<2 because 4 bits per RE)
+  if (first_symbol_flag == 1)
+    *llr32p = dlsch_llr + (len<<2);
+  else
+    *llr32p += (len<<2);
+
+  // number of 4-RE vectors, rounded up
+  len_mod4 = len&3;
+  len>>=2;  // length in quad words (4 REs)
+  len+=(len_mod4==0 ? 0 : 1);
+
+  for (i=0; i<len; i++) {
+
+    // same guard spelling as the rest of the function (was __i386 here)
+#if defined(__x86_64__) || defined(__i386__)
+    xmm0 = _mm_abs_epi16(rxF[i]);
+    xmm0 = _mm_subs_epi16(ch_mag[i],xmm0);
+
+    // per RE: 32-bit sample word, then 32-bit word of ch_mag - |sample|
+    llr128[0] = _mm_unpacklo_epi32(rxF[i],xmm0);
+    llr128[1] = _mm_unpackhi_epi32(rxF[i],xmm0);
+    llr32[0] = _mm_extract_epi32(llr128[0],0);
+    llr32[1] = _mm_extract_epi32(llr128[0],1);
+    llr32[2] = _mm_extract_epi32(llr128[0],2);
+    llr32[3] = _mm_extract_epi32(llr128[0],3);
+    llr32[4] = _mm_extract_epi32(llr128[1],0);
+    llr32[5] = _mm_extract_epi32(llr128[1],1);
+    llr32[6] = _mm_extract_epi32(llr128[1],2);
+    llr32[7] = _mm_extract_epi32(llr128[1],3);
+    llr32+=8;
+#elif defined(__arm__)
+    xmm0 = vabsq_s16(rxF[i]);
+    xmm0 = vqsubq_s16(ch_mag[i],xmm0);
+
+    // per RE k (k=0..3): sample lanes 2k,2k+1 then difference lanes 2k,2k+1,
+    // i.e. the same layout the x86 unpack produces
+    llr16[0] = vgetq_lane_s16(rxF[i],0);
+    llr16[1] = vgetq_lane_s16(rxF[i],1);
+    llr16[2] = vgetq_lane_s16(xmm0,0);
+    llr16[3] = vgetq_lane_s16(xmm0,1);
+    llr16[4] = vgetq_lane_s16(rxF[i],2);
+    llr16[5] = vgetq_lane_s16(rxF[i],3);
+    llr16[6] = vgetq_lane_s16(xmm0,2);
+    llr16[7] = vgetq_lane_s16(xmm0,3);
+    llr16[8] = vgetq_lane_s16(rxF[i],4);
+    llr16[9] = vgetq_lane_s16(rxF[i],5);
+    llr16[10] = vgetq_lane_s16(xmm0,4);
+    llr16[11] = vgetq_lane_s16(xmm0,5);
+    // last RE: lanes 6 and 7 (lane 6 used to be stored twice for the sample
+    // and lane 7 twice for the difference)
+    llr16[12] = vgetq_lane_s16(rxF[i],6);
+    llr16[13] = vgetq_lane_s16(rxF[i],7);
+    llr16[14] = vgetq_lane_s16(xmm0,6);
+    llr16[15] = vgetq_lane_s16(xmm0,7);
+    llr16+=16;
+#endif
+
+  }
+
+#if defined(__x86_64__) || defined(__i386__)
+  _mm_empty();
+  _m_empty();
+#endif
+}
+
+/*
+ * dlsch_16qam_llr_SIC: 16-QAM LLRs after successive interference cancellation.
+ *
+ * For every symbol from num_pdcch_symbols up to the end of the subframe
+ * (14 symbols when Ncp == 0, otherwise 12):
+ *   1. scale the matching stretch of sic_buffer[0] by a real amplitude
+ *      (0x1fff, doubled when mod_order_0 == 6),
+ *   2. multiply it by rho_i[0] of that symbol and subtract the product from
+ *      rxdataF_comp[0] IN PLACE,
+ *   3. write four int16 LLRs per RE to dlsch_llr: the two lanes of the cleaned
+ *      sample followed by the two lanes of the saturating difference
+ *      dl_ch_mag - |sample|.
+ *
+ * The LLRs of consecutive symbols are packed back to back: the write position
+ * advances by exactly 4*len int16 per symbol, where len is the RE count of the
+ * symbol. The vector loop still processes ceil(len/4) vectors, so when len is
+ * not a multiple of 4 the surplus values are overwritten by the next symbol.
+ * NOTE(review): after the last symbol up to 3 REs worth of surplus values may
+ * lie past the valid LLRs -- confirm the buffer has that slack.
+ *
+ * Uses SSE intrinsics unconditionally and overwrites the file-scope scratch
+ * vector xmm0.
+ */
+void dlsch_16qam_llr_SIC (LTE_DL_FRAME_PARMS *frame_parms,
+                          int32_t **rxdataF_comp,
+                          int32_t **sic_buffer,  //Q15
+                          int32_t **rho_i,
+                          int16_t *dlsch_llr,
+                          uint8_t num_pdcch_symbols,
+                          int32_t **dl_ch_mag,
+                          uint16_t nb_rb,
+                          uint8_t subframe,
+                          uint16_t mod_order_0,
+                          uint32_t rb_alloc)
+{
+  int16_t rho_amp_x0[2*frame_parms->N_RB_DL*12];
+  int16_t rho_rho_amp_x0[2*frame_parms->N_RB_DL*12];
+  uint16_t amp_tmp;
+  uint32_t *llr32=(uint32_t*)dlsch_llr;  // start of the current symbol's LLRs
+  uint32_t *llr_sym;                     // write cursor inside the current symbol
+  int i, len, len_vec, nsymb;
+  uint8_t symbol, symbol_mod;
+  int len_acc=0;
+  uint16_t *sic_data;
+  uint16_t pbch_pss_sss_adjust;
+  unsigned char len_mod4=0;
+  __m128i llr128[2];
+  __m128i *ch_mag;
+  __m128i *x1;
+
+  nsymb = (frame_parms->Ncp==0) ? 14:12;
+
+  for (symbol=num_pdcch_symbols; symbol<nsymb; symbol++) {
+    uint16_t *rxF = (uint16_t*)(&rxdataF_comp[0][((int16_t)symbol*frame_parms->N_RB_DL*12)]);
+    int16_t *rho_1=(int16_t*)(&rho_i[0][((int16_t)symbol*frame_parms->N_RB_DL*12)]);
+    ch_mag = (__m128i*)(&dl_ch_mag[0][((int16_t)symbol*frame_parms->N_RB_DL*12)]);
+    // sic_buffer holds data REs only, hence the running offset
+    sic_data = (uint16_t*)(&sic_buffer[0][((int16_t)len_acc)]);
+
+    symbol_mod = (symbol>=(7-frame_parms->Ncp)) ? symbol-(7-frame_parms->Ncp) : symbol;
+
+    pbch_pss_sss_adjust=adjust_G2(frame_parms,&rb_alloc,4,subframe,symbol);
+
+    if ((symbol_mod==0) || (symbol_mod==(4-frame_parms->Ncp))) {
+      amp_tmp=0x1fff;//dlsch0->sqrt_rho_b; already taken into account
+      if (frame_parms->nb_antenna_ports_eNB!=1)
+        len = nb_rb*8 - (2*pbch_pss_sss_adjust/3);
+      else
+        len = nb_rb*10 - (5*pbch_pss_sss_adjust/6);
+    } else {
+      amp_tmp=0x1fff;//dlsch0->sqrt_rho_a; already taken into account
+      len = nb_rb*12 - pbch_pss_sss_adjust;
+    }
+
+    if (mod_order_0==6)
+      amp_tmp=amp_tmp<<1; // to compensate for >> 1 shift in modulation
+
+    len_acc+=len;
+
+    multadd_complex_vector_real_scalar((int16_t *)sic_data,
+                                       amp_tmp,
+                                       (int16_t *)rho_amp_x0, //this is in Q13
+                                       1,
+                                       len);
+
+    mult_cpx_vector((int16_t *)rho_1, //Q15
+                    (int16_t *)rho_amp_x0, //Q13
+                    (int16_t*)rho_rho_amp_x0,
+                    len,
+                    13);
+
+    // remove the reconstructed interference from the received samples (in place)
+    sub_cpx_vector16((int16_t *)rxF,
+                     (int16_t *)rho_rho_amp_x0,
+                     (int16_t *)rxF,
+                     len*2);
+
+    // This symbol contributes exactly len REs = 2*len 32-bit words. Advance the
+    // symbol start by that amount rather than by the rounded-up vector count,
+    // so no gap is left in the LLR stream when len is not a multiple of 4.
+    llr_sym = llr32;
+    llr32 += 2*len;
+
+    // number of 4-RE vectors, rounded up
+    len_mod4 = len&3;
+    len_vec = len>>2;
+    len_vec += (len_mod4==0 ? 0 : 1);
+
+    x1 = (__m128i*)rxF;
+
+    for (i=0; i<len_vec; i++) {
+      xmm0 = _mm_abs_epi16(x1[i]);
+      xmm0 = _mm_subs_epi16(ch_mag[i],xmm0);
+
+      // per RE: 32-bit sample word, then 32-bit word of ch_mag - |sample|
+      llr128[0] = _mm_unpacklo_epi32(x1[i],xmm0);
+      llr128[1] = _mm_unpackhi_epi32(x1[i],xmm0);
+      llr_sym[0] = _mm_extract_epi32(llr128[0],0);
+      llr_sym[1] = _mm_extract_epi32(llr128[0],1);
+      llr_sym[2] = _mm_extract_epi32(llr128[0],2);
+      llr_sym[3] = _mm_extract_epi32(llr128[0],3);
+      llr_sym[4] = _mm_extract_epi32(llr128[1],0);
+      llr_sym[5] = _mm_extract_epi32(llr128[1],1);
+      llr_sym[6] = _mm_extract_epi32(llr128[1],2);
+      llr_sym[7] = _mm_extract_epi32(llr128[1],3);
+      llr_sym+=8;
+    }
+
+    _mm_empty();
+    _m_empty();
+  }
+}
+
+//----------------------------------------------------------------------------------------------
+// 64-QAM
+//----------------------------------------------------------------------------------------------
+
+// Reads one int16 lane of a 128-bit vector; the lane index must be a
+// compile-time constant. Private to dlsch_64qam_llr (undefined right after it).
+#if defined(__x86_64__) || defined(__i386__)
+#define DLSCH_64QAM_LANE(v,lane) _mm_extract_epi16(v,lane)
+#elif defined(__arm__)
+#define DLSCH_64QAM_LANE(v,lane) vgetq_lane_s16(v,lane)
+#endif
+
+/*
+ * dlsch_64qam_llr: 64-QAM LLRs for one OFDM symbol (single stream).
+ *
+ * Inputs are read from rxdataF_comp[0], dl_ch_mag[0] and dl_ch_magb[0] at
+ * sample offset symbol*N_RB_DL*12, eight int16 lanes (4 REs) per vector.
+ * With m1 = ch_mag - |sample| and m2 = ch_magb - |m1| (both saturating), six
+ * int16 values are written per RE, starting at dlsch_llr: the two lanes of
+ * the sample, the two lanes of m1 and the two lanes of m2.
+ *
+ * The RE count len depends on symbol type, antenna ports, beamforming_mode
+ * and pbch_pss_sss_adjust; ceil(len/4) vectors are processed, so up to 3
+ * extra REs worth of values are written when len is not a multiple of 4.
+ *
+ * first_symbol_flag and llr_offset are part of the interface but do not
+ * influence the output position.
+ * NOTE(review): the original computed dlsch_llr+llr_offset into a pointer that
+ * was never used; confirm callers already pass the per-symbol position in
+ * dlsch_llr.
+ *
+ * On x86 the file-scope scratch vectors xmm1 and xmm2 are overwritten.
+ */
+void dlsch_64qam_llr(LTE_DL_FRAME_PARMS *frame_parms,
+                     int32_t **rxdataF_comp,
+                     int16_t *dlsch_llr,
+                     int32_t **dl_ch_mag,
+                     int32_t **dl_ch_magb,
+                     uint8_t symbol,
+                     uint8_t first_symbol_flag,
+                     uint16_t nb_rb,
+                     uint16_t pbch_pss_sss_adjust,
+                     //int16_t **llr_save,
+                     uint32_t llr_offset,
+                     uint8_t beamforming_mode)
+{
+#if defined(__x86_64__) || defined(__i386__)
+  __m128i *rxF = (__m128i*)&rxdataF_comp[0][(symbol*frame_parms->N_RB_DL*12)];
+  __m128i *ch_mag,*ch_magb;
+#elif defined(__arm__)
+  int16x8_t *rxF = (int16x8_t*)&rxdataF_comp[0][(symbol*frame_parms->N_RB_DL*12)];
+  int16x8_t *ch_mag,*ch_magb,xmm1,xmm2;
+#endif
+  int i,len,len2;
+  unsigned char symbol_mod,len_mod4;
+  int16_t *llr2 = dlsch_llr;
+  short *y;
+
+  (void)first_symbol_flag;
+  (void)llr_offset;
+
+  // symbol index within its slot
+  symbol_mod = (symbol>=(7-frame_parms->Ncp)) ? symbol-(7-frame_parms->Ncp) : symbol;
+
+#if defined(__x86_64__) || defined(__i386__)
+  ch_mag = (__m128i*)&dl_ch_mag[0][(symbol*frame_parms->N_RB_DL*12)];
+  ch_magb = (__m128i*)&dl_ch_magb[0][(symbol*frame_parms->N_RB_DL*12)];
+#elif defined(__arm__)
+  ch_mag = (int16x8_t*)&dl_ch_mag[0][(symbol*frame_parms->N_RB_DL*12)];
+  ch_magb = (int16x8_t*)&dl_ch_magb[0][(symbol*frame_parms->N_RB_DL*12)];
+#endif
+  if ((symbol_mod==0) || (symbol_mod==(4-frame_parms->Ncp))) {
+    if (frame_parms->nb_antenna_ports_eNB!=1)
+      len = (nb_rb*8) - (2*pbch_pss_sss_adjust/3);
+    else
+      len = (nb_rb*10) - (5*pbch_pss_sss_adjust/6);
+  } else if((beamforming_mode==7) && (frame_parms->Ncp==0) && (symbol==3 || symbol==6 || symbol==9 || symbol==12)){
+      len = (nb_rb*9) - (3*pbch_pss_sss_adjust/4);
+  } else if((beamforming_mode==7) && (frame_parms->Ncp==1) && (symbol==4 || symbol==7 || symbol==10)){
+      len = (nb_rb*8) - (2*pbch_pss_sss_adjust/3);
+  } else {
+    len = (nb_rb*12) - pbch_pss_sss_adjust;
+  }
+
+  // number of 4-RE vectors, rounded up
+  len_mod4 =len&3;
+  len2=len>>2;  // length in quad words (4 REs)
+  len2+=((len_mod4==0)?0:1);
+
+  for (i=0; i<len2; i++) {
+
+#if defined(__x86_64__) || defined(__i386__)
+    xmm1 = _mm_abs_epi16(rxF[i]);
+    xmm1 = _mm_subs_epi16(ch_mag[i],xmm1);
+    xmm2 = _mm_abs_epi16(xmm1);
+    xmm2 = _mm_subs_epi16(ch_magb[i],xmm2);
+#elif defined(__arm__)
+    // saturating subtraction, matching _mm_subs_epi16 on x86 and the 16-QAM
+    // ARM path (the wrapping vsubq_s16 was used here before)
+    xmm1 = vabsq_s16(rxF[i]);
+    xmm1 = vqsubq_s16(ch_mag[i],xmm1);
+    xmm2 = vabsq_s16(xmm1);
+    xmm2 = vqsubq_s16(ch_magb[i],xmm2);
+#endif
+
+    y = (short *)&rxF[i];
+
+    // 4 REs per vector, 6 LLRs per RE (24 coded bits per vector)
+
+    // RE 0: lanes 0,1
+    llr2[0] = y[0];
+    llr2[1] = y[1];
+    llr2[2] = DLSCH_64QAM_LANE(xmm1,0);
+    llr2[3] = DLSCH_64QAM_LANE(xmm1,1);
+    llr2[4] = DLSCH_64QAM_LANE(xmm2,0);
+    llr2[5] = DLSCH_64QAM_LANE(xmm2,1);
+    llr2+=6;
+
+    // RE 1: lanes 2,3
+    llr2[0] = y[2];
+    llr2[1] = y[3];
+    llr2[2] = DLSCH_64QAM_LANE(xmm1,2);
+    llr2[3] = DLSCH_64QAM_LANE(xmm1,3);
+    llr2[4] = DLSCH_64QAM_LANE(xmm2,2);
+    llr2[5] = DLSCH_64QAM_LANE(xmm2,3);
+    llr2+=6;
+
+    // RE 2: lanes 4,5
+    llr2[0] = y[4];
+    llr2[1] = y[5];
+    llr2[2] = DLSCH_64QAM_LANE(xmm1,4);
+    llr2[3] = DLSCH_64QAM_LANE(xmm1,5);
+    llr2[4] = DLSCH_64QAM_LANE(xmm2,4);
+    llr2[5] = DLSCH_64QAM_LANE(xmm2,5);
+    llr2+=6;
+
+    // RE 3: lanes 6,7
+    llr2[0] = y[6];
+    llr2[1] = y[7];
+    llr2[2] = DLSCH_64QAM_LANE(xmm1,6);
+    llr2[3] = DLSCH_64QAM_LANE(xmm1,7);
+    llr2[4] = DLSCH_64QAM_LANE(xmm2,6);
+    llr2[5] = DLSCH_64QAM_LANE(xmm2,7);
+    llr2+=6;
+
+  }
+
+#if defined(__x86_64__) || defined(__i386__)
+  _mm_empty();
+  _m_empty();
+#endif
+}
+
+#undef DLSCH_64QAM_LANE
+
+//#if 0
+/*
+ * dlsch_64qam_llr_SIC: 64-QAM LLRs after successive interference cancellation.
+ *
+ * For every symbol from num_pdcch_symbols up to the end of the subframe
+ * (14 symbols when Ncp == 0, otherwise 12):
+ *   1. scale the matching stretch of sic_buffer[0] by a real amplitude
+ *      (0x1fff, doubled when mod_order_0 == 6),
+ *   2. multiply it by rho_i[0] of that symbol and subtract the product from
+ *      rxdataF_comp[0] IN PLACE,
+ *   3. with m1 = dl_ch_mag - |sample| and m2 = dl_ch_magb - |m1| (both
+ *      saturating), write six int16 LLRs per RE to dlsch_llr: the two lanes
+ *      of the cleaned sample, of m1 and of m2.
+ *
+ * The write position advances by exactly 6*len int16 per symbol, where len
+ * is the RE count of the symbol. ceil(len/4) vectors are processed, so when
+ * len is not a multiple of 4 the surplus values are overwritten by the next
+ * symbol.
+ * NOTE(review): after the last symbol up to 3 REs worth of surplus values may
+ * lie past the valid LLRs -- confirm the buffer has that slack.
+ *
+ * Uses SSE intrinsics unconditionally and overwrites the file-scope scratch
+ * vectors xmm1 and xmm2.
+ */
+void dlsch_64qam_llr_SIC(LTE_DL_FRAME_PARMS *frame_parms,
+                         int32_t **rxdataF_comp,
+                         int32_t **sic_buffer,  //Q15
+                         int32_t **rho_i,
+                         int16_t *dlsch_llr,
+                         uint8_t num_pdcch_symbols,
+                         int32_t **dl_ch_mag,
+                         int32_t **dl_ch_magb,
+                         uint16_t nb_rb,
+                         uint8_t subframe,
+                         uint16_t mod_order_0,
+                         uint32_t rb_alloc)
+{
+  int16_t rho_amp_x0[2*frame_parms->N_RB_DL*12];
+  int16_t rho_rho_amp_x0[2*frame_parms->N_RB_DL*12];
+  uint16_t amp_tmp;
+  uint16_t *llr32=(uint16_t*)dlsch_llr;  // start of the current symbol's LLRs (int16 units)
+  int i, len,  nsymb, len2;
+  uint8_t symbol, symbol_mod;
+  int len_acc=0;
+  uint16_t *sic_data;
+  uint16_t pbch_pss_sss_adjust;
+  unsigned char len_mod4=0;
+  uint16_t *llr2;                        // write cursor inside the current symbol
+  __m128i *ch_mag,*ch_magb;
+  __m128i *x1;
+
+  nsymb = (frame_parms->Ncp==0) ? 14:12;
+
+  for (symbol=num_pdcch_symbols; symbol<nsymb; symbol++) {
+    uint16_t *rxF = (uint16_t*)(&rxdataF_comp[0][((int16_t)symbol*frame_parms->N_RB_DL*12)]);
+    int16_t *rho_1=(int16_t*)(&rho_i[0][((int16_t)symbol*frame_parms->N_RB_DL*12)]);
+    ch_mag = (__m128i*)(&dl_ch_mag[0][((int16_t)symbol*frame_parms->N_RB_DL*12)]);
+    ch_magb = (__m128i*)(&dl_ch_magb[0][((int16_t)symbol*frame_parms->N_RB_DL*12)]);
+    // sic_buffer holds data REs only, hence the running offset
+    sic_data = (uint16_t*)(&sic_buffer[0][((int16_t)len_acc)]);
+
+    symbol_mod = (symbol>=(7-frame_parms->Ncp)) ? symbol-(7-frame_parms->Ncp) : symbol;
+
+    pbch_pss_sss_adjust=adjust_G2(frame_parms,&rb_alloc,6,subframe,symbol);
+
+    if ((symbol_mod==0) || (symbol_mod==(4-frame_parms->Ncp))) {
+      amp_tmp = 0x1fff;//dlsch0->sqrt_rho_b; already taken into account
+      if (frame_parms->nb_antenna_ports_eNB!=1)
+        len = nb_rb*8 - (2*pbch_pss_sss_adjust/3);
+      else
+        len = nb_rb*10 - (5*pbch_pss_sss_adjust/6);
+    } else {
+      amp_tmp = 0x1fff; //dlsch0->sqrt_rho_a; already taken into account
+      len = nb_rb*12 - pbch_pss_sss_adjust;
+    }
+
+    if (mod_order_0==6)
+      amp_tmp=amp_tmp<<1; // to compensate for >> 1 shift in modulation
+
+    len_acc+=len;
+
+    multadd_complex_vector_real_scalar((int16_t *)sic_data,
+                                        amp_tmp,
+                                        (int16_t *)rho_amp_x0, //this is in Q13
+                                        1,
+                                        len);
+
+    mult_cpx_vector((int16_t *)rho_1, //Q15
+                    (int16_t *)rho_amp_x0, //Q13
+                    (int16_t*)rho_rho_amp_x0,
+                    len,
+                    13);
+
+    // remove the reconstructed interference from the received samples (in place)
+    sub_cpx_vector16((int16_t *)rxF,
+                      (int16_t *)rho_rho_amp_x0,
+                      (int16_t *)rxF,
+                      len*2);
+
+    // this symbol's LLRs start at llr2; the next symbol starts 6 per RE later
+    llr2 = llr32;
+    llr32 += (len*6);
+
+    // Number of 4-RE vectors, rounded UP: one extra vector is needed only when
+    // len is not a multiple of 4. (The test used to be inverted, which dropped
+    // the trailing REs in that case and processed a surplus vector otherwise.)
+    len_mod4 =len&3;
+    len2=len>>2;  // length in quad words (4 REs)
+    len2+=((len_mod4==0)?0:1);
+
+    x1 = (__m128i*)rxF;
+
+    for (i=0; i<len2; i++) {
+
+      xmm1 = _mm_abs_epi16(x1[i]);
+      xmm1 = _mm_subs_epi16(ch_mag[i],xmm1);
+      xmm2 = _mm_abs_epi16(xmm1);
+      xmm2 = _mm_subs_epi16(ch_magb[i],xmm2);
+
+      // 4 REs per vector, 6 LLRs per RE (24 coded bits per vector)
+
+      // RE 0: lanes 0,1
+      llr2[0] = ((short *)&x1[i])[0];
+      llr2[1] = ((short *)&x1[i])[1];
+      llr2[2] = _mm_extract_epi16(xmm1,0);
+      llr2[3] = _mm_extract_epi16(xmm1,1);
+      llr2[4] = _mm_extract_epi16(xmm2,0);
+      llr2[5] = _mm_extract_epi16(xmm2,1);
+      llr2+=6;
+
+      // RE 1: lanes 2,3
+      llr2[0] = ((short *)&x1[i])[2];
+      llr2[1] = ((short *)&x1[i])[3];
+      llr2[2] = _mm_extract_epi16(xmm1,2);
+      llr2[3] = _mm_extract_epi16(xmm1,3);
+      llr2[4] = _mm_extract_epi16(xmm2,2);
+      llr2[5] = _mm_extract_epi16(xmm2,3);
+      llr2+=6;
+
+      // RE 2: lanes 4,5
+      llr2[0] = ((short *)&x1[i])[4];
+      llr2[1] = ((short *)&x1[i])[5];
+      llr2[2] = _mm_extract_epi16(xmm1,4);
+      llr2[3] = _mm_extract_epi16(xmm1,5);
+      llr2[4] = _mm_extract_epi16(xmm2,4);
+      llr2[5] = _mm_extract_epi16(xmm2,5);
+      llr2+=6;
+
+      // RE 3: lanes 6,7
+      llr2[0] = ((short *)&x1[i])[6];
+      llr2[1] = ((short *)&x1[i])[7];
+      llr2[2] = _mm_extract_epi16(xmm1,6);
+      llr2[3] = _mm_extract_epi16(xmm1,7);
+      llr2[4] = _mm_extract_epi16(xmm2,6);
+      llr2[5] = _mm_extract_epi16(xmm2,7);
+      llr2+=6;
+
+    }
+
+    _mm_empty();
+    _m_empty();
+
+  }
+}
+//#endif
+//==============================================================================================
+// DUAL-STREAM
+//==============================================================================================
+
+//----------------------------------------------------------------------------------------------
+// QPSK
+//----------------------------------------------------------------------------------------------
+
+#if defined(__x86_64__) || defined(__i386)
+// NOTE(review): this guard tests __i386 while the functions below test
+// __i386__ -- confirm that every targeted compiler defines both spellings.
+
+// File-scope scratch vectors for scaled copies of the real/imaginary parts of
+// the two matched-filter outputs. Among the functions visible below,
+// y0r_over2/y0i_over2 are written by qpsk_qpsk(), qpsk_qam16() and
+// qpsk_qam64(); y1r_over2/y1i_over2 are written by qpsk_qpsk().
+__m128i  y0r_over2 __attribute__ ((aligned(16)));
+__m128i  y0i_over2 __attribute__ ((aligned(16)));
+__m128i  y1r_over2 __attribute__ ((aligned(16)));
+__m128i  y1i_over2 __attribute__ ((aligned(16)));
+
+// File-scope scratch vectors for the eight absolute-value terms that
+// qpsk_qpsk() computes once per loop iteration and reuses for both LLR bits.
+// NOTE(review): these are shared, non-static globals written without any
+// synchronisation, so concurrent calls would presumably race -- confirm how
+// the callers are threaded.
+__m128i  A __attribute__ ((aligned(16)));
+__m128i  B __attribute__ ((aligned(16)));
+__m128i  C __attribute__ ((aligned(16)));
+__m128i  D __attribute__ ((aligned(16)));
+__m128i  E __attribute__ ((aligned(16)));
+__m128i  F __attribute__ ((aligned(16)));
+__m128i  G __attribute__ ((aligned(16)));
+__m128i  H __attribute__ ((aligned(16)));
+
+#endif
+
+int dlsch_qpsk_qpsk_llr(LTE_DL_FRAME_PARMS *frame_parms,
+                        int **rxdataF_comp,
+                        int **rxdataF_comp_i,
+                        int **rho_i,
+                        short *dlsch_llr,
+                        unsigned char symbol,
+                        unsigned char first_symbol_flag,
+                        unsigned short nb_rb,
+                        uint16_t pbch_pss_sss_adjust,
+                        short **llr16p)
+{
+  /*
+    Per-symbol front end for qpsk_qpsk().
+
+    Selects the slice of each input buffer that belongs to `symbol`, works out
+    how many resource elements (REs) of that symbol carry data, runs
+    qpsk_qpsk() on them and advances the caller's LLR write pointer.
+
+    frame_parms         = read for N_RB_DL, Ncp and nb_antenna_ports_eNB
+    rxdataF_comp        = matched-filter output of stream 0 (only entry [0] is used)
+    rxdataF_comp_i      = matched-filter output of stream 1 (only entry [0] is used)
+    rho_i               = channel correlation (only entry [0] is used)
+    dlsch_llr           = start of the LLR buffer, used when first_symbol_flag == 1
+    symbol              = symbol index
+    first_symbol_flag   = 1 to start writing at dlsch_llr, otherwise continue at *llr16p
+    nb_rb               = number of resource blocks
+    pbch_pss_sss_adjust = number of REs to subtract from the per-symbol count
+    llr16p              = in/out: position following the LLRs written by this call
+
+    Always returns 0.
+  */
+  int16_t *rxF   = (int16_t*)&rxdataF_comp[0][(symbol*frame_parms->N_RB_DL*12)];
+  int16_t *rxF_i = (int16_t*)&rxdataF_comp_i[0][(symbol*frame_parms->N_RB_DL*12)];
+  int16_t *rho   = (int16_t*)&rho_i[0][(symbol*frame_parms->N_RB_DL*12)];
+  int16_t *llr16 = (first_symbol_flag == 1) ? (int16_t*)dlsch_llr : (int16_t*)(*llr16p);
+  uint8_t slot_symbol = symbol; // presumably the symbol index within its slot
+  int pilot_symbol;
+  int len;
+
+  if (symbol >= (7-frame_parms->Ncp))
+    slot_symbol = symbol-(7-frame_parms->Ncp);
+
+  AssertFatal(llr16!=NULL,"dlsch_qpsk_qpsk_llr: llr is null, symbol %d\n",symbol);
+
+  pilot_symbol = (slot_symbol==0) || (slot_symbol==(4-frame_parms->Ncp));
+
+  if (!pilot_symbol) {
+    // no pilots in this symbol: 12 REs per RB
+    len = (nb_rb*12) - pbch_pss_sss_adjust;
+  } else if (frame_parms->nb_antenna_ports_eNB==1) {
+    // pilots, 1 antenna port: 10 REs per RB
+    len = (nb_rb*10) - (5*pbch_pss_sss_adjust/6);
+  } else {
+    // pilots, more than 1 antenna port: 8 REs per RB
+    len = (nb_rb*8) - (2*pbch_pss_sss_adjust/3);
+  }
+
+  qpsk_qpsk((short *)rxF,
+            (short *)rxF_i,
+            (short *)llr16,
+            (short *)rho,
+            len);
+
+  // two LLRs are produced per RE
+  *llr16p = (short *)(llr16 + (len<<1));
+
+  return(0);
+}
+
+//__m128i ONE_OVER_SQRT_8 __attribute__((aligned(16)));
+
+void qpsk_qpsk(short *stream0_in,
+               short *stream1_in,
+               short *stream0_out,
+               short *rho01,
+               int length
+         )
+{
+
+  /*
+    This function computes the LLRs of stream 0 (s_0) in presence of the interfering stream 1 (s_1)
+    assuming that both symbols are QPSK. It can be used for both MU-MIMO interference-aware
+    receivers and SU-MIMO receivers.
+
+    Parameters:
+    stream0_in  = Matched filter output y0' = (h0*g0)*y0, interleaved 16-bit re/im
+    stream1_in  = Matched filter output y1' = (h0*g1)*y0, interleaved 16-bit re/im
+    stream0_out = LLRs, two 16-bit values per resource element (first bit, second bit)
+    rho01       = Correlation between the two effective channels \rho_{10} = (h1*g1)*(h0*g0),
+                  interleaved 16-bit re/im
+    length      = number of resource elements
+
+    Notes:
+    - Every loop iteration reads two 128-bit vectors (8 complex samples) from each input and
+      writes two 128-bit vectors (16 LLRs). The inputs are dereferenced as __m128i, so they
+      must be 16-byte aligned; the output is written with unaligned stores.
+    - Only the x86 path is implemented; on ARM the loop body is empty.
+    - xmm0..xmm3, rho_rpi, rho_rmi, y0r, y0i, y1r, y1i and logmax_* are declared outside this
+      function, and y*_over2 / A..H are file-scope variables, so the function is not reentrant.
+  */
+
+#if defined(__x86_64__) || defined(__i386__)
+  __m128i *rho01_128i = (__m128i *)rho01;
+  __m128i *stream0_128i_in = (__m128i *)stream0_in;
+  __m128i *stream1_128i_in = (__m128i *)stream1_in;
+  __m128i *stream0_128i_out = (__m128i *)stream0_out;
+  __m128i ONE_OVER_SQRT_8 = _mm_set1_epi16(23170); //round(2^16/sqrt(8))
+#elif defined(__arm__)
+  int16x8_t *rho01_128i = (int16x8_t *)rho01;
+  int16x8_t *stream0_128i_in = (int16x8_t *)stream0_in;
+  int16x8_t *stream1_128i_in = (int16x8_t *)stream1_in;
+  int16x8_t *stream0_128i_out = (int16x8_t *)stream0_out;
+  int16x8_t ONE_OVER_SQRT_8 = vdupq_n_s16(23170); //round(2^16/sqrt(8))
+#endif
+
+  int i;
+
+
+  // i indexes 128-bit vectors (4 REs each); two vectors are handled per iteration
+  for (i=0; i<length>>2; i+=2) {
+    // in each iteration, we take 8 complex samples
+#if defined(__x86_64__) || defined(__i386__)
+    xmm0 = rho01_128i[i]; // 4 symbols
+    xmm1 = rho01_128i[i+1];
+
+    // put (rho_r + rho_i)/sqrt(8) in rho_rpi
+    // put (rho_r - rho_i)/sqrt(8) in rho_rmi
+
+    // de-interleave: the three 0xd8 shuffles move the real parts to the low
+    // 64 bits and the imaginary parts to the high 64 bits of each vector
+    xmm0 = _mm_shufflelo_epi16(xmm0,0xd8); //_MM_SHUFFLE(0,2,1,3));
+    xmm0 = _mm_shufflehi_epi16(xmm0,0xd8); //_MM_SHUFFLE(0,2,1,3));
+    xmm0 = _mm_shuffle_epi32(xmm0,0xd8); //_MM_SHUFFLE(0,2,1,3));
+    xmm1 = _mm_shufflelo_epi16(xmm1,0xd8); //_MM_SHUFFLE(0,2,1,3));
+    xmm1 = _mm_shufflehi_epi16(xmm1,0xd8); //_MM_SHUFFLE(0,2,1,3));
+    xmm1 = _mm_shuffle_epi32(xmm1,0xd8); //_MM_SHUFFLE(0,2,1,3));
+    //xmm0 = [Re(0,1) Re(2,3) Im(0,1) Im(2,3)]
+    //xmm1 = [Re(4,5) Re(6,7) Im(4,5) Im(6,7)]
+    xmm2 = _mm_unpacklo_epi64(xmm0,xmm1); // Re(rho)
+    xmm3 = _mm_unpackhi_epi64(xmm0,xmm1); // Im(rho)
+    rho_rpi = _mm_adds_epi16(xmm2,xmm3); // rho_rpi = Re(rho) + Im(rho)
+    rho_rmi = _mm_subs_epi16(xmm2,xmm3); // rho_rmi = Re(rho) - Im(rho)
+
+    // divide by sqrt(8), no shift needed ONE_OVER_SQRT_8 = Q1.16
+    rho_rpi = _mm_mulhi_epi16(rho_rpi,ONE_OVER_SQRT_8);
+    rho_rmi = _mm_mulhi_epi16(rho_rmi,ONE_OVER_SQRT_8);
+#elif defined(__arm__)
+
+
+#endif
+    // Compute LLR for first bit of stream 0
+
+    // Compute real and imaginary parts of MF output for stream 0
+#if defined(__x86_64__) || defined(__i386__)
+    xmm0 = stream0_128i_in[i];
+    xmm1 = stream0_128i_in[i+1];
+
+    xmm0 = _mm_shufflelo_epi16(xmm0,0xd8); //_MM_SHUFFLE(0,2,1,3));
+    xmm0 = _mm_shufflehi_epi16(xmm0,0xd8); //_MM_SHUFFLE(0,2,1,3));
+    xmm0 = _mm_shuffle_epi32(xmm0,0xd8); //_MM_SHUFFLE(0,2,1,3));
+    xmm1 = _mm_shufflelo_epi16(xmm1,0xd8); //_MM_SHUFFLE(0,2,1,3));
+    xmm1 = _mm_shufflehi_epi16(xmm1,0xd8); //_MM_SHUFFLE(0,2,1,3));
+    xmm1 = _mm_shuffle_epi32(xmm1,0xd8); //_MM_SHUFFLE(0,2,1,3));
+    //xmm0 = [Re(0,1) Re(2,3) Im(0,1) Im(2,3)]
+    //xmm1 = [Re(4,5) Re(6,7) Im(4,5) Im(6,7)]
+    y0r = _mm_unpacklo_epi64(xmm0,xmm1); // = [y0r(0),...,y0r(7)]
+    y0i = _mm_unpackhi_epi64(xmm0,xmm1); // = [y0i(0),...,y0i(7)]
+
+    y0r_over2  = _mm_srai_epi16(y0r,1);   // divide by 2
+    y0i_over2  = _mm_srai_epi16(y0i,1);   // divide by 2
+#elif defined(__arm__)
+
+
+#endif
+    // Compute real and imaginary parts of MF output for stream 1
+#if defined(__x86_64__) || defined(__i386__)
+    xmm0 = stream1_128i_in[i];
+    xmm1 = stream1_128i_in[i+1];
+
+    xmm0 = _mm_shufflelo_epi16(xmm0,0xd8); //_MM_SHUFFLE(0,2,1,3));
+    xmm0 = _mm_shufflehi_epi16(xmm0,0xd8); //_MM_SHUFFLE(0,2,1,3));
+    xmm0 = _mm_shuffle_epi32(xmm0,0xd8); //_MM_SHUFFLE(0,2,1,3));
+    xmm1 = _mm_shufflelo_epi16(xmm1,0xd8); //_MM_SHUFFLE(0,2,1,3));
+    xmm1 = _mm_shufflehi_epi16(xmm1,0xd8); //_MM_SHUFFLE(0,2,1,3));
+    xmm1 = _mm_shuffle_epi32(xmm1,0xd8); //_MM_SHUFFLE(0,2,1,3));
+    //xmm0 = [Re(0,1) Re(2,3) Im(0,1) Im(2,3)]
+    //xmm1 = [Re(4,5) Re(6,7) Im(4,5) Im(6,7)]
+    y1r = _mm_unpacklo_epi64(xmm0,xmm1); // = [y1r(0),...,y1r(7)]
+    y1i = _mm_unpackhi_epi64(xmm0,xmm1); // = [y1i(0),...,y1i(7)]
+
+    y1r_over2  = _mm_srai_epi16(y1r,1);   // divide by 2
+    y1i_over2  = _mm_srai_epi16(y1i,1);   // divide by 2
+
+    // Compute the terms for the LLR of first bit
+    // (rho_rpi and rho_rmi below are already scaled by 1/sqrt(8))
+
+    xmm0 = _mm_setzero_si128(); // ZERO
+
+    // 1 term for numerator of LLR
+    xmm3 = _mm_subs_epi16(y1r_over2,rho_rpi);
+    A = _mm_abs_epi16(xmm3); // A = |y1r/2 - rho_rpi|
+    xmm2 = _mm_adds_epi16(A,y0i_over2); // = A + y0i/2
+    xmm3 = _mm_subs_epi16(y1i_over2,rho_rmi);
+    B = _mm_abs_epi16(xmm3); // B = |y1i/2 - rho_rmi|
+    logmax_num_re0 = _mm_adds_epi16(B,xmm2); // = A + B + y0i/2
+
+    // 2 term for numerator of LLR
+    xmm3 = _mm_subs_epi16(y1r_over2,rho_rmi);
+    C = _mm_abs_epi16(xmm3); // C = |y1r/2 - rho_rmi|
+    xmm2 = _mm_subs_epi16(C,y0i_over2); // = C - y0i/2
+    xmm3 = _mm_adds_epi16(y1i_over2,rho_rpi);
+    D = _mm_abs_epi16(xmm3); // D = |y1i/2 + rho_rpi|
+    xmm2 = _mm_adds_epi16(xmm2,D); // = C + D - y0i/2
+    logmax_num_re0 = _mm_max_epi16(logmax_num_re0,xmm2); // max, numerator done
+
+    // 1 term for denominator of LLR
+    xmm3 = _mm_adds_epi16(y1r_over2,rho_rmi);
+    E = _mm_abs_epi16(xmm3); // E = |y1r/2 + rho_rmi|
+    xmm2 = _mm_adds_epi16(E,y0i_over2); // = E + y0i/2
+    xmm3 = _mm_subs_epi16(y1i_over2,rho_rpi);
+    F = _mm_abs_epi16(xmm3); // F = |y1i/2 - rho_rpi|
+    logmax_den_re0 = _mm_adds_epi16(F,xmm2); // = E + F + y0i/2
+
+    // 2 term for denominator of LLR
+    xmm3 = _mm_adds_epi16(y1r_over2,rho_rpi);
+    G = _mm_abs_epi16(xmm3); // G = |y1r/2 + rho_rpi|
+    xmm2 = _mm_subs_epi16(G,y0i_over2); // = G - y0i/2
+    xmm3 = _mm_adds_epi16(y1i_over2,rho_rmi);
+    H = _mm_abs_epi16(xmm3); // H = |y1i/2 + rho_rmi|
+    xmm2 = _mm_adds_epi16(xmm2,H); // = G + H - y0i/2
+    logmax_den_re0 = _mm_max_epi16(logmax_den_re0,xmm2); // max, denominator done
+
+    // Compute the terms for the LLR of second bit (A..H are reused)
+
+    // 1 term for numerator of LLR
+    xmm2 = _mm_adds_epi16(A,y0r_over2);
+    logmax_num_im0 = _mm_adds_epi16(B,xmm2); // = A + B + y0r/2
+
+    // 2 term for numerator of LLR
+    xmm2 = _mm_subs_epi16(E,y0r_over2);
+    xmm2 = _mm_adds_epi16(xmm2,F); // = E + F - y0r/2
+
+    logmax_num_im0 = _mm_max_epi16(logmax_num_im0,xmm2); // max, numerator done
+
+    // 1 term for denominator of LLR
+    xmm2 = _mm_adds_epi16(C,y0r_over2);
+    logmax_den_im0 = _mm_adds_epi16(D,xmm2); // = C + D + y0r/2
+
+    // 2 term for denominator of LLR
+    xmm2 = _mm_subs_epi16(G,y0r_over2);
+    xmm2 = _mm_adds_epi16(xmm2,H); // = G + H - y0r/2
+
+    logmax_den_im0 = _mm_max_epi16(logmax_den_im0,xmm2); // max, denominator done
+
+    // LLR of first bit [L1(0), ..., L1(7)]
+    y0r = _mm_adds_epi16(y0r,logmax_num_re0);
+    y0r = _mm_subs_epi16(y0r,logmax_den_re0);
+
+    // LLR of second bit [L2(0), ..., L2(7)]
+    y0i = _mm_adds_epi16(y0i,logmax_num_im0);
+    y0i = _mm_subs_epi16(y0i,logmax_den_im0);
+
+    _mm_storeu_si128(&stream0_128i_out[i],_mm_unpacklo_epi16(y0r,y0i)); // = [L1(0), L2(0), ..., L1(3), L2(3)]
+
+    // NOTE(review): given the loop bound i < (length>>2), this condition holds on every
+    // iteration, so the second vector is always written. When (length>>2) is odd the last
+    // iteration therefore reads and writes one vector (4 REs) past index (length>>2)-1.
+    // Left unchanged on purpose: for lengths that are not a multiple of 4 that extra vector
+    // is what covers the trailing REs.
+    if (i<((length>>1) - 1)) // false if only 2 REs remain
+      _mm_storeu_si128(&stream0_128i_out[i+1],_mm_unpackhi_epi16(y0r,y0i)); // = [L1(4), L2(4), ..., L1(7), L2(7)]
+
+#elif defined(__arm__)
+
+#endif
+  }
+
+#if defined(__x86_64__) || defined(__i386__)
+  _mm_empty();
+  _m_empty();
+#endif
+}
+
+/*
+  Per-symbol front end for qpsk_qam16().
+
+  Selects the slice of each input buffer that belongs to `symbol`, works out how
+  many resource elements (REs) of that symbol carry data, runs qpsk_qam16() on
+  them and advances the caller's LLR write pointer.
+
+  frame_parms         = read for N_RB_DL, Ncp and nb_antenna_ports_eNB
+  rxdataF_comp        = matched-filter output of stream 0 (only entry [0] is used)
+  rxdataF_comp_i      = matched-filter output of stream 1 (only entry [0] is used)
+  dl_ch_mag_i         = magnitude of the interfering channel (only entry [0] is used)
+  rho_i               = channel correlation (only entry [0] is used)
+  dlsch_llr           = start of the LLR buffer, used when first_symbol_flag == 1
+  symbol              = symbol index
+  first_symbol_flag   = 1 to start writing at dlsch_llr, otherwise continue at *llr16p
+  nb_rb               = number of resource blocks
+  pbch_pss_sss_adjust = number of REs to subtract from the per-symbol count
+  llr16p              = in/out: position following the LLRs written by this call
+
+  Always returns 0.
+*/
+int dlsch_qpsk_16qam_llr(LTE_DL_FRAME_PARMS *frame_parms,
+                         int32_t **rxdataF_comp,
+                         int32_t **rxdataF_comp_i,
+                         int32_t **dl_ch_mag_i, //|h_1|^2*(2/sqrt{10})
+                         int32_t **rho_i,
+                         int16_t *dlsch_llr,
+                         uint8_t symbol,
+                         uint8_t first_symbol_flag,
+                         uint16_t nb_rb,
+                         uint16_t pbch_pss_sss_adjust,
+                         int16_t **llr16p)
+{
+
+  int16_t *rxF=(int16_t*)&rxdataF_comp[0][(symbol*frame_parms->N_RB_DL*12)];
+  int16_t *rxF_i=(int16_t*)&rxdataF_comp_i[0][(symbol*frame_parms->N_RB_DL*12)];
+  int16_t *ch_mag_i = (int16_t*)&dl_ch_mag_i[0][(symbol*frame_parms->N_RB_DL*12)];
+  int16_t *rho=(int16_t*)&rho_i[0][(symbol*frame_parms->N_RB_DL*12)];
+  int16_t *llr16;
+  int len;
+  // presumably the symbol index within its slot
+  uint8_t symbol_mod = (symbol >= (7-frame_parms->Ncp))? (symbol-(7-frame_parms->Ncp)) : symbol;
+
+  if (first_symbol_flag == 1) {
+    llr16 = (int16_t*)dlsch_llr;
+  } else {
+    llr16 = (int16_t*)(*llr16p);
+  }
+
+  // the message previously named dlsch_qpsk_qpsk_llr (copy-paste), which
+  // pointed at the wrong function when this assertion fired
+  AssertFatal(llr16!=NULL,"dlsch_qpsk_16qam_llr: llr is null, symbol %d\n",symbol);
+
+
+  if ((symbol_mod==0) || (symbol_mod==(4-frame_parms->Ncp))) {
+    // if symbol has pilots
+    if (frame_parms->nb_antenna_ports_eNB!=1)
+      // in 2 antenna ports we have 8 REs per symbol per RB
+      len = (nb_rb*8) - (2*pbch_pss_sss_adjust/3);
+    else
+      // for 1 antenna port we have 10 REs per symbol per RB
+      len = (nb_rb*10) - (5*pbch_pss_sss_adjust/6);
+  } else {
+    // symbol has no pilots
+    len = (nb_rb*12) - pbch_pss_sss_adjust;
+  }
+
+  qpsk_qam16((short *)rxF,
+             (short *)rxF_i,
+             (short *)ch_mag_i,
+             (short *)llr16,
+             (short *)rho,
+             len);
+
+  // two LLRs are produced per RE
+  llr16 += (len<<1);
+  *llr16p = (short *)llr16;
+
+  return(0);
+}
+
+/*
+#if defined(__x86_64__) || defined(__i386__)
+__m128i ONE_OVER_SQRT_2 __attribute__((aligned(16)));
+__m128i ONE_OVER_SQRT_10 __attribute__((aligned(16)));
+__m128i THREE_OVER_SQRT_10 __attribute__((aligned(16)));
+__m128i ONE_OVER_SQRT_10_Q15 __attribute__((aligned(16)));
+__m128i SQRT_10_OVER_FOUR __attribute__((aligned(16)));
+__m128i ch_mag_int;
+#endif
+*/
+void qpsk_qam16(int16_t *stream0_in,
+                int16_t *stream1_in,
+                int16_t *ch_mag_i,
+                int16_t *stream0_out,
+                int16_t *rho01,
+                int32_t length
+    )
+{
+  /*
+    This function computes the LLRs of stream 0 (s_0) in presence of the interfering stream 1 (s_1),
+    where s_0 is QPSK and, judging by the function name and the 1/sqrt(10), 3/sqrt(10) constants,
+    s_1 is 16QAM. It can be used for both MU-MIMO interference-aware receivers and SU-MIMO receivers.
+
+    Parameters:
+    stream0_in  = Matched filter output y0' = (h0*g0)*y0, interleaved 16-bit re/im
+    stream1_in  = Matched filter output y1' = (h0*g1)*y0, interleaved 16-bit re/im
+    ch_mag_i    = Magnitude of the interfering channel, same layout as the streams
+    stream0_out = LLRs, two 16-bit values per resource element (first bit, second bit)
+    rho01       = Correlation between the two effective channels \rho_{10} = (h1*g1)*(h0*g0)
+    length      = number of resource elements
+
+    Notes:
+    - Every loop iteration reads two 128-bit vectors (8 complex samples) from each input and
+      writes two 128-bit vectors (16 LLRs). The inputs are dereferenced as __m128i, so they
+      must be 16-byte aligned; the output is written with unaligned stores because the caller
+      advances the LLR pointer by 2*len shorts, which need not be a multiple of 16 bytes.
+    - Only the x86 path is implemented; on ARM the loop body is empty.
+    - xmm0..xmm3, rho_*, y0*, y1*, psi_*, a_*, bit_met_* and logmax_* are declared outside this
+      function, so it is not reentrant.
+    - interference_abs_epi16, prodsum_psi_a_epi16 and square_a_epi16 are defined elsewhere;
+      presumably they write their result into the output argument -- confirm against their
+      definitions.
+  */
+
+#if defined(__x86_64__) || defined(__i386__)
+  __m128i *rho01_128i = (__m128i *)rho01;
+  __m128i *stream0_128i_in = (__m128i *)stream0_in;
+  __m128i *stream1_128i_in = (__m128i *)stream1_in;
+  __m128i *stream0_128i_out = (__m128i *)stream0_out;
+  __m128i *ch_mag_128i_i    = (__m128i *)ch_mag_i;
+  __m128i ONE_OVER_SQRT_2 = _mm_set1_epi16(23170); // round(1/sqrt(2)*2^15)
+  __m128i ONE_OVER_SQRT_10_Q15 = _mm_set1_epi16(10362); // round(1/sqrt(10)*2^15)
+  __m128i THREE_OVER_SQRT_10 = _mm_set1_epi16(31086); // round(3/sqrt(10)*2^15)
+  __m128i SQRT_10_OVER_FOUR = _mm_set1_epi16(25905); // round(sqrt(10)/4*2^15)
+  __m128i ch_mag_int __attribute__((aligned(16)));
+#elif defined(__arm__)
+  int16x8_t *rho01_128i = (int16x8_t *)rho01;
+  int16x8_t *stream0_128i_in = (int16x8_t *)stream0_in;
+  int16x8_t *stream1_128i_in = (int16x8_t *)stream1_in;
+  int16x8_t *stream0_128i_out = (int16x8_t *)stream0_out;
+  int16x8_t *ch_mag_128i_i    = (int16x8_t *)ch_mag_i;
+  int16x8_t ONE_OVER_SQRT_2 = vdupq_n_s16(23170); // round(1/sqrt(2)*2^15)
+  int16x8_t ONE_OVER_SQRT_10_Q15 = vdupq_n_s16(10362); // round(1/sqrt(10)*2^15)
+  int16x8_t THREE_OVER_SQRT_10 = vdupq_n_s16(31086); // round(3/sqrt(10)*2^15)
+  int16x8_t SQRT_10_OVER_FOUR = vdupq_n_s16(25905); // round(sqrt(10)/4*2^15)
+  int16x8_t ch_mag_int __attribute__((aligned(16)));
+#endif
+
+#ifdef DEBUG_LLR
+  print_shorts2("rho01_128i:\n",rho01_128i);
+#endif
+
+  int i;
+
+
+  // i indexes 128-bit vectors (4 REs each); two vectors are handled per iteration
+  for (i=0; i<length>>2; i+=2) {
+    // in each iteration, we take 8 complex samples
+
+#if defined(__x86_64__) || defined(__i386__)
+
+    xmm0 = rho01_128i[i]; // 4 symbols
+    xmm1 = rho01_128i[i+1];
+
+    // put (rho_r + rho_i)/sqrt2 in rho_rpi
+    // put (rho_r - rho_i)/sqrt2 in rho_rmi
+    // de-interleave: the three 0xd8 shuffles move the real parts to the low
+    // 64 bits and the imaginary parts to the high 64 bits of each vector
+    xmm0 = _mm_shufflelo_epi16(xmm0,0xd8); //_MM_SHUFFLE(0,2,1,3));
+    xmm0 = _mm_shufflehi_epi16(xmm0,0xd8); //_MM_SHUFFLE(0,2,1,3));
+    xmm0 = _mm_shuffle_epi32(xmm0,0xd8); //_MM_SHUFFLE(0,2,1,3));
+    xmm1 = _mm_shufflelo_epi16(xmm1,0xd8); //_MM_SHUFFLE(0,2,1,3));
+    xmm1 = _mm_shufflehi_epi16(xmm1,0xd8); //_MM_SHUFFLE(0,2,1,3));
+    xmm1 = _mm_shuffle_epi32(xmm1,0xd8); //_MM_SHUFFLE(0,2,1,3));
+    //xmm0 = [Re(0,1) Re(2,3) Im(0,1) Im(2,3)]
+    //xmm1 = [Re(4,5) Re(6,7) Im(4,5) Im(6,7)]
+    xmm2 = _mm_unpacklo_epi64(xmm0,xmm1); // Re(rho)
+    xmm3 = _mm_unpackhi_epi64(xmm0,xmm1); // Im(rho)
+    rho_rpi = _mm_adds_epi16(xmm2,xmm3); // rho_rpi = Re(rho) + Im(rho)
+    rho_rmi = _mm_subs_epi16(xmm2,xmm3); // rho_rmi = Re(rho) - Im(rho)
+
+    // divide by sqrt(2): Q15 multiply (mulhi keeps bits 31..16, hence the <<1)
+    rho_rpi = _mm_mulhi_epi16(rho_rpi, ONE_OVER_SQRT_2);
+    rho_rmi = _mm_mulhi_epi16(rho_rmi, ONE_OVER_SQRT_2);
+    rho_rpi = _mm_slli_epi16(rho_rpi,1);
+    rho_rmi = _mm_slli_epi16(rho_rmi,1);
+
+    // Compute LLR for first bit of stream 0
+
+    // Compute real and imaginary parts of MF output for stream 0
+    xmm0 = stream0_128i_in[i];
+    xmm1 = stream0_128i_in[i+1];
+
+    xmm0 = _mm_shufflelo_epi16(xmm0,0xd8); //_MM_SHUFFLE(0,2,1,3));
+    xmm0 = _mm_shufflehi_epi16(xmm0,0xd8); //_MM_SHUFFLE(0,2,1,3));
+    xmm0 = _mm_shuffle_epi32(xmm0,0xd8); //_MM_SHUFFLE(0,2,1,3));
+    xmm1 = _mm_shufflelo_epi16(xmm1,0xd8); //_MM_SHUFFLE(0,2,1,3));
+    xmm1 = _mm_shufflehi_epi16(xmm1,0xd8); //_MM_SHUFFLE(0,2,1,3));
+    xmm1 = _mm_shuffle_epi32(xmm1,0xd8); //_MM_SHUFFLE(0,2,1,3));
+    //xmm0 = [Re(0,1) Re(2,3) Im(0,1) Im(2,3)]
+    //xmm1 = [Re(4,5) Re(6,7) Im(4,5) Im(6,7)]
+    y0r = _mm_unpacklo_epi64(xmm0,xmm1); // = [y0r(0),...,y0r(7)]
+    y0i = _mm_unpackhi_epi64(xmm0,xmm1); // = [y0i(0),...,y0i(7)]
+
+    // divide by sqrt(2), same Q15 multiply as for rho above.
+    // The shift must be applied to the mulhi result: shifting y0r/y0i themselves
+    // (as this code used to do) discards the multiplication and yields 2*y0.
+    y0r_over2 = _mm_mulhi_epi16(y0r, ONE_OVER_SQRT_2);
+    y0i_over2 = _mm_mulhi_epi16(y0i, ONE_OVER_SQRT_2);
+    y0r_over2  = _mm_slli_epi16(y0r_over2,1);
+    y0i_over2  = _mm_slli_epi16(y0i_over2,1);
+
+    y0_p_1_1 = _mm_adds_epi16(y0r_over2, y0i_over2); // (y0r + y0i)/sqrt2
+    y0_m_1_1 = _mm_subs_epi16(y0r_over2, y0i_over2); // (y0r - y0i)/sqrt2
+
+    // Compute real and imaginary parts of MF output for stream 1
+    xmm0 = stream1_128i_in[i];
+    xmm1 = stream1_128i_in[i+1];
+
+    xmm0 = _mm_shufflelo_epi16(xmm0,0xd8); //_MM_SHUFFLE(0,2,1,3));
+    xmm0 = _mm_shufflehi_epi16(xmm0,0xd8); //_MM_SHUFFLE(0,2,1,3));
+    xmm0 = _mm_shuffle_epi32(xmm0,0xd8); //_MM_SHUFFLE(0,2,1,3));
+    xmm1 = _mm_shufflelo_epi16(xmm1,0xd8); //_MM_SHUFFLE(0,2,1,3));
+    xmm1 = _mm_shufflehi_epi16(xmm1,0xd8); //_MM_SHUFFLE(0,2,1,3));
+    xmm1 = _mm_shuffle_epi32(xmm1,0xd8); //_MM_SHUFFLE(0,2,1,3));
+    //xmm0 = [Re(0,1) Re(2,3) Im(0,1) Im(2,3)]
+    //xmm1 = [Re(4,5) Re(6,7) Im(4,5) Im(6,7)]
+    y1r = _mm_unpacklo_epi64(xmm0,xmm1); // = [y1r(0),...,y1r(7)]
+    y1i = _mm_unpackhi_epi64(xmm0,xmm1); // = [y1i(0),...,y1i(7)]
+
+    xmm0 = _mm_setzero_si128(); // ZERO
+
+    // compute psi for the four (bit 0, bit 1) hypotheses of stream 0
+    xmm3 = _mm_subs_epi16(y1r,rho_rpi);
+    psi_r_p1_p1 = _mm_abs_epi16(xmm3);
+    xmm3 = _mm_subs_epi16(y1i,rho_rmi);
+    psi_i_p1_p1 = _mm_abs_epi16(xmm3);
+    xmm3 = _mm_subs_epi16(y1r,rho_rmi);
+    psi_r_p1_m1 = _mm_abs_epi16(xmm3);
+    xmm3 = _mm_adds_epi16(y1i,rho_rpi);
+    psi_i_p1_m1 = _mm_abs_epi16(xmm3);
+    xmm3 = _mm_adds_epi16(y1r,rho_rmi);
+    psi_r_m1_p1 = _mm_abs_epi16(xmm3);
+    xmm3 = _mm_subs_epi16(y1i,rho_rpi);
+    psi_i_m1_p1 = _mm_abs_epi16(xmm3);
+    xmm3 = _mm_adds_epi16(y1r,rho_rpi);
+    psi_r_m1_m1 = _mm_abs_epi16(xmm3);
+    xmm3 = _mm_adds_epi16(y1i,rho_rmi);
+    psi_i_m1_m1 = _mm_abs_epi16(xmm3);
+
+    // Rearrange interfering channel magnitudes
+    xmm2 = ch_mag_128i_i[i];
+    xmm3 = ch_mag_128i_i[i+1];
+
+    xmm2 = _mm_shufflelo_epi16(xmm2,0xd8); //_MM_SHUFFLE(0,2,1,3));
+    xmm2 = _mm_shufflehi_epi16(xmm2,0xd8); //_MM_SHUFFLE(0,2,1,3));
+    xmm2 = _mm_shuffle_epi32(xmm2,0xd8); //_MM_SHUFFLE(0,2,1,3));
+    xmm3 = _mm_shufflelo_epi16(xmm3,0xd8); //_MM_SHUFFLE(0,2,1,3));
+    xmm3 = _mm_shufflehi_epi16(xmm3,0xd8); //_MM_SHUFFLE(0,2,1,3));
+    xmm3 = _mm_shuffle_epi32(xmm3,0xd8); //_MM_SHUFFLE(0,2,1,3));
+
+    // only the low 64 bits of each rearranged vector are kept
+    ch_mag_int = _mm_unpacklo_epi64(xmm2,xmm3);
+
+    // calculate optimal interference amplitudes
+    interference_abs_epi16(psi_r_p1_p1 , ch_mag_int, a_r_p1_p1 , ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10);
+    interference_abs_epi16(psi_i_p1_p1 , ch_mag_int, a_i_p1_p1 , ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10);
+    interference_abs_epi16(psi_r_p1_m1 , ch_mag_int, a_r_p1_m1 , ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10);
+    interference_abs_epi16(psi_i_p1_m1 , ch_mag_int, a_i_p1_m1 , ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10);
+    interference_abs_epi16(psi_r_m1_p1 , ch_mag_int, a_r_m1_p1 , ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10);
+    interference_abs_epi16(psi_i_m1_p1 , ch_mag_int, a_i_m1_p1 , ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10);
+    interference_abs_epi16(psi_r_m1_m1 , ch_mag_int, a_r_m1_m1 , ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10);
+    interference_abs_epi16(psi_i_m1_m1 , ch_mag_int, a_i_m1_m1 , ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10);
+
+    // prodsum
+    prodsum_psi_a_epi16(psi_r_p1_p1, a_r_p1_p1, psi_i_p1_p1, a_i_p1_p1, psi_a_p1_p1);
+    prodsum_psi_a_epi16(psi_r_p1_m1, a_r_p1_m1, psi_i_p1_m1, a_i_p1_m1, psi_a_p1_m1);
+    prodsum_psi_a_epi16(psi_r_m1_p1, a_r_m1_p1, psi_i_m1_p1, a_i_m1_p1, psi_a_m1_p1);
+    prodsum_psi_a_epi16(psi_r_m1_m1, a_r_m1_m1, psi_i_m1_m1, a_i_m1_m1, psi_a_m1_m1);
+
+    // squares
+    square_a_epi16(a_r_p1_p1, a_i_p1_p1, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_p1_p1);
+    square_a_epi16(a_r_p1_m1, a_i_p1_m1, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_p1_m1);
+    square_a_epi16(a_r_m1_p1, a_i_m1_p1, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_m1_p1);
+    square_a_epi16(a_r_m1_m1, a_i_m1_m1, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_m1_m1);
+
+    // Computing Metrics: psi_a - a_sq, plus or minus the stream-0 term
+    xmm0 = _mm_subs_epi16(psi_a_p1_p1, a_sq_p1_p1);
+    bit_met_p1_p1 = _mm_adds_epi16(xmm0, y0_p_1_1);
+
+    xmm0 = _mm_subs_epi16(psi_a_p1_m1, a_sq_p1_m1);
+    bit_met_p1_m1 = _mm_adds_epi16(xmm0, y0_m_1_1);
+
+    xmm0 = _mm_subs_epi16(psi_a_m1_p1, a_sq_m1_p1);
+    bit_met_m1_p1 = _mm_subs_epi16(xmm0, y0_m_1_1);
+
+    xmm0 = _mm_subs_epi16(psi_a_m1_m1, a_sq_m1_m1);
+    bit_met_m1_m1 = _mm_subs_epi16(xmm0, y0_p_1_1);
+
+    // MSB
+    logmax_num_re0 = _mm_max_epi16(bit_met_p1_p1,bit_met_p1_m1); // bit=0
+    logmax_den_re0 = _mm_max_epi16(bit_met_m1_p1,bit_met_m1_m1); // bit=1
+
+    y0r = _mm_subs_epi16(logmax_num_re0,logmax_den_re0);
+
+    // LSB
+    logmax_num_im0 = _mm_max_epi16(bit_met_p1_p1,bit_met_m1_p1); // bit=0
+    logmax_den_im0 = _mm_max_epi16(bit_met_p1_m1,bit_met_m1_m1); // bit=1
+
+    y0i = _mm_subs_epi16(logmax_num_im0,logmax_den_im0);
+
+    // unaligned stores, as in qpsk_qpsk(): stream0_out is not guaranteed to be 16-byte aligned
+    _mm_storeu_si128(&stream0_128i_out[i],_mm_unpacklo_epi16(y0r,y0i)); // = [L1(0), L2(0), ..., L1(3), L2(3)]
+
+    // NOTE(review): given the loop bound i < (length>>2), this condition holds on every
+    // iteration, so the second vector is always written (see the same guard in qpsk_qpsk()).
+    if (i<((length>>1) - 1)) // false if only 2 REs remain
+      _mm_storeu_si128(&stream0_128i_out[i+1],_mm_unpackhi_epi16(y0r,y0i)); // = [L1(4), L2(4), ..., L1(7), L2(7)]
+
+#elif defined(__arm__)
+
+#endif
+  }
+
+#if defined(__x86_64__) || defined(__i386__)
+  _mm_empty();
+  _m_empty();
+#endif
+}
+
+/*
+  Per-symbol front end for qpsk_qam64().
+
+  Selects the slice of each input buffer that belongs to `symbol`, works out how
+  many resource elements (REs) of that symbol carry data, runs qpsk_qam64() on
+  them and advances the caller's LLR write pointer.
+
+  frame_parms         = read for N_RB_DL, Ncp and nb_antenna_ports_eNB
+  rxdataF_comp        = matched-filter output of stream 0 (only entry [0] is used)
+  rxdataF_comp_i      = matched-filter output of stream 1 (only entry [0] is used)
+  dl_ch_mag_i         = magnitude of the interfering channel (only entry [0] is used)
+  rho_i               = channel correlation (only entry [0] is used)
+  dlsch_llr           = start of the LLR buffer, used when first_symbol_flag == 1
+  symbol              = symbol index
+  first_symbol_flag   = 1 to start writing at dlsch_llr, otherwise continue at *llr16p
+  nb_rb               = number of resource blocks
+  pbch_pss_sss_adjust = number of REs to subtract from the per-symbol count
+  llr16p              = in/out: position following the LLRs written by this call
+
+  Always returns 0.
+*/
+int dlsch_qpsk_64qam_llr(LTE_DL_FRAME_PARMS *frame_parms,
+                         int32_t **rxdataF_comp,
+                         int32_t **rxdataF_comp_i,
+                         // NOTE(review): the scaling comment below is identical to the one in
+                         // dlsch_qpsk_16qam_llr -- confirm that it also applies to 64QAM
+                         int32_t **dl_ch_mag_i, //|h_1|^2*(2/sqrt{10})
+                         int32_t **rho_i,
+                         int16_t *dlsch_llr,
+                         uint8_t symbol,
+                         uint8_t first_symbol_flag,
+                         uint16_t nb_rb,
+                         uint16_t pbch_pss_sss_adjust,
+                         int16_t **llr16p)
+{
+
+  int16_t *rxF=(int16_t*)&rxdataF_comp[0][(symbol*frame_parms->N_RB_DL*12)];
+  int16_t *rxF_i=(int16_t*)&rxdataF_comp_i[0][(symbol*frame_parms->N_RB_DL*12)];
+  int16_t *ch_mag_i = (int16_t*)&dl_ch_mag_i[0][(symbol*frame_parms->N_RB_DL*12)];
+  int16_t *rho=(int16_t*)&rho_i[0][(symbol*frame_parms->N_RB_DL*12)];
+  int16_t *llr16;
+  int len;
+  // presumably the symbol index within its slot
+  uint8_t symbol_mod = (symbol >= (7-frame_parms->Ncp))? (symbol-(7-frame_parms->Ncp)) : symbol;
+
+
+  if (first_symbol_flag == 1) {
+    llr16 = (int16_t*)dlsch_llr;
+  } else {
+    llr16 = (int16_t*)(*llr16p);
+  }
+
+  // the message previously said dlsch_qpsk_qam64_llr, which is not the name of
+  // this function and could not be found by searching for it
+  AssertFatal(llr16!=NULL,"dlsch_qpsk_64qam_llr: llr is null, symbol %d\n",symbol);
+
+  if ((symbol_mod==0) || (symbol_mod==(4-frame_parms->Ncp))) {
+    // if symbol has pilots
+    if (frame_parms->nb_antenna_ports_eNB!=1)
+      // in 2 antenna ports we have 8 REs per symbol per RB
+      len = (nb_rb*8) - (2*pbch_pss_sss_adjust/3);
+    else
+      // for 1 antenna port we have 10 REs per symbol per RB
+      len = (nb_rb*10) - (5*pbch_pss_sss_adjust/6);
+  } else {
+    // symbol has no pilots
+    len = (nb_rb*12) - pbch_pss_sss_adjust;
+  }
+
+  qpsk_qam64((short *)rxF,
+             (short *)rxF_i,
+             (short *)ch_mag_i,
+             (short *)llr16,
+             (short *)rho,
+             len);
+
+  // two LLRs are produced per RE
+  llr16 += (len<<1);
+  *llr16p = (short *)llr16;
+
+  return(0);
+}
+/*
+__m128i ONE_OVER_SQRT_2_42 __attribute__((aligned(16)));
+__m128i THREE_OVER_SQRT_2_42 __attribute__((aligned(16)));
+__m128i FIVE_OVER_SQRT_2_42 __attribute__((aligned(16)));
+__m128i SEVEN_OVER_SQRT_2_42 __attribute__((aligned(16)));
+
+__m128i ch_mag_int_with_sigma2 __attribute__((aligned(16)));
+__m128i two_ch_mag_int_with_sigma2 __attribute__((aligned(16)));
+__m128i three_ch_mag_int_with_sigma2 __attribute__((aligned(16)));
+__m128i SQRT_42_OVER_FOUR __attribute__((aligned(16)));
+*/
+void qpsk_qam64(short *stream0_in,
+                short *stream1_in,
+                short *ch_mag_i,
+                short *stream0_out,
+                short *rho01,
+                int length
+    )
+{
+
+  /*
+    This function computes the LLRs of stream 0 (s_0) in presence of the interfering stream 1 (s_1) assuming that both symbols are QPSK. It can be used for both MU-MIMO interference-aware receiver or for SU-MIMO receivers.
+
+    Parameters:
+    stream0_in = Matched filter output y0' = (h0*g0)*y0
+    stream1_in = Matched filter output y1' = (h0*g1)*y0
+    stream0_out = LLRs
+    rho01 = Correlation between the two effective channels \rho_{10} = (h1*g1)*(h0*g0)
+    length = number of resource elements
+  */
+
+#if defined(__x86_64__) || defined(__i386__)
+  __m128i *rho01_128i = (__m128i *)rho01;
+  __m128i *stream0_128i_in = (__m128i *)stream0_in;
+  __m128i *stream1_128i_in = (__m128i *)stream1_in;
+  __m128i *stream0_128i_out = (__m128i *)stream0_out;
+  __m128i *ch_mag_128i_i    = (__m128i *)ch_mag_i;
+  __m128i ONE_OVER_SQRT_2 = _mm_set1_epi16(23170); // round(1/sqrt(2)*2^15)
+  __m128i ONE_OVER_SQRT_2_42 = _mm_set1_epi16(3575); // round(1/sqrt(2*42)*2^15)
+  __m128i THREE_OVER_SQRT_2_42 = _mm_set1_epi16(10726); // round(3/sqrt(2*42)*2^15)
+  __m128i FIVE_OVER_SQRT_2_42 = _mm_set1_epi16(17876); // round(5/sqrt(2*42)*2^15)
+  __m128i SEVEN_OVER_SQRT_2_42 = _mm_set1_epi16(25027); // round(7/sqrt(2*42)*2^15)
+  __m128i SQRT_42_OVER_FOUR = _mm_set1_epi16(13272); // round(sqrt(42)/4*2^13), Q3.1
+  __m128i ch_mag_int;
+  __m128i ch_mag_int_with_sigma2;
+  __m128i two_ch_mag_int_with_sigma2;
+  __m128i three_ch_mag_int_with_sigma2;
+#elif defined(__arm__)
+
+#endif
+
+#ifdef DEBUG_LLR
+  print_shorts2("rho01_128i:\n",rho01_128i);
+#endif
+
+  int i;
+
+
+  for (i=0; i<length>>2; i+=2) {
+    // in each iteration, we take 8 complex samples
+
+#if defined(__x86_64__) || defined(__i386__)
+
+    xmm0 = rho01_128i[i]; // 4 symbols
+    xmm1 = rho01_128i[i+1];
+
+    // put (rho_r + rho_i)/sqrt2 in rho_rpi
+    // put (rho_r - rho_i)/sqrt2 in rho_rmi
+    xmm0 = _mm_shufflelo_epi16(xmm0,0xd8); //_MM_SHUFFLE(0,2,1,3));
+    xmm0 = _mm_shufflehi_epi16(xmm0,0xd8); //_MM_SHUFFLE(0,2,1,3));
+    xmm0 = _mm_shuffle_epi32(xmm0,0xd8); //_MM_SHUFFLE(0,2,1,3));
+    xmm1 = _mm_shufflelo_epi16(xmm1,0xd8); //_MM_SHUFFLE(0,2,1,3));
+    xmm1 = _mm_shufflehi_epi16(xmm1,0xd8); //_MM_SHUFFLE(0,2,1,3));
+    xmm1 = _mm_shuffle_epi32(xmm1,0xd8); //_MM_SHUFFLE(0,2,1,3));
+    //xmm0 = [Re(0,1) Re(2,3) Im(0,1) Im(2,3)]
+    //xmm1 = [Re(4,5) Re(6,7) Im(4,5) Im(6,7)]
+    xmm2 = _mm_unpacklo_epi64(xmm0,xmm1); // Re(rho)
+    xmm3 = _mm_unpackhi_epi64(xmm0,xmm1); // Im(rho)
+    rho_rpi = _mm_adds_epi16(xmm2,xmm3); // rho = Re(rho) + Im(rho)
+    rho_rmi = _mm_subs_epi16(xmm2,xmm3); // rho* = Re(rho) - Im(rho)
+
+    // divide by sqrt(2)
+    rho_rpi = _mm_mulhi_epi16(rho_rpi, ONE_OVER_SQRT_2);
+    rho_rmi = _mm_mulhi_epi16(rho_rmi, ONE_OVER_SQRT_2);
+    rho_rpi = _mm_slli_epi16(rho_rpi,1);
+    rho_rmi = _mm_slli_epi16(rho_rmi,1);
+
+    // Compute LLR for first bit of stream 0
+
+    // Compute real and imaginary parts of MF output for stream 0
+    xmm0 = stream0_128i_in[i];
+    xmm1 = stream0_128i_in[i+1];
+
+    xmm0 = _mm_shufflelo_epi16(xmm0,0xd8); //_MM_SHUFFLE(0,2,1,3));
+    xmm0 = _mm_shufflehi_epi16(xmm0,0xd8); //_MM_SHUFFLE(0,2,1,3));
+    xmm0 = _mm_shuffle_epi32(xmm0,0xd8); //_MM_SHUFFLE(0,2,1,3));
+    xmm1 = _mm_shufflelo_epi16(xmm1,0xd8); //_MM_SHUFFLE(0,2,1,3));
+    xmm1 = _mm_shufflehi_epi16(xmm1,0xd8); //_MM_SHUFFLE(0,2,1,3));
+    xmm1 = _mm_shuffle_epi32(xmm1,0xd8); //_MM_SHUFFLE(0,2,1,3));
+    //xmm0 = [Re(0,1) Re(2,3) Im(0,1) Im(2,3)]
+    //xmm1 = [Re(4,5) Re(6,7) Im(4,5) Im(6,7)]
+    y0r = _mm_unpacklo_epi64(xmm0,xmm1); // = [y0r(1),y0r(2),y0r(3),y0r(4)]
+    y0i = _mm_unpackhi_epi64(xmm0,xmm1);
+
+    // divide by sqrt(2)
+    y0r_over2 = _mm_mulhi_epi16(y0r, ONE_OVER_SQRT_2);
+    y0i_over2 = _mm_mulhi_epi16(y0i, ONE_OVER_SQRT_2);
+    y0r_over2  = _mm_slli_epi16(y0r,1);
+    y0i_over2  = _mm_slli_epi16(y0i,1);
+
+    y0_p_1_1 = _mm_adds_epi16(y0r_over2, y0i_over2);
+    y0_m_1_1 = _mm_subs_epi16(y0r_over2, y0i_over2);
+
+    // Compute real and imaginary parts of MF output for stream 1
+    xmm0 = stream1_128i_in[i];
+    xmm1 = stream1_128i_in[i+1];
+
+    xmm0 = _mm_shufflelo_epi16(xmm0,0xd8); //_MM_SHUFFLE(0,2,1,3));
+    xmm0 = _mm_shufflehi_epi16(xmm0,0xd8); //_MM_SHUFFLE(0,2,1,3));
+    xmm0 = _mm_shuffle_epi32(xmm0,0xd8); //_MM_SHUFFLE(0,2,1,3));
+    xmm1 = _mm_shufflelo_epi16(xmm1,0xd8); //_MM_SHUFFLE(0,2,1,3));
+    xmm1 = _mm_shufflehi_epi16(xmm1,0xd8); //_MM_SHUFFLE(0,2,1,3));
+    xmm1 = _mm_shuffle_epi32(xmm1,0xd8); //_MM_SHUFFLE(0,2,1,3));
+    //xmm0 = [Re(0,1) Re(2,3) Im(0,1) Im(2,3)]
+    //xmm1 = [Re(4,5) Re(6,7) Im(4,5) Im(6,7)]
+    y1r = _mm_unpacklo_epi64(xmm0,xmm1); //[y1r(1),y1r(2),y1r(3),y1r(4)]
+    y1i = _mm_unpackhi_epi64(xmm0,xmm1); //[y1i(1),y1i(2),y1i(3),y1i(4)]
+
+    xmm0 = _mm_setzero_si128(); // ZERO
+
+    // compute psi
+    xmm3 = _mm_subs_epi16(y1r,rho_rpi);
+    psi_r_p1_p1 = _mm_abs_epi16(xmm3);
+    xmm3 = _mm_subs_epi16(y1i,rho_rmi);
+    psi_i_p1_p1 = _mm_abs_epi16(xmm3);
+    xmm3 = _mm_subs_epi16(y1r,rho_rmi);
+    psi_r_p1_m1 = _mm_abs_epi16(xmm3);
+    xmm3 = _mm_adds_epi16(y1i,rho_rpi);
+    psi_i_p1_m1 = _mm_abs_epi16(xmm3);
+    xmm3 = _mm_adds_epi16(y1r,rho_rmi);
+    psi_r_m1_p1 = _mm_abs_epi16(xmm3);
+    xmm3 = _mm_subs_epi16(y1i,rho_rpi);
+    psi_i_m1_p1 = _mm_abs_epi16(xmm3);
+    xmm3 = _mm_adds_epi16(y1r,rho_rpi);
+    psi_r_m1_m1 = _mm_abs_epi16(xmm3);
+    xmm3 = _mm_adds_epi16(y1i,rho_rmi);
+    psi_i_m1_m1 = _mm_abs_epi16(xmm3);
+
+    // Rearrange interfering channel magnitudes
+    xmm2 = ch_mag_128i_i[i];
+    xmm3 = ch_mag_128i_i[i+1];
+
+    xmm2 = _mm_shufflelo_epi16(xmm2,0xd8); //_MM_SHUFFLE(0,2,1,3));
+    xmm2 = _mm_shufflehi_epi16(xmm2,0xd8); //_MM_SHUFFLE(0,2,1,3));
+    xmm2 = _mm_shuffle_epi32(xmm2,0xd8); //_MM_SHUFFLE(0,2,1,3));
+    xmm3 = _mm_shufflelo_epi16(xmm3,0xd8); //_MM_SHUFFLE(0,2,1,3));
+    xmm3 = _mm_shufflehi_epi16(xmm3,0xd8); //_MM_SHUFFLE(0,2,1,3));
+    xmm3 = _mm_shuffle_epi32(xmm3,0xd8); //_MM_SHUFFLE(0,2,1,3));
+
+    ch_mag_int = _mm_unpacklo_epi64(xmm2,xmm3);
+    ch_mag_int_with_sigma2       = _mm_srai_epi16(ch_mag_int, 1); // *2
+    two_ch_mag_int_with_sigma2   = ch_mag_int; // *4
+    three_ch_mag_int_with_sigma2 = _mm_adds_epi16(ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2); // *6
+
+    interference_abs_64qam_epi16(psi_r_p1_p1, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_p1_p1, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42,
+                                 SEVEN_OVER_SQRT_2_42);
+    interference_abs_64qam_epi16(psi_i_p1_p1, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_p1_p1, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42,
+                                 SEVEN_OVER_SQRT_2_42);
+    interference_abs_64qam_epi16(psi_r_p1_m1, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_p1_m1, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42,
+                                 SEVEN_OVER_SQRT_2_42);
+    interference_abs_64qam_epi16(psi_i_p1_m1, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_p1_m1, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42,
+                                 SEVEN_OVER_SQRT_2_42);
+    interference_abs_64qam_epi16(psi_r_m1_p1, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_m1_p1, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42,
+                                 SEVEN_OVER_SQRT_2_42);
+    interference_abs_64qam_epi16(psi_i_m1_p1, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_m1_p1, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42,
+                                 SEVEN_OVER_SQRT_2_42);
+    interference_abs_64qam_epi16(psi_r_m1_m1, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_m1_m1, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42,
+                                 SEVEN_OVER_SQRT_2_42);
+    interference_abs_64qam_epi16(psi_i_m1_m1, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_m1_m1, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42,
+                                 SEVEN_OVER_SQRT_2_42);
+
+    // prodsum
+    prodsum_psi_a_epi16(psi_r_p1_p1, a_r_p1_p1, psi_i_p1_p1, a_i_p1_p1, psi_a_p1_p1);
+    prodsum_psi_a_epi16(psi_r_p1_m1, a_r_p1_m1, psi_i_p1_m1, a_i_p1_m1, psi_a_p1_m1);
+    prodsum_psi_a_epi16(psi_r_m1_p1, a_r_m1_p1, psi_i_m1_p1, a_i_m1_p1, psi_a_m1_p1);
+    prodsum_psi_a_epi16(psi_r_m1_m1, a_r_m1_m1, psi_i_m1_m1, a_i_m1_m1, psi_a_m1_m1);
+
+    // Multiply by sqrt(2)
+    psi_a_p1_p1 = _mm_mulhi_epi16(psi_a_p1_p1, ONE_OVER_SQRT_2);
+    psi_a_p1_p1 = _mm_slli_epi16(psi_a_p1_p1, 2);
+    psi_a_p1_m1 = _mm_mulhi_epi16(psi_a_p1_m1, ONE_OVER_SQRT_2);
+    psi_a_p1_m1 = _mm_slli_epi16(psi_a_p1_m1, 2);
+    psi_a_m1_p1 = _mm_mulhi_epi16(psi_a_m1_p1, ONE_OVER_SQRT_2);
+    psi_a_m1_p1 = _mm_slli_epi16(psi_a_m1_p1, 2);
+    psi_a_m1_m1 = _mm_mulhi_epi16(psi_a_m1_m1, ONE_OVER_SQRT_2);
+    psi_a_m1_m1 = _mm_slli_epi16(psi_a_m1_m1, 2);
+
+    square_a_64qam_epi16(a_r_p1_p1, a_i_p1_p1, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_p1_p1);
+    square_a_64qam_epi16(a_r_p1_m1, a_i_p1_m1, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_p1_m1);
+    square_a_64qam_epi16(a_r_m1_p1, a_i_m1_p1, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_m1_p1);
+    square_a_64qam_epi16(a_r_m1_m1, a_i_m1_m1, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_m1_m1);
+
+    // Computing Metrics
+    xmm0 = _mm_subs_epi16(psi_a_p1_p1, a_sq_p1_p1);
+    bit_met_p1_p1 = _mm_adds_epi16(xmm0, y0_p_1_1);
+
+    xmm0 = _mm_subs_epi16(psi_a_p1_m1, a_sq_p1_m1);
+    bit_met_p1_m1 = _mm_adds_epi16(xmm0, y0_m_1_1);
+
+    xmm0 = _mm_subs_epi16(psi_a_m1_p1, a_sq_m1_p1);
+    bit_met_m1_p1 = _mm_subs_epi16(xmm0, y0_m_1_1);
+
+    xmm0 = _mm_subs_epi16(psi_a_m1_m1, a_sq_m1_m1);
+    bit_met_m1_m1 = _mm_subs_epi16(xmm0, y0_p_1_1);
+
+    // MSB
+    logmax_num_re0 = _mm_max_epi16(bit_met_p1_p1,bit_met_p1_m1); // bit=0
+    logmax_den_re0 = _mm_max_epi16(bit_met_m1_p1,bit_met_m1_m1); // bit=1
+
+    y0r = _mm_subs_epi16(logmax_num_re0,logmax_den_re0);
+
+    // LSB
+    logmax_num_im0 = _mm_max_epi16(bit_met_p1_p1,bit_met_m1_p1); // bit=0
+    logmax_den_im0 = _mm_max_epi16(bit_met_p1_m1,bit_met_m1_m1); // bit=1
+
+    y0i = _mm_subs_epi16(logmax_num_im0,logmax_den_im0);
+
+    stream0_128i_out[i] = _mm_unpacklo_epi16(y0r,y0i); // = [L1(1), L2(1), L1(2), L2(2)]
+
+    if (i<((length>>1) - 1)) // false if only 2 REs remain
+      stream0_128i_out[i+1] = _mm_unpackhi_epi16(y0r,y0i);
+
+#elif defined(__arm__)
+
+#endif
+  }
+
+#if defined(__x86_64__) || defined(__i386__)
+  _mm_empty();
+  _m_empty();
+#endif
+}
+
+
+//----------------------------------------------------------------------------------------------
+// 16-QAM
+//----------------------------------------------------------------------------------------------
+
+/*
+__m128i ONE_OVER_TWO_SQRT_10 __attribute__((aligned(16)));
+__m128i NINE_OVER_TWO_SQRT_10 __attribute__((aligned(16)));
+
+__m128i  y0r_over_sqrt10 __attribute__ ((aligned(16)));
+__m128i  y0i_over_sqrt10 __attribute__ ((aligned(16)));
+__m128i  y0r_three_over_sqrt10 __attribute__ ((aligned(16)));
+__m128i  y0i_three_over_sqrt10 __attribute__ ((aligned(16)));
+
+__m128i ch_mag_des __attribute__((aligned(16)));
+__m128i ch_mag_over_10 __attribute__ ((aligned(16)));
+__m128i ch_mag_over_2 __attribute__ ((aligned(16)));
+__m128i ch_mag_9_over_10 __attribute__ ((aligned(16)));
+*/
+
+void qam16_qpsk(short *stream0_in,
+                short *stream1_in,
+                short *ch_mag,
+                short *stream0_out,
+                short *rho01,
+                int length
+    )
+{
+  // LLRs are formed below as differences of maxima taken over 16 per-candidate bit metrics.
+  /*
+    Author: Sebastian Wagner
+    Date: 2012-06-04
+
+    Input:
+    stream0_in:  MF filter for 1st stream, i.e., y0=h0'*y
+    stream1_in:  MF filter for 2nd stream, i.e., y1=h1'*y
+    ch_mag:      |h0|^2*(2/sqrt(10)), [Re0 Im0 Re1 Im1] s.t. Im0=Re0, Im1=Re1, etc
+    rho01:       Channel cross correlation, i.e., h1'*h0
+    length:      number of REs; the loop below runs while i < length>>2, i += 2
+
+    Output:
+    stream0_out: output LLRs for 1st stream, 4 int16 LLRs per RE
+  */
+
+#if defined(__x86_64__) || defined(__i386__)
+  __m128i *rho01_128i       = (__m128i *)rho01;
+  __m128i *stream0_128i_in  = (__m128i *)stream0_in;
+  __m128i *stream1_128i_in  = (__m128i *)stream1_in;
+  __m128i *stream0_128i_out = (__m128i *)stream0_out;
+  __m128i *ch_mag_128i      = (__m128i *)ch_mag;
+  __m128i ONE_OVER_SQRT_2 = _mm_set1_epi16(23170); // round(1/sqrt(2)*2^15)
+  __m128i ONE_OVER_SQRT_10 = _mm_set1_epi16(20724); // round(1/sqrt(10)*2^16)
+  __m128i THREE_OVER_SQRT_10 = _mm_set1_epi16(31086); // round(3/sqrt(10)*2^15)
+  __m128i SQRT_10_OVER_FOUR = _mm_set1_epi16(25905); // round(sqrt(10)/4*2^15)
+  __m128i ONE_OVER_TWO_SQRT_10 = _mm_set1_epi16(10362); // round(1/2/sqrt(10)*2^16)
+  __m128i NINE_OVER_TWO_SQRT_10 = _mm_set1_epi16(23315); // round(9/2/sqrt(10)*2^14)
+  __m128i  y0r_over_sqrt10;
+  __m128i  y0i_over_sqrt10;
+  __m128i  y0r_three_over_sqrt10;
+  __m128i  y0i_three_over_sqrt10;
+
+  __m128i ch_mag_des;
+  __m128i ch_mag_over_10;
+  __m128i ch_mag_over_2;
+  __m128i ch_mag_9_over_10;
+#elif defined(__arm__)
+
+#endif
+
+  int i;
+  // xmm0..xmm7, rho_*, psi_*, y0_*, bit_met_*, logmax_*, y0r/y0i/y1r/y1i are not declared in this function.
+
+  for (i=0; i<length>>2; i+=2) {
+    // One iteration handles 8 REs (input vectors i and i+1). NOTE(review): if length>>2 is odd the last pass touches vector i+1 past the end - confirm buffers are padded.
+
+#if defined(__x86_64__) || defined(__i386__)
+    // Get rho: de-interleave two vectors of [Re Im] pairs into Re(rho) and Im(rho)
+    xmm0 = rho01_128i[i];
+    xmm1 = rho01_128i[i+1];
+    xmm0 = _mm_shufflelo_epi16(xmm0,0xd8); //_MM_SHUFFLE(0,2,1,3));
+    xmm0 = _mm_shufflehi_epi16(xmm0,0xd8); //_MM_SHUFFLE(0,2,1,3));
+    xmm0 = _mm_shuffle_epi32(xmm0,0xd8); //_MM_SHUFFLE(0,2,1,3));
+    xmm1 = _mm_shufflelo_epi16(xmm1,0xd8); //_MM_SHUFFLE(0,2,1,3));
+    xmm1 = _mm_shufflehi_epi16(xmm1,0xd8); //_MM_SHUFFLE(0,2,1,3));
+    xmm1 = _mm_shuffle_epi32(xmm1,0xd8); //_MM_SHUFFLE(0,2,1,3));
+    //xmm0 = [Re(0,1) Re(2,3) Im(0,1) Im(2,3)]
+    //xmm1 = [Re(4,5) Re(6,7) Im(4,5) Im(6,7)]
+    xmm2 = _mm_unpacklo_epi64(xmm0,xmm1); // Re(rho)
+    xmm3 = _mm_unpackhi_epi64(xmm0,xmm1); // Im(rho)
+    rho_rpi = _mm_adds_epi16(xmm2,xmm3); // rho = Re(rho) + Im(rho)
+    rho_rmi = _mm_subs_epi16(xmm2,xmm3); // rho* = Re(rho) - Im(rho)
+
+    // Scale rho by 1/sqrt(10) and 3/sqrt(10); products with the 2^15-scaled constant are shifted left by 1 afterwards
+    rho_rpi_1_1 = _mm_mulhi_epi16(rho_rpi,ONE_OVER_SQRT_10);
+    rho_rmi_1_1 = _mm_mulhi_epi16(rho_rmi,ONE_OVER_SQRT_10);
+    rho_rpi_3_3 = _mm_mulhi_epi16(rho_rpi,THREE_OVER_SQRT_10);
+    rho_rmi_3_3 = _mm_mulhi_epi16(rho_rmi,THREE_OVER_SQRT_10);
+    rho_rpi_3_3 = _mm_slli_epi16(rho_rpi_3_3,1);
+    rho_rmi_3_3 = _mm_slli_epi16(rho_rmi_3_3,1);
+
+    xmm4 = _mm_mulhi_epi16(xmm2,ONE_OVER_SQRT_10); // Re(rho)
+    xmm5 = _mm_mulhi_epi16(xmm3,THREE_OVER_SQRT_10); // Im(rho)
+    xmm5 = _mm_slli_epi16(xmm5,1);
+
+    rho_rpi_1_3 = _mm_adds_epi16(xmm4,xmm5);
+    rho_rmi_1_3 = _mm_subs_epi16(xmm4,xmm5);
+
+    xmm6 = _mm_mulhi_epi16(xmm2,THREE_OVER_SQRT_10); // Re(rho)
+    xmm7 = _mm_mulhi_epi16(xmm3,ONE_OVER_SQRT_10); // Im(rho)
+    xmm6 = _mm_slli_epi16(xmm6,1);
+
+    rho_rpi_3_1 = _mm_adds_epi16(xmm6,xmm7);
+    rho_rmi_3_1 = _mm_subs_epi16(xmm6,xmm7);
+
+    // Rearrange interfering MF output
+    xmm0 = stream1_128i_in[i];
+    xmm1 = stream1_128i_in[i+1];
+    xmm0 = _mm_shufflelo_epi16(xmm0,0xd8); //_MM_SHUFFLE(0,2,1,3));
+    xmm0 = _mm_shufflehi_epi16(xmm0,0xd8); //_MM_SHUFFLE(0,2,1,3));
+    xmm0 = _mm_shuffle_epi32(xmm0,0xd8); //_MM_SHUFFLE(0,2,1,3));
+    xmm1 = _mm_shufflelo_epi16(xmm1,0xd8); //_MM_SHUFFLE(0,2,1,3));
+    xmm1 = _mm_shufflehi_epi16(xmm1,0xd8); //_MM_SHUFFLE(0,2,1,3));
+    xmm1 = _mm_shuffle_epi32(xmm1,0xd8); //_MM_SHUFFLE(0,2,1,3));
+    //xmm0 = [Re(0,1) Re(2,3) Im(0,1) Im(2,3)]
+    //xmm1 = [Re(4,5) Re(6,7) Im(4,5) Im(6,7)]
+    y1r = _mm_unpacklo_epi64(xmm0,xmm1); //[y1r(1),y1r(2),...,y1r(8)]
+    y1i = _mm_unpackhi_epi64(xmm0,xmm1); //[y1i(1),y1i(2),...,y1i(8)]
+
+    xmm0 = _mm_setzero_si128(); // ZERO (xmm0 is not read again before it is reloaded from stream0_128i_in below)
+    xmm2 = _mm_subs_epi16(rho_rpi_1_1,y1r); // = [Re(rho)+ Im(rho)]/sqrt(10) - y1r
+    psi_r_p1_p1 = _mm_abs_epi16(xmm2); // = |[Re(rho)+ Im(rho)]/sqrt(10) - y1r|
+
+    xmm2= _mm_subs_epi16(rho_rmi_1_1,y1r);
+    psi_r_p1_m1 = _mm_abs_epi16(xmm2);
+    xmm2= _mm_subs_epi16(rho_rmi_1_1,y1i);
+    psi_i_p1_p1 = _mm_abs_epi16(xmm2);
+    xmm2= _mm_subs_epi16(rho_rpi_1_3,y1r);
+    psi_r_p1_p3 = _mm_abs_epi16(xmm2);
+    xmm2= _mm_subs_epi16(rho_rmi_1_3,y1r);
+    psi_r_p1_m3 = _mm_abs_epi16(xmm2);
+    xmm2= _mm_subs_epi16(rho_rmi_3_1,y1i);
+    psi_i_p1_p3 = _mm_abs_epi16(xmm2);
+    xmm2= _mm_subs_epi16(rho_rpi_3_1,y1r);
+    psi_r_p3_p1 = _mm_abs_epi16(xmm2);
+    xmm2= _mm_subs_epi16(rho_rmi_3_1,y1r);
+    psi_r_p3_m1 = _mm_abs_epi16(xmm2);
+    xmm2= _mm_subs_epi16(rho_rmi_1_3,y1i);
+    psi_i_p3_p1 = _mm_abs_epi16(xmm2);
+    xmm2= _mm_subs_epi16(rho_rpi_3_3,y1r);
+    psi_r_p3_p3 = _mm_abs_epi16(xmm2);
+    xmm2= _mm_subs_epi16(rho_rmi_3_3,y1r);
+    psi_r_p3_m3 = _mm_abs_epi16(xmm2);
+    xmm2= _mm_subs_epi16(rho_rmi_3_3,y1i);
+    psi_i_p3_p3 = _mm_abs_epi16(xmm2);
+    xmm2= _mm_subs_epi16(rho_rpi_1_1,y1i);
+    psi_i_m1_p1 = _mm_abs_epi16(xmm2);
+    xmm2= _mm_subs_epi16(rho_rpi_3_1,y1i);
+    psi_i_m1_p3 = _mm_abs_epi16(xmm2);
+    xmm2= _mm_subs_epi16(rho_rpi_1_3,y1i);
+    psi_i_m3_p1 = _mm_abs_epi16(xmm2);
+    xmm2= _mm_subs_epi16(rho_rpi_3_3,y1i);
+    psi_i_m3_p3 = _mm_abs_epi16(xmm2);
+    xmm2= _mm_adds_epi16(rho_rpi_1_1,y1i);
+    psi_i_p1_m1 = _mm_abs_epi16(xmm2);
+    xmm2= _mm_adds_epi16(rho_rpi_3_1,y1i);
+    psi_i_p1_m3 = _mm_abs_epi16(xmm2);
+    xmm2= _mm_adds_epi16(rho_rpi_1_3,y1i);
+    psi_i_p3_m1 = _mm_abs_epi16(xmm2);
+    xmm2= _mm_adds_epi16(rho_rpi_3_3,y1i);
+    psi_i_p3_m3 = _mm_abs_epi16(xmm2);
+    xmm2= _mm_adds_epi16(rho_rpi_1_1,y1r);
+    psi_r_m1_m1 = _mm_abs_epi16(xmm2);
+    xmm2= _mm_adds_epi16(rho_rpi_1_3,y1r);
+    psi_r_m1_m3 = _mm_abs_epi16(xmm2);
+    xmm2= _mm_adds_epi16(rho_rpi_3_1,y1r);
+    psi_r_m3_m1 = _mm_abs_epi16(xmm2);
+    xmm2= _mm_adds_epi16(rho_rpi_3_3,y1r);
+    psi_r_m3_m3 = _mm_abs_epi16(xmm2);
+    xmm2= _mm_adds_epi16(y1r,rho_rmi_1_1);
+    psi_r_m1_p1 = _mm_abs_epi16(xmm2);
+    xmm2= _mm_adds_epi16(y1r,rho_rmi_1_3);
+    psi_r_m1_p3 = _mm_abs_epi16(xmm2);
+    xmm2= _mm_adds_epi16(y1i,rho_rmi_1_1);
+    psi_i_m1_m1 = _mm_abs_epi16(xmm2);
+    xmm2= _mm_adds_epi16(y1i,rho_rmi_3_1);
+    psi_i_m1_m3 = _mm_abs_epi16(xmm2);
+    xmm2= _mm_adds_epi16(y1r,rho_rmi_3_1);
+    psi_r_m3_p1 = _mm_abs_epi16(xmm2);
+    xmm2= _mm_adds_epi16(y1r,rho_rmi_3_3);
+    psi_r_m3_p3 = _mm_abs_epi16(xmm2);
+    xmm2= _mm_adds_epi16(y1i,rho_rmi_1_3);
+    psi_i_m3_m1 = _mm_abs_epi16(xmm2);
+    xmm2= _mm_adds_epi16(y1i,rho_rmi_3_3);
+    psi_i_m3_m3 = _mm_abs_epi16(xmm2);
+
+    // Rearrange desired MF output
+    xmm0 = stream0_128i_in[i];
+    xmm1 = stream0_128i_in[i+1];
+    xmm0 = _mm_shufflelo_epi16(xmm0,0xd8); //_MM_SHUFFLE(0,2,1,3));
+    xmm0 = _mm_shufflehi_epi16(xmm0,0xd8); //_MM_SHUFFLE(0,2,1,3));
+    xmm0 = _mm_shuffle_epi32(xmm0,0xd8); //_MM_SHUFFLE(0,2,1,3));
+    xmm1 = _mm_shufflelo_epi16(xmm1,0xd8); //_MM_SHUFFLE(0,2,1,3));
+    xmm1 = _mm_shufflehi_epi16(xmm1,0xd8); //_MM_SHUFFLE(0,2,1,3));
+    xmm1 = _mm_shuffle_epi32(xmm1,0xd8); //_MM_SHUFFLE(0,2,1,3));
+    //xmm0 = [Re(0,1) Re(2,3) Im(0,1) Im(2,3)]
+    //xmm1 = [Re(4,5) Re(6,7) Im(4,5) Im(6,7)]
+    y0r = _mm_unpacklo_epi64(xmm0,xmm1); // = [y0r(1),y0r(2),...,y0r(8)]
+    y0i = _mm_unpackhi_epi64(xmm0,xmm1);
+
+    // Rearrange desired channel magnitudes
+    xmm2 = ch_mag_128i[i]; // = [|h|^2(1),|h|^2(1),|h|^2(2),|h|^2(2)]*(2/sqrt(10))
+    xmm3 = ch_mag_128i[i+1]; // = [|h|^2(3),|h|^2(3),|h|^2(4),|h|^2(4)]*(2/sqrt(10))
+    xmm2 = _mm_shufflelo_epi16(xmm2,0xd8); //_MM_SHUFFLE(0,2,1,3));
+    xmm2 = _mm_shufflehi_epi16(xmm2,0xd8); //_MM_SHUFFLE(0,2,1,3));
+    xmm2 = _mm_shuffle_epi32(xmm2,0xd8); //_MM_SHUFFLE(0,2,1,3));
+    xmm3 = _mm_shufflelo_epi16(xmm3,0xd8); //_MM_SHUFFLE(0,2,1,3));
+    xmm3 = _mm_shufflehi_epi16(xmm3,0xd8); //_MM_SHUFFLE(0,2,1,3));
+    xmm3 = _mm_shuffle_epi32(xmm3,0xd8); //_MM_SHUFFLE(0,2,1,3));
+
+    ch_mag_des = _mm_unpacklo_epi64(xmm2,xmm3); // = [|h|^2(1),|h|^2(2),|h|^2(3),|h|^2(4)]*(2/sqrt(10))
+
+    // Scale MF output of desired signal
+    y0r_over_sqrt10 = _mm_mulhi_epi16(y0r,ONE_OVER_SQRT_10);
+    y0i_over_sqrt10 = _mm_mulhi_epi16(y0i,ONE_OVER_SQRT_10);
+    y0r_three_over_sqrt10 = _mm_mulhi_epi16(y0r,THREE_OVER_SQRT_10);
+    y0i_three_over_sqrt10 = _mm_mulhi_epi16(y0i,THREE_OVER_SQRT_10);
+    y0r_three_over_sqrt10 = _mm_slli_epi16(y0r_three_over_sqrt10,1);
+    y0i_three_over_sqrt10 = _mm_slli_epi16(y0i_three_over_sqrt10,1);
+
+    // Compute necessary combination of required terms (_p_ = scaled y0r + scaled y0i, _m_ = scaled y0r - scaled y0i)
+    y0_p_1_1 = _mm_adds_epi16(y0r_over_sqrt10,y0i_over_sqrt10);
+    y0_m_1_1 = _mm_subs_epi16(y0r_over_sqrt10,y0i_over_sqrt10);
+
+    y0_p_1_3 = _mm_adds_epi16(y0r_over_sqrt10,y0i_three_over_sqrt10);
+    y0_m_1_3 = _mm_subs_epi16(y0r_over_sqrt10,y0i_three_over_sqrt10);
+
+    y0_p_3_1 = _mm_adds_epi16(y0r_three_over_sqrt10,y0i_over_sqrt10);
+    y0_m_3_1 = _mm_subs_epi16(y0r_three_over_sqrt10,y0i_over_sqrt10);
+
+    y0_p_3_3 = _mm_adds_epi16(y0r_three_over_sqrt10,y0i_three_over_sqrt10);
+    y0_m_3_3 = _mm_subs_epi16(y0r_three_over_sqrt10,y0i_three_over_sqrt10);
+
+    // Add psi: psi_a = psi_r + psi_i for each of the 16 candidates (saturating add)
+    psi_a_p1_p1 = _mm_adds_epi16(psi_r_p1_p1 ,psi_i_p1_p1);
+    psi_a_p1_p3 = _mm_adds_epi16(psi_r_p1_p3 ,psi_i_p1_p3);
+    psi_a_p3_p1 = _mm_adds_epi16(psi_r_p3_p1 ,psi_i_p3_p1);
+    psi_a_p3_p3 = _mm_adds_epi16(psi_r_p3_p3 ,psi_i_p3_p3);
+    psi_a_p1_m1 = _mm_adds_epi16(psi_r_p1_m1 ,psi_i_p1_m1);
+    psi_a_p1_m3 = _mm_adds_epi16(psi_r_p1_m3 ,psi_i_p1_m3);
+    psi_a_p3_m1 = _mm_adds_epi16(psi_r_p3_m1 ,psi_i_p3_m1);
+    psi_a_p3_m3 = _mm_adds_epi16(psi_r_p3_m3 ,psi_i_p3_m3);
+    psi_a_m1_p1 = _mm_adds_epi16(psi_r_m1_p1 ,psi_i_m1_p1);
+    psi_a_m1_p3 = _mm_adds_epi16(psi_r_m1_p3 ,psi_i_m1_p3);
+    psi_a_m3_p1 = _mm_adds_epi16(psi_r_m3_p1 ,psi_i_m3_p1);
+    psi_a_m3_p3 = _mm_adds_epi16(psi_r_m3_p3 ,psi_i_m3_p3);
+    psi_a_m1_m1 = _mm_adds_epi16(psi_r_m1_m1 ,psi_i_m1_m1);
+    psi_a_m1_m3 = _mm_adds_epi16(psi_r_m1_m3 ,psi_i_m1_m3);
+    psi_a_m3_m1 = _mm_adds_epi16(psi_r_m3_m1 ,psi_i_m3_m1);
+    psi_a_m3_m3 = _mm_adds_epi16(psi_r_m3_m3 ,psi_i_m3_m3);
+
+    // scale by 1/sqrt(2): high half of the product with round(2^15/sqrt(2)), then <<1
+    psi_a_p1_p1 = _mm_mulhi_epi16(psi_a_p1_p1,ONE_OVER_SQRT_2);
+    psi_a_p1_p1 = _mm_slli_epi16(psi_a_p1_p1,1);
+    psi_a_p1_p3 = _mm_mulhi_epi16(psi_a_p1_p3,ONE_OVER_SQRT_2);
+    psi_a_p1_p3 = _mm_slli_epi16(psi_a_p1_p3,1);
+    psi_a_p3_p1 = _mm_mulhi_epi16(psi_a_p3_p1,ONE_OVER_SQRT_2);
+    psi_a_p3_p1 = _mm_slli_epi16(psi_a_p3_p1,1);
+    psi_a_p3_p3 = _mm_mulhi_epi16(psi_a_p3_p3,ONE_OVER_SQRT_2);
+    psi_a_p3_p3 = _mm_slli_epi16(psi_a_p3_p3,1);
+
+    psi_a_p1_m1 = _mm_mulhi_epi16(psi_a_p1_m1,ONE_OVER_SQRT_2);
+    psi_a_p1_m1 = _mm_slli_epi16(psi_a_p1_m1,1);
+    psi_a_p1_m3 = _mm_mulhi_epi16(psi_a_p1_m3,ONE_OVER_SQRT_2);
+    psi_a_p1_m3 = _mm_slli_epi16(psi_a_p1_m3,1);
+    psi_a_p3_m1 = _mm_mulhi_epi16(psi_a_p3_m1,ONE_OVER_SQRT_2);
+    psi_a_p3_m1 = _mm_slli_epi16(psi_a_p3_m1,1);
+    psi_a_p3_m3 = _mm_mulhi_epi16(psi_a_p3_m3,ONE_OVER_SQRT_2);
+    psi_a_p3_m3 = _mm_slli_epi16(psi_a_p3_m3,1);
+
+    psi_a_m1_p1 = _mm_mulhi_epi16(psi_a_m1_p1,ONE_OVER_SQRT_2);
+    psi_a_m1_p1 = _mm_slli_epi16(psi_a_m1_p1,1);
+    psi_a_m1_p3 = _mm_mulhi_epi16(psi_a_m1_p3,ONE_OVER_SQRT_2);
+    psi_a_m1_p3 = _mm_slli_epi16(psi_a_m1_p3,1);
+    psi_a_m3_p1 = _mm_mulhi_epi16(psi_a_m3_p1,ONE_OVER_SQRT_2);
+    psi_a_m3_p1 = _mm_slli_epi16(psi_a_m3_p1,1);
+    psi_a_m3_p3 = _mm_mulhi_epi16(psi_a_m3_p3,ONE_OVER_SQRT_2);
+    psi_a_m3_p3 = _mm_slli_epi16(psi_a_m3_p3,1);
+
+    psi_a_m1_m1 = _mm_mulhi_epi16(psi_a_m1_m1,ONE_OVER_SQRT_2);
+    psi_a_m1_m1 = _mm_slli_epi16(psi_a_m1_m1,1);
+    psi_a_m1_m3 = _mm_mulhi_epi16(psi_a_m1_m3,ONE_OVER_SQRT_2);
+    psi_a_m1_m3 = _mm_slli_epi16(psi_a_m1_m3,1);
+    psi_a_m3_m1 = _mm_mulhi_epi16(psi_a_m3_m1,ONE_OVER_SQRT_2);
+    psi_a_m3_m1 = _mm_slli_epi16(psi_a_m3_m1,1);
+    psi_a_m3_m3 = _mm_mulhi_epi16(psi_a_m3_m3,ONE_OVER_SQRT_2);
+    psi_a_m3_m3 = _mm_slli_epi16(psi_a_m3_m3,1);
+
+    // Computing different multiples of channel norms (ch_mag_des carries |h|^2*(2/sqrt(10)))
+    ch_mag_over_10=_mm_mulhi_epi16(ch_mag_des, ONE_OVER_TWO_SQRT_10); // |h|^2/10
+    ch_mag_over_2=_mm_mulhi_epi16(ch_mag_des, SQRT_10_OVER_FOUR);
+    ch_mag_over_2=_mm_slli_epi16(ch_mag_over_2, 1); // |h|^2/2
+    ch_mag_9_over_10=_mm_mulhi_epi16(ch_mag_des, NINE_OVER_TWO_SQRT_10);
+    ch_mag_9_over_10=_mm_slli_epi16(ch_mag_9_over_10, 2); // 9*|h|^2/10
+
+    // Computing Metrics: psi_a plus/minus the matching y0 combination, minus the channel-norm multiple of that candidate
+    xmm1 = _mm_adds_epi16(psi_a_p1_p1, y0_p_1_1);
+    bit_met_p1_p1= _mm_subs_epi16(xmm1, ch_mag_over_10);
+
+    xmm1 = _mm_adds_epi16(psi_a_p1_p3, y0_p_1_3);
+    bit_met_p1_p3= _mm_subs_epi16(xmm1, ch_mag_over_2);
+
+    xmm1 = _mm_adds_epi16(psi_a_p1_m1, y0_m_1_1);
+    bit_met_p1_m1= _mm_subs_epi16(xmm1, ch_mag_over_10);
+
+    xmm1 = _mm_adds_epi16(psi_a_p1_m3, y0_m_1_3);
+    bit_met_p1_m3= _mm_subs_epi16(xmm1, ch_mag_over_2);
+
+    xmm1 = _mm_adds_epi16(psi_a_p3_p1, y0_p_3_1);
+    bit_met_p3_p1= _mm_subs_epi16(xmm1, ch_mag_over_2);
+
+    xmm1 = _mm_adds_epi16(psi_a_p3_p3, y0_p_3_3);
+    bit_met_p3_p3= _mm_subs_epi16(xmm1, ch_mag_9_over_10);
+
+    xmm1 = _mm_adds_epi16(psi_a_p3_m1, y0_m_3_1);
+    bit_met_p3_m1= _mm_subs_epi16(xmm1, ch_mag_over_2);
+
+    xmm1 = _mm_adds_epi16(psi_a_p3_m3, y0_m_3_3);
+    bit_met_p3_m3= _mm_subs_epi16(xmm1, ch_mag_9_over_10);
+
+    xmm1 = _mm_subs_epi16(psi_a_m1_p1, y0_m_1_1);
+    bit_met_m1_p1= _mm_subs_epi16(xmm1, ch_mag_over_10);
+
+    xmm1 = _mm_subs_epi16(psi_a_m1_p3, y0_m_1_3);
+    bit_met_m1_p3= _mm_subs_epi16(xmm1, ch_mag_over_2);
+
+    xmm1 = _mm_subs_epi16(psi_a_m1_m1, y0_p_1_1);
+    bit_met_m1_m1= _mm_subs_epi16(xmm1, ch_mag_over_10);
+
+    xmm1 = _mm_subs_epi16(psi_a_m1_m3, y0_p_1_3);
+    bit_met_m1_m3= _mm_subs_epi16(xmm1, ch_mag_over_2);
+
+    xmm1 = _mm_subs_epi16(psi_a_m3_p1, y0_m_3_1);
+    bit_met_m3_p1= _mm_subs_epi16(xmm1, ch_mag_over_2);
+
+    xmm1 = _mm_subs_epi16(psi_a_m3_p3, y0_m_3_3);
+    bit_met_m3_p3= _mm_subs_epi16(xmm1, ch_mag_9_over_10);
+
+    xmm1 = _mm_subs_epi16(psi_a_m3_m1, y0_p_3_1);
+    bit_met_m3_m1= _mm_subs_epi16(xmm1, ch_mag_over_2);
+
+    xmm1 = _mm_subs_epi16(psi_a_m3_m3, y0_p_3_3);
+    bit_met_m3_m3= _mm_subs_epi16(xmm1, ch_mag_9_over_10);
+
+    // LLR of the first bit
+    // Bit = 1
+    xmm0 = _mm_max_epi16(bit_met_m1_p1,bit_met_m1_p3);
+    xmm1 = _mm_max_epi16(bit_met_m1_m1,bit_met_m1_m3);
+    xmm2 = _mm_max_epi16(bit_met_m3_p1,bit_met_m3_p3);
+    xmm3 = _mm_max_epi16(bit_met_m3_m1,bit_met_m3_m3);
+    xmm4 = _mm_max_epi16(xmm0,xmm1);
+    xmm5 = _mm_max_epi16(xmm2,xmm3);
+    logmax_num_re0= _mm_max_epi16(xmm4,xmm5);
+
+    // Bit = 0
+    xmm0 = _mm_max_epi16(bit_met_p1_p1,bit_met_p1_p3);
+    xmm1 = _mm_max_epi16(bit_met_p1_m1,bit_met_p1_m3);
+    xmm2 = _mm_max_epi16(bit_met_p3_p1,bit_met_p3_p3);
+    xmm3 = _mm_max_epi16(bit_met_p3_m1,bit_met_p3_m3);
+    xmm4 = _mm_max_epi16(xmm0,xmm1);
+    xmm5 = _mm_max_epi16(xmm2,xmm3);
+    logmax_den_re0 = _mm_max_epi16(xmm4,xmm5);
+
+    // LLR of first bit [L1(1), L1(2), L1(3), L1(4), L1(5), L1(6), L1(7), L1(8)]
+    y0r = _mm_subs_epi16(logmax_den_re0,logmax_num_re0);
+
+    // LLR of the second bit
+    // Bit = 1
+    xmm0 = _mm_max_epi16(bit_met_p1_m1,bit_met_p3_m1);
+    xmm1 = _mm_max_epi16(bit_met_m1_m1,bit_met_m3_m1);
+    xmm2 = _mm_max_epi16(bit_met_p1_m3,bit_met_p3_m3);
+    xmm3 = _mm_max_epi16(bit_met_m1_m3,bit_met_m3_m3);
+    xmm4 = _mm_max_epi16(xmm0,xmm1);
+    xmm5 = _mm_max_epi16(xmm2,xmm3);
+    logmax_num_re1 = _mm_max_epi16(xmm4,xmm5);
+
+    // Bit = 0
+    xmm0 = _mm_max_epi16(bit_met_p1_p1,bit_met_p3_p1);
+    xmm1 = _mm_max_epi16(bit_met_m1_p1,bit_met_m3_p1);
+    xmm2 = _mm_max_epi16(bit_met_p1_p3,bit_met_p3_p3);
+    xmm3 = _mm_max_epi16(bit_met_m1_p3,bit_met_m3_p3);
+    xmm4 = _mm_max_epi16(xmm0,xmm1);
+    xmm5 = _mm_max_epi16(xmm2,xmm3);
+    logmax_den_re1 = _mm_max_epi16(xmm4,xmm5);
+
+    // LLR of second bit [L2(1), L2(2), ..., L2(8)]
+    y1r = _mm_subs_epi16(logmax_den_re1,logmax_num_re1);
+
+    // LLR of the third bit
+    // Bit = 1
+    xmm0 = _mm_max_epi16(bit_met_m3_p1,bit_met_m3_p3);
+    xmm1 = _mm_max_epi16(bit_met_m3_m1,bit_met_m3_m3);
+    xmm2 = _mm_max_epi16(bit_met_p3_p1,bit_met_p3_p3);
+    xmm3 = _mm_max_epi16(bit_met_p3_m1,bit_met_p3_m3);
+    xmm4 = _mm_max_epi16(xmm0,xmm1);
+    xmm5 = _mm_max_epi16(xmm2,xmm3);
+    logmax_num_im0 = _mm_max_epi16(xmm4,xmm5);
+
+    // Bit = 0
+    xmm0 = _mm_max_epi16(bit_met_m1_p1,bit_met_m1_p3);
+    xmm1 = _mm_max_epi16(bit_met_m1_m1,bit_met_m1_m3);
+    xmm2 = _mm_max_epi16(bit_met_p1_p1,bit_met_p1_p3);
+    xmm3 = _mm_max_epi16(bit_met_p1_m1,bit_met_p1_m3);
+    xmm4 = _mm_max_epi16(xmm0,xmm1);
+    xmm5 = _mm_max_epi16(xmm2,xmm3);
+    logmax_den_im0 = _mm_max_epi16(xmm4,xmm5);
+
+    // LLR of third bit [L3(1), L3(2), ..., L3(8)]
+    y0i = _mm_subs_epi16(logmax_den_im0,logmax_num_im0);
+
+    // LLR of the fourth bit
+    // Bit = 1
+    xmm0 = _mm_max_epi16(bit_met_p1_m3,bit_met_p3_m3);
+    xmm1 = _mm_max_epi16(bit_met_m1_m3,bit_met_m3_m3);
+    xmm2 = _mm_max_epi16(bit_met_p1_p3,bit_met_p3_p3);
+    xmm3 = _mm_max_epi16(bit_met_m1_p3,bit_met_m3_p3);
+    xmm4 = _mm_max_epi16(xmm0,xmm1);
+    xmm5 = _mm_max_epi16(xmm2,xmm3);
+    logmax_num_im1 = _mm_max_epi16(xmm4,xmm5);
+
+    // Bit = 0
+    xmm0 = _mm_max_epi16(bit_met_p1_m1,bit_met_p3_m1);
+    xmm1 = _mm_max_epi16(bit_met_m1_m1,bit_met_m3_m1);
+    xmm2 = _mm_max_epi16(bit_met_p1_p1,bit_met_p3_p1);
+    xmm3 = _mm_max_epi16(bit_met_m1_p1,bit_met_m3_p1);
+    xmm4 = _mm_max_epi16(xmm0,xmm1);
+    xmm5 = _mm_max_epi16(xmm2,xmm3);
+    logmax_den_im1 = _mm_max_epi16(xmm4,xmm5);
+
+    // LLR of fourth bit [L4(1), L4(2), ..., L4(8)]
+    y1i = _mm_subs_epi16(logmax_den_im1,logmax_num_im1);
+
+    // Pack LLRs in output
+    // [L1(1), L2(1), L1(2), L2(2), L1(3), L2(3), L1(4), L2(4)]
+    xmm0 = _mm_unpacklo_epi16(y0r,y1r);
+    // [L1(5), L2(5), L1(6), L2(6), L1(7), L2(7), L1(8), L2(8)]
+    xmm1 = _mm_unpackhi_epi16(y0r,y1r);
+    // [L3(1), L4(1), L3(2), L4(2), L3(3), L4(3), L3(4), L4(4)]
+    xmm2 = _mm_unpacklo_epi16(y0i,y1i);
+    // [L3(5), L4(5), L3(6), L4(6), L3(7), L4(7), L3(8), L4(8)]
+    xmm3 = _mm_unpackhi_epi16(y0i,y1i);
+
+    stream0_128i_out[2*i+0] = _mm_unpacklo_epi32(xmm0,xmm2); // 8LLRs, 2REs
+    stream0_128i_out[2*i+1] = _mm_unpackhi_epi32(xmm0,xmm2);
+    stream0_128i_out[2*i+2] = _mm_unpacklo_epi32(xmm1,xmm3);
+    stream0_128i_out[2*i+3] = _mm_unpackhi_epi32(xmm1,xmm3);
+
+#elif defined(__arm__)
+
+#endif
+  }
+  // NOTE(review): only __m128i intrinsics appear above; the two MMX-state resets below look redundant - confirm before removing.
+#if defined(__x86_64__) || defined(__i386__)
+  _mm_empty();
+  _m_empty();
+#endif
+
+}
+
+int dlsch_16qam_qpsk_llr(LTE_DL_FRAME_PARMS *frame_parms,
+                         int32_t **rxdataF_comp,
+                         int32_t **rxdataF_comp_i,
+                         int32_t **dl_ch_mag,   //|h_0|^2*(2/sqrt{10})
+                         int32_t **rho_i,
+                         int16_t *dlsch_llr,
+                         uint8_t symbol,
+                         uint8_t first_symbol_flag,
+                         uint16_t nb_rb,
+                         uint16_t pbch_pss_sss_adjust,
+                         int16_t **llr16p)
+{
+  // Per-symbol front end for qam16_qpsk().
+  //  - selects this symbol's slice of the [0] entry of each input buffer
+  //  - derives the number of REs (len) from nb_rb, the pilot layout and
+  //    pbch_pss_sss_adjust
+  //  - writes LLRs at dlsch_llr (first symbol) or at *llr16p (later symbols)
+  //  - stores the position after the last LLR in *llr16p and returns 0
+  int16_t *rxF    = (int16_t*)(rxdataF_comp[0]   + (symbol*frame_parms->N_RB_DL*12));
+  int16_t *rxF_i  = (int16_t*)(rxdataF_comp_i[0] + (symbol*frame_parms->N_RB_DL*12));
+  int16_t *ch_mag = (int16_t*)(dl_ch_mag[0]      + (symbol*frame_parms->N_RB_DL*12));
+  int16_t *rho    = (int16_t*)(rho_i[0]          + (symbol*frame_parms->N_RB_DL*12));
+  uint8_t symbol_mod = symbol;
+  int16_t *llr16;
+  int len;
+
+  // Symbol index reduced by (7-Ncp) once it reaches that value.
+  if (symbol >= (7-frame_parms->Ncp))
+    symbol_mod = symbol-(7-frame_parms->Ncp);
+
+  // first symbol has different structure due to more pilots
+  llr16 = (first_symbol_flag == 1) ? (int16_t*)dlsch_llr : (int16_t*)(*llr16p);
+
+  AssertFatal(llr16!=NULL,"dlsch_16qam_qpsk_llr: llr is null, symbol %d\n",symbol);
+
+  if ((symbol_mod!=0) && (symbol_mod!=(4-frame_parms->Ncp))) {
+    // symbol has no pilots: 12 REs per symbol per RB
+    len = (nb_rb*12) - pbch_pss_sss_adjust;
+  } else if (frame_parms->nb_antenna_ports_eNB!=1) {
+    // pilots, 2 antenna ports: 8 REs per symbol per RB
+    len = (nb_rb*8) - (2*pbch_pss_sss_adjust/3);
+  } else {
+    // pilots, 1 antenna port: 10 REs per symbol per RB
+    len = (nb_rb*10) - (5*pbch_pss_sss_adjust/6);
+  }
+
+  qam16_qpsk((short *)rxF, (short *)rxF_i,
+             (short *)ch_mag, (short *)llr16,
+             (short *)rho, len);
+
+  // The write position moves on by 4 int16 values per RE (len<<2) and is
+  // handed back to the caller for the next symbol.
+  *llr16p = (short *)(llr16 + (len<<2));
+
+  // No failure path is reported through the return value.
+  return 0;
+}
+
+void qam16_qam16(short *stream0_in,
+                 short *stream1_in,
+                 short *ch_mag,
+                 short *ch_mag_i,
+                 short *stream0_out,
+                 short *rho01,
+                 int length
+     )
+{
+
+  /*
+    Author: Sebastian Wagner
+    Date: 2012-06-04
+
+    Input:
+    stream0_in:  MF filter for 1st stream, i.e., y0=h0'*y
+    stream!_in:  MF filter for 2nd stream, i.e., y1=h1'*y
+    ch_mag:      2*h0/sqrt(00), [Re0 Im0 Re1 Im1] s.t. Im0=Re0, Im1=Re1, etc
+    ch_mag_i:    2*h1/sqrt(00), [Re0 Im0 Re1 Im1] s.t. Im0=Re0, Im1=Re1, etc
+    rho01:       Channel cross correlation, i.e., h1'*h0
+
+    Output:
+    stream0_out: output LLRs for 1st stream
+  */
+#if defined(__x86_64__) || defined(__i386__)
+  __m128i *rho01_128i       = (__m128i *)rho01;
+  __m128i *stream0_128i_in  = (__m128i *)stream0_in;
+  __m128i *stream1_128i_in  = (__m128i *)stream1_in;
+  __m128i *stream0_128i_out = (__m128i *)stream0_out;
+  __m128i *ch_mag_128i      = (__m128i *)ch_mag;
+  __m128i *ch_mag_128i_i    = (__m128i *)ch_mag_i;
+
+
+
+  __m128i ONE_OVER_SQRT_10 = _mm_set1_epi16(20724); // round(1/sqrt(10)*2^16)
+  __m128i ONE_OVER_SQRT_10_Q15 = _mm_set1_epi16(10362); // round(1/sqrt(10)*2^15)
+  __m128i THREE_OVER_SQRT_10 = _mm_set1_epi16(31086); // round(3/sqrt(10)*2^15)
+  __m128i SQRT_10_OVER_FOUR = _mm_set1_epi16(25905); // round(sqrt(10)/4*2^15)
+  __m128i ONE_OVER_TWO_SQRT_10 = _mm_set1_epi16(10362); // round(1/2/sqrt(10)*2^16)
+  __m128i NINE_OVER_TWO_SQRT_10 = _mm_set1_epi16(23315); // round(9/2/sqrt(10)*2^14)
+  __m128i ch_mag_des,ch_mag_int;
+  __m128i  y0r_over_sqrt10;
+  __m128i  y0i_over_sqrt10;
+  __m128i  y0r_three_over_sqrt10;
+  __m128i  y0i_three_over_sqrt10;
+  __m128i ch_mag_over_10;
+  __m128i ch_mag_over_2;
+  __m128i ch_mag_9_over_10;
+#elif defined(__arm__)
+
+#endif
+
+  int i;
+
+  for (i=0; i<length>>2; i+=2) {
+    // In one iteration, we deal with 8 REs
+
+#if defined(__x86_64__) || defined(__i386__)
+    // Get rho
+    xmm0 = rho01_128i[i];
+    xmm1 = rho01_128i[i+1];
+    xmm0 = _mm_shufflelo_epi16(xmm0,0xd8); //_MM_SHUFFLE(0,2,1,3));
+    xmm0 = _mm_shufflehi_epi16(xmm0,0xd8); //_MM_SHUFFLE(0,2,1,3));
+    xmm0 = _mm_shuffle_epi32(xmm0,0xd8); //_MM_SHUFFLE(0,2,1,3));
+    xmm1 = _mm_shufflelo_epi16(xmm1,0xd8); //_MM_SHUFFLE(0,2,1,3));
+    xmm1 = _mm_shufflehi_epi16(xmm1,0xd8); //_MM_SHUFFLE(0,2,1,3));
+    xmm1 = _mm_shuffle_epi32(xmm1,0xd8); //_MM_SHUFFLE(0,2,1,3));
+    //xmm0 = [Re(0,1) Re(2,3) Im(0,1) Im(2,3)]
+    //xmm1 = [Re(4,5) Re(6,7) Im(4,5) Im(6,7)]
+    xmm2 = _mm_unpacklo_epi64(xmm0,xmm1); // Re(rho)
+    xmm3 = _mm_unpackhi_epi64(xmm0,xmm1); // Im(rho)
+    rho_rpi = _mm_adds_epi16(xmm2,xmm3); // rho = Re(rho) + Im(rho)
+    rho_rmi = _mm_subs_epi16(xmm2,xmm3); // rho* = Re(rho) - Im(rho)
+
+    // Compute the different rhos
+    rho_rpi_1_1 = _mm_mulhi_epi16(rho_rpi,ONE_OVER_SQRT_10);
+    rho_rmi_1_1 = _mm_mulhi_epi16(rho_rmi,ONE_OVER_SQRT_10);
+    rho_rpi_3_3 = _mm_mulhi_epi16(rho_rpi,THREE_OVER_SQRT_10);
+    rho_rmi_3_3 = _mm_mulhi_epi16(rho_rmi,THREE_OVER_SQRT_10);
+    rho_rpi_3_3 = _mm_slli_epi16(rho_rpi_3_3,1);
+    rho_rmi_3_3 = _mm_slli_epi16(rho_rmi_3_3,1);
+
+    xmm4 = _mm_mulhi_epi16(xmm2,ONE_OVER_SQRT_10); // Re(rho)
+    xmm5 = _mm_mulhi_epi16(xmm3,THREE_OVER_SQRT_10); // Im(rho)
+    xmm5 = _mm_slli_epi16(xmm5,1);
+
+    rho_rpi_1_3 = _mm_adds_epi16(xmm4,xmm5);
+    rho_rmi_1_3 = _mm_subs_epi16(xmm4,xmm5);
+
+    xmm6 = _mm_mulhi_epi16(xmm2,THREE_OVER_SQRT_10); // Re(rho)
+    xmm7 = _mm_mulhi_epi16(xmm3,ONE_OVER_SQRT_10); // Im(rho)
+    xmm6 = _mm_slli_epi16(xmm6,1);
+
+    rho_rpi_3_1 = _mm_adds_epi16(xmm6,xmm7);
+    rho_rmi_3_1 = _mm_subs_epi16(xmm6,xmm7);
+
+    // Rearrange interfering MF output
+    xmm0 = stream1_128i_in[i];
+    xmm1 = stream1_128i_in[i+1];
+    xmm0 = _mm_shufflelo_epi16(xmm0,0xd8); //_MM_SHUFFLE(0,2,1,3));
+    xmm0 = _mm_shufflehi_epi16(xmm0,0xd8); //_MM_SHUFFLE(0,2,1,3));
+    xmm0 = _mm_shuffle_epi32(xmm0,0xd8); //_MM_SHUFFLE(0,2,1,3));
+    xmm1 = _mm_shufflelo_epi16(xmm1,0xd8); //_MM_SHUFFLE(0,2,1,3));
+    xmm1 = _mm_shufflehi_epi16(xmm1,0xd8); //_MM_SHUFFLE(0,2,1,3));
+    xmm1 = _mm_shuffle_epi32(xmm1,0xd8); //_MM_SHUFFLE(0,2,1,3));
+    //xmm0 = [Re(0,1) Re(2,3) Im(0,1) Im(2,3)]
+    //xmm1 = [Re(4,5) Re(6,7) Im(4,5) Im(6,7)]
+    y1r = _mm_unpacklo_epi64(xmm0,xmm1); //[y1r(1),y1r(2),y1r(3),y1r(4)]
+    y1i = _mm_unpackhi_epi64(xmm0,xmm1); //[y1i(1),y1i(2),y1i(3),y1i(4)]
+
+    xmm0 = _mm_setzero_si128(); // ZERO
+    xmm2 = _mm_subs_epi16(rho_rpi_1_1,y1r); // = [Re(rho)+ Im(rho)]/sqrt(10) - y1r
+    psi_r_p1_p1 = _mm_abs_epi16(xmm2); // = |[Re(rho)+ Im(rho)]/sqrt(10) - y1r|
+
+    xmm2= _mm_subs_epi16(rho_rmi_1_1,y1r);
+    psi_r_p1_m1 = _mm_abs_epi16(xmm2);
+    xmm2= _mm_subs_epi16(rho_rmi_1_1,y1i);
+    psi_i_p1_p1 = _mm_abs_epi16(xmm2);
+    xmm2= _mm_subs_epi16(rho_rpi_1_3,y1r);
+    psi_r_p1_p3 = _mm_abs_epi16(xmm2);
+    xmm2= _mm_subs_epi16(rho_rmi_1_3,y1r);
+    psi_r_p1_m3 = _mm_abs_epi16(xmm2);
+    xmm2= _mm_subs_epi16(rho_rmi_3_1,y1i);
+    psi_i_p1_p3 = _mm_abs_epi16(xmm2);
+    xmm2= _mm_subs_epi16(rho_rpi_3_1,y1r);
+    psi_r_p3_p1 = _mm_abs_epi16(xmm2);
+    xmm2= _mm_subs_epi16(rho_rmi_3_1,y1r);
+    psi_r_p3_m1 = _mm_abs_epi16(xmm2);
+    xmm2= _mm_subs_epi16(rho_rmi_1_3,y1i);
+    psi_i_p3_p1 = _mm_abs_epi16(xmm2);
+    xmm2= _mm_subs_epi16(rho_rpi_3_3,y1r);
+    psi_r_p3_p3 = _mm_abs_epi16(xmm2);
+    xmm2= _mm_subs_epi16(rho_rmi_3_3,y1r);
+    psi_r_p3_m3 = _mm_abs_epi16(xmm2);
+    xmm2= _mm_subs_epi16(rho_rmi_3_3,y1i);
+    psi_i_p3_p3 = _mm_abs_epi16(xmm2);
+    xmm2= _mm_subs_epi16(rho_rpi_1_1,y1i);
+    psi_i_m1_p1 = _mm_abs_epi16(xmm2);
+    xmm2= _mm_subs_epi16(rho_rpi_3_1,y1i);
+    psi_i_m1_p3 = _mm_abs_epi16(xmm2);
+    xmm2= _mm_subs_epi16(rho_rpi_1_3,y1i);
+    psi_i_m3_p1 = _mm_abs_epi16(xmm2);
+    xmm2= _mm_subs_epi16(rho_rpi_3_3,y1i);
+    psi_i_m3_p3 = _mm_abs_epi16(xmm2);
+    xmm2= _mm_adds_epi16(rho_rpi_1_1,y1i);
+    psi_i_p1_m1 = _mm_abs_epi16(xmm2);
+    xmm2= _mm_adds_epi16(rho_rpi_3_1,y1i);
+    psi_i_p1_m3 = _mm_abs_epi16(xmm2);
+    xmm2= _mm_adds_epi16(rho_rpi_1_3,y1i);
+    psi_i_p3_m1 = _mm_abs_epi16(xmm2);
+    xmm2= _mm_adds_epi16(rho_rpi_3_3,y1i);
+    psi_i_p3_m3 = _mm_abs_epi16(xmm2);
+    xmm2= _mm_adds_epi16(rho_rpi_1_1,y1r);
+    psi_r_m1_m1 = _mm_abs_epi16(xmm2);
+    xmm2= _mm_adds_epi16(rho_rpi_1_3,y1r);
+    psi_r_m1_m3 = _mm_abs_epi16(xmm2);
+    xmm2= _mm_adds_epi16(rho_rpi_3_1,y1r);
+    psi_r_m3_m1 = _mm_abs_epi16(xmm2);
+    xmm2= _mm_adds_epi16(rho_rpi_3_3,y1r);
+    psi_r_m3_m3 = _mm_abs_epi16(xmm2);
+    xmm2= _mm_adds_epi16(y1r,rho_rmi_1_1);
+    psi_r_m1_p1 = _mm_abs_epi16(xmm2);
+    xmm2= _mm_adds_epi16(y1r,rho_rmi_1_3);
+    psi_r_m1_p3 = _mm_abs_epi16(xmm2);
+    xmm2= _mm_adds_epi16(y1i,rho_rmi_1_1);
+    psi_i_m1_m1 = _mm_abs_epi16(xmm2);
+    xmm2= _mm_adds_epi16(y1i,rho_rmi_3_1);
+    psi_i_m1_m3 = _mm_abs_epi16(xmm2);
+    xmm2= _mm_adds_epi16(y1r,rho_rmi_3_1);
+    psi_r_m3_p1 = _mm_abs_epi16(xmm2);
+    xmm2= _mm_adds_epi16(y1r,rho_rmi_3_3);
+    psi_r_m3_p3 = _mm_abs_epi16(xmm2);
+    xmm2= _mm_adds_epi16(y1i,rho_rmi_1_3);
+    psi_i_m3_m1 = _mm_abs_epi16(xmm2);
+    xmm2= _mm_adds_epi16(y1i,rho_rmi_3_3);
+    psi_i_m3_m3 = _mm_abs_epi16(xmm2);
+
+    // Rearrange desired MF output
+    xmm0 = stream0_128i_in[i];
+    xmm1 = stream0_128i_in[i+1];
+    xmm0 = _mm_shufflelo_epi16(xmm0,0xd8); //_MM_SHUFFLE(0,2,1,3));
+    xmm0 = _mm_shufflehi_epi16(xmm0,0xd8); //_MM_SHUFFLE(0,2,1,3));
+    xmm0 = _mm_shuffle_epi32(xmm0,0xd8); //_MM_SHUFFLE(0,2,1,3));
+    xmm1 = _mm_shufflelo_epi16(xmm1,0xd8); //_MM_SHUFFLE(0,2,1,3));
+    xmm1 = _mm_shufflehi_epi16(xmm1,0xd8); //_MM_SHUFFLE(0,2,1,3));
+    xmm1 = _mm_shuffle_epi32(xmm1,0xd8); //_MM_SHUFFLE(0,2,1,3));
+    //xmm0 = [Re(0,1) Re(2,3) Im(0,1) Im(2,3)]
+    //xmm1 = [Re(4,5) Re(6,7) Im(4,5) Im(6,7)]
+    y0r = _mm_unpacklo_epi64(xmm0,xmm1); // = [y0r(1),y0r(2),y0r(3),y0r(4)]
+    y0i = _mm_unpackhi_epi64(xmm0,xmm1);
+
+    // Rearrange desired channel magnitudes
+    xmm2 = ch_mag_128i[i]; // = [|h|^2(1),|h|^2(1),|h|^2(2),|h|^2(2)]*(2/sqrt(10))
+    xmm3 = ch_mag_128i[i+1]; // = [|h|^2(3),|h|^2(3),|h|^2(4),|h|^2(4)]*(2/sqrt(10))
+    xmm2 = _mm_shufflelo_epi16(xmm2,0xd8); //_MM_SHUFFLE(0,2,1,3));
+    xmm2 = _mm_shufflehi_epi16(xmm2,0xd8); //_MM_SHUFFLE(0,2,1,3));
+    xmm2 = _mm_shuffle_epi32(xmm2,0xd8); //_MM_SHUFFLE(0,2,1,3));
+    xmm3 = _mm_shufflelo_epi16(xmm3,0xd8); //_MM_SHUFFLE(0,2,1,3));
+    xmm3 = _mm_shufflehi_epi16(xmm3,0xd8); //_MM_SHUFFLE(0,2,1,3));
+    xmm3 = _mm_shuffle_epi32(xmm3,0xd8); //_MM_SHUFFLE(0,2,1,3));
+
+    ch_mag_des = _mm_unpacklo_epi64(xmm2,xmm3); // = [|h|^2(1),|h|^2(2),|h|^2(3),|h|^2(4)]*(2/sqrt(10))
+
+    // Rearrange interfering channel magnitudes
+    xmm2 = ch_mag_128i_i[i];
+    xmm3 = ch_mag_128i_i[i+1];
+
+    xmm2 = _mm_shufflelo_epi16(xmm2,0xd8); //_MM_SHUFFLE(0,2,1,3));
+    xmm2 = _mm_shufflehi_epi16(xmm2,0xd8); //_MM_SHUFFLE(0,2,1,3));
+    xmm2 = _mm_shuffle_epi32(xmm2,0xd8); //_MM_SHUFFLE(0,2,1,3));
+    xmm3 = _mm_shufflelo_epi16(xmm3,0xd8); //_MM_SHUFFLE(0,2,1,3));
+    xmm3 = _mm_shufflehi_epi16(xmm3,0xd8); //_MM_SHUFFLE(0,2,1,3));
+    xmm3 = _mm_shuffle_epi32(xmm3,0xd8); //_MM_SHUFFLE(0,2,1,3));
+
+    ch_mag_int  = _mm_unpacklo_epi64(xmm2,xmm3);
+
+    // Scale MF output of desired signal
+    y0r_over_sqrt10 = _mm_mulhi_epi16(y0r,ONE_OVER_SQRT_10);
+    y0i_over_sqrt10 = _mm_mulhi_epi16(y0i,ONE_OVER_SQRT_10);
+    y0r_three_over_sqrt10 = _mm_mulhi_epi16(y0r,THREE_OVER_SQRT_10);
+    y0i_three_over_sqrt10 = _mm_mulhi_epi16(y0i,THREE_OVER_SQRT_10);
+    y0r_three_over_sqrt10 = _mm_slli_epi16(y0r_three_over_sqrt10,1);
+    y0i_three_over_sqrt10 = _mm_slli_epi16(y0i_three_over_sqrt10,1);
+
+    // Compute necessary combination of required terms
+    y0_p_1_1 = _mm_adds_epi16(y0r_over_sqrt10,y0i_over_sqrt10);
+    y0_m_1_1 = _mm_subs_epi16(y0r_over_sqrt10,y0i_over_sqrt10);
+
+    y0_p_1_3 = _mm_adds_epi16(y0r_over_sqrt10,y0i_three_over_sqrt10);
+    y0_m_1_3 = _mm_subs_epi16(y0r_over_sqrt10,y0i_three_over_sqrt10);
+
+    y0_p_3_1 = _mm_adds_epi16(y0r_three_over_sqrt10,y0i_over_sqrt10);
+    y0_m_3_1 = _mm_subs_epi16(y0r_three_over_sqrt10,y0i_over_sqrt10);
+
+    y0_p_3_3 = _mm_adds_epi16(y0r_three_over_sqrt10,y0i_three_over_sqrt10);
+    y0_m_3_3 = _mm_subs_epi16(y0r_three_over_sqrt10,y0i_three_over_sqrt10);
+
+    // Compute optimal interfering symbol magnitude
+    interference_abs_epi16(psi_r_p1_p1 ,ch_mag_int,a_r_p1_p1,ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10);
+    interference_abs_epi16(psi_i_p1_p1 ,ch_mag_int,a_i_p1_p1,ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10);
+    interference_abs_epi16(psi_r_p1_p3 ,ch_mag_int,a_r_p1_p3,ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10);
+    interference_abs_epi16(psi_i_p1_p3 ,ch_mag_int,a_i_p1_p3,ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10);
+    interference_abs_epi16(psi_r_p1_m1 ,ch_mag_int,a_r_p1_m1,ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10);
+    interference_abs_epi16(psi_i_p1_m1 ,ch_mag_int,a_i_p1_m1,ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10);
+    interference_abs_epi16(psi_r_p1_m3 ,ch_mag_int,a_r_p1_m3,ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10);
+    interference_abs_epi16(psi_i_p1_m3 ,ch_mag_int,a_i_p1_m3,ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10);
+    interference_abs_epi16(psi_r_p3_p1 ,ch_mag_int,a_r_p3_p1,ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10);
+    interference_abs_epi16(psi_i_p3_p1 ,ch_mag_int,a_i_p3_p1,ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10);
+    interference_abs_epi16(psi_r_p3_p3 ,ch_mag_int,a_r_p3_p3,ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10);
+    interference_abs_epi16(psi_i_p3_p3 ,ch_mag_int,a_i_p3_p3,ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10);
+    interference_abs_epi16(psi_r_p3_m1 ,ch_mag_int,a_r_p3_m1,ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10);
+    interference_abs_epi16(psi_i_p3_m1 ,ch_mag_int,a_i_p3_m1,ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10);
+    interference_abs_epi16(psi_r_p3_m3 ,ch_mag_int,a_r_p3_m3,ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10);
+    interference_abs_epi16(psi_i_p3_m3 ,ch_mag_int,a_i_p3_m3,ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10);
+    interference_abs_epi16(psi_r_m1_p1 ,ch_mag_int,a_r_m1_p1,ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10);
+    interference_abs_epi16(psi_i_m1_p1 ,ch_mag_int,a_i_m1_p1,ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10);
+    interference_abs_epi16(psi_r_m1_p3 ,ch_mag_int,a_r_m1_p3,ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10);
+    interference_abs_epi16(psi_i_m1_p3 ,ch_mag_int,a_i_m1_p3,ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10);
+    interference_abs_epi16(psi_r_m1_m1 ,ch_mag_int,a_r_m1_m1,ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10);
+    interference_abs_epi16(psi_i_m1_m1 ,ch_mag_int,a_i_m1_m1,ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10);
+    interference_abs_epi16(psi_r_m1_m3 ,ch_mag_int,a_r_m1_m3,ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10);
+    interference_abs_epi16(psi_i_m1_m3 ,ch_mag_int,a_i_m1_m3,ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10);
+    interference_abs_epi16(psi_r_m3_p1 ,ch_mag_int,a_r_m3_p1,ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10);
+    interference_abs_epi16(psi_i_m3_p1 ,ch_mag_int,a_i_m3_p1,ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10);
+    interference_abs_epi16(psi_r_m3_p3 ,ch_mag_int,a_r_m3_p3,ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10);
+    interference_abs_epi16(psi_i_m3_p3 ,ch_mag_int,a_i_m3_p3,ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10);
+    interference_abs_epi16(psi_r_m3_m1 ,ch_mag_int,a_r_m3_m1,ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10);
+    interference_abs_epi16(psi_i_m3_m1 ,ch_mag_int,a_i_m3_m1,ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10);
+    interference_abs_epi16(psi_r_m3_m3 ,ch_mag_int,a_r_m3_m3,ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10);
+    interference_abs_epi16(psi_i_m3_m3 ,ch_mag_int,a_i_m3_m3,ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10);
+
+    // Calculation of groups of two terms in the bit metric involving product of psi and interference magnitude
+    prodsum_psi_a_epi16(psi_r_p1_p1,a_r_p1_p1,psi_i_p1_p1,a_i_p1_p1,psi_a_p1_p1);
+    prodsum_psi_a_epi16(psi_r_p1_p3,a_r_p1_p3,psi_i_p1_p3,a_i_p1_p3,psi_a_p1_p3);
+    prodsum_psi_a_epi16(psi_r_p3_p1,a_r_p3_p1,psi_i_p3_p1,a_i_p3_p1,psi_a_p3_p1);
+    prodsum_psi_a_epi16(psi_r_p3_p3,a_r_p3_p3,psi_i_p3_p3,a_i_p3_p3,psi_a_p3_p3);
+    prodsum_psi_a_epi16(psi_r_p1_m1,a_r_p1_m1,psi_i_p1_m1,a_i_p1_m1,psi_a_p1_m1);
+    prodsum_psi_a_epi16(psi_r_p1_m3,a_r_p1_m3,psi_i_p1_m3,a_i_p1_m3,psi_a_p1_m3);
+    prodsum_psi_a_epi16(psi_r_p3_m1,a_r_p3_m1,psi_i_p3_m1,a_i_p3_m1,psi_a_p3_m1);
+    prodsum_psi_a_epi16(psi_r_p3_m3,a_r_p3_m3,psi_i_p3_m3,a_i_p3_m3,psi_a_p3_m3);
+    prodsum_psi_a_epi16(psi_r_m1_p1,a_r_m1_p1,psi_i_m1_p1,a_i_m1_p1,psi_a_m1_p1);
+    prodsum_psi_a_epi16(psi_r_m1_p3,a_r_m1_p3,psi_i_m1_p3,a_i_m1_p3,psi_a_m1_p3);
+    prodsum_psi_a_epi16(psi_r_m3_p1,a_r_m3_p1,psi_i_m3_p1,a_i_m3_p1,psi_a_m3_p1);
+    prodsum_psi_a_epi16(psi_r_m3_p3,a_r_m3_p3,psi_i_m3_p3,a_i_m3_p3,psi_a_m3_p3);
+    prodsum_psi_a_epi16(psi_r_m1_m1,a_r_m1_m1,psi_i_m1_m1,a_i_m1_m1,psi_a_m1_m1);
+    prodsum_psi_a_epi16(psi_r_m1_m3,a_r_m1_m3,psi_i_m1_m3,a_i_m1_m3,psi_a_m1_m3);
+    prodsum_psi_a_epi16(psi_r_m3_m1,a_r_m3_m1,psi_i_m3_m1,a_i_m3_m1,psi_a_m3_m1);
+    prodsum_psi_a_epi16(psi_r_m3_m3,a_r_m3_m3,psi_i_m3_m3,a_i_m3_m3,psi_a_m3_m3);
+
+
+    // squared interference magnitude times int. ch. power
+    square_a_epi16(a_r_p1_p1,a_i_p1_p1,ch_mag_int,SQRT_10_OVER_FOUR,a_sq_p1_p1);
+    square_a_epi16(a_r_p1_p3,a_i_p1_p3,ch_mag_int,SQRT_10_OVER_FOUR,a_sq_p1_p3);
+    square_a_epi16(a_r_p3_p1,a_i_p3_p1,ch_mag_int,SQRT_10_OVER_FOUR,a_sq_p3_p1);
+    square_a_epi16(a_r_p3_p3,a_i_p3_p3,ch_mag_int,SQRT_10_OVER_FOUR,a_sq_p3_p3);
+    square_a_epi16(a_r_p1_m1,a_i_p1_m1,ch_mag_int,SQRT_10_OVER_FOUR,a_sq_p1_m1);
+    square_a_epi16(a_r_p1_m3,a_i_p1_m3,ch_mag_int,SQRT_10_OVER_FOUR,a_sq_p1_m3);
+    square_a_epi16(a_r_p3_m1,a_i_p3_m1,ch_mag_int,SQRT_10_OVER_FOUR,a_sq_p3_m1);
+    square_a_epi16(a_r_p3_m3,a_i_p3_m3,ch_mag_int,SQRT_10_OVER_FOUR,a_sq_p3_m3);
+    square_a_epi16(a_r_m1_p1,a_i_m1_p1,ch_mag_int,SQRT_10_OVER_FOUR,a_sq_m1_p1);
+    square_a_epi16(a_r_m1_p3,a_i_m1_p3,ch_mag_int,SQRT_10_OVER_FOUR,a_sq_m1_p3);
+    square_a_epi16(a_r_m3_p1,a_i_m3_p1,ch_mag_int,SQRT_10_OVER_FOUR,a_sq_m3_p1);
+    square_a_epi16(a_r_m3_p3,a_i_m3_p3,ch_mag_int,SQRT_10_OVER_FOUR,a_sq_m3_p3);
+    square_a_epi16(a_r_m1_m1,a_i_m1_m1,ch_mag_int,SQRT_10_OVER_FOUR,a_sq_m1_m1);
+    square_a_epi16(a_r_m1_m3,a_i_m1_m3,ch_mag_int,SQRT_10_OVER_FOUR,a_sq_m1_m3);
+    square_a_epi16(a_r_m3_m1,a_i_m3_m1,ch_mag_int,SQRT_10_OVER_FOUR,a_sq_m3_m1);
+    square_a_epi16(a_r_m3_m3,a_i_m3_m3,ch_mag_int,SQRT_10_OVER_FOUR,a_sq_m3_m3);
+
+    // Computing different multiples of channel norms
+    ch_mag_over_10=_mm_mulhi_epi16(ch_mag_des, ONE_OVER_TWO_SQRT_10);
+    ch_mag_over_2=_mm_mulhi_epi16(ch_mag_des, SQRT_10_OVER_FOUR);
+    ch_mag_over_2=_mm_slli_epi16(ch_mag_over_2, 1);
+    ch_mag_9_over_10=_mm_mulhi_epi16(ch_mag_des, NINE_OVER_TWO_SQRT_10);
+    ch_mag_9_over_10=_mm_slli_epi16(ch_mag_9_over_10, 2);
+
+    // Computing Metrics
+    xmm0 = _mm_subs_epi16(psi_a_p1_p1,a_sq_p1_p1);
+    xmm1 = _mm_adds_epi16(xmm0,y0_p_1_1);
+    bit_met_p1_p1= _mm_subs_epi16(xmm1,ch_mag_over_10);
+
+    xmm0 = _mm_subs_epi16(psi_a_p1_p3,a_sq_p1_p3);
+    xmm1 = _mm_adds_epi16(xmm0,y0_p_1_3);
+    bit_met_p1_p3= _mm_subs_epi16(xmm1,ch_mag_over_2);
+
+    xmm0 = _mm_subs_epi16(psi_a_p1_m1,a_sq_p1_m1);
+    xmm1 = _mm_adds_epi16(xmm0,y0_m_1_1);
+    bit_met_p1_m1= _mm_subs_epi16(xmm1,ch_mag_over_10);
+
+    xmm0 = _mm_subs_epi16(psi_a_p1_m3,a_sq_p1_m3);
+    xmm1 = _mm_adds_epi16(xmm0,y0_m_1_3);
+    bit_met_p1_m3= _mm_subs_epi16(xmm1,ch_mag_over_2);
+
+    xmm0 = _mm_subs_epi16(psi_a_p3_p1,a_sq_p3_p1);
+    xmm1 = _mm_adds_epi16(xmm0,y0_p_3_1);
+    bit_met_p3_p1= _mm_subs_epi16(xmm1,ch_mag_over_2);
+
+    xmm0 = _mm_subs_epi16(psi_a_p3_p3,a_sq_p3_p3);
+    xmm1 = _mm_adds_epi16(xmm0,y0_p_3_3);
+    bit_met_p3_p3= _mm_subs_epi16(xmm1,ch_mag_9_over_10);
+
+    xmm0 = _mm_subs_epi16(psi_a_p3_m1,a_sq_p3_m1);
+    xmm1 = _mm_adds_epi16(xmm0,y0_m_3_1);
+    bit_met_p3_m1= _mm_subs_epi16(xmm1,ch_mag_over_2);
+
+    xmm0 = _mm_subs_epi16(psi_a_p3_m3,a_sq_p3_m3);
+    xmm1 = _mm_adds_epi16(xmm0,y0_m_3_3);
+    bit_met_p3_m3= _mm_subs_epi16(xmm1,ch_mag_9_over_10);
+
+    xmm0 = _mm_subs_epi16(psi_a_m1_p1,a_sq_m1_p1);
+    xmm1 = _mm_subs_epi16(xmm0,y0_m_1_1);
+    bit_met_m1_p1= _mm_subs_epi16(xmm1,ch_mag_over_10);
+
+    xmm0 = _mm_subs_epi16(psi_a_m1_p3,a_sq_m1_p3);
+    xmm1 = _mm_subs_epi16(xmm0,y0_m_1_3);
+    bit_met_m1_p3= _mm_subs_epi16(xmm1,ch_mag_over_2);
+
+    xmm0 = _mm_subs_epi16(psi_a_m1_m1,a_sq_m1_m1);
+    xmm1 = _mm_subs_epi16(xmm0,y0_p_1_1);
+    bit_met_m1_m1= _mm_subs_epi16(xmm1,ch_mag_over_10);
+
+    xmm0 = _mm_subs_epi16(psi_a_m1_m3,a_sq_m1_m3);
+    xmm1 = _mm_subs_epi16(xmm0,y0_p_1_3);
+    bit_met_m1_m3= _mm_subs_epi16(xmm1,ch_mag_over_2);
+
+    xmm0 = _mm_subs_epi16(psi_a_m3_p1,a_sq_m3_p1);
+    xmm1 = _mm_subs_epi16(xmm0,y0_m_3_1);
+    bit_met_m3_p1= _mm_subs_epi16(xmm1,ch_mag_over_2);
+
+    xmm0 = _mm_subs_epi16(psi_a_m3_p3,a_sq_m3_p3);
+    xmm1 = _mm_subs_epi16(xmm0,y0_m_3_3);
+    bit_met_m3_p3= _mm_subs_epi16(xmm1,ch_mag_9_over_10);
+
+    xmm0 = _mm_subs_epi16(psi_a_m3_m1,a_sq_m3_m1);
+    xmm1 = _mm_subs_epi16(xmm0,y0_p_3_1);
+    bit_met_m3_m1= _mm_subs_epi16(xmm1,ch_mag_over_2);
+
+    xmm0 = _mm_subs_epi16(psi_a_m3_m3,a_sq_m3_m3);
+    xmm1 = _mm_subs_epi16(xmm0,y0_p_3_3);
+    bit_met_m3_m3= _mm_subs_epi16(xmm1,ch_mag_9_over_10);
+
+    // LLR of the first bit
+    // Bit = 1
+    xmm0 = _mm_max_epi16(bit_met_m1_p1,bit_met_m1_p3);
+    xmm1 = _mm_max_epi16(bit_met_m1_m1,bit_met_m1_m3);
+    xmm2 = _mm_max_epi16(bit_met_m3_p1,bit_met_m3_p3);
+    xmm3 = _mm_max_epi16(bit_met_m3_m1,bit_met_m3_m3);
+    xmm4 = _mm_max_epi16(xmm0,xmm1);
+    xmm5 = _mm_max_epi16(xmm2,xmm3);
+    logmax_num_re0= _mm_max_epi16(xmm4,xmm5);
+
+    // Bit = 0
+    xmm0 = _mm_max_epi16(bit_met_p1_p1,bit_met_p1_p3);
+    xmm1 = _mm_max_epi16(bit_met_p1_m1,bit_met_p1_m3);
+    xmm2 = _mm_max_epi16(bit_met_p3_p1,bit_met_p3_p3);
+    xmm3 = _mm_max_epi16(bit_met_p3_m1,bit_met_p3_m3);
+    xmm4 = _mm_max_epi16(xmm0,xmm1);
+    xmm5 = _mm_max_epi16(xmm2,xmm3);
+    logmax_den_re0 = _mm_max_epi16(xmm4,xmm5);
+
+    // LLR of first bit [L1(1), L1(2), L1(3), L1(4), L1(5), L1(6), L1(7), L1(8)]
+    y0r = _mm_subs_epi16(logmax_den_re0,logmax_num_re0);
+
+    // LLR of the second bit
+    // Bit = 1
+    xmm0 = _mm_max_epi16(bit_met_p1_m1,bit_met_p3_m1);
+    xmm1 = _mm_max_epi16(bit_met_m1_m1,bit_met_m3_m1);
+    xmm2 = _mm_max_epi16(bit_met_p1_m3,bit_met_p3_m3);
+    xmm3 = _mm_max_epi16(bit_met_m1_m3,bit_met_m3_m3);
+    xmm4 = _mm_max_epi16(xmm0,xmm1);
+    xmm5 = _mm_max_epi16(xmm2,xmm3);
+    logmax_num_re1 = _mm_max_epi16(xmm4,xmm5);
+
+    // Bit = 0
+    xmm0 = _mm_max_epi16(bit_met_p1_p1,bit_met_p3_p1);
+    xmm1 = _mm_max_epi16(bit_met_m1_p1,bit_met_m3_p1);
+    xmm2 = _mm_max_epi16(bit_met_p1_p3,bit_met_p3_p3);
+    xmm3 = _mm_max_epi16(bit_met_m1_p3,bit_met_m3_p3);
+    xmm4 = _mm_max_epi16(xmm0,xmm1);
+    xmm5 = _mm_max_epi16(xmm2,xmm3);
+    logmax_den_re1 = _mm_max_epi16(xmm4,xmm5);
+
+    // LLR of second bit [L2(1), L2(2), L2(3), L2(4)]
+    y1r = _mm_subs_epi16(logmax_den_re1,logmax_num_re1);
+
+    // LLR of the third bit
+    // Bit = 1
+    xmm0 = _mm_max_epi16(bit_met_m3_p1,bit_met_m3_p3);
+    xmm1 = _mm_max_epi16(bit_met_m3_m1,bit_met_m3_m3);
+    xmm2 = _mm_max_epi16(bit_met_p3_p1,bit_met_p3_p3);
+    xmm3 = _mm_max_epi16(bit_met_p3_m1,bit_met_p3_m3);
+    xmm4 = _mm_max_epi16(xmm0,xmm1);
+    xmm5 = _mm_max_epi16(xmm2,xmm3);
+    logmax_num_im0 = _mm_max_epi16(xmm4,xmm5);
+
+    // Bit = 0
+    xmm0 = _mm_max_epi16(bit_met_m1_p1,bit_met_m1_p3);
+    xmm1 = _mm_max_epi16(bit_met_m1_m1,bit_met_m1_m3);
+    xmm2 = _mm_max_epi16(bit_met_p1_p1,bit_met_p1_p3);
+    xmm3 = _mm_max_epi16(bit_met_p1_m1,bit_met_p1_m3);
+    xmm4 = _mm_max_epi16(xmm0,xmm1);
+    xmm5 = _mm_max_epi16(xmm2,xmm3);
+    logmax_den_im0 = _mm_max_epi16(xmm4,xmm5);
+
+    // LLR of third bit [L3(1), L3(2), L3(3), L3(4)]
+    y0i = _mm_subs_epi16(logmax_den_im0,logmax_num_im0);
+
+    // LLR of the fourth bit
+    // Bit = 1
+    xmm0 = _mm_max_epi16(bit_met_p1_m3,bit_met_p3_m3);
+    xmm1 = _mm_max_epi16(bit_met_m1_m3,bit_met_m3_m3);
+    xmm2 = _mm_max_epi16(bit_met_p1_p3,bit_met_p3_p3);
+    xmm3 = _mm_max_epi16(bit_met_m1_p3,bit_met_m3_p3);
+    xmm4 = _mm_max_epi16(xmm0,xmm1);
+    xmm5 = _mm_max_epi16(xmm2,xmm3);
+    logmax_num_im1 = _mm_max_epi16(xmm4,xmm5);
+
+    // Bit = 0
+    xmm0 = _mm_max_epi16(bit_met_p1_m1,bit_met_p3_m1);
+    xmm1 = _mm_max_epi16(bit_met_m1_m1,bit_met_m3_m1);
+    xmm2 = _mm_max_epi16(bit_met_p1_p1,bit_met_p3_p1);
+    xmm3 = _mm_max_epi16(bit_met_m1_p1,bit_met_m3_p1);
+    xmm4 = _mm_max_epi16(xmm0,xmm1);
+    xmm5 = _mm_max_epi16(xmm2,xmm3);
+    logmax_den_im1 = _mm_max_epi16(xmm4,xmm5);
+
+    // LLR of fourth bit [L4(1), L4(2), L4(3), L4(4)]
+    y1i = _mm_subs_epi16(logmax_den_im1,logmax_num_im1);
+
+    // Pack LLRs in output
+    // [L1(1), L2(1), L1(2), L2(2), L1(3), L2(3), L1(4), L2(4)]
+    xmm0 = _mm_unpacklo_epi16(y0r,y1r);
+    // [L1(5), L2(5), L1(6), L2(6), L1(7), L2(7), L1(8), L2(8)]
+    xmm1 = _mm_unpackhi_epi16(y0r,y1r);
+    // [L3(1), L4(1), L3(2), L4(2), L3(3), L4(3), L3(4), L4(4)]
+    xmm2 = _mm_unpacklo_epi16(y0i,y1i);
+    // [L3(5), L4(5), L3(6), L4(6), L3(7), L4(7), L3(8), L4(8)]
+    xmm3 = _mm_unpackhi_epi16(y0i,y1i);
+
+    stream0_128i_out[2*i+0] = _mm_unpacklo_epi32(xmm0,xmm2); // 8LLRs, 2REs
+    stream0_128i_out[2*i+1] = _mm_unpackhi_epi32(xmm0,xmm2);
+    stream0_128i_out[2*i+2] = _mm_unpacklo_epi32(xmm1,xmm3);
+    stream0_128i_out[2*i+3] = _mm_unpackhi_epi32(xmm1,xmm3);
+#elif defined(__arm__)
+
+#endif
+
+  }
+
+#if defined(__x86_64__) || defined(__i386__)
+  _mm_empty();
+  _m_empty();
+#endif
+}
+
+/* LLR computation for one OFDM symbol: 16QAM desired stream, 16QAM interferer.
+ * Selects this symbol's portion of each input buffer, works out how many
+ * resource elements (len) to process, runs qam16_qam16() on them and stores
+ * the advanced LLR write pointer in *llr16p. Always returns 0.
+ */
+int dlsch_16qam_16qam_llr(LTE_DL_FRAME_PARMS *frame_parms,
+                          int32_t **rxdataF_comp,
+                          int32_t **rxdataF_comp_i,
+                          int32_t **dl_ch_mag,   //|h_0|^2*(2/sqrt{10})
+                          int32_t **dl_ch_mag_i, //|h_1|^2*(2/sqrt{10})
+                          int32_t **rho_i,
+                          int16_t *dlsch_llr,
+                          uint8_t symbol,
+                          uint8_t first_symbol_flag,
+                          uint16_t nb_rb,
+                          uint16_t pbch_pss_sss_adjust,
+                          int16_t **llr16p)
+{
+  // Start of this symbol inside each buffer: N_RB_DL*12 entries per symbol.
+  const int sym_offset = symbol*frame_parms->N_RB_DL*12;
+  int16_t *mf_des  = (int16_t*)&rxdataF_comp[0][sym_offset];
+  int16_t *mf_int  = (int16_t*)&rxdataF_comp_i[0][sym_offset];
+  int16_t *mag_des = (int16_t*)&dl_ch_mag[0][sym_offset];
+  int16_t *mag_int = (int16_t*)&dl_ch_mag_i[0][sym_offset];
+  int16_t *xcorr   = (int16_t*)&rho_i[0][sym_offset];
+  // With first_symbol_flag==1 output starts at dlsch_llr, else at *llr16p.
+  int16_t *llr16 = (first_symbol_flag == 1) ? dlsch_llr : *llr16p;
+  uint8_t symbol_mod = symbol;
+  int has_pilots;
+  int len;
+
+  AssertFatal(llr16!=NULL,"dlsch_16qam_16qam_llr: llr is null, symbol %d\n",symbol);
+
+  // Symbols at or beyond index 7-Ncp are renumbered from 0 again.
+  if (symbol >= (7-frame_parms->Ncp))
+    symbol_mod = symbol-(7-frame_parms->Ncp);
+
+  // Renumbered symbols 0 and 4-Ncp are the ones carrying pilots.
+  has_pilots = (symbol_mod==0) || (symbol_mod==(4-frame_parms->Ncp));
+
+  if (!has_pilots)
+    // no pilots: 12 REs per symbol per RB
+    len = (nb_rb*12) - pbch_pss_sss_adjust;
+  else if (frame_parms->nb_antenna_ports_eNB==1)
+    // pilots, 1 antenna port: 10 REs per symbol per RB
+    len = (nb_rb*10) - (5*pbch_pss_sss_adjust/6);
+  else
+    // pilots, 2 antenna ports: 8 REs per symbol per RB
+    len = (nb_rb*8) - (2*pbch_pss_sss_adjust/3);
+
+  // llr16 goes in the fifth slot (presumably the output, cf. stream0_out of qam16_qam64).
+  qam16_qam16(mf_des, mf_int, mag_des, mag_int,
+              llr16, xcorr,
+              len);
+
+  // Advance by 4 int16_t entries per processed RE (len<<2) and hand the new
+  // position back through llr16p.
+  *llr16p = llr16 + (len<<2);
+
+  return 0;
+}
+
+void qam16_qam64(int16_t *stream0_in,
+                 int16_t *stream1_in,
+                 int16_t *ch_mag,
+                 int16_t *ch_mag_i,
+                 int16_t *stream0_out,
+                 int16_t *rho01,
+                 int32_t length
+     )
+{
+
+  /*
+    Author: Sebastian Wagner
+    Date: 2012-06-04
+
+    Input:
+    stream0_in:  MF filter for 1st stream, i.e., y0=h0'*y
+    stream1_in:  MF filter for 2nd stream, i.e., y1=h1'*y
+    ch_mag:      2*h0/sqrt(00), [Re0 Im0 Re1 Im1] s.t. Im0=Re0, Im1=Re1, etc
+    ch_mag_i:    2*h1/sqrt(00), [Re0 Im0 Re1 Im1] s.t. Im0=Re0, Im1=Re1, etc
+    rho01:       Channel cross correlation, i.e., h1'*h0
+
+    Output:
+    stream0_out: output LLRs for 1st stream
+  */
+
+#if defined(__x86_64__) || defined(__i386__)
+  __m128i *rho01_128i       = (__m128i *)rho01;
+  __m128i *stream0_128i_in  = (__m128i *)stream0_in;
+  __m128i *stream1_128i_in  = (__m128i *)stream1_in;
+  __m128i *stream0_128i_out = (__m128i *)stream0_out;
+  __m128i *ch_mag_128i      = (__m128i *)ch_mag;
+  __m128i *ch_mag_128i_i    = (__m128i *)ch_mag_i;
+
+
+  __m128i ONE_OVER_SQRT_2 = _mm_set1_epi16(23170); // round(1/sqrt(2)*2^15)
+  __m128i ONE_OVER_SQRT_10 = _mm_set1_epi16(20724); // round(1/sqrt(10)*2^16)
+  __m128i THREE_OVER_SQRT_10 = _mm_set1_epi16(31086); // round(3/sqrt(10)*2^15)
+  __m128i SQRT_10_OVER_FOUR = _mm_set1_epi16(25905); // round(sqrt(10)/4*2^15)
+  __m128i ONE_OVER_TWO_SQRT_10 = _mm_set1_epi16(10362); // round(1/2/sqrt(10)*2^16)
+  __m128i NINE_OVER_TWO_SQRT_10 = _mm_set1_epi16(23315); // round(9/2/sqrt(10)*2^14)
+  __m128i ONE_OVER_SQRT_2_42 = _mm_set1_epi16(3575); // round(1/sqrt(2*42)*2^15)
+  __m128i THREE_OVER_SQRT_2_42 = _mm_set1_epi16(10726); // round(3/sqrt(2*42)*2^15)
+  __m128i FIVE_OVER_SQRT_2_42 = _mm_set1_epi16(17876); // round(5/sqrt(2*42)*2^15)
+  __m128i SEVEN_OVER_SQRT_2_42 = _mm_set1_epi16(25027); // round(7/sqrt(2*42)*2^15)
+  __m128i SQRT_42_OVER_FOUR = _mm_set1_epi16(13272); // round(sqrt(42)/4*2^13), Q3.
+  __m128i ch_mag_des,ch_mag_int;
+  __m128i  y0r_over_sqrt10;
+  __m128i  y0i_over_sqrt10;
+  __m128i  y0r_three_over_sqrt10;
+  __m128i  y0i_three_over_sqrt10;
+  __m128i ch_mag_over_10;
+  __m128i ch_mag_over_2;
+  __m128i ch_mag_9_over_10;
+  __m128i ch_mag_int_with_sigma2;
+  __m128i two_ch_mag_int_with_sigma2;
+  __m128i three_ch_mag_int_with_sigma2;
+
+#elif defined(__arm__)
+
+#endif
+  int i;
+
+  for (i=0; i<length>>2; i+=2) {
+    // In one iteration, we deal with 8 REs
+
+#if defined(__x86_64__) || defined(__i386__)
+    // Get rho
+    xmm0 = rho01_128i[i];
+    xmm1 = rho01_128i[i+1];
+    xmm0 = _mm_shufflelo_epi16(xmm0,0xd8); //_MM_SHUFFLE(0,2,1,3));
+    xmm0 = _mm_shufflehi_epi16(xmm0,0xd8); //_MM_SHUFFLE(0,2,1,3));
+    xmm0 = _mm_shuffle_epi32(xmm0,0xd8); //_MM_SHUFFLE(0,2,1,3));
+    xmm1 = _mm_shufflelo_epi16(xmm1,0xd8); //_MM_SHUFFLE(0,2,1,3));
+    xmm1 = _mm_shufflehi_epi16(xmm1,0xd8); //_MM_SHUFFLE(0,2,1,3));
+    xmm1 = _mm_shuffle_epi32(xmm1,0xd8); //_MM_SHUFFLE(0,2,1,3));
+    //xmm0 = [Re(0,1) Re(2,3) Im(0,1) Im(2,3)]
+    //xmm1 = [Re(4,5) Re(6,7) Im(4,5) Im(6,7)]
+    xmm2 = _mm_unpacklo_epi64(xmm0,xmm1); // Re(rho)
+    xmm3 = _mm_unpackhi_epi64(xmm0,xmm1); // Im(rho)
+    rho_rpi = _mm_adds_epi16(xmm2,xmm3); // rho = Re(rho) + Im(rho)
+    rho_rmi = _mm_subs_epi16(xmm2,xmm3); // rho* = Re(rho) - Im(rho)
+
+    // Compute the different rhos
+    rho_rpi_1_1 = _mm_mulhi_epi16(rho_rpi,ONE_OVER_SQRT_10);
+    rho_rmi_1_1 = _mm_mulhi_epi16(rho_rmi,ONE_OVER_SQRT_10);
+    rho_rpi_3_3 = _mm_mulhi_epi16(rho_rpi,THREE_OVER_SQRT_10);
+    rho_rmi_3_3 = _mm_mulhi_epi16(rho_rmi,THREE_OVER_SQRT_10);
+    rho_rpi_3_3 = _mm_slli_epi16(rho_rpi_3_3,1);
+    rho_rmi_3_3 = _mm_slli_epi16(rho_rmi_3_3,1);
+
+    xmm4 = _mm_mulhi_epi16(xmm2,ONE_OVER_SQRT_10); // Re(rho)
+    xmm5 = _mm_mulhi_epi16(xmm3,THREE_OVER_SQRT_10); // Im(rho)
+    xmm5 = _mm_slli_epi16(xmm5,1);
+
+    rho_rpi_1_3 = _mm_adds_epi16(xmm4,xmm5);
+    rho_rmi_1_3 = _mm_subs_epi16(xmm4,xmm5);
+
+    xmm6 = _mm_mulhi_epi16(xmm2,THREE_OVER_SQRT_10); // Re(rho)
+    xmm7 = _mm_mulhi_epi16(xmm3,ONE_OVER_SQRT_10); // Im(rho)
+    xmm6 = _mm_slli_epi16(xmm6,1);
+
+    rho_rpi_3_1 = _mm_adds_epi16(xmm6,xmm7);
+    rho_rmi_3_1 = _mm_subs_epi16(xmm6,xmm7);
+
+    // Rearrange interfering MF output
+    xmm0 = stream1_128i_in[i];
+    xmm1 = stream1_128i_in[i+1];
+    xmm0 = _mm_shufflelo_epi16(xmm0,0xd8); //_MM_SHUFFLE(0,2,1,3));
+    xmm0 = _mm_shufflehi_epi16(xmm0,0xd8); //_MM_SHUFFLE(0,2,1,3));
+    xmm0 = _mm_shuffle_epi32(xmm0,0xd8); //_MM_SHUFFLE(0,2,1,3));
+    xmm1 = _mm_shufflelo_epi16(xmm1,0xd8); //_MM_SHUFFLE(0,2,1,3));
+    xmm1 = _mm_shufflehi_epi16(xmm1,0xd8); //_MM_SHUFFLE(0,2,1,3));
+    xmm1 = _mm_shuffle_epi32(xmm1,0xd8); //_MM_SHUFFLE(0,2,1,3));
+    //xmm0 = [Re(0,1) Re(2,3) Im(0,1) Im(2,3)]
+    //xmm1 = [Re(4,5) Re(6,7) Im(4,5) Im(6,7)]
+    y1r = _mm_unpacklo_epi64(xmm0,xmm1); //[y1r(1),y1r(2),y1r(3),y1r(4)]
+    y1i = _mm_unpackhi_epi64(xmm0,xmm1); //[y1i(1),y1i(2),y1i(3),y1i(4)]
+
+    xmm0 = _mm_setzero_si128(); // ZERO
+    xmm2 = _mm_subs_epi16(rho_rpi_1_1,y1r); // = [Re(rho)+ Im(rho)]/sqrt(10) - y1r
+    psi_r_p1_p1 = _mm_abs_epi16(xmm2); // = |[Re(rho)+ Im(rho)]/sqrt(10) - y1r|
+
+    xmm2= _mm_subs_epi16(rho_rmi_1_1,y1r);
+    psi_r_p1_m1 = _mm_abs_epi16(xmm2);
+    xmm2= _mm_subs_epi16(rho_rmi_1_1,y1i);
+    psi_i_p1_p1 = _mm_abs_epi16(xmm2);
+    xmm2= _mm_subs_epi16(rho_rpi_1_3,y1r);
+    psi_r_p1_p3 = _mm_abs_epi16(xmm2);
+    xmm2= _mm_subs_epi16(rho_rmi_1_3,y1r);
+    psi_r_p1_m3 = _mm_abs_epi16(xmm2);
+    xmm2= _mm_subs_epi16(rho_rmi_3_1,y1i);
+    psi_i_p1_p3 = _mm_abs_epi16(xmm2);
+    xmm2= _mm_subs_epi16(rho_rpi_3_1,y1r);
+    psi_r_p3_p1 = _mm_abs_epi16(xmm2);
+    xmm2= _mm_subs_epi16(rho_rmi_3_1,y1r);
+    psi_r_p3_m1 = _mm_abs_epi16(xmm2);
+    xmm2= _mm_subs_epi16(rho_rmi_1_3,y1i);
+    psi_i_p3_p1 = _mm_abs_epi16(xmm2);
+    xmm2= _mm_subs_epi16(rho_rpi_3_3,y1r);
+    psi_r_p3_p3 = _mm_abs_epi16(xmm2);
+    xmm2= _mm_subs_epi16(rho_rmi_3_3,y1r);
+    psi_r_p3_m3 = _mm_abs_epi16(xmm2);
+    xmm2= _mm_subs_epi16(rho_rmi_3_3,y1i);
+    psi_i_p3_p3 = _mm_abs_epi16(xmm2);
+    xmm2= _mm_subs_epi16(rho_rpi_1_1,y1i);
+    psi_i_m1_p1 = _mm_abs_epi16(xmm2);
+    xmm2= _mm_subs_epi16(rho_rpi_3_1,y1i);
+    psi_i_m1_p3 = _mm_abs_epi16(xmm2);
+    xmm2= _mm_subs_epi16(rho_rpi_1_3,y1i);
+    psi_i_m3_p1 = _mm_abs_epi16(xmm2);
+    xmm2= _mm_subs_epi16(rho_rpi_3_3,y1i);
+    psi_i_m3_p3 = _mm_abs_epi16(xmm2);
+    xmm2= _mm_adds_epi16(rho_rpi_1_1,y1i);
+    psi_i_p1_m1 = _mm_abs_epi16(xmm2);
+    xmm2= _mm_adds_epi16(rho_rpi_3_1,y1i);
+    psi_i_p1_m3 = _mm_abs_epi16(xmm2);
+    xmm2= _mm_adds_epi16(rho_rpi_1_3,y1i);
+    psi_i_p3_m1 = _mm_abs_epi16(xmm2);
+    xmm2= _mm_adds_epi16(rho_rpi_3_3,y1i);
+    psi_i_p3_m3 = _mm_abs_epi16(xmm2);
+    xmm2= _mm_adds_epi16(rho_rpi_1_1,y1r);
+    psi_r_m1_m1 = _mm_abs_epi16(xmm2);
+    xmm2= _mm_adds_epi16(rho_rpi_1_3,y1r);
+    psi_r_m1_m3 = _mm_abs_epi16(xmm2);
+    xmm2= _mm_adds_epi16(rho_rpi_3_1,y1r);
+    psi_r_m3_m1 = _mm_abs_epi16(xmm2);
+    xmm2= _mm_adds_epi16(rho_rpi_3_3,y1r);
+    psi_r_m3_m3 = _mm_abs_epi16(xmm2);
+    xmm2= _mm_adds_epi16(y1r,rho_rmi_1_1);
+    psi_r_m1_p1 = _mm_abs_epi16(xmm2);
+    xmm2= _mm_adds_epi16(y1r,rho_rmi_1_3);
+    psi_r_m1_p3 = _mm_abs_epi16(xmm2);
+    xmm2= _mm_adds_epi16(y1i,rho_rmi_1_1);
+    psi_i_m1_m1 = _mm_abs_epi16(xmm2);
+    xmm2= _mm_adds_epi16(y1i,rho_rmi_3_1);
+    psi_i_m1_m3 = _mm_abs_epi16(xmm2);
+    xmm2= _mm_adds_epi16(y1r,rho_rmi_3_1);
+    psi_r_m3_p1 = _mm_abs_epi16(xmm2);
+    xmm2= _mm_adds_epi16(y1r,rho_rmi_3_3);
+    psi_r_m3_p3 = _mm_abs_epi16(xmm2);
+    xmm2= _mm_adds_epi16(y1i,rho_rmi_1_3);
+    psi_i_m3_m1 = _mm_abs_epi16(xmm2);
+    xmm2= _mm_adds_epi16(y1i,rho_rmi_3_3);
+    psi_i_m3_m3 = _mm_abs_epi16(xmm2);
+
+    // Rearrange desired MF output
+    xmm0 = stream0_128i_in[i];
+    xmm1 = stream0_128i_in[i+1];
+    xmm0 = _mm_shufflelo_epi16(xmm0,0xd8); //_MM_SHUFFLE(0,2,1,3));
+    xmm0 = _mm_shufflehi_epi16(xmm0,0xd8); //_MM_SHUFFLE(0,2,1,3));
+    xmm0 = _mm_shuffle_epi32(xmm0,0xd8); //_MM_SHUFFLE(0,2,1,3));
+    xmm1 = _mm_shufflelo_epi16(xmm1,0xd8); //_MM_SHUFFLE(0,2,1,3));
+    xmm1 = _mm_shufflehi_epi16(xmm1,0xd8); //_MM_SHUFFLE(0,2,1,3));
+    xmm1 = _mm_shuffle_epi32(xmm1,0xd8); //_MM_SHUFFLE(0,2,1,3));
+    //xmm0 = [Re(0,1) Re(2,3) Im(0,1) Im(2,3)]
+    //xmm1 = [Re(4,5) Re(6,7) Im(4,5) Im(6,7)]
+    y0r = _mm_unpacklo_epi64(xmm0,xmm1); // = [y0r(1),y0r(2),y0r(3),y0r(4)]
+    y0i = _mm_unpackhi_epi64(xmm0,xmm1);
+
+    // Rearrange desired channel magnitudes
+    xmm2 = ch_mag_128i[i]; // = [|h|^2(1),|h|^2(1),|h|^2(2),|h|^2(2)]*(2/sqrt(10))
+    xmm3 = ch_mag_128i[i+1]; // = [|h|^2(3),|h|^2(3),|h|^2(4),|h|^2(4)]*(2/sqrt(10))
+    xmm2 = _mm_shufflelo_epi16(xmm2,0xd8); //_MM_SHUFFLE(0,2,1,3));
+    xmm2 = _mm_shufflehi_epi16(xmm2,0xd8); //_MM_SHUFFLE(0,2,1,3));
+    xmm2 = _mm_shuffle_epi32(xmm2,0xd8); //_MM_SHUFFLE(0,2,1,3));
+    xmm3 = _mm_shufflelo_epi16(xmm3,0xd8); //_MM_SHUFFLE(0,2,1,3));
+    xmm3 = _mm_shufflehi_epi16(xmm3,0xd8); //_MM_SHUFFLE(0,2,1,3));
+    xmm3 = _mm_shuffle_epi32(xmm3,0xd8); //_MM_SHUFFLE(0,2,1,3));
+
+    ch_mag_des = _mm_unpacklo_epi64(xmm2,xmm3); // = [|h|^2(1),|h|^2(2),|h|^2(3),|h|^2(4)]*(2/sqrt(10))
+
+    // Rearrange interfering channel magnitudes
+    xmm2 = ch_mag_128i_i[i];
+    xmm3 = ch_mag_128i_i[i+1];
+
+    xmm2 = _mm_shufflelo_epi16(xmm2,0xd8); //_MM_SHUFFLE(0,2,1,3));
+    xmm2 = _mm_shufflehi_epi16(xmm2,0xd8); //_MM_SHUFFLE(0,2,1,3));
+    xmm2 = _mm_shuffle_epi32(xmm2,0xd8); //_MM_SHUFFLE(0,2,1,3));
+    xmm3 = _mm_shufflelo_epi16(xmm3,0xd8); //_MM_SHUFFLE(0,2,1,3));
+    xmm3 = _mm_shufflehi_epi16(xmm3,0xd8); //_MM_SHUFFLE(0,2,1,3));
+    xmm3 = _mm_shuffle_epi32(xmm3,0xd8); //_MM_SHUFFLE(0,2,1,3));
+
+    ch_mag_int  = _mm_unpacklo_epi64(xmm2,xmm3);
+
+    // Scale MF output of desired signal
+    y0r_over_sqrt10 = _mm_mulhi_epi16(y0r,ONE_OVER_SQRT_10);
+    y0i_over_sqrt10 = _mm_mulhi_epi16(y0i,ONE_OVER_SQRT_10);
+    y0r_three_over_sqrt10 = _mm_mulhi_epi16(y0r,THREE_OVER_SQRT_10);
+    y0i_three_over_sqrt10 = _mm_mulhi_epi16(y0i,THREE_OVER_SQRT_10);
+    y0r_three_over_sqrt10 = _mm_slli_epi16(y0r_three_over_sqrt10,1);
+    y0i_three_over_sqrt10 = _mm_slli_epi16(y0i_three_over_sqrt10,1);
+
+    // Compute necessary combination of required terms
+    y0_p_1_1 = _mm_adds_epi16(y0r_over_sqrt10,y0i_over_sqrt10);
+    y0_m_1_1 = _mm_subs_epi16(y0r_over_sqrt10,y0i_over_sqrt10);
+
+    y0_p_1_3 = _mm_adds_epi16(y0r_over_sqrt10,y0i_three_over_sqrt10);
+    y0_m_1_3 = _mm_subs_epi16(y0r_over_sqrt10,y0i_three_over_sqrt10);
+
+    y0_p_3_1 = _mm_adds_epi16(y0r_three_over_sqrt10,y0i_over_sqrt10);
+    y0_m_3_1 = _mm_subs_epi16(y0r_three_over_sqrt10,y0i_over_sqrt10);
+
+    y0_p_3_3 = _mm_adds_epi16(y0r_three_over_sqrt10,y0i_three_over_sqrt10);
+    y0_m_3_3 = _mm_subs_epi16(y0r_three_over_sqrt10,y0i_three_over_sqrt10);
+
+    // Compute optimal interfering symbol magnitude
+    ch_mag_int_with_sigma2       = _mm_srai_epi16(ch_mag_int, 1); // *2
+    two_ch_mag_int_with_sigma2   = ch_mag_int; // *4
+    three_ch_mag_int_with_sigma2 = _mm_adds_epi16(ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2); // *6
+
+    interference_abs_64qam_epi16(psi_r_p1_p1 ,ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_p1_p1,ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42,FIVE_OVER_SQRT_2_42,
+                                 SEVEN_OVER_SQRT_2_42);
+    interference_abs_64qam_epi16(psi_i_p1_p1 ,ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_p1_p1,ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42,FIVE_OVER_SQRT_2_42,
+                                 SEVEN_OVER_SQRT_2_42);
+    interference_abs_64qam_epi16(psi_r_p1_p3 ,ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_p1_p3,ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42,FIVE_OVER_SQRT_2_42,
+                                 SEVEN_OVER_SQRT_2_42);
+    interference_abs_64qam_epi16(psi_i_p1_p3 ,ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_p1_p3,ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42,FIVE_OVER_SQRT_2_42,
+                                 SEVEN_OVER_SQRT_2_42);
+    interference_abs_64qam_epi16(psi_r_p1_m1 ,ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_p1_m1,ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42,FIVE_OVER_SQRT_2_42,
+                                 SEVEN_OVER_SQRT_2_42);
+    interference_abs_64qam_epi16(psi_i_p1_m1 ,ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_p1_m1,ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42,FIVE_OVER_SQRT_2_42,
+                                 SEVEN_OVER_SQRT_2_42);
+    interference_abs_64qam_epi16(psi_r_p1_m3 ,ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_p1_m3,ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42,FIVE_OVER_SQRT_2_42,
+                                 SEVEN_OVER_SQRT_2_42);
+    interference_abs_64qam_epi16(psi_i_p1_m3 ,ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_p1_m3,ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42,FIVE_OVER_SQRT_2_42,
+                                 SEVEN_OVER_SQRT_2_42);
+    interference_abs_64qam_epi16(psi_r_p3_p1 ,ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_p3_p1,ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42,FIVE_OVER_SQRT_2_42,
+                                 SEVEN_OVER_SQRT_2_42);
+    interference_abs_64qam_epi16(psi_i_p3_p1 ,ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_p3_p1,ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42,FIVE_OVER_SQRT_2_42,
+                                 SEVEN_OVER_SQRT_2_42);
+    interference_abs_64qam_epi16(psi_r_p3_p3 ,ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_p3_p3,ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42,FIVE_OVER_SQRT_2_42,
+                                 SEVEN_OVER_SQRT_2_42);
+    interference_abs_64qam_epi16(psi_i_p3_p3 ,ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_p3_p3,ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42,FIVE_OVER_SQRT_2_42,
+                                 SEVEN_OVER_SQRT_2_42);
+    interference_abs_64qam_epi16(psi_r_p3_m1 ,ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_p3_m1,ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42,FIVE_OVER_SQRT_2_42,
+                                 SEVEN_OVER_SQRT_2_42);
+    interference_abs_64qam_epi16(psi_i_p3_m1 ,ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_p3_m1,ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42,FIVE_OVER_SQRT_2_42,
+                                 SEVEN_OVER_SQRT_2_42);
+    interference_abs_64qam_epi16(psi_r_p3_m3 ,ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_p3_m3,ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42,FIVE_OVER_SQRT_2_42,
+                                 SEVEN_OVER_SQRT_2_42);
+    interference_abs_64qam_epi16(psi_i_p3_m3 ,ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_p3_m3,ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42,FIVE_OVER_SQRT_2_42,
+                                 SEVEN_OVER_SQRT_2_42);
+    interference_abs_64qam_epi16(psi_r_m1_p1 ,ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_m1_p1,ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42,FIVE_OVER_SQRT_2_42,
+                                 SEVEN_OVER_SQRT_2_42);
+    interference_abs_64qam_epi16(psi_i_m1_p1 ,ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_m1_p1,ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42,FIVE_OVER_SQRT_2_42,
+                                 SEVEN_OVER_SQRT_2_42);
+    interference_abs_64qam_epi16(psi_r_m1_p3 ,ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_m1_p3,ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42,FIVE_OVER_SQRT_2_42,
+                                 SEVEN_OVER_SQRT_2_42);
+    interference_abs_64qam_epi16(psi_i_m1_p3 ,ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_m1_p3,ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42,FIVE_OVER_SQRT_2_42,
+                                 SEVEN_OVER_SQRT_2_42);
+    interference_abs_64qam_epi16(psi_r_m1_m1 ,ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_m1_m1,ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42,FIVE_OVER_SQRT_2_42,
+                                 SEVEN_OVER_SQRT_2_42);
+    interference_abs_64qam_epi16(psi_i_m1_m1 ,ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_m1_m1,ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42,FIVE_OVER_SQRT_2_42,
+                                 SEVEN_OVER_SQRT_2_42);
+    interference_abs_64qam_epi16(psi_r_m1_m3 ,ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_m1_m3,ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42,FIVE_OVER_SQRT_2_42,
+                                 SEVEN_OVER_SQRT_2_42);
+    interference_abs_64qam_epi16(psi_i_m1_m3 ,ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_m1_m3,ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42,FIVE_OVER_SQRT_2_42,
+                                 SEVEN_OVER_SQRT_2_42);
+    interference_abs_64qam_epi16(psi_r_m3_p1 ,ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_m3_p1,ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42,FIVE_OVER_SQRT_2_42,
+                                 SEVEN_OVER_SQRT_2_42);
+    interference_abs_64qam_epi16(psi_i_m3_p1 ,ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_m3_p1,ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42,FIVE_OVER_SQRT_2_42,
+                                 SEVEN_OVER_SQRT_2_42);
+    interference_abs_64qam_epi16(psi_r_m3_p3 ,ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_m3_p3,ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42,FIVE_OVER_SQRT_2_42,
+                                 SEVEN_OVER_SQRT_2_42);
+    interference_abs_64qam_epi16(psi_i_m3_p3 ,ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_m3_p3,ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42,FIVE_OVER_SQRT_2_42,
+                                 SEVEN_OVER_SQRT_2_42);
+    interference_abs_64qam_epi16(psi_r_m3_m1 ,ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_m3_m1,ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42,FIVE_OVER_SQRT_2_42,
+                                 SEVEN_OVER_SQRT_2_42);
+    interference_abs_64qam_epi16(psi_i_m3_m1 ,ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_m3_m1,ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42,FIVE_OVER_SQRT_2_42,
+                                 SEVEN_OVER_SQRT_2_42);
+    interference_abs_64qam_epi16(psi_r_m3_m3 ,ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_m3_m3,ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42,FIVE_OVER_SQRT_2_42,
+                                 SEVEN_OVER_SQRT_2_42);
+    interference_abs_64qam_epi16(psi_i_m3_m3 ,ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_m3_m3,ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42,FIVE_OVER_SQRT_2_42,
+                                 SEVEN_OVER_SQRT_2_42);
+
+    // Calculation of groups of two terms in the bit metric involving product of psi and interference magnitude
+    prodsum_psi_a_epi16(psi_r_p1_p1,a_r_p1_p1,psi_i_p1_p1,a_i_p1_p1,psi_a_p1_p1);
+    prodsum_psi_a_epi16(psi_r_p1_p3,a_r_p1_p3,psi_i_p1_p3,a_i_p1_p3,psi_a_p1_p3);
+    prodsum_psi_a_epi16(psi_r_p3_p1,a_r_p3_p1,psi_i_p3_p1,a_i_p3_p1,psi_a_p3_p1);
+    prodsum_psi_a_epi16(psi_r_p3_p3,a_r_p3_p3,psi_i_p3_p3,a_i_p3_p3,psi_a_p3_p3);
+    prodsum_psi_a_epi16(psi_r_p1_m1,a_r_p1_m1,psi_i_p1_m1,a_i_p1_m1,psi_a_p1_m1);
+    prodsum_psi_a_epi16(psi_r_p1_m3,a_r_p1_m3,psi_i_p1_m3,a_i_p1_m3,psi_a_p1_m3);
+    prodsum_psi_a_epi16(psi_r_p3_m1,a_r_p3_m1,psi_i_p3_m1,a_i_p3_m1,psi_a_p3_m1);
+    prodsum_psi_a_epi16(psi_r_p3_m3,a_r_p3_m3,psi_i_p3_m3,a_i_p3_m3,psi_a_p3_m3);
+    prodsum_psi_a_epi16(psi_r_m1_p1,a_r_m1_p1,psi_i_m1_p1,a_i_m1_p1,psi_a_m1_p1);
+    prodsum_psi_a_epi16(psi_r_m1_p3,a_r_m1_p3,psi_i_m1_p3,a_i_m1_p3,psi_a_m1_p3);
+    prodsum_psi_a_epi16(psi_r_m3_p1,a_r_m3_p1,psi_i_m3_p1,a_i_m3_p1,psi_a_m3_p1);
+    prodsum_psi_a_epi16(psi_r_m3_p3,a_r_m3_p3,psi_i_m3_p3,a_i_m3_p3,psi_a_m3_p3);
+    prodsum_psi_a_epi16(psi_r_m1_m1,a_r_m1_m1,psi_i_m1_m1,a_i_m1_m1,psi_a_m1_m1);
+    prodsum_psi_a_epi16(psi_r_m1_m3,a_r_m1_m3,psi_i_m1_m3,a_i_m1_m3,psi_a_m1_m3);
+    prodsum_psi_a_epi16(psi_r_m3_m1,a_r_m3_m1,psi_i_m3_m1,a_i_m3_m1,psi_a_m3_m1);
+    prodsum_psi_a_epi16(psi_r_m3_m3,a_r_m3_m3,psi_i_m3_m3,a_i_m3_m3,psi_a_m3_m3);
+
+    // Multiply by sqrt(2)
+    psi_a_p1_p1 = _mm_mulhi_epi16(psi_a_p1_p1, ONE_OVER_SQRT_2);
+    psi_a_p1_p1 = _mm_slli_epi16(psi_a_p1_p1, 2);
+    psi_a_p1_p3 = _mm_mulhi_epi16(psi_a_p1_p3, ONE_OVER_SQRT_2);
+    psi_a_p1_p3 = _mm_slli_epi16(psi_a_p1_p3, 2);
+    psi_a_p3_p1 = _mm_mulhi_epi16(psi_a_p3_p1, ONE_OVER_SQRT_2);
+    psi_a_p3_p1 = _mm_slli_epi16(psi_a_p3_p1, 2);
+    psi_a_p3_p3 = _mm_mulhi_epi16(psi_a_p3_p3, ONE_OVER_SQRT_2);
+    psi_a_p3_p3 = _mm_slli_epi16(psi_a_p3_p3, 2);
+    psi_a_p1_m1 = _mm_mulhi_epi16(psi_a_p1_m1, ONE_OVER_SQRT_2);
+    psi_a_p1_m1 = _mm_slli_epi16(psi_a_p1_m1, 2);
+    psi_a_p1_m3 = _mm_mulhi_epi16(psi_a_p1_m3, ONE_OVER_SQRT_2);
+    psi_a_p1_m3 = _mm_slli_epi16(psi_a_p1_m3, 2);
+    psi_a_p3_m1 = _mm_mulhi_epi16(psi_a_p3_m1, ONE_OVER_SQRT_2);
+    psi_a_p3_m1 = _mm_slli_epi16(psi_a_p3_m1, 2);
+    psi_a_p3_m3 = _mm_mulhi_epi16(psi_a_p3_m3, ONE_OVER_SQRT_2);
+    psi_a_p3_m3 = _mm_slli_epi16(psi_a_p3_m3, 2);
+    psi_a_m1_p1 = _mm_mulhi_epi16(psi_a_m1_p1, ONE_OVER_SQRT_2);
+    psi_a_m1_p1 = _mm_slli_epi16(psi_a_m1_p1, 2);
+    psi_a_m1_p3 = _mm_mulhi_epi16(psi_a_m1_p3, ONE_OVER_SQRT_2);
+    psi_a_m1_p3 = _mm_slli_epi16(psi_a_m1_p3, 2);
+    psi_a_m3_p1 = _mm_mulhi_epi16(psi_a_m3_p1, ONE_OVER_SQRT_2);
+    psi_a_m3_p1 = _mm_slli_epi16(psi_a_m3_p1, 2);
+    psi_a_m3_p3 = _mm_mulhi_epi16(psi_a_m3_p3, ONE_OVER_SQRT_2);
+    psi_a_m3_p3 = _mm_slli_epi16(psi_a_m3_p3, 2);
+    psi_a_m1_m1 = _mm_mulhi_epi16(psi_a_m1_m1, ONE_OVER_SQRT_2);
+    psi_a_m1_m1 = _mm_slli_epi16(psi_a_m1_m1, 2);
+    psi_a_m1_m3 = _mm_mulhi_epi16(psi_a_m1_m3, ONE_OVER_SQRT_2);
+    psi_a_m1_m3 = _mm_slli_epi16(psi_a_m1_m3, 2);
+    psi_a_m3_m1 = _mm_mulhi_epi16(psi_a_m3_m1, ONE_OVER_SQRT_2);
+    psi_a_m3_m1 = _mm_slli_epi16(psi_a_m3_m1, 2);
+    psi_a_m3_m3 = _mm_mulhi_epi16(psi_a_m3_m3, ONE_OVER_SQRT_2);
+    psi_a_m3_m3 = _mm_slli_epi16(psi_a_m3_m3, 2);
+
+    // squared interference magnitude times int. ch. power
+    square_a_64qam_epi16(a_r_p1_p1,a_i_p1_p1,ch_mag_int,SQRT_42_OVER_FOUR,a_sq_p1_p1);
+    square_a_64qam_epi16(a_r_p1_p3,a_i_p1_p3,ch_mag_int,SQRT_42_OVER_FOUR,a_sq_p1_p3);
+    square_a_64qam_epi16(a_r_p3_p1,a_i_p3_p1,ch_mag_int,SQRT_42_OVER_FOUR,a_sq_p3_p1);
+    square_a_64qam_epi16(a_r_p3_p3,a_i_p3_p3,ch_mag_int,SQRT_42_OVER_FOUR,a_sq_p3_p3);
+    square_a_64qam_epi16(a_r_p1_m1,a_i_p1_m1,ch_mag_int,SQRT_42_OVER_FOUR,a_sq_p1_m1);
+    square_a_64qam_epi16(a_r_p1_m3,a_i_p1_m3,ch_mag_int,SQRT_42_OVER_FOUR,a_sq_p1_m3);
+    square_a_64qam_epi16(a_r_p3_m1,a_i_p3_m1,ch_mag_int,SQRT_42_OVER_FOUR,a_sq_p3_m1);
+    square_a_64qam_epi16(a_r_p3_m3,a_i_p3_m3,ch_mag_int,SQRT_42_OVER_FOUR,a_sq_p3_m3);
+    square_a_64qam_epi16(a_r_m1_p1,a_i_m1_p1,ch_mag_int,SQRT_42_OVER_FOUR,a_sq_m1_p1);
+    square_a_64qam_epi16(a_r_m1_p3,a_i_m1_p3,ch_mag_int,SQRT_42_OVER_FOUR,a_sq_m1_p3);
+    square_a_64qam_epi16(a_r_m3_p1,a_i_m3_p1,ch_mag_int,SQRT_42_OVER_FOUR,a_sq_m3_p1);
+    square_a_64qam_epi16(a_r_m3_p3,a_i_m3_p3,ch_mag_int,SQRT_42_OVER_FOUR,a_sq_m3_p3);
+    square_a_64qam_epi16(a_r_m1_m1,a_i_m1_m1,ch_mag_int,SQRT_42_OVER_FOUR,a_sq_m1_m1);
+    square_a_64qam_epi16(a_r_m1_m3,a_i_m1_m3,ch_mag_int,SQRT_42_OVER_FOUR,a_sq_m1_m3);
+    square_a_64qam_epi16(a_r_m3_m1,a_i_m3_m1,ch_mag_int,SQRT_42_OVER_FOUR,a_sq_m3_m1);
+    square_a_64qam_epi16(a_r_m3_m3,a_i_m3_m3,ch_mag_int,SQRT_42_OVER_FOUR,a_sq_m3_m3);
+
+    // Computing different multiples of channel norms
+    ch_mag_over_10=_mm_mulhi_epi16(ch_mag_des, ONE_OVER_TWO_SQRT_10);
+    ch_mag_over_2=_mm_mulhi_epi16(ch_mag_des, SQRT_10_OVER_FOUR);
+    ch_mag_over_2=_mm_slli_epi16(ch_mag_over_2, 1);
+    ch_mag_9_over_10=_mm_mulhi_epi16(ch_mag_des, NINE_OVER_TWO_SQRT_10);
+    ch_mag_9_over_10=_mm_slli_epi16(ch_mag_9_over_10, 2);
+
+    // Computing Metrics
+    xmm0 = _mm_subs_epi16(psi_a_p1_p1,a_sq_p1_p1);
+    xmm1 = _mm_adds_epi16(xmm0,y0_p_1_1);
+    bit_met_p1_p1= _mm_subs_epi16(xmm1,ch_mag_over_10);
+
+    xmm0 = _mm_subs_epi16(psi_a_p1_p3,a_sq_p1_p3);
+    xmm1 = _mm_adds_epi16(xmm0,y0_p_1_3);
+    bit_met_p1_p3= _mm_subs_epi16(xmm1,ch_mag_over_2);
+
+    xmm0 = _mm_subs_epi16(psi_a_p1_m1,a_sq_p1_m1);
+    xmm1 = _mm_adds_epi16(xmm0,y0_m_1_1);
+    bit_met_p1_m1= _mm_subs_epi16(xmm1,ch_mag_over_10);
+
+    xmm0 = _mm_subs_epi16(psi_a_p1_m3,a_sq_p1_m3);
+    xmm1 = _mm_adds_epi16(xmm0,y0_m_1_3);
+    bit_met_p1_m3= _mm_subs_epi16(xmm1,ch_mag_over_2);
+
+    xmm0 = _mm_subs_epi16(psi_a_p3_p1,a_sq_p3_p1);
+    xmm1 = _mm_adds_epi16(xmm0,y0_p_3_1);
+    bit_met_p3_p1= _mm_subs_epi16(xmm1,ch_mag_over_2);
+
+    xmm0 = _mm_subs_epi16(psi_a_p3_p3,a_sq_p3_p3);
+    xmm1 = _mm_adds_epi16(xmm0,y0_p_3_3);
+    bit_met_p3_p3= _mm_subs_epi16(xmm1,ch_mag_9_over_10);
+
+    xmm0 = _mm_subs_epi16(psi_a_p3_m1,a_sq_p3_m1);
+    xmm1 = _mm_adds_epi16(xmm0,y0_m_3_1);
+    bit_met_p3_m1= _mm_subs_epi16(xmm1,ch_mag_over_2);
+
+    xmm0 = _mm_subs_epi16(psi_a_p3_m3,a_sq_p3_m3);
+    xmm1 = _mm_adds_epi16(xmm0,y0_m_3_3);
+    bit_met_p3_m3= _mm_subs_epi16(xmm1,ch_mag_9_over_10);
+
+    xmm0 = _mm_subs_epi16(psi_a_m1_p1,a_sq_m1_p1);
+    xmm1 = _mm_subs_epi16(xmm0,y0_m_1_1);
+    bit_met_m1_p1= _mm_subs_epi16(xmm1,ch_mag_over_10);
+
+    xmm0 = _mm_subs_epi16(psi_a_m1_p3,a_sq_m1_p3);
+    xmm1 = _mm_subs_epi16(xmm0,y0_m_1_3);
+    bit_met_m1_p3= _mm_subs_epi16(xmm1,ch_mag_over_2);
+
+    xmm0 = _mm_subs_epi16(psi_a_m1_m1,a_sq_m1_m1);
+    xmm1 = _mm_subs_epi16(xmm0,y0_p_1_1);
+    bit_met_m1_m1= _mm_subs_epi16(xmm1,ch_mag_over_10);
+
+    xmm0 = _mm_subs_epi16(psi_a_m1_m3,a_sq_m1_m3);
+    xmm1 = _mm_subs_epi16(xmm0,y0_p_1_3);
+    bit_met_m1_m3= _mm_subs_epi16(xmm1,ch_mag_over_2);
+
+    xmm0 = _mm_subs_epi16(psi_a_m3_p1,a_sq_m3_p1);
+    xmm1 = _mm_subs_epi16(xmm0,y0_m_3_1);
+    bit_met_m3_p1= _mm_subs_epi16(xmm1,ch_mag_over_2);
+
+    xmm0 = _mm_subs_epi16(psi_a_m3_p3,a_sq_m3_p3);
+    xmm1 = _mm_subs_epi16(xmm0,y0_m_3_3);
+    bit_met_m3_p3= _mm_subs_epi16(xmm1,ch_mag_9_over_10);
+
+    xmm0 = _mm_subs_epi16(psi_a_m3_m1,a_sq_m3_m1);
+    xmm1 = _mm_subs_epi16(xmm0,y0_p_3_1);
+    bit_met_m3_m1= _mm_subs_epi16(xmm1,ch_mag_over_2);
+
+    xmm0 = _mm_subs_epi16(psi_a_m3_m3,a_sq_m3_m3);
+    xmm1 = _mm_subs_epi16(xmm0,y0_p_3_3);
+    bit_met_m3_m3= _mm_subs_epi16(xmm1,ch_mag_9_over_10);
+
+    // LLR of the first bit
+    // Bit = 1
+    xmm0 = _mm_max_epi16(bit_met_m1_p1,bit_met_m1_p3);
+    xmm1 = _mm_max_epi16(bit_met_m1_m1,bit_met_m1_m3);
+    xmm2 = _mm_max_epi16(bit_met_m3_p1,bit_met_m3_p3);
+    xmm3 = _mm_max_epi16(bit_met_m3_m1,bit_met_m3_m3);
+    xmm4 = _mm_max_epi16(xmm0,xmm1);
+    xmm5 = _mm_max_epi16(xmm2,xmm3);
+    logmax_num_re0= _mm_max_epi16(xmm4,xmm5);
+
+    // Bit = 0
+    xmm0 = _mm_max_epi16(bit_met_p1_p1,bit_met_p1_p3);
+    xmm1 = _mm_max_epi16(bit_met_p1_m1,bit_met_p1_m3);
+    xmm2 = _mm_max_epi16(bit_met_p3_p1,bit_met_p3_p3);
+    xmm3 = _mm_max_epi16(bit_met_p3_m1,bit_met_p3_m3);
+    xmm4 = _mm_max_epi16(xmm0,xmm1);
+    xmm5 = _mm_max_epi16(xmm2,xmm3);
+    logmax_den_re0 = _mm_max_epi16(xmm4,xmm5);
+
+    // LLR of first bit [L1(1), L1(2), L1(3), L1(4), L1(5), L1(6), L1(7), L1(8)]
+    y0r = _mm_subs_epi16(logmax_den_re0,logmax_num_re0);
+
+    // LLR of the second bit
+    // Bit = 1
+    xmm0 = _mm_max_epi16(bit_met_p1_m1,bit_met_p3_m1);
+    xmm1 = _mm_max_epi16(bit_met_m1_m1,bit_met_m3_m1);
+    xmm2 = _mm_max_epi16(bit_met_p1_m3,bit_met_p3_m3);
+    xmm3 = _mm_max_epi16(bit_met_m1_m3,bit_met_m3_m3);
+    xmm4 = _mm_max_epi16(xmm0,xmm1);
+    xmm5 = _mm_max_epi16(xmm2,xmm3);
+    logmax_num_re1 = _mm_max_epi16(xmm4,xmm5);
+
+    // Bit = 0
+    xmm0 = _mm_max_epi16(bit_met_p1_p1,bit_met_p3_p1);
+    xmm1 = _mm_max_epi16(bit_met_m1_p1,bit_met_m3_p1);
+    xmm2 = _mm_max_epi16(bit_met_p1_p3,bit_met_p3_p3);
+    xmm3 = _mm_max_epi16(bit_met_m1_p3,bit_met_m3_p3);
+    xmm4 = _mm_max_epi16(xmm0,xmm1);
+    xmm5 = _mm_max_epi16(xmm2,xmm3);
+    logmax_den_re1 = _mm_max_epi16(xmm4,xmm5);
+
+    // LLR of second bit [L2(1), L2(2), L2(3), L2(4)]
+    y1r = _mm_subs_epi16(logmax_den_re1,logmax_num_re1);
+
+    // LLR of the third bit
+    // Bit = 1
+    xmm0 = _mm_max_epi16(bit_met_m3_p1,bit_met_m3_p3);
+    xmm1 = _mm_max_epi16(bit_met_m3_m1,bit_met_m3_m3);
+    xmm2 = _mm_max_epi16(bit_met_p3_p1,bit_met_p3_p3);
+    xmm3 = _mm_max_epi16(bit_met_p3_m1,bit_met_p3_m3);
+    xmm4 = _mm_max_epi16(xmm0,xmm1);
+    xmm5 = _mm_max_epi16(xmm2,xmm3);
+    logmax_num_im0 = _mm_max_epi16(xmm4,xmm5);
+
+    // Bit = 0
+    xmm0 = _mm_max_epi16(bit_met_m1_p1,bit_met_m1_p3);
+    xmm1 = _mm_max_epi16(bit_met_m1_m1,bit_met_m1_m3);
+    xmm2 = _mm_max_epi16(bit_met_p1_p1,bit_met_p1_p3);
+    xmm3 = _mm_max_epi16(bit_met_p1_m1,bit_met_p1_m3);
+    xmm4 = _mm_max_epi16(xmm0,xmm1);
+    xmm5 = _mm_max_epi16(xmm2,xmm3);
+    logmax_den_im0 = _mm_max_epi16(xmm4,xmm5);
+
+    // LLR of third bit [L3(1), L3(2), L3(3), L3(4)]
+    y0i = _mm_subs_epi16(logmax_den_im0,logmax_num_im0);
+
+    // LLR of the fourth bit
+    // Bit = 1
+    xmm0 = _mm_max_epi16(bit_met_p1_m3,bit_met_p3_m3);
+    xmm1 = _mm_max_epi16(bit_met_m1_m3,bit_met_m3_m3);
+    xmm2 = _mm_max_epi16(bit_met_p1_p3,bit_met_p3_p3);
+    xmm3 = _mm_max_epi16(bit_met_m1_p3,bit_met_m3_p3);
+    xmm4 = _mm_max_epi16(xmm0,xmm1);
+    xmm5 = _mm_max_epi16(xmm2,xmm3);
+    logmax_num_im1 = _mm_max_epi16(xmm4,xmm5);
+
+    // Bit = 0
+    xmm0 = _mm_max_epi16(bit_met_p1_m1,bit_met_p3_m1);
+    xmm1 = _mm_max_epi16(bit_met_m1_m1,bit_met_m3_m1);
+    xmm2 = _mm_max_epi16(bit_met_p1_p1,bit_met_p3_p1);
+    xmm3 = _mm_max_epi16(bit_met_m1_p1,bit_met_m3_p1);
+    xmm4 = _mm_max_epi16(xmm0,xmm1);
+    xmm5 = _mm_max_epi16(xmm2,xmm3);
+    logmax_den_im1 = _mm_max_epi16(xmm4,xmm5);
+
+    // LLR of fourth bit [L4(1), L4(2), L4(3), L4(4)]
+    y1i = _mm_subs_epi16(logmax_den_im1,logmax_num_im1);
+
+    // Pack LLRs in output
+    // [L1(1), L2(1), L1(2), L2(2), L1(3), L2(3), L1(4), L2(4)]
+    xmm0 = _mm_unpacklo_epi16(y0r,y1r);
+    // [L1(5), L2(5), L1(6), L2(6), L1(7), L2(7), L1(8), L2(8)]
+    xmm1 = _mm_unpackhi_epi16(y0r,y1r);
+    // [L3(1), L4(1), L3(2), L4(2), L3(3), L4(3), L3(4), L4(4)]
+    xmm2 = _mm_unpacklo_epi16(y0i,y1i);
+    // [L3(5), L4(5), L3(6), L4(6), L3(7), L4(7), L3(8), L4(8)]
+    xmm3 = _mm_unpackhi_epi16(y0i,y1i);
+
+    stream0_128i_out[2*i+0] = _mm_unpacklo_epi32(xmm0,xmm2); // 8LLRs, 2REs
+    stream0_128i_out[2*i+1] = _mm_unpackhi_epi32(xmm0,xmm2);
+    stream0_128i_out[2*i+2] = _mm_unpacklo_epi32(xmm1,xmm3);
+    stream0_128i_out[2*i+3] = _mm_unpackhi_epi32(xmm1,xmm3);
+#elif defined(__arm__)
+
+#endif
+
+  }
+
+#if defined(__x86_64__) || defined(__i386__)
+  _mm_empty();
+  _m_empty();
+#endif
+}
+
+/*
+ * Per-symbol LLR wrapper around qam16_qam64().
+ *
+ * Locates the data of OFDM symbol `symbol` inside buffer [0] of every input
+ * array (offset symbol*N_RB_DL*12 32-bit entries), derives the number of
+ * resource elements (REs) to process and forwards everything to
+ * qam16_qam64().
+ *
+ *  frame_parms         read for N_RB_DL, Ncp and nb_antenna_ports_eNB
+ *  rxdataF_comp        forwarded as 1st argument of qam16_qam64()
+ *  rxdataF_comp_i      forwarded as 2nd argument of qam16_qam64()
+ *  dl_ch_mag           forwarded as 3rd argument of qam16_qam64()
+ *  dl_ch_mag_i         forwarded as 4th argument of qam16_qam64()
+ *  rho_i               forwarded as 6th argument of qam16_qam64()
+ *  dlsch_llr           LLR destination used when first_symbol_flag == 1
+ *  symbol              index of the symbol to process
+ *  first_symbol_flag   1: write at dlsch_llr, otherwise continue at *llr16p
+ *  nb_rb               number of resource blocks entering the RE count
+ *  pbch_pss_sss_adjust RE count correction (scaled by 2/3 or 5/6 on pilot symbols)
+ *  llr16p              in/out write cursor; on return it points (RE count)*4
+ *                      int16_t entries past the position written for this symbol
+ *
+ * Always returns 0. AssertFatal is invoked when the selected LLR destination
+ * is NULL.
+ */
+int dlsch_16qam_64qam_llr(LTE_DL_FRAME_PARMS *frame_parms,
+                          int32_t **rxdataF_comp,
+                          int32_t **rxdataF_comp_i,
+                          int32_t **dl_ch_mag,   //|h_0|^2*(2/sqrt{10})
+                          int32_t **dl_ch_mag_i, //|h_1|^2*(2/sqrt{10})
+                          int32_t **rho_i,
+                          int16_t *dlsch_llr,
+                          uint8_t symbol,
+                          uint8_t first_symbol_flag,
+                          uint16_t nb_rb,
+                          uint16_t pbch_pss_sss_adjust,
+                          int16_t **llr16p)
+{
+
+  // Start of this symbol's samples in buffer [0] of each input, viewed as 16-bit words
+  int16_t *sig_des  = (int16_t*)(rxdataF_comp[0]   + (symbol*frame_parms->N_RB_DL*12));
+  int16_t *sig_int  = (int16_t*)(rxdataF_comp_i[0] + (symbol*frame_parms->N_RB_DL*12));
+  int16_t *mag_des  = (int16_t*)(dl_ch_mag[0]      + (symbol*frame_parms->N_RB_DL*12));
+  int16_t *mag_int  = (int16_t*)(dl_ch_mag_i[0]    + (symbol*frame_parms->N_RB_DL*12));
+  int16_t *xcorr    = (int16_t*)(rho_i[0]          + (symbol*frame_parms->N_RB_DL*12));
+
+  // The first symbol starts at the head of the LLR buffer; later symbols
+  // continue where the previous call left the cursor
+  int16_t *llr_out  = (first_symbol_flag == 1) ? (int16_t*)dlsch_llr : (int16_t*)(*llr16p);
+
+  int n_re;
+  uint8_t symbol_mod = symbol;
+
+  // Fold the symbol index back by (7-Ncp) once it reaches that value
+  if (symbol >= (7-frame_parms->Ncp))
+    symbol_mod = symbol-(7-frame_parms->Ncp);
+
+  AssertFatal(llr_out!=NULL,"dlsch_16qam_64qam_llr:llr is null, symbol %d\n",symbol);
+
+  if ((symbol_mod!=0) && (symbol_mod!=(4-frame_parms->Ncp))) {
+    // no pilots in this symbol: all 12 REs of each RB are used
+    n_re = (nb_rb*12) - pbch_pss_sss_adjust;
+  } else if (frame_parms->nb_antenna_ports_eNB!=1) {
+    // pilot symbol, more than one antenna port: 8 REs per symbol per RB
+    n_re = (nb_rb*8) - (2*pbch_pss_sss_adjust/3);
+  } else {
+    // pilot symbol, single antenna port: 10 REs per symbol per RB
+    n_re = (nb_rb*10) - (5*pbch_pss_sss_adjust/6);
+  }
+
+  // printf("symbol %d: qam16_llr, len %d (llr16 %p)\n",symbol,n_re,llr_out);
+
+  qam16_qam64((short *)sig_des, (short *)sig_int,
+              (short *)mag_des, (short *)mag_int,
+              (short *)llr_out,
+              (short *)xcorr,
+              n_re);
+
+  // Advance the caller's cursor by 4 int16_t entries per processed RE
+  *llr16p = (short *)(llr_out + (n_re<<2));
+
+  return 0;
+}
+
+//----------------------------------------------------------------------------------------------
+// 64-QAM
+//----------------------------------------------------------------------------------------------
+
+/*
+__m128i ONE_OVER_SQRT_42 __attribute__((aligned(16)));
+__m128i THREE_OVER_SQRT_42 __attribute__((aligned(16)));
+__m128i FIVE_OVER_SQRT_42 __attribute__((aligned(16)));
+__m128i SEVEN_OVER_SQRT_42 __attribute__((aligned(16)));
+
+__m128i FORTYNINE_OVER_FOUR_SQRT_42 __attribute__((aligned(16)));
+__m128i THIRTYSEVEN_OVER_FOUR_SQRT_42 __attribute__((aligned(16)));
+__m128i TWENTYNINE_OVER_FOUR_SQRT_42 __attribute__((aligned(16)));
+__m128i TWENTYFIVE_OVER_FOUR_SQRT_42 __attribute__((aligned(16)));
+__m128i SEVENTEEN_OVER_FOUR_SQRT_42 __attribute__((aligned(16)));
+__m128i NINE_OVER_FOUR_SQRT_42 __attribute__((aligned(16)));
+__m128i THIRTEEN_OVER_FOUR_SQRT_42 __attribute__((aligned(16)));
+__m128i FIVE_OVER_FOUR_SQRT_42 __attribute__((aligned(16)));
+__m128i ONE_OVER_FOUR_SQRT_42 __attribute__((aligned(16)));
+
+__m128i  y0r_one_over_sqrt_21 __attribute__((aligned(16)));
+__m128i  y0r_three_over_sqrt_21 __attribute__((aligned(16)));
+__m128i  y0r_five_over_sqrt_21 __attribute__((aligned(16)));
+__m128i  y0r_seven_over_sqrt_21 __attribute__((aligned(16)));
+__m128i  y0i_one_over_sqrt_21 __attribute__((aligned(16)));
+__m128i  y0i_three_over_sqrt_21 __attribute__((aligned(16)));
+__m128i  y0i_five_over_sqrt_21 __attribute__((aligned(16)));
+__m128i  y0i_seven_over_sqrt_21 __attribute__((aligned(16)));
+
+__m128i ch_mag_98_over_42_with_sigma2 __attribute__((aligned(16)));
+__m128i ch_mag_74_over_42_with_sigma2 __attribute__((aligned(16)));
+__m128i ch_mag_58_over_42_with_sigma2 __attribute__((aligned(16)));
+__m128i ch_mag_50_over_42_with_sigma2 __attribute__((aligned(16)));
+__m128i ch_mag_34_over_42_with_sigma2 __attribute__((aligned(16)));
+__m128i ch_mag_18_over_42_with_sigma2 __attribute__((aligned(16)));
+__m128i ch_mag_26_over_42_with_sigma2 __attribute__((aligned(16)));
+__m128i ch_mag_10_over_42_with_sigma2 __attribute__((aligned(16)));
+__m128i ch_mag_2_over_42_with_sigma2 __attribute__((aligned(16)));
+
+*/
+
+void qam64_qpsk(int16_t *stream0_in,
+                int16_t *stream1_in,
+                int16_t *ch_mag,
+                int16_t *stream0_out,
+                int16_t *rho01,
+                int32_t length
+    )
+{
+
+  /*
+    Author: S. Wagner
+    Date: 31-07-12
+
+    Input:
+    stream0_in:  MF filter for 1st stream, i.e., y0=h0'*y
+    stream1_in:  MF filter for 2nd stream, i.e., y1=h1'*y
+    ch_mag:      4*h0/sqrt(42), [Re0 Im0 Re1 Im1] s.t. Im0=Re0, Im1=Re1, etc
+    ch_mag_i:    4*h1/sqrt(42), [Re0 Im0 Re1 Im1] s.t. Im0=Re0, Im1=Re1, etc
+    rho01:       Channel cross correlation, i.e., h1'*h0
+
+    Output:
+    stream0_out: output LLRs for 1st stream
+  */
+
+#if defined(__x86_64__) || defined(__i386__)
+  __m128i *rho01_128i      = (__m128i *)rho01;
+  __m128i *stream0_128i_in = (__m128i *)stream0_in;
+  __m128i *stream1_128i_in = (__m128i *)stream1_in;
+  __m128i *ch_mag_128i     = (__m128i *)ch_mag;
+
+
+  __m128i ONE_OVER_SQRT_42 = _mm_set1_epi16(10112); // round(1/sqrt(42)*2^16)
+  __m128i THREE_OVER_SQRT_42 = _mm_set1_epi16(30337); // round(3/sqrt(42)*2^16)
+  __m128i FIVE_OVER_SQRT_42 = _mm_set1_epi16(25281); // round(5/sqrt(42)*2^15)
+  __m128i SEVEN_OVER_SQRT_42 = _mm_set1_epi16(17697); // round(5/sqrt(42)*2^15)
+  __m128i ONE_OVER_SQRT_2 = _mm_set1_epi16(23170); // round(1/sqrt(2)*2^15)
+  __m128i FORTYNINE_OVER_FOUR_SQRT_42 = _mm_set1_epi16(30969); // round(49/(4*sqrt(42))*2^14), Q2.14
+  __m128i THIRTYSEVEN_OVER_FOUR_SQRT_42 = _mm_set1_epi16(23385); // round(37/(4*sqrt(42))*2^14), Q2.14
+  __m128i TWENTYFIVE_OVER_FOUR_SQRT_42 = _mm_set1_epi16(31601); // round(25/(4*sqrt(42))*2^15)
+  __m128i TWENTYNINE_OVER_FOUR_SQRT_42 = _mm_set1_epi16(18329); // round(29/(4*sqrt(42))*2^15), Q2.14
+  __m128i SEVENTEEN_OVER_FOUR_SQRT_42 = _mm_set1_epi16(21489); // round(17/(4*sqrt(42))*2^15)
+  __m128i NINE_OVER_FOUR_SQRT_42 = _mm_set1_epi16(11376); // round(9/(4*sqrt(42))*2^15)
+  __m128i THIRTEEN_OVER_FOUR_SQRT_42 = _mm_set1_epi16(16433); // round(13/(4*sqrt(42))*2^15)
+  __m128i FIVE_OVER_FOUR_SQRT_42 = _mm_set1_epi16(6320); // round(5/(4*sqrt(42))*2^15)
+  __m128i ONE_OVER_FOUR_SQRT_42 = _mm_set1_epi16(1264); // round(1/(4*sqrt(42))*2^15)
+
+
+  __m128i ch_mag_des;
+  __m128i ch_mag_98_over_42_with_sigma2;
+  __m128i ch_mag_74_over_42_with_sigma2;
+  __m128i ch_mag_58_over_42_with_sigma2;
+  __m128i ch_mag_50_over_42_with_sigma2;
+  __m128i ch_mag_34_over_42_with_sigma2;
+  __m128i ch_mag_18_over_42_with_sigma2;
+  __m128i ch_mag_26_over_42_with_sigma2;
+  __m128i ch_mag_10_over_42_with_sigma2;
+  __m128i ch_mag_2_over_42_with_sigma2;
+  __m128i  y0r_one_over_sqrt_21;
+  __m128i  y0r_three_over_sqrt_21;
+  __m128i  y0r_five_over_sqrt_21;
+  __m128i  y0r_seven_over_sqrt_21;
+  __m128i  y0i_one_over_sqrt_21;
+  __m128i  y0i_three_over_sqrt_21;
+  __m128i  y0i_five_over_sqrt_21;
+  __m128i  y0i_seven_over_sqrt_21;
+#elif defined(__arm__)
+
+#endif
+
+  int i,j;
+
+  for (i=0; i<length>>2; i+=2) {
+
+#if defined(__x86_64) || defined(__i386__)
+    // Get rho
+    xmm0 = rho01_128i[i];
+    xmm1 = rho01_128i[i+1];
+    xmm0 = _mm_shufflelo_epi16(xmm0,0xd8); //_MM_SHUFFLE(0,2,1,3));
+    xmm0 = _mm_shufflehi_epi16(xmm0,0xd8); //_MM_SHUFFLE(0,2,1,3));
+    xmm0 = _mm_shuffle_epi32(xmm0,0xd8); //_MM_SHUFFLE(0,2,1,3));
+    xmm1 = _mm_shufflelo_epi16(xmm1,0xd8); //_MM_SHUFFLE(0,2,1,3));
+    xmm1 = _mm_shufflehi_epi16(xmm1,0xd8); //_MM_SHUFFLE(0,2,1,3));
+    xmm1 = _mm_shuffle_epi32(xmm1,0xd8); //_MM_SHUFFLE(0,2,1,3));
+    //xmm0 = [Re(0,1) Re(2,3) Im(0,1) Im(2,3)]
+    //xmm1 = [Re(4,5) Re(6,7) Im(4,5) Im(6,7)]
+    xmm2 = _mm_unpacklo_epi64(xmm0,xmm1); // Re(rho)
+    xmm3 = _mm_unpackhi_epi64(xmm0,xmm1); // Im(rho)
+    rho_rpi = _mm_adds_epi16(xmm2,xmm3); // rho = Re(rho) + Im(rho)
+    rho_rmi = _mm_subs_epi16(xmm2,xmm3); // rho* = Re(rho) - Im(rho)
+
+    // Compute the different rhos
+    rho_rpi_1_1 = _mm_mulhi_epi16(rho_rpi, ONE_OVER_SQRT_42);
+    rho_rmi_1_1 = _mm_mulhi_epi16(rho_rmi, ONE_OVER_SQRT_42);
+    rho_rpi_3_3 = _mm_mulhi_epi16(rho_rpi, THREE_OVER_SQRT_42);
+    rho_rmi_3_3 = _mm_mulhi_epi16(rho_rmi, THREE_OVER_SQRT_42);
+    rho_rpi_5_5 = _mm_mulhi_epi16(rho_rpi, FIVE_OVER_SQRT_42);
+    rho_rmi_5_5 = _mm_mulhi_epi16(rho_rmi, FIVE_OVER_SQRT_42);
+    rho_rpi_7_7 = _mm_mulhi_epi16(rho_rpi, SEVEN_OVER_SQRT_42);
+    rho_rmi_7_7 = _mm_mulhi_epi16(rho_rmi, SEVEN_OVER_SQRT_42);
+
+    rho_rpi_5_5 = _mm_slli_epi16(rho_rpi_5_5, 1);
+    rho_rmi_5_5 = _mm_slli_epi16(rho_rmi_5_5, 1);
+    rho_rpi_7_7 = _mm_slli_epi16(rho_rpi_7_7, 2);
+    rho_rmi_7_7 = _mm_slli_epi16(rho_rmi_7_7, 2);
+
+    xmm4 = _mm_mulhi_epi16(xmm2, ONE_OVER_SQRT_42);
+    xmm5 = _mm_mulhi_epi16(xmm3, ONE_OVER_SQRT_42);
+    xmm6 = _mm_mulhi_epi16(xmm3, THREE_OVER_SQRT_42);
+    xmm7 = _mm_mulhi_epi16(xmm3, FIVE_OVER_SQRT_42);
+    xmm8 = _mm_mulhi_epi16(xmm3, SEVEN_OVER_SQRT_42);
+    xmm7 = _mm_slli_epi16(xmm7, 1);
+    xmm8 = _mm_slli_epi16(xmm8, 2);
+
+    rho_rpi_1_3 = _mm_adds_epi16(xmm4, xmm6);
+    rho_rmi_1_3 = _mm_subs_epi16(xmm4, xmm6);
+    rho_rpi_1_5 = _mm_adds_epi16(xmm4, xmm7);
+    rho_rmi_1_5 = _mm_subs_epi16(xmm4, xmm7);
+    rho_rpi_1_7 = _mm_adds_epi16(xmm4, xmm8);
+    rho_rmi_1_7 = _mm_subs_epi16(xmm4, xmm8);
+
+    xmm4 = _mm_mulhi_epi16(xmm2, THREE_OVER_SQRT_42);
+    rho_rpi_3_1 = _mm_adds_epi16(xmm4, xmm5);
+    rho_rmi_3_1 = _mm_subs_epi16(xmm4, xmm5);
+    rho_rpi_3_5 = _mm_adds_epi16(xmm4, xmm7);
+    rho_rmi_3_5 = _mm_subs_epi16(xmm4, xmm7);
+    rho_rpi_3_7 = _mm_adds_epi16(xmm4, xmm8);
+    rho_rmi_3_7 = _mm_subs_epi16(xmm4, xmm8);
+
+    xmm4 = _mm_mulhi_epi16(xmm2, FIVE_OVER_SQRT_42);
+    xmm4 = _mm_slli_epi16(xmm4, 1);
+    rho_rpi_5_1 = _mm_adds_epi16(xmm4, xmm5);
+    rho_rmi_5_1 = _mm_subs_epi16(xmm4, xmm5);
+    rho_rpi_5_3 = _mm_adds_epi16(xmm4, xmm6);
+    rho_rmi_5_3 = _mm_subs_epi16(xmm4, xmm6);
+    rho_rpi_5_7 = _mm_adds_epi16(xmm4, xmm8);
+    rho_rmi_5_7 = _mm_subs_epi16(xmm4, xmm8);
+
+    xmm4 = _mm_mulhi_epi16(xmm2, SEVEN_OVER_SQRT_42);
+    xmm4 = _mm_slli_epi16(xmm4, 2);
+    rho_rpi_7_1 = _mm_adds_epi16(xmm4, xmm5);
+    rho_rmi_7_1 = _mm_subs_epi16(xmm4, xmm5);
+    rho_rpi_7_3 = _mm_adds_epi16(xmm4, xmm6);
+    rho_rmi_7_3 = _mm_subs_epi16(xmm4, xmm6);
+    rho_rpi_7_5 = _mm_adds_epi16(xmm4, xmm7);
+    rho_rmi_7_5 = _mm_subs_epi16(xmm4, xmm7);
+
+    // Rearrange interfering MF output
+    xmm0 = stream1_128i_in[i];
+    xmm1 = stream1_128i_in[i+1];
+    xmm0 = _mm_shufflelo_epi16(xmm0,0xd8); //_MM_SHUFFLE(0,2,1,3));
+    xmm0 = _mm_shufflehi_epi16(xmm0,0xd8); //_MM_SHUFFLE(0,2,1,3));
+    xmm0 = _mm_shuffle_epi32(xmm0,0xd8); //_MM_SHUFFLE(0,2,1,3));
+    xmm1 = _mm_shufflelo_epi16(xmm1,0xd8); //_MM_SHUFFLE(0,2,1,3));
+    xmm1 = _mm_shufflehi_epi16(xmm1,0xd8); //_MM_SHUFFLE(0,2,1,3));
+    xmm1 = _mm_shuffle_epi32(xmm1,0xd8); //_MM_SHUFFLE(0,2,1,3));
+    //xmm0 = [Re(0,1) Re(2,3) Im(0,1) Im(2,3)]
+    //xmm1 = [Re(4,5) Re(6,7) Im(4,5) Im(6,7)]
+    y1r = _mm_unpacklo_epi64(xmm0,xmm1); //[y1r(1),y1r(2),y1r(3),y1r(4)]
+    y1i = _mm_unpackhi_epi64(xmm0,xmm1); //[y1i(1),y1i(2),y1i(3),y1i(4)]
+
+    // Psi_r calculation from rho_rpi or rho_rmi
+    xmm0 = _mm_setzero_si128(); // ZERO for abs_pi16
+    xmm2 = _mm_subs_epi16(rho_rpi_7_7, y1r);
+    psi_r_p7_p7 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_subs_epi16(rho_rpi_7_5, y1r);
+    psi_r_p7_p5 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_subs_epi16(rho_rpi_7_3, y1r);
+    psi_r_p7_p3 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_subs_epi16(rho_rpi_7_1, y1r);
+    psi_r_p7_p1 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_subs_epi16(rho_rmi_7_1, y1r);
+    psi_r_p7_m1 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_subs_epi16(rho_rmi_7_3, y1r);
+    psi_r_p7_m3 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_subs_epi16(rho_rmi_7_5, y1r);
+    psi_r_p7_m5 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_subs_epi16(rho_rmi_7_7, y1r);
+    psi_r_p7_m7 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_subs_epi16(rho_rpi_5_7, y1r);
+    psi_r_p5_p7 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_subs_epi16(rho_rpi_5_5, y1r);
+    psi_r_p5_p5 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_subs_epi16(rho_rpi_5_3, y1r);
+    psi_r_p5_p3 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_subs_epi16(rho_rpi_5_1, y1r);
+    psi_r_p5_p1 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_subs_epi16(rho_rmi_5_1, y1r);
+    psi_r_p5_m1 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_subs_epi16(rho_rmi_5_3, y1r);
+    psi_r_p5_m3 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_subs_epi16(rho_rmi_5_5, y1r);
+    psi_r_p5_m5 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_subs_epi16(rho_rmi_5_7, y1r);
+    psi_r_p5_m7 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_subs_epi16(rho_rpi_3_7, y1r);
+    psi_r_p3_p7 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_subs_epi16(rho_rpi_3_5, y1r);
+    psi_r_p3_p5 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_subs_epi16(rho_rpi_3_3, y1r);
+    psi_r_p3_p3 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_subs_epi16(rho_rpi_3_1, y1r);
+    psi_r_p3_p1 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_subs_epi16(rho_rmi_3_1, y1r);
+    psi_r_p3_m1 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_subs_epi16(rho_rmi_3_3, y1r);
+    psi_r_p3_m3 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_subs_epi16(rho_rmi_3_5, y1r);
+    psi_r_p3_m5 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_subs_epi16(rho_rmi_3_7, y1r);
+    psi_r_p3_m7 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_subs_epi16(rho_rpi_1_7, y1r);
+    psi_r_p1_p7 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_subs_epi16(rho_rpi_1_5, y1r);
+    psi_r_p1_p5 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_subs_epi16(rho_rpi_1_3, y1r);
+    psi_r_p1_p3 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_subs_epi16(rho_rpi_1_1, y1r);
+    psi_r_p1_p1 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_subs_epi16(rho_rmi_1_1, y1r);
+    psi_r_p1_m1 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_subs_epi16(rho_rmi_1_3, y1r);
+    psi_r_p1_m3 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_subs_epi16(rho_rmi_1_5, y1r);
+    psi_r_p1_m5 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_subs_epi16(rho_rmi_1_7, y1r);
+    psi_r_p1_m7 = _mm_abs_epi16(xmm2);
+
+    xmm2 = _mm_adds_epi16(rho_rmi_1_7, y1r);
+    psi_r_m1_p7 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_adds_epi16(rho_rmi_1_5, y1r);
+    psi_r_m1_p5 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_adds_epi16(rho_rmi_1_3, y1r);
+    psi_r_m1_p3 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_adds_epi16(rho_rmi_1_1, y1r);
+    psi_r_m1_p1 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_adds_epi16(rho_rpi_1_1, y1r);
+    psi_r_m1_m1 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_adds_epi16(rho_rpi_1_3, y1r);
+    psi_r_m1_m3 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_adds_epi16(rho_rpi_1_5, y1r);
+    psi_r_m1_m5 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_adds_epi16(rho_rpi_1_7, y1r);
+    psi_r_m1_m7 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_adds_epi16(rho_rmi_3_7, y1r);
+    psi_r_m3_p7 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_adds_epi16(rho_rmi_3_5, y1r);
+    psi_r_m3_p5 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_adds_epi16(rho_rmi_3_3, y1r);
+    psi_r_m3_p3 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_adds_epi16(rho_rmi_3_1, y1r);
+    psi_r_m3_p1 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_adds_epi16(rho_rpi_3_1, y1r);
+    psi_r_m3_m1 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_adds_epi16(rho_rpi_3_3, y1r);
+    psi_r_m3_m3 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_adds_epi16(rho_rpi_3_5, y1r);
+    psi_r_m3_m5 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_adds_epi16(rho_rpi_3_7, y1r);
+    psi_r_m3_m7 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_adds_epi16(rho_rmi_5_7, y1r);
+    psi_r_m5_p7 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_adds_epi16(rho_rmi_5_5, y1r);
+    psi_r_m5_p5 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_adds_epi16(rho_rmi_5_3, y1r);
+    psi_r_m5_p3 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_adds_epi16(rho_rmi_5_1, y1r);
+    psi_r_m5_p1 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_adds_epi16(rho_rpi_5_1, y1r);
+    psi_r_m5_m1 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_adds_epi16(rho_rpi_5_3, y1r);
+    psi_r_m5_m3 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_adds_epi16(rho_rpi_5_5, y1r);
+    psi_r_m5_m5 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_adds_epi16(rho_rpi_5_7, y1r);
+    psi_r_m5_m7 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_adds_epi16(rho_rmi_7_7, y1r);
+    psi_r_m7_p7 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_adds_epi16(rho_rmi_7_5, y1r);
+    psi_r_m7_p5 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_adds_epi16(rho_rmi_7_3, y1r);
+    psi_r_m7_p3 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_adds_epi16(rho_rmi_7_1, y1r);
+    psi_r_m7_p1 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_adds_epi16(rho_rpi_7_1, y1r);
+    psi_r_m7_m1 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_adds_epi16(rho_rpi_7_3, y1r);
+    psi_r_m7_m3 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_adds_epi16(rho_rpi_7_5, y1r);
+    psi_r_m7_m5 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_adds_epi16(rho_rpi_7_7, y1r);
+    psi_r_m7_m7 = _mm_abs_epi16(xmm2);
+
+    // Psi_i calculation from rho_rpi or rho_rmi
+    xmm2 = _mm_subs_epi16(rho_rmi_7_7, y1i);
+    psi_i_p7_p7 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_subs_epi16(rho_rmi_5_7, y1i);
+    psi_i_p7_p5 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_subs_epi16(rho_rmi_3_7, y1i);
+    psi_i_p7_p3 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_subs_epi16(rho_rmi_1_7, y1i);
+    psi_i_p7_p1 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_adds_epi16(rho_rpi_1_7, y1i);
+    psi_i_p7_m1 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_adds_epi16(rho_rpi_3_7, y1i);
+    psi_i_p7_m3 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_adds_epi16(rho_rpi_5_7, y1i);
+    psi_i_p7_m5 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_adds_epi16(rho_rpi_7_7, y1i);
+    psi_i_p7_m7 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_subs_epi16(rho_rmi_7_5, y1i);
+    psi_i_p5_p7 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_subs_epi16(rho_rmi_5_5, y1i);
+    psi_i_p5_p5 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_subs_epi16(rho_rmi_3_5, y1i);
+    psi_i_p5_p3 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_subs_epi16(rho_rmi_1_5, y1i);
+    psi_i_p5_p1 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_adds_epi16(rho_rpi_1_5, y1i);
+    psi_i_p5_m1 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_adds_epi16(rho_rpi_3_5, y1i);
+    psi_i_p5_m3 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_adds_epi16(rho_rpi_5_5, y1i);
+    psi_i_p5_m5 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_adds_epi16(rho_rpi_7_5, y1i);
+    psi_i_p5_m7 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_subs_epi16(rho_rmi_7_3, y1i);
+    psi_i_p3_p7 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_subs_epi16(rho_rmi_5_3, y1i);
+    psi_i_p3_p5 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_subs_epi16(rho_rmi_3_3, y1i);
+    psi_i_p3_p3 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_subs_epi16(rho_rmi_1_3, y1i);
+    psi_i_p3_p1 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_adds_epi16(rho_rpi_1_3, y1i);
+    psi_i_p3_m1 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_adds_epi16(rho_rpi_3_3, y1i);
+    psi_i_p3_m3 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_adds_epi16(rho_rpi_5_3, y1i);
+    psi_i_p3_m5 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_adds_epi16(rho_rpi_7_3, y1i);
+    psi_i_p3_m7 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_subs_epi16(rho_rmi_7_1, y1i);
+    psi_i_p1_p7 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_subs_epi16(rho_rmi_5_1, y1i);
+    psi_i_p1_p5 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_subs_epi16(rho_rmi_3_1, y1i);
+    psi_i_p1_p3 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_subs_epi16(rho_rmi_1_1, y1i);
+    psi_i_p1_p1 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_adds_epi16(rho_rpi_1_1, y1i);
+    psi_i_p1_m1 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_adds_epi16(rho_rpi_3_1, y1i);
+    psi_i_p1_m3 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_adds_epi16(rho_rpi_5_1, y1i);
+    psi_i_p1_m5 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_adds_epi16(rho_rpi_7_1, y1i);
+    psi_i_p1_m7 = _mm_abs_epi16(xmm2);
+
+    xmm2 = _mm_subs_epi16(rho_rpi_7_1, y1i);
+    psi_i_m1_p7 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_subs_epi16(rho_rpi_5_1, y1i);
+    psi_i_m1_p5 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_subs_epi16(rho_rpi_3_1, y1i);
+    psi_i_m1_p3 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_subs_epi16(rho_rpi_1_1, y1i);
+    psi_i_m1_p1 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_adds_epi16(rho_rmi_1_1, y1i);
+    psi_i_m1_m1 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_adds_epi16(rho_rmi_3_1, y1i);
+    psi_i_m1_m3 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_adds_epi16(rho_rmi_5_1, y1i);
+    psi_i_m1_m5 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_adds_epi16(rho_rmi_7_1, y1i);
+    psi_i_m1_m7 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_subs_epi16(rho_rpi_7_3, y1i);
+    psi_i_m3_p7 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_subs_epi16(rho_rpi_5_3, y1i);
+    psi_i_m3_p5 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_subs_epi16(rho_rpi_3_3, y1i);
+    psi_i_m3_p3 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_subs_epi16(rho_rpi_1_3, y1i);
+    psi_i_m3_p1 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_adds_epi16(rho_rmi_1_3, y1i);
+    psi_i_m3_m1 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_adds_epi16(rho_rmi_3_3, y1i);
+    psi_i_m3_m3 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_adds_epi16(rho_rmi_5_3, y1i);
+    psi_i_m3_m5 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_adds_epi16(rho_rmi_7_3, y1i);
+    psi_i_m3_m7 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_subs_epi16(rho_rpi_7_5, y1i);
+    psi_i_m5_p7 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_subs_epi16(rho_rpi_5_5, y1i);
+    psi_i_m5_p5 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_subs_epi16(rho_rpi_3_5, y1i);
+    psi_i_m5_p3 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_subs_epi16(rho_rpi_1_5, y1i);
+    psi_i_m5_p1 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_adds_epi16(rho_rmi_1_5, y1i);
+    psi_i_m5_m1 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_adds_epi16(rho_rmi_3_5, y1i);
+    psi_i_m5_m3 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_adds_epi16(rho_rmi_5_5, y1i);
+    psi_i_m5_m5 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_adds_epi16(rho_rmi_7_5, y1i);
+    psi_i_m5_m7 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_subs_epi16(rho_rpi_7_7, y1i);
+    psi_i_m7_p7 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_subs_epi16(rho_rpi_5_7, y1i);
+    psi_i_m7_p5 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_subs_epi16(rho_rpi_3_7, y1i);
+    psi_i_m7_p3 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_subs_epi16(rho_rpi_1_7, y1i);
+    psi_i_m7_p1 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_adds_epi16(rho_rmi_1_7, y1i);
+    psi_i_m7_m1 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_adds_epi16(rho_rmi_3_7, y1i);
+    psi_i_m7_m3 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_adds_epi16(rho_rmi_5_7, y1i);
+    psi_i_m7_m5 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_adds_epi16(rho_rmi_7_7, y1i);
+    psi_i_m7_m7 = _mm_abs_epi16(xmm2);
+
+
+    // Rearrange desired MF output (stream0): split two input vectors into one real-part and one imaginary-part vector; presumably the input is int16 Re/Im interleaved - confirm
+    xmm0 = stream0_128i_in[i];
+    xmm1 = stream0_128i_in[i+1];
+    xmm0 = _mm_shufflelo_epi16(xmm0,0xd8); // 0xd8 == _MM_SHUFFLE(3,1,2,0): swaps the two middle 16-bit words of the low half
+    xmm0 = _mm_shufflehi_epi16(xmm0,0xd8); // 0xd8 == _MM_SHUFFLE(3,1,2,0): swaps the two middle 16-bit words of the high half
+    xmm0 = _mm_shuffle_epi32(xmm0,0xd8); // 0xd8 == _MM_SHUFFLE(3,1,2,0): swaps the two middle 32-bit words
+    xmm1 = _mm_shufflelo_epi16(xmm1,0xd8); // same three-step reordering as applied to xmm0
+    xmm1 = _mm_shufflehi_epi16(xmm1,0xd8); // 0xd8 == _MM_SHUFFLE(3,1,2,0)
+    xmm1 = _mm_shuffle_epi32(xmm1,0xd8); // 0xd8 == _MM_SHUFFLE(3,1,2,0)
+    // layout after the shuffles (given interleaved Re/Im input): xmm0 = [Re(0,1) Re(2,3) Im(0,1) Im(2,3)]
+    // layout after the shuffles (given interleaved Re/Im input): xmm1 = [Re(4,5) Re(6,7) Im(4,5) Im(6,7)]
+    y0r = _mm_unpacklo_epi64(xmm0,xmm1); // low 64 bits of xmm0 then of xmm1: eight 16-bit lanes, Re(0..7) per the layout above
+    y0i = _mm_unpackhi_epi64(xmm0,xmm1); // high 64 bits of xmm0 then of xmm1: eight 16-bit lanes, Im(0..7) per the layout above
+
+    // Rearrange desired channel magnitudes: same reordering as the MF outputs, keeping only the low 64 bits of each reordered vector
+    xmm2 = ch_mag_128i[i]; // = [|h|^2(1),|h|^2(1),|h|^2(2),|h|^2(2)]*(2/sqrt(10)) - NOTE(review): layout and scale as stated by the original comment; the constants used nearby are *_OVER_SQRT_42, so confirm the factor
+    xmm3 = ch_mag_128i[i+1]; // = [|h|^2(3),|h|^2(3),|h|^2(4),|h|^2(4)]*(2/sqrt(10)) - NOTE(review): same caveat as the line above
+    xmm2 = _mm_shufflelo_epi16(xmm2,0xd8); // 0xd8 == _MM_SHUFFLE(3,1,2,0): swaps the two middle 16-bit words of the low half
+    xmm2 = _mm_shufflehi_epi16(xmm2,0xd8); // 0xd8 == _MM_SHUFFLE(3,1,2,0): swaps the two middle 16-bit words of the high half
+    xmm2 = _mm_shuffle_epi32(xmm2,0xd8); // 0xd8 == _MM_SHUFFLE(3,1,2,0): swaps the two middle 32-bit words
+    xmm3 = _mm_shufflelo_epi16(xmm3,0xd8); // same three-step reordering as applied to xmm2
+    xmm3 = _mm_shufflehi_epi16(xmm3,0xd8); // 0xd8 == _MM_SHUFFLE(3,1,2,0)
+    xmm3 = _mm_shuffle_epi32(xmm3,0xd8); // 0xd8 == _MM_SHUFFLE(3,1,2,0)
+    ch_mag_des = _mm_unpacklo_epi64(xmm2,xmm3); // low 64 bits of xmm2 followed by low 64 bits of xmm3; the high halves are discarded
+
+    y0r_one_over_sqrt_21   = _mm_mulhi_epi16(y0r, ONE_OVER_SQRT_42); // high 16 bits of the signed product; note the variable names say sqrt_21 while the multipliers are the *_OVER_SQRT_42 constants
+    y0r_three_over_sqrt_21 = _mm_mulhi_epi16(y0r, THREE_OVER_SQRT_42); // no shift follows for the ONE and THREE terms
+    y0r_five_over_sqrt_21  = _mm_mulhi_epi16(y0r, FIVE_OVER_SQRT_42);
+    y0r_five_over_sqrt_21  = _mm_slli_epi16(y0r_five_over_sqrt_21, 1); // left shift by 1 after the high-half multiply; presumably compensates the constant's fixed-point format - confirm against its definition
+    y0r_seven_over_sqrt_21 = _mm_mulhi_epi16(y0r, SEVEN_OVER_SQRT_42);
+    y0r_seven_over_sqrt_21 = _mm_slli_epi16(y0r_seven_over_sqrt_21, 2); // Q2.14 (per original comment); left shift by 2 after the high-half multiply
+
+    y0i_one_over_sqrt_21   = _mm_mulhi_epi16(y0i, ONE_OVER_SQRT_42); // imaginary part: same constants and shifts as the y0r terms above
+    y0i_three_over_sqrt_21 = _mm_mulhi_epi16(y0i, THREE_OVER_SQRT_42);
+    y0i_five_over_sqrt_21  = _mm_mulhi_epi16(y0i, FIVE_OVER_SQRT_42);
+    y0i_five_over_sqrt_21  = _mm_slli_epi16(y0i_five_over_sqrt_21, 1); // left shift by 1, matching the y0r FIVE term
+    y0i_seven_over_sqrt_21 = _mm_mulhi_epi16(y0i, SEVEN_OVER_SQRT_42);
+    y0i_seven_over_sqrt_21 = _mm_slli_epi16(y0i_seven_over_sqrt_21, 2); // Q2.14 (per original comment); left shift by 2, matching the y0r SEVEN term
+
+    y0_p_7_1 = _mm_adds_epi16(y0r_seven_over_sqrt_21, y0i_one_over_sqrt_21);
+    y0_p_7_3 = _mm_adds_epi16(y0r_seven_over_sqrt_21, y0i_three_over_sqrt_21);
+    y0_p_7_5 = _mm_adds_epi16(y0r_seven_over_sqrt_21, y0i_five_over_sqrt_21);
+    y0_p_7_7 = _mm_adds_epi16(y0r_seven_over_sqrt_21, y0i_seven_over_sqrt_21);
+    y0_p_5_1 = _mm_adds_epi16(y0r_five_over_sqrt_21, y0i_one_over_sqrt_21);
+    y0_p_5_3 = _mm_adds_epi16(y0r_five_over_sqrt_21, y0i_three_over_sqrt_21);
+    y0_p_5_5 = _mm_adds_epi16(y0r_five_over_sqrt_21, y0i_five_over_sqrt_21);
+    y0_p_5_7 = _mm_adds_epi16(y0r_five_over_sqrt_21, y0i_seven_over_sqrt_21);
+    y0_p_3_1 = _mm_adds_epi16(y0r_three_over_sqrt_21, y0i_one_over_sqrt_21);
+    y0_p_3_3 = _mm_adds_epi16(y0r_three_over_sqrt_21, y0i_three_over_sqrt_21);
+    y0_p_3_5 = _mm_adds_epi16(y0r_three_over_sqrt_21, y0i_five_over_sqrt_21);
+    y0_p_3_7 = _mm_adds_epi16(y0r_three_over_sqrt_21, y0i_seven_over_sqrt_21);
+    y0_p_1_1 = _mm_adds_epi16(y0r_one_over_sqrt_21, y0i_one_over_sqrt_21);
+    y0_p_1_3 = _mm_adds_epi16(y0r_one_over_sqrt_21, y0i_three_over_sqrt_21);
+    y0_p_1_5 = _mm_adds_epi16(y0r_one_over_sqrt_21, y0i_five_over_sqrt_21);
+    y0_p_1_7 = _mm_adds_epi16(y0r_one_over_sqrt_21, y0i_seven_over_sqrt_21);
+
+    y0_m_1_1 = _mm_subs_epi16(y0r_one_over_sqrt_21, y0i_one_over_sqrt_21);
+    y0_m_1_3 = _mm_subs_epi16(y0r_one_over_sqrt_21, y0i_three_over_sqrt_21);
+    y0_m_1_5 = _mm_subs_epi16(y0r_one_over_sqrt_21, y0i_five_over_sqrt_21);
+    y0_m_1_7 = _mm_subs_epi16(y0r_one_over_sqrt_21, y0i_seven_over_sqrt_21);
+    y0_m_3_1 = _mm_subs_epi16(y0r_three_over_sqrt_21, y0i_one_over_sqrt_21);
+    y0_m_3_3 = _mm_subs_epi16(y0r_three_over_sqrt_21, y0i_three_over_sqrt_21);
+    y0_m_3_5 = _mm_subs_epi16(y0r_three_over_sqrt_21, y0i_five_over_sqrt_21);
+    y0_m_3_7 = _mm_subs_epi16(y0r_three_over_sqrt_21, y0i_seven_over_sqrt_21);
+    y0_m_5_1 = _mm_subs_epi16(y0r_five_over_sqrt_21, y0i_one_over_sqrt_21);
+    y0_m_5_3 = _mm_subs_epi16(y0r_five_over_sqrt_21, y0i_three_over_sqrt_21);
+    y0_m_5_5 = _mm_subs_epi16(y0r_five_over_sqrt_21, y0i_five_over_sqrt_21);
+    y0_m_5_7 = _mm_subs_epi16(y0r_five_over_sqrt_21, y0i_seven_over_sqrt_21);
+    y0_m_7_1 = _mm_subs_epi16(y0r_seven_over_sqrt_21, y0i_one_over_sqrt_21);
+    y0_m_7_3 = _mm_subs_epi16(y0r_seven_over_sqrt_21, y0i_three_over_sqrt_21);
+    y0_m_7_5 = _mm_subs_epi16(y0r_seven_over_sqrt_21, y0i_five_over_sqrt_21);
+    y0_m_7_7 = _mm_subs_epi16(y0r_seven_over_sqrt_21, y0i_seven_over_sqrt_21);
+
+    // divide by sqrt(2)
+    psi_r_p7_p7 = _mm_mulhi_epi16(psi_r_p7_p7, ONE_OVER_SQRT_2);
+    psi_r_p7_p7 = _mm_slli_epi16(psi_r_p7_p7, 1);
+    psi_r_p7_p5 = _mm_mulhi_epi16(psi_r_p7_p5, ONE_OVER_SQRT_2);
+    psi_r_p7_p5 = _mm_slli_epi16(psi_r_p7_p5, 1);
+    psi_r_p7_p3 = _mm_mulhi_epi16(psi_r_p7_p3, ONE_OVER_SQRT_2);
+    psi_r_p7_p3 = _mm_slli_epi16(psi_r_p7_p3, 1);
+    psi_r_p7_p1 = _mm_mulhi_epi16(psi_r_p7_p1, ONE_OVER_SQRT_2);
+    psi_r_p7_p1 = _mm_slli_epi16(psi_r_p7_p1, 1);
+    psi_r_p7_m1 = _mm_mulhi_epi16(psi_r_p7_m1, ONE_OVER_SQRT_2);
+    psi_r_p7_m1 = _mm_slli_epi16(psi_r_p7_m1, 1);
+    psi_r_p7_m3 = _mm_mulhi_epi16(psi_r_p7_m3, ONE_OVER_SQRT_2);
+    psi_r_p7_m3 = _mm_slli_epi16(psi_r_p7_m3, 1);
+    psi_r_p7_m5 = _mm_mulhi_epi16(psi_r_p7_m5, ONE_OVER_SQRT_2);
+    psi_r_p7_m5 = _mm_slli_epi16(psi_r_p7_m5, 1);
+    psi_r_p7_m7 = _mm_mulhi_epi16(psi_r_p7_m7, ONE_OVER_SQRT_2);
+    psi_r_p7_m7 = _mm_slli_epi16(psi_r_p7_m7, 1);
+    psi_r_p5_p7 = _mm_mulhi_epi16(psi_r_p5_p7, ONE_OVER_SQRT_2);
+    psi_r_p5_p7 = _mm_slli_epi16(psi_r_p5_p7, 1);
+    psi_r_p5_p5 = _mm_mulhi_epi16(psi_r_p5_p5, ONE_OVER_SQRT_2);
+    psi_r_p5_p5 = _mm_slli_epi16(psi_r_p5_p5, 1);
+    psi_r_p5_p3 = _mm_mulhi_epi16(psi_r_p5_p3, ONE_OVER_SQRT_2);
+    psi_r_p5_p3 = _mm_slli_epi16(psi_r_p5_p3, 1);
+    psi_r_p5_p1 = _mm_mulhi_epi16(psi_r_p5_p1, ONE_OVER_SQRT_2);
+    psi_r_p5_p1 = _mm_slli_epi16(psi_r_p5_p1, 1);
+    psi_r_p5_m1 = _mm_mulhi_epi16(psi_r_p5_m1, ONE_OVER_SQRT_2);
+    psi_r_p5_m1 = _mm_slli_epi16(psi_r_p5_m1, 1);
+    psi_r_p5_m3 = _mm_mulhi_epi16(psi_r_p5_m3, ONE_OVER_SQRT_2);
+    psi_r_p5_m3 = _mm_slli_epi16(psi_r_p5_m3, 1);
+    psi_r_p5_m5 = _mm_mulhi_epi16(psi_r_p5_m5, ONE_OVER_SQRT_2);
+    psi_r_p5_m5 = _mm_slli_epi16(psi_r_p5_m5, 1);
+    psi_r_p5_m7 = _mm_mulhi_epi16(psi_r_p5_m7, ONE_OVER_SQRT_2);
+    psi_r_p5_m7 = _mm_slli_epi16(psi_r_p5_m7, 1);
+    psi_r_p3_p7 = _mm_mulhi_epi16(psi_r_p3_p7, ONE_OVER_SQRT_2);
+    psi_r_p3_p7 = _mm_slli_epi16(psi_r_p3_p7, 1);
+    psi_r_p3_p5 = _mm_mulhi_epi16(psi_r_p3_p5, ONE_OVER_SQRT_2);
+    psi_r_p3_p5 = _mm_slli_epi16(psi_r_p3_p5, 1);
+    psi_r_p3_p3 = _mm_mulhi_epi16(psi_r_p3_p3, ONE_OVER_SQRT_2);
+    psi_r_p3_p3 = _mm_slli_epi16(psi_r_p3_p3, 1);
+    psi_r_p3_p1 = _mm_mulhi_epi16(psi_r_p3_p1, ONE_OVER_SQRT_2);
+    psi_r_p3_p1 = _mm_slli_epi16(psi_r_p3_p1, 1);
+    psi_r_p3_m1 = _mm_mulhi_epi16(psi_r_p3_m1, ONE_OVER_SQRT_2);
+    psi_r_p3_m1 = _mm_slli_epi16(psi_r_p3_m1, 1);
+    psi_r_p3_m3 = _mm_mulhi_epi16(psi_r_p3_m3, ONE_OVER_SQRT_2);
+    psi_r_p3_m3 = _mm_slli_epi16(psi_r_p3_m3, 1);
+    psi_r_p3_m5 = _mm_mulhi_epi16(psi_r_p3_m5, ONE_OVER_SQRT_2);
+    psi_r_p3_m5 = _mm_slli_epi16(psi_r_p3_m5, 1);
+    psi_r_p3_m7 = _mm_mulhi_epi16(psi_r_p3_m7, ONE_OVER_SQRT_2);
+    psi_r_p3_m7 = _mm_slli_epi16(psi_r_p3_m7, 1);
+    psi_r_p1_p7 = _mm_mulhi_epi16(psi_r_p1_p7, ONE_OVER_SQRT_2);
+    psi_r_p1_p7 = _mm_slli_epi16(psi_r_p1_p7, 1);
+    psi_r_p1_p5 = _mm_mulhi_epi16(psi_r_p1_p5, ONE_OVER_SQRT_2);
+    psi_r_p1_p5 = _mm_slli_epi16(psi_r_p1_p5, 1);
+    psi_r_p1_p3 = _mm_mulhi_epi16(psi_r_p1_p3, ONE_OVER_SQRT_2);
+    psi_r_p1_p3 = _mm_slli_epi16(psi_r_p1_p3, 1);
+    psi_r_p1_p1 = _mm_mulhi_epi16(psi_r_p1_p1, ONE_OVER_SQRT_2);
+    psi_r_p1_p1 = _mm_slli_epi16(psi_r_p1_p1, 1);
+    psi_r_p1_m1 = _mm_mulhi_epi16(psi_r_p1_m1, ONE_OVER_SQRT_2);
+    psi_r_p1_m1 = _mm_slli_epi16(psi_r_p1_m1, 1);
+    psi_r_p1_m3 = _mm_mulhi_epi16(psi_r_p1_m3, ONE_OVER_SQRT_2);
+    psi_r_p1_m3 = _mm_slli_epi16(psi_r_p1_m3, 1);
+    psi_r_p1_m5 = _mm_mulhi_epi16(psi_r_p1_m5, ONE_OVER_SQRT_2);
+    psi_r_p1_m5 = _mm_slli_epi16(psi_r_p1_m5, 1);
+    psi_r_p1_m7 = _mm_mulhi_epi16(psi_r_p1_m7, ONE_OVER_SQRT_2);
+    psi_r_p1_m7 = _mm_slli_epi16(psi_r_p1_m7, 1);
+    psi_r_m1_p7 = _mm_mulhi_epi16(psi_r_m1_p7, ONE_OVER_SQRT_2);
+    psi_r_m1_p7 = _mm_slli_epi16(psi_r_m1_p7, 1);
+    psi_r_m1_p5 = _mm_mulhi_epi16(psi_r_m1_p5, ONE_OVER_SQRT_2);
+    psi_r_m1_p5 = _mm_slli_epi16(psi_r_m1_p5, 1);
+    psi_r_m1_p3 = _mm_mulhi_epi16(psi_r_m1_p3, ONE_OVER_SQRT_2);
+    psi_r_m1_p3 = _mm_slli_epi16(psi_r_m1_p3, 1);
+    psi_r_m1_p1 = _mm_mulhi_epi16(psi_r_m1_p1, ONE_OVER_SQRT_2);
+    psi_r_m1_p1 = _mm_slli_epi16(psi_r_m1_p1, 1);
+    psi_r_m1_m1 = _mm_mulhi_epi16(psi_r_m1_m1, ONE_OVER_SQRT_2);
+    psi_r_m1_m1 = _mm_slli_epi16(psi_r_m1_m1, 1);
+    psi_r_m1_m3 = _mm_mulhi_epi16(psi_r_m1_m3, ONE_OVER_SQRT_2);
+    psi_r_m1_m3 = _mm_slli_epi16(psi_r_m1_m3, 1);
+    psi_r_m1_m5 = _mm_mulhi_epi16(psi_r_m1_m5, ONE_OVER_SQRT_2);
+    psi_r_m1_m5 = _mm_slli_epi16(psi_r_m1_m5, 1);
+    psi_r_m1_m7 = _mm_mulhi_epi16(psi_r_m1_m7, ONE_OVER_SQRT_2);
+    psi_r_m1_m7 = _mm_slli_epi16(psi_r_m1_m7, 1);
+    psi_r_m3_p7 = _mm_mulhi_epi16(psi_r_m3_p7, ONE_OVER_SQRT_2);
+    psi_r_m3_p7 = _mm_slli_epi16(psi_r_m3_p7, 1);
+    psi_r_m3_p5 = _mm_mulhi_epi16(psi_r_m3_p5, ONE_OVER_SQRT_2);
+    psi_r_m3_p5 = _mm_slli_epi16(psi_r_m3_p5, 1);
+    psi_r_m3_p3 = _mm_mulhi_epi16(psi_r_m3_p3, ONE_OVER_SQRT_2);
+    psi_r_m3_p3 = _mm_slli_epi16(psi_r_m3_p3, 1);
+    psi_r_m3_p1 = _mm_mulhi_epi16(psi_r_m3_p1, ONE_OVER_SQRT_2);
+    psi_r_m3_p1 = _mm_slli_epi16(psi_r_m3_p1, 1);
+    psi_r_m3_m1 = _mm_mulhi_epi16(psi_r_m3_m1, ONE_OVER_SQRT_2);
+    psi_r_m3_m1 = _mm_slli_epi16(psi_r_m3_m1, 1);
+    psi_r_m3_m3 = _mm_mulhi_epi16(psi_r_m3_m3, ONE_OVER_SQRT_2);
+    psi_r_m3_m3 = _mm_slli_epi16(psi_r_m3_m3, 1);
+    psi_r_m3_m5 = _mm_mulhi_epi16(psi_r_m3_m5, ONE_OVER_SQRT_2);
+    psi_r_m3_m5 = _mm_slli_epi16(psi_r_m3_m5, 1);
+    psi_r_m3_m7 = _mm_mulhi_epi16(psi_r_m3_m7, ONE_OVER_SQRT_2);
+    psi_r_m3_m7 = _mm_slli_epi16(psi_r_m3_m7, 1);
+    psi_r_m5_p7 = _mm_mulhi_epi16(psi_r_m5_p7, ONE_OVER_SQRT_2);
+    psi_r_m5_p7 = _mm_slli_epi16(psi_r_m5_p7, 1);
+    psi_r_m5_p5 = _mm_mulhi_epi16(psi_r_m5_p5, ONE_OVER_SQRT_2);
+    psi_r_m5_p5 = _mm_slli_epi16(psi_r_m5_p5, 1);
+    psi_r_m5_p3 = _mm_mulhi_epi16(psi_r_m5_p3, ONE_OVER_SQRT_2);
+    psi_r_m5_p3 = _mm_slli_epi16(psi_r_m5_p3, 1);
+    psi_r_m5_p1 = _mm_mulhi_epi16(psi_r_m5_p1, ONE_OVER_SQRT_2);
+    psi_r_m5_p1 = _mm_slli_epi16(psi_r_m5_p1, 1);
+    psi_r_m5_m1 = _mm_mulhi_epi16(psi_r_m5_m1, ONE_OVER_SQRT_2);
+    psi_r_m5_m1 = _mm_slli_epi16(psi_r_m5_m1, 1);
+    psi_r_m5_m3 = _mm_mulhi_epi16(psi_r_m5_m3, ONE_OVER_SQRT_2);
+    psi_r_m5_m3 = _mm_slli_epi16(psi_r_m5_m3, 1);
+    psi_r_m5_m5 = _mm_mulhi_epi16(psi_r_m5_m5, ONE_OVER_SQRT_2);
+    psi_r_m5_m5 = _mm_slli_epi16(psi_r_m5_m5, 1);
+    psi_r_m5_m7 = _mm_mulhi_epi16(psi_r_m5_m7, ONE_OVER_SQRT_2);
+    psi_r_m5_m7 = _mm_slli_epi16(psi_r_m5_m7, 1);
+    psi_r_m7_p7 = _mm_mulhi_epi16(psi_r_m7_p7, ONE_OVER_SQRT_2);
+    psi_r_m7_p7 = _mm_slli_epi16(psi_r_m7_p7, 1);
+    psi_r_m7_p5 = _mm_mulhi_epi16(psi_r_m7_p5, ONE_OVER_SQRT_2);
+    psi_r_m7_p5 = _mm_slli_epi16(psi_r_m7_p5, 1);
+    psi_r_m7_p3 = _mm_mulhi_epi16(psi_r_m7_p3, ONE_OVER_SQRT_2);
+    psi_r_m7_p3 = _mm_slli_epi16(psi_r_m7_p3, 1);
+    psi_r_m7_p1 = _mm_mulhi_epi16(psi_r_m7_p1, ONE_OVER_SQRT_2);
+    psi_r_m7_p1 = _mm_slli_epi16(psi_r_m7_p1, 1);
+    psi_r_m7_m1 = _mm_mulhi_epi16(psi_r_m7_m1, ONE_OVER_SQRT_2);
+    psi_r_m7_m1 = _mm_slli_epi16(psi_r_m7_m1, 1);
+    psi_r_m7_m3 = _mm_mulhi_epi16(psi_r_m7_m3, ONE_OVER_SQRT_2);
+    psi_r_m7_m3 = _mm_slli_epi16(psi_r_m7_m3, 1);
+    psi_r_m7_m5 = _mm_mulhi_epi16(psi_r_m7_m5, ONE_OVER_SQRT_2);
+    psi_r_m7_m5 = _mm_slli_epi16(psi_r_m7_m5, 1);
+    psi_r_m7_m7 = _mm_mulhi_epi16(psi_r_m7_m7, ONE_OVER_SQRT_2);
+    psi_r_m7_m7 = _mm_slli_epi16(psi_r_m7_m7, 1);
+
+    psi_i_p7_p7 = _mm_mulhi_epi16(psi_i_p7_p7, ONE_OVER_SQRT_2);
+    psi_i_p7_p7 = _mm_slli_epi16(psi_i_p7_p7, 1);
+    psi_i_p7_p5 = _mm_mulhi_epi16(psi_i_p7_p5, ONE_OVER_SQRT_2);
+    psi_i_p7_p5 = _mm_slli_epi16(psi_i_p7_p5, 1);
+    psi_i_p7_p3 = _mm_mulhi_epi16(psi_i_p7_p3, ONE_OVER_SQRT_2);
+    psi_i_p7_p3 = _mm_slli_epi16(psi_i_p7_p3, 1);
+    psi_i_p7_p1 = _mm_mulhi_epi16(psi_i_p7_p1, ONE_OVER_SQRT_2);
+    psi_i_p7_p1 = _mm_slli_epi16(psi_i_p7_p1, 1);
+    psi_i_p7_m1 = _mm_mulhi_epi16(psi_i_p7_m1, ONE_OVER_SQRT_2);
+    psi_i_p7_m1 = _mm_slli_epi16(psi_i_p7_m1, 1);
+    psi_i_p7_m3 = _mm_mulhi_epi16(psi_i_p7_m3, ONE_OVER_SQRT_2);
+    psi_i_p7_m3 = _mm_slli_epi16(psi_i_p7_m3, 1);
+    psi_i_p7_m5 = _mm_mulhi_epi16(psi_i_p7_m5, ONE_OVER_SQRT_2);
+    psi_i_p7_m5 = _mm_slli_epi16(psi_i_p7_m5, 1);
+    psi_i_p7_m7 = _mm_mulhi_epi16(psi_i_p7_m7, ONE_OVER_SQRT_2);
+    psi_i_p7_m7 = _mm_slli_epi16(psi_i_p7_m7, 1);
+    psi_i_p5_p7 = _mm_mulhi_epi16(psi_i_p5_p7, ONE_OVER_SQRT_2);
+    psi_i_p5_p7 = _mm_slli_epi16(psi_i_p5_p7, 1);
+    psi_i_p5_p5 = _mm_mulhi_epi16(psi_i_p5_p5, ONE_OVER_SQRT_2);
+    psi_i_p5_p5 = _mm_slli_epi16(psi_i_p5_p5, 1);
+    psi_i_p5_p3 = _mm_mulhi_epi16(psi_i_p5_p3, ONE_OVER_SQRT_2);
+    psi_i_p5_p3 = _mm_slli_epi16(psi_i_p5_p3, 1);
+    psi_i_p5_p1 = _mm_mulhi_epi16(psi_i_p5_p1, ONE_OVER_SQRT_2);
+    psi_i_p5_p1 = _mm_slli_epi16(psi_i_p5_p1, 1);
+    psi_i_p5_m1 = _mm_mulhi_epi16(psi_i_p5_m1, ONE_OVER_SQRT_2);
+    psi_i_p5_m1 = _mm_slli_epi16(psi_i_p5_m1, 1);
+    psi_i_p5_m3 = _mm_mulhi_epi16(psi_i_p5_m3, ONE_OVER_SQRT_2);
+    psi_i_p5_m3 = _mm_slli_epi16(psi_i_p5_m3, 1);
+    psi_i_p5_m5 = _mm_mulhi_epi16(psi_i_p5_m5, ONE_OVER_SQRT_2);
+    psi_i_p5_m5 = _mm_slli_epi16(psi_i_p5_m5, 1);
+    psi_i_p5_m7 = _mm_mulhi_epi16(psi_i_p5_m7, ONE_OVER_SQRT_2);
+    psi_i_p5_m7 = _mm_slli_epi16(psi_i_p5_m7, 1);
+    psi_i_p3_p7 = _mm_mulhi_epi16(psi_i_p3_p7, ONE_OVER_SQRT_2);
+    psi_i_p3_p7 = _mm_slli_epi16(psi_i_p3_p7, 1);
+    psi_i_p3_p5 = _mm_mulhi_epi16(psi_i_p3_p5, ONE_OVER_SQRT_2);
+    psi_i_p3_p5 = _mm_slli_epi16(psi_i_p3_p5, 1);
+    psi_i_p3_p3 = _mm_mulhi_epi16(psi_i_p3_p3, ONE_OVER_SQRT_2);
+    psi_i_p3_p3 = _mm_slli_epi16(psi_i_p3_p3, 1);
+    psi_i_p3_p1 = _mm_mulhi_epi16(psi_i_p3_p1, ONE_OVER_SQRT_2);
+    psi_i_p3_p1 = _mm_slli_epi16(psi_i_p3_p1, 1);
+    psi_i_p3_m1 = _mm_mulhi_epi16(psi_i_p3_m1, ONE_OVER_SQRT_2);
+    psi_i_p3_m1 = _mm_slli_epi16(psi_i_p3_m1, 1);
+    psi_i_p3_m3 = _mm_mulhi_epi16(psi_i_p3_m3, ONE_OVER_SQRT_2);
+    psi_i_p3_m3 = _mm_slli_epi16(psi_i_p3_m3, 1);
+    psi_i_p3_m5 = _mm_mulhi_epi16(psi_i_p3_m5, ONE_OVER_SQRT_2);
+    psi_i_p3_m5 = _mm_slli_epi16(psi_i_p3_m5, 1);
+    psi_i_p3_m7 = _mm_mulhi_epi16(psi_i_p3_m7, ONE_OVER_SQRT_2);
+    psi_i_p3_m7 = _mm_slli_epi16(psi_i_p3_m7, 1);
+    psi_i_p1_p7 = _mm_mulhi_epi16(psi_i_p1_p7, ONE_OVER_SQRT_2);
+    psi_i_p1_p7 = _mm_slli_epi16(psi_i_p1_p7, 1);
+    psi_i_p1_p5 = _mm_mulhi_epi16(psi_i_p1_p5, ONE_OVER_SQRT_2);
+    psi_i_p1_p5 = _mm_slli_epi16(psi_i_p1_p5, 1);
+    psi_i_p1_p3 = _mm_mulhi_epi16(psi_i_p1_p3, ONE_OVER_SQRT_2);
+    psi_i_p1_p3 = _mm_slli_epi16(psi_i_p1_p3, 1);
+    psi_i_p1_p1 = _mm_mulhi_epi16(psi_i_p1_p1, ONE_OVER_SQRT_2);
+    psi_i_p1_p1 = _mm_slli_epi16(psi_i_p1_p1, 1);
+    psi_i_p1_m1 = _mm_mulhi_epi16(psi_i_p1_m1, ONE_OVER_SQRT_2);
+    psi_i_p1_m1 = _mm_slli_epi16(psi_i_p1_m1, 1);
+    psi_i_p1_m3 = _mm_mulhi_epi16(psi_i_p1_m3, ONE_OVER_SQRT_2);
+    psi_i_p1_m3 = _mm_slli_epi16(psi_i_p1_m3, 1);
+    psi_i_p1_m5 = _mm_mulhi_epi16(psi_i_p1_m5, ONE_OVER_SQRT_2);
+    psi_i_p1_m5 = _mm_slli_epi16(psi_i_p1_m5, 1);
+    psi_i_p1_m7 = _mm_mulhi_epi16(psi_i_p1_m7, ONE_OVER_SQRT_2);
+    psi_i_p1_m7 = _mm_slli_epi16(psi_i_p1_m7, 1);
+    psi_i_m1_p7 = _mm_mulhi_epi16(psi_i_m1_p7, ONE_OVER_SQRT_2);
+    psi_i_m1_p7 = _mm_slli_epi16(psi_i_m1_p7, 1);
+    psi_i_m1_p5 = _mm_mulhi_epi16(psi_i_m1_p5, ONE_OVER_SQRT_2);
+    psi_i_m1_p5 = _mm_slli_epi16(psi_i_m1_p5, 1);
+    psi_i_m1_p3 = _mm_mulhi_epi16(psi_i_m1_p3, ONE_OVER_SQRT_2);
+    psi_i_m1_p3 = _mm_slli_epi16(psi_i_m1_p3, 1);
+    psi_i_m1_p1 = _mm_mulhi_epi16(psi_i_m1_p1, ONE_OVER_SQRT_2);
+    psi_i_m1_p1 = _mm_slli_epi16(psi_i_m1_p1, 1);
+    psi_i_m1_m1 = _mm_mulhi_epi16(psi_i_m1_m1, ONE_OVER_SQRT_2);
+    psi_i_m1_m1 = _mm_slli_epi16(psi_i_m1_m1, 1);
+    psi_i_m1_m3 = _mm_mulhi_epi16(psi_i_m1_m3, ONE_OVER_SQRT_2);
+    psi_i_m1_m3 = _mm_slli_epi16(psi_i_m1_m3, 1);
+    psi_i_m1_m5 = _mm_mulhi_epi16(psi_i_m1_m5, ONE_OVER_SQRT_2);
+    psi_i_m1_m5 = _mm_slli_epi16(psi_i_m1_m5, 1);
+    psi_i_m1_m7 = _mm_mulhi_epi16(psi_i_m1_m7, ONE_OVER_SQRT_2);
+    psi_i_m1_m7 = _mm_slli_epi16(psi_i_m1_m7, 1);
+    psi_i_m3_p7 = _mm_mulhi_epi16(psi_i_m3_p7, ONE_OVER_SQRT_2);
+    psi_i_m3_p7 = _mm_slli_epi16(psi_i_m3_p7, 1);
+    psi_i_m3_p5 = _mm_mulhi_epi16(psi_i_m3_p5, ONE_OVER_SQRT_2);
+    psi_i_m3_p5 = _mm_slli_epi16(psi_i_m3_p5, 1);
+    psi_i_m3_p3 = _mm_mulhi_epi16(psi_i_m3_p3, ONE_OVER_SQRT_2);
+    psi_i_m3_p3 = _mm_slli_epi16(psi_i_m3_p3, 1);
+    psi_i_m3_p1 = _mm_mulhi_epi16(psi_i_m3_p1, ONE_OVER_SQRT_2);
+    psi_i_m3_p1 = _mm_slli_epi16(psi_i_m3_p1, 1);
+    psi_i_m3_m1 = _mm_mulhi_epi16(psi_i_m3_m1, ONE_OVER_SQRT_2);
+    psi_i_m3_m1 = _mm_slli_epi16(psi_i_m3_m1, 1);
+    psi_i_m3_m3 = _mm_mulhi_epi16(psi_i_m3_m3, ONE_OVER_SQRT_2);
+    psi_i_m3_m3 = _mm_slli_epi16(psi_i_m3_m3, 1);
+    psi_i_m3_m5 = _mm_mulhi_epi16(psi_i_m3_m5, ONE_OVER_SQRT_2);
+    psi_i_m3_m5 = _mm_slli_epi16(psi_i_m3_m5, 1);
+    psi_i_m3_m7 = _mm_mulhi_epi16(psi_i_m3_m7, ONE_OVER_SQRT_2);
+    psi_i_m3_m7 = _mm_slli_epi16(psi_i_m3_m7, 1);
+    psi_i_m5_p7 = _mm_mulhi_epi16(psi_i_m5_p7, ONE_OVER_SQRT_2);
+    psi_i_m5_p7 = _mm_slli_epi16(psi_i_m5_p7, 1);
+    psi_i_m5_p5 = _mm_mulhi_epi16(psi_i_m5_p5, ONE_OVER_SQRT_2);
+    psi_i_m5_p5 = _mm_slli_epi16(psi_i_m5_p5, 1);
+    psi_i_m5_p3 = _mm_mulhi_epi16(psi_i_m5_p3, ONE_OVER_SQRT_2);
+    psi_i_m5_p3 = _mm_slli_epi16(psi_i_m5_p3, 1);
+    psi_i_m5_p1 = _mm_mulhi_epi16(psi_i_m5_p1, ONE_OVER_SQRT_2);
+    psi_i_m5_p1 = _mm_slli_epi16(psi_i_m5_p1, 1);
+    psi_i_m5_m1 = _mm_mulhi_epi16(psi_i_m5_m1, ONE_OVER_SQRT_2);
+    psi_i_m5_m1 = _mm_slli_epi16(psi_i_m5_m1, 1);
+    psi_i_m5_m3 = _mm_mulhi_epi16(psi_i_m5_m3, ONE_OVER_SQRT_2);
+    psi_i_m5_m3 = _mm_slli_epi16(psi_i_m5_m3, 1);
+    psi_i_m5_m5 = _mm_mulhi_epi16(psi_i_m5_m5, ONE_OVER_SQRT_2);
+    psi_i_m5_m5 = _mm_slli_epi16(psi_i_m5_m5, 1);
+    psi_i_m5_m7 = _mm_mulhi_epi16(psi_i_m5_m7, ONE_OVER_SQRT_2);
+    psi_i_m5_m7 = _mm_slli_epi16(psi_i_m5_m7, 1);
+    psi_i_m7_p7 = _mm_mulhi_epi16(psi_i_m7_p7, ONE_OVER_SQRT_2);
+    psi_i_m7_p7 = _mm_slli_epi16(psi_i_m7_p7, 1);
+    psi_i_m7_p5 = _mm_mulhi_epi16(psi_i_m7_p5, ONE_OVER_SQRT_2);
+    psi_i_m7_p5 = _mm_slli_epi16(psi_i_m7_p5, 1);
+    psi_i_m7_p3 = _mm_mulhi_epi16(psi_i_m7_p3, ONE_OVER_SQRT_2);
+    psi_i_m7_p3 = _mm_slli_epi16(psi_i_m7_p3, 1);
+    psi_i_m7_p1 = _mm_mulhi_epi16(psi_i_m7_p1, ONE_OVER_SQRT_2);
+    psi_i_m7_p1 = _mm_slli_epi16(psi_i_m7_p1, 1);
+    psi_i_m7_m1 = _mm_mulhi_epi16(psi_i_m7_m1, ONE_OVER_SQRT_2);
+    psi_i_m7_m1 = _mm_slli_epi16(psi_i_m7_m1, 1);
+    psi_i_m7_m3 = _mm_mulhi_epi16(psi_i_m7_m3, ONE_OVER_SQRT_2);
+    psi_i_m7_m3 = _mm_slli_epi16(psi_i_m7_m3, 1);
+    psi_i_m7_m5 = _mm_mulhi_epi16(psi_i_m7_m5, ONE_OVER_SQRT_2);
+    psi_i_m7_m5 = _mm_slli_epi16(psi_i_m7_m5, 1);
+    psi_i_m7_m7 = _mm_mulhi_epi16(psi_i_m7_m7, ONE_OVER_SQRT_2);
+    psi_i_m7_m7 = _mm_slli_epi16(psi_i_m7_m7, 1);
+
+    psi_a_p7_p7 = _mm_adds_epi16(psi_r_p7_p7, psi_i_p7_p7);
+    psi_a_p7_p5 = _mm_adds_epi16(psi_r_p7_p5, psi_i_p7_p5);
+    psi_a_p7_p3 = _mm_adds_epi16(psi_r_p7_p3, psi_i_p7_p3);
+    psi_a_p7_p1 = _mm_adds_epi16(psi_r_p7_p1, psi_i_p7_p1);
+    psi_a_p7_m1 = _mm_adds_epi16(psi_r_p7_m1, psi_i_p7_m1);
+    psi_a_p7_m3 = _mm_adds_epi16(psi_r_p7_m3, psi_i_p7_m3);
+    psi_a_p7_m5 = _mm_adds_epi16(psi_r_p7_m5, psi_i_p7_m5);
+    psi_a_p7_m7 = _mm_adds_epi16(psi_r_p7_m7, psi_i_p7_m7);
+    psi_a_p5_p7 = _mm_adds_epi16(psi_r_p5_p7, psi_i_p5_p7);
+    psi_a_p5_p5 = _mm_adds_epi16(psi_r_p5_p5, psi_i_p5_p5);
+    psi_a_p5_p3 = _mm_adds_epi16(psi_r_p5_p3, psi_i_p5_p3);
+    psi_a_p5_p1 = _mm_adds_epi16(psi_r_p5_p1, psi_i_p5_p1);
+    psi_a_p5_m1 = _mm_adds_epi16(psi_r_p5_m1, psi_i_p5_m1);
+    psi_a_p5_m3 = _mm_adds_epi16(psi_r_p5_m3, psi_i_p5_m3);
+    psi_a_p5_m5 = _mm_adds_epi16(psi_r_p5_m5, psi_i_p5_m5);
+    psi_a_p5_m7 = _mm_adds_epi16(psi_r_p5_m7, psi_i_p5_m7);
+    psi_a_p3_p7 = _mm_adds_epi16(psi_r_p3_p7, psi_i_p3_p7);
+    psi_a_p3_p5 = _mm_adds_epi16(psi_r_p3_p5, psi_i_p3_p5);
+    psi_a_p3_p3 = _mm_adds_epi16(psi_r_p3_p3, psi_i_p3_p3);
+    psi_a_p3_p1 = _mm_adds_epi16(psi_r_p3_p1, psi_i_p3_p1);
+    psi_a_p3_m1 = _mm_adds_epi16(psi_r_p3_m1, psi_i_p3_m1);
+    psi_a_p3_m3 = _mm_adds_epi16(psi_r_p3_m3, psi_i_p3_m3);
+    psi_a_p3_m5 = _mm_adds_epi16(psi_r_p3_m5, psi_i_p3_m5);
+    psi_a_p3_m7 = _mm_adds_epi16(psi_r_p3_m7, psi_i_p3_m7);
+    psi_a_p1_p7 = _mm_adds_epi16(psi_r_p1_p7, psi_i_p1_p7);
+    psi_a_p1_p5 = _mm_adds_epi16(psi_r_p1_p5, psi_i_p1_p5);
+    psi_a_p1_p3 = _mm_adds_epi16(psi_r_p1_p3, psi_i_p1_p3);
+    psi_a_p1_p1 = _mm_adds_epi16(psi_r_p1_p1, psi_i_p1_p1);
+    psi_a_p1_m1 = _mm_adds_epi16(psi_r_p1_m1, psi_i_p1_m1);
+    psi_a_p1_m3 = _mm_adds_epi16(psi_r_p1_m3, psi_i_p1_m3);
+    psi_a_p1_m5 = _mm_adds_epi16(psi_r_p1_m5, psi_i_p1_m5);
+    psi_a_p1_m7 = _mm_adds_epi16(psi_r_p1_m7, psi_i_p1_m7);
+    psi_a_m1_p7 = _mm_adds_epi16(psi_r_m1_p7, psi_i_m1_p7);
+    psi_a_m1_p5 = _mm_adds_epi16(psi_r_m1_p5, psi_i_m1_p5);
+    psi_a_m1_p3 = _mm_adds_epi16(psi_r_m1_p3, psi_i_m1_p3);
+    psi_a_m1_p1 = _mm_adds_epi16(psi_r_m1_p1, psi_i_m1_p1);
+    psi_a_m1_m1 = _mm_adds_epi16(psi_r_m1_m1, psi_i_m1_m1);
+    psi_a_m1_m3 = _mm_adds_epi16(psi_r_m1_m3, psi_i_m1_m3);
+    psi_a_m1_m5 = _mm_adds_epi16(psi_r_m1_m5, psi_i_m1_m5);
+    psi_a_m1_m7 = _mm_adds_epi16(psi_r_m1_m7, psi_i_m1_m7);
+    psi_a_m3_p7 = _mm_adds_epi16(psi_r_m3_p7, psi_i_m3_p7);
+    psi_a_m3_p5 = _mm_adds_epi16(psi_r_m3_p5, psi_i_m3_p5);
+    psi_a_m3_p3 = _mm_adds_epi16(psi_r_m3_p3, psi_i_m3_p3);
+    psi_a_m3_p1 = _mm_adds_epi16(psi_r_m3_p1, psi_i_m3_p1);
+    psi_a_m3_m1 = _mm_adds_epi16(psi_r_m3_m1, psi_i_m3_m1);
+    psi_a_m3_m3 = _mm_adds_epi16(psi_r_m3_m3, psi_i_m3_m3);
+    psi_a_m3_m5 = _mm_adds_epi16(psi_r_m3_m5, psi_i_m3_m5);
+    psi_a_m3_m7 = _mm_adds_epi16(psi_r_m3_m7, psi_i_m3_m7);
+    psi_a_m5_p7 = _mm_adds_epi16(psi_r_m5_p7, psi_i_m5_p7);
+    psi_a_m5_p5 = _mm_adds_epi16(psi_r_m5_p5, psi_i_m5_p5);
+    psi_a_m5_p3 = _mm_adds_epi16(psi_r_m5_p3, psi_i_m5_p3);
+    psi_a_m5_p1 = _mm_adds_epi16(psi_r_m5_p1, psi_i_m5_p1);
+    psi_a_m5_m1 = _mm_adds_epi16(psi_r_m5_m1, psi_i_m5_m1);
+    psi_a_m5_m3 = _mm_adds_epi16(psi_r_m5_m3, psi_i_m5_m3);
+    psi_a_m5_m5 = _mm_adds_epi16(psi_r_m5_m5, psi_i_m5_m5);
+    psi_a_m5_m7 = _mm_adds_epi16(psi_r_m5_m7, psi_i_m5_m7);
+    psi_a_m7_p7 = _mm_adds_epi16(psi_r_m7_p7, psi_i_m7_p7);
+    psi_a_m7_p5 = _mm_adds_epi16(psi_r_m7_p5, psi_i_m7_p5);
+    psi_a_m7_p3 = _mm_adds_epi16(psi_r_m7_p3, psi_i_m7_p3);
+    psi_a_m7_p1 = _mm_adds_epi16(psi_r_m7_p1, psi_i_m7_p1);
+    psi_a_m7_m1 = _mm_adds_epi16(psi_r_m7_m1, psi_i_m7_m1);
+    psi_a_m7_m3 = _mm_adds_epi16(psi_r_m7_m3, psi_i_m7_m3);
+    psi_a_m7_m5 = _mm_adds_epi16(psi_r_m7_m5, psi_i_m7_m5);
+    psi_a_m7_m7 = _mm_adds_epi16(psi_r_m7_m7, psi_i_m7_m7);
+
+    // Computing different multiples of ||h0||^2; each variable name carries x^2+y^2 for the labelled pair (e.g. 58 for x=3, y=7)
+    // x=1, y=1
+    ch_mag_2_over_42_with_sigma2 = _mm_mulhi_epi16(ch_mag_des,ONE_OVER_FOUR_SQRT_42);
+    ch_mag_2_over_42_with_sigma2 = _mm_slli_epi16(ch_mag_2_over_42_with_sigma2,1);
+    // x=1, y=3
+    ch_mag_10_over_42_with_sigma2 = _mm_mulhi_epi16(ch_mag_des,FIVE_OVER_FOUR_SQRT_42);
+    ch_mag_10_over_42_with_sigma2 = _mm_slli_epi16(ch_mag_10_over_42_with_sigma2,1);
+    // x=1, y=5
+    ch_mag_26_over_42_with_sigma2 = _mm_mulhi_epi16(ch_mag_des,THIRTEEN_OVER_FOUR_SQRT_42);
+    ch_mag_26_over_42_with_sigma2 = _mm_slli_epi16(ch_mag_26_over_42_with_sigma2,1);
+    // x=1, y=7
+    ch_mag_50_over_42_with_sigma2 = _mm_mulhi_epi16(ch_mag_des,TWENTYFIVE_OVER_FOUR_SQRT_42);
+    ch_mag_50_over_42_with_sigma2 = _mm_slli_epi16(ch_mag_50_over_42_with_sigma2,1);
+    // x=3, y=3
+    ch_mag_18_over_42_with_sigma2 = _mm_mulhi_epi16(ch_mag_des,NINE_OVER_FOUR_SQRT_42);
+    ch_mag_18_over_42_with_sigma2 = _mm_slli_epi16(ch_mag_18_over_42_with_sigma2,1);
+    // x=3, y=5
+    ch_mag_34_over_42_with_sigma2 = _mm_mulhi_epi16(ch_mag_des,SEVENTEEN_OVER_FOUR_SQRT_42);
+    ch_mag_34_over_42_with_sigma2 = _mm_slli_epi16(ch_mag_34_over_42_with_sigma2,1);
+    // x=3, y=7 -- NOTE(review): shifted left by 2 (terms above use 1); presumably the constant is stored at half scale, confirm against its definition
+    ch_mag_58_over_42_with_sigma2 = _mm_mulhi_epi16(ch_mag_des,TWENTYNINE_OVER_FOUR_SQRT_42);
+    ch_mag_58_over_42_with_sigma2 = _mm_slli_epi16(ch_mag_58_over_42_with_sigma2,2);
+    // x=5, y=5 -- same 50/42 term as x=1, y=7 above; these two statements recompute the identical value
+    ch_mag_50_over_42_with_sigma2 = _mm_mulhi_epi16(ch_mag_des,TWENTYFIVE_OVER_FOUR_SQRT_42);
+    ch_mag_50_over_42_with_sigma2 = _mm_slli_epi16(ch_mag_50_over_42_with_sigma2,1);
+    // x=5, y=7 -- NOTE(review): shift of 2 here as well; confirm the scale of THIRTYSEVEN_OVER_FOUR_SQRT_42
+    ch_mag_74_over_42_with_sigma2 = _mm_mulhi_epi16(ch_mag_des,THIRTYSEVEN_OVER_FOUR_SQRT_42);
+    ch_mag_74_over_42_with_sigma2 = _mm_slli_epi16(ch_mag_74_over_42_with_sigma2,2);
+    // x=7, y=7 -- NOTE(review): shift of 2 here as well; confirm the scale of FORTYNINE_OVER_FOUR_SQRT_42
+    ch_mag_98_over_42_with_sigma2 = _mm_mulhi_epi16(ch_mag_des,FORTYNINE_OVER_FOUR_SQRT_42);
+    ch_mag_98_over_42_with_sigma2 = _mm_slli_epi16(ch_mag_98_over_42_with_sigma2,2);
+
+    // Computing Metrics
+    xmm1 = _mm_adds_epi16(psi_a_p7_p7, y0_p_7_7);
+    bit_met_p7_p7 = _mm_subs_epi16(xmm1, ch_mag_98_over_42_with_sigma2);
+    xmm1 = _mm_adds_epi16(psi_a_p7_p5, y0_p_7_5);
+    bit_met_p7_p5 = _mm_subs_epi16(xmm1, ch_mag_74_over_42_with_sigma2);
+    xmm1 = _mm_adds_epi16(psi_a_p7_p3, y0_p_7_3);
+    bit_met_p7_p3 = _mm_subs_epi16(xmm1, ch_mag_58_over_42_with_sigma2);
+    xmm1 = _mm_adds_epi16(psi_a_p7_p1, y0_p_7_1);
+    bit_met_p7_p1 = _mm_subs_epi16(xmm1, ch_mag_50_over_42_with_sigma2);
+    xmm1 = _mm_adds_epi16(psi_a_p7_m1, y0_m_7_1);
+    bit_met_p7_m1 = _mm_subs_epi16(xmm1, ch_mag_50_over_42_with_sigma2);
+    xmm1 = _mm_adds_epi16(psi_a_p7_m3, y0_m_7_3);
+    bit_met_p7_m3 = _mm_subs_epi16(xmm1, ch_mag_58_over_42_with_sigma2);
+    xmm1 = _mm_adds_epi16(psi_a_p7_m5, y0_m_7_5);
+    bit_met_p7_m5 = _mm_subs_epi16(xmm1, ch_mag_74_over_42_with_sigma2);
+    xmm1 = _mm_adds_epi16(psi_a_p7_m7, y0_m_7_7);
+    bit_met_p7_m7 = _mm_subs_epi16(xmm1, ch_mag_98_over_42_with_sigma2);
+    xmm1 = _mm_adds_epi16(psi_a_p5_p7, y0_p_5_7);
+    bit_met_p5_p7 = _mm_subs_epi16(xmm1, ch_mag_74_over_42_with_sigma2);
+    xmm1 = _mm_adds_epi16(psi_a_p5_p5, y0_p_5_5);
+    bit_met_p5_p5 = _mm_subs_epi16(xmm1, ch_mag_50_over_42_with_sigma2);
+    xmm1 = _mm_adds_epi16(psi_a_p5_p3, y0_p_5_3);
+    bit_met_p5_p3 = _mm_subs_epi16(xmm1, ch_mag_34_over_42_with_sigma2);
+    xmm1 = _mm_adds_epi16(psi_a_p5_p1, y0_p_5_1);
+    bit_met_p5_p1 = _mm_subs_epi16(xmm1, ch_mag_26_over_42_with_sigma2);
+    xmm1 = _mm_adds_epi16(psi_a_p5_m1, y0_m_5_1);
+    bit_met_p5_m1 = _mm_subs_epi16(xmm1, ch_mag_26_over_42_with_sigma2);
+    xmm1 = _mm_adds_epi16(psi_a_p5_m3, y0_m_5_3);
+    bit_met_p5_m3 = _mm_subs_epi16(xmm1, ch_mag_34_over_42_with_sigma2);
+    xmm1 = _mm_adds_epi16(psi_a_p5_m5, y0_m_5_5);
+    bit_met_p5_m5 = _mm_subs_epi16(xmm1, ch_mag_50_over_42_with_sigma2);
+    xmm1 = _mm_adds_epi16(psi_a_p5_m7, y0_m_5_7);
+    bit_met_p5_m7 = _mm_subs_epi16(xmm1, ch_mag_74_over_42_with_sigma2);
+    xmm1 = _mm_adds_epi16(psi_a_p3_p7, y0_p_3_7);
+    bit_met_p3_p7 = _mm_subs_epi16(xmm1, ch_mag_58_over_42_with_sigma2);
+    xmm1 = _mm_adds_epi16(psi_a_p3_p5, y0_p_3_5);
+    bit_met_p3_p5 = _mm_subs_epi16(xmm1, ch_mag_34_over_42_with_sigma2);
+    xmm1 = _mm_adds_epi16(psi_a_p3_p3, y0_p_3_3);
+    bit_met_p3_p3 = _mm_subs_epi16(xmm1, ch_mag_18_over_42_with_sigma2);
+    xmm1 = _mm_adds_epi16(psi_a_p3_p1, y0_p_3_1);
+    bit_met_p3_p1 = _mm_subs_epi16(xmm1, ch_mag_10_over_42_with_sigma2);
+    xmm1 = _mm_adds_epi16(psi_a_p3_m1, y0_m_3_1);
+    bit_met_p3_m1 = _mm_subs_epi16(xmm1, ch_mag_10_over_42_with_sigma2);
+    xmm1 = _mm_adds_epi16(psi_a_p3_m3, y0_m_3_3);
+    bit_met_p3_m3 = _mm_subs_epi16(xmm1, ch_mag_18_over_42_with_sigma2);
+    xmm1 = _mm_adds_epi16(psi_a_p3_m5, y0_m_3_5);
+    bit_met_p3_m5 = _mm_subs_epi16(xmm1, ch_mag_34_over_42_with_sigma2);
+    xmm1 = _mm_adds_epi16(psi_a_p3_m7, y0_m_3_7);
+    bit_met_p3_m7 = _mm_subs_epi16(xmm1, ch_mag_58_over_42_with_sigma2);
+    xmm1 = _mm_adds_epi16(psi_a_p1_p7, y0_p_1_7);
+    bit_met_p1_p7 = _mm_subs_epi16(xmm1, ch_mag_50_over_42_with_sigma2);
+    xmm1 = _mm_adds_epi16(psi_a_p1_p5, y0_p_1_5);
+    bit_met_p1_p5 = _mm_subs_epi16(xmm1, ch_mag_26_over_42_with_sigma2);
+    xmm1 = _mm_adds_epi16(psi_a_p1_p3, y0_p_1_3);
+    bit_met_p1_p3 = _mm_subs_epi16(xmm1, ch_mag_10_over_42_with_sigma2);
+    xmm1 = _mm_adds_epi16(psi_a_p1_p1, y0_p_1_1);
+    bit_met_p1_p1 = _mm_subs_epi16(xmm1, ch_mag_2_over_42_with_sigma2);
+    xmm1 = _mm_adds_epi16(psi_a_p1_m1, y0_m_1_1);
+    bit_met_p1_m1 = _mm_subs_epi16(xmm1, ch_mag_2_over_42_with_sigma2);
+    xmm1 = _mm_adds_epi16(psi_a_p1_m3, y0_m_1_3);
+    bit_met_p1_m3 = _mm_subs_epi16(xmm1, ch_mag_10_over_42_with_sigma2);
+    xmm1 = _mm_adds_epi16(psi_a_p1_m5, y0_m_1_5);
+    bit_met_p1_m5 = _mm_subs_epi16(xmm1, ch_mag_26_over_42_with_sigma2);
+    xmm1 = _mm_adds_epi16(psi_a_p1_m7, y0_m_1_7);
+    bit_met_p1_m7 = _mm_subs_epi16(xmm1, ch_mag_50_over_42_with_sigma2);
+
+    xmm1 = _mm_subs_epi16(psi_a_m1_p7, y0_m_1_7);
+    bit_met_m1_p7 = _mm_subs_epi16(xmm1, ch_mag_50_over_42_with_sigma2);
+    xmm1 = _mm_subs_epi16(psi_a_m1_p5, y0_m_1_5);
+    bit_met_m1_p5 = _mm_subs_epi16(xmm1, ch_mag_26_over_42_with_sigma2);
+    xmm1 = _mm_subs_epi16(psi_a_m1_p3, y0_m_1_3);
+    bit_met_m1_p3 = _mm_subs_epi16(xmm1, ch_mag_10_over_42_with_sigma2);
+    xmm1 = _mm_subs_epi16(psi_a_m1_p1, y0_m_1_1);
+    bit_met_m1_p1 = _mm_subs_epi16(xmm1, ch_mag_2_over_42_with_sigma2);
+    xmm1 = _mm_subs_epi16(psi_a_m1_m1, y0_p_1_1);
+    bit_met_m1_m1 = _mm_subs_epi16(xmm1, ch_mag_2_over_42_with_sigma2);
+    xmm1 = _mm_subs_epi16(psi_a_m1_m3, y0_p_1_3);
+    bit_met_m1_m3 = _mm_subs_epi16(xmm1, ch_mag_10_over_42_with_sigma2);
+    xmm1 = _mm_subs_epi16(psi_a_m1_m5, y0_p_1_5);
+    bit_met_m1_m5 = _mm_subs_epi16(xmm1, ch_mag_26_over_42_with_sigma2);
+    xmm1 = _mm_subs_epi16(psi_a_m1_m7, y0_p_1_7);
+    bit_met_m1_m7 = _mm_subs_epi16(xmm1, ch_mag_50_over_42_with_sigma2);
+    xmm1 = _mm_subs_epi16(psi_a_m3_p7, y0_m_3_7);
+    bit_met_m3_p7 = _mm_subs_epi16(xmm1, ch_mag_58_over_42_with_sigma2);
+    xmm1 = _mm_subs_epi16(psi_a_m3_p5, y0_m_3_5);
+    bit_met_m3_p5 = _mm_subs_epi16(xmm1, ch_mag_34_over_42_with_sigma2);
+    xmm1 = _mm_subs_epi16(psi_a_m3_p3, y0_m_3_3);
+    bit_met_m3_p3 = _mm_subs_epi16(xmm1, ch_mag_18_over_42_with_sigma2);
+    xmm1 = _mm_subs_epi16(psi_a_m3_p1, y0_m_3_1);
+    bit_met_m3_p1 = _mm_subs_epi16(xmm1, ch_mag_10_over_42_with_sigma2);
+    xmm1 = _mm_subs_epi16(psi_a_m3_m1, y0_p_3_1);
+    bit_met_m3_m1 = _mm_subs_epi16(xmm1, ch_mag_10_over_42_with_sigma2);
+    xmm1 = _mm_subs_epi16(psi_a_m3_m3, y0_p_3_3);
+    bit_met_m3_m3 = _mm_subs_epi16(xmm1, ch_mag_18_over_42_with_sigma2);
+    xmm1 = _mm_subs_epi16(psi_a_m3_m5, y0_p_3_5);
+    bit_met_m3_m5 = _mm_subs_epi16(xmm1, ch_mag_34_over_42_with_sigma2);
+    xmm1 = _mm_subs_epi16(psi_a_m3_m7, y0_p_3_7);
+    bit_met_m3_m7 = _mm_subs_epi16(xmm1, ch_mag_58_over_42_with_sigma2);
+    xmm1 = _mm_subs_epi16(psi_a_m5_p7, y0_m_5_7);
+    bit_met_m5_p7 = _mm_subs_epi16(xmm1, ch_mag_74_over_42_with_sigma2);
+    xmm1 = _mm_subs_epi16(psi_a_m5_p5, y0_m_5_5);
+    bit_met_m5_p5 = _mm_subs_epi16(xmm1, ch_mag_50_over_42_with_sigma2);
+    xmm1 = _mm_subs_epi16(psi_a_m5_p3, y0_m_5_3);
+    bit_met_m5_p3 = _mm_subs_epi16(xmm1, ch_mag_34_over_42_with_sigma2);
+    xmm1 = _mm_subs_epi16(psi_a_m5_p1, y0_m_5_1);
+    bit_met_m5_p1 = _mm_subs_epi16(xmm1, ch_mag_26_over_42_with_sigma2);
+    xmm1 = _mm_subs_epi16(psi_a_m5_m1, y0_p_5_1);
+    bit_met_m5_m1 = _mm_subs_epi16(xmm1, ch_mag_26_over_42_with_sigma2);
+    xmm1 = _mm_subs_epi16(psi_a_m5_m3, y0_p_5_3);
+    bit_met_m5_m3 = _mm_subs_epi16(xmm1, ch_mag_34_over_42_with_sigma2);
+    xmm1 = _mm_subs_epi16(psi_a_m5_m5, y0_p_5_5);
+    bit_met_m5_m5 = _mm_subs_epi16(xmm1, ch_mag_50_over_42_with_sigma2);
+    xmm1 = _mm_subs_epi16(psi_a_m5_m7, y0_p_5_7);
+    bit_met_m5_m7 = _mm_subs_epi16(xmm1, ch_mag_74_over_42_with_sigma2);
+    xmm1 = _mm_subs_epi16(psi_a_m7_p7, y0_m_7_7);
+    bit_met_m7_p7 = _mm_subs_epi16(xmm1, ch_mag_98_over_42_with_sigma2);
+    xmm1 = _mm_subs_epi16(psi_a_m7_p5, y0_m_7_5);
+    bit_met_m7_p5 = _mm_subs_epi16(xmm1, ch_mag_74_over_42_with_sigma2);
+    xmm1 = _mm_subs_epi16(psi_a_m7_p3, y0_m_7_3);
+    bit_met_m7_p3 = _mm_subs_epi16(xmm1, ch_mag_58_over_42_with_sigma2);
+    xmm1 = _mm_subs_epi16(psi_a_m7_p1, y0_m_7_1);
+    bit_met_m7_p1 = _mm_subs_epi16(xmm1, ch_mag_50_over_42_with_sigma2);
+    xmm1 = _mm_subs_epi16(psi_a_m7_m1, y0_p_7_1);
+    bit_met_m7_m1 = _mm_subs_epi16(xmm1, ch_mag_50_over_42_with_sigma2);
+    xmm1 = _mm_subs_epi16(psi_a_m7_m3, y0_p_7_3);
+    bit_met_m7_m3 = _mm_subs_epi16(xmm1, ch_mag_58_over_42_with_sigma2);
+    xmm1 = _mm_subs_epi16(psi_a_m7_m5, y0_p_7_5);
+    bit_met_m7_m5 = _mm_subs_epi16(xmm1, ch_mag_74_over_42_with_sigma2);
+    xmm1 = _mm_subs_epi16(psi_a_m7_m7, y0_p_7_7);
+    bit_met_m7_m7 = _mm_subs_epi16(xmm1, ch_mag_98_over_42_with_sigma2);
+
+    // Detection for 1st bit (LTE mapping)
+    // bit = 1
+    xmm0 = _mm_max_epi16(bit_met_m7_p7, bit_met_m7_p5);
+    xmm1 = _mm_max_epi16(bit_met_m7_p3, bit_met_m7_p1);
+    xmm2 = _mm_max_epi16(bit_met_m7_m1, bit_met_m7_m3);
+    xmm3 = _mm_max_epi16(bit_met_m7_m5, bit_met_m7_m7);
+    xmm4 = _mm_max_epi16(xmm0, xmm1);
+    xmm5 = _mm_max_epi16(xmm2, xmm3);
+    logmax_den_re0 = _mm_max_epi16(xmm4, xmm5);
+    xmm0 = _mm_max_epi16(bit_met_m5_p7, bit_met_m5_p5);
+    xmm1 = _mm_max_epi16(bit_met_m5_p3, bit_met_m5_p1);
+    xmm2 = _mm_max_epi16(bit_met_m5_m1, bit_met_m5_m3);
+    xmm3 = _mm_max_epi16(bit_met_m5_m5, bit_met_m5_m7);
+    xmm4 = _mm_max_epi16(xmm0, xmm1);
+    xmm5 = _mm_max_epi16(xmm2, xmm3);
+    logmax_den_re0 = _mm_max_epi16(logmax_den_re0, xmm4);
+    logmax_den_re0 = _mm_max_epi16(logmax_den_re0, xmm5);
+    xmm0 = _mm_max_epi16(bit_met_m3_p7, bit_met_m3_p5);
+    xmm1 = _mm_max_epi16(bit_met_m3_p3, bit_met_m3_p1);
+    xmm2 = _mm_max_epi16(bit_met_m3_m1, bit_met_m3_m3);
+    xmm3 = _mm_max_epi16(bit_met_m3_m5, bit_met_m3_m7);
+    xmm4 = _mm_max_epi16(xmm0, xmm1);
+    xmm5 = _mm_max_epi16(xmm2, xmm3);
+    logmax_den_re0 = _mm_max_epi16(logmax_den_re0, xmm4);
+    logmax_den_re0 = _mm_max_epi16(logmax_den_re0, xmm5);
+    xmm0 = _mm_max_epi16(bit_met_m1_p7, bit_met_m1_p5);
+    xmm1 = _mm_max_epi16(bit_met_m1_p3, bit_met_m1_p1);
+    xmm2 = _mm_max_epi16(bit_met_m1_m1, bit_met_m1_m3);
+    xmm3 = _mm_max_epi16(bit_met_m1_m5, bit_met_m1_m7);
+    xmm4 = _mm_max_epi16(xmm0, xmm1);
+    xmm5 = _mm_max_epi16(xmm2, xmm3);
+    logmax_den_re0 = _mm_max_epi16(logmax_den_re0, xmm4);
+    logmax_den_re0 = _mm_max_epi16(logmax_den_re0, xmm5);
+
+    // bit = 0
+    xmm0 = _mm_max_epi16(bit_met_p7_p7, bit_met_p7_p5);
+    xmm1 = _mm_max_epi16(bit_met_p7_p3, bit_met_p7_p1);
+    xmm2 = _mm_max_epi16(bit_met_p7_m1, bit_met_p7_m3);
+    xmm3 = _mm_max_epi16(bit_met_p7_m5, bit_met_p7_m7);
+    xmm4 = _mm_max_epi16(xmm0, xmm1);
+    xmm5 = _mm_max_epi16(xmm2, xmm3);
+    logmax_num_re0 = _mm_max_epi16(xmm4, xmm5);
+    xmm0 = _mm_max_epi16(bit_met_p5_p7, bit_met_p5_p5);
+    xmm1 = _mm_max_epi16(bit_met_p5_p3, bit_met_p5_p1);
+    xmm2 = _mm_max_epi16(bit_met_p5_m1, bit_met_p5_m3);
+    xmm3 = _mm_max_epi16(bit_met_p5_m5, bit_met_p5_m7);
+    xmm4 = _mm_max_epi16(xmm0, xmm1);
+    xmm5 = _mm_max_epi16(xmm2, xmm3);
+    logmax_num_re0 = _mm_max_epi16(logmax_num_re0, xmm4);
+    logmax_num_re0 = _mm_max_epi16(logmax_num_re0, xmm5);
+    xmm0 = _mm_max_epi16(bit_met_p3_p7, bit_met_p3_p5);
+    xmm1 = _mm_max_epi16(bit_met_p3_p3, bit_met_p3_p1);
+    xmm2 = _mm_max_epi16(bit_met_p3_m1, bit_met_p3_m3);
+    xmm3 = _mm_max_epi16(bit_met_p3_m5, bit_met_p3_m7);
+    xmm4 = _mm_max_epi16(xmm0, xmm1);
+    xmm5 = _mm_max_epi16(xmm2, xmm3);
+    logmax_num_re0 = _mm_max_epi16(logmax_num_re0, xmm4);
+    logmax_num_re0 = _mm_max_epi16(logmax_num_re0, xmm5);
+    xmm0 = _mm_max_epi16(bit_met_p1_p7, bit_met_p1_p5);
+    xmm1 = _mm_max_epi16(bit_met_p1_p3, bit_met_p1_p1);
+    xmm2 = _mm_max_epi16(bit_met_p1_m1, bit_met_p1_m3);
+    xmm3 = _mm_max_epi16(bit_met_p1_m5, bit_met_p1_m7);
+    xmm4 = _mm_max_epi16(xmm0, xmm1);
+    xmm5 = _mm_max_epi16(xmm2, xmm3);
+    logmax_num_re0 = _mm_max_epi16(logmax_num_re0, xmm4);
+    logmax_num_re0 = _mm_max_epi16(logmax_num_re0, xmm5);
+
+    y0r = _mm_subs_epi16(logmax_num_re0, logmax_den_re0);
+
+    // Detection for 2nd bit (LTE mapping)
+    // bit = 1
+    xmm0 = _mm_max_epi16(bit_met_p7_m1, bit_met_p5_m1);
+    xmm1 = _mm_max_epi16(bit_met_p3_m1, bit_met_p1_m1);
+    xmm2 = _mm_max_epi16(bit_met_m1_m1, bit_met_m3_m1);
+    xmm3 = _mm_max_epi16(bit_met_m5_m1, bit_met_m7_m1);
+    xmm4 = _mm_max_epi16(xmm0, xmm1);
+    xmm5 = _mm_max_epi16(xmm2, xmm3);
+    logmax_den_re0 = _mm_max_epi16(xmm4, xmm5);
+    xmm0 = _mm_max_epi16(bit_met_p7_m3, bit_met_p5_m3);
+    xmm1 = _mm_max_epi16(bit_met_p3_m3, bit_met_p1_m3);
+    xmm2 = _mm_max_epi16(bit_met_m1_m3, bit_met_m3_m3);
+    xmm3 = _mm_max_epi16(bit_met_m5_m3, bit_met_m7_m3);
+    xmm4 = _mm_max_epi16(xmm0, xmm1);
+    xmm5 = _mm_max_epi16(xmm2, xmm3);
+    logmax_den_re0 = _mm_max_epi16(logmax_den_re0, xmm4);
+    logmax_den_re0 = _mm_max_epi16(logmax_den_re0, xmm5);
+    xmm0 = _mm_max_epi16(bit_met_p7_m5, bit_met_p5_m5);
+    xmm1 = _mm_max_epi16(bit_met_p3_m5, bit_met_p1_m5);
+    xmm2 = _mm_max_epi16(bit_met_m1_m5, bit_met_m3_m5);
+    xmm3 = _mm_max_epi16(bit_met_m5_m5, bit_met_m7_m5);
+    xmm4 = _mm_max_epi16(xmm0, xmm1);
+    xmm5 = _mm_max_epi16(xmm2, xmm3);
+    logmax_den_re0 = _mm_max_epi16(logmax_den_re0, xmm4);
+    logmax_den_re0 = _mm_max_epi16(logmax_den_re0, xmm5);
+    xmm0 = _mm_max_epi16(bit_met_p7_m7, bit_met_p5_m7);
+    xmm1 = _mm_max_epi16(bit_met_p3_m7, bit_met_p1_m7);
+    xmm2 = _mm_max_epi16(bit_met_m1_m7, bit_met_m3_m7);
+    xmm3 = _mm_max_epi16(bit_met_m5_m7, bit_met_m7_m7);
+    xmm4 = _mm_max_epi16(xmm0, xmm1);
+    xmm5 = _mm_max_epi16(xmm2, xmm3);
+    logmax_den_re0 = _mm_max_epi16(logmax_den_re0, xmm4);
+    logmax_den_re0 = _mm_max_epi16(logmax_den_re0, xmm5);
+
+    // bit = 0
+    xmm0 = _mm_max_epi16(bit_met_p7_p1, bit_met_p5_p1);
+    xmm1 = _mm_max_epi16(bit_met_p3_p1, bit_met_p1_p1);
+    xmm2 = _mm_max_epi16(bit_met_m1_p1, bit_met_m3_p1);
+    xmm3 = _mm_max_epi16(bit_met_m5_p1, bit_met_m7_p1);
+    xmm4 = _mm_max_epi16(xmm0, xmm1);
+    xmm5 = _mm_max_epi16(xmm2, xmm3);
+    logmax_num_re0 = _mm_max_epi16(xmm4, xmm5);
+    xmm0 = _mm_max_epi16(bit_met_p7_p3, bit_met_p5_p3);
+    xmm1 = _mm_max_epi16(bit_met_p3_p3, bit_met_p1_p3);
+    xmm2 = _mm_max_epi16(bit_met_m1_p3, bit_met_m3_p3);
+    xmm3 = _mm_max_epi16(bit_met_m5_p3, bit_met_m7_p3);
+    xmm4 = _mm_max_epi16(xmm0, xmm1);
+    xmm5 = _mm_max_epi16(xmm2, xmm3);
+    logmax_num_re0 = _mm_max_epi16(logmax_num_re0, xmm4);
+    logmax_num_re0 = _mm_max_epi16(logmax_num_re0, xmm5);
+    xmm0 = _mm_max_epi16(bit_met_p7_p5, bit_met_p5_p5);
+    xmm1 = _mm_max_epi16(bit_met_p3_p5, bit_met_p1_p5);
+    xmm2 = _mm_max_epi16(bit_met_m1_p5, bit_met_m3_p5);
+    xmm3 = _mm_max_epi16(bit_met_m5_p5, bit_met_m7_p5);
+    xmm4 = _mm_max_epi16(xmm0, xmm1);
+    xmm5 = _mm_max_epi16(xmm2, xmm3);
+    logmax_num_re0 = _mm_max_epi16(logmax_num_re0, xmm4);
+    logmax_num_re0 = _mm_max_epi16(logmax_num_re0, xmm5);
+    xmm0 = _mm_max_epi16(bit_met_p7_p7, bit_met_p5_p7);
+    xmm1 = _mm_max_epi16(bit_met_p3_p7, bit_met_p1_p7);
+    xmm2 = _mm_max_epi16(bit_met_m1_p7, bit_met_m3_p7);
+    xmm3 = _mm_max_epi16(bit_met_m5_p7, bit_met_m7_p7);
+    xmm4 = _mm_max_epi16(xmm0, xmm1);
+    xmm5 = _mm_max_epi16(xmm2, xmm3);
+    logmax_num_re0 = _mm_max_epi16(logmax_num_re0, xmm4);
+    logmax_num_re0 = _mm_max_epi16(logmax_num_re0, xmm5);
+
+    y1r = _mm_subs_epi16(logmax_num_re0, logmax_den_re0);
+
+    // Detection for 3rd bit (LTE mapping)
+    xmm0 = _mm_max_epi16(bit_met_m7_m7, bit_met_m7_m5);
+    xmm1 = _mm_max_epi16(bit_met_m7_m3, bit_met_m7_m1);
+    xmm2 = _mm_max_epi16(bit_met_m7_p1, bit_met_m7_p3);
+    xmm3 = _mm_max_epi16(bit_met_m7_p5, bit_met_m7_p7);
+    xmm4 = _mm_max_epi16(xmm0, xmm1);
+    xmm5 = _mm_max_epi16(xmm2, xmm3);
+    logmax_den_re0 = _mm_max_epi16(xmm4, xmm5);
+    xmm0 = _mm_max_epi16(bit_met_m5_m7, bit_met_m5_m5);
+    xmm1 = _mm_max_epi16(bit_met_m5_m3, bit_met_m5_m1);
+    xmm2 = _mm_max_epi16(bit_met_m5_p1, bit_met_m5_p3);
+    xmm3 = _mm_max_epi16(bit_met_m5_p5, bit_met_m5_p7);
+    xmm4 = _mm_max_epi16(xmm0, xmm1);
+    xmm5 = _mm_max_epi16(xmm2, xmm3);
+    logmax_den_re0 = _mm_max_epi16(logmax_den_re0, xmm4);
+    logmax_den_re0 = _mm_max_epi16(logmax_den_re0, xmm5);
+    xmm0 = _mm_max_epi16(bit_met_p5_m7, bit_met_p5_m5);
+    xmm1 = _mm_max_epi16(bit_met_p5_m3, bit_met_p5_m1);
+    xmm2 = _mm_max_epi16(bit_met_p5_p1, bit_met_p5_p3);
+    xmm3 = _mm_max_epi16(bit_met_p5_p5, bit_met_p5_p7);
+    xmm4 = _mm_max_epi16(xmm0, xmm1);
+    xmm5 = _mm_max_epi16(xmm2, xmm3);
+    logmax_den_re0 = _mm_max_epi16(logmax_den_re0, xmm4);
+    logmax_den_re0 = _mm_max_epi16(logmax_den_re0, xmm5);
+    xmm0 = _mm_max_epi16(bit_met_p7_m7, bit_met_p7_m5);
+    xmm1 = _mm_max_epi16(bit_met_p7_m3, bit_met_p7_m1);
+    xmm2 = _mm_max_epi16(bit_met_p7_p1, bit_met_p7_p3);
+    xmm3 = _mm_max_epi16(bit_met_p7_p5, bit_met_p7_p7);
+    xmm4 = _mm_max_epi16(xmm0, xmm1);
+    xmm5 = _mm_max_epi16(xmm2, xmm3);
+    logmax_den_re0 = _mm_max_epi16(logmax_den_re0, xmm4);
+    logmax_den_re0 = _mm_max_epi16(logmax_den_re0, xmm5);
+
+    xmm0 = _mm_max_epi16(bit_met_m3_m7, bit_met_m3_m5);
+    xmm1 = _mm_max_epi16(bit_met_m3_m3, bit_met_m3_m1);
+    xmm2 = _mm_max_epi16(bit_met_m3_p1, bit_met_m3_p3);
+    xmm3 = _mm_max_epi16(bit_met_m3_p5, bit_met_m3_p7);
+    xmm4 = _mm_max_epi16(xmm0, xmm1);
+    xmm5 = _mm_max_epi16(xmm2, xmm3);
+    logmax_num_re0 = _mm_max_epi16(xmm4, xmm5);
+    xmm0 = _mm_max_epi16(bit_met_m1_m7, bit_met_m1_m5);
+    xmm1 = _mm_max_epi16(bit_met_m1_m3, bit_met_m1_m1);
+    xmm2 = _mm_max_epi16(bit_met_m1_p1, bit_met_m1_p3);
+    xmm3 = _mm_max_epi16(bit_met_m1_p5, bit_met_m1_p7);
+    xmm4 = _mm_max_epi16(xmm0, xmm1);
+    xmm5 = _mm_max_epi16(xmm2, xmm3);
+    logmax_num_re0 = _mm_max_epi16(logmax_num_re0, xmm4);
+    logmax_num_re0 = _mm_max_epi16(logmax_num_re0, xmm5);
+    xmm0 = _mm_max_epi16(bit_met_p1_m7, bit_met_p1_m5);
+    xmm1 = _mm_max_epi16(bit_met_p1_m3, bit_met_p1_m1);
+    xmm2 = _mm_max_epi16(bit_met_p1_p1, bit_met_p1_p3);
+    xmm3 = _mm_max_epi16(bit_met_p1_p5, bit_met_p1_p7);
+    xmm4 = _mm_max_epi16(xmm0, xmm1);
+    xmm5 = _mm_max_epi16(xmm2, xmm3);
+    logmax_num_re0 = _mm_max_epi16(logmax_num_re0, xmm4);
+    logmax_num_re0 = _mm_max_epi16(logmax_num_re0, xmm5);
+    xmm0 = _mm_max_epi16(bit_met_p3_m7, bit_met_p3_m5);
+    xmm1 = _mm_max_epi16(bit_met_p3_m3, bit_met_p3_m1);
+    xmm2 = _mm_max_epi16(bit_met_p3_p1, bit_met_p3_p3);
+    xmm3 = _mm_max_epi16(bit_met_p3_p5, bit_met_p3_p7);
+    xmm4 = _mm_max_epi16(xmm0, xmm1);
+    xmm5 = _mm_max_epi16(xmm2, xmm3);
+    logmax_num_re0 = _mm_max_epi16(logmax_num_re0, xmm4);
+    logmax_num_re0 = _mm_max_epi16(logmax_num_re0, xmm5);
+
+    y2r = _mm_subs_epi16(logmax_num_re0, logmax_den_re0);
+
+    // Detection for 4th bit (LTE mapping)
+    xmm0 = _mm_max_epi16(bit_met_p7_p7, bit_met_p5_p7);
+    xmm1 = _mm_max_epi16(bit_met_p3_p7, bit_met_p1_p7);
+    xmm2 = _mm_max_epi16(bit_met_m1_p7, bit_met_m3_p7);
+    xmm3 = _mm_max_epi16(bit_met_m5_p7, bit_met_m7_p7);
+    xmm4 = _mm_max_epi16(xmm0, xmm1);
+    xmm5 = _mm_max_epi16(xmm2, xmm3);
+    logmax_den_re0 = _mm_max_epi16(xmm4, xmm5);
+    xmm0 = _mm_max_epi16(bit_met_p7_p5, bit_met_p5_p5); // fold the eight bit_met_*_p5 metrics into the 4th-bit denominator
+    xmm1 = _mm_max_epi16(bit_met_p3_p5, bit_met_p1_p5);
+    xmm2 = _mm_max_epi16(bit_met_m1_p5, bit_met_m3_p5);
+    xmm3 = _mm_max_epi16(bit_met_m5_p5, bit_met_m7_p5); // m5/m7 pair: bit_met_m7_p5 must take part in the max, as in the other *_p5 groups
+    xmm4 = _mm_max_epi16(xmm0, xmm1);
+    xmm5 = _mm_max_epi16(xmm2, xmm3);
+    logmax_den_re0 = _mm_max_epi16(logmax_den_re0, xmm4);
+    logmax_den_re0 = _mm_max_epi16(logmax_den_re0, xmm5);
+    xmm0 = _mm_max_epi16(bit_met_p7_m5, bit_met_p5_m5);
+    xmm1 = _mm_max_epi16(bit_met_p3_m5, bit_met_p1_m5);
+    xmm2 = _mm_max_epi16(bit_met_m1_m5, bit_met_m3_m5);
+    xmm3 = _mm_max_epi16(bit_met_m5_m5, bit_met_m7_m5);
+    xmm4 = _mm_max_epi16(xmm0, xmm1);
+    xmm5 = _mm_max_epi16(xmm2, xmm3);
+    logmax_den_re0 = _mm_max_epi16(logmax_den_re0, xmm4);
+    logmax_den_re0 = _mm_max_epi16(logmax_den_re0, xmm5);
+    xmm0 = _mm_max_epi16(bit_met_p7_m7, bit_met_p5_m7);
+    xmm1 = _mm_max_epi16(bit_met_p3_m7, bit_met_p1_m7);
+    xmm2 = _mm_max_epi16(bit_met_m1_m7, bit_met_m3_m7);
+    xmm3 = _mm_max_epi16(bit_met_m5_m7, bit_met_m7_m7);
+    xmm4 = _mm_max_epi16(xmm0, xmm1);
+    xmm5 = _mm_max_epi16(xmm2, xmm3);
+    logmax_den_re0 = _mm_max_epi16(logmax_den_re0, xmm4);
+    logmax_den_re0 = _mm_max_epi16(logmax_den_re0, xmm5);
+
+    xmm0 = _mm_max_epi16(bit_met_p7_m1, bit_met_p5_m1);
+    xmm1 = _mm_max_epi16(bit_met_p3_m1, bit_met_p1_m1);
+    xmm2 = _mm_max_epi16(bit_met_m1_m1, bit_met_m3_m1);
+    xmm3 = _mm_max_epi16(bit_met_m5_m1, bit_met_m7_m1);
+    xmm4 = _mm_max_epi16(xmm0, xmm1);
+    xmm5 = _mm_max_epi16(xmm2, xmm3);
+    logmax_num_re0 = _mm_max_epi16(xmm4, xmm5);
+    xmm0 = _mm_max_epi16(bit_met_p7_m3, bit_met_p5_m3);
+    xmm1 = _mm_max_epi16(bit_met_p3_m3, bit_met_p1_m3);
+    xmm2 = _mm_max_epi16(bit_met_m1_m3, bit_met_m3_m3);
+    xmm3 = _mm_max_epi16(bit_met_m5_m3, bit_met_m7_m3);
+    xmm4 = _mm_max_epi16(xmm0, xmm1);
+    xmm5 = _mm_max_epi16(xmm2, xmm3);
+    logmax_num_re0 = _mm_max_epi16(logmax_num_re0, xmm4);
+    logmax_num_re0 = _mm_max_epi16(logmax_num_re0, xmm5);
+    xmm0 = _mm_max_epi16(bit_met_p7_p1, bit_met_p5_p1);
+    xmm1 = _mm_max_epi16(bit_met_p3_p1, bit_met_p1_p1);
+    xmm2 = _mm_max_epi16(bit_met_m1_p1, bit_met_m3_p1);
+    xmm3 = _mm_max_epi16(bit_met_m5_p1, bit_met_m7_p1);
+    xmm4 = _mm_max_epi16(xmm0, xmm1);
+    xmm5 = _mm_max_epi16(xmm2, xmm3);
+    logmax_num_re0 = _mm_max_epi16(logmax_num_re0, xmm4);
+    logmax_num_re0 = _mm_max_epi16(logmax_num_re0, xmm5);
+    xmm0 = _mm_max_epi16(bit_met_p7_p3, bit_met_p5_p3);
+    xmm1 = _mm_max_epi16(bit_met_p3_p3, bit_met_p1_p3);
+    xmm2 = _mm_max_epi16(bit_met_m1_p3, bit_met_m3_p3);
+    xmm3 = _mm_max_epi16(bit_met_m5_p3, bit_met_m7_p3);
+    xmm4 = _mm_max_epi16(xmm0, xmm1);
+    xmm5 = _mm_max_epi16(xmm2, xmm3);
+    logmax_num_re0 = _mm_max_epi16(logmax_num_re0, xmm4);
+    logmax_num_re0 = _mm_max_epi16(logmax_num_re0, xmm5);
+
+    y0i = _mm_subs_epi16(logmax_num_re0, logmax_den_re0);
+
+
+    // Detection for 5th bit (LTE mapping)
+    xmm0 = _mm_max_epi16(bit_met_m7_m7, bit_met_m7_m5);
+    xmm1 = _mm_max_epi16(bit_met_m7_m3, bit_met_m7_m1);
+    xmm2 = _mm_max_epi16(bit_met_m7_p1, bit_met_m7_p3);
+    xmm3 = _mm_max_epi16(bit_met_m7_p5, bit_met_m7_p7);
+    xmm4 = _mm_max_epi16(xmm0, xmm1);
+    xmm5 = _mm_max_epi16(xmm2, xmm3);
+    logmax_den_re0 = _mm_max_epi16(xmm4, xmm5);
+    xmm0 = _mm_max_epi16(bit_met_m1_m7, bit_met_m1_m5);
+    xmm1 = _mm_max_epi16(bit_met_m1_m3, bit_met_m1_m1);
+    xmm2 = _mm_max_epi16(bit_met_m1_p1, bit_met_m1_p3);
+    xmm3 = _mm_max_epi16(bit_met_m1_p5, bit_met_m1_p7);
+    xmm4 = _mm_max_epi16(xmm0, xmm1);
+    xmm5 = _mm_max_epi16(xmm2, xmm3);
+    logmax_den_re0 = _mm_max_epi16(logmax_den_re0, xmm4);
+    logmax_den_re0 = _mm_max_epi16(logmax_den_re0, xmm5);
+    xmm0 = _mm_max_epi16(bit_met_p1_m7, bit_met_p1_m5);
+    xmm1 = _mm_max_epi16(bit_met_p1_m3, bit_met_p1_m1);
+    xmm2 = _mm_max_epi16(bit_met_p1_p1, bit_met_p1_p3);
+    xmm3 = _mm_max_epi16(bit_met_p1_p5, bit_met_p1_p7);
+    xmm4 = _mm_max_epi16(xmm0, xmm1);
+    xmm5 = _mm_max_epi16(xmm2, xmm3);
+    logmax_den_re0 = _mm_max_epi16(logmax_den_re0, xmm4);
+    logmax_den_re0 = _mm_max_epi16(logmax_den_re0, xmm5);
+    xmm0 = _mm_max_epi16(bit_met_p7_m7, bit_met_p7_m5);
+    xmm1 = _mm_max_epi16(bit_met_p7_m3, bit_met_p7_m1);
+    xmm2 = _mm_max_epi16(bit_met_p7_p1, bit_met_p7_p3);
+    xmm3 = _mm_max_epi16(bit_met_p7_p5, bit_met_p7_p7);
+    xmm4 = _mm_max_epi16(xmm0, xmm1);
+    xmm5 = _mm_max_epi16(xmm2, xmm3);
+    logmax_den_re0 = _mm_max_epi16(logmax_den_re0, xmm4);
+    logmax_den_re0 = _mm_max_epi16(logmax_den_re0, xmm5);
+
+    xmm0 = _mm_max_epi16(bit_met_m5_m7, bit_met_m5_m5);
+    xmm1 = _mm_max_epi16(bit_met_m5_m3, bit_met_m5_m1);
+    xmm2 = _mm_max_epi16(bit_met_m5_p1, bit_met_m5_p3);
+    xmm3 = _mm_max_epi16(bit_met_m5_p5, bit_met_m5_p7);
+    xmm4 = _mm_max_epi16(xmm0, xmm1);
+    xmm5 = _mm_max_epi16(xmm2, xmm3);
+    logmax_num_re0 = _mm_max_epi16(xmm4, xmm5);
+    xmm0 = _mm_max_epi16(bit_met_m3_m7, bit_met_m3_m5);
+    xmm1 = _mm_max_epi16(bit_met_m3_m3, bit_met_m3_m1);
+    xmm2 = _mm_max_epi16(bit_met_m3_p1, bit_met_m3_p3);
+    xmm3 = _mm_max_epi16(bit_met_m3_p5, bit_met_m3_p7);
+    xmm4 = _mm_max_epi16(xmm0, xmm1);
+    xmm5 = _mm_max_epi16(xmm2, xmm3);
+    logmax_num_re0 = _mm_max_epi16(logmax_num_re0, xmm4);
+    logmax_num_re0 = _mm_max_epi16(logmax_num_re0, xmm5);
+    xmm0 = _mm_max_epi16(bit_met_p3_m7, bit_met_p3_m5);
+    xmm1 = _mm_max_epi16(bit_met_p3_m3, bit_met_p3_m1);
+    xmm2 = _mm_max_epi16(bit_met_p3_p1, bit_met_p3_p3);
+    xmm3 = _mm_max_epi16(bit_met_p3_p5, bit_met_p3_p7);
+    xmm4 = _mm_max_epi16(xmm0, xmm1);
+    xmm5 = _mm_max_epi16(xmm2, xmm3);
+    logmax_num_re0 = _mm_max_epi16(logmax_num_re0, xmm4);
+    logmax_num_re0 = _mm_max_epi16(logmax_num_re0, xmm5);
+    xmm0 = _mm_max_epi16(bit_met_p5_m7, bit_met_p5_m5);
+    xmm1 = _mm_max_epi16(bit_met_p5_m3, bit_met_p5_m1);
+    xmm2 = _mm_max_epi16(bit_met_p5_p1, bit_met_p5_p3);
+    xmm3 = _mm_max_epi16(bit_met_p5_p5, bit_met_p5_p7);
+    xmm4 = _mm_max_epi16(xmm0, xmm1);
+    xmm5 = _mm_max_epi16(xmm2, xmm3);
+    logmax_num_re0 = _mm_max_epi16(logmax_num_re0, xmm4);
+    logmax_num_re0 = _mm_max_epi16(logmax_num_re0, xmm5);
+
+    y1i = _mm_subs_epi16(logmax_num_re0, logmax_den_re0);
+
+    // Detection for 6th bit (LTE mapping)
+    xmm0 = _mm_max_epi16(bit_met_p7_p7, bit_met_p5_p7);
+    xmm1 = _mm_max_epi16(bit_met_p3_p7, bit_met_p1_p7);
+    xmm2 = _mm_max_epi16(bit_met_m1_p7, bit_met_m3_p7);
+    xmm3 = _mm_max_epi16(bit_met_m5_p7, bit_met_m7_p7);
+    xmm4 = _mm_max_epi16(xmm0, xmm1);
+    xmm5 = _mm_max_epi16(xmm2, xmm3);
+    logmax_den_re0 = _mm_max_epi16(xmm4, xmm5);
+    xmm0 = _mm_max_epi16(bit_met_p7_p1, bit_met_p5_p1); // fold the eight bit_met_*_p1 metrics into the 6th-bit denominator
+    xmm1 = _mm_max_epi16(bit_met_p3_p1, bit_met_p1_p1);
+    xmm2 = _mm_max_epi16(bit_met_m1_p1, bit_met_m3_p1);
+    xmm3 = _mm_max_epi16(bit_met_m5_p1, bit_met_m7_p1); // m5/m7 pair: bit_met_m7_p1 must take part in the max, as in the other *_p1 groups
+    xmm4 = _mm_max_epi16(xmm0, xmm1);
+    xmm5 = _mm_max_epi16(xmm2, xmm3);
+    logmax_den_re0 = _mm_max_epi16(logmax_den_re0, xmm4);
+    logmax_den_re0 = _mm_max_epi16(logmax_den_re0, xmm5);
+    xmm0 = _mm_max_epi16(bit_met_p7_m1, bit_met_p5_m1);
+    xmm1 = _mm_max_epi16(bit_met_p3_m1, bit_met_p1_m1);
+    xmm2 = _mm_max_epi16(bit_met_m1_m1, bit_met_m3_m1);
+    xmm3 = _mm_max_epi16(bit_met_m5_m1, bit_met_m7_m1);
+    xmm4 = _mm_max_epi16(xmm0, xmm1);
+    xmm5 = _mm_max_epi16(xmm2, xmm3);
+    logmax_den_re0 = _mm_max_epi16(logmax_den_re0, xmm4);
+    logmax_den_re0 = _mm_max_epi16(logmax_den_re0, xmm5);
+    xmm0 = _mm_max_epi16(bit_met_p7_m7, bit_met_p5_m7);
+    xmm1 = _mm_max_epi16(bit_met_p3_m7, bit_met_p1_m7);
+    xmm2 = _mm_max_epi16(bit_met_m1_m7, bit_met_m3_m7);
+    xmm3 = _mm_max_epi16(bit_met_m5_m7, bit_met_m7_m7);
+    xmm4 = _mm_max_epi16(xmm0, xmm1);
+    xmm5 = _mm_max_epi16(xmm2, xmm3);
+    logmax_den_re0 = _mm_max_epi16(logmax_den_re0, xmm4);
+    logmax_den_re0 = _mm_max_epi16(logmax_den_re0, xmm5);
+
+    xmm0 = _mm_max_epi16(bit_met_p7_m5, bit_met_p5_m5);
+    xmm1 = _mm_max_epi16(bit_met_p3_m5, bit_met_p1_m5);
+    xmm2 = _mm_max_epi16(bit_met_m1_m5, bit_met_m3_m5);
+    xmm3 = _mm_max_epi16(bit_met_m5_m5, bit_met_m7_m5);
+    xmm4 = _mm_max_epi16(xmm0, xmm1);
+    xmm5 = _mm_max_epi16(xmm2, xmm3);
+    logmax_num_re0 = _mm_max_epi16(xmm4, xmm5);
+    xmm0 = _mm_max_epi16(bit_met_p7_m3, bit_met_p5_m3);
+    xmm1 = _mm_max_epi16(bit_met_p3_m3, bit_met_p1_m3);
+    xmm2 = _mm_max_epi16(bit_met_m1_m3, bit_met_m3_m3);
+    xmm3 = _mm_max_epi16(bit_met_m5_m3, bit_met_m7_m3);
+    xmm4 = _mm_max_epi16(xmm0, xmm1);
+    xmm5 = _mm_max_epi16(xmm2, xmm3);
+    logmax_num_re0 = _mm_max_epi16(logmax_num_re0, xmm4);
+    logmax_num_re0 = _mm_max_epi16(logmax_num_re0, xmm5);
+    xmm0 = _mm_max_epi16(bit_met_p7_p3, bit_met_p5_p3);
+    xmm1 = _mm_max_epi16(bit_met_p3_p3, bit_met_p1_p3);
+    xmm2 = _mm_max_epi16(bit_met_m1_p3, bit_met_m3_p3);
+    xmm3 = _mm_max_epi16(bit_met_m5_p3, bit_met_m7_p3);
+    xmm4 = _mm_max_epi16(xmm0, xmm1);
+    xmm5 = _mm_max_epi16(xmm2, xmm3);
+    logmax_num_re0 = _mm_max_epi16(logmax_num_re0, xmm4);
+    logmax_num_re0 = _mm_max_epi16(logmax_num_re0, xmm5);
+    xmm0 = _mm_max_epi16(bit_met_p7_p5, bit_met_p5_p5);
+    xmm1 = _mm_max_epi16(bit_met_p3_p5, bit_met_p1_p5);
+    xmm2 = _mm_max_epi16(bit_met_m1_p5, bit_met_m3_p5);
+    xmm3 = _mm_max_epi16(bit_met_m5_p5, bit_met_m7_p5);
+    xmm4 = _mm_max_epi16(xmm0, xmm1);
+    xmm5 = _mm_max_epi16(xmm2, xmm3);
+    logmax_num_re0 = _mm_max_epi16(logmax_num_re0, xmm4);
+    logmax_num_re0 = _mm_max_epi16(logmax_num_re0, xmm5);
+
+    y2i = _mm_subs_epi16(logmax_num_re0, logmax_den_re0);
+
+
+    // map to output stream, difficult to do in SIMD since we have 6 16bit LLRs
+    // RE 1
+    j = 24*i;
+    stream0_out[j + 0] = ((short *)&y0r)[0];
+    stream0_out[j + 1] = ((short *)&y1r)[0];
+    stream0_out[j + 2] = ((short *)&y2r)[0];
+    stream0_out[j + 3] = ((short *)&y0i)[0];
+    stream0_out[j + 4] = ((short *)&y1i)[0];
+    stream0_out[j + 5] = ((short *)&y2i)[0];
+    // RE 2
+    stream0_out[j + 6] = ((short *)&y0r)[1];
+    stream0_out[j + 7] = ((short *)&y1r)[1];
+    stream0_out[j + 8] = ((short *)&y2r)[1];
+    stream0_out[j + 9] = ((short *)&y0i)[1];
+    stream0_out[j + 10] = ((short *)&y1i)[1];
+    stream0_out[j + 11] = ((short *)&y2i)[1];
+    // RE 3
+    stream0_out[j + 12] = ((short *)&y0r)[2];
+    stream0_out[j + 13] = ((short *)&y1r)[2];
+    stream0_out[j + 14] = ((short *)&y2r)[2];
+    stream0_out[j + 15] = ((short *)&y0i)[2];
+    stream0_out[j + 16] = ((short *)&y1i)[2];
+    stream0_out[j + 17] = ((short *)&y2i)[2];
+    // RE 4
+    stream0_out[j + 18] = ((short *)&y0r)[3];
+    stream0_out[j + 19] = ((short *)&y1r)[3];
+    stream0_out[j + 20] = ((short *)&y2r)[3];
+    stream0_out[j + 21] = ((short *)&y0i)[3];
+    stream0_out[j + 22] = ((short *)&y1i)[3];
+    stream0_out[j + 23] = ((short *)&y2i)[3];
+    // RE 5
+    stream0_out[j + 24] = ((short *)&y0r)[4];
+    stream0_out[j + 25] = ((short *)&y1r)[4];
+    stream0_out[j + 26] = ((short *)&y2r)[4];
+    stream0_out[j + 27] = ((short *)&y0i)[4];
+    stream0_out[j + 28] = ((short *)&y1i)[4];
+    stream0_out[j + 29] = ((short *)&y2i)[4];
+    // RE 6
+    stream0_out[j + 30] = ((short *)&y0r)[5];
+    stream0_out[j + 31] = ((short *)&y1r)[5];
+    stream0_out[j + 32] = ((short *)&y2r)[5];
+    stream0_out[j + 33] = ((short *)&y0i)[5];
+    stream0_out[j + 34] = ((short *)&y1i)[5];
+    stream0_out[j + 35] = ((short *)&y2i)[5];
+    // RE 7
+    stream0_out[j + 36] = ((short *)&y0r)[6];
+    stream0_out[j + 37] = ((short *)&y1r)[6];
+    stream0_out[j + 38] = ((short *)&y2r)[6];
+    stream0_out[j + 39] = ((short *)&y0i)[6];
+    stream0_out[j + 40] = ((short *)&y1i)[6];
+    stream0_out[j + 41] = ((short *)&y2i)[6];
+    // RE 8
+    stream0_out[j + 42] = ((short *)&y0r)[7];
+    stream0_out[j + 43] = ((short *)&y1r)[7];
+    stream0_out[j + 44] = ((short *)&y2r)[7];
+    stream0_out[j + 45] = ((short *)&y0i)[7];
+    stream0_out[j + 46] = ((short *)&y1i)[7];
+    stream0_out[j + 47] = ((short *)&y2i)[7];
+#elif defined(__arm__)
+
+#endif
+  }
+
+#if defined(__x86_64__) || defined(__i386__)
+  _mm_empty();
+  _m_empty();
+#endif
+}
+
+
+/*
+ * dlsch_64qam_qpsk_llr
+ *
+ * Per-OFDM-symbol wrapper around qam64_qpsk(): selects the per-symbol slices
+ * of the input buffers, works out how many resource elements (REs) of this
+ * symbol have to be processed, runs qam64_qpsk() on them and advances the
+ * caller's running LLR write pointer.
+ *
+ * Parameters:
+ *   frame_parms         frame configuration; N_RB_DL, Ncp and
+ *                       nb_antenna_ports_eNB are read
+ *   rxdataF_comp        buffer set; entry [0] at offset symbol*N_RB_DL*12 is
+ *                       passed as the 1st argument of qam64_qpsk()
+ *   rxdataF_comp_i      same indexing, passed as the 2nd argument
+ *   dl_ch_mag           same indexing, passed as the 3rd argument
+ *   rho_i               same indexing, passed as the 5th argument
+ *   dlsch_llr           start of the LLR output buffer; only used as the write
+ *                       position when first_symbol_flag == 1
+ *   symbol              symbol index, used for the buffer offset and to
+ *                       decide whether the symbol carries pilots
+ *   first_symbol_flag   1: write at dlsch_llr; otherwise continue at *llr16p
+ *   nb_rb               number of resource blocks to process
+ *   pbch_pss_sss_adjust RE count subtracted from the symbol length (scaled by
+ *                       2/3 or 5/6 in pilot symbols)
+ *   llr16p              in/out running write pointer; on return it points
+ *                       6*len int16_t entries past the position written to
+ *
+ * Returns 0 in all cases; a NULL write position aborts through AssertFatal.
+ */
+int dlsch_64qam_qpsk_llr(LTE_DL_FRAME_PARMS *frame_parms,
+                         int32_t **rxdataF_comp,
+                         int32_t **rxdataF_comp_i,
+                         int32_t **dl_ch_mag,
+                         int32_t **rho_i,
+                         int16_t *dlsch_llr,
+                         uint8_t symbol,
+                         uint8_t first_symbol_flag,
+                         uint16_t nb_rb,
+                         uint16_t pbch_pss_sss_adjust,
+                         int16_t **llr16p)
+{
+
+  // Per-symbol views into the first entry of each buffer set; the int32_t
+  // samples are reinterpreted as int16_t for qam64_qpsk().
+  int16_t *rxF      = (int16_t*)&rxdataF_comp[0][(symbol*frame_parms->N_RB_DL*12)];
+  int16_t *rxF_i    = (int16_t*)&rxdataF_comp_i[0][(symbol*frame_parms->N_RB_DL*12)];
+  int16_t *ch_mag   = (int16_t*)&dl_ch_mag[0][(symbol*frame_parms->N_RB_DL*12)];
+  int16_t *rho      = (int16_t*)&rho_i[0][(symbol*frame_parms->N_RB_DL*12)];
+  int16_t *llr16;
+  int len;
+  // Symbol index folded back by (7-Ncp) when symbol >= (7-Ncp)
+  // (presumably the index within the slot -- confirm against callers).
+  uint8_t symbol_mod = (symbol >= (7-frame_parms->Ncp))? (symbol-(7-frame_parms->Ncp)) : symbol;
+
+  // The first symbol starts at the beginning of the LLR buffer, every other
+  // symbol continues where the previous call left off.
+  if (first_symbol_flag == 1) {
+    llr16 = (int16_t*)dlsch_llr;
+  } else {
+    llr16 = (int16_t*)(*llr16p);
+  }
+
+  // Report the failure under this function's own name (the message used to
+  // name dlsch_16qam_64qam_llr, a copy-paste leftover).
+  AssertFatal(llr16!=NULL,"dlsch_64qam_qpsk_llr:llr is null, symbol %d\n",symbol);
+
+  if ((symbol_mod==0) || (symbol_mod==(4-frame_parms->Ncp))) {
+    // if symbol has pilots
+    if (frame_parms->nb_antenna_ports_eNB!=1)
+      // in 2 antenna ports we have 8 REs per symbol per RB
+      len = (nb_rb*8) - (2*pbch_pss_sss_adjust/3);
+    else
+      // for 1 antenna port we have 10 REs per symbol per RB
+      len = (nb_rb*10) - (5*pbch_pss_sss_adjust/6);
+  } else {
+    // symbol has no pilots
+    len = (nb_rb*12) - pbch_pss_sss_adjust;
+  }
+
+  qam64_qpsk((short *)rxF,
+             (short *)rxF_i,
+             (short *)ch_mag,
+             (short *)llr16,
+             (short *)rho,
+             len);
+
+  // Advance the running pointer by 6 int16_t entries per RE and hand it back
+  // to the caller for the next symbol.
+  llr16 += (6*len);
+  *llr16p = (short *)llr16;
+  return(0);
+}
+
+
+
+void qam64_qam16(short *stream0_in,
+                 short *stream1_in,
+                 short *ch_mag,
+                 short *ch_mag_i,
+                 short *stream0_out,
+                 short *rho01,
+                 int length
+     )
+{
+
+  /*
+    Author: S. Wagner
+    Date: 31-07-12
+
+    Input:
+    stream0_in:  MF filter for 1st stream, i.e., y0=h0'*y
+    stream1_in:  MF filter for 2nd stream, i.e., y1=h1'*y
+    ch_mag:      4*h0/sqrt(42), [Re0 Im0 Re1 Im1] s.t. Im0=Re0, Im1=Re1, etc
+    ch_mag_i:    4*h1/sqrt(42), [Re0 Im0 Re1 Im1] s.t. Im0=Re0, Im1=Re1, etc
+    rho01:       Channel cross correlation, i.e., h1'*h0
+
+    Output:
+    stream0_out: output LLRs for 1st stream
+  */
+
+#if defined(__x86_64__) || defined(__i386__)
+
+  __m128i *rho01_128i      = (__m128i *)rho01;
+  __m128i *stream0_128i_in = (__m128i *)stream0_in;
+  __m128i *stream1_128i_in = (__m128i *)stream1_in;
+  __m128i *ch_mag_128i     = (__m128i *)ch_mag;
+  __m128i *ch_mag_128i_i   = (__m128i *)ch_mag_i;
+
+  __m128i ONE_OVER_SQRT_42 = _mm_set1_epi16(10112); // round(1/sqrt(42)*2^16)
+  __m128i THREE_OVER_SQRT_42 = _mm_set1_epi16(30337); // round(3/sqrt(42)*2^16)
+  __m128i FIVE_OVER_SQRT_42 = _mm_set1_epi16(25281); // round(5/sqrt(42)*2^15)
+  __m128i SEVEN_OVER_SQRT_42 = _mm_set1_epi16(17697); // round(5/sqrt(42)*2^15)
+  __m128i FORTYNINE_OVER_FOUR_SQRT_42 = _mm_set1_epi16(30969); // round(49/(4*sqrt(42))*2^14), Q2.14
+  __m128i THIRTYSEVEN_OVER_FOUR_SQRT_42 = _mm_set1_epi16(23385); // round(37/(4*sqrt(42))*2^14), Q2.14
+  __m128i TWENTYFIVE_OVER_FOUR_SQRT_42 = _mm_set1_epi16(31601); // round(25/(4*sqrt(42))*2^15)
+  __m128i TWENTYNINE_OVER_FOUR_SQRT_42 = _mm_set1_epi16(18329); // round(29/(4*sqrt(42))*2^15), Q2.14
+  __m128i SEVENTEEN_OVER_FOUR_SQRT_42 = _mm_set1_epi16(21489); // round(17/(4*sqrt(42))*2^15)
+  __m128i NINE_OVER_FOUR_SQRT_42 = _mm_set1_epi16(11376); // round(9/(4*sqrt(42))*2^15)
+  __m128i THIRTEEN_OVER_FOUR_SQRT_42 = _mm_set1_epi16(16433); // round(13/(4*sqrt(42))*2^15)
+  __m128i FIVE_OVER_FOUR_SQRT_42 = _mm_set1_epi16(6320); // round(5/(4*sqrt(42))*2^15)
+  __m128i ONE_OVER_FOUR_SQRT_42 = _mm_set1_epi16(1264); // round(1/(4*sqrt(42))*2^15)
+  __m128i ONE_OVER_SQRT_10_Q15 = _mm_set1_epi16(10362); // round(1/sqrt(10)*2^15)
+  __m128i THREE_OVER_SQRT_10 = _mm_set1_epi16(31086); // round(3/sqrt(10)*2^15)
+  __m128i SQRT_10_OVER_FOUR = _mm_set1_epi16(25905); // round(sqrt(10)/4*2^15)
+
+
+  __m128i ch_mag_int;
+  __m128i ch_mag_des;
+  __m128i ch_mag_98_over_42_with_sigma2;
+  __m128i ch_mag_74_over_42_with_sigma2;
+  __m128i ch_mag_58_over_42_with_sigma2;
+  __m128i ch_mag_50_over_42_with_sigma2;
+  __m128i ch_mag_34_over_42_with_sigma2;
+  __m128i ch_mag_18_over_42_with_sigma2;
+  __m128i ch_mag_26_over_42_with_sigma2;
+  __m128i ch_mag_10_over_42_with_sigma2;
+  __m128i ch_mag_2_over_42_with_sigma2;
+  __m128i  y0r_one_over_sqrt_21;
+  __m128i  y0r_three_over_sqrt_21;
+  __m128i  y0r_five_over_sqrt_21;
+  __m128i  y0r_seven_over_sqrt_21;
+  __m128i  y0i_one_over_sqrt_21;
+  __m128i  y0i_three_over_sqrt_21;
+  __m128i  y0i_five_over_sqrt_21;
+  __m128i  y0i_seven_over_sqrt_21;
+
+#elif defined(__arm__)
+
+#endif
+  int i,j;
+
+
+
+  for (i=0; i<length>>2; i+=2) {
+
+#if defined(__x86_64__) || defined(__i386__)
+    // Get rho
+    xmm0 = rho01_128i[i];
+    xmm1 = rho01_128i[i+1];
+    xmm0 = _mm_shufflelo_epi16(xmm0,0xd8); //_MM_SHUFFLE(0,2,1,3));
+    xmm0 = _mm_shufflehi_epi16(xmm0,0xd8); //_MM_SHUFFLE(0,2,1,3));
+    xmm0 = _mm_shuffle_epi32(xmm0,0xd8); //_MM_SHUFFLE(0,2,1,3));
+    xmm1 = _mm_shufflelo_epi16(xmm1,0xd8); //_MM_SHUFFLE(0,2,1,3));
+    xmm1 = _mm_shufflehi_epi16(xmm1,0xd8); //_MM_SHUFFLE(0,2,1,3));
+    xmm1 = _mm_shuffle_epi32(xmm1,0xd8); //_MM_SHUFFLE(0,2,1,3));
+    //xmm0 = [Re(0,1) Re(2,3) Im(0,1) Im(2,3)]
+    //xmm1 = [Re(4,5) Re(6,7) Im(4,5) Im(6,7)]
+    xmm2 = _mm_unpacklo_epi64(xmm0,xmm1); // Re(rho)
+    xmm3 = _mm_unpackhi_epi64(xmm0,xmm1); // Im(rho)
+    rho_rpi = _mm_adds_epi16(xmm2,xmm3); // rho = Re(rho) + Im(rho)
+    rho_rmi = _mm_subs_epi16(xmm2,xmm3); // rho* = Re(rho) - Im(rho)
+
+    // Compute the different rhos
+    rho_rpi_1_1 = _mm_mulhi_epi16(rho_rpi, ONE_OVER_SQRT_42);
+    rho_rmi_1_1 = _mm_mulhi_epi16(rho_rmi, ONE_OVER_SQRT_42);
+    rho_rpi_3_3 = _mm_mulhi_epi16(rho_rpi, THREE_OVER_SQRT_42);
+    rho_rmi_3_3 = _mm_mulhi_epi16(rho_rmi, THREE_OVER_SQRT_42);
+    rho_rpi_5_5 = _mm_mulhi_epi16(rho_rpi, FIVE_OVER_SQRT_42);
+    rho_rmi_5_5 = _mm_mulhi_epi16(rho_rmi, FIVE_OVER_SQRT_42);
+    rho_rpi_7_7 = _mm_mulhi_epi16(rho_rpi, SEVEN_OVER_SQRT_42);
+    rho_rmi_7_7 = _mm_mulhi_epi16(rho_rmi, SEVEN_OVER_SQRT_42);
+
+    rho_rpi_5_5 = _mm_slli_epi16(rho_rpi_5_5, 1);
+    rho_rmi_5_5 = _mm_slli_epi16(rho_rmi_5_5, 1);
+    rho_rpi_7_7 = _mm_slli_epi16(rho_rpi_7_7, 2);
+    rho_rmi_7_7 = _mm_slli_epi16(rho_rmi_7_7, 2);
+
+    xmm4 = _mm_mulhi_epi16(xmm2, ONE_OVER_SQRT_42);
+    xmm5 = _mm_mulhi_epi16(xmm3, ONE_OVER_SQRT_42);
+    xmm6 = _mm_mulhi_epi16(xmm3, THREE_OVER_SQRT_42);
+    xmm7 = _mm_mulhi_epi16(xmm3, FIVE_OVER_SQRT_42);
+    xmm8 = _mm_mulhi_epi16(xmm3, SEVEN_OVER_SQRT_42);
+    xmm7 = _mm_slli_epi16(xmm7, 1);
+    xmm8 = _mm_slli_epi16(xmm8, 2);
+
+    rho_rpi_1_3 = _mm_adds_epi16(xmm4, xmm6);
+    rho_rmi_1_3 = _mm_subs_epi16(xmm4, xmm6);
+    rho_rpi_1_5 = _mm_adds_epi16(xmm4, xmm7);
+    rho_rmi_1_5 = _mm_subs_epi16(xmm4, xmm7);
+    rho_rpi_1_7 = _mm_adds_epi16(xmm4, xmm8);
+    rho_rmi_1_7 = _mm_subs_epi16(xmm4, xmm8);
+
+    xmm4 = _mm_mulhi_epi16(xmm2, THREE_OVER_SQRT_42);
+    rho_rpi_3_1 = _mm_adds_epi16(xmm4, xmm5);
+    rho_rmi_3_1 = _mm_subs_epi16(xmm4, xmm5);
+    rho_rpi_3_5 = _mm_adds_epi16(xmm4, xmm7);
+    rho_rmi_3_5 = _mm_subs_epi16(xmm4, xmm7);
+    rho_rpi_3_7 = _mm_adds_epi16(xmm4, xmm8);
+    rho_rmi_3_7 = _mm_subs_epi16(xmm4, xmm8);
+
+    xmm4 = _mm_mulhi_epi16(xmm2, FIVE_OVER_SQRT_42);
+    xmm4 = _mm_slli_epi16(xmm4, 1);
+    rho_rpi_5_1 = _mm_adds_epi16(xmm4, xmm5);
+    rho_rmi_5_1 = _mm_subs_epi16(xmm4, xmm5);
+    rho_rpi_5_3 = _mm_adds_epi16(xmm4, xmm6);
+    rho_rmi_5_3 = _mm_subs_epi16(xmm4, xmm6);
+    rho_rpi_5_7 = _mm_adds_epi16(xmm4, xmm8);
+    rho_rmi_5_7 = _mm_subs_epi16(xmm4, xmm8);
+
+    xmm4 = _mm_mulhi_epi16(xmm2, SEVEN_OVER_SQRT_42);
+    xmm4 = _mm_slli_epi16(xmm4, 2);
+    rho_rpi_7_1 = _mm_adds_epi16(xmm4, xmm5);
+    rho_rmi_7_1 = _mm_subs_epi16(xmm4, xmm5);
+    rho_rpi_7_3 = _mm_adds_epi16(xmm4, xmm6);
+    rho_rmi_7_3 = _mm_subs_epi16(xmm4, xmm6);
+    rho_rpi_7_5 = _mm_adds_epi16(xmm4, xmm7);
+    rho_rmi_7_5 = _mm_subs_epi16(xmm4, xmm7);
+
+    // Rearrange interfering MF output
+    xmm0 = stream1_128i_in[i];
+    xmm1 = stream1_128i_in[i+1];
+    xmm0 = _mm_shufflelo_epi16(xmm0,0xd8); //_MM_SHUFFLE(0,2,1,3));
+    xmm0 = _mm_shufflehi_epi16(xmm0,0xd8); //_MM_SHUFFLE(0,2,1,3));
+    xmm0 = _mm_shuffle_epi32(xmm0,0xd8); //_MM_SHUFFLE(0,2,1,3));
+    xmm1 = _mm_shufflelo_epi16(xmm1,0xd8); //_MM_SHUFFLE(0,2,1,3));
+    xmm1 = _mm_shufflehi_epi16(xmm1,0xd8); //_MM_SHUFFLE(0,2,1,3));
+    xmm1 = _mm_shuffle_epi32(xmm1,0xd8); //_MM_SHUFFLE(0,2,1,3));
+    //xmm0 = [Re(0,1) Re(2,3) Im(0,1) Im(2,3)]
+    //xmm1 = [Re(4,5) Re(6,7) Im(4,5) Im(6,7)]
+    y1r = _mm_unpacklo_epi64(xmm0,xmm1); //[y1r(1),y1r(2),y1r(3),y1r(4)]
+    y1i = _mm_unpackhi_epi64(xmm0,xmm1); //[y1i(1),y1i(2),y1i(3),y1i(4)]
+
+    // Psi_r calculation from rho_rpi or rho_rmi
+    xmm0 = _mm_setzero_si128(); // ZERO for abs_pi16
+    xmm2 = _mm_subs_epi16(rho_rpi_7_7, y1r);
+    psi_r_p7_p7 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_subs_epi16(rho_rpi_7_5, y1r);
+    psi_r_p7_p5 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_subs_epi16(rho_rpi_7_3, y1r);
+    psi_r_p7_p3 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_subs_epi16(rho_rpi_7_1, y1r);
+    psi_r_p7_p1 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_subs_epi16(rho_rmi_7_1, y1r);
+    psi_r_p7_m1 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_subs_epi16(rho_rmi_7_3, y1r);
+    psi_r_p7_m3 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_subs_epi16(rho_rmi_7_5, y1r);
+    psi_r_p7_m5 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_subs_epi16(rho_rmi_7_7, y1r);
+    psi_r_p7_m7 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_subs_epi16(rho_rpi_5_7, y1r);
+    psi_r_p5_p7 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_subs_epi16(rho_rpi_5_5, y1r);
+    psi_r_p5_p5 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_subs_epi16(rho_rpi_5_3, y1r);
+    psi_r_p5_p3 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_subs_epi16(rho_rpi_5_1, y1r);
+    psi_r_p5_p1 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_subs_epi16(rho_rmi_5_1, y1r);
+    psi_r_p5_m1 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_subs_epi16(rho_rmi_5_3, y1r);
+    psi_r_p5_m3 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_subs_epi16(rho_rmi_5_5, y1r);
+    psi_r_p5_m5 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_subs_epi16(rho_rmi_5_7, y1r);
+    psi_r_p5_m7 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_subs_epi16(rho_rpi_3_7, y1r);
+    psi_r_p3_p7 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_subs_epi16(rho_rpi_3_5, y1r);
+    psi_r_p3_p5 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_subs_epi16(rho_rpi_3_3, y1r);
+    psi_r_p3_p3 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_subs_epi16(rho_rpi_3_1, y1r);
+    psi_r_p3_p1 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_subs_epi16(rho_rmi_3_1, y1r);
+    psi_r_p3_m1 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_subs_epi16(rho_rmi_3_3, y1r);
+    psi_r_p3_m3 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_subs_epi16(rho_rmi_3_5, y1r);
+    psi_r_p3_m5 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_subs_epi16(rho_rmi_3_7, y1r);
+    psi_r_p3_m7 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_subs_epi16(rho_rpi_1_7, y1r);
+    psi_r_p1_p7 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_subs_epi16(rho_rpi_1_5, y1r);
+    psi_r_p1_p5 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_subs_epi16(rho_rpi_1_3, y1r);
+    psi_r_p1_p3 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_subs_epi16(rho_rpi_1_1, y1r);
+    psi_r_p1_p1 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_subs_epi16(rho_rmi_1_1, y1r);
+    psi_r_p1_m1 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_subs_epi16(rho_rmi_1_3, y1r);
+    psi_r_p1_m3 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_subs_epi16(rho_rmi_1_5, y1r);
+    psi_r_p1_m5 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_subs_epi16(rho_rmi_1_7, y1r);
+    psi_r_p1_m7 = _mm_abs_epi16(xmm2);
+
+    xmm2 = _mm_adds_epi16(rho_rmi_1_7, y1r);
+    psi_r_m1_p7 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_adds_epi16(rho_rmi_1_5, y1r);
+    psi_r_m1_p5 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_adds_epi16(rho_rmi_1_3, y1r);
+    psi_r_m1_p3 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_adds_epi16(rho_rmi_1_1, y1r);
+    psi_r_m1_p1 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_adds_epi16(rho_rpi_1_1, y1r);
+    psi_r_m1_m1 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_adds_epi16(rho_rpi_1_3, y1r);
+    psi_r_m1_m3 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_adds_epi16(rho_rpi_1_5, y1r);
+    psi_r_m1_m5 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_adds_epi16(rho_rpi_1_7, y1r);
+    psi_r_m1_m7 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_adds_epi16(rho_rmi_3_7, y1r);
+    psi_r_m3_p7 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_adds_epi16(rho_rmi_3_5, y1r);
+    psi_r_m3_p5 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_adds_epi16(rho_rmi_3_3, y1r);
+    psi_r_m3_p3 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_adds_epi16(rho_rmi_3_1, y1r);
+    psi_r_m3_p1 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_adds_epi16(rho_rpi_3_1, y1r);
+    psi_r_m3_m1 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_adds_epi16(rho_rpi_3_3, y1r);
+    psi_r_m3_m3 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_adds_epi16(rho_rpi_3_5, y1r);
+    psi_r_m3_m5 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_adds_epi16(rho_rpi_3_7, y1r);
+    psi_r_m3_m7 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_adds_epi16(rho_rmi_5_7, y1r);
+    psi_r_m5_p7 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_adds_epi16(rho_rmi_5_5, y1r);
+    psi_r_m5_p5 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_adds_epi16(rho_rmi_5_3, y1r);
+    psi_r_m5_p3 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_adds_epi16(rho_rmi_5_1, y1r);
+    psi_r_m5_p1 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_adds_epi16(rho_rpi_5_1, y1r);
+    psi_r_m5_m1 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_adds_epi16(rho_rpi_5_3, y1r);
+    psi_r_m5_m3 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_adds_epi16(rho_rpi_5_5, y1r);
+    psi_r_m5_m5 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_adds_epi16(rho_rpi_5_7, y1r);
+    psi_r_m5_m7 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_adds_epi16(rho_rmi_7_7, y1r);
+    psi_r_m7_p7 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_adds_epi16(rho_rmi_7_5, y1r);
+    psi_r_m7_p5 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_adds_epi16(rho_rmi_7_3, y1r);
+    psi_r_m7_p3 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_adds_epi16(rho_rmi_7_1, y1r);
+    psi_r_m7_p1 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_adds_epi16(rho_rpi_7_1, y1r);
+    psi_r_m7_m1 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_adds_epi16(rho_rpi_7_3, y1r);
+    psi_r_m7_m3 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_adds_epi16(rho_rpi_7_5, y1r);
+    psi_r_m7_m5 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_adds_epi16(rho_rpi_7_7, y1r);
+    psi_r_m7_m7 = _mm_abs_epi16(xmm2);
+
+    // Psi_i calculation from rho_rpi or rho_rmi
+    xmm2 = _mm_subs_epi16(rho_rmi_7_7, y1i);
+    psi_i_p7_p7 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_subs_epi16(rho_rmi_5_7, y1i);
+    psi_i_p7_p5 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_subs_epi16(rho_rmi_3_7, y1i);
+    psi_i_p7_p3 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_subs_epi16(rho_rmi_1_7, y1i);
+    psi_i_p7_p1 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_adds_epi16(rho_rpi_1_7, y1i);
+    psi_i_p7_m1 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_adds_epi16(rho_rpi_3_7, y1i);
+    psi_i_p7_m3 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_adds_epi16(rho_rpi_5_7, y1i);
+    psi_i_p7_m5 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_adds_epi16(rho_rpi_7_7, y1i);
+    psi_i_p7_m7 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_subs_epi16(rho_rmi_7_5, y1i);
+    psi_i_p5_p7 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_subs_epi16(rho_rmi_5_5, y1i);
+    psi_i_p5_p5 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_subs_epi16(rho_rmi_3_5, y1i);
+    psi_i_p5_p3 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_subs_epi16(rho_rmi_1_5, y1i);
+    psi_i_p5_p1 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_adds_epi16(rho_rpi_1_5, y1i);
+    psi_i_p5_m1 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_adds_epi16(rho_rpi_3_5, y1i);
+    psi_i_p5_m3 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_adds_epi16(rho_rpi_5_5, y1i);
+    psi_i_p5_m5 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_adds_epi16(rho_rpi_7_5, y1i);
+    psi_i_p5_m7 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_subs_epi16(rho_rmi_7_3, y1i);
+    psi_i_p3_p7 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_subs_epi16(rho_rmi_5_3, y1i);
+    psi_i_p3_p5 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_subs_epi16(rho_rmi_3_3, y1i);
+    psi_i_p3_p3 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_subs_epi16(rho_rmi_1_3, y1i);
+    psi_i_p3_p1 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_adds_epi16(rho_rpi_1_3, y1i);
+    psi_i_p3_m1 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_adds_epi16(rho_rpi_3_3, y1i);
+    psi_i_p3_m3 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_adds_epi16(rho_rpi_5_3, y1i);
+    psi_i_p3_m5 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_adds_epi16(rho_rpi_7_3, y1i);
+    psi_i_p3_m7 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_subs_epi16(rho_rmi_7_1, y1i);
+    psi_i_p1_p7 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_subs_epi16(rho_rmi_5_1, y1i);
+    psi_i_p1_p5 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_subs_epi16(rho_rmi_3_1, y1i);
+    psi_i_p1_p3 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_subs_epi16(rho_rmi_1_1, y1i);
+    psi_i_p1_p1 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_adds_epi16(rho_rpi_1_1, y1i);
+    psi_i_p1_m1 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_adds_epi16(rho_rpi_3_1, y1i);
+    psi_i_p1_m3 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_adds_epi16(rho_rpi_5_1, y1i);
+    psi_i_p1_m5 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_adds_epi16(rho_rpi_7_1, y1i);
+    psi_i_p1_m7 = _mm_abs_epi16(xmm2);
+
+    xmm2 = _mm_subs_epi16(rho_rpi_7_1, y1i);
+    psi_i_m1_p7 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_subs_epi16(rho_rpi_5_1, y1i);
+    psi_i_m1_p5 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_subs_epi16(rho_rpi_3_1, y1i);
+    psi_i_m1_p3 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_subs_epi16(rho_rpi_1_1, y1i);
+    psi_i_m1_p1 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_adds_epi16(rho_rmi_1_1, y1i);
+    psi_i_m1_m1 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_adds_epi16(rho_rmi_3_1, y1i);
+    psi_i_m1_m3 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_adds_epi16(rho_rmi_5_1, y1i);
+    psi_i_m1_m5 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_adds_epi16(rho_rmi_7_1, y1i);
+    psi_i_m1_m7 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_subs_epi16(rho_rpi_7_3, y1i);
+    psi_i_m3_p7 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_subs_epi16(rho_rpi_5_3, y1i);
+    psi_i_m3_p5 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_subs_epi16(rho_rpi_3_3, y1i);
+    psi_i_m3_p3 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_subs_epi16(rho_rpi_1_3, y1i);
+    psi_i_m3_p1 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_adds_epi16(rho_rmi_1_3, y1i);
+    psi_i_m3_m1 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_adds_epi16(rho_rmi_3_3, y1i);
+    psi_i_m3_m3 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_adds_epi16(rho_rmi_5_3, y1i);
+    psi_i_m3_m5 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_adds_epi16(rho_rmi_7_3, y1i);
+    psi_i_m3_m7 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_subs_epi16(rho_rpi_7_5, y1i);
+    psi_i_m5_p7 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_subs_epi16(rho_rpi_5_5, y1i);
+    psi_i_m5_p5 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_subs_epi16(rho_rpi_3_5, y1i);
+    psi_i_m5_p3 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_subs_epi16(rho_rpi_1_5, y1i);
+    psi_i_m5_p1 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_adds_epi16(rho_rmi_1_5, y1i);
+    psi_i_m5_m1 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_adds_epi16(rho_rmi_3_5, y1i);
+    psi_i_m5_m3 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_adds_epi16(rho_rmi_5_5, y1i);
+    psi_i_m5_m5 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_adds_epi16(rho_rmi_7_5, y1i);
+    psi_i_m5_m7 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_subs_epi16(rho_rpi_7_7, y1i);
+    psi_i_m7_p7 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_subs_epi16(rho_rpi_5_7, y1i);
+    psi_i_m7_p5 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_subs_epi16(rho_rpi_3_7, y1i);
+    psi_i_m7_p3 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_subs_epi16(rho_rpi_1_7, y1i);
+    psi_i_m7_p1 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_adds_epi16(rho_rmi_1_7, y1i);
+    psi_i_m7_m1 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_adds_epi16(rho_rmi_3_7, y1i);
+    psi_i_m7_m3 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_adds_epi16(rho_rmi_5_7, y1i);
+    psi_i_m7_m5 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_adds_epi16(rho_rmi_7_7, y1i);
+    psi_i_m7_m7 = _mm_abs_epi16(xmm2);
+
+
+    // Rearrange desired MF output
+    xmm0 = stream0_128i_in[i];
+    xmm1 = stream0_128i_in[i+1];
+    xmm0 = _mm_shufflelo_epi16(xmm0,0xd8); //_MM_SHUFFLE(0,2,1,3));
+    xmm0 = _mm_shufflehi_epi16(xmm0,0xd8); //_MM_SHUFFLE(0,2,1,3));
+    xmm0 = _mm_shuffle_epi32(xmm0,0xd8); //_MM_SHUFFLE(0,2,1,3));
+    xmm1 = _mm_shufflelo_epi16(xmm1,0xd8); //_MM_SHUFFLE(0,2,1,3));
+    xmm1 = _mm_shufflehi_epi16(xmm1,0xd8); //_MM_SHUFFLE(0,2,1,3));
+    xmm1 = _mm_shuffle_epi32(xmm1,0xd8); //_MM_SHUFFLE(0,2,1,3));
+    //xmm0 = [Re(0,1) Re(2,3) Im(0,1) Im(2,3)]
+    //xmm1 = [Re(4,5) Re(6,7) Im(4,5) Im(6,7)]
+    y0r = _mm_unpacklo_epi64(xmm0,xmm1); // = [y0r(1),y0r(2),y0r(3),y0r(4)]
+    y0i = _mm_unpackhi_epi64(xmm0,xmm1);
+
+    // Rearrange desired channel magnitudes
+    xmm2 = ch_mag_128i[i]; // = [|h|^2(1),|h|^2(1),|h|^2(2),|h|^2(2)]*(2/sqrt(10))
+    xmm3 = ch_mag_128i[i+1]; // = [|h|^2(3),|h|^2(3),|h|^2(4),|h|^2(4)]*(2/sqrt(10))
+    xmm2 = _mm_shufflelo_epi16(xmm2,0xd8); //_MM_SHUFFLE(0,2,1,3));
+    xmm2 = _mm_shufflehi_epi16(xmm2,0xd8); //_MM_SHUFFLE(0,2,1,3));
+    xmm2 = _mm_shuffle_epi32(xmm2,0xd8); //_MM_SHUFFLE(0,2,1,3));
+    xmm3 = _mm_shufflelo_epi16(xmm3,0xd8); //_MM_SHUFFLE(0,2,1,3));
+    xmm3 = _mm_shufflehi_epi16(xmm3,0xd8); //_MM_SHUFFLE(0,2,1,3));
+    xmm3 = _mm_shuffle_epi32(xmm3,0xd8); //_MM_SHUFFLE(0,2,1,3));
+    ch_mag_des = _mm_unpacklo_epi64(xmm2,xmm3);
+
+    // Rearrange interfering channel magnitudes
+    xmm2 = ch_mag_128i_i[i];
+    xmm3 = ch_mag_128i_i[i+1];
+    xmm2 = _mm_shufflelo_epi16(xmm2,0xd8); //_MM_SHUFFLE(0,2,1,3));
+    xmm2 = _mm_shufflehi_epi16(xmm2,0xd8); //_MM_SHUFFLE(0,2,1,3));
+    xmm2 = _mm_shuffle_epi32(xmm2,0xd8); //_MM_SHUFFLE(0,2,1,3));
+    xmm3 = _mm_shufflelo_epi16(xmm3,0xd8); //_MM_SHUFFLE(0,2,1,3));
+    xmm3 = _mm_shufflehi_epi16(xmm3,0xd8); //_MM_SHUFFLE(0,2,1,3));
+    xmm3 = _mm_shuffle_epi32(xmm3,0xd8); //_MM_SHUFFLE(0,2,1,3));
+    ch_mag_int  = _mm_unpacklo_epi64(xmm2,xmm3);
+
+    y0r_one_over_sqrt_21   = _mm_mulhi_epi16(y0r, ONE_OVER_SQRT_42);
+    y0r_three_over_sqrt_21 = _mm_mulhi_epi16(y0r, THREE_OVER_SQRT_42);
+    y0r_five_over_sqrt_21  = _mm_mulhi_epi16(y0r, FIVE_OVER_SQRT_42);
+    y0r_five_over_sqrt_21  = _mm_slli_epi16(y0r_five_over_sqrt_21, 1);
+    y0r_seven_over_sqrt_21 = _mm_mulhi_epi16(y0r, SEVEN_OVER_SQRT_42);
+    y0r_seven_over_sqrt_21 = _mm_slli_epi16(y0r_seven_over_sqrt_21, 2); // Q2.14
+
+    y0i_one_over_sqrt_21   = _mm_mulhi_epi16(y0i, ONE_OVER_SQRT_42);
+    y0i_three_over_sqrt_21 = _mm_mulhi_epi16(y0i, THREE_OVER_SQRT_42);
+    y0i_five_over_sqrt_21  = _mm_mulhi_epi16(y0i, FIVE_OVER_SQRT_42);
+    y0i_five_over_sqrt_21  = _mm_slli_epi16(y0i_five_over_sqrt_21, 1);
+    y0i_seven_over_sqrt_21 = _mm_mulhi_epi16(y0i, SEVEN_OVER_SQRT_42);
+    y0i_seven_over_sqrt_21 = _mm_slli_epi16(y0i_seven_over_sqrt_21, 2); // Q2.14
+
+    y0_p_7_1 = _mm_adds_epi16(y0r_seven_over_sqrt_21, y0i_one_over_sqrt_21);
+    y0_p_7_3 = _mm_adds_epi16(y0r_seven_over_sqrt_21, y0i_three_over_sqrt_21);
+    y0_p_7_5 = _mm_adds_epi16(y0r_seven_over_sqrt_21, y0i_five_over_sqrt_21);
+    y0_p_7_7 = _mm_adds_epi16(y0r_seven_over_sqrt_21, y0i_seven_over_sqrt_21);
+    y0_p_5_1 = _mm_adds_epi16(y0r_five_over_sqrt_21, y0i_one_over_sqrt_21);
+    y0_p_5_3 = _mm_adds_epi16(y0r_five_over_sqrt_21, y0i_three_over_sqrt_21);
+    y0_p_5_5 = _mm_adds_epi16(y0r_five_over_sqrt_21, y0i_five_over_sqrt_21);
+    y0_p_5_7 = _mm_adds_epi16(y0r_five_over_sqrt_21, y0i_seven_over_sqrt_21);
+    y0_p_3_1 = _mm_adds_epi16(y0r_three_over_sqrt_21, y0i_one_over_sqrt_21);
+    y0_p_3_3 = _mm_adds_epi16(y0r_three_over_sqrt_21, y0i_three_over_sqrt_21);
+    y0_p_3_5 = _mm_adds_epi16(y0r_three_over_sqrt_21, y0i_five_over_sqrt_21);
+    y0_p_3_7 = _mm_adds_epi16(y0r_three_over_sqrt_21, y0i_seven_over_sqrt_21);
+    y0_p_1_1 = _mm_adds_epi16(y0r_one_over_sqrt_21, y0i_one_over_sqrt_21);
+    y0_p_1_3 = _mm_adds_epi16(y0r_one_over_sqrt_21, y0i_three_over_sqrt_21);
+    y0_p_1_5 = _mm_adds_epi16(y0r_one_over_sqrt_21, y0i_five_over_sqrt_21);
+    y0_p_1_7 = _mm_adds_epi16(y0r_one_over_sqrt_21, y0i_seven_over_sqrt_21);
+
+    y0_m_1_1 = _mm_subs_epi16(y0r_one_over_sqrt_21, y0i_one_over_sqrt_21);
+    y0_m_1_3 = _mm_subs_epi16(y0r_one_over_sqrt_21, y0i_three_over_sqrt_21);
+    y0_m_1_5 = _mm_subs_epi16(y0r_one_over_sqrt_21, y0i_five_over_sqrt_21);
+    y0_m_1_7 = _mm_subs_epi16(y0r_one_over_sqrt_21, y0i_seven_over_sqrt_21);
+    y0_m_3_1 = _mm_subs_epi16(y0r_three_over_sqrt_21, y0i_one_over_sqrt_21);
+    y0_m_3_3 = _mm_subs_epi16(y0r_three_over_sqrt_21, y0i_three_over_sqrt_21);
+    y0_m_3_5 = _mm_subs_epi16(y0r_three_over_sqrt_21, y0i_five_over_sqrt_21);
+    y0_m_3_7 = _mm_subs_epi16(y0r_three_over_sqrt_21, y0i_seven_over_sqrt_21);
+    y0_m_5_1 = _mm_subs_epi16(y0r_five_over_sqrt_21, y0i_one_over_sqrt_21);
+    y0_m_5_3 = _mm_subs_epi16(y0r_five_over_sqrt_21, y0i_three_over_sqrt_21);
+    y0_m_5_5 = _mm_subs_epi16(y0r_five_over_sqrt_21, y0i_five_over_sqrt_21);
+    y0_m_5_7 = _mm_subs_epi16(y0r_five_over_sqrt_21, y0i_seven_over_sqrt_21);
+    y0_m_7_1 = _mm_subs_epi16(y0r_seven_over_sqrt_21, y0i_one_over_sqrt_21);
+    y0_m_7_3 = _mm_subs_epi16(y0r_seven_over_sqrt_21, y0i_three_over_sqrt_21);
+    y0_m_7_5 = _mm_subs_epi16(y0r_seven_over_sqrt_21, y0i_five_over_sqrt_21);
+    y0_m_7_7 = _mm_subs_epi16(y0r_seven_over_sqrt_21, y0i_seven_over_sqrt_21);
+
+    interference_abs_epi16(psi_r_p7_p7, ch_mag_int, a_r_p7_p7, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10);
+    interference_abs_epi16(psi_r_p7_p5, ch_mag_int, a_r_p7_p5, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10);
+    interference_abs_epi16(psi_r_p7_p3, ch_mag_int, a_r_p7_p3, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10);
+    interference_abs_epi16(psi_r_p7_p1, ch_mag_int, a_r_p7_p1, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10);
+    interference_abs_epi16(psi_r_p7_m1, ch_mag_int, a_r_p7_m1, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10);
+    interference_abs_epi16(psi_r_p7_m3, ch_mag_int, a_r_p7_m3, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10);
+    interference_abs_epi16(psi_r_p7_m5, ch_mag_int, a_r_p7_m5, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10);
+    interference_abs_epi16(psi_r_p7_m7, ch_mag_int, a_r_p7_m7, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10);
+    interference_abs_epi16(psi_r_p5_p7, ch_mag_int, a_r_p5_p7, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10);
+    interference_abs_epi16(psi_r_p5_p5, ch_mag_int, a_r_p5_p5, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10);
+    interference_abs_epi16(psi_r_p5_p3, ch_mag_int, a_r_p5_p3, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10);
+    interference_abs_epi16(psi_r_p5_p1, ch_mag_int, a_r_p5_p1, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10);
+    interference_abs_epi16(psi_r_p5_m1, ch_mag_int, a_r_p5_m1, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10);
+    interference_abs_epi16(psi_r_p5_m3, ch_mag_int, a_r_p5_m3, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10);
+    interference_abs_epi16(psi_r_p5_m5, ch_mag_int, a_r_p5_m5, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10);
+    interference_abs_epi16(psi_r_p5_m7, ch_mag_int, a_r_p5_m7, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10);
+    interference_abs_epi16(psi_r_p3_p7, ch_mag_int, a_r_p3_p7, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10);
+    interference_abs_epi16(psi_r_p3_p5, ch_mag_int, a_r_p3_p5, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10);
+    interference_abs_epi16(psi_r_p3_p3, ch_mag_int, a_r_p3_p3, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10);
+    interference_abs_epi16(psi_r_p3_p1, ch_mag_int, a_r_p3_p1, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10);
+    interference_abs_epi16(psi_r_p3_m1, ch_mag_int, a_r_p3_m1, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10);
+    interference_abs_epi16(psi_r_p3_m3, ch_mag_int, a_r_p3_m3, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10);
+    interference_abs_epi16(psi_r_p3_m5, ch_mag_int, a_r_p3_m5, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10);
+    interference_abs_epi16(psi_r_p3_m7, ch_mag_int, a_r_p3_m7, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10);
+    interference_abs_epi16(psi_r_p1_p7, ch_mag_int, a_r_p1_p7, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10);
+    interference_abs_epi16(psi_r_p1_p5, ch_mag_int, a_r_p1_p5, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10);
+    interference_abs_epi16(psi_r_p1_p3, ch_mag_int, a_r_p1_p3, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10);
+    interference_abs_epi16(psi_r_p1_p1, ch_mag_int, a_r_p1_p1, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10);
+    interference_abs_epi16(psi_r_p1_m1, ch_mag_int, a_r_p1_m1, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10);
+    interference_abs_epi16(psi_r_p1_m3, ch_mag_int, a_r_p1_m3, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10);
+    interference_abs_epi16(psi_r_p1_m5, ch_mag_int, a_r_p1_m5, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10);
+    interference_abs_epi16(psi_r_p1_m7, ch_mag_int, a_r_p1_m7, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10);
+    interference_abs_epi16(psi_r_m1_p7, ch_mag_int, a_r_m1_p7, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10);
+    interference_abs_epi16(psi_r_m1_p5, ch_mag_int, a_r_m1_p5, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10);
+    interference_abs_epi16(psi_r_m1_p3, ch_mag_int, a_r_m1_p3, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10);
+    interference_abs_epi16(psi_r_m1_p1, ch_mag_int, a_r_m1_p1, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10);
+    interference_abs_epi16(psi_r_m1_m1, ch_mag_int, a_r_m1_m1, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10);
+    interference_abs_epi16(psi_r_m1_m3, ch_mag_int, a_r_m1_m3, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10);
+    interference_abs_epi16(psi_r_m1_m5, ch_mag_int, a_r_m1_m5, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10);
+    interference_abs_epi16(psi_r_m1_m7, ch_mag_int, a_r_m1_m7, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10);
+    interference_abs_epi16(psi_r_m3_p7, ch_mag_int, a_r_m3_p7, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10);
+    interference_abs_epi16(psi_r_m3_p5, ch_mag_int, a_r_m3_p5, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10);
+    interference_abs_epi16(psi_r_m3_p3, ch_mag_int, a_r_m3_p3, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10);
+    interference_abs_epi16(psi_r_m3_p1, ch_mag_int, a_r_m3_p1, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10);
+    interference_abs_epi16(psi_r_m3_m1, ch_mag_int, a_r_m3_m1, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10);
+    interference_abs_epi16(psi_r_m3_m3, ch_mag_int, a_r_m3_m3, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10);
+    interference_abs_epi16(psi_r_m3_m5, ch_mag_int, a_r_m3_m5, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10);
+    interference_abs_epi16(psi_r_m3_m7, ch_mag_int, a_r_m3_m7, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10);
+    interference_abs_epi16(psi_r_m5_p7, ch_mag_int, a_r_m5_p7, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10);
+    interference_abs_epi16(psi_r_m5_p5, ch_mag_int, a_r_m5_p5, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10);
+    interference_abs_epi16(psi_r_m5_p3, ch_mag_int, a_r_m5_p3, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10);
+    interference_abs_epi16(psi_r_m5_p1, ch_mag_int, a_r_m5_p1, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10);
+    interference_abs_epi16(psi_r_m5_m1, ch_mag_int, a_r_m5_m1, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10);
+    interference_abs_epi16(psi_r_m5_m3, ch_mag_int, a_r_m5_m3, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10);
+    interference_abs_epi16(psi_r_m5_m5, ch_mag_int, a_r_m5_m5, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10);
+    interference_abs_epi16(psi_r_m5_m7, ch_mag_int, a_r_m5_m7, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10);
+    interference_abs_epi16(psi_r_m7_p7, ch_mag_int, a_r_m7_p7, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10);
+    interference_abs_epi16(psi_r_m7_p5, ch_mag_int, a_r_m7_p5, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10);
+    interference_abs_epi16(psi_r_m7_p3, ch_mag_int, a_r_m7_p3, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10);
+    interference_abs_epi16(psi_r_m7_p1, ch_mag_int, a_r_m7_p1, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10);
+    interference_abs_epi16(psi_r_m7_m1, ch_mag_int, a_r_m7_m1, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10);
+    interference_abs_epi16(psi_r_m7_m3, ch_mag_int, a_r_m7_m3, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10);
+    interference_abs_epi16(psi_r_m7_m5, ch_mag_int, a_r_m7_m5, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10);
+    interference_abs_epi16(psi_r_m7_m7, ch_mag_int, a_r_m7_m7, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10);
+
+    interference_abs_epi16(psi_i_p7_p7, ch_mag_int, a_i_p7_p7, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10);
+    interference_abs_epi16(psi_i_p7_p5, ch_mag_int, a_i_p7_p5, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10);
+    interference_abs_epi16(psi_i_p7_p3, ch_mag_int, a_i_p7_p3, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10);
+    interference_abs_epi16(psi_i_p7_p1, ch_mag_int, a_i_p7_p1, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10);
+    interference_abs_epi16(psi_i_p7_m1, ch_mag_int, a_i_p7_m1, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10);
+    interference_abs_epi16(psi_i_p7_m3, ch_mag_int, a_i_p7_m3, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10);
+    interference_abs_epi16(psi_i_p7_m5, ch_mag_int, a_i_p7_m5, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10);
+    interference_abs_epi16(psi_i_p7_m7, ch_mag_int, a_i_p7_m7, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10);
+    interference_abs_epi16(psi_i_p5_p7, ch_mag_int, a_i_p5_p7, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10);
+    interference_abs_epi16(psi_i_p5_p5, ch_mag_int, a_i_p5_p5, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10);
+    interference_abs_epi16(psi_i_p5_p3, ch_mag_int, a_i_p5_p3, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10);
+    interference_abs_epi16(psi_i_p5_p1, ch_mag_int, a_i_p5_p1, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10);
+    interference_abs_epi16(psi_i_p5_m1, ch_mag_int, a_i_p5_m1, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10);
+    interference_abs_epi16(psi_i_p5_m3, ch_mag_int, a_i_p5_m3, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10);
+    interference_abs_epi16(psi_i_p5_m5, ch_mag_int, a_i_p5_m5, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10);
+    interference_abs_epi16(psi_i_p5_m7, ch_mag_int, a_i_p5_m7, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10);
+    interference_abs_epi16(psi_i_p3_p7, ch_mag_int, a_i_p3_p7, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10);
+    interference_abs_epi16(psi_i_p3_p5, ch_mag_int, a_i_p3_p5, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10);
+    interference_abs_epi16(psi_i_p3_p3, ch_mag_int, a_i_p3_p3, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10);
+    interference_abs_epi16(psi_i_p3_p1, ch_mag_int, a_i_p3_p1, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10);
+    interference_abs_epi16(psi_i_p3_m1, ch_mag_int, a_i_p3_m1, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10);
+    interference_abs_epi16(psi_i_p3_m3, ch_mag_int, a_i_p3_m3, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10);
+    interference_abs_epi16(psi_i_p3_m5, ch_mag_int, a_i_p3_m5, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10);
+    interference_abs_epi16(psi_i_p3_m7, ch_mag_int, a_i_p3_m7, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10);
+    interference_abs_epi16(psi_i_p1_p7, ch_mag_int, a_i_p1_p7, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10);
+    interference_abs_epi16(psi_i_p1_p5, ch_mag_int, a_i_p1_p5, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10);
+    interference_abs_epi16(psi_i_p1_p3, ch_mag_int, a_i_p1_p3, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10);
+    interference_abs_epi16(psi_i_p1_p1, ch_mag_int, a_i_p1_p1, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10);
+    interference_abs_epi16(psi_i_p1_m1, ch_mag_int, a_i_p1_m1, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10);
+    interference_abs_epi16(psi_i_p1_m3, ch_mag_int, a_i_p1_m3, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10);
+    interference_abs_epi16(psi_i_p1_m5, ch_mag_int, a_i_p1_m5, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10);
+    interference_abs_epi16(psi_i_p1_m7, ch_mag_int, a_i_p1_m7, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10);
+    interference_abs_epi16(psi_i_m1_p7, ch_mag_int, a_i_m1_p7, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10);
+    interference_abs_epi16(psi_i_m1_p5, ch_mag_int, a_i_m1_p5, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10);
+    interference_abs_epi16(psi_i_m1_p3, ch_mag_int, a_i_m1_p3, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10);
+    interference_abs_epi16(psi_i_m1_p1, ch_mag_int, a_i_m1_p1, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10);
+    interference_abs_epi16(psi_i_m1_m1, ch_mag_int, a_i_m1_m1, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10);
+    interference_abs_epi16(psi_i_m1_m3, ch_mag_int, a_i_m1_m3, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10);
+    interference_abs_epi16(psi_i_m1_m5, ch_mag_int, a_i_m1_m5, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10);
+    interference_abs_epi16(psi_i_m1_m7, ch_mag_int, a_i_m1_m7, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10);
+    interference_abs_epi16(psi_i_m3_p7, ch_mag_int, a_i_m3_p7, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10);
+    interference_abs_epi16(psi_i_m3_p5, ch_mag_int, a_i_m3_p5, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10);
+    interference_abs_epi16(psi_i_m3_p3, ch_mag_int, a_i_m3_p3, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10);
+    interference_abs_epi16(psi_i_m3_p1, ch_mag_int, a_i_m3_p1, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10);
+    interference_abs_epi16(psi_i_m3_m1, ch_mag_int, a_i_m3_m1, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10);
+    interference_abs_epi16(psi_i_m3_m3, ch_mag_int, a_i_m3_m3, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10);
+    interference_abs_epi16(psi_i_m3_m5, ch_mag_int, a_i_m3_m5, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10);
+    interference_abs_epi16(psi_i_m3_m7, ch_mag_int, a_i_m3_m7, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10);
+    interference_abs_epi16(psi_i_m5_p7, ch_mag_int, a_i_m5_p7, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10);
+    interference_abs_epi16(psi_i_m5_p5, ch_mag_int, a_i_m5_p5, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10);
+    interference_abs_epi16(psi_i_m5_p3, ch_mag_int, a_i_m5_p3, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10);
+    interference_abs_epi16(psi_i_m5_p1, ch_mag_int, a_i_m5_p1, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10);
+    interference_abs_epi16(psi_i_m5_m1, ch_mag_int, a_i_m5_m1, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10);
+    interference_abs_epi16(psi_i_m5_m3, ch_mag_int, a_i_m5_m3, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10);
+    interference_abs_epi16(psi_i_m5_m5, ch_mag_int, a_i_m5_m5, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10);
+    interference_abs_epi16(psi_i_m5_m7, ch_mag_int, a_i_m5_m7, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10);
+    interference_abs_epi16(psi_i_m7_p7, ch_mag_int, a_i_m7_p7, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10);
+    interference_abs_epi16(psi_i_m7_p5, ch_mag_int, a_i_m7_p5, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10);
+    interference_abs_epi16(psi_i_m7_p3, ch_mag_int, a_i_m7_p3, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10);
+    interference_abs_epi16(psi_i_m7_p1, ch_mag_int, a_i_m7_p1, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10);
+    interference_abs_epi16(psi_i_m7_m1, ch_mag_int, a_i_m7_m1, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10);
+    interference_abs_epi16(psi_i_m7_m3, ch_mag_int, a_i_m7_m3, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10);
+    interference_abs_epi16(psi_i_m7_m5, ch_mag_int, a_i_m7_m5, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10);
+    interference_abs_epi16(psi_i_m7_m7, ch_mag_int, a_i_m7_m7, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10);
+
+    // Calculation of a group of two terms in the bit metric involving product of psi and interference
+    prodsum_psi_a_epi16(psi_r_p7_p7, a_r_p7_p7, psi_i_p7_p7, a_i_p7_p7, psi_a_p7_p7);
+    prodsum_psi_a_epi16(psi_r_p7_p5, a_r_p7_p5, psi_i_p7_p5, a_i_p7_p5, psi_a_p7_p5);
+    prodsum_psi_a_epi16(psi_r_p7_p3, a_r_p7_p3, psi_i_p7_p3, a_i_p7_p3, psi_a_p7_p3);
+    prodsum_psi_a_epi16(psi_r_p7_p1, a_r_p7_p1, psi_i_p7_p1, a_i_p7_p1, psi_a_p7_p1);
+    prodsum_psi_a_epi16(psi_r_p7_m1, a_r_p7_m1, psi_i_p7_m1, a_i_p7_m1, psi_a_p7_m1);
+    prodsum_psi_a_epi16(psi_r_p7_m3, a_r_p7_m3, psi_i_p7_m3, a_i_p7_m3, psi_a_p7_m3);
+    prodsum_psi_a_epi16(psi_r_p7_m5, a_r_p7_m5, psi_i_p7_m5, a_i_p7_m5, psi_a_p7_m5);
+    prodsum_psi_a_epi16(psi_r_p7_m7, a_r_p7_m7, psi_i_p7_m7, a_i_p7_m7, psi_a_p7_m7);
+    prodsum_psi_a_epi16(psi_r_p5_p7, a_r_p5_p7, psi_i_p5_p7, a_i_p5_p7, psi_a_p5_p7);
+    prodsum_psi_a_epi16(psi_r_p5_p5, a_r_p5_p5, psi_i_p5_p5, a_i_p5_p5, psi_a_p5_p5);
+    prodsum_psi_a_epi16(psi_r_p5_p3, a_r_p5_p3, psi_i_p5_p3, a_i_p5_p3, psi_a_p5_p3);
+    prodsum_psi_a_epi16(psi_r_p5_p1, a_r_p5_p1, psi_i_p5_p1, a_i_p5_p1, psi_a_p5_p1);
+    prodsum_psi_a_epi16(psi_r_p5_m1, a_r_p5_m1, psi_i_p5_m1, a_i_p5_m1, psi_a_p5_m1);
+    prodsum_psi_a_epi16(psi_r_p5_m3, a_r_p5_m3, psi_i_p5_m3, a_i_p5_m3, psi_a_p5_m3);
+    prodsum_psi_a_epi16(psi_r_p5_m5, a_r_p5_m5, psi_i_p5_m5, a_i_p5_m5, psi_a_p5_m5);
+    prodsum_psi_a_epi16(psi_r_p5_m7, a_r_p5_m7, psi_i_p5_m7, a_i_p5_m7, psi_a_p5_m7);
+    prodsum_psi_a_epi16(psi_r_p3_p7, a_r_p3_p7, psi_i_p3_p7, a_i_p3_p7, psi_a_p3_p7);
+    prodsum_psi_a_epi16(psi_r_p3_p5, a_r_p3_p5, psi_i_p3_p5, a_i_p3_p5, psi_a_p3_p5);
+    prodsum_psi_a_epi16(psi_r_p3_p3, a_r_p3_p3, psi_i_p3_p3, a_i_p3_p3, psi_a_p3_p3);
+    prodsum_psi_a_epi16(psi_r_p3_p1, a_r_p3_p1, psi_i_p3_p1, a_i_p3_p1, psi_a_p3_p1);
+    prodsum_psi_a_epi16(psi_r_p3_m1, a_r_p3_m1, psi_i_p3_m1, a_i_p3_m1, psi_a_p3_m1);
+    prodsum_psi_a_epi16(psi_r_p3_m3, a_r_p3_m3, psi_i_p3_m3, a_i_p3_m3, psi_a_p3_m3);
+    prodsum_psi_a_epi16(psi_r_p3_m5, a_r_p3_m5, psi_i_p3_m5, a_i_p3_m5, psi_a_p3_m5);
+    prodsum_psi_a_epi16(psi_r_p3_m7, a_r_p3_m7, psi_i_p3_m7, a_i_p3_m7, psi_a_p3_m7);
+    prodsum_psi_a_epi16(psi_r_p1_p7, a_r_p1_p7, psi_i_p1_p7, a_i_p1_p7, psi_a_p1_p7);
+    prodsum_psi_a_epi16(psi_r_p1_p5, a_r_p1_p5, psi_i_p1_p5, a_i_p1_p5, psi_a_p1_p5);
+    prodsum_psi_a_epi16(psi_r_p1_p3, a_r_p1_p3, psi_i_p1_p3, a_i_p1_p3, psi_a_p1_p3);
+    prodsum_psi_a_epi16(psi_r_p1_p1, a_r_p1_p1, psi_i_p1_p1, a_i_p1_p1, psi_a_p1_p1);
+    prodsum_psi_a_epi16(psi_r_p1_m1, a_r_p1_m1, psi_i_p1_m1, a_i_p1_m1, psi_a_p1_m1);
+    prodsum_psi_a_epi16(psi_r_p1_m3, a_r_p1_m3, psi_i_p1_m3, a_i_p1_m3, psi_a_p1_m3);
+    prodsum_psi_a_epi16(psi_r_p1_m5, a_r_p1_m5, psi_i_p1_m5, a_i_p1_m5, psi_a_p1_m5);
+    prodsum_psi_a_epi16(psi_r_p1_m7, a_r_p1_m7, psi_i_p1_m7, a_i_p1_m7, psi_a_p1_m7);
+    prodsum_psi_a_epi16(psi_r_m1_p7, a_r_m1_p7, psi_i_m1_p7, a_i_m1_p7, psi_a_m1_p7);
+    prodsum_psi_a_epi16(psi_r_m1_p5, a_r_m1_p5, psi_i_m1_p5, a_i_m1_p5, psi_a_m1_p5);
+    prodsum_psi_a_epi16(psi_r_m1_p3, a_r_m1_p3, psi_i_m1_p3, a_i_m1_p3, psi_a_m1_p3);
+    prodsum_psi_a_epi16(psi_r_m1_p1, a_r_m1_p1, psi_i_m1_p1, a_i_m1_p1, psi_a_m1_p1);
+    prodsum_psi_a_epi16(psi_r_m1_m1, a_r_m1_m1, psi_i_m1_m1, a_i_m1_m1, psi_a_m1_m1);
+    prodsum_psi_a_epi16(psi_r_m1_m3, a_r_m1_m3, psi_i_m1_m3, a_i_m1_m3, psi_a_m1_m3);
+    prodsum_psi_a_epi16(psi_r_m1_m5, a_r_m1_m5, psi_i_m1_m5, a_i_m1_m5, psi_a_m1_m5);
+    prodsum_psi_a_epi16(psi_r_m1_m7, a_r_m1_m7, psi_i_m1_m7, a_i_m1_m7, psi_a_m1_m7);
+    prodsum_psi_a_epi16(psi_r_m3_p7, a_r_m3_p7, psi_i_m3_p7, a_i_m3_p7, psi_a_m3_p7);
+    prodsum_psi_a_epi16(psi_r_m3_p5, a_r_m3_p5, psi_i_m3_p5, a_i_m3_p5, psi_a_m3_p5);
+    prodsum_psi_a_epi16(psi_r_m3_p3, a_r_m3_p3, psi_i_m3_p3, a_i_m3_p3, psi_a_m3_p3);
+    prodsum_psi_a_epi16(psi_r_m3_p1, a_r_m3_p1, psi_i_m3_p1, a_i_m3_p1, psi_a_m3_p1);
+    prodsum_psi_a_epi16(psi_r_m3_m1, a_r_m3_m1, psi_i_m3_m1, a_i_m3_m1, psi_a_m3_m1);
+    prodsum_psi_a_epi16(psi_r_m3_m3, a_r_m3_m3, psi_i_m3_m3, a_i_m3_m3, psi_a_m3_m3);
+    prodsum_psi_a_epi16(psi_r_m3_m5, a_r_m3_m5, psi_i_m3_m5, a_i_m3_m5, psi_a_m3_m5);
+    prodsum_psi_a_epi16(psi_r_m3_m7, a_r_m3_m7, psi_i_m3_m7, a_i_m3_m7, psi_a_m3_m7);
+    prodsum_psi_a_epi16(psi_r_m5_p7, a_r_m5_p7, psi_i_m5_p7, a_i_m5_p7, psi_a_m5_p7);
+    prodsum_psi_a_epi16(psi_r_m5_p5, a_r_m5_p5, psi_i_m5_p5, a_i_m5_p5, psi_a_m5_p5);
+    prodsum_psi_a_epi16(psi_r_m5_p3, a_r_m5_p3, psi_i_m5_p3, a_i_m5_p3, psi_a_m5_p3);
+    prodsum_psi_a_epi16(psi_r_m5_p1, a_r_m5_p1, psi_i_m5_p1, a_i_m5_p1, psi_a_m5_p1);
+    prodsum_psi_a_epi16(psi_r_m5_m1, a_r_m5_m1, psi_i_m5_m1, a_i_m5_m1, psi_a_m5_m1);
+    prodsum_psi_a_epi16(psi_r_m5_m3, a_r_m5_m3, psi_i_m5_m3, a_i_m5_m3, psi_a_m5_m3);
+    prodsum_psi_a_epi16(psi_r_m5_m5, a_r_m5_m5, psi_i_m5_m5, a_i_m5_m5, psi_a_m5_m5);
+    prodsum_psi_a_epi16(psi_r_m5_m7, a_r_m5_m7, psi_i_m5_m7, a_i_m5_m7, psi_a_m5_m7);
+    prodsum_psi_a_epi16(psi_r_m7_p7, a_r_m7_p7, psi_i_m7_p7, a_i_m7_p7, psi_a_m7_p7);
+    prodsum_psi_a_epi16(psi_r_m7_p5, a_r_m7_p5, psi_i_m7_p5, a_i_m7_p5, psi_a_m7_p5);
+    prodsum_psi_a_epi16(psi_r_m7_p3, a_r_m7_p3, psi_i_m7_p3, a_i_m7_p3, psi_a_m7_p3);
+    prodsum_psi_a_epi16(psi_r_m7_p1, a_r_m7_p1, psi_i_m7_p1, a_i_m7_p1, psi_a_m7_p1);
+    prodsum_psi_a_epi16(psi_r_m7_m1, a_r_m7_m1, psi_i_m7_m1, a_i_m7_m1, psi_a_m7_m1);
+    prodsum_psi_a_epi16(psi_r_m7_m3, a_r_m7_m3, psi_i_m7_m3, a_i_m7_m3, psi_a_m7_m3);
+    prodsum_psi_a_epi16(psi_r_m7_m5, a_r_m7_m5, psi_i_m7_m5, a_i_m7_m5, psi_a_m7_m5);
+    prodsum_psi_a_epi16(psi_r_m7_m7, a_r_m7_m7, psi_i_m7_m7, a_i_m7_m7, psi_a_m7_m7);
+
+    // Calculation of a group of two terms in the bit metric involving squares of interference
+    square_a_epi16(a_r_p7_p7, a_i_p7_p7, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_p7_p7);
+    square_a_epi16(a_r_p7_p5, a_i_p7_p5, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_p7_p5);
+    square_a_epi16(a_r_p7_p3, a_i_p7_p3, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_p7_p3);
+    square_a_epi16(a_r_p7_p1, a_i_p7_p1, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_p7_p1);
+    square_a_epi16(a_r_p7_m1, a_i_p7_m1, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_p7_m1);
+    square_a_epi16(a_r_p7_m3, a_i_p7_m3, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_p7_m3);
+    square_a_epi16(a_r_p7_m5, a_i_p7_m5, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_p7_m5);
+    square_a_epi16(a_r_p7_m7, a_i_p7_m7, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_p7_m7);
+    square_a_epi16(a_r_p5_p7, a_i_p5_p7, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_p5_p7);
+    square_a_epi16(a_r_p5_p5, a_i_p5_p5, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_p5_p5);
+    square_a_epi16(a_r_p5_p3, a_i_p5_p3, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_p5_p3);
+    square_a_epi16(a_r_p5_p1, a_i_p5_p1, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_p5_p1);
+    square_a_epi16(a_r_p5_m1, a_i_p5_m1, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_p5_m1);
+    square_a_epi16(a_r_p5_m3, a_i_p5_m3, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_p5_m3);
+    square_a_epi16(a_r_p5_m5, a_i_p5_m5, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_p5_m5);
+    square_a_epi16(a_r_p5_m7, a_i_p5_m7, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_p5_m7);
+    square_a_epi16(a_r_p3_p7, a_i_p3_p7, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_p3_p7);
+    square_a_epi16(a_r_p3_p5, a_i_p3_p5, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_p3_p5);
+    square_a_epi16(a_r_p3_p3, a_i_p3_p3, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_p3_p3);
+    square_a_epi16(a_r_p3_p1, a_i_p3_p1, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_p3_p1);
+    square_a_epi16(a_r_p3_m1, a_i_p3_m1, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_p3_m1);
+    square_a_epi16(a_r_p3_m3, a_i_p3_m3, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_p3_m3);
+    square_a_epi16(a_r_p3_m5, a_i_p3_m5, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_p3_m5);
+    square_a_epi16(a_r_p3_m7, a_i_p3_m7, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_p3_m7);
+    square_a_epi16(a_r_p1_p7, a_i_p1_p7, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_p1_p7);
+    square_a_epi16(a_r_p1_p5, a_i_p1_p5, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_p1_p5);
+    square_a_epi16(a_r_p1_p3, a_i_p1_p3, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_p1_p3);
+    square_a_epi16(a_r_p1_p1, a_i_p1_p1, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_p1_p1);
+    square_a_epi16(a_r_p1_m1, a_i_p1_m1, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_p1_m1);
+    square_a_epi16(a_r_p1_m3, a_i_p1_m3, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_p1_m3);
+    square_a_epi16(a_r_p1_m5, a_i_p1_m5, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_p1_m5);
+    square_a_epi16(a_r_p1_m7, a_i_p1_m7, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_p1_m7);
+    square_a_epi16(a_r_m1_p7, a_i_m1_p7, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_m1_p7);
+    square_a_epi16(a_r_m1_p5, a_i_m1_p5, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_m1_p5);
+    square_a_epi16(a_r_m1_p3, a_i_m1_p3, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_m1_p3);
+    square_a_epi16(a_r_m1_p1, a_i_m1_p1, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_m1_p1);
+    square_a_epi16(a_r_m1_m1, a_i_m1_m1, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_m1_m1);
+    square_a_epi16(a_r_m1_m3, a_i_m1_m3, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_m1_m3);
+    square_a_epi16(a_r_m1_m5, a_i_m1_m5, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_m1_m5);
+    square_a_epi16(a_r_m1_m7, a_i_m1_m7, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_m1_m7);
+    square_a_epi16(a_r_m3_p7, a_i_m3_p7, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_m3_p7);
+    square_a_epi16(a_r_m3_p5, a_i_m3_p5, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_m3_p5);
+    square_a_epi16(a_r_m3_p3, a_i_m3_p3, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_m3_p3);
+    square_a_epi16(a_r_m3_p1, a_i_m3_p1, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_m3_p1);
+    square_a_epi16(a_r_m3_m1, a_i_m3_m1, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_m3_m1);
+    square_a_epi16(a_r_m3_m3, a_i_m3_m3, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_m3_m3);
+    square_a_epi16(a_r_m3_m5, a_i_m3_m5, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_m3_m5);
+    square_a_epi16(a_r_m3_m7, a_i_m3_m7, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_m3_m7);
+    square_a_epi16(a_r_m5_p7, a_i_m5_p7, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_m5_p7);
+    square_a_epi16(a_r_m5_p5, a_i_m5_p5, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_m5_p5);
+    square_a_epi16(a_r_m5_p3, a_i_m5_p3, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_m5_p3);
+    square_a_epi16(a_r_m5_p1, a_i_m5_p1, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_m5_p1);
+    square_a_epi16(a_r_m5_m1, a_i_m5_m1, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_m5_m1);
+    square_a_epi16(a_r_m5_m3, a_i_m5_m3, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_m5_m3);
+    square_a_epi16(a_r_m5_m5, a_i_m5_m5, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_m5_m5);
+    square_a_epi16(a_r_m5_m7, a_i_m5_m7, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_m5_m7);
+    square_a_epi16(a_r_m7_p7, a_i_m7_p7, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_m7_p7);
+    square_a_epi16(a_r_m7_p5, a_i_m7_p5, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_m7_p5);
+    square_a_epi16(a_r_m7_p3, a_i_m7_p3, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_m7_p3);
+    square_a_epi16(a_r_m7_p1, a_i_m7_p1, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_m7_p1);
+    square_a_epi16(a_r_m7_m1, a_i_m7_m1, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_m7_m1);
+    square_a_epi16(a_r_m7_m3, a_i_m7_m3, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_m7_m3);
+    square_a_epi16(a_r_m7_m5, a_i_m7_m5, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_m7_m5);
+    square_a_epi16(a_r_m7_m7, a_i_m7_m7, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_m7_m7);
+
+    // Computing different multiples of ||h0||^2
+    // x=1, y=1
+    ch_mag_2_over_42_with_sigma2 = _mm_mulhi_epi16(ch_mag_des,ONE_OVER_FOUR_SQRT_42);
+    ch_mag_2_over_42_with_sigma2 = _mm_slli_epi16(ch_mag_2_over_42_with_sigma2,1);
+    // x=1, y=3
+    ch_mag_10_over_42_with_sigma2 = _mm_mulhi_epi16(ch_mag_des,FIVE_OVER_FOUR_SQRT_42);
+    ch_mag_10_over_42_with_sigma2 = _mm_slli_epi16(ch_mag_10_over_42_with_sigma2,1);
+    // x=1, y=5
+    ch_mag_26_over_42_with_sigma2 = _mm_mulhi_epi16(ch_mag_des,THIRTEEN_OVER_FOUR_SQRT_42);
+    ch_mag_26_over_42_with_sigma2 = _mm_slli_epi16(ch_mag_26_over_42_with_sigma2,1);
+    // x=1, y=7
+    ch_mag_50_over_42_with_sigma2 = _mm_mulhi_epi16(ch_mag_des,TWENTYFIVE_OVER_FOUR_SQRT_42);
+    ch_mag_50_over_42_with_sigma2 = _mm_slli_epi16(ch_mag_50_over_42_with_sigma2,1);
+    // x=3, y=3
+    ch_mag_18_over_42_with_sigma2 = _mm_mulhi_epi16(ch_mag_des,NINE_OVER_FOUR_SQRT_42);
+    ch_mag_18_over_42_with_sigma2 = _mm_slli_epi16(ch_mag_18_over_42_with_sigma2,1);
+    // x=3, y=5
+    ch_mag_34_over_42_with_sigma2 = _mm_mulhi_epi16(ch_mag_des,SEVENTEEN_OVER_FOUR_SQRT_42);
+    ch_mag_34_over_42_with_sigma2 = _mm_slli_epi16(ch_mag_34_over_42_with_sigma2,1);
+    // x=3, y=7
+    ch_mag_58_over_42_with_sigma2 = _mm_mulhi_epi16(ch_mag_des,TWENTYNINE_OVER_FOUR_SQRT_42);
+    ch_mag_58_over_42_with_sigma2 = _mm_slli_epi16(ch_mag_58_over_42_with_sigma2,2);
+    // x=5, y=5
+    ch_mag_50_over_42_with_sigma2 = _mm_mulhi_epi16(ch_mag_des,TWENTYFIVE_OVER_FOUR_SQRT_42);
+    ch_mag_50_over_42_with_sigma2 = _mm_slli_epi16(ch_mag_50_over_42_with_sigma2,1);
+    // x=5, y=7
+    ch_mag_74_over_42_with_sigma2 = _mm_mulhi_epi16(ch_mag_des,THIRTYSEVEN_OVER_FOUR_SQRT_42);
+    ch_mag_74_over_42_with_sigma2 = _mm_slli_epi16(ch_mag_74_over_42_with_sigma2,2);
+    // x=7, y=7
+    ch_mag_98_over_42_with_sigma2 = _mm_mulhi_epi16(ch_mag_des,FORTYNINE_OVER_FOUR_SQRT_42);
+    ch_mag_98_over_42_with_sigma2 = _mm_slli_epi16(ch_mag_98_over_42_with_sigma2,2);
+
+    // Computing Metrics
+    xmm0 = _mm_subs_epi16(psi_a_p7_p7, a_sq_p7_p7);
+    xmm1 = _mm_adds_epi16(xmm0, y0_p_7_7);
+    bit_met_p7_p7 = _mm_subs_epi16(xmm1, ch_mag_98_over_42_with_sigma2);
+    xmm0 = _mm_subs_epi16(psi_a_p7_p5, a_sq_p7_p5);
+    xmm1 = _mm_adds_epi16(xmm0, y0_p_7_5);
+    bit_met_p7_p5 = _mm_subs_epi16(xmm1, ch_mag_74_over_42_with_sigma2);
+    xmm0 = _mm_subs_epi16(psi_a_p7_p3, a_sq_p7_p3);
+    xmm1 = _mm_adds_epi16(xmm0, y0_p_7_3);
+    bit_met_p7_p3 = _mm_subs_epi16(xmm1, ch_mag_58_over_42_with_sigma2);
+    xmm0 = _mm_subs_epi16(psi_a_p7_p1, a_sq_p7_p1);
+    xmm1 = _mm_adds_epi16(xmm0, y0_p_7_1);
+    bit_met_p7_p1 = _mm_subs_epi16(xmm1, ch_mag_50_over_42_with_sigma2);
+    xmm0 = _mm_subs_epi16(psi_a_p7_m1, a_sq_p7_m1);
+    xmm1 = _mm_adds_epi16(xmm0, y0_m_7_1);
+    bit_met_p7_m1 = _mm_subs_epi16(xmm1, ch_mag_50_over_42_with_sigma2);
+    xmm0 = _mm_subs_epi16(psi_a_p7_m3, a_sq_p7_m3);
+    xmm1 = _mm_adds_epi16(xmm0, y0_m_7_3);
+    bit_met_p7_m3 = _mm_subs_epi16(xmm1, ch_mag_58_over_42_with_sigma2);
+    xmm0 = _mm_subs_epi16(psi_a_p7_m5, a_sq_p7_m5);
+    xmm1 = _mm_adds_epi16(xmm0, y0_m_7_5);
+    bit_met_p7_m5 = _mm_subs_epi16(xmm1, ch_mag_74_over_42_with_sigma2);
+    xmm0 = _mm_subs_epi16(psi_a_p7_m7, a_sq_p7_m7);
+    xmm1 = _mm_adds_epi16(xmm0, y0_m_7_7);
+    bit_met_p7_m7 = _mm_subs_epi16(xmm1, ch_mag_98_over_42_with_sigma2);
+    xmm0 = _mm_subs_epi16(psi_a_p5_p7, a_sq_p5_p7);
+    xmm1 = _mm_adds_epi16(xmm0, y0_p_5_7);
+    bit_met_p5_p7 = _mm_subs_epi16(xmm1, ch_mag_74_over_42_with_sigma2);
+    xmm0 = _mm_subs_epi16(psi_a_p5_p5, a_sq_p5_p5);
+    xmm1 = _mm_adds_epi16(xmm0, y0_p_5_5);
+    bit_met_p5_p5 = _mm_subs_epi16(xmm1, ch_mag_50_over_42_with_sigma2);
+    xmm0 = _mm_subs_epi16(psi_a_p5_p3, a_sq_p5_p3);
+    xmm1 = _mm_adds_epi16(xmm0, y0_p_5_3);
+    bit_met_p5_p3 = _mm_subs_epi16(xmm1, ch_mag_34_over_42_with_sigma2);
+    xmm0 = _mm_subs_epi16(psi_a_p5_p1, a_sq_p5_p1);
+    xmm1 = _mm_adds_epi16(xmm0, y0_p_5_1);
+    bit_met_p5_p1 = _mm_subs_epi16(xmm1, ch_mag_26_over_42_with_sigma2);
+    xmm0 = _mm_subs_epi16(psi_a_p5_m1, a_sq_p5_m1);
+    xmm1 = _mm_adds_epi16(xmm0, y0_m_5_1);
+    bit_met_p5_m1 = _mm_subs_epi16(xmm1, ch_mag_26_over_42_with_sigma2);
+    xmm0 = _mm_subs_epi16(psi_a_p5_m3, a_sq_p5_m3);
+    xmm1 = _mm_adds_epi16(xmm0, y0_m_5_3);
+    bit_met_p5_m3 = _mm_subs_epi16(xmm1, ch_mag_34_over_42_with_sigma2);
+    xmm0 = _mm_subs_epi16(psi_a_p5_m5, a_sq_p5_m5);
+    xmm1 = _mm_adds_epi16(xmm0, y0_m_5_5);
+    bit_met_p5_m5 = _mm_subs_epi16(xmm1, ch_mag_50_over_42_with_sigma2);
+    xmm0 = _mm_subs_epi16(psi_a_p5_m7, a_sq_p5_m7);
+    xmm1 = _mm_adds_epi16(xmm0, y0_m_5_7);
+    bit_met_p5_m7 = _mm_subs_epi16(xmm1, ch_mag_74_over_42_with_sigma2);
+    xmm0 = _mm_subs_epi16(psi_a_p3_p7, a_sq_p3_p7);
+    xmm1 = _mm_adds_epi16(xmm0, y0_p_3_7);
+    bit_met_p3_p7 = _mm_subs_epi16(xmm1, ch_mag_58_over_42_with_sigma2);
+    xmm0 = _mm_subs_epi16(psi_a_p3_p5, a_sq_p3_p5);
+    xmm1 = _mm_adds_epi16(xmm0, y0_p_3_5);
+    bit_met_p3_p5 = _mm_subs_epi16(xmm1, ch_mag_34_over_42_with_sigma2);
+    xmm0 = _mm_subs_epi16(psi_a_p3_p3, a_sq_p3_p3);
+    xmm1 = _mm_adds_epi16(xmm0, y0_p_3_3);
+    bit_met_p3_p3 = _mm_subs_epi16(xmm1, ch_mag_18_over_42_with_sigma2);
+    xmm0 = _mm_subs_epi16(psi_a_p3_p1, a_sq_p3_p1);
+    xmm1 = _mm_adds_epi16(xmm0, y0_p_3_1);
+    bit_met_p3_p1 = _mm_subs_epi16(xmm1, ch_mag_10_over_42_with_sigma2);
+    xmm0 = _mm_subs_epi16(psi_a_p3_m1, a_sq_p3_m1);
+    xmm1 = _mm_adds_epi16(xmm0, y0_m_3_1);
+    bit_met_p3_m1 = _mm_subs_epi16(xmm1, ch_mag_10_over_42_with_sigma2);
+    xmm0 = _mm_subs_epi16(psi_a_p3_m3, a_sq_p3_m3);
+    xmm1 = _mm_adds_epi16(xmm0, y0_m_3_3);
+    bit_met_p3_m3 = _mm_subs_epi16(xmm1, ch_mag_18_over_42_with_sigma2);
+    xmm0 = _mm_subs_epi16(psi_a_p3_m5, a_sq_p3_m5);
+    xmm1 = _mm_adds_epi16(xmm0, y0_m_3_5);
+    bit_met_p3_m5 = _mm_subs_epi16(xmm1, ch_mag_34_over_42_with_sigma2);
+    xmm0 = _mm_subs_epi16(psi_a_p3_m7, a_sq_p3_m7);
+    xmm1 = _mm_adds_epi16(xmm0, y0_m_3_7);
+    bit_met_p3_m7 = _mm_subs_epi16(xmm1, ch_mag_58_over_42_with_sigma2);
+    xmm0 = _mm_subs_epi16(psi_a_p1_p7, a_sq_p1_p7);
+    xmm1 = _mm_adds_epi16(xmm0, y0_p_1_7);
+    bit_met_p1_p7 = _mm_subs_epi16(xmm1, ch_mag_50_over_42_with_sigma2);
+    xmm0 = _mm_subs_epi16(psi_a_p1_p5, a_sq_p1_p5);
+    xmm1 = _mm_adds_epi16(xmm0, y0_p_1_5);
+    bit_met_p1_p5 = _mm_subs_epi16(xmm1, ch_mag_26_over_42_with_sigma2);
+    xmm0 = _mm_subs_epi16(psi_a_p1_p3, a_sq_p1_p3);
+    xmm1 = _mm_adds_epi16(xmm0, y0_p_1_3);
+    bit_met_p1_p3 = _mm_subs_epi16(xmm1, ch_mag_10_over_42_with_sigma2);
+    xmm0 = _mm_subs_epi16(psi_a_p1_p1, a_sq_p1_p1);
+    xmm1 = _mm_adds_epi16(xmm0, y0_p_1_1);
+    bit_met_p1_p1 = _mm_subs_epi16(xmm1, ch_mag_2_over_42_with_sigma2);
+    xmm0 = _mm_subs_epi16(psi_a_p1_m1, a_sq_p1_m1);
+    xmm1 = _mm_adds_epi16(xmm0, y0_m_1_1);
+    bit_met_p1_m1 = _mm_subs_epi16(xmm1, ch_mag_2_over_42_with_sigma2);
+    xmm0 = _mm_subs_epi16(psi_a_p1_m3, a_sq_p1_m3);
+    xmm1 = _mm_adds_epi16(xmm0, y0_m_1_3);
+    bit_met_p1_m3 = _mm_subs_epi16(xmm1, ch_mag_10_over_42_with_sigma2);
+    xmm0 = _mm_subs_epi16(psi_a_p1_m5, a_sq_p1_m5);
+    xmm1 = _mm_adds_epi16(xmm0, y0_m_1_5);
+    bit_met_p1_m5 = _mm_subs_epi16(xmm1, ch_mag_26_over_42_with_sigma2);
+    xmm0 = _mm_subs_epi16(psi_a_p1_m7, a_sq_p1_m7);
+    xmm1 = _mm_adds_epi16(xmm0, y0_m_1_7);
+    bit_met_p1_m7 = _mm_subs_epi16(xmm1, ch_mag_50_over_42_with_sigma2);
+
+    xmm0 = _mm_subs_epi16(psi_a_m1_p7, a_sq_m1_p7);
+    xmm1 = _mm_subs_epi16(xmm0, y0_m_1_7);
+    bit_met_m1_p7 = _mm_subs_epi16(xmm1, ch_mag_50_over_42_with_sigma2);
+    xmm0 = _mm_subs_epi16(psi_a_m1_p5, a_sq_m1_p5);
+    xmm1 = _mm_subs_epi16(xmm0, y0_m_1_5);
+    bit_met_m1_p5 = _mm_subs_epi16(xmm1, ch_mag_26_over_42_with_sigma2);
+    xmm0 = _mm_subs_epi16(psi_a_m1_p3, a_sq_m1_p3);
+    xmm1 = _mm_subs_epi16(xmm0, y0_m_1_3);
+    bit_met_m1_p3 = _mm_subs_epi16(xmm1, ch_mag_10_over_42_with_sigma2);
+    xmm0 = _mm_subs_epi16(psi_a_m1_p1, a_sq_m1_p1);
+    xmm1 = _mm_subs_epi16(xmm0, y0_m_1_1);
+    bit_met_m1_p1 = _mm_subs_epi16(xmm1, ch_mag_2_over_42_with_sigma2);
+    xmm0 = _mm_subs_epi16(psi_a_m1_m1, a_sq_m1_m1);
+    xmm1 = _mm_subs_epi16(xmm0, y0_p_1_1);
+    bit_met_m1_m1 = _mm_subs_epi16(xmm1, ch_mag_2_over_42_with_sigma2);
+    xmm0 = _mm_subs_epi16(psi_a_m1_m3, a_sq_m1_m3);
+    xmm1 = _mm_subs_epi16(xmm0, y0_p_1_3);
+    bit_met_m1_m3 = _mm_subs_epi16(xmm1, ch_mag_10_over_42_with_sigma2);
+    xmm0 = _mm_subs_epi16(psi_a_m1_m5, a_sq_m1_m5);
+    xmm1 = _mm_subs_epi16(xmm0, y0_p_1_5);
+    bit_met_m1_m5 = _mm_subs_epi16(xmm1, ch_mag_26_over_42_with_sigma2);
+    xmm0 = _mm_subs_epi16(psi_a_m1_m7, a_sq_m1_m7);
+    xmm1 = _mm_subs_epi16(xmm0, y0_p_1_7);
+    bit_met_m1_m7 = _mm_subs_epi16(xmm1, ch_mag_50_over_42_with_sigma2);
+    xmm0 = _mm_subs_epi16(psi_a_m3_p7, a_sq_m3_p7);
+    xmm1 = _mm_subs_epi16(xmm0, y0_m_3_7);
+    bit_met_m3_p7 = _mm_subs_epi16(xmm1, ch_mag_58_over_42_with_sigma2);
+    xmm0 = _mm_subs_epi16(psi_a_m3_p5, a_sq_m3_p5);
+    xmm1 = _mm_subs_epi16(xmm0, y0_m_3_5);
+    bit_met_m3_p5 = _mm_subs_epi16(xmm1, ch_mag_34_over_42_with_sigma2);
+    xmm0 = _mm_subs_epi16(psi_a_m3_p3, a_sq_m3_p3);
+    xmm1 = _mm_subs_epi16(xmm0, y0_m_3_3);
+    bit_met_m3_p3 = _mm_subs_epi16(xmm1, ch_mag_18_over_42_with_sigma2);
+    xmm0 = _mm_subs_epi16(psi_a_m3_p1, a_sq_m3_p1);
+    xmm1 = _mm_subs_epi16(xmm0, y0_m_3_1);
+    bit_met_m3_p1 = _mm_subs_epi16(xmm1, ch_mag_10_over_42_with_sigma2);
+    xmm0 = _mm_subs_epi16(psi_a_m3_m1, a_sq_m3_m1);
+    xmm1 = _mm_subs_epi16(xmm0, y0_p_3_1);
+    bit_met_m3_m1 = _mm_subs_epi16(xmm1, ch_mag_10_over_42_with_sigma2);
+    xmm0 = _mm_subs_epi16(psi_a_m3_m3, a_sq_m3_m3);
+    xmm1 = _mm_subs_epi16(xmm0, y0_p_3_3);
+    bit_met_m3_m3 = _mm_subs_epi16(xmm1, ch_mag_18_over_42_with_sigma2);
+    xmm0 = _mm_subs_epi16(psi_a_m3_m5, a_sq_m3_m5);
+    xmm1 = _mm_subs_epi16(xmm0, y0_p_3_5);
+    bit_met_m3_m5 = _mm_subs_epi16(xmm1, ch_mag_34_over_42_with_sigma2);
+    xmm0 = _mm_subs_epi16(psi_a_m3_m7, a_sq_m3_m7);
+    xmm1 = _mm_subs_epi16(xmm0, y0_p_3_7);
+    bit_met_m3_m7 = _mm_subs_epi16(xmm1, ch_mag_58_over_42_with_sigma2);
+    xmm0 = _mm_subs_epi16(psi_a_m5_p7, a_sq_m5_p7);
+    xmm1 = _mm_subs_epi16(xmm0, y0_m_5_7);
+    bit_met_m5_p7 = _mm_subs_epi16(xmm1, ch_mag_74_over_42_with_sigma2);
+    xmm0 = _mm_subs_epi16(psi_a_m5_p5, a_sq_m5_p5);
+    xmm1 = _mm_subs_epi16(xmm0, y0_m_5_5);
+    bit_met_m5_p5 = _mm_subs_epi16(xmm1, ch_mag_50_over_42_with_sigma2);
+    xmm0 = _mm_subs_epi16(psi_a_m5_p3, a_sq_m5_p3);
+    xmm1 = _mm_subs_epi16(xmm0, y0_m_5_3);
+    bit_met_m5_p3 = _mm_subs_epi16(xmm1, ch_mag_34_over_42_with_sigma2);
+    xmm0 = _mm_subs_epi16(psi_a_m5_p1, a_sq_m5_p1);
+    xmm1 = _mm_subs_epi16(xmm0, y0_m_5_1);
+    bit_met_m5_p1 = _mm_subs_epi16(xmm1, ch_mag_26_over_42_with_sigma2);
+    xmm0 = _mm_subs_epi16(psi_a_m5_m1, a_sq_m5_m1);
+    xmm1 = _mm_subs_epi16(xmm0, y0_p_5_1);
+    bit_met_m5_m1 = _mm_subs_epi16(xmm1, ch_mag_26_over_42_with_sigma2);
+    xmm0 = _mm_subs_epi16(psi_a_m5_m3, a_sq_m5_m3);
+    xmm1 = _mm_subs_epi16(xmm0, y0_p_5_3);
+    bit_met_m5_m3 = _mm_subs_epi16(xmm1, ch_mag_34_over_42_with_sigma2);
+    xmm0 = _mm_subs_epi16(psi_a_m5_m5, a_sq_m5_m5);
+    xmm1 = _mm_subs_epi16(xmm0, y0_p_5_5);
+    bit_met_m5_m5 = _mm_subs_epi16(xmm1, ch_mag_50_over_42_with_sigma2);
+    xmm0 = _mm_subs_epi16(psi_a_m5_m7, a_sq_m5_m7);
+    xmm1 = _mm_subs_epi16(xmm0, y0_p_5_7);
+    bit_met_m5_m7 = _mm_subs_epi16(xmm1, ch_mag_74_over_42_with_sigma2);
+    xmm0 = _mm_subs_epi16(psi_a_m7_p7, a_sq_m7_p7);
+    xmm1 = _mm_subs_epi16(xmm0, y0_m_7_7);
+    bit_met_m7_p7 = _mm_subs_epi16(xmm1, ch_mag_98_over_42_with_sigma2);
+    xmm0 = _mm_subs_epi16(psi_a_m7_p5, a_sq_m7_p5);
+    xmm1 = _mm_subs_epi16(xmm0, y0_m_7_5);
+    bit_met_m7_p5 = _mm_subs_epi16(xmm1, ch_mag_74_over_42_with_sigma2);
+    xmm0 = _mm_subs_epi16(psi_a_m7_p3, a_sq_m7_p3);
+    xmm1 = _mm_subs_epi16(xmm0, y0_m_7_3);
+    bit_met_m7_p3 = _mm_subs_epi16(xmm1, ch_mag_58_over_42_with_sigma2);
+    xmm0 = _mm_subs_epi16(psi_a_m7_p1, a_sq_m7_p1);
+    xmm1 = _mm_subs_epi16(xmm0, y0_m_7_1);
+    bit_met_m7_p1 = _mm_subs_epi16(xmm1, ch_mag_50_over_42_with_sigma2);
+    xmm0 = _mm_subs_epi16(psi_a_m7_m1, a_sq_m7_m1);
+    xmm1 = _mm_subs_epi16(xmm0, y0_p_7_1);
+    bit_met_m7_m1 = _mm_subs_epi16(xmm1, ch_mag_50_over_42_with_sigma2);
+    xmm0 = _mm_subs_epi16(psi_a_m7_m3, a_sq_m7_m3);
+    xmm1 = _mm_subs_epi16(xmm0, y0_p_7_3);
+    bit_met_m7_m3 = _mm_subs_epi16(xmm1, ch_mag_58_over_42_with_sigma2);
+    xmm0 = _mm_subs_epi16(psi_a_m7_m5, a_sq_m7_m5);
+    xmm1 = _mm_subs_epi16(xmm0, y0_p_7_5);
+    bit_met_m7_m5 = _mm_subs_epi16(xmm1, ch_mag_74_over_42_with_sigma2);
+    xmm0 = _mm_subs_epi16(psi_a_m7_m7, a_sq_m7_m7);
+    xmm1 = _mm_subs_epi16(xmm0, y0_p_7_7);
+    bit_met_m7_m7 = _mm_subs_epi16(xmm1, ch_mag_98_over_42_with_sigma2);
+
+    // Detection for 1st bit (LTE mapping)
+    // bit = 1
+    xmm0 = _mm_max_epi16(bit_met_m7_p7, bit_met_m7_p5);
+    xmm1 = _mm_max_epi16(bit_met_m7_p3, bit_met_m7_p1);
+    xmm2 = _mm_max_epi16(bit_met_m7_m1, bit_met_m7_m3);
+    xmm3 = _mm_max_epi16(bit_met_m7_m5, bit_met_m7_m7);
+    xmm4 = _mm_max_epi16(xmm0, xmm1);
+    xmm5 = _mm_max_epi16(xmm2, xmm3);
+    logmax_den_re0 = _mm_max_epi16(xmm4, xmm5);
+    xmm0 = _mm_max_epi16(bit_met_m5_p7, bit_met_m5_p5);
+    xmm1 = _mm_max_epi16(bit_met_m5_p3, bit_met_m5_p1);
+    xmm2 = _mm_max_epi16(bit_met_m5_m1, bit_met_m5_m3);
+    xmm3 = _mm_max_epi16(bit_met_m5_m5, bit_met_m5_m7);
+    xmm4 = _mm_max_epi16(xmm0, xmm1);
+    xmm5 = _mm_max_epi16(xmm2, xmm3);
+    logmax_den_re0 = _mm_max_epi16(logmax_den_re0, xmm4);
+    logmax_den_re0 = _mm_max_epi16(logmax_den_re0, xmm5);
+    xmm0 = _mm_max_epi16(bit_met_m3_p7, bit_met_m3_p5);
+    xmm1 = _mm_max_epi16(bit_met_m3_p3, bit_met_m3_p1);
+    xmm2 = _mm_max_epi16(bit_met_m3_m1, bit_met_m3_m3);
+    xmm3 = _mm_max_epi16(bit_met_m3_m5, bit_met_m3_m7);
+    xmm4 = _mm_max_epi16(xmm0, xmm1);
+    xmm5 = _mm_max_epi16(xmm2, xmm3);
+    logmax_den_re0 = _mm_max_epi16(logmax_den_re0, xmm4);
+    logmax_den_re0 = _mm_max_epi16(logmax_den_re0, xmm5);
+    xmm0 = _mm_max_epi16(bit_met_m1_p7, bit_met_m1_p5);
+    xmm1 = _mm_max_epi16(bit_met_m1_p3, bit_met_m1_p1);
+    xmm2 = _mm_max_epi16(bit_met_m1_m1, bit_met_m1_m3);
+    xmm3 = _mm_max_epi16(bit_met_m1_m5, bit_met_m1_m7);
+    xmm4 = _mm_max_epi16(xmm0, xmm1);
+    xmm5 = _mm_max_epi16(xmm2, xmm3);
+    logmax_den_re0 = _mm_max_epi16(logmax_den_re0, xmm4);
+    logmax_den_re0 = _mm_max_epi16(logmax_den_re0, xmm5);
+
+    // bit = 0
+    xmm0 = _mm_max_epi16(bit_met_p7_p7, bit_met_p7_p5);
+    xmm1 = _mm_max_epi16(bit_met_p7_p3, bit_met_p7_p1);
+    xmm2 = _mm_max_epi16(bit_met_p7_m1, bit_met_p7_m3);
+    xmm3 = _mm_max_epi16(bit_met_p7_m5, bit_met_p7_m7);
+    xmm4 = _mm_max_epi16(xmm0, xmm1);
+    xmm5 = _mm_max_epi16(xmm2, xmm3);
+    logmax_num_re0 = _mm_max_epi16(xmm4, xmm5);
+    xmm0 = _mm_max_epi16(bit_met_p5_p7, bit_met_p5_p5);
+    xmm1 = _mm_max_epi16(bit_met_p5_p3, bit_met_p5_p1);
+    xmm2 = _mm_max_epi16(bit_met_p5_m1, bit_met_p5_m3);
+    xmm3 = _mm_max_epi16(bit_met_p5_m5, bit_met_p5_m7);
+    xmm4 = _mm_max_epi16(xmm0, xmm1);
+    xmm5 = _mm_max_epi16(xmm2, xmm3);
+    logmax_num_re0 = _mm_max_epi16(logmax_num_re0, xmm4);
+    logmax_num_re0 = _mm_max_epi16(logmax_num_re0, xmm5);
+    xmm0 = _mm_max_epi16(bit_met_p3_p7, bit_met_p3_p5);
+    xmm1 = _mm_max_epi16(bit_met_p3_p3, bit_met_p3_p1);
+    xmm2 = _mm_max_epi16(bit_met_p3_m1, bit_met_p3_m3);
+    xmm3 = _mm_max_epi16(bit_met_p3_m5, bit_met_p3_m7);
+    xmm4 = _mm_max_epi16(xmm0, xmm1);
+    xmm5 = _mm_max_epi16(xmm2, xmm3);
+    logmax_num_re0 = _mm_max_epi16(logmax_num_re0, xmm4);
+    logmax_num_re0 = _mm_max_epi16(logmax_num_re0, xmm5);
+    xmm0 = _mm_max_epi16(bit_met_p1_p7, bit_met_p1_p5);
+    xmm1 = _mm_max_epi16(bit_met_p1_p3, bit_met_p1_p1);
+    xmm2 = _mm_max_epi16(bit_met_p1_m1, bit_met_p1_m3);
+    xmm3 = _mm_max_epi16(bit_met_p1_m5, bit_met_p1_m7);
+    xmm4 = _mm_max_epi16(xmm0, xmm1);
+    xmm5 = _mm_max_epi16(xmm2, xmm3);
+    logmax_num_re0 = _mm_max_epi16(logmax_num_re0, xmm4);
+    logmax_num_re0 = _mm_max_epi16(logmax_num_re0, xmm5);
+
+    y0r = _mm_subs_epi16(logmax_num_re0, logmax_den_re0);
+
+    // Detection for 2nd bit (LTE mapping)
+    // bit = 1
+    xmm0 = _mm_max_epi16(bit_met_p7_m1, bit_met_p5_m1);
+    xmm1 = _mm_max_epi16(bit_met_p3_m1, bit_met_p1_m1);
+    xmm2 = _mm_max_epi16(bit_met_m1_m1, bit_met_m3_m1);
+    xmm3 = _mm_max_epi16(bit_met_m5_m1, bit_met_m7_m1);
+    xmm4 = _mm_max_epi16(xmm0, xmm1);
+    xmm5 = _mm_max_epi16(xmm2, xmm3);
+    logmax_den_re0 = _mm_max_epi16(xmm4, xmm5);
+    xmm0 = _mm_max_epi16(bit_met_p7_m3, bit_met_p5_m3);
+    xmm1 = _mm_max_epi16(bit_met_p3_m3, bit_met_p1_m3);
+    xmm2 = _mm_max_epi16(bit_met_m1_m3, bit_met_m3_m3);
+    xmm3 = _mm_max_epi16(bit_met_m5_m3, bit_met_m7_m3);
+    xmm4 = _mm_max_epi16(xmm0, xmm1);
+    xmm5 = _mm_max_epi16(xmm2, xmm3);
+    logmax_den_re0 = _mm_max_epi16(logmax_den_re0, xmm4);
+    logmax_den_re0 = _mm_max_epi16(logmax_den_re0, xmm5);
+    xmm0 = _mm_max_epi16(bit_met_p7_m5, bit_met_p5_m5);
+    xmm1 = _mm_max_epi16(bit_met_p3_m5, bit_met_p1_m5);
+    xmm2 = _mm_max_epi16(bit_met_m1_m5, bit_met_m3_m5);
+    xmm3 = _mm_max_epi16(bit_met_m5_m5, bit_met_m7_m5);
+    xmm4 = _mm_max_epi16(xmm0, xmm1);
+    xmm5 = _mm_max_epi16(xmm2, xmm3);
+    logmax_den_re0 = _mm_max_epi16(logmax_den_re0, xmm4);
+    logmax_den_re0 = _mm_max_epi16(logmax_den_re0, xmm5);
+    xmm0 = _mm_max_epi16(bit_met_p7_m7, bit_met_p5_m7);
+    xmm1 = _mm_max_epi16(bit_met_p3_m7, bit_met_p1_m7);
+    xmm2 = _mm_max_epi16(bit_met_m1_m7, bit_met_m3_m7);
+    xmm3 = _mm_max_epi16(bit_met_m5_m7, bit_met_m7_m7);
+    xmm4 = _mm_max_epi16(xmm0, xmm1);
+    xmm5 = _mm_max_epi16(xmm2, xmm3);
+    logmax_den_re0 = _mm_max_epi16(logmax_den_re0, xmm4);
+    logmax_den_re0 = _mm_max_epi16(logmax_den_re0, xmm5);
+
+    // bit = 0
+    xmm0 = _mm_max_epi16(bit_met_p7_p1, bit_met_p5_p1);
+    xmm1 = _mm_max_epi16(bit_met_p3_p1, bit_met_p1_p1);
+    xmm2 = _mm_max_epi16(bit_met_m1_p1, bit_met_m3_p1);
+    xmm3 = _mm_max_epi16(bit_met_m5_p1, bit_met_m7_p1);
+    xmm4 = _mm_max_epi16(xmm0, xmm1);
+    xmm5 = _mm_max_epi16(xmm2, xmm3);
+    logmax_num_re0 = _mm_max_epi16(xmm4, xmm5);
+    xmm0 = _mm_max_epi16(bit_met_p7_p3, bit_met_p5_p3);
+    xmm1 = _mm_max_epi16(bit_met_p3_p3, bit_met_p1_p3);
+    xmm2 = _mm_max_epi16(bit_met_m1_p3, bit_met_m3_p3);
+    xmm3 = _mm_max_epi16(bit_met_m5_p3, bit_met_m7_p3);
+    xmm4 = _mm_max_epi16(xmm0, xmm1);
+    xmm5 = _mm_max_epi16(xmm2, xmm3);
+    logmax_num_re0 = _mm_max_epi16(logmax_num_re0, xmm4);
+    logmax_num_re0 = _mm_max_epi16(logmax_num_re0, xmm5);
+    xmm0 = _mm_max_epi16(bit_met_p7_p5, bit_met_p5_p5);
+    xmm1 = _mm_max_epi16(bit_met_p3_p5, bit_met_p1_p5);
+    xmm2 = _mm_max_epi16(bit_met_m1_p5, bit_met_m3_p5);
+    xmm3 = _mm_max_epi16(bit_met_m5_p5, bit_met_m7_p5);
+    xmm4 = _mm_max_epi16(xmm0, xmm1);
+    xmm5 = _mm_max_epi16(xmm2, xmm3);
+    logmax_num_re0 = _mm_max_epi16(logmax_num_re0, xmm4);
+    logmax_num_re0 = _mm_max_epi16(logmax_num_re0, xmm5);
+    xmm0 = _mm_max_epi16(bit_met_p7_p7, bit_met_p5_p7);
+    xmm1 = _mm_max_epi16(bit_met_p3_p7, bit_met_p1_p7);
+    xmm2 = _mm_max_epi16(bit_met_m1_p7, bit_met_m3_p7);
+    xmm3 = _mm_max_epi16(bit_met_m5_p7, bit_met_m7_p7);
+    xmm4 = _mm_max_epi16(xmm0, xmm1);
+    xmm5 = _mm_max_epi16(xmm2, xmm3);
+    logmax_num_re0 = _mm_max_epi16(logmax_num_re0, xmm4);
+    logmax_num_re0 = _mm_max_epi16(logmax_num_re0, xmm5);
+
+    y1r = _mm_subs_epi16(logmax_num_re0, logmax_den_re0);
+
+    // Detection for 3rd bit (LTE mapping)
+    xmm0 = _mm_max_epi16(bit_met_m7_m7, bit_met_m7_m5);
+    xmm1 = _mm_max_epi16(bit_met_m7_m3, bit_met_m7_m1);
+    xmm2 = _mm_max_epi16(bit_met_m7_p1, bit_met_m7_p3);
+    xmm3 = _mm_max_epi16(bit_met_m7_p5, bit_met_m7_p7);
+    xmm4 = _mm_max_epi16(xmm0, xmm1);
+    xmm5 = _mm_max_epi16(xmm2, xmm3);
+    logmax_den_re0 = _mm_max_epi16(xmm4, xmm5);
+    xmm0 = _mm_max_epi16(bit_met_m5_m7, bit_met_m5_m5);
+    xmm1 = _mm_max_epi16(bit_met_m5_m3, bit_met_m5_m1);
+    xmm2 = _mm_max_epi16(bit_met_m5_p1, bit_met_m5_p3);
+    xmm3 = _mm_max_epi16(bit_met_m5_p5, bit_met_m5_p7);
+    xmm4 = _mm_max_epi16(xmm0, xmm1);
+    xmm5 = _mm_max_epi16(xmm2, xmm3);
+    logmax_den_re0 = _mm_max_epi16(logmax_den_re0, xmm4);
+    logmax_den_re0 = _mm_max_epi16(logmax_den_re0, xmm5);
+    xmm0 = _mm_max_epi16(bit_met_p5_m7, bit_met_p5_m5);
+    xmm1 = _mm_max_epi16(bit_met_p5_m3, bit_met_p5_m1);
+    xmm2 = _mm_max_epi16(bit_met_p5_p1, bit_met_p5_p3);
+    xmm3 = _mm_max_epi16(bit_met_p5_p5, bit_met_p5_p7);
+    xmm4 = _mm_max_epi16(xmm0, xmm1);
+    xmm5 = _mm_max_epi16(xmm2, xmm3);
+    logmax_den_re0 = _mm_max_epi16(logmax_den_re0, xmm4);
+    logmax_den_re0 = _mm_max_epi16(logmax_den_re0, xmm5);
+    xmm0 = _mm_max_epi16(bit_met_p7_m7, bit_met_p7_m5);
+    xmm1 = _mm_max_epi16(bit_met_p7_m3, bit_met_p7_m1);
+    xmm2 = _mm_max_epi16(bit_met_p7_p1, bit_met_p7_p3);
+    xmm3 = _mm_max_epi16(bit_met_p7_p5, bit_met_p7_p7);
+    xmm4 = _mm_max_epi16(xmm0, xmm1);
+    xmm5 = _mm_max_epi16(xmm2, xmm3);
+    logmax_den_re0 = _mm_max_epi16(logmax_den_re0, xmm4);
+    logmax_den_re0 = _mm_max_epi16(logmax_den_re0, xmm5);
+
+    xmm0 = _mm_max_epi16(bit_met_m3_m7, bit_met_m3_m5);
+    xmm1 = _mm_max_epi16(bit_met_m3_m3, bit_met_m3_m1);
+    xmm2 = _mm_max_epi16(bit_met_m3_p1, bit_met_m3_p3);
+    xmm3 = _mm_max_epi16(bit_met_m3_p5, bit_met_m3_p7);
+    xmm4 = _mm_max_epi16(xmm0, xmm1);
+    xmm5 = _mm_max_epi16(xmm2, xmm3);
+    logmax_num_re0 = _mm_max_epi16(xmm4, xmm5);
+    xmm0 = _mm_max_epi16(bit_met_m1_m7, bit_met_m1_m5);
+    xmm1 = _mm_max_epi16(bit_met_m1_m3, bit_met_m1_m1);
+    xmm2 = _mm_max_epi16(bit_met_m1_p1, bit_met_m1_p3);
+    xmm3 = _mm_max_epi16(bit_met_m1_p5, bit_met_m1_p7);
+    xmm4 = _mm_max_epi16(xmm0, xmm1);
+    xmm5 = _mm_max_epi16(xmm2, xmm3);
+    logmax_num_re0 = _mm_max_epi16(logmax_num_re0, xmm4);
+    logmax_num_re0 = _mm_max_epi16(logmax_num_re0, xmm5);
+    xmm0 = _mm_max_epi16(bit_met_p1_m7, bit_met_p1_m5);
+    xmm1 = _mm_max_epi16(bit_met_p1_m3, bit_met_p1_m1);
+    xmm2 = _mm_max_epi16(bit_met_p1_p1, bit_met_p1_p3);
+    xmm3 = _mm_max_epi16(bit_met_p1_p5, bit_met_p1_p7);
+    xmm4 = _mm_max_epi16(xmm0, xmm1);
+    xmm5 = _mm_max_epi16(xmm2, xmm3);
+    logmax_num_re0 = _mm_max_epi16(logmax_num_re0, xmm4);
+    logmax_num_re0 = _mm_max_epi16(logmax_num_re0, xmm5);
+    xmm0 = _mm_max_epi16(bit_met_p3_m7, bit_met_p3_m5);
+    xmm1 = _mm_max_epi16(bit_met_p3_m3, bit_met_p3_m1);
+    xmm2 = _mm_max_epi16(bit_met_p3_p1, bit_met_p3_p3);
+    xmm3 = _mm_max_epi16(bit_met_p3_p5, bit_met_p3_p7);
+    xmm4 = _mm_max_epi16(xmm0, xmm1);
+    xmm5 = _mm_max_epi16(xmm2, xmm3);
+    logmax_num_re0 = _mm_max_epi16(logmax_num_re0, xmm4);
+    logmax_num_re0 = _mm_max_epi16(logmax_num_re0, xmm5);
+
+    y2r = _mm_subs_epi16(logmax_num_re0, logmax_den_re0);
+
+    // Detection for 4th bit (LTE mapping)
+    xmm0 = _mm_max_epi16(bit_met_p7_p7, bit_met_p5_p7);
+    xmm1 = _mm_max_epi16(bit_met_p3_p7, bit_met_p1_p7);
+    xmm2 = _mm_max_epi16(bit_met_m1_p7, bit_met_m3_p7);
+    xmm3 = _mm_max_epi16(bit_met_m5_p7, bit_met_m7_p7);
+    xmm4 = _mm_max_epi16(xmm0, xmm1);
+    xmm5 = _mm_max_epi16(xmm2, xmm3);
+    logmax_den_re0 = _mm_max_epi16(xmm4, xmm5);
+    xmm0 = _mm_max_epi16(bit_met_p7_p5, bit_met_p5_p5);
+    xmm1 = _mm_max_epi16(bit_met_p3_p5, bit_met_p1_p5);
+    xmm2 = _mm_max_epi16(bit_met_m1_p5, bit_met_m3_p5);
+    xmm3 = _mm_max_epi16(bit_met_m5_p5, bit_met_m5_p5);
+    xmm4 = _mm_max_epi16(xmm0, xmm1);
+    xmm5 = _mm_max_epi16(xmm2, xmm3);
+    logmax_den_re0 = _mm_max_epi16(logmax_den_re0, xmm4);
+    logmax_den_re0 = _mm_max_epi16(logmax_den_re0, xmm5);
+    xmm0 = _mm_max_epi16(bit_met_p7_m5, bit_met_p5_m5);
+    xmm1 = _mm_max_epi16(bit_met_p3_m5, bit_met_p1_m5);
+    xmm2 = _mm_max_epi16(bit_met_m1_m5, bit_met_m3_m5);
+    xmm3 = _mm_max_epi16(bit_met_m5_m5, bit_met_m7_m5);
+    xmm4 = _mm_max_epi16(xmm0, xmm1);
+    xmm5 = _mm_max_epi16(xmm2, xmm3);
+    logmax_den_re0 = _mm_max_epi16(logmax_den_re0, xmm4);
+    logmax_den_re0 = _mm_max_epi16(logmax_den_re0, xmm5);
+    xmm0 = _mm_max_epi16(bit_met_p7_m7, bit_met_p5_m7);
+    xmm1 = _mm_max_epi16(bit_met_p3_m7, bit_met_p1_m7);
+    xmm2 = _mm_max_epi16(bit_met_m1_m7, bit_met_m3_m7);
+    xmm3 = _mm_max_epi16(bit_met_m5_m7, bit_met_m7_m7);
+    xmm4 = _mm_max_epi16(xmm0, xmm1);
+    xmm5 = _mm_max_epi16(xmm2, xmm3);
+    logmax_den_re0 = _mm_max_epi16(logmax_den_re0, xmm4);
+    logmax_den_re0 = _mm_max_epi16(logmax_den_re0, xmm5);
+
+    xmm0 = _mm_max_epi16(bit_met_p7_m1, bit_met_p5_m1);
+    xmm1 = _mm_max_epi16(bit_met_p3_m1, bit_met_p1_m1);
+    xmm2 = _mm_max_epi16(bit_met_m1_m1, bit_met_m3_m1);
+    xmm3 = _mm_max_epi16(bit_met_m5_m1, bit_met_m7_m1);
+    xmm4 = _mm_max_epi16(xmm0, xmm1);
+    xmm5 = _mm_max_epi16(xmm2, xmm3);
+    logmax_num_re0 = _mm_max_epi16(xmm4, xmm5);
+    xmm0 = _mm_max_epi16(bit_met_p7_m3, bit_met_p5_m3);
+    xmm1 = _mm_max_epi16(bit_met_p3_m3, bit_met_p1_m3);
+    xmm2 = _mm_max_epi16(bit_met_m1_m3, bit_met_m3_m3);
+    xmm3 = _mm_max_epi16(bit_met_m5_m3, bit_met_m7_m3);
+    xmm4 = _mm_max_epi16(xmm0, xmm1);
+    xmm5 = _mm_max_epi16(xmm2, xmm3);
+    logmax_num_re0 = _mm_max_epi16(logmax_num_re0, xmm4);
+    logmax_num_re0 = _mm_max_epi16(logmax_num_re0, xmm5);
+    xmm0 = _mm_max_epi16(bit_met_p7_p1, bit_met_p5_p1);
+    xmm1 = _mm_max_epi16(bit_met_p3_p1, bit_met_p1_p1);
+    xmm2 = _mm_max_epi16(bit_met_m1_p1, bit_met_m3_p1);
+    xmm3 = _mm_max_epi16(bit_met_m5_p1, bit_met_m7_p1);
+    xmm4 = _mm_max_epi16(xmm0, xmm1);
+    xmm5 = _mm_max_epi16(xmm2, xmm3);
+    logmax_num_re0 = _mm_max_epi16(logmax_num_re0, xmm4);
+    logmax_num_re0 = _mm_max_epi16(logmax_num_re0, xmm5);
+    xmm0 = _mm_max_epi16(bit_met_p7_p3, bit_met_p5_p3);
+    xmm1 = _mm_max_epi16(bit_met_p3_p3, bit_met_p1_p3);
+    xmm2 = _mm_max_epi16(bit_met_m1_p3, bit_met_m3_p3);
+    xmm3 = _mm_max_epi16(bit_met_m5_p3, bit_met_m7_p3);
+    xmm4 = _mm_max_epi16(xmm0, xmm1);
+    xmm5 = _mm_max_epi16(xmm2, xmm3);
+    logmax_num_re0 = _mm_max_epi16(logmax_num_re0, xmm4);
+    logmax_num_re0 = _mm_max_epi16(logmax_num_re0, xmm5);
+
+    y0i = _mm_subs_epi16(logmax_num_re0, logmax_den_re0);
+
+
+    // Detection for 5th bit (LTE mapping)
+    xmm0 = _mm_max_epi16(bit_met_m7_m7, bit_met_m7_m5);
+    xmm1 = _mm_max_epi16(bit_met_m7_m3, bit_met_m7_m1);
+    xmm2 = _mm_max_epi16(bit_met_m7_p1, bit_met_m7_p3);
+    xmm3 = _mm_max_epi16(bit_met_m7_p5, bit_met_m7_p7);
+    xmm4 = _mm_max_epi16(xmm0, xmm1);
+    xmm5 = _mm_max_epi16(xmm2, xmm3);
+    logmax_den_re0 = _mm_max_epi16(xmm4, xmm5);
+    xmm0 = _mm_max_epi16(bit_met_m1_m7, bit_met_m1_m5);
+    xmm1 = _mm_max_epi16(bit_met_m1_m3, bit_met_m1_m1);
+    xmm2 = _mm_max_epi16(bit_met_m1_p1, bit_met_m1_p3);
+    xmm3 = _mm_max_epi16(bit_met_m1_p5, bit_met_m1_p7);
+    xmm4 = _mm_max_epi16(xmm0, xmm1);
+    xmm5 = _mm_max_epi16(xmm2, xmm3);
+    logmax_den_re0 = _mm_max_epi16(logmax_den_re0, xmm4);
+    logmax_den_re0 = _mm_max_epi16(logmax_den_re0, xmm5);
+    xmm0 = _mm_max_epi16(bit_met_p1_m7, bit_met_p1_m5);
+    xmm1 = _mm_max_epi16(bit_met_p1_m3, bit_met_p1_m1);
+    xmm2 = _mm_max_epi16(bit_met_p1_p1, bit_met_p1_p3);
+    xmm3 = _mm_max_epi16(bit_met_p1_p5, bit_met_p1_p7);
+    xmm4 = _mm_max_epi16(xmm0, xmm1);
+    xmm5 = _mm_max_epi16(xmm2, xmm3);
+    logmax_den_re0 = _mm_max_epi16(logmax_den_re0, xmm4);
+    logmax_den_re0 = _mm_max_epi16(logmax_den_re0, xmm5);
+    xmm0 = _mm_max_epi16(bit_met_p7_m7, bit_met_p7_m5);
+    xmm1 = _mm_max_epi16(bit_met_p7_m3, bit_met_p7_m1);
+    xmm2 = _mm_max_epi16(bit_met_p7_p1, bit_met_p7_p3);
+    xmm3 = _mm_max_epi16(bit_met_p7_p5, bit_met_p7_p7);
+    xmm4 = _mm_max_epi16(xmm0, xmm1);
+    xmm5 = _mm_max_epi16(xmm2, xmm3);
+    logmax_den_re0 = _mm_max_epi16(logmax_den_re0, xmm4);
+    logmax_den_re0 = _mm_max_epi16(logmax_den_re0, xmm5);
+
+    xmm0 = _mm_max_epi16(bit_met_m5_m7, bit_met_m5_m5);
+    xmm1 = _mm_max_epi16(bit_met_m5_m3, bit_met_m5_m1);
+    xmm2 = _mm_max_epi16(bit_met_m5_p1, bit_met_m5_p3);
+    xmm3 = _mm_max_epi16(bit_met_m5_p5, bit_met_m5_p7);
+    xmm4 = _mm_max_epi16(xmm0, xmm1);
+    xmm5 = _mm_max_epi16(xmm2, xmm3);
+    logmax_num_re0 = _mm_max_epi16(xmm4, xmm5);
+    xmm0 = _mm_max_epi16(bit_met_m3_m7, bit_met_m3_m5);
+    xmm1 = _mm_max_epi16(bit_met_m3_m3, bit_met_m3_m1);
+    xmm2 = _mm_max_epi16(bit_met_m3_p1, bit_met_m3_p3);
+    xmm3 = _mm_max_epi16(bit_met_m3_p5, bit_met_m3_p7);
+    xmm4 = _mm_max_epi16(xmm0, xmm1);
+    xmm5 = _mm_max_epi16(xmm2, xmm3);
+    logmax_num_re0 = _mm_max_epi16(logmax_num_re0, xmm4);
+    logmax_num_re0 = _mm_max_epi16(logmax_num_re0, xmm5);
+    xmm0 = _mm_max_epi16(bit_met_p3_m7, bit_met_p3_m5);
+    xmm1 = _mm_max_epi16(bit_met_p3_m3, bit_met_p3_m1);
+    xmm2 = _mm_max_epi16(bit_met_p3_p1, bit_met_p3_p3);
+    xmm3 = _mm_max_epi16(bit_met_p3_p5, bit_met_p3_p7);
+    xmm4 = _mm_max_epi16(xmm0, xmm1);
+    xmm5 = _mm_max_epi16(xmm2, xmm3);
+    logmax_num_re0 = _mm_max_epi16(logmax_num_re0, xmm4);
+    logmax_num_re0 = _mm_max_epi16(logmax_num_re0, xmm5);
+    xmm0 = _mm_max_epi16(bit_met_p5_m7, bit_met_p5_m5);
+    xmm1 = _mm_max_epi16(bit_met_p5_m3, bit_met_p5_m1);
+    xmm2 = _mm_max_epi16(bit_met_p5_p1, bit_met_p5_p3);
+    xmm3 = _mm_max_epi16(bit_met_p5_p5, bit_met_p5_p7);
+    xmm4 = _mm_max_epi16(xmm0, xmm1);
+    xmm5 = _mm_max_epi16(xmm2, xmm3);
+    logmax_num_re0 = _mm_max_epi16(logmax_num_re0, xmm4);
+    logmax_num_re0 = _mm_max_epi16(logmax_num_re0, xmm5);
+
+    y1i = _mm_subs_epi16(logmax_num_re0, logmax_den_re0);
+
+    // Detection for 6th bit (LTE mapping)
+    xmm0 = _mm_max_epi16(bit_met_p7_p7, bit_met_p5_p7);
+    xmm1 = _mm_max_epi16(bit_met_p3_p7, bit_met_p1_p7);
+    xmm2 = _mm_max_epi16(bit_met_m1_p7, bit_met_m3_p7);
+    xmm3 = _mm_max_epi16(bit_met_m5_p7, bit_met_m7_p7);
+    xmm4 = _mm_max_epi16(xmm0, xmm1);
+    xmm5 = _mm_max_epi16(xmm2, xmm3);
+    logmax_den_re0 = _mm_max_epi16(xmm4, xmm5);
+    xmm0 = _mm_max_epi16(bit_met_p7_p1, bit_met_p5_p1);
+    xmm1 = _mm_max_epi16(bit_met_p3_p1, bit_met_p1_p1);
+    xmm2 = _mm_max_epi16(bit_met_m1_p1, bit_met_m3_p1);
+    xmm3 = _mm_max_epi16(bit_met_m5_p1, bit_met_m5_p1);
+    xmm4 = _mm_max_epi16(xmm0, xmm1);
+    xmm5 = _mm_max_epi16(xmm2, xmm3);
+    logmax_den_re0 = _mm_max_epi16(logmax_den_re0, xmm4);
+    logmax_den_re0 = _mm_max_epi16(logmax_den_re0, xmm5);
+    xmm0 = _mm_max_epi16(bit_met_p7_m1, bit_met_p5_m1);
+    xmm1 = _mm_max_epi16(bit_met_p3_m1, bit_met_p1_m1);
+    xmm2 = _mm_max_epi16(bit_met_m1_m1, bit_met_m3_m1);
+    xmm3 = _mm_max_epi16(bit_met_m5_m1, bit_met_m7_m1);
+    xmm4 = _mm_max_epi16(xmm0, xmm1);
+    xmm5 = _mm_max_epi16(xmm2, xmm3);
+    logmax_den_re0 = _mm_max_epi16(logmax_den_re0, xmm4);
+    logmax_den_re0 = _mm_max_epi16(logmax_den_re0, xmm5);
+    xmm0 = _mm_max_epi16(bit_met_p7_m7, bit_met_p5_m7);
+    xmm1 = _mm_max_epi16(bit_met_p3_m7, bit_met_p1_m7);
+    xmm2 = _mm_max_epi16(bit_met_m1_m7, bit_met_m3_m7);
+    xmm3 = _mm_max_epi16(bit_met_m5_m7, bit_met_m7_m7);
+    xmm4 = _mm_max_epi16(xmm0, xmm1);
+    xmm5 = _mm_max_epi16(xmm2, xmm3);
+    logmax_den_re0 = _mm_max_epi16(logmax_den_re0, xmm4);
+    logmax_den_re0 = _mm_max_epi16(logmax_den_re0, xmm5);
+
+    xmm0 = _mm_max_epi16(bit_met_p7_m5, bit_met_p5_m5);
+    xmm1 = _mm_max_epi16(bit_met_p3_m5, bit_met_p1_m5);
+    xmm2 = _mm_max_epi16(bit_met_m1_m5, bit_met_m3_m5);
+    xmm3 = _mm_max_epi16(bit_met_m5_m5, bit_met_m7_m5);
+    xmm4 = _mm_max_epi16(xmm0, xmm1);
+    xmm5 = _mm_max_epi16(xmm2, xmm3);
+    logmax_num_re0 = _mm_max_epi16(xmm4, xmm5);
+    xmm0 = _mm_max_epi16(bit_met_p7_m3, bit_met_p5_m3);
+    xmm1 = _mm_max_epi16(bit_met_p3_m3, bit_met_p1_m3);
+    xmm2 = _mm_max_epi16(bit_met_m1_m3, bit_met_m3_m3);
+    xmm3 = _mm_max_epi16(bit_met_m5_m3, bit_met_m7_m3);
+    xmm4 = _mm_max_epi16(xmm0, xmm1);
+    xmm5 = _mm_max_epi16(xmm2, xmm3);
+    logmax_num_re0 = _mm_max_epi16(logmax_num_re0, xmm4);
+    logmax_num_re0 = _mm_max_epi16(logmax_num_re0, xmm5);
+    xmm0 = _mm_max_epi16(bit_met_p7_p3, bit_met_p5_p3);
+    xmm1 = _mm_max_epi16(bit_met_p3_p3, bit_met_p1_p3);
+    xmm2 = _mm_max_epi16(bit_met_m1_p3, bit_met_m3_p3);
+    xmm3 = _mm_max_epi16(bit_met_m5_p3, bit_met_m7_p3);
+    xmm4 = _mm_max_epi16(xmm0, xmm1);
+    xmm5 = _mm_max_epi16(xmm2, xmm3);
+    logmax_num_re0 = _mm_max_epi16(logmax_num_re0, xmm4);
+    logmax_num_re0 = _mm_max_epi16(logmax_num_re0, xmm5);
+    xmm0 = _mm_max_epi16(bit_met_p7_p5, bit_met_p5_p5);
+    xmm1 = _mm_max_epi16(bit_met_p3_p5, bit_met_p1_p5);
+    xmm2 = _mm_max_epi16(bit_met_m1_p5, bit_met_m3_p5);
+    xmm3 = _mm_max_epi16(bit_met_m5_p5, bit_met_m7_p5);
+    xmm4 = _mm_max_epi16(xmm0, xmm1);
+    xmm5 = _mm_max_epi16(xmm2, xmm3);
+    logmax_num_re0 = _mm_max_epi16(logmax_num_re0, xmm4);
+    logmax_num_re0 = _mm_max_epi16(logmax_num_re0, xmm5);
+
+    y2i = _mm_subs_epi16(logmax_num_re0, logmax_den_re0);
+
+
+    // map to output stream, difficult to do in SIMD since we have 6 16bit LLRs
+    // RE 1
+    j = 24*i;
+    stream0_out[j + 0] = ((short *)&y0r)[0];
+    stream0_out[j + 1] = ((short *)&y1r)[0];
+    stream0_out[j + 2] = ((short *)&y2r)[0];
+    stream0_out[j + 3] = ((short *)&y0i)[0];
+    stream0_out[j + 4] = ((short *)&y1i)[0];
+    stream0_out[j + 5] = ((short *)&y2i)[0];
+    // RE 2
+    stream0_out[j + 6] = ((short *)&y0r)[1];
+    stream0_out[j + 7] = ((short *)&y1r)[1];
+    stream0_out[j + 8] = ((short *)&y2r)[1];
+    stream0_out[j + 9] = ((short *)&y0i)[1];
+    stream0_out[j + 10] = ((short *)&y1i)[1];
+    stream0_out[j + 11] = ((short *)&y2i)[1];
+    // RE 3
+    stream0_out[j + 12] = ((short *)&y0r)[2];
+    stream0_out[j + 13] = ((short *)&y1r)[2];
+    stream0_out[j + 14] = ((short *)&y2r)[2];
+    stream0_out[j + 15] = ((short *)&y0i)[2];
+    stream0_out[j + 16] = ((short *)&y1i)[2];
+    stream0_out[j + 17] = ((short *)&y2i)[2];
+    // RE 4
+    stream0_out[j + 18] = ((short *)&y0r)[3];
+    stream0_out[j + 19] = ((short *)&y1r)[3];
+    stream0_out[j + 20] = ((short *)&y2r)[3];
+    stream0_out[j + 21] = ((short *)&y0i)[3];
+    stream0_out[j + 22] = ((short *)&y1i)[3];
+    stream0_out[j + 23] = ((short *)&y2i)[3];
+    // RE 5
+    stream0_out[j + 24] = ((short *)&y0r)[4];
+    stream0_out[j + 25] = ((short *)&y1r)[4];
+    stream0_out[j + 26] = ((short *)&y2r)[4];
+    stream0_out[j + 27] = ((short *)&y0i)[4];
+    stream0_out[j + 28] = ((short *)&y1i)[4];
+    stream0_out[j + 29] = ((short *)&y2i)[4];
+    // RE 6
+    stream0_out[j + 30] = ((short *)&y0r)[5];
+    stream0_out[j + 31] = ((short *)&y1r)[5];
+    stream0_out[j + 32] = ((short *)&y2r)[5];
+    stream0_out[j + 33] = ((short *)&y0i)[5];
+    stream0_out[j + 34] = ((short *)&y1i)[5];
+    stream0_out[j + 35] = ((short *)&y2i)[5];
+    // RE 7
+    stream0_out[j + 36] = ((short *)&y0r)[6];
+    stream0_out[j + 37] = ((short *)&y1r)[6];
+    stream0_out[j + 38] = ((short *)&y2r)[6];
+    stream0_out[j + 39] = ((short *)&y0i)[6];
+    stream0_out[j + 40] = ((short *)&y1i)[6];
+    stream0_out[j + 41] = ((short *)&y2i)[6];
+    // RE 8
+    stream0_out[j + 42] = ((short *)&y0r)[7];
+    stream0_out[j + 43] = ((short *)&y1r)[7];
+    stream0_out[j + 44] = ((short *)&y2r)[7];
+    stream0_out[j + 45] = ((short *)&y0i)[7];
+    stream0_out[j + 46] = ((short *)&y1i)[7];
+    stream0_out[j + 47] = ((short *)&y2i)[7];
+
+#elif defined(__arm__)
+
+#endif
+  }
+
+#if defined(__x86_64__) || defined(__i386__)
+  _mm_empty();
+  _m_empty();
+#endif
+
+}
+
+
+/* Per-symbol LLR wrapper around qam64_qam16() (going by the names: 64QAM desired
+ * stream, 16QAM interferer -- NOTE(review): confirm against the callee's definition).
+ * Row [0] of every int32_t** buffer is read at offset symbol*N_RB_DL*12 and passed
+ * on as int16_t data, together with the number of data REs (len) in this symbol.
+ * The output cursor is dlsch_llr when first_symbol_flag==1, otherwise *llr16p;
+ * it is advanced by 6 int16_t per RE and stored back in *llr16p, so llr16p must
+ * always be a valid pointer.  Aborts through AssertFatal when the cursor is NULL.
+ * Always returns 0. */
+int dlsch_64qam_16qam_llr(LTE_DL_FRAME_PARMS *frame_parms, // N_RB_DL, Ncp, nb_antenna_ports_eNB are read
+                          int32_t **rxdataF_comp,          // 1st argument handed to qam64_qam16()
+                          int32_t **rxdataF_comp_i,        // 2nd argument handed to qam64_qam16()
+                          int32_t **dl_ch_mag,             // 3rd argument handed to qam64_qam16()
+                          int32_t **dl_ch_mag_i,           // 4th argument handed to qam64_qam16()
+                          int32_t **rho_i,                 // 6th argument handed to qam64_qam16()
+                          int16_t *dlsch_llr,              // head of the LLR buffer (used when first_symbol_flag==1)
+                          uint8_t symbol,                  // OFDM symbol index, selects the buffer offset
+                          uint8_t first_symbol_flag,       // 1: restart at dlsch_llr, else continue at *llr16p
+                          uint16_t nb_rb,                  // number of RBs contributing REs
+                          uint16_t pbch_pss_sss_adjust,    // REs to discount, counted for a pilot-free symbol
+                          int16_t **llr16p)                // in/out: LLR write cursor
+{
+  int16_t *rxF      = (int16_t*)&rxdataF_comp[0][(symbol*frame_parms->N_RB_DL*12)];
+  int16_t *rxF_i    = (int16_t*)&rxdataF_comp_i[0][(symbol*frame_parms->N_RB_DL*12)];
+  int16_t *ch_mag   = (int16_t*)&dl_ch_mag[0][(symbol*frame_parms->N_RB_DL*12)];
+  int16_t *ch_mag_i = (int16_t*)&dl_ch_mag_i[0][(symbol*frame_parms->N_RB_DL*12)];
+  int16_t *rho      = (int16_t*)&rho_i[0][(symbol*frame_parms->N_RB_DL*12)];
+  int16_t *llr16;
+  int len;
+  // presumably the symbol index within its slot (a slot spanning 7-Ncp symbols)
+  uint8_t symbol_mod = (symbol >= (7-frame_parms->Ncp))? (symbol-(7-frame_parms->Ncp)) : symbol;
+
+  // restart at the head of the LLR buffer, or append after what was written before
+  llr16 = (first_symbol_flag == 1) ? dlsch_llr : *llr16p;
+
+  AssertFatal(llr16!=NULL,"dlsch_64qam_16qam_llr:llr is null, symbol %d\n",symbol);
+
+  if ((symbol_mod==0) || (symbol_mod==(4-frame_parms->Ncp))) {
+    // symbol carries pilots: fewer data REs, adjustment scaled by the same ratio
+    if (frame_parms->nb_antenna_ports_eNB!=1)
+      // more than 1 antenna port: 8 data REs per RB, adjustment scaled by 8/12
+      len = (nb_rb*8) - (2*pbch_pss_sss_adjust/3);
+    else
+      // 1 antenna port: 10 data REs per RB, adjustment scaled by 10/12
+      len = (nb_rb*10) - (5*pbch_pss_sss_adjust/6);
+  } else {
+    // symbol has no pilots: all 12 REs per RB carry data
+    len = (nb_rb*12) - pbch_pss_sss_adjust;
+  }
+
+  // LLRs are written starting at llr16; len is the RE count computed above
+  qam64_qam16((short *)rxF, (short *)rxF_i, (short *)ch_mag, (short *)ch_mag_i,
+              (short *)llr16, (short *)rho, len);
+
+  llr16 += (6*len); // 6 LLRs per RE
+  *llr16p = llr16;  // hand the cursor back for the next symbol
+  return(0);
+}
+
+void qam64_qam64(short *stream0_in,
+                 short *stream1_in,
+                 short *ch_mag,
+                 short *ch_mag_i,
+                 short *stream0_out,
+                 short *rho01,
+                 int length
+     )
+{
+
+  /*
+    Author: S. Wagner
+    Date: 31-07-12
+
+    Input:
+    stream0_in:  MF filter for 1st stream, i.e., y0=h0'*y
+    stream1_in:  MF filter for 2nd stream, i.e., y1=h1'*y
+    ch_mag:      4*h0/sqrt(42), [Re0 Im0 Re1 Im1] s.t. Im0=Re0, Im1=Re1, etc
+    ch_mag_i:    4*h1/sqrt(42), [Re0 Im0 Re1 Im1] s.t. Im0=Re0, Im1=Re1, etc
+    rho01:       Channel cross correlation, i.e., h1'*h0
+
+    Output:
+    stream0_out: output LLRs for 1st stream
+  */
+
+#if defined(__x86_64__) || defined(__i386__)
+
+  __m128i *rho01_128i      = (__m128i *)rho01;
+  __m128i *stream0_128i_in = (__m128i *)stream0_in;
+  __m128i *stream1_128i_in = (__m128i *)stream1_in;
+  __m128i *ch_mag_128i     = (__m128i *)ch_mag;
+  __m128i *ch_mag_128i_i   = (__m128i *)ch_mag_i;
+
+  __m128i ONE_OVER_SQRT_42 = _mm_set1_epi16(10112); // round(1/sqrt(42)*2^16)
+  __m128i THREE_OVER_SQRT_42 = _mm_set1_epi16(30337); // round(3/sqrt(42)*2^16)
+  __m128i FIVE_OVER_SQRT_42 = _mm_set1_epi16(25281); // round(5/sqrt(42)*2^15)
+  __m128i SEVEN_OVER_SQRT_42 = _mm_set1_epi16(17697); // round(7/sqrt(42)*2^14) Q2.14
+  __m128i ONE_OVER_SQRT_2 = _mm_set1_epi16(23170); // round(1/sqrt(2)*2^15)
+  __m128i ONE_OVER_SQRT_2_42 = _mm_set1_epi16(3575); // round(1/sqrt(2*42)*2^15)
+  __m128i THREE_OVER_SQRT_2_42 = _mm_set1_epi16(10726); // round(3/sqrt(2*42)*2^15)
+  __m128i FIVE_OVER_SQRT_2_42 = _mm_set1_epi16(17876); // round(5/sqrt(2*42)*2^15)
+  __m128i SEVEN_OVER_SQRT_2_42 = _mm_set1_epi16(25027); // round(7/sqrt(2*42)*2^15)
+  __m128i FORTYNINE_OVER_FOUR_SQRT_42 = _mm_set1_epi16(30969); // round(49/(4*sqrt(42))*2^14), Q2.14
+  __m128i THIRTYSEVEN_OVER_FOUR_SQRT_42 = _mm_set1_epi16(23385); // round(37/(4*sqrt(42))*2^14), Q2.14
+  __m128i TWENTYFIVE_OVER_FOUR_SQRT_42 = _mm_set1_epi16(31601); // round(25/(4*sqrt(42))*2^15)
+  __m128i TWENTYNINE_OVER_FOUR_SQRT_42 = _mm_set1_epi16(18329); // round(29/(4*sqrt(42))*2^15), Q2.14
+  __m128i SEVENTEEN_OVER_FOUR_SQRT_42 = _mm_set1_epi16(21489); // round(17/(4*sqrt(42))*2^15)
+  __m128i NINE_OVER_FOUR_SQRT_42 = _mm_set1_epi16(11376); // round(9/(4*sqrt(42))*2^15)
+  __m128i THIRTEEN_OVER_FOUR_SQRT_42 = _mm_set1_epi16(16433); // round(13/(4*sqrt(42))*2^15)
+  __m128i FIVE_OVER_FOUR_SQRT_42 = _mm_set1_epi16(6320); // round(5/(4*sqrt(42))*2^15)
+  __m128i ONE_OVER_FOUR_SQRT_42 = _mm_set1_epi16(1264); // round(1/(4*sqrt(42))*2^15)
+  __m128i SQRT_42_OVER_FOUR = _mm_set1_epi16(13272); // round(sqrt(42)/4*2^13), Q3.12
+
+  __m128i ch_mag_des;
+  __m128i ch_mag_int;
+  __m128i ch_mag_98_over_42_with_sigma2;
+  __m128i ch_mag_74_over_42_with_sigma2;
+  __m128i ch_mag_58_over_42_with_sigma2;
+  __m128i ch_mag_50_over_42_with_sigma2;
+  __m128i ch_mag_34_over_42_with_sigma2;
+  __m128i ch_mag_18_over_42_with_sigma2;
+  __m128i ch_mag_26_over_42_with_sigma2;
+  __m128i ch_mag_10_over_42_with_sigma2;
+  __m128i ch_mag_2_over_42_with_sigma2;
+  __m128i  y0r_one_over_sqrt_21;
+  __m128i  y0r_three_over_sqrt_21;
+  __m128i  y0r_five_over_sqrt_21;
+  __m128i  y0r_seven_over_sqrt_21;
+  __m128i  y0i_one_over_sqrt_21;
+  __m128i  y0i_three_over_sqrt_21;
+  __m128i  y0i_five_over_sqrt_21;
+  __m128i  y0i_seven_over_sqrt_21;
+  __m128i ch_mag_int_with_sigma2;
+  __m128i two_ch_mag_int_with_sigma2;
+  __m128i three_ch_mag_int_with_sigma2;
+#elif defined(__arm__)
+
+#endif
+
+  int i,j;
+
+
+  for (i=0; i<length>>2; i+=2) {
+
+#if defined(__x86_64__) || defined(__i386__)
+
+    // Get rho
+    xmm0 = rho01_128i[i];
+    xmm1 = rho01_128i[i+1];
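+    xmm0 = _mm_shufflelo_epi16(xmm0,0xd8); // 0xd8 == _MM_SHUFFLE(3,1,2,0): selects elements 0,2,1,3
+    xmm0 = _mm_shufflehi_epi16(xmm0,0xd8); // 0xd8 == _MM_SHUFFLE(3,1,2,0): selects elements 0,2,1,3
+    xmm0 = _mm_shuffle_epi32(xmm0,0xd8); // 0xd8 == _MM_SHUFFLE(3,1,2,0): selects elements 0,2,1,3
+    xmm1 = _mm_shufflelo_epi16(xmm1,0xd8); // 0xd8 == _MM_SHUFFLE(3,1,2,0): selects elements 0,2,1,3
+    xmm1 = _mm_shufflehi_epi16(xmm1,0xd8); // 0xd8 == _MM_SHUFFLE(3,1,2,0): selects elements 0,2,1,3
+    xmm1 = _mm_shuffle_epi32(xmm1,0xd8); // 0xd8 == _MM_SHUFFLE(3,1,2,0): selects elements 0,2,1,3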
+    //xmm0 = [Re(0,1) Re(2,3) Im(0,1) Im(2,3)]
+    //xmm1 = [Re(4,5) Re(6,7) Im(4,5) Im(6,7)]
+    xmm2 = _mm_unpacklo_epi64(xmm0,xmm1); // Re(rho)
+    xmm3 = _mm_unpackhi_epi64(xmm0,xmm1); // Im(rho)
+    rho_rpi = _mm_adds_epi16(xmm2,xmm3); // rho = Re(rho) + Im(rho)
+    rho_rmi = _mm_subs_epi16(xmm2,xmm3); // rho* = Re(rho) - Im(rho)
+
+    // Compute the different rhos
+    rho_rpi_1_1 = _mm_mulhi_epi16(rho_rpi, ONE_OVER_SQRT_42);
+    rho_rmi_1_1 = _mm_mulhi_epi16(rho_rmi, ONE_OVER_SQRT_42);
+    rho_rpi_3_3 = _mm_mulhi_epi16(rho_rpi, THREE_OVER_SQRT_42);
+    rho_rmi_3_3 = _mm_mulhi_epi16(rho_rmi, THREE_OVER_SQRT_42);
+    rho_rpi_5_5 = _mm_mulhi_epi16(rho_rpi, FIVE_OVER_SQRT_42);
+    rho_rmi_5_5 = _mm_mulhi_epi16(rho_rmi, FIVE_OVER_SQRT_42);
+    rho_rpi_7_7 = _mm_mulhi_epi16(rho_rpi, SEVEN_OVER_SQRT_42);
+    rho_rmi_7_7 = _mm_mulhi_epi16(rho_rmi, SEVEN_OVER_SQRT_42);
+
+    rho_rpi_5_5 = _mm_slli_epi16(rho_rpi_5_5, 1);
+    rho_rmi_5_5 = _mm_slli_epi16(rho_rmi_5_5, 1);
+    rho_rpi_7_7 = _mm_slli_epi16(rho_rpi_7_7, 2);
+    rho_rmi_7_7 = _mm_slli_epi16(rho_rmi_7_7, 2);
+
+    xmm4 = _mm_mulhi_epi16(xmm2, ONE_OVER_SQRT_42);
+    xmm5 = _mm_mulhi_epi16(xmm3, ONE_OVER_SQRT_42);
+    xmm6 = _mm_mulhi_epi16(xmm3, THREE_OVER_SQRT_42);
+    xmm7 = _mm_mulhi_epi16(xmm3, FIVE_OVER_SQRT_42);
+    xmm8 = _mm_mulhi_epi16(xmm3, SEVEN_OVER_SQRT_42);
+    xmm7 = _mm_slli_epi16(xmm7, 1);
+    xmm8 = _mm_slli_epi16(xmm8, 2);
+
+    rho_rpi_1_3 = _mm_adds_epi16(xmm4, xmm6);
+    rho_rmi_1_3 = _mm_subs_epi16(xmm4, xmm6);
+    rho_rpi_1_5 = _mm_adds_epi16(xmm4, xmm7);
+    rho_rmi_1_5 = _mm_subs_epi16(xmm4, xmm7);
+    rho_rpi_1_7 = _mm_adds_epi16(xmm4, xmm8);
+    rho_rmi_1_7 = _mm_subs_epi16(xmm4, xmm8);
+
+    xmm4 = _mm_mulhi_epi16(xmm2, THREE_OVER_SQRT_42);
+    rho_rpi_3_1 = _mm_adds_epi16(xmm4, xmm5);
+    rho_rmi_3_1 = _mm_subs_epi16(xmm4, xmm5);
+    rho_rpi_3_5 = _mm_adds_epi16(xmm4, xmm7);
+    rho_rmi_3_5 = _mm_subs_epi16(xmm4, xmm7);
+    rho_rpi_3_7 = _mm_adds_epi16(xmm4, xmm8);
+    rho_rmi_3_7 = _mm_subs_epi16(xmm4, xmm8);
+
+    xmm4 = _mm_mulhi_epi16(xmm2, FIVE_OVER_SQRT_42);
+    xmm4 = _mm_slli_epi16(xmm4, 1);
+    rho_rpi_5_1 = _mm_adds_epi16(xmm4, xmm5);
+    rho_rmi_5_1 = _mm_subs_epi16(xmm4, xmm5);
+    rho_rpi_5_3 = _mm_adds_epi16(xmm4, xmm6);
+    rho_rmi_5_3 = _mm_subs_epi16(xmm4, xmm6);
+    rho_rpi_5_7 = _mm_adds_epi16(xmm4, xmm8);
+    rho_rmi_5_7 = _mm_subs_epi16(xmm4, xmm8);
+
+    xmm4 = _mm_mulhi_epi16(xmm2, SEVEN_OVER_SQRT_42);
+    xmm4 = _mm_slli_epi16(xmm4, 2);
+    rho_rpi_7_1 = _mm_adds_epi16(xmm4, xmm5);
+    rho_rmi_7_1 = _mm_subs_epi16(xmm4, xmm5);
+    rho_rpi_7_3 = _mm_adds_epi16(xmm4, xmm6);
+    rho_rmi_7_3 = _mm_subs_epi16(xmm4, xmm6);
+    rho_rpi_7_5 = _mm_adds_epi16(xmm4, xmm7);
+    rho_rmi_7_5 = _mm_subs_epi16(xmm4, xmm7);
+
+    // Rearrange interfering MF output
+    xmm0 = stream1_128i_in[i];
+    xmm1 = stream1_128i_in[i+1];
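+    xmm0 = _mm_shufflelo_epi16(xmm0,0xd8); // 0xd8 == _MM_SHUFFLE(3,1,2,0): selects elements 0,2,1,3
+    xmm0 = _mm_shufflehi_epi16(xmm0,0xd8); // 0xd8 == _MM_SHUFFLE(3,1,2,0): selects elements 0,2,1,3
+    xmm0 = _mm_shuffle_epi32(xmm0,0xd8); // 0xd8 == _MM_SHUFFLE(3,1,2,0): selects elements 0,2,1,3
+    xmm1 = _mm_shufflelo_epi16(xmm1,0xd8); // 0xd8 == _MM_SHUFFLE(3,1,2,0): selects elements 0,2,1,3
+    xmm1 = _mm_shufflehi_epi16(xmm1,0xd8); // 0xd8 == _MM_SHUFFLE(3,1,2,0): selects elements 0,2,1,3
+    xmm1 = _mm_shuffle_epi32(xmm1,0xd8); // 0xd8 == _MM_SHUFFLE(3,1,2,0): selects elements 0,2,1,3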
+    //xmm0 = [Re(0,1) Re(2,3) Im(0,1) Im(2,3)]
+    //xmm1 = [Re(4,5) Re(6,7) Im(4,5) Im(6,7)]
+    y1r = _mm_unpacklo_epi64(xmm0,xmm1); //[y1r(1),y1r(2),y1r(3),y1r(4)]
+    y1i = _mm_unpackhi_epi64(xmm0,xmm1); //[y1i(1),y1i(2),y1i(3),y1i(4)]
+
+    // Psi_r calculation from rho_rpi or rho_rmi
+    xmm0 = _mm_setzero_si128(); // zeroed, but never read before xmm0 is reloaded below; _mm_abs_epi16 takes no zero operand
+    xmm2 = _mm_subs_epi16(rho_rpi_7_7, y1r);
+    psi_r_p7_p7 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_subs_epi16(rho_rpi_7_5, y1r);
+    psi_r_p7_p5 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_subs_epi16(rho_rpi_7_3, y1r);
+    psi_r_p7_p3 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_subs_epi16(rho_rpi_7_1, y1r);
+    psi_r_p7_p1 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_subs_epi16(rho_rmi_7_1, y1r);
+    psi_r_p7_m1 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_subs_epi16(rho_rmi_7_3, y1r);
+    psi_r_p7_m3 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_subs_epi16(rho_rmi_7_5, y1r);
+    psi_r_p7_m5 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_subs_epi16(rho_rmi_7_7, y1r);
+    psi_r_p7_m7 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_subs_epi16(rho_rpi_5_7, y1r);
+    psi_r_p5_p7 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_subs_epi16(rho_rpi_5_5, y1r);
+    psi_r_p5_p5 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_subs_epi16(rho_rpi_5_3, y1r);
+    psi_r_p5_p3 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_subs_epi16(rho_rpi_5_1, y1r);
+    psi_r_p5_p1 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_subs_epi16(rho_rmi_5_1, y1r);
+    psi_r_p5_m1 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_subs_epi16(rho_rmi_5_3, y1r);
+    psi_r_p5_m3 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_subs_epi16(rho_rmi_5_5, y1r);
+    psi_r_p5_m5 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_subs_epi16(rho_rmi_5_7, y1r);
+    psi_r_p5_m7 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_subs_epi16(rho_rpi_3_7, y1r);
+    psi_r_p3_p7 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_subs_epi16(rho_rpi_3_5, y1r);
+    psi_r_p3_p5 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_subs_epi16(rho_rpi_3_3, y1r);
+    psi_r_p3_p3 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_subs_epi16(rho_rpi_3_1, y1r);
+    psi_r_p3_p1 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_subs_epi16(rho_rmi_3_1, y1r);
+    psi_r_p3_m1 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_subs_epi16(rho_rmi_3_3, y1r);
+    psi_r_p3_m3 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_subs_epi16(rho_rmi_3_5, y1r);
+    psi_r_p3_m5 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_subs_epi16(rho_rmi_3_7, y1r);
+    psi_r_p3_m7 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_subs_epi16(rho_rpi_1_7, y1r);
+    psi_r_p1_p7 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_subs_epi16(rho_rpi_1_5, y1r);
+    psi_r_p1_p5 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_subs_epi16(rho_rpi_1_3, y1r);
+    psi_r_p1_p3 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_subs_epi16(rho_rpi_1_1, y1r);
+    psi_r_p1_p1 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_subs_epi16(rho_rmi_1_1, y1r);
+    psi_r_p1_m1 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_subs_epi16(rho_rmi_1_3, y1r);
+    psi_r_p1_m3 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_subs_epi16(rho_rmi_1_5, y1r);
+    psi_r_p1_m5 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_subs_epi16(rho_rmi_1_7, y1r);
+    psi_r_p1_m7 = _mm_abs_epi16(xmm2);
+
+    xmm2 = _mm_adds_epi16(rho_rmi_1_7, y1r);
+    psi_r_m1_p7 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_adds_epi16(rho_rmi_1_5, y1r);
+    psi_r_m1_p5 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_adds_epi16(rho_rmi_1_3, y1r);
+    psi_r_m1_p3 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_adds_epi16(rho_rmi_1_1, y1r);
+    psi_r_m1_p1 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_adds_epi16(rho_rpi_1_1, y1r);
+    psi_r_m1_m1 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_adds_epi16(rho_rpi_1_3, y1r);
+    psi_r_m1_m3 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_adds_epi16(rho_rpi_1_5, y1r);
+    psi_r_m1_m5 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_adds_epi16(rho_rpi_1_7, y1r);
+    psi_r_m1_m7 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_adds_epi16(rho_rmi_3_7, y1r);
+    psi_r_m3_p7 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_adds_epi16(rho_rmi_3_5, y1r);
+    psi_r_m3_p5 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_adds_epi16(rho_rmi_3_3, y1r);
+    psi_r_m3_p3 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_adds_epi16(rho_rmi_3_1, y1r);
+    psi_r_m3_p1 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_adds_epi16(rho_rpi_3_1, y1r);
+    psi_r_m3_m1 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_adds_epi16(rho_rpi_3_3, y1r);
+    psi_r_m3_m3 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_adds_epi16(rho_rpi_3_5, y1r);
+    psi_r_m3_m5 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_adds_epi16(rho_rpi_3_7, y1r);
+    psi_r_m3_m7 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_adds_epi16(rho_rmi_5_7, y1r);
+    psi_r_m5_p7 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_adds_epi16(rho_rmi_5_5, y1r);
+    psi_r_m5_p5 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_adds_epi16(rho_rmi_5_3, y1r);
+    psi_r_m5_p3 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_adds_epi16(rho_rmi_5_1, y1r);
+    psi_r_m5_p1 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_adds_epi16(rho_rpi_5_1, y1r);
+    psi_r_m5_m1 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_adds_epi16(rho_rpi_5_3, y1r);
+    psi_r_m5_m3 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_adds_epi16(rho_rpi_5_5, y1r);
+    psi_r_m5_m5 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_adds_epi16(rho_rpi_5_7, y1r);
+    psi_r_m5_m7 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_adds_epi16(rho_rmi_7_7, y1r);
+    psi_r_m7_p7 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_adds_epi16(rho_rmi_7_5, y1r);
+    psi_r_m7_p5 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_adds_epi16(rho_rmi_7_3, y1r);
+    psi_r_m7_p3 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_adds_epi16(rho_rmi_7_1, y1r);
+    psi_r_m7_p1 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_adds_epi16(rho_rpi_7_1, y1r);
+    psi_r_m7_m1 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_adds_epi16(rho_rpi_7_3, y1r);
+    psi_r_m7_m3 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_adds_epi16(rho_rpi_7_5, y1r);
+    psi_r_m7_m5 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_adds_epi16(rho_rpi_7_7, y1r);
+    psi_r_m7_m7 = _mm_abs_epi16(xmm2);
+
+    // Psi_i calculation from rho_rpi or rho_rmi
+    xmm2 = _mm_subs_epi16(rho_rmi_7_7, y1i);
+    psi_i_p7_p7 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_subs_epi16(rho_rmi_5_7, y1i);
+    psi_i_p7_p5 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_subs_epi16(rho_rmi_3_7, y1i);
+    psi_i_p7_p3 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_subs_epi16(rho_rmi_1_7, y1i);
+    psi_i_p7_p1 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_adds_epi16(rho_rpi_1_7, y1i);
+    psi_i_p7_m1 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_adds_epi16(rho_rpi_3_7, y1i);
+    psi_i_p7_m3 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_adds_epi16(rho_rpi_5_7, y1i);
+    psi_i_p7_m5 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_adds_epi16(rho_rpi_7_7, y1i);
+    psi_i_p7_m7 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_subs_epi16(rho_rmi_7_5, y1i);
+    psi_i_p5_p7 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_subs_epi16(rho_rmi_5_5, y1i);
+    psi_i_p5_p5 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_subs_epi16(rho_rmi_3_5, y1i);
+    psi_i_p5_p3 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_subs_epi16(rho_rmi_1_5, y1i);
+    psi_i_p5_p1 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_adds_epi16(rho_rpi_1_5, y1i);
+    psi_i_p5_m1 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_adds_epi16(rho_rpi_3_5, y1i);
+    psi_i_p5_m3 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_adds_epi16(rho_rpi_5_5, y1i);
+    psi_i_p5_m5 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_adds_epi16(rho_rpi_7_5, y1i);
+    psi_i_p5_m7 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_subs_epi16(rho_rmi_7_3, y1i);
+    psi_i_p3_p7 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_subs_epi16(rho_rmi_5_3, y1i);
+    psi_i_p3_p5 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_subs_epi16(rho_rmi_3_3, y1i);
+    psi_i_p3_p3 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_subs_epi16(rho_rmi_1_3, y1i);
+    psi_i_p3_p1 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_adds_epi16(rho_rpi_1_3, y1i);
+    psi_i_p3_m1 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_adds_epi16(rho_rpi_3_3, y1i);
+    psi_i_p3_m3 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_adds_epi16(rho_rpi_5_3, y1i);
+    psi_i_p3_m5 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_adds_epi16(rho_rpi_7_3, y1i);
+    psi_i_p3_m7 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_subs_epi16(rho_rmi_7_1, y1i);
+    psi_i_p1_p7 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_subs_epi16(rho_rmi_5_1, y1i);
+    psi_i_p1_p5 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_subs_epi16(rho_rmi_3_1, y1i);
+    psi_i_p1_p3 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_subs_epi16(rho_rmi_1_1, y1i);
+    psi_i_p1_p1 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_adds_epi16(rho_rpi_1_1, y1i);
+    psi_i_p1_m1 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_adds_epi16(rho_rpi_3_1, y1i);
+    psi_i_p1_m3 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_adds_epi16(rho_rpi_5_1, y1i);
+    psi_i_p1_m5 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_adds_epi16(rho_rpi_7_1, y1i);
+    psi_i_p1_m7 = _mm_abs_epi16(xmm2);
+
+    xmm2 = _mm_subs_epi16(rho_rpi_7_1, y1i);
+    psi_i_m1_p7 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_subs_epi16(rho_rpi_5_1, y1i);
+    psi_i_m1_p5 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_subs_epi16(rho_rpi_3_1, y1i);
+    psi_i_m1_p3 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_subs_epi16(rho_rpi_1_1, y1i);
+    psi_i_m1_p1 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_adds_epi16(rho_rmi_1_1, y1i);
+    psi_i_m1_m1 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_adds_epi16(rho_rmi_3_1, y1i);
+    psi_i_m1_m3 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_adds_epi16(rho_rmi_5_1, y1i);
+    psi_i_m1_m5 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_adds_epi16(rho_rmi_7_1, y1i);
+    psi_i_m1_m7 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_subs_epi16(rho_rpi_7_3, y1i);
+    psi_i_m3_p7 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_subs_epi16(rho_rpi_5_3, y1i);
+    psi_i_m3_p5 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_subs_epi16(rho_rpi_3_3, y1i);
+    psi_i_m3_p3 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_subs_epi16(rho_rpi_1_3, y1i);
+    psi_i_m3_p1 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_adds_epi16(rho_rmi_1_3, y1i);
+    psi_i_m3_m1 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_adds_epi16(rho_rmi_3_3, y1i);
+    psi_i_m3_m3 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_adds_epi16(rho_rmi_5_3, y1i);
+    psi_i_m3_m5 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_adds_epi16(rho_rmi_7_3, y1i);
+    psi_i_m3_m7 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_subs_epi16(rho_rpi_7_5, y1i);
+    psi_i_m5_p7 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_subs_epi16(rho_rpi_5_5, y1i);
+    psi_i_m5_p5 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_subs_epi16(rho_rpi_3_5, y1i);
+    psi_i_m5_p3 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_subs_epi16(rho_rpi_1_5, y1i);
+    psi_i_m5_p1 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_adds_epi16(rho_rmi_1_5, y1i);
+    psi_i_m5_m1 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_adds_epi16(rho_rmi_3_5, y1i);
+    psi_i_m5_m3 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_adds_epi16(rho_rmi_5_5, y1i);
+    psi_i_m5_m5 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_adds_epi16(rho_rmi_7_5, y1i);
+    psi_i_m5_m7 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_subs_epi16(rho_rpi_7_7, y1i);
+    psi_i_m7_p7 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_subs_epi16(rho_rpi_5_7, y1i);
+    psi_i_m7_p5 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_subs_epi16(rho_rpi_3_7, y1i);
+    psi_i_m7_p3 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_subs_epi16(rho_rpi_1_7, y1i);
+    psi_i_m7_p1 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_adds_epi16(rho_rmi_1_7, y1i);
+    psi_i_m7_m1 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_adds_epi16(rho_rmi_3_7, y1i);
+    psi_i_m7_m3 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_adds_epi16(rho_rmi_5_7, y1i);
+    psi_i_m7_m5 = _mm_abs_epi16(xmm2);
+    xmm2 = _mm_adds_epi16(rho_rmi_7_7, y1i);
+    psi_i_m7_m7 = _mm_abs_epi16(xmm2);
+
+
+    // Rearrange desired MF output
+    xmm0 = stream0_128i_in[i];
+    xmm1 = stream0_128i_in[i+1];
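+    xmm0 = _mm_shufflelo_epi16(xmm0,0xd8); // 0xd8 == _MM_SHUFFLE(3,1,2,0): selects elements 0,2,1,3
+    xmm0 = _mm_shufflehi_epi16(xmm0,0xd8); // 0xd8 == _MM_SHUFFLE(3,1,2,0): selects elements 0,2,1,3
+    xmm0 = _mm_shuffle_epi32(xmm0,0xd8); // 0xd8 == _MM_SHUFFLE(3,1,2,0): selects elements 0,2,1,3
+    xmm1 = _mm_shufflelo_epi16(xmm1,0xd8); // 0xd8 == _MM_SHUFFLE(3,1,2,0): selects elements 0,2,1,3
+    xmm1 = _mm_shufflehi_epi16(xmm1,0xd8); // 0xd8 == _MM_SHUFFLE(3,1,2,0): selects elements 0,2,1,3
+    xmm1 = _mm_shuffle_epi32(xmm1,0xd8); // 0xd8 == _MM_SHUFFLE(3,1,2,0): selects elements 0,2,1,3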
+    //xmm0 = [Re(0,1) Re(2,3) Im(0,1) Im(2,3)]
+    //xmm1 = [Re(4,5) Re(6,7) Im(4,5) Im(6,7)]
+    y0r = _mm_unpacklo_epi64(xmm0,xmm1); // = [y0r(1),y0r(2),y0r(3),y0r(4)]
+    y0i = _mm_unpackhi_epi64(xmm0,xmm1);
+
+    // Rearrange desired channel magnitudes
+    xmm2 = ch_mag_128i[i]; // = [|h|^2(1),|h|^2(1),|h|^2(2),|h|^2(2)]*(2/sqrt(10))
+    xmm3 = ch_mag_128i[i+1]; // = [|h|^2(3),|h|^2(3),|h|^2(4),|h|^2(4)]*(2/sqrt(10))
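+    xmm2 = _mm_shufflelo_epi16(xmm2,0xd8); // 0xd8 == _MM_SHUFFLE(3,1,2,0): selects elements 0,2,1,3
+    xmm2 = _mm_shufflehi_epi16(xmm2,0xd8); // 0xd8 == _MM_SHUFFLE(3,1,2,0): selects elements 0,2,1,3
+    xmm2 = _mm_shuffle_epi32(xmm2,0xd8); // 0xd8 == _MM_SHUFFLE(3,1,2,0): selects elements 0,2,1,3
+    xmm3 = _mm_shufflelo_epi16(xmm3,0xd8); // 0xd8 == _MM_SHUFFLE(3,1,2,0): selects elements 0,2,1,3
+    xmm3 = _mm_shufflehi_epi16(xmm3,0xd8); // 0xd8 == _MM_SHUFFLE(3,1,2,0): selects elements 0,2,1,3
+    xmm3 = _mm_shuffle_epi32(xmm3,0xd8); // 0xd8 == _MM_SHUFFLE(3,1,2,0): selects elements 0,2,1,3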
+    ch_mag_des = _mm_unpacklo_epi64(xmm2,xmm3);
+
+    // Rearrange interfering channel magnitudes
+    xmm2 = ch_mag_128i_i[i];
+    xmm3 = ch_mag_128i_i[i+1];
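+    xmm2 = _mm_shufflelo_epi16(xmm2,0xd8); // 0xd8 == _MM_SHUFFLE(3,1,2,0): selects elements 0,2,1,3
+    xmm2 = _mm_shufflehi_epi16(xmm2,0xd8); // 0xd8 == _MM_SHUFFLE(3,1,2,0): selects elements 0,2,1,3
+    xmm2 = _mm_shuffle_epi32(xmm2,0xd8); // 0xd8 == _MM_SHUFFLE(3,1,2,0): selects elements 0,2,1,3
+    xmm3 = _mm_shufflelo_epi16(xmm3,0xd8); // 0xd8 == _MM_SHUFFLE(3,1,2,0): selects elements 0,2,1,3
+    xmm3 = _mm_shufflehi_epi16(xmm3,0xd8); // 0xd8 == _MM_SHUFFLE(3,1,2,0): selects elements 0,2,1,3
+    xmm3 = _mm_shuffle_epi32(xmm3,0xd8); // 0xd8 == _MM_SHUFFLE(3,1,2,0): selects elements 0,2,1,3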
+    ch_mag_int  = _mm_unpacklo_epi64(xmm2,xmm3);
+
+    y0r_one_over_sqrt_21   = _mm_mulhi_epi16(y0r, ONE_OVER_SQRT_42);
+    y0r_three_over_sqrt_21 = _mm_mulhi_epi16(y0r, THREE_OVER_SQRT_42);
+    y0r_five_over_sqrt_21  = _mm_mulhi_epi16(y0r, FIVE_OVER_SQRT_42);
+    y0r_five_over_sqrt_21  = _mm_slli_epi16(y0r_five_over_sqrt_21, 1);
+    y0r_seven_over_sqrt_21 = _mm_mulhi_epi16(y0r, SEVEN_OVER_SQRT_42);
+    y0r_seven_over_sqrt_21 = _mm_slli_epi16(y0r_seven_over_sqrt_21, 2); // Q2.14
+
+    y0i_one_over_sqrt_21   = _mm_mulhi_epi16(y0i, ONE_OVER_SQRT_42);
+    y0i_three_over_sqrt_21 = _mm_mulhi_epi16(y0i, THREE_OVER_SQRT_42);
+    y0i_five_over_sqrt_21  = _mm_mulhi_epi16(y0i, FIVE_OVER_SQRT_42);
+    y0i_five_over_sqrt_21  = _mm_slli_epi16(y0i_five_over_sqrt_21, 1);
+    y0i_seven_over_sqrt_21 = _mm_mulhi_epi16(y0i, SEVEN_OVER_SQRT_42);
+    y0i_seven_over_sqrt_21 = _mm_slli_epi16(y0i_seven_over_sqrt_21, 2); // Q2.14
+
+    y0_p_7_1 = _mm_adds_epi16(y0r_seven_over_sqrt_21, y0i_one_over_sqrt_21);
+    y0_p_7_3 = _mm_adds_epi16(y0r_seven_over_sqrt_21, y0i_three_over_sqrt_21);
+    y0_p_7_5 = _mm_adds_epi16(y0r_seven_over_sqrt_21, y0i_five_over_sqrt_21);
+    y0_p_7_7 = _mm_adds_epi16(y0r_seven_over_sqrt_21, y0i_seven_over_sqrt_21);
+    y0_p_5_1 = _mm_adds_epi16(y0r_five_over_sqrt_21, y0i_one_over_sqrt_21);
+    y0_p_5_3 = _mm_adds_epi16(y0r_five_over_sqrt_21, y0i_three_over_sqrt_21);
+    y0_p_5_5 = _mm_adds_epi16(y0r_five_over_sqrt_21, y0i_five_over_sqrt_21);
+    y0_p_5_7 = _mm_adds_epi16(y0r_five_over_sqrt_21, y0i_seven_over_sqrt_21);
+    y0_p_3_1 = _mm_adds_epi16(y0r_three_over_sqrt_21, y0i_one_over_sqrt_21);
+    y0_p_3_3 = _mm_adds_epi16(y0r_three_over_sqrt_21, y0i_three_over_sqrt_21);
+    y0_p_3_5 = _mm_adds_epi16(y0r_three_over_sqrt_21, y0i_five_over_sqrt_21);
+    y0_p_3_7 = _mm_adds_epi16(y0r_three_over_sqrt_21, y0i_seven_over_sqrt_21);
+    y0_p_1_1 = _mm_adds_epi16(y0r_one_over_sqrt_21, y0i_one_over_sqrt_21);
+    y0_p_1_3 = _mm_adds_epi16(y0r_one_over_sqrt_21, y0i_three_over_sqrt_21);
+    y0_p_1_5 = _mm_adds_epi16(y0r_one_over_sqrt_21, y0i_five_over_sqrt_21);
+    y0_p_1_7 = _mm_adds_epi16(y0r_one_over_sqrt_21, y0i_seven_over_sqrt_21);
+
+    y0_m_1_1 = _mm_subs_epi16(y0r_one_over_sqrt_21, y0i_one_over_sqrt_21);
+    y0_m_1_3 = _mm_subs_epi16(y0r_one_over_sqrt_21, y0i_three_over_sqrt_21);
+    y0_m_1_5 = _mm_subs_epi16(y0r_one_over_sqrt_21, y0i_five_over_sqrt_21);
+    y0_m_1_7 = _mm_subs_epi16(y0r_one_over_sqrt_21, y0i_seven_over_sqrt_21);
+    y0_m_3_1 = _mm_subs_epi16(y0r_three_over_sqrt_21, y0i_one_over_sqrt_21);
+    y0_m_3_3 = _mm_subs_epi16(y0r_three_over_sqrt_21, y0i_three_over_sqrt_21);
+    y0_m_3_5 = _mm_subs_epi16(y0r_three_over_sqrt_21, y0i_five_over_sqrt_21);
+    y0_m_3_7 = _mm_subs_epi16(y0r_three_over_sqrt_21, y0i_seven_over_sqrt_21);
+    y0_m_5_1 = _mm_subs_epi16(y0r_five_over_sqrt_21, y0i_one_over_sqrt_21);
+    y0_m_5_3 = _mm_subs_epi16(y0r_five_over_sqrt_21, y0i_three_over_sqrt_21);
+    y0_m_5_5 = _mm_subs_epi16(y0r_five_over_sqrt_21, y0i_five_over_sqrt_21);
+    y0_m_5_7 = _mm_subs_epi16(y0r_five_over_sqrt_21, y0i_seven_over_sqrt_21);
+    y0_m_7_1 = _mm_subs_epi16(y0r_seven_over_sqrt_21, y0i_one_over_sqrt_21);
+    y0_m_7_3 = _mm_subs_epi16(y0r_seven_over_sqrt_21, y0i_three_over_sqrt_21);
+    y0_m_7_5 = _mm_subs_epi16(y0r_seven_over_sqrt_21, y0i_five_over_sqrt_21);
+    y0_m_7_7 = _mm_subs_epi16(y0r_seven_over_sqrt_21, y0i_seven_over_sqrt_21);
+
+    // Detection of interference term
+    ch_mag_int_with_sigma2       = _mm_srai_epi16(ch_mag_int, 1); // *2
+    two_ch_mag_int_with_sigma2   = ch_mag_int; // *4
+    three_ch_mag_int_with_sigma2 = _mm_adds_epi16(ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2); // *6
+
+    interference_abs_64qam_epi16(psi_r_p7_p7, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_p7_p7, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42,
+                                 SEVEN_OVER_SQRT_2_42);
+    interference_abs_64qam_epi16(psi_r_p7_p5, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_p7_p5, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42,
+                                 SEVEN_OVER_SQRT_2_42);
+    interference_abs_64qam_epi16(psi_r_p7_p3, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_p7_p3, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42,
+                                 SEVEN_OVER_SQRT_2_42);
+    interference_abs_64qam_epi16(psi_r_p7_p1, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_p7_p1, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42,
+                                 SEVEN_OVER_SQRT_2_42);
+    interference_abs_64qam_epi16(psi_r_p7_m1, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_p7_m1, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42,
+                                 SEVEN_OVER_SQRT_2_42);
+    interference_abs_64qam_epi16(psi_r_p7_m3, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_p7_m3, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42,
+                                 SEVEN_OVER_SQRT_2_42);
+    interference_abs_64qam_epi16(psi_r_p7_m5, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_p7_m5, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42,
+                                 SEVEN_OVER_SQRT_2_42);
+    interference_abs_64qam_epi16(psi_r_p7_m7, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_p7_m7, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42,
+                                 SEVEN_OVER_SQRT_2_42);
+    interference_abs_64qam_epi16(psi_r_p5_p7, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_p5_p7, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42,
+                                 SEVEN_OVER_SQRT_2_42);
+    interference_abs_64qam_epi16(psi_r_p5_p5, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_p5_p5, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42,
+                                 SEVEN_OVER_SQRT_2_42);
+    interference_abs_64qam_epi16(psi_r_p5_p3, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_p5_p3, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42,
+                                 SEVEN_OVER_SQRT_2_42);
+    interference_abs_64qam_epi16(psi_r_p5_p1, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_p5_p1, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42,
+                                 SEVEN_OVER_SQRT_2_42);
+    interference_abs_64qam_epi16(psi_r_p5_m1, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_p5_m1, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42,
+                                 SEVEN_OVER_SQRT_2_42);
+    interference_abs_64qam_epi16(psi_r_p5_m3, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_p5_m3, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42,
+                                 SEVEN_OVER_SQRT_2_42);
+    interference_abs_64qam_epi16(psi_r_p5_m5, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_p5_m5, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42,
+                                 SEVEN_OVER_SQRT_2_42);
+    interference_abs_64qam_epi16(psi_r_p5_m7, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_p5_m7, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42,
+                                 SEVEN_OVER_SQRT_2_42);
+    interference_abs_64qam_epi16(psi_r_p3_p7, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_p3_p7, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42,
+                                 SEVEN_OVER_SQRT_2_42);
+    interference_abs_64qam_epi16(psi_r_p3_p5, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_p3_p5, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42,
+                                 SEVEN_OVER_SQRT_2_42);
+    interference_abs_64qam_epi16(psi_r_p3_p3, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_p3_p3, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42,
+                                 SEVEN_OVER_SQRT_2_42);
+    interference_abs_64qam_epi16(psi_r_p3_p1, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_p3_p1, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42,
+                                 SEVEN_OVER_SQRT_2_42);
+    interference_abs_64qam_epi16(psi_r_p3_m1, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_p3_m1, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42,
+                                 SEVEN_OVER_SQRT_2_42);
+    interference_abs_64qam_epi16(psi_r_p3_m3, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_p3_m3, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42,
+                                 SEVEN_OVER_SQRT_2_42);
+    interference_abs_64qam_epi16(psi_r_p3_m5, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_p3_m5, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42,
+                                 SEVEN_OVER_SQRT_2_42);
+    interference_abs_64qam_epi16(psi_r_p3_m7, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_p3_m7, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42,
+                                 SEVEN_OVER_SQRT_2_42);
+    interference_abs_64qam_epi16(psi_r_p1_p7, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_p1_p7, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42,
+                                 SEVEN_OVER_SQRT_2_42);
+    interference_abs_64qam_epi16(psi_r_p1_p5, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_p1_p5, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42,
+                                 SEVEN_OVER_SQRT_2_42);
+    interference_abs_64qam_epi16(psi_r_p1_p3, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_p1_p3, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42,
+                                 SEVEN_OVER_SQRT_2_42);
+    interference_abs_64qam_epi16(psi_r_p1_p1, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_p1_p1, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42,
+                                 SEVEN_OVER_SQRT_2_42);
+    interference_abs_64qam_epi16(psi_r_p1_m1, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_p1_m1, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42,
+                                 SEVEN_OVER_SQRT_2_42);
+    interference_abs_64qam_epi16(psi_r_p1_m3, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_p1_m3, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42,
+                                 SEVEN_OVER_SQRT_2_42);
+    interference_abs_64qam_epi16(psi_r_p1_m5, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_p1_m5, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42,
+                                 SEVEN_OVER_SQRT_2_42);
+    interference_abs_64qam_epi16(psi_r_p1_m7, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_p1_m7, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42,
+                                 SEVEN_OVER_SQRT_2_42);
+    interference_abs_64qam_epi16(psi_r_m1_p7, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_m1_p7, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42,
+                                 SEVEN_OVER_SQRT_2_42);
+    interference_abs_64qam_epi16(psi_r_m1_p5, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_m1_p5, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42,
+                                 SEVEN_OVER_SQRT_2_42);
+    interference_abs_64qam_epi16(psi_r_m1_p3, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_m1_p3, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42,
+                                 SEVEN_OVER_SQRT_2_42);
+    interference_abs_64qam_epi16(psi_r_m1_p1, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_m1_p1, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42,
+                                 SEVEN_OVER_SQRT_2_42);
+    interference_abs_64qam_epi16(psi_r_m1_m1, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_m1_m1, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42,
+                                 SEVEN_OVER_SQRT_2_42);
+    interference_abs_64qam_epi16(psi_r_m1_m3, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_m1_m3, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42,
+                                 SEVEN_OVER_SQRT_2_42);
+    interference_abs_64qam_epi16(psi_r_m1_m5, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_m1_m5, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42,
+                                 SEVEN_OVER_SQRT_2_42);
+    interference_abs_64qam_epi16(psi_r_m1_m7, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_m1_m7, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42,
+                                 SEVEN_OVER_SQRT_2_42);
+    interference_abs_64qam_epi16(psi_r_m3_p7, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_m3_p7, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42,
+                                 SEVEN_OVER_SQRT_2_42);
+    interference_abs_64qam_epi16(psi_r_m3_p5, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_m3_p5, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42,
+                                 SEVEN_OVER_SQRT_2_42);
+    interference_abs_64qam_epi16(psi_r_m3_p3, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_m3_p3, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42,
+                                 SEVEN_OVER_SQRT_2_42);
+    interference_abs_64qam_epi16(psi_r_m3_p1, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_m3_p1, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42,
+                                 SEVEN_OVER_SQRT_2_42);
+    interference_abs_64qam_epi16(psi_r_m3_m1, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_m3_m1, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42,
+                                 SEVEN_OVER_SQRT_2_42);
+    interference_abs_64qam_epi16(psi_r_m3_m3, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_m3_m3, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42,
+                                 SEVEN_OVER_SQRT_2_42);
+    interference_abs_64qam_epi16(psi_r_m3_m5, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_m3_m5, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42,
+                                 SEVEN_OVER_SQRT_2_42);
+    interference_abs_64qam_epi16(psi_r_m3_m7, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_m3_m7, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42,
+                                 SEVEN_OVER_SQRT_2_42);
+    interference_abs_64qam_epi16(psi_r_m5_p7, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_m5_p7, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42,
+                                 SEVEN_OVER_SQRT_2_42);
+    interference_abs_64qam_epi16(psi_r_m5_p5, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_m5_p5, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42,
+                                 SEVEN_OVER_SQRT_2_42);
+    interference_abs_64qam_epi16(psi_r_m5_p3, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_m5_p3, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42,
+                                 SEVEN_OVER_SQRT_2_42);
+    interference_abs_64qam_epi16(psi_r_m5_p1, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_m5_p1, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42,
+                                 SEVEN_OVER_SQRT_2_42);
+    interference_abs_64qam_epi16(psi_r_m5_m1, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_m5_m1, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42,
+                                 SEVEN_OVER_SQRT_2_42);
+    interference_abs_64qam_epi16(psi_r_m5_m3, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_m5_m3, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42,
+                                 SEVEN_OVER_SQRT_2_42);
+    interference_abs_64qam_epi16(psi_r_m5_m5, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_m5_m5, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42,
+                                 SEVEN_OVER_SQRT_2_42);
+    interference_abs_64qam_epi16(psi_r_m5_m7, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_m5_m7, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42,
+                                 SEVEN_OVER_SQRT_2_42);
+    interference_abs_64qam_epi16(psi_r_m7_p7, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_m7_p7, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42,
+                                 SEVEN_OVER_SQRT_2_42);
+    interference_abs_64qam_epi16(psi_r_m7_p5, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_m7_p5, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42,
+                                 SEVEN_OVER_SQRT_2_42);
+    interference_abs_64qam_epi16(psi_r_m7_p3, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_m7_p3, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42,
+                                 SEVEN_OVER_SQRT_2_42);
+    interference_abs_64qam_epi16(psi_r_m7_p1, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_m7_p1, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42,
+                                 SEVEN_OVER_SQRT_2_42);
+    interference_abs_64qam_epi16(psi_r_m7_m1, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_m7_m1, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42,
+                                 SEVEN_OVER_SQRT_2_42);
+    interference_abs_64qam_epi16(psi_r_m7_m3, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_m7_m3, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42,
+                                 SEVEN_OVER_SQRT_2_42);
+    interference_abs_64qam_epi16(psi_r_m7_m5, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_m7_m5, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42,
+                                 SEVEN_OVER_SQRT_2_42);
+    interference_abs_64qam_epi16(psi_r_m7_m7, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_m7_m7, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42,
+                                 SEVEN_OVER_SQRT_2_42);
+
+    interference_abs_64qam_epi16(psi_i_p7_p7, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_p7_p7, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42,
+                                 SEVEN_OVER_SQRT_2_42);
+    interference_abs_64qam_epi16(psi_i_p7_p5, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_p7_p5, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42,
+                                 SEVEN_OVER_SQRT_2_42);
+    interference_abs_64qam_epi16(psi_i_p7_p3, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_p7_p3, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42,
+                                 SEVEN_OVER_SQRT_2_42);
+    interference_abs_64qam_epi16(psi_i_p7_p1, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_p7_p1, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42,
+                                 SEVEN_OVER_SQRT_2_42);
+    interference_abs_64qam_epi16(psi_i_p7_m1, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_p7_m1, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42,
+                                 SEVEN_OVER_SQRT_2_42);
+    interference_abs_64qam_epi16(psi_i_p7_m3, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_p7_m3, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42,
+                                 SEVEN_OVER_SQRT_2_42);
+    interference_abs_64qam_epi16(psi_i_p7_m5, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_p7_m5, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42,
+                                 SEVEN_OVER_SQRT_2_42);
+    interference_abs_64qam_epi16(psi_i_p7_m7, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_p7_m7, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42,
+                                 SEVEN_OVER_SQRT_2_42);
+    interference_abs_64qam_epi16(psi_i_p5_p7, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_p5_p7, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42,
+                                 SEVEN_OVER_SQRT_2_42);
+    interference_abs_64qam_epi16(psi_i_p5_p5, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_p5_p5, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42,
+                                 SEVEN_OVER_SQRT_2_42);
+    interference_abs_64qam_epi16(psi_i_p5_p3, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_p5_p3, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42,
+                                 SEVEN_OVER_SQRT_2_42);
+    interference_abs_64qam_epi16(psi_i_p5_p1, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_p5_p1, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42,
+                                 SEVEN_OVER_SQRT_2_42);
+    interference_abs_64qam_epi16(psi_i_p5_m1, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_p5_m1, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42,
+                                 SEVEN_OVER_SQRT_2_42);
+    interference_abs_64qam_epi16(psi_i_p5_m3, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_p5_m3, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42,
+                                 SEVEN_OVER_SQRT_2_42);
+    interference_abs_64qam_epi16(psi_i_p5_m5, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_p5_m5, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42,
+                                 SEVEN_OVER_SQRT_2_42);
+    interference_abs_64qam_epi16(psi_i_p5_m7, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_p5_m7, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42,
+                                 SEVEN_OVER_SQRT_2_42);
+    interference_abs_64qam_epi16(psi_i_p3_p7, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_p3_p7, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42,
+                                 SEVEN_OVER_SQRT_2_42);
+    interference_abs_64qam_epi16(psi_i_p3_p5, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_p3_p5, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42,
+                                 SEVEN_OVER_SQRT_2_42);
+    interference_abs_64qam_epi16(psi_i_p3_p3, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_p3_p3, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42,
+                                 SEVEN_OVER_SQRT_2_42);
+    interference_abs_64qam_epi16(psi_i_p3_p1, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_p3_p1, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42,
+                                 SEVEN_OVER_SQRT_2_42);
+    interference_abs_64qam_epi16(psi_i_p3_m1, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_p3_m1, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42,
+                                 SEVEN_OVER_SQRT_2_42);
+    interference_abs_64qam_epi16(psi_i_p3_m3, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_p3_m3, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42,
+                                 SEVEN_OVER_SQRT_2_42);
+    interference_abs_64qam_epi16(psi_i_p3_m5, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_p3_m5, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42,
+                                 SEVEN_OVER_SQRT_2_42);
+    interference_abs_64qam_epi16(psi_i_p3_m7, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_p3_m7, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42,
+                                 SEVEN_OVER_SQRT_2_42);
+    interference_abs_64qam_epi16(psi_i_p1_p7, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_p1_p7, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42,
+                                 SEVEN_OVER_SQRT_2_42);
+    interference_abs_64qam_epi16(psi_i_p1_p5, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_p1_p5, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42,
+                                 SEVEN_OVER_SQRT_2_42);
+    interference_abs_64qam_epi16(psi_i_p1_p3, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_p1_p3, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42,
+                                 SEVEN_OVER_SQRT_2_42);
+    interference_abs_64qam_epi16(psi_i_p1_p1, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_p1_p1, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42,
+                                 SEVEN_OVER_SQRT_2_42);
+    interference_abs_64qam_epi16(psi_i_p1_m1, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_p1_m1, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42,
+                                 SEVEN_OVER_SQRT_2_42);
+    interference_abs_64qam_epi16(psi_i_p1_m3, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_p1_m3, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42,
+                                 SEVEN_OVER_SQRT_2_42);
+    interference_abs_64qam_epi16(psi_i_p1_m5, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_p1_m5, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42,
+                                 SEVEN_OVER_SQRT_2_42);
+    interference_abs_64qam_epi16(psi_i_p1_m7, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_p1_m7, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42,
+                                 SEVEN_OVER_SQRT_2_42);
+    interference_abs_64qam_epi16(psi_i_m1_p7, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_m1_p7, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42,
+                                 SEVEN_OVER_SQRT_2_42);
+    interference_abs_64qam_epi16(psi_i_m1_p5, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_m1_p5, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42,
+                                 SEVEN_OVER_SQRT_2_42);
+    interference_abs_64qam_epi16(psi_i_m1_p3, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_m1_p3, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42,
+                                 SEVEN_OVER_SQRT_2_42);
+    interference_abs_64qam_epi16(psi_i_m1_p1, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_m1_p1, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42,
+                                 SEVEN_OVER_SQRT_2_42);
+    interference_abs_64qam_epi16(psi_i_m1_m1, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_m1_m1, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42,
+                                 SEVEN_OVER_SQRT_2_42);
+    interference_abs_64qam_epi16(psi_i_m1_m3, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_m1_m3, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42,
+                                 SEVEN_OVER_SQRT_2_42);
+    interference_abs_64qam_epi16(psi_i_m1_m5, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_m1_m5, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42,
+                                 SEVEN_OVER_SQRT_2_42);
+    interference_abs_64qam_epi16(psi_i_m1_m7, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_m1_m7, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42,
+                                 SEVEN_OVER_SQRT_2_42);
+    interference_abs_64qam_epi16(psi_i_m3_p7, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_m3_p7, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42,
+                                 SEVEN_OVER_SQRT_2_42);
+    interference_abs_64qam_epi16(psi_i_m3_p5, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_m3_p5, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42,
+                                 SEVEN_OVER_SQRT_2_42);
+    interference_abs_64qam_epi16(psi_i_m3_p3, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_m3_p3, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42,
+                                 SEVEN_OVER_SQRT_2_42);
+    interference_abs_64qam_epi16(psi_i_m3_p1, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_m3_p1, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42,
+                                 SEVEN_OVER_SQRT_2_42);
+    interference_abs_64qam_epi16(psi_i_m3_m1, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_m3_m1, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42,
+                                 SEVEN_OVER_SQRT_2_42);
+    interference_abs_64qam_epi16(psi_i_m3_m3, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_m3_m3, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42,
+                                 SEVEN_OVER_SQRT_2_42);
+    interference_abs_64qam_epi16(psi_i_m3_m5, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_m3_m5, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42,
+                                 SEVEN_OVER_SQRT_2_42);
+    interference_abs_64qam_epi16(psi_i_m3_m7, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_m3_m7, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42,
+                                 SEVEN_OVER_SQRT_2_42);
+    interference_abs_64qam_epi16(psi_i_m5_p7, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_m5_p7, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42,
+                                 SEVEN_OVER_SQRT_2_42);
+    interference_abs_64qam_epi16(psi_i_m5_p5, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_m5_p5, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42,
+                                 SEVEN_OVER_SQRT_2_42);
+    interference_abs_64qam_epi16(psi_i_m5_p3, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_m5_p3, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42,
+                                 SEVEN_OVER_SQRT_2_42);
+    interference_abs_64qam_epi16(psi_i_m5_p1, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_m5_p1, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42,
+                                 SEVEN_OVER_SQRT_2_42);
+    interference_abs_64qam_epi16(psi_i_m5_m1, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_m5_m1, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42,
+                                 SEVEN_OVER_SQRT_2_42);
+    interference_abs_64qam_epi16(psi_i_m5_m3, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_m5_m3, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42,
+                                 SEVEN_OVER_SQRT_2_42);
+    interference_abs_64qam_epi16(psi_i_m5_m5, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_m5_m5, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42,
+                                 SEVEN_OVER_SQRT_2_42);
+    interference_abs_64qam_epi16(psi_i_m5_m7, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_m5_m7, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42,
+                                 SEVEN_OVER_SQRT_2_42);
+    interference_abs_64qam_epi16(psi_i_m7_p7, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_m7_p7, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42,
+                                 SEVEN_OVER_SQRT_2_42);
+    interference_abs_64qam_epi16(psi_i_m7_p5, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_m7_p5, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42,
+                                 SEVEN_OVER_SQRT_2_42);
+    interference_abs_64qam_epi16(psi_i_m7_p3, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_m7_p3, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42,
+                                 SEVEN_OVER_SQRT_2_42);
+    interference_abs_64qam_epi16(psi_i_m7_p1, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_m7_p1, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42,
+                                 SEVEN_OVER_SQRT_2_42);
+    interference_abs_64qam_epi16(psi_i_m7_m1, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_m7_m1, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42,
+                                 SEVEN_OVER_SQRT_2_42);
+    interference_abs_64qam_epi16(psi_i_m7_m3, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_m7_m3, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42,
+                                 SEVEN_OVER_SQRT_2_42);
+    interference_abs_64qam_epi16(psi_i_m7_m5, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_m7_m5, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42,
+                                 SEVEN_OVER_SQRT_2_42);
+    interference_abs_64qam_epi16(psi_i_m7_m7, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_m7_m7, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42,
+                                 SEVEN_OVER_SQRT_2_42);
+
+    // Calculation of a group of two terms in the bit metric involving product of psi and interference
+    prodsum_psi_a_epi16(psi_r_p7_p7, a_r_p7_p7, psi_i_p7_p7, a_i_p7_p7, psi_a_p7_p7);
+    prodsum_psi_a_epi16(psi_r_p7_p5, a_r_p7_p5, psi_i_p7_p5, a_i_p7_p5, psi_a_p7_p5);
+    prodsum_psi_a_epi16(psi_r_p7_p3, a_r_p7_p3, psi_i_p7_p3, a_i_p7_p3, psi_a_p7_p3);
+    prodsum_psi_a_epi16(psi_r_p7_p1, a_r_p7_p1, psi_i_p7_p1, a_i_p7_p1, psi_a_p7_p1);
+    prodsum_psi_a_epi16(psi_r_p7_m1, a_r_p7_m1, psi_i_p7_m1, a_i_p7_m1, psi_a_p7_m1);
+    prodsum_psi_a_epi16(psi_r_p7_m3, a_r_p7_m3, psi_i_p7_m3, a_i_p7_m3, psi_a_p7_m3);
+    prodsum_psi_a_epi16(psi_r_p7_m5, a_r_p7_m5, psi_i_p7_m5, a_i_p7_m5, psi_a_p7_m5);
+    prodsum_psi_a_epi16(psi_r_p7_m7, a_r_p7_m7, psi_i_p7_m7, a_i_p7_m7, psi_a_p7_m7);
+    prodsum_psi_a_epi16(psi_r_p5_p7, a_r_p5_p7, psi_i_p5_p7, a_i_p5_p7, psi_a_p5_p7);
+    prodsum_psi_a_epi16(psi_r_p5_p5, a_r_p5_p5, psi_i_p5_p5, a_i_p5_p5, psi_a_p5_p5);
+    prodsum_psi_a_epi16(psi_r_p5_p3, a_r_p5_p3, psi_i_p5_p3, a_i_p5_p3, psi_a_p5_p3);
+    prodsum_psi_a_epi16(psi_r_p5_p1, a_r_p5_p1, psi_i_p5_p1, a_i_p5_p1, psi_a_p5_p1);
+    prodsum_psi_a_epi16(psi_r_p5_m1, a_r_p5_m1, psi_i_p5_m1, a_i_p5_m1, psi_a_p5_m1);
+    prodsum_psi_a_epi16(psi_r_p5_m3, a_r_p5_m3, psi_i_p5_m3, a_i_p5_m3, psi_a_p5_m3);
+    prodsum_psi_a_epi16(psi_r_p5_m5, a_r_p5_m5, psi_i_p5_m5, a_i_p5_m5, psi_a_p5_m5);
+    prodsum_psi_a_epi16(psi_r_p5_m7, a_r_p5_m7, psi_i_p5_m7, a_i_p5_m7, psi_a_p5_m7);
+    prodsum_psi_a_epi16(psi_r_p3_p7, a_r_p3_p7, psi_i_p3_p7, a_i_p3_p7, psi_a_p3_p7);
+    prodsum_psi_a_epi16(psi_r_p3_p5, a_r_p3_p5, psi_i_p3_p5, a_i_p3_p5, psi_a_p3_p5);
+    prodsum_psi_a_epi16(psi_r_p3_p3, a_r_p3_p3, psi_i_p3_p3, a_i_p3_p3, psi_a_p3_p3);
+    prodsum_psi_a_epi16(psi_r_p3_p1, a_r_p3_p1, psi_i_p3_p1, a_i_p3_p1, psi_a_p3_p1);
+    prodsum_psi_a_epi16(psi_r_p3_m1, a_r_p3_m1, psi_i_p3_m1, a_i_p3_m1, psi_a_p3_m1);
+    prodsum_psi_a_epi16(psi_r_p3_m3, a_r_p3_m3, psi_i_p3_m3, a_i_p3_m3, psi_a_p3_m3);
+    prodsum_psi_a_epi16(psi_r_p3_m5, a_r_p3_m5, psi_i_p3_m5, a_i_p3_m5, psi_a_p3_m5);
+    prodsum_psi_a_epi16(psi_r_p3_m7, a_r_p3_m7, psi_i_p3_m7, a_i_p3_m7, psi_a_p3_m7);
+    prodsum_psi_a_epi16(psi_r_p1_p7, a_r_p1_p7, psi_i_p1_p7, a_i_p1_p7, psi_a_p1_p7);
+    prodsum_psi_a_epi16(psi_r_p1_p5, a_r_p1_p5, psi_i_p1_p5, a_i_p1_p5, psi_a_p1_p5);
+    prodsum_psi_a_epi16(psi_r_p1_p3, a_r_p1_p3, psi_i_p1_p3, a_i_p1_p3, psi_a_p1_p3);
+    prodsum_psi_a_epi16(psi_r_p1_p1, a_r_p1_p1, psi_i_p1_p1, a_i_p1_p1, psi_a_p1_p1);
+    prodsum_psi_a_epi16(psi_r_p1_m1, a_r_p1_m1, psi_i_p1_m1, a_i_p1_m1, psi_a_p1_m1);
+    prodsum_psi_a_epi16(psi_r_p1_m3, a_r_p1_m3, psi_i_p1_m3, a_i_p1_m3, psi_a_p1_m3);
+    prodsum_psi_a_epi16(psi_r_p1_m5, a_r_p1_m5, psi_i_p1_m5, a_i_p1_m5, psi_a_p1_m5);
+    prodsum_psi_a_epi16(psi_r_p1_m7, a_r_p1_m7, psi_i_p1_m7, a_i_p1_m7, psi_a_p1_m7);
+    prodsum_psi_a_epi16(psi_r_m1_p7, a_r_m1_p7, psi_i_m1_p7, a_i_m1_p7, psi_a_m1_p7);
+    prodsum_psi_a_epi16(psi_r_m1_p5, a_r_m1_p5, psi_i_m1_p5, a_i_m1_p5, psi_a_m1_p5);
+    prodsum_psi_a_epi16(psi_r_m1_p3, a_r_m1_p3, psi_i_m1_p3, a_i_m1_p3, psi_a_m1_p3);
+    prodsum_psi_a_epi16(psi_r_m1_p1, a_r_m1_p1, psi_i_m1_p1, a_i_m1_p1, psi_a_m1_p1);
+    prodsum_psi_a_epi16(psi_r_m1_m1, a_r_m1_m1, psi_i_m1_m1, a_i_m1_m1, psi_a_m1_m1);
+    prodsum_psi_a_epi16(psi_r_m1_m3, a_r_m1_m3, psi_i_m1_m3, a_i_m1_m3, psi_a_m1_m3);
+    prodsum_psi_a_epi16(psi_r_m1_m5, a_r_m1_m5, psi_i_m1_m5, a_i_m1_m5, psi_a_m1_m5);
+    prodsum_psi_a_epi16(psi_r_m1_m7, a_r_m1_m7, psi_i_m1_m7, a_i_m1_m7, psi_a_m1_m7);
+    prodsum_psi_a_epi16(psi_r_m3_p7, a_r_m3_p7, psi_i_m3_p7, a_i_m3_p7, psi_a_m3_p7);
+    prodsum_psi_a_epi16(psi_r_m3_p5, a_r_m3_p5, psi_i_m3_p5, a_i_m3_p5, psi_a_m3_p5);
+    prodsum_psi_a_epi16(psi_r_m3_p3, a_r_m3_p3, psi_i_m3_p3, a_i_m3_p3, psi_a_m3_p3);
+    prodsum_psi_a_epi16(psi_r_m3_p1, a_r_m3_p1, psi_i_m3_p1, a_i_m3_p1, psi_a_m3_p1);
+    prodsum_psi_a_epi16(psi_r_m3_m1, a_r_m3_m1, psi_i_m3_m1, a_i_m3_m1, psi_a_m3_m1);
+    prodsum_psi_a_epi16(psi_r_m3_m3, a_r_m3_m3, psi_i_m3_m3, a_i_m3_m3, psi_a_m3_m3);
+    prodsum_psi_a_epi16(psi_r_m3_m5, a_r_m3_m5, psi_i_m3_m5, a_i_m3_m5, psi_a_m3_m5);
+    prodsum_psi_a_epi16(psi_r_m3_m7, a_r_m3_m7, psi_i_m3_m7, a_i_m3_m7, psi_a_m3_m7);
+    prodsum_psi_a_epi16(psi_r_m5_p7, a_r_m5_p7, psi_i_m5_p7, a_i_m5_p7, psi_a_m5_p7);
+    prodsum_psi_a_epi16(psi_r_m5_p5, a_r_m5_p5, psi_i_m5_p5, a_i_m5_p5, psi_a_m5_p5);
+    prodsum_psi_a_epi16(psi_r_m5_p3, a_r_m5_p3, psi_i_m5_p3, a_i_m5_p3, psi_a_m5_p3);
+    prodsum_psi_a_epi16(psi_r_m5_p1, a_r_m5_p1, psi_i_m5_p1, a_i_m5_p1, psi_a_m5_p1);
+    prodsum_psi_a_epi16(psi_r_m5_m1, a_r_m5_m1, psi_i_m5_m1, a_i_m5_m1, psi_a_m5_m1);
+    prodsum_psi_a_epi16(psi_r_m5_m3, a_r_m5_m3, psi_i_m5_m3, a_i_m5_m3, psi_a_m5_m3);
+    prodsum_psi_a_epi16(psi_r_m5_m5, a_r_m5_m5, psi_i_m5_m5, a_i_m5_m5, psi_a_m5_m5);
+    prodsum_psi_a_epi16(psi_r_m5_m7, a_r_m5_m7, psi_i_m5_m7, a_i_m5_m7, psi_a_m5_m7);
+    prodsum_psi_a_epi16(psi_r_m7_p7, a_r_m7_p7, psi_i_m7_p7, a_i_m7_p7, psi_a_m7_p7);
+    prodsum_psi_a_epi16(psi_r_m7_p5, a_r_m7_p5, psi_i_m7_p5, a_i_m7_p5, psi_a_m7_p5);
+    prodsum_psi_a_epi16(psi_r_m7_p3, a_r_m7_p3, psi_i_m7_p3, a_i_m7_p3, psi_a_m7_p3);
+    prodsum_psi_a_epi16(psi_r_m7_p1, a_r_m7_p1, psi_i_m7_p1, a_i_m7_p1, psi_a_m7_p1);
+    prodsum_psi_a_epi16(psi_r_m7_m1, a_r_m7_m1, psi_i_m7_m1, a_i_m7_m1, psi_a_m7_m1);
+    prodsum_psi_a_epi16(psi_r_m7_m3, a_r_m7_m3, psi_i_m7_m3, a_i_m7_m3, psi_a_m7_m3);
+    prodsum_psi_a_epi16(psi_r_m7_m5, a_r_m7_m5, psi_i_m7_m5, a_i_m7_m5, psi_a_m7_m5);
+    prodsum_psi_a_epi16(psi_r_m7_m7, a_r_m7_m7, psi_i_m7_m7, a_i_m7_m7, psi_a_m7_m7);
+
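+    // Multiply by sqrt(2): mulhi by ONE_OVER_SQRT_2 keeps the high 16 bits of the product, then << 2 rescales (assumes ONE_OVER_SQRT_2 is 1/sqrt(2) in Q15, i.e. mulhi yields x/(2*sqrt(2)) -- TODO confirm)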
+    psi_a_p7_p7 = _mm_mulhi_epi16(psi_a_p7_p7, ONE_OVER_SQRT_2);
+    psi_a_p7_p7 = _mm_slli_epi16(psi_a_p7_p7, 2);
+    psi_a_p7_p5 = _mm_mulhi_epi16(psi_a_p7_p5, ONE_OVER_SQRT_2);
+    psi_a_p7_p5 = _mm_slli_epi16(psi_a_p7_p5, 2);
+    psi_a_p7_p3 = _mm_mulhi_epi16(psi_a_p7_p3, ONE_OVER_SQRT_2);
+    psi_a_p7_p3 = _mm_slli_epi16(psi_a_p7_p3, 2);
+    psi_a_p7_p1 = _mm_mulhi_epi16(psi_a_p7_p1, ONE_OVER_SQRT_2);
+    psi_a_p7_p1 = _mm_slli_epi16(psi_a_p7_p1, 2);
+    psi_a_p7_m1 = _mm_mulhi_epi16(psi_a_p7_m1, ONE_OVER_SQRT_2);
+    psi_a_p7_m1 = _mm_slli_epi16(psi_a_p7_m1, 2);
+    psi_a_p7_m3 = _mm_mulhi_epi16(psi_a_p7_m3, ONE_OVER_SQRT_2);
+    psi_a_p7_m3 = _mm_slli_epi16(psi_a_p7_m3, 2);
+    psi_a_p7_m5 = _mm_mulhi_epi16(psi_a_p7_m5, ONE_OVER_SQRT_2);
+    psi_a_p7_m5 = _mm_slli_epi16(psi_a_p7_m5, 2);
+    psi_a_p7_m7 = _mm_mulhi_epi16(psi_a_p7_m7, ONE_OVER_SQRT_2);
+    psi_a_p7_m7 = _mm_slli_epi16(psi_a_p7_m7, 2);
+    psi_a_p5_p7 = _mm_mulhi_epi16(psi_a_p5_p7, ONE_OVER_SQRT_2);
+    psi_a_p5_p7 = _mm_slli_epi16(psi_a_p5_p7, 2);
+    psi_a_p5_p5 = _mm_mulhi_epi16(psi_a_p5_p5, ONE_OVER_SQRT_2);
+    psi_a_p5_p5 = _mm_slli_epi16(psi_a_p5_p5, 2);
+    psi_a_p5_p3 = _mm_mulhi_epi16(psi_a_p5_p3, ONE_OVER_SQRT_2);
+    psi_a_p5_p3 = _mm_slli_epi16(psi_a_p5_p3, 2);
+    psi_a_p5_p1 = _mm_mulhi_epi16(psi_a_p5_p1, ONE_OVER_SQRT_2);
+    psi_a_p5_p1 = _mm_slli_epi16(psi_a_p5_p1, 2);
+    psi_a_p5_m1 = _mm_mulhi_epi16(psi_a_p5_m1, ONE_OVER_SQRT_2);
+    psi_a_p5_m1 = _mm_slli_epi16(psi_a_p5_m1, 2);
+    psi_a_p5_m3 = _mm_mulhi_epi16(psi_a_p5_m3, ONE_OVER_SQRT_2);
+    psi_a_p5_m3 = _mm_slli_epi16(psi_a_p5_m3, 2);
+    psi_a_p5_m5 = _mm_mulhi_epi16(psi_a_p5_m5, ONE_OVER_SQRT_2);
+    psi_a_p5_m5 = _mm_slli_epi16(psi_a_p5_m5, 2);
+    psi_a_p5_m7 = _mm_mulhi_epi16(psi_a_p5_m7, ONE_OVER_SQRT_2);
+    psi_a_p5_m7 = _mm_slli_epi16(psi_a_p5_m7, 2);
+    psi_a_p3_p7 = _mm_mulhi_epi16(psi_a_p3_p7, ONE_OVER_SQRT_2);
+    psi_a_p3_p7 = _mm_slli_epi16(psi_a_p3_p7, 2);
+    psi_a_p3_p5 = _mm_mulhi_epi16(psi_a_p3_p5, ONE_OVER_SQRT_2);
+    psi_a_p3_p5 = _mm_slli_epi16(psi_a_p3_p5, 2);
+    psi_a_p3_p3 = _mm_mulhi_epi16(psi_a_p3_p3, ONE_OVER_SQRT_2);
+    psi_a_p3_p3 = _mm_slli_epi16(psi_a_p3_p3, 2);
+    psi_a_p3_p1 = _mm_mulhi_epi16(psi_a_p3_p1, ONE_OVER_SQRT_2);
+    psi_a_p3_p1 = _mm_slli_epi16(psi_a_p3_p1, 2);
+    psi_a_p3_m1 = _mm_mulhi_epi16(psi_a_p3_m1, ONE_OVER_SQRT_2);
+    psi_a_p3_m1 = _mm_slli_epi16(psi_a_p3_m1, 2);
+    psi_a_p3_m3 = _mm_mulhi_epi16(psi_a_p3_m3, ONE_OVER_SQRT_2);
+    psi_a_p3_m3 = _mm_slli_epi16(psi_a_p3_m3, 2);
+    psi_a_p3_m5 = _mm_mulhi_epi16(psi_a_p3_m5, ONE_OVER_SQRT_2);
+    psi_a_p3_m5 = _mm_slli_epi16(psi_a_p3_m5, 2);
+    psi_a_p3_m7 = _mm_mulhi_epi16(psi_a_p3_m7, ONE_OVER_SQRT_2);
+    psi_a_p3_m7 = _mm_slli_epi16(psi_a_p3_m7, 2);
+    psi_a_p1_p7 = _mm_mulhi_epi16(psi_a_p1_p7, ONE_OVER_SQRT_2);
+    psi_a_p1_p7 = _mm_slli_epi16(psi_a_p1_p7, 2);
+    psi_a_p1_p5 = _mm_mulhi_epi16(psi_a_p1_p5, ONE_OVER_SQRT_2);
+    psi_a_p1_p5 = _mm_slli_epi16(psi_a_p1_p5, 2);
+    psi_a_p1_p3 = _mm_mulhi_epi16(psi_a_p1_p3, ONE_OVER_SQRT_2);
+    psi_a_p1_p3 = _mm_slli_epi16(psi_a_p1_p3, 2);
+    psi_a_p1_p1 = _mm_mulhi_epi16(psi_a_p1_p1, ONE_OVER_SQRT_2);
+    psi_a_p1_p1 = _mm_slli_epi16(psi_a_p1_p1, 2);
+    psi_a_p1_m1 = _mm_mulhi_epi16(psi_a_p1_m1, ONE_OVER_SQRT_2);
+    psi_a_p1_m1 = _mm_slli_epi16(psi_a_p1_m1, 2);
+    psi_a_p1_m3 = _mm_mulhi_epi16(psi_a_p1_m3, ONE_OVER_SQRT_2);
+    psi_a_p1_m3 = _mm_slli_epi16(psi_a_p1_m3, 2);
+    psi_a_p1_m5 = _mm_mulhi_epi16(psi_a_p1_m5, ONE_OVER_SQRT_2);
+    psi_a_p1_m5 = _mm_slli_epi16(psi_a_p1_m5, 2);
+    psi_a_p1_m7 = _mm_mulhi_epi16(psi_a_p1_m7, ONE_OVER_SQRT_2);
+    psi_a_p1_m7 = _mm_slli_epi16(psi_a_p1_m7, 2);
+    psi_a_m1_p7 = _mm_mulhi_epi16(psi_a_m1_p7, ONE_OVER_SQRT_2);
+    psi_a_m1_p7 = _mm_slli_epi16(psi_a_m1_p7, 2);
+    psi_a_m1_p5 = _mm_mulhi_epi16(psi_a_m1_p5, ONE_OVER_SQRT_2);
+    psi_a_m1_p5 = _mm_slli_epi16(psi_a_m1_p5, 2);
+    psi_a_m1_p3 = _mm_mulhi_epi16(psi_a_m1_p3, ONE_OVER_SQRT_2);
+    psi_a_m1_p3 = _mm_slli_epi16(psi_a_m1_p3, 2);
+    psi_a_m1_p1 = _mm_mulhi_epi16(psi_a_m1_p1, ONE_OVER_SQRT_2);
+    psi_a_m1_p1 = _mm_slli_epi16(psi_a_m1_p1, 2);
+    psi_a_m1_m1 = _mm_mulhi_epi16(psi_a_m1_m1, ONE_OVER_SQRT_2);
+    psi_a_m1_m1 = _mm_slli_epi16(psi_a_m1_m1, 2);
+    psi_a_m1_m3 = _mm_mulhi_epi16(psi_a_m1_m3, ONE_OVER_SQRT_2);
+    psi_a_m1_m3 = _mm_slli_epi16(psi_a_m1_m3, 2);
+    psi_a_m1_m5 = _mm_mulhi_epi16(psi_a_m1_m5, ONE_OVER_SQRT_2);
+    psi_a_m1_m5 = _mm_slli_epi16(psi_a_m1_m5, 2);
+    psi_a_m1_m7 = _mm_mulhi_epi16(psi_a_m1_m7, ONE_OVER_SQRT_2);
+    psi_a_m1_m7 = _mm_slli_epi16(psi_a_m1_m7, 2);
+    psi_a_m3_p7 = _mm_mulhi_epi16(psi_a_m3_p7, ONE_OVER_SQRT_2);
+    psi_a_m3_p7 = _mm_slli_epi16(psi_a_m3_p7, 2);
+    psi_a_m3_p5 = _mm_mulhi_epi16(psi_a_m3_p5, ONE_OVER_SQRT_2);
+    psi_a_m3_p5 = _mm_slli_epi16(psi_a_m3_p5, 2);
+    psi_a_m3_p3 = _mm_mulhi_epi16(psi_a_m3_p3, ONE_OVER_SQRT_2);
+    psi_a_m3_p3 = _mm_slli_epi16(psi_a_m3_p3, 2);
+    psi_a_m3_p1 = _mm_mulhi_epi16(psi_a_m3_p1, ONE_OVER_SQRT_2);
+    psi_a_m3_p1 = _mm_slli_epi16(psi_a_m3_p1, 2);
+    psi_a_m3_m1 = _mm_mulhi_epi16(psi_a_m3_m1, ONE_OVER_SQRT_2);
+    psi_a_m3_m1 = _mm_slli_epi16(psi_a_m3_m1, 2);
+    psi_a_m3_m3 = _mm_mulhi_epi16(psi_a_m3_m3, ONE_OVER_SQRT_2);
+    psi_a_m3_m3 = _mm_slli_epi16(psi_a_m3_m3, 2);
+    psi_a_m3_m5 = _mm_mulhi_epi16(psi_a_m3_m5, ONE_OVER_SQRT_2);
+    psi_a_m3_m5 = _mm_slli_epi16(psi_a_m3_m5, 2);
+    psi_a_m3_m7 = _mm_mulhi_epi16(psi_a_m3_m7, ONE_OVER_SQRT_2);
+    psi_a_m3_m7 = _mm_slli_epi16(psi_a_m3_m7, 2);
+    psi_a_m5_p7 = _mm_mulhi_epi16(psi_a_m5_p7, ONE_OVER_SQRT_2);
+    psi_a_m5_p7 = _mm_slli_epi16(psi_a_m5_p7, 2);
+    psi_a_m5_p5 = _mm_mulhi_epi16(psi_a_m5_p5, ONE_OVER_SQRT_2);
+    psi_a_m5_p5 = _mm_slli_epi16(psi_a_m5_p5, 2);
+    psi_a_m5_p3 = _mm_mulhi_epi16(psi_a_m5_p3, ONE_OVER_SQRT_2);
+    psi_a_m5_p3 = _mm_slli_epi16(psi_a_m5_p3, 2);
+    psi_a_m5_p1 = _mm_mulhi_epi16(psi_a_m5_p1, ONE_OVER_SQRT_2);
+    psi_a_m5_p1 = _mm_slli_epi16(psi_a_m5_p1, 2);
+    psi_a_m5_m1 = _mm_mulhi_epi16(psi_a_m5_m1, ONE_OVER_SQRT_2);
+    psi_a_m5_m1 = _mm_slli_epi16(psi_a_m5_m1, 2);
+    psi_a_m5_m3 = _mm_mulhi_epi16(psi_a_m5_m3, ONE_OVER_SQRT_2);
+    psi_a_m5_m3 = _mm_slli_epi16(psi_a_m5_m3, 2);
+    psi_a_m5_m5 = _mm_mulhi_epi16(psi_a_m5_m5, ONE_OVER_SQRT_2);
+    psi_a_m5_m5 = _mm_slli_epi16(psi_a_m5_m5, 2);
+    psi_a_m5_m7 = _mm_mulhi_epi16(psi_a_m5_m7, ONE_OVER_SQRT_2);
+    psi_a_m5_m7 = _mm_slli_epi16(psi_a_m5_m7, 2);
+    psi_a_m7_p7 = _mm_mulhi_epi16(psi_a_m7_p7, ONE_OVER_SQRT_2);
+    psi_a_m7_p7 = _mm_slli_epi16(psi_a_m7_p7, 2);
+    psi_a_m7_p5 = _mm_mulhi_epi16(psi_a_m7_p5, ONE_OVER_SQRT_2);
+    psi_a_m7_p5 = _mm_slli_epi16(psi_a_m7_p5, 2);
+    psi_a_m7_p3 = _mm_mulhi_epi16(psi_a_m7_p3, ONE_OVER_SQRT_2);
+    psi_a_m7_p3 = _mm_slli_epi16(psi_a_m7_p3, 2);
+    psi_a_m7_p1 = _mm_mulhi_epi16(psi_a_m7_p1, ONE_OVER_SQRT_2);
+    psi_a_m7_p1 = _mm_slli_epi16(psi_a_m7_p1, 2);
+    psi_a_m7_m1 = _mm_mulhi_epi16(psi_a_m7_m1, ONE_OVER_SQRT_2);
+    psi_a_m7_m1 = _mm_slli_epi16(psi_a_m7_m1, 2);
+    psi_a_m7_m3 = _mm_mulhi_epi16(psi_a_m7_m3, ONE_OVER_SQRT_2);
+    psi_a_m7_m3 = _mm_slli_epi16(psi_a_m7_m3, 2);
+    psi_a_m7_m5 = _mm_mulhi_epi16(psi_a_m7_m5, ONE_OVER_SQRT_2);
+    psi_a_m7_m5 = _mm_slli_epi16(psi_a_m7_m5, 2);
+    psi_a_m7_m7 = _mm_mulhi_epi16(psi_a_m7_m7, ONE_OVER_SQRT_2);
+    psi_a_m7_m7 = _mm_slli_epi16(psi_a_m7_m7, 2);
+
+    // Calculation of a group of two terms in the bit metric involving squares of interference
+    square_a_64qam_epi16(a_r_p7_p7, a_i_p7_p7, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_p7_p7);
+    square_a_64qam_epi16(a_r_p7_p5, a_i_p7_p5, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_p7_p5);
+    square_a_64qam_epi16(a_r_p7_p3, a_i_p7_p3, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_p7_p3);
+    square_a_64qam_epi16(a_r_p7_p1, a_i_p7_p1, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_p7_p1);
+    square_a_64qam_epi16(a_r_p7_m1, a_i_p7_m1, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_p7_m1);
+    square_a_64qam_epi16(a_r_p7_m3, a_i_p7_m3, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_p7_m3);
+    square_a_64qam_epi16(a_r_p7_m5, a_i_p7_m5, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_p7_m5);
+    square_a_64qam_epi16(a_r_p7_m7, a_i_p7_m7, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_p7_m7);
+    square_a_64qam_epi16(a_r_p5_p7, a_i_p5_p7, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_p5_p7);
+    square_a_64qam_epi16(a_r_p5_p5, a_i_p5_p5, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_p5_p5);
+    square_a_64qam_epi16(a_r_p5_p3, a_i_p5_p3, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_p5_p3);
+    square_a_64qam_epi16(a_r_p5_p1, a_i_p5_p1, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_p5_p1);
+    square_a_64qam_epi16(a_r_p5_m1, a_i_p5_m1, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_p5_m1);
+    square_a_64qam_epi16(a_r_p5_m3, a_i_p5_m3, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_p5_m3);
+    square_a_64qam_epi16(a_r_p5_m5, a_i_p5_m5, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_p5_m5);
+    square_a_64qam_epi16(a_r_p5_m7, a_i_p5_m7, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_p5_m7);
+    square_a_64qam_epi16(a_r_p3_p7, a_i_p3_p7, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_p3_p7);
+    square_a_64qam_epi16(a_r_p3_p5, a_i_p3_p5, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_p3_p5);
+    square_a_64qam_epi16(a_r_p3_p3, a_i_p3_p3, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_p3_p3);
+    square_a_64qam_epi16(a_r_p3_p1, a_i_p3_p1, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_p3_p1);
+    square_a_64qam_epi16(a_r_p3_m1, a_i_p3_m1, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_p3_m1);
+    square_a_64qam_epi16(a_r_p3_m3, a_i_p3_m3, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_p3_m3);
+    square_a_64qam_epi16(a_r_p3_m5, a_i_p3_m5, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_p3_m5);
+    square_a_64qam_epi16(a_r_p3_m7, a_i_p3_m7, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_p3_m7);
+    square_a_64qam_epi16(a_r_p1_p7, a_i_p1_p7, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_p1_p7);
+    square_a_64qam_epi16(a_r_p1_p5, a_i_p1_p5, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_p1_p5);
+    square_a_64qam_epi16(a_r_p1_p3, a_i_p1_p3, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_p1_p3);
+    square_a_64qam_epi16(a_r_p1_p1, a_i_p1_p1, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_p1_p1);
+    square_a_64qam_epi16(a_r_p1_m1, a_i_p1_m1, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_p1_m1);
+    square_a_64qam_epi16(a_r_p1_m3, a_i_p1_m3, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_p1_m3);
+    square_a_64qam_epi16(a_r_p1_m5, a_i_p1_m5, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_p1_m5);
+    square_a_64qam_epi16(a_r_p1_m7, a_i_p1_m7, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_p1_m7);
+    square_a_64qam_epi16(a_r_m1_p7, a_i_m1_p7, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_m1_p7);
+    square_a_64qam_epi16(a_r_m1_p5, a_i_m1_p5, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_m1_p5);
+    square_a_64qam_epi16(a_r_m1_p3, a_i_m1_p3, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_m1_p3);
+    square_a_64qam_epi16(a_r_m1_p1, a_i_m1_p1, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_m1_p1);
+    square_a_64qam_epi16(a_r_m1_m1, a_i_m1_m1, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_m1_m1);
+    square_a_64qam_epi16(a_r_m1_m3, a_i_m1_m3, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_m1_m3);
+    square_a_64qam_epi16(a_r_m1_m5, a_i_m1_m5, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_m1_m5);
+    square_a_64qam_epi16(a_r_m1_m7, a_i_m1_m7, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_m1_m7);
+    square_a_64qam_epi16(a_r_m3_p7, a_i_m3_p7, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_m3_p7);
+    square_a_64qam_epi16(a_r_m3_p5, a_i_m3_p5, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_m3_p5);
+    square_a_64qam_epi16(a_r_m3_p3, a_i_m3_p3, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_m3_p3);
+    square_a_64qam_epi16(a_r_m3_p1, a_i_m3_p1, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_m3_p1);
+    square_a_64qam_epi16(a_r_m3_m1, a_i_m3_m1, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_m3_m1);
+    square_a_64qam_epi16(a_r_m3_m3, a_i_m3_m3, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_m3_m3);
+    square_a_64qam_epi16(a_r_m3_m5, a_i_m3_m5, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_m3_m5);
+    square_a_64qam_epi16(a_r_m3_m7, a_i_m3_m7, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_m3_m7);
+    square_a_64qam_epi16(a_r_m5_p7, a_i_m5_p7, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_m5_p7);
+    square_a_64qam_epi16(a_r_m5_p5, a_i_m5_p5, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_m5_p5);
+    square_a_64qam_epi16(a_r_m5_p3, a_i_m5_p3, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_m5_p3);
+    square_a_64qam_epi16(a_r_m5_p1, a_i_m5_p1, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_m5_p1);
+    square_a_64qam_epi16(a_r_m5_m1, a_i_m5_m1, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_m5_m1);
+    square_a_64qam_epi16(a_r_m5_m3, a_i_m5_m3, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_m5_m3);
+    square_a_64qam_epi16(a_r_m5_m5, a_i_m5_m5, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_m5_m5);
+    square_a_64qam_epi16(a_r_m5_m7, a_i_m5_m7, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_m5_m7);
+    square_a_64qam_epi16(a_r_m7_p7, a_i_m7_p7, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_m7_p7);
+    square_a_64qam_epi16(a_r_m7_p5, a_i_m7_p5, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_m7_p5);
+    square_a_64qam_epi16(a_r_m7_p3, a_i_m7_p3, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_m7_p3);
+    square_a_64qam_epi16(a_r_m7_p1, a_i_m7_p1, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_m7_p1);
+    square_a_64qam_epi16(a_r_m7_m1, a_i_m7_m1, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_m7_m1);
+    square_a_64qam_epi16(a_r_m7_m3, a_i_m7_m3, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_m7_m3);
+    square_a_64qam_epi16(a_r_m7_m5, a_i_m7_m5, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_m7_m5);
+    square_a_64qam_epi16(a_r_m7_m7, a_i_m7_m7, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_m7_m7);
+
+    // Computing different multiples of ||h0||^2
+    // x=1, y=1
+    ch_mag_2_over_42_with_sigma2 = _mm_mulhi_epi16(ch_mag_des,ONE_OVER_FOUR_SQRT_42);
+    ch_mag_2_over_42_with_sigma2 = _mm_slli_epi16(ch_mag_2_over_42_with_sigma2,1);
+    // x=1, y=3
+    ch_mag_10_over_42_with_sigma2 = _mm_mulhi_epi16(ch_mag_des,FIVE_OVER_FOUR_SQRT_42);
+    ch_mag_10_over_42_with_sigma2 = _mm_slli_epi16(ch_mag_10_over_42_with_sigma2,1);
+    // x=1, y=5
+    ch_mag_26_over_42_with_sigma2 = _mm_mulhi_epi16(ch_mag_des,THIRTEEN_OVER_FOUR_SQRT_42);
+    ch_mag_26_over_42_with_sigma2 = _mm_slli_epi16(ch_mag_26_over_42_with_sigma2,1);
+    // x=1, y=7
+    ch_mag_50_over_42_with_sigma2 = _mm_mulhi_epi16(ch_mag_des,TWENTYFIVE_OVER_FOUR_SQRT_42);
+    ch_mag_50_over_42_with_sigma2 = _mm_slli_epi16(ch_mag_50_over_42_with_sigma2,1);
+    // x=3, y=3
+    ch_mag_18_over_42_with_sigma2 = _mm_mulhi_epi16(ch_mag_des,NINE_OVER_FOUR_SQRT_42);
+    ch_mag_18_over_42_with_sigma2 = _mm_slli_epi16(ch_mag_18_over_42_with_sigma2,1);
+    // x=3, y=5
+    ch_mag_34_over_42_with_sigma2 = _mm_mulhi_epi16(ch_mag_des,SEVENTEEN_OVER_FOUR_SQRT_42);
+    ch_mag_34_over_42_with_sigma2 = _mm_slli_epi16(ch_mag_34_over_42_with_sigma2,1);
+    // x=3, y=7
+    ch_mag_58_over_42_with_sigma2 = _mm_mulhi_epi16(ch_mag_des,TWENTYNINE_OVER_FOUR_SQRT_42);
+    ch_mag_58_over_42_with_sigma2 = _mm_slli_epi16(ch_mag_58_over_42_with_sigma2,2);
+    // x=5, y=5
+    ch_mag_50_over_42_with_sigma2 = _mm_mulhi_epi16(ch_mag_des,TWENTYFIVE_OVER_FOUR_SQRT_42);
+    ch_mag_50_over_42_with_sigma2 = _mm_slli_epi16(ch_mag_50_over_42_with_sigma2,1);
+    // x=5, y=7
+    ch_mag_74_over_42_with_sigma2 = _mm_mulhi_epi16(ch_mag_des,THIRTYSEVEN_OVER_FOUR_SQRT_42);
+    ch_mag_74_over_42_with_sigma2 = _mm_slli_epi16(ch_mag_74_over_42_with_sigma2,2);
+    // x=7, y=7
+    ch_mag_98_over_42_with_sigma2 = _mm_mulhi_epi16(ch_mag_des,FORTYNINE_OVER_FOUR_SQRT_42);
+    ch_mag_98_over_42_with_sigma2 = _mm_slli_epi16(ch_mag_98_over_42_with_sigma2,2);
+
+    // Computing Metrics
+    xmm0 = _mm_subs_epi16(psi_a_p7_p7, a_sq_p7_p7);
+    xmm1 = _mm_adds_epi16(xmm0, y0_p_7_7);
+    bit_met_p7_p7 = _mm_subs_epi16(xmm1, ch_mag_98_over_42_with_sigma2);
+    xmm0 = _mm_subs_epi16(psi_a_p7_p5, a_sq_p7_p5);
+    xmm1 = _mm_adds_epi16(xmm0, y0_p_7_5);
+    bit_met_p7_p5 = _mm_subs_epi16(xmm1, ch_mag_74_over_42_with_sigma2);
+    xmm0 = _mm_subs_epi16(psi_a_p7_p3, a_sq_p7_p3);
+    xmm1 = _mm_adds_epi16(xmm0, y0_p_7_3);
+    bit_met_p7_p3 = _mm_subs_epi16(xmm1, ch_mag_58_over_42_with_sigma2);
+    xmm0 = _mm_subs_epi16(psi_a_p7_p1, a_sq_p7_p1);
+    xmm1 = _mm_adds_epi16(xmm0, y0_p_7_1);
+    bit_met_p7_p1 = _mm_subs_epi16(xmm1, ch_mag_50_over_42_with_sigma2);
+    xmm0 = _mm_subs_epi16(psi_a_p7_m1, a_sq_p7_m1);
+    xmm1 = _mm_adds_epi16(xmm0, y0_m_7_1);
+    bit_met_p7_m1 = _mm_subs_epi16(xmm1, ch_mag_50_over_42_with_sigma2);
+    xmm0 = _mm_subs_epi16(psi_a_p7_m3, a_sq_p7_m3);
+    xmm1 = _mm_adds_epi16(xmm0, y0_m_7_3);
+    bit_met_p7_m3 = _mm_subs_epi16(xmm1, ch_mag_58_over_42_with_sigma2);
+    xmm0 = _mm_subs_epi16(psi_a_p7_m5, a_sq_p7_m5);
+    xmm1 = _mm_adds_epi16(xmm0, y0_m_7_5);
+    bit_met_p7_m5 = _mm_subs_epi16(xmm1, ch_mag_74_over_42_with_sigma2);
+    xmm0 = _mm_subs_epi16(psi_a_p7_m7, a_sq_p7_m7);
+    xmm1 = _mm_adds_epi16(xmm0, y0_m_7_7);
+    bit_met_p7_m7 = _mm_subs_epi16(xmm1, ch_mag_98_over_42_with_sigma2);
+    xmm0 = _mm_subs_epi16(psi_a_p5_p7, a_sq_p5_p7);
+    xmm1 = _mm_adds_epi16(xmm0, y0_p_5_7);
+    bit_met_p5_p7 = _mm_subs_epi16(xmm1, ch_mag_74_over_42_with_sigma2);
+    xmm0 = _mm_subs_epi16(psi_a_p5_p5, a_sq_p5_p5);
+    xmm1 = _mm_adds_epi16(xmm0, y0_p_5_5);
+    bit_met_p5_p5 = _mm_subs_epi16(xmm1, ch_mag_50_over_42_with_sigma2);
+    xmm0 = _mm_subs_epi16(psi_a_p5_p3, a_sq_p5_p3);
+    xmm1 = _mm_adds_epi16(xmm0, y0_p_5_3);
+    bit_met_p5_p3 = _mm_subs_epi16(xmm1, ch_mag_34_over_42_with_sigma2);
+    xmm0 = _mm_subs_epi16(psi_a_p5_p1, a_sq_p5_p1);
+    xmm1 = _mm_adds_epi16(xmm0, y0_p_5_1);
+    bit_met_p5_p1 = _mm_subs_epi16(xmm1, ch_mag_26_over_42_with_sigma2);
+    xmm0 = _mm_subs_epi16(psi_a_p5_m1, a_sq_p5_m1);
+    xmm1 = _mm_adds_epi16(xmm0, y0_m_5_1);
+    bit_met_p5_m1 = _mm_subs_epi16(xmm1, ch_mag_26_over_42_with_sigma2);
+    xmm0 = _mm_subs_epi16(psi_a_p5_m3, a_sq_p5_m3);
+    xmm1 = _mm_adds_epi16(xmm0, y0_m_5_3);
+    bit_met_p5_m3 = _mm_subs_epi16(xmm1, ch_mag_34_over_42_with_sigma2);
+    xmm0 = _mm_subs_epi16(psi_a_p5_m5, a_sq_p5_m5);
+    xmm1 = _mm_adds_epi16(xmm0, y0_m_5_5);
+    bit_met_p5_m5 = _mm_subs_epi16(xmm1, ch_mag_50_over_42_with_sigma2);
+    xmm0 = _mm_subs_epi16(psi_a_p5_m7, a_sq_p5_m7);
+    xmm1 = _mm_adds_epi16(xmm0, y0_m_5_7);
+    bit_met_p5_m7 = _mm_subs_epi16(xmm1, ch_mag_74_over_42_with_sigma2);
+    xmm0 = _mm_subs_epi16(psi_a_p3_p7, a_sq_p3_p7);
+    xmm1 = _mm_adds_epi16(xmm0, y0_p_3_7);
+    bit_met_p3_p7 = _mm_subs_epi16(xmm1, ch_mag_58_over_42_with_sigma2);
+    xmm0 = _mm_subs_epi16(psi_a_p3_p5, a_sq_p3_p5);
+    xmm1 = _mm_adds_epi16(xmm0, y0_p_3_5);
+    bit_met_p3_p5 = _mm_subs_epi16(xmm1, ch_mag_34_over_42_with_sigma2);
+    xmm0 = _mm_subs_epi16(psi_a_p3_p3, a_sq_p3_p3);
+    xmm1 = _mm_adds_epi16(xmm0, y0_p_3_3);
+    bit_met_p3_p3 = _mm_subs_epi16(xmm1, ch_mag_18_over_42_with_sigma2);
+    xmm0 = _mm_subs_epi16(psi_a_p3_p1, a_sq_p3_p1);
+    xmm1 = _mm_adds_epi16(xmm0, y0_p_3_1);
+    bit_met_p3_p1 = _mm_subs_epi16(xmm1, ch_mag_10_over_42_with_sigma2);
+    xmm0 = _mm_subs_epi16(psi_a_p3_m1, a_sq_p3_m1);
+    xmm1 = _mm_adds_epi16(xmm0, y0_m_3_1);
+    bit_met_p3_m1 = _mm_subs_epi16(xmm1, ch_mag_10_over_42_with_sigma2);
+    xmm0 = _mm_subs_epi16(psi_a_p3_m3, a_sq_p3_m3);
+    xmm1 = _mm_adds_epi16(xmm0, y0_m_3_3);
+    bit_met_p3_m3 = _mm_subs_epi16(xmm1, ch_mag_18_over_42_with_sigma2);
+    xmm0 = _mm_subs_epi16(psi_a_p3_m5, a_sq_p3_m5);
+    xmm1 = _mm_adds_epi16(xmm0, y0_m_3_5);
+    bit_met_p3_m5 = _mm_subs_epi16(xmm1, ch_mag_34_over_42_with_sigma2);
+    xmm0 = _mm_subs_epi16(psi_a_p3_m7, a_sq_p3_m7);
+    xmm1 = _mm_adds_epi16(xmm0, y0_m_3_7);
+    bit_met_p3_m7 = _mm_subs_epi16(xmm1, ch_mag_58_over_42_with_sigma2);
+    xmm0 = _mm_subs_epi16(psi_a_p1_p7, a_sq_p1_p7);
+    xmm1 = _mm_adds_epi16(xmm0, y0_p_1_7);
+    bit_met_p1_p7 = _mm_subs_epi16(xmm1, ch_mag_50_over_42_with_sigma2);
+    xmm0 = _mm_subs_epi16(psi_a_p1_p5, a_sq_p1_p5);
+    xmm1 = _mm_adds_epi16(xmm0, y0_p_1_5);
+    bit_met_p1_p5 = _mm_subs_epi16(xmm1, ch_mag_26_over_42_with_sigma2);
+    xmm0 = _mm_subs_epi16(psi_a_p1_p3, a_sq_p1_p3);
+    xmm1 = _mm_adds_epi16(xmm0, y0_p_1_3);
+    bit_met_p1_p3 = _mm_subs_epi16(xmm1, ch_mag_10_over_42_with_sigma2);
+    xmm0 = _mm_subs_epi16(psi_a_p1_p1, a_sq_p1_p1);
+    xmm1 = _mm_adds_epi16(xmm0, y0_p_1_1);
+    bit_met_p1_p1 = _mm_subs_epi16(xmm1, ch_mag_2_over_42_with_sigma2);
+    xmm0 = _mm_subs_epi16(psi_a_p1_m1, a_sq_p1_m1);
+    xmm1 = _mm_adds_epi16(xmm0, y0_m_1_1);
+    bit_met_p1_m1 = _mm_subs_epi16(xmm1, ch_mag_2_over_42_with_sigma2);
+    xmm0 = _mm_subs_epi16(psi_a_p1_m3, a_sq_p1_m3);
+    xmm1 = _mm_adds_epi16(xmm0, y0_m_1_3);
+    bit_met_p1_m3 = _mm_subs_epi16(xmm1, ch_mag_10_over_42_with_sigma2);
+    xmm0 = _mm_subs_epi16(psi_a_p1_m5, a_sq_p1_m5);
+    xmm1 = _mm_adds_epi16(xmm0, y0_m_1_5);
+    bit_met_p1_m5 = _mm_subs_epi16(xmm1, ch_mag_26_over_42_with_sigma2);
+    xmm0 = _mm_subs_epi16(psi_a_p1_m7, a_sq_p1_m7);
+    xmm1 = _mm_adds_epi16(xmm0, y0_m_1_7);
+    bit_met_p1_m7 = _mm_subs_epi16(xmm1, ch_mag_50_over_42_with_sigma2);
+
+    xmm0 = _mm_subs_epi16(psi_a_m1_p7, a_sq_m1_p7);
+    xmm1 = _mm_subs_epi16(xmm0, y0_m_1_7);
+    bit_met_m1_p7 = _mm_subs_epi16(xmm1, ch_mag_50_over_42_with_sigma2);
+    xmm0 = _mm_subs_epi16(psi_a_m1_p5, a_sq_m1_p5);
+    xmm1 = _mm_subs_epi16(xmm0, y0_m_1_5);
+    bit_met_m1_p5 = _mm_subs_epi16(xmm1, ch_mag_26_over_42_with_sigma2);
+    xmm0 = _mm_subs_epi16(psi_a_m1_p3, a_sq_m1_p3);
+    xmm1 = _mm_subs_epi16(xmm0, y0_m_1_3);
+    bit_met_m1_p3 = _mm_subs_epi16(xmm1, ch_mag_10_over_42_with_sigma2);
+    xmm0 = _mm_subs_epi16(psi_a_m1_p1, a_sq_m1_p1);
+    xmm1 = _mm_subs_epi16(xmm0, y0_m_1_1);
+    bit_met_m1_p1 = _mm_subs_epi16(xmm1, ch_mag_2_over_42_with_sigma2);
+    xmm0 = _mm_subs_epi16(psi_a_m1_m1, a_sq_m1_m1);
+    xmm1 = _mm_subs_epi16(xmm0, y0_p_1_1);
+    bit_met_m1_m1 = _mm_subs_epi16(xmm1, ch_mag_2_over_42_with_sigma2);
+    xmm0 = _mm_subs_epi16(psi_a_m1_m3, a_sq_m1_m3);
+    xmm1 = _mm_subs_epi16(xmm0, y0_p_1_3);
+    bit_met_m1_m3 = _mm_subs_epi16(xmm1, ch_mag_10_over_42_with_sigma2);
+    xmm0 = _mm_subs_epi16(psi_a_m1_m5, a_sq_m1_m5);
+    xmm1 = _mm_subs_epi16(xmm0, y0_p_1_5);
+    bit_met_m1_m5 = _mm_subs_epi16(xmm1, ch_mag_26_over_42_with_sigma2);
+    xmm0 = _mm_subs_epi16(psi_a_m1_m7, a_sq_m1_m7);
+    xmm1 = _mm_subs_epi16(xmm0, y0_p_1_7);
+    bit_met_m1_m7 = _mm_subs_epi16(xmm1, ch_mag_50_over_42_with_sigma2);
+    xmm0 = _mm_subs_epi16(psi_a_m3_p7, a_sq_m3_p7);
+    xmm1 = _mm_subs_epi16(xmm0, y0_m_3_7);
+    bit_met_m3_p7 = _mm_subs_epi16(xmm1, ch_mag_58_over_42_with_sigma2);
+    xmm0 = _mm_subs_epi16(psi_a_m3_p5, a_sq_m3_p5);
+    xmm1 = _mm_subs_epi16(xmm0, y0_m_3_5);
+    bit_met_m3_p5 = _mm_subs_epi16(xmm1, ch_mag_34_over_42_with_sigma2);
+    xmm0 = _mm_subs_epi16(psi_a_m3_p3, a_sq_m3_p3);
+    xmm1 = _mm_subs_epi16(xmm0, y0_m_3_3);
+    bit_met_m3_p3 = _mm_subs_epi16(xmm1, ch_mag_18_over_42_with_sigma2);
+    xmm0 = _mm_subs_epi16(psi_a_m3_p1, a_sq_m3_p1);
+    xmm1 = _mm_subs_epi16(xmm0, y0_m_3_1);
+    bit_met_m3_p1 = _mm_subs_epi16(xmm1, ch_mag_10_over_42_with_sigma2);
+    xmm0 = _mm_subs_epi16(psi_a_m3_m1, a_sq_m3_m1);
+    xmm1 = _mm_subs_epi16(xmm0, y0_p_3_1);
+    bit_met_m3_m1 = _mm_subs_epi16(xmm1, ch_mag_10_over_42_with_sigma2);
+    xmm0 = _mm_subs_epi16(psi_a_m3_m3, a_sq_m3_m3);
+    xmm1 = _mm_subs_epi16(xmm0, y0_p_3_3);
+    bit_met_m3_m3 = _mm_subs_epi16(xmm1, ch_mag_18_over_42_with_sigma2);
+    xmm0 = _mm_subs_epi16(psi_a_m3_m5, a_sq_m3_m5);
+    xmm1 = _mm_subs_epi16(xmm0, y0_p_3_5);
+    bit_met_m3_m5 = _mm_subs_epi16(xmm1, ch_mag_34_over_42_with_sigma2);
+    xmm0 = _mm_subs_epi16(psi_a_m3_m7, a_sq_m3_m7);
+    xmm1 = _mm_subs_epi16(xmm0, y0_p_3_7);
+    bit_met_m3_m7 = _mm_subs_epi16(xmm1, ch_mag_58_over_42_with_sigma2);
+    xmm0 = _mm_subs_epi16(psi_a_m5_p7, a_sq_m5_p7);
+    xmm1 = _mm_subs_epi16(xmm0, y0_m_5_7);
+    bit_met_m5_p7 = _mm_subs_epi16(xmm1, ch_mag_74_over_42_with_sigma2);
+    xmm0 = _mm_subs_epi16(psi_a_m5_p5, a_sq_m5_p5);
+    xmm1 = _mm_subs_epi16(xmm0, y0_m_5_5);
+    bit_met_m5_p5 = _mm_subs_epi16(xmm1, ch_mag_50_over_42_with_sigma2);
+    xmm0 = _mm_subs_epi16(psi_a_m5_p3, a_sq_m5_p3);
+    xmm1 = _mm_subs_epi16(xmm0, y0_m_5_3);
+    bit_met_m5_p3 = _mm_subs_epi16(xmm1, ch_mag_34_over_42_with_sigma2);
+    xmm0 = _mm_subs_epi16(psi_a_m5_p1, a_sq_m5_p1);
+    xmm1 = _mm_subs_epi16(xmm0, y0_m_5_1);
+    bit_met_m5_p1 = _mm_subs_epi16(xmm1, ch_mag_26_over_42_with_sigma2);
+    xmm0 = _mm_subs_epi16(psi_a_m5_m1, a_sq_m5_m1);
+    xmm1 = _mm_subs_epi16(xmm0, y0_p_5_1);
+    bit_met_m5_m1 = _mm_subs_epi16(xmm1, ch_mag_26_over_42_with_sigma2);
+    xmm0 = _mm_subs_epi16(psi_a_m5_m3, a_sq_m5_m3);
+    xmm1 = _mm_subs_epi16(xmm0, y0_p_5_3);
+    bit_met_m5_m3 = _mm_subs_epi16(xmm1, ch_mag_34_over_42_with_sigma2);
+    xmm0 = _mm_subs_epi16(psi_a_m5_m5, a_sq_m5_m5);
+    xmm1 = _mm_subs_epi16(xmm0, y0_p_5_5);
+    bit_met_m5_m5 = _mm_subs_epi16(xmm1, ch_mag_50_over_42_with_sigma2);
+    xmm0 = _mm_subs_epi16(psi_a_m5_m7, a_sq_m5_m7);
+    xmm1 = _mm_subs_epi16(xmm0, y0_p_5_7);
+    bit_met_m5_m7 = _mm_subs_epi16(xmm1, ch_mag_74_over_42_with_sigma2);
+    xmm0 = _mm_subs_epi16(psi_a_m7_p7, a_sq_m7_p7);
+    xmm1 = _mm_subs_epi16(xmm0, y0_m_7_7);
+    bit_met_m7_p7 = _mm_subs_epi16(xmm1, ch_mag_98_over_42_with_sigma2);
+    xmm0 = _mm_subs_epi16(psi_a_m7_p5, a_sq_m7_p5);
+    xmm1 = _mm_subs_epi16(xmm0, y0_m_7_5);
+    bit_met_m7_p5 = _mm_subs_epi16(xmm1, ch_mag_74_over_42_with_sigma2);
+    xmm0 = _mm_subs_epi16(psi_a_m7_p3, a_sq_m7_p3);
+    xmm1 = _mm_subs_epi16(xmm0, y0_m_7_3);
+    bit_met_m7_p3 = _mm_subs_epi16(xmm1, ch_mag_58_over_42_with_sigma2);
+    xmm0 = _mm_subs_epi16(psi_a_m7_p1, a_sq_m7_p1);
+    xmm1 = _mm_subs_epi16(xmm0, y0_m_7_1);
+    bit_met_m7_p1 = _mm_subs_epi16(xmm1, ch_mag_50_over_42_with_sigma2);
+    xmm0 = _mm_subs_epi16(psi_a_m7_m1, a_sq_m7_m1);
+    xmm1 = _mm_subs_epi16(xmm0, y0_p_7_1);
+    bit_met_m7_m1 = _mm_subs_epi16(xmm1, ch_mag_50_over_42_with_sigma2);
+    xmm0 = _mm_subs_epi16(psi_a_m7_m3, a_sq_m7_m3);
+    xmm1 = _mm_subs_epi16(xmm0, y0_p_7_3);
+    bit_met_m7_m3 = _mm_subs_epi16(xmm1, ch_mag_58_over_42_with_sigma2);
+    xmm0 = _mm_subs_epi16(psi_a_m7_m5, a_sq_m7_m5);
+    xmm1 = _mm_subs_epi16(xmm0, y0_p_7_5);
+    bit_met_m7_m5 = _mm_subs_epi16(xmm1, ch_mag_74_over_42_with_sigma2);
+    xmm0 = _mm_subs_epi16(psi_a_m7_m7, a_sq_m7_m7);
+    xmm1 = _mm_subs_epi16(xmm0, y0_p_7_7);
+    bit_met_m7_m7 = _mm_subs_epi16(xmm1, ch_mag_98_over_42_with_sigma2);
+
+    // Detection for 1st bit (LTE mapping)
+    // bit = 1
+    xmm0 = _mm_max_epi16(bit_met_m7_p7, bit_met_m7_p5);
+    xmm1 = _mm_max_epi16(bit_met_m7_p3, bit_met_m7_p1);
+    xmm2 = _mm_max_epi16(bit_met_m7_m1, bit_met_m7_m3);
+    xmm3 = _mm_max_epi16(bit_met_m7_m5, bit_met_m7_m7);
+    xmm4 = _mm_max_epi16(xmm0, xmm1);
+    xmm5 = _mm_max_epi16(xmm2, xmm3);
+    logmax_den_re0 = _mm_max_epi16(xmm4, xmm5);
+    xmm0 = _mm_max_epi16(bit_met_m5_p7, bit_met_m5_p5);
+    xmm1 = _mm_max_epi16(bit_met_m5_p3, bit_met_m5_p1);
+    xmm2 = _mm_max_epi16(bit_met_m5_m1, bit_met_m5_m3);
+    xmm3 = _mm_max_epi16(bit_met_m5_m5, bit_met_m5_m7);
+    xmm4 = _mm_max_epi16(xmm0, xmm1);
+    xmm5 = _mm_max_epi16(xmm2, xmm3);
+    logmax_den_re0 = _mm_max_epi16(logmax_den_re0, xmm4);
+    logmax_den_re0 = _mm_max_epi16(logmax_den_re0, xmm5);
+    xmm0 = _mm_max_epi16(bit_met_m3_p7, bit_met_m3_p5);
+    xmm1 = _mm_max_epi16(bit_met_m3_p3, bit_met_m3_p1);
+    xmm2 = _mm_max_epi16(bit_met_m3_m1, bit_met_m3_m3);
+    xmm3 = _mm_max_epi16(bit_met_m3_m5, bit_met_m3_m7);
+    xmm4 = _mm_max_epi16(xmm0, xmm1);
+    xmm5 = _mm_max_epi16(xmm2, xmm3);
+    logmax_den_re0 = _mm_max_epi16(logmax_den_re0, xmm4);
+    logmax_den_re0 = _mm_max_epi16(logmax_den_re0, xmm5);
+    xmm0 = _mm_max_epi16(bit_met_m1_p7, bit_met_m1_p5);
+    xmm1 = _mm_max_epi16(bit_met_m1_p3, bit_met_m1_p1);
+    xmm2 = _mm_max_epi16(bit_met_m1_m1, bit_met_m1_m3);
+    xmm3 = _mm_max_epi16(bit_met_m1_m5, bit_met_m1_m7);
+    xmm4 = _mm_max_epi16(xmm0, xmm1);
+    xmm5 = _mm_max_epi16(xmm2, xmm3);
+    logmax_den_re0 = _mm_max_epi16(logmax_den_re0, xmm4);
+    logmax_den_re0 = _mm_max_epi16(logmax_den_re0, xmm5);
+
+    // bit = 0
+    xmm0 = _mm_max_epi16(bit_met_p7_p7, bit_met_p7_p5);
+    xmm1 = _mm_max_epi16(bit_met_p7_p3, bit_met_p7_p1);
+    xmm2 = _mm_max_epi16(bit_met_p7_m1, bit_met_p7_m3);
+    xmm3 = _mm_max_epi16(bit_met_p7_m5, bit_met_p7_m7);
+    xmm4 = _mm_max_epi16(xmm0, xmm1);
+    xmm5 = _mm_max_epi16(xmm2, xmm3);
+    logmax_num_re0 = _mm_max_epi16(xmm4, xmm5);
+    xmm0 = _mm_max_epi16(bit_met_p5_p7, bit_met_p5_p5);
+    xmm1 = _mm_max_epi16(bit_met_p5_p3, bit_met_p5_p1);
+    xmm2 = _mm_max_epi16(bit_met_p5_m1, bit_met_p5_m3);
+    xmm3 = _mm_max_epi16(bit_met_p5_m5, bit_met_p5_m7);
+    xmm4 = _mm_max_epi16(xmm0, xmm1);
+    xmm5 = _mm_max_epi16(xmm2, xmm3);
+    logmax_num_re0 = _mm_max_epi16(logmax_num_re0, xmm4);
+    logmax_num_re0 = _mm_max_epi16(logmax_num_re0, xmm5);
+    xmm0 = _mm_max_epi16(bit_met_p3_p7, bit_met_p3_p5);
+    xmm1 = _mm_max_epi16(bit_met_p3_p3, bit_met_p3_p1);
+    xmm2 = _mm_max_epi16(bit_met_p3_m1, bit_met_p3_m3);
+    xmm3 = _mm_max_epi16(bit_met_p3_m5, bit_met_p3_m7);
+    xmm4 = _mm_max_epi16(xmm0, xmm1);
+    xmm5 = _mm_max_epi16(xmm2, xmm3);
+    logmax_num_re0 = _mm_max_epi16(logmax_num_re0, xmm4);
+    logmax_num_re0 = _mm_max_epi16(logmax_num_re0, xmm5);
+    xmm0 = _mm_max_epi16(bit_met_p1_p7, bit_met_p1_p5);
+    xmm1 = _mm_max_epi16(bit_met_p1_p3, bit_met_p1_p1);
+    xmm2 = _mm_max_epi16(bit_met_p1_m1, bit_met_p1_m3);
+    xmm3 = _mm_max_epi16(bit_met_p1_m5, bit_met_p1_m7);
+    xmm4 = _mm_max_epi16(xmm0, xmm1);
+    xmm5 = _mm_max_epi16(xmm2, xmm3);
+    logmax_num_re0 = _mm_max_epi16(logmax_num_re0, xmm4);
+    logmax_num_re0 = _mm_max_epi16(logmax_num_re0, xmm5);
+
+    y0r = _mm_subs_epi16(logmax_num_re0, logmax_den_re0);
+
+    // Detection for 2nd bit (LTE mapping)
+    // bit = 1
+    xmm0 = _mm_max_epi16(bit_met_p7_m1, bit_met_p5_m1);
+    xmm1 = _mm_max_epi16(bit_met_p3_m1, bit_met_p1_m1);
+    xmm2 = _mm_max_epi16(bit_met_m1_m1, bit_met_m3_m1);
+    xmm3 = _mm_max_epi16(bit_met_m5_m1, bit_met_m7_m1);
+    xmm4 = _mm_max_epi16(xmm0, xmm1);
+    xmm5 = _mm_max_epi16(xmm2, xmm3);
+    logmax_den_re0 = _mm_max_epi16(xmm4, xmm5);
+    xmm0 = _mm_max_epi16(bit_met_p7_m3, bit_met_p5_m3);
+    xmm1 = _mm_max_epi16(bit_met_p3_m3, bit_met_p1_m3);
+    xmm2 = _mm_max_epi16(bit_met_m1_m3, bit_met_m3_m3);
+    xmm3 = _mm_max_epi16(bit_met_m5_m3, bit_met_m7_m3);
+    xmm4 = _mm_max_epi16(xmm0, xmm1);
+    xmm5 = _mm_max_epi16(xmm2, xmm3);
+    logmax_den_re0 = _mm_max_epi16(logmax_den_re0, xmm4);
+    logmax_den_re0 = _mm_max_epi16(logmax_den_re0, xmm5);
+    xmm0 = _mm_max_epi16(bit_met_p7_m5, bit_met_p5_m5);
+    xmm1 = _mm_max_epi16(bit_met_p3_m5, bit_met_p1_m5);
+    xmm2 = _mm_max_epi16(bit_met_m1_m5, bit_met_m3_m5);
+    xmm3 = _mm_max_epi16(bit_met_m5_m5, bit_met_m7_m5);
+    xmm4 = _mm_max_epi16(xmm0, xmm1);
+    xmm5 = _mm_max_epi16(xmm2, xmm3);
+    logmax_den_re0 = _mm_max_epi16(logmax_den_re0, xmm4);
+    logmax_den_re0 = _mm_max_epi16(logmax_den_re0, xmm5);
+    xmm0 = _mm_max_epi16(bit_met_p7_m7, bit_met_p5_m7);
+    xmm1 = _mm_max_epi16(bit_met_p3_m7, bit_met_p1_m7);
+    xmm2 = _mm_max_epi16(bit_met_m1_m7, bit_met_m3_m7);
+    xmm3 = _mm_max_epi16(bit_met_m5_m7, bit_met_m7_m7);
+    xmm4 = _mm_max_epi16(xmm0, xmm1);
+    xmm5 = _mm_max_epi16(xmm2, xmm3);
+    logmax_den_re0 = _mm_max_epi16(logmax_den_re0, xmm4);
+    logmax_den_re0 = _mm_max_epi16(logmax_den_re0, xmm5);
+
+    // bit = 0
+    xmm0 = _mm_max_epi16(bit_met_p7_p1, bit_met_p5_p1);
+    xmm1 = _mm_max_epi16(bit_met_p3_p1, bit_met_p1_p1);
+    xmm2 = _mm_max_epi16(bit_met_m1_p1, bit_met_m3_p1);
+    xmm3 = _mm_max_epi16(bit_met_m5_p1, bit_met_m7_p1);
+    xmm4 = _mm_max_epi16(xmm0, xmm1);
+    xmm5 = _mm_max_epi16(xmm2, xmm3);
+    logmax_num_re0 = _mm_max_epi16(xmm4, xmm5);
+    xmm0 = _mm_max_epi16(bit_met_p7_p3, bit_met_p5_p3);
+    xmm1 = _mm_max_epi16(bit_met_p3_p3, bit_met_p1_p3);
+    xmm2 = _mm_max_epi16(bit_met_m1_p3, bit_met_m3_p3);
+    xmm3 = _mm_max_epi16(bit_met_m5_p3, bit_met_m7_p3);
+    xmm4 = _mm_max_epi16(xmm0, xmm1);
+    xmm5 = _mm_max_epi16(xmm2, xmm3);
+    logmax_num_re0 = _mm_max_epi16(logmax_num_re0, xmm4);
+    logmax_num_re0 = _mm_max_epi16(logmax_num_re0, xmm5);
+    xmm0 = _mm_max_epi16(bit_met_p7_p5, bit_met_p5_p5);
+    xmm1 = _mm_max_epi16(bit_met_p3_p5, bit_met_p1_p5);
+    xmm2 = _mm_max_epi16(bit_met_m1_p5, bit_met_m3_p5);
+    xmm3 = _mm_max_epi16(bit_met_m5_p5, bit_met_m7_p5);
+    xmm4 = _mm_max_epi16(xmm0, xmm1);
+    xmm5 = _mm_max_epi16(xmm2, xmm3);
+    logmax_num_re0 = _mm_max_epi16(logmax_num_re0, xmm4);
+    logmax_num_re0 = _mm_max_epi16(logmax_num_re0, xmm5);
+    xmm0 = _mm_max_epi16(bit_met_p7_p7, bit_met_p5_p7);
+    xmm1 = _mm_max_epi16(bit_met_p3_p7, bit_met_p1_p7);
+    xmm2 = _mm_max_epi16(bit_met_m1_p7, bit_met_m3_p7);
+    xmm3 = _mm_max_epi16(bit_met_m5_p7, bit_met_m7_p7);
+    xmm4 = _mm_max_epi16(xmm0, xmm1);
+    xmm5 = _mm_max_epi16(xmm2, xmm3);
+    logmax_num_re0 = _mm_max_epi16(logmax_num_re0, xmm4);
+    logmax_num_re0 = _mm_max_epi16(logmax_num_re0, xmm5);
+
+    y1r = _mm_subs_epi16(logmax_num_re0, logmax_den_re0);
+
+    // Detection for 3rd bit (LTE mapping)
+    xmm0 = _mm_max_epi16(bit_met_m7_m7, bit_met_m7_m5);
+    xmm1 = _mm_max_epi16(bit_met_m7_m3, bit_met_m7_m1);
+    xmm2 = _mm_max_epi16(bit_met_m7_p1, bit_met_m7_p3);
+    xmm3 = _mm_max_epi16(bit_met_m7_p5, bit_met_m7_p7);
+    xmm4 = _mm_max_epi16(xmm0, xmm1);
+    xmm5 = _mm_max_epi16(xmm2, xmm3);
+    logmax_den_re0 = _mm_max_epi16(xmm4, xmm5);
+    xmm0 = _mm_max_epi16(bit_met_m5_m7, bit_met_m5_m5);
+    xmm1 = _mm_max_epi16(bit_met_m5_m3, bit_met_m5_m1);
+    xmm2 = _mm_max_epi16(bit_met_m5_p1, bit_met_m5_p3);
+    xmm3 = _mm_max_epi16(bit_met_m5_p5, bit_met_m5_p7);
+    xmm4 = _mm_max_epi16(xmm0, xmm1);
+    xmm5 = _mm_max_epi16(xmm2, xmm3);
+    logmax_den_re0 = _mm_max_epi16(logmax_den_re0, xmm4);
+    logmax_den_re0 = _mm_max_epi16(logmax_den_re0, xmm5);
+    xmm0 = _mm_max_epi16(bit_met_p5_m7, bit_met_p5_m5);
+    xmm1 = _mm_max_epi16(bit_met_p5_m3, bit_met_p5_m1);
+    xmm2 = _mm_max_epi16(bit_met_p5_p1, bit_met_p5_p3);
+    xmm3 = _mm_max_epi16(bit_met_p5_p5, bit_met_p5_p7);
+    xmm4 = _mm_max_epi16(xmm0, xmm1);
+    xmm5 = _mm_max_epi16(xmm2, xmm3);
+    logmax_den_re0 = _mm_max_epi16(logmax_den_re0, xmm4);
+    logmax_den_re0 = _mm_max_epi16(logmax_den_re0, xmm5);
+    xmm0 = _mm_max_epi16(bit_met_p7_m7, bit_met_p7_m5);
+    xmm1 = _mm_max_epi16(bit_met_p7_m3, bit_met_p7_m1);
+    xmm2 = _mm_max_epi16(bit_met_p7_p1, bit_met_p7_p3);
+    xmm3 = _mm_max_epi16(bit_met_p7_p5, bit_met_p7_p7);
+    xmm4 = _mm_max_epi16(xmm0, xmm1);
+    xmm5 = _mm_max_epi16(xmm2, xmm3);
+    logmax_den_re0 = _mm_max_epi16(logmax_den_re0, xmm4);
+    logmax_den_re0 = _mm_max_epi16(logmax_den_re0, xmm5);
+
+    xmm0 = _mm_max_epi16(bit_met_m3_m7, bit_met_m3_m5);
+    xmm1 = _mm_max_epi16(bit_met_m3_m3, bit_met_m3_m1);
+    xmm2 = _mm_max_epi16(bit_met_m3_p1, bit_met_m3_p3);
+    xmm3 = _mm_max_epi16(bit_met_m3_p5, bit_met_m3_p7);
+    xmm4 = _mm_max_epi16(xmm0, xmm1);
+    xmm5 = _mm_max_epi16(xmm2, xmm3);
+    logmax_num_re0 = _mm_max_epi16(xmm4, xmm5);
+    xmm0 = _mm_max_epi16(bit_met_m1_m7, bit_met_m1_m5);
+    xmm1 = _mm_max_epi16(bit_met_m1_m3, bit_met_m1_m1);
+    xmm2 = _mm_max_epi16(bit_met_m1_p1, bit_met_m1_p3);
+    xmm3 = _mm_max_epi16(bit_met_m1_p5, bit_met_m1_p7);
+    xmm4 = _mm_max_epi16(xmm0, xmm1);
+    xmm5 = _mm_max_epi16(xmm2, xmm3);
+    logmax_num_re0 = _mm_max_epi16(logmax_num_re0, xmm4);
+    logmax_num_re0 = _mm_max_epi16(logmax_num_re0, xmm5);
+    xmm0 = _mm_max_epi16(bit_met_p1_m7, bit_met_p1_m5);
+    xmm1 = _mm_max_epi16(bit_met_p1_m3, bit_met_p1_m1);
+    xmm2 = _mm_max_epi16(bit_met_p1_p1, bit_met_p1_p3);
+    xmm3 = _mm_max_epi16(bit_met_p1_p5, bit_met_p1_p7);
+    xmm4 = _mm_max_epi16(xmm0, xmm1);
+    xmm5 = _mm_max_epi16(xmm2, xmm3);
+    logmax_num_re0 = _mm_max_epi16(logmax_num_re0, xmm4);
+    logmax_num_re0 = _mm_max_epi16(logmax_num_re0, xmm5);
+    xmm0 = _mm_max_epi16(bit_met_p3_m7, bit_met_p3_m5);
+    xmm1 = _mm_max_epi16(bit_met_p3_m3, bit_met_p3_m1);
+    xmm2 = _mm_max_epi16(bit_met_p3_p1, bit_met_p3_p3);
+    xmm3 = _mm_max_epi16(bit_met_p3_p5, bit_met_p3_p7);
+    xmm4 = _mm_max_epi16(xmm0, xmm1);
+    xmm5 = _mm_max_epi16(xmm2, xmm3);
+    logmax_num_re0 = _mm_max_epi16(logmax_num_re0, xmm4);
+    logmax_num_re0 = _mm_max_epi16(logmax_num_re0, xmm5);
+
+    y2r = _mm_subs_epi16(logmax_num_re0, logmax_den_re0);
+
+    // Detection for 4th bit (LTE mapping)
+    xmm0 = _mm_max_epi16(bit_met_p7_p7, bit_met_p5_p7);
+    xmm1 = _mm_max_epi16(bit_met_p3_p7, bit_met_p1_p7);
+    xmm2 = _mm_max_epi16(bit_met_m1_p7, bit_met_m3_p7);
+    xmm3 = _mm_max_epi16(bit_met_m5_p7, bit_met_m7_p7);
+    xmm4 = _mm_max_epi16(xmm0, xmm1);
+    xmm5 = _mm_max_epi16(xmm2, xmm3);
+    logmax_den_re0 = _mm_max_epi16(xmm4, xmm5);
+    xmm0 = _mm_max_epi16(bit_met_p7_p5, bit_met_p5_p5);
+    xmm1 = _mm_max_epi16(bit_met_p3_p5, bit_met_p1_p5);
+    xmm2 = _mm_max_epi16(bit_met_m1_p5, bit_met_m3_p5);
+    xmm3 = _mm_max_epi16(bit_met_m5_p5, bit_met_m5_p5);
+    xmm4 = _mm_max_epi16(xmm0, xmm1);
+    xmm5 = _mm_max_epi16(xmm2, xmm3);
+    logmax_den_re0 = _mm_max_epi16(logmax_den_re0, xmm4);
+    logmax_den_re0 = _mm_max_epi16(logmax_den_re0, xmm5);
+    xmm0 = _mm_max_epi16(bit_met_p7_m5, bit_met_p5_m5);
+    xmm1 = _mm_max_epi16(bit_met_p3_m5, bit_met_p1_m5);
+    xmm2 = _mm_max_epi16(bit_met_m1_m5, bit_met_m3_m5);
+    xmm3 = _mm_max_epi16(bit_met_m5_m5, bit_met_m7_m5);
+    xmm4 = _mm_max_epi16(xmm0, xmm1);
+    xmm5 = _mm_max_epi16(xmm2, xmm3);
+    logmax_den_re0 = _mm_max_epi16(logmax_den_re0, xmm4);
+    logmax_den_re0 = _mm_max_epi16(logmax_den_re0, xmm5);
+    xmm0 = _mm_max_epi16(bit_met_p7_m7, bit_met_p5_m7);
+    xmm1 = _mm_max_epi16(bit_met_p3_m7, bit_met_p1_m7);
+    xmm2 = _mm_max_epi16(bit_met_m1_m7, bit_met_m3_m7);
+    xmm3 = _mm_max_epi16(bit_met_m5_m7, bit_met_m7_m7);
+    xmm4 = _mm_max_epi16(xmm0, xmm1);
+    xmm5 = _mm_max_epi16(xmm2, xmm3);
+    logmax_den_re0 = _mm_max_epi16(logmax_den_re0, xmm4);
+    logmax_den_re0 = _mm_max_epi16(logmax_den_re0, xmm5);
+
+    xmm0 = _mm_max_epi16(bit_met_p7_m1, bit_met_p5_m1);
+    xmm1 = _mm_max_epi16(bit_met_p3_m1, bit_met_p1_m1);
+    xmm2 = _mm_max_epi16(bit_met_m1_m1, bit_met_m3_m1);
+    xmm3 = _mm_max_epi16(bit_met_m5_m1, bit_met_m7_m1);
+    xmm4 = _mm_max_epi16(xmm0, xmm1);
+    xmm5 = _mm_max_epi16(xmm2, xmm3);
+    logmax_num_re0 = _mm_max_epi16(xmm4, xmm5);
+    xmm0 = _mm_max_epi16(bit_met_p7_m3, bit_met_p5_m3);
+    xmm1 = _mm_max_epi16(bit_met_p3_m3, bit_met_p1_m3);
+    xmm2 = _mm_max_epi16(bit_met_m1_m3, bit_met_m3_m3);
+    xmm3 = _mm_max_epi16(bit_met_m5_m3, bit_met_m7_m3);
+    xmm4 = _mm_max_epi16(xmm0, xmm1);
+    xmm5 = _mm_max_epi16(xmm2, xmm3);
+    logmax_num_re0 = _mm_max_epi16(logmax_num_re0, xmm4);
+    logmax_num_re0 = _mm_max_epi16(logmax_num_re0, xmm5);
+    xmm0 = _mm_max_epi16(bit_met_p7_p1, bit_met_p5_p1);
+    xmm1 = _mm_max_epi16(bit_met_p3_p1, bit_met_p1_p1);
+    xmm2 = _mm_max_epi16(bit_met_m1_p1, bit_met_m3_p1);
+    xmm3 = _mm_max_epi16(bit_met_m5_p1, bit_met_m7_p1);
+    xmm4 = _mm_max_epi16(xmm0, xmm1);
+    xmm5 = _mm_max_epi16(xmm2, xmm3);
+    logmax_num_re0 = _mm_max_epi16(logmax_num_re0, xmm4);
+    logmax_num_re0 = _mm_max_epi16(logmax_num_re0, xmm5);
+    xmm0 = _mm_max_epi16(bit_met_p7_p3, bit_met_p5_p3);
+    xmm1 = _mm_max_epi16(bit_met_p3_p3, bit_met_p1_p3);
+    xmm2 = _mm_max_epi16(bit_met_m1_p3, bit_met_m3_p3);
+    xmm3 = _mm_max_epi16(bit_met_m5_p3, bit_met_m7_p3);
+    xmm4 = _mm_max_epi16(xmm0, xmm1);
+    xmm5 = _mm_max_epi16(xmm2, xmm3);
+    logmax_num_re0 = _mm_max_epi16(logmax_num_re0, xmm4);
+    logmax_num_re0 = _mm_max_epi16(logmax_num_re0, xmm5);
+
+    y0i = _mm_subs_epi16(logmax_num_re0, logmax_den_re0);
+
+
+    // Detection for 5th bit (LTE mapping)
+    xmm0 = _mm_max_epi16(bit_met_m7_m7, bit_met_m7_m5);
+    xmm1 = _mm_max_epi16(bit_met_m7_m3, bit_met_m7_m1);
+    xmm2 = _mm_max_epi16(bit_met_m7_p1, bit_met_m7_p3);
+    xmm3 = _mm_max_epi16(bit_met_m7_p5, bit_met_m7_p7);
+    xmm4 = _mm_max_epi16(xmm0, xmm1);
+    xmm5 = _mm_max_epi16(xmm2, xmm3);
+    logmax_den_re0 = _mm_max_epi16(xmm4, xmm5);
+    xmm0 = _mm_max_epi16(bit_met_m1_m7, bit_met_m1_m5);
+    xmm1 = _mm_max_epi16(bit_met_m1_m3, bit_met_m1_m1);
+    xmm2 = _mm_max_epi16(bit_met_m1_p1, bit_met_m1_p3);
+    xmm3 = _mm_max_epi16(bit_met_m1_p5, bit_met_m1_p7);
+    xmm4 = _mm_max_epi16(xmm0, xmm1);
+    xmm5 = _mm_max_epi16(xmm2, xmm3);
+    logmax_den_re0 = _mm_max_epi16(logmax_den_re0, xmm4);
+    logmax_den_re0 = _mm_max_epi16(logmax_den_re0, xmm5);
+    xmm0 = _mm_max_epi16(bit_met_p1_m7, bit_met_p1_m5);
+    xmm1 = _mm_max_epi16(bit_met_p1_m3, bit_met_p1_m1);
+    xmm2 = _mm_max_epi16(bit_met_p1_p1, bit_met_p1_p3);
+    xmm3 = _mm_max_epi16(bit_met_p1_p5, bit_met_p1_p7);
+    xmm4 = _mm_max_epi16(xmm0, xmm1);
+    xmm5 = _mm_max_epi16(xmm2, xmm3);
+    logmax_den_re0 = _mm_max_epi16(logmax_den_re0, xmm4);
+    logmax_den_re0 = _mm_max_epi16(logmax_den_re0, xmm5);
+    xmm0 = _mm_max_epi16(bit_met_p7_m7, bit_met_p7_m5);
+    xmm1 = _mm_max_epi16(bit_met_p7_m3, bit_met_p7_m1);
+    xmm2 = _mm_max_epi16(bit_met_p7_p1, bit_met_p7_p3);
+    xmm3 = _mm_max_epi16(bit_met_p7_p5, bit_met_p7_p7);
+    xmm4 = _mm_max_epi16(xmm0, xmm1);
+    xmm5 = _mm_max_epi16(xmm2, xmm3);
+    logmax_den_re0 = _mm_max_epi16(logmax_den_re0, xmm4);
+    logmax_den_re0 = _mm_max_epi16(logmax_den_re0, xmm5);
+
+    xmm0 = _mm_max_epi16(bit_met_m5_m7, bit_met_m5_m5);
+    xmm1 = _mm_max_epi16(bit_met_m5_m3, bit_met_m5_m1);
+    xmm2 = _mm_max_epi16(bit_met_m5_p1, bit_met_m5_p3);
+    xmm3 = _mm_max_epi16(bit_met_m5_p5, bit_met_m5_p7);
+    xmm4 = _mm_max_epi16(xmm0, xmm1);
+    xmm5 = _mm_max_epi16(xmm2, xmm3);
+    logmax_num_re0 = _mm_max_epi16(xmm4, xmm5);
+    xmm0 = _mm_max_epi16(bit_met_m3_m7, bit_met_m3_m5);
+    xmm1 = _mm_max_epi16(bit_met_m3_m3, bit_met_m3_m1);
+    xmm2 = _mm_max_epi16(bit_met_m3_p1, bit_met_m3_p3);
+    xmm3 = _mm_max_epi16(bit_met_m3_p5, bit_met_m3_p7);
+    xmm4 = _mm_max_epi16(xmm0, xmm1);
+    xmm5 = _mm_max_epi16(xmm2, xmm3);
+    logmax_num_re0 = _mm_max_epi16(logmax_num_re0, xmm4);
+    logmax_num_re0 = _mm_max_epi16(logmax_num_re0, xmm5);
+    xmm0 = _mm_max_epi16(bit_met_p3_m7, bit_met_p3_m5);
+    xmm1 = _mm_max_epi16(bit_met_p3_m3, bit_met_p3_m1);
+    xmm2 = _mm_max_epi16(bit_met_p3_p1, bit_met_p3_p3);
+    xmm3 = _mm_max_epi16(bit_met_p3_p5, bit_met_p3_p7);
+    xmm4 = _mm_max_epi16(xmm0, xmm1);
+    xmm5 = _mm_max_epi16(xmm2, xmm3);
+    logmax_num_re0 = _mm_max_epi16(logmax_num_re0, xmm4);
+    logmax_num_re0 = _mm_max_epi16(logmax_num_re0, xmm5);
+    xmm0 = _mm_max_epi16(bit_met_p5_m7, bit_met_p5_m5);
+    xmm1 = _mm_max_epi16(bit_met_p5_m3, bit_met_p5_m1);
+    xmm2 = _mm_max_epi16(bit_met_p5_p1, bit_met_p5_p3);
+    xmm3 = _mm_max_epi16(bit_met_p5_p5, bit_met_p5_p7);
+    xmm4 = _mm_max_epi16(xmm0, xmm1);
+    xmm5 = _mm_max_epi16(xmm2, xmm3);
+    logmax_num_re0 = _mm_max_epi16(logmax_num_re0, xmm4);
+    logmax_num_re0 = _mm_max_epi16(logmax_num_re0, xmm5);
+
+    y1i = _mm_subs_epi16(logmax_num_re0, logmax_den_re0);
+
+    // Detection for 6th bit (LTE mapping)
+    xmm0 = _mm_max_epi16(bit_met_p7_p7, bit_met_p5_p7);
+    xmm1 = _mm_max_epi16(bit_met_p3_p7, bit_met_p1_p7);
+    xmm2 = _mm_max_epi16(bit_met_m1_p7, bit_met_m3_p7);
+    xmm3 = _mm_max_epi16(bit_met_m5_p7, bit_met_m7_p7);
+    xmm4 = _mm_max_epi16(xmm0, xmm1);
+    xmm5 = _mm_max_epi16(xmm2, xmm3);
+    logmax_den_re0 = _mm_max_epi16(xmm4, xmm5);
+    xmm0 = _mm_max_epi16(bit_met_p7_p1, bit_met_p5_p1);
+    xmm1 = _mm_max_epi16(bit_met_p3_p1, bit_met_p1_p1);
+    xmm2 = _mm_max_epi16(bit_met_m1_p1, bit_met_m3_p1);
+    xmm3 = _mm_max_epi16(bit_met_m5_p1, bit_met_m5_p1);
+    xmm4 = _mm_max_epi16(xmm0, xmm1);
+    xmm5 = _mm_max_epi16(xmm2, xmm3);
+    logmax_den_re0 = _mm_max_epi16(logmax_den_re0, xmm4);
+    logmax_den_re0 = _mm_max_epi16(logmax_den_re0, xmm5);
+    xmm0 = _mm_max_epi16(bit_met_p7_m1, bit_met_p5_m1);
+    xmm1 = _mm_max_epi16(bit_met_p3_m1, bit_met_p1_m1);
+    xmm2 = _mm_max_epi16(bit_met_m1_m1, bit_met_m3_m1);
+    xmm3 = _mm_max_epi16(bit_met_m5_m1, bit_met_m7_m1);
+    xmm4 = _mm_max_epi16(xmm0, xmm1);
+    xmm5 = _mm_max_epi16(xmm2, xmm3);
+    logmax_den_re0 = _mm_max_epi16(logmax_den_re0, xmm4);
+    logmax_den_re0 = _mm_max_epi16(logmax_den_re0, xmm5);
+    xmm0 = _mm_max_epi16(bit_met_p7_m7, bit_met_p5_m7);
+    xmm1 = _mm_max_epi16(bit_met_p3_m7, bit_met_p1_m7);
+    xmm2 = _mm_max_epi16(bit_met_m1_m7, bit_met_m3_m7);
+    xmm3 = _mm_max_epi16(bit_met_m5_m7, bit_met_m7_m7);
+    xmm4 = _mm_max_epi16(xmm0, xmm1);
+    xmm5 = _mm_max_epi16(xmm2, xmm3);
+    logmax_den_re0 = _mm_max_epi16(logmax_den_re0, xmm4);
+    logmax_den_re0 = _mm_max_epi16(logmax_den_re0, xmm5);
+
+    xmm0 = _mm_max_epi16(bit_met_p7_m5, bit_met_p5_m5);
+    xmm1 = _mm_max_epi16(bit_met_p3_m5, bit_met_p1_m5);
+    xmm2 = _mm_max_epi16(bit_met_m1_m5, bit_met_m3_m5);
+    xmm3 = _mm_max_epi16(bit_met_m5_m5, bit_met_m7_m5);
+    xmm4 = _mm_max_epi16(xmm0, xmm1);
+    xmm5 = _mm_max_epi16(xmm2, xmm3);
+    logmax_num_re0 = _mm_max_epi16(xmm4, xmm5);
+    xmm0 = _mm_max_epi16(bit_met_p7_m3, bit_met_p5_m3);
+    xmm1 = _mm_max_epi16(bit_met_p3_m3, bit_met_p1_m3);
+    xmm2 = _mm_max_epi16(bit_met_m1_m3, bit_met_m3_m3);
+    xmm3 = _mm_max_epi16(bit_met_m5_m3, bit_met_m7_m3);
+    xmm4 = _mm_max_epi16(xmm0, xmm1);
+    xmm5 = _mm_max_epi16(xmm2, xmm3);
+    logmax_num_re0 = _mm_max_epi16(logmax_num_re0, xmm4);
+    logmax_num_re0 = _mm_max_epi16(logmax_num_re0, xmm5);
+    xmm0 = _mm_max_epi16(bit_met_p7_p3, bit_met_p5_p3);
+    xmm1 = _mm_max_epi16(bit_met_p3_p3, bit_met_p1_p3);
+    xmm2 = _mm_max_epi16(bit_met_m1_p3, bit_met_m3_p3);
+    xmm3 = _mm_max_epi16(bit_met_m5_p3, bit_met_m7_p3);
+    xmm4 = _mm_max_epi16(xmm0, xmm1);
+    xmm5 = _mm_max_epi16(xmm2, xmm3);
+    logmax_num_re0 = _mm_max_epi16(logmax_num_re0, xmm4);
+    logmax_num_re0 = _mm_max_epi16(logmax_num_re0, xmm5);
+    xmm0 = _mm_max_epi16(bit_met_p7_p5, bit_met_p5_p5);
+    xmm1 = _mm_max_epi16(bit_met_p3_p5, bit_met_p1_p5);
+    xmm2 = _mm_max_epi16(bit_met_m1_p5, bit_met_m3_p5);
+    xmm3 = _mm_max_epi16(bit_met_m5_p5, bit_met_m7_p5);
+    xmm4 = _mm_max_epi16(xmm0, xmm1);
+    xmm5 = _mm_max_epi16(xmm2, xmm3);
+    logmax_num_re0 = _mm_max_epi16(logmax_num_re0, xmm4);
+    logmax_num_re0 = _mm_max_epi16(logmax_num_re0, xmm5);
+
+    y2i = _mm_subs_epi16(logmax_num_re0, logmax_den_re0);
+
+
+    // map to output stream, difficult to do in SIMD since we have 6 16bit LLRs
+    // RE 1
+    j = 24*i;
+    stream0_out[j + 0] = ((short *)&y0r)[0];
+    stream0_out[j + 1] = ((short *)&y1r)[0];
+    stream0_out[j + 2] = ((short *)&y2r)[0];
+    stream0_out[j + 3] = ((short *)&y0i)[0];
+    stream0_out[j + 4] = ((short *)&y1i)[0];
+    stream0_out[j + 5] = ((short *)&y2i)[0];
+    // RE 2
+    stream0_out[j + 6] = ((short *)&y0r)[1];
+    stream0_out[j + 7] = ((short *)&y1r)[1];
+    stream0_out[j + 8] = ((short *)&y2r)[1];
+    stream0_out[j + 9] = ((short *)&y0i)[1];
+    stream0_out[j + 10] = ((short *)&y1i)[1];
+    stream0_out[j + 11] = ((short *)&y2i)[1];
+    // RE 3
+    stream0_out[j + 12] = ((short *)&y0r)[2];
+    stream0_out[j + 13] = ((short *)&y1r)[2];
+    stream0_out[j + 14] = ((short *)&y2r)[2];
+    stream0_out[j + 15] = ((short *)&y0i)[2];
+    stream0_out[j + 16] = ((short *)&y1i)[2];
+    stream0_out[j + 17] = ((short *)&y2i)[2];
+    // RE 4
+    stream0_out[j + 18] = ((short *)&y0r)[3];
+    stream0_out[j + 19] = ((short *)&y1r)[3];
+    stream0_out[j + 20] = ((short *)&y2r)[3];
+    stream0_out[j + 21] = ((short *)&y0i)[3];
+    stream0_out[j + 22] = ((short *)&y1i)[3];
+    stream0_out[j + 23] = ((short *)&y2i)[3];
+    // RE 5
+    stream0_out[j + 24] = ((short *)&y0r)[4];
+    stream0_out[j + 25] = ((short *)&y1r)[4];
+    stream0_out[j + 26] = ((short *)&y2r)[4];
+    stream0_out[j + 27] = ((short *)&y0i)[4];
+    stream0_out[j + 28] = ((short *)&y1i)[4];
+    stream0_out[j + 29] = ((short *)&y2i)[4];
+    // RE 6
+    stream0_out[j + 30] = ((short *)&y0r)[5];
+    stream0_out[j + 31] = ((short *)&y1r)[5];
+    stream0_out[j + 32] = ((short *)&y2r)[5];
+    stream0_out[j + 33] = ((short *)&y0i)[5];
+    stream0_out[j + 34] = ((short *)&y1i)[5];
+    stream0_out[j + 35] = ((short *)&y2i)[5];
+    // RE 7
+    stream0_out[j + 36] = ((short *)&y0r)[6];
+    stream0_out[j + 37] = ((short *)&y1r)[6];
+    stream0_out[j + 38] = ((short *)&y2r)[6];
+    stream0_out[j + 39] = ((short *)&y0i)[6];
+    stream0_out[j + 40] = ((short *)&y1i)[6];
+    stream0_out[j + 41] = ((short *)&y2i)[6];
+    // RE 8
+    stream0_out[j + 42] = ((short *)&y0r)[7];
+    stream0_out[j + 43] = ((short *)&y1r)[7];
+    stream0_out[j + 44] = ((short *)&y2r)[7];
+    stream0_out[j + 45] = ((short *)&y0i)[7];
+    stream0_out[j + 46] = ((short *)&y1i)[7];
+    stream0_out[j + 47] = ((short *)&y2i)[7];
+
+#elif defined(__arm__)
+
+#endif
+
+  }
+
+#if defined(__x86_64__) || defined(__i386__)
+  _mm_empty();
+  _m_empty();
+#endif
+}
+
+
+/*! \brief Compute the 64QAM/64QAM LLRs of one OFDM symbol (6 LLRs per resource element).
+ *
+ * Takes the slice of every input buffer that belongs to \a symbol (index [0] only,
+ * offset symbol*N_RB_DL*12), derives the number of resource elements in the symbol and
+ * passes the slices to qam64_qam64(), or to qam64_qam64_avx2() on padded copies when
+ * __AVX2__ is defined.
+ *
+ * \param frame_parms          N_RB_DL, Ncp and nb_antenna_ports_eNB are read
+ * \param rxdataF_comp         1st kernel input (names suggest the "_i" buffers carry the
+ *                             same quantities for the interfering stream - TODO confirm)
+ * \param rxdataF_comp_i       2nd kernel input
+ * \param dl_ch_mag            3rd kernel input
+ * \param dl_ch_mag_i          4th kernel input
+ * \param rho_i                6th kernel input
+ * \param dlsch_llr            buffer handed to the kernel for the LLRs, must not be NULL
+ * \param symbol               OFDM symbol index
+ * \param first_symbol_flag    has no effect on the result
+ * \param nb_rb                number of resource blocks, scales the symbol length
+ * \param pbch_pss_sss_adjust  correction subtracted from the symbol length
+ * \param llr_offset           has no effect on the result
+ * \return always 0
+ */
+int dlsch_64qam_64qam_llr(LTE_DL_FRAME_PARMS *frame_parms,
+                          int32_t **rxdataF_comp,
+                          int32_t **rxdataF_comp_i,
+                          int32_t **dl_ch_mag,
+                          int32_t **dl_ch_mag_i,
+                          int32_t **rho_i,
+                          int16_t *dlsch_llr,
+                          uint8_t symbol,
+                          uint8_t first_symbol_flag,
+                          uint16_t nb_rb,
+                          uint16_t pbch_pss_sss_adjust,
+                          uint32_t llr_offset)
+{
+
+  // 16-bit views of the part of each input that belongs to this symbol
+  int16_t *rxF      = (int16_t*)&rxdataF_comp[0][(symbol*frame_parms->N_RB_DL*12)];
+  int16_t *rxF_i    = (int16_t*)&rxdataF_comp_i[0][(symbol*frame_parms->N_RB_DL*12)];
+  int16_t *ch_mag   = (int16_t*)&dl_ch_mag[0][(symbol*frame_parms->N_RB_DL*12)];
+  int16_t *ch_mag_i = (int16_t*)&dl_ch_mag_i[0][(symbol*frame_parms->N_RB_DL*12)];
+  int16_t *rho      = (int16_t*)&rho_i[0][(symbol*frame_parms->N_RB_DL*12)];
+  int len;
+  // symbols at or beyond index (7-Ncp) are folded back by (7-Ncp)
+  uint8_t symbol_mod = (symbol >= (7-frame_parms->Ncp))? (symbol-(7-frame_parms->Ncp)) : symbol;
+
+  // NOTE(review): neither argument influences the computation; they are kept for the
+  // existing callers. Confirm that ignoring llr_offset is intended.
+  (void)first_symbol_flag;
+  (void)llr_offset;
+
+  AssertFatal(dlsch_llr!=NULL,"dlsch_64qam_64qam_llr:llr is null, symbol %d\n",symbol);
+
+  if ((symbol_mod==0) || (symbol_mod==(4-frame_parms->Ncp))) {
+    // if symbol has pilots
+    if (frame_parms->nb_antenna_ports_eNB!=1)
+      // in 2 antenna ports we have 8 REs per symbol per RB
+      len = (nb_rb*8) - (2*pbch_pss_sss_adjust/3);
+    else
+      // for 1 antenna port we have 10 REs per symbol per RB
+      len = (nb_rb*10) - (5*pbch_pss_sss_adjust/6);
+  } else {
+    // symbol has no pilots
+    len = (nb_rb*12) - pbch_pss_sss_adjust;
+  }
+
+  // Nothing to compute; this also keeps a negative length out of the memcpy sizes below.
+  if (len <= 0)
+    return(0);
+
+#ifdef __AVX2__
+
+  // Work on copies whose length is the next multiple of 16 words strictly above len
+  // (one 4-byte word per resource element); malloc16_clear is expected to zero them.
+  const uint32_t len256i   = ((len+16)>>4)*16;
+  const uint32_t buf_bytes = len256i*4;
+  int32_t *rxF_256i      = (int32_t*) malloc16_clear(buf_bytes);
+  int32_t *rxF_i_256i    = (int32_t*) malloc16_clear(buf_bytes);
+  int32_t *ch_mag_256i   = (int32_t*) malloc16_clear(buf_bytes);
+  int32_t *ch_mag_i_256i = (int32_t*) malloc16_clear(buf_bytes);
+  int32_t *rho_256i      = (int32_t*) malloc16_clear(buf_bytes);
+
+  // only the first len words are copied; the padding keeps what malloc16_clear left
+  memcpy(rxF_256i, rxF, len*4);
+  memcpy(rxF_i_256i, rxF_i, len*4);
+  memcpy(ch_mag_256i, ch_mag, len*4);
+  memcpy(ch_mag_i_256i, ch_mag_i, len*4);
+  memcpy(rho_256i, rho, len*4);
+
+  qam64_qam64_avx2(rxF_256i,
+                   rxF_i_256i,
+                   ch_mag_256i,
+                   ch_mag_i_256i,
+                   dlsch_llr,
+                   rho_256i,
+                   len);
+
+  // free16 gets the allocated size, not the size of the pointer variable
+  free16(rxF_256i, buf_bytes);
+  free16(rxF_i_256i, buf_bytes);
+  free16(ch_mag_256i, buf_bytes);
+  free16(ch_mag_i_256i, buf_bytes);
+  free16(rho_256i, buf_bytes);
+
+#else
+  // the plain kernel reads the inputs in place
+  qam64_qam64((short *)rxF,
+              (short *)rxF_i,
+              (short *)ch_mag,
+              (short *)ch_mag_i,
+              (short *)dlsch_llr,
+              (short *)rho,
+              len);
+#endif
+
+  return(0);
+}
diff --git a/openair1/PHY/LTE_TRANSPORT/dlsch_llr_computation_avx2.c b/openair1/PHY/LTE_TRANSPORT/dlsch_llr_computation_avx2.c
new file mode 100644
index 0000000000000000000000000000000000000000..588adfbc55c65f736444797013a08ab37ade4a65
--- /dev/null
+++ b/openair1/PHY/LTE_TRANSPORT/dlsch_llr_computation_avx2.c
@@ -0,0 +1,4034 @@
+ /*
+ * Licensed to the OpenAirInterface (OAI) Software Alliance under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The OpenAirInterface Software Alliance licenses this file to You under
+ * the OAI Public License, Version 1.1  (the "License"); you may not use this file
+ * except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.openairinterface.org/?page_id=698
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *-------------------------------------------------------------------------------
+ * For more information about the OpenAirInterface (OAI) Software Alliance:
+ *      contact@openairinterface.org
+ */
+
+/*! \file PHY/LTE_TRANSPORT/dlsch_llr_computation_avx2.c
+ * \brief Top-level routines for LLR computation of the PDSCH physical channel from 36-211, V8.6 2009-03
+ * \author R. Knopp, F. Kaltenberger,A. Bhamri, S. Aubert, S. Wagner, X Jiang
+ * \date 2011
+ * \version 0.1
+ * \company Eurecom
+ * \email: knopp@eurecom.fr,florian.kaltenberger@eurecom.fr,ankit.bhamri@eurecom.fr,sebastien.aubert@eurecom.fr, sebastian.wagner@eurecom.fr
+ * \note
+ * \warning
+ */
+
+#include "PHY/defs.h"
+#include "PHY/TOOLS/defs.h"
+#include "PHY/extern.h"
+#include "defs.h"
+#include "extern.h"
+#include "PHY/sse_intrin.h"
+
+int16_t ones256[16] __attribute__ ((aligned(32))) = {-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1}; // every bit set in all 16 lanes; -1 is the in-range int16_t spelling of 0xffff
+// File-scope __m256i variables, 32-byte aligned, no initialiser. Presumably "rpi"/"rmi" = Re(rho)+Im(rho) / Re(rho)-Im(rho) and _a_b = scaled variants - TODO confirm in the kernels.
+static __m256i rho_rpi __attribute__ ((aligned(32)));
+static __m256i rho_rmi __attribute__ ((aligned(32)));
+static __m256i rho_rpi_1_1 __attribute__ ((aligned(32)));
+static __m256i rho_rpi_1_3 __attribute__ ((aligned(32)));
+static __m256i rho_rpi_1_5 __attribute__ ((aligned(32)));
+static __m256i rho_rpi_1_7 __attribute__ ((aligned(32)));
+static __m256i rho_rpi_3_1 __attribute__ ((aligned(32)));
+static __m256i rho_rpi_3_3 __attribute__ ((aligned(32)));
+static __m256i rho_rpi_3_5 __attribute__ ((aligned(32)));
+static __m256i rho_rpi_3_7 __attribute__ ((aligned(32)));
+static __m256i rho_rpi_5_1 __attribute__ ((aligned(32)));
+static __m256i rho_rpi_5_3 __attribute__ ((aligned(32)));
+static __m256i rho_rpi_5_5 __attribute__ ((aligned(32)));
+static __m256i rho_rpi_5_7 __attribute__ ((aligned(32)));
+static __m256i rho_rpi_7_1 __attribute__ ((aligned(32)));
+static __m256i rho_rpi_7_3 __attribute__ ((aligned(32)));
+static __m256i rho_rpi_7_5 __attribute__ ((aligned(32)));
+static __m256i rho_rpi_7_7 __attribute__ ((aligned(32)));
+static __m256i rho_rmi_1_1 __attribute__ ((aligned(32)));
+static __m256i rho_rmi_1_3 __attribute__ ((aligned(32)));
+static __m256i rho_rmi_1_5 __attribute__ ((aligned(32)));
+static __m256i rho_rmi_1_7 __attribute__ ((aligned(32)));
+static __m256i rho_rmi_3_1 __attribute__ ((aligned(32)));
+static __m256i rho_rmi_3_3 __attribute__ ((aligned(32)));
+static __m256i rho_rmi_3_5 __attribute__ ((aligned(32)));
+static __m256i rho_rmi_3_7 __attribute__ ((aligned(32)));
+static __m256i rho_rmi_5_1 __attribute__ ((aligned(32)));
+static __m256i rho_rmi_5_3 __attribute__ ((aligned(32)));
+static __m256i rho_rmi_5_5 __attribute__ ((aligned(32)));
+static __m256i rho_rmi_5_7 __attribute__ ((aligned(32)));
+static __m256i rho_rmi_7_1 __attribute__ ((aligned(32)));
+static __m256i rho_rmi_7_3 __attribute__ ((aligned(32)));
+static __m256i rho_rmi_7_5 __attribute__ ((aligned(32)));
+static __m256i rho_rmi_7_7 __attribute__ ((aligned(32)));
+// psi_r_<a>_<b>: one file-scope vector per (a,b) with a,b in m7,m5,m3,m1,p1,p3,p5,p7 (presumably m/p = minus/plus and _r = real part - TODO confirm).
+static __m256i psi_r_m7_m7 __attribute__ ((aligned(32)));
+static __m256i psi_r_m7_m5 __attribute__ ((aligned(32)));
+static __m256i psi_r_m7_m3 __attribute__ ((aligned(32)));
+static __m256i psi_r_m7_m1 __attribute__ ((aligned(32)));
+static __m256i psi_r_m7_p1 __attribute__ ((aligned(32)));
+static __m256i psi_r_m7_p3 __attribute__ ((aligned(32)));
+static __m256i psi_r_m7_p5 __attribute__ ((aligned(32)));
+static __m256i psi_r_m7_p7 __attribute__ ((aligned(32)));
+static __m256i psi_r_m5_m7 __attribute__ ((aligned(32)));
+static __m256i psi_r_m5_m5 __attribute__ ((aligned(32)));
+static __m256i psi_r_m5_m3 __attribute__ ((aligned(32)));
+static __m256i psi_r_m5_m1 __attribute__ ((aligned(32)));
+static __m256i psi_r_m5_p1 __attribute__ ((aligned(32)));
+static __m256i psi_r_m5_p3 __attribute__ ((aligned(32)));
+static __m256i psi_r_m5_p5 __attribute__ ((aligned(32)));
+static __m256i psi_r_m5_p7 __attribute__ ((aligned(32)));
+static __m256i psi_r_m3_m7 __attribute__ ((aligned(32)));
+static __m256i psi_r_m3_m5 __attribute__ ((aligned(32)));
+static __m256i psi_r_m3_m3 __attribute__ ((aligned(32)));
+static __m256i psi_r_m3_m1 __attribute__ ((aligned(32)));
+static __m256i psi_r_m3_p1 __attribute__ ((aligned(32)));
+static __m256i psi_r_m3_p3 __attribute__ ((aligned(32)));
+static __m256i psi_r_m3_p5 __attribute__ ((aligned(32)));
+static __m256i psi_r_m3_p7 __attribute__ ((aligned(32)));
+static __m256i psi_r_m1_m7 __attribute__ ((aligned(32)));
+static __m256i psi_r_m1_m5 __attribute__ ((aligned(32)));
+static __m256i psi_r_m1_m3 __attribute__ ((aligned(32)));
+static __m256i psi_r_m1_m1 __attribute__ ((aligned(32)));
+static __m256i psi_r_m1_p1 __attribute__ ((aligned(32)));
+static __m256i psi_r_m1_p3 __attribute__ ((aligned(32)));
+static __m256i psi_r_m1_p5 __attribute__ ((aligned(32)));
+static __m256i psi_r_m1_p7 __attribute__ ((aligned(32)));
+static __m256i psi_r_p1_m7 __attribute__ ((aligned(32)));
+static __m256i psi_r_p1_m5 __attribute__ ((aligned(32)));
+static __m256i psi_r_p1_m3 __attribute__ ((aligned(32)));
+static __m256i psi_r_p1_m1 __attribute__ ((aligned(32)));
+static __m256i psi_r_p1_p1 __attribute__ ((aligned(32)));
+static __m256i psi_r_p1_p3 __attribute__ ((aligned(32)));
+static __m256i psi_r_p1_p5 __attribute__ ((aligned(32)));
+static __m256i psi_r_p1_p7 __attribute__ ((aligned(32)));
+static __m256i psi_r_p3_m7 __attribute__ ((aligned(32)));
+static __m256i psi_r_p3_m5 __attribute__ ((aligned(32)));
+static __m256i psi_r_p3_m3 __attribute__ ((aligned(32)));
+static __m256i psi_r_p3_m1 __attribute__ ((aligned(32)));
+static __m256i psi_r_p3_p1 __attribute__ ((aligned(32)));
+static __m256i psi_r_p3_p3 __attribute__ ((aligned(32)));
+static __m256i psi_r_p3_p5 __attribute__ ((aligned(32)));
+static __m256i psi_r_p3_p7 __attribute__ ((aligned(32)));
+static __m256i psi_r_p5_m7 __attribute__ ((aligned(32)));
+static __m256i psi_r_p5_m5 __attribute__ ((aligned(32)));
+static __m256i psi_r_p5_m3 __attribute__ ((aligned(32)));
+static __m256i psi_r_p5_m1 __attribute__ ((aligned(32)));
+static __m256i psi_r_p5_p1 __attribute__ ((aligned(32)));
+static __m256i psi_r_p5_p3 __attribute__ ((aligned(32)));
+static __m256i psi_r_p5_p5 __attribute__ ((aligned(32)));
+static __m256i psi_r_p5_p7 __attribute__ ((aligned(32)));
+static __m256i psi_r_p7_m7 __attribute__ ((aligned(32)));
+static __m256i psi_r_p7_m5 __attribute__ ((aligned(32)));
+static __m256i psi_r_p7_m3 __attribute__ ((aligned(32)));
+static __m256i psi_r_p7_m1 __attribute__ ((aligned(32)));
+static __m256i psi_r_p7_p1 __attribute__ ((aligned(32)));
+static __m256i psi_r_p7_p3 __attribute__ ((aligned(32)));
+static __m256i psi_r_p7_p5 __attribute__ ((aligned(32)));
+static __m256i psi_r_p7_p7 __attribute__ ((aligned(32)));
+// psi_i_<a>_<b>: one file-scope vector per (a,b) with a,b in m7,m5,m3,m1,p1,p3,p5,p7 (presumably m/p = minus/plus and _i = imaginary part - TODO confirm).
+static __m256i psi_i_m7_m7 __attribute__ ((aligned(32)));
+static __m256i psi_i_m7_m5 __attribute__ ((aligned(32)));
+static __m256i psi_i_m7_m3 __attribute__ ((aligned(32)));
+static __m256i psi_i_m7_m1 __attribute__ ((aligned(32)));
+static __m256i psi_i_m7_p1 __attribute__ ((aligned(32)));
+static __m256i psi_i_m7_p3 __attribute__ ((aligned(32)));
+static __m256i psi_i_m7_p5 __attribute__ ((aligned(32)));
+static __m256i psi_i_m7_p7 __attribute__ ((aligned(32)));
+static __m256i psi_i_m5_m7 __attribute__ ((aligned(32)));
+static __m256i psi_i_m5_m5 __attribute__ ((aligned(32)));
+static __m256i psi_i_m5_m3 __attribute__ ((aligned(32)));
+static __m256i psi_i_m5_m1 __attribute__ ((aligned(32)));
+static __m256i psi_i_m5_p1 __attribute__ ((aligned(32)));
+static __m256i psi_i_m5_p3 __attribute__ ((aligned(32)));
+static __m256i psi_i_m5_p5 __attribute__ ((aligned(32)));
+static __m256i psi_i_m5_p7 __attribute__ ((aligned(32)));
+static __m256i psi_i_m3_m7 __attribute__ ((aligned(32)));
+static __m256i psi_i_m3_m5 __attribute__ ((aligned(32)));
+static __m256i psi_i_m3_m3 __attribute__ ((aligned(32)));
+static __m256i psi_i_m3_m1 __attribute__ ((aligned(32)));
+static __m256i psi_i_m3_p1 __attribute__ ((aligned(32)));
+static __m256i psi_i_m3_p3 __attribute__ ((aligned(32)));
+static __m256i psi_i_m3_p5 __attribute__ ((aligned(32)));
+static __m256i psi_i_m3_p7 __attribute__ ((aligned(32)));
+static __m256i psi_i_m1_m7 __attribute__ ((aligned(32)));
+static __m256i psi_i_m1_m5 __attribute__ ((aligned(32)));
+static __m256i psi_i_m1_m3 __attribute__ ((aligned(32)));
+static __m256i psi_i_m1_m1 __attribute__ ((aligned(32)));
+static __m256i psi_i_m1_p1 __attribute__ ((aligned(32)));
+static __m256i psi_i_m1_p3 __attribute__ ((aligned(32)));
+static __m256i psi_i_m1_p5 __attribute__ ((aligned(32)));
+static __m256i psi_i_m1_p7 __attribute__ ((aligned(32)));
+static __m256i psi_i_p1_m7 __attribute__ ((aligned(32)));
+static __m256i psi_i_p1_m5 __attribute__ ((aligned(32)));
+static __m256i psi_i_p1_m3 __attribute__ ((aligned(32)));
+static __m256i psi_i_p1_m1 __attribute__ ((aligned(32)));
+static __m256i psi_i_p1_p1 __attribute__ ((aligned(32)));
+static __m256i psi_i_p1_p3 __attribute__ ((aligned(32)));
+static __m256i psi_i_p1_p5 __attribute__ ((aligned(32)));
+static __m256i psi_i_p1_p7 __attribute__ ((aligned(32)));
+static __m256i psi_i_p3_m7 __attribute__ ((aligned(32)));
+static __m256i psi_i_p3_m5 __attribute__ ((aligned(32)));
+static __m256i psi_i_p3_m3 __attribute__ ((aligned(32)));
+static __m256i psi_i_p3_m1 __attribute__ ((aligned(32)));
+static __m256i psi_i_p3_p1 __attribute__ ((aligned(32)));
+static __m256i psi_i_p3_p3 __attribute__ ((aligned(32)));
+static __m256i psi_i_p3_p5 __attribute__ ((aligned(32)));
+static __m256i psi_i_p3_p7 __attribute__ ((aligned(32)));
+static __m256i psi_i_p5_m7 __attribute__ ((aligned(32)));
+static __m256i psi_i_p5_m5 __attribute__ ((aligned(32)));
+static __m256i psi_i_p5_m3 __attribute__ ((aligned(32)));
+static __m256i psi_i_p5_m1 __attribute__ ((aligned(32)));
+static __m256i psi_i_p5_p1 __attribute__ ((aligned(32)));
+static __m256i psi_i_p5_p3 __attribute__ ((aligned(32)));
+static __m256i psi_i_p5_p5 __attribute__ ((aligned(32)));
+static __m256i psi_i_p5_p7 __attribute__ ((aligned(32)));
+static __m256i psi_i_p7_m7 __attribute__ ((aligned(32)));
+static __m256i psi_i_p7_m5 __attribute__ ((aligned(32)));
+static __m256i psi_i_p7_m3 __attribute__ ((aligned(32)));
+static __m256i psi_i_p7_m1 __attribute__ ((aligned(32)));
+static __m256i psi_i_p7_p1 __attribute__ ((aligned(32)));
+static __m256i psi_i_p7_p3 __attribute__ ((aligned(32)));
+static __m256i psi_i_p7_p5 __attribute__ ((aligned(32)));
+static __m256i psi_i_p7_p7 __attribute__ ((aligned(32)));
+// a_r_<a>_<b>: one file-scope vector per (a,b) with a,b in m7,m5,m3,m1,p1,p3,p5,p7 (presumably m/p = minus/plus and _r = real part - TODO confirm).
+static __m256i a_r_m7_m7 __attribute__ ((aligned(32)));
+static __m256i a_r_m7_m5 __attribute__ ((aligned(32)));
+static __m256i a_r_m7_m3 __attribute__ ((aligned(32)));
+static __m256i a_r_m7_m1 __attribute__ ((aligned(32)));
+static __m256i a_r_m7_p1 __attribute__ ((aligned(32)));
+static __m256i a_r_m7_p3 __attribute__ ((aligned(32)));
+static __m256i a_r_m7_p5 __attribute__ ((aligned(32)));
+static __m256i a_r_m7_p7 __attribute__ ((aligned(32)));
+static __m256i a_r_m5_m7 __attribute__ ((aligned(32)));
+static __m256i a_r_m5_m5 __attribute__ ((aligned(32)));
+static __m256i a_r_m5_m3 __attribute__ ((aligned(32)));
+static __m256i a_r_m5_m1 __attribute__ ((aligned(32)));
+static __m256i a_r_m5_p1 __attribute__ ((aligned(32)));
+static __m256i a_r_m5_p3 __attribute__ ((aligned(32)));
+static __m256i a_r_m5_p5 __attribute__ ((aligned(32)));
+static __m256i a_r_m5_p7 __attribute__ ((aligned(32)));
+static __m256i a_r_m3_m7 __attribute__ ((aligned(32)));
+static __m256i a_r_m3_m5 __attribute__ ((aligned(32)));
+static __m256i a_r_m3_m3 __attribute__ ((aligned(32)));
+static __m256i a_r_m3_m1 __attribute__ ((aligned(32)));
+static __m256i a_r_m3_p1 __attribute__ ((aligned(32)));
+static __m256i a_r_m3_p3 __attribute__ ((aligned(32)));
+static __m256i a_r_m3_p5 __attribute__ ((aligned(32)));
+static __m256i a_r_m3_p7 __attribute__ ((aligned(32)));
+static __m256i a_r_m1_m7 __attribute__ ((aligned(32)));
+static __m256i a_r_m1_m5 __attribute__ ((aligned(32)));
+static __m256i a_r_m1_m3 __attribute__ ((aligned(32)));
+static __m256i a_r_m1_m1 __attribute__ ((aligned(32)));
+static __m256i a_r_m1_p1 __attribute__ ((aligned(32)));
+static __m256i a_r_m1_p3 __attribute__ ((aligned(32)));
+static __m256i a_r_m1_p5 __attribute__ ((aligned(32)));
+static __m256i a_r_m1_p7 __attribute__ ((aligned(32)));
+static __m256i a_r_p1_m7 __attribute__ ((aligned(32)));
+static __m256i a_r_p1_m5 __attribute__ ((aligned(32)));
+static __m256i a_r_p1_m3 __attribute__ ((aligned(32)));
+static __m256i a_r_p1_m1 __attribute__ ((aligned(32)));
+static __m256i a_r_p1_p1 __attribute__ ((aligned(32)));
+static __m256i a_r_p1_p3 __attribute__ ((aligned(32)));
+static __m256i a_r_p1_p5 __attribute__ ((aligned(32)));
+static __m256i a_r_p1_p7 __attribute__ ((aligned(32)));
+static __m256i a_r_p3_m7 __attribute__ ((aligned(32)));
+static __m256i a_r_p3_m5 __attribute__ ((aligned(32)));
+static __m256i a_r_p3_m3 __attribute__ ((aligned(32)));
+static __m256i a_r_p3_m1 __attribute__ ((aligned(32)));
+static __m256i a_r_p3_p1 __attribute__ ((aligned(32)));
+static __m256i a_r_p3_p3 __attribute__ ((aligned(32)));
+static __m256i a_r_p3_p5 __attribute__ ((aligned(32)));
+static __m256i a_r_p3_p7 __attribute__ ((aligned(32)));
+static __m256i a_r_p5_m7 __attribute__ ((aligned(32)));
+static __m256i a_r_p5_m5 __attribute__ ((aligned(32)));
+static __m256i a_r_p5_m3 __attribute__ ((aligned(32)));
+static __m256i a_r_p5_m1 __attribute__ ((aligned(32)));
+static __m256i a_r_p5_p1 __attribute__ ((aligned(32)));
+static __m256i a_r_p5_p3 __attribute__ ((aligned(32)));
+static __m256i a_r_p5_p5 __attribute__ ((aligned(32)));
+static __m256i a_r_p5_p7 __attribute__ ((aligned(32)));
+static __m256i a_r_p7_m7 __attribute__ ((aligned(32)));
+static __m256i a_r_p7_m5 __attribute__ ((aligned(32)));
+static __m256i a_r_p7_m3 __attribute__ ((aligned(32)));
+static __m256i a_r_p7_m1 __attribute__ ((aligned(32)));
+static __m256i a_r_p7_p1 __attribute__ ((aligned(32)));
+static __m256i a_r_p7_p3 __attribute__ ((aligned(32)));
+static __m256i a_r_p7_p5 __attribute__ ((aligned(32)));
+static __m256i a_r_p7_p7 __attribute__ ((aligned(32)));
+
+// File-scope 256-bit scratch vectors a_i_<x>_<y>, one per (x, y) pair with x, y in m7..p7.
+// NOTE(review): the name matches the a_i operand of prodsum_psi_a_epi16/square_a_epi16;
+// presumably mN/pN stand for -N/+N constellation levels -- confirm against the users.
+// The leading attribute applies to every declarator in the list.
+static __attribute__ ((aligned(32))) __m256i
+    a_i_m7_m7, a_i_m7_m5, a_i_m7_m3, a_i_m7_m1, a_i_m7_p1, a_i_m7_p3, a_i_m7_p5, a_i_m7_p7,
+    a_i_m5_m7, a_i_m5_m5, a_i_m5_m3, a_i_m5_m1, a_i_m5_p1, a_i_m5_p3, a_i_m5_p5, a_i_m5_p7,
+    a_i_m3_m7, a_i_m3_m5, a_i_m3_m3, a_i_m3_m1, a_i_m3_p1, a_i_m3_p3, a_i_m3_p5, a_i_m3_p7,
+    a_i_m1_m7, a_i_m1_m5, a_i_m1_m3, a_i_m1_m1, a_i_m1_p1, a_i_m1_p3, a_i_m1_p5, a_i_m1_p7,
+    a_i_p1_m7, a_i_p1_m5, a_i_p1_m3, a_i_p1_m1, a_i_p1_p1, a_i_p1_p3, a_i_p1_p5, a_i_p1_p7,
+    a_i_p3_m7, a_i_p3_m5, a_i_p3_m3, a_i_p3_m1, a_i_p3_p1, a_i_p3_p3, a_i_p3_p5, a_i_p3_p7,
+    a_i_p5_m7, a_i_p5_m5, a_i_p5_m3, a_i_p5_m1, a_i_p5_p1, a_i_p5_p3, a_i_p5_p5, a_i_p5_p7,
+    a_i_p7_m7, a_i_p7_m5, a_i_p7_m3, a_i_p7_m1, a_i_p7_p1, a_i_p7_p3, a_i_p7_p5, a_i_p7_p7;
+
+// File-scope 256-bit scratch vectors psi_a_<x>_<y>, one per (x, y) pair with x, y in m7..p7.
+// NOTE(review): the name matches the psi_a result operand of prodsum_psi_a_epi16
+// (psi_r*a_r + psi_i*a_i); presumably mN/pN stand for -N/+N levels -- confirm against the users.
+// The leading attribute applies to every declarator in the list.
+static __attribute__ ((aligned(32))) __m256i
+    psi_a_m7_m7, psi_a_m7_m5, psi_a_m7_m3, psi_a_m7_m1, psi_a_m7_p1, psi_a_m7_p3, psi_a_m7_p5, psi_a_m7_p7,
+    psi_a_m5_m7, psi_a_m5_m5, psi_a_m5_m3, psi_a_m5_m1, psi_a_m5_p1, psi_a_m5_p3, psi_a_m5_p5, psi_a_m5_p7,
+    psi_a_m3_m7, psi_a_m3_m5, psi_a_m3_m3, psi_a_m3_m1, psi_a_m3_p1, psi_a_m3_p3, psi_a_m3_p5, psi_a_m3_p7,
+    psi_a_m1_m7, psi_a_m1_m5, psi_a_m1_m3, psi_a_m1_m1, psi_a_m1_p1, psi_a_m1_p3, psi_a_m1_p5, psi_a_m1_p7,
+    psi_a_p1_m7, psi_a_p1_m5, psi_a_p1_m3, psi_a_p1_m1, psi_a_p1_p1, psi_a_p1_p3, psi_a_p1_p5, psi_a_p1_p7,
+    psi_a_p3_m7, psi_a_p3_m5, psi_a_p3_m3, psi_a_p3_m1, psi_a_p3_p1, psi_a_p3_p3, psi_a_p3_p5, psi_a_p3_p7,
+    psi_a_p5_m7, psi_a_p5_m5, psi_a_p5_m3, psi_a_p5_m1, psi_a_p5_p1, psi_a_p5_p3, psi_a_p5_p5, psi_a_p5_p7,
+    psi_a_p7_m7, psi_a_p7_m5, psi_a_p7_m3, psi_a_p7_m1, psi_a_p7_p1, psi_a_p7_p3, psi_a_p7_p5, psi_a_p7_p7;
+
+// File-scope 256-bit scratch vectors a_sq_<x>_<y>, one per (x, y) pair with x, y in m7..p7.
+// NOTE(review): the name matches the a_sq result operand of square_a_epi16 /
+// square_a_64qam_epi16; presumably mN/pN stand for -N/+N levels -- confirm against the users.
+// The leading attribute applies to every declarator in the list.
+static __attribute__ ((aligned(32))) __m256i
+    a_sq_m7_m7, a_sq_m7_m5, a_sq_m7_m3, a_sq_m7_m1, a_sq_m7_p1, a_sq_m7_p3, a_sq_m7_p5, a_sq_m7_p7,
+    a_sq_m5_m7, a_sq_m5_m5, a_sq_m5_m3, a_sq_m5_m1, a_sq_m5_p1, a_sq_m5_p3, a_sq_m5_p5, a_sq_m5_p7,
+    a_sq_m3_m7, a_sq_m3_m5, a_sq_m3_m3, a_sq_m3_m1, a_sq_m3_p1, a_sq_m3_p3, a_sq_m3_p5, a_sq_m3_p7,
+    a_sq_m1_m7, a_sq_m1_m5, a_sq_m1_m3, a_sq_m1_m1, a_sq_m1_p1, a_sq_m1_p3, a_sq_m1_p5, a_sq_m1_p7,
+    a_sq_p1_m7, a_sq_p1_m5, a_sq_p1_m3, a_sq_p1_m1, a_sq_p1_p1, a_sq_p1_p3, a_sq_p1_p5, a_sq_p1_p7,
+    a_sq_p3_m7, a_sq_p3_m5, a_sq_p3_m3, a_sq_p3_m1, a_sq_p3_p1, a_sq_p3_p3, a_sq_p3_p5, a_sq_p3_p7,
+    a_sq_p5_m7, a_sq_p5_m5, a_sq_p5_m3, a_sq_p5_m1, a_sq_p5_p1, a_sq_p5_p3, a_sq_p5_p5, a_sq_p5_p7,
+    a_sq_p7_m7, a_sq_p7_m5, a_sq_p7_m3, a_sq_p7_m1, a_sq_p7_p1, a_sq_p7_p3, a_sq_p7_p5, a_sq_p7_p7;
+
+// File-scope 256-bit scratch vectors bit_met_<x>_<y>, one per (x, y) pair with x, y in m7..p7.
+// NOTE(review): presumably the per-hypothesis bit metrics of the LLR computation, with
+// mN/pN standing for -N/+N levels -- their users are outside this view, confirm there.
+// The leading attribute applies to every declarator in the list.
+static __attribute__ ((aligned(32))) __m256i
+    bit_met_m7_m7, bit_met_m7_m5, bit_met_m7_m3, bit_met_m7_m1, bit_met_m7_p1, bit_met_m7_p3, bit_met_m7_p5, bit_met_m7_p7,
+    bit_met_m5_m7, bit_met_m5_m5, bit_met_m5_m3, bit_met_m5_m1, bit_met_m5_p1, bit_met_m5_p3, bit_met_m5_p5, bit_met_m5_p7,
+    bit_met_m3_m7, bit_met_m3_m5, bit_met_m3_m3, bit_met_m3_m1, bit_met_m3_p1, bit_met_m3_p3, bit_met_m3_p5, bit_met_m3_p7,
+    bit_met_m1_m7, bit_met_m1_m5, bit_met_m1_m3, bit_met_m1_m1, bit_met_m1_p1, bit_met_m1_p3, bit_met_m1_p5, bit_met_m1_p7,
+    bit_met_p1_m7, bit_met_p1_m5, bit_met_p1_m3, bit_met_p1_m1, bit_met_p1_p1, bit_met_p1_p3, bit_met_p1_p5, bit_met_p1_p7,
+    bit_met_p3_m7, bit_met_p3_m5, bit_met_p3_m3, bit_met_p3_m1, bit_met_p3_p1, bit_met_p3_p3, bit_met_p3_p5, bit_met_p3_p7,
+    bit_met_p5_m7, bit_met_p5_m5, bit_met_p5_m3, bit_met_p5_m1, bit_met_p5_p1, bit_met_p5_p3, bit_met_p5_p5, bit_met_p5_p7,
+    bit_met_p7_m7, bit_met_p7_m5, bit_met_p7_m3, bit_met_p7_m1, bit_met_p7_p1, bit_met_p7_p3, bit_met_p7_p5, bit_met_p7_p7;
+
+// File-scope 256-bit scratch vectors y0_p_<a>_<b> and y0_m_<a>_<b> for a, b in {1, 3, 5, 7}.
+// NOTE(review): presumably "y0 plus/minus" combinations with the (a, b)-scaled
+// cross-correlation terms -- their users are outside this view, confirm there.
+// The leading attribute applies to every declarator in the list.
+static __attribute__ ((aligned(32))) __m256i
+    y0_p_1_1, y0_p_1_3, y0_p_1_5, y0_p_1_7,
+    y0_p_3_1, y0_p_3_3, y0_p_3_5, y0_p_3_7,
+    y0_p_5_1, y0_p_5_3, y0_p_5_5, y0_p_5_7,
+    y0_p_7_1, y0_p_7_3, y0_p_7_5, y0_p_7_7,
+    y0_m_1_1, y0_m_1_3, y0_m_1_5, y0_m_1_7,
+    y0_m_3_1, y0_m_3_3, y0_m_3_5, y0_m_3_7,
+    y0_m_5_1, y0_m_5_3, y0_m_5_5, y0_m_5_7,
+    y0_m_7_1, y0_m_7_3, y0_m_7_5, y0_m_7_7;
+
+// General-purpose file-scope scratch vectors. Despite the xmm names these are
+// 256-bit values. Being file-scope, they are shared by every function in this file.
+// The leading attribute applies to every declarator in the list.
+static __attribute__ ((aligned(32))) __m256i
+    xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
+
+// File-scope scratch vectors for de-interleaved samples: y<k>r / y<k>i receive the
+// real / imaginary output of seperate_real_imag_parts (y1r/y1i are filled from the
+// second-stream matched-filter input in qam64_qam16_avx2).
+// NOTE(review): y0* and y2* are presumably the desired stream and a derived term -- confirm.
+static __attribute__ ((aligned(32))) __m256i
+    y0r, y0i,
+    y1r, y1i,
+    y2r, y2i;
+
+// File-scope scratch vectors.
+// NOTE(review): presumably the numerator / denominator maxima of the max-log LLR
+// for one resource element -- their users are outside this view, confirm there.
+static __attribute__ ((aligned(32))) __m256i
+    logmax_num_re0,
+    logmax_den_re0;
+
+// Scratch vectors overwritten by the auxiliary macros in this file (interference_abs_*,
+// prodsum_psi_a_epi16, square_a_*): never keep a live value in them across a macro use.
+static __attribute__ ((aligned(32))) __m256i
+    tmp_result,
+    tmp_result2,
+    tmp_result3,
+    tmp_result4;
+
+//==============================================================================================
+// Auxiliary macros
+
+// Two-level interference magnitude selection, per signed 16-bit element:
+//   int_mag = c1 where psi < int_ch_mag, c2 elsewhere
+// (the "elsewhere" mask is the comparison XOR ones256, i.e. its complement
+// provided ones256 is all-ones -- it is defined outside this section).
+// Expands to several statements and clobbers tmp_result/tmp_result2, so use it
+// only as a full statement inside a braced block.
+#define interference_abs_epi16(psi,int_ch_mag,int_mag,c1,c2) \
+  tmp_result = _mm256_cmpgt_epi16(int_ch_mag,psi); \
+  tmp_result2 = _mm256_xor_si256(tmp_result,(*(__m256i*)&ones256[0])); \
+  tmp_result = _mm256_and_si256(tmp_result,c1); \
+  tmp_result2 = _mm256_and_si256(tmp_result2,c2); \
+  int_mag = _mm256_or_si256(tmp_result,tmp_result2);
+
+// Four-level (64-QAM) interference magnitude selection, per signed 16-bit element.
+// With m = int_ch_mag, 2m = int_two_ch_mag, 3m = int_three_ch_mag (m <= 2m expected):
+//   a = c1 where        psi <  m     (mask tmp_result2)
+//   a = c3 where  m  <= psi <  2m    (mask tmp_result)
+//   a = c5 where  2m <= psi <= 3m    (mask tmp_result3)
+//   a = c7 where        psi >  3m    (mask tmp_result4)
+// The complement used for tmp_result3 is an XOR with ones256 (all-ones expected; it is
+// defined outside this section). Expands to several statements and clobbers
+// tmp_result..tmp_result4, so use it only as a full statement inside a braced block.
+#define interference_abs_64qam_epi16(psi,int_ch_mag,int_two_ch_mag,int_three_ch_mag,a,c1,c3,c5,c7) \
+  tmp_result = _mm256_cmpgt_epi16(int_two_ch_mag,psi); \
+  tmp_result3 = _mm256_xor_si256(tmp_result,(*(__m256i*)&ones256[0])); \
+  tmp_result2 = _mm256_cmpgt_epi16(int_ch_mag,psi); \
+  tmp_result = _mm256_xor_si256(tmp_result,tmp_result2); \
+  tmp_result4 = _mm256_cmpgt_epi16(psi,int_three_ch_mag); \
+  tmp_result3 = _mm256_xor_si256(tmp_result3,tmp_result4); \
+  tmp_result = _mm256_and_si256(tmp_result,c3); \
+  tmp_result2 = _mm256_and_si256(tmp_result2,c1); \
+  tmp_result3 = _mm256_and_si256(tmp_result3,c5); \
+  tmp_result4 = _mm256_and_si256(tmp_result4,c7); \
+  tmp_result = _mm256_or_si256(tmp_result,tmp_result2); \
+  tmp_result3 = _mm256_or_si256(tmp_result3,tmp_result4); \
+  a = _mm256_or_si256(tmp_result,tmp_result3);
+
+// psi_a = psi_r*a_r + psi_i*a_i, per signed 16-bit element.
+// Each product keeps the high 16 bits of the 32-bit result and is shifted left by one
+// (a Q15-style multiply); the two terms are then added with signed saturation.
+// Expands to several statements and clobbers tmp_result/tmp_result2, so use it only
+// as a full statement inside a braced block.
+#define prodsum_psi_a_epi16(psi_r,a_r,psi_i,a_i,psi_a) \
+  tmp_result = _mm256_mulhi_epi16(psi_r,a_r); \
+  tmp_result = _mm256_slli_epi16(tmp_result,1); \
+  tmp_result2 = _mm256_mulhi_epi16(psi_i,a_i); \
+  tmp_result2 = _mm256_slli_epi16(tmp_result2,1); \
+  psi_a = _mm256_adds_epi16(tmp_result,tmp_result2);
+
+// a_sq = int_ch_mag*(a_r^2 + a_i^2)*scale_factor, per signed 16-bit element.
+// For each of a_r and a_i: square, multiply by scale_factor, multiply by int_ch_mag;
+// every multiply keeps the high 16 bits and is followed by a left shift of one
+// (a Q15-style multiply). The two terms are added with signed saturation.
+// Expands to several statements and clobbers tmp_result/tmp_result2, so use it only
+// as a full statement inside a braced block.
+#define square_a_epi16(a_r,a_i,int_ch_mag,scale_factor,a_sq) \
+  tmp_result = _mm256_mulhi_epi16(a_r,a_r); \
+  tmp_result = _mm256_slli_epi16(tmp_result,1); \
+  tmp_result = _mm256_mulhi_epi16(tmp_result,scale_factor); \
+  tmp_result = _mm256_slli_epi16(tmp_result,1); \
+  tmp_result = _mm256_mulhi_epi16(tmp_result,int_ch_mag); \
+  tmp_result = _mm256_slli_epi16(tmp_result,1); \
+  tmp_result2 = _mm256_mulhi_epi16(a_i,a_i); \
+  tmp_result2 = _mm256_slli_epi16(tmp_result2,1); \
+  tmp_result2 = _mm256_mulhi_epi16(tmp_result2,scale_factor); \
+  tmp_result2 = _mm256_slli_epi16(tmp_result2,1); \
+  tmp_result2 = _mm256_mulhi_epi16(tmp_result2,int_ch_mag); \
+  tmp_result2 = _mm256_slli_epi16(tmp_result2,1); \
+  a_sq = _mm256_adds_epi16(tmp_result,tmp_result2);
+
+// 64-QAM variant of square_a_epi16:
+//   a_sq = int_ch_mag*(a_r^2 + a_i^2)*scale_factor, per signed 16-bit element.
+// Same multiply chain (high 16 bits kept after each product), except that the result
+// of the scale_factor multiply is shifted left by 3 instead of 1; the other two
+// products are shifted left by 1. The two terms are added with signed saturation.
+// Expands to several statements and clobbers tmp_result/tmp_result2, so use it only
+// as a full statement inside a braced block.
+#define square_a_64qam_epi16(a_r,a_i,int_ch_mag,scale_factor,a_sq) \
+  tmp_result = _mm256_mulhi_epi16(a_r,a_r); \
+  tmp_result = _mm256_slli_epi16(tmp_result,1); \
+  tmp_result = _mm256_mulhi_epi16(tmp_result,scale_factor); \
+  tmp_result = _mm256_slli_epi16(tmp_result,3); \
+  tmp_result = _mm256_mulhi_epi16(tmp_result,int_ch_mag); \
+  tmp_result = _mm256_slli_epi16(tmp_result,1); \
+  tmp_result2 = _mm256_mulhi_epi16(a_i,a_i); \
+  tmp_result2 = _mm256_slli_epi16(tmp_result2,1); \
+  tmp_result2 = _mm256_mulhi_epi16(tmp_result2,scale_factor); \
+  tmp_result2 = _mm256_slli_epi16(tmp_result2,3); \
+  tmp_result2 = _mm256_mulhi_epi16(tmp_result2,int_ch_mag); \
+  tmp_result2 = _mm256_slli_epi16(tmp_result2,1); \
+  a_sq = _mm256_adds_epi16(tmp_result,tmp_result2);
+
+/*
+ * De-interleave two vectors of 16-bit elements.
+ *
+ * in0 and in1 together hold 32 consecutive 16-bit elements (in0 first). The
+ * even-indexed elements are written to *out_re and the odd-indexed elements to
+ * *out_im, both in their original order. For interleaved complex samples
+ * [Re0 Im0 Re1 Im1 ...] this gives *out_re = Re(0..15) and *out_im = Im(0..15).
+ */
+void seperate_real_imag_parts(__m256i *out_re,
+                              __m256i *out_im,
+                              __m256i in0,
+                              __m256i in1)
+{
+    // Selector 0xd8 picks source elements in the order (0,2,1,3). Applied to the low
+    // words, the high words and then the dwords of each 128-bit lane it turns
+    // [Re Im Re Im Re Im Re Im] into [Re Re Re Re Im Im Im Im].
+    __m256i split0 = _mm256_shuffle_epi32(
+        _mm256_shufflehi_epi16(_mm256_shufflelo_epi16(in0, 0xd8), 0xd8), 0xd8);
+    __m256i split1 = _mm256_shuffle_epi32(
+        _mm256_shufflehi_epi16(_mm256_shufflelo_epi16(in1, 0xd8), 0xd8), 0xd8);
+
+    // split0 = [Re(0,1,2,3)   Im(0,1,2,3)   Re(4,5,6,7)     Im(4,5,6,7)]
+    // split1 = [Re(8,9,10,11) Im(8,9,10,11) Re(12,13,14,15) Im(12,13,14,15)]
+
+    // The low quadwords of every lane give
+    // [Re(0,1,2,3) Re(8,9,10,11) Re(4,5,6,7) Re(12,13,14,15)];
+    // the 64-bit permute (same (0,2,1,3) order) puts them back in sample order.
+    *out_re = _mm256_permute4x64_epi64(_mm256_unpacklo_epi64(split0, split1), 0xd8);
+
+    // Likewise for the high quadwords, which hold the imaginary parts.
+    *out_im = _mm256_permute4x64_epi64(_mm256_unpackhi_epi64(split0, split1), 0xd8);
+}
+
+void qam64_qam16_avx2(short *stream0_in,
+                      short *stream1_in,
+                      short *ch_mag,
+                      short *ch_mag_i,
+                      short *stream0_out,
+                      short *rho01,
+                      int length
+    )
+{
+
+  /*
+    Author: S. Wagner
+    Date: 31-07-12
+
+    Input:
+    stream0_in:  MF filter for 1st stream, i.e., y0=h0'*y
+    stream1_in:  MF filter for 2nd stream, i.e., y1=h1'*y
+    ch_mag:      4*h0/sqrt(42), [Re0 Im0 Re1 Im1] s.t. Im0=Re0, Im1=Re1, etc
+    ch_mag_i:    4*h1/sqrt(42), [Re0 Im0 Re1 Im1] s.t. Im0=Re0, Im1=Re1, etc
+    rho01:       Channel cross correlation, i.e., h1'*h0
+
+    Output:
+    stream0_out: output LLRs for 1st stream
+  */
+
+#if defined(__x86_64__) || defined(__i386__)
+
+  __m256i *rho01_256i      = (__m256i *)rho01;
+  __m256i *stream0_256i_in = (__m256i *)stream0_in;
+  __m256i *stream1_256i_in = (__m256i *)stream1_in;
+  __m256i *ch_mag_256i     = (__m256i *)ch_mag;
+  __m256i *ch_mag_256i_i   = (__m256i *)ch_mag_i;
+
+  __m256i ONE_OVER_SQRT_42 = _mm256_broadcastw_epi16(_mm_set1_epi16(10112)); // round(1/sqrt(42)*2^16)
+  __m256i THREE_OVER_SQRT_42 = _mm256_broadcastw_epi16(_mm_set1_epi16(30337)); // round(3/sqrt(42)*2^16)
+  __m256i FIVE_OVER_SQRT_42 = _mm256_broadcastw_epi16(_mm_set1_epi16(25281)); // round(5/sqrt(42)*2^15)
+  __m256i SEVEN_OVER_SQRT_42 = _mm256_broadcastw_epi16(_mm_set1_epi16(17697)); // round(5/sqrt(42)*2^15)
+  __m256i FORTYNINE_OVER_FOUR_SQRT_42 = _mm256_broadcastw_epi16(_mm_set1_epi16(30969)); // round(49/(4*sqrt(42))*2^14), Q2.14
+  __m256i THIRTYSEVEN_OVER_FOUR_SQRT_42 = _mm256_broadcastw_epi16(_mm_set1_epi16(23385)); // round(37/(4*sqrt(42))*2^14), Q2.14
+  __m256i TWENTYFIVE_OVER_FOUR_SQRT_42 = _mm256_broadcastw_epi16(_mm_set1_epi16(31601)); // round(25/(4*sqrt(42))*2^15)
+  __m256i TWENTYNINE_OVER_FOUR_SQRT_42 = _mm256_broadcastw_epi16(_mm_set1_epi16(18329)); // round(29/(4*sqrt(42))*2^15), Q2.14
+  __m256i SEVENTEEN_OVER_FOUR_SQRT_42 = _mm256_broadcastw_epi16(_mm_set1_epi16(21489)); // round(17/(4*sqrt(42))*2^15)
+  __m256i NINE_OVER_FOUR_SQRT_42 = _mm256_broadcastw_epi16(_mm_set1_epi16(11376)); // round(9/(4*sqrt(42))*2^15)
+  __m256i THIRTEEN_OVER_FOUR_SQRT_42 = _mm256_broadcastw_epi16(_mm_set1_epi16(16433)); // round(13/(4*sqrt(42))*2^15)
+  __m256i FIVE_OVER_FOUR_SQRT_42 = _mm256_broadcastw_epi16(_mm_set1_epi16(6320)); // round(5/(4*sqrt(42))*2^15)
+  __m256i ONE_OVER_FOUR_SQRT_42 = _mm256_broadcastw_epi16(_mm_set1_epi16(1264)); // round(1/(4*sqrt(42))*2^15)
+  __m256i ONE_OVER_SQRT_10_Q15 = _mm256_broadcastw_epi16(_mm_set1_epi16(10362)); // round(1/sqrt(10)*2^15)
+  __m256i THREE_OVER_SQRT_10 = _mm256_broadcastw_epi16(_mm_set1_epi16(31086)); // round(3/sqrt(10)*2^15)
+  __m256i SQRT_10_OVER_FOUR = _mm256_broadcastw_epi16(_mm_set1_epi16(25905)); // round(sqrt(10)/4*2^15)
+
+
+  __m256i ch_mag_int;
+  __m256i ch_mag_des;
+  __m256i ch_mag_98_over_42_with_sigma2;
+  __m256i ch_mag_74_over_42_with_sigma2;
+  __m256i ch_mag_58_over_42_with_sigma2;
+  __m256i ch_mag_50_over_42_with_sigma2;
+  __m256i ch_mag_34_over_42_with_sigma2;
+  __m256i ch_mag_18_over_42_with_sigma2;
+  __m256i ch_mag_26_over_42_with_sigma2;
+  __m256i ch_mag_10_over_42_with_sigma2;
+  __m256i ch_mag_2_over_42_with_sigma2;
+  __m256i  y0r_one_over_sqrt_21;
+  __m256i  y0r_three_over_sqrt_21;
+  __m256i  y0r_five_over_sqrt_21;
+  __m256i  y0r_seven_over_sqrt_21;
+  __m256i  y0i_one_over_sqrt_21;
+  __m256i  y0i_three_over_sqrt_21;
+  __m256i  y0i_five_over_sqrt_21;
+  __m256i  y0i_seven_over_sqrt_21;
+
+#elif defined(__arm__)
+
+#endif
+  int i,j;
+  uint32_t len256 = (length)>>3;
+
+  for (i=0; i<len256; i+=2) {
+
+#if defined(__x86_64__) || defined(__i386__)
+    // Get rho
+      /*
+    xmm0 = rho01_128i[i];
+    xmm1 = rho01_128i[i+1];
+    xmm0 = _mm_shufflelo_epi16(xmm0,0xd8); //_MM_SHUFFLE(0,2,1,3));
+    xmm0 = _mm_shufflehi_epi16(xmm0,0xd8); //_MM_SHUFFLE(0,2,1,3));
+    xmm0 = _mm_shuffle_epi32(xmm0,0xd8); //_MM_SHUFFLE(0,2,1,3));
+    xmm1 = _mm_shufflelo_epi16(xmm1,0xd8); //_MM_SHUFFLE(0,2,1,3));
+    xmm1 = _mm_shufflehi_epi16(xmm1,0xd8); //_MM_SHUFFLE(0,2,1,3));
+    xmm1 = _mm_shuffle_epi32(xmm1,0xd8); //_MM_SHUFFLE(0,2,1,3));
+    //xmm0 = [Re(0,1) Re(2,3) Im(0,1) Im(2,3)]
+    //xmm1 = [Re(4,5) Re(6,7) Im(4,5) Im(6,7)]
+    xmm2 = _mm_unpacklo_epi64(xmm0,xmm1); // Re(rho)
+    xmm3 = _mm_unpackhi_epi64(xmm0,xmm1); // Im(rho)
+      */
+    seperate_real_imag_parts(&xmm2, &xmm3, rho01_256i[i], rho01_256i[i+1]);
+
+    rho_rpi = _mm256_adds_epi16(xmm2,xmm3); // rho = Re(rho) + Im(rho)
+    rho_rmi = _mm256_subs_epi16(xmm2,xmm3); // rho* = Re(rho) - Im(rho)
+
+    // Compute the different rhos
+    rho_rpi_1_1 = _mm256_mulhi_epi16(rho_rpi, ONE_OVER_SQRT_42);
+    rho_rmi_1_1 = _mm256_mulhi_epi16(rho_rmi, ONE_OVER_SQRT_42);
+    rho_rpi_3_3 = _mm256_mulhi_epi16(rho_rpi, THREE_OVER_SQRT_42);
+    rho_rmi_3_3 = _mm256_mulhi_epi16(rho_rmi, THREE_OVER_SQRT_42);
+    rho_rpi_5_5 = _mm256_mulhi_epi16(rho_rpi, FIVE_OVER_SQRT_42);
+    rho_rmi_5_5 = _mm256_mulhi_epi16(rho_rmi, FIVE_OVER_SQRT_42);
+    rho_rpi_7_7 = _mm256_mulhi_epi16(rho_rpi, SEVEN_OVER_SQRT_42);
+    rho_rmi_7_7 = _mm256_mulhi_epi16(rho_rmi, SEVEN_OVER_SQRT_42);
+
+    rho_rpi_5_5 = _mm256_slli_epi16(rho_rpi_5_5, 1);
+    rho_rmi_5_5 = _mm256_slli_epi16(rho_rmi_5_5, 1);
+    rho_rpi_7_7 = _mm256_slli_epi16(rho_rpi_7_7, 2);
+    rho_rmi_7_7 = _mm256_slli_epi16(rho_rmi_7_7, 2);
+
+    xmm4 = _mm256_mulhi_epi16(xmm2, ONE_OVER_SQRT_42);
+    xmm5 = _mm256_mulhi_epi16(xmm3, ONE_OVER_SQRT_42);
+    xmm6 = _mm256_mulhi_epi16(xmm3, THREE_OVER_SQRT_42);
+    xmm7 = _mm256_mulhi_epi16(xmm3, FIVE_OVER_SQRT_42);
+    xmm8 = _mm256_mulhi_epi16(xmm3, SEVEN_OVER_SQRT_42);
+    xmm7 = _mm256_slli_epi16(xmm7, 1);
+    xmm8 = _mm256_slli_epi16(xmm8, 2);
+
+    rho_rpi_1_3 = _mm256_adds_epi16(xmm4, xmm6);
+    rho_rmi_1_3 = _mm256_subs_epi16(xmm4, xmm6);
+    rho_rpi_1_5 = _mm256_adds_epi16(xmm4, xmm7);
+    rho_rmi_1_5 = _mm256_subs_epi16(xmm4, xmm7);
+    rho_rpi_1_7 = _mm256_adds_epi16(xmm4, xmm8);
+    rho_rmi_1_7 = _mm256_subs_epi16(xmm4, xmm8);
+
+    xmm4 = _mm256_mulhi_epi16(xmm2, THREE_OVER_SQRT_42);
+    rho_rpi_3_1 = _mm256_adds_epi16(xmm4, xmm5);
+    rho_rmi_3_1 = _mm256_subs_epi16(xmm4, xmm5);
+    rho_rpi_3_5 = _mm256_adds_epi16(xmm4, xmm7);
+    rho_rmi_3_5 = _mm256_subs_epi16(xmm4, xmm7);
+    rho_rpi_3_7 = _mm256_adds_epi16(xmm4, xmm8);
+    rho_rmi_3_7 = _mm256_subs_epi16(xmm4, xmm8);
+
+    xmm4 = _mm256_mulhi_epi16(xmm2, FIVE_OVER_SQRT_42);
+    xmm4 = _mm256_slli_epi16(xmm4, 1);
+    rho_rpi_5_1 = _mm256_adds_epi16(xmm4, xmm5);
+    rho_rmi_5_1 = _mm256_subs_epi16(xmm4, xmm5);
+    rho_rpi_5_3 = _mm256_adds_epi16(xmm4, xmm6);
+    rho_rmi_5_3 = _mm256_subs_epi16(xmm4, xmm6);
+    rho_rpi_5_7 = _mm256_adds_epi16(xmm4, xmm8);
+    rho_rmi_5_7 = _mm256_subs_epi16(xmm4, xmm8);
+
+    xmm4 = _mm256_mulhi_epi16(xmm2, SEVEN_OVER_SQRT_42);
+    xmm4 = _mm256_slli_epi16(xmm4, 2);
+    rho_rpi_7_1 = _mm256_adds_epi16(xmm4, xmm5);
+    rho_rmi_7_1 = _mm256_subs_epi16(xmm4, xmm5);
+    rho_rpi_7_3 = _mm256_adds_epi16(xmm4, xmm6);
+    rho_rmi_7_3 = _mm256_subs_epi16(xmm4, xmm6);
+    rho_rpi_7_5 = _mm256_adds_epi16(xmm4, xmm7);
+    rho_rmi_7_5 = _mm256_subs_epi16(xmm4, xmm7);
+
+    // Rearrange interfering MF output
+    /*
+    xmm0 = stream1_128i_in[i];
+    xmm1 = stream1_128i_in[i+1];
+    xmm0 = _mm256_shufflelo_epi16(xmm0,0xd8); //_MM_SHUFFLE(0,2,1,3));
+    xmm0 = _mm256_shufflehi_epi16(xmm0,0xd8); //_MM_SHUFFLE(0,2,1,3));
+    xmm0 = _mm256_shuffle_epi32(xmm0,0xd8); //_MM_SHUFFLE(0,2,1,3));
+    xmm1 = _mm256_shufflelo_epi16(xmm1,0xd8); //_MM_SHUFFLE(0,2,1,3));
+    xmm1 = _mm256_shufflehi_epi16(xmm1,0xd8); //_MM_SHUFFLE(0,2,1,3));
+    xmm1 = _mm256_shuffle_epi32(xmm1,0xd8); //_MM_SHUFFLE(0,2,1,3));
+    //xmm0 = [Re(0,1) Re(2,3) Im(0,1) Im(2,3)]
+    //xmm1 = [Re(4,5) Re(6,7) Im(4,5) Im(6,7)]
+    y1r = _mm256_unpacklo_epi64(xmm0,xmm1); //[y1r(1),y1r(2),y1r(3),y1r(4)]
+    y1i = _mm256_unpackhi_epi64(xmm0,xmm1); //[y1i(1),y1i(2),y1i(3),y1i(4)]
+    */
+
+    seperate_real_imag_parts(&y1r, &y1i, stream1_256i_in[i], stream1_256i_in[i+1]);
+
+    // Psi_r calculation from rho_rpi or rho_rmi
+    xmm0 = _mm256_broadcastw_epi16(_mm_set1_epi16(0));// ZERO for abs_pi16
+    xmm2 = _mm256_subs_epi16(rho_rpi_7_7, y1r);
+    psi_r_p7_p7 = _mm256_abs_epi16(xmm2);
+    xmm2 = _mm256_subs_epi16(rho_rpi_7_5, y1r);
+    psi_r_p7_p5 = _mm256_abs_epi16(xmm2);
+    xmm2 = _mm256_subs_epi16(rho_rpi_7_3, y1r);
+    psi_r_p7_p3 = _mm256_abs_epi16(xmm2);
+    xmm2 = _mm256_subs_epi16(rho_rpi_7_1, y1r);
+    psi_r_p7_p1 = _mm256_abs_epi16(xmm2);
+    xmm2 = _mm256_subs_epi16(rho_rmi_7_1, y1r);
+    psi_r_p7_m1 = _mm256_abs_epi16(xmm2);
+    xmm2 = _mm256_subs_epi16(rho_rmi_7_3, y1r);
+    psi_r_p7_m3 = _mm256_abs_epi16(xmm2);
+    xmm2 = _mm256_subs_epi16(rho_rmi_7_5, y1r);
+    psi_r_p7_m5 = _mm256_abs_epi16(xmm2);
+    xmm2 = _mm256_subs_epi16(rho_rmi_7_7, y1r);
+    psi_r_p7_m7 = _mm256_abs_epi16(xmm2);
+    xmm2 = _mm256_subs_epi16(rho_rpi_5_7, y1r);
+    psi_r_p5_p7 = _mm256_abs_epi16(xmm2);
+    xmm2 = _mm256_subs_epi16(rho_rpi_5_5, y1r);
+    psi_r_p5_p5 = _mm256_abs_epi16(xmm2);
+    xmm2 = _mm256_subs_epi16(rho_rpi_5_3, y1r);
+    psi_r_p5_p3 = _mm256_abs_epi16(xmm2);
+    xmm2 = _mm256_subs_epi16(rho_rpi_5_1, y1r);
+    psi_r_p5_p1 = _mm256_abs_epi16(xmm2);
+    xmm2 = _mm256_subs_epi16(rho_rmi_5_1, y1r);
+    psi_r_p5_m1 = _mm256_abs_epi16(xmm2);
+    xmm2 = _mm256_subs_epi16(rho_rmi_5_3, y1r);
+    psi_r_p5_m3 = _mm256_abs_epi16(xmm2);
+    xmm2 = _mm256_subs_epi16(rho_rmi_5_5, y1r);
+    psi_r_p5_m5 = _mm256_abs_epi16(xmm2);
+    xmm2 = _mm256_subs_epi16(rho_rmi_5_7, y1r);
+    psi_r_p5_m7 = _mm256_abs_epi16(xmm2);
+    xmm2 = _mm256_subs_epi16(rho_rpi_3_7, y1r);
+    psi_r_p3_p7 = _mm256_abs_epi16(xmm2);
+    xmm2 = _mm256_subs_epi16(rho_rpi_3_5, y1r);
+    psi_r_p3_p5 = _mm256_abs_epi16(xmm2);
+    xmm2 = _mm256_subs_epi16(rho_rpi_3_3, y1r);
+    psi_r_p3_p3 = _mm256_abs_epi16(xmm2);
+    xmm2 = _mm256_subs_epi16(rho_rpi_3_1, y1r);
+    psi_r_p3_p1 = _mm256_abs_epi16(xmm2);
+    xmm2 = _mm256_subs_epi16(rho_rmi_3_1, y1r);
+    psi_r_p3_m1 = _mm256_abs_epi16(xmm2);
+    xmm2 = _mm256_subs_epi16(rho_rmi_3_3, y1r);
+    psi_r_p3_m3 = _mm256_abs_epi16(xmm2);
+    xmm2 = _mm256_subs_epi16(rho_rmi_3_5, y1r);
+    psi_r_p3_m5 = _mm256_abs_epi16(xmm2);
+    xmm2 = _mm256_subs_epi16(rho_rmi_3_7, y1r);
+    psi_r_p3_m7 = _mm256_abs_epi16(xmm2);
+    xmm2 = _mm256_subs_epi16(rho_rpi_1_7, y1r);
+    psi_r_p1_p7 = _mm256_abs_epi16(xmm2);
+    xmm2 = _mm256_subs_epi16(rho_rpi_1_5, y1r);
+    psi_r_p1_p5 = _mm256_abs_epi16(xmm2);
+    xmm2 = _mm256_subs_epi16(rho_rpi_1_3, y1r);
+    psi_r_p1_p3 = _mm256_abs_epi16(xmm2);
+    xmm2 = _mm256_subs_epi16(rho_rpi_1_1, y1r);
+    psi_r_p1_p1 = _mm256_abs_epi16(xmm2);
+    xmm2 = _mm256_subs_epi16(rho_rmi_1_1, y1r);
+    psi_r_p1_m1 = _mm256_abs_epi16(xmm2);
+    xmm2 = _mm256_subs_epi16(rho_rmi_1_3, y1r);
+    psi_r_p1_m3 = _mm256_abs_epi16(xmm2);
+    xmm2 = _mm256_subs_epi16(rho_rmi_1_5, y1r);
+    psi_r_p1_m5 = _mm256_abs_epi16(xmm2);
+    xmm2 = _mm256_subs_epi16(rho_rmi_1_7, y1r);
+    psi_r_p1_m7 = _mm256_abs_epi16(xmm2);
+
+    xmm2 = _mm256_adds_epi16(rho_rmi_1_7, y1r);
+    psi_r_m1_p7 = _mm256_abs_epi16(xmm2);
+    xmm2 = _mm256_adds_epi16(rho_rmi_1_5, y1r);
+    psi_r_m1_p5 = _mm256_abs_epi16(xmm2);
+    xmm2 = _mm256_adds_epi16(rho_rmi_1_3, y1r);
+    psi_r_m1_p3 = _mm256_abs_epi16(xmm2);
+    xmm2 = _mm256_adds_epi16(rho_rmi_1_1, y1r);
+    psi_r_m1_p1 = _mm256_abs_epi16(xmm2);
+    xmm2 = _mm256_adds_epi16(rho_rpi_1_1, y1r);
+    psi_r_m1_m1 = _mm256_abs_epi16(xmm2);
+    xmm2 = _mm256_adds_epi16(rho_rpi_1_3, y1r);
+    psi_r_m1_m3 = _mm256_abs_epi16(xmm2);
+    xmm2 = _mm256_adds_epi16(rho_rpi_1_5, y1r);
+    psi_r_m1_m5 = _mm256_abs_epi16(xmm2);
+    xmm2 = _mm256_adds_epi16(rho_rpi_1_7, y1r);
+    psi_r_m1_m7 = _mm256_abs_epi16(xmm2);
+    xmm2 = _mm256_adds_epi16(rho_rmi_3_7, y1r);
+    psi_r_m3_p7 = _mm256_abs_epi16(xmm2);
+    xmm2 = _mm256_adds_epi16(rho_rmi_3_5, y1r);
+    psi_r_m3_p5 = _mm256_abs_epi16(xmm2);
+    xmm2 = _mm256_adds_epi16(rho_rmi_3_3, y1r);
+    psi_r_m3_p3 = _mm256_abs_epi16(xmm2);
+    xmm2 = _mm256_adds_epi16(rho_rmi_3_1, y1r);
+    psi_r_m3_p1 = _mm256_abs_epi16(xmm2);
+    xmm2 = _mm256_adds_epi16(rho_rpi_3_1, y1r);
+    psi_r_m3_m1 = _mm256_abs_epi16(xmm2);
+    xmm2 = _mm256_adds_epi16(rho_rpi_3_3, y1r);
+    psi_r_m3_m3 = _mm256_abs_epi16(xmm2);
+    xmm2 = _mm256_adds_epi16(rho_rpi_3_5, y1r);
+    psi_r_m3_m5 = _mm256_abs_epi16(xmm2);
+    xmm2 = _mm256_adds_epi16(rho_rpi_3_7, y1r);
+    psi_r_m3_m7 = _mm256_abs_epi16(xmm2);
+    xmm2 = _mm256_adds_epi16(rho_rmi_5_7, y1r);
+    psi_r_m5_p7 = _mm256_abs_epi16(xmm2);
+    xmm2 = _mm256_adds_epi16(rho_rmi_5_5, y1r);
+    psi_r_m5_p5 = _mm256_abs_epi16(xmm2);
+    xmm2 = _mm256_adds_epi16(rho_rmi_5_3, y1r);
+    psi_r_m5_p3 = _mm256_abs_epi16(xmm2);
+    xmm2 = _mm256_adds_epi16(rho_rmi_5_1, y1r);
+    psi_r_m5_p1 = _mm256_abs_epi16(xmm2);
+    xmm2 = _mm256_adds_epi16(rho_rpi_5_1, y1r);
+    psi_r_m5_m1 = _mm256_abs_epi16(xmm2);
+    xmm2 = _mm256_adds_epi16(rho_rpi_5_3, y1r);
+    psi_r_m5_m3 = _mm256_abs_epi16(xmm2);
+    xmm2 = _mm256_adds_epi16(rho_rpi_5_5, y1r);
+    psi_r_m5_m5 = _mm256_abs_epi16(xmm2);
+    xmm2 = _mm256_adds_epi16(rho_rpi_5_7, y1r);
+    psi_r_m5_m7 = _mm256_abs_epi16(xmm2);
+    xmm2 = _mm256_adds_epi16(rho_rmi_7_7, y1r);
+    psi_r_m7_p7 = _mm256_abs_epi16(xmm2);
+    xmm2 = _mm256_adds_epi16(rho_rmi_7_5, y1r);
+    psi_r_m7_p5 = _mm256_abs_epi16(xmm2);
+    xmm2 = _mm256_adds_epi16(rho_rmi_7_3, y1r);
+    psi_r_m7_p3 = _mm256_abs_epi16(xmm2);
+    xmm2 = _mm256_adds_epi16(rho_rmi_7_1, y1r);
+    psi_r_m7_p1 = _mm256_abs_epi16(xmm2);
+    xmm2 = _mm256_adds_epi16(rho_rpi_7_1, y1r);
+    psi_r_m7_m1 = _mm256_abs_epi16(xmm2);
+    xmm2 = _mm256_adds_epi16(rho_rpi_7_3, y1r);
+    psi_r_m7_m3 = _mm256_abs_epi16(xmm2);
+    xmm2 = _mm256_adds_epi16(rho_rpi_7_5, y1r);
+    psi_r_m7_m5 = _mm256_abs_epi16(xmm2);
+    xmm2 = _mm256_adds_epi16(rho_rpi_7_7, y1r);
+    psi_r_m7_m7 = _mm256_abs_epi16(xmm2);
+
+    // Psi_i calculation from rho_rpi or rho_rmi
+    xmm2 = _mm256_subs_epi16(rho_rmi_7_7, y1i);
+    psi_i_p7_p7 = _mm256_abs_epi16(xmm2);
+    xmm2 = _mm256_subs_epi16(rho_rmi_5_7, y1i);
+    psi_i_p7_p5 = _mm256_abs_epi16(xmm2);
+    xmm2 = _mm256_subs_epi16(rho_rmi_3_7, y1i);
+    psi_i_p7_p3 = _mm256_abs_epi16(xmm2);
+    xmm2 = _mm256_subs_epi16(rho_rmi_1_7, y1i);
+    psi_i_p7_p1 = _mm256_abs_epi16(xmm2);
+    xmm2 = _mm256_adds_epi16(rho_rpi_1_7, y1i);
+    psi_i_p7_m1 = _mm256_abs_epi16(xmm2);
+    xmm2 = _mm256_adds_epi16(rho_rpi_3_7, y1i);
+    psi_i_p7_m3 = _mm256_abs_epi16(xmm2);
+    xmm2 = _mm256_adds_epi16(rho_rpi_5_7, y1i);
+    psi_i_p7_m5 = _mm256_abs_epi16(xmm2);
+    xmm2 = _mm256_adds_epi16(rho_rpi_7_7, y1i);
+    psi_i_p7_m7 = _mm256_abs_epi16(xmm2);
+    xmm2 = _mm256_subs_epi16(rho_rmi_7_5, y1i);
+    psi_i_p5_p7 = _mm256_abs_epi16(xmm2);
+    xmm2 = _mm256_subs_epi16(rho_rmi_5_5, y1i);
+    psi_i_p5_p5 = _mm256_abs_epi16(xmm2);
+    xmm2 = _mm256_subs_epi16(rho_rmi_3_5, y1i);
+    psi_i_p5_p3 = _mm256_abs_epi16(xmm2);
+    xmm2 = _mm256_subs_epi16(rho_rmi_1_5, y1i);
+    psi_i_p5_p1 = _mm256_abs_epi16(xmm2);
+    xmm2 = _mm256_adds_epi16(rho_rpi_1_5, y1i);
+    psi_i_p5_m1 = _mm256_abs_epi16(xmm2);
+    xmm2 = _mm256_adds_epi16(rho_rpi_3_5, y1i);
+    psi_i_p5_m3 = _mm256_abs_epi16(xmm2);
+    xmm2 = _mm256_adds_epi16(rho_rpi_5_5, y1i);
+    psi_i_p5_m5 = _mm256_abs_epi16(xmm2);
+    xmm2 = _mm256_adds_epi16(rho_rpi_7_5, y1i);
+    psi_i_p5_m7 = _mm256_abs_epi16(xmm2);
+    xmm2 = _mm256_subs_epi16(rho_rmi_7_3, y1i);
+    psi_i_p3_p7 = _mm256_abs_epi16(xmm2);
+    xmm2 = _mm256_subs_epi16(rho_rmi_5_3, y1i);
+    psi_i_p3_p5 = _mm256_abs_epi16(xmm2);
+    xmm2 = _mm256_subs_epi16(rho_rmi_3_3, y1i);
+    psi_i_p3_p3 = _mm256_abs_epi16(xmm2);
+    xmm2 = _mm256_subs_epi16(rho_rmi_1_3, y1i);
+    psi_i_p3_p1 = _mm256_abs_epi16(xmm2);
+    xmm2 = _mm256_adds_epi16(rho_rpi_1_3, y1i);
+    psi_i_p3_m1 = _mm256_abs_epi16(xmm2);
+    xmm2 = _mm256_adds_epi16(rho_rpi_3_3, y1i);
+    psi_i_p3_m3 = _mm256_abs_epi16(xmm2);
+    xmm2 = _mm256_adds_epi16(rho_rpi_5_3, y1i);
+    psi_i_p3_m5 = _mm256_abs_epi16(xmm2);
+    xmm2 = _mm256_adds_epi16(rho_rpi_7_3, y1i);
+    psi_i_p3_m7 = _mm256_abs_epi16(xmm2);
+    xmm2 = _mm256_subs_epi16(rho_rmi_7_1, y1i);
+    psi_i_p1_p7 = _mm256_abs_epi16(xmm2);
+    xmm2 = _mm256_subs_epi16(rho_rmi_5_1, y1i);
+    psi_i_p1_p5 = _mm256_abs_epi16(xmm2);
+    xmm2 = _mm256_subs_epi16(rho_rmi_3_1, y1i);
+    psi_i_p1_p3 = _mm256_abs_epi16(xmm2);
+    xmm2 = _mm256_subs_epi16(rho_rmi_1_1, y1i);
+    psi_i_p1_p1 = _mm256_abs_epi16(xmm2);
+    xmm2 = _mm256_adds_epi16(rho_rpi_1_1, y1i);
+    psi_i_p1_m1 = _mm256_abs_epi16(xmm2);
+    xmm2 = _mm256_adds_epi16(rho_rpi_3_1, y1i);
+    psi_i_p1_m3 = _mm256_abs_epi16(xmm2);
+    xmm2 = _mm256_adds_epi16(rho_rpi_5_1, y1i);
+    psi_i_p1_m5 = _mm256_abs_epi16(xmm2);
+    xmm2 = _mm256_adds_epi16(rho_rpi_7_1, y1i);
+    psi_i_p1_m7 = _mm256_abs_epi16(xmm2);
+
+    xmm2 = _mm256_subs_epi16(rho_rpi_7_1, y1i);
+    psi_i_m1_p7 = _mm256_abs_epi16(xmm2);
+    xmm2 = _mm256_subs_epi16(rho_rpi_5_1, y1i);
+    psi_i_m1_p5 = _mm256_abs_epi16(xmm2);
+    xmm2 = _mm256_subs_epi16(rho_rpi_3_1, y1i);
+    psi_i_m1_p3 = _mm256_abs_epi16(xmm2);
+    xmm2 = _mm256_subs_epi16(rho_rpi_1_1, y1i);
+    psi_i_m1_p1 = _mm256_abs_epi16(xmm2);
+    xmm2 = _mm256_adds_epi16(rho_rmi_1_1, y1i);
+    psi_i_m1_m1 = _mm256_abs_epi16(xmm2);
+    xmm2 = _mm256_adds_epi16(rho_rmi_3_1, y1i);
+    psi_i_m1_m3 = _mm256_abs_epi16(xmm2);
+    xmm2 = _mm256_adds_epi16(rho_rmi_5_1, y1i);
+    psi_i_m1_m5 = _mm256_abs_epi16(xmm2);
+    xmm2 = _mm256_adds_epi16(rho_rmi_7_1, y1i);
+    psi_i_m1_m7 = _mm256_abs_epi16(xmm2);
+    xmm2 = _mm256_subs_epi16(rho_rpi_7_3, y1i);
+    psi_i_m3_p7 = _mm256_abs_epi16(xmm2);
+    xmm2 = _mm256_subs_epi16(rho_rpi_5_3, y1i);
+    psi_i_m3_p5 = _mm256_abs_epi16(xmm2);
+    xmm2 = _mm256_subs_epi16(rho_rpi_3_3, y1i);
+    psi_i_m3_p3 = _mm256_abs_epi16(xmm2);
+    xmm2 = _mm256_subs_epi16(rho_rpi_1_3, y1i);
+    psi_i_m3_p1 = _mm256_abs_epi16(xmm2);
+    xmm2 = _mm256_adds_epi16(rho_rmi_1_3, y1i);
+    psi_i_m3_m1 = _mm256_abs_epi16(xmm2);
+    xmm2 = _mm256_adds_epi16(rho_rmi_3_3, y1i);
+    psi_i_m3_m3 = _mm256_abs_epi16(xmm2);
+    xmm2 = _mm256_adds_epi16(rho_rmi_5_3, y1i);
+    psi_i_m3_m5 = _mm256_abs_epi16(xmm2);
+    xmm2 = _mm256_adds_epi16(rho_rmi_7_3, y1i);
+    psi_i_m3_m7 = _mm256_abs_epi16(xmm2);
+    xmm2 = _mm256_subs_epi16(rho_rpi_7_5, y1i);
+    psi_i_m5_p7 = _mm256_abs_epi16(xmm2);
+    xmm2 = _mm256_subs_epi16(rho_rpi_5_5, y1i);
+    psi_i_m5_p5 = _mm256_abs_epi16(xmm2);
+    xmm2 = _mm256_subs_epi16(rho_rpi_3_5, y1i);
+    psi_i_m5_p3 = _mm256_abs_epi16(xmm2);
+    xmm2 = _mm256_subs_epi16(rho_rpi_1_5, y1i);
+    psi_i_m5_p1 = _mm256_abs_epi16(xmm2);
+    xmm2 = _mm256_adds_epi16(rho_rmi_1_5, y1i);
+    psi_i_m5_m1 = _mm256_abs_epi16(xmm2);
+    xmm2 = _mm256_adds_epi16(rho_rmi_3_5, y1i);
+    psi_i_m5_m3 = _mm256_abs_epi16(xmm2);
+    xmm2 = _mm256_adds_epi16(rho_rmi_5_5, y1i);
+    psi_i_m5_m5 = _mm256_abs_epi16(xmm2);
+    xmm2 = _mm256_adds_epi16(rho_rmi_7_5, y1i);
+    psi_i_m5_m7 = _mm256_abs_epi16(xmm2);
+    xmm2 = _mm256_subs_epi16(rho_rpi_7_7, y1i);
+    psi_i_m7_p7 = _mm256_abs_epi16(xmm2);
+    xmm2 = _mm256_subs_epi16(rho_rpi_5_7, y1i);
+    psi_i_m7_p5 = _mm256_abs_epi16(xmm2);
+    xmm2 = _mm256_subs_epi16(rho_rpi_3_7, y1i);
+    psi_i_m7_p3 = _mm256_abs_epi16(xmm2);
+    xmm2 = _mm256_subs_epi16(rho_rpi_1_7, y1i);
+    psi_i_m7_p1 = _mm256_abs_epi16(xmm2);
+    xmm2 = _mm256_adds_epi16(rho_rmi_1_7, y1i);
+    psi_i_m7_m1 = _mm256_abs_epi16(xmm2);
+    xmm2 = _mm256_adds_epi16(rho_rmi_3_7, y1i);
+    psi_i_m7_m3 = _mm256_abs_epi16(xmm2);
+    xmm2 = _mm256_adds_epi16(rho_rmi_5_7, y1i);
+    psi_i_m7_m5 = _mm256_abs_epi16(xmm2);
+    xmm2 = _mm256_adds_epi16(rho_rmi_7_7, y1i);
+    psi_i_m7_m7 = _mm256_abs_epi16(xmm2);
+
+/*
+    // Rearrange desired MF output
+    xmm0 = stream0_128i_in[i];
+    xmm1 = stream0_128i_in[i+1];
+    xmm0 = _mm256_shufflelo_epi16(xmm0,0xd8); //_MM_SHUFFLE(0,2,1,3));
+    xmm0 = _mm256_shufflehi_epi16(xmm0,0xd8); //_MM_SHUFFLE(0,2,1,3));
+    xmm0 = _mm256_shuffle_epi32(xmm0,0xd8); //_MM_SHUFFLE(0,2,1,3));
+    xmm1 = _mm256_shufflelo_epi16(xmm1,0xd8); //_MM_SHUFFLE(0,2,1,3));
+    xmm1 = _mm256_shufflehi_epi16(xmm1,0xd8); //_MM_SHUFFLE(0,2,1,3));
+    xmm1 = _mm256_shuffle_epi32(xmm1,0xd8); //_MM_SHUFFLE(0,2,1,3));
+    //xmm0 = [Re(0,1) Re(2,3) Im(0,1) Im(2,3)]
+    //xmm1 = [Re(4,5) Re(6,7) Im(4,5) Im(6,7)]
+    y0r = _mm256_unpacklo_epi64(xmm0,xmm1); // = [y0r(1),y0r(2),y0r(3),y0r(4)]
+    y0i = _mm256_unpackhi_epi64(xmm0,xmm1);
+*/
+    seperate_real_imag_parts(&y0r, &y0i, stream0_256i_in[i], stream0_256i_in[i+1]);
+
+    /*
+    // Rearrange desired channel magnitudes
+    xmm2 = ch_mag_128i[i]; // = [|h|^2(1),|h|^2(1),|h|^2(2),|h|^2(2)]*(2/sqrt(10))
+    xmm3 = ch_mag_128i[i+1]; // = [|h|^2(3),|h|^2(3),|h|^2(4),|h|^2(4)]*(2/sqrt(10))
+    xmm2 = _mm256_shufflelo_epi16(xmm2,0xd8); //_MM_SHUFFLE(0,2,1,3));
+    xmm2 = _mm256_shufflehi_epi16(xmm2,0xd8); //_MM_SHUFFLE(0,2,1,3));
+    xmm2 = _mm256_shuffle_epi32(xmm2,0xd8); //_MM_SHUFFLE(0,2,1,3));
+    xmm3 = _mm256_shufflelo_epi16(xmm3,0xd8); //_MM_SHUFFLE(0,2,1,3));
+    xmm3 = _mm256_shufflehi_epi16(xmm3,0xd8); //_MM_SHUFFLE(0,2,1,3));
+    xmm3 = _mm256_shuffle_epi32(xmm3,0xd8); //_MM_SHUFFLE(0,2,1,3));
+    ch_mag_des = _mm256_unpacklo_epi64(xmm2,xmm3);
+    */
+
+    seperate_real_imag_parts(&ch_mag_des, &xmm2, ch_mag_256i[i], ch_mag_256i[i+1]);
+
+    // Rearrange interfering channel magnitudes
+    /*
+    xmm2 = ch_mag_128i_i[i];
+    xmm3 = ch_mag_128i_i[i+1];
+    xmm2 = _mm256_shufflelo_epi16(xmm2,0xd8); //_MM_SHUFFLE(0,2,1,3));
+    xmm2 = _mm256_shufflehi_epi16(xmm2,0xd8); //_MM_SHUFFLE(0,2,1,3));
+    xmm2 = _mm256_shuffle_epi32(xmm2,0xd8); //_MM_SHUFFLE(0,2,1,3));
+    xmm3 = _mm256_shufflelo_epi16(xmm3,0xd8); //_MM_SHUFFLE(0,2,1,3));
+    xmm3 = _mm256_shufflehi_epi16(xmm3,0xd8); //_MM_SHUFFLE(0,2,1,3));
+    xmm3 = _mm256_shuffle_epi32(xmm3,0xd8); //_MM_SHUFFLE(0,2,1,3));
+    ch_mag_int  = _mm256_unpacklo_epi64(xmm2,xmm3);
+    */
+
+    seperate_real_imag_parts(&ch_mag_int, &xmm2, ch_mag_256i_i[i], ch_mag_256i_i[i+1]);
+
+    y0r_one_over_sqrt_21   = _mm256_mulhi_epi16(y0r, ONE_OVER_SQRT_42);
+    y0r_three_over_sqrt_21 = _mm256_mulhi_epi16(y0r, THREE_OVER_SQRT_42);
+    y0r_five_over_sqrt_21  = _mm256_mulhi_epi16(y0r, FIVE_OVER_SQRT_42);
+    y0r_five_over_sqrt_21  = _mm256_slli_epi16(y0r_five_over_sqrt_21, 1);
+    y0r_seven_over_sqrt_21 = _mm256_mulhi_epi16(y0r, SEVEN_OVER_SQRT_42);
+    y0r_seven_over_sqrt_21 = _mm256_slli_epi16(y0r_seven_over_sqrt_21, 2); // Q2.14
+
+    y0i_one_over_sqrt_21   = _mm256_mulhi_epi16(y0i, ONE_OVER_SQRT_42);
+    y0i_three_over_sqrt_21 = _mm256_mulhi_epi16(y0i, THREE_OVER_SQRT_42);
+    y0i_five_over_sqrt_21  = _mm256_mulhi_epi16(y0i, FIVE_OVER_SQRT_42);
+    y0i_five_over_sqrt_21  = _mm256_slli_epi16(y0i_five_over_sqrt_21, 1);
+    y0i_seven_over_sqrt_21 = _mm256_mulhi_epi16(y0i, SEVEN_OVER_SQRT_42);
+    y0i_seven_over_sqrt_21 = _mm256_slli_epi16(y0i_seven_over_sqrt_21, 2); // Q2.14
+
+    y0_p_7_1 = _mm256_adds_epi16(y0r_seven_over_sqrt_21, y0i_one_over_sqrt_21);
+    y0_p_7_3 = _mm256_adds_epi16(y0r_seven_over_sqrt_21, y0i_three_over_sqrt_21);
+    y0_p_7_5 = _mm256_adds_epi16(y0r_seven_over_sqrt_21, y0i_five_over_sqrt_21);
+    y0_p_7_7 = _mm256_adds_epi16(y0r_seven_over_sqrt_21, y0i_seven_over_sqrt_21);
+    y0_p_5_1 = _mm256_adds_epi16(y0r_five_over_sqrt_21, y0i_one_over_sqrt_21);
+    y0_p_5_3 = _mm256_adds_epi16(y0r_five_over_sqrt_21, y0i_three_over_sqrt_21);
+    y0_p_5_5 = _mm256_adds_epi16(y0r_five_over_sqrt_21, y0i_five_over_sqrt_21);
+    y0_p_5_7 = _mm256_adds_epi16(y0r_five_over_sqrt_21, y0i_seven_over_sqrt_21);
+    y0_p_3_1 = _mm256_adds_epi16(y0r_three_over_sqrt_21, y0i_one_over_sqrt_21);
+    y0_p_3_3 = _mm256_adds_epi16(y0r_three_over_sqrt_21, y0i_three_over_sqrt_21);
+    y0_p_3_5 = _mm256_adds_epi16(y0r_three_over_sqrt_21, y0i_five_over_sqrt_21);
+    y0_p_3_7 = _mm256_adds_epi16(y0r_three_over_sqrt_21, y0i_seven_over_sqrt_21);
+    y0_p_1_1 = _mm256_adds_epi16(y0r_one_over_sqrt_21, y0i_one_over_sqrt_21);
+    y0_p_1_3 = _mm256_adds_epi16(y0r_one_over_sqrt_21, y0i_three_over_sqrt_21);
+    y0_p_1_5 = _mm256_adds_epi16(y0r_one_over_sqrt_21, y0i_five_over_sqrt_21);
+    y0_p_1_7 = _mm256_adds_epi16(y0r_one_over_sqrt_21, y0i_seven_over_sqrt_21);
+
+    y0_m_1_1 = _mm256_subs_epi16(y0r_one_over_sqrt_21, y0i_one_over_sqrt_21);
+    y0_m_1_3 = _mm256_subs_epi16(y0r_one_over_sqrt_21, y0i_three_over_sqrt_21);
+    y0_m_1_5 = _mm256_subs_epi16(y0r_one_over_sqrt_21, y0i_five_over_sqrt_21);
+    y0_m_1_7 = _mm256_subs_epi16(y0r_one_over_sqrt_21, y0i_seven_over_sqrt_21);
+    y0_m_3_1 = _mm256_subs_epi16(y0r_three_over_sqrt_21, y0i_one_over_sqrt_21);
+    y0_m_3_3 = _mm256_subs_epi16(y0r_three_over_sqrt_21, y0i_three_over_sqrt_21);
+    y0_m_3_5 = _mm256_subs_epi16(y0r_three_over_sqrt_21, y0i_five_over_sqrt_21);
+    y0_m_3_7 = _mm256_subs_epi16(y0r_three_over_sqrt_21, y0i_seven_over_sqrt_21);
+    y0_m_5_1 = _mm256_subs_epi16(y0r_five_over_sqrt_21, y0i_one_over_sqrt_21);
+    y0_m_5_3 = _mm256_subs_epi16(y0r_five_over_sqrt_21, y0i_three_over_sqrt_21);
+    y0_m_5_5 = _mm256_subs_epi16(y0r_five_over_sqrt_21, y0i_five_over_sqrt_21);
+    y0_m_5_7 = _mm256_subs_epi16(y0r_five_over_sqrt_21, y0i_seven_over_sqrt_21);
+    y0_m_7_1 = _mm256_subs_epi16(y0r_seven_over_sqrt_21, y0i_one_over_sqrt_21);
+    y0_m_7_3 = _mm256_subs_epi16(y0r_seven_over_sqrt_21, y0i_three_over_sqrt_21);
+    y0_m_7_5 = _mm256_subs_epi16(y0r_seven_over_sqrt_21, y0i_five_over_sqrt_21);
+    y0_m_7_7 = _mm256_subs_epi16(y0r_seven_over_sqrt_21, y0i_seven_over_sqrt_21);
+
+    interference_abs_epi16(psi_r_p7_p7, ch_mag_int, a_r_p7_p7, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10);
+    interference_abs_epi16(psi_r_p7_p5, ch_mag_int, a_r_p7_p5, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10);
+    interference_abs_epi16(psi_r_p7_p3, ch_mag_int, a_r_p7_p3, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10);
+    interference_abs_epi16(psi_r_p7_p1, ch_mag_int, a_r_p7_p1, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10);
+    interference_abs_epi16(psi_r_p7_m1, ch_mag_int, a_r_p7_m1, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10);
+    interference_abs_epi16(psi_r_p7_m3, ch_mag_int, a_r_p7_m3, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10);
+    interference_abs_epi16(psi_r_p7_m5, ch_mag_int, a_r_p7_m5, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10);
+    interference_abs_epi16(psi_r_p7_m7, ch_mag_int, a_r_p7_m7, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10);
+    interference_abs_epi16(psi_r_p5_p7, ch_mag_int, a_r_p5_p7, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10);
+    interference_abs_epi16(psi_r_p5_p5, ch_mag_int, a_r_p5_p5, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10);
+    interference_abs_epi16(psi_r_p5_p3, ch_mag_int, a_r_p5_p3, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10);
+    interference_abs_epi16(psi_r_p5_p1, ch_mag_int, a_r_p5_p1, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10);
+    interference_abs_epi16(psi_r_p5_m1, ch_mag_int, a_r_p5_m1, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10);
+    interference_abs_epi16(psi_r_p5_m3, ch_mag_int, a_r_p5_m3, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10);
+    interference_abs_epi16(psi_r_p5_m5, ch_mag_int, a_r_p5_m5, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10);
+    interference_abs_epi16(psi_r_p5_m7, ch_mag_int, a_r_p5_m7, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10);
+    interference_abs_epi16(psi_r_p3_p7, ch_mag_int, a_r_p3_p7, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10);
+    interference_abs_epi16(psi_r_p3_p5, ch_mag_int, a_r_p3_p5, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10);
+    interference_abs_epi16(psi_r_p3_p3, ch_mag_int, a_r_p3_p3, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10);
+    interference_abs_epi16(psi_r_p3_p1, ch_mag_int, a_r_p3_p1, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10);
+    interference_abs_epi16(psi_r_p3_m1, ch_mag_int, a_r_p3_m1, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10);
+    interference_abs_epi16(psi_r_p3_m3, ch_mag_int, a_r_p3_m3, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10);
+    interference_abs_epi16(psi_r_p3_m5, ch_mag_int, a_r_p3_m5, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10);
+    interference_abs_epi16(psi_r_p3_m7, ch_mag_int, a_r_p3_m7, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10);
+    interference_abs_epi16(psi_r_p1_p7, ch_mag_int, a_r_p1_p7, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10);
+    interference_abs_epi16(psi_r_p1_p5, ch_mag_int, a_r_p1_p5, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10);
+    interference_abs_epi16(psi_r_p1_p3, ch_mag_int, a_r_p1_p3, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10);
+    interference_abs_epi16(psi_r_p1_p1, ch_mag_int, a_r_p1_p1, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10);
+    interference_abs_epi16(psi_r_p1_m1, ch_mag_int, a_r_p1_m1, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10);
+    interference_abs_epi16(psi_r_p1_m3, ch_mag_int, a_r_p1_m3, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10);
+    interference_abs_epi16(psi_r_p1_m5, ch_mag_int, a_r_p1_m5, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10);
+    interference_abs_epi16(psi_r_p1_m7, ch_mag_int, a_r_p1_m7, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10);
+    interference_abs_epi16(psi_r_m1_p7, ch_mag_int, a_r_m1_p7, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10);
+    interference_abs_epi16(psi_r_m1_p5, ch_mag_int, a_r_m1_p5, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10);
+    interference_abs_epi16(psi_r_m1_p3, ch_mag_int, a_r_m1_p3, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10);
+    interference_abs_epi16(psi_r_m1_p1, ch_mag_int, a_r_m1_p1, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10);
+    interference_abs_epi16(psi_r_m1_m1, ch_mag_int, a_r_m1_m1, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10);
+    interference_abs_epi16(psi_r_m1_m3, ch_mag_int, a_r_m1_m3, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10);
+    interference_abs_epi16(psi_r_m1_m5, ch_mag_int, a_r_m1_m5, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10);
+    interference_abs_epi16(psi_r_m1_m7, ch_mag_int, a_r_m1_m7, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10);
+    interference_abs_epi16(psi_r_m3_p7, ch_mag_int, a_r_m3_p7, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10);
+    interference_abs_epi16(psi_r_m3_p5, ch_mag_int, a_r_m3_p5, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10);
+    interference_abs_epi16(psi_r_m3_p3, ch_mag_int, a_r_m3_p3, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10);
+    interference_abs_epi16(psi_r_m3_p1, ch_mag_int, a_r_m3_p1, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10);
+    interference_abs_epi16(psi_r_m3_m1, ch_mag_int, a_r_m3_m1, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10);
+    interference_abs_epi16(psi_r_m3_m3, ch_mag_int, a_r_m3_m3, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10);
+    interference_abs_epi16(psi_r_m3_m5, ch_mag_int, a_r_m3_m5, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10);
+    interference_abs_epi16(psi_r_m3_m7, ch_mag_int, a_r_m3_m7, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10);
+    interference_abs_epi16(psi_r_m5_p7, ch_mag_int, a_r_m5_p7, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10);
+    interference_abs_epi16(psi_r_m5_p5, ch_mag_int, a_r_m5_p5, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10);
+    interference_abs_epi16(psi_r_m5_p3, ch_mag_int, a_r_m5_p3, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10);
+    interference_abs_epi16(psi_r_m5_p1, ch_mag_int, a_r_m5_p1, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10);
+    interference_abs_epi16(psi_r_m5_m1, ch_mag_int, a_r_m5_m1, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10);
+    interference_abs_epi16(psi_r_m5_m3, ch_mag_int, a_r_m5_m3, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10);
+    interference_abs_epi16(psi_r_m5_m5, ch_mag_int, a_r_m5_m5, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10);
+    interference_abs_epi16(psi_r_m5_m7, ch_mag_int, a_r_m5_m7, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10);
+    interference_abs_epi16(psi_r_m7_p7, ch_mag_int, a_r_m7_p7, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10);
+    interference_abs_epi16(psi_r_m7_p5, ch_mag_int, a_r_m7_p5, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10);
+    interference_abs_epi16(psi_r_m7_p3, ch_mag_int, a_r_m7_p3, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10);
+    interference_abs_epi16(psi_r_m7_p1, ch_mag_int, a_r_m7_p1, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10);
+    interference_abs_epi16(psi_r_m7_m1, ch_mag_int, a_r_m7_m1, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10);
+    interference_abs_epi16(psi_r_m7_m3, ch_mag_int, a_r_m7_m3, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10);
+    interference_abs_epi16(psi_r_m7_m5, ch_mag_int, a_r_m7_m5, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10);
+    interference_abs_epi16(psi_r_m7_m7, ch_mag_int, a_r_m7_m7, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10);
+
+    interference_abs_epi16(psi_i_p7_p7, ch_mag_int, a_i_p7_p7, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10);
+    interference_abs_epi16(psi_i_p7_p5, ch_mag_int, a_i_p7_p5, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10);
+    interference_abs_epi16(psi_i_p7_p3, ch_mag_int, a_i_p7_p3, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10);
+    interference_abs_epi16(psi_i_p7_p1, ch_mag_int, a_i_p7_p1, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10);
+    interference_abs_epi16(psi_i_p7_m1, ch_mag_int, a_i_p7_m1, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10);
+    interference_abs_epi16(psi_i_p7_m3, ch_mag_int, a_i_p7_m3, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10);
+    interference_abs_epi16(psi_i_p7_m5, ch_mag_int, a_i_p7_m5, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10);
+    interference_abs_epi16(psi_i_p7_m7, ch_mag_int, a_i_p7_m7, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10);
+    interference_abs_epi16(psi_i_p5_p7, ch_mag_int, a_i_p5_p7, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10);
+    interference_abs_epi16(psi_i_p5_p5, ch_mag_int, a_i_p5_p5, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10);
+    interference_abs_epi16(psi_i_p5_p3, ch_mag_int, a_i_p5_p3, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10);
+    interference_abs_epi16(psi_i_p5_p1, ch_mag_int, a_i_p5_p1, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10);
+    interference_abs_epi16(psi_i_p5_m1, ch_mag_int, a_i_p5_m1, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10);
+    interference_abs_epi16(psi_i_p5_m3, ch_mag_int, a_i_p5_m3, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10);
+    interference_abs_epi16(psi_i_p5_m5, ch_mag_int, a_i_p5_m5, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10);
+    interference_abs_epi16(psi_i_p5_m7, ch_mag_int, a_i_p5_m7, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10);
+    interference_abs_epi16(psi_i_p3_p7, ch_mag_int, a_i_p3_p7, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10);
+    interference_abs_epi16(psi_i_p3_p5, ch_mag_int, a_i_p3_p5, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10);
+    interference_abs_epi16(psi_i_p3_p3, ch_mag_int, a_i_p3_p3, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10);
+    interference_abs_epi16(psi_i_p3_p1, ch_mag_int, a_i_p3_p1, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10);
+    interference_abs_epi16(psi_i_p3_m1, ch_mag_int, a_i_p3_m1, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10);
+    interference_abs_epi16(psi_i_p3_m3, ch_mag_int, a_i_p3_m3, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10);
+    interference_abs_epi16(psi_i_p3_m5, ch_mag_int, a_i_p3_m5, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10);
+    interference_abs_epi16(psi_i_p3_m7, ch_mag_int, a_i_p3_m7, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10);
+    interference_abs_epi16(psi_i_p1_p7, ch_mag_int, a_i_p1_p7, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10);
+    interference_abs_epi16(psi_i_p1_p5, ch_mag_int, a_i_p1_p5, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10);
+    interference_abs_epi16(psi_i_p1_p3, ch_mag_int, a_i_p1_p3, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10);
+    interference_abs_epi16(psi_i_p1_p1, ch_mag_int, a_i_p1_p1, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10);
+    interference_abs_epi16(psi_i_p1_m1, ch_mag_int, a_i_p1_m1, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10);
+    interference_abs_epi16(psi_i_p1_m3, ch_mag_int, a_i_p1_m3, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10);
+    interference_abs_epi16(psi_i_p1_m5, ch_mag_int, a_i_p1_m5, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10);
+    interference_abs_epi16(psi_i_p1_m7, ch_mag_int, a_i_p1_m7, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10);
+    interference_abs_epi16(psi_i_m1_p7, ch_mag_int, a_i_m1_p7, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10);
+    interference_abs_epi16(psi_i_m1_p5, ch_mag_int, a_i_m1_p5, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10);
+    interference_abs_epi16(psi_i_m1_p3, ch_mag_int, a_i_m1_p3, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10);
+    interference_abs_epi16(psi_i_m1_p1, ch_mag_int, a_i_m1_p1, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10);
+    interference_abs_epi16(psi_i_m1_m1, ch_mag_int, a_i_m1_m1, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10);
+    interference_abs_epi16(psi_i_m1_m3, ch_mag_int, a_i_m1_m3, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10);
+    interference_abs_epi16(psi_i_m1_m5, ch_mag_int, a_i_m1_m5, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10);
+    interference_abs_epi16(psi_i_m1_m7, ch_mag_int, a_i_m1_m7, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10);
+    interference_abs_epi16(psi_i_m3_p7, ch_mag_int, a_i_m3_p7, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10);
+    interference_abs_epi16(psi_i_m3_p5, ch_mag_int, a_i_m3_p5, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10);
+    interference_abs_epi16(psi_i_m3_p3, ch_mag_int, a_i_m3_p3, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10);
+    interference_abs_epi16(psi_i_m3_p1, ch_mag_int, a_i_m3_p1, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10);
+    interference_abs_epi16(psi_i_m3_m1, ch_mag_int, a_i_m3_m1, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10);
+    interference_abs_epi16(psi_i_m3_m3, ch_mag_int, a_i_m3_m3, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10);
+    interference_abs_epi16(psi_i_m3_m5, ch_mag_int, a_i_m3_m5, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10);
+    interference_abs_epi16(psi_i_m3_m7, ch_mag_int, a_i_m3_m7, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10);
+    interference_abs_epi16(psi_i_m5_p7, ch_mag_int, a_i_m5_p7, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10);
+    interference_abs_epi16(psi_i_m5_p5, ch_mag_int, a_i_m5_p5, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10);
+    interference_abs_epi16(psi_i_m5_p3, ch_mag_int, a_i_m5_p3, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10);
+    interference_abs_epi16(psi_i_m5_p1, ch_mag_int, a_i_m5_p1, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10);
+    interference_abs_epi16(psi_i_m5_m1, ch_mag_int, a_i_m5_m1, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10);
+    interference_abs_epi16(psi_i_m5_m3, ch_mag_int, a_i_m5_m3, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10);
+    interference_abs_epi16(psi_i_m5_m5, ch_mag_int, a_i_m5_m5, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10);
+    interference_abs_epi16(psi_i_m5_m7, ch_mag_int, a_i_m5_m7, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10);
+    interference_abs_epi16(psi_i_m7_p7, ch_mag_int, a_i_m7_p7, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10);
+    interference_abs_epi16(psi_i_m7_p5, ch_mag_int, a_i_m7_p5, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10);
+    interference_abs_epi16(psi_i_m7_p3, ch_mag_int, a_i_m7_p3, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10);
+    interference_abs_epi16(psi_i_m7_p1, ch_mag_int, a_i_m7_p1, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10);
+    interference_abs_epi16(psi_i_m7_m1, ch_mag_int, a_i_m7_m1, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10);
+    interference_abs_epi16(psi_i_m7_m3, ch_mag_int, a_i_m7_m3, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10);
+    interference_abs_epi16(psi_i_m7_m5, ch_mag_int, a_i_m7_m5, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10);
+    interference_abs_epi16(psi_i_m7_m7, ch_mag_int, a_i_m7_m7, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10);
+
+    // Calculation of a group of two terms in the bit metric involving product of psi and interference
+    prodsum_psi_a_epi16(psi_r_p7_p7, a_r_p7_p7, psi_i_p7_p7, a_i_p7_p7, psi_a_p7_p7);
+    prodsum_psi_a_epi16(psi_r_p7_p5, a_r_p7_p5, psi_i_p7_p5, a_i_p7_p5, psi_a_p7_p5);
+    prodsum_psi_a_epi16(psi_r_p7_p3, a_r_p7_p3, psi_i_p7_p3, a_i_p7_p3, psi_a_p7_p3);
+    prodsum_psi_a_epi16(psi_r_p7_p1, a_r_p7_p1, psi_i_p7_p1, a_i_p7_p1, psi_a_p7_p1);
+    prodsum_psi_a_epi16(psi_r_p7_m1, a_r_p7_m1, psi_i_p7_m1, a_i_p7_m1, psi_a_p7_m1);
+    prodsum_psi_a_epi16(psi_r_p7_m3, a_r_p7_m3, psi_i_p7_m3, a_i_p7_m3, psi_a_p7_m3);
+    prodsum_psi_a_epi16(psi_r_p7_m5, a_r_p7_m5, psi_i_p7_m5, a_i_p7_m5, psi_a_p7_m5);
+    prodsum_psi_a_epi16(psi_r_p7_m7, a_r_p7_m7, psi_i_p7_m7, a_i_p7_m7, psi_a_p7_m7);
+    prodsum_psi_a_epi16(psi_r_p5_p7, a_r_p5_p7, psi_i_p5_p7, a_i_p5_p7, psi_a_p5_p7);
+    prodsum_psi_a_epi16(psi_r_p5_p5, a_r_p5_p5, psi_i_p5_p5, a_i_p5_p5, psi_a_p5_p5);
+    prodsum_psi_a_epi16(psi_r_p5_p3, a_r_p5_p3, psi_i_p5_p3, a_i_p5_p3, psi_a_p5_p3);
+    prodsum_psi_a_epi16(psi_r_p5_p1, a_r_p5_p1, psi_i_p5_p1, a_i_p5_p1, psi_a_p5_p1);
+    prodsum_psi_a_epi16(psi_r_p5_m1, a_r_p5_m1, psi_i_p5_m1, a_i_p5_m1, psi_a_p5_m1);
+    prodsum_psi_a_epi16(psi_r_p5_m3, a_r_p5_m3, psi_i_p5_m3, a_i_p5_m3, psi_a_p5_m3);
+    prodsum_psi_a_epi16(psi_r_p5_m5, a_r_p5_m5, psi_i_p5_m5, a_i_p5_m5, psi_a_p5_m5);
+    prodsum_psi_a_epi16(psi_r_p5_m7, a_r_p5_m7, psi_i_p5_m7, a_i_p5_m7, psi_a_p5_m7);
+    prodsum_psi_a_epi16(psi_r_p3_p7, a_r_p3_p7, psi_i_p3_p7, a_i_p3_p7, psi_a_p3_p7);
+    prodsum_psi_a_epi16(psi_r_p3_p5, a_r_p3_p5, psi_i_p3_p5, a_i_p3_p5, psi_a_p3_p5);
+    prodsum_psi_a_epi16(psi_r_p3_p3, a_r_p3_p3, psi_i_p3_p3, a_i_p3_p3, psi_a_p3_p3);
+    prodsum_psi_a_epi16(psi_r_p3_p1, a_r_p3_p1, psi_i_p3_p1, a_i_p3_p1, psi_a_p3_p1);
+    prodsum_psi_a_epi16(psi_r_p3_m1, a_r_p3_m1, psi_i_p3_m1, a_i_p3_m1, psi_a_p3_m1);
+    prodsum_psi_a_epi16(psi_r_p3_m3, a_r_p3_m3, psi_i_p3_m3, a_i_p3_m3, psi_a_p3_m3);
+    prodsum_psi_a_epi16(psi_r_p3_m5, a_r_p3_m5, psi_i_p3_m5, a_i_p3_m5, psi_a_p3_m5);
+    prodsum_psi_a_epi16(psi_r_p3_m7, a_r_p3_m7, psi_i_p3_m7, a_i_p3_m7, psi_a_p3_m7);
+    prodsum_psi_a_epi16(psi_r_p1_p7, a_r_p1_p7, psi_i_p1_p7, a_i_p1_p7, psi_a_p1_p7);
+    prodsum_psi_a_epi16(psi_r_p1_p5, a_r_p1_p5, psi_i_p1_p5, a_i_p1_p5, psi_a_p1_p5);
+    prodsum_psi_a_epi16(psi_r_p1_p3, a_r_p1_p3, psi_i_p1_p3, a_i_p1_p3, psi_a_p1_p3);
+    prodsum_psi_a_epi16(psi_r_p1_p1, a_r_p1_p1, psi_i_p1_p1, a_i_p1_p1, psi_a_p1_p1);
+    prodsum_psi_a_epi16(psi_r_p1_m1, a_r_p1_m1, psi_i_p1_m1, a_i_p1_m1, psi_a_p1_m1);
+    prodsum_psi_a_epi16(psi_r_p1_m3, a_r_p1_m3, psi_i_p1_m3, a_i_p1_m3, psi_a_p1_m3);
+    prodsum_psi_a_epi16(psi_r_p1_m5, a_r_p1_m5, psi_i_p1_m5, a_i_p1_m5, psi_a_p1_m5);
+    prodsum_psi_a_epi16(psi_r_p1_m7, a_r_p1_m7, psi_i_p1_m7, a_i_p1_m7, psi_a_p1_m7);
+    prodsum_psi_a_epi16(psi_r_m1_p7, a_r_m1_p7, psi_i_m1_p7, a_i_m1_p7, psi_a_m1_p7);
+    prodsum_psi_a_epi16(psi_r_m1_p5, a_r_m1_p5, psi_i_m1_p5, a_i_m1_p5, psi_a_m1_p5);
+    prodsum_psi_a_epi16(psi_r_m1_p3, a_r_m1_p3, psi_i_m1_p3, a_i_m1_p3, psi_a_m1_p3);
+    prodsum_psi_a_epi16(psi_r_m1_p1, a_r_m1_p1, psi_i_m1_p1, a_i_m1_p1, psi_a_m1_p1);
+    prodsum_psi_a_epi16(psi_r_m1_m1, a_r_m1_m1, psi_i_m1_m1, a_i_m1_m1, psi_a_m1_m1);
+    prodsum_psi_a_epi16(psi_r_m1_m3, a_r_m1_m3, psi_i_m1_m3, a_i_m1_m3, psi_a_m1_m3);
+    prodsum_psi_a_epi16(psi_r_m1_m5, a_r_m1_m5, psi_i_m1_m5, a_i_m1_m5, psi_a_m1_m5);
+    prodsum_psi_a_epi16(psi_r_m1_m7, a_r_m1_m7, psi_i_m1_m7, a_i_m1_m7, psi_a_m1_m7);
+    prodsum_psi_a_epi16(psi_r_m3_p7, a_r_m3_p7, psi_i_m3_p7, a_i_m3_p7, psi_a_m3_p7);
+    prodsum_psi_a_epi16(psi_r_m3_p5, a_r_m3_p5, psi_i_m3_p5, a_i_m3_p5, psi_a_m3_p5);
+    prodsum_psi_a_epi16(psi_r_m3_p3, a_r_m3_p3, psi_i_m3_p3, a_i_m3_p3, psi_a_m3_p3);
+    prodsum_psi_a_epi16(psi_r_m3_p1, a_r_m3_p1, psi_i_m3_p1, a_i_m3_p1, psi_a_m3_p1);
+    prodsum_psi_a_epi16(psi_r_m3_m1, a_r_m3_m1, psi_i_m3_m1, a_i_m3_m1, psi_a_m3_m1);
+    prodsum_psi_a_epi16(psi_r_m3_m3, a_r_m3_m3, psi_i_m3_m3, a_i_m3_m3, psi_a_m3_m3);
+    prodsum_psi_a_epi16(psi_r_m3_m5, a_r_m3_m5, psi_i_m3_m5, a_i_m3_m5, psi_a_m3_m5);
+    prodsum_psi_a_epi16(psi_r_m3_m7, a_r_m3_m7, psi_i_m3_m7, a_i_m3_m7, psi_a_m3_m7);
+    prodsum_psi_a_epi16(psi_r_m5_p7, a_r_m5_p7, psi_i_m5_p7, a_i_m5_p7, psi_a_m5_p7);
+    prodsum_psi_a_epi16(psi_r_m5_p5, a_r_m5_p5, psi_i_m5_p5, a_i_m5_p5, psi_a_m5_p5);
+    prodsum_psi_a_epi16(psi_r_m5_p3, a_r_m5_p3, psi_i_m5_p3, a_i_m5_p3, psi_a_m5_p3);
+    prodsum_psi_a_epi16(psi_r_m5_p1, a_r_m5_p1, psi_i_m5_p1, a_i_m5_p1, psi_a_m5_p1);
+    prodsum_psi_a_epi16(psi_r_m5_m1, a_r_m5_m1, psi_i_m5_m1, a_i_m5_m1, psi_a_m5_m1);
+    prodsum_psi_a_epi16(psi_r_m5_m3, a_r_m5_m3, psi_i_m5_m3, a_i_m5_m3, psi_a_m5_m3);
+    prodsum_psi_a_epi16(psi_r_m5_m5, a_r_m5_m5, psi_i_m5_m5, a_i_m5_m5, psi_a_m5_m5);
+    prodsum_psi_a_epi16(psi_r_m5_m7, a_r_m5_m7, psi_i_m5_m7, a_i_m5_m7, psi_a_m5_m7);
+    prodsum_psi_a_epi16(psi_r_m7_p7, a_r_m7_p7, psi_i_m7_p7, a_i_m7_p7, psi_a_m7_p7);
+    prodsum_psi_a_epi16(psi_r_m7_p5, a_r_m7_p5, psi_i_m7_p5, a_i_m7_p5, psi_a_m7_p5);
+    prodsum_psi_a_epi16(psi_r_m7_p3, a_r_m7_p3, psi_i_m7_p3, a_i_m7_p3, psi_a_m7_p3);
+    prodsum_psi_a_epi16(psi_r_m7_p1, a_r_m7_p1, psi_i_m7_p1, a_i_m7_p1, psi_a_m7_p1);
+    prodsum_psi_a_epi16(psi_r_m7_m1, a_r_m7_m1, psi_i_m7_m1, a_i_m7_m1, psi_a_m7_m1);
+    prodsum_psi_a_epi16(psi_r_m7_m3, a_r_m7_m3, psi_i_m7_m3, a_i_m7_m3, psi_a_m7_m3);
+    prodsum_psi_a_epi16(psi_r_m7_m5, a_r_m7_m5, psi_i_m7_m5, a_i_m7_m5, psi_a_m7_m5);
+    prodsum_psi_a_epi16(psi_r_m7_m7, a_r_m7_m7, psi_i_m7_m7, a_i_m7_m7, psi_a_m7_m7);
+
+    // Calculation of a group of two terms in the bit metric involving squares of interference
+    square_a_epi16(a_r_p7_p7, a_i_p7_p7, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_p7_p7);
+    square_a_epi16(a_r_p7_p5, a_i_p7_p5, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_p7_p5);
+    square_a_epi16(a_r_p7_p3, a_i_p7_p3, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_p7_p3);
+    square_a_epi16(a_r_p7_p1, a_i_p7_p1, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_p7_p1);
+    square_a_epi16(a_r_p7_m1, a_i_p7_m1, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_p7_m1);
+    square_a_epi16(a_r_p7_m3, a_i_p7_m3, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_p7_m3);
+    square_a_epi16(a_r_p7_m5, a_i_p7_m5, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_p7_m5);
+    square_a_epi16(a_r_p7_m7, a_i_p7_m7, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_p7_m7);
+    square_a_epi16(a_r_p5_p7, a_i_p5_p7, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_p5_p7);
+    square_a_epi16(a_r_p5_p5, a_i_p5_p5, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_p5_p5);
+    square_a_epi16(a_r_p5_p3, a_i_p5_p3, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_p5_p3);
+    square_a_epi16(a_r_p5_p1, a_i_p5_p1, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_p5_p1);
+    square_a_epi16(a_r_p5_m1, a_i_p5_m1, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_p5_m1);
+    square_a_epi16(a_r_p5_m3, a_i_p5_m3, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_p5_m3);
+    square_a_epi16(a_r_p5_m5, a_i_p5_m5, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_p5_m5);
+    square_a_epi16(a_r_p5_m7, a_i_p5_m7, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_p5_m7);
+    square_a_epi16(a_r_p3_p7, a_i_p3_p7, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_p3_p7);
+    square_a_epi16(a_r_p3_p5, a_i_p3_p5, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_p3_p5);
+    square_a_epi16(a_r_p3_p3, a_i_p3_p3, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_p3_p3);
+    square_a_epi16(a_r_p3_p1, a_i_p3_p1, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_p3_p1);
+    square_a_epi16(a_r_p3_m1, a_i_p3_m1, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_p3_m1);
+    square_a_epi16(a_r_p3_m3, a_i_p3_m3, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_p3_m3);
+    square_a_epi16(a_r_p3_m5, a_i_p3_m5, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_p3_m5);
+    square_a_epi16(a_r_p3_m7, a_i_p3_m7, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_p3_m7);
+    square_a_epi16(a_r_p1_p7, a_i_p1_p7, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_p1_p7);
+    square_a_epi16(a_r_p1_p5, a_i_p1_p5, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_p1_p5);
+    square_a_epi16(a_r_p1_p3, a_i_p1_p3, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_p1_p3);
+    square_a_epi16(a_r_p1_p1, a_i_p1_p1, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_p1_p1);
+    square_a_epi16(a_r_p1_m1, a_i_p1_m1, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_p1_m1);
+    square_a_epi16(a_r_p1_m3, a_i_p1_m3, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_p1_m3);
+    square_a_epi16(a_r_p1_m5, a_i_p1_m5, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_p1_m5);
+    square_a_epi16(a_r_p1_m7, a_i_p1_m7, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_p1_m7);
+    square_a_epi16(a_r_m1_p7, a_i_m1_p7, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_m1_p7);
+    square_a_epi16(a_r_m1_p5, a_i_m1_p5, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_m1_p5);
+    square_a_epi16(a_r_m1_p3, a_i_m1_p3, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_m1_p3);
+    square_a_epi16(a_r_m1_p1, a_i_m1_p1, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_m1_p1);
+    square_a_epi16(a_r_m1_m1, a_i_m1_m1, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_m1_m1);
+    square_a_epi16(a_r_m1_m3, a_i_m1_m3, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_m1_m3);
+    square_a_epi16(a_r_m1_m5, a_i_m1_m5, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_m1_m5);
+    square_a_epi16(a_r_m1_m7, a_i_m1_m7, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_m1_m7);
+    square_a_epi16(a_r_m3_p7, a_i_m3_p7, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_m3_p7);
+    square_a_epi16(a_r_m3_p5, a_i_m3_p5, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_m3_p5);
+    square_a_epi16(a_r_m3_p3, a_i_m3_p3, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_m3_p3);
+    square_a_epi16(a_r_m3_p1, a_i_m3_p1, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_m3_p1);
+    square_a_epi16(a_r_m3_m1, a_i_m3_m1, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_m3_m1);
+    square_a_epi16(a_r_m3_m3, a_i_m3_m3, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_m3_m3);
+    square_a_epi16(a_r_m3_m5, a_i_m3_m5, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_m3_m5);
+    square_a_epi16(a_r_m3_m7, a_i_m3_m7, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_m3_m7);
+    square_a_epi16(a_r_m5_p7, a_i_m5_p7, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_m5_p7);
+    square_a_epi16(a_r_m5_p5, a_i_m5_p5, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_m5_p5);
+    square_a_epi16(a_r_m5_p3, a_i_m5_p3, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_m5_p3);
+    square_a_epi16(a_r_m5_p1, a_i_m5_p1, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_m5_p1);
+    square_a_epi16(a_r_m5_m1, a_i_m5_m1, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_m5_m1);
+    square_a_epi16(a_r_m5_m3, a_i_m5_m3, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_m5_m3);
+    square_a_epi16(a_r_m5_m5, a_i_m5_m5, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_m5_m5);
+    square_a_epi16(a_r_m5_m7, a_i_m5_m7, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_m5_m7);
+    square_a_epi16(a_r_m7_p7, a_i_m7_p7, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_m7_p7);
+    square_a_epi16(a_r_m7_p5, a_i_m7_p5, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_m7_p5);
+    square_a_epi16(a_r_m7_p3, a_i_m7_p3, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_m7_p3);
+    square_a_epi16(a_r_m7_p1, a_i_m7_p1, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_m7_p1);
+    square_a_epi16(a_r_m7_m1, a_i_m7_m1, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_m7_m1);
+    square_a_epi16(a_r_m7_m3, a_i_m7_m3, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_m7_m3);
+    square_a_epi16(a_r_m7_m5, a_i_m7_m5, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_m7_m5);
+    square_a_epi16(a_r_m7_m7, a_i_m7_m7, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_m7_m7);
+
+    // Computing different multiples of ||h0||^2
+    // x=1, y=1
+    ch_mag_2_over_42_with_sigma2 = _mm256_mulhi_epi16(ch_mag_des,ONE_OVER_FOUR_SQRT_42);
+    ch_mag_2_over_42_with_sigma2 = _mm256_slli_epi16(ch_mag_2_over_42_with_sigma2,1);
+    // x=1, y=3
+    ch_mag_10_over_42_with_sigma2 = _mm256_mulhi_epi16(ch_mag_des,FIVE_OVER_FOUR_SQRT_42);
+    ch_mag_10_over_42_with_sigma2 = _mm256_slli_epi16(ch_mag_10_over_42_with_sigma2,1);
+    // x=1, y=5
+    ch_mag_26_over_42_with_sigma2 = _mm256_mulhi_epi16(ch_mag_des,THIRTEEN_OVER_FOUR_SQRT_42);
+    ch_mag_26_over_42_with_sigma2 = _mm256_slli_epi16(ch_mag_26_over_42_with_sigma2,1);
+    // x=1, y=7
+    ch_mag_50_over_42_with_sigma2 = _mm256_mulhi_epi16(ch_mag_des,TWENTYFIVE_OVER_FOUR_SQRT_42);
+    ch_mag_50_over_42_with_sigma2 = _mm256_slli_epi16(ch_mag_50_over_42_with_sigma2,1);
+    // x=3, y=3
+    ch_mag_18_over_42_with_sigma2 = _mm256_mulhi_epi16(ch_mag_des,NINE_OVER_FOUR_SQRT_42);
+    ch_mag_18_over_42_with_sigma2 = _mm256_slli_epi16(ch_mag_18_over_42_with_sigma2,1);
+    // x=3, y=5
+    ch_mag_34_over_42_with_sigma2 = _mm256_mulhi_epi16(ch_mag_des,SEVENTEEN_OVER_FOUR_SQRT_42);
+    ch_mag_34_over_42_with_sigma2 = _mm256_slli_epi16(ch_mag_34_over_42_with_sigma2,1);
+    // x=3, y=7
+    ch_mag_58_over_42_with_sigma2 = _mm256_mulhi_epi16(ch_mag_des,TWENTYNINE_OVER_FOUR_SQRT_42);
+    ch_mag_58_over_42_with_sigma2 = _mm256_slli_epi16(ch_mag_58_over_42_with_sigma2,2);
+    // x=5, y=5
+    ch_mag_50_over_42_with_sigma2 = _mm256_mulhi_epi16(ch_mag_des,TWENTYFIVE_OVER_FOUR_SQRT_42);
+    ch_mag_50_over_42_with_sigma2 = _mm256_slli_epi16(ch_mag_50_over_42_with_sigma2,1);
+    // x=5, y=7
+    ch_mag_74_over_42_with_sigma2 = _mm256_mulhi_epi16(ch_mag_des,THIRTYSEVEN_OVER_FOUR_SQRT_42);
+    ch_mag_74_over_42_with_sigma2 = _mm256_slli_epi16(ch_mag_74_over_42_with_sigma2,2);
+    // x=7, y=7
+    ch_mag_98_over_42_with_sigma2 = _mm256_mulhi_epi16(ch_mag_des,FORTYNINE_OVER_FOUR_SQRT_42);
+    ch_mag_98_over_42_with_sigma2 = _mm256_slli_epi16(ch_mag_98_over_42_with_sigma2,2);
+
+    // Computing Metrics
+    xmm0 = _mm256_subs_epi16(psi_a_p7_p7, a_sq_p7_p7);
+    xmm1 = _mm256_adds_epi16(xmm0, y0_p_7_7);
+    bit_met_p7_p7 = _mm256_subs_epi16(xmm1, ch_mag_98_over_42_with_sigma2);
+    xmm0 = _mm256_subs_epi16(psi_a_p7_p5, a_sq_p7_p5);
+    xmm1 = _mm256_adds_epi16(xmm0, y0_p_7_5);
+    bit_met_p7_p5 = _mm256_subs_epi16(xmm1, ch_mag_74_over_42_with_sigma2);
+    xmm0 = _mm256_subs_epi16(psi_a_p7_p3, a_sq_p7_p3);
+    xmm1 = _mm256_adds_epi16(xmm0, y0_p_7_3);
+    bit_met_p7_p3 = _mm256_subs_epi16(xmm1, ch_mag_58_over_42_with_sigma2);
+    xmm0 = _mm256_subs_epi16(psi_a_p7_p1, a_sq_p7_p1);
+    xmm1 = _mm256_adds_epi16(xmm0, y0_p_7_1);
+    bit_met_p7_p1 = _mm256_subs_epi16(xmm1, ch_mag_50_over_42_with_sigma2);
+    xmm0 = _mm256_subs_epi16(psi_a_p7_m1, a_sq_p7_m1);
+    xmm1 = _mm256_adds_epi16(xmm0, y0_m_7_1);
+    bit_met_p7_m1 = _mm256_subs_epi16(xmm1, ch_mag_50_over_42_with_sigma2);
+    xmm0 = _mm256_subs_epi16(psi_a_p7_m3, a_sq_p7_m3);
+    xmm1 = _mm256_adds_epi16(xmm0, y0_m_7_3);
+    bit_met_p7_m3 = _mm256_subs_epi16(xmm1, ch_mag_58_over_42_with_sigma2);
+    xmm0 = _mm256_subs_epi16(psi_a_p7_m5, a_sq_p7_m5);
+    xmm1 = _mm256_adds_epi16(xmm0, y0_m_7_5);
+    bit_met_p7_m5 = _mm256_subs_epi16(xmm1, ch_mag_74_over_42_with_sigma2);
+    xmm0 = _mm256_subs_epi16(psi_a_p7_m7, a_sq_p7_m7);
+    xmm1 = _mm256_adds_epi16(xmm0, y0_m_7_7);
+    bit_met_p7_m7 = _mm256_subs_epi16(xmm1, ch_mag_98_over_42_with_sigma2);
+    xmm0 = _mm256_subs_epi16(psi_a_p5_p7, a_sq_p5_p7);
+    xmm1 = _mm256_adds_epi16(xmm0, y0_p_5_7);
+    bit_met_p5_p7 = _mm256_subs_epi16(xmm1, ch_mag_74_over_42_with_sigma2);
+    xmm0 = _mm256_subs_epi16(psi_a_p5_p5, a_sq_p5_p5);
+    xmm1 = _mm256_adds_epi16(xmm0, y0_p_5_5);
+    bit_met_p5_p5 = _mm256_subs_epi16(xmm1, ch_mag_50_over_42_with_sigma2);
+    xmm0 = _mm256_subs_epi16(psi_a_p5_p3, a_sq_p5_p3);
+    xmm1 = _mm256_adds_epi16(xmm0, y0_p_5_3);
+    bit_met_p5_p3 = _mm256_subs_epi16(xmm1, ch_mag_34_over_42_with_sigma2);
+    xmm0 = _mm256_subs_epi16(psi_a_p5_p1, a_sq_p5_p1);
+    xmm1 = _mm256_adds_epi16(xmm0, y0_p_5_1);
+    bit_met_p5_p1 = _mm256_subs_epi16(xmm1, ch_mag_26_over_42_with_sigma2);
+    xmm0 = _mm256_subs_epi16(psi_a_p5_m1, a_sq_p5_m1);
+    xmm1 = _mm256_adds_epi16(xmm0, y0_m_5_1);
+    bit_met_p5_m1 = _mm256_subs_epi16(xmm1, ch_mag_26_over_42_with_sigma2);
+    xmm0 = _mm256_subs_epi16(psi_a_p5_m3, a_sq_p5_m3);
+    xmm1 = _mm256_adds_epi16(xmm0, y0_m_5_3);
+    bit_met_p5_m3 = _mm256_subs_epi16(xmm1, ch_mag_34_over_42_with_sigma2);
+    xmm0 = _mm256_subs_epi16(psi_a_p5_m5, a_sq_p5_m5);
+    xmm1 = _mm256_adds_epi16(xmm0, y0_m_5_5);
+    bit_met_p5_m5 = _mm256_subs_epi16(xmm1, ch_mag_50_over_42_with_sigma2);
+    xmm0 = _mm256_subs_epi16(psi_a_p5_m7, a_sq_p5_m7);
+    xmm1 = _mm256_adds_epi16(xmm0, y0_m_5_7);
+    bit_met_p5_m7 = _mm256_subs_epi16(xmm1, ch_mag_74_over_42_with_sigma2);
+    xmm0 = _mm256_subs_epi16(psi_a_p3_p7, a_sq_p3_p7);
+    xmm1 = _mm256_adds_epi16(xmm0, y0_p_3_7);
+    bit_met_p3_p7 = _mm256_subs_epi16(xmm1, ch_mag_58_over_42_with_sigma2);
+    xmm0 = _mm256_subs_epi16(psi_a_p3_p5, a_sq_p3_p5);
+    xmm1 = _mm256_adds_epi16(xmm0, y0_p_3_5);
+    bit_met_p3_p5 = _mm256_subs_epi16(xmm1, ch_mag_34_over_42_with_sigma2);
+    xmm0 = _mm256_subs_epi16(psi_a_p3_p3, a_sq_p3_p3);
+    xmm1 = _mm256_adds_epi16(xmm0, y0_p_3_3);
+    bit_met_p3_p3 = _mm256_subs_epi16(xmm1, ch_mag_18_over_42_with_sigma2);
+    xmm0 = _mm256_subs_epi16(psi_a_p3_p1, a_sq_p3_p1);
+    xmm1 = _mm256_adds_epi16(xmm0, y0_p_3_1);
+    bit_met_p3_p1 = _mm256_subs_epi16(xmm1, ch_mag_10_over_42_with_sigma2);
+    xmm0 = _mm256_subs_epi16(psi_a_p3_m1, a_sq_p3_m1);
+    xmm1 = _mm256_adds_epi16(xmm0, y0_m_3_1);
+    bit_met_p3_m1 = _mm256_subs_epi16(xmm1, ch_mag_10_over_42_with_sigma2);
+    xmm0 = _mm256_subs_epi16(psi_a_p3_m3, a_sq_p3_m3);
+    xmm1 = _mm256_adds_epi16(xmm0, y0_m_3_3);
+    bit_met_p3_m3 = _mm256_subs_epi16(xmm1, ch_mag_18_over_42_with_sigma2);
+    xmm0 = _mm256_subs_epi16(psi_a_p3_m5, a_sq_p3_m5);
+    xmm1 = _mm256_adds_epi16(xmm0, y0_m_3_5);
+    bit_met_p3_m5 = _mm256_subs_epi16(xmm1, ch_mag_34_over_42_with_sigma2);
+    xmm0 = _mm256_subs_epi16(psi_a_p3_m7, a_sq_p3_m7);
+    xmm1 = _mm256_adds_epi16(xmm0, y0_m_3_7);
+    bit_met_p3_m7 = _mm256_subs_epi16(xmm1, ch_mag_58_over_42_with_sigma2);
+    xmm0 = _mm256_subs_epi16(psi_a_p1_p7, a_sq_p1_p7);
+    xmm1 = _mm256_adds_epi16(xmm0, y0_p_1_7);
+    bit_met_p1_p7 = _mm256_subs_epi16(xmm1, ch_mag_50_over_42_with_sigma2);
+    xmm0 = _mm256_subs_epi16(psi_a_p1_p5, a_sq_p1_p5);
+    xmm1 = _mm256_adds_epi16(xmm0, y0_p_1_5);
+    bit_met_p1_p5 = _mm256_subs_epi16(xmm1, ch_mag_26_over_42_with_sigma2);
+    xmm0 = _mm256_subs_epi16(psi_a_p1_p3, a_sq_p1_p3);
+    xmm1 = _mm256_adds_epi16(xmm0, y0_p_1_3);
+    bit_met_p1_p3 = _mm256_subs_epi16(xmm1, ch_mag_10_over_42_with_sigma2);
+    xmm0 = _mm256_subs_epi16(psi_a_p1_p1, a_sq_p1_p1);
+    xmm1 = _mm256_adds_epi16(xmm0, y0_p_1_1);
+    bit_met_p1_p1 = _mm256_subs_epi16(xmm1, ch_mag_2_over_42_with_sigma2);
+    xmm0 = _mm256_subs_epi16(psi_a_p1_m1, a_sq_p1_m1);
+    xmm1 = _mm256_adds_epi16(xmm0, y0_m_1_1);
+    bit_met_p1_m1 = _mm256_subs_epi16(xmm1, ch_mag_2_over_42_with_sigma2);
+    xmm0 = _mm256_subs_epi16(psi_a_p1_m3, a_sq_p1_m3);
+    xmm1 = _mm256_adds_epi16(xmm0, y0_m_1_3);
+    bit_met_p1_m3 = _mm256_subs_epi16(xmm1, ch_mag_10_over_42_with_sigma2);
+    xmm0 = _mm256_subs_epi16(psi_a_p1_m5, a_sq_p1_m5);
+    xmm1 = _mm256_adds_epi16(xmm0, y0_m_1_5);
+    bit_met_p1_m5 = _mm256_subs_epi16(xmm1, ch_mag_26_over_42_with_sigma2);
+    xmm0 = _mm256_subs_epi16(psi_a_p1_m7, a_sq_p1_m7);
+    xmm1 = _mm256_adds_epi16(xmm0, y0_m_1_7);
+    bit_met_p1_m7 = _mm256_subs_epi16(xmm1, ch_mag_50_over_42_with_sigma2);
+
+    xmm0 = _mm256_subs_epi16(psi_a_m1_p7, a_sq_m1_p7);
+    xmm1 = _mm256_subs_epi16(xmm0, y0_m_1_7);
+    bit_met_m1_p7 = _mm256_subs_epi16(xmm1, ch_mag_50_over_42_with_sigma2);
+    xmm0 = _mm256_subs_epi16(psi_a_m1_p5, a_sq_m1_p5);
+    xmm1 = _mm256_subs_epi16(xmm0, y0_m_1_5);
+    bit_met_m1_p5 = _mm256_subs_epi16(xmm1, ch_mag_26_over_42_with_sigma2);
+    xmm0 = _mm256_subs_epi16(psi_a_m1_p3, a_sq_m1_p3);
+    xmm1 = _mm256_subs_epi16(xmm0, y0_m_1_3);
+    bit_met_m1_p3 = _mm256_subs_epi16(xmm1, ch_mag_10_over_42_with_sigma2);
+    xmm0 = _mm256_subs_epi16(psi_a_m1_p1, a_sq_m1_p1);
+    xmm1 = _mm256_subs_epi16(xmm0, y0_m_1_1);
+    bit_met_m1_p1 = _mm256_subs_epi16(xmm1, ch_mag_2_over_42_with_sigma2);
+    xmm0 = _mm256_subs_epi16(psi_a_m1_m1, a_sq_m1_m1);
+    xmm1 = _mm256_subs_epi16(xmm0, y0_p_1_1);
+    bit_met_m1_m1 = _mm256_subs_epi16(xmm1, ch_mag_2_over_42_with_sigma2);
+    xmm0 = _mm256_subs_epi16(psi_a_m1_m3, a_sq_m1_m3);
+    xmm1 = _mm256_subs_epi16(xmm0, y0_p_1_3);
+    bit_met_m1_m3 = _mm256_subs_epi16(xmm1, ch_mag_10_over_42_with_sigma2);
+    xmm0 = _mm256_subs_epi16(psi_a_m1_m5, a_sq_m1_m5);
+    xmm1 = _mm256_subs_epi16(xmm0, y0_p_1_5);
+    bit_met_m1_m5 = _mm256_subs_epi16(xmm1, ch_mag_26_over_42_with_sigma2);
+    xmm0 = _mm256_subs_epi16(psi_a_m1_m7, a_sq_m1_m7);
+    xmm1 = _mm256_subs_epi16(xmm0, y0_p_1_7);
+    bit_met_m1_m7 = _mm256_subs_epi16(xmm1, ch_mag_50_over_42_with_sigma2);
+    xmm0 = _mm256_subs_epi16(psi_a_m3_p7, a_sq_m3_p7);
+    xmm1 = _mm256_subs_epi16(xmm0, y0_m_3_7);
+    bit_met_m3_p7 = _mm256_subs_epi16(xmm1, ch_mag_58_over_42_with_sigma2);
+    xmm0 = _mm256_subs_epi16(psi_a_m3_p5, a_sq_m3_p5);
+    xmm1 = _mm256_subs_epi16(xmm0, y0_m_3_5);
+    bit_met_m3_p5 = _mm256_subs_epi16(xmm1, ch_mag_34_over_42_with_sigma2);
+    xmm0 = _mm256_subs_epi16(psi_a_m3_p3, a_sq_m3_p3);
+    xmm1 = _mm256_subs_epi16(xmm0, y0_m_3_3);
+    bit_met_m3_p3 = _mm256_subs_epi16(xmm1, ch_mag_18_over_42_with_sigma2);
+    xmm0 = _mm256_subs_epi16(psi_a_m3_p1, a_sq_m3_p1);
+    xmm1 = _mm256_subs_epi16(xmm0, y0_m_3_1);
+    bit_met_m3_p1 = _mm256_subs_epi16(xmm1, ch_mag_10_over_42_with_sigma2);
+    xmm0 = _mm256_subs_epi16(psi_a_m3_m1, a_sq_m3_m1);
+    xmm1 = _mm256_subs_epi16(xmm0, y0_p_3_1);
+    bit_met_m3_m1 = _mm256_subs_epi16(xmm1, ch_mag_10_over_42_with_sigma2);
+    xmm0 = _mm256_subs_epi16(psi_a_m3_m3, a_sq_m3_m3);
+    xmm1 = _mm256_subs_epi16(xmm0, y0_p_3_3);
+    bit_met_m3_m3 = _mm256_subs_epi16(xmm1, ch_mag_18_over_42_with_sigma2);
+    xmm0 = _mm256_subs_epi16(psi_a_m3_m5, a_sq_m3_m5);
+    xmm1 = _mm256_subs_epi16(xmm0, y0_p_3_5);
+    bit_met_m3_m5 = _mm256_subs_epi16(xmm1, ch_mag_34_over_42_with_sigma2);
+    xmm0 = _mm256_subs_epi16(psi_a_m3_m7, a_sq_m3_m7);
+    xmm1 = _mm256_subs_epi16(xmm0, y0_p_3_7);
+    bit_met_m3_m7 = _mm256_subs_epi16(xmm1, ch_mag_58_over_42_with_sigma2);
+    xmm0 = _mm256_subs_epi16(psi_a_m5_p7, a_sq_m5_p7);
+    xmm1 = _mm256_subs_epi16(xmm0, y0_m_5_7);
+    bit_met_m5_p7 = _mm256_subs_epi16(xmm1, ch_mag_74_over_42_with_sigma2);
+    xmm0 = _mm256_subs_epi16(psi_a_m5_p5, a_sq_m5_p5);
+    xmm1 = _mm256_subs_epi16(xmm0, y0_m_5_5);
+    bit_met_m5_p5 = _mm256_subs_epi16(xmm1, ch_mag_50_over_42_with_sigma2);
+    xmm0 = _mm256_subs_epi16(psi_a_m5_p3, a_sq_m5_p3);
+    xmm1 = _mm256_subs_epi16(xmm0, y0_m_5_3);
+    bit_met_m5_p3 = _mm256_subs_epi16(xmm1, ch_mag_34_over_42_with_sigma2);
+    xmm0 = _mm256_subs_epi16(psi_a_m5_p1, a_sq_m5_p1);
+    xmm1 = _mm256_subs_epi16(xmm0, y0_m_5_1);
+    bit_met_m5_p1 = _mm256_subs_epi16(xmm1, ch_mag_26_over_42_with_sigma2);
+    xmm0 = _mm256_subs_epi16(psi_a_m5_m1, a_sq_m5_m1);
+    xmm1 = _mm256_subs_epi16(xmm0, y0_p_5_1);
+    bit_met_m5_m1 = _mm256_subs_epi16(xmm1, ch_mag_26_over_42_with_sigma2);
+    xmm0 = _mm256_subs_epi16(psi_a_m5_m3, a_sq_m5_m3);
+    xmm1 = _mm256_subs_epi16(xmm0, y0_p_5_3);
+    bit_met_m5_m3 = _mm256_subs_epi16(xmm1, ch_mag_34_over_42_with_sigma2);
+    xmm0 = _mm256_subs_epi16(psi_a_m5_m5, a_sq_m5_m5);
+    xmm1 = _mm256_subs_epi16(xmm0, y0_p_5_5);
+    bit_met_m5_m5 = _mm256_subs_epi16(xmm1, ch_mag_50_over_42_with_sigma2);
+    xmm0 = _mm256_subs_epi16(psi_a_m5_m7, a_sq_m5_m7);
+    xmm1 = _mm256_subs_epi16(xmm0, y0_p_5_7);
+    bit_met_m5_m7 = _mm256_subs_epi16(xmm1, ch_mag_74_over_42_with_sigma2);
+    xmm0 = _mm256_subs_epi16(psi_a_m7_p7, a_sq_m7_p7);
+    xmm1 = _mm256_subs_epi16(xmm0, y0_m_7_7);
+    bit_met_m7_p7 = _mm256_subs_epi16(xmm1, ch_mag_98_over_42_with_sigma2);
+    xmm0 = _mm256_subs_epi16(psi_a_m7_p5, a_sq_m7_p5);
+    xmm1 = _mm256_subs_epi16(xmm0, y0_m_7_5);
+    bit_met_m7_p5 = _mm256_subs_epi16(xmm1, ch_mag_74_over_42_with_sigma2);
+    xmm0 = _mm256_subs_epi16(psi_a_m7_p3, a_sq_m7_p3);
+    xmm1 = _mm256_subs_epi16(xmm0, y0_m_7_3);
+    bit_met_m7_p3 = _mm256_subs_epi16(xmm1, ch_mag_58_over_42_with_sigma2);
+    xmm0 = _mm256_subs_epi16(psi_a_m7_p1, a_sq_m7_p1);
+    xmm1 = _mm256_subs_epi16(xmm0, y0_m_7_1);
+    bit_met_m7_p1 = _mm256_subs_epi16(xmm1, ch_mag_50_over_42_with_sigma2);
+    xmm0 = _mm256_subs_epi16(psi_a_m7_m1, a_sq_m7_m1);
+    xmm1 = _mm256_subs_epi16(xmm0, y0_p_7_1);
+    bit_met_m7_m1 = _mm256_subs_epi16(xmm1, ch_mag_50_over_42_with_sigma2);
+    xmm0 = _mm256_subs_epi16(psi_a_m7_m3, a_sq_m7_m3);
+    xmm1 = _mm256_subs_epi16(xmm0, y0_p_7_3);
+    bit_met_m7_m3 = _mm256_subs_epi16(xmm1, ch_mag_58_over_42_with_sigma2);
+    xmm0 = _mm256_subs_epi16(psi_a_m7_m5, a_sq_m7_m5);
+    xmm1 = _mm256_subs_epi16(xmm0, y0_p_7_5);
+    bit_met_m7_m5 = _mm256_subs_epi16(xmm1, ch_mag_74_over_42_with_sigma2);
+    xmm0 = _mm256_subs_epi16(psi_a_m7_m7, a_sq_m7_m7);
+    xmm1 = _mm256_subs_epi16(xmm0, y0_p_7_7);
+    bit_met_m7_m7 = _mm256_subs_epi16(xmm1, ch_mag_98_over_42_with_sigma2);
+
+    // Detection for 1st bit (LTE mapping)
+    // bit = 1
+    xmm0 = _mm256_max_epi16(bit_met_m7_p7, bit_met_m7_p5);
+    xmm1 = _mm256_max_epi16(bit_met_m7_p3, bit_met_m7_p1);
+    xmm2 = _mm256_max_epi16(bit_met_m7_m1, bit_met_m7_m3);
+    xmm3 = _mm256_max_epi16(bit_met_m7_m5, bit_met_m7_m7);
+    xmm4 = _mm256_max_epi16(xmm0, xmm1);
+    xmm5 = _mm256_max_epi16(xmm2, xmm3);
+    logmax_den_re0 = _mm256_max_epi16(xmm4, xmm5);
+    xmm0 = _mm256_max_epi16(bit_met_m5_p7, bit_met_m5_p5);
+    xmm1 = _mm256_max_epi16(bit_met_m5_p3, bit_met_m5_p1);
+    xmm2 = _mm256_max_epi16(bit_met_m5_m1, bit_met_m5_m3);
+    xmm3 = _mm256_max_epi16(bit_met_m5_m5, bit_met_m5_m7);
+    xmm4 = _mm256_max_epi16(xmm0, xmm1);
+    xmm5 = _mm256_max_epi16(xmm2, xmm3);
+    logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm4);
+    logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm5);
+    xmm0 = _mm256_max_epi16(bit_met_m3_p7, bit_met_m3_p5);
+    xmm1 = _mm256_max_epi16(bit_met_m3_p3, bit_met_m3_p1);
+    xmm2 = _mm256_max_epi16(bit_met_m3_m1, bit_met_m3_m3);
+    xmm3 = _mm256_max_epi16(bit_met_m3_m5, bit_met_m3_m7);
+    xmm4 = _mm256_max_epi16(xmm0, xmm1);
+    xmm5 = _mm256_max_epi16(xmm2, xmm3);
+    logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm4);
+    logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm5);
+    xmm0 = _mm256_max_epi16(bit_met_m1_p7, bit_met_m1_p5);
+    xmm1 = _mm256_max_epi16(bit_met_m1_p3, bit_met_m1_p1);
+    xmm2 = _mm256_max_epi16(bit_met_m1_m1, bit_met_m1_m3);
+    xmm3 = _mm256_max_epi16(bit_met_m1_m5, bit_met_m1_m7);
+    xmm4 = _mm256_max_epi16(xmm0, xmm1);
+    xmm5 = _mm256_max_epi16(xmm2, xmm3);
+    logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm4);
+    logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm5);
+
+    // bit = 0
+    xmm0 = _mm256_max_epi16(bit_met_p7_p7, bit_met_p7_p5);
+    xmm1 = _mm256_max_epi16(bit_met_p7_p3, bit_met_p7_p1);
+    xmm2 = _mm256_max_epi16(bit_met_p7_m1, bit_met_p7_m3);
+    xmm3 = _mm256_max_epi16(bit_met_p7_m5, bit_met_p7_m7);
+    xmm4 = _mm256_max_epi16(xmm0, xmm1);
+    xmm5 = _mm256_max_epi16(xmm2, xmm3);
+    logmax_num_re0 = _mm256_max_epi16(xmm4, xmm5);
+    xmm0 = _mm256_max_epi16(bit_met_p5_p7, bit_met_p5_p5);
+    xmm1 = _mm256_max_epi16(bit_met_p5_p3, bit_met_p5_p1);
+    xmm2 = _mm256_max_epi16(bit_met_p5_m1, bit_met_p5_m3);
+    xmm3 = _mm256_max_epi16(bit_met_p5_m5, bit_met_p5_m7);
+    xmm4 = _mm256_max_epi16(xmm0, xmm1);
+    xmm5 = _mm256_max_epi16(xmm2, xmm3);
+    logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm4);
+    logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm5);
+    xmm0 = _mm256_max_epi16(bit_met_p3_p7, bit_met_p3_p5);
+    xmm1 = _mm256_max_epi16(bit_met_p3_p3, bit_met_p3_p1);
+    xmm2 = _mm256_max_epi16(bit_met_p3_m1, bit_met_p3_m3);
+    xmm3 = _mm256_max_epi16(bit_met_p3_m5, bit_met_p3_m7);
+    xmm4 = _mm256_max_epi16(xmm0, xmm1);
+    xmm5 = _mm256_max_epi16(xmm2, xmm3);
+    logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm4);
+    logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm5);
+    xmm0 = _mm256_max_epi16(bit_met_p1_p7, bit_met_p1_p5);
+    xmm1 = _mm256_max_epi16(bit_met_p1_p3, bit_met_p1_p1);
+    xmm2 = _mm256_max_epi16(bit_met_p1_m1, bit_met_p1_m3);
+    xmm3 = _mm256_max_epi16(bit_met_p1_m5, bit_met_p1_m7);
+    xmm4 = _mm256_max_epi16(xmm0, xmm1);
+    xmm5 = _mm256_max_epi16(xmm2, xmm3);
+    logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm4);
+    logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm5);
+
+    y0r = _mm256_subs_epi16(logmax_num_re0, logmax_den_re0);
+
+    // Detection for 2nd bit (LTE mapping)
+    // bit = 1
+    xmm0 = _mm256_max_epi16(bit_met_p7_m1, bit_met_p5_m1);
+    xmm1 = _mm256_max_epi16(bit_met_p3_m1, bit_met_p1_m1);
+    xmm2 = _mm256_max_epi16(bit_met_m1_m1, bit_met_m3_m1);
+    xmm3 = _mm256_max_epi16(bit_met_m5_m1, bit_met_m7_m1);
+    xmm4 = _mm256_max_epi16(xmm0, xmm1);
+    xmm5 = _mm256_max_epi16(xmm2, xmm3);
+    logmax_den_re0 = _mm256_max_epi16(xmm4, xmm5);
+    xmm0 = _mm256_max_epi16(bit_met_p7_m3, bit_met_p5_m3);
+    xmm1 = _mm256_max_epi16(bit_met_p3_m3, bit_met_p1_m3);
+    xmm2 = _mm256_max_epi16(bit_met_m1_m3, bit_met_m3_m3);
+    xmm3 = _mm256_max_epi16(bit_met_m5_m3, bit_met_m7_m3);
+    xmm4 = _mm256_max_epi16(xmm0, xmm1);
+    xmm5 = _mm256_max_epi16(xmm2, xmm3);
+    logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm4);
+    logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm5);
+    xmm0 = _mm256_max_epi16(bit_met_p7_m5, bit_met_p5_m5);
+    xmm1 = _mm256_max_epi16(bit_met_p3_m5, bit_met_p1_m5);
+    xmm2 = _mm256_max_epi16(bit_met_m1_m5, bit_met_m3_m5);
+    xmm3 = _mm256_max_epi16(bit_met_m5_m5, bit_met_m7_m5);
+    xmm4 = _mm256_max_epi16(xmm0, xmm1);
+    xmm5 = _mm256_max_epi16(xmm2, xmm3);
+    logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm4);
+    logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm5);
+    xmm0 = _mm256_max_epi16(bit_met_p7_m7, bit_met_p5_m7);
+    xmm1 = _mm256_max_epi16(bit_met_p3_m7, bit_met_p1_m7);
+    xmm2 = _mm256_max_epi16(bit_met_m1_m7, bit_met_m3_m7);
+    xmm3 = _mm256_max_epi16(bit_met_m5_m7, bit_met_m7_m7);
+    xmm4 = _mm256_max_epi16(xmm0, xmm1);
+    xmm5 = _mm256_max_epi16(xmm2, xmm3);
+    logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm4);
+    logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm5);
+
+    // bit = 0
+    xmm0 = _mm256_max_epi16(bit_met_p7_p1, bit_met_p5_p1);
+    xmm1 = _mm256_max_epi16(bit_met_p3_p1, bit_met_p1_p1);
+    xmm2 = _mm256_max_epi16(bit_met_m1_p1, bit_met_m3_p1);
+    xmm3 = _mm256_max_epi16(bit_met_m5_p1, bit_met_m7_p1);
+    xmm4 = _mm256_max_epi16(xmm0, xmm1);
+    xmm5 = _mm256_max_epi16(xmm2, xmm3);
+    logmax_num_re0 = _mm256_max_epi16(xmm4, xmm5);
+    xmm0 = _mm256_max_epi16(bit_met_p7_p3, bit_met_p5_p3);
+    xmm1 = _mm256_max_epi16(bit_met_p3_p3, bit_met_p1_p3);
+    xmm2 = _mm256_max_epi16(bit_met_m1_p3, bit_met_m3_p3);
+    xmm3 = _mm256_max_epi16(bit_met_m5_p3, bit_met_m7_p3);
+    xmm4 = _mm256_max_epi16(xmm0, xmm1);
+    xmm5 = _mm256_max_epi16(xmm2, xmm3);
+    logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm4);
+    logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm5);
+    xmm0 = _mm256_max_epi16(bit_met_p7_p5, bit_met_p5_p5);
+    xmm1 = _mm256_max_epi16(bit_met_p3_p5, bit_met_p1_p5);
+    xmm2 = _mm256_max_epi16(bit_met_m1_p5, bit_met_m3_p5);
+    xmm3 = _mm256_max_epi16(bit_met_m5_p5, bit_met_m7_p5);
+    xmm4 = _mm256_max_epi16(xmm0, xmm1);
+    xmm5 = _mm256_max_epi16(xmm2, xmm3);
+    logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm4);
+    logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm5);
+    xmm0 = _mm256_max_epi16(bit_met_p7_p7, bit_met_p5_p7);
+    xmm1 = _mm256_max_epi16(bit_met_p3_p7, bit_met_p1_p7);
+    xmm2 = _mm256_max_epi16(bit_met_m1_p7, bit_met_m3_p7);
+    xmm3 = _mm256_max_epi16(bit_met_m5_p7, bit_met_m7_p7);
+    xmm4 = _mm256_max_epi16(xmm0, xmm1);
+    xmm5 = _mm256_max_epi16(xmm2, xmm3);
+    logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm4);
+    logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm5);
+
+    y1r = _mm256_subs_epi16(logmax_num_re0, logmax_den_re0);
+
+    // Detection for 3rd bit (LTE mapping)
+    xmm0 = _mm256_max_epi16(bit_met_m7_m7, bit_met_m7_m5);
+    xmm1 = _mm256_max_epi16(bit_met_m7_m3, bit_met_m7_m1);
+    xmm2 = _mm256_max_epi16(bit_met_m7_p1, bit_met_m7_p3);
+    xmm3 = _mm256_max_epi16(bit_met_m7_p5, bit_met_m7_p7);
+    xmm4 = _mm256_max_epi16(xmm0, xmm1);
+    xmm5 = _mm256_max_epi16(xmm2, xmm3);
+    logmax_den_re0 = _mm256_max_epi16(xmm4, xmm5);
+    xmm0 = _mm256_max_epi16(bit_met_m5_m7, bit_met_m5_m5);
+    xmm1 = _mm256_max_epi16(bit_met_m5_m3, bit_met_m5_m1);
+    xmm2 = _mm256_max_epi16(bit_met_m5_p1, bit_met_m5_p3);
+    xmm3 = _mm256_max_epi16(bit_met_m5_p5, bit_met_m5_p7);
+    xmm4 = _mm256_max_epi16(xmm0, xmm1);
+    xmm5 = _mm256_max_epi16(xmm2, xmm3);
+    logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm4);
+    logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm5);
+    xmm0 = _mm256_max_epi16(bit_met_p5_m7, bit_met_p5_m5);
+    xmm1 = _mm256_max_epi16(bit_met_p5_m3, bit_met_p5_m1);
+    xmm2 = _mm256_max_epi16(bit_met_p5_p1, bit_met_p5_p3);
+    xmm3 = _mm256_max_epi16(bit_met_p5_p5, bit_met_p5_p7);
+    xmm4 = _mm256_max_epi16(xmm0, xmm1);
+    xmm5 = _mm256_max_epi16(xmm2, xmm3);
+    logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm4);
+    logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm5);
+    xmm0 = _mm256_max_epi16(bit_met_p7_m7, bit_met_p7_m5);
+    xmm1 = _mm256_max_epi16(bit_met_p7_m3, bit_met_p7_m1);
+    xmm2 = _mm256_max_epi16(bit_met_p7_p1, bit_met_p7_p3);
+    xmm3 = _mm256_max_epi16(bit_met_p7_p5, bit_met_p7_p7);
+    xmm4 = _mm256_max_epi16(xmm0, xmm1);
+    xmm5 = _mm256_max_epi16(xmm2, xmm3);
+    logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm4);
+    logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm5);
+
+    xmm0 = _mm256_max_epi16(bit_met_m3_m7, bit_met_m3_m5);
+    xmm1 = _mm256_max_epi16(bit_met_m3_m3, bit_met_m3_m1);
+    xmm2 = _mm256_max_epi16(bit_met_m3_p1, bit_met_m3_p3);
+    xmm3 = _mm256_max_epi16(bit_met_m3_p5, bit_met_m3_p7);
+    xmm4 = _mm256_max_epi16(xmm0, xmm1);
+    xmm5 = _mm256_max_epi16(xmm2, xmm3);
+    logmax_num_re0 = _mm256_max_epi16(xmm4, xmm5);
+    xmm0 = _mm256_max_epi16(bit_met_m1_m7, bit_met_m1_m5);
+    xmm1 = _mm256_max_epi16(bit_met_m1_m3, bit_met_m1_m1);
+    xmm2 = _mm256_max_epi16(bit_met_m1_p1, bit_met_m1_p3);
+    xmm3 = _mm256_max_epi16(bit_met_m1_p5, bit_met_m1_p7);
+    xmm4 = _mm256_max_epi16(xmm0, xmm1);
+    xmm5 = _mm256_max_epi16(xmm2, xmm3);
+    logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm4);
+    logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm5);
+    xmm0 = _mm256_max_epi16(bit_met_p1_m7, bit_met_p1_m5);
+    xmm1 = _mm256_max_epi16(bit_met_p1_m3, bit_met_p1_m1);
+    xmm2 = _mm256_max_epi16(bit_met_p1_p1, bit_met_p1_p3);
+    xmm3 = _mm256_max_epi16(bit_met_p1_p5, bit_met_p1_p7);
+    xmm4 = _mm256_max_epi16(xmm0, xmm1);
+    xmm5 = _mm256_max_epi16(xmm2, xmm3);
+    logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm4);
+    logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm5);
+    xmm0 = _mm256_max_epi16(bit_met_p3_m7, bit_met_p3_m5);
+    xmm1 = _mm256_max_epi16(bit_met_p3_m3, bit_met_p3_m1);
+    xmm2 = _mm256_max_epi16(bit_met_p3_p1, bit_met_p3_p3);
+    xmm3 = _mm256_max_epi16(bit_met_p3_p5, bit_met_p3_p7);
+    xmm4 = _mm256_max_epi16(xmm0, xmm1);
+    xmm5 = _mm256_max_epi16(xmm2, xmm3);
+    logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm4);
+    logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm5);
+
+    y2r = _mm256_subs_epi16(logmax_num_re0, logmax_den_re0);
+
+    // Detection for 4th bit (LTE mapping)
+    xmm0 = _mm256_max_epi16(bit_met_p7_p7, bit_met_p5_p7);
+    xmm1 = _mm256_max_epi16(bit_met_p3_p7, bit_met_p1_p7);
+    xmm2 = _mm256_max_epi16(bit_met_m1_p7, bit_met_m3_p7);
+    xmm3 = _mm256_max_epi16(bit_met_m5_p7, bit_met_m7_p7);
+    xmm4 = _mm256_max_epi16(xmm0, xmm1);
+    xmm5 = _mm256_max_epi16(xmm2, xmm3);
+    logmax_den_re0 = _mm256_max_epi16(xmm4, xmm5);
+    xmm0 = _mm256_max_epi16(bit_met_p7_p5, bit_met_p5_p5);
+    xmm1 = _mm256_max_epi16(bit_met_p3_p5, bit_met_p1_p5);
+    xmm2 = _mm256_max_epi16(bit_met_m1_p5, bit_met_m3_p5);
+    xmm3 = _mm256_max_epi16(bit_met_m5_p5, bit_met_m5_p5);
+    xmm4 = _mm256_max_epi16(xmm0, xmm1);
+    xmm5 = _mm256_max_epi16(xmm2, xmm3);
+    logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm4);
+    logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm5);
+    xmm0 = _mm256_max_epi16(bit_met_p7_m5, bit_met_p5_m5);
+    xmm1 = _mm256_max_epi16(bit_met_p3_m5, bit_met_p1_m5);
+    xmm2 = _mm256_max_epi16(bit_met_m1_m5, bit_met_m3_m5);
+    xmm3 = _mm256_max_epi16(bit_met_m5_m5, bit_met_m7_m5);
+    xmm4 = _mm256_max_epi16(xmm0, xmm1);
+    xmm5 = _mm256_max_epi16(xmm2, xmm3);
+    logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm4);
+    logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm5);
+    xmm0 = _mm256_max_epi16(bit_met_p7_m7, bit_met_p5_m7);
+    xmm1 = _mm256_max_epi16(bit_met_p3_m7, bit_met_p1_m7);
+    xmm2 = _mm256_max_epi16(bit_met_m1_m7, bit_met_m3_m7);
+    xmm3 = _mm256_max_epi16(bit_met_m5_m7, bit_met_m7_m7);
+    xmm4 = _mm256_max_epi16(xmm0, xmm1);
+    xmm5 = _mm256_max_epi16(xmm2, xmm3);
+    logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm4);
+    logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm5);
+
+    xmm0 = _mm256_max_epi16(bit_met_p7_m1, bit_met_p5_m1);
+    xmm1 = _mm256_max_epi16(bit_met_p3_m1, bit_met_p1_m1);
+    xmm2 = _mm256_max_epi16(bit_met_m1_m1, bit_met_m3_m1);
+    xmm3 = _mm256_max_epi16(bit_met_m5_m1, bit_met_m7_m1);
+    xmm4 = _mm256_max_epi16(xmm0, xmm1);
+    xmm5 = _mm256_max_epi16(xmm2, xmm3);
+    logmax_num_re0 = _mm256_max_epi16(xmm4, xmm5);
+    xmm0 = _mm256_max_epi16(bit_met_p7_m3, bit_met_p5_m3);
+    xmm1 = _mm256_max_epi16(bit_met_p3_m3, bit_met_p1_m3);
+    xmm2 = _mm256_max_epi16(bit_met_m1_m3, bit_met_m3_m3);
+    xmm3 = _mm256_max_epi16(bit_met_m5_m3, bit_met_m7_m3);
+    xmm4 = _mm256_max_epi16(xmm0, xmm1);
+    xmm5 = _mm256_max_epi16(xmm2, xmm3);
+    logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm4);
+    logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm5);
+    xmm0 = _mm256_max_epi16(bit_met_p7_p1, bit_met_p5_p1);
+    xmm1 = _mm256_max_epi16(bit_met_p3_p1, bit_met_p1_p1);
+    xmm2 = _mm256_max_epi16(bit_met_m1_p1, bit_met_m3_p1);
+    xmm3 = _mm256_max_epi16(bit_met_m5_p1, bit_met_m7_p1);
+    xmm4 = _mm256_max_epi16(xmm0, xmm1);
+    xmm5 = _mm256_max_epi16(xmm2, xmm3);
+    logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm4);
+    logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm5);
+    xmm0 = _mm256_max_epi16(bit_met_p7_p3, bit_met_p5_p3);
+    xmm1 = _mm256_max_epi16(bit_met_p3_p3, bit_met_p1_p3);
+    xmm2 = _mm256_max_epi16(bit_met_m1_p3, bit_met_m3_p3);
+    xmm3 = _mm256_max_epi16(bit_met_m5_p3, bit_met_m7_p3);
+    xmm4 = _mm256_max_epi16(xmm0, xmm1);
+    xmm5 = _mm256_max_epi16(xmm2, xmm3);
+    logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm4);
+    logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm5);
+
+    y0i = _mm256_subs_epi16(logmax_num_re0, logmax_den_re0);
+
+
+    // Detection for 5th bit (LTE mapping)
+    xmm0 = _mm256_max_epi16(bit_met_m7_m7, bit_met_m7_m5);
+    xmm1 = _mm256_max_epi16(bit_met_m7_m3, bit_met_m7_m1);
+    xmm2 = _mm256_max_epi16(bit_met_m7_p1, bit_met_m7_p3);
+    xmm3 = _mm256_max_epi16(bit_met_m7_p5, bit_met_m7_p7);
+    xmm4 = _mm256_max_epi16(xmm0, xmm1);
+    xmm5 = _mm256_max_epi16(xmm2, xmm3);
+    logmax_den_re0 = _mm256_max_epi16(xmm4, xmm5);
+    xmm0 = _mm256_max_epi16(bit_met_m1_m7, bit_met_m1_m5);
+    xmm1 = _mm256_max_epi16(bit_met_m1_m3, bit_met_m1_m1);
+    xmm2 = _mm256_max_epi16(bit_met_m1_p1, bit_met_m1_p3);
+    xmm3 = _mm256_max_epi16(bit_met_m1_p5, bit_met_m1_p7);
+    xmm4 = _mm256_max_epi16(xmm0, xmm1);
+    xmm5 = _mm256_max_epi16(xmm2, xmm3);
+    logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm4);
+    logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm5);
+    xmm0 = _mm256_max_epi16(bit_met_p1_m7, bit_met_p1_m5);
+    xmm1 = _mm256_max_epi16(bit_met_p1_m3, bit_met_p1_m1);
+    xmm2 = _mm256_max_epi16(bit_met_p1_p1, bit_met_p1_p3);
+    xmm3 = _mm256_max_epi16(bit_met_p1_p5, bit_met_p1_p7);
+    xmm4 = _mm256_max_epi16(xmm0, xmm1);
+    xmm5 = _mm256_max_epi16(xmm2, xmm3);
+    logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm4);
+    logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm5);
+    xmm0 = _mm256_max_epi16(bit_met_p7_m7, bit_met_p7_m5);
+    xmm1 = _mm256_max_epi16(bit_met_p7_m3, bit_met_p7_m1);
+    xmm2 = _mm256_max_epi16(bit_met_p7_p1, bit_met_p7_p3);
+    xmm3 = _mm256_max_epi16(bit_met_p7_p5, bit_met_p7_p7);
+    xmm4 = _mm256_max_epi16(xmm0, xmm1);
+    xmm5 = _mm256_max_epi16(xmm2, xmm3);
+    logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm4);
+    logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm5);
+
+    xmm0 = _mm256_max_epi16(bit_met_m5_m7, bit_met_m5_m5);
+    xmm1 = _mm256_max_epi16(bit_met_m5_m3, bit_met_m5_m1);
+    xmm2 = _mm256_max_epi16(bit_met_m5_p1, bit_met_m5_p3);
+    xmm3 = _mm256_max_epi16(bit_met_m5_p5, bit_met_m5_p7);
+    xmm4 = _mm256_max_epi16(xmm0, xmm1);
+    xmm5 = _mm256_max_epi16(xmm2, xmm3);
+    logmax_num_re0 = _mm256_max_epi16(xmm4, xmm5);
+    xmm0 = _mm256_max_epi16(bit_met_m3_m7, bit_met_m3_m5);
+    xmm1 = _mm256_max_epi16(bit_met_m3_m3, bit_met_m3_m1);
+    xmm2 = _mm256_max_epi16(bit_met_m3_p1, bit_met_m3_p3);
+    xmm3 = _mm256_max_epi16(bit_met_m3_p5, bit_met_m3_p7);
+    xmm4 = _mm256_max_epi16(xmm0, xmm1);
+    xmm5 = _mm256_max_epi16(xmm2, xmm3);
+    logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm4);
+    logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm5);
+    xmm0 = _mm256_max_epi16(bit_met_p3_m7, bit_met_p3_m5);
+    xmm1 = _mm256_max_epi16(bit_met_p3_m3, bit_met_p3_m1);
+    xmm2 = _mm256_max_epi16(bit_met_p3_p1, bit_met_p3_p3);
+    xmm3 = _mm256_max_epi16(bit_met_p3_p5, bit_met_p3_p7);
+    xmm4 = _mm256_max_epi16(xmm0, xmm1);
+    xmm5 = _mm256_max_epi16(xmm2, xmm3);
+    logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm4);
+    logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm5);
+    xmm0 = _mm256_max_epi16(bit_met_p5_m7, bit_met_p5_m5);
+    xmm1 = _mm256_max_epi16(bit_met_p5_m3, bit_met_p5_m1);
+    xmm2 = _mm256_max_epi16(bit_met_p5_p1, bit_met_p5_p3);
+    xmm3 = _mm256_max_epi16(bit_met_p5_p5, bit_met_p5_p7);
+    xmm4 = _mm256_max_epi16(xmm0, xmm1);
+    xmm5 = _mm256_max_epi16(xmm2, xmm3);
+    logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm4);
+    logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm5);
+
+    y1i = _mm256_subs_epi16(logmax_num_re0, logmax_den_re0);
+
+    // Detection for 6th bit (LTE mapping)
+    xmm0 = _mm256_max_epi16(bit_met_p7_p7, bit_met_p5_p7);
+    xmm1 = _mm256_max_epi16(bit_met_p3_p7, bit_met_p1_p7);
+    xmm2 = _mm256_max_epi16(bit_met_m1_p7, bit_met_m3_p7);
+    xmm3 = _mm256_max_epi16(bit_met_m5_p7, bit_met_m7_p7);
+    xmm4 = _mm256_max_epi16(xmm0, xmm1);
+    xmm5 = _mm256_max_epi16(xmm2, xmm3);
+    logmax_den_re0 = _mm256_max_epi16(xmm4, xmm5);
+    xmm0 = _mm256_max_epi16(bit_met_p7_p1, bit_met_p5_p1);
+    xmm1 = _mm256_max_epi16(bit_met_p3_p1, bit_met_p1_p1);
+    xmm2 = _mm256_max_epi16(bit_met_m1_p1, bit_met_m3_p1);
+    xmm3 = _mm256_max_epi16(bit_met_m5_p1, bit_met_m5_p1);
+    xmm4 = _mm256_max_epi16(xmm0, xmm1);
+    xmm5 = _mm256_max_epi16(xmm2, xmm3);
+    logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm4);
+    logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm5);
+    xmm0 = _mm256_max_epi16(bit_met_p7_m1, bit_met_p5_m1);
+    xmm1 = _mm256_max_epi16(bit_met_p3_m1, bit_met_p1_m1);
+    xmm2 = _mm256_max_epi16(bit_met_m1_m1, bit_met_m3_m1);
+    xmm3 = _mm256_max_epi16(bit_met_m5_m1, bit_met_m7_m1);
+    xmm4 = _mm256_max_epi16(xmm0, xmm1);
+    xmm5 = _mm256_max_epi16(xmm2, xmm3);
+    logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm4);
+    logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm5);
+    xmm0 = _mm256_max_epi16(bit_met_p7_m7, bit_met_p5_m7);
+    xmm1 = _mm256_max_epi16(bit_met_p3_m7, bit_met_p1_m7);
+    xmm2 = _mm256_max_epi16(bit_met_m1_m7, bit_met_m3_m7);
+    xmm3 = _mm256_max_epi16(bit_met_m5_m7, bit_met_m7_m7);
+    xmm4 = _mm256_max_epi16(xmm0, xmm1);
+    xmm5 = _mm256_max_epi16(xmm2, xmm3);
+    logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm4);
+    logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm5);
+
+    xmm0 = _mm256_max_epi16(bit_met_p7_m5, bit_met_p5_m5);
+    xmm1 = _mm256_max_epi16(bit_met_p3_m5, bit_met_p1_m5);
+    xmm2 = _mm256_max_epi16(bit_met_m1_m5, bit_met_m3_m5);
+    xmm3 = _mm256_max_epi16(bit_met_m5_m5, bit_met_m7_m5);
+    xmm4 = _mm256_max_epi16(xmm0, xmm1);
+    xmm5 = _mm256_max_epi16(xmm2, xmm3);
+    logmax_num_re0 = _mm256_max_epi16(xmm4, xmm5);
+    xmm0 = _mm256_max_epi16(bit_met_p7_m3, bit_met_p5_m3);
+    xmm1 = _mm256_max_epi16(bit_met_p3_m3, bit_met_p1_m3);
+    xmm2 = _mm256_max_epi16(bit_met_m1_m3, bit_met_m3_m3);
+    xmm3 = _mm256_max_epi16(bit_met_m5_m3, bit_met_m7_m3);
+    xmm4 = _mm256_max_epi16(xmm0, xmm1);
+    xmm5 = _mm256_max_epi16(xmm2, xmm3);
+    logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm4);
+    logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm5);
+    xmm0 = _mm256_max_epi16(bit_met_p7_p3, bit_met_p5_p3);
+    xmm1 = _mm256_max_epi16(bit_met_p3_p3, bit_met_p1_p3);
+    xmm2 = _mm256_max_epi16(bit_met_m1_p3, bit_met_m3_p3);
+    xmm3 = _mm256_max_epi16(bit_met_m5_p3, bit_met_m7_p3);
+    xmm4 = _mm256_max_epi16(xmm0, xmm1);
+    xmm5 = _mm256_max_epi16(xmm2, xmm3);
+    logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm4);
+    logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm5);
+    xmm0 = _mm256_max_epi16(bit_met_p7_p5, bit_met_p5_p5);
+    xmm1 = _mm256_max_epi16(bit_met_p3_p5, bit_met_p1_p5);
+    xmm2 = _mm256_max_epi16(bit_met_m1_p5, bit_met_m3_p5);
+    xmm3 = _mm256_max_epi16(bit_met_m5_p5, bit_met_m7_p5);
+    xmm4 = _mm256_max_epi16(xmm0, xmm1);
+    xmm5 = _mm256_max_epi16(xmm2, xmm3);
+    logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm4);
+    logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm5);
+
+    y2i = _mm256_subs_epi16(logmax_num_re0, logmax_den_re0);
+
+    // map to output stream, difficult to do in SIMD since we have 6 16bit LLRs
+    // RE 1
+    j = 48*i;
+    stream0_out[j + 0] = ((short *)&y0r)[0];
+    stream0_out[j + 1] = ((short *)&y1r)[0];
+    stream0_out[j + 2] = ((short *)&y2r)[0];
+    stream0_out[j + 3] = ((short *)&y0i)[0];
+    stream0_out[j + 4] = ((short *)&y1i)[0];
+    stream0_out[j + 5] = ((short *)&y2i)[0];
+    // RE 2
+    stream0_out[j + 6] = ((short *)&y0r)[1];
+    stream0_out[j + 7] = ((short *)&y1r)[1];
+    stream0_out[j + 8] = ((short *)&y2r)[1];
+    stream0_out[j + 9] = ((short *)&y0i)[1];
+    stream0_out[j + 10] = ((short *)&y1i)[1];
+    stream0_out[j + 11] = ((short *)&y2i)[1];
+    // RE 3
+    stream0_out[j + 12] = ((short *)&y0r)[2];
+    stream0_out[j + 13] = ((short *)&y1r)[2];
+    stream0_out[j + 14] = ((short *)&y2r)[2];
+    stream0_out[j + 15] = ((short *)&y0i)[2];
+    stream0_out[j + 16] = ((short *)&y1i)[2];
+    stream0_out[j + 17] = ((short *)&y2i)[2];
+    // RE 4
+    stream0_out[j + 18] = ((short *)&y0r)[3];
+    stream0_out[j + 19] = ((short *)&y1r)[3];
+    stream0_out[j + 20] = ((short *)&y2r)[3];
+    stream0_out[j + 21] = ((short *)&y0i)[3];
+    stream0_out[j + 22] = ((short *)&y1i)[3];
+    stream0_out[j + 23] = ((short *)&y2i)[3];
+    // RE 5
+    stream0_out[j + 24] = ((short *)&y0r)[4];
+    stream0_out[j + 25] = ((short *)&y1r)[4];
+    stream0_out[j + 26] = ((short *)&y2r)[4];
+    stream0_out[j + 27] = ((short *)&y0i)[4];
+    stream0_out[j + 28] = ((short *)&y1i)[4];
+    stream0_out[j + 29] = ((short *)&y2i)[4];
+    // RE 6
+    stream0_out[j + 30] = ((short *)&y0r)[5];
+    stream0_out[j + 31] = ((short *)&y1r)[5];
+    stream0_out[j + 32] = ((short *)&y2r)[5];
+    stream0_out[j + 33] = ((short *)&y0i)[5];
+    stream0_out[j + 34] = ((short *)&y1i)[5];
+    stream0_out[j + 35] = ((short *)&y2i)[5];
+    // RE 7
+    stream0_out[j + 36] = ((short *)&y0r)[6];
+    stream0_out[j + 37] = ((short *)&y1r)[6];
+    stream0_out[j + 38] = ((short *)&y2r)[6];
+    stream0_out[j + 39] = ((short *)&y0i)[6];
+    stream0_out[j + 40] = ((short *)&y1i)[6];
+    stream0_out[j + 41] = ((short *)&y2i)[6];
+    // RE 8
+    stream0_out[j + 42] = ((short *)&y0r)[7];
+    stream0_out[j + 43] = ((short *)&y1r)[7];
+    stream0_out[j + 44] = ((short *)&y2r)[7];
+    stream0_out[j + 45] = ((short *)&y0i)[7];
+    stream0_out[j + 46] = ((short *)&y1i)[7];
+    stream0_out[j + 47] = ((short *)&y2i)[7];
+
+    // RE 9
+    stream0_out[j + 48] = ((short *)&y0r)[8];
+    stream0_out[j + 49] = ((short *)&y1r)[8];
+    stream0_out[j + 50] = ((short *)&y2r)[8];
+    stream0_out[j + 51] = ((short *)&y0i)[8];
+    stream0_out[j + 52] = ((short *)&y1i)[8];
+    stream0_out[j + 53] = ((short *)&y2i)[8];
+    // RE 10
+    stream0_out[j + 54] = ((short *)&y0r)[9];
+    stream0_out[j + 55] = ((short *)&y1r)[9];
+    stream0_out[j + 56] = ((short *)&y2r)[9];
+    stream0_out[j + 57] = ((short *)&y0i)[9];
+    stream0_out[j + 58] = ((short *)&y1i)[9];
+    stream0_out[j + 59] = ((short *)&y2i)[9];
+    // RE 11
+    stream0_out[j + 60] = ((short *)&y0r)[10];
+    stream0_out[j + 61] = ((short *)&y1r)[10];
+    stream0_out[j + 62] = ((short *)&y2r)[10];
+    stream0_out[j + 63] = ((short *)&y0i)[10];
+    stream0_out[j + 64] = ((short *)&y1i)[10];
+    stream0_out[j + 65] = ((short *)&y2i)[10];
+    // RE 12
+    stream0_out[j + 66] = ((short *)&y0r)[11];
+    stream0_out[j + 67] = ((short *)&y1r)[11];
+    stream0_out[j + 68] = ((short *)&y2r)[11];
+    stream0_out[j + 69] = ((short *)&y0i)[11];
+    stream0_out[j + 70] = ((short *)&y1i)[11];
+    stream0_out[j + 71] = ((short *)&y2i)[11];
+    // RE 13
+    stream0_out[j + 72] = ((short *)&y0r)[12];
+    stream0_out[j + 73] = ((short *)&y1r)[12];
+    stream0_out[j + 74] = ((short *)&y2r)[12];
+    stream0_out[j + 75] = ((short *)&y0i)[12];
+    stream0_out[j + 76] = ((short *)&y1i)[12];
+    stream0_out[j + 77] = ((short *)&y2i)[12];
+    // RE 14
+    stream0_out[j + 78] = ((short *)&y0r)[13];
+    stream0_out[j + 79] = ((short *)&y1r)[13];
+    stream0_out[j + 80] = ((short *)&y2r)[13];
+    stream0_out[j + 81] = ((short *)&y0i)[13];
+    stream0_out[j + 82] = ((short *)&y1i)[13];
+    stream0_out[j + 83] = ((short *)&y2i)[13];
+    // RE 15
+    stream0_out[j + 84] = ((short *)&y0r)[14];
+    stream0_out[j + 85] = ((short *)&y1r)[14];
+    stream0_out[j + 86] = ((short *)&y2r)[14];
+    stream0_out[j + 87] = ((short *)&y0i)[14];
+    stream0_out[j + 88] = ((short *)&y1i)[14];
+    stream0_out[j + 89] = ((short *)&y2i)[14];
+    // RE 16
+    stream0_out[j + 90] = ((short *)&y0r)[15];
+    stream0_out[j + 91] = ((short *)&y1r)[15];
+    stream0_out[j + 92] = ((short *)&y2r)[15];
+    stream0_out[j + 93] = ((short *)&y0i)[15];
+    stream0_out[j + 94] = ((short *)&y1i)[15];
+    stream0_out[j + 95] = ((short *)&y2i)[15];
+
+#elif defined(__arm__)
+
+#endif
+  }
+
+#if defined(__x86_64__) || defined(__i386__)
+  _mm_empty();
+  _m_empty();
+#endif
+
+}
+
+void qam64_qam64_avx2(int32_t *stream0_in,
+                      int32_t *stream1_in,
+                      int32_t *ch_mag,
+                      int32_t *ch_mag_i,
+                      int16_t *stream0_out,
+                      int32_t *rho01,
+                      int length
+    )
+{
+
+  /*
+    Author: S. Wagner
+    Date: 28-02-17
+
+    Input:
+    stream0_in:  MF filter for 1st stream, i.e., y0=h0'*y
+    stream1_in:  MF filter for 2nd stream, i.e., y1=h1'*y
+    ch_mag:      4*h0/sqrt(42), [Re0 Im0 Re1 Im1] s.t. Im0=Re0, Im1=Re1, etc
+    ch_mag_i:    4*h1/sqrt(42), [Re0 Im0 Re1 Im1] s.t. Im0=Re0, Im1=Re1, etc
+    rho01:       Channel cross correlation, i.e., h1'*h0
+
+    Output:
+    stream0_out: output LLRs for 1st stream
+  */
+
+#if defined(__x86_64__) || defined(__i386__)
+
+  __m256i *rho01_256i      = (__m256i *)rho01;
+  __m256i *stream0_256i_in = (__m256i *)stream0_in;
+  __m256i *stream1_256i_in = (__m256i *)stream1_in;
+  __m256i *ch_mag_256i     = (__m256i *)ch_mag;
+  __m256i *ch_mag_256i_i   = (__m256i *)ch_mag_i;
+
+  __m256i ONE_OVER_SQRT_42              = _mm256_broadcastw_epi16(_mm_set1_epi16(10112)); // round(1/sqrt(42)*2^16)
+  __m256i THREE_OVER_SQRT_42            = _mm256_broadcastw_epi16(_mm_set1_epi16(30337)); // round(3/sqrt(42)*2^16)
+  __m256i FIVE_OVER_SQRT_42             = _mm256_broadcastw_epi16(_mm_set1_epi16(25281)); // round(5/sqrt(42)*2^15)
+  __m256i SEVEN_OVER_SQRT_42            = _mm256_broadcastw_epi16(_mm_set1_epi16(17697)); // round(7/sqrt(42)*2^14) Q2.14
+  __m256i ONE_OVER_SQRT_2               = _mm256_broadcastw_epi16(_mm_set1_epi16(23170)); // round(1/sqrt(2)*2^15)
+  __m256i ONE_OVER_SQRT_2_42            = _mm256_broadcastw_epi16(_mm_set1_epi16(3575));  // round(1/sqrt(2*42)*2^15)
+  __m256i THREE_OVER_SQRT_2_42          = _mm256_broadcastw_epi16(_mm_set1_epi16(10726)); // round(3/sqrt(2*42)*2^15)
+  __m256i FIVE_OVER_SQRT_2_42           = _mm256_broadcastw_epi16(_mm_set1_epi16(17876)); // round(5/sqrt(2*42)*2^15)
+  __m256i SEVEN_OVER_SQRT_2_42          = _mm256_broadcastw_epi16(_mm_set1_epi16(25027)); // round(7/sqrt(2*42)*2^15)
+  __m256i FORTYNINE_OVER_FOUR_SQRT_42   = _mm256_broadcastw_epi16(_mm_set1_epi16(30969)); // round(49/(4*sqrt(42))*2^14), Q2.14
+  __m256i THIRTYSEVEN_OVER_FOUR_SQRT_42 = _mm256_broadcastw_epi16(_mm_set1_epi16(23385)); // round(37/(4*sqrt(42))*2^14), Q2.14
+  __m256i TWENTYFIVE_OVER_FOUR_SQRT_42  = _mm256_broadcastw_epi16(_mm_set1_epi16(31601)); // round(25/(4*sqrt(42))*2^15)
+  __m256i TWENTYNINE_OVER_FOUR_SQRT_42  = _mm256_broadcastw_epi16(_mm_set1_epi16(18329)); // round(29/(4*sqrt(42))*2^14), Q2.14
+  __m256i SEVENTEEN_OVER_FOUR_SQRT_42   = _mm256_broadcastw_epi16(_mm_set1_epi16(21489)); // round(17/(4*sqrt(42))*2^15)
+  __m256i NINE_OVER_FOUR_SQRT_42        = _mm256_broadcastw_epi16(_mm_set1_epi16(11376)); // round(9/(4*sqrt(42))*2^15)
+  __m256i THIRTEEN_OVER_FOUR_SQRT_42    = _mm256_broadcastw_epi16(_mm_set1_epi16(16433)); // round(13/(4*sqrt(42))*2^15)
+  __m256i FIVE_OVER_FOUR_SQRT_42        = _mm256_broadcastw_epi16(_mm_set1_epi16(6320));  // round(5/(4*sqrt(42))*2^15)
+  __m256i ONE_OVER_FOUR_SQRT_42         = _mm256_broadcastw_epi16(_mm_set1_epi16(1264));  // round(1/(4*sqrt(42))*2^15)
+  __m256i SQRT_42_OVER_FOUR             = _mm256_broadcastw_epi16(_mm_set1_epi16(13272)); // round(sqrt(42)/4*2^13), Q3.12
+
+  __m256i ch_mag_des;
+  __m256i ch_mag_int;
+  __m256i ch_mag_98_over_42_with_sigma2;
+  __m256i ch_mag_74_over_42_with_sigma2;
+  __m256i ch_mag_58_over_42_with_sigma2;
+  __m256i ch_mag_50_over_42_with_sigma2;
+  __m256i ch_mag_34_over_42_with_sigma2;
+  __m256i ch_mag_18_over_42_with_sigma2;
+  __m256i ch_mag_26_over_42_with_sigma2;
+  __m256i ch_mag_10_over_42_with_sigma2;
+  __m256i ch_mag_2_over_42_with_sigma2;
+  __m256i y0r_one_over_sqrt_21;
+  __m256i y0r_three_over_sqrt_21;
+  __m256i y0r_five_over_sqrt_21;
+  __m256i y0r_seven_over_sqrt_21;
+  __m256i y0i_one_over_sqrt_21;
+  __m256i y0i_three_over_sqrt_21;
+  __m256i y0i_five_over_sqrt_21;
+  __m256i y0i_seven_over_sqrt_21;
+  __m256i ch_mag_int_with_sigma2;
+  __m256i two_ch_mag_int_with_sigma2;
+  __m256i three_ch_mag_int_with_sigma2;
+#elif defined(__arm__)
+
+#endif
+
+  int i,j;
+  uint32_t len256 = (length)>>3;
+
+  for (i=0; i<len256; i+=2) {
+
+#if defined(__x86_64__) || defined(__i386__)
+
+    // Get rho
+      /*
+    xmm0 = rho01_256i[i];
+    xmm1 = rho01_256i[i+1];
+    xmm0 = _mm256_shufflelo_epi16(xmm0,0xd8); //_MM_SHUFFLE(0,2,1,3));
+    xmm0 = _mm256_shufflehi_epi16(xmm0,0xd8); //_MM_SHUFFLE(0,2,1,3));
+    xmm0 = _mm256_shuffle_epi32(xmm0,0xd8); //_MM_SHUFFLE(0,2,1,3));
+
+    xmm1 = _mm256_shufflelo_epi16(xmm1,0xd8); //_MM_SHUFFLE(0,2,1,3));
+    xmm1 = _mm256_shufflehi_epi16(xmm1,0xd8); //_MM_SHUFFLE(0,2,1,3));
+    xmm1 = _mm256_shuffle_epi32(xmm1,0xd8); //_MM_SHUFFLE(0,2,1,3));
+
+    //xmm0 = [Re(0,1,2,3)   Im(0,1,2,3)   Re(4,5,6,7)     Im(4,5,6,7)]
+    //xmm1 = [Re(8,9,10,11) Im(8,9,10,11) Re(12,13,14,15) Im(12,13,14,15)]
+
+    xmm2 = _mm256_unpacklo_epi64(xmm0, xmm1);
+    //xmm2 = [Re(0,1,2,3) Re(8,9,10,11) Re(4,5,6,7) Re(12,13,14,15)]
+    xmm2 = _mm256_permute4x64_epi64(xmm2,0xd8); // Re(rho)
+
+    xmm3 = _mm256_unpackhi_epi64(xmm0, xmm1);
+    //xmm3 = [Im(0,1,2,3) Im(8,9,10,11) Im(4,5,6,7) Im(12,13,14,15)]
+    xmm3 = _mm256_permute4x64_epi64(xmm3,0xd8); // Im(rho)
+      */
+
+    seperate_real_imag_parts(&xmm2, &xmm3, rho01_256i[i], rho01_256i[i+1]);
+
+    rho_rpi = _mm256_adds_epi16(xmm2,xmm3); // rho = Re(rho) + Im(rho)
+    rho_rmi = _mm256_subs_epi16(xmm2,xmm3); // rho* = Re(rho) - Im(rho)
+
+    // Compute the different rhos
+    rho_rpi_1_1 = _mm256_mulhi_epi16(rho_rpi, ONE_OVER_SQRT_42);
+    rho_rmi_1_1 = _mm256_mulhi_epi16(rho_rmi, ONE_OVER_SQRT_42);
+    rho_rpi_3_3 = _mm256_mulhi_epi16(rho_rpi, THREE_OVER_SQRT_42);
+    rho_rmi_3_3 = _mm256_mulhi_epi16(rho_rmi, THREE_OVER_SQRT_42);
+    rho_rpi_5_5 = _mm256_mulhi_epi16(rho_rpi, FIVE_OVER_SQRT_42);
+    rho_rmi_5_5 = _mm256_mulhi_epi16(rho_rmi, FIVE_OVER_SQRT_42);
+    rho_rpi_7_7 = _mm256_mulhi_epi16(rho_rpi, SEVEN_OVER_SQRT_42);
+    rho_rmi_7_7 = _mm256_mulhi_epi16(rho_rmi, SEVEN_OVER_SQRT_42);
+
+    rho_rpi_5_5 = _mm256_slli_epi16(rho_rpi_5_5, 1);
+    rho_rmi_5_5 = _mm256_slli_epi16(rho_rmi_5_5, 1);
+    rho_rpi_7_7 = _mm256_slli_epi16(rho_rpi_7_7, 2);
+    rho_rmi_7_7 = _mm256_slli_epi16(rho_rmi_7_7, 2);
+
+    xmm4 = _mm256_mulhi_epi16(xmm2, ONE_OVER_SQRT_42);
+    xmm5 = _mm256_mulhi_epi16(xmm3, ONE_OVER_SQRT_42);
+    xmm6 = _mm256_mulhi_epi16(xmm3, THREE_OVER_SQRT_42);
+    xmm7 = _mm256_mulhi_epi16(xmm3, FIVE_OVER_SQRT_42);
+    xmm8 = _mm256_mulhi_epi16(xmm3, SEVEN_OVER_SQRT_42);
+    xmm7 = _mm256_slli_epi16(xmm7, 1);
+    xmm8 = _mm256_slli_epi16(xmm8, 2);
+
+    rho_rpi_1_3 = _mm256_adds_epi16(xmm4, xmm6);
+    rho_rmi_1_3 = _mm256_subs_epi16(xmm4, xmm6);
+    rho_rpi_1_5 = _mm256_adds_epi16(xmm4, xmm7);
+    rho_rmi_1_5 = _mm256_subs_epi16(xmm4, xmm7);
+    rho_rpi_1_7 = _mm256_adds_epi16(xmm4, xmm8);
+    rho_rmi_1_7 = _mm256_subs_epi16(xmm4, xmm8);
+
+    xmm4 = _mm256_mulhi_epi16(xmm2, THREE_OVER_SQRT_42);
+    rho_rpi_3_1 = _mm256_adds_epi16(xmm4, xmm5);
+    rho_rmi_3_1 = _mm256_subs_epi16(xmm4, xmm5);
+    rho_rpi_3_5 = _mm256_adds_epi16(xmm4, xmm7);
+    rho_rmi_3_5 = _mm256_subs_epi16(xmm4, xmm7);
+    rho_rpi_3_7 = _mm256_adds_epi16(xmm4, xmm8);
+    rho_rmi_3_7 = _mm256_subs_epi16(xmm4, xmm8);
+
+    xmm4 = _mm256_mulhi_epi16(xmm2, FIVE_OVER_SQRT_42);
+    xmm4 = _mm256_slli_epi16(xmm4, 1);
+    rho_rpi_5_1 = _mm256_adds_epi16(xmm4, xmm5);
+    rho_rmi_5_1 = _mm256_subs_epi16(xmm4, xmm5);
+    rho_rpi_5_3 = _mm256_adds_epi16(xmm4, xmm6);
+    rho_rmi_5_3 = _mm256_subs_epi16(xmm4, xmm6);
+    rho_rpi_5_7 = _mm256_adds_epi16(xmm4, xmm8);
+    rho_rmi_5_7 = _mm256_subs_epi16(xmm4, xmm8);
+
+    xmm4 = _mm256_mulhi_epi16(xmm2, SEVEN_OVER_SQRT_42);
+    xmm4 = _mm256_slli_epi16(xmm4, 2);
+    rho_rpi_7_1 = _mm256_adds_epi16(xmm4, xmm5);
+    rho_rmi_7_1 = _mm256_subs_epi16(xmm4, xmm5);
+    rho_rpi_7_3 = _mm256_adds_epi16(xmm4, xmm6);
+    rho_rmi_7_3 = _mm256_subs_epi16(xmm4, xmm6);
+    rho_rpi_7_5 = _mm256_adds_epi16(xmm4, xmm7);
+    rho_rmi_7_5 = _mm256_subs_epi16(xmm4, xmm7);
+
+    // Rearrange interfering MF output
+    /*
+    xmm0 = stream1_256i_in[i];
+    xmm1 = stream1_256i_in[i+1];
+    xmm0 = _mm256_shufflelo_epi16(xmm0,0xd8); //_MM_SHUFFLE(0,2,1,3));
+    xmm0 = _mm256_shufflehi_epi16(xmm0,0xd8); //_MM_SHUFFLE(0,2,1,3));
+    xmm0 = _mm256_shuffle_epi32(xmm0,0xd8); //_MM_SHUFFLE(0,2,1,3));
+
+    xmm1 = _mm256_shufflelo_epi16(xmm1,0xd8); //_MM_SHUFFLE(0,2,1,3));
+    xmm1 = _mm256_shufflehi_epi16(xmm1,0xd8); //_MM_SHUFFLE(0,2,1,3));
+    xmm1 = _mm256_shuffle_epi32(xmm1,0xd8); //_MM_SHUFFLE(0,2,1,3));
+
+    y1r = _mm256_unpacklo_epi64(xmm0, xmm1);
+    y1r = _mm256_permute4x64_epi64(y1r,0xd8); // Re(y1)
+
+    y1i = _mm256_unpackhi_epi64(xmm0, xmm1);
+    y1i = _mm256_permute4x64_epi64(y1i,0xd8); // Im(y1)
+    */
+
+    seperate_real_imag_parts(&y1r, &y1i, stream1_256i_in[i], stream1_256i_in[i+1]);
+
+    // Psi_r calculation from rho_rpi or rho_rmi
+    xmm0 = _mm256_broadcastw_epi16(_mm_set1_epi16(0));// ZERO for abs_pi16
+    xmm2 = _mm256_subs_epi16(rho_rpi_7_7, y1r);
+
+    psi_r_p7_p7 = _mm256_abs_epi16(xmm2);
+    xmm2 = _mm256_subs_epi16(rho_rpi_7_5, y1r);
+    psi_r_p7_p5 = _mm256_abs_epi16(xmm2);
+    xmm2 = _mm256_subs_epi16(rho_rpi_7_3, y1r);
+    psi_r_p7_p3 = _mm256_abs_epi16(xmm2);
+    xmm2 = _mm256_subs_epi16(rho_rpi_7_1, y1r);
+    psi_r_p7_p1 = _mm256_abs_epi16(xmm2);
+    xmm2 = _mm256_subs_epi16(rho_rmi_7_1, y1r);
+    psi_r_p7_m1 = _mm256_abs_epi16(xmm2);
+    xmm2 = _mm256_subs_epi16(rho_rmi_7_3, y1r);
+    psi_r_p7_m3 = _mm256_abs_epi16(xmm2);
+    xmm2 = _mm256_subs_epi16(rho_rmi_7_5, y1r);
+    psi_r_p7_m5 = _mm256_abs_epi16(xmm2);
+    xmm2 = _mm256_subs_epi16(rho_rmi_7_7, y1r);
+    psi_r_p7_m7 = _mm256_abs_epi16(xmm2);
+    xmm2 = _mm256_subs_epi16(rho_rpi_5_7, y1r);
+    psi_r_p5_p7 = _mm256_abs_epi16(xmm2);
+    xmm2 = _mm256_subs_epi16(rho_rpi_5_5, y1r);
+    psi_r_p5_p5 = _mm256_abs_epi16(xmm2);
+    xmm2 = _mm256_subs_epi16(rho_rpi_5_3, y1r);
+    psi_r_p5_p3 = _mm256_abs_epi16(xmm2);
+    xmm2 = _mm256_subs_epi16(rho_rpi_5_1, y1r);
+    psi_r_p5_p1 = _mm256_abs_epi16(xmm2);
+    xmm2 = _mm256_subs_epi16(rho_rmi_5_1, y1r);
+    psi_r_p5_m1 = _mm256_abs_epi16(xmm2);
+    xmm2 = _mm256_subs_epi16(rho_rmi_5_3, y1r);
+    psi_r_p5_m3 = _mm256_abs_epi16(xmm2);
+    xmm2 = _mm256_subs_epi16(rho_rmi_5_5, y1r);
+    psi_r_p5_m5 = _mm256_abs_epi16(xmm2);
+    xmm2 = _mm256_subs_epi16(rho_rmi_5_7, y1r);
+    psi_r_p5_m7 = _mm256_abs_epi16(xmm2);
+    xmm2 = _mm256_subs_epi16(rho_rpi_3_7, y1r);
+    psi_r_p3_p7 = _mm256_abs_epi16(xmm2);
+    xmm2 = _mm256_subs_epi16(rho_rpi_3_5, y1r);
+    psi_r_p3_p5 = _mm256_abs_epi16(xmm2);
+    xmm2 = _mm256_subs_epi16(rho_rpi_3_3, y1r);
+    psi_r_p3_p3 = _mm256_abs_epi16(xmm2);
+    xmm2 = _mm256_subs_epi16(rho_rpi_3_1, y1r);
+    psi_r_p3_p1 = _mm256_abs_epi16(xmm2);
+    xmm2 = _mm256_subs_epi16(rho_rmi_3_1, y1r);
+    psi_r_p3_m1 = _mm256_abs_epi16(xmm2);
+    xmm2 = _mm256_subs_epi16(rho_rmi_3_3, y1r);
+    psi_r_p3_m3 = _mm256_abs_epi16(xmm2);
+    xmm2 = _mm256_subs_epi16(rho_rmi_3_5, y1r);
+    psi_r_p3_m5 = _mm256_abs_epi16(xmm2);
+    xmm2 = _mm256_subs_epi16(rho_rmi_3_7, y1r);
+    psi_r_p3_m7 = _mm256_abs_epi16(xmm2);
+    xmm2 = _mm256_subs_epi16(rho_rpi_1_7, y1r);
+    psi_r_p1_p7 = _mm256_abs_epi16(xmm2);
+    xmm2 = _mm256_subs_epi16(rho_rpi_1_5, y1r);
+    psi_r_p1_p5 = _mm256_abs_epi16(xmm2);
+    xmm2 = _mm256_subs_epi16(rho_rpi_1_3, y1r);
+    psi_r_p1_p3 = _mm256_abs_epi16(xmm2);
+    xmm2 = _mm256_subs_epi16(rho_rpi_1_1, y1r);
+    psi_r_p1_p1 = _mm256_abs_epi16(xmm2);
+    xmm2 = _mm256_subs_epi16(rho_rmi_1_1, y1r);
+    psi_r_p1_m1 = _mm256_abs_epi16(xmm2);
+    xmm2 = _mm256_subs_epi16(rho_rmi_1_3, y1r);
+    psi_r_p1_m3 = _mm256_abs_epi16(xmm2);
+    xmm2 = _mm256_subs_epi16(rho_rmi_1_5, y1r);
+    psi_r_p1_m5 = _mm256_abs_epi16(xmm2);
+    xmm2 = _mm256_subs_epi16(rho_rmi_1_7, y1r);
+    psi_r_p1_m7 = _mm256_abs_epi16(xmm2);
+
+    xmm2 = _mm256_adds_epi16(rho_rmi_1_7, y1r);
+    psi_r_m1_p7 = _mm256_abs_epi16(xmm2);
+    xmm2 = _mm256_adds_epi16(rho_rmi_1_5, y1r);
+    psi_r_m1_p5 = _mm256_abs_epi16(xmm2);
+    xmm2 = _mm256_adds_epi16(rho_rmi_1_3, y1r);
+    psi_r_m1_p3 = _mm256_abs_epi16(xmm2);
+    xmm2 = _mm256_adds_epi16(rho_rmi_1_1, y1r);
+    psi_r_m1_p1 = _mm256_abs_epi16(xmm2);
+    xmm2 = _mm256_adds_epi16(rho_rpi_1_1, y1r);
+    psi_r_m1_m1 = _mm256_abs_epi16(xmm2);
+    xmm2 = _mm256_adds_epi16(rho_rpi_1_3, y1r);
+    psi_r_m1_m3 = _mm256_abs_epi16(xmm2);
+    xmm2 = _mm256_adds_epi16(rho_rpi_1_5, y1r);
+    psi_r_m1_m5 = _mm256_abs_epi16(xmm2);
+    xmm2 = _mm256_adds_epi16(rho_rpi_1_7, y1r);
+    psi_r_m1_m7 = _mm256_abs_epi16(xmm2);
+    xmm2 = _mm256_adds_epi16(rho_rmi_3_7, y1r);
+    psi_r_m3_p7 = _mm256_abs_epi16(xmm2);
+    xmm2 = _mm256_adds_epi16(rho_rmi_3_5, y1r);
+    psi_r_m3_p5 = _mm256_abs_epi16(xmm2);
+    xmm2 = _mm256_adds_epi16(rho_rmi_3_3, y1r);
+    psi_r_m3_p3 = _mm256_abs_epi16(xmm2);
+    xmm2 = _mm256_adds_epi16(rho_rmi_3_1, y1r);
+    psi_r_m3_p1 = _mm256_abs_epi16(xmm2);
+    xmm2 = _mm256_adds_epi16(rho_rpi_3_1, y1r);
+    psi_r_m3_m1 = _mm256_abs_epi16(xmm2);
+    xmm2 = _mm256_adds_epi16(rho_rpi_3_3, y1r);
+    psi_r_m3_m3 = _mm256_abs_epi16(xmm2);
+    xmm2 = _mm256_adds_epi16(rho_rpi_3_5, y1r);
+    psi_r_m3_m5 = _mm256_abs_epi16(xmm2);
+    xmm2 = _mm256_adds_epi16(rho_rpi_3_7, y1r);
+    psi_r_m3_m7 = _mm256_abs_epi16(xmm2);
+    xmm2 = _mm256_adds_epi16(rho_rmi_5_7, y1r);
+    psi_r_m5_p7 = _mm256_abs_epi16(xmm2);
+    xmm2 = _mm256_adds_epi16(rho_rmi_5_5, y1r);
+    psi_r_m5_p5 = _mm256_abs_epi16(xmm2);
+    xmm2 = _mm256_adds_epi16(rho_rmi_5_3, y1r);
+    psi_r_m5_p3 = _mm256_abs_epi16(xmm2);
+    xmm2 = _mm256_adds_epi16(rho_rmi_5_1, y1r);
+    psi_r_m5_p1 = _mm256_abs_epi16(xmm2);
+    xmm2 = _mm256_adds_epi16(rho_rpi_5_1, y1r);
+    psi_r_m5_m1 = _mm256_abs_epi16(xmm2);
+    xmm2 = _mm256_adds_epi16(rho_rpi_5_3, y1r);
+    psi_r_m5_m3 = _mm256_abs_epi16(xmm2);
+    xmm2 = _mm256_adds_epi16(rho_rpi_5_5, y1r);
+    psi_r_m5_m5 = _mm256_abs_epi16(xmm2);
+    xmm2 = _mm256_adds_epi16(rho_rpi_5_7, y1r);
+    psi_r_m5_m7 = _mm256_abs_epi16(xmm2);
+    xmm2 = _mm256_adds_epi16(rho_rmi_7_7, y1r);
+    psi_r_m7_p7 = _mm256_abs_epi16(xmm2);
+    xmm2 = _mm256_adds_epi16(rho_rmi_7_5, y1r);
+    psi_r_m7_p5 = _mm256_abs_epi16(xmm2);
+    xmm2 = _mm256_adds_epi16(rho_rmi_7_3, y1r);
+    psi_r_m7_p3 = _mm256_abs_epi16(xmm2);
+    xmm2 = _mm256_adds_epi16(rho_rmi_7_1, y1r);
+    psi_r_m7_p1 = _mm256_abs_epi16(xmm2);
+    xmm2 = _mm256_adds_epi16(rho_rpi_7_1, y1r);
+    psi_r_m7_m1 = _mm256_abs_epi16(xmm2);
+    xmm2 = _mm256_adds_epi16(rho_rpi_7_3, y1r);
+    psi_r_m7_m3 = _mm256_abs_epi16(xmm2);
+    xmm2 = _mm256_adds_epi16(rho_rpi_7_5, y1r);
+    psi_r_m7_m5 = _mm256_abs_epi16(xmm2);
+    xmm2 = _mm256_adds_epi16(rho_rpi_7_7, y1r);
+    psi_r_m7_m7 = _mm256_abs_epi16(xmm2);
+
+    // Psi_i calculation from rho_rpi or rho_rmi
+    xmm2 = _mm256_subs_epi16(rho_rmi_7_7, y1i);
+    psi_i_p7_p7 = _mm256_abs_epi16(xmm2);
+    xmm2 = _mm256_subs_epi16(rho_rmi_5_7, y1i);
+    psi_i_p7_p5 = _mm256_abs_epi16(xmm2);
+    xmm2 = _mm256_subs_epi16(rho_rmi_3_7, y1i);
+    psi_i_p7_p3 = _mm256_abs_epi16(xmm2);
+    xmm2 = _mm256_subs_epi16(rho_rmi_1_7, y1i);
+    psi_i_p7_p1 = _mm256_abs_epi16(xmm2);
+    xmm2 = _mm256_adds_epi16(rho_rpi_1_7, y1i);
+    psi_i_p7_m1 = _mm256_abs_epi16(xmm2);
+    xmm2 = _mm256_adds_epi16(rho_rpi_3_7, y1i);
+    psi_i_p7_m3 = _mm256_abs_epi16(xmm2);
+    xmm2 = _mm256_adds_epi16(rho_rpi_5_7, y1i);
+    psi_i_p7_m5 = _mm256_abs_epi16(xmm2);
+    xmm2 = _mm256_adds_epi16(rho_rpi_7_7, y1i);
+    psi_i_p7_m7 = _mm256_abs_epi16(xmm2);
+    xmm2 = _mm256_subs_epi16(rho_rmi_7_5, y1i);
+    psi_i_p5_p7 = _mm256_abs_epi16(xmm2);
+    xmm2 = _mm256_subs_epi16(rho_rmi_5_5, y1i);
+    psi_i_p5_p5 = _mm256_abs_epi16(xmm2);
+    xmm2 = _mm256_subs_epi16(rho_rmi_3_5, y1i);
+    psi_i_p5_p3 = _mm256_abs_epi16(xmm2);
+    xmm2 = _mm256_subs_epi16(rho_rmi_1_5, y1i);
+    psi_i_p5_p1 = _mm256_abs_epi16(xmm2);
+    xmm2 = _mm256_adds_epi16(rho_rpi_1_5, y1i);
+    psi_i_p5_m1 = _mm256_abs_epi16(xmm2);
+    xmm2 = _mm256_adds_epi16(rho_rpi_3_5, y1i);
+    psi_i_p5_m3 = _mm256_abs_epi16(xmm2);
+    xmm2 = _mm256_adds_epi16(rho_rpi_5_5, y1i);
+    psi_i_p5_m5 = _mm256_abs_epi16(xmm2);
+    xmm2 = _mm256_adds_epi16(rho_rpi_7_5, y1i);
+    psi_i_p5_m7 = _mm256_abs_epi16(xmm2);
+    xmm2 = _mm256_subs_epi16(rho_rmi_7_3, y1i);
+    psi_i_p3_p7 = _mm256_abs_epi16(xmm2);
+    xmm2 = _mm256_subs_epi16(rho_rmi_5_3, y1i);
+    psi_i_p3_p5 = _mm256_abs_epi16(xmm2);
+    xmm2 = _mm256_subs_epi16(rho_rmi_3_3, y1i);
+    psi_i_p3_p3 = _mm256_abs_epi16(xmm2);
+    xmm2 = _mm256_subs_epi16(rho_rmi_1_3, y1i);
+    psi_i_p3_p1 = _mm256_abs_epi16(xmm2);
+    xmm2 = _mm256_adds_epi16(rho_rpi_1_3, y1i);
+    psi_i_p3_m1 = _mm256_abs_epi16(xmm2);
+    xmm2 = _mm256_adds_epi16(rho_rpi_3_3, y1i);
+    psi_i_p3_m3 = _mm256_abs_epi16(xmm2);
+    xmm2 = _mm256_adds_epi16(rho_rpi_5_3, y1i);
+    psi_i_p3_m5 = _mm256_abs_epi16(xmm2);
+    xmm2 = _mm256_adds_epi16(rho_rpi_7_3, y1i);
+    psi_i_p3_m7 = _mm256_abs_epi16(xmm2);
+    xmm2 = _mm256_subs_epi16(rho_rmi_7_1, y1i);
+    psi_i_p1_p7 = _mm256_abs_epi16(xmm2);
+    xmm2 = _mm256_subs_epi16(rho_rmi_5_1, y1i);
+    psi_i_p1_p5 = _mm256_abs_epi16(xmm2);
+    xmm2 = _mm256_subs_epi16(rho_rmi_3_1, y1i);
+    psi_i_p1_p3 = _mm256_abs_epi16(xmm2);
+    xmm2 = _mm256_subs_epi16(rho_rmi_1_1, y1i);
+    psi_i_p1_p1 = _mm256_abs_epi16(xmm2);
+    xmm2 = _mm256_adds_epi16(rho_rpi_1_1, y1i);
+    psi_i_p1_m1 = _mm256_abs_epi16(xmm2);
+    xmm2 = _mm256_adds_epi16(rho_rpi_3_1, y1i);
+    psi_i_p1_m3 = _mm256_abs_epi16(xmm2);
+    xmm2 = _mm256_adds_epi16(rho_rpi_5_1, y1i);
+    psi_i_p1_m5 = _mm256_abs_epi16(xmm2);
+    xmm2 = _mm256_adds_epi16(rho_rpi_7_1, y1i);
+    psi_i_p1_m7 = _mm256_abs_epi16(xmm2);
+
+    xmm2 = _mm256_subs_epi16(rho_rpi_7_1, y1i);
+    psi_i_m1_p7 = _mm256_abs_epi16(xmm2);
+    xmm2 = _mm256_subs_epi16(rho_rpi_5_1, y1i);
+    psi_i_m1_p5 = _mm256_abs_epi16(xmm2);
+    xmm2 = _mm256_subs_epi16(rho_rpi_3_1, y1i);
+    psi_i_m1_p3 = _mm256_abs_epi16(xmm2);
+    xmm2 = _mm256_subs_epi16(rho_rpi_1_1, y1i);
+    psi_i_m1_p1 = _mm256_abs_epi16(xmm2);
+    xmm2 = _mm256_adds_epi16(rho_rmi_1_1, y1i);
+    psi_i_m1_m1 = _mm256_abs_epi16(xmm2);
+    xmm2 = _mm256_adds_epi16(rho_rmi_3_1, y1i);
+    psi_i_m1_m3 = _mm256_abs_epi16(xmm2);
+    xmm2 = _mm256_adds_epi16(rho_rmi_5_1, y1i);
+    psi_i_m1_m5 = _mm256_abs_epi16(xmm2);
+    xmm2 = _mm256_adds_epi16(rho_rmi_7_1, y1i);
+    psi_i_m1_m7 = _mm256_abs_epi16(xmm2);
+    xmm2 = _mm256_subs_epi16(rho_rpi_7_3, y1i);
+    psi_i_m3_p7 = _mm256_abs_epi16(xmm2);
+    xmm2 = _mm256_subs_epi16(rho_rpi_5_3, y1i);
+    psi_i_m3_p5 = _mm256_abs_epi16(xmm2);
+    xmm2 = _mm256_subs_epi16(rho_rpi_3_3, y1i);
+    psi_i_m3_p3 = _mm256_abs_epi16(xmm2);
+    xmm2 = _mm256_subs_epi16(rho_rpi_1_3, y1i);
+    psi_i_m3_p1 = _mm256_abs_epi16(xmm2);
+    xmm2 = _mm256_adds_epi16(rho_rmi_1_3, y1i);
+    psi_i_m3_m1 = _mm256_abs_epi16(xmm2);
+    xmm2 = _mm256_adds_epi16(rho_rmi_3_3, y1i);
+    psi_i_m3_m3 = _mm256_abs_epi16(xmm2);
+    xmm2 = _mm256_adds_epi16(rho_rmi_5_3, y1i);
+    psi_i_m3_m5 = _mm256_abs_epi16(xmm2);
+    xmm2 = _mm256_adds_epi16(rho_rmi_7_3, y1i);
+    psi_i_m3_m7 = _mm256_abs_epi16(xmm2);
+    xmm2 = _mm256_subs_epi16(rho_rpi_7_5, y1i);
+    psi_i_m5_p7 = _mm256_abs_epi16(xmm2);
+    xmm2 = _mm256_subs_epi16(rho_rpi_5_5, y1i);
+    psi_i_m5_p5 = _mm256_abs_epi16(xmm2);
+    xmm2 = _mm256_subs_epi16(rho_rpi_3_5, y1i);
+    psi_i_m5_p3 = _mm256_abs_epi16(xmm2);
+    xmm2 = _mm256_subs_epi16(rho_rpi_1_5, y1i);
+    psi_i_m5_p1 = _mm256_abs_epi16(xmm2);
+    xmm2 = _mm256_adds_epi16(rho_rmi_1_5, y1i);
+    psi_i_m5_m1 = _mm256_abs_epi16(xmm2);
+    xmm2 = _mm256_adds_epi16(rho_rmi_3_5, y1i);
+    psi_i_m5_m3 = _mm256_abs_epi16(xmm2);
+    xmm2 = _mm256_adds_epi16(rho_rmi_5_5, y1i);
+    psi_i_m5_m5 = _mm256_abs_epi16(xmm2);
+    xmm2 = _mm256_adds_epi16(rho_rmi_7_5, y1i);
+    psi_i_m5_m7 = _mm256_abs_epi16(xmm2);
+    xmm2 = _mm256_subs_epi16(rho_rpi_7_7, y1i);
+    psi_i_m7_p7 = _mm256_abs_epi16(xmm2);
+    xmm2 = _mm256_subs_epi16(rho_rpi_5_7, y1i);
+    psi_i_m7_p5 = _mm256_abs_epi16(xmm2);
+    xmm2 = _mm256_subs_epi16(rho_rpi_3_7, y1i);
+    psi_i_m7_p3 = _mm256_abs_epi16(xmm2);
+    xmm2 = _mm256_subs_epi16(rho_rpi_1_7, y1i);
+    psi_i_m7_p1 = _mm256_abs_epi16(xmm2);
+    xmm2 = _mm256_adds_epi16(rho_rmi_1_7, y1i);
+    psi_i_m7_m1 = _mm256_abs_epi16(xmm2);
+    xmm2 = _mm256_adds_epi16(rho_rmi_3_7, y1i);
+    psi_i_m7_m3 = _mm256_abs_epi16(xmm2);
+    xmm2 = _mm256_adds_epi16(rho_rmi_5_7, y1i);
+    psi_i_m7_m5 = _mm256_abs_epi16(xmm2);
+    xmm2 = _mm256_adds_epi16(rho_rmi_7_7, y1i);
+    psi_i_m7_m7 = _mm256_abs_epi16(xmm2);
+
+    /*
+    // Rearrange desired MF output
+    xmm0 = stream0_256i_in[i];
+    xmm1 = stream0_256i_in[i+1];
+    xmm0 = _mm_shufflelo_epi16(xmm0,0xd8); //_MM_SHUFFLE(0,2,1,3));
+    xmm0 = _mm_shufflehi_epi16(xmm0,0xd8); //_MM_SHUFFLE(0,2,1,3));
+    xmm0 = _mm_shuffle_epi32(xmm0,0xd8); //_MM_SHUFFLE(0,2,1,3));
+    xmm1 = _mm_shufflelo_epi16(xmm1,0xd8); //_MM_SHUFFLE(0,2,1,3));
+    xmm1 = _mm_shufflehi_epi16(xmm1,0xd8); //_MM_SHUFFLE(0,2,1,3));
+    xmm1 = _mm_shuffle_epi32(xmm1,0xd8); //_MM_SHUFFLE(0,2,1,3));
+    //xmm0 = [Re(0,1) Re(2,3) Im(0,1) Im(2,3)]
+    //xmm1 = [Re(4,5) Re(6,7) Im(4,5) Im(6,7)]
+    y0r = _mm_unpacklo_epi64(xmm0,xmm1); // = [y0r(1),y0r(2),y0r(3),y0r(4)]
+    y0i = _mm_unpackhi_epi64(xmm0,xmm1);
+    */
+    seperate_real_imag_parts(&y0r, &y0i, stream0_256i_in[i], stream0_256i_in[i+1]);
+
+    // Rearrange desired channel magnitudes
+    // [|h|^2(1),|h|^2(1),|h|^2(2),|h|^2(2),...,|h|^2(7),|h|^2(7)]*(2/sqrt(10))
+    /*
+    xmm2 = ch_mag_256i[i];
+    xmm3 = ch_mag_256i[i+1];
+    xmm2 = _mm_shufflelo_epi16(xmm2,0xd8); //_MM_SHUFFLE(0,2,1,3));
+    xmm2 = _mm_shufflehi_epi16(xmm2,0xd8); //_MM_SHUFFLE(0,2,1,3));
+    xmm2 = _mm_shuffle_epi32(xmm2,0xd8); //_MM_SHUFFLE(0,2,1,3));
+    xmm3 = _mm_shufflelo_epi16(xmm3,0xd8); //_MM_SHUFFLE(0,2,1,3));
+    xmm3 = _mm_shufflehi_epi16(xmm3,0xd8); //_MM_SHUFFLE(0,2,1,3));
+    xmm3 = _mm_shuffle_epi32(xmm3,0xd8); //_MM_SHUFFLE(0,2,1,3));
+    ch_mag_des = _mm_unpacklo_epi64(xmm2,xmm3);
+    */
+    // xmm2 is a dummy variable that contains the same values as ch_mag_des
+    seperate_real_imag_parts(&ch_mag_des, &xmm2, ch_mag_256i[i], ch_mag_256i[i+1]);
+
+
+    // Rearrange interfering channel magnitudes
+    /*
+    xmm2 = ch_mag_256i_i[i];
+    xmm3 = ch_mag_256i_i[i+1];
+    xmm2 = _mm_shufflelo_epi16(xmm2,0xd8); //_MM_SHUFFLE(0,2,1,3));
+    xmm2 = _mm_shufflehi_epi16(xmm2,0xd8); //_MM_SHUFFLE(0,2,1,3));
+    xmm2 = _mm_shuffle_epi32(xmm2,0xd8); //_MM_SHUFFLE(0,2,1,3));
+    xmm3 = _mm_shufflelo_epi16(xmm3,0xd8); //_MM_SHUFFLE(0,2,1,3));
+    xmm3 = _mm_shufflehi_epi16(xmm3,0xd8); //_MM_SHUFFLE(0,2,1,3));
+    xmm3 = _mm_shuffle_epi32(xmm3,0xd8); //_MM_SHUFFLE(0,2,1,3));
+    ch_mag_int  = _mm_unpacklo_epi64(xmm2,xmm3);
+    */
+    seperate_real_imag_parts(&ch_mag_int, &xmm2, ch_mag_256i_i[i], ch_mag_256i_i[i+1]);
+
+    y0r_one_over_sqrt_21   = _mm256_mulhi_epi16(y0r, ONE_OVER_SQRT_42);
+    y0r_three_over_sqrt_21 = _mm256_mulhi_epi16(y0r, THREE_OVER_SQRT_42);
+    y0r_five_over_sqrt_21  = _mm256_mulhi_epi16(y0r, FIVE_OVER_SQRT_42);
+    y0r_five_over_sqrt_21  = _mm256_slli_epi16(y0r_five_over_sqrt_21, 1);
+    y0r_seven_over_sqrt_21 = _mm256_mulhi_epi16(y0r, SEVEN_OVER_SQRT_42);
+    y0r_seven_over_sqrt_21 = _mm256_slli_epi16(y0r_seven_over_sqrt_21, 2); // Q2.14
+
+    y0i_one_over_sqrt_21   = _mm256_mulhi_epi16(y0i, ONE_OVER_SQRT_42);
+    y0i_three_over_sqrt_21 = _mm256_mulhi_epi16(y0i, THREE_OVER_SQRT_42);
+    y0i_five_over_sqrt_21  = _mm256_mulhi_epi16(y0i, FIVE_OVER_SQRT_42);
+    y0i_five_over_sqrt_21  = _mm256_slli_epi16(y0i_five_over_sqrt_21, 1);
+    y0i_seven_over_sqrt_21 = _mm256_mulhi_epi16(y0i, SEVEN_OVER_SQRT_42);
+    y0i_seven_over_sqrt_21 = _mm256_slli_epi16(y0i_seven_over_sqrt_21, 2); // Q2.14
+
+
+    y0_p_7_1 = _mm256_adds_epi16(y0r_seven_over_sqrt_21, y0i_one_over_sqrt_21);
+    y0_p_7_3 = _mm256_adds_epi16(y0r_seven_over_sqrt_21, y0i_three_over_sqrt_21);
+    y0_p_7_5 = _mm256_adds_epi16(y0r_seven_over_sqrt_21, y0i_five_over_sqrt_21);
+    y0_p_7_7 = _mm256_adds_epi16(y0r_seven_over_sqrt_21, y0i_seven_over_sqrt_21);
+    y0_p_5_1 = _mm256_adds_epi16(y0r_five_over_sqrt_21, y0i_one_over_sqrt_21);
+    y0_p_5_3 = _mm256_adds_epi16(y0r_five_over_sqrt_21, y0i_three_over_sqrt_21);
+    y0_p_5_5 = _mm256_adds_epi16(y0r_five_over_sqrt_21, y0i_five_over_sqrt_21);
+    y0_p_5_7 = _mm256_adds_epi16(y0r_five_over_sqrt_21, y0i_seven_over_sqrt_21);
+    y0_p_3_1 = _mm256_adds_epi16(y0r_three_over_sqrt_21, y0i_one_over_sqrt_21);
+    y0_p_3_3 = _mm256_adds_epi16(y0r_three_over_sqrt_21, y0i_three_over_sqrt_21);
+    y0_p_3_5 = _mm256_adds_epi16(y0r_three_over_sqrt_21, y0i_five_over_sqrt_21);
+    y0_p_3_7 = _mm256_adds_epi16(y0r_three_over_sqrt_21, y0i_seven_over_sqrt_21);
+    y0_p_1_1 = _mm256_adds_epi16(y0r_one_over_sqrt_21, y0i_one_over_sqrt_21);
+    y0_p_1_3 = _mm256_adds_epi16(y0r_one_over_sqrt_21, y0i_three_over_sqrt_21);
+    y0_p_1_5 = _mm256_adds_epi16(y0r_one_over_sqrt_21, y0i_five_over_sqrt_21);
+    y0_p_1_7 = _mm256_adds_epi16(y0r_one_over_sqrt_21, y0i_seven_over_sqrt_21);
+
+    y0_m_1_1 = _mm256_subs_epi16(y0r_one_over_sqrt_21, y0i_one_over_sqrt_21);
+    y0_m_1_3 = _mm256_subs_epi16(y0r_one_over_sqrt_21, y0i_three_over_sqrt_21);
+    y0_m_1_5 = _mm256_subs_epi16(y0r_one_over_sqrt_21, y0i_five_over_sqrt_21);
+    y0_m_1_7 = _mm256_subs_epi16(y0r_one_over_sqrt_21, y0i_seven_over_sqrt_21);
+    y0_m_3_1 = _mm256_subs_epi16(y0r_three_over_sqrt_21, y0i_one_over_sqrt_21);
+    y0_m_3_3 = _mm256_subs_epi16(y0r_three_over_sqrt_21, y0i_three_over_sqrt_21);
+    y0_m_3_5 = _mm256_subs_epi16(y0r_three_over_sqrt_21, y0i_five_over_sqrt_21);
+    y0_m_3_7 = _mm256_subs_epi16(y0r_three_over_sqrt_21, y0i_seven_over_sqrt_21);
+    y0_m_5_1 = _mm256_subs_epi16(y0r_five_over_sqrt_21, y0i_one_over_sqrt_21);
+    y0_m_5_3 = _mm256_subs_epi16(y0r_five_over_sqrt_21, y0i_three_over_sqrt_21);
+    y0_m_5_5 = _mm256_subs_epi16(y0r_five_over_sqrt_21, y0i_five_over_sqrt_21);
+    y0_m_5_7 = _mm256_subs_epi16(y0r_five_over_sqrt_21, y0i_seven_over_sqrt_21);
+    y0_m_7_1 = _mm256_subs_epi16(y0r_seven_over_sqrt_21, y0i_one_over_sqrt_21);
+    y0_m_7_3 = _mm256_subs_epi16(y0r_seven_over_sqrt_21, y0i_three_over_sqrt_21);
+    y0_m_7_5 = _mm256_subs_epi16(y0r_seven_over_sqrt_21, y0i_five_over_sqrt_21);
+    y0_m_7_7 = _mm256_subs_epi16(y0r_seven_over_sqrt_21, y0i_seven_over_sqrt_21);
+
+    // Detection of interference term
+    ch_mag_int_with_sigma2       = _mm256_srai_epi16(ch_mag_int, 1); // *2
+    two_ch_mag_int_with_sigma2   = ch_mag_int; // *4
+    three_ch_mag_int_with_sigma2 = _mm256_adds_epi16(ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2); // *6
+
+    interference_abs_64qam_epi16(psi_r_p7_p7, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_p7_p7, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42,
+                                 SEVEN_OVER_SQRT_2_42);
+    interference_abs_64qam_epi16(psi_r_p7_p5, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_p7_p5, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42,
+                                 SEVEN_OVER_SQRT_2_42);
+    interference_abs_64qam_epi16(psi_r_p7_p3, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_p7_p3, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42,
+                                 SEVEN_OVER_SQRT_2_42);
+    interference_abs_64qam_epi16(psi_r_p7_p1, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_p7_p1, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42,
+                                 SEVEN_OVER_SQRT_2_42);
+    interference_abs_64qam_epi16(psi_r_p7_m1, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_p7_m1, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42,
+                                 SEVEN_OVER_SQRT_2_42);
+    interference_abs_64qam_epi16(psi_r_p7_m3, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_p7_m3, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42,
+                                 SEVEN_OVER_SQRT_2_42);
+    interference_abs_64qam_epi16(psi_r_p7_m5, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_p7_m5, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42,
+                                 SEVEN_OVER_SQRT_2_42);
+    interference_abs_64qam_epi16(psi_r_p7_m7, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_p7_m7, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42,
+                                 SEVEN_OVER_SQRT_2_42);
+    interference_abs_64qam_epi16(psi_r_p5_p7, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_p5_p7, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42,
+                                 SEVEN_OVER_SQRT_2_42);
+    interference_abs_64qam_epi16(psi_r_p5_p5, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_p5_p5, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42,
+                                 SEVEN_OVER_SQRT_2_42);
+    interference_abs_64qam_epi16(psi_r_p5_p3, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_p5_p3, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42,
+                                 SEVEN_OVER_SQRT_2_42);
+    interference_abs_64qam_epi16(psi_r_p5_p1, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_p5_p1, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42,
+                                 SEVEN_OVER_SQRT_2_42);
+    interference_abs_64qam_epi16(psi_r_p5_m1, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_p5_m1, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42,
+                                 SEVEN_OVER_SQRT_2_42);
+    interference_abs_64qam_epi16(psi_r_p5_m3, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_p5_m3, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42,
+                                 SEVEN_OVER_SQRT_2_42);
+    interference_abs_64qam_epi16(psi_r_p5_m5, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_p5_m5, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42,
+                                 SEVEN_OVER_SQRT_2_42);
+    interference_abs_64qam_epi16(psi_r_p5_m7, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_p5_m7, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42,
+                                 SEVEN_OVER_SQRT_2_42);
+    interference_abs_64qam_epi16(psi_r_p3_p7, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_p3_p7, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42,
+                                 SEVEN_OVER_SQRT_2_42);
+    interference_abs_64qam_epi16(psi_r_p3_p5, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_p3_p5, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42,
+                                 SEVEN_OVER_SQRT_2_42);
+    interference_abs_64qam_epi16(psi_r_p3_p3, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_p3_p3, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42,
+                                 SEVEN_OVER_SQRT_2_42);
+    interference_abs_64qam_epi16(psi_r_p3_p1, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_p3_p1, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42,
+                                 SEVEN_OVER_SQRT_2_42);
+    interference_abs_64qam_epi16(psi_r_p3_m1, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_p3_m1, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42,
+                                 SEVEN_OVER_SQRT_2_42);
+    interference_abs_64qam_epi16(psi_r_p3_m3, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_p3_m3, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42,
+                                 SEVEN_OVER_SQRT_2_42);
+    interference_abs_64qam_epi16(psi_r_p3_m5, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_p3_m5, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42,
+                                 SEVEN_OVER_SQRT_2_42);
+    interference_abs_64qam_epi16(psi_r_p3_m7, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_p3_m7, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42,
+                                 SEVEN_OVER_SQRT_2_42);
+    interference_abs_64qam_epi16(psi_r_p1_p7, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_p1_p7, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42,
+                                 SEVEN_OVER_SQRT_2_42);
+    interference_abs_64qam_epi16(psi_r_p1_p5, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_p1_p5, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42,
+                                 SEVEN_OVER_SQRT_2_42);
+    interference_abs_64qam_epi16(psi_r_p1_p3, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_p1_p3, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42,
+                                 SEVEN_OVER_SQRT_2_42);
+    interference_abs_64qam_epi16(psi_r_p1_p1, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_p1_p1, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42,
+                                 SEVEN_OVER_SQRT_2_42);
+    interference_abs_64qam_epi16(psi_r_p1_m1, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_p1_m1, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42,
+                                 SEVEN_OVER_SQRT_2_42);
+    interference_abs_64qam_epi16(psi_r_p1_m3, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_p1_m3, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42,
+                                 SEVEN_OVER_SQRT_2_42);
+    interference_abs_64qam_epi16(psi_r_p1_m5, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_p1_m5, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42,
+                                 SEVEN_OVER_SQRT_2_42);
+    interference_abs_64qam_epi16(psi_r_p1_m7, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_p1_m7, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42,
+                                 SEVEN_OVER_SQRT_2_42);
+    interference_abs_64qam_epi16(psi_r_m1_p7, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_m1_p7, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42,
+                                 SEVEN_OVER_SQRT_2_42);
+    interference_abs_64qam_epi16(psi_r_m1_p5, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_m1_p5, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42,
+                                 SEVEN_OVER_SQRT_2_42);
+    interference_abs_64qam_epi16(psi_r_m1_p3, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_m1_p3, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42,
+                                 SEVEN_OVER_SQRT_2_42);
+    interference_abs_64qam_epi16(psi_r_m1_p1, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_m1_p1, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42,
+                                 SEVEN_OVER_SQRT_2_42);
+    interference_abs_64qam_epi16(psi_r_m1_m1, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_m1_m1, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42,
+                                 SEVEN_OVER_SQRT_2_42);
+    interference_abs_64qam_epi16(psi_r_m1_m3, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_m1_m3, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42,
+                                 SEVEN_OVER_SQRT_2_42);
+    interference_abs_64qam_epi16(psi_r_m1_m5, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_m1_m5, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42,
+                                 SEVEN_OVER_SQRT_2_42);
+    interference_abs_64qam_epi16(psi_r_m1_m7, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_m1_m7, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42,
+                                 SEVEN_OVER_SQRT_2_42);
+    interference_abs_64qam_epi16(psi_r_m3_p7, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_m3_p7, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42,
+                                 SEVEN_OVER_SQRT_2_42);
+    interference_abs_64qam_epi16(psi_r_m3_p5, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_m3_p5, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42,
+                                 SEVEN_OVER_SQRT_2_42);
+    interference_abs_64qam_epi16(psi_r_m3_p3, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_m3_p3, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42,
+                                 SEVEN_OVER_SQRT_2_42);
+    interference_abs_64qam_epi16(psi_r_m3_p1, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_m3_p1, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42,
+                                 SEVEN_OVER_SQRT_2_42);
+    interference_abs_64qam_epi16(psi_r_m3_m1, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_m3_m1, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42,
+                                 SEVEN_OVER_SQRT_2_42);
+    interference_abs_64qam_epi16(psi_r_m3_m3, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_m3_m3, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42,
+                                 SEVEN_OVER_SQRT_2_42);
+    interference_abs_64qam_epi16(psi_r_m3_m5, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_m3_m5, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42,
+                                 SEVEN_OVER_SQRT_2_42);
+    interference_abs_64qam_epi16(psi_r_m3_m7, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_m3_m7, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42,
+                                 SEVEN_OVER_SQRT_2_42);
+    interference_abs_64qam_epi16(psi_r_m5_p7, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_m5_p7, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42,
+                                 SEVEN_OVER_SQRT_2_42);
+    interference_abs_64qam_epi16(psi_r_m5_p5, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_m5_p5, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42,
+                                 SEVEN_OVER_SQRT_2_42);
+    interference_abs_64qam_epi16(psi_r_m5_p3, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_m5_p3, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42,
+                                 SEVEN_OVER_SQRT_2_42);
+    interference_abs_64qam_epi16(psi_r_m5_p1, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_m5_p1, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42,
+                                 SEVEN_OVER_SQRT_2_42);
+    interference_abs_64qam_epi16(psi_r_m5_m1, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_m5_m1, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42,
+                                 SEVEN_OVER_SQRT_2_42);
+    interference_abs_64qam_epi16(psi_r_m5_m3, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_m5_m3, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42,
+                                 SEVEN_OVER_SQRT_2_42);
+    interference_abs_64qam_epi16(psi_r_m5_m5, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_m5_m5, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42,
+                                 SEVEN_OVER_SQRT_2_42);
+    interference_abs_64qam_epi16(psi_r_m5_m7, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_m5_m7, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42,
+                                 SEVEN_OVER_SQRT_2_42);
+    interference_abs_64qam_epi16(psi_r_m7_p7, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_m7_p7, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42,
+                                 SEVEN_OVER_SQRT_2_42);
+    interference_abs_64qam_epi16(psi_r_m7_p5, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_m7_p5, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42,
+                                 SEVEN_OVER_SQRT_2_42);
+    interference_abs_64qam_epi16(psi_r_m7_p3, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_m7_p3, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42,
+                                 SEVEN_OVER_SQRT_2_42);
+    interference_abs_64qam_epi16(psi_r_m7_p1, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_m7_p1, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42,
+                                 SEVEN_OVER_SQRT_2_42);
+    interference_abs_64qam_epi16(psi_r_m7_m1, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_m7_m1, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42,
+                                 SEVEN_OVER_SQRT_2_42);
+    interference_abs_64qam_epi16(psi_r_m7_m3, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_m7_m3, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42,
+                                 SEVEN_OVER_SQRT_2_42);
+    interference_abs_64qam_epi16(psi_r_m7_m5, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_m7_m5, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42,
+                                 SEVEN_OVER_SQRT_2_42);
+    interference_abs_64qam_epi16(psi_r_m7_m7, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_m7_m7, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42,
+                                 SEVEN_OVER_SQRT_2_42);
+
+    interference_abs_64qam_epi16(psi_i_p7_p7, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_p7_p7, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42,
+                                 SEVEN_OVER_SQRT_2_42);
+    interference_abs_64qam_epi16(psi_i_p7_p5, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_p7_p5, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42,
+                                 SEVEN_OVER_SQRT_2_42);
+    interference_abs_64qam_epi16(psi_i_p7_p3, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_p7_p3, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42,
+                                 SEVEN_OVER_SQRT_2_42);
+    interference_abs_64qam_epi16(psi_i_p7_p1, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_p7_p1, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42,
+                                 SEVEN_OVER_SQRT_2_42);
+    interference_abs_64qam_epi16(psi_i_p7_m1, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_p7_m1, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42,
+                                 SEVEN_OVER_SQRT_2_42);
+    interference_abs_64qam_epi16(psi_i_p7_m3, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_p7_m3, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42,
+                                 SEVEN_OVER_SQRT_2_42);
+    interference_abs_64qam_epi16(psi_i_p7_m5, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_p7_m5, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42,
+                                 SEVEN_OVER_SQRT_2_42);
+    interference_abs_64qam_epi16(psi_i_p7_m7, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_p7_m7, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42,
+                                 SEVEN_OVER_SQRT_2_42);
+    interference_abs_64qam_epi16(psi_i_p5_p7, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_p5_p7, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42,
+                                 SEVEN_OVER_SQRT_2_42);
+    interference_abs_64qam_epi16(psi_i_p5_p5, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_p5_p5, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42,
+                                 SEVEN_OVER_SQRT_2_42);
+    interference_abs_64qam_epi16(psi_i_p5_p3, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_p5_p3, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42,
+                                 SEVEN_OVER_SQRT_2_42);
+    interference_abs_64qam_epi16(psi_i_p5_p1, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_p5_p1, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42,
+                                 SEVEN_OVER_SQRT_2_42);
+    interference_abs_64qam_epi16(psi_i_p5_m1, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_p5_m1, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42,
+                                 SEVEN_OVER_SQRT_2_42);
+    interference_abs_64qam_epi16(psi_i_p5_m3, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_p5_m3, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42,
+                                 SEVEN_OVER_SQRT_2_42);
+    interference_abs_64qam_epi16(psi_i_p5_m5, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_p5_m5, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42,
+                                 SEVEN_OVER_SQRT_2_42);
+    interference_abs_64qam_epi16(psi_i_p5_m7, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_p5_m7, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42,
+                                 SEVEN_OVER_SQRT_2_42);
+    interference_abs_64qam_epi16(psi_i_p3_p7, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_p3_p7, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42,
+                                 SEVEN_OVER_SQRT_2_42);
+    interference_abs_64qam_epi16(psi_i_p3_p5, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_p3_p5, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42,
+                                 SEVEN_OVER_SQRT_2_42);
+    interference_abs_64qam_epi16(psi_i_p3_p3, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_p3_p3, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42,
+                                 SEVEN_OVER_SQRT_2_42);
+    interference_abs_64qam_epi16(psi_i_p3_p1, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_p3_p1, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42,
+                                 SEVEN_OVER_SQRT_2_42);
+    interference_abs_64qam_epi16(psi_i_p3_m1, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_p3_m1, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42,
+                                 SEVEN_OVER_SQRT_2_42);
+    interference_abs_64qam_epi16(psi_i_p3_m3, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_p3_m3, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42,
+                                 SEVEN_OVER_SQRT_2_42);
+    interference_abs_64qam_epi16(psi_i_p3_m5, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_p3_m5, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42,
+                                 SEVEN_OVER_SQRT_2_42);
+    interference_abs_64qam_epi16(psi_i_p3_m7, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_p3_m7, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42,
+                                 SEVEN_OVER_SQRT_2_42);
+    interference_abs_64qam_epi16(psi_i_p1_p7, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_p1_p7, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42,
+                                 SEVEN_OVER_SQRT_2_42);
+    interference_abs_64qam_epi16(psi_i_p1_p5, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_p1_p5, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42,
+                                 SEVEN_OVER_SQRT_2_42);
+    interference_abs_64qam_epi16(psi_i_p1_p3, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_p1_p3, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42,
+                                 SEVEN_OVER_SQRT_2_42);
+    interference_abs_64qam_epi16(psi_i_p1_p1, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_p1_p1, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42,
+                                 SEVEN_OVER_SQRT_2_42);
+    interference_abs_64qam_epi16(psi_i_p1_m1, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_p1_m1, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42,
+                                 SEVEN_OVER_SQRT_2_42);
+    interference_abs_64qam_epi16(psi_i_p1_m3, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_p1_m3, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42,
+                                 SEVEN_OVER_SQRT_2_42);
+    interference_abs_64qam_epi16(psi_i_p1_m5, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_p1_m5, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42,
+                                 SEVEN_OVER_SQRT_2_42);
+    interference_abs_64qam_epi16(psi_i_p1_m7, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_p1_m7, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42,
+                                 SEVEN_OVER_SQRT_2_42);
+    interference_abs_64qam_epi16(psi_i_m1_p7, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_m1_p7, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42,
+                                 SEVEN_OVER_SQRT_2_42);
+    interference_abs_64qam_epi16(psi_i_m1_p5, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_m1_p5, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42,
+                                 SEVEN_OVER_SQRT_2_42);
+    interference_abs_64qam_epi16(psi_i_m1_p3, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_m1_p3, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42,
+                                 SEVEN_OVER_SQRT_2_42);
+    interference_abs_64qam_epi16(psi_i_m1_p1, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_m1_p1, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42,
+                                 SEVEN_OVER_SQRT_2_42);
+    interference_abs_64qam_epi16(psi_i_m1_m1, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_m1_m1, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42,
+                                 SEVEN_OVER_SQRT_2_42);
+    interference_abs_64qam_epi16(psi_i_m1_m3, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_m1_m3, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42,
+                                 SEVEN_OVER_SQRT_2_42);
+    interference_abs_64qam_epi16(psi_i_m1_m5, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_m1_m5, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42,
+                                 SEVEN_OVER_SQRT_2_42);
+    interference_abs_64qam_epi16(psi_i_m1_m7, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_m1_m7, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42,
+                                 SEVEN_OVER_SQRT_2_42);
+    interference_abs_64qam_epi16(psi_i_m3_p7, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_m3_p7, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42,
+                                 SEVEN_OVER_SQRT_2_42);
+    interference_abs_64qam_epi16(psi_i_m3_p5, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_m3_p5, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42,
+                                 SEVEN_OVER_SQRT_2_42);
+    interference_abs_64qam_epi16(psi_i_m3_p3, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_m3_p3, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42,
+                                 SEVEN_OVER_SQRT_2_42);
+    interference_abs_64qam_epi16(psi_i_m3_p1, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_m3_p1, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42,
+                                 SEVEN_OVER_SQRT_2_42);
+    interference_abs_64qam_epi16(psi_i_m3_m1, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_m3_m1, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42,
+                                 SEVEN_OVER_SQRT_2_42);
+    interference_abs_64qam_epi16(psi_i_m3_m3, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_m3_m3, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42,
+                                 SEVEN_OVER_SQRT_2_42);
+    interference_abs_64qam_epi16(psi_i_m3_m5, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_m3_m5, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42,
+                                 SEVEN_OVER_SQRT_2_42);
+    interference_abs_64qam_epi16(psi_i_m3_m7, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_m3_m7, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42,
+                                 SEVEN_OVER_SQRT_2_42);
+    interference_abs_64qam_epi16(psi_i_m5_p7, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_m5_p7, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42,
+                                 SEVEN_OVER_SQRT_2_42);
+    interference_abs_64qam_epi16(psi_i_m5_p5, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_m5_p5, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42,
+                                 SEVEN_OVER_SQRT_2_42);
+    interference_abs_64qam_epi16(psi_i_m5_p3, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_m5_p3, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42,
+                                 SEVEN_OVER_SQRT_2_42);
+    interference_abs_64qam_epi16(psi_i_m5_p1, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_m5_p1, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42,
+                                 SEVEN_OVER_SQRT_2_42);
+    interference_abs_64qam_epi16(psi_i_m5_m1, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_m5_m1, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42,
+                                 SEVEN_OVER_SQRT_2_42);
+    interference_abs_64qam_epi16(psi_i_m5_m3, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_m5_m3, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42,
+                                 SEVEN_OVER_SQRT_2_42);
+    interference_abs_64qam_epi16(psi_i_m5_m5, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_m5_m5, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42,
+                                 SEVEN_OVER_SQRT_2_42);
+    interference_abs_64qam_epi16(psi_i_m5_m7, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_m5_m7, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42,
+                                 SEVEN_OVER_SQRT_2_42);
+    interference_abs_64qam_epi16(psi_i_m7_p7, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_m7_p7, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42,
+                                 SEVEN_OVER_SQRT_2_42);
+    interference_abs_64qam_epi16(psi_i_m7_p5, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_m7_p5, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42,
+                                 SEVEN_OVER_SQRT_2_42);
+    interference_abs_64qam_epi16(psi_i_m7_p3, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_m7_p3, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42,
+                                 SEVEN_OVER_SQRT_2_42);
+    interference_abs_64qam_epi16(psi_i_m7_p1, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_m7_p1, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42,
+                                 SEVEN_OVER_SQRT_2_42);
+    interference_abs_64qam_epi16(psi_i_m7_m1, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_m7_m1, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42,
+                                 SEVEN_OVER_SQRT_2_42);
+    interference_abs_64qam_epi16(psi_i_m7_m3, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_m7_m3, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42,
+                                 SEVEN_OVER_SQRT_2_42);
+    interference_abs_64qam_epi16(psi_i_m7_m5, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_m7_m5, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42,
+                                 SEVEN_OVER_SQRT_2_42);
+    interference_abs_64qam_epi16(psi_i_m7_m7, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_m7_m7, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42,
+                                 SEVEN_OVER_SQRT_2_42);
+
+    // Calculation of a group of two terms in the bit metric involving product of psi and interference
+    prodsum_psi_a_epi16(psi_r_p7_p7, a_r_p7_p7, psi_i_p7_p7, a_i_p7_p7, psi_a_p7_p7);
+    prodsum_psi_a_epi16(psi_r_p7_p5, a_r_p7_p5, psi_i_p7_p5, a_i_p7_p5, psi_a_p7_p5);
+    prodsum_psi_a_epi16(psi_r_p7_p3, a_r_p7_p3, psi_i_p7_p3, a_i_p7_p3, psi_a_p7_p3);
+    prodsum_psi_a_epi16(psi_r_p7_p1, a_r_p7_p1, psi_i_p7_p1, a_i_p7_p1, psi_a_p7_p1);
+    prodsum_psi_a_epi16(psi_r_p7_m1, a_r_p7_m1, psi_i_p7_m1, a_i_p7_m1, psi_a_p7_m1);
+    prodsum_psi_a_epi16(psi_r_p7_m3, a_r_p7_m3, psi_i_p7_m3, a_i_p7_m3, psi_a_p7_m3);
+    prodsum_psi_a_epi16(psi_r_p7_m5, a_r_p7_m5, psi_i_p7_m5, a_i_p7_m5, psi_a_p7_m5);
+    prodsum_psi_a_epi16(psi_r_p7_m7, a_r_p7_m7, psi_i_p7_m7, a_i_p7_m7, psi_a_p7_m7);
+    prodsum_psi_a_epi16(psi_r_p5_p7, a_r_p5_p7, psi_i_p5_p7, a_i_p5_p7, psi_a_p5_p7);
+    prodsum_psi_a_epi16(psi_r_p5_p5, a_r_p5_p5, psi_i_p5_p5, a_i_p5_p5, psi_a_p5_p5);
+    prodsum_psi_a_epi16(psi_r_p5_p3, a_r_p5_p3, psi_i_p5_p3, a_i_p5_p3, psi_a_p5_p3);
+    prodsum_psi_a_epi16(psi_r_p5_p1, a_r_p5_p1, psi_i_p5_p1, a_i_p5_p1, psi_a_p5_p1);
+    prodsum_psi_a_epi16(psi_r_p5_m1, a_r_p5_m1, psi_i_p5_m1, a_i_p5_m1, psi_a_p5_m1);
+    prodsum_psi_a_epi16(psi_r_p5_m3, a_r_p5_m3, psi_i_p5_m3, a_i_p5_m3, psi_a_p5_m3);
+    prodsum_psi_a_epi16(psi_r_p5_m5, a_r_p5_m5, psi_i_p5_m5, a_i_p5_m5, psi_a_p5_m5);
+    prodsum_psi_a_epi16(psi_r_p5_m7, a_r_p5_m7, psi_i_p5_m7, a_i_p5_m7, psi_a_p5_m7);
+    prodsum_psi_a_epi16(psi_r_p3_p7, a_r_p3_p7, psi_i_p3_p7, a_i_p3_p7, psi_a_p3_p7);
+    prodsum_psi_a_epi16(psi_r_p3_p5, a_r_p3_p5, psi_i_p3_p5, a_i_p3_p5, psi_a_p3_p5);
+    prodsum_psi_a_epi16(psi_r_p3_p3, a_r_p3_p3, psi_i_p3_p3, a_i_p3_p3, psi_a_p3_p3);
+    prodsum_psi_a_epi16(psi_r_p3_p1, a_r_p3_p1, psi_i_p3_p1, a_i_p3_p1, psi_a_p3_p1);
+    prodsum_psi_a_epi16(psi_r_p3_m1, a_r_p3_m1, psi_i_p3_m1, a_i_p3_m1, psi_a_p3_m1);
+    prodsum_psi_a_epi16(psi_r_p3_m3, a_r_p3_m3, psi_i_p3_m3, a_i_p3_m3, psi_a_p3_m3);
+    prodsum_psi_a_epi16(psi_r_p3_m5, a_r_p3_m5, psi_i_p3_m5, a_i_p3_m5, psi_a_p3_m5);
+    prodsum_psi_a_epi16(psi_r_p3_m7, a_r_p3_m7, psi_i_p3_m7, a_i_p3_m7, psi_a_p3_m7);
+    prodsum_psi_a_epi16(psi_r_p1_p7, a_r_p1_p7, psi_i_p1_p7, a_i_p1_p7, psi_a_p1_p7);
+    prodsum_psi_a_epi16(psi_r_p1_p5, a_r_p1_p5, psi_i_p1_p5, a_i_p1_p5, psi_a_p1_p5);
+    prodsum_psi_a_epi16(psi_r_p1_p3, a_r_p1_p3, psi_i_p1_p3, a_i_p1_p3, psi_a_p1_p3);
+    prodsum_psi_a_epi16(psi_r_p1_p1, a_r_p1_p1, psi_i_p1_p1, a_i_p1_p1, psi_a_p1_p1);
+    prodsum_psi_a_epi16(psi_r_p1_m1, a_r_p1_m1, psi_i_p1_m1, a_i_p1_m1, psi_a_p1_m1);
+    prodsum_psi_a_epi16(psi_r_p1_m3, a_r_p1_m3, psi_i_p1_m3, a_i_p1_m3, psi_a_p1_m3);
+    prodsum_psi_a_epi16(psi_r_p1_m5, a_r_p1_m5, psi_i_p1_m5, a_i_p1_m5, psi_a_p1_m5);
+    prodsum_psi_a_epi16(psi_r_p1_m7, a_r_p1_m7, psi_i_p1_m7, a_i_p1_m7, psi_a_p1_m7);
+    prodsum_psi_a_epi16(psi_r_m1_p7, a_r_m1_p7, psi_i_m1_p7, a_i_m1_p7, psi_a_m1_p7);
+    prodsum_psi_a_epi16(psi_r_m1_p5, a_r_m1_p5, psi_i_m1_p5, a_i_m1_p5, psi_a_m1_p5);
+    prodsum_psi_a_epi16(psi_r_m1_p3, a_r_m1_p3, psi_i_m1_p3, a_i_m1_p3, psi_a_m1_p3);
+    prodsum_psi_a_epi16(psi_r_m1_p1, a_r_m1_p1, psi_i_m1_p1, a_i_m1_p1, psi_a_m1_p1);
+    prodsum_psi_a_epi16(psi_r_m1_m1, a_r_m1_m1, psi_i_m1_m1, a_i_m1_m1, psi_a_m1_m1);
+    prodsum_psi_a_epi16(psi_r_m1_m3, a_r_m1_m3, psi_i_m1_m3, a_i_m1_m3, psi_a_m1_m3);
+    prodsum_psi_a_epi16(psi_r_m1_m5, a_r_m1_m5, psi_i_m1_m5, a_i_m1_m5, psi_a_m1_m5);
+    prodsum_psi_a_epi16(psi_r_m1_m7, a_r_m1_m7, psi_i_m1_m7, a_i_m1_m7, psi_a_m1_m7);
+    prodsum_psi_a_epi16(psi_r_m3_p7, a_r_m3_p7, psi_i_m3_p7, a_i_m3_p7, psi_a_m3_p7);
+    prodsum_psi_a_epi16(psi_r_m3_p5, a_r_m3_p5, psi_i_m3_p5, a_i_m3_p5, psi_a_m3_p5);
+    prodsum_psi_a_epi16(psi_r_m3_p3, a_r_m3_p3, psi_i_m3_p3, a_i_m3_p3, psi_a_m3_p3);
+    prodsum_psi_a_epi16(psi_r_m3_p1, a_r_m3_p1, psi_i_m3_p1, a_i_m3_p1, psi_a_m3_p1);
+    prodsum_psi_a_epi16(psi_r_m3_m1, a_r_m3_m1, psi_i_m3_m1, a_i_m3_m1, psi_a_m3_m1);
+    prodsum_psi_a_epi16(psi_r_m3_m3, a_r_m3_m3, psi_i_m3_m3, a_i_m3_m3, psi_a_m3_m3);
+    prodsum_psi_a_epi16(psi_r_m3_m5, a_r_m3_m5, psi_i_m3_m5, a_i_m3_m5, psi_a_m3_m5);
+    prodsum_psi_a_epi16(psi_r_m3_m7, a_r_m3_m7, psi_i_m3_m7, a_i_m3_m7, psi_a_m3_m7);
+    prodsum_psi_a_epi16(psi_r_m5_p7, a_r_m5_p7, psi_i_m5_p7, a_i_m5_p7, psi_a_m5_p7);
+    prodsum_psi_a_epi16(psi_r_m5_p5, a_r_m5_p5, psi_i_m5_p5, a_i_m5_p5, psi_a_m5_p5);
+    prodsum_psi_a_epi16(psi_r_m5_p3, a_r_m5_p3, psi_i_m5_p3, a_i_m5_p3, psi_a_m5_p3);
+    prodsum_psi_a_epi16(psi_r_m5_p1, a_r_m5_p1, psi_i_m5_p1, a_i_m5_p1, psi_a_m5_p1);
+    prodsum_psi_a_epi16(psi_r_m5_m1, a_r_m5_m1, psi_i_m5_m1, a_i_m5_m1, psi_a_m5_m1);
+    prodsum_psi_a_epi16(psi_r_m5_m3, a_r_m5_m3, psi_i_m5_m3, a_i_m5_m3, psi_a_m5_m3);
+    prodsum_psi_a_epi16(psi_r_m5_m5, a_r_m5_m5, psi_i_m5_m5, a_i_m5_m5, psi_a_m5_m5);
+    prodsum_psi_a_epi16(psi_r_m5_m7, a_r_m5_m7, psi_i_m5_m7, a_i_m5_m7, psi_a_m5_m7);
+    prodsum_psi_a_epi16(psi_r_m7_p7, a_r_m7_p7, psi_i_m7_p7, a_i_m7_p7, psi_a_m7_p7);
+    prodsum_psi_a_epi16(psi_r_m7_p5, a_r_m7_p5, psi_i_m7_p5, a_i_m7_p5, psi_a_m7_p5);
+    prodsum_psi_a_epi16(psi_r_m7_p3, a_r_m7_p3, psi_i_m7_p3, a_i_m7_p3, psi_a_m7_p3);
+    prodsum_psi_a_epi16(psi_r_m7_p1, a_r_m7_p1, psi_i_m7_p1, a_i_m7_p1, psi_a_m7_p1);
+    prodsum_psi_a_epi16(psi_r_m7_m1, a_r_m7_m1, psi_i_m7_m1, a_i_m7_m1, psi_a_m7_m1);
+    prodsum_psi_a_epi16(psi_r_m7_m3, a_r_m7_m3, psi_i_m7_m3, a_i_m7_m3, psi_a_m7_m3);
+    prodsum_psi_a_epi16(psi_r_m7_m5, a_r_m7_m5, psi_i_m7_m5, a_i_m7_m5, psi_a_m7_m5);
+    prodsum_psi_a_epi16(psi_r_m7_m7, a_r_m7_m7, psi_i_m7_m7, a_i_m7_m7, psi_a_m7_m7);
+
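+    // Multiply by sqrt(2): mulhi keeps the high 16 bits of psi_a * ONE_OVER_SQRT_2, then << 2 rescales (net factor sqrt(2), assuming ONE_OVER_SQRT_2 is 1/sqrt(2) in Q15 -- TODO confirm)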
+    psi_a_p7_p7 = _mm256_mulhi_epi16(psi_a_p7_p7, ONE_OVER_SQRT_2);
+    psi_a_p7_p7 = _mm256_slli_epi16(psi_a_p7_p7, 2);
+    psi_a_p7_p5 = _mm256_mulhi_epi16(psi_a_p7_p5, ONE_OVER_SQRT_2);
+    psi_a_p7_p5 = _mm256_slli_epi16(psi_a_p7_p5, 2);
+    psi_a_p7_p3 = _mm256_mulhi_epi16(psi_a_p7_p3, ONE_OVER_SQRT_2);
+    psi_a_p7_p3 = _mm256_slli_epi16(psi_a_p7_p3, 2);
+    psi_a_p7_p1 = _mm256_mulhi_epi16(psi_a_p7_p1, ONE_OVER_SQRT_2);
+    psi_a_p7_p1 = _mm256_slli_epi16(psi_a_p7_p1, 2);
+    psi_a_p7_m1 = _mm256_mulhi_epi16(psi_a_p7_m1, ONE_OVER_SQRT_2);
+    psi_a_p7_m1 = _mm256_slli_epi16(psi_a_p7_m1, 2);
+    psi_a_p7_m3 = _mm256_mulhi_epi16(psi_a_p7_m3, ONE_OVER_SQRT_2);
+    psi_a_p7_m3 = _mm256_slli_epi16(psi_a_p7_m3, 2);
+    psi_a_p7_m5 = _mm256_mulhi_epi16(psi_a_p7_m5, ONE_OVER_SQRT_2);
+    psi_a_p7_m5 = _mm256_slli_epi16(psi_a_p7_m5, 2);
+    psi_a_p7_m7 = _mm256_mulhi_epi16(psi_a_p7_m7, ONE_OVER_SQRT_2);
+    psi_a_p7_m7 = _mm256_slli_epi16(psi_a_p7_m7, 2);
+    psi_a_p5_p7 = _mm256_mulhi_epi16(psi_a_p5_p7, ONE_OVER_SQRT_2);
+    psi_a_p5_p7 = _mm256_slli_epi16(psi_a_p5_p7, 2);
+    psi_a_p5_p5 = _mm256_mulhi_epi16(psi_a_p5_p5, ONE_OVER_SQRT_2);
+    psi_a_p5_p5 = _mm256_slli_epi16(psi_a_p5_p5, 2);
+    psi_a_p5_p3 = _mm256_mulhi_epi16(psi_a_p5_p3, ONE_OVER_SQRT_2);
+    psi_a_p5_p3 = _mm256_slli_epi16(psi_a_p5_p3, 2);
+    psi_a_p5_p1 = _mm256_mulhi_epi16(psi_a_p5_p1, ONE_OVER_SQRT_2);
+    psi_a_p5_p1 = _mm256_slli_epi16(psi_a_p5_p1, 2);
+    psi_a_p5_m1 = _mm256_mulhi_epi16(psi_a_p5_m1, ONE_OVER_SQRT_2);
+    psi_a_p5_m1 = _mm256_slli_epi16(psi_a_p5_m1, 2);
+    psi_a_p5_m3 = _mm256_mulhi_epi16(psi_a_p5_m3, ONE_OVER_SQRT_2);
+    psi_a_p5_m3 = _mm256_slli_epi16(psi_a_p5_m3, 2);
+    psi_a_p5_m5 = _mm256_mulhi_epi16(psi_a_p5_m5, ONE_OVER_SQRT_2);
+    psi_a_p5_m5 = _mm256_slli_epi16(psi_a_p5_m5, 2);
+    psi_a_p5_m7 = _mm256_mulhi_epi16(psi_a_p5_m7, ONE_OVER_SQRT_2);
+    psi_a_p5_m7 = _mm256_slli_epi16(psi_a_p5_m7, 2);
+    psi_a_p3_p7 = _mm256_mulhi_epi16(psi_a_p3_p7, ONE_OVER_SQRT_2);
+    psi_a_p3_p7 = _mm256_slli_epi16(psi_a_p3_p7, 2);
+    psi_a_p3_p5 = _mm256_mulhi_epi16(psi_a_p3_p5, ONE_OVER_SQRT_2);
+    psi_a_p3_p5 = _mm256_slli_epi16(psi_a_p3_p5, 2);
+    psi_a_p3_p3 = _mm256_mulhi_epi16(psi_a_p3_p3, ONE_OVER_SQRT_2);
+    psi_a_p3_p3 = _mm256_slli_epi16(psi_a_p3_p3, 2);
+    psi_a_p3_p1 = _mm256_mulhi_epi16(psi_a_p3_p1, ONE_OVER_SQRT_2);
+    psi_a_p3_p1 = _mm256_slli_epi16(psi_a_p3_p1, 2);
+    psi_a_p3_m1 = _mm256_mulhi_epi16(psi_a_p3_m1, ONE_OVER_SQRT_2);
+    psi_a_p3_m1 = _mm256_slli_epi16(psi_a_p3_m1, 2);
+    psi_a_p3_m3 = _mm256_mulhi_epi16(psi_a_p3_m3, ONE_OVER_SQRT_2);
+    psi_a_p3_m3 = _mm256_slli_epi16(psi_a_p3_m3, 2);
+    psi_a_p3_m5 = _mm256_mulhi_epi16(psi_a_p3_m5, ONE_OVER_SQRT_2);
+    psi_a_p3_m5 = _mm256_slli_epi16(psi_a_p3_m5, 2);
+    psi_a_p3_m7 = _mm256_mulhi_epi16(psi_a_p3_m7, ONE_OVER_SQRT_2);
+    psi_a_p3_m7 = _mm256_slli_epi16(psi_a_p3_m7, 2);
+    psi_a_p1_p7 = _mm256_mulhi_epi16(psi_a_p1_p7, ONE_OVER_SQRT_2);
+    psi_a_p1_p7 = _mm256_slli_epi16(psi_a_p1_p7, 2);
+    psi_a_p1_p5 = _mm256_mulhi_epi16(psi_a_p1_p5, ONE_OVER_SQRT_2);
+    psi_a_p1_p5 = _mm256_slli_epi16(psi_a_p1_p5, 2);
+    psi_a_p1_p3 = _mm256_mulhi_epi16(psi_a_p1_p3, ONE_OVER_SQRT_2);
+    psi_a_p1_p3 = _mm256_slli_epi16(psi_a_p1_p3, 2);
+    psi_a_p1_p1 = _mm256_mulhi_epi16(psi_a_p1_p1, ONE_OVER_SQRT_2);
+    psi_a_p1_p1 = _mm256_slli_epi16(psi_a_p1_p1, 2);
+    psi_a_p1_m1 = _mm256_mulhi_epi16(psi_a_p1_m1, ONE_OVER_SQRT_2);
+    psi_a_p1_m1 = _mm256_slli_epi16(psi_a_p1_m1, 2);
+    psi_a_p1_m3 = _mm256_mulhi_epi16(psi_a_p1_m3, ONE_OVER_SQRT_2);
+    psi_a_p1_m3 = _mm256_slli_epi16(psi_a_p1_m3, 2);
+    psi_a_p1_m5 = _mm256_mulhi_epi16(psi_a_p1_m5, ONE_OVER_SQRT_2);
+    psi_a_p1_m5 = _mm256_slli_epi16(psi_a_p1_m5, 2);
+    psi_a_p1_m7 = _mm256_mulhi_epi16(psi_a_p1_m7, ONE_OVER_SQRT_2);
+    psi_a_p1_m7 = _mm256_slli_epi16(psi_a_p1_m7, 2);
+    psi_a_m1_p7 = _mm256_mulhi_epi16(psi_a_m1_p7, ONE_OVER_SQRT_2);
+    psi_a_m1_p7 = _mm256_slli_epi16(psi_a_m1_p7, 2);
+    psi_a_m1_p5 = _mm256_mulhi_epi16(psi_a_m1_p5, ONE_OVER_SQRT_2);
+    psi_a_m1_p5 = _mm256_slli_epi16(psi_a_m1_p5, 2);
+    psi_a_m1_p3 = _mm256_mulhi_epi16(psi_a_m1_p3, ONE_OVER_SQRT_2);
+    psi_a_m1_p3 = _mm256_slli_epi16(psi_a_m1_p3, 2);
+    psi_a_m1_p1 = _mm256_mulhi_epi16(psi_a_m1_p1, ONE_OVER_SQRT_2);
+    psi_a_m1_p1 = _mm256_slli_epi16(psi_a_m1_p1, 2);
+    psi_a_m1_m1 = _mm256_mulhi_epi16(psi_a_m1_m1, ONE_OVER_SQRT_2);
+    psi_a_m1_m1 = _mm256_slli_epi16(psi_a_m1_m1, 2);
+    psi_a_m1_m3 = _mm256_mulhi_epi16(psi_a_m1_m3, ONE_OVER_SQRT_2);
+    psi_a_m1_m3 = _mm256_slli_epi16(psi_a_m1_m3, 2);
+    psi_a_m1_m5 = _mm256_mulhi_epi16(psi_a_m1_m5, ONE_OVER_SQRT_2);
+    psi_a_m1_m5 = _mm256_slli_epi16(psi_a_m1_m5, 2);
+    psi_a_m1_m7 = _mm256_mulhi_epi16(psi_a_m1_m7, ONE_OVER_SQRT_2);
+    psi_a_m1_m7 = _mm256_slli_epi16(psi_a_m1_m7, 2);
+    psi_a_m3_p7 = _mm256_mulhi_epi16(psi_a_m3_p7, ONE_OVER_SQRT_2);
+    psi_a_m3_p7 = _mm256_slli_epi16(psi_a_m3_p7, 2);
+    psi_a_m3_p5 = _mm256_mulhi_epi16(psi_a_m3_p5, ONE_OVER_SQRT_2);
+    psi_a_m3_p5 = _mm256_slli_epi16(psi_a_m3_p5, 2);
+    psi_a_m3_p3 = _mm256_mulhi_epi16(psi_a_m3_p3, ONE_OVER_SQRT_2);
+    psi_a_m3_p3 = _mm256_slli_epi16(psi_a_m3_p3, 2);
+    psi_a_m3_p1 = _mm256_mulhi_epi16(psi_a_m3_p1, ONE_OVER_SQRT_2);
+    psi_a_m3_p1 = _mm256_slli_epi16(psi_a_m3_p1, 2);
+    psi_a_m3_m1 = _mm256_mulhi_epi16(psi_a_m3_m1, ONE_OVER_SQRT_2);
+    psi_a_m3_m1 = _mm256_slli_epi16(psi_a_m3_m1, 2);
+    psi_a_m3_m3 = _mm256_mulhi_epi16(psi_a_m3_m3, ONE_OVER_SQRT_2);
+    psi_a_m3_m3 = _mm256_slli_epi16(psi_a_m3_m3, 2);
+    psi_a_m3_m5 = _mm256_mulhi_epi16(psi_a_m3_m5, ONE_OVER_SQRT_2);
+    psi_a_m3_m5 = _mm256_slli_epi16(psi_a_m3_m5, 2);
+    psi_a_m3_m7 = _mm256_mulhi_epi16(psi_a_m3_m7, ONE_OVER_SQRT_2);
+    psi_a_m3_m7 = _mm256_slli_epi16(psi_a_m3_m7, 2);
+    psi_a_m5_p7 = _mm256_mulhi_epi16(psi_a_m5_p7, ONE_OVER_SQRT_2);
+    psi_a_m5_p7 = _mm256_slli_epi16(psi_a_m5_p7, 2);
+    psi_a_m5_p5 = _mm256_mulhi_epi16(psi_a_m5_p5, ONE_OVER_SQRT_2);
+    psi_a_m5_p5 = _mm256_slli_epi16(psi_a_m5_p5, 2);
+    psi_a_m5_p3 = _mm256_mulhi_epi16(psi_a_m5_p3, ONE_OVER_SQRT_2);
+    psi_a_m5_p3 = _mm256_slli_epi16(psi_a_m5_p3, 2);
+    psi_a_m5_p1 = _mm256_mulhi_epi16(psi_a_m5_p1, ONE_OVER_SQRT_2);
+    psi_a_m5_p1 = _mm256_slli_epi16(psi_a_m5_p1, 2);
+    psi_a_m5_m1 = _mm256_mulhi_epi16(psi_a_m5_m1, ONE_OVER_SQRT_2);
+    psi_a_m5_m1 = _mm256_slli_epi16(psi_a_m5_m1, 2);
+    psi_a_m5_m3 = _mm256_mulhi_epi16(psi_a_m5_m3, ONE_OVER_SQRT_2);
+    psi_a_m5_m3 = _mm256_slli_epi16(psi_a_m5_m3, 2);
+    psi_a_m5_m5 = _mm256_mulhi_epi16(psi_a_m5_m5, ONE_OVER_SQRT_2);
+    psi_a_m5_m5 = _mm256_slli_epi16(psi_a_m5_m5, 2);
+    psi_a_m5_m7 = _mm256_mulhi_epi16(psi_a_m5_m7, ONE_OVER_SQRT_2);
+    psi_a_m5_m7 = _mm256_slli_epi16(psi_a_m5_m7, 2);
+    psi_a_m7_p7 = _mm256_mulhi_epi16(psi_a_m7_p7, ONE_OVER_SQRT_2);
+    psi_a_m7_p7 = _mm256_slli_epi16(psi_a_m7_p7, 2);
+    psi_a_m7_p5 = _mm256_mulhi_epi16(psi_a_m7_p5, ONE_OVER_SQRT_2);
+    psi_a_m7_p5 = _mm256_slli_epi16(psi_a_m7_p5, 2);
+    psi_a_m7_p3 = _mm256_mulhi_epi16(psi_a_m7_p3, ONE_OVER_SQRT_2);
+    psi_a_m7_p3 = _mm256_slli_epi16(psi_a_m7_p3, 2);
+    psi_a_m7_p1 = _mm256_mulhi_epi16(psi_a_m7_p1, ONE_OVER_SQRT_2);
+    psi_a_m7_p1 = _mm256_slli_epi16(psi_a_m7_p1, 2);
+    psi_a_m7_m1 = _mm256_mulhi_epi16(psi_a_m7_m1, ONE_OVER_SQRT_2);
+    psi_a_m7_m1 = _mm256_slli_epi16(psi_a_m7_m1, 2);
+    psi_a_m7_m3 = _mm256_mulhi_epi16(psi_a_m7_m3, ONE_OVER_SQRT_2);
+    psi_a_m7_m3 = _mm256_slli_epi16(psi_a_m7_m3, 2);
+    psi_a_m7_m5 = _mm256_mulhi_epi16(psi_a_m7_m5, ONE_OVER_SQRT_2);
+    psi_a_m7_m5 = _mm256_slli_epi16(psi_a_m7_m5, 2);
+    psi_a_m7_m7 = _mm256_mulhi_epi16(psi_a_m7_m7, ONE_OVER_SQRT_2);
+    psi_a_m7_m7 = _mm256_slli_epi16(psi_a_m7_m7, 2);
+
+    // Calculation of a group of two terms in the bit metric involving squares of interference
+    square_a_64qam_epi16(a_r_p7_p7, a_i_p7_p7, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_p7_p7);
+    square_a_64qam_epi16(a_r_p7_p5, a_i_p7_p5, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_p7_p5);
+    square_a_64qam_epi16(a_r_p7_p3, a_i_p7_p3, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_p7_p3);
+    square_a_64qam_epi16(a_r_p7_p1, a_i_p7_p1, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_p7_p1);
+    square_a_64qam_epi16(a_r_p7_m1, a_i_p7_m1, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_p7_m1);
+    square_a_64qam_epi16(a_r_p7_m3, a_i_p7_m3, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_p7_m3);
+    square_a_64qam_epi16(a_r_p7_m5, a_i_p7_m5, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_p7_m5);
+    square_a_64qam_epi16(a_r_p7_m7, a_i_p7_m7, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_p7_m7);
+    square_a_64qam_epi16(a_r_p5_p7, a_i_p5_p7, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_p5_p7);
+    square_a_64qam_epi16(a_r_p5_p5, a_i_p5_p5, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_p5_p5);
+    square_a_64qam_epi16(a_r_p5_p3, a_i_p5_p3, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_p5_p3);
+    square_a_64qam_epi16(a_r_p5_p1, a_i_p5_p1, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_p5_p1);
+    square_a_64qam_epi16(a_r_p5_m1, a_i_p5_m1, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_p5_m1);
+    square_a_64qam_epi16(a_r_p5_m3, a_i_p5_m3, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_p5_m3);
+    square_a_64qam_epi16(a_r_p5_m5, a_i_p5_m5, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_p5_m5);
+    square_a_64qam_epi16(a_r_p5_m7, a_i_p5_m7, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_p5_m7);
+    square_a_64qam_epi16(a_r_p3_p7, a_i_p3_p7, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_p3_p7);
+    square_a_64qam_epi16(a_r_p3_p5, a_i_p3_p5, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_p3_p5);
+    square_a_64qam_epi16(a_r_p3_p3, a_i_p3_p3, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_p3_p3);
+    square_a_64qam_epi16(a_r_p3_p1, a_i_p3_p1, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_p3_p1);
+    square_a_64qam_epi16(a_r_p3_m1, a_i_p3_m1, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_p3_m1);
+    square_a_64qam_epi16(a_r_p3_m3, a_i_p3_m3, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_p3_m3);
+    square_a_64qam_epi16(a_r_p3_m5, a_i_p3_m5, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_p3_m5);
+    square_a_64qam_epi16(a_r_p3_m7, a_i_p3_m7, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_p3_m7);
+    square_a_64qam_epi16(a_r_p1_p7, a_i_p1_p7, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_p1_p7);
+    square_a_64qam_epi16(a_r_p1_p5, a_i_p1_p5, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_p1_p5);
+    square_a_64qam_epi16(a_r_p1_p3, a_i_p1_p3, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_p1_p3);
+    square_a_64qam_epi16(a_r_p1_p1, a_i_p1_p1, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_p1_p1);
+    square_a_64qam_epi16(a_r_p1_m1, a_i_p1_m1, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_p1_m1);
+    square_a_64qam_epi16(a_r_p1_m3, a_i_p1_m3, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_p1_m3);
+    square_a_64qam_epi16(a_r_p1_m5, a_i_p1_m5, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_p1_m5);
+    square_a_64qam_epi16(a_r_p1_m7, a_i_p1_m7, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_p1_m7);
+    square_a_64qam_epi16(a_r_m1_p7, a_i_m1_p7, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_m1_p7);
+    square_a_64qam_epi16(a_r_m1_p5, a_i_m1_p5, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_m1_p5);
+    square_a_64qam_epi16(a_r_m1_p3, a_i_m1_p3, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_m1_p3);
+    square_a_64qam_epi16(a_r_m1_p1, a_i_m1_p1, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_m1_p1);
+    square_a_64qam_epi16(a_r_m1_m1, a_i_m1_m1, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_m1_m1);
+    square_a_64qam_epi16(a_r_m1_m3, a_i_m1_m3, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_m1_m3);
+    square_a_64qam_epi16(a_r_m1_m5, a_i_m1_m5, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_m1_m5);
+    square_a_64qam_epi16(a_r_m1_m7, a_i_m1_m7, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_m1_m7);
+    square_a_64qam_epi16(a_r_m3_p7, a_i_m3_p7, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_m3_p7);
+    square_a_64qam_epi16(a_r_m3_p5, a_i_m3_p5, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_m3_p5);
+    square_a_64qam_epi16(a_r_m3_p3, a_i_m3_p3, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_m3_p3);
+    square_a_64qam_epi16(a_r_m3_p1, a_i_m3_p1, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_m3_p1);
+    square_a_64qam_epi16(a_r_m3_m1, a_i_m3_m1, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_m3_m1);
+    square_a_64qam_epi16(a_r_m3_m3, a_i_m3_m3, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_m3_m3);
+    square_a_64qam_epi16(a_r_m3_m5, a_i_m3_m5, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_m3_m5);
+    square_a_64qam_epi16(a_r_m3_m7, a_i_m3_m7, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_m3_m7);
+    square_a_64qam_epi16(a_r_m5_p7, a_i_m5_p7, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_m5_p7);
+    square_a_64qam_epi16(a_r_m5_p5, a_i_m5_p5, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_m5_p5);
+    square_a_64qam_epi16(a_r_m5_p3, a_i_m5_p3, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_m5_p3);
+    square_a_64qam_epi16(a_r_m5_p1, a_i_m5_p1, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_m5_p1);
+    square_a_64qam_epi16(a_r_m5_m1, a_i_m5_m1, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_m5_m1);
+    square_a_64qam_epi16(a_r_m5_m3, a_i_m5_m3, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_m5_m3);
+    square_a_64qam_epi16(a_r_m5_m5, a_i_m5_m5, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_m5_m5);
+    square_a_64qam_epi16(a_r_m5_m7, a_i_m5_m7, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_m5_m7);
+    square_a_64qam_epi16(a_r_m7_p7, a_i_m7_p7, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_m7_p7);
+    square_a_64qam_epi16(a_r_m7_p5, a_i_m7_p5, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_m7_p5);
+    square_a_64qam_epi16(a_r_m7_p3, a_i_m7_p3, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_m7_p3);
+    square_a_64qam_epi16(a_r_m7_p1, a_i_m7_p1, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_m7_p1);
+    square_a_64qam_epi16(a_r_m7_m1, a_i_m7_m1, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_m7_m1);
+    square_a_64qam_epi16(a_r_m7_m3, a_i_m7_m3, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_m7_m3);
+    square_a_64qam_epi16(a_r_m7_m5, a_i_m7_m5, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_m7_m5);
+    square_a_64qam_epi16(a_r_m7_m7, a_i_m7_m7, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_m7_m7);
+
+    // Computing different multiples of ||h0||^2
+    // One scaled copy of ch_mag_des is produced per distinct value of
+    // k = x^2 + y^2, with x, y in {1, 3, 5, 7}; the variable name
+    // ch_mag_<k>_over_42_with_sigma2 encodes k.
+    // _mm256_mulhi_epi16 keeps the upper 16 bits of each signed 16x16 product,
+    // and the left shift that follows rescales the result.
+    // NOTE(review): k = 58, 74 and 98 shift by 2 instead of 1 -- presumably
+    // those constants are stored with one fewer fractional bit so that they
+    // fit in a signed 16-bit lane; confirm against the constant definitions.
+    // x=1, y=1
+    ch_mag_2_over_42_with_sigma2 = _mm256_mulhi_epi16(ch_mag_des,ONE_OVER_FOUR_SQRT_42);
+    ch_mag_2_over_42_with_sigma2 = _mm256_slli_epi16(ch_mag_2_over_42_with_sigma2,1);
+    // x=1, y=3
+    ch_mag_10_over_42_with_sigma2 = _mm256_mulhi_epi16(ch_mag_des,FIVE_OVER_FOUR_SQRT_42);
+    ch_mag_10_over_42_with_sigma2 = _mm256_slli_epi16(ch_mag_10_over_42_with_sigma2,1);
+    // x=1, y=5
+    ch_mag_26_over_42_with_sigma2 = _mm256_mulhi_epi16(ch_mag_des,THIRTEEN_OVER_FOUR_SQRT_42);
+    ch_mag_26_over_42_with_sigma2 = _mm256_slli_epi16(ch_mag_26_over_42_with_sigma2,1);
+    // x=1, y=7 and x=5, y=5 (both give x^2 + y^2 = 50, so it is computed once)
+    ch_mag_50_over_42_with_sigma2 = _mm256_mulhi_epi16(ch_mag_des,TWENTYFIVE_OVER_FOUR_SQRT_42);
+    ch_mag_50_over_42_with_sigma2 = _mm256_slli_epi16(ch_mag_50_over_42_with_sigma2,1);
+    // x=3, y=3
+    ch_mag_18_over_42_with_sigma2 = _mm256_mulhi_epi16(ch_mag_des,NINE_OVER_FOUR_SQRT_42);
+    ch_mag_18_over_42_with_sigma2 = _mm256_slli_epi16(ch_mag_18_over_42_with_sigma2,1);
+    // x=3, y=5
+    ch_mag_34_over_42_with_sigma2 = _mm256_mulhi_epi16(ch_mag_des,SEVENTEEN_OVER_FOUR_SQRT_42);
+    ch_mag_34_over_42_with_sigma2 = _mm256_slli_epi16(ch_mag_34_over_42_with_sigma2,1);
+    // x=3, y=7
+    ch_mag_58_over_42_with_sigma2 = _mm256_mulhi_epi16(ch_mag_des,TWENTYNINE_OVER_FOUR_SQRT_42);
+    ch_mag_58_over_42_with_sigma2 = _mm256_slli_epi16(ch_mag_58_over_42_with_sigma2,2);
+    // x=5, y=7
+    ch_mag_74_over_42_with_sigma2 = _mm256_mulhi_epi16(ch_mag_des,THIRTYSEVEN_OVER_FOUR_SQRT_42);
+    ch_mag_74_over_42_with_sigma2 = _mm256_slli_epi16(ch_mag_74_over_42_with_sigma2,2);
+    // x=7, y=7
+    ch_mag_98_over_42_with_sigma2 = _mm256_mulhi_epi16(ch_mag_des,FORTYNINE_OVER_FOUR_SQRT_42);
+    ch_mag_98_over_42_with_sigma2 = _mm256_slli_epi16(ch_mag_98_over_42_with_sigma2,2);
+
+    // Computing Metrics
+    xmm0 = _mm256_subs_epi16(psi_a_p7_p7, a_sq_p7_p7);
+    xmm1 = _mm256_adds_epi16(xmm0, y0_p_7_7);
+    bit_met_p7_p7 = _mm256_subs_epi16(xmm1, ch_mag_98_over_42_with_sigma2);
+    xmm0 = _mm256_subs_epi16(psi_a_p7_p5, a_sq_p7_p5);
+    xmm1 = _mm256_adds_epi16(xmm0, y0_p_7_5);
+    bit_met_p7_p5 = _mm256_subs_epi16(xmm1, ch_mag_74_over_42_with_sigma2);
+    xmm0 = _mm256_subs_epi16(psi_a_p7_p3, a_sq_p7_p3);
+    xmm1 = _mm256_adds_epi16(xmm0, y0_p_7_3);
+    bit_met_p7_p3 = _mm256_subs_epi16(xmm1, ch_mag_58_over_42_with_sigma2);
+    xmm0 = _mm256_subs_epi16(psi_a_p7_p1, a_sq_p7_p1);
+    xmm1 = _mm256_adds_epi16(xmm0, y0_p_7_1);
+    bit_met_p7_p1 = _mm256_subs_epi16(xmm1, ch_mag_50_over_42_with_sigma2);
+    xmm0 = _mm256_subs_epi16(psi_a_p7_m1, a_sq_p7_m1);
+    xmm1 = _mm256_adds_epi16(xmm0, y0_m_7_1);
+    bit_met_p7_m1 = _mm256_subs_epi16(xmm1, ch_mag_50_over_42_with_sigma2);
+    xmm0 = _mm256_subs_epi16(psi_a_p7_m3, a_sq_p7_m3);
+    xmm1 = _mm256_adds_epi16(xmm0, y0_m_7_3);
+    bit_met_p7_m3 = _mm256_subs_epi16(xmm1, ch_mag_58_over_42_with_sigma2);
+    xmm0 = _mm256_subs_epi16(psi_a_p7_m5, a_sq_p7_m5);
+    xmm1 = _mm256_adds_epi16(xmm0, y0_m_7_5);
+    bit_met_p7_m5 = _mm256_subs_epi16(xmm1, ch_mag_74_over_42_with_sigma2);
+    xmm0 = _mm256_subs_epi16(psi_a_p7_m7, a_sq_p7_m7);
+    xmm1 = _mm256_adds_epi16(xmm0, y0_m_7_7);
+    bit_met_p7_m7 = _mm256_subs_epi16(xmm1, ch_mag_98_over_42_with_sigma2);
+    xmm0 = _mm256_subs_epi16(psi_a_p5_p7, a_sq_p5_p7);
+    xmm1 = _mm256_adds_epi16(xmm0, y0_p_5_7);
+    bit_met_p5_p7 = _mm256_subs_epi16(xmm1, ch_mag_74_over_42_with_sigma2);
+    xmm0 = _mm256_subs_epi16(psi_a_p5_p5, a_sq_p5_p5);
+    xmm1 = _mm256_adds_epi16(xmm0, y0_p_5_5);
+    bit_met_p5_p5 = _mm256_subs_epi16(xmm1, ch_mag_50_over_42_with_sigma2);
+    xmm0 = _mm256_subs_epi16(psi_a_p5_p3, a_sq_p5_p3);
+    xmm1 = _mm256_adds_epi16(xmm0, y0_p_5_3);
+    bit_met_p5_p3 = _mm256_subs_epi16(xmm1, ch_mag_34_over_42_with_sigma2);
+    xmm0 = _mm256_subs_epi16(psi_a_p5_p1, a_sq_p5_p1);
+    xmm1 = _mm256_adds_epi16(xmm0, y0_p_5_1);
+    bit_met_p5_p1 = _mm256_subs_epi16(xmm1, ch_mag_26_over_42_with_sigma2);
+    xmm0 = _mm256_subs_epi16(psi_a_p5_m1, a_sq_p5_m1);
+    xmm1 = _mm256_adds_epi16(xmm0, y0_m_5_1);
+    bit_met_p5_m1 = _mm256_subs_epi16(xmm1, ch_mag_26_over_42_with_sigma2);
+    xmm0 = _mm256_subs_epi16(psi_a_p5_m3, a_sq_p5_m3);
+    xmm1 = _mm256_adds_epi16(xmm0, y0_m_5_3);
+    bit_met_p5_m3 = _mm256_subs_epi16(xmm1, ch_mag_34_over_42_with_sigma2);
+    xmm0 = _mm256_subs_epi16(psi_a_p5_m5, a_sq_p5_m5);
+    xmm1 = _mm256_adds_epi16(xmm0, y0_m_5_5);
+    bit_met_p5_m5 = _mm256_subs_epi16(xmm1, ch_mag_50_over_42_with_sigma2);
+    xmm0 = _mm256_subs_epi16(psi_a_p5_m7, a_sq_p5_m7);
+    xmm1 = _mm256_adds_epi16(xmm0, y0_m_5_7);
+    bit_met_p5_m7 = _mm256_subs_epi16(xmm1, ch_mag_74_over_42_with_sigma2);
+    xmm0 = _mm256_subs_epi16(psi_a_p3_p7, a_sq_p3_p7);
+    xmm1 = _mm256_adds_epi16(xmm0, y0_p_3_7);
+    bit_met_p3_p7 = _mm256_subs_epi16(xmm1, ch_mag_58_over_42_with_sigma2);
+    xmm0 = _mm256_subs_epi16(psi_a_p3_p5, a_sq_p3_p5);
+    xmm1 = _mm256_adds_epi16(xmm0, y0_p_3_5);
+    bit_met_p3_p5 = _mm256_subs_epi16(xmm1, ch_mag_34_over_42_with_sigma2);
+    xmm0 = _mm256_subs_epi16(psi_a_p3_p3, a_sq_p3_p3);
+    xmm1 = _mm256_adds_epi16(xmm0, y0_p_3_3);
+    bit_met_p3_p3 = _mm256_subs_epi16(xmm1, ch_mag_18_over_42_with_sigma2);
+    xmm0 = _mm256_subs_epi16(psi_a_p3_p1, a_sq_p3_p1);
+    xmm1 = _mm256_adds_epi16(xmm0, y0_p_3_1);
+    bit_met_p3_p1 = _mm256_subs_epi16(xmm1, ch_mag_10_over_42_with_sigma2);
+    xmm0 = _mm256_subs_epi16(psi_a_p3_m1, a_sq_p3_m1);
+    xmm1 = _mm256_adds_epi16(xmm0, y0_m_3_1);
+    bit_met_p3_m1 = _mm256_subs_epi16(xmm1, ch_mag_10_over_42_with_sigma2);
+    xmm0 = _mm256_subs_epi16(psi_a_p3_m3, a_sq_p3_m3);
+    xmm1 = _mm256_adds_epi16(xmm0, y0_m_3_3);
+    bit_met_p3_m3 = _mm256_subs_epi16(xmm1, ch_mag_18_over_42_with_sigma2);
+    xmm0 = _mm256_subs_epi16(psi_a_p3_m5, a_sq_p3_m5);
+    xmm1 = _mm256_adds_epi16(xmm0, y0_m_3_5);
+    bit_met_p3_m5 = _mm256_subs_epi16(xmm1, ch_mag_34_over_42_with_sigma2);
+    xmm0 = _mm256_subs_epi16(psi_a_p3_m7, a_sq_p3_m7);
+    xmm1 = _mm256_adds_epi16(xmm0, y0_m_3_7);
+    bit_met_p3_m7 = _mm256_subs_epi16(xmm1, ch_mag_58_over_42_with_sigma2);
+    xmm0 = _mm256_subs_epi16(psi_a_p1_p7, a_sq_p1_p7);
+    xmm1 = _mm256_adds_epi16(xmm0, y0_p_1_7);
+    bit_met_p1_p7 = _mm256_subs_epi16(xmm1, ch_mag_50_over_42_with_sigma2);
+    xmm0 = _mm256_subs_epi16(psi_a_p1_p5, a_sq_p1_p5);
+    xmm1 = _mm256_adds_epi16(xmm0, y0_p_1_5);
+    bit_met_p1_p5 = _mm256_subs_epi16(xmm1, ch_mag_26_over_42_with_sigma2);
+    xmm0 = _mm256_subs_epi16(psi_a_p1_p3, a_sq_p1_p3);
+    xmm1 = _mm256_adds_epi16(xmm0, y0_p_1_3);
+    bit_met_p1_p3 = _mm256_subs_epi16(xmm1, ch_mag_10_over_42_with_sigma2);
+    xmm0 = _mm256_subs_epi16(psi_a_p1_p1, a_sq_p1_p1);
+    xmm1 = _mm256_adds_epi16(xmm0, y0_p_1_1);
+    bit_met_p1_p1 = _mm256_subs_epi16(xmm1, ch_mag_2_over_42_with_sigma2);
+    xmm0 = _mm256_subs_epi16(psi_a_p1_m1, a_sq_p1_m1);
+    xmm1 = _mm256_adds_epi16(xmm0, y0_m_1_1);
+    bit_met_p1_m1 = _mm256_subs_epi16(xmm1, ch_mag_2_over_42_with_sigma2);
+    xmm0 = _mm256_subs_epi16(psi_a_p1_m3, a_sq_p1_m3);
+    xmm1 = _mm256_adds_epi16(xmm0, y0_m_1_3);
+    bit_met_p1_m3 = _mm256_subs_epi16(xmm1, ch_mag_10_over_42_with_sigma2);
+    xmm0 = _mm256_subs_epi16(psi_a_p1_m5, a_sq_p1_m5);
+    xmm1 = _mm256_adds_epi16(xmm0, y0_m_1_5);
+    bit_met_p1_m5 = _mm256_subs_epi16(xmm1, ch_mag_26_over_42_with_sigma2);
+    xmm0 = _mm256_subs_epi16(psi_a_p1_m7, a_sq_p1_m7);
+    xmm1 = _mm256_adds_epi16(xmm0, y0_m_1_7);
+    bit_met_p1_m7 = _mm256_subs_epi16(xmm1, ch_mag_50_over_42_with_sigma2);
+
+    xmm0 = _mm256_subs_epi16(psi_a_m1_p7, a_sq_m1_p7);
+    xmm1 = _mm256_subs_epi16(xmm0, y0_m_1_7);
+    bit_met_m1_p7 = _mm256_subs_epi16(xmm1, ch_mag_50_over_42_with_sigma2);
+    xmm0 = _mm256_subs_epi16(psi_a_m1_p5, a_sq_m1_p5);
+    xmm1 = _mm256_subs_epi16(xmm0, y0_m_1_5);
+    bit_met_m1_p5 = _mm256_subs_epi16(xmm1, ch_mag_26_over_42_with_sigma2);
+    xmm0 = _mm256_subs_epi16(psi_a_m1_p3, a_sq_m1_p3);
+    xmm1 = _mm256_subs_epi16(xmm0, y0_m_1_3);
+    bit_met_m1_p3 = _mm256_subs_epi16(xmm1, ch_mag_10_over_42_with_sigma2);
+    xmm0 = _mm256_subs_epi16(psi_a_m1_p1, a_sq_m1_p1);
+    xmm1 = _mm256_subs_epi16(xmm0, y0_m_1_1);
+    bit_met_m1_p1 = _mm256_subs_epi16(xmm1, ch_mag_2_over_42_with_sigma2);
+    xmm0 = _mm256_subs_epi16(psi_a_m1_m1, a_sq_m1_m1);
+    xmm1 = _mm256_subs_epi16(xmm0, y0_p_1_1);
+    bit_met_m1_m1 = _mm256_subs_epi16(xmm1, ch_mag_2_over_42_with_sigma2);
+    xmm0 = _mm256_subs_epi16(psi_a_m1_m3, a_sq_m1_m3);
+    xmm1 = _mm256_subs_epi16(xmm0, y0_p_1_3);
+    bit_met_m1_m3 = _mm256_subs_epi16(xmm1, ch_mag_10_over_42_with_sigma2);
+    xmm0 = _mm256_subs_epi16(psi_a_m1_m5, a_sq_m1_m5);
+    xmm1 = _mm256_subs_epi16(xmm0, y0_p_1_5);
+    bit_met_m1_m5 = _mm256_subs_epi16(xmm1, ch_mag_26_over_42_with_sigma2);
+    xmm0 = _mm256_subs_epi16(psi_a_m1_m7, a_sq_m1_m7);
+    xmm1 = _mm256_subs_epi16(xmm0, y0_p_1_7);
+    bit_met_m1_m7 = _mm256_subs_epi16(xmm1, ch_mag_50_over_42_with_sigma2);
+    xmm0 = _mm256_subs_epi16(psi_a_m3_p7, a_sq_m3_p7);
+    xmm1 = _mm256_subs_epi16(xmm0, y0_m_3_7);
+    bit_met_m3_p7 = _mm256_subs_epi16(xmm1, ch_mag_58_over_42_with_sigma2);
+    xmm0 = _mm256_subs_epi16(psi_a_m3_p5, a_sq_m3_p5);
+    xmm1 = _mm256_subs_epi16(xmm0, y0_m_3_5);
+    bit_met_m3_p5 = _mm256_subs_epi16(xmm1, ch_mag_34_over_42_with_sigma2);
+    xmm0 = _mm256_subs_epi16(psi_a_m3_p3, a_sq_m3_p3);
+    xmm1 = _mm256_subs_epi16(xmm0, y0_m_3_3);
+    bit_met_m3_p3 = _mm256_subs_epi16(xmm1, ch_mag_18_over_42_with_sigma2);
+    xmm0 = _mm256_subs_epi16(psi_a_m3_p1, a_sq_m3_p1);
+    xmm1 = _mm256_subs_epi16(xmm0, y0_m_3_1);
+    bit_met_m3_p1 = _mm256_subs_epi16(xmm1, ch_mag_10_over_42_with_sigma2);
+    xmm0 = _mm256_subs_epi16(psi_a_m3_m1, a_sq_m3_m1);
+    xmm1 = _mm256_subs_epi16(xmm0, y0_p_3_1);
+    bit_met_m3_m1 = _mm256_subs_epi16(xmm1, ch_mag_10_over_42_with_sigma2);
+    xmm0 = _mm256_subs_epi16(psi_a_m3_m3, a_sq_m3_m3);
+    xmm1 = _mm256_subs_epi16(xmm0, y0_p_3_3);
+    bit_met_m3_m3 = _mm256_subs_epi16(xmm1, ch_mag_18_over_42_with_sigma2);
+    xmm0 = _mm256_subs_epi16(psi_a_m3_m5, a_sq_m3_m5);
+    xmm1 = _mm256_subs_epi16(xmm0, y0_p_3_5);
+    bit_met_m3_m5 = _mm256_subs_epi16(xmm1, ch_mag_34_over_42_with_sigma2);
+    xmm0 = _mm256_subs_epi16(psi_a_m3_m7, a_sq_m3_m7);
+    xmm1 = _mm256_subs_epi16(xmm0, y0_p_3_7);
+    bit_met_m3_m7 = _mm256_subs_epi16(xmm1, ch_mag_58_over_42_with_sigma2);
+    xmm0 = _mm256_subs_epi16(psi_a_m5_p7, a_sq_m5_p7);
+    xmm1 = _mm256_subs_epi16(xmm0, y0_m_5_7);
+    bit_met_m5_p7 = _mm256_subs_epi16(xmm1, ch_mag_74_over_42_with_sigma2);
+    xmm0 = _mm256_subs_epi16(psi_a_m5_p5, a_sq_m5_p5);
+    xmm1 = _mm256_subs_epi16(xmm0, y0_m_5_5);
+    bit_met_m5_p5 = _mm256_subs_epi16(xmm1, ch_mag_50_over_42_with_sigma2);
+    xmm0 = _mm256_subs_epi16(psi_a_m5_p3, a_sq_m5_p3);
+    xmm1 = _mm256_subs_epi16(xmm0, y0_m_5_3);
+    bit_met_m5_p3 = _mm256_subs_epi16(xmm1, ch_mag_34_over_42_with_sigma2);
+    xmm0 = _mm256_subs_epi16(psi_a_m5_p1, a_sq_m5_p1);
+    xmm1 = _mm256_subs_epi16(xmm0, y0_m_5_1);
+    bit_met_m5_p1 = _mm256_subs_epi16(xmm1, ch_mag_26_over_42_with_sigma2);
+    xmm0 = _mm256_subs_epi16(psi_a_m5_m1, a_sq_m5_m1);
+    xmm1 = _mm256_subs_epi16(xmm0, y0_p_5_1);
+    bit_met_m5_m1 = _mm256_subs_epi16(xmm1, ch_mag_26_over_42_with_sigma2);
+    xmm0 = _mm256_subs_epi16(psi_a_m5_m3, a_sq_m5_m3);
+    xmm1 = _mm256_subs_epi16(xmm0, y0_p_5_3);
+    bit_met_m5_m3 = _mm256_subs_epi16(xmm1, ch_mag_34_over_42_with_sigma2);
+    xmm0 = _mm256_subs_epi16(psi_a_m5_m5, a_sq_m5_m5);
+    xmm1 = _mm256_subs_epi16(xmm0, y0_p_5_5);
+    bit_met_m5_m5 = _mm256_subs_epi16(xmm1, ch_mag_50_over_42_with_sigma2);
+    xmm0 = _mm256_subs_epi16(psi_a_m5_m7, a_sq_m5_m7);
+    xmm1 = _mm256_subs_epi16(xmm0, y0_p_5_7);
+    bit_met_m5_m7 = _mm256_subs_epi16(xmm1, ch_mag_74_over_42_with_sigma2);
+    xmm0 = _mm256_subs_epi16(psi_a_m7_p7, a_sq_m7_p7);
+    xmm1 = _mm256_subs_epi16(xmm0, y0_m_7_7);
+    bit_met_m7_p7 = _mm256_subs_epi16(xmm1, ch_mag_98_over_42_with_sigma2);
+    xmm0 = _mm256_subs_epi16(psi_a_m7_p5, a_sq_m7_p5);
+    xmm1 = _mm256_subs_epi16(xmm0, y0_m_7_5);
+    bit_met_m7_p5 = _mm256_subs_epi16(xmm1, ch_mag_74_over_42_with_sigma2);
+    xmm0 = _mm256_subs_epi16(psi_a_m7_p3, a_sq_m7_p3);
+    xmm1 = _mm256_subs_epi16(xmm0, y0_m_7_3);
+    bit_met_m7_p3 = _mm256_subs_epi16(xmm1, ch_mag_58_over_42_with_sigma2);
+    xmm0 = _mm256_subs_epi16(psi_a_m7_p1, a_sq_m7_p1);
+    xmm1 = _mm256_subs_epi16(xmm0, y0_m_7_1);
+    bit_met_m7_p1 = _mm256_subs_epi16(xmm1, ch_mag_50_over_42_with_sigma2);
+    xmm0 = _mm256_subs_epi16(psi_a_m7_m1, a_sq_m7_m1);
+    xmm1 = _mm256_subs_epi16(xmm0, y0_p_7_1);
+    bit_met_m7_m1 = _mm256_subs_epi16(xmm1, ch_mag_50_over_42_with_sigma2);
+    xmm0 = _mm256_subs_epi16(psi_a_m7_m3, a_sq_m7_m3);
+    xmm1 = _mm256_subs_epi16(xmm0, y0_p_7_3);
+    bit_met_m7_m3 = _mm256_subs_epi16(xmm1, ch_mag_58_over_42_with_sigma2);
+    xmm0 = _mm256_subs_epi16(psi_a_m7_m5, a_sq_m7_m5);
+    xmm1 = _mm256_subs_epi16(xmm0, y0_p_7_5);
+    bit_met_m7_m5 = _mm256_subs_epi16(xmm1, ch_mag_74_over_42_with_sigma2);
+    xmm0 = _mm256_subs_epi16(psi_a_m7_m7, a_sq_m7_m7);
+    xmm1 = _mm256_subs_epi16(xmm0, y0_p_7_7);
+    bit_met_m7_m7 = _mm256_subs_epi16(xmm1, ch_mag_98_over_42_with_sigma2);
+
+    // Detection for 1st bit (LTE mapping)
+    // bit = 1
+    xmm0 = _mm256_max_epi16(bit_met_m7_p7, bit_met_m7_p5);
+    xmm1 = _mm256_max_epi16(bit_met_m7_p3, bit_met_m7_p1);
+    xmm2 = _mm256_max_epi16(bit_met_m7_m1, bit_met_m7_m3);
+    xmm3 = _mm256_max_epi16(bit_met_m7_m5, bit_met_m7_m7);
+    xmm4 = _mm256_max_epi16(xmm0, xmm1);
+    xmm5 = _mm256_max_epi16(xmm2, xmm3);
+    logmax_den_re0 = _mm256_max_epi16(xmm4, xmm5);
+    xmm0 = _mm256_max_epi16(bit_met_m5_p7, bit_met_m5_p5);
+    xmm1 = _mm256_max_epi16(bit_met_m5_p3, bit_met_m5_p1);
+    xmm2 = _mm256_max_epi16(bit_met_m5_m1, bit_met_m5_m3);
+    xmm3 = _mm256_max_epi16(bit_met_m5_m5, bit_met_m5_m7);
+    xmm4 = _mm256_max_epi16(xmm0, xmm1);
+    xmm5 = _mm256_max_epi16(xmm2, xmm3);
+    logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm4);
+    logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm5);
+    xmm0 = _mm256_max_epi16(bit_met_m3_p7, bit_met_m3_p5);
+    xmm1 = _mm256_max_epi16(bit_met_m3_p3, bit_met_m3_p1);
+    xmm2 = _mm256_max_epi16(bit_met_m3_m1, bit_met_m3_m3);
+    xmm3 = _mm256_max_epi16(bit_met_m3_m5, bit_met_m3_m7);
+    xmm4 = _mm256_max_epi16(xmm0, xmm1);
+    xmm5 = _mm256_max_epi16(xmm2, xmm3);
+    logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm4);
+    logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm5);
+    xmm0 = _mm256_max_epi16(bit_met_m1_p7, bit_met_m1_p5);
+    xmm1 = _mm256_max_epi16(bit_met_m1_p3, bit_met_m1_p1);
+    xmm2 = _mm256_max_epi16(bit_met_m1_m1, bit_met_m1_m3);
+    xmm3 = _mm256_max_epi16(bit_met_m1_m5, bit_met_m1_m7);
+    xmm4 = _mm256_max_epi16(xmm0, xmm1);
+    xmm5 = _mm256_max_epi16(xmm2, xmm3);
+    logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm4);
+    logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm5);
+
+    // bit = 0
+    xmm0 = _mm256_max_epi16(bit_met_p7_p7, bit_met_p7_p5);
+    xmm1 = _mm256_max_epi16(bit_met_p7_p3, bit_met_p7_p1);
+    xmm2 = _mm256_max_epi16(bit_met_p7_m1, bit_met_p7_m3);
+    xmm3 = _mm256_max_epi16(bit_met_p7_m5, bit_met_p7_m7);
+    xmm4 = _mm256_max_epi16(xmm0, xmm1);
+    xmm5 = _mm256_max_epi16(xmm2, xmm3);
+    logmax_num_re0 = _mm256_max_epi16(xmm4, xmm5);
+    xmm0 = _mm256_max_epi16(bit_met_p5_p7, bit_met_p5_p5);
+    xmm1 = _mm256_max_epi16(bit_met_p5_p3, bit_met_p5_p1);
+    xmm2 = _mm256_max_epi16(bit_met_p5_m1, bit_met_p5_m3);
+    xmm3 = _mm256_max_epi16(bit_met_p5_m5, bit_met_p5_m7);
+    xmm4 = _mm256_max_epi16(xmm0, xmm1);
+    xmm5 = _mm256_max_epi16(xmm2, xmm3);
+    logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm4);
+    logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm5);
+    xmm0 = _mm256_max_epi16(bit_met_p3_p7, bit_met_p3_p5);
+    xmm1 = _mm256_max_epi16(bit_met_p3_p3, bit_met_p3_p1);
+    xmm2 = _mm256_max_epi16(bit_met_p3_m1, bit_met_p3_m3);
+    xmm3 = _mm256_max_epi16(bit_met_p3_m5, bit_met_p3_m7);
+    xmm4 = _mm256_max_epi16(xmm0, xmm1);
+    xmm5 = _mm256_max_epi16(xmm2, xmm3);
+    logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm4);
+    logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm5);
+    xmm0 = _mm256_max_epi16(bit_met_p1_p7, bit_met_p1_p5);
+    xmm1 = _mm256_max_epi16(bit_met_p1_p3, bit_met_p1_p1);
+    xmm2 = _mm256_max_epi16(bit_met_p1_m1, bit_met_p1_m3);
+    xmm3 = _mm256_max_epi16(bit_met_p1_m5, bit_met_p1_m7);
+    xmm4 = _mm256_max_epi16(xmm0, xmm1);
+    xmm5 = _mm256_max_epi16(xmm2, xmm3);
+    logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm4);
+    logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm5);
+
+    y0r = _mm256_subs_epi16(logmax_num_re0, logmax_den_re0);
+
+    // Detection for 2nd bit (LTE mapping)
+    // bit = 1
+    xmm0 = _mm256_max_epi16(bit_met_p7_m1, bit_met_p5_m1);
+    xmm1 = _mm256_max_epi16(bit_met_p3_m1, bit_met_p1_m1);
+    xmm2 = _mm256_max_epi16(bit_met_m1_m1, bit_met_m3_m1);
+    xmm3 = _mm256_max_epi16(bit_met_m5_m1, bit_met_m7_m1);
+    xmm4 = _mm256_max_epi16(xmm0, xmm1);
+    xmm5 = _mm256_max_epi16(xmm2, xmm3);
+    logmax_den_re0 = _mm256_max_epi16(xmm4, xmm5);
+    xmm0 = _mm256_max_epi16(bit_met_p7_m3, bit_met_p5_m3);
+    xmm1 = _mm256_max_epi16(bit_met_p3_m3, bit_met_p1_m3);
+    xmm2 = _mm256_max_epi16(bit_met_m1_m3, bit_met_m3_m3);
+    xmm3 = _mm256_max_epi16(bit_met_m5_m3, bit_met_m7_m3);
+    xmm4 = _mm256_max_epi16(xmm0, xmm1);
+    xmm5 = _mm256_max_epi16(xmm2, xmm3);
+    logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm4);
+    logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm5);
+    xmm0 = _mm256_max_epi16(bit_met_p7_m5, bit_met_p5_m5);
+    xmm1 = _mm256_max_epi16(bit_met_p3_m5, bit_met_p1_m5);
+    xmm2 = _mm256_max_epi16(bit_met_m1_m5, bit_met_m3_m5);
+    xmm3 = _mm256_max_epi16(bit_met_m5_m5, bit_met_m7_m5);
+    xmm4 = _mm256_max_epi16(xmm0, xmm1);
+    xmm5 = _mm256_max_epi16(xmm2, xmm3);
+    logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm4);
+    logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm5);
+    xmm0 = _mm256_max_epi16(bit_met_p7_m7, bit_met_p5_m7);
+    xmm1 = _mm256_max_epi16(bit_met_p3_m7, bit_met_p1_m7);
+    xmm2 = _mm256_max_epi16(bit_met_m1_m7, bit_met_m3_m7);
+    xmm3 = _mm256_max_epi16(bit_met_m5_m7, bit_met_m7_m7);
+    xmm4 = _mm256_max_epi16(xmm0, xmm1);
+    xmm5 = _mm256_max_epi16(xmm2, xmm3);
+    logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm4);
+    logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm5);
+
+    // bit = 0
+    xmm0 = _mm256_max_epi16(bit_met_p7_p1, bit_met_p5_p1);
+    xmm1 = _mm256_max_epi16(bit_met_p3_p1, bit_met_p1_p1);
+    xmm2 = _mm256_max_epi16(bit_met_m1_p1, bit_met_m3_p1);
+    xmm3 = _mm256_max_epi16(bit_met_m5_p1, bit_met_m7_p1);
+    xmm4 = _mm256_max_epi16(xmm0, xmm1);
+    xmm5 = _mm256_max_epi16(xmm2, xmm3);
+    logmax_num_re0 = _mm256_max_epi16(xmm4, xmm5);
+    xmm0 = _mm256_max_epi16(bit_met_p7_p3, bit_met_p5_p3);
+    xmm1 = _mm256_max_epi16(bit_met_p3_p3, bit_met_p1_p3);
+    xmm2 = _mm256_max_epi16(bit_met_m1_p3, bit_met_m3_p3);
+    xmm3 = _mm256_max_epi16(bit_met_m5_p3, bit_met_m7_p3);
+    xmm4 = _mm256_max_epi16(xmm0, xmm1);
+    xmm5 = _mm256_max_epi16(xmm2, xmm3);
+    logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm4);
+    logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm5);
+    xmm0 = _mm256_max_epi16(bit_met_p7_p5, bit_met_p5_p5);
+    xmm1 = _mm256_max_epi16(bit_met_p3_p5, bit_met_p1_p5);
+    xmm2 = _mm256_max_epi16(bit_met_m1_p5, bit_met_m3_p5);
+    xmm3 = _mm256_max_epi16(bit_met_m5_p5, bit_met_m7_p5);
+    xmm4 = _mm256_max_epi16(xmm0, xmm1);
+    xmm5 = _mm256_max_epi16(xmm2, xmm3);
+    logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm4);
+    logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm5);
+    xmm0 = _mm256_max_epi16(bit_met_p7_p7, bit_met_p5_p7);
+    xmm1 = _mm256_max_epi16(bit_met_p3_p7, bit_met_p1_p7);
+    xmm2 = _mm256_max_epi16(bit_met_m1_p7, bit_met_m3_p7);
+    xmm3 = _mm256_max_epi16(bit_met_m5_p7, bit_met_m7_p7);
+    xmm4 = _mm256_max_epi16(xmm0, xmm1);
+    xmm5 = _mm256_max_epi16(xmm2, xmm3);
+    logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm4);
+    logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm5);
+
+    y1r = _mm256_subs_epi16(logmax_num_re0, logmax_den_re0);
+
+    // Detection for 3rd bit (LTE mapping)
+    xmm0 = _mm256_max_epi16(bit_met_m7_m7, bit_met_m7_m5);
+    xmm1 = _mm256_max_epi16(bit_met_m7_m3, bit_met_m7_m1);
+    xmm2 = _mm256_max_epi16(bit_met_m7_p1, bit_met_m7_p3);
+    xmm3 = _mm256_max_epi16(bit_met_m7_p5, bit_met_m7_p7);
+    xmm4 = _mm256_max_epi16(xmm0, xmm1);
+    xmm5 = _mm256_max_epi16(xmm2, xmm3);
+    logmax_den_re0 = _mm256_max_epi16(xmm4, xmm5);
+    xmm0 = _mm256_max_epi16(bit_met_m5_m7, bit_met_m5_m5);
+    xmm1 = _mm256_max_epi16(bit_met_m5_m3, bit_met_m5_m1);
+    xmm2 = _mm256_max_epi16(bit_met_m5_p1, bit_met_m5_p3);
+    xmm3 = _mm256_max_epi16(bit_met_m5_p5, bit_met_m5_p7);
+    xmm4 = _mm256_max_epi16(xmm0, xmm1);
+    xmm5 = _mm256_max_epi16(xmm2, xmm3);
+    logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm4);
+    logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm5);
+    xmm0 = _mm256_max_epi16(bit_met_p5_m7, bit_met_p5_m5);
+    xmm1 = _mm256_max_epi16(bit_met_p5_m3, bit_met_p5_m1);
+    xmm2 = _mm256_max_epi16(bit_met_p5_p1, bit_met_p5_p3);
+    xmm3 = _mm256_max_epi16(bit_met_p5_p5, bit_met_p5_p7);
+    xmm4 = _mm256_max_epi16(xmm0, xmm1);
+    xmm5 = _mm256_max_epi16(xmm2, xmm3);
+    logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm4);
+    logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm5);
+    xmm0 = _mm256_max_epi16(bit_met_p7_m7, bit_met_p7_m5);
+    xmm1 = _mm256_max_epi16(bit_met_p7_m3, bit_met_p7_m1);
+    xmm2 = _mm256_max_epi16(bit_met_p7_p1, bit_met_p7_p3);
+    xmm3 = _mm256_max_epi16(bit_met_p7_p5, bit_met_p7_p7);
+    xmm4 = _mm256_max_epi16(xmm0, xmm1);
+    xmm5 = _mm256_max_epi16(xmm2, xmm3);
+    logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm4);
+    logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm5);
+
+    xmm0 = _mm256_max_epi16(bit_met_m3_m7, bit_met_m3_m5);
+    xmm1 = _mm256_max_epi16(bit_met_m3_m3, bit_met_m3_m1);
+    xmm2 = _mm256_max_epi16(bit_met_m3_p1, bit_met_m3_p3);
+    xmm3 = _mm256_max_epi16(bit_met_m3_p5, bit_met_m3_p7);
+    xmm4 = _mm256_max_epi16(xmm0, xmm1);
+    xmm5 = _mm256_max_epi16(xmm2, xmm3);
+    logmax_num_re0 = _mm256_max_epi16(xmm4, xmm5);
+    xmm0 = _mm256_max_epi16(bit_met_m1_m7, bit_met_m1_m5);
+    xmm1 = _mm256_max_epi16(bit_met_m1_m3, bit_met_m1_m1);
+    xmm2 = _mm256_max_epi16(bit_met_m1_p1, bit_met_m1_p3);
+    xmm3 = _mm256_max_epi16(bit_met_m1_p5, bit_met_m1_p7);
+    xmm4 = _mm256_max_epi16(xmm0, xmm1);
+    xmm5 = _mm256_max_epi16(xmm2, xmm3);
+    logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm4);
+    logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm5);
+    xmm0 = _mm256_max_epi16(bit_met_p1_m7, bit_met_p1_m5);
+    xmm1 = _mm256_max_epi16(bit_met_p1_m3, bit_met_p1_m1);
+    xmm2 = _mm256_max_epi16(bit_met_p1_p1, bit_met_p1_p3);
+    xmm3 = _mm256_max_epi16(bit_met_p1_p5, bit_met_p1_p7);
+    xmm4 = _mm256_max_epi16(xmm0, xmm1);
+    xmm5 = _mm256_max_epi16(xmm2, xmm3);
+    logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm4);
+    logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm5);
+    xmm0 = _mm256_max_epi16(bit_met_p3_m7, bit_met_p3_m5);
+    xmm1 = _mm256_max_epi16(bit_met_p3_m3, bit_met_p3_m1);
+    xmm2 = _mm256_max_epi16(bit_met_p3_p1, bit_met_p3_p3);
+    xmm3 = _mm256_max_epi16(bit_met_p3_p5, bit_met_p3_p7);
+    xmm4 = _mm256_max_epi16(xmm0, xmm1);
+    xmm5 = _mm256_max_epi16(xmm2, xmm3);
+    logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm4);
+    logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm5);
+
+    y2r = _mm256_subs_epi16(logmax_num_re0, logmax_den_re0);
+
+    // Detection for 4th bit (LTE mapping)
+    xmm0 = _mm256_max_epi16(bit_met_p7_p7, bit_met_p5_p7);
+    xmm1 = _mm256_max_epi16(bit_met_p3_p7, bit_met_p1_p7);
+    xmm2 = _mm256_max_epi16(bit_met_m1_p7, bit_met_m3_p7);
+    xmm3 = _mm256_max_epi16(bit_met_m5_p7, bit_met_m7_p7);
+    xmm4 = _mm256_max_epi16(xmm0, xmm1);
+    xmm5 = _mm256_max_epi16(xmm2, xmm3);
+    logmax_den_re0 = _mm256_max_epi16(xmm4, xmm5);
+    xmm0 = _mm256_max_epi16(bit_met_p7_p5, bit_met_p5_p5);
+    xmm1 = _mm256_max_epi16(bit_met_p3_p5, bit_met_p1_p5);
+    xmm2 = _mm256_max_epi16(bit_met_m1_p5, bit_met_m3_p5);
+    xmm3 = _mm256_max_epi16(bit_met_m5_p5, bit_met_m5_p5);
+    xmm4 = _mm256_max_epi16(xmm0, xmm1);
+    xmm5 = _mm256_max_epi16(xmm2, xmm3);
+    logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm4);
+    logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm5);
+    xmm0 = _mm256_max_epi16(bit_met_p7_m5, bit_met_p5_m5);
+    xmm1 = _mm256_max_epi16(bit_met_p3_m5, bit_met_p1_m5);
+    xmm2 = _mm256_max_epi16(bit_met_m1_m5, bit_met_m3_m5);
+    xmm3 = _mm256_max_epi16(bit_met_m5_m5, bit_met_m7_m5);
+    xmm4 = _mm256_max_epi16(xmm0, xmm1);
+    xmm5 = _mm256_max_epi16(xmm2, xmm3);
+    logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm4);
+    logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm5);
+    xmm0 = _mm256_max_epi16(bit_met_p7_m7, bit_met_p5_m7);
+    xmm1 = _mm256_max_epi16(bit_met_p3_m7, bit_met_p1_m7);
+    xmm2 = _mm256_max_epi16(bit_met_m1_m7, bit_met_m3_m7);
+    xmm3 = _mm256_max_epi16(bit_met_m5_m7, bit_met_m7_m7);
+    xmm4 = _mm256_max_epi16(xmm0, xmm1);
+    xmm5 = _mm256_max_epi16(xmm2, xmm3);
+    logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm4);
+    logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm5);
+
+    xmm0 = _mm256_max_epi16(bit_met_p7_m1, bit_met_p5_m1);
+    xmm1 = _mm256_max_epi16(bit_met_p3_m1, bit_met_p1_m1);
+    xmm2 = _mm256_max_epi16(bit_met_m1_m1, bit_met_m3_m1);
+    xmm3 = _mm256_max_epi16(bit_met_m5_m1, bit_met_m7_m1);
+    xmm4 = _mm256_max_epi16(xmm0, xmm1);
+    xmm5 = _mm256_max_epi16(xmm2, xmm3);
+    logmax_num_re0 = _mm256_max_epi16(xmm4, xmm5);
+    xmm0 = _mm256_max_epi16(bit_met_p7_m3, bit_met_p5_m3);
+    xmm1 = _mm256_max_epi16(bit_met_p3_m3, bit_met_p1_m3);
+    xmm2 = _mm256_max_epi16(bit_met_m1_m3, bit_met_m3_m3);
+    xmm3 = _mm256_max_epi16(bit_met_m5_m3, bit_met_m7_m3);
+    xmm4 = _mm256_max_epi16(xmm0, xmm1);
+    xmm5 = _mm256_max_epi16(xmm2, xmm3);
+    logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm4);
+    logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm5);
+    xmm0 = _mm256_max_epi16(bit_met_p7_p1, bit_met_p5_p1);
+    xmm1 = _mm256_max_epi16(bit_met_p3_p1, bit_met_p1_p1);
+    xmm2 = _mm256_max_epi16(bit_met_m1_p1, bit_met_m3_p1);
+    xmm3 = _mm256_max_epi16(bit_met_m5_p1, bit_met_m7_p1);
+    xmm4 = _mm256_max_epi16(xmm0, xmm1);
+    xmm5 = _mm256_max_epi16(xmm2, xmm3);
+    logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm4);
+    logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm5);
+    xmm0 = _mm256_max_epi16(bit_met_p7_p3, bit_met_p5_p3);
+    xmm1 = _mm256_max_epi16(bit_met_p3_p3, bit_met_p1_p3);
+    xmm2 = _mm256_max_epi16(bit_met_m1_p3, bit_met_m3_p3);
+    xmm3 = _mm256_max_epi16(bit_met_m5_p3, bit_met_m7_p3);
+    xmm4 = _mm256_max_epi16(xmm0, xmm1);
+    xmm5 = _mm256_max_epi16(xmm2, xmm3);
+    logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm4);
+    logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm5);
+
+    y0i = _mm256_subs_epi16(logmax_num_re0, logmax_den_re0);
+
+
+    // Detection for 5th bit (LTE mapping)
+    xmm0 = _mm256_max_epi16(bit_met_m7_m7, bit_met_m7_m5);
+    xmm1 = _mm256_max_epi16(bit_met_m7_m3, bit_met_m7_m1);
+    xmm2 = _mm256_max_epi16(bit_met_m7_p1, bit_met_m7_p3);
+    xmm3 = _mm256_max_epi16(bit_met_m7_p5, bit_met_m7_p7);
+    xmm4 = _mm256_max_epi16(xmm0, xmm1);
+    xmm5 = _mm256_max_epi16(xmm2, xmm3);
+    logmax_den_re0 = _mm256_max_epi16(xmm4, xmm5);
+    xmm0 = _mm256_max_epi16(bit_met_m1_m7, bit_met_m1_m5);
+    xmm1 = _mm256_max_epi16(bit_met_m1_m3, bit_met_m1_m1);
+    xmm2 = _mm256_max_epi16(bit_met_m1_p1, bit_met_m1_p3);
+    xmm3 = _mm256_max_epi16(bit_met_m1_p5, bit_met_m1_p7);
+    xmm4 = _mm256_max_epi16(xmm0, xmm1);
+    xmm5 = _mm256_max_epi16(xmm2, xmm3);
+    logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm4);
+    logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm5);
+    xmm0 = _mm256_max_epi16(bit_met_p1_m7, bit_met_p1_m5);
+    xmm1 = _mm256_max_epi16(bit_met_p1_m3, bit_met_p1_m1);
+    xmm2 = _mm256_max_epi16(bit_met_p1_p1, bit_met_p1_p3);
+    xmm3 = _mm256_max_epi16(bit_met_p1_p5, bit_met_p1_p7);
+    xmm4 = _mm256_max_epi16(xmm0, xmm1);
+    xmm5 = _mm256_max_epi16(xmm2, xmm3);
+    logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm4);
+    logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm5);
+    xmm0 = _mm256_max_epi16(bit_met_p7_m7, bit_met_p7_m5);
+    xmm1 = _mm256_max_epi16(bit_met_p7_m3, bit_met_p7_m1);
+    xmm2 = _mm256_max_epi16(bit_met_p7_p1, bit_met_p7_p3);
+    xmm3 = _mm256_max_epi16(bit_met_p7_p5, bit_met_p7_p7);
+    xmm4 = _mm256_max_epi16(xmm0, xmm1);
+    xmm5 = _mm256_max_epi16(xmm2, xmm3);
+    logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm4);
+    logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm5);
+
+    xmm0 = _mm256_max_epi16(bit_met_m5_m7, bit_met_m5_m5);
+    xmm1 = _mm256_max_epi16(bit_met_m5_m3, bit_met_m5_m1);
+    xmm2 = _mm256_max_epi16(bit_met_m5_p1, bit_met_m5_p3);
+    xmm3 = _mm256_max_epi16(bit_met_m5_p5, bit_met_m5_p7);
+    xmm4 = _mm256_max_epi16(xmm0, xmm1);
+    xmm5 = _mm256_max_epi16(xmm2, xmm3);
+    logmax_num_re0 = _mm256_max_epi16(xmm4, xmm5);
+    xmm0 = _mm256_max_epi16(bit_met_m3_m7, bit_met_m3_m5);
+    xmm1 = _mm256_max_epi16(bit_met_m3_m3, bit_met_m3_m1);
+    xmm2 = _mm256_max_epi16(bit_met_m3_p1, bit_met_m3_p3);
+    xmm3 = _mm256_max_epi16(bit_met_m3_p5, bit_met_m3_p7);
+    xmm4 = _mm256_max_epi16(xmm0, xmm1);
+    xmm5 = _mm256_max_epi16(xmm2, xmm3);
+    logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm4);
+    logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm5);
+    xmm0 = _mm256_max_epi16(bit_met_p3_m7, bit_met_p3_m5);
+    xmm1 = _mm256_max_epi16(bit_met_p3_m3, bit_met_p3_m1);
+    xmm2 = _mm256_max_epi16(bit_met_p3_p1, bit_met_p3_p3);
+    xmm3 = _mm256_max_epi16(bit_met_p3_p5, bit_met_p3_p7);
+    xmm4 = _mm256_max_epi16(xmm0, xmm1);
+    xmm5 = _mm256_max_epi16(xmm2, xmm3);
+    logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm4);
+    logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm5);
+    xmm0 = _mm256_max_epi16(bit_met_p5_m7, bit_met_p5_m5);
+    xmm1 = _mm256_max_epi16(bit_met_p5_m3, bit_met_p5_m1);
+    xmm2 = _mm256_max_epi16(bit_met_p5_p1, bit_met_p5_p3);
+    xmm3 = _mm256_max_epi16(bit_met_p5_p5, bit_met_p5_p7);
+    xmm4 = _mm256_max_epi16(xmm0, xmm1);
+    xmm5 = _mm256_max_epi16(xmm2, xmm3);
+    logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm4);
+    logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm5);
+
+    y1i = _mm256_subs_epi16(logmax_num_re0, logmax_den_re0);
+
+    // Detection for 6th bit (LTE mapping)
+    xmm0 = _mm256_max_epi16(bit_met_p7_p7, bit_met_p5_p7);
+    xmm1 = _mm256_max_epi16(bit_met_p3_p7, bit_met_p1_p7);
+    xmm2 = _mm256_max_epi16(bit_met_m1_p7, bit_met_m3_p7);
+    xmm3 = _mm256_max_epi16(bit_met_m5_p7, bit_met_m7_p7);
+    xmm4 = _mm256_max_epi16(xmm0, xmm1);
+    xmm5 = _mm256_max_epi16(xmm2, xmm3);
+    logmax_den_re0 = _mm256_max_epi16(xmm4, xmm5);
+    xmm0 = _mm256_max_epi16(bit_met_p7_p1, bit_met_p5_p1);
+    xmm1 = _mm256_max_epi16(bit_met_p3_p1, bit_met_p1_p1);
+    xmm2 = _mm256_max_epi16(bit_met_m1_p1, bit_met_m3_p1);
+    xmm3 = _mm256_max_epi16(bit_met_m5_p1, bit_met_m5_p1);
+    xmm4 = _mm256_max_epi16(xmm0, xmm1);
+    xmm5 = _mm256_max_epi16(xmm2, xmm3);
+    logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm4);
+    logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm5);
+    xmm0 = _mm256_max_epi16(bit_met_p7_m1, bit_met_p5_m1);
+    xmm1 = _mm256_max_epi16(bit_met_p3_m1, bit_met_p1_m1);
+    xmm2 = _mm256_max_epi16(bit_met_m1_m1, bit_met_m3_m1);
+    xmm3 = _mm256_max_epi16(bit_met_m5_m1, bit_met_m7_m1);
+    xmm4 = _mm256_max_epi16(xmm0, xmm1);
+    xmm5 = _mm256_max_epi16(xmm2, xmm3);
+    logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm4);
+    logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm5);
+    xmm0 = _mm256_max_epi16(bit_met_p7_m7, bit_met_p5_m7);
+    xmm1 = _mm256_max_epi16(bit_met_p3_m7, bit_met_p1_m7);
+    xmm2 = _mm256_max_epi16(bit_met_m1_m7, bit_met_m3_m7);
+    xmm3 = _mm256_max_epi16(bit_met_m5_m7, bit_met_m7_m7);
+    xmm4 = _mm256_max_epi16(xmm0, xmm1);
+    xmm5 = _mm256_max_epi16(xmm2, xmm3);
+    logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm4);
+    logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm5);
+
+    xmm0 = _mm256_max_epi16(bit_met_p7_m5, bit_met_p5_m5);
+    xmm1 = _mm256_max_epi16(bit_met_p3_m5, bit_met_p1_m5);
+    xmm2 = _mm256_max_epi16(bit_met_m1_m5, bit_met_m3_m5);
+    xmm3 = _mm256_max_epi16(bit_met_m5_m5, bit_met_m7_m5);
+    xmm4 = _mm256_max_epi16(xmm0, xmm1);
+    xmm5 = _mm256_max_epi16(xmm2, xmm3);
+    logmax_num_re0 = _mm256_max_epi16(xmm4, xmm5);
+    xmm0 = _mm256_max_epi16(bit_met_p7_m3, bit_met_p5_m3);
+    xmm1 = _mm256_max_epi16(bit_met_p3_m3, bit_met_p1_m3);
+    xmm2 = _mm256_max_epi16(bit_met_m1_m3, bit_met_m3_m3);
+    xmm3 = _mm256_max_epi16(bit_met_m5_m3, bit_met_m7_m3);
+    xmm4 = _mm256_max_epi16(xmm0, xmm1);
+    xmm5 = _mm256_max_epi16(xmm2, xmm3);
+    logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm4);
+    logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm5);
+    xmm0 = _mm256_max_epi16(bit_met_p7_p3, bit_met_p5_p3);
+    xmm1 = _mm256_max_epi16(bit_met_p3_p3, bit_met_p1_p3);
+    xmm2 = _mm256_max_epi16(bit_met_m1_p3, bit_met_m3_p3);
+    xmm3 = _mm256_max_epi16(bit_met_m5_p3, bit_met_m7_p3);
+    xmm4 = _mm256_max_epi16(xmm0, xmm1);
+    xmm5 = _mm256_max_epi16(xmm2, xmm3);
+    logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm4);
+    logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm5);
+    xmm0 = _mm256_max_epi16(bit_met_p7_p5, bit_met_p5_p5);
+    xmm1 = _mm256_max_epi16(bit_met_p3_p5, bit_met_p1_p5);
+    xmm2 = _mm256_max_epi16(bit_met_m1_p5, bit_met_m3_p5);
+    xmm3 = _mm256_max_epi16(bit_met_m5_p5, bit_met_m7_p5);
+    xmm4 = _mm256_max_epi16(xmm0, xmm1);
+    xmm5 = _mm256_max_epi16(xmm2, xmm3);
+    logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm4);
+    logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm5);
+
+    y2i = _mm256_subs_epi16(logmax_num_re0, logmax_den_re0);
+
+    // map to output stream, difficult to do in SIMD since we have 6 16bit LLRs
+    // RE 1
+    j = 48*i;
+    stream0_out[j + 0] = ((short *)&y0r)[0];
+    stream0_out[j + 1] = ((short *)&y1r)[0];
+    stream0_out[j + 2] = ((short *)&y2r)[0];
+    stream0_out[j + 3] = ((short *)&y0i)[0];
+    stream0_out[j + 4] = ((short *)&y1i)[0];
+    stream0_out[j + 5] = ((short *)&y2i)[0];
+    // RE 2
+    stream0_out[j + 6] = ((short *)&y0r)[1];
+    stream0_out[j + 7] = ((short *)&y1r)[1];
+    stream0_out[j + 8] = ((short *)&y2r)[1];
+    stream0_out[j + 9] = ((short *)&y0i)[1];
+    stream0_out[j + 10] = ((short *)&y1i)[1];
+    stream0_out[j + 11] = ((short *)&y2i)[1];
+    // RE 3
+    stream0_out[j + 12] = ((short *)&y0r)[2];
+    stream0_out[j + 13] = ((short *)&y1r)[2];
+    stream0_out[j + 14] = ((short *)&y2r)[2];
+    stream0_out[j + 15] = ((short *)&y0i)[2];
+    stream0_out[j + 16] = ((short *)&y1i)[2];
+    stream0_out[j + 17] = ((short *)&y2i)[2];
+    // RE 4
+    stream0_out[j + 18] = ((short *)&y0r)[3];
+    stream0_out[j + 19] = ((short *)&y1r)[3];
+    stream0_out[j + 20] = ((short *)&y2r)[3];
+    stream0_out[j + 21] = ((short *)&y0i)[3];
+    stream0_out[j + 22] = ((short *)&y1i)[3];
+    stream0_out[j + 23] = ((short *)&y2i)[3];
+    // RE 5
+    stream0_out[j + 24] = ((short *)&y0r)[4];
+    stream0_out[j + 25] = ((short *)&y1r)[4];
+    stream0_out[j + 26] = ((short *)&y2r)[4];
+    stream0_out[j + 27] = ((short *)&y0i)[4];
+    stream0_out[j + 28] = ((short *)&y1i)[4];
+    stream0_out[j + 29] = ((short *)&y2i)[4];
+    // RE 6
+    stream0_out[j + 30] = ((short *)&y0r)[5];
+    stream0_out[j + 31] = ((short *)&y1r)[5];
+    stream0_out[j + 32] = ((short *)&y2r)[5];
+    stream0_out[j + 33] = ((short *)&y0i)[5];
+    stream0_out[j + 34] = ((short *)&y1i)[5];
+    stream0_out[j + 35] = ((short *)&y2i)[5];
+    // RE 7
+    stream0_out[j + 36] = ((short *)&y0r)[6];
+    stream0_out[j + 37] = ((short *)&y1r)[6];
+    stream0_out[j + 38] = ((short *)&y2r)[6];
+    stream0_out[j + 39] = ((short *)&y0i)[6];
+    stream0_out[j + 40] = ((short *)&y1i)[6];
+    stream0_out[j + 41] = ((short *)&y2i)[6];
+    // RE 8
+    stream0_out[j + 42] = ((short *)&y0r)[7];
+    stream0_out[j + 43] = ((short *)&y1r)[7];
+    stream0_out[j + 44] = ((short *)&y2r)[7];
+    stream0_out[j + 45] = ((short *)&y0i)[7];
+    stream0_out[j + 46] = ((short *)&y1i)[7];
+    stream0_out[j + 47] = ((short *)&y2i)[7];
+
+    // RE 9
+    stream0_out[j + 48] = ((short *)&y0r)[8];
+    stream0_out[j + 49] = ((short *)&y1r)[8];
+    stream0_out[j + 50] = ((short *)&y2r)[8];
+    stream0_out[j + 51] = ((short *)&y0i)[8];
+    stream0_out[j + 52] = ((short *)&y1i)[8];
+    stream0_out[j + 53] = ((short *)&y2i)[8];
+    // RE 10
+    stream0_out[j + 54] = ((short *)&y0r)[9];
+    stream0_out[j + 55] = ((short *)&y1r)[9];
+    stream0_out[j + 56] = ((short *)&y2r)[9];
+    stream0_out[j + 57] = ((short *)&y0i)[9];
+    stream0_out[j + 58] = ((short *)&y1i)[9];
+    stream0_out[j + 59] = ((short *)&y2i)[9];
+    // RE 11
+    stream0_out[j + 60] = ((short *)&y0r)[10];
+    stream0_out[j + 61] = ((short *)&y1r)[10];
+    stream0_out[j + 62] = ((short *)&y2r)[10];
+    stream0_out[j + 63] = ((short *)&y0i)[10];
+    stream0_out[j + 64] = ((short *)&y1i)[10];
+    stream0_out[j + 65] = ((short *)&y2i)[10];
+    // RE 12
+    stream0_out[j + 66] = ((short *)&y0r)[11];
+    stream0_out[j + 67] = ((short *)&y1r)[11];
+    stream0_out[j + 68] = ((short *)&y2r)[11];
+    stream0_out[j + 69] = ((short *)&y0i)[11];
+    stream0_out[j + 70] = ((short *)&y1i)[11];
+    stream0_out[j + 71] = ((short *)&y2i)[11];
+    // RE 13
+    stream0_out[j + 72] = ((short *)&y0r)[12];
+    stream0_out[j + 73] = ((short *)&y1r)[12];
+    stream0_out[j + 74] = ((short *)&y2r)[12];
+    stream0_out[j + 75] = ((short *)&y0i)[12];
+    stream0_out[j + 76] = ((short *)&y1i)[12];
+    stream0_out[j + 77] = ((short *)&y2i)[12];
+    // RE 14
+    stream0_out[j + 78] = ((short *)&y0r)[13];
+    stream0_out[j + 79] = ((short *)&y1r)[13];
+    stream0_out[j + 80] = ((short *)&y2r)[13];
+    stream0_out[j + 81] = ((short *)&y0i)[13];
+    stream0_out[j + 82] = ((short *)&y1i)[13];
+    stream0_out[j + 83] = ((short *)&y2i)[13];
+    // RE 15
+    stream0_out[j + 84] = ((short *)&y0r)[14];
+    stream0_out[j + 85] = ((short *)&y1r)[14];
+    stream0_out[j + 86] = ((short *)&y2r)[14];
+    stream0_out[j + 87] = ((short *)&y0i)[14];
+    stream0_out[j + 88] = ((short *)&y1i)[14];
+    stream0_out[j + 89] = ((short *)&y2i)[14];
+    // RE 16
+    stream0_out[j + 90] = ((short *)&y0r)[15];
+    stream0_out[j + 91] = ((short *)&y1r)[15];
+    stream0_out[j + 92] = ((short *)&y2r)[15];
+    stream0_out[j + 93] = ((short *)&y0i)[15];
+    stream0_out[j + 94] = ((short *)&y1i)[15];
+    stream0_out[j + 95] = ((short *)&y2i)[15];
+
+#elif defined(__arm__)
+
+#endif
+
+  }
+
+#if defined(__x86_64__) || defined(__i386__)
+  _mm_empty();
+  _m_empty();
+#endif
+}
diff --git a/openair1/PHY/LTE_TRANSPORT/drs_modulation.c b/openair1/PHY/LTE_TRANSPORT/drs_modulation.c
new file mode 100644
index 0000000000000000000000000000000000000000..a0bde219b57706e59b5d5746f8408bc1307f46be
--- /dev/null
+++ b/openair1/PHY/LTE_TRANSPORT/drs_modulation.c
@@ -0,0 +1,173 @@
+/*
+ * Licensed to the OpenAirInterface (OAI) Software Alliance under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The OpenAirInterface Software Alliance licenses this file to You under
+ * the OAI Public License, Version 1.1  (the "License"); you may not use this file
+ * except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.openairinterface.org/?page_id=698
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *-------------------------------------------------------------------------------
+ * For more information about the OpenAirInterface (OAI) Software Alliance:
+ *      contact@openairinterface.org
+ */
+
+/*! \file PHY/LTE_TRANSPORT/drs_modulation.c
+* \brief Top-level routines for generating the Demodulation Reference Signals from 36-211, V8.6 2009-03
+* \author R. Knopp, F. Kaltenberger, A. Bhamri
+* \date 2011
+* \version 0.1
+* \company Eurecom
+* \email: knopp@eurecom.fr,florian.kaltenberger@eurecom.fr,ankit.bhamri@eurecom.fr
+* \note
+* \warning
+*/
+#include "PHY/defs.h"
+#include "PHY/extern.h"
+#include "PHY/sse_intrin.h"
+//#define DEBUG_DRS
+
+/*! \brief Generates the PUSCH demodulation reference signal (DRS) for one transmit antenna.
+ * For every DRS symbol of the subframe (index 3-Ncp, then every 7-Ncp symbols) the base
+ * sequence ul_ref_sigs[u][v] is phase-rotated per subcarrier, scaled by amp (Q15) and written
+ * to ue->common_vars.txdataF[ant]; only the allocated resource blocks are written.
+ * \param ue       UE descriptor (frame parameters, ULSCH state, TX buffers)
+ * \param proc     RX/TX context; only frame_tx is read, to derive the HARQ process id
+ * \param eNB_id   index of the ULSCH whose n_DMRS2 enters the cyclic shift
+ * \param amp      amplitude applied to each sample (the product is shifted right by 15)
+ * \param subframe subframe index; selects the hopping entries and the buffer offset
+ * \param first_rb first allocated resource block
+ * \param nb_rb    number of allocated resource blocks
+ * \param ant      antenna index; selects the buffer and adds ant*6 to the cyclic shift
+ * \return 0 on success, -1 when 12*nb_rb is not found in dftsizes
+ */
+int generate_drs_pusch(PHY_VARS_UE *ue,
+                       UE_rxtx_proc_t *proc,
+                       uint8_t eNB_id,
+                       short amp,
+                       unsigned int subframe,
+                       unsigned int first_rb,
+                       unsigned int nb_rb,
+                       uint8_t ant)
+{
+  uint16_t k,l,rb,Msc_RS,Msc_RS_idx,drs_offset;
+  uint16_t *dft_entry;
+  int subframe_offset,symbol_offset,re_offset,sample_idx;
+  int16_t rot_re,rot_im;
+
+  // 12-point unit-circle table in Q15: cos and sin of 2*pi*n/12, n = 0..11
+  int16_t alpha_re[12] = {32767, 28377, 16383,     0,-16384,  -28378,-32768,-28378,-16384,    -1, 16383, 28377};
+  int16_t alpha_im[12] = {0,     16383, 28377, 32767, 28377,   16383,     0,-16384,-28378,-32768,-28378,-16384};
+
+  uint8_t cyclic_shift,cyclic_shift0,cyclic_shift1;
+  LTE_DL_FRAME_PARMS *frame_parms = &ue->frame_parms;
+  int16_t *txF16 = (int16_t*) ue->common_vars.txdataF[ant];
+  uint32_t u,v,alpha_ind;
+  uint32_t u0=frame_parms->pusch_config_common.ul_ReferenceSignalsPUSCH.grouphop[subframe<<1];
+  uint32_t u1=frame_parms->pusch_config_common.ul_ReferenceSignalsPUSCH.grouphop[1+(subframe<<1)];
+  uint32_t v0=frame_parms->pusch_config_common.ul_ReferenceSignalsPUSCH.seqhop[subframe<<1];
+  uint32_t v1=frame_parms->pusch_config_common.ul_ReferenceSignalsPUSCH.seqhop[1+(subframe<<1)];
+  int32_t ref_re,ref_im;
+  uint8_t harq_pid = subframe2harq_pid(frame_parms,proc->frame_tx,subframe);
+
+  // cyclic shift of the first DRS symbol, built from nPRS[2*subframe]
+  cyclic_shift0 = (frame_parms->pusch_config_common.ul_ReferenceSignalsPUSCH.cyclicShift +
+                   ue->ulsch[eNB_id]->harq_processes[harq_pid]->n_DMRS2 +
+                   frame_parms->pusch_config_common.ul_ReferenceSignalsPUSCH.nPRS[subframe<<1]+
+                   ((ue->ulsch[0]->cooperation_flag==2)?10:0)+
+                   ant*6) % 12;
+  // cyclic shift of the second DRS symbol, built from nPRS[2*subframe+1]
+  cyclic_shift1 = (frame_parms->pusch_config_common.ul_ReferenceSignalsPUSCH.cyclicShift +
+                   ue->ulsch[eNB_id]->harq_processes[harq_pid]->n_DMRS2 +
+                   frame_parms->pusch_config_common.ul_ReferenceSignalsPUSCH.nPRS[(subframe<<1)+1]+
+                   ((ue->ulsch[0]->cooperation_flag==2)?10:0)+
+                   ant*6) % 12;
+
+  // the sequence length must be one of the 33 sizes listed in dftsizes
+  Msc_RS = 12*nb_rb;
+  dft_entry = (uint16_t*) bsearch(&Msc_RS, dftsizes, 33, sizeof(uint16_t), compareints);
+
+  if (!dft_entry) {
+    LOG_I(PHY,"generate_drs_pusch: index for Msc_RS=%d not found\n",Msc_RS);
+    return(-1);
+  }
+
+  Msc_RS_idx = dft_entry - dftsizes;
+
+  l = (3 - frame_parms->Ncp);
+  u = u0; v = v0;
+  cyclic_shift = cyclic_shift0;
+
+  while (l<frame_parms->symbols_per_tti) {
+    drs_offset = 0;
+#ifdef DEBUG_DRS
+    printf("drs_modulation: Msc_RS = %d, Msc_RS_idx = %d, u=%d,v=%d\n",Msc_RS, Msc_RS_idx,u,v);
+#endif
+    re_offset = frame_parms->first_carrier_offset;
+    subframe_offset = subframe*frame_parms->symbols_per_tti*frame_parms->ofdm_symbol_size;
+    symbol_offset = subframe_offset + frame_parms->ofdm_symbol_size*l;
+#ifdef DEBUG_DRS
+    printf("generate_drs_pusch: symbol_offset %d, subframe offset %d, cyclic shift %d\n",symbol_offset,subframe_offset,cyclic_shift);
+#endif
+    alpha_ind = 0;
+
+    for (rb=0; rb<frame_parms->N_RB_UL; rb++) {
+      if ((rb < first_rb) || (rb >= (first_rb+nb_rb))) {
+        // unallocated RB: step over its 12 subcarriers and handle the symbol-boundary wrap
+        re_offset+=12;
+        if (re_offset >= frame_parms->ofdm_symbol_size)
+          re_offset = (frame_parms->N_RB_DL&1) ? 6 : 0; // odd number of RBs resumes at 6, even at 0
+        continue;
+      }
+
+#ifdef DEBUG_DRS
+      printf("generate_drs_pusch: doing RB %d, re_offset=%d, drs_offset=%d,cyclic shift %d\n",rb,re_offset,drs_offset,cyclic_shift);
+#endif
+
+      for (k=0; k<12; k++) {
+        ref_re = (int32_t) ul_ref_sigs[u][v][Msc_RS_idx][drs_offset<<1];
+        ref_im = (int32_t) ul_ref_sigs[u][v][Msc_RS_idx][(drs_offset<<1)+1];
+        sample_idx = 2*(symbol_offset + re_offset);
+
+        // complex multiply by the table entry, result truncated to 16 bits
+        rot_re = (int16_t) (((ref_re*alpha_re[alpha_ind]) - (ref_im*alpha_im[alpha_ind]))>>15);
+        rot_im = (int16_t) (((ref_re*alpha_im[alpha_ind]) + (ref_im*alpha_re[alpha_ind]))>>15);
+
+        // amplitude scaling, then store the interleaved real/imaginary pair
+        txF16[sample_idx]   = (short) ((rot_re*(int32_t)amp)>>15);
+        txF16[sample_idx+1] = (short) ((rot_im*(int32_t)amp)>>15);
+
+        alpha_ind += cyclic_shift;
+        if (alpha_ind > 11)
+          alpha_ind -= 12;
+
+#ifdef DEBUG_DRS
+        printf("symbol_offset %d, alpha_ind %d , re_offset %d : (%d,%d)\n",
+            symbol_offset,
+            alpha_ind,
+            re_offset,
+            txF16[sample_idx],
+            txF16[sample_idx+1]);
+#endif  // DEBUG_DRS
+        re_offset++;
+        drs_offset++;
+        if (re_offset >= frame_parms->ofdm_symbol_size)
+          re_offset = 0;
+      }
+    }
+
+    l += (7 - frame_parms->Ncp);
+    u = u1; v = v1;
+    cyclic_shift = cyclic_shift1;
+  }
+
+  return(0);
+}
+
diff --git a/openair1/PHY/LTE_TRANSPORT/ulsch_coding.c b/openair1/PHY/LTE_TRANSPORT/ulsch_coding.c
new file mode 100644
index 0000000000000000000000000000000000000000..09420f48ab1a7029fdbbb2015fd7f28f47ffcd8c
--- /dev/null
+++ b/openair1/PHY/LTE_TRANSPORT/ulsch_coding.c
@@ -0,0 +1,933 @@
+/*
+ * Licensed to the OpenAirInterface (OAI) Software Alliance under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The OpenAirInterface Software Alliance licenses this file to You under
+ * the OAI Public License, Version 1.1  (the "License"); you may not use this file
+ * except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.openairinterface.org/?page_id=698
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *-------------------------------------------------------------------------------
+ * For more information about the OpenAirInterface (OAI) Software Alliance:
+ *      contact@openairinterface.org
+ */
+
+/*! \file PHY/LTE_TRANSPORT/ulsch_coding.c
+* \brief Top-level routines for coding the ULSCH transport channel as described in 36.212 V8.6 2009-03
+* \author R. Knopp
+* \date 2011
+* \version 0.1
+* \company Eurecom
+* \email: knopp@eurecom.fr
+* \note
+* \warning
+*/
+
+#include "PHY/defs.h"
+#include "PHY/extern.h"
+
+#include "PHY/CODING/defs.h"
+#include "PHY/CODING/extern.h"
+#include "PHY/CODING/lte_interleaver_inline.h"
+#include "PHY/LTE_TRANSPORT/defs.h"
+#include "defs.h"
+#include "extern.h"
+#include "SIMULATION/ETH_TRANSPORT/extern.h"
+#include "UTIL/LOG/vcd_signal_dumper.h"
+
+//#define DEBUG_ULSCH_CODING
+//#define DEBUG_ULSCH_FREE 1
+
+/*
+#define is_not_pilot(pilots,first_pilot,re) (pilots==0) || \
+  ((pilots==1)&&(first_pilot==1)&&(((re>2)&&(re<6))||((re>8)&&(re<12)))) || \
+  ((pilots==1)&&(first_pilot==0)&&(((re<3))||((re>5)&&(re<9)))) \
+*/
+#define is_not_pilot(pilots,first_pilot,re) (1)
+
+
+
+
+/*! \brief Releases a UE ULSCH descriptor and every HARQ buffer attached to it.
+ *  \param ulsch descriptor to free; a NULL pointer is accepted and ignored */
+void free_ue_ulsch(LTE_UE_ULSCH_t *ulsch)
+{
+  int pid,seg;
+
+  if (!ulsch)
+    return;
+
+#ifdef DEBUG_ULSCH_FREE
+  printf("Freeing ulsch %p\n",ulsch);
+#endif
+
+  for (pid=0; pid<8; pid++) {
+    LTE_UL_UE_HARQ_t *harq = ulsch->harq_processes[pid];
+    if (!harq)
+      continue;
+    if (harq->b) {
+      free16(harq->b,MAX_ULSCH_PAYLOAD_BYTES);
+      harq->b = NULL;
+    }
+    for (seg=0; seg<MAX_NUM_ULSCH_SEGMENTS; seg++) {
+      if (harq->c[seg]) {
+        free16(harq->c[seg],((seg==0)?8:0) + 3+768);
+        harq->c[seg] = NULL;
+      }
+    }
+    free16(harq,sizeof(LTE_UL_UE_HARQ_t));
+    ulsch->harq_processes[pid] = NULL;
+  }
+  free16(ulsch,sizeof(LTE_UE_ULSCH_t));
+}
+
+/*! \brief Allocates and zero-initialises a UE ULSCH descriptor and its 8 HARQ processes.
+ *
+ * \param N_RB_UL          uplink bandwidth in RBs; 6, 25 and 50 divide the payload buffer size by 16, 4 and 2
+ * \param abstraction_flag 0: also allocate the segment buffers c[] and preset d[][0..95] to LTE_NULL;
+ *                         1: skip both; any other value is rejected and NULL is returned
+ * \return the new descriptor, or NULL (after releasing everything) when an allocation failed
+ */
+LTE_UE_ULSCH_t *new_ue_ulsch(unsigned char N_RB_UL, uint8_t abstraction_flag)
+{
+  LTE_UE_ULSCH_t *ulsch;
+  unsigned char exit_flag = 0,i,j,r;
+  unsigned char bw_scaling =1;
+
+  switch (N_RB_UL) {
+  case 6:
+    bw_scaling =16;
+    break;
+  case 25:
+    bw_scaling =4;
+    break;
+  case 50:
+    bw_scaling =2;
+    break;
+  default:
+    bw_scaling =1;
+    break;
+  }
+
+  ulsch = (LTE_UE_ULSCH_t *)malloc16(sizeof(LTE_UE_ULSCH_t));
+
+  if (ulsch) {
+    memset(ulsch,0,sizeof(LTE_UE_ULSCH_t));
+    ulsch->Mlimit = 4;
+
+    for (i=0; i<8; i++) {
+      ulsch->harq_processes[i] = (LTE_UL_UE_HARQ_t *)malloc16(sizeof(LTE_UL_UE_HARQ_t));
+      if (ulsch->harq_processes[i]) {
+        memset(ulsch->harq_processes[i], 0, sizeof(LTE_UL_UE_HARQ_t));
+        ulsch->harq_processes[i]->b = (unsigned char*)malloc16(MAX_ULSCH_PAYLOAD_BYTES/bw_scaling);
+
+        if (ulsch->harq_processes[i]->b)
+          memset(ulsch->harq_processes[i]->b,0,MAX_ULSCH_PAYLOAD_BYTES/bw_scaling);
+        else {
+          LOG_E(PHY,"Can't get b\n");
+          exit_flag=1;
+        }
+
+        if (abstraction_flag==0) {
+          for (r=0; r<MAX_NUM_ULSCH_SEGMENTS; r++) {
+            ulsch->harq_processes[i]->c[r] = (unsigned char*)malloc16(((r==0)?8:0) + 3+768);  // account for filler in first segment and CRCs for multiple segment case
+            if (ulsch->harq_processes[i]->c[r])
+              memset(ulsch->harq_processes[i]->c[r],0,((r==0)?8:0) + 3+768);
+            else {
+              LOG_E(PHY,"Can't get c\n");
+              exit_flag=2;
+            }
+          }
+        }
+
+        ulsch->harq_processes[i]->subframe_scheduling_flag = 0;
+        ulsch->harq_processes[i]->first_tx = 1;
+      } else {
+        LOG_E(PHY,"Can't get harq_p %d\n",i);
+        exit_flag=3;
+      }
+    }
+
+    // An allocation failure is fatal in abstraction mode too: a descriptor with a NULL
+    // HARQ process or payload buffer must never be handed back to the caller.
+    if (exit_flag==0) {
+      if (abstraction_flag == 0) {
+        for (i=0; i<8; i++)
+          for (r=0; r<MAX_NUM_ULSCH_SEGMENTS; r++)
+            for (j=0; j<96; j++)
+              ulsch->harq_processes[i]->d[r][j] = LTE_NULL;
+
+        return(ulsch);
+      } else if (abstraction_flag==1)
+        return(ulsch);
+    }
+  }
+
+  LOG_E(PHY,"new_ue_ulsch exit flag, size of  %d ,   %zu\n",exit_flag, sizeof(LTE_UE_ULSCH_t));
+  free_ue_ulsch(ulsch);
+  return(NULL);
+}
+
+
+uint32_t ulsch_encoding(uint8_t *a,
+                        PHY_VARS_UE *ue,
+                        uint8_t harq_pid,
+                        uint8_t eNB_id,
+                        uint8_t subframe_rx,
+                        uint8_t tmode,
+                        uint8_t control_only_flag,
+                        uint8_t Nbundled)
+{
+
+  time_stats_t *seg_stats=&ue->ulsch_segmentation_stats;
+  time_stats_t *rm_stats=&ue->ulsch_rate_matching_stats;
+  time_stats_t *te_stats=&ue->ulsch_turbo_encoding_stats;
+  time_stats_t *i_stats=&ue->ulsch_interleaving_stats;
+  time_stats_t *m_stats=&ue->ulsch_multiplexing_stats;
+
+  //  uint16_t offset;
+  uint32_t crc=1;
+  uint16_t iind;
+  uint32_t A;
+  uint8_t Q_m=0;
+  uint32_t Kr=0,Kr_bytes,r,r_offset=0;
+  uint8_t y[6*14*1200],*yptr;;
+  uint8_t *columnset;
+  uint32_t sumKr=0;
+  uint32_t Qprime,L,G,Q_CQI=0,Q_RI=0,Q_ACK=0,H=0,Hprime=0,Hpp=0,Cmux=0,Rmux=0,Rmux_prime=0;
+  uint32_t Qprime_ACK=0,Qprime_CQI=0,Qprime_RI=0,len_ACK=0,len_RI=0;
+  //  uint32_t E;
+  uint8_t ack_parity;
+  uint32_t i,q,j,iprime,j2;
+  uint16_t o_RCC;
+  uint8_t o_flip[8];
+  uint32_t wACK_idx;
+  LTE_DL_FRAME_PARMS *frame_parms=&ue->frame_parms;
+  PHY_MEASUREMENTS *meas = &ue->measurements;
+  LTE_UE_ULSCH_t *ulsch=ue->ulsch[eNB_id];
+  LTE_UE_DLSCH_t **dlsch = ue->dlsch[0][eNB_id];
+  uint16_t rnti = 0xffff;
+
+  if (!ulsch) {
+    LOG_E(PHY,"Null ulsch ptr %p\n",ulsch);
+    return(-1);
+  }
+
+  if (harq_pid >= 8) {
+    LOG_E(PHY,"Illegal harq_pid %d\n",harq_pid);
+    return(-1);
+  }
+
+  if (ulsch->harq_processes[harq_pid]->O_ACK > 2) {
+    LOG_E(PHY,"Illegal O_ACK %d\n",ulsch->harq_processes[harq_pid]->O_ACK);
+    return(-1);
+  }
+
+  if (ulsch->O_RI > 1) {
+    LOG_E(PHY,"Illegal O_RI %d\n",ulsch->O_RI);
+    return(-1);
+  }
+
+  VCD_SIGNAL_DUMPER_DUMP_FUNCTION_BY_NAME(VCD_SIGNAL_DUMPER_FUNCTIONS_UE_ULSCH_ENCODING, VCD_FUNCTION_IN);
+
+  // fill CQI/PMI information
+  if (ulsch->O>0) {
+    VCD_SIGNAL_DUMPER_DUMP_FUNCTION_BY_NAME(VCD_SIGNAL_DUMPER_FUNCTIONS_UE_ULSCH_ENCODING_FILL_CQI, VCD_FUNCTION_IN);
+    rnti = ue->pdcch_vars[ue->current_thread_id[subframe_rx]][eNB_id]->crnti;
+    fill_CQI(ulsch,meas,0,harq_pid,ue->frame_parms.N_RB_DL,rnti, tmode,ue->sinr_eff);
+
+    LOG_D(PHY,"ULSCH Encoding rnti %x \n", rnti);
+    print_CQI(ulsch->o,ulsch->uci_format,0,ue->frame_parms.N_RB_DL);
+
+    // save PUSCH pmi for later (transmission modes 4,5,6)
+    if (dlsch[0]) {
+      //LOG_I(PHY,"XXX saving pmi for DL %x\n",pmi2hex_2Ar1(((wideband_cqi_rank1_2A_5MHz *)ulsch->o)->pmi));
+      dlsch[0]->pmi_alloc = ((wideband_cqi_rank1_2A_5MHz *)ulsch->o)->pmi;
+    }
+    VCD_SIGNAL_DUMPER_DUMP_FUNCTION_BY_NAME(VCD_SIGNAL_DUMPER_FUNCTIONS_UE_ULSCH_ENCODING_FILL_CQI, VCD_FUNCTION_OUT);
+  }
+
+  if (ulsch->O<=32) {
+    o_flip[0] = ulsch->o[3];
+    o_flip[1] = ulsch->o[2];
+    o_flip[2] = ulsch->o[1];
+    o_flip[3] = ulsch->o[0];
+  } else {
+    o_flip[0] = ulsch->o[7];
+    o_flip[1] = ulsch->o[6];
+    o_flip[2] = ulsch->o[5];
+    o_flip[3] = ulsch->o[4];
+    o_flip[4] = ulsch->o[3];
+    o_flip[5] = ulsch->o[2];
+    o_flip[6] = ulsch->o[1];
+    o_flip[7] = ulsch->o[0];
+  }
+
+  if (control_only_flag == 0) {
+    A=ulsch->harq_processes[harq_pid]->TBS;
+    Q_m = get_Qm_ul(ulsch->harq_processes[harq_pid]->mcs);
+
+    ulsch->harq_processes[harq_pid]->control_only = 0;
+
+#ifdef DEBUG_ULSCH_CODING
+    printf("[PHY][UE] ULSCH coding : A %d, Qm %d, mcs %d, harq_pid %d, round %d, RV %d\n",
+        ulsch->harq_processes[harq_pid]->TBS,
+        Q_m,
+        ulsch->harq_processes[harq_pid]->mcs,
+        harq_pid,
+        ulsch->harq_processes[harq_pid]->round,
+        ulsch->harq_processes[harq_pid]->rvidx);
+
+    for (i=0; i<ulsch->harq_processes[harq_pid]->O_ACK; i++)
+      printf("ulsch_coding: o_ACK[%d] %d\n",i,ulsch->o_ACK[i]);
+
+    for (i=0; i<ulsch->O_RI; i++)
+      printf("ulsch_coding: o_RI[%d] %d\n",i,ulsch->o_RI[i]);
+
+    printf("ulsch_coding: O=%d\n",ulsch->O);
+
+    for (i=0; i<1+((8+ulsch->O)/8); i++) {
+      //    ulsch->o[i] = i;
+      printf("ulsch_coding: O[%d] %d\n",i,ulsch->o[i]);
+    }
+
+    if ((tmode != 4))
+      print_CQI(ulsch->o,wideband_cqi_rank1_2A,0,ue->frame_parms.N_RB_DL);
+    else
+      print_CQI(ulsch->o,HLC_subband_cqi_rank1_2A,0,ue->frame_parms.N_RB_DL);
+
+#endif
+
+    if (ulsch->harq_processes[harq_pid]->round == 0) {  // this is a new packet
+
+      start_meas(seg_stats);
+      // Add 24-bit crc (polynomial A) to payload
+      crc = crc24a(a,
+                   A)>>8;
+
+      a[A>>3] = ((uint8_t*)&crc)[2];
+      a[1+(A>>3)] = ((uint8_t*)&crc)[1];
+      a[2+(A>>3)] = ((uint8_t*)&crc)[0];
+
+      ulsch->harq_processes[harq_pid]->B = A+24;
+      ulsch->harq_processes[harq_pid]->b = a;
+      lte_segmentation(ulsch->harq_processes[harq_pid]->b,
+                       ulsch->harq_processes[harq_pid]->c,
+                       ulsch->harq_processes[harq_pid]->B,
+                       &ulsch->harq_processes[harq_pid]->C,
+                       &ulsch->harq_processes[harq_pid]->Cplus,
+                       &ulsch->harq_processes[harq_pid]->Cminus,
+                       &ulsch->harq_processes[harq_pid]->Kplus,
+                       &ulsch->harq_processes[harq_pid]->Kminus,
+                       &ulsch->harq_processes[harq_pid]->F);
+
+      stop_meas(seg_stats);
+
+      for (r=0; r<ulsch->harq_processes[harq_pid]->C; r++) {
+        if (r<ulsch->harq_processes[harq_pid]->Cminus)
+          Kr = ulsch->harq_processes[harq_pid]->Kminus;
+        else
+          Kr = ulsch->harq_processes[harq_pid]->Kplus;
+
+        Kr_bytes = Kr>>3;
+
+        // get interleaver index for Turbo code (lookup in Table 5.1.3-3 36-212, V8.6 2009-03, p. 13-14)
+        if (Kr_bytes<=64)
+          iind = (Kr_bytes-5);
+        else if (Kr_bytes <=128)
+          iind = 59 + ((Kr_bytes-64)>>1);
+        else if (Kr_bytes <= 256)
+          iind = 91 + ((Kr_bytes-128)>>2);
+        else if (Kr_bytes <= 768)
+          iind = 123 + ((Kr_bytes-256)>>3);
+        else {
+          LOG_E(PHY,"ulsch_coding: Illegal codeword size %d!!!\n",Kr_bytes);
+          VCD_SIGNAL_DUMPER_DUMP_FUNCTION_BY_NAME(VCD_SIGNAL_DUMPER_FUNCTIONS_UE_ULSCH_ENCODING, VCD_FUNCTION_OUT);
+          return(-1);
+        }
+
+
+#ifdef DEBUG_ULSCH_CODING
+        printf("Generating Code Segment %d (%d bits)\n",r,Kr);
+        // generate codewords
+
+        printf("bits_per_codeword (Kr)= %d\n",Kr);
+        printf("N_RB = %d\n",ulsch->harq_processes[harq_pid]->nb_rb);
+        printf("Ncp %d\n",frame_parms->Ncp);
+        printf("Qm %d\n",Q_m);
+#endif
+
+        //  offset=0;
+
+
+#ifdef DEBUG_ULSCH_CODING
+        printf("Encoding ... iind %d f1 %d, f2 %d\n",iind,f1f2mat_old[iind*2],f1f2mat_old[(iind*2)+1]);
+#endif
+        start_meas(te_stats);
+        encoder(ulsch->harq_processes[harq_pid]->c[r],
+        	Kr>>3,
+        	&ulsch->harq_processes[harq_pid]->d[r][96],
+        	(r==0) ? ulsch->harq_processes[harq_pid]->F : 0,
+        	f1f2mat_old[iind*2],   // f1 (see 36212-820, page 14)
+        	f1f2mat_old[(iind*2)+1]  // f2 (see 36212-820, page 14)
+               );
+        stop_meas(te_stats);
+#ifdef DEBUG_ULSCH_CODING
+
+        if (r==0)
+          write_output("enc_output0.m","enc0",&ulsch->harq_processes[harq_pid]->d[r][96],(3*8*Kr_bytes)+12,1,4);
+
+#endif
+        start_meas(i_stats);
+        ulsch->harq_processes[harq_pid]->RTC[r] =
+          sub_block_interleaving_turbo(4+(Kr_bytes*8),
+                                       &ulsch->harq_processes[harq_pid]->d[r][96],
+                                       ulsch->harq_processes[harq_pid]->w[r]);
+        stop_meas(i_stats);
+      }
+
+    }
+
+    if (ulsch->harq_processes[harq_pid]->C == 0) {
+      LOG_E(PHY,"null segment\n");
+      VCD_SIGNAL_DUMPER_DUMP_FUNCTION_BY_NAME(VCD_SIGNAL_DUMPER_FUNCTIONS_UE_ULSCH_ENCODING, VCD_FUNCTION_OUT);
+      return(-1);
+    }
+
+    sumKr = 0;
+
+    for (r=0; r<ulsch->harq_processes[harq_pid]->C; r++) {
+      if (r<ulsch->harq_processes[harq_pid]->Cminus)
+        Kr = ulsch->harq_processes[harq_pid]->Kminus;
+      else
+        Kr = ulsch->harq_processes[harq_pid]->Kplus;
+
+      sumKr += Kr;
+    }
+  } else { // This is a control-only PUSCH, set sumKr to O_CQI-MIN
+    ulsch->harq_processes[harq_pid]->control_only = 1;
+    sumKr = ulsch->O_CQI_MIN;
+  }
+
+  ulsch->harq_processes[harq_pid]->sumKr = sumKr;
+  // Compute Q_ri (p. 23 36-212)
+
+  Qprime = ulsch->O_RI*ulsch->harq_processes[harq_pid]->Msc_initial*ulsch->harq_processes[harq_pid]->Nsymb_initial * ulsch->beta_offset_ri_times8;
+
+  if (Qprime > 0) {
+    if ((Qprime % (8*sumKr)) > 0)
+      Qprime = 1+(Qprime/(8*sumKr));
+    else
+      Qprime = Qprime/(8*sumKr);
+
+    if (Qprime > 4*ulsch->harq_processes[harq_pid]->nb_rb * 12)
+      Qprime = 4*ulsch->harq_processes[harq_pid]->nb_rb * 12;
+  }
+
+  Q_RI = Q_m*Qprime;
+  Qprime_RI = Qprime;
+
+  // Compute Q_ack (p. 23 36-212)
+  Qprime = ulsch->harq_processes[harq_pid]->O_ACK*ulsch->harq_processes[harq_pid]->Msc_initial*ulsch->harq_processes[harq_pid]->Nsymb_initial * ulsch->beta_offset_harqack_times8;
+
+  if (Qprime > 0) {
+    if ((Qprime % (8*sumKr)) > 0)
+      Qprime = 1+(Qprime/(8*sumKr));
+    else
+      Qprime = Qprime/(8*sumKr);
+
+    if (Qprime > 4*ulsch->harq_processes[harq_pid]->nb_rb * 12)
+      Qprime = 4*ulsch->harq_processes[harq_pid]->nb_rb * 12;
+  }
+
+  Q_ACK = Qprime * Q_m;
+  Qprime_ACK = Qprime;
+
+  LOG_D(PHY,"UE (%x/%d) O_ACK %d, Mcs_initial %d, Nsymb_initial %d, beta_offset_harqack*8 %d, sum Kr %d, Qprime_ACK %d, Q_ACK %d\n",
+      rnti, harq_pid,
+      ulsch->harq_processes[harq_pid]->O_ACK,
+      ulsch->harq_processes[harq_pid]->Msc_initial,
+      ulsch->harq_processes[harq_pid]->Nsymb_initial,
+      ulsch->beta_offset_harqack_times8,
+      sumKr,
+      Qprime_ACK,
+      Q_ACK);
+
+  // Compute Q_cqi, assume O>11, p. 26 36-212
+  if (control_only_flag == 0) {
+
+    if (ulsch->O < 12)
+      L=0;
+    else
+      L=8;
+
+    if (ulsch->O > 0)
+      Qprime = (ulsch->O + L) * ulsch->harq_processes[harq_pid]->Msc_initial*ulsch->harq_processes[harq_pid]->Nsymb_initial * ulsch->beta_offset_cqi_times8;
+    else
+      Qprime = 0;
+
+    if (Qprime > 0) {
+      if ((Qprime % (8*sumKr)) > 0)
+        Qprime = 1+(Qprime/(8*sumKr));
+      else
+        Qprime = Qprime/(8*sumKr);
+    }
+
+    G = ulsch->harq_processes[harq_pid]->nb_rb * (12 * Q_m) * (ulsch->Nsymb_pusch);
+
+    if (Qprime > (G - ulsch->O_RI))
+      Qprime = G - ulsch->O_RI;
+
+    Q_CQI = Q_m * Qprime;
+    Qprime_CQI = Qprime;
+
+
+
+    G = G - Q_RI - Q_CQI;
+    ulsch->harq_processes[harq_pid]->G = G;
+
+/*
+    LOG_I(PHY,"ULSCH Encoding G %d, Q_RI %d (O_RI%d, Msc_initial %d, Nsymb_initial%d, beta_offset_ri_times8 %d), Q_CQI %d, Q_ACK %d \n",G,Q_RI,ulsch->O_RI,ulsch->harq_processes[harq_pid]->Msc_initial,ulsch->harq_processes[harq_pid]->Nsymb_initial,ulsch->beta_offset_ri_times8,Q_CQI,Q_ACK);
+
+    LOG_I(PHY,"ULSCH Encoding (Nid_cell %d, rnti %x): harq_pid %d round %d, RV %d, mcs %d, O_RI %d, O_ACK %d, G %d\n",
+          frame_parms->Nid_cell,ulsch->rnti,
+          harq_pid,
+          ulsch->harq_processes[harq_pid]->round,
+          ulsch->harq_processes[harq_pid]->rvidx,
+          ulsch->harq_processes[harq_pid]->mcs,
+          ulsch->O_RI,
+          ulsch->harq_processes[harq_pid]->O_ACK,
+          G);
+*/
+
+    if ((int)G < 0) {
+      LOG_E(PHY,"FATAL: ulsch_coding.c G < 0 (%d) : Q_RI %d, Q_CQI %d, O %d, betaCQI_times8 %d)\n",G,Q_RI,Q_CQI,ulsch->O,ulsch->beta_offset_cqi_times8);
+      VCD_SIGNAL_DUMPER_DUMP_FUNCTION_BY_NAME(VCD_SIGNAL_DUMPER_FUNCTIONS_UE_ULSCH_ENCODING, VCD_FUNCTION_OUT);
+      return(-1);
+    }
+
+
+    // Data and control multiplexing (5.2.2.7 36-212)
+
+    H = G + Q_CQI;
+    Hprime = H/Q_m;
+
+
+
+    // Fill in the "e"-sequence from 36-212, V8.6 2009-03, p. 16-17 (for each "e") and concatenate the
+    // outputs for each code segment, see Section 5.1.5 p.20
+
+    for (r=0; r<ulsch->harq_processes[harq_pid]->C; r++) {
+#ifdef DEBUG_ULSCH_CODING
+      printf("Rate Matching, Code segment %d (coded bits (G) %d,unpunctured/repeated bits per code segment %d,mod_order %d, nb_rb %d)...\n",
+          r,
+          G,
+          Kr*3,
+          Q_m,ulsch->harq_processes[harq_pid]->nb_rb);
+#endif
+
+      start_meas(rm_stats);
+      r_offset += lte_rate_matching_turbo(ulsch->harq_processes[harq_pid]->RTC[r],
+                                          G,
+                                          ulsch->harq_processes[harq_pid]->w[r],
+                                          ulsch->e+r_offset,
+                                          ulsch->harq_processes[harq_pid]->C, // C
+                                          NSOFT,                    // Nsoft,
+                                          0,  // this means UL
+                                          1,
+                                          ulsch->harq_processes[harq_pid]->rvidx,
+                                          get_Qm_ul(ulsch->harq_processes[harq_pid]->mcs),
+                                          1,
+                                          r,
+                                          ulsch->harq_processes[harq_pid]->nb_rb);
+                                          //ulsch->harq_processes[harq_pid]->mcs);                       // r
+      stop_meas(rm_stats);
+#ifdef DEBUG_ULSCH_CODING
+
+      if (r==ulsch->harq_processes[harq_pid]->C-1)
+        write_output("enc_output.m","enc",ulsch->e,r_offset,1,4);
+
+#endif
+    }
+  } else { //control-only PUSCH
+    Q_CQI = (ulsch->harq_processes[harq_pid]->nb_rb * (12 * Q_m) * (ulsch->Nsymb_pusch)) - Q_RI;
+    H = Q_CQI;
+    Hprime = H/Q_m;
+  }
+
+
+  //  Do CQI coding
+  if ((ulsch->O>1) && (ulsch->O < 12)) {
+    LOG_E(PHY,"short CQI sizes not supported yet\n");
+    VCD_SIGNAL_DUMPER_DUMP_FUNCTION_BY_NAME(VCD_SIGNAL_DUMPER_FUNCTIONS_UE_ULSCH_ENCODING, VCD_FUNCTION_OUT);
+    return(-1);
+  } else {
+    // add 8-bit CRC
+    crc = crc8(o_flip,
+               ulsch->O)>>24;
+#ifdef DEBUG_ULSCH_CODING
+    printf("crc(cqi) tx : %x\n",crc);
+#endif
+    memset((void *)&ulsch->o_d[0],LTE_NULL,96);
+
+    ccodelte_encode(ulsch->O,
+                    1,
+                    o_flip,
+                    &ulsch->o_d[96],
+                    0);
+
+
+    o_RCC = sub_block_interleaving_cc(8+ulsch->O,
+                                      &ulsch->o_d[96],
+                                      ulsch->o_w);
+
+    lte_rate_matching_cc(o_RCC,
+                         Q_CQI,
+                         ulsch->o_w,
+                         ulsch->q);
+
+  }
+
+  i=0;
+
+  //  Do RI coding
+  if (ulsch->O_RI == 1) {
+    switch (Q_m) {
+    case 2:
+      ulsch->q_RI[0] = ulsch->o_RI[0];
+      ulsch->q_RI[1] = PUSCH_y;//ulsch->o_RI[0];
+      len_RI=2;
+      break;
+
+    case 4:
+      ulsch->q_RI[0] = ulsch->o_RI[0];
+      ulsch->q_RI[1] = PUSCH_y;//1;
+      ulsch->q_RI[2] = PUSCH_x;//ulsch->o_RI[0];
+      ulsch->q_RI[3] = PUSCH_x;//1;
+      len_RI=4;
+      break;
+
+    case 6:
+      ulsch->q_RI[0] = ulsch->o_RI[0];
+      ulsch->q_RI[1] = PUSCH_y;//1;
+      ulsch->q_RI[2] = PUSCH_x;//1;
+      ulsch->q_RI[3] = PUSCH_x;//ulsch->o_RI[0];
+      ulsch->q_RI[4] = PUSCH_x;//1;
+      ulsch->q_RI[5] = PUSCH_x;//1;
+      len_RI=6;
+      break;
+    }
+  } else if (ulsch->O_RI>1) {
+    LOG_E(PHY,"RI cannot be more than 1 bit yet\n");
+    VCD_SIGNAL_DUMPER_DUMP_FUNCTION_BY_NAME(VCD_SIGNAL_DUMPER_FUNCTIONS_UE_ULSCH_ENCODING, VCD_FUNCTION_OUT);
+    return(-1);
+  }
+
+  //  Do ACK coding, Section 5.2.2.6 36.213 (p.23-24 in v8.6)
+  wACK_idx = (ulsch->bundling==0) ? 4 : ((Nbundled-1)&3);
+#ifdef DEBUG_ULSCH_CODING
+  printf("ulsch_coding.c: Bundling %d, Nbundled %d, wACK_idx %d\n",
+      ulsch->bundling,Nbundled,wACK_idx);
+#endif
+
+  // 1-bit ACK/NAK
+  if (ulsch->harq_processes[harq_pid]->O_ACK == 1) {
+    switch (Q_m) {
+    case 2:
+      ulsch->q_ACK[0] = (ulsch->o_ACK[0]+wACK[wACK_idx][0])&1;
+      ulsch->q_ACK[1] = (ulsch->bundling==0)? PUSCH_y : ((ulsch->o_ACK[0]+wACK[wACK_idx][1])&1);//ulsch->o_ACK[0];
+      len_ACK = 2;
+      break;
+
+    case 4:
+      ulsch->q_ACK[0] = (ulsch->o_ACK[0]+wACK[wACK_idx][0])&1;
+      ulsch->q_ACK[1] = (ulsch->bundling==0)? PUSCH_y : ((ulsch->o_ACK[0]+wACK[wACK_idx][1])&1);
+      ulsch->q_ACK[2] = PUSCH_x;
+      ulsch->q_ACK[3] = PUSCH_x;
+      len_ACK = 4;
+      break;
+
+    case 6:
+      ulsch->q_ACK[0] = (ulsch->o_ACK[0]+wACK[wACK_idx][0])&1;
+      ulsch->q_ACK[1] = (ulsch->bundling==0)? PUSCH_y : ((ulsch->o_ACK[0]+wACK[wACK_idx][1])&1);
+      ulsch->q_ACK[2] = PUSCH_x;
+      ulsch->q_ACK[3] = PUSCH_x;
+      ulsch->q_ACK[4] = PUSCH_x;
+      ulsch->q_ACK[6] = PUSCH_x;
+      len_ACK = 6;
+      break;
+    }
+  }
+
+  // two-bit ACK/NAK
+  if (ulsch->harq_processes[harq_pid]->O_ACK == 2) {
+    ack_parity = (ulsch->o_ACK[0]+ulsch->o_ACK[1])&1;
+
+    switch (Q_m) {
+    case 2:
+      ulsch->q_ACK[0] = (ulsch->o_ACK[0]+wACK[wACK_idx][0])&1;
+      ulsch->q_ACK[1] = (ulsch->o_ACK[1]+wACK[wACK_idx][0])&1;
+      ulsch->q_ACK[2] = (ack_parity+wACK[wACK_idx][0])&1;
+      ulsch->q_ACK[3] = (ulsch->o_ACK[0]+wACK[wACK_idx][1])&1;
+      ulsch->q_ACK[4] = (ulsch->o_ACK[1]+wACK[wACK_idx][1])&1;
+      ulsch->q_ACK[5] = (ack_parity+wACK[wACK_idx][1])&1;
+      len_ACK = 6;
+      break;
+
+    case 4:
+      ulsch->q_ACK[0]  = (ulsch->o_ACK[0]+wACK[wACK_idx][0])&1;
+      ulsch->q_ACK[1]  = (ulsch->o_ACK[1]+wACK[wACK_idx][0])&1;
+      ulsch->q_ACK[2]  = PUSCH_x;
+      ulsch->q_ACK[3]  = PUSCH_x;//1;
+      ulsch->q_ACK[4]  = (ack_parity+wACK[wACK_idx][0])&1;
+      ulsch->q_ACK[5]  = (ulsch->o_ACK[0]+wACK[wACK_idx][1])&1;
+      ulsch->q_ACK[6]  = PUSCH_x;
+      ulsch->q_ACK[7]  = PUSCH_x;//1;
+      ulsch->q_ACK[8]  = (ulsch->o_ACK[1]+wACK[wACK_idx][1])&1;
+      ulsch->q_ACK[9]  = (ack_parity+wACK[wACK_idx][1])&1;
+      ulsch->q_ACK[10] = PUSCH_x;
+      ulsch->q_ACK[11] = PUSCH_x;//1;
+      len_ACK = 12;
+      break;
+
+    case 6:
+      ulsch->q_ACK[0] = (ulsch->o_ACK[0]+wACK[wACK_idx][0])&1;
+      ulsch->q_ACK[1] = (ulsch->o_ACK[1]+wACK[wACK_idx][0])&1;
+      ulsch->q_ACK[2] = PUSCH_x;
+      ulsch->q_ACK[3] = PUSCH_x;
+      ulsch->q_ACK[4] = PUSCH_x;
+      ulsch->q_ACK[5] = PUSCH_x;
+
+      ulsch->q_ACK[6] = (ack_parity+wACK[wACK_idx][0])&1;
+      ulsch->q_ACK[7] = (ulsch->o_ACK[0]+wACK[wACK_idx][1])&1;
+      ulsch->q_ACK[8] = PUSCH_x;
+      ulsch->q_ACK[9] = PUSCH_x;
+      ulsch->q_ACK[10] = PUSCH_x;
+      ulsch->q_ACK[11] = PUSCH_x;
+
+      ulsch->q_ACK[12] = (ulsch->o_ACK[1]+wACK[wACK_idx][1])&1;
+      ulsch->q_ACK[13] = (ack_parity+wACK[wACK_idx][1])&1;
+      ulsch->q_ACK[14] = PUSCH_x;
+      ulsch->q_ACK[15] = PUSCH_x;
+      ulsch->q_ACK[16] = PUSCH_x;
+      ulsch->q_ACK[17] = PUSCH_x;
+      len_ACK = 18;
+
+      break;
+    }
+  }
+
+  if (ulsch->harq_processes[harq_pid]->O_ACK > 2) {
+    LOG_E(PHY,"ACK cannot be more than 2 bits yet\n");
+    VCD_SIGNAL_DUMPER_DUMP_FUNCTION_BY_NAME(VCD_SIGNAL_DUMPER_FUNCTIONS_UE_ULSCH_ENCODING, VCD_FUNCTION_OUT);
+    return(-1);
+  }
+
+
+  // channel multiplexing/interleaving
+
+  start_meas(m_stats);
+  Hpp = Hprime + Q_RI;
+
+  Cmux       = ulsch->Nsymb_pusch;
+  Rmux       = Hpp*Q_m/Cmux;
+  Rmux_prime = Rmux/Q_m;
+
+  Qprime_RI  = Q_RI / Q_m;
+  Qprime_ACK = Q_ACK / Q_m;
+  Qprime_CQI = Q_CQI / Q_m;
+
+  //  printf("Qprime_CQI = %d\n",Qprime_CQI);
+  // RI BITS
+
+  memset(y,LTE_NULL,Q_m*Hpp);
+
+  if (frame_parms->Ncp == 0)
+    columnset = cs_ri_normal;
+  else
+    columnset = cs_ri_extended;
+
+  j=0;
+
+  for (i=0; i<Qprime_RI; i++) {
+    r = Rmux_prime - 1 - (i>>2);
+
+    for (q=0; q<Q_m; q++)  {
+      y[q+(Q_m*((r*Cmux) + columnset[j]))]  = ulsch->q_RI[(q+(Q_m*i))%len_RI];
+      //      printf("ri[%d] %d => y[%d]\n",q+(Q_m*i)%len_RI,ulsch->q_RI[(q+(Q_m*i))%len_RI],q+(Q_m*((r*Cmux) + columnset[j])),y[q+(Q_m*((r*Cmux) + columnset[j]))]);
+    }
+
+    j=(j+3)&3;
+
+  }
+
+
+  // CQI and Data bits
+  j=0;
+  /*
+  for (i=0,iprime=-Qprime_CQI;i<Hprime;i++,iprime++) {
+
+    while (y[Q_m*j] != LTE_NULL) j++;
+
+    if (i<Qprime_CQI) {
+      for (q=0;q<Q_m;q++) {
+  y[q+(Q_m*j)] = ulsch->q[q+(Q_m*i)];
+  //printf("cqi[%d] %d => y[%d]\n",q+(Q_m*i),ulsch->q[q+(Q_m*i)],q+(Q_m*j));
+      }
+    }
+    else {
+      for (q=0;q<Q_m;q++) {
+  y[q+(Q_m*j)] = ulsch->e[q+(Q_m*iprime)];
+  //  printf("e[%d] %d => y[%d]\n",q+(Q_m*iprime),ulsch->e[q+(Q_m*iprime)],q+(Q_m*j));
+      }
+    }
+    j++;
+  }
+  */
+
+  for (i=0; i<Qprime_CQI; i++) {
+
+    while (y[Q_m*j] != LTE_NULL) j++;
+
+    for (q=0; q<Q_m; q++) {
+      y[q+(Q_m*j)] = ulsch->q[q+(Q_m*i)];
+      //        printf("cqi[%d] %d => y[%d] (j %d)\n",q+(Q_m*i),ulsch->q[q+(Q_m*i)],q+(Q_m*j),j);
+    }
+
+    j++;
+  }
+
+  j2 = j*Q_m;
+
+  switch (Q_m) {
+
+  case 2:
+
+    for (iprime=0; iprime<(Hprime-Qprime_CQI)<<1; iprime+=2) {
+      while (y[j2] != LTE_NULL) j2+=2;
+
+      y[j2]   = ulsch->e[iprime];
+      y[1+j2] = ulsch->e[1+iprime];
+      j2+=2;
+    }
+
+    break;
+
+  case 4:
+    for (iprime=0; iprime<(Hprime-Qprime_CQI)<<2; iprime+=4) {
+      while (y[j2] != LTE_NULL) j2+=4;
+
+      y[j2]   = ulsch->e[iprime];
+      y[1+j2] = ulsch->e[1+iprime];
+      y[2+j2] = ulsch->e[2+iprime];
+      y[3+j2] = ulsch->e[3+iprime];
+      j2+=4;
+    }
+
+    break;
+
+  case 6:
+    for (iprime=0; iprime<(Hprime-Qprime_CQI)*6; iprime+=6) {
+      while (y[j2] != LTE_NULL) j2+=6;
+
+      y[j2]   = ulsch->e[iprime];
+      y[1+j2] = ulsch->e[1+iprime];
+      y[2+j2] = ulsch->e[2+iprime];
+      y[3+j2] = ulsch->e[3+iprime];
+      y[4+j2] = ulsch->e[4+iprime];
+      y[5+j2] = ulsch->e[5+iprime];
+      j2+=6;
+    }
+
+    break;
+
+  }
+
+  // HARQ-ACK Bits (Note these overwrite some bits)
+
+  if (frame_parms->Ncp == 0)
+    columnset = cs_ack_normal;
+  else
+    columnset = cs_ack_extended;
+
+  j=0;
+
+  for (i=0; i<Qprime_ACK; i++) {
+    r = Rmux_prime - 1 - (i>>2);
+
+    for (q=0; q<Q_m; q++) {
+      y[q+(Q_m*((r*Cmux) + columnset[j]))]  = ulsch->q_ACK[(q+(Q_m*i))%len_ACK];
+#ifdef DEBUG_ULSCH_CODING
+      printf("ulsch_coding.c: ACK %d => y[%d]=%d (i %d, r*Cmux %d, columnset %d)\n",q+(Q_m*i),
+          q+(Q_m*((r*Cmux) + columnset[j])),ulsch->q_ACK[(q+(Q_m*i))%len_ACK],
+          i,r*Cmux,columnset[j]);
+#endif
+    }
+
+    j=(j+3)&3;
+
+  }
+
+  // write out buffer
+  j=0;
+
+  switch (Q_m) {
+  case 2:
+    for (i=0; i<Cmux; i++)
+      for (r=0; r<Rmux_prime; r++) {
+        yptr=&y[((r*Cmux)+i)<<1];
+        ulsch->h[j++] = *yptr++;
+        ulsch->h[j++] = *yptr++;
+      }
+
+    break;
+
+  case 4:
+    for (i=0; i<Cmux; i++)
+      for (r=0; r<Rmux_prime; r++) {
+        yptr = &y[((r*Cmux)+i)<<2];
+        ulsch->h[j++] = *yptr++;
+        ulsch->h[j++] = *yptr++;
+        ulsch->h[j++] = *yptr++;
+        ulsch->h[j++] = *yptr++;
+      }
+
+    break;
+
+  case 6:
+    for (i=0; i<Cmux; i++)
+      for (r=0; r<Rmux_prime; r++) {
+        yptr = &y[((r*Cmux)+i)*6];
+        ulsch->h[j++] = *yptr++;
+        ulsch->h[j++] = *yptr++;
+        ulsch->h[j++] = *yptr++;
+        ulsch->h[j++] = *yptr++;
+        ulsch->h[j++] = *yptr++;
+        ulsch->h[j++] = *yptr++;
+      }
+
+    break;
+
+  default:
+    break;
+  }
+
+  stop_meas(m_stats);
+
+  if (j!=(H+Q_RI)) {
+    LOG_E(PHY,"Error in output buffer length (j %d, H+Q_RI %d)\n",j,H+Q_RI);
+    VCD_SIGNAL_DUMPER_DUMP_FUNCTION_BY_NAME(VCD_SIGNAL_DUMPER_FUNCTIONS_UE_ULSCH_ENCODING, VCD_FUNCTION_OUT);
+    return(-1);
+  }
+
+  VCD_SIGNAL_DUMPER_DUMP_FUNCTION_BY_NAME(VCD_SIGNAL_DUMPER_FUNCTIONS_UE_ULSCH_ENCODING, VCD_FUNCTION_OUT);
+  return(0);
+}
+
diff --git a/openair1/PHY/LTE_TRANSPORT/ulsch_modulation.c b/openair1/PHY/LTE_TRANSPORT/ulsch_modulation.c
new file mode 100644
index 0000000000000000000000000000000000000000..d1718f6e90f600927f5d93f3a18e93213bea86f8
--- /dev/null
+++ b/openair1/PHY/LTE_TRANSPORT/ulsch_modulation.c
@@ -0,0 +1,777 @@
+/*
+ * Licensed to the OpenAirInterface (OAI) Software Alliance under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The OpenAirInterface Software Alliance licenses this file to You under
+ * the OAI Public License, Version 1.1  (the "License"); you may not use this file
+ * except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.openairinterface.org/?page_id=698
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *-------------------------------------------------------------------------------
+ * For more information about the OpenAirInterface (OAI) Software Alliance:
+ *      contact@openairinterface.org
+ */
+
+/*! \file PHY/LTE_TRANSPORT/ulsch_modulation.c
+* \brief Top-level routines for generating PUSCH physical channel from 36.211 V8.6 2009-03
+* \author R. Knopp, F. Kaltenberger, A. Bhamri
+* \date 2011
+* \version 0.1
+* \company Eurecom
+* \email: knopp@eurecom.fr,florian.kaltenberger@eurecom.fr,ankit.bhamri@eurecom.fr
+* \note
+* \warning
+*/
+#include "PHY/defs.h"
+#include "PHY/extern.h"
+#include "PHY/CODING/defs.h"
+#include "PHY/CODING/extern.h"
+#include "PHY/LTE_TRANSPORT/defs.h"
+#include "defs.h"
+#include "UTIL/LOG/vcd_signal_dumper.h"
+
+
+
+//#define DEBUG_ULSCH_MODULATION
+
+#ifndef OFDMA_ULSCH
+/*! \brief DFT stage applied to the 12 symbol blocks of one PUSCH subframe.
+ *
+ * d and z are each addressed as 12 consecutive blocks of Msc_PUSCH 32-bit
+ * words. The blocks are interleaved four at a time into three work buffers,
+ * each buffer is passed to the DFT kernel selected by Msc_PUSCH, and the
+ * results are de-interleaved into z using the same block layout as d.
+ * \param z output buffer, 12*Msc_PUSCH words are written
+ * \param d input buffer, 12*Msc_PUSCH words are read
+ * \param Msc_PUSCH DFT size, must be one of the sizes handled by the switch
+ * \param Nsymb currently unused, 12 blocks are always processed
+ * \note For an unsupported Msc_PUSCH an error is printed and z is not written.
+ */
+void dft_lte(int32_t *z,int32_t *d, int32_t Msc_PUSCH, uint8_t Nsymb)
+{
+#if defined(__x86_64__) || defined(__i386__)
+  __m128i dft_in128[4][1200],dft_out128[4][1200];
+#elif defined(__arm__)
+  int16x8_t dft_in128[4][1200],dft_out128[4][1200];
+#endif
+  uint32_t *dft_in0=(uint32_t*)dft_in128[0],*dft_out0=(uint32_t*)dft_out128[0];
+  uint32_t *dft_in1=(uint32_t*)dft_in128[1],*dft_out1=(uint32_t*)dft_out128[1];
+  uint32_t *dft_in2=(uint32_t*)dft_in128[2],*dft_out2=(uint32_t*)dft_out128[2];
+
+  uint32_t *d0,*d1,*d2,*d3,*d4,*d5,*d6,*d7,*d8,*d9,*d10,*d11;
+
+  uint32_t *z0,*z1,*z2,*z3,*z4,*z5,*z6,*z7,*z8,*z9,*z10,*z11;
+  uint32_t i,ip;
+#if defined(__x86_64__) || defined(__i386__)
+  __m128i norm128;
+#elif defined(__arm__)
+  int16x8_t norm128;
+#endif
+
+  (void)Nsymb;
+
+  // >1200 overruns the work buffers; <0 becomes a huge unsigned loop bound below
+  if ((Msc_PUSCH < 12) || (Msc_PUSCH > 1200)) {
+    printf("ulsch_modulation.c: dft_lte: Illegal Msc_PUSCH %d\n",Msc_PUSCH);
+    return;
+  }
+
+  d0 = (uint32_t *)d;
+  d1 = d0+Msc_PUSCH;
+  d2 = d1+Msc_PUSCH;
+  d3 = d2+Msc_PUSCH;
+  d4 = d3+Msc_PUSCH;
+  d5 = d4+Msc_PUSCH;
+  d6 = d5+Msc_PUSCH;
+  d7 = d6+Msc_PUSCH;
+  d8 = d7+Msc_PUSCH;
+  d9 = d8+Msc_PUSCH;
+  d10 = d9+Msc_PUSCH;
+  d11 = d10+Msc_PUSCH;
+
+  // interleave: word i of blocks 4k..4k+3 becomes words 4i..4i+3 of work buffer k
+  for (i=0,ip=0; i<Msc_PUSCH; i++,ip+=4) {
+    dft_in0[ip]   =  d0[i];
+    dft_in0[ip+1] =  d1[i];
+    dft_in0[ip+2] =  d2[i];
+    dft_in0[ip+3] =  d3[i];
+    dft_in1[ip]   =  d4[i];
+    dft_in1[ip+1] =  d5[i];
+    dft_in1[ip+2] =  d6[i];
+    dft_in1[ip+3] =  d7[i];
+    dft_in2[ip]   =  d8[i];
+    dft_in2[ip+1] =  d9[i];
+    dft_in2[ip+2] =  d10[i];
+    dft_in2[ip+3] =  d11[i];
+  }
+
+  switch (Msc_PUSCH) {
+  case 12:
+    dft12((int16_t *)dft_in0,(int16_t *)dft_out0);
+    dft12((int16_t *)dft_in1,(int16_t *)dft_out1);
+    dft12((int16_t *)dft_in2,(int16_t *)dft_out2);
+    // scale the dft12 output by 9459/2^15 (~1/sqrt(12)), 12 vectors per work buffer
+#if defined(__x86_64__) || defined(__i386__)
+    norm128 = _mm_set1_epi16(9459);
+#elif defined(__arm__)
+    norm128 = vdupq_n_s16(9459);
+#endif
+    for (i=0; i<12; i++) {
+#if defined(__x86_64__) || defined(__i386__)
+      ((__m128i*)dft_out0)[i] = _mm_slli_epi16(_mm_mulhi_epi16(((__m128i*)dft_out0)[i],norm128),1);
+      ((__m128i*)dft_out1)[i] = _mm_slli_epi16(_mm_mulhi_epi16(((__m128i*)dft_out1)[i],norm128),1);
+      ((__m128i*)dft_out2)[i] = _mm_slli_epi16(_mm_mulhi_epi16(((__m128i*)dft_out2)[i],norm128),1);
+#elif defined(__arm__)
+      ((int16x8_t*)dft_out0)[i] = vqdmulhq_s16(((int16x8_t*)dft_out0)[i],norm128);
+      ((int16x8_t*)dft_out1)[i] = vqdmulhq_s16(((int16x8_t*)dft_out1)[i],norm128);
+      ((int16x8_t*)dft_out2)[i] = vqdmulhq_s16(((int16x8_t*)dft_out2)[i],norm128);
+#endif
+    }
+    break;
+
+  case 24:
+    dft24((int16_t*)dft_in0,(int16_t*)dft_out0,1);
+    dft24((int16_t*)dft_in1,(int16_t*)dft_out1,1);
+    dft24((int16_t*)dft_in2,(int16_t*)dft_out2,1);
+    break;
+
+  case 36:
+    dft36((int16_t*)dft_in0,(int16_t*)dft_out0,1);
+    dft36((int16_t*)dft_in1,(int16_t*)dft_out1,1);
+    dft36((int16_t*)dft_in2,(int16_t*)dft_out2,1);
+    break;
+
+  case 48:
+    dft48((int16_t*)dft_in0,(int16_t*)dft_out0,1);
+    dft48((int16_t*)dft_in1,(int16_t*)dft_out1,1);
+    dft48((int16_t*)dft_in2,(int16_t*)dft_out2,1);
+    break;
+
+  case 60:
+    dft60((int16_t*)dft_in0,(int16_t*)dft_out0,1);
+    dft60((int16_t*)dft_in1,(int16_t*)dft_out1,1);
+    dft60((int16_t*)dft_in2,(int16_t*)dft_out2,1);
+    break;
+
+  case 72:
+    dft72((int16_t*)dft_in0,(int16_t*)dft_out0,1);
+    dft72((int16_t*)dft_in1,(int16_t*)dft_out1,1);
+    dft72((int16_t*)dft_in2,(int16_t*)dft_out2,1);
+    break;
+
+  case 96:
+    dft96((int16_t*)dft_in0,(int16_t*)dft_out0,1);
+    dft96((int16_t*)dft_in1,(int16_t*)dft_out1,1);
+    dft96((int16_t*)dft_in2,(int16_t*)dft_out2,1);
+    break;
+
+  case 108:
+    dft108((int16_t*)dft_in0,(int16_t*)dft_out0,1);
+    dft108((int16_t*)dft_in1,(int16_t*)dft_out1,1);
+    dft108((int16_t*)dft_in2,(int16_t*)dft_out2,1);
+    break;
+
+  case 120:
+    dft120((int16_t*)dft_in0,(int16_t*)dft_out0,1);
+    dft120((int16_t*)dft_in1,(int16_t*)dft_out1,1);
+    dft120((int16_t*)dft_in2,(int16_t*)dft_out2,1);
+    break;
+
+  case 144:
+    dft144((int16_t*)dft_in0,(int16_t*)dft_out0,1);
+    dft144((int16_t*)dft_in1,(int16_t*)dft_out1,1);
+    dft144((int16_t*)dft_in2,(int16_t*)dft_out2,1);
+    break;
+
+  case 180:
+    dft180((int16_t*)dft_in0,(int16_t*)dft_out0,1);
+    dft180((int16_t*)dft_in1,(int16_t*)dft_out1,1);
+    dft180((int16_t*)dft_in2,(int16_t*)dft_out2,1);
+    break;
+
+  case 192:
+    dft192((int16_t*)dft_in0,(int16_t*)dft_out0,1);
+    dft192((int16_t*)dft_in1,(int16_t*)dft_out1,1);
+    dft192((int16_t*)dft_in2,(int16_t*)dft_out2,1);
+    break;
+
+  case 216:
+    dft216((int16_t*)dft_in0,(int16_t*)dft_out0,1);
+    dft216((int16_t*)dft_in1,(int16_t*)dft_out1,1);
+    dft216((int16_t*)dft_in2,(int16_t*)dft_out2,1);
+    break;
+
+  case 240:
+    dft240((int16_t*)dft_in0,(int16_t*)dft_out0,1);
+    dft240((int16_t*)dft_in1,(int16_t*)dft_out1,1);
+    dft240((int16_t*)dft_in2,(int16_t*)dft_out2,1);
+    break;
+
+  case 288:
+    dft288((int16_t*)dft_in0,(int16_t*)dft_out0,1);
+    dft288((int16_t*)dft_in1,(int16_t*)dft_out1,1);
+    dft288((int16_t*)dft_in2,(int16_t*)dft_out2,1);
+    break;
+
+  case 300:
+    dft300((int16_t*)dft_in0,(int16_t*)dft_out0,1);
+    dft300((int16_t*)dft_in1,(int16_t*)dft_out1,1);
+    dft300((int16_t*)dft_in2,(int16_t*)dft_out2,1);
+    break;
+
+  case 324:
+    dft324((int16_t*)dft_in0,(int16_t*)dft_out0,1);
+    dft324((int16_t*)dft_in1,(int16_t*)dft_out1,1);
+    dft324((int16_t*)dft_in2,(int16_t*)dft_out2,1);
+    break;
+
+  case 360:
+    dft360((int16_t*)dft_in0,(int16_t*)dft_out0,1);
+    dft360((int16_t*)dft_in1,(int16_t*)dft_out1,1);
+    dft360((int16_t*)dft_in2,(int16_t*)dft_out2,1);
+    break;
+
+  case 384:
+    dft384((int16_t*)dft_in0,(int16_t*)dft_out0,1);
+    dft384((int16_t*)dft_in1,(int16_t*)dft_out1,1);
+    dft384((int16_t*)dft_in2,(int16_t*)dft_out2,1);
+    break;
+
+  case 432:
+    dft432((int16_t*)dft_in0,(int16_t*)dft_out0,1);
+    dft432((int16_t*)dft_in1,(int16_t*)dft_out1,1);
+    dft432((int16_t*)dft_in2,(int16_t*)dft_out2,1);
+    break;
+
+  case 480:
+    dft480((int16_t*)dft_in0,(int16_t*)dft_out0,1);
+    dft480((int16_t*)dft_in1,(int16_t*)dft_out1,1);
+    dft480((int16_t*)dft_in2,(int16_t*)dft_out2,1);
+    break;
+
+  case 540:
+    dft540((int16_t*)dft_in0,(int16_t*)dft_out0,1);
+    dft540((int16_t*)dft_in1,(int16_t*)dft_out1,1);
+    dft540((int16_t*)dft_in2,(int16_t*)dft_out2,1);
+    break;
+
+  case 576:
+    dft576((int16_t*)dft_in0,(int16_t*)dft_out0,1);
+    dft576((int16_t*)dft_in1,(int16_t*)dft_out1,1);
+    dft576((int16_t*)dft_in2,(int16_t*)dft_out2,1);
+    break;
+
+  case 600:
+    dft600((int16_t*)dft_in0,(int16_t*)dft_out0,1);
+    dft600((int16_t*)dft_in1,(int16_t*)dft_out1,1);
+    dft600((int16_t*)dft_in2,(int16_t*)dft_out2,1);
+    break;
+
+  case 648:
+    dft648((int16_t*)dft_in0,(int16_t*)dft_out0,1);
+    dft648((int16_t*)dft_in1,(int16_t*)dft_out1,1);
+    dft648((int16_t*)dft_in2,(int16_t*)dft_out2,1);
+    break;
+
+  case 720:
+    dft720((int16_t*)dft_in0,(int16_t*)dft_out0,1);
+    dft720((int16_t*)dft_in1,(int16_t*)dft_out1,1);
+    dft720((int16_t*)dft_in2,(int16_t*)dft_out2,1);
+    break;
+
+  case 864:
+    dft864((int16_t*)dft_in0,(int16_t*)dft_out0,1);
+    dft864((int16_t*)dft_in1,(int16_t*)dft_out1,1);
+    dft864((int16_t*)dft_in2,(int16_t*)dft_out2,1);
+    break;
+
+  case 900:
+    dft900((int16_t*)dft_in0,(int16_t*)dft_out0,1);
+    dft900((int16_t*)dft_in1,(int16_t*)dft_out1,1);
+    dft900((int16_t*)dft_in2,(int16_t*)dft_out2,1);
+    break;
+
+  case 960:
+    dft960((int16_t*)dft_in0,(int16_t*)dft_out0,1);
+    dft960((int16_t*)dft_in1,(int16_t*)dft_out1,1);
+    dft960((int16_t*)dft_in2,(int16_t*)dft_out2,1);
+    break;
+
+  case 972:
+    dft972((int16_t*)dft_in0,(int16_t*)dft_out0,1);
+    dft972((int16_t*)dft_in1,(int16_t*)dft_out1,1);
+    dft972((int16_t*)dft_in2,(int16_t*)dft_out2,1);
+    break;
+
+  case 1080:
+    dft1080((int16_t*)dft_in0,(int16_t*)dft_out0,1);
+    dft1080((int16_t*)dft_in1,(int16_t*)dft_out1,1);
+    dft1080((int16_t*)dft_in2,(int16_t*)dft_out2,1);
+    break;
+
+  case 1152:
+    dft1152((int16_t*)dft_in0,(int16_t*)dft_out0,1);
+    dft1152((int16_t*)dft_in1,(int16_t*)dft_out1,1);
+    dft1152((int16_t*)dft_in2,(int16_t*)dft_out2,1);
+    break;
+
+  case 1200:
+    dft1200((int16_t*)dft_in0,(int16_t*)dft_out0,1);
+    dft1200((int16_t*)dft_in1,(int16_t*)dft_out1,1);
+    dft1200((int16_t*)dft_in2,(int16_t*)dft_out2,1);
+    break;
+
+  default:
+    // no DFT kernel for this size: dft_out* holds no result, so do not copy it to z
+    printf("ulsch_modulation.c: dft_lte: Unsupported Msc_PUSCH %d\n",Msc_PUSCH);
+    return;
+  }
+
+  z0 = (uint32_t *)z;
+  z1 = z0+Msc_PUSCH;
+  z2 = z1+Msc_PUSCH;
+  z3 = z2+Msc_PUSCH;
+  z4 = z3+Msc_PUSCH;
+  z5 = z4+Msc_PUSCH;
+  z6 = z5+Msc_PUSCH;
+  z7 = z6+Msc_PUSCH;
+  z8 = z7+Msc_PUSCH;
+  z9 = z8+Msc_PUSCH;
+  z10 = z9+Msc_PUSCH;
+  z11 = z10+Msc_PUSCH;
+
+  // de-interleave the three work buffers back into one block of Msc_PUSCH words each
+  for (i=0,ip=0; i<Msc_PUSCH; i++,ip+=4) {
+    z0[i]     = dft_out0[ip];
+    z1[i]     = dft_out0[ip+1];
+    z2[i]     = dft_out0[ip+2];
+    z3[i]     = dft_out0[ip+3];
+    z4[i]     = dft_out1[ip+0];
+    z5[i]     = dft_out1[ip+1];
+    z6[i]     = dft_out1[ip+2];
+    z7[i]     = dft_out1[ip+3];
+    z8[i]     = dft_out2[ip];
+    z9[i]     = dft_out2[ip+1];
+    z10[i]    = dft_out2[ip+2];
+    z11[i]    = dft_out2[ip+3];
+  }
+}
+
+#endif
+void ulsch_modulation(int32_t **txdataF,
+                      short amp,
+                      uint32_t frame,
+                      uint32_t subframe,
+                      LTE_DL_FRAME_PARMS *frame_parms,
+                      LTE_UE_ULSCH_t *ulsch)
+{
+
+  uint8_t qam64_table_offset_re = 0;
+  uint8_t qam64_table_offset_im = 0;
+  uint8_t qam16_table_offset_re = 0;
+  uint8_t qam16_table_offset_im = 0;
+  short gain_lin_QPSK;
+
+  DevAssert(frame_parms);
+
+  int re_offset,re_offset0,i,Msymb,j,k,nsymb,Msc_PUSCH,l;
+  //  uint8_t harq_pid = (rag_flag == 1) ? 0 : subframe2harq_pid_tdd(frame_parms->tdd_config,subframe);
+  uint8_t harq_pid = subframe2harq_pid(frame_parms,frame,subframe);
+  uint8_t Q_m;
+  int32_t *txptr;
+  uint32_t symbol_offset;
+  uint16_t first_rb;
+  uint16_t nb_rb;
+  int G;
+
+  uint32_t x1, x2, s=0;
+  uint8_t c;
+
+  if (!ulsch) {
+    printf("ulsch_modulation.c: Null ulsch\n");
+    return;
+  }
+
+  // x1 is set in lte_gold_generic
+  x2 = (ulsch->rnti<<14) + (subframe<<9) + frame_parms->Nid_cell; //this is c_init in 36.211 Sec 6.3.1
+
+  if (harq_pid>=8) {
+    printf("ulsch_modulation.c: Illegal harq_pid %d\n",harq_pid);
+    return;
+  }
+
+  first_rb = ulsch->harq_processes[harq_pid]->first_rb;
+  nb_rb = ulsch->harq_processes[harq_pid]->nb_rb;
+
+  if (nb_rb == 0) {
+    printf("ulsch_modulation.c: Frame %d, Subframe %d Illegal nb_rb %d\n",frame,subframe,nb_rb);
+    return;
+  }
+
+  if (first_rb > frame_parms->N_RB_UL) {
+    printf("ulsch_modulation.c: Frame %d, Subframe %d Illegal first_rb %d\n",frame,subframe,first_rb);
+    return;
+  }
+
+  VCD_SIGNAL_DUMPER_DUMP_FUNCTION_BY_NAME(VCD_SIGNAL_DUMPER_FUNCTIONS_UE_ULSCH_MODULATION, VCD_FUNCTION_IN);
+
+  Q_m = get_Qm_ul(ulsch->harq_processes[harq_pid]->mcs);
+
+  G = (int)ulsch->harq_processes[harq_pid]->nb_rb * (12 * Q_m) * (ulsch->Nsymb_pusch);
+
+
+  // Mapping
+  nsymb = (frame_parms->Ncp==0) ? 14:12;
+  Msc_PUSCH = ulsch->harq_processes[harq_pid]->nb_rb*12;
+
+#ifdef DEBUG_ULSCH_MODULATION
+  LOG_D(PHY,"ulsch_modulation.c: Doing modulation (rnti %x,x2 %x) for G=%d bits, harq_pid %d , nb_rb %d, Q_m %d, Nsymb_pusch %d (nsymb %d), subframe %d\n",
+        ulsch->rnti,x2,G,harq_pid,ulsch->harq_processes[harq_pid]->nb_rb,Q_m, ulsch->Nsymb_pusch,nsymb,subframe);
+#endif
+
+  // scrambling (Note the placeholding bits are handled in ulsch_coding.c directly!)
+  //printf("ulsch bits: ");
+  s = lte_gold_generic(&x1, &x2, 1);
+  k=0;
+
+  //printf("G %d\n",G);
+  for (i=0; i<(1+(G>>5)); i++) {
+    for (j=0; j<32; j++,k++) {
+      c = (uint8_t)((s>>j)&1);
+
+      if (ulsch->h[k] == PUSCH_x) {
+        //  printf("i %d: PUSCH_x\n",i);
+        ulsch->b_tilde[k] = 1;
+      } else if (ulsch->h[k] == PUSCH_y) {
+        //  printf("i %d: PUSCH_y\n",i);
+        ulsch->b_tilde[k] = ulsch->b_tilde[k-1];
+      } else {
+        ulsch->b_tilde[k] = (ulsch->h[k]+c)&1;
+        //  printf("i %d : %d (h %d c %d)\n", (i<<5)+j,ulsch->b_tilde[k],ulsch->h[k],c);
+      }
+
+    }
+
+    s = lte_gold_generic(&x1, &x2, 0);
+  }
+
+  //printf("\n");
+
+
+  gain_lin_QPSK = (short)((amp*ONE_OVER_SQRT2_Q15)>>15);
+
+
+  // Modulation
+
+  Msymb = G/Q_m;
+
+  if(ulsch->cooperation_flag == 2)
+    // For Distributed Alamouti Scheme in Collabrative Communication
+  {
+    for (i=0,j=Q_m; i<Msymb; i+=2,j+=2*Q_m) {
+
+      switch (Q_m) {
+
+      case 2:
+
+
+        //UE1, -x1*
+        ((int16_t*)&ulsch->d[i])[0] = (ulsch->b_tilde[j] == 1)  ? (gain_lin_QPSK) : -gain_lin_QPSK;
+        ((int16_t*)&ulsch->d[i])[1] = (ulsch->b_tilde[j+1] == 1)? (-gain_lin_QPSK) : gain_lin_QPSK;
+        //      if (i<Msc_PUSCH)
+        //  printf("input %d (%p): %d,%d\n", i,&ulsch->d[i],((int16_t*)&ulsch->d[i])[0],((int16_t*)&ulsch->d[i])[1]);
+
+        // UE1, x0*
+        ((int16_t*)&ulsch->d[i+1])[0] = (ulsch->b_tilde[j-2] == 1)  ? (-gain_lin_QPSK) : gain_lin_QPSK;
+        ((int16_t*)&ulsch->d[i+1])[1] = (ulsch->b_tilde[j-1] == 1)? (gain_lin_QPSK) : -gain_lin_QPSK;
+
+        break;
+
+      case 4:
+
+
+        //UE1,-x1*
+        qam16_table_offset_re = 0;
+        qam16_table_offset_im = 0;
+
+        if (ulsch->b_tilde[j] == 1)
+          qam16_table_offset_re+=2;
+
+        if (ulsch->b_tilde[j+1] == 1)
+          qam16_table_offset_im+=2;
+
+
+
+        if (ulsch->b_tilde[j+2] == 1)
+          qam16_table_offset_re+=1;
+
+        if (ulsch->b_tilde[j+3] == 1)
+          qam16_table_offset_im+=1;
+
+
+        ((int16_t*)&ulsch->d[i])[0]=-(int16_t)(((int32_t)amp*qam16_table[qam16_table_offset_re])>>15);
+        ((int16_t*)&ulsch->d[i])[1]=(int16_t)(((int32_t)amp*qam16_table[qam16_table_offset_im])>>15);
+
+        //UE1,x0*
+        qam16_table_offset_re = 0;
+        qam16_table_offset_im = 0;
+
+        if (ulsch->b_tilde[j-4] == 1)
+          qam16_table_offset_re+=2;
+
+        if (ulsch->b_tilde[j-3] == 1)
+          qam16_table_offset_im+=2;
+
+
+        if (ulsch->b_tilde[j-2] == 1)
+          qam16_table_offset_re+=1;
+
+        if (ulsch->b_tilde[j-1] == 1)
+          qam16_table_offset_im+=1;
+
+
+        //    ((int16_t*)&ulsch->d[i+1])[0]=-(int16_t)(((int32_t)amp*qam16_table[qam16_table_offset_re])>>15);
+        //    ((int16_t*)&ulsch->d[i+1])[1]=(int16_t)(((int32_t)amp*qam16_table[qam16_table_offset_im])>>15);
+        ((int16_t*)&ulsch->d[i+1])[0]=(int16_t)(((int32_t)amp*qam16_table[qam16_table_offset_re])>>15);
+        ((int16_t*)&ulsch->d[i+1])[1]=-(int16_t)(((int32_t)amp*qam16_table[qam16_table_offset_im])>>15);
+
+
+        break;
+
+      case 6:
+
+
+
+        //UE1,-x1*FPGA_UE
+        qam64_table_offset_re = 0;
+        qam64_table_offset_im = 0;
+
+        if (ulsch->b_tilde[j] == 1)
+          qam64_table_offset_re+=4;
+
+        if (ulsch->b_tilde[j+1] == 1)
+          qam64_table_offset_im+=4;
+
+        if (ulsch->b_tilde[j+2] == 1)
+          qam64_table_offset_re+=2;
+
+
+        if (ulsch->b_tilde[j+3] == 1)
+          qam64_table_offset_im+=2;
+
+        if (ulsch->b_tilde[j+4] == 1)
+          qam64_table_offset_re+=1;
+
+        if (ulsch->b_tilde[j+5] == 1)
+          qam64_table_offset_im+=1;
+
+
+        ((int16_t*)&ulsch->d[i])[0]=-(int16_t)(((int32_t)amp*qam64_table[qam64_table_offset_re])>>15);
+        ((int16_t*)&ulsch->d[i])[1]=(int16_t)(((int32_t)amp*qam64_table[qam64_table_offset_im])>>15);
+
+        //UE1,x0*
+        qam64_table_offset_re = 0;
+        qam64_table_offset_im = 0;
+
+        if (ulsch->b_tilde[j-6] == 1)
+          qam64_table_offset_re+=4;
+
+        if (ulsch->b_tilde[j-5] == 1)
+          qam64_table_offset_im+=4;
+
+        if (ulsch->b_tilde[j-4] == 1)
+          qam64_table_offset_re+=2;
+
+
+        if (ulsch->b_tilde[j-3] == 1)
+          qam64_table_offset_im+=2;
+
+        if (ulsch->b_tilde[j-2] == 1)
+          qam64_table_offset_re+=1;
+
+        if (ulsch->b_tilde[j-1] == 1)
+          qam64_table_offset_im+=1;
+
+
+        ((int16_t*)&ulsch->d[i+1])[0]=(int16_t)(((int32_t)amp*qam64_table[qam64_table_offset_re])>>15);
+        ((int16_t*)&ulsch->d[i+1])[1]=-(int16_t)(((int32_t)amp*qam64_table[qam64_table_offset_im])>>15);
+
+        break;
+
+      }//switch
+    }//for
+  }//cooperation_flag == 2
+  else {
+    for (i=0,j=0; i<Msymb; i++,j+=Q_m) {
+
+      switch (Q_m) {
+
+      case 2:
+        // TODO: this has to be updated!!!
+
+        ((int16_t*)&ulsch->d[i])[0] = (ulsch->b_tilde[j] == 1)  ? (-gain_lin_QPSK) : gain_lin_QPSK;
+        ((int16_t*)&ulsch->d[i])[1] = (ulsch->b_tilde[j+1] == 1)? (-gain_lin_QPSK) : gain_lin_QPSK;
+        //        if (i<Msc_PUSCH)
+        //    printf("input %d/%d Msc_PUSCH %d (%p): %d,%d\n", i,Msymb,Msc_PUSCH,&ulsch->d[i],((int16_t*)&ulsch->d[i])[0],((int16_t*)&ulsch->d[i])[1]);
+
+        break;
+
+      case 4:
+
+        qam16_table_offset_re = 0;
+        qam16_table_offset_im = 0;
+
+        if (ulsch->b_tilde[j] == 1)
+          qam16_table_offset_re+=2;
+
+        if (ulsch->b_tilde[j+1] == 1)
+          qam16_table_offset_im+=2;
+
+        if (ulsch->b_tilde[j+2] == 1)
+          qam16_table_offset_re+=1;
+
+        if (ulsch->b_tilde[j+3] == 1)
+          qam16_table_offset_im+=1;
+
+
+        ((int16_t*)&ulsch->d[i])[0]=(int16_t)(((int32_t)amp*qam16_table[qam16_table_offset_re])>>15);
+        ((int16_t*)&ulsch->d[i])[1]=(int16_t)(((int32_t)amp*qam16_table[qam16_table_offset_im])>>15);
+        //      printf("input(16qam) %d (%p): %d,%d\n", i,&ulsch->d[i],((int16_t*)&ulsch->d[i])[0],((int16_t*)&ulsch->d[i])[1]);
+        break;
+
+      case 6:
+
+
+        qam64_table_offset_re = 0;
+        qam64_table_offset_im = 0;
+
+        if (ulsch->b_tilde[j] == 1)
+          qam64_table_offset_re+=4;
+
+        if (ulsch->b_tilde[j+1] == 1)
+          qam64_table_offset_im+=4;
+
+        if (ulsch->b_tilde[j+2] == 1)
+          qam64_table_offset_re+=2;
+
+        if (ulsch->b_tilde[j+3] == 1)
+          qam64_table_offset_im+=2;
+
+        if (ulsch->b_tilde[j+4] == 1)
+          qam64_table_offset_re+=1;
+
+        if (ulsch->b_tilde[j+5] == 1)
+          qam64_table_offset_im+=1;
+
+
+        ((int16_t*)&ulsch->d[i])[0]=(int16_t)(((int32_t)amp*qam64_table[qam64_table_offset_re])>>15);
+        ((int16_t*)&ulsch->d[i])[1]=(int16_t)(((int32_t)amp*qam64_table[qam64_table_offset_im])>>15);
+
+        break;
+
+      }
+    }
+  }// normal symbols
+
+
+  // Transform Precoding
+
+#ifdef OFDMA_ULSCH
+
+  for (i=0; i<Msymb; i++) {
+    ulsch->z[i] = ulsch->d[i];
+  }
+
+#else
+  dft_lte(ulsch->z,ulsch->d,Msc_PUSCH,ulsch->Nsymb_pusch);
+#endif
+
+  DevAssert(txdataF);
+
+#ifdef OFDMA_ULSCH
+  re_offset0 = frame_parms->first_carrier_offset + (ulsch->harq_processes[harq_pid]->first_rb*12);
+
+  if (re_offset0>frame_parms->ofdm_symbol_size) {
+    re_offset0 -= frame_parms->ofdm_symbol_size;
+    //    re_offset0++;
+  }
+
+  //  printf("re_offset0 %d\n",re_offset0);
+
+
+  for (j=0,l=0; l<(nsymb-ulsch->srs_active); l++) {
+    re_offset = re_offset0;
+    symbol_offset = (int)frame_parms->ofdm_symbol_size*(l+(subframe*nsymb));
+#ifdef DEBUG_ULSCH_MODULATION
+    printf("symbol %d (subframe %d): symbol_offset %d\n",l,subframe,symbol_offset);
+#endif
+    txptr = &txdataF[0][symbol_offset];
+
+    if (((frame_parms->Ncp == 0) && ((l==3) || (l==10)))||
+        ((frame_parms->Ncp == 1) && ((l==2) || (l==8)))) {
+    }
+    // Skip reference symbols
+    else {
+
+      //      printf("copying %d REs\n",Msc_PUSCH);
+      for (i=0; i<Msc_PUSCH; i++,j++) {
+#ifdef DEBUG_ULSCH_MODULATION
+        printf("re_offset %d (%p): %d,%d\n", re_offset,&ulsch->z[j],((int16_t*)&ulsch->z[j])[0],((int16_t*)&ulsch->z[j])[1]);
+#endif
+        txptr[re_offset++] = ulsch->z[j];
+
+        if (re_offset==frame_parms->ofdm_symbol_size)
+          re_offset = 0;
+      }
+    }
+  }
+
+# else  // OFDMA_ULSCH = 0
+  re_offset0 = frame_parms->first_carrier_offset + (ulsch->harq_processes[harq_pid]->first_rb*12);
+
+  if (re_offset0>frame_parms->ofdm_symbol_size) {
+    re_offset0 -= frame_parms->ofdm_symbol_size;
+    //    re_offset0++;
+  }
+
+  //    printf("re_offset0 %d\n",re_offset0);
+  //  printf("txdataF %p\n",&txdataF[0][0]);
+  for (j=0,l=0; l<(nsymb-ulsch->srs_active); l++) {
+    re_offset = re_offset0;
+    symbol_offset = (uint32_t)frame_parms->ofdm_symbol_size*(l+(subframe*nsymb));
+#ifdef DEBUG_ULSCH_MODULATION
+    printf("ulsch_mod (SC-FDMA) symbol %d (subframe %d): symbol_offset %d\n",l,subframe,symbol_offset);
+#endif
+    txptr = &txdataF[0][symbol_offset];
+
+    if (((frame_parms->Ncp == 0) && ((l==3) || (l==10)))||
+        ((frame_parms->Ncp == 1) && ((l==2) || (l==8)))) {
+    }
+    // Skip reference symbols
+    else {
+      //      printf("copying %d REs\n",Msc_PUSCH);
+      for (i=0; i<Msc_PUSCH; i++,j++) {
+
+#ifdef DEBUG_ULSCH_MODULATION
+        printf("re_offset %d (%p): %d,%d => %p\n", re_offset,&ulsch->z[j],((int16_t*)&ulsch->z[j])[0],((int16_t*)&ulsch->z[j])[1],&txptr[re_offset]);
+#endif //DEBUG_ULSCH_MODULATION
+        txptr[re_offset++] = ulsch->z[j];
+
+        if (re_offset==frame_parms->ofdm_symbol_size)
+          re_offset = 0;
+      }
+    }
+  }
+
+#endif
+  VCD_SIGNAL_DUMPER_DUMP_FUNCTION_BY_NAME(VCD_SIGNAL_DUMPER_FUNCTIONS_UE_ULSCH_MODULATION, VCD_FUNCTION_OUT);
+
+}
+
diff --git a/openair2/LAYER2/NR_MAC_gNB/config.c b/openair2/LAYER2/NR_MAC_gNB/config.c
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/openair2/NR_PHY_INTERFACE/IF_Module.h b/openair2/NR_PHY_INTERFACE/IF_Module.h
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/openair2/RRC/LITE/rrc_UE.c b/openair2/RRC/LITE/rrc_UE.c
index 0e3a70765d825b16229fea008bd1aa48d7225a72..f578e76dcdb5c1faff58fbb2bbbe2399320ad433 100644
--- a/openair2/RRC/LITE/rrc_UE.c
+++ b/openair2/RRC/LITE/rrc_UE.c
@@ -1815,8 +1815,8 @@ rrc_ue_process_rrcConnectionReconfiguration(
         rrc_ue_process_radioResourceConfigDedicated(ctxt_pP,eNB_index, rrcConnectionReconfiguration_r8->radioResourceConfigDedicated);
       }
       
-
-      void *non_criticical_ext_iterator = rrcConnectionReconfiguration_r8;
+      
+/*      void *non_criticical_ext_iterator = rrcConnectionReconfiguration_r8;
       RCCConnectionReconfiguration_v1510_IEs_t *rrc_connection_reconfiguration_v1510_IEs = (RRCConnectionReconfiguration_v1510_IEs_t *)0;
       // fetch EN-DC for NR_RRC here
       // r8
@@ -1869,7 +1869,7 @@ rrc_ue_process_rrcConnectionReconfiguration(
               break;
           }
         }
-      }
+      }*/
 
 #if defined(ENABLE_ITTI)
 
diff --git a/openair2/RRC/NR/proto_NR.h b/openair2/RRC/NR/proto_NR.h
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391