diff --git a/cmake_targets/CMakeLists.txt b/cmake_targets/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..89212ec3f12c214ed8ae0327d11dee80e84c020a --- /dev/null +++ b/cmake_targets/CMakeLists.txt @@ -0,0 +1,2534 @@ +#/* +# * Licensed to the OpenAirInterface (OAI) Software Alliance under one or more +# * contributor license agreements. See the NOTICE file distributed with +# * this work for additional information regarding copyright ownership. +# * The OpenAirInterface Software Alliance licenses this file to You under +# * the OAI Public License, Version 1.1 (the "License"); you may not use this file +# * except in compliance with the License. +# * You may obtain a copy of the License at +# * +# * http://www.openairinterface.org/?page_id=698 +# * +# * Unless required by applicable law or agreed to in writing, software +# * distributed under the License is distributed on an "AS IS" BASIS, +# * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# * See the License for the specific language governing permissions and +# * limitations under the License. 
# *-------------------------------------------------------------------------------
# * For more information about the OpenAirInterface (OAI) Software Alliance:
# *      contact@openairinterface.org
# */

# Author: laurent THOMAS, Lionel GAUTHIER

cmake_minimum_required (VERSION 2.8)

#############################################
# Base directories, compatible with legacy OAI building
################################################
# Every path below is derived from the OPENAIR_DIR environment variable, which
# is read once, at configure time.
set (OPENAIR_DIR $ENV{OPENAIR_DIR})
if (NOT OPENAIR_DIR)
  # Without this check all derived paths silently become "/nfapi/...",
  # "/openair1", ... and the failure only shows up much later.
  message(FATAL_ERROR "OPENAIR_DIR is empty: set the OPENAIR_DIR environment variable to the root of the source tree before running cmake")
endif()
set (NFAPI_DIR       ${OPENAIR_DIR}/nfapi/open-nFAPI)
set (NFAPI_USER_DIR  ${OPENAIR_DIR}/nfapi/oai_integration)
set (OPENAIR1_DIR    ${OPENAIR_DIR}/openair1)
set (OPENAIR2_DIR    ${OPENAIR_DIR}/openair2)
set (OPENAIR3_DIR    ${OPENAIR_DIR}/openair3)
set (OPENAIR_TARGETS ${OPENAIR_DIR}/targets)
set (OPENAIR_CMAKE   ${OPENAIR_DIR}/cmake_targets)
set (OPENAIR_BIN_DIR ${CMAKE_CURRENT_BINARY_DIR}${CMAKE_FILES_DIRECTORY})

project (OpenAirInterface)

###########################################
# macros to define options as there is numerous options in oai
################################################
# NOTE: these are macros, not functions, so the helper variable `value`
# is written into the caller's scope on every call.

# add_option(<name> <default> <help>)
#   Takes the current value of <name> when it is already defined, <default>
#   otherwise; declares <name> as a STRING cache entry and adds
#   -D<name>=<value> to the compile definitions of this directory.
macro(add_option name val helpstr)
  if(DEFINED ${name})
    set(value ${${name}})
  else()
    set(value ${val})
  endif()
  set(${name} ${value} CACHE STRING "${helpstr}")
  add_definitions("-D${name}=${value}")
endmacro(add_option)

# add_boolean_option(<name> <default> <help>)
#   Same value selection as add_option, but the cache entry is typed BOOL and
#   the bare definition -D<name> is added only when the value is true.
macro(add_boolean_option name val helpstr)
  if(DEFINED ${name})
    set(value ${${name}})
  else()
    set(value ${val})
  endif()
  set(${name} ${value} CACHE STRING "${helpstr}")
  set_property(CACHE ${name} PROPERTY TYPE BOOL)
  if (${value})
    add_definitions("-D${name}")
  endif()
endmacro(add_boolean_option)

# add_integer_option(<name> <default> <help>)
#   Currently identical to add_option: STRING cache entry plus
#   -D<name>=<value>.
macro(add_integer_option name val helpstr)
  if(DEFINED ${name})
    set(value ${${name}})
  else()
    set(value ${val})
  endif()
  set(${name} ${value} CACHE STRING "${helpstr}")
  add_definitions("-D${name}=${value}")
endmacro(add_integer_option)

# add_list1_option(<name> <default> <help> [<choice>...])
#   Takes the current value of <name> when defined, <default> otherwise;
#   declares a STRING cache entry whose STRINGS property is the choice list,
#   and adds -D<name>=<value> unless the value is the literal "False".
macro(add_list1_option name val helpstr)
  if(DEFINED ${name})
    set(value ${${name}})
  else()
    set(value ${val})
  endif()
  set(${name} ${value} CACHE STRING "${helpstr}")
  set_property(CACHE ${name} PROPERTY STRINGS ${ARGN})
  if(NOT "${value}" STREQUAL "False")
    add_definitions("-D${name}=${value}")
  endif()
endmacro(add_list1_option)

# add_list2_option(<name> <default> <help> [<choice>...])
#   Like add_list1_option, but the *value* itself becomes the macro name:
#   -D<value>=1 is added unless the value is the literal "False".
macro(add_list2_option name val helpstr)
  if(DEFINED ${name})
    set(value ${${name}})
  else()
    set(value ${val})
  endif()
  set(${name} ${value} CACHE STRING "${helpstr}")
  set_property(CACHE ${name} PROPERTY STRINGS ${ARGN})
  if(NOT "${value}" STREQUAL "False")
    add_definitions("-D${value}=1")
  endif()
endmacro(add_list2_option)

# add_list_string_option(<name> <default> <help> [<choice>...])
#   Like add_list1_option, but the value is passed to the compiler as a
#   quoted string literal: -D<name>="<value>".
macro(add_list_string_option name val helpstr)
  if(DEFINED ${name})
    set(value ${${name}})
  else()
    set(value ${val})
  endif()
  set(${name} ${value} CACHE STRING "${helpstr}")
  set_property(CACHE ${name} PROPERTY STRINGS ${ARGN})
  if(NOT "${value}" STREQUAL "False")
    add_definitions("-D${name}=\"${value}\"")
  endif()
endmacro(add_list_string_option)
####################################################
# compilation flags
#############################################

#set(CMAKE_BUILD_TYPE "Debug")
# Default the build type when it is empty *or* not defined at all
# (a bare STREQUAL "" test does not catch the undefined case).
if (NOT CMAKE_BUILD_TYPE)
  set(CMAKE_BUILD_TYPE "RelWithDebInfo")
endif()
message("CMAKE_BUILD_TYPE is ${CMAKE_BUILD_TYPE}")
add_list_string_option(CMAKE_BUILD_TYPE "RelWithDebInfo" "Choose the type of build, options are: None(CMAKE_CXX_FLAGS or CMAKE_C_FLAGS used) Debug Release RelWithDebInfo MinSizeRel." Debug Release RelWithDebInfo MinSizeRel)

# Processor specific flags: fixed set on armv7l, otherwise derived from the
# first "flags" line of /proc/cpuinfo when that file exists.
message("Architecture is ${CMAKE_SYSTEM_PROCESSOR}")
if (CMAKE_SYSTEM_PROCESSOR STREQUAL "armv7l")
  set(C_FLAGS_PROCESSOR "-gdwarf-2 -mfloat-abi=hard -mfpu=neon -lgcc -lrt")
else()
  if(EXISTS "/proc/cpuinfo")
    file(STRINGS "/proc/cpuinfo" CPUINFO REGEX flags LIMIT_COUNT 1)
    if (CPUINFO MATCHES "avx2")
      set(C_FLAGS_PROCESSOR "${C_FLAGS_PROCESSOR} -mavx2")
      set(COMPILATION_AVX2 "True")
    else()
      set(COMPILATION_AVX2 "False")
    endif()
    if (CPUINFO MATCHES "sse4_1")
      set(C_FLAGS_PROCESSOR "${C_FLAGS_PROCESSOR} -msse4.1")
    endif()
    if (CPUINFO MATCHES "ssse3")
      set(C_FLAGS_PROCESSOR "${C_FLAGS_PROCESSOR} -mssse3")
    endif()
  else()
    message("/proc/cpuinfo does not exist. We will use manual CPU flags")
  endif()
endif()

# CFLAGS_PROCESSOR_USER lets the caller append flags to the detected ones.
set(C_FLAGS_PROCESSOR " ${C_FLAGS_PROCESSOR} ${CFLAGS_PROCESSOR_USER}")

message("C_FLAGS_PROCESSOR is ${C_FLAGS_PROCESSOR}")

# On x86 both SSSE3 and SSE4.1 must end up in the flag set, detected or manual.
if (CMAKE_SYSTEM_PROCESSOR MATCHES "x86")
  if ( (NOT( C_FLAGS_PROCESSOR MATCHES "ssse3")) OR (NOT( C_FLAGS_PROCESSOR MATCHES "msse4.1")) )
    message(FATAL_ERROR "For x86 Architecture, you must have following flags: -mssse3 -msse4.1. The current detected flags are: ${C_FLAGS_PROCESSOR}. You can pass the flags manually in build script, for example: ./build_oai --cflags_processor \"-mssse3 -msse4.1 -mavx2\" ")
  endif()
endif()

#
set(CMAKE_C_FLAGS
  "${CMAKE_C_FLAGS} ${C_FLAGS_PROCESSOR} -std=gnu99 -Wall -Wstrict-prototypes -fno-strict-aliasing -rdynamic -funroll-loops -Wno-packed-bitfield-compat -fPIC ")
# add autotools definitions that were maybe used!
set(CMAKE_C_FLAGS
  "${CMAKE_C_FLAGS} -DSTDC_HEADERS=1 -DHAVE_SYS_TYPES_H=1 -DHAVE_SYS_STAT_H=1 -DHAVE_STDLIB_H=1 -DHAVE_STRING_H=1 -DHAVE_MEMORY_H=1 -DHAVE_STRINGS_H=1 -DHAVE_INTTYPES_H=1 -DHAVE_STDINT_H=1 -DHAVE_UNISTD_H=1 -DHAVE_FCNTL_H=1 -DHAVE_ARPA_INET_H=1 -DHAVE_SYS_TIME_H=1 -DHAVE_SYS_SOCKET_H=1 -DHAVE_STRERROR=1 -DHAVE_SOCKET=1 -DHAVE_MEMSET=1 -DHAVE_GETTIMEOFDAY=1 -DHAVE_STDLIB_H=1 -DHAVE_MALLOC=1 -DHAVE_LIBSCTP"
)
set(CMAKE_CXX_FLAGS
  "${CMAKE_CXX_FLAGS} ${C_FLAGS_PROCESSOR} -std=c++11 "
)


#########################
set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -Wl,-rpath -Wl,${CMAKE_CURRENT_BINARY_DIR}")
#########################
# set a flag for changes in the source code
# these changes are related to hardcoded path to include .h files
add_definitions(-DCMAKER)
set(CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS} -g -DMALLOC_CHECK_=3")
set(CMAKE_C_FLAGS_RELWITHDEBINFO "${CMAKE_C_FLAGS} -g -DMALLOC_CHECK_=3 -O2")


# Version information embedded in PACKAGE_VERSION; stays "UNKNOWN" when git
# is not found.
set(GIT_BRANCH        "UNKNOWN")
set(GIT_COMMIT_HASH   "UNKNOWN")
set(GIT_COMMIT_DATE   "UNKNOWN")

find_package(Git)
if(GIT_FOUND)
  message("git found: ${GIT_EXECUTABLE}")
  # Use the executable located by find_package(Git) rather than whatever
  # "git" happens to resolve to on PATH.

  # Get the current working branch
  execute_process(
    COMMAND ${GIT_EXECUTABLE} rev-parse --abbrev-ref HEAD
    WORKING_DIRECTORY ${CMAKE_SOURCE_DIR}
    OUTPUT_VARIABLE GIT_BRANCH
    OUTPUT_STRIP_TRAILING_WHITESPACE
  )

  # Get the latest abbreviated commit hash of the working branch
  execute_process(
    COMMAND ${GIT_EXECUTABLE} log -1 --format=%h
    WORKING_DIRECTORY ${CMAKE_SOURCE_DIR}
    OUTPUT_VARIABLE GIT_COMMIT_HASH
    OUTPUT_STRIP_TRAILING_WHITESPACE
  )

  # Get the latest commit date of the working branch
  execute_process(
    COMMAND ${GIT_EXECUTABLE} log -1 --format=%cd
    WORKING_DIRECTORY ${CMAKE_SOURCE_DIR}
    OUTPUT_VARIABLE GIT_COMMIT_DATE
    OUTPUT_STRIP_TRAILING_WHITESPACE
  )
endif()


# Below is a hard-coded info
set (FIRMWARE_VERSION "No svn information")
add_definitions("-DFIRMWARE_VERSION=\"${FIRMWARE_VERSION}\"")
add_definitions("-DPACKAGE_VERSION=\"Branch: ${GIT_BRANCH} Abrev. Hash: ${GIT_COMMIT_HASH} Date: ${GIT_COMMIT_DATE}\"")
add_definitions("-DPACKAGE_BUGREPORT=\"openair4g-devel@lists.eurecom.fr\"")



# Debug related options
#########################################
add_boolean_option(ASN_DEBUG           False "ASN1 coder/decoder Debug")
add_boolean_option(EMIT_ASN_DEBUG      False "ASN1 coder/decoder Debug")
add_boolean_option(MSG_PRINT           False "print debug messages")
add_boolean_option(DISABLE_XER_PRINT   False "print XER Format")
add_boolean_option(XER_PRINT           False "print XER Format")
add_boolean_option(RRC_MSG_PRINT       False "print RRC messages")
add_boolean_option(PDCP_MSG_PRINT      False "print PDCP messages to /tmp/pdcp.log")
add_boolean_option(DEBUG_PDCP_PAYLOAD  False "print PDCP PDU to stdout")  # if true, make sure that global and PDCP log levels are trace
add_boolean_option(DEBUG_MAC_INTERFACE False "print MAC-RLC PDU exchange to stdout") # if true, make sure that global and PDCP log levels are trace
add_boolean_option(TRACE_RLC_PAYLOAD   False "print RLC PDU to stdout") # if true, make sure that global and PDCP log levels are trace
add_boolean_option(TEST_OMG            False "???")
add_boolean_option(DEBUG_OMG           False "???")
add_boolean_option(XFORMS              False "This adds the possibility to see the signal oscilloscope")
add_boolean_option(PRINT_STATS         False "This adds the possibility to see the status")
add_boolean_option(T_TRACER            False "Activate the T tracer, a debugging/monitoring framework" )
add_boolean_option(UE_AUTOTEST_TRACE   False "Activate UE autotest specific logs")
add_boolean_option(UE_DEBUG_TRACE      False "Activate UE debug trace")
add_boolean_option(UE_TIMING_TRACE     False "Activate UE timing trace")
add_boolean_option(DISABLE_LOG_X       False "Deactivate all LOG_* macros")
add_boolean_option(USRP_REC_PLAY       False "Enable USRP record playback mode")

add_boolean_option(DEBUG_CONSOLE False "makes debugging easier, disables stdout/stderr buffering")

add_boolean_option(ENABLE_ITTI True "ITTI is internal messaging, should remain enabled for most targets")
set (ITTI_DIR ${OPENAIR_DIR}/common/utils/itti)
if (${ENABLE_ITTI})
  add_library(ITTI
    # add .h files if depend on (this one is generated)
    ${ITTI_DIR}/intertask_interface.h
    ${ITTI_DIR}/intertask_interface.c
    ${ITTI_DIR}/intertask_interface_dump.c
    ${ITTI_DIR}/backtrace.c
    ${ITTI_DIR}/memory_pools.c
    ${ITTI_DIR}/signals.c
    ${ITTI_DIR}/timer.c
    )
  set(ITTI_LIB ITTI)
  set(GTPU_need_ITTI ${OPENAIR3_DIR}/GTPV1-U/gtpv1u_eNB.c)
endif()

#############################
# ASN.1 grammar C code generation & dependencies
################################
# A difficulty: asn1c generates C code of an unpredictable list of files
# so, generate the c from asn1c once at cmake run time
# So, if someone modifies the asn.1 source file in such a way that it creates
# new C source files (new asn.1 objects instead of modified object attributes),
# cmake must be re-run (instead of re-running make only)
#############
set(asn1c_call "${OPENAIR_CMAKE}/tools/generate_asn1")
set(fix_asn1c_call "${OPENAIR_CMAKE}/tools/fix_asn1")
set(asn1_generated_dir ${OPENAIR_BIN_DIR})

set(protoc_call "${OPENAIR_CMAKE}/tools/generate_protobuf")
set(protobuf_generated_dir ${OPENAIR_BIN_DIR})

# RRC
######

add_list2_option(RRC_ASN1_VERSION "Rel14" "ASN.1 version of RRC interface" "Rel8" "Rel10" "Rel14" "CBA")

if (${RRC_ASN1_VERSION} STREQUAL "Rel8")
  set (RRC_GRAMMAR ${OPENAIR2_DIR}/RRC/LITE/MESSAGES/asn1c/ASN1_files/EUTRA-RRC-Definitions-86.asn)
elseif (${RRC_ASN1_VERSION} STREQUAL "CBA")
  set (RRC_GRAMMAR ${OPENAIR2_DIR}/RRC/LITE/MESSAGES/asn1c/ASN1_files/EUTRA-RRC-Definitions-a20-lola.asn)
elseif (${RRC_ASN1_VERSION} STREQUAL "Rel10")
  set (RRC_GRAMMAR ${OPENAIR2_DIR}/RRC/LITE/MESSAGES/asn1c/ASN1_files/EUTRA-RRC-Definitions-a20.asn)
else()
  set (RRC_GRAMMAR ${OPENAIR2_DIR}/RRC/LITE/MESSAGES/asn1c/ASN1_files/RRC-e30.asn)
endif()

set (RRC_FULL_DIR ${asn1_generated_dir}/RRC_${RRC_ASN1_VERSION})

if(NOT EXISTS ${asn1c_call})
  message( FATAL_ERROR "The script ${asn1c_call} must be present" )
endif()

message("calling asn1c -fcompound-names -fno-include-deps -gen-PER -no-gen-OER -no-gen-example -D ${RRC_FULL_DIR} ${RRC_GRAMMAR}")
execute_process(COMMAND ${asn1c_call}
                        ${RRC_FULL_DIR}
                        ${RRC_GRAMMAR}
                        RRC
                RESULT_VARIABLE ret)

# RESULT_VARIABLE may hold an error string containing spaces when the process
# could not be started, hence the quoted comparison.
if (NOT "${ret}" STREQUAL "0")
  message(FATAL_ERROR "${asn1c_call}: error")
endif()

if(NOT EXISTS ${fix_asn1c_call})
  message( FATAL_ERROR "The script ${fix_asn1c_call} must be present" )
endif()

execute_process(COMMAND ${fix_asn1c_call}
                        ${RRC_FULL_DIR}
                        RRC
                        ${RRC_ASN1_VERSION}
                RESULT_VARIABLE ret)

if (NOT "${ret}" STREQUAL "0")
  message(FATAL_ERROR "${fix_asn1c_call}: error")
endif()

file(GLOB rrc_source ${RRC_FULL_DIR}/*.c)
file(GLOB rrc_h ${RRC_FULL_DIR}/*.h)
set(rrc_h ${rrc_h} ${RRC_FULL_DIR}/asn1_constants.h)
set_source_files_properties(${rrc_source} PROPERTIES COMPILE_FLAGS -w) # suppress warnings from generated code
add_library(RRC_LIB ${rrc_h} ${rrc_source}
    ${OPENAIR2_DIR}/RRC/LITE/MESSAGES/asn1_msg.c
    ${OPENAIR2_DIR}/RRC/LITE/MESSAGES/asn1_msg_NB_IoT.c)
include_directories ("${RRC_FULL_DIR}")

# add the command to generate the source code
# Warning: if you modify ASN.1 source file to generate new C files, cmake should be re-run instead of make
# The generator is invoked with the same three arguments as the
# configure-time call above.
add_custom_command (
  OUTPUT ${RRC_FULL_DIR}/asn1_constants.h
  COMMAND ${asn1c_call} ${RRC_FULL_DIR} ${RRC_GRAMMAR} RRC
  COMMAND ${fix_asn1c_call} ${RRC_FULL_DIR} RRC ${RRC_ASN1_VERSION}
  DEPENDS ${RRC_GRAMMAR}
  )


#NR RRC
######

add_list2_option(NR_RRC_ASN1_VERSION "NR_Rel15" "ASN.1 version of NR_RRC interface" "NR_Rel15")

if (${NR_RRC_ASN1_VERSION} STREQUAL "NR_Rel15")
  set (NR_RRC_GRAMMAR ${OPENAIR2_DIR}/RRC/NR/MESSAGES/asn1c/ASN1_files/NR-RRC-Definitions.asn)
endif()

set (NR_RRC_FULL_DIR ${asn1_generated_dir}/${NR_RRC_ASN1_VERSION})

if(NOT EXISTS ${asn1c_call})
  message( FATAL_ERROR "The script ${asn1c_call} must be present" )
endif()

message("calling ASN1C_PREFIX=NR_ asn1c -fcompound-names -fno-include-deps -gen-PER -no-gen-OER -no-gen-example -D ${NR_RRC_FULL_DIR} ${NR_RRC_GRAMMAR}")
execute_process(COMMAND ${asn1c_call}
                        ${NR_RRC_FULL_DIR}
                        ${NR_RRC_GRAMMAR}
                        NR_RRC
                RESULT_VARIABLE ret)

if (NOT "${ret}" STREQUAL "0")
  message(FATAL_ERROR "${asn1c_call}: error")
endif()

if(NOT EXISTS ${fix_asn1c_call})
  message( FATAL_ERROR "The script ${fix_asn1c_call} must be present" )
endif()

execute_process(COMMAND ${fix_asn1c_call}
                        ${NR_RRC_FULL_DIR}
                        NR_RRC
                        ${NR_RRC_ASN1_VERSION}
                RESULT_VARIABLE ret)

if (NOT "${ret}" STREQUAL "0")
  message(FATAL_ERROR "${fix_asn1c_call}: error")
endif()

file(GLOB nr_rrc_source ${NR_RRC_FULL_DIR}/*.c)
file(GLOB nr_rrc_h ${NR_RRC_FULL_DIR}/*.h)
set(nr_rrc_h ${nr_rrc_h} ${NR_RRC_FULL_DIR}/asn1_constants.h)
set_source_files_properties(${nr_rrc_source} PROPERTIES COMPILE_FLAGS -w) # suppress warnings from generated code
add_library(NR_RRC_LIB ${nr_rrc_h} ${nr_rrc_source}
    ${OPENAIR2_DIR}/RRC/NR/MESSAGES/asn1_msg.c)
include_directories ("${NR_RRC_FULL_DIR}")

# add the command to generate the source code
# Warning: if you modify ASN.1 source file to generate new C files, cmake should be re-run instead of make
# Regeneration uses the NR arguments and depends on the NR grammar, matching
# the configure-time NR_RRC calls above.
add_custom_command (
  OUTPUT ${NR_RRC_FULL_DIR}/asn1_constants.h
  COMMAND ${asn1c_call} ${NR_RRC_FULL_DIR} ${NR_RRC_GRAMMAR} NR_RRC
  COMMAND ${fix_asn1c_call} ${NR_RRC_FULL_DIR} NR_RRC ${NR_RRC_ASN1_VERSION}
  DEPENDS ${NR_RRC_GRAMMAR}
  )

# S1AP
# Same limitation as described in RRC: unknown generated file list
# so we generate it at cmake time
##############
add_list1_option(S1AP_VERSION R14 "S1AP Asn.1 grammar version" R8 R9 R10 R14)

set(S1AP_DIR ${OPENAIR3_DIR}/S1AP)

# NOTE(review): S1AP_ASN_FILES is only set for R14; for the other versions
# asn1c is called with the bare directory - confirm this is intended.
if (${S1AP_VERSION} STREQUAL "R14")
  set (ASN1RELDIR R14.4)
  add_definitions("-DUPDATE_RELEASE_9 -DUPDATE_RELEASE_10 -DUPDATE_RELEASE_14")
  set(S1AP_ASN_FILES s1ap-14.4.0.asn1)
elseif (${S1AP_VERSION} STREQUAL "R10")
  set (ASN1RELDIR R10.5)
  add_definitions("-DUPDATE_RELEASE_9 -DUPDATE_RELEASE_10")
elseif (${S1AP_VERSION} STREQUAL "R9")
  set (ASN1RELDIR R9.8)
  add_definitions("-DUPDATE_RELEASE_9")
else()
  set (ASN1RELDIR R8.10)
endif()

set(S1AP_ASN_DIR ${S1AP_DIR}/MESSAGES/ASN1/${ASN1RELDIR})
set(S1AP_C_DIR ${asn1_generated_dir}/S1AP_${ASN1RELDIR})

message("calling ASN1C_PREFIX=S1AP_ asn1c -fcompound-names -fno-include-deps -gen-PER -no-gen-OER -no-gen-example -D ${S1AP_C_DIR} ${S1AP_ASN_DIR}/${S1AP_ASN_FILES}")

# Several COMMANDs in one execute_process() run concurrently as a pipeline,
# so the output directory is created first, in a separate step.
file(MAKE_DIRECTORY ${S1AP_C_DIR})
execute_process(COMMAND env "ASN1C_PREFIX=S1AP_" asn1c -pdu=all -fcompound-names -fno-include-deps -gen-PER -no-gen-OER -no-gen-example -D ${S1AP_C_DIR} ${S1AP_ASN_DIR}/${S1AP_ASN_FILES}
                RESULT_VARIABLE ret)

if (NOT "${ret}" STREQUAL "0")
  message(FATAL_ERROR "${ret}: error")
endif()

file(GLOB S1AP_source ${S1AP_C_DIR}/*.c)
file(GLOB s1ap_h ${S1AP_C_DIR}/*.h)
set(s1ap_h ${s1ap_h} )

add_custom_command (
  OUTPUT ${S1AP_C_DIR}/S1AP_asn_constant.h
  COMMAND mkdir -p ${S1AP_C_DIR}
  COMMAND env "ASN1C_PREFIX=S1AP_" asn1c -pdu=all -fcompound-names -fno-include-deps -gen-PER -no-gen-OER -no-gen-example -D ${S1AP_C_DIR} ${S1AP_ASN_DIR}/${S1AP_ASN_FILES}
  DEPENDS ${S1AP_ASN_DIR}/${S1AP_ASN_FILES}
)

add_library(S1AP_LIB
  ${S1AP_source}
  ${S1AP_DIR}/s1ap_common.c
  )

include_directories ("${S1AP_C_DIR}")
include_directories ("${S1AP_DIR}")

add_library(S1AP_ENB
  # ${S1AP_C_DIR}/s1ap_ies_defs.h
  ${S1AP_DIR}/s1ap_eNB.c
  ${S1AP_DIR}/s1ap_eNB_context_management_procedures.c
  ${S1AP_DIR}/s1ap_eNB_encoder.c
  ${S1AP_DIR}/s1ap_eNB_handlers.c
  ${S1AP_DIR}/s1ap_eNB_itti_messaging.c
  ${S1AP_DIR}/s1ap_eNB_management_procedures.c
  ${S1AP_DIR}/s1ap_eNB_nas_procedures.c
  ${S1AP_DIR}/s1ap_eNB_nnsf.c
  ${S1AP_DIR}/s1ap_eNB_overload.c
  ${S1AP_DIR}/s1ap_eNB_trace.c
  ${S1AP_DIR}/s1ap_eNB_ue_context.c
  ${S1AP_DIR}/s1ap_eNB_decoder.c
  )



#X2AP
# Same limitation as described in RRC/S1AP: unknown generated file list
# so we generate it at cmake time
##############
add_list1_option(X2AP_VERSION R14 "X2AP Asn.1 grammar version" R10 R11 R14)
set(X2AP_DIR ${OPENAIR2_DIR}/X2AP)
if (${X2AP_VERSION} STREQUAL "R14")
  set (ASN1RELDIR R14.5)
  set (X2AP_ASN_FILES x2ap-14.5.0.asn1)
elseif (${X2AP_VERSION} STREQUAL "R11")
  set (ASN1RELDIR R11.2)
elseif (${X2AP_VERSION} STREQUAL "R10")
  set (ASN1RELDIR R.UNKNOWN)
endif()
set(X2AP_ASN_DIR ${X2AP_DIR}/MESSAGES/ASN1/${ASN1RELDIR})

set(X2AP_C_DIR ${asn1_generated_dir}/X2AP_${ASN1RELDIR})
message("calling asn1c -fcompound-names -fno-include-deps -gen-PER -no-gen-OER -no-gen-example -D ${X2AP_C_DIR} ${X2AP_ASN_DIR}/${X2AP_ASN_FILES}")
# Directory creation is kept out of execute_process() for the same
# pipeline reason as in the S1AP section.
file(MAKE_DIRECTORY ${X2AP_C_DIR})
execute_process(COMMAND env "ASN1C_PREFIX=X2AP_" asn1c -pdu=all -fcompound-names -fno-include-deps -gen-PER -no-gen-OER -no-gen-example -D ${X2AP_C_DIR} ${X2AP_ASN_DIR}/${X2AP_ASN_FILES}
                RESULT_VARIABLE ret)
if (NOT "${ret}" STREQUAL "0")
  # asn1c is called directly here, not through ${asn1c_call}.
  message(FATAL_ERROR "asn1c (X2AP): error ${ret}")
endif()
file(GLOB X2AP_source ${X2AP_C_DIR}/*.c)

file(GLOB x2ap_h ${X2AP_C_DIR}/*.h)
set(x2ap_h ${x2ap_h} )

add_custom_command (
  OUTPUT ${X2AP_C_DIR}/X2AP_asn_constant.h
  COMMAND mkdir -p ${X2AP_C_DIR}
  COMMAND env "ASN1C_PREFIX=X2AP_" asn1c -pdu=all -fcompound-names -fno-include-deps -gen-PER -no-gen-OER -no-gen-example -D ${X2AP_C_DIR} ${X2AP_ASN_DIR}/${X2AP_ASN_FILES}
  DEPENDS ${X2AP_ASN_DIR}/${X2AP_ASN_FILES}
  )

add_library(X2AP_LIB
  ${X2AP_source}
  ${X2AP_DIR}/x2ap_common.c
  )

include_directories ("${X2AP_C_DIR}")
include_directories ("${X2AP_DIR}")

# Hardware dependent options
###################################
add_list1_option(NB_ANTENNAS_RX "2" "Number of antennas in reception" "1" "2" "4")
add_list1_option(NB_ANTENNAS_TX "4" "Number of antennas in transmission" "1" "2" "4")

add_list2_option(RF_BOARD "EXMIMO" "RF head type" "None" "EXMIMO" "OAI_USRP" "OAI_BLADERF" "CPRIGW" "OAI_LMSSDR")

add_list2_option(TRANSP_PRO "None" "Transport protocol type" "None" "ETHERNET")
#NOKIA config enhancement
set (CONFIG_ROOTDIR
  ${OPENAIR_DIR}/common/config
  )
set (CONFIG_SOURCES
  ${CONFIG_ROOTDIR}/config_load_configmodule.c
  ${CONFIG_ROOTDIR}/config_userapi.c
  ${CONFIG_ROOTDIR}/config_cmdline.c
  )
set (CONFIG_LIBCONFIG_SOURCES
  ${CONFIG_ROOTDIR}/libconfig/config_libconfig.c
  )
add_library(params_libconfig MODULE ${CONFIG_LIBCONFIG_SOURCES} )
target_link_libraries(params_libconfig config)
# shared library loader
set (SHLIB_LOADER_SOURCES
  ${OPENAIR_DIR}/common/utils/load_module_shlib.c
)
# include RF devices / transport protocols library modules
######################################################################

include_directories("${OPENAIR_TARGETS}/ARCH/EXMIMO/USERSPACE/LIB/")
include_directories ("${OPENAIR_TARGETS}/ARCH/EXMIMO/DEFS/")
#set (option_HWEXMIMOLIB_lib "-l ")
set(HWLIB_EXMIMO_SOURCE
  ${OPENAIR_TARGETS}/ARCH/EXMIMO/USERSPACE/LIB/openair0_lib.c
#  ${OPENAIR_TARGETS}/ARCH/EXMIMO/USERSPACE/LIB/gain_control.c
  )
add_library(oai_exmimodevif MODULE ${HWLIB_EXMIMO_SOURCE} )

include_directories("${OPENAIR_TARGETS}/ARCH/USRP/USERSPACE/LIB/")
set(HWLIB_USRP_SOURCE
  ${OPENAIR_TARGETS}/ARCH/USRP/USERSPACE/LIB/usrp_lib.cpp
  )
add_library(oai_usrpdevif MODULE ${HWLIB_USRP_SOURCE} )
target_link_libraries(oai_usrpdevif uhd)

include_directories("${OPENAIR_TARGETS}/ARCH/BLADERF/USERSPACE/LIB/")
set(HWLIB_BLADERF_SOURCE
  ${OPENAIR_TARGETS}/ARCH/BLADERF/USERSPACE/LIB/bladerf_lib.c
  )
add_library(oai_bladerfdevif MODULE ${HWLIB_BLADERF_SOURCE} )
target_link_libraries(oai_bladerfdevif bladeRF)

# --- LimeSDR device module ---------------------------------------------------
include_directories("${OPENAIR_TARGETS}/ARCH/LMSSDR/USERSPACE/LIB/")

set(HWLIB_LMSSDR_SOURCE ${OPENAIR_TARGETS}/ARCH/LMSSDR/USERSPACE/LIB/lms_lib.cpp)
add_library(oai_lmssdrdevif MODULE ${HWLIB_LMSSDR_SOURCE})
target_include_directories(oai_lmssdrdevif PRIVATE /usr/local/include/lime)
target_link_libraries(oai_lmssdrdevif LimeSuite)

# --- Ethernet transport module -----------------------------------------------
include_directories("${OPENAIR_TARGETS}/ARCH/ETHERNET/USERSPACE/LIB/")
set(TPLIB_ETHERNET_SOURCE
    ${OPENAIR_TARGETS}/ARCH/ETHERNET/USERSPACE/LIB/ethernet_lib.c
    ${OPENAIR_TARGETS}/ARCH/ETHERNET/USERSPACE/LIB/eth_udp.c
    ${OPENAIR_TARGETS}/ARCH/ETHERNET/USERSPACE/LIB/eth_raw.c)
add_library(oai_eth_transpro MODULE ${TPLIB_ETHERNET_SOURCE})

# --- mobipass transport module -----------------------------------------------
include_directories("${OPENAIR_TARGETS}/ARCH/mobipass/")
set(TPLIB_MOBIPASS_SOURCE
    ${OPENAIR_TARGETS}/ARCH/mobipass/interface.c
    ${OPENAIR_TARGETS}/ARCH/mobipass/mobipass.c
    ${OPENAIR_TARGETS}/ARCH/mobipass/queues.c)
add_library(oai_mobipass MODULE ${TPLIB_MOBIPASS_SOURCE})

# Hide all functions/variables in the mobipass library.
# Use __attribute__((__visibility__("default")))
# in the source code to unhide a function/variable.
# Append -fvisibility=hidden to whatever COMPILE_FLAGS the mobipass module
# already carries. get_target_property() yields "<var>-NOTFOUND" when the
# property is unset, so that case is mapped to an empty string first.
get_target_property(mobipass_cflags oai_mobipass COMPILE_FLAGS)
if(NOT mobipass_cflags)
  set(mobipass_cflags "")
endif()
set_target_properties(oai_mobipass PROPERTIES COMPILE_FLAGS "${mobipass_cflags} -fvisibility=hidden")

set(HWLIB_TCP_BRIDGE_SOURCE
  ${OPENAIR_TARGETS}/ARCH/tcp_bridge/tcp_bridge.c
  )
add_library(oai_tcp_bridge MODULE ${HWLIB_TCP_BRIDGE_SOURCE} )

#get_target_property(tcp_bridge_cflags oai_tcp_bridge COMPILE_FLAGS)
#set_target_properties(oai_tcp_bridge PROPERTIES COMPILE_FLAGS "${tcp_bridge_cflags} -fvisibility=hidden")
set_target_properties(oai_tcp_bridge PROPERTIES COMPILE_FLAGS "-fvisibility=hidden")

##########################################################

include_directories ("${OPENAIR_TARGETS}/ARCH/COMMON")

# These two values are printed before the corresponding options are declared
# below, so they show whatever the cache / command line already holds.
message("DEADLINE_SCHEDULER flag is ${DEADLINE_SCHEDULER}")
message("CPU_Affinity flag is ${CPU_AFFINITY}")

##############################################################
#    ???!!! TO BE DOCUMENTED OPTIONS !!!???
##############################################################
add_boolean_option(ENABLE_SECURITY         True  "Enable LTE integrity and ciphering between RRC UE and eNB")
add_boolean_option(ENABLE_USE_MME          True  "eNB connected to MME (INTERFACE S1-C), not standalone eNB")
add_boolean_option(NO_RRM                  True  "DO WE HAVE A RADIO RESSOURCE MANAGER: NO")
add_boolean_option(RRC_DEFAULT_RAB_IS_AM False "set the RLC mode to AM for the default bearer")

add_boolean_option(OAI_NW_DRIVER_TYPE_ETHERNET False "????")
add_boolean_option(DEADLINE_SCHEDULER True "Use the Linux scheduler SCHED_DEADLINE: kernel >= 3.14")
add_boolean_option(CPU_AFFINITY False "Enable CPU Affinity of threads (only valid without deadline scheduler). It is enabled only with >2 CPUs")
add_boolean_option(NAS_ADDRESS_FIX False "specific to oaisim: for nasmesh driver")
add_boolean_option(NAS_NETLINK False "useless ??? Must be True to compile nasmesh driver without rtai ????")
add_boolean_option(OAISIM False "specific to oaisim")
add_boolean_option(OAI_NW_DRIVER_USE_NETLINK True "????")

add_boolean_option(USE_MME False "this flag is used only one time in lte-softmodem.c")
add_list_string_option(PACKAGE_NAME "NotDefined" "As per attribute name")
add_boolean_option(MESSAGE_CHART_GENERATOR False         "For generating sequence diagrams")
add_boolean_option(MESSAGE_CHART_GENERATOR_RLC_MAC False "trace RLC-MAC exchanges in sequence diagrams")
add_boolean_option(MESSAGE_CHART_GENERATOR_PHY     False "trace some PHY exchanges in sequence diagrams")

########################
# Include order
##########################
add_boolean_option(ENB_MODE True "Swap the include directories between openair2 and openair3" )

##########################
# SCHEDULING/REAL-TIME/PERF options
##########################
add_boolean_option(ENABLE_USE_CPU_EXECUTION_TIME True "Add data in vcd traces: disable it if perf issues")
add_boolean_option(ENABLE_VCD              True  "always true now, time measurements of proc calls and var displays")
add_boolean_option(ENABLE_VCD_FIFO         True  "time measurements of proc calls and var displays sent to FIFO (one more thread)")
add_boolean_option(LINUX                   False "used in weird memcpy() in pdcp.c ???")
add_boolean_option(LINUX_LIST              False "used only in lists.c: either use OAI implementation of lists or Linux one (should be True, but it is False")
add_boolean_option(LOG_NO_THREAD           True  "Disable thread for log, seems always set to true")
add_boolean_option(OPENAIR_LTE             True "Seems legacy: keep it to true")

##########################
# PHY options
##########################
add_boolean_option(DRIVER2013              True "only relevant for EXMIMO")
add_boolean_option(ENABLE_NEW_MULTICAST    False "specific to oaisim")
add_boolean_option(EXMIMO_IOT              True "????")
add_boolean_option(LARGE_SCALE             False "specific to oaisim: defines max eNB=2 and max UE=120")
add_boolean_option(LOCALIZATION            False "???")
add_integer_option(MAX_NUM_CCs             1     "????")
add_boolean_option(MU_RECEIVER             False "????")
add_boolean_option(PHYSIM                  True  "for L1 simulators (dlsim, ulsim, ...)")
add_boolean_option(PHY_CONTEXT             True "not clear: must remain False for dlsim")
add_boolean_option(PHY_EMUL                False "not clear: must remain False for dlsim")
add_boolean_option(SMBV                    False "Rohde&Schwarz SMBV100A vector signal generator")
add_boolean_option(DEBUG_PHY               False "Enable PHY layer debugging options")
add_boolean_option(DEBUG_PHY_PROC          False "Enable debugging of PHY layer procedures")
add_boolean_option(DEBUG_DLSCH             False "Enable debugging of DLSCH physical layer channel")

##########################
# 802.21 options
##########################
add_boolean_option(ENABLE_RAL              False "ENABLE 802.21 INTERFACE")
add_boolean_option(USE_3GPP_ADDR_AS_LINK_ADDR False "As per attribute name")

##########################
# NAS LAYER OPTIONS
##########################
add_boolean_option(ENABLE_NAS_UE_LOGGING   True  "????")
add_boolean_option(NAS_BUILT_IN_UE         True  "UE NAS layer present in this executable")
add_boolean_option(NAS_UE                  True  "NAS UE INSTANCE (<> NAS_MME)")


##########################
# ACCESS STRATUM LAYER2 OPTIONS
##########################
add_boolean_option(MAC_CONTEXT             True  "specific to oaisim")
add_boolean_option(JUMBO_FRAME             True  "ENABLE LARGE SDU in ACCESS STRATUM (larger than common MTU)")

##########################
# RLC LAYER OPTIONS
##########################
add_boolean_option(OPENAIR2                True  "Access Stratum layer 2 built in executable")
add_boolean_option(TRACE_RLC_PAYLOAD       False "Fatal assert in this case")
add_boolean_option(RLC_STOP_ON_LOST_PDU    False "Fatal assert in this case")

add_boolean_option(TRACE_RLC_MUTEX         True  "TRACE for RLC, possible problem in thread scheduling")
add_boolean_option(TRACE_RLC_AM_BO         False "TRACE for RLC AM, TO BE CHANGED IN A MORE GENERAL FLAG")
add_boolean_option(TRACE_RLC_AM_FREE_SDU   False "TRACE for RLC AM, TO BE CHANGED IN A MORE GENERAL FLAG")
add_boolean_option(TRACE_RLC_AM_HOLE       False "TRACE for RLC AM, TO BE CHANGED IN A MORE GENERAL FLAG")
add_boolean_option(TRACE_RLC_AM_PDU        False "TRACE for RLC AM, TO BE CHANGED IN A MORE GENERAL FLAG")
add_boolean_option(TRACE_RLC_AM_RESEGMENT  False "TRACE for RLC AM, TO BE CHANGED IN A MORE GENERAL FLAG")
add_boolean_option(TRACE_RLC_AM_RX         False "TRACE for RLC AM, TO BE CHANGED IN A MORE GENERAL FLAG")
add_boolean_option(TRACE_RLC_AM_RX_DECODE  False "TRACE for RLC AM, TO BE CHANGED IN A MORE GENERAL FLAG")
add_boolean_option(TRACE_RLC_AM_TX         False "TRACE for RLC AM, TO BE CHANGED IN A MORE GENERAL FLAG")
add_boolean_option(TRACE_RLC_AM_TX_STATUS  False "TRACE for RLC AM, TO BE CHANGED IN A MORE GENERAL FLAG")
add_boolean_option(TRACE_RLC_AM_STATUS_CREATION False "TRACE for RLC AM, TO BE CHANGED IN A MORE GENERAL FLAG")

add_boolean_option(STOP_ON_IP_TRAFFIC_OVERLOAD False "")
add_boolean_option(TRACE_RLC_UM_DAR        False "TRACE for RLC UM, TO BE CHANGED IN A MORE GENERAL FLAG")
add_boolean_option(TRACE_RLC_UM_DISPLAY_ASCII_DATA False "TRACE for RLC UM, TO BE CHANGED IN A MORE GENERAL FLAG")
add_boolean_option(TRACE_RLC_UM_PDU        False "TRACE for RLC UM, TO BE CHANGED IN A MORE GENERAL FLAG")
add_boolean_option(TRACE_RLC_UM_RX         False "TRACE for RLC UM, TO BE CHANGED IN A MORE GENERAL FLAG")
add_boolean_option(TRACE_RLC_UM_SEGMENT    False "TRACE for RLC UM, TO BE CHANGED IN A MORE GENERAL FLAG")
add_boolean_option(TRACE_RLC_UM_TX_STATUS  False "TRACE for RLC UM, TO BE CHANGED IN A MORE GENERAL FLAG")


##########################
# PDCP LAYER OPTIONS
##########################
add_boolean_option(PDCP_USE_NETLINK            False "For eNB, PDCP communicate with a NETLINK socket if connected to network driver, else could use a RT-FIFO")
add_boolean_option(PDCP_USE_NETLINK_QUEUES     False "When PDCP_USE_NETLINK is true, incoming IP packets are stored in queues")
add_boolean_option(LINK_ENB_PDCP_TO_IP_DRIVER  False "For eNB, PDCP communicate with a IP driver")
add_boolean_option(LINK_ENB_PDCP_TO_GTPV1U     True  "For eNB, PDCP communicate with GTP-U protocol (eNB<->S-GW)")

##########################
# RRC LAYER OPTIONS
##########################
# NOTE(review): RRC_DEFAULT_RAB_IS_AM is also declared in the
# "TO BE DOCUMENTED OPTIONS" group above.
add_boolean_option(RRC_DEFAULT_RAB_IS_AM       False "Otherwise it is UM, configure params are actually set in rrc_eNB.c:rrc_eNB_generate_defaultRRCConnectionReconfiguration(...)")


##########################
# S1AP LAYER OPTIONS
##########################
# none

# add the binary tree to the search path for include files
#######################################################
# We will find ConfigOAI.h after generation in target directory
include_directories("${OPENAIR_BIN_DIR}")
# add directories to find all include files
# the internal rule is to use generic names such as defs.h
# but to make the name unique by adding the relative path in the include directive
# example: #include "RRC/LITE/defs.h"
#find_path (include_dirs_all *.h ${OPENAIR_DIR})
#find_path (include_dirs_all *.h PATHS /usr/include NO_CMAKE_PATH)
#include_directories("${include_dirs_all}")

# Legacy exact order
if(ENB_MODE)
  include_directories("${OPENAIR2_DIR}/COMMON")
  include_directories("${OPENAIR2_DIR}/UTIL")
  include_directories("${OPENAIR2_DIR}/UTIL/LOG")
  include_directories("${OPENAIR3_DIR}/COMMON")
  include_directories("${OPENAIR3_DIR}/UTILS")
else()
  include_directories("${OPENAIR3_DIR}/COMMON")
  include_directories("${OPENAIR3_DIR}/UTILS")
  include_directories("${OPENAIR2_DIR}/COMMON")
  include_directories("${OPENAIR2_DIR}/UTIL")
  include_directories("${OPENAIR2_DIR}/UTIL/LOG")
endif()
include_directories("${NFAPI_DIR}/nfapi/public_inc")
include_directories("${NFAPI_DIR}/common/public_inc")
include_directories("${NFAPI_DIR}/pnf/public_inc")
include_directories("${NFAPI_DIR}/nfapi/inc")
include_directories("${NFAPI_DIR}/sim_common/inc")
include_directories("${NFAPI_DIR}/pnf_sim/inc")
include_directories("${OPENAIR1_DIR}")
# Directory-scoped header search paths, kept in the original order:
# openair2 layer 2 / RRC, RAL-LTE, common utilities, NAS, core-network
# protocol stacks, target/arch headers, and the eNB application.
include_directories(
  # openair2: layer 2 and RRC
  "${OPENAIR2_DIR}"
  "${OPENAIR2_DIR}/LAYER2/RLC"
  "${OPENAIR2_DIR}/LAYER2/RLC/AM_v9.3.0"
  "${OPENAIR2_DIR}/LAYER2/RLC/UM_v9.3.0"
  "${OPENAIR2_DIR}/LAYER2/RLC/TM_v9.3.0"
  "${OPENAIR2_DIR}/LAYER2/PDCP_v10.1.0"
  "${OPENAIR2_DIR}/RRC/LITE/MESSAGES"
  "${OPENAIR2_DIR}/RRC/LITE"
  # RAL-LTE
  "${OPENAIR3_DIR}/RAL-LTE/INTERFACE-802.21/INCLUDE"
  "${OPENAIR3_DIR}/RAL-LTE/LTE_RAL_ENB/INCLUDE"
  "${OPENAIR3_DIR}/RAL-LTE/LTE_RAL_UE/INCLUDE"
  # common utilities
  "${OPENAIR_DIR}/common/utils"
  "${OPENAIR_DIR}/common/utils/itti"
  # NAS
  "${OPENAIR3_DIR}/NAS/COMMON"
  "${OPENAIR3_DIR}/NAS/COMMON/API/NETWORK"
  "${OPENAIR3_DIR}/NAS/COMMON/EMM/MSG"
  "${OPENAIR3_DIR}/NAS/COMMON/ESM/MSG"
  "${OPENAIR3_DIR}/NAS/COMMON/IES"
  "${OPENAIR3_DIR}/NAS/COMMON/UTIL"
  # security and network protocol stacks
  "${OPENAIR3_DIR}/SECU"
  "${OPENAIR3_DIR}/SCTP"
  "${OPENAIR3_DIR}/S1AP"
  "${OPENAIR2_DIR}/X2AP"
  "${OPENAIR3_DIR}/UDP"
  "${OPENAIR3_DIR}/GTPV1-U"
  # targets / architecture
  "${OPENAIR_DIR}/targets/COMMON"
  "${OPENAIR_DIR}/targets/ARCH/COMMON"
  "${OPENAIR_DIR}/targets/ARCH/EXMIMO/USERSPACE/LIB/"
  "${OPENAIR_DIR}/targets/ARCH/EXMIMO/DEFS"
  # eNB application and its control modules
  "${OPENAIR2_DIR}/ENB_APP"
  "${OPENAIR2_DIR}/ENB_APP/CONTROL_MODULES/MAC"
  "${OPENAIR2_DIR}/ENB_APP/CONTROL_MODULES/RRC"
  "${OPENAIR2_DIR}/ENB_APP/CONTROL_MODULES/PDCP"
  "${OPENAIR2_DIR}/UTIL/OSA"
)
# Remaining directory-scoped header search paths: lock-free data structures,
# generic openair2 utilities, the GTP-U stack and the repository root.
include_directories(
  "${OPENAIR2_DIR}/UTIL/LFDS/liblfds6.1.1/liblfds611/inc"
  "${OPENAIR2_DIR}/UTIL/LFDS/liblfds7.0.0/liblfds700/inc"
  "${OPENAIR2_DIR}/UTIL/MEM"
  "${OPENAIR2_DIR}/UTIL/LISTS"
  "${OPENAIR2_DIR}/UTIL/FIFO"
  "${OPENAIR2_DIR}/UTIL/OCG"
  "${OPENAIR2_DIR}/UTIL/MATH"
  "${OPENAIR2_DIR}/UTIL/TIMER"
  "${OPENAIR2_DIR}/UTIL/OMG"
  "${OPENAIR2_DIR}/UTIL/OTG"
  "${OPENAIR2_DIR}/UTIL/CLI"
  "${OPENAIR2_DIR}/UTIL/OPT"
  "${OPENAIR2_DIR}/UTIL/OMV"
  "${OPENAIR2_DIR}/RRC/LITE/MESSAGES"
  "${OPENAIR3_DIR}/GTPV1-U/nw-gtpv1u/shared"
  "${OPENAIR3_DIR}/GTPV1-U/nw-gtpv1u/include"
  "${OPENAIR_DIR}"
)

# Utilities Library
################
# Select the protobuf grammar version of the FLPT messages; V3 is not supported yet.
add_list1_option(FLPT_VERSION V2 "FLPT MSG protobuf grammar version" V2 V3)

# The expansion is quoted so that an empty FLPT_VERSION cannot collapse the
# condition into a malformed "if( STREQUAL ...)" expression.
if("${FLPT_VERSION}" STREQUAL "V2")
  set(FLPTDIR V2)
elseif("${FLPT_VERSION}" STREQUAL "V3")
  set(FLPTDIR V3)
else()
  # FLPTDIR is left untouched here (as before); only make the situation visible.
  message(WARNING "FLPT_VERSION='${FLPT_VERSION}' is neither V2 nor V3; FLPTDIR is not set by this block")
endif()

# .proto inputs for the selected grammar version.
set(FLPT_MSG_DIR ${OPENAIR2_DIR}/ENB_APP/MESSAGES/${FLPTDIR})
set(FLPT_MSG_FILES
  ${FLPT_MSG_DIR}/header.proto
  ${FLPT_MSG_DIR}/flexran.proto
  ${FLPT_MSG_DIR}/stats_common.proto
  ${FLPT_MSG_DIR}/stats_messages.proto
  ${FLPT_MSG_DIR}/time_common.proto
  ${FLPT_MSG_DIR}/controller_commands.proto
  ${FLPT_MSG_DIR}/mac_primitives.proto
  ${FLPT_MSG_DIR}/config_messages.proto
  ${FLPT_MSG_DIR}/config_common.proto
  ${FLPT_MSG_DIR}/control_delegation.proto
)

# Generate the C sources while CMake configures (execute_process runs immediately).
# protoc_call and protobuf_generated_dir are defined outside this section; the
# command receives the output directory, the input directory and the .proto files.
set(FLPT_C_DIR ${protobuf_generated_dir}/${FLPTDIR})
#message("calling protoc_call=${protoc_call} FLPT_C_DIR=${FLPT_C_DIR} FLPT_MSG_FILES=${FLPT_MSG_FILES}")
execute_process(
  COMMAND ${protoc_call} ${FLPT_C_DIR} ${FLPT_MSG_DIR} ${FLPT_MSG_FILES}
  RESULT_VARIABLE flpt_protoc_result
)
# Report a failed generation step instead of silently ignoring it; the build
# still proceeds exactly as before.
if(NOT "${flpt_protoc_result}" STREQUAL "0")
  message(WARNING "FLPT protobuf generation returned '${flpt_protoc_result}'; sources in ${FLPT_C_DIR} may be missing or stale")
endif()

# Any .c file already present in the output directory, plus the explicit list of
# files expected from the .proto inputs above.
file(GLOB FLPT_source ${FLPT_C_DIR}/*.c)
set(FLPT_OAI_generated
  ${FLPT_C_DIR}/header.pb-c.c
  ${FLPT_C_DIR}/flexran.pb-c.c
  ${FLPT_C_DIR}/stats_common.pb-c.c
  ${FLPT_C_DIR}/stats_messages.pb-c.c
  ${FLPT_C_DIR}/time_common.pb-c.c
  ${FLPT_C_DIR}/controller_commands.pb-c.c
  ${FLPT_C_DIR}/mac_primitives.pb-c.c
  ${FLPT_C_DIR}/config_messages.pb-c.c
  ${FLPT_C_DIR}/config_common.pb-c.c
  ${FLPT_C_DIR}/control_delegation.pb-c.c
)

# Generated headers found in the output directory.
file(GLOB flpt_h ${FLPT_C_DIR}/*.h)

add_library(FLPT_MSG
  ${FLPT_OAI_generated}
  ${FLPT_source}
)
set(FLPT_MSG_LIB FLPT_MSG)
#message("prpt c dir is : ${FLPT_C_DIR}")
include_directories(${FLPT_C_DIR})

# Asynchronous interface helpers (socket link, link manager, queues).
add_library(ASYNC_IF
  ${OPENAIR2_DIR}/UTIL/ASYNC_IF/socket_link.c
  ${OPENAIR2_DIR}/UTIL/ASYNC_IF/link_manager.c
  ${OPENAIR2_DIR}/UTIL/ASYNC_IF/message_queue.c
  ${OPENAIR2_DIR}/UTIL/ASYNC_IF/ringbuffer_queue.c
)
set(ASYNC_IF_LIB ASYNC_IF)
include_directories(${OPENAIR2_DIR}/UTIL/ASYNC_IF)

# FlexRAN agent: core sources plus the MAC, RRC and PDCP control modules.
add_library(FLEXRAN_AGENT
  ${OPENAIR2_DIR}/ENB_APP/flexran_agent_handler.c
  ${OPENAIR2_DIR}/ENB_APP/flexran_agent_common.c
  ${OPENAIR2_DIR}/ENB_APP/flexran_agent_ran_api.c
  ${OPENAIR2_DIR}/ENB_APP/flexran_agent_timer.c
  ${OPENAIR2_DIR}/ENB_APP/flexran_agent_common_internal.c
  ${OPENAIR2_DIR}/ENB_APP/CONTROL_MODULES/MAC/flexran_agent_mac.c
  ${OPENAIR2_DIR}/ENB_APP/CONTROL_MODULES/RRC/flexran_agent_rrc.c
  ${OPENAIR2_DIR}/ENB_APP/CONTROL_MODULES/PDCP/flexran_agent_pdcp.c
  ${OPENAIR2_DIR}/ENB_APP/flexran_agent.c
  ${OPENAIR2_DIR}/ENB_APP/flexran_agent_task_manager.c
  ${OPENAIR2_DIR}/ENB_APP/flexran_agent_net_comm.c
  ${OPENAIR2_DIR}/ENB_APP/flexran_agent_async.c
  ${OPENAIR2_DIR}/ENB_APP/CONTROL_MODULES/MAC/flexran_agent_mac_internal.c
)
set(FLEXRAN_AGENT_LIB FLEXRAN_AGENT)
#include_directories(${OPENAIR2_DIR}/ENB_APP)

# Name of the protobuf runtime library (C binding).
set(PROTOBUF_LIB "protobuf-c")

# Locate libyaml: header directory and library file.
find_path(LIBYAML_INCLUDE_DIR NAMES yaml.h)
find_library(LIBYAML_LIBRARIES NAMES yaml libyaml)

include(FindPackageHandleStandardArgs)
# Report the libyaml lookup result (sets the package "found" state from the two
# variables) and hide both cache entries from the default GUI view.
find_package_handle_standard_args(Yaml DEFAULT_MSG LIBYAML_LIBRARIES LIBYAML_INCLUDE_DIR)
mark_as_advanced(LIBYAML_INCLUDE_DIR LIBYAML_LIBRARIES)

#set(PROTOBUF_LIB "protobuf") #for Cpp


# Hash table helpers
add_library(HASHTABLE
  ${OPENAIR_DIR}/common/utils/hashtable/hashtable.c
  ${OPENAIR_DIR}/common/utils/hashtable/obj_hashtable.c
)
include_directories(${OPENAIR_DIR}/common/utils/hashtable)

# Message sequence chart library: only built when MESSAGE_CHART_GENERATOR is set;
# its header directory is added unconditionally.
if (MESSAGE_CHART_GENERATOR)
  add_library(MSC
    ${OPENAIR_DIR}/common/utils/msc/msc.c
  )
  set(MSC_LIB MSC)
endif()
include_directories(${OPENAIR_DIR}/common/utils/msc)

# Generic openair2 utilities: CLI, FIFO, lists, logging, math, memory, OCG, OMG, OPT, OTG
set(UTIL_SRC
  ${OPENAIR2_DIR}/UTIL/CLI/cli.c
  ${OPENAIR2_DIR}/UTIL/CLI/cli_cmd.c
  ${OPENAIR2_DIR}/UTIL/CLI/cli_server.c
  ${OPENAIR2_DIR}/UTIL/FIFO/pad_list.c
  ${OPENAIR2_DIR}/UTIL/LISTS/list.c
  ${OPENAIR2_DIR}/UTIL/LISTS/list2.c
  ${OPENAIR2_DIR}/UTIL/LOG/log.c
  ${OPENAIR2_DIR}/UTIL/LOG/vcd_signal_dumper.c
  ${OPENAIR2_DIR}/UTIL/MATH/oml.c
  ${OPENAIR2_DIR}/UTIL/MEM/mem_block.c
  ${OPENAIR2_DIR}/UTIL/OCG/OCG.c
  ${OPENAIR2_DIR}/UTIL/OCG/OCG_create_dir.c
  ${OPENAIR2_DIR}/UTIL/OCG/OCG_detect_file.c
  ${OPENAIR2_DIR}/UTIL/OCG/OCG_generate_report.c
  ${OPENAIR2_DIR}/UTIL/OCG/OCG_parse_filename.c
  ${OPENAIR2_DIR}/UTIL/OCG/OCG_parse_XML.c
  ${OPENAIR2_DIR}/UTIL/OCG/OCG_save_XML.c
  ${OPENAIR2_DIR}/UTIL/OMG/common.c
  ${OPENAIR2_DIR}/UTIL/OMG/grid.c
  ${OPENAIR2_DIR}/UTIL/OMG/job.c
  ${OPENAIR2_DIR}/UTIL/OMG/mobility_parser.c
  ${OPENAIR2_DIR}/UTIL/OMG/omg.c
  #${OPENAIR2_DIR}/UTIL/OMG/omg_hashtable.c
  ${OPENAIR2_DIR}/UTIL/OMG/rwalk.c
  ${OPENAIR2_DIR}/UTIL/OMG/rwp.c
  ${OPENAIR2_DIR}/UTIL/OMG/static.c
  ${OPENAIR2_DIR}/UTIL/OMG/steadystaterwp.c
  ${OPENAIR2_DIR}/UTIL/OMG/trace.c
  ${OPENAIR2_DIR}/UTIL/OMG/trace_hashtable.c
  ${OPENAIR2_DIR}/UTIL/OPT/probe.c
  ${OPENAIR2_DIR}/UTIL/OTG/otg_tx.c
  ${OPENAIR2_DIR}/UTIL/OTG/otg.c
  ${OPENAIR2_DIR}/UTIL/OTG/otg_kpi.c
  ${OPENAIR2_DIR}/UTIL/OTG/otg_models.c
  ${OPENAIR2_DIR}/UTIL/OTG/otg_form.c
  ${OPENAIR2_DIR}/UTIL/OTG/otg_rx.c
)
add_library(UTIL ${UTIL_SRC})

# OMG SUMO support is currently disabled:
#set(OMG_SUMO_SRC
#  ${OPENAIR2_DIR}/UTIL/OMG/client_traci_OMG.c
#  ${OPENAIR2_DIR}/UTIL/OMG/id_manager.c
#  ${OPENAIR2_DIR}/UTIL/OMG/sumo.c
#  ${OPENAIR2_DIR}/UTIL/OMG/socket_traci_OMG.c
#  ${OPENAIR2_DIR}/UTIL/OMG/storage_traci_OMG.c
#  )
#add_library(OMG_SUMO ${OMG_SUMO_SRC})

# Security sources under openair2/UTIL/OSA
set(SECU_OSA_SRC
  ${OPENAIR2_DIR}/UTIL/OSA/osa_key_deriver.c
  ${OPENAIR2_DIR}/UTIL/OSA/osa_rijndael.c
  ${OPENAIR2_DIR}/UTIL/OSA/osa_snow3g.c
  ${OPENAIR2_DIR}/UTIL/OSA/osa_stream_eea.c
  ${OPENAIR2_DIR}/UTIL/OSA/osa_stream_eia.c
)
add_library(SECU_OSA ${SECU_OSA_SRC})

# Security sources under openair3/SECU
set(SECU_CN_SRC
  ${OPENAIR3_DIR}/SECU/kdf.c
  ${OPENAIR3_DIR}/SECU/rijndael.c
  ${OPENAIR3_DIR}/SECU/snow3g.c
  ${OPENAIR3_DIR}/SECU/key_nas_deriver.c
  ${OPENAIR3_DIR}/SECU/nas_stream_eea1.c
  ${OPENAIR3_DIR}/SECU/nas_stream_eia1.c
  ${OPENAIR3_DIR}/SECU/nas_stream_eea2.c
  ${OPENAIR3_DIR}/SECU/nas_stream_eia2.c
)
add_library(SECU_CN ${SECU_CN_SRC})

# Scheduler
################################
# Full scheduler source set (eNB and UE procedures).
set(SCHED_SRC
  ${OPENAIR1_DIR}/SCHED/fapi_l1.c
  ${OPENAIR1_DIR}/SCHED/phy_procedures_lte_eNb.c
  ${OPENAIR1_DIR}/SCHED/phy_procedures_lte_ue.c
  ${OPENAIR1_DIR}/SCHED/phy_procedures_lte_common.c
  ${OPENAIR1_DIR}/SCHED/prach_procedures.c
  ${OPENAIR1_DIR}/SCHED/ru_procedures.c
#  ${OPENAIR1_DIR}/SCHED/phy_mac_stub.c
  ${OPENAIR1_DIR}/SCHED/pucch_pc.c
  ${OPENAIR1_DIR}/SCHED/pusch_pc.c
  ${OPENAIR1_DIR}/SCHED/srs_pc.c
)
add_library(SCHED_LIB ${SCHED_SRC})

# UE variant: same list without fapi_l1.c and phy_procedures_lte_eNb.c.
set(SCHED_SRC_UE
  ${OPENAIR1_DIR}/SCHED/phy_procedures_lte_ue.c
  ${OPENAIR1_DIR}/SCHED/phy_procedures_lte_common.c
  ${OPENAIR1_DIR}/SCHED/ru_procedures.c
  ${OPENAIR1_DIR}/SCHED/prach_procedures.c
  ${OPENAIR1_DIR}/SCHED/pucch_pc.c
  ${OPENAIR1_DIR}/SCHED/pusch_pc.c
  ${OPENAIR1_DIR}/SCHED/srs_pc.c
)
add_library(SCHED_UE_LIB ${SCHED_SRC_UE})

# nFAPI
#################################
set(NFAPI_COMMON_SRC
  ${NFAPI_DIR}/common/src/debug.c
)
add_library(NFAPI_COMMON_LIB ${NFAPI_COMMON_SRC})

include_directories(${NFAPI_DIR}/common/public_inc)

# nFAPI message library
set(NFAPI_SRC
  ${NFAPI_DIR}/nfapi/src/nfapi.c
  ${NFAPI_DIR}/nfapi/src/nfapi_p4.c
  ${NFAPI_DIR}/nfapi/src/nfapi_p5.c
  ${NFAPI_DIR}/nfapi/src/nfapi_p7.c
)
add_library(NFAPI_LIB ${NFAPI_SRC})

include_directories(
  ${NFAPI_DIR}/nfapi/public_inc
  ${NFAPI_DIR}/nfapi/inc
)

# nFAPI PNF side
set(NFAPI_PNF_SRC
  ${NFAPI_DIR}/pnf/src/pnf.c
  ${NFAPI_DIR}/pnf/src/pnf_interface.c
  ${NFAPI_DIR}/pnf/src/pnf_p7.c
  ${NFAPI_DIR}/pnf/src/pnf_p7_interface.c
)
add_library(NFAPI_PNF_LIB ${NFAPI_PNF_SRC})

include_directories(
  ${NFAPI_DIR}/pnf/public_inc
  ${NFAPI_DIR}/pnf/inc
)

# nFAPI VNF side
set(NFAPI_VNF_SRC
  ${NFAPI_DIR}/vnf/src/vnf.c
  ${NFAPI_DIR}/vnf/src/vnf_interface.c
  ${NFAPI_DIR}/vnf/src/vnf_p7.c
  ${NFAPI_DIR}/vnf/src/vnf_p7_interface.c
)
add_library(NFAPI_VNF_LIB ${NFAPI_VNF_SRC})

include_directories(
  ${NFAPI_DIR}/vnf/public_inc
  ${NFAPI_DIR}/vnf/inc
)

# nFAPI user defined code
#############################
set(NFAPI_USER_SRC
  ${NFAPI_USER_DIR}/nfapi.c
  ${NFAPI_USER_DIR}/nfapi_pnf.c
  ${NFAPI_USER_DIR}/nfapi_vnf.c
)
add_library(NFAPI_USER_LIB ${NFAPI_USER_SRC})
include_directories(${NFAPI_USER_DIR})

# Layer 1
#############################
# Turbo coder/decoder implementations, built below as the loadable module "coding".
set(PHY_TURBOSRC
  ${OPENAIR1_DIR}/PHY/CODING/3gpplte_sse.c
  ${OPENAIR1_DIR}/PHY/CODING/3gpplte.c
  ${OPENAIR1_DIR}/PHY/CODING/3gpplte_turbo_decoder_sse_8bit.c
  ${OPENAIR1_DIR}/PHY/CODING/3gpplte_turbo_decoder_sse_16bit.c
  ${OPENAIR1_DIR}/PHY/CODING/3gpplte_turbo_decoder_avx2_16bit.c
  ${OPENAIR1_DIR}/PHY/CODING/3gpplte_turbo_decoder.c
)
# Source compiled into the PHY libraries in place of the turbo sources above.
set(PHY_TURBOIF
  ${OPENAIR1_DIR}/PHY/CODING/coding_load.c
)

add_library(coding MODULE ${PHY_TURBOSRC})

# Source list of the PHY library.
set(PHY_SRC
  # depend on code generation from asn1c
  ${RRC_FULL_DIR}/asn1_constants.h
  # actual source: LTE transport channels
  ${OPENAIR1_DIR}/PHY/LTE_TRANSPORT/pss.c
  ${OPENAIR1_DIR}/PHY/LTE_TRANSPORT/sss.c
  ${OPENAIR1_DIR}/PHY/LTE_TRANSPORT/pilots.c
  ${OPENAIR1_DIR}/PHY/LTE_TRANSPORT/pilots_mbsfn.c
  ${OPENAIR1_DIR}/PHY/LTE_TRANSPORT/dlsch_coding.c
  ${OPENAIR1_DIR}/PHY/LTE_TRANSPORT/dlsch_modulation.c
  ${OPENAIR1_DIR}/PHY/LTE_TRANSPORT/dlsch_demodulation.c
  ${OPENAIR1_DIR}/PHY/LTE_TRANSPORT/dlsch_llr_computation.c
  ${OPENAIR1_DIR}/PHY/LTE_TRANSPORT/power_control.c
  ${OPENAIR1_DIR}/PHY/LTE_TRANSPORT/dlsch_decoding.c
  ${OPENAIR1_DIR}/PHY/LTE_TRANSPORT/dlsch_scrambling.c
  ${OPENAIR1_DIR}/PHY/LTE_TRANSPORT/dci_tools.c
  ${OPENAIR1_DIR}/PHY/LTE_TRANSPORT/uci_tools.c
  ${OPENAIR1_DIR}/PHY/LTE_TRANSPORT/lte_mcs.c
  ${OPENAIR1_DIR}/PHY/LTE_TRANSPORT/pbch.c
  ${OPENAIR1_DIR}/PHY/LTE_TRANSPORT/dci.c
  ${OPENAIR1_DIR}/PHY/LTE_TRANSPORT/edci.c
  ${OPENAIR1_DIR}/PHY/LTE_TRANSPORT/phich.c
  ${OPENAIR1_DIR}/PHY/LTE_TRANSPORT/pcfich.c
  ${OPENAIR1_DIR}/PHY/LTE_TRANSPORT/pucch.c
  ${OPENAIR1_DIR}/PHY/LTE_TRANSPORT/prach.c
  ${OPENAIR1_DIR}/PHY/LTE_TRANSPORT/pmch.c
  ${OPENAIR1_DIR}/PHY/LTE_TRANSPORT/pch.c
  ${OPENAIR1_DIR}/PHY/LTE_TRANSPORT/group_hopping.c
  ${OPENAIR1_DIR}/PHY/LTE_TRANSPORT/srs_modulation.c
  ${OPENAIR1_DIR}/PHY/LTE_TRANSPORT/drs_modulation.c
  ${OPENAIR1_DIR}/PHY/LTE_TRANSPORT/ulsch_modulation.c
  ${OPENAIR1_DIR}/PHY/LTE_TRANSPORT/ulsch_demodulation.c
  ${OPENAIR1_DIR}/PHY/LTE_TRANSPORT/ulsch_coding.c
  ${OPENAIR1_DIR}/PHY/LTE_TRANSPORT/ulsch_decoding.c
  ${OPENAIR1_DIR}/PHY/LTE_TRANSPORT/rar_tools.c
  ${OPENAIR1_DIR}/PHY/LTE_TRANSPORT/print_stats.c
  ${OPENAIR1_DIR}/PHY/LTE_TRANSPORT/initial_sync.c
  ${OPENAIR1_DIR}/PHY/LTE_TRANSPORT/if4_tools.c
  ${OPENAIR1_DIR}/PHY/LTE_TRANSPORT/if5_tools.c
  # modulation
  ${OPENAIR1_DIR}/PHY/MODULATION/ofdm_mod.c
  ${OPENAIR1_DIR}/PHY/MODULATION/slot_fep.c
  ${OPENAIR1_DIR}/PHY/MODULATION/slot_fep_mbsfn.c
  ${OPENAIR1_DIR}/PHY/MODULATION/slot_fep_ul.c
  ${OPENAIR1_DIR}/PHY/MODULATION/ul_7_5_kHz.c
  ${OPENAIR1_DIR}/PHY/MODULATION/beamforming.c
  ${OPENAIR1_DIR}/PHY/MODULATION/compute_bf_weights.c
  # estimation
  ${OPENAIR1_DIR}/PHY/LTE_ESTIMATION/freq_equalization.c
  ${OPENAIR1_DIR}/PHY/LTE_ESTIMATION/lte_sync_time.c
  ${OPENAIR1_DIR}/PHY/LTE_ESTIMATION/lte_sync_timefreq.c
  ${OPENAIR1_DIR}/PHY/LTE_ESTIMATION/lte_adjust_sync.c
  ${OPENAIR1_DIR}/PHY/LTE_ESTIMATION/lte_dl_channel_estimation.c
  ${OPENAIR1_DIR}/PHY/LTE_ESTIMATION/lte_dl_bf_channel_estimation.c
  ${OPENAIR1_DIR}/PHY/LTE_ESTIMATION/lte_dl_mbsfn_channel_estimation.c
  ${OPENAIR1_DIR}/PHY/LTE_ESTIMATION/lte_ul_channel_estimation.c
  ${OPENAIR1_DIR}/PHY/LTE_ESTIMATION/lte_est_freq_offset.c
  ${OPENAIR1_DIR}/PHY/LTE_ESTIMATION/lte_ue_measurements.c
  ${OPENAIR1_DIR}/PHY/LTE_ESTIMATION/lte_eNB_measurements.c
  ${OPENAIR1_DIR}/PHY/LTE_ESTIMATION/adjust_gain.c
  # reference signals
  ${OPENAIR1_DIR}/PHY/LTE_REFSIG/lte_dl_cell_spec.c
  ${OPENAIR1_DIR}/PHY/LTE_REFSIG/lte_dl_uespec.c
  ${OPENAIR1_DIR}/PHY/LTE_REFSIG/lte_gold.c
  ${OPENAIR1_DIR}/PHY/LTE_REFSIG/lte_gold_mbsfn.c
  ${OPENAIR1_DIR}/PHY/LTE_REFSIG/lte_dl_mbsfn.c
  ${OPENAIR1_DIR}/PHY/LTE_REFSIG/lte_ul_ref.c
  # coding
  ${OPENAIR1_DIR}/PHY/CODING/lte_segmentation.c
  ${OPENAIR1_DIR}/PHY/CODING/ccoding_byte.c
  ${OPENAIR1_DIR}/PHY/CODING/ccoding_byte_lte.c
  ${OPENAIR1_DIR}/PHY/CODING/crc_byte.c
  ${PHY_TURBOIF}
  ${OPENAIR1_DIR}/PHY/CODING/lte_rate_matching.c
  ${OPENAIR1_DIR}/PHY/CODING/viterbi.c
  ${OPENAIR1_DIR}/PHY/CODING/viterbi_lte.c
  # initialisation
  ${OPENAIR1_DIR}/PHY/INIT/lte_init.c
  ${OPENAIR1_DIR}/PHY/INIT/lte_init_ru.c
  ${OPENAIR1_DIR}/PHY/INIT/lte_init_ue.c
  ${OPENAIR1_DIR}/PHY/INIT/init_top.c
  ${OPENAIR1_DIR}/PHY/INIT/lte_parms.c
  ${OPENAIR1_DIR}/PHY/INIT/lte_param_init.c
  # tools
  ${OPENAIR1_DIR}/PHY/TOOLS/file_output.c
  ${OPENAIR1_DIR}/PHY/TOOLS/cadd_vv.c
  ${OPENAIR1_DIR}/PHY/TOOLS/lte_dfts.c
  ${OPENAIR1_DIR}/PHY/TOOLS/log2_approx.c
  ${OPENAIR1_DIR}/PHY/TOOLS/cmult_sv.c
  ${OPENAIR1_DIR}/PHY/TOOLS/cmult_vv.c
  ${OPENAIR1_DIR}/PHY/TOOLS/cdot_prod.c
  ${OPENAIR1_DIR}/PHY/TOOLS/signal_energy.c
  ${OPENAIR1_DIR}/PHY/TOOLS/dB_routines.c
  ${OPENAIR1_DIR}/PHY/TOOLS/sqrt.c
  ${OPENAIR1_DIR}/PHY/TOOLS/time_meas.c
  ${OPENAIR1_DIR}/PHY/TOOLS/lut.c
)

# Source list of the PHY_UE library. Compared with PHY_SRC it omits
# PHY/INIT/lte_init.c and additionally compiles PHY/CODING/3gpplte_sse.c.
set(PHY_SRC_UE
  # depend on code generation from asn1c
  ${RRC_FULL_DIR}/asn1_constants.h
  # actual source
  ${OPENAIR1_DIR}/PHY/LTE_TRANSPORT/pss.c
  ${OPENAIR1_DIR}/PHY/LTE_TRANSPORT/sss.c
  ${OPENAIR1_DIR}/PHY/LTE_TRANSPORT/pilots.c
  ${OPENAIR1_DIR}/PHY/LTE_TRANSPORT/pilots_mbsfn.c
  ${OPENAIR1_DIR}/PHY/LTE_TRANSPORT/dlsch_coding.c
  ${OPENAIR1_DIR}/PHY/LTE_TRANSPORT/dlsch_modulation.c
  ${OPENAIR1_DIR}/PHY/LTE_TRANSPORT/dlsch_demodulation.c
  ${OPENAIR1_DIR}/PHY/LTE_TRANSPORT/dlsch_llr_computation.c
  ${OPENAIR1_DIR}/PHY/LTE_TRANSPORT/power_control.c
  ${OPENAIR1_DIR}/PHY/LTE_TRANSPORT/dlsch_decoding.c
  ${OPENAIR1_DIR}/PHY/LTE_TRANSPORT/dlsch_scrambling.c
  ${OPENAIR1_DIR}/PHY/LTE_TRANSPORT/dci_tools.c
  ${OPENAIR1_DIR}/PHY/LTE_TRANSPORT/uci_tools.c
  ${OPENAIR1_DIR}/PHY/LTE_TRANSPORT/lte_mcs.c
  ${OPENAIR1_DIR}/PHY/LTE_TRANSPORT/pbch.c
  ${OPENAIR1_DIR}/PHY/LTE_TRANSPORT/dci.c
  ${OPENAIR1_DIR}/PHY/LTE_TRANSPORT/edci.c
  ${OPENAIR1_DIR}/PHY/LTE_TRANSPORT/phich.c
  ${OPENAIR1_DIR}/PHY/LTE_TRANSPORT/pcfich.c
  ${OPENAIR1_DIR}/PHY/LTE_TRANSPORT/pucch.c
  ${OPENAIR1_DIR}/PHY/LTE_TRANSPORT/prach.c
  ${OPENAIR1_DIR}/PHY/LTE_TRANSPORT/pmch.c
  ${OPENAIR1_DIR}/PHY/LTE_TRANSPORT/pch.c
  ${OPENAIR1_DIR}/PHY/LTE_TRANSPORT/group_hopping.c
  ${OPENAIR1_DIR}/PHY/LTE_TRANSPORT/srs_modulation.c
  ${OPENAIR1_DIR}/PHY/LTE_TRANSPORT/drs_modulation.c
  ${OPENAIR1_DIR}/PHY/LTE_TRANSPORT/ulsch_modulation.c
  ${OPENAIR1_DIR}/PHY/LTE_TRANSPORT/ulsch_demodulation.c
  ${OPENAIR1_DIR}/PHY/LTE_TRANSPORT/ulsch_coding.c
  ${OPENAIR1_DIR}/PHY/LTE_TRANSPORT/ulsch_decoding.c
  ${OPENAIR1_DIR}/PHY/LTE_TRANSPORT/rar_tools.c
  ${OPENAIR1_DIR}/PHY/LTE_TRANSPORT/print_stats.c
  ${OPENAIR1_DIR}/PHY/LTE_TRANSPORT/initial_sync.c
  ${OPENAIR1_DIR}/PHY/LTE_TRANSPORT/if4_tools.c
  ${OPENAIR1_DIR}/PHY/LTE_TRANSPORT/if5_tools.c
  ${OPENAIR1_DIR}/PHY/MODULATION/ofdm_mod.c
  ${OPENAIR1_DIR}/PHY/MODULATION/slot_fep.c
  ${OPENAIR1_DIR}/PHY/MODULATION/slot_fep_mbsfn.c
  ${OPENAIR1_DIR}/PHY/MODULATION/slot_fep_ul.c
  ${OPENAIR1_DIR}/PHY/MODULATION/ul_7_5_kHz.c
  ${OPENAIR1_DIR}/PHY/MODULATION/beamforming.c
  ${OPENAIR1_DIR}/PHY/MODULATION/compute_bf_weights.c
  ${OPENAIR1_DIR}/PHY/LTE_ESTIMATION/freq_equalization.c
  ${OPENAIR1_DIR}/PHY/LTE_ESTIMATION/lte_sync_time.c
  ${OPENAIR1_DIR}/PHY/LTE_ESTIMATION/lte_sync_timefreq.c
  ${OPENAIR1_DIR}/PHY/LTE_ESTIMATION/lte_adjust_sync.c
  ${OPENAIR1_DIR}/PHY/LTE_ESTIMATION/lte_dl_channel_estimation.c
  ${OPENAIR1_DIR}/PHY/LTE_ESTIMATION/lte_dl_bf_channel_estimation.c
  ${OPENAIR1_DIR}/PHY/LTE_ESTIMATION/lte_dl_mbsfn_channel_estimation.c
  ${OPENAIR1_DIR}/PHY/LTE_ESTIMATION/lte_ul_channel_estimation.c
  ${OPENAIR1_DIR}/PHY/LTE_ESTIMATION/lte_est_freq_offset.c
  ${OPENAIR1_DIR}/PHY/LTE_ESTIMATION/lte_ue_measurements.c
  ${OPENAIR1_DIR}/PHY/LTE_ESTIMATION/lte_eNB_measurements.c
  ${OPENAIR1_DIR}/PHY/LTE_ESTIMATION/adjust_gain.c
  ${OPENAIR1_DIR}/PHY/LTE_REFSIG/lte_dl_cell_spec.c
  ${OPENAIR1_DIR}/PHY/LTE_REFSIG/lte_dl_uespec.c
  ${OPENAIR1_DIR}/PHY/LTE_REFSIG/lte_gold.c
  ${OPENAIR1_DIR}/PHY/LTE_REFSIG/lte_gold_mbsfn.c
  ${OPENAIR1_DIR}/PHY/LTE_REFSIG/lte_dl_mbsfn.c
  ${OPENAIR1_DIR}/PHY/LTE_REFSIG/lte_ul_ref.c
  ${OPENAIR1_DIR}/PHY/CODING/lte_segmentation.c
  ${OPENAIR1_DIR}/PHY/CODING/ccoding_byte.c
  ${OPENAIR1_DIR}/PHY/CODING/ccoding_byte_lte.c
  ${OPENAIR1_DIR}/PHY/CODING/3gpplte_sse.c
  ${OPENAIR1_DIR}/PHY/CODING/crc_byte.c
  ${PHY_TURBOIF}
  ${OPENAIR1_DIR}/PHY/CODING/lte_rate_matching.c
  ${OPENAIR1_DIR}/PHY/CODING/viterbi.c
  ${OPENAIR1_DIR}/PHY/CODING/viterbi_lte.c
  ${OPENAIR1_DIR}/PHY/INIT/lte_init_ru.c
  ${OPENAIR1_DIR}/PHY/INIT/lte_init_ue.c
  ${OPENAIR1_DIR}/PHY/INIT/init_top.c
  ${OPENAIR1_DIR}/PHY/INIT/lte_parms.c
  ${OPENAIR1_DIR}/PHY/INIT/lte_param_init.c
  ${OPENAIR1_DIR}/PHY/TOOLS/file_output.c
  ${OPENAIR1_DIR}/PHY/TOOLS/cadd_vv.c
  ${OPENAIR1_DIR}/PHY/TOOLS/lte_dfts.c
  ${OPENAIR1_DIR}/PHY/TOOLS/log2_approx.c
  ${OPENAIR1_DIR}/PHY/TOOLS/cmult_sv.c
  ${OPENAIR1_DIR}/PHY/TOOLS/cmult_vv.c
  ${OPENAIR1_DIR}/PHY/TOOLS/cdot_prod.c
  ${OPENAIR1_DIR}/PHY/TOOLS/signal_energy.c
  ${OPENAIR1_DIR}/PHY/TOOLS/dB_routines.c
  ${OPENAIR1_DIR}/PHY/TOOLS/sqrt.c
  ${OPENAIR1_DIR}/PHY/TOOLS/time_meas.c
  ${OPENAIR1_DIR}/PHY/TOOLS/lut.c
)

# Optional SMBV source. It is appended as a separate list element: building the
# value as one quoted string would glue the new path onto the previous entry
# with a space and produce a file name that does not exist.
if(SMBV)
  list(APPEND PHY_SRC ${OPENAIR1_DIR}/PHY/TOOLS/smbv.c)
endif()

# AVX2 LLR computation, added to both PHY variants. The expansion is quoted so an
# unset or empty COMPILATION_AVX2 cannot yield a malformed if() expression.
if("${COMPILATION_AVX2}" STREQUAL "True")
  list(APPEND PHY_SRC    ${OPENAIR1_DIR}/PHY/LTE_TRANSPORT/dlsch_llr_computation_avx2.c)
  list(APPEND PHY_SRC_UE ${OPENAIR1_DIR}/PHY/LTE_TRANSPORT/dlsch_llr_computation_avx2.c)
endif()

add_library(PHY ${PHY_SRC})
add_library(PHY_UE ${PHY_SRC_UE})

#Layer 2 library
#####################
set(MAC_DIR ${OPENAIR2_DIR}/LAYER2/MAC)
set(PHY_INTERFACE_DIR ${OPENAIR2_DIR}/PHY_INTERFACE)
set(RLC_DIR ${OPENAIR2_DIR}/LAYER2/RLC)
set(RLC_UM_DIR ${OPENAIR2_DIR}/LAYER2/RLC/UM_v9.3.0)
set(RLC_AM_DIR ${OPENAIR2_DIR}/LAYER2/RLC/AM_v9.3.0)
set(RLC_TM_DIR ${OPENAIR2_DIR}/LAYER2/RLC/TM_v9.3.0)
set(RRC_DIR ${OPENAIR2_DIR}/RRC/LITE)
set(PDCP_DIR ${OPENAIR2_DIR}/LAYER2/PDCP_v10.1.0)

# PDCP, RLC (AM/TM/UM) and RRC sources of the L2 library.
set(L2_SRC
  ${OPENAIR2_DIR}/LAYER2/openair2_proc.c
  ${PDCP_DIR}/pdcp.c
  ${PDCP_DIR}/pdcp_fifo.c
  ${PDCP_DIR}/pdcp_sequence_manager.c
  ${PDCP_DIR}/pdcp_primitives.c
  ${PDCP_DIR}/pdcp_util.c
  ${PDCP_DIR}/pdcp_security.c
  ${PDCP_DIR}/pdcp_netlink.c
  ${RLC_AM_DIR}/rlc_am.c
  ${RLC_AM_DIR}/rlc_am_init.c
  ${RLC_AM_DIR}/rlc_am_timer_poll_retransmit.c
  ${RLC_AM_DIR}/rlc_am_timer_reordering.c
  ${RLC_AM_DIR}/rlc_am_timer_status_prohibit.c
  ${RLC_AM_DIR}/rlc_am_segment.c
  ${RLC_AM_DIR}/rlc_am_segments_holes.c
  ${RLC_AM_DIR}/rlc_am_in_sdu.c
  ${RLC_AM_DIR}/rlc_am_receiver.c
  ${RLC_AM_DIR}/rlc_am_retransmit.c
  ${RLC_AM_DIR}/rlc_am_windows.c
  ${RLC_AM_DIR}/rlc_am_rx_list.c
  ${RLC_AM_DIR}/rlc_am_reassembly.c
  ${RLC_AM_DIR}/rlc_am_status_report.c
  ${RLC_TM_DIR}/rlc_tm.c
  ${RLC_TM_DIR}/rlc_tm_init.c
  ${RLC_UM_DIR}/rlc_um.c
  ${RLC_UM_DIR}/rlc_um_fsm.c
  ${RLC_UM_DIR}/rlc_um_control_primitives.c
  ${RLC_UM_DIR}/rlc_um_segment.c
  ${RLC_UM_DIR}/rlc_um_reassembly.c
  ${RLC_UM_DIR}/rlc_um_receiver.c
  ${RLC_UM_DIR}/rlc_um_dar.c
  ${RLC_DIR}/rlc_mac.c
  ${RLC_DIR}/rlc.c
  ${RLC_DIR}/rlc_rrc.c
  ${RLC_DIR}/rlc_mpls.c
  ${RRC_DIR}/rrc_UE.c
  ${RRC_DIR}/rrc_eNB.c
  ${RRC_DIR}/rrc_eNB_S1AP.c
  ${RRC_DIR}/rrc_eNB_UE_context.c
  ${RRC_DIR}/rrc_common.c
  ${RRC_DIR}/L2_interface.c
  ${RRC_DIR}/L2_interface_common.c
  ${RRC_DIR}/L2_interface_ue.c
)

# UE variant: no openair2_proc.c, no rrc_eNB*.c and no L2_interface.c.
set(L2_SRC_UE
  ${PDCP_DIR}/pdcp.c
  ${PDCP_DIR}/pdcp_fifo.c
  ${PDCP_DIR}/pdcp_sequence_manager.c
  ${PDCP_DIR}/pdcp_primitives.c
  ${PDCP_DIR}/pdcp_util.c
  ${PDCP_DIR}/pdcp_security.c
  ${PDCP_DIR}/pdcp_netlink.c
  ${RLC_AM_DIR}/rlc_am.c
  ${RLC_AM_DIR}/rlc_am_init.c
  ${RLC_AM_DIR}/rlc_am_timer_poll_retransmit.c
  ${RLC_AM_DIR}/rlc_am_timer_reordering.c
  ${RLC_AM_DIR}/rlc_am_timer_status_prohibit.c
  ${RLC_AM_DIR}/rlc_am_segment.c
  ${RLC_AM_DIR}/rlc_am_segments_holes.c
  ${RLC_AM_DIR}/rlc_am_in_sdu.c
  ${RLC_AM_DIR}/rlc_am_receiver.c
  ${RLC_AM_DIR}/rlc_am_retransmit.c
  ${RLC_AM_DIR}/rlc_am_windows.c
  ${RLC_AM_DIR}/rlc_am_rx_list.c
  ${RLC_AM_DIR}/rlc_am_reassembly.c
  ${RLC_AM_DIR}/rlc_am_status_report.c
  ${RLC_TM_DIR}/rlc_tm.c
  ${RLC_TM_DIR}/rlc_tm_init.c
  ${RLC_UM_DIR}/rlc_um.c
  ${RLC_UM_DIR}/rlc_um_fsm.c
  ${RLC_UM_DIR}/rlc_um_control_primitives.c
  ${RLC_UM_DIR}/rlc_um_segment.c
  ${RLC_UM_DIR}/rlc_um_reassembly.c
  ${RLC_UM_DIR}/rlc_um_receiver.c
  ${RLC_UM_DIR}/rlc_um_dar.c
  ${RLC_DIR}/rlc_mac.c
  ${RLC_DIR}/rlc.c
  ${RLC_DIR}/rlc_rrc.c
  ${RLC_DIR}/rlc_mpls.c
  ${RRC_DIR}/rrc_UE.c
  ${RRC_DIR}/rrc_common.c
  ${RRC_DIR}/L2_interface_common.c
  ${RRC_DIR}/L2_interface_ue.c
)

# MAC sources (eNB scheduler and UE procedures) plus the PHY interface module.
set(MAC_SRC
  ${PHY_INTERFACE_DIR}/IF_Module.c
  ${MAC_DIR}/main.c
  ${MAC_DIR}/main_ue.c
  ${MAC_DIR}/ue_procedures.c
  ${MAC_DIR}/ra_procedures.c
  ${MAC_DIR}/l1_helpers.c
  ${MAC_DIR}/rar_tools.c
  ${MAC_DIR}/rar_tools_ue.c
  ${MAC_DIR}/eNB_scheduler.c
  ${MAC_DIR}/eNB_scheduler_dlsch.c
  ${MAC_DIR}/eNB_scheduler_ulsch.c
  ${MAC_DIR}/eNB_scheduler_mch.c
  ${MAC_DIR}/eNB_scheduler_bch.c
  ${MAC_DIR}/eNB_scheduler_primitives.c
  ${MAC_DIR}/eNB_scheduler_RA.c
  ${MAC_DIR}/pre_processor.c
  ${MAC_DIR}/config.c
  ${MAC_DIR}/config_ue.c
)

# UE-only MAC sources.
set(MAC_SRC_UE
  ${MAC_DIR}/main_ue.c
  ${MAC_DIR}/ue_procedures.c
  ${MAC_DIR}/ra_procedures.c
  ${MAC_DIR}/l1_helpers.c
  ${MAC_DIR}/rar_tools_ue.c
  ${MAC_DIR}/config_ue.c
)

set(ENB_APP_SRC
  ${OPENAIR2_DIR}/ENB_APP/enb_app.c
  ${OPENAIR2_DIR}/ENB_APP/enb_config.c
  ${OPENAIR2_DIR}/ENB_APP/RRC_config_tools.c
)

add_library(L2
  ${L2_SRC}
  ${MAC_SRC}
  ${ENB_APP_SRC})
#  ${OPENAIR2_DIR}/RRC/L2_INTERFACE/openair_rrc_L2_interface.c)

add_library(L2_UE
  ${L2_SRC_UE}
  ${MAC_SRC_UE}
)


include_directories(${NFAPI_USER_DIR})

# L3 Libs
##########################

# RAL library, only built when ENABLE_RAL is set.
# RAL_LTE_DIR ends with a slash, so paths below are appended without one.
set(RAL_LTE_DIR ${OPENAIR3_DIR}/RAL-LTE/)
if(ENABLE_RAL)
  set(RAL_LTE_SRC
    ${RRC_DIR}/rrc_UE_ral.c
    ${RRC_DIR}/rrc_eNB_ral.c
    ${RAL_LTE_DIR}LTE_RAL_ENB/SRC/lteRALenb_action.c
    ${RAL_LTE_DIR}LTE_RAL_ENB/SRC/lteRALenb_main.c
    ${RAL_LTE_DIR}LTE_RAL_ENB/SRC/lteRALenb_mih_msg.c
    ${RAL_LTE_DIR}LTE_RAL_ENB/SRC/lteRALenb_parameters.c
    ${RAL_LTE_DIR}LTE_RAL_ENB/SRC/lteRALenb_process.c
    ${RAL_LTE_DIR}LTE_RAL_ENB/SRC/lteRALenb_rrc_msg.c
    ${RAL_LTE_DIR}LTE_RAL_ENB/SRC/lteRALenb_subscribe.c
    ${RAL_LTE_DIR}LTE_RAL_ENB/SRC/lteRALenb_thresholds.c
    ${RAL_LTE_DIR}LTE_RAL_UE/SRC/lteRALue_action.c
    ${RAL_LTE_DIR}LTE_RAL_UE/SRC/lteRALue_main.c
    ${RAL_LTE_DIR}LTE_RAL_UE/SRC/lteRALue_mih_msg.c
    ${RAL_LTE_DIR}LTE_RAL_UE/SRC/lteRALue_parameters.c
    ${RAL_LTE_DIR}LTE_RAL_UE/SRC/lteRALue_process.c
    ${RAL_LTE_DIR}LTE_RAL_UE/SRC/lteRALue_rrc_msg.c
    ${RAL_LTE_DIR}LTE_RAL_UE/SRC/lteRALue_subscribe.c
    ${RAL_LTE_DIR}LTE_RAL_UE/SRC/lteRALue_thresholds.c
  )
  add_library(RAL ${RAL_LTE_SRC})
  set(RAL_LIB RAL)
endif()

# CN libs
##########################

add_library(CN_UTILS
  ${OPENAIR3_DIR}/UTILS/conversions.c
  ${OPENAIR3_DIR}/UTILS/enum_string.c
  ${OPENAIR3_DIR}/UTILS/log.c
  ${OPENAIR3_DIR}/UTILS/mcc_mnc_itu.c
)

set(GTPV1U_DIR ${OPENAIR3_DIR}/GTPV1-U)
set(GTPV1U_SRC
  ${RRC_DIR}/rrc_eNB_GTPV1U.c
  ${GTPV1U_DIR}/nw-gtpv1u/src/NwGtpv1uTunnelEndPoint.c
  ${GTPV1U_DIR}/nw-gtpv1u/src/NwGtpv1uTrxn.c
  ${GTPV1U_DIR}/nw-gtpv1u/src/NwGtpv1uMsg.c
  ${GTPV1U_DIR}/nw-gtpv1u/src/NwGtpv1u.c
  ${GTPV1U_DIR}/gtpv1u_teid_pool.c
)
add_library(GTPV1U ${GTPV1U_SRC})

set(SCTP_SRC
  ${OPENAIR3_DIR}/SCTP/sctp_common.c
  ${OPENAIR3_DIR}/SCTP/sctp_eNB_task.c
  ${OPENAIR3_DIR}/SCTP/sctp_eNB_itti_messaging.c
)
add_library(SCTP_CLIENT ${SCTP_SRC})

add_library(UDP ${OPENAIR3_DIR}/UDP/udp_eNB_task.c)


# NAS source lists. NAS_SRC ends with a slash, so paths are appended without one.
# Despite the *_OBJS names, these variables hold source file lists.
set(NAS_SRC ${OPENAIR3_DIR}/NAS/)
set(libnas_api_OBJS
  ${NAS_SRC}COMMON/API/NETWORK/as_message.c
  ${NAS_SRC}COMMON/API/NETWORK/nas_message.c
  ${NAS_SRC}COMMON/API/NETWORK/network_api.c
)

set(libnas_emm_msg_OBJS
  ${NAS_SRC}COMMON/EMM/MSG/AttachAccept.c
  ${NAS_SRC}COMMON/EMM/MSG/AttachComplete.c
  ${NAS_SRC}COMMON/EMM/MSG/AttachReject.c
  ${NAS_SRC}COMMON/EMM/MSG/AttachRequest.c
  ${NAS_SRC}COMMON/EMM/MSG/AuthenticationFailure.c
  ${NAS_SRC}COMMON/EMM/MSG/AuthenticationReject.c
  ${NAS_SRC}COMMON/EMM/MSG/AuthenticationRequest.c
  ${NAS_SRC}COMMON/EMM/MSG/AuthenticationResponse.c
  ${NAS_SRC}COMMON/EMM/MSG/CsServiceNotification.c
  ${NAS_SRC}COMMON/EMM/MSG/DetachAccept.c
  ${NAS_SRC}COMMON/EMM/MSG/DetachRequest.c
  ${NAS_SRC}COMMON/EMM/MSG/DownlinkNasTransport.c
  ${NAS_SRC}COMMON/EMM/MSG/EmmInformation.c
  ${NAS_SRC}COMMON/EMM/MSG/emm_msg.c
  ${NAS_SRC}COMMON/EMM/MSG/EmmStatus.c
  ${NAS_SRC}COMMON/EMM/MSG/ExtendedServiceRequest.c
  ${NAS_SRC}COMMON/EMM/MSG/GutiReallocationCommand.c
  ${NAS_SRC}COMMON/EMM/MSG/GutiReallocationComplete.c
  ${NAS_SRC}COMMON/EMM/MSG/IdentityRequest.c
  ${NAS_SRC}COMMON/EMM/MSG/IdentityResponse.c
  ${NAS_SRC}COMMON/EMM/MSG/SecurityModeCommand.c
  ${NAS_SRC}COMMON/EMM/MSG/SecurityModeComplete.c
  ${NAS_SRC}COMMON/EMM/MSG/SecurityModeReject.c
  ${NAS_SRC}COMMON/EMM/MSG/ServiceReject.c
  ${NAS_SRC}COMMON/EMM/MSG/ServiceRequest.c
  ${NAS_SRC}COMMON/EMM/MSG/TrackingAreaUpdateAccept.c
  ${NAS_SRC}COMMON/EMM/MSG/TrackingAreaUpdateComplete.c
  ${NAS_SRC}COMMON/EMM/MSG/TrackingAreaUpdateReject.c
  ${NAS_SRC}COMMON/EMM/MSG/TrackingAreaUpdateRequest.c
  ${NAS_SRC}COMMON/EMM/MSG/UplinkNasTransport.c
)

set(libnas_esm_msg_OBJS
  ${NAS_SRC}COMMON/ESM/MSG/ActivateDedicatedEpsBearerContextAccept.c
  ${NAS_SRC}COMMON/ESM/MSG/ActivateDedicatedEpsBearerContextReject.c
  ${NAS_SRC}COMMON/ESM/MSG/ActivateDedicatedEpsBearerContextRequest.c
  ${NAS_SRC}COMMON/ESM/MSG/ActivateDefaultEpsBearerContextAccept.c
  ${NAS_SRC}COMMON/ESM/MSG/ActivateDefaultEpsBearerContextReject.c
  ${NAS_SRC}COMMON/ESM/MSG/ActivateDefaultEpsBearerContextRequest.c
  ${NAS_SRC}COMMON/ESM/MSG/BearerResourceAllocationReject.c
  ${NAS_SRC}COMMON/ESM/MSG/BearerResourceAllocationRequest.c
  ${NAS_SRC}COMMON/ESM/MSG/BearerResourceModificationReject.c
  ${NAS_SRC}COMMON/ESM/MSG/BearerResourceModificationRequest.c
  ${NAS_SRC}COMMON/ESM/MSG/DeactivateEpsBearerContextAccept.c
  ${NAS_SRC}COMMON/ESM/MSG/DeactivateEpsBearerContextRequest.c
  ${NAS_SRC}COMMON/ESM/MSG/EsmInformationRequest.c
  ${NAS_SRC}COMMON/ESM/MSG/EsmInformationResponse.c
  ${NAS_SRC}COMMON/ESM/MSG/esm_msg.c
  ${NAS_SRC}COMMON/ESM/MSG/EsmStatus.c
  ${NAS_SRC}COMMON/ESM/MSG/ModifyEpsBearerContextAccept.c
  ${NAS_SRC}COMMON/ESM/MSG/ModifyEpsBearerContextReject.c
  ${NAS_SRC}COMMON/ESM/MSG/ModifyEpsBearerContextRequest.c
  ${NAS_SRC}COMMON/ESM/MSG/PdnConnectivityReject.c
  ${NAS_SRC}COMMON/ESM/MSG/PdnConnectivityRequest.c
  ${NAS_SRC}COMMON/ESM/MSG/PdnDisconnectReject.c
  ${NAS_SRC}COMMON/ESM/MSG/PdnDisconnectRequest.c
)

set(libnas_ies_OBJS
  ${NAS_SRC}COMMON/IES/AccessPointName.c
  ${NAS_SRC}COMMON/IES/AdditionalUpdateResult.c
  ${NAS_SRC}COMMON/IES/AdditionalUpdateType.c
  ${NAS_SRC}COMMON/IES/ApnAggregateMaximumBitRate.c
  ${NAS_SRC}COMMON/IES/AuthenticationFailureParameter.c
  ${NAS_SRC}COMMON/IES/AuthenticationParameterAutn.c
  ${NAS_SRC}COMMON/IES/AuthenticationParameterRand.c
  ${NAS_SRC}COMMON/IES/AuthenticationResponseParameter.c
  ${NAS_SRC}COMMON/IES/CipheringKeySequenceNumber.c
  ${NAS_SRC}COMMON/IES/Cli.c
  ${NAS_SRC}COMMON/IES/CsfbResponse.c
  ${NAS_SRC}COMMON/IES/DaylightSavingTime.c
  ${NAS_SRC}COMMON/IES/DetachType.c
  ${NAS_SRC}COMMON/IES/DrxParameter.c
  ${NAS_SRC}COMMON/IES/EmergencyNumberList.c
  ${NAS_SRC}COMMON/IES/EmmCause.c
  ${NAS_SRC}COMMON/IES/EpsAttachResult.c
  ${NAS_SRC}COMMON/IES/EpsAttachType.c
  ${NAS_SRC}COMMON/IES/EpsBearerContextStatus.c
  ${NAS_SRC}COMMON/IES/EpsBearerIdentity.c
  ${NAS_SRC}COMMON/IES/EpsMobileIdentity.c
  ${NAS_SRC}COMMON/IES/EpsNetworkFeatureSupport.c
  ${NAS_SRC}COMMON/IES/EpsQualityOfService.c
  ${NAS_SRC}COMMON/IES/EpsUpdateResult.c
  ${NAS_SRC}COMMON/IES/EpsUpdateType.c
  ${NAS_SRC}COMMON/IES/EsmCause.c
  ${NAS_SRC}COMMON/IES/EsmInformationTransferFlag.c
  ${NAS_SRC}COMMON/IES/EsmMessageContainer.c
  ${NAS_SRC}COMMON/IES/GprsTimer.c
  ${NAS_SRC}COMMON/IES/GutiType.c
  ${NAS_SRC}COMMON/IES/IdentityType2.c
  ${NAS_SRC}COMMON/IES/ImeisvRequest.c
  ${NAS_SRC}COMMON/IES/KsiAndSequenceNumber.c
  ${NAS_SRC}COMMON/IES/LcsClientIdentity.c
  ${NAS_SRC}COMMON/IES/LcsIndicator.c
  ${NAS_SRC}COMMON/IES/LinkedEpsBearerIdentity.c
  ${NAS_SRC}COMMON/IES/LlcServiceAccessPointIdentifier.c
  ${NAS_SRC}COMMON/IES/LocationAreaIdentification.c
  ${NAS_SRC}COMMON/IES/MessageType.c
  ${NAS_SRC}COMMON/IES/MobileIdentity.c
  ${NAS_SRC}COMMON/IES/MobileStationClassmark2.c
  ${NAS_SRC}COMMON/IES/MobileStationClassmark3.c
  ${NAS_SRC}COMMON/IES/MsNetworkCapability.c
  ${NAS_SRC}COMMON/IES/MsNetworkFeatureSupport.c
  ${NAS_SRC}COMMON/IES/NasKeySetIdentifier.c
  ${NAS_SRC}COMMON/IES/NasMessageContainer.c
  ${NAS_SRC}COMMON/IES/NasRequestType.c
  ${NAS_SRC}COMMON/IES/NasSecurityAlgorithms.c
  ${NAS_SRC}COMMON/IES/NetworkName.c
  ${NAS_SRC}COMMON/IES/Nonce.c
  ${NAS_SRC}COMMON/IES/PacketFlowIdentifier.c
  ${NAS_SRC}COMMON/IES/PagingIdentity.c
  ${NAS_SRC}COMMON/IES/PdnAddress.c
  ${NAS_SRC}COMMON/IES/PdnType.c
  ${NAS_SRC}COMMON/IES/PlmnList.c
  ${NAS_SRC}COMMON/IES/ProcedureTransactionIdentity.c
  ${NAS_SRC}COMMON/IES/ProtocolConfigurationOptions.c
  ${NAS_SRC}COMMON/IES/ProtocolDiscriminator.c
  ${NAS_SRC}COMMON/IES/PTmsiSignature.c
  ${NAS_SRC}COMMON/IES/QualityOfService.c
  ${NAS_SRC}COMMON/IES/RadioPriority.c
  ${NAS_SRC}COMMON/IES/SecurityHeaderType.c
  ${NAS_SRC}COMMON/IES/ServiceType.c
  ${NAS_SRC}COMMON/IES/ShortMac.c
  ${NAS_SRC}COMMON/IES/SsCode.c
  ${NAS_SRC}COMMON/IES/SupportedCodecList.c
  ${NAS_SRC}COMMON/IES/TimeZoneAndTime.c
  ${NAS_SRC}COMMON/IES/TimeZone.c
  ${NAS_SRC}COMMON/IES/TmsiStatus.c
  ${NAS_SRC}COMMON/IES/TrackingAreaIdentity.c
  ${NAS_SRC}COMMON/IES/TrackingAreaIdentityList.c
  ${NAS_SRC}COMMON/IES/TrafficFlowAggregateDescription.c
  ${NAS_SRC}COMMON/IES/TrafficFlowTemplate.c
  ${NAS_SRC}COMMON/IES/TransactionIdentifier.c
  ${NAS_SRC}COMMON/IES/UeNetworkCapability.c
  ${NAS_SRC}COMMON/IES/UeRadioCapabilityInformationUpdateNeeded.c
  ${NAS_SRC}COMMON/IES/UeSecurityCapability.c
  ${NAS_SRC}COMMON/IES/VoiceDomainPreferenceAndUeUsageSetting.c
)

set(libnas_utils_OBJS
  ${NAS_SRC}COMMON/UTIL/device.c
  ${NAS_SRC}COMMON/UTIL/memory.c
  ${NAS_SRC}COMMON/UTIL/nas_log.c
  ${NAS_SRC}COMMON/UTIL/nas_timer.c
  ${NAS_SRC}COMMON/UTIL/socket.c
  ${NAS_SRC}COMMON/UTIL/stty.c
  ${NAS_SRC}COMMON/UTIL/TLVEncoder.c
  ${NAS_SRC}COMMON/UTIL/TLVDecoder.c
  ${NAS_SRC}COMMON/UTIL/OctetString.c
)

# UE-side NAS library and its include paths, only when NAS_UE is set.
if(NAS_UE)
  set(libnas_ue_api_OBJS
    ${NAS_SRC}UE/API/USER/at_command.c
    ${NAS_SRC}UE/API/USER/at_error.c
    ${NAS_SRC}UE/API/USER/at_response.c
    ${NAS_SRC}UE/API/USER/user_api.c
    ${NAS_SRC}UE/API/USER/user_indication.c
    ${NAS_SRC}UE/API/USIM/aka_functions.c
    ${NAS_SRC}UE/API/USIM/usim_api.c
  )
  set(libnas_ue_emm_OBJS
    ${NAS_SRC}UE/EMM/Attach.c
    ${NAS_SRC}UE/EMM/Authentication.c
    ${NAS_SRC}UE/EMM/Detach.c
    ${NAS_SRC}UE/EMM/emm_main.c
    ${NAS_SRC}UE/EMM/EmmStatusHdl.c
    ${NAS_SRC}UE/EMM/Identification.c
    ${NAS_SRC}UE/EMM/IdleMode.c
    ${NAS_SRC}UE/EMM/LowerLayer.c
    ${NAS_SRC}UE/EMM/SecurityModeControl.c
    ${NAS_SRC}UE/EMM/ServiceRequestHdl.c
    ${NAS_SRC}UE/EMM/TrackingAreaUpdate.c
  )
  set(libnas_ue_emm_sap_OBJS
    ${NAS_SRC}UE/EMM/SAP/emm_as.c
    ${NAS_SRC}UE/EMM/SAP/EmmDeregisteredAttachNeeded.c
    ${NAS_SRC}UE/EMM/SAP/EmmDeregisteredAttemptingToAttach.c
    ${NAS_SRC}UE/EMM/SAP/EmmDeregistered.c
    ${NAS_SRC}UE/EMM/SAP/EmmDeregisteredInitiated.c
    ${NAS_SRC}UE/EMM/SAP/EmmDeregisteredLimitedService.c
    ${NAS_SRC}UE/EMM/SAP/EmmDeregisteredNoCellAvailable.c
    ${NAS_SRC}UE/EMM/SAP/EmmDeregisteredNoImsi.c
    ${NAS_SRC}UE/EMM/SAP/EmmDeregisteredNormalService.c
    ${NAS_SRC}UE/EMM/SAP/EmmDeregisteredPlmnSearch.c
    ${NAS_SRC}UE/EMM/SAP/emm_esm.c
    ${NAS_SRC}UE/EMM/SAP/emm_fsm.c
    ${NAS_SRC}UE/EMM/SAP/EmmNull.c
    ${NAS_SRC}UE/EMM/SAP/emm_recv.c
    ${NAS_SRC}UE/EMM/SAP/emm_reg.c
    ${NAS_SRC}UE/EMM/SAP/EmmRegisteredAttemptingToUpdate.c
    ${NAS_SRC}UE/EMM/SAP/EmmRegistered.c
    ${NAS_SRC}UE/EMM/SAP/EmmRegisteredImsiDetachInitiated.c
    ${NAS_SRC}UE/EMM/SAP/EmmRegisteredInitiated.c
    ${NAS_SRC}UE/EMM/SAP/EmmRegisteredLimitedService.c
    ${NAS_SRC}UE/EMM/SAP/EmmRegisteredNoCellAvailable.c
    ${NAS_SRC}UE/EMM/SAP/EmmRegisteredNormalService.c
    ${NAS_SRC}UE/EMM/SAP/EmmRegisteredPlmnSearch.c
    ${NAS_SRC}UE/EMM/SAP/EmmRegisteredUpdateNeeded.c
    ${NAS_SRC}UE/EMM/SAP/emm_sap.c
    ${NAS_SRC}UE/EMM/SAP/emm_send.c
    ${NAS_SRC}UE/EMM/SAP/EmmServiceRequestInitiated.c
    ${NAS_SRC}UE/EMM/SAP/EmmTrackingAreaUpdatingInitiated.c
  )
  set(libnas_ue_esm_OBJS
    ${NAS_SRC}UE/ESM/DedicatedEpsBearerContextActivation.c
    ${NAS_SRC}UE/ESM/DefaultEpsBearerContextActivation.c
    ${NAS_SRC}UE/ESM/EpsBearerContextDeactivation.c
    ${NAS_SRC}UE/ESM/esm_ebr.c
    ${NAS_SRC}UE/ESM/esm_ebr_context.c
    ${NAS_SRC}UE/ESM/esm_ip.c
    ${NAS_SRC}UE/ESM/esm_main.c
    ${NAS_SRC}UE/ESM/esm_pt.c
    ${NAS_SRC}UE/ESM/EsmStatusHdl.c
    ${NAS_SRC}UE/ESM/PdnConnectivity.c
    ${NAS_SRC}UE/ESM/PdnDisconnect.c
  )
  set(libnas_ue_esm_sap_OBJS
    ${NAS_SRC}UE/ESM/SAP/esm_recv.c
    ${NAS_SRC}UE/ESM/SAP/esm_send.c
    ${NAS_SRC}UE/ESM/SAP/esm_sap.c
  )
  add_library(LIB_NAS_UE
    ${NAS_SRC}UE/nas_itti_messaging.c
    ${NAS_SRC}UE/nas_network.c
    ${NAS_SRC}UE/nas_parser.c
    ${NAS_SRC}UE/nas_proc.c
    ${NAS_SRC}UE/nas_user.c
    ${libnas_api_OBJS}
    ${libnas_ue_api_OBJS}
    ${libnas_emm_msg_OBJS}
    ${libnas_esm_msg_OBJS}
    ${libnas_ies_OBJS}
    ${libnas_utils_OBJS}
    ${libnas_ue_emm_OBJS}
    ${libnas_ue_emm_sap_OBJS}
    ${libnas_ue_esm_OBJS}
    ${libnas_ue_esm_sap_OBJS}
  )
  set(NAS_UE_LIB LIB_NAS_UE)

  include_directories(${NAS_SRC}UE)
  include_directories(${NAS_SRC}UE/API/USER)
  include_directories(${NAS_SRC}UE/API/USIM)
  include_directories(${NAS_SRC}UE/EMM)
  include_directories(${NAS_SRC}UE/EMM/SAP)
  include_directories(${NAS_SRC}UE/ESM)
  include_directories(${NAS_SRC}UE/ESM/SAP)
endif()


# NB-IoT: global compile definition plus a loadable module built from the NB-IoT config source.
add_definitions("-DNUMBER_OF_UE_MAX_NB_IoT=16")
set(NBIOT_SOURCES
  ${OPENAIR2_DIR}/ENB_APP/NB_IoT_config.c
)
add_library(NB_IoT MODULE ${NBIOT_SOURCES})

# shared library loader
set(SHLIB_LOADER_SOURCES
  ${OPENAIR_DIR}/common/utils/load_module_shlib.c
)

# Build lfds from its own sources (even though it is an external library),
# for better integration with the compilation flags and structure of cmake.
###################################################################
set(lfds ${OPENAIR2_DIR}/UTIL/LFDS/liblfds6.1.1/liblfds611/src/)
file(GLOB lfds_queue ${lfds}/lfds611_queue/*.c)
file(GLOB lfds_ring ${lfds}/lfds611_ringbuffer/*.c)
file(GLOB lfds_slist ${lfds}/lfds611_slist/*.c)
file(GLOB lfds_stack ${lfds}/lfds611_stack/*.c)
file(GLOB lfds_freelist ${lfds}/lfds611_freelist/*.c)

include_directories(${lfds})
add_library(LFDS
  ${lfds_queue} ${lfds_ring} ${lfds_slist} ${lfds_stack} ${lfds_freelist}
  ${lfds}/lfds611_liblfds/lfds611_liblfds_abstraction_test_helpers.c
${lfds}/lfds611_liblfds/lfds611_liblfds_aligned_free.c + ${lfds}/lfds611_liblfds/lfds611_liblfds_aligned_malloc.c + ${lfds}/lfds611_abstraction/lfds611_abstraction_free.c + ${lfds}/lfds611_abstraction/lfds611_abstraction_malloc.c +) + +set(lfds7 ${OPENAIR2_DIR}/UTIL/LFDS/liblfds7.0.0/liblfds700/src/) +file(GLOB lfds7_queue ${lfds7}/lfds700_queue/*.c) +file(GLOB lfds7_ring ${lfds7}/lfds700_ringbuffer/*.c) +file(GLOB lfds7_qbss ${lfds7}/lfds700_queue_bounded_singleconsumer_singleproducer/*.c) +file(GLOB lfds7_stack ${lfds7}/lfds700_stack/*.c) +file(GLOB lfds7_freelist ${lfds7}/lfds700_freelist/*.c) +file(GLOB lfds7_btree ${lfds7}/lfds700_btree_addonly_unbalanced/*.c) +file(GLOB lfds7_hash ${lfds7}/lfds700_hash_addonly/*.c) +file(GLOB lfds7_ordered_list ${lfds7}/lfds700_list_addonly_ordered_singlylinked/*.c) +file(GLOB lfds7_unordered_list ${lfds7}/lfds700_list_addonly_singlylinked_unordered/*.c) +file(GLOB lfds7_misc ${lfds7}/lfds700_misc/*.c) + +include_directories(${lfds7}) +add_library(LFDS7 + ${lfds7_queue} ${lfds7_ring} ${lfds7_qbss} ${lfds7_stack} ${lfds7_freelist} ${lfds7_btree} ${lfds7_hash} ${lfds7_ordered_list} ${lfds7_unordered_list} ${lfds7_misc} +) + +# Simulation library +########################## +add_library(SIMU +${OPENAIR1_DIR}/SIMULATION/TOOLS/random_channel.c +${OPENAIR1_DIR}/SIMULATION/TOOLS/rangen_double.c +${OPENAIR1_DIR}/SIMULATION/TOOLS/taus.c +${OPENAIR1_DIR}/SIMULATION/TOOLS/multipath_channel.c +${OPENAIR1_DIR}/SIMULATION/TOOLS/abstraction.c +${OPENAIR1_DIR}/SIMULATION/TOOLS/multipath_tv_channel.c +${OPENAIR1_DIR}/SIMULATION/RF/rf.c +${OPENAIR1_DIR}/SIMULATION/RF/dac.c +${OPENAIR1_DIR}/SIMULATION/RF/adc.c +${OPENAIR1_DIR}/SIMULATION/ETH_TRANSPORT/netlink_init.c +) + +add_library(SIMU_ETH +${OPENAIR1_DIR}/SIMULATION/ETH_TRANSPORT/netlink_init.c +${OPENAIR1_DIR}/SIMULATION/ETH_TRANSPORT/multicast_link.c +${OPENAIR1_DIR}/SIMULATION/ETH_TRANSPORT/socket.c +${OPENAIR1_DIR}/SIMULATION/ETH_TRANSPORT/bypass_session_layer.c 
+#${OPENAIR1_DIR}/SIMULATION/ETH_TRANSPORT/emu_transport.c +) + +add_library(OPENAIR0_LIB + ${OPENAIR_TARGETS}/ARCH/EXMIMO/USERSPACE/LIB/openair0_lib.c +) + +include_directories("${NFAPI_DIR}/nfapi/public_inc") +include_directories("${NFAPI_DIR}/common/public_inc") +include_directories("${NFAPI_DIR}/pnf/public_inc") +include_directories("${NFAPI_DIR}/nfapi/inc") +include_directories("${NFAPI_DIR}/sim_common/inc") +include_directories("${NFAPI_DIR}/pnf_sim/inc") + + +# System packages that are required +# We use either the cmake buildin, in ubuntu are in: /usr/share/cmake*/Modules/ +# or cmake provide a generic interface to pkg-config that widely used +################################### +include(FindPkgConfig) + +pkg_search_module(LIBXML2 libxml-2.0 REQUIRED) +include_directories(${LIBXML2_INCLUDE_DIRS}) + +pkg_search_module(LIBXSLT libxslt REQUIRED) +include_directories(${LIBXSLT_INCLUDE_DIRS}) + +pkg_search_module(OPENSSL openssl REQUIRED) +include_directories(${OPENSSL_INCLUDE_DIRS}) + +pkg_search_module(CONFIG libconfig REQUIRED) +include_directories(${CONFIG_INCLUDE_DIRS}) + +pkg_search_module(CRYPTO libcrypto REQUIRED) +include_directories(${CRYPTO_INCLUDE_DIRS}) + +#use native cmake method as this package is not in pkg-config +if (${RF_BOARD} STREQUAL "OAI_USRP") + find_package(Boost REQUIRED) + include_directories(${LIBBOOST_INCLUDE_DIR}) +endif (${RF_BOARD} STREQUAL "OAI_USRP") + +pkg_search_module(OPENPGM openpgm-5.1 openpgm-5.2) +if(NOT ${OPENPGM_FOUND}) + message("PACKAGE openpgm-5.1 is required by binaries such as oaisim: will fail later if this target is built") +else() + include_directories(${OPENPGM_INCLUDE_DIRS}) +endif() + +pkg_search_module(NETTLE nettle) +if(NOT ${NETTLE_FOUND}) + message( FATAL_ERROR "PACKAGE nettle not found: some targets will fail. 
Run build_oai -I again!") +else() + include_directories(${NETTLE_INCLUDE_DIRS}) +endif() + +message ("NETTLE VERSION_INSTALLED = ${NETTLE_VERSION}") + +string(REGEX REPLACE "([0-9]+).*" "\\1" NETTLE_VERSION_MAJOR ${NETTLE_VERSION}) +string(REGEX REPLACE "[0-9]+\\.([0-9]+).*" "\\1" NETTLE_VERSION_MINOR ${NETTLE_VERSION}) +message ("NETTLE_VERSION_MAJOR = ${NETTLE_VERSION_MAJOR}") +message ("NETTLE_VERSION_MINOR = ${NETTLE_VERSION_MINOR}") + +if ("${NETTLE_VERSION_MAJOR}" STREQUAL "" OR "${NETTLE_VERSION_MINOR}" STREQUAL "") + message( FATAL_ERROR "The nettle version not detected properly. Try to run build_oai -I again" ) +endif() + +add_definitions("-DNETTLE_VERSION_MAJOR=${NETTLE_VERSION_MAJOR}") +add_definitions("-DNETTLE_VERSION_MINOR=${NETTLE_VERSION_MINOR}") + +pkg_search_module(XPM xpm) +if(NOT ${XPM_FOUND}) + message("PACKAGE xpm not found: some targets will fail") +else() + include_directories(${XPM_INCLUDE_DIRS}) +endif() + +# Atlas is required by some packages, but not found in pkg-config +# So, here are some hacks here. Hope this gets fixed in future! +if(EXISTS "/usr/include/atlas/cblas.h" OR EXISTS "/usr/include/cblas.h") + include_directories("/usr/include/atlas") + LINK_DIRECTORIES("/usr/lib64") + LINK_DIRECTORIES("/usr/lib64/atlas") #Added because atlas libraries in CentOS 7 are here! 
+ + if(EXISTS "/usr/lib64/libblas.so" OR EXISTS "/usr/lib/libblas.so") #Case for CentOS7 + list(APPEND ATLAS_LIBRARIES blas) + else() # Case for Ubuntu + list(APPEND ATLAS_LIBRARIES cblas) + endif() + + if(EXISTS "/usr/lib/atlas/libtatlas.so" OR EXISTS "/usr/lib64/atlas/libtatlas.so") #Case for CentOS7 + list(APPEND ATLAS_LIBRARIES tatlas) + else() + list(APPEND ATLAS_LIBRARIES atlas) #Case for Ubuntu + endif() + + list(APPEND ATLAS_LIBRARIES lapack) + +# for ubuntu 17.10, directories are different +elseif(EXISTS "/usr/include/x86_64-linux-gnu/cblas.h") + + include_directories("/usr/include/x86_64-linux-gnu") + LINK_DIRECTORIES("/usr/lib/x86_64-linux-gnu") + list(APPEND ATLAS_LIBRARIES cblas) + list(APPEND ATLAS_LIBRARIES atlas) + list(APPEND ATLAS_LIBRARIES lapack) + +else() + message("No Blas/Atlas libs found, some targets will fail") +endif() + +if (${XFORMS}) + include_directories ("/usr/include/X11") + set(XFORMS_SOURCE + ${OPENAIR1_DIR}/PHY/TOOLS/lte_phy_scope.c + ) + set(XFORMS_SOURCE_SOFTMODEM + ${OPENAIR_TARGETS}/RT/USER/stats.c + ) + set(XFORMS_LIBRARIES "forms") +endif (${XFORMS}) + +set(CMAKE_MODULE_PATH "${OPENAIR_DIR}/cmake_targets/tools/MODULES" "${CMAKE_MODULE_PATH}") + +#include T directory even if the T is off because T macros are in the code +#no matter what +include_directories("${OPENAIR_DIR}/common/utils/T") + +if (${T_TRACER}) + set(T_SOURCE + ${OPENAIR_DIR}/common/utils/T/T.c + ${OPENAIR_DIR}/common/utils/T/local_tracer.c) + set (T_LIB "rt") +endif (${T_TRACER}) + +#Some files in the T directory are generated. +#This rule and the following deal with it. +add_custom_command ( + OUTPUT ${OPENAIR_DIR}/common/utils/T/T_IDs.h + COMMAND make + WORKING_DIRECTORY ${OPENAIR_DIR}/common/utils/T + DEPENDS ${OPENAIR_DIR}/common/utils/T/T_messages.txt + ) + +#This rule is specifically needed to generate T files +#before anything else in a project that uses the T. +#See below, there are some 'add_dependencies' showing that. 
# Basically we create a custom target and make the other targets depend
# on it; that forces cmake to generate the T files before anything else.
add_custom_target(generate_T
  DEPENDS ${OPENAIR_DIR}/common/utils/T/T_IDs.h
)

# Hack on a test of asn1c version (already dirty)
add_definitions(-DASN1_MINIMUM_VERSION=924)

#################################
# Executables for operation
#################################
#
# Four softmodem flavours are declared here:
#   lte-softmodem         built from lte-enb.c / lte-ru.c / lte-softmodem.c
#   lte-softmodem-nos1    same entry points, linked without the S1AP, GTPV1U,
#                         SCTP_CLIENT, UDP and NAS UE libraries
#   lte-uesoftmodem       built from lte-ue.c / lte-uesoftmodem.c
#   lte-uesoftmodem-nos1  same entry points, linked without the S1AP, GTPV1U,
#                         SCTP_CLIENT, UDP and NAS UE libraries
#
# Each target first gets its own --start-group/--end-group list of project
# libraries; the system libraries that are identical for the four targets
# are appended afterwards by one loop, which keeps the per-target link
# order (group, libxml2, system libraries, LMS, T) unchanged.

# lte-softmodem: eNB and UE implementation
###################################################
add_executable(lte-softmodem
  ${rrc_h}
  ${s1ap_h}
  ${OPENAIR_BIN_DIR}/messages_xml.h
  ${OPENAIR_TARGETS}/RT/USER/rt_wrapper.c
  ${OPENAIR_TARGETS}/RT/USER/lte-enb.c
  ${OPENAIR_TARGETS}/RT/USER/lte-ru.c
  ${OPENAIR_TARGETS}/RT/USER/lte-softmodem.c
  ${OPENAIR2_DIR}/ENB_APP/NB_IoT_interface.c
  ${OPENAIR1_DIR}/SIMULATION/TOOLS/taus.c
  ${OPENAIR_TARGETS}/SIMU/USER/init_lte.c
  ${OPENAIR_TARGETS}/COMMON/create_tasks.c
  ${OPENAIR_TARGETS}/ARCH/COMMON/common_lib.c
  ${OPENAIR1_DIR}/SIMULATION/ETH_TRANSPORT/netlink_init.c
  ${OPENAIR3_DIR}/NAS/UE/nas_ue_task.c
  ${OPENAIR_DIR}/common/utils/utils.c
  ${OPENAIR_DIR}/common/utils/system.c
  ${GTPU_need_ITTI}
  ${XFORMS_SOURCE}
  ${XFORMS_SOURCE_SOFTMODEM}
  ${T_SOURCE}
  ${CONFIG_SOURCES}
  ${SHLIB_LOADER_SOURCES}
)
target_link_libraries(lte-softmodem
  -Wl,--start-group
  RRC_LIB S1AP_LIB S1AP_ENB GTPV1U SECU_CN SECU_OSA UTIL HASHTABLE
  SCTP_CLIENT UDP SCHED_LIB PHY LFDS L2
  ${MSC_LIB} ${RAL_LIB} ${NAS_UE_LIB} ${ITTI_LIB} ${FLPT_MSG_LIB}
  ${ASYNC_IF_LIB} ${FLEXRAN_AGENT_LIB} LFDS7
  NFAPI_COMMON_LIB NFAPI_LIB NFAPI_VNF_LIB NFAPI_PNF_LIB NFAPI_USER_LIB
  -Wl,--end-group
  z dl
)

# lte-softmodem-nos1: eNB and UE implementation, no S1 libraries linked
###################################################
add_executable(lte-softmodem-nos1
  ${rrc_h}
  ${s1ap_h}
  ${OPENAIR_BIN_DIR}/messages_xml.h
  ${OPENAIR_TARGETS}/RT/USER/rt_wrapper.c
  ${OPENAIR_TARGETS}/RT/USER/lte-enb.c
  ${OPENAIR_TARGETS}/RT/USER/lte-ru.c
  ${OPENAIR_TARGETS}/RT/USER/lte-softmodem.c
  ${OPENAIR2_DIR}/ENB_APP/NB_IoT_interface.c
  ${OPENAIR1_DIR}/SIMULATION/TOOLS/taus.c
  ${OPENAIR_TARGETS}/SIMU/USER/init_lte.c
  ${OPENAIR_TARGETS}/COMMON/create_tasks.c
  ${OPENAIR_TARGETS}/ARCH/COMMON/common_lib.c
  ${OPENAIR2_DIR}/RRC/NAS/nas_config.c
  ${OPENAIR2_DIR}/RRC/NAS/rb_config.c
  ${OPENAIR1_DIR}/SIMULATION/ETH_TRANSPORT/netlink_init.c
  ${OPENAIR_DIR}/common/utils/system.c
  ${XFORMS_SOURCE}
  ${XFORMS_SOURCE_SOFTMODEM}
  ${T_SOURCE}
  ${CONFIG_SOURCES}
  ${SHLIB_LOADER_SOURCES}
)
target_link_libraries(lte-softmodem-nos1
  -Wl,--start-group
  RRC_LIB SECU_CN SECU_OSA UTIL HASHTABLE SCHED_LIB PHY LFDS L2
  ${MSC_LIB} ${RAL_LIB} ${ITTI_LIB}
  ${MIH_LIB} ${FLPT_MSG_LIB} ${ASYNC_IF_LIB} ${FLEXRAN_AGENT_LIB} LFDS7
  NFAPI_COMMON_LIB NFAPI_LIB NFAPI_VNF_LIB NFAPI_PNF_LIB NFAPI_USER_LIB
  -Wl,--end-group
  z dl
)

# lte-uesoftmodem: UE implementation
#######################################
add_executable(lte-uesoftmodem
  ${rrc_h}
  ${s1ap_h}
  ${OPENAIR_BIN_DIR}/messages_xml.h
  ${OPENAIR_TARGETS}/RT/USER/rt_wrapper.c
  ${OPENAIR_TARGETS}/RT/USER/lte-ue.c
  ${OPENAIR_TARGETS}/RT/USER/lte-uesoftmodem.c
  ${OPENAIR1_DIR}/SIMULATION/TOOLS/taus.c
  ${OPENAIR_TARGETS}/SIMU/USER/init_lte.c
  ${OPENAIR_TARGETS}/COMMON/create_tasks_ue.c
  ${OPENAIR_TARGETS}/ARCH/COMMON/common_lib.c
  ${OPENAIR1_DIR}/SIMULATION/ETH_TRANSPORT/netlink_init.c
  ${OPENAIR3_DIR}/NAS/UE/nas_ue_task.c
  ${OPENAIR_DIR}/common/utils/utils.c
  ${OPENAIR_DIR}/common/utils/system.c
  ${XFORMS_SOURCE}
  ${XFORMS_SOURCE_SOFTMODEM}
  ${T_SOURCE}
  ${CONFIG_SOURCES}
  ${SHLIB_LOADER_SOURCES}
)
target_link_libraries(lte-uesoftmodem
  -Wl,--start-group
  RRC_LIB S1AP_LIB S1AP_ENB GTPV1U SECU_CN SECU_OSA UTIL HASHTABLE
  SCTP_CLIENT UDP SCHED_UE_LIB PHY_UE LFDS L2_UE
  ${MSC_LIB} ${RAL_LIB} ${NAS_UE_LIB} ${ITTI_LIB} ${FLPT_MSG_LIB}
  ${ASYNC_IF_LIB} LFDS7
  -Wl,--end-group
  z dl
)

# lte-uesoftmodem-nos1: UE implementation, no S1 libraries linked
###################################################
add_executable(lte-uesoftmodem-nos1
  ${rrc_h}
  ${s1ap_h}
  ${OPENAIR_BIN_DIR}/messages_xml.h
  ${OPENAIR_TARGETS}/RT/USER/rt_wrapper.c
  ${OPENAIR_TARGETS}/RT/USER/lte-ue.c
  ${OPENAIR_TARGETS}/RT/USER/lte-uesoftmodem.c
  ${OPENAIR1_DIR}/SIMULATION/TOOLS/taus.c
  ${OPENAIR_TARGETS}/SIMU/USER/init_lte.c
  ${OPENAIR_TARGETS}/COMMON/create_tasks_ue.c
  ${OPENAIR_TARGETS}/ARCH/COMMON/common_lib.c
  ${OPENAIR2_DIR}/RRC/NAS/nas_config.c
  ${OPENAIR2_DIR}/RRC/NAS/rb_config.c
  ${OPENAIR1_DIR}/SIMULATION/ETH_TRANSPORT/netlink_init.c
  ${OPENAIR_DIR}/common/utils/system.c
  ${XFORMS_SOURCE}
  ${XFORMS_SOURCE_SOFTMODEM}
  ${T_SOURCE}
  ${CONFIG_SOURCES}
  ${SHLIB_LOADER_SOURCES}
)
target_link_libraries(lte-uesoftmodem-nos1
  -Wl,--start-group
  RRC_LIB SECU_CN SECU_OSA UTIL HASHTABLE SCHED_UE_LIB PHY_UE LFDS L2_UE
  ${MSC_LIB} ${RAL_LIB} ${ITTI_LIB}
  ${MIH_LIB} ${FLPT_MSG_LIB} ${ASYNC_IF_LIB} LFDS7
  -Wl,--end-group
  z dl
)

# Libraries appended to every softmodem flavour, after its library group:
# libxml2, the system/crypto/config libraries, the LMS libraries, and the
# T tracer library (T_LIB is only set when T_TRACER is enabled).
foreach(softmodem_target
    lte-softmodem
    lte-softmodem-nos1
    lte-uesoftmodem
    lte-uesoftmodem-nos1)
  target_link_libraries(${softmodem_target}
    ${LIBXML2_LIBRARIES}
    pthread m ${CONFIG_LIBRARIES} rt crypt
    ${CRYPTO_LIBRARIES} ${OPENSSL_LIBRARIES} ${NETTLE_LIBRARIES}
    sctp ${XFORMS_LIBRARIES} ${PROTOBUF_LIB} ${CMAKE_DL_LIBS} ${LIBYAML_LIBRARIES}
    ${LIB_LMS_LIBRARIES}
    ${T_LIB}
  )
endforeach()

# USIM process (currently disabled)
#################
#add_executable(usim
#  ${OPENAIR3_DIR}/NAS/TOOLS/usim_data.c
#  ${OPENAIR3_DIR}/NAS/USER/API/USIM/usim_api.c
#  ${OPENAIR3_DIR}/NAS/USER/API/USIM/aka_functions.c
#  ${OPENAIR3_DIR}/NAS/COMMON/UTIL/memory.c
#  ${OPENAIR3_DIR}/NAS/COMMON/UTIL/nas_log.c
#  ${OPENAIR3_DIR}/NAS/COMMON/UTIL/OctetString.c
#  ${OPENAIR3_DIR}/NAS/COMMON/UTIL/TLVEncoder.c
#  )
#target_link_libraries (usim ${NAS_LIB} UTIL ${ITTI_LIB} LFDS pthread rt nettle crypto m)

# ???
#####################
#add_executable(nvram
#  ${OPENAIR3_DIR}/NAS/TOOLS/ue_data.c
#  ${OPENAIR3_DIR}/NAS/COMMON/UTIL/memory.c
#  ${OPENAIR3_DIR}/NAS/COMMON/UTIL/nas_log.c
#  )
#target_link_libraries (nvram LIB_NAS_UE UTIL ${ITTI_LIB} LFDS pthread rt nettle crypto m)


####################################
# Add executables for tests
####################################

# oaisim: an all-in-one network simulator
################
add_executable(oaisim
  ${rrc_h}
  ${s1ap_h}
  ${x2ap_h}
  ${OPENAIR_BIN_DIR}/messages_xml.h
  ${OPENAIR_TARGETS}/RT/USER/lte-ue.c
  ${OPENAIR_TARGETS}/RT/USER/lte-ru.c
  ${OPENAIR_TARGETS}/RT/USER/rt_wrapper.c
  ${OPENAIR_TARGETS}/SIMU/USER/channel_sim.c
  ${OPENAIR_TARGETS}/SIMU/USER/init_lte.c
  ${OPENAIR_TARGETS}/SIMU/USER/oaisim_config.c
  ${OPENAIR_TARGETS}/SIMU/USER/sinr_sim.c
  ${OPENAIR_TARGETS}/SIMU/USER/cor_SF_sim.c
  ${OPENAIR_TARGETS}/SIMU/USER/oaisim_functions.c
  ${OPENAIR_TARGETS}/SIMU/USER/event_handler.c
  ${OPENAIR_TARGETS}/SIMU/USER/oaisim.c
  ${OPENAIR_TARGETS}/ARCH/COMMON/common_lib.c
  ${OPENAIR2_DIR}/RRC/NAS/nas_config.c
  ${OPENAIR2_DIR}/RRC/NAS/rb_config.c
  ${OPENAIR3_DIR}/NAS/UE/nas_ue_task.c
  ${OPENAIR_DIR}/common/utils/utils.c
  ${OPENAIR_DIR}/common/utils/system.c
  ${OPENAIR_TARGETS}/COMMON/create_tasks_ue.c
  ${XFORMS_SOURCE}
  ${T_SOURCE}
  ${CONFIG_SOURCES}
  ${SHLIB_LOADER_SOURCES}
)

target_include_directories(oaisim PUBLIC ${OPENAIR_TARGETS}/SIMU/USER)
target_link_libraries (oaisim
  -Wl,-ldl,--start-group
  RRC_LIB S1AP_LIB S1AP_ENB X2AP_LIB SECU_CN UTIL HASHTABLE SCTP_CLIENT UDP SCHED_UE_LIB PHY_UE LFDS L2_UE ${MSC_LIB} LIB_NAS_UE SIMU SECU_OSA ${ITTI_LIB} ${MIH_LIB}
  ${FLPT_MSG_LIB} ${ASYNC_IF_LIB} ${FLEXRAN_AGENT_LIB} LFDS7
  -Wl,--end-group z dl)

target_link_libraries (oaisim ${LIBXML2_LIBRARIES} ${LAPACK_LIBRARIES})
target_link_libraries (oaisim pthread m ${CONFIG_LIBRARIES} rt crypt ${CRYPTO_LIBRARIES} ${OPENSSL_LIBRARIES} ${NETTLE_LIBRARIES} sctp z
  ${ATLAS_LIBRARIES} ${XFORMS_LIBRARIES} ${OPENPGM_LIBRARIES} ${PROTOBUF_LIB} ${CMAKE_DL_LIBS} ${LIBYAML_LIBRARIES})
# Force link with forms, regardless of the XFORMS option
target_link_libraries (oaisim forms)
target_link_libraries (oaisim ${T_LIB})


# oaisim_nos1: the same simulator, linked without the S1AP, SCTP_CLIENT,
# UDP and NAS UE libraries
################
add_executable(oaisim_nos1
  ${rrc_h}
  ${s1ap_h}
  ${x2ap_h}
  ${OPENAIR_BIN_DIR}/messages_xml.h
  ${OPENAIR_TARGETS}/RT/USER/lte-ue.c
  ${OPENAIR_TARGETS}/RT/USER/lte-ru.c
  ${OPENAIR_TARGETS}/RT/USER/rt_wrapper.c
  ${OPENAIR_TARGETS}/SIMU/USER/channel_sim.c
  ${OPENAIR_TARGETS}/SIMU/USER/init_lte.c
  ${OPENAIR_TARGETS}/SIMU/USER/oaisim_config.c
  ${OPENAIR_TARGETS}/SIMU/USER/sinr_sim.c
  ${OPENAIR_TARGETS}/SIMU/USER/cor_SF_sim.c
  ${OPENAIR_TARGETS}/SIMU/USER/oaisim_functions.c
  ${OPENAIR_TARGETS}/SIMU/USER/event_handler.c
  ${OPENAIR_TARGETS}/SIMU/USER/oaisim.c
  ${OPENAIR_TARGETS}/ARCH/COMMON/common_lib.c
  ${OPENAIR2_DIR}/RRC/NAS/nas_config.c
  ${OPENAIR2_DIR}/RRC/NAS/rb_config.c
  ${OPENAIR_TARGETS}/COMMON/create_tasks_ue.c
  ${OPENAIR_DIR}/common/utils/system.c
  ${XFORMS_SOURCE}
  ${T_SOURCE}
  ${CONFIG_SOURCES}
  ${SHLIB_LOADER_SOURCES}
)
target_include_directories(oaisim_nos1 PUBLIC ${OPENAIR_TARGETS}/SIMU/USER)
target_link_libraries (oaisim_nos1
  -Wl,--start-group
  RRC_LIB X2AP_LIB SECU_CN UTIL HASHTABLE SCHED_UE_LIB PHY_UE LFDS ${MSC_LIB} ${ITTI_LIB} SIMU L2_UE ${FLPT_MSG_LIB} ${ASYNC_IF_LIB} LFDS7
  -Wl,--end-group z dl )

target_link_libraries (oaisim_nos1 ${LIBXML2_LIBRARIES} ${LAPACK_LIBRARIES})
target_link_libraries (oaisim_nos1 pthread m ${CONFIG_LIBRARIES} rt crypt ${CRYPTO_LIBRARIES} ${OPENSSL_LIBRARIES} ${NETTLE_LIBRARIES}
  ${ATLAS_LIBRARIES} ${XFORMS_LIBRARIES} ${OPENPGM_LIBRARIES} ${PROTOBUF_LIB} ${CMAKE_DL_LIBS} ${LIBYAML_LIBRARIES})
# Force link with forms, regardless of the XFORMS option
target_link_libraries (oaisim_nos1 forms)

#message("protobuflib is ${PROTOBUF_LIB}")

target_link_libraries (oaisim_nos1 ${T_LIB})


# Unitary tests for each piece of L1: example, mbmssim is MBMS L1 simulator
#####################################

# Special case for dlsim TM4, which uses its own version of the phy_scope
# code (lte_phy_scope_tm4.c) instead of ${XFORMS_SOURCE}.
add_executable(dlsim_tm4
  ${OPENAIR_BIN_DIR}/messages_xml.h
  ${OPENAIR1_DIR}/SIMULATION/LTE_PHY/dlsim_tm4.c
  ${OPENAIR1_DIR}/PHY/TOOLS/lte_phy_scope_tm4.c
  ${T_SOURCE}
  )
target_link_libraries (dlsim_tm4
  -Wl,--start-group SIMU UTIL SCHED_LIB PHY LFDS ${ITTI_LIB} -Wl,--end-group
  pthread m rt ${CONFIG_LIBRARIES} ${ATLAS_LIBRARIES} ${XFORMS_LIBRARIES} ${T_LIB}
  )

# One executable per L1 simulator; each is built from
# SIMULATION/LTE_PHY/<name>.c and linked against the same library set.
foreach(myExe dlsim dlsim_tm7 ulsim pbchsim scansim mbmssim pdcchsim pucchsim prachsim syncsim)

  add_executable(${myExe}
    ${OPENAIR_BIN_DIR}/messages_xml.h
    ${OPENAIR1_DIR}/SIMULATION/LTE_PHY/${myExe}.c
    ${XFORMS_SOURCE}
    ${T_SOURCE}
    ${CONFIG_SOURCES}
    ${SHLIB_LOADER_SOURCES}
    )
  target_link_libraries (${myExe}
    -Wl,--start-group SIMU UTIL SCHED_LIB PHY LFDS ${ITTI_LIB} LFDS7 -Wl,--end-group
    pthread m rt ${CONFIG_LIBRARIES} ${ATLAS_LIBRARIES} ${XFORMS_LIBRARIES} ${T_LIB} dl
    )
endforeach()

# EPC test tools: scenario generator and scenario player
add_executable(test_epc_generate_scenario
  ${OPENAIR3_DIR}/TEST/EPC_TEST/generate_scenario.c
  ${OPENAIR3_DIR}/TEST/EPC_TEST/generate_scenario.h
  ${OPENAIR2_DIR}/ENB_APP/enb_config.h
  ${OPENAIR2_DIR}/COMMON/commonDef.h
  ${OPENAIR2_DIR}/COMMON/messages_def.h
  ${OPENAIR2_DIR}/COMMON/messages_types.h
  ${OPENAIR3_DIR}/S1AP/s1ap_eNB_defs.h
  ${OPENAIR_BIN_DIR}/messages_xml.h
  )
target_link_libraries (test_epc_generate_scenario
  -Wl,--start-group RRC_LIB S1AP_LIB S1AP_ENB X2AP_LIB GTPV1U LIB_NAS_UE SECU_CN UTIL HASHTABLE SCTP_CLIENT UDP SCHED_LIB PHY LFDS ${ITTI_LIB} ${MSC_LIB} L2 -Wl,--end-group pthread m rt crypt sctp ${LIBXML2_LIBRARIES} ${LIBXSLT_LIBRARIES} ${CRYPTO_LIBRARIES} ${OPENSSL_LIBRARIES} ${NETTLE_LIBRARIES} ${CONFIG_LIBRARIES}
  )

add_executable(test_epc_play_scenario
  ${OPENAIR3_DIR}/TEST/EPC_TEST/play_scenario.c
  ${OPENAIR3_DIR}/TEST/EPC_TEST/play_scenario_decode.c
  ${OPENAIR3_DIR}/TEST/EPC_TEST/play_scenario_display.c
  ${OPENAIR3_DIR}/TEST/EPC_TEST/play_scenario_fsm.c
  ${OPENAIR3_DIR}/TEST/EPC_TEST/play_scenario_parse.c
  ${OPENAIR3_DIR}/TEST/EPC_TEST/play_scenario_s1ap.c
  ${OPENAIR3_DIR}/TEST/EPC_TEST/play_scenario_s1ap_compare_ie.c
  ${OPENAIR3_DIR}/TEST/EPC_TEST/play_scenario_s1ap_eNB_defs.h
  ${OPENAIR3_DIR}/TEST/EPC_TEST/play_scenario_sctp.c
  ${OPENAIR3_DIR}/TEST/EPC_TEST/play_scenario.h
  ${OPENAIR2_DIR}/COMMON/commonDef.h
  ${OPENAIR2_DIR}/COMMON/messages_def.h
  ${OPENAIR2_DIR}/COMMON/messages_types.h
  ${OPENAIR_BIN_DIR}/messages_xml.h
  )
target_include_directories(test_epc_play_scenario PUBLIC /usr/local/share/asn1c)
target_link_libraries (test_epc_play_scenario
  -Wl,--start-group RRC_LIB S1AP_LIB X2AP_LIB GTPV1U LIB_NAS_UE SECU_CN UTIL HASHTABLE SCTP_CLIENT UDP SCHED_LIB PHY LFDS ${ITTI_LIB} ${MSC_LIB} -Wl,--end-group pthread m rt crypt sctp ${LIBXML2_LIBRARIES} ${LIBXSLT_LIBRARIES} ${CRYPTO_LIBRARIES} ${OPENSSL_LIBRARIES} ${NETTLE_LIBRARIES} ${CONFIG_LIBRARIES}
  )


# Unitary tests for Core Network pieces: one test_<name> executable per
# entry, built from openair3/TEST/test_<name>.c
#################################
foreach(myExe s1ap
    secu_knas_encrypt_eia1
    secu_kenb
    aes128_ctr_encrypt
    aes128_ctr_decrypt
    secu_knas_encrypt_eea2
    secu_knas secu_knas_encrypt_eea1
    kdf
    aes128_cmac_encrypt
    secu_knas_encrypt_eia2)
  add_executable(test_${myExe}
    ${OPENAIR3_DIR}/TEST/test_${myExe}.c
    )
  target_link_libraries (test_${myExe}
    -Wl,--start-group SECU_CN UTIL LFDS -Wl,--end-group m rt crypt ${CRYPTO_LIBRARIES} ${OPENSSL_LIBRARIES} ${NETTLE_LIBRARIES} ${CONFIG_LIBRARIES}
    )
endforeach()

# to be added
#../targets/TEST/PDCP/test_pdcp.c
#../targets/TEST/PDCP/with_rlc/test_pdcp_rlc.c

# Ensure that the T header files are generated before the targets that
# depend on them. add_dependencies() only orders the build, so every
# executable and library has to be named here; names that are not targets
# in the current configuration are skipped by the if(TARGET ...) guard.
if (${T_TRACER})
  foreach(i
        #all "add_executable" definitions (except tests, rb_tool, updatefw)
        lte-softmodem lte-softmodem-nos1
        # the UE softmodems compile ${T_SOURCE} as well and were missing here
        lte-uesoftmodem lte-uesoftmodem-nos1
        oaisim oaisim_nos1
        dlsim_tm4 dlsim dlsim_tm7 ulsim pbchsim scansim mbmssim
        pdcchsim pucchsim prachsim syncsim
        #all "add_library" definitions
        ITTI RRC_LIB S1AP_LIB S1AP_ENB X2AP_LIB
        oai_exmimodevif oai_usrpdevif oai_bladerfdevif oai_lmssdrdevif
        oai_eth_transpro
        FLPT_MSG ASYNC_IF FLEXRAN_AGENT HASHTABLE MSC UTIL OMG_SUMO SECU_OSA
        SECU_CN SCHED_LIB PHY L2 default_sched remote_sched RAL CN_UTILS
        GTPV1U SCTP_CLIENT UDP LIB_NAS_UE LFDS LFDS7 SIMU OPENAIR0_LIB
        # UE-side counterparts of SCHED_LIB/PHY/L2 and the remaining
        # libraries declared in this file, previously not ordered after
        # generate_T
        SCHED_UE_LIB PHY_UE L2_UE SIMU_ETH NB_IoT)
    if (TARGET ${i})
      add_dependencies(${i} generate_T)
    endif()
  endforeach()
endif()

##################################################
# Generated specific cases is not regular code
###############################################

####################
# itti symbolic debug print require to generate a specific include file
########################################

# Retrieve the directory-level compile definitions and include paths and
# turn them into -D/-I options for gccxml/castxml.
get_directory_property( DirDefs COMPILE_DEFINITIONS )
foreach( d ${DirDefs} )
  list(APPEND itti_compiler_options "-D${d}")
endforeach()
get_directory_property( DirDefs INCLUDE_DIRECTORIES )
foreach( d ${DirDefs} )
  list(APPEND itti_compiler_options "-I${d}")
endforeach()

# castxml doesn't work with c11 (gcc 5 default)
# force castxml and clang compilation with gnu89 standard
# we can't use cXX standard as pthread_rwlock_t is gnu standard
list(APPEND itti_compiler_options "-std=gnu89")
set (ITTI_H ${ITTI_DIR}/intertask_interface_types.h)
# Use gccxml when it is installed in /usr/bin, castxml (in gccxml
# compatibility mode) otherwise.
if(EXISTS /usr/bin/gccxml)
  set(xml_command gccxml ${itti_compiler_options} -fxml=${OPENAIR_BIN_DIR}/messages.xml ${ITTI_H})
else()
  set(xml_command castxml --castxml-gccxml ${itti_compiler_options} ${ITTI_H} -o ${OPENAIR_BIN_DIR}/messages.xml)
endif()

# messages.xml: XML description of the ITTI message types header
add_custom_command (
  OUTPUT ${OPENAIR_BIN_DIR}/messages.xml
  COMMAND ${xml_command}
  DEPENDS ${S1AP_OAI_generated} ${RRC_FULL_DIR}/asn1_constants.h
  )

# messages_xml.h: messages.xml rewritten by sed into quoted lines (spaces
# stripped, double quotes escaped, each line wrapped in "...\n").
# The command is intentionally left exactly as it was: it is not VERBATIM,
# so its quoting depends on how the generator passes it to the shell.
add_custom_command (
  OUTPUT ${OPENAIR_BIN_DIR}/messages_xml.h
  COMMAND sed -e 's/ *//' -e 's/\"/\\\\\"/g' -e 's/^/\"/' -e 's/$$/\\\\n\"/' ${OPENAIR_BIN_DIR}/messages.xml > ${OPENAIR_BIN_DIR}/messages_xml.h
  DEPENDS ${OPENAIR_BIN_DIR}/messages.xml ${RRC_FULL_DIR}/asn1_constants.h
  )

################
# Kernel modules
###############
# Set compiler options for kernel modules
# we need to get out cmake to use the regular Linux Kernel process
# this is documented as https://www.kernel.org/doc/Documentation/kbuild/modules.txt
######################################

# Collect the directory-level compile definitions and include paths into
# module_cc_opt, a single space-separated string of -D/-I options
# (presumably consumed by tools/Kbuild.cmake - confirm against the template).
get_directory_property(DirDefs COMPILE_DEFINITIONS )
foreach( d ${DirDefs} )
  set(module_cc_opt "${module_cc_opt} -D${d}")
endforeach()
get_directory_property( DirDefs INCLUDE_DIRECTORIES )
foreach( d ${DirDefs} )
  set(module_cc_opt "${module_cc_opt} -I${d}")
endforeach()

# Kernel build tree of the kernel running on the build host
execute_process(COMMAND uname -r
  OUTPUT_VARIABLE os_release
  OUTPUT_STRIP_TRAILING_WHITESPACE)
set(module_build_path /lib/modules/${os_release}/build)

# make_driver(<name> <dir> [<source>...])
#
# Declares a custom target <name> that builds the kernel module <name>.ko
# by running the kernel's own build system
# (make -C ${module_build_path} M=${OPENAIR_BIN_DIR}/<name>).
#
#   name       module and target name
#   dir        directory that holds the module sources
#   <source>   source file names, relative to <dir>
#
# A Kbuild file is generated into ${OPENAIR_BIN_DIR}/<name> from
# tools/Kbuild.cmake with configure_file(). The local variable names
# (name, dir, objs, src_path_list) are kept unchanged because that template
# may reference them - TODO confirm against the template.
#
# Example: make_driver(ue_ip ${OPENAIR2_DIR}/NETWORK_DRIVER/UE_IP device.c common.c)
function(make_driver name dir)
  file(MAKE_DIRECTORY ${OPENAIR_BIN_DIR}/${name})
  # Start from empty accumulators so that nothing is inherited from
  # same-named variables of the calling scope.
  set(src_path_list "")
  set(objs "")
  foreach(f IN ITEMS ${ARGN})
    list(APPEND src_path_list ${dir}/${f})
    # foo.c -> foo.o; objs is a space-separated string, not a cmake list
    string(REGEX REPLACE "c *$" "o" obj ${f})
    set(objs "${objs} ${obj}")
  endforeach()
  configure_file(${OPENAIR_CMAKE}/tools/Kbuild.cmake ${OPENAIR_BIN_DIR}/${name}/Kbuild)
  # The source list used to follow a SOURCES keyword, which
  # add_custom_command(OUTPUT ...) does not define; it is now given as
  # DEPENDS. The comment used ${module}, which is never set, instead of
  # ${name}.
  add_custom_command(OUTPUT ${name}.ko
    COMMAND make -C ${module_build_path} M=${OPENAIR_BIN_DIR}/${name}
    DEPENDS ${src_path_list}
    WORKING_DIRECTORY ${OPENAIR_BIN_DIR}/${name}
    COMMENT "building ${name}.ko"
    VERBATIM
    )
  add_custom_target(${name} DEPENDS ${name}.ko)
endfunction()

# nasmesh module
################
list(APPEND nasmesh_src device.c common.c ioctl.c classifier.c tool.c mesh.c)
set(module_cc_opt "${module_cc_opt} -DNAS_NETLINK -DPDCP_USE_NETLINK")
# legacy Makefile was using NAS_NETLINK flag, but other drivers the hereafter flag
# so, this cmake use OAI_NW_DRIVER_USE_NETLINK everywhere
if (OAI_NW_DRIVER_USE_NETLINK)
  list(APPEND nasmesh_src netlink.c)
endif()
# Kernel module "nasmesh", built from the NETWORK_DRIVER/MESH sources
# collected in nasmesh_src.
make_driver(
  nasmesh
  ${OPENAIR2_DIR}/NETWORK_DRIVER/MESH
  ${nasmesh_src}
)

# rb_tool: user space tool for configuring the MESH IP driver
################
add_executable(rb_tool
  ${OPENAIR2_DIR}/NETWORK_DRIVER/MESH/constant.h
  ${OPENAIR2_DIR}/NETWORK_DRIVER/MESH/ioctl.h
  ${OPENAIR2_DIR}/NETWORK_DRIVER/MESH/rrc_nas_primitives.h
  ${OPENAIR2_DIR}/NETWORK_DRIVER/MESH/RB_TOOL/rb_tool.c
)
target_include_directories(rb_tool
  PRIVATE ${OPENAIR2_DIR}/NETWORK_DRIVER/MESH/
)

# oai_nw_drv: kernel module built from the NETWORK_DRIVER/LITE sources;
# netlink.c is only compiled in when OAI_NW_DRIVER_USE_NETLINK is set.
####################
list(APPEND oai_nw_drv_src
  device.c
  common.c
  ioctl.c
  classifier.c
  tool.c
)
if(OAI_NW_DRIVER_USE_NETLINK)
  list(APPEND oai_nw_drv_src netlink.c)
endif()
make_driver(
  oai_nw_drv
  ${OPENAIR2_DIR}/NETWORK_DRIVER/LITE
  ${oai_nw_drv_src}
)

# Exmimo board: openair_rf kernel module and the updatefw user space tool
#########################
list(APPEND openair_rf_src
  module_main.c
  irq.c
  fileops.c
  exmimo_fw.c
)
make_driver(
  openair_rf
  ${OPENAIR_TARGETS}/ARCH/EXMIMO/DRIVER/eurecom
  ${openair_rf_src}
)

add_executable(updatefw
  ${OPENAIR_TARGETS}/ARCH/EXMIMO/USERSPACE/OAI_FW_INIT/updatefw.c
)

# ue_ip: purpose ???
###############
# Kernel module "ue_ip", built from the NETWORK_DRIVER/UE_IP sources;
# netlink.c is only compiled in when OAI_NW_DRIVER_USE_NETLINK is set.
list(APPEND ue_ip_src device.c common.c)
if(OAI_NW_DRIVER_USE_NETLINK)
  list(APPEND ue_ip_src netlink.c)
endif()
make_driver(ue_ip ${OPENAIR2_DIR}/NETWORK_DRIVER/UE_IP ${ue_ip_src})


# OCTAVE tools
###############
# Each .oct file is produced by running mkoctfile on the .cc file of the
# same name found in OCT_DIR; the custom target "oarf" builds all of them.
set(OCT_INCL -I${OPENAIR_TARGETS}/ARCH/EXMIMO/DEFS -I${OPENAIR_TARGETS}/ARCH/EXMIMO/USERSPACE/LIB -I${OPENAIR_TARGETS}/ARCH/COMMON)
set(OCT_LIBS -L${CMAKE_CURRENT_BINARY_DIR} -lm -lOPENAIR0_LIB)
set(OCT_FLAGS -DEXMIMO)
set(OCT_DIR ${OPENAIR_TARGETS}/ARCH/EXMIMO/USERSPACE/OCTAVE)
# oarf_config_exmimo.oct used to be listed twice, which declared the same
# custom rule twice; it is listed once now. NOTE(review): if the second
# entry was meant to name a different file, add that file here.
set(OCT_FILES
  oarf_config_exmimo.oct
  oarf_get_frame.oct
  oarf_stop.oct
  oarf_send_frame.oct
  oarf_get_num_detected_cards.oct
  oarf_stop_without_reset.oct
)

foreach(file IN ITEMS ${OCT_FILES})
  # foo.oct -> foo.cc
  string(REGEX REPLACE "oct *$" "cc" src "${file}")
  add_custom_command(
    OUTPUT ${file}
    # rebuilt when the source changes or OPENAIR0_LIB (linked through
    # OCT_LIBS) is rebuilt
    DEPENDS ${OCT_DIR}/${src} OPENAIR0_LIB
    COMMAND mkoctfile
    ARGS ${OCT_FLAGS} ${OCT_INCL} ${OCT_LIBS}
    ARGS -o ${file} ${OCT_DIR}/${src}
    COMMENT "Generating ${file}"
    VERBATIM
  )
endforeach()

add_custom_target(oarf
  DEPENDS ${OCT_FILES}
)

include (${OPENAIR_DIR}/common/utils/telnetsrv/telnetsrv_CMakeLists.txt)




diff --git a/cmake_targets/tools/build_helper b/cmake_targets/tools/build_helper new file mode 100755 index 0000000000000000000000000000000000000000..f0ae7e79c8240c34f0b196613f0e54f556601122 --- /dev/null +++ b/cmake_targets/tools/build_helper @@ -0,0 +1,771 @@ +#/* +# * Licensed to the OpenAirInterface (OAI) Software Alliance under one or more +# * contributor license agreements. See the NOTICE file distributed with +# * this work for additional information regarding copyright ownership. +# * The OpenAirInterface Software Alliance licenses this file to You under +# * the OAI Public License, Version 1.1 (the "License"); you may not use this file +# * except in compliance with the License.
+# * You may obtain a copy of the License at +# * +# * http://www.openairinterface.org/?page_id=698 +# * +# * Unless required by applicable law or agreed to in writing, software +# * distributed under the License is distributed on an "AS IS" BASIS, +# * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# * See the License for the specific language governing permissions and +# * limitations under the License. +# *------------------------------------------------------------------------------- +# * For more information about the OpenAirInterface (OAI) Software Alliance: +# * contact@openairinterface.org +# */ + +# file build_helper +# brief +# authors Laurent Thomas, Lionel GAUTHIER +# +####################################### +if [ ! -f /etc/os-release ]; then + echo "No /etc/os-release file found. You're likely on an unsupported distro." + exit -1 +fi +OS_DISTRO=$(grep "^ID=" /etc/os-release | sed "s/ID=//" | sed "s/\"//g") +OS_RELEASE=$(grep "^VERSION_ID=" /etc/os-release | sed "s/VERSION_ID=//" | sed "s/\"//g") +case "$OS_DISTRO" in + fedora) OS_BASEDISTRO="fedora"; INSTALLER="dnf"; CMAKE="cmake" ;; + rhel) OS_BASEDISTRO="fedora"; INSTALLER="yum"; CMAKE="cmake3" ;; + centos) OS_BASEDISTRO="fedora"; INSTALLER="yum"; CMAKE="cmake3" ;; + debian) OS_BASEDISTRO="debian"; INSTALLER="apt-get"; CMAKE="cmake" ;; + ubuntu) OS_BASEDISTRO="debian"; INSTALLER="apt-get"; CMAKE="cmake" ;; +esac +KERNEL_VERSION=$(uname -r | cut -d '.' -f1) +KERNEL_MAJOR=$(uname -r | cut -d '.' -f2) + +SUDO='sudo -E' + +############################### +## echo and family +############################### +black='\E[30m' +red='\E[31m' +green='\E[32m' +yellow='\E[33m' +blue='\E[1;34m' +magenta='\E[35m' +cyan='\E[36m' +white='\E[37m' +reset_color='\E[00m' +COLORIZE=1 + +cecho() { + # Color-echo + # arg1 = message + # arg2 = color + local default_msg="No Message." 
+ message=${1:-$default_msg} + color=${2:-$green} + [ "$COLORIZE" = "1" ] && message="$color$message$reset_color" + echo -e "$message" + return +} + +echo_error() { cecho "$*" $red ;} +echo_fatal() { cecho "$*" $red; exit -1 ;} +echo_warning() { cecho "$*" $yellow ;} +echo_success() { cecho "$*" $green ;} +echo_info() { cecho "$*" $blue ;} + +######################## +# distribution helpers # +######################## + +# This function return a string to identify the distribution we are running +# If we can't check the distribution, it returns "Unknown" +# This function return always true as exit code by design +# Examples: +# ubuntu16.04 +# debian8.5 +get_distribution_release() { + if [[ ! -z "$OS_DISTRO$OS_RELEASE" ]]; then + echo "$OS_DISTRO$OS_RELEASE" + else + echo Unknown + fi +} + +check_supported_distribution() { + local distribution=$(get_distribution_release) + case "$distribution" in + "ubuntu17.10") return 0 ;; + "ubuntu17.04") return 0 ;; + "ubuntu16.04") return 0 ;; + "ubuntu14.04") return 0 ;; + "fedora24") return 0 ;; + "rhel7") return 0 ;; + "centos7") return 0 ;; + esac + return 1 +} + +################## +# Error handlers # +################## + +handler_EXIT() { + local exit_code=$? 
+ [ "$exit_code" -eq 0 ] || echo_error "build have failed" + exit $exit_code +} + +trap handler_EXIT EXIT + +########################### +# Cleaners +########################### + +clean_kernel() { + $SUDO modprobe ip_tables + $SUDO modprobe x_tables + $SUDO iptables -P INPUT ACCEPT + $SUDO iptables -F INPUT + $SUDO iptables -P OUTPUT ACCEPT + $SUDO iptables -F OUTPUT + $SUDO iptables -P FORWARD ACCEPT + $SUDO iptables -F FORWARD + $SUDO iptables -t nat -F + $SUDO iptables -t mangle -F + $SUDO iptables -t filter -F + $SUDO iptables -t raw -F + echo_info "Flushed iptables" + $SUDO rmmod nasmesh > /dev/null 2>&1 + $SUDO rmmod oai_nw_drv > /dev/null 2>&1 + $SUDO rmmod openair_rf > /dev/null 2>&1 + $SUDO rmmod ue_ip > /dev/null 2>&1 + echo_info "removed drivers from kernel" +} + +clean_all_files() { + set_openair_env + dir=$OPENAIR_DIR/cmake_targets + rm -rf $dir/log $OPENAIR_DIR/targets/bin/* + rm -rf $dir/lte_build_oai $dir/lte-simulators/build + rm -rf $dir/oaisim_build_oai/build $dir/oaisim_build_oai/CMakeLists.txt + rm -rf $dir/autotests/bin $dir/autotests/log $dir/autotests/*/build +} + +################################### +# Compilers +################################### + +#check_warnings: +# print error message if the compilation had warnings +#argument: +# $1: log file +check_warnings() { + #we look for 'warning:' in the compilation log file + #this is how gcc starts a warning + #this is not perfect, we may get false positive + warning_count=`grep "warning:" "$1"|wc -l` + if [ $warning_count -gt 0 ]; then + echo_error "WARNING: $warning_count warnings. 
See $1" + fi +} + +compilations() { + cd $OPENAIR_DIR/cmake_targets/$1/build + set +e + { + rm -f $3 + if [ "$VERBOSE_COMPILE" == "1" ]; then + make -j`nproc` $2 VERBOSE=$VERBOSE_COMPILE + else + make -j`nproc` $2 + fi + + } > $dlog/$2.$REL.txt 2>&1 + set -e + echo_info "Log file for compilation has been written to: $dlog/$2.$REL.txt" + if [ -s $3 ] ; then + cp $3 $4 + echo_success "$2 compiled" + check_warnings "$dlog/$2.$REL.txt" + else + echo_error "$2 compilation failed" + exit 1 + fi +} + +############################################ +# External packages installers +############################################ + +install_protobuf_from_source(){ + protobuf_install_log=$OPENAIR_DIR/cmake_targets/log/protobuf_install_log.txt + echo_info "\nInstalling Google Protobuf from sources. The log file for Protobuf installation is here: $protobuf_install_log " + ( + cd /tmp + echo "Downloading protobuf" + #rm -rf /tmp/protobuf-2.6.1.tar.gz* /tmp/protobuf-2.6.1 + #wget https://github.com/google/protobuf/releases/download/v2.6.1/protobuf-2.6.1.tar.gz + #tar -xzvf protobuf-2.6.1.tar.gz --owner $USER --group $USER --no-same-owner + #cd protobuf-2.6.1/ + rm -rf /tmp/protobuf-cpp-3.3.0.tar.gz* /tmp/protobuf-3.3.0 + wget https://github.com/google/protobuf/releases/download/v3.3.0/protobuf-cpp-3.3.0.tar.gz + tar -xzvf protobuf-cpp-3.3.0.tar.gz --owner $USER --group $(groups | cut -d" " -f1) --no-same-owner + cd protobuf-3.3.0/ + ./configure + echo "Compiling protobuf" + make -j`nproc` + $SUDO make install + $SUDO ldconfig + ) >& $protobuf_install_log +} + +install_protobuf_c_from_source(){ + protobuf_c_install_log=$OPENAIR_DIR/cmake_targets/log/protobuf_c_install_log.txt + echo_info "\nInstalling Google Protobuf_C from sources. 
The log file for Protobuf_C installation is here: $protobuf_c_install_log " + ( + if [[ "$OS_DISTRO" == "rhel" ]] || [[ "$OS_DISTRO" == "centos" ]]; then + export PKG_CONFIG_PATH=/usr/local/lib/pkgconfig + fi + cd /tmp + echo "Downloading protobuf-c" + rm -rf /tmp/protobuf-c + git clone https://github.com/protobuf-c/protobuf-c.git + cd protobuf-c + git checkout 2a46af42784abf86804d536f6e0122d47cfeea45 + ./autogen.sh + ./configure + echo "Compiling protobuf-c" + make -j`nproc` + $SUDO make install + $SUDO ldconfig + ) >& $protobuf_c_install_log +} + +install_usrp_uhd_driver_from_source(){ + uhd_install_log=$OPENAIR_DIR/cmake_targets/log/uhd_install_log.txt + echo_info "\nInstalling UHD driver from sources. The log file for UHD driver installation is here: $uhd_install_log " + ( + cd /tmp + echo "Downloading UHD driver" + rm -rf /tmp/uhd + git clone https://github.com/EttusResearch/uhd.git + cd uhd + git checkout tags/release_003_010_001_001 + mkdir -p host/build + cd host/build + $CMAKE ../ + echo "Compiling UHD" + make -j`nproc` + make test + $SUDO make install + $SUDO ldconfig + ) >& $uhd_install_log +} + +check_install_usrp_uhd_driver(){ + if [[ "$OS_DISTRO" == "ubuntu" ]]; then + #first we remove old installation + $SUDO apt-get remove -y uhd || true + $SUDO apt-get remove libuhd-dev libuhd003 uhd-host -y + v=$(lsb_release -cs) + $SUDO apt-add-repository --remove "deb http://files.ettus.com/binaries/uhd/repo/uhd/ubuntu/$v $v main" + #The new USRP repository + $SUDO add-apt-repository ppa:ettusresearch/uhd -y + $SUDO apt-get update + $SUDO apt-get -y --allow-unauthenticated install python python-tk libboost-all-dev libusb-1.0-0-dev + $SUDO apt-get -y --allow-unauthenticated install libuhd-dev libuhd003 uhd-host + elif [[ "$OS_BASEDISTRO" == "fedora" ]]; then + $SUDO $INSTALLER -y install python boost libusb-devel libusbx-devel boost-devel python-mako python-docutils cmake + $SUDO pip install requests + if [[ "$OS_DISTRO" == "rhel" ]] || [[ "$OS_DISTRO" == 
"centos" ]]; then + # until EPEL repo hasn't bumped UHD driver to >=3.10 in EPEL, build driver from source + $SUDO $INSTALLER -y remove uhd uhd-devel uhd-firmware + install_usrp_uhd_driver_from_source + else + $SUDO $INSTALLER -y install uhd uhd-devel uhd-firmware + fi + fi +} + +install_usrp_uhd_driver() { + if [[ "$OS_DISTRO" == "ubuntu" ]]; then + # We move uhd-host apart because it depends on linux kernel version + # On newer kernels, it fails to install + $SUDO apt-get -y install uhd-host + fi + if [ -z $1 ]; then + $SUDO uhd_images_downloader + else + $SUDO uhd_images_downloader -i $1 + fi +} + +install_bladerf_driver_from_source(){ + bladerf_install_log=$OPENAIR_DIR/cmake_targets/log/bladerf_install_log.txt + echo_info "\nInstalling BladeRF driver from sources. The log file for BladeRF driver installation is here: $bladerf_install_log " + ( + cd /tmp + echo "Downloading BladeRF driver" + rm -rf /tmp/bladeRF + git clone https://github.com/Nuand/bladeRF.git + cd bladeRF + git checkout tags/2016.06 + mkdir -p build + cd build + $CMAKE ../ + echo "Compiling BladeRF driver" + make + $SUDO make install + $SUDO ldconfig + echo "Downloading FPGA and firmware images" + cd /tmp/bladeRF + wget https://www.nuand.com/fx3/bladeRF_fw_latest.img + wget https://www.nuand.com/fpga/hostedx40-latest.rbf + sudo mkdir -p /usr/share/Nuand/bladeRF + sudo mv bladeRF_fw_latest.img /usr/share/Nuand/bladeRF/bladeRF_fw.img + sudo mv hostedx40-latest.rbf /usr/share/Nuand/bladeRF/hostedx40.rbf + ) >& $bladerf_install_log +} + +check_install_bladerf_driver(){ + if [[ "$OS_DISTRO" == "ubuntu" ]]; then + if [ "$(get_distribution_release)" == "ubuntu14.04" ] ; then + $SUDO add-apt-repository -y ppa:bladerf/bladerf + $SUDO apt-get update + fi + $SUDO apt-get install -y --allow-unauthenticated bladerf libbladerf-dev + $SUDO apt-get install -y --allow-unauthenticated bladerf-firmware-fx3 + $SUDO apt-get install -y --allow-unauthenticated bladerf-fpga-hostedx40 + elif [[ "$OS_BASEDISTRO" == 
"fedora" ]]; then + install_bladerf_driver_from_source + else + echo_error "BladeRF Installer for OAI does not support automatic build. Install BladeRF compiling sources manually from BladeRF website" + fi +} + +flash_firmware_bladerf() { + $SUDO bladeRF-cli --flash-firmware /usr/share/Nuand/bladeRF/bladeRF_fw.img +} + +check_install_lmssdr_driver(){ + if ( [ -d "/usr/local/include/lime" ] && + [ -f "/usr/local/include/lime/LimeSuite.h" ] ) + then + echo_success "Found lmssdr drivers and tools installed from source" + else + echo_error "lmssdr support implies installing lmssdr drivers and tools" \ + " from sources. check:" + echo_info "https://open-cells.com/index.php/2017/05/10/limesdr-installation/" + echo_fatal "Cannot compile lmssdr device" + fi + + +} + +check_install_additional_tools (){ + $SUDO $INSTALLER update -y + if [[ "$OS_DISTRO" == "ubuntu" ]]; then + PACKAGE_LIST="\ + check \ + dialog \ + dkms \ + gawk \ + libboost-all-dev \ + libpthread-stubs0-dev \ + openvpn \ + pkg-config \ + python-dev \ + python-pexpect \ + sshfs \ + swig \ + tshark \ + uml-utilities \ + unzip \ + valgrind \ + vlan \ + ctags \ + ntpdate \ + iperf3 \ + android-tools-adb \ + wvdial \ + python-numpy \ + sshpass \ + nscd \ + bc \ + ntp \ + python-scipy \ + python-matplotlib" + elif [[ "$OS_DISTRO" == "rhel" ]] || [[ "$OS_DISTRO" == "centos" ]]; then + PACKAGE_LIST="\ + check \ + dialog \ + dkms \ + gawk \ + boost-devel \ + openvpn \ + pkgconfig \ + pexpect \ + sshfs \ + swig \ + wireshark \ + unzip \ + valgrind \ + vconfig \ + ctags \ + ntpdate \ + iperf3 \ + wvdial \ + numpy \ + sshpass \ + nscd \ + python2-paramiko \ + python-pyroute2 \ + python-netifaces \ + scipy \ + python-matplotlib" + elif [[ "$OS_DISTRO" == "fedora" ]]; then + PACKAGE_LIST=" \ + check \ + dialog \ + dkms \ + gawk \ + boost-devel \ + openvpn \ + pkgconfig \ + python-pexpect \ + sshfs \ + swig \ + wireshark \ + unzip \ + valgrind \ + vconfig \ + ctags \ + ntpdate \ + iperf3 \ + wvdial \ + python-numpy \ + 
sshpass \ + nscd \ + python2-paramiko \ + python-pyroute2 \ + python-netifaces \ + python2-scipy \ + python2-matplotlib" + fi + $SUDO $INSTALLER install -y $PACKAGE_LIST + + $SUDO rm -fr /opt/ssh + $SUDO GIT_SSL_NO_VERIFY=true git clone https://gitlab.eurecom.fr/oai/ssh.git /opt/ssh + + #The packages below are already installed for Redhat distros (RHEL, CentOS, Fedora) + if [[ "$OS_DISTRO" == "ubuntu" ]]; then + $SUDO pip install paramiko + $SUDO pip install pyroute2 colorama + log_netiface=$OPENAIR_DIR/cmake_targets/log/netiface_install_log.txt + echo_info "Installing Netinterfaces package. The logfile for installation is in $log_netiface" + ( + $SUDO rm -fr /tmp/netifaces-0.10.4.tar.gz /tmp/netifaces + wget -P /tmp https://pypi.python.org/packages/18/fa/dd13d4910aea339c0bb87d2b3838d8fd923c11869b1f6e741dbd0ff3bc00/netifaces-0.10.4.tar.gz + tar -xzvf /tmp/netifaces-0.10.4.tar.gz -C /tmp + cd /tmp/netifaces-0.10.4 + $SUDO python setup.py install + cd - + ) >& $log_netiface + fi +} + +check_install_oai_software() { + local specific_packages="" + if ! check_supported_distribution; then + echo_error "Your distribution $(get_distribution_release) is not supported by oai !" 
+ exit 1 + fi + $SUDO $INSTALLER update -y + if [[ "$OS_DISTRO" == "ubuntu" ]]; then + local LAPACK_LIBNAME="liblapack.so" + local LAPACK_TARGET="/usr/lib/atlas-base/atlas/liblapack.so" + $SUDO apt install -y software-properties-common + case "$(get_distribution_release)" in + "ubuntu14.04") + specific_packages="libtasn1-3-dev gccxml libgnutls-dev libatlas-dev" + # For iperf3 + $SUDO add-apt-repository "deb http://archive.ubuntu.com/ubuntu trusty-backports universe" + $SUDO apt-get update + ;; + "ubuntu16.04") + specific_packages="libtasn1-6-dev gccxml libgnutls-dev libatlas-dev" + ;; + "ubuntu17.04") + specific_packages="libtasn1-6-dev castxml libgnutls28-dev libatlas-dev" + ;; + "ubuntu17.10") + specific_packages="libtasn1-6-dev castxml libgnutls28-dev" + LAPACK_LIBNAME="liblapack.so-x86_64-linux-gnu" + LAPACK_TARGET="/usr/lib/x86_64-linux-gnu/atlas/liblapack.so" + ;; + esac + $SUDO apt-get install -y \ + $specific_packages \ + autoconf \ + automake \ + bison \ + build-essential \ + cmake \ + cmake-curses-gui \ + doxygen \ + doxygen-gui \ + texlive-latex-base \ + ethtool \ + flex \ + gdb \ + git \ + graphviz \ + gtkwave \ + guile-2.0-dev \ + iperf \ + iproute \ + iptables \ + iptables-dev \ + libatlas-base-dev \ + libblas-dev \ + libconfig8-dev \ + libffi-dev \ + libforms-bin \ + libforms-dev \ + libgcrypt11-dev \ + libgmp-dev \ + libgtk-3-dev \ + libidn2-0-dev \ + libidn11-dev \ + libmysqlclient-dev \ + liboctave-dev \ + libpgm-dev \ + libpython2.7-dev \ + libsctp1 \ + libsctp-dev \ + libssl-dev \ + libtool \ + libusb-1.0-0-dev \ + libxml2 \ + libxml2-dev \ + libxslt1-dev \ + mscgen \ + octave \ + octave-signal \ + openssh-client \ + openssh-server \ + openssl \ + python \ + subversion \ + xmlstarlet \ + python-pip \ + pydb \ + libyaml-dev \ + wget \ + libxpm-dev + + $SUDO update-alternatives --set "$LAPACK_LIBNAME" "$LAPACK_TARGET" + + $SUDO apt-get install -y nettle-dev nettle-bin + elif [[ "$OS_BASEDISTRO" == "fedora" ]]; then + if [[ "$OS_DISTRO" == "rhel" 
]] || [[ "$OS_DISTRO" == "centos" ]]; then + if rpm -q epel-release > /dev/null; then + echo "EPEL repos already present. Good." + else + echo "EPEL repos not present. Installing them." + $SUDO $INSTALLER install -y https://dl.fedoraproject.org/pub/epel/epel-release-latest-7.noarch.rpm + fi + $SUDO $INSTALLER install -y python-epdb + $SUDO $INSTALLER install -y gccxml + else + $SUDO $INSTALLER install -y mscgen pydb + # Fedora repos already contain gccxml's successor castxml. + $SUDO $INSTALLER install -y castxml + fi + + $SUDO $INSTALLER install -y \ + autoconf \ + automake \ + bc \ + bison \ + $CMAKE \ + doxygen \ + ethtool \ + flex \ + gdb \ + git \ + graphviz \ + gtkwave \ + guile-devel \ + iperf \ + iproute \ + iptables \ + iptables-devel \ + atlas-devel \ + blas-devel \ + libconfig-devel \ + libffi-devel \ + xforms \ + xforms-devel \ + libgcrypt-devel \ + gmp-devel \ + gtk3-devel \ + libidn2-devel \ + libidn-devel \ + mariadb-devel \ + octave-devel \ + openpgm-devel \ + lksctp-tools \ + lksctp-tools-devel \ + openssl-devel \ + libtasn1 \ + libtool \ + libusb-devel \ + libxml2 \ + libxml2-devel \ + libxslt-devel \ + octave \ + octave-signal \ + openssh-clients \ + openssh-server \ + openssl \ + patch \ + psmisc \ + python \ + subversion \ + xmlstarlet \ + python-pip \ + wget \ + kernel-headers \ + kernel-devel \ + nettle-devel \ + gnutls-devel \ + libXpm-devel \ + lapack \ + lapack-devel \ + blas \ + blas-devel \ + libyaml-devel + fi + + install_asn1c_from_source + $SUDO rm -fr /opt/ssh + $SUDO git clone https://gist.github.com/2190472.git /opt/ssh +} + +install_asn1c_from_source(){ + asn1_install_log=$OPENAIR_DIR/cmake_targets/log/asn1c_install_log.txt + echo_info "\nInstalling ASN1. 
The log file for ASN1 installation is here: $asn1_install_log " + ( + $SUDO rm -rf /tmp/asn1c + # GIT_SSL_NO_VERIFY=true git clone https://gitlab.eurecom.fr/oai/asn1c.git /tmp/asn1c + git clone https://github.com/velichkov/asn1c /tmp/asn1c + cd /tmp/asn1c + git checkout s1ap + test -f configure || autoreconf -iv + ./configure + make -j`nproc` + $SUDO make install + cd - + $SUDO ldconfig + ) > $asn1_install_log 2>&1 +} + +################################################# +# 2. compile +################################################ + +install_nas_tools() { + if [ ! -f .ue.nvram0 ]; then + echo_success "generate .ue_emm.nvram .ue.nvram" + ./nvram --gen -c $1 -o $2 + else + [ ./nvram -nt .ue.nvram0 -o ./nvram -nt .ue_emm.nvram0 ] && ./nvram --gen -c $1 -o $2 + fi + + if [ ! -f .usim.nvram0 ]; then + echo_success "generate .usim.nvram" + ./usim --gen -c $1 -o $2 + else + [ ./usim -nt .usim.nvram0 ] && ./usim --gen -c $1 -o $2 + fi + +} + + +################################ +# set_openair_env +############################### +set_openair_env(){ + fullpath=`readlink -f $BASH_SOURCE` + [ -f "/.$fullpath" ] || fullpath=`readlink -f $PWD/$fullpath` + openair_path=${fullpath%/cmake_targets/*} + openair_path=${openair_path%/targets/*} + openair_path=${openair_path%/openair[123]/*} + export OPENAIR_DIR=$openair_path + export OPENAIR1_DIR=$openair_path/openair1 + export OPENAIR2_DIR=$openair_path/openair2 + export OPENAIR3_DIR=$openair_path/openair3 + export OPENAIR_TARGETS=$openair_path/targets +} + +################################ +# Function to killall the subprocesses when Ctrl-C Key is hit +############################### +function handle_ctrl_c(){ +CURPID=$$ +ppid=$$ +arraycounter=1 +echo_info "** Trapped CTRL-C. Killing all subprocesses now..." +echo_info "** Calling sync now..." 
+sync +while true +do + FORLOOP=FALSE + # Get all the child process id + for i in `ps -ef| awk '$3 == '$ppid' { print $2 }'` + do + if [ $i -ne $CURPID ] ; then + procid[$arraycounter]=$i + arraycounter=`expr $arraycounter + 1` + ppid=$i + FORLOOP=TRUE + fi + done + if [ "$FORLOOP" = "FALSE" ] ; then + arraycounter=`expr $arraycounter - 1` + ## We want to kill child process id first and then parent id's + while [ $arraycounter -ne 0 ] + do + echo "first we send ctrl-c to program" + $SUDO kill -INT "${procid[$arraycounter]}" + sleep 5 + echo "Now we force kill if that didn't work" + $SUDO kill -9 "${procid[$arraycounter]}" >/dev/null + arraycounter=`expr $arraycounter - 1` + done + exit + fi +done +} + + +# get from http://www.linuxjournal.com/content/validating-ip-address-bash-script +validate_ip() { + +local ip=$1 +local stat=1 + +if [[ $ip =~ ^[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}$ ]]; then + OIFS=$IFS + IFS='.' + ip=($ip) + IFS=$OIFS + [[ ${ip[0]} -le 255 && ${ip[1]} -le 255 \ + && ${ip[2]} -le 255 && ${ip[3]} -le 255 ]] + stat=$? 
+fi + +return $stat +} diff --git a/cmake_targets/tools/fix_asn1 b/cmake_targets/tools/fix_asn1 new file mode 100755 index 0000000000000000000000000000000000000000..cba93f589853330ace893b7bd2049c01375f8129 --- /dev/null +++ b/cmake_targets/tools/fix_asn1 @@ -0,0 +1,202 @@ +#!/bin/bash + +# in those arrays, each line is: +# <file> <sha1sum of file (without line 4 which changes depending on the location of the files)> <patch to apply to file> + +RRC_Rel14=( + "SystemInformation-r8-IEs.h" 4df485c5ddf2540eca271876cdc512caa19b0890 "fix_asn1.data/RRC.rel14/SystemInformation-r8-IEs.h.diff" + "SystemInformation-NB-r13-IEs.h" 6d91332d5c39205819b06e5e36efe62ff8e5b33b "fix_asn1.data/RRC.rel14/SystemInformation-NB-r13-IEs.h.diff" +) + +RRC_Rel10=( + "SystemInformation-r8-IEs.h" 603cd6615cff36ec7020692d72c0d6de7c4859cb "fix_asn1.data/RRC.rel10/SystemInformation-r8-IEs.h.diff" +) + +X2AP_Rel11_2=( + "X2ap-CriticalityDiagnostics-IE-List.h" ae96308b37fcbcbf39da5012e42968135fc5f27b "fix_asn1.data/X2AP.rel11.2/X2ap-CriticalityDiagnostics-IE-List.h.diff" +) + +red_color="$(tput setaf 1)" +green_color="$(tput setaf 2)" +reset_color="$(tput sgr0)" + +function error() +{ + echo -e "$red_color"ERROR: "$@""$reset_color" + exit 1 +} + +function check_sha1() +{ + local file="$1" + local target_sha1="$2" + + if [ ! -f "$file" ] + then + error "$file: no such file" + fi + + # we don't use the line 4 of the file + # it contains the location of the ASN1 grammar + # and this location is not the same on every + # installation (this is for *.h files, for *.c + # files it's no big deal to skip that line) + local computed_sha1=$(sed 4d "$file" | sha1sum | cut -f 1 -d ' ') + + if [ "$target_sha1" != "$computed_sha1" ] + then + error "$file: wrong SHA1" + fi +} + +function patch_file() +{ + local patch="$1" + local file="$2" + + echo -e "$green_color""patch file $file with $OPENAIR_DIR/cmake_targets/tools/$patch""$reset_color" + + patch "$file" "$OPENAIR_DIR/cmake_targets/tools/$patch" + if [ $? 
-ne 0 ] + then + error "patching of $file with $OPENAIR_DIR/cmake_targets/tools/$patch failed" + fi +} + +function apply_patches() +{ + local directory="$1" + local array=$2 + local len=$3 # the length could be computed locally but the way to do it is not clear to me [CROUX] + + local i + local file + local sha1 + local patch + local item + + for (( i = 0; i < $len; i += 3 )) + do + # special bash syntax to access the array + item=$array[$i]; file=${!item} + item=$array[$((i+1))]; sha1=${!item} + item=$array[$((i+2))]; patch=${!item} + check_sha1 "$directory/$file" "$sha1" + patch_file "$patch" "$directory/$file" + done +} + +function patch_rrc() +{ + local directory="$1" + local version="$2" + + case "$version" in + Rel14 ) + echo "patching RRC files release 14" + #apply_patches "$directory" RRC_Rel14 ${#RRC_Rel14[*]} + ;; + Rel10 ) + echo "patching RRC files release 10" + apply_patches "$directory" RRC_Rel10 ${#RRC_Rel10[*]} + ;; + Rel8 ) + echo "patching RRC files release 8 TODO?" + ;; + * ) + error unknwon/unhandled RRC version \'"$version"\' + ;; + esac +} + +function patch_nr_rrc() +{ + local directory="$1" + local version="$2" + + case "$version" in + NR_Rel15 ) + echo "patching NR_RRC files release 15" + apply_patches "$directory" NR_RRC_Rel15 ${#NR_RRC_Rel15[*]} + ;; + * ) + error unknwon/unhandled NR_RRC version \'"$version"\' + ;; + esac +} + + +function patch_x2ap() +{ + local directory="$1" + local version="$2" + + case "$version" in + R14 ) + ;; + R11 ) + echo "patching X2AP files release 11.2" + apply_patches "$directory" X2AP_Rel11_2 ${#X2AP_Rel11_2[*]} + ;; + * ) + error unknwon/unhandled X2AP version \'"$version"\' + ;; + esac +} + +function patch_s1ap() +{ + local directory="$1" + local version="$2" + + case "$version" in + R14 ) + ;; + R10 ) + #nothing to do anymore (fixes went to asn1c) + ;; + * ) + error unknwon/unhandled S1AP version \'"$version"\' + ;; + esac +} + +function main() +{ + if [ $# -ne 3 ] + then + echo "ERROR: pass <output 
directory> <module> <version>"
+    exit 1
+  fi
+
+  if [ x"$OPENAIR_DIR" = x ]
+  then
+    error "the variable OPENAIR_DIR is not set"
+  fi
+
+  local directory="$1"
+  local module="$2"
+  local version="$3"
+
+  # dispatch to the per-module patch function
+  case "$module" in
+    RRC )
+      patch_rrc "$directory" "$version"
+      ;;
+    NR_RRC )
+      patch_nr_rrc "$directory" "$version"
+      ;;
+    X2AP )
+      patch_x2ap "$directory" "$version"
+      ;;
+    S1AP )
+      patch_s1ap "$directory" "$version"
+      ;;
+    * )
+      error unknown module "$module"
+      ;;
+  esac
+
+  exit 0
+}
+
+main "$@"
diff --git a/cmake_targets/tools/generate_asn1 b/cmake_targets/tools/generate_asn1
new file mode 100755
index 0000000000000000000000000000000000000000..fb38455a126a809026fb306e8663a53a4ed01c2a
--- /dev/null
+++ b/cmake_targets/tools/generate_asn1
@@ -0,0 +1,165 @@
+#!/bin/bash
+
+# main <output directory> <arguments...>
+# Creates the output directory, changes into it and drops it from the
+# argument list. After that "$2" is read as the module name.
+function main()
+{
+# quote the directory so that paths with spaces work, and stop if it cannot
+# be entered: everything below creates and deletes files in the current directory
+mkdir -p "$1"
+cd "$1" || exit 1
+shift
+
+local module="$2"
+
+#this branch is taken when exactly 2 arguments remain after the shift, i.e. the
+#script was called with 3 arguments; "$2" is then the module name
+#NOTE(review): "$1" is presumably the single grammar file - confirm against the callers
+#(there may be a better way...)
+if [ $# -eq 2 ]; then
+
+#asn1c does not work well with extension groups, we need the following fix:
+# replace [[ by '<name> SEQUENCE {'
+# and ]] by '} OPTIONAL'
+#<name> is ext<N> with N starting from 1 and incremented at each new [[ ]] just
+#following another [[ ]]
+#
+#this is what the following C program does
+
+echo generate asnfix.c
+
+cat << EOF > asnfix.c
+/* transforms:
+ * '[[' to 'name SEQUENCE {'
+ * ']]' to '} OPTIONAL'
+ * name is ext1, ext2, ..., for each [[ at the same level
+ * levels are delimited by { and }
+ * -- to end of line is a comment and unprocessed
+ * nested [[ ]] not handled
+ * { and } must be balanced
+ * [[ and ]] can be whatever, every combination is valid
+ */
+#include <stdio.h>
+#include <stdlib.h>
+
+void level(int toplevel)
+{
+  int c;
+  int next_name = 1;
+
+  while (1) {
+    c = getchar();
+next:
+    if (c == EOF) { if (toplevel) break; abort(); }
+
+    if (c == '-') {
+      c = getchar();
+      if (c != '-') { putchar('-'); goto next; }
+      putchar(c); putchar(c);
+      while (1)
{ + c = getchar(); if (c == EOF) abort(); + putchar(c); + if (c == '\n') break; + } + continue; + } + + if (c == '[') { + c = getchar(); + if (c != '[') { putchar('['); goto next; } + printf("ext%d SEQUENCE {", next_name); + next_name++; + continue; + } + + if (c == ']') { + c = getchar(); + if (c != ']') { putchar(']'); goto next; } + printf("} OPTIONAL"); + continue; + } + + putchar(c); + if (c == '}') { if (toplevel) abort(); break; } + if (c == '{') level(0); + } +} + +int main(void) +{ + level(1); + fflush(stdout); + return 0; +} +EOF + +echo compile asnfix.c + +gcc -Wall -o asnfix asnfix.c + +echo run asnfix on $1 + +./asnfix < $1 > fixed_grammar.asn + +rm -f asnfix asnfix.c + +echo done with asnfix + +echo running asn1c + +case "$module" in + RRC ) + asn1c -gen-PER -fcompound-names -no-gen-example fixed_grammar.asn 2>&1 | grep -v -- '->' | grep -v '^Compiled' |grep -v sample + ;; + NR_RRC ) + export ASN1C_PREFIX=NR_ + asn1c -gen-PER -fcompound-names -no-gen-example fixed_grammar.asn 2>&1 | grep -v -- '->' | grep -v '^Compiled' |grep -v sample + ;; + S1AP ) + export ASN1C_PREFIX=S1AP_ + asn1c -gen-PER -fcompound-names -no-gen-example fixed_grammar.asn 2>&1 | grep -v -- '->' | grep -v '^Compiled' |grep -v sample + ;; +esac + + + +rm -f fixed_grammar.asn + +echo asn1c done + +else + +case "$module" in + RRC ) + asn1c -fcompound-names -fno-include-deps -gen-PER -no-gen-OER -no-gen-example $* 2>&1 | grep -v -- '->' | grep -v '^Compiled' |grep -v sample + ;; + NR_RRC ) + export ASN1C_PREFIX=NR_ + asn1c -fcompound-names -fno-include-deps -gen-PER -no-gen-OER -no-gen-example $* 2>&1 | grep -v -- '->' | grep -v '^Compiled' |grep -v sample + ;; + S1AP ) + export ASN1C_PREFIX=S1AP_ + asn1c -fcompound-names -fno-include-deps -gen-PER -no-gen-OER -no-gen-example $* 2>&1 | grep -v -- '->' | grep -v '^Compiled' |grep -v sample + ;; +esac + +fi + +awk ' + BEGIN { + print "#ifndef __ASN1_CONSTANTS_H__" + print "#define __ASN1_CONSTANTS_H__" + } + /INTEGER ::=/ { + 
gsub("INTEGER ::=","") + gsub("--","//") + gsub("-1","_minus_1") + gsub("-","_") + printf("#define %s\n",$0) + } + /::=.*INTEGER.*[(]/ { + nb_fields=split($0,val,"[:=().]+"); + gsub("-","_",val[1]); + printf("#define min_val_%s %s\n",val[1],val[nb_fields-2]); + printf("#define max_val_%s %s\n",val[1],val[nb_fields-1]); + } + END { + print "#endif "; + } ' $1 > asn1_constants.h +} + +main "$@" diff --git a/openair1/PHY/LTE_TRANSPORT/dlsch_decoding.c b/openair1/PHY/LTE_TRANSPORT/dlsch_decoding.c new file mode 100644 index 0000000000000000000000000000000000000000..4020056e6f68043a591f4792c287202e49a76ab0 --- /dev/null +++ b/openair1/PHY/LTE_TRANSPORT/dlsch_decoding.c @@ -0,0 +1,748 @@ +/* + * Licensed to the OpenAirInterface (OAI) Software Alliance under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The OpenAirInterface Software Alliance licenses this file to You under + * the OAI Public License, Version 1.1 (the "License"); you may not use this file + * except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.openairinterface.org/?page_id=698 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + *------------------------------------------------------------------------------- + * For more information about the OpenAirInterface (OAI) Software Alliance: + * contact@openairinterface.org + */ + +/*! \file PHY/LTE_TRANSPORT/dlsch_decoding.c +* \brief Top-level routines for decoding Turbo-coded (DLSCH) transport channels from 36-212, V8.6 2009-03 +* \author R. 
Knopp +* \date 2011 +* \version 0.1 +* \company Eurecom +* \email: knopp@eurecom.fr +* \note +* \warning +*/ + +//#include "defs.h" +#include "PHY/defs.h" +#include "PHY/extern.h" +#include "PHY/CODING/extern.h" +#include "SCHED/extern.h" +#include "SIMULATION/TOOLS/defs.h" +//#define DEBUG_DLSCH_DECODING +//#define UE_DEBUG_TRACE 1 + + +void free_ue_dlsch(LTE_UE_DLSCH_t *dlsch) +{ + + int i,r; + + if (dlsch) { + for (i=0; i<dlsch->Mdlharq; i++) { + if (dlsch->harq_processes[i]) { + if (dlsch->harq_processes[i]->b) { + free16(dlsch->harq_processes[i]->b,MAX_DLSCH_PAYLOAD_BYTES); + dlsch->harq_processes[i]->b = NULL; + } + + for (r=0; r<MAX_NUM_DLSCH_SEGMENTS; r++) { + free16(dlsch->harq_processes[i]->c[r],((r==0)?8:0) + 3+768); + dlsch->harq_processes[i]->c[r] = NULL; + } + + for (r=0; r<MAX_NUM_DLSCH_SEGMENTS; r++) + if (dlsch->harq_processes[i]->d[r]) { + free16(dlsch->harq_processes[i]->d[r],((3*8*6144)+12+96)*sizeof(short)); + dlsch->harq_processes[i]->d[r] = NULL; + } + + free16(dlsch->harq_processes[i],sizeof(LTE_DL_UE_HARQ_t)); + dlsch->harq_processes[i] = NULL; + } + } + + free16(dlsch,sizeof(LTE_UE_DLSCH_t)); + dlsch = NULL; + } +} + +LTE_UE_DLSCH_t *new_ue_dlsch(uint8_t Kmimo,uint8_t Mdlharq,uint32_t Nsoft,uint8_t max_turbo_iterations,uint8_t N_RB_DL, uint8_t abstraction_flag) +{ + + LTE_UE_DLSCH_t *dlsch; + uint8_t exit_flag = 0,i,r; + + unsigned char bw_scaling =1; + + switch (N_RB_DL) { + case 6: + bw_scaling =16; + break; + + case 25: + bw_scaling =4; + break; + + case 50: + bw_scaling =2; + break; + + default: + bw_scaling =1; + break; + } + + dlsch = (LTE_UE_DLSCH_t *)malloc16(sizeof(LTE_UE_DLSCH_t)); + + if (dlsch) { + memset(dlsch,0,sizeof(LTE_UE_DLSCH_t)); + dlsch->Kmimo = Kmimo; + dlsch->Mdlharq = Mdlharq; + dlsch->Nsoft = Nsoft; + dlsch->max_turbo_iterations = max_turbo_iterations; + + for (i=0; i<Mdlharq; i++) { + // printf("new_ue_dlsch: Harq process %d\n",i); + dlsch->harq_processes[i] = (LTE_DL_UE_HARQ_t 
*)malloc16(sizeof(LTE_DL_UE_HARQ_t)); + + if (dlsch->harq_processes[i]) { + memset(dlsch->harq_processes[i],0,sizeof(LTE_DL_UE_HARQ_t)); + dlsch->harq_processes[i]->first_tx=1; + dlsch->harq_processes[i]->b = (uint8_t*)malloc16(MAX_DLSCH_PAYLOAD_BYTES/bw_scaling); + + if (dlsch->harq_processes[i]->b) + memset(dlsch->harq_processes[i]->b,0,MAX_DLSCH_PAYLOAD_BYTES/bw_scaling); + else + exit_flag=3; + + if (abstraction_flag == 0) { + for (r=0; r<MAX_NUM_DLSCH_SEGMENTS/bw_scaling; r++) { + dlsch->harq_processes[i]->c[r] = (uint8_t*)malloc16(((r==0)?8:0) + 3+ 768); + + if (dlsch->harq_processes[i]->c[r]) + memset(dlsch->harq_processes[i]->c[r],0,((r==0)?8:0) + 3+ 768); + else + exit_flag=2; + + dlsch->harq_processes[i]->d[r] = (short*)malloc16(((3*8*6144)+12+96)*sizeof(short)); + + if (dlsch->harq_processes[i]->d[r]) + memset(dlsch->harq_processes[i]->d[r],0,((3*8*6144)+12+96)*sizeof(short)); + else + exit_flag=2; + } + } + } else { + exit_flag=1; + } + } + + if (exit_flag==0) + return(dlsch); + } + + printf("new_ue_dlsch with size %zu: exit_flag = %u\n",sizeof(LTE_DL_UE_HARQ_t), exit_flag); + free_ue_dlsch(dlsch); + + return(NULL); +} + +uint32_t dlsch_decoding(PHY_VARS_UE *phy_vars_ue, + short *dlsch_llr, + LTE_DL_FRAME_PARMS *frame_parms, + LTE_UE_DLSCH_t *dlsch, + LTE_DL_UE_HARQ_t *harq_process, + uint32_t frame, + uint8_t subframe, + uint8_t harq_pid, + uint8_t is_crnti, + uint8_t llr8_flag) +{ + +#if UE_TIMING_TRACE + time_stats_t *dlsch_rate_unmatching_stats=&phy_vars_ue->dlsch_rate_unmatching_stats; + time_stats_t *dlsch_turbo_decoding_stats=&phy_vars_ue->dlsch_turbo_decoding_stats; + time_stats_t *dlsch_deinterleaving_stats=&phy_vars_ue->dlsch_deinterleaving_stats; +#endif + uint32_t A,E; + uint32_t G; + uint32_t ret,offset; + uint16_t iind; + // uint8_t dummy_channel_output[(3*8*block_length)+12]; + short dummy_w[MAX_NUM_DLSCH_SEGMENTS][3*(6144+64)]; + uint32_t r,r_offset=0,Kr,Kr_bytes,err_flag=0; + uint8_t crc_type; +#ifdef DEBUG_DLSCH_DECODING + uint16_t i; 
+#endif + //#ifdef __AVX2__ +#if 0 + int Kr_last,skipped_last=0; + uint8_t (*tc_2cw)(int16_t *y, + int16_t *y2, + uint8_t *, + uint8_t *, + uint16_t, + uint16_t, + uint16_t, + uint8_t, + uint8_t, + uint8_t, + time_stats_t *, + time_stats_t *, + time_stats_t *, + time_stats_t *, + time_stats_t *, + time_stats_t *, + time_stats_t *); + +#endif +decoder_if_t tc; + + + + + if (!dlsch_llr) { + printf("dlsch_decoding.c: NULL dlsch_llr pointer\n"); + return(dlsch->max_turbo_iterations); + } + + if (!harq_process) { + printf("dlsch_decoding.c: NULL harq_process pointer\n"); + return(dlsch->max_turbo_iterations); + } + + if (!frame_parms) { + printf("dlsch_decoding.c: NULL frame_parms pointer\n"); + return(dlsch->max_turbo_iterations); + } + + if (subframe>9) { + printf("dlsch_decoding.c: Illegal subframe index %d\n",subframe); + return(dlsch->max_turbo_iterations); + } + + if (dlsch->harq_ack[subframe].ack != 2) { + LOG_D(PHY, "[UE %d] DLSCH @ SF%d : ACK bit is %d instead of DTX even before PDSCH is decoded!\n", + phy_vars_ue->Mod_id, subframe, dlsch->harq_ack[subframe].ack); + } + + if (llr8_flag == 0) { + //#ifdef __AVX2__ +#if 0 + tc_2cw = phy_threegpplte_turbo_decoder16avx2; +#endif + tc = decoder16; + } + else + { + AssertFatal (harq_process->TBS >= 256 , "Mismatch flag nbRB=%d TBS=%d mcs=%d Qm=%d RIV=%d round=%d \n", + harq_process->nb_rb, harq_process->TBS,harq_process->mcs,harq_process->Qm,harq_process->rvidx,harq_process->round); + tc = decoder8; + } + + + // nb_rb = dlsch->nb_rb; + + /* + if (nb_rb > frame_parms->N_RB_DL) { + printf("dlsch_decoding.c: Illegal nb_rb %d\n",nb_rb); + return(max_turbo_iterations); + }*/ + + /*harq_pid = dlsch->current_harq_pid[phy_vars_ue->current_thread_id[subframe]]; + if (harq_pid >= 8) { + printf("dlsch_decoding.c: Illegal harq_pid %d\n",harq_pid); + return(max_turbo_iterations); + } + */ + + harq_process->trials[harq_process->round]++; + + A = harq_process->TBS; //2072 for QPSK 1/3 + + ret = dlsch->max_turbo_iterations; + + + G 
= harq_process->G; + //get_G(frame_parms,nb_rb,dlsch->rb_alloc,mod_order,num_pdcch_symbols,phy_vars_ue->frame,subframe); + + // printf("DLSCH Decoding, harq_pid %d Ndi %d\n",harq_pid,harq_process->Ndi); + + if (harq_process->round == 0) { + // This is a new packet, so compute quantities regarding segmentation + harq_process->B = A+24; + lte_segmentation(NULL, + NULL, + harq_process->B, + &harq_process->C, + &harq_process->Cplus, + &harq_process->Cminus, + &harq_process->Kplus, + &harq_process->Kminus, + &harq_process->F); + // CLEAR LLR's HERE for first packet in process + } + + /* + else { + printf("dlsch_decoding.c: Ndi>0 not checked yet!!\n"); + return(max_turbo_iterations); + } + */ + err_flag = 0; + r_offset = 0; + + unsigned char bw_scaling =1; + + switch (frame_parms->N_RB_DL) { + case 6: + bw_scaling =16; + break; + + case 25: + bw_scaling =4; + break; + + case 50: + bw_scaling =2; + break; + + default: + bw_scaling =1; + break; + } + + if (harq_process->C > MAX_NUM_DLSCH_SEGMENTS/bw_scaling) { + LOG_E(PHY,"Illegal harq_process->C %d > %d\n",harq_process->C,MAX_NUM_DLSCH_SEGMENTS/bw_scaling); + return((1+dlsch->max_turbo_iterations)); + } +#ifdef DEBUG_DLSCH_DECODING + printf("Segmentation: C %d, Cminus %d, Kminus %d, Kplus %d\n",harq_process->C,harq_process->Cminus,harq_process->Kminus,harq_process->Kplus); +#endif + + opp_enabled=1; + + for (r=0; r<harq_process->C; r++) { + + + // Get Turbo interleaver parameters + if (r<harq_process->Cminus) + Kr = harq_process->Kminus; + else + Kr = harq_process->Kplus; + + Kr_bytes = Kr>>3; + + if (Kr_bytes<=64) + iind = (Kr_bytes-5); + else if (Kr_bytes <=128) + iind = 59 + ((Kr_bytes-64)>>1); + else if (Kr_bytes <= 256) + iind = 91 + ((Kr_bytes-128)>>2); + else if (Kr_bytes <= 768) + iind = 123 + ((Kr_bytes-256)>>3); + else { + printf("dlsch_decoding: Illegal codeword size %d!!!\n",Kr_bytes); + return(dlsch->max_turbo_iterations); + } + +#ifdef DEBUG_DLSCH_DECODING + printf("f1 %d, f2 %d, F 
%d\n",f1f2mat_old[2*iind],f1f2mat_old[1+(2*iind)],(r==0) ? harq_process->F : 0); +#endif + +#if UE_TIMING_TRACE + start_meas(dlsch_rate_unmatching_stats); +#endif + memset(&dummy_w[r][0],0,3*(6144+64)*sizeof(short)); + harq_process->RTC[r] = generate_dummy_w(4+(Kr_bytes*8), + (uint8_t*) &dummy_w[r][0], + (r==0) ? harq_process->F : 0); + +#ifdef DEBUG_DLSCH_DECODING + LOG_D(PHY,"HARQ_PID %d Rate Matching Segment %d (coded bits %d,unpunctured/repeated bits %d, TBS %d, mod_order %d, nb_rb %d, Nl %d, rv %d, round %d)...\n", + harq_pid,r, G, + Kr*3, + harq_process->TBS, + harq_process->Qm, + harq_process->nb_rb, + harq_process->Nl, + harq_process->rvidx, + harq_process->round); +#endif + +#ifdef DEBUG_DLSCH_DECODING + printf(" in decoding dlsch->harq_processes[harq_pid]->rvidx = %d\n", dlsch->harq_processes[harq_pid]->rvidx); +#endif + if (lte_rate_matching_turbo_rx(harq_process->RTC[r], + G, + harq_process->w[r], + (uint8_t*)&dummy_w[r][0], + dlsch_llr+r_offset, + harq_process->C, + dlsch->Nsoft, + dlsch->Mdlharq, + dlsch->Kmimo, + harq_process->rvidx, + (harq_process->round==0)?1:0, + harq_process->Qm, + harq_process->Nl, + r, + &E)==-1) { +#if UE_TIMING_TRACE + stop_meas(dlsch_rate_unmatching_stats); +#endif + LOG_E(PHY,"dlsch_decoding.c: Problem in rate_matching\n"); + return(dlsch->max_turbo_iterations); + } else + { +#if UE_TIMING_TRACE + stop_meas(dlsch_rate_unmatching_stats); +#endif + } + r_offset += E; + + /* + printf("Subblock deinterleaving, d %p w %p\n", + harq_process->d[r], + harq_process->w); + */ +#if UE_TIMING_TRACE + start_meas(dlsch_deinterleaving_stats); +#endif + sub_block_deinterleaving_turbo(4+Kr, + &harq_process->d[r][96], + + harq_process->w[r]); +#if UE_TIMING_TRACE + stop_meas(dlsch_deinterleaving_stats); +#endif +#ifdef DEBUG_DLSCH_DECODING + /* + if (r==0) { + write_output("decoder_llr.m","decllr",dlsch_llr,G,1,0); + write_output("decoder_in.m","dec",&harq_process->d[0][96],(3*8*Kr_bytes)+12,1,0); + } + + printf("decoder input(segment %d) 
:",r); + int i; for (i=0;i<(3*8*Kr_bytes)+12;i++) + printf("%d : %d\n",i,harq_process->d[r][96+i]); + printf("\n");*/ +#endif + + + // printf("Clearing c, %p\n",harq_process->c[r]); + memset(harq_process->c[r],0,Kr_bytes); + + // printf("done\n"); + if (harq_process->C == 1) + crc_type = CRC24_A; + else + crc_type = CRC24_B; + + /* + printf("decoder input(segment %d)\n",r); + for (i=0;i<(3*8*Kr_bytes)+12;i++) + if ((harq_process->d[r][96+i]>7) || + (harq_process->d[r][96+i] < -8)) + printf("%d : %d\n",i,harq_process->d[r][96+i]); + printf("\n"); + */ + + //#ifndef __AVX2__ +#if 1 + if (err_flag == 0) { +/* + LOG_I(PHY, "turbo algo Kr=%d cb_cnt=%d C=%d nbRB=%d crc_type %d TBSInput=%d TBSHarq=%d TBSplus24=%d mcs=%d Qm=%d RIV=%d round=%d maxIter %d\n", + Kr,r,harq_process->C,harq_process->nb_rb,crc_type,A,harq_process->TBS, + harq_process->B,harq_process->mcs,harq_process->Qm,harq_process->rvidx,harq_process->round,dlsch->max_turbo_iterations); +*/ + if (llr8_flag) { + AssertFatal (Kr >= 256, "turbo algo issue Kr=%d cb_cnt=%d C=%d nbRB=%d TBSInput=%d TBSHarq=%d TBSplus24=%d mcs=%d Qm=%d RIV=%d round=%d\n", + Kr,r,harq_process->C,harq_process->nb_rb,A,harq_process->TBS,harq_process->B,harq_process->mcs,harq_process->Qm,harq_process->rvidx,harq_process->round); + } +#if UE_TIMING_TRACE + start_meas(dlsch_turbo_decoding_stats); +#endif + LOG_D(PHY,"AbsSubframe %d.%d Start turbo segment %d/%d \n",frame%1024,subframe,r,harq_process->C-1); + ret = tc + (&harq_process->d[r][96], + NULL, + harq_process->c[r], + NULL, + Kr, + f1f2mat_old[iind*2], + f1f2mat_old[(iind*2)+1], + dlsch->max_turbo_iterations, + crc_type, + (r==0) ? 
harq_process->F : 0, + &phy_vars_ue->dlsch_tc_init_stats, + &phy_vars_ue->dlsch_tc_alpha_stats, + &phy_vars_ue->dlsch_tc_beta_stats, + &phy_vars_ue->dlsch_tc_gamma_stats, + &phy_vars_ue->dlsch_tc_ext_stats, + &phy_vars_ue->dlsch_tc_intl1_stats, + &phy_vars_ue->dlsch_tc_intl2_stats); //(is_crnti==0)?harq_pid:harq_pid+1); + +#if UE_TIMING_TRACE + stop_meas(dlsch_turbo_decoding_stats); +#endif + } +#else + if ((harq_process->C == 1) || + ((r==harq_process->C-1) && (skipped_last==0))) { // last segment with odd number of segments + +#if UE_TIMING_TRACE + start_meas(dlsch_turbo_decoding_stats); +#endif + ret = tc + (&harq_process->d[r][96], + harq_process->c[r], + Kr, + f1f2mat_old[iind*2], + f1f2mat_old[(iind*2)+1], + dlsch->max_turbo_iterations, + crc_type, + (r==0) ? harq_process->F : 0, + &phy_vars_ue->dlsch_tc_init_stats, + &phy_vars_ue->dlsch_tc_alpha_stats, + &phy_vars_ue->dlsch_tc_beta_stats, + &phy_vars_ue->dlsch_tc_gamma_stats, + &phy_vars_ue->dlsch_tc_ext_stats, + &phy_vars_ue->dlsch_tc_intl1_stats, + &phy_vars_ue->dlsch_tc_intl2_stats); //(is_crnti==0)?harq_pid:harq_pid+1); + #if UE_TIMING_TRACE + stop_meas(dlsch_turbo_decoding_stats); +#endif + // printf("single decode, exit\n"); + // exit(-1); + } + else { + // we can merge code segments + if ((skipped_last == 0) && (r<harq_process->C-1)) { + skipped_last = 1; + Kr_last = Kr; + } + else { + skipped_last=0; + + if (Kr_last == Kr) { // decode 2 code segments with AVX2 version +#ifdef DEBUG_DLSCH_DECODING + printf("single decoding segment %d (%p)\n",r-1,&harq_process->d[r-1][96]); +#endif +#if UE_TIMING_TRACE + start_meas(dlsch_turbo_decoding_stats); +#endif +#ifdef DEBUG_DLSCH_DECODING + printf("double decoding segments %d,%d (%p,%p)\n",r-1,r,&harq_process->d[r-1][96],&harq_process->d[r][96]); +#endif + ret = tc_2cw + (&harq_process->d[r-1][96], + &harq_process->d[r][96], + harq_process->c[r-1], + harq_process->c[r], + Kr, + f1f2mat_old[iind*2], + f1f2mat_old[(iind*2)+1], + dlsch->max_turbo_iterations, + 
crc_type, + (r==0) ? harq_process->F : 0, + &phy_vars_ue->dlsch_tc_init_stats, + &phy_vars_ue->dlsch_tc_alpha_stats, + &phy_vars_ue->dlsch_tc_beta_stats, + &phy_vars_ue->dlsch_tc_gamma_stats, + &phy_vars_ue->dlsch_tc_ext_stats, + &phy_vars_ue->dlsch_tc_intl1_stats, + &phy_vars_ue->dlsch_tc_intl2_stats); //(is_crnti==0)?harq_pid:harq_pid+1); + /* + ret = tc + (&harq_process->d[r-1][96], + harq_process->c[r-1], + Kr_last, + f1f2mat_old[iind*2], + f1f2mat_old[(iind*2)+1], + dlsch->max_turbo_iterations, + crc_type, + (r==0) ? harq_process->F : 0, + &phy_vars_ue->dlsch_tc_init_stats, + &phy_vars_ue->dlsch_tc_alpha_stats, + &phy_vars_ue->dlsch_tc_beta_stats, + &phy_vars_ue->dlsch_tc_gamma_stats, + &phy_vars_ue->dlsch_tc_ext_stats, + &phy_vars_ue->dlsch_tc_intl1_stats, + &phy_vars_ue->dlsch_tc_intl2_stats); //(is_crnti==0)?harq_pid:harq_pid+1); + + exit(-1);*/ +#if UE_TIMING_TRACE + stop_meas(dlsch_turbo_decoding_stats); +#endif + } + else { // Kr_last != Kr +#if UE_TIMING_TRACE + start_meas(dlsch_turbo_decoding_stats); +#endif + ret = tc + (&harq_process->d[r-1][96], + harq_process->c[r-1], + Kr_last, + f1f2mat_old[iind*2], + f1f2mat_old[(iind*2)+1], + dlsch->max_turbo_iterations, + crc_type, + (r==0) ? harq_process->F : 0, + &phy_vars_ue->dlsch_tc_init_stats, + &phy_vars_ue->dlsch_tc_alpha_stats, + &phy_vars_ue->dlsch_tc_beta_stats, + &phy_vars_ue->dlsch_tc_gamma_stats, + &phy_vars_ue->dlsch_tc_ext_stats, + &phy_vars_ue->dlsch_tc_intl1_stats, + &phy_vars_ue->dlsch_tc_intl2_stats); //(is_crnti==0)?harq_pid:harq_pid+1); +#if UE_TIMING_TRACE + stop_meas(dlsch_turbo_decoding_stats); + + start_meas(dlsch_turbo_decoding_stats); +#endif + + ret = tc + (&harq_process->d[r][96], + harq_process->c[r], + Kr, + f1f2mat_old[iind*2], + f1f2mat_old[(iind*2)+1], + dlsch->max_turbo_iterations, + crc_type, + (r==0) ? 
harq_process->F : 0, + &phy_vars_ue->dlsch_tc_init_stats, + &phy_vars_ue->dlsch_tc_alpha_stats, + &phy_vars_ue->dlsch_tc_beta_stats, + &phy_vars_ue->dlsch_tc_gamma_stats, + &phy_vars_ue->dlsch_tc_ext_stats, + &phy_vars_ue->dlsch_tc_intl1_stats, + &phy_vars_ue->dlsch_tc_intl2_stats); //(is_crnti==0)?harq_pid:harq_pid+1); + +#if UE_TIMING_TRACE + + stop_meas(dlsch_turbo_decoding_stats); + + /*printf("Segmentation: C %d r %d, dlsch_rate_unmatching_stats %5.3f dlsch_deinterleaving_stats %5.3f dlsch_turbo_decoding_stats %5.3f \n", + harq_process->C, + r, + dlsch_rate_unmatching_stats->p_time/(cpuf*1000.0), + dlsch_deinterleaving_stats->p_time/(cpuf*1000.0), + dlsch_turbo_decoding_stats->p_time/(cpuf*1000.0));*/ +#endif + } + } + } +#endif + + + if ((err_flag == 0) && (ret>=(1+dlsch->max_turbo_iterations))) {// a Code segment is in error so break; + LOG_D(PHY,"AbsSubframe %d.%d CRC failed, segment %d/%d \n",frame%1024,subframe,r,harq_process->C-1); + err_flag = 1; + } + } + + int32_t frame_rx_prev = frame; + int32_t subframe_rx_prev = subframe - 1; + if (subframe_rx_prev < 0) { + frame_rx_prev--; + subframe_rx_prev += 10; + } + frame_rx_prev = frame_rx_prev%1024; + + if (err_flag == 1) { +#if UE_DEBUG_TRACE + LOG_I(PHY,"[UE %d] DLSCH: Setting NAK for SFN/SF %d/%d (pid %d, status %d, round %d, TBS %d, mcs %d) Kr %d r %d harq_process->round %d\n", + phy_vars_ue->Mod_id, frame, subframe, harq_pid,harq_process->status, harq_process->round,harq_process->TBS,harq_process->mcs,Kr,r,harq_process->round); +#endif + dlsch->harq_ack[subframe].ack = 0; + dlsch->harq_ack[subframe].harq_id = harq_pid; + dlsch->harq_ack[subframe].send_harq_status = 1; + harq_process->errors[harq_process->round]++; + harq_process->round++; + + + // printf("Rate: [UE %d] DLSCH: Setting NACK for subframe %d (pid %d, round %d)\n",phy_vars_ue->Mod_id,subframe,harq_pid,harq_process->round); + if (harq_process->round >= dlsch->Mdlharq) { + harq_process->status = SCH_IDLE; + harq_process->round = 0; + } + 
if(is_crnti) + { + LOG_D(PHY,"[UE %d] DLSCH: Setting NACK for subframe %d (pid %d, pid status %d, round %d/Max %d, TBS %d)\n", + phy_vars_ue->Mod_id,subframe,harq_pid,harq_process->status,harq_process->round,dlsch->Mdlharq,harq_process->TBS); + } + + return((1+dlsch->max_turbo_iterations)); + } else { +#if UE_DEBUG_TRACE + LOG_I(PHY,"[UE %d] DLSCH: Setting ACK for subframe %d TBS %d mcs %d nb_rb %d\n", + phy_vars_ue->Mod_id,subframe,harq_process->TBS,harq_process->mcs,harq_process->nb_rb); +#endif + + harq_process->status = SCH_IDLE; + harq_process->round = 0; + dlsch->harq_ack[subframe].ack = 1; + dlsch->harq_ack[subframe].harq_id = harq_pid; + dlsch->harq_ack[subframe].send_harq_status = 1; + //LOG_I(PHY,"[UE %d] DLSCH: Setting ACK for SFN/SF %d/%d (pid %d, status %d, round %d, TBS %d, mcs %d)\n", + // phy_vars_ue->Mod_id, frame, subframe, harq_pid, harq_process->status, harq_process->round,harq_process->TBS,harq_process->mcs); + + if(is_crnti) + { + LOG_D(PHY,"[UE %d] DLSCH: Setting ACK for subframe %d (pid %d, round %d, TBS %d)\n",phy_vars_ue->Mod_id,subframe,harq_pid,harq_process->round,harq_process->TBS); + } + //LOG_D(PHY,"[UE %d] DLSCH: Setting ACK for subframe %d (pid %d, round %d)\n",phy_vars_ue->Mod_id,subframe,harq_pid,harq_process->round); + + } + + // Reassembly of Transport block here + offset = 0; + + /* + printf("harq_pid %d\n",harq_pid); + printf("F %d, Fbytes %d\n",harq_process->F,harq_process->F>>3); + printf("C %d\n",harq_process->C); + */ + for (r=0; r<harq_process->C; r++) { + if (r<harq_process->Cminus) + Kr = harq_process->Kminus; + else + Kr = harq_process->Kplus; + + Kr_bytes = Kr>>3; + + // printf("Segment %d : Kr= %d bytes\n",r,Kr_bytes); + if (r==0) { + memcpy(harq_process->b, + &harq_process->c[0][(harq_process->F>>3)], + Kr_bytes - (harq_process->F>>3)- ((harq_process->C>1)?3:0)); + offset = Kr_bytes - (harq_process->F>>3) - ((harq_process->C>1)?3:0); + // printf("copied %d bytes to b sequence (harq_pid %d)\n", + // Kr_bytes - 
(harq_process->F>>3),harq_pid); + // printf("b[0] = %x,c[%d] = %x\n", + // harq_process->b[0], + // harq_process->F>>3, + // harq_process->c[0][(harq_process->F>>3)]); + } else { + memcpy(harq_process->b+offset, + harq_process->c[r], + Kr_bytes- ((harq_process->C>1)?3:0)); + offset += (Kr_bytes - ((harq_process->C>1)?3:0)); + } + } + + dlsch->last_iteration_cnt = ret; + + return(ret); +} + diff --git a/openair1/PHY/LTE_TRANSPORT/dlsch_demodulation.c b/openair1/PHY/LTE_TRANSPORT/dlsch_demodulation.c new file mode 100644 index 0000000000000000000000000000000000000000..6284f279d895d04590cf44ca3d792c1fcda40f47 --- /dev/null +++ b/openair1/PHY/LTE_TRANSPORT/dlsch_demodulation.c @@ -0,0 +1,6155 @@ +/* + * Licensed to the OpenAirInterface (OAI) Software Alliance under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The OpenAirInterface Software Alliance licenses this file to You under + * the OAI Public License, Version 1.1 (the "License"); you may not use this file + * except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.openairinterface.org/?page_id=698 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + *------------------------------------------------------------------------------- + * For more information about the OpenAirInterface (OAI) Software Alliance: + * contact@openairinterface.org + */ + +/*! \file PHY/LTE_TRANSPORT/dlsch_demodulation.c + * \brief Top-level routines for demodulating the PDSCH physical channel from 36-211, V8.6 2009-03 + * \author R. Knopp, F. Kaltenberger,A. Bhamri, S. Aubert, X. 
Xiang + * \date 2011 + * \version 0.1 + * \company Eurecom + * \email: knopp@eurecom.fr,florian.kaltenberger@eurecom.fr,ankit.bhamri@eurecom.fr,sebastien.aubert@eurecom.fr + * \note + * \warning + */ +//#include "PHY/defs.h" +#include "PHY/extern.h" +#include "SCHED/defs.h" +#include "defs.h" +#include "extern.h" +#include "PHY/sse_intrin.h" +#include "T.h" + +#define NOCYGWIN_STATIC + +/* dynamic shift for LLR computation for TM3/4 + * set as command line argument, see lte-softmodem.c + * default value: 0 + */ +int16_t dlsch_demod_shift = 0; +int16_t interf_unaw_shift = 13; + +//#define DEBUG_HARQ + +#define DEBUG_PHY 1 +//#define DEBUG_DLSCH_DEMOD 1 + +//#define DISABLE_LOG_X + +// [MCS][i_mod (0,1,2) = (2,4,6)] +unsigned char offset_mumimo_llr_drange_fix=0; +//inferference-free case +unsigned char interf_unaw_shift_tm4_mcs[29]={5, 3, 4, 3, 3, 2, 1, 1, 2, 0, 1, 1, 1, 1, 0, 0, + 1, 1, 1, 1, 0, 2, 1, 0, 1, 0, 1, 0, 0} ; +unsigned char interf_unaw_shift_tm1_mcs[29]={5, 5, 4, 3, 3, 3, 2, 2, 4, 4, 2, 3, 3, 3, 1, 1, + 0, 1, 1, 2, 5, 4, 4, 6, 5, 1, 0, 5, 6} ; // mcs 21, 26, 28 seem to be errorneous + +/* +//original values from sebastion + same hand tuning +unsigned char offset_mumimo_llr_drange[29][3]={{8,8,8},{7,7,7},{7,7,7},{7,7,7},{6,6,6},{6,6,6},{6,6,6},{5,5,5},{4,4,4},{1,2,4}, // QPSK +{5,5,4},{5,5,5},{5,5,5},{3,3,3},{2,2,2},{2,2,2},{2,2,2}, // 16-QAM +{2,2,1},{3,3,3},{3,3,3},{3,3,1},{2,2,2},{2,2,2},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0}}; //64-QAM +*/ + /* + //first optimization try + unsigned char offset_mumimo_llr_drange[29][3]={{7, 8, 7},{6, 6, 7},{6, 6, 7},{6, 6, 6},{5, 6, 6},{5, 5, 6},{5, 5, 6},{4, 5, 4},{4, 3, 4},{3, 2, 2},{6, 5, 5},{5, 4, 4},{5, 5, 4},{3, 3, 2},{2, 2, 1},{2, 1, 1},{2, 2, 2},{3, 3, 3},{3, 3, 2},{3, 3, 2},{3, 2, 1},{2, 2, 2},{2, 2, 2},{0, 0, 0},{0, 0, 0},{0, 0, 0},{0, 0, 0},{0, 0, 0}}; + */ + //second optimization try + /* + unsigned char offset_mumimo_llr_drange[29][3]={{5, 8, 7},{4, 6, 8},{3, 6, 7},{7, 7, 6},{4, 7, 8},{4, 7, 
4},{6, 6, 6},{3, 6, 6},{3, 6, 6},{1, 3, 4},{1, 1, 0},{3, 3, 2},{3, 4, 1},{4, 0, 1},{4, 2, 2},{3, 1, 2},{2, 1, 0},{2, 1, 1},{1, 0, 1},{1, 0, 1},{0, 0, 0},{1, 0, 0},{0, 0, 0},{0, 1, 0},{1, 0, 0},{0, 0, 0},{0, 0, 0},{0, 0, 0},{0, 0, 0}}; w + */ +unsigned char offset_mumimo_llr_drange[29][3]= {{0, 6, 5},{0, 4, 5},{0, 4, 5},{0, 5, 4},{0, 5, 6},{0, 5, 3},{0, 4, 4},{0, 4, 4},{0, 3, 3},{0, 1, 2},{1, 1, 0},{1, 3, 2},{3, 4, 1},{2, 0, 0},{2, 2, 2},{1, 1, 1},{2, 1, 0},{2, 1, 1},{1, 0, 1},{1, 0, 1},{0, 0, 0},{1, 0, 0},{0, 0, 0},{0, 1, 0},{1, 0, 0},{0, 0, 0},{0, 0, 0},{0, 0, 0},{0, 0, 0}}; + + +extern void print_shorts(char *s,int16_t *x); + + +int rx_pdsch(PHY_VARS_UE *ue, + PDSCH_t type, + unsigned char eNB_id, + unsigned char eNB_id_i, //if this == ue->n_connected_eNB, we assume MU interference + uint32_t frame, + uint8_t subframe, + unsigned char symbol, + unsigned char first_symbol_flag, + RX_type_t rx_type, + unsigned char i_mod, + unsigned char harq_pid) +{ + + LTE_UE_COMMON *common_vars = &ue->common_vars; + LTE_UE_PDSCH **pdsch_vars; + LTE_DL_FRAME_PARMS *frame_parms = &ue->frame_parms; + PHY_MEASUREMENTS *measurements = &ue->measurements; + LTE_UE_DLSCH_t **dlsch; + + int avg[4]; + int avg_0[2]; + int avg_1[2]; + +#if UE_TIMING_TRACE + uint8_t slot = 0; +#endif + + unsigned char aatx,aarx; + + unsigned short nb_rb = 0, round; + int avgs = 0, rb; + LTE_DL_UE_HARQ_t *dlsch0_harq,*dlsch1_harq = 0; + + uint8_t beamforming_mode; + uint32_t *rballoc; + + int32_t **rxdataF_comp_ptr; + int32_t **dl_ch_mag_ptr; + int32_t codeword_TB0 = -1; + int32_t codeword_TB1 = -1; + + + + switch (type) { + case SI_PDSCH: + pdsch_vars = &ue->pdsch_vars_SI[eNB_id]; + dlsch = &ue->dlsch_SI[eNB_id]; + dlsch0_harq = dlsch[0]->harq_processes[harq_pid]; + beamforming_mode = 0; + break; + + case RA_PDSCH: + pdsch_vars = &ue->pdsch_vars_ra[eNB_id]; + dlsch = &ue->dlsch_ra[eNB_id]; + dlsch0_harq = dlsch[0]->harq_processes[harq_pid]; + beamforming_mode = 0; + break; + + case PDSCH: + pdsch_vars = 
ue->pdsch_vars[ue->current_thread_id[subframe]]; + dlsch = ue->dlsch[ue->current_thread_id[subframe]][eNB_id]; + //printf("status TB0 = %d, status TB1 = %d \n", dlsch[0]->harq_processes[harq_pid]->status, dlsch[1]->harq_processes[harq_pid]->status); + LOG_D(PHY,"AbsSubframe %d.%d / Sym %d harq_pid %d, harq status %d.%d \n", + frame,subframe,symbol,harq_pid, + dlsch[0]->harq_processes[harq_pid]->status, + dlsch[1]->harq_processes[harq_pid]->status); + + if ((dlsch[0]->harq_processes[harq_pid]->status == ACTIVE) && + (dlsch[1]->harq_processes[harq_pid]->status == ACTIVE)){ + codeword_TB0 = dlsch[0]->harq_processes[harq_pid]->codeword; + codeword_TB1 = dlsch[1]->harq_processes[harq_pid]->codeword; + dlsch0_harq = dlsch[codeword_TB0]->harq_processes[harq_pid]; + dlsch1_harq = dlsch[codeword_TB1]->harq_processes[harq_pid]; +#ifdef DEBUG_HARQ + printf("[DEMOD] I am assuming both TBs are active\n"); +#endif + } + else if ((dlsch[0]->harq_processes[harq_pid]->status == ACTIVE) && + (dlsch[1]->harq_processes[harq_pid]->status != ACTIVE) ) { + codeword_TB0 = dlsch[0]->harq_processes[harq_pid]->codeword; + dlsch0_harq = dlsch[0]->harq_processes[harq_pid]; + dlsch1_harq = NULL; + codeword_TB1 = -1; +#ifdef DEBUG_HARQ + printf("[DEMOD] I am assuming only TB0 is active\n"); +#endif + } + else if ((dlsch[0]->harq_processes[harq_pid]->status != ACTIVE) && + (dlsch[1]->harq_processes[harq_pid]->status == ACTIVE) ){ + codeword_TB1 = dlsch[1]->harq_processes[harq_pid]->codeword; + dlsch0_harq = dlsch[1]->harq_processes[harq_pid]; + dlsch1_harq = NULL; + codeword_TB0 = -1; +#ifdef DEBUG_HARQ + printf("[DEMOD] I am assuming only TB1 is active, it is in cw %d\n", dlsch0_harq->codeword); +#endif + } + else { + LOG_E(PHY,"[UE][FATAL] Frame %d subframe %d: no active DLSCH\n",ue->proc.proc_rxtx[0].frame_rx,subframe); + return(-1); + } + beamforming_mode = ue->transmission_mode[eNB_id]<7?0:ue->transmission_mode[eNB_id]; + break; + + default: + LOG_E(PHY,"[UE][FATAL] Frame %d subframe %d: 
Unknown PDSCH format %d\n",ue->proc.proc_rxtx[0].frame_rx,subframe,type); + return(-1); + break; + } +#ifdef DEBUG_HARQ + printf("[DEMOD] MIMO mode = %d\n", dlsch0_harq->mimo_mode); + printf("[DEMOD] cw for TB0 = %d, cw for TB1 = %d\n", codeword_TB0, codeword_TB1); +#endif + + DevAssert(dlsch0_harq); + round = dlsch0_harq->round; + //printf("round = %d\n", round); + + if (eNB_id > 2) { + LOG_W(PHY,"dlsch_demodulation.c: Illegal eNB_id %d\n",eNB_id); + return(-1); + } + + if (!common_vars) { + LOG_W(PHY,"dlsch_demodulation.c: Null common_vars\n"); + return(-1); + } + + if (!dlsch[0]) { + LOG_W(PHY,"dlsch_demodulation.c: Null dlsch_ue pointer\n"); + return(-1); + } + + if (!pdsch_vars) { + LOG_W(PHY,"dlsch_demodulation.c: Null pdsch_vars pointer\n"); + return(-1); + } + + if (!frame_parms) { + LOG_W(PHY,"dlsch_demodulation.c: Null frame_parms\n"); + return(-1); + } + + if (((frame_parms->Ncp == NORMAL) && (symbol>=7)) || + ((frame_parms->Ncp == EXTENDED) && (symbol>=6))) + rballoc = dlsch0_harq->rb_alloc_odd; + else + rballoc = dlsch0_harq->rb_alloc_even; + + + if (dlsch0_harq->mimo_mode>DUALSTREAM_PUSCH_PRECODING) { + LOG_E(PHY,"This transmission mode is not yet supported!\n"); + return(-1); + } + + + if ((dlsch0_harq->mimo_mode==LARGE_CDD) || ((dlsch0_harq->mimo_mode>=DUALSTREAM_UNIFORM_PRECODING1) && (dlsch0_harq->mimo_mode<=DUALSTREAM_PUSCH_PRECODING))) { + DevAssert(dlsch1_harq); + if (eNB_id!=eNB_id_i) { + LOG_E(PHY,"TM3/TM4 requires to set eNB_id==eNB_id_i!\n"); + return(-1); + } + } + +#if UE_TIMING_TRACE + if(symbol > ue->frame_parms.symbols_per_tti>>1) + { + slot = 1; + } +#endif + +#ifdef DEBUG_HARQ + printf("Demod dlsch0_harq->pmi_alloc %d\n", dlsch0_harq->pmi_alloc); +#endif + + if (frame_parms->nb_antenna_ports_eNB>1 && beamforming_mode==0) { +#ifdef DEBUG_DLSCH_MOD + LOG_I(PHY,"dlsch: using pmi %x (%p), rb_alloc %x\n",pmi2hex_2Ar1(dlsch0_harq->pmi_alloc),dlsch[0],dlsch0_harq->rb_alloc_even[0]); +#endif + +#if UE_TIMING_TRACE + 
start_meas(&ue->generic_stat_bis[ue->current_thread_id[subframe]][slot]); +#endif + nb_rb = dlsch_extract_rbs_dual(common_vars->common_vars_rx_data_per_thread[ue->current_thread_id[subframe]].rxdataF, + common_vars->common_vars_rx_data_per_thread[ue->current_thread_id[subframe]].dl_ch_estimates[eNB_id], + pdsch_vars[eNB_id]->rxdataF_ext, + pdsch_vars[eNB_id]->dl_ch_estimates_ext, + dlsch0_harq->pmi_alloc, + pdsch_vars[eNB_id]->pmi_ext, + rballoc, + symbol, + subframe, + ue->high_speed_flag, + frame_parms, + dlsch0_harq->mimo_mode); +#ifdef DEBUG_DLSCH_MOD + printf("dlsch: using pmi %lx, rb_alloc %x, pmi_ext ",pmi2hex_2Ar1(dlsch0_harq->pmi_alloc),*rballoc); + for (rb=0;rb<nb_rb;rb++) + printf("%d",pdsch_vars[eNB_id]->pmi_ext[rb]); + printf("\n"); +#endif + + if (rx_type >= rx_IC_single_stream) { + if (eNB_id_i<ue->n_connected_eNB) // we are in TM5 + nb_rb = dlsch_extract_rbs_dual(common_vars->common_vars_rx_data_per_thread[ue->current_thread_id[subframe]].rxdataF, + common_vars->common_vars_rx_data_per_thread[ue->current_thread_id[subframe]].dl_ch_estimates[eNB_id_i], + pdsch_vars[eNB_id_i]->rxdataF_ext, + pdsch_vars[eNB_id_i]->dl_ch_estimates_ext, + dlsch0_harq->pmi_alloc, + pdsch_vars[eNB_id_i]->pmi_ext, + rballoc, + symbol, + subframe, + ue->high_speed_flag, + frame_parms, + dlsch0_harq->mimo_mode); + else + nb_rb = dlsch_extract_rbs_dual(common_vars->common_vars_rx_data_per_thread[ue->current_thread_id[subframe]].rxdataF, + common_vars->common_vars_rx_data_per_thread[ue->current_thread_id[subframe]].dl_ch_estimates[eNB_id], + pdsch_vars[eNB_id_i]->rxdataF_ext, + pdsch_vars[eNB_id_i]->dl_ch_estimates_ext, + dlsch0_harq->pmi_alloc, + pdsch_vars[eNB_id_i]->pmi_ext, + rballoc, + symbol, + subframe, + ue->high_speed_flag, + frame_parms, + dlsch0_harq->mimo_mode); + } + } else if (beamforming_mode==0) { //else if nb_antennas_ports_eNB==1 && beamforming_mode == 0 + nb_rb = 
dlsch_extract_rbs_single(common_vars->common_vars_rx_data_per_thread[ue->current_thread_id[subframe]].rxdataF, + common_vars->common_vars_rx_data_per_thread[ue->current_thread_id[subframe]].dl_ch_estimates[eNB_id], + pdsch_vars[eNB_id]->rxdataF_ext, + pdsch_vars[eNB_id]->dl_ch_estimates_ext, + dlsch0_harq->pmi_alloc, + pdsch_vars[eNB_id]->pmi_ext, + rballoc, + symbol, + subframe, + ue->high_speed_flag, + frame_parms); + + if (rx_type==rx_IC_single_stream) { + if (eNB_id_i<ue->n_connected_eNB) + nb_rb = dlsch_extract_rbs_single(common_vars->common_vars_rx_data_per_thread[ue->current_thread_id[subframe]].rxdataF, + common_vars->common_vars_rx_data_per_thread[ue->current_thread_id[subframe]].dl_ch_estimates[eNB_id_i], + pdsch_vars[eNB_id_i]->rxdataF_ext, + pdsch_vars[eNB_id_i]->dl_ch_estimates_ext, + dlsch0_harq->pmi_alloc, + pdsch_vars[eNB_id_i]->pmi_ext, + rballoc, + symbol, + subframe, + ue->high_speed_flag, + frame_parms); + else + nb_rb = dlsch_extract_rbs_single(common_vars->common_vars_rx_data_per_thread[ue->current_thread_id[subframe]].rxdataF, + common_vars->common_vars_rx_data_per_thread[ue->current_thread_id[subframe]].dl_ch_estimates[eNB_id], + pdsch_vars[eNB_id_i]->rxdataF_ext, + pdsch_vars[eNB_id_i]->dl_ch_estimates_ext, + dlsch0_harq->pmi_alloc, + pdsch_vars[eNB_id_i]->pmi_ext, + rballoc, + symbol, + subframe, + ue->high_speed_flag, + frame_parms); + } + } else if (beamforming_mode==7) { //else if beamforming_mode == 7 + nb_rb = dlsch_extract_rbs_TM7(common_vars->common_vars_rx_data_per_thread[ue->current_thread_id[subframe]].rxdataF, + pdsch_vars[eNB_id]->dl_bf_ch_estimates, + pdsch_vars[eNB_id]->rxdataF_ext, + pdsch_vars[eNB_id]->dl_bf_ch_estimates_ext, + rballoc, + symbol, + subframe, + ue->high_speed_flag, + frame_parms); + + } else if(beamforming_mode>7) { + LOG_W(PHY,"dlsch_demodulation: beamforming mode not supported yet.\n"); + } + + //printf("nb_rb = %d, eNB_id %d\n",nb_rb,eNB_id); + if (nb_rb==0) { + // LOG_D(PHY,"dlsch_demodulation.c: 
nb_rb=0\n"); + return(-1); + } + + +#if UE_TIMING_TRACE + stop_meas(&ue->generic_stat_bis[ue->current_thread_id[subframe]][slot]); +#if DISABLE_LOG_X + printf("[AbsSFN %d.%d] Slot%d Symbol %d Flag %d type %d: Pilot/Data extraction %5.2f \n",frame,subframe,slot, + symbol,ue->high_speed_flag,type,symbol,ue->generic_stat_bis[ue->current_thread_id[subframe]][slot].p_time/(cpuf*1000.0)); +#else + LOG_I(PHY, "[AbsSFN %d.%d] Slot%d Symbol %d Flag %d type %d: Pilot/Data extraction %5.2f \n",frame,subframe,slot,symbol, + ue->high_speed_flag,type,ue->generic_stat_bis[ue->current_thread_id[subframe]][slot].p_time/(cpuf*1000.0)); +#endif +#endif + + +#if UE_TIMING_TRACE + start_meas(&ue->generic_stat_bis[ue->current_thread_id[subframe]][slot]); +#endif + aatx = frame_parms->nb_antenna_ports_eNB; + aarx = frame_parms->nb_antennas_rx; + + dlsch_scale_channel(pdsch_vars[eNB_id]->dl_ch_estimates_ext, + frame_parms, + dlsch, + symbol, + nb_rb); + + if ((dlsch0_harq->mimo_mode<DUALSTREAM_UNIFORM_PRECODING1) && + (rx_type==rx_IC_single_stream) && + (eNB_id_i==ue->n_connected_eNB) && + (dlsch0_harq->dl_power_off==0) + ) // TM5 two-user + { + dlsch_scale_channel(pdsch_vars[eNB_id_i]->dl_ch_estimates_ext, + frame_parms, + dlsch, + symbol, + nb_rb); + } + +#if UE_TIMING_TRACE + stop_meas(&ue->generic_stat_bis[ue->current_thread_id[subframe]][slot]); +#if DISABLE_LOG_X + printf("[AbsSFN %d.%d] Slot%d Symbol %d: Channel Scale %5.2f \n",frame,subframe,slot,symbol,ue->generic_stat_bis[ue->current_thread_id[subframe]][slot].p_time/(cpuf*1000.0)); +#else + LOG_I(PHY, "[AbsSFN %d.%d] Slot%d Symbol %d: Channel Scale %5.2f \n",frame,subframe,slot,symbol,ue->generic_stat_bis[ue->current_thread_id[subframe]][slot].p_time/(cpuf*1000.0)); +#endif +#endif + +#if UE_TIMING_TRACE + start_meas(&ue->generic_stat_bis[ue->current_thread_id[subframe]][slot]); +#endif + if (first_symbol_flag==1) { + if (beamforming_mode==0){ + if (dlsch0_harq->mimo_mode<LARGE_CDD) { + 
dlsch_channel_level(pdsch_vars[eNB_id]->dl_ch_estimates_ext, + frame_parms, + avg, + symbol, + nb_rb); + avgs = 0; + for (aatx=0;aatx<frame_parms->nb_antenna_ports_eNB;aatx++) + for (aarx=0;aarx<frame_parms->nb_antennas_rx;aarx++) + avgs = cmax(avgs,avg[(aatx<<1)+aarx]); + + pdsch_vars[eNB_id]->log2_maxh = (log2_approx(avgs)/2)+1; + } + else if ((dlsch0_harq->mimo_mode == LARGE_CDD) || + ((dlsch0_harq->mimo_mode >=DUALSTREAM_UNIFORM_PRECODING1) && + (dlsch0_harq->mimo_mode <=DUALSTREAM_PUSCH_PRECODING))) + { + dlsch_channel_level_TM34(pdsch_vars[eNB_id]->dl_ch_estimates_ext, + frame_parms, + pdsch_vars[eNB_id]->pmi_ext, + avg_0, + avg_1, + symbol, + nb_rb, + dlsch0_harq->mimo_mode); + + LOG_D(PHY,"Channel Level TM34 avg_0 %d, avg_1 %d, rx_type %d, rx_standard %d, dlsch_demod_shift %d \n", avg_0[0], + avg_1[0], rx_type, rx_standard, dlsch_demod_shift); + if (rx_type>rx_standard) { + avg_0[0] = (log2_approx(avg_0[0])/2) + dlsch_demod_shift;// + 2 ;//+ 4; + avg_1[0] = (log2_approx(avg_1[0])/2) + dlsch_demod_shift;// + 2 ;//+ 4; + pdsch_vars[eNB_id]->log2_maxh0 = cmax(avg_0[0],0); + pdsch_vars[eNB_id]->log2_maxh1 = cmax(avg_1[0],0); + // printf("dlsch_demod_shift %d\n", dlsch_demod_shift); + } + else { + avg_0[0] = (log2_approx(avg_0[0])/2) - 13 + interf_unaw_shift; + avg_1[0] = (log2_approx(avg_1[0])/2) - 13 + interf_unaw_shift; + pdsch_vars[eNB_id]->log2_maxh0 = cmax(avg_0[0],0); + pdsch_vars[eNB_id]->log2_maxh1 = cmax(avg_1[0],0); + } + } + else if (dlsch0_harq->mimo_mode<DUALSTREAM_UNIFORM_PRECODING1) {// single-layer precoding (TM5, TM6) + if ((rx_type==rx_IC_single_stream) && (eNB_id_i==ue->n_connected_eNB) && (dlsch0_harq->dl_power_off==0)) { + dlsch_channel_level_TM56(pdsch_vars[eNB_id]->dl_ch_estimates_ext, + frame_parms, + pdsch_vars[eNB_id]->pmi_ext, + avg, + symbol, + nb_rb); + avg[0] = log2_approx(avg[0]) - 13 + offset_mumimo_llr_drange[dlsch0_harq->mcs][(i_mod>>1)-1]; + pdsch_vars[eNB_id]->log2_maxh = cmax(avg[0],0); + + } + else if 
(dlsch0_harq->dl_power_off==1) { //TM6 + + dlsch_channel_level(pdsch_vars[eNB_id]->dl_ch_estimates_ext, + frame_parms, + avg, + symbol, + nb_rb); + + avgs = 0; + for (aatx=0;aatx<frame_parms->nb_antenna_ports_eNB;aatx++) + for (aarx=0;aarx<frame_parms->nb_antennas_rx;aarx++) + avgs = cmax(avgs,avg[(aatx<<1)+aarx]); + + pdsch_vars[eNB_id]->log2_maxh = (log2_approx(avgs)/2) + 1; + pdsch_vars[eNB_id]->log2_maxh++; + + } + } + + } + else if (beamforming_mode==7) + dlsch_channel_level_TM7(pdsch_vars[eNB_id]->dl_bf_ch_estimates_ext, + frame_parms, + avg, + symbol, + nb_rb); +#ifdef UE_DEBUG_TRACE + LOG_D(PHY,"[DLSCH] AbsSubframe %d.%d log2_maxh = %d [log2_maxh0 %d log2_maxh1 %d] (%d,%d)\n", + frame%1024,subframe, pdsch_vars[eNB_id]->log2_maxh, + pdsch_vars[eNB_id]->log2_maxh0, + pdsch_vars[eNB_id]->log2_maxh1, + avg[0],avgs); + //LOG_D(PHY,"[DLSCH] mimo_mode = %d\n", dlsch0_harq->mimo_mode); +#endif + + //wait until pdcch is decoded + //proc->channel_level = 1; + } + + /* + uint32_t wait = 0; + while(proc->channel_level == 0) + { + usleep(1); + wait++; + } + */ + +#if T_TRACER + if (type == PDSCH) + { + T(T_UE_PHY_PDSCH_ENERGY, T_INT(eNB_id), T_INT(frame%1024), T_INT(subframe), + T_INT(avg[0]), T_INT(avg[1]), T_INT(avg[2]), T_INT(avg[3])); + } +#endif + +#if UE_TIMING_TRACE + stop_meas(&ue->generic_stat_bis[ue->current_thread_id[subframe]][slot]); +#if DISABLE_LOG_X + printf("[AbsSFN %d.%d] Slot%d Symbol %d first_symbol_flag %d: Channel Level %5.2f \n",frame,subframe,slot,symbol,first_symbol_flag,ue->generic_stat_bis[ue->current_thread_id[subframe]][slot].p_time/(cpuf*1000.0)); +#else + LOG_I(PHY, "[AbsSFN %d.%d] Slot%d Symbol %d first_symbol_flag %d: Channel Level %5.2f \n",frame,subframe,slot,symbol,first_symbol_flag,ue->generic_stat_bis[ue->current_thread_id[subframe]][slot].p_time/(cpuf*1000.0)); +#endif +#endif + + +#if UE_TIMING_TRACE + start_meas(&ue->generic_stat_bis[ue->current_thread_id[subframe]][slot]); +#endif +// Now channel compensation + if 
(dlsch0_harq->mimo_mode<LARGE_CDD) { + dlsch_channel_compensation(pdsch_vars[eNB_id]->rxdataF_ext, + pdsch_vars[eNB_id]->dl_ch_estimates_ext, + pdsch_vars[eNB_id]->dl_ch_mag0, + pdsch_vars[eNB_id]->dl_ch_magb0, + pdsch_vars[eNB_id]->rxdataF_comp0, + (aatx>1) ? pdsch_vars[eNB_id]->rho : NULL, + frame_parms, + symbol, + first_symbol_flag, + dlsch0_harq->Qm, + nb_rb, + pdsch_vars[eNB_id]->log2_maxh, + measurements); // log2_maxh+I0_shift + /*if (symbol == 5) { + write_output("rxF_comp_d.m","rxF_c_d",&pdsch_vars[eNB_id]->rxdataF_comp0[0][symbol*frame_parms->N_RB_DL*12],frame_parms->N_RB_DL*12,1,1); + } */ + if ((rx_type==rx_IC_single_stream) && + (eNB_id_i<ue->n_connected_eNB)) { + dlsch_channel_compensation(pdsch_vars[eNB_id_i]->rxdataF_ext, + pdsch_vars[eNB_id_i]->dl_ch_estimates_ext, + pdsch_vars[eNB_id_i]->dl_ch_mag0, + pdsch_vars[eNB_id_i]->dl_ch_magb0, + pdsch_vars[eNB_id_i]->rxdataF_comp0, + (aatx>1) ? pdsch_vars[eNB_id_i]->rho : NULL, + frame_parms, + symbol, + first_symbol_flag, + i_mod, + nb_rb, + pdsch_vars[eNB_id]->log2_maxh, + measurements); // log2_maxh+I0_shift +#ifdef DEBUG_PHY + if (symbol == 5) { + write_output("rxF_comp_d.m","rxF_c_d",&pdsch_vars[eNB_id]->rxdataF_comp0[0][symbol*frame_parms->N_RB_DL*12],frame_parms->N_RB_DL*12,1,1); + write_output("rxF_comp_i.m","rxF_c_i",&pdsch_vars[eNB_id_i]->rxdataF_comp0[0][symbol*frame_parms->N_RB_DL*12],frame_parms->N_RB_DL*12,1,1); + } +#endif + + dlsch_dual_stream_correlation(frame_parms, + symbol, + nb_rb, + pdsch_vars[eNB_id]->dl_ch_estimates_ext, + pdsch_vars[eNB_id_i]->dl_ch_estimates_ext, + pdsch_vars[eNB_id]->dl_ch_rho_ext[harq_pid][round], + pdsch_vars[eNB_id]->log2_maxh); + } + } else if ((dlsch0_harq->mimo_mode == LARGE_CDD) || ((dlsch0_harq->mimo_mode >=DUALSTREAM_UNIFORM_PRECODING1) && + (dlsch0_harq->mimo_mode <=DUALSTREAM_PUSCH_PRECODING))){ + dlsch_channel_compensation_TM34(frame_parms, + pdsch_vars[eNB_id], + measurements, + eNB_id, + symbol, + dlsch0_harq->Qm, + dlsch1_harq->Qm, + harq_pid, + 
dlsch0_harq->round, + dlsch0_harq->mimo_mode, + nb_rb, + pdsch_vars[eNB_id]->log2_maxh0, + pdsch_vars[eNB_id]->log2_maxh1); + /* if (symbol == 5) { + write_output("rxF_comp_d00.m","rxF_c_d00",&pdsch_vars[eNB_id]->rxdataF_comp0[0][symbol*frame_parms->N_RB_DL*12],frame_parms->N_RB_DL*12,1,1);// should be QAM + write_output("rxF_comp_d01.m","rxF_c_d01",&pdsch_vars[eNB_id]->rxdataF_comp0[1][symbol*frame_parms->N_RB_DL*12],frame_parms->N_RB_DL*12,1,1);//should be almost 0 + write_output("rxF_comp_d10.m","rxF_c_d10",&pdsch_vars[eNB_id]->rxdataF_comp1[harq_pid][round][0][symbol*frame_parms->N_RB_DL*12],frame_parms->N_RB_DL*12,1,1);//should be almost 0 + write_output("rxF_comp_d11.m","rxF_c_d11",&pdsch_vars[eNB_id]->rxdataF_comp1[harq_pid][round][1][symbol*frame_parms->N_RB_DL*12],frame_parms->N_RB_DL*12,1,1);//should be QAM + } */ + // compute correlation between signal and interference channels (rho12 and rho21) + dlsch_dual_stream_correlation(frame_parms, // this is doing h11'*h12 and h21'*h22 + symbol, + nb_rb, + pdsch_vars[eNB_id]->dl_ch_estimates_ext, + &(pdsch_vars[eNB_id]->dl_ch_estimates_ext[2]), + pdsch_vars[eNB_id]->dl_ch_rho2_ext, + pdsch_vars[eNB_id]->log2_maxh0); + //printf("rho stream1 =%d\n", &pdsch_vars[eNB_id]->dl_ch_rho_ext[harq_pid][round] ); + //to be optimized (just take complex conjugate) + dlsch_dual_stream_correlation(frame_parms, // this is doing h12'*h11 and h22'*h21 + symbol, + nb_rb, + &(pdsch_vars[eNB_id]->dl_ch_estimates_ext[2]), + pdsch_vars[eNB_id]->dl_ch_estimates_ext, + pdsch_vars[eNB_id]->dl_ch_rho_ext[harq_pid][round], + pdsch_vars[eNB_id]->log2_maxh1); + // printf("rho stream2 =%d\n",&pdsch_vars[eNB_id]->dl_ch_rho2_ext ); + //printf("TM3 log2_maxh : %d\n",pdsch_vars[eNB_id]->log2_maxh); + /* if (symbol == 5) { + write_output("rho0_0.m","rho0_0",&pdsch_vars[eNB_id]->dl_ch_rho_ext[harq_pid][round][0][symbol*frame_parms->N_RB_DL*12],frame_parms->N_RB_DL*12,1,1);// should be QAM + 
write_output("rho2_0.m","rho2_0",&pdsch_vars[eNB_id]->dl_ch_rho2_ext[0][symbol*frame_parms->N_RB_DL*12],frame_parms->N_RB_DL*12,1,1);//should be almost 0 + write_output("rho0_1.m.m","rho0_1",&pdsch_vars[eNB_id]->dl_ch_rho_ext[harq_pid][round][1][symbol*frame_parms->N_RB_DL*12],frame_parms->N_RB_DL*12,1,1);//should be almost 0 + write_output("rho2_1.m","rho2_1",&pdsch_vars[eNB_id]->dl_ch_rho2_ext[1][symbol*frame_parms->N_RB_DL*12],frame_parms->N_RB_DL*12,1,1);//should be QAM + } */ + + } else if (dlsch0_harq->mimo_mode<DUALSTREAM_UNIFORM_PRECODING1) {// single-layer precoding (TM5, TM6) + if ((rx_type==rx_IC_single_stream) && (eNB_id_i==ue->n_connected_eNB) && (dlsch0_harq->dl_power_off==0)) { + dlsch_channel_compensation_TM56(pdsch_vars[eNB_id]->rxdataF_ext, + pdsch_vars[eNB_id]->dl_ch_estimates_ext, + pdsch_vars[eNB_id]->dl_ch_mag0, + pdsch_vars[eNB_id]->dl_ch_magb0, + pdsch_vars[eNB_id]->rxdataF_comp0, + pdsch_vars[eNB_id]->pmi_ext, + frame_parms, + measurements, + eNB_id, + symbol, + dlsch0_harq->Qm, + nb_rb, + pdsch_vars[eNB_id]->log2_maxh, + dlsch0_harq->dl_power_off); + + for (rb=0; rb<nb_rb; rb++) { + switch(pdsch_vars[eNB_id]->pmi_ext[rb]) { + case 0: + pdsch_vars[eNB_id_i]->pmi_ext[rb]=1; + break; + case 1: + pdsch_vars[eNB_id_i]->pmi_ext[rb]=0; + break; + case 2: + pdsch_vars[eNB_id_i]->pmi_ext[rb]=3; + break; + case 3: + pdsch_vars[eNB_id_i]->pmi_ext[rb]=2; + break; + } + // if (rb==0) + // printf("pmi %d, pmi_i %d\n",pdsch_vars[eNB_id]->pmi_ext[rb],pdsch_vars[eNB_id_i]->pmi_ext[rb]); + } + dlsch_channel_compensation_TM56(pdsch_vars[eNB_id_i]->rxdataF_ext, + pdsch_vars[eNB_id_i]->dl_ch_estimates_ext, + pdsch_vars[eNB_id_i]->dl_ch_mag0, + pdsch_vars[eNB_id_i]->dl_ch_magb0, + pdsch_vars[eNB_id_i]->rxdataF_comp0, + pdsch_vars[eNB_id_i]->pmi_ext, + frame_parms, + measurements, + eNB_id_i, + symbol, + i_mod, + nb_rb, + pdsch_vars[eNB_id]->log2_maxh, + dlsch0_harq->dl_power_off); +#ifdef DEBUG_PHY + if (symbol==5) { + 
write_output("rxF_comp_d.m","rxF_c_d",&pdsch_vars[eNB_id]->rxdataF_comp0[0][symbol*frame_parms->N_RB_DL*12],frame_parms->N_RB_DL*12,1,1); + write_output("rxF_comp_i.m","rxF_c_i",&pdsch_vars[eNB_id_i]->rxdataF_comp0[0][symbol*frame_parms->N_RB_DL*12],frame_parms->N_RB_DL*12,1,1); + } +#endif + dlsch_dual_stream_correlation(frame_parms, + symbol, + nb_rb, + pdsch_vars[eNB_id]->dl_ch_estimates_ext, + pdsch_vars[eNB_id_i]->dl_ch_estimates_ext, + pdsch_vars[eNB_id]->dl_ch_rho_ext[harq_pid][round], + pdsch_vars[eNB_id]->log2_maxh); + } else if (dlsch0_harq->dl_power_off==1) { + dlsch_channel_compensation_TM56(pdsch_vars[eNB_id]->rxdataF_ext, + pdsch_vars[eNB_id]->dl_ch_estimates_ext, + pdsch_vars[eNB_id]->dl_ch_mag0, + pdsch_vars[eNB_id]->dl_ch_magb0, + pdsch_vars[eNB_id]->rxdataF_comp0, + pdsch_vars[eNB_id]->pmi_ext, + frame_parms, + measurements, + eNB_id, + symbol, + dlsch0_harq->Qm, + nb_rb, + pdsch_vars[eNB_id]->log2_maxh, + 1); + + } + + + } else if (dlsch0_harq->mimo_mode==TM7) { //TM7 + + dlsch_channel_compensation(pdsch_vars[eNB_id]->rxdataF_ext, + pdsch_vars[eNB_id]->dl_bf_ch_estimates_ext, + pdsch_vars[eNB_id]->dl_ch_mag0, + pdsch_vars[eNB_id]->dl_ch_magb0, + pdsch_vars[eNB_id]->rxdataF_comp0, + (aatx>1) ? 
pdsch_vars[eNB_id]->rho : NULL, + frame_parms, + symbol, + first_symbol_flag, + get_Qm(dlsch0_harq->mcs), + nb_rb, + //9, + pdsch_vars[eNB_id]->log2_maxh, + measurements); // log2_maxh+I0_shift + } + +#if UE_TIMING_TRACE + stop_meas(&ue->generic_stat_bis[ue->current_thread_id[subframe]][slot]); +#if DISABLE_LOG_X + printf("[AbsSFN %d.%d] Slot%d Symbol %d log2_maxh %d channel_level %d: Channel Comp %5.2f \n",frame,subframe,slot,symbol,pdsch_vars[eNB_id]->log2_maxh,proc->channel_level,ue->generic_stat_bis[ue->current_thread_id[subframe]][slot].p_time/(cpuf*1000.0)); +#else + LOG_I(PHY, "[AbsSFN %d.%d] Slot%d Symbol %d log2_maxh %d channel_level %d: Channel Comp %5.2f \n",frame,subframe,slot,symbol,pdsch_vars[eNB_id]->log2_maxh,proc->channel_level,ue->generic_stat_bis[ue->current_thread_id[subframe]][slot].p_time/(cpuf*1000.0)); +#endif +#endif +// MRC +#if UE_TIMING_TRACE + start_meas(&ue->generic_stat_bis[ue->current_thread_id[subframe]][slot]); +#endif + + if (frame_parms->nb_antennas_rx > 1) { + if ((dlsch0_harq->mimo_mode == LARGE_CDD) || + ((dlsch0_harq->mimo_mode >=DUALSTREAM_UNIFORM_PRECODING1) && + (dlsch0_harq->mimo_mode <=DUALSTREAM_PUSCH_PRECODING))){ // TM3 or TM4 + if (frame_parms->nb_antenna_ports_eNB == 2) { + dlsch_detection_mrc_TM34(frame_parms, + pdsch_vars[eNB_id], + harq_pid, + dlsch0_harq->round, + symbol, + nb_rb, + 1); + /* if (symbol == 5) { + write_output("rho0_mrc.m","rho0_0",&pdsch_vars[eNB_id]->dl_ch_rho_ext[harq_pid][round][0][symbol*frame_parms->N_RB_DL*12],frame_parms->N_RB_DL*12,1,1);// should be QAM + write_output("rho2_mrc.m","rho2_0",&pdsch_vars[eNB_id]->dl_ch_rho2_ext[0][symbol*frame_parms->N_RB_DL*12],frame_parms->N_RB_DL*12,1,1);//should be almost 0 + } */ + } + } else { + dlsch_detection_mrc(frame_parms, + pdsch_vars[eNB_id]->rxdataF_comp0, + pdsch_vars[eNB_id_i]->rxdataF_comp0, + pdsch_vars[eNB_id]->rho, + pdsch_vars[eNB_id]->dl_ch_rho_ext[harq_pid][round], + pdsch_vars[eNB_id]->dl_ch_mag0, + pdsch_vars[eNB_id]->dl_ch_magb0, + 
pdsch_vars[eNB_id_i]->dl_ch_mag0, + pdsch_vars[eNB_id_i]->dl_ch_magb0, + symbol, + nb_rb, + rx_type==rx_IC_single_stream); + } + } + // printf("Combining"); + if ((dlsch0_harq->mimo_mode == SISO) || + ((dlsch0_harq->mimo_mode >= UNIFORM_PRECODING11) && + (dlsch0_harq->mimo_mode <= PUSCH_PRECODING0)) || + (dlsch0_harq->mimo_mode == TM7)) { + /* + dlsch_siso(frame_parms, + pdsch_vars[eNB_id]->rxdataF_comp, + pdsch_vars[eNB_id_i]->rxdataF_comp, + symbol, + nb_rb); + */ + } else if (dlsch0_harq->mimo_mode == ALAMOUTI) { + dlsch_alamouti(frame_parms, + pdsch_vars[eNB_id]->rxdataF_comp0, + pdsch_vars[eNB_id]->dl_ch_mag0, + pdsch_vars[eNB_id]->dl_ch_magb0, + symbol, + nb_rb); + } + + // printf("LLR"); + if ((dlsch0_harq->mimo_mode == LARGE_CDD) || + ((dlsch0_harq->mimo_mode >=DUALSTREAM_UNIFORM_PRECODING1) && + (dlsch0_harq->mimo_mode <=DUALSTREAM_PUSCH_PRECODING))) { + rxdataF_comp_ptr = pdsch_vars[eNB_id]->rxdataF_comp1[harq_pid][round]; + dl_ch_mag_ptr = pdsch_vars[eNB_id]->dl_ch_mag1[harq_pid][round]; + } + else { + rxdataF_comp_ptr = pdsch_vars[eNB_id_i]->rxdataF_comp0; + dl_ch_mag_ptr = pdsch_vars[eNB_id_i]->dl_ch_mag0; + //i_mod should have been passed as a parameter + } + +#if UE_TIMING_TRACE + stop_meas(&ue->generic_stat_bis[ue->current_thread_id[subframe]][slot]); +#if DISABLE_LOG_X + printf("[AbsSFN %d.%d] Slot%d Symbol %d: Channel Combine %5.2f \n",frame,subframe,slot,symbol,ue->generic_stat_bis[ue->current_thread_id[subframe]][slot].p_time/(cpuf*1000.0)); +#else + LOG_I(PHY, "[AbsSFN %d.%d] Slot%d Symbol %d: Channel Combine %5.2f \n",frame,subframe,slot,symbol,ue->generic_stat_bis[ue->current_thread_id[subframe]][slot].p_time/(cpuf*1000.0)); +#endif +#endif + +#if UE_TIMING_TRACE + start_meas(&ue->generic_stat_bis[ue->current_thread_id[subframe]][slot]); +#endif + //printf("LLR dlsch0_harq->Qm %d rx_type %d cw0 %d cw1 %d symbol %d \n",dlsch0_harq->Qm,rx_type,codeword_TB0,codeword_TB1,symbol); + // compute LLRs + // -> // compute @pointer where llrs should 
filled for this ofdm-symbol + int8_t *pllr_symbol_cw0; + int8_t *pllr_symbol_cw1; + uint32_t llr_offset_symbol; + llr_offset_symbol = pdsch_vars[eNB_id]->llr_offset[symbol]; + pllr_symbol_cw0 = (int8_t*)pdsch_vars[eNB_id]->llr[0]; + pllr_symbol_cw1 = (int8_t*)pdsch_vars[eNB_id]->llr[1]; + pllr_symbol_cw0 += llr_offset_symbol; + pllr_symbol_cw1 += llr_offset_symbol; + + LOG_I(PHY,"compute LLRs [AbsSubframe %d.%d-%d] NbRB %d Qm %d LLRs-Length %d LLR-Offset %d @LLR Buff %p @LLR Buff(symb) %p\n", + frame, subframe,symbol, + nb_rb,dlsch0_harq->Qm, + pdsch_vars[eNB_id]->llr_length[symbol], + pdsch_vars[eNB_id]->llr_offset[symbol], + (int16_t*)pdsch_vars[eNB_id]->llr[0], + pllr_symbol_cw0); + + switch (dlsch0_harq->Qm) { + case 2 : + if ((rx_type==rx_standard) || (codeword_TB1 == -1)) { + + + dlsch_qpsk_llr(frame_parms, + pdsch_vars[eNB_id]->rxdataF_comp0, + (int16_t*)pllr_symbol_cw0, + symbol, + first_symbol_flag, + nb_rb, + adjust_G2(frame_parms,dlsch0_harq->rb_alloc_even,2,subframe,symbol), + beamforming_mode); + + } else if (codeword_TB0 == -1){ + + dlsch_qpsk_llr(frame_parms, + pdsch_vars[eNB_id]->rxdataF_comp0, + (int16_t*)pllr_symbol_cw1, + symbol, + first_symbol_flag, + nb_rb, + adjust_G2(frame_parms,dlsch0_harq->rb_alloc_even,2,subframe,symbol), + beamforming_mode); + } + else if (rx_type >= rx_IC_single_stream) { + if (dlsch1_harq->Qm == 2) { + dlsch_qpsk_qpsk_llr(frame_parms, + pdsch_vars[eNB_id]->rxdataF_comp0, + rxdataF_comp_ptr, + pdsch_vars[eNB_id]->dl_ch_rho2_ext, + pdsch_vars[eNB_id]->llr[0], + symbol,first_symbol_flag,nb_rb, + adjust_G2(frame_parms,dlsch0_harq->rb_alloc_even,2,subframe,symbol), + pdsch_vars[eNB_id]->llr128); + if (rx_type==rx_IC_dual_stream) { + dlsch_qpsk_qpsk_llr(frame_parms, + rxdataF_comp_ptr, + pdsch_vars[eNB_id]->rxdataF_comp0, + pdsch_vars[eNB_id]->dl_ch_rho_ext[harq_pid][round], + pdsch_vars[eNB_id]->llr[1], + symbol,first_symbol_flag,nb_rb, + adjust_G2(frame_parms,dlsch1_harq->rb_alloc_even,2,subframe,symbol), + 
pdsch_vars[eNB_id]->llr128_2ndstream); + } + } + else if (dlsch1_harq->Qm == 4) { + dlsch_qpsk_16qam_llr(frame_parms, + pdsch_vars[eNB_id]->rxdataF_comp0, + rxdataF_comp_ptr,//i + dl_ch_mag_ptr,//i + pdsch_vars[eNB_id]->dl_ch_rho2_ext, + pdsch_vars[eNB_id]->llr[0], + symbol,first_symbol_flag,nb_rb, + adjust_G2(frame_parms,dlsch0_harq->rb_alloc_even,2,subframe,symbol), + pdsch_vars[eNB_id]->llr128); + if (rx_type==rx_IC_dual_stream) { + dlsch_16qam_qpsk_llr(frame_parms, + rxdataF_comp_ptr, + pdsch_vars[eNB_id]->rxdataF_comp0,//i + dl_ch_mag_ptr, + pdsch_vars[eNB_id]->dl_ch_rho_ext[harq_pid][round], + pdsch_vars[eNB_id]->llr[1], + symbol,first_symbol_flag,nb_rb, + adjust_G2(frame_parms,dlsch1_harq->rb_alloc_even,4,subframe,symbol), + pdsch_vars[eNB_id]->llr128_2ndstream); + } + } + else { + dlsch_qpsk_64qam_llr(frame_parms, + pdsch_vars[eNB_id]->rxdataF_comp0, + rxdataF_comp_ptr,//i + dl_ch_mag_ptr,//i + pdsch_vars[eNB_id]->dl_ch_rho2_ext, + pdsch_vars[eNB_id]->llr[0], + symbol,first_symbol_flag,nb_rb, + adjust_G2(frame_parms,dlsch0_harq->rb_alloc_even,2,subframe,symbol), + pdsch_vars[eNB_id]->llr128); + if (rx_type==rx_IC_dual_stream) { + dlsch_64qam_qpsk_llr(frame_parms, + rxdataF_comp_ptr, + pdsch_vars[eNB_id]->rxdataF_comp0,//i + dl_ch_mag_ptr, + pdsch_vars[eNB_id]->dl_ch_rho_ext[harq_pid][round], + pdsch_vars[eNB_id]->llr[1], + symbol,first_symbol_flag,nb_rb, + adjust_G2(frame_parms,dlsch1_harq->rb_alloc_even,6,subframe,symbol), + pdsch_vars[eNB_id]->llr128_2ndstream); + } + } + } + break; + case 4 : + if ((rx_type==rx_standard ) || (codeword_TB1 == -1)) { + dlsch_16qam_llr(frame_parms, + pdsch_vars[eNB_id]->rxdataF_comp0, + pdsch_vars[eNB_id]->llr[0], + pdsch_vars[eNB_id]->dl_ch_mag0, + symbol,first_symbol_flag,nb_rb, + adjust_G2(frame_parms,dlsch0_harq->rb_alloc_even,4,subframe,symbol), + pdsch_vars[eNB_id]->llr128, + beamforming_mode); + } else if (codeword_TB0 == -1){ + dlsch_16qam_llr(frame_parms, + pdsch_vars[eNB_id]->rxdataF_comp0, + 
pdsch_vars[eNB_id]->llr[1], + pdsch_vars[eNB_id]->dl_ch_mag0, + symbol,first_symbol_flag,nb_rb, + adjust_G2(frame_parms,dlsch0_harq->rb_alloc_even,4,subframe,symbol), + pdsch_vars[eNB_id]->llr128_2ndstream, + beamforming_mode); + } + else if (rx_type >= rx_IC_single_stream) { + if (dlsch1_harq->Qm == 2) { + dlsch_16qam_qpsk_llr(frame_parms, + pdsch_vars[eNB_id]->rxdataF_comp0, + rxdataF_comp_ptr,//i + pdsch_vars[eNB_id]->dl_ch_mag0, + pdsch_vars[eNB_id]->dl_ch_rho2_ext, + pdsch_vars[eNB_id]->llr[0], + symbol,first_symbol_flag,nb_rb, + adjust_G2(frame_parms,dlsch0_harq->rb_alloc_even,4,subframe,symbol), + pdsch_vars[eNB_id]->llr128); + if (rx_type==rx_IC_dual_stream) { + dlsch_qpsk_16qam_llr(frame_parms, + rxdataF_comp_ptr, + pdsch_vars[eNB_id]->rxdataF_comp0,//i + pdsch_vars[eNB_id]->dl_ch_mag0,//i + pdsch_vars[eNB_id]->dl_ch_rho_ext[harq_pid][round], + pdsch_vars[eNB_id]->llr[1], + symbol,first_symbol_flag,nb_rb, + adjust_G2(frame_parms,dlsch1_harq->rb_alloc_even,2,subframe,symbol), + pdsch_vars[eNB_id]->llr128_2ndstream); + } + } + else if (dlsch1_harq->Qm == 4) { + dlsch_16qam_16qam_llr(frame_parms, + pdsch_vars[eNB_id]->rxdataF_comp0, + rxdataF_comp_ptr,//i + pdsch_vars[eNB_id]->dl_ch_mag0, + dl_ch_mag_ptr,//i + pdsch_vars[eNB_id]->dl_ch_rho2_ext, + pdsch_vars[eNB_id]->llr[0], + symbol,first_symbol_flag,nb_rb, + adjust_G2(frame_parms,dlsch0_harq->rb_alloc_even,4,subframe,symbol), + pdsch_vars[eNB_id]->llr128); + if (rx_type==rx_IC_dual_stream) { + dlsch_16qam_16qam_llr(frame_parms, + rxdataF_comp_ptr, + pdsch_vars[eNB_id]->rxdataF_comp0,//i + dl_ch_mag_ptr, + pdsch_vars[eNB_id]->dl_ch_mag0,//i + pdsch_vars[eNB_id]->dl_ch_rho_ext[harq_pid][round], + pdsch_vars[eNB_id]->llr[1], + symbol,first_symbol_flag,nb_rb, + adjust_G2(frame_parms,dlsch1_harq->rb_alloc_even,4,subframe,symbol), + pdsch_vars[eNB_id]->llr128_2ndstream); + } + } + else { + dlsch_16qam_64qam_llr(frame_parms, + pdsch_vars[eNB_id]->rxdataF_comp0, + rxdataF_comp_ptr,//i + 
pdsch_vars[eNB_id]->dl_ch_mag0, + dl_ch_mag_ptr,//i + pdsch_vars[eNB_id]->dl_ch_rho2_ext, + pdsch_vars[eNB_id]->llr[0], + symbol,first_symbol_flag,nb_rb, + adjust_G2(frame_parms,dlsch0_harq->rb_alloc_even,4,subframe,symbol), + pdsch_vars[eNB_id]->llr128); + if (rx_type==rx_IC_dual_stream) { + dlsch_64qam_16qam_llr(frame_parms, + rxdataF_comp_ptr, + pdsch_vars[eNB_id]->rxdataF_comp0, + dl_ch_mag_ptr, + pdsch_vars[eNB_id]->dl_ch_mag0, + pdsch_vars[eNB_id]->dl_ch_rho_ext[harq_pid][round], + pdsch_vars[eNB_id]->llr[1], + symbol,first_symbol_flag,nb_rb, + adjust_G2(frame_parms,dlsch1_harq->rb_alloc_even,6,subframe,symbol), + pdsch_vars[eNB_id]->llr128_2ndstream); + } + } + } + break; + case 6 : + if ((rx_type==rx_standard) || (codeword_TB1 == -1)) { + dlsch_64qam_llr(frame_parms, + pdsch_vars[eNB_id]->rxdataF_comp0, + (int16_t*)pllr_symbol_cw0, + pdsch_vars[eNB_id]->dl_ch_mag0, + pdsch_vars[eNB_id]->dl_ch_magb0, + symbol,first_symbol_flag,nb_rb, + adjust_G2(frame_parms,dlsch0_harq->rb_alloc_even,6,subframe,symbol), + pdsch_vars[eNB_id]->llr_offset[symbol], + beamforming_mode); + } else if (codeword_TB0 == -1){ + dlsch_64qam_llr(frame_parms, + pdsch_vars[eNB_id]->rxdataF_comp0, + (int16_t*)pllr_symbol_cw1, + pdsch_vars[eNB_id]->dl_ch_mag0, + pdsch_vars[eNB_id]->dl_ch_magb0, + symbol,first_symbol_flag,nb_rb, + adjust_G2(frame_parms,dlsch0_harq->rb_alloc_even,6,subframe,symbol), + pdsch_vars[eNB_id]->llr_offset[symbol], + beamforming_mode); + } + else if (rx_type >= rx_IC_single_stream) { + if (dlsch1_harq->Qm == 2) { + dlsch_64qam_qpsk_llr(frame_parms, + pdsch_vars[eNB_id]->rxdataF_comp0, + rxdataF_comp_ptr,//i + pdsch_vars[eNB_id]->dl_ch_mag0, + pdsch_vars[eNB_id]->dl_ch_rho2_ext, + pdsch_vars[eNB_id]->llr[0], + symbol,first_symbol_flag,nb_rb, + adjust_G2(frame_parms,dlsch0_harq->rb_alloc_even,6,subframe,symbol), + pdsch_vars[eNB_id]->llr128); + if (rx_type==rx_IC_dual_stream) { + dlsch_qpsk_64qam_llr(frame_parms, + rxdataF_comp_ptr, + 
pdsch_vars[eNB_id]->rxdataF_comp0,//i + pdsch_vars[eNB_id]->dl_ch_mag0, + pdsch_vars[eNB_id]->dl_ch_rho_ext[harq_pid][round], + pdsch_vars[eNB_id]->llr[1], + symbol,first_symbol_flag,nb_rb, + adjust_G2(frame_parms,dlsch1_harq->rb_alloc_even,2,subframe,symbol), + pdsch_vars[eNB_id]->llr128_2ndstream); + } + } + else if (dlsch1_harq->Qm == 4) { + dlsch_64qam_16qam_llr(frame_parms, + pdsch_vars[eNB_id]->rxdataF_comp0, + rxdataF_comp_ptr,//i + pdsch_vars[eNB_id]->dl_ch_mag0, + dl_ch_mag_ptr,//i + pdsch_vars[eNB_id]->dl_ch_rho2_ext, + pdsch_vars[eNB_id]->llr[0], + symbol,first_symbol_flag,nb_rb, + adjust_G2(frame_parms,dlsch0_harq->rb_alloc_even,6,subframe,symbol), + pdsch_vars[eNB_id]->llr128); + if (rx_type==rx_IC_dual_stream) { + dlsch_16qam_64qam_llr(frame_parms, + rxdataF_comp_ptr, + pdsch_vars[eNB_id]->rxdataF_comp0,//i + dl_ch_mag_ptr, + pdsch_vars[eNB_id]->dl_ch_mag0,//i + pdsch_vars[eNB_id]->dl_ch_rho_ext[harq_pid][round], + pdsch_vars[eNB_id]->llr[1], + symbol,first_symbol_flag,nb_rb, + adjust_G2(frame_parms,dlsch1_harq->rb_alloc_even,4,subframe,symbol), + pdsch_vars[eNB_id]->llr128_2ndstream); + } + } + else { + dlsch_64qam_64qam_llr(frame_parms, + pdsch_vars[eNB_id]->rxdataF_comp0, + rxdataF_comp_ptr,//i + pdsch_vars[eNB_id]->dl_ch_mag0, + dl_ch_mag_ptr,//i + pdsch_vars[eNB_id]->dl_ch_rho2_ext, + (int16_t*)pllr_symbol_cw0, + symbol,first_symbol_flag,nb_rb, + adjust_G2(frame_parms,dlsch0_harq->rb_alloc_even,6,subframe,symbol), + pdsch_vars[eNB_id]->llr_offset[symbol]); + if (rx_type==rx_IC_dual_stream) { + dlsch_64qam_64qam_llr(frame_parms, + rxdataF_comp_ptr, + pdsch_vars[eNB_id]->rxdataF_comp0,//i + dl_ch_mag_ptr, + pdsch_vars[eNB_id]->dl_ch_mag0,//i + pdsch_vars[eNB_id]->dl_ch_rho_ext[harq_pid][round], + (int16_t*)pllr_symbol_cw1, + symbol,first_symbol_flag,nb_rb, + adjust_G2(frame_parms,dlsch1_harq->rb_alloc_even,6,subframe,symbol), + pdsch_vars[eNB_id]->llr_offset[symbol]); + } + } + } + break; + default: + LOG_W(PHY,"rx_dlsch.c : Unknown 
mod_order!!!!\n"); + return(-1); + break; + } + if (dlsch1_harq) { + switch (get_Qm(dlsch1_harq->mcs)) { + case 2 : + if (rx_type==rx_standard) { + dlsch_qpsk_llr(frame_parms, + pdsch_vars[eNB_id]->rxdataF_comp0, + (int16_t*)pllr_symbol_cw0, + symbol,first_symbol_flag,nb_rb, + adjust_G2(frame_parms,dlsch0_harq->rb_alloc_even,2,subframe,symbol), + beamforming_mode); + } + break; + case 4: + if (rx_type==rx_standard) { + dlsch_16qam_llr(frame_parms, + pdsch_vars[eNB_id]->rxdataF_comp0, + pdsch_vars[eNB_id]->llr[0], + pdsch_vars[eNB_id]->dl_ch_mag0, + symbol,first_symbol_flag,nb_rb, + adjust_G2(frame_parms,dlsch0_harq->rb_alloc_even,4,subframe,symbol), + pdsch_vars[eNB_id]->llr128, + beamforming_mode); + } + break; + case 6 : + if (rx_type==rx_standard) { + dlsch_64qam_llr(frame_parms, + pdsch_vars[eNB_id]->rxdataF_comp0, + (int16_t*)pllr_symbol_cw0, + pdsch_vars[eNB_id]->dl_ch_mag0, + pdsch_vars[eNB_id]->dl_ch_magb0, + symbol,first_symbol_flag,nb_rb, + adjust_G2(frame_parms,dlsch0_harq->rb_alloc_even,6,subframe,symbol), + pdsch_vars[eNB_id]->llr_offset[symbol], + beamforming_mode); + } + break; + default: + LOG_W(PHY,"rx_dlsch.c : Unknown mod_order!!!!\n"); + return(-1); + break; + } + } + +#if UE_TIMING_TRACE + stop_meas(&ue->generic_stat_bis[ue->current_thread_id[subframe]][slot]); +#if DISABLE_LOG_X + printf("[AbsSFN %d.%d] Slot%d Symbol %d: LLR Computation %5.2f \n",frame,subframe,slot,symbol,ue->generic_stat_bis[ue->current_thread_id[subframe]][slot].p_time/(cpuf*1000.0)); +#else + LOG_I(PHY, "[AbsSFN %d.%d] Slot%d Symbol %d: LLR Computation %5.2f \n",frame,subframe,slot,symbol,ue->generic_stat_bis[ue->current_thread_id[subframe]][slot].p_time/(cpuf*1000.0)); +#endif +#endif +// Please keep it: useful for debugging +#if 0 + if( (symbol == 13) && (subframe==0) && (dlsch0_harq->Qm == 6) /*&& (nb_rb==25)*/) + { + LOG_E(PHY,"Dump Phy Chan Est \n"); + if(1) + { +#if 1 + write_output("rxdataF0.m" , "rxdataF0", 
&common_vars->common_vars_rx_data_per_thread[ue->current_thread_id[subframe]].rxdataF[0][0],14*frame_parms->ofdm_symbol_size,1,1); + //write_output("rxdataF1.m" , "rxdataF1", &common_vars->common_vars_rx_data_per_thread[ue->current_thread_id[subframe]].rxdataF[0][0],14*frame_parms->ofdm_symbol_size,1,1); + write_output("dl_ch_estimates00.m", "dl_ch_estimates00", &common_vars->common_vars_rx_data_per_thread[ue->current_thread_id[subframe]].dl_ch_estimates[eNB_id][0][0],14*frame_parms->ofdm_symbol_size,1,1); + //write_output("dl_ch_estimates01.m", "dl_ch_estimates01", &common_vars->common_vars_rx_data_per_thread[ue->current_thread_id[subframe]].dl_ch_estimates[eNB_id][1][0],14*frame_parms->ofdm_symbol_size,1,1); + //write_output("dl_ch_estimates10.m", "dl_ch_estimates10", &common_vars->common_vars_rx_data_per_thread[ue->current_thread_id[subframe]].dl_ch_estimates[eNB_id][2][0],14*frame_parms->ofdm_symbol_size,1,1); + //write_output("dl_ch_estimates11.m", "dl_ch_estimates11", &common_vars->common_vars_rx_data_per_thread[ue->current_thread_id[subframe]].dl_ch_estimates[eNB_id][3][0],14*frame_parms->ofdm_symbol_size,1,1); + + + //write_output("rxdataF_ext00.m" , "rxdataF_ext00", &pdsch_vars[eNB_id]->rxdataF_ext[0][0],14*frame_parms->N_RB_DL*12,1,1); + //write_output("rxdataF_ext01.m" , "rxdataF_ext01", &pdsch_vars[eNB_id]->rxdataF_ext[1][0],14*frame_parms->N_RB_DL*12,1,1); + //write_output("rxdataF_ext10.m" , "rxdataF_ext10", &pdsch_vars[eNB_id]->rxdataF_ext[2][0],14*frame_parms->N_RB_DL*12,1,1); + //write_output("rxdataF_ext11.m" , "rxdataF_ext11", &pdsch_vars[eNB_id]->rxdataF_ext[3][0],14*frame_parms->N_RB_DL*12,1,1); + write_output("dl_ch_estimates_ext00.m", "dl_ch_estimates_ext00", &pdsch_vars[eNB_id]->dl_ch_estimates_ext[0][0],14*frame_parms->N_RB_DL*12,1,1); + //write_output("dl_ch_estimates_ext01.m", "dl_ch_estimates_ext01", &pdsch_vars[eNB_id]->dl_ch_estimates_ext[1][0],14*frame_parms->N_RB_DL*12,1,1); + //write_output("dl_ch_estimates_ext10.m", 
"dl_ch_estimates_ext10", &pdsch_vars[eNB_id]->dl_ch_estimates_ext[2][0],14*frame_parms->N_RB_DL*12,1,1); + //write_output("dl_ch_estimates_ext11.m", "dl_ch_estimates_ext11", &pdsch_vars[eNB_id]->dl_ch_estimates_ext[3][0],14*frame_parms->N_RB_DL*12,1,1); + write_output("rxdataF_comp00.m","rxdataF_comp00", &pdsch_vars[eNB_id]->rxdataF_comp0[0][0],14*frame_parms->N_RB_DL*12,1,1); + //write_output("rxdataF_comp01.m","rxdataF_comp01", &pdsch_vars[eNB_id]->rxdataF_comp0[1][0],14*frame_parms->N_RB_DL*12,1,1); + //write_output("rxdataF_comp10.m","rxdataF_comp10", &pdsch_vars[eNB_id]->rxdataF_comp1[harq_pid][round][0][0],14*frame_parms->N_RB_DL*12,1,1); + //write_output("rxdataF_comp11.m","rxdataF_comp11", &pdsch_vars[eNB_id]->rxdataF_comp1[harq_pid][round][1][0],14*frame_parms->N_RB_DL*12,1,1); +#endif + write_output("llr0.m","llr0", &pdsch_vars[eNB_id]->llr[0][0],(14*nb_rb*12*dlsch1_harq->Qm) - 4*(nb_rb*4*dlsch1_harq->Qm),1,0); + //write_output("llr1.m","llr1", &pdsch_vars[eNB_id]->llr[1][0],(14*nb_rb*12*dlsch1_harq->Qm) - 4*(nb_rb*4*dlsch1_harq->Qm),1,0); + + + AssertFatal(0," "); + } + + } +#endif + + T(T_UE_PHY_PDSCH_IQ, T_INT(eNB_id), T_INT(frame%1024), + T_INT(subframe), T_INT(nb_rb), + T_INT(frame_parms->N_RB_UL), T_INT(frame_parms->symbols_per_tti), + T_BUFFER(&pdsch_vars[eNB_id]->rxdataF_comp0[eNB_id][0], + 2 * /* ulsch[UE_id]->harq_processes[harq_pid]->nb_rb */ frame_parms->N_RB_UL *12*frame_parms->symbols_per_tti*2)); + + return 0; +} + +//============================================================================================== +// Pre-processing for LLR computation +//============================================================================================== + +void dlsch_channel_compensation(int **rxdataF_ext, + int **dl_ch_estimates_ext, + int **dl_ch_mag, + int **dl_ch_magb, + int **rxdataF_comp, + int **rho, + LTE_DL_FRAME_PARMS *frame_parms, + unsigned char symbol, + uint8_t first_symbol_flag, + unsigned char mod_order, + unsigned short nb_rb, + 
unsigned char output_shift, + PHY_MEASUREMENTS *measurements) +{ + +#if defined(__i386) || defined(__x86_64) + + unsigned short rb; + unsigned char aatx,aarx,symbol_mod,pilots=0; + __m128i *dl_ch128,*dl_ch128_2,*dl_ch_mag128,*dl_ch_mag128b,*rxdataF128,*rxdataF_comp128,*rho128; + __m128i mmtmpD0,mmtmpD1,mmtmpD2,mmtmpD3,QAM_amp128,QAM_amp128b; + + symbol_mod = (symbol>=(7-frame_parms->Ncp)) ? symbol-(7-frame_parms->Ncp) : symbol; + + if ((symbol_mod == 0) || (symbol_mod == (4-frame_parms->Ncp))) { + + if (frame_parms->nb_antenna_ports_eNB==1) // 10 out of 12 so don't reduce size + nb_rb=1+(5*nb_rb/6); + else + pilots=1; + } + + for (aatx=0; aatx<frame_parms->nb_antenna_ports_eNB; aatx++) { + if (mod_order == 4) { + QAM_amp128 = _mm_set1_epi16(QAM16_n1); // 2/sqrt(10) + QAM_amp128b = _mm_setzero_si128(); + } else if (mod_order == 6) { + QAM_amp128 = _mm_set1_epi16(QAM64_n1); // + QAM_amp128b = _mm_set1_epi16(QAM64_n2); + } + + // printf("comp: rxdataF_comp %p, symbol %d\n",rxdataF_comp[0],symbol); + + for (aarx=0; aarx<frame_parms->nb_antennas_rx; aarx++) { + + dl_ch128 = (__m128i *)&dl_ch_estimates_ext[(aatx<<1)+aarx][symbol*frame_parms->N_RB_DL*12]; + dl_ch_mag128 = (__m128i *)&dl_ch_mag[(aatx<<1)+aarx][symbol*frame_parms->N_RB_DL*12]; + dl_ch_mag128b = (__m128i *)&dl_ch_magb[(aatx<<1)+aarx][symbol*frame_parms->N_RB_DL*12]; + rxdataF128 = (__m128i *)&rxdataF_ext[aarx][symbol*frame_parms->N_RB_DL*12]; + rxdataF_comp128 = (__m128i *)&rxdataF_comp[(aatx<<1)+aarx][symbol*frame_parms->N_RB_DL*12]; + + + for (rb=0; rb<nb_rb; rb++) { + if (mod_order>2) { + // get channel amplitude if not QPSK + + mmtmpD0 = _mm_madd_epi16(dl_ch128[0],dl_ch128[0]); + mmtmpD0 = _mm_srai_epi32(mmtmpD0,output_shift); + + mmtmpD1 = _mm_madd_epi16(dl_ch128[1],dl_ch128[1]); + mmtmpD1 = _mm_srai_epi32(mmtmpD1,output_shift); + + mmtmpD0 = _mm_packs_epi32(mmtmpD0,mmtmpD1); + + // store channel magnitude here in a new field of dlsch + + dl_ch_mag128[0] = _mm_unpacklo_epi16(mmtmpD0,mmtmpD0); + 
dl_ch_mag128b[0] = dl_ch_mag128[0]; + dl_ch_mag128[0] = _mm_mulhi_epi16(dl_ch_mag128[0],QAM_amp128); + dl_ch_mag128[0] = _mm_slli_epi16(dl_ch_mag128[0],1); + //print_ints("Re(ch):",(int16_t*)&mmtmpD0); + //print_shorts("QAM_amp:",(int16_t*)&QAM_amp128); + //print_shorts("mag:",(int16_t*)&dl_ch_mag128[0]); + dl_ch_mag128[1] = _mm_unpackhi_epi16(mmtmpD0,mmtmpD0); + dl_ch_mag128b[1] = dl_ch_mag128[1]; + dl_ch_mag128[1] = _mm_mulhi_epi16(dl_ch_mag128[1],QAM_amp128); + dl_ch_mag128[1] = _mm_slli_epi16(dl_ch_mag128[1],1); + + if (pilots==0) { + mmtmpD0 = _mm_madd_epi16(dl_ch128[2],dl_ch128[2]); + mmtmpD0 = _mm_srai_epi32(mmtmpD0,output_shift); + mmtmpD1 = _mm_packs_epi32(mmtmpD0,mmtmpD0); + + dl_ch_mag128[2] = _mm_unpacklo_epi16(mmtmpD1,mmtmpD1); + dl_ch_mag128b[2] = dl_ch_mag128[2]; + + dl_ch_mag128[2] = _mm_mulhi_epi16(dl_ch_mag128[2],QAM_amp128); + dl_ch_mag128[2] = _mm_slli_epi16(dl_ch_mag128[2],1); + } + + dl_ch_mag128b[0] = _mm_mulhi_epi16(dl_ch_mag128b[0],QAM_amp128b); + dl_ch_mag128b[0] = _mm_slli_epi16(dl_ch_mag128b[0],1); + + + dl_ch_mag128b[1] = _mm_mulhi_epi16(dl_ch_mag128b[1],QAM_amp128b); + dl_ch_mag128b[1] = _mm_slli_epi16(dl_ch_mag128b[1],1); + + if (pilots==0) { + dl_ch_mag128b[2] = _mm_mulhi_epi16(dl_ch_mag128b[2],QAM_amp128b); + dl_ch_mag128b[2] = _mm_slli_epi16(dl_ch_mag128b[2],1); + } + } + + // multiply by conjugated channel + mmtmpD0 = _mm_madd_epi16(dl_ch128[0],rxdataF128[0]); + // print_ints("re",&mmtmpD0); + + // mmtmpD0 contains real part of 4 consecutive outputs (32-bit) + mmtmpD1 = _mm_shufflelo_epi16(dl_ch128[0],_MM_SHUFFLE(2,3,0,1)); + mmtmpD1 = _mm_shufflehi_epi16(mmtmpD1,_MM_SHUFFLE(2,3,0,1)); + mmtmpD1 = _mm_sign_epi16(mmtmpD1,*(__m128i*)&conjugate[0]); + // print_ints("im",&mmtmpD1); + mmtmpD1 = _mm_madd_epi16(mmtmpD1,rxdataF128[0]); + // mmtmpD1 contains imag part of 4 consecutive outputs (32-bit) + mmtmpD0 = _mm_srai_epi32(mmtmpD0,output_shift); + // print_ints("re(shift)",&mmtmpD0); + mmtmpD1 = _mm_srai_epi32(mmtmpD1,output_shift); + 
// print_ints("im(shift)",&mmtmpD1); + mmtmpD2 = _mm_unpacklo_epi32(mmtmpD0,mmtmpD1); + mmtmpD3 = _mm_unpackhi_epi32(mmtmpD0,mmtmpD1); + // print_ints("c0",&mmtmpD2); + // print_ints("c1",&mmtmpD3); + rxdataF_comp128[0] = _mm_packs_epi32(mmtmpD2,mmtmpD3); + // print_shorts("rx:",rxdataF128); + // print_shorts("ch:",dl_ch128); + // print_shorts("pack:",rxdataF_comp128); + + // multiply by conjugated channel + mmtmpD0 = _mm_madd_epi16(dl_ch128[1],rxdataF128[1]); + // mmtmpD0 contains real part of 4 consecutive outputs (32-bit) + mmtmpD1 = _mm_shufflelo_epi16(dl_ch128[1],_MM_SHUFFLE(2,3,0,1)); + mmtmpD1 = _mm_shufflehi_epi16(mmtmpD1,_MM_SHUFFLE(2,3,0,1)); + mmtmpD1 = _mm_sign_epi16(mmtmpD1,*(__m128i*)conjugate); + mmtmpD1 = _mm_madd_epi16(mmtmpD1,rxdataF128[1]); + // mmtmpD1 contains imag part of 4 consecutive outputs (32-bit) + mmtmpD0 = _mm_srai_epi32(mmtmpD0,output_shift); + mmtmpD1 = _mm_srai_epi32(mmtmpD1,output_shift); + mmtmpD2 = _mm_unpacklo_epi32(mmtmpD0,mmtmpD1); + mmtmpD3 = _mm_unpackhi_epi32(mmtmpD0,mmtmpD1); + + rxdataF_comp128[1] = _mm_packs_epi32(mmtmpD2,mmtmpD3); + // print_shorts("rx:",rxdataF128+1); + // print_shorts("ch:",dl_ch128+1); + // print_shorts("pack:",rxdataF_comp128+1); + + if (pilots==0) { + // multiply by conjugated channel + mmtmpD0 = _mm_madd_epi16(dl_ch128[2],rxdataF128[2]); + // mmtmpD0 contains real part of 4 consecutive outputs (32-bit) + mmtmpD1 = _mm_shufflelo_epi16(dl_ch128[2],_MM_SHUFFLE(2,3,0,1)); + mmtmpD1 = _mm_shufflehi_epi16(mmtmpD1,_MM_SHUFFLE(2,3,0,1)); + mmtmpD1 = _mm_sign_epi16(mmtmpD1,*(__m128i*)conjugate); + mmtmpD1 = _mm_madd_epi16(mmtmpD1,rxdataF128[2]); + // mmtmpD1 contains imag part of 4 consecutive outputs (32-bit) + mmtmpD0 = _mm_srai_epi32(mmtmpD0,output_shift); + mmtmpD1 = _mm_srai_epi32(mmtmpD1,output_shift); + mmtmpD2 = _mm_unpacklo_epi32(mmtmpD0,mmtmpD1); + mmtmpD3 = _mm_unpackhi_epi32(mmtmpD0,mmtmpD1); + + rxdataF_comp128[2] = _mm_packs_epi32(mmtmpD2,mmtmpD3); + // print_shorts("rx:",rxdataF128+2); + // 
print_shorts("ch:",dl_ch128+2); + // print_shorts("pack:",rxdataF_comp128+2); + + dl_ch128+=3; + dl_ch_mag128+=3; + dl_ch_mag128b+=3; + rxdataF128+=3; + rxdataF_comp128+=3; + } else { // we have a smaller PDSCH in symbols with pilots so skip last group of 4 REs and increment less + dl_ch128+=2; + dl_ch_mag128+=2; + dl_ch_mag128b+=2; + rxdataF128+=2; + rxdataF_comp128+=2; + } + + } + } + } + + if (rho) { + + + for (aarx=0; aarx<frame_parms->nb_antennas_rx; aarx++) { + rho128 = (__m128i *)&rho[aarx][symbol*frame_parms->N_RB_DL*12]; + dl_ch128 = (__m128i *)&dl_ch_estimates_ext[aarx][symbol*frame_parms->N_RB_DL*12]; + dl_ch128_2 = (__m128i *)&dl_ch_estimates_ext[2+aarx][symbol*frame_parms->N_RB_DL*12]; + + for (rb=0; rb<nb_rb; rb++) { + // multiply by conjugated channel + mmtmpD0 = _mm_madd_epi16(dl_ch128[0],dl_ch128_2[0]); + // print_ints("re",&mmtmpD0); + + // mmtmpD0 contains real part of 4 consecutive outputs (32-bit) + mmtmpD1 = _mm_shufflelo_epi16(dl_ch128[0],_MM_SHUFFLE(2,3,0,1)); + mmtmpD1 = _mm_shufflehi_epi16(mmtmpD1,_MM_SHUFFLE(2,3,0,1)); + mmtmpD1 = _mm_sign_epi16(mmtmpD1,*(__m128i*)&conjugate[0]); + // print_ints("im",&mmtmpD1); + mmtmpD1 = _mm_madd_epi16(mmtmpD1,dl_ch128_2[0]); + // mmtmpD1 contains imag part of 4 consecutive outputs (32-bit) + mmtmpD0 = _mm_srai_epi32(mmtmpD0,output_shift); + // print_ints("re(shift)",&mmtmpD0); + mmtmpD1 = _mm_srai_epi32(mmtmpD1,output_shift); + // print_ints("im(shift)",&mmtmpD1); + mmtmpD2 = _mm_unpacklo_epi32(mmtmpD0,mmtmpD1); + mmtmpD3 = _mm_unpackhi_epi32(mmtmpD0,mmtmpD1); + // print_ints("c0",&mmtmpD2); + // print_ints("c1",&mmtmpD3); + rho128[0] = _mm_packs_epi32(mmtmpD2,mmtmpD3); + + //print_shorts("rx:",dl_ch128_2); + //print_shorts("ch:",dl_ch128); + //print_shorts("pack:",rho128); + + // multiply by conjugated channel + mmtmpD0 = _mm_madd_epi16(dl_ch128[1],dl_ch128_2[1]); + // mmtmpD0 contains real part of 4 consecutive outputs (32-bit) + mmtmpD1 = _mm_shufflelo_epi16(dl_ch128[1],_MM_SHUFFLE(2,3,0,1)); + 
mmtmpD1 = _mm_shufflehi_epi16(mmtmpD1,_MM_SHUFFLE(2,3,0,1)); + mmtmpD1 = _mm_sign_epi16(mmtmpD1,*(__m128i*)conjugate); + mmtmpD1 = _mm_madd_epi16(mmtmpD1,dl_ch128_2[1]); + // mmtmpD1 contains imag part of 4 consecutive outputs (32-bit) + mmtmpD0 = _mm_srai_epi32(mmtmpD0,output_shift); + mmtmpD1 = _mm_srai_epi32(mmtmpD1,output_shift); + mmtmpD2 = _mm_unpacklo_epi32(mmtmpD0,mmtmpD1); + mmtmpD3 = _mm_unpackhi_epi32(mmtmpD0,mmtmpD1); + + + rho128[1] =_mm_packs_epi32(mmtmpD2,mmtmpD3); + //print_shorts("rx:",dl_ch128_2+1); + //print_shorts("ch:",dl_ch128+1); + //print_shorts("pack:",rho128+1); + // multiply by conjugated channel + mmtmpD0 = _mm_madd_epi16(dl_ch128[2],dl_ch128_2[2]); + // mmtmpD0 contains real part of 4 consecutive outputs (32-bit) + mmtmpD1 = _mm_shufflelo_epi16(dl_ch128[2],_MM_SHUFFLE(2,3,0,1)); + mmtmpD1 = _mm_shufflehi_epi16(mmtmpD1,_MM_SHUFFLE(2,3,0,1)); + mmtmpD1 = _mm_sign_epi16(mmtmpD1,*(__m128i*)conjugate); + mmtmpD1 = _mm_madd_epi16(mmtmpD1,dl_ch128_2[2]); + // mmtmpD1 contains imag part of 4 consecutive outputs (32-bit) + mmtmpD0 = _mm_srai_epi32(mmtmpD0,output_shift); + mmtmpD1 = _mm_srai_epi32(mmtmpD1,output_shift); + mmtmpD2 = _mm_unpacklo_epi32(mmtmpD0,mmtmpD1); + mmtmpD3 = _mm_unpackhi_epi32(mmtmpD0,mmtmpD1); + + rho128[2] = _mm_packs_epi32(mmtmpD2,mmtmpD3); + //print_shorts("rx:",dl_ch128_2+2); + //print_shorts("ch:",dl_ch128+2); + //print_shorts("pack:",rho128+2); + + dl_ch128+=3; + dl_ch128_2+=3; + rho128+=3; + + } + + if (first_symbol_flag==1) { + measurements->rx_correlation[0][aarx] = signal_energy(&rho[aarx][symbol*frame_parms->N_RB_DL*12],rb*12); + } + } + } + + _mm_empty(); + _m_empty(); + +#elif defined(__arm__) + + + unsigned short rb; + unsigned char aatx,aarx,symbol_mod,pilots=0; + + int16x4_t *dl_ch128,*dl_ch128_2,*rxdataF128; + int32x4_t mmtmpD0,mmtmpD1,mmtmpD0b,mmtmpD1b; + int16x8_t *dl_ch_mag128,*dl_ch_mag128b,mmtmpD2,mmtmpD3,mmtmpD4; + int16x8_t QAM_amp128,QAM_amp128b; + int16x4x2_t *rxdataF_comp128,*rho128; + + int16_t 
conj[4]__attribute__((aligned(16))) = {1,-1,1,-1}; + int32x4_t output_shift128 = vmovq_n_s32(-(int32_t)output_shift); + + symbol_mod = (symbol>=(7-frame_parms->Ncp)) ? symbol-(7-frame_parms->Ncp) : symbol; + + if ((symbol_mod == 0) || (symbol_mod == (4-frame_parms->Ncp))) { + if (frame_parms->nb_antenna_ports_eNB==1) { // 10 out of 12 so don't reduce size + nb_rb=1+(5*nb_rb/6); + } + else { + pilots=1; + } + } + + for (aatx=0; aatx<frame_parms->nb_antenna_ports_eNB; aatx++) { + if (mod_order == 4) { + QAM_amp128 = vmovq_n_s16(QAM16_n1); // 2/sqrt(10) + QAM_amp128b = vmovq_n_s16(0); + } else if (mod_order == 6) { + QAM_amp128 = vmovq_n_s16(QAM64_n1); // + QAM_amp128b = vmovq_n_s16(QAM64_n2); + } + // printf("comp: rxdataF_comp %p, symbol %d\n",rxdataF_comp[0],symbol); + + for (aarx=0; aarx<frame_parms->nb_antennas_rx; aarx++) { + dl_ch128 = (int16x4_t*)&dl_ch_estimates_ext[(aatx<<1)+aarx][symbol*frame_parms->N_RB_DL*12]; + dl_ch_mag128 = (int16x8_t*)&dl_ch_mag[(aatx<<1)+aarx][symbol*frame_parms->N_RB_DL*12]; + dl_ch_mag128b = (int16x8_t*)&dl_ch_magb[(aatx<<1)+aarx][symbol*frame_parms->N_RB_DL*12]; + rxdataF128 = (int16x4_t*)&rxdataF_ext[aarx][symbol*frame_parms->N_RB_DL*12]; + rxdataF_comp128 = (int16x4x2_t*)&rxdataF_comp[(aatx<<1)+aarx][symbol*frame_parms->N_RB_DL*12]; + + for (rb=0; rb<nb_rb; rb++) { + if (mod_order>2) { + // get channel amplitude if not QPSK + mmtmpD0 = vmull_s16(dl_ch128[0], dl_ch128[0]); + // mmtmpD0 = [ch0*ch0,ch1*ch1,ch2*ch2,ch3*ch3]; + mmtmpD0 = vqshlq_s32(vqaddq_s32(mmtmpD0,vrev64q_s32(mmtmpD0)),output_shift128); + // mmtmpD0 = [ch0*ch0 + ch1*ch1,ch0*ch0 + ch1*ch1,ch2*ch2 + ch3*ch3,ch2*ch2 + ch3*ch3]>>output_shift128 on 32-bits + mmtmpD1 = vmull_s16(dl_ch128[1], dl_ch128[1]); + mmtmpD1 = vqshlq_s32(vqaddq_s32(mmtmpD1,vrev64q_s32(mmtmpD1)),output_shift128); + mmtmpD2 = vcombine_s16(vmovn_s32(mmtmpD0),vmovn_s32(mmtmpD1)); + // mmtmpD2 = [ch0*ch0 + ch1*ch1,ch0*ch0 + ch1*ch1,ch2*ch2 + ch3*ch3,ch2*ch2 + ch3*ch3,ch4*ch4 + ch5*ch5,ch4*ch4 + 
ch5*ch5,ch6*ch6 + ch7*ch7,ch6*ch6 + ch7*ch7]>>output_shift128 on 16-bits + mmtmpD0 = vmull_s16(dl_ch128[2], dl_ch128[2]); + mmtmpD0 = vqshlq_s32(vqaddq_s32(mmtmpD0,vrev64q_s32(mmtmpD0)),output_shift128); + mmtmpD1 = vmull_s16(dl_ch128[3], dl_ch128[3]); + mmtmpD1 = vqshlq_s32(vqaddq_s32(mmtmpD1,vrev64q_s32(mmtmpD1)),output_shift128); + mmtmpD3 = vcombine_s16(vmovn_s32(mmtmpD0),vmovn_s32(mmtmpD1)); + if (pilots==0) { + mmtmpD0 = vmull_s16(dl_ch128[4], dl_ch128[4]); + mmtmpD0 = vqshlq_s32(vqaddq_s32(mmtmpD0,vrev64q_s32(mmtmpD0)),output_shift128); + mmtmpD1 = vmull_s16(dl_ch128[5], dl_ch128[5]); + mmtmpD1 = vqshlq_s32(vqaddq_s32(mmtmpD1,vrev64q_s32(mmtmpD1)),output_shift128); + mmtmpD4 = vcombine_s16(vmovn_s32(mmtmpD0),vmovn_s32(mmtmpD1)); + } + + dl_ch_mag128b[0] = vqdmulhq_s16(mmtmpD2,QAM_amp128b); + dl_ch_mag128b[1] = vqdmulhq_s16(mmtmpD3,QAM_amp128b); + dl_ch_mag128[0] = vqdmulhq_s16(mmtmpD2,QAM_amp128); + dl_ch_mag128[1] = vqdmulhq_s16(mmtmpD3,QAM_amp128); + + if (pilots==0) { + dl_ch_mag128b[2] = vqdmulhq_s16(mmtmpD4,QAM_amp128b); + dl_ch_mag128[2] = vqdmulhq_s16(mmtmpD4,QAM_amp128); + } + } + + mmtmpD0 = vmull_s16(dl_ch128[0], rxdataF128[0]); + //mmtmpD0 = [Re(ch[0])Re(rx[0]) Im(ch[0])Im(ch[0]) Re(ch[1])Re(rx[1]) Im(ch[1])Im(ch[1])] + mmtmpD1 = vmull_s16(dl_ch128[1], rxdataF128[1]); + //mmtmpD1 = [Re(ch[2])Re(rx[2]) Im(ch[2])Im(ch[2]) Re(ch[3])Re(rx[3]) Im(ch[3])Im(ch[3])] + mmtmpD0 = vcombine_s32(vpadd_s32(vget_low_s32(mmtmpD0),vget_high_s32(mmtmpD0)), + vpadd_s32(vget_low_s32(mmtmpD1),vget_high_s32(mmtmpD1))); + //mmtmpD0 = [Re(ch[0])Re(rx[0])+Im(ch[0])Im(ch[0]) Re(ch[1])Re(rx[1])+Im(ch[1])Im(ch[1]) Re(ch[2])Re(rx[2])+Im(ch[2])Im(ch[2]) Re(ch[3])Re(rx[3])+Im(ch[3])Im(ch[3])] + + mmtmpD0b = vmull_s16(vrev32_s16(vmul_s16(dl_ch128[0],*(int16x4_t*)conj)), rxdataF128[0]); + //mmtmpD0 = [-Im(ch[0])Re(rx[0]) Re(ch[0])Im(rx[0]) -Im(ch[1])Re(rx[1]) Re(ch[1])Im(rx[1])] + mmtmpD1b = vmull_s16(vrev32_s16(vmul_s16(dl_ch128[1],*(int16x4_t*)conj)), rxdataF128[1]); + 
//mmtmpD0 = [-Im(ch[2])Re(rx[2]) Re(ch[2])Im(rx[2]) -Im(ch[3])Re(rx[3]) Re(ch[3])Im(rx[3])] + mmtmpD1 = vcombine_s32(vpadd_s32(vget_low_s32(mmtmpD0b),vget_high_s32(mmtmpD0b)), + vpadd_s32(vget_low_s32(mmtmpD1b),vget_high_s32(mmtmpD1b))); + //mmtmpD1 = [-Im(ch[0])Re(rx[0])+Re(ch[0])Im(rx[0]) -Im(ch[1])Re(rx[1])+Re(ch[1])Im(rx[1]) -Im(ch[2])Re(rx[2])+Re(ch[2])Im(rx[2]) -Im(ch[3])Re(rx[3])+Re(ch[3])Im(rx[3])] + + mmtmpD0 = vqshlq_s32(mmtmpD0,output_shift128); + mmtmpD1 = vqshlq_s32(mmtmpD1,output_shift128); + rxdataF_comp128[0] = vzip_s16(vmovn_s32(mmtmpD0),vmovn_s32(mmtmpD1)); + mmtmpD0 = vmull_s16(dl_ch128[2], rxdataF128[2]); + mmtmpD1 = vmull_s16(dl_ch128[3], rxdataF128[3]); + mmtmpD0 = vcombine_s32(vpadd_s32(vget_low_s32(mmtmpD0),vget_high_s32(mmtmpD0)), + vpadd_s32(vget_low_s32(mmtmpD1),vget_high_s32(mmtmpD1))); + mmtmpD0b = vmull_s16(vrev32_s16(vmul_s16(dl_ch128[2],*(int16x4_t*)conj)), rxdataF128[2]); + mmtmpD1b = vmull_s16(vrev32_s16(vmul_s16(dl_ch128[3],*(int16x4_t*)conj)), rxdataF128[3]); + mmtmpD1 = vcombine_s32(vpadd_s32(vget_low_s32(mmtmpD0b),vget_high_s32(mmtmpD0b)), + vpadd_s32(vget_low_s32(mmtmpD1b),vget_high_s32(mmtmpD1b))); + mmtmpD0 = vqshlq_s32(mmtmpD0,output_shift128); + mmtmpD1 = vqshlq_s32(mmtmpD1,output_shift128); + rxdataF_comp128[1] = vzip_s16(vmovn_s32(mmtmpD0),vmovn_s32(mmtmpD1)); + + if (pilots==0) { + mmtmpD0 = vmull_s16(dl_ch128[4], rxdataF128[4]); + mmtmpD1 = vmull_s16(dl_ch128[5], rxdataF128[5]); + mmtmpD0 = vcombine_s32(vpadd_s32(vget_low_s32(mmtmpD0),vget_high_s32(mmtmpD0)), + vpadd_s32(vget_low_s32(mmtmpD1),vget_high_s32(mmtmpD1))); + + mmtmpD0b = vmull_s16(vrev32_s16(vmul_s16(dl_ch128[4],*(int16x4_t*)conj)), rxdataF128[4]); + mmtmpD1b = vmull_s16(vrev32_s16(vmul_s16(dl_ch128[5],*(int16x4_t*)conj)), rxdataF128[5]); + mmtmpD1 = vcombine_s32(vpadd_s32(vget_low_s32(mmtmpD0b),vget_high_s32(mmtmpD0b)), + vpadd_s32(vget_low_s32(mmtmpD1b),vget_high_s32(mmtmpD1b))); + + + mmtmpD0 = vqshlq_s32(mmtmpD0,output_shift128); + mmtmpD1 = 
vqshlq_s32(mmtmpD1,output_shift128); + rxdataF_comp128[2] = vzip_s16(vmovn_s32(mmtmpD0),vmovn_s32(mmtmpD1)); + + + dl_ch128+=6; + dl_ch_mag128+=3; + dl_ch_mag128b+=3; + rxdataF128+=6; + rxdataF_comp128+=3; + + } else { // we have a smaller PDSCH in symbols with pilots so skip last group of 4 REs and increment less + dl_ch128+=4; + dl_ch_mag128+=2; + dl_ch_mag128b+=2; + rxdataF128+=4; + rxdataF_comp128+=2; + } + } + } + } + + if (rho) { + for (aarx=0; aarx<frame_parms->nb_antennas_rx; aarx++) { + rho128 = (int16x4x2_t*)&rho[aarx][symbol*frame_parms->N_RB_DL*12]; + dl_ch128 = (int16x4_t*)&dl_ch_estimates_ext[aarx][symbol*frame_parms->N_RB_DL*12]; + dl_ch128_2 = (int16x4_t*)&dl_ch_estimates_ext[2+aarx][symbol*frame_parms->N_RB_DL*12]; + for (rb=0; rb<nb_rb; rb++) { + mmtmpD0 = vmull_s16(dl_ch128[0], dl_ch128_2[0]); + mmtmpD1 = vmull_s16(dl_ch128[1], dl_ch128_2[1]); + mmtmpD0 = vcombine_s32(vpadd_s32(vget_low_s32(mmtmpD0),vget_high_s32(mmtmpD0)), + vpadd_s32(vget_low_s32(mmtmpD1),vget_high_s32(mmtmpD1))); + mmtmpD0b = vmull_s16(vrev32_s16(vmul_s16(dl_ch128[0],*(int16x4_t*)conj)), dl_ch128_2[0]); + mmtmpD1b = vmull_s16(vrev32_s16(vmul_s16(dl_ch128[1],*(int16x4_t*)conj)), dl_ch128_2[1]); + mmtmpD1 = vcombine_s32(vpadd_s32(vget_low_s32(mmtmpD0b),vget_high_s32(mmtmpD0b)), + vpadd_s32(vget_low_s32(mmtmpD1b),vget_high_s32(mmtmpD1b))); + + mmtmpD0 = vqshlq_s32(mmtmpD0,output_shift128); + mmtmpD1 = vqshlq_s32(mmtmpD1,output_shift128); + rho128[0] = vzip_s16(vmovn_s32(mmtmpD0),vmovn_s32(mmtmpD1)); + + mmtmpD0 = vmull_s16(dl_ch128[2], dl_ch128_2[2]); + mmtmpD1 = vmull_s16(dl_ch128[3], dl_ch128_2[3]); + mmtmpD0 = vcombine_s32(vpadd_s32(vget_low_s32(mmtmpD0),vget_high_s32(mmtmpD0)), + vpadd_s32(vget_low_s32(mmtmpD1),vget_high_s32(mmtmpD1))); + mmtmpD0b = vmull_s16(vrev32_s16(vmul_s16(dl_ch128[2],*(int16x4_t*)conj)), dl_ch128_2[2]); + mmtmpD1b = vmull_s16(vrev32_s16(vmul_s16(dl_ch128[3],*(int16x4_t*)conj)), dl_ch128_2[3]); + mmtmpD1 = 
vcombine_s32(vpadd_s32(vget_low_s32(mmtmpD0b),vget_high_s32(mmtmpD0b)), + vpadd_s32(vget_low_s32(mmtmpD1b),vget_high_s32(mmtmpD1b))); + + mmtmpD0 = vqshlq_s32(mmtmpD0,output_shift128); + mmtmpD1 = vqshlq_s32(mmtmpD1,output_shift128); + rho128[1] = vzip_s16(vmovn_s32(mmtmpD0),vmovn_s32(mmtmpD1)); + + mmtmpD0 = vmull_s16(dl_ch128[0], dl_ch128_2[0]); + mmtmpD1 = vmull_s16(dl_ch128[1], dl_ch128_2[1]); + mmtmpD0 = vcombine_s32(vpadd_s32(vget_low_s32(mmtmpD0),vget_high_s32(mmtmpD0)), + vpadd_s32(vget_low_s32(mmtmpD1),vget_high_s32(mmtmpD1))); + mmtmpD0b = vmull_s16(vrev32_s16(vmul_s16(dl_ch128[4],*(int16x4_t*)conj)), dl_ch128_2[4]); + mmtmpD1b = vmull_s16(vrev32_s16(vmul_s16(dl_ch128[5],*(int16x4_t*)conj)), dl_ch128_2[5]); + mmtmpD1 = vcombine_s32(vpadd_s32(vget_low_s32(mmtmpD0b),vget_high_s32(mmtmpD0b)), + vpadd_s32(vget_low_s32(mmtmpD1b),vget_high_s32(mmtmpD1b))); + + mmtmpD0 = vqshlq_s32(mmtmpD0,output_shift128); + mmtmpD1 = vqshlq_s32(mmtmpD1,output_shift128); + rho128[2] = vzip_s16(vmovn_s32(mmtmpD0),vmovn_s32(mmtmpD1)); + + + dl_ch128+=6; + dl_ch128_2+=6; + rho128+=3; + } + + if (first_symbol_flag==1) { + measurements->rx_correlation[0][aarx] = signal_energy(&rho[aarx][symbol*frame_parms->N_RB_DL*12],rb*12); + } + } + } +#endif +} + +#if defined(__x86_64__) || defined(__i386__) + +void prec2A_TM56_128(unsigned char pmi,__m128i *ch0,__m128i *ch1) +{ + + __m128i amp; + amp = _mm_set1_epi16(ONE_OVER_SQRT2_Q15); + + switch (pmi) { + + case 0 : // +1 +1 + // print_shorts("phase 0 :ch0",ch0); + // print_shorts("phase 0 :ch1",ch1); + ch0[0] = _mm_adds_epi16(ch0[0],ch1[0]); + break; + + case 1 : // +1 -1 + // print_shorts("phase 1 :ch0",ch0); + // print_shorts("phase 1 :ch1",ch1); + ch0[0] = _mm_subs_epi16(ch0[0],ch1[0]); + // print_shorts("phase 1 :ch0-ch1",ch0); + break; + + case 2 : // +1 +j + ch1[0] = _mm_sign_epi16(ch1[0],*(__m128i*)&conjugate[0]); + ch1[0] = _mm_shufflelo_epi16(ch1[0],_MM_SHUFFLE(2,3,0,1)); + ch1[0] = _mm_shufflehi_epi16(ch1[0],_MM_SHUFFLE(2,3,0,1)); 
+ ch0[0] = _mm_subs_epi16(ch0[0],ch1[0]); + + break; // +1 -j + + case 3 : + ch1[0] = _mm_sign_epi16(ch1[0],*(__m128i*)&conjugate[0]); + ch1[0] = _mm_shufflelo_epi16(ch1[0],_MM_SHUFFLE(2,3,0,1)); + ch1[0] = _mm_shufflehi_epi16(ch1[0],_MM_SHUFFLE(2,3,0,1)); + ch0[0] = _mm_adds_epi16(ch0[0],ch1[0]); + break; + } + + ch0[0] = _mm_mulhi_epi16(ch0[0],amp); + ch0[0] = _mm_slli_epi16(ch0[0],1); + + _mm_empty(); + _m_empty(); +} +#elif defined(__arm__) +void prec2A_TM56_128(unsigned char pmi,__m128i *ch0,__m128i *ch1) { + + // sqrt(2) is already taken into account in computation sqrt_rho_a, sqrt_rho_b, + //so removed it + + //__m128i amp; + //amp = _mm_set1_epi16(ONE_OVER_SQRT2_Q15); + + switch (pmi) { + + case 0 : // +1 +1 + // print_shorts("phase 0 :ch0",ch0); + // print_shorts("phase 0 :ch1",ch1); + ch0[0] = _mm_adds_epi16(ch0[0],ch1[0]); + break; + case 1 : // +1 -1 + // print_shorts("phase 1 :ch0",ch0); + // print_shorts("phase 1 :ch1",ch1); + ch0[0] = _mm_subs_epi16(ch0[0],ch1[0]); + // print_shorts("phase 1 :ch0-ch1",ch0); + break; + case 2 : // +1 +j + ch1[0] = _mm_sign_epi16(ch1[0],*(__m128i*)&conjugate[0]); + ch1[0] = _mm_shufflelo_epi16(ch1[0],_MM_SHUFFLE(2,3,0,1)); + ch1[0] = _mm_shufflehi_epi16(ch1[0],_MM_SHUFFLE(2,3,0,1)); + ch0[0] = _mm_subs_epi16(ch0[0],ch1[0]); + + break; // +1 -j + case 3 : + ch1[0] = _mm_sign_epi16(ch1[0],*(__m128i*)&conjugate[0]); + ch1[0] = _mm_shufflelo_epi16(ch1[0],_MM_SHUFFLE(2,3,0,1)); + ch1[0] = _mm_shufflehi_epi16(ch1[0],_MM_SHUFFLE(2,3,0,1)); + ch0[0] = _mm_adds_epi16(ch0[0],ch1[0]); + break; + } + + //ch0[0] = _mm_mulhi_epi16(ch0[0],amp); + //ch0[0] = _mm_slli_epi16(ch0[0],1); + + _mm_empty(); + _m_empty(); +} +#endif +// precoding is stream 0 .5(1,1) .5(1,-1) .5(1,1) .5(1,-1) +// stream 1 .5(1,-1) .5(1,1) .5(1,-1) .5(1,1) +// store "precoded" channel for stream 0 in ch0, stream 1 in ch1 + +short TM3_prec[8]__attribute__((aligned(16))) = {1,1,-1,-1,1,1,-1,-1} ; + +void prec2A_TM3_128(__m128i *ch0,__m128i *ch1) { + + __m128i amp 
= _mm_set1_epi16(ONE_OVER_SQRT2_Q15); + + __m128i tmp0,tmp1; + + //_mm_mulhi_epi16 + // print_shorts("prec2A_TM3 ch0 (before):",ch0); + // print_shorts("prec2A_TM3 ch1 (before):",ch1); + + tmp0 = ch0[0]; + tmp1 = _mm_sign_epi16(ch1[0],((__m128i*)&TM3_prec)[0]); + // print_shorts("prec2A_TM3 ch1*s (mid):",(__m128i*)TM3_prec); + + ch0[0] = _mm_adds_epi16(ch0[0],tmp1); + ch1[0] = _mm_subs_epi16(tmp0,tmp1); + + ch0[0] = _mm_mulhi_epi16(ch0[0],amp); + ch0[0] = _mm_slli_epi16(ch0[0],1); + + ch1[0] = _mm_mulhi_epi16(ch1[0],amp); + ch1[0] = _mm_slli_epi16(ch1[0],1); + + // print_shorts("prec2A_TM3 ch0 (mid):",&tmp0); + // print_shorts("prec2A_TM3 ch1 (mid):",ch1); + + //ch0[0] = _mm_mulhi_epi16(ch0[0],amp); + //ch0[0] = _mm_slli_epi16(ch0[0],1); + //ch1[0] = _mm_mulhi_epi16(ch1[0],amp); + //ch1[0] = _mm_slli_epi16(ch1[0],1); + + //ch0[0] = _mm_srai_epi16(ch0[0],1); + //ch1[0] = _mm_srai_epi16(ch1[0],1); + + // print_shorts("prec2A_TM3 ch0 (after):",ch0); + // print_shorts("prec2A_TM3 ch1 (after):",ch1); + + _mm_empty(); + _m_empty(); +} + +// pmi = 0 => stream 0 (1,1), stream 1 (1,-1) +// pmi = 1 => stream 0 (1,j), stream 2 (1,-j) + +void prec2A_TM4_128(int pmi,__m128i *ch0,__m128i *ch1) { + +// sqrt(2) is already taken into account in computation sqrt_rho_a, sqrt_rho_b, +//so divide by 2 is replaced by divide by sqrt(2). 
+ + // printf ("demod pmi=%d\n", pmi); + __m128i amp; + amp = _mm_set1_epi16(ONE_OVER_SQRT2_Q15); + __m128i tmp0,tmp1; + + // print_shorts("prec2A_TM4 ch0 (before):",ch0); + // print_shorts("prec2A_TM4 ch1 (before):",ch1); + + if (pmi == 0) { //[1 1;1 -1] + tmp0 = ch0[0]; + tmp1 = ch1[0]; + ch0[0] = _mm_adds_epi16(tmp0,tmp1); + ch1[0] = _mm_subs_epi16(tmp0,tmp1); + } + else { //ch0+j*ch1 ch0-j*ch1 + tmp0 = ch0[0]; + tmp1 = _mm_sign_epi16(ch1[0],*(__m128i*)&conjugate[0]); + tmp1 = _mm_shufflelo_epi16(tmp1,_MM_SHUFFLE(2,3,0,1)); + tmp1 = _mm_shufflehi_epi16(tmp1,_MM_SHUFFLE(2,3,0,1)); + ch0[0] = _mm_subs_epi16(tmp0,tmp1); + ch1[0] = _mm_add_epi16(tmp0,tmp1); + } + + //print_shorts("prec2A_TM4 ch0 (middle):",ch0); + //print_shorts("prec2A_TM4 ch1 (middle):",ch1); + + ch0[0] = _mm_mulhi_epi16(ch0[0],amp); + ch0[0] = _mm_slli_epi16(ch0[0],1); + ch1[0] = _mm_mulhi_epi16(ch1[0],amp); + ch1[0] = _mm_slli_epi16(ch1[0],1); + + + // ch0[0] = _mm_srai_epi16(ch0[0],1); //divide by 2 + // ch1[0] = _mm_srai_epi16(ch1[0],1); //divide by 2 + //print_shorts("prec2A_TM4 ch0 (end):",ch0); + //print_shorts("prec2A_TM4 ch1 (end):",ch1); + _mm_empty(); + _m_empty(); + // print_shorts("prec2A_TM4 ch0 (end):",ch0); + //print_shorts("prec2A_TM4 ch1 (end):",ch1); +} + +void dlsch_channel_compensation_TM56(int **rxdataF_ext, + int **dl_ch_estimates_ext, + int **dl_ch_mag, + int **dl_ch_magb, + int **rxdataF_comp, + unsigned char *pmi_ext, + LTE_DL_FRAME_PARMS *frame_parms, + PHY_MEASUREMENTS *measurements, + int eNB_id, + unsigned char symbol, + unsigned char mod_order, + unsigned short nb_rb, + unsigned char output_shift, + unsigned char dl_power_off) +{ + +#if defined(__x86_64__) || defined(__i386__) + + unsigned short rb,Nre; + __m128i *dl_ch0_128,*dl_ch1_128,*dl_ch_mag128,*dl_ch_mag128b,*rxdataF128,*rxdataF_comp128; + unsigned char aarx=0,symbol_mod,pilots=0; + int precoded_signal_strength=0; + __m128i mmtmpD0,mmtmpD1,mmtmpD2,mmtmpD3,QAM_amp128,QAM_amp128b; + + symbol_mod = 
(symbol>=(7-frame_parms->Ncp)) ? symbol-(7-frame_parms->Ncp) : symbol; + + if ((symbol_mod == 0) || (symbol_mod == (4-frame_parms->Ncp))) + pilots=1; + + + //printf("comp prec: symbol %d, pilots %d\n",symbol, pilots); + + if (mod_order == 4) { + QAM_amp128 = _mm_set1_epi16(QAM16_n1); + QAM_amp128b = _mm_setzero_si128(); + } else if (mod_order == 6) { + QAM_amp128 = _mm_set1_epi16(QAM64_n1); + QAM_amp128b = _mm_set1_epi16(QAM64_n2); + } + + for (aarx=0; aarx<frame_parms->nb_antennas_rx; aarx++) { + + dl_ch0_128 = (__m128i *)&dl_ch_estimates_ext[aarx][symbol*frame_parms->N_RB_DL*12]; + dl_ch1_128 = (__m128i *)&dl_ch_estimates_ext[2+aarx][symbol*frame_parms->N_RB_DL*12]; + + + dl_ch_mag128 = (__m128i *)&dl_ch_mag[aarx][symbol*frame_parms->N_RB_DL*12]; + dl_ch_mag128b = (__m128i *)&dl_ch_magb[aarx][symbol*frame_parms->N_RB_DL*12]; + rxdataF128 = (__m128i *)&rxdataF_ext[aarx][symbol*frame_parms->N_RB_DL*12]; + rxdataF_comp128 = (__m128i *)&rxdataF_comp[aarx][symbol*frame_parms->N_RB_DL*12]; + + + for (rb=0; rb<nb_rb; rb++) { + // combine TX channels using precoder from pmi +#ifdef DEBUG_DLSCH_DEMOD + printf("mode 6 prec: rb %d, pmi->%d\n",rb,pmi_ext[rb]); +#endif + prec2A_TM56_128(pmi_ext[rb],&dl_ch0_128[0],&dl_ch1_128[0]); + prec2A_TM56_128(pmi_ext[rb],&dl_ch0_128[1],&dl_ch1_128[1]); + + if (pilots==0) { + + prec2A_TM56_128(pmi_ext[rb],&dl_ch0_128[2],&dl_ch1_128[2]); + } + + if (mod_order>2) { + // get channel amplitude if not QPSK + + mmtmpD0 = _mm_madd_epi16(dl_ch0_128[0],dl_ch0_128[0]); + mmtmpD0 = _mm_srai_epi32(mmtmpD0,output_shift); + + mmtmpD1 = _mm_madd_epi16(dl_ch0_128[1],dl_ch0_128[1]); + mmtmpD1 = _mm_srai_epi32(mmtmpD1,output_shift); + + mmtmpD0 = _mm_packs_epi32(mmtmpD0,mmtmpD1); + + dl_ch_mag128[0] = _mm_unpacklo_epi16(mmtmpD0,mmtmpD0); + dl_ch_mag128b[0] = dl_ch_mag128[0]; + dl_ch_mag128[0] = _mm_mulhi_epi16(dl_ch_mag128[0],QAM_amp128); + dl_ch_mag128[0] = _mm_slli_epi16(dl_ch_mag128[0],1); + + + //print_shorts("dl_ch_mag128[0]=",&dl_ch_mag128[0]); + + 
//print_shorts("dl_ch_mag128[0]=",&dl_ch_mag128[0]); + + dl_ch_mag128[1] = _mm_unpackhi_epi16(mmtmpD0,mmtmpD0); + dl_ch_mag128b[1] = dl_ch_mag128[1]; + dl_ch_mag128[1] = _mm_mulhi_epi16(dl_ch_mag128[1],QAM_amp128); + dl_ch_mag128[1] = _mm_slli_epi16(dl_ch_mag128[1],1); + + if (pilots==0) { + mmtmpD0 = _mm_madd_epi16(dl_ch0_128[2],dl_ch0_128[2]); + mmtmpD0 = _mm_srai_epi32(mmtmpD0,output_shift); + + mmtmpD1 = _mm_packs_epi32(mmtmpD0,mmtmpD0); + + dl_ch_mag128[2] = _mm_unpacklo_epi16(mmtmpD1,mmtmpD1); + dl_ch_mag128b[2] = dl_ch_mag128[2]; + + dl_ch_mag128[2] = _mm_mulhi_epi16(dl_ch_mag128[2],QAM_amp128); + dl_ch_mag128[2] = _mm_slli_epi16(dl_ch_mag128[2],1); + } + + dl_ch_mag128b[0] = _mm_mulhi_epi16(dl_ch_mag128b[0],QAM_amp128b); + dl_ch_mag128b[0] = _mm_slli_epi16(dl_ch_mag128b[0],1); + + //print_shorts("dl_ch_mag128b[0]=",&dl_ch_mag128b[0]); + + dl_ch_mag128b[1] = _mm_mulhi_epi16(dl_ch_mag128b[1],QAM_amp128b); + dl_ch_mag128b[1] = _mm_slli_epi16(dl_ch_mag128b[1],1); + + if (pilots==0) { + dl_ch_mag128b[2] = _mm_mulhi_epi16(dl_ch_mag128b[2],QAM_amp128b); + dl_ch_mag128b[2] = _mm_slli_epi16(dl_ch_mag128b[2],1); + + } + } + + // MF multiply by conjugated channel + mmtmpD0 = _mm_madd_epi16(dl_ch0_128[0],rxdataF128[0]); + // print_ints("re",&mmtmpD0); + + // mmtmpD0 contains real part of 4 consecutive outputs (32-bit) + mmtmpD1 = _mm_shufflelo_epi16(dl_ch0_128[0],_MM_SHUFFLE(2,3,0,1)); + mmtmpD1 = _mm_shufflehi_epi16(mmtmpD1,_MM_SHUFFLE(2,3,0,1)); + mmtmpD1 = _mm_sign_epi16(mmtmpD1,*(__m128i*)&conjugate[0]); + + // print_ints("im",&mmtmpD1); + mmtmpD1 = _mm_madd_epi16(mmtmpD1,rxdataF128[0]); + // mmtmpD1 contains imag part of 4 consecutive outputs (32-bit) + mmtmpD0 = _mm_srai_epi32(mmtmpD0,output_shift); + // print_ints("re(shift)",&mmtmpD0); + mmtmpD1 = _mm_srai_epi32(mmtmpD1,output_shift); + // print_ints("im(shift)",&mmtmpD1); + mmtmpD2 = _mm_unpacklo_epi32(mmtmpD0,mmtmpD1); + mmtmpD3 = _mm_unpackhi_epi32(mmtmpD0,mmtmpD1); + // print_ints("c0",&mmtmpD2); + // 
print_ints("c1",&mmtmpD3); + rxdataF_comp128[0] = _mm_packs_epi32(mmtmpD2,mmtmpD3); + // print_shorts("rx:",rxdataF128); + // print_shorts("ch:",dl_ch128); + // print_shorts("pack:",rxdataF_comp128); + + // multiply by conjugated channel + mmtmpD0 = _mm_madd_epi16(dl_ch0_128[1],rxdataF128[1]); + // mmtmpD0 contains real part of 4 consecutive outputs (32-bit) + mmtmpD1 = _mm_shufflelo_epi16(dl_ch0_128[1],_MM_SHUFFLE(2,3,0,1)); + mmtmpD1 = _mm_shufflehi_epi16(mmtmpD1,_MM_SHUFFLE(2,3,0,1)); + mmtmpD1 = _mm_sign_epi16(mmtmpD1,*(__m128i*)conjugate); + mmtmpD1 = _mm_madd_epi16(mmtmpD1,rxdataF128[1]); + // mmtmpD1 contains imag part of 4 consecutive outputs (32-bit) + mmtmpD0 = _mm_srai_epi32(mmtmpD0,output_shift); + mmtmpD1 = _mm_srai_epi32(mmtmpD1,output_shift); + mmtmpD2 = _mm_unpacklo_epi32(mmtmpD0,mmtmpD1); + mmtmpD3 = _mm_unpackhi_epi32(mmtmpD0,mmtmpD1); + + rxdataF_comp128[1] = _mm_packs_epi32(mmtmpD2,mmtmpD3); + // print_shorts("rx:",rxdataF128+1); + // print_shorts("ch:",dl_ch128+1); + // print_shorts("pack:",rxdataF_comp128+1); + + if (pilots==0) { + // multiply by conjugated channel + mmtmpD0 = _mm_madd_epi16(dl_ch0_128[2],rxdataF128[2]); + // mmtmpD0 contains real part of 4 consecutive outputs (32-bit) + mmtmpD1 = _mm_shufflelo_epi16(dl_ch0_128[2],_MM_SHUFFLE(2,3,0,1)); + mmtmpD1 = _mm_shufflehi_epi16(mmtmpD1,_MM_SHUFFLE(2,3,0,1)); + mmtmpD1 = _mm_sign_epi16(mmtmpD1,*(__m128i*)conjugate); + mmtmpD1 = _mm_madd_epi16(mmtmpD1,rxdataF128[2]); + // mmtmpD1 contains imag part of 4 consecutive outputs (32-bit) + mmtmpD0 = _mm_srai_epi32(mmtmpD0,output_shift); + mmtmpD1 = _mm_srai_epi32(mmtmpD1,output_shift); + mmtmpD2 = _mm_unpacklo_epi32(mmtmpD0,mmtmpD1); + mmtmpD3 = _mm_unpackhi_epi32(mmtmpD0,mmtmpD1); + + rxdataF_comp128[2] = _mm_packs_epi32(mmtmpD2,mmtmpD3); + // print_shorts("rx:",rxdataF128+2); + // print_shorts("ch:",dl_ch128+2); + // print_shorts("pack:",rxdataF_comp128+2); + + dl_ch0_128+=3; + dl_ch1_128+=3; + dl_ch_mag128+=3; + dl_ch_mag128b+=3; + 
rxdataF128+=3; + rxdataF_comp128+=3; + } else { + dl_ch0_128+=2; + dl_ch1_128+=2; + dl_ch_mag128+=2; + dl_ch_mag128b+=2; + rxdataF128+=2; + rxdataF_comp128+=2; + } + } + + Nre = (pilots==0) ? 12 : 8; + + precoded_signal_strength += ((signal_energy_nodc(&dl_ch_estimates_ext[aarx][symbol*frame_parms->N_RB_DL*Nre], + (nb_rb*Nre))) - (measurements->n0_power[aarx])); + } // rx_antennas + + measurements->precoded_cqi_dB[eNB_id][0] = dB_fixed2(precoded_signal_strength,measurements->n0_power_tot); + + //printf("eNB_id %d, symbol %d: precoded CQI %d dB\n",eNB_id,symbol, + // measurements->precoded_cqi_dB[eNB_id][0]); + +#elif defined(__arm__) + + uint32_t rb,Nre; + uint32_t aarx,symbol_mod,pilots=0; + + int16x4_t *dl_ch0_128,*dl_ch1_128,*rxdataF128; + int16x8_t *dl_ch0_128b,*dl_ch1_128b; + int32x4_t mmtmpD0,mmtmpD1,mmtmpD0b,mmtmpD1b; + int16x8_t *dl_ch_mag128,*dl_ch_mag128b,mmtmpD2,mmtmpD3,mmtmpD4,*rxdataF_comp128; + int16x8_t QAM_amp128,QAM_amp128b; + + int16_t conj[4]__attribute__((aligned(16))) = {1,-1,1,-1}; + int32x4_t output_shift128 = vmovq_n_s32(-(int32_t)output_shift); + int32_t precoded_signal_strength=0; + + symbol_mod = (symbol>=(7-frame_parms->Ncp)) ? 
symbol-(7-frame_parms->Ncp) : symbol; + if ((symbol_mod == 0) || (symbol_mod == (4-frame_parms->Ncp))) { + if (frame_parms->nb_antenna_ports_eNB==1) // 10 out of 12 so don't reduce size + { nb_rb=1+(5*nb_rb/6); } + + else + { pilots=1; } + } + + + if (mod_order == 4) { + QAM_amp128 = vmovq_n_s16(QAM16_n1); // 2/sqrt(10) + QAM_amp128b = vmovq_n_s16(0); + + } else if (mod_order == 6) { + QAM_amp128 = vmovq_n_s16(QAM64_n1); // + QAM_amp128b = vmovq_n_s16(QAM64_n2); + } + + // printf("comp: rxdataF_comp %p, symbol %d\n",rxdataF_comp[0],symbol); + + for (aarx=0; aarx<frame_parms->nb_antennas_rx; aarx++) { + + + + dl_ch0_128 = (int16x4_t*)&dl_ch_estimates_ext[aarx][symbol*frame_parms->N_RB_DL*12]; + dl_ch1_128 = (int16x4_t*)&dl_ch_estimates_ext[2+aarx][symbol*frame_parms->N_RB_DL*12]; + dl_ch0_128b = (int16x8_t*)&dl_ch_estimates_ext[aarx][symbol*frame_parms->N_RB_DL*12]; + dl_ch1_128b = (int16x8_t*)&dl_ch_estimates_ext[2+aarx][symbol*frame_parms->N_RB_DL*12]; + dl_ch_mag128 = (int16x8_t*)&dl_ch_mag[aarx][symbol*frame_parms->N_RB_DL*12]; + dl_ch_mag128b = (int16x8_t*)&dl_ch_magb[aarx][symbol*frame_parms->N_RB_DL*12]; + rxdataF128 = (int16x4_t*)&rxdataF_ext[aarx][symbol*frame_parms->N_RB_DL*12]; + rxdataF_comp128 = (int16x8_t*)&rxdataF_comp[aarx][symbol*frame_parms->N_RB_DL*12]; + + for (rb=0; rb<nb_rb; rb++) { +#ifdef DEBUG_DLSCH_DEMOD + printf("mode 6 prec: rb %d, pmi->%d\n",rb,pmi_ext[rb]); +#endif + prec2A_TM56_128(pmi_ext[rb],&dl_ch0_128b[0],&dl_ch1_128b[0]); + prec2A_TM56_128(pmi_ext[rb],&dl_ch0_128b[1],&dl_ch1_128b[1]); + + if (pilots==0) { + prec2A_TM56_128(pmi_ext[rb],&dl_ch0_128b[2],&dl_ch1_128b[2]); + } + + if (mod_order>2) { + // get channel amplitude if not QPSK + mmtmpD0 = vmull_s16(dl_ch0_128[0], dl_ch0_128[0]); + // mmtmpD0 = [ch0*ch0,ch1*ch1,ch2*ch2,ch3*ch3]; + mmtmpD0 = vqshlq_s32(vqaddq_s32(mmtmpD0,vrev64q_s32(mmtmpD0)),output_shift128); + // mmtmpD0 = [ch0*ch0 + ch1*ch1,ch0*ch0 + ch1*ch1,ch2*ch2 + ch3*ch3,ch2*ch2 + ch3*ch3]>>output_shift128 on 32-bits + 
mmtmpD1 = vmull_s16(dl_ch0_128[1], dl_ch0_128[1]); + mmtmpD1 = vqshlq_s32(vqaddq_s32(mmtmpD1,vrev64q_s32(mmtmpD1)),output_shift128); + mmtmpD2 = vcombine_s16(vmovn_s32(mmtmpD0),vmovn_s32(mmtmpD1)); + // mmtmpD2 = [ch0*ch0 + ch1*ch1,ch0*ch0 + ch1*ch1,ch2*ch2 + ch3*ch3,ch2*ch2 + ch3*ch3,ch4*ch4 + ch5*ch5,ch4*ch4 + ch5*ch5,ch6*ch6 + ch7*ch7,ch6*ch6 + ch7*ch7]>>output_shift128 on 16-bits + mmtmpD0 = vmull_s16(dl_ch0_128[2], dl_ch0_128[2]); + mmtmpD0 = vqshlq_s32(vqaddq_s32(mmtmpD0,vrev64q_s32(mmtmpD0)),output_shift128); + mmtmpD1 = vmull_s16(dl_ch0_128[3], dl_ch0_128[3]); + mmtmpD1 = vqshlq_s32(vqaddq_s32(mmtmpD1,vrev64q_s32(mmtmpD1)),output_shift128); + mmtmpD3 = vcombine_s16(vmovn_s32(mmtmpD0),vmovn_s32(mmtmpD1)); + if (pilots==0) { + mmtmpD0 = vmull_s16(dl_ch0_128[4], dl_ch0_128[4]); + mmtmpD0 = vqshlq_s32(vqaddq_s32(mmtmpD0,vrev64q_s32(mmtmpD0)),output_shift128); + mmtmpD1 = vmull_s16(dl_ch0_128[5], dl_ch0_128[5]); + mmtmpD1 = vqshlq_s32(vqaddq_s32(mmtmpD1,vrev64q_s32(mmtmpD1)),output_shift128); + mmtmpD4 = vcombine_s16(vmovn_s32(mmtmpD0),vmovn_s32(mmtmpD1)); + + + } + + dl_ch_mag128b[0] = vqdmulhq_s16(mmtmpD2,QAM_amp128b); + dl_ch_mag128b[1] = vqdmulhq_s16(mmtmpD3,QAM_amp128b); + dl_ch_mag128[0] = vqdmulhq_s16(mmtmpD2,QAM_amp128); + dl_ch_mag128[1] = vqdmulhq_s16(mmtmpD3,QAM_amp128); + + + if (pilots==0) { + dl_ch_mag128b[2] = vqdmulhq_s16(mmtmpD4,QAM_amp128b); + dl_ch_mag128[2] = vqdmulhq_s16(mmtmpD4,QAM_amp128); + } + } + mmtmpD0 = vmull_s16(dl_ch0_128[0], rxdataF128[0]); + //mmtmpD0 = [Re(ch[0])Re(rx[0]) Im(ch[0])Im(ch[0]) Re(ch[1])Re(rx[1]) Im(ch[1])Im(ch[1])] + mmtmpD1 = vmull_s16(dl_ch0_128[1], rxdataF128[1]); + //mmtmpD1 = [Re(ch[2])Re(rx[2]) Im(ch[2])Im(ch[2]) Re(ch[3])Re(rx[3]) Im(ch[3])Im(ch[3])] + mmtmpD0 = vcombine_s32(vpadd_s32(vget_low_s32(mmtmpD0),vget_high_s32(mmtmpD0)), + vpadd_s32(vget_low_s32(mmtmpD1),vget_high_s32(mmtmpD1))); + //mmtmpD0 = [Re(ch[0])Re(rx[0])+Im(ch[0])Im(ch[0]) Re(ch[1])Re(rx[1])+Im(ch[1])Im(ch[1]) 
Re(ch[2])Re(rx[2])+Im(ch[2])Im(ch[2]) Re(ch[3])Re(rx[3])+Im(ch[3])Im(ch[3])] + + mmtmpD0b = vmull_s16(vrev32_s16(vmul_s16(dl_ch0_128[0],*(int16x4_t*)conj)), rxdataF128[0]); + //mmtmpD0 = [-Im(ch[0])Re(rx[0]) Re(ch[0])Im(rx[0]) -Im(ch[1])Re(rx[1]) Re(ch[1])Im(rx[1])] + mmtmpD1b = vmull_s16(vrev32_s16(vmul_s16(dl_ch0_128[1],*(int16x4_t*)conj)), rxdataF128[1]); + //mmtmpD0 = [-Im(ch[2])Re(rx[2]) Re(ch[2])Im(rx[2]) -Im(ch[3])Re(rx[3]) Re(ch[3])Im(rx[3])] + mmtmpD1 = vcombine_s32(vpadd_s32(vget_low_s32(mmtmpD0b),vget_high_s32(mmtmpD0b)), + vpadd_s32(vget_low_s32(mmtmpD1b),vget_high_s32(mmtmpD1b))); + //mmtmpD1 = [-Im(ch[0])Re(rx[0])+Re(ch[0])Im(rx[0]) -Im(ch[1])Re(rx[1])+Re(ch[1])Im(rx[1]) -Im(ch[2])Re(rx[2])+Re(ch[2])Im(rx[2]) -Im(ch[3])Re(rx[3])+Re(ch[3])Im(rx[3])] + + mmtmpD0 = vqshlq_s32(mmtmpD0,output_shift128); + mmtmpD1 = vqshlq_s32(mmtmpD1,output_shift128); + rxdataF_comp128[0] = vcombine_s16(vmovn_s32(mmtmpD0),vmovn_s32(mmtmpD1)); + + mmtmpD0 = vmull_s16(dl_ch0_128[2], rxdataF128[2]); + mmtmpD1 = vmull_s16(dl_ch0_128[3], rxdataF128[3]); + mmtmpD0 = vcombine_s32(vpadd_s32(vget_low_s32(mmtmpD0),vget_high_s32(mmtmpD0)), + vpadd_s32(vget_low_s32(mmtmpD1),vget_high_s32(mmtmpD1))); + + mmtmpD0b = vmull_s16(vrev32_s16(vmul_s16(dl_ch0_128[2],*(int16x4_t*)conj)), rxdataF128[2]); + mmtmpD1b = vmull_s16(vrev32_s16(vmul_s16(dl_ch0_128[3],*(int16x4_t*)conj)), rxdataF128[3]); + mmtmpD1 = vcombine_s32(vpadd_s32(vget_low_s32(mmtmpD0b),vget_high_s32(mmtmpD0b)), + vpadd_s32(vget_low_s32(mmtmpD1b),vget_high_s32(mmtmpD1b))); + + mmtmpD0 = vqshlq_s32(mmtmpD0,output_shift128); + mmtmpD1 = vqshlq_s32(mmtmpD1,output_shift128); + rxdataF_comp128[1] = vcombine_s16(vmovn_s32(mmtmpD0),vmovn_s32(mmtmpD1)); + + if (pilots==0) { + mmtmpD0 = vmull_s16(dl_ch0_128[4], rxdataF128[4]); + mmtmpD1 = vmull_s16(dl_ch0_128[5], rxdataF128[5]); + mmtmpD0 = vcombine_s32(vpadd_s32(vget_low_s32(mmtmpD0),vget_high_s32(mmtmpD0)), + vpadd_s32(vget_low_s32(mmtmpD1),vget_high_s32(mmtmpD1))); + + mmtmpD0b = 
vmull_s16(vrev32_s16(vmul_s16(dl_ch0_128[4],*(int16x4_t*)conj)), rxdataF128[4]); + mmtmpD1b = vmull_s16(vrev32_s16(vmul_s16(dl_ch0_128[5],*(int16x4_t*)conj)), rxdataF128[5]); + mmtmpD1 = vcombine_s32(vpadd_s32(vget_low_s32(mmtmpD0b),vget_high_s32(mmtmpD0b)), + vpadd_s32(vget_low_s32(mmtmpD1b),vget_high_s32(mmtmpD1b))); + + + mmtmpD0 = vqshlq_s32(mmtmpD0,output_shift128); + mmtmpD1 = vqshlq_s32(mmtmpD1,output_shift128); + rxdataF_comp128[2] = vcombine_s16(vmovn_s32(mmtmpD0),vmovn_s32(mmtmpD1)); + + + dl_ch0_128+=6; + dl_ch1_128+=6; + dl_ch_mag128+=3; + dl_ch_mag128b+=3; + rxdataF128+=6; + rxdataF_comp128+=3; + + } else { // we have a smaller PDSCH in symbols with pilots so skip last group of 4 REs and increment less + dl_ch0_128+=4; + dl_ch1_128+=4; + dl_ch_mag128+=2; + dl_ch_mag128b+=2; + rxdataF128+=4; + rxdataF_comp128+=2; + } + } + + Nre = (pilots==0) ? 12 : 8; + + + precoded_signal_strength += ((signal_energy_nodc(&dl_ch_estimates_ext[aarx][symbol*frame_parms->N_RB_DL*Nre], + + (nb_rb*Nre))) - (measurements->n0_power[aarx])); + // rx_antennas + } + measurements->precoded_cqi_dB[eNB_id][0] = dB_fixed2(precoded_signal_strength,measurements->n0_power_tot); + + //printf("eNB_id %d, symbol %d: precoded CQI %d dB\n",eNB_id,symbol, + // measurements->precoded_cqi_dB[eNB_id][0]); + +#endif + _mm_empty(); + _m_empty(); +} + +void dlsch_channel_compensation_TM34(LTE_DL_FRAME_PARMS *frame_parms, + LTE_UE_PDSCH *pdsch_vars, + PHY_MEASUREMENTS *measurements, + int eNB_id, + unsigned char symbol, + unsigned char mod_order0, + unsigned char mod_order1, + int harq_pid, + int round, + MIMO_mode_t mimo_mode, + unsigned short nb_rb, + unsigned char output_shift0, + unsigned char output_shift1) { + +#if defined(__x86_64__) || defined(__i386__) + + unsigned short rb,Nre; + __m128i *dl_ch0_128,*dl_ch1_128,*dl_ch_mag0_128,*dl_ch_mag1_128,*dl_ch_mag0_128b,*dl_ch_mag1_128b,*rxdataF128,*rxdataF_comp0_128,*rxdataF_comp1_128; + unsigned char aarx=0,symbol_mod,pilots=0; + int 
precoded_signal_strength0=0,precoded_signal_strength1=0; + int rx_power_correction; + + int **rxdataF_ext = pdsch_vars->rxdataF_ext; + int **dl_ch_estimates_ext = pdsch_vars->dl_ch_estimates_ext; + int **dl_ch_mag0 = pdsch_vars->dl_ch_mag0; + int **dl_ch_mag1 = pdsch_vars->dl_ch_mag1[harq_pid][round]; + int **dl_ch_magb0 = pdsch_vars->dl_ch_magb0; + int **dl_ch_magb1 = pdsch_vars->dl_ch_magb1[harq_pid][round]; + int **rxdataF_comp0 = pdsch_vars->rxdataF_comp0; + int **rxdataF_comp1 = pdsch_vars->rxdataF_comp1[harq_pid][round]; + unsigned char *pmi_ext = pdsch_vars->pmi_ext; + __m128i mmtmpD0,mmtmpD1,mmtmpD2,mmtmpD3,QAM_amp0_128,QAM_amp0_128b,QAM_amp1_128,QAM_amp1_128b; + + symbol_mod = (symbol>=(7-frame_parms->Ncp)) ? symbol-(7-frame_parms->Ncp) : symbol; + + if ((symbol_mod == 0) || (symbol_mod == (4-frame_parms->Ncp))) + pilots=1; + + rx_power_correction = 1; + + // printf("comp prec: symbol %d, pilots %d\n",symbol, pilots); + + if (mod_order0 == 4) { + QAM_amp0_128 = _mm_set1_epi16(QAM16_n1); + QAM_amp0_128b = _mm_setzero_si128(); + } else if (mod_order0 == 6) { + QAM_amp0_128 = _mm_set1_epi16(QAM64_n1); + QAM_amp0_128b = _mm_set1_epi16(QAM64_n2); + } + + if (mod_order1 == 4) { + QAM_amp1_128 = _mm_set1_epi16(QAM16_n1); + QAM_amp1_128b = _mm_setzero_si128(); + } else if (mod_order1 == 6) { + QAM_amp1_128 = _mm_set1_epi16(QAM64_n1); + QAM_amp1_128b = _mm_set1_epi16(QAM64_n2); + } + + for (aarx=0;aarx<frame_parms->nb_antennas_rx;aarx++) { + + /* if (aarx==0) { + output_shift=output_shift0; + } + else { + output_shift=output_shift1; + } */ + + // printf("antenna %d\n", aarx); + // printf("symbol %d, rx antenna %d\n", symbol, aarx); + + dl_ch0_128 = (__m128i *)&dl_ch_estimates_ext[aarx][symbol*frame_parms->N_RB_DL*12]; // this is h11 + dl_ch1_128 = (__m128i *)&dl_ch_estimates_ext[2+aarx][symbol*frame_parms->N_RB_DL*12]; // this is h12 + + + dl_ch_mag0_128 = (__m128i *)&dl_ch_mag0[aarx][symbol*frame_parms->N_RB_DL*12]; //responsible for x1 + dl_ch_mag0_128b = 
(__m128i *)&dl_ch_magb0[aarx][symbol*frame_parms->N_RB_DL*12];//responsible for x1 + dl_ch_mag1_128 = (__m128i *)&dl_ch_mag1[aarx][symbol*frame_parms->N_RB_DL*12]; //responsible for x2. always coming from tx2 + dl_ch_mag1_128b = (__m128i *)&dl_ch_magb1[aarx][symbol*frame_parms->N_RB_DL*12]; //responsible for x2. always coming from tx2 + rxdataF128 = (__m128i *)&rxdataF_ext[aarx][symbol*frame_parms->N_RB_DL*12]; //received signal on antenna of interest h11*x1+h12*x2 + rxdataF_comp0_128 = (__m128i *)&rxdataF_comp0[aarx][symbol*frame_parms->N_RB_DL*12]; //result of multipl with MF x1 on antenna of interest + rxdataF_comp1_128 = (__m128i *)&rxdataF_comp1[aarx][symbol*frame_parms->N_RB_DL*12]; //result of multipl with MF x2 on antenna of interest + + for (rb=0; rb<nb_rb; rb++) { + + // combine TX channels using precoder from pmi + if (mimo_mode==LARGE_CDD) { + prec2A_TM3_128(&dl_ch0_128[0],&dl_ch1_128[0]); + prec2A_TM3_128(&dl_ch0_128[1],&dl_ch1_128[1]); + + + if (pilots==0) { + prec2A_TM3_128(&dl_ch0_128[2],&dl_ch1_128[2]); + } + } + else if (mimo_mode==DUALSTREAM_UNIFORM_PRECODING1) { + prec2A_TM4_128(0,&dl_ch0_128[0],&dl_ch1_128[0]); + prec2A_TM4_128(0,&dl_ch0_128[1],&dl_ch1_128[1]); + + if (pilots==0) { + prec2A_TM4_128(0,&dl_ch0_128[2],&dl_ch1_128[2]); + } + } + else if (mimo_mode==DUALSTREAM_UNIFORM_PRECODINGj) { + prec2A_TM4_128(1,&dl_ch0_128[0],&dl_ch1_128[0]); + prec2A_TM4_128(1,&dl_ch0_128[1],&dl_ch1_128[1]); + + if (pilots==0) { + prec2A_TM4_128(1,&dl_ch0_128[2],&dl_ch1_128[2]); + } + } + + else if (mimo_mode==DUALSTREAM_PUSCH_PRECODING) { + prec2A_TM4_128(pmi_ext[rb],&dl_ch0_128[0],&dl_ch1_128[0]); + prec2A_TM4_128(pmi_ext[rb],&dl_ch0_128[1],&dl_ch1_128[1]); + + if (pilots==0) { + prec2A_TM4_128(pmi_ext[rb],&dl_ch0_128[2],&dl_ch1_128[2]); + } + } + + + else { + LOG_E(PHY,"Unknown MIMO mode\n"); + return; + } + + + if (mod_order0>2) { + // get channel amplitude if not QPSK + + mmtmpD0 = _mm_madd_epi16(dl_ch0_128[0],dl_ch0_128[0]); + mmtmpD0 = 
_mm_srai_epi32(mmtmpD0,output_shift0); + + mmtmpD1 = _mm_madd_epi16(dl_ch0_128[1],dl_ch0_128[1]); + mmtmpD1 = _mm_srai_epi32(mmtmpD1,output_shift0); + + mmtmpD0 = _mm_packs_epi32(mmtmpD0,mmtmpD1); + + dl_ch_mag0_128[0] = _mm_unpacklo_epi16(mmtmpD0,mmtmpD0); + dl_ch_mag0_128b[0] = dl_ch_mag0_128[0]; + dl_ch_mag0_128[0] = _mm_mulhi_epi16(dl_ch_mag0_128[0],QAM_amp0_128); + dl_ch_mag0_128[0] = _mm_slli_epi16(dl_ch_mag0_128[0],1); + + // print_shorts("dl_ch_mag0_128[0]=",&dl_ch_mag0_128[0]); + + + dl_ch_mag0_128[1] = _mm_unpackhi_epi16(mmtmpD0,mmtmpD0); + dl_ch_mag0_128b[1] = dl_ch_mag0_128[1]; + dl_ch_mag0_128[1] = _mm_mulhi_epi16(dl_ch_mag0_128[1],QAM_amp0_128); + dl_ch_mag0_128[1] = _mm_slli_epi16(dl_ch_mag0_128[1],1); + + if (pilots==0) { + mmtmpD0 = _mm_madd_epi16(dl_ch0_128[2],dl_ch0_128[2]); + mmtmpD0 = _mm_srai_epi32(mmtmpD0,output_shift0); + + mmtmpD1 = _mm_packs_epi32(mmtmpD0,mmtmpD0); + + dl_ch_mag0_128[2] = _mm_unpacklo_epi16(mmtmpD1,mmtmpD1); + dl_ch_mag0_128b[2] = dl_ch_mag0_128[2]; + + dl_ch_mag0_128[2] = _mm_mulhi_epi16(dl_ch_mag0_128[2],QAM_amp0_128); + dl_ch_mag0_128[2] = _mm_slli_epi16(dl_ch_mag0_128[2],1); + } + + dl_ch_mag0_128b[0] = _mm_mulhi_epi16(dl_ch_mag0_128b[0],QAM_amp0_128b); + dl_ch_mag0_128b[0] = _mm_slli_epi16(dl_ch_mag0_128b[0],1); + + // print_shorts("dl_ch_mag0_128b[0]=",&dl_ch_mag0_128b[0]); + + dl_ch_mag0_128b[1] = _mm_mulhi_epi16(dl_ch_mag0_128b[1],QAM_amp0_128b); + dl_ch_mag0_128b[1] = _mm_slli_epi16(dl_ch_mag0_128b[1],1); + + if (pilots==0) { + dl_ch_mag0_128b[2] = _mm_mulhi_epi16(dl_ch_mag0_128b[2],QAM_amp0_128b); + dl_ch_mag0_128b[2] = _mm_slli_epi16(dl_ch_mag0_128b[2],1); + } + } + + if (mod_order1>2) { + // get channel amplitude if not QPSK + + mmtmpD0 = _mm_madd_epi16(dl_ch1_128[0],dl_ch1_128[0]); + mmtmpD0 = _mm_srai_epi32(mmtmpD0,output_shift1); + + mmtmpD1 = _mm_madd_epi16(dl_ch1_128[1],dl_ch1_128[1]); + mmtmpD1 = _mm_srai_epi32(mmtmpD1,output_shift1); + + mmtmpD0 = _mm_packs_epi32(mmtmpD0,mmtmpD1); + + dl_ch_mag1_128[0] = 
_mm_unpacklo_epi16(mmtmpD0,mmtmpD0); + dl_ch_mag1_128b[0] = dl_ch_mag1_128[0]; + dl_ch_mag1_128[0] = _mm_mulhi_epi16(dl_ch_mag1_128[0],QAM_amp1_128); + dl_ch_mag1_128[0] = _mm_slli_epi16(dl_ch_mag1_128[0],1); + + // print_shorts("dl_ch_mag1_128[0]=",&dl_ch_mag1_128[0]); + + dl_ch_mag1_128[1] = _mm_unpackhi_epi16(mmtmpD0,mmtmpD0); + dl_ch_mag1_128b[1] = dl_ch_mag1_128[1]; + dl_ch_mag1_128[1] = _mm_mulhi_epi16(dl_ch_mag1_128[1],QAM_amp1_128); + dl_ch_mag1_128[1] = _mm_slli_epi16(dl_ch_mag1_128[1],1); + + if (pilots==0) { + mmtmpD0 = _mm_madd_epi16(dl_ch1_128[2],dl_ch1_128[2]); + mmtmpD0 = _mm_srai_epi32(mmtmpD0,output_shift1); + + mmtmpD1 = _mm_packs_epi32(mmtmpD0,mmtmpD0); + + dl_ch_mag1_128[2] = _mm_unpacklo_epi16(mmtmpD1,mmtmpD1); + dl_ch_mag1_128b[2] = dl_ch_mag1_128[2]; + + dl_ch_mag1_128[2] = _mm_mulhi_epi16(dl_ch_mag1_128[2],QAM_amp1_128); + dl_ch_mag1_128[2] = _mm_slli_epi16(dl_ch_mag1_128[2],1); + } + + dl_ch_mag1_128b[0] = _mm_mulhi_epi16(dl_ch_mag1_128b[0],QAM_amp1_128b); + dl_ch_mag1_128b[0] = _mm_slli_epi16(dl_ch_mag1_128b[0],1); + + // print_shorts("dl_ch_mag1_128b[0]=",&dl_ch_mag1_128b[0]); + + dl_ch_mag1_128b[1] = _mm_mulhi_epi16(dl_ch_mag1_128b[1],QAM_amp1_128b); + dl_ch_mag1_128b[1] = _mm_slli_epi16(dl_ch_mag1_128b[1],1); + + if (pilots==0) { + dl_ch_mag1_128b[2] = _mm_mulhi_epi16(dl_ch_mag1_128b[2],QAM_amp1_128b); + dl_ch_mag1_128b[2] = _mm_slli_epi16(dl_ch_mag1_128b[2],1); + } + } + + // layer 0 + // MF multiply by conjugated channel + mmtmpD0 = _mm_madd_epi16(dl_ch0_128[0],rxdataF128[0]); + // print_ints("re",&mmtmpD0); + + // mmtmpD0 contains real part of 4 consecutive outputs (32-bit) + mmtmpD1 = _mm_shufflelo_epi16(dl_ch0_128[0],_MM_SHUFFLE(2,3,0,1)); + mmtmpD1 = _mm_shufflehi_epi16(mmtmpD1,_MM_SHUFFLE(2,3,0,1)); + mmtmpD1 = _mm_sign_epi16(mmtmpD1,*(__m128i*)&conjugate[0]); + mmtmpD1 = _mm_madd_epi16(mmtmpD1,rxdataF128[0]); + // print_ints("im",&mmtmpD1); + // mmtmpD1 contains imag part of 4 consecutive outputs (32-bit) + mmtmpD0 = 
_mm_srai_epi32(mmtmpD0,output_shift0); + // printf("Shift: %d\n",output_shift); + // print_ints("re(shift)",&mmtmpD0); + mmtmpD1 = _mm_srai_epi32(mmtmpD1,output_shift0); + // print_ints("im(shift)",&mmtmpD1); + mmtmpD2 = _mm_unpacklo_epi32(mmtmpD0,mmtmpD1); + mmtmpD3 = _mm_unpackhi_epi32(mmtmpD0,mmtmpD1); + // print_ints("c0",&mmtmpD2); + // print_ints("c1",&mmtmpD3); + rxdataF_comp0_128[0] = _mm_packs_epi32(mmtmpD2,mmtmpD3); + + // print_shorts("rx:",rxdataF128); + // print_shorts("ch:",dl_ch0_128); + // print_shorts("pack:",rxdataF_comp0_128); + + // multiply by conjugated channel + mmtmpD0 = _mm_madd_epi16(dl_ch0_128[1],rxdataF128[1]); + // mmtmpD0 contains real part of 4 consecutive outputs (32-bit) + mmtmpD1 = _mm_shufflelo_epi16(dl_ch0_128[1],_MM_SHUFFLE(2,3,0,1)); + mmtmpD1 = _mm_shufflehi_epi16(mmtmpD1,_MM_SHUFFLE(2,3,0,1)); + mmtmpD1 = _mm_sign_epi16(mmtmpD1,*(__m128i*)conjugate); + mmtmpD1 = _mm_madd_epi16(mmtmpD1,rxdataF128[1]); + // mmtmpD1 contains imag part of 4 consecutive outputs (32-bit) + mmtmpD0 = _mm_srai_epi32(mmtmpD0,output_shift0); + mmtmpD1 = _mm_srai_epi32(mmtmpD1,output_shift0); + mmtmpD2 = _mm_unpacklo_epi32(mmtmpD0,mmtmpD1); + mmtmpD3 = _mm_unpackhi_epi32(mmtmpD0,mmtmpD1); + + rxdataF_comp0_128[1] = _mm_packs_epi32(mmtmpD2,mmtmpD3); + // print_shorts("rx:",rxdataF128+1); + // print_shorts("ch:",dl_ch0_128+1); + // print_shorts("pack:",rxdataF_comp0_128+1); + + if (pilots==0) { + // multiply by conjugated channel + mmtmpD0 = _mm_madd_epi16(dl_ch0_128[2],rxdataF128[2]); + // mmtmpD0 contains real part of 4 consecutive outputs (32-bit) + mmtmpD1 = _mm_shufflelo_epi16(dl_ch0_128[2],_MM_SHUFFLE(2,3,0,1)); + mmtmpD1 = _mm_shufflehi_epi16(mmtmpD1,_MM_SHUFFLE(2,3,0,1)); + mmtmpD1 = _mm_sign_epi16(mmtmpD1,*(__m128i*)conjugate); + mmtmpD1 = _mm_madd_epi16(mmtmpD1,rxdataF128[2]); + // mmtmpD1 contains imag part of 4 consecutive outputs (32-bit) + mmtmpD0 = _mm_srai_epi32(mmtmpD0,output_shift0); + mmtmpD1 = _mm_srai_epi32(mmtmpD1,output_shift0); + 
mmtmpD2 = _mm_unpacklo_epi32(mmtmpD0,mmtmpD1); + mmtmpD3 = _mm_unpackhi_epi32(mmtmpD0,mmtmpD1); + + rxdataF_comp0_128[2] = _mm_packs_epi32(mmtmpD2,mmtmpD3); + // print_shorts("rx:",rxdataF128+2); + // print_shorts("ch:",dl_ch0_128+2); + // print_shorts("pack:",rxdataF_comp0_128+2); + + } + + + // layer 1 + // MF multiply by conjugated channel + mmtmpD0 = _mm_madd_epi16(dl_ch1_128[0],rxdataF128[0]); + // print_ints("re",&mmtmpD0); + + // mmtmpD0 contains real part of 4 consecutive outputs (32-bit) + mmtmpD1 = _mm_shufflelo_epi16(dl_ch1_128[0],_MM_SHUFFLE(2,3,0,1)); + mmtmpD1 = _mm_shufflehi_epi16(mmtmpD1,_MM_SHUFFLE(2,3,0,1)); + mmtmpD1 = _mm_sign_epi16(mmtmpD1,*(__m128i*)&conjugate[0]); + // print_ints("im",&mmtmpD1); + mmtmpD1 = _mm_madd_epi16(mmtmpD1,rxdataF128[0]); + // mmtmpD1 contains imag part of 4 consecutive outputs (32-bit) + mmtmpD0 = _mm_srai_epi32(mmtmpD0,output_shift1); + // print_ints("re(shift)",&mmtmpD0); + mmtmpD1 = _mm_srai_epi32(mmtmpD1,output_shift1); + // print_ints("im(shift)",&mmtmpD1); + mmtmpD2 = _mm_unpacklo_epi32(mmtmpD0,mmtmpD1); + mmtmpD3 = _mm_unpackhi_epi32(mmtmpD0,mmtmpD1); + // print_ints("c0",&mmtmpD2); + // print_ints("c1",&mmtmpD3); + rxdataF_comp1_128[0] = _mm_packs_epi32(mmtmpD2,mmtmpD3); + // print_shorts("rx:",rxdataF128); + // print_shorts("ch:",dl_ch1_128); + // print_shorts("pack:",rxdataF_comp1_128); + + // multiply by conjugated channel + mmtmpD0 = _mm_madd_epi16(dl_ch1_128[1],rxdataF128[1]); + // mmtmpD0 contains real part of 4 consecutive outputs (32-bit) + mmtmpD1 = _mm_shufflelo_epi16(dl_ch1_128[1],_MM_SHUFFLE(2,3,0,1)); + mmtmpD1 = _mm_shufflehi_epi16(mmtmpD1,_MM_SHUFFLE(2,3,0,1)); + mmtmpD1 = _mm_sign_epi16(mmtmpD1,*(__m128i*)conjugate); + mmtmpD1 = _mm_madd_epi16(mmtmpD1,rxdataF128[1]); + // mmtmpD1 contains imag part of 4 consecutive outputs (32-bit) + mmtmpD0 = _mm_srai_epi32(mmtmpD0,output_shift1); + mmtmpD1 = _mm_srai_epi32(mmtmpD1,output_shift1); + mmtmpD2 = _mm_unpacklo_epi32(mmtmpD0,mmtmpD1); + mmtmpD3 = 
_mm_unpackhi_epi32(mmtmpD0,mmtmpD1); + + rxdataF_comp1_128[1] = _mm_packs_epi32(mmtmpD2,mmtmpD3); + // print_shorts("rx:",rxdataF128+1); + // print_shorts("ch:",dl_ch1_128+1); + // print_shorts("pack:",rxdataF_comp1_128+1); + + if (pilots==0) { + // multiply by conjugated channel + mmtmpD0 = _mm_madd_epi16(dl_ch1_128[2],rxdataF128[2]); + // mmtmpD0 contains real part of 4 consecutive outputs (32-bit) + mmtmpD1 = _mm_shufflelo_epi16(dl_ch1_128[2],_MM_SHUFFLE(2,3,0,1)); + mmtmpD1 = _mm_shufflehi_epi16(mmtmpD1,_MM_SHUFFLE(2,3,0,1)); + mmtmpD1 = _mm_sign_epi16(mmtmpD1,*(__m128i*)conjugate); + mmtmpD1 = _mm_madd_epi16(mmtmpD1,rxdataF128[2]); + // mmtmpD1 contains imag part of 4 consecutive outputs (32-bit) + mmtmpD0 = _mm_srai_epi32(mmtmpD0,output_shift1); + mmtmpD1 = _mm_srai_epi32(mmtmpD1,output_shift1); + mmtmpD2 = _mm_unpacklo_epi32(mmtmpD0,mmtmpD1); + mmtmpD3 = _mm_unpackhi_epi32(mmtmpD0,mmtmpD1); + + rxdataF_comp1_128[2] = _mm_packs_epi32(mmtmpD2,mmtmpD3); + // print_shorts("rx:",rxdataF128+2); + // print_shorts("ch:",dl_ch1_128+2); + // print_shorts("pack:",rxdataF_comp1_128+2); + + dl_ch0_128+=3; + dl_ch1_128+=3; + dl_ch_mag0_128+=3; + dl_ch_mag1_128+=3; + dl_ch_mag0_128b+=3; + dl_ch_mag1_128b+=3; + rxdataF128+=3; + rxdataF_comp0_128+=3; + rxdataF_comp1_128+=3; + } + else { + dl_ch0_128+=2; + dl_ch1_128+=2; + dl_ch_mag0_128+=2; + dl_ch_mag1_128+=2; + dl_ch_mag0_128b+=2; + dl_ch_mag1_128b+=2; + rxdataF128+=2; + rxdataF_comp0_128+=2; + rxdataF_comp1_128+=2; + } + + } // rb loop + Nre = (pilots==0) ? 
12 : 8; + + precoded_signal_strength0 += ((signal_energy_nodc(&dl_ch_estimates_ext[aarx][symbol*frame_parms->N_RB_DL*Nre], + (nb_rb*Nre))*rx_power_correction) - (measurements->n0_power[aarx])); + + precoded_signal_strength1 += ((signal_energy_nodc(&dl_ch_estimates_ext[aarx+2][symbol*frame_parms->N_RB_DL*Nre], + (nb_rb*Nre))*rx_power_correction) - (measurements->n0_power[aarx])); + } // rx_antennas + + measurements->precoded_cqi_dB[eNB_id][0] = dB_fixed2(precoded_signal_strength0,measurements->n0_power_tot); + measurements->precoded_cqi_dB[eNB_id][1] = dB_fixed2(precoded_signal_strength1,measurements->n0_power_tot); + + // printf("eNB_id %d, symbol %d: precoded CQI %d dB\n",eNB_id,symbol, + // measurements->precoded_cqi_dB[eNB_id][0]); + + _mm_empty(); + _m_empty(); + + #elif defined(__arm__) + + unsigned short rb,Nre; + unsigned char aarx,symbol_mod,pilots=0; + int precoded_signal_strength0=0,precoded_signal_strength1=0, rx_power_correction; + int16x4_t *dl_ch0_128,*rxdataF128; + int16x4_t *dl_ch1_128; + int16x8_t *dl_ch0_128b,*dl_ch1_128b; + + int32x4_t mmtmpD0,mmtmpD1,mmtmpD0b,mmtmpD1b; + int16x8_t *dl_ch_mag0_128,*dl_ch_mag0_128b,*dl_ch_mag1_128,*dl_ch_mag1_128b,mmtmpD2,mmtmpD3,mmtmpD4,*rxdataF_comp0_128,*rxdataF_comp1_128; + int16x8_t QAM_amp0_128,QAM_amp0_128b,QAM_amp1_128,QAM_amp1_128b; + int32x4_t output_shift128 = vmovq_n_s32(-(int32_t)output_shift); + + int **rxdataF_ext = pdsch_vars->rxdataF_ext; + int **dl_ch_estimates_ext = pdsch_vars->dl_ch_estimates_ext; + int **dl_ch_mag0 = pdsch_vars->dl_ch_mag0; + int **dl_ch_mag1 = pdsch_vars->dl_ch_mag1[harq_pid][round]; + int **dl_ch_magb0 = pdsch_vars->dl_ch_magb0; + int **dl_ch_magb1 = pdsch_vars->dl_ch_magb1[harq_pid][round]; + int **rxdataF_comp0 = pdsch_vars->rxdataF_comp0; + int **rxdataF_comp1 = pdsch_vars->rxdataF_comp1[harq_pid][round]; + + int16_t conj[4]__attribute__((aligned(16))) = {1,-1,1,-1}; + + symbol_mod = (symbol>=(7-frame_parms->Ncp)) ? 
symbol-(7-frame_parms->Ncp) : symbol; + + if ((symbol_mod == 0) || (symbol_mod == (4-frame_parms->Ncp))) { + if (frame_parms->nb_antenna_ports_eNB==1) // 10 out of 12 so don't reduce size + { nb_rb=1+(5*nb_rb/6); } + + else + { pilots=1; } + } + + rx_power_correction=1; + + if (mod_order0 == 4) { + QAM_amp0_128 = vmovq_n_s16(QAM16_n1); // 2/sqrt(10) + QAM_amp0_128b = vmovq_n_s16(0); + + } else if (mod_order0 == 6) { + QAM_amp0_128 = vmovq_n_s16(QAM64_n1); // + QAM_amp0_128b = vmovq_n_s16(QAM64_n2); + } + + if (mod_order1 == 4) { + QAM_amp1_128 = vmovq_n_s16(QAM16_n1); // 2/sqrt(10) + QAM_amp1_128b = vmovq_n_s16(0); + + } else if (mod_order1 == 6) { + QAM_amp1_128 = vmovq_n_s16(QAM64_n1); // + QAM_amp1_128b = vmovq_n_s16(QAM64_n2); + } + + // printf("comp: rxdataF_comp %p, symbol %d\n",rxdataF_comp[0],symbol); + + for (aarx=0; aarx<frame_parms->nb_antennas_rx; aarx++) { + + + + dl_ch0_128 = (int16x4_t*)&dl_ch_estimates_ext[aarx][symbol*frame_parms->N_RB_DL*12]; + dl_ch1_128 = (int16x4_t*)&dl_ch_estimates_ext[2+aarx][symbol*frame_parms->N_RB_DL*12]; + dl_ch0_128b = (int16x8_t*)&dl_ch_estimates_ext[aarx][symbol*frame_parms->N_RB_DL*12]; + dl_ch1_128b = (int16x8_t*)&dl_ch_estimates_ext[2+aarx][symbol*frame_parms->N_RB_DL*12]; + dl_ch_mag0_128 = (int16x8_t*)&dl_ch_mag0[aarx][symbol*frame_parms->N_RB_DL*12]; + dl_ch_mag0_128b = (int16x8_t*)&dl_ch_magb0[aarx][symbol*frame_parms->N_RB_DL*12]; + dl_ch_mag1_128 = (int16x8_t*)&dl_ch_mag1[aarx][symbol*frame_parms->N_RB_DL*12]; + dl_ch_mag1_128b = (int16x8_t*)&dl_ch_magb1[aarx][symbol*frame_parms->N_RB_DL*12]; + rxdataF128 = (int16x4_t*)&rxdataF_ext[aarx][symbol*frame_parms->N_RB_DL*12]; + rxdataF_comp0_128 = (int16x8_t*)&rxdataF_comp0[aarx][symbol*frame_parms->N_RB_DL*12]; + rxdataF_comp1_128 = (int16x8_t*)&rxdataF_comp1[aarx][symbol*frame_parms->N_RB_DL*12]; + + for (rb=0; rb<nb_rb; rb++) { + // combine TX channels using precoder from pmi + if (mimo_mode==LARGE_CDD) { + prec2A_TM3_128(&dl_ch0_128[0],&dl_ch1_128[0]); + 
prec2A_TM3_128(&dl_ch0_128[1],&dl_ch1_128[1]); + + + if (pilots==0) { + prec2A_TM3_128(&dl_ch0_128[2],&dl_ch1_128[2]); + } + } + else if (mimo_mode==DUALSTREAM_UNIFORM_PRECODING1) { + prec2A_TM4_128(0,&dl_ch0_128[0],&dl_ch1_128[0]); + prec2A_TM4_128(0,&dl_ch0_128[1],&dl_ch1_128[1]); + + if (pilots==0) { + prec2A_TM4_128(0,&dl_ch0_128[2],&dl_ch1_128[2]); + } + } + else if (mimo_mode==DUALSTREAM_UNIFORM_PRECODINGj) { + prec2A_TM4_128(1,&dl_ch0_128[0],&dl_ch1_128[0]); + prec2A_TM4_128(1,&dl_ch0_128[1],&dl_ch1_128[1]); + + if (pilots==0) { + prec2A_TM4_128(1,&dl_ch0_128[2],&dl_ch1_128[2]); + } + } + else { + LOG_E(PHY,"Unknown MIMO mode\n"); + return; + } + + + if (mod_order0>2) { + // get channel amplitude if not QPSK + mmtmpD0 = vmull_s16(dl_ch0_128[0], dl_ch0_128[0]); + // mmtmpD0 = [ch0*ch0,ch1*ch1,ch2*ch2,ch3*ch3]; + mmtmpD0 = vqshlq_s32(vqaddq_s32(mmtmpD0,vrev64q_s32(mmtmpD0)),output_shift128); + // mmtmpD0 = [ch0*ch0 + ch1*ch1,ch0*ch0 + ch1*ch1,ch2*ch2 + ch3*ch3,ch2*ch2 + ch3*ch3]>>output_shift128 on 32-bits + mmtmpD1 = vmull_s16(dl_ch0_128[1], dl_ch0_128[1]); + mmtmpD1 = vqshlq_s32(vqaddq_s32(mmtmpD1,vrev64q_s32(mmtmpD1)),output_shift128); + mmtmpD2 = vcombine_s16(vmovn_s32(mmtmpD0),vmovn_s32(mmtmpD1)); + // mmtmpD2 = [ch0*ch0 + ch1*ch1,ch0*ch0 + ch1*ch1,ch2*ch2 + ch3*ch3,ch2*ch2 + ch3*ch3,ch4*ch4 + ch5*ch5,ch4*ch4 + ch5*ch5,ch6*ch6 + ch7*ch7,ch6*ch6 + ch7*ch7]>>output_shift128 on 16-bits + mmtmpD0 = vmull_s16(dl_ch0_128[2], dl_ch0_128[2]); + mmtmpD0 = vqshlq_s32(vqaddq_s32(mmtmpD0,vrev64q_s32(mmtmpD0)),output_shift128); + mmtmpD1 = vmull_s16(dl_ch0_128[3], dl_ch0_128[3]); + mmtmpD1 = vqshlq_s32(vqaddq_s32(mmtmpD1,vrev64q_s32(mmtmpD1)),output_shift128); + mmtmpD3 = vcombine_s16(vmovn_s32(mmtmpD0),vmovn_s32(mmtmpD1)); + + if (pilots==0) { + mmtmpD0 = vmull_s16(dl_ch0_128[4], dl_ch0_128[4]); + mmtmpD0 = vqshlq_s32(vqaddq_s32(mmtmpD0,vrev64q_s32(mmtmpD0)),output_shift128); + mmtmpD1 = vmull_s16(dl_ch0_128[5], dl_ch0_128[5]); + mmtmpD1 = 
vqshlq_s32(vqaddq_s32(mmtmpD1,vrev64q_s32(mmtmpD1)),output_shift128); + mmtmpD4 = vcombine_s16(vmovn_s32(mmtmpD0),vmovn_s32(mmtmpD1)); + + + } + + dl_ch_mag0_128b[0] = vqdmulhq_s16(mmtmpD2,QAM_amp0_128b); + dl_ch_mag0_128b[1] = vqdmulhq_s16(mmtmpD3,QAM_amp0_128b); + dl_ch_mag0_128[0] = vqdmulhq_s16(mmtmpD2,QAM_amp0_128); + dl_ch_mag0_128[1] = vqdmulhq_s16(mmtmpD3,QAM_amp0_128); + + + if (pilots==0) { + dl_ch_mag0_128b[2] = vqdmulhq_s16(mmtmpD4,QAM_amp0_128b); + dl_ch_mag0_128[2] = vqdmulhq_s16(mmtmpD4,QAM_amp0_128); + } + } + + if (mod_order1>2) { + // get channel amplitude if not QPSK + mmtmpD0 = vmull_s16(dl_ch1_128[0], dl_ch1_128[0]); + // mmtmpD0 = [ch0*ch0,ch1*ch1,ch2*ch2,ch3*ch3]; + mmtmpD0 = vqshlq_s32(vqaddq_s32(mmtmpD0,vrev64q_s32(mmtmpD0)),output_shift128); + // mmtmpD0 = [ch0*ch0 + ch1*ch1,ch0*ch0 + ch1*ch1,ch2*ch2 + ch3*ch3,ch2*ch2 + ch3*ch3]>>output_shift128 on 32-bits + mmtmpD1 = vmull_s16(dl_ch1_128[1], dl_ch1_128[1]); + mmtmpD1 = vqshlq_s32(vqaddq_s32(mmtmpD1,vrev64q_s32(mmtmpD1)),output_shift128); + mmtmpD2 = vcombine_s16(vmovn_s32(mmtmpD0),vmovn_s32(mmtmpD1)); + // mmtmpD2 = [ch0*ch0 + ch1*ch1,ch0*ch0 + ch1*ch1,ch2*ch2 + ch3*ch3,ch2*ch2 + ch3*ch3,ch4*ch4 + ch5*ch5,ch4*ch4 + ch5*ch5,ch6*ch6 + ch7*ch7,ch6*ch6 + ch7*ch7]>>output_shift128 on 16-bits + mmtmpD0 = vmull_s16(dl_ch1_128[2], dl_ch1_128[2]); + mmtmpD0 = vqshlq_s32(vqaddq_s32(mmtmpD0,vrev64q_s32(mmtmpD0)),output_shift128); + mmtmpD1 = vmull_s16(dl_ch1_128[3], dl_ch1_128[3]); + mmtmpD1 = vqshlq_s32(vqaddq_s32(mmtmpD1,vrev64q_s32(mmtmpD1)),output_shift128); + mmtmpD3 = vcombine_s16(vmovn_s32(mmtmpD0),vmovn_s32(mmtmpD1)); + + if (pilots==0) { + mmtmpD0 = vmull_s16(dl_ch1_128[4], dl_ch1_128[4]); + mmtmpD0 = vqshlq_s32(vqaddq_s32(mmtmpD0,vrev64q_s32(mmtmpD0)),output_shift128); + mmtmpD1 = vmull_s16(dl_ch1_128[5], dl_ch1_128[5]); + mmtmpD1 = vqshlq_s32(vqaddq_s32(mmtmpD1,vrev64q_s32(mmtmpD1)),output_shift128); + mmtmpD4 = vcombine_s16(vmovn_s32(mmtmpD0),vmovn_s32(mmtmpD1)); + + + } + + 
dl_ch_mag1_128b[0] = vqdmulhq_s16(mmtmpD2,QAM_amp1_128b); + dl_ch_mag1_128b[1] = vqdmulhq_s16(mmtmpD3,QAM_amp1_128b); + dl_ch_mag1_128[0] = vqdmulhq_s16(mmtmpD2,QAM_amp1_128); + dl_ch_mag1_128[1] = vqdmulhq_s16(mmtmpD3,QAM_amp1_128); + + + if (pilots==0) { + dl_ch_mag1_128b[2] = vqdmulhq_s16(mmtmpD4,QAM_amp1_128b); + dl_ch_mag1_128[2] = vqdmulhq_s16(mmtmpD4,QAM_amp1_128); + } + } + + mmtmpD0 = vmull_s16(dl_ch0_128[0], rxdataF128[0]); + //mmtmpD0 = [Re(ch[0])Re(rx[0]) Im(ch[0])Im(ch[0]) Re(ch[1])Re(rx[1]) Im(ch[1])Im(ch[1])] + mmtmpD1 = vmull_s16(dl_ch0_128[1], rxdataF128[1]); + //mmtmpD1 = [Re(ch[2])Re(rx[2]) Im(ch[2])Im(ch[2]) Re(ch[3])Re(rx[3]) Im(ch[3])Im(ch[3])] + mmtmpD0 = vcombine_s32(vpadd_s32(vget_low_s32(mmtmpD0),vget_high_s32(mmtmpD0)), + vpadd_s32(vget_low_s32(mmtmpD1),vget_high_s32(mmtmpD1))); + //mmtmpD0 = [Re(ch[0])Re(rx[0])+Im(ch[0])Im(ch[0]) Re(ch[1])Re(rx[1])+Im(ch[1])Im(ch[1]) Re(ch[2])Re(rx[2])+Im(ch[2])Im(ch[2]) Re(ch[3])Re(rx[3])+Im(ch[3])Im(ch[3])] + + mmtmpD0b = vmull_s16(vrev32_s16(vmul_s16(dl_ch0_128[0],*(int16x4_t*)conj)), rxdataF128[0]); + //mmtmpD0 = [-Im(ch[0])Re(rx[0]) Re(ch[0])Im(rx[0]) -Im(ch[1])Re(rx[1]) Re(ch[1])Im(rx[1])] + mmtmpD1b = vmull_s16(vrev32_s16(vmul_s16(dl_ch0_128[1],*(int16x4_t*)conj)), rxdataF128[1]); + //mmtmpD0 = [-Im(ch[2])Re(rx[2]) Re(ch[2])Im(rx[2]) -Im(ch[3])Re(rx[3]) Re(ch[3])Im(rx[3])] + mmtmpD1 = vcombine_s32(vpadd_s32(vget_low_s32(mmtmpD0b),vget_high_s32(mmtmpD0b)), + vpadd_s32(vget_low_s32(mmtmpD1b),vget_high_s32(mmtmpD1b))); + //mmtmpD1 = [-Im(ch[0])Re(rx[0])+Re(ch[0])Im(rx[0]) -Im(ch[1])Re(rx[1])+Re(ch[1])Im(rx[1]) -Im(ch[2])Re(rx[2])+Re(ch[2])Im(rx[2]) -Im(ch[3])Re(rx[3])+Re(ch[3])Im(rx[3])] + + mmtmpD0 = vqshlq_s32(mmtmpD0,output_shift128); + mmtmpD1 = vqshlq_s32(mmtmpD1,output_shift128); + rxdataF_comp0_128[0] = vcombine_s16(vmovn_s32(mmtmpD0),vmovn_s32(mmtmpD1)); + + mmtmpD0 = vmull_s16(dl_ch0_128[2], rxdataF128[2]); + mmtmpD1 = vmull_s16(dl_ch0_128[3], rxdataF128[3]); + mmtmpD0 = 
vcombine_s32(vpadd_s32(vget_low_s32(mmtmpD0),vget_high_s32(mmtmpD0)), + vpadd_s32(vget_low_s32(mmtmpD1),vget_high_s32(mmtmpD1))); + + mmtmpD0b = vmull_s16(vrev32_s16(vmul_s16(dl_ch0_128[2],*(int16x4_t*)conj)), rxdataF128[2]); + mmtmpD1b = vmull_s16(vrev32_s16(vmul_s16(dl_ch0_128[3],*(int16x4_t*)conj)), rxdataF128[3]); + mmtmpD1 = vcombine_s32(vpadd_s32(vget_low_s32(mmtmpD0b),vget_high_s32(mmtmpD0b)), + vpadd_s32(vget_low_s32(mmtmpD1b),vget_high_s32(mmtmpD1b))); + + mmtmpD0 = vqshlq_s32(mmtmpD0,output_shift128); + mmtmpD1 = vqshlq_s32(mmtmpD1,output_shift128); + rxdataF_comp0_128[1] = vcombine_s16(vmovn_s32(mmtmpD0),vmovn_s32(mmtmpD1)); + + // second stream + mmtmpD0 = vmull_s16(dl_ch1_128[0], rxdataF128[0]); + mmtmpD1 = vmull_s16(dl_ch1_128[1], rxdataF128[1]); + mmtmpD0 = vcombine_s32(vpadd_s32(vget_low_s32(mmtmpD0),vget_high_s32(mmtmpD0)), + vpadd_s32(vget_low_s32(mmtmpD1),vget_high_s32(mmtmpD1))); + mmtmpD0b = vmull_s16(vrev32_s16(vmul_s16(dl_ch0_128[0],*(int16x4_t*)conj)), rxdataF128[0]); + + mmtmpD1b = vmull_s16(vrev32_s16(vmul_s16(dl_ch0_128[1],*(int16x4_t*)conj)), rxdataF128[1]); + //mmtmpD0 = [-Im(ch[2])Re(rx[2]) Re(ch[2])Im(rx[2]) -Im(ch[3])Re(rx[3]) Re(ch[3])Im(rx[3])] + mmtmpD1 = vcombine_s32(vpadd_s32(vget_low_s32(mmtmpD0b),vget_high_s32(mmtmpD0b)), + vpadd_s32(vget_low_s32(mmtmpD1b),vget_high_s32(mmtmpD1b))); + //mmtmpD1 = [-Im(ch[0])Re(rx[0])+Re(ch[0])Im(rx[0]) -Im(ch[1])Re(rx[1])+Re(ch[1])Im(rx[1]) -Im(ch[2])Re(rx[2])+Re(ch[2])Im(rx[2]) -Im(ch[3])Re(rx[3])+Re(ch[3])Im(rx[3])] + + mmtmpD0 = vqshlq_s32(mmtmpD0,output_shift128); + mmtmpD1 = vqshlq_s32(mmtmpD1,output_shift128); + rxdataF_comp1_128[0] = vcombine_s16(vmovn_s32(mmtmpD0),vmovn_s32(mmtmpD1)); + + mmtmpD0 = vmull_s16(dl_ch1_128[2], rxdataF128[2]); + mmtmpD1 = vmull_s16(dl_ch1_128[3], rxdataF128[3]); + mmtmpD0 = vcombine_s32(vpadd_s32(vget_low_s32(mmtmpD0),vget_high_s32(mmtmpD0)), + vpadd_s32(vget_low_s32(mmtmpD1),vget_high_s32(mmtmpD1))); + + mmtmpD0b = 
vmull_s16(vrev32_s16(vmul_s16(dl_ch0_128[2],*(int16x4_t*)conj)), rxdataF128[2]); + mmtmpD1b = vmull_s16(vrev32_s16(vmul_s16(dl_ch0_128[3],*(int16x4_t*)conj)), rxdataF128[3]); + mmtmpD1 = vcombine_s32(vpadd_s32(vget_low_s32(mmtmpD0b),vget_high_s32(mmtmpD0b)), + vpadd_s32(vget_low_s32(mmtmpD1b),vget_high_s32(mmtmpD1b))); + + mmtmpD0 = vqshlq_s32(mmtmpD0,output_shift128); + mmtmpD1 = vqshlq_s32(mmtmpD1,output_shift128); + rxdataF_comp1_128[1] = vcombine_s16(vmovn_s32(mmtmpD0),vmovn_s32(mmtmpD1)); + + if (pilots==0) { + mmtmpD0 = vmull_s16(dl_ch0_128[4], rxdataF128[4]); + mmtmpD1 = vmull_s16(dl_ch0_128[5], rxdataF128[5]); + mmtmpD0 = vcombine_s32(vpadd_s32(vget_low_s32(mmtmpD0),vget_high_s32(mmtmpD0)), + vpadd_s32(vget_low_s32(mmtmpD1),vget_high_s32(mmtmpD1))); + + mmtmpD0b = vmull_s16(vrev32_s16(vmul_s16(dl_ch0_128[4],*(int16x4_t*)conj)), rxdataF128[4]); + mmtmpD1b = vmull_s16(vrev32_s16(vmul_s16(dl_ch0_128[5],*(int16x4_t*)conj)), rxdataF128[5]); + mmtmpD1 = vcombine_s32(vpadd_s32(vget_low_s32(mmtmpD0b),vget_high_s32(mmtmpD0b)), + vpadd_s32(vget_low_s32(mmtmpD1b),vget_high_s32(mmtmpD1b))); + + + mmtmpD0 = vqshlq_s32(mmtmpD0,output_shift128); + mmtmpD1 = vqshlq_s32(mmtmpD1,output_shift128); + rxdataF_comp0_128[2] = vcombine_s16(vmovn_s32(mmtmpD0),vmovn_s32(mmtmpD1)); + mmtmpD0 = vmull_s16(dl_ch1_128[4], rxdataF128[4]); + mmtmpD1 = vmull_s16(dl_ch1_128[5], rxdataF128[5]); + mmtmpD0 = vcombine_s32(vpadd_s32(vget_low_s32(mmtmpD0),vget_high_s32(mmtmpD0)), + vpadd_s32(vget_low_s32(mmtmpD1),vget_high_s32(mmtmpD1))); + + mmtmpD0b = vmull_s16(vrev32_s16(vmul_s16(dl_ch1_128[4],*(int16x4_t*)conj)), rxdataF128[4]); + mmtmpD1b = vmull_s16(vrev32_s16(vmul_s16(dl_ch1_128[5],*(int16x4_t*)conj)), rxdataF128[5]); + mmtmpD1 = vcombine_s32(vpadd_s32(vget_low_s32(mmtmpD0b),vget_high_s32(mmtmpD0b)), + vpadd_s32(vget_low_s32(mmtmpD1b),vget_high_s32(mmtmpD1b))); + + + mmtmpD0 = vqshlq_s32(mmtmpD0,output_shift128); + mmtmpD1 = vqshlq_s32(mmtmpD1,output_shift128); + rxdataF_comp1_128[2] = 
vcombine_s16(vmovn_s32(mmtmpD0),vmovn_s32(mmtmpD1)); + } + } + + + + Nre = (pilots==0) ? 12 : 8; + + // rx_antennas + } + + + Nre = (pilots==0) ? 12 : 8; + + precoded_signal_strength0 += ((signal_energy_nodc(&dl_ch_estimates_ext[aarx][symbol*frame_parms->N_RB_DL*Nre], + (nb_rb*Nre))*rx_power_correction) - (measurements->n0_power[aarx])); + precoded_signal_strength1 += ((signal_energy_nodc(&dl_ch_estimates_ext[aarx+2][symbol*frame_parms->N_RB_DL*Nre], + (nb_rb*Nre))*rx_power_correction) - (measurements->n0_power[aarx])); + + measurements->precoded_cqi_dB[eNB_id][0] = dB_fixed2(precoded_signal_strength0,measurements->n0_power_tot); + measurements->precoded_cqi_dB[eNB_id][1] = dB_fixed2(precoded_signal_strength1,measurements->n0_power_tot); + +#endif +} + + +void dlsch_dual_stream_correlation(LTE_DL_FRAME_PARMS *frame_parms, + unsigned char symbol, + unsigned short nb_rb, + int **dl_ch_estimates_ext, + int **dl_ch_estimates_ext_i, + int **dl_ch_rho_ext, + unsigned char output_shift) +{ + +#if defined(__x86_64__)||defined(__i386__) + + unsigned short rb; + __m128i *dl_ch128,*dl_ch128i,*dl_ch_rho128,mmtmpD0,mmtmpD1,mmtmpD2,mmtmpD3; + unsigned char aarx,symbol_mod,pilots=0; + + // printf("dlsch_dual_stream_correlation: symbol %d\n",symbol); + + symbol_mod = (symbol>=(7-frame_parms->Ncp)) ? 
symbol-(7-frame_parms->Ncp) : symbol; + + if ((symbol_mod == 0) || (symbol_mod == (4-frame_parms->Ncp))) { + pilots=1; + } + + // printf("Dual stream correlation (%p)\n",dl_ch_estimates_ext_i); + + for (aarx=0; aarx<frame_parms->nb_antennas_rx; aarx++) { + + + + //printf ("antenna %d", aarx); + dl_ch128 = (__m128i *)&dl_ch_estimates_ext[aarx][symbol*frame_parms->N_RB_DL*12]; + + if (dl_ch_estimates_ext_i == NULL) // TM3/4 + dl_ch128i = (__m128i *)&dl_ch_estimates_ext[2+aarx][symbol*frame_parms->N_RB_DL*12]; + else + dl_ch128i = (__m128i *)&dl_ch_estimates_ext_i[aarx][symbol*frame_parms->N_RB_DL*12]; + + dl_ch_rho128 = (__m128i *)&dl_ch_rho_ext[aarx][symbol*frame_parms->N_RB_DL*12]; + + + for (rb=0; rb<nb_rb; rb++) { + // multiply by conjugated channel + mmtmpD0 = _mm_madd_epi16(dl_ch128[0],dl_ch128i[0]); + // print_ints("re",&mmtmpD0); + // mmtmpD0 contains real part of 4 consecutive outputs (32-bit) + mmtmpD1 = _mm_shufflelo_epi16(dl_ch128[0],_MM_SHUFFLE(2,3,0,1)); + mmtmpD1 = _mm_shufflehi_epi16(mmtmpD1,_MM_SHUFFLE(2,3,0,1)); + mmtmpD1 = _mm_sign_epi16(mmtmpD1,*(__m128i*)&conjugate[0]); + mmtmpD1 = _mm_madd_epi16(mmtmpD1,dl_ch128i[0]); + // print_ints("im",&mmtmpD1); + // mmtmpD1 contains imag part of 4 consecutive outputs (32-bit) + mmtmpD0 = _mm_srai_epi32(mmtmpD0,output_shift); + // print_ints("re(shift)",&mmtmpD0); + mmtmpD1 = _mm_srai_epi32(mmtmpD1,output_shift); + // print_ints("im(shift)",&mmtmpD1); + mmtmpD2 = _mm_unpacklo_epi32(mmtmpD0,mmtmpD1); + mmtmpD3 = _mm_unpackhi_epi32(mmtmpD0,mmtmpD1); + // print_ints("c0",&mmtmpD2); + // print_ints("c1",&mmtmpD3); + dl_ch_rho128[0] = _mm_packs_epi32(mmtmpD2,mmtmpD3); + // print_shorts("rho 0:",dl_ch_rho128); + // multiply by conjugated channel + mmtmpD0 = _mm_madd_epi16(dl_ch128[1],dl_ch128i[1]); + // mmtmpD0 contains real part of 4 consecutive outputs (32-bit) + mmtmpD1 = _mm_shufflelo_epi16(dl_ch128[1],_MM_SHUFFLE(2,3,0,1)); + mmtmpD1 = _mm_shufflehi_epi16(mmtmpD1,_MM_SHUFFLE(2,3,0,1)); + mmtmpD1 = 
_mm_sign_epi16(mmtmpD1,*(__m128i*)conjugate); + mmtmpD1 = _mm_madd_epi16(mmtmpD1,dl_ch128i[1]); + // mmtmpD1 contains imag part of 4 consecutive outputs (32-bit) + mmtmpD0 = _mm_srai_epi32(mmtmpD0,output_shift); + mmtmpD1 = _mm_srai_epi32(mmtmpD1,output_shift); + mmtmpD2 = _mm_unpacklo_epi32(mmtmpD0,mmtmpD1); + mmtmpD3 = _mm_unpackhi_epi32(mmtmpD0,mmtmpD1); + dl_ch_rho128[1] =_mm_packs_epi32(mmtmpD2,mmtmpD3); + + + if (pilots==0) { + + // multiply by conjugated channel + mmtmpD0 = _mm_madd_epi16(dl_ch128[2],dl_ch128i[2]); + // mmtmpD0 contains real part of 4 consecutive outputs (32-bit) + mmtmpD1 = _mm_shufflelo_epi16(dl_ch128[2],_MM_SHUFFLE(2,3,0,1)); + mmtmpD1 = _mm_shufflehi_epi16(mmtmpD1,_MM_SHUFFLE(2,3,0,1)); + mmtmpD1 = _mm_sign_epi16(mmtmpD1,*(__m128i*)conjugate); + mmtmpD1 = _mm_madd_epi16(mmtmpD1,dl_ch128i[2]); + // mmtmpD1 contains imag part of 4 consecutive outputs (32-bit) + mmtmpD0 = _mm_srai_epi32(mmtmpD0,output_shift); + mmtmpD1 = _mm_srai_epi32(mmtmpD1,output_shift); + mmtmpD2 = _mm_unpacklo_epi32(mmtmpD0,mmtmpD1); + mmtmpD3 = _mm_unpackhi_epi32(mmtmpD0,mmtmpD1); + dl_ch_rho128[2] = _mm_packs_epi32(mmtmpD2,mmtmpD3); + + dl_ch128+=3; + dl_ch128i+=3; + dl_ch_rho128+=3; + } else { + + dl_ch128+=2; + dl_ch128i+=2; + dl_ch_rho128+=2; + } + } + + } + + _mm_empty(); + _m_empty(); + +#elif defined(__arm__) + +#endif +} + + +/*void dlsch_dual_stream_correlationTM34(LTE_DL_FRAME_PARMS *frame_parms, + unsigned char symbol, + unsigned short nb_rb, + int **dl_ch_estimates_ext, + int **dl_ch_estimates_ext_i, + int **dl_ch_rho_ext, + unsigned char output_shift0, + unsigned char output_shift1) +{ + +#if defined(__x86_64__)||defined(__i386__) + + unsigned short rb; + __m128i *dl_ch128,*dl_ch128i,*dl_ch_rho128,mmtmpD0,mmtmpD1,mmtmpD2,mmtmpD3; + unsigned char aarx,symbol_mod,pilots=0; + int output_shift; + + // printf("dlsch_dual_stream_correlation: symbol %d\n",symbol); + + symbol_mod = (symbol>=(7-frame_parms->Ncp)) ? 
symbol-(7-frame_parms->Ncp) : symbol; + + if ((symbol_mod == 0) || (symbol_mod == (4-frame_parms->Ncp))) { + pilots=1; + } + + // printf("Dual stream correlation (%p)\n",dl_ch_estimates_ext_i); + + for (aarx=0; aarx<frame_parms->nb_antennas_rx; aarx++) { + + if (aarx==0) { + output_shift=output_shift0; + } + else { + output_shift=output_shift1; + } + + //printf ("antenna %d", aarx); + dl_ch128 = (__m128i *)&dl_ch_estimates_ext[aarx][symbol*frame_parms->N_RB_DL*12]; + + if (dl_ch_estimates_ext_i == NULL) // TM3/4 + dl_ch128i = (__m128i *)&dl_ch_estimates_ext[2+aarx][symbol*frame_parms->N_RB_DL*12]; + else + dl_ch128i = (__m128i *)&dl_ch_estimates_ext_i[aarx][symbol*frame_parms->N_RB_DL*12]; + + dl_ch_rho128 = (__m128i *)&dl_ch_rho_ext[aarx][symbol*frame_parms->N_RB_DL*12]; + + + for (rb=0; rb<nb_rb; rb++) { + // multiply by conjugated channel + mmtmpD0 = _mm_madd_epi16(dl_ch128[0],dl_ch128i[0]); + // print_ints("re",&mmtmpD0); + // mmtmpD0 contains real part of 4 consecutive outputs (32-bit) + mmtmpD1 = _mm_shufflelo_epi16(dl_ch128[0],_MM_SHUFFLE(2,3,0,1)); + mmtmpD1 = _mm_shufflehi_epi16(mmtmpD1,_MM_SHUFFLE(2,3,0,1)); + mmtmpD1 = _mm_sign_epi16(mmtmpD1,*(__m128i*)&conjugate[0]); + mmtmpD1 = _mm_madd_epi16(mmtmpD1,dl_ch128i[0]); + // print_ints("im",&mmtmpD1); + // mmtmpD1 contains imag part of 4 consecutive outputs (32-bit) + mmtmpD0 = _mm_srai_epi32(mmtmpD0,output_shift); + // print_ints("re(shift)",&mmtmpD0); + mmtmpD1 = _mm_srai_epi32(mmtmpD1,output_shift); + // print_ints("im(shift)",&mmtmpD1); + mmtmpD2 = _mm_unpacklo_epi32(mmtmpD0,mmtmpD1); + mmtmpD3 = _mm_unpackhi_epi32(mmtmpD0,mmtmpD1); + // print_ints("c0",&mmtmpD2); + // print_ints("c1",&mmtmpD3); + dl_ch_rho128[0] = _mm_packs_epi32(mmtmpD2,mmtmpD3); + // print_shorts("rho 0:",dl_ch_rho128); + // multiply by conjugated channel + mmtmpD0 = _mm_madd_epi16(dl_ch128[1],dl_ch128i[1]); + // mmtmpD0 contains real part of 4 consecutive outputs (32-bit) + mmtmpD1 = 
_mm_shufflelo_epi16(dl_ch128[1],_MM_SHUFFLE(2,3,0,1)); + mmtmpD1 = _mm_shufflehi_epi16(mmtmpD1,_MM_SHUFFLE(2,3,0,1)); + mmtmpD1 = _mm_sign_epi16(mmtmpD1,*(__m128i*)conjugate); + mmtmpD1 = _mm_madd_epi16(mmtmpD1,dl_ch128i[1]); + // mmtmpD1 contains imag part of 4 consecutive outputs (32-bit) + mmtmpD0 = _mm_srai_epi32(mmtmpD0,output_shift); + mmtmpD1 = _mm_srai_epi32(mmtmpD1,output_shift); + mmtmpD2 = _mm_unpacklo_epi32(mmtmpD0,mmtmpD1); + mmtmpD3 = _mm_unpackhi_epi32(mmtmpD0,mmtmpD1); + dl_ch_rho128[1] =_mm_packs_epi32(mmtmpD2,mmtmpD3); + + + if (pilots==0) { + + // multiply by conjugated channel + mmtmpD0 = _mm_madd_epi16(dl_ch128[2],dl_ch128i[2]); + // mmtmpD0 contains real part of 4 consecutive outputs (32-bit) + mmtmpD1 = _mm_shufflelo_epi16(dl_ch128[2],_MM_SHUFFLE(2,3,0,1)); + mmtmpD1 = _mm_shufflehi_epi16(mmtmpD1,_MM_SHUFFLE(2,3,0,1)); + mmtmpD1 = _mm_sign_epi16(mmtmpD1,*(__m128i*)conjugate); + mmtmpD1 = _mm_madd_epi16(mmtmpD1,dl_ch128i[2]); + // mmtmpD1 contains imag part of 4 consecutive outputs (32-bit) + mmtmpD0 = _mm_srai_epi32(mmtmpD0,output_shift); + mmtmpD1 = _mm_srai_epi32(mmtmpD1,output_shift); + mmtmpD2 = _mm_unpacklo_epi32(mmtmpD0,mmtmpD1); + mmtmpD3 = _mm_unpackhi_epi32(mmtmpD0,mmtmpD1); + dl_ch_rho128[2] = _mm_packs_epi32(mmtmpD2,mmtmpD3); + + dl_ch128+=3; + dl_ch128i+=3; + dl_ch_rho128+=3; + } else { + + dl_ch128+=2; + dl_ch128i+=2; + dl_ch_rho128+=2; + } + } + + } + + _mm_empty(); + _m_empty(); + +#elif defined(__arm__) + +#endif +} +*/ + +void dlsch_detection_mrc(LTE_DL_FRAME_PARMS *frame_parms, + int **rxdataF_comp, + int **rxdataF_comp_i, + int **rho, + int **rho_i, + int **dl_ch_mag, + int **dl_ch_magb, + int **dl_ch_mag_i, + int **dl_ch_magb_i, + unsigned char symbol, + unsigned short nb_rb, + unsigned char dual_stream_UE) +{ + +#if defined(__x86_64__)||defined(__i386__) + + unsigned char aatx; + int i; + __m128i 
*rxdataF_comp128_0,*rxdataF_comp128_1,*rxdataF_comp128_i0,*rxdataF_comp128_i1,*dl_ch_mag128_0,*dl_ch_mag128_1,*dl_ch_mag128_0b,*dl_ch_mag128_1b,*rho128_0,*rho128_1,*rho128_i0,*rho128_i1, + *dl_ch_mag128_i0,*dl_ch_mag128_i1,*dl_ch_mag128_i0b,*dl_ch_mag128_i1b; + + if (frame_parms->nb_antennas_rx>1) { + + for (aatx=0; aatx<frame_parms->nb_antenna_ports_eNB; aatx++) { + + rxdataF_comp128_0 = (__m128i *)&rxdataF_comp[(aatx<<1)][symbol*frame_parms->N_RB_DL*12]; + rxdataF_comp128_1 = (__m128i *)&rxdataF_comp[(aatx<<1)+1][symbol*frame_parms->N_RB_DL*12]; + dl_ch_mag128_0 = (__m128i *)&dl_ch_mag[(aatx<<1)][symbol*frame_parms->N_RB_DL*12]; + dl_ch_mag128_1 = (__m128i *)&dl_ch_mag[(aatx<<1)+1][symbol*frame_parms->N_RB_DL*12]; + dl_ch_mag128_0b = (__m128i *)&dl_ch_magb[(aatx<<1)][symbol*frame_parms->N_RB_DL*12]; + dl_ch_mag128_1b = (__m128i *)&dl_ch_magb[(aatx<<1)+1][symbol*frame_parms->N_RB_DL*12]; + + // MRC on each re of rb, both on MF output and magnitude (for 16QAM/64QAM llr computation) + for (i=0;i<nb_rb*3;i++) { + rxdataF_comp128_0[i] = _mm_adds_epi16(_mm_srai_epi16(rxdataF_comp128_0[i],1),_mm_srai_epi16(rxdataF_comp128_1[i],1)); + dl_ch_mag128_0[i] = _mm_adds_epi16(_mm_srai_epi16(dl_ch_mag128_0[i],1),_mm_srai_epi16(dl_ch_mag128_1[i],1)); + dl_ch_mag128_0b[i] = _mm_adds_epi16(_mm_srai_epi16(dl_ch_mag128_0b[i],1),_mm_srai_epi16(dl_ch_mag128_1b[i],1)); + // print_shorts("mrc comp0:",&rxdataF_comp128_0[i]); + // print_shorts("mrc mag0:",&dl_ch_mag128_0[i]); + // print_shorts("mrc mag0b:",&dl_ch_mag128_0b[i]); + // print_shorts("mrc rho1:",&rho128_1[i]); + + } + } + + if (rho) { + rho128_0 = (__m128i *) &rho[0][symbol*frame_parms->N_RB_DL*12]; + rho128_1 = (__m128i *) &rho[1][symbol*frame_parms->N_RB_DL*12]; + for (i=0;i<nb_rb*3;i++) { + // print_shorts("mrc rho0:",&rho128_0[i]); + // print_shorts("mrc rho1:",&rho128_1[i]); + rho128_0[i] = _mm_adds_epi16(_mm_srai_epi16(rho128_0[i],1),_mm_srai_epi16(rho128_1[i],1)); + } + } + + + if (dual_stream_UE == 1) { + rho128_i0 = 
(__m128i *) &rho_i[0][symbol*frame_parms->N_RB_DL*12]; + rho128_i1 = (__m128i *) &rho_i[1][symbol*frame_parms->N_RB_DL*12]; + rxdataF_comp128_i0 = (__m128i *)&rxdataF_comp_i[0][symbol*frame_parms->N_RB_DL*12]; + rxdataF_comp128_i1 = (__m128i *)&rxdataF_comp_i[1][symbol*frame_parms->N_RB_DL*12]; + dl_ch_mag128_i0 = (__m128i *)&dl_ch_mag_i[0][symbol*frame_parms->N_RB_DL*12]; + dl_ch_mag128_i1 = (__m128i *)&dl_ch_mag_i[1][symbol*frame_parms->N_RB_DL*12]; + dl_ch_mag128_i0b = (__m128i *)&dl_ch_magb_i[0][symbol*frame_parms->N_RB_DL*12]; + dl_ch_mag128_i1b = (__m128i *)&dl_ch_magb_i[1][symbol*frame_parms->N_RB_DL*12]; + + for (i=0; i<nb_rb*3; i++) { + rxdataF_comp128_i0[i] = _mm_adds_epi16(_mm_srai_epi16(rxdataF_comp128_i0[i],1),_mm_srai_epi16(rxdataF_comp128_i1[i],1)); + rho128_i0[i] = _mm_adds_epi16(_mm_srai_epi16(rho128_i0[i],1),_mm_srai_epi16(rho128_i1[i],1)); + + dl_ch_mag128_i0[i] = _mm_adds_epi16(_mm_srai_epi16(dl_ch_mag128_i0[i],1),_mm_srai_epi16(dl_ch_mag128_i1[i],1)); + dl_ch_mag128_i0b[i] = _mm_adds_epi16(_mm_srai_epi16(dl_ch_mag128_i0b[i],1),_mm_srai_epi16(dl_ch_mag128_i1b[i],1)); + } + } + } + + _mm_empty(); + _m_empty(); + +#elif defined(__arm__) + + unsigned char aatx; + int i; + int16x8_t *rxdataF_comp128_0,*rxdataF_comp128_1,*rxdataF_comp128_i0,*rxdataF_comp128_i1,*dl_ch_mag128_0,*dl_ch_mag128_1,*dl_ch_mag128_0b,*dl_ch_mag128_1b,*rho128_0,*rho128_1,*rho128_i0,*rho128_i1,*dl_ch_mag128_i0,*dl_ch_mag128_i1,*dl_ch_mag128_i0b,*dl_ch_mag128_i1b; + + if (frame_parms->nb_antennas_rx>1) { + + for (aatx=0; aatx<frame_parms->nb_antenna_ports_eNB; aatx++) { + + rxdataF_comp128_0 = (int16x8_t *)&rxdataF_comp[(aatx<<1)][symbol*frame_parms->N_RB_DL*12]; + rxdataF_comp128_1 = (int16x8_t *)&rxdataF_comp[(aatx<<1)+1][symbol*frame_parms->N_RB_DL*12]; + dl_ch_mag128_0 = (int16x8_t *)&dl_ch_mag[(aatx<<1)][symbol*frame_parms->N_RB_DL*12]; + dl_ch_mag128_1 = (int16x8_t *)&dl_ch_mag[(aatx<<1)+1][symbol*frame_parms->N_RB_DL*12]; + dl_ch_mag128_0b = (int16x8_t 
*)&dl_ch_magb[(aatx<<1)][symbol*frame_parms->N_RB_DL*12]; + dl_ch_mag128_1b = (int16x8_t *)&dl_ch_magb[(aatx<<1)+1][symbol*frame_parms->N_RB_DL*12]; + + // MRC on each re of rb, both on MF output and magnitude (for 16QAM/64QAM llr computation) + for (i=0; i<nb_rb*3; i++) { + rxdataF_comp128_0[i] = vhaddq_s16(rxdataF_comp128_0[i],rxdataF_comp128_1[i]); + dl_ch_mag128_0[i] = vhaddq_s16(dl_ch_mag128_0[i],dl_ch_mag128_1[i]); + dl_ch_mag128_0b[i] = vhaddq_s16(dl_ch_mag128_0b[i],dl_ch_mag128_1b[i]); + } + } + + if (rho) { + rho128_0 = (int16x8_t *) &rho[0][symbol*frame_parms->N_RB_DL*12]; + rho128_1 = (int16x8_t *) &rho[1][symbol*frame_parms->N_RB_DL*12]; + + for (i=0; i<nb_rb*3; i++) { + // print_shorts("mrc rho0:",&rho128_0[i]); + // print_shorts("mrc rho1:",&rho128_1[i]); + rho128_0[i] = vhaddq_s16(rho128_0[i],rho128_1[i]); + } + } + + + if (dual_stream_UE == 1) { + rho128_i0 = (int16x8_t *) &rho_i[0][symbol*frame_parms->N_RB_DL*12]; + rho128_i1 = (int16x8_t *) &rho_i[1][symbol*frame_parms->N_RB_DL*12]; + rxdataF_comp128_i0 = (int16x8_t *)&rxdataF_comp_i[0][symbol*frame_parms->N_RB_DL*12]; + rxdataF_comp128_i1 = (int16x8_t *)&rxdataF_comp_i[1][symbol*frame_parms->N_RB_DL*12]; + + dl_ch_mag128_i0 = (int16x8_t *)&dl_ch_mag_i[0][symbol*frame_parms->N_RB_DL*12]; + dl_ch_mag128_i1 = (int16x8_t *)&dl_ch_mag_i[1][symbol*frame_parms->N_RB_DL*12]; + dl_ch_mag128_i0b = (int16x8_t *)&dl_ch_magb_i[0][symbol*frame_parms->N_RB_DL*12]; + dl_ch_mag128_i1b = (int16x8_t *)&dl_ch_magb_i[1][symbol*frame_parms->N_RB_DL*12]; + + for (i=0; i<nb_rb*3; i++) { + rxdataF_comp128_i0[i] = vhaddq_s16(rxdataF_comp128_i0[i],rxdataF_comp128_i1[i]); + rho128_i0[i] = vhaddq_s16(rho128_i0[i],rho128_i1[i]); + + dl_ch_mag128_i0[i] = vhaddq_s16(dl_ch_mag128_i0[i],dl_ch_mag128_i1[i]); + dl_ch_mag128_i0b[i] = vhaddq_s16(dl_ch_mag128_i0b[i],dl_ch_mag128_i1b[i]); + } + } + } + +#endif +} + + +void dlsch_detection_mrc_TM34(LTE_DL_FRAME_PARMS *frame_parms, + LTE_UE_PDSCH *pdsch_vars, + int harq_pid, + int round, 
+ unsigned char symbol, + unsigned short nb_rb, + unsigned char dual_stream_UE) { + + int i; + __m128i *rxdataF_comp128_0,*rxdataF_comp128_1,*rxdataF_comp128_i0,*rxdataF_comp128_i1,*dl_ch_mag128_0,*dl_ch_mag128_1,*dl_ch_mag128_0b,*dl_ch_mag128_1b,*rho128_0,*rho128_1,*rho128_i0,*rho128_i1,*dl_ch_mag128_i0,*dl_ch_mag128_i1,*dl_ch_mag128_i0b,*dl_ch_mag128_i1b; + + int **rxdataF_comp0 = pdsch_vars->rxdataF_comp0; + int **rxdataF_comp1 = pdsch_vars->rxdataF_comp1[harq_pid][round]; + int **dl_ch_rho_ext = pdsch_vars->dl_ch_rho_ext[harq_pid][round]; //for second stream + int **dl_ch_rho2_ext = pdsch_vars->dl_ch_rho2_ext; + int **dl_ch_mag0 = pdsch_vars->dl_ch_mag0; + int **dl_ch_mag1 = pdsch_vars->dl_ch_mag1[harq_pid][round]; + int **dl_ch_magb0 = pdsch_vars->dl_ch_magb0; + int **dl_ch_magb1 = pdsch_vars->dl_ch_magb1[harq_pid][round]; + + if (frame_parms->nb_antennas_rx>1) { + + rxdataF_comp128_0 = (__m128i *)&rxdataF_comp0[0][symbol*frame_parms->N_RB_DL*12]; + rxdataF_comp128_1 = (__m128i *)&rxdataF_comp0[1][symbol*frame_parms->N_RB_DL*12]; + dl_ch_mag128_0 = (__m128i *)&dl_ch_mag0[0][symbol*frame_parms->N_RB_DL*12]; + dl_ch_mag128_1 = (__m128i *)&dl_ch_mag0[1][symbol*frame_parms->N_RB_DL*12]; + dl_ch_mag128_0b = (__m128i *)&dl_ch_magb0[0][symbol*frame_parms->N_RB_DL*12]; + dl_ch_mag128_1b = (__m128i *)&dl_ch_magb0[1][symbol*frame_parms->N_RB_DL*12]; + + // MRC on each re of rb, both on MF output and magnitude (for 16QAM/64QAM llr computation) + for (i=0;i<nb_rb*3;i++) { + rxdataF_comp128_0[i] = _mm_adds_epi16(_mm_srai_epi16(rxdataF_comp128_0[i],1),_mm_srai_epi16(rxdataF_comp128_1[i],1)); + dl_ch_mag128_0[i] = _mm_adds_epi16(_mm_srai_epi16(dl_ch_mag128_0[i],1),_mm_srai_epi16(dl_ch_mag128_1[i],1)); + dl_ch_mag128_0b[i] = _mm_adds_epi16(_mm_srai_epi16(dl_ch_mag128_0b[i],1),_mm_srai_epi16(dl_ch_mag128_1b[i],1)); + + // print_shorts("mrc compens0:",&rxdataF_comp128_0[i]); + // print_shorts("mrc mag128_0:",&dl_ch_mag128_0[i]); + // print_shorts("mrc 
mag128_0b:",&dl_ch_mag128_0b[i]); + } } + + // if (rho) { + rho128_0 = (__m128i *) &dl_ch_rho2_ext[0][symbol*frame_parms->N_RB_DL*12]; + rho128_1 = (__m128i *) &dl_ch_rho2_ext[1][symbol*frame_parms->N_RB_DL*12]; + for (i=0;i<nb_rb*3;i++) { + // print_shorts("mrc rho0:",&rho128_0[i]); + // print_shorts("mrc rho1:",&rho128_1[i]); + rho128_0[i] = _mm_adds_epi16(_mm_srai_epi16(rho128_0[i],1),_mm_srai_epi16(rho128_1[i],1)); + } + //} + + + if (dual_stream_UE == 1) { + rho128_i0 = (__m128i *) &dl_ch_rho_ext[0][symbol*frame_parms->N_RB_DL*12]; + rho128_i1 = (__m128i *) &dl_ch_rho_ext[1][symbol*frame_parms->N_RB_DL*12]; + rxdataF_comp128_i0 = (__m128i *)&rxdataF_comp1[0][symbol*frame_parms->N_RB_DL*12]; + rxdataF_comp128_i1 = (__m128i *)&rxdataF_comp1[1][symbol*frame_parms->N_RB_DL*12]; + dl_ch_mag128_i0 = (__m128i *)&dl_ch_mag1[0][symbol*frame_parms->N_RB_DL*12]; + dl_ch_mag128_i1 = (__m128i *)&dl_ch_mag1[1][symbol*frame_parms->N_RB_DL*12]; + dl_ch_mag128_i0b = (__m128i *)&dl_ch_magb1[0][symbol*frame_parms->N_RB_DL*12]; + dl_ch_mag128_i1b = (__m128i *)&dl_ch_magb1[1][symbol*frame_parms->N_RB_DL*12]; + for (i=0;i<nb_rb*3;i++) { + rxdataF_comp128_i0[i] = _mm_adds_epi16(_mm_srai_epi16(rxdataF_comp128_i0[i],1),_mm_srai_epi16(rxdataF_comp128_i1[i],1)); + rho128_i0[i] = _mm_adds_epi16(_mm_srai_epi16(rho128_i0[i],1),_mm_srai_epi16(rho128_i1[i],1)); + + dl_ch_mag128_i0[i] = _mm_adds_epi16(_mm_srai_epi16(dl_ch_mag128_i0[i],1),_mm_srai_epi16(dl_ch_mag128_i1[i],1)); + dl_ch_mag128_i0b[i] = _mm_adds_epi16(_mm_srai_epi16(dl_ch_mag128_i0b[i],1),_mm_srai_epi16(dl_ch_mag128_i1b[i],1)); + + //print_shorts("mrc compens1:",&rxdataF_comp128_i0[i]); + //print_shorts("mrc mag128_i0:",&dl_ch_mag128_i0[i]); + //print_shorts("mrc mag128_i0b:",&dl_ch_mag128_i0b[i]); + } + } + + + _mm_empty(); + _m_empty(); +} + + + +void dlsch_scale_channel(int **dl_ch_estimates_ext, + LTE_DL_FRAME_PARMS *frame_parms, + LTE_UE_DLSCH_t **dlsch_ue, + uint8_t symbol, + unsigned short nb_rb) +{ + +#if 
defined(__x86_64__)||defined(__i386__) + + short rb, ch_amp; + unsigned char aatx,aarx,pilots=0,symbol_mod; + __m128i *dl_ch128, ch_amp128; + + symbol_mod = (symbol>=(7-frame_parms->Ncp)) ? symbol-(7-frame_parms->Ncp) : symbol; + + if ((symbol_mod == 0) || (symbol_mod == (4-frame_parms->Ncp))) { + if (frame_parms->nb_antenna_ports_eNB==1) // 10 out of 12 so don't reduce size + nb_rb=1+(5*nb_rb/6); + else + pilots=1; + } + + // Determine scaling amplitude based the symbol + + ch_amp = ((pilots) ? (dlsch_ue[0]->sqrt_rho_b) : (dlsch_ue[0]->sqrt_rho_a)); + + LOG_D(PHY,"Scaling PDSCH Chest in OFDM symbol %d by %d, pilots %d nb_rb %d NCP %d symbol %d\n",symbol_mod,ch_amp,pilots,nb_rb,frame_parms->Ncp,symbol); + // printf("Scaling PDSCH Chest in OFDM symbol %d by %d\n",symbol_mod,ch_amp); + + ch_amp128 = _mm_set1_epi16(ch_amp); // Q3.13 + + for (aatx=0; aatx<frame_parms->nb_antenna_ports_eNB; aatx++) { + for (aarx=0; aarx<frame_parms->nb_antennas_rx; aarx++) { + + dl_ch128=(__m128i *)&dl_ch_estimates_ext[(aatx<<1)+aarx][symbol*frame_parms->N_RB_DL*12]; + + for (rb=0;rb<nb_rb;rb++) { + + dl_ch128[0] = _mm_mulhi_epi16(dl_ch128[0],ch_amp128); + dl_ch128[0] = _mm_slli_epi16(dl_ch128[0],3); + + dl_ch128[1] = _mm_mulhi_epi16(dl_ch128[1],ch_amp128); + dl_ch128[1] = _mm_slli_epi16(dl_ch128[1],3); + + if (pilots) { + dl_ch128+=2; + } else { + dl_ch128[2] = _mm_mulhi_epi16(dl_ch128[2],ch_amp128); + dl_ch128[2] = _mm_slli_epi16(dl_ch128[2],3); + dl_ch128+=3; + + } + } + } + } + +#elif defined(__arm__) + +#endif +} + + +//compute average channel_level on each (TX,RX) antenna pair +void dlsch_channel_level(int **dl_ch_estimates_ext, + LTE_DL_FRAME_PARMS *frame_parms, + int32_t *avg, + uint8_t symbol, + unsigned short nb_rb) +{ + +#if defined(__x86_64__)||defined(__i386__) + + short rb; + unsigned char aatx,aarx,nre=12,symbol_mod; + __m128i *dl_ch128, avg128D; + + symbol_mod = (symbol>=(7-frame_parms->Ncp)) ? 
symbol-(7-frame_parms->Ncp) : symbol; + + if (((symbol_mod == 0) || (symbol_mod == (frame_parms->Ncp-1)))&&(frame_parms->nb_antenna_ports_eNB!=1)) + nre=8; + else if (((symbol_mod == 0) || (symbol_mod == (frame_parms->Ncp-1)))&&(frame_parms->nb_antenna_ports_eNB==1)) + nre=10; + else + nre=12; + + //nb_rb*nre = y * 2^x + int16_t x = factor2(nb_rb*nre); + int16_t y = (nb_rb*nre)>>x; + //printf("nb_rb*nre = %d = %d * 2^(%d)\n",nb_rb*nre,y,x); + + for (aatx=0; aatx<frame_parms->nb_antenna_ports_eNB; aatx++) + for (aarx=0; aarx<frame_parms->nb_antennas_rx; aarx++) { + //clear average level + avg128D = _mm_setzero_si128(); + // 5 is always a symbol with no pilots for both normal and extended prefix + + dl_ch128=(__m128i *)&dl_ch_estimates_ext[(aatx<<1)+aarx][symbol*frame_parms->N_RB_DL*12]; + + for (rb=0;rb<nb_rb;rb++) { + // printf("rb %d : ",rb); + // print_shorts("ch",&dl_ch128[0]); + avg128D = _mm_add_epi32(avg128D,_mm_srai_epi16(_mm_madd_epi16(dl_ch128[0],dl_ch128[0]),x)); + avg128D = _mm_add_epi32(avg128D,_mm_srai_epi16(_mm_madd_epi16(dl_ch128[1],dl_ch128[1]),x)); + + //avg128D = _mm_add_epi32(avg128D,_mm_madd_epi16(dl_ch128[0],_mm_srai_epi16(_mm_mulhi_epi16(dl_ch128[0], coeff128),15))); + //avg128D = _mm_add_epi32(avg128D,_mm_madd_epi16(dl_ch128[1],_mm_srai_epi16(_mm_mulhi_epi16(dl_ch128[1], coeff128),15))); + + if (((symbol_mod == 0) || (symbol_mod == (frame_parms->Ncp-1)))&&(frame_parms->nb_antenna_ports_eNB!=1)) { + dl_ch128+=2; + } + else { + avg128D = _mm_add_epi32(avg128D,_mm_srai_epi16(_mm_madd_epi16(dl_ch128[2],dl_ch128[2]),x)); + //avg128D = _mm_add_epi32(avg128D,_mm_madd_epi16(dl_ch128[2],_mm_srai_epi16(_mm_mulhi_epi16(dl_ch128[2], coeff128),15))); + dl_ch128+=3; + } + /* + if (rb==0) { + print_shorts("dl_ch128",&dl_ch128[0]); + print_shorts("dl_ch128",&dl_ch128[1]); + print_shorts("dl_ch128",&dl_ch128[2]); + } + */ + } + + avg[(aatx<<1)+aarx] =(((int32_t*)&avg128D)[0] + + ((int32_t*)&avg128D)[1] + + ((int32_t*)&avg128D)[2] + + 
((int32_t*)&avg128D)[3])/y; + // printf("Channel level : %d\n",avg[(aatx<<1)+aarx]); + } + + _mm_empty(); + _m_empty(); + +#elif defined(__arm__) + + short rb; + unsigned char aatx,aarx,nre=12,symbol_mod; + int32x4_t avg128D; + int16x4_t *dl_ch128; + + symbol_mod = (symbol>=(7-frame_parms->Ncp)) ? symbol-(7-frame_parms->Ncp) : symbol; + + for (aatx=0; aatx<frame_parms->nb_antenna_ports_eNB; aatx++) + for (aarx=0; aarx<frame_parms->nb_antennas_rx; aarx++) { + //clear average level + avg128D = vdupq_n_s32(0); + // 5 is always a symbol with no pilots for both normal and extended prefix + + dl_ch128=(int16x4_t *)&dl_ch_estimates_ext[(aatx<<1)+aarx][symbol*frame_parms->N_RB_DL*12]; + + for (rb=0; rb<nb_rb; rb++) { + // printf("rb %d : ",rb); + // print_shorts("ch",&dl_ch128[0]); + avg128D = vqaddq_s32(avg128D,vmull_s16(dl_ch128[0],dl_ch128[0])); + avg128D = vqaddq_s32(avg128D,vmull_s16(dl_ch128[1],dl_ch128[1])); + avg128D = vqaddq_s32(avg128D,vmull_s16(dl_ch128[2],dl_ch128[2])); + avg128D = vqaddq_s32(avg128D,vmull_s16(dl_ch128[3],dl_ch128[3])); + + if (((symbol_mod == 0) || (symbol_mod == (frame_parms->Ncp-1)))&&(frame_parms->nb_antenna_ports_eNB!=1)) { + dl_ch128+=4; + } else { + avg128D = vqaddq_s32(avg128D,vmull_s16(dl_ch128[4],dl_ch128[4])); + avg128D = vqaddq_s32(avg128D,vmull_s16(dl_ch128[5],dl_ch128[5])); + dl_ch128+=6; + } + + /* + if (rb==0) { + print_shorts("dl_ch128",&dl_ch128[0]); + print_shorts("dl_ch128",&dl_ch128[1]); + print_shorts("dl_ch128",&dl_ch128[2]); + } + */ + } + + + if (((symbol_mod == 0) || (symbol_mod == (frame_parms->Ncp-1)))&&(frame_parms->nb_antenna_ports_eNB!=1)) + nre=8; + else if (((symbol_mod == 0) || (symbol_mod == (frame_parms->Ncp-1)))&&(frame_parms->nb_antenna_ports_eNB==1)) + nre=10; + else + nre=12; + + avg[(aatx<<1)+aarx] = (((int32_t*)&avg128D)[0] + + ((int32_t*)&avg128D)[1] + + ((int32_t*)&avg128D)[2] + + ((int32_t*)&avg128D)[3])/(nb_rb*nre); + + // printf("Channel level : %d\n",avg[(aatx<<1)+aarx]); + } + + +#endif +} + 
+//compute average channel_level of effective (precoded) channel + +//compute average channel_level of effective (precoded) channel +void dlsch_channel_level_TM34(int **dl_ch_estimates_ext, + LTE_DL_FRAME_PARMS *frame_parms, + unsigned char *pmi_ext, + int *avg_0, + int *avg_1, + uint8_t symbol, + unsigned short nb_rb, + MIMO_mode_t mimo_mode){ + +#if defined(__x86_64__)||defined(__i386__) + + + short rb; + unsigned char aarx,nre=12,symbol_mod; + __m128i *dl_ch0_128,*dl_ch1_128, dl_ch0_128_tmp, dl_ch1_128_tmp, avg_0_128D, avg_1_128D; + + symbol_mod = (symbol>=(7-frame_parms->Ncp)) ? symbol-(7-frame_parms->Ncp) : symbol; + + //clear average level + // avg_0_128D = _mm_setzero_si128(); + // avg_1_128D = _mm_setzero_si128(); + avg_0[0] = 0; + avg_0[1] = 0; + avg_1[0] = 0; + avg_1[1] = 0; + // 5 is always a symbol with no pilots for both normal and extended prefix + + if (((symbol_mod == 0) || (symbol_mod == (frame_parms->Ncp-1)))&&(frame_parms->nb_antenna_ports_eNB!=1)) + nre=8; + else if (((symbol_mod == 0) || (symbol_mod == (frame_parms->Ncp-1)))&&(frame_parms->nb_antenna_ports_eNB==1)) + nre=10; + else + nre=12; + + for (aarx=0; aarx<frame_parms->nb_antennas_rx; aarx++) { + dl_ch0_128 = (__m128i *)&dl_ch_estimates_ext[aarx][symbol*frame_parms->N_RB_DL*12]; + dl_ch1_128 = (__m128i *)&dl_ch_estimates_ext[2+aarx][symbol*frame_parms->N_RB_DL*12]; + + avg_0_128D = _mm_setzero_si128(); + avg_1_128D = _mm_setzero_si128(); + for (rb=0; rb<nb_rb; rb++) { + // printf("rb %d : \n",rb); + // print_shorts("ch0\n",&dl_ch0_128[0]); + //print_shorts("ch1\n",&dl_ch1_128[0]); + dl_ch0_128_tmp = _mm_load_si128(&dl_ch0_128[0]); + dl_ch1_128_tmp = _mm_load_si128(&dl_ch1_128[0]); + + if (mimo_mode==LARGE_CDD) + prec2A_TM3_128(&dl_ch0_128_tmp,&dl_ch1_128_tmp); + else if (mimo_mode==DUALSTREAM_UNIFORM_PRECODING1) + prec2A_TM4_128(0,&dl_ch0_128_tmp,&dl_ch1_128_tmp); + else if (mimo_mode==DUALSTREAM_UNIFORM_PRECODINGj) + prec2A_TM4_128(1,&dl_ch0_128_tmp,&dl_ch1_128_tmp); + else if 
(mimo_mode==DUALSTREAM_PUSCH_PRECODING) + prec2A_TM4_128(pmi_ext[rb],&dl_ch0_128_tmp,&dl_ch1_128_tmp); + + // mmtmpD0 = _mm_madd_epi16(dl_ch0_128_tmp,dl_ch0_128_tmp); + avg_0_128D = _mm_add_epi32(avg_0_128D,_mm_madd_epi16(dl_ch0_128_tmp,dl_ch0_128_tmp)); + + avg_1_128D = _mm_add_epi32(avg_1_128D,_mm_madd_epi16(dl_ch1_128_tmp,dl_ch1_128_tmp)); + + dl_ch0_128_tmp = _mm_load_si128(&dl_ch0_128[1]); + dl_ch1_128_tmp = _mm_load_si128(&dl_ch1_128[1]); + + if (mimo_mode==LARGE_CDD) + prec2A_TM3_128(&dl_ch0_128_tmp,&dl_ch1_128_tmp); + else if (mimo_mode==DUALSTREAM_UNIFORM_PRECODING1) + prec2A_TM4_128(0,&dl_ch0_128_tmp,&dl_ch1_128_tmp); + else if (mimo_mode==DUALSTREAM_UNIFORM_PRECODINGj) + prec2A_TM4_128(1,&dl_ch0_128_tmp,&dl_ch1_128_tmp); + else if (mimo_mode==DUALSTREAM_PUSCH_PRECODING) + prec2A_TM4_128(pmi_ext[rb],&dl_ch0_128_tmp,&dl_ch1_128_tmp); + + // mmtmpD1 = _mm_madd_epi16(dl_ch0_128_tmp,dl_ch0_128_tmp); + avg_0_128D = _mm_add_epi32(avg_0_128D,_mm_madd_epi16(dl_ch0_128_tmp,dl_ch0_128_tmp)); + + avg_1_128D = _mm_add_epi32(avg_1_128D,_mm_madd_epi16(dl_ch1_128_tmp,dl_ch1_128_tmp)); + + if (((symbol_mod == 0) || (symbol_mod == (frame_parms->Ncp-1)))&&(frame_parms->nb_antenna_ports_eNB!=1)) { + dl_ch0_128+=2; + dl_ch1_128+=2; + } + else { + dl_ch0_128_tmp = _mm_load_si128(&dl_ch0_128[2]); + dl_ch1_128_tmp = _mm_load_si128(&dl_ch1_128[2]); + + if (mimo_mode==LARGE_CDD) + prec2A_TM3_128(&dl_ch0_128_tmp,&dl_ch1_128_tmp); + else if (mimo_mode==DUALSTREAM_UNIFORM_PRECODING1) + prec2A_TM4_128(0,&dl_ch0_128_tmp,&dl_ch1_128_tmp); + else if (mimo_mode==DUALSTREAM_UNIFORM_PRECODINGj) + prec2A_TM4_128(1,&dl_ch0_128_tmp,&dl_ch1_128_tmp); + else if (mimo_mode==DUALSTREAM_PUSCH_PRECODING) + prec2A_TM4_128(pmi_ext[rb],&dl_ch0_128_tmp,&dl_ch1_128_tmp); + // mmtmpD2 = _mm_madd_epi16(dl_ch0_128_tmp,dl_ch0_128_tmp); + + avg_1_128D = _mm_add_epi32(avg_1_128D,_mm_madd_epi16(dl_ch1_128_tmp,dl_ch1_128_tmp)); + avg_0_128D = 
_mm_add_epi32(avg_0_128D,_mm_madd_epi16(dl_ch0_128_tmp,dl_ch0_128_tmp)); + + dl_ch0_128+=3; + dl_ch1_128+=3; + } + } + + + avg_0[aarx] = (((int*)&avg_0_128D)[0])/(nb_rb*nre) + + (((int*)&avg_0_128D)[1])/(nb_rb*nre) + + (((int*)&avg_0_128D)[2])/(nb_rb*nre) + + (((int*)&avg_0_128D)[3])/(nb_rb*nre); + // printf("From Chan_level aver stream 0 %d =%d\n", aarx, avg_0[aarx]); + + avg_1[aarx] = (((int*)&avg_1_128D)[0])/(nb_rb*nre) + + (((int*)&avg_1_128D)[1])/(nb_rb*nre) + + (((int*)&avg_1_128D)[2])/(nb_rb*nre) + + (((int*)&avg_1_128D)[3])/(nb_rb*nre); + // printf("From Chan_level aver stream 1 %d =%d\n", aarx, avg_1[aarx]); + } +//avg_0[0] = max(avg_0[0],avg_0[1]); +//avg_1[0] = max(avg_1[0],avg_1[1]); +//avg_0[0]= max(avg_0[0], avg_1[0]); + + avg_0[0] = avg_0[0] + avg_0[1]; + // printf("From Chan_level aver stream 0 final =%d\n", avg_0[0]); + avg_1[0] = avg_1[0] + avg_1[1]; + // printf("From Chan_level aver stream 1 final =%d\n", avg_1[0]); + avg_0[0] = min (avg_0[0], avg_1[0]); + avg_1[0] = avg_0[0]; + + _mm_empty(); + _m_empty(); + +#elif defined(__arm__) + +#endif +} + + + +/*void dlsch_channel_level_TM34(int **dl_ch_estimates_ext, + LTE_DL_FRAME_PARMS *frame_parms, + int *avg, + uint8_t symbol, + unsigned short nb_rb, + MIMO_mode_t mimo_mode){ + +#if defined(__x86_64__)||defined(__i386__) + + + short rb; + unsigned char aarx,nre=12,symbol_mod; + __m128i *dl_ch0_128,*dl_ch1_128, dl_ch0_128_tmp, dl_ch1_128_tmp,avg128D; + + symbol_mod = (symbol>=(7-frame_parms->Ncp)) ? 
symbol-(7-frame_parms->Ncp) : symbol; + + //clear average level + avg128D = _mm_setzero_si128(); + avg[0] = 0; + avg[1] = 0; + // 5 is always a symbol with no pilots for both normal and extended prefix + + if (((symbol_mod == 0) || (symbol_mod == (frame_parms->Ncp-1)))&&(frame_parms->nb_antenna_ports_eNB!=1)) + nre=8; + else if (((symbol_mod == 0) || (symbol_mod == (frame_parms->Ncp-1)))&&(frame_parms->nb_antenna_ports_eNB==1)) + nre=10; + else + nre=12; + + for (aarx=0; aarx<frame_parms->nb_antennas_rx; aarx++) { + dl_ch0_128 = (__m128i *)&dl_ch_estimates_ext[aarx][symbol*frame_parms->N_RB_DL*12]; + dl_ch1_128 = (__m128i *)&dl_ch_estimates_ext[2+aarx][symbol*frame_parms->N_RB_DL*12]; + + for (rb=0; rb<nb_rb; rb++) { + + dl_ch0_128_tmp = _mm_load_si128(&dl_ch0_128[0]); + dl_ch1_128_tmp = _mm_load_si128(&dl_ch1_128[0]); + + if (mimo_mode==LARGE_CDD) + prec2A_TM3_128(&dl_ch0_128_tmp,&dl_ch1_128_tmp); + else if (mimo_mode==DUALSTREAM_UNIFORM_PRECODING1) + prec2A_TM4_128(0,&dl_ch0_128_tmp,&dl_ch1_128_tmp); + else if (mimo_mode==DUALSTREAM_UNIFORM_PRECODINGj) + prec2A_TM4_128(1,&dl_ch0_128_tmp,&dl_ch1_128_tmp); + + // mmtmpD0 = _mm_madd_epi16(dl_ch0_128_tmp,dl_ch0_128_tmp); + avg128D = _mm_add_epi32(avg128D,_mm_madd_epi16(dl_ch0_128_tmp,dl_ch0_128_tmp)); + + dl_ch0_128_tmp = _mm_load_si128(&dl_ch0_128[1]); + dl_ch1_128_tmp = _mm_load_si128(&dl_ch1_128[1]); + + if (mimo_mode==LARGE_CDD) + prec2A_TM3_128(&dl_ch0_128_tmp,&dl_ch1_128_tmp); + else if (mimo_mode==DUALSTREAM_UNIFORM_PRECODING1) + prec2A_TM4_128(0,&dl_ch0_128_tmp,&dl_ch1_128_tmp); + else if (mimo_mode==DUALSTREAM_UNIFORM_PRECODINGj) + prec2A_TM4_128(1,&dl_ch0_128_tmp,&dl_ch1_128_tmp); + + // mmtmpD1 = _mm_madd_epi16(dl_ch0_128_tmp,dl_ch0_128_tmp); + avg128D = _mm_add_epi32(avg128D,_mm_madd_epi16(dl_ch0_128_tmp,dl_ch0_128_tmp)); + + if (((symbol_mod == 0) || (symbol_mod == (frame_parms->Ncp-1)))&&(frame_parms->nb_antenna_ports_eNB!=1)) { + dl_ch0_128+=2; + dl_ch1_128+=2; + } + else { + dl_ch0_128_tmp = 
_mm_load_si128(&dl_ch0_128[2]); + dl_ch1_128_tmp = _mm_load_si128(&dl_ch1_128[2]); + + if (mimo_mode==LARGE_CDD) + prec2A_TM3_128(&dl_ch0_128_tmp,&dl_ch1_128_tmp); + else if (mimo_mode==DUALSTREAM_UNIFORM_PRECODING1) + prec2A_TM4_128(0,&dl_ch0_128_tmp,&dl_ch1_128_tmp); + else if (mimo_mode==DUALSTREAM_UNIFORM_PRECODINGj) + prec2A_TM4_128(1,&dl_ch0_128_tmp,&dl_ch1_128_tmp); + + // mmtmpD2 = _mm_madd_epi16(dl_ch0_128_tmp,dl_ch0_128_tmp); + avg128D = _mm_add_epi32(avg128D,_mm_madd_epi16(dl_ch0_128_tmp,dl_ch0_128_tmp)); + + dl_ch0_128+=3; + dl_ch1_128+=3; + } + } + + avg[aarx] = (((int*)&avg128D)[0])/(nb_rb*nre) + + (((int*)&avg128D)[1])/(nb_rb*nre) + + (((int*)&avg128D)[2])/(nb_rb*nre) + + (((int*)&avg128D)[3])/(nb_rb*nre); + } + + // choose maximum of the 2 effective channels + avg[0] = cmax(avg[0],avg[1]); + + _mm_empty(); + _m_empty(); + +#elif defined(__arm__) + +#endif +}*/ + +//compute average channel_level of effective (precoded) channel +void dlsch_channel_level_TM56(int **dl_ch_estimates_ext, + LTE_DL_FRAME_PARMS *frame_parms, + unsigned char *pmi_ext, + int *avg, + uint8_t symbol, + unsigned short nb_rb) +{ + +#if defined(__x86_64__)||defined(__i386__) + + short rb; + unsigned char aarx,nre=12,symbol_mod; + __m128i *dl_ch0_128,*dl_ch1_128, dl_ch0_128_tmp, dl_ch1_128_tmp,avg128D; + + symbol_mod = (symbol>=(7-frame_parms->Ncp)) ? 
symbol-(7-frame_parms->Ncp) : symbol; + + //clear average level + avg128D = _mm_setzero_si128(); + avg[0] = 0; + avg[1] = 0; + // 5 is always a symbol with no pilots for both normal and extended prefix + + if (((symbol_mod == 0) || (symbol_mod == (frame_parms->Ncp-1)))&&(frame_parms->nb_antenna_ports_eNB!=1)) + nre=8; + else if (((symbol_mod == 0) || (symbol_mod == (frame_parms->Ncp-1)))&&(frame_parms->nb_antenna_ports_eNB==1)) + nre=10; + else + nre=12; + + for (aarx=0; aarx<frame_parms->nb_antennas_rx; aarx++) { + dl_ch0_128 = (__m128i *)&dl_ch_estimates_ext[aarx][symbol*frame_parms->N_RB_DL*12]; + dl_ch1_128 = (__m128i *)&dl_ch_estimates_ext[2+aarx][symbol*frame_parms->N_RB_DL*12]; + + for (rb=0; rb<nb_rb; rb++) { + + dl_ch0_128_tmp = _mm_load_si128(&dl_ch0_128[0]); + dl_ch1_128_tmp = _mm_load_si128(&dl_ch1_128[0]); + + prec2A_TM56_128(pmi_ext[rb],&dl_ch0_128_tmp,&dl_ch1_128_tmp); + // mmtmpD0 = _mm_madd_epi16(dl_ch0_128_tmp,dl_ch0_128_tmp); + avg128D = _mm_add_epi32(avg128D,_mm_madd_epi16(dl_ch0_128_tmp,dl_ch0_128_tmp)); + + dl_ch0_128_tmp = _mm_load_si128(&dl_ch0_128[1]); + dl_ch1_128_tmp = _mm_load_si128(&dl_ch1_128[1]); + + prec2A_TM56_128(pmi_ext[rb],&dl_ch0_128_tmp,&dl_ch1_128_tmp); + // mmtmpD1 = _mm_madd_epi16(dl_ch0_128_tmp,dl_ch0_128_tmp); + avg128D = _mm_add_epi32(avg128D,_mm_madd_epi16(dl_ch0_128_tmp,dl_ch0_128_tmp)); + + if (((symbol_mod == 0) || (symbol_mod == (frame_parms->Ncp-1)))&&(frame_parms->nb_antenna_ports_eNB!=1)) { + dl_ch0_128+=2; + dl_ch1_128+=2; + } + else { + dl_ch0_128_tmp = _mm_load_si128(&dl_ch0_128[2]); + dl_ch1_128_tmp = _mm_load_si128(&dl_ch1_128[2]); + + prec2A_TM56_128(pmi_ext[rb],&dl_ch0_128_tmp,&dl_ch1_128_tmp); + // mmtmpD2 = _mm_madd_epi16(dl_ch0_128_tmp,dl_ch0_128_tmp); + avg128D = _mm_add_epi32(avg128D,_mm_madd_epi16(dl_ch0_128_tmp,dl_ch0_128_tmp)); + + dl_ch0_128+=3; + dl_ch1_128+=3; + } + } + + avg[aarx] = (((int*)&avg128D)[0])/(nb_rb*nre) + + (((int*)&avg128D)[1])/(nb_rb*nre) + + (((int*)&avg128D)[2])/(nb_rb*nre) + + 
(((int*)&avg128D)[3])/(nb_rb*nre); + } + + // choose maximum of the 2 effective channels + avg[0] = cmax(avg[0],avg[1]); + + _mm_empty(); + _m_empty(); + +#elif defined(__arm__) + + +#endif +} + +//compute average channel_level for TM7 +void dlsch_channel_level_TM7(int **dl_bf_ch_estimates_ext, + LTE_DL_FRAME_PARMS *frame_parms, + int *avg, + uint8_t symbol, + unsigned short nb_rb) +{ + +#if defined(__x86_64__)||defined(__i386__) + + short rb; + unsigned char aatx,aarx,nre=12,symbol_mod; + __m128i *dl_ch128,avg128D; + + symbol_mod = (symbol>=(7-frame_parms->Ncp)) ? symbol-(7-frame_parms->Ncp) : symbol; + + for (aatx=0; aatx<frame_parms->nb_antenna_ports_eNB; aatx++) + for (aarx=0; aarx<frame_parms->nb_antennas_rx; aarx++) { + //clear average level + avg128D = _mm_setzero_si128(); + // 5 is always a symbol with no pilots for both normal and extended prefix + + dl_ch128=(__m128i *)&dl_bf_ch_estimates_ext[(aatx<<1)+aarx][symbol*frame_parms->N_RB_DL*12]; + + for (rb=0; rb<nb_rb; rb++) { + // printf("rb %d : ",rb); + // print_shorts("ch",&dl_ch128[0]); + avg128D = _mm_add_epi32(avg128D,_mm_madd_epi16(dl_ch128[0],dl_ch128[0])); + avg128D = _mm_add_epi32(avg128D,_mm_madd_epi16(dl_ch128[1],dl_ch128[1])); + + if (((symbol_mod == 0) || (symbol_mod == (frame_parms->Ncp-1)))&&(frame_parms->nb_antenna_ports_eNB!=1)) { + dl_ch128+=2; + } else { + avg128D = _mm_add_epi32(avg128D,_mm_madd_epi16(dl_ch128[2],dl_ch128[2])); + dl_ch128+=3; + } + + /* + if (rb==0) { + print_shorts("dl_ch128",&dl_ch128[0]); + print_shorts("dl_ch128",&dl_ch128[1]); + print_shorts("dl_ch128",&dl_ch128[2]); + } + */ + } + + if (((symbol_mod == 0) || (symbol_mod == (frame_parms->Ncp-1)))) + nre=10; + else if ((frame_parms->Ncp==0) && (symbol==3 || symbol==6 || symbol==9 || symbol==12)) + nre=9; + else if ((frame_parms->Ncp==1) && (symbol==4 || symbol==7 || symbol==9)) + nre=8; + else + nre=12; + + avg[(aatx<<1)+aarx] = (((int*)&avg128D)[0] + + ((int*)&avg128D)[1] + + ((int*)&avg128D)[2] + + 
((int*)&avg128D)[3])/(nb_rb*nre); + + // printf("Channel level : %d\n",avg[(aatx<<1)+aarx]); + } + + _mm_empty(); + _m_empty(); + +#elif defined(__arm__) + +#endif +} +//#define ONE_OVER_2_Q15 16384 +void dlsch_alamouti(LTE_DL_FRAME_PARMS *frame_parms, + int **rxdataF_comp, + int **dl_ch_mag, + int **dl_ch_magb, + unsigned char symbol, + unsigned short nb_rb) +{ + +#if defined(__x86_64__)||defined(__i386__) + + short *rxF0,*rxF1; + __m128i *ch_mag0,*ch_mag1,*ch_mag0b,*ch_mag1b, *rxF0_128; + unsigned char rb,re; + int jj = (symbol*frame_parms->N_RB_DL*12); + uint8_t symbol_mod = (symbol>=(7-frame_parms->Ncp)) ? symbol-(7-frame_parms->Ncp) : symbol; + uint8_t pilots = ((symbol_mod==0)||(symbol_mod==(4-frame_parms->Ncp))) ? 1 : 0; + rxF0_128 = (__m128i*) &rxdataF_comp[0][jj]; + + //amp = _mm_set1_epi16(ONE_OVER_2_Q15); + + + // printf("Doing alamouti!\n"); + rxF0 = (short*)&rxdataF_comp[0][jj]; //tx antenna 0 h0*y + rxF1 = (short*)&rxdataF_comp[2][jj]; //tx antenna 1 h1*y + ch_mag0 = (__m128i *)&dl_ch_mag[0][jj]; + ch_mag1 = (__m128i *)&dl_ch_mag[2][jj]; + ch_mag0b = (__m128i *)&dl_ch_magb[0][jj]; + ch_mag1b = (__m128i *)&dl_ch_magb[2][jj]; + + for (rb=0; rb<nb_rb; rb++) { + + for (re=0; re<((pilots==0)?12:8); re+=2) { + + // Alamouti RX combining + + // printf("Alamouti: symbol %d, rb %d, re %d: rxF0 (%d,%d,%d,%d), rxF1 (%d,%d,%d,%d)\n",symbol,rb,re,rxF0[0],rxF0[1],rxF0[2],rxF0[3],rxF1[0],rxF1[1],rxF1[2],rxF1[3]); + rxF0[0] = rxF0[0] + rxF1[2]; + rxF0[1] = rxF0[1] - rxF1[3]; + + rxF0[2] = rxF0[2] - rxF1[0]; + rxF0[3] = rxF0[3] + rxF1[1]; + + // printf("Alamouti: rxF0 after (%d,%d,%d,%d)\n",rxF0[0],rxF0[1],rxF0[2],rxF0[3]); + rxF0+=4; + rxF1+=4; + + } + + // compute levels for 16QAM or 64 QAM llr unit + ch_mag0[0] = _mm_adds_epi16(ch_mag0[0],ch_mag1[0]); + ch_mag0[1] = _mm_adds_epi16(ch_mag0[1],ch_mag1[1]); + + ch_mag0b[0] = _mm_adds_epi16(ch_mag0b[0],ch_mag1b[0]); + ch_mag0b[1] = _mm_adds_epi16(ch_mag0b[1],ch_mag1b[1]); + + // account for 1/sqrt(2) scaling at 
transmission + //ch_mag0[0] = _mm_srai_epi16(ch_mag0[0],1); + //ch_mag0[1] = _mm_srai_epi16(ch_mag0[1],1); + //ch_mag0b[0] = _mm_srai_epi16(ch_mag0b[0],1); + //ch_mag0b[1] = _mm_srai_epi16(ch_mag0b[1],1); + + //rxF0_128[0] = _mm_mulhi_epi16(rxF0_128[0],amp); + //rxF0_128[0] = _mm_slli_epi16(rxF0_128[0],1); + //rxF0_128[1] = _mm_mulhi_epi16(rxF0_128[1],amp); + //rxF0_128[1] = _mm_slli_epi16(rxF0_128[1],1); + + //rxF0_128[0] = _mm_srai_epi16(rxF0_128[0],1); + //rxF0_128[1] = _mm_srai_epi16(rxF0_128[1],1); + + + + if (pilots==0) { + ch_mag0[2] = _mm_adds_epi16(ch_mag0[2],ch_mag1[2]); + ch_mag0b[2] = _mm_adds_epi16(ch_mag0b[2],ch_mag1b[2]); + + //ch_mag0[2] = _mm_srai_epi16(ch_mag0[2],1); + //ch_mag0b[2] = _mm_srai_epi16(ch_mag0b[2],1); + + //rxF0_128[2] = _mm_mulhi_epi16(rxF0_128[2],amp); + //rxF0_128[2] = _mm_slli_epi16(rxF0_128[2],1); + + //rxF0_128[2] = _mm_srai_epi16(rxF0_128[2],1); + + + ch_mag0+=3; + ch_mag1+=3; + ch_mag0b+=3; + ch_mag1b+=3; + rxF0_128+=3; + } else { + ch_mag0+=2; + ch_mag1+=2; + ch_mag0b+=2; + ch_mag1b+=2; + rxF0_128+=2; + } + } + + _mm_empty(); + _m_empty(); + +#elif defined(__arm__) + +#endif +} + + +//============================================================================================== +// Extraction functions +//============================================================================================== + +unsigned short dlsch_extract_rbs_single(int **rxdataF, + int **dl_ch_estimates, + int **rxdataF_ext, + int **dl_ch_estimates_ext, + unsigned short pmi, + unsigned char *pmi_ext, + unsigned int *rb_alloc, + unsigned char symbol, + unsigned char subframe, + uint32_t high_speed_flag, + LTE_DL_FRAME_PARMS *frame_parms) { + + + + unsigned short rb,nb_rb=0; + unsigned char rb_alloc_ind; + unsigned char i,aarx,l,nsymb,skip_half=0,sss_symb,pss_symb=0; + int *dl_ch0,*dl_ch0_ext,*rxF,*rxF_ext; + + + + unsigned char symbol_mod,pilots=0,j=0,poffset=0; + + symbol_mod = (symbol>=(7-frame_parms->Ncp)) ? 
symbol-(7-frame_parms->Ncp) : symbol; + pilots = ((symbol_mod==0)||(symbol_mod==(4-frame_parms->Ncp))) ? 1 : 0; + l=symbol; + nsymb = (frame_parms->Ncp==NORMAL) ? 14:12; + + if (frame_parms->frame_type == TDD) { // TDD + sss_symb = nsymb-1; + pss_symb = 2; + } else { + sss_symb = (nsymb>>1)-2; + pss_symb = (nsymb>>1)-1; + } + + if (symbol_mod==(4-frame_parms->Ncp)) + poffset=3; + + for (aarx=0; aarx<frame_parms->nb_antennas_rx; aarx++) { + + if (high_speed_flag == 1) + dl_ch0 = &dl_ch_estimates[aarx][5+(symbol*(frame_parms->ofdm_symbol_size))]; + else + dl_ch0 = &dl_ch_estimates[aarx][5]; + + dl_ch0_ext = &dl_ch_estimates_ext[aarx][symbol*(frame_parms->N_RB_DL*12)]; + + rxF_ext = &rxdataF_ext[aarx][symbol*(frame_parms->N_RB_DL*12)]; + rxF = &rxdataF[aarx][(frame_parms->first_carrier_offset + (symbol*(frame_parms->ofdm_symbol_size)))]; + + if ((frame_parms->N_RB_DL&1) == 0) // even number of RBs + + for (rb=0;rb<frame_parms->N_RB_DL;rb++) { + + if (rb < 32) + rb_alloc_ind = (rb_alloc[0]>>rb) & 1; + else if (rb < 64) + rb_alloc_ind = (rb_alloc[1]>>(rb-32)) & 1; + else if (rb < 96) + rb_alloc_ind = (rb_alloc[2]>>(rb-64)) & 1; + else if (rb < 100) + rb_alloc_ind = (rb_alloc[3]>>(rb-96)) & 1; + else + rb_alloc_ind = 0; + + if (rb_alloc_ind == 1) + nb_rb++; + + // For second half of RBs skip DC carrier + if (rb==(frame_parms->N_RB_DL>>1)) { + rxF = &rxdataF[aarx][(1 + (symbol*(frame_parms->ofdm_symbol_size)))]; + //dl_ch0++; + } + + // PBCH + if ((subframe==0) && (rb>=((frame_parms->N_RB_DL>>1)-3)) && (rb<((frame_parms->N_RB_DL>>1)+3)) && (l>=nsymb>>1) && (l<((nsymb>>1) + 4))) { + rb_alloc_ind = 0; + } + + //SSS + if (((subframe==0)||(subframe==5)) && (rb>=((frame_parms->N_RB_DL>>1)-3)) && (rb<((frame_parms->N_RB_DL>>1)+3)) && (l==sss_symb) ) { + rb_alloc_ind = 0; + } + + + if (frame_parms->frame_type == FDD) { + //PSS + if (((subframe==0)||(subframe==5)) && (rb>=((frame_parms->N_RB_DL>>1)-3)) && (rb<((frame_parms->N_RB_DL>>1)+3)) && (l==pss_symb) ) { + rb_alloc_ind = 0; 
+ } + } + + if ((frame_parms->frame_type == TDD) && + (subframe==6)) { //TDD Subframe 6 + if ((rb>=((frame_parms->N_RB_DL>>1)-3)) && (rb<((frame_parms->N_RB_DL>>1)+3)) && (l==pss_symb) ) { + rb_alloc_ind = 0; + } + } + + if (rb_alloc_ind==1) { + *pmi_ext = (pmi>>((rb>>2)<<1))&3; + memcpy(dl_ch0_ext,dl_ch0,12*sizeof(int)); + + /* + printf("rb %d\n",rb); + for (i=0;i<12;i++) + printf("(%d %d)",((short *)dl_ch0)[i<<1],((short*)dl_ch0)[1+(i<<1)]); + printf("\n"); + */ + if (pilots==0) { + for (i=0; i<12; i++) { + rxF_ext[i]=rxF[i]; + /* + printf("%d : (%d,%d)\n",(rxF+i-&rxdataF[aarx][( (symbol*(frame_parms->ofdm_symbol_size)))]), + ((short*)&rxF[i])[0],((short*)&rxF[i])[1]);*/ + } + + dl_ch0_ext+=12; + rxF_ext+=12; + } else { + j=0; + + for (i=0; i<12; i++) { + if ((i!=(frame_parms->nushift+poffset)) && + (i!=((frame_parms->nushift+poffset+6)%12))) { + rxF_ext[j]=rxF[i]; + // printf("extract rb %d, re %d => (%d,%d)\n",rb,i,*(short *)&rxF_ext[j],*(1+(short*)&rxF_ext[j])); + dl_ch0_ext[j++]=dl_ch0[i]; + + } + } + + dl_ch0_ext+=10; + rxF_ext+=10; + } + + + } + + dl_ch0+=12; + rxF+=12; + + } + else { // Odd number of RBs + for (rb=0; rb<frame_parms->N_RB_DL>>1; rb++) { +#ifdef DEBUG_DLSCH_DEMOD + printf("dlch_ext %d\n",dl_ch0_ext-&dl_ch_estimates_ext[aarx][0]); +#endif + skip_half=0; + + if (rb < 32) + rb_alloc_ind = (rb_alloc[0]>>rb) & 1; + else if (rb < 64) + rb_alloc_ind = (rb_alloc[1]>>(rb-32)) & 1; + else if (rb < 96) + rb_alloc_ind = (rb_alloc[2]>>(rb-64)) & 1; + else if (rb < 100) + rb_alloc_ind = (rb_alloc[3]>>(rb-96)) & 1; + else + rb_alloc_ind = 0; + + if (rb_alloc_ind == 1) + nb_rb++; + + + // PBCH + if ((subframe==0) && (rb>((frame_parms->N_RB_DL>>1)-3)) && (rb<((frame_parms->N_RB_DL>>1)+3)) && (l>=(nsymb>>1)) && (l<((nsymb>>1) + 4))) { + rb_alloc_ind = 0; + } + + //PBCH subframe 0, symbols nsymb>>1 ... 
nsymb>>1 + 3 + if ((subframe==0) && (rb==((frame_parms->N_RB_DL>>1)-3)) && (l>=(nsymb>>1)) && (l<((nsymb>>1) + 4))) + skip_half=1; + else if ((subframe==0) && (rb==((frame_parms->N_RB_DL>>1)+3)) && (l>=(nsymb>>1)) && (l<((nsymb>>1) + 4))) + skip_half=2; + + //SSS + + if (((subframe==0)||(subframe==5)) && + (rb>((frame_parms->N_RB_DL>>1)-3)) && + (rb<((frame_parms->N_RB_DL>>1)+3)) && + (l==sss_symb) ) { + rb_alloc_ind = 0; + } + //SSS + if (((subframe==0)||(subframe==5)) && + (rb==((frame_parms->N_RB_DL>>1)-3)) && + (l==sss_symb)) + skip_half=1; + else if (((subframe==0)||(subframe==5)) && + (rb==((frame_parms->N_RB_DL>>1)+3)) && + (l==sss_symb)) + skip_half=2; + + //PSS in subframe 0/5 if FDD + if (frame_parms->frame_type == FDD) { //FDD + + if (((subframe==0)||(subframe==5)) && + (rb>((frame_parms->N_RB_DL>>1)-3)) && + (rb<((frame_parms->N_RB_DL>>1)+3)) && + (l==pss_symb) ) { + rb_alloc_ind = 0; + } + + if (((subframe==0)||(subframe==5)) && (rb==((frame_parms->N_RB_DL>>1)-3)) && (l==pss_symb)) + skip_half=1; + else if (((subframe==0)||(subframe==5)) && (rb==((frame_parms->N_RB_DL>>1)+3)) && (l==pss_symb)) + skip_half=2; + } + + if ((frame_parms->frame_type == TDD) && + (subframe==6)){ //TDD Subframe 6 + if ((rb>((frame_parms->N_RB_DL>>1)-3)) && (rb<((frame_parms->N_RB_DL>>1)+3)) && (l==pss_symb) ) { + rb_alloc_ind = 0; + } + if ((rb==((frame_parms->N_RB_DL>>1)-3)) && (l==pss_symb)) + skip_half=1; + else if ((rb==((frame_parms->N_RB_DL>>1)+3)) && (l==pss_symb)) + skip_half=2; + } + + + if (rb_alloc_ind==1) { + +#ifdef DEBUG_DLSCH_DEMOD + printf("rb %d/symbol %d (skip_half %d)\n",rb,l,skip_half); +#endif + if (pilots==0) { + // printf("Extracting w/o pilots (symbol %d, rb %d, skip_half %d)\n",l,rb,skip_half); + if (skip_half==1) { + memcpy(dl_ch0_ext,dl_ch0,6*sizeof(int)); + + for (i=0; i<6; i++) { + rxF_ext[i]=rxF[i]; +#ifdef DEBUG_DLSCH_DEMOD + printf("extract rb %d, re %d => (%d,%d)\n",rb,i,*(short *)&rxF_ext[i],*(1+(short*)&rxF_ext[i])); +#endif + } + 
dl_ch0_ext+=6; + rxF_ext+=6; + } else if (skip_half==2) { + memcpy(dl_ch0_ext,dl_ch0+6,6*sizeof(int)); + + for (i=0; i<6; i++) { + rxF_ext[i]=rxF[(i+6)]; +#ifdef DEBUG_DLSCH_DEMOD + printf("extract rb %d, re %d => (%d,%d)\n",rb,i,*(short *)&rxF_ext[i],*(1+(short*)&rxF_ext[i])); +#endif + } + dl_ch0_ext+=6; + rxF_ext+=6; + } else { + memcpy(dl_ch0_ext,dl_ch0,12*sizeof(int)); + + for (i=0; i<12; i++) { + rxF_ext[i]=rxF[i]; +#ifdef DEBUG_DLSCH_DEMOD + printf("extract rb %d, re %d => (%d,%d)\n",rb,i,*(short *)&rxF_ext[i],*(1+(short*)&rxF_ext[i])); +#endif + } + dl_ch0_ext+=12; + rxF_ext+=12; + } + } else { + // printf("Extracting with pilots (symbol %d, rb %d, skip_half %d)\n",l,rb,skip_half); + j=0; + + if (skip_half==1) { + for (i=0; i<6; i++) { + if (i!=((frame_parms->nushift+poffset)%6)) { + rxF_ext[j]=rxF[i]; +#ifdef DEBUG_DLSCH_DEMOD + printf("extract rb %d, re %d => (%d,%d)\n",rb,i,*(short *)&rxF_ext[j],*(1+(short*)&rxF_ext[j])); +#endif + dl_ch0_ext[j++]=dl_ch0[i]; + } + } + rxF_ext+=5; + dl_ch0_ext+=5; + } else if (skip_half==2) { + for (i=0; i<6; i++) { + if (i!=((frame_parms->nushift+poffset)%6)) { + rxF_ext[j]=rxF[(i+6)]; +#ifdef DEBUG_DLSCH_DEMOD + printf("extract rb %d, re %d => (%d,%d)\n",rb,i,*(short *)&rxF_ext[j],*(1+(short*)&rxF_ext[j])); +#endif + dl_ch0_ext[j++]=dl_ch0[i+6]; + } + } + + dl_ch0_ext+=5; + rxF_ext+=5; + } else { + for (i=0; i<12; i++) { + if ((i!=(frame_parms->nushift+poffset)) && + (i!=((frame_parms->nushift+poffset+6)%12))) { + rxF_ext[j]=rxF[i]; +#ifdef DEBUG_DLSCH_DEMOD + printf("extract rb %d, re %d => (%d,%d)\n",rb,i,*(short *)&rxF_ext[j],*(1+(short*)&rxF_ext[j])); +#endif + dl_ch0_ext[j++]=dl_ch0[i]; + + } + } + + dl_ch0_ext+=10; + rxF_ext+=10; + } + } + } + dl_ch0+=12; + rxF+=12; + } // first half loop + + + // Do middle RB (around DC) + if (rb < 32) + rb_alloc_ind = (rb_alloc[0]>>rb) & 1; + else if (rb < 64) + rb_alloc_ind = (rb_alloc[1]>>(rb-32)) & 1; + else if (rb < 96) + rb_alloc_ind = (rb_alloc[2]>>(rb-64)) & 1; + else if 
(rb < 100) + rb_alloc_ind = (rb_alloc[3]>>(rb-96)) & 1; + else + rb_alloc_ind = 0; + + + if (rb_alloc_ind == 1) + nb_rb++; + + // PBCH + + if ((subframe==0) && + (l>=(nsymb>>1)) && + (l<((nsymb>>1) + 4))) { + rb_alloc_ind = 0; + } + + //SSS + if (((subframe==0)||(subframe==5)) && (l==sss_symb) ) { + rb_alloc_ind = 0; + } + + if (frame_parms->frame_type == FDD) { + //PSS + if (((subframe==0)||(subframe==5)) && (l==pss_symb) ) { + rb_alloc_ind = 0; + } + } + + //PSS + if ((frame_parms->frame_type == TDD) && + (subframe==6) && + (l==pss_symb) ) { + rb_alloc_ind = 0; + } + + + // printf("dlch_ext %d\n",dl_ch0_ext-&dl_ch_estimates_ext[aarx][0]); + // printf("DC rb %d (%p)\n",rb,rxF); + if (rb_alloc_ind==1) { +#ifdef DEBUG_DLSCH_DEMOD + printf("rb %d/symbol %d (skip_half %d)\n",rb,l,skip_half); +#endif + if (pilots==0) { + for (i=0; i<6; i++) { + dl_ch0_ext[i]=dl_ch0[i]; + rxF_ext[i]=rxF[i]; + } + + rxF = &rxdataF[aarx][((symbol*(frame_parms->ofdm_symbol_size)))]; + + for (; i<12; i++) { + dl_ch0_ext[i]=dl_ch0[i]; + rxF_ext[i]=rxF[(1+i-6)]; + } + + dl_ch0_ext+=12; + rxF_ext+=12; + } else { // pilots==1 + j=0; + + for (i=0; i<6; i++) { + if (i!=((frame_parms->nushift+poffset)%6)) { + dl_ch0_ext[j]=dl_ch0[i]; + rxF_ext[j++]=rxF[i]; +#ifdef DEBUG_DLSCH_DEMOD + printf("**extract rb %d, re %d => (%d,%d)\n",rb,i,*(short *)&rxF_ext[j-1],*(1+(short*)&rxF_ext[j-1])); +#endif + } + } + + rxF = &rxdataF[aarx][((symbol*(frame_parms->ofdm_symbol_size)))]; + + for (; i<12; i++) { + if (i!=((frame_parms->nushift+6+poffset)%12)) { + dl_ch0_ext[j]=dl_ch0[i]; + rxF_ext[j++]=rxF[(1+i-6)]; +#ifdef DEBUG_DLSCH_DEMOD + printf("**extract rb %d, re %d => (%d,%d)\n",rb,i,*(short *)&rxF_ext[j-1],*(1+(short*)&rxF_ext[j-1])); +#endif + } + } + + dl_ch0_ext+=10; + rxF_ext+=10; + } // symbol_mod==0 + } // rballoc==1 + else { + rxF = &rxdataF[aarx][((symbol*(frame_parms->ofdm_symbol_size)))]; + } + + dl_ch0+=12; + rxF+=7; + rb++; + + for (;rb<frame_parms->N_RB_DL;rb++) { + // printf("dlch_ext 
%d\n",dl_ch0_ext-&dl_ch_estimates_ext[aarx][0]); + // printf("rb %d (%p)\n",rb,rxF); + skip_half=0; + + if (rb < 32) + rb_alloc_ind = (rb_alloc[0]>>rb) & 1; + else if (rb < 64) + rb_alloc_ind = (rb_alloc[1]>>(rb-32)) & 1; + else if (rb < 96) + rb_alloc_ind = (rb_alloc[2]>>(rb-64)) & 1; + else if (rb < 100) + rb_alloc_ind = (rb_alloc[3]>>(rb-96)) & 1; + else + rb_alloc_ind = 0; + + if (rb_alloc_ind == 1) + nb_rb++; + + // PBCH + if ((subframe==0) && (rb>((frame_parms->N_RB_DL>>1)-3)) && (rb<((frame_parms->N_RB_DL>>1)+3)) && (l>=nsymb>>1) && (l<((nsymb>>1) + 4))) { + rb_alloc_ind = 0; + } + //PBCH subframe 0, symbols nsymb>>1 ... nsymb>>1 + 3 + if ((subframe==0) && (rb==((frame_parms->N_RB_DL>>1)-3)) && (l>=(nsymb>>1)) && (l<((nsymb>>1) + 4))) + skip_half=1; + else if ((subframe==0) && (rb==((frame_parms->N_RB_DL>>1)+3)) && (l>=(nsymb>>1)) && (l<((nsymb>>1) + 4))) + skip_half=2; + + //SSS + if (((subframe==0)||(subframe==5)) && (rb>((frame_parms->N_RB_DL>>1)-3)) && (rb<((frame_parms->N_RB_DL>>1)+3)) && (l==sss_symb) ) { + rb_alloc_ind = 0; + } + //SSS + if (((subframe==0)||(subframe==5)) && (rb==((frame_parms->N_RB_DL>>1)-3)) && (l==sss_symb)) + skip_half=1; + else if (((subframe==0)||(subframe==5)) && (rb==((frame_parms->N_RB_DL>>1)+3)) && (l==sss_symb)) + skip_half=2; + if (frame_parms->frame_type == FDD) { + //PSS + if (((subframe==0)||(subframe==5)) && (rb>((frame_parms->N_RB_DL>>1)-3)) && (rb<((frame_parms->N_RB_DL>>1)+3)) && (l==pss_symb) ) { + rb_alloc_ind = 0; + } + + //PSS + + if (((subframe==0)||(subframe==5)) && (rb==((frame_parms->N_RB_DL>>1)-3)) && (l==pss_symb)) + skip_half=1; + else if (((subframe==0)||(subframe==5)) && (rb==((frame_parms->N_RB_DL>>1)+3)) && (l==pss_symb)) + skip_half=2; + } + + if ((frame_parms->frame_type == TDD) && + + (subframe==6)) { //TDD Subframe 6 + if ((rb>((frame_parms->N_RB_DL>>1)-3)) && (rb<((frame_parms->N_RB_DL>>1)+3)) && (l==pss_symb) ) { + rb_alloc_ind = 0; + } + + if ((rb==((frame_parms->N_RB_DL>>1)-3)) && 
(l==pss_symb)) + skip_half=1; + else if ((rb==((frame_parms->N_RB_DL>>1)+3)) && (l==pss_symb)) + skip_half=2; + } + + if (rb_alloc_ind==1) { +#ifdef DEBUG_DLSCH_DEMOD + printf("rb %d/symbol %d (skip_half %d)\n",rb,l,skip_half); +#endif + /* + printf("rb %d\n",rb); + for (i=0;i<12;i++) + printf("(%d %d)",((short *)dl_ch0)[i<<1],((short*)dl_ch0)[1+(i<<1)]); + printf("\n"); + */ + if (pilots==0) { + // printf("Extracting w/o pilots (symbol %d, rb %d, skip_half %d)\n",l,rb,skip_half); + if (skip_half==1) { + memcpy(dl_ch0_ext,dl_ch0,6*sizeof(int)); + + for (i=0; i<6; i++) { + rxF_ext[i]=rxF[i]; +#ifdef DEBUG_DLSCH_DEMOD + printf("extract rb %d, re %d => (%d,%d)\n",rb,i,*(short *)&rxF_ext[i],*(1+(short*)&rxF_ext[i])); +#endif + } + dl_ch0_ext+=6; + rxF_ext+=6; + + } else if (skip_half==2) { + memcpy(dl_ch0_ext,dl_ch0+6,6*sizeof(int)); + + for (i=0; i<6; i++) { + rxF_ext[i]=rxF[(i+6)]; +#ifdef DEBUG_DLSCH_DEMOD + printf("extract rb %d, re %d => (%d,%d)\n",rb,i,*(short *)&rxF_ext[i],*(1+(short*)&rxF_ext[i])); +#endif + } + dl_ch0_ext+=6; + rxF_ext+=6; + + } else { + memcpy(dl_ch0_ext,dl_ch0,12*sizeof(int)); + + for (i=0; i<12; i++) { + rxF_ext[i]=rxF[i]; +#ifdef DEBUG_DLSCH_DEMOD + printf("extract rb %d, re %d => (%d,%d)\n",rb,i,*(short *)&rxF_ext[i],*(1+(short*)&rxF_ext[i])); +#endif + } + dl_ch0_ext+=12; + rxF_ext+=12; + } + } else { + // printf("Extracting with pilots (symbol %d, rb %d, skip_half %d)\n",l,rb,skip_half); + j=0; + + if (skip_half==1) { + for (i=0; i<6; i++) { + if (i!=((frame_parms->nushift+poffset)%6)) { + rxF_ext[j]=rxF[i]; +#ifdef DEBUG_DLSCH_DEMOD + printf("extract rb %d, re %d => (%d,%d)\n",rb,i,*(short *)&rxF_ext[j],*(1+(short*)&rxF_ext[j])); +#endif + dl_ch0_ext[j++]=dl_ch0[i]; + } + } + + dl_ch0_ext+=5; + rxF_ext+=5; + } else if (skip_half==2) { + for (i=0; i<6; i++) { + if (i!=((frame_parms->nushift+poffset)%6)) { + rxF_ext[j]=rxF[(i+6)]; +#ifdef DEBUG_DLSCH_DEMOD + printf("extract rb %d, re %d => (%d,%d)\n",rb,i,*(short 
*)&rxF_ext[j],*(1+(short*)&rxF_ext[j])); +#endif + dl_ch0_ext[j++]=dl_ch0[i+6]; + } + } + + dl_ch0_ext+=5; + rxF_ext+=5; + } else { + for (i=0; i<12; i++) { + if ((i!=(frame_parms->nushift+poffset)) && + (i!=((frame_parms->nushift+poffset+6)%12))) { + rxF_ext[j]=rxF[i]; +#ifdef DEBUG_DLSCH_DEMOD + printf("extract rb %d, re %d => (%d,%d)\n",rb,i,*(short *)&rxF_ext[j],*(1+(short*)&rxF_ext[j])); +#endif + dl_ch0_ext[j++]=dl_ch0[i]; + } + } + dl_ch0_ext+=10; + rxF_ext+=10; + } + } // pilots=0 + } + + dl_ch0+=12; + rxF+=12; + } + } + } + + + return(nb_rb/frame_parms->nb_antennas_rx); +} + +unsigned short dlsch_extract_rbs_dual(int **rxdataF, + int **dl_ch_estimates, + int **rxdataF_ext, + int **dl_ch_estimates_ext, + unsigned short pmi, + unsigned char *pmi_ext, + unsigned int *rb_alloc, + unsigned char symbol, + unsigned char subframe, + uint32_t high_speed_flag, + LTE_DL_FRAME_PARMS *frame_parms, + MIMO_mode_t mimo_mode) { + + int prb,nb_rb=0; + int prb_off,prb_off2; + int rb_alloc_ind,skip_half=0,sss_symb,pss_symb=0,nsymb,l; + int i,aarx; + int32_t *dl_ch0,*dl_ch0p,*dl_ch0_ext,*dl_ch1,*dl_ch1p,*dl_ch1_ext,*rxF,*rxF_ext; + int symbol_mod,pilots=0,j=0; + unsigned char *pmi_loc; + + symbol_mod = (symbol>=(7-frame_parms->Ncp)) ? symbol-(7-frame_parms->Ncp) : symbol; + // printf("extract_rbs: symbol_mod %d\n",symbol_mod); + + if ((symbol_mod == 0) || (symbol_mod == (4-frame_parms->Ncp))) + pilots=1; + + nsymb = (frame_parms->Ncp==NORMAL) ? 
14:12; + l=symbol; + + if (frame_parms->frame_type == TDD) { // TDD + sss_symb = nsymb-1; + pss_symb = 2; + } else { + sss_symb = (nsymb>>1)-2; + pss_symb = (nsymb>>1)-1; + } + + for (aarx=0; aarx<frame_parms->nb_antennas_rx; aarx++) { + + if (high_speed_flag==1) { + dl_ch0 = &dl_ch_estimates[aarx][5+(symbol*(frame_parms->ofdm_symbol_size))]; + dl_ch1 = &dl_ch_estimates[2+aarx][5+(symbol*(frame_parms->ofdm_symbol_size))]; + } else { + dl_ch0 = &dl_ch_estimates[aarx][5]; + dl_ch1 = &dl_ch_estimates[2+aarx][5]; + } + + pmi_loc = pmi_ext; + + // pointers to extracted RX signals and channel estimates + rxF_ext = &rxdataF_ext[aarx][symbol*(frame_parms->N_RB_DL*12)]; + dl_ch0_ext = &dl_ch_estimates_ext[aarx][symbol*(frame_parms->N_RB_DL*12)]; + dl_ch1_ext = &dl_ch_estimates_ext[2+aarx][symbol*(frame_parms->N_RB_DL*12)]; + + for (prb=0; prb<frame_parms->N_RB_DL; prb++) { + skip_half=0; + + if (prb < 32) + rb_alloc_ind = (rb_alloc[0]>>prb) & 1; + else if (prb < 64) + rb_alloc_ind = (rb_alloc[1]>>(prb-32)) & 1; + else if (prb < 96) + rb_alloc_ind = (rb_alloc[2]>>(prb-64)) & 1; + else if (prb < 100) + rb_alloc_ind = (rb_alloc[3]>>(prb-96)) & 1; + else + rb_alloc_ind = 0; + + if (rb_alloc_ind == 1) + nb_rb++; + + + if ((frame_parms->N_RB_DL&1) == 0) { // even number of RBs + + // PBCH + if ((subframe==0) && + (prb>=((frame_parms->N_RB_DL>>1)-3)) && + (prb<((frame_parms->N_RB_DL>>1)+3)) && + (l>=(nsymb>>1)) && + (l<((nsymb>>1) + 4))) { + rb_alloc_ind = 0; + // printf("symbol %d / rb %d: skipping PBCH REs\n",symbol,prb); + } + + //SSS + + if (((subframe==0)||(subframe==5)) && + (prb>=((frame_parms->N_RB_DL>>1)-3)) && + (prb<((frame_parms->N_RB_DL>>1)+3)) && + (l==sss_symb) ) { + rb_alloc_ind = 0; + // printf("symbol %d / rb %d: skipping SSS REs\n",symbol,prb); + } + + + + //PSS in subframe 0/5 if FDD + if (frame_parms->frame_type == FDD) { //FDD + if (((subframe==0)||(subframe==5)) && + (prb>=((frame_parms->N_RB_DL>>1)-3)) && + (prb<((frame_parms->N_RB_DL>>1)+3)) && + 
(l==pss_symb) ) { + rb_alloc_ind = 0; + // printf("symbol %d / rb %d: skipping PSS REs\n",symbol,prb); + } + } + + if ((frame_parms->frame_type == TDD) && + (subframe==6)) { //TDD Subframe 6 + if ((prb>=((frame_parms->N_RB_DL>>1)-3)) && + (prb<((frame_parms->N_RB_DL>>1)+3)) && + (l==pss_symb) ) { + rb_alloc_ind = 0; + } + } + + if (rb_alloc_ind==1) { // PRB is allocated + + + + prb_off = 12*prb; + prb_off2 = 1+(12*(prb-(frame_parms->N_RB_DL>>1))); + dl_ch0p = dl_ch0+(12*prb); + dl_ch1p = dl_ch1+(12*prb); + if (prb<(frame_parms->N_RB_DL>>1)){ + rxF = &rxdataF[aarx][prb_off+ + frame_parms->first_carrier_offset + + (symbol*(frame_parms->ofdm_symbol_size))]; + } + else { + rxF = &rxdataF[aarx][prb_off2+ + (symbol*(frame_parms->ofdm_symbol_size))]; + } + + /* + if (mimo_mode <= PUSCH_PRECODING1) + *pmi_loc = (pmi>>((prb>>2)<<1))&3; + else + *pmi_loc=(pmi>>prb)&1;*/ + + *pmi_loc = get_pmi(frame_parms->N_RB_DL,mimo_mode,pmi,prb); + pmi_loc++; + + + if (pilots == 0) { + + memcpy(dl_ch0_ext,dl_ch0p,12*sizeof(int)); + memcpy(dl_ch1_ext,dl_ch1p,12*sizeof(int)); + memcpy(rxF_ext,rxF,12*sizeof(int)); + dl_ch0_ext +=12; + dl_ch1_ext +=12; + rxF_ext +=12; + } else { // pilots==1 + j=0; + for (i=0; i<12; i++) { + if ((i!=frame_parms->nushift) && + (i!=frame_parms->nushift+3) && + (i!=frame_parms->nushift+6) && + (i!=((frame_parms->nushift+9)%12))) { + rxF_ext[j]=rxF[i]; + // printf("extract rb %d, re %d => (%d,%d)\n",rb,i,*(short *)&rxF_ext[j],*(1+(short*)&rxF_ext[j])); + dl_ch0_ext[j]=dl_ch0p[i]; + dl_ch1_ext[j++]=dl_ch1p[i]; + } + } + dl_ch0_ext+=8; + dl_ch1_ext+=8; + rxF_ext+=8; + } // pilots==1 + + } + } else { // Odd number of RBs + + + // PBCH + if ((subframe==0) && + (prb>((frame_parms->N_RB_DL>>1)-3)) && + (prb<((frame_parms->N_RB_DL>>1)+3)) && + (l>=(nsymb>>1)) && + (l<((nsymb>>1) + 4))) { + rb_alloc_ind = 0; + // printf("symbol %d / rb %d: skipping PBCH REs\n",symbol,prb); + } + + //SSS + + if (((subframe==0)||(subframe==5)) && + (prb>((frame_parms->N_RB_DL>>1)-3)) && + 
(prb<((frame_parms->N_RB_DL>>1)+3)) && + (l==sss_symb) ) { + rb_alloc_ind = 0; + // printf("symbol %d / rb %d: skipping SSS REs\n",symbol,prb); + } + + + + //PSS in subframe 0/5 if FDD + if (frame_parms->frame_type == FDD) { //FDD + if (((subframe==0)||(subframe==5)) && + (prb>((frame_parms->N_RB_DL>>1)-3)) && + (prb<((frame_parms->N_RB_DL>>1)+3)) && + (l==pss_symb) ) { + rb_alloc_ind = 0; + // printf("symbol %d / rb %d: skipping PSS REs\n",symbol,prb); + } + } + + if ((frame_parms->frame_type == TDD) && + ((subframe==1) || (subframe==6))) { //TDD Subframe 1-6 + if ((prb>((frame_parms->N_RB_DL>>1)-3)) && + (prb<((frame_parms->N_RB_DL>>1)+3)) && + (l==pss_symb) ) { + rb_alloc_ind = 0; + } + } + + if (rb_alloc_ind == 1) { + skip_half=0; + + //Check if we have to drop half a PRB due to PSS/SSS/PBCH + // skip_half == 0 means full PRB + // skip_half == 1 means first half is used (leftmost half-PRB from PSS/SSS/PBCH) + // skip_half == 2 means second half is used (rightmost half-PRB from PSS/SSS/PBCH) + //PBCH subframe 0, symbols nsymb>>1 ... 
nsymb>>1 + 3 + if ((subframe==0) && + (prb==((frame_parms->N_RB_DL>>1)-3)) && + (l>=(nsymb>>1)) && + (l<((nsymb>>1) + 4))) + skip_half=1; + else if ((subframe==0) && + (prb==((frame_parms->N_RB_DL>>1)+3)) && + (l>=(nsymb>>1)) && + (l<((nsymb>>1) + 4))) + skip_half=2; + + //SSS + if (((subframe==0)||(subframe==5)) && + (prb==((frame_parms->N_RB_DL>>1)-3)) && + (l==sss_symb)) + skip_half=1; + else if (((subframe==0)||(subframe==5)) && + (prb==((frame_parms->N_RB_DL>>1)+3)) && + (l==sss_symb)) + skip_half=2; + + //PSS Subframe 0,5 + if (((frame_parms->frame_type == FDD) && + (((subframe==0)||(subframe==5)))) || //FDD Subframes 0,5 + ((frame_parms->frame_type == TDD) && + (((subframe==1) || (subframe==6))))) { //TDD Subframes 1,6 + + if ((prb==((frame_parms->N_RB_DL>>1)-3)) && + (l==pss_symb)) + skip_half=1; + else if ((prb==((frame_parms->N_RB_DL>>1)+3)) && + (l==pss_symb)) + skip_half=2; + } + + + prb_off = 12*prb; + prb_off2 = 7+(12*(prb-(frame_parms->N_RB_DL>>1)-1)); + dl_ch0p = dl_ch0+(12*prb); + dl_ch1p = dl_ch1+(12*prb); + + if (prb<=(frame_parms->N_RB_DL>>1)){ + rxF = &rxdataF[aarx][prb_off+ + frame_parms->first_carrier_offset + + (symbol*(frame_parms->ofdm_symbol_size))]; + } + else { + rxF = &rxdataF[aarx][prb_off2+ + (symbol*(frame_parms->ofdm_symbol_size))]; + } +#ifdef DEBUG_DLSCH_DEMOD + printf("symbol %d / rb %d: alloc %d skip_half %d (rxF %p, rxF_ext %p) prb_off (%d,%d)\n",symbol,prb,rb_alloc_ind,skip_half,rxF,rxF_ext,prb_off,prb_off2); +#endif + /* if (mimo_mode <= PUSCH_PRECODING1) + *pmi_loc = (pmi>>((prb>>2)<<1))&3; + else + *pmi_loc=(pmi>>prb)&1; + // printf("symbol_mod %d (pilots %d) rb %d, sb %d, pmi %d (pmi_loc %p,rxF %p, ch00 %p, ch01 %p, rxF_ext %p dl_ch0_ext %p dl_ch1_ext %p)\n",symbol_mod,pilots,prb,prb>>2,*pmi_loc,pmi_loc,rxF,dl_ch0, dl_ch1, rxF_ext,dl_ch0_ext,dl_ch1_ext); +*/ + *pmi_loc = get_pmi(frame_parms->N_RB_DL,mimo_mode,pmi,prb); + pmi_loc++; + + if (prb != (frame_parms->N_RB_DL>>1)) { // This PRB is not around DC + if (pilots==0) { 
+ if (skip_half==1) { + memcpy(dl_ch0_ext,dl_ch0p,6*sizeof(int32_t)); + memcpy(dl_ch1_ext,dl_ch1p,6*sizeof(int32_t)); + memcpy(rxF_ext,rxF,6*sizeof(int32_t)); +#ifdef DEBUG_DLSCH_DEMOD + for (i=0;i<6;i++) + printf("extract rb %d, re %d => (%d,%d)\n",prb,i,*(short *)&rxF_ext[i],*(1+(short*)&rxF_ext[i])); +#endif + dl_ch0_ext+=6; + dl_ch1_ext+=6; + rxF_ext+=6; + } else if (skip_half==2) { + memcpy(dl_ch0_ext,dl_ch0p+6,6*sizeof(int32_t)); + memcpy(dl_ch1_ext,dl_ch1p+6,6*sizeof(int32_t)); + memcpy(rxF_ext,rxF+6,6*sizeof(int32_t)); +#ifdef DEBUG_DLSCH_DEMOD + for (i=0;i<6;i++) + printf("extract rb %d, re %d => (%d,%d)\n",prb,i,*(short *)&rxF_ext[i],*(1+(short*)&rxF_ext[i])); +#endif + dl_ch0_ext+=6; + dl_ch1_ext+=6; + rxF_ext+=6; + } else { // skip_half==0 + memcpy(dl_ch0_ext,dl_ch0p,12*sizeof(int32_t)); + memcpy(dl_ch1_ext,dl_ch1p,12*sizeof(int32_t)); + memcpy(rxF_ext,rxF,12*sizeof(int32_t)); +#ifdef DEBUG_DLSCH_DEMOD + for (i=0;i<12;i++) + printf("extract rb %d, re %d => (%d,%d)\n",prb,i,*(short *)&rxF_ext[i],*(1+(short*)&rxF_ext[i])); +#endif + dl_ch0_ext+=12; + dl_ch1_ext+=12; + rxF_ext+=12; + } + } else { // pilots=1 + j=0; + + if (skip_half==1) { + for (i=0; i<6; i++) { + if ((i!=frame_parms->nushift) && + (i!=((frame_parms->nushift+3)%6))) { + rxF_ext[j]=rxF[i]; +#ifdef DEBUG_DLSCH_DEMOD + printf("(pilots,skip1)extract rb %d, re %d (%d)=> (%d,%d)\n",prb,i,j,*(short *)&rxF_ext[j],*(1+(short*)&rxF_ext[j])); +#endif + dl_ch0_ext[j]=dl_ch0p[i]; + dl_ch1_ext[j++]=dl_ch1p[i]; + } + } + dl_ch0_ext+=4; + dl_ch1_ext+=4; + rxF_ext+=4; + } else if (skip_half==2) { + for (i=0; i<6; i++) { + if ((i!=frame_parms->nushift) && + (i!=((frame_parms->nushift+3)%6))) { + rxF_ext[j]=rxF[(i+6)]; +#ifdef DEBUG_DLSCH_DEMOD + printf("(pilots,skip2)extract rb %d, re %d (%d) => (%d,%d)\n",prb,i,j,*(short *)&rxF_ext[j],*(1+(short*)&rxF_ext[j])); +#endif + dl_ch0_ext[j]=dl_ch0p[i+6]; + dl_ch1_ext[j++]=dl_ch1p[i+6]; + } + } + dl_ch0_ext+=4; + dl_ch1_ext+=4; + rxF_ext+=4; + + } else { 
//skip_half==0 + for (i=0; i<12; i++) { + if ((i!=frame_parms->nushift) && + (i!=frame_parms->nushift+3) && + (i!=frame_parms->nushift+6) && + (i!=((frame_parms->nushift+9)%12))) { + rxF_ext[j]=rxF[i]; +#ifdef DEBUG_DLSCH_DEMOD + printf("(pilots)extract rb %d, re %d => (%d,%d)\n",prb,i,*(short *)&rxF_ext[j],*(1+(short*)&rxF_ext[j])); +#endif + dl_ch0_ext[j] =dl_ch0p[i]; + dl_ch1_ext[j++]=dl_ch1p[i]; + } + } + dl_ch0_ext+=8; + dl_ch1_ext+=8; + rxF_ext+=8; + } //skip_half==0 + } //pilots==1 + } else { // Do middle RB (around DC) + + if (pilots==0) { + memcpy(dl_ch0_ext,dl_ch0p,6*sizeof(int32_t)); + memcpy(dl_ch1_ext,dl_ch1p,6*sizeof(int32_t)); + memcpy(rxF_ext,rxF,6*sizeof(int32_t)); +#ifdef DEBUG_DLSCH_DEMOD + for (i=0; i<6; i++) { + printf("extract rb %d, re %d => (%d,%d)\n",prb,i,*(short *)&rxF_ext[i],*(1+(short*)&rxF_ext[i])); + } +#endif + rxF_ext+=6; + dl_ch0_ext+=6; + dl_ch1_ext+=6; + dl_ch0p+=6; + dl_ch1p+=6; + + rxF = &rxdataF[aarx][1+((symbol*(frame_parms->ofdm_symbol_size)))]; + + memcpy(dl_ch0_ext,dl_ch0p,6*sizeof(int32_t)); + memcpy(dl_ch1_ext,dl_ch1p,6*sizeof(int32_t)); + memcpy(rxF_ext,rxF,6*sizeof(int32_t)); +#ifdef DEBUG_DLSCH_DEMOD + for (i=0; i<6; i++) { + printf("extract rb %d, re %d => (%d,%d)\n",prb,i,*(short *)&rxF_ext[i],*(1+(short*)&rxF_ext[i])); + } +#endif + rxF_ext+=6; + dl_ch0_ext+=6; + dl_ch1_ext+=6; + } else { // pilots==1 + j=0; + + for (i=0; i<6; i++) { + if ((i!=frame_parms->nushift) && + (i!=((frame_parms->nushift+3)%6))) { + dl_ch0_ext[j]=dl_ch0p[i]; + dl_ch1_ext[j]=dl_ch1p[i]; + rxF_ext[j++]=rxF[i]; +#ifdef DEBUG_DLSCH_DEMOD + printf("(pilots)extract rb %d, re %d (%d) => (%d,%d)\n",prb,i,j,*(short *)&rxF[i],*(1+(short*)&rxF[i])); +#endif + } + } + rxF = &rxdataF[aarx][1+symbol*(frame_parms->ofdm_symbol_size)]; + + for (; i<12; i++) { + if ((i!=((frame_parms->nushift+6)%12)) && + (i!=((frame_parms->nushift+9)%12))) { + dl_ch0_ext[j]=dl_ch0p[i]; + dl_ch1_ext[j]=dl_ch1p[i]; + rxF_ext[j++]=rxF[i-6]; +#ifdef DEBUG_DLSCH_DEMOD + 
printf("(pilots)extract rb %d, re %d (%d) => (%d,%d)\n",prb,i,j,*(short *)&rxF[1+i-6],*(1+(short*)&rxF[1+i-6])); +#endif + } + } + + dl_ch0_ext+=8; + dl_ch1_ext+=8; + rxF_ext+=8; + } //pilots==1 + } // if Middle PRB + } // if odd PRB + } // if rballoc==1 + } // for prb + } // for aarx + return(nb_rb/frame_parms->nb_antennas_rx); +} + +unsigned short dlsch_extract_rbs_TM7(int **rxdataF, + int **dl_bf_ch_estimates, + int **rxdataF_ext, + int **dl_bf_ch_estimates_ext, + unsigned int *rb_alloc, + unsigned char symbol, + unsigned char subframe, + uint32_t high_speed_flag, + LTE_DL_FRAME_PARMS *frame_parms) +{ + + unsigned short rb,nb_rb=0; + unsigned char rb_alloc_ind; + unsigned char i,aarx,l,nsymb,skip_half=0,sss_symb,pss_symb=0; + int *dl_ch0,*dl_ch0_ext,*rxF,*rxF_ext; + + unsigned char symbol_mod,pilots=0,uespec_pilots=0,j=0,poffset=0,uespec_poffset=0; + int8_t uespec_nushift = frame_parms->Nid_cell%3; + + symbol_mod = (symbol>=(7-frame_parms->Ncp)) ? symbol-(7-frame_parms->Ncp) : symbol; + pilots = ((symbol_mod==0)||(symbol_mod==(4-frame_parms->Ncp))) ? 1 : 0; + l=symbol; + nsymb = (frame_parms->Ncp==NORMAL) ? 
14:12; + + if (frame_parms->Ncp==0){ + if (symbol==3 || symbol==6 || symbol==9 || symbol==12) + uespec_pilots = 1; + } else{ + if (symbol==4 || symbol==7 || symbol==10) + uespec_pilots = 1; + } + + if (frame_parms->frame_type == TDD) {// TDD + sss_symb = nsymb-1; + pss_symb = 2; + } else { + sss_symb = (nsymb>>1)-2; + pss_symb = (nsymb>>1)-1; + } + + if (symbol_mod==(4-frame_parms->Ncp)) + poffset=3; + + if ((frame_parms->Ncp==0 && (symbol==6 ||symbol ==12)) || (frame_parms->Ncp==1 && symbol==7)) + uespec_poffset=2; + + for (aarx=0; aarx<frame_parms->nb_antennas_rx; aarx++) { + + if (high_speed_flag == 1) + dl_ch0 = &dl_bf_ch_estimates[aarx][symbol*(frame_parms->ofdm_symbol_size)]; + else + dl_ch0 = &dl_bf_ch_estimates[aarx][0]; + + dl_ch0_ext = &dl_bf_ch_estimates_ext[aarx][symbol*(frame_parms->N_RB_DL*12)]; + + rxF_ext = &rxdataF_ext[aarx][symbol*(frame_parms->N_RB_DL*12)]; + rxF = &rxdataF[aarx][(frame_parms->first_carrier_offset + (symbol*(frame_parms->ofdm_symbol_size)))]; + + if ((frame_parms->N_RB_DL&1) == 0) // even number of RBs + for (rb=0; rb<frame_parms->N_RB_DL; rb++) { + + if (rb < 32) + rb_alloc_ind = (rb_alloc[0]>>rb) & 1; + else if (rb < 64) + rb_alloc_ind = (rb_alloc[1]>>(rb-32)) & 1; + else if (rb < 96) + rb_alloc_ind = (rb_alloc[2]>>(rb-64)) & 1; + else if (rb < 100) + rb_alloc_ind = (rb_alloc[3]>>(rb-96)) & 1; + else + rb_alloc_ind = 0; + + if (rb_alloc_ind == 1) + nb_rb++; + + // For second half of RBs skip DC carrier + if (rb==(frame_parms->N_RB_DL>>1)) { + rxF = &rxdataF[aarx][(1 + (symbol*(frame_parms->ofdm_symbol_size)))]; + //dl_ch0++; + } + + // PBCH + if ((subframe==0) && (rb>=((frame_parms->N_RB_DL>>1)-3)) && (rb<((frame_parms->N_RB_DL>>1)+3)) && (l>=nsymb>>1) && (l<((nsymb>>1) + 4))) { + rb_alloc_ind = 0; + } + + //SSS + if (((subframe==0)||(subframe==5)) && (rb>=((frame_parms->N_RB_DL>>1)-3)) && (rb<((frame_parms->N_RB_DL>>1)+3)) && (l==sss_symb) ) { + rb_alloc_ind = 0; + } + + + if (frame_parms->frame_type == FDD) { + //PSS + if 
(((subframe==0)||(subframe==5)) && (rb>=((frame_parms->N_RB_DL>>1)-3)) && (rb<((frame_parms->N_RB_DL>>1)+3)) && (l==pss_symb) ) { + rb_alloc_ind = 0; + } + } + + if ((frame_parms->frame_type == TDD) && + (subframe==6)) { //TDD Subframe 6 + if ((rb>=((frame_parms->N_RB_DL>>1)-3)) && (rb<((frame_parms->N_RB_DL>>1)+3)) && (l==pss_symb) ) { + rb_alloc_ind = 0; + } + } + + if (rb_alloc_ind==1) { + + /* + printf("rb %d\n",rb); + for (i=0;i<12;i++) + printf("(%d %d)",((short *)dl_ch0)[i<<1],((short*)dl_ch0)[1+(i<<1)]); + printf("\n"); + */ + if (pilots==0 && uespec_pilots==0) { + memcpy(dl_ch0_ext,dl_ch0,12*sizeof(int)); + + for (i=0; i<12; i++) { + rxF_ext[i]=rxF[i]; + } + + dl_ch0_ext+=12; + rxF_ext+=12; + } else if(pilots==1 && uespec_pilots==0) { + j=0; + + for (i=0; i<12; i++) { + if ((i!=(frame_parms->nushift+poffset)) && + (i!=((frame_parms->nushift+poffset+6)%12))) { + rxF_ext[j]=rxF[i]; + dl_ch0_ext[j++]=dl_ch0[i]; + } + } + + dl_ch0_ext+=10; + rxF_ext+=10; + + } else if (pilots==0 && uespec_pilots==1) { + j=0; + + + for (i=0; i<12; i++){ + if (frame_parms->Ncp==0){ + if (i!=uespec_nushift+uespec_poffset && i!=uespec_nushift+uespec_poffset+4 && i!=(uespec_nushift+uespec_poffset+8)%12){ + rxF_ext[j] = rxF[i]; + dl_ch0_ext[j++]=dl_ch0[i]; + } + } else{ + if (i!=uespec_nushift+uespec_poffset && i!=uespec_nushift+uespec_poffset+3 && i!=uespec_nushift+uespec_poffset+6 && i!=(uespec_nushift+uespec_poffset+9)%12){ + rxF_ext[j] = rxF[i]; + dl_ch0_ext[j++]=dl_ch0[i]; + } + } + + } + + dl_ch0_ext+=9-frame_parms->Ncp; + rxF_ext+=9-frame_parms->Ncp; + + } else { + LOG_E(PHY,"dlsch_extract_rbs_TM7(dl_demodulation.c):pilot or ue spec pilot detection error\n"); + exit(-1); + } + + } + + dl_ch0+=12; + rxF+=12; + + } + else { // Odd number of RBs + for (rb=0; rb<frame_parms->N_RB_DL>>1; rb++) { + skip_half=0; + + if (rb < 32) + rb_alloc_ind = (rb_alloc[0]>>rb) & 1; + else if (rb < 64) + rb_alloc_ind = (rb_alloc[1]>>(rb-32)) & 1; + else if (rb < 96) + rb_alloc_ind = 
(rb_alloc[2]>>(rb-64)) & 1; + else if (rb < 100) + rb_alloc_ind = (rb_alloc[3]>>(rb-96)) & 1; + else + rb_alloc_ind = 0; + + if (rb_alloc_ind == 1) + nb_rb++; + + // PBCH + if ((subframe==0) && (rb>((frame_parms->N_RB_DL>>1)-3)) && (rb<((frame_parms->N_RB_DL>>1)+3)) && (l>=(nsymb>>1)) && (l<((nsymb>>1) + 4))) { + rb_alloc_ind = 0; + } + + //PBCH subframe 0, symbols nsymb>>1 ... nsymb>>1 + 3 + if ((subframe==0) && (rb==((frame_parms->N_RB_DL>>1)-3)) && (l>=(nsymb>>1)) && (l<((nsymb>>1) + 4))) + skip_half=1; + else if ((subframe==0) && (rb==((frame_parms->N_RB_DL>>1)+3)) && (l>=(nsymb>>1)) && (l<((nsymb>>1) + 4))) + skip_half=2; + + //SSS + + if (((subframe==0)||(subframe==5)) && + (rb>((frame_parms->N_RB_DL>>1)-3)) && + (rb<((frame_parms->N_RB_DL>>1)+3)) && + (l==sss_symb) ) { + rb_alloc_ind = 0; + } + + //SSS + if (((subframe==0)||(subframe==5)) && + (rb==((frame_parms->N_RB_DL>>1)-3)) && + (l==sss_symb)) + skip_half=1; + else if (((subframe==0)||(subframe==5)) && + (rb==((frame_parms->N_RB_DL>>1)+3)) && + (l==sss_symb)) + skip_half=2; + + //PSS in subframe 0/5 if FDD + if (frame_parms->frame_type == FDD) { //FDD + if (((subframe==0)||(subframe==5)) && (rb>((frame_parms->N_RB_DL>>1)-3)) && (rb<((frame_parms->N_RB_DL>>1)+3)) && (l==pss_symb) ) { + rb_alloc_ind = 0; + } + + if (((subframe==0)||(subframe==5)) && (rb==((frame_parms->N_RB_DL>>1)-3)) && (l==pss_symb)) + skip_half=1; + else if (((subframe==0)||(subframe==5)) && (rb==((frame_parms->N_RB_DL>>1)+3)) && (l==pss_symb)) + skip_half=2; + } + + if ((frame_parms->frame_type == TDD) && ((subframe==1)||(subframe==6))) { //TDD Subframe 1 and 6 + if ((rb>((frame_parms->N_RB_DL>>1)-3)) && (rb<((frame_parms->N_RB_DL>>1)+3)) && (l==pss_symb) ) { + rb_alloc_ind = 0; + } + + if ((rb==((frame_parms->N_RB_DL>>1)-3)) && (l==pss_symb)) + skip_half=1; + else if ((rb==((frame_parms->N_RB_DL>>1)+3)) && (l==pss_symb)) + skip_half=2; + } + + + if (rb_alloc_ind==1) { +#ifdef DEBUG_DLSCH_DEMOD + printf("rb %d/symbol %d pilots %d, 
uespec_pilots %d, (skip_half %d)\n",rb,l,pilots,uespec_pilots,skip_half); +#endif + + if (pilots==0 && uespec_pilots==0) { + //printf("Extracting w/o pilots (symbol %d, rb %d, skip_half %d)\n",l,rb,skip_half); + + if (skip_half==1) { + memcpy(dl_ch0_ext,dl_ch0,6*sizeof(int)); + + for (i=0; i<6; i++) { + rxF_ext[i]=rxF[i]; +#ifdef DEBUG_DLSCH_DEMOD + printf("extract rb %d, re %d => (%d,%d)\n",rb,i,*(short *)&rxF_ext[i],*(1+(short*)&rxF_ext[i])); +#endif + } + + dl_ch0_ext+=6; + rxF_ext+=6; + } else if (skip_half==2) { + memcpy(dl_ch0_ext,dl_ch0+6,6*sizeof(int)); + + for (i=0; i<6; i++) { + rxF_ext[i]=rxF[(i+6)]; +#ifdef DEBUG_DLSCH_DEMOD + printf("extract rb %d, re %d => (%d,%d)\n",rb,i,*(short *)&rxF_ext[i],*(1+(short*)&rxF_ext[i])); +#endif + } + + dl_ch0_ext+=6; + rxF_ext+=6; + } else { + memcpy(dl_ch0_ext,dl_ch0,12*sizeof(int)); + + for (i=0; i<12; i++){ + rxF_ext[i]=rxF[i]; +#ifdef DEBUG_DLSCH_DEMOD + printf("extract rb %d, re %d => (%d,%d)\n",symbol,rb,i,*(short *)&rxF[i],*(1+(short*)&rxF[i])); +#endif + } + dl_ch0_ext+=12; + rxF_ext+=12; + } + } else if (pilots==1 && uespec_pilots==0) { + // printf("Extracting with pilots (symbol %d, rb %d, skip_half %d)\n",l,rb,skip_half); + j=0; + + if (skip_half==1) { + for (i=0; i<6; i++) { + if (i!=((frame_parms->nushift+poffset)%6)) { + rxF_ext[j]=rxF[i]; + dl_ch0_ext[j++]=dl_ch0[i]; +#ifdef DEBUG_DLSCH_DEMOD + printf("extract rb %d, re %d => (%d,%d)\n",rb,i,*(short *)&rxF_ext[i],*(1+(short*)&rxF_ext[i])); +#endif + } + } + + dl_ch0_ext+=5; + rxF_ext+=5; + } else if (skip_half==2) { + for (i=0; i<6; i++) { + if (i!=((frame_parms->nushift+poffset)%6)) { + rxF_ext[j]=rxF[(i+6)]; + dl_ch0_ext[j++]=dl_ch0[i+6]; +#ifdef DEBUG_DLSCH_DEMOD + printf("extract rb %d, re %d => (%d,%d)\n",rb,i,*(short *)&rxF_ext[i],*(1+(short*)&rxF_ext[i])); +#endif + } + } + + dl_ch0_ext+=5; + rxF_ext+=5; + } else { + for (i=0; i<12; i++) { + if ((i!=(frame_parms->nushift+poffset)) && + (i!=((frame_parms->nushift+poffset+6)%12))) { + 
rxF_ext[j]=rxF[i]; +#ifdef DEBUG_DLSCH_DEMOD + printf("extract rb %d, re %d => (%d,%d)\n",rb,i,*(short *)&rxF_ext[j],*(1+(short*)&rxF_ext[j])); +#endif + dl_ch0_ext[j++]=dl_ch0[i]; + + } + } + + dl_ch0_ext+=10; + rxF_ext+=10; + } + } else if(pilots==0 && uespec_pilots==1){ + //printf("Extracting with uespec pilots (symbol %d, rb %d, skip_half %d)\n",l,rb,skip_half); + j=0; + + if (skip_half==1) { + if (frame_parms->Ncp==0){ + for (i=0; i<6; i++) { + if (i!=uespec_nushift+uespec_poffset && i!=uespec_nushift+uespec_poffset+4 && i!=(uespec_nushift+uespec_poffset+8)%12){ + rxF_ext[j]=rxF[i]; + dl_ch0_ext[j++]=dl_ch0[i]; +#ifdef DEBUG_DLSCH_DEMOD + printf("extract rb %d, re %d => (%d,%d)\n",rb,i,*(short *)&rxF_ext[i],*(1+(short*)&rxF_ext[i])); +#endif + } + } + dl_ch0_ext+=6-(uespec_nushift+uespec_poffset<6)-(uespec_nushift+uespec_poffset+4<6)-((uespec_nushift+uespec_poffset+8)%12<6); + rxF_ext+=6-(uespec_nushift+uespec_poffset<6)-(uespec_nushift+uespec_poffset+4<6)-((uespec_nushift+uespec_poffset+8)%12<6); + + } else{ + for (i=0; i<6; i++) { + if (i!=uespec_nushift+uespec_poffset && i!=uespec_nushift+uespec_poffset+3 && i!=uespec_nushift+uespec_poffset+6 && i!=(uespec_nushift+uespec_poffset+9)%12){ + rxF_ext[j]=rxF[i]; + dl_ch0_ext[j++]=dl_ch0[i]; +#ifdef DEBUG_DLSCH_DEMOD + printf("extract rb %d, re %d => (%d,%d)\n",rb,i,*(short *)&rxF_ext[i],*(1+(short*)&rxF_ext[i])); +#endif + } + } + dl_ch0_ext+=4; + rxF_ext+=4; + } + + } else if (skip_half==2) { + if(frame_parms->Ncp==0){ + for (i=0; i<6; i++) { + if (i!=uespec_nushift+uespec_poffset && i!=uespec_nushift+uespec_poffset+4 && i!=(uespec_nushift+uespec_poffset+8)%12){ + rxF_ext[j]=rxF[(i+6)]; + dl_ch0_ext[j++]=dl_ch0[i+6]; +#ifdef DEBUG_DLSCH_DEMOD + printf("extract rb %d, re %d => (%d,%d)\n",rb,i,*(short *)&rxF_ext[i],*(1+(short*)&rxF_ext[i])); +#endif + } + } + dl_ch0_ext+=6-(uespec_nushift+uespec_poffset>6)-(uespec_nushift+uespec_poffset+4>6)-((uespec_nushift+uespec_poffset+8)%12>6); + 
rxF_ext+=6-(uespec_nushift+uespec_poffset>6)-(uespec_nushift+uespec_poffset+4>6)-((uespec_nushift+uespec_poffset+8)%12>6); + + } else { + for (i=0; i<6; i++) { + if (i!=uespec_nushift+uespec_poffset && i!=uespec_nushift+uespec_poffset+3 && i!=uespec_nushift+uespec_poffset+6 && i!=(uespec_nushift+uespec_poffset+9)%12){ + rxF_ext[j]=rxF[(i+6)]; + dl_ch0_ext[j++]=dl_ch0[i+6]; +#ifdef DEBUG_DLSCH_DEMOD + printf("extract rb %d, re %d => (%d,%d)\n",rb,i,*(short *)&rxF_ext[i],*(1+(short*)&rxF_ext[i])); +#endif + } + } + dl_ch0_ext+=4; + rxF_ext+=4; + } + + } else { + + for (i=0; i<12; i++){ + if (frame_parms->Ncp==0){ + if (i!=uespec_nushift+uespec_poffset && i!=uespec_nushift+uespec_poffset+4 && i!=(uespec_nushift+uespec_poffset+8)%12){ + rxF_ext[j] = rxF[i]; + dl_ch0_ext[j++] = dl_ch0[i]; +#ifdef DEBUG_DLSCH_DEMOD + printf("extract rb %d, re %d, j %d => (%d,%d)\n",symbol,rb,i,j-1,*(short *)&dl_ch0[j],*(1+(short*)&dl_ch0[i])); +#endif + } + } else{ + if (i!=uespec_nushift+uespec_poffset && i!=uespec_nushift+uespec_poffset+3 && i!=uespec_nushift+uespec_poffset+6 && i!=(uespec_nushift+uespec_poffset+9)%12){ + rxF_ext[j] = rxF[i]; + dl_ch0_ext[j++]=dl_ch0[i]; +#ifdef DEBUG_DLSCH_DEMOD + printf("extract rb %d, re %d => (%d,%d)\n",rb,i,*(short *)&rxF_ext[i],*(1+(short*)&rxF_ext[i])); +#endif + } + } + + } + + dl_ch0_ext+=9-frame_parms->Ncp; + rxF_ext+=9-frame_parms->Ncp; + } + + } else { + LOG_E(PHY,"dlsch_extract_rbs_TM7(dl_demodulation.c):pilot or ue spec pilot detection error\n"); + exit(-1); + + } + } + + dl_ch0+=12; + rxF+=12; + } // first half loop + + + // Do middle RB (around DC) + if (rb < 32) + rb_alloc_ind = (rb_alloc[0]>>rb) & 1; + else if (rb < 64) + rb_alloc_ind = (rb_alloc[1]>>(rb-32)) & 1; + else if (rb < 96) + rb_alloc_ind = (rb_alloc[2]>>(rb-64)) & 1; + else if (rb < 100) + rb_alloc_ind = (rb_alloc[3]>>(rb-96)) & 1; + else + rb_alloc_ind = 0; + + if (rb_alloc_ind == 1) + nb_rb++; + + // PBCH + if ((subframe==0) && (rb>=((frame_parms->N_RB_DL>>1)-3)) && 
(rb<((frame_parms->N_RB_DL>>1)+3)) && (l>=(nsymb>>1)) && (l<((nsymb>>1) + 4))) { + rb_alloc_ind = 0; + } + + //SSS + if (((subframe==0)||(subframe==5)) && (rb>=((frame_parms->N_RB_DL>>1)-3)) && (rb<((frame_parms->N_RB_DL>>1)+3)) && (l==sss_symb) ) { + rb_alloc_ind = 0; + } + + if (frame_parms->frame_type == FDD) { + //PSS + if (((subframe==0)||(subframe==5)) && (rb>=((frame_parms->N_RB_DL>>1)-3)) && (rb<((frame_parms->N_RB_DL>>1)+3)) && (l==pss_symb) ) { + rb_alloc_ind = 0; + } + } + + if ((frame_parms->frame_type == TDD) && ((subframe==1)||(subframe==6))) { + //PSS + if ((rb>((frame_parms->N_RB_DL>>1)-3)) && (rb<((frame_parms->N_RB_DL>>1)+3)) && (l==pss_symb) ) { + rb_alloc_ind = 0; + } + } + + //printf("dlch_ext %d\n",dl_ch0_ext-&dl_ch_estimates_ext[aarx][0]); + //printf("DC rb %d (%p)\n",rb,rxF); + if (rb_alloc_ind==1) { + //printf("rb %d/symbol %d (skip_half %d)\n",rb,l,skip_half); + if (pilots==0 && uespec_pilots==0) { + for (i=0; i<6; i++) { + dl_ch0_ext[i]=dl_ch0[i]; + rxF_ext[i]=rxF[i]; +#ifdef DEBUG_DLSCH_DEMOD + printf("extract rb %d, re %d => (%d,%d)\n",rb,i,*(short *)&rxF_ext[i],*(1+(short*)&rxF_ext[i])); +#endif + } + + rxF = &rxdataF[aarx][((symbol*(frame_parms->ofdm_symbol_size)))]; + + for (; i<12; i++) { + dl_ch0_ext[i]=dl_ch0[i]; + rxF_ext[i]=rxF[(1+i-6)]; +#ifdef DEBUG_DLSCH_DEMOD + printf("extract rb %d, re %d => (%d,%d)\n",rb,i,*(short *)&rxF_ext[i],*(1+(short*)&rxF_ext[i])); +#endif + } + + dl_ch0_ext+=12; + rxF_ext+=12; + } else if(pilots==1 && uespec_pilots==0){ // pilots==1 + j=0; + + for (i=0; i<6; i++) { + if (i!=((frame_parms->nushift+poffset)%6)) { + dl_ch0_ext[j]=dl_ch0[i]; + rxF_ext[j++]=rxF[i]; +#ifdef DEBUG_DLSCH_DEMOD + printf("extract rb %d, re %d => (%d,%d)\n",rb,i,*(short *)&rxF_ext[i],*(1+(short*)&rxF_ext[i])); +#endif + } + } + + rxF = &rxdataF[aarx][((symbol*(frame_parms->ofdm_symbol_size)))]; + + for (; i<12; i++) { + if (i!=((frame_parms->nushift+6+poffset)%12)) { + dl_ch0_ext[j]=dl_ch0[i]; + rxF_ext[j++]=rxF[(1+i-6)]; 
+#ifdef DEBUG_DLSCH_DEMOD + printf("extract rb %d, re %d => (%d,%d)\n",rb,i,*(short *)&rxF_ext[i],*(1+(short*)&rxF_ext[i])); +#endif + } + } + + dl_ch0_ext+=10; + rxF_ext+=10; + } else if(pilots==0 && uespec_pilots==1) { + j=0; + + for (i=0; i<6; i++) { + if (frame_parms->Ncp==0){ + if (i!=uespec_nushift+uespec_poffset && i!=uespec_nushift+uespec_poffset+4 && i!=(uespec_nushift+uespec_poffset+8)%12){ + dl_ch0_ext[j]=dl_ch0[i]; + rxF_ext[j++] = rxF[i]; +#ifdef DEBUG_DLSCH_DEMOD + printf("extract rb %d, re %d => (%d,%d)\n",rb,i,*(short *)&rxF_ext[i],*(1+(short*)&rxF_ext[i])); +#endif + } + } else { + if (i!=uespec_nushift+uespec_poffset && i!=uespec_nushift+uespec_poffset+3 && i!=uespec_nushift+uespec_poffset+6 && i!=(uespec_nushift+uespec_poffset+9)%12){ + dl_ch0_ext[j]=dl_ch0[i]; + rxF_ext[j++] = rxF[i]; +#ifdef DEBUG_DLSCH_DEMOD + printf("extract rb %d, re %d => (%d,%d)\n",rb,i,*(short *)&rxF_ext[i],*(1+(short*)&rxF_ext[i])); +#endif + } + } + } + + rxF = &rxdataF[aarx][((symbol*(frame_parms->ofdm_symbol_size)))]; + + for (; i<12; i++) { + if (frame_parms->Ncp==0){ + if (i!=uespec_nushift+uespec_poffset && i!=uespec_nushift+uespec_poffset+4 && i!=(uespec_nushift+uespec_poffset+8)%12){ + dl_ch0_ext[j]=dl_ch0[i]; + rxF_ext[j++]=rxF[(1+i-6)]; +#ifdef DEBUG_DLSCH_DEMOD + printf("extract rb %d, re %d => (%d,%d)\n",rb,i,*(short *)&rxF_ext[i],*(1+(short*)&rxF_ext[i])); +#endif + } + } else { + if (i!=uespec_nushift+uespec_poffset && i!=uespec_nushift+uespec_poffset+3 && i!=uespec_nushift+uespec_poffset+6 && i!=(uespec_nushift+uespec_poffset+9)%12){ + dl_ch0_ext[j]=dl_ch0[i]; + rxF_ext[j++] = rxF[(1+i-6)]; +#ifdef DEBUG_DLSCH_DEMOD + printf("extract rb %d, re %d => (%d,%d)\n",rb,i,*(short *)&rxF_ext[i],*(1+(short*)&rxF_ext[i])); +#endif + } + } + } + + dl_ch0_ext+=9-frame_parms->Ncp; + rxF_ext+=9-frame_parms->Ncp; + + }// symbol_mod==0 + + } // rballoc==1 + else { + rxF = &rxdataF[aarx][((symbol*(frame_parms->ofdm_symbol_size)))]; + } + + dl_ch0+=12; + rxF+=7; + rb++; + + 
for (; rb<frame_parms->N_RB_DL; rb++) { + // printf("dlch_ext %d\n",dl_ch0_ext-&dl_ch_estimates_ext[aarx][0]); + // printf("rb %d (%p)\n",rb,rxF); + skip_half=0; + + if (rb < 32) + rb_alloc_ind = (rb_alloc[0]>>rb) & 1; + else if (rb < 64) + rb_alloc_ind = (rb_alloc[1]>>(rb-32)) & 1; + else if (rb < 96) + rb_alloc_ind = (rb_alloc[2]>>(rb-64)) & 1; + else if (rb < 100) + rb_alloc_ind = (rb_alloc[3]>>(rb-96)) & 1; + else + rb_alloc_ind = 0; + + if (rb_alloc_ind==1) + nb_rb++; + + // PBCH + if ((subframe==0) && (rb>((frame_parms->N_RB_DL>>1)-3)) && (rb<((frame_parms->N_RB_DL>>1)+3)) && (l>=nsymb>>1) && (l<((nsymb>>1) + 4))) { + rb_alloc_ind = 0; + } + + //PBCH subframe 0, symbols nsymb>>1 ... nsymb>>1 + 3 + if ((subframe==0) && (rb==((frame_parms->N_RB_DL>>1)-3)) && (l>=(nsymb>>1)) && (l<((nsymb>>1) + 4))) + skip_half=1; + else if ((subframe==0) && (rb==((frame_parms->N_RB_DL>>1)+3)) && (l>=(nsymb>>1)) && (l<((nsymb>>1) + 4))) + skip_half=2; + + //SSS + if (((subframe==0)||(subframe==5)) && (rb>((frame_parms->N_RB_DL>>1)-3)) && (rb<((frame_parms->N_RB_DL>>1)+3)) && (l==sss_symb) ) { + rb_alloc_ind = 0; + } + + //SSS + if (((subframe==0)||(subframe==5)) && (rb==((frame_parms->N_RB_DL>>1)-3)) && (l==sss_symb)) + skip_half=1; + else if (((subframe==0)||(subframe==5)) && (rb==((frame_parms->N_RB_DL>>1)+3)) && (l==sss_symb)) + skip_half=2; + + //PSS + if (frame_parms->frame_type == FDD) { + if (((subframe==0)||(subframe==5)) && (rb>((frame_parms->N_RB_DL>>1)-3)) && (rb<((frame_parms->N_RB_DL>>1)+3)) && (l==pss_symb) ) { + rb_alloc_ind = 0; + } + + if (((subframe==0)||(subframe==5)) && (rb==((frame_parms->N_RB_DL>>1)-3)) && (l==pss_symb)) + skip_half=1; + else if (((subframe==0)||(subframe==5)) && (rb==((frame_parms->N_RB_DL>>1)+3)) && (l==pss_symb)) + skip_half=2; + } + + if ((frame_parms->frame_type == TDD) && ((subframe==1)||(subframe==6))) { //TDD Subframe 1 and 6 + if ((rb>((frame_parms->N_RB_DL>>1)-3)) && (rb<((frame_parms->N_RB_DL>>1)+3)) && (l==pss_symb) ) { + 
rb_alloc_ind = 0; + } + + if ((rb==((frame_parms->N_RB_DL>>1)-3)) && (l==pss_symb)) + skip_half=1; + else if ((rb==((frame_parms->N_RB_DL>>1)+3)) && (l==pss_symb)) + skip_half=2; + } + + if (rb_alloc_ind==1) { +#ifdef DEBUG_DLSCH_DEMOD + printf("rb %d/symbol %d (skip_half %d)\n",rb,l,skip_half); +#endif + /* + printf("rb %d\n",rb); + for (i=0;i<12;i++) + printf("(%d %d)",((short *)dl_ch0)[i<<1],((short*)dl_ch0)[1+(i<<1)]); + printf("\n"); + */ + if (pilots==0 && uespec_pilots==0) { + //printf("Extracting w/o pilots (symbol %d, rb %d, skip_half %d)\n",l,rb,skip_half); + if (skip_half==1) { + memcpy(dl_ch0_ext,dl_ch0,6*sizeof(int)); + + for (i=0; i<6; i++) { + rxF_ext[i]=rxF[i]; +#ifdef DEBUG_DLSCH_DEMOD + printf("extract rb %d, re %d => (%d,%d)\n",rb,i,*(short *)&rxF_ext[i],*(1+(short*)&rxF_ext[i])); +#endif + } + + dl_ch0_ext+=6; + rxF_ext+=6; + + } else if (skip_half==2) { + memcpy(dl_ch0_ext,dl_ch0+6,6*sizeof(int)); + + for (i=0; i<6; i++) { + rxF_ext[i]=rxF[i+6]; +#ifdef DEBUG_DLSCH_DEMOD + printf("extract rb %d, re %d => (%d,%d)\n",rb,i,*(short *)&rxF_ext[i],*(1+(short*)&rxF_ext[i])); +#endif + } + + dl_ch0_ext+=6; + rxF_ext+=6; + + } else { + memcpy(dl_ch0_ext,dl_ch0,12*sizeof(int)); + //printf("symbol %d, extract rb %d, => (%d,%d)\n",symbol,rb,*(short *)&dl_ch0[j],*(1+(short*)&dl_ch0[i])); + + for (i=0; i<12; i++) { + rxF_ext[i]=rxF[i]; +#ifdef DEBUG_DLSCH_DEMOD + printf("extract rb %d, re %d => (%d,%d)\n",rb,i,*(short *)&rxF_ext[i],*(1+(short*)&rxF_ext[i])); +#endif + } + + dl_ch0_ext+=12; + rxF_ext+=12; + } + } else if (pilots==1 && uespec_pilots==0){ + //printf("Extracting with pilots (symbol %d, rb %d, skip_half %d)\n",l,rb,skip_half); + j=0; + + if (skip_half==1) { + for (i=0; i<6; i++) { + if (i!=((frame_parms->nushift+poffset)%6)) { + rxF_ext[j]=rxF[i]; + dl_ch0_ext[j++]=dl_ch0[i]; +#ifdef DEBUG_DLSCH_DEMOD + printf("extract rb %d, re %d => (%d,%d)\n",rb,i,*(short *)&rxF_ext[i],*(1+(short*)&rxF_ext[i])); +#endif + } + } + + dl_ch0_ext+=5; + rxF_ext+=5; 
+ } else if (skip_half==2) { + for (i=0; i<6; i++) { + if (i!=((frame_parms->nushift+poffset)%6)) { + rxF_ext[j]=rxF[(i+6)]; + dl_ch0_ext[j++]=dl_ch0[i+6]; +#ifdef DEBUG_DLSCH_DEMOD + printf("extract rb %d, re %d => (%d,%d)\n",rb,i,*(short *)&rxF_ext[i],*(1+(short*)&rxF_ext[i])); +#endif + } + } + + dl_ch0_ext+=5; + rxF_ext+=5; + } else { + for (i=0; i<12; i++) { + if ((i!=(frame_parms->nushift+poffset)) && + (i!=((frame_parms->nushift+poffset+6)%12))) { + rxF_ext[j]=rxF[i]; +#ifdef DEBUG_DLSCH_DEMOD + printf("extract rb %d, re %d => (%d,%d)\n",rb,i,*(short *)&rxF_ext[j],*(1+(short*)&rxF_ext[j])); +#endif + dl_ch0_ext[j++]=dl_ch0[i]; + } + } + + dl_ch0_ext+=10; + rxF_ext+=10; + } + } else if(pilots==0 && uespec_pilots==1) { + j=0; + + if (skip_half==1) { + if (frame_parms->Ncp==0){ + for (i=0; i<6; i++) { + if (i!=uespec_nushift+uespec_poffset && i!=uespec_nushift+uespec_poffset+4 && i!=(uespec_nushift+uespec_poffset+8)%12){ + rxF_ext[j]=rxF[i]; + dl_ch0_ext[j++]=dl_ch0[i]; +#ifdef DEBUG_DLSCH_DEMOD + printf("extract rb %d, re %d => (%d,%d)\n",rb,i,*(short *)&rxF_ext[i],*(1+(short*)&rxF_ext[i])); +#endif + } + } + dl_ch0_ext+=6-(uespec_nushift+uespec_poffset<6)-(uespec_nushift+uespec_poffset+4<6)-((uespec_nushift+uespec_poffset+8)%12<6); + rxF_ext+=6-(uespec_nushift+uespec_poffset<6)-(uespec_nushift+uespec_poffset+4<6)-((uespec_nushift+uespec_poffset+8)%12<6); + + } else{ + for (i=0; i<6; i++) { + if (i!=uespec_nushift+uespec_poffset && i!=uespec_nushift+uespec_poffset+3 && i!=uespec_nushift+uespec_poffset+6 && i!=(uespec_nushift+uespec_poffset+9)%12){ + rxF_ext[j]=rxF[i]; + dl_ch0_ext[j++]=dl_ch0[i]; +#ifdef DEBUG_DLSCH_DEMOD + printf("extract rb %d, re %d => (%d,%d)\n",rb,i,*(short *)&rxF_ext[i],*(1+(short*)&rxF_ext[i])); +#endif + } + } + dl_ch0_ext+=4; + rxF_ext+=4; + } + + } else if (skip_half==2) { + if(frame_parms->Ncp==0){ + for (i=0; i<6; i++) { + if (i!=uespec_nushift+uespec_poffset && i!=uespec_nushift+uespec_poffset+4 && 
i!=(uespec_nushift+uespec_poffset+8)%12){ + rxF_ext[j]=rxF[i+6]; + dl_ch0_ext[j++]=dl_ch0[i+6]; +#ifdef DEBUG_DLSCH_DEMOD + printf("extract rb %d, re %d => (%d,%d)\n",rb,i,*(short *)&rxF_ext[i],*(1+(short*)&rxF_ext[i])); +#endif + } + } + dl_ch0_ext+=6-(uespec_nushift+uespec_poffset>6)-(uespec_nushift+uespec_poffset+4>6)-((uespec_nushift+uespec_poffset+8)%12>6); + rxF_ext+=6-(uespec_nushift+uespec_poffset>6)-(uespec_nushift+uespec_poffset+4>6)-((uespec_nushift+uespec_poffset+8)%12>6); + + } else { + for (i=0; i<6; i++) { + if (i!=uespec_nushift+uespec_poffset && i!=uespec_nushift+uespec_poffset+3 && i!=uespec_nushift+uespec_poffset+6 && i!=(uespec_nushift+uespec_poffset+9)%12){ + rxF_ext[j]=rxF[(i+6)]; + dl_ch0_ext[j++]=dl_ch0[i+6]; +#ifdef DEBUG_DLSCH_DEMOD + printf("extract rb %d, re %d => (%d,%d)\n",rb,i,*(short *)&rxF_ext[i],*(1+(short*)&rxF_ext[i])); +#endif + } + } + dl_ch0_ext+=4; + rxF_ext+=4; + } + + } else { + for (i=0; i<12; i++){ + if (frame_parms->Ncp==0){ + if (i!=uespec_nushift+uespec_poffset && i!=uespec_nushift+uespec_poffset+4 && i!=(uespec_nushift+uespec_poffset+8)%12){ + rxF_ext[j] = rxF[i]; + dl_ch0_ext[j++]=dl_ch0[i]; +#ifdef DEBUG_DLSCH_DEMOD + printf("extract rb %d, re %d => (%d,%d)\n",rb,i,*(short *)&rxF_ext[i],*(1+(short*)&rxF_ext[i])); +#endif + } + } else{ + if (i!=uespec_nushift+uespec_poffset && i!=uespec_nushift+uespec_poffset+3 && i!=uespec_nushift+uespec_poffset+6 && i!=(uespec_nushift+uespec_poffset+9)%12){ + rxF_ext[j] = rxF[i]; + dl_ch0_ext[j++]=dl_ch0[i]; +#ifdef DEBUG_DLSCH_DEMOD + printf("extract rb %d, re %d => (%d,%d)\n",rb,i,*(short *)&rxF_ext[i],*(1+(short*)&rxF_ext[i])); +#endif + } + } + } + + dl_ch0_ext+=9-frame_parms->Ncp; + rxF_ext+=9-frame_parms->Ncp; + + } + + }// pilots=0 + } + + dl_ch0+=12; + rxF+=12; + } + } + } + + _mm_empty(); + _m_empty(); + + return(nb_rb/frame_parms->nb_antennas_rx); +} + +//==============================================================================================
+
+/**
+ * \brief Dump the PDSCH receiver buffers of one eNB for one HARQ round through write_output().
+ *
+ * Every buffer is read from ue->pdsch_vars[ue->current_thread_id[subframe]][eNB_id]. For each one a
+ * file name ("dlsch<eNB_id>_..._r<round>...m") and a variable name are formatted and handed to
+ * write_output() together with the buffer and its length. The ".m" suffix suggests Matlab/Octave
+ * scripts; write_output() is defined elsewhere -- TODO confirm its output format.
+ *
+ * Unless noted below, 12*N_RB_DL*nsymb entries are written per buffer.
+ *
+ * \param ue                      UE context; frame_parms, pdsch_vars, current_thread_id and
+ *                                transmission_mode are read, nothing is written back.
+ * \param eNB_id                  index into pdsch_vars[..][] and transmission_mode[]; also part of
+ *                                every file/variable name.
+ * \param subframe                index into ue->current_thread_id[] used to select the pdsch_vars row.
+ * \param coded_bits_per_codeword only element [0] is read; it is the length of the LLR dump.
+ * \param round                   part of every file/variable name; also indexes dl_ch_rho_ext and
+ *                                rxdataF_comp1.
+ * \param harq_pid                first index of dl_ch_rho_ext and rxdataF_comp1.
+ */
+void dump_dlsch2(PHY_VARS_UE *ue,uint8_t eNB_id,uint8_t subframe,unsigned int *coded_bits_per_codeword,int round, unsigned char harq_pid)
+{
+  /* 14 symbols when Ncp==0, 12 otherwise */
+  unsigned int nsymb = (ue->frame_parms.Ncp == 0) ? 14 : 12;
+  /* The longest name, "dlsch%d_bf_ch_r%d_ext00.m", has 21 literal characters. With up to 3 digits
+     for the uint8_t eNB_id, up to 11 characters for an int round and the terminating NUL it needs
+     36 bytes, which did not fit the former 32-byte buffers. 64 bytes hold every name untruncated,
+     and snprintf() bounds each write in any case. */
+  char fname[64],vname[64];
+  int N_RB_DL=ue->frame_parms.N_RB_DL;
+  /* common dump length: 12 entries per resource block and symbol */
+  const unsigned int n_re = 12*N_RB_DL*nsymb;
+
+  /* received signal after RB extraction, first rx antenna */
+  snprintf(fname,sizeof(fname),"dlsch%d_rxF_r%d_ext0.m",eNB_id,round);
+  snprintf(vname,sizeof(vname),"dl%d_rxF_r%d_ext0",eNB_id,round);
+  write_output(fname,vname,ue->pdsch_vars[ue->current_thread_id[subframe]][eNB_id]->rxdataF_ext[0],n_re,1,1);
+
+  if (ue->frame_parms.nb_antennas_rx >1) {
+    snprintf(fname,sizeof(fname),"dlsch%d_rxF_r%d_ext1.m",eNB_id,round);
+    snprintf(vname,sizeof(vname),"dl%d_rxF_r%d_ext1",eNB_id,round);
+    write_output(fname,vname,ue->pdsch_vars[ue->current_thread_id[subframe]][eNB_id]->rxdataF_ext[1],n_re,1,1);
+  }
+
+  /* extracted channel estimates, entry [0] */
+  snprintf(fname,sizeof(fname),"dlsch%d_ch_r%d_ext00.m",eNB_id,round);
+  snprintf(vname,sizeof(vname),"dl%d_ch_r%d_ext00",eNB_id,round);
+  write_output(fname,vname,ue->pdsch_vars[ue->current_thread_id[subframe]][eNB_id]->dl_ch_estimates_ext[0],n_re,1,1);
+
+  /* beamforming channel estimates are dumped only for transmission mode 7 */
+  if (ue->transmission_mode[eNB_id]==7){
+    snprintf(fname,sizeof(fname),"dlsch%d_bf_ch_r%d.m",eNB_id,round);
+    snprintf(vname,sizeof(vname),"dl%d_bf_ch_r%d",eNB_id,round);
+    /* this buffer is dumped with 512 entries per symbol instead of 12*N_RB_DL */
+    write_output(fname,vname,ue->pdsch_vars[ue->current_thread_id[subframe]][eNB_id]->dl_bf_ch_estimates[0],512*nsymb,1,1);
+
+    snprintf(fname,sizeof(fname),"dlsch%d_bf_ch_r%d_ext00.m",eNB_id,round);
+    snprintf(vname,sizeof(vname),"dl%d_bf_ch_r%d_ext00",eNB_id,round);
+    write_output(fname,vname,ue->pdsch_vars[ue->current_thread_id[subframe]][eNB_id]->dl_bf_ch_estimates_ext[0],n_re,1,1);
+  }
+
+  if (ue->frame_parms.nb_antennas_rx == 2) {
+    snprintf(fname,sizeof(fname),"dlsch%d_ch_r%d_ext01.m",eNB_id,round);
+    snprintf(vname,sizeof(vname),"dl%d_ch_r%d_ext01",eNB_id,round);
+    write_output(fname,vname,ue->pdsch_vars[ue->current_thread_id[subframe]][eNB_id]->dl_ch_estimates_ext[1],n_re,1,1);
+  }
+
+  if (ue->frame_parms.nb_antenna_ports_eNB == 2) {
+    snprintf(fname,sizeof(fname),"dlsch%d_ch_r%d_ext10.m",eNB_id,round);
+    snprintf(vname,sizeof(vname),"dl%d_ch_r%d_ext10",eNB_id,round);
+    write_output(fname,vname,ue->pdsch_vars[ue->current_thread_id[subframe]][eNB_id]->dl_ch_estimates_ext[2],n_re,1,1);
+
+    if (ue->frame_parms.nb_antennas_rx == 2) {
+      snprintf(fname,sizeof(fname),"dlsch%d_ch_r%d_ext11.m",eNB_id,round);
+      snprintf(vname,sizeof(vname),"dl%d_ch_r%d_ext11",eNB_id,round);
+      write_output(fname,vname,ue->pdsch_vars[ue->current_thread_id[subframe]][eNB_id]->dl_ch_estimates_ext[3],n_re,1,1);
+    }
+  }
+
+  /* UE-specific pilots: 12*N_RB_DL entries, no per-symbol factor */
+  snprintf(fname,sizeof(fname),"dlsch%d_rxF_r%d_uespec0.m",eNB_id,round);
+  snprintf(vname,sizeof(vname),"dl%d_rxF_r%d_uespec0",eNB_id,round);
+  write_output(fname,vname,ue->pdsch_vars[ue->current_thread_id[subframe]][eNB_id]->rxdataF_uespec_pilots[0],12*N_RB_DL,1,1);
+
+  /* NOTE(review): the two rho variable names place eNB_id after "r" and round last, unlike the
+     "dl<eNB_id>_..._r<round>" pattern used everywhere else. Kept as is because scripts reading the
+     dumps may rely on these names -- confirm before harmonizing. */
+  snprintf(fname,sizeof(fname),"dlsch%d_r%d_rho.m",eNB_id,round);
+  snprintf(vname,sizeof(vname),"dl_rho_r%d_%d",eNB_id,round);
+  write_output(fname,vname,ue->pdsch_vars[ue->current_thread_id[subframe]][eNB_id]->dl_ch_rho_ext[harq_pid][round][0],n_re,1,1);
+
+  snprintf(fname,sizeof(fname),"dlsch%d_r%d_rho2.m",eNB_id,round);
+  snprintf(vname,sizeof(vname),"dl_rho2_r%d_%d",eNB_id,round);
+  write_output(fname,vname,ue->pdsch_vars[ue->current_thread_id[subframe]][eNB_id]->dl_ch_rho2_ext[0],n_re,1,1);
+
+  snprintf(fname,sizeof(fname),"dlsch%d_rxF_r%d_comp0.m",eNB_id,round);
+  snprintf(vname,sizeof(vname),"dl%d_rxF_r%d_comp0",eNB_id,round);
+  write_output(fname,vname,ue->pdsch_vars[ue->current_thread_id[subframe]][eNB_id]->rxdataF_comp0[0],n_re,1,1);
+
+  if (ue->frame_parms.nb_antenna_ports_eNB == 2) {
+    snprintf(fname,sizeof(fname),"dlsch%d_rxF_r%d_comp1.m",eNB_id,round);
+    snprintf(vname,sizeof(vname),"dl%d_rxF_r%d_comp1",eNB_id,round);
+    write_output(fname,vname,ue->pdsch_vars[ue->current_thread_id[subframe]][eNB_id]->rxdataF_comp1[harq_pid][round][0],n_re,1,1);
+  }
+
+  /* LLRs: length comes from the caller and the last write_output() argument is 0 instead of 1 */
+  snprintf(fname,sizeof(fname),"dlsch%d_rxF_r%d_llr.m",eNB_id,round);
+  snprintf(vname,sizeof(vname),"dl%d_r%d_llr",eNB_id,round);
+  write_output(fname,vname, ue->pdsch_vars[ue->current_thread_id[subframe]][eNB_id]->llr[0],coded_bits_per_codeword[0],1,0);
+
+  snprintf(fname,sizeof(fname),"dlsch%d_r%d_mag1.m",eNB_id,round);
+  snprintf(vname,sizeof(vname),"dl%d_r%d_mag1",eNB_id,round);
+  write_output(fname,vname,ue->pdsch_vars[ue->current_thread_id[subframe]][eNB_id]->dl_ch_mag0[0],n_re,1,1);
+
+  snprintf(fname,sizeof(fname),"dlsch%d_r%d_mag2.m",eNB_id,round);
+  snprintf(vname,sizeof(vname),"dl%d_r%d_mag2",eNB_id,round);
+  write_output(fname,vname,ue->pdsch_vars[ue->current_thread_id[subframe]][eNB_id]->dl_ch_magb0[0],n_re,1,1);
+}
+
+#ifdef DEBUG_DLSCH_DEMOD +/* +void print_bytes(char *s,__m128i *x) +{ + + char *tempb = (char *)x; + + printf("%s : %d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d\n",s, + tempb[0],tempb[1],tempb[2],tempb[3],tempb[4],tempb[5],tempb[6],tempb[7], + tempb[8],tempb[9],tempb[10],tempb[11],tempb[12],tempb[13],tempb[14],tempb[15] + ); + +} + +void print_shorts(char *s,__m128i *x) +{ + + short *tempb = (short *)x; + printf("%s : %d,%d,%d,%d,%d,%d,%d,%d\n",s, + tempb[0],tempb[1],tempb[2],tempb[3],tempb[4],tempb[5],tempb[6],tempb[7]); + +} + +void print_shorts2(char *s,__m64 *x) +{ + + short *tempb = (short *)x; + printf("%s : %d,%d,%d,%d\n",s, + tempb[0],tempb[1],tempb[2],tempb[3]); + +} + +void print_ints(char *s,__m128i *x) +{ + + int *tempb = (int *)x; + printf("%s : %d,%d,%d,%d\n",s, + tempb[0],tempb[1],tempb[2],tempb[3]); + +}*/ +#endif diff --git a/openair1/PHY/LTE_TRANSPORT/dlsch_llr_computation.c b/openair1/PHY/LTE_TRANSPORT/dlsch_llr_computation.c new file mode 100644 index 0000000000000000000000000000000000000000..7682045ae1307ca6a10ee83ef071091f7e28528d --- /dev/null +++
b/openair1/PHY/LTE_TRANSPORT/dlsch_llr_computation.c @@ -0,0 +1,8899 @@ +/* + * Licensed to the OpenAirInterface (OAI) Software Alliance under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The OpenAirInterface Software Alliance licenses this file to You under + * the OAI Public License, Version 1.1 (the "License"); you may not use this file + * except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.openairinterface.org/?page_id=698 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + *------------------------------------------------------------------------------- + * For more information about the OpenAirInterface (OAI) Software Alliance: + * contact@openairinterface.org + */ + +/*! \file PHY/LTE_TRANSPORT/dlsch_llr_computation.c + * \brief Top-level routines for LLR computation of the PDSCH physical channel from 36-211, V8.6 2009-03 + * \author R. Knopp, F. Kaltenberger,A. Bhamri, S. Aubert, S. 
Wagner, X Jiang + * \date 2011 + * \version 0.1 + * \company Eurecom + * \email: knopp@eurecom.fr,florian.kaltenberger@eurecom.fr,ankit.bhamri@eurecom.fr,sebastien.aubert@eurecom.fr, sebastian.wagner@eurecom.fr + * \note + * \warning + */ + +#include "PHY/defs.h" +#include "PHY/TOOLS/defs.h" +#include "PHY/extern.h" +#include "defs.h" +#include "extern.h" +#include "PHY/sse_intrin.h" + +//#define DEBUG_LLR_SIC + + +int16_t zeros[8] __attribute__ ((aligned(16))) = {0,0,0,0,0,0,0,0}; +int16_t ones[8] __attribute__ ((aligned(16))) = {0xffff,0xffff,0xffff,0xffff,0xffff,0xffff,0xffff,0xffff}; +#if defined(__x86_64__) || defined(__i386__) +__m128i rho_rpi __attribute__ ((aligned(16))); +__m128i rho_rmi __attribute__ ((aligned(16))); +__m128i rho_rpi_1_1 __attribute__ ((aligned(16))); +__m128i rho_rpi_1_3 __attribute__ ((aligned(16))); +__m128i rho_rpi_1_5 __attribute__ ((aligned(16))); +__m128i rho_rpi_1_7 __attribute__ ((aligned(16))); +__m128i rho_rpi_3_1 __attribute__ ((aligned(16))); +__m128i rho_rpi_3_3 __attribute__ ((aligned(16))); +__m128i rho_rpi_3_5 __attribute__ ((aligned(16))); +__m128i rho_rpi_3_7 __attribute__ ((aligned(16))); +__m128i rho_rpi_5_1 __attribute__ ((aligned(16))); +__m128i rho_rpi_5_3 __attribute__ ((aligned(16))); +__m128i rho_rpi_5_5 __attribute__ ((aligned(16))); +__m128i rho_rpi_5_7 __attribute__ ((aligned(16))); +__m128i rho_rpi_7_1 __attribute__ ((aligned(16))); +__m128i rho_rpi_7_3 __attribute__ ((aligned(16))); +__m128i rho_rpi_7_5 __attribute__ ((aligned(16))); +__m128i rho_rpi_7_7 __attribute__ ((aligned(16))); +__m128i rho_rmi_1_1 __attribute__ ((aligned(16))); +__m128i rho_rmi_1_3 __attribute__ ((aligned(16))); +__m128i rho_rmi_1_5 __attribute__ ((aligned(16))); +__m128i rho_rmi_1_7 __attribute__ ((aligned(16))); +__m128i rho_rmi_3_1 __attribute__ ((aligned(16))); +__m128i rho_rmi_3_3 __attribute__ ((aligned(16))); +__m128i rho_rmi_3_5 __attribute__ ((aligned(16))); +__m128i rho_rmi_3_7 __attribute__ ((aligned(16))); +__m128i 
rho_rmi_5_1 __attribute__ ((aligned(16))); +__m128i rho_rmi_5_3 __attribute__ ((aligned(16))); +__m128i rho_rmi_5_5 __attribute__ ((aligned(16))); +__m128i rho_rmi_5_7 __attribute__ ((aligned(16))); +__m128i rho_rmi_7_1 __attribute__ ((aligned(16))); +__m128i rho_rmi_7_3 __attribute__ ((aligned(16))); +__m128i rho_rmi_7_5 __attribute__ ((aligned(16))); +__m128i rho_rmi_7_7 __attribute__ ((aligned(16))); + +__m128i psi_r_m7_m7 __attribute__ ((aligned(16))); +__m128i psi_r_m7_m5 __attribute__ ((aligned(16))); +__m128i psi_r_m7_m3 __attribute__ ((aligned(16))); +__m128i psi_r_m7_m1 __attribute__ ((aligned(16))); +__m128i psi_r_m7_p1 __attribute__ ((aligned(16))); +__m128i psi_r_m7_p3 __attribute__ ((aligned(16))); +__m128i psi_r_m7_p5 __attribute__ ((aligned(16))); +__m128i psi_r_m7_p7 __attribute__ ((aligned(16))); +__m128i psi_r_m5_m7 __attribute__ ((aligned(16))); +__m128i psi_r_m5_m5 __attribute__ ((aligned(16))); +__m128i psi_r_m5_m3 __attribute__ ((aligned(16))); +__m128i psi_r_m5_m1 __attribute__ ((aligned(16))); +__m128i psi_r_m5_p1 __attribute__ ((aligned(16))); +__m128i psi_r_m5_p3 __attribute__ ((aligned(16))); +__m128i psi_r_m5_p5 __attribute__ ((aligned(16))); +__m128i psi_r_m5_p7 __attribute__ ((aligned(16))); +__m128i psi_r_m3_m7 __attribute__ ((aligned(16))); +__m128i psi_r_m3_m5 __attribute__ ((aligned(16))); +__m128i psi_r_m3_m3 __attribute__ ((aligned(16))); +__m128i psi_r_m3_m1 __attribute__ ((aligned(16))); +__m128i psi_r_m3_p1 __attribute__ ((aligned(16))); +__m128i psi_r_m3_p3 __attribute__ ((aligned(16))); +__m128i psi_r_m3_p5 __attribute__ ((aligned(16))); +__m128i psi_r_m3_p7 __attribute__ ((aligned(16))); +__m128i psi_r_m1_m7 __attribute__ ((aligned(16))); +__m128i psi_r_m1_m5 __attribute__ ((aligned(16))); +__m128i psi_r_m1_m3 __attribute__ ((aligned(16))); +__m128i psi_r_m1_m1 __attribute__ ((aligned(16))); +__m128i psi_r_m1_p1 __attribute__ ((aligned(16))); +__m128i psi_r_m1_p3 __attribute__ ((aligned(16))); +__m128i psi_r_m1_p5 
__attribute__ ((aligned(16))); +__m128i psi_r_m1_p7 __attribute__ ((aligned(16))); +__m128i psi_r_p1_m7 __attribute__ ((aligned(16))); +__m128i psi_r_p1_m5 __attribute__ ((aligned(16))); +__m128i psi_r_p1_m3 __attribute__ ((aligned(16))); +__m128i psi_r_p1_m1 __attribute__ ((aligned(16))); +__m128i psi_r_p1_p1 __attribute__ ((aligned(16))); +__m128i psi_r_p1_p3 __attribute__ ((aligned(16))); +__m128i psi_r_p1_p5 __attribute__ ((aligned(16))); +__m128i psi_r_p1_p7 __attribute__ ((aligned(16))); +__m128i psi_r_p3_m7 __attribute__ ((aligned(16))); +__m128i psi_r_p3_m5 __attribute__ ((aligned(16))); +__m128i psi_r_p3_m3 __attribute__ ((aligned(16))); +__m128i psi_r_p3_m1 __attribute__ ((aligned(16))); +__m128i psi_r_p3_p1 __attribute__ ((aligned(16))); +__m128i psi_r_p3_p3 __attribute__ ((aligned(16))); +__m128i psi_r_p3_p5 __attribute__ ((aligned(16))); +__m128i psi_r_p3_p7 __attribute__ ((aligned(16))); +__m128i psi_r_p5_m7 __attribute__ ((aligned(16))); +__m128i psi_r_p5_m5 __attribute__ ((aligned(16))); +__m128i psi_r_p5_m3 __attribute__ ((aligned(16))); +__m128i psi_r_p5_m1 __attribute__ ((aligned(16))); +__m128i psi_r_p5_p1 __attribute__ ((aligned(16))); +__m128i psi_r_p5_p3 __attribute__ ((aligned(16))); +__m128i psi_r_p5_p5 __attribute__ ((aligned(16))); +__m128i psi_r_p5_p7 __attribute__ ((aligned(16))); +__m128i psi_r_p7_m7 __attribute__ ((aligned(16))); +__m128i psi_r_p7_m5 __attribute__ ((aligned(16))); +__m128i psi_r_p7_m3 __attribute__ ((aligned(16))); +__m128i psi_r_p7_m1 __attribute__ ((aligned(16))); +__m128i psi_r_p7_p1 __attribute__ ((aligned(16))); +__m128i psi_r_p7_p3 __attribute__ ((aligned(16))); +__m128i psi_r_p7_p5 __attribute__ ((aligned(16))); +__m128i psi_r_p7_p7 __attribute__ ((aligned(16))); + +__m128i psi_i_m7_m7 __attribute__ ((aligned(16))); +__m128i psi_i_m7_m5 __attribute__ ((aligned(16))); +__m128i psi_i_m7_m3 __attribute__ ((aligned(16))); +__m128i psi_i_m7_m1 __attribute__ ((aligned(16))); +__m128i psi_i_m7_p1 __attribute__ 
((aligned(16))); +__m128i psi_i_m7_p3 __attribute__ ((aligned(16))); +__m128i psi_i_m7_p5 __attribute__ ((aligned(16))); +__m128i psi_i_m7_p7 __attribute__ ((aligned(16))); +__m128i psi_i_m5_m7 __attribute__ ((aligned(16))); +__m128i psi_i_m5_m5 __attribute__ ((aligned(16))); +__m128i psi_i_m5_m3 __attribute__ ((aligned(16))); +__m128i psi_i_m5_m1 __attribute__ ((aligned(16))); +__m128i psi_i_m5_p1 __attribute__ ((aligned(16))); +__m128i psi_i_m5_p3 __attribute__ ((aligned(16))); +__m128i psi_i_m5_p5 __attribute__ ((aligned(16))); +__m128i psi_i_m5_p7 __attribute__ ((aligned(16))); +__m128i psi_i_m3_m7 __attribute__ ((aligned(16))); +__m128i psi_i_m3_m5 __attribute__ ((aligned(16))); +__m128i psi_i_m3_m3 __attribute__ ((aligned(16))); +__m128i psi_i_m3_m1 __attribute__ ((aligned(16))); +__m128i psi_i_m3_p1 __attribute__ ((aligned(16))); +__m128i psi_i_m3_p3 __attribute__ ((aligned(16))); +__m128i psi_i_m3_p5 __attribute__ ((aligned(16))); +__m128i psi_i_m3_p7 __attribute__ ((aligned(16))); +__m128i psi_i_m1_m7 __attribute__ ((aligned(16))); +__m128i psi_i_m1_m5 __attribute__ ((aligned(16))); +__m128i psi_i_m1_m3 __attribute__ ((aligned(16))); +__m128i psi_i_m1_m1 __attribute__ ((aligned(16))); +__m128i psi_i_m1_p1 __attribute__ ((aligned(16))); +__m128i psi_i_m1_p3 __attribute__ ((aligned(16))); +__m128i psi_i_m1_p5 __attribute__ ((aligned(16))); +__m128i psi_i_m1_p7 __attribute__ ((aligned(16))); +__m128i psi_i_p1_m7 __attribute__ ((aligned(16))); +__m128i psi_i_p1_m5 __attribute__ ((aligned(16))); +__m128i psi_i_p1_m3 __attribute__ ((aligned(16))); +__m128i psi_i_p1_m1 __attribute__ ((aligned(16))); +__m128i psi_i_p1_p1 __attribute__ ((aligned(16))); +__m128i psi_i_p1_p3 __attribute__ ((aligned(16))); +__m128i psi_i_p1_p5 __attribute__ ((aligned(16))); +__m128i psi_i_p1_p7 __attribute__ ((aligned(16))); +__m128i psi_i_p3_m7 __attribute__ ((aligned(16))); +__m128i psi_i_p3_m5 __attribute__ ((aligned(16))); +__m128i psi_i_p3_m3 __attribute__ ((aligned(16))); 
+__m128i psi_i_p3_m1 __attribute__ ((aligned(16))); +__m128i psi_i_p3_p1 __attribute__ ((aligned(16))); +__m128i psi_i_p3_p3 __attribute__ ((aligned(16))); +__m128i psi_i_p3_p5 __attribute__ ((aligned(16))); +__m128i psi_i_p3_p7 __attribute__ ((aligned(16))); +__m128i psi_i_p5_m7 __attribute__ ((aligned(16))); +__m128i psi_i_p5_m5 __attribute__ ((aligned(16))); +__m128i psi_i_p5_m3 __attribute__ ((aligned(16))); +__m128i psi_i_p5_m1 __attribute__ ((aligned(16))); +__m128i psi_i_p5_p1 __attribute__ ((aligned(16))); +__m128i psi_i_p5_p3 __attribute__ ((aligned(16))); +__m128i psi_i_p5_p5 __attribute__ ((aligned(16))); +__m128i psi_i_p5_p7 __attribute__ ((aligned(16))); +__m128i psi_i_p7_m7 __attribute__ ((aligned(16))); +__m128i psi_i_p7_m5 __attribute__ ((aligned(16))); +__m128i psi_i_p7_m3 __attribute__ ((aligned(16))); +__m128i psi_i_p7_m1 __attribute__ ((aligned(16))); +__m128i psi_i_p7_p1 __attribute__ ((aligned(16))); +__m128i psi_i_p7_p3 __attribute__ ((aligned(16))); +__m128i psi_i_p7_p5 __attribute__ ((aligned(16))); +__m128i psi_i_p7_p7 __attribute__ ((aligned(16))); + +__m128i a_r_m7_m7 __attribute__ ((aligned(16))); +__m128i a_r_m7_m5 __attribute__ ((aligned(16))); +__m128i a_r_m7_m3 __attribute__ ((aligned(16))); +__m128i a_r_m7_m1 __attribute__ ((aligned(16))); +__m128i a_r_m7_p1 __attribute__ ((aligned(16))); +__m128i a_r_m7_p3 __attribute__ ((aligned(16))); +__m128i a_r_m7_p5 __attribute__ ((aligned(16))); +__m128i a_r_m7_p7 __attribute__ ((aligned(16))); +__m128i a_r_m5_m7 __attribute__ ((aligned(16))); +__m128i a_r_m5_m5 __attribute__ ((aligned(16))); +__m128i a_r_m5_m3 __attribute__ ((aligned(16))); +__m128i a_r_m5_m1 __attribute__ ((aligned(16))); +__m128i a_r_m5_p1 __attribute__ ((aligned(16))); +__m128i a_r_m5_p3 __attribute__ ((aligned(16))); +__m128i a_r_m5_p5 __attribute__ ((aligned(16))); +__m128i a_r_m5_p7 __attribute__ ((aligned(16))); +__m128i a_r_m3_m7 __attribute__ ((aligned(16))); +__m128i a_r_m3_m5 __attribute__ ((aligned(16))); 
+__m128i a_r_m3_m3 __attribute__ ((aligned(16))); +__m128i a_r_m3_m1 __attribute__ ((aligned(16))); +__m128i a_r_m3_p1 __attribute__ ((aligned(16))); +__m128i a_r_m3_p3 __attribute__ ((aligned(16))); +__m128i a_r_m3_p5 __attribute__ ((aligned(16))); +__m128i a_r_m3_p7 __attribute__ ((aligned(16))); +__m128i a_r_m1_m7 __attribute__ ((aligned(16))); +__m128i a_r_m1_m5 __attribute__ ((aligned(16))); +__m128i a_r_m1_m3 __attribute__ ((aligned(16))); +__m128i a_r_m1_m1 __attribute__ ((aligned(16))); +__m128i a_r_m1_p1 __attribute__ ((aligned(16))); +__m128i a_r_m1_p3 __attribute__ ((aligned(16))); +__m128i a_r_m1_p5 __attribute__ ((aligned(16))); +__m128i a_r_m1_p7 __attribute__ ((aligned(16))); +__m128i a_r_p1_m7 __attribute__ ((aligned(16))); +__m128i a_r_p1_m5 __attribute__ ((aligned(16))); +__m128i a_r_p1_m3 __attribute__ ((aligned(16))); +__m128i a_r_p1_m1 __attribute__ ((aligned(16))); +__m128i a_r_p1_p1 __attribute__ ((aligned(16))); +__m128i a_r_p1_p3 __attribute__ ((aligned(16))); +__m128i a_r_p1_p5 __attribute__ ((aligned(16))); +__m128i a_r_p1_p7 __attribute__ ((aligned(16))); +__m128i a_r_p3_m7 __attribute__ ((aligned(16))); +__m128i a_r_p3_m5 __attribute__ ((aligned(16))); +__m128i a_r_p3_m3 __attribute__ ((aligned(16))); +__m128i a_r_p3_m1 __attribute__ ((aligned(16))); +__m128i a_r_p3_p1 __attribute__ ((aligned(16))); +__m128i a_r_p3_p3 __attribute__ ((aligned(16))); +__m128i a_r_p3_p5 __attribute__ ((aligned(16))); +__m128i a_r_p3_p7 __attribute__ ((aligned(16))); +__m128i a_r_p5_m7 __attribute__ ((aligned(16))); +__m128i a_r_p5_m5 __attribute__ ((aligned(16))); +__m128i a_r_p5_m3 __attribute__ ((aligned(16))); +__m128i a_r_p5_m1 __attribute__ ((aligned(16))); +__m128i a_r_p5_p1 __attribute__ ((aligned(16))); +__m128i a_r_p5_p3 __attribute__ ((aligned(16))); +__m128i a_r_p5_p5 __attribute__ ((aligned(16))); +__m128i a_r_p5_p7 __attribute__ ((aligned(16))); +__m128i a_r_p7_m7 __attribute__ ((aligned(16))); +__m128i a_r_p7_m5 __attribute__ ((aligned(16))); 
+__m128i a_r_p7_m3 __attribute__ ((aligned(16))); +__m128i a_r_p7_m1 __attribute__ ((aligned(16))); +__m128i a_r_p7_p1 __attribute__ ((aligned(16))); +__m128i a_r_p7_p3 __attribute__ ((aligned(16))); +__m128i a_r_p7_p5 __attribute__ ((aligned(16))); +__m128i a_r_p7_p7 __attribute__ ((aligned(16))); + +__m128i a_i_m7_m7 __attribute__ ((aligned(16))); +__m128i a_i_m7_m5 __attribute__ ((aligned(16))); +__m128i a_i_m7_m3 __attribute__ ((aligned(16))); +__m128i a_i_m7_m1 __attribute__ ((aligned(16))); +__m128i a_i_m7_p1 __attribute__ ((aligned(16))); +__m128i a_i_m7_p3 __attribute__ ((aligned(16))); +__m128i a_i_m7_p5 __attribute__ ((aligned(16))); +__m128i a_i_m7_p7 __attribute__ ((aligned(16))); +__m128i a_i_m5_m7 __attribute__ ((aligned(16))); +__m128i a_i_m5_m5 __attribute__ ((aligned(16))); +__m128i a_i_m5_m3 __attribute__ ((aligned(16))); +__m128i a_i_m5_m1 __attribute__ ((aligned(16))); +__m128i a_i_m5_p1 __attribute__ ((aligned(16))); +__m128i a_i_m5_p3 __attribute__ ((aligned(16))); +__m128i a_i_m5_p5 __attribute__ ((aligned(16))); +__m128i a_i_m5_p7 __attribute__ ((aligned(16))); +__m128i a_i_m3_m7 __attribute__ ((aligned(16))); +__m128i a_i_m3_m5 __attribute__ ((aligned(16))); +__m128i a_i_m3_m3 __attribute__ ((aligned(16))); +__m128i a_i_m3_m1 __attribute__ ((aligned(16))); +__m128i a_i_m3_p1 __attribute__ ((aligned(16))); +__m128i a_i_m3_p3 __attribute__ ((aligned(16))); +__m128i a_i_m3_p5 __attribute__ ((aligned(16))); +__m128i a_i_m3_p7 __attribute__ ((aligned(16))); +__m128i a_i_m1_m7 __attribute__ ((aligned(16))); +__m128i a_i_m1_m5 __attribute__ ((aligned(16))); +__m128i a_i_m1_m3 __attribute__ ((aligned(16))); +__m128i a_i_m1_m1 __attribute__ ((aligned(16))); +__m128i a_i_m1_p1 __attribute__ ((aligned(16))); +__m128i a_i_m1_p3 __attribute__ ((aligned(16))); +__m128i a_i_m1_p5 __attribute__ ((aligned(16))); +__m128i a_i_m1_p7 __attribute__ ((aligned(16))); +__m128i a_i_p1_m7 __attribute__ ((aligned(16))); +__m128i a_i_p1_m5 __attribute__ 
((aligned(16))); +__m128i a_i_p1_m3 __attribute__ ((aligned(16))); +__m128i a_i_p1_m1 __attribute__ ((aligned(16))); +__m128i a_i_p1_p1 __attribute__ ((aligned(16))); +__m128i a_i_p1_p3 __attribute__ ((aligned(16))); +__m128i a_i_p1_p5 __attribute__ ((aligned(16))); +__m128i a_i_p1_p7 __attribute__ ((aligned(16))); +__m128i a_i_p3_m7 __attribute__ ((aligned(16))); +__m128i a_i_p3_m5 __attribute__ ((aligned(16))); +__m128i a_i_p3_m3 __attribute__ ((aligned(16))); +__m128i a_i_p3_m1 __attribute__ ((aligned(16))); +__m128i a_i_p3_p1 __attribute__ ((aligned(16))); +__m128i a_i_p3_p3 __attribute__ ((aligned(16))); +__m128i a_i_p3_p5 __attribute__ ((aligned(16))); +__m128i a_i_p3_p7 __attribute__ ((aligned(16))); +__m128i a_i_p5_m7 __attribute__ ((aligned(16))); +__m128i a_i_p5_m5 __attribute__ ((aligned(16))); +__m128i a_i_p5_m3 __attribute__ ((aligned(16))); +__m128i a_i_p5_m1 __attribute__ ((aligned(16))); +__m128i a_i_p5_p1 __attribute__ ((aligned(16))); +__m128i a_i_p5_p3 __attribute__ ((aligned(16))); +__m128i a_i_p5_p5 __attribute__ ((aligned(16))); +__m128i a_i_p5_p7 __attribute__ ((aligned(16))); +__m128i a_i_p7_m7 __attribute__ ((aligned(16))); +__m128i a_i_p7_m5 __attribute__ ((aligned(16))); +__m128i a_i_p7_m3 __attribute__ ((aligned(16))); +__m128i a_i_p7_m1 __attribute__ ((aligned(16))); +__m128i a_i_p7_p1 __attribute__ ((aligned(16))); +__m128i a_i_p7_p3 __attribute__ ((aligned(16))); +__m128i a_i_p7_p5 __attribute__ ((aligned(16))); +__m128i a_i_p7_p7 __attribute__ ((aligned(16))); + +__m128i psi_a_m7_m7 __attribute__ ((aligned(16))); +__m128i psi_a_m7_m5 __attribute__ ((aligned(16))); +__m128i psi_a_m7_m3 __attribute__ ((aligned(16))); +__m128i psi_a_m7_m1 __attribute__ ((aligned(16))); +__m128i psi_a_m7_p1 __attribute__ ((aligned(16))); +__m128i psi_a_m7_p3 __attribute__ ((aligned(16))); +__m128i psi_a_m7_p5 __attribute__ ((aligned(16))); +__m128i psi_a_m7_p7 __attribute__ ((aligned(16))); +__m128i psi_a_m5_m7 __attribute__ ((aligned(16))); +__m128i 
psi_a_m5_m5 __attribute__ ((aligned(16))); +__m128i psi_a_m5_m3 __attribute__ ((aligned(16))); +__m128i psi_a_m5_m1 __attribute__ ((aligned(16))); +__m128i psi_a_m5_p1 __attribute__ ((aligned(16))); +__m128i psi_a_m5_p3 __attribute__ ((aligned(16))); +__m128i psi_a_m5_p5 __attribute__ ((aligned(16))); +__m128i psi_a_m5_p7 __attribute__ ((aligned(16))); +__m128i psi_a_m3_m7 __attribute__ ((aligned(16))); +__m128i psi_a_m3_m5 __attribute__ ((aligned(16))); +__m128i psi_a_m3_m3 __attribute__ ((aligned(16))); +__m128i psi_a_m3_m1 __attribute__ ((aligned(16))); +__m128i psi_a_m3_p1 __attribute__ ((aligned(16))); +__m128i psi_a_m3_p3 __attribute__ ((aligned(16))); +__m128i psi_a_m3_p5 __attribute__ ((aligned(16))); +__m128i psi_a_m3_p7 __attribute__ ((aligned(16))); +__m128i psi_a_m1_m7 __attribute__ ((aligned(16))); +__m128i psi_a_m1_m5 __attribute__ ((aligned(16))); +__m128i psi_a_m1_m3 __attribute__ ((aligned(16))); +__m128i psi_a_m1_m1 __attribute__ ((aligned(16))); +__m128i psi_a_m1_p1 __attribute__ ((aligned(16))); +__m128i psi_a_m1_p3 __attribute__ ((aligned(16))); +__m128i psi_a_m1_p5 __attribute__ ((aligned(16))); +__m128i psi_a_m1_p7 __attribute__ ((aligned(16))); +__m128i psi_a_p1_m7 __attribute__ ((aligned(16))); +__m128i psi_a_p1_m5 __attribute__ ((aligned(16))); +__m128i psi_a_p1_m3 __attribute__ ((aligned(16))); +__m128i psi_a_p1_m1 __attribute__ ((aligned(16))); +__m128i psi_a_p1_p1 __attribute__ ((aligned(16))); +__m128i psi_a_p1_p3 __attribute__ ((aligned(16))); +__m128i psi_a_p1_p5 __attribute__ ((aligned(16))); +__m128i psi_a_p1_p7 __attribute__ ((aligned(16))); +__m128i psi_a_p3_m7 __attribute__ ((aligned(16))); +__m128i psi_a_p3_m5 __attribute__ ((aligned(16))); +__m128i psi_a_p3_m3 __attribute__ ((aligned(16))); +__m128i psi_a_p3_m1 __attribute__ ((aligned(16))); +__m128i psi_a_p3_p1 __attribute__ ((aligned(16))); +__m128i psi_a_p3_p3 __attribute__ ((aligned(16))); +__m128i psi_a_p3_p5 __attribute__ ((aligned(16))); +__m128i psi_a_p3_p7 
__attribute__ ((aligned(16))); +__m128i psi_a_p5_m7 __attribute__ ((aligned(16))); +__m128i psi_a_p5_m5 __attribute__ ((aligned(16))); +__m128i psi_a_p5_m3 __attribute__ ((aligned(16))); +__m128i psi_a_p5_m1 __attribute__ ((aligned(16))); +__m128i psi_a_p5_p1 __attribute__ ((aligned(16))); +__m128i psi_a_p5_p3 __attribute__ ((aligned(16))); +__m128i psi_a_p5_p5 __attribute__ ((aligned(16))); +__m128i psi_a_p5_p7 __attribute__ ((aligned(16))); +__m128i psi_a_p7_m7 __attribute__ ((aligned(16))); +__m128i psi_a_p7_m5 __attribute__ ((aligned(16))); +__m128i psi_a_p7_m3 __attribute__ ((aligned(16))); +__m128i psi_a_p7_m1 __attribute__ ((aligned(16))); +__m128i psi_a_p7_p1 __attribute__ ((aligned(16))); +__m128i psi_a_p7_p3 __attribute__ ((aligned(16))); +__m128i psi_a_p7_p5 __attribute__ ((aligned(16))); +__m128i psi_a_p7_p7 __attribute__ ((aligned(16))); + +__m128i a_sq_m7_m7 __attribute__ ((aligned(16))); +__m128i a_sq_m7_m5 __attribute__ ((aligned(16))); +__m128i a_sq_m7_m3 __attribute__ ((aligned(16))); +__m128i a_sq_m7_m1 __attribute__ ((aligned(16))); +__m128i a_sq_m7_p1 __attribute__ ((aligned(16))); +__m128i a_sq_m7_p3 __attribute__ ((aligned(16))); +__m128i a_sq_m7_p5 __attribute__ ((aligned(16))); +__m128i a_sq_m7_p7 __attribute__ ((aligned(16))); +__m128i a_sq_m5_m7 __attribute__ ((aligned(16))); +__m128i a_sq_m5_m5 __attribute__ ((aligned(16))); +__m128i a_sq_m5_m3 __attribute__ ((aligned(16))); +__m128i a_sq_m5_m1 __attribute__ ((aligned(16))); +__m128i a_sq_m5_p1 __attribute__ ((aligned(16))); +__m128i a_sq_m5_p3 __attribute__ ((aligned(16))); +__m128i a_sq_m5_p5 __attribute__ ((aligned(16))); +__m128i a_sq_m5_p7 __attribute__ ((aligned(16))); +__m128i a_sq_m3_m7 __attribute__ ((aligned(16))); +__m128i a_sq_m3_m5 __attribute__ ((aligned(16))); +__m128i a_sq_m3_m3 __attribute__ ((aligned(16))); +__m128i a_sq_m3_m1 __attribute__ ((aligned(16))); +__m128i a_sq_m3_p1 __attribute__ ((aligned(16))); +__m128i a_sq_m3_p3 __attribute__ ((aligned(16))); +__m128i 
a_sq_m3_p5 __attribute__ ((aligned(16))); +__m128i a_sq_m3_p7 __attribute__ ((aligned(16))); +__m128i a_sq_m1_m7 __attribute__ ((aligned(16))); +__m128i a_sq_m1_m5 __attribute__ ((aligned(16))); +__m128i a_sq_m1_m3 __attribute__ ((aligned(16))); +__m128i a_sq_m1_m1 __attribute__ ((aligned(16))); +__m128i a_sq_m1_p1 __attribute__ ((aligned(16))); +__m128i a_sq_m1_p3 __attribute__ ((aligned(16))); +__m128i a_sq_m1_p5 __attribute__ ((aligned(16))); +__m128i a_sq_m1_p7 __attribute__ ((aligned(16))); +__m128i a_sq_p1_m7 __attribute__ ((aligned(16))); +__m128i a_sq_p1_m5 __attribute__ ((aligned(16))); +__m128i a_sq_p1_m3 __attribute__ ((aligned(16))); +__m128i a_sq_p1_m1 __attribute__ ((aligned(16))); +__m128i a_sq_p1_p1 __attribute__ ((aligned(16))); +__m128i a_sq_p1_p3 __attribute__ ((aligned(16))); +__m128i a_sq_p1_p5 __attribute__ ((aligned(16))); +__m128i a_sq_p1_p7 __attribute__ ((aligned(16))); +__m128i a_sq_p3_m7 __attribute__ ((aligned(16))); +__m128i a_sq_p3_m5 __attribute__ ((aligned(16))); +__m128i a_sq_p3_m3 __attribute__ ((aligned(16))); +__m128i a_sq_p3_m1 __attribute__ ((aligned(16))); +__m128i a_sq_p3_p1 __attribute__ ((aligned(16))); +__m128i a_sq_p3_p3 __attribute__ ((aligned(16))); +__m128i a_sq_p3_p5 __attribute__ ((aligned(16))); +__m128i a_sq_p3_p7 __attribute__ ((aligned(16))); +__m128i a_sq_p5_m7 __attribute__ ((aligned(16))); +__m128i a_sq_p5_m5 __attribute__ ((aligned(16))); +__m128i a_sq_p5_m3 __attribute__ ((aligned(16))); +__m128i a_sq_p5_m1 __attribute__ ((aligned(16))); +__m128i a_sq_p5_p1 __attribute__ ((aligned(16))); +__m128i a_sq_p5_p3 __attribute__ ((aligned(16))); +__m128i a_sq_p5_p5 __attribute__ ((aligned(16))); +__m128i a_sq_p5_p7 __attribute__ ((aligned(16))); +__m128i a_sq_p7_m7 __attribute__ ((aligned(16))); +__m128i a_sq_p7_m5 __attribute__ ((aligned(16))); +__m128i a_sq_p7_m3 __attribute__ ((aligned(16))); +__m128i a_sq_p7_m1 __attribute__ ((aligned(16))); +__m128i a_sq_p7_p1 __attribute__ ((aligned(16))); +__m128i a_sq_p7_p3 
__attribute__ ((aligned(16))); +__m128i a_sq_p7_p5 __attribute__ ((aligned(16))); +__m128i a_sq_p7_p7 __attribute__ ((aligned(16))); + +__m128i bit_met_m7_m7 __attribute__ ((aligned(16))); +__m128i bit_met_m7_m5 __attribute__ ((aligned(16))); +__m128i bit_met_m7_m3 __attribute__ ((aligned(16))); +__m128i bit_met_m7_m1 __attribute__ ((aligned(16))); +__m128i bit_met_m7_p1 __attribute__ ((aligned(16))); +__m128i bit_met_m7_p3 __attribute__ ((aligned(16))); +__m128i bit_met_m7_p5 __attribute__ ((aligned(16))); +__m128i bit_met_m7_p7 __attribute__ ((aligned(16))); +__m128i bit_met_m5_m7 __attribute__ ((aligned(16))); +__m128i bit_met_m5_m5 __attribute__ ((aligned(16))); +__m128i bit_met_m5_m3 __attribute__ ((aligned(16))); +__m128i bit_met_m5_m1 __attribute__ ((aligned(16))); +__m128i bit_met_m5_p1 __attribute__ ((aligned(16))); +__m128i bit_met_m5_p3 __attribute__ ((aligned(16))); +__m128i bit_met_m5_p5 __attribute__ ((aligned(16))); +__m128i bit_met_m5_p7 __attribute__ ((aligned(16))); +__m128i bit_met_m3_m7 __attribute__ ((aligned(16))); +__m128i bit_met_m3_m5 __attribute__ ((aligned(16))); +__m128i bit_met_m3_m3 __attribute__ ((aligned(16))); +__m128i bit_met_m3_m1 __attribute__ ((aligned(16))); +__m128i bit_met_m3_p1 __attribute__ ((aligned(16))); +__m128i bit_met_m3_p3 __attribute__ ((aligned(16))); +__m128i bit_met_m3_p5 __attribute__ ((aligned(16))); +__m128i bit_met_m3_p7 __attribute__ ((aligned(16))); +__m128i bit_met_m1_m7 __attribute__ ((aligned(16))); +__m128i bit_met_m1_m5 __attribute__ ((aligned(16))); +__m128i bit_met_m1_m3 __attribute__ ((aligned(16))); +__m128i bit_met_m1_m1 __attribute__ ((aligned(16))); +__m128i bit_met_m1_p1 __attribute__ ((aligned(16))); +__m128i bit_met_m1_p3 __attribute__ ((aligned(16))); +__m128i bit_met_m1_p5 __attribute__ ((aligned(16))); +__m128i bit_met_m1_p7 __attribute__ ((aligned(16))); +__m128i bit_met_p1_m7 __attribute__ ((aligned(16))); +__m128i bit_met_p1_m5 __attribute__ ((aligned(16))); +__m128i bit_met_p1_m3 
__attribute__ ((aligned(16))); +__m128i bit_met_p1_m1 __attribute__ ((aligned(16))); +__m128i bit_met_p1_p1 __attribute__ ((aligned(16))); +__m128i bit_met_p1_p3 __attribute__ ((aligned(16))); +__m128i bit_met_p1_p5 __attribute__ ((aligned(16))); +__m128i bit_met_p1_p7 __attribute__ ((aligned(16))); +__m128i bit_met_p3_m7 __attribute__ ((aligned(16))); +__m128i bit_met_p3_m5 __attribute__ ((aligned(16))); +__m128i bit_met_p3_m3 __attribute__ ((aligned(16))); +__m128i bit_met_p3_m1 __attribute__ ((aligned(16))); +__m128i bit_met_p3_p1 __attribute__ ((aligned(16))); +__m128i bit_met_p3_p3 __attribute__ ((aligned(16))); +__m128i bit_met_p3_p5 __attribute__ ((aligned(16))); +__m128i bit_met_p3_p7 __attribute__ ((aligned(16))); +__m128i bit_met_p5_m7 __attribute__ ((aligned(16))); +__m128i bit_met_p5_m5 __attribute__ ((aligned(16))); +__m128i bit_met_p5_m3 __attribute__ ((aligned(16))); +__m128i bit_met_p5_m1 __attribute__ ((aligned(16))); +__m128i bit_met_p5_p1 __attribute__ ((aligned(16))); +__m128i bit_met_p5_p3 __attribute__ ((aligned(16))); +__m128i bit_met_p5_p5 __attribute__ ((aligned(16))); +__m128i bit_met_p5_p7 __attribute__ ((aligned(16))); +__m128i bit_met_p7_m7 __attribute__ ((aligned(16))); +__m128i bit_met_p7_m5 __attribute__ ((aligned(16))); +__m128i bit_met_p7_m3 __attribute__ ((aligned(16))); +__m128i bit_met_p7_m1 __attribute__ ((aligned(16))); +__m128i bit_met_p7_p1 __attribute__ ((aligned(16))); +__m128i bit_met_p7_p3 __attribute__ ((aligned(16))); +__m128i bit_met_p7_p5 __attribute__ ((aligned(16))); +__m128i bit_met_p7_p7 __attribute__ ((aligned(16))); + +__m128i y0_p_1_1 __attribute__ ((aligned(16))); +__m128i y0_p_1_3 __attribute__ ((aligned(16))); +__m128i y0_p_1_5 __attribute__ ((aligned(16))); +__m128i y0_p_1_7 __attribute__ ((aligned(16))); +__m128i y0_p_3_1 __attribute__ ((aligned(16))); +__m128i y0_p_3_3 __attribute__ ((aligned(16))); +__m128i y0_p_3_5 __attribute__ ((aligned(16))); +__m128i y0_p_3_7 __attribute__ ((aligned(16))); +__m128i 
y0_p_5_1 __attribute__ ((aligned(16))); +__m128i y0_p_5_3 __attribute__ ((aligned(16))); +__m128i y0_p_5_5 __attribute__ ((aligned(16))); +__m128i y0_p_5_7 __attribute__ ((aligned(16))); +__m128i y0_p_7_1 __attribute__ ((aligned(16))); +__m128i y0_p_7_3 __attribute__ ((aligned(16))); +__m128i y0_p_7_5 __attribute__ ((aligned(16))); +__m128i y0_p_7_7 __attribute__ ((aligned(16))); +__m128i y0_m_1_1 __attribute__ ((aligned(16))); +__m128i y0_m_1_3 __attribute__ ((aligned(16))); +__m128i y0_m_1_5 __attribute__ ((aligned(16))); +__m128i y0_m_1_7 __attribute__ ((aligned(16))); +__m128i y0_m_3_1 __attribute__ ((aligned(16))); +__m128i y0_m_3_3 __attribute__ ((aligned(16))); +__m128i y0_m_3_5 __attribute__ ((aligned(16))); +__m128i y0_m_3_7 __attribute__ ((aligned(16))); +__m128i y0_m_5_1 __attribute__ ((aligned(16))); +__m128i y0_m_5_3 __attribute__ ((aligned(16))); +__m128i y0_m_5_5 __attribute__ ((aligned(16))); +__m128i y0_m_5_7 __attribute__ ((aligned(16))); +__m128i y0_m_7_1 __attribute__ ((aligned(16))); +__m128i y0_m_7_3 __attribute__ ((aligned(16))); +__m128i y0_m_7_5 __attribute__ ((aligned(16))); +__m128i y0_m_7_7 __attribute__ ((aligned(16))); + +__m128i xmm0 __attribute__ ((aligned(16))); +__m128i xmm1 __attribute__ ((aligned(16))); +__m128i xmm2 __attribute__ ((aligned(16))); +__m128i xmm3 __attribute__ ((aligned(16))); +__m128i xmm4 __attribute__ ((aligned(16))); +__m128i xmm5 __attribute__ ((aligned(16))); +__m128i xmm6 __attribute__ ((aligned(16))); +__m128i xmm7 __attribute__ ((aligned(16))); +__m128i xmm8 __attribute__ ((aligned(16))); + +__m128i y0r __attribute__ ((aligned(16))); +__m128i y0i __attribute__ ((aligned(16))); +__m128i y1r __attribute__ ((aligned(16))); +__m128i y1i __attribute__ ((aligned(16))); +__m128i y2r __attribute__ ((aligned(16))); +__m128i y2i __attribute__ ((aligned(16))); + +__m128i logmax_num_re0 __attribute__ ((aligned(16))); +__m128i logmax_num_im0 __attribute__ ((aligned(16))); +__m128i logmax_den_re0 __attribute__ 
((aligned(16))); +__m128i logmax_den_im0 __attribute__ ((aligned(16))); +__m128i logmax_num_re1 __attribute__ ((aligned(16))); +__m128i logmax_num_im1 __attribute__ ((aligned(16))); +__m128i logmax_den_re1 __attribute__ ((aligned(16))); +__m128i logmax_den_im1 __attribute__ ((aligned(16))); + +__m128i tmp_result __attribute__ ((aligned(16))); +__m128i tmp_result2 __attribute__ ((aligned(16))); +__m128i tmp_result3 __attribute__ ((aligned(16))); +__m128i tmp_result4 __attribute__ ((aligned(16))); + + +//============================================================================================== +// Auxiliary Makros + +// calculates psi_a = psi_r*a_r + psi_i*a_i +#define prodsum_psi_a_epi16(psi_r,a_r,psi_i,a_i,psi_a) tmp_result = _mm_mulhi_epi16(psi_r,a_r); tmp_result = _mm_slli_epi16(tmp_result,1); tmp_result2 = _mm_mulhi_epi16(psi_i,a_i); tmp_result2 = _mm_slli_epi16(tmp_result2,1); psi_a = _mm_adds_epi16(tmp_result,tmp_result2); + +// calculate interference magnitude +#define interference_abs_epi16(psi,int_ch_mag,int_mag,c1,c2) tmp_result = _mm_cmplt_epi16(psi,int_ch_mag); tmp_result2 = _mm_xor_si128(tmp_result,(*(__m128i*)&ones[0])); tmp_result = _mm_and_si128(tmp_result,c1); tmp_result2 = _mm_and_si128(tmp_result2,c2); int_mag = _mm_or_si128(tmp_result,tmp_result2); + +// calculate interference magnitude +// tmp_result = ones in shorts corr. 
to interval 2<=x<=4, tmp_result2 interval < 2, tmp_result3 interval 4<x<6 and tmp_result4 interval x>6 +#define interference_abs_64qam_epi16(psi,int_ch_mag,int_two_ch_mag,int_three_ch_mag,a,c1,c3,c5,c7) tmp_result = _mm_cmplt_epi16(psi,int_two_ch_mag); tmp_result3 = _mm_xor_si128(tmp_result,(*(__m128i*)&ones[0])); tmp_result2 = _mm_cmplt_epi16(psi,int_ch_mag); tmp_result = _mm_xor_si128(tmp_result,tmp_result2); tmp_result4 = _mm_cmpgt_epi16(psi,int_three_ch_mag); tmp_result3 = _mm_xor_si128(tmp_result3,tmp_result4); tmp_result = _mm_and_si128(tmp_result,c3); tmp_result2 = _mm_and_si128(tmp_result2,c1); tmp_result3 = _mm_and_si128(tmp_result3,c5); tmp_result4 = _mm_and_si128(tmp_result4,c7); tmp_result = _mm_or_si128(tmp_result,tmp_result2); tmp_result3 = _mm_or_si128(tmp_result3,tmp_result4); a = _mm_or_si128(tmp_result,tmp_result3); + +// calculates a_sq = int_ch_mag*(a_r^2 + a_i^2)*scale_factor +#define square_a_epi16(a_r,a_i,int_ch_mag,scale_factor,a_sq) tmp_result = _mm_mulhi_epi16(a_r,a_r); tmp_result = _mm_slli_epi16(tmp_result,1); tmp_result = _mm_mulhi_epi16(tmp_result,scale_factor); tmp_result = _mm_slli_epi16(tmp_result,1); tmp_result = _mm_mulhi_epi16(tmp_result,int_ch_mag); tmp_result = _mm_slli_epi16(tmp_result,1); tmp_result2 = _mm_mulhi_epi16(a_i,a_i); tmp_result2 = _mm_slli_epi16(tmp_result2,1); tmp_result2 = _mm_mulhi_epi16(tmp_result2,scale_factor); tmp_result2 = _mm_slli_epi16(tmp_result2,1); tmp_result2 = _mm_mulhi_epi16(tmp_result2,int_ch_mag); tmp_result2 = _mm_slli_epi16(tmp_result2,1); a_sq = _mm_adds_epi16(tmp_result,tmp_result2); + +// calculates a_sq = int_ch_mag*(a_r^2 + a_i^2)*scale_factor for 64-QAM +#define square_a_64qam_epi16(a_r,a_i,int_ch_mag,scale_factor,a_sq) tmp_result = _mm_mulhi_epi16(a_r,a_r); tmp_result = _mm_slli_epi16(tmp_result,1); tmp_result = _mm_mulhi_epi16(tmp_result,scale_factor); tmp_result = _mm_slli_epi16(tmp_result,3); tmp_result = _mm_mulhi_epi16(tmp_result,int_ch_mag); tmp_result = 
_mm_slli_epi16(tmp_result,1); tmp_result2 = _mm_mulhi_epi16(a_i,a_i); tmp_result2 = _mm_slli_epi16(tmp_result2,1); tmp_result2 = _mm_mulhi_epi16(tmp_result2,scale_factor); tmp_result2 = _mm_slli_epi16(tmp_result2,3); tmp_result2 = _mm_mulhi_epi16(tmp_result2,int_ch_mag); tmp_result2 = _mm_slli_epi16(tmp_result2,1); a_sq = _mm_adds_epi16(tmp_result,tmp_result2); + +#elif defined(__arm__) + +#endif + +//============================================================================================== +// SINGLE-STREAM +//============================================================================================== + +//---------------------------------------------------------------------------------------------- +// QPSK +//---------------------------------------------------------------------------------------------- + +int dlsch_qpsk_llr(LTE_DL_FRAME_PARMS *frame_parms, + int32_t **rxdataF_comp, + int16_t *dlsch_llr, + uint8_t symbol, + uint8_t first_symbol_flag, + uint16_t nb_rb, + uint16_t pbch_pss_sss_adjust, + uint8_t beamforming_mode) +{ + + uint32_t *rxF = (uint32_t*)&rxdataF_comp[0][((int32_t)symbol*frame_parms->N_RB_DL*12)]; + uint32_t *llr32; + int i,len; + uint8_t symbol_mod = (symbol >= (7-frame_parms->Ncp))? 
(symbol-(7-frame_parms->Ncp)) : symbol; + + /* + if (first_symbol_flag==1) { + llr32 = (uint32_t*)dlsch_llr; + } else { + llr32 = (uint32_t*)(*llr32p); + }*/ + + llr32 = (uint32_t*)dlsch_llr; + if (!llr32) { + LOG_E(PHY,"dlsch_qpsk_llr: llr is null, symbol %d, llr32=%p\n",symbol, llr32); + return(-1); + } + + + if ((symbol_mod==0) || (symbol_mod==(4-frame_parms->Ncp))) { + if (frame_parms->nb_antenna_ports_eNB!=1) + len = (nb_rb*8) - (2*pbch_pss_sss_adjust/3); + else + len = (nb_rb*10) - (5*pbch_pss_sss_adjust/6); + } else if((beamforming_mode==7) && (frame_parms->Ncp==0) && (symbol==3 || symbol==6 || symbol==9 || symbol==12)){ + len = (nb_rb*9) - (3*pbch_pss_sss_adjust/4); + } else if((beamforming_mode==7) && (frame_parms->Ncp==1) && (symbol==4 || symbol==7 || symbol==10)){ + len = (nb_rb*8) - (2*pbch_pss_sss_adjust/3); + } else { + len = (nb_rb*12) - pbch_pss_sss_adjust; + } + + + /* + LOG_I(PHY,"dlsch_qpsk_llr: [symb %d / FirstSym %d / Length %d]: @LLR Buff %x, @LLR Buff(symb) %x \n", + symbol, + first_symbol_flag, + len, + dlsch_llr, + llr32); + */ + //printf("ll32p=%p , dlsch_llr=%p, symbol=%d, flag=%d \n", llr32, dlsch_llr, symbol, first_symbol_flag); + for (i=0; i<len; i++) { + *llr32 = *rxF; + //printf("llr %d : (%d,%d)\n",i,((int16_t*)llr32)[0],((int16_t*)llr32)[1]); + rxF++; + llr32++; + } + + //*llr32p = (int16_t *)llr32; + + return(0); +} + +int32_t dlsch_qpsk_llr_SIC(LTE_DL_FRAME_PARMS *frame_parms, + int32_t **rxdataF_comp, + int32_t **sic_buffer, //Q15 + int32_t **rho_i, + short *dlsch_llr, + uint8_t num_pdcch_symbols, + uint16_t nb_rb, + uint8_t subframe, + uint16_t mod_order_0, + uint32_t rb_alloc) +{ + + int16_t rho_amp_x0[2*frame_parms->N_RB_DL*12]; + int16_t rho_rho_amp_x0[2*frame_parms->N_RB_DL*12]; + uint16_t amp_tmp; + uint16_t *llr16=(uint16_t*)dlsch_llr; + int i, len, nsymb; + uint8_t symbol, symbol_mod; + int len_acc=0; + uint16_t *sic_data; + uint16_t pbch_pss_sss_adjust; + + nsymb = (frame_parms->Ncp==0) ? 
14:12; + + for (symbol=num_pdcch_symbols; symbol<nsymb; symbol++) { + uint16_t *rxF = (uint16_t*)(&rxdataF_comp[0][((int16_t)symbol*frame_parms->N_RB_DL*12)]); + int16_t *rho_1=(int16_t*)(&rho_i[0][((int16_t)symbol*frame_parms->N_RB_DL*12)]); + sic_data = (uint16_t*)&sic_buffer[0][((int16_t)len_acc)]; + + symbol_mod = (symbol>=(7-frame_parms->Ncp)) ? symbol-(7-frame_parms->Ncp) : symbol; + + + + + if ((symbol_mod == 0) || (symbol_mod == (4-frame_parms->Ncp))) //pilots=1 + amp_tmp=0x1fff;//dlsch0->sqrt_rho_b; already taken into account + else //pilots=0 + amp_tmp=0x1fff;//1.5*dlsch0->sqrt_rho_a; already taken into account + + if (mod_order_0==6) + amp_tmp=amp_tmp<<1; // to compensate for >> 1 shift in modulation + + + pbch_pss_sss_adjust=adjust_G2(frame_parms,&rb_alloc,2,subframe,symbol); + + if ((symbol_mod==0) || (symbol_mod==(4-frame_parms->Ncp))) { + if (frame_parms->nb_antenna_ports_eNB!=1) + len = (nb_rb*8) - (2*pbch_pss_sss_adjust/3); + else + len = (nb_rb*10) - (5*pbch_pss_sss_adjust/6); + } else { + len = (nb_rb*12) - pbch_pss_sss_adjust; + } + + // printf("dlsch_qpsk_llr: symbol %d,nb_rb %d, len %d,pbch_pss_sss_adjust %d\n",symbol,nb_rb,len,pbch_pss_sss_adjust); + + len_acc+=len; //accumulated length; this is done because in sic_buffer we have only data symbols + + multadd_complex_vector_real_scalar((int16_t *)sic_data, + amp_tmp, + (int16_t *)rho_amp_x0, //this is in Q13 + 1, + len); + + mult_cpx_vector((int16_t *)rho_1, //Q15 + (int16_t *)rho_amp_x0, //Q13 + (int16_t*)rho_rho_amp_x0, + len, + 13); + +#ifdef DEBUG_LLR_SIC + write_output("rho_for_multipl.m","rho_for_m", rho_1,len,1, + symbol==num_pdcch_symbols ? 15 : + symbol==nsymb-1 ? 14 : 13); + + write_output("rho_rho_in_llr.m","rho2", rho_rho_amp_x0,len,1, + symbol==num_pdcch_symbols ? 15 : + symbol==nsymb-1 ? 
14 : 13); +#endif + + sub_cpx_vector16((int16_t *)rxF, + (int16_t *)rho_rho_amp_x0, + //(int16_t *)clean_x1, + (int16_t *)rxF, + len*2); + +#ifdef DEBUG_LLR_SIC + write_output("rxFdata_comp1_after.m","rxF_a", rxF,len,1,1); + write_output("rxF_comp1.m","rxF_1_comp", rxF,len,1, + symbol==num_pdcch_symbols ? 15 : + symbol==nsymb-1 ? 14 : 13); +#endif + + //this is for QPSK only!!! + for (i=0; i<len*2; i++) { + *llr16 =rxF[i]; + //printf("llr %d : (%d,%d)\n",i,((int16_t*)llr32)[0],((int16_t*)llr32)[1]); + llr16++; + } + + } + + // printf("dlsch_qpsk_llr_SIC: acc_len=%d\n",len_acc); + + return(0); +} + + +//---------------------------------------------------------------------------------------------- +// 16-QAM +//---------------------------------------------------------------------------------------------- + +void dlsch_16qam_llr(LTE_DL_FRAME_PARMS *frame_parms, + int32_t **rxdataF_comp, + int16_t *dlsch_llr, + int32_t **dl_ch_mag, + uint8_t symbol, + uint8_t first_symbol_flag, + uint16_t nb_rb, + uint16_t pbch_pss_sss_adjust, + int16_t **llr32p, + uint8_t beamforming_mode) +{ + +#if defined(__x86_64__) || defined(__i386__) + __m128i *rxF = (__m128i*)&rxdataF_comp[0][(symbol*frame_parms->N_RB_DL*12)]; + __m128i *ch_mag; + __m128i llr128[2]; + uint32_t *llr32; +#elif defined(__arm__) + int16x8_t *rxF = (int16x8_t*)&rxdataF_comp[0][(symbol*frame_parms->N_RB_DL*12)]; + int16x8_t *ch_mag; + int16x8_t xmm0; + int16_t *llr16; +#endif + + + int i,len; + unsigned char symbol_mod,len_mod4=0; + + +#if defined(__x86_64__) || defined(__i386__) + if (first_symbol_flag==1) { + llr32 = (uint32_t*)dlsch_llr; + } else { + llr32 = (uint32_t*)*llr32p; + } +#elif defined(__arm__) + if (first_symbol_flag==1) { + llr16 = (int16_t*)dlsch_llr; + } else { + llr16 = (int16_t*)*llr32p; + } +#endif + + symbol_mod = (symbol>=(7-frame_parms->Ncp)) ? 
symbol-(7-frame_parms->Ncp) : symbol; + +#if defined(__x86_64__) || defined(__i386__) + ch_mag = (__m128i*)&dl_ch_mag[0][(symbol*frame_parms->N_RB_DL*12)]; +#elif defined(__arm__) + ch_mag = (int16x8_t*)&dl_ch_mag[0][(symbol*frame_parms->N_RB_DL*12)]; +#endif + if ((symbol_mod==0) || (symbol_mod==(4-frame_parms->Ncp))) { + if (frame_parms->nb_antenna_ports_eNB!=1) + len = (nb_rb*8) - (2*pbch_pss_sss_adjust/3); + else + len = (nb_rb*10) - (5*pbch_pss_sss_adjust/6); + } else if((beamforming_mode==7) && (frame_parms->Ncp==0) && (symbol==3 || symbol==6 || symbol==9 || symbol==12)){ + len = (nb_rb*9) - (3*pbch_pss_sss_adjust/4); + } else if((beamforming_mode==7) && (frame_parms->Ncp==1) && (symbol==4 || symbol==7 || symbol==10)){ + len = (nb_rb*8) - (2*pbch_pss_sss_adjust/3); + } else { + len = (nb_rb*12) - pbch_pss_sss_adjust; + } + + // update output pointer according to number of REs in this symbol (<<2 because 4 bits per RE) + if (first_symbol_flag == 1) + *llr32p = dlsch_llr + (len<<2); + else + *llr32p += (len<<2); + + // printf("len=%d\n", len); + len_mod4 = len&3; + // printf("len_mod4=%d\n", len_mod4); + len>>=2; // length in quad words (4 REs) + // printf("len>>=2=%d\n", len); + len+=(len_mod4==0 ? 
0 : 1); + // printf("len+=%d\n", len); + for (i=0; i<len; i++) { + +#if defined(__x86_64__) || defined(__i386) + xmm0 = _mm_abs_epi16(rxF[i]); + xmm0 = _mm_subs_epi16(ch_mag[i],xmm0); + + // lambda_1=y_R, lambda_2=|y_R|-|h|^2, lamda_3=y_I, lambda_4=|y_I|-|h|^2 + llr128[0] = _mm_unpacklo_epi32(rxF[i],xmm0); + llr128[1] = _mm_unpackhi_epi32(rxF[i],xmm0); + llr32[0] = _mm_extract_epi32(llr128[0],0); //((uint32_t *)&llr128[0])[0]; + llr32[1] = _mm_extract_epi32(llr128[0],1); //((uint32_t *)&llr128[0])[1]; + llr32[2] = _mm_extract_epi32(llr128[0],2); //((uint32_t *)&llr128[0])[2]; + llr32[3] = _mm_extract_epi32(llr128[0],3); //((uint32_t *)&llr128[0])[3]; + llr32[4] = _mm_extract_epi32(llr128[1],0); //((uint32_t *)&llr128[1])[0]; + llr32[5] = _mm_extract_epi32(llr128[1],1); //((uint32_t *)&llr128[1])[1]; + llr32[6] = _mm_extract_epi32(llr128[1],2); //((uint32_t *)&llr128[1])[2]; + llr32[7] = _mm_extract_epi32(llr128[1],3); //((uint32_t *)&llr128[1])[3]; + llr32+=8; +#elif defined(__arm__) + xmm0 = vabsq_s16(rxF[i]); + xmm0 = vqsubq_s16(ch_mag[i],xmm0); + // lambda_1=y_R, lambda_2=|y_R|-|h|^2, lamda_3=y_I, lambda_4=|y_I|-|h|^2 + + llr16[0] = vgetq_lane_s16(rxF[i],0); + llr16[1] = vgetq_lane_s16(rxF[i],1); + llr16[2] = vgetq_lane_s16(xmm0,0); + llr16[3] = vgetq_lane_s16(xmm0,1); + llr16[4] = vgetq_lane_s16(rxF[i],2); + llr16[5] = vgetq_lane_s16(rxF[i],3); + llr16[6] = vgetq_lane_s16(xmm0,2); + llr16[7] = vgetq_lane_s16(xmm0,3); + llr16[8] = vgetq_lane_s16(rxF[i],4); + llr16[9] = vgetq_lane_s16(rxF[i],5); + llr16[10] = vgetq_lane_s16(xmm0,4); + llr16[11] = vgetq_lane_s16(xmm0,5); + llr16[12] = vgetq_lane_s16(rxF[i],6); + llr16[13] = vgetq_lane_s16(rxF[i],6); + llr16[14] = vgetq_lane_s16(xmm0,7); + llr16[15] = vgetq_lane_s16(xmm0,7); + llr16+=16; +#endif + + } + +#if defined(__x86_64__) || defined(__i386__) + _mm_empty(); + _m_empty(); +#endif +} + +void dlsch_16qam_llr_SIC (LTE_DL_FRAME_PARMS *frame_parms, + int32_t **rxdataF_comp, + int32_t **sic_buffer, //Q15 + int32_t 
**rho_i, + int16_t *dlsch_llr, + uint8_t num_pdcch_symbols, + int32_t **dl_ch_mag, + uint16_t nb_rb, + uint8_t subframe, + uint16_t mod_order_0, + uint32_t rb_alloc) +{ + int16_t rho_amp_x0[2*frame_parms->N_RB_DL*12]; + int16_t rho_rho_amp_x0[2*frame_parms->N_RB_DL*12]; + uint16_t amp_tmp; + uint32_t *llr32=(uint32_t*)dlsch_llr; + int i, len, nsymb; + uint8_t symbol, symbol_mod; + int len_acc=0; + uint16_t *sic_data; + uint16_t pbch_pss_sss_adjust; + unsigned char len_mod4=0; + __m128i llr128[2]; + __m128i *ch_mag; + nsymb = (frame_parms->Ncp==0) ? 14:12; + + for (symbol=num_pdcch_symbols; symbol<nsymb; symbol++) { + uint16_t *rxF = (uint16_t*)(&rxdataF_comp[0][((int16_t)symbol*frame_parms->N_RB_DL*12)]); + int16_t *rho_1=(int16_t*)(&rho_i[0][((int16_t)symbol*frame_parms->N_RB_DL*12)]); + ch_mag = (__m128i*)(&dl_ch_mag[0][((int16_t)symbol*frame_parms->N_RB_DL*12)]); + sic_data = (uint16_t*)(&sic_buffer[0][((int16_t)len_acc)]); + + symbol_mod = (symbol>=(7-frame_parms->Ncp)) ? symbol-(7-frame_parms->Ncp) : symbol; + + pbch_pss_sss_adjust=adjust_G2(frame_parms,&rb_alloc,4,subframe,symbol); + + if ((symbol_mod==0) || (symbol_mod==(4-frame_parms->Ncp))) { + amp_tmp=0x1fff;//dlsch0->sqrt_rho_b; already taken into account + if (frame_parms->nb_antenna_ports_eNB!=1) + len = nb_rb*8 - (2*pbch_pss_sss_adjust/3); + else + len = nb_rb*10 - (5*pbch_pss_sss_adjust/6); + } else { + amp_tmp=0x1fff;;//dlsch0->sqrt_rho_a; already taken into account + len = nb_rb*12 - pbch_pss_sss_adjust; + } + + if (mod_order_0==6) + amp_tmp=amp_tmp<<1; // to compensate for >> 1 shift in modulation + + len_acc+=len; + + multadd_complex_vector_real_scalar((int16_t *)sic_data, + amp_tmp, + (int16_t *)rho_amp_x0, //this is in Q13 + 1, + len); + + mult_cpx_vector((int16_t *)rho_1, //Q15 + (int16_t *)rho_amp_x0, //Q13 + (int16_t*)rho_rho_amp_x0, + len, + 13); + + sub_cpx_vector16((int16_t *)rxF, + (int16_t *)rho_rho_amp_x0, + //(int16_t *)clean_x1, + (int16_t *)rxF, + len*2); + + len_mod4 = len&3; + 
len>>=2; // length in quad words (4 REs) + len+=(len_mod4==0 ? 0 : 1); + + for (i=0; i<len; i++) { + + + __m128i *x1 = (__m128i*)rxF;//clean_x1; +//printf("%p %p %p\n", clean_x1, &clean_x1, &clean_x1[0]); +//int *a = malloc(10*sizeof(int)); +//printf("%p %p\n", a, &a); +//exit(0); + xmm0 = _mm_abs_epi16(x1[i]); + xmm0 = _mm_subs_epi16(ch_mag[i],xmm0); + + // lambda_1=y_R, lambda_2=|y_R|-|h|^2, lamda_3=y_I, lambda_4=|y_I|-|h|^2 + llr128[0] = _mm_unpacklo_epi32(x1[i],xmm0); + llr128[1] = _mm_unpackhi_epi32(x1[i],xmm0); + llr32[0] = _mm_extract_epi32(llr128[0],0); //((uint32_t *)&llr128[0])[0]; + llr32[1] = _mm_extract_epi32(llr128[0],1); //((uint32_t *)&llr128[0])[1]; + llr32[2] = _mm_extract_epi32(llr128[0],2); //((uint32_t *)&llr128[0])[2]; + llr32[3] = _mm_extract_epi32(llr128[0],3); //((uint32_t *)&llr128[0])[3]; + llr32[4] = _mm_extract_epi32(llr128[1],0); //((uint32_t *)&llr128[1])[0]; + llr32[5] = _mm_extract_epi32(llr128[1],1); //((uint32_t *)&llr128[1])[1]; + llr32[6] = _mm_extract_epi32(llr128[1],2); //((uint32_t *)&llr128[1])[2]; + llr32[7] = _mm_extract_epi32(llr128[1],3); //((uint32_t *)&llr128[1])[3]; + llr32+=8; + + } + _mm_empty(); + _m_empty(); +} +} + +//---------------------------------------------------------------------------------------------- +// 64-QAM +//---------------------------------------------------------------------------------------------- + +void dlsch_64qam_llr(LTE_DL_FRAME_PARMS *frame_parms, + int32_t **rxdataF_comp, + int16_t *dlsch_llr, + int32_t **dl_ch_mag, + int32_t **dl_ch_magb, + uint8_t symbol, + uint8_t first_symbol_flag, + uint16_t nb_rb, + uint16_t pbch_pss_sss_adjust, + //int16_t **llr_save, + uint32_t llr_offset, + uint8_t beamforming_mode) +{ +#if defined(__x86_64__) || defined(__i386__) + __m128i *rxF = (__m128i*)&rxdataF_comp[0][(symbol*frame_parms->N_RB_DL*12)]; + __m128i *ch_mag,*ch_magb; +#elif defined(__arm__) + int16x8_t *rxF = (int16x8_t*)&rxdataF_comp[0][(symbol*frame_parms->N_RB_DL*12)]; + int16x8_t 
*ch_mag,*ch_magb,xmm1,xmm2; +#endif + int i,len,len2; + unsigned char symbol_mod,len_mod4; + short *llr; + int16_t *llr2; + int8_t *pllr_symbol; + + /* + if (first_symbol_flag==1) + llr = dlsch_llr; + else + llr = *llr_save; + */ + llr = dlsch_llr; + + pllr_symbol = (int8_t*)dlsch_llr; + pllr_symbol += llr_offset; + + symbol_mod = (symbol>=(7-frame_parms->Ncp)) ? symbol-(7-frame_parms->Ncp) : symbol; + +#if defined(__x86_64__) || defined(__i386__) + ch_mag = (__m128i*)&dl_ch_mag[0][(symbol*frame_parms->N_RB_DL*12)]; + ch_magb = (__m128i*)&dl_ch_magb[0][(symbol*frame_parms->N_RB_DL*12)]; +#elif defined(__arm__) + ch_mag = (int16x8_t*)&dl_ch_mag[0][(symbol*frame_parms->N_RB_DL*12)]; + ch_magb = (int16x8_t*)&dl_ch_magb[0][(symbol*frame_parms->N_RB_DL*12)]; +#endif + if ((symbol_mod==0) || (symbol_mod==(4-frame_parms->Ncp))) { + if (frame_parms->nb_antenna_ports_eNB!=1) + len = (nb_rb*8) - (2*pbch_pss_sss_adjust/3); + else + len = (nb_rb*10) - (5*pbch_pss_sss_adjust/6); + } else if((beamforming_mode==7) && (frame_parms->Ncp==0) && (symbol==3 || symbol==6 || symbol==9 || symbol==12)){ + len = (nb_rb*9) - (3*pbch_pss_sss_adjust/4); + } else if((beamforming_mode==7) && (frame_parms->Ncp==1) && (symbol==4 || symbol==7 || symbol==10)){ + len = (nb_rb*8) - (2*pbch_pss_sss_adjust/3); + } else { + len = (nb_rb*12) - pbch_pss_sss_adjust; + } + +// printf("dlsch_64qam_llr: symbol %d,nb_rb %d, len %d,pbch_pss_sss_adjust %d\n",symbol,nb_rb,len,pbch_pss_sss_adjust); + +/* LOG_I(PHY,"dlsch_64qam_llr [symb %d / FirstSym %d / Length %d]: @LLR Buff %x \n", + symbol, + first_symbol_flag, + len, + dlsch_llr, + pllr_symbol);*/ + + llr2 = llr; + llr += (len*6); + + len_mod4 =len&3; + len2=len>>2; // length in quad words (4 REs) + len2+=((len_mod4==0)?0:1); + + for (i=0; i<len2; i++) { + +#if defined(__x86_64__) || defined(__i386__) + xmm1 = _mm_abs_epi16(rxF[i]); + xmm1 = _mm_subs_epi16(ch_mag[i],xmm1); + xmm2 = _mm_abs_epi16(xmm1); + xmm2 = _mm_subs_epi16(ch_magb[i],xmm2); +#elif 
defined(__arm__) + xmm1 = vabsq_s16(rxF[i]); + xmm1 = vsubq_s16(ch_mag[i],xmm1); + xmm2 = vabsq_s16(xmm1); + xmm2 = vsubq_s16(ch_magb[i],xmm2); +#endif + // loop over all LLRs in quad word (24 coded bits) + /* + for (j=0;j<8;j+=2) { + llr2[0] = ((short *)&rxF[i])[j]; + llr2[1] = ((short *)&rxF[i])[j+1]; + llr2[2] = ((short *)&xmm1)[j]; + llr2[3] = ((short *)&xmm1)[j+1]; + llr2[4] = ((short *)&xmm2)[j]; + llr2[5] = ((short *)&xmm2)[j+1]; + + llr2+=6; + } + */ + llr2[0] = ((short *)&rxF[i])[0]; + llr2[1] = ((short *)&rxF[i])[1]; +#if defined(__x86_64__) || defined(__i386__) + llr2[2] = _mm_extract_epi16(xmm1,0); + llr2[3] = _mm_extract_epi16(xmm1,1);//((short *)&xmm1)[j+1]; + llr2[4] = _mm_extract_epi16(xmm2,0);//((short *)&xmm2)[j]; + llr2[5] = _mm_extract_epi16(xmm2,1);//((short *)&xmm2)[j+1]; +#elif defined(__arm__) + llr2[2] = vgetq_lane_s16(xmm1,0); + llr2[3] = vgetq_lane_s16(xmm1,1);//((short *)&xmm1)[j+1]; + llr2[4] = vgetq_lane_s16(xmm2,0);//((short *)&xmm2)[j]; + llr2[5] = vgetq_lane_s16(xmm2,1);//((short *)&xmm2)[j+1]; +#endif + + llr2+=6; + llr2[0] = ((short *)&rxF[i])[2]; + llr2[1] = ((short *)&rxF[i])[3]; +#if defined(__x86_64__) || defined(__i386__) + llr2[2] = _mm_extract_epi16(xmm1,2); + llr2[3] = _mm_extract_epi16(xmm1,3);//((short *)&xmm1)[j+1]; + llr2[4] = _mm_extract_epi16(xmm2,2);//((short *)&xmm2)[j]; + llr2[5] = _mm_extract_epi16(xmm2,3);//((short *)&xmm2)[j+1]; +#elif defined(__arm__) + llr2[2] = vgetq_lane_s16(xmm1,2); + llr2[3] = vgetq_lane_s16(xmm1,3);//((short *)&xmm1)[j+1]; + llr2[4] = vgetq_lane_s16(xmm2,2);//((short *)&xmm2)[j]; + llr2[5] = vgetq_lane_s16(xmm2,3);//((short *)&xmm2)[j+1]; +#endif + + llr2+=6; + llr2[0] = ((short *)&rxF[i])[4]; + llr2[1] = ((short *)&rxF[i])[5]; +#if defined(__x86_64__) || defined(__i386__) + llr2[2] = _mm_extract_epi16(xmm1,4); + llr2[3] = _mm_extract_epi16(xmm1,5);//((short *)&xmm1)[j+1]; + llr2[4] = _mm_extract_epi16(xmm2,4);//((short *)&xmm2)[j]; + llr2[5] = _mm_extract_epi16(xmm2,5);//((short 
*)&xmm2)[j+1]; +#elif defined(__arm__) + llr2[2] = vgetq_lane_s16(xmm1,4); + llr2[3] = vgetq_lane_s16(xmm1,5);//((short *)&xmm1)[j+1]; + llr2[4] = vgetq_lane_s16(xmm2,4);//((short *)&xmm2)[j]; + llr2[5] = vgetq_lane_s16(xmm2,5);//((short *)&xmm2)[j+1]; +#endif + llr2+=6; + llr2[0] = ((short *)&rxF[i])[6]; + llr2[1] = ((short *)&rxF[i])[7]; +#if defined(__x86_64__) || defined(__i386__) + llr2[2] = _mm_extract_epi16(xmm1,6); + llr2[3] = _mm_extract_epi16(xmm1,7);//((short *)&xmm1)[j+1]; + llr2[4] = _mm_extract_epi16(xmm2,6);//((short *)&xmm2)[j]; + llr2[5] = _mm_extract_epi16(xmm2,7);//((short *)&xmm2)[j+1]; +#elif defined(__arm__) + llr2[2] = vgetq_lane_s16(xmm1,6); + llr2[3] = vgetq_lane_s16(xmm1,7);//((short *)&xmm1)[j+1]; + llr2[4] = vgetq_lane_s16(xmm2,6);//((short *)&xmm2)[j]; + llr2[5] = vgetq_lane_s16(xmm2,7);//((short *)&xmm2)[j+1]; +#endif + llr2+=6; + + } + +#if defined(__x86_64__) || defined(__i386__) + _mm_empty(); + _m_empty(); +#endif +} + +//#if 0 +void dlsch_64qam_llr_SIC(LTE_DL_FRAME_PARMS *frame_parms, + int32_t **rxdataF_comp, + int32_t **sic_buffer, //Q15 + int32_t **rho_i, + int16_t *dlsch_llr, + uint8_t num_pdcch_symbols, + int32_t **dl_ch_mag, + int32_t **dl_ch_magb, + uint16_t nb_rb, + uint8_t subframe, + uint16_t mod_order_0, + uint32_t rb_alloc) +{ + int16_t rho_amp_x0[2*frame_parms->N_RB_DL*12]; + int16_t rho_rho_amp_x0[2*frame_parms->N_RB_DL*12]; + uint16_t amp_tmp; + uint16_t *llr32=(uint16_t*)dlsch_llr; + int i, len, nsymb, len2; + uint8_t symbol, symbol_mod; + int len_acc=0; + uint16_t *sic_data; + uint16_t pbch_pss_sss_adjust; + unsigned char len_mod4=0; + uint16_t *llr2; + __m128i *ch_mag,*ch_magb; + + nsymb = (frame_parms->Ncp==0) ? 
14:12; + + for (symbol=num_pdcch_symbols; symbol<nsymb; symbol++) { + uint16_t *rxF = (uint16_t*)(&rxdataF_comp[0][((int16_t)symbol*frame_parms->N_RB_DL*12)]); + int16_t *rho_1=(int16_t*)(&rho_i[0][((int16_t)symbol*frame_parms->N_RB_DL*12)]); + ch_mag = (__m128i*)(&dl_ch_mag[0][((int16_t)symbol*frame_parms->N_RB_DL*12)]); + ch_magb = (__m128i*)(&dl_ch_magb[0][((int16_t)symbol*frame_parms->N_RB_DL*12)]); + sic_data = (uint16_t*)(&sic_buffer[0][((int16_t)len_acc)]); + + symbol_mod = (symbol>=(7-frame_parms->Ncp)) ? symbol-(7-frame_parms->Ncp) : symbol; + + pbch_pss_sss_adjust=adjust_G2(frame_parms,&rb_alloc,6,subframe,symbol); + + if ((symbol_mod==0) || (symbol_mod==(4-frame_parms->Ncp))) { + amp_tmp = 0x1fff;//dlsch0->sqrt_rho_b; already taken into account + if (frame_parms->nb_antenna_ports_eNB!=1) + len = nb_rb*8 - (2*pbch_pss_sss_adjust/3); + else + len = nb_rb*10 - (5*pbch_pss_sss_adjust/6); + } else { + amp_tmp = 0x1fff; //dlsch0->sqrt_rho_a; already taken into account + len = nb_rb*12 - pbch_pss_sss_adjust; + } + + if (mod_order_0==6) + amp_tmp=amp_tmp<<1; // to compensate for >> 1 shift in modulation + + len_acc+=len; + + multadd_complex_vector_real_scalar((int16_t *)sic_data, + amp_tmp, + (int16_t *)rho_amp_x0, //this is in Q13 + 1, + len); + + mult_cpx_vector((int16_t *)rho_1, //Q15 + (int16_t *)rho_amp_x0, //Q13 + (int16_t*)rho_rho_amp_x0, + len, + 13); + + sub_cpx_vector16((int16_t *)rxF, + (int16_t *)rho_rho_amp_x0, + //(int16_t *)clean_x1, + (int16_t *)rxF, + len*2); + + llr2 = llr32; + llr32 += (len*6); + + len_mod4 =len&3; + len2=len>>2; // length in quad words (4 REs) + len2+=(len_mod4?0:1); + + + + for (i=0; i<len2; i++) { + + __m128i *x1 = (__m128i*)rxF; + xmm1 = _mm_abs_epi16(x1[i]); + xmm1 = _mm_subs_epi16(ch_mag[i],xmm1); + xmm2 = _mm_abs_epi16(xmm1); + xmm2 = _mm_subs_epi16(ch_magb[i],xmm2); + + // loop over all LLRs in quad word (24 coded bits) + /* + for (j=0;j<8;j+=2) { + llr2[0] = ((short *)&rxF[i])[j]; + llr2[1] = ((short *)&rxF[i])[j+1]; 
+ llr2[2] = ((short *)&xmm1)[j]; + llr2[3] = ((short *)&xmm1)[j+1]; + llr2[4] = ((short *)&xmm2)[j]; + llr2[5] = ((short *)&xmm2)[j+1]; + + llr2+=6; + } + */ + llr2[0] = ((short *)&x1[i])[0]; + llr2[1] = ((short *)&x1[i])[1]; + llr2[2] = _mm_extract_epi16(xmm1,0); + llr2[3] = _mm_extract_epi16(xmm1,1);//((short *)&xmm1)[j+1]; + llr2[4] = _mm_extract_epi16(xmm2,0);//((short *)&xmm2)[j]; + llr2[5] = _mm_extract_epi16(xmm2,1);//((short *)&xmm2)[j+1]; + + + llr2+=6; + llr2[0] = ((short *)&x1[i])[2]; + llr2[1] = ((short *)&x1[i])[3]; + + llr2[2] = _mm_extract_epi16(xmm1,2); + llr2[3] = _mm_extract_epi16(xmm1,3);//((short *)&xmm1)[j+1]; + llr2[4] = _mm_extract_epi16(xmm2,2);//((short *)&xmm2)[j]; + llr2[5] = _mm_extract_epi16(xmm2,3);//((short *)&xmm2)[j+1]; + + llr2+=6; + llr2[0] = ((short *)&x1[i])[4]; + llr2[1] = ((short *)&x1[i])[5]; + + llr2[2] = _mm_extract_epi16(xmm1,4); + llr2[3] = _mm_extract_epi16(xmm1,5);//((short *)&xmm1)[j+1]; + llr2[4] = _mm_extract_epi16(xmm2,4);//((short *)&xmm2)[j]; + llr2[5] = _mm_extract_epi16(xmm2,5);//((short *)&xmm2)[j+1]; + + llr2+=6; + llr2[0] = ((short *)&x1[i])[6]; + llr2[1] = ((short *)&x1[i])[7]; + + llr2[2] = _mm_extract_epi16(xmm1,6); + llr2[3] = _mm_extract_epi16(xmm1,7);//((short *)&xmm1)[j+1]; + llr2[4] = _mm_extract_epi16(xmm2,6);//((short *)&xmm2)[j]; + llr2[5] = _mm_extract_epi16(xmm2,7);//((short *)&xmm2)[j+1]; + + llr2+=6; + + } + + // *llr_save = llr; + + _mm_empty(); + _m_empty(); + + } +} +//#endif +//============================================================================================== +// DUAL-STREAM +//============================================================================================== + +//---------------------------------------------------------------------------------------------- +// QPSK +//---------------------------------------------------------------------------------------------- + +#if defined(__x86_64__) || defined(__i386) +__m128i y0r_over2 __attribute__ ((aligned(16))); +__m128i 
y0i_over2 __attribute__ ((aligned(16))); +__m128i y1r_over2 __attribute__ ((aligned(16))); +__m128i y1i_over2 __attribute__ ((aligned(16))); + +__m128i A __attribute__ ((aligned(16))); +__m128i B __attribute__ ((aligned(16))); +__m128i C __attribute__ ((aligned(16))); +__m128i D __attribute__ ((aligned(16))); +__m128i E __attribute__ ((aligned(16))); +__m128i F __attribute__ ((aligned(16))); +__m128i G __attribute__ ((aligned(16))); +__m128i H __attribute__ ((aligned(16))); + +#endif + +int dlsch_qpsk_qpsk_llr(LTE_DL_FRAME_PARMS *frame_parms, + int **rxdataF_comp, + int **rxdataF_comp_i, + int **rho_i, + short *dlsch_llr, + unsigned char symbol, + unsigned char first_symbol_flag, + unsigned short nb_rb, + uint16_t pbch_pss_sss_adjust, + short **llr16p) +{ + + int16_t *rxF=(int16_t*)&rxdataF_comp[0][(symbol*frame_parms->N_RB_DL*12)]; + int16_t *rxF_i=(int16_t*)&rxdataF_comp_i[0][(symbol*frame_parms->N_RB_DL*12)]; + int16_t *rho=(int16_t*)&rho_i[0][(symbol*frame_parms->N_RB_DL*12)]; + int16_t *llr16; + int len; + uint8_t symbol_mod = (symbol >= (7-frame_parms->Ncp))? 
(symbol-(7-frame_parms->Ncp)) : symbol; + + if (first_symbol_flag == 1) { + llr16 = (int16_t*)dlsch_llr; + } else { + llr16 = (int16_t*)(*llr16p); + } + + AssertFatal(llr16!=NULL,"dlsch_qpsk_qpsk_llr: llr is null, symbol %d\n",symbol); + + if ((symbol_mod==0) || (symbol_mod==(4-frame_parms->Ncp))) { + // if symbol has pilots + if (frame_parms->nb_antenna_ports_eNB!=1) + // in 2 antenna ports we have 8 REs per symbol per RB + len = (nb_rb*8) - (2*pbch_pss_sss_adjust/3); + else + // for 1 antenna port we have 10 REs per symbol per RB + len = (nb_rb*10) - (5*pbch_pss_sss_adjust/6); + } else { + // symbol has no pilots + len = (nb_rb*12) - pbch_pss_sss_adjust; + } + + // printf("dlsch_qpsk_qpsk_llr: symbol %d,nb_rb %d, len %d,pbch_pss_sss_adjust %d\n",symbol,nb_rb,len,pbch_pss_sss_adjust); + // printf("qpsk_qpsk: len %d, llr16 %p\n",len,llr16); + qpsk_qpsk((short *)rxF, + (short *)rxF_i, + (short *)llr16, + (short *)rho, + len); + + llr16 += (len<<1); + *llr16p = (short *)llr16; + + return(0); +} + +//__m128i ONE_OVER_SQRT_8 __attribute__((aligned(16))); + +void qpsk_qpsk(short *stream0_in, + short *stream1_in, + short *stream0_out, + short *rho01, + int length + ) +{ + + /* + This function computes the LLRs of stream 0 (s_0) in presence of the interfering stream 1 (s_1) assuming that both symbols are QPSK. It can be used for both MU-MIMO interference-aware receiver or for SU-MIMO receivers. 
+ + Parameters: + stream0_in = Matched filter output y0' = (h0*g0)*y0 + stream1_in = Matched filter output y1' = (h0*g1)*y0 + stream0_out = LLRs + rho01 = Correlation between the two effective channels \rho_{10} = (h1*g1)*(h0*g0) + length = number of resource elements + */ + +#if defined(__x86_64__) || defined(__i386__) + __m128i *rho01_128i = (__m128i *)rho01; + __m128i *stream0_128i_in = (__m128i *)stream0_in; + __m128i *stream1_128i_in = (__m128i *)stream1_in; + __m128i *stream0_128i_out = (__m128i *)stream0_out; + __m128i ONE_OVER_SQRT_8 = _mm_set1_epi16(23170); //round(2^16/sqrt(8)) +#elif defined(__arm__) + int16x8_t *rho01_128i = (int16x8_t *)rho01; + int16x8_t *stream0_128i_in = (int16x8_t *)stream0_in; + int16x8_t *stream1_128i_in = (int16x8_t *)stream1_in; + int16x8_t *stream0_128i_out = (int16x8_t *)stream0_out; + int16x8_t ONE_OVER_SQRT_8 = vdupq_n_s16(23170); //round(2^16/sqrt(8)) +#endif + + int i; + + + for (i=0; i<length>>2; i+=2) { + // in each iteration, we take 8 complex samples +#if defined(__x86_64__) || defined(__i386__) + xmm0 = rho01_128i[i]; // 4 symbols + xmm1 = rho01_128i[i+1]; + + // put (rho_r + rho_i)/2sqrt2 in rho_rpi + // put (rho_r - rho_i)/2sqrt2 in rho_rmi + + xmm0 = _mm_shufflelo_epi16(xmm0,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm0 = _mm_shufflehi_epi16(xmm0,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm0 = _mm_shuffle_epi32(xmm0,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm1 = _mm_shufflelo_epi16(xmm1,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm1 = _mm_shufflehi_epi16(xmm1,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm1 = _mm_shuffle_epi32(xmm1,0xd8); //_MM_SHUFFLE(0,2,1,3)); + //xmm0 = [Re(0,1) Re(2,3) Im(0,1) Im(2,3)] + //xmm1 = [Re(4,5) Re(6,7) Im(4,5) Im(6,7)] + xmm2 = _mm_unpacklo_epi64(xmm0,xmm1); // Re(rho) + xmm3 = _mm_unpackhi_epi64(xmm0,xmm1); // Im(rho) + rho_rpi = _mm_adds_epi16(xmm2,xmm3); // rho = Re(rho) + Im(rho) + rho_rmi = _mm_subs_epi16(xmm2,xmm3); // rho* = Re(rho) - Im(rho) + + // divide by sqrt(8), no shift needed ONE_OVER_SQRT_8 = Q1.16 + 
rho_rpi = _mm_mulhi_epi16(rho_rpi,ONE_OVER_SQRT_8); + rho_rmi = _mm_mulhi_epi16(rho_rmi,ONE_OVER_SQRT_8); +#elif defined(__arm__) + + +#endif + // Compute LLR for first bit of stream 0 + + // Compute real and imaginary parts of MF output for stream 0 +#if defined(__x86_64__) || defined(__i386__) + xmm0 = stream0_128i_in[i]; + xmm1 = stream0_128i_in[i+1]; + + xmm0 = _mm_shufflelo_epi16(xmm0,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm0 = _mm_shufflehi_epi16(xmm0,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm0 = _mm_shuffle_epi32(xmm0,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm1 = _mm_shufflelo_epi16(xmm1,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm1 = _mm_shufflehi_epi16(xmm1,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm1 = _mm_shuffle_epi32(xmm1,0xd8); //_MM_SHUFFLE(0,2,1,3)); + //xmm0 = [Re(0,1) Re(2,3) Im(0,1) Im(2,3)] + //xmm1 = [Re(4,5) Re(6,7) Im(4,5) Im(6,7)] + y0r = _mm_unpacklo_epi64(xmm0,xmm1); // = [y0r(1),y0r(2),y0r(3),y0r(4)] + y0i = _mm_unpackhi_epi64(xmm0,xmm1); + + y0r_over2 = _mm_srai_epi16(y0r,1); // divide by 2 + y0i_over2 = _mm_srai_epi16(y0i,1); // divide by 2 +#elif defined(__arm__) + + +#endif + // Compute real and imaginary parts of MF output for stream 1 +#if defined(__x86_64__) || defined(__i386__) + xmm0 = stream1_128i_in[i]; + xmm1 = stream1_128i_in[i+1]; + + xmm0 = _mm_shufflelo_epi16(xmm0,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm0 = _mm_shufflehi_epi16(xmm0,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm0 = _mm_shuffle_epi32(xmm0,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm1 = _mm_shufflelo_epi16(xmm1,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm1 = _mm_shufflehi_epi16(xmm1,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm1 = _mm_shuffle_epi32(xmm1,0xd8); //_MM_SHUFFLE(0,2,1,3)); + //xmm0 = [Re(0,1) Re(2,3) Im(0,1) Im(2,3)] + //xmm1 = [Re(4,5) Re(6,7) Im(4,5) Im(6,7)] + y1r = _mm_unpacklo_epi64(xmm0,xmm1); //[y1r(1),y1r(2),y1r(3),y1r(4)] + y1i = _mm_unpackhi_epi64(xmm0,xmm1); //[y1i(1),y1i(2),y1i(3),y1i(4)] + + y1r_over2 = _mm_srai_epi16(y1r,1); // divide by 2 + y1i_over2 = _mm_srai_epi16(y1i,1); // divide by 2 + 
+ // Compute the terms for the LLR of first bit + + xmm0 = _mm_setzero_si128(); // ZERO + + // 1 term for numerator of LLR + xmm3 = _mm_subs_epi16(y1r_over2,rho_rpi); + A = _mm_abs_epi16(xmm3); // A = |y1r/2 - rho/sqrt(8)| + xmm2 = _mm_adds_epi16(A,y0i_over2); // = |y1r/2 - rho/sqrt(8)| + y0i/2 + xmm3 = _mm_subs_epi16(y1i_over2,rho_rmi); + B = _mm_abs_epi16(xmm3); // B = |y1i/2 - rho*/sqrt(8)| + logmax_num_re0 = _mm_adds_epi16(B,xmm2); // = |y1r/2 - rho/sqrt(8)|+|y1i/2 - rho*/sqrt(8)| + y0i/2 + + // 2 term for numerator of LLR + xmm3 = _mm_subs_epi16(y1r_over2,rho_rmi); + C = _mm_abs_epi16(xmm3); // C = |y1r/2 - rho*/4| + xmm2 = _mm_subs_epi16(C,y0i_over2); // = |y1r/2 - rho*/4| - y0i/2 + xmm3 = _mm_adds_epi16(y1i_over2,rho_rpi); + D = _mm_abs_epi16(xmm3); // D = |y1i/2 + rho/4| + xmm2 = _mm_adds_epi16(xmm2,D); // |y1r/2 - rho*/4| + |y1i/2 + rho/4| - y0i/2 + logmax_num_re0 = _mm_max_epi16(logmax_num_re0,xmm2); // max, numerator done + + // 1 term for denominator of LLR + xmm3 = _mm_adds_epi16(y1r_over2,rho_rmi); + E = _mm_abs_epi16(xmm3); // E = |y1r/2 + rho*/4| + xmm2 = _mm_adds_epi16(E,y0i_over2); // = |y1r/2 + rho*/4| + y0i/2 + xmm3 = _mm_subs_epi16(y1i_over2,rho_rpi); + F = _mm_abs_epi16(xmm3); // F = |y1i/2 - rho/4| + logmax_den_re0 = _mm_adds_epi16(F,xmm2); // = |y1r/2 + rho*/4| + |y1i/2 - rho/4| + y0i/2 + + // 2 term for denominator of LLR + xmm3 = _mm_adds_epi16(y1r_over2,rho_rpi); + G = _mm_abs_epi16(xmm3); // G = |y1r/2 + rho/4| + xmm2 = _mm_subs_epi16(G,y0i_over2); // = |y1r/2 + rho/4| - y0i/2 + xmm3 = _mm_adds_epi16(y1i_over2,rho_rmi); + H = _mm_abs_epi16(xmm3); // H = |y1i/2 + rho*/4| + xmm2 = _mm_adds_epi16(xmm2,H); // = |y1r/2 + rho/4| + |y1i/2 + rho*/4| - y0i/2 + logmax_den_re0 = _mm_max_epi16(logmax_den_re0,xmm2); // max, denominator done + + // Compute the terms for the LLR of first bit + + // 1 term for nominator of LLR + xmm2 = _mm_adds_epi16(A,y0r_over2); + logmax_num_im0 = _mm_adds_epi16(B,xmm2); // = |y1r/2 - rho/4| + |y1i/2 - rho*/4| + y0r/2 
+ + // 2 term for nominator of LLR + xmm2 = _mm_subs_epi16(E,y0r_over2); + xmm2 = _mm_adds_epi16(xmm2,F); // = |y1r/2 + rho*/4| + |y1i/2 - rho/4| - y0r/2 + + logmax_num_im0 = _mm_max_epi16(logmax_num_im0,xmm2); // max, nominator done + + // 1 term for denominator of LLR + xmm2 = _mm_adds_epi16(C,y0r_over2); + logmax_den_im0 = _mm_adds_epi16(D,xmm2); // = |y1r/2 - rho*/4| + |y1i/2 + rho/4| - y0r/2 + + xmm2 = _mm_subs_epi16(G,y0r_over2); + xmm2 = _mm_adds_epi16(xmm2,H); // = |y1r/2 + rho/4| + |y1i/2 + rho*/4| - y0r/2 + + logmax_den_im0 = _mm_max_epi16(logmax_den_im0,xmm2); // max, denominator done + + // LLR of first bit [L1(1), L1(2), L1(3), L1(4)] + y0r = _mm_adds_epi16(y0r,logmax_num_re0); + y0r = _mm_subs_epi16(y0r,logmax_den_re0); + + // LLR of second bit [L2(1), L2(2), L2(3), L2(4)] + y0i = _mm_adds_epi16(y0i,logmax_num_im0); + y0i = _mm_subs_epi16(y0i,logmax_den_im0); + + _mm_storeu_si128(&stream0_128i_out[i],_mm_unpacklo_epi16(y0r,y0i)); // = [L1(1), L2(1), L1(2), L2(2)] + + if (i<((length>>1) - 1)) // false if only 2 REs remain + _mm_storeu_si128(&stream0_128i_out[i+1],_mm_unpackhi_epi16(y0r,y0i)); + +#elif defined(__x86_64__) + +#endif + } + +#if defined(__x86_64__) || defined(__i386__) + _mm_empty(); + _m_empty(); +#endif +} + +int dlsch_qpsk_16qam_llr(LTE_DL_FRAME_PARMS *frame_parms, + int32_t **rxdataF_comp, + int32_t **rxdataF_comp_i, + int32_t **dl_ch_mag_i, //|h_1|^2*(2/sqrt{10}) + int32_t **rho_i, + int16_t *dlsch_llr, + uint8_t symbol, + uint8_t first_symbol_flag, + uint16_t nb_rb, + uint16_t pbch_pss_sss_adjust, + int16_t **llr16p) +{ + + int16_t *rxF=(int16_t*)&rxdataF_comp[0][(symbol*frame_parms->N_RB_DL*12)]; + int16_t *rxF_i=(int16_t*)&rxdataF_comp_i[0][(symbol*frame_parms->N_RB_DL*12)]; + int16_t *ch_mag_i = (int16_t*)&dl_ch_mag_i[0][(symbol*frame_parms->N_RB_DL*12)]; + int16_t *rho=(int16_t*)&rho_i[0][(symbol*frame_parms->N_RB_DL*12)]; + int16_t *llr16; + int len; + uint8_t symbol_mod = (symbol >= (7-frame_parms->Ncp))? 
(symbol-(7-frame_parms->Ncp)) : symbol; + + if (first_symbol_flag == 1) { + llr16 = (int16_t*)dlsch_llr; + } else { + llr16 = (int16_t*)(*llr16p); + } + + AssertFatal(llr16!=NULL,"dlsch_qpsk_qpsk_llr: llr is null, symbol %d\n",symbol); + + + if ((symbol_mod==0) || (symbol_mod==(4-frame_parms->Ncp))) { + // if symbol has pilots + if (frame_parms->nb_antenna_ports_eNB!=1) + // in 2 antenna ports we have 8 REs per symbol per RB + len = (nb_rb*8) - (2*pbch_pss_sss_adjust/3); + else + // for 1 antenna port we have 10 REs per symbol per RB + len = (nb_rb*10) - (5*pbch_pss_sss_adjust/6); + } else { + // symbol has no pilots + len = (nb_rb*12) - pbch_pss_sss_adjust; + } + + qpsk_qam16((short *)rxF, + (short *)rxF_i, + (short *)ch_mag_i, + (short *)llr16, + (short *)rho, + len); + + llr16 += (len<<1); + *llr16p = (short *)llr16; + + return(0); +} + +/* +#if defined(__x86_64__) || defined(__i386__) +__m128i ONE_OVER_SQRT_2 __attribute__((aligned(16))); +__m128i ONE_OVER_SQRT_10 __attribute__((aligned(16))); +__m128i THREE_OVER_SQRT_10 __attribute__((aligned(16))); +__m128i ONE_OVER_SQRT_10_Q15 __attribute__((aligned(16))); +__m128i SQRT_10_OVER_FOUR __attribute__((aligned(16))); +__m128i ch_mag_int; +#endif +*/ +void qpsk_qam16(int16_t *stream0_in, + int16_t *stream1_in, + int16_t *ch_mag_i, + int16_t *stream0_out, + int16_t *rho01, + int32_t length + ) +{ + /* + This function computes the LLRs of stream 0 (s_0) in presence of the interfering stream 1 (s_1) assuming that both symbols are QPSK. It can be used for both MU-MIMO interference-aware receiver or for SU-MIMO receivers. 
+ + Parameters: + stream0_in = Matched filter output y0' = (h0*g0)*y0 + stream1_in = Matched filter output y1' = (h0*g1)*y0 + stream0_out = LLRs + rho01 = Correlation between the two effective channels \rho_{10} = (h1*g1)*(h0*g0) + length = number of resource elements + */ + +#if defined(__x86_64__) || defined(__i386__) + __m128i *rho01_128i = (__m128i *)rho01; + __m128i *stream0_128i_in = (__m128i *)stream0_in; + __m128i *stream1_128i_in = (__m128i *)stream1_in; + __m128i *stream0_128i_out = (__m128i *)stream0_out; + __m128i *ch_mag_128i_i = (__m128i *)ch_mag_i; + __m128i ONE_OVER_SQRT_2 = _mm_set1_epi16(23170); // round(1/sqrt(2)*2^15) + __m128i ONE_OVER_SQRT_10_Q15 = _mm_set1_epi16(10362); // round(1/sqrt(10)*2^15) + __m128i THREE_OVER_SQRT_10 = _mm_set1_epi16(31086); // round(3/sqrt(10)*2^15) + __m128i SQRT_10_OVER_FOUR = _mm_set1_epi16(25905); // round(sqrt(10)/4*2^15) + __m128i ch_mag_int __attribute__((aligned(16))); +#elif defined(__arm__) + int16x8_t *rho01_128i = (int16x8_t *)rho01; + int16x8_t *stream0_128i_in = (int16x8_t *)stream0_in; + int16x8_t *stream1_128i_in = (int16x8_t *)stream1_in; + int16x8_t *stream0_128i_out = (int16x8_t *)stream0_out; + int16x8_t *ch_mag_128i_i = (int16x8_t *)ch_mag_i; + int16x8_t ONE_OVER_SQRT_2 = vdupq_n_s16(23170); // round(1/sqrt(2)*2^15) + int16x8_t ONE_OVER_SQRT_10_Q15 = vdupq_n_s16(10362); // round(1/sqrt(10)*2^15) + int16x8_t THREE_OVER_SQRT_10 = vdupq_n_s16(31086); // round(3/sqrt(10)*2^15) + int16x8_t SQRT_10_OVER_FOUR = vdupq_n_s16(25905); // round(sqrt(10)/4*2^15) + int16x8_t ch_mag_int __attribute__((aligned(16))); +#endif + +#ifdef DEBUG_LLR + print_shorts2("rho01_128i:\n",rho01_128i); +#endif + + int i; + + + for (i=0; i<length>>2; i+=2) { + // in each iteration, we take 8 complex samples + +#if defined(__x86_64__) || defined(__i386__) + + xmm0 = rho01_128i[i]; // 4 symbols + xmm1 = rho01_128i[i+1]; + + // put (rho_r + rho_i)/2sqrt2 in rho_rpi + // put (rho_r - rho_i)/2sqrt2 in rho_rmi + xmm0 = 
_mm_shufflelo_epi16(xmm0,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm0 = _mm_shufflehi_epi16(xmm0,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm0 = _mm_shuffle_epi32(xmm0,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm1 = _mm_shufflelo_epi16(xmm1,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm1 = _mm_shufflehi_epi16(xmm1,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm1 = _mm_shuffle_epi32(xmm1,0xd8); //_MM_SHUFFLE(0,2,1,3)); + //xmm0 = [Re(0,1) Re(2,3) Im(0,1) Im(2,3)] + //xmm1 = [Re(4,5) Re(6,7) Im(4,5) Im(6,7)] + xmm2 = _mm_unpacklo_epi64(xmm0,xmm1); // Re(rho) + xmm3 = _mm_unpackhi_epi64(xmm0,xmm1); // Im(rho) + rho_rpi = _mm_adds_epi16(xmm2,xmm3); // rho = Re(rho) + Im(rho) + rho_rmi = _mm_subs_epi16(xmm2,xmm3); // rho* = Re(rho) - Im(rho) + + // divide by sqrt(2) + rho_rpi = _mm_mulhi_epi16(rho_rpi, ONE_OVER_SQRT_2); + rho_rmi = _mm_mulhi_epi16(rho_rmi, ONE_OVER_SQRT_2); + rho_rpi = _mm_slli_epi16(rho_rpi,1); + rho_rmi = _mm_slli_epi16(rho_rmi,1); + + // Compute LLR for first bit of stream 0 + + // Compute real and imaginary parts of MF output for stream 0 + xmm0 = stream0_128i_in[i]; + xmm1 = stream0_128i_in[i+1]; + + xmm0 = _mm_shufflelo_epi16(xmm0,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm0 = _mm_shufflehi_epi16(xmm0,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm0 = _mm_shuffle_epi32(xmm0,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm1 = _mm_shufflelo_epi16(xmm1,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm1 = _mm_shufflehi_epi16(xmm1,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm1 = _mm_shuffle_epi32(xmm1,0xd8); //_MM_SHUFFLE(0,2,1,3)); + //xmm0 = [Re(0,1) Re(2,3) Im(0,1) Im(2,3)] + //xmm1 = [Re(4,5) Re(6,7) Im(4,5) Im(6,7)] + y0r = _mm_unpacklo_epi64(xmm0,xmm1); // = [y0r(1),y0r(2),y0r(3),y0r(4)] + y0i = _mm_unpackhi_epi64(xmm0,xmm1); + + // divide by sqrt(2) + y0r_over2 = _mm_mulhi_epi16(y0r, ONE_OVER_SQRT_2); + y0i_over2 = _mm_mulhi_epi16(y0i, ONE_OVER_SQRT_2); + y0r_over2 = _mm_slli_epi16(y0r,1); + y0i_over2 = _mm_slli_epi16(y0i,1); + + y0_p_1_1 = _mm_adds_epi16(y0r_over2, y0i_over2); + y0_m_1_1 = _mm_subs_epi16(y0r_over2, y0i_over2); + 
+ // Compute real and imaginary parts of MF output for stream 1 + xmm0 = stream1_128i_in[i]; + xmm1 = stream1_128i_in[i+1]; + + xmm0 = _mm_shufflelo_epi16(xmm0,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm0 = _mm_shufflehi_epi16(xmm0,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm0 = _mm_shuffle_epi32(xmm0,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm1 = _mm_shufflelo_epi16(xmm1,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm1 = _mm_shufflehi_epi16(xmm1,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm1 = _mm_shuffle_epi32(xmm1,0xd8); //_MM_SHUFFLE(0,2,1,3)); + //xmm0 = [Re(0,1) Re(2,3) Im(0,1) Im(2,3)] + //xmm1 = [Re(4,5) Re(6,7) Im(4,5) Im(6,7)] + y1r = _mm_unpacklo_epi64(xmm0,xmm1); //[y1r(1),y1r(2),y1r(3),y1r(4)] + y1i = _mm_unpackhi_epi64(xmm0,xmm1); //[y1i(1),y1i(2),y1i(3),y1i(4)] + + xmm0 = _mm_setzero_si128(); // ZERO + + // compute psi + xmm3 = _mm_subs_epi16(y1r,rho_rpi); + psi_r_p1_p1 = _mm_abs_epi16(xmm3); + xmm3 = _mm_subs_epi16(y1i,rho_rmi); + psi_i_p1_p1 = _mm_abs_epi16(xmm3); + xmm3 = _mm_subs_epi16(y1r,rho_rmi); + psi_r_p1_m1 = _mm_abs_epi16(xmm3); + xmm3 = _mm_adds_epi16(y1i,rho_rpi); + psi_i_p1_m1 = _mm_abs_epi16(xmm3); + xmm3 = _mm_adds_epi16(y1r,rho_rmi); + psi_r_m1_p1 = _mm_abs_epi16(xmm3); + xmm3 = _mm_subs_epi16(y1i,rho_rpi); + psi_i_m1_p1 = _mm_abs_epi16(xmm3); + xmm3 = _mm_adds_epi16(y1r,rho_rpi); + psi_r_m1_m1 = _mm_abs_epi16(xmm3); + xmm3 = _mm_adds_epi16(y1i,rho_rmi); + psi_i_m1_m1 = _mm_abs_epi16(xmm3); + + // Rearrange interfering channel magnitudes + xmm2 = ch_mag_128i_i[i]; + xmm3 = ch_mag_128i_i[i+1]; + + xmm2 = _mm_shufflelo_epi16(xmm2,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm2 = _mm_shufflehi_epi16(xmm2,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm2 = _mm_shuffle_epi32(xmm2,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm3 = _mm_shufflelo_epi16(xmm3,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm3 = _mm_shufflehi_epi16(xmm3,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm3 = _mm_shuffle_epi32(xmm3,0xd8); //_MM_SHUFFLE(0,2,1,3)); + + ch_mag_int = _mm_unpacklo_epi64(xmm2,xmm3); + + // calculate optimal interference 
amplitudes + interference_abs_epi16(psi_r_p1_p1 , ch_mag_int, a_r_p1_p1 , ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_i_p1_p1 , ch_mag_int, a_i_p1_p1 , ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_r_p1_m1 , ch_mag_int, a_r_p1_m1 , ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_i_p1_m1 , ch_mag_int, a_i_p1_m1 , ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_r_m1_p1 , ch_mag_int, a_r_m1_p1 , ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_i_m1_p1 , ch_mag_int, a_i_m1_p1 , ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_r_m1_m1 , ch_mag_int, a_r_m1_m1 , ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_i_m1_m1 , ch_mag_int, a_i_m1_m1 , ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + + // prodsum + prodsum_psi_a_epi16(psi_r_p1_p1, a_r_p1_p1, psi_i_p1_p1, a_i_p1_p1, psi_a_p1_p1); + prodsum_psi_a_epi16(psi_r_p1_m1, a_r_p1_m1, psi_i_p1_m1, a_i_p1_m1, psi_a_p1_m1); + prodsum_psi_a_epi16(psi_r_m1_p1, a_r_m1_p1, psi_i_m1_p1, a_i_m1_p1, psi_a_m1_p1); + prodsum_psi_a_epi16(psi_r_m1_m1, a_r_m1_m1, psi_i_m1_m1, a_i_m1_m1, psi_a_m1_m1); + + // squares + square_a_epi16(a_r_p1_p1, a_i_p1_p1, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_p1_p1); + square_a_epi16(a_r_p1_m1, a_i_p1_m1, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_p1_m1); + square_a_epi16(a_r_m1_p1, a_i_m1_p1, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_m1_p1); + square_a_epi16(a_r_m1_m1, a_i_m1_m1, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_m1_m1); + + // Computing Metrics + xmm0 = _mm_subs_epi16(psi_a_p1_p1, a_sq_p1_p1); + bit_met_p1_p1 = _mm_adds_epi16(xmm0, y0_p_1_1); + + xmm0 = _mm_subs_epi16(psi_a_p1_m1, a_sq_p1_m1); + bit_met_p1_m1 = _mm_adds_epi16(xmm0, y0_m_1_1); + + xmm0 = _mm_subs_epi16(psi_a_m1_p1, a_sq_m1_p1); + bit_met_m1_p1 = _mm_subs_epi16(xmm0, y0_m_1_1); + + xmm0 = _mm_subs_epi16(psi_a_m1_m1, a_sq_m1_m1); + bit_met_m1_m1 = _mm_subs_epi16(xmm0, y0_p_1_1); + + // MSB 
+ logmax_num_re0 = _mm_max_epi16(bit_met_p1_p1,bit_met_p1_m1); // bit=0 + logmax_den_re0 = _mm_max_epi16(bit_met_m1_p1,bit_met_m1_m1); // bit=1 + + y0r = _mm_subs_epi16(logmax_num_re0,logmax_den_re0); + + // LSB + logmax_num_im0 = _mm_max_epi16(bit_met_p1_p1,bit_met_m1_p1); // bit=0 + logmax_den_im0 = _mm_max_epi16(bit_met_p1_m1,bit_met_m1_m1); // bit=1 + + y0i = _mm_subs_epi16(logmax_num_im0,logmax_den_im0); + + stream0_128i_out[i] = _mm_unpacklo_epi16(y0r,y0i); // = [L1(1), L2(1), L1(2), L2(2)] + + if (i<((length>>1) - 1)) // false if only 2 REs remain + stream0_128i_out[i+1] = _mm_unpackhi_epi16(y0r,y0i); + +#elif defined(__arm__) + +#endif + } + +#if defined(__x86_64__) || defined(__i386__) + _mm_empty(); + _m_empty(); +#endif +} + +int dlsch_qpsk_64qam_llr(LTE_DL_FRAME_PARMS *frame_parms, + int32_t **rxdataF_comp, + int32_t **rxdataF_comp_i, + int32_t **dl_ch_mag_i, //|h_1|^2*(2/sqrt{10}) + int32_t **rho_i, + int16_t *dlsch_llr, + uint8_t symbol, + uint8_t first_symbol_flag, + uint16_t nb_rb, + uint16_t pbch_pss_sss_adjust, + int16_t **llr16p) +{ + + int16_t *rxF=(int16_t*)&rxdataF_comp[0][(symbol*frame_parms->N_RB_DL*12)]; + int16_t *rxF_i=(int16_t*)&rxdataF_comp_i[0][(symbol*frame_parms->N_RB_DL*12)]; + int16_t *ch_mag_i = (int16_t*)&dl_ch_mag_i[0][(symbol*frame_parms->N_RB_DL*12)]; + int16_t *rho=(int16_t*)&rho_i[0][(symbol*frame_parms->N_RB_DL*12)]; + int16_t *llr16; + int len; + uint8_t symbol_mod = (symbol >= (7-frame_parms->Ncp))? 
(symbol-(7-frame_parms->Ncp)) : symbol; + + + if (first_symbol_flag == 1) { + llr16 = (int16_t*)dlsch_llr; + } else { + llr16 = (int16_t*)(*llr16p); + } + + AssertFatal(llr16!=NULL,"dlsch_qpsk_qam64_llr: llr is null, symbol %d\n",symbol); + + if ((symbol_mod==0) || (symbol_mod==(4-frame_parms->Ncp))) { + // if symbol has pilots + if (frame_parms->nb_antenna_ports_eNB!=1) + // in 2 antenna ports we have 8 REs per symbol per RB + len = (nb_rb*8) - (2*pbch_pss_sss_adjust/3); + else + // for 1 antenna port we have 10 REs per symbol per RB + len = (nb_rb*10) - (5*pbch_pss_sss_adjust/6); + } else { + // symbol has no pilots + len = (nb_rb*12) - pbch_pss_sss_adjust; + } + + qpsk_qam64((short *)rxF, + (short *)rxF_i, + (short *)ch_mag_i, + (short *)llr16, + (short *)rho, + len); + + llr16 += (len<<1); + *llr16p = (short *)llr16; + + return(0); +} +/* +__m128i ONE_OVER_SQRT_2_42 __attribute__((aligned(16))); +__m128i THREE_OVER_SQRT_2_42 __attribute__((aligned(16))); +__m128i FIVE_OVER_SQRT_2_42 __attribute__((aligned(16))); +__m128i SEVEN_OVER_SQRT_2_42 __attribute__((aligned(16))); + +__m128i ch_mag_int_with_sigma2 __attribute__((aligned(16))); +__m128i two_ch_mag_int_with_sigma2 __attribute__((aligned(16))); +__m128i three_ch_mag_int_with_sigma2 __attribute__((aligned(16))); +__m128i SQRT_42_OVER_FOUR __attribute__((aligned(16))); +*/ +void qpsk_qam64(short *stream0_in, + short *stream1_in, + short *ch_mag_i, + short *stream0_out, + short *rho01, + int length + ) +{ + + /* + This function computes the LLRs of stream 0 (s_0) in presence of the interfering stream 1 (s_1) assuming that both symbols are QPSK. It can be used for both MU-MIMO interference-aware receiver or for SU-MIMO receivers. 
+ + Parameters: + stream0_in = Matched filter output y0' = (h0*g0)*y0 + stream1_in = Matched filter output y1' = (h0*g1)*y0 + stream0_out = LLRs + rho01 = Correlation between the two effective channels \rho_{10} = (h1*g1)*(h0*g0) + length = number of resource elements + */ + +#if defined(__x86_64__) || defined(__i386__) + __m128i *rho01_128i = (__m128i *)rho01; + __m128i *stream0_128i_in = (__m128i *)stream0_in; + __m128i *stream1_128i_in = (__m128i *)stream1_in; + __m128i *stream0_128i_out = (__m128i *)stream0_out; + __m128i *ch_mag_128i_i = (__m128i *)ch_mag_i; + __m128i ONE_OVER_SQRT_2 = _mm_set1_epi16(23170); // round(1/sqrt(2)*2^15) + __m128i ONE_OVER_SQRT_2_42 = _mm_set1_epi16(3575); // round(1/sqrt(2*42)*2^15) + __m128i THREE_OVER_SQRT_2_42 = _mm_set1_epi16(10726); // round(3/sqrt(2*42)*2^15) + __m128i FIVE_OVER_SQRT_2_42 = _mm_set1_epi16(17876); // round(5/sqrt(2*42)*2^15) + __m128i SEVEN_OVER_SQRT_2_42 = _mm_set1_epi16(25027); // round(7/sqrt(2*42)*2^15) + __m128i SQRT_42_OVER_FOUR = _mm_set1_epi16(13272); // round(sqrt(42)/4*2^13), Q3.1 + __m128i ch_mag_int; + __m128i ch_mag_int_with_sigma2; + __m128i two_ch_mag_int_with_sigma2; + __m128i three_ch_mag_int_with_sigma2; +#elif defined(__arm__) + +#endif + +#ifdef DEBUG_LLR + print_shorts2("rho01_128i:\n",rho01_128i); +#endif + + int i; + + + for (i=0; i<length>>2; i+=2) { + // in each iteration, we take 8 complex samples + +#if defined(__x86_64__) || defined(__i386__) + + xmm0 = rho01_128i[i]; // 4 symbols + xmm1 = rho01_128i[i+1]; + + // put (rho_r + rho_i)/sqrt2 in rho_rpi + // put (rho_r - rho_i)/sqrt2 in rho_rmi + xmm0 = _mm_shufflelo_epi16(xmm0,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm0 = _mm_shufflehi_epi16(xmm0,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm0 = _mm_shuffle_epi32(xmm0,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm1 = _mm_shufflelo_epi16(xmm1,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm1 = _mm_shufflehi_epi16(xmm1,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm1 = _mm_shuffle_epi32(xmm1,0xd8); //_MM_SHUFFLE(0,2,1,3)); + 
//xmm0 = [Re(0,1) Re(2,3) Im(0,1) Im(2,3)] + //xmm1 = [Re(4,5) Re(6,7) Im(4,5) Im(6,7)] + xmm2 = _mm_unpacklo_epi64(xmm0,xmm1); // Re(rho) + xmm3 = _mm_unpackhi_epi64(xmm0,xmm1); // Im(rho) + rho_rpi = _mm_adds_epi16(xmm2,xmm3); // rho = Re(rho) + Im(rho) + rho_rmi = _mm_subs_epi16(xmm2,xmm3); // rho* = Re(rho) - Im(rho) + + // divide by sqrt(2) + rho_rpi = _mm_mulhi_epi16(rho_rpi, ONE_OVER_SQRT_2); + rho_rmi = _mm_mulhi_epi16(rho_rmi, ONE_OVER_SQRT_2); + rho_rpi = _mm_slli_epi16(rho_rpi,1); + rho_rmi = _mm_slli_epi16(rho_rmi,1); + + // Compute LLR for first bit of stream 0 + + // Compute real and imaginary parts of MF output for stream 0 + xmm0 = stream0_128i_in[i]; + xmm1 = stream0_128i_in[i+1]; + + xmm0 = _mm_shufflelo_epi16(xmm0,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm0 = _mm_shufflehi_epi16(xmm0,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm0 = _mm_shuffle_epi32(xmm0,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm1 = _mm_shufflelo_epi16(xmm1,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm1 = _mm_shufflehi_epi16(xmm1,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm1 = _mm_shuffle_epi32(xmm1,0xd8); //_MM_SHUFFLE(0,2,1,3)); + //xmm0 = [Re(0,1) Re(2,3) Im(0,1) Im(2,3)] + //xmm1 = [Re(4,5) Re(6,7) Im(4,5) Im(6,7)] + y0r = _mm_unpacklo_epi64(xmm0,xmm1); // = [y0r(1),y0r(2),y0r(3),y0r(4)] + y0i = _mm_unpackhi_epi64(xmm0,xmm1); + + // divide by sqrt(2) + y0r_over2 = _mm_mulhi_epi16(y0r, ONE_OVER_SQRT_2); + y0i_over2 = _mm_mulhi_epi16(y0i, ONE_OVER_SQRT_2); + y0r_over2 = _mm_slli_epi16(y0r,1); + y0i_over2 = _mm_slli_epi16(y0i,1); + + y0_p_1_1 = _mm_adds_epi16(y0r_over2, y0i_over2); + y0_m_1_1 = _mm_subs_epi16(y0r_over2, y0i_over2); + + // Compute real and imaginary parts of MF output for stream 1 + xmm0 = stream1_128i_in[i]; + xmm1 = stream1_128i_in[i+1]; + + xmm0 = _mm_shufflelo_epi16(xmm0,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm0 = _mm_shufflehi_epi16(xmm0,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm0 = _mm_shuffle_epi32(xmm0,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm1 = _mm_shufflelo_epi16(xmm1,0xd8); 
//_MM_SHUFFLE(0,2,1,3)); + xmm1 = _mm_shufflehi_epi16(xmm1,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm1 = _mm_shuffle_epi32(xmm1,0xd8); //_MM_SHUFFLE(0,2,1,3)); + //xmm0 = [Re(0,1) Re(2,3) Im(0,1) Im(2,3)] + //xmm1 = [Re(4,5) Re(6,7) Im(4,5) Im(6,7)] + y1r = _mm_unpacklo_epi64(xmm0,xmm1); //[y1r(1),y1r(2),y1r(3),y1r(4)] + y1i = _mm_unpackhi_epi64(xmm0,xmm1); //[y1i(1),y1i(2),y1i(3),y1i(4)] + + xmm0 = _mm_setzero_si128(); // ZERO + + // compute psi + xmm3 = _mm_subs_epi16(y1r,rho_rpi); + psi_r_p1_p1 = _mm_abs_epi16(xmm3); + xmm3 = _mm_subs_epi16(y1i,rho_rmi); + psi_i_p1_p1 = _mm_abs_epi16(xmm3); + xmm3 = _mm_subs_epi16(y1r,rho_rmi); + psi_r_p1_m1 = _mm_abs_epi16(xmm3); + xmm3 = _mm_adds_epi16(y1i,rho_rpi); + psi_i_p1_m1 = _mm_abs_epi16(xmm3); + xmm3 = _mm_adds_epi16(y1r,rho_rmi); + psi_r_m1_p1 = _mm_abs_epi16(xmm3); + xmm3 = _mm_subs_epi16(y1i,rho_rpi); + psi_i_m1_p1 = _mm_abs_epi16(xmm3); + xmm3 = _mm_adds_epi16(y1r,rho_rpi); + psi_r_m1_m1 = _mm_abs_epi16(xmm3); + xmm3 = _mm_adds_epi16(y1i,rho_rmi); + psi_i_m1_m1 = _mm_abs_epi16(xmm3); + + // Rearrange interfering channel magnitudes + xmm2 = ch_mag_128i_i[i]; + xmm3 = ch_mag_128i_i[i+1]; + + xmm2 = _mm_shufflelo_epi16(xmm2,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm2 = _mm_shufflehi_epi16(xmm2,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm2 = _mm_shuffle_epi32(xmm2,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm3 = _mm_shufflelo_epi16(xmm3,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm3 = _mm_shufflehi_epi16(xmm3,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm3 = _mm_shuffle_epi32(xmm3,0xd8); //_MM_SHUFFLE(0,2,1,3)); + + ch_mag_int = _mm_unpacklo_epi64(xmm2,xmm3); + ch_mag_int_with_sigma2 = _mm_srai_epi16(ch_mag_int, 1); // *2 + two_ch_mag_int_with_sigma2 = ch_mag_int; // *4 + three_ch_mag_int_with_sigma2 = _mm_adds_epi16(ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2); // *6 + + interference_abs_64qam_epi16(psi_r_p1_p1, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_p1_p1, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, 
FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_i_p1_p1, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_p1_p1, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_r_p1_m1, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_p1_m1, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_i_p1_m1, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_p1_m1, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_r_m1_p1, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_m1_p1, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_i_m1_p1, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_m1_p1, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_r_m1_m1, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_m1_m1, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_i_m1_m1, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_m1_m1, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + + // prodsum + prodsum_psi_a_epi16(psi_r_p1_p1, a_r_p1_p1, psi_i_p1_p1, a_i_p1_p1, psi_a_p1_p1); + prodsum_psi_a_epi16(psi_r_p1_m1, a_r_p1_m1, psi_i_p1_m1, a_i_p1_m1, psi_a_p1_m1); + prodsum_psi_a_epi16(psi_r_m1_p1, a_r_m1_p1, psi_i_m1_p1, a_i_m1_p1, psi_a_m1_p1); + prodsum_psi_a_epi16(psi_r_m1_m1, a_r_m1_m1, psi_i_m1_m1, a_i_m1_m1, psi_a_m1_m1); + + // Multiply by sqrt(2) 
+ psi_a_p1_p1 = _mm_mulhi_epi16(psi_a_p1_p1, ONE_OVER_SQRT_2); + psi_a_p1_p1 = _mm_slli_epi16(psi_a_p1_p1, 2); + psi_a_p1_m1 = _mm_mulhi_epi16(psi_a_p1_m1, ONE_OVER_SQRT_2); + psi_a_p1_m1 = _mm_slli_epi16(psi_a_p1_m1, 2); + psi_a_m1_p1 = _mm_mulhi_epi16(psi_a_m1_p1, ONE_OVER_SQRT_2); + psi_a_m1_p1 = _mm_slli_epi16(psi_a_m1_p1, 2); + psi_a_m1_m1 = _mm_mulhi_epi16(psi_a_m1_m1, ONE_OVER_SQRT_2); + psi_a_m1_m1 = _mm_slli_epi16(psi_a_m1_m1, 2); + + square_a_64qam_epi16(a_r_p1_p1, a_i_p1_p1, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_p1_p1); + square_a_64qam_epi16(a_r_p1_m1, a_i_p1_m1, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_p1_m1); + square_a_64qam_epi16(a_r_m1_p1, a_i_m1_p1, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_m1_p1); + square_a_64qam_epi16(a_r_m1_m1, a_i_m1_m1, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_m1_m1); + + // Computing Metrics + xmm0 = _mm_subs_epi16(psi_a_p1_p1, a_sq_p1_p1); + bit_met_p1_p1 = _mm_adds_epi16(xmm0, y0_p_1_1); + + xmm0 = _mm_subs_epi16(psi_a_p1_m1, a_sq_p1_m1); + bit_met_p1_m1 = _mm_adds_epi16(xmm0, y0_m_1_1); + + xmm0 = _mm_subs_epi16(psi_a_m1_p1, a_sq_m1_p1); + bit_met_m1_p1 = _mm_subs_epi16(xmm0, y0_m_1_1); + + xmm0 = _mm_subs_epi16(psi_a_m1_m1, a_sq_m1_m1); + bit_met_m1_m1 = _mm_subs_epi16(xmm0, y0_p_1_1); + + // MSB + logmax_num_re0 = _mm_max_epi16(bit_met_p1_p1,bit_met_p1_m1); // bit=0 + logmax_den_re0 = _mm_max_epi16(bit_met_m1_p1,bit_met_m1_m1); // bit=1 + + y0r = _mm_subs_epi16(logmax_num_re0,logmax_den_re0); + + // LSB + logmax_num_im0 = _mm_max_epi16(bit_met_p1_p1,bit_met_m1_p1); // bit=0 + logmax_den_im0 = _mm_max_epi16(bit_met_p1_m1,bit_met_m1_m1); // bit=1 + + y0i = _mm_subs_epi16(logmax_num_im0,logmax_den_im0); + + stream0_128i_out[i] = _mm_unpacklo_epi16(y0r,y0i); // = [L1(1), L2(1), L1(2), L2(2)] + + if (i<((length>>1) - 1)) // false if only 2 REs remain + stream0_128i_out[i+1] = _mm_unpackhi_epi16(y0r,y0i); + +#elif defined(__arm__) + +#endif + } + +#if defined(__x86_64__) || defined(__i386__) + _mm_empty(); + _m_empty(); +#endif +} + + 
+//---------------------------------------------------------------------------------------------- +// 16-QAM +//---------------------------------------------------------------------------------------------- + +/* +__m128i ONE_OVER_TWO_SQRT_10 __attribute__((aligned(16))); +__m128i NINE_OVER_TWO_SQRT_10 __attribute__((aligned(16))); + +__m128i y0r_over_sqrt10 __attribute__ ((aligned(16))); +__m128i y0i_over_sqrt10 __attribute__ ((aligned(16))); +__m128i y0r_three_over_sqrt10 __attribute__ ((aligned(16))); +__m128i y0i_three_over_sqrt10 __attribute__ ((aligned(16))); + +__m128i ch_mag_des __attribute__((aligned(16))); +__m128i ch_mag_over_10 __attribute__ ((aligned(16))); +__m128i ch_mag_over_2 __attribute__ ((aligned(16))); +__m128i ch_mag_9_over_10 __attribute__ ((aligned(16))); +*/ + +void qam16_qpsk(short *stream0_in, + short *stream1_in, + short *ch_mag, + short *stream0_out, + short *rho01, + int length + ) +{ + + /* + Author: Sebastian Wagner + Date: 2012-06-04 + + Input: + stream0_in: MF filter for 1st stream, i.e., y0=h0'*y + stream!_in: MF filter for 2nd stream, i.e., y1=h1'*y + ch_mag: 2*h0/sqrt(00), [Re0 Im0 Re1 Im1] s.t. Im0=Re0, Im1=Re1, etc + ch_mag_i: 2*h1/sqrt(00), [Re0 Im0 Re1 Im1] s.t. 
Im0=Re0, Im1=Re1, etc + rho01: Channel cross correlation, i.e., h1'*h0 + + Output: + stream0_out: output LLRs for 1st stream + */ + +#if defined(__x86_64__) || defined(__i386__) + __m128i *rho01_128i = (__m128i *)rho01; + __m128i *stream0_128i_in = (__m128i *)stream0_in; + __m128i *stream1_128i_in = (__m128i *)stream1_in; + __m128i *stream0_128i_out = (__m128i *)stream0_out; + __m128i *ch_mag_128i = (__m128i *)ch_mag; + __m128i ONE_OVER_SQRT_2 = _mm_set1_epi16(23170); // round(1/sqrt(2)*2^15) + __m128i ONE_OVER_SQRT_10 = _mm_set1_epi16(20724); // round(1/sqrt(10)*2^16) + __m128i THREE_OVER_SQRT_10 = _mm_set1_epi16(31086); // round(3/sqrt(10)*2^15) + __m128i SQRT_10_OVER_FOUR = _mm_set1_epi16(25905); // round(sqrt(10)/4*2^15) + __m128i ONE_OVER_TWO_SQRT_10 = _mm_set1_epi16(10362); // round(1/2/sqrt(10)*2^16) + __m128i NINE_OVER_TWO_SQRT_10 = _mm_set1_epi16(23315); // round(9/2/sqrt(10)*2^14) + __m128i y0r_over_sqrt10; + __m128i y0i_over_sqrt10; + __m128i y0r_three_over_sqrt10; + __m128i y0i_three_over_sqrt10; + + __m128i ch_mag_des; + __m128i ch_mag_over_10; + __m128i ch_mag_over_2; + __m128i ch_mag_9_over_10; +#elif defined(__arm__) + +#endif + + int i; + + + for (i=0; i<length>>2; i+=2) { + // In one iteration, we deal with 8 REs + +#if defined(__x86_64__) || defined(__i386__) + // Get rho + xmm0 = rho01_128i[i]; + xmm1 = rho01_128i[i+1]; + xmm0 = _mm_shufflelo_epi16(xmm0,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm0 = _mm_shufflehi_epi16(xmm0,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm0 = _mm_shuffle_epi32(xmm0,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm1 = _mm_shufflelo_epi16(xmm1,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm1 = _mm_shufflehi_epi16(xmm1,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm1 = _mm_shuffle_epi32(xmm1,0xd8); //_MM_SHUFFLE(0,2,1,3)); + //xmm0 = [Re(0,1) Re(2,3) Im(0,1) Im(2,3)] + //xmm1 = [Re(4,5) Re(6,7) Im(4,5) Im(6,7)] + xmm2 = _mm_unpacklo_epi64(xmm0,xmm1); // Re(rho) + xmm3 = _mm_unpackhi_epi64(xmm0,xmm1); // Im(rho) + rho_rpi = _mm_adds_epi16(xmm2,xmm3); // rho = 
Re(rho) + Im(rho) + rho_rmi = _mm_subs_epi16(xmm2,xmm3); // rho* = Re(rho) - Im(rho) + + // Compute the different rhos + rho_rpi_1_1 = _mm_mulhi_epi16(rho_rpi,ONE_OVER_SQRT_10); + rho_rmi_1_1 = _mm_mulhi_epi16(rho_rmi,ONE_OVER_SQRT_10); + rho_rpi_3_3 = _mm_mulhi_epi16(rho_rpi,THREE_OVER_SQRT_10); + rho_rmi_3_3 = _mm_mulhi_epi16(rho_rmi,THREE_OVER_SQRT_10); + rho_rpi_3_3 = _mm_slli_epi16(rho_rpi_3_3,1); + rho_rmi_3_3 = _mm_slli_epi16(rho_rmi_3_3,1); + + xmm4 = _mm_mulhi_epi16(xmm2,ONE_OVER_SQRT_10); // Re(rho) + xmm5 = _mm_mulhi_epi16(xmm3,THREE_OVER_SQRT_10); // Im(rho) + xmm5 = _mm_slli_epi16(xmm5,1); + + rho_rpi_1_3 = _mm_adds_epi16(xmm4,xmm5); + rho_rmi_1_3 = _mm_subs_epi16(xmm4,xmm5); + + xmm6 = _mm_mulhi_epi16(xmm2,THREE_OVER_SQRT_10); // Re(rho) + xmm7 = _mm_mulhi_epi16(xmm3,ONE_OVER_SQRT_10); // Im(rho) + xmm6 = _mm_slli_epi16(xmm6,1); + + rho_rpi_3_1 = _mm_adds_epi16(xmm6,xmm7); + rho_rmi_3_1 = _mm_subs_epi16(xmm6,xmm7); + + // Rearrange interfering MF output + xmm0 = stream1_128i_in[i]; + xmm1 = stream1_128i_in[i+1]; + xmm0 = _mm_shufflelo_epi16(xmm0,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm0 = _mm_shufflehi_epi16(xmm0,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm0 = _mm_shuffle_epi32(xmm0,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm1 = _mm_shufflelo_epi16(xmm1,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm1 = _mm_shufflehi_epi16(xmm1,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm1 = _mm_shuffle_epi32(xmm1,0xd8); //_MM_SHUFFLE(0,2,1,3)); + //xmm0 = [Re(0,1) Re(2,3) Im(0,1) Im(2,3)] + //xmm1 = [Re(4,5) Re(6,7) Im(4,5) Im(6,7)] + y1r = _mm_unpacklo_epi64(xmm0,xmm1); //[y1r(1),y1r(2),y1r(3),y1r(4)] + y1i = _mm_unpackhi_epi64(xmm0,xmm1); //[y1i(1),y1i(2),y1i(3),y1i(4)] + + xmm0 = _mm_setzero_si128(); // ZERO + xmm2 = _mm_subs_epi16(rho_rpi_1_1,y1r); // = [Re(rho)+ Im(rho)]/sqrt(10) - y1r + psi_r_p1_p1 = _mm_abs_epi16(xmm2); // = |[Re(rho)+ Im(rho)]/sqrt(10) - y1r| + + xmm2= _mm_subs_epi16(rho_rmi_1_1,y1r); + psi_r_p1_m1 = _mm_abs_epi16(xmm2); + xmm2= _mm_subs_epi16(rho_rmi_1_1,y1i); + psi_i_p1_p1 
= _mm_abs_epi16(xmm2); + xmm2= _mm_subs_epi16(rho_rpi_1_3,y1r); + psi_r_p1_p3 = _mm_abs_epi16(xmm2); + xmm2= _mm_subs_epi16(rho_rmi_1_3,y1r); + psi_r_p1_m3 = _mm_abs_epi16(xmm2); + xmm2= _mm_subs_epi16(rho_rmi_3_1,y1i); + psi_i_p1_p3 = _mm_abs_epi16(xmm2); + xmm2= _mm_subs_epi16(rho_rpi_3_1,y1r); + psi_r_p3_p1 = _mm_abs_epi16(xmm2); + xmm2= _mm_subs_epi16(rho_rmi_3_1,y1r); + psi_r_p3_m1 = _mm_abs_epi16(xmm2); + xmm2= _mm_subs_epi16(rho_rmi_1_3,y1i); + psi_i_p3_p1 = _mm_abs_epi16(xmm2); + xmm2= _mm_subs_epi16(rho_rpi_3_3,y1r); + psi_r_p3_p3 = _mm_abs_epi16(xmm2); + xmm2= _mm_subs_epi16(rho_rmi_3_3,y1r); + psi_r_p3_m3 = _mm_abs_epi16(xmm2); + xmm2= _mm_subs_epi16(rho_rmi_3_3,y1i); + psi_i_p3_p3 = _mm_abs_epi16(xmm2); + xmm2= _mm_subs_epi16(rho_rpi_1_1,y1i); + psi_i_m1_p1 = _mm_abs_epi16(xmm2); + xmm2= _mm_subs_epi16(rho_rpi_3_1,y1i); + psi_i_m1_p3 = _mm_abs_epi16(xmm2); + xmm2= _mm_subs_epi16(rho_rpi_1_3,y1i); + psi_i_m3_p1 = _mm_abs_epi16(xmm2); + xmm2= _mm_subs_epi16(rho_rpi_3_3,y1i); + psi_i_m3_p3 = _mm_abs_epi16(xmm2); + xmm2= _mm_adds_epi16(rho_rpi_1_1,y1i); + psi_i_p1_m1 = _mm_abs_epi16(xmm2); + xmm2= _mm_adds_epi16(rho_rpi_3_1,y1i); + psi_i_p1_m3 = _mm_abs_epi16(xmm2); + xmm2= _mm_adds_epi16(rho_rpi_1_3,y1i); + psi_i_p3_m1 = _mm_abs_epi16(xmm2); + xmm2= _mm_adds_epi16(rho_rpi_3_3,y1i); + psi_i_p3_m3 = _mm_abs_epi16(xmm2); + xmm2= _mm_adds_epi16(rho_rpi_1_1,y1r); + psi_r_m1_m1 = _mm_abs_epi16(xmm2); + xmm2= _mm_adds_epi16(rho_rpi_1_3,y1r); + psi_r_m1_m3 = _mm_abs_epi16(xmm2); + xmm2= _mm_adds_epi16(rho_rpi_3_1,y1r); + psi_r_m3_m1 = _mm_abs_epi16(xmm2); + xmm2= _mm_adds_epi16(rho_rpi_3_3,y1r); + psi_r_m3_m3 = _mm_abs_epi16(xmm2); + xmm2= _mm_adds_epi16(y1r,rho_rmi_1_1); + psi_r_m1_p1 = _mm_abs_epi16(xmm2); + xmm2= _mm_adds_epi16(y1r,rho_rmi_1_3); + psi_r_m1_p3 = _mm_abs_epi16(xmm2); + xmm2= _mm_adds_epi16(y1i,rho_rmi_1_1); + psi_i_m1_m1 = _mm_abs_epi16(xmm2); + xmm2= _mm_adds_epi16(y1i,rho_rmi_3_1); + psi_i_m1_m3 = _mm_abs_epi16(xmm2); + xmm2= 
_mm_adds_epi16(y1r,rho_rmi_3_1); + psi_r_m3_p1 = _mm_abs_epi16(xmm2); + xmm2= _mm_adds_epi16(y1r,rho_rmi_3_3); + psi_r_m3_p3 = _mm_abs_epi16(xmm2); + xmm2= _mm_adds_epi16(y1i,rho_rmi_1_3); + psi_i_m3_m1 = _mm_abs_epi16(xmm2); + xmm2= _mm_adds_epi16(y1i,rho_rmi_3_3); + psi_i_m3_m3 = _mm_abs_epi16(xmm2); + + // Rearrange desired MF output + xmm0 = stream0_128i_in[i]; + xmm1 = stream0_128i_in[i+1]; + xmm0 = _mm_shufflelo_epi16(xmm0,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm0 = _mm_shufflehi_epi16(xmm0,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm0 = _mm_shuffle_epi32(xmm0,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm1 = _mm_shufflelo_epi16(xmm1,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm1 = _mm_shufflehi_epi16(xmm1,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm1 = _mm_shuffle_epi32(xmm1,0xd8); //_MM_SHUFFLE(0,2,1,3)); + //xmm0 = [Re(0,1) Re(2,3) Im(0,1) Im(2,3)] + //xmm1 = [Re(4,5) Re(6,7) Im(4,5) Im(6,7)] + y0r = _mm_unpacklo_epi64(xmm0,xmm1); // = [y0r(1),y0r(2),y0r(3),y0r(4)] + y0i = _mm_unpackhi_epi64(xmm0,xmm1); + + // Rearrange desired channel magnitudes + xmm2 = ch_mag_128i[i]; // = [|h|^2(1),|h|^2(1),|h|^2(2),|h|^2(2)]*(2/sqrt(10)) + xmm3 = ch_mag_128i[i+1]; // = [|h|^2(3),|h|^2(3),|h|^2(4),|h|^2(4)]*(2/sqrt(10)) + xmm2 = _mm_shufflelo_epi16(xmm2,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm2 = _mm_shufflehi_epi16(xmm2,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm2 = _mm_shuffle_epi32(xmm2,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm3 = _mm_shufflelo_epi16(xmm3,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm3 = _mm_shufflehi_epi16(xmm3,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm3 = _mm_shuffle_epi32(xmm3,0xd8); //_MM_SHUFFLE(0,2,1,3)); + + ch_mag_des = _mm_unpacklo_epi64(xmm2,xmm3); // = [|h|^2(1),|h|^2(2),|h|^2(3),|h|^2(4)]*(2/sqrt(10)) + + // Scale MF output of desired signal + y0r_over_sqrt10 = _mm_mulhi_epi16(y0r,ONE_OVER_SQRT_10); + y0i_over_sqrt10 = _mm_mulhi_epi16(y0i,ONE_OVER_SQRT_10); + y0r_three_over_sqrt10 = _mm_mulhi_epi16(y0r,THREE_OVER_SQRT_10); + y0i_three_over_sqrt10 = _mm_mulhi_epi16(y0i,THREE_OVER_SQRT_10); + 
y0r_three_over_sqrt10 = _mm_slli_epi16(y0r_three_over_sqrt10,1); + y0i_three_over_sqrt10 = _mm_slli_epi16(y0i_three_over_sqrt10,1); + + // Compute necessary combination of required terms + y0_p_1_1 = _mm_adds_epi16(y0r_over_sqrt10,y0i_over_sqrt10); + y0_m_1_1 = _mm_subs_epi16(y0r_over_sqrt10,y0i_over_sqrt10); + + y0_p_1_3 = _mm_adds_epi16(y0r_over_sqrt10,y0i_three_over_sqrt10); + y0_m_1_3 = _mm_subs_epi16(y0r_over_sqrt10,y0i_three_over_sqrt10); + + y0_p_3_1 = _mm_adds_epi16(y0r_three_over_sqrt10,y0i_over_sqrt10); + y0_m_3_1 = _mm_subs_epi16(y0r_three_over_sqrt10,y0i_over_sqrt10); + + y0_p_3_3 = _mm_adds_epi16(y0r_three_over_sqrt10,y0i_three_over_sqrt10); + y0_m_3_3 = _mm_subs_epi16(y0r_three_over_sqrt10,y0i_three_over_sqrt10); + + // Add psi + psi_a_p1_p1 = _mm_adds_epi16(psi_r_p1_p1 ,psi_i_p1_p1); + psi_a_p1_p3 = _mm_adds_epi16(psi_r_p1_p3 ,psi_i_p1_p3); + psi_a_p3_p1 = _mm_adds_epi16(psi_r_p3_p1 ,psi_i_p3_p1); + psi_a_p3_p3 = _mm_adds_epi16(psi_r_p3_p3 ,psi_i_p3_p3); + psi_a_p1_m1 = _mm_adds_epi16(psi_r_p1_m1 ,psi_i_p1_m1); + psi_a_p1_m3 = _mm_adds_epi16(psi_r_p1_m3 ,psi_i_p1_m3); + psi_a_p3_m1 = _mm_adds_epi16(psi_r_p3_m1 ,psi_i_p3_m1); + psi_a_p3_m3 = _mm_adds_epi16(psi_r_p3_m3 ,psi_i_p3_m3); + psi_a_m1_p1 = _mm_adds_epi16(psi_r_m1_p1 ,psi_i_m1_p1); + psi_a_m1_p3 = _mm_adds_epi16(psi_r_m1_p3 ,psi_i_m1_p3); + psi_a_m3_p1 = _mm_adds_epi16(psi_r_m3_p1 ,psi_i_m3_p1); + psi_a_m3_p3 = _mm_adds_epi16(psi_r_m3_p3 ,psi_i_m3_p3); + psi_a_m1_m1 = _mm_adds_epi16(psi_r_m1_m1 ,psi_i_m1_m1); + psi_a_m1_m3 = _mm_adds_epi16(psi_r_m1_m3 ,psi_i_m1_m3); + psi_a_m3_m1 = _mm_adds_epi16(psi_r_m3_m1 ,psi_i_m3_m1); + psi_a_m3_m3 = _mm_adds_epi16(psi_r_m3_m3 ,psi_i_m3_m3); + + // scale by sqrt(2) + psi_a_p1_p1 = _mm_mulhi_epi16(psi_a_p1_p1,ONE_OVER_SQRT_2); + psi_a_p1_p1 = _mm_slli_epi16(psi_a_p1_p1,1); + psi_a_p1_p3 = _mm_mulhi_epi16(psi_a_p1_p3,ONE_OVER_SQRT_2); + psi_a_p1_p3 = _mm_slli_epi16(psi_a_p1_p3,1); + psi_a_p3_p1 = _mm_mulhi_epi16(psi_a_p3_p1,ONE_OVER_SQRT_2); + psi_a_p3_p1 = 
_mm_slli_epi16(psi_a_p3_p1,1); + psi_a_p3_p3 = _mm_mulhi_epi16(psi_a_p3_p3,ONE_OVER_SQRT_2); + psi_a_p3_p3 = _mm_slli_epi16(psi_a_p3_p3,1); + + psi_a_p1_m1 = _mm_mulhi_epi16(psi_a_p1_m1,ONE_OVER_SQRT_2); + psi_a_p1_m1 = _mm_slli_epi16(psi_a_p1_m1,1); + psi_a_p1_m3 = _mm_mulhi_epi16(psi_a_p1_m3,ONE_OVER_SQRT_2); + psi_a_p1_m3 = _mm_slli_epi16(psi_a_p1_m3,1); + psi_a_p3_m1 = _mm_mulhi_epi16(psi_a_p3_m1,ONE_OVER_SQRT_2); + psi_a_p3_m1 = _mm_slli_epi16(psi_a_p3_m1,1); + psi_a_p3_m3 = _mm_mulhi_epi16(psi_a_p3_m3,ONE_OVER_SQRT_2); + psi_a_p3_m3 = _mm_slli_epi16(psi_a_p3_m3,1); + + psi_a_m1_p1 = _mm_mulhi_epi16(psi_a_m1_p1,ONE_OVER_SQRT_2); + psi_a_m1_p1 = _mm_slli_epi16(psi_a_m1_p1,1); + psi_a_m1_p3 = _mm_mulhi_epi16(psi_a_m1_p3,ONE_OVER_SQRT_2); + psi_a_m1_p3 = _mm_slli_epi16(psi_a_m1_p3,1); + psi_a_m3_p1 = _mm_mulhi_epi16(psi_a_m3_p1,ONE_OVER_SQRT_2); + psi_a_m3_p1 = _mm_slli_epi16(psi_a_m3_p1,1); + psi_a_m3_p3 = _mm_mulhi_epi16(psi_a_m3_p3,ONE_OVER_SQRT_2); + psi_a_m3_p3 = _mm_slli_epi16(psi_a_m3_p3,1); + + psi_a_m1_m1 = _mm_mulhi_epi16(psi_a_m1_m1,ONE_OVER_SQRT_2); + psi_a_m1_m1 = _mm_slli_epi16(psi_a_m1_m1,1); + psi_a_m1_m3 = _mm_mulhi_epi16(psi_a_m1_m3,ONE_OVER_SQRT_2); + psi_a_m1_m3 = _mm_slli_epi16(psi_a_m1_m3,1); + psi_a_m3_m1 = _mm_mulhi_epi16(psi_a_m3_m1,ONE_OVER_SQRT_2); + psi_a_m3_m1 = _mm_slli_epi16(psi_a_m3_m1,1); + psi_a_m3_m3 = _mm_mulhi_epi16(psi_a_m3_m3,ONE_OVER_SQRT_2); + psi_a_m3_m3 = _mm_slli_epi16(psi_a_m3_m3,1); + + // Computing different multiples of channel norms + ch_mag_over_10=_mm_mulhi_epi16(ch_mag_des, ONE_OVER_TWO_SQRT_10); + ch_mag_over_2=_mm_mulhi_epi16(ch_mag_des, SQRT_10_OVER_FOUR); + ch_mag_over_2=_mm_slli_epi16(ch_mag_over_2, 1); + ch_mag_9_over_10=_mm_mulhi_epi16(ch_mag_des, NINE_OVER_TWO_SQRT_10); + ch_mag_9_over_10=_mm_slli_epi16(ch_mag_9_over_10, 2); + + // Computing Metrics + xmm1 = _mm_adds_epi16(psi_a_p1_p1, y0_p_1_1); + bit_met_p1_p1= _mm_subs_epi16(xmm1, ch_mag_over_10); + + xmm1 = _mm_adds_epi16(psi_a_p1_p3, y0_p_1_3); + 
bit_met_p1_p3= _mm_subs_epi16(xmm1, ch_mag_over_2); + + xmm1 = _mm_adds_epi16(psi_a_p1_m1, y0_m_1_1); + bit_met_p1_m1= _mm_subs_epi16(xmm1, ch_mag_over_10); + + xmm1 = _mm_adds_epi16(psi_a_p1_m3, y0_m_1_3); + bit_met_p1_m3= _mm_subs_epi16(xmm1, ch_mag_over_2); + + xmm1 = _mm_adds_epi16(psi_a_p3_p1, y0_p_3_1); + bit_met_p3_p1= _mm_subs_epi16(xmm1, ch_mag_over_2); + + xmm1 = _mm_adds_epi16(psi_a_p3_p3, y0_p_3_3); + bit_met_p3_p3= _mm_subs_epi16(xmm1, ch_mag_9_over_10); + + xmm1 = _mm_adds_epi16(psi_a_p3_m1, y0_m_3_1); + bit_met_p3_m1= _mm_subs_epi16(xmm1, ch_mag_over_2); + + xmm1 = _mm_adds_epi16(psi_a_p3_m3, y0_m_3_3); + bit_met_p3_m3= _mm_subs_epi16(xmm1, ch_mag_9_over_10); + + xmm1 = _mm_subs_epi16(psi_a_m1_p1, y0_m_1_1); + bit_met_m1_p1= _mm_subs_epi16(xmm1, ch_mag_over_10); + + xmm1 = _mm_subs_epi16(psi_a_m1_p3, y0_m_1_3); + bit_met_m1_p3= _mm_subs_epi16(xmm1, ch_mag_over_2); + + xmm1 = _mm_subs_epi16(psi_a_m1_m1, y0_p_1_1); + bit_met_m1_m1= _mm_subs_epi16(xmm1, ch_mag_over_10); + + xmm1 = _mm_subs_epi16(psi_a_m1_m3, y0_p_1_3); + bit_met_m1_m3= _mm_subs_epi16(xmm1, ch_mag_over_2); + + xmm1 = _mm_subs_epi16(psi_a_m3_p1, y0_m_3_1); + bit_met_m3_p1= _mm_subs_epi16(xmm1, ch_mag_over_2); + + xmm1 = _mm_subs_epi16(psi_a_m3_p3, y0_m_3_3); + bit_met_m3_p3= _mm_subs_epi16(xmm1, ch_mag_9_over_10); + + xmm1 = _mm_subs_epi16(psi_a_m3_m1, y0_p_3_1); + bit_met_m3_m1= _mm_subs_epi16(xmm1, ch_mag_over_2); + + xmm1 = _mm_subs_epi16(psi_a_m3_m3, y0_p_3_3); + bit_met_m3_m3= _mm_subs_epi16(xmm1, ch_mag_9_over_10); + + // LLR of the first bit + // Bit = 1 + xmm0 = _mm_max_epi16(bit_met_m1_p1,bit_met_m1_p3); + xmm1 = _mm_max_epi16(bit_met_m1_m1,bit_met_m1_m3); + xmm2 = _mm_max_epi16(bit_met_m3_p1,bit_met_m3_p3); + xmm3 = _mm_max_epi16(bit_met_m3_m1,bit_met_m3_m3); + xmm4 = _mm_max_epi16(xmm0,xmm1); + xmm5 = _mm_max_epi16(xmm2,xmm3); + logmax_num_re0= _mm_max_epi16(xmm4,xmm5); + + // Bit = 0 + xmm0 = _mm_max_epi16(bit_met_p1_p1,bit_met_p1_p3); + xmm1 = 
_mm_max_epi16(bit_met_p1_m1,bit_met_p1_m3); + xmm2 = _mm_max_epi16(bit_met_p3_p1,bit_met_p3_p3); + xmm3 = _mm_max_epi16(bit_met_p3_m1,bit_met_p3_m3); + xmm4 = _mm_max_epi16(xmm0,xmm1); + xmm5 = _mm_max_epi16(xmm2,xmm3); + logmax_den_re0 = _mm_max_epi16(xmm4,xmm5); + + // LLR of first bit [L1(1), L1(2), L1(3), L1(4), L1(5), L1(6), L1(7), L1(8)] + y0r = _mm_subs_epi16(logmax_den_re0,logmax_num_re0); + + // LLR of the second bit + // Bit = 1 + xmm0 = _mm_max_epi16(bit_met_p1_m1,bit_met_p3_m1); + xmm1 = _mm_max_epi16(bit_met_m1_m1,bit_met_m3_m1); + xmm2 = _mm_max_epi16(bit_met_p1_m3,bit_met_p3_m3); + xmm3 = _mm_max_epi16(bit_met_m1_m3,bit_met_m3_m3); + xmm4 = _mm_max_epi16(xmm0,xmm1); + xmm5 = _mm_max_epi16(xmm2,xmm3); + logmax_num_re1 = _mm_max_epi16(xmm4,xmm5); + + // Bit = 0 + xmm0 = _mm_max_epi16(bit_met_p1_p1,bit_met_p3_p1); + xmm1 = _mm_max_epi16(bit_met_m1_p1,bit_met_m3_p1); + xmm2 = _mm_max_epi16(bit_met_p1_p3,bit_met_p3_p3); + xmm3 = _mm_max_epi16(bit_met_m1_p3,bit_met_m3_p3); + xmm4 = _mm_max_epi16(xmm0,xmm1); + xmm5 = _mm_max_epi16(xmm2,xmm3); + logmax_den_re1 = _mm_max_epi16(xmm4,xmm5); + + // LLR of second bit [L2(1), L2(2), L2(3), L2(4)] + y1r = _mm_subs_epi16(logmax_den_re1,logmax_num_re1); + + // LLR of the third bit + // Bit = 1 + xmm0 = _mm_max_epi16(bit_met_m3_p1,bit_met_m3_p3); + xmm1 = _mm_max_epi16(bit_met_m3_m1,bit_met_m3_m3); + xmm2 = _mm_max_epi16(bit_met_p3_p1,bit_met_p3_p3); + xmm3 = _mm_max_epi16(bit_met_p3_m1,bit_met_p3_m3); + xmm4 = _mm_max_epi16(xmm0,xmm1); + xmm5 = _mm_max_epi16(xmm2,xmm3); + logmax_num_im0 = _mm_max_epi16(xmm4,xmm5); + + // Bit = 0 + xmm0 = _mm_max_epi16(bit_met_m1_p1,bit_met_m1_p3); + xmm1 = _mm_max_epi16(bit_met_m1_m1,bit_met_m1_m3); + xmm2 = _mm_max_epi16(bit_met_p1_p1,bit_met_p1_p3); + xmm3 = _mm_max_epi16(bit_met_p1_m1,bit_met_p1_m3); + xmm4 = _mm_max_epi16(xmm0,xmm1); + xmm5 = _mm_max_epi16(xmm2,xmm3); + logmax_den_im0 = _mm_max_epi16(xmm4,xmm5); + + // LLR of third bit [L3(1), L3(2), L3(3), L3(4)] + y0i = 
_mm_subs_epi16(logmax_den_im0,logmax_num_im0); + + // LLR of the fourth bit + // Bit = 1 + xmm0 = _mm_max_epi16(bit_met_p1_m3,bit_met_p3_m3); + xmm1 = _mm_max_epi16(bit_met_m1_m3,bit_met_m3_m3); + xmm2 = _mm_max_epi16(bit_met_p1_p3,bit_met_p3_p3); + xmm3 = _mm_max_epi16(bit_met_m1_p3,bit_met_m3_p3); + xmm4 = _mm_max_epi16(xmm0,xmm1); + xmm5 = _mm_max_epi16(xmm2,xmm3); + logmax_num_im1 = _mm_max_epi16(xmm4,xmm5); + + // Bit = 0 + xmm0 = _mm_max_epi16(bit_met_p1_m1,bit_met_p3_m1); + xmm1 = _mm_max_epi16(bit_met_m1_m1,bit_met_m3_m1); + xmm2 = _mm_max_epi16(bit_met_p1_p1,bit_met_p3_p1); + xmm3 = _mm_max_epi16(bit_met_m1_p1,bit_met_m3_p1); + xmm4 = _mm_max_epi16(xmm0,xmm1); + xmm5 = _mm_max_epi16(xmm2,xmm3); + logmax_den_im1 = _mm_max_epi16(xmm4,xmm5); + + // LLR of fourth bit [L4(1), L4(2), L4(3), L4(4)] + y1i = _mm_subs_epi16(logmax_den_im1,logmax_num_im1); + + // Pack LLRs in output + // [L1(1), L2(1), L1(2), L2(2), L1(3), L2(3), L1(4), L2(4)] + xmm0 = _mm_unpacklo_epi16(y0r,y1r); + // [L1(5), L2(5), L1(6), L2(6), L1(7), L2(7), L1(8), L2(8)] + xmm1 = _mm_unpackhi_epi16(y0r,y1r); + // [L3(1), L4(1), L3(2), L4(2), L3(3), L4(3), L3(4), L4(4)] + xmm2 = _mm_unpacklo_epi16(y0i,y1i); + // [L3(5), L4(5), L3(6), L4(6), L3(7), L4(7), L3(8), L4(8)] + xmm3 = _mm_unpackhi_epi16(y0i,y1i); + + stream0_128i_out[2*i+0] = _mm_unpacklo_epi32(xmm0,xmm2); // 8LLRs, 2REs + stream0_128i_out[2*i+1] = _mm_unpackhi_epi32(xmm0,xmm2); + stream0_128i_out[2*i+2] = _mm_unpacklo_epi32(xmm1,xmm3); + stream0_128i_out[2*i+3] = _mm_unpackhi_epi32(xmm1,xmm3); + +#elif defined(__arm__) + +#endif + } + +#if defined(__x86_64__) || defined(__i386__) + _mm_empty(); + _m_empty(); +#endif + +} + +int dlsch_16qam_qpsk_llr(LTE_DL_FRAME_PARMS *frame_parms, + int32_t **rxdataF_comp, + int32_t **rxdataF_comp_i, + int32_t **dl_ch_mag, //|h_0|^2*(2/sqrt{10}) + int32_t **rho_i, + int16_t *dlsch_llr, + uint8_t symbol, + uint8_t first_symbol_flag, + uint16_t nb_rb, + uint16_t pbch_pss_sss_adjust, + int16_t **llr16p) +{ 
+ + int16_t *rxF = (int16_t*)&rxdataF_comp[0][(symbol*frame_parms->N_RB_DL*12)]; + int16_t *rxF_i = (int16_t*)&rxdataF_comp_i[0][(symbol*frame_parms->N_RB_DL*12)]; + int16_t *ch_mag = (int16_t*)&dl_ch_mag[0][(symbol*frame_parms->N_RB_DL*12)]; + int16_t *rho = (int16_t*)&rho_i[0][(symbol*frame_parms->N_RB_DL*12)]; + int16_t *llr16; + int len; + uint8_t symbol_mod = (symbol >= (7-frame_parms->Ncp))? (symbol-(7-frame_parms->Ncp)) : symbol; + + // first symbol has different structure due to more pilots + if (first_symbol_flag == 1) { + llr16 = (int16_t*)dlsch_llr; + } else { + llr16 = (int16_t*)(*llr16p); + } + + AssertFatal(llr16!=NULL,"dlsch_16qam_qpsk_llr: llr is null, symbol %d\n",symbol); + + if ((symbol_mod==0) || (symbol_mod==(4-frame_parms->Ncp))) { + // if symbol has pilots + if (frame_parms->nb_antenna_ports_eNB!=1) + // in 2 antenna ports we have 8 REs per symbol per RB + len = (nb_rb*8) - (2*pbch_pss_sss_adjust/3); + else + // for 1 antenna port we have 10 REs per symbol per RB + len = (nb_rb*10) - (5*pbch_pss_sss_adjust/6); + } else { + // symbol has no pilots + len = (nb_rb*12) - pbch_pss_sss_adjust; + } + + // printf("symbol %d: qam16_llr, len %d (llr16 %p)\n",symbol,len,llr16); + + qam16_qpsk((short *)rxF, + (short *)rxF_i, + (short *)ch_mag, + (short *)llr16, + (short *)rho, + len); + + llr16 += (len<<2); + *llr16p = (short *)llr16; + + return(0); +} + +void qam16_qam16(short *stream0_in, + short *stream1_in, + short *ch_mag, + short *ch_mag_i, + short *stream0_out, + short *rho01, + int length + ) +{ + + /* + Author: Sebastian Wagner + Date: 2012-06-04 + + Input: + stream0_in: MF filter for 1st stream, i.e., y0=h0'*y + stream!_in: MF filter for 2nd stream, i.e., y1=h1'*y + ch_mag: 2*h0/sqrt(00), [Re0 Im0 Re1 Im1] s.t. Im0=Re0, Im1=Re1, etc + ch_mag_i: 2*h1/sqrt(00), [Re0 Im0 Re1 Im1] s.t. 
Im0=Re0, Im1=Re1, etc + rho01: Channel cross correlation, i.e., h1'*h0 + + Output: + stream0_out: output LLRs for 1st stream + */ +#if defined(__x86_64__) || defined(__i386__) + __m128i *rho01_128i = (__m128i *)rho01; + __m128i *stream0_128i_in = (__m128i *)stream0_in; + __m128i *stream1_128i_in = (__m128i *)stream1_in; + __m128i *stream0_128i_out = (__m128i *)stream0_out; + __m128i *ch_mag_128i = (__m128i *)ch_mag; + __m128i *ch_mag_128i_i = (__m128i *)ch_mag_i; + + + + __m128i ONE_OVER_SQRT_10 = _mm_set1_epi16(20724); // round(1/sqrt(10)*2^16) + __m128i ONE_OVER_SQRT_10_Q15 = _mm_set1_epi16(10362); // round(1/sqrt(10)*2^15) + __m128i THREE_OVER_SQRT_10 = _mm_set1_epi16(31086); // round(3/sqrt(10)*2^15) + __m128i SQRT_10_OVER_FOUR = _mm_set1_epi16(25905); // round(sqrt(10)/4*2^15) + __m128i ONE_OVER_TWO_SQRT_10 = _mm_set1_epi16(10362); // round(1/2/sqrt(10)*2^16) + __m128i NINE_OVER_TWO_SQRT_10 = _mm_set1_epi16(23315); // round(9/2/sqrt(10)*2^14) + __m128i ch_mag_des,ch_mag_int; + __m128i y0r_over_sqrt10; + __m128i y0i_over_sqrt10; + __m128i y0r_three_over_sqrt10; + __m128i y0i_three_over_sqrt10; + __m128i ch_mag_over_10; + __m128i ch_mag_over_2; + __m128i ch_mag_9_over_10; +#elif defined(__arm__) + +#endif + + int i; + + for (i=0; i<length>>2; i+=2) { + // In one iteration, we deal with 8 REs + +#if defined(__x86_64__) || defined(__i386__) + // Get rho + xmm0 = rho01_128i[i]; + xmm1 = rho01_128i[i+1]; + xmm0 = _mm_shufflelo_epi16(xmm0,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm0 = _mm_shufflehi_epi16(xmm0,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm0 = _mm_shuffle_epi32(xmm0,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm1 = _mm_shufflelo_epi16(xmm1,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm1 = _mm_shufflehi_epi16(xmm1,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm1 = _mm_shuffle_epi32(xmm1,0xd8); //_MM_SHUFFLE(0,2,1,3)); + //xmm0 = [Re(0,1) Re(2,3) Im(0,1) Im(2,3)] + //xmm1 = [Re(4,5) Re(6,7) Im(4,5) Im(6,7)] + xmm2 = _mm_unpacklo_epi64(xmm0,xmm1); // Re(rho) + xmm3 = _mm_unpackhi_epi64(xmm0,xmm1); 
// Im(rho) + rho_rpi = _mm_adds_epi16(xmm2,xmm3); // rho = Re(rho) + Im(rho) + rho_rmi = _mm_subs_epi16(xmm2,xmm3); // rho* = Re(rho) - Im(rho) + + // Compute the different rhos + rho_rpi_1_1 = _mm_mulhi_epi16(rho_rpi,ONE_OVER_SQRT_10); + rho_rmi_1_1 = _mm_mulhi_epi16(rho_rmi,ONE_OVER_SQRT_10); + rho_rpi_3_3 = _mm_mulhi_epi16(rho_rpi,THREE_OVER_SQRT_10); + rho_rmi_3_3 = _mm_mulhi_epi16(rho_rmi,THREE_OVER_SQRT_10); + rho_rpi_3_3 = _mm_slli_epi16(rho_rpi_3_3,1); + rho_rmi_3_3 = _mm_slli_epi16(rho_rmi_3_3,1); + + xmm4 = _mm_mulhi_epi16(xmm2,ONE_OVER_SQRT_10); // Re(rho) + xmm5 = _mm_mulhi_epi16(xmm3,THREE_OVER_SQRT_10); // Im(rho) + xmm5 = _mm_slli_epi16(xmm5,1); + + rho_rpi_1_3 = _mm_adds_epi16(xmm4,xmm5); + rho_rmi_1_3 = _mm_subs_epi16(xmm4,xmm5); + + xmm6 = _mm_mulhi_epi16(xmm2,THREE_OVER_SQRT_10); // Re(rho) + xmm7 = _mm_mulhi_epi16(xmm3,ONE_OVER_SQRT_10); // Im(rho) + xmm6 = _mm_slli_epi16(xmm6,1); + + rho_rpi_3_1 = _mm_adds_epi16(xmm6,xmm7); + rho_rmi_3_1 = _mm_subs_epi16(xmm6,xmm7); + + // Rearrange interfering MF output + xmm0 = stream1_128i_in[i]; + xmm1 = stream1_128i_in[i+1]; + xmm0 = _mm_shufflelo_epi16(xmm0,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm0 = _mm_shufflehi_epi16(xmm0,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm0 = _mm_shuffle_epi32(xmm0,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm1 = _mm_shufflelo_epi16(xmm1,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm1 = _mm_shufflehi_epi16(xmm1,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm1 = _mm_shuffle_epi32(xmm1,0xd8); //_MM_SHUFFLE(0,2,1,3)); + //xmm0 = [Re(0,1) Re(2,3) Im(0,1) Im(2,3)] + //xmm1 = [Re(4,5) Re(6,7) Im(4,5) Im(6,7)] + y1r = _mm_unpacklo_epi64(xmm0,xmm1); //[y1r(1),y1r(2),y1r(3),y1r(4)] + y1i = _mm_unpackhi_epi64(xmm0,xmm1); //[y1i(1),y1i(2),y1i(3),y1i(4)] + + xmm0 = _mm_setzero_si128(); // ZERO + xmm2 = _mm_subs_epi16(rho_rpi_1_1,y1r); // = [Re(rho)+ Im(rho)]/sqrt(10) - y1r + psi_r_p1_p1 = _mm_abs_epi16(xmm2); // = |[Re(rho)+ Im(rho)]/sqrt(10) - y1r| + + xmm2= _mm_subs_epi16(rho_rmi_1_1,y1r); + psi_r_p1_m1 = 
_mm_abs_epi16(xmm2); + xmm2= _mm_subs_epi16(rho_rmi_1_1,y1i); + psi_i_p1_p1 = _mm_abs_epi16(xmm2); + xmm2= _mm_subs_epi16(rho_rpi_1_3,y1r); + psi_r_p1_p3 = _mm_abs_epi16(xmm2); + xmm2= _mm_subs_epi16(rho_rmi_1_3,y1r); + psi_r_p1_m3 = _mm_abs_epi16(xmm2); + xmm2= _mm_subs_epi16(rho_rmi_3_1,y1i); + psi_i_p1_p3 = _mm_abs_epi16(xmm2); + xmm2= _mm_subs_epi16(rho_rpi_3_1,y1r); + psi_r_p3_p1 = _mm_abs_epi16(xmm2); + xmm2= _mm_subs_epi16(rho_rmi_3_1,y1r); + psi_r_p3_m1 = _mm_abs_epi16(xmm2); + xmm2= _mm_subs_epi16(rho_rmi_1_3,y1i); + psi_i_p3_p1 = _mm_abs_epi16(xmm2); + xmm2= _mm_subs_epi16(rho_rpi_3_3,y1r); + psi_r_p3_p3 = _mm_abs_epi16(xmm2); + xmm2= _mm_subs_epi16(rho_rmi_3_3,y1r); + psi_r_p3_m3 = _mm_abs_epi16(xmm2); + xmm2= _mm_subs_epi16(rho_rmi_3_3,y1i); + psi_i_p3_p3 = _mm_abs_epi16(xmm2); + xmm2= _mm_subs_epi16(rho_rpi_1_1,y1i); + psi_i_m1_p1 = _mm_abs_epi16(xmm2); + xmm2= _mm_subs_epi16(rho_rpi_3_1,y1i); + psi_i_m1_p3 = _mm_abs_epi16(xmm2); + xmm2= _mm_subs_epi16(rho_rpi_1_3,y1i); + psi_i_m3_p1 = _mm_abs_epi16(xmm2); + xmm2= _mm_subs_epi16(rho_rpi_3_3,y1i); + psi_i_m3_p3 = _mm_abs_epi16(xmm2); + xmm2= _mm_adds_epi16(rho_rpi_1_1,y1i); + psi_i_p1_m1 = _mm_abs_epi16(xmm2); + xmm2= _mm_adds_epi16(rho_rpi_3_1,y1i); + psi_i_p1_m3 = _mm_abs_epi16(xmm2); + xmm2= _mm_adds_epi16(rho_rpi_1_3,y1i); + psi_i_p3_m1 = _mm_abs_epi16(xmm2); + xmm2= _mm_adds_epi16(rho_rpi_3_3,y1i); + psi_i_p3_m3 = _mm_abs_epi16(xmm2); + xmm2= _mm_adds_epi16(rho_rpi_1_1,y1r); + psi_r_m1_m1 = _mm_abs_epi16(xmm2); + xmm2= _mm_adds_epi16(rho_rpi_1_3,y1r); + psi_r_m1_m3 = _mm_abs_epi16(xmm2); + xmm2= _mm_adds_epi16(rho_rpi_3_1,y1r); + psi_r_m3_m1 = _mm_abs_epi16(xmm2); + xmm2= _mm_adds_epi16(rho_rpi_3_3,y1r); + psi_r_m3_m3 = _mm_abs_epi16(xmm2); + xmm2= _mm_adds_epi16(y1r,rho_rmi_1_1); + psi_r_m1_p1 = _mm_abs_epi16(xmm2); + xmm2= _mm_adds_epi16(y1r,rho_rmi_1_3); + psi_r_m1_p3 = _mm_abs_epi16(xmm2); + xmm2= _mm_adds_epi16(y1i,rho_rmi_1_1); + psi_i_m1_m1 = _mm_abs_epi16(xmm2); + xmm2= 
_mm_adds_epi16(y1i,rho_rmi_3_1); + psi_i_m1_m3 = _mm_abs_epi16(xmm2); + xmm2= _mm_adds_epi16(y1r,rho_rmi_3_1); + psi_r_m3_p1 = _mm_abs_epi16(xmm2); + xmm2= _mm_adds_epi16(y1r,rho_rmi_3_3); + psi_r_m3_p3 = _mm_abs_epi16(xmm2); + xmm2= _mm_adds_epi16(y1i,rho_rmi_1_3); + psi_i_m3_m1 = _mm_abs_epi16(xmm2); + xmm2= _mm_adds_epi16(y1i,rho_rmi_3_3); + psi_i_m3_m3 = _mm_abs_epi16(xmm2); + + // Rearrange desired MF output + xmm0 = stream0_128i_in[i]; + xmm1 = stream0_128i_in[i+1]; + xmm0 = _mm_shufflelo_epi16(xmm0,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm0 = _mm_shufflehi_epi16(xmm0,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm0 = _mm_shuffle_epi32(xmm0,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm1 = _mm_shufflelo_epi16(xmm1,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm1 = _mm_shufflehi_epi16(xmm1,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm1 = _mm_shuffle_epi32(xmm1,0xd8); //_MM_SHUFFLE(0,2,1,3)); + //xmm0 = [Re(0,1) Re(2,3) Im(0,1) Im(2,3)] + //xmm1 = [Re(4,5) Re(6,7) Im(4,5) Im(6,7)] + y0r = _mm_unpacklo_epi64(xmm0,xmm1); // = [y0r(1),y0r(2),y0r(3),y0r(4)] + y0i = _mm_unpackhi_epi64(xmm0,xmm1); + + // Rearrange desired channel magnitudes + xmm2 = ch_mag_128i[i]; // = [|h|^2(1),|h|^2(1),|h|^2(2),|h|^2(2)]*(2/sqrt(10)) + xmm3 = ch_mag_128i[i+1]; // = [|h|^2(3),|h|^2(3),|h|^2(4),|h|^2(4)]*(2/sqrt(10)) + xmm2 = _mm_shufflelo_epi16(xmm2,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm2 = _mm_shufflehi_epi16(xmm2,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm2 = _mm_shuffle_epi32(xmm2,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm3 = _mm_shufflelo_epi16(xmm3,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm3 = _mm_shufflehi_epi16(xmm3,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm3 = _mm_shuffle_epi32(xmm3,0xd8); //_MM_SHUFFLE(0,2,1,3)); + + ch_mag_des = _mm_unpacklo_epi64(xmm2,xmm3); // = [|h|^2(1),|h|^2(2),|h|^2(3),|h|^2(4)]*(2/sqrt(10)) + + // Rearrange interfering channel magnitudes + xmm2 = ch_mag_128i_i[i]; + xmm3 = ch_mag_128i_i[i+1]; + + xmm2 = _mm_shufflelo_epi16(xmm2,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm2 = _mm_shufflehi_epi16(xmm2,0xd8); 
//_MM_SHUFFLE(0,2,1,3)); + xmm2 = _mm_shuffle_epi32(xmm2,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm3 = _mm_shufflelo_epi16(xmm3,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm3 = _mm_shufflehi_epi16(xmm3,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm3 = _mm_shuffle_epi32(xmm3,0xd8); //_MM_SHUFFLE(0,2,1,3)); + + ch_mag_int = _mm_unpacklo_epi64(xmm2,xmm3); + + // Scale MF output of desired signal + y0r_over_sqrt10 = _mm_mulhi_epi16(y0r,ONE_OVER_SQRT_10); + y0i_over_sqrt10 = _mm_mulhi_epi16(y0i,ONE_OVER_SQRT_10); + y0r_three_over_sqrt10 = _mm_mulhi_epi16(y0r,THREE_OVER_SQRT_10); + y0i_three_over_sqrt10 = _mm_mulhi_epi16(y0i,THREE_OVER_SQRT_10); + y0r_three_over_sqrt10 = _mm_slli_epi16(y0r_three_over_sqrt10,1); + y0i_three_over_sqrt10 = _mm_slli_epi16(y0i_three_over_sqrt10,1); + + // Compute necessary combination of required terms + y0_p_1_1 = _mm_adds_epi16(y0r_over_sqrt10,y0i_over_sqrt10); + y0_m_1_1 = _mm_subs_epi16(y0r_over_sqrt10,y0i_over_sqrt10); + + y0_p_1_3 = _mm_adds_epi16(y0r_over_sqrt10,y0i_three_over_sqrt10); + y0_m_1_3 = _mm_subs_epi16(y0r_over_sqrt10,y0i_three_over_sqrt10); + + y0_p_3_1 = _mm_adds_epi16(y0r_three_over_sqrt10,y0i_over_sqrt10); + y0_m_3_1 = _mm_subs_epi16(y0r_three_over_sqrt10,y0i_over_sqrt10); + + y0_p_3_3 = _mm_adds_epi16(y0r_three_over_sqrt10,y0i_three_over_sqrt10); + y0_m_3_3 = _mm_subs_epi16(y0r_three_over_sqrt10,y0i_three_over_sqrt10); + + // Compute optimal interfering symbol magnitude + interference_abs_epi16(psi_r_p1_p1 ,ch_mag_int,a_r_p1_p1,ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_i_p1_p1 ,ch_mag_int,a_i_p1_p1,ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_r_p1_p3 ,ch_mag_int,a_r_p1_p3,ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_i_p1_p3 ,ch_mag_int,a_i_p1_p3,ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_r_p1_m1 ,ch_mag_int,a_r_p1_m1,ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_i_p1_m1 
,ch_mag_int,a_i_p1_m1,ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_r_p1_m3 ,ch_mag_int,a_r_p1_m3,ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_i_p1_m3 ,ch_mag_int,a_i_p1_m3,ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_r_p3_p1 ,ch_mag_int,a_r_p3_p1,ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_i_p3_p1 ,ch_mag_int,a_i_p3_p1,ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_r_p3_p3 ,ch_mag_int,a_r_p3_p3,ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_i_p3_p3 ,ch_mag_int,a_i_p3_p3,ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_r_p3_m1 ,ch_mag_int,a_r_p3_m1,ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_i_p3_m1 ,ch_mag_int,a_i_p3_m1,ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_r_p3_m3 ,ch_mag_int,a_r_p3_m3,ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_i_p3_m3 ,ch_mag_int,a_i_p3_m3,ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_r_m1_p1 ,ch_mag_int,a_r_m1_p1,ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_i_m1_p1 ,ch_mag_int,a_i_m1_p1,ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_r_m1_p3 ,ch_mag_int,a_r_m1_p3,ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_i_m1_p3 ,ch_mag_int,a_i_m1_p3,ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_r_m1_m1 ,ch_mag_int,a_r_m1_m1,ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_i_m1_m1 ,ch_mag_int,a_i_m1_m1,ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_r_m1_m3 ,ch_mag_int,a_r_m1_m3,ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_i_m1_m3 ,ch_mag_int,a_i_m1_m3,ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_r_m3_p1 ,ch_mag_int,a_r_m3_p1,ONE_OVER_SQRT_10_Q15, 
THREE_OVER_SQRT_10); + interference_abs_epi16(psi_i_m3_p1 ,ch_mag_int,a_i_m3_p1,ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_r_m3_p3 ,ch_mag_int,a_r_m3_p3,ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_i_m3_p3 ,ch_mag_int,a_i_m3_p3,ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_r_m3_m1 ,ch_mag_int,a_r_m3_m1,ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_i_m3_m1 ,ch_mag_int,a_i_m3_m1,ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_r_m3_m3 ,ch_mag_int,a_r_m3_m3,ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_i_m3_m3 ,ch_mag_int,a_i_m3_m3,ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + + // Calculation of groups of two terms in the bit metric involving product of psi and interference magnitude + prodsum_psi_a_epi16(psi_r_p1_p1,a_r_p1_p1,psi_i_p1_p1,a_i_p1_p1,psi_a_p1_p1); + prodsum_psi_a_epi16(psi_r_p1_p3,a_r_p1_p3,psi_i_p1_p3,a_i_p1_p3,psi_a_p1_p3); + prodsum_psi_a_epi16(psi_r_p3_p1,a_r_p3_p1,psi_i_p3_p1,a_i_p3_p1,psi_a_p3_p1); + prodsum_psi_a_epi16(psi_r_p3_p3,a_r_p3_p3,psi_i_p3_p3,a_i_p3_p3,psi_a_p3_p3); + prodsum_psi_a_epi16(psi_r_p1_m1,a_r_p1_m1,psi_i_p1_m1,a_i_p1_m1,psi_a_p1_m1); + prodsum_psi_a_epi16(psi_r_p1_m3,a_r_p1_m3,psi_i_p1_m3,a_i_p1_m3,psi_a_p1_m3); + prodsum_psi_a_epi16(psi_r_p3_m1,a_r_p3_m1,psi_i_p3_m1,a_i_p3_m1,psi_a_p3_m1); + prodsum_psi_a_epi16(psi_r_p3_m3,a_r_p3_m3,psi_i_p3_m3,a_i_p3_m3,psi_a_p3_m3); + prodsum_psi_a_epi16(psi_r_m1_p1,a_r_m1_p1,psi_i_m1_p1,a_i_m1_p1,psi_a_m1_p1); + prodsum_psi_a_epi16(psi_r_m1_p3,a_r_m1_p3,psi_i_m1_p3,a_i_m1_p3,psi_a_m1_p3); + prodsum_psi_a_epi16(psi_r_m3_p1,a_r_m3_p1,psi_i_m3_p1,a_i_m3_p1,psi_a_m3_p1); + prodsum_psi_a_epi16(psi_r_m3_p3,a_r_m3_p3,psi_i_m3_p3,a_i_m3_p3,psi_a_m3_p3); + prodsum_psi_a_epi16(psi_r_m1_m1,a_r_m1_m1,psi_i_m1_m1,a_i_m1_m1,psi_a_m1_m1); + prodsum_psi_a_epi16(psi_r_m1_m3,a_r_m1_m3,psi_i_m1_m3,a_i_m1_m3,psi_a_m1_m3); + 
prodsum_psi_a_epi16(psi_r_m3_m1,a_r_m3_m1,psi_i_m3_m1,a_i_m3_m1,psi_a_m3_m1); + prodsum_psi_a_epi16(psi_r_m3_m3,a_r_m3_m3,psi_i_m3_m3,a_i_m3_m3,psi_a_m3_m3); + + + // squared interference magnitude times int. ch. power + square_a_epi16(a_r_p1_p1,a_i_p1_p1,ch_mag_int,SQRT_10_OVER_FOUR,a_sq_p1_p1); + square_a_epi16(a_r_p1_p3,a_i_p1_p3,ch_mag_int,SQRT_10_OVER_FOUR,a_sq_p1_p3); + square_a_epi16(a_r_p3_p1,a_i_p3_p1,ch_mag_int,SQRT_10_OVER_FOUR,a_sq_p3_p1); + square_a_epi16(a_r_p3_p3,a_i_p3_p3,ch_mag_int,SQRT_10_OVER_FOUR,a_sq_p3_p3); + square_a_epi16(a_r_p1_m1,a_i_p1_m1,ch_mag_int,SQRT_10_OVER_FOUR,a_sq_p1_m1); + square_a_epi16(a_r_p1_m3,a_i_p1_m3,ch_mag_int,SQRT_10_OVER_FOUR,a_sq_p1_m3); + square_a_epi16(a_r_p3_m1,a_i_p3_m1,ch_mag_int,SQRT_10_OVER_FOUR,a_sq_p3_m1); + square_a_epi16(a_r_p3_m3,a_i_p3_m3,ch_mag_int,SQRT_10_OVER_FOUR,a_sq_p3_m3); + square_a_epi16(a_r_m1_p1,a_i_m1_p1,ch_mag_int,SQRT_10_OVER_FOUR,a_sq_m1_p1); + square_a_epi16(a_r_m1_p3,a_i_m1_p3,ch_mag_int,SQRT_10_OVER_FOUR,a_sq_m1_p3); + square_a_epi16(a_r_m3_p1,a_i_m3_p1,ch_mag_int,SQRT_10_OVER_FOUR,a_sq_m3_p1); + square_a_epi16(a_r_m3_p3,a_i_m3_p3,ch_mag_int,SQRT_10_OVER_FOUR,a_sq_m3_p3); + square_a_epi16(a_r_m1_m1,a_i_m1_m1,ch_mag_int,SQRT_10_OVER_FOUR,a_sq_m1_m1); + square_a_epi16(a_r_m1_m3,a_i_m1_m3,ch_mag_int,SQRT_10_OVER_FOUR,a_sq_m1_m3); + square_a_epi16(a_r_m3_m1,a_i_m3_m1,ch_mag_int,SQRT_10_OVER_FOUR,a_sq_m3_m1); + square_a_epi16(a_r_m3_m3,a_i_m3_m3,ch_mag_int,SQRT_10_OVER_FOUR,a_sq_m3_m3); + + // Computing different multiples of channel norms + ch_mag_over_10=_mm_mulhi_epi16(ch_mag_des, ONE_OVER_TWO_SQRT_10); + ch_mag_over_2=_mm_mulhi_epi16(ch_mag_des, SQRT_10_OVER_FOUR); + ch_mag_over_2=_mm_slli_epi16(ch_mag_over_2, 1); + ch_mag_9_over_10=_mm_mulhi_epi16(ch_mag_des, NINE_OVER_TWO_SQRT_10); + ch_mag_9_over_10=_mm_slli_epi16(ch_mag_9_over_10, 2); + + // Computing Metrics + xmm0 = _mm_subs_epi16(psi_a_p1_p1,a_sq_p1_p1); + xmm1 = _mm_adds_epi16(xmm0,y0_p_1_1); + bit_met_p1_p1= 
_mm_subs_epi16(xmm1,ch_mag_over_10); + + xmm0 = _mm_subs_epi16(psi_a_p1_p3,a_sq_p1_p3); + xmm1 = _mm_adds_epi16(xmm0,y0_p_1_3); + bit_met_p1_p3= _mm_subs_epi16(xmm1,ch_mag_over_2); + + xmm0 = _mm_subs_epi16(psi_a_p1_m1,a_sq_p1_m1); + xmm1 = _mm_adds_epi16(xmm0,y0_m_1_1); + bit_met_p1_m1= _mm_subs_epi16(xmm1,ch_mag_over_10); + + xmm0 = _mm_subs_epi16(psi_a_p1_m3,a_sq_p1_m3); + xmm1 = _mm_adds_epi16(xmm0,y0_m_1_3); + bit_met_p1_m3= _mm_subs_epi16(xmm1,ch_mag_over_2); + + xmm0 = _mm_subs_epi16(psi_a_p3_p1,a_sq_p3_p1); + xmm1 = _mm_adds_epi16(xmm0,y0_p_3_1); + bit_met_p3_p1= _mm_subs_epi16(xmm1,ch_mag_over_2); + + xmm0 = _mm_subs_epi16(psi_a_p3_p3,a_sq_p3_p3); + xmm1 = _mm_adds_epi16(xmm0,y0_p_3_3); + bit_met_p3_p3= _mm_subs_epi16(xmm1,ch_mag_9_over_10); + + xmm0 = _mm_subs_epi16(psi_a_p3_m1,a_sq_p3_m1); + xmm1 = _mm_adds_epi16(xmm0,y0_m_3_1); + bit_met_p3_m1= _mm_subs_epi16(xmm1,ch_mag_over_2); + + xmm0 = _mm_subs_epi16(psi_a_p3_m3,a_sq_p3_m3); + xmm1 = _mm_adds_epi16(xmm0,y0_m_3_3); + bit_met_p3_m3= _mm_subs_epi16(xmm1,ch_mag_9_over_10); + + xmm0 = _mm_subs_epi16(psi_a_m1_p1,a_sq_m1_p1); + xmm1 = _mm_subs_epi16(xmm0,y0_m_1_1); + bit_met_m1_p1= _mm_subs_epi16(xmm1,ch_mag_over_10); + + xmm0 = _mm_subs_epi16(psi_a_m1_p3,a_sq_m1_p3); + xmm1 = _mm_subs_epi16(xmm0,y0_m_1_3); + bit_met_m1_p3= _mm_subs_epi16(xmm1,ch_mag_over_2); + + xmm0 = _mm_subs_epi16(psi_a_m1_m1,a_sq_m1_m1); + xmm1 = _mm_subs_epi16(xmm0,y0_p_1_1); + bit_met_m1_m1= _mm_subs_epi16(xmm1,ch_mag_over_10); + + xmm0 = _mm_subs_epi16(psi_a_m1_m3,a_sq_m1_m3); + xmm1 = _mm_subs_epi16(xmm0,y0_p_1_3); + bit_met_m1_m3= _mm_subs_epi16(xmm1,ch_mag_over_2); + + xmm0 = _mm_subs_epi16(psi_a_m3_p1,a_sq_m3_p1); + xmm1 = _mm_subs_epi16(xmm0,y0_m_3_1); + bit_met_m3_p1= _mm_subs_epi16(xmm1,ch_mag_over_2); + + xmm0 = _mm_subs_epi16(psi_a_m3_p3,a_sq_m3_p3); + xmm1 = _mm_subs_epi16(xmm0,y0_m_3_3); + bit_met_m3_p3= _mm_subs_epi16(xmm1,ch_mag_9_over_10); + + xmm0 = _mm_subs_epi16(psi_a_m3_m1,a_sq_m3_m1); + xmm1 = 
_mm_subs_epi16(xmm0,y0_p_3_1); + bit_met_m3_m1= _mm_subs_epi16(xmm1,ch_mag_over_2); + + xmm0 = _mm_subs_epi16(psi_a_m3_m3,a_sq_m3_m3); + xmm1 = _mm_subs_epi16(xmm0,y0_p_3_3); + bit_met_m3_m3= _mm_subs_epi16(xmm1,ch_mag_9_over_10); + + // LLR of the first bit + // Bit = 1 + xmm0 = _mm_max_epi16(bit_met_m1_p1,bit_met_m1_p3); + xmm1 = _mm_max_epi16(bit_met_m1_m1,bit_met_m1_m3); + xmm2 = _mm_max_epi16(bit_met_m3_p1,bit_met_m3_p3); + xmm3 = _mm_max_epi16(bit_met_m3_m1,bit_met_m3_m3); + xmm4 = _mm_max_epi16(xmm0,xmm1); + xmm5 = _mm_max_epi16(xmm2,xmm3); + logmax_num_re0= _mm_max_epi16(xmm4,xmm5); + + // Bit = 0 + xmm0 = _mm_max_epi16(bit_met_p1_p1,bit_met_p1_p3); + xmm1 = _mm_max_epi16(bit_met_p1_m1,bit_met_p1_m3); + xmm2 = _mm_max_epi16(bit_met_p3_p1,bit_met_p3_p3); + xmm3 = _mm_max_epi16(bit_met_p3_m1,bit_met_p3_m3); + xmm4 = _mm_max_epi16(xmm0,xmm1); + xmm5 = _mm_max_epi16(xmm2,xmm3); + logmax_den_re0 = _mm_max_epi16(xmm4,xmm5); + + // LLR of first bit [L1(1), L1(2), L1(3), L1(4), L1(5), L1(6), L1(7), L1(8)] + y0r = _mm_subs_epi16(logmax_den_re0,logmax_num_re0); + + // LLR of the second bit + // Bit = 1 + xmm0 = _mm_max_epi16(bit_met_p1_m1,bit_met_p3_m1); + xmm1 = _mm_max_epi16(bit_met_m1_m1,bit_met_m3_m1); + xmm2 = _mm_max_epi16(bit_met_p1_m3,bit_met_p3_m3); + xmm3 = _mm_max_epi16(bit_met_m1_m3,bit_met_m3_m3); + xmm4 = _mm_max_epi16(xmm0,xmm1); + xmm5 = _mm_max_epi16(xmm2,xmm3); + logmax_num_re1 = _mm_max_epi16(xmm4,xmm5); + + // Bit = 0 + xmm0 = _mm_max_epi16(bit_met_p1_p1,bit_met_p3_p1); + xmm1 = _mm_max_epi16(bit_met_m1_p1,bit_met_m3_p1); + xmm2 = _mm_max_epi16(bit_met_p1_p3,bit_met_p3_p3); + xmm3 = _mm_max_epi16(bit_met_m1_p3,bit_met_m3_p3); + xmm4 = _mm_max_epi16(xmm0,xmm1); + xmm5 = _mm_max_epi16(xmm2,xmm3); + logmax_den_re1 = _mm_max_epi16(xmm4,xmm5); + + // LLR of second bit [L2(1), L2(2), L2(3), L2(4)] + y1r = _mm_subs_epi16(logmax_den_re1,logmax_num_re1); + + // LLR of the third bit + // Bit = 1 + xmm0 = _mm_max_epi16(bit_met_m3_p1,bit_met_m3_p3); + xmm1 = 
_mm_max_epi16(bit_met_m3_m1,bit_met_m3_m3); + xmm2 = _mm_max_epi16(bit_met_p3_p1,bit_met_p3_p3); + xmm3 = _mm_max_epi16(bit_met_p3_m1,bit_met_p3_m3); + xmm4 = _mm_max_epi16(xmm0,xmm1); + xmm5 = _mm_max_epi16(xmm2,xmm3); + logmax_num_im0 = _mm_max_epi16(xmm4,xmm5); + + // Bit = 0 + xmm0 = _mm_max_epi16(bit_met_m1_p1,bit_met_m1_p3); + xmm1 = _mm_max_epi16(bit_met_m1_m1,bit_met_m1_m3); + xmm2 = _mm_max_epi16(bit_met_p1_p1,bit_met_p1_p3); + xmm3 = _mm_max_epi16(bit_met_p1_m1,bit_met_p1_m3); + xmm4 = _mm_max_epi16(xmm0,xmm1); + xmm5 = _mm_max_epi16(xmm2,xmm3); + logmax_den_im0 = _mm_max_epi16(xmm4,xmm5); + + // LLR of third bit [L3(1), L3(2), L3(3), L3(4)] + y0i = _mm_subs_epi16(logmax_den_im0,logmax_num_im0); + + // LLR of the fourth bit + // Bit = 1 + xmm0 = _mm_max_epi16(bit_met_p1_m3,bit_met_p3_m3); + xmm1 = _mm_max_epi16(bit_met_m1_m3,bit_met_m3_m3); + xmm2 = _mm_max_epi16(bit_met_p1_p3,bit_met_p3_p3); + xmm3 = _mm_max_epi16(bit_met_m1_p3,bit_met_m3_p3); + xmm4 = _mm_max_epi16(xmm0,xmm1); + xmm5 = _mm_max_epi16(xmm2,xmm3); + logmax_num_im1 = _mm_max_epi16(xmm4,xmm5); + + // Bit = 0 + xmm0 = _mm_max_epi16(bit_met_p1_m1,bit_met_p3_m1); + xmm1 = _mm_max_epi16(bit_met_m1_m1,bit_met_m3_m1); + xmm2 = _mm_max_epi16(bit_met_p1_p1,bit_met_p3_p1); + xmm3 = _mm_max_epi16(bit_met_m1_p1,bit_met_m3_p1); + xmm4 = _mm_max_epi16(xmm0,xmm1); + xmm5 = _mm_max_epi16(xmm2,xmm3); + logmax_den_im1 = _mm_max_epi16(xmm4,xmm5); + + // LLR of fourth bit [L4(1), L4(2), L4(3), L4(4)] + y1i = _mm_subs_epi16(logmax_den_im1,logmax_num_im1); + + // Pack LLRs in output + // [L1(1), L2(1), L1(2), L2(2), L1(3), L2(3), L1(4), L2(4)] + xmm0 = _mm_unpacklo_epi16(y0r,y1r); + // [L1(5), L2(5), L1(6), L2(6), L1(7), L2(7), L1(8), L2(8)] + xmm1 = _mm_unpackhi_epi16(y0r,y1r); + // [L3(1), L4(1), L3(2), L4(2), L3(3), L4(3), L3(4), L4(4)] + xmm2 = _mm_unpacklo_epi16(y0i,y1i); + // [L3(5), L4(5), L3(6), L4(6), L3(7), L4(7), L3(8), L4(8)] + xmm3 = _mm_unpackhi_epi16(y0i,y1i); + + stream0_128i_out[2*i+0] = 
_mm_unpacklo_epi32(xmm0,xmm2); // 8LLRs, 2REs + stream0_128i_out[2*i+1] = _mm_unpackhi_epi32(xmm0,xmm2); + stream0_128i_out[2*i+2] = _mm_unpacklo_epi32(xmm1,xmm3); + stream0_128i_out[2*i+3] = _mm_unpackhi_epi32(xmm1,xmm3); +#elif defined(__arm__) + +#endif + + } + +#if defined(__x86_64__) || defined(__i386__) + _mm_empty(); + _m_empty(); +#endif +} + +int dlsch_16qam_16qam_llr(LTE_DL_FRAME_PARMS *frame_parms, + int32_t **rxdataF_comp, + int32_t **rxdataF_comp_i, + int32_t **dl_ch_mag, //|h_0|^2*(2/sqrt{10}) + int32_t **dl_ch_mag_i, //|h_1|^2*(2/sqrt{10}) + int32_t **rho_i, + int16_t *dlsch_llr, + uint8_t symbol, + uint8_t first_symbol_flag, + uint16_t nb_rb, + uint16_t pbch_pss_sss_adjust, + int16_t **llr16p) +{ + + int16_t *rxF = (int16_t*)&rxdataF_comp[0][(symbol*frame_parms->N_RB_DL*12)]; + int16_t *rxF_i = (int16_t*)&rxdataF_comp_i[0][(symbol*frame_parms->N_RB_DL*12)]; + int16_t *ch_mag = (int16_t*)&dl_ch_mag[0][(symbol*frame_parms->N_RB_DL*12)]; + int16_t *ch_mag_i = (int16_t*)&dl_ch_mag_i[0][(symbol*frame_parms->N_RB_DL*12)]; + int16_t *rho = (int16_t*)&rho_i[0][(symbol*frame_parms->N_RB_DL*12)]; + int16_t *llr16; + int len; + uint8_t symbol_mod = (symbol >= (7-frame_parms->Ncp))? 
(symbol-(7-frame_parms->Ncp)) : symbol; + + // first symbol has different structure due to more pilots + if (first_symbol_flag == 1) { + llr16 = (int16_t*)dlsch_llr; + } else { + llr16 = (int16_t*)(*llr16p); + } + + + AssertFatal(llr16!=NULL,"dlsch_16qam_16qam_llr: llr is null, symbol %d\n",symbol); + + if ((symbol_mod==0) || (symbol_mod==(4-frame_parms->Ncp))) { + // if symbol has pilots + if (frame_parms->nb_antenna_ports_eNB!=1) + // in 2 antenna ports we have 8 REs per symbol per RB + len = (nb_rb*8) - (2*pbch_pss_sss_adjust/3); + else + // for 1 antenna port we have 10 REs per symbol per RB + len = (nb_rb*10) - (5*pbch_pss_sss_adjust/6); + } else { + // symbol has no pilots + len = (nb_rb*12) - pbch_pss_sss_adjust; + } + + // printf("symbol %d: qam16_llr, len %d (llr16 %p)\n",symbol,len,llr16); + + qam16_qam16((short *)rxF, + (short *)rxF_i, + (short *)ch_mag, + (short *)ch_mag_i, + (short *)llr16, + (short *)rho, + len); + + llr16 += (len<<2); + *llr16p = (short *)llr16; + + return(0); +} + +void qam16_qam64(int16_t *stream0_in, + int16_t *stream1_in, + int16_t *ch_mag, + int16_t *ch_mag_i, + int16_t *stream0_out, + int16_t *rho01, + int32_t length + ) +{ + + /* + Author: Sebastian Wagner + Date: 2012-06-04 + + Input: + stream0_in: MF filter for 1st stream, i.e., y0=h0'*y + stream!_in: MF filter for 2nd stream, i.e., y1=h1'*y + ch_mag: 2*h0/sqrt(00), [Re0 Im0 Re1 Im1] s.t. Im0=Re0, Im1=Re1, etc + ch_mag_i: 2*h1/sqrt(00), [Re0 Im0 Re1 Im1] s.t. 
Im0=Re0, Im1=Re1, etc + rho01: Channel cross correlation, i.e., h1'*h0 + + Output: + stream0_out: output LLRs for 1st stream + */ + +#if defined(__x86_64__) || defined(__i386__) + __m128i *rho01_128i = (__m128i *)rho01; + __m128i *stream0_128i_in = (__m128i *)stream0_in; + __m128i *stream1_128i_in = (__m128i *)stream1_in; + __m128i *stream0_128i_out = (__m128i *)stream0_out; + __m128i *ch_mag_128i = (__m128i *)ch_mag; + __m128i *ch_mag_128i_i = (__m128i *)ch_mag_i; + + + __m128i ONE_OVER_SQRT_2 = _mm_set1_epi16(23170); // round(1/sqrt(2)*2^15) + __m128i ONE_OVER_SQRT_10 = _mm_set1_epi16(20724); // round(1/sqrt(10)*2^16) + __m128i THREE_OVER_SQRT_10 = _mm_set1_epi16(31086); // round(3/sqrt(10)*2^15) + __m128i SQRT_10_OVER_FOUR = _mm_set1_epi16(25905); // round(sqrt(10)/4*2^15) + __m128i ONE_OVER_TWO_SQRT_10 = _mm_set1_epi16(10362); // round(1/2/sqrt(10)*2^16) + __m128i NINE_OVER_TWO_SQRT_10 = _mm_set1_epi16(23315); // round(9/2/sqrt(10)*2^14) + __m128i ONE_OVER_SQRT_2_42 = _mm_set1_epi16(3575); // round(1/sqrt(2*42)*2^15) + __m128i THREE_OVER_SQRT_2_42 = _mm_set1_epi16(10726); // round(3/sqrt(2*42)*2^15) + __m128i FIVE_OVER_SQRT_2_42 = _mm_set1_epi16(17876); // round(5/sqrt(2*42)*2^15) + __m128i SEVEN_OVER_SQRT_2_42 = _mm_set1_epi16(25027); // round(7/sqrt(2*42)*2^15) + __m128i SQRT_42_OVER_FOUR = _mm_set1_epi16(13272); // round(sqrt(42)/4*2^13), Q3. 
+ __m128i ch_mag_des,ch_mag_int; + __m128i y0r_over_sqrt10; + __m128i y0i_over_sqrt10; + __m128i y0r_three_over_sqrt10; + __m128i y0i_three_over_sqrt10; + __m128i ch_mag_over_10; + __m128i ch_mag_over_2; + __m128i ch_mag_9_over_10; + __m128i ch_mag_int_with_sigma2; + __m128i two_ch_mag_int_with_sigma2; + __m128i three_ch_mag_int_with_sigma2; + +#elif defined(__arm__) + +#endif + int i; + + for (i=0; i<length>>2; i+=2) { + // In one iteration, we deal with 8 REs + +#if defined(__x86_64__) || defined(__i386__) + // Get rho + xmm0 = rho01_128i[i]; + xmm1 = rho01_128i[i+1]; + xmm0 = _mm_shufflelo_epi16(xmm0,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm0 = _mm_shufflehi_epi16(xmm0,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm0 = _mm_shuffle_epi32(xmm0,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm1 = _mm_shufflelo_epi16(xmm1,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm1 = _mm_shufflehi_epi16(xmm1,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm1 = _mm_shuffle_epi32(xmm1,0xd8); //_MM_SHUFFLE(0,2,1,3)); + //xmm0 = [Re(0,1) Re(2,3) Im(0,1) Im(2,3)] + //xmm1 = [Re(4,5) Re(6,7) Im(4,5) Im(6,7)] + xmm2 = _mm_unpacklo_epi64(xmm0,xmm1); // Re(rho) + xmm3 = _mm_unpackhi_epi64(xmm0,xmm1); // Im(rho) + rho_rpi = _mm_adds_epi16(xmm2,xmm3); // rho = Re(rho) + Im(rho) + rho_rmi = _mm_subs_epi16(xmm2,xmm3); // rho* = Re(rho) - Im(rho) + + // Compute the different rhos + rho_rpi_1_1 = _mm_mulhi_epi16(rho_rpi,ONE_OVER_SQRT_10); + rho_rmi_1_1 = _mm_mulhi_epi16(rho_rmi,ONE_OVER_SQRT_10); + rho_rpi_3_3 = _mm_mulhi_epi16(rho_rpi,THREE_OVER_SQRT_10); + rho_rmi_3_3 = _mm_mulhi_epi16(rho_rmi,THREE_OVER_SQRT_10); + rho_rpi_3_3 = _mm_slli_epi16(rho_rpi_3_3,1); + rho_rmi_3_3 = _mm_slli_epi16(rho_rmi_3_3,1); + + xmm4 = _mm_mulhi_epi16(xmm2,ONE_OVER_SQRT_10); // Re(rho) + xmm5 = _mm_mulhi_epi16(xmm3,THREE_OVER_SQRT_10); // Im(rho) + xmm5 = _mm_slli_epi16(xmm5,1); + + rho_rpi_1_3 = _mm_adds_epi16(xmm4,xmm5); + rho_rmi_1_3 = _mm_subs_epi16(xmm4,xmm5); + + xmm6 = _mm_mulhi_epi16(xmm2,THREE_OVER_SQRT_10); // Re(rho) + xmm7 = 
_mm_mulhi_epi16(xmm3,ONE_OVER_SQRT_10); // Im(rho) + xmm6 = _mm_slli_epi16(xmm6,1); + + rho_rpi_3_1 = _mm_adds_epi16(xmm6,xmm7); + rho_rmi_3_1 = _mm_subs_epi16(xmm6,xmm7); + + // Rearrange interfering MF output + xmm0 = stream1_128i_in[i]; + xmm1 = stream1_128i_in[i+1]; + xmm0 = _mm_shufflelo_epi16(xmm0,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm0 = _mm_shufflehi_epi16(xmm0,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm0 = _mm_shuffle_epi32(xmm0,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm1 = _mm_shufflelo_epi16(xmm1,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm1 = _mm_shufflehi_epi16(xmm1,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm1 = _mm_shuffle_epi32(xmm1,0xd8); //_MM_SHUFFLE(0,2,1,3)); + //xmm0 = [Re(0,1) Re(2,3) Im(0,1) Im(2,3)] + //xmm1 = [Re(4,5) Re(6,7) Im(4,5) Im(6,7)] + y1r = _mm_unpacklo_epi64(xmm0,xmm1); //[y1r(1),y1r(2),y1r(3),y1r(4)] + y1i = _mm_unpackhi_epi64(xmm0,xmm1); //[y1i(1),y1i(2),y1i(3),y1i(4)] + + xmm0 = _mm_setzero_si128(); // ZERO + xmm2 = _mm_subs_epi16(rho_rpi_1_1,y1r); // = [Re(rho)+ Im(rho)]/sqrt(10) - y1r + psi_r_p1_p1 = _mm_abs_epi16(xmm2); // = |[Re(rho)+ Im(rho)]/sqrt(10) - y1r| + + xmm2= _mm_subs_epi16(rho_rmi_1_1,y1r); + psi_r_p1_m1 = _mm_abs_epi16(xmm2); + xmm2= _mm_subs_epi16(rho_rmi_1_1,y1i); + psi_i_p1_p1 = _mm_abs_epi16(xmm2); + xmm2= _mm_subs_epi16(rho_rpi_1_3,y1r); + psi_r_p1_p3 = _mm_abs_epi16(xmm2); + xmm2= _mm_subs_epi16(rho_rmi_1_3,y1r); + psi_r_p1_m3 = _mm_abs_epi16(xmm2); + xmm2= _mm_subs_epi16(rho_rmi_3_1,y1i); + psi_i_p1_p3 = _mm_abs_epi16(xmm2); + xmm2= _mm_subs_epi16(rho_rpi_3_1,y1r); + psi_r_p3_p1 = _mm_abs_epi16(xmm2); + xmm2= _mm_subs_epi16(rho_rmi_3_1,y1r); + psi_r_p3_m1 = _mm_abs_epi16(xmm2); + xmm2= _mm_subs_epi16(rho_rmi_1_3,y1i); + psi_i_p3_p1 = _mm_abs_epi16(xmm2); + xmm2= _mm_subs_epi16(rho_rpi_3_3,y1r); + psi_r_p3_p3 = _mm_abs_epi16(xmm2); + xmm2= _mm_subs_epi16(rho_rmi_3_3,y1r); + psi_r_p3_m3 = _mm_abs_epi16(xmm2); + xmm2= _mm_subs_epi16(rho_rmi_3_3,y1i); + psi_i_p3_p3 = _mm_abs_epi16(xmm2); + xmm2= _mm_subs_epi16(rho_rpi_1_1,y1i); + 
psi_i_m1_p1 = _mm_abs_epi16(xmm2); + xmm2= _mm_subs_epi16(rho_rpi_3_1,y1i); + psi_i_m1_p3 = _mm_abs_epi16(xmm2); + xmm2= _mm_subs_epi16(rho_rpi_1_3,y1i); + psi_i_m3_p1 = _mm_abs_epi16(xmm2); + xmm2= _mm_subs_epi16(rho_rpi_3_3,y1i); + psi_i_m3_p3 = _mm_abs_epi16(xmm2); + xmm2= _mm_adds_epi16(rho_rpi_1_1,y1i); + psi_i_p1_m1 = _mm_abs_epi16(xmm2); + xmm2= _mm_adds_epi16(rho_rpi_3_1,y1i); + psi_i_p1_m3 = _mm_abs_epi16(xmm2); + xmm2= _mm_adds_epi16(rho_rpi_1_3,y1i); + psi_i_p3_m1 = _mm_abs_epi16(xmm2); + xmm2= _mm_adds_epi16(rho_rpi_3_3,y1i); + psi_i_p3_m3 = _mm_abs_epi16(xmm2); + xmm2= _mm_adds_epi16(rho_rpi_1_1,y1r); + psi_r_m1_m1 = _mm_abs_epi16(xmm2); + xmm2= _mm_adds_epi16(rho_rpi_1_3,y1r); + psi_r_m1_m3 = _mm_abs_epi16(xmm2); + xmm2= _mm_adds_epi16(rho_rpi_3_1,y1r); + psi_r_m3_m1 = _mm_abs_epi16(xmm2); + xmm2= _mm_adds_epi16(rho_rpi_3_3,y1r); + psi_r_m3_m3 = _mm_abs_epi16(xmm2); + xmm2= _mm_adds_epi16(y1r,rho_rmi_1_1); + psi_r_m1_p1 = _mm_abs_epi16(xmm2); + xmm2= _mm_adds_epi16(y1r,rho_rmi_1_3); + psi_r_m1_p3 = _mm_abs_epi16(xmm2); + xmm2= _mm_adds_epi16(y1i,rho_rmi_1_1); + psi_i_m1_m1 = _mm_abs_epi16(xmm2); + xmm2= _mm_adds_epi16(y1i,rho_rmi_3_1); + psi_i_m1_m3 = _mm_abs_epi16(xmm2); + xmm2= _mm_adds_epi16(y1r,rho_rmi_3_1); + psi_r_m3_p1 = _mm_abs_epi16(xmm2); + xmm2= _mm_adds_epi16(y1r,rho_rmi_3_3); + psi_r_m3_p3 = _mm_abs_epi16(xmm2); + xmm2= _mm_adds_epi16(y1i,rho_rmi_1_3); + psi_i_m3_m1 = _mm_abs_epi16(xmm2); + xmm2= _mm_adds_epi16(y1i,rho_rmi_3_3); + psi_i_m3_m3 = _mm_abs_epi16(xmm2); + + // Rearrange desired MF output + xmm0 = stream0_128i_in[i]; + xmm1 = stream0_128i_in[i+1]; + xmm0 = _mm_shufflelo_epi16(xmm0,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm0 = _mm_shufflehi_epi16(xmm0,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm0 = _mm_shuffle_epi32(xmm0,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm1 = _mm_shufflelo_epi16(xmm1,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm1 = _mm_shufflehi_epi16(xmm1,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm1 = _mm_shuffle_epi32(xmm1,0xd8); 
//_MM_SHUFFLE(0,2,1,3)); + //xmm0 = [Re(0,1) Re(2,3) Im(0,1) Im(2,3)] + //xmm1 = [Re(4,5) Re(6,7) Im(4,5) Im(6,7)] + y0r = _mm_unpacklo_epi64(xmm0,xmm1); // = [y0r(1),y0r(2),y0r(3),y0r(4)] + y0i = _mm_unpackhi_epi64(xmm0,xmm1); + + // Rearrange desired channel magnitudes + xmm2 = ch_mag_128i[i]; // = [|h|^2(1),|h|^2(1),|h|^2(2),|h|^2(2)]*(2/sqrt(10)) + xmm3 = ch_mag_128i[i+1]; // = [|h|^2(3),|h|^2(3),|h|^2(4),|h|^2(4)]*(2/sqrt(10)) + xmm2 = _mm_shufflelo_epi16(xmm2,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm2 = _mm_shufflehi_epi16(xmm2,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm2 = _mm_shuffle_epi32(xmm2,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm3 = _mm_shufflelo_epi16(xmm3,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm3 = _mm_shufflehi_epi16(xmm3,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm3 = _mm_shuffle_epi32(xmm3,0xd8); //_MM_SHUFFLE(0,2,1,3)); + + ch_mag_des = _mm_unpacklo_epi64(xmm2,xmm3); // = [|h|^2(1),|h|^2(2),|h|^2(3),|h|^2(4)]*(2/sqrt(10)) + + // Rearrange interfering channel magnitudes + xmm2 = ch_mag_128i_i[i]; + xmm3 = ch_mag_128i_i[i+1]; + + xmm2 = _mm_shufflelo_epi16(xmm2,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm2 = _mm_shufflehi_epi16(xmm2,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm2 = _mm_shuffle_epi32(xmm2,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm3 = _mm_shufflelo_epi16(xmm3,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm3 = _mm_shufflehi_epi16(xmm3,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm3 = _mm_shuffle_epi32(xmm3,0xd8); //_MM_SHUFFLE(0,2,1,3)); + + ch_mag_int = _mm_unpacklo_epi64(xmm2,xmm3); + + // Scale MF output of desired signal + y0r_over_sqrt10 = _mm_mulhi_epi16(y0r,ONE_OVER_SQRT_10); + y0i_over_sqrt10 = _mm_mulhi_epi16(y0i,ONE_OVER_SQRT_10); + y0r_three_over_sqrt10 = _mm_mulhi_epi16(y0r,THREE_OVER_SQRT_10); + y0i_three_over_sqrt10 = _mm_mulhi_epi16(y0i,THREE_OVER_SQRT_10); + y0r_three_over_sqrt10 = _mm_slli_epi16(y0r_three_over_sqrt10,1); + y0i_three_over_sqrt10 = _mm_slli_epi16(y0i_three_over_sqrt10,1); + + // Compute necessary combination of required terms + y0_p_1_1 = 
_mm_adds_epi16(y0r_over_sqrt10,y0i_over_sqrt10); + y0_m_1_1 = _mm_subs_epi16(y0r_over_sqrt10,y0i_over_sqrt10); + + y0_p_1_3 = _mm_adds_epi16(y0r_over_sqrt10,y0i_three_over_sqrt10); + y0_m_1_3 = _mm_subs_epi16(y0r_over_sqrt10,y0i_three_over_sqrt10); + + y0_p_3_1 = _mm_adds_epi16(y0r_three_over_sqrt10,y0i_over_sqrt10); + y0_m_3_1 = _mm_subs_epi16(y0r_three_over_sqrt10,y0i_over_sqrt10); + + y0_p_3_3 = _mm_adds_epi16(y0r_three_over_sqrt10,y0i_three_over_sqrt10); + y0_m_3_3 = _mm_subs_epi16(y0r_three_over_sqrt10,y0i_three_over_sqrt10); + + // Compute optimal interfering symbol magnitude + ch_mag_int_with_sigma2 = _mm_srai_epi16(ch_mag_int, 1); // *2 + two_ch_mag_int_with_sigma2 = ch_mag_int; // *4 + three_ch_mag_int_with_sigma2 = _mm_adds_epi16(ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2); // *6 + + interference_abs_64qam_epi16(psi_r_p1_p1 ,ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_p1_p1,ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42,FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_i_p1_p1 ,ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_p1_p1,ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42,FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_r_p1_p3 ,ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_p1_p3,ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42,FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_i_p1_p3 ,ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_p1_p3,ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42,FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_r_p1_m1 ,ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_p1_m1,ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42,FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_i_p1_m1 ,ch_mag_int_with_sigma2, 
two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_p1_m1,ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42,FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_r_p1_m3 ,ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_p1_m3,ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42,FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_i_p1_m3 ,ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_p1_m3,ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42,FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_r_p3_p1 ,ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_p3_p1,ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42,FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_i_p3_p1 ,ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_p3_p1,ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42,FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_r_p3_p3 ,ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_p3_p3,ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42,FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_i_p3_p3 ,ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_p3_p3,ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42,FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_r_p3_m1 ,ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_p3_m1,ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42,FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_i_p3_m1 ,ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_p3_m1,ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42,FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_r_p3_m3 
,ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_p3_m3,ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42,FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_i_p3_m3 ,ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_p3_m3,ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42,FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_r_m1_p1 ,ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_m1_p1,ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42,FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_i_m1_p1 ,ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_m1_p1,ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42,FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_r_m1_p3 ,ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_m1_p3,ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42,FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_i_m1_p3 ,ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_m1_p3,ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42,FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_r_m1_m1 ,ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_m1_m1,ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42,FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_i_m1_m1 ,ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_m1_m1,ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42,FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_r_m1_m3 ,ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_m1_m3,ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42,FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + 
interference_abs_64qam_epi16(psi_i_m1_m3 ,ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_m1_m3,ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42,FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_r_m3_p1 ,ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_m3_p1,ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42,FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_i_m3_p1 ,ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_m3_p1,ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42,FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_r_m3_p3 ,ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_m3_p3,ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42,FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_i_m3_p3 ,ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_m3_p3,ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42,FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_r_m3_m1 ,ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_m3_m1,ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42,FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_i_m3_m1 ,ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_m3_m1,ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42,FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_r_m3_m3 ,ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_m3_m3,ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42,FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_i_m3_m3 ,ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_m3_m3,ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42,FIVE_OVER_SQRT_2_42, + 
SEVEN_OVER_SQRT_2_42); + + // Calculation of groups of two terms in the bit metric involving product of psi and interference magnitude + prodsum_psi_a_epi16(psi_r_p1_p1,a_r_p1_p1,psi_i_p1_p1,a_i_p1_p1,psi_a_p1_p1); + prodsum_psi_a_epi16(psi_r_p1_p3,a_r_p1_p3,psi_i_p1_p3,a_i_p1_p3,psi_a_p1_p3); + prodsum_psi_a_epi16(psi_r_p3_p1,a_r_p3_p1,psi_i_p3_p1,a_i_p3_p1,psi_a_p3_p1); + prodsum_psi_a_epi16(psi_r_p3_p3,a_r_p3_p3,psi_i_p3_p3,a_i_p3_p3,psi_a_p3_p3); + prodsum_psi_a_epi16(psi_r_p1_m1,a_r_p1_m1,psi_i_p1_m1,a_i_p1_m1,psi_a_p1_m1); + prodsum_psi_a_epi16(psi_r_p1_m3,a_r_p1_m3,psi_i_p1_m3,a_i_p1_m3,psi_a_p1_m3); + prodsum_psi_a_epi16(psi_r_p3_m1,a_r_p3_m1,psi_i_p3_m1,a_i_p3_m1,psi_a_p3_m1); + prodsum_psi_a_epi16(psi_r_p3_m3,a_r_p3_m3,psi_i_p3_m3,a_i_p3_m3,psi_a_p3_m3); + prodsum_psi_a_epi16(psi_r_m1_p1,a_r_m1_p1,psi_i_m1_p1,a_i_m1_p1,psi_a_m1_p1); + prodsum_psi_a_epi16(psi_r_m1_p3,a_r_m1_p3,psi_i_m1_p3,a_i_m1_p3,psi_a_m1_p3); + prodsum_psi_a_epi16(psi_r_m3_p1,a_r_m3_p1,psi_i_m3_p1,a_i_m3_p1,psi_a_m3_p1); + prodsum_psi_a_epi16(psi_r_m3_p3,a_r_m3_p3,psi_i_m3_p3,a_i_m3_p3,psi_a_m3_p3); + prodsum_psi_a_epi16(psi_r_m1_m1,a_r_m1_m1,psi_i_m1_m1,a_i_m1_m1,psi_a_m1_m1); + prodsum_psi_a_epi16(psi_r_m1_m3,a_r_m1_m3,psi_i_m1_m3,a_i_m1_m3,psi_a_m1_m3); + prodsum_psi_a_epi16(psi_r_m3_m1,a_r_m3_m1,psi_i_m3_m1,a_i_m3_m1,psi_a_m3_m1); + prodsum_psi_a_epi16(psi_r_m3_m3,a_r_m3_m3,psi_i_m3_m3,a_i_m3_m3,psi_a_m3_m3); + + // Multiply by sqrt(2) + psi_a_p1_p1 = _mm_mulhi_epi16(psi_a_p1_p1, ONE_OVER_SQRT_2); + psi_a_p1_p1 = _mm_slli_epi16(psi_a_p1_p1, 2); + psi_a_p1_p3 = _mm_mulhi_epi16(psi_a_p1_p3, ONE_OVER_SQRT_2); + psi_a_p1_p3 = _mm_slli_epi16(psi_a_p1_p3, 2); + psi_a_p3_p1 = _mm_mulhi_epi16(psi_a_p3_p1, ONE_OVER_SQRT_2); + psi_a_p3_p1 = _mm_slli_epi16(psi_a_p3_p1, 2); + psi_a_p3_p3 = _mm_mulhi_epi16(psi_a_p3_p3, ONE_OVER_SQRT_2); + psi_a_p3_p3 = _mm_slli_epi16(psi_a_p3_p3, 2); + psi_a_p1_m1 = _mm_mulhi_epi16(psi_a_p1_m1, ONE_OVER_SQRT_2); + psi_a_p1_m1 = _mm_slli_epi16(psi_a_p1_m1, 2); + 
psi_a_p1_m3 = _mm_mulhi_epi16(psi_a_p1_m3, ONE_OVER_SQRT_2); + psi_a_p1_m3 = _mm_slli_epi16(psi_a_p1_m3, 2); + psi_a_p3_m1 = _mm_mulhi_epi16(psi_a_p3_m1, ONE_OVER_SQRT_2); + psi_a_p3_m1 = _mm_slli_epi16(psi_a_p3_m1, 2); + psi_a_p3_m3 = _mm_mulhi_epi16(psi_a_p3_m3, ONE_OVER_SQRT_2); + psi_a_p3_m3 = _mm_slli_epi16(psi_a_p3_m3, 2); + psi_a_m1_p1 = _mm_mulhi_epi16(psi_a_m1_p1, ONE_OVER_SQRT_2); + psi_a_m1_p1 = _mm_slli_epi16(psi_a_m1_p1, 2); + psi_a_m1_p3 = _mm_mulhi_epi16(psi_a_m1_p3, ONE_OVER_SQRT_2); + psi_a_m1_p3 = _mm_slli_epi16(psi_a_m1_p3, 2); + psi_a_m3_p1 = _mm_mulhi_epi16(psi_a_m3_p1, ONE_OVER_SQRT_2); + psi_a_m3_p1 = _mm_slli_epi16(psi_a_m3_p1, 2); + psi_a_m3_p3 = _mm_mulhi_epi16(psi_a_m3_p3, ONE_OVER_SQRT_2); + psi_a_m3_p3 = _mm_slli_epi16(psi_a_m3_p3, 2); + psi_a_m1_m1 = _mm_mulhi_epi16(psi_a_m1_m1, ONE_OVER_SQRT_2); + psi_a_m1_m1 = _mm_slli_epi16(psi_a_m1_m1, 2); + psi_a_m1_m3 = _mm_mulhi_epi16(psi_a_m1_m3, ONE_OVER_SQRT_2); + psi_a_m1_m3 = _mm_slli_epi16(psi_a_m1_m3, 2); + psi_a_m3_m1 = _mm_mulhi_epi16(psi_a_m3_m1, ONE_OVER_SQRT_2); + psi_a_m3_m1 = _mm_slli_epi16(psi_a_m3_m1, 2); + psi_a_m3_m3 = _mm_mulhi_epi16(psi_a_m3_m3, ONE_OVER_SQRT_2); + psi_a_m3_m3 = _mm_slli_epi16(psi_a_m3_m3, 2); + + // squared interference magnitude times int. ch. 
power + square_a_64qam_epi16(a_r_p1_p1,a_i_p1_p1,ch_mag_int,SQRT_42_OVER_FOUR,a_sq_p1_p1); + square_a_64qam_epi16(a_r_p1_p3,a_i_p1_p3,ch_mag_int,SQRT_42_OVER_FOUR,a_sq_p1_p3); + square_a_64qam_epi16(a_r_p3_p1,a_i_p3_p1,ch_mag_int,SQRT_42_OVER_FOUR,a_sq_p3_p1); + square_a_64qam_epi16(a_r_p3_p3,a_i_p3_p3,ch_mag_int,SQRT_42_OVER_FOUR,a_sq_p3_p3); + square_a_64qam_epi16(a_r_p1_m1,a_i_p1_m1,ch_mag_int,SQRT_42_OVER_FOUR,a_sq_p1_m1); + square_a_64qam_epi16(a_r_p1_m3,a_i_p1_m3,ch_mag_int,SQRT_42_OVER_FOUR,a_sq_p1_m3); + square_a_64qam_epi16(a_r_p3_m1,a_i_p3_m1,ch_mag_int,SQRT_42_OVER_FOUR,a_sq_p3_m1); + square_a_64qam_epi16(a_r_p3_m3,a_i_p3_m3,ch_mag_int,SQRT_42_OVER_FOUR,a_sq_p3_m3); + square_a_64qam_epi16(a_r_m1_p1,a_i_m1_p1,ch_mag_int,SQRT_42_OVER_FOUR,a_sq_m1_p1); + square_a_64qam_epi16(a_r_m1_p3,a_i_m1_p3,ch_mag_int,SQRT_42_OVER_FOUR,a_sq_m1_p3); + square_a_64qam_epi16(a_r_m3_p1,a_i_m3_p1,ch_mag_int,SQRT_42_OVER_FOUR,a_sq_m3_p1); + square_a_64qam_epi16(a_r_m3_p3,a_i_m3_p3,ch_mag_int,SQRT_42_OVER_FOUR,a_sq_m3_p3); + square_a_64qam_epi16(a_r_m1_m1,a_i_m1_m1,ch_mag_int,SQRT_42_OVER_FOUR,a_sq_m1_m1); + square_a_64qam_epi16(a_r_m1_m3,a_i_m1_m3,ch_mag_int,SQRT_42_OVER_FOUR,a_sq_m1_m3); + square_a_64qam_epi16(a_r_m3_m1,a_i_m3_m1,ch_mag_int,SQRT_42_OVER_FOUR,a_sq_m3_m1); + square_a_64qam_epi16(a_r_m3_m3,a_i_m3_m3,ch_mag_int,SQRT_42_OVER_FOUR,a_sq_m3_m3); + + // Computing different multiples of channel norms + ch_mag_over_10=_mm_mulhi_epi16(ch_mag_des, ONE_OVER_TWO_SQRT_10); + ch_mag_over_2=_mm_mulhi_epi16(ch_mag_des, SQRT_10_OVER_FOUR); + ch_mag_over_2=_mm_slli_epi16(ch_mag_over_2, 1); + ch_mag_9_over_10=_mm_mulhi_epi16(ch_mag_des, NINE_OVER_TWO_SQRT_10); + ch_mag_9_over_10=_mm_slli_epi16(ch_mag_9_over_10, 2); + + // Computing Metrics + xmm0 = _mm_subs_epi16(psi_a_p1_p1,a_sq_p1_p1); + xmm1 = _mm_adds_epi16(xmm0,y0_p_1_1); + bit_met_p1_p1= _mm_subs_epi16(xmm1,ch_mag_over_10); + + xmm0 = _mm_subs_epi16(psi_a_p1_p3,a_sq_p1_p3); + xmm1 = _mm_adds_epi16(xmm0,y0_p_1_3); + 
bit_met_p1_p3= _mm_subs_epi16(xmm1,ch_mag_over_2); + + xmm0 = _mm_subs_epi16(psi_a_p1_m1,a_sq_p1_m1); + xmm1 = _mm_adds_epi16(xmm0,y0_m_1_1); + bit_met_p1_m1= _mm_subs_epi16(xmm1,ch_mag_over_10); + + xmm0 = _mm_subs_epi16(psi_a_p1_m3,a_sq_p1_m3); + xmm1 = _mm_adds_epi16(xmm0,y0_m_1_3); + bit_met_p1_m3= _mm_subs_epi16(xmm1,ch_mag_over_2); + + xmm0 = _mm_subs_epi16(psi_a_p3_p1,a_sq_p3_p1); + xmm1 = _mm_adds_epi16(xmm0,y0_p_3_1); + bit_met_p3_p1= _mm_subs_epi16(xmm1,ch_mag_over_2); + + xmm0 = _mm_subs_epi16(psi_a_p3_p3,a_sq_p3_p3); + xmm1 = _mm_adds_epi16(xmm0,y0_p_3_3); + bit_met_p3_p3= _mm_subs_epi16(xmm1,ch_mag_9_over_10); + + xmm0 = _mm_subs_epi16(psi_a_p3_m1,a_sq_p3_m1); + xmm1 = _mm_adds_epi16(xmm0,y0_m_3_1); + bit_met_p3_m1= _mm_subs_epi16(xmm1,ch_mag_over_2); + + xmm0 = _mm_subs_epi16(psi_a_p3_m3,a_sq_p3_m3); + xmm1 = _mm_adds_epi16(xmm0,y0_m_3_3); + bit_met_p3_m3= _mm_subs_epi16(xmm1,ch_mag_9_over_10); + + xmm0 = _mm_subs_epi16(psi_a_m1_p1,a_sq_m1_p1); + xmm1 = _mm_subs_epi16(xmm0,y0_m_1_1); + bit_met_m1_p1= _mm_subs_epi16(xmm1,ch_mag_over_10); + + xmm0 = _mm_subs_epi16(psi_a_m1_p3,a_sq_m1_p3); + xmm1 = _mm_subs_epi16(xmm0,y0_m_1_3); + bit_met_m1_p3= _mm_subs_epi16(xmm1,ch_mag_over_2); + + xmm0 = _mm_subs_epi16(psi_a_m1_m1,a_sq_m1_m1); + xmm1 = _mm_subs_epi16(xmm0,y0_p_1_1); + bit_met_m1_m1= _mm_subs_epi16(xmm1,ch_mag_over_10); + + xmm0 = _mm_subs_epi16(psi_a_m1_m3,a_sq_m1_m3); + xmm1 = _mm_subs_epi16(xmm0,y0_p_1_3); + bit_met_m1_m3= _mm_subs_epi16(xmm1,ch_mag_over_2); + + xmm0 = _mm_subs_epi16(psi_a_m3_p1,a_sq_m3_p1); + xmm1 = _mm_subs_epi16(xmm0,y0_m_3_1); + bit_met_m3_p1= _mm_subs_epi16(xmm1,ch_mag_over_2); + + xmm0 = _mm_subs_epi16(psi_a_m3_p3,a_sq_m3_p3); + xmm1 = _mm_subs_epi16(xmm0,y0_m_3_3); + bit_met_m3_p3= _mm_subs_epi16(xmm1,ch_mag_9_over_10); + + xmm0 = _mm_subs_epi16(psi_a_m3_m1,a_sq_m3_m1); + xmm1 = _mm_subs_epi16(xmm0,y0_p_3_1); + bit_met_m3_m1= _mm_subs_epi16(xmm1,ch_mag_over_2); + + xmm0 = _mm_subs_epi16(psi_a_m3_m3,a_sq_m3_m3); + xmm1 = 
_mm_subs_epi16(xmm0,y0_p_3_3); + bit_met_m3_m3= _mm_subs_epi16(xmm1,ch_mag_9_over_10); + + // LLR of the first bit + // Bit = 1 + xmm0 = _mm_max_epi16(bit_met_m1_p1,bit_met_m1_p3); + xmm1 = _mm_max_epi16(bit_met_m1_m1,bit_met_m1_m3); + xmm2 = _mm_max_epi16(bit_met_m3_p1,bit_met_m3_p3); + xmm3 = _mm_max_epi16(bit_met_m3_m1,bit_met_m3_m3); + xmm4 = _mm_max_epi16(xmm0,xmm1); + xmm5 = _mm_max_epi16(xmm2,xmm3); + logmax_num_re0= _mm_max_epi16(xmm4,xmm5); + + // Bit = 0 + xmm0 = _mm_max_epi16(bit_met_p1_p1,bit_met_p1_p3); + xmm1 = _mm_max_epi16(bit_met_p1_m1,bit_met_p1_m3); + xmm2 = _mm_max_epi16(bit_met_p3_p1,bit_met_p3_p3); + xmm3 = _mm_max_epi16(bit_met_p3_m1,bit_met_p3_m3); + xmm4 = _mm_max_epi16(xmm0,xmm1); + xmm5 = _mm_max_epi16(xmm2,xmm3); + logmax_den_re0 = _mm_max_epi16(xmm4,xmm5); + + // LLR of first bit [L1(1), L1(2), L1(3), L1(4), L1(5), L1(6), L1(7), L1(8)] + y0r = _mm_subs_epi16(logmax_den_re0,logmax_num_re0); + + // LLR of the second bit + // Bit = 1 + xmm0 = _mm_max_epi16(bit_met_p1_m1,bit_met_p3_m1); + xmm1 = _mm_max_epi16(bit_met_m1_m1,bit_met_m3_m1); + xmm2 = _mm_max_epi16(bit_met_p1_m3,bit_met_p3_m3); + xmm3 = _mm_max_epi16(bit_met_m1_m3,bit_met_m3_m3); + xmm4 = _mm_max_epi16(xmm0,xmm1); + xmm5 = _mm_max_epi16(xmm2,xmm3); + logmax_num_re1 = _mm_max_epi16(xmm4,xmm5); + + // Bit = 0 + xmm0 = _mm_max_epi16(bit_met_p1_p1,bit_met_p3_p1); + xmm1 = _mm_max_epi16(bit_met_m1_p1,bit_met_m3_p1); + xmm2 = _mm_max_epi16(bit_met_p1_p3,bit_met_p3_p3); + xmm3 = _mm_max_epi16(bit_met_m1_p3,bit_met_m3_p3); + xmm4 = _mm_max_epi16(xmm0,xmm1); + xmm5 = _mm_max_epi16(xmm2,xmm3); + logmax_den_re1 = _mm_max_epi16(xmm4,xmm5); + + // LLR of second bit [L2(1), L2(2), L2(3), L2(4)] + y1r = _mm_subs_epi16(logmax_den_re1,logmax_num_re1); + + // LLR of the third bit + // Bit = 1 + xmm0 = _mm_max_epi16(bit_met_m3_p1,bit_met_m3_p3); + xmm1 = _mm_max_epi16(bit_met_m3_m1,bit_met_m3_m3); + xmm2 = _mm_max_epi16(bit_met_p3_p1,bit_met_p3_p3); + xmm3 = 
_mm_max_epi16(bit_met_p3_m1,bit_met_p3_m3); + xmm4 = _mm_max_epi16(xmm0,xmm1); + xmm5 = _mm_max_epi16(xmm2,xmm3); + logmax_num_im0 = _mm_max_epi16(xmm4,xmm5); + + // Bit = 0 + xmm0 = _mm_max_epi16(bit_met_m1_p1,bit_met_m1_p3); + xmm1 = _mm_max_epi16(bit_met_m1_m1,bit_met_m1_m3); + xmm2 = _mm_max_epi16(bit_met_p1_p1,bit_met_p1_p3); + xmm3 = _mm_max_epi16(bit_met_p1_m1,bit_met_p1_m3); + xmm4 = _mm_max_epi16(xmm0,xmm1); + xmm5 = _mm_max_epi16(xmm2,xmm3); + logmax_den_im0 = _mm_max_epi16(xmm4,xmm5); + + // LLR of third bit [L3(1), L3(2), L3(3), L3(4)] + y0i = _mm_subs_epi16(logmax_den_im0,logmax_num_im0); + + // LLR of the fourth bit + // Bit = 1 + xmm0 = _mm_max_epi16(bit_met_p1_m3,bit_met_p3_m3); + xmm1 = _mm_max_epi16(bit_met_m1_m3,bit_met_m3_m3); + xmm2 = _mm_max_epi16(bit_met_p1_p3,bit_met_p3_p3); + xmm3 = _mm_max_epi16(bit_met_m1_p3,bit_met_m3_p3); + xmm4 = _mm_max_epi16(xmm0,xmm1); + xmm5 = _mm_max_epi16(xmm2,xmm3); + logmax_num_im1 = _mm_max_epi16(xmm4,xmm5); + + // Bit = 0 + xmm0 = _mm_max_epi16(bit_met_p1_m1,bit_met_p3_m1); + xmm1 = _mm_max_epi16(bit_met_m1_m1,bit_met_m3_m1); + xmm2 = _mm_max_epi16(bit_met_p1_p1,bit_met_p3_p1); + xmm3 = _mm_max_epi16(bit_met_m1_p1,bit_met_m3_p1); + xmm4 = _mm_max_epi16(xmm0,xmm1); + xmm5 = _mm_max_epi16(xmm2,xmm3); + logmax_den_im1 = _mm_max_epi16(xmm4,xmm5); + + // LLR of fourth bit [L4(1), L4(2), L4(3), L4(4)] + y1i = _mm_subs_epi16(logmax_den_im1,logmax_num_im1); + + // Pack LLRs in output + // [L1(1), L2(1), L1(2), L2(2), L1(3), L2(3), L1(4), L2(4)] + xmm0 = _mm_unpacklo_epi16(y0r,y1r); + // [L1(5), L2(5), L1(6), L2(6), L1(7), L2(7), L1(8), L2(8)] + xmm1 = _mm_unpackhi_epi16(y0r,y1r); + // [L3(1), L4(1), L3(2), L4(2), L3(3), L4(3), L3(4), L4(4)] + xmm2 = _mm_unpacklo_epi16(y0i,y1i); + // [L3(5), L4(5), L3(6), L4(6), L3(7), L4(7), L3(8), L4(8)] + xmm3 = _mm_unpackhi_epi16(y0i,y1i); + + stream0_128i_out[2*i+0] = _mm_unpacklo_epi32(xmm0,xmm2); // 8LLRs, 2REs + stream0_128i_out[2*i+1] = _mm_unpackhi_epi32(xmm0,xmm2); + 
stream0_128i_out[2*i+2] = _mm_unpacklo_epi32(xmm1,xmm3); + stream0_128i_out[2*i+3] = _mm_unpackhi_epi32(xmm1,xmm3); +#elif defined(__arm__) + +#endif + + } + +#if defined(__x86_64__) || defined(__i386__) + _mm_empty(); + _m_empty(); +#endif +} + +int dlsch_16qam_64qam_llr(LTE_DL_FRAME_PARMS *frame_parms, + int32_t **rxdataF_comp, + int32_t **rxdataF_comp_i, + int32_t **dl_ch_mag, //|h_0|^2*(2/sqrt{10}) + int32_t **dl_ch_mag_i, //|h_1|^2*(2/sqrt{10}) + int32_t **rho_i, + int16_t *dlsch_llr, + uint8_t symbol, + uint8_t first_symbol_flag, + uint16_t nb_rb, + uint16_t pbch_pss_sss_adjust, + int16_t **llr16p) +{ + + int16_t *rxF = (int16_t*)&rxdataF_comp[0][(symbol*frame_parms->N_RB_DL*12)]; + int16_t *rxF_i = (int16_t*)&rxdataF_comp_i[0][(symbol*frame_parms->N_RB_DL*12)]; + int16_t *ch_mag = (int16_t*)&dl_ch_mag[0][(symbol*frame_parms->N_RB_DL*12)]; + int16_t *ch_mag_i = (int16_t*)&dl_ch_mag_i[0][(symbol*frame_parms->N_RB_DL*12)]; + int16_t *rho = (int16_t*)&rho_i[0][(symbol*frame_parms->N_RB_DL*12)]; + int16_t *llr16; + int len; + uint8_t symbol_mod = (symbol >= (7-frame_parms->Ncp))? 
(symbol-(7-frame_parms->Ncp)) : symbol; + + // first symbol has different structure due to more pilots + if (first_symbol_flag == 1) { + llr16 = (int16_t*)dlsch_llr; + } else { + llr16 = (int16_t*)(*llr16p); + } + + + AssertFatal(llr16!=NULL,"dlsch_16qam_64qam_llr:llr is null, symbol %d\n",symbol); + + + if ((symbol_mod==0) || (symbol_mod==(4-frame_parms->Ncp))) { + // if symbol has pilots + if (frame_parms->nb_antenna_ports_eNB!=1) + // in 2 antenna ports we have 8 REs per symbol per RB + len = (nb_rb*8) - (2*pbch_pss_sss_adjust/3); + else + // for 1 antenna port we have 10 REs per symbol per RB + len = (nb_rb*10) - (5*pbch_pss_sss_adjust/6); + } else { + // symbol has no pilots + len = (nb_rb*12) - pbch_pss_sss_adjust; + } + + // printf("symbol %d: qam16_llr, len %d (llr16 %p)\n",symbol,len,llr16); + + qam16_qam64((short *)rxF, + (short *)rxF_i, + (short *)ch_mag, + (short *)ch_mag_i, + (short *)llr16, + (short *)rho, + len); + + llr16 += (len<<2); + *llr16p = (short *)llr16; + + return(0); +} + +//---------------------------------------------------------------------------------------------- +// 64-QAM +//---------------------------------------------------------------------------------------------- + +/* +__m128i ONE_OVER_SQRT_42 __attribute__((aligned(16))); +__m128i THREE_OVER_SQRT_42 __attribute__((aligned(16))); +__m128i FIVE_OVER_SQRT_42 __attribute__((aligned(16))); +__m128i SEVEN_OVER_SQRT_42 __attribute__((aligned(16))); + +__m128i FORTYNINE_OVER_FOUR_SQRT_42 __attribute__((aligned(16))); +__m128i THIRTYSEVEN_OVER_FOUR_SQRT_42 __attribute__((aligned(16))); +__m128i TWENTYNINE_OVER_FOUR_SQRT_42 __attribute__((aligned(16))); +__m128i TWENTYFIVE_OVER_FOUR_SQRT_42 __attribute__((aligned(16))); +__m128i SEVENTEEN_OVER_FOUR_SQRT_42 __attribute__((aligned(16))); +__m128i NINE_OVER_FOUR_SQRT_42 __attribute__((aligned(16))); +__m128i THIRTEEN_OVER_FOUR_SQRT_42 __attribute__((aligned(16))); +__m128i FIVE_OVER_FOUR_SQRT_42 __attribute__((aligned(16))); +__m128i 
ONE_OVER_FOUR_SQRT_42 __attribute__((aligned(16))); + +__m128i y0r_one_over_sqrt_21 __attribute__((aligned(16))); +__m128i y0r_three_over_sqrt_21 __attribute__((aligned(16))); +__m128i y0r_five_over_sqrt_21 __attribute__((aligned(16))); +__m128i y0r_seven_over_sqrt_21 __attribute__((aligned(16))); +__m128i y0i_one_over_sqrt_21 __attribute__((aligned(16))); +__m128i y0i_three_over_sqrt_21 __attribute__((aligned(16))); +__m128i y0i_five_over_sqrt_21 __attribute__((aligned(16))); +__m128i y0i_seven_over_sqrt_21 __attribute__((aligned(16))); + +__m128i ch_mag_98_over_42_with_sigma2 __attribute__((aligned(16))); +__m128i ch_mag_74_over_42_with_sigma2 __attribute__((aligned(16))); +__m128i ch_mag_58_over_42_with_sigma2 __attribute__((aligned(16))); +__m128i ch_mag_50_over_42_with_sigma2 __attribute__((aligned(16))); +__m128i ch_mag_34_over_42_with_sigma2 __attribute__((aligned(16))); +__m128i ch_mag_18_over_42_with_sigma2 __attribute__((aligned(16))); +__m128i ch_mag_26_over_42_with_sigma2 __attribute__((aligned(16))); +__m128i ch_mag_10_over_42_with_sigma2 __attribute__((aligned(16))); +__m128i ch_mag_2_over_42_with_sigma2 __attribute__((aligned(16))); + +*/ + +void qam64_qpsk(int16_t *stream0_in, + int16_t *stream1_in, + int16_t *ch_mag, + int16_t *stream0_out, + int16_t *rho01, + int32_t length + ) +{ + + /* + Author: S. Wagner + Date: 31-07-12 + + Input: + stream0_in: MF filter for 1st stream, i.e., y0=h0'*y + stream1_in: MF filter for 2nd stream, i.e., y1=h1'*y + ch_mag: 4*h0/sqrt(42), [Re0 Im0 Re1 Im1] s.t. Im0=Re0, Im1=Re1, etc + ch_mag_i: 4*h1/sqrt(42), [Re0 Im0 Re1 Im1] s.t. 
Im0=Re0, Im1=Re1, etc + rho01: Channel cross correlation, i.e., h1'*h0 + + Output: + stream0_out: output LLRs for 1st stream + */ + +#if defined(__x86_64__) || defined(__i386__) + __m128i *rho01_128i = (__m128i *)rho01; + __m128i *stream0_128i_in = (__m128i *)stream0_in; + __m128i *stream1_128i_in = (__m128i *)stream1_in; + __m128i *ch_mag_128i = (__m128i *)ch_mag; + + + __m128i ONE_OVER_SQRT_42 = _mm_set1_epi16(10112); // round(1/sqrt(42)*2^16) + __m128i THREE_OVER_SQRT_42 = _mm_set1_epi16(30337); // round(3/sqrt(42)*2^16) + __m128i FIVE_OVER_SQRT_42 = _mm_set1_epi16(25281); // round(5/sqrt(42)*2^15) + __m128i SEVEN_OVER_SQRT_42 = _mm_set1_epi16(17697); // round(5/sqrt(42)*2^15) + __m128i ONE_OVER_SQRT_2 = _mm_set1_epi16(23170); // round(1/sqrt(2)*2^15) + __m128i FORTYNINE_OVER_FOUR_SQRT_42 = _mm_set1_epi16(30969); // round(49/(4*sqrt(42))*2^14), Q2.14 + __m128i THIRTYSEVEN_OVER_FOUR_SQRT_42 = _mm_set1_epi16(23385); // round(37/(4*sqrt(42))*2^14), Q2.14 + __m128i TWENTYFIVE_OVER_FOUR_SQRT_42 = _mm_set1_epi16(31601); // round(25/(4*sqrt(42))*2^15) + __m128i TWENTYNINE_OVER_FOUR_SQRT_42 = _mm_set1_epi16(18329); // round(29/(4*sqrt(42))*2^15), Q2.14 + __m128i SEVENTEEN_OVER_FOUR_SQRT_42 = _mm_set1_epi16(21489); // round(17/(4*sqrt(42))*2^15) + __m128i NINE_OVER_FOUR_SQRT_42 = _mm_set1_epi16(11376); // round(9/(4*sqrt(42))*2^15) + __m128i THIRTEEN_OVER_FOUR_SQRT_42 = _mm_set1_epi16(16433); // round(13/(4*sqrt(42))*2^15) + __m128i FIVE_OVER_FOUR_SQRT_42 = _mm_set1_epi16(6320); // round(5/(4*sqrt(42))*2^15) + __m128i ONE_OVER_FOUR_SQRT_42 = _mm_set1_epi16(1264); // round(1/(4*sqrt(42))*2^15) + + + __m128i ch_mag_des; + __m128i ch_mag_98_over_42_with_sigma2; + __m128i ch_mag_74_over_42_with_sigma2; + __m128i ch_mag_58_over_42_with_sigma2; + __m128i ch_mag_50_over_42_with_sigma2; + __m128i ch_mag_34_over_42_with_sigma2; + __m128i ch_mag_18_over_42_with_sigma2; + __m128i ch_mag_26_over_42_with_sigma2; + __m128i ch_mag_10_over_42_with_sigma2; + __m128i 
ch_mag_2_over_42_with_sigma2; + __m128i y0r_one_over_sqrt_21; + __m128i y0r_three_over_sqrt_21; + __m128i y0r_five_over_sqrt_21; + __m128i y0r_seven_over_sqrt_21; + __m128i y0i_one_over_sqrt_21; + __m128i y0i_three_over_sqrt_21; + __m128i y0i_five_over_sqrt_21; + __m128i y0i_seven_over_sqrt_21; +#elif defined(__arm__) + +#endif + + int i,j; + + for (i=0; i<length>>2; i+=2) { + +#if defined(__x86_64) || defined(__i386__) + // Get rho + xmm0 = rho01_128i[i]; + xmm1 = rho01_128i[i+1]; + xmm0 = _mm_shufflelo_epi16(xmm0,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm0 = _mm_shufflehi_epi16(xmm0,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm0 = _mm_shuffle_epi32(xmm0,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm1 = _mm_shufflelo_epi16(xmm1,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm1 = _mm_shufflehi_epi16(xmm1,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm1 = _mm_shuffle_epi32(xmm1,0xd8); //_MM_SHUFFLE(0,2,1,3)); + //xmm0 = [Re(0,1) Re(2,3) Im(0,1) Im(2,3)] + //xmm1 = [Re(4,5) Re(6,7) Im(4,5) Im(6,7)] + xmm2 = _mm_unpacklo_epi64(xmm0,xmm1); // Re(rho) + xmm3 = _mm_unpackhi_epi64(xmm0,xmm1); // Im(rho) + rho_rpi = _mm_adds_epi16(xmm2,xmm3); // rho = Re(rho) + Im(rho) + rho_rmi = _mm_subs_epi16(xmm2,xmm3); // rho* = Re(rho) - Im(rho) + + // Compute the different rhos + rho_rpi_1_1 = _mm_mulhi_epi16(rho_rpi, ONE_OVER_SQRT_42); + rho_rmi_1_1 = _mm_mulhi_epi16(rho_rmi, ONE_OVER_SQRT_42); + rho_rpi_3_3 = _mm_mulhi_epi16(rho_rpi, THREE_OVER_SQRT_42); + rho_rmi_3_3 = _mm_mulhi_epi16(rho_rmi, THREE_OVER_SQRT_42); + rho_rpi_5_5 = _mm_mulhi_epi16(rho_rpi, FIVE_OVER_SQRT_42); + rho_rmi_5_5 = _mm_mulhi_epi16(rho_rmi, FIVE_OVER_SQRT_42); + rho_rpi_7_7 = _mm_mulhi_epi16(rho_rpi, SEVEN_OVER_SQRT_42); + rho_rmi_7_7 = _mm_mulhi_epi16(rho_rmi, SEVEN_OVER_SQRT_42); + + rho_rpi_5_5 = _mm_slli_epi16(rho_rpi_5_5, 1); + rho_rmi_5_5 = _mm_slli_epi16(rho_rmi_5_5, 1); + rho_rpi_7_7 = _mm_slli_epi16(rho_rpi_7_7, 2); + rho_rmi_7_7 = _mm_slli_epi16(rho_rmi_7_7, 2); + + xmm4 = _mm_mulhi_epi16(xmm2, ONE_OVER_SQRT_42); + xmm5 = 
_mm_mulhi_epi16(xmm3, ONE_OVER_SQRT_42); + xmm6 = _mm_mulhi_epi16(xmm3, THREE_OVER_SQRT_42); + xmm7 = _mm_mulhi_epi16(xmm3, FIVE_OVER_SQRT_42); + xmm8 = _mm_mulhi_epi16(xmm3, SEVEN_OVER_SQRT_42); + xmm7 = _mm_slli_epi16(xmm7, 1); + xmm8 = _mm_slli_epi16(xmm8, 2); + + rho_rpi_1_3 = _mm_adds_epi16(xmm4, xmm6); + rho_rmi_1_3 = _mm_subs_epi16(xmm4, xmm6); + rho_rpi_1_5 = _mm_adds_epi16(xmm4, xmm7); + rho_rmi_1_5 = _mm_subs_epi16(xmm4, xmm7); + rho_rpi_1_7 = _mm_adds_epi16(xmm4, xmm8); + rho_rmi_1_7 = _mm_subs_epi16(xmm4, xmm8); + + xmm4 = _mm_mulhi_epi16(xmm2, THREE_OVER_SQRT_42); + rho_rpi_3_1 = _mm_adds_epi16(xmm4, xmm5); + rho_rmi_3_1 = _mm_subs_epi16(xmm4, xmm5); + rho_rpi_3_5 = _mm_adds_epi16(xmm4, xmm7); + rho_rmi_3_5 = _mm_subs_epi16(xmm4, xmm7); + rho_rpi_3_7 = _mm_adds_epi16(xmm4, xmm8); + rho_rmi_3_7 = _mm_subs_epi16(xmm4, xmm8); + + xmm4 = _mm_mulhi_epi16(xmm2, FIVE_OVER_SQRT_42); + xmm4 = _mm_slli_epi16(xmm4, 1); + rho_rpi_5_1 = _mm_adds_epi16(xmm4, xmm5); + rho_rmi_5_1 = _mm_subs_epi16(xmm4, xmm5); + rho_rpi_5_3 = _mm_adds_epi16(xmm4, xmm6); + rho_rmi_5_3 = _mm_subs_epi16(xmm4, xmm6); + rho_rpi_5_7 = _mm_adds_epi16(xmm4, xmm8); + rho_rmi_5_7 = _mm_subs_epi16(xmm4, xmm8); + + xmm4 = _mm_mulhi_epi16(xmm2, SEVEN_OVER_SQRT_42); + xmm4 = _mm_slli_epi16(xmm4, 2); + rho_rpi_7_1 = _mm_adds_epi16(xmm4, xmm5); + rho_rmi_7_1 = _mm_subs_epi16(xmm4, xmm5); + rho_rpi_7_3 = _mm_adds_epi16(xmm4, xmm6); + rho_rmi_7_3 = _mm_subs_epi16(xmm4, xmm6); + rho_rpi_7_5 = _mm_adds_epi16(xmm4, xmm7); + rho_rmi_7_5 = _mm_subs_epi16(xmm4, xmm7); + + // Rearrange interfering MF output + xmm0 = stream1_128i_in[i]; + xmm1 = stream1_128i_in[i+1]; + xmm0 = _mm_shufflelo_epi16(xmm0,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm0 = _mm_shufflehi_epi16(xmm0,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm0 = _mm_shuffle_epi32(xmm0,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm1 = _mm_shufflelo_epi16(xmm1,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm1 = _mm_shufflehi_epi16(xmm1,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm1 = 
_mm_shuffle_epi32(xmm1,0xd8); //_MM_SHUFFLE(0,2,1,3)); + //xmm0 = [Re(0,1) Re(2,3) Im(0,1) Im(2,3)] + //xmm1 = [Re(4,5) Re(6,7) Im(4,5) Im(6,7)] + y1r = _mm_unpacklo_epi64(xmm0,xmm1); //[y1r(1),y1r(2),y1r(3),y1r(4)] + y1i = _mm_unpackhi_epi64(xmm0,xmm1); //[y1i(1),y1i(2),y1i(3),y1i(4)] + + // Psi_r calculation from rho_rpi or rho_rmi + xmm0 = _mm_setzero_si128(); // ZERO for abs_pi16 + xmm2 = _mm_subs_epi16(rho_rpi_7_7, y1r); + psi_r_p7_p7 = _mm_abs_epi16(xmm2); + xmm2 = _mm_subs_epi16(rho_rpi_7_5, y1r); + psi_r_p7_p5 = _mm_abs_epi16(xmm2); + xmm2 = _mm_subs_epi16(rho_rpi_7_3, y1r); + psi_r_p7_p3 = _mm_abs_epi16(xmm2); + xmm2 = _mm_subs_epi16(rho_rpi_7_1, y1r); + psi_r_p7_p1 = _mm_abs_epi16(xmm2); + xmm2 = _mm_subs_epi16(rho_rmi_7_1, y1r); + psi_r_p7_m1 = _mm_abs_epi16(xmm2); + xmm2 = _mm_subs_epi16(rho_rmi_7_3, y1r); + psi_r_p7_m3 = _mm_abs_epi16(xmm2); + xmm2 = _mm_subs_epi16(rho_rmi_7_5, y1r); + psi_r_p7_m5 = _mm_abs_epi16(xmm2); + xmm2 = _mm_subs_epi16(rho_rmi_7_7, y1r); + psi_r_p7_m7 = _mm_abs_epi16(xmm2); + xmm2 = _mm_subs_epi16(rho_rpi_5_7, y1r); + psi_r_p5_p7 = _mm_abs_epi16(xmm2); + xmm2 = _mm_subs_epi16(rho_rpi_5_5, y1r); + psi_r_p5_p5 = _mm_abs_epi16(xmm2); + xmm2 = _mm_subs_epi16(rho_rpi_5_3, y1r); + psi_r_p5_p3 = _mm_abs_epi16(xmm2); + xmm2 = _mm_subs_epi16(rho_rpi_5_1, y1r); + psi_r_p5_p1 = _mm_abs_epi16(xmm2); + xmm2 = _mm_subs_epi16(rho_rmi_5_1, y1r); + psi_r_p5_m1 = _mm_abs_epi16(xmm2); + xmm2 = _mm_subs_epi16(rho_rmi_5_3, y1r); + psi_r_p5_m3 = _mm_abs_epi16(xmm2); + xmm2 = _mm_subs_epi16(rho_rmi_5_5, y1r); + psi_r_p5_m5 = _mm_abs_epi16(xmm2); + xmm2 = _mm_subs_epi16(rho_rmi_5_7, y1r); + psi_r_p5_m7 = _mm_abs_epi16(xmm2); + xmm2 = _mm_subs_epi16(rho_rpi_3_7, y1r); + psi_r_p3_p7 = _mm_abs_epi16(xmm2); + xmm2 = _mm_subs_epi16(rho_rpi_3_5, y1r); + psi_r_p3_p5 = _mm_abs_epi16(xmm2); + xmm2 = _mm_subs_epi16(rho_rpi_3_3, y1r); + psi_r_p3_p3 = _mm_abs_epi16(xmm2); + xmm2 = _mm_subs_epi16(rho_rpi_3_1, y1r); + psi_r_p3_p1 = _mm_abs_epi16(xmm2); + xmm2 = 
_mm_subs_epi16(rho_rmi_3_1, y1r); + psi_r_p3_m1 = _mm_abs_epi16(xmm2); + xmm2 = _mm_subs_epi16(rho_rmi_3_3, y1r); + psi_r_p3_m3 = _mm_abs_epi16(xmm2); + xmm2 = _mm_subs_epi16(rho_rmi_3_5, y1r); + psi_r_p3_m5 = _mm_abs_epi16(xmm2); + xmm2 = _mm_subs_epi16(rho_rmi_3_7, y1r); + psi_r_p3_m7 = _mm_abs_epi16(xmm2); + xmm2 = _mm_subs_epi16(rho_rpi_1_7, y1r); + psi_r_p1_p7 = _mm_abs_epi16(xmm2); + xmm2 = _mm_subs_epi16(rho_rpi_1_5, y1r); + psi_r_p1_p5 = _mm_abs_epi16(xmm2); + xmm2 = _mm_subs_epi16(rho_rpi_1_3, y1r); + psi_r_p1_p3 = _mm_abs_epi16(xmm2); + xmm2 = _mm_subs_epi16(rho_rpi_1_1, y1r); + psi_r_p1_p1 = _mm_abs_epi16(xmm2); + xmm2 = _mm_subs_epi16(rho_rmi_1_1, y1r); + psi_r_p1_m1 = _mm_abs_epi16(xmm2); + xmm2 = _mm_subs_epi16(rho_rmi_1_3, y1r); + psi_r_p1_m3 = _mm_abs_epi16(xmm2); + xmm2 = _mm_subs_epi16(rho_rmi_1_5, y1r); + psi_r_p1_m5 = _mm_abs_epi16(xmm2); + xmm2 = _mm_subs_epi16(rho_rmi_1_7, y1r); + psi_r_p1_m7 = _mm_abs_epi16(xmm2); + + xmm2 = _mm_adds_epi16(rho_rmi_1_7, y1r); + psi_r_m1_p7 = _mm_abs_epi16(xmm2); + xmm2 = _mm_adds_epi16(rho_rmi_1_5, y1r); + psi_r_m1_p5 = _mm_abs_epi16(xmm2); + xmm2 = _mm_adds_epi16(rho_rmi_1_3, y1r); + psi_r_m1_p3 = _mm_abs_epi16(xmm2); + xmm2 = _mm_adds_epi16(rho_rmi_1_1, y1r); + psi_r_m1_p1 = _mm_abs_epi16(xmm2); + xmm2 = _mm_adds_epi16(rho_rpi_1_1, y1r); + psi_r_m1_m1 = _mm_abs_epi16(xmm2); + xmm2 = _mm_adds_epi16(rho_rpi_1_3, y1r); + psi_r_m1_m3 = _mm_abs_epi16(xmm2); + xmm2 = _mm_adds_epi16(rho_rpi_1_5, y1r); + psi_r_m1_m5 = _mm_abs_epi16(xmm2); + xmm2 = _mm_adds_epi16(rho_rpi_1_7, y1r); + psi_r_m1_m7 = _mm_abs_epi16(xmm2); + xmm2 = _mm_adds_epi16(rho_rmi_3_7, y1r); + psi_r_m3_p7 = _mm_abs_epi16(xmm2); + xmm2 = _mm_adds_epi16(rho_rmi_3_5, y1r); + psi_r_m3_p5 = _mm_abs_epi16(xmm2); + xmm2 = _mm_adds_epi16(rho_rmi_3_3, y1r); + psi_r_m3_p3 = _mm_abs_epi16(xmm2); + xmm2 = _mm_adds_epi16(rho_rmi_3_1, y1r); + psi_r_m3_p1 = _mm_abs_epi16(xmm2); + xmm2 = _mm_adds_epi16(rho_rpi_3_1, y1r); + psi_r_m3_m1 = _mm_abs_epi16(xmm2); + xmm2 
= _mm_adds_epi16(rho_rpi_3_3, y1r); + psi_r_m3_m3 = _mm_abs_epi16(xmm2); + xmm2 = _mm_adds_epi16(rho_rpi_3_5, y1r); + psi_r_m3_m5 = _mm_abs_epi16(xmm2); + xmm2 = _mm_adds_epi16(rho_rpi_3_7, y1r); + psi_r_m3_m7 = _mm_abs_epi16(xmm2); + xmm2 = _mm_adds_epi16(rho_rmi_5_7, y1r); + psi_r_m5_p7 = _mm_abs_epi16(xmm2); + xmm2 = _mm_adds_epi16(rho_rmi_5_5, y1r); + psi_r_m5_p5 = _mm_abs_epi16(xmm2); + xmm2 = _mm_adds_epi16(rho_rmi_5_3, y1r); + psi_r_m5_p3 = _mm_abs_epi16(xmm2); + xmm2 = _mm_adds_epi16(rho_rmi_5_1, y1r); + psi_r_m5_p1 = _mm_abs_epi16(xmm2); + xmm2 = _mm_adds_epi16(rho_rpi_5_1, y1r); + psi_r_m5_m1 = _mm_abs_epi16(xmm2); + xmm2 = _mm_adds_epi16(rho_rpi_5_3, y1r); + psi_r_m5_m3 = _mm_abs_epi16(xmm2); + xmm2 = _mm_adds_epi16(rho_rpi_5_5, y1r); + psi_r_m5_m5 = _mm_abs_epi16(xmm2); + xmm2 = _mm_adds_epi16(rho_rpi_5_7, y1r); + psi_r_m5_m7 = _mm_abs_epi16(xmm2); + xmm2 = _mm_adds_epi16(rho_rmi_7_7, y1r); + psi_r_m7_p7 = _mm_abs_epi16(xmm2); + xmm2 = _mm_adds_epi16(rho_rmi_7_5, y1r); + psi_r_m7_p5 = _mm_abs_epi16(xmm2); + xmm2 = _mm_adds_epi16(rho_rmi_7_3, y1r); + psi_r_m7_p3 = _mm_abs_epi16(xmm2); + xmm2 = _mm_adds_epi16(rho_rmi_7_1, y1r); + psi_r_m7_p1 = _mm_abs_epi16(xmm2); + xmm2 = _mm_adds_epi16(rho_rpi_7_1, y1r); + psi_r_m7_m1 = _mm_abs_epi16(xmm2); + xmm2 = _mm_adds_epi16(rho_rpi_7_3, y1r); + psi_r_m7_m3 = _mm_abs_epi16(xmm2); + xmm2 = _mm_adds_epi16(rho_rpi_7_5, y1r); + psi_r_m7_m5 = _mm_abs_epi16(xmm2); + xmm2 = _mm_adds_epi16(rho_rpi_7_7, y1r); + psi_r_m7_m7 = _mm_abs_epi16(xmm2); + + // Psi_i calculation from rho_rpi or rho_rmi + xmm2 = _mm_subs_epi16(rho_rmi_7_7, y1i); + psi_i_p7_p7 = _mm_abs_epi16(xmm2); + xmm2 = _mm_subs_epi16(rho_rmi_5_7, y1i); + psi_i_p7_p5 = _mm_abs_epi16(xmm2); + xmm2 = _mm_subs_epi16(rho_rmi_3_7, y1i); + psi_i_p7_p3 = _mm_abs_epi16(xmm2); + xmm2 = _mm_subs_epi16(rho_rmi_1_7, y1i); + psi_i_p7_p1 = _mm_abs_epi16(xmm2); + xmm2 = _mm_adds_epi16(rho_rpi_1_7, y1i); + psi_i_p7_m1 = _mm_abs_epi16(xmm2); + xmm2 = _mm_adds_epi16(rho_rpi_3_7, 
y1i); + psi_i_p7_m3 = _mm_abs_epi16(xmm2); + xmm2 = _mm_adds_epi16(rho_rpi_5_7, y1i); + psi_i_p7_m5 = _mm_abs_epi16(xmm2); + xmm2 = _mm_adds_epi16(rho_rpi_7_7, y1i); + psi_i_p7_m7 = _mm_abs_epi16(xmm2); + xmm2 = _mm_subs_epi16(rho_rmi_7_5, y1i); + psi_i_p5_p7 = _mm_abs_epi16(xmm2); + xmm2 = _mm_subs_epi16(rho_rmi_5_5, y1i); + psi_i_p5_p5 = _mm_abs_epi16(xmm2); + xmm2 = _mm_subs_epi16(rho_rmi_3_5, y1i); + psi_i_p5_p3 = _mm_abs_epi16(xmm2); + xmm2 = _mm_subs_epi16(rho_rmi_1_5, y1i); + psi_i_p5_p1 = _mm_abs_epi16(xmm2); + xmm2 = _mm_adds_epi16(rho_rpi_1_5, y1i); + psi_i_p5_m1 = _mm_abs_epi16(xmm2); + xmm2 = _mm_adds_epi16(rho_rpi_3_5, y1i); + psi_i_p5_m3 = _mm_abs_epi16(xmm2); + xmm2 = _mm_adds_epi16(rho_rpi_5_5, y1i); + psi_i_p5_m5 = _mm_abs_epi16(xmm2); + xmm2 = _mm_adds_epi16(rho_rpi_7_5, y1i); + psi_i_p5_m7 = _mm_abs_epi16(xmm2); + xmm2 = _mm_subs_epi16(rho_rmi_7_3, y1i); + psi_i_p3_p7 = _mm_abs_epi16(xmm2); + xmm2 = _mm_subs_epi16(rho_rmi_5_3, y1i); + psi_i_p3_p5 = _mm_abs_epi16(xmm2); + xmm2 = _mm_subs_epi16(rho_rmi_3_3, y1i); + psi_i_p3_p3 = _mm_abs_epi16(xmm2); + xmm2 = _mm_subs_epi16(rho_rmi_1_3, y1i); + psi_i_p3_p1 = _mm_abs_epi16(xmm2); + xmm2 = _mm_adds_epi16(rho_rpi_1_3, y1i); + psi_i_p3_m1 = _mm_abs_epi16(xmm2); + xmm2 = _mm_adds_epi16(rho_rpi_3_3, y1i); + psi_i_p3_m3 = _mm_abs_epi16(xmm2); + xmm2 = _mm_adds_epi16(rho_rpi_5_3, y1i); + psi_i_p3_m5 = _mm_abs_epi16(xmm2); + xmm2 = _mm_adds_epi16(rho_rpi_7_3, y1i); + psi_i_p3_m7 = _mm_abs_epi16(xmm2); + xmm2 = _mm_subs_epi16(rho_rmi_7_1, y1i); + psi_i_p1_p7 = _mm_abs_epi16(xmm2); + xmm2 = _mm_subs_epi16(rho_rmi_5_1, y1i); + psi_i_p1_p5 = _mm_abs_epi16(xmm2); + xmm2 = _mm_subs_epi16(rho_rmi_3_1, y1i); + psi_i_p1_p3 = _mm_abs_epi16(xmm2); + xmm2 = _mm_subs_epi16(rho_rmi_1_1, y1i); + psi_i_p1_p1 = _mm_abs_epi16(xmm2); + xmm2 = _mm_adds_epi16(rho_rpi_1_1, y1i); + psi_i_p1_m1 = _mm_abs_epi16(xmm2); + xmm2 = _mm_adds_epi16(rho_rpi_3_1, y1i); + psi_i_p1_m3 = _mm_abs_epi16(xmm2); + xmm2 = _mm_adds_epi16(rho_rpi_5_1, 
y1i); + psi_i_p1_m5 = _mm_abs_epi16(xmm2); + xmm2 = _mm_adds_epi16(rho_rpi_7_1, y1i); + psi_i_p1_m7 = _mm_abs_epi16(xmm2); + + xmm2 = _mm_subs_epi16(rho_rpi_7_1, y1i); + psi_i_m1_p7 = _mm_abs_epi16(xmm2); + xmm2 = _mm_subs_epi16(rho_rpi_5_1, y1i); + psi_i_m1_p5 = _mm_abs_epi16(xmm2); + xmm2 = _mm_subs_epi16(rho_rpi_3_1, y1i); + psi_i_m1_p3 = _mm_abs_epi16(xmm2); + xmm2 = _mm_subs_epi16(rho_rpi_1_1, y1i); + psi_i_m1_p1 = _mm_abs_epi16(xmm2); + xmm2 = _mm_adds_epi16(rho_rmi_1_1, y1i); + psi_i_m1_m1 = _mm_abs_epi16(xmm2); + xmm2 = _mm_adds_epi16(rho_rmi_3_1, y1i); + psi_i_m1_m3 = _mm_abs_epi16(xmm2); + xmm2 = _mm_adds_epi16(rho_rmi_5_1, y1i); + psi_i_m1_m5 = _mm_abs_epi16(xmm2); + xmm2 = _mm_adds_epi16(rho_rmi_7_1, y1i); + psi_i_m1_m7 = _mm_abs_epi16(xmm2); + xmm2 = _mm_subs_epi16(rho_rpi_7_3, y1i); + psi_i_m3_p7 = _mm_abs_epi16(xmm2); + xmm2 = _mm_subs_epi16(rho_rpi_5_3, y1i); + psi_i_m3_p5 = _mm_abs_epi16(xmm2); + xmm2 = _mm_subs_epi16(rho_rpi_3_3, y1i); + psi_i_m3_p3 = _mm_abs_epi16(xmm2); + xmm2 = _mm_subs_epi16(rho_rpi_1_3, y1i); + psi_i_m3_p1 = _mm_abs_epi16(xmm2); + xmm2 = _mm_adds_epi16(rho_rmi_1_3, y1i); + psi_i_m3_m1 = _mm_abs_epi16(xmm2); + xmm2 = _mm_adds_epi16(rho_rmi_3_3, y1i); + psi_i_m3_m3 = _mm_abs_epi16(xmm2); + xmm2 = _mm_adds_epi16(rho_rmi_5_3, y1i); + psi_i_m3_m5 = _mm_abs_epi16(xmm2); + xmm2 = _mm_adds_epi16(rho_rmi_7_3, y1i); + psi_i_m3_m7 = _mm_abs_epi16(xmm2); + xmm2 = _mm_subs_epi16(rho_rpi_7_5, y1i); + psi_i_m5_p7 = _mm_abs_epi16(xmm2); + xmm2 = _mm_subs_epi16(rho_rpi_5_5, y1i); + psi_i_m5_p5 = _mm_abs_epi16(xmm2); + xmm2 = _mm_subs_epi16(rho_rpi_3_5, y1i); + psi_i_m5_p3 = _mm_abs_epi16(xmm2); + xmm2 = _mm_subs_epi16(rho_rpi_1_5, y1i); + psi_i_m5_p1 = _mm_abs_epi16(xmm2); + xmm2 = _mm_adds_epi16(rho_rmi_1_5, y1i); + psi_i_m5_m1 = _mm_abs_epi16(xmm2); + xmm2 = _mm_adds_epi16(rho_rmi_3_5, y1i); + psi_i_m5_m3 = _mm_abs_epi16(xmm2); + xmm2 = _mm_adds_epi16(rho_rmi_5_5, y1i); + psi_i_m5_m5 = _mm_abs_epi16(xmm2); + xmm2 = 
_mm_adds_epi16(rho_rmi_7_5, y1i); + psi_i_m5_m7 = _mm_abs_epi16(xmm2); + xmm2 = _mm_subs_epi16(rho_rpi_7_7, y1i); + psi_i_m7_p7 = _mm_abs_epi16(xmm2); + xmm2 = _mm_subs_epi16(rho_rpi_5_7, y1i); + psi_i_m7_p5 = _mm_abs_epi16(xmm2); + xmm2 = _mm_subs_epi16(rho_rpi_3_7, y1i); + psi_i_m7_p3 = _mm_abs_epi16(xmm2); + xmm2 = _mm_subs_epi16(rho_rpi_1_7, y1i); + psi_i_m7_p1 = _mm_abs_epi16(xmm2); + xmm2 = _mm_adds_epi16(rho_rmi_1_7, y1i); + psi_i_m7_m1 = _mm_abs_epi16(xmm2); + xmm2 = _mm_adds_epi16(rho_rmi_3_7, y1i); + psi_i_m7_m3 = _mm_abs_epi16(xmm2); + xmm2 = _mm_adds_epi16(rho_rmi_5_7, y1i); + psi_i_m7_m5 = _mm_abs_epi16(xmm2); + xmm2 = _mm_adds_epi16(rho_rmi_7_7, y1i); + psi_i_m7_m7 = _mm_abs_epi16(xmm2); + + + // Rearrange desired MF output + xmm0 = stream0_128i_in[i]; + xmm1 = stream0_128i_in[i+1]; + xmm0 = _mm_shufflelo_epi16(xmm0,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm0 = _mm_shufflehi_epi16(xmm0,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm0 = _mm_shuffle_epi32(xmm0,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm1 = _mm_shufflelo_epi16(xmm1,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm1 = _mm_shufflehi_epi16(xmm1,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm1 = _mm_shuffle_epi32(xmm1,0xd8); //_MM_SHUFFLE(0,2,1,3)); + //xmm0 = [Re(0,1) Re(2,3) Im(0,1) Im(2,3)] + //xmm1 = [Re(4,5) Re(6,7) Im(4,5) Im(6,7)] + y0r = _mm_unpacklo_epi64(xmm0,xmm1); // = [y0r(1),y0r(2),y0r(3),y0r(4)] + y0i = _mm_unpackhi_epi64(xmm0,xmm1); + + // Rearrange desired channel magnitudes + xmm2 = ch_mag_128i[i]; // = [|h|^2(1),|h|^2(1),|h|^2(2),|h|^2(2)]*(2/sqrt(10)) + xmm3 = ch_mag_128i[i+1]; // = [|h|^2(3),|h|^2(3),|h|^2(4),|h|^2(4)]*(2/sqrt(10)) + xmm2 = _mm_shufflelo_epi16(xmm2,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm2 = _mm_shufflehi_epi16(xmm2,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm2 = _mm_shuffle_epi32(xmm2,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm3 = _mm_shufflelo_epi16(xmm3,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm3 = _mm_shufflehi_epi16(xmm3,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm3 = _mm_shuffle_epi32(xmm3,0xd8); 
//_MM_SHUFFLE(0,2,1,3)); + ch_mag_des = _mm_unpacklo_epi64(xmm2,xmm3); + + y0r_one_over_sqrt_21 = _mm_mulhi_epi16(y0r, ONE_OVER_SQRT_42); + y0r_three_over_sqrt_21 = _mm_mulhi_epi16(y0r, THREE_OVER_SQRT_42); + y0r_five_over_sqrt_21 = _mm_mulhi_epi16(y0r, FIVE_OVER_SQRT_42); + y0r_five_over_sqrt_21 = _mm_slli_epi16(y0r_five_over_sqrt_21, 1); + y0r_seven_over_sqrt_21 = _mm_mulhi_epi16(y0r, SEVEN_OVER_SQRT_42); + y0r_seven_over_sqrt_21 = _mm_slli_epi16(y0r_seven_over_sqrt_21, 2); // Q2.14 + + y0i_one_over_sqrt_21 = _mm_mulhi_epi16(y0i, ONE_OVER_SQRT_42); + y0i_three_over_sqrt_21 = _mm_mulhi_epi16(y0i, THREE_OVER_SQRT_42); + y0i_five_over_sqrt_21 = _mm_mulhi_epi16(y0i, FIVE_OVER_SQRT_42); + y0i_five_over_sqrt_21 = _mm_slli_epi16(y0i_five_over_sqrt_21, 1); + y0i_seven_over_sqrt_21 = _mm_mulhi_epi16(y0i, SEVEN_OVER_SQRT_42); + y0i_seven_over_sqrt_21 = _mm_slli_epi16(y0i_seven_over_sqrt_21, 2); // Q2.14 + + y0_p_7_1 = _mm_adds_epi16(y0r_seven_over_sqrt_21, y0i_one_over_sqrt_21); + y0_p_7_3 = _mm_adds_epi16(y0r_seven_over_sqrt_21, y0i_three_over_sqrt_21); + y0_p_7_5 = _mm_adds_epi16(y0r_seven_over_sqrt_21, y0i_five_over_sqrt_21); + y0_p_7_7 = _mm_adds_epi16(y0r_seven_over_sqrt_21, y0i_seven_over_sqrt_21); + y0_p_5_1 = _mm_adds_epi16(y0r_five_over_sqrt_21, y0i_one_over_sqrt_21); + y0_p_5_3 = _mm_adds_epi16(y0r_five_over_sqrt_21, y0i_three_over_sqrt_21); + y0_p_5_5 = _mm_adds_epi16(y0r_five_over_sqrt_21, y0i_five_over_sqrt_21); + y0_p_5_7 = _mm_adds_epi16(y0r_five_over_sqrt_21, y0i_seven_over_sqrt_21); + y0_p_3_1 = _mm_adds_epi16(y0r_three_over_sqrt_21, y0i_one_over_sqrt_21); + y0_p_3_3 = _mm_adds_epi16(y0r_three_over_sqrt_21, y0i_three_over_sqrt_21); + y0_p_3_5 = _mm_adds_epi16(y0r_three_over_sqrt_21, y0i_five_over_sqrt_21); + y0_p_3_7 = _mm_adds_epi16(y0r_three_over_sqrt_21, y0i_seven_over_sqrt_21); + y0_p_1_1 = _mm_adds_epi16(y0r_one_over_sqrt_21, y0i_one_over_sqrt_21); + y0_p_1_3 = _mm_adds_epi16(y0r_one_over_sqrt_21, y0i_three_over_sqrt_21); + y0_p_1_5 = 
_mm_adds_epi16(y0r_one_over_sqrt_21, y0i_five_over_sqrt_21); + y0_p_1_7 = _mm_adds_epi16(y0r_one_over_sqrt_21, y0i_seven_over_sqrt_21); + + y0_m_1_1 = _mm_subs_epi16(y0r_one_over_sqrt_21, y0i_one_over_sqrt_21); + y0_m_1_3 = _mm_subs_epi16(y0r_one_over_sqrt_21, y0i_three_over_sqrt_21); + y0_m_1_5 = _mm_subs_epi16(y0r_one_over_sqrt_21, y0i_five_over_sqrt_21); + y0_m_1_7 = _mm_subs_epi16(y0r_one_over_sqrt_21, y0i_seven_over_sqrt_21); + y0_m_3_1 = _mm_subs_epi16(y0r_three_over_sqrt_21, y0i_one_over_sqrt_21); + y0_m_3_3 = _mm_subs_epi16(y0r_three_over_sqrt_21, y0i_three_over_sqrt_21); + y0_m_3_5 = _mm_subs_epi16(y0r_three_over_sqrt_21, y0i_five_over_sqrt_21); + y0_m_3_7 = _mm_subs_epi16(y0r_three_over_sqrt_21, y0i_seven_over_sqrt_21); + y0_m_5_1 = _mm_subs_epi16(y0r_five_over_sqrt_21, y0i_one_over_sqrt_21); + y0_m_5_3 = _mm_subs_epi16(y0r_five_over_sqrt_21, y0i_three_over_sqrt_21); + y0_m_5_5 = _mm_subs_epi16(y0r_five_over_sqrt_21, y0i_five_over_sqrt_21); + y0_m_5_7 = _mm_subs_epi16(y0r_five_over_sqrt_21, y0i_seven_over_sqrt_21); + y0_m_7_1 = _mm_subs_epi16(y0r_seven_over_sqrt_21, y0i_one_over_sqrt_21); + y0_m_7_3 = _mm_subs_epi16(y0r_seven_over_sqrt_21, y0i_three_over_sqrt_21); + y0_m_7_5 = _mm_subs_epi16(y0r_seven_over_sqrt_21, y0i_five_over_sqrt_21); + y0_m_7_7 = _mm_subs_epi16(y0r_seven_over_sqrt_21, y0i_seven_over_sqrt_21); + + // divide by sqrt(2) + psi_r_p7_p7 = _mm_mulhi_epi16(psi_r_p7_p7, ONE_OVER_SQRT_2); + psi_r_p7_p7 = _mm_slli_epi16(psi_r_p7_p7, 1); + psi_r_p7_p5 = _mm_mulhi_epi16(psi_r_p7_p5, ONE_OVER_SQRT_2); + psi_r_p7_p5 = _mm_slli_epi16(psi_r_p7_p5, 1); + psi_r_p7_p3 = _mm_mulhi_epi16(psi_r_p7_p3, ONE_OVER_SQRT_2); + psi_r_p7_p3 = _mm_slli_epi16(psi_r_p7_p3, 1); + psi_r_p7_p1 = _mm_mulhi_epi16(psi_r_p7_p1, ONE_OVER_SQRT_2); + psi_r_p7_p1 = _mm_slli_epi16(psi_r_p7_p1, 1); + psi_r_p7_m1 = _mm_mulhi_epi16(psi_r_p7_m1, ONE_OVER_SQRT_2); + psi_r_p7_m1 = _mm_slli_epi16(psi_r_p7_m1, 1); + psi_r_p7_m3 = _mm_mulhi_epi16(psi_r_p7_m3, ONE_OVER_SQRT_2); + 
psi_r_p7_m3 = _mm_slli_epi16(psi_r_p7_m3, 1); + psi_r_p7_m5 = _mm_mulhi_epi16(psi_r_p7_m5, ONE_OVER_SQRT_2); + psi_r_p7_m5 = _mm_slli_epi16(psi_r_p7_m5, 1); + psi_r_p7_m7 = _mm_mulhi_epi16(psi_r_p7_m7, ONE_OVER_SQRT_2); + psi_r_p7_m7 = _mm_slli_epi16(psi_r_p7_m7, 1); + psi_r_p5_p7 = _mm_mulhi_epi16(psi_r_p5_p7, ONE_OVER_SQRT_2); + psi_r_p5_p7 = _mm_slli_epi16(psi_r_p5_p7, 1); + psi_r_p5_p5 = _mm_mulhi_epi16(psi_r_p5_p5, ONE_OVER_SQRT_2); + psi_r_p5_p5 = _mm_slli_epi16(psi_r_p5_p5, 1); + psi_r_p5_p3 = _mm_mulhi_epi16(psi_r_p5_p3, ONE_OVER_SQRT_2); + psi_r_p5_p3 = _mm_slli_epi16(psi_r_p5_p3, 1); + psi_r_p5_p1 = _mm_mulhi_epi16(psi_r_p5_p1, ONE_OVER_SQRT_2); + psi_r_p5_p1 = _mm_slli_epi16(psi_r_p5_p1, 1); + psi_r_p5_m1 = _mm_mulhi_epi16(psi_r_p5_m1, ONE_OVER_SQRT_2); + psi_r_p5_m1 = _mm_slli_epi16(psi_r_p5_m1, 1); + psi_r_p5_m3 = _mm_mulhi_epi16(psi_r_p5_m3, ONE_OVER_SQRT_2); + psi_r_p5_m3 = _mm_slli_epi16(psi_r_p5_m3, 1); + psi_r_p5_m5 = _mm_mulhi_epi16(psi_r_p5_m5, ONE_OVER_SQRT_2); + psi_r_p5_m5 = _mm_slli_epi16(psi_r_p5_m5, 1); + psi_r_p5_m7 = _mm_mulhi_epi16(psi_r_p5_m7, ONE_OVER_SQRT_2); + psi_r_p5_m7 = _mm_slli_epi16(psi_r_p5_m7, 1); + psi_r_p3_p7 = _mm_mulhi_epi16(psi_r_p3_p7, ONE_OVER_SQRT_2); + psi_r_p3_p7 = _mm_slli_epi16(psi_r_p3_p7, 1); + psi_r_p3_p5 = _mm_mulhi_epi16(psi_r_p3_p5, ONE_OVER_SQRT_2); + psi_r_p3_p5 = _mm_slli_epi16(psi_r_p3_p5, 1); + psi_r_p3_p3 = _mm_mulhi_epi16(psi_r_p3_p3, ONE_OVER_SQRT_2); + psi_r_p3_p3 = _mm_slli_epi16(psi_r_p3_p3, 1); + psi_r_p3_p1 = _mm_mulhi_epi16(psi_r_p3_p1, ONE_OVER_SQRT_2); + psi_r_p3_p1 = _mm_slli_epi16(psi_r_p3_p1, 1); + psi_r_p3_m1 = _mm_mulhi_epi16(psi_r_p3_m1, ONE_OVER_SQRT_2); + psi_r_p3_m1 = _mm_slli_epi16(psi_r_p3_m1, 1); + psi_r_p3_m3 = _mm_mulhi_epi16(psi_r_p3_m3, ONE_OVER_SQRT_2); + psi_r_p3_m3 = _mm_slli_epi16(psi_r_p3_m3, 1); + psi_r_p3_m5 = _mm_mulhi_epi16(psi_r_p3_m5, ONE_OVER_SQRT_2); + psi_r_p3_m5 = _mm_slli_epi16(psi_r_p3_m5, 1); + psi_r_p3_m7 = _mm_mulhi_epi16(psi_r_p3_m7, ONE_OVER_SQRT_2); + 
psi_r_p3_m7 = _mm_slli_epi16(psi_r_p3_m7, 1); + psi_r_p1_p7 = _mm_mulhi_epi16(psi_r_p1_p7, ONE_OVER_SQRT_2); + psi_r_p1_p7 = _mm_slli_epi16(psi_r_p1_p7, 1); + psi_r_p1_p5 = _mm_mulhi_epi16(psi_r_p1_p5, ONE_OVER_SQRT_2); + psi_r_p1_p5 = _mm_slli_epi16(psi_r_p1_p5, 1); + psi_r_p1_p3 = _mm_mulhi_epi16(psi_r_p1_p3, ONE_OVER_SQRT_2); + psi_r_p1_p3 = _mm_slli_epi16(psi_r_p1_p3, 1); + psi_r_p1_p1 = _mm_mulhi_epi16(psi_r_p1_p1, ONE_OVER_SQRT_2); + psi_r_p1_p1 = _mm_slli_epi16(psi_r_p1_p1, 1); + psi_r_p1_m1 = _mm_mulhi_epi16(psi_r_p1_m1, ONE_OVER_SQRT_2); + psi_r_p1_m1 = _mm_slli_epi16(psi_r_p1_m1, 1); + psi_r_p1_m3 = _mm_mulhi_epi16(psi_r_p1_m3, ONE_OVER_SQRT_2); + psi_r_p1_m3 = _mm_slli_epi16(psi_r_p1_m3, 1); + psi_r_p1_m5 = _mm_mulhi_epi16(psi_r_p1_m5, ONE_OVER_SQRT_2); + psi_r_p1_m5 = _mm_slli_epi16(psi_r_p1_m5, 1); + psi_r_p1_m7 = _mm_mulhi_epi16(psi_r_p1_m7, ONE_OVER_SQRT_2); + psi_r_p1_m7 = _mm_slli_epi16(psi_r_p1_m7, 1); + psi_r_m1_p7 = _mm_mulhi_epi16(psi_r_m1_p7, ONE_OVER_SQRT_2); + psi_r_m1_p7 = _mm_slli_epi16(psi_r_m1_p7, 1); + psi_r_m1_p5 = _mm_mulhi_epi16(psi_r_m1_p5, ONE_OVER_SQRT_2); + psi_r_m1_p5 = _mm_slli_epi16(psi_r_m1_p5, 1); + psi_r_m1_p3 = _mm_mulhi_epi16(psi_r_m1_p3, ONE_OVER_SQRT_2); + psi_r_m1_p3 = _mm_slli_epi16(psi_r_m1_p3, 1); + psi_r_m1_p1 = _mm_mulhi_epi16(psi_r_m1_p1, ONE_OVER_SQRT_2); + psi_r_m1_p1 = _mm_slli_epi16(psi_r_m1_p1, 1); + psi_r_m1_m1 = _mm_mulhi_epi16(psi_r_m1_m1, ONE_OVER_SQRT_2); + psi_r_m1_m1 = _mm_slli_epi16(psi_r_m1_m1, 1); + psi_r_m1_m3 = _mm_mulhi_epi16(psi_r_m1_m3, ONE_OVER_SQRT_2); + psi_r_m1_m3 = _mm_slli_epi16(psi_r_m1_m3, 1); + psi_r_m1_m5 = _mm_mulhi_epi16(psi_r_m1_m5, ONE_OVER_SQRT_2); + psi_r_m1_m5 = _mm_slli_epi16(psi_r_m1_m5, 1); + psi_r_m1_m7 = _mm_mulhi_epi16(psi_r_m1_m7, ONE_OVER_SQRT_2); + psi_r_m1_m7 = _mm_slli_epi16(psi_r_m1_m7, 1); + psi_r_m3_p7 = _mm_mulhi_epi16(psi_r_m3_p7, ONE_OVER_SQRT_2); + psi_r_m3_p7 = _mm_slli_epi16(psi_r_m3_p7, 1); + psi_r_m3_p5 = _mm_mulhi_epi16(psi_r_m3_p5, ONE_OVER_SQRT_2); + 
psi_r_m3_p5 = _mm_slli_epi16(psi_r_m3_p5, 1); + psi_r_m3_p3 = _mm_mulhi_epi16(psi_r_m3_p3, ONE_OVER_SQRT_2); + psi_r_m3_p3 = _mm_slli_epi16(psi_r_m3_p3, 1); + psi_r_m3_p1 = _mm_mulhi_epi16(psi_r_m3_p1, ONE_OVER_SQRT_2); + psi_r_m3_p1 = _mm_slli_epi16(psi_r_m3_p1, 1); + psi_r_m3_m1 = _mm_mulhi_epi16(psi_r_m3_m1, ONE_OVER_SQRT_2); + psi_r_m3_m1 = _mm_slli_epi16(psi_r_m3_m1, 1); + psi_r_m3_m3 = _mm_mulhi_epi16(psi_r_m3_m3, ONE_OVER_SQRT_2); + psi_r_m3_m3 = _mm_slli_epi16(psi_r_m3_m3, 1); + psi_r_m3_m5 = _mm_mulhi_epi16(psi_r_m3_m5, ONE_OVER_SQRT_2); + psi_r_m3_m5 = _mm_slli_epi16(psi_r_m3_m5, 1); + psi_r_m3_m7 = _mm_mulhi_epi16(psi_r_m3_m7, ONE_OVER_SQRT_2); + psi_r_m3_m7 = _mm_slli_epi16(psi_r_m3_m7, 1); + psi_r_m5_p7 = _mm_mulhi_epi16(psi_r_m5_p7, ONE_OVER_SQRT_2); + psi_r_m5_p7 = _mm_slli_epi16(psi_r_m5_p7, 1); + psi_r_m5_p5 = _mm_mulhi_epi16(psi_r_m5_p5, ONE_OVER_SQRT_2); + psi_r_m5_p5 = _mm_slli_epi16(psi_r_m5_p5, 1); + psi_r_m5_p3 = _mm_mulhi_epi16(psi_r_m5_p3, ONE_OVER_SQRT_2); + psi_r_m5_p3 = _mm_slli_epi16(psi_r_m5_p3, 1); + psi_r_m5_p1 = _mm_mulhi_epi16(psi_r_m5_p1, ONE_OVER_SQRT_2); + psi_r_m5_p1 = _mm_slli_epi16(psi_r_m5_p1, 1); + psi_r_m5_m1 = _mm_mulhi_epi16(psi_r_m5_m1, ONE_OVER_SQRT_2); + psi_r_m5_m1 = _mm_slli_epi16(psi_r_m5_m1, 1); + psi_r_m5_m3 = _mm_mulhi_epi16(psi_r_m5_m3, ONE_OVER_SQRT_2); + psi_r_m5_m3 = _mm_slli_epi16(psi_r_m5_m3, 1); + psi_r_m5_m5 = _mm_mulhi_epi16(psi_r_m5_m5, ONE_OVER_SQRT_2); + psi_r_m5_m5 = _mm_slli_epi16(psi_r_m5_m5, 1); + psi_r_m5_m7 = _mm_mulhi_epi16(psi_r_m5_m7, ONE_OVER_SQRT_2); + psi_r_m5_m7 = _mm_slli_epi16(psi_r_m5_m7, 1); + psi_r_m7_p7 = _mm_mulhi_epi16(psi_r_m7_p7, ONE_OVER_SQRT_2); + psi_r_m7_p7 = _mm_slli_epi16(psi_r_m7_p7, 1); + psi_r_m7_p5 = _mm_mulhi_epi16(psi_r_m7_p5, ONE_OVER_SQRT_2); + psi_r_m7_p5 = _mm_slli_epi16(psi_r_m7_p5, 1); + psi_r_m7_p3 = _mm_mulhi_epi16(psi_r_m7_p3, ONE_OVER_SQRT_2); + psi_r_m7_p3 = _mm_slli_epi16(psi_r_m7_p3, 1); + psi_r_m7_p1 = _mm_mulhi_epi16(psi_r_m7_p1, ONE_OVER_SQRT_2); + 
psi_r_m7_p1 = _mm_slli_epi16(psi_r_m7_p1, 1); + psi_r_m7_m1 = _mm_mulhi_epi16(psi_r_m7_m1, ONE_OVER_SQRT_2); + psi_r_m7_m1 = _mm_slli_epi16(psi_r_m7_m1, 1); + psi_r_m7_m3 = _mm_mulhi_epi16(psi_r_m7_m3, ONE_OVER_SQRT_2); + psi_r_m7_m3 = _mm_slli_epi16(psi_r_m7_m3, 1); + psi_r_m7_m5 = _mm_mulhi_epi16(psi_r_m7_m5, ONE_OVER_SQRT_2); + psi_r_m7_m5 = _mm_slli_epi16(psi_r_m7_m5, 1); + psi_r_m7_m7 = _mm_mulhi_epi16(psi_r_m7_m7, ONE_OVER_SQRT_2); + psi_r_m7_m7 = _mm_slli_epi16(psi_r_m7_m7, 1); + + psi_i_p7_p7 = _mm_mulhi_epi16(psi_i_p7_p7, ONE_OVER_SQRT_2); + psi_i_p7_p7 = _mm_slli_epi16(psi_i_p7_p7, 1); + psi_i_p7_p5 = _mm_mulhi_epi16(psi_i_p7_p5, ONE_OVER_SQRT_2); + psi_i_p7_p5 = _mm_slli_epi16(psi_i_p7_p5, 1); + psi_i_p7_p3 = _mm_mulhi_epi16(psi_i_p7_p3, ONE_OVER_SQRT_2); + psi_i_p7_p3 = _mm_slli_epi16(psi_i_p7_p3, 1); + psi_i_p7_p1 = _mm_mulhi_epi16(psi_i_p7_p1, ONE_OVER_SQRT_2); + psi_i_p7_p1 = _mm_slli_epi16(psi_i_p7_p1, 1); + psi_i_p7_m1 = _mm_mulhi_epi16(psi_i_p7_m1, ONE_OVER_SQRT_2); + psi_i_p7_m1 = _mm_slli_epi16(psi_i_p7_m1, 1); + psi_i_p7_m3 = _mm_mulhi_epi16(psi_i_p7_m3, ONE_OVER_SQRT_2); + psi_i_p7_m3 = _mm_slli_epi16(psi_i_p7_m3, 1); + psi_i_p7_m5 = _mm_mulhi_epi16(psi_i_p7_m5, ONE_OVER_SQRT_2); + psi_i_p7_m5 = _mm_slli_epi16(psi_i_p7_m5, 1); + psi_i_p7_m7 = _mm_mulhi_epi16(psi_i_p7_m7, ONE_OVER_SQRT_2); + psi_i_p7_m7 = _mm_slli_epi16(psi_i_p7_m7, 1); + psi_i_p5_p7 = _mm_mulhi_epi16(psi_i_p5_p7, ONE_OVER_SQRT_2); + psi_i_p5_p7 = _mm_slli_epi16(psi_i_p5_p7, 1); + psi_i_p5_p5 = _mm_mulhi_epi16(psi_i_p5_p5, ONE_OVER_SQRT_2); + psi_i_p5_p5 = _mm_slli_epi16(psi_i_p5_p5, 1); + psi_i_p5_p3 = _mm_mulhi_epi16(psi_i_p5_p3, ONE_OVER_SQRT_2); + psi_i_p5_p3 = _mm_slli_epi16(psi_i_p5_p3, 1); + psi_i_p5_p1 = _mm_mulhi_epi16(psi_i_p5_p1, ONE_OVER_SQRT_2); + psi_i_p5_p1 = _mm_slli_epi16(psi_i_p5_p1, 1); + psi_i_p5_m1 = _mm_mulhi_epi16(psi_i_p5_m1, ONE_OVER_SQRT_2); + psi_i_p5_m1 = _mm_slli_epi16(psi_i_p5_m1, 1); + psi_i_p5_m3 = _mm_mulhi_epi16(psi_i_p5_m3, ONE_OVER_SQRT_2); + 
psi_i_p5_m3 = _mm_slli_epi16(psi_i_p5_m3, 1); + psi_i_p5_m5 = _mm_mulhi_epi16(psi_i_p5_m5, ONE_OVER_SQRT_2); + psi_i_p5_m5 = _mm_slli_epi16(psi_i_p5_m5, 1); + psi_i_p5_m7 = _mm_mulhi_epi16(psi_i_p5_m7, ONE_OVER_SQRT_2); + psi_i_p5_m7 = _mm_slli_epi16(psi_i_p5_m7, 1); + psi_i_p3_p7 = _mm_mulhi_epi16(psi_i_p3_p7, ONE_OVER_SQRT_2); + psi_i_p3_p7 = _mm_slli_epi16(psi_i_p3_p7, 1); + psi_i_p3_p5 = _mm_mulhi_epi16(psi_i_p3_p5, ONE_OVER_SQRT_2); + psi_i_p3_p5 = _mm_slli_epi16(psi_i_p3_p5, 1); + psi_i_p3_p3 = _mm_mulhi_epi16(psi_i_p3_p3, ONE_OVER_SQRT_2); + psi_i_p3_p3 = _mm_slli_epi16(psi_i_p3_p3, 1); + psi_i_p3_p1 = _mm_mulhi_epi16(psi_i_p3_p1, ONE_OVER_SQRT_2); + psi_i_p3_p1 = _mm_slli_epi16(psi_i_p3_p1, 1); + psi_i_p3_m1 = _mm_mulhi_epi16(psi_i_p3_m1, ONE_OVER_SQRT_2); + psi_i_p3_m1 = _mm_slli_epi16(psi_i_p3_m1, 1); + psi_i_p3_m3 = _mm_mulhi_epi16(psi_i_p3_m3, ONE_OVER_SQRT_2); + psi_i_p3_m3 = _mm_slli_epi16(psi_i_p3_m3, 1); + psi_i_p3_m5 = _mm_mulhi_epi16(psi_i_p3_m5, ONE_OVER_SQRT_2); + psi_i_p3_m5 = _mm_slli_epi16(psi_i_p3_m5, 1); + psi_i_p3_m7 = _mm_mulhi_epi16(psi_i_p3_m7, ONE_OVER_SQRT_2); + psi_i_p3_m7 = _mm_slli_epi16(psi_i_p3_m7, 1); + psi_i_p1_p7 = _mm_mulhi_epi16(psi_i_p1_p7, ONE_OVER_SQRT_2); + psi_i_p1_p7 = _mm_slli_epi16(psi_i_p1_p7, 1); + psi_i_p1_p5 = _mm_mulhi_epi16(psi_i_p1_p5, ONE_OVER_SQRT_2); + psi_i_p1_p5 = _mm_slli_epi16(psi_i_p1_p5, 1); + psi_i_p1_p3 = _mm_mulhi_epi16(psi_i_p1_p3, ONE_OVER_SQRT_2); + psi_i_p1_p3 = _mm_slli_epi16(psi_i_p1_p3, 1); + psi_i_p1_p1 = _mm_mulhi_epi16(psi_i_p1_p1, ONE_OVER_SQRT_2); + psi_i_p1_p1 = _mm_slli_epi16(psi_i_p1_p1, 1); + psi_i_p1_m1 = _mm_mulhi_epi16(psi_i_p1_m1, ONE_OVER_SQRT_2); + psi_i_p1_m1 = _mm_slli_epi16(psi_i_p1_m1, 1); + psi_i_p1_m3 = _mm_mulhi_epi16(psi_i_p1_m3, ONE_OVER_SQRT_2); + psi_i_p1_m3 = _mm_slli_epi16(psi_i_p1_m3, 1); + psi_i_p1_m5 = _mm_mulhi_epi16(psi_i_p1_m5, ONE_OVER_SQRT_2); + psi_i_p1_m5 = _mm_slli_epi16(psi_i_p1_m5, 1); + psi_i_p1_m7 = _mm_mulhi_epi16(psi_i_p1_m7, ONE_OVER_SQRT_2); + 
psi_i_p1_m7 = _mm_slli_epi16(psi_i_p1_m7, 1); + psi_i_m1_p7 = _mm_mulhi_epi16(psi_i_m1_p7, ONE_OVER_SQRT_2); + psi_i_m1_p7 = _mm_slli_epi16(psi_i_m1_p7, 1); + psi_i_m1_p5 = _mm_mulhi_epi16(psi_i_m1_p5, ONE_OVER_SQRT_2); + psi_i_m1_p5 = _mm_slli_epi16(psi_i_m1_p5, 1); + psi_i_m1_p3 = _mm_mulhi_epi16(psi_i_m1_p3, ONE_OVER_SQRT_2); + psi_i_m1_p3 = _mm_slli_epi16(psi_i_m1_p3, 1); + psi_i_m1_p1 = _mm_mulhi_epi16(psi_i_m1_p1, ONE_OVER_SQRT_2); + psi_i_m1_p1 = _mm_slli_epi16(psi_i_m1_p1, 1); + psi_i_m1_m1 = _mm_mulhi_epi16(psi_i_m1_m1, ONE_OVER_SQRT_2); + psi_i_m1_m1 = _mm_slli_epi16(psi_i_m1_m1, 1); + psi_i_m1_m3 = _mm_mulhi_epi16(psi_i_m1_m3, ONE_OVER_SQRT_2); + psi_i_m1_m3 = _mm_slli_epi16(psi_i_m1_m3, 1); + psi_i_m1_m5 = _mm_mulhi_epi16(psi_i_m1_m5, ONE_OVER_SQRT_2); + psi_i_m1_m5 = _mm_slli_epi16(psi_i_m1_m5, 1); + psi_i_m1_m7 = _mm_mulhi_epi16(psi_i_m1_m7, ONE_OVER_SQRT_2); + psi_i_m1_m7 = _mm_slli_epi16(psi_i_m1_m7, 1); + psi_i_m3_p7 = _mm_mulhi_epi16(psi_i_m3_p7, ONE_OVER_SQRT_2); + psi_i_m3_p7 = _mm_slli_epi16(psi_i_m3_p7, 1); + psi_i_m3_p5 = _mm_mulhi_epi16(psi_i_m3_p5, ONE_OVER_SQRT_2); + psi_i_m3_p5 = _mm_slli_epi16(psi_i_m3_p5, 1); + psi_i_m3_p3 = _mm_mulhi_epi16(psi_i_m3_p3, ONE_OVER_SQRT_2); + psi_i_m3_p3 = _mm_slli_epi16(psi_i_m3_p3, 1); + psi_i_m3_p1 = _mm_mulhi_epi16(psi_i_m3_p1, ONE_OVER_SQRT_2); + psi_i_m3_p1 = _mm_slli_epi16(psi_i_m3_p1, 1); + psi_i_m3_m1 = _mm_mulhi_epi16(psi_i_m3_m1, ONE_OVER_SQRT_2); + psi_i_m3_m1 = _mm_slli_epi16(psi_i_m3_m1, 1); + psi_i_m3_m3 = _mm_mulhi_epi16(psi_i_m3_m3, ONE_OVER_SQRT_2); + psi_i_m3_m3 = _mm_slli_epi16(psi_i_m3_m3, 1); + psi_i_m3_m5 = _mm_mulhi_epi16(psi_i_m3_m5, ONE_OVER_SQRT_2); + psi_i_m3_m5 = _mm_slli_epi16(psi_i_m3_m5, 1); + psi_i_m3_m7 = _mm_mulhi_epi16(psi_i_m3_m7, ONE_OVER_SQRT_2); + psi_i_m3_m7 = _mm_slli_epi16(psi_i_m3_m7, 1); + psi_i_m5_p7 = _mm_mulhi_epi16(psi_i_m5_p7, ONE_OVER_SQRT_2); + psi_i_m5_p7 = _mm_slli_epi16(psi_i_m5_p7, 1); + psi_i_m5_p5 = _mm_mulhi_epi16(psi_i_m5_p5, ONE_OVER_SQRT_2); + 
psi_i_m5_p5 = _mm_slli_epi16(psi_i_m5_p5, 1); + psi_i_m5_p3 = _mm_mulhi_epi16(psi_i_m5_p3, ONE_OVER_SQRT_2); + psi_i_m5_p3 = _mm_slli_epi16(psi_i_m5_p3, 1); + psi_i_m5_p1 = _mm_mulhi_epi16(psi_i_m5_p1, ONE_OVER_SQRT_2); + psi_i_m5_p1 = _mm_slli_epi16(psi_i_m5_p1, 1); + psi_i_m5_m1 = _mm_mulhi_epi16(psi_i_m5_m1, ONE_OVER_SQRT_2); + psi_i_m5_m1 = _mm_slli_epi16(psi_i_m5_m1, 1); + psi_i_m5_m3 = _mm_mulhi_epi16(psi_i_m5_m3, ONE_OVER_SQRT_2); + psi_i_m5_m3 = _mm_slli_epi16(psi_i_m5_m3, 1); + psi_i_m5_m5 = _mm_mulhi_epi16(psi_i_m5_m5, ONE_OVER_SQRT_2); + psi_i_m5_m5 = _mm_slli_epi16(psi_i_m5_m5, 1); + psi_i_m5_m7 = _mm_mulhi_epi16(psi_i_m5_m7, ONE_OVER_SQRT_2); + psi_i_m5_m7 = _mm_slli_epi16(psi_i_m5_m7, 1); + psi_i_m7_p7 = _mm_mulhi_epi16(psi_i_m7_p7, ONE_OVER_SQRT_2); + psi_i_m7_p7 = _mm_slli_epi16(psi_i_m7_p7, 1); + psi_i_m7_p5 = _mm_mulhi_epi16(psi_i_m7_p5, ONE_OVER_SQRT_2); + psi_i_m7_p5 = _mm_slli_epi16(psi_i_m7_p5, 1); + psi_i_m7_p3 = _mm_mulhi_epi16(psi_i_m7_p3, ONE_OVER_SQRT_2); + psi_i_m7_p3 = _mm_slli_epi16(psi_i_m7_p3, 1); + psi_i_m7_p1 = _mm_mulhi_epi16(psi_i_m7_p1, ONE_OVER_SQRT_2); + psi_i_m7_p1 = _mm_slli_epi16(psi_i_m7_p1, 1); + psi_i_m7_m1 = _mm_mulhi_epi16(psi_i_m7_m1, ONE_OVER_SQRT_2); + psi_i_m7_m1 = _mm_slli_epi16(psi_i_m7_m1, 1); + psi_i_m7_m3 = _mm_mulhi_epi16(psi_i_m7_m3, ONE_OVER_SQRT_2); + psi_i_m7_m3 = _mm_slli_epi16(psi_i_m7_m3, 1); + psi_i_m7_m5 = _mm_mulhi_epi16(psi_i_m7_m5, ONE_OVER_SQRT_2); + psi_i_m7_m5 = _mm_slli_epi16(psi_i_m7_m5, 1); + psi_i_m7_m7 = _mm_mulhi_epi16(psi_i_m7_m7, ONE_OVER_SQRT_2); + psi_i_m7_m7 = _mm_slli_epi16(psi_i_m7_m7, 1); + + psi_a_p7_p7 = _mm_adds_epi16(psi_r_p7_p7, psi_i_p7_p7); + psi_a_p7_p5 = _mm_adds_epi16(psi_r_p7_p5, psi_i_p7_p5); + psi_a_p7_p3 = _mm_adds_epi16(psi_r_p7_p3, psi_i_p7_p3); + psi_a_p7_p1 = _mm_adds_epi16(psi_r_p7_p1, psi_i_p7_p1); + psi_a_p7_m1 = _mm_adds_epi16(psi_r_p7_m1, psi_i_p7_m1); + psi_a_p7_m3 = _mm_adds_epi16(psi_r_p7_m3, psi_i_p7_m3); + psi_a_p7_m5 = _mm_adds_epi16(psi_r_p7_m5, 
psi_i_p7_m5); + psi_a_p7_m7 = _mm_adds_epi16(psi_r_p7_m7, psi_i_p7_m7); + psi_a_p5_p7 = _mm_adds_epi16(psi_r_p5_p7, psi_i_p5_p7); + psi_a_p5_p5 = _mm_adds_epi16(psi_r_p5_p5, psi_i_p5_p5); + psi_a_p5_p3 = _mm_adds_epi16(psi_r_p5_p3, psi_i_p5_p3); + psi_a_p5_p1 = _mm_adds_epi16(psi_r_p5_p1, psi_i_p5_p1); + psi_a_p5_m1 = _mm_adds_epi16(psi_r_p5_m1, psi_i_p5_m1); + psi_a_p5_m3 = _mm_adds_epi16(psi_r_p5_m3, psi_i_p5_m3); + psi_a_p5_m5 = _mm_adds_epi16(psi_r_p5_m5, psi_i_p5_m5); + psi_a_p5_m7 = _mm_adds_epi16(psi_r_p5_m7, psi_i_p5_m7); + psi_a_p3_p7 = _mm_adds_epi16(psi_r_p3_p7, psi_i_p3_p7); + psi_a_p3_p5 = _mm_adds_epi16(psi_r_p3_p5, psi_i_p3_p5); + psi_a_p3_p3 = _mm_adds_epi16(psi_r_p3_p3, psi_i_p3_p3); + psi_a_p3_p1 = _mm_adds_epi16(psi_r_p3_p1, psi_i_p3_p1); + psi_a_p3_m1 = _mm_adds_epi16(psi_r_p3_m1, psi_i_p3_m1); + psi_a_p3_m3 = _mm_adds_epi16(psi_r_p3_m3, psi_i_p3_m3); + psi_a_p3_m5 = _mm_adds_epi16(psi_r_p3_m5, psi_i_p3_m5); + psi_a_p3_m7 = _mm_adds_epi16(psi_r_p3_m7, psi_i_p3_m7); + psi_a_p1_p7 = _mm_adds_epi16(psi_r_p1_p7, psi_i_p1_p7); + psi_a_p1_p5 = _mm_adds_epi16(psi_r_p1_p5, psi_i_p1_p5); + psi_a_p1_p3 = _mm_adds_epi16(psi_r_p1_p3, psi_i_p1_p3); + psi_a_p1_p1 = _mm_adds_epi16(psi_r_p1_p1, psi_i_p1_p1); + psi_a_p1_m1 = _mm_adds_epi16(psi_r_p1_m1, psi_i_p1_m1); + psi_a_p1_m3 = _mm_adds_epi16(psi_r_p1_m3, psi_i_p1_m3); + psi_a_p1_m5 = _mm_adds_epi16(psi_r_p1_m5, psi_i_p1_m5); + psi_a_p1_m7 = _mm_adds_epi16(psi_r_p1_m7, psi_i_p1_m7); + psi_a_m1_p7 = _mm_adds_epi16(psi_r_m1_p7, psi_i_m1_p7); + psi_a_m1_p5 = _mm_adds_epi16(psi_r_m1_p5, psi_i_m1_p5); + psi_a_m1_p3 = _mm_adds_epi16(psi_r_m1_p3, psi_i_m1_p3); + psi_a_m1_p1 = _mm_adds_epi16(psi_r_m1_p1, psi_i_m1_p1); + psi_a_m1_m1 = _mm_adds_epi16(psi_r_m1_m1, psi_i_m1_m1); + psi_a_m1_m3 = _mm_adds_epi16(psi_r_m1_m3, psi_i_m1_m3); + psi_a_m1_m5 = _mm_adds_epi16(psi_r_m1_m5, psi_i_m1_m5); + psi_a_m1_m7 = _mm_adds_epi16(psi_r_m1_m7, psi_i_m1_m7); + psi_a_m3_p7 = _mm_adds_epi16(psi_r_m3_p7, psi_i_m3_p7); + psi_a_m3_p5 
= _mm_adds_epi16(psi_r_m3_p5, psi_i_m3_p5); + psi_a_m3_p3 = _mm_adds_epi16(psi_r_m3_p3, psi_i_m3_p3); + psi_a_m3_p1 = _mm_adds_epi16(psi_r_m3_p1, psi_i_m3_p1); + psi_a_m3_m1 = _mm_adds_epi16(psi_r_m3_m1, psi_i_m3_m1); + psi_a_m3_m3 = _mm_adds_epi16(psi_r_m3_m3, psi_i_m3_m3); + psi_a_m3_m5 = _mm_adds_epi16(psi_r_m3_m5, psi_i_m3_m5); + psi_a_m3_m7 = _mm_adds_epi16(psi_r_m3_m7, psi_i_m3_m7); + psi_a_m5_p7 = _mm_adds_epi16(psi_r_m5_p7, psi_i_m5_p7); + psi_a_m5_p5 = _mm_adds_epi16(psi_r_m5_p5, psi_i_m5_p5); + psi_a_m5_p3 = _mm_adds_epi16(psi_r_m5_p3, psi_i_m5_p3); + psi_a_m5_p1 = _mm_adds_epi16(psi_r_m5_p1, psi_i_m5_p1); + psi_a_m5_m1 = _mm_adds_epi16(psi_r_m5_m1, psi_i_m5_m1); + psi_a_m5_m3 = _mm_adds_epi16(psi_r_m5_m3, psi_i_m5_m3); + psi_a_m5_m5 = _mm_adds_epi16(psi_r_m5_m5, psi_i_m5_m5); + psi_a_m5_m7 = _mm_adds_epi16(psi_r_m5_m7, psi_i_m5_m7); + psi_a_m7_p7 = _mm_adds_epi16(psi_r_m7_p7, psi_i_m7_p7); + psi_a_m7_p5 = _mm_adds_epi16(psi_r_m7_p5, psi_i_m7_p5); + psi_a_m7_p3 = _mm_adds_epi16(psi_r_m7_p3, psi_i_m7_p3); + psi_a_m7_p1 = _mm_adds_epi16(psi_r_m7_p1, psi_i_m7_p1); + psi_a_m7_m1 = _mm_adds_epi16(psi_r_m7_m1, psi_i_m7_m1); + psi_a_m7_m3 = _mm_adds_epi16(psi_r_m7_m3, psi_i_m7_m3); + psi_a_m7_m5 = _mm_adds_epi16(psi_r_m7_m5, psi_i_m7_m5); + psi_a_m7_m7 = _mm_adds_epi16(psi_r_m7_m7, psi_i_m7_m7); + + // Computing different multiples of ||h0||^2 + // x=1, y=1 + ch_mag_2_over_42_with_sigma2 = _mm_mulhi_epi16(ch_mag_des,ONE_OVER_FOUR_SQRT_42); + ch_mag_2_over_42_with_sigma2 = _mm_slli_epi16(ch_mag_2_over_42_with_sigma2,1); + // x=1, y=3 + ch_mag_10_over_42_with_sigma2 = _mm_mulhi_epi16(ch_mag_des,FIVE_OVER_FOUR_SQRT_42); + ch_mag_10_over_42_with_sigma2 = _mm_slli_epi16(ch_mag_10_over_42_with_sigma2,1); + // x=1, x=5 + ch_mag_26_over_42_with_sigma2 = _mm_mulhi_epi16(ch_mag_des,THIRTEEN_OVER_FOUR_SQRT_42); + ch_mag_26_over_42_with_sigma2 = _mm_slli_epi16(ch_mag_26_over_42_with_sigma2,1); + // x=1, y=7 + ch_mag_50_over_42_with_sigma2 = 
_mm_mulhi_epi16(ch_mag_des,TWENTYFIVE_OVER_FOUR_SQRT_42); + ch_mag_50_over_42_with_sigma2 = _mm_slli_epi16(ch_mag_50_over_42_with_sigma2,1); + // x=3, y=3 + ch_mag_18_over_42_with_sigma2 = _mm_mulhi_epi16(ch_mag_des,NINE_OVER_FOUR_SQRT_42); + ch_mag_18_over_42_with_sigma2 = _mm_slli_epi16(ch_mag_18_over_42_with_sigma2,1); + // x=3, y=5 + ch_mag_34_over_42_with_sigma2 = _mm_mulhi_epi16(ch_mag_des,SEVENTEEN_OVER_FOUR_SQRT_42); + ch_mag_34_over_42_with_sigma2 = _mm_slli_epi16(ch_mag_34_over_42_with_sigma2,1); + // x=3, y=7 + ch_mag_58_over_42_with_sigma2 = _mm_mulhi_epi16(ch_mag_des,TWENTYNINE_OVER_FOUR_SQRT_42); + ch_mag_58_over_42_with_sigma2 = _mm_slli_epi16(ch_mag_58_over_42_with_sigma2,2); + // x=5, y=5 + ch_mag_50_over_42_with_sigma2 = _mm_mulhi_epi16(ch_mag_des,TWENTYFIVE_OVER_FOUR_SQRT_42); + ch_mag_50_over_42_with_sigma2 = _mm_slli_epi16(ch_mag_50_over_42_with_sigma2,1); + // x=5, y=7 + ch_mag_74_over_42_with_sigma2 = _mm_mulhi_epi16(ch_mag_des,THIRTYSEVEN_OVER_FOUR_SQRT_42); + ch_mag_74_over_42_with_sigma2 = _mm_slli_epi16(ch_mag_74_over_42_with_sigma2,2); + // x=7, y=7 + ch_mag_98_over_42_with_sigma2 = _mm_mulhi_epi16(ch_mag_des,FORTYNINE_OVER_FOUR_SQRT_42); + ch_mag_98_over_42_with_sigma2 = _mm_slli_epi16(ch_mag_98_over_42_with_sigma2,2); + + // Computing Metrics + xmm1 = _mm_adds_epi16(psi_a_p7_p7, y0_p_7_7); + bit_met_p7_p7 = _mm_subs_epi16(xmm1, ch_mag_98_over_42_with_sigma2); + xmm1 = _mm_adds_epi16(psi_a_p7_p5, y0_p_7_5); + bit_met_p7_p5 = _mm_subs_epi16(xmm1, ch_mag_74_over_42_with_sigma2); + xmm1 = _mm_adds_epi16(psi_a_p7_p3, y0_p_7_3); + bit_met_p7_p3 = _mm_subs_epi16(xmm1, ch_mag_58_over_42_with_sigma2); + xmm1 = _mm_adds_epi16(psi_a_p7_p1, y0_p_7_1); + bit_met_p7_p1 = _mm_subs_epi16(xmm1, ch_mag_50_over_42_with_sigma2); + xmm1 = _mm_adds_epi16(psi_a_p7_m1, y0_m_7_1); + bit_met_p7_m1 = _mm_subs_epi16(xmm1, ch_mag_50_over_42_with_sigma2); + xmm1 = _mm_adds_epi16(psi_a_p7_m3, y0_m_7_3); + bit_met_p7_m3 = _mm_subs_epi16(xmm1, 
ch_mag_58_over_42_with_sigma2); + xmm1 = _mm_adds_epi16(psi_a_p7_m5, y0_m_7_5); + bit_met_p7_m5 = _mm_subs_epi16(xmm1, ch_mag_74_over_42_with_sigma2); + xmm1 = _mm_adds_epi16(psi_a_p7_m7, y0_m_7_7); + bit_met_p7_m7 = _mm_subs_epi16(xmm1, ch_mag_98_over_42_with_sigma2); + xmm1 = _mm_adds_epi16(psi_a_p5_p7, y0_p_5_7); + bit_met_p5_p7 = _mm_subs_epi16(xmm1, ch_mag_74_over_42_with_sigma2); + xmm1 = _mm_adds_epi16(psi_a_p5_p5, y0_p_5_5); + bit_met_p5_p5 = _mm_subs_epi16(xmm1, ch_mag_50_over_42_with_sigma2); + xmm1 = _mm_adds_epi16(psi_a_p5_p3, y0_p_5_3); + bit_met_p5_p3 = _mm_subs_epi16(xmm1, ch_mag_34_over_42_with_sigma2); + xmm1 = _mm_adds_epi16(psi_a_p5_p1, y0_p_5_1); + bit_met_p5_p1 = _mm_subs_epi16(xmm1, ch_mag_26_over_42_with_sigma2); + xmm1 = _mm_adds_epi16(psi_a_p5_m1, y0_m_5_1); + bit_met_p5_m1 = _mm_subs_epi16(xmm1, ch_mag_26_over_42_with_sigma2); + xmm1 = _mm_adds_epi16(psi_a_p5_m3, y0_m_5_3); + bit_met_p5_m3 = _mm_subs_epi16(xmm1, ch_mag_34_over_42_with_sigma2); + xmm1 = _mm_adds_epi16(psi_a_p5_m5, y0_m_5_5); + bit_met_p5_m5 = _mm_subs_epi16(xmm1, ch_mag_50_over_42_with_sigma2); + xmm1 = _mm_adds_epi16(psi_a_p5_m7, y0_m_5_7); + bit_met_p5_m7 = _mm_subs_epi16(xmm1, ch_mag_74_over_42_with_sigma2); + xmm1 = _mm_adds_epi16(psi_a_p3_p7, y0_p_3_7); + bit_met_p3_p7 = _mm_subs_epi16(xmm1, ch_mag_58_over_42_with_sigma2); + xmm1 = _mm_adds_epi16(psi_a_p3_p5, y0_p_3_5); + bit_met_p3_p5 = _mm_subs_epi16(xmm1, ch_mag_34_over_42_with_sigma2); + xmm1 = _mm_adds_epi16(psi_a_p3_p3, y0_p_3_3); + bit_met_p3_p3 = _mm_subs_epi16(xmm1, ch_mag_18_over_42_with_sigma2); + xmm1 = _mm_adds_epi16(psi_a_p3_p1, y0_p_3_1); + bit_met_p3_p1 = _mm_subs_epi16(xmm1, ch_mag_10_over_42_with_sigma2); + xmm1 = _mm_adds_epi16(psi_a_p3_m1, y0_m_3_1); + bit_met_p3_m1 = _mm_subs_epi16(xmm1, ch_mag_10_over_42_with_sigma2); + xmm1 = _mm_adds_epi16(psi_a_p3_m3, y0_m_3_3); + bit_met_p3_m3 = _mm_subs_epi16(xmm1, ch_mag_18_over_42_with_sigma2); + xmm1 = _mm_adds_epi16(psi_a_p3_m5, y0_m_3_5); + bit_met_p3_m5 
= _mm_subs_epi16(xmm1, ch_mag_34_over_42_with_sigma2); + xmm1 = _mm_adds_epi16(psi_a_p3_m7, y0_m_3_7); + bit_met_p3_m7 = _mm_subs_epi16(xmm1, ch_mag_58_over_42_with_sigma2); + xmm1 = _mm_adds_epi16(psi_a_p1_p7, y0_p_1_7); + bit_met_p1_p7 = _mm_subs_epi16(xmm1, ch_mag_50_over_42_with_sigma2); + xmm1 = _mm_adds_epi16(psi_a_p1_p5, y0_p_1_5); + bit_met_p1_p5 = _mm_subs_epi16(xmm1, ch_mag_26_over_42_with_sigma2); + xmm1 = _mm_adds_epi16(psi_a_p1_p3, y0_p_1_3); + bit_met_p1_p3 = _mm_subs_epi16(xmm1, ch_mag_10_over_42_with_sigma2); + xmm1 = _mm_adds_epi16(psi_a_p1_p1, y0_p_1_1); + bit_met_p1_p1 = _mm_subs_epi16(xmm1, ch_mag_2_over_42_with_sigma2); + xmm1 = _mm_adds_epi16(psi_a_p1_m1, y0_m_1_1); + bit_met_p1_m1 = _mm_subs_epi16(xmm1, ch_mag_2_over_42_with_sigma2); + xmm1 = _mm_adds_epi16(psi_a_p1_m3, y0_m_1_3); + bit_met_p1_m3 = _mm_subs_epi16(xmm1, ch_mag_10_over_42_with_sigma2); + xmm1 = _mm_adds_epi16(psi_a_p1_m5, y0_m_1_5); + bit_met_p1_m5 = _mm_subs_epi16(xmm1, ch_mag_26_over_42_with_sigma2); + xmm1 = _mm_adds_epi16(psi_a_p1_m7, y0_m_1_7); + bit_met_p1_m7 = _mm_subs_epi16(xmm1, ch_mag_50_over_42_with_sigma2); + + xmm1 = _mm_subs_epi16(psi_a_m1_p7, y0_m_1_7); + bit_met_m1_p7 = _mm_subs_epi16(xmm1, ch_mag_50_over_42_with_sigma2); + xmm1 = _mm_subs_epi16(psi_a_m1_p5, y0_m_1_5); + bit_met_m1_p5 = _mm_subs_epi16(xmm1, ch_mag_26_over_42_with_sigma2); + xmm1 = _mm_subs_epi16(psi_a_m1_p3, y0_m_1_3); + bit_met_m1_p3 = _mm_subs_epi16(xmm1, ch_mag_10_over_42_with_sigma2); + xmm1 = _mm_subs_epi16(psi_a_m1_p1, y0_m_1_1); + bit_met_m1_p1 = _mm_subs_epi16(xmm1, ch_mag_2_over_42_with_sigma2); + xmm1 = _mm_subs_epi16(psi_a_m1_m1, y0_p_1_1); + bit_met_m1_m1 = _mm_subs_epi16(xmm1, ch_mag_2_over_42_with_sigma2); + xmm1 = _mm_subs_epi16(psi_a_m1_m3, y0_p_1_3); + bit_met_m1_m3 = _mm_subs_epi16(xmm1, ch_mag_10_over_42_with_sigma2); + xmm1 = _mm_subs_epi16(psi_a_m1_m5, y0_p_1_5); + bit_met_m1_m5 = _mm_subs_epi16(xmm1, ch_mag_26_over_42_with_sigma2); + xmm1 = _mm_subs_epi16(psi_a_m1_m7, 
y0_p_1_7); + bit_met_m1_m7 = _mm_subs_epi16(xmm1, ch_mag_50_over_42_with_sigma2); + xmm1 = _mm_subs_epi16(psi_a_m3_p7, y0_m_3_7); + bit_met_m3_p7 = _mm_subs_epi16(xmm1, ch_mag_58_over_42_with_sigma2); + xmm1 = _mm_subs_epi16(psi_a_m3_p5, y0_m_3_5); + bit_met_m3_p5 = _mm_subs_epi16(xmm1, ch_mag_34_over_42_with_sigma2); + xmm1 = _mm_subs_epi16(psi_a_m3_p3, y0_m_3_3); + bit_met_m3_p3 = _mm_subs_epi16(xmm1, ch_mag_18_over_42_with_sigma2); + xmm1 = _mm_subs_epi16(psi_a_m3_p1, y0_m_3_1); + bit_met_m3_p1 = _mm_subs_epi16(xmm1, ch_mag_10_over_42_with_sigma2); + xmm1 = _mm_subs_epi16(psi_a_m3_m1, y0_p_3_1); + bit_met_m3_m1 = _mm_subs_epi16(xmm1, ch_mag_10_over_42_with_sigma2); + xmm1 = _mm_subs_epi16(psi_a_m3_m3, y0_p_3_3); + bit_met_m3_m3 = _mm_subs_epi16(xmm1, ch_mag_18_over_42_with_sigma2); + xmm1 = _mm_subs_epi16(psi_a_m3_m5, y0_p_3_5); + bit_met_m3_m5 = _mm_subs_epi16(xmm1, ch_mag_34_over_42_with_sigma2); + xmm1 = _mm_subs_epi16(psi_a_m3_m7, y0_p_3_7); + bit_met_m3_m7 = _mm_subs_epi16(xmm1, ch_mag_58_over_42_with_sigma2); + xmm1 = _mm_subs_epi16(psi_a_m5_p7, y0_m_5_7); + bit_met_m5_p7 = _mm_subs_epi16(xmm1, ch_mag_74_over_42_with_sigma2); + xmm1 = _mm_subs_epi16(psi_a_m5_p5, y0_m_5_5); + bit_met_m5_p5 = _mm_subs_epi16(xmm1, ch_mag_50_over_42_with_sigma2); + xmm1 = _mm_subs_epi16(psi_a_m5_p3, y0_m_5_3); + bit_met_m5_p3 = _mm_subs_epi16(xmm1, ch_mag_34_over_42_with_sigma2); + xmm1 = _mm_subs_epi16(psi_a_m5_p1, y0_m_5_1); + bit_met_m5_p1 = _mm_subs_epi16(xmm1, ch_mag_26_over_42_with_sigma2); + xmm1 = _mm_subs_epi16(psi_a_m5_m1, y0_p_5_1); + bit_met_m5_m1 = _mm_subs_epi16(xmm1, ch_mag_26_over_42_with_sigma2); + xmm1 = _mm_subs_epi16(psi_a_m5_m3, y0_p_5_3); + bit_met_m5_m3 = _mm_subs_epi16(xmm1, ch_mag_34_over_42_with_sigma2); + xmm1 = _mm_subs_epi16(psi_a_m5_m5, y0_p_5_5); + bit_met_m5_m5 = _mm_subs_epi16(xmm1, ch_mag_50_over_42_with_sigma2); + xmm1 = _mm_subs_epi16(psi_a_m5_m7, y0_p_5_7); + bit_met_m5_m7 = _mm_subs_epi16(xmm1, ch_mag_74_over_42_with_sigma2); + xmm1 = 
_mm_subs_epi16(psi_a_m7_p7, y0_m_7_7); + bit_met_m7_p7 = _mm_subs_epi16(xmm1, ch_mag_98_over_42_with_sigma2); + xmm1 = _mm_subs_epi16(psi_a_m7_p5, y0_m_7_5); + bit_met_m7_p5 = _mm_subs_epi16(xmm1, ch_mag_74_over_42_with_sigma2); + xmm1 = _mm_subs_epi16(psi_a_m7_p3, y0_m_7_3); + bit_met_m7_p3 = _mm_subs_epi16(xmm1, ch_mag_58_over_42_with_sigma2); + xmm1 = _mm_subs_epi16(psi_a_m7_p1, y0_m_7_1); + bit_met_m7_p1 = _mm_subs_epi16(xmm1, ch_mag_50_over_42_with_sigma2); + xmm1 = _mm_subs_epi16(psi_a_m7_m1, y0_p_7_1); + bit_met_m7_m1 = _mm_subs_epi16(xmm1, ch_mag_50_over_42_with_sigma2); + xmm1 = _mm_subs_epi16(psi_a_m7_m3, y0_p_7_3); + bit_met_m7_m3 = _mm_subs_epi16(xmm1, ch_mag_58_over_42_with_sigma2); + xmm1 = _mm_subs_epi16(psi_a_m7_m5, y0_p_7_5); + bit_met_m7_m5 = _mm_subs_epi16(xmm1, ch_mag_74_over_42_with_sigma2); + xmm1 = _mm_subs_epi16(psi_a_m7_m7, y0_p_7_7); + bit_met_m7_m7 = _mm_subs_epi16(xmm1, ch_mag_98_over_42_with_sigma2); + + // Detection for 1st bit (LTE mapping) + // bit = 1 + xmm0 = _mm_max_epi16(bit_met_m7_p7, bit_met_m7_p5); + xmm1 = _mm_max_epi16(bit_met_m7_p3, bit_met_m7_p1); + xmm2 = _mm_max_epi16(bit_met_m7_m1, bit_met_m7_m3); + xmm3 = _mm_max_epi16(bit_met_m7_m5, bit_met_m7_m7); + xmm4 = _mm_max_epi16(xmm0, xmm1); + xmm5 = _mm_max_epi16(xmm2, xmm3); + logmax_den_re0 = _mm_max_epi16(xmm4, xmm5); + xmm0 = _mm_max_epi16(bit_met_m5_p7, bit_met_m5_p5); + xmm1 = _mm_max_epi16(bit_met_m5_p3, bit_met_m5_p1); + xmm2 = _mm_max_epi16(bit_met_m5_m1, bit_met_m5_m3); + xmm3 = _mm_max_epi16(bit_met_m5_m5, bit_met_m5_m7); + xmm4 = _mm_max_epi16(xmm0, xmm1); + xmm5 = _mm_max_epi16(xmm2, xmm3); + logmax_den_re0 = _mm_max_epi16(logmax_den_re0, xmm4); + logmax_den_re0 = _mm_max_epi16(logmax_den_re0, xmm5); + xmm0 = _mm_max_epi16(bit_met_m3_p7, bit_met_m3_p5); + xmm1 = _mm_max_epi16(bit_met_m3_p3, bit_met_m3_p1); + xmm2 = _mm_max_epi16(bit_met_m3_m1, bit_met_m3_m3); + xmm3 = _mm_max_epi16(bit_met_m3_m5, bit_met_m3_m7); + xmm4 = _mm_max_epi16(xmm0, xmm1); + xmm5 = 
_mm_max_epi16(xmm2, xmm3); + logmax_den_re0 = _mm_max_epi16(logmax_den_re0, xmm4); + logmax_den_re0 = _mm_max_epi16(logmax_den_re0, xmm5); + xmm0 = _mm_max_epi16(bit_met_m1_p7, bit_met_m1_p5); + xmm1 = _mm_max_epi16(bit_met_m1_p3, bit_met_m1_p1); + xmm2 = _mm_max_epi16(bit_met_m1_m1, bit_met_m1_m3); + xmm3 = _mm_max_epi16(bit_met_m1_m5, bit_met_m1_m7); + xmm4 = _mm_max_epi16(xmm0, xmm1); + xmm5 = _mm_max_epi16(xmm2, xmm3); + logmax_den_re0 = _mm_max_epi16(logmax_den_re0, xmm4); + logmax_den_re0 = _mm_max_epi16(logmax_den_re0, xmm5); + + // bit = 0 + xmm0 = _mm_max_epi16(bit_met_p7_p7, bit_met_p7_p5); + xmm1 = _mm_max_epi16(bit_met_p7_p3, bit_met_p7_p1); + xmm2 = _mm_max_epi16(bit_met_p7_m1, bit_met_p7_m3); + xmm3 = _mm_max_epi16(bit_met_p7_m5, bit_met_p7_m7); + xmm4 = _mm_max_epi16(xmm0, xmm1); + xmm5 = _mm_max_epi16(xmm2, xmm3); + logmax_num_re0 = _mm_max_epi16(xmm4, xmm5); + xmm0 = _mm_max_epi16(bit_met_p5_p7, bit_met_p5_p5); + xmm1 = _mm_max_epi16(bit_met_p5_p3, bit_met_p5_p1); + xmm2 = _mm_max_epi16(bit_met_p5_m1, bit_met_p5_m3); + xmm3 = _mm_max_epi16(bit_met_p5_m5, bit_met_p5_m7); + xmm4 = _mm_max_epi16(xmm0, xmm1); + xmm5 = _mm_max_epi16(xmm2, xmm3); + logmax_num_re0 = _mm_max_epi16(logmax_num_re0, xmm4); + logmax_num_re0 = _mm_max_epi16(logmax_num_re0, xmm5); + xmm0 = _mm_max_epi16(bit_met_p3_p7, bit_met_p3_p5); + xmm1 = _mm_max_epi16(bit_met_p3_p3, bit_met_p3_p1); + xmm2 = _mm_max_epi16(bit_met_p3_m1, bit_met_p3_m3); + xmm3 = _mm_max_epi16(bit_met_p3_m5, bit_met_p3_m7); + xmm4 = _mm_max_epi16(xmm0, xmm1); + xmm5 = _mm_max_epi16(xmm2, xmm3); + logmax_num_re0 = _mm_max_epi16(logmax_num_re0, xmm4); + logmax_num_re0 = _mm_max_epi16(logmax_num_re0, xmm5); + xmm0 = _mm_max_epi16(bit_met_p1_p7, bit_met_p1_p5); + xmm1 = _mm_max_epi16(bit_met_p1_p3, bit_met_p1_p1); + xmm2 = _mm_max_epi16(bit_met_p1_m1, bit_met_p1_m3); + xmm3 = _mm_max_epi16(bit_met_p1_m5, bit_met_p1_m7); + xmm4 = _mm_max_epi16(xmm0, xmm1); + xmm5 = _mm_max_epi16(xmm2, xmm3); + logmax_num_re0 = 
_mm_max_epi16(logmax_num_re0, xmm4); + logmax_num_re0 = _mm_max_epi16(logmax_num_re0, xmm5); + + y0r = _mm_subs_epi16(logmax_num_re0, logmax_den_re0); + + // Detection for 2nd bit (LTE mapping) + // bit = 1 + xmm0 = _mm_max_epi16(bit_met_p7_m1, bit_met_p5_m1); + xmm1 = _mm_max_epi16(bit_met_p3_m1, bit_met_p1_m1); + xmm2 = _mm_max_epi16(bit_met_m1_m1, bit_met_m3_m1); + xmm3 = _mm_max_epi16(bit_met_m5_m1, bit_met_m7_m1); + xmm4 = _mm_max_epi16(xmm0, xmm1); + xmm5 = _mm_max_epi16(xmm2, xmm3); + logmax_den_re0 = _mm_max_epi16(xmm4, xmm5); + xmm0 = _mm_max_epi16(bit_met_p7_m3, bit_met_p5_m3); + xmm1 = _mm_max_epi16(bit_met_p3_m3, bit_met_p1_m3); + xmm2 = _mm_max_epi16(bit_met_m1_m3, bit_met_m3_m3); + xmm3 = _mm_max_epi16(bit_met_m5_m3, bit_met_m7_m3); + xmm4 = _mm_max_epi16(xmm0, xmm1); + xmm5 = _mm_max_epi16(xmm2, xmm3); + logmax_den_re0 = _mm_max_epi16(logmax_den_re0, xmm4); + logmax_den_re0 = _mm_max_epi16(logmax_den_re0, xmm5); + xmm0 = _mm_max_epi16(bit_met_p7_m5, bit_met_p5_m5); + xmm1 = _mm_max_epi16(bit_met_p3_m5, bit_met_p1_m5); + xmm2 = _mm_max_epi16(bit_met_m1_m5, bit_met_m3_m5); + xmm3 = _mm_max_epi16(bit_met_m5_m5, bit_met_m7_m5); + xmm4 = _mm_max_epi16(xmm0, xmm1); + xmm5 = _mm_max_epi16(xmm2, xmm3); + logmax_den_re0 = _mm_max_epi16(logmax_den_re0, xmm4); + logmax_den_re0 = _mm_max_epi16(logmax_den_re0, xmm5); + xmm0 = _mm_max_epi16(bit_met_p7_m7, bit_met_p5_m7); + xmm1 = _mm_max_epi16(bit_met_p3_m7, bit_met_p1_m7); + xmm2 = _mm_max_epi16(bit_met_m1_m7, bit_met_m3_m7); + xmm3 = _mm_max_epi16(bit_met_m5_m7, bit_met_m7_m7); + xmm4 = _mm_max_epi16(xmm0, xmm1); + xmm5 = _mm_max_epi16(xmm2, xmm3); + logmax_den_re0 = _mm_max_epi16(logmax_den_re0, xmm4); + logmax_den_re0 = _mm_max_epi16(logmax_den_re0, xmm5); + + // bit = 0 + xmm0 = _mm_max_epi16(bit_met_p7_p1, bit_met_p5_p1); + xmm1 = _mm_max_epi16(bit_met_p3_p1, bit_met_p1_p1); + xmm2 = _mm_max_epi16(bit_met_m1_p1, bit_met_m3_p1); + xmm3 = _mm_max_epi16(bit_met_m5_p1, bit_met_m7_p1); + xmm4 = 
_mm_max_epi16(xmm0, xmm1); + xmm5 = _mm_max_epi16(xmm2, xmm3); + logmax_num_re0 = _mm_max_epi16(xmm4, xmm5); + xmm0 = _mm_max_epi16(bit_met_p7_p3, bit_met_p5_p3); + xmm1 = _mm_max_epi16(bit_met_p3_p3, bit_met_p1_p3); + xmm2 = _mm_max_epi16(bit_met_m1_p3, bit_met_m3_p3); + xmm3 = _mm_max_epi16(bit_met_m5_p3, bit_met_m7_p3); + xmm4 = _mm_max_epi16(xmm0, xmm1); + xmm5 = _mm_max_epi16(xmm2, xmm3); + logmax_num_re0 = _mm_max_epi16(logmax_num_re0, xmm4); + logmax_num_re0 = _mm_max_epi16(logmax_num_re0, xmm5); + xmm0 = _mm_max_epi16(bit_met_p7_p5, bit_met_p5_p5); + xmm1 = _mm_max_epi16(bit_met_p3_p5, bit_met_p1_p5); + xmm2 = _mm_max_epi16(bit_met_m1_p5, bit_met_m3_p5); + xmm3 = _mm_max_epi16(bit_met_m5_p5, bit_met_m7_p5); + xmm4 = _mm_max_epi16(xmm0, xmm1); + xmm5 = _mm_max_epi16(xmm2, xmm3); + logmax_num_re0 = _mm_max_epi16(logmax_num_re0, xmm4); + logmax_num_re0 = _mm_max_epi16(logmax_num_re0, xmm5); + xmm0 = _mm_max_epi16(bit_met_p7_p7, bit_met_p5_p7); + xmm1 = _mm_max_epi16(bit_met_p3_p7, bit_met_p1_p7); + xmm2 = _mm_max_epi16(bit_met_m1_p7, bit_met_m3_p7); + xmm3 = _mm_max_epi16(bit_met_m5_p7, bit_met_m7_p7); + xmm4 = _mm_max_epi16(xmm0, xmm1); + xmm5 = _mm_max_epi16(xmm2, xmm3); + logmax_num_re0 = _mm_max_epi16(logmax_num_re0, xmm4); + logmax_num_re0 = _mm_max_epi16(logmax_num_re0, xmm5); + + y1r = _mm_subs_epi16(logmax_num_re0, logmax_den_re0); + + // Detection for 3rd bit (LTE mapping) + xmm0 = _mm_max_epi16(bit_met_m7_m7, bit_met_m7_m5); + xmm1 = _mm_max_epi16(bit_met_m7_m3, bit_met_m7_m1); + xmm2 = _mm_max_epi16(bit_met_m7_p1, bit_met_m7_p3); + xmm3 = _mm_max_epi16(bit_met_m7_p5, bit_met_m7_p7); + xmm4 = _mm_max_epi16(xmm0, xmm1); + xmm5 = _mm_max_epi16(xmm2, xmm3); + logmax_den_re0 = _mm_max_epi16(xmm4, xmm5); + xmm0 = _mm_max_epi16(bit_met_m5_m7, bit_met_m5_m5); + xmm1 = _mm_max_epi16(bit_met_m5_m3, bit_met_m5_m1); + xmm2 = _mm_max_epi16(bit_met_m5_p1, bit_met_m5_p3); + xmm3 = _mm_max_epi16(bit_met_m5_p5, bit_met_m5_p7); + xmm4 = _mm_max_epi16(xmm0, xmm1); + 
xmm5 = _mm_max_epi16(xmm2, xmm3); + logmax_den_re0 = _mm_max_epi16(logmax_den_re0, xmm4); + logmax_den_re0 = _mm_max_epi16(logmax_den_re0, xmm5); + xmm0 = _mm_max_epi16(bit_met_p5_m7, bit_met_p5_m5); + xmm1 = _mm_max_epi16(bit_met_p5_m3, bit_met_p5_m1); + xmm2 = _mm_max_epi16(bit_met_p5_p1, bit_met_p5_p3); + xmm3 = _mm_max_epi16(bit_met_p5_p5, bit_met_p5_p7); + xmm4 = _mm_max_epi16(xmm0, xmm1); + xmm5 = _mm_max_epi16(xmm2, xmm3); + logmax_den_re0 = _mm_max_epi16(logmax_den_re0, xmm4); + logmax_den_re0 = _mm_max_epi16(logmax_den_re0, xmm5); + xmm0 = _mm_max_epi16(bit_met_p7_m7, bit_met_p7_m5); + xmm1 = _mm_max_epi16(bit_met_p7_m3, bit_met_p7_m1); + xmm2 = _mm_max_epi16(bit_met_p7_p1, bit_met_p7_p3); + xmm3 = _mm_max_epi16(bit_met_p7_p5, bit_met_p7_p7); + xmm4 = _mm_max_epi16(xmm0, xmm1); + xmm5 = _mm_max_epi16(xmm2, xmm3); + logmax_den_re0 = _mm_max_epi16(logmax_den_re0, xmm4); + logmax_den_re0 = _mm_max_epi16(logmax_den_re0, xmm5); + + xmm0 = _mm_max_epi16(bit_met_m3_m7, bit_met_m3_m5); + xmm1 = _mm_max_epi16(bit_met_m3_m3, bit_met_m3_m1); + xmm2 = _mm_max_epi16(bit_met_m3_p1, bit_met_m3_p3); + xmm3 = _mm_max_epi16(bit_met_m3_p5, bit_met_m3_p7); + xmm4 = _mm_max_epi16(xmm0, xmm1); + xmm5 = _mm_max_epi16(xmm2, xmm3); + logmax_num_re0 = _mm_max_epi16(xmm4, xmm5); + xmm0 = _mm_max_epi16(bit_met_m1_m7, bit_met_m1_m5); + xmm1 = _mm_max_epi16(bit_met_m1_m3, bit_met_m1_m1); + xmm2 = _mm_max_epi16(bit_met_m1_p1, bit_met_m1_p3); + xmm3 = _mm_max_epi16(bit_met_m1_p5, bit_met_m1_p7); + xmm4 = _mm_max_epi16(xmm0, xmm1); + xmm5 = _mm_max_epi16(xmm2, xmm3); + logmax_num_re0 = _mm_max_epi16(logmax_num_re0, xmm4); + logmax_num_re0 = _mm_max_epi16(logmax_num_re0, xmm5); + xmm0 = _mm_max_epi16(bit_met_p1_m7, bit_met_p1_m5); + xmm1 = _mm_max_epi16(bit_met_p1_m3, bit_met_p1_m1); + xmm2 = _mm_max_epi16(bit_met_p1_p1, bit_met_p1_p3); + xmm3 = _mm_max_epi16(bit_met_p1_p5, bit_met_p1_p7); + xmm4 = _mm_max_epi16(xmm0, xmm1); + xmm5 = _mm_max_epi16(xmm2, xmm3); + logmax_num_re0 = 
_mm_max_epi16(logmax_num_re0, xmm4); + logmax_num_re0 = _mm_max_epi16(logmax_num_re0, xmm5); + xmm0 = _mm_max_epi16(bit_met_p3_m7, bit_met_p3_m5); + xmm1 = _mm_max_epi16(bit_met_p3_m3, bit_met_p3_m1); + xmm2 = _mm_max_epi16(bit_met_p3_p1, bit_met_p3_p3); + xmm3 = _mm_max_epi16(bit_met_p3_p5, bit_met_p3_p7); + xmm4 = _mm_max_epi16(xmm0, xmm1); + xmm5 = _mm_max_epi16(xmm2, xmm3); + logmax_num_re0 = _mm_max_epi16(logmax_num_re0, xmm4); + logmax_num_re0 = _mm_max_epi16(logmax_num_re0, xmm5); + + y2r = _mm_subs_epi16(logmax_num_re0, logmax_den_re0); + + // Detection for 4th bit (LTE mapping) + xmm0 = _mm_max_epi16(bit_met_p7_p7, bit_met_p5_p7); + xmm1 = _mm_max_epi16(bit_met_p3_p7, bit_met_p1_p7); + xmm2 = _mm_max_epi16(bit_met_m1_p7, bit_met_m3_p7); + xmm3 = _mm_max_epi16(bit_met_m5_p7, bit_met_m7_p7); + xmm4 = _mm_max_epi16(xmm0, xmm1); + xmm5 = _mm_max_epi16(xmm2, xmm3); + logmax_den_re0 = _mm_max_epi16(xmm4, xmm5); + xmm0 = _mm_max_epi16(bit_met_p7_p5, bit_met_p5_p5); + xmm1 = _mm_max_epi16(bit_met_p3_p5, bit_met_p1_p5); + xmm2 = _mm_max_epi16(bit_met_m1_p5, bit_met_m3_p5); + xmm3 = _mm_max_epi16(bit_met_m5_p5, bit_met_m5_p5); + xmm4 = _mm_max_epi16(xmm0, xmm1); + xmm5 = _mm_max_epi16(xmm2, xmm3); + logmax_den_re0 = _mm_max_epi16(logmax_den_re0, xmm4); + logmax_den_re0 = _mm_max_epi16(logmax_den_re0, xmm5); + xmm0 = _mm_max_epi16(bit_met_p7_m5, bit_met_p5_m5); + xmm1 = _mm_max_epi16(bit_met_p3_m5, bit_met_p1_m5); + xmm2 = _mm_max_epi16(bit_met_m1_m5, bit_met_m3_m5); + xmm3 = _mm_max_epi16(bit_met_m5_m5, bit_met_m7_m5); + xmm4 = _mm_max_epi16(xmm0, xmm1); + xmm5 = _mm_max_epi16(xmm2, xmm3); + logmax_den_re0 = _mm_max_epi16(logmax_den_re0, xmm4); + logmax_den_re0 = _mm_max_epi16(logmax_den_re0, xmm5); + xmm0 = _mm_max_epi16(bit_met_p7_m7, bit_met_p5_m7); + xmm1 = _mm_max_epi16(bit_met_p3_m7, bit_met_p1_m7); + xmm2 = _mm_max_epi16(bit_met_m1_m7, bit_met_m3_m7); + xmm3 = _mm_max_epi16(bit_met_m5_m7, bit_met_m7_m7); + xmm4 = _mm_max_epi16(xmm0, xmm1); + xmm5 = 
_mm_max_epi16(xmm2, xmm3); + logmax_den_re0 = _mm_max_epi16(logmax_den_re0, xmm4); + logmax_den_re0 = _mm_max_epi16(logmax_den_re0, xmm5); + + xmm0 = _mm_max_epi16(bit_met_p7_m1, bit_met_p5_m1); + xmm1 = _mm_max_epi16(bit_met_p3_m1, bit_met_p1_m1); + xmm2 = _mm_max_epi16(bit_met_m1_m1, bit_met_m3_m1); + xmm3 = _mm_max_epi16(bit_met_m5_m1, bit_met_m7_m1); + xmm4 = _mm_max_epi16(xmm0, xmm1); + xmm5 = _mm_max_epi16(xmm2, xmm3); + logmax_num_re0 = _mm_max_epi16(xmm4, xmm5); + xmm0 = _mm_max_epi16(bit_met_p7_m3, bit_met_p5_m3); + xmm1 = _mm_max_epi16(bit_met_p3_m3, bit_met_p1_m3); + xmm2 = _mm_max_epi16(bit_met_m1_m3, bit_met_m3_m3); + xmm3 = _mm_max_epi16(bit_met_m5_m3, bit_met_m7_m3); + xmm4 = _mm_max_epi16(xmm0, xmm1); + xmm5 = _mm_max_epi16(xmm2, xmm3); + logmax_num_re0 = _mm_max_epi16(logmax_num_re0, xmm4); + logmax_num_re0 = _mm_max_epi16(logmax_num_re0, xmm5); + xmm0 = _mm_max_epi16(bit_met_p7_p1, bit_met_p5_p1); + xmm1 = _mm_max_epi16(bit_met_p3_p1, bit_met_p1_p1); + xmm2 = _mm_max_epi16(bit_met_m1_p1, bit_met_m3_p1); + xmm3 = _mm_max_epi16(bit_met_m5_p1, bit_met_m7_p1); + xmm4 = _mm_max_epi16(xmm0, xmm1); + xmm5 = _mm_max_epi16(xmm2, xmm3); + logmax_num_re0 = _mm_max_epi16(logmax_num_re0, xmm4); + logmax_num_re0 = _mm_max_epi16(logmax_num_re0, xmm5); + xmm0 = _mm_max_epi16(bit_met_p7_p3, bit_met_p5_p3); + xmm1 = _mm_max_epi16(bit_met_p3_p3, bit_met_p1_p3); + xmm2 = _mm_max_epi16(bit_met_m1_p3, bit_met_m3_p3); + xmm3 = _mm_max_epi16(bit_met_m5_p3, bit_met_m7_p3); + xmm4 = _mm_max_epi16(xmm0, xmm1); + xmm5 = _mm_max_epi16(xmm2, xmm3); + logmax_num_re0 = _mm_max_epi16(logmax_num_re0, xmm4); + logmax_num_re0 = _mm_max_epi16(logmax_num_re0, xmm5); + + y0i = _mm_subs_epi16(logmax_num_re0, logmax_den_re0); + + + // Detection for 5th bit (LTE mapping) + xmm0 = _mm_max_epi16(bit_met_m7_m7, bit_met_m7_m5); + xmm1 = _mm_max_epi16(bit_met_m7_m3, bit_met_m7_m1); + xmm2 = _mm_max_epi16(bit_met_m7_p1, bit_met_m7_p3); + xmm3 = _mm_max_epi16(bit_met_m7_p5, bit_met_m7_p7); + 
xmm4 = _mm_max_epi16(xmm0, xmm1); + xmm5 = _mm_max_epi16(xmm2, xmm3); + logmax_den_re0 = _mm_max_epi16(xmm4, xmm5); + xmm0 = _mm_max_epi16(bit_met_m1_m7, bit_met_m1_m5); + xmm1 = _mm_max_epi16(bit_met_m1_m3, bit_met_m1_m1); + xmm2 = _mm_max_epi16(bit_met_m1_p1, bit_met_m1_p3); + xmm3 = _mm_max_epi16(bit_met_m1_p5, bit_met_m1_p7); + xmm4 = _mm_max_epi16(xmm0, xmm1); + xmm5 = _mm_max_epi16(xmm2, xmm3); + logmax_den_re0 = _mm_max_epi16(logmax_den_re0, xmm4); + logmax_den_re0 = _mm_max_epi16(logmax_den_re0, xmm5); + xmm0 = _mm_max_epi16(bit_met_p1_m7, bit_met_p1_m5); + xmm1 = _mm_max_epi16(bit_met_p1_m3, bit_met_p1_m1); + xmm2 = _mm_max_epi16(bit_met_p1_p1, bit_met_p1_p3); + xmm3 = _mm_max_epi16(bit_met_p1_p5, bit_met_p1_p7); + xmm4 = _mm_max_epi16(xmm0, xmm1); + xmm5 = _mm_max_epi16(xmm2, xmm3); + logmax_den_re0 = _mm_max_epi16(logmax_den_re0, xmm4); + logmax_den_re0 = _mm_max_epi16(logmax_den_re0, xmm5); + xmm0 = _mm_max_epi16(bit_met_p7_m7, bit_met_p7_m5); + xmm1 = _mm_max_epi16(bit_met_p7_m3, bit_met_p7_m1); + xmm2 = _mm_max_epi16(bit_met_p7_p1, bit_met_p7_p3); + xmm3 = _mm_max_epi16(bit_met_p7_p5, bit_met_p7_p7); + xmm4 = _mm_max_epi16(xmm0, xmm1); + xmm5 = _mm_max_epi16(xmm2, xmm3); + logmax_den_re0 = _mm_max_epi16(logmax_den_re0, xmm4); + logmax_den_re0 = _mm_max_epi16(logmax_den_re0, xmm5); + + xmm0 = _mm_max_epi16(bit_met_m5_m7, bit_met_m5_m5); + xmm1 = _mm_max_epi16(bit_met_m5_m3, bit_met_m5_m1); + xmm2 = _mm_max_epi16(bit_met_m5_p1, bit_met_m5_p3); + xmm3 = _mm_max_epi16(bit_met_m5_p5, bit_met_m5_p7); + xmm4 = _mm_max_epi16(xmm0, xmm1); + xmm5 = _mm_max_epi16(xmm2, xmm3); + logmax_num_re0 = _mm_max_epi16(xmm4, xmm5); + xmm0 = _mm_max_epi16(bit_met_m3_m7, bit_met_m3_m5); + xmm1 = _mm_max_epi16(bit_met_m3_m3, bit_met_m3_m1); + xmm2 = _mm_max_epi16(bit_met_m3_p1, bit_met_m3_p3); + xmm3 = _mm_max_epi16(bit_met_m3_p5, bit_met_m3_p7); + xmm4 = _mm_max_epi16(xmm0, xmm1); + xmm5 = _mm_max_epi16(xmm2, xmm3); + logmax_num_re0 = _mm_max_epi16(logmax_num_re0, xmm4); + 
logmax_num_re0 = _mm_max_epi16(logmax_num_re0, xmm5); + xmm0 = _mm_max_epi16(bit_met_p3_m7, bit_met_p3_m5); + xmm1 = _mm_max_epi16(bit_met_p3_m3, bit_met_p3_m1); + xmm2 = _mm_max_epi16(bit_met_p3_p1, bit_met_p3_p3); + xmm3 = _mm_max_epi16(bit_met_p3_p5, bit_met_p3_p7); + xmm4 = _mm_max_epi16(xmm0, xmm1); + xmm5 = _mm_max_epi16(xmm2, xmm3); + logmax_num_re0 = _mm_max_epi16(logmax_num_re0, xmm4); + logmax_num_re0 = _mm_max_epi16(logmax_num_re0, xmm5); + xmm0 = _mm_max_epi16(bit_met_p5_m7, bit_met_p5_m5); + xmm1 = _mm_max_epi16(bit_met_p5_m3, bit_met_p5_m1); + xmm2 = _mm_max_epi16(bit_met_p5_p1, bit_met_p5_p3); + xmm3 = _mm_max_epi16(bit_met_p5_p5, bit_met_p5_p7); + xmm4 = _mm_max_epi16(xmm0, xmm1); + xmm5 = _mm_max_epi16(xmm2, xmm3); + logmax_num_re0 = _mm_max_epi16(logmax_num_re0, xmm4); + logmax_num_re0 = _mm_max_epi16(logmax_num_re0, xmm5); + + y1i = _mm_subs_epi16(logmax_num_re0, logmax_den_re0); + + // Detection for 6th bit (LTE mapping) + xmm0 = _mm_max_epi16(bit_met_p7_p7, bit_met_p5_p7); + xmm1 = _mm_max_epi16(bit_met_p3_p7, bit_met_p1_p7); + xmm2 = _mm_max_epi16(bit_met_m1_p7, bit_met_m3_p7); + xmm3 = _mm_max_epi16(bit_met_m5_p7, bit_met_m7_p7); + xmm4 = _mm_max_epi16(xmm0, xmm1); + xmm5 = _mm_max_epi16(xmm2, xmm3); + logmax_den_re0 = _mm_max_epi16(xmm4, xmm5); + xmm0 = _mm_max_epi16(bit_met_p7_p1, bit_met_p5_p1); + xmm1 = _mm_max_epi16(bit_met_p3_p1, bit_met_p1_p1); + xmm2 = _mm_max_epi16(bit_met_m1_p1, bit_met_m3_p1); + xmm3 = _mm_max_epi16(bit_met_m5_p1, bit_met_m5_p1); + xmm4 = _mm_max_epi16(xmm0, xmm1); + xmm5 = _mm_max_epi16(xmm2, xmm3); + logmax_den_re0 = _mm_max_epi16(logmax_den_re0, xmm4); + logmax_den_re0 = _mm_max_epi16(logmax_den_re0, xmm5); + xmm0 = _mm_max_epi16(bit_met_p7_m1, bit_met_p5_m1); + xmm1 = _mm_max_epi16(bit_met_p3_m1, bit_met_p1_m1); + xmm2 = _mm_max_epi16(bit_met_m1_m1, bit_met_m3_m1); + xmm3 = _mm_max_epi16(bit_met_m5_m1, bit_met_m7_m1); + xmm4 = _mm_max_epi16(xmm0, xmm1); + xmm5 = _mm_max_epi16(xmm2, xmm3); + logmax_den_re0 = 
_mm_max_epi16(logmax_den_re0, xmm4); + logmax_den_re0 = _mm_max_epi16(logmax_den_re0, xmm5); + xmm0 = _mm_max_epi16(bit_met_p7_m7, bit_met_p5_m7); + xmm1 = _mm_max_epi16(bit_met_p3_m7, bit_met_p1_m7); + xmm2 = _mm_max_epi16(bit_met_m1_m7, bit_met_m3_m7); + xmm3 = _mm_max_epi16(bit_met_m5_m7, bit_met_m7_m7); + xmm4 = _mm_max_epi16(xmm0, xmm1); + xmm5 = _mm_max_epi16(xmm2, xmm3); + logmax_den_re0 = _mm_max_epi16(logmax_den_re0, xmm4); + logmax_den_re0 = _mm_max_epi16(logmax_den_re0, xmm5); + + xmm0 = _mm_max_epi16(bit_met_p7_m5, bit_met_p5_m5); + xmm1 = _mm_max_epi16(bit_met_p3_m5, bit_met_p1_m5); + xmm2 = _mm_max_epi16(bit_met_m1_m5, bit_met_m3_m5); + xmm3 = _mm_max_epi16(bit_met_m5_m5, bit_met_m7_m5); + xmm4 = _mm_max_epi16(xmm0, xmm1); + xmm5 = _mm_max_epi16(xmm2, xmm3); + logmax_num_re0 = _mm_max_epi16(xmm4, xmm5); + xmm0 = _mm_max_epi16(bit_met_p7_m3, bit_met_p5_m3); + xmm1 = _mm_max_epi16(bit_met_p3_m3, bit_met_p1_m3); + xmm2 = _mm_max_epi16(bit_met_m1_m3, bit_met_m3_m3); + xmm3 = _mm_max_epi16(bit_met_m5_m3, bit_met_m7_m3); + xmm4 = _mm_max_epi16(xmm0, xmm1); + xmm5 = _mm_max_epi16(xmm2, xmm3); + logmax_num_re0 = _mm_max_epi16(logmax_num_re0, xmm4); + logmax_num_re0 = _mm_max_epi16(logmax_num_re0, xmm5); + xmm0 = _mm_max_epi16(bit_met_p7_p3, bit_met_p5_p3); + xmm1 = _mm_max_epi16(bit_met_p3_p3, bit_met_p1_p3); + xmm2 = _mm_max_epi16(bit_met_m1_p3, bit_met_m3_p3); + xmm3 = _mm_max_epi16(bit_met_m5_p3, bit_met_m7_p3); + xmm4 = _mm_max_epi16(xmm0, xmm1); + xmm5 = _mm_max_epi16(xmm2, xmm3); + logmax_num_re0 = _mm_max_epi16(logmax_num_re0, xmm4); + logmax_num_re0 = _mm_max_epi16(logmax_num_re0, xmm5); + xmm0 = _mm_max_epi16(bit_met_p7_p5, bit_met_p5_p5); + xmm1 = _mm_max_epi16(bit_met_p3_p5, bit_met_p1_p5); + xmm2 = _mm_max_epi16(bit_met_m1_p5, bit_met_m3_p5); + xmm3 = _mm_max_epi16(bit_met_m5_p5, bit_met_m7_p5); + xmm4 = _mm_max_epi16(xmm0, xmm1); + xmm5 = _mm_max_epi16(xmm2, xmm3); + logmax_num_re0 = _mm_max_epi16(logmax_num_re0, xmm4); + logmax_num_re0 = 
_mm_max_epi16(logmax_num_re0, xmm5); + + y2i = _mm_subs_epi16(logmax_num_re0, logmax_den_re0); + + + // map to output stream, difficult to do in SIMD since we have 6 16bit LLRs + // RE 1 + j = 24*i; + stream0_out[j + 0] = ((short *)&y0r)[0]; + stream0_out[j + 1] = ((short *)&y1r)[0]; + stream0_out[j + 2] = ((short *)&y2r)[0]; + stream0_out[j + 3] = ((short *)&y0i)[0]; + stream0_out[j + 4] = ((short *)&y1i)[0]; + stream0_out[j + 5] = ((short *)&y2i)[0]; + // RE 2 + stream0_out[j + 6] = ((short *)&y0r)[1]; + stream0_out[j + 7] = ((short *)&y1r)[1]; + stream0_out[j + 8] = ((short *)&y2r)[1]; + stream0_out[j + 9] = ((short *)&y0i)[1]; + stream0_out[j + 10] = ((short *)&y1i)[1]; + stream0_out[j + 11] = ((short *)&y2i)[1]; + // RE 3 + stream0_out[j + 12] = ((short *)&y0r)[2]; + stream0_out[j + 13] = ((short *)&y1r)[2]; + stream0_out[j + 14] = ((short *)&y2r)[2]; + stream0_out[j + 15] = ((short *)&y0i)[2]; + stream0_out[j + 16] = ((short *)&y1i)[2]; + stream0_out[j + 17] = ((short *)&y2i)[2]; + // RE 4 + stream0_out[j + 18] = ((short *)&y0r)[3]; + stream0_out[j + 19] = ((short *)&y1r)[3]; + stream0_out[j + 20] = ((short *)&y2r)[3]; + stream0_out[j + 21] = ((short *)&y0i)[3]; + stream0_out[j + 22] = ((short *)&y1i)[3]; + stream0_out[j + 23] = ((short *)&y2i)[3]; + // RE 5 + stream0_out[j + 24] = ((short *)&y0r)[4]; + stream0_out[j + 25] = ((short *)&y1r)[4]; + stream0_out[j + 26] = ((short *)&y2r)[4]; + stream0_out[j + 27] = ((short *)&y0i)[4]; + stream0_out[j + 28] = ((short *)&y1i)[4]; + stream0_out[j + 29] = ((short *)&y2i)[4]; + // RE 6 + stream0_out[j + 30] = ((short *)&y0r)[5]; + stream0_out[j + 31] = ((short *)&y1r)[5]; + stream0_out[j + 32] = ((short *)&y2r)[5]; + stream0_out[j + 33] = ((short *)&y0i)[5]; + stream0_out[j + 34] = ((short *)&y1i)[5]; + stream0_out[j + 35] = ((short *)&y2i)[5]; + // RE 7 + stream0_out[j + 36] = ((short *)&y0r)[6]; + stream0_out[j + 37] = ((short *)&y1r)[6]; + stream0_out[j + 38] = ((short *)&y2r)[6]; + stream0_out[j + 39] = ((short 
*)&y0i)[6]; + stream0_out[j + 40] = ((short *)&y1i)[6]; + stream0_out[j + 41] = ((short *)&y2i)[6]; + // RE 8 + stream0_out[j + 42] = ((short *)&y0r)[7]; + stream0_out[j + 43] = ((short *)&y1r)[7]; + stream0_out[j + 44] = ((short *)&y2r)[7]; + stream0_out[j + 45] = ((short *)&y0i)[7]; + stream0_out[j + 46] = ((short *)&y1i)[7]; + stream0_out[j + 47] = ((short *)&y2i)[7]; +#elif defined(__arm__) + +#endif + } + +#if defined(__x86_64__) || defined(__i386__) + _mm_empty(); + _m_empty(); +#endif +} + + +int dlsch_64qam_qpsk_llr(LTE_DL_FRAME_PARMS *frame_parms, + int32_t **rxdataF_comp, + int32_t **rxdataF_comp_i, + int32_t **dl_ch_mag, + int32_t **rho_i, + int16_t *dlsch_llr, + uint8_t symbol, + uint8_t first_symbol_flag, + uint16_t nb_rb, + uint16_t pbch_pss_sss_adjust, + int16_t **llr16p) +{ + + int16_t *rxF = (int16_t*)&rxdataF_comp[0][(symbol*frame_parms->N_RB_DL*12)]; + int16_t *rxF_i = (int16_t*)&rxdataF_comp_i[0][(symbol*frame_parms->N_RB_DL*12)]; + int16_t *ch_mag = (int16_t*)&dl_ch_mag[0][(symbol*frame_parms->N_RB_DL*12)]; + int16_t *rho = (int16_t*)&rho_i[0][(symbol*frame_parms->N_RB_DL*12)]; + int16_t *llr16; + int len; + uint8_t symbol_mod = (symbol >= (7-frame_parms->Ncp))? 
(symbol-(7-frame_parms->Ncp)) : symbol; + + //first symbol has different structure due to more pilots + if (first_symbol_flag == 1) { + llr16 = (int16_t*)dlsch_llr; + } else { + llr16 = (int16_t*)(*llr16p); + } + + AssertFatal(llr16!=NULL,"dlsch_16qam_64qam_llr:llr is null, symbol %d\n",symbol); + + if ((symbol_mod==0) || (symbol_mod==(4-frame_parms->Ncp))) { + // if symbol has pilots + if (frame_parms->nb_antenna_ports_eNB!=1) + // in 2 antenna ports we have 8 REs per symbol per RB + len = (nb_rb*8) - (2*pbch_pss_sss_adjust/3); + else + // for 1 antenna port we have 10 REs per symbol per RB + len = (nb_rb*10) - (5*pbch_pss_sss_adjust/6); + } else { + // symbol has no pilots + len = (nb_rb*12) - pbch_pss_sss_adjust; + } + + qam64_qpsk((short *)rxF, + (short *)rxF_i, + (short *)ch_mag, + (short *)llr16, + (short *)rho, + len); + + llr16 += (6*len); + *llr16p = (short *)llr16; + return(0); +} + + + +void qam64_qam16(short *stream0_in, + short *stream1_in, + short *ch_mag, + short *ch_mag_i, + short *stream0_out, + short *rho01, + int length + ) +{ + + /* + Author: S. Wagner + Date: 31-07-12 + + Input: + stream0_in: MF filter for 1st stream, i.e., y0=h0'*y + stream1_in: MF filter for 2nd stream, i.e., y1=h1'*y + ch_mag: 4*h0/sqrt(42), [Re0 Im0 Re1 Im1] s.t. Im0=Re0, Im1=Re1, etc + ch_mag_i: 4*h1/sqrt(42), [Re0 Im0 Re1 Im1] s.t. 
Im0=Re0, Im1=Re1, etc + rho01: Channel cross correlation, i.e., h1'*h0 + + Output: + stream0_out: output LLRs for 1st stream + */ + +#if defined(__x86_64__) || defined(__i386__) + + __m128i *rho01_128i = (__m128i *)rho01; + __m128i *stream0_128i_in = (__m128i *)stream0_in; + __m128i *stream1_128i_in = (__m128i *)stream1_in; + __m128i *ch_mag_128i = (__m128i *)ch_mag; + __m128i *ch_mag_128i_i = (__m128i *)ch_mag_i; + + __m128i ONE_OVER_SQRT_42 = _mm_set1_epi16(10112); // round(1/sqrt(42)*2^16) + __m128i THREE_OVER_SQRT_42 = _mm_set1_epi16(30337); // round(3/sqrt(42)*2^16) + __m128i FIVE_OVER_SQRT_42 = _mm_set1_epi16(25281); // round(5/sqrt(42)*2^15) + __m128i SEVEN_OVER_SQRT_42 = _mm_set1_epi16(17697); // round(5/sqrt(42)*2^15) + __m128i FORTYNINE_OVER_FOUR_SQRT_42 = _mm_set1_epi16(30969); // round(49/(4*sqrt(42))*2^14), Q2.14 + __m128i THIRTYSEVEN_OVER_FOUR_SQRT_42 = _mm_set1_epi16(23385); // round(37/(4*sqrt(42))*2^14), Q2.14 + __m128i TWENTYFIVE_OVER_FOUR_SQRT_42 = _mm_set1_epi16(31601); // round(25/(4*sqrt(42))*2^15) + __m128i TWENTYNINE_OVER_FOUR_SQRT_42 = _mm_set1_epi16(18329); // round(29/(4*sqrt(42))*2^15), Q2.14 + __m128i SEVENTEEN_OVER_FOUR_SQRT_42 = _mm_set1_epi16(21489); // round(17/(4*sqrt(42))*2^15) + __m128i NINE_OVER_FOUR_SQRT_42 = _mm_set1_epi16(11376); // round(9/(4*sqrt(42))*2^15) + __m128i THIRTEEN_OVER_FOUR_SQRT_42 = _mm_set1_epi16(16433); // round(13/(4*sqrt(42))*2^15) + __m128i FIVE_OVER_FOUR_SQRT_42 = _mm_set1_epi16(6320); // round(5/(4*sqrt(42))*2^15) + __m128i ONE_OVER_FOUR_SQRT_42 = _mm_set1_epi16(1264); // round(1/(4*sqrt(42))*2^15) + __m128i ONE_OVER_SQRT_10_Q15 = _mm_set1_epi16(10362); // round(1/sqrt(10)*2^15) + __m128i THREE_OVER_SQRT_10 = _mm_set1_epi16(31086); // round(3/sqrt(10)*2^15) + __m128i SQRT_10_OVER_FOUR = _mm_set1_epi16(25905); // round(sqrt(10)/4*2^15) + + + __m128i ch_mag_int; + __m128i ch_mag_des; + __m128i ch_mag_98_over_42_with_sigma2; + __m128i ch_mag_74_over_42_with_sigma2; + __m128i ch_mag_58_over_42_with_sigma2; + 
__m128i ch_mag_50_over_42_with_sigma2; + __m128i ch_mag_34_over_42_with_sigma2; + __m128i ch_mag_18_over_42_with_sigma2; + __m128i ch_mag_26_over_42_with_sigma2; + __m128i ch_mag_10_over_42_with_sigma2; + __m128i ch_mag_2_over_42_with_sigma2; + __m128i y0r_one_over_sqrt_21; + __m128i y0r_three_over_sqrt_21; + __m128i y0r_five_over_sqrt_21; + __m128i y0r_seven_over_sqrt_21; + __m128i y0i_one_over_sqrt_21; + __m128i y0i_three_over_sqrt_21; + __m128i y0i_five_over_sqrt_21; + __m128i y0i_seven_over_sqrt_21; + +#elif defined(__arm__) + +#endif + int i,j; + + + + for (i=0; i<length>>2; i+=2) { + +#if defined(__x86_64__) || defined(__i386__) + // Get rho + xmm0 = rho01_128i[i]; + xmm1 = rho01_128i[i+1]; + xmm0 = _mm_shufflelo_epi16(xmm0,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm0 = _mm_shufflehi_epi16(xmm0,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm0 = _mm_shuffle_epi32(xmm0,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm1 = _mm_shufflelo_epi16(xmm1,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm1 = _mm_shufflehi_epi16(xmm1,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm1 = _mm_shuffle_epi32(xmm1,0xd8); //_MM_SHUFFLE(0,2,1,3)); + //xmm0 = [Re(0,1) Re(2,3) Im(0,1) Im(2,3)] + //xmm1 = [Re(4,5) Re(6,7) Im(4,5) Im(6,7)] + xmm2 = _mm_unpacklo_epi64(xmm0,xmm1); // Re(rho) + xmm3 = _mm_unpackhi_epi64(xmm0,xmm1); // Im(rho) + rho_rpi = _mm_adds_epi16(xmm2,xmm3); // rho = Re(rho) + Im(rho) + rho_rmi = _mm_subs_epi16(xmm2,xmm3); // rho* = Re(rho) - Im(rho) + + // Compute the different rhos + rho_rpi_1_1 = _mm_mulhi_epi16(rho_rpi, ONE_OVER_SQRT_42); + rho_rmi_1_1 = _mm_mulhi_epi16(rho_rmi, ONE_OVER_SQRT_42); + rho_rpi_3_3 = _mm_mulhi_epi16(rho_rpi, THREE_OVER_SQRT_42); + rho_rmi_3_3 = _mm_mulhi_epi16(rho_rmi, THREE_OVER_SQRT_42); + rho_rpi_5_5 = _mm_mulhi_epi16(rho_rpi, FIVE_OVER_SQRT_42); + rho_rmi_5_5 = _mm_mulhi_epi16(rho_rmi, FIVE_OVER_SQRT_42); + rho_rpi_7_7 = _mm_mulhi_epi16(rho_rpi, SEVEN_OVER_SQRT_42); + rho_rmi_7_7 = _mm_mulhi_epi16(rho_rmi, SEVEN_OVER_SQRT_42); + + rho_rpi_5_5 = _mm_slli_epi16(rho_rpi_5_5, 1); + 
rho_rmi_5_5 = _mm_slli_epi16(rho_rmi_5_5, 1); + rho_rpi_7_7 = _mm_slli_epi16(rho_rpi_7_7, 2); + rho_rmi_7_7 = _mm_slli_epi16(rho_rmi_7_7, 2); + + xmm4 = _mm_mulhi_epi16(xmm2, ONE_OVER_SQRT_42); + xmm5 = _mm_mulhi_epi16(xmm3, ONE_OVER_SQRT_42); + xmm6 = _mm_mulhi_epi16(xmm3, THREE_OVER_SQRT_42); + xmm7 = _mm_mulhi_epi16(xmm3, FIVE_OVER_SQRT_42); + xmm8 = _mm_mulhi_epi16(xmm3, SEVEN_OVER_SQRT_42); + xmm7 = _mm_slli_epi16(xmm7, 1); + xmm8 = _mm_slli_epi16(xmm8, 2); + + rho_rpi_1_3 = _mm_adds_epi16(xmm4, xmm6); + rho_rmi_1_3 = _mm_subs_epi16(xmm4, xmm6); + rho_rpi_1_5 = _mm_adds_epi16(xmm4, xmm7); + rho_rmi_1_5 = _mm_subs_epi16(xmm4, xmm7); + rho_rpi_1_7 = _mm_adds_epi16(xmm4, xmm8); + rho_rmi_1_7 = _mm_subs_epi16(xmm4, xmm8); + + xmm4 = _mm_mulhi_epi16(xmm2, THREE_OVER_SQRT_42); + rho_rpi_3_1 = _mm_adds_epi16(xmm4, xmm5); + rho_rmi_3_1 = _mm_subs_epi16(xmm4, xmm5); + rho_rpi_3_5 = _mm_adds_epi16(xmm4, xmm7); + rho_rmi_3_5 = _mm_subs_epi16(xmm4, xmm7); + rho_rpi_3_7 = _mm_adds_epi16(xmm4, xmm8); + rho_rmi_3_7 = _mm_subs_epi16(xmm4, xmm8); + + xmm4 = _mm_mulhi_epi16(xmm2, FIVE_OVER_SQRT_42); + xmm4 = _mm_slli_epi16(xmm4, 1); + rho_rpi_5_1 = _mm_adds_epi16(xmm4, xmm5); + rho_rmi_5_1 = _mm_subs_epi16(xmm4, xmm5); + rho_rpi_5_3 = _mm_adds_epi16(xmm4, xmm6); + rho_rmi_5_3 = _mm_subs_epi16(xmm4, xmm6); + rho_rpi_5_7 = _mm_adds_epi16(xmm4, xmm8); + rho_rmi_5_7 = _mm_subs_epi16(xmm4, xmm8); + + xmm4 = _mm_mulhi_epi16(xmm2, SEVEN_OVER_SQRT_42); + xmm4 = _mm_slli_epi16(xmm4, 2); + rho_rpi_7_1 = _mm_adds_epi16(xmm4, xmm5); + rho_rmi_7_1 = _mm_subs_epi16(xmm4, xmm5); + rho_rpi_7_3 = _mm_adds_epi16(xmm4, xmm6); + rho_rmi_7_3 = _mm_subs_epi16(xmm4, xmm6); + rho_rpi_7_5 = _mm_adds_epi16(xmm4, xmm7); + rho_rmi_7_5 = _mm_subs_epi16(xmm4, xmm7); + + // Rearrange interfering MF output + xmm0 = stream1_128i_in[i]; + xmm1 = stream1_128i_in[i+1]; + xmm0 = _mm_shufflelo_epi16(xmm0,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm0 = _mm_shufflehi_epi16(xmm0,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm0 = 
_mm_shuffle_epi32(xmm0,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm1 = _mm_shufflelo_epi16(xmm1,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm1 = _mm_shufflehi_epi16(xmm1,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm1 = _mm_shuffle_epi32(xmm1,0xd8); //_MM_SHUFFLE(0,2,1,3)); + //xmm0 = [Re(0,1) Re(2,3) Im(0,1) Im(2,3)] + //xmm1 = [Re(4,5) Re(6,7) Im(4,5) Im(6,7)] + y1r = _mm_unpacklo_epi64(xmm0,xmm1); //[y1r(1),y1r(2),y1r(3),y1r(4)] + y1i = _mm_unpackhi_epi64(xmm0,xmm1); //[y1i(1),y1i(2),y1i(3),y1i(4)] + + // Psi_r calculation from rho_rpi or rho_rmi + xmm0 = _mm_setzero_si128(); // ZERO for abs_pi16 + xmm2 = _mm_subs_epi16(rho_rpi_7_7, y1r); + psi_r_p7_p7 = _mm_abs_epi16(xmm2); + xmm2 = _mm_subs_epi16(rho_rpi_7_5, y1r); + psi_r_p7_p5 = _mm_abs_epi16(xmm2); + xmm2 = _mm_subs_epi16(rho_rpi_7_3, y1r); + psi_r_p7_p3 = _mm_abs_epi16(xmm2); + xmm2 = _mm_subs_epi16(rho_rpi_7_1, y1r); + psi_r_p7_p1 = _mm_abs_epi16(xmm2); + xmm2 = _mm_subs_epi16(rho_rmi_7_1, y1r); + psi_r_p7_m1 = _mm_abs_epi16(xmm2); + xmm2 = _mm_subs_epi16(rho_rmi_7_3, y1r); + psi_r_p7_m3 = _mm_abs_epi16(xmm2); + xmm2 = _mm_subs_epi16(rho_rmi_7_5, y1r); + psi_r_p7_m5 = _mm_abs_epi16(xmm2); + xmm2 = _mm_subs_epi16(rho_rmi_7_7, y1r); + psi_r_p7_m7 = _mm_abs_epi16(xmm2); + xmm2 = _mm_subs_epi16(rho_rpi_5_7, y1r); + psi_r_p5_p7 = _mm_abs_epi16(xmm2); + xmm2 = _mm_subs_epi16(rho_rpi_5_5, y1r); + psi_r_p5_p5 = _mm_abs_epi16(xmm2); + xmm2 = _mm_subs_epi16(rho_rpi_5_3, y1r); + psi_r_p5_p3 = _mm_abs_epi16(xmm2); + xmm2 = _mm_subs_epi16(rho_rpi_5_1, y1r); + psi_r_p5_p1 = _mm_abs_epi16(xmm2); + xmm2 = _mm_subs_epi16(rho_rmi_5_1, y1r); + psi_r_p5_m1 = _mm_abs_epi16(xmm2); + xmm2 = _mm_subs_epi16(rho_rmi_5_3, y1r); + psi_r_p5_m3 = _mm_abs_epi16(xmm2); + xmm2 = _mm_subs_epi16(rho_rmi_5_5, y1r); + psi_r_p5_m5 = _mm_abs_epi16(xmm2); + xmm2 = _mm_subs_epi16(rho_rmi_5_7, y1r); + psi_r_p5_m7 = _mm_abs_epi16(xmm2); + xmm2 = _mm_subs_epi16(rho_rpi_3_7, y1r); + psi_r_p3_p7 = _mm_abs_epi16(xmm2); + xmm2 = _mm_subs_epi16(rho_rpi_3_5, y1r); + psi_r_p3_p5 
= _mm_abs_epi16(xmm2); + xmm2 = _mm_subs_epi16(rho_rpi_3_3, y1r); + psi_r_p3_p3 = _mm_abs_epi16(xmm2); + xmm2 = _mm_subs_epi16(rho_rpi_3_1, y1r); + psi_r_p3_p1 = _mm_abs_epi16(xmm2); + xmm2 = _mm_subs_epi16(rho_rmi_3_1, y1r); + psi_r_p3_m1 = _mm_abs_epi16(xmm2); + xmm2 = _mm_subs_epi16(rho_rmi_3_3, y1r); + psi_r_p3_m3 = _mm_abs_epi16(xmm2); + xmm2 = _mm_subs_epi16(rho_rmi_3_5, y1r); + psi_r_p3_m5 = _mm_abs_epi16(xmm2); + xmm2 = _mm_subs_epi16(rho_rmi_3_7, y1r); + psi_r_p3_m7 = _mm_abs_epi16(xmm2); + xmm2 = _mm_subs_epi16(rho_rpi_1_7, y1r); + psi_r_p1_p7 = _mm_abs_epi16(xmm2); + xmm2 = _mm_subs_epi16(rho_rpi_1_5, y1r); + psi_r_p1_p5 = _mm_abs_epi16(xmm2); + xmm2 = _mm_subs_epi16(rho_rpi_1_3, y1r); + psi_r_p1_p3 = _mm_abs_epi16(xmm2); + xmm2 = _mm_subs_epi16(rho_rpi_1_1, y1r); + psi_r_p1_p1 = _mm_abs_epi16(xmm2); + xmm2 = _mm_subs_epi16(rho_rmi_1_1, y1r); + psi_r_p1_m1 = _mm_abs_epi16(xmm2); + xmm2 = _mm_subs_epi16(rho_rmi_1_3, y1r); + psi_r_p1_m3 = _mm_abs_epi16(xmm2); + xmm2 = _mm_subs_epi16(rho_rmi_1_5, y1r); + psi_r_p1_m5 = _mm_abs_epi16(xmm2); + xmm2 = _mm_subs_epi16(rho_rmi_1_7, y1r); + psi_r_p1_m7 = _mm_abs_epi16(xmm2); + + xmm2 = _mm_adds_epi16(rho_rmi_1_7, y1r); + psi_r_m1_p7 = _mm_abs_epi16(xmm2); + xmm2 = _mm_adds_epi16(rho_rmi_1_5, y1r); + psi_r_m1_p5 = _mm_abs_epi16(xmm2); + xmm2 = _mm_adds_epi16(rho_rmi_1_3, y1r); + psi_r_m1_p3 = _mm_abs_epi16(xmm2); + xmm2 = _mm_adds_epi16(rho_rmi_1_1, y1r); + psi_r_m1_p1 = _mm_abs_epi16(xmm2); + xmm2 = _mm_adds_epi16(rho_rpi_1_1, y1r); + psi_r_m1_m1 = _mm_abs_epi16(xmm2); + xmm2 = _mm_adds_epi16(rho_rpi_1_3, y1r); + psi_r_m1_m3 = _mm_abs_epi16(xmm2); + xmm2 = _mm_adds_epi16(rho_rpi_1_5, y1r); + psi_r_m1_m5 = _mm_abs_epi16(xmm2); + xmm2 = _mm_adds_epi16(rho_rpi_1_7, y1r); + psi_r_m1_m7 = _mm_abs_epi16(xmm2); + xmm2 = _mm_adds_epi16(rho_rmi_3_7, y1r); + psi_r_m3_p7 = _mm_abs_epi16(xmm2); + xmm2 = _mm_adds_epi16(rho_rmi_3_5, y1r); + psi_r_m3_p5 = _mm_abs_epi16(xmm2); + xmm2 = _mm_adds_epi16(rho_rmi_3_3, y1r); + 
psi_r_m3_p3 = _mm_abs_epi16(xmm2); + xmm2 = _mm_adds_epi16(rho_rmi_3_1, y1r); + psi_r_m3_p1 = _mm_abs_epi16(xmm2); + xmm2 = _mm_adds_epi16(rho_rpi_3_1, y1r); + psi_r_m3_m1 = _mm_abs_epi16(xmm2); + xmm2 = _mm_adds_epi16(rho_rpi_3_3, y1r); + psi_r_m3_m3 = _mm_abs_epi16(xmm2); + xmm2 = _mm_adds_epi16(rho_rpi_3_5, y1r); + psi_r_m3_m5 = _mm_abs_epi16(xmm2); + xmm2 = _mm_adds_epi16(rho_rpi_3_7, y1r); + psi_r_m3_m7 = _mm_abs_epi16(xmm2); + xmm2 = _mm_adds_epi16(rho_rmi_5_7, y1r); + psi_r_m5_p7 = _mm_abs_epi16(xmm2); + xmm2 = _mm_adds_epi16(rho_rmi_5_5, y1r); + psi_r_m5_p5 = _mm_abs_epi16(xmm2); + xmm2 = _mm_adds_epi16(rho_rmi_5_3, y1r); + psi_r_m5_p3 = _mm_abs_epi16(xmm2); + xmm2 = _mm_adds_epi16(rho_rmi_5_1, y1r); + psi_r_m5_p1 = _mm_abs_epi16(xmm2); + xmm2 = _mm_adds_epi16(rho_rpi_5_1, y1r); + psi_r_m5_m1 = _mm_abs_epi16(xmm2); + xmm2 = _mm_adds_epi16(rho_rpi_5_3, y1r); + psi_r_m5_m3 = _mm_abs_epi16(xmm2); + xmm2 = _mm_adds_epi16(rho_rpi_5_5, y1r); + psi_r_m5_m5 = _mm_abs_epi16(xmm2); + xmm2 = _mm_adds_epi16(rho_rpi_5_7, y1r); + psi_r_m5_m7 = _mm_abs_epi16(xmm2); + xmm2 = _mm_adds_epi16(rho_rmi_7_7, y1r); + psi_r_m7_p7 = _mm_abs_epi16(xmm2); + xmm2 = _mm_adds_epi16(rho_rmi_7_5, y1r); + psi_r_m7_p5 = _mm_abs_epi16(xmm2); + xmm2 = _mm_adds_epi16(rho_rmi_7_3, y1r); + psi_r_m7_p3 = _mm_abs_epi16(xmm2); + xmm2 = _mm_adds_epi16(rho_rmi_7_1, y1r); + psi_r_m7_p1 = _mm_abs_epi16(xmm2); + xmm2 = _mm_adds_epi16(rho_rpi_7_1, y1r); + psi_r_m7_m1 = _mm_abs_epi16(xmm2); + xmm2 = _mm_adds_epi16(rho_rpi_7_3, y1r); + psi_r_m7_m3 = _mm_abs_epi16(xmm2); + xmm2 = _mm_adds_epi16(rho_rpi_7_5, y1r); + psi_r_m7_m5 = _mm_abs_epi16(xmm2); + xmm2 = _mm_adds_epi16(rho_rpi_7_7, y1r); + psi_r_m7_m7 = _mm_abs_epi16(xmm2); + + // Psi_i calculation from rho_rpi or rho_rmi + xmm2 = _mm_subs_epi16(rho_rmi_7_7, y1i); + psi_i_p7_p7 = _mm_abs_epi16(xmm2); + xmm2 = _mm_subs_epi16(rho_rmi_5_7, y1i); + psi_i_p7_p5 = _mm_abs_epi16(xmm2); + xmm2 = _mm_subs_epi16(rho_rmi_3_7, y1i); + psi_i_p7_p3 = 
_mm_abs_epi16(xmm2); + xmm2 = _mm_subs_epi16(rho_rmi_1_7, y1i); + psi_i_p7_p1 = _mm_abs_epi16(xmm2); + xmm2 = _mm_adds_epi16(rho_rpi_1_7, y1i); + psi_i_p7_m1 = _mm_abs_epi16(xmm2); + xmm2 = _mm_adds_epi16(rho_rpi_3_7, y1i); + psi_i_p7_m3 = _mm_abs_epi16(xmm2); + xmm2 = _mm_adds_epi16(rho_rpi_5_7, y1i); + psi_i_p7_m5 = _mm_abs_epi16(xmm2); + xmm2 = _mm_adds_epi16(rho_rpi_7_7, y1i); + psi_i_p7_m7 = _mm_abs_epi16(xmm2); + xmm2 = _mm_subs_epi16(rho_rmi_7_5, y1i); + psi_i_p5_p7 = _mm_abs_epi16(xmm2); + xmm2 = _mm_subs_epi16(rho_rmi_5_5, y1i); + psi_i_p5_p5 = _mm_abs_epi16(xmm2); + xmm2 = _mm_subs_epi16(rho_rmi_3_5, y1i); + psi_i_p5_p3 = _mm_abs_epi16(xmm2); + xmm2 = _mm_subs_epi16(rho_rmi_1_5, y1i); + psi_i_p5_p1 = _mm_abs_epi16(xmm2); + xmm2 = _mm_adds_epi16(rho_rpi_1_5, y1i); + psi_i_p5_m1 = _mm_abs_epi16(xmm2); + xmm2 = _mm_adds_epi16(rho_rpi_3_5, y1i); + psi_i_p5_m3 = _mm_abs_epi16(xmm2); + xmm2 = _mm_adds_epi16(rho_rpi_5_5, y1i); + psi_i_p5_m5 = _mm_abs_epi16(xmm2); + xmm2 = _mm_adds_epi16(rho_rpi_7_5, y1i); + psi_i_p5_m7 = _mm_abs_epi16(xmm2); + xmm2 = _mm_subs_epi16(rho_rmi_7_3, y1i); + psi_i_p3_p7 = _mm_abs_epi16(xmm2); + xmm2 = _mm_subs_epi16(rho_rmi_5_3, y1i); + psi_i_p3_p5 = _mm_abs_epi16(xmm2); + xmm2 = _mm_subs_epi16(rho_rmi_3_3, y1i); + psi_i_p3_p3 = _mm_abs_epi16(xmm2); + xmm2 = _mm_subs_epi16(rho_rmi_1_3, y1i); + psi_i_p3_p1 = _mm_abs_epi16(xmm2); + xmm2 = _mm_adds_epi16(rho_rpi_1_3, y1i); + psi_i_p3_m1 = _mm_abs_epi16(xmm2); + xmm2 = _mm_adds_epi16(rho_rpi_3_3, y1i); + psi_i_p3_m3 = _mm_abs_epi16(xmm2); + xmm2 = _mm_adds_epi16(rho_rpi_5_3, y1i); + psi_i_p3_m5 = _mm_abs_epi16(xmm2); + xmm2 = _mm_adds_epi16(rho_rpi_7_3, y1i); + psi_i_p3_m7 = _mm_abs_epi16(xmm2); + xmm2 = _mm_subs_epi16(rho_rmi_7_1, y1i); + psi_i_p1_p7 = _mm_abs_epi16(xmm2); + xmm2 = _mm_subs_epi16(rho_rmi_5_1, y1i); + psi_i_p1_p5 = _mm_abs_epi16(xmm2); + xmm2 = _mm_subs_epi16(rho_rmi_3_1, y1i); + psi_i_p1_p3 = _mm_abs_epi16(xmm2); + xmm2 = _mm_subs_epi16(rho_rmi_1_1, y1i); + psi_i_p1_p1 = 
_mm_abs_epi16(xmm2); + xmm2 = _mm_adds_epi16(rho_rpi_1_1, y1i); + psi_i_p1_m1 = _mm_abs_epi16(xmm2); + xmm2 = _mm_adds_epi16(rho_rpi_3_1, y1i); + psi_i_p1_m3 = _mm_abs_epi16(xmm2); + xmm2 = _mm_adds_epi16(rho_rpi_5_1, y1i); + psi_i_p1_m5 = _mm_abs_epi16(xmm2); + xmm2 = _mm_adds_epi16(rho_rpi_7_1, y1i); + psi_i_p1_m7 = _mm_abs_epi16(xmm2); + + xmm2 = _mm_subs_epi16(rho_rpi_7_1, y1i); + psi_i_m1_p7 = _mm_abs_epi16(xmm2); + xmm2 = _mm_subs_epi16(rho_rpi_5_1, y1i); + psi_i_m1_p5 = _mm_abs_epi16(xmm2); + xmm2 = _mm_subs_epi16(rho_rpi_3_1, y1i); + psi_i_m1_p3 = _mm_abs_epi16(xmm2); + xmm2 = _mm_subs_epi16(rho_rpi_1_1, y1i); + psi_i_m1_p1 = _mm_abs_epi16(xmm2); + xmm2 = _mm_adds_epi16(rho_rmi_1_1, y1i); + psi_i_m1_m1 = _mm_abs_epi16(xmm2); + xmm2 = _mm_adds_epi16(rho_rmi_3_1, y1i); + psi_i_m1_m3 = _mm_abs_epi16(xmm2); + xmm2 = _mm_adds_epi16(rho_rmi_5_1, y1i); + psi_i_m1_m5 = _mm_abs_epi16(xmm2); + xmm2 = _mm_adds_epi16(rho_rmi_7_1, y1i); + psi_i_m1_m7 = _mm_abs_epi16(xmm2); + xmm2 = _mm_subs_epi16(rho_rpi_7_3, y1i); + psi_i_m3_p7 = _mm_abs_epi16(xmm2); + xmm2 = _mm_subs_epi16(rho_rpi_5_3, y1i); + psi_i_m3_p5 = _mm_abs_epi16(xmm2); + xmm2 = _mm_subs_epi16(rho_rpi_3_3, y1i); + psi_i_m3_p3 = _mm_abs_epi16(xmm2); + xmm2 = _mm_subs_epi16(rho_rpi_1_3, y1i); + psi_i_m3_p1 = _mm_abs_epi16(xmm2); + xmm2 = _mm_adds_epi16(rho_rmi_1_3, y1i); + psi_i_m3_m1 = _mm_abs_epi16(xmm2); + xmm2 = _mm_adds_epi16(rho_rmi_3_3, y1i); + psi_i_m3_m3 = _mm_abs_epi16(xmm2); + xmm2 = _mm_adds_epi16(rho_rmi_5_3, y1i); + psi_i_m3_m5 = _mm_abs_epi16(xmm2); + xmm2 = _mm_adds_epi16(rho_rmi_7_3, y1i); + psi_i_m3_m7 = _mm_abs_epi16(xmm2); + xmm2 = _mm_subs_epi16(rho_rpi_7_5, y1i); + psi_i_m5_p7 = _mm_abs_epi16(xmm2); + xmm2 = _mm_subs_epi16(rho_rpi_5_5, y1i); + psi_i_m5_p5 = _mm_abs_epi16(xmm2); + xmm2 = _mm_subs_epi16(rho_rpi_3_5, y1i); + psi_i_m5_p3 = _mm_abs_epi16(xmm2); + xmm2 = _mm_subs_epi16(rho_rpi_1_5, y1i); + psi_i_m5_p1 = _mm_abs_epi16(xmm2); + xmm2 = _mm_adds_epi16(rho_rmi_1_5, y1i); + psi_i_m5_m1 
= _mm_abs_epi16(xmm2); + xmm2 = _mm_adds_epi16(rho_rmi_3_5, y1i); + psi_i_m5_m3 = _mm_abs_epi16(xmm2); + xmm2 = _mm_adds_epi16(rho_rmi_5_5, y1i); + psi_i_m5_m5 = _mm_abs_epi16(xmm2); + xmm2 = _mm_adds_epi16(rho_rmi_7_5, y1i); + psi_i_m5_m7 = _mm_abs_epi16(xmm2); + xmm2 = _mm_subs_epi16(rho_rpi_7_7, y1i); + psi_i_m7_p7 = _mm_abs_epi16(xmm2); + xmm2 = _mm_subs_epi16(rho_rpi_5_7, y1i); + psi_i_m7_p5 = _mm_abs_epi16(xmm2); + xmm2 = _mm_subs_epi16(rho_rpi_3_7, y1i); + psi_i_m7_p3 = _mm_abs_epi16(xmm2); + xmm2 = _mm_subs_epi16(rho_rpi_1_7, y1i); + psi_i_m7_p1 = _mm_abs_epi16(xmm2); + xmm2 = _mm_adds_epi16(rho_rmi_1_7, y1i); + psi_i_m7_m1 = _mm_abs_epi16(xmm2); + xmm2 = _mm_adds_epi16(rho_rmi_3_7, y1i); + psi_i_m7_m3 = _mm_abs_epi16(xmm2); + xmm2 = _mm_adds_epi16(rho_rmi_5_7, y1i); + psi_i_m7_m5 = _mm_abs_epi16(xmm2); + xmm2 = _mm_adds_epi16(rho_rmi_7_7, y1i); + psi_i_m7_m7 = _mm_abs_epi16(xmm2); + + + // Rearrange desired MF output + xmm0 = stream0_128i_in[i]; + xmm1 = stream0_128i_in[i+1]; + xmm0 = _mm_shufflelo_epi16(xmm0,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm0 = _mm_shufflehi_epi16(xmm0,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm0 = _mm_shuffle_epi32(xmm0,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm1 = _mm_shufflelo_epi16(xmm1,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm1 = _mm_shufflehi_epi16(xmm1,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm1 = _mm_shuffle_epi32(xmm1,0xd8); //_MM_SHUFFLE(0,2,1,3)); + //xmm0 = [Re(0,1) Re(2,3) Im(0,1) Im(2,3)] + //xmm1 = [Re(4,5) Re(6,7) Im(4,5) Im(6,7)] + y0r = _mm_unpacklo_epi64(xmm0,xmm1); // = [y0r(1),y0r(2),y0r(3),y0r(4)] + y0i = _mm_unpackhi_epi64(xmm0,xmm1); + + // Rearrange desired channel magnitudes + xmm2 = ch_mag_128i[i]; // = [|h|^2(1),|h|^2(1),|h|^2(2),|h|^2(2)]*(2/sqrt(10)) + xmm3 = ch_mag_128i[i+1]; // = [|h|^2(3),|h|^2(3),|h|^2(4),|h|^2(4)]*(2/sqrt(10)) + xmm2 = _mm_shufflelo_epi16(xmm2,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm2 = _mm_shufflehi_epi16(xmm2,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm2 = _mm_shuffle_epi32(xmm2,0xd8); //_MM_SHUFFLE(0,2,1,3)); 
+ xmm3 = _mm_shufflelo_epi16(xmm3,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm3 = _mm_shufflehi_epi16(xmm3,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm3 = _mm_shuffle_epi32(xmm3,0xd8); //_MM_SHUFFLE(0,2,1,3)); + ch_mag_des = _mm_unpacklo_epi64(xmm2,xmm3); + + // Rearrange interfering channel magnitudes + xmm2 = ch_mag_128i_i[i]; + xmm3 = ch_mag_128i_i[i+1]; + xmm2 = _mm_shufflelo_epi16(xmm2,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm2 = _mm_shufflehi_epi16(xmm2,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm2 = _mm_shuffle_epi32(xmm2,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm3 = _mm_shufflelo_epi16(xmm3,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm3 = _mm_shufflehi_epi16(xmm3,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm3 = _mm_shuffle_epi32(xmm3,0xd8); //_MM_SHUFFLE(0,2,1,3)); + ch_mag_int = _mm_unpacklo_epi64(xmm2,xmm3); + + y0r_one_over_sqrt_21 = _mm_mulhi_epi16(y0r, ONE_OVER_SQRT_42); + y0r_three_over_sqrt_21 = _mm_mulhi_epi16(y0r, THREE_OVER_SQRT_42); + y0r_five_over_sqrt_21 = _mm_mulhi_epi16(y0r, FIVE_OVER_SQRT_42); + y0r_five_over_sqrt_21 = _mm_slli_epi16(y0r_five_over_sqrt_21, 1); + y0r_seven_over_sqrt_21 = _mm_mulhi_epi16(y0r, SEVEN_OVER_SQRT_42); + y0r_seven_over_sqrt_21 = _mm_slli_epi16(y0r_seven_over_sqrt_21, 2); // Q2.14 + + y0i_one_over_sqrt_21 = _mm_mulhi_epi16(y0i, ONE_OVER_SQRT_42); + y0i_three_over_sqrt_21 = _mm_mulhi_epi16(y0i, THREE_OVER_SQRT_42); + y0i_five_over_sqrt_21 = _mm_mulhi_epi16(y0i, FIVE_OVER_SQRT_42); + y0i_five_over_sqrt_21 = _mm_slli_epi16(y0i_five_over_sqrt_21, 1); + y0i_seven_over_sqrt_21 = _mm_mulhi_epi16(y0i, SEVEN_OVER_SQRT_42); + y0i_seven_over_sqrt_21 = _mm_slli_epi16(y0i_seven_over_sqrt_21, 2); // Q2.14 + + y0_p_7_1 = _mm_adds_epi16(y0r_seven_over_sqrt_21, y0i_one_over_sqrt_21); + y0_p_7_3 = _mm_adds_epi16(y0r_seven_over_sqrt_21, y0i_three_over_sqrt_21); + y0_p_7_5 = _mm_adds_epi16(y0r_seven_over_sqrt_21, y0i_five_over_sqrt_21); + y0_p_7_7 = _mm_adds_epi16(y0r_seven_over_sqrt_21, y0i_seven_over_sqrt_21); + y0_p_5_1 = _mm_adds_epi16(y0r_five_over_sqrt_21, 
y0i_one_over_sqrt_21); + y0_p_5_3 = _mm_adds_epi16(y0r_five_over_sqrt_21, y0i_three_over_sqrt_21); + y0_p_5_5 = _mm_adds_epi16(y0r_five_over_sqrt_21, y0i_five_over_sqrt_21); + y0_p_5_7 = _mm_adds_epi16(y0r_five_over_sqrt_21, y0i_seven_over_sqrt_21); + y0_p_3_1 = _mm_adds_epi16(y0r_three_over_sqrt_21, y0i_one_over_sqrt_21); + y0_p_3_3 = _mm_adds_epi16(y0r_three_over_sqrt_21, y0i_three_over_sqrt_21); + y0_p_3_5 = _mm_adds_epi16(y0r_three_over_sqrt_21, y0i_five_over_sqrt_21); + y0_p_3_7 = _mm_adds_epi16(y0r_three_over_sqrt_21, y0i_seven_over_sqrt_21); + y0_p_1_1 = _mm_adds_epi16(y0r_one_over_sqrt_21, y0i_one_over_sqrt_21); + y0_p_1_3 = _mm_adds_epi16(y0r_one_over_sqrt_21, y0i_three_over_sqrt_21); + y0_p_1_5 = _mm_adds_epi16(y0r_one_over_sqrt_21, y0i_five_over_sqrt_21); + y0_p_1_7 = _mm_adds_epi16(y0r_one_over_sqrt_21, y0i_seven_over_sqrt_21); + + y0_m_1_1 = _mm_subs_epi16(y0r_one_over_sqrt_21, y0i_one_over_sqrt_21); + y0_m_1_3 = _mm_subs_epi16(y0r_one_over_sqrt_21, y0i_three_over_sqrt_21); + y0_m_1_5 = _mm_subs_epi16(y0r_one_over_sqrt_21, y0i_five_over_sqrt_21); + y0_m_1_7 = _mm_subs_epi16(y0r_one_over_sqrt_21, y0i_seven_over_sqrt_21); + y0_m_3_1 = _mm_subs_epi16(y0r_three_over_sqrt_21, y0i_one_over_sqrt_21); + y0_m_3_3 = _mm_subs_epi16(y0r_three_over_sqrt_21, y0i_three_over_sqrt_21); + y0_m_3_5 = _mm_subs_epi16(y0r_three_over_sqrt_21, y0i_five_over_sqrt_21); + y0_m_3_7 = _mm_subs_epi16(y0r_three_over_sqrt_21, y0i_seven_over_sqrt_21); + y0_m_5_1 = _mm_subs_epi16(y0r_five_over_sqrt_21, y0i_one_over_sqrt_21); + y0_m_5_3 = _mm_subs_epi16(y0r_five_over_sqrt_21, y0i_three_over_sqrt_21); + y0_m_5_5 = _mm_subs_epi16(y0r_five_over_sqrt_21, y0i_five_over_sqrt_21); + y0_m_5_7 = _mm_subs_epi16(y0r_five_over_sqrt_21, y0i_seven_over_sqrt_21); + y0_m_7_1 = _mm_subs_epi16(y0r_seven_over_sqrt_21, y0i_one_over_sqrt_21); + y0_m_7_3 = _mm_subs_epi16(y0r_seven_over_sqrt_21, y0i_three_over_sqrt_21); + y0_m_7_5 = _mm_subs_epi16(y0r_seven_over_sqrt_21, y0i_five_over_sqrt_21); + y0_m_7_7 = 
_mm_subs_epi16(y0r_seven_over_sqrt_21, y0i_seven_over_sqrt_21); + + interference_abs_epi16(psi_r_p7_p7, ch_mag_int, a_r_p7_p7, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_r_p7_p5, ch_mag_int, a_r_p7_p5, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_r_p7_p3, ch_mag_int, a_r_p7_p3, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_r_p7_p1, ch_mag_int, a_r_p7_p1, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_r_p7_m1, ch_mag_int, a_r_p7_m1, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_r_p7_m3, ch_mag_int, a_r_p7_m3, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_r_p7_m5, ch_mag_int, a_r_p7_m5, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_r_p7_m7, ch_mag_int, a_r_p7_m7, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_r_p5_p7, ch_mag_int, a_r_p5_p7, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_r_p5_p5, ch_mag_int, a_r_p5_p5, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_r_p5_p3, ch_mag_int, a_r_p5_p3, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_r_p5_p1, ch_mag_int, a_r_p5_p1, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_r_p5_m1, ch_mag_int, a_r_p5_m1, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_r_p5_m3, ch_mag_int, a_r_p5_m3, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_r_p5_m5, ch_mag_int, a_r_p5_m5, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_r_p5_m7, ch_mag_int, a_r_p5_m7, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_r_p3_p7, ch_mag_int, a_r_p3_p7, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_r_p3_p5, ch_mag_int, a_r_p3_p5, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_r_p3_p3, ch_mag_int, a_r_p3_p3, 
ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_r_p3_p1, ch_mag_int, a_r_p3_p1, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_r_p3_m1, ch_mag_int, a_r_p3_m1, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_r_p3_m3, ch_mag_int, a_r_p3_m3, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_r_p3_m5, ch_mag_int, a_r_p3_m5, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_r_p3_m7, ch_mag_int, a_r_p3_m7, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_r_p1_p7, ch_mag_int, a_r_p1_p7, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_r_p1_p5, ch_mag_int, a_r_p1_p5, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_r_p1_p3, ch_mag_int, a_r_p1_p3, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_r_p1_p1, ch_mag_int, a_r_p1_p1, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_r_p1_m1, ch_mag_int, a_r_p1_m1, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_r_p1_m3, ch_mag_int, a_r_p1_m3, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_r_p1_m5, ch_mag_int, a_r_p1_m5, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_r_p1_m7, ch_mag_int, a_r_p1_m7, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_r_m1_p7, ch_mag_int, a_r_m1_p7, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_r_m1_p5, ch_mag_int, a_r_m1_p5, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_r_m1_p3, ch_mag_int, a_r_m1_p3, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_r_m1_p1, ch_mag_int, a_r_m1_p1, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_r_m1_m1, ch_mag_int, a_r_m1_m1, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_r_m1_m3, ch_mag_int, a_r_m1_m3, ONE_OVER_SQRT_10_Q15, 
THREE_OVER_SQRT_10); + interference_abs_epi16(psi_r_m1_m5, ch_mag_int, a_r_m1_m5, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_r_m1_m7, ch_mag_int, a_r_m1_m7, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_r_m3_p7, ch_mag_int, a_r_m3_p7, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_r_m3_p5, ch_mag_int, a_r_m3_p5, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_r_m3_p3, ch_mag_int, a_r_m3_p3, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_r_m3_p1, ch_mag_int, a_r_m3_p1, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_r_m3_m1, ch_mag_int, a_r_m3_m1, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_r_m3_m3, ch_mag_int, a_r_m3_m3, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_r_m3_m5, ch_mag_int, a_r_m3_m5, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_r_m3_m7, ch_mag_int, a_r_m3_m7, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_r_m5_p7, ch_mag_int, a_r_m5_p7, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_r_m5_p5, ch_mag_int, a_r_m5_p5, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_r_m5_p3, ch_mag_int, a_r_m5_p3, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_r_m5_p1, ch_mag_int, a_r_m5_p1, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_r_m5_m1, ch_mag_int, a_r_m5_m1, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_r_m5_m3, ch_mag_int, a_r_m5_m3, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_r_m5_m5, ch_mag_int, a_r_m5_m5, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_r_m5_m7, ch_mag_int, a_r_m5_m7, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_r_m7_p7, ch_mag_int, a_r_m7_p7, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + 
interference_abs_epi16(psi_r_m7_p5, ch_mag_int, a_r_m7_p5, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_r_m7_p3, ch_mag_int, a_r_m7_p3, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_r_m7_p1, ch_mag_int, a_r_m7_p1, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_r_m7_m1, ch_mag_int, a_r_m7_m1, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_r_m7_m3, ch_mag_int, a_r_m7_m3, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_r_m7_m5, ch_mag_int, a_r_m7_m5, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_r_m7_m7, ch_mag_int, a_r_m7_m7, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + + interference_abs_epi16(psi_i_p7_p7, ch_mag_int, a_i_p7_p7, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_i_p7_p5, ch_mag_int, a_i_p7_p5, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_i_p7_p3, ch_mag_int, a_i_p7_p3, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_i_p7_p1, ch_mag_int, a_i_p7_p1, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_i_p7_m1, ch_mag_int, a_i_p7_m1, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_i_p7_m3, ch_mag_int, a_i_p7_m3, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_i_p7_m5, ch_mag_int, a_i_p7_m5, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_i_p7_m7, ch_mag_int, a_i_p7_m7, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_i_p5_p7, ch_mag_int, a_i_p5_p7, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_i_p5_p5, ch_mag_int, a_i_p5_p5, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_i_p5_p3, ch_mag_int, a_i_p5_p3, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_i_p5_p1, ch_mag_int, a_i_p5_p1, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + 
interference_abs_epi16(psi_i_p5_m1, ch_mag_int, a_i_p5_m1, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_i_p5_m3, ch_mag_int, a_i_p5_m3, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_i_p5_m5, ch_mag_int, a_i_p5_m5, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_i_p5_m7, ch_mag_int, a_i_p5_m7, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_i_p3_p7, ch_mag_int, a_i_p3_p7, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_i_p3_p5, ch_mag_int, a_i_p3_p5, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_i_p3_p3, ch_mag_int, a_i_p3_p3, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_i_p3_p1, ch_mag_int, a_i_p3_p1, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_i_p3_m1, ch_mag_int, a_i_p3_m1, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_i_p3_m3, ch_mag_int, a_i_p3_m3, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_i_p3_m5, ch_mag_int, a_i_p3_m5, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_i_p3_m7, ch_mag_int, a_i_p3_m7, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_i_p1_p7, ch_mag_int, a_i_p1_p7, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_i_p1_p5, ch_mag_int, a_i_p1_p5, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_i_p1_p3, ch_mag_int, a_i_p1_p3, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_i_p1_p1, ch_mag_int, a_i_p1_p1, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_i_p1_m1, ch_mag_int, a_i_p1_m1, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_i_p1_m3, ch_mag_int, a_i_p1_m3, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_i_p1_m5, ch_mag_int, a_i_p1_m5, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + 
interference_abs_epi16(psi_i_p1_m7, ch_mag_int, a_i_p1_m7, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_i_m1_p7, ch_mag_int, a_i_m1_p7, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_i_m1_p5, ch_mag_int, a_i_m1_p5, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_i_m1_p3, ch_mag_int, a_i_m1_p3, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_i_m1_p1, ch_mag_int, a_i_m1_p1, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_i_m1_m1, ch_mag_int, a_i_m1_m1, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_i_m1_m3, ch_mag_int, a_i_m1_m3, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_i_m1_m5, ch_mag_int, a_i_m1_m5, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_i_m1_m7, ch_mag_int, a_i_m1_m7, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_i_m3_p7, ch_mag_int, a_i_m3_p7, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_i_m3_p5, ch_mag_int, a_i_m3_p5, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_i_m3_p3, ch_mag_int, a_i_m3_p3, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_i_m3_p1, ch_mag_int, a_i_m3_p1, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_i_m3_m1, ch_mag_int, a_i_m3_m1, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_i_m3_m3, ch_mag_int, a_i_m3_m3, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_i_m3_m5, ch_mag_int, a_i_m3_m5, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_i_m3_m7, ch_mag_int, a_i_m3_m7, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_i_m5_p7, ch_mag_int, a_i_m5_p7, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_i_m5_p5, ch_mag_int, a_i_m5_p5, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + 
interference_abs_epi16(psi_i_m5_p3, ch_mag_int, a_i_m5_p3, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_i_m5_p1, ch_mag_int, a_i_m5_p1, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_i_m5_m1, ch_mag_int, a_i_m5_m1, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_i_m5_m3, ch_mag_int, a_i_m5_m3, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_i_m5_m5, ch_mag_int, a_i_m5_m5, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_i_m5_m7, ch_mag_int, a_i_m5_m7, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_i_m7_p7, ch_mag_int, a_i_m7_p7, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_i_m7_p5, ch_mag_int, a_i_m7_p5, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_i_m7_p3, ch_mag_int, a_i_m7_p3, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_i_m7_p1, ch_mag_int, a_i_m7_p1, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_i_m7_m1, ch_mag_int, a_i_m7_m1, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_i_m7_m3, ch_mag_int, a_i_m7_m3, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_i_m7_m5, ch_mag_int, a_i_m7_m5, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_i_m7_m7, ch_mag_int, a_i_m7_m7, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + + // Calculation of a group of two terms in the bit metric involving product of psi and interference + prodsum_psi_a_epi16(psi_r_p7_p7, a_r_p7_p7, psi_i_p7_p7, a_i_p7_p7, psi_a_p7_p7); + prodsum_psi_a_epi16(psi_r_p7_p5, a_r_p7_p5, psi_i_p7_p5, a_i_p7_p5, psi_a_p7_p5); + prodsum_psi_a_epi16(psi_r_p7_p3, a_r_p7_p3, psi_i_p7_p3, a_i_p7_p3, psi_a_p7_p3); + prodsum_psi_a_epi16(psi_r_p7_p1, a_r_p7_p1, psi_i_p7_p1, a_i_p7_p1, psi_a_p7_p1); + prodsum_psi_a_epi16(psi_r_p7_m1, a_r_p7_m1, psi_i_p7_m1, a_i_p7_m1, psi_a_p7_m1); + 
prodsum_psi_a_epi16(psi_r_p7_m3, a_r_p7_m3, psi_i_p7_m3, a_i_p7_m3, psi_a_p7_m3); + prodsum_psi_a_epi16(psi_r_p7_m5, a_r_p7_m5, psi_i_p7_m5, a_i_p7_m5, psi_a_p7_m5); + prodsum_psi_a_epi16(psi_r_p7_m7, a_r_p7_m7, psi_i_p7_m7, a_i_p7_m7, psi_a_p7_m7); + prodsum_psi_a_epi16(psi_r_p5_p7, a_r_p5_p7, psi_i_p5_p7, a_i_p5_p7, psi_a_p5_p7); + prodsum_psi_a_epi16(psi_r_p5_p5, a_r_p5_p5, psi_i_p5_p5, a_i_p5_p5, psi_a_p5_p5); + prodsum_psi_a_epi16(psi_r_p5_p3, a_r_p5_p3, psi_i_p5_p3, a_i_p5_p3, psi_a_p5_p3); + prodsum_psi_a_epi16(psi_r_p5_p1, a_r_p5_p1, psi_i_p5_p1, a_i_p5_p1, psi_a_p5_p1); + prodsum_psi_a_epi16(psi_r_p5_m1, a_r_p5_m1, psi_i_p5_m1, a_i_p5_m1, psi_a_p5_m1); + prodsum_psi_a_epi16(psi_r_p5_m3, a_r_p5_m3, psi_i_p5_m3, a_i_p5_m3, psi_a_p5_m3); + prodsum_psi_a_epi16(psi_r_p5_m5, a_r_p5_m5, psi_i_p5_m5, a_i_p5_m5, psi_a_p5_m5); + prodsum_psi_a_epi16(psi_r_p5_m7, a_r_p5_m7, psi_i_p5_m7, a_i_p5_m7, psi_a_p5_m7); + prodsum_psi_a_epi16(psi_r_p3_p7, a_r_p3_p7, psi_i_p3_p7, a_i_p3_p7, psi_a_p3_p7); + prodsum_psi_a_epi16(psi_r_p3_p5, a_r_p3_p5, psi_i_p3_p5, a_i_p3_p5, psi_a_p3_p5); + prodsum_psi_a_epi16(psi_r_p3_p3, a_r_p3_p3, psi_i_p3_p3, a_i_p3_p3, psi_a_p3_p3); + prodsum_psi_a_epi16(psi_r_p3_p1, a_r_p3_p1, psi_i_p3_p1, a_i_p3_p1, psi_a_p3_p1); + prodsum_psi_a_epi16(psi_r_p3_m1, a_r_p3_m1, psi_i_p3_m1, a_i_p3_m1, psi_a_p3_m1); + prodsum_psi_a_epi16(psi_r_p3_m3, a_r_p3_m3, psi_i_p3_m3, a_i_p3_m3, psi_a_p3_m3); + prodsum_psi_a_epi16(psi_r_p3_m5, a_r_p3_m5, psi_i_p3_m5, a_i_p3_m5, psi_a_p3_m5); + prodsum_psi_a_epi16(psi_r_p3_m7, a_r_p3_m7, psi_i_p3_m7, a_i_p3_m7, psi_a_p3_m7); + prodsum_psi_a_epi16(psi_r_p1_p7, a_r_p1_p7, psi_i_p1_p7, a_i_p1_p7, psi_a_p1_p7); + prodsum_psi_a_epi16(psi_r_p1_p5, a_r_p1_p5, psi_i_p1_p5, a_i_p1_p5, psi_a_p1_p5); + prodsum_psi_a_epi16(psi_r_p1_p3, a_r_p1_p3, psi_i_p1_p3, a_i_p1_p3, psi_a_p1_p3); + prodsum_psi_a_epi16(psi_r_p1_p1, a_r_p1_p1, psi_i_p1_p1, a_i_p1_p1, psi_a_p1_p1); + prodsum_psi_a_epi16(psi_r_p1_m1, a_r_p1_m1, psi_i_p1_m1, a_i_p1_m1, 
psi_a_p1_m1); + prodsum_psi_a_epi16(psi_r_p1_m3, a_r_p1_m3, psi_i_p1_m3, a_i_p1_m3, psi_a_p1_m3); + prodsum_psi_a_epi16(psi_r_p1_m5, a_r_p1_m5, psi_i_p1_m5, a_i_p1_m5, psi_a_p1_m5); + prodsum_psi_a_epi16(psi_r_p1_m7, a_r_p1_m7, psi_i_p1_m7, a_i_p1_m7, psi_a_p1_m7); + prodsum_psi_a_epi16(psi_r_m1_p7, a_r_m1_p7, psi_i_m1_p7, a_i_m1_p7, psi_a_m1_p7); + prodsum_psi_a_epi16(psi_r_m1_p5, a_r_m1_p5, psi_i_m1_p5, a_i_m1_p5, psi_a_m1_p5); + prodsum_psi_a_epi16(psi_r_m1_p3, a_r_m1_p3, psi_i_m1_p3, a_i_m1_p3, psi_a_m1_p3); + prodsum_psi_a_epi16(psi_r_m1_p1, a_r_m1_p1, psi_i_m1_p1, a_i_m1_p1, psi_a_m1_p1); + prodsum_psi_a_epi16(psi_r_m1_m1, a_r_m1_m1, psi_i_m1_m1, a_i_m1_m1, psi_a_m1_m1); + prodsum_psi_a_epi16(psi_r_m1_m3, a_r_m1_m3, psi_i_m1_m3, a_i_m1_m3, psi_a_m1_m3); + prodsum_psi_a_epi16(psi_r_m1_m5, a_r_m1_m5, psi_i_m1_m5, a_i_m1_m5, psi_a_m1_m5); + prodsum_psi_a_epi16(psi_r_m1_m7, a_r_m1_m7, psi_i_m1_m7, a_i_m1_m7, psi_a_m1_m7); + prodsum_psi_a_epi16(psi_r_m3_p7, a_r_m3_p7, psi_i_m3_p7, a_i_m3_p7, psi_a_m3_p7); + prodsum_psi_a_epi16(psi_r_m3_p5, a_r_m3_p5, psi_i_m3_p5, a_i_m3_p5, psi_a_m3_p5); + prodsum_psi_a_epi16(psi_r_m3_p3, a_r_m3_p3, psi_i_m3_p3, a_i_m3_p3, psi_a_m3_p3); + prodsum_psi_a_epi16(psi_r_m3_p1, a_r_m3_p1, psi_i_m3_p1, a_i_m3_p1, psi_a_m3_p1); + prodsum_psi_a_epi16(psi_r_m3_m1, a_r_m3_m1, psi_i_m3_m1, a_i_m3_m1, psi_a_m3_m1); + prodsum_psi_a_epi16(psi_r_m3_m3, a_r_m3_m3, psi_i_m3_m3, a_i_m3_m3, psi_a_m3_m3); + prodsum_psi_a_epi16(psi_r_m3_m5, a_r_m3_m5, psi_i_m3_m5, a_i_m3_m5, psi_a_m3_m5); + prodsum_psi_a_epi16(psi_r_m3_m7, a_r_m3_m7, psi_i_m3_m7, a_i_m3_m7, psi_a_m3_m7); + prodsum_psi_a_epi16(psi_r_m5_p7, a_r_m5_p7, psi_i_m5_p7, a_i_m5_p7, psi_a_m5_p7); + prodsum_psi_a_epi16(psi_r_m5_p5, a_r_m5_p5, psi_i_m5_p5, a_i_m5_p5, psi_a_m5_p5); + prodsum_psi_a_epi16(psi_r_m5_p3, a_r_m5_p3, psi_i_m5_p3, a_i_m5_p3, psi_a_m5_p3); + prodsum_psi_a_epi16(psi_r_m5_p1, a_r_m5_p1, psi_i_m5_p1, a_i_m5_p1, psi_a_m5_p1); + prodsum_psi_a_epi16(psi_r_m5_m1, a_r_m5_m1, 
psi_i_m5_m1, a_i_m5_m1, psi_a_m5_m1); + prodsum_psi_a_epi16(psi_r_m5_m3, a_r_m5_m3, psi_i_m5_m3, a_i_m5_m3, psi_a_m5_m3); + prodsum_psi_a_epi16(psi_r_m5_m5, a_r_m5_m5, psi_i_m5_m5, a_i_m5_m5, psi_a_m5_m5); + prodsum_psi_a_epi16(psi_r_m5_m7, a_r_m5_m7, psi_i_m5_m7, a_i_m5_m7, psi_a_m5_m7); + prodsum_psi_a_epi16(psi_r_m7_p7, a_r_m7_p7, psi_i_m7_p7, a_i_m7_p7, psi_a_m7_p7); + prodsum_psi_a_epi16(psi_r_m7_p5, a_r_m7_p5, psi_i_m7_p5, a_i_m7_p5, psi_a_m7_p5); + prodsum_psi_a_epi16(psi_r_m7_p3, a_r_m7_p3, psi_i_m7_p3, a_i_m7_p3, psi_a_m7_p3); + prodsum_psi_a_epi16(psi_r_m7_p1, a_r_m7_p1, psi_i_m7_p1, a_i_m7_p1, psi_a_m7_p1); + prodsum_psi_a_epi16(psi_r_m7_m1, a_r_m7_m1, psi_i_m7_m1, a_i_m7_m1, psi_a_m7_m1); + prodsum_psi_a_epi16(psi_r_m7_m3, a_r_m7_m3, psi_i_m7_m3, a_i_m7_m3, psi_a_m7_m3); + prodsum_psi_a_epi16(psi_r_m7_m5, a_r_m7_m5, psi_i_m7_m5, a_i_m7_m5, psi_a_m7_m5); + prodsum_psi_a_epi16(psi_r_m7_m7, a_r_m7_m7, psi_i_m7_m7, a_i_m7_m7, psi_a_m7_m7); + + // Calculation of a group of two terms in the bit metric involving squares of interference + square_a_epi16(a_r_p7_p7, a_i_p7_p7, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_p7_p7); + square_a_epi16(a_r_p7_p5, a_i_p7_p5, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_p7_p5); + square_a_epi16(a_r_p7_p3, a_i_p7_p3, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_p7_p3); + square_a_epi16(a_r_p7_p1, a_i_p7_p1, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_p7_p1); + square_a_epi16(a_r_p7_m1, a_i_p7_m1, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_p7_m1); + square_a_epi16(a_r_p7_m3, a_i_p7_m3, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_p7_m3); + square_a_epi16(a_r_p7_m5, a_i_p7_m5, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_p7_m5); + square_a_epi16(a_r_p7_m7, a_i_p7_m7, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_p7_m7); + square_a_epi16(a_r_p5_p7, a_i_p5_p7, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_p5_p7); + square_a_epi16(a_r_p5_p5, a_i_p5_p5, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_p5_p5); + square_a_epi16(a_r_p5_p3, a_i_p5_p3, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_p5_p3); + square_a_epi16(a_r_p5_p1, 
a_i_p5_p1, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_p5_p1); + square_a_epi16(a_r_p5_m1, a_i_p5_m1, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_p5_m1); + square_a_epi16(a_r_p5_m3, a_i_p5_m3, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_p5_m3); + square_a_epi16(a_r_p5_m5, a_i_p5_m5, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_p5_m5); + square_a_epi16(a_r_p5_m7, a_i_p5_m7, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_p5_m7); + square_a_epi16(a_r_p3_p7, a_i_p3_p7, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_p3_p7); + square_a_epi16(a_r_p3_p5, a_i_p3_p5, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_p3_p5); + square_a_epi16(a_r_p3_p3, a_i_p3_p3, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_p3_p3); + square_a_epi16(a_r_p3_p1, a_i_p3_p1, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_p3_p1); + square_a_epi16(a_r_p3_m1, a_i_p3_m1, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_p3_m1); + square_a_epi16(a_r_p3_m3, a_i_p3_m3, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_p3_m3); + square_a_epi16(a_r_p3_m5, a_i_p3_m5, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_p3_m5); + square_a_epi16(a_r_p3_m7, a_i_p3_m7, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_p3_m7); + square_a_epi16(a_r_p1_p7, a_i_p1_p7, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_p1_p7); + square_a_epi16(a_r_p1_p5, a_i_p1_p5, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_p1_p5); + square_a_epi16(a_r_p1_p3, a_i_p1_p3, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_p1_p3); + square_a_epi16(a_r_p1_p1, a_i_p1_p1, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_p1_p1); + square_a_epi16(a_r_p1_m1, a_i_p1_m1, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_p1_m1); + square_a_epi16(a_r_p1_m3, a_i_p1_m3, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_p1_m3); + square_a_epi16(a_r_p1_m5, a_i_p1_m5, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_p1_m5); + square_a_epi16(a_r_p1_m7, a_i_p1_m7, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_p1_m7); + square_a_epi16(a_r_m1_p7, a_i_m1_p7, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_m1_p7); + square_a_epi16(a_r_m1_p5, a_i_m1_p5, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_m1_p5); + square_a_epi16(a_r_m1_p3, a_i_m1_p3, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_m1_p3); + square_a_epi16(a_r_m1_p1, 
a_i_m1_p1, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_m1_p1); + square_a_epi16(a_r_m1_m1, a_i_m1_m1, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_m1_m1); + square_a_epi16(a_r_m1_m3, a_i_m1_m3, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_m1_m3); + square_a_epi16(a_r_m1_m5, a_i_m1_m5, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_m1_m5); + square_a_epi16(a_r_m1_m7, a_i_m1_m7, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_m1_m7); + square_a_epi16(a_r_m3_p7, a_i_m3_p7, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_m3_p7); + square_a_epi16(a_r_m3_p5, a_i_m3_p5, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_m3_p5); + square_a_epi16(a_r_m3_p3, a_i_m3_p3, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_m3_p3); + square_a_epi16(a_r_m3_p1, a_i_m3_p1, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_m3_p1); + square_a_epi16(a_r_m3_m1, a_i_m3_m1, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_m3_m1); + square_a_epi16(a_r_m3_m3, a_i_m3_m3, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_m3_m3); + square_a_epi16(a_r_m3_m5, a_i_m3_m5, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_m3_m5); + square_a_epi16(a_r_m3_m7, a_i_m3_m7, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_m3_m7); + square_a_epi16(a_r_m5_p7, a_i_m5_p7, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_m5_p7); + square_a_epi16(a_r_m5_p5, a_i_m5_p5, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_m5_p5); + square_a_epi16(a_r_m5_p3, a_i_m5_p3, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_m5_p3); + square_a_epi16(a_r_m5_p1, a_i_m5_p1, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_m5_p1); + square_a_epi16(a_r_m5_m1, a_i_m5_m1, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_m5_m1); + square_a_epi16(a_r_m5_m3, a_i_m5_m3, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_m5_m3); + square_a_epi16(a_r_m5_m5, a_i_m5_m5, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_m5_m5); + square_a_epi16(a_r_m5_m7, a_i_m5_m7, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_m5_m7); + square_a_epi16(a_r_m7_p7, a_i_m7_p7, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_m7_p7); + square_a_epi16(a_r_m7_p5, a_i_m7_p5, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_m7_p5); + square_a_epi16(a_r_m7_p3, a_i_m7_p3, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_m7_p3); + square_a_epi16(a_r_m7_p1, 
a_i_m7_p1, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_m7_p1); + square_a_epi16(a_r_m7_m1, a_i_m7_m1, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_m7_m1); + square_a_epi16(a_r_m7_m3, a_i_m7_m3, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_m7_m3); + square_a_epi16(a_r_m7_m5, a_i_m7_m5, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_m7_m5); + square_a_epi16(a_r_m7_m7, a_i_m7_m7, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_m7_m7); + + // Computing different multiples of ||h0||^2 + // x=1, y=1 + ch_mag_2_over_42_with_sigma2 = _mm_mulhi_epi16(ch_mag_des,ONE_OVER_FOUR_SQRT_42); + ch_mag_2_over_42_with_sigma2 = _mm_slli_epi16(ch_mag_2_over_42_with_sigma2,1); + // x=1, y=3 + ch_mag_10_over_42_with_sigma2 = _mm_mulhi_epi16(ch_mag_des,FIVE_OVER_FOUR_SQRT_42); + ch_mag_10_over_42_with_sigma2 = _mm_slli_epi16(ch_mag_10_over_42_with_sigma2,1); + // x=1, x=5 + ch_mag_26_over_42_with_sigma2 = _mm_mulhi_epi16(ch_mag_des,THIRTEEN_OVER_FOUR_SQRT_42); + ch_mag_26_over_42_with_sigma2 = _mm_slli_epi16(ch_mag_26_over_42_with_sigma2,1); + // x=1, y=7 + ch_mag_50_over_42_with_sigma2 = _mm_mulhi_epi16(ch_mag_des,TWENTYFIVE_OVER_FOUR_SQRT_42); + ch_mag_50_over_42_with_sigma2 = _mm_slli_epi16(ch_mag_50_over_42_with_sigma2,1); + // x=3, y=3 + ch_mag_18_over_42_with_sigma2 = _mm_mulhi_epi16(ch_mag_des,NINE_OVER_FOUR_SQRT_42); + ch_mag_18_over_42_with_sigma2 = _mm_slli_epi16(ch_mag_18_over_42_with_sigma2,1); + // x=3, y=5 + ch_mag_34_over_42_with_sigma2 = _mm_mulhi_epi16(ch_mag_des,SEVENTEEN_OVER_FOUR_SQRT_42); + ch_mag_34_over_42_with_sigma2 = _mm_slli_epi16(ch_mag_34_over_42_with_sigma2,1); + // x=3, y=7 + ch_mag_58_over_42_with_sigma2 = _mm_mulhi_epi16(ch_mag_des,TWENTYNINE_OVER_FOUR_SQRT_42); + ch_mag_58_over_42_with_sigma2 = _mm_slli_epi16(ch_mag_58_over_42_with_sigma2,2); + // x=5, y=5 + ch_mag_50_over_42_with_sigma2 = _mm_mulhi_epi16(ch_mag_des,TWENTYFIVE_OVER_FOUR_SQRT_42); + ch_mag_50_over_42_with_sigma2 = _mm_slli_epi16(ch_mag_50_over_42_with_sigma2,1); + // x=5, y=7 + ch_mag_74_over_42_with_sigma2 = 
_mm_mulhi_epi16(ch_mag_des,THIRTYSEVEN_OVER_FOUR_SQRT_42); + ch_mag_74_over_42_with_sigma2 = _mm_slli_epi16(ch_mag_74_over_42_with_sigma2,2); + // x=7, y=7 + ch_mag_98_over_42_with_sigma2 = _mm_mulhi_epi16(ch_mag_des,FORTYNINE_OVER_FOUR_SQRT_42); + ch_mag_98_over_42_with_sigma2 = _mm_slli_epi16(ch_mag_98_over_42_with_sigma2,2); + + // Computing Metrics + xmm0 = _mm_subs_epi16(psi_a_p7_p7, a_sq_p7_p7); + xmm1 = _mm_adds_epi16(xmm0, y0_p_7_7); + bit_met_p7_p7 = _mm_subs_epi16(xmm1, ch_mag_98_over_42_with_sigma2); + xmm0 = _mm_subs_epi16(psi_a_p7_p5, a_sq_p7_p5); + xmm1 = _mm_adds_epi16(xmm0, y0_p_7_5); + bit_met_p7_p5 = _mm_subs_epi16(xmm1, ch_mag_74_over_42_with_sigma2); + xmm0 = _mm_subs_epi16(psi_a_p7_p3, a_sq_p7_p3); + xmm1 = _mm_adds_epi16(xmm0, y0_p_7_3); + bit_met_p7_p3 = _mm_subs_epi16(xmm1, ch_mag_58_over_42_with_sigma2); + xmm0 = _mm_subs_epi16(psi_a_p7_p1, a_sq_p7_p1); + xmm1 = _mm_adds_epi16(xmm0, y0_p_7_1); + bit_met_p7_p1 = _mm_subs_epi16(xmm1, ch_mag_50_over_42_with_sigma2); + xmm0 = _mm_subs_epi16(psi_a_p7_m1, a_sq_p7_m1); + xmm1 = _mm_adds_epi16(xmm0, y0_m_7_1); + bit_met_p7_m1 = _mm_subs_epi16(xmm1, ch_mag_50_over_42_with_sigma2); + xmm0 = _mm_subs_epi16(psi_a_p7_m3, a_sq_p7_m3); + xmm1 = _mm_adds_epi16(xmm0, y0_m_7_3); + bit_met_p7_m3 = _mm_subs_epi16(xmm1, ch_mag_58_over_42_with_sigma2); + xmm0 = _mm_subs_epi16(psi_a_p7_m5, a_sq_p7_m5); + xmm1 = _mm_adds_epi16(xmm0, y0_m_7_5); + bit_met_p7_m5 = _mm_subs_epi16(xmm1, ch_mag_74_over_42_with_sigma2); + xmm0 = _mm_subs_epi16(psi_a_p7_m7, a_sq_p7_m7); + xmm1 = _mm_adds_epi16(xmm0, y0_m_7_7); + bit_met_p7_m7 = _mm_subs_epi16(xmm1, ch_mag_98_over_42_with_sigma2); + xmm0 = _mm_subs_epi16(psi_a_p5_p7, a_sq_p5_p7); + xmm1 = _mm_adds_epi16(xmm0, y0_p_5_7); + bit_met_p5_p7 = _mm_subs_epi16(xmm1, ch_mag_74_over_42_with_sigma2); + xmm0 = _mm_subs_epi16(psi_a_p5_p5, a_sq_p5_p5); + xmm1 = _mm_adds_epi16(xmm0, y0_p_5_5); + bit_met_p5_p5 = _mm_subs_epi16(xmm1, ch_mag_50_over_42_with_sigma2); + xmm0 = 
_mm_subs_epi16(psi_a_p5_p3, a_sq_p5_p3); + xmm1 = _mm_adds_epi16(xmm0, y0_p_5_3); + bit_met_p5_p3 = _mm_subs_epi16(xmm1, ch_mag_34_over_42_with_sigma2); + xmm0 = _mm_subs_epi16(psi_a_p5_p1, a_sq_p5_p1); + xmm1 = _mm_adds_epi16(xmm0, y0_p_5_1); + bit_met_p5_p1 = _mm_subs_epi16(xmm1, ch_mag_26_over_42_with_sigma2); + xmm0 = _mm_subs_epi16(psi_a_p5_m1, a_sq_p5_m1); + xmm1 = _mm_adds_epi16(xmm0, y0_m_5_1); + bit_met_p5_m1 = _mm_subs_epi16(xmm1, ch_mag_26_over_42_with_sigma2); + xmm0 = _mm_subs_epi16(psi_a_p5_m3, a_sq_p5_m3); + xmm1 = _mm_adds_epi16(xmm0, y0_m_5_3); + bit_met_p5_m3 = _mm_subs_epi16(xmm1, ch_mag_34_over_42_with_sigma2); + xmm0 = _mm_subs_epi16(psi_a_p5_m5, a_sq_p5_m5); + xmm1 = _mm_adds_epi16(xmm0, y0_m_5_5); + bit_met_p5_m5 = _mm_subs_epi16(xmm1, ch_mag_50_over_42_with_sigma2); + xmm0 = _mm_subs_epi16(psi_a_p5_m7, a_sq_p5_m7); + xmm1 = _mm_adds_epi16(xmm0, y0_m_5_7); + bit_met_p5_m7 = _mm_subs_epi16(xmm1, ch_mag_74_over_42_with_sigma2); + xmm0 = _mm_subs_epi16(psi_a_p3_p7, a_sq_p3_p7); + xmm1 = _mm_adds_epi16(xmm0, y0_p_3_7); + bit_met_p3_p7 = _mm_subs_epi16(xmm1, ch_mag_58_over_42_with_sigma2); + xmm0 = _mm_subs_epi16(psi_a_p3_p5, a_sq_p3_p5); + xmm1 = _mm_adds_epi16(xmm0, y0_p_3_5); + bit_met_p3_p5 = _mm_subs_epi16(xmm1, ch_mag_34_over_42_with_sigma2); + xmm0 = _mm_subs_epi16(psi_a_p3_p3, a_sq_p3_p3); + xmm1 = _mm_adds_epi16(xmm0, y0_p_3_3); + bit_met_p3_p3 = _mm_subs_epi16(xmm1, ch_mag_18_over_42_with_sigma2); + xmm0 = _mm_subs_epi16(psi_a_p3_p1, a_sq_p3_p1); + xmm1 = _mm_adds_epi16(xmm0, y0_p_3_1); + bit_met_p3_p1 = _mm_subs_epi16(xmm1, ch_mag_10_over_42_with_sigma2); + xmm0 = _mm_subs_epi16(psi_a_p3_m1, a_sq_p3_m1); + xmm1 = _mm_adds_epi16(xmm0, y0_m_3_1); + bit_met_p3_m1 = _mm_subs_epi16(xmm1, ch_mag_10_over_42_with_sigma2); + xmm0 = _mm_subs_epi16(psi_a_p3_m3, a_sq_p3_m3); + xmm1 = _mm_adds_epi16(xmm0, y0_m_3_3); + bit_met_p3_m3 = _mm_subs_epi16(xmm1, ch_mag_18_over_42_with_sigma2); + xmm0 = _mm_subs_epi16(psi_a_p3_m5, a_sq_p3_m5); + xmm1 = 
_mm_adds_epi16(xmm0, y0_m_3_5); + bit_met_p3_m5 = _mm_subs_epi16(xmm1, ch_mag_34_over_42_with_sigma2); + xmm0 = _mm_subs_epi16(psi_a_p3_m7, a_sq_p3_m7); + xmm1 = _mm_adds_epi16(xmm0, y0_m_3_7); + bit_met_p3_m7 = _mm_subs_epi16(xmm1, ch_mag_58_over_42_with_sigma2); + xmm0 = _mm_subs_epi16(psi_a_p1_p7, a_sq_p1_p7); + xmm1 = _mm_adds_epi16(xmm0, y0_p_1_7); + bit_met_p1_p7 = _mm_subs_epi16(xmm1, ch_mag_50_over_42_with_sigma2); + xmm0 = _mm_subs_epi16(psi_a_p1_p5, a_sq_p1_p5); + xmm1 = _mm_adds_epi16(xmm0, y0_p_1_5); + bit_met_p1_p5 = _mm_subs_epi16(xmm1, ch_mag_26_over_42_with_sigma2); + xmm0 = _mm_subs_epi16(psi_a_p1_p3, a_sq_p1_p3); + xmm1 = _mm_adds_epi16(xmm0, y0_p_1_3); + bit_met_p1_p3 = _mm_subs_epi16(xmm1, ch_mag_10_over_42_with_sigma2); + xmm0 = _mm_subs_epi16(psi_a_p1_p1, a_sq_p1_p1); + xmm1 = _mm_adds_epi16(xmm0, y0_p_1_1); + bit_met_p1_p1 = _mm_subs_epi16(xmm1, ch_mag_2_over_42_with_sigma2); + xmm0 = _mm_subs_epi16(psi_a_p1_m1, a_sq_p1_m1); + xmm1 = _mm_adds_epi16(xmm0, y0_m_1_1); + bit_met_p1_m1 = _mm_subs_epi16(xmm1, ch_mag_2_over_42_with_sigma2); + xmm0 = _mm_subs_epi16(psi_a_p1_m3, a_sq_p1_m3); + xmm1 = _mm_adds_epi16(xmm0, y0_m_1_3); + bit_met_p1_m3 = _mm_subs_epi16(xmm1, ch_mag_10_over_42_with_sigma2); + xmm0 = _mm_subs_epi16(psi_a_p1_m5, a_sq_p1_m5); + xmm1 = _mm_adds_epi16(xmm0, y0_m_1_5); + bit_met_p1_m5 = _mm_subs_epi16(xmm1, ch_mag_26_over_42_with_sigma2); + xmm0 = _mm_subs_epi16(psi_a_p1_m7, a_sq_p1_m7); + xmm1 = _mm_adds_epi16(xmm0, y0_m_1_7); + bit_met_p1_m7 = _mm_subs_epi16(xmm1, ch_mag_50_over_42_with_sigma2); + + xmm0 = _mm_subs_epi16(psi_a_m1_p7, a_sq_m1_p7); + xmm1 = _mm_subs_epi16(xmm0, y0_m_1_7); + bit_met_m1_p7 = _mm_subs_epi16(xmm1, ch_mag_50_over_42_with_sigma2); + xmm0 = _mm_subs_epi16(psi_a_m1_p5, a_sq_m1_p5); + xmm1 = _mm_subs_epi16(xmm0, y0_m_1_5); + bit_met_m1_p5 = _mm_subs_epi16(xmm1, ch_mag_26_over_42_with_sigma2); + xmm0 = _mm_subs_epi16(psi_a_m1_p3, a_sq_m1_p3); + xmm1 = _mm_subs_epi16(xmm0, y0_m_1_3); + bit_met_m1_p3 = 
_mm_subs_epi16(xmm1, ch_mag_10_over_42_with_sigma2); + xmm0 = _mm_subs_epi16(psi_a_m1_p1, a_sq_m1_p1); + xmm1 = _mm_subs_epi16(xmm0, y0_m_1_1); + bit_met_m1_p1 = _mm_subs_epi16(xmm1, ch_mag_2_over_42_with_sigma2); + xmm0 = _mm_subs_epi16(psi_a_m1_m1, a_sq_m1_m1); + xmm1 = _mm_subs_epi16(xmm0, y0_p_1_1); + bit_met_m1_m1 = _mm_subs_epi16(xmm1, ch_mag_2_over_42_with_sigma2); + xmm0 = _mm_subs_epi16(psi_a_m1_m3, a_sq_m1_m3); + xmm1 = _mm_subs_epi16(xmm0, y0_p_1_3); + bit_met_m1_m3 = _mm_subs_epi16(xmm1, ch_mag_10_over_42_with_sigma2); + xmm0 = _mm_subs_epi16(psi_a_m1_m5, a_sq_m1_m5); + xmm1 = _mm_subs_epi16(xmm0, y0_p_1_5); + bit_met_m1_m5 = _mm_subs_epi16(xmm1, ch_mag_26_over_42_with_sigma2); + xmm0 = _mm_subs_epi16(psi_a_m1_m7, a_sq_m1_m7); + xmm1 = _mm_subs_epi16(xmm0, y0_p_1_7); + bit_met_m1_m7 = _mm_subs_epi16(xmm1, ch_mag_50_over_42_with_sigma2); + xmm0 = _mm_subs_epi16(psi_a_m3_p7, a_sq_m3_p7); + xmm1 = _mm_subs_epi16(xmm0, y0_m_3_7); + bit_met_m3_p7 = _mm_subs_epi16(xmm1, ch_mag_58_over_42_with_sigma2); + xmm0 = _mm_subs_epi16(psi_a_m3_p5, a_sq_m3_p5); + xmm1 = _mm_subs_epi16(xmm0, y0_m_3_5); + bit_met_m3_p5 = _mm_subs_epi16(xmm1, ch_mag_34_over_42_with_sigma2); + xmm0 = _mm_subs_epi16(psi_a_m3_p3, a_sq_m3_p3); + xmm1 = _mm_subs_epi16(xmm0, y0_m_3_3); + bit_met_m3_p3 = _mm_subs_epi16(xmm1, ch_mag_18_over_42_with_sigma2); + xmm0 = _mm_subs_epi16(psi_a_m3_p1, a_sq_m3_p1); + xmm1 = _mm_subs_epi16(xmm0, y0_m_3_1); + bit_met_m3_p1 = _mm_subs_epi16(xmm1, ch_mag_10_over_42_with_sigma2); + xmm0 = _mm_subs_epi16(psi_a_m3_m1, a_sq_m3_m1); + xmm1 = _mm_subs_epi16(xmm0, y0_p_3_1); + bit_met_m3_m1 = _mm_subs_epi16(xmm1, ch_mag_10_over_42_with_sigma2); + xmm0 = _mm_subs_epi16(psi_a_m3_m3, a_sq_m3_m3); + xmm1 = _mm_subs_epi16(xmm0, y0_p_3_3); + bit_met_m3_m3 = _mm_subs_epi16(xmm1, ch_mag_18_over_42_with_sigma2); + xmm0 = _mm_subs_epi16(psi_a_m3_m5, a_sq_m3_m5); + xmm1 = _mm_subs_epi16(xmm0, y0_p_3_5); + bit_met_m3_m5 = _mm_subs_epi16(xmm1, ch_mag_34_over_42_with_sigma2); + 
xmm0 = _mm_subs_epi16(psi_a_m3_m7, a_sq_m3_m7); + xmm1 = _mm_subs_epi16(xmm0, y0_p_3_7); + bit_met_m3_m7 = _mm_subs_epi16(xmm1, ch_mag_58_over_42_with_sigma2); + xmm0 = _mm_subs_epi16(psi_a_m5_p7, a_sq_m5_p7); + xmm1 = _mm_subs_epi16(xmm0, y0_m_5_7); + bit_met_m5_p7 = _mm_subs_epi16(xmm1, ch_mag_74_over_42_with_sigma2); + xmm0 = _mm_subs_epi16(psi_a_m5_p5, a_sq_m5_p5); + xmm1 = _mm_subs_epi16(xmm0, y0_m_5_5); + bit_met_m5_p5 = _mm_subs_epi16(xmm1, ch_mag_50_over_42_with_sigma2); + xmm0 = _mm_subs_epi16(psi_a_m5_p3, a_sq_m5_p3); + xmm1 = _mm_subs_epi16(xmm0, y0_m_5_3); + bit_met_m5_p3 = _mm_subs_epi16(xmm1, ch_mag_34_over_42_with_sigma2); + xmm0 = _mm_subs_epi16(psi_a_m5_p1, a_sq_m5_p1); + xmm1 = _mm_subs_epi16(xmm0, y0_m_5_1); + bit_met_m5_p1 = _mm_subs_epi16(xmm1, ch_mag_26_over_42_with_sigma2); + xmm0 = _mm_subs_epi16(psi_a_m5_m1, a_sq_m5_m1); + xmm1 = _mm_subs_epi16(xmm0, y0_p_5_1); + bit_met_m5_m1 = _mm_subs_epi16(xmm1, ch_mag_26_over_42_with_sigma2); + xmm0 = _mm_subs_epi16(psi_a_m5_m3, a_sq_m5_m3); + xmm1 = _mm_subs_epi16(xmm0, y0_p_5_3); + bit_met_m5_m3 = _mm_subs_epi16(xmm1, ch_mag_34_over_42_with_sigma2); + xmm0 = _mm_subs_epi16(psi_a_m5_m5, a_sq_m5_m5); + xmm1 = _mm_subs_epi16(xmm0, y0_p_5_5); + bit_met_m5_m5 = _mm_subs_epi16(xmm1, ch_mag_50_over_42_with_sigma2); + xmm0 = _mm_subs_epi16(psi_a_m5_m7, a_sq_m5_m7); + xmm1 = _mm_subs_epi16(xmm0, y0_p_5_7); + bit_met_m5_m7 = _mm_subs_epi16(xmm1, ch_mag_74_over_42_with_sigma2); + xmm0 = _mm_subs_epi16(psi_a_m7_p7, a_sq_m7_p7); + xmm1 = _mm_subs_epi16(xmm0, y0_m_7_7); + bit_met_m7_p7 = _mm_subs_epi16(xmm1, ch_mag_98_over_42_with_sigma2); + xmm0 = _mm_subs_epi16(psi_a_m7_p5, a_sq_m7_p5); + xmm1 = _mm_subs_epi16(xmm0, y0_m_7_5); + bit_met_m7_p5 = _mm_subs_epi16(xmm1, ch_mag_74_over_42_with_sigma2); + xmm0 = _mm_subs_epi16(psi_a_m7_p3, a_sq_m7_p3); + xmm1 = _mm_subs_epi16(xmm0, y0_m_7_3); + bit_met_m7_p3 = _mm_subs_epi16(xmm1, ch_mag_58_over_42_with_sigma2); + xmm0 = _mm_subs_epi16(psi_a_m7_p1, a_sq_m7_p1); + xmm1 
= _mm_subs_epi16(xmm0, y0_m_7_1); + bit_met_m7_p1 = _mm_subs_epi16(xmm1, ch_mag_50_over_42_with_sigma2); + xmm0 = _mm_subs_epi16(psi_a_m7_m1, a_sq_m7_m1); + xmm1 = _mm_subs_epi16(xmm0, y0_p_7_1); + bit_met_m7_m1 = _mm_subs_epi16(xmm1, ch_mag_50_over_42_with_sigma2); + xmm0 = _mm_subs_epi16(psi_a_m7_m3, a_sq_m7_m3); + xmm1 = _mm_subs_epi16(xmm0, y0_p_7_3); + bit_met_m7_m3 = _mm_subs_epi16(xmm1, ch_mag_58_over_42_with_sigma2); + xmm0 = _mm_subs_epi16(psi_a_m7_m5, a_sq_m7_m5); + xmm1 = _mm_subs_epi16(xmm0, y0_p_7_5); + bit_met_m7_m5 = _mm_subs_epi16(xmm1, ch_mag_74_over_42_with_sigma2); + xmm0 = _mm_subs_epi16(psi_a_m7_m7, a_sq_m7_m7); + xmm1 = _mm_subs_epi16(xmm0, y0_p_7_7); + bit_met_m7_m7 = _mm_subs_epi16(xmm1, ch_mag_98_over_42_with_sigma2); + + // Detection for 1st bit (LTE mapping) + // bit = 1 + xmm0 = _mm_max_epi16(bit_met_m7_p7, bit_met_m7_p5); + xmm1 = _mm_max_epi16(bit_met_m7_p3, bit_met_m7_p1); + xmm2 = _mm_max_epi16(bit_met_m7_m1, bit_met_m7_m3); + xmm3 = _mm_max_epi16(bit_met_m7_m5, bit_met_m7_m7); + xmm4 = _mm_max_epi16(xmm0, xmm1); + xmm5 = _mm_max_epi16(xmm2, xmm3); + logmax_den_re0 = _mm_max_epi16(xmm4, xmm5); + xmm0 = _mm_max_epi16(bit_met_m5_p7, bit_met_m5_p5); + xmm1 = _mm_max_epi16(bit_met_m5_p3, bit_met_m5_p1); + xmm2 = _mm_max_epi16(bit_met_m5_m1, bit_met_m5_m3); + xmm3 = _mm_max_epi16(bit_met_m5_m5, bit_met_m5_m7); + xmm4 = _mm_max_epi16(xmm0, xmm1); + xmm5 = _mm_max_epi16(xmm2, xmm3); + logmax_den_re0 = _mm_max_epi16(logmax_den_re0, xmm4); + logmax_den_re0 = _mm_max_epi16(logmax_den_re0, xmm5); + xmm0 = _mm_max_epi16(bit_met_m3_p7, bit_met_m3_p5); + xmm1 = _mm_max_epi16(bit_met_m3_p3, bit_met_m3_p1); + xmm2 = _mm_max_epi16(bit_met_m3_m1, bit_met_m3_m3); + xmm3 = _mm_max_epi16(bit_met_m3_m5, bit_met_m3_m7); + xmm4 = _mm_max_epi16(xmm0, xmm1); + xmm5 = _mm_max_epi16(xmm2, xmm3); + logmax_den_re0 = _mm_max_epi16(logmax_den_re0, xmm4); + logmax_den_re0 = _mm_max_epi16(logmax_den_re0, xmm5); + xmm0 = _mm_max_epi16(bit_met_m1_p7, bit_met_m1_p5); + 
xmm1 = _mm_max_epi16(bit_met_m1_p3, bit_met_m1_p1); + xmm2 = _mm_max_epi16(bit_met_m1_m1, bit_met_m1_m3); + xmm3 = _mm_max_epi16(bit_met_m1_m5, bit_met_m1_m7); + xmm4 = _mm_max_epi16(xmm0, xmm1); + xmm5 = _mm_max_epi16(xmm2, xmm3); + logmax_den_re0 = _mm_max_epi16(logmax_den_re0, xmm4); + logmax_den_re0 = _mm_max_epi16(logmax_den_re0, xmm5); + + // bit = 0 + xmm0 = _mm_max_epi16(bit_met_p7_p7, bit_met_p7_p5); + xmm1 = _mm_max_epi16(bit_met_p7_p3, bit_met_p7_p1); + xmm2 = _mm_max_epi16(bit_met_p7_m1, bit_met_p7_m3); + xmm3 = _mm_max_epi16(bit_met_p7_m5, bit_met_p7_m7); + xmm4 = _mm_max_epi16(xmm0, xmm1); + xmm5 = _mm_max_epi16(xmm2, xmm3); + logmax_num_re0 = _mm_max_epi16(xmm4, xmm5); + xmm0 = _mm_max_epi16(bit_met_p5_p7, bit_met_p5_p5); + xmm1 = _mm_max_epi16(bit_met_p5_p3, bit_met_p5_p1); + xmm2 = _mm_max_epi16(bit_met_p5_m1, bit_met_p5_m3); + xmm3 = _mm_max_epi16(bit_met_p5_m5, bit_met_p5_m7); + xmm4 = _mm_max_epi16(xmm0, xmm1); + xmm5 = _mm_max_epi16(xmm2, xmm3); + logmax_num_re0 = _mm_max_epi16(logmax_num_re0, xmm4); + logmax_num_re0 = _mm_max_epi16(logmax_num_re0, xmm5); + xmm0 = _mm_max_epi16(bit_met_p3_p7, bit_met_p3_p5); + xmm1 = _mm_max_epi16(bit_met_p3_p3, bit_met_p3_p1); + xmm2 = _mm_max_epi16(bit_met_p3_m1, bit_met_p3_m3); + xmm3 = _mm_max_epi16(bit_met_p3_m5, bit_met_p3_m7); + xmm4 = _mm_max_epi16(xmm0, xmm1); + xmm5 = _mm_max_epi16(xmm2, xmm3); + logmax_num_re0 = _mm_max_epi16(logmax_num_re0, xmm4); + logmax_num_re0 = _mm_max_epi16(logmax_num_re0, xmm5); + xmm0 = _mm_max_epi16(bit_met_p1_p7, bit_met_p1_p5); + xmm1 = _mm_max_epi16(bit_met_p1_p3, bit_met_p1_p1); + xmm2 = _mm_max_epi16(bit_met_p1_m1, bit_met_p1_m3); + xmm3 = _mm_max_epi16(bit_met_p1_m5, bit_met_p1_m7); + xmm4 = _mm_max_epi16(xmm0, xmm1); + xmm5 = _mm_max_epi16(xmm2, xmm3); + logmax_num_re0 = _mm_max_epi16(logmax_num_re0, xmm4); + logmax_num_re0 = _mm_max_epi16(logmax_num_re0, xmm5); + + y0r = _mm_subs_epi16(logmax_num_re0, logmax_den_re0); + + // Detection for 2nd bit (LTE mapping) + // 
bit = 1 + xmm0 = _mm_max_epi16(bit_met_p7_m1, bit_met_p5_m1); + xmm1 = _mm_max_epi16(bit_met_p3_m1, bit_met_p1_m1); + xmm2 = _mm_max_epi16(bit_met_m1_m1, bit_met_m3_m1); + xmm3 = _mm_max_epi16(bit_met_m5_m1, bit_met_m7_m1); + xmm4 = _mm_max_epi16(xmm0, xmm1); + xmm5 = _mm_max_epi16(xmm2, xmm3); + logmax_den_re0 = _mm_max_epi16(xmm4, xmm5); + xmm0 = _mm_max_epi16(bit_met_p7_m3, bit_met_p5_m3); + xmm1 = _mm_max_epi16(bit_met_p3_m3, bit_met_p1_m3); + xmm2 = _mm_max_epi16(bit_met_m1_m3, bit_met_m3_m3); + xmm3 = _mm_max_epi16(bit_met_m5_m3, bit_met_m7_m3); + xmm4 = _mm_max_epi16(xmm0, xmm1); + xmm5 = _mm_max_epi16(xmm2, xmm3); + logmax_den_re0 = _mm_max_epi16(logmax_den_re0, xmm4); + logmax_den_re0 = _mm_max_epi16(logmax_den_re0, xmm5); + xmm0 = _mm_max_epi16(bit_met_p7_m5, bit_met_p5_m5); + xmm1 = _mm_max_epi16(bit_met_p3_m5, bit_met_p1_m5); + xmm2 = _mm_max_epi16(bit_met_m1_m5, bit_met_m3_m5); + xmm3 = _mm_max_epi16(bit_met_m5_m5, bit_met_m7_m5); + xmm4 = _mm_max_epi16(xmm0, xmm1); + xmm5 = _mm_max_epi16(xmm2, xmm3); + logmax_den_re0 = _mm_max_epi16(logmax_den_re0, xmm4); + logmax_den_re0 = _mm_max_epi16(logmax_den_re0, xmm5); + xmm0 = _mm_max_epi16(bit_met_p7_m7, bit_met_p5_m7); + xmm1 = _mm_max_epi16(bit_met_p3_m7, bit_met_p1_m7); + xmm2 = _mm_max_epi16(bit_met_m1_m7, bit_met_m3_m7); + xmm3 = _mm_max_epi16(bit_met_m5_m7, bit_met_m7_m7); + xmm4 = _mm_max_epi16(xmm0, xmm1); + xmm5 = _mm_max_epi16(xmm2, xmm3); + logmax_den_re0 = _mm_max_epi16(logmax_den_re0, xmm4); + logmax_den_re0 = _mm_max_epi16(logmax_den_re0, xmm5); + + // bit = 0 + xmm0 = _mm_max_epi16(bit_met_p7_p1, bit_met_p5_p1); + xmm1 = _mm_max_epi16(bit_met_p3_p1, bit_met_p1_p1); + xmm2 = _mm_max_epi16(bit_met_m1_p1, bit_met_m3_p1); + xmm3 = _mm_max_epi16(bit_met_m5_p1, bit_met_m7_p1); + xmm4 = _mm_max_epi16(xmm0, xmm1); + xmm5 = _mm_max_epi16(xmm2, xmm3); + logmax_num_re0 = _mm_max_epi16(xmm4, xmm5); + xmm0 = _mm_max_epi16(bit_met_p7_p3, bit_met_p5_p3); + xmm1 = _mm_max_epi16(bit_met_p3_p3, bit_met_p1_p3); 
+ xmm2 = _mm_max_epi16(bit_met_m1_p3, bit_met_m3_p3); + xmm3 = _mm_max_epi16(bit_met_m5_p3, bit_met_m7_p3); + xmm4 = _mm_max_epi16(xmm0, xmm1); + xmm5 = _mm_max_epi16(xmm2, xmm3); + logmax_num_re0 = _mm_max_epi16(logmax_num_re0, xmm4); + logmax_num_re0 = _mm_max_epi16(logmax_num_re0, xmm5); + xmm0 = _mm_max_epi16(bit_met_p7_p5, bit_met_p5_p5); + xmm1 = _mm_max_epi16(bit_met_p3_p5, bit_met_p1_p5); + xmm2 = _mm_max_epi16(bit_met_m1_p5, bit_met_m3_p5); + xmm3 = _mm_max_epi16(bit_met_m5_p5, bit_met_m7_p5); + xmm4 = _mm_max_epi16(xmm0, xmm1); + xmm5 = _mm_max_epi16(xmm2, xmm3); + logmax_num_re0 = _mm_max_epi16(logmax_num_re0, xmm4); + logmax_num_re0 = _mm_max_epi16(logmax_num_re0, xmm5); + xmm0 = _mm_max_epi16(bit_met_p7_p7, bit_met_p5_p7); + xmm1 = _mm_max_epi16(bit_met_p3_p7, bit_met_p1_p7); + xmm2 = _mm_max_epi16(bit_met_m1_p7, bit_met_m3_p7); + xmm3 = _mm_max_epi16(bit_met_m5_p7, bit_met_m7_p7); + xmm4 = _mm_max_epi16(xmm0, xmm1); + xmm5 = _mm_max_epi16(xmm2, xmm3); + logmax_num_re0 = _mm_max_epi16(logmax_num_re0, xmm4); + logmax_num_re0 = _mm_max_epi16(logmax_num_re0, xmm5); + + y1r = _mm_subs_epi16(logmax_num_re0, logmax_den_re0); + + // Detection for 3rd bit (LTE mapping) + xmm0 = _mm_max_epi16(bit_met_m7_m7, bit_met_m7_m5); + xmm1 = _mm_max_epi16(bit_met_m7_m3, bit_met_m7_m1); + xmm2 = _mm_max_epi16(bit_met_m7_p1, bit_met_m7_p3); + xmm3 = _mm_max_epi16(bit_met_m7_p5, bit_met_m7_p7); + xmm4 = _mm_max_epi16(xmm0, xmm1); + xmm5 = _mm_max_epi16(xmm2, xmm3); + logmax_den_re0 = _mm_max_epi16(xmm4, xmm5); + xmm0 = _mm_max_epi16(bit_met_m5_m7, bit_met_m5_m5); + xmm1 = _mm_max_epi16(bit_met_m5_m3, bit_met_m5_m1); + xmm2 = _mm_max_epi16(bit_met_m5_p1, bit_met_m5_p3); + xmm3 = _mm_max_epi16(bit_met_m5_p5, bit_met_m5_p7); + xmm4 = _mm_max_epi16(xmm0, xmm1); + xmm5 = _mm_max_epi16(xmm2, xmm3); + logmax_den_re0 = _mm_max_epi16(logmax_den_re0, xmm4); + logmax_den_re0 = _mm_max_epi16(logmax_den_re0, xmm5); + xmm0 = _mm_max_epi16(bit_met_p5_m7, bit_met_p5_m5); + xmm1 = 
_mm_max_epi16(bit_met_p5_m3, bit_met_p5_m1); + xmm2 = _mm_max_epi16(bit_met_p5_p1, bit_met_p5_p3); + xmm3 = _mm_max_epi16(bit_met_p5_p5, bit_met_p5_p7); + xmm4 = _mm_max_epi16(xmm0, xmm1); + xmm5 = _mm_max_epi16(xmm2, xmm3); + logmax_den_re0 = _mm_max_epi16(logmax_den_re0, xmm4); + logmax_den_re0 = _mm_max_epi16(logmax_den_re0, xmm5); + xmm0 = _mm_max_epi16(bit_met_p7_m7, bit_met_p7_m5); + xmm1 = _mm_max_epi16(bit_met_p7_m3, bit_met_p7_m1); + xmm2 = _mm_max_epi16(bit_met_p7_p1, bit_met_p7_p3); + xmm3 = _mm_max_epi16(bit_met_p7_p5, bit_met_p7_p7); + xmm4 = _mm_max_epi16(xmm0, xmm1); + xmm5 = _mm_max_epi16(xmm2, xmm3); + logmax_den_re0 = _mm_max_epi16(logmax_den_re0, xmm4); + logmax_den_re0 = _mm_max_epi16(logmax_den_re0, xmm5); + + xmm0 = _mm_max_epi16(bit_met_m3_m7, bit_met_m3_m5); + xmm1 = _mm_max_epi16(bit_met_m3_m3, bit_met_m3_m1); + xmm2 = _mm_max_epi16(bit_met_m3_p1, bit_met_m3_p3); + xmm3 = _mm_max_epi16(bit_met_m3_p5, bit_met_m3_p7); + xmm4 = _mm_max_epi16(xmm0, xmm1); + xmm5 = _mm_max_epi16(xmm2, xmm3); + logmax_num_re0 = _mm_max_epi16(xmm4, xmm5); + xmm0 = _mm_max_epi16(bit_met_m1_m7, bit_met_m1_m5); + xmm1 = _mm_max_epi16(bit_met_m1_m3, bit_met_m1_m1); + xmm2 = _mm_max_epi16(bit_met_m1_p1, bit_met_m1_p3); + xmm3 = _mm_max_epi16(bit_met_m1_p5, bit_met_m1_p7); + xmm4 = _mm_max_epi16(xmm0, xmm1); + xmm5 = _mm_max_epi16(xmm2, xmm3); + logmax_num_re0 = _mm_max_epi16(logmax_num_re0, xmm4); + logmax_num_re0 = _mm_max_epi16(logmax_num_re0, xmm5); + xmm0 = _mm_max_epi16(bit_met_p1_m7, bit_met_p1_m5); + xmm1 = _mm_max_epi16(bit_met_p1_m3, bit_met_p1_m1); + xmm2 = _mm_max_epi16(bit_met_p1_p1, bit_met_p1_p3); + xmm3 = _mm_max_epi16(bit_met_p1_p5, bit_met_p1_p7); + xmm4 = _mm_max_epi16(xmm0, xmm1); + xmm5 = _mm_max_epi16(xmm2, xmm3); + logmax_num_re0 = _mm_max_epi16(logmax_num_re0, xmm4); + logmax_num_re0 = _mm_max_epi16(logmax_num_re0, xmm5); + xmm0 = _mm_max_epi16(bit_met_p3_m7, bit_met_p3_m5); + xmm1 = _mm_max_epi16(bit_met_p3_m3, bit_met_p3_m1); + xmm2 = 
_mm_max_epi16(bit_met_p3_p1, bit_met_p3_p3); + xmm3 = _mm_max_epi16(bit_met_p3_p5, bit_met_p3_p7); + xmm4 = _mm_max_epi16(xmm0, xmm1); + xmm5 = _mm_max_epi16(xmm2, xmm3); + logmax_num_re0 = _mm_max_epi16(logmax_num_re0, xmm4); + logmax_num_re0 = _mm_max_epi16(logmax_num_re0, xmm5); + + y2r = _mm_subs_epi16(logmax_num_re0, logmax_den_re0); + + // Detection for 4th bit (LTE mapping) + xmm0 = _mm_max_epi16(bit_met_p7_p7, bit_met_p5_p7); + xmm1 = _mm_max_epi16(bit_met_p3_p7, bit_met_p1_p7); + xmm2 = _mm_max_epi16(bit_met_m1_p7, bit_met_m3_p7); + xmm3 = _mm_max_epi16(bit_met_m5_p7, bit_met_m7_p7); + xmm4 = _mm_max_epi16(xmm0, xmm1); + xmm5 = _mm_max_epi16(xmm2, xmm3); + logmax_den_re0 = _mm_max_epi16(xmm4, xmm5); + xmm0 = _mm_max_epi16(bit_met_p7_p5, bit_met_p5_p5); + xmm1 = _mm_max_epi16(bit_met_p3_p5, bit_met_p1_p5); + xmm2 = _mm_max_epi16(bit_met_m1_p5, bit_met_m3_p5); + xmm3 = _mm_max_epi16(bit_met_m5_p5, bit_met_m5_p5); + xmm4 = _mm_max_epi16(xmm0, xmm1); + xmm5 = _mm_max_epi16(xmm2, xmm3); + logmax_den_re0 = _mm_max_epi16(logmax_den_re0, xmm4); + logmax_den_re0 = _mm_max_epi16(logmax_den_re0, xmm5); + xmm0 = _mm_max_epi16(bit_met_p7_m5, bit_met_p5_m5); + xmm1 = _mm_max_epi16(bit_met_p3_m5, bit_met_p1_m5); + xmm2 = _mm_max_epi16(bit_met_m1_m5, bit_met_m3_m5); + xmm3 = _mm_max_epi16(bit_met_m5_m5, bit_met_m7_m5); + xmm4 = _mm_max_epi16(xmm0, xmm1); + xmm5 = _mm_max_epi16(xmm2, xmm3); + logmax_den_re0 = _mm_max_epi16(logmax_den_re0, xmm4); + logmax_den_re0 = _mm_max_epi16(logmax_den_re0, xmm5); + xmm0 = _mm_max_epi16(bit_met_p7_m7, bit_met_p5_m7); + xmm1 = _mm_max_epi16(bit_met_p3_m7, bit_met_p1_m7); + xmm2 = _mm_max_epi16(bit_met_m1_m7, bit_met_m3_m7); + xmm3 = _mm_max_epi16(bit_met_m5_m7, bit_met_m7_m7); + xmm4 = _mm_max_epi16(xmm0, xmm1); + xmm5 = _mm_max_epi16(xmm2, xmm3); + logmax_den_re0 = _mm_max_epi16(logmax_den_re0, xmm4); + logmax_den_re0 = _mm_max_epi16(logmax_den_re0, xmm5); + + xmm0 = _mm_max_epi16(bit_met_p7_m1, bit_met_p5_m1); + xmm1 = 
_mm_max_epi16(bit_met_p3_m1, bit_met_p1_m1); + xmm2 = _mm_max_epi16(bit_met_m1_m1, bit_met_m3_m1); + xmm3 = _mm_max_epi16(bit_met_m5_m1, bit_met_m7_m1); + xmm4 = _mm_max_epi16(xmm0, xmm1); + xmm5 = _mm_max_epi16(xmm2, xmm3); + logmax_num_re0 = _mm_max_epi16(xmm4, xmm5); + xmm0 = _mm_max_epi16(bit_met_p7_m3, bit_met_p5_m3); + xmm1 = _mm_max_epi16(bit_met_p3_m3, bit_met_p1_m3); + xmm2 = _mm_max_epi16(bit_met_m1_m3, bit_met_m3_m3); + xmm3 = _mm_max_epi16(bit_met_m5_m3, bit_met_m7_m3); + xmm4 = _mm_max_epi16(xmm0, xmm1); + xmm5 = _mm_max_epi16(xmm2, xmm3); + logmax_num_re0 = _mm_max_epi16(logmax_num_re0, xmm4); + logmax_num_re0 = _mm_max_epi16(logmax_num_re0, xmm5); + xmm0 = _mm_max_epi16(bit_met_p7_p1, bit_met_p5_p1); + xmm1 = _mm_max_epi16(bit_met_p3_p1, bit_met_p1_p1); + xmm2 = _mm_max_epi16(bit_met_m1_p1, bit_met_m3_p1); + xmm3 = _mm_max_epi16(bit_met_m5_p1, bit_met_m7_p1); + xmm4 = _mm_max_epi16(xmm0, xmm1); + xmm5 = _mm_max_epi16(xmm2, xmm3); + logmax_num_re0 = _mm_max_epi16(logmax_num_re0, xmm4); + logmax_num_re0 = _mm_max_epi16(logmax_num_re0, xmm5); + xmm0 = _mm_max_epi16(bit_met_p7_p3, bit_met_p5_p3); + xmm1 = _mm_max_epi16(bit_met_p3_p3, bit_met_p1_p3); + xmm2 = _mm_max_epi16(bit_met_m1_p3, bit_met_m3_p3); + xmm3 = _mm_max_epi16(bit_met_m5_p3, bit_met_m7_p3); + xmm4 = _mm_max_epi16(xmm0, xmm1); + xmm5 = _mm_max_epi16(xmm2, xmm3); + logmax_num_re0 = _mm_max_epi16(logmax_num_re0, xmm4); + logmax_num_re0 = _mm_max_epi16(logmax_num_re0, xmm5); + + y0i = _mm_subs_epi16(logmax_num_re0, logmax_den_re0); + + + // Detection for 5th bit (LTE mapping) + xmm0 = _mm_max_epi16(bit_met_m7_m7, bit_met_m7_m5); + xmm1 = _mm_max_epi16(bit_met_m7_m3, bit_met_m7_m1); + xmm2 = _mm_max_epi16(bit_met_m7_p1, bit_met_m7_p3); + xmm3 = _mm_max_epi16(bit_met_m7_p5, bit_met_m7_p7); + xmm4 = _mm_max_epi16(xmm0, xmm1); + xmm5 = _mm_max_epi16(xmm2, xmm3); + logmax_den_re0 = _mm_max_epi16(xmm4, xmm5); + xmm0 = _mm_max_epi16(bit_met_m1_m7, bit_met_m1_m5); + xmm1 = _mm_max_epi16(bit_met_m1_m3, 
bit_met_m1_m1); + xmm2 = _mm_max_epi16(bit_met_m1_p1, bit_met_m1_p3); + xmm3 = _mm_max_epi16(bit_met_m1_p5, bit_met_m1_p7); + xmm4 = _mm_max_epi16(xmm0, xmm1); + xmm5 = _mm_max_epi16(xmm2, xmm3); + logmax_den_re0 = _mm_max_epi16(logmax_den_re0, xmm4); + logmax_den_re0 = _mm_max_epi16(logmax_den_re0, xmm5); + xmm0 = _mm_max_epi16(bit_met_p1_m7, bit_met_p1_m5); + xmm1 = _mm_max_epi16(bit_met_p1_m3, bit_met_p1_m1); + xmm2 = _mm_max_epi16(bit_met_p1_p1, bit_met_p1_p3); + xmm3 = _mm_max_epi16(bit_met_p1_p5, bit_met_p1_p7); + xmm4 = _mm_max_epi16(xmm0, xmm1); + xmm5 = _mm_max_epi16(xmm2, xmm3); + logmax_den_re0 = _mm_max_epi16(logmax_den_re0, xmm4); + logmax_den_re0 = _mm_max_epi16(logmax_den_re0, xmm5); + xmm0 = _mm_max_epi16(bit_met_p7_m7, bit_met_p7_m5); + xmm1 = _mm_max_epi16(bit_met_p7_m3, bit_met_p7_m1); + xmm2 = _mm_max_epi16(bit_met_p7_p1, bit_met_p7_p3); + xmm3 = _mm_max_epi16(bit_met_p7_p5, bit_met_p7_p7); + xmm4 = _mm_max_epi16(xmm0, xmm1); + xmm5 = _mm_max_epi16(xmm2, xmm3); + logmax_den_re0 = _mm_max_epi16(logmax_den_re0, xmm4); + logmax_den_re0 = _mm_max_epi16(logmax_den_re0, xmm5); + + xmm0 = _mm_max_epi16(bit_met_m5_m7, bit_met_m5_m5); + xmm1 = _mm_max_epi16(bit_met_m5_m3, bit_met_m5_m1); + xmm2 = _mm_max_epi16(bit_met_m5_p1, bit_met_m5_p3); + xmm3 = _mm_max_epi16(bit_met_m5_p5, bit_met_m5_p7); + xmm4 = _mm_max_epi16(xmm0, xmm1); + xmm5 = _mm_max_epi16(xmm2, xmm3); + logmax_num_re0 = _mm_max_epi16(xmm4, xmm5); + xmm0 = _mm_max_epi16(bit_met_m3_m7, bit_met_m3_m5); + xmm1 = _mm_max_epi16(bit_met_m3_m3, bit_met_m3_m1); + xmm2 = _mm_max_epi16(bit_met_m3_p1, bit_met_m3_p3); + xmm3 = _mm_max_epi16(bit_met_m3_p5, bit_met_m3_p7); + xmm4 = _mm_max_epi16(xmm0, xmm1); + xmm5 = _mm_max_epi16(xmm2, xmm3); + logmax_num_re0 = _mm_max_epi16(logmax_num_re0, xmm4); + logmax_num_re0 = _mm_max_epi16(logmax_num_re0, xmm5); + xmm0 = _mm_max_epi16(bit_met_p3_m7, bit_met_p3_m5); + xmm1 = _mm_max_epi16(bit_met_p3_m3, bit_met_p3_m1); + xmm2 = _mm_max_epi16(bit_met_p3_p1, 
bit_met_p3_p3); + xmm3 = _mm_max_epi16(bit_met_p3_p5, bit_met_p3_p7); + xmm4 = _mm_max_epi16(xmm0, xmm1); + xmm5 = _mm_max_epi16(xmm2, xmm3); + logmax_num_re0 = _mm_max_epi16(logmax_num_re0, xmm4); + logmax_num_re0 = _mm_max_epi16(logmax_num_re0, xmm5); + xmm0 = _mm_max_epi16(bit_met_p5_m7, bit_met_p5_m5); + xmm1 = _mm_max_epi16(bit_met_p5_m3, bit_met_p5_m1); + xmm2 = _mm_max_epi16(bit_met_p5_p1, bit_met_p5_p3); + xmm3 = _mm_max_epi16(bit_met_p5_p5, bit_met_p5_p7); + xmm4 = _mm_max_epi16(xmm0, xmm1); + xmm5 = _mm_max_epi16(xmm2, xmm3); + logmax_num_re0 = _mm_max_epi16(logmax_num_re0, xmm4); + logmax_num_re0 = _mm_max_epi16(logmax_num_re0, xmm5); + + y1i = _mm_subs_epi16(logmax_num_re0, logmax_den_re0); + + // Detection for 6th bit (LTE mapping) + xmm0 = _mm_max_epi16(bit_met_p7_p7, bit_met_p5_p7); + xmm1 = _mm_max_epi16(bit_met_p3_p7, bit_met_p1_p7); + xmm2 = _mm_max_epi16(bit_met_m1_p7, bit_met_m3_p7); + xmm3 = _mm_max_epi16(bit_met_m5_p7, bit_met_m7_p7); + xmm4 = _mm_max_epi16(xmm0, xmm1); + xmm5 = _mm_max_epi16(xmm2, xmm3); + logmax_den_re0 = _mm_max_epi16(xmm4, xmm5); + xmm0 = _mm_max_epi16(bit_met_p7_p1, bit_met_p5_p1); + xmm1 = _mm_max_epi16(bit_met_p3_p1, bit_met_p1_p1); + xmm2 = _mm_max_epi16(bit_met_m1_p1, bit_met_m3_p1); + xmm3 = _mm_max_epi16(bit_met_m5_p1, bit_met_m5_p1); + xmm4 = _mm_max_epi16(xmm0, xmm1); + xmm5 = _mm_max_epi16(xmm2, xmm3); + logmax_den_re0 = _mm_max_epi16(logmax_den_re0, xmm4); + logmax_den_re0 = _mm_max_epi16(logmax_den_re0, xmm5); + xmm0 = _mm_max_epi16(bit_met_p7_m1, bit_met_p5_m1); + xmm1 = _mm_max_epi16(bit_met_p3_m1, bit_met_p1_m1); + xmm2 = _mm_max_epi16(bit_met_m1_m1, bit_met_m3_m1); + xmm3 = _mm_max_epi16(bit_met_m5_m1, bit_met_m7_m1); + xmm4 = _mm_max_epi16(xmm0, xmm1); + xmm5 = _mm_max_epi16(xmm2, xmm3); + logmax_den_re0 = _mm_max_epi16(logmax_den_re0, xmm4); + logmax_den_re0 = _mm_max_epi16(logmax_den_re0, xmm5); + xmm0 = _mm_max_epi16(bit_met_p7_m7, bit_met_p5_m7); + xmm1 = _mm_max_epi16(bit_met_p3_m7, bit_met_p1_m7); + 
xmm2 = _mm_max_epi16(bit_met_m1_m7, bit_met_m3_m7); + xmm3 = _mm_max_epi16(bit_met_m5_m7, bit_met_m7_m7); + xmm4 = _mm_max_epi16(xmm0, xmm1); + xmm5 = _mm_max_epi16(xmm2, xmm3); + logmax_den_re0 = _mm_max_epi16(logmax_den_re0, xmm4); + logmax_den_re0 = _mm_max_epi16(logmax_den_re0, xmm5); + + xmm0 = _mm_max_epi16(bit_met_p7_m5, bit_met_p5_m5); + xmm1 = _mm_max_epi16(bit_met_p3_m5, bit_met_p1_m5); + xmm2 = _mm_max_epi16(bit_met_m1_m5, bit_met_m3_m5); + xmm3 = _mm_max_epi16(bit_met_m5_m5, bit_met_m7_m5); + xmm4 = _mm_max_epi16(xmm0, xmm1); + xmm5 = _mm_max_epi16(xmm2, xmm3); + logmax_num_re0 = _mm_max_epi16(xmm4, xmm5); + xmm0 = _mm_max_epi16(bit_met_p7_m3, bit_met_p5_m3); + xmm1 = _mm_max_epi16(bit_met_p3_m3, bit_met_p1_m3); + xmm2 = _mm_max_epi16(bit_met_m1_m3, bit_met_m3_m3); + xmm3 = _mm_max_epi16(bit_met_m5_m3, bit_met_m7_m3); + xmm4 = _mm_max_epi16(xmm0, xmm1); + xmm5 = _mm_max_epi16(xmm2, xmm3); + logmax_num_re0 = _mm_max_epi16(logmax_num_re0, xmm4); + logmax_num_re0 = _mm_max_epi16(logmax_num_re0, xmm5); + xmm0 = _mm_max_epi16(bit_met_p7_p3, bit_met_p5_p3); + xmm1 = _mm_max_epi16(bit_met_p3_p3, bit_met_p1_p3); + xmm2 = _mm_max_epi16(bit_met_m1_p3, bit_met_m3_p3); + xmm3 = _mm_max_epi16(bit_met_m5_p3, bit_met_m7_p3); + xmm4 = _mm_max_epi16(xmm0, xmm1); + xmm5 = _mm_max_epi16(xmm2, xmm3); + logmax_num_re0 = _mm_max_epi16(logmax_num_re0, xmm4); + logmax_num_re0 = _mm_max_epi16(logmax_num_re0, xmm5); + xmm0 = _mm_max_epi16(bit_met_p7_p5, bit_met_p5_p5); + xmm1 = _mm_max_epi16(bit_met_p3_p5, bit_met_p1_p5); + xmm2 = _mm_max_epi16(bit_met_m1_p5, bit_met_m3_p5); + xmm3 = _mm_max_epi16(bit_met_m5_p5, bit_met_m7_p5); + xmm4 = _mm_max_epi16(xmm0, xmm1); + xmm5 = _mm_max_epi16(xmm2, xmm3); + logmax_num_re0 = _mm_max_epi16(logmax_num_re0, xmm4); + logmax_num_re0 = _mm_max_epi16(logmax_num_re0, xmm5); + + y2i = _mm_subs_epi16(logmax_num_re0, logmax_den_re0); + + + // map to output stream, difficult to do in SIMD since we have 6 16bit LLRs + // RE 1 + j = 24*i; + 
stream0_out[j + 0] = ((short *)&y0r)[0]; + stream0_out[j + 1] = ((short *)&y1r)[0]; + stream0_out[j + 2] = ((short *)&y2r)[0]; + stream0_out[j + 3] = ((short *)&y0i)[0]; + stream0_out[j + 4] = ((short *)&y1i)[0]; + stream0_out[j + 5] = ((short *)&y2i)[0]; + // RE 2 + stream0_out[j + 6] = ((short *)&y0r)[1]; + stream0_out[j + 7] = ((short *)&y1r)[1]; + stream0_out[j + 8] = ((short *)&y2r)[1]; + stream0_out[j + 9] = ((short *)&y0i)[1]; + stream0_out[j + 10] = ((short *)&y1i)[1]; + stream0_out[j + 11] = ((short *)&y2i)[1]; + // RE 3 + stream0_out[j + 12] = ((short *)&y0r)[2]; + stream0_out[j + 13] = ((short *)&y1r)[2]; + stream0_out[j + 14] = ((short *)&y2r)[2]; + stream0_out[j + 15] = ((short *)&y0i)[2]; + stream0_out[j + 16] = ((short *)&y1i)[2]; + stream0_out[j + 17] = ((short *)&y2i)[2]; + // RE 4 + stream0_out[j + 18] = ((short *)&y0r)[3]; + stream0_out[j + 19] = ((short *)&y1r)[3]; + stream0_out[j + 20] = ((short *)&y2r)[3]; + stream0_out[j + 21] = ((short *)&y0i)[3]; + stream0_out[j + 22] = ((short *)&y1i)[3]; + stream0_out[j + 23] = ((short *)&y2i)[3]; + // RE 5 + stream0_out[j + 24] = ((short *)&y0r)[4]; + stream0_out[j + 25] = ((short *)&y1r)[4]; + stream0_out[j + 26] = ((short *)&y2r)[4]; + stream0_out[j + 27] = ((short *)&y0i)[4]; + stream0_out[j + 28] = ((short *)&y1i)[4]; + stream0_out[j + 29] = ((short *)&y2i)[4]; + // RE 6 + stream0_out[j + 30] = ((short *)&y0r)[5]; + stream0_out[j + 31] = ((short *)&y1r)[5]; + stream0_out[j + 32] = ((short *)&y2r)[5]; + stream0_out[j + 33] = ((short *)&y0i)[5]; + stream0_out[j + 34] = ((short *)&y1i)[5]; + stream0_out[j + 35] = ((short *)&y2i)[5]; + // RE 7 + stream0_out[j + 36] = ((short *)&y0r)[6]; + stream0_out[j + 37] = ((short *)&y1r)[6]; + stream0_out[j + 38] = ((short *)&y2r)[6]; + stream0_out[j + 39] = ((short *)&y0i)[6]; + stream0_out[j + 40] = ((short *)&y1i)[6]; + stream0_out[j + 41] = ((short *)&y2i)[6]; + // RE 8 + stream0_out[j + 42] = ((short *)&y0r)[7]; + stream0_out[j + 43] = ((short *)&y1r)[7]; + 
stream0_out[j + 44] = ((short *)&y2r)[7]; + stream0_out[j + 45] = ((short *)&y0i)[7]; + stream0_out[j + 46] = ((short *)&y1i)[7]; + stream0_out[j + 47] = ((short *)&y2i)[7]; + +#elif defined(__arm__) + +#endif + } + +#if defined(__x86_64__) || defined(__i386__) + _mm_empty(); + _m_empty(); +#endif + +} + + +int dlsch_64qam_16qam_llr(LTE_DL_FRAME_PARMS *frame_parms, + int32_t **rxdataF_comp, + int32_t **rxdataF_comp_i, + int32_t **dl_ch_mag, + int32_t **dl_ch_mag_i, + int32_t **rho_i, + int16_t *dlsch_llr, + uint8_t symbol, + uint8_t first_symbol_flag, + uint16_t nb_rb, + uint16_t pbch_pss_sss_adjust, + int16_t **llr16p) +{ + + int16_t *rxF = (int16_t*)&rxdataF_comp[0][(symbol*frame_parms->N_RB_DL*12)]; + int16_t *rxF_i = (int16_t*)&rxdataF_comp_i[0][(symbol*frame_parms->N_RB_DL*12)]; + int16_t *ch_mag = (int16_t*)&dl_ch_mag[0][(symbol*frame_parms->N_RB_DL*12)]; + int16_t *ch_mag_i = (int16_t*)&dl_ch_mag_i[0][(symbol*frame_parms->N_RB_DL*12)]; + int16_t *rho = (int16_t*)&rho_i[0][(symbol*frame_parms->N_RB_DL*12)]; + int16_t *llr16; + int len; + uint8_t symbol_mod = (symbol >= (7-frame_parms->Ncp))? 
(symbol-(7-frame_parms->Ncp)) : symbol; + + //first symbol has different structure due to more pilots + if (first_symbol_flag == 1) { + llr16 = (int16_t*)dlsch_llr; + } else { + llr16 = (int16_t*)(*llr16p); + } + + AssertFatal(llr16!=NULL,"dlsch_16qam_64qam_llr:llr is null, symbol %d\n",symbol); + + if ((symbol_mod==0) || (symbol_mod==(4-frame_parms->Ncp))) { + // if symbol has pilots + if (frame_parms->nb_antenna_ports_eNB!=1) + // in 2 antenna ports we have 8 REs per symbol per RB + len = (nb_rb*8) - (2*pbch_pss_sss_adjust/3); + else + // for 1 antenna port we have 10 REs per symbol per RB + len = (nb_rb*10) - (5*pbch_pss_sss_adjust/6); + } else { + // symbol has no pilots + len = (nb_rb*12) - pbch_pss_sss_adjust; + } + + qam64_qam16((short *)rxF, + (short *)rxF_i, + (short *)ch_mag, + (short *)ch_mag_i, + (short *)llr16, + (short *)rho, + len); + + llr16 += (6*len); + *llr16p = (short *)llr16; + return(0); +} + +void qam64_qam64(short *stream0_in, + short *stream1_in, + short *ch_mag, + short *ch_mag_i, + short *stream0_out, + short *rho01, + int length + ) +{ + + /* + Author: S. Wagner + Date: 31-07-12 + + Input: + stream0_in: MF filter for 1st stream, i.e., y0=h0'*y + stream1_in: MF filter for 2nd stream, i.e., y1=h1'*y + ch_mag: 4*h0/sqrt(42), [Re0 Im0 Re1 Im1] s.t. Im0=Re0, Im1=Re1, etc + ch_mag_i: 4*h1/sqrt(42), [Re0 Im0 Re1 Im1] s.t. 
Im0=Re0, Im1=Re1, etc + rho01: Channel cross correlation, i.e., h1'*h0 + + Output: + stream0_out: output LLRs for 1st stream + */ + +#if defined(__x86_64__) || defined(__i386__) + + __m128i *rho01_128i = (__m128i *)rho01; + __m128i *stream0_128i_in = (__m128i *)stream0_in; + __m128i *stream1_128i_in = (__m128i *)stream1_in; + __m128i *ch_mag_128i = (__m128i *)ch_mag; + __m128i *ch_mag_128i_i = (__m128i *)ch_mag_i; + + __m128i ONE_OVER_SQRT_42 = _mm_set1_epi16(10112); // round(1/sqrt(42)*2^16) + __m128i THREE_OVER_SQRT_42 = _mm_set1_epi16(30337); // round(3/sqrt(42)*2^16) + __m128i FIVE_OVER_SQRT_42 = _mm_set1_epi16(25281); // round(5/sqrt(42)*2^15) + __m128i SEVEN_OVER_SQRT_42 = _mm_set1_epi16(17697); // round(7/sqrt(42)*2^14) Q2.14 + __m128i ONE_OVER_SQRT_2 = _mm_set1_epi16(23170); // round(1/sqrt(2)*2^15) + __m128i ONE_OVER_SQRT_2_42 = _mm_set1_epi16(3575); // round(1/sqrt(2*42)*2^15) + __m128i THREE_OVER_SQRT_2_42 = _mm_set1_epi16(10726); // round(3/sqrt(2*42)*2^15) + __m128i FIVE_OVER_SQRT_2_42 = _mm_set1_epi16(17876); // round(5/sqrt(2*42)*2^15) + __m128i SEVEN_OVER_SQRT_2_42 = _mm_set1_epi16(25027); // round(7/sqrt(2*42)*2^15) + __m128i FORTYNINE_OVER_FOUR_SQRT_42 = _mm_set1_epi16(30969); // round(49/(4*sqrt(42))*2^14), Q2.14 + __m128i THIRTYSEVEN_OVER_FOUR_SQRT_42 = _mm_set1_epi16(23385); // round(37/(4*sqrt(42))*2^14), Q2.14 + __m128i TWENTYFIVE_OVER_FOUR_SQRT_42 = _mm_set1_epi16(31601); // round(25/(4*sqrt(42))*2^15) + __m128i TWENTYNINE_OVER_FOUR_SQRT_42 = _mm_set1_epi16(18329); // round(29/(4*sqrt(42))*2^15), Q2.14 + __m128i SEVENTEEN_OVER_FOUR_SQRT_42 = _mm_set1_epi16(21489); // round(17/(4*sqrt(42))*2^15) + __m128i NINE_OVER_FOUR_SQRT_42 = _mm_set1_epi16(11376); // round(9/(4*sqrt(42))*2^15) + __m128i THIRTEEN_OVER_FOUR_SQRT_42 = _mm_set1_epi16(16433); // round(13/(4*sqrt(42))*2^15) + __m128i FIVE_OVER_FOUR_SQRT_42 = _mm_set1_epi16(6320); // round(5/(4*sqrt(42))*2^15) + __m128i ONE_OVER_FOUR_SQRT_42 = _mm_set1_epi16(1264); // round(1/(4*sqrt(42))*2^15) 
+ __m128i SQRT_42_OVER_FOUR = _mm_set1_epi16(13272); // round(sqrt(42)/4*2^13), Q3.12 + + __m128i ch_mag_des; + __m128i ch_mag_int; + __m128i ch_mag_98_over_42_with_sigma2; + __m128i ch_mag_74_over_42_with_sigma2; + __m128i ch_mag_58_over_42_with_sigma2; + __m128i ch_mag_50_over_42_with_sigma2; + __m128i ch_mag_34_over_42_with_sigma2; + __m128i ch_mag_18_over_42_with_sigma2; + __m128i ch_mag_26_over_42_with_sigma2; + __m128i ch_mag_10_over_42_with_sigma2; + __m128i ch_mag_2_over_42_with_sigma2; + __m128i y0r_one_over_sqrt_21; + __m128i y0r_three_over_sqrt_21; + __m128i y0r_five_over_sqrt_21; + __m128i y0r_seven_over_sqrt_21; + __m128i y0i_one_over_sqrt_21; + __m128i y0i_three_over_sqrt_21; + __m128i y0i_five_over_sqrt_21; + __m128i y0i_seven_over_sqrt_21; + __m128i ch_mag_int_with_sigma2; + __m128i two_ch_mag_int_with_sigma2; + __m128i three_ch_mag_int_with_sigma2; +#elif defined(__arm__) + +#endif + + int i,j; + + + for (i=0; i<length>>2; i+=2) { + +#if defined(__x86_64__) || defined(__i386__) + + // Get rho + xmm0 = rho01_128i[i]; + xmm1 = rho01_128i[i+1]; + xmm0 = _mm_shufflelo_epi16(xmm0,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm0 = _mm_shufflehi_epi16(xmm0,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm0 = _mm_shuffle_epi32(xmm0,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm1 = _mm_shufflelo_epi16(xmm1,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm1 = _mm_shufflehi_epi16(xmm1,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm1 = _mm_shuffle_epi32(xmm1,0xd8); //_MM_SHUFFLE(0,2,1,3)); + //xmm0 = [Re(0,1) Re(2,3) Im(0,1) Im(2,3)] + //xmm1 = [Re(4,5) Re(6,7) Im(4,5) Im(6,7)] + xmm2 = _mm_unpacklo_epi64(xmm0,xmm1); // Re(rho) + xmm3 = _mm_unpackhi_epi64(xmm0,xmm1); // Im(rho) + rho_rpi = _mm_adds_epi16(xmm2,xmm3); // rho = Re(rho) + Im(rho) + rho_rmi = _mm_subs_epi16(xmm2,xmm3); // rho* = Re(rho) - Im(rho) + + // Compute the different rhos + rho_rpi_1_1 = _mm_mulhi_epi16(rho_rpi, ONE_OVER_SQRT_42); + rho_rmi_1_1 = _mm_mulhi_epi16(rho_rmi, ONE_OVER_SQRT_42); + rho_rpi_3_3 = _mm_mulhi_epi16(rho_rpi, 
THREE_OVER_SQRT_42); + rho_rmi_3_3 = _mm_mulhi_epi16(rho_rmi, THREE_OVER_SQRT_42); + rho_rpi_5_5 = _mm_mulhi_epi16(rho_rpi, FIVE_OVER_SQRT_42); + rho_rmi_5_5 = _mm_mulhi_epi16(rho_rmi, FIVE_OVER_SQRT_42); + rho_rpi_7_7 = _mm_mulhi_epi16(rho_rpi, SEVEN_OVER_SQRT_42); + rho_rmi_7_7 = _mm_mulhi_epi16(rho_rmi, SEVEN_OVER_SQRT_42); + + rho_rpi_5_5 = _mm_slli_epi16(rho_rpi_5_5, 1); + rho_rmi_5_5 = _mm_slli_epi16(rho_rmi_5_5, 1); + rho_rpi_7_7 = _mm_slli_epi16(rho_rpi_7_7, 2); + rho_rmi_7_7 = _mm_slli_epi16(rho_rmi_7_7, 2); + + xmm4 = _mm_mulhi_epi16(xmm2, ONE_OVER_SQRT_42); + xmm5 = _mm_mulhi_epi16(xmm3, ONE_OVER_SQRT_42); + xmm6 = _mm_mulhi_epi16(xmm3, THREE_OVER_SQRT_42); + xmm7 = _mm_mulhi_epi16(xmm3, FIVE_OVER_SQRT_42); + xmm8 = _mm_mulhi_epi16(xmm3, SEVEN_OVER_SQRT_42); + xmm7 = _mm_slli_epi16(xmm7, 1); + xmm8 = _mm_slli_epi16(xmm8, 2); + + rho_rpi_1_3 = _mm_adds_epi16(xmm4, xmm6); + rho_rmi_1_3 = _mm_subs_epi16(xmm4, xmm6); + rho_rpi_1_5 = _mm_adds_epi16(xmm4, xmm7); + rho_rmi_1_5 = _mm_subs_epi16(xmm4, xmm7); + rho_rpi_1_7 = _mm_adds_epi16(xmm4, xmm8); + rho_rmi_1_7 = _mm_subs_epi16(xmm4, xmm8); + + xmm4 = _mm_mulhi_epi16(xmm2, THREE_OVER_SQRT_42); + rho_rpi_3_1 = _mm_adds_epi16(xmm4, xmm5); + rho_rmi_3_1 = _mm_subs_epi16(xmm4, xmm5); + rho_rpi_3_5 = _mm_adds_epi16(xmm4, xmm7); + rho_rmi_3_5 = _mm_subs_epi16(xmm4, xmm7); + rho_rpi_3_7 = _mm_adds_epi16(xmm4, xmm8); + rho_rmi_3_7 = _mm_subs_epi16(xmm4, xmm8); + + xmm4 = _mm_mulhi_epi16(xmm2, FIVE_OVER_SQRT_42); + xmm4 = _mm_slli_epi16(xmm4, 1); + rho_rpi_5_1 = _mm_adds_epi16(xmm4, xmm5); + rho_rmi_5_1 = _mm_subs_epi16(xmm4, xmm5); + rho_rpi_5_3 = _mm_adds_epi16(xmm4, xmm6); + rho_rmi_5_3 = _mm_subs_epi16(xmm4, xmm6); + rho_rpi_5_7 = _mm_adds_epi16(xmm4, xmm8); + rho_rmi_5_7 = _mm_subs_epi16(xmm4, xmm8); + + xmm4 = _mm_mulhi_epi16(xmm2, SEVEN_OVER_SQRT_42); + xmm4 = _mm_slli_epi16(xmm4, 2); + rho_rpi_7_1 = _mm_adds_epi16(xmm4, xmm5); + rho_rmi_7_1 = _mm_subs_epi16(xmm4, xmm5); + rho_rpi_7_3 = _mm_adds_epi16(xmm4, 
xmm6); + rho_rmi_7_3 = _mm_subs_epi16(xmm4, xmm6); + rho_rpi_7_5 = _mm_adds_epi16(xmm4, xmm7); + rho_rmi_7_5 = _mm_subs_epi16(xmm4, xmm7); + + // Rearrange interfering MF output + xmm0 = stream1_128i_in[i]; + xmm1 = stream1_128i_in[i+1]; + xmm0 = _mm_shufflelo_epi16(xmm0,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm0 = _mm_shufflehi_epi16(xmm0,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm0 = _mm_shuffle_epi32(xmm0,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm1 = _mm_shufflelo_epi16(xmm1,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm1 = _mm_shufflehi_epi16(xmm1,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm1 = _mm_shuffle_epi32(xmm1,0xd8); //_MM_SHUFFLE(0,2,1,3)); + //xmm0 = [Re(0,1) Re(2,3) Im(0,1) Im(2,3)] + //xmm1 = [Re(4,5) Re(6,7) Im(4,5) Im(6,7)] + y1r = _mm_unpacklo_epi64(xmm0,xmm1); //[y1r(1),y1r(2),y1r(3),y1r(4)] + y1i = _mm_unpackhi_epi64(xmm0,xmm1); //[y1i(1),y1i(2),y1i(3),y1i(4)] + + // Psi_r calculation from rho_rpi or rho_rmi + xmm0 = _mm_setzero_si128(); // ZERO for abs_pi16 + xmm2 = _mm_subs_epi16(rho_rpi_7_7, y1r); + psi_r_p7_p7 = _mm_abs_epi16(xmm2); + xmm2 = _mm_subs_epi16(rho_rpi_7_5, y1r); + psi_r_p7_p5 = _mm_abs_epi16(xmm2); + xmm2 = _mm_subs_epi16(rho_rpi_7_3, y1r); + psi_r_p7_p3 = _mm_abs_epi16(xmm2); + xmm2 = _mm_subs_epi16(rho_rpi_7_1, y1r); + psi_r_p7_p1 = _mm_abs_epi16(xmm2); + xmm2 = _mm_subs_epi16(rho_rmi_7_1, y1r); + psi_r_p7_m1 = _mm_abs_epi16(xmm2); + xmm2 = _mm_subs_epi16(rho_rmi_7_3, y1r); + psi_r_p7_m3 = _mm_abs_epi16(xmm2); + xmm2 = _mm_subs_epi16(rho_rmi_7_5, y1r); + psi_r_p7_m5 = _mm_abs_epi16(xmm2); + xmm2 = _mm_subs_epi16(rho_rmi_7_7, y1r); + psi_r_p7_m7 = _mm_abs_epi16(xmm2); + xmm2 = _mm_subs_epi16(rho_rpi_5_7, y1r); + psi_r_p5_p7 = _mm_abs_epi16(xmm2); + xmm2 = _mm_subs_epi16(rho_rpi_5_5, y1r); + psi_r_p5_p5 = _mm_abs_epi16(xmm2); + xmm2 = _mm_subs_epi16(rho_rpi_5_3, y1r); + psi_r_p5_p3 = _mm_abs_epi16(xmm2); + xmm2 = _mm_subs_epi16(rho_rpi_5_1, y1r); + psi_r_p5_p1 = _mm_abs_epi16(xmm2); + xmm2 = _mm_subs_epi16(rho_rmi_5_1, y1r); + psi_r_p5_m1 = 
_mm_abs_epi16(xmm2); + xmm2 = _mm_subs_epi16(rho_rmi_5_3, y1r); + psi_r_p5_m3 = _mm_abs_epi16(xmm2); + xmm2 = _mm_subs_epi16(rho_rmi_5_5, y1r); + psi_r_p5_m5 = _mm_abs_epi16(xmm2); + xmm2 = _mm_subs_epi16(rho_rmi_5_7, y1r); + psi_r_p5_m7 = _mm_abs_epi16(xmm2); + xmm2 = _mm_subs_epi16(rho_rpi_3_7, y1r); + psi_r_p3_p7 = _mm_abs_epi16(xmm2); + xmm2 = _mm_subs_epi16(rho_rpi_3_5, y1r); + psi_r_p3_p5 = _mm_abs_epi16(xmm2); + xmm2 = _mm_subs_epi16(rho_rpi_3_3, y1r); + psi_r_p3_p3 = _mm_abs_epi16(xmm2); + xmm2 = _mm_subs_epi16(rho_rpi_3_1, y1r); + psi_r_p3_p1 = _mm_abs_epi16(xmm2); + xmm2 = _mm_subs_epi16(rho_rmi_3_1, y1r); + psi_r_p3_m1 = _mm_abs_epi16(xmm2); + xmm2 = _mm_subs_epi16(rho_rmi_3_3, y1r); + psi_r_p3_m3 = _mm_abs_epi16(xmm2); + xmm2 = _mm_subs_epi16(rho_rmi_3_5, y1r); + psi_r_p3_m5 = _mm_abs_epi16(xmm2); + xmm2 = _mm_subs_epi16(rho_rmi_3_7, y1r); + psi_r_p3_m7 = _mm_abs_epi16(xmm2); + xmm2 = _mm_subs_epi16(rho_rpi_1_7, y1r); + psi_r_p1_p7 = _mm_abs_epi16(xmm2); + xmm2 = _mm_subs_epi16(rho_rpi_1_5, y1r); + psi_r_p1_p5 = _mm_abs_epi16(xmm2); + xmm2 = _mm_subs_epi16(rho_rpi_1_3, y1r); + psi_r_p1_p3 = _mm_abs_epi16(xmm2); + xmm2 = _mm_subs_epi16(rho_rpi_1_1, y1r); + psi_r_p1_p1 = _mm_abs_epi16(xmm2); + xmm2 = _mm_subs_epi16(rho_rmi_1_1, y1r); + psi_r_p1_m1 = _mm_abs_epi16(xmm2); + xmm2 = _mm_subs_epi16(rho_rmi_1_3, y1r); + psi_r_p1_m3 = _mm_abs_epi16(xmm2); + xmm2 = _mm_subs_epi16(rho_rmi_1_5, y1r); + psi_r_p1_m5 = _mm_abs_epi16(xmm2); + xmm2 = _mm_subs_epi16(rho_rmi_1_7, y1r); + psi_r_p1_m7 = _mm_abs_epi16(xmm2); + + xmm2 = _mm_adds_epi16(rho_rmi_1_7, y1r); + psi_r_m1_p7 = _mm_abs_epi16(xmm2); + xmm2 = _mm_adds_epi16(rho_rmi_1_5, y1r); + psi_r_m1_p5 = _mm_abs_epi16(xmm2); + xmm2 = _mm_adds_epi16(rho_rmi_1_3, y1r); + psi_r_m1_p3 = _mm_abs_epi16(xmm2); + xmm2 = _mm_adds_epi16(rho_rmi_1_1, y1r); + psi_r_m1_p1 = _mm_abs_epi16(xmm2); + xmm2 = _mm_adds_epi16(rho_rpi_1_1, y1r); + psi_r_m1_m1 = _mm_abs_epi16(xmm2); + xmm2 = _mm_adds_epi16(rho_rpi_1_3, y1r); + psi_r_m1_m3 
= _mm_abs_epi16(xmm2); + xmm2 = _mm_adds_epi16(rho_rpi_1_5, y1r); + psi_r_m1_m5 = _mm_abs_epi16(xmm2); + xmm2 = _mm_adds_epi16(rho_rpi_1_7, y1r); + psi_r_m1_m7 = _mm_abs_epi16(xmm2); + xmm2 = _mm_adds_epi16(rho_rmi_3_7, y1r); + psi_r_m3_p7 = _mm_abs_epi16(xmm2); + xmm2 = _mm_adds_epi16(rho_rmi_3_5, y1r); + psi_r_m3_p5 = _mm_abs_epi16(xmm2); + xmm2 = _mm_adds_epi16(rho_rmi_3_3, y1r); + psi_r_m3_p3 = _mm_abs_epi16(xmm2); + xmm2 = _mm_adds_epi16(rho_rmi_3_1, y1r); + psi_r_m3_p1 = _mm_abs_epi16(xmm2); + xmm2 = _mm_adds_epi16(rho_rpi_3_1, y1r); + psi_r_m3_m1 = _mm_abs_epi16(xmm2); + xmm2 = _mm_adds_epi16(rho_rpi_3_3, y1r); + psi_r_m3_m3 = _mm_abs_epi16(xmm2); + xmm2 = _mm_adds_epi16(rho_rpi_3_5, y1r); + psi_r_m3_m5 = _mm_abs_epi16(xmm2); + xmm2 = _mm_adds_epi16(rho_rpi_3_7, y1r); + psi_r_m3_m7 = _mm_abs_epi16(xmm2); + xmm2 = _mm_adds_epi16(rho_rmi_5_7, y1r); + psi_r_m5_p7 = _mm_abs_epi16(xmm2); + xmm2 = _mm_adds_epi16(rho_rmi_5_5, y1r); + psi_r_m5_p5 = _mm_abs_epi16(xmm2); + xmm2 = _mm_adds_epi16(rho_rmi_5_3, y1r); + psi_r_m5_p3 = _mm_abs_epi16(xmm2); + xmm2 = _mm_adds_epi16(rho_rmi_5_1, y1r); + psi_r_m5_p1 = _mm_abs_epi16(xmm2); + xmm2 = _mm_adds_epi16(rho_rpi_5_1, y1r); + psi_r_m5_m1 = _mm_abs_epi16(xmm2); + xmm2 = _mm_adds_epi16(rho_rpi_5_3, y1r); + psi_r_m5_m3 = _mm_abs_epi16(xmm2); + xmm2 = _mm_adds_epi16(rho_rpi_5_5, y1r); + psi_r_m5_m5 = _mm_abs_epi16(xmm2); + xmm2 = _mm_adds_epi16(rho_rpi_5_7, y1r); + psi_r_m5_m7 = _mm_abs_epi16(xmm2); + xmm2 = _mm_adds_epi16(rho_rmi_7_7, y1r); + psi_r_m7_p7 = _mm_abs_epi16(xmm2); + xmm2 = _mm_adds_epi16(rho_rmi_7_5, y1r); + psi_r_m7_p5 = _mm_abs_epi16(xmm2); + xmm2 = _mm_adds_epi16(rho_rmi_7_3, y1r); + psi_r_m7_p3 = _mm_abs_epi16(xmm2); + xmm2 = _mm_adds_epi16(rho_rmi_7_1, y1r); + psi_r_m7_p1 = _mm_abs_epi16(xmm2); + xmm2 = _mm_adds_epi16(rho_rpi_7_1, y1r); + psi_r_m7_m1 = _mm_abs_epi16(xmm2); + xmm2 = _mm_adds_epi16(rho_rpi_7_3, y1r); + psi_r_m7_m3 = _mm_abs_epi16(xmm2); + xmm2 = _mm_adds_epi16(rho_rpi_7_5, y1r); + psi_r_m7_m5 
= _mm_abs_epi16(xmm2); + xmm2 = _mm_adds_epi16(rho_rpi_7_7, y1r); + psi_r_m7_m7 = _mm_abs_epi16(xmm2); + + // Psi_i calculation from rho_rpi or rho_rmi + xmm2 = _mm_subs_epi16(rho_rmi_7_7, y1i); + psi_i_p7_p7 = _mm_abs_epi16(xmm2); + xmm2 = _mm_subs_epi16(rho_rmi_5_7, y1i); + psi_i_p7_p5 = _mm_abs_epi16(xmm2); + xmm2 = _mm_subs_epi16(rho_rmi_3_7, y1i); + psi_i_p7_p3 = _mm_abs_epi16(xmm2); + xmm2 = _mm_subs_epi16(rho_rmi_1_7, y1i); + psi_i_p7_p1 = _mm_abs_epi16(xmm2); + xmm2 = _mm_adds_epi16(rho_rpi_1_7, y1i); + psi_i_p7_m1 = _mm_abs_epi16(xmm2); + xmm2 = _mm_adds_epi16(rho_rpi_3_7, y1i); + psi_i_p7_m3 = _mm_abs_epi16(xmm2); + xmm2 = _mm_adds_epi16(rho_rpi_5_7, y1i); + psi_i_p7_m5 = _mm_abs_epi16(xmm2); + xmm2 = _mm_adds_epi16(rho_rpi_7_7, y1i); + psi_i_p7_m7 = _mm_abs_epi16(xmm2); + xmm2 = _mm_subs_epi16(rho_rmi_7_5, y1i); + psi_i_p5_p7 = _mm_abs_epi16(xmm2); + xmm2 = _mm_subs_epi16(rho_rmi_5_5, y1i); + psi_i_p5_p5 = _mm_abs_epi16(xmm2); + xmm2 = _mm_subs_epi16(rho_rmi_3_5, y1i); + psi_i_p5_p3 = _mm_abs_epi16(xmm2); + xmm2 = _mm_subs_epi16(rho_rmi_1_5, y1i); + psi_i_p5_p1 = _mm_abs_epi16(xmm2); + xmm2 = _mm_adds_epi16(rho_rpi_1_5, y1i); + psi_i_p5_m1 = _mm_abs_epi16(xmm2); + xmm2 = _mm_adds_epi16(rho_rpi_3_5, y1i); + psi_i_p5_m3 = _mm_abs_epi16(xmm2); + xmm2 = _mm_adds_epi16(rho_rpi_5_5, y1i); + psi_i_p5_m5 = _mm_abs_epi16(xmm2); + xmm2 = _mm_adds_epi16(rho_rpi_7_5, y1i); + psi_i_p5_m7 = _mm_abs_epi16(xmm2); + xmm2 = _mm_subs_epi16(rho_rmi_7_3, y1i); + psi_i_p3_p7 = _mm_abs_epi16(xmm2); + xmm2 = _mm_subs_epi16(rho_rmi_5_3, y1i); + psi_i_p3_p5 = _mm_abs_epi16(xmm2); + xmm2 = _mm_subs_epi16(rho_rmi_3_3, y1i); + psi_i_p3_p3 = _mm_abs_epi16(xmm2); + xmm2 = _mm_subs_epi16(rho_rmi_1_3, y1i); + psi_i_p3_p1 = _mm_abs_epi16(xmm2); + xmm2 = _mm_adds_epi16(rho_rpi_1_3, y1i); + psi_i_p3_m1 = _mm_abs_epi16(xmm2); + xmm2 = _mm_adds_epi16(rho_rpi_3_3, y1i); + psi_i_p3_m3 = _mm_abs_epi16(xmm2); + xmm2 = _mm_adds_epi16(rho_rpi_5_3, y1i); + psi_i_p3_m5 = _mm_abs_epi16(xmm2); + xmm2 
= _mm_adds_epi16(rho_rpi_7_3, y1i); + psi_i_p3_m7 = _mm_abs_epi16(xmm2); + xmm2 = _mm_subs_epi16(rho_rmi_7_1, y1i); + psi_i_p1_p7 = _mm_abs_epi16(xmm2); + xmm2 = _mm_subs_epi16(rho_rmi_5_1, y1i); + psi_i_p1_p5 = _mm_abs_epi16(xmm2); + xmm2 = _mm_subs_epi16(rho_rmi_3_1, y1i); + psi_i_p1_p3 = _mm_abs_epi16(xmm2); + xmm2 = _mm_subs_epi16(rho_rmi_1_1, y1i); + psi_i_p1_p1 = _mm_abs_epi16(xmm2); + xmm2 = _mm_adds_epi16(rho_rpi_1_1, y1i); + psi_i_p1_m1 = _mm_abs_epi16(xmm2); + xmm2 = _mm_adds_epi16(rho_rpi_3_1, y1i); + psi_i_p1_m3 = _mm_abs_epi16(xmm2); + xmm2 = _mm_adds_epi16(rho_rpi_5_1, y1i); + psi_i_p1_m5 = _mm_abs_epi16(xmm2); + xmm2 = _mm_adds_epi16(rho_rpi_7_1, y1i); + psi_i_p1_m7 = _mm_abs_epi16(xmm2); + + xmm2 = _mm_subs_epi16(rho_rpi_7_1, y1i); + psi_i_m1_p7 = _mm_abs_epi16(xmm2); + xmm2 = _mm_subs_epi16(rho_rpi_5_1, y1i); + psi_i_m1_p5 = _mm_abs_epi16(xmm2); + xmm2 = _mm_subs_epi16(rho_rpi_3_1, y1i); + psi_i_m1_p3 = _mm_abs_epi16(xmm2); + xmm2 = _mm_subs_epi16(rho_rpi_1_1, y1i); + psi_i_m1_p1 = _mm_abs_epi16(xmm2); + xmm2 = _mm_adds_epi16(rho_rmi_1_1, y1i); + psi_i_m1_m1 = _mm_abs_epi16(xmm2); + xmm2 = _mm_adds_epi16(rho_rmi_3_1, y1i); + psi_i_m1_m3 = _mm_abs_epi16(xmm2); + xmm2 = _mm_adds_epi16(rho_rmi_5_1, y1i); + psi_i_m1_m5 = _mm_abs_epi16(xmm2); + xmm2 = _mm_adds_epi16(rho_rmi_7_1, y1i); + psi_i_m1_m7 = _mm_abs_epi16(xmm2); + xmm2 = _mm_subs_epi16(rho_rpi_7_3, y1i); + psi_i_m3_p7 = _mm_abs_epi16(xmm2); + xmm2 = _mm_subs_epi16(rho_rpi_5_3, y1i); + psi_i_m3_p5 = _mm_abs_epi16(xmm2); + xmm2 = _mm_subs_epi16(rho_rpi_3_3, y1i); + psi_i_m3_p3 = _mm_abs_epi16(xmm2); + xmm2 = _mm_subs_epi16(rho_rpi_1_3, y1i); + psi_i_m3_p1 = _mm_abs_epi16(xmm2); + xmm2 = _mm_adds_epi16(rho_rmi_1_3, y1i); + psi_i_m3_m1 = _mm_abs_epi16(xmm2); + xmm2 = _mm_adds_epi16(rho_rmi_3_3, y1i); + psi_i_m3_m3 = _mm_abs_epi16(xmm2); + xmm2 = _mm_adds_epi16(rho_rmi_5_3, y1i); + psi_i_m3_m5 = _mm_abs_epi16(xmm2); + xmm2 = _mm_adds_epi16(rho_rmi_7_3, y1i); + psi_i_m3_m7 = _mm_abs_epi16(xmm2); + 
xmm2 = _mm_subs_epi16(rho_rpi_7_5, y1i); + psi_i_m5_p7 = _mm_abs_epi16(xmm2); + xmm2 = _mm_subs_epi16(rho_rpi_5_5, y1i); + psi_i_m5_p5 = _mm_abs_epi16(xmm2); + xmm2 = _mm_subs_epi16(rho_rpi_3_5, y1i); + psi_i_m5_p3 = _mm_abs_epi16(xmm2); + xmm2 = _mm_subs_epi16(rho_rpi_1_5, y1i); + psi_i_m5_p1 = _mm_abs_epi16(xmm2); + xmm2 = _mm_adds_epi16(rho_rmi_1_5, y1i); + psi_i_m5_m1 = _mm_abs_epi16(xmm2); + xmm2 = _mm_adds_epi16(rho_rmi_3_5, y1i); + psi_i_m5_m3 = _mm_abs_epi16(xmm2); + xmm2 = _mm_adds_epi16(rho_rmi_5_5, y1i); + psi_i_m5_m5 = _mm_abs_epi16(xmm2); + xmm2 = _mm_adds_epi16(rho_rmi_7_5, y1i); + psi_i_m5_m7 = _mm_abs_epi16(xmm2); + xmm2 = _mm_subs_epi16(rho_rpi_7_7, y1i); + psi_i_m7_p7 = _mm_abs_epi16(xmm2); + xmm2 = _mm_subs_epi16(rho_rpi_5_7, y1i); + psi_i_m7_p5 = _mm_abs_epi16(xmm2); + xmm2 = _mm_subs_epi16(rho_rpi_3_7, y1i); + psi_i_m7_p3 = _mm_abs_epi16(xmm2); + xmm2 = _mm_subs_epi16(rho_rpi_1_7, y1i); + psi_i_m7_p1 = _mm_abs_epi16(xmm2); + xmm2 = _mm_adds_epi16(rho_rmi_1_7, y1i); + psi_i_m7_m1 = _mm_abs_epi16(xmm2); + xmm2 = _mm_adds_epi16(rho_rmi_3_7, y1i); + psi_i_m7_m3 = _mm_abs_epi16(xmm2); + xmm2 = _mm_adds_epi16(rho_rmi_5_7, y1i); + psi_i_m7_m5 = _mm_abs_epi16(xmm2); + xmm2 = _mm_adds_epi16(rho_rmi_7_7, y1i); + psi_i_m7_m7 = _mm_abs_epi16(xmm2); + + + // Rearrange desired MF output + xmm0 = stream0_128i_in[i]; + xmm1 = stream0_128i_in[i+1]; + xmm0 = _mm_shufflelo_epi16(xmm0,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm0 = _mm_shufflehi_epi16(xmm0,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm0 = _mm_shuffle_epi32(xmm0,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm1 = _mm_shufflelo_epi16(xmm1,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm1 = _mm_shufflehi_epi16(xmm1,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm1 = _mm_shuffle_epi32(xmm1,0xd8); //_MM_SHUFFLE(0,2,1,3)); + //xmm0 = [Re(0,1) Re(2,3) Im(0,1) Im(2,3)] + //xmm1 = [Re(4,5) Re(6,7) Im(4,5) Im(6,7)] + y0r = _mm_unpacklo_epi64(xmm0,xmm1); // = [y0r(1),y0r(2),y0r(3),y0r(4)] + y0i = _mm_unpackhi_epi64(xmm0,xmm1); + + // Rearrange desired 
channel magnitudes + xmm2 = ch_mag_128i[i]; // = [|h|^2(1),|h|^2(1),|h|^2(2),|h|^2(2)]*(2/sqrt(10)) + xmm3 = ch_mag_128i[i+1]; // = [|h|^2(3),|h|^2(3),|h|^2(4),|h|^2(4)]*(2/sqrt(10)) + xmm2 = _mm_shufflelo_epi16(xmm2,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm2 = _mm_shufflehi_epi16(xmm2,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm2 = _mm_shuffle_epi32(xmm2,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm3 = _mm_shufflelo_epi16(xmm3,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm3 = _mm_shufflehi_epi16(xmm3,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm3 = _mm_shuffle_epi32(xmm3,0xd8); //_MM_SHUFFLE(0,2,1,3)); + ch_mag_des = _mm_unpacklo_epi64(xmm2,xmm3); + + // Rearrange interfering channel magnitudes + xmm2 = ch_mag_128i_i[i]; + xmm3 = ch_mag_128i_i[i+1]; + xmm2 = _mm_shufflelo_epi16(xmm2,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm2 = _mm_shufflehi_epi16(xmm2,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm2 = _mm_shuffle_epi32(xmm2,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm3 = _mm_shufflelo_epi16(xmm3,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm3 = _mm_shufflehi_epi16(xmm3,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm3 = _mm_shuffle_epi32(xmm3,0xd8); //_MM_SHUFFLE(0,2,1,3)); + ch_mag_int = _mm_unpacklo_epi64(xmm2,xmm3); + + y0r_one_over_sqrt_21 = _mm_mulhi_epi16(y0r, ONE_OVER_SQRT_42); + y0r_three_over_sqrt_21 = _mm_mulhi_epi16(y0r, THREE_OVER_SQRT_42); + y0r_five_over_sqrt_21 = _mm_mulhi_epi16(y0r, FIVE_OVER_SQRT_42); + y0r_five_over_sqrt_21 = _mm_slli_epi16(y0r_five_over_sqrt_21, 1); + y0r_seven_over_sqrt_21 = _mm_mulhi_epi16(y0r, SEVEN_OVER_SQRT_42); + y0r_seven_over_sqrt_21 = _mm_slli_epi16(y0r_seven_over_sqrt_21, 2); // Q2.14 + + y0i_one_over_sqrt_21 = _mm_mulhi_epi16(y0i, ONE_OVER_SQRT_42); + y0i_three_over_sqrt_21 = _mm_mulhi_epi16(y0i, THREE_OVER_SQRT_42); + y0i_five_over_sqrt_21 = _mm_mulhi_epi16(y0i, FIVE_OVER_SQRT_42); + y0i_five_over_sqrt_21 = _mm_slli_epi16(y0i_five_over_sqrt_21, 1); + y0i_seven_over_sqrt_21 = _mm_mulhi_epi16(y0i, SEVEN_OVER_SQRT_42); + y0i_seven_over_sqrt_21 = _mm_slli_epi16(y0i_seven_over_sqrt_21, 2); // 
Q2.14 + + y0_p_7_1 = _mm_adds_epi16(y0r_seven_over_sqrt_21, y0i_one_over_sqrt_21); + y0_p_7_3 = _mm_adds_epi16(y0r_seven_over_sqrt_21, y0i_three_over_sqrt_21); + y0_p_7_5 = _mm_adds_epi16(y0r_seven_over_sqrt_21, y0i_five_over_sqrt_21); + y0_p_7_7 = _mm_adds_epi16(y0r_seven_over_sqrt_21, y0i_seven_over_sqrt_21); + y0_p_5_1 = _mm_adds_epi16(y0r_five_over_sqrt_21, y0i_one_over_sqrt_21); + y0_p_5_3 = _mm_adds_epi16(y0r_five_over_sqrt_21, y0i_three_over_sqrt_21); + y0_p_5_5 = _mm_adds_epi16(y0r_five_over_sqrt_21, y0i_five_over_sqrt_21); + y0_p_5_7 = _mm_adds_epi16(y0r_five_over_sqrt_21, y0i_seven_over_sqrt_21); + y0_p_3_1 = _mm_adds_epi16(y0r_three_over_sqrt_21, y0i_one_over_sqrt_21); + y0_p_3_3 = _mm_adds_epi16(y0r_three_over_sqrt_21, y0i_three_over_sqrt_21); + y0_p_3_5 = _mm_adds_epi16(y0r_three_over_sqrt_21, y0i_five_over_sqrt_21); + y0_p_3_7 = _mm_adds_epi16(y0r_three_over_sqrt_21, y0i_seven_over_sqrt_21); + y0_p_1_1 = _mm_adds_epi16(y0r_one_over_sqrt_21, y0i_one_over_sqrt_21); + y0_p_1_3 = _mm_adds_epi16(y0r_one_over_sqrt_21, y0i_three_over_sqrt_21); + y0_p_1_5 = _mm_adds_epi16(y0r_one_over_sqrt_21, y0i_five_over_sqrt_21); + y0_p_1_7 = _mm_adds_epi16(y0r_one_over_sqrt_21, y0i_seven_over_sqrt_21); + + y0_m_1_1 = _mm_subs_epi16(y0r_one_over_sqrt_21, y0i_one_over_sqrt_21); + y0_m_1_3 = _mm_subs_epi16(y0r_one_over_sqrt_21, y0i_three_over_sqrt_21); + y0_m_1_5 = _mm_subs_epi16(y0r_one_over_sqrt_21, y0i_five_over_sqrt_21); + y0_m_1_7 = _mm_subs_epi16(y0r_one_over_sqrt_21, y0i_seven_over_sqrt_21); + y0_m_3_1 = _mm_subs_epi16(y0r_three_over_sqrt_21, y0i_one_over_sqrt_21); + y0_m_3_3 = _mm_subs_epi16(y0r_three_over_sqrt_21, y0i_three_over_sqrt_21); + y0_m_3_5 = _mm_subs_epi16(y0r_three_over_sqrt_21, y0i_five_over_sqrt_21); + y0_m_3_7 = _mm_subs_epi16(y0r_three_over_sqrt_21, y0i_seven_over_sqrt_21); + y0_m_5_1 = _mm_subs_epi16(y0r_five_over_sqrt_21, y0i_one_over_sqrt_21); + y0_m_5_3 = _mm_subs_epi16(y0r_five_over_sqrt_21, y0i_three_over_sqrt_21); + y0_m_5_5 = 
_mm_subs_epi16(y0r_five_over_sqrt_21, y0i_five_over_sqrt_21); + y0_m_5_7 = _mm_subs_epi16(y0r_five_over_sqrt_21, y0i_seven_over_sqrt_21); + y0_m_7_1 = _mm_subs_epi16(y0r_seven_over_sqrt_21, y0i_one_over_sqrt_21); + y0_m_7_3 = _mm_subs_epi16(y0r_seven_over_sqrt_21, y0i_three_over_sqrt_21); + y0_m_7_5 = _mm_subs_epi16(y0r_seven_over_sqrt_21, y0i_five_over_sqrt_21); + y0_m_7_7 = _mm_subs_epi16(y0r_seven_over_sqrt_21, y0i_seven_over_sqrt_21); + + // Detection of interference term + ch_mag_int_with_sigma2 = _mm_srai_epi16(ch_mag_int, 1); // *2 + two_ch_mag_int_with_sigma2 = ch_mag_int; // *4 + three_ch_mag_int_with_sigma2 = _mm_adds_epi16(ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2); // *6 + + interference_abs_64qam_epi16(psi_r_p7_p7, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_p7_p7, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_r_p7_p5, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_p7_p5, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_r_p7_p3, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_p7_p3, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_r_p7_p1, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_p7_p1, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_r_p7_m1, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_p7_m1, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_r_p7_m3, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_p7_m3, ONE_OVER_SQRT_2_42, 
THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_r_p7_m5, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_p7_m5, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_r_p7_m7, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_p7_m7, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_r_p5_p7, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_p5_p7, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_r_p5_p5, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_p5_p5, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_r_p5_p3, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_p5_p3, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_r_p5_p1, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_p5_p1, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_r_p5_m1, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_p5_m1, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_r_p5_m3, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_p5_m3, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_r_p5_m5, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, 
a_r_p5_m5, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_r_p5_m7, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_p5_m7, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_r_p3_p7, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_p3_p7, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_r_p3_p5, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_p3_p5, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_r_p3_p3, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_p3_p3, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_r_p3_p1, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_p3_p1, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_r_p3_m1, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_p3_m1, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_r_p3_m3, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_p3_m3, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_r_p3_m5, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_p3_m5, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_r_p3_m7, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, 
three_ch_mag_int_with_sigma2, a_r_p3_m7, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_r_p1_p7, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_p1_p7, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_r_p1_p5, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_p1_p5, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_r_p1_p3, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_p1_p3, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_r_p1_p1, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_p1_p1, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_r_p1_m1, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_p1_m1, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_r_p1_m3, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_p1_m3, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_r_p1_m5, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_p1_m5, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_r_p1_m7, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_p1_m7, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_r_m1_p7, ch_mag_int_with_sigma2, 
two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_m1_p7, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_r_m1_p5, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_m1_p5, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_r_m1_p3, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_m1_p3, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_r_m1_p1, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_m1_p1, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_r_m1_m1, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_m1_m1, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_r_m1_m3, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_m1_m3, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_r_m1_m5, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_m1_m5, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_r_m1_m7, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_m1_m7, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_r_m3_p7, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_m3_p7, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + 
interference_abs_64qam_epi16(psi_r_m3_p5, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_m3_p5, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_r_m3_p3, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_m3_p3, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_r_m3_p1, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_m3_p1, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_r_m3_m1, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_m3_m1, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_r_m3_m3, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_m3_m3, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_r_m3_m5, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_m3_m5, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_r_m3_m7, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_m3_m7, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_r_m5_p7, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_m5_p7, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_r_m5_p5, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_m5_p5, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + 
SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_r_m5_p3, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_m5_p3, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_r_m5_p1, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_m5_p1, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_r_m5_m1, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_m5_m1, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_r_m5_m3, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_m5_m3, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_r_m5_m5, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_m5_m5, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_r_m5_m7, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_m5_m7, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_r_m7_p7, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_m7_p7, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_r_m7_p5, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_m7_p5, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_r_m7_p3, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_m7_p3, ONE_OVER_SQRT_2_42, 
THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_r_m7_p1, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_m7_p1, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_r_m7_m1, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_m7_m1, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_r_m7_m3, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_m7_m3, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_r_m7_m5, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_m7_m5, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_r_m7_m7, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_m7_m7, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + + interference_abs_64qam_epi16(psi_i_p7_p7, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_p7_p7, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_i_p7_p5, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_p7_p5, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_i_p7_p3, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_p7_p3, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_i_p7_p1, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, 
a_i_p7_p1, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_i_p7_m1, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_p7_m1, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_i_p7_m3, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_p7_m3, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_i_p7_m5, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_p7_m5, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_i_p7_m7, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_p7_m7, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_i_p5_p7, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_p5_p7, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_i_p5_p5, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_p5_p5, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_i_p5_p3, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_p5_p3, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_i_p5_p1, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_p5_p1, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_i_p5_m1, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, 
three_ch_mag_int_with_sigma2, a_i_p5_m1, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_i_p5_m3, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_p5_m3, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_i_p5_m5, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_p5_m5, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_i_p5_m7, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_p5_m7, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_i_p3_p7, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_p3_p7, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_i_p3_p5, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_p3_p5, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_i_p3_p3, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_p3_p3, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_i_p3_p1, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_p3_p1, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_i_p3_m1, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_p3_m1, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_i_p3_m3, ch_mag_int_with_sigma2, 
two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_p3_m3, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_i_p3_m5, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_p3_m5, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_i_p3_m7, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_p3_m7, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_i_p1_p7, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_p1_p7, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_i_p1_p5, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_p1_p5, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_i_p1_p3, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_p1_p3, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_i_p1_p1, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_p1_p1, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_i_p1_m1, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_p1_m1, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_i_p1_m3, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_p1_m3, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + 
interference_abs_64qam_epi16(psi_i_p1_m5, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_p1_m5, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_i_p1_m7, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_p1_m7, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_i_m1_p7, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_m1_p7, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_i_m1_p5, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_m1_p5, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_i_m1_p3, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_m1_p3, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_i_m1_p1, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_m1_p1, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_i_m1_m1, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_m1_m1, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_i_m1_m3, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_m1_m3, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_i_m1_m5, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_m1_m5, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + 
SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_i_m1_m7, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_m1_m7, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_i_m3_p7, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_m3_p7, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_i_m3_p5, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_m3_p5, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_i_m3_p3, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_m3_p3, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_i_m3_p1, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_m3_p1, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_i_m3_m1, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_m3_m1, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_i_m3_m3, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_m3_m3, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_i_m3_m5, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_m3_m5, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_i_m3_m7, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_m3_m7, ONE_OVER_SQRT_2_42, 
THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_i_m5_p7, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_m5_p7, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_i_m5_p5, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_m5_p5, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_i_m5_p3, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_m5_p3, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_i_m5_p1, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_m5_p1, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_i_m5_m1, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_m5_m1, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_i_m5_m3, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_m5_m3, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_i_m5_m5, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_m5_m5, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_i_m5_m7, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_m5_m7, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_i_m7_p7, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, 
a_i_m7_p7, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_i_m7_p5, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_m7_p5, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_i_m7_p3, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_m7_p3, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_i_m7_p1, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_m7_p1, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_i_m7_m1, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_m7_m1, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_i_m7_m3, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_m7_m3, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_i_m7_m5, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_m7_m5, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_i_m7_m7, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_m7_m7, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + + // Calculation of a group of two terms in the bit metric involving product of psi and interference + prodsum_psi_a_epi16(psi_r_p7_p7, a_r_p7_p7, psi_i_p7_p7, a_i_p7_p7, psi_a_p7_p7); + prodsum_psi_a_epi16(psi_r_p7_p5, a_r_p7_p5, psi_i_p7_p5, a_i_p7_p5, psi_a_p7_p5); + prodsum_psi_a_epi16(psi_r_p7_p3, a_r_p7_p3, 
psi_i_p7_p3, a_i_p7_p3, psi_a_p7_p3); + prodsum_psi_a_epi16(psi_r_p7_p1, a_r_p7_p1, psi_i_p7_p1, a_i_p7_p1, psi_a_p7_p1); + prodsum_psi_a_epi16(psi_r_p7_m1, a_r_p7_m1, psi_i_p7_m1, a_i_p7_m1, psi_a_p7_m1); + prodsum_psi_a_epi16(psi_r_p7_m3, a_r_p7_m3, psi_i_p7_m3, a_i_p7_m3, psi_a_p7_m3); + prodsum_psi_a_epi16(psi_r_p7_m5, a_r_p7_m5, psi_i_p7_m5, a_i_p7_m5, psi_a_p7_m5); + prodsum_psi_a_epi16(psi_r_p7_m7, a_r_p7_m7, psi_i_p7_m7, a_i_p7_m7, psi_a_p7_m7); + prodsum_psi_a_epi16(psi_r_p5_p7, a_r_p5_p7, psi_i_p5_p7, a_i_p5_p7, psi_a_p5_p7); + prodsum_psi_a_epi16(psi_r_p5_p5, a_r_p5_p5, psi_i_p5_p5, a_i_p5_p5, psi_a_p5_p5); + prodsum_psi_a_epi16(psi_r_p5_p3, a_r_p5_p3, psi_i_p5_p3, a_i_p5_p3, psi_a_p5_p3); + prodsum_psi_a_epi16(psi_r_p5_p1, a_r_p5_p1, psi_i_p5_p1, a_i_p5_p1, psi_a_p5_p1); + prodsum_psi_a_epi16(psi_r_p5_m1, a_r_p5_m1, psi_i_p5_m1, a_i_p5_m1, psi_a_p5_m1); + prodsum_psi_a_epi16(psi_r_p5_m3, a_r_p5_m3, psi_i_p5_m3, a_i_p5_m3, psi_a_p5_m3); + prodsum_psi_a_epi16(psi_r_p5_m5, a_r_p5_m5, psi_i_p5_m5, a_i_p5_m5, psi_a_p5_m5); + prodsum_psi_a_epi16(psi_r_p5_m7, a_r_p5_m7, psi_i_p5_m7, a_i_p5_m7, psi_a_p5_m7); + prodsum_psi_a_epi16(psi_r_p3_p7, a_r_p3_p7, psi_i_p3_p7, a_i_p3_p7, psi_a_p3_p7); + prodsum_psi_a_epi16(psi_r_p3_p5, a_r_p3_p5, psi_i_p3_p5, a_i_p3_p5, psi_a_p3_p5); + prodsum_psi_a_epi16(psi_r_p3_p3, a_r_p3_p3, psi_i_p3_p3, a_i_p3_p3, psi_a_p3_p3); + prodsum_psi_a_epi16(psi_r_p3_p1, a_r_p3_p1, psi_i_p3_p1, a_i_p3_p1, psi_a_p3_p1); + prodsum_psi_a_epi16(psi_r_p3_m1, a_r_p3_m1, psi_i_p3_m1, a_i_p3_m1, psi_a_p3_m1); + prodsum_psi_a_epi16(psi_r_p3_m3, a_r_p3_m3, psi_i_p3_m3, a_i_p3_m3, psi_a_p3_m3); + prodsum_psi_a_epi16(psi_r_p3_m5, a_r_p3_m5, psi_i_p3_m5, a_i_p3_m5, psi_a_p3_m5); + prodsum_psi_a_epi16(psi_r_p3_m7, a_r_p3_m7, psi_i_p3_m7, a_i_p3_m7, psi_a_p3_m7); + prodsum_psi_a_epi16(psi_r_p1_p7, a_r_p1_p7, psi_i_p1_p7, a_i_p1_p7, psi_a_p1_p7); + prodsum_psi_a_epi16(psi_r_p1_p5, a_r_p1_p5, psi_i_p1_p5, a_i_p1_p5, psi_a_p1_p5); + 
prodsum_psi_a_epi16(psi_r_p1_p3, a_r_p1_p3, psi_i_p1_p3, a_i_p1_p3, psi_a_p1_p3); + prodsum_psi_a_epi16(psi_r_p1_p1, a_r_p1_p1, psi_i_p1_p1, a_i_p1_p1, psi_a_p1_p1); + prodsum_psi_a_epi16(psi_r_p1_m1, a_r_p1_m1, psi_i_p1_m1, a_i_p1_m1, psi_a_p1_m1); + prodsum_psi_a_epi16(psi_r_p1_m3, a_r_p1_m3, psi_i_p1_m3, a_i_p1_m3, psi_a_p1_m3); + prodsum_psi_a_epi16(psi_r_p1_m5, a_r_p1_m5, psi_i_p1_m5, a_i_p1_m5, psi_a_p1_m5); + prodsum_psi_a_epi16(psi_r_p1_m7, a_r_p1_m7, psi_i_p1_m7, a_i_p1_m7, psi_a_p1_m7); + prodsum_psi_a_epi16(psi_r_m1_p7, a_r_m1_p7, psi_i_m1_p7, a_i_m1_p7, psi_a_m1_p7); + prodsum_psi_a_epi16(psi_r_m1_p5, a_r_m1_p5, psi_i_m1_p5, a_i_m1_p5, psi_a_m1_p5); + prodsum_psi_a_epi16(psi_r_m1_p3, a_r_m1_p3, psi_i_m1_p3, a_i_m1_p3, psi_a_m1_p3); + prodsum_psi_a_epi16(psi_r_m1_p1, a_r_m1_p1, psi_i_m1_p1, a_i_m1_p1, psi_a_m1_p1); + prodsum_psi_a_epi16(psi_r_m1_m1, a_r_m1_m1, psi_i_m1_m1, a_i_m1_m1, psi_a_m1_m1); + prodsum_psi_a_epi16(psi_r_m1_m3, a_r_m1_m3, psi_i_m1_m3, a_i_m1_m3, psi_a_m1_m3); + prodsum_psi_a_epi16(psi_r_m1_m5, a_r_m1_m5, psi_i_m1_m5, a_i_m1_m5, psi_a_m1_m5); + prodsum_psi_a_epi16(psi_r_m1_m7, a_r_m1_m7, psi_i_m1_m7, a_i_m1_m7, psi_a_m1_m7); + prodsum_psi_a_epi16(psi_r_m3_p7, a_r_m3_p7, psi_i_m3_p7, a_i_m3_p7, psi_a_m3_p7); + prodsum_psi_a_epi16(psi_r_m3_p5, a_r_m3_p5, psi_i_m3_p5, a_i_m3_p5, psi_a_m3_p5); + prodsum_psi_a_epi16(psi_r_m3_p3, a_r_m3_p3, psi_i_m3_p3, a_i_m3_p3, psi_a_m3_p3); + prodsum_psi_a_epi16(psi_r_m3_p1, a_r_m3_p1, psi_i_m3_p1, a_i_m3_p1, psi_a_m3_p1); + prodsum_psi_a_epi16(psi_r_m3_m1, a_r_m3_m1, psi_i_m3_m1, a_i_m3_m1, psi_a_m3_m1); + prodsum_psi_a_epi16(psi_r_m3_m3, a_r_m3_m3, psi_i_m3_m3, a_i_m3_m3, psi_a_m3_m3); + prodsum_psi_a_epi16(psi_r_m3_m5, a_r_m3_m5, psi_i_m3_m5, a_i_m3_m5, psi_a_m3_m5); + prodsum_psi_a_epi16(psi_r_m3_m7, a_r_m3_m7, psi_i_m3_m7, a_i_m3_m7, psi_a_m3_m7); + prodsum_psi_a_epi16(psi_r_m5_p7, a_r_m5_p7, psi_i_m5_p7, a_i_m5_p7, psi_a_m5_p7); + prodsum_psi_a_epi16(psi_r_m5_p5, a_r_m5_p5, psi_i_m5_p5, a_i_m5_p5, 
psi_a_m5_p5); + prodsum_psi_a_epi16(psi_r_m5_p3, a_r_m5_p3, psi_i_m5_p3, a_i_m5_p3, psi_a_m5_p3); + prodsum_psi_a_epi16(psi_r_m5_p1, a_r_m5_p1, psi_i_m5_p1, a_i_m5_p1, psi_a_m5_p1); + prodsum_psi_a_epi16(psi_r_m5_m1, a_r_m5_m1, psi_i_m5_m1, a_i_m5_m1, psi_a_m5_m1); + prodsum_psi_a_epi16(psi_r_m5_m3, a_r_m5_m3, psi_i_m5_m3, a_i_m5_m3, psi_a_m5_m3); + prodsum_psi_a_epi16(psi_r_m5_m5, a_r_m5_m5, psi_i_m5_m5, a_i_m5_m5, psi_a_m5_m5); + prodsum_psi_a_epi16(psi_r_m5_m7, a_r_m5_m7, psi_i_m5_m7, a_i_m5_m7, psi_a_m5_m7); + prodsum_psi_a_epi16(psi_r_m7_p7, a_r_m7_p7, psi_i_m7_p7, a_i_m7_p7, psi_a_m7_p7); + prodsum_psi_a_epi16(psi_r_m7_p5, a_r_m7_p5, psi_i_m7_p5, a_i_m7_p5, psi_a_m7_p5); + prodsum_psi_a_epi16(psi_r_m7_p3, a_r_m7_p3, psi_i_m7_p3, a_i_m7_p3, psi_a_m7_p3); + prodsum_psi_a_epi16(psi_r_m7_p1, a_r_m7_p1, psi_i_m7_p1, a_i_m7_p1, psi_a_m7_p1); + prodsum_psi_a_epi16(psi_r_m7_m1, a_r_m7_m1, psi_i_m7_m1, a_i_m7_m1, psi_a_m7_m1); + prodsum_psi_a_epi16(psi_r_m7_m3, a_r_m7_m3, psi_i_m7_m3, a_i_m7_m3, psi_a_m7_m3); + prodsum_psi_a_epi16(psi_r_m7_m5, a_r_m7_m5, psi_i_m7_m5, a_i_m7_m5, psi_a_m7_m5); + prodsum_psi_a_epi16(psi_r_m7_m7, a_r_m7_m7, psi_i_m7_m7, a_i_m7_m7, psi_a_m7_m7); + + // Multiply by sqrt(2) + psi_a_p7_p7 = _mm_mulhi_epi16(psi_a_p7_p7, ONE_OVER_SQRT_2); + psi_a_p7_p7 = _mm_slli_epi16(psi_a_p7_p7, 2); + psi_a_p7_p5 = _mm_mulhi_epi16(psi_a_p7_p5, ONE_OVER_SQRT_2); + psi_a_p7_p5 = _mm_slli_epi16(psi_a_p7_p5, 2); + psi_a_p7_p3 = _mm_mulhi_epi16(psi_a_p7_p3, ONE_OVER_SQRT_2); + psi_a_p7_p3 = _mm_slli_epi16(psi_a_p7_p3, 2); + psi_a_p7_p1 = _mm_mulhi_epi16(psi_a_p7_p1, ONE_OVER_SQRT_2); + psi_a_p7_p1 = _mm_slli_epi16(psi_a_p7_p1, 2); + psi_a_p7_m1 = _mm_mulhi_epi16(psi_a_p7_m1, ONE_OVER_SQRT_2); + psi_a_p7_m1 = _mm_slli_epi16(psi_a_p7_m1, 2); + psi_a_p7_m3 = _mm_mulhi_epi16(psi_a_p7_m3, ONE_OVER_SQRT_2); + psi_a_p7_m3 = _mm_slli_epi16(psi_a_p7_m3, 2); + psi_a_p7_m5 = _mm_mulhi_epi16(psi_a_p7_m5, ONE_OVER_SQRT_2); + psi_a_p7_m5 = _mm_slli_epi16(psi_a_p7_m5, 2); + 
psi_a_p7_m7 = _mm_mulhi_epi16(psi_a_p7_m7, ONE_OVER_SQRT_2); + psi_a_p7_m7 = _mm_slli_epi16(psi_a_p7_m7, 2); + psi_a_p5_p7 = _mm_mulhi_epi16(psi_a_p5_p7, ONE_OVER_SQRT_2); + psi_a_p5_p7 = _mm_slli_epi16(psi_a_p5_p7, 2); + psi_a_p5_p5 = _mm_mulhi_epi16(psi_a_p5_p5, ONE_OVER_SQRT_2); + psi_a_p5_p5 = _mm_slli_epi16(psi_a_p5_p5, 2); + psi_a_p5_p3 = _mm_mulhi_epi16(psi_a_p5_p3, ONE_OVER_SQRT_2); + psi_a_p5_p3 = _mm_slli_epi16(psi_a_p5_p3, 2); + psi_a_p5_p1 = _mm_mulhi_epi16(psi_a_p5_p1, ONE_OVER_SQRT_2); + psi_a_p5_p1 = _mm_slli_epi16(psi_a_p5_p1, 2); + psi_a_p5_m1 = _mm_mulhi_epi16(psi_a_p5_m1, ONE_OVER_SQRT_2); + psi_a_p5_m1 = _mm_slli_epi16(psi_a_p5_m1, 2); + psi_a_p5_m3 = _mm_mulhi_epi16(psi_a_p5_m3, ONE_OVER_SQRT_2); + psi_a_p5_m3 = _mm_slli_epi16(psi_a_p5_m3, 2); + psi_a_p5_m5 = _mm_mulhi_epi16(psi_a_p5_m5, ONE_OVER_SQRT_2); + psi_a_p5_m5 = _mm_slli_epi16(psi_a_p5_m5, 2); + psi_a_p5_m7 = _mm_mulhi_epi16(psi_a_p5_m7, ONE_OVER_SQRT_2); + psi_a_p5_m7 = _mm_slli_epi16(psi_a_p5_m7, 2); + psi_a_p3_p7 = _mm_mulhi_epi16(psi_a_p3_p7, ONE_OVER_SQRT_2); + psi_a_p3_p7 = _mm_slli_epi16(psi_a_p3_p7, 2); + psi_a_p3_p5 = _mm_mulhi_epi16(psi_a_p3_p5, ONE_OVER_SQRT_2); + psi_a_p3_p5 = _mm_slli_epi16(psi_a_p3_p5, 2); + psi_a_p3_p3 = _mm_mulhi_epi16(psi_a_p3_p3, ONE_OVER_SQRT_2); + psi_a_p3_p3 = _mm_slli_epi16(psi_a_p3_p3, 2); + psi_a_p3_p1 = _mm_mulhi_epi16(psi_a_p3_p1, ONE_OVER_SQRT_2); + psi_a_p3_p1 = _mm_slli_epi16(psi_a_p3_p1, 2); + psi_a_p3_m1 = _mm_mulhi_epi16(psi_a_p3_m1, ONE_OVER_SQRT_2); + psi_a_p3_m1 = _mm_slli_epi16(psi_a_p3_m1, 2); + psi_a_p3_m3 = _mm_mulhi_epi16(psi_a_p3_m3, ONE_OVER_SQRT_2); + psi_a_p3_m3 = _mm_slli_epi16(psi_a_p3_m3, 2); + psi_a_p3_m5 = _mm_mulhi_epi16(psi_a_p3_m5, ONE_OVER_SQRT_2); + psi_a_p3_m5 = _mm_slli_epi16(psi_a_p3_m5, 2); + psi_a_p3_m7 = _mm_mulhi_epi16(psi_a_p3_m7, ONE_OVER_SQRT_2); + psi_a_p3_m7 = _mm_slli_epi16(psi_a_p3_m7, 2); + psi_a_p1_p7 = _mm_mulhi_epi16(psi_a_p1_p7, ONE_OVER_SQRT_2); + psi_a_p1_p7 = _mm_slli_epi16(psi_a_p1_p7, 2); + 
psi_a_p1_p5 = _mm_mulhi_epi16(psi_a_p1_p5, ONE_OVER_SQRT_2); + psi_a_p1_p5 = _mm_slli_epi16(psi_a_p1_p5, 2); + psi_a_p1_p3 = _mm_mulhi_epi16(psi_a_p1_p3, ONE_OVER_SQRT_2); + psi_a_p1_p3 = _mm_slli_epi16(psi_a_p1_p3, 2); + psi_a_p1_p1 = _mm_mulhi_epi16(psi_a_p1_p1, ONE_OVER_SQRT_2); + psi_a_p1_p1 = _mm_slli_epi16(psi_a_p1_p1, 2); + psi_a_p1_m1 = _mm_mulhi_epi16(psi_a_p1_m1, ONE_OVER_SQRT_2); + psi_a_p1_m1 = _mm_slli_epi16(psi_a_p1_m1, 2); + psi_a_p1_m3 = _mm_mulhi_epi16(psi_a_p1_m3, ONE_OVER_SQRT_2); + psi_a_p1_m3 = _mm_slli_epi16(psi_a_p1_m3, 2); + psi_a_p1_m5 = _mm_mulhi_epi16(psi_a_p1_m5, ONE_OVER_SQRT_2); + psi_a_p1_m5 = _mm_slli_epi16(psi_a_p1_m5, 2); + psi_a_p1_m7 = _mm_mulhi_epi16(psi_a_p1_m7, ONE_OVER_SQRT_2); + psi_a_p1_m7 = _mm_slli_epi16(psi_a_p1_m7, 2); + psi_a_m1_p7 = _mm_mulhi_epi16(psi_a_m1_p7, ONE_OVER_SQRT_2); + psi_a_m1_p7 = _mm_slli_epi16(psi_a_m1_p7, 2); + psi_a_m1_p5 = _mm_mulhi_epi16(psi_a_m1_p5, ONE_OVER_SQRT_2); + psi_a_m1_p5 = _mm_slli_epi16(psi_a_m1_p5, 2); + psi_a_m1_p3 = _mm_mulhi_epi16(psi_a_m1_p3, ONE_OVER_SQRT_2); + psi_a_m1_p3 = _mm_slli_epi16(psi_a_m1_p3, 2); + psi_a_m1_p1 = _mm_mulhi_epi16(psi_a_m1_p1, ONE_OVER_SQRT_2); + psi_a_m1_p1 = _mm_slli_epi16(psi_a_m1_p1, 2); + psi_a_m1_m1 = _mm_mulhi_epi16(psi_a_m1_m1, ONE_OVER_SQRT_2); + psi_a_m1_m1 = _mm_slli_epi16(psi_a_m1_m1, 2); + psi_a_m1_m3 = _mm_mulhi_epi16(psi_a_m1_m3, ONE_OVER_SQRT_2); + psi_a_m1_m3 = _mm_slli_epi16(psi_a_m1_m3, 2); + psi_a_m1_m5 = _mm_mulhi_epi16(psi_a_m1_m5, ONE_OVER_SQRT_2); + psi_a_m1_m5 = _mm_slli_epi16(psi_a_m1_m5, 2); + psi_a_m1_m7 = _mm_mulhi_epi16(psi_a_m1_m7, ONE_OVER_SQRT_2); + psi_a_m1_m7 = _mm_slli_epi16(psi_a_m1_m7, 2); + psi_a_m3_p7 = _mm_mulhi_epi16(psi_a_m3_p7, ONE_OVER_SQRT_2); + psi_a_m3_p7 = _mm_slli_epi16(psi_a_m3_p7, 2); + psi_a_m3_p5 = _mm_mulhi_epi16(psi_a_m3_p5, ONE_OVER_SQRT_2); + psi_a_m3_p5 = _mm_slli_epi16(psi_a_m3_p5, 2); + psi_a_m3_p3 = _mm_mulhi_epi16(psi_a_m3_p3, ONE_OVER_SQRT_2); + psi_a_m3_p3 = _mm_slli_epi16(psi_a_m3_p3, 2); + 
psi_a_m3_p1 = _mm_mulhi_epi16(psi_a_m3_p1, ONE_OVER_SQRT_2); + psi_a_m3_p1 = _mm_slli_epi16(psi_a_m3_p1, 2); + psi_a_m3_m1 = _mm_mulhi_epi16(psi_a_m3_m1, ONE_OVER_SQRT_2); + psi_a_m3_m1 = _mm_slli_epi16(psi_a_m3_m1, 2); + psi_a_m3_m3 = _mm_mulhi_epi16(psi_a_m3_m3, ONE_OVER_SQRT_2); + psi_a_m3_m3 = _mm_slli_epi16(psi_a_m3_m3, 2); + psi_a_m3_m5 = _mm_mulhi_epi16(psi_a_m3_m5, ONE_OVER_SQRT_2); + psi_a_m3_m5 = _mm_slli_epi16(psi_a_m3_m5, 2); + psi_a_m3_m7 = _mm_mulhi_epi16(psi_a_m3_m7, ONE_OVER_SQRT_2); + psi_a_m3_m7 = _mm_slli_epi16(psi_a_m3_m7, 2); + psi_a_m5_p7 = _mm_mulhi_epi16(psi_a_m5_p7, ONE_OVER_SQRT_2); + psi_a_m5_p7 = _mm_slli_epi16(psi_a_m5_p7, 2); + psi_a_m5_p5 = _mm_mulhi_epi16(psi_a_m5_p5, ONE_OVER_SQRT_2); + psi_a_m5_p5 = _mm_slli_epi16(psi_a_m5_p5, 2); + psi_a_m5_p3 = _mm_mulhi_epi16(psi_a_m5_p3, ONE_OVER_SQRT_2); + psi_a_m5_p3 = _mm_slli_epi16(psi_a_m5_p3, 2); + psi_a_m5_p1 = _mm_mulhi_epi16(psi_a_m5_p1, ONE_OVER_SQRT_2); + psi_a_m5_p1 = _mm_slli_epi16(psi_a_m5_p1, 2); + psi_a_m5_m1 = _mm_mulhi_epi16(psi_a_m5_m1, ONE_OVER_SQRT_2); + psi_a_m5_m1 = _mm_slli_epi16(psi_a_m5_m1, 2); + psi_a_m5_m3 = _mm_mulhi_epi16(psi_a_m5_m3, ONE_OVER_SQRT_2); + psi_a_m5_m3 = _mm_slli_epi16(psi_a_m5_m3, 2); + psi_a_m5_m5 = _mm_mulhi_epi16(psi_a_m5_m5, ONE_OVER_SQRT_2); + psi_a_m5_m5 = _mm_slli_epi16(psi_a_m5_m5, 2); + psi_a_m5_m7 = _mm_mulhi_epi16(psi_a_m5_m7, ONE_OVER_SQRT_2); + psi_a_m5_m7 = _mm_slli_epi16(psi_a_m5_m7, 2); + psi_a_m7_p7 = _mm_mulhi_epi16(psi_a_m7_p7, ONE_OVER_SQRT_2); + psi_a_m7_p7 = _mm_slli_epi16(psi_a_m7_p7, 2); + psi_a_m7_p5 = _mm_mulhi_epi16(psi_a_m7_p5, ONE_OVER_SQRT_2); + psi_a_m7_p5 = _mm_slli_epi16(psi_a_m7_p5, 2); + psi_a_m7_p3 = _mm_mulhi_epi16(psi_a_m7_p3, ONE_OVER_SQRT_2); + psi_a_m7_p3 = _mm_slli_epi16(psi_a_m7_p3, 2); + psi_a_m7_p1 = _mm_mulhi_epi16(psi_a_m7_p1, ONE_OVER_SQRT_2); + psi_a_m7_p1 = _mm_slli_epi16(psi_a_m7_p1, 2); + psi_a_m7_m1 = _mm_mulhi_epi16(psi_a_m7_m1, ONE_OVER_SQRT_2); + psi_a_m7_m1 = _mm_slli_epi16(psi_a_m7_m1, 2); + 
psi_a_m7_m3 = _mm_mulhi_epi16(psi_a_m7_m3, ONE_OVER_SQRT_2); + psi_a_m7_m3 = _mm_slli_epi16(psi_a_m7_m3, 2); + psi_a_m7_m5 = _mm_mulhi_epi16(psi_a_m7_m5, ONE_OVER_SQRT_2); + psi_a_m7_m5 = _mm_slli_epi16(psi_a_m7_m5, 2); + psi_a_m7_m7 = _mm_mulhi_epi16(psi_a_m7_m7, ONE_OVER_SQRT_2); + psi_a_m7_m7 = _mm_slli_epi16(psi_a_m7_m7, 2); + + // Calculation of a group of two terms in the bit metric involving squares of interference + square_a_64qam_epi16(a_r_p7_p7, a_i_p7_p7, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_p7_p7); + square_a_64qam_epi16(a_r_p7_p5, a_i_p7_p5, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_p7_p5); + square_a_64qam_epi16(a_r_p7_p3, a_i_p7_p3, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_p7_p3); + square_a_64qam_epi16(a_r_p7_p1, a_i_p7_p1, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_p7_p1); + square_a_64qam_epi16(a_r_p7_m1, a_i_p7_m1, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_p7_m1); + square_a_64qam_epi16(a_r_p7_m3, a_i_p7_m3, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_p7_m3); + square_a_64qam_epi16(a_r_p7_m5, a_i_p7_m5, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_p7_m5); + square_a_64qam_epi16(a_r_p7_m7, a_i_p7_m7, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_p7_m7); + square_a_64qam_epi16(a_r_p5_p7, a_i_p5_p7, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_p5_p7); + square_a_64qam_epi16(a_r_p5_p5, a_i_p5_p5, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_p5_p5); + square_a_64qam_epi16(a_r_p5_p3, a_i_p5_p3, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_p5_p3); + square_a_64qam_epi16(a_r_p5_p1, a_i_p5_p1, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_p5_p1); + square_a_64qam_epi16(a_r_p5_m1, a_i_p5_m1, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_p5_m1); + square_a_64qam_epi16(a_r_p5_m3, a_i_p5_m3, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_p5_m3); + square_a_64qam_epi16(a_r_p5_m5, a_i_p5_m5, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_p5_m5); + square_a_64qam_epi16(a_r_p5_m7, a_i_p5_m7, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_p5_m7); + square_a_64qam_epi16(a_r_p3_p7, a_i_p3_p7, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_p3_p7); + square_a_64qam_epi16(a_r_p3_p5, a_i_p3_p5, ch_mag_int, 
SQRT_42_OVER_FOUR, a_sq_p3_p5); + square_a_64qam_epi16(a_r_p3_p3, a_i_p3_p3, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_p3_p3); + square_a_64qam_epi16(a_r_p3_p1, a_i_p3_p1, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_p3_p1); + square_a_64qam_epi16(a_r_p3_m1, a_i_p3_m1, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_p3_m1); + square_a_64qam_epi16(a_r_p3_m3, a_i_p3_m3, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_p3_m3); + square_a_64qam_epi16(a_r_p3_m5, a_i_p3_m5, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_p3_m5); + square_a_64qam_epi16(a_r_p3_m7, a_i_p3_m7, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_p3_m7); + square_a_64qam_epi16(a_r_p1_p7, a_i_p1_p7, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_p1_p7); + square_a_64qam_epi16(a_r_p1_p5, a_i_p1_p5, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_p1_p5); + square_a_64qam_epi16(a_r_p1_p3, a_i_p1_p3, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_p1_p3); + square_a_64qam_epi16(a_r_p1_p1, a_i_p1_p1, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_p1_p1); + square_a_64qam_epi16(a_r_p1_m1, a_i_p1_m1, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_p1_m1); + square_a_64qam_epi16(a_r_p1_m3, a_i_p1_m3, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_p1_m3); + square_a_64qam_epi16(a_r_p1_m5, a_i_p1_m5, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_p1_m5); + square_a_64qam_epi16(a_r_p1_m7, a_i_p1_m7, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_p1_m7); + square_a_64qam_epi16(a_r_m1_p7, a_i_m1_p7, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_m1_p7); + square_a_64qam_epi16(a_r_m1_p5, a_i_m1_p5, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_m1_p5); + square_a_64qam_epi16(a_r_m1_p3, a_i_m1_p3, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_m1_p3); + square_a_64qam_epi16(a_r_m1_p1, a_i_m1_p1, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_m1_p1); + square_a_64qam_epi16(a_r_m1_m1, a_i_m1_m1, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_m1_m1); + square_a_64qam_epi16(a_r_m1_m3, a_i_m1_m3, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_m1_m3); + square_a_64qam_epi16(a_r_m1_m5, a_i_m1_m5, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_m1_m5); + square_a_64qam_epi16(a_r_m1_m7, a_i_m1_m7, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_m1_m7); + 
square_a_64qam_epi16(a_r_m3_p7, a_i_m3_p7, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_m3_p7); + square_a_64qam_epi16(a_r_m3_p5, a_i_m3_p5, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_m3_p5); + square_a_64qam_epi16(a_r_m3_p3, a_i_m3_p3, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_m3_p3); + square_a_64qam_epi16(a_r_m3_p1, a_i_m3_p1, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_m3_p1); + square_a_64qam_epi16(a_r_m3_m1, a_i_m3_m1, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_m3_m1); + square_a_64qam_epi16(a_r_m3_m3, a_i_m3_m3, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_m3_m3); + square_a_64qam_epi16(a_r_m3_m5, a_i_m3_m5, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_m3_m5); + square_a_64qam_epi16(a_r_m3_m7, a_i_m3_m7, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_m3_m7); + square_a_64qam_epi16(a_r_m5_p7, a_i_m5_p7, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_m5_p7); + square_a_64qam_epi16(a_r_m5_p5, a_i_m5_p5, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_m5_p5); + square_a_64qam_epi16(a_r_m5_p3, a_i_m5_p3, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_m5_p3); + square_a_64qam_epi16(a_r_m5_p1, a_i_m5_p1, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_m5_p1); + square_a_64qam_epi16(a_r_m5_m1, a_i_m5_m1, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_m5_m1); + square_a_64qam_epi16(a_r_m5_m3, a_i_m5_m3, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_m5_m3); + square_a_64qam_epi16(a_r_m5_m5, a_i_m5_m5, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_m5_m5); + square_a_64qam_epi16(a_r_m5_m7, a_i_m5_m7, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_m5_m7); + square_a_64qam_epi16(a_r_m7_p7, a_i_m7_p7, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_m7_p7); + square_a_64qam_epi16(a_r_m7_p5, a_i_m7_p5, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_m7_p5); + square_a_64qam_epi16(a_r_m7_p3, a_i_m7_p3, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_m7_p3); + square_a_64qam_epi16(a_r_m7_p1, a_i_m7_p1, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_m7_p1); + square_a_64qam_epi16(a_r_m7_m1, a_i_m7_m1, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_m7_m1); + square_a_64qam_epi16(a_r_m7_m3, a_i_m7_m3, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_m7_m3); + square_a_64qam_epi16(a_r_m7_m5, 
a_i_m7_m5, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_m7_m5); + square_a_64qam_epi16(a_r_m7_m7, a_i_m7_m7, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_m7_m7); + + // Computing different multiples of ||h0||^2 + // x=1, y=1 + ch_mag_2_over_42_with_sigma2 = _mm_mulhi_epi16(ch_mag_des,ONE_OVER_FOUR_SQRT_42); + ch_mag_2_over_42_with_sigma2 = _mm_slli_epi16(ch_mag_2_over_42_with_sigma2,1); + // x=1, y=3 + ch_mag_10_over_42_with_sigma2 = _mm_mulhi_epi16(ch_mag_des,FIVE_OVER_FOUR_SQRT_42); + ch_mag_10_over_42_with_sigma2 = _mm_slli_epi16(ch_mag_10_over_42_with_sigma2,1); + // x=1, x=5 + ch_mag_26_over_42_with_sigma2 = _mm_mulhi_epi16(ch_mag_des,THIRTEEN_OVER_FOUR_SQRT_42); + ch_mag_26_over_42_with_sigma2 = _mm_slli_epi16(ch_mag_26_over_42_with_sigma2,1); + // x=1, y=7 + ch_mag_50_over_42_with_sigma2 = _mm_mulhi_epi16(ch_mag_des,TWENTYFIVE_OVER_FOUR_SQRT_42); + ch_mag_50_over_42_with_sigma2 = _mm_slli_epi16(ch_mag_50_over_42_with_sigma2,1); + // x=3, y=3 + ch_mag_18_over_42_with_sigma2 = _mm_mulhi_epi16(ch_mag_des,NINE_OVER_FOUR_SQRT_42); + ch_mag_18_over_42_with_sigma2 = _mm_slli_epi16(ch_mag_18_over_42_with_sigma2,1); + // x=3, y=5 + ch_mag_34_over_42_with_sigma2 = _mm_mulhi_epi16(ch_mag_des,SEVENTEEN_OVER_FOUR_SQRT_42); + ch_mag_34_over_42_with_sigma2 = _mm_slli_epi16(ch_mag_34_over_42_with_sigma2,1); + // x=3, y=7 + ch_mag_58_over_42_with_sigma2 = _mm_mulhi_epi16(ch_mag_des,TWENTYNINE_OVER_FOUR_SQRT_42); + ch_mag_58_over_42_with_sigma2 = _mm_slli_epi16(ch_mag_58_over_42_with_sigma2,2); + // x=5, y=5 + ch_mag_50_over_42_with_sigma2 = _mm_mulhi_epi16(ch_mag_des,TWENTYFIVE_OVER_FOUR_SQRT_42); + ch_mag_50_over_42_with_sigma2 = _mm_slli_epi16(ch_mag_50_over_42_with_sigma2,1); + // x=5, y=7 + ch_mag_74_over_42_with_sigma2 = _mm_mulhi_epi16(ch_mag_des,THIRTYSEVEN_OVER_FOUR_SQRT_42); + ch_mag_74_over_42_with_sigma2 = _mm_slli_epi16(ch_mag_74_over_42_with_sigma2,2); + // x=7, y=7 + ch_mag_98_over_42_with_sigma2 = _mm_mulhi_epi16(ch_mag_des,FORTYNINE_OVER_FOUR_SQRT_42); + 
ch_mag_98_over_42_with_sigma2 = _mm_slli_epi16(ch_mag_98_over_42_with_sigma2,2); + + // Computing Metrics + xmm0 = _mm_subs_epi16(psi_a_p7_p7, a_sq_p7_p7); + xmm1 = _mm_adds_epi16(xmm0, y0_p_7_7); + bit_met_p7_p7 = _mm_subs_epi16(xmm1, ch_mag_98_over_42_with_sigma2); + xmm0 = _mm_subs_epi16(psi_a_p7_p5, a_sq_p7_p5); + xmm1 = _mm_adds_epi16(xmm0, y0_p_7_5); + bit_met_p7_p5 = _mm_subs_epi16(xmm1, ch_mag_74_over_42_with_sigma2); + xmm0 = _mm_subs_epi16(psi_a_p7_p3, a_sq_p7_p3); + xmm1 = _mm_adds_epi16(xmm0, y0_p_7_3); + bit_met_p7_p3 = _mm_subs_epi16(xmm1, ch_mag_58_over_42_with_sigma2); + xmm0 = _mm_subs_epi16(psi_a_p7_p1, a_sq_p7_p1); + xmm1 = _mm_adds_epi16(xmm0, y0_p_7_1); + bit_met_p7_p1 = _mm_subs_epi16(xmm1, ch_mag_50_over_42_with_sigma2); + xmm0 = _mm_subs_epi16(psi_a_p7_m1, a_sq_p7_m1); + xmm1 = _mm_adds_epi16(xmm0, y0_m_7_1); + bit_met_p7_m1 = _mm_subs_epi16(xmm1, ch_mag_50_over_42_with_sigma2); + xmm0 = _mm_subs_epi16(psi_a_p7_m3, a_sq_p7_m3); + xmm1 = _mm_adds_epi16(xmm0, y0_m_7_3); + bit_met_p7_m3 = _mm_subs_epi16(xmm1, ch_mag_58_over_42_with_sigma2); + xmm0 = _mm_subs_epi16(psi_a_p7_m5, a_sq_p7_m5); + xmm1 = _mm_adds_epi16(xmm0, y0_m_7_5); + bit_met_p7_m5 = _mm_subs_epi16(xmm1, ch_mag_74_over_42_with_sigma2); + xmm0 = _mm_subs_epi16(psi_a_p7_m7, a_sq_p7_m7); + xmm1 = _mm_adds_epi16(xmm0, y0_m_7_7); + bit_met_p7_m7 = _mm_subs_epi16(xmm1, ch_mag_98_over_42_with_sigma2); + xmm0 = _mm_subs_epi16(psi_a_p5_p7, a_sq_p5_p7); + xmm1 = _mm_adds_epi16(xmm0, y0_p_5_7); + bit_met_p5_p7 = _mm_subs_epi16(xmm1, ch_mag_74_over_42_with_sigma2); + xmm0 = _mm_subs_epi16(psi_a_p5_p5, a_sq_p5_p5); + xmm1 = _mm_adds_epi16(xmm0, y0_p_5_5); + bit_met_p5_p5 = _mm_subs_epi16(xmm1, ch_mag_50_over_42_with_sigma2); + xmm0 = _mm_subs_epi16(psi_a_p5_p3, a_sq_p5_p3); + xmm1 = _mm_adds_epi16(xmm0, y0_p_5_3); + bit_met_p5_p3 = _mm_subs_epi16(xmm1, ch_mag_34_over_42_with_sigma2); + xmm0 = _mm_subs_epi16(psi_a_p5_p1, a_sq_p5_p1); + xmm1 = _mm_adds_epi16(xmm0, y0_p_5_1); + bit_met_p5_p1 = 
_mm_subs_epi16(xmm1, ch_mag_26_over_42_with_sigma2); + xmm0 = _mm_subs_epi16(psi_a_p5_m1, a_sq_p5_m1); + xmm1 = _mm_adds_epi16(xmm0, y0_m_5_1); + bit_met_p5_m1 = _mm_subs_epi16(xmm1, ch_mag_26_over_42_with_sigma2); + xmm0 = _mm_subs_epi16(psi_a_p5_m3, a_sq_p5_m3); + xmm1 = _mm_adds_epi16(xmm0, y0_m_5_3); + bit_met_p5_m3 = _mm_subs_epi16(xmm1, ch_mag_34_over_42_with_sigma2); + xmm0 = _mm_subs_epi16(psi_a_p5_m5, a_sq_p5_m5); + xmm1 = _mm_adds_epi16(xmm0, y0_m_5_5); + bit_met_p5_m5 = _mm_subs_epi16(xmm1, ch_mag_50_over_42_with_sigma2); + xmm0 = _mm_subs_epi16(psi_a_p5_m7, a_sq_p5_m7); + xmm1 = _mm_adds_epi16(xmm0, y0_m_5_7); + bit_met_p5_m7 = _mm_subs_epi16(xmm1, ch_mag_74_over_42_with_sigma2); + xmm0 = _mm_subs_epi16(psi_a_p3_p7, a_sq_p3_p7); + xmm1 = _mm_adds_epi16(xmm0, y0_p_3_7); + bit_met_p3_p7 = _mm_subs_epi16(xmm1, ch_mag_58_over_42_with_sigma2); + xmm0 = _mm_subs_epi16(psi_a_p3_p5, a_sq_p3_p5); + xmm1 = _mm_adds_epi16(xmm0, y0_p_3_5); + bit_met_p3_p5 = _mm_subs_epi16(xmm1, ch_mag_34_over_42_with_sigma2); + xmm0 = _mm_subs_epi16(psi_a_p3_p3, a_sq_p3_p3); + xmm1 = _mm_adds_epi16(xmm0, y0_p_3_3); + bit_met_p3_p3 = _mm_subs_epi16(xmm1, ch_mag_18_over_42_with_sigma2); + xmm0 = _mm_subs_epi16(psi_a_p3_p1, a_sq_p3_p1); + xmm1 = _mm_adds_epi16(xmm0, y0_p_3_1); + bit_met_p3_p1 = _mm_subs_epi16(xmm1, ch_mag_10_over_42_with_sigma2); + xmm0 = _mm_subs_epi16(psi_a_p3_m1, a_sq_p3_m1); + xmm1 = _mm_adds_epi16(xmm0, y0_m_3_1); + bit_met_p3_m1 = _mm_subs_epi16(xmm1, ch_mag_10_over_42_with_sigma2); + xmm0 = _mm_subs_epi16(psi_a_p3_m3, a_sq_p3_m3); + xmm1 = _mm_adds_epi16(xmm0, y0_m_3_3); + bit_met_p3_m3 = _mm_subs_epi16(xmm1, ch_mag_18_over_42_with_sigma2); + xmm0 = _mm_subs_epi16(psi_a_p3_m5, a_sq_p3_m5); + xmm1 = _mm_adds_epi16(xmm0, y0_m_3_5); + bit_met_p3_m5 = _mm_subs_epi16(xmm1, ch_mag_34_over_42_with_sigma2); + xmm0 = _mm_subs_epi16(psi_a_p3_m7, a_sq_p3_m7); + xmm1 = _mm_adds_epi16(xmm0, y0_m_3_7); + bit_met_p3_m7 = _mm_subs_epi16(xmm1, ch_mag_58_over_42_with_sigma2); + 
xmm0 = _mm_subs_epi16(psi_a_p1_p7, a_sq_p1_p7); + xmm1 = _mm_adds_epi16(xmm0, y0_p_1_7); + bit_met_p1_p7 = _mm_subs_epi16(xmm1, ch_mag_50_over_42_with_sigma2); + xmm0 = _mm_subs_epi16(psi_a_p1_p5, a_sq_p1_p5); + xmm1 = _mm_adds_epi16(xmm0, y0_p_1_5); + bit_met_p1_p5 = _mm_subs_epi16(xmm1, ch_mag_26_over_42_with_sigma2); + xmm0 = _mm_subs_epi16(psi_a_p1_p3, a_sq_p1_p3); + xmm1 = _mm_adds_epi16(xmm0, y0_p_1_3); + bit_met_p1_p3 = _mm_subs_epi16(xmm1, ch_mag_10_over_42_with_sigma2); + xmm0 = _mm_subs_epi16(psi_a_p1_p1, a_sq_p1_p1); + xmm1 = _mm_adds_epi16(xmm0, y0_p_1_1); + bit_met_p1_p1 = _mm_subs_epi16(xmm1, ch_mag_2_over_42_with_sigma2); + xmm0 = _mm_subs_epi16(psi_a_p1_m1, a_sq_p1_m1); + xmm1 = _mm_adds_epi16(xmm0, y0_m_1_1); + bit_met_p1_m1 = _mm_subs_epi16(xmm1, ch_mag_2_over_42_with_sigma2); + xmm0 = _mm_subs_epi16(psi_a_p1_m3, a_sq_p1_m3); + xmm1 = _mm_adds_epi16(xmm0, y0_m_1_3); + bit_met_p1_m3 = _mm_subs_epi16(xmm1, ch_mag_10_over_42_with_sigma2); + xmm0 = _mm_subs_epi16(psi_a_p1_m5, a_sq_p1_m5); + xmm1 = _mm_adds_epi16(xmm0, y0_m_1_5); + bit_met_p1_m5 = _mm_subs_epi16(xmm1, ch_mag_26_over_42_with_sigma2); + xmm0 = _mm_subs_epi16(psi_a_p1_m7, a_sq_p1_m7); + xmm1 = _mm_adds_epi16(xmm0, y0_m_1_7); + bit_met_p1_m7 = _mm_subs_epi16(xmm1, ch_mag_50_over_42_with_sigma2); + + xmm0 = _mm_subs_epi16(psi_a_m1_p7, a_sq_m1_p7); + xmm1 = _mm_subs_epi16(xmm0, y0_m_1_7); + bit_met_m1_p7 = _mm_subs_epi16(xmm1, ch_mag_50_over_42_with_sigma2); + xmm0 = _mm_subs_epi16(psi_a_m1_p5, a_sq_m1_p5); + xmm1 = _mm_subs_epi16(xmm0, y0_m_1_5); + bit_met_m1_p5 = _mm_subs_epi16(xmm1, ch_mag_26_over_42_with_sigma2); + xmm0 = _mm_subs_epi16(psi_a_m1_p3, a_sq_m1_p3); + xmm1 = _mm_subs_epi16(xmm0, y0_m_1_3); + bit_met_m1_p3 = _mm_subs_epi16(xmm1, ch_mag_10_over_42_with_sigma2); + xmm0 = _mm_subs_epi16(psi_a_m1_p1, a_sq_m1_p1); + xmm1 = _mm_subs_epi16(xmm0, y0_m_1_1); + bit_met_m1_p1 = _mm_subs_epi16(xmm1, ch_mag_2_over_42_with_sigma2); + xmm0 = _mm_subs_epi16(psi_a_m1_m1, a_sq_m1_m1); + xmm1 = 
_mm_subs_epi16(xmm0, y0_p_1_1); + bit_met_m1_m1 = _mm_subs_epi16(xmm1, ch_mag_2_over_42_with_sigma2); + xmm0 = _mm_subs_epi16(psi_a_m1_m3, a_sq_m1_m3); + xmm1 = _mm_subs_epi16(xmm0, y0_p_1_3); + bit_met_m1_m3 = _mm_subs_epi16(xmm1, ch_mag_10_over_42_with_sigma2); + xmm0 = _mm_subs_epi16(psi_a_m1_m5, a_sq_m1_m5); + xmm1 = _mm_subs_epi16(xmm0, y0_p_1_5); + bit_met_m1_m5 = _mm_subs_epi16(xmm1, ch_mag_26_over_42_with_sigma2); + xmm0 = _mm_subs_epi16(psi_a_m1_m7, a_sq_m1_m7); + xmm1 = _mm_subs_epi16(xmm0, y0_p_1_7); + bit_met_m1_m7 = _mm_subs_epi16(xmm1, ch_mag_50_over_42_with_sigma2); + xmm0 = _mm_subs_epi16(psi_a_m3_p7, a_sq_m3_p7); + xmm1 = _mm_subs_epi16(xmm0, y0_m_3_7); + bit_met_m3_p7 = _mm_subs_epi16(xmm1, ch_mag_58_over_42_with_sigma2); + xmm0 = _mm_subs_epi16(psi_a_m3_p5, a_sq_m3_p5); + xmm1 = _mm_subs_epi16(xmm0, y0_m_3_5); + bit_met_m3_p5 = _mm_subs_epi16(xmm1, ch_mag_34_over_42_with_sigma2); + xmm0 = _mm_subs_epi16(psi_a_m3_p3, a_sq_m3_p3); + xmm1 = _mm_subs_epi16(xmm0, y0_m_3_3); + bit_met_m3_p3 = _mm_subs_epi16(xmm1, ch_mag_18_over_42_with_sigma2); + xmm0 = _mm_subs_epi16(psi_a_m3_p1, a_sq_m3_p1); + xmm1 = _mm_subs_epi16(xmm0, y0_m_3_1); + bit_met_m3_p1 = _mm_subs_epi16(xmm1, ch_mag_10_over_42_with_sigma2); + xmm0 = _mm_subs_epi16(psi_a_m3_m1, a_sq_m3_m1); + xmm1 = _mm_subs_epi16(xmm0, y0_p_3_1); + bit_met_m3_m1 = _mm_subs_epi16(xmm1, ch_mag_10_over_42_with_sigma2); + xmm0 = _mm_subs_epi16(psi_a_m3_m3, a_sq_m3_m3); + xmm1 = _mm_subs_epi16(xmm0, y0_p_3_3); + bit_met_m3_m3 = _mm_subs_epi16(xmm1, ch_mag_18_over_42_with_sigma2); + xmm0 = _mm_subs_epi16(psi_a_m3_m5, a_sq_m3_m5); + xmm1 = _mm_subs_epi16(xmm0, y0_p_3_5); + bit_met_m3_m5 = _mm_subs_epi16(xmm1, ch_mag_34_over_42_with_sigma2); + xmm0 = _mm_subs_epi16(psi_a_m3_m7, a_sq_m3_m7); + xmm1 = _mm_subs_epi16(xmm0, y0_p_3_7); + bit_met_m3_m7 = _mm_subs_epi16(xmm1, ch_mag_58_over_42_with_sigma2); + xmm0 = _mm_subs_epi16(psi_a_m5_p7, a_sq_m5_p7); + xmm1 = _mm_subs_epi16(xmm0, y0_m_5_7); + bit_met_m5_p7 = 
_mm_subs_epi16(xmm1, ch_mag_74_over_42_with_sigma2); + xmm0 = _mm_subs_epi16(psi_a_m5_p5, a_sq_m5_p5); + xmm1 = _mm_subs_epi16(xmm0, y0_m_5_5); + bit_met_m5_p5 = _mm_subs_epi16(xmm1, ch_mag_50_over_42_with_sigma2); + xmm0 = _mm_subs_epi16(psi_a_m5_p3, a_sq_m5_p3); + xmm1 = _mm_subs_epi16(xmm0, y0_m_5_3); + bit_met_m5_p3 = _mm_subs_epi16(xmm1, ch_mag_34_over_42_with_sigma2); + xmm0 = _mm_subs_epi16(psi_a_m5_p1, a_sq_m5_p1); + xmm1 = _mm_subs_epi16(xmm0, y0_m_5_1); + bit_met_m5_p1 = _mm_subs_epi16(xmm1, ch_mag_26_over_42_with_sigma2); + xmm0 = _mm_subs_epi16(psi_a_m5_m1, a_sq_m5_m1); + xmm1 = _mm_subs_epi16(xmm0, y0_p_5_1); + bit_met_m5_m1 = _mm_subs_epi16(xmm1, ch_mag_26_over_42_with_sigma2); + xmm0 = _mm_subs_epi16(psi_a_m5_m3, a_sq_m5_m3); + xmm1 = _mm_subs_epi16(xmm0, y0_p_5_3); + bit_met_m5_m3 = _mm_subs_epi16(xmm1, ch_mag_34_over_42_with_sigma2); + xmm0 = _mm_subs_epi16(psi_a_m5_m5, a_sq_m5_m5); + xmm1 = _mm_subs_epi16(xmm0, y0_p_5_5); + bit_met_m5_m5 = _mm_subs_epi16(xmm1, ch_mag_50_over_42_with_sigma2); + xmm0 = _mm_subs_epi16(psi_a_m5_m7, a_sq_m5_m7); + xmm1 = _mm_subs_epi16(xmm0, y0_p_5_7); + bit_met_m5_m7 = _mm_subs_epi16(xmm1, ch_mag_74_over_42_with_sigma2); + xmm0 = _mm_subs_epi16(psi_a_m7_p7, a_sq_m7_p7); + xmm1 = _mm_subs_epi16(xmm0, y0_m_7_7); + bit_met_m7_p7 = _mm_subs_epi16(xmm1, ch_mag_98_over_42_with_sigma2); + xmm0 = _mm_subs_epi16(psi_a_m7_p5, a_sq_m7_p5); + xmm1 = _mm_subs_epi16(xmm0, y0_m_7_5); + bit_met_m7_p5 = _mm_subs_epi16(xmm1, ch_mag_74_over_42_with_sigma2); + xmm0 = _mm_subs_epi16(psi_a_m7_p3, a_sq_m7_p3); + xmm1 = _mm_subs_epi16(xmm0, y0_m_7_3); + bit_met_m7_p3 = _mm_subs_epi16(xmm1, ch_mag_58_over_42_with_sigma2); + xmm0 = _mm_subs_epi16(psi_a_m7_p1, a_sq_m7_p1); + xmm1 = _mm_subs_epi16(xmm0, y0_m_7_1); + bit_met_m7_p1 = _mm_subs_epi16(xmm1, ch_mag_50_over_42_with_sigma2); + xmm0 = _mm_subs_epi16(psi_a_m7_m1, a_sq_m7_m1); + xmm1 = _mm_subs_epi16(xmm0, y0_p_7_1); + bit_met_m7_m1 = _mm_subs_epi16(xmm1, ch_mag_50_over_42_with_sigma2); + 
xmm0 = _mm_subs_epi16(psi_a_m7_m3, a_sq_m7_m3); + xmm1 = _mm_subs_epi16(xmm0, y0_p_7_3); + bit_met_m7_m3 = _mm_subs_epi16(xmm1, ch_mag_58_over_42_with_sigma2); + xmm0 = _mm_subs_epi16(psi_a_m7_m5, a_sq_m7_m5); + xmm1 = _mm_subs_epi16(xmm0, y0_p_7_5); + bit_met_m7_m5 = _mm_subs_epi16(xmm1, ch_mag_74_over_42_with_sigma2); + xmm0 = _mm_subs_epi16(psi_a_m7_m7, a_sq_m7_m7); + xmm1 = _mm_subs_epi16(xmm0, y0_p_7_7); + bit_met_m7_m7 = _mm_subs_epi16(xmm1, ch_mag_98_over_42_with_sigma2); + + // Detection for 1st bit (LTE mapping) + // bit = 1 + xmm0 = _mm_max_epi16(bit_met_m7_p7, bit_met_m7_p5); + xmm1 = _mm_max_epi16(bit_met_m7_p3, bit_met_m7_p1); + xmm2 = _mm_max_epi16(bit_met_m7_m1, bit_met_m7_m3); + xmm3 = _mm_max_epi16(bit_met_m7_m5, bit_met_m7_m7); + xmm4 = _mm_max_epi16(xmm0, xmm1); + xmm5 = _mm_max_epi16(xmm2, xmm3); + logmax_den_re0 = _mm_max_epi16(xmm4, xmm5); + xmm0 = _mm_max_epi16(bit_met_m5_p7, bit_met_m5_p5); + xmm1 = _mm_max_epi16(bit_met_m5_p3, bit_met_m5_p1); + xmm2 = _mm_max_epi16(bit_met_m5_m1, bit_met_m5_m3); + xmm3 = _mm_max_epi16(bit_met_m5_m5, bit_met_m5_m7); + xmm4 = _mm_max_epi16(xmm0, xmm1); + xmm5 = _mm_max_epi16(xmm2, xmm3); + logmax_den_re0 = _mm_max_epi16(logmax_den_re0, xmm4); + logmax_den_re0 = _mm_max_epi16(logmax_den_re0, xmm5); + xmm0 = _mm_max_epi16(bit_met_m3_p7, bit_met_m3_p5); + xmm1 = _mm_max_epi16(bit_met_m3_p3, bit_met_m3_p1); + xmm2 = _mm_max_epi16(bit_met_m3_m1, bit_met_m3_m3); + xmm3 = _mm_max_epi16(bit_met_m3_m5, bit_met_m3_m7); + xmm4 = _mm_max_epi16(xmm0, xmm1); + xmm5 = _mm_max_epi16(xmm2, xmm3); + logmax_den_re0 = _mm_max_epi16(logmax_den_re0, xmm4); + logmax_den_re0 = _mm_max_epi16(logmax_den_re0, xmm5); + xmm0 = _mm_max_epi16(bit_met_m1_p7, bit_met_m1_p5); + xmm1 = _mm_max_epi16(bit_met_m1_p3, bit_met_m1_p1); + xmm2 = _mm_max_epi16(bit_met_m1_m1, bit_met_m1_m3); + xmm3 = _mm_max_epi16(bit_met_m1_m5, bit_met_m1_m7); + xmm4 = _mm_max_epi16(xmm0, xmm1); + xmm5 = _mm_max_epi16(xmm2, xmm3); + logmax_den_re0 = 
_mm_max_epi16(logmax_den_re0, xmm4); + logmax_den_re0 = _mm_max_epi16(logmax_den_re0, xmm5); + + // bit = 0 + xmm0 = _mm_max_epi16(bit_met_p7_p7, bit_met_p7_p5); + xmm1 = _mm_max_epi16(bit_met_p7_p3, bit_met_p7_p1); + xmm2 = _mm_max_epi16(bit_met_p7_m1, bit_met_p7_m3); + xmm3 = _mm_max_epi16(bit_met_p7_m5, bit_met_p7_m7); + xmm4 = _mm_max_epi16(xmm0, xmm1); + xmm5 = _mm_max_epi16(xmm2, xmm3); + logmax_num_re0 = _mm_max_epi16(xmm4, xmm5); + xmm0 = _mm_max_epi16(bit_met_p5_p7, bit_met_p5_p5); + xmm1 = _mm_max_epi16(bit_met_p5_p3, bit_met_p5_p1); + xmm2 = _mm_max_epi16(bit_met_p5_m1, bit_met_p5_m3); + xmm3 = _mm_max_epi16(bit_met_p5_m5, bit_met_p5_m7); + xmm4 = _mm_max_epi16(xmm0, xmm1); + xmm5 = _mm_max_epi16(xmm2, xmm3); + logmax_num_re0 = _mm_max_epi16(logmax_num_re0, xmm4); + logmax_num_re0 = _mm_max_epi16(logmax_num_re0, xmm5); + xmm0 = _mm_max_epi16(bit_met_p3_p7, bit_met_p3_p5); + xmm1 = _mm_max_epi16(bit_met_p3_p3, bit_met_p3_p1); + xmm2 = _mm_max_epi16(bit_met_p3_m1, bit_met_p3_m3); + xmm3 = _mm_max_epi16(bit_met_p3_m5, bit_met_p3_m7); + xmm4 = _mm_max_epi16(xmm0, xmm1); + xmm5 = _mm_max_epi16(xmm2, xmm3); + logmax_num_re0 = _mm_max_epi16(logmax_num_re0, xmm4); + logmax_num_re0 = _mm_max_epi16(logmax_num_re0, xmm5); + xmm0 = _mm_max_epi16(bit_met_p1_p7, bit_met_p1_p5); + xmm1 = _mm_max_epi16(bit_met_p1_p3, bit_met_p1_p1); + xmm2 = _mm_max_epi16(bit_met_p1_m1, bit_met_p1_m3); + xmm3 = _mm_max_epi16(bit_met_p1_m5, bit_met_p1_m7); + xmm4 = _mm_max_epi16(xmm0, xmm1); + xmm5 = _mm_max_epi16(xmm2, xmm3); + logmax_num_re0 = _mm_max_epi16(logmax_num_re0, xmm4); + logmax_num_re0 = _mm_max_epi16(logmax_num_re0, xmm5); + + y0r = _mm_subs_epi16(logmax_num_re0, logmax_den_re0); + + // Detection for 2nd bit (LTE mapping) + // bit = 1 + xmm0 = _mm_max_epi16(bit_met_p7_m1, bit_met_p5_m1); + xmm1 = _mm_max_epi16(bit_met_p3_m1, bit_met_p1_m1); + xmm2 = _mm_max_epi16(bit_met_m1_m1, bit_met_m3_m1); + xmm3 = _mm_max_epi16(bit_met_m5_m1, bit_met_m7_m1); + xmm4 = 
_mm_max_epi16(xmm0, xmm1); + xmm5 = _mm_max_epi16(xmm2, xmm3); + logmax_den_re0 = _mm_max_epi16(xmm4, xmm5); + xmm0 = _mm_max_epi16(bit_met_p7_m3, bit_met_p5_m3); + xmm1 = _mm_max_epi16(bit_met_p3_m3, bit_met_p1_m3); + xmm2 = _mm_max_epi16(bit_met_m1_m3, bit_met_m3_m3); + xmm3 = _mm_max_epi16(bit_met_m5_m3, bit_met_m7_m3); + xmm4 = _mm_max_epi16(xmm0, xmm1); + xmm5 = _mm_max_epi16(xmm2, xmm3); + logmax_den_re0 = _mm_max_epi16(logmax_den_re0, xmm4); + logmax_den_re0 = _mm_max_epi16(logmax_den_re0, xmm5); + xmm0 = _mm_max_epi16(bit_met_p7_m5, bit_met_p5_m5); + xmm1 = _mm_max_epi16(bit_met_p3_m5, bit_met_p1_m5); + xmm2 = _mm_max_epi16(bit_met_m1_m5, bit_met_m3_m5); + xmm3 = _mm_max_epi16(bit_met_m5_m5, bit_met_m7_m5); + xmm4 = _mm_max_epi16(xmm0, xmm1); + xmm5 = _mm_max_epi16(xmm2, xmm3); + logmax_den_re0 = _mm_max_epi16(logmax_den_re0, xmm4); + logmax_den_re0 = _mm_max_epi16(logmax_den_re0, xmm5); + xmm0 = _mm_max_epi16(bit_met_p7_m7, bit_met_p5_m7); + xmm1 = _mm_max_epi16(bit_met_p3_m7, bit_met_p1_m7); + xmm2 = _mm_max_epi16(bit_met_m1_m7, bit_met_m3_m7); + xmm3 = _mm_max_epi16(bit_met_m5_m7, bit_met_m7_m7); + xmm4 = _mm_max_epi16(xmm0, xmm1); + xmm5 = _mm_max_epi16(xmm2, xmm3); + logmax_den_re0 = _mm_max_epi16(logmax_den_re0, xmm4); + logmax_den_re0 = _mm_max_epi16(logmax_den_re0, xmm5); + + // bit = 0 + xmm0 = _mm_max_epi16(bit_met_p7_p1, bit_met_p5_p1); + xmm1 = _mm_max_epi16(bit_met_p3_p1, bit_met_p1_p1); + xmm2 = _mm_max_epi16(bit_met_m1_p1, bit_met_m3_p1); + xmm3 = _mm_max_epi16(bit_met_m5_p1, bit_met_m7_p1); + xmm4 = _mm_max_epi16(xmm0, xmm1); + xmm5 = _mm_max_epi16(xmm2, xmm3); + logmax_num_re0 = _mm_max_epi16(xmm4, xmm5); + xmm0 = _mm_max_epi16(bit_met_p7_p3, bit_met_p5_p3); + xmm1 = _mm_max_epi16(bit_met_p3_p3, bit_met_p1_p3); + xmm2 = _mm_max_epi16(bit_met_m1_p3, bit_met_m3_p3); + xmm3 = _mm_max_epi16(bit_met_m5_p3, bit_met_m7_p3); + xmm4 = _mm_max_epi16(xmm0, xmm1); + xmm5 = _mm_max_epi16(xmm2, xmm3); + logmax_num_re0 = _mm_max_epi16(logmax_num_re0, 
xmm4); + logmax_num_re0 = _mm_max_epi16(logmax_num_re0, xmm5); + xmm0 = _mm_max_epi16(bit_met_p7_p5, bit_met_p5_p5); + xmm1 = _mm_max_epi16(bit_met_p3_p5, bit_met_p1_p5); + xmm2 = _mm_max_epi16(bit_met_m1_p5, bit_met_m3_p5); + xmm3 = _mm_max_epi16(bit_met_m5_p5, bit_met_m7_p5); + xmm4 = _mm_max_epi16(xmm0, xmm1); + xmm5 = _mm_max_epi16(xmm2, xmm3); + logmax_num_re0 = _mm_max_epi16(logmax_num_re0, xmm4); + logmax_num_re0 = _mm_max_epi16(logmax_num_re0, xmm5); + xmm0 = _mm_max_epi16(bit_met_p7_p7, bit_met_p5_p7); + xmm1 = _mm_max_epi16(bit_met_p3_p7, bit_met_p1_p7); + xmm2 = _mm_max_epi16(bit_met_m1_p7, bit_met_m3_p7); + xmm3 = _mm_max_epi16(bit_met_m5_p7, bit_met_m7_p7); + xmm4 = _mm_max_epi16(xmm0, xmm1); + xmm5 = _mm_max_epi16(xmm2, xmm3); + logmax_num_re0 = _mm_max_epi16(logmax_num_re0, xmm4); + logmax_num_re0 = _mm_max_epi16(logmax_num_re0, xmm5); + + y1r = _mm_subs_epi16(logmax_num_re0, logmax_den_re0); + + // Detection for 3rd bit (LTE mapping) + xmm0 = _mm_max_epi16(bit_met_m7_m7, bit_met_m7_m5); + xmm1 = _mm_max_epi16(bit_met_m7_m3, bit_met_m7_m1); + xmm2 = _mm_max_epi16(bit_met_m7_p1, bit_met_m7_p3); + xmm3 = _mm_max_epi16(bit_met_m7_p5, bit_met_m7_p7); + xmm4 = _mm_max_epi16(xmm0, xmm1); + xmm5 = _mm_max_epi16(xmm2, xmm3); + logmax_den_re0 = _mm_max_epi16(xmm4, xmm5); + xmm0 = _mm_max_epi16(bit_met_m5_m7, bit_met_m5_m5); + xmm1 = _mm_max_epi16(bit_met_m5_m3, bit_met_m5_m1); + xmm2 = _mm_max_epi16(bit_met_m5_p1, bit_met_m5_p3); + xmm3 = _mm_max_epi16(bit_met_m5_p5, bit_met_m5_p7); + xmm4 = _mm_max_epi16(xmm0, xmm1); + xmm5 = _mm_max_epi16(xmm2, xmm3); + logmax_den_re0 = _mm_max_epi16(logmax_den_re0, xmm4); + logmax_den_re0 = _mm_max_epi16(logmax_den_re0, xmm5); + xmm0 = _mm_max_epi16(bit_met_p5_m7, bit_met_p5_m5); + xmm1 = _mm_max_epi16(bit_met_p5_m3, bit_met_p5_m1); + xmm2 = _mm_max_epi16(bit_met_p5_p1, bit_met_p5_p3); + xmm3 = _mm_max_epi16(bit_met_p5_p5, bit_met_p5_p7); + xmm4 = _mm_max_epi16(xmm0, xmm1); + xmm5 = _mm_max_epi16(xmm2, xmm3); + 
logmax_den_re0 = _mm_max_epi16(logmax_den_re0, xmm4); + logmax_den_re0 = _mm_max_epi16(logmax_den_re0, xmm5); + xmm0 = _mm_max_epi16(bit_met_p7_m7, bit_met_p7_m5); + xmm1 = _mm_max_epi16(bit_met_p7_m3, bit_met_p7_m1); + xmm2 = _mm_max_epi16(bit_met_p7_p1, bit_met_p7_p3); + xmm3 = _mm_max_epi16(bit_met_p7_p5, bit_met_p7_p7); + xmm4 = _mm_max_epi16(xmm0, xmm1); + xmm5 = _mm_max_epi16(xmm2, xmm3); + logmax_den_re0 = _mm_max_epi16(logmax_den_re0, xmm4); + logmax_den_re0 = _mm_max_epi16(logmax_den_re0, xmm5); + + xmm0 = _mm_max_epi16(bit_met_m3_m7, bit_met_m3_m5); + xmm1 = _mm_max_epi16(bit_met_m3_m3, bit_met_m3_m1); + xmm2 = _mm_max_epi16(bit_met_m3_p1, bit_met_m3_p3); + xmm3 = _mm_max_epi16(bit_met_m3_p5, bit_met_m3_p7); + xmm4 = _mm_max_epi16(xmm0, xmm1); + xmm5 = _mm_max_epi16(xmm2, xmm3); + logmax_num_re0 = _mm_max_epi16(xmm4, xmm5); + xmm0 = _mm_max_epi16(bit_met_m1_m7, bit_met_m1_m5); + xmm1 = _mm_max_epi16(bit_met_m1_m3, bit_met_m1_m1); + xmm2 = _mm_max_epi16(bit_met_m1_p1, bit_met_m1_p3); + xmm3 = _mm_max_epi16(bit_met_m1_p5, bit_met_m1_p7); + xmm4 = _mm_max_epi16(xmm0, xmm1); + xmm5 = _mm_max_epi16(xmm2, xmm3); + logmax_num_re0 = _mm_max_epi16(logmax_num_re0, xmm4); + logmax_num_re0 = _mm_max_epi16(logmax_num_re0, xmm5); + xmm0 = _mm_max_epi16(bit_met_p1_m7, bit_met_p1_m5); + xmm1 = _mm_max_epi16(bit_met_p1_m3, bit_met_p1_m1); + xmm2 = _mm_max_epi16(bit_met_p1_p1, bit_met_p1_p3); + xmm3 = _mm_max_epi16(bit_met_p1_p5, bit_met_p1_p7); + xmm4 = _mm_max_epi16(xmm0, xmm1); + xmm5 = _mm_max_epi16(xmm2, xmm3); + logmax_num_re0 = _mm_max_epi16(logmax_num_re0, xmm4); + logmax_num_re0 = _mm_max_epi16(logmax_num_re0, xmm5); + xmm0 = _mm_max_epi16(bit_met_p3_m7, bit_met_p3_m5); + xmm1 = _mm_max_epi16(bit_met_p3_m3, bit_met_p3_m1); + xmm2 = _mm_max_epi16(bit_met_p3_p1, bit_met_p3_p3); + xmm3 = _mm_max_epi16(bit_met_p3_p5, bit_met_p3_p7); + xmm4 = _mm_max_epi16(xmm0, xmm1); + xmm5 = _mm_max_epi16(xmm2, xmm3); + logmax_num_re0 = _mm_max_epi16(logmax_num_re0, xmm4); + 
logmax_num_re0 = _mm_max_epi16(logmax_num_re0, xmm5); + + y2r = _mm_subs_epi16(logmax_num_re0, logmax_den_re0); + + // Detection for 4th bit (LTE mapping) + xmm0 = _mm_max_epi16(bit_met_p7_p7, bit_met_p5_p7); + xmm1 = _mm_max_epi16(bit_met_p3_p7, bit_met_p1_p7); + xmm2 = _mm_max_epi16(bit_met_m1_p7, bit_met_m3_p7); + xmm3 = _mm_max_epi16(bit_met_m5_p7, bit_met_m7_p7); + xmm4 = _mm_max_epi16(xmm0, xmm1); + xmm5 = _mm_max_epi16(xmm2, xmm3); + logmax_den_re0 = _mm_max_epi16(xmm4, xmm5); + xmm0 = _mm_max_epi16(bit_met_p7_p5, bit_met_p5_p5); + xmm1 = _mm_max_epi16(bit_met_p3_p5, bit_met_p1_p5); + xmm2 = _mm_max_epi16(bit_met_m1_p5, bit_met_m3_p5); + xmm3 = _mm_max_epi16(bit_met_m5_p5, bit_met_m5_p5); + xmm4 = _mm_max_epi16(xmm0, xmm1); + xmm5 = _mm_max_epi16(xmm2, xmm3); + logmax_den_re0 = _mm_max_epi16(logmax_den_re0, xmm4); + logmax_den_re0 = _mm_max_epi16(logmax_den_re0, xmm5); + xmm0 = _mm_max_epi16(bit_met_p7_m5, bit_met_p5_m5); + xmm1 = _mm_max_epi16(bit_met_p3_m5, bit_met_p1_m5); + xmm2 = _mm_max_epi16(bit_met_m1_m5, bit_met_m3_m5); + xmm3 = _mm_max_epi16(bit_met_m5_m5, bit_met_m7_m5); + xmm4 = _mm_max_epi16(xmm0, xmm1); + xmm5 = _mm_max_epi16(xmm2, xmm3); + logmax_den_re0 = _mm_max_epi16(logmax_den_re0, xmm4); + logmax_den_re0 = _mm_max_epi16(logmax_den_re0, xmm5); + xmm0 = _mm_max_epi16(bit_met_p7_m7, bit_met_p5_m7); + xmm1 = _mm_max_epi16(bit_met_p3_m7, bit_met_p1_m7); + xmm2 = _mm_max_epi16(bit_met_m1_m7, bit_met_m3_m7); + xmm3 = _mm_max_epi16(bit_met_m5_m7, bit_met_m7_m7); + xmm4 = _mm_max_epi16(xmm0, xmm1); + xmm5 = _mm_max_epi16(xmm2, xmm3); + logmax_den_re0 = _mm_max_epi16(logmax_den_re0, xmm4); + logmax_den_re0 = _mm_max_epi16(logmax_den_re0, xmm5); + + xmm0 = _mm_max_epi16(bit_met_p7_m1, bit_met_p5_m1); + xmm1 = _mm_max_epi16(bit_met_p3_m1, bit_met_p1_m1); + xmm2 = _mm_max_epi16(bit_met_m1_m1, bit_met_m3_m1); + xmm3 = _mm_max_epi16(bit_met_m5_m1, bit_met_m7_m1); + xmm4 = _mm_max_epi16(xmm0, xmm1); + xmm5 = _mm_max_epi16(xmm2, xmm3); + logmax_num_re0 = 
_mm_max_epi16(xmm4, xmm5); + xmm0 = _mm_max_epi16(bit_met_p7_m3, bit_met_p5_m3); + xmm1 = _mm_max_epi16(bit_met_p3_m3, bit_met_p1_m3); + xmm2 = _mm_max_epi16(bit_met_m1_m3, bit_met_m3_m3); + xmm3 = _mm_max_epi16(bit_met_m5_m3, bit_met_m7_m3); + xmm4 = _mm_max_epi16(xmm0, xmm1); + xmm5 = _mm_max_epi16(xmm2, xmm3); + logmax_num_re0 = _mm_max_epi16(logmax_num_re0, xmm4); + logmax_num_re0 = _mm_max_epi16(logmax_num_re0, xmm5); + xmm0 = _mm_max_epi16(bit_met_p7_p1, bit_met_p5_p1); + xmm1 = _mm_max_epi16(bit_met_p3_p1, bit_met_p1_p1); + xmm2 = _mm_max_epi16(bit_met_m1_p1, bit_met_m3_p1); + xmm3 = _mm_max_epi16(bit_met_m5_p1, bit_met_m7_p1); + xmm4 = _mm_max_epi16(xmm0, xmm1); + xmm5 = _mm_max_epi16(xmm2, xmm3); + logmax_num_re0 = _mm_max_epi16(logmax_num_re0, xmm4); + logmax_num_re0 = _mm_max_epi16(logmax_num_re0, xmm5); + xmm0 = _mm_max_epi16(bit_met_p7_p3, bit_met_p5_p3); + xmm1 = _mm_max_epi16(bit_met_p3_p3, bit_met_p1_p3); + xmm2 = _mm_max_epi16(bit_met_m1_p3, bit_met_m3_p3); + xmm3 = _mm_max_epi16(bit_met_m5_p3, bit_met_m7_p3); + xmm4 = _mm_max_epi16(xmm0, xmm1); + xmm5 = _mm_max_epi16(xmm2, xmm3); + logmax_num_re0 = _mm_max_epi16(logmax_num_re0, xmm4); + logmax_num_re0 = _mm_max_epi16(logmax_num_re0, xmm5); + + y0i = _mm_subs_epi16(logmax_num_re0, logmax_den_re0); + + + // Detection for 5th bit (LTE mapping) + xmm0 = _mm_max_epi16(bit_met_m7_m7, bit_met_m7_m5); + xmm1 = _mm_max_epi16(bit_met_m7_m3, bit_met_m7_m1); + xmm2 = _mm_max_epi16(bit_met_m7_p1, bit_met_m7_p3); + xmm3 = _mm_max_epi16(bit_met_m7_p5, bit_met_m7_p7); + xmm4 = _mm_max_epi16(xmm0, xmm1); + xmm5 = _mm_max_epi16(xmm2, xmm3); + logmax_den_re0 = _mm_max_epi16(xmm4, xmm5); + xmm0 = _mm_max_epi16(bit_met_m1_m7, bit_met_m1_m5); + xmm1 = _mm_max_epi16(bit_met_m1_m3, bit_met_m1_m1); + xmm2 = _mm_max_epi16(bit_met_m1_p1, bit_met_m1_p3); + xmm3 = _mm_max_epi16(bit_met_m1_p5, bit_met_m1_p7); + xmm4 = _mm_max_epi16(xmm0, xmm1); + xmm5 = _mm_max_epi16(xmm2, xmm3); + logmax_den_re0 = 
_mm_max_epi16(logmax_den_re0, xmm4); + logmax_den_re0 = _mm_max_epi16(logmax_den_re0, xmm5); + xmm0 = _mm_max_epi16(bit_met_p1_m7, bit_met_p1_m5); + xmm1 = _mm_max_epi16(bit_met_p1_m3, bit_met_p1_m1); + xmm2 = _mm_max_epi16(bit_met_p1_p1, bit_met_p1_p3); + xmm3 = _mm_max_epi16(bit_met_p1_p5, bit_met_p1_p7); + xmm4 = _mm_max_epi16(xmm0, xmm1); + xmm5 = _mm_max_epi16(xmm2, xmm3); + logmax_den_re0 = _mm_max_epi16(logmax_den_re0, xmm4); + logmax_den_re0 = _mm_max_epi16(logmax_den_re0, xmm5); + xmm0 = _mm_max_epi16(bit_met_p7_m7, bit_met_p7_m5); + xmm1 = _mm_max_epi16(bit_met_p7_m3, bit_met_p7_m1); + xmm2 = _mm_max_epi16(bit_met_p7_p1, bit_met_p7_p3); + xmm3 = _mm_max_epi16(bit_met_p7_p5, bit_met_p7_p7); + xmm4 = _mm_max_epi16(xmm0, xmm1); + xmm5 = _mm_max_epi16(xmm2, xmm3); + logmax_den_re0 = _mm_max_epi16(logmax_den_re0, xmm4); + logmax_den_re0 = _mm_max_epi16(logmax_den_re0, xmm5); + + xmm0 = _mm_max_epi16(bit_met_m5_m7, bit_met_m5_m5); + xmm1 = _mm_max_epi16(bit_met_m5_m3, bit_met_m5_m1); + xmm2 = _mm_max_epi16(bit_met_m5_p1, bit_met_m5_p3); + xmm3 = _mm_max_epi16(bit_met_m5_p5, bit_met_m5_p7); + xmm4 = _mm_max_epi16(xmm0, xmm1); + xmm5 = _mm_max_epi16(xmm2, xmm3); + logmax_num_re0 = _mm_max_epi16(xmm4, xmm5); + xmm0 = _mm_max_epi16(bit_met_m3_m7, bit_met_m3_m5); + xmm1 = _mm_max_epi16(bit_met_m3_m3, bit_met_m3_m1); + xmm2 = _mm_max_epi16(bit_met_m3_p1, bit_met_m3_p3); + xmm3 = _mm_max_epi16(bit_met_m3_p5, bit_met_m3_p7); + xmm4 = _mm_max_epi16(xmm0, xmm1); + xmm5 = _mm_max_epi16(xmm2, xmm3); + logmax_num_re0 = _mm_max_epi16(logmax_num_re0, xmm4); + logmax_num_re0 = _mm_max_epi16(logmax_num_re0, xmm5); + xmm0 = _mm_max_epi16(bit_met_p3_m7, bit_met_p3_m5); + xmm1 = _mm_max_epi16(bit_met_p3_m3, bit_met_p3_m1); + xmm2 = _mm_max_epi16(bit_met_p3_p1, bit_met_p3_p3); + xmm3 = _mm_max_epi16(bit_met_p3_p5, bit_met_p3_p7); + xmm4 = _mm_max_epi16(xmm0, xmm1); + xmm5 = _mm_max_epi16(xmm2, xmm3); + logmax_num_re0 = _mm_max_epi16(logmax_num_re0, xmm4); + logmax_num_re0 = 
_mm_max_epi16(logmax_num_re0, xmm5); + xmm0 = _mm_max_epi16(bit_met_p5_m7, bit_met_p5_m5); + xmm1 = _mm_max_epi16(bit_met_p5_m3, bit_met_p5_m1); + xmm2 = _mm_max_epi16(bit_met_p5_p1, bit_met_p5_p3); + xmm3 = _mm_max_epi16(bit_met_p5_p5, bit_met_p5_p7); + xmm4 = _mm_max_epi16(xmm0, xmm1); + xmm5 = _mm_max_epi16(xmm2, xmm3); + logmax_num_re0 = _mm_max_epi16(logmax_num_re0, xmm4); + logmax_num_re0 = _mm_max_epi16(logmax_num_re0, xmm5); + + y1i = _mm_subs_epi16(logmax_num_re0, logmax_den_re0); + + // Detection for 6th bit (LTE mapping) + xmm0 = _mm_max_epi16(bit_met_p7_p7, bit_met_p5_p7); + xmm1 = _mm_max_epi16(bit_met_p3_p7, bit_met_p1_p7); + xmm2 = _mm_max_epi16(bit_met_m1_p7, bit_met_m3_p7); + xmm3 = _mm_max_epi16(bit_met_m5_p7, bit_met_m7_p7); + xmm4 = _mm_max_epi16(xmm0, xmm1); + xmm5 = _mm_max_epi16(xmm2, xmm3); + logmax_den_re0 = _mm_max_epi16(xmm4, xmm5); + xmm0 = _mm_max_epi16(bit_met_p7_p1, bit_met_p5_p1); + xmm1 = _mm_max_epi16(bit_met_p3_p1, bit_met_p1_p1); + xmm2 = _mm_max_epi16(bit_met_m1_p1, bit_met_m3_p1); + xmm3 = _mm_max_epi16(bit_met_m5_p1, bit_met_m5_p1); + xmm4 = _mm_max_epi16(xmm0, xmm1); + xmm5 = _mm_max_epi16(xmm2, xmm3); + logmax_den_re0 = _mm_max_epi16(logmax_den_re0, xmm4); + logmax_den_re0 = _mm_max_epi16(logmax_den_re0, xmm5); + xmm0 = _mm_max_epi16(bit_met_p7_m1, bit_met_p5_m1); + xmm1 = _mm_max_epi16(bit_met_p3_m1, bit_met_p1_m1); + xmm2 = _mm_max_epi16(bit_met_m1_m1, bit_met_m3_m1); + xmm3 = _mm_max_epi16(bit_met_m5_m1, bit_met_m7_m1); + xmm4 = _mm_max_epi16(xmm0, xmm1); + xmm5 = _mm_max_epi16(xmm2, xmm3); + logmax_den_re0 = _mm_max_epi16(logmax_den_re0, xmm4); + logmax_den_re0 = _mm_max_epi16(logmax_den_re0, xmm5); + xmm0 = _mm_max_epi16(bit_met_p7_m7, bit_met_p5_m7); + xmm1 = _mm_max_epi16(bit_met_p3_m7, bit_met_p1_m7); + xmm2 = _mm_max_epi16(bit_met_m1_m7, bit_met_m3_m7); + xmm3 = _mm_max_epi16(bit_met_m5_m7, bit_met_m7_m7); + xmm4 = _mm_max_epi16(xmm0, xmm1); + xmm5 = _mm_max_epi16(xmm2, xmm3); + logmax_den_re0 = 
_mm_max_epi16(logmax_den_re0, xmm4); + logmax_den_re0 = _mm_max_epi16(logmax_den_re0, xmm5); + + xmm0 = _mm_max_epi16(bit_met_p7_m5, bit_met_p5_m5); + xmm1 = _mm_max_epi16(bit_met_p3_m5, bit_met_p1_m5); + xmm2 = _mm_max_epi16(bit_met_m1_m5, bit_met_m3_m5); + xmm3 = _mm_max_epi16(bit_met_m5_m5, bit_met_m7_m5); + xmm4 = _mm_max_epi16(xmm0, xmm1); + xmm5 = _mm_max_epi16(xmm2, xmm3); + logmax_num_re0 = _mm_max_epi16(xmm4, xmm5); + xmm0 = _mm_max_epi16(bit_met_p7_m3, bit_met_p5_m3); + xmm1 = _mm_max_epi16(bit_met_p3_m3, bit_met_p1_m3); + xmm2 = _mm_max_epi16(bit_met_m1_m3, bit_met_m3_m3); + xmm3 = _mm_max_epi16(bit_met_m5_m3, bit_met_m7_m3); + xmm4 = _mm_max_epi16(xmm0, xmm1); + xmm5 = _mm_max_epi16(xmm2, xmm3); + logmax_num_re0 = _mm_max_epi16(logmax_num_re0, xmm4); + logmax_num_re0 = _mm_max_epi16(logmax_num_re0, xmm5); + xmm0 = _mm_max_epi16(bit_met_p7_p3, bit_met_p5_p3); + xmm1 = _mm_max_epi16(bit_met_p3_p3, bit_met_p1_p3); + xmm2 = _mm_max_epi16(bit_met_m1_p3, bit_met_m3_p3); + xmm3 = _mm_max_epi16(bit_met_m5_p3, bit_met_m7_p3); + xmm4 = _mm_max_epi16(xmm0, xmm1); + xmm5 = _mm_max_epi16(xmm2, xmm3); + logmax_num_re0 = _mm_max_epi16(logmax_num_re0, xmm4); + logmax_num_re0 = _mm_max_epi16(logmax_num_re0, xmm5); + xmm0 = _mm_max_epi16(bit_met_p7_p5, bit_met_p5_p5); + xmm1 = _mm_max_epi16(bit_met_p3_p5, bit_met_p1_p5); + xmm2 = _mm_max_epi16(bit_met_m1_p5, bit_met_m3_p5); + xmm3 = _mm_max_epi16(bit_met_m5_p5, bit_met_m7_p5); + xmm4 = _mm_max_epi16(xmm0, xmm1); + xmm5 = _mm_max_epi16(xmm2, xmm3); + logmax_num_re0 = _mm_max_epi16(logmax_num_re0, xmm4); + logmax_num_re0 = _mm_max_epi16(logmax_num_re0, xmm5); + + y2i = _mm_subs_epi16(logmax_num_re0, logmax_den_re0); + + + // map to output stream, difficult to do in SIMD since we have 6 16bit LLRs + // RE 1 + j = 24*i; + stream0_out[j + 0] = ((short *)&y0r)[0]; + stream0_out[j + 1] = ((short *)&y1r)[0]; + stream0_out[j + 2] = ((short *)&y2r)[0]; + stream0_out[j + 3] = ((short *)&y0i)[0]; + stream0_out[j + 4] = ((short 
*)&y1i)[0]; + stream0_out[j + 5] = ((short *)&y2i)[0]; + // RE 2 + stream0_out[j + 6] = ((short *)&y0r)[1]; + stream0_out[j + 7] = ((short *)&y1r)[1]; + stream0_out[j + 8] = ((short *)&y2r)[1]; + stream0_out[j + 9] = ((short *)&y0i)[1]; + stream0_out[j + 10] = ((short *)&y1i)[1]; + stream0_out[j + 11] = ((short *)&y2i)[1]; + // RE 3 + stream0_out[j + 12] = ((short *)&y0r)[2]; + stream0_out[j + 13] = ((short *)&y1r)[2]; + stream0_out[j + 14] = ((short *)&y2r)[2]; + stream0_out[j + 15] = ((short *)&y0i)[2]; + stream0_out[j + 16] = ((short *)&y1i)[2]; + stream0_out[j + 17] = ((short *)&y2i)[2]; + // RE 4 + stream0_out[j + 18] = ((short *)&y0r)[3]; + stream0_out[j + 19] = ((short *)&y1r)[3]; + stream0_out[j + 20] = ((short *)&y2r)[3]; + stream0_out[j + 21] = ((short *)&y0i)[3]; + stream0_out[j + 22] = ((short *)&y1i)[3]; + stream0_out[j + 23] = ((short *)&y2i)[3]; + // RE 5 + stream0_out[j + 24] = ((short *)&y0r)[4]; + stream0_out[j + 25] = ((short *)&y1r)[4]; + stream0_out[j + 26] = ((short *)&y2r)[4]; + stream0_out[j + 27] = ((short *)&y0i)[4]; + stream0_out[j + 28] = ((short *)&y1i)[4]; + stream0_out[j + 29] = ((short *)&y2i)[4]; + // RE 6 + stream0_out[j + 30] = ((short *)&y0r)[5]; + stream0_out[j + 31] = ((short *)&y1r)[5]; + stream0_out[j + 32] = ((short *)&y2r)[5]; + stream0_out[j + 33] = ((short *)&y0i)[5]; + stream0_out[j + 34] = ((short *)&y1i)[5]; + stream0_out[j + 35] = ((short *)&y2i)[5]; + // RE 7 + stream0_out[j + 36] = ((short *)&y0r)[6]; + stream0_out[j + 37] = ((short *)&y1r)[6]; + stream0_out[j + 38] = ((short *)&y2r)[6]; + stream0_out[j + 39] = ((short *)&y0i)[6]; + stream0_out[j + 40] = ((short *)&y1i)[6]; + stream0_out[j + 41] = ((short *)&y2i)[6]; + // RE 8 + stream0_out[j + 42] = ((short *)&y0r)[7]; + stream0_out[j + 43] = ((short *)&y1r)[7]; + stream0_out[j + 44] = ((short *)&y2r)[7]; + stream0_out[j + 45] = ((short *)&y0i)[7]; + stream0_out[j + 46] = ((short *)&y1i)[7]; + stream0_out[j + 47] = ((short *)&y2i)[7]; + +#elif defined(__arm__) + 
+#endif + + } + +#if defined(__x86_64__) || defined(__i386__) + _mm_empty(); + _m_empty(); +#endif +} + + +int dlsch_64qam_64qam_llr(LTE_DL_FRAME_PARMS *frame_parms, + int32_t **rxdataF_comp, + int32_t **rxdataF_comp_i, + int32_t **dl_ch_mag, + int32_t **dl_ch_mag_i, + int32_t **rho_i, + int16_t *dlsch_llr, + uint8_t symbol, + uint8_t first_symbol_flag, + uint16_t nb_rb, + uint16_t pbch_pss_sss_adjust, + //int16_t **llr16p, + uint32_t llr_offset) +{ + + int16_t *rxF = (int16_t*)&rxdataF_comp[0][(symbol*frame_parms->N_RB_DL*12)]; + int16_t *rxF_i = (int16_t*)&rxdataF_comp_i[0][(symbol*frame_parms->N_RB_DL*12)]; + int16_t *ch_mag = (int16_t*)&dl_ch_mag[0][(symbol*frame_parms->N_RB_DL*12)]; + int16_t *ch_mag_i = (int16_t*)&dl_ch_mag_i[0][(symbol*frame_parms->N_RB_DL*12)]; + int16_t *rho = (int16_t*)&rho_i[0][(symbol*frame_parms->N_RB_DL*12)]; + int16_t *llr16; + int8_t *pllr_symbol; // pointer where llrs should filled for this ofdm symbol + int len; + uint8_t symbol_mod = (symbol >= (7-frame_parms->Ncp))? 
(symbol-(7-frame_parms->Ncp)) : symbol; + + //first symbol has different structure due to more pilots + /*if (first_symbol_flag == 1) { + llr16 = (int16_t*)dlsch_llr; + } else { + llr16 = (int16_t*)(*llr16p); + }*/ + + llr16 = (int16_t*)dlsch_llr; + + AssertFatal(llr16!=NULL,"dlsch_16qam_64qam_llr:llr is null, symbol %d\n",symbol); + + + if ((symbol_mod==0) || (symbol_mod==(4-frame_parms->Ncp))) { + // if symbol has pilots + if (frame_parms->nb_antenna_ports_eNB!=1) + // in 2 antenna ports we have 8 REs per symbol per RB + len = (nb_rb*8) - (2*pbch_pss_sss_adjust/3); + else + // for 1 antenna port we have 10 REs per symbol per RB + len = (nb_rb*10) - (5*pbch_pss_sss_adjust/6); + } else { + // symbol has no pilots + len = (nb_rb*12) - pbch_pss_sss_adjust; + } + + pllr_symbol = (int8_t*)dlsch_llr; + pllr_symbol += llr_offset; + //printf("dlsch_64qam_64qam_llr: symbol %d,nb_rb %d, len %d,pbch_pss_sss_adjust %d\n",symbol,nb_rb,len,pbch_pss_sss_adjust); + /*LOG_I(PHY,"dlsch_64qam_64qam_llr [symb %d / FirstSym %d / Length %d / LLR Offset %d]: @LLR Buff %x, @LLR Buff(symb) %x, , @Compute LLR Buff(symb) %x \n", + symbol, + first_symbol_flag, + len, + llr_offset, + (int16_t*)dlsch_llr, + llr16, + pllr_symbol);*/ + +#ifdef __AVX2__ + + // Round length up to multiple of 16 words + uint32_t len256i = ((len+16)>>4)*16; + int32_t *rxF_256i = (int32_t*) malloc16_clear(len256i*4); + int32_t *rxF_i_256i = (int32_t*) malloc16_clear(len256i*4); + int32_t *ch_mag_256i = (int32_t*) malloc16_clear(len256i*4); + int32_t *ch_mag_i_256i = (int32_t*) malloc16_clear(len256i*4); + int32_t *rho_256i = (int32_t*) malloc16_clear(len256i*4); + + memcpy(rxF_256i, rxF, len*4); + memcpy(rxF_i_256i, rxF_i, len*4); + memcpy(ch_mag_256i, ch_mag, len*4); + memcpy(ch_mag_i_256i, ch_mag_i, len*4); + memcpy(rho_256i, rho, len*4); + +#if 0 + qam64_qam16_avx2((short *)rxF_256i, + (short *)rxF_i_256i, + (short *)ch_mag_256i, + (short *)ch_mag_i_256i, + (short *)llr16, + (short *) rho_256i, + len); +#else + 
qam64_qam64_avx2((int32_t *)rxF_256i, + (int32_t *)rxF_i_256i, + (int32_t *)ch_mag_256i, + (int32_t *)ch_mag_i_256i, + (int16_t *)llr16, + (int32_t *) rho_256i, + len); +#endif + + free16(rxF_256i, sizeof(rxF_256i)); + free16(rxF_i_256i, sizeof(rxF_i_256i)); + free16(ch_mag_256i, sizeof(ch_mag_256i)); + free16(ch_mag_i_256i, sizeof(ch_mag_i_256i)); + free16(rho_256i, sizeof(rho_256i)); + +#else + qam64_qam64((short *)rxF, + (short *)rxF_i, + (short *)ch_mag, + (short *)ch_mag_i, + (short *)llr16, + (short *)rho, + len); +#endif + + llr16 += (6*len); + //*llr16p = (short *)llr16; + + return(0); +} diff --git a/openair1/PHY/LTE_TRANSPORT/dlsch_llr_computation_avx2.c b/openair1/PHY/LTE_TRANSPORT/dlsch_llr_computation_avx2.c new file mode 100644 index 0000000000000000000000000000000000000000..588adfbc55c65f736444797013a08ab37ade4a65 --- /dev/null +++ b/openair1/PHY/LTE_TRANSPORT/dlsch_llr_computation_avx2.c @@ -0,0 +1,4034 @@ + /* + * Licensed to the OpenAirInterface (OAI) Software Alliance under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The OpenAirInterface Software Alliance licenses this file to You under + * the OAI Public License, Version 1.1 (the "License"); you may not use this file + * except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.openairinterface.org/?page_id=698 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + *------------------------------------------------------------------------------- + * For more information about the OpenAirInterface (OAI) Software Alliance: + * contact@openairinterface.org + */ + +/*! 
\file PHY/LTE_TRANSPORT/dlsch_llr_computation.c + * \brief Top-level routines for LLR computation of the PDSCH physical channel from 36-211, V8.6 2009-03 + * \author R. Knopp, F. Kaltenberger,A. Bhamri, S. Aubert, S. Wagner, X Jiang + * \date 2011 + * \version 0.1 + * \company Eurecom + * \email: knopp@eurecom.fr,florian.kaltenberger@eurecom.fr,ankit.bhamri@eurecom.fr,sebastien.aubert@eurecom.fr, sebastian.wagner@eurecom.fr + * \note + * \warning + */ + +#include "PHY/defs.h" +#include "PHY/TOOLS/defs.h" +#include "PHY/extern.h" +#include "defs.h" +#include "extern.h" +#include "PHY/sse_intrin.h" + +int16_t ones256[16] __attribute__ ((aligned(32))) = {0xffff,0xffff,0xffff,0xffff,0xffff,0xffff,0xffff,0xffff,0xffff,0xffff,0xffff,0xffff,0xffff,0xffff,0xffff,0xffff}; + +static __m256i rho_rpi __attribute__ ((aligned(32))); +static __m256i rho_rmi __attribute__ ((aligned(32))); +static __m256i rho_rpi_1_1 __attribute__ ((aligned(32))); +static __m256i rho_rpi_1_3 __attribute__ ((aligned(32))); +static __m256i rho_rpi_1_5 __attribute__ ((aligned(32))); +static __m256i rho_rpi_1_7 __attribute__ ((aligned(32))); +static __m256i rho_rpi_3_1 __attribute__ ((aligned(32))); +static __m256i rho_rpi_3_3 __attribute__ ((aligned(32))); +static __m256i rho_rpi_3_5 __attribute__ ((aligned(32))); +static __m256i rho_rpi_3_7 __attribute__ ((aligned(32))); +static __m256i rho_rpi_5_1 __attribute__ ((aligned(32))); +static __m256i rho_rpi_5_3 __attribute__ ((aligned(32))); +static __m256i rho_rpi_5_5 __attribute__ ((aligned(32))); +static __m256i rho_rpi_5_7 __attribute__ ((aligned(32))); +static __m256i rho_rpi_7_1 __attribute__ ((aligned(32))); +static __m256i rho_rpi_7_3 __attribute__ ((aligned(32))); +static __m256i rho_rpi_7_5 __attribute__ ((aligned(32))); +static __m256i rho_rpi_7_7 __attribute__ ((aligned(32))); +static __m256i rho_rmi_1_1 __attribute__ ((aligned(32))); +static __m256i rho_rmi_1_3 __attribute__ ((aligned(32))); +static __m256i rho_rmi_1_5 __attribute__ 
((aligned(32))); +static __m256i rho_rmi_1_7 __attribute__ ((aligned(32))); +static __m256i rho_rmi_3_1 __attribute__ ((aligned(32))); +static __m256i rho_rmi_3_3 __attribute__ ((aligned(32))); +static __m256i rho_rmi_3_5 __attribute__ ((aligned(32))); +static __m256i rho_rmi_3_7 __attribute__ ((aligned(32))); +static __m256i rho_rmi_5_1 __attribute__ ((aligned(32))); +static __m256i rho_rmi_5_3 __attribute__ ((aligned(32))); +static __m256i rho_rmi_5_5 __attribute__ ((aligned(32))); +static __m256i rho_rmi_5_7 __attribute__ ((aligned(32))); +static __m256i rho_rmi_7_1 __attribute__ ((aligned(32))); +static __m256i rho_rmi_7_3 __attribute__ ((aligned(32))); +static __m256i rho_rmi_7_5 __attribute__ ((aligned(32))); +static __m256i rho_rmi_7_7 __attribute__ ((aligned(32))); + +static __m256i psi_r_m7_m7 __attribute__ ((aligned(32))); +static __m256i psi_r_m7_m5 __attribute__ ((aligned(32))); +static __m256i psi_r_m7_m3 __attribute__ ((aligned(32))); +static __m256i psi_r_m7_m1 __attribute__ ((aligned(32))); +static __m256i psi_r_m7_p1 __attribute__ ((aligned(32))); +static __m256i psi_r_m7_p3 __attribute__ ((aligned(32))); +static __m256i psi_r_m7_p5 __attribute__ ((aligned(32))); +static __m256i psi_r_m7_p7 __attribute__ ((aligned(32))); +static __m256i psi_r_m5_m7 __attribute__ ((aligned(32))); +static __m256i psi_r_m5_m5 __attribute__ ((aligned(32))); +static __m256i psi_r_m5_m3 __attribute__ ((aligned(32))); +static __m256i psi_r_m5_m1 __attribute__ ((aligned(32))); +static __m256i psi_r_m5_p1 __attribute__ ((aligned(32))); +static __m256i psi_r_m5_p3 __attribute__ ((aligned(32))); +static __m256i psi_r_m5_p5 __attribute__ ((aligned(32))); +static __m256i psi_r_m5_p7 __attribute__ ((aligned(32))); +static __m256i psi_r_m3_m7 __attribute__ ((aligned(32))); +static __m256i psi_r_m3_m5 __attribute__ ((aligned(32))); +static __m256i psi_r_m3_m3 __attribute__ ((aligned(32))); +static __m256i psi_r_m3_m1 __attribute__ ((aligned(32))); +static __m256i psi_r_m3_p1 
__attribute__ ((aligned(32))); +static __m256i psi_r_m3_p3 __attribute__ ((aligned(32))); +static __m256i psi_r_m3_p5 __attribute__ ((aligned(32))); +static __m256i psi_r_m3_p7 __attribute__ ((aligned(32))); +static __m256i psi_r_m1_m7 __attribute__ ((aligned(32))); +static __m256i psi_r_m1_m5 __attribute__ ((aligned(32))); +static __m256i psi_r_m1_m3 __attribute__ ((aligned(32))); +static __m256i psi_r_m1_m1 __attribute__ ((aligned(32))); +static __m256i psi_r_m1_p1 __attribute__ ((aligned(32))); +static __m256i psi_r_m1_p3 __attribute__ ((aligned(32))); +static __m256i psi_r_m1_p5 __attribute__ ((aligned(32))); +static __m256i psi_r_m1_p7 __attribute__ ((aligned(32))); +static __m256i psi_r_p1_m7 __attribute__ ((aligned(32))); +static __m256i psi_r_p1_m5 __attribute__ ((aligned(32))); +static __m256i psi_r_p1_m3 __attribute__ ((aligned(32))); +static __m256i psi_r_p1_m1 __attribute__ ((aligned(32))); +static __m256i psi_r_p1_p1 __attribute__ ((aligned(32))); +static __m256i psi_r_p1_p3 __attribute__ ((aligned(32))); +static __m256i psi_r_p1_p5 __attribute__ ((aligned(32))); +static __m256i psi_r_p1_p7 __attribute__ ((aligned(32))); +static __m256i psi_r_p3_m7 __attribute__ ((aligned(32))); +static __m256i psi_r_p3_m5 __attribute__ ((aligned(32))); +static __m256i psi_r_p3_m3 __attribute__ ((aligned(32))); +static __m256i psi_r_p3_m1 __attribute__ ((aligned(32))); +static __m256i psi_r_p3_p1 __attribute__ ((aligned(32))); +static __m256i psi_r_p3_p3 __attribute__ ((aligned(32))); +static __m256i psi_r_p3_p5 __attribute__ ((aligned(32))); +static __m256i psi_r_p3_p7 __attribute__ ((aligned(32))); +static __m256i psi_r_p5_m7 __attribute__ ((aligned(32))); +static __m256i psi_r_p5_m5 __attribute__ ((aligned(32))); +static __m256i psi_r_p5_m3 __attribute__ ((aligned(32))); +static __m256i psi_r_p5_m1 __attribute__ ((aligned(32))); +static __m256i psi_r_p5_p1 __attribute__ ((aligned(32))); +static __m256i psi_r_p5_p3 __attribute__ ((aligned(32))); +static __m256i 
psi_r_p5_p5 __attribute__ ((aligned(32))); +static __m256i psi_r_p5_p7 __attribute__ ((aligned(32))); +static __m256i psi_r_p7_m7 __attribute__ ((aligned(32))); +static __m256i psi_r_p7_m5 __attribute__ ((aligned(32))); +static __m256i psi_r_p7_m3 __attribute__ ((aligned(32))); +static __m256i psi_r_p7_m1 __attribute__ ((aligned(32))); +static __m256i psi_r_p7_p1 __attribute__ ((aligned(32))); +static __m256i psi_r_p7_p3 __attribute__ ((aligned(32))); +static __m256i psi_r_p7_p5 __attribute__ ((aligned(32))); +static __m256i psi_r_p7_p7 __attribute__ ((aligned(32))); + +static __m256i psi_i_m7_m7 __attribute__ ((aligned(32))); +static __m256i psi_i_m7_m5 __attribute__ ((aligned(32))); +static __m256i psi_i_m7_m3 __attribute__ ((aligned(32))); +static __m256i psi_i_m7_m1 __attribute__ ((aligned(32))); +static __m256i psi_i_m7_p1 __attribute__ ((aligned(32))); +static __m256i psi_i_m7_p3 __attribute__ ((aligned(32))); +static __m256i psi_i_m7_p5 __attribute__ ((aligned(32))); +static __m256i psi_i_m7_p7 __attribute__ ((aligned(32))); +static __m256i psi_i_m5_m7 __attribute__ ((aligned(32))); +static __m256i psi_i_m5_m5 __attribute__ ((aligned(32))); +static __m256i psi_i_m5_m3 __attribute__ ((aligned(32))); +static __m256i psi_i_m5_m1 __attribute__ ((aligned(32))); +static __m256i psi_i_m5_p1 __attribute__ ((aligned(32))); +static __m256i psi_i_m5_p3 __attribute__ ((aligned(32))); +static __m256i psi_i_m5_p5 __attribute__ ((aligned(32))); +static __m256i psi_i_m5_p7 __attribute__ ((aligned(32))); +static __m256i psi_i_m3_m7 __attribute__ ((aligned(32))); +static __m256i psi_i_m3_m5 __attribute__ ((aligned(32))); +static __m256i psi_i_m3_m3 __attribute__ ((aligned(32))); +static __m256i psi_i_m3_m1 __attribute__ ((aligned(32))); +static __m256i psi_i_m3_p1 __attribute__ ((aligned(32))); +static __m256i psi_i_m3_p3 __attribute__ ((aligned(32))); +static __m256i psi_i_m3_p5 __attribute__ ((aligned(32))); +static __m256i psi_i_m3_p7 __attribute__ ((aligned(32))); +static 
__m256i psi_i_m1_m7 __attribute__ ((aligned(32))); +static __m256i psi_i_m1_m5 __attribute__ ((aligned(32))); +static __m256i psi_i_m1_m3 __attribute__ ((aligned(32))); +static __m256i psi_i_m1_m1 __attribute__ ((aligned(32))); +static __m256i psi_i_m1_p1 __attribute__ ((aligned(32))); +static __m256i psi_i_m1_p3 __attribute__ ((aligned(32))); +static __m256i psi_i_m1_p5 __attribute__ ((aligned(32))); +static __m256i psi_i_m1_p7 __attribute__ ((aligned(32))); +static __m256i psi_i_p1_m7 __attribute__ ((aligned(32))); +static __m256i psi_i_p1_m5 __attribute__ ((aligned(32))); +static __m256i psi_i_p1_m3 __attribute__ ((aligned(32))); +static __m256i psi_i_p1_m1 __attribute__ ((aligned(32))); +static __m256i psi_i_p1_p1 __attribute__ ((aligned(32))); +static __m256i psi_i_p1_p3 __attribute__ ((aligned(32))); +static __m256i psi_i_p1_p5 __attribute__ ((aligned(32))); +static __m256i psi_i_p1_p7 __attribute__ ((aligned(32))); +static __m256i psi_i_p3_m7 __attribute__ ((aligned(32))); +static __m256i psi_i_p3_m5 __attribute__ ((aligned(32))); +static __m256i psi_i_p3_m3 __attribute__ ((aligned(32))); +static __m256i psi_i_p3_m1 __attribute__ ((aligned(32))); +static __m256i psi_i_p3_p1 __attribute__ ((aligned(32))); +static __m256i psi_i_p3_p3 __attribute__ ((aligned(32))); +static __m256i psi_i_p3_p5 __attribute__ ((aligned(32))); +static __m256i psi_i_p3_p7 __attribute__ ((aligned(32))); +static __m256i psi_i_p5_m7 __attribute__ ((aligned(32))); +static __m256i psi_i_p5_m5 __attribute__ ((aligned(32))); +static __m256i psi_i_p5_m3 __attribute__ ((aligned(32))); +static __m256i psi_i_p5_m1 __attribute__ ((aligned(32))); +static __m256i psi_i_p5_p1 __attribute__ ((aligned(32))); +static __m256i psi_i_p5_p3 __attribute__ ((aligned(32))); +static __m256i psi_i_p5_p5 __attribute__ ((aligned(32))); +static __m256i psi_i_p5_p7 __attribute__ ((aligned(32))); +static __m256i psi_i_p7_m7 __attribute__ ((aligned(32))); +static __m256i psi_i_p7_m5 __attribute__ ((aligned(32))); 
+static __m256i psi_i_p7_m3 __attribute__ ((aligned(32))); +static __m256i psi_i_p7_m1 __attribute__ ((aligned(32))); +static __m256i psi_i_p7_p1 __attribute__ ((aligned(32))); +static __m256i psi_i_p7_p3 __attribute__ ((aligned(32))); +static __m256i psi_i_p7_p5 __attribute__ ((aligned(32))); +static __m256i psi_i_p7_p7 __attribute__ ((aligned(32))); + +static __m256i a_r_m7_m7 __attribute__ ((aligned(32))); +static __m256i a_r_m7_m5 __attribute__ ((aligned(32))); +static __m256i a_r_m7_m3 __attribute__ ((aligned(32))); +static __m256i a_r_m7_m1 __attribute__ ((aligned(32))); +static __m256i a_r_m7_p1 __attribute__ ((aligned(32))); +static __m256i a_r_m7_p3 __attribute__ ((aligned(32))); +static __m256i a_r_m7_p5 __attribute__ ((aligned(32))); +static __m256i a_r_m7_p7 __attribute__ ((aligned(32))); +static __m256i a_r_m5_m7 __attribute__ ((aligned(32))); +static __m256i a_r_m5_m5 __attribute__ ((aligned(32))); +static __m256i a_r_m5_m3 __attribute__ ((aligned(32))); +static __m256i a_r_m5_m1 __attribute__ ((aligned(32))); +static __m256i a_r_m5_p1 __attribute__ ((aligned(32))); +static __m256i a_r_m5_p3 __attribute__ ((aligned(32))); +static __m256i a_r_m5_p5 __attribute__ ((aligned(32))); +static __m256i a_r_m5_p7 __attribute__ ((aligned(32))); +static __m256i a_r_m3_m7 __attribute__ ((aligned(32))); +static __m256i a_r_m3_m5 __attribute__ ((aligned(32))); +static __m256i a_r_m3_m3 __attribute__ ((aligned(32))); +static __m256i a_r_m3_m1 __attribute__ ((aligned(32))); +static __m256i a_r_m3_p1 __attribute__ ((aligned(32))); +static __m256i a_r_m3_p3 __attribute__ ((aligned(32))); +static __m256i a_r_m3_p5 __attribute__ ((aligned(32))); +static __m256i a_r_m3_p7 __attribute__ ((aligned(32))); +static __m256i a_r_m1_m7 __attribute__ ((aligned(32))); +static __m256i a_r_m1_m5 __attribute__ ((aligned(32))); +static __m256i a_r_m1_m3 __attribute__ ((aligned(32))); +static __m256i a_r_m1_m1 __attribute__ ((aligned(32))); +static __m256i a_r_m1_p1 __attribute__ 
((aligned(32))); +static __m256i a_r_m1_p3 __attribute__ ((aligned(32))); +static __m256i a_r_m1_p5 __attribute__ ((aligned(32))); +static __m256i a_r_m1_p7 __attribute__ ((aligned(32))); +static __m256i a_r_p1_m7 __attribute__ ((aligned(32))); +static __m256i a_r_p1_m5 __attribute__ ((aligned(32))); +static __m256i a_r_p1_m3 __attribute__ ((aligned(32))); +static __m256i a_r_p1_m1 __attribute__ ((aligned(32))); +static __m256i a_r_p1_p1 __attribute__ ((aligned(32))); +static __m256i a_r_p1_p3 __attribute__ ((aligned(32))); +static __m256i a_r_p1_p5 __attribute__ ((aligned(32))); +static __m256i a_r_p1_p7 __attribute__ ((aligned(32))); +static __m256i a_r_p3_m7 __attribute__ ((aligned(32))); +static __m256i a_r_p3_m5 __attribute__ ((aligned(32))); +static __m256i a_r_p3_m3 __attribute__ ((aligned(32))); +static __m256i a_r_p3_m1 __attribute__ ((aligned(32))); +static __m256i a_r_p3_p1 __attribute__ ((aligned(32))); +static __m256i a_r_p3_p3 __attribute__ ((aligned(32))); +static __m256i a_r_p3_p5 __attribute__ ((aligned(32))); +static __m256i a_r_p3_p7 __attribute__ ((aligned(32))); +static __m256i a_r_p5_m7 __attribute__ ((aligned(32))); +static __m256i a_r_p5_m5 __attribute__ ((aligned(32))); +static __m256i a_r_p5_m3 __attribute__ ((aligned(32))); +static __m256i a_r_p5_m1 __attribute__ ((aligned(32))); +static __m256i a_r_p5_p1 __attribute__ ((aligned(32))); +static __m256i a_r_p5_p3 __attribute__ ((aligned(32))); +static __m256i a_r_p5_p5 __attribute__ ((aligned(32))); +static __m256i a_r_p5_p7 __attribute__ ((aligned(32))); +static __m256i a_r_p7_m7 __attribute__ ((aligned(32))); +static __m256i a_r_p7_m5 __attribute__ ((aligned(32))); +static __m256i a_r_p7_m3 __attribute__ ((aligned(32))); +static __m256i a_r_p7_m1 __attribute__ ((aligned(32))); +static __m256i a_r_p7_p1 __attribute__ ((aligned(32))); +static __m256i a_r_p7_p3 __attribute__ ((aligned(32))); +static __m256i a_r_p7_p5 __attribute__ ((aligned(32))); +static __m256i a_r_p7_p7 __attribute__ 
((aligned(32))); + +static __m256i a_i_m7_m7 __attribute__ ((aligned(32))); +static __m256i a_i_m7_m5 __attribute__ ((aligned(32))); +static __m256i a_i_m7_m3 __attribute__ ((aligned(32))); +static __m256i a_i_m7_m1 __attribute__ ((aligned(32))); +static __m256i a_i_m7_p1 __attribute__ ((aligned(32))); +static __m256i a_i_m7_p3 __attribute__ ((aligned(32))); +static __m256i a_i_m7_p5 __attribute__ ((aligned(32))); +static __m256i a_i_m7_p7 __attribute__ ((aligned(32))); +static __m256i a_i_m5_m7 __attribute__ ((aligned(32))); +static __m256i a_i_m5_m5 __attribute__ ((aligned(32))); +static __m256i a_i_m5_m3 __attribute__ ((aligned(32))); +static __m256i a_i_m5_m1 __attribute__ ((aligned(32))); +static __m256i a_i_m5_p1 __attribute__ ((aligned(32))); +static __m256i a_i_m5_p3 __attribute__ ((aligned(32))); +static __m256i a_i_m5_p5 __attribute__ ((aligned(32))); +static __m256i a_i_m5_p7 __attribute__ ((aligned(32))); +static __m256i a_i_m3_m7 __attribute__ ((aligned(32))); +static __m256i a_i_m3_m5 __attribute__ ((aligned(32))); +static __m256i a_i_m3_m3 __attribute__ ((aligned(32))); +static __m256i a_i_m3_m1 __attribute__ ((aligned(32))); +static __m256i a_i_m3_p1 __attribute__ ((aligned(32))); +static __m256i a_i_m3_p3 __attribute__ ((aligned(32))); +static __m256i a_i_m3_p5 __attribute__ ((aligned(32))); +static __m256i a_i_m3_p7 __attribute__ ((aligned(32))); +static __m256i a_i_m1_m7 __attribute__ ((aligned(32))); +static __m256i a_i_m1_m5 __attribute__ ((aligned(32))); +static __m256i a_i_m1_m3 __attribute__ ((aligned(32))); +static __m256i a_i_m1_m1 __attribute__ ((aligned(32))); +static __m256i a_i_m1_p1 __attribute__ ((aligned(32))); +static __m256i a_i_m1_p3 __attribute__ ((aligned(32))); +static __m256i a_i_m1_p5 __attribute__ ((aligned(32))); +static __m256i a_i_m1_p7 __attribute__ ((aligned(32))); +static __m256i a_i_p1_m7 __attribute__ ((aligned(32))); +static __m256i a_i_p1_m5 __attribute__ ((aligned(32))); +static __m256i a_i_p1_m3 __attribute__ 
((aligned(32))); +static __m256i a_i_p1_m1 __attribute__ ((aligned(32))); +static __m256i a_i_p1_p1 __attribute__ ((aligned(32))); +static __m256i a_i_p1_p3 __attribute__ ((aligned(32))); +static __m256i a_i_p1_p5 __attribute__ ((aligned(32))); +static __m256i a_i_p1_p7 __attribute__ ((aligned(32))); +static __m256i a_i_p3_m7 __attribute__ ((aligned(32))); +static __m256i a_i_p3_m5 __attribute__ ((aligned(32))); +static __m256i a_i_p3_m3 __attribute__ ((aligned(32))); +static __m256i a_i_p3_m1 __attribute__ ((aligned(32))); +static __m256i a_i_p3_p1 __attribute__ ((aligned(32))); +static __m256i a_i_p3_p3 __attribute__ ((aligned(32))); +static __m256i a_i_p3_p5 __attribute__ ((aligned(32))); +static __m256i a_i_p3_p7 __attribute__ ((aligned(32))); +static __m256i a_i_p5_m7 __attribute__ ((aligned(32))); +static __m256i a_i_p5_m5 __attribute__ ((aligned(32))); +static __m256i a_i_p5_m3 __attribute__ ((aligned(32))); +static __m256i a_i_p5_m1 __attribute__ ((aligned(32))); +static __m256i a_i_p5_p1 __attribute__ ((aligned(32))); +static __m256i a_i_p5_p3 __attribute__ ((aligned(32))); +static __m256i a_i_p5_p5 __attribute__ ((aligned(32))); +static __m256i a_i_p5_p7 __attribute__ ((aligned(32))); +static __m256i a_i_p7_m7 __attribute__ ((aligned(32))); +static __m256i a_i_p7_m5 __attribute__ ((aligned(32))); +static __m256i a_i_p7_m3 __attribute__ ((aligned(32))); +static __m256i a_i_p7_m1 __attribute__ ((aligned(32))); +static __m256i a_i_p7_p1 __attribute__ ((aligned(32))); +static __m256i a_i_p7_p3 __attribute__ ((aligned(32))); +static __m256i a_i_p7_p5 __attribute__ ((aligned(32))); +static __m256i a_i_p7_p7 __attribute__ ((aligned(32))); + +static __m256i psi_a_m7_m7 __attribute__ ((aligned(32))); +static __m256i psi_a_m7_m5 __attribute__ ((aligned(32))); +static __m256i psi_a_m7_m3 __attribute__ ((aligned(32))); +static __m256i psi_a_m7_m1 __attribute__ ((aligned(32))); +static __m256i psi_a_m7_p1 __attribute__ ((aligned(32))); +static __m256i psi_a_m7_p3 
__attribute__ ((aligned(32))); +static __m256i psi_a_m7_p5 __attribute__ ((aligned(32))); +static __m256i psi_a_m7_p7 __attribute__ ((aligned(32))); +static __m256i psi_a_m5_m7 __attribute__ ((aligned(32))); +static __m256i psi_a_m5_m5 __attribute__ ((aligned(32))); +static __m256i psi_a_m5_m3 __attribute__ ((aligned(32))); +static __m256i psi_a_m5_m1 __attribute__ ((aligned(32))); +static __m256i psi_a_m5_p1 __attribute__ ((aligned(32))); +static __m256i psi_a_m5_p3 __attribute__ ((aligned(32))); +static __m256i psi_a_m5_p5 __attribute__ ((aligned(32))); +static __m256i psi_a_m5_p7 __attribute__ ((aligned(32))); +static __m256i psi_a_m3_m7 __attribute__ ((aligned(32))); +static __m256i psi_a_m3_m5 __attribute__ ((aligned(32))); +static __m256i psi_a_m3_m3 __attribute__ ((aligned(32))); +static __m256i psi_a_m3_m1 __attribute__ ((aligned(32))); +static __m256i psi_a_m3_p1 __attribute__ ((aligned(32))); +static __m256i psi_a_m3_p3 __attribute__ ((aligned(32))); +static __m256i psi_a_m3_p5 __attribute__ ((aligned(32))); +static __m256i psi_a_m3_p7 __attribute__ ((aligned(32))); +static __m256i psi_a_m1_m7 __attribute__ ((aligned(32))); +static __m256i psi_a_m1_m5 __attribute__ ((aligned(32))); +static __m256i psi_a_m1_m3 __attribute__ ((aligned(32))); +static __m256i psi_a_m1_m1 __attribute__ ((aligned(32))); +static __m256i psi_a_m1_p1 __attribute__ ((aligned(32))); +static __m256i psi_a_m1_p3 __attribute__ ((aligned(32))); +static __m256i psi_a_m1_p5 __attribute__ ((aligned(32))); +static __m256i psi_a_m1_p7 __attribute__ ((aligned(32))); +static __m256i psi_a_p1_m7 __attribute__ ((aligned(32))); +static __m256i psi_a_p1_m5 __attribute__ ((aligned(32))); +static __m256i psi_a_p1_m3 __attribute__ ((aligned(32))); +static __m256i psi_a_p1_m1 __attribute__ ((aligned(32))); +static __m256i psi_a_p1_p1 __attribute__ ((aligned(32))); +static __m256i psi_a_p1_p3 __attribute__ ((aligned(32))); +static __m256i psi_a_p1_p5 __attribute__ ((aligned(32))); +static __m256i 
psi_a_p1_p7 __attribute__ ((aligned(32))); +static __m256i psi_a_p3_m7 __attribute__ ((aligned(32))); +static __m256i psi_a_p3_m5 __attribute__ ((aligned(32))); +static __m256i psi_a_p3_m3 __attribute__ ((aligned(32))); +static __m256i psi_a_p3_m1 __attribute__ ((aligned(32))); +static __m256i psi_a_p3_p1 __attribute__ ((aligned(32))); +static __m256i psi_a_p3_p3 __attribute__ ((aligned(32))); +static __m256i psi_a_p3_p5 __attribute__ ((aligned(32))); +static __m256i psi_a_p3_p7 __attribute__ ((aligned(32))); +static __m256i psi_a_p5_m7 __attribute__ ((aligned(32))); +static __m256i psi_a_p5_m5 __attribute__ ((aligned(32))); +static __m256i psi_a_p5_m3 __attribute__ ((aligned(32))); +static __m256i psi_a_p5_m1 __attribute__ ((aligned(32))); +static __m256i psi_a_p5_p1 __attribute__ ((aligned(32))); +static __m256i psi_a_p5_p3 __attribute__ ((aligned(32))); +static __m256i psi_a_p5_p5 __attribute__ ((aligned(32))); +static __m256i psi_a_p5_p7 __attribute__ ((aligned(32))); +static __m256i psi_a_p7_m7 __attribute__ ((aligned(32))); +static __m256i psi_a_p7_m5 __attribute__ ((aligned(32))); +static __m256i psi_a_p7_m3 __attribute__ ((aligned(32))); +static __m256i psi_a_p7_m1 __attribute__ ((aligned(32))); +static __m256i psi_a_p7_p1 __attribute__ ((aligned(32))); +static __m256i psi_a_p7_p3 __attribute__ ((aligned(32))); +static __m256i psi_a_p7_p5 __attribute__ ((aligned(32))); +static __m256i psi_a_p7_p7 __attribute__ ((aligned(32))); + +static __m256i a_sq_m7_m7 __attribute__ ((aligned(32))); +static __m256i a_sq_m7_m5 __attribute__ ((aligned(32))); +static __m256i a_sq_m7_m3 __attribute__ ((aligned(32))); +static __m256i a_sq_m7_m1 __attribute__ ((aligned(32))); +static __m256i a_sq_m7_p1 __attribute__ ((aligned(32))); +static __m256i a_sq_m7_p3 __attribute__ ((aligned(32))); +static __m256i a_sq_m7_p5 __attribute__ ((aligned(32))); +static __m256i a_sq_m7_p7 __attribute__ ((aligned(32))); +static __m256i a_sq_m5_m7 __attribute__ ((aligned(32))); +static __m256i 
a_sq_m5_m5 __attribute__ ((aligned(32))); +static __m256i a_sq_m5_m3 __attribute__ ((aligned(32))); +static __m256i a_sq_m5_m1 __attribute__ ((aligned(32))); +static __m256i a_sq_m5_p1 __attribute__ ((aligned(32))); +static __m256i a_sq_m5_p3 __attribute__ ((aligned(32))); +static __m256i a_sq_m5_p5 __attribute__ ((aligned(32))); +static __m256i a_sq_m5_p7 __attribute__ ((aligned(32))); +static __m256i a_sq_m3_m7 __attribute__ ((aligned(32))); +static __m256i a_sq_m3_m5 __attribute__ ((aligned(32))); +static __m256i a_sq_m3_m3 __attribute__ ((aligned(32))); +static __m256i a_sq_m3_m1 __attribute__ ((aligned(32))); +static __m256i a_sq_m3_p1 __attribute__ ((aligned(32))); +static __m256i a_sq_m3_p3 __attribute__ ((aligned(32))); +static __m256i a_sq_m3_p5 __attribute__ ((aligned(32))); +static __m256i a_sq_m3_p7 __attribute__ ((aligned(32))); +static __m256i a_sq_m1_m7 __attribute__ ((aligned(32))); +static __m256i a_sq_m1_m5 __attribute__ ((aligned(32))); +static __m256i a_sq_m1_m3 __attribute__ ((aligned(32))); +static __m256i a_sq_m1_m1 __attribute__ ((aligned(32))); +static __m256i a_sq_m1_p1 __attribute__ ((aligned(32))); +static __m256i a_sq_m1_p3 __attribute__ ((aligned(32))); +static __m256i a_sq_m1_p5 __attribute__ ((aligned(32))); +static __m256i a_sq_m1_p7 __attribute__ ((aligned(32))); +static __m256i a_sq_p1_m7 __attribute__ ((aligned(32))); +static __m256i a_sq_p1_m5 __attribute__ ((aligned(32))); +static __m256i a_sq_p1_m3 __attribute__ ((aligned(32))); +static __m256i a_sq_p1_m1 __attribute__ ((aligned(32))); +static __m256i a_sq_p1_p1 __attribute__ ((aligned(32))); +static __m256i a_sq_p1_p3 __attribute__ ((aligned(32))); +static __m256i a_sq_p1_p5 __attribute__ ((aligned(32))); +static __m256i a_sq_p1_p7 __attribute__ ((aligned(32))); +static __m256i a_sq_p3_m7 __attribute__ ((aligned(32))); +static __m256i a_sq_p3_m5 __attribute__ ((aligned(32))); +static __m256i a_sq_p3_m3 __attribute__ ((aligned(32))); +static __m256i a_sq_p3_m1 __attribute__ 
((aligned(32))); +static __m256i a_sq_p3_p1 __attribute__ ((aligned(32))); +static __m256i a_sq_p3_p3 __attribute__ ((aligned(32))); +static __m256i a_sq_p3_p5 __attribute__ ((aligned(32))); +static __m256i a_sq_p3_p7 __attribute__ ((aligned(32))); +static __m256i a_sq_p5_m7 __attribute__ ((aligned(32))); +static __m256i a_sq_p5_m5 __attribute__ ((aligned(32))); +static __m256i a_sq_p5_m3 __attribute__ ((aligned(32))); +static __m256i a_sq_p5_m1 __attribute__ ((aligned(32))); +static __m256i a_sq_p5_p1 __attribute__ ((aligned(32))); +static __m256i a_sq_p5_p3 __attribute__ ((aligned(32))); +static __m256i a_sq_p5_p5 __attribute__ ((aligned(32))); +static __m256i a_sq_p5_p7 __attribute__ ((aligned(32))); +static __m256i a_sq_p7_m7 __attribute__ ((aligned(32))); +static __m256i a_sq_p7_m5 __attribute__ ((aligned(32))); +static __m256i a_sq_p7_m3 __attribute__ ((aligned(32))); +static __m256i a_sq_p7_m1 __attribute__ ((aligned(32))); +static __m256i a_sq_p7_p1 __attribute__ ((aligned(32))); +static __m256i a_sq_p7_p3 __attribute__ ((aligned(32))); +static __m256i a_sq_p7_p5 __attribute__ ((aligned(32))); +static __m256i a_sq_p7_p7 __attribute__ ((aligned(32))); + +static __m256i bit_met_m7_m7 __attribute__ ((aligned(32))); +static __m256i bit_met_m7_m5 __attribute__ ((aligned(32))); +static __m256i bit_met_m7_m3 __attribute__ ((aligned(32))); +static __m256i bit_met_m7_m1 __attribute__ ((aligned(32))); +static __m256i bit_met_m7_p1 __attribute__ ((aligned(32))); +static __m256i bit_met_m7_p3 __attribute__ ((aligned(32))); +static __m256i bit_met_m7_p5 __attribute__ ((aligned(32))); +static __m256i bit_met_m7_p7 __attribute__ ((aligned(32))); +static __m256i bit_met_m5_m7 __attribute__ ((aligned(32))); +static __m256i bit_met_m5_m5 __attribute__ ((aligned(32))); +static __m256i bit_met_m5_m3 __attribute__ ((aligned(32))); +static __m256i bit_met_m5_m1 __attribute__ ((aligned(32))); +static __m256i bit_met_m5_p1 __attribute__ ((aligned(32))); +static __m256i 
bit_met_m5_p3 __attribute__ ((aligned(32))); +static __m256i bit_met_m5_p5 __attribute__ ((aligned(32))); +static __m256i bit_met_m5_p7 __attribute__ ((aligned(32))); +static __m256i bit_met_m3_m7 __attribute__ ((aligned(32))); +static __m256i bit_met_m3_m5 __attribute__ ((aligned(32))); +static __m256i bit_met_m3_m3 __attribute__ ((aligned(32))); +static __m256i bit_met_m3_m1 __attribute__ ((aligned(32))); +static __m256i bit_met_m3_p1 __attribute__ ((aligned(32))); +static __m256i bit_met_m3_p3 __attribute__ ((aligned(32))); +static __m256i bit_met_m3_p5 __attribute__ ((aligned(32))); +static __m256i bit_met_m3_p7 __attribute__ ((aligned(32))); +static __m256i bit_met_m1_m7 __attribute__ ((aligned(32))); +static __m256i bit_met_m1_m5 __attribute__ ((aligned(32))); +static __m256i bit_met_m1_m3 __attribute__ ((aligned(32))); +static __m256i bit_met_m1_m1 __attribute__ ((aligned(32))); +static __m256i bit_met_m1_p1 __attribute__ ((aligned(32))); +static __m256i bit_met_m1_p3 __attribute__ ((aligned(32))); +static __m256i bit_met_m1_p5 __attribute__ ((aligned(32))); +static __m256i bit_met_m1_p7 __attribute__ ((aligned(32))); +static __m256i bit_met_p1_m7 __attribute__ ((aligned(32))); +static __m256i bit_met_p1_m5 __attribute__ ((aligned(32))); +static __m256i bit_met_p1_m3 __attribute__ ((aligned(32))); +static __m256i bit_met_p1_m1 __attribute__ ((aligned(32))); +static __m256i bit_met_p1_p1 __attribute__ ((aligned(32))); +static __m256i bit_met_p1_p3 __attribute__ ((aligned(32))); +static __m256i bit_met_p1_p5 __attribute__ ((aligned(32))); +static __m256i bit_met_p1_p7 __attribute__ ((aligned(32))); +static __m256i bit_met_p3_m7 __attribute__ ((aligned(32))); +static __m256i bit_met_p3_m5 __attribute__ ((aligned(32))); +static __m256i bit_met_p3_m3 __attribute__ ((aligned(32))); +static __m256i bit_met_p3_m1 __attribute__ ((aligned(32))); +static __m256i bit_met_p3_p1 __attribute__ ((aligned(32))); +static __m256i bit_met_p3_p3 __attribute__ ((aligned(32))); 
+static __m256i bit_met_p3_p5 __attribute__ ((aligned(32))); +static __m256i bit_met_p3_p7 __attribute__ ((aligned(32))); +static __m256i bit_met_p5_m7 __attribute__ ((aligned(32))); +static __m256i bit_met_p5_m5 __attribute__ ((aligned(32))); +static __m256i bit_met_p5_m3 __attribute__ ((aligned(32))); +static __m256i bit_met_p5_m1 __attribute__ ((aligned(32))); +static __m256i bit_met_p5_p1 __attribute__ ((aligned(32))); +static __m256i bit_met_p5_p3 __attribute__ ((aligned(32))); +static __m256i bit_met_p5_p5 __attribute__ ((aligned(32))); +static __m256i bit_met_p5_p7 __attribute__ ((aligned(32))); +static __m256i bit_met_p7_m7 __attribute__ ((aligned(32))); +static __m256i bit_met_p7_m5 __attribute__ ((aligned(32))); +static __m256i bit_met_p7_m3 __attribute__ ((aligned(32))); +static __m256i bit_met_p7_m1 __attribute__ ((aligned(32))); +static __m256i bit_met_p7_p1 __attribute__ ((aligned(32))); +static __m256i bit_met_p7_p3 __attribute__ ((aligned(32))); +static __m256i bit_met_p7_p5 __attribute__ ((aligned(32))); +static __m256i bit_met_p7_p7 __attribute__ ((aligned(32))); + +static __m256i y0_p_1_1 __attribute__ ((aligned(32))); +static __m256i y0_p_1_3 __attribute__ ((aligned(32))); +static __m256i y0_p_1_5 __attribute__ ((aligned(32))); +static __m256i y0_p_1_7 __attribute__ ((aligned(32))); +static __m256i y0_p_3_1 __attribute__ ((aligned(32))); +static __m256i y0_p_3_3 __attribute__ ((aligned(32))); +static __m256i y0_p_3_5 __attribute__ ((aligned(32))); +static __m256i y0_p_3_7 __attribute__ ((aligned(32))); +static __m256i y0_p_5_1 __attribute__ ((aligned(32))); +static __m256i y0_p_5_3 __attribute__ ((aligned(32))); +static __m256i y0_p_5_5 __attribute__ ((aligned(32))); +static __m256i y0_p_5_7 __attribute__ ((aligned(32))); +static __m256i y0_p_7_1 __attribute__ ((aligned(32))); +static __m256i y0_p_7_3 __attribute__ ((aligned(32))); +static __m256i y0_p_7_5 __attribute__ ((aligned(32))); +static __m256i y0_p_7_7 __attribute__ ((aligned(32))); 
+static __m256i y0_m_1_1 __attribute__ ((aligned(32))); +static __m256i y0_m_1_3 __attribute__ ((aligned(32))); +static __m256i y0_m_1_5 __attribute__ ((aligned(32))); +static __m256i y0_m_1_7 __attribute__ ((aligned(32))); +static __m256i y0_m_3_1 __attribute__ ((aligned(32))); +static __m256i y0_m_3_3 __attribute__ ((aligned(32))); +static __m256i y0_m_3_5 __attribute__ ((aligned(32))); +static __m256i y0_m_3_7 __attribute__ ((aligned(32))); +static __m256i y0_m_5_1 __attribute__ ((aligned(32))); +static __m256i y0_m_5_3 __attribute__ ((aligned(32))); +static __m256i y0_m_5_5 __attribute__ ((aligned(32))); +static __m256i y0_m_5_7 __attribute__ ((aligned(32))); +static __m256i y0_m_7_1 __attribute__ ((aligned(32))); +static __m256i y0_m_7_3 __attribute__ ((aligned(32))); +static __m256i y0_m_7_5 __attribute__ ((aligned(32))); +static __m256i y0_m_7_7 __attribute__ ((aligned(32))); + +static __m256i xmm0 __attribute__ ((aligned(32))); +static __m256i xmm1 __attribute__ ((aligned(32))); +static __m256i xmm2 __attribute__ ((aligned(32))); +static __m256i xmm3 __attribute__ ((aligned(32))); +static __m256i xmm4 __attribute__ ((aligned(32))); +static __m256i xmm5 __attribute__ ((aligned(32))); +static __m256i xmm6 __attribute__ ((aligned(32))); +static __m256i xmm7 __attribute__ ((aligned(32))); +static __m256i xmm8 __attribute__ ((aligned(32))); + +static __m256i y0r __attribute__ ((aligned(32))); +static __m256i y0i __attribute__ ((aligned(32))); +static __m256i y1r __attribute__ ((aligned(32))); +static __m256i y1i __attribute__ ((aligned(32))); +static __m256i y2r __attribute__ ((aligned(32))); +static __m256i y2i __attribute__ ((aligned(32))); + +static __m256i logmax_num_re0 __attribute__ ((aligned(32))); +static __m256i logmax_den_re0 __attribute__ ((aligned(32))); + +static __m256i tmp_result __attribute__ ((aligned(32))); +static __m256i tmp_result2 __attribute__ ((aligned(32))); +static __m256i tmp_result3 __attribute__ ((aligned(32))); +static __m256i 
tmp_result4 __attribute__ ((aligned(32))); + +//============================================================================================== +// Auxiliary macros +// NOTE: every macro below expands to several statements and overwrites the file-scope scratch vectors (tmp_result, tmp_result2, and for the 64-QAM interference macro also tmp_result3/tmp_result4), so it must not be used as the unbraced body of an if/else/for. + +// interference_abs_epi16: per 16-bit element, int_mag = c1 where int_ch_mag > psi, otherwise c2 (assumes ones256 holds all-one bits; it is defined outside this chunk, TODO confirm) +#define interference_abs_epi16(psi,int_ch_mag,int_mag,c1,c2) tmp_result = _mm256_cmpgt_epi16(int_ch_mag,psi); tmp_result2 = _mm256_xor_si256(tmp_result,(*(__m256i*)&ones256[0])); tmp_result = _mm256_and_si256(tmp_result,c1); tmp_result2 = _mm256_and_si256(tmp_result2,c2); int_mag = _mm256_or_si256(tmp_result,tmp_result2); + +// interference_abs_64qam_epi16: per 16-bit element, picks the interference magnitude a out of four candidates: +// a = c1 where psi < int_ch_mag, c3 where int_ch_mag <= psi < int_two_ch_mag, c5 where int_two_ch_mag <= psi <= int_three_ch_mag, c7 where psi > int_three_ch_mag +// (select masks: tmp_result2 for c1, tmp_result for c3, tmp_result3 for c5, tmp_result4 for c7; assumes int_ch_mag <= int_two_ch_mag <= int_three_ch_mag and that ones256 holds all-one bits, TODO confirm) +#define interference_abs_64qam_epi16(psi,int_ch_mag,int_two_ch_mag,int_three_ch_mag,a,c1,c3,c5,c7) tmp_result = _mm256_cmpgt_epi16(int_two_ch_mag,psi); tmp_result3 = _mm256_xor_si256(tmp_result,(*(__m256i*)&ones256[0])); tmp_result2 = _mm256_cmpgt_epi16(int_ch_mag,psi); tmp_result = _mm256_xor_si256(tmp_result,tmp_result2); tmp_result4 = _mm256_cmpgt_epi16(psi,int_three_ch_mag); tmp_result3 = _mm256_xor_si256(tmp_result3,tmp_result4); tmp_result = _mm256_and_si256(tmp_result,c3); tmp_result2 = _mm256_and_si256(tmp_result2,c1); tmp_result3 = _mm256_and_si256(tmp_result3,c5); tmp_result4 = _mm256_and_si256(tmp_result4,c7); tmp_result = _mm256_or_si256(tmp_result,tmp_result2); tmp_result3 = _mm256_or_si256(tmp_result3,tmp_result4); a = _mm256_or_si256(tmp_result,tmp_result3); + +// calculates psi_a = psi_r*a_r + psi_i*a_i +// (each product is the high 16 bits of the 32-bit product, shifted left by 1; the final sum saturates) +#define prodsum_psi_a_epi16(psi_r,a_r,psi_i,a_i,psi_a) tmp_result = _mm256_mulhi_epi16(psi_r,a_r); tmp_result = _mm256_slli_epi16(tmp_result,1); tmp_result2 = _mm256_mulhi_epi16(psi_i,a_i); tmp_result2 = _mm256_slli_epi16(tmp_result2,1); psi_a = _mm256_adds_epi16(tmp_result,tmp_result2); + +// calculates a_sq = int_ch_mag*(a_r^2 + a_i^2)*scale_factor +#define square_a_epi16(a_r,a_i,int_ch_mag,scale_factor,a_sq) tmp_result = _mm256_mulhi_epi16(a_r,a_r);
tmp_result = _mm256_slli_epi16(tmp_result,1); tmp_result = _mm256_mulhi_epi16(tmp_result,scale_factor); tmp_result = _mm256_slli_epi16(tmp_result,1); tmp_result = _mm256_mulhi_epi16(tmp_result,int_ch_mag); tmp_result = _mm256_slli_epi16(tmp_result,1); tmp_result2 = _mm256_mulhi_epi16(a_i,a_i); tmp_result2 = _mm256_slli_epi16(tmp_result2,1); tmp_result2 = _mm256_mulhi_epi16(tmp_result2,scale_factor); tmp_result2 = _mm256_slli_epi16(tmp_result2,1); tmp_result2 = _mm256_mulhi_epi16(tmp_result2,int_ch_mag); tmp_result2 = _mm256_slli_epi16(tmp_result2,1); a_sq = _mm256_adds_epi16(tmp_result,tmp_result2); + +// calculates a_sq = int_ch_mag*(a_r^2 + a_i^2)*scale_factor for 64-QAM +// (same steps as square_a_epi16, except that the result of the scale_factor multiply is shifted left by 3 instead of 1) +#define square_a_64qam_epi16(a_r,a_i,int_ch_mag,scale_factor,a_sq) tmp_result = _mm256_mulhi_epi16(a_r,a_r); tmp_result = _mm256_slli_epi16(tmp_result,1); tmp_result = _mm256_mulhi_epi16(tmp_result,scale_factor); tmp_result = _mm256_slli_epi16(tmp_result,3); tmp_result = _mm256_mulhi_epi16(tmp_result,int_ch_mag); tmp_result = _mm256_slli_epi16(tmp_result,1); tmp_result2 = _mm256_mulhi_epi16(a_i,a_i); tmp_result2 = _mm256_slli_epi16(tmp_result2,1); tmp_result2 = _mm256_mulhi_epi16(tmp_result2,scale_factor); tmp_result2 = _mm256_slli_epi16(tmp_result2,3); tmp_result2 = _mm256_mulhi_epi16(tmp_result2,int_ch_mag); tmp_result2 = _mm256_slli_epi16(tmp_result2,1); a_sq = _mm256_adds_epi16(tmp_result,tmp_result2); + +// Splits two vectors of packed complex 16-bit samples into one vector of real parts and one of imaginary parts. +// in0, in1: eight samples each, laid out as Re,Im pairs (layout inferred from the comments in the body, confirm against callers); in0 holds samples 0..7 and in1 samples 8..15. +// out_re, out_im: receive the 16 real and the 16 imaginary parts in sample order 0..15. +// The 16-bit/32-bit shuffles and the 64-bit unpacks act on each 128-bit lane separately; the closing permute4x64 puts the four 64-bit groups back into sample order. +void seperate_real_imag_parts(__m256i *out_re, + __m256i *out_im, + __m256i in0, + __m256i in1) +{ + __m256i tmp0; + __m256i tmp1; + + in0 = _mm256_shufflelo_epi16(in0,0xd8); // 0xd8 == _MM_SHUFFLE(3,1,2,0): elements reordered to 0,2,1,3 + in0 = _mm256_shufflehi_epi16(in0,0xd8); // 0xd8 == _MM_SHUFFLE(3,1,2,0): elements reordered to 0,2,1,3 + in0 = _mm256_shuffle_epi32(in0,0xd8); // 0xd8 == _MM_SHUFFLE(3,1,2,0): elements reordered to 0,2,1,3 + + in1 = _mm256_shufflelo_epi16(in1,0xd8); // 0xd8 == _MM_SHUFFLE(3,1,2,0): elements reordered to 0,2,1,3 + in1 = _mm256_shufflehi_epi16(in1,0xd8); // 0xd8 == _MM_SHUFFLE(3,1,2,0): elements reordered to 0,2,1,3 + in1 = _mm256_shuffle_epi32(in1,0xd8); // 0xd8 == _MM_SHUFFLE(3,1,2,0): elements reordered to 0,2,1,3 + + //in0 = [Re(0,1,2,3) Im(0,1,2,3) Re(4,5,6,7) Im(4,5,6,7)] +
//in1 = [Re(8,9,10,11) Im(8,9,10,11) Re(12,13,14,15) Im(12,13,14,15)] + + tmp0 = _mm256_unpacklo_epi64(in0, in1); + //tmp0 = [Re(0,1,2,3) Re(8,9,10,11) Re(4,5,6,7) Re(12,13,14,15)] + tmp0 = _mm256_permute4x64_epi64(tmp0,0xd8); // real parts, now in sample order 0..15 + + tmp1 = _mm256_unpackhi_epi64(in0, in1); + //tmp1 = [Im(0,1,2,3) Im(8,9,10,11) Im(4,5,6,7) Im(12,13,14,15)] + tmp1 = _mm256_permute4x64_epi64(tmp1,0xd8); // imaginary parts, now in sample order 0..15 + + *out_re = tmp0; + *out_im = tmp1; +} + +void qam64_qam16_avx2(short *stream0_in, + short *stream1_in, + short *ch_mag, + short *ch_mag_i, + short *stream0_out, + short *rho01, + int length + ) +{ + + /* + Author: S. Wagner + Date: 31-07-12 + + Input: + stream0_in: MF filter for 1st stream, i.e., y0=h0'*y + stream1_in: MF filter for 2nd stream, i.e., y1=h1'*y + ch_mag: 4*h0/sqrt(42), [Re0 Im0 Re1 Im1] s.t. Im0=Re0, Im1=Re1, etc + ch_mag_i: 4*h1/sqrt(42), [Re0 Im0 Re1 Im1] s.t. Im0=Re0, Im1=Re1, etc + rho01: Channel cross correlation, i.e., h1'*h0 + + Output: + stream0_out: output LLRs for 1st stream + */ + +#if defined(__x86_64__) || defined(__i386__) + + __m256i *rho01_256i = (__m256i *)rho01; + __m256i *stream0_256i_in = (__m256i *)stream0_in; + __m256i *stream1_256i_in = (__m256i *)stream1_in; + __m256i *ch_mag_256i = (__m256i *)ch_mag; + __m256i *ch_mag_256i_i = (__m256i *)ch_mag_i; + + __m256i ONE_OVER_SQRT_42 = _mm256_broadcastw_epi16(_mm_set1_epi16(10112)); // round(1/sqrt(42)*2^16) + __m256i THREE_OVER_SQRT_42 = _mm256_broadcastw_epi16(_mm_set1_epi16(30337)); // round(3/sqrt(42)*2^16) + __m256i FIVE_OVER_SQRT_42 = _mm256_broadcastw_epi16(_mm_set1_epi16(25281)); // round(5/sqrt(42)*2^15) + __m256i SEVEN_OVER_SQRT_42 = _mm256_broadcastw_epi16(_mm_set1_epi16(17697)); // round(7/sqrt(42)*2^14), Q2.14 + __m256i FORTYNINE_OVER_FOUR_SQRT_42 = _mm256_broadcastw_epi16(_mm_set1_epi16(30969)); // round(49/(4*sqrt(42))*2^14), Q2.14 + __m256i THIRTYSEVEN_OVER_FOUR_SQRT_42 = _mm256_broadcastw_epi16(_mm_set1_epi16(23385)); // round(37/(4*sqrt(42))*2^14), Q2.14 + __m256i
TWENTYFIVE_OVER_FOUR_SQRT_42 = _mm256_broadcastw_epi16(_mm_set1_epi16(31601)); // round(25/(4*sqrt(42))*2^15) + __m256i TWENTYNINE_OVER_FOUR_SQRT_42 = _mm256_broadcastw_epi16(_mm_set1_epi16(18329)); // round(29/(4*sqrt(42))*2^15), Q2.14 + __m256i SEVENTEEN_OVER_FOUR_SQRT_42 = _mm256_broadcastw_epi16(_mm_set1_epi16(21489)); // round(17/(4*sqrt(42))*2^15) + __m256i NINE_OVER_FOUR_SQRT_42 = _mm256_broadcastw_epi16(_mm_set1_epi16(11376)); // round(9/(4*sqrt(42))*2^15) + __m256i THIRTEEN_OVER_FOUR_SQRT_42 = _mm256_broadcastw_epi16(_mm_set1_epi16(16433)); // round(13/(4*sqrt(42))*2^15) + __m256i FIVE_OVER_FOUR_SQRT_42 = _mm256_broadcastw_epi16(_mm_set1_epi16(6320)); // round(5/(4*sqrt(42))*2^15) + __m256i ONE_OVER_FOUR_SQRT_42 = _mm256_broadcastw_epi16(_mm_set1_epi16(1264)); // round(1/(4*sqrt(42))*2^15) + __m256i ONE_OVER_SQRT_10_Q15 = _mm256_broadcastw_epi16(_mm_set1_epi16(10362)); // round(1/sqrt(10)*2^15) + __m256i THREE_OVER_SQRT_10 = _mm256_broadcastw_epi16(_mm_set1_epi16(31086)); // round(3/sqrt(10)*2^15) + __m256i SQRT_10_OVER_FOUR = _mm256_broadcastw_epi16(_mm_set1_epi16(25905)); // round(sqrt(10)/4*2^15) + + + __m256i ch_mag_int; + __m256i ch_mag_des; + __m256i ch_mag_98_over_42_with_sigma2; + __m256i ch_mag_74_over_42_with_sigma2; + __m256i ch_mag_58_over_42_with_sigma2; + __m256i ch_mag_50_over_42_with_sigma2; + __m256i ch_mag_34_over_42_with_sigma2; + __m256i ch_mag_18_over_42_with_sigma2; + __m256i ch_mag_26_over_42_with_sigma2; + __m256i ch_mag_10_over_42_with_sigma2; + __m256i ch_mag_2_over_42_with_sigma2; + __m256i y0r_one_over_sqrt_21; + __m256i y0r_three_over_sqrt_21; + __m256i y0r_five_over_sqrt_21; + __m256i y0r_seven_over_sqrt_21; + __m256i y0i_one_over_sqrt_21; + __m256i y0i_three_over_sqrt_21; + __m256i y0i_five_over_sqrt_21; + __m256i y0i_seven_over_sqrt_21; + +#elif defined(__arm__) + +#endif + int i,j; + uint32_t len256 = (length)>>3; + + for (i=0; i<len256; i+=2) { + +#if defined(__x86_64__) || defined(__i386__) + // Get rho + /* + xmm0 = 
rho01_128i[i]; + xmm1 = rho01_128i[i+1]; + xmm0 = _mm_shufflelo_epi16(xmm0,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm0 = _mm_shufflehi_epi16(xmm0,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm0 = _mm_shuffle_epi32(xmm0,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm1 = _mm_shufflelo_epi16(xmm1,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm1 = _mm_shufflehi_epi16(xmm1,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm1 = _mm_shuffle_epi32(xmm1,0xd8); //_MM_SHUFFLE(0,2,1,3)); + //xmm0 = [Re(0,1) Re(2,3) Im(0,1) Im(2,3)] + //xmm1 = [Re(4,5) Re(6,7) Im(4,5) Im(6,7)] + xmm2 = _mm_unpacklo_epi64(xmm0,xmm1); // Re(rho) + xmm3 = _mm_unpackhi_epi64(xmm0,xmm1); // Im(rho) + */ + seperate_real_imag_parts(&xmm2, &xmm3, rho01_256i[i], rho01_256i[i+1]); + + rho_rpi = _mm256_adds_epi16(xmm2,xmm3); // rho = Re(rho) + Im(rho) + rho_rmi = _mm256_subs_epi16(xmm2,xmm3); // rho* = Re(rho) - Im(rho) + + // Compute the different rhos + rho_rpi_1_1 = _mm256_mulhi_epi16(rho_rpi, ONE_OVER_SQRT_42); + rho_rmi_1_1 = _mm256_mulhi_epi16(rho_rmi, ONE_OVER_SQRT_42); + rho_rpi_3_3 = _mm256_mulhi_epi16(rho_rpi, THREE_OVER_SQRT_42); + rho_rmi_3_3 = _mm256_mulhi_epi16(rho_rmi, THREE_OVER_SQRT_42); + rho_rpi_5_5 = _mm256_mulhi_epi16(rho_rpi, FIVE_OVER_SQRT_42); + rho_rmi_5_5 = _mm256_mulhi_epi16(rho_rmi, FIVE_OVER_SQRT_42); + rho_rpi_7_7 = _mm256_mulhi_epi16(rho_rpi, SEVEN_OVER_SQRT_42); + rho_rmi_7_7 = _mm256_mulhi_epi16(rho_rmi, SEVEN_OVER_SQRT_42); + + rho_rpi_5_5 = _mm256_slli_epi16(rho_rpi_5_5, 1); + rho_rmi_5_5 = _mm256_slli_epi16(rho_rmi_5_5, 1); + rho_rpi_7_7 = _mm256_slli_epi16(rho_rpi_7_7, 2); + rho_rmi_7_7 = _mm256_slli_epi16(rho_rmi_7_7, 2); + + xmm4 = _mm256_mulhi_epi16(xmm2, ONE_OVER_SQRT_42); + xmm5 = _mm256_mulhi_epi16(xmm3, ONE_OVER_SQRT_42); + xmm6 = _mm256_mulhi_epi16(xmm3, THREE_OVER_SQRT_42); + xmm7 = _mm256_mulhi_epi16(xmm3, FIVE_OVER_SQRT_42); + xmm8 = _mm256_mulhi_epi16(xmm3, SEVEN_OVER_SQRT_42); + xmm7 = _mm256_slli_epi16(xmm7, 1); + xmm8 = _mm256_slli_epi16(xmm8, 2); + + rho_rpi_1_3 = _mm256_adds_epi16(xmm4, xmm6); + 
rho_rmi_1_3 = _mm256_subs_epi16(xmm4, xmm6); + rho_rpi_1_5 = _mm256_adds_epi16(xmm4, xmm7); + rho_rmi_1_5 = _mm256_subs_epi16(xmm4, xmm7); + rho_rpi_1_7 = _mm256_adds_epi16(xmm4, xmm8); + rho_rmi_1_7 = _mm256_subs_epi16(xmm4, xmm8); + + xmm4 = _mm256_mulhi_epi16(xmm2, THREE_OVER_SQRT_42); + rho_rpi_3_1 = _mm256_adds_epi16(xmm4, xmm5); + rho_rmi_3_1 = _mm256_subs_epi16(xmm4, xmm5); + rho_rpi_3_5 = _mm256_adds_epi16(xmm4, xmm7); + rho_rmi_3_5 = _mm256_subs_epi16(xmm4, xmm7); + rho_rpi_3_7 = _mm256_adds_epi16(xmm4, xmm8); + rho_rmi_3_7 = _mm256_subs_epi16(xmm4, xmm8); + + xmm4 = _mm256_mulhi_epi16(xmm2, FIVE_OVER_SQRT_42); + xmm4 = _mm256_slli_epi16(xmm4, 1); + rho_rpi_5_1 = _mm256_adds_epi16(xmm4, xmm5); + rho_rmi_5_1 = _mm256_subs_epi16(xmm4, xmm5); + rho_rpi_5_3 = _mm256_adds_epi16(xmm4, xmm6); + rho_rmi_5_3 = _mm256_subs_epi16(xmm4, xmm6); + rho_rpi_5_7 = _mm256_adds_epi16(xmm4, xmm8); + rho_rmi_5_7 = _mm256_subs_epi16(xmm4, xmm8); + + xmm4 = _mm256_mulhi_epi16(xmm2, SEVEN_OVER_SQRT_42); + xmm4 = _mm256_slli_epi16(xmm4, 2); + rho_rpi_7_1 = _mm256_adds_epi16(xmm4, xmm5); + rho_rmi_7_1 = _mm256_subs_epi16(xmm4, xmm5); + rho_rpi_7_3 = _mm256_adds_epi16(xmm4, xmm6); + rho_rmi_7_3 = _mm256_subs_epi16(xmm4, xmm6); + rho_rpi_7_5 = _mm256_adds_epi16(xmm4, xmm7); + rho_rmi_7_5 = _mm256_subs_epi16(xmm4, xmm7); + + // Rearrange interfering MF output + /* + xmm0 = stream1_128i_in[i]; + xmm1 = stream1_128i_in[i+1]; + xmm0 = _mm256_shufflelo_epi16(xmm0,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm0 = _mm256_shufflehi_epi16(xmm0,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm0 = _mm256_shuffle_epi32(xmm0,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm1 = _mm256_shufflelo_epi16(xmm1,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm1 = _mm256_shufflehi_epi16(xmm1,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm1 = _mm256_shuffle_epi32(xmm1,0xd8); //_MM_SHUFFLE(0,2,1,3)); + //xmm0 = [Re(0,1) Re(2,3) Im(0,1) Im(2,3)] + //xmm1 = [Re(4,5) Re(6,7) Im(4,5) Im(6,7)] + y1r = _mm256_unpacklo_epi64(xmm0,xmm1); 
//[y1r(1),y1r(2),y1r(3),y1r(4)] + y1i = _mm256_unpackhi_epi64(xmm0,xmm1); //[y1i(1),y1i(2),y1i(3),y1i(4)] + */ + + seperate_real_imag_parts(&y1r, &y1i, stream1_256i_in[i], stream1_256i_in[i+1]); + + // Psi_r calculation from rho_rpi or rho_rmi + xmm0 = _mm256_broadcastw_epi16(_mm_set1_epi16(0));// ZERO for abs_pi16 + xmm2 = _mm256_subs_epi16(rho_rpi_7_7, y1r); + psi_r_p7_p7 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rpi_7_5, y1r); + psi_r_p7_p5 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rpi_7_3, y1r); + psi_r_p7_p3 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rpi_7_1, y1r); + psi_r_p7_p1 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rmi_7_1, y1r); + psi_r_p7_m1 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rmi_7_3, y1r); + psi_r_p7_m3 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rmi_7_5, y1r); + psi_r_p7_m5 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rmi_7_7, y1r); + psi_r_p7_m7 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rpi_5_7, y1r); + psi_r_p5_p7 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rpi_5_5, y1r); + psi_r_p5_p5 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rpi_5_3, y1r); + psi_r_p5_p3 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rpi_5_1, y1r); + psi_r_p5_p1 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rmi_5_1, y1r); + psi_r_p5_m1 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rmi_5_3, y1r); + psi_r_p5_m3 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rmi_5_5, y1r); + psi_r_p5_m5 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rmi_5_7, y1r); + psi_r_p5_m7 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rpi_3_7, y1r); + psi_r_p3_p7 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rpi_3_5, y1r); + psi_r_p3_p5 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rpi_3_3, y1r); + psi_r_p3_p3 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rpi_3_1, y1r); + 
psi_r_p3_p1 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rmi_3_1, y1r); + psi_r_p3_m1 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rmi_3_3, y1r); + psi_r_p3_m3 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rmi_3_5, y1r); + psi_r_p3_m5 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rmi_3_7, y1r); + psi_r_p3_m7 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rpi_1_7, y1r); + psi_r_p1_p7 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rpi_1_5, y1r); + psi_r_p1_p5 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rpi_1_3, y1r); + psi_r_p1_p3 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rpi_1_1, y1r); + psi_r_p1_p1 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rmi_1_1, y1r); + psi_r_p1_m1 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rmi_1_3, y1r); + psi_r_p1_m3 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rmi_1_5, y1r); + psi_r_p1_m5 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rmi_1_7, y1r); + psi_r_p1_m7 = _mm256_abs_epi16(xmm2); + + xmm2 = _mm256_adds_epi16(rho_rmi_1_7, y1r); + psi_r_m1_p7 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rmi_1_5, y1r); + psi_r_m1_p5 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rmi_1_3, y1r); + psi_r_m1_p3 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rmi_1_1, y1r); + psi_r_m1_p1 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rpi_1_1, y1r); + psi_r_m1_m1 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rpi_1_3, y1r); + psi_r_m1_m3 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rpi_1_5, y1r); + psi_r_m1_m5 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rpi_1_7, y1r); + psi_r_m1_m7 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rmi_3_7, y1r); + psi_r_m3_p7 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rmi_3_5, y1r); + psi_r_m3_p5 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rmi_3_3, y1r); + psi_r_m3_p3 = 
_mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rmi_3_1, y1r); + psi_r_m3_p1 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rpi_3_1, y1r); + psi_r_m3_m1 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rpi_3_3, y1r); + psi_r_m3_m3 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rpi_3_5, y1r); + psi_r_m3_m5 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rpi_3_7, y1r); + psi_r_m3_m7 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rmi_5_7, y1r); + psi_r_m5_p7 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rmi_5_5, y1r); + psi_r_m5_p5 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rmi_5_3, y1r); + psi_r_m5_p3 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rmi_5_1, y1r); + psi_r_m5_p1 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rpi_5_1, y1r); + psi_r_m5_m1 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rpi_5_3, y1r); + psi_r_m5_m3 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rpi_5_5, y1r); + psi_r_m5_m5 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rpi_5_7, y1r); + psi_r_m5_m7 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rmi_7_7, y1r); + psi_r_m7_p7 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rmi_7_5, y1r); + psi_r_m7_p5 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rmi_7_3, y1r); + psi_r_m7_p3 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rmi_7_1, y1r); + psi_r_m7_p1 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rpi_7_1, y1r); + psi_r_m7_m1 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rpi_7_3, y1r); + psi_r_m7_m3 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rpi_7_5, y1r); + psi_r_m7_m5 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rpi_7_7, y1r); + psi_r_m7_m7 = _mm256_abs_epi16(xmm2); + + // Psi_i calculation from rho_rpi or rho_rmi + xmm2 = _mm256_subs_epi16(rho_rmi_7_7, y1i); + psi_i_p7_p7 = _mm256_abs_epi16(xmm2); + xmm2 = 
_mm256_subs_epi16(rho_rmi_5_7, y1i); + psi_i_p7_p5 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rmi_3_7, y1i); + psi_i_p7_p3 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rmi_1_7, y1i); + psi_i_p7_p1 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rpi_1_7, y1i); + psi_i_p7_m1 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rpi_3_7, y1i); + psi_i_p7_m3 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rpi_5_7, y1i); + psi_i_p7_m5 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rpi_7_7, y1i); + psi_i_p7_m7 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rmi_7_5, y1i); + psi_i_p5_p7 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rmi_5_5, y1i); + psi_i_p5_p5 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rmi_3_5, y1i); + psi_i_p5_p3 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rmi_1_5, y1i); + psi_i_p5_p1 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rpi_1_5, y1i); + psi_i_p5_m1 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rpi_3_5, y1i); + psi_i_p5_m3 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rpi_5_5, y1i); + psi_i_p5_m5 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rpi_7_5, y1i); + psi_i_p5_m7 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rmi_7_3, y1i); + psi_i_p3_p7 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rmi_5_3, y1i); + psi_i_p3_p5 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rmi_3_3, y1i); + psi_i_p3_p3 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rmi_1_3, y1i); + psi_i_p3_p1 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rpi_1_3, y1i); + psi_i_p3_m1 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rpi_3_3, y1i); + psi_i_p3_m3 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rpi_5_3, y1i); + psi_i_p3_m5 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rpi_7_3, y1i); + psi_i_p3_m7 = _mm256_abs_epi16(xmm2); + xmm2 = 
_mm256_subs_epi16(rho_rmi_7_1, y1i); + psi_i_p1_p7 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rmi_5_1, y1i); + psi_i_p1_p5 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rmi_3_1, y1i); + psi_i_p1_p3 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rmi_1_1, y1i); + psi_i_p1_p1 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rpi_1_1, y1i); + psi_i_p1_m1 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rpi_3_1, y1i); + psi_i_p1_m3 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rpi_5_1, y1i); + psi_i_p1_m5 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rpi_7_1, y1i); + psi_i_p1_m7 = _mm256_abs_epi16(xmm2); + + xmm2 = _mm256_subs_epi16(rho_rpi_7_1, y1i); + psi_i_m1_p7 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rpi_5_1, y1i); + psi_i_m1_p5 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rpi_3_1, y1i); + psi_i_m1_p3 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rpi_1_1, y1i); + psi_i_m1_p1 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rmi_1_1, y1i); + psi_i_m1_m1 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rmi_3_1, y1i); + psi_i_m1_m3 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rmi_5_1, y1i); + psi_i_m1_m5 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rmi_7_1, y1i); + psi_i_m1_m7 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rpi_7_3, y1i); + psi_i_m3_p7 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rpi_5_3, y1i); + psi_i_m3_p5 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rpi_3_3, y1i); + psi_i_m3_p3 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rpi_1_3, y1i); + psi_i_m3_p1 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rmi_1_3, y1i); + psi_i_m3_m1 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rmi_3_3, y1i); + psi_i_m3_m3 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rmi_5_3, y1i); + psi_i_m3_m5 = _mm256_abs_epi16(xmm2); + xmm2 = 
_mm256_adds_epi16(rho_rmi_7_3, y1i); + psi_i_m3_m7 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rpi_7_5, y1i); + psi_i_m5_p7 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rpi_5_5, y1i); + psi_i_m5_p5 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rpi_3_5, y1i); + psi_i_m5_p3 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rpi_1_5, y1i); + psi_i_m5_p1 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rmi_1_5, y1i); + psi_i_m5_m1 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rmi_3_5, y1i); + psi_i_m5_m3 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rmi_5_5, y1i); + psi_i_m5_m5 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rmi_7_5, y1i); + psi_i_m5_m7 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rpi_7_7, y1i); + psi_i_m7_p7 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rpi_5_7, y1i); + psi_i_m7_p5 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rpi_3_7, y1i); + psi_i_m7_p3 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rpi_1_7, y1i); + psi_i_m7_p1 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rmi_1_7, y1i); + psi_i_m7_m1 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rmi_3_7, y1i); + psi_i_m7_m3 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rmi_5_7, y1i); + psi_i_m7_m5 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rmi_7_7, y1i); + psi_i_m7_m7 = _mm256_abs_epi16(xmm2); + +/* + // Rearrange desired MF output + xmm0 = stream0_128i_in[i]; + xmm1 = stream0_128i_in[i+1]; + xmm0 = _mm256_shufflelo_epi16(xmm0,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm0 = _mm256_shufflehi_epi16(xmm0,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm0 = _mm256_shuffle_epi32(xmm0,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm1 = _mm256_shufflelo_epi16(xmm1,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm1 = _mm256_shufflehi_epi16(xmm1,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm1 = _mm256_shuffle_epi32(xmm1,0xd8); //_MM_SHUFFLE(0,2,1,3)); + //xmm0 = [Re(0,1) Re(2,3) Im(0,1) 
Im(2,3)] + //xmm1 = [Re(4,5) Re(6,7) Im(4,5) Im(6,7)] + y0r = _mm256_unpacklo_epi64(xmm0,xmm1); // = [y0r(1),y0r(2),y0r(3),y0r(4)] + y0i = _mm256_unpackhi_epi64(xmm0,xmm1); +*/ + seperate_real_imag_parts(&y0r, &y0i, stream0_256i_in[i], stream0_256i_in[i+1]); + + /* + // Rearrange desired channel magnitudes + xmm2 = ch_mag_128i[i]; // = [|h|^2(1),|h|^2(1),|h|^2(2),|h|^2(2)]*(2/sqrt(10)) + xmm3 = ch_mag_128i[i+1]; // = [|h|^2(3),|h|^2(3),|h|^2(4),|h|^2(4)]*(2/sqrt(10)) + xmm2 = _mm256_shufflelo_epi16(xmm2,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm2 = _mm256_shufflehi_epi16(xmm2,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm2 = _mm256_shuffle_epi32(xmm2,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm3 = _mm256_shufflelo_epi16(xmm3,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm3 = _mm256_shufflehi_epi16(xmm3,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm3 = _mm256_shuffle_epi32(xmm3,0xd8); //_MM_SHUFFLE(0,2,1,3)); + ch_mag_des = _mm256_unpacklo_epi64(xmm2,xmm3); + */ + + seperate_real_imag_parts(&ch_mag_des, &xmm2, ch_mag_256i[i], ch_mag_256i[i+1]); + + // Rearrange interfering channel magnitudes + /* + xmm2 = ch_mag_128i_i[i]; + xmm3 = ch_mag_128i_i[i+1]; + xmm2 = _mm256_shufflelo_epi16(xmm2,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm2 = _mm256_shufflehi_epi16(xmm2,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm2 = _mm256_shuffle_epi32(xmm2,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm3 = _mm256_shufflelo_epi16(xmm3,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm3 = _mm256_shufflehi_epi16(xmm3,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm3 = _mm256_shuffle_epi32(xmm3,0xd8); //_MM_SHUFFLE(0,2,1,3)); + ch_mag_int = _mm256_unpacklo_epi64(xmm2,xmm3); + */ + + seperate_real_imag_parts(&ch_mag_int, &xmm2, ch_mag_256i_i[i], ch_mag_256i_i[i+1]); + + y0r_one_over_sqrt_21 = _mm256_mulhi_epi16(y0r, ONE_OVER_SQRT_42); + y0r_three_over_sqrt_21 = _mm256_mulhi_epi16(y0r, THREE_OVER_SQRT_42); + y0r_five_over_sqrt_21 = _mm256_mulhi_epi16(y0r, FIVE_OVER_SQRT_42); + y0r_five_over_sqrt_21 = _mm256_slli_epi16(y0r_five_over_sqrt_21, 1); + y0r_seven_over_sqrt_21 = 
_mm256_mulhi_epi16(y0r, SEVEN_OVER_SQRT_42); + y0r_seven_over_sqrt_21 = _mm256_slli_epi16(y0r_seven_over_sqrt_21, 2); // Q2.14 + + y0i_one_over_sqrt_21 = _mm256_mulhi_epi16(y0i, ONE_OVER_SQRT_42); + y0i_three_over_sqrt_21 = _mm256_mulhi_epi16(y0i, THREE_OVER_SQRT_42); + y0i_five_over_sqrt_21 = _mm256_mulhi_epi16(y0i, FIVE_OVER_SQRT_42); + y0i_five_over_sqrt_21 = _mm256_slli_epi16(y0i_five_over_sqrt_21, 1); + y0i_seven_over_sqrt_21 = _mm256_mulhi_epi16(y0i, SEVEN_OVER_SQRT_42); + y0i_seven_over_sqrt_21 = _mm256_slli_epi16(y0i_seven_over_sqrt_21, 2); // Q2.14 + + y0_p_7_1 = _mm256_adds_epi16(y0r_seven_over_sqrt_21, y0i_one_over_sqrt_21); + y0_p_7_3 = _mm256_adds_epi16(y0r_seven_over_sqrt_21, y0i_three_over_sqrt_21); + y0_p_7_5 = _mm256_adds_epi16(y0r_seven_over_sqrt_21, y0i_five_over_sqrt_21); + y0_p_7_7 = _mm256_adds_epi16(y0r_seven_over_sqrt_21, y0i_seven_over_sqrt_21); + y0_p_5_1 = _mm256_adds_epi16(y0r_five_over_sqrt_21, y0i_one_over_sqrt_21); + y0_p_5_3 = _mm256_adds_epi16(y0r_five_over_sqrt_21, y0i_three_over_sqrt_21); + y0_p_5_5 = _mm256_adds_epi16(y0r_five_over_sqrt_21, y0i_five_over_sqrt_21); + y0_p_5_7 = _mm256_adds_epi16(y0r_five_over_sqrt_21, y0i_seven_over_sqrt_21); + y0_p_3_1 = _mm256_adds_epi16(y0r_three_over_sqrt_21, y0i_one_over_sqrt_21); + y0_p_3_3 = _mm256_adds_epi16(y0r_three_over_sqrt_21, y0i_three_over_sqrt_21); + y0_p_3_5 = _mm256_adds_epi16(y0r_three_over_sqrt_21, y0i_five_over_sqrt_21); + y0_p_3_7 = _mm256_adds_epi16(y0r_three_over_sqrt_21, y0i_seven_over_sqrt_21); + y0_p_1_1 = _mm256_adds_epi16(y0r_one_over_sqrt_21, y0i_one_over_sqrt_21); + y0_p_1_3 = _mm256_adds_epi16(y0r_one_over_sqrt_21, y0i_three_over_sqrt_21); + y0_p_1_5 = _mm256_adds_epi16(y0r_one_over_sqrt_21, y0i_five_over_sqrt_21); + y0_p_1_7 = _mm256_adds_epi16(y0r_one_over_sqrt_21, y0i_seven_over_sqrt_21); + + y0_m_1_1 = _mm256_subs_epi16(y0r_one_over_sqrt_21, y0i_one_over_sqrt_21); + y0_m_1_3 = _mm256_subs_epi16(y0r_one_over_sqrt_21, y0i_three_over_sqrt_21); + y0_m_1_5 = 
_mm256_subs_epi16(y0r_one_over_sqrt_21, y0i_five_over_sqrt_21); + y0_m_1_7 = _mm256_subs_epi16(y0r_one_over_sqrt_21, y0i_seven_over_sqrt_21); + y0_m_3_1 = _mm256_subs_epi16(y0r_three_over_sqrt_21, y0i_one_over_sqrt_21); + y0_m_3_3 = _mm256_subs_epi16(y0r_three_over_sqrt_21, y0i_three_over_sqrt_21); + y0_m_3_5 = _mm256_subs_epi16(y0r_three_over_sqrt_21, y0i_five_over_sqrt_21); + y0_m_3_7 = _mm256_subs_epi16(y0r_three_over_sqrt_21, y0i_seven_over_sqrt_21); + y0_m_5_1 = _mm256_subs_epi16(y0r_five_over_sqrt_21, y0i_one_over_sqrt_21); + y0_m_5_3 = _mm256_subs_epi16(y0r_five_over_sqrt_21, y0i_three_over_sqrt_21); + y0_m_5_5 = _mm256_subs_epi16(y0r_five_over_sqrt_21, y0i_five_over_sqrt_21); + y0_m_5_7 = _mm256_subs_epi16(y0r_five_over_sqrt_21, y0i_seven_over_sqrt_21); + y0_m_7_1 = _mm256_subs_epi16(y0r_seven_over_sqrt_21, y0i_one_over_sqrt_21); + y0_m_7_3 = _mm256_subs_epi16(y0r_seven_over_sqrt_21, y0i_three_over_sqrt_21); + y0_m_7_5 = _mm256_subs_epi16(y0r_seven_over_sqrt_21, y0i_five_over_sqrt_21); + y0_m_7_7 = _mm256_subs_epi16(y0r_seven_over_sqrt_21, y0i_seven_over_sqrt_21); + + interference_abs_epi16(psi_r_p7_p7, ch_mag_int, a_r_p7_p7, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_r_p7_p5, ch_mag_int, a_r_p7_p5, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_r_p7_p3, ch_mag_int, a_r_p7_p3, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_r_p7_p1, ch_mag_int, a_r_p7_p1, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_r_p7_m1, ch_mag_int, a_r_p7_m1, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_r_p7_m3, ch_mag_int, a_r_p7_m3, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_r_p7_m5, ch_mag_int, a_r_p7_m5, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_r_p7_m7, ch_mag_int, a_r_p7_m7, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_r_p5_p7, ch_mag_int, a_r_p5_p7, 
ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_r_p5_p5, ch_mag_int, a_r_p5_p5, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_r_p5_p3, ch_mag_int, a_r_p5_p3, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_r_p5_p1, ch_mag_int, a_r_p5_p1, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_r_p5_m1, ch_mag_int, a_r_p5_m1, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_r_p5_m3, ch_mag_int, a_r_p5_m3, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_r_p5_m5, ch_mag_int, a_r_p5_m5, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_r_p5_m7, ch_mag_int, a_r_p5_m7, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_r_p3_p7, ch_mag_int, a_r_p3_p7, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_r_p3_p5, ch_mag_int, a_r_p3_p5, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_r_p3_p3, ch_mag_int, a_r_p3_p3, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_r_p3_p1, ch_mag_int, a_r_p3_p1, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_r_p3_m1, ch_mag_int, a_r_p3_m1, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_r_p3_m3, ch_mag_int, a_r_p3_m3, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_r_p3_m5, ch_mag_int, a_r_p3_m5, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_r_p3_m7, ch_mag_int, a_r_p3_m7, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_r_p1_p7, ch_mag_int, a_r_p1_p7, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_r_p1_p5, ch_mag_int, a_r_p1_p5, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_r_p1_p3, ch_mag_int, a_r_p1_p3, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_r_p1_p1, ch_mag_int, a_r_p1_p1, ONE_OVER_SQRT_10_Q15, 
THREE_OVER_SQRT_10); + interference_abs_epi16(psi_r_p1_m1, ch_mag_int, a_r_p1_m1, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_r_p1_m3, ch_mag_int, a_r_p1_m3, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_r_p1_m5, ch_mag_int, a_r_p1_m5, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_r_p1_m7, ch_mag_int, a_r_p1_m7, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_r_m1_p7, ch_mag_int, a_r_m1_p7, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_r_m1_p5, ch_mag_int, a_r_m1_p5, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_r_m1_p3, ch_mag_int, a_r_m1_p3, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_r_m1_p1, ch_mag_int, a_r_m1_p1, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_r_m1_m1, ch_mag_int, a_r_m1_m1, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_r_m1_m3, ch_mag_int, a_r_m1_m3, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_r_m1_m5, ch_mag_int, a_r_m1_m5, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_r_m1_m7, ch_mag_int, a_r_m1_m7, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_r_m3_p7, ch_mag_int, a_r_m3_p7, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_r_m3_p5, ch_mag_int, a_r_m3_p5, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_r_m3_p3, ch_mag_int, a_r_m3_p3, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_r_m3_p1, ch_mag_int, a_r_m3_p1, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_r_m3_m1, ch_mag_int, a_r_m3_m1, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_r_m3_m3, ch_mag_int, a_r_m3_m3, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_r_m3_m5, ch_mag_int, a_r_m3_m5, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + 
interference_abs_epi16(psi_r_m3_m7, ch_mag_int, a_r_m3_m7, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_r_m5_p7, ch_mag_int, a_r_m5_p7, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_r_m5_p5, ch_mag_int, a_r_m5_p5, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_r_m5_p3, ch_mag_int, a_r_m5_p3, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_r_m5_p1, ch_mag_int, a_r_m5_p1, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_r_m5_m1, ch_mag_int, a_r_m5_m1, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_r_m5_m3, ch_mag_int, a_r_m5_m3, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_r_m5_m5, ch_mag_int, a_r_m5_m5, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_r_m5_m7, ch_mag_int, a_r_m5_m7, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_r_m7_p7, ch_mag_int, a_r_m7_p7, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_r_m7_p5, ch_mag_int, a_r_m7_p5, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_r_m7_p3, ch_mag_int, a_r_m7_p3, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_r_m7_p1, ch_mag_int, a_r_m7_p1, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_r_m7_m1, ch_mag_int, a_r_m7_m1, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_r_m7_m3, ch_mag_int, a_r_m7_m3, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_r_m7_m5, ch_mag_int, a_r_m7_m5, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_r_m7_m7, ch_mag_int, a_r_m7_m7, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + + interference_abs_epi16(psi_i_p7_p7, ch_mag_int, a_i_p7_p7, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_i_p7_p5, ch_mag_int, a_i_p7_p5, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + 
interference_abs_epi16(psi_i_p7_p3, ch_mag_int, a_i_p7_p3, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_i_p7_p1, ch_mag_int, a_i_p7_p1, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_i_p7_m1, ch_mag_int, a_i_p7_m1, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_i_p7_m3, ch_mag_int, a_i_p7_m3, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_i_p7_m5, ch_mag_int, a_i_p7_m5, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_i_p7_m7, ch_mag_int, a_i_p7_m7, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_i_p5_p7, ch_mag_int, a_i_p5_p7, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_i_p5_p5, ch_mag_int, a_i_p5_p5, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_i_p5_p3, ch_mag_int, a_i_p5_p3, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_i_p5_p1, ch_mag_int, a_i_p5_p1, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_i_p5_m1, ch_mag_int, a_i_p5_m1, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_i_p5_m3, ch_mag_int, a_i_p5_m3, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_i_p5_m5, ch_mag_int, a_i_p5_m5, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_i_p5_m7, ch_mag_int, a_i_p5_m7, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_i_p3_p7, ch_mag_int, a_i_p3_p7, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_i_p3_p5, ch_mag_int, a_i_p3_p5, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_i_p3_p3, ch_mag_int, a_i_p3_p3, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_i_p3_p1, ch_mag_int, a_i_p3_p1, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_i_p3_m1, ch_mag_int, a_i_p3_m1, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + 
interference_abs_epi16(psi_i_p3_m3, ch_mag_int, a_i_p3_m3, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_i_p3_m5, ch_mag_int, a_i_p3_m5, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_i_p3_m7, ch_mag_int, a_i_p3_m7, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_i_p1_p7, ch_mag_int, a_i_p1_p7, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_i_p1_p5, ch_mag_int, a_i_p1_p5, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_i_p1_p3, ch_mag_int, a_i_p1_p3, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_i_p1_p1, ch_mag_int, a_i_p1_p1, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_i_p1_m1, ch_mag_int, a_i_p1_m1, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_i_p1_m3, ch_mag_int, a_i_p1_m3, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_i_p1_m5, ch_mag_int, a_i_p1_m5, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_i_p1_m7, ch_mag_int, a_i_p1_m7, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_i_m1_p7, ch_mag_int, a_i_m1_p7, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_i_m1_p5, ch_mag_int, a_i_m1_p5, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_i_m1_p3, ch_mag_int, a_i_m1_p3, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_i_m1_p1, ch_mag_int, a_i_m1_p1, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_i_m1_m1, ch_mag_int, a_i_m1_m1, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_i_m1_m3, ch_mag_int, a_i_m1_m3, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_i_m1_m5, ch_mag_int, a_i_m1_m5, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_i_m1_m7, ch_mag_int, a_i_m1_m7, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + 
interference_abs_epi16(psi_i_m3_p7, ch_mag_int, a_i_m3_p7, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_i_m3_p5, ch_mag_int, a_i_m3_p5, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_i_m3_p3, ch_mag_int, a_i_m3_p3, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_i_m3_p1, ch_mag_int, a_i_m3_p1, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_i_m3_m1, ch_mag_int, a_i_m3_m1, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_i_m3_m3, ch_mag_int, a_i_m3_m3, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_i_m3_m5, ch_mag_int, a_i_m3_m5, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_i_m3_m7, ch_mag_int, a_i_m3_m7, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_i_m5_p7, ch_mag_int, a_i_m5_p7, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_i_m5_p5, ch_mag_int, a_i_m5_p5, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_i_m5_p3, ch_mag_int, a_i_m5_p3, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_i_m5_p1, ch_mag_int, a_i_m5_p1, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_i_m5_m1, ch_mag_int, a_i_m5_m1, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_i_m5_m3, ch_mag_int, a_i_m5_m3, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_i_m5_m5, ch_mag_int, a_i_m5_m5, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_i_m5_m7, ch_mag_int, a_i_m5_m7, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_i_m7_p7, ch_mag_int, a_i_m7_p7, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_i_m7_p5, ch_mag_int, a_i_m7_p5, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_i_m7_p3, ch_mag_int, a_i_m7_p3, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + 
interference_abs_epi16(psi_i_m7_p1, ch_mag_int, a_i_m7_p1, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_i_m7_m1, ch_mag_int, a_i_m7_m1, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_i_m7_m3, ch_mag_int, a_i_m7_m3, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_i_m7_m5, ch_mag_int, a_i_m7_m5, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_i_m7_m7, ch_mag_int, a_i_m7_m7, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + + // Calculation of a group of two terms in the bit metric involving product of psi and interference + prodsum_psi_a_epi16(psi_r_p7_p7, a_r_p7_p7, psi_i_p7_p7, a_i_p7_p7, psi_a_p7_p7); + prodsum_psi_a_epi16(psi_r_p7_p5, a_r_p7_p5, psi_i_p7_p5, a_i_p7_p5, psi_a_p7_p5); + prodsum_psi_a_epi16(psi_r_p7_p3, a_r_p7_p3, psi_i_p7_p3, a_i_p7_p3, psi_a_p7_p3); + prodsum_psi_a_epi16(psi_r_p7_p1, a_r_p7_p1, psi_i_p7_p1, a_i_p7_p1, psi_a_p7_p1); + prodsum_psi_a_epi16(psi_r_p7_m1, a_r_p7_m1, psi_i_p7_m1, a_i_p7_m1, psi_a_p7_m1); + prodsum_psi_a_epi16(psi_r_p7_m3, a_r_p7_m3, psi_i_p7_m3, a_i_p7_m3, psi_a_p7_m3); + prodsum_psi_a_epi16(psi_r_p7_m5, a_r_p7_m5, psi_i_p7_m5, a_i_p7_m5, psi_a_p7_m5); + prodsum_psi_a_epi16(psi_r_p7_m7, a_r_p7_m7, psi_i_p7_m7, a_i_p7_m7, psi_a_p7_m7); + prodsum_psi_a_epi16(psi_r_p5_p7, a_r_p5_p7, psi_i_p5_p7, a_i_p5_p7, psi_a_p5_p7); + prodsum_psi_a_epi16(psi_r_p5_p5, a_r_p5_p5, psi_i_p5_p5, a_i_p5_p5, psi_a_p5_p5); + prodsum_psi_a_epi16(psi_r_p5_p3, a_r_p5_p3, psi_i_p5_p3, a_i_p5_p3, psi_a_p5_p3); + prodsum_psi_a_epi16(psi_r_p5_p1, a_r_p5_p1, psi_i_p5_p1, a_i_p5_p1, psi_a_p5_p1); + prodsum_psi_a_epi16(psi_r_p5_m1, a_r_p5_m1, psi_i_p5_m1, a_i_p5_m1, psi_a_p5_m1); + prodsum_psi_a_epi16(psi_r_p5_m3, a_r_p5_m3, psi_i_p5_m3, a_i_p5_m3, psi_a_p5_m3); + prodsum_psi_a_epi16(psi_r_p5_m5, a_r_p5_m5, psi_i_p5_m5, a_i_p5_m5, psi_a_p5_m5); + prodsum_psi_a_epi16(psi_r_p5_m7, a_r_p5_m7, psi_i_p5_m7, a_i_p5_m7, psi_a_p5_m7); + prodsum_psi_a_epi16(psi_r_p3_p7, 
a_r_p3_p7, psi_i_p3_p7, a_i_p3_p7, psi_a_p3_p7); + prodsum_psi_a_epi16(psi_r_p3_p5, a_r_p3_p5, psi_i_p3_p5, a_i_p3_p5, psi_a_p3_p5); + prodsum_psi_a_epi16(psi_r_p3_p3, a_r_p3_p3, psi_i_p3_p3, a_i_p3_p3, psi_a_p3_p3); + prodsum_psi_a_epi16(psi_r_p3_p1, a_r_p3_p1, psi_i_p3_p1, a_i_p3_p1, psi_a_p3_p1); + prodsum_psi_a_epi16(psi_r_p3_m1, a_r_p3_m1, psi_i_p3_m1, a_i_p3_m1, psi_a_p3_m1); + prodsum_psi_a_epi16(psi_r_p3_m3, a_r_p3_m3, psi_i_p3_m3, a_i_p3_m3, psi_a_p3_m3); + prodsum_psi_a_epi16(psi_r_p3_m5, a_r_p3_m5, psi_i_p3_m5, a_i_p3_m5, psi_a_p3_m5); + prodsum_psi_a_epi16(psi_r_p3_m7, a_r_p3_m7, psi_i_p3_m7, a_i_p3_m7, psi_a_p3_m7); + prodsum_psi_a_epi16(psi_r_p1_p7, a_r_p1_p7, psi_i_p1_p7, a_i_p1_p7, psi_a_p1_p7); + prodsum_psi_a_epi16(psi_r_p1_p5, a_r_p1_p5, psi_i_p1_p5, a_i_p1_p5, psi_a_p1_p5); + prodsum_psi_a_epi16(psi_r_p1_p3, a_r_p1_p3, psi_i_p1_p3, a_i_p1_p3, psi_a_p1_p3); + prodsum_psi_a_epi16(psi_r_p1_p1, a_r_p1_p1, psi_i_p1_p1, a_i_p1_p1, psi_a_p1_p1); + prodsum_psi_a_epi16(psi_r_p1_m1, a_r_p1_m1, psi_i_p1_m1, a_i_p1_m1, psi_a_p1_m1); + prodsum_psi_a_epi16(psi_r_p1_m3, a_r_p1_m3, psi_i_p1_m3, a_i_p1_m3, psi_a_p1_m3); + prodsum_psi_a_epi16(psi_r_p1_m5, a_r_p1_m5, psi_i_p1_m5, a_i_p1_m5, psi_a_p1_m5); + prodsum_psi_a_epi16(psi_r_p1_m7, a_r_p1_m7, psi_i_p1_m7, a_i_p1_m7, psi_a_p1_m7); + prodsum_psi_a_epi16(psi_r_m1_p7, a_r_m1_p7, psi_i_m1_p7, a_i_m1_p7, psi_a_m1_p7); + prodsum_psi_a_epi16(psi_r_m1_p5, a_r_m1_p5, psi_i_m1_p5, a_i_m1_p5, psi_a_m1_p5); + prodsum_psi_a_epi16(psi_r_m1_p3, a_r_m1_p3, psi_i_m1_p3, a_i_m1_p3, psi_a_m1_p3); + prodsum_psi_a_epi16(psi_r_m1_p1, a_r_m1_p1, psi_i_m1_p1, a_i_m1_p1, psi_a_m1_p1); + prodsum_psi_a_epi16(psi_r_m1_m1, a_r_m1_m1, psi_i_m1_m1, a_i_m1_m1, psi_a_m1_m1); + prodsum_psi_a_epi16(psi_r_m1_m3, a_r_m1_m3, psi_i_m1_m3, a_i_m1_m3, psi_a_m1_m3); + prodsum_psi_a_epi16(psi_r_m1_m5, a_r_m1_m5, psi_i_m1_m5, a_i_m1_m5, psi_a_m1_m5); + prodsum_psi_a_epi16(psi_r_m1_m7, a_r_m1_m7, psi_i_m1_m7, a_i_m1_m7, psi_a_m1_m7); + 
prodsum_psi_a_epi16(psi_r_m3_p7, a_r_m3_p7, psi_i_m3_p7, a_i_m3_p7, psi_a_m3_p7); + prodsum_psi_a_epi16(psi_r_m3_p5, a_r_m3_p5, psi_i_m3_p5, a_i_m3_p5, psi_a_m3_p5); + prodsum_psi_a_epi16(psi_r_m3_p3, a_r_m3_p3, psi_i_m3_p3, a_i_m3_p3, psi_a_m3_p3); + prodsum_psi_a_epi16(psi_r_m3_p1, a_r_m3_p1, psi_i_m3_p1, a_i_m3_p1, psi_a_m3_p1); + prodsum_psi_a_epi16(psi_r_m3_m1, a_r_m3_m1, psi_i_m3_m1, a_i_m3_m1, psi_a_m3_m1); + prodsum_psi_a_epi16(psi_r_m3_m3, a_r_m3_m3, psi_i_m3_m3, a_i_m3_m3, psi_a_m3_m3); + prodsum_psi_a_epi16(psi_r_m3_m5, a_r_m3_m5, psi_i_m3_m5, a_i_m3_m5, psi_a_m3_m5); + prodsum_psi_a_epi16(psi_r_m3_m7, a_r_m3_m7, psi_i_m3_m7, a_i_m3_m7, psi_a_m3_m7); + prodsum_psi_a_epi16(psi_r_m5_p7, a_r_m5_p7, psi_i_m5_p7, a_i_m5_p7, psi_a_m5_p7); + prodsum_psi_a_epi16(psi_r_m5_p5, a_r_m5_p5, psi_i_m5_p5, a_i_m5_p5, psi_a_m5_p5); + prodsum_psi_a_epi16(psi_r_m5_p3, a_r_m5_p3, psi_i_m5_p3, a_i_m5_p3, psi_a_m5_p3); + prodsum_psi_a_epi16(psi_r_m5_p1, a_r_m5_p1, psi_i_m5_p1, a_i_m5_p1, psi_a_m5_p1); + prodsum_psi_a_epi16(psi_r_m5_m1, a_r_m5_m1, psi_i_m5_m1, a_i_m5_m1, psi_a_m5_m1); + prodsum_psi_a_epi16(psi_r_m5_m3, a_r_m5_m3, psi_i_m5_m3, a_i_m5_m3, psi_a_m5_m3); + prodsum_psi_a_epi16(psi_r_m5_m5, a_r_m5_m5, psi_i_m5_m5, a_i_m5_m5, psi_a_m5_m5); + prodsum_psi_a_epi16(psi_r_m5_m7, a_r_m5_m7, psi_i_m5_m7, a_i_m5_m7, psi_a_m5_m7); + prodsum_psi_a_epi16(psi_r_m7_p7, a_r_m7_p7, psi_i_m7_p7, a_i_m7_p7, psi_a_m7_p7); + prodsum_psi_a_epi16(psi_r_m7_p5, a_r_m7_p5, psi_i_m7_p5, a_i_m7_p5, psi_a_m7_p5); + prodsum_psi_a_epi16(psi_r_m7_p3, a_r_m7_p3, psi_i_m7_p3, a_i_m7_p3, psi_a_m7_p3); + prodsum_psi_a_epi16(psi_r_m7_p1, a_r_m7_p1, psi_i_m7_p1, a_i_m7_p1, psi_a_m7_p1); + prodsum_psi_a_epi16(psi_r_m7_m1, a_r_m7_m1, psi_i_m7_m1, a_i_m7_m1, psi_a_m7_m1); + prodsum_psi_a_epi16(psi_r_m7_m3, a_r_m7_m3, psi_i_m7_m3, a_i_m7_m3, psi_a_m7_m3); + prodsum_psi_a_epi16(psi_r_m7_m5, a_r_m7_m5, psi_i_m7_m5, a_i_m7_m5, psi_a_m7_m5); + prodsum_psi_a_epi16(psi_r_m7_m7, a_r_m7_m7, psi_i_m7_m7, a_i_m7_m7, 
psi_a_m7_m7); + + // Calculation of a group of two terms in the bit metric involving squares of interference + square_a_epi16(a_r_p7_p7, a_i_p7_p7, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_p7_p7); + square_a_epi16(a_r_p7_p5, a_i_p7_p5, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_p7_p5); + square_a_epi16(a_r_p7_p3, a_i_p7_p3, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_p7_p3); + square_a_epi16(a_r_p7_p1, a_i_p7_p1, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_p7_p1); + square_a_epi16(a_r_p7_m1, a_i_p7_m1, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_p7_m1); + square_a_epi16(a_r_p7_m3, a_i_p7_m3, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_p7_m3); + square_a_epi16(a_r_p7_m5, a_i_p7_m5, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_p7_m5); + square_a_epi16(a_r_p7_m7, a_i_p7_m7, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_p7_m7); + square_a_epi16(a_r_p5_p7, a_i_p5_p7, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_p5_p7); + square_a_epi16(a_r_p5_p5, a_i_p5_p5, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_p5_p5); + square_a_epi16(a_r_p5_p3, a_i_p5_p3, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_p5_p3); + square_a_epi16(a_r_p5_p1, a_i_p5_p1, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_p5_p1); + square_a_epi16(a_r_p5_m1, a_i_p5_m1, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_p5_m1); + square_a_epi16(a_r_p5_m3, a_i_p5_m3, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_p5_m3); + square_a_epi16(a_r_p5_m5, a_i_p5_m5, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_p5_m5); + square_a_epi16(a_r_p5_m7, a_i_p5_m7, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_p5_m7); + square_a_epi16(a_r_p3_p7, a_i_p3_p7, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_p3_p7); + square_a_epi16(a_r_p3_p5, a_i_p3_p5, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_p3_p5); + square_a_epi16(a_r_p3_p3, a_i_p3_p3, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_p3_p3); + square_a_epi16(a_r_p3_p1, a_i_p3_p1, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_p3_p1); + square_a_epi16(a_r_p3_m1, a_i_p3_m1, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_p3_m1); + square_a_epi16(a_r_p3_m3, a_i_p3_m3, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_p3_m3); + square_a_epi16(a_r_p3_m5, a_i_p3_m5, ch_mag_int, 
SQRT_10_OVER_FOUR, a_sq_p3_m5); + square_a_epi16(a_r_p3_m7, a_i_p3_m7, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_p3_m7); + square_a_epi16(a_r_p1_p7, a_i_p1_p7, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_p1_p7); + square_a_epi16(a_r_p1_p5, a_i_p1_p5, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_p1_p5); + square_a_epi16(a_r_p1_p3, a_i_p1_p3, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_p1_p3); + square_a_epi16(a_r_p1_p1, a_i_p1_p1, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_p1_p1); + square_a_epi16(a_r_p1_m1, a_i_p1_m1, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_p1_m1); + square_a_epi16(a_r_p1_m3, a_i_p1_m3, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_p1_m3); + square_a_epi16(a_r_p1_m5, a_i_p1_m5, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_p1_m5); + square_a_epi16(a_r_p1_m7, a_i_p1_m7, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_p1_m7); + square_a_epi16(a_r_m1_p7, a_i_m1_p7, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_m1_p7); + square_a_epi16(a_r_m1_p5, a_i_m1_p5, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_m1_p5); + square_a_epi16(a_r_m1_p3, a_i_m1_p3, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_m1_p3); + square_a_epi16(a_r_m1_p1, a_i_m1_p1, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_m1_p1); + square_a_epi16(a_r_m1_m1, a_i_m1_m1, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_m1_m1); + square_a_epi16(a_r_m1_m3, a_i_m1_m3, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_m1_m3); + square_a_epi16(a_r_m1_m5, a_i_m1_m5, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_m1_m5); + square_a_epi16(a_r_m1_m7, a_i_m1_m7, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_m1_m7); + square_a_epi16(a_r_m3_p7, a_i_m3_p7, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_m3_p7); + square_a_epi16(a_r_m3_p5, a_i_m3_p5, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_m3_p5); + square_a_epi16(a_r_m3_p3, a_i_m3_p3, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_m3_p3); + square_a_epi16(a_r_m3_p1, a_i_m3_p1, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_m3_p1); + square_a_epi16(a_r_m3_m1, a_i_m3_m1, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_m3_m1); + square_a_epi16(a_r_m3_m3, a_i_m3_m3, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_m3_m3); + square_a_epi16(a_r_m3_m5, a_i_m3_m5, ch_mag_int, 
SQRT_10_OVER_FOUR, a_sq_m3_m5); + square_a_epi16(a_r_m3_m7, a_i_m3_m7, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_m3_m7); + square_a_epi16(a_r_m5_p7, a_i_m5_p7, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_m5_p7); + square_a_epi16(a_r_m5_p5, a_i_m5_p5, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_m5_p5); + square_a_epi16(a_r_m5_p3, a_i_m5_p3, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_m5_p3); + square_a_epi16(a_r_m5_p1, a_i_m5_p1, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_m5_p1); + square_a_epi16(a_r_m5_m1, a_i_m5_m1, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_m5_m1); + square_a_epi16(a_r_m5_m3, a_i_m5_m3, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_m5_m3); + square_a_epi16(a_r_m5_m5, a_i_m5_m5, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_m5_m5); + square_a_epi16(a_r_m5_m7, a_i_m5_m7, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_m5_m7); + square_a_epi16(a_r_m7_p7, a_i_m7_p7, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_m7_p7); + square_a_epi16(a_r_m7_p5, a_i_m7_p5, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_m7_p5); + square_a_epi16(a_r_m7_p3, a_i_m7_p3, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_m7_p3); + square_a_epi16(a_r_m7_p1, a_i_m7_p1, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_m7_p1); + square_a_epi16(a_r_m7_m1, a_i_m7_m1, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_m7_m1); + square_a_epi16(a_r_m7_m3, a_i_m7_m3, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_m7_m3); + square_a_epi16(a_r_m7_m5, a_i_m7_m5, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_m7_m5); + square_a_epi16(a_r_m7_m7, a_i_m7_m7, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_m7_m7); + + // Computing different multiples of ||h0||^2 + // x=1, y=1 + ch_mag_2_over_42_with_sigma2 = _mm256_mulhi_epi16(ch_mag_des,ONE_OVER_FOUR_SQRT_42); + ch_mag_2_over_42_with_sigma2 = _mm256_slli_epi16(ch_mag_2_over_42_with_sigma2,1); + // x=1, y=3 + ch_mag_10_over_42_with_sigma2 = _mm256_mulhi_epi16(ch_mag_des,FIVE_OVER_FOUR_SQRT_42); + ch_mag_10_over_42_with_sigma2 = _mm256_slli_epi16(ch_mag_10_over_42_with_sigma2,1); + // x=1, x=5 + ch_mag_26_over_42_with_sigma2 = _mm256_mulhi_epi16(ch_mag_des,THIRTEEN_OVER_FOUR_SQRT_42); + 
ch_mag_26_over_42_with_sigma2 = _mm256_slli_epi16(ch_mag_26_over_42_with_sigma2,1); + // x=1, y=7 + ch_mag_50_over_42_with_sigma2 = _mm256_mulhi_epi16(ch_mag_des,TWENTYFIVE_OVER_FOUR_SQRT_42); + ch_mag_50_over_42_with_sigma2 = _mm256_slli_epi16(ch_mag_50_over_42_with_sigma2,1); + // x=3, y=3 + ch_mag_18_over_42_with_sigma2 = _mm256_mulhi_epi16(ch_mag_des,NINE_OVER_FOUR_SQRT_42); + ch_mag_18_over_42_with_sigma2 = _mm256_slli_epi16(ch_mag_18_over_42_with_sigma2,1); + // x=3, y=5 + ch_mag_34_over_42_with_sigma2 = _mm256_mulhi_epi16(ch_mag_des,SEVENTEEN_OVER_FOUR_SQRT_42); + ch_mag_34_over_42_with_sigma2 = _mm256_slli_epi16(ch_mag_34_over_42_with_sigma2,1); + // x=3, y=7 + ch_mag_58_over_42_with_sigma2 = _mm256_mulhi_epi16(ch_mag_des,TWENTYNINE_OVER_FOUR_SQRT_42); + ch_mag_58_over_42_with_sigma2 = _mm256_slli_epi16(ch_mag_58_over_42_with_sigma2,2); + // x=5, y=5 + ch_mag_50_over_42_with_sigma2 = _mm256_mulhi_epi16(ch_mag_des,TWENTYFIVE_OVER_FOUR_SQRT_42); + ch_mag_50_over_42_with_sigma2 = _mm256_slli_epi16(ch_mag_50_over_42_with_sigma2,1); + // x=5, y=7 + ch_mag_74_over_42_with_sigma2 = _mm256_mulhi_epi16(ch_mag_des,THIRTYSEVEN_OVER_FOUR_SQRT_42); + ch_mag_74_over_42_with_sigma2 = _mm256_slli_epi16(ch_mag_74_over_42_with_sigma2,2); + // x=7, y=7 + ch_mag_98_over_42_with_sigma2 = _mm256_mulhi_epi16(ch_mag_des,FORTYNINE_OVER_FOUR_SQRT_42); + ch_mag_98_over_42_with_sigma2 = _mm256_slli_epi16(ch_mag_98_over_42_with_sigma2,2); + + // Computing Metrics + xmm0 = _mm256_subs_epi16(psi_a_p7_p7, a_sq_p7_p7); + xmm1 = _mm256_adds_epi16(xmm0, y0_p_7_7); + bit_met_p7_p7 = _mm256_subs_epi16(xmm1, ch_mag_98_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_p7_p5, a_sq_p7_p5); + xmm1 = _mm256_adds_epi16(xmm0, y0_p_7_5); + bit_met_p7_p5 = _mm256_subs_epi16(xmm1, ch_mag_74_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_p7_p3, a_sq_p7_p3); + xmm1 = _mm256_adds_epi16(xmm0, y0_p_7_3); + bit_met_p7_p3 = _mm256_subs_epi16(xmm1, ch_mag_58_over_42_with_sigma2); + xmm0 = 
_mm256_subs_epi16(psi_a_p7_p1, a_sq_p7_p1); + xmm1 = _mm256_adds_epi16(xmm0, y0_p_7_1); + bit_met_p7_p1 = _mm256_subs_epi16(xmm1, ch_mag_50_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_p7_m1, a_sq_p7_m1); + xmm1 = _mm256_adds_epi16(xmm0, y0_m_7_1); + bit_met_p7_m1 = _mm256_subs_epi16(xmm1, ch_mag_50_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_p7_m3, a_sq_p7_m3); + xmm1 = _mm256_adds_epi16(xmm0, y0_m_7_3); + bit_met_p7_m3 = _mm256_subs_epi16(xmm1, ch_mag_58_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_p7_m5, a_sq_p7_m5); + xmm1 = _mm256_adds_epi16(xmm0, y0_m_7_5); + bit_met_p7_m5 = _mm256_subs_epi16(xmm1, ch_mag_74_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_p7_m7, a_sq_p7_m7); + xmm1 = _mm256_adds_epi16(xmm0, y0_m_7_7); + bit_met_p7_m7 = _mm256_subs_epi16(xmm1, ch_mag_98_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_p5_p7, a_sq_p5_p7); + xmm1 = _mm256_adds_epi16(xmm0, y0_p_5_7); + bit_met_p5_p7 = _mm256_subs_epi16(xmm1, ch_mag_74_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_p5_p5, a_sq_p5_p5); + xmm1 = _mm256_adds_epi16(xmm0, y0_p_5_5); + bit_met_p5_p5 = _mm256_subs_epi16(xmm1, ch_mag_50_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_p5_p3, a_sq_p5_p3); + xmm1 = _mm256_adds_epi16(xmm0, y0_p_5_3); + bit_met_p5_p3 = _mm256_subs_epi16(xmm1, ch_mag_34_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_p5_p1, a_sq_p5_p1); + xmm1 = _mm256_adds_epi16(xmm0, y0_p_5_1); + bit_met_p5_p1 = _mm256_subs_epi16(xmm1, ch_mag_26_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_p5_m1, a_sq_p5_m1); + xmm1 = _mm256_adds_epi16(xmm0, y0_m_5_1); + bit_met_p5_m1 = _mm256_subs_epi16(xmm1, ch_mag_26_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_p5_m3, a_sq_p5_m3); + xmm1 = _mm256_adds_epi16(xmm0, y0_m_5_3); + bit_met_p5_m3 = _mm256_subs_epi16(xmm1, ch_mag_34_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_p5_m5, a_sq_p5_m5); + xmm1 = _mm256_adds_epi16(xmm0, y0_m_5_5); + bit_met_p5_m5 = 
_mm256_subs_epi16(xmm1, ch_mag_50_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_p5_m7, a_sq_p5_m7); + xmm1 = _mm256_adds_epi16(xmm0, y0_m_5_7); + bit_met_p5_m7 = _mm256_subs_epi16(xmm1, ch_mag_74_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_p3_p7, a_sq_p3_p7); + xmm1 = _mm256_adds_epi16(xmm0, y0_p_3_7); + bit_met_p3_p7 = _mm256_subs_epi16(xmm1, ch_mag_58_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_p3_p5, a_sq_p3_p5); + xmm1 = _mm256_adds_epi16(xmm0, y0_p_3_5); + bit_met_p3_p5 = _mm256_subs_epi16(xmm1, ch_mag_34_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_p3_p3, a_sq_p3_p3); + xmm1 = _mm256_adds_epi16(xmm0, y0_p_3_3); + bit_met_p3_p3 = _mm256_subs_epi16(xmm1, ch_mag_18_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_p3_p1, a_sq_p3_p1); + xmm1 = _mm256_adds_epi16(xmm0, y0_p_3_1); + bit_met_p3_p1 = _mm256_subs_epi16(xmm1, ch_mag_10_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_p3_m1, a_sq_p3_m1); + xmm1 = _mm256_adds_epi16(xmm0, y0_m_3_1); + bit_met_p3_m1 = _mm256_subs_epi16(xmm1, ch_mag_10_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_p3_m3, a_sq_p3_m3); + xmm1 = _mm256_adds_epi16(xmm0, y0_m_3_3); + bit_met_p3_m3 = _mm256_subs_epi16(xmm1, ch_mag_18_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_p3_m5, a_sq_p3_m5); + xmm1 = _mm256_adds_epi16(xmm0, y0_m_3_5); + bit_met_p3_m5 = _mm256_subs_epi16(xmm1, ch_mag_34_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_p3_m7, a_sq_p3_m7); + xmm1 = _mm256_adds_epi16(xmm0, y0_m_3_7); + bit_met_p3_m7 = _mm256_subs_epi16(xmm1, ch_mag_58_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_p1_p7, a_sq_p1_p7); + xmm1 = _mm256_adds_epi16(xmm0, y0_p_1_7); + bit_met_p1_p7 = _mm256_subs_epi16(xmm1, ch_mag_50_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_p1_p5, a_sq_p1_p5); + xmm1 = _mm256_adds_epi16(xmm0, y0_p_1_5); + bit_met_p1_p5 = _mm256_subs_epi16(xmm1, ch_mag_26_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_p1_p3, a_sq_p1_p3); + xmm1 = 
_mm256_adds_epi16(xmm0, y0_p_1_3); + bit_met_p1_p3 = _mm256_subs_epi16(xmm1, ch_mag_10_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_p1_p1, a_sq_p1_p1); + xmm1 = _mm256_adds_epi16(xmm0, y0_p_1_1); + bit_met_p1_p1 = _mm256_subs_epi16(xmm1, ch_mag_2_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_p1_m1, a_sq_p1_m1); + xmm1 = _mm256_adds_epi16(xmm0, y0_m_1_1); + bit_met_p1_m1 = _mm256_subs_epi16(xmm1, ch_mag_2_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_p1_m3, a_sq_p1_m3); + xmm1 = _mm256_adds_epi16(xmm0, y0_m_1_3); + bit_met_p1_m3 = _mm256_subs_epi16(xmm1, ch_mag_10_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_p1_m5, a_sq_p1_m5); + xmm1 = _mm256_adds_epi16(xmm0, y0_m_1_5); + bit_met_p1_m5 = _mm256_subs_epi16(xmm1, ch_mag_26_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_p1_m7, a_sq_p1_m7); + xmm1 = _mm256_adds_epi16(xmm0, y0_m_1_7); + bit_met_p1_m7 = _mm256_subs_epi16(xmm1, ch_mag_50_over_42_with_sigma2); + + xmm0 = _mm256_subs_epi16(psi_a_m1_p7, a_sq_m1_p7); + xmm1 = _mm256_subs_epi16(xmm0, y0_m_1_7); + bit_met_m1_p7 = _mm256_subs_epi16(xmm1, ch_mag_50_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_m1_p5, a_sq_m1_p5); + xmm1 = _mm256_subs_epi16(xmm0, y0_m_1_5); + bit_met_m1_p5 = _mm256_subs_epi16(xmm1, ch_mag_26_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_m1_p3, a_sq_m1_p3); + xmm1 = _mm256_subs_epi16(xmm0, y0_m_1_3); + bit_met_m1_p3 = _mm256_subs_epi16(xmm1, ch_mag_10_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_m1_p1, a_sq_m1_p1); + xmm1 = _mm256_subs_epi16(xmm0, y0_m_1_1); + bit_met_m1_p1 = _mm256_subs_epi16(xmm1, ch_mag_2_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_m1_m1, a_sq_m1_m1); + xmm1 = _mm256_subs_epi16(xmm0, y0_p_1_1); + bit_met_m1_m1 = _mm256_subs_epi16(xmm1, ch_mag_2_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_m1_m3, a_sq_m1_m3); + xmm1 = _mm256_subs_epi16(xmm0, y0_p_1_3); + bit_met_m1_m3 = _mm256_subs_epi16(xmm1, ch_mag_10_over_42_with_sigma2); + xmm0 = 
_mm256_subs_epi16(psi_a_m1_m5, a_sq_m1_m5); + xmm1 = _mm256_subs_epi16(xmm0, y0_p_1_5); + bit_met_m1_m5 = _mm256_subs_epi16(xmm1, ch_mag_26_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_m1_m7, a_sq_m1_m7); + xmm1 = _mm256_subs_epi16(xmm0, y0_p_1_7); + bit_met_m1_m7 = _mm256_subs_epi16(xmm1, ch_mag_50_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_m3_p7, a_sq_m3_p7); + xmm1 = _mm256_subs_epi16(xmm0, y0_m_3_7); + bit_met_m3_p7 = _mm256_subs_epi16(xmm1, ch_mag_58_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_m3_p5, a_sq_m3_p5); + xmm1 = _mm256_subs_epi16(xmm0, y0_m_3_5); + bit_met_m3_p5 = _mm256_subs_epi16(xmm1, ch_mag_34_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_m3_p3, a_sq_m3_p3); + xmm1 = _mm256_subs_epi16(xmm0, y0_m_3_3); + bit_met_m3_p3 = _mm256_subs_epi16(xmm1, ch_mag_18_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_m3_p1, a_sq_m3_p1); + xmm1 = _mm256_subs_epi16(xmm0, y0_m_3_1); + bit_met_m3_p1 = _mm256_subs_epi16(xmm1, ch_mag_10_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_m3_m1, a_sq_m3_m1); + xmm1 = _mm256_subs_epi16(xmm0, y0_p_3_1); + bit_met_m3_m1 = _mm256_subs_epi16(xmm1, ch_mag_10_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_m3_m3, a_sq_m3_m3); + xmm1 = _mm256_subs_epi16(xmm0, y0_p_3_3); + bit_met_m3_m3 = _mm256_subs_epi16(xmm1, ch_mag_18_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_m3_m5, a_sq_m3_m5); + xmm1 = _mm256_subs_epi16(xmm0, y0_p_3_5); + bit_met_m3_m5 = _mm256_subs_epi16(xmm1, ch_mag_34_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_m3_m7, a_sq_m3_m7); + xmm1 = _mm256_subs_epi16(xmm0, y0_p_3_7); + bit_met_m3_m7 = _mm256_subs_epi16(xmm1, ch_mag_58_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_m5_p7, a_sq_m5_p7); + xmm1 = _mm256_subs_epi16(xmm0, y0_m_5_7); + bit_met_m5_p7 = _mm256_subs_epi16(xmm1, ch_mag_74_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_m5_p5, a_sq_m5_p5); + xmm1 = _mm256_subs_epi16(xmm0, y0_m_5_5); + bit_met_m5_p5 = 
_mm256_subs_epi16(xmm1, ch_mag_50_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_m5_p3, a_sq_m5_p3); + xmm1 = _mm256_subs_epi16(xmm0, y0_m_5_3); + bit_met_m5_p3 = _mm256_subs_epi16(xmm1, ch_mag_34_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_m5_p1, a_sq_m5_p1); + xmm1 = _mm256_subs_epi16(xmm0, y0_m_5_1); + bit_met_m5_p1 = _mm256_subs_epi16(xmm1, ch_mag_26_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_m5_m1, a_sq_m5_m1); + xmm1 = _mm256_subs_epi16(xmm0, y0_p_5_1); + bit_met_m5_m1 = _mm256_subs_epi16(xmm1, ch_mag_26_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_m5_m3, a_sq_m5_m3); + xmm1 = _mm256_subs_epi16(xmm0, y0_p_5_3); + bit_met_m5_m3 = _mm256_subs_epi16(xmm1, ch_mag_34_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_m5_m5, a_sq_m5_m5); + xmm1 = _mm256_subs_epi16(xmm0, y0_p_5_5); + bit_met_m5_m5 = _mm256_subs_epi16(xmm1, ch_mag_50_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_m5_m7, a_sq_m5_m7); + xmm1 = _mm256_subs_epi16(xmm0, y0_p_5_7); + bit_met_m5_m7 = _mm256_subs_epi16(xmm1, ch_mag_74_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_m7_p7, a_sq_m7_p7); + xmm1 = _mm256_subs_epi16(xmm0, y0_m_7_7); + bit_met_m7_p7 = _mm256_subs_epi16(xmm1, ch_mag_98_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_m7_p5, a_sq_m7_p5); + xmm1 = _mm256_subs_epi16(xmm0, y0_m_7_5); + bit_met_m7_p5 = _mm256_subs_epi16(xmm1, ch_mag_74_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_m7_p3, a_sq_m7_p3); + xmm1 = _mm256_subs_epi16(xmm0, y0_m_7_3); + bit_met_m7_p3 = _mm256_subs_epi16(xmm1, ch_mag_58_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_m7_p1, a_sq_m7_p1); + xmm1 = _mm256_subs_epi16(xmm0, y0_m_7_1); + bit_met_m7_p1 = _mm256_subs_epi16(xmm1, ch_mag_50_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_m7_m1, a_sq_m7_m1); + xmm1 = _mm256_subs_epi16(xmm0, y0_p_7_1); + bit_met_m7_m1 = _mm256_subs_epi16(xmm1, ch_mag_50_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_m7_m3, a_sq_m7_m3); + xmm1 = 
_mm256_subs_epi16(xmm0, y0_p_7_3); + bit_met_m7_m3 = _mm256_subs_epi16(xmm1, ch_mag_58_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_m7_m5, a_sq_m7_m5); + xmm1 = _mm256_subs_epi16(xmm0, y0_p_7_5); + bit_met_m7_m5 = _mm256_subs_epi16(xmm1, ch_mag_74_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_m7_m7, a_sq_m7_m7); + xmm1 = _mm256_subs_epi16(xmm0, y0_p_7_7); + bit_met_m7_m7 = _mm256_subs_epi16(xmm1, ch_mag_98_over_42_with_sigma2); + + // Detection for 1st bit (LTE mapping) + // bit = 1 + xmm0 = _mm256_max_epi16(bit_met_m7_p7, bit_met_m7_p5); + xmm1 = _mm256_max_epi16(bit_met_m7_p3, bit_met_m7_p1); + xmm2 = _mm256_max_epi16(bit_met_m7_m1, bit_met_m7_m3); + xmm3 = _mm256_max_epi16(bit_met_m7_m5, bit_met_m7_m7); + xmm4 = _mm256_max_epi16(xmm0, xmm1); + xmm5 = _mm256_max_epi16(xmm2, xmm3); + logmax_den_re0 = _mm256_max_epi16(xmm4, xmm5); + xmm0 = _mm256_max_epi16(bit_met_m5_p7, bit_met_m5_p5); + xmm1 = _mm256_max_epi16(bit_met_m5_p3, bit_met_m5_p1); + xmm2 = _mm256_max_epi16(bit_met_m5_m1, bit_met_m5_m3); + xmm3 = _mm256_max_epi16(bit_met_m5_m5, bit_met_m5_m7); + xmm4 = _mm256_max_epi16(xmm0, xmm1); + xmm5 = _mm256_max_epi16(xmm2, xmm3); + logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm4); + logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm5); + xmm0 = _mm256_max_epi16(bit_met_m3_p7, bit_met_m3_p5); + xmm1 = _mm256_max_epi16(bit_met_m3_p3, bit_met_m3_p1); + xmm2 = _mm256_max_epi16(bit_met_m3_m1, bit_met_m3_m3); + xmm3 = _mm256_max_epi16(bit_met_m3_m5, bit_met_m3_m7); + xmm4 = _mm256_max_epi16(xmm0, xmm1); + xmm5 = _mm256_max_epi16(xmm2, xmm3); + logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm4); + logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm5); + xmm0 = _mm256_max_epi16(bit_met_m1_p7, bit_met_m1_p5); + xmm1 = _mm256_max_epi16(bit_met_m1_p3, bit_met_m1_p1); + xmm2 = _mm256_max_epi16(bit_met_m1_m1, bit_met_m1_m3); + xmm3 = _mm256_max_epi16(bit_met_m1_m5, bit_met_m1_m7); + xmm4 = _mm256_max_epi16(xmm0, xmm1); + xmm5 = 
_mm256_max_epi16(xmm2, xmm3); + logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm4); + logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm5); + + // bit = 0 + xmm0 = _mm256_max_epi16(bit_met_p7_p7, bit_met_p7_p5); + xmm1 = _mm256_max_epi16(bit_met_p7_p3, bit_met_p7_p1); + xmm2 = _mm256_max_epi16(bit_met_p7_m1, bit_met_p7_m3); + xmm3 = _mm256_max_epi16(bit_met_p7_m5, bit_met_p7_m7); + xmm4 = _mm256_max_epi16(xmm0, xmm1); + xmm5 = _mm256_max_epi16(xmm2, xmm3); + logmax_num_re0 = _mm256_max_epi16(xmm4, xmm5); + xmm0 = _mm256_max_epi16(bit_met_p5_p7, bit_met_p5_p5); + xmm1 = _mm256_max_epi16(bit_met_p5_p3, bit_met_p5_p1); + xmm2 = _mm256_max_epi16(bit_met_p5_m1, bit_met_p5_m3); + xmm3 = _mm256_max_epi16(bit_met_p5_m5, bit_met_p5_m7); + xmm4 = _mm256_max_epi16(xmm0, xmm1); + xmm5 = _mm256_max_epi16(xmm2, xmm3); + logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm4); + logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm5); + xmm0 = _mm256_max_epi16(bit_met_p3_p7, bit_met_p3_p5); + xmm1 = _mm256_max_epi16(bit_met_p3_p3, bit_met_p3_p1); + xmm2 = _mm256_max_epi16(bit_met_p3_m1, bit_met_p3_m3); + xmm3 = _mm256_max_epi16(bit_met_p3_m5, bit_met_p3_m7); + xmm4 = _mm256_max_epi16(xmm0, xmm1); + xmm5 = _mm256_max_epi16(xmm2, xmm3); + logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm4); + logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm5); + xmm0 = _mm256_max_epi16(bit_met_p1_p7, bit_met_p1_p5); + xmm1 = _mm256_max_epi16(bit_met_p1_p3, bit_met_p1_p1); + xmm2 = _mm256_max_epi16(bit_met_p1_m1, bit_met_p1_m3); + xmm3 = _mm256_max_epi16(bit_met_p1_m5, bit_met_p1_m7); + xmm4 = _mm256_max_epi16(xmm0, xmm1); + xmm5 = _mm256_max_epi16(xmm2, xmm3); + logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm4); + logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm5); + + y0r = _mm256_subs_epi16(logmax_num_re0, logmax_den_re0); + + // Detection for 2nd bit (LTE mapping) + // bit = 1 + xmm0 = _mm256_max_epi16(bit_met_p7_m1, bit_met_p5_m1); + xmm1 = 
_mm256_max_epi16(bit_met_p3_m1, bit_met_p1_m1); + xmm2 = _mm256_max_epi16(bit_met_m1_m1, bit_met_m3_m1); + xmm3 = _mm256_max_epi16(bit_met_m5_m1, bit_met_m7_m1); + xmm4 = _mm256_max_epi16(xmm0, xmm1); + xmm5 = _mm256_max_epi16(xmm2, xmm3); + logmax_den_re0 = _mm256_max_epi16(xmm4, xmm5); + xmm0 = _mm256_max_epi16(bit_met_p7_m3, bit_met_p5_m3); + xmm1 = _mm256_max_epi16(bit_met_p3_m3, bit_met_p1_m3); + xmm2 = _mm256_max_epi16(bit_met_m1_m3, bit_met_m3_m3); + xmm3 = _mm256_max_epi16(bit_met_m5_m3, bit_met_m7_m3); + xmm4 = _mm256_max_epi16(xmm0, xmm1); + xmm5 = _mm256_max_epi16(xmm2, xmm3); + logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm4); + logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm5); + xmm0 = _mm256_max_epi16(bit_met_p7_m5, bit_met_p5_m5); + xmm1 = _mm256_max_epi16(bit_met_p3_m5, bit_met_p1_m5); + xmm2 = _mm256_max_epi16(bit_met_m1_m5, bit_met_m3_m5); + xmm3 = _mm256_max_epi16(bit_met_m5_m5, bit_met_m7_m5); + xmm4 = _mm256_max_epi16(xmm0, xmm1); + xmm5 = _mm256_max_epi16(xmm2, xmm3); + logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm4); + logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm5); + xmm0 = _mm256_max_epi16(bit_met_p7_m7, bit_met_p5_m7); + xmm1 = _mm256_max_epi16(bit_met_p3_m7, bit_met_p1_m7); + xmm2 = _mm256_max_epi16(bit_met_m1_m7, bit_met_m3_m7); + xmm3 = _mm256_max_epi16(bit_met_m5_m7, bit_met_m7_m7); + xmm4 = _mm256_max_epi16(xmm0, xmm1); + xmm5 = _mm256_max_epi16(xmm2, xmm3); + logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm4); + logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm5); + + // bit = 0 + xmm0 = _mm256_max_epi16(bit_met_p7_p1, bit_met_p5_p1); + xmm1 = _mm256_max_epi16(bit_met_p3_p1, bit_met_p1_p1); + xmm2 = _mm256_max_epi16(bit_met_m1_p1, bit_met_m3_p1); + xmm3 = _mm256_max_epi16(bit_met_m5_p1, bit_met_m7_p1); + xmm4 = _mm256_max_epi16(xmm0, xmm1); + xmm5 = _mm256_max_epi16(xmm2, xmm3); + logmax_num_re0 = _mm256_max_epi16(xmm4, xmm5); + xmm0 = _mm256_max_epi16(bit_met_p7_p3, bit_met_p5_p3); + xmm1 = 
_mm256_max_epi16(bit_met_p3_p3, bit_met_p1_p3); + xmm2 = _mm256_max_epi16(bit_met_m1_p3, bit_met_m3_p3); + xmm3 = _mm256_max_epi16(bit_met_m5_p3, bit_met_m7_p3); + xmm4 = _mm256_max_epi16(xmm0, xmm1); + xmm5 = _mm256_max_epi16(xmm2, xmm3); + logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm4); + logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm5); + xmm0 = _mm256_max_epi16(bit_met_p7_p5, bit_met_p5_p5); + xmm1 = _mm256_max_epi16(bit_met_p3_p5, bit_met_p1_p5); + xmm2 = _mm256_max_epi16(bit_met_m1_p5, bit_met_m3_p5); + xmm3 = _mm256_max_epi16(bit_met_m5_p5, bit_met_m7_p5); + xmm4 = _mm256_max_epi16(xmm0, xmm1); + xmm5 = _mm256_max_epi16(xmm2, xmm3); + logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm4); + logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm5); + xmm0 = _mm256_max_epi16(bit_met_p7_p7, bit_met_p5_p7); + xmm1 = _mm256_max_epi16(bit_met_p3_p7, bit_met_p1_p7); + xmm2 = _mm256_max_epi16(bit_met_m1_p7, bit_met_m3_p7); + xmm3 = _mm256_max_epi16(bit_met_m5_p7, bit_met_m7_p7); + xmm4 = _mm256_max_epi16(xmm0, xmm1); + xmm5 = _mm256_max_epi16(xmm2, xmm3); + logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm4); + logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm5); + + y1r = _mm256_subs_epi16(logmax_num_re0, logmax_den_re0); + + // Detection for 3rd bit (LTE mapping) + xmm0 = _mm256_max_epi16(bit_met_m7_m7, bit_met_m7_m5); + xmm1 = _mm256_max_epi16(bit_met_m7_m3, bit_met_m7_m1); + xmm2 = _mm256_max_epi16(bit_met_m7_p1, bit_met_m7_p3); + xmm3 = _mm256_max_epi16(bit_met_m7_p5, bit_met_m7_p7); + xmm4 = _mm256_max_epi16(xmm0, xmm1); + xmm5 = _mm256_max_epi16(xmm2, xmm3); + logmax_den_re0 = _mm256_max_epi16(xmm4, xmm5); + xmm0 = _mm256_max_epi16(bit_met_m5_m7, bit_met_m5_m5); + xmm1 = _mm256_max_epi16(bit_met_m5_m3, bit_met_m5_m1); + xmm2 = _mm256_max_epi16(bit_met_m5_p1, bit_met_m5_p3); + xmm3 = _mm256_max_epi16(bit_met_m5_p5, bit_met_m5_p7); + xmm4 = _mm256_max_epi16(xmm0, xmm1); + xmm5 = _mm256_max_epi16(xmm2, xmm3); + logmax_den_re0 = 
_mm256_max_epi16(logmax_den_re0, xmm4); + logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm5); + xmm0 = _mm256_max_epi16(bit_met_p5_m7, bit_met_p5_m5); + xmm1 = _mm256_max_epi16(bit_met_p5_m3, bit_met_p5_m1); + xmm2 = _mm256_max_epi16(bit_met_p5_p1, bit_met_p5_p3); + xmm3 = _mm256_max_epi16(bit_met_p5_p5, bit_met_p5_p7); + xmm4 = _mm256_max_epi16(xmm0, xmm1); + xmm5 = _mm256_max_epi16(xmm2, xmm3); + logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm4); + logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm5); + xmm0 = _mm256_max_epi16(bit_met_p7_m7, bit_met_p7_m5); + xmm1 = _mm256_max_epi16(bit_met_p7_m3, bit_met_p7_m1); + xmm2 = _mm256_max_epi16(bit_met_p7_p1, bit_met_p7_p3); + xmm3 = _mm256_max_epi16(bit_met_p7_p5, bit_met_p7_p7); + xmm4 = _mm256_max_epi16(xmm0, xmm1); + xmm5 = _mm256_max_epi16(xmm2, xmm3); + logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm4); + logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm5); + + xmm0 = _mm256_max_epi16(bit_met_m3_m7, bit_met_m3_m5); + xmm1 = _mm256_max_epi16(bit_met_m3_m3, bit_met_m3_m1); + xmm2 = _mm256_max_epi16(bit_met_m3_p1, bit_met_m3_p3); + xmm3 = _mm256_max_epi16(bit_met_m3_p5, bit_met_m3_p7); + xmm4 = _mm256_max_epi16(xmm0, xmm1); + xmm5 = _mm256_max_epi16(xmm2, xmm3); + logmax_num_re0 = _mm256_max_epi16(xmm4, xmm5); + xmm0 = _mm256_max_epi16(bit_met_m1_m7, bit_met_m1_m5); + xmm1 = _mm256_max_epi16(bit_met_m1_m3, bit_met_m1_m1); + xmm2 = _mm256_max_epi16(bit_met_m1_p1, bit_met_m1_p3); + xmm3 = _mm256_max_epi16(bit_met_m1_p5, bit_met_m1_p7); + xmm4 = _mm256_max_epi16(xmm0, xmm1); + xmm5 = _mm256_max_epi16(xmm2, xmm3); + logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm4); + logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm5); + xmm0 = _mm256_max_epi16(bit_met_p1_m7, bit_met_p1_m5); + xmm1 = _mm256_max_epi16(bit_met_p1_m3, bit_met_p1_m1); + xmm2 = _mm256_max_epi16(bit_met_p1_p1, bit_met_p1_p3); + xmm3 = _mm256_max_epi16(bit_met_p1_p5, bit_met_p1_p7); + xmm4 = _mm256_max_epi16(xmm0, xmm1); + 
xmm5 = _mm256_max_epi16(xmm2, xmm3); + logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm4); + logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm5); + xmm0 = _mm256_max_epi16(bit_met_p3_m7, bit_met_p3_m5); + xmm1 = _mm256_max_epi16(bit_met_p3_m3, bit_met_p3_m1); + xmm2 = _mm256_max_epi16(bit_met_p3_p1, bit_met_p3_p3); + xmm3 = _mm256_max_epi16(bit_met_p3_p5, bit_met_p3_p7); + xmm4 = _mm256_max_epi16(xmm0, xmm1); + xmm5 = _mm256_max_epi16(xmm2, xmm3); + logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm4); + logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm5); + + y2r = _mm256_subs_epi16(logmax_num_re0, logmax_den_re0); + + // Detection for 4th bit (LTE mapping) + xmm0 = _mm256_max_epi16(bit_met_p7_p7, bit_met_p5_p7); + xmm1 = _mm256_max_epi16(bit_met_p3_p7, bit_met_p1_p7); + xmm2 = _mm256_max_epi16(bit_met_m1_p7, bit_met_m3_p7); + xmm3 = _mm256_max_epi16(bit_met_m5_p7, bit_met_m7_p7); + xmm4 = _mm256_max_epi16(xmm0, xmm1); + xmm5 = _mm256_max_epi16(xmm2, xmm3); + logmax_den_re0 = _mm256_max_epi16(xmm4, xmm5); + xmm0 = _mm256_max_epi16(bit_met_p7_p5, bit_met_p5_p5); + xmm1 = _mm256_max_epi16(bit_met_p3_p5, bit_met_p1_p5); + xmm2 = _mm256_max_epi16(bit_met_m1_p5, bit_met_m3_p5); + xmm3 = _mm256_max_epi16(bit_met_m5_p5, bit_met_m5_p5); + xmm4 = _mm256_max_epi16(xmm0, xmm1); + xmm5 = _mm256_max_epi16(xmm2, xmm3); + logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm4); + logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm5); + xmm0 = _mm256_max_epi16(bit_met_p7_m5, bit_met_p5_m5); + xmm1 = _mm256_max_epi16(bit_met_p3_m5, bit_met_p1_m5); + xmm2 = _mm256_max_epi16(bit_met_m1_m5, bit_met_m3_m5); + xmm3 = _mm256_max_epi16(bit_met_m5_m5, bit_met_m7_m5); + xmm4 = _mm256_max_epi16(xmm0, xmm1); + xmm5 = _mm256_max_epi16(xmm2, xmm3); + logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm4); + logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm5); + xmm0 = _mm256_max_epi16(bit_met_p7_m7, bit_met_p5_m7); + xmm1 = _mm256_max_epi16(bit_met_p3_m7, bit_met_p1_m7); 
+ xmm2 = _mm256_max_epi16(bit_met_m1_m7, bit_met_m3_m7); + xmm3 = _mm256_max_epi16(bit_met_m5_m7, bit_met_m7_m7); + xmm4 = _mm256_max_epi16(xmm0, xmm1); + xmm5 = _mm256_max_epi16(xmm2, xmm3); + logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm4); + logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm5); + + xmm0 = _mm256_max_epi16(bit_met_p7_m1, bit_met_p5_m1); + xmm1 = _mm256_max_epi16(bit_met_p3_m1, bit_met_p1_m1); + xmm2 = _mm256_max_epi16(bit_met_m1_m1, bit_met_m3_m1); + xmm3 = _mm256_max_epi16(bit_met_m5_m1, bit_met_m7_m1); + xmm4 = _mm256_max_epi16(xmm0, xmm1); + xmm5 = _mm256_max_epi16(xmm2, xmm3); + logmax_num_re0 = _mm256_max_epi16(xmm4, xmm5); + xmm0 = _mm256_max_epi16(bit_met_p7_m3, bit_met_p5_m3); + xmm1 = _mm256_max_epi16(bit_met_p3_m3, bit_met_p1_m3); + xmm2 = _mm256_max_epi16(bit_met_m1_m3, bit_met_m3_m3); + xmm3 = _mm256_max_epi16(bit_met_m5_m3, bit_met_m7_m3); + xmm4 = _mm256_max_epi16(xmm0, xmm1); + xmm5 = _mm256_max_epi16(xmm2, xmm3); + logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm4); + logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm5); + xmm0 = _mm256_max_epi16(bit_met_p7_p1, bit_met_p5_p1); + xmm1 = _mm256_max_epi16(bit_met_p3_p1, bit_met_p1_p1); + xmm2 = _mm256_max_epi16(bit_met_m1_p1, bit_met_m3_p1); + xmm3 = _mm256_max_epi16(bit_met_m5_p1, bit_met_m7_p1); + xmm4 = _mm256_max_epi16(xmm0, xmm1); + xmm5 = _mm256_max_epi16(xmm2, xmm3); + logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm4); + logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm5); + xmm0 = _mm256_max_epi16(bit_met_p7_p3, bit_met_p5_p3); + xmm1 = _mm256_max_epi16(bit_met_p3_p3, bit_met_p1_p3); + xmm2 = _mm256_max_epi16(bit_met_m1_p3, bit_met_m3_p3); + xmm3 = _mm256_max_epi16(bit_met_m5_p3, bit_met_m7_p3); + xmm4 = _mm256_max_epi16(xmm0, xmm1); + xmm5 = _mm256_max_epi16(xmm2, xmm3); + logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm4); + logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm5); + + y0i = _mm256_subs_epi16(logmax_num_re0, logmax_den_re0); 
+ + + // Detection for 5th bit (LTE mapping) + xmm0 = _mm256_max_epi16(bit_met_m7_m7, bit_met_m7_m5); + xmm1 = _mm256_max_epi16(bit_met_m7_m3, bit_met_m7_m1); + xmm2 = _mm256_max_epi16(bit_met_m7_p1, bit_met_m7_p3); + xmm3 = _mm256_max_epi16(bit_met_m7_p5, bit_met_m7_p7); + xmm4 = _mm256_max_epi16(xmm0, xmm1); + xmm5 = _mm256_max_epi16(xmm2, xmm3); + logmax_den_re0 = _mm256_max_epi16(xmm4, xmm5); + xmm0 = _mm256_max_epi16(bit_met_m1_m7, bit_met_m1_m5); + xmm1 = _mm256_max_epi16(bit_met_m1_m3, bit_met_m1_m1); + xmm2 = _mm256_max_epi16(bit_met_m1_p1, bit_met_m1_p3); + xmm3 = _mm256_max_epi16(bit_met_m1_p5, bit_met_m1_p7); + xmm4 = _mm256_max_epi16(xmm0, xmm1); + xmm5 = _mm256_max_epi16(xmm2, xmm3); + logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm4); + logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm5); + xmm0 = _mm256_max_epi16(bit_met_p1_m7, bit_met_p1_m5); + xmm1 = _mm256_max_epi16(bit_met_p1_m3, bit_met_p1_m1); + xmm2 = _mm256_max_epi16(bit_met_p1_p1, bit_met_p1_p3); + xmm3 = _mm256_max_epi16(bit_met_p1_p5, bit_met_p1_p7); + xmm4 = _mm256_max_epi16(xmm0, xmm1); + xmm5 = _mm256_max_epi16(xmm2, xmm3); + logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm4); + logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm5); + xmm0 = _mm256_max_epi16(bit_met_p7_m7, bit_met_p7_m5); + xmm1 = _mm256_max_epi16(bit_met_p7_m3, bit_met_p7_m1); + xmm2 = _mm256_max_epi16(bit_met_p7_p1, bit_met_p7_p3); + xmm3 = _mm256_max_epi16(bit_met_p7_p5, bit_met_p7_p7); + xmm4 = _mm256_max_epi16(xmm0, xmm1); + xmm5 = _mm256_max_epi16(xmm2, xmm3); + logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm4); + logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm5); + + xmm0 = _mm256_max_epi16(bit_met_m5_m7, bit_met_m5_m5); + xmm1 = _mm256_max_epi16(bit_met_m5_m3, bit_met_m5_m1); + xmm2 = _mm256_max_epi16(bit_met_m5_p1, bit_met_m5_p3); + xmm3 = _mm256_max_epi16(bit_met_m5_p5, bit_met_m5_p7); + xmm4 = _mm256_max_epi16(xmm0, xmm1); + xmm5 = _mm256_max_epi16(xmm2, xmm3); + logmax_num_re0 = 
_mm256_max_epi16(xmm4, xmm5); + xmm0 = _mm256_max_epi16(bit_met_m3_m7, bit_met_m3_m5); + xmm1 = _mm256_max_epi16(bit_met_m3_m3, bit_met_m3_m1); + xmm2 = _mm256_max_epi16(bit_met_m3_p1, bit_met_m3_p3); + xmm3 = _mm256_max_epi16(bit_met_m3_p5, bit_met_m3_p7); + xmm4 = _mm256_max_epi16(xmm0, xmm1); + xmm5 = _mm256_max_epi16(xmm2, xmm3); + logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm4); + logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm5); + xmm0 = _mm256_max_epi16(bit_met_p3_m7, bit_met_p3_m5); + xmm1 = _mm256_max_epi16(bit_met_p3_m3, bit_met_p3_m1); + xmm2 = _mm256_max_epi16(bit_met_p3_p1, bit_met_p3_p3); + xmm3 = _mm256_max_epi16(bit_met_p3_p5, bit_met_p3_p7); + xmm4 = _mm256_max_epi16(xmm0, xmm1); + xmm5 = _mm256_max_epi16(xmm2, xmm3); + logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm4); + logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm5); + xmm0 = _mm256_max_epi16(bit_met_p5_m7, bit_met_p5_m5); + xmm1 = _mm256_max_epi16(bit_met_p5_m3, bit_met_p5_m1); + xmm2 = _mm256_max_epi16(bit_met_p5_p1, bit_met_p5_p3); + xmm3 = _mm256_max_epi16(bit_met_p5_p5, bit_met_p5_p7); + xmm4 = _mm256_max_epi16(xmm0, xmm1); + xmm5 = _mm256_max_epi16(xmm2, xmm3); + logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm4); + logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm5); + + y1i = _mm256_subs_epi16(logmax_num_re0, logmax_den_re0); + + // Detection for 6th bit (LTE mapping) + xmm0 = _mm256_max_epi16(bit_met_p7_p7, bit_met_p5_p7); + xmm1 = _mm256_max_epi16(bit_met_p3_p7, bit_met_p1_p7); + xmm2 = _mm256_max_epi16(bit_met_m1_p7, bit_met_m3_p7); + xmm3 = _mm256_max_epi16(bit_met_m5_p7, bit_met_m7_p7); + xmm4 = _mm256_max_epi16(xmm0, xmm1); + xmm5 = _mm256_max_epi16(xmm2, xmm3); + logmax_den_re0 = _mm256_max_epi16(xmm4, xmm5); + xmm0 = _mm256_max_epi16(bit_met_p7_p1, bit_met_p5_p1); + xmm1 = _mm256_max_epi16(bit_met_p3_p1, bit_met_p1_p1); + xmm2 = _mm256_max_epi16(bit_met_m1_p1, bit_met_m3_p1); + xmm3 = _mm256_max_epi16(bit_met_m5_p1, bit_met_m5_p1); + xmm4 = 
_mm256_max_epi16(xmm0, xmm1); + xmm5 = _mm256_max_epi16(xmm2, xmm3); + logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm4); + logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm5); + xmm0 = _mm256_max_epi16(bit_met_p7_m1, bit_met_p5_m1); + xmm1 = _mm256_max_epi16(bit_met_p3_m1, bit_met_p1_m1); + xmm2 = _mm256_max_epi16(bit_met_m1_m1, bit_met_m3_m1); + xmm3 = _mm256_max_epi16(bit_met_m5_m1, bit_met_m7_m1); + xmm4 = _mm256_max_epi16(xmm0, xmm1); + xmm5 = _mm256_max_epi16(xmm2, xmm3); + logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm4); + logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm5); + xmm0 = _mm256_max_epi16(bit_met_p7_m7, bit_met_p5_m7); + xmm1 = _mm256_max_epi16(bit_met_p3_m7, bit_met_p1_m7); + xmm2 = _mm256_max_epi16(bit_met_m1_m7, bit_met_m3_m7); + xmm3 = _mm256_max_epi16(bit_met_m5_m7, bit_met_m7_m7); + xmm4 = _mm256_max_epi16(xmm0, xmm1); + xmm5 = _mm256_max_epi16(xmm2, xmm3); + logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm4); + logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm5); + + xmm0 = _mm256_max_epi16(bit_met_p7_m5, bit_met_p5_m5); + xmm1 = _mm256_max_epi16(bit_met_p3_m5, bit_met_p1_m5); + xmm2 = _mm256_max_epi16(bit_met_m1_m5, bit_met_m3_m5); + xmm3 = _mm256_max_epi16(bit_met_m5_m5, bit_met_m7_m5); + xmm4 = _mm256_max_epi16(xmm0, xmm1); + xmm5 = _mm256_max_epi16(xmm2, xmm3); + logmax_num_re0 = _mm256_max_epi16(xmm4, xmm5); + xmm0 = _mm256_max_epi16(bit_met_p7_m3, bit_met_p5_m3); + xmm1 = _mm256_max_epi16(bit_met_p3_m3, bit_met_p1_m3); + xmm2 = _mm256_max_epi16(bit_met_m1_m3, bit_met_m3_m3); + xmm3 = _mm256_max_epi16(bit_met_m5_m3, bit_met_m7_m3); + xmm4 = _mm256_max_epi16(xmm0, xmm1); + xmm5 = _mm256_max_epi16(xmm2, xmm3); + logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm4); + logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm5); + xmm0 = _mm256_max_epi16(bit_met_p7_p3, bit_met_p5_p3); + xmm1 = _mm256_max_epi16(bit_met_p3_p3, bit_met_p1_p3); + xmm2 = _mm256_max_epi16(bit_met_m1_p3, bit_met_m3_p3); + xmm3 = 
_mm256_max_epi16(bit_met_m5_p3, bit_met_m7_p3); + xmm4 = _mm256_max_epi16(xmm0, xmm1); + xmm5 = _mm256_max_epi16(xmm2, xmm3); + logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm4); + logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm5); + xmm0 = _mm256_max_epi16(bit_met_p7_p5, bit_met_p5_p5); + xmm1 = _mm256_max_epi16(bit_met_p3_p5, bit_met_p1_p5); + xmm2 = _mm256_max_epi16(bit_met_m1_p5, bit_met_m3_p5); + xmm3 = _mm256_max_epi16(bit_met_m5_p5, bit_met_m7_p5); + xmm4 = _mm256_max_epi16(xmm0, xmm1); + xmm5 = _mm256_max_epi16(xmm2, xmm3); + logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm4); + logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm5); + + y2i = _mm256_subs_epi16(logmax_num_re0, logmax_den_re0); + + // map to output stream, difficult to do in SIMD since we have 6 16bit LLRs + // RE 1 + j = 48*i; + stream0_out[j + 0] = ((short *)&y0r)[0]; + stream0_out[j + 1] = ((short *)&y1r)[0]; + stream0_out[j + 2] = ((short *)&y2r)[0]; + stream0_out[j + 3] = ((short *)&y0i)[0]; + stream0_out[j + 4] = ((short *)&y1i)[0]; + stream0_out[j + 5] = ((short *)&y2i)[0]; + // RE 2 + stream0_out[j + 6] = ((short *)&y0r)[1]; + stream0_out[j + 7] = ((short *)&y1r)[1]; + stream0_out[j + 8] = ((short *)&y2r)[1]; + stream0_out[j + 9] = ((short *)&y0i)[1]; + stream0_out[j + 10] = ((short *)&y1i)[1]; + stream0_out[j + 11] = ((short *)&y2i)[1]; + // RE 3 + stream0_out[j + 12] = ((short *)&y0r)[2]; + stream0_out[j + 13] = ((short *)&y1r)[2]; + stream0_out[j + 14] = ((short *)&y2r)[2]; + stream0_out[j + 15] = ((short *)&y0i)[2]; + stream0_out[j + 16] = ((short *)&y1i)[2]; + stream0_out[j + 17] = ((short *)&y2i)[2]; + // RE 4 + stream0_out[j + 18] = ((short *)&y0r)[3]; + stream0_out[j + 19] = ((short *)&y1r)[3]; + stream0_out[j + 20] = ((short *)&y2r)[3]; + stream0_out[j + 21] = ((short *)&y0i)[3]; + stream0_out[j + 22] = ((short *)&y1i)[3]; + stream0_out[j + 23] = ((short *)&y2i)[3]; + // RE 5 + stream0_out[j + 24] = ((short *)&y0r)[4]; + stream0_out[j + 25] = ((short 
*)&y1r)[4]; + stream0_out[j + 26] = ((short *)&y2r)[4]; + stream0_out[j + 27] = ((short *)&y0i)[4]; + stream0_out[j + 28] = ((short *)&y1i)[4]; + stream0_out[j + 29] = ((short *)&y2i)[4]; + // RE 6 + stream0_out[j + 30] = ((short *)&y0r)[5]; + stream0_out[j + 31] = ((short *)&y1r)[5]; + stream0_out[j + 32] = ((short *)&y2r)[5]; + stream0_out[j + 33] = ((short *)&y0i)[5]; + stream0_out[j + 34] = ((short *)&y1i)[5]; + stream0_out[j + 35] = ((short *)&y2i)[5]; + // RE 7 + stream0_out[j + 36] = ((short *)&y0r)[6]; + stream0_out[j + 37] = ((short *)&y1r)[6]; + stream0_out[j + 38] = ((short *)&y2r)[6]; + stream0_out[j + 39] = ((short *)&y0i)[6]; + stream0_out[j + 40] = ((short *)&y1i)[6]; + stream0_out[j + 41] = ((short *)&y2i)[6]; + // RE 8 + stream0_out[j + 42] = ((short *)&y0r)[7]; + stream0_out[j + 43] = ((short *)&y1r)[7]; + stream0_out[j + 44] = ((short *)&y2r)[7]; + stream0_out[j + 45] = ((short *)&y0i)[7]; + stream0_out[j + 46] = ((short *)&y1i)[7]; + stream0_out[j + 47] = ((short *)&y2i)[7]; + + // RE 9 + stream0_out[j + 48] = ((short *)&y0r)[8]; + stream0_out[j + 49] = ((short *)&y1r)[8]; + stream0_out[j + 50] = ((short *)&y2r)[8]; + stream0_out[j + 51] = ((short *)&y0i)[8]; + stream0_out[j + 52] = ((short *)&y1i)[8]; + stream0_out[j + 53] = ((short *)&y2i)[8]; + // RE 10 + stream0_out[j + 54] = ((short *)&y0r)[9]; + stream0_out[j + 55] = ((short *)&y1r)[9]; + stream0_out[j + 56] = ((short *)&y2r)[9]; + stream0_out[j + 57] = ((short *)&y0i)[9]; + stream0_out[j + 58] = ((short *)&y1i)[9]; + stream0_out[j + 59] = ((short *)&y2i)[9]; + // RE 11 + stream0_out[j + 60] = ((short *)&y0r)[10]; + stream0_out[j + 61] = ((short *)&y1r)[10]; + stream0_out[j + 62] = ((short *)&y2r)[10]; + stream0_out[j + 63] = ((short *)&y0i)[10]; + stream0_out[j + 64] = ((short *)&y1i)[10]; + stream0_out[j + 65] = ((short *)&y2i)[10]; + // RE 12 + stream0_out[j + 66] = ((short *)&y0r)[11]; + stream0_out[j + 67] = ((short *)&y1r)[11]; + stream0_out[j + 68] = ((short *)&y2r)[11]; + 
stream0_out[j + 69] = ((short *)&y0i)[11]; + stream0_out[j + 70] = ((short *)&y1i)[11]; + stream0_out[j + 71] = ((short *)&y2i)[11]; + // RE 13 + stream0_out[j + 72] = ((short *)&y0r)[12]; + stream0_out[j + 73] = ((short *)&y1r)[12]; + stream0_out[j + 74] = ((short *)&y2r)[12]; + stream0_out[j + 75] = ((short *)&y0i)[12]; + stream0_out[j + 76] = ((short *)&y1i)[12]; + stream0_out[j + 77] = ((short *)&y2i)[12]; + // RE 14 + stream0_out[j + 78] = ((short *)&y0r)[13]; + stream0_out[j + 79] = ((short *)&y1r)[13]; + stream0_out[j + 80] = ((short *)&y2r)[13]; + stream0_out[j + 81] = ((short *)&y0i)[13]; + stream0_out[j + 82] = ((short *)&y1i)[13]; + stream0_out[j + 83] = ((short *)&y2i)[13]; + // RE 15 + stream0_out[j + 84] = ((short *)&y0r)[14]; + stream0_out[j + 85] = ((short *)&y1r)[14]; + stream0_out[j + 86] = ((short *)&y2r)[14]; + stream0_out[j + 87] = ((short *)&y0i)[14]; + stream0_out[j + 88] = ((short *)&y1i)[14]; + stream0_out[j + 89] = ((short *)&y2i)[14]; + // RE 16 + stream0_out[j + 90] = ((short *)&y0r)[15]; + stream0_out[j + 91] = ((short *)&y1r)[15]; + stream0_out[j + 92] = ((short *)&y2r)[15]; + stream0_out[j + 93] = ((short *)&y0i)[15]; + stream0_out[j + 94] = ((short *)&y1i)[15]; + stream0_out[j + 95] = ((short *)&y2i)[15]; + +#elif defined(__arm__) + +#endif + } + +#if defined(__x86_64__) || defined(__i386__) + _mm_empty(); + _m_empty(); +#endif + +} + +void qam64_qam64_avx2(int32_t *stream0_in, + int32_t *stream1_in, + int32_t *ch_mag, + int32_t *ch_mag_i, + int16_t *stream0_out, + int32_t *rho01, + int length + ) +{ + + /* + Author: S. Wagner + Date: 28-02-17 + + Input: + stream0_in: MF filter for 1st stream, i.e., y0=h0'*y + stream1_in: MF filter for 2nd stream, i.e., y1=h1'*y + ch_mag: 4*h0/sqrt(42), [Re0 Im0 Re1 Im1] s.t. Im0=Re0, Im1=Re1, etc + ch_mag_i: 4*h1/sqrt(42), [Re0 Im0 Re1 Im1] s.t. 
Im0=Re0, Im1=Re1, etc + rho01: Channel cross correlation, i.e., h1'*h0 + + Output: + stream0_out: output LLRs for 1st stream + */ + +#if defined(__x86_64__) || defined(__i386__) + + __m256i *rho01_256i = (__m256i *)rho01; + __m256i *stream0_256i_in = (__m256i *)stream0_in; + __m256i *stream1_256i_in = (__m256i *)stream1_in; + __m256i *ch_mag_256i = (__m256i *)ch_mag; + __m256i *ch_mag_256i_i = (__m256i *)ch_mag_i; + + __m256i ONE_OVER_SQRT_42 = _mm256_broadcastw_epi16(_mm_set1_epi16(10112)); // round(1/sqrt(42)*2^16) + __m256i THREE_OVER_SQRT_42 = _mm256_broadcastw_epi16(_mm_set1_epi16(30337)); // round(3/sqrt(42)*2^16) + __m256i FIVE_OVER_SQRT_42 = _mm256_broadcastw_epi16(_mm_set1_epi16(25281)); // round(5/sqrt(42)*2^15) + __m256i SEVEN_OVER_SQRT_42 = _mm256_broadcastw_epi16(_mm_set1_epi16(17697)); // round(7/sqrt(42)*2^14) Q2.14 + __m256i ONE_OVER_SQRT_2 = _mm256_broadcastw_epi16(_mm_set1_epi16(23170)); // round(1/sqrt(2)*2^15) + __m256i ONE_OVER_SQRT_2_42 = _mm256_broadcastw_epi16(_mm_set1_epi16(3575)); // round(1/sqrt(2*42)*2^15) + __m256i THREE_OVER_SQRT_2_42 = _mm256_broadcastw_epi16(_mm_set1_epi16(10726)); // round(3/sqrt(2*42)*2^15) + __m256i FIVE_OVER_SQRT_2_42 = _mm256_broadcastw_epi16(_mm_set1_epi16(17876)); // round(5/sqrt(2*42)*2^15) + __m256i SEVEN_OVER_SQRT_2_42 = _mm256_broadcastw_epi16(_mm_set1_epi16(25027)); // round(7/sqrt(2*42)*2^15) + __m256i FORTYNINE_OVER_FOUR_SQRT_42 = _mm256_broadcastw_epi16(_mm_set1_epi16(30969)); // round(49/(4*sqrt(42))*2^14), Q2.14 + __m256i THIRTYSEVEN_OVER_FOUR_SQRT_42 = _mm256_broadcastw_epi16(_mm_set1_epi16(23385)); // round(37/(4*sqrt(42))*2^14), Q2.14 + __m256i TWENTYFIVE_OVER_FOUR_SQRT_42 = _mm256_broadcastw_epi16(_mm_set1_epi16(31601)); // round(25/(4*sqrt(42))*2^15) + __m256i TWENTYNINE_OVER_FOUR_SQRT_42 = _mm256_broadcastw_epi16(_mm_set1_epi16(18329)); // round(29/(4*sqrt(42))*2^15), Q2.14 + __m256i SEVENTEEN_OVER_FOUR_SQRT_42 = _mm256_broadcastw_epi16(_mm_set1_epi16(21489)); // round(17/(4*sqrt(42))*2^15) + 
__m256i NINE_OVER_FOUR_SQRT_42 = _mm256_broadcastw_epi16(_mm_set1_epi16(11376)); // round(9/(4*sqrt(42))*2^15) + __m256i THIRTEEN_OVER_FOUR_SQRT_42 = _mm256_broadcastw_epi16(_mm_set1_epi16(16433)); // round(13/(4*sqrt(42))*2^15) + __m256i FIVE_OVER_FOUR_SQRT_42 = _mm256_broadcastw_epi16(_mm_set1_epi16(6320)); // round(5/(4*sqrt(42))*2^15) + __m256i ONE_OVER_FOUR_SQRT_42 = _mm256_broadcastw_epi16(_mm_set1_epi16(1264)); // round(1/(4*sqrt(42))*2^15) + __m256i SQRT_42_OVER_FOUR = _mm256_broadcastw_epi16(_mm_set1_epi16(13272)); // round(sqrt(42)/4*2^13), Q3.12 + + __m256i ch_mag_des; + __m256i ch_mag_int; + __m256i ch_mag_98_over_42_with_sigma2; + __m256i ch_mag_74_over_42_with_sigma2; + __m256i ch_mag_58_over_42_with_sigma2; + __m256i ch_mag_50_over_42_with_sigma2; + __m256i ch_mag_34_over_42_with_sigma2; + __m256i ch_mag_18_over_42_with_sigma2; + __m256i ch_mag_26_over_42_with_sigma2; + __m256i ch_mag_10_over_42_with_sigma2; + __m256i ch_mag_2_over_42_with_sigma2; + __m256i y0r_one_over_sqrt_21; + __m256i y0r_three_over_sqrt_21; + __m256i y0r_five_over_sqrt_21; + __m256i y0r_seven_over_sqrt_21; + __m256i y0i_one_over_sqrt_21; + __m256i y0i_three_over_sqrt_21; + __m256i y0i_five_over_sqrt_21; + __m256i y0i_seven_over_sqrt_21; + __m256i ch_mag_int_with_sigma2; + __m256i two_ch_mag_int_with_sigma2; + __m256i three_ch_mag_int_with_sigma2; +#elif defined(__arm__) + +#endif + + int i,j; + uint32_t len256 = (length)>>3; + + for (i=0; i<len256; i+=2) { + +#if defined(__x86_64__) || defined(__i386__) + + // Get rho + /* + xmm0 = rho01_256i[i]; + xmm1 = rho01_256i[i+1]; + xmm0 = _mm256_shufflelo_epi16(xmm0,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm0 = _mm256_shufflehi_epi16(xmm0,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm0 = _mm256_shuffle_epi32(xmm0,0xd8); //_MM_SHUFFLE(0,2,1,3)); + + xmm1 = _mm256_shufflelo_epi16(xmm1,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm1 = _mm256_shufflehi_epi16(xmm1,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm1 = _mm256_shuffle_epi32(xmm1,0xd8); //_MM_SHUFFLE(0,2,1,3)); + 
+ //xmm0 = [Re(0,1,2,3) Im(0,1,2,3) Re(4,5,6,7) Im(4,5,6,7)] + //xmm0 = [Re(8,9,10,11) Im(8,9,10,11) Re(12,13,14,15) Im(12,13,14,15)] + + xmm2 = _mm256_unpacklo_epi64(xmm0, xmm1); + //xmm2 = [Re(0,1,2,3) Re(8,9,10,11) Re(4,5,6,7) Re(12,13,14,15)] + xmm2 = _mm256_permute4x64_epi64(xmm2,0xd8); // Re(rho) + + xmm3 = _mm256_unpackhi_epi64(xmm0, xmm1); + //xmm3 = [Im(0,1,2,3) Im(8,9,10,11) Im(4,5,6,7) Im(12,13,14,15)] + xmm3 = _mm256_permute4x64_epi64(xmm3,0xd8); // Im(rho) + */ + + seperate_real_imag_parts(&xmm2, &xmm3, rho01_256i[i], rho01_256i[i+1]); + + rho_rpi = _mm256_adds_epi16(xmm2,xmm3); // rho = Re(rho) + Im(rho) + rho_rmi = _mm256_subs_epi16(xmm2,xmm3); // rho* = Re(rho) - Im(rho) + + // Compute the different rhos + rho_rpi_1_1 = _mm256_mulhi_epi16(rho_rpi, ONE_OVER_SQRT_42); + rho_rmi_1_1 = _mm256_mulhi_epi16(rho_rmi, ONE_OVER_SQRT_42); + rho_rpi_3_3 = _mm256_mulhi_epi16(rho_rpi, THREE_OVER_SQRT_42); + rho_rmi_3_3 = _mm256_mulhi_epi16(rho_rmi, THREE_OVER_SQRT_42); + rho_rpi_5_5 = _mm256_mulhi_epi16(rho_rpi, FIVE_OVER_SQRT_42); + rho_rmi_5_5 = _mm256_mulhi_epi16(rho_rmi, FIVE_OVER_SQRT_42); + rho_rpi_7_7 = _mm256_mulhi_epi16(rho_rpi, SEVEN_OVER_SQRT_42); + rho_rmi_7_7 = _mm256_mulhi_epi16(rho_rmi, SEVEN_OVER_SQRT_42); + + rho_rpi_5_5 = _mm256_slli_epi16(rho_rpi_5_5, 1); + rho_rmi_5_5 = _mm256_slli_epi16(rho_rmi_5_5, 1); + rho_rpi_7_7 = _mm256_slli_epi16(rho_rpi_7_7, 2); + rho_rmi_7_7 = _mm256_slli_epi16(rho_rmi_7_7, 2); + + xmm4 = _mm256_mulhi_epi16(xmm2, ONE_OVER_SQRT_42); + xmm5 = _mm256_mulhi_epi16(xmm3, ONE_OVER_SQRT_42); + xmm6 = _mm256_mulhi_epi16(xmm3, THREE_OVER_SQRT_42); + xmm7 = _mm256_mulhi_epi16(xmm3, FIVE_OVER_SQRT_42); + xmm8 = _mm256_mulhi_epi16(xmm3, SEVEN_OVER_SQRT_42); + xmm7 = _mm256_slli_epi16(xmm7, 1); + xmm8 = _mm256_slli_epi16(xmm8, 2); + + rho_rpi_1_3 = _mm256_adds_epi16(xmm4, xmm6); + rho_rmi_1_3 = _mm256_subs_epi16(xmm4, xmm6); + rho_rpi_1_5 = _mm256_adds_epi16(xmm4, xmm7); + rho_rmi_1_5 = _mm256_subs_epi16(xmm4, xmm7); + rho_rpi_1_7 
= _mm256_adds_epi16(xmm4, xmm8); + rho_rmi_1_7 = _mm256_subs_epi16(xmm4, xmm8); + + xmm4 = _mm256_mulhi_epi16(xmm2, THREE_OVER_SQRT_42); + rho_rpi_3_1 = _mm256_adds_epi16(xmm4, xmm5); + rho_rmi_3_1 = _mm256_subs_epi16(xmm4, xmm5); + rho_rpi_3_5 = _mm256_adds_epi16(xmm4, xmm7); + rho_rmi_3_5 = _mm256_subs_epi16(xmm4, xmm7); + rho_rpi_3_7 = _mm256_adds_epi16(xmm4, xmm8); + rho_rmi_3_7 = _mm256_subs_epi16(xmm4, xmm8); + + xmm4 = _mm256_mulhi_epi16(xmm2, FIVE_OVER_SQRT_42); + xmm4 = _mm256_slli_epi16(xmm4, 1); + rho_rpi_5_1 = _mm256_adds_epi16(xmm4, xmm5); + rho_rmi_5_1 = _mm256_subs_epi16(xmm4, xmm5); + rho_rpi_5_3 = _mm256_adds_epi16(xmm4, xmm6); + rho_rmi_5_3 = _mm256_subs_epi16(xmm4, xmm6); + rho_rpi_5_7 = _mm256_adds_epi16(xmm4, xmm8); + rho_rmi_5_7 = _mm256_subs_epi16(xmm4, xmm8); + + xmm4 = _mm256_mulhi_epi16(xmm2, SEVEN_OVER_SQRT_42); + xmm4 = _mm256_slli_epi16(xmm4, 2); + rho_rpi_7_1 = _mm256_adds_epi16(xmm4, xmm5); + rho_rmi_7_1 = _mm256_subs_epi16(xmm4, xmm5); + rho_rpi_7_3 = _mm256_adds_epi16(xmm4, xmm6); + rho_rmi_7_3 = _mm256_subs_epi16(xmm4, xmm6); + rho_rpi_7_5 = _mm256_adds_epi16(xmm4, xmm7); + rho_rmi_7_5 = _mm256_subs_epi16(xmm4, xmm7); + + // Rearrange interfering MF output + /* + xmm0 = stream1_256i_in[i]; + xmm1 = stream1_256i_in[i+1]; + xmm0 = _mm256_shufflelo_epi16(xmm0,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm0 = _mm256_shufflehi_epi16(xmm0,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm0 = _mm256_shuffle_epi32(xmm0,0xd8); //_MM_SHUFFLE(0,2,1,3)); + + xmm1 = _mm256_shufflelo_epi16(xmm1,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm1 = _mm256_shufflehi_epi16(xmm1,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm1 = _mm256_shuffle_epi32(xmm1,0xd8); //_MM_SHUFFLE(0,2,1,3)); + + y1r = _mm256_unpacklo_epi64(xmm0, xmm1); + y1r = _mm256_permute4x64_epi64(y1r,0xd8); // Re(y1) + + y1i = _mm256_unpackhi_epi64(xmm0, xmm1); + y1i = _mm256_permute4x64_epi64(y1i,0xd8); // Im(y1) + */ + + seperate_real_imag_parts(&y1r, &y1i, stream1_256i_in[i], stream1_256i_in[i+1]); + + // Psi_r calculation 
from rho_rpi or rho_rmi + xmm0 = _mm256_broadcastw_epi16(_mm_set1_epi16(0));// ZERO for abs_pi16 + xmm2 = _mm256_subs_epi16(rho_rpi_7_7, y1r); + + psi_r_p7_p7 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rpi_7_5, y1r); + psi_r_p7_p5 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rpi_7_3, y1r); + psi_r_p7_p3 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rpi_7_1, y1r); + psi_r_p7_p1 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rmi_7_1, y1r); + psi_r_p7_m1 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rmi_7_3, y1r); + psi_r_p7_m3 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rmi_7_5, y1r); + psi_r_p7_m5 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rmi_7_7, y1r); + psi_r_p7_m7 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rpi_5_7, y1r); + psi_r_p5_p7 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rpi_5_5, y1r); + psi_r_p5_p5 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rpi_5_3, y1r); + psi_r_p5_p3 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rpi_5_1, y1r); + psi_r_p5_p1 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rmi_5_1, y1r); + psi_r_p5_m1 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rmi_5_3, y1r); + psi_r_p5_m3 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rmi_5_5, y1r); + psi_r_p5_m5 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rmi_5_7, y1r); + psi_r_p5_m7 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rpi_3_7, y1r); + psi_r_p3_p7 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rpi_3_5, y1r); + psi_r_p3_p5 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rpi_3_3, y1r); + psi_r_p3_p3 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rpi_3_1, y1r); + psi_r_p3_p1 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rmi_3_1, y1r); + psi_r_p3_m1 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rmi_3_3, y1r); + psi_r_p3_m3 = _mm256_abs_epi16(xmm2); + xmm2 = 
_mm256_subs_epi16(rho_rmi_3_5, y1r); + psi_r_p3_m5 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rmi_3_7, y1r); + psi_r_p3_m7 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rpi_1_7, y1r); + psi_r_p1_p7 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rpi_1_5, y1r); + psi_r_p1_p5 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rpi_1_3, y1r); + psi_r_p1_p3 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rpi_1_1, y1r); + psi_r_p1_p1 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rmi_1_1, y1r); + psi_r_p1_m1 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rmi_1_3, y1r); + psi_r_p1_m3 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rmi_1_5, y1r); + psi_r_p1_m5 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rmi_1_7, y1r); + psi_r_p1_m7 = _mm256_abs_epi16(xmm2); + + xmm2 = _mm256_adds_epi16(rho_rmi_1_7, y1r); + psi_r_m1_p7 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rmi_1_5, y1r); + psi_r_m1_p5 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rmi_1_3, y1r); + psi_r_m1_p3 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rmi_1_1, y1r); + psi_r_m1_p1 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rpi_1_1, y1r); + psi_r_m1_m1 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rpi_1_3, y1r); + psi_r_m1_m3 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rpi_1_5, y1r); + psi_r_m1_m5 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rpi_1_7, y1r); + psi_r_m1_m7 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rmi_3_7, y1r); + psi_r_m3_p7 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rmi_3_5, y1r); + psi_r_m3_p5 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rmi_3_3, y1r); + psi_r_m3_p3 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rmi_3_1, y1r); + psi_r_m3_p1 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rpi_3_1, y1r); + psi_r_m3_m1 = _mm256_abs_epi16(xmm2); + xmm2 = 
_mm256_adds_epi16(rho_rpi_3_3, y1r); + psi_r_m3_m3 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rpi_3_5, y1r); + psi_r_m3_m5 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rpi_3_7, y1r); + psi_r_m3_m7 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rmi_5_7, y1r); + psi_r_m5_p7 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rmi_5_5, y1r); + psi_r_m5_p5 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rmi_5_3, y1r); + psi_r_m5_p3 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rmi_5_1, y1r); + psi_r_m5_p1 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rpi_5_1, y1r); + psi_r_m5_m1 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rpi_5_3, y1r); + psi_r_m5_m3 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rpi_5_5, y1r); + psi_r_m5_m5 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rpi_5_7, y1r); + psi_r_m5_m7 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rmi_7_7, y1r); + psi_r_m7_p7 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rmi_7_5, y1r); + psi_r_m7_p5 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rmi_7_3, y1r); + psi_r_m7_p3 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rmi_7_1, y1r); + psi_r_m7_p1 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rpi_7_1, y1r); + psi_r_m7_m1 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rpi_7_3, y1r); + psi_r_m7_m3 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rpi_7_5, y1r); + psi_r_m7_m5 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rpi_7_7, y1r); + psi_r_m7_m7 = _mm256_abs_epi16(xmm2); + + // Psi_i calculation from rho_rpi or rho_rmi + xmm2 = _mm256_subs_epi16(rho_rmi_7_7, y1i); + psi_i_p7_p7 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rmi_5_7, y1i); + psi_i_p7_p5 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rmi_3_7, y1i); + psi_i_p7_p3 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rmi_1_7, y1i); + psi_i_p7_p1 = 
_mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rpi_1_7, y1i); + psi_i_p7_m1 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rpi_3_7, y1i); + psi_i_p7_m3 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rpi_5_7, y1i); + psi_i_p7_m5 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rpi_7_7, y1i); + psi_i_p7_m7 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rmi_7_5, y1i); + psi_i_p5_p7 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rmi_5_5, y1i); + psi_i_p5_p5 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rmi_3_5, y1i); + psi_i_p5_p3 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rmi_1_5, y1i); + psi_i_p5_p1 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rpi_1_5, y1i); + psi_i_p5_m1 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rpi_3_5, y1i); + psi_i_p5_m3 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rpi_5_5, y1i); + psi_i_p5_m5 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rpi_7_5, y1i); + psi_i_p5_m7 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rmi_7_3, y1i); + psi_i_p3_p7 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rmi_5_3, y1i); + psi_i_p3_p5 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rmi_3_3, y1i); + psi_i_p3_p3 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rmi_1_3, y1i); + psi_i_p3_p1 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rpi_1_3, y1i); + psi_i_p3_m1 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rpi_3_3, y1i); + psi_i_p3_m3 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rpi_5_3, y1i); + psi_i_p3_m5 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rpi_7_3, y1i); + psi_i_p3_m7 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rmi_7_1, y1i); + psi_i_p1_p7 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rmi_5_1, y1i); + psi_i_p1_p5 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rmi_3_1, y1i); + psi_i_p1_p3 = 
_mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rmi_1_1, y1i); + psi_i_p1_p1 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rpi_1_1, y1i); + psi_i_p1_m1 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rpi_3_1, y1i); + psi_i_p1_m3 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rpi_5_1, y1i); + psi_i_p1_m5 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rpi_7_1, y1i); + psi_i_p1_m7 = _mm256_abs_epi16(xmm2); + + xmm2 = _mm256_subs_epi16(rho_rpi_7_1, y1i); + psi_i_m1_p7 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rpi_5_1, y1i); + psi_i_m1_p5 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rpi_3_1, y1i); + psi_i_m1_p3 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rpi_1_1, y1i); + psi_i_m1_p1 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rmi_1_1, y1i); + psi_i_m1_m1 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rmi_3_1, y1i); + psi_i_m1_m3 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rmi_5_1, y1i); + psi_i_m1_m5 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rmi_7_1, y1i); + psi_i_m1_m7 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rpi_7_3, y1i); + psi_i_m3_p7 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rpi_5_3, y1i); + psi_i_m3_p5 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rpi_3_3, y1i); + psi_i_m3_p3 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rpi_1_3, y1i); + psi_i_m3_p1 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rmi_1_3, y1i); + psi_i_m3_m1 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rmi_3_3, y1i); + psi_i_m3_m3 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rmi_5_3, y1i); + psi_i_m3_m5 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rmi_7_3, y1i); + psi_i_m3_m7 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rpi_7_5, y1i); + psi_i_m5_p7 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rpi_5_5, y1i); + psi_i_m5_p5 = 
_mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rpi_3_5, y1i); + psi_i_m5_p3 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rpi_1_5, y1i); + psi_i_m5_p1 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rmi_1_5, y1i); + psi_i_m5_m1 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rmi_3_5, y1i); + psi_i_m5_m3 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rmi_5_5, y1i); + psi_i_m5_m5 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rmi_7_5, y1i); + psi_i_m5_m7 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rpi_7_7, y1i); + psi_i_m7_p7 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rpi_5_7, y1i); + psi_i_m7_p5 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rpi_3_7, y1i); + psi_i_m7_p3 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rpi_1_7, y1i); + psi_i_m7_p1 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rmi_1_7, y1i); + psi_i_m7_m1 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rmi_3_7, y1i); + psi_i_m7_m3 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rmi_5_7, y1i); + psi_i_m7_m5 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rmi_7_7, y1i); + psi_i_m7_m7 = _mm256_abs_epi16(xmm2); + + /* + // Rearrange desired MF output + xmm0 = stream0_256i_in[i]; + xmm1 = stream0_256i_in[i+1]; + xmm0 = _mm_shufflelo_epi16(xmm0,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm0 = _mm_shufflehi_epi16(xmm0,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm0 = _mm_shuffle_epi32(xmm0,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm1 = _mm_shufflelo_epi16(xmm1,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm1 = _mm_shufflehi_epi16(xmm1,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm1 = _mm_shuffle_epi32(xmm1,0xd8); //_MM_SHUFFLE(0,2,1,3)); + //xmm0 = [Re(0,1) Re(2,3) Im(0,1) Im(2,3)] + //xmm1 = [Re(4,5) Re(6,7) Im(4,5) Im(6,7)] + y0r = _mm_unpacklo_epi64(xmm0,xmm1); // = [y0r(1),y0r(2),y0r(3),y0r(4)] + y0i = _mm_unpackhi_epi64(xmm0,xmm1); + */ + seperate_real_imag_parts(&y0r, &y0i, stream0_256i_in[i], 
stream0_256i_in[i+1]); + + // Rearrange desired channel magnitudes + // [|h|^2(1),|h|^2(1),|h|^2(2),|h|^2(2),...,,|h|^2(7),|h|^2(7)]*(2/sqrt(10)) + /* + xmm2 = ch_mag_256i[i]; + xmm3 = ch_mag_256i[i+1]; + xmm2 = _mm_shufflelo_epi16(xmm2,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm2 = _mm_shufflehi_epi16(xmm2,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm2 = _mm_shuffle_epi32(xmm2,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm3 = _mm_shufflelo_epi16(xmm3,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm3 = _mm_shufflehi_epi16(xmm3,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm3 = _mm_shuffle_epi32(xmm3,0xd8); //_MM_SHUFFLE(0,2,1,3)); + ch_mag_des = _mm_unpacklo_epi64(xmm2,xmm3); + */ + // xmm2 is dummy variable that contains the same values as ch_mag_des + seperate_real_imag_parts(&ch_mag_des, &xmm2, ch_mag_256i[i], ch_mag_256i[i+1]); + + + // Rearrange interfering channel magnitudes + /* + xmm2 = ch_mag_256i_i[i]; + xmm3 = ch_mag_256i_i[i+1]; + xmm2 = _mm_shufflelo_epi16(xmm2,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm2 = _mm_shufflehi_epi16(xmm2,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm2 = _mm_shuffle_epi32(xmm2,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm3 = _mm_shufflelo_epi16(xmm3,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm3 = _mm_shufflehi_epi16(xmm3,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm3 = _mm_shuffle_epi32(xmm3,0xd8); //_MM_SHUFFLE(0,2,1,3)); + ch_mag_int = _mm_unpacklo_epi64(xmm2,xmm3); + */ + seperate_real_imag_parts(&ch_mag_int, &xmm2, ch_mag_256i_i[i], ch_mag_256i_i[i+1]); + + y0r_one_over_sqrt_21 = _mm256_mulhi_epi16(y0r, ONE_OVER_SQRT_42); + y0r_three_over_sqrt_21 = _mm256_mulhi_epi16(y0r, THREE_OVER_SQRT_42); + y0r_five_over_sqrt_21 = _mm256_mulhi_epi16(y0r, FIVE_OVER_SQRT_42); + y0r_five_over_sqrt_21 = _mm256_slli_epi16(y0r_five_over_sqrt_21, 1); + y0r_seven_over_sqrt_21 = _mm256_mulhi_epi16(y0r, SEVEN_OVER_SQRT_42); + y0r_seven_over_sqrt_21 = _mm256_slli_epi16(y0r_seven_over_sqrt_21, 2); // Q2.14 + + y0i_one_over_sqrt_21 = _mm256_mulhi_epi16(y0i, ONE_OVER_SQRT_42); + y0i_three_over_sqrt_21 = _mm256_mulhi_epi16(y0i, 
THREE_OVER_SQRT_42); + y0i_five_over_sqrt_21 = _mm256_mulhi_epi16(y0i, FIVE_OVER_SQRT_42); + y0i_five_over_sqrt_21 = _mm256_slli_epi16(y0i_five_over_sqrt_21, 1); + y0i_seven_over_sqrt_21 = _mm256_mulhi_epi16(y0i, SEVEN_OVER_SQRT_42); + y0i_seven_over_sqrt_21 = _mm256_slli_epi16(y0i_seven_over_sqrt_21, 2); // Q2.14 + + + y0_p_7_1 = _mm256_adds_epi16(y0r_seven_over_sqrt_21, y0i_one_over_sqrt_21); + y0_p_7_3 = _mm256_adds_epi16(y0r_seven_over_sqrt_21, y0i_three_over_sqrt_21); + y0_p_7_5 = _mm256_adds_epi16(y0r_seven_over_sqrt_21, y0i_five_over_sqrt_21); + y0_p_7_7 = _mm256_adds_epi16(y0r_seven_over_sqrt_21, y0i_seven_over_sqrt_21); + y0_p_5_1 = _mm256_adds_epi16(y0r_five_over_sqrt_21, y0i_one_over_sqrt_21); + y0_p_5_3 = _mm256_adds_epi16(y0r_five_over_sqrt_21, y0i_three_over_sqrt_21); + y0_p_5_5 = _mm256_adds_epi16(y0r_five_over_sqrt_21, y0i_five_over_sqrt_21); + y0_p_5_7 = _mm256_adds_epi16(y0r_five_over_sqrt_21, y0i_seven_over_sqrt_21); + y0_p_3_1 = _mm256_adds_epi16(y0r_three_over_sqrt_21, y0i_one_over_sqrt_21); + y0_p_3_3 = _mm256_adds_epi16(y0r_three_over_sqrt_21, y0i_three_over_sqrt_21); + y0_p_3_5 = _mm256_adds_epi16(y0r_three_over_sqrt_21, y0i_five_over_sqrt_21); + y0_p_3_7 = _mm256_adds_epi16(y0r_three_over_sqrt_21, y0i_seven_over_sqrt_21); + y0_p_1_1 = _mm256_adds_epi16(y0r_one_over_sqrt_21, y0i_one_over_sqrt_21); + y0_p_1_3 = _mm256_adds_epi16(y0r_one_over_sqrt_21, y0i_three_over_sqrt_21); + y0_p_1_5 = _mm256_adds_epi16(y0r_one_over_sqrt_21, y0i_five_over_sqrt_21); + y0_p_1_7 = _mm256_adds_epi16(y0r_one_over_sqrt_21, y0i_seven_over_sqrt_21); + + y0_m_1_1 = _mm256_subs_epi16(y0r_one_over_sqrt_21, y0i_one_over_sqrt_21); + y0_m_1_3 = _mm256_subs_epi16(y0r_one_over_sqrt_21, y0i_three_over_sqrt_21); + y0_m_1_5 = _mm256_subs_epi16(y0r_one_over_sqrt_21, y0i_five_over_sqrt_21); + y0_m_1_7 = _mm256_subs_epi16(y0r_one_over_sqrt_21, y0i_seven_over_sqrt_21); + y0_m_3_1 = _mm256_subs_epi16(y0r_three_over_sqrt_21, y0i_one_over_sqrt_21); + y0_m_3_3 = 
_mm256_subs_epi16(y0r_three_over_sqrt_21, y0i_three_over_sqrt_21); + y0_m_3_5 = _mm256_subs_epi16(y0r_three_over_sqrt_21, y0i_five_over_sqrt_21); + y0_m_3_7 = _mm256_subs_epi16(y0r_three_over_sqrt_21, y0i_seven_over_sqrt_21); + y0_m_5_1 = _mm256_subs_epi16(y0r_five_over_sqrt_21, y0i_one_over_sqrt_21); + y0_m_5_3 = _mm256_subs_epi16(y0r_five_over_sqrt_21, y0i_three_over_sqrt_21); + y0_m_5_5 = _mm256_subs_epi16(y0r_five_over_sqrt_21, y0i_five_over_sqrt_21); + y0_m_5_7 = _mm256_subs_epi16(y0r_five_over_sqrt_21, y0i_seven_over_sqrt_21); + y0_m_7_1 = _mm256_subs_epi16(y0r_seven_over_sqrt_21, y0i_one_over_sqrt_21); + y0_m_7_3 = _mm256_subs_epi16(y0r_seven_over_sqrt_21, y0i_three_over_sqrt_21); + y0_m_7_5 = _mm256_subs_epi16(y0r_seven_over_sqrt_21, y0i_five_over_sqrt_21); + y0_m_7_7 = _mm256_subs_epi16(y0r_seven_over_sqrt_21, y0i_seven_over_sqrt_21); + + // Detection of interference term + ch_mag_int_with_sigma2 = _mm256_srai_epi16(ch_mag_int, 1); // *2 + two_ch_mag_int_with_sigma2 = ch_mag_int; // *4 + three_ch_mag_int_with_sigma2 = _mm256_adds_epi16(ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2); // *6 + + interference_abs_64qam_epi16(psi_r_p7_p7, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_p7_p7, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_r_p7_p5, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_p7_p5, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_r_p7_p3, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_p7_p3, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_r_p7_p1, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_p7_p1, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + 
SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_r_p7_m1, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_p7_m1, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_r_p7_m3, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_p7_m3, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_r_p7_m5, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_p7_m5, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_r_p7_m7, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_p7_m7, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_r_p5_p7, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_p5_p7, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_r_p5_p5, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_p5_p5, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_r_p5_p3, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_p5_p3, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_r_p5_p1, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_p5_p1, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_r_p5_m1, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_p5_m1, ONE_OVER_SQRT_2_42, 
THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_r_p5_m3, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_p5_m3, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_r_p5_m5, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_p5_m5, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_r_p5_m7, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_p5_m7, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_r_p3_p7, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_p3_p7, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_r_p3_p5, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_p3_p5, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_r_p3_p3, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_p3_p3, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_r_p3_p1, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_p3_p1, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_r_p3_m1, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_p3_m1, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_r_p3_m3, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, 
a_r_p3_m3, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_r_p3_m5, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_p3_m5, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_r_p3_m7, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_p3_m7, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_r_p1_p7, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_p1_p7, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_r_p1_p5, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_p1_p5, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_r_p1_p3, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_p1_p3, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_r_p1_p1, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_p1_p1, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_r_p1_m1, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_p1_m1, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_r_p1_m3, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_p1_m3, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_r_p1_m5, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, 
three_ch_mag_int_with_sigma2, a_r_p1_m5, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_r_p1_m7, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_p1_m7, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_r_m1_p7, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_m1_p7, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_r_m1_p5, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_m1_p5, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_r_m1_p3, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_m1_p3, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_r_m1_p1, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_m1_p1, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_r_m1_m1, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_m1_m1, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_r_m1_m3, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_m1_m3, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_r_m1_m5, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_m1_m5, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_r_m1_m7, ch_mag_int_with_sigma2, 
two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_m1_m7, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_r_m3_p7, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_m3_p7, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_r_m3_p5, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_m3_p5, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_r_m3_p3, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_m3_p3, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_r_m3_p1, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_m3_p1, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_r_m3_m1, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_m3_m1, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_r_m3_m3, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_m3_m3, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_r_m3_m5, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_m3_m5, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_r_m3_m7, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_m3_m7, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + 
interference_abs_64qam_epi16(psi_r_m5_p7, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_m5_p7, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_r_m5_p5, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_m5_p5, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_r_m5_p3, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_m5_p3, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_r_m5_p1, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_m5_p1, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_r_m5_m1, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_m5_m1, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_r_m5_m3, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_m5_m3, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_r_m5_m5, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_m5_m5, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_r_m5_m7, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_m5_m7, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_r_m7_p7, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_m7_p7, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + 
SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_r_m7_p5, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_m7_p5, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_r_m7_p3, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_m7_p3, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_r_m7_p1, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_m7_p1, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_r_m7_m1, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_m7_m1, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_r_m7_m3, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_m7_m3, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_r_m7_m5, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_m7_m5, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_r_m7_m7, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_m7_m7, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + + interference_abs_64qam_epi16(psi_i_p7_p7, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_p7_p7, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_i_p7_p5, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_p7_p5, ONE_OVER_SQRT_2_42, 
THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_i_p7_p3, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_p7_p3, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_i_p7_p1, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_p7_p1, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_i_p7_m1, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_p7_m1, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_i_p7_m3, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_p7_m3, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_i_p7_m5, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_p7_m5, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_i_p7_m7, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_p7_m7, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_i_p5_p7, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_p5_p7, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_i_p5_p5, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_p5_p5, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_i_p5_p3, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, 
a_i_p5_p3, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_i_p5_p1, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_p5_p1, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_i_p5_m1, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_p5_m1, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_i_p5_m3, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_p5_m3, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_i_p5_m5, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_p5_m5, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_i_p5_m7, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_p5_m7, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_i_p3_p7, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_p3_p7, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_i_p3_p5, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_p3_p5, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_i_p3_p3, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_p3_p3, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_i_p3_p1, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, 
three_ch_mag_int_with_sigma2, a_i_p3_p1, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_i_p3_m1, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_p3_m1, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_i_p3_m3, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_p3_m3, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_i_p3_m5, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_p3_m5, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_i_p3_m7, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_p3_m7, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_i_p1_p7, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_p1_p7, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_i_p1_p5, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_p1_p5, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_i_p1_p3, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_p1_p3, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_i_p1_p1, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_p1_p1, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_i_p1_m1, ch_mag_int_with_sigma2, 
two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_p1_m1, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_i_p1_m3, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_p1_m3, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_i_p1_m5, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_p1_m5, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_i_p1_m7, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_p1_m7, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_i_m1_p7, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_m1_p7, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_i_m1_p5, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_m1_p5, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_i_m1_p3, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_m1_p3, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_i_m1_p1, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_m1_p1, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_i_m1_m1, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_m1_m1, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + 
interference_abs_64qam_epi16(psi_i_m1_m3, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_m1_m3, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_i_m1_m5, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_m1_m5, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_i_m1_m7, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_m1_m7, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_i_m3_p7, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_m3_p7, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_i_m3_p5, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_m3_p5, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_i_m3_p3, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_m3_p3, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_i_m3_p1, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_m3_p1, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_i_m3_m1, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_m3_m1, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_i_m3_m3, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_m3_m3, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + 
SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_i_m3_m5, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_m3_m5, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_i_m3_m7, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_m3_m7, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_i_m5_p7, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_m5_p7, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_i_m5_p5, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_m5_p5, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_i_m5_p3, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_m5_p3, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_i_m5_p1, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_m5_p1, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_i_m5_m1, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_m5_m1, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_i_m5_m3, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_m5_m3, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_i_m5_m5, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_m5_m5, ONE_OVER_SQRT_2_42, 
THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_i_m5_m7, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_m5_m7, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_i_m7_p7, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_m7_p7, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_i_m7_p5, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_m7_p5, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_i_m7_p3, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_m7_p3, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_i_m7_p1, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_m7_p1, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_i_m7_m1, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_m7_m1, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_i_m7_m3, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_m7_m3, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_i_m7_m5, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_m7_m5, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_i_m7_m7, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, 
a_i_m7_m7, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + + // Calculation of a group of two terms in the bit metric involving product of psi and interference + prodsum_psi_a_epi16(psi_r_p7_p7, a_r_p7_p7, psi_i_p7_p7, a_i_p7_p7, psi_a_p7_p7); + prodsum_psi_a_epi16(psi_r_p7_p5, a_r_p7_p5, psi_i_p7_p5, a_i_p7_p5, psi_a_p7_p5); + prodsum_psi_a_epi16(psi_r_p7_p3, a_r_p7_p3, psi_i_p7_p3, a_i_p7_p3, psi_a_p7_p3); + prodsum_psi_a_epi16(psi_r_p7_p1, a_r_p7_p1, psi_i_p7_p1, a_i_p7_p1, psi_a_p7_p1); + prodsum_psi_a_epi16(psi_r_p7_m1, a_r_p7_m1, psi_i_p7_m1, a_i_p7_m1, psi_a_p7_m1); + prodsum_psi_a_epi16(psi_r_p7_m3, a_r_p7_m3, psi_i_p7_m3, a_i_p7_m3, psi_a_p7_m3); + prodsum_psi_a_epi16(psi_r_p7_m5, a_r_p7_m5, psi_i_p7_m5, a_i_p7_m5, psi_a_p7_m5); + prodsum_psi_a_epi16(psi_r_p7_m7, a_r_p7_m7, psi_i_p7_m7, a_i_p7_m7, psi_a_p7_m7); + prodsum_psi_a_epi16(psi_r_p5_p7, a_r_p5_p7, psi_i_p5_p7, a_i_p5_p7, psi_a_p5_p7); + prodsum_psi_a_epi16(psi_r_p5_p5, a_r_p5_p5, psi_i_p5_p5, a_i_p5_p5, psi_a_p5_p5); + prodsum_psi_a_epi16(psi_r_p5_p3, a_r_p5_p3, psi_i_p5_p3, a_i_p5_p3, psi_a_p5_p3); + prodsum_psi_a_epi16(psi_r_p5_p1, a_r_p5_p1, psi_i_p5_p1, a_i_p5_p1, psi_a_p5_p1); + prodsum_psi_a_epi16(psi_r_p5_m1, a_r_p5_m1, psi_i_p5_m1, a_i_p5_m1, psi_a_p5_m1); + prodsum_psi_a_epi16(psi_r_p5_m3, a_r_p5_m3, psi_i_p5_m3, a_i_p5_m3, psi_a_p5_m3); + prodsum_psi_a_epi16(psi_r_p5_m5, a_r_p5_m5, psi_i_p5_m5, a_i_p5_m5, psi_a_p5_m5); + prodsum_psi_a_epi16(psi_r_p5_m7, a_r_p5_m7, psi_i_p5_m7, a_i_p5_m7, psi_a_p5_m7); + prodsum_psi_a_epi16(psi_r_p3_p7, a_r_p3_p7, psi_i_p3_p7, a_i_p3_p7, psi_a_p3_p7); + prodsum_psi_a_epi16(psi_r_p3_p5, a_r_p3_p5, psi_i_p3_p5, a_i_p3_p5, psi_a_p3_p5); + prodsum_psi_a_epi16(psi_r_p3_p3, a_r_p3_p3, psi_i_p3_p3, a_i_p3_p3, psi_a_p3_p3); + prodsum_psi_a_epi16(psi_r_p3_p1, a_r_p3_p1, psi_i_p3_p1, a_i_p3_p1, psi_a_p3_p1); + prodsum_psi_a_epi16(psi_r_p3_m1, a_r_p3_m1, psi_i_p3_m1, a_i_p3_m1, psi_a_p3_m1); + 
prodsum_psi_a_epi16(psi_r_p3_m3, a_r_p3_m3, psi_i_p3_m3, a_i_p3_m3, psi_a_p3_m3); + prodsum_psi_a_epi16(psi_r_p3_m5, a_r_p3_m5, psi_i_p3_m5, a_i_p3_m5, psi_a_p3_m5); + prodsum_psi_a_epi16(psi_r_p3_m7, a_r_p3_m7, psi_i_p3_m7, a_i_p3_m7, psi_a_p3_m7); + prodsum_psi_a_epi16(psi_r_p1_p7, a_r_p1_p7, psi_i_p1_p7, a_i_p1_p7, psi_a_p1_p7); + prodsum_psi_a_epi16(psi_r_p1_p5, a_r_p1_p5, psi_i_p1_p5, a_i_p1_p5, psi_a_p1_p5); + prodsum_psi_a_epi16(psi_r_p1_p3, a_r_p1_p3, psi_i_p1_p3, a_i_p1_p3, psi_a_p1_p3); + prodsum_psi_a_epi16(psi_r_p1_p1, a_r_p1_p1, psi_i_p1_p1, a_i_p1_p1, psi_a_p1_p1); + prodsum_psi_a_epi16(psi_r_p1_m1, a_r_p1_m1, psi_i_p1_m1, a_i_p1_m1, psi_a_p1_m1); + prodsum_psi_a_epi16(psi_r_p1_m3, a_r_p1_m3, psi_i_p1_m3, a_i_p1_m3, psi_a_p1_m3); + prodsum_psi_a_epi16(psi_r_p1_m5, a_r_p1_m5, psi_i_p1_m5, a_i_p1_m5, psi_a_p1_m5); + prodsum_psi_a_epi16(psi_r_p1_m7, a_r_p1_m7, psi_i_p1_m7, a_i_p1_m7, psi_a_p1_m7); + prodsum_psi_a_epi16(psi_r_m1_p7, a_r_m1_p7, psi_i_m1_p7, a_i_m1_p7, psi_a_m1_p7); + prodsum_psi_a_epi16(psi_r_m1_p5, a_r_m1_p5, psi_i_m1_p5, a_i_m1_p5, psi_a_m1_p5); + prodsum_psi_a_epi16(psi_r_m1_p3, a_r_m1_p3, psi_i_m1_p3, a_i_m1_p3, psi_a_m1_p3); + prodsum_psi_a_epi16(psi_r_m1_p1, a_r_m1_p1, psi_i_m1_p1, a_i_m1_p1, psi_a_m1_p1); + prodsum_psi_a_epi16(psi_r_m1_m1, a_r_m1_m1, psi_i_m1_m1, a_i_m1_m1, psi_a_m1_m1); + prodsum_psi_a_epi16(psi_r_m1_m3, a_r_m1_m3, psi_i_m1_m3, a_i_m1_m3, psi_a_m1_m3); + prodsum_psi_a_epi16(psi_r_m1_m5, a_r_m1_m5, psi_i_m1_m5, a_i_m1_m5, psi_a_m1_m5); + prodsum_psi_a_epi16(psi_r_m1_m7, a_r_m1_m7, psi_i_m1_m7, a_i_m1_m7, psi_a_m1_m7); + prodsum_psi_a_epi16(psi_r_m3_p7, a_r_m3_p7, psi_i_m3_p7, a_i_m3_p7, psi_a_m3_p7); + prodsum_psi_a_epi16(psi_r_m3_p5, a_r_m3_p5, psi_i_m3_p5, a_i_m3_p5, psi_a_m3_p5); + prodsum_psi_a_epi16(psi_r_m3_p3, a_r_m3_p3, psi_i_m3_p3, a_i_m3_p3, psi_a_m3_p3); + prodsum_psi_a_epi16(psi_r_m3_p1, a_r_m3_p1, psi_i_m3_p1, a_i_m3_p1, psi_a_m3_p1); + prodsum_psi_a_epi16(psi_r_m3_m1, a_r_m3_m1, psi_i_m3_m1, a_i_m3_m1, 
psi_a_m3_m1); + prodsum_psi_a_epi16(psi_r_m3_m3, a_r_m3_m3, psi_i_m3_m3, a_i_m3_m3, psi_a_m3_m3); + prodsum_psi_a_epi16(psi_r_m3_m5, a_r_m3_m5, psi_i_m3_m5, a_i_m3_m5, psi_a_m3_m5); + prodsum_psi_a_epi16(psi_r_m3_m7, a_r_m3_m7, psi_i_m3_m7, a_i_m3_m7, psi_a_m3_m7); + prodsum_psi_a_epi16(psi_r_m5_p7, a_r_m5_p7, psi_i_m5_p7, a_i_m5_p7, psi_a_m5_p7); + prodsum_psi_a_epi16(psi_r_m5_p5, a_r_m5_p5, psi_i_m5_p5, a_i_m5_p5, psi_a_m5_p5); + prodsum_psi_a_epi16(psi_r_m5_p3, a_r_m5_p3, psi_i_m5_p3, a_i_m5_p3, psi_a_m5_p3); + prodsum_psi_a_epi16(psi_r_m5_p1, a_r_m5_p1, psi_i_m5_p1, a_i_m5_p1, psi_a_m5_p1); + prodsum_psi_a_epi16(psi_r_m5_m1, a_r_m5_m1, psi_i_m5_m1, a_i_m5_m1, psi_a_m5_m1); + prodsum_psi_a_epi16(psi_r_m5_m3, a_r_m5_m3, psi_i_m5_m3, a_i_m5_m3, psi_a_m5_m3); + prodsum_psi_a_epi16(psi_r_m5_m5, a_r_m5_m5, psi_i_m5_m5, a_i_m5_m5, psi_a_m5_m5); + prodsum_psi_a_epi16(psi_r_m5_m7, a_r_m5_m7, psi_i_m5_m7, a_i_m5_m7, psi_a_m5_m7); + prodsum_psi_a_epi16(psi_r_m7_p7, a_r_m7_p7, psi_i_m7_p7, a_i_m7_p7, psi_a_m7_p7); + prodsum_psi_a_epi16(psi_r_m7_p5, a_r_m7_p5, psi_i_m7_p5, a_i_m7_p5, psi_a_m7_p5); + prodsum_psi_a_epi16(psi_r_m7_p3, a_r_m7_p3, psi_i_m7_p3, a_i_m7_p3, psi_a_m7_p3); + prodsum_psi_a_epi16(psi_r_m7_p1, a_r_m7_p1, psi_i_m7_p1, a_i_m7_p1, psi_a_m7_p1); + prodsum_psi_a_epi16(psi_r_m7_m1, a_r_m7_m1, psi_i_m7_m1, a_i_m7_m1, psi_a_m7_m1); + prodsum_psi_a_epi16(psi_r_m7_m3, a_r_m7_m3, psi_i_m7_m3, a_i_m7_m3, psi_a_m7_m3); + prodsum_psi_a_epi16(psi_r_m7_m5, a_r_m7_m5, psi_i_m7_m5, a_i_m7_m5, psi_a_m7_m5); + prodsum_psi_a_epi16(psi_r_m7_m7, a_r_m7_m7, psi_i_m7_m7, a_i_m7_m7, psi_a_m7_m7); + + // Multiply by sqrt(2) + psi_a_p7_p7 = _mm256_mulhi_epi16(psi_a_p7_p7, ONE_OVER_SQRT_2); + psi_a_p7_p7 = _mm256_slli_epi16(psi_a_p7_p7, 2); + psi_a_p7_p5 = _mm256_mulhi_epi16(psi_a_p7_p5, ONE_OVER_SQRT_2); + psi_a_p7_p5 = _mm256_slli_epi16(psi_a_p7_p5, 2); + psi_a_p7_p3 = _mm256_mulhi_epi16(psi_a_p7_p3, ONE_OVER_SQRT_2); + psi_a_p7_p3 = _mm256_slli_epi16(psi_a_p7_p3, 2); + 
psi_a_p7_p1 = _mm256_mulhi_epi16(psi_a_p7_p1, ONE_OVER_SQRT_2); + psi_a_p7_p1 = _mm256_slli_epi16(psi_a_p7_p1, 2); + psi_a_p7_m1 = _mm256_mulhi_epi16(psi_a_p7_m1, ONE_OVER_SQRT_2); + psi_a_p7_m1 = _mm256_slli_epi16(psi_a_p7_m1, 2); + psi_a_p7_m3 = _mm256_mulhi_epi16(psi_a_p7_m3, ONE_OVER_SQRT_2); + psi_a_p7_m3 = _mm256_slli_epi16(psi_a_p7_m3, 2); + psi_a_p7_m5 = _mm256_mulhi_epi16(psi_a_p7_m5, ONE_OVER_SQRT_2); + psi_a_p7_m5 = _mm256_slli_epi16(psi_a_p7_m5, 2); + psi_a_p7_m7 = _mm256_mulhi_epi16(psi_a_p7_m7, ONE_OVER_SQRT_2); + psi_a_p7_m7 = _mm256_slli_epi16(psi_a_p7_m7, 2); + psi_a_p5_p7 = _mm256_mulhi_epi16(psi_a_p5_p7, ONE_OVER_SQRT_2); + psi_a_p5_p7 = _mm256_slli_epi16(psi_a_p5_p7, 2); + psi_a_p5_p5 = _mm256_mulhi_epi16(psi_a_p5_p5, ONE_OVER_SQRT_2); + psi_a_p5_p5 = _mm256_slli_epi16(psi_a_p5_p5, 2); + psi_a_p5_p3 = _mm256_mulhi_epi16(psi_a_p5_p3, ONE_OVER_SQRT_2); + psi_a_p5_p3 = _mm256_slli_epi16(psi_a_p5_p3, 2); + psi_a_p5_p1 = _mm256_mulhi_epi16(psi_a_p5_p1, ONE_OVER_SQRT_2); + psi_a_p5_p1 = _mm256_slli_epi16(psi_a_p5_p1, 2); + psi_a_p5_m1 = _mm256_mulhi_epi16(psi_a_p5_m1, ONE_OVER_SQRT_2); + psi_a_p5_m1 = _mm256_slli_epi16(psi_a_p5_m1, 2); + psi_a_p5_m3 = _mm256_mulhi_epi16(psi_a_p5_m3, ONE_OVER_SQRT_2); + psi_a_p5_m3 = _mm256_slli_epi16(psi_a_p5_m3, 2); + psi_a_p5_m5 = _mm256_mulhi_epi16(psi_a_p5_m5, ONE_OVER_SQRT_2); + psi_a_p5_m5 = _mm256_slli_epi16(psi_a_p5_m5, 2); + psi_a_p5_m7 = _mm256_mulhi_epi16(psi_a_p5_m7, ONE_OVER_SQRT_2); + psi_a_p5_m7 = _mm256_slli_epi16(psi_a_p5_m7, 2); + psi_a_p3_p7 = _mm256_mulhi_epi16(psi_a_p3_p7, ONE_OVER_SQRT_2); + psi_a_p3_p7 = _mm256_slli_epi16(psi_a_p3_p7, 2); + psi_a_p3_p5 = _mm256_mulhi_epi16(psi_a_p3_p5, ONE_OVER_SQRT_2); + psi_a_p3_p5 = _mm256_slli_epi16(psi_a_p3_p5, 2); + psi_a_p3_p3 = _mm256_mulhi_epi16(psi_a_p3_p3, ONE_OVER_SQRT_2); + psi_a_p3_p3 = _mm256_slli_epi16(psi_a_p3_p3, 2); + psi_a_p3_p1 = _mm256_mulhi_epi16(psi_a_p3_p1, ONE_OVER_SQRT_2); + psi_a_p3_p1 = _mm256_slli_epi16(psi_a_p3_p1, 2); + 
psi_a_p3_m1 = _mm256_mulhi_epi16(psi_a_p3_m1, ONE_OVER_SQRT_2); + psi_a_p3_m1 = _mm256_slli_epi16(psi_a_p3_m1, 2); + psi_a_p3_m3 = _mm256_mulhi_epi16(psi_a_p3_m3, ONE_OVER_SQRT_2); + psi_a_p3_m3 = _mm256_slli_epi16(psi_a_p3_m3, 2); + psi_a_p3_m5 = _mm256_mulhi_epi16(psi_a_p3_m5, ONE_OVER_SQRT_2); + psi_a_p3_m5 = _mm256_slli_epi16(psi_a_p3_m5, 2); + psi_a_p3_m7 = _mm256_mulhi_epi16(psi_a_p3_m7, ONE_OVER_SQRT_2); + psi_a_p3_m7 = _mm256_slli_epi16(psi_a_p3_m7, 2); + psi_a_p1_p7 = _mm256_mulhi_epi16(psi_a_p1_p7, ONE_OVER_SQRT_2); + psi_a_p1_p7 = _mm256_slli_epi16(psi_a_p1_p7, 2); + psi_a_p1_p5 = _mm256_mulhi_epi16(psi_a_p1_p5, ONE_OVER_SQRT_2); + psi_a_p1_p5 = _mm256_slli_epi16(psi_a_p1_p5, 2); + psi_a_p1_p3 = _mm256_mulhi_epi16(psi_a_p1_p3, ONE_OVER_SQRT_2); + psi_a_p1_p3 = _mm256_slli_epi16(psi_a_p1_p3, 2); + psi_a_p1_p1 = _mm256_mulhi_epi16(psi_a_p1_p1, ONE_OVER_SQRT_2); + psi_a_p1_p1 = _mm256_slli_epi16(psi_a_p1_p1, 2); + psi_a_p1_m1 = _mm256_mulhi_epi16(psi_a_p1_m1, ONE_OVER_SQRT_2); + psi_a_p1_m1 = _mm256_slli_epi16(psi_a_p1_m1, 2); + psi_a_p1_m3 = _mm256_mulhi_epi16(psi_a_p1_m3, ONE_OVER_SQRT_2); + psi_a_p1_m3 = _mm256_slli_epi16(psi_a_p1_m3, 2); + psi_a_p1_m5 = _mm256_mulhi_epi16(psi_a_p1_m5, ONE_OVER_SQRT_2); + psi_a_p1_m5 = _mm256_slli_epi16(psi_a_p1_m5, 2); + psi_a_p1_m7 = _mm256_mulhi_epi16(psi_a_p1_m7, ONE_OVER_SQRT_2); + psi_a_p1_m7 = _mm256_slli_epi16(psi_a_p1_m7, 2); + psi_a_m1_p7 = _mm256_mulhi_epi16(psi_a_m1_p7, ONE_OVER_SQRT_2); + psi_a_m1_p7 = _mm256_slli_epi16(psi_a_m1_p7, 2); + psi_a_m1_p5 = _mm256_mulhi_epi16(psi_a_m1_p5, ONE_OVER_SQRT_2); + psi_a_m1_p5 = _mm256_slli_epi16(psi_a_m1_p5, 2); + psi_a_m1_p3 = _mm256_mulhi_epi16(psi_a_m1_p3, ONE_OVER_SQRT_2); + psi_a_m1_p3 = _mm256_slli_epi16(psi_a_m1_p3, 2); + psi_a_m1_p1 = _mm256_mulhi_epi16(psi_a_m1_p1, ONE_OVER_SQRT_2); + psi_a_m1_p1 = _mm256_slli_epi16(psi_a_m1_p1, 2); + psi_a_m1_m1 = _mm256_mulhi_epi16(psi_a_m1_m1, ONE_OVER_SQRT_2); + psi_a_m1_m1 = _mm256_slli_epi16(psi_a_m1_m1, 2); + 
psi_a_m1_m3 = _mm256_mulhi_epi16(psi_a_m1_m3, ONE_OVER_SQRT_2); + psi_a_m1_m3 = _mm256_slli_epi16(psi_a_m1_m3, 2); + psi_a_m1_m5 = _mm256_mulhi_epi16(psi_a_m1_m5, ONE_OVER_SQRT_2); + psi_a_m1_m5 = _mm256_slli_epi16(psi_a_m1_m5, 2); + psi_a_m1_m7 = _mm256_mulhi_epi16(psi_a_m1_m7, ONE_OVER_SQRT_2); + psi_a_m1_m7 = _mm256_slli_epi16(psi_a_m1_m7, 2); + psi_a_m3_p7 = _mm256_mulhi_epi16(psi_a_m3_p7, ONE_OVER_SQRT_2); + psi_a_m3_p7 = _mm256_slli_epi16(psi_a_m3_p7, 2); + psi_a_m3_p5 = _mm256_mulhi_epi16(psi_a_m3_p5, ONE_OVER_SQRT_2); + psi_a_m3_p5 = _mm256_slli_epi16(psi_a_m3_p5, 2); + psi_a_m3_p3 = _mm256_mulhi_epi16(psi_a_m3_p3, ONE_OVER_SQRT_2); + psi_a_m3_p3 = _mm256_slli_epi16(psi_a_m3_p3, 2); + psi_a_m3_p1 = _mm256_mulhi_epi16(psi_a_m3_p1, ONE_OVER_SQRT_2); + psi_a_m3_p1 = _mm256_slli_epi16(psi_a_m3_p1, 2); + psi_a_m3_m1 = _mm256_mulhi_epi16(psi_a_m3_m1, ONE_OVER_SQRT_2); + psi_a_m3_m1 = _mm256_slli_epi16(psi_a_m3_m1, 2); + psi_a_m3_m3 = _mm256_mulhi_epi16(psi_a_m3_m3, ONE_OVER_SQRT_2); + psi_a_m3_m3 = _mm256_slli_epi16(psi_a_m3_m3, 2); + psi_a_m3_m5 = _mm256_mulhi_epi16(psi_a_m3_m5, ONE_OVER_SQRT_2); + psi_a_m3_m5 = _mm256_slli_epi16(psi_a_m3_m5, 2); + psi_a_m3_m7 = _mm256_mulhi_epi16(psi_a_m3_m7, ONE_OVER_SQRT_2); + psi_a_m3_m7 = _mm256_slli_epi16(psi_a_m3_m7, 2); + psi_a_m5_p7 = _mm256_mulhi_epi16(psi_a_m5_p7, ONE_OVER_SQRT_2); + psi_a_m5_p7 = _mm256_slli_epi16(psi_a_m5_p7, 2); + psi_a_m5_p5 = _mm256_mulhi_epi16(psi_a_m5_p5, ONE_OVER_SQRT_2); + psi_a_m5_p5 = _mm256_slli_epi16(psi_a_m5_p5, 2); + psi_a_m5_p3 = _mm256_mulhi_epi16(psi_a_m5_p3, ONE_OVER_SQRT_2); + psi_a_m5_p3 = _mm256_slli_epi16(psi_a_m5_p3, 2); + psi_a_m5_p1 = _mm256_mulhi_epi16(psi_a_m5_p1, ONE_OVER_SQRT_2); + psi_a_m5_p1 = _mm256_slli_epi16(psi_a_m5_p1, 2); + psi_a_m5_m1 = _mm256_mulhi_epi16(psi_a_m5_m1, ONE_OVER_SQRT_2); + psi_a_m5_m1 = _mm256_slli_epi16(psi_a_m5_m1, 2); + psi_a_m5_m3 = _mm256_mulhi_epi16(psi_a_m5_m3, ONE_OVER_SQRT_2); + psi_a_m5_m3 = _mm256_slli_epi16(psi_a_m5_m3, 2); + 
psi_a_m5_m5 = _mm256_mulhi_epi16(psi_a_m5_m5, ONE_OVER_SQRT_2); + psi_a_m5_m5 = _mm256_slli_epi16(psi_a_m5_m5, 2); + psi_a_m5_m7 = _mm256_mulhi_epi16(psi_a_m5_m7, ONE_OVER_SQRT_2); + psi_a_m5_m7 = _mm256_slli_epi16(psi_a_m5_m7, 2); + psi_a_m7_p7 = _mm256_mulhi_epi16(psi_a_m7_p7, ONE_OVER_SQRT_2); + psi_a_m7_p7 = _mm256_slli_epi16(psi_a_m7_p7, 2); + psi_a_m7_p5 = _mm256_mulhi_epi16(psi_a_m7_p5, ONE_OVER_SQRT_2); + psi_a_m7_p5 = _mm256_slli_epi16(psi_a_m7_p5, 2); + psi_a_m7_p3 = _mm256_mulhi_epi16(psi_a_m7_p3, ONE_OVER_SQRT_2); + psi_a_m7_p3 = _mm256_slli_epi16(psi_a_m7_p3, 2); + psi_a_m7_p1 = _mm256_mulhi_epi16(psi_a_m7_p1, ONE_OVER_SQRT_2); + psi_a_m7_p1 = _mm256_slli_epi16(psi_a_m7_p1, 2); + psi_a_m7_m1 = _mm256_mulhi_epi16(psi_a_m7_m1, ONE_OVER_SQRT_2); + psi_a_m7_m1 = _mm256_slli_epi16(psi_a_m7_m1, 2); + psi_a_m7_m3 = _mm256_mulhi_epi16(psi_a_m7_m3, ONE_OVER_SQRT_2); + psi_a_m7_m3 = _mm256_slli_epi16(psi_a_m7_m3, 2); + psi_a_m7_m5 = _mm256_mulhi_epi16(psi_a_m7_m5, ONE_OVER_SQRT_2); + psi_a_m7_m5 = _mm256_slli_epi16(psi_a_m7_m5, 2); + psi_a_m7_m7 = _mm256_mulhi_epi16(psi_a_m7_m7, ONE_OVER_SQRT_2); + psi_a_m7_m7 = _mm256_slli_epi16(psi_a_m7_m7, 2); + + // Calculation of a group of two terms in the bit metric involving squares of interference + square_a_64qam_epi16(a_r_p7_p7, a_i_p7_p7, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_p7_p7); + square_a_64qam_epi16(a_r_p7_p5, a_i_p7_p5, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_p7_p5); + square_a_64qam_epi16(a_r_p7_p3, a_i_p7_p3, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_p7_p3); + square_a_64qam_epi16(a_r_p7_p1, a_i_p7_p1, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_p7_p1); + square_a_64qam_epi16(a_r_p7_m1, a_i_p7_m1, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_p7_m1); + square_a_64qam_epi16(a_r_p7_m3, a_i_p7_m3, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_p7_m3); + square_a_64qam_epi16(a_r_p7_m5, a_i_p7_m5, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_p7_m5); + square_a_64qam_epi16(a_r_p7_m7, a_i_p7_m7, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_p7_m7); + 
square_a_64qam_epi16(a_r_p5_p7, a_i_p5_p7, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_p5_p7); + square_a_64qam_epi16(a_r_p5_p5, a_i_p5_p5, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_p5_p5); + square_a_64qam_epi16(a_r_p5_p3, a_i_p5_p3, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_p5_p3); + square_a_64qam_epi16(a_r_p5_p1, a_i_p5_p1, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_p5_p1); + square_a_64qam_epi16(a_r_p5_m1, a_i_p5_m1, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_p5_m1); + square_a_64qam_epi16(a_r_p5_m3, a_i_p5_m3, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_p5_m3); + square_a_64qam_epi16(a_r_p5_m5, a_i_p5_m5, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_p5_m5); + square_a_64qam_epi16(a_r_p5_m7, a_i_p5_m7, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_p5_m7); + square_a_64qam_epi16(a_r_p3_p7, a_i_p3_p7, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_p3_p7); + square_a_64qam_epi16(a_r_p3_p5, a_i_p3_p5, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_p3_p5); + square_a_64qam_epi16(a_r_p3_p3, a_i_p3_p3, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_p3_p3); + square_a_64qam_epi16(a_r_p3_p1, a_i_p3_p1, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_p3_p1); + square_a_64qam_epi16(a_r_p3_m1, a_i_p3_m1, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_p3_m1); + square_a_64qam_epi16(a_r_p3_m3, a_i_p3_m3, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_p3_m3); + square_a_64qam_epi16(a_r_p3_m5, a_i_p3_m5, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_p3_m5); + square_a_64qam_epi16(a_r_p3_m7, a_i_p3_m7, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_p3_m7); + square_a_64qam_epi16(a_r_p1_p7, a_i_p1_p7, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_p1_p7); + square_a_64qam_epi16(a_r_p1_p5, a_i_p1_p5, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_p1_p5); + square_a_64qam_epi16(a_r_p1_p3, a_i_p1_p3, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_p1_p3); + square_a_64qam_epi16(a_r_p1_p1, a_i_p1_p1, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_p1_p1); + square_a_64qam_epi16(a_r_p1_m1, a_i_p1_m1, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_p1_m1); + square_a_64qam_epi16(a_r_p1_m3, a_i_p1_m3, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_p1_m3); + square_a_64qam_epi16(a_r_p1_m5, 
a_i_p1_m5, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_p1_m5); + square_a_64qam_epi16(a_r_p1_m7, a_i_p1_m7, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_p1_m7); + square_a_64qam_epi16(a_r_m1_p7, a_i_m1_p7, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_m1_p7); + square_a_64qam_epi16(a_r_m1_p5, a_i_m1_p5, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_m1_p5); + square_a_64qam_epi16(a_r_m1_p3, a_i_m1_p3, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_m1_p3); + square_a_64qam_epi16(a_r_m1_p1, a_i_m1_p1, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_m1_p1); + square_a_64qam_epi16(a_r_m1_m1, a_i_m1_m1, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_m1_m1); + square_a_64qam_epi16(a_r_m1_m3, a_i_m1_m3, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_m1_m3); + square_a_64qam_epi16(a_r_m1_m5, a_i_m1_m5, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_m1_m5); + square_a_64qam_epi16(a_r_m1_m7, a_i_m1_m7, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_m1_m7); + square_a_64qam_epi16(a_r_m3_p7, a_i_m3_p7, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_m3_p7); + square_a_64qam_epi16(a_r_m3_p5, a_i_m3_p5, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_m3_p5); + square_a_64qam_epi16(a_r_m3_p3, a_i_m3_p3, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_m3_p3); + square_a_64qam_epi16(a_r_m3_p1, a_i_m3_p1, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_m3_p1); + square_a_64qam_epi16(a_r_m3_m1, a_i_m3_m1, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_m3_m1); + square_a_64qam_epi16(a_r_m3_m3, a_i_m3_m3, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_m3_m3); + square_a_64qam_epi16(a_r_m3_m5, a_i_m3_m5, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_m3_m5); + square_a_64qam_epi16(a_r_m3_m7, a_i_m3_m7, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_m3_m7); + square_a_64qam_epi16(a_r_m5_p7, a_i_m5_p7, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_m5_p7); + square_a_64qam_epi16(a_r_m5_p5, a_i_m5_p5, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_m5_p5); + square_a_64qam_epi16(a_r_m5_p3, a_i_m5_p3, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_m5_p3); + square_a_64qam_epi16(a_r_m5_p1, a_i_m5_p1, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_m5_p1); + square_a_64qam_epi16(a_r_m5_m1, a_i_m5_m1, ch_mag_int, SQRT_42_OVER_FOUR, 
a_sq_m5_m1); + square_a_64qam_epi16(a_r_m5_m3, a_i_m5_m3, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_m5_m3); + square_a_64qam_epi16(a_r_m5_m5, a_i_m5_m5, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_m5_m5); + square_a_64qam_epi16(a_r_m5_m7, a_i_m5_m7, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_m5_m7); + square_a_64qam_epi16(a_r_m7_p7, a_i_m7_p7, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_m7_p7); + square_a_64qam_epi16(a_r_m7_p5, a_i_m7_p5, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_m7_p5); + square_a_64qam_epi16(a_r_m7_p3, a_i_m7_p3, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_m7_p3); + square_a_64qam_epi16(a_r_m7_p1, a_i_m7_p1, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_m7_p1); + square_a_64qam_epi16(a_r_m7_m1, a_i_m7_m1, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_m7_m1); + square_a_64qam_epi16(a_r_m7_m3, a_i_m7_m3, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_m7_m3); + square_a_64qam_epi16(a_r_m7_m5, a_i_m7_m5, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_m7_m5); + square_a_64qam_epi16(a_r_m7_m7, a_i_m7_m7, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_m7_m7); + + // Computing different multiples of ||h0||^2 + // x=1, y=1 + ch_mag_2_over_42_with_sigma2 = _mm256_mulhi_epi16(ch_mag_des,ONE_OVER_FOUR_SQRT_42); + ch_mag_2_over_42_with_sigma2 = _mm256_slli_epi16(ch_mag_2_over_42_with_sigma2,1); + // x=1, y=3 + ch_mag_10_over_42_with_sigma2 = _mm256_mulhi_epi16(ch_mag_des,FIVE_OVER_FOUR_SQRT_42); + ch_mag_10_over_42_with_sigma2 = _mm256_slli_epi16(ch_mag_10_over_42_with_sigma2,1); + // x=1, x=5 + ch_mag_26_over_42_with_sigma2 = _mm256_mulhi_epi16(ch_mag_des,THIRTEEN_OVER_FOUR_SQRT_42); + ch_mag_26_over_42_with_sigma2 = _mm256_slli_epi16(ch_mag_26_over_42_with_sigma2,1); + // x=1, y=7 + ch_mag_50_over_42_with_sigma2 = _mm256_mulhi_epi16(ch_mag_des,TWENTYFIVE_OVER_FOUR_SQRT_42); + ch_mag_50_over_42_with_sigma2 = _mm256_slli_epi16(ch_mag_50_over_42_with_sigma2,1); + // x=3, y=3 + ch_mag_18_over_42_with_sigma2 = _mm256_mulhi_epi16(ch_mag_des,NINE_OVER_FOUR_SQRT_42); + ch_mag_18_over_42_with_sigma2 = _mm256_slli_epi16(ch_mag_18_over_42_with_sigma2,1); + // 
x=3, y=5 + ch_mag_34_over_42_with_sigma2 = _mm256_mulhi_epi16(ch_mag_des,SEVENTEEN_OVER_FOUR_SQRT_42); + ch_mag_34_over_42_with_sigma2 = _mm256_slli_epi16(ch_mag_34_over_42_with_sigma2,1); + // x=3, y=7 + ch_mag_58_over_42_with_sigma2 = _mm256_mulhi_epi16(ch_mag_des,TWENTYNINE_OVER_FOUR_SQRT_42); + ch_mag_58_over_42_with_sigma2 = _mm256_slli_epi16(ch_mag_58_over_42_with_sigma2,2); + // x=5, y=5 + ch_mag_50_over_42_with_sigma2 = _mm256_mulhi_epi16(ch_mag_des,TWENTYFIVE_OVER_FOUR_SQRT_42); + ch_mag_50_over_42_with_sigma2 = _mm256_slli_epi16(ch_mag_50_over_42_with_sigma2,1); + // x=5, y=7 + ch_mag_74_over_42_with_sigma2 = _mm256_mulhi_epi16(ch_mag_des,THIRTYSEVEN_OVER_FOUR_SQRT_42); + ch_mag_74_over_42_with_sigma2 = _mm256_slli_epi16(ch_mag_74_over_42_with_sigma2,2); + // x=7, y=7 + ch_mag_98_over_42_with_sigma2 = _mm256_mulhi_epi16(ch_mag_des,FORTYNINE_OVER_FOUR_SQRT_42); + ch_mag_98_over_42_with_sigma2 = _mm256_slli_epi16(ch_mag_98_over_42_with_sigma2,2); + + // Computing Metrics + xmm0 = _mm256_subs_epi16(psi_a_p7_p7, a_sq_p7_p7); + xmm1 = _mm256_adds_epi16(xmm0, y0_p_7_7); + bit_met_p7_p7 = _mm256_subs_epi16(xmm1, ch_mag_98_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_p7_p5, a_sq_p7_p5); + xmm1 = _mm256_adds_epi16(xmm0, y0_p_7_5); + bit_met_p7_p5 = _mm256_subs_epi16(xmm1, ch_mag_74_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_p7_p3, a_sq_p7_p3); + xmm1 = _mm256_adds_epi16(xmm0, y0_p_7_3); + bit_met_p7_p3 = _mm256_subs_epi16(xmm1, ch_mag_58_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_p7_p1, a_sq_p7_p1); + xmm1 = _mm256_adds_epi16(xmm0, y0_p_7_1); + bit_met_p7_p1 = _mm256_subs_epi16(xmm1, ch_mag_50_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_p7_m1, a_sq_p7_m1); + xmm1 = _mm256_adds_epi16(xmm0, y0_m_7_1); + bit_met_p7_m1 = _mm256_subs_epi16(xmm1, ch_mag_50_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_p7_m3, a_sq_p7_m3); + xmm1 = _mm256_adds_epi16(xmm0, y0_m_7_3); + bit_met_p7_m3 = _mm256_subs_epi16(xmm1, 
ch_mag_58_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_p7_m5, a_sq_p7_m5); + xmm1 = _mm256_adds_epi16(xmm0, y0_m_7_5); + bit_met_p7_m5 = _mm256_subs_epi16(xmm1, ch_mag_74_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_p7_m7, a_sq_p7_m7); + xmm1 = _mm256_adds_epi16(xmm0, y0_m_7_7); + bit_met_p7_m7 = _mm256_subs_epi16(xmm1, ch_mag_98_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_p5_p7, a_sq_p5_p7); + xmm1 = _mm256_adds_epi16(xmm0, y0_p_5_7); + bit_met_p5_p7 = _mm256_subs_epi16(xmm1, ch_mag_74_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_p5_p5, a_sq_p5_p5); + xmm1 = _mm256_adds_epi16(xmm0, y0_p_5_5); + bit_met_p5_p5 = _mm256_subs_epi16(xmm1, ch_mag_50_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_p5_p3, a_sq_p5_p3); + xmm1 = _mm256_adds_epi16(xmm0, y0_p_5_3); + bit_met_p5_p3 = _mm256_subs_epi16(xmm1, ch_mag_34_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_p5_p1, a_sq_p5_p1); + xmm1 = _mm256_adds_epi16(xmm0, y0_p_5_1); + bit_met_p5_p1 = _mm256_subs_epi16(xmm1, ch_mag_26_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_p5_m1, a_sq_p5_m1); + xmm1 = _mm256_adds_epi16(xmm0, y0_m_5_1); + bit_met_p5_m1 = _mm256_subs_epi16(xmm1, ch_mag_26_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_p5_m3, a_sq_p5_m3); + xmm1 = _mm256_adds_epi16(xmm0, y0_m_5_3); + bit_met_p5_m3 = _mm256_subs_epi16(xmm1, ch_mag_34_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_p5_m5, a_sq_p5_m5); + xmm1 = _mm256_adds_epi16(xmm0, y0_m_5_5); + bit_met_p5_m5 = _mm256_subs_epi16(xmm1, ch_mag_50_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_p5_m7, a_sq_p5_m7); + xmm1 = _mm256_adds_epi16(xmm0, y0_m_5_7); + bit_met_p5_m7 = _mm256_subs_epi16(xmm1, ch_mag_74_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_p3_p7, a_sq_p3_p7); + xmm1 = _mm256_adds_epi16(xmm0, y0_p_3_7); + bit_met_p3_p7 = _mm256_subs_epi16(xmm1, ch_mag_58_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_p3_p5, a_sq_p3_p5); + xmm1 = _mm256_adds_epi16(xmm0, 
y0_p_3_5); + bit_met_p3_p5 = _mm256_subs_epi16(xmm1, ch_mag_34_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_p3_p3, a_sq_p3_p3); + xmm1 = _mm256_adds_epi16(xmm0, y0_p_3_3); + bit_met_p3_p3 = _mm256_subs_epi16(xmm1, ch_mag_18_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_p3_p1, a_sq_p3_p1); + xmm1 = _mm256_adds_epi16(xmm0, y0_p_3_1); + bit_met_p3_p1 = _mm256_subs_epi16(xmm1, ch_mag_10_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_p3_m1, a_sq_p3_m1); + xmm1 = _mm256_adds_epi16(xmm0, y0_m_3_1); + bit_met_p3_m1 = _mm256_subs_epi16(xmm1, ch_mag_10_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_p3_m3, a_sq_p3_m3); + xmm1 = _mm256_adds_epi16(xmm0, y0_m_3_3); + bit_met_p3_m3 = _mm256_subs_epi16(xmm1, ch_mag_18_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_p3_m5, a_sq_p3_m5); + xmm1 = _mm256_adds_epi16(xmm0, y0_m_3_5); + bit_met_p3_m5 = _mm256_subs_epi16(xmm1, ch_mag_34_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_p3_m7, a_sq_p3_m7); + xmm1 = _mm256_adds_epi16(xmm0, y0_m_3_7); + bit_met_p3_m7 = _mm256_subs_epi16(xmm1, ch_mag_58_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_p1_p7, a_sq_p1_p7); + xmm1 = _mm256_adds_epi16(xmm0, y0_p_1_7); + bit_met_p1_p7 = _mm256_subs_epi16(xmm1, ch_mag_50_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_p1_p5, a_sq_p1_p5); + xmm1 = _mm256_adds_epi16(xmm0, y0_p_1_5); + bit_met_p1_p5 = _mm256_subs_epi16(xmm1, ch_mag_26_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_p1_p3, a_sq_p1_p3); + xmm1 = _mm256_adds_epi16(xmm0, y0_p_1_3); + bit_met_p1_p3 = _mm256_subs_epi16(xmm1, ch_mag_10_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_p1_p1, a_sq_p1_p1); + xmm1 = _mm256_adds_epi16(xmm0, y0_p_1_1); + bit_met_p1_p1 = _mm256_subs_epi16(xmm1, ch_mag_2_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_p1_m1, a_sq_p1_m1); + xmm1 = _mm256_adds_epi16(xmm0, y0_m_1_1); + bit_met_p1_m1 = _mm256_subs_epi16(xmm1, ch_mag_2_over_42_with_sigma2); + xmm0 = 
_mm256_subs_epi16(psi_a_p1_m3, a_sq_p1_m3); + xmm1 = _mm256_adds_epi16(xmm0, y0_m_1_3); + bit_met_p1_m3 = _mm256_subs_epi16(xmm1, ch_mag_10_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_p1_m5, a_sq_p1_m5); + xmm1 = _mm256_adds_epi16(xmm0, y0_m_1_5); + bit_met_p1_m5 = _mm256_subs_epi16(xmm1, ch_mag_26_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_p1_m7, a_sq_p1_m7); + xmm1 = _mm256_adds_epi16(xmm0, y0_m_1_7); + bit_met_p1_m7 = _mm256_subs_epi16(xmm1, ch_mag_50_over_42_with_sigma2); + + xmm0 = _mm256_subs_epi16(psi_a_m1_p7, a_sq_m1_p7); + xmm1 = _mm256_subs_epi16(xmm0, y0_m_1_7); + bit_met_m1_p7 = _mm256_subs_epi16(xmm1, ch_mag_50_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_m1_p5, a_sq_m1_p5); + xmm1 = _mm256_subs_epi16(xmm0, y0_m_1_5); + bit_met_m1_p5 = _mm256_subs_epi16(xmm1, ch_mag_26_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_m1_p3, a_sq_m1_p3); + xmm1 = _mm256_subs_epi16(xmm0, y0_m_1_3); + bit_met_m1_p3 = _mm256_subs_epi16(xmm1, ch_mag_10_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_m1_p1, a_sq_m1_p1); + xmm1 = _mm256_subs_epi16(xmm0, y0_m_1_1); + bit_met_m1_p1 = _mm256_subs_epi16(xmm1, ch_mag_2_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_m1_m1, a_sq_m1_m1); + xmm1 = _mm256_subs_epi16(xmm0, y0_p_1_1); + bit_met_m1_m1 = _mm256_subs_epi16(xmm1, ch_mag_2_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_m1_m3, a_sq_m1_m3); + xmm1 = _mm256_subs_epi16(xmm0, y0_p_1_3); + bit_met_m1_m3 = _mm256_subs_epi16(xmm1, ch_mag_10_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_m1_m5, a_sq_m1_m5); + xmm1 = _mm256_subs_epi16(xmm0, y0_p_1_5); + bit_met_m1_m5 = _mm256_subs_epi16(xmm1, ch_mag_26_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_m1_m7, a_sq_m1_m7); + xmm1 = _mm256_subs_epi16(xmm0, y0_p_1_7); + bit_met_m1_m7 = _mm256_subs_epi16(xmm1, ch_mag_50_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_m3_p7, a_sq_m3_p7); + xmm1 = _mm256_subs_epi16(xmm0, y0_m_3_7); + bit_met_m3_p7 = 
_mm256_subs_epi16(xmm1, ch_mag_58_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_m3_p5, a_sq_m3_p5); + xmm1 = _mm256_subs_epi16(xmm0, y0_m_3_5); + bit_met_m3_p5 = _mm256_subs_epi16(xmm1, ch_mag_34_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_m3_p3, a_sq_m3_p3); + xmm1 = _mm256_subs_epi16(xmm0, y0_m_3_3); + bit_met_m3_p3 = _mm256_subs_epi16(xmm1, ch_mag_18_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_m3_p1, a_sq_m3_p1); + xmm1 = _mm256_subs_epi16(xmm0, y0_m_3_1); + bit_met_m3_p1 = _mm256_subs_epi16(xmm1, ch_mag_10_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_m3_m1, a_sq_m3_m1); + xmm1 = _mm256_subs_epi16(xmm0, y0_p_3_1); + bit_met_m3_m1 = _mm256_subs_epi16(xmm1, ch_mag_10_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_m3_m3, a_sq_m3_m3); + xmm1 = _mm256_subs_epi16(xmm0, y0_p_3_3); + bit_met_m3_m3 = _mm256_subs_epi16(xmm1, ch_mag_18_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_m3_m5, a_sq_m3_m5); + xmm1 = _mm256_subs_epi16(xmm0, y0_p_3_5); + bit_met_m3_m5 = _mm256_subs_epi16(xmm1, ch_mag_34_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_m3_m7, a_sq_m3_m7); + xmm1 = _mm256_subs_epi16(xmm0, y0_p_3_7); + bit_met_m3_m7 = _mm256_subs_epi16(xmm1, ch_mag_58_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_m5_p7, a_sq_m5_p7); + xmm1 = _mm256_subs_epi16(xmm0, y0_m_5_7); + bit_met_m5_p7 = _mm256_subs_epi16(xmm1, ch_mag_74_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_m5_p5, a_sq_m5_p5); + xmm1 = _mm256_subs_epi16(xmm0, y0_m_5_5); + bit_met_m5_p5 = _mm256_subs_epi16(xmm1, ch_mag_50_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_m5_p3, a_sq_m5_p3); + xmm1 = _mm256_subs_epi16(xmm0, y0_m_5_3); + bit_met_m5_p3 = _mm256_subs_epi16(xmm1, ch_mag_34_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_m5_p1, a_sq_m5_p1); + xmm1 = _mm256_subs_epi16(xmm0, y0_m_5_1); + bit_met_m5_p1 = _mm256_subs_epi16(xmm1, ch_mag_26_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_m5_m1, a_sq_m5_m1); + xmm1 = 
_mm256_subs_epi16(xmm0, y0_p_5_1); + bit_met_m5_m1 = _mm256_subs_epi16(xmm1, ch_mag_26_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_m5_m3, a_sq_m5_m3); + xmm1 = _mm256_subs_epi16(xmm0, y0_p_5_3); + bit_met_m5_m3 = _mm256_subs_epi16(xmm1, ch_mag_34_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_m5_m5, a_sq_m5_m5); + xmm1 = _mm256_subs_epi16(xmm0, y0_p_5_5); + bit_met_m5_m5 = _mm256_subs_epi16(xmm1, ch_mag_50_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_m5_m7, a_sq_m5_m7); + xmm1 = _mm256_subs_epi16(xmm0, y0_p_5_7); + bit_met_m5_m7 = _mm256_subs_epi16(xmm1, ch_mag_74_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_m7_p7, a_sq_m7_p7); + xmm1 = _mm256_subs_epi16(xmm0, y0_m_7_7); + bit_met_m7_p7 = _mm256_subs_epi16(xmm1, ch_mag_98_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_m7_p5, a_sq_m7_p5); + xmm1 = _mm256_subs_epi16(xmm0, y0_m_7_5); + bit_met_m7_p5 = _mm256_subs_epi16(xmm1, ch_mag_74_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_m7_p3, a_sq_m7_p3); + xmm1 = _mm256_subs_epi16(xmm0, y0_m_7_3); + bit_met_m7_p3 = _mm256_subs_epi16(xmm1, ch_mag_58_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_m7_p1, a_sq_m7_p1); + xmm1 = _mm256_subs_epi16(xmm0, y0_m_7_1); + bit_met_m7_p1 = _mm256_subs_epi16(xmm1, ch_mag_50_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_m7_m1, a_sq_m7_m1); + xmm1 = _mm256_subs_epi16(xmm0, y0_p_7_1); + bit_met_m7_m1 = _mm256_subs_epi16(xmm1, ch_mag_50_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_m7_m3, a_sq_m7_m3); + xmm1 = _mm256_subs_epi16(xmm0, y0_p_7_3); + bit_met_m7_m3 = _mm256_subs_epi16(xmm1, ch_mag_58_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_m7_m5, a_sq_m7_m5); + xmm1 = _mm256_subs_epi16(xmm0, y0_p_7_5); + bit_met_m7_m5 = _mm256_subs_epi16(xmm1, ch_mag_74_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_m7_m7, a_sq_m7_m7); + xmm1 = _mm256_subs_epi16(xmm0, y0_p_7_7); + bit_met_m7_m7 = _mm256_subs_epi16(xmm1, ch_mag_98_over_42_with_sigma2); + + // 
Detection for 1st bit (LTE mapping) + // bit = 1 + xmm0 = _mm256_max_epi16(bit_met_m7_p7, bit_met_m7_p5); + xmm1 = _mm256_max_epi16(bit_met_m7_p3, bit_met_m7_p1); + xmm2 = _mm256_max_epi16(bit_met_m7_m1, bit_met_m7_m3); + xmm3 = _mm256_max_epi16(bit_met_m7_m5, bit_met_m7_m7); + xmm4 = _mm256_max_epi16(xmm0, xmm1); + xmm5 = _mm256_max_epi16(xmm2, xmm3); + logmax_den_re0 = _mm256_max_epi16(xmm4, xmm5); + xmm0 = _mm256_max_epi16(bit_met_m5_p7, bit_met_m5_p5); + xmm1 = _mm256_max_epi16(bit_met_m5_p3, bit_met_m5_p1); + xmm2 = _mm256_max_epi16(bit_met_m5_m1, bit_met_m5_m3); + xmm3 = _mm256_max_epi16(bit_met_m5_m5, bit_met_m5_m7); + xmm4 = _mm256_max_epi16(xmm0, xmm1); + xmm5 = _mm256_max_epi16(xmm2, xmm3); + logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm4); + logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm5); + xmm0 = _mm256_max_epi16(bit_met_m3_p7, bit_met_m3_p5); + xmm1 = _mm256_max_epi16(bit_met_m3_p3, bit_met_m3_p1); + xmm2 = _mm256_max_epi16(bit_met_m3_m1, bit_met_m3_m3); + xmm3 = _mm256_max_epi16(bit_met_m3_m5, bit_met_m3_m7); + xmm4 = _mm256_max_epi16(xmm0, xmm1); + xmm5 = _mm256_max_epi16(xmm2, xmm3); + logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm4); + logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm5); + xmm0 = _mm256_max_epi16(bit_met_m1_p7, bit_met_m1_p5); + xmm1 = _mm256_max_epi16(bit_met_m1_p3, bit_met_m1_p1); + xmm2 = _mm256_max_epi16(bit_met_m1_m1, bit_met_m1_m3); + xmm3 = _mm256_max_epi16(bit_met_m1_m5, bit_met_m1_m7); + xmm4 = _mm256_max_epi16(xmm0, xmm1); + xmm5 = _mm256_max_epi16(xmm2, xmm3); + logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm4); + logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm5); + + // bit = 0 + xmm0 = _mm256_max_epi16(bit_met_p7_p7, bit_met_p7_p5); + xmm1 = _mm256_max_epi16(bit_met_p7_p3, bit_met_p7_p1); + xmm2 = _mm256_max_epi16(bit_met_p7_m1, bit_met_p7_m3); + xmm3 = _mm256_max_epi16(bit_met_p7_m5, bit_met_p7_m7); + xmm4 = _mm256_max_epi16(xmm0, xmm1); + xmm5 = _mm256_max_epi16(xmm2, xmm3); + 
logmax_num_re0 = _mm256_max_epi16(xmm4, xmm5); + xmm0 = _mm256_max_epi16(bit_met_p5_p7, bit_met_p5_p5); + xmm1 = _mm256_max_epi16(bit_met_p5_p3, bit_met_p5_p1); + xmm2 = _mm256_max_epi16(bit_met_p5_m1, bit_met_p5_m3); + xmm3 = _mm256_max_epi16(bit_met_p5_m5, bit_met_p5_m7); + xmm4 = _mm256_max_epi16(xmm0, xmm1); + xmm5 = _mm256_max_epi16(xmm2, xmm3); + logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm4); + logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm5); + xmm0 = _mm256_max_epi16(bit_met_p3_p7, bit_met_p3_p5); + xmm1 = _mm256_max_epi16(bit_met_p3_p3, bit_met_p3_p1); + xmm2 = _mm256_max_epi16(bit_met_p3_m1, bit_met_p3_m3); + xmm3 = _mm256_max_epi16(bit_met_p3_m5, bit_met_p3_m7); + xmm4 = _mm256_max_epi16(xmm0, xmm1); + xmm5 = _mm256_max_epi16(xmm2, xmm3); + logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm4); + logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm5); + xmm0 = _mm256_max_epi16(bit_met_p1_p7, bit_met_p1_p5); + xmm1 = _mm256_max_epi16(bit_met_p1_p3, bit_met_p1_p1); + xmm2 = _mm256_max_epi16(bit_met_p1_m1, bit_met_p1_m3); + xmm3 = _mm256_max_epi16(bit_met_p1_m5, bit_met_p1_m7); + xmm4 = _mm256_max_epi16(xmm0, xmm1); + xmm5 = _mm256_max_epi16(xmm2, xmm3); + logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm4); + logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm5); + + y0r = _mm256_subs_epi16(logmax_num_re0, logmax_den_re0); + + // Detection for 2nd bit (LTE mapping) + // bit = 1 + xmm0 = _mm256_max_epi16(bit_met_p7_m1, bit_met_p5_m1); + xmm1 = _mm256_max_epi16(bit_met_p3_m1, bit_met_p1_m1); + xmm2 = _mm256_max_epi16(bit_met_m1_m1, bit_met_m3_m1); + xmm3 = _mm256_max_epi16(bit_met_m5_m1, bit_met_m7_m1); + xmm4 = _mm256_max_epi16(xmm0, xmm1); + xmm5 = _mm256_max_epi16(xmm2, xmm3); + logmax_den_re0 = _mm256_max_epi16(xmm4, xmm5); + xmm0 = _mm256_max_epi16(bit_met_p7_m3, bit_met_p5_m3); + xmm1 = _mm256_max_epi16(bit_met_p3_m3, bit_met_p1_m3); + xmm2 = _mm256_max_epi16(bit_met_m1_m3, bit_met_m3_m3); + xmm3 = 
_mm256_max_epi16(bit_met_m5_m3, bit_met_m7_m3); + xmm4 = _mm256_max_epi16(xmm0, xmm1); + xmm5 = _mm256_max_epi16(xmm2, xmm3); + logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm4); + logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm5); + xmm0 = _mm256_max_epi16(bit_met_p7_m5, bit_met_p5_m5); + xmm1 = _mm256_max_epi16(bit_met_p3_m5, bit_met_p1_m5); + xmm2 = _mm256_max_epi16(bit_met_m1_m5, bit_met_m3_m5); + xmm3 = _mm256_max_epi16(bit_met_m5_m5, bit_met_m7_m5); + xmm4 = _mm256_max_epi16(xmm0, xmm1); + xmm5 = _mm256_max_epi16(xmm2, xmm3); + logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm4); + logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm5); + xmm0 = _mm256_max_epi16(bit_met_p7_m7, bit_met_p5_m7); + xmm1 = _mm256_max_epi16(bit_met_p3_m7, bit_met_p1_m7); + xmm2 = _mm256_max_epi16(bit_met_m1_m7, bit_met_m3_m7); + xmm3 = _mm256_max_epi16(bit_met_m5_m7, bit_met_m7_m7); + xmm4 = _mm256_max_epi16(xmm0, xmm1); + xmm5 = _mm256_max_epi16(xmm2, xmm3); + logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm4); + logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm5); + + // bit = 0 + xmm0 = _mm256_max_epi16(bit_met_p7_p1, bit_met_p5_p1); + xmm1 = _mm256_max_epi16(bit_met_p3_p1, bit_met_p1_p1); + xmm2 = _mm256_max_epi16(bit_met_m1_p1, bit_met_m3_p1); + xmm3 = _mm256_max_epi16(bit_met_m5_p1, bit_met_m7_p1); + xmm4 = _mm256_max_epi16(xmm0, xmm1); + xmm5 = _mm256_max_epi16(xmm2, xmm3); + logmax_num_re0 = _mm256_max_epi16(xmm4, xmm5); + xmm0 = _mm256_max_epi16(bit_met_p7_p3, bit_met_p5_p3); + xmm1 = _mm256_max_epi16(bit_met_p3_p3, bit_met_p1_p3); + xmm2 = _mm256_max_epi16(bit_met_m1_p3, bit_met_m3_p3); + xmm3 = _mm256_max_epi16(bit_met_m5_p3, bit_met_m7_p3); + xmm4 = _mm256_max_epi16(xmm0, xmm1); + xmm5 = _mm256_max_epi16(xmm2, xmm3); + logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm4); + logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm5); + xmm0 = _mm256_max_epi16(bit_met_p7_p5, bit_met_p5_p5); + xmm1 = _mm256_max_epi16(bit_met_p3_p5, bit_met_p1_p5); 
+ xmm2 = _mm256_max_epi16(bit_met_m1_p5, bit_met_m3_p5); + xmm3 = _mm256_max_epi16(bit_met_m5_p5, bit_met_m7_p5); + xmm4 = _mm256_max_epi16(xmm0, xmm1); + xmm5 = _mm256_max_epi16(xmm2, xmm3); + logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm4); + logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm5); + xmm0 = _mm256_max_epi16(bit_met_p7_p7, bit_met_p5_p7); + xmm1 = _mm256_max_epi16(bit_met_p3_p7, bit_met_p1_p7); + xmm2 = _mm256_max_epi16(bit_met_m1_p7, bit_met_m3_p7); + xmm3 = _mm256_max_epi16(bit_met_m5_p7, bit_met_m7_p7); + xmm4 = _mm256_max_epi16(xmm0, xmm1); + xmm5 = _mm256_max_epi16(xmm2, xmm3); + logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm4); + logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm5); + + y1r = _mm256_subs_epi16(logmax_num_re0, logmax_den_re0); + + // Detection for 3rd bit (LTE mapping) + xmm0 = _mm256_max_epi16(bit_met_m7_m7, bit_met_m7_m5); + xmm1 = _mm256_max_epi16(bit_met_m7_m3, bit_met_m7_m1); + xmm2 = _mm256_max_epi16(bit_met_m7_p1, bit_met_m7_p3); + xmm3 = _mm256_max_epi16(bit_met_m7_p5, bit_met_m7_p7); + xmm4 = _mm256_max_epi16(xmm0, xmm1); + xmm5 = _mm256_max_epi16(xmm2, xmm3); + logmax_den_re0 = _mm256_max_epi16(xmm4, xmm5); + xmm0 = _mm256_max_epi16(bit_met_m5_m7, bit_met_m5_m5); + xmm1 = _mm256_max_epi16(bit_met_m5_m3, bit_met_m5_m1); + xmm2 = _mm256_max_epi16(bit_met_m5_p1, bit_met_m5_p3); + xmm3 = _mm256_max_epi16(bit_met_m5_p5, bit_met_m5_p7); + xmm4 = _mm256_max_epi16(xmm0, xmm1); + xmm5 = _mm256_max_epi16(xmm2, xmm3); + logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm4); + logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm5); + xmm0 = _mm256_max_epi16(bit_met_p5_m7, bit_met_p5_m5); + xmm1 = _mm256_max_epi16(bit_met_p5_m3, bit_met_p5_m1); + xmm2 = _mm256_max_epi16(bit_met_p5_p1, bit_met_p5_p3); + xmm3 = _mm256_max_epi16(bit_met_p5_p5, bit_met_p5_p7); + xmm4 = _mm256_max_epi16(xmm0, xmm1); + xmm5 = _mm256_max_epi16(xmm2, xmm3); + logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm4); + logmax_den_re0 
= _mm256_max_epi16(logmax_den_re0, xmm5); + xmm0 = _mm256_max_epi16(bit_met_p7_m7, bit_met_p7_m5); + xmm1 = _mm256_max_epi16(bit_met_p7_m3, bit_met_p7_m1); + xmm2 = _mm256_max_epi16(bit_met_p7_p1, bit_met_p7_p3); + xmm3 = _mm256_max_epi16(bit_met_p7_p5, bit_met_p7_p7); + xmm4 = _mm256_max_epi16(xmm0, xmm1); + xmm5 = _mm256_max_epi16(xmm2, xmm3); + logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm4); + logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm5); + + xmm0 = _mm256_max_epi16(bit_met_m3_m7, bit_met_m3_m5); + xmm1 = _mm256_max_epi16(bit_met_m3_m3, bit_met_m3_m1); + xmm2 = _mm256_max_epi16(bit_met_m3_p1, bit_met_m3_p3); + xmm3 = _mm256_max_epi16(bit_met_m3_p5, bit_met_m3_p7); + xmm4 = _mm256_max_epi16(xmm0, xmm1); + xmm5 = _mm256_max_epi16(xmm2, xmm3); + logmax_num_re0 = _mm256_max_epi16(xmm4, xmm5); + xmm0 = _mm256_max_epi16(bit_met_m1_m7, bit_met_m1_m5); + xmm1 = _mm256_max_epi16(bit_met_m1_m3, bit_met_m1_m1); + xmm2 = _mm256_max_epi16(bit_met_m1_p1, bit_met_m1_p3); + xmm3 = _mm256_max_epi16(bit_met_m1_p5, bit_met_m1_p7); + xmm4 = _mm256_max_epi16(xmm0, xmm1); + xmm5 = _mm256_max_epi16(xmm2, xmm3); + logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm4); + logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm5); + xmm0 = _mm256_max_epi16(bit_met_p1_m7, bit_met_p1_m5); + xmm1 = _mm256_max_epi16(bit_met_p1_m3, bit_met_p1_m1); + xmm2 = _mm256_max_epi16(bit_met_p1_p1, bit_met_p1_p3); + xmm3 = _mm256_max_epi16(bit_met_p1_p5, bit_met_p1_p7); + xmm4 = _mm256_max_epi16(xmm0, xmm1); + xmm5 = _mm256_max_epi16(xmm2, xmm3); + logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm4); + logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm5); + xmm0 = _mm256_max_epi16(bit_met_p3_m7, bit_met_p3_m5); + xmm1 = _mm256_max_epi16(bit_met_p3_m3, bit_met_p3_m1); + xmm2 = _mm256_max_epi16(bit_met_p3_p1, bit_met_p3_p3); + xmm3 = _mm256_max_epi16(bit_met_p3_p5, bit_met_p3_p7); + xmm4 = _mm256_max_epi16(xmm0, xmm1); + xmm5 = _mm256_max_epi16(xmm2, xmm3); + logmax_num_re0 = 
_mm256_max_epi16(logmax_num_re0, xmm4); + logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm5); + + y2r = _mm256_subs_epi16(logmax_num_re0, logmax_den_re0); + + // Detection for 4th bit (LTE mapping) + xmm0 = _mm256_max_epi16(bit_met_p7_p7, bit_met_p5_p7); + xmm1 = _mm256_max_epi16(bit_met_p3_p7, bit_met_p1_p7); + xmm2 = _mm256_max_epi16(bit_met_m1_p7, bit_met_m3_p7); + xmm3 = _mm256_max_epi16(bit_met_m5_p7, bit_met_m7_p7); + xmm4 = _mm256_max_epi16(xmm0, xmm1); + xmm5 = _mm256_max_epi16(xmm2, xmm3); + logmax_den_re0 = _mm256_max_epi16(xmm4, xmm5); + xmm0 = _mm256_max_epi16(bit_met_p7_p5, bit_met_p5_p5); + xmm1 = _mm256_max_epi16(bit_met_p3_p5, bit_met_p1_p5); + xmm2 = _mm256_max_epi16(bit_met_m1_p5, bit_met_m3_p5); + xmm3 = _mm256_max_epi16(bit_met_m5_p5, bit_met_m5_p5); + xmm4 = _mm256_max_epi16(xmm0, xmm1); + xmm5 = _mm256_max_epi16(xmm2, xmm3); + logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm4); + logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm5); + xmm0 = _mm256_max_epi16(bit_met_p7_m5, bit_met_p5_m5); + xmm1 = _mm256_max_epi16(bit_met_p3_m5, bit_met_p1_m5); + xmm2 = _mm256_max_epi16(bit_met_m1_m5, bit_met_m3_m5); + xmm3 = _mm256_max_epi16(bit_met_m5_m5, bit_met_m7_m5); + xmm4 = _mm256_max_epi16(xmm0, xmm1); + xmm5 = _mm256_max_epi16(xmm2, xmm3); + logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm4); + logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm5); + xmm0 = _mm256_max_epi16(bit_met_p7_m7, bit_met_p5_m7); + xmm1 = _mm256_max_epi16(bit_met_p3_m7, bit_met_p1_m7); + xmm2 = _mm256_max_epi16(bit_met_m1_m7, bit_met_m3_m7); + xmm3 = _mm256_max_epi16(bit_met_m5_m7, bit_met_m7_m7); + xmm4 = _mm256_max_epi16(xmm0, xmm1); + xmm5 = _mm256_max_epi16(xmm2, xmm3); + logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm4); + logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm5); + + xmm0 = _mm256_max_epi16(bit_met_p7_m1, bit_met_p5_m1); + xmm1 = _mm256_max_epi16(bit_met_p3_m1, bit_met_p1_m1); + xmm2 = _mm256_max_epi16(bit_met_m1_m1, 
bit_met_m3_m1); + xmm3 = _mm256_max_epi16(bit_met_m5_m1, bit_met_m7_m1); + xmm4 = _mm256_max_epi16(xmm0, xmm1); + xmm5 = _mm256_max_epi16(xmm2, xmm3); + logmax_num_re0 = _mm256_max_epi16(xmm4, xmm5); + xmm0 = _mm256_max_epi16(bit_met_p7_m3, bit_met_p5_m3); + xmm1 = _mm256_max_epi16(bit_met_p3_m3, bit_met_p1_m3); + xmm2 = _mm256_max_epi16(bit_met_m1_m3, bit_met_m3_m3); + xmm3 = _mm256_max_epi16(bit_met_m5_m3, bit_met_m7_m3); + xmm4 = _mm256_max_epi16(xmm0, xmm1); + xmm5 = _mm256_max_epi16(xmm2, xmm3); + logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm4); + logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm5); + xmm0 = _mm256_max_epi16(bit_met_p7_p1, bit_met_p5_p1); + xmm1 = _mm256_max_epi16(bit_met_p3_p1, bit_met_p1_p1); + xmm2 = _mm256_max_epi16(bit_met_m1_p1, bit_met_m3_p1); + xmm3 = _mm256_max_epi16(bit_met_m5_p1, bit_met_m7_p1); + xmm4 = _mm256_max_epi16(xmm0, xmm1); + xmm5 = _mm256_max_epi16(xmm2, xmm3); + logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm4); + logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm5); + xmm0 = _mm256_max_epi16(bit_met_p7_p3, bit_met_p5_p3); + xmm1 = _mm256_max_epi16(bit_met_p3_p3, bit_met_p1_p3); + xmm2 = _mm256_max_epi16(bit_met_m1_p3, bit_met_m3_p3); + xmm3 = _mm256_max_epi16(bit_met_m5_p3, bit_met_m7_p3); + xmm4 = _mm256_max_epi16(xmm0, xmm1); + xmm5 = _mm256_max_epi16(xmm2, xmm3); + logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm4); + logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm5); + + y0i = _mm256_subs_epi16(logmax_num_re0, logmax_den_re0); + + + // Detection for 5th bit (LTE mapping) + xmm0 = _mm256_max_epi16(bit_met_m7_m7, bit_met_m7_m5); + xmm1 = _mm256_max_epi16(bit_met_m7_m3, bit_met_m7_m1); + xmm2 = _mm256_max_epi16(bit_met_m7_p1, bit_met_m7_p3); + xmm3 = _mm256_max_epi16(bit_met_m7_p5, bit_met_m7_p7); + xmm4 = _mm256_max_epi16(xmm0, xmm1); + xmm5 = _mm256_max_epi16(xmm2, xmm3); + logmax_den_re0 = _mm256_max_epi16(xmm4, xmm5); + xmm0 = _mm256_max_epi16(bit_met_m1_m7, bit_met_m1_m5); + xmm1 = 
_mm256_max_epi16(bit_met_m1_m3, bit_met_m1_m1); + xmm2 = _mm256_max_epi16(bit_met_m1_p1, bit_met_m1_p3); + xmm3 = _mm256_max_epi16(bit_met_m1_p5, bit_met_m1_p7); + xmm4 = _mm256_max_epi16(xmm0, xmm1); + xmm5 = _mm256_max_epi16(xmm2, xmm3); + logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm4); + logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm5); + xmm0 = _mm256_max_epi16(bit_met_p1_m7, bit_met_p1_m5); + xmm1 = _mm256_max_epi16(bit_met_p1_m3, bit_met_p1_m1); + xmm2 = _mm256_max_epi16(bit_met_p1_p1, bit_met_p1_p3); + xmm3 = _mm256_max_epi16(bit_met_p1_p5, bit_met_p1_p7); + xmm4 = _mm256_max_epi16(xmm0, xmm1); + xmm5 = _mm256_max_epi16(xmm2, xmm3); + logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm4); + logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm5); + xmm0 = _mm256_max_epi16(bit_met_p7_m7, bit_met_p7_m5); + xmm1 = _mm256_max_epi16(bit_met_p7_m3, bit_met_p7_m1); + xmm2 = _mm256_max_epi16(bit_met_p7_p1, bit_met_p7_p3); + xmm3 = _mm256_max_epi16(bit_met_p7_p5, bit_met_p7_p7); + xmm4 = _mm256_max_epi16(xmm0, xmm1); + xmm5 = _mm256_max_epi16(xmm2, xmm3); + logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm4); + logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm5); + + xmm0 = _mm256_max_epi16(bit_met_m5_m7, bit_met_m5_m5); + xmm1 = _mm256_max_epi16(bit_met_m5_m3, bit_met_m5_m1); + xmm2 = _mm256_max_epi16(bit_met_m5_p1, bit_met_m5_p3); + xmm3 = _mm256_max_epi16(bit_met_m5_p5, bit_met_m5_p7); + xmm4 = _mm256_max_epi16(xmm0, xmm1); + xmm5 = _mm256_max_epi16(xmm2, xmm3); + logmax_num_re0 = _mm256_max_epi16(xmm4, xmm5); + xmm0 = _mm256_max_epi16(bit_met_m3_m7, bit_met_m3_m5); + xmm1 = _mm256_max_epi16(bit_met_m3_m3, bit_met_m3_m1); + xmm2 = _mm256_max_epi16(bit_met_m3_p1, bit_met_m3_p3); + xmm3 = _mm256_max_epi16(bit_met_m3_p5, bit_met_m3_p7); + xmm4 = _mm256_max_epi16(xmm0, xmm1); + xmm5 = _mm256_max_epi16(xmm2, xmm3); + logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm4); + logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm5); + xmm0 = 
_mm256_max_epi16(bit_met_p3_m7, bit_met_p3_m5); + xmm1 = _mm256_max_epi16(bit_met_p3_m3, bit_met_p3_m1); + xmm2 = _mm256_max_epi16(bit_met_p3_p1, bit_met_p3_p3); + xmm3 = _mm256_max_epi16(bit_met_p3_p5, bit_met_p3_p7); + xmm4 = _mm256_max_epi16(xmm0, xmm1); + xmm5 = _mm256_max_epi16(xmm2, xmm3); + logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm4); + logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm5); + xmm0 = _mm256_max_epi16(bit_met_p5_m7, bit_met_p5_m5); + xmm1 = _mm256_max_epi16(bit_met_p5_m3, bit_met_p5_m1); + xmm2 = _mm256_max_epi16(bit_met_p5_p1, bit_met_p5_p3); + xmm3 = _mm256_max_epi16(bit_met_p5_p5, bit_met_p5_p7); + xmm4 = _mm256_max_epi16(xmm0, xmm1); + xmm5 = _mm256_max_epi16(xmm2, xmm3); + logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm4); + logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm5); + + y1i = _mm256_subs_epi16(logmax_num_re0, logmax_den_re0); + + // Detection for 6th bit (LTE mapping) + xmm0 = _mm256_max_epi16(bit_met_p7_p7, bit_met_p5_p7); + xmm1 = _mm256_max_epi16(bit_met_p3_p7, bit_met_p1_p7); + xmm2 = _mm256_max_epi16(bit_met_m1_p7, bit_met_m3_p7); + xmm3 = _mm256_max_epi16(bit_met_m5_p7, bit_met_m7_p7); + xmm4 = _mm256_max_epi16(xmm0, xmm1); + xmm5 = _mm256_max_epi16(xmm2, xmm3); + logmax_den_re0 = _mm256_max_epi16(xmm4, xmm5); + xmm0 = _mm256_max_epi16(bit_met_p7_p1, bit_met_p5_p1); + xmm1 = _mm256_max_epi16(bit_met_p3_p1, bit_met_p1_p1); + xmm2 = _mm256_max_epi16(bit_met_m1_p1, bit_met_m3_p1); + xmm3 = _mm256_max_epi16(bit_met_m5_p1, bit_met_m5_p1); + xmm4 = _mm256_max_epi16(xmm0, xmm1); + xmm5 = _mm256_max_epi16(xmm2, xmm3); + logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm4); + logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm5); + xmm0 = _mm256_max_epi16(bit_met_p7_m1, bit_met_p5_m1); + xmm1 = _mm256_max_epi16(bit_met_p3_m1, bit_met_p1_m1); + xmm2 = _mm256_max_epi16(bit_met_m1_m1, bit_met_m3_m1); + xmm3 = _mm256_max_epi16(bit_met_m5_m1, bit_met_m7_m1); + xmm4 = _mm256_max_epi16(xmm0, xmm1); + xmm5 = 
_mm256_max_epi16(xmm2, xmm3); + logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm4); + logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm5); + xmm0 = _mm256_max_epi16(bit_met_p7_m7, bit_met_p5_m7); + xmm1 = _mm256_max_epi16(bit_met_p3_m7, bit_met_p1_m7); + xmm2 = _mm256_max_epi16(bit_met_m1_m7, bit_met_m3_m7); + xmm3 = _mm256_max_epi16(bit_met_m5_m7, bit_met_m7_m7); + xmm4 = _mm256_max_epi16(xmm0, xmm1); + xmm5 = _mm256_max_epi16(xmm2, xmm3); + logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm4); + logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm5); + + xmm0 = _mm256_max_epi16(bit_met_p7_m5, bit_met_p5_m5); + xmm1 = _mm256_max_epi16(bit_met_p3_m5, bit_met_p1_m5); + xmm2 = _mm256_max_epi16(bit_met_m1_m5, bit_met_m3_m5); + xmm3 = _mm256_max_epi16(bit_met_m5_m5, bit_met_m7_m5); + xmm4 = _mm256_max_epi16(xmm0, xmm1); + xmm5 = _mm256_max_epi16(xmm2, xmm3); + logmax_num_re0 = _mm256_max_epi16(xmm4, xmm5); + xmm0 = _mm256_max_epi16(bit_met_p7_m3, bit_met_p5_m3); + xmm1 = _mm256_max_epi16(bit_met_p3_m3, bit_met_p1_m3); + xmm2 = _mm256_max_epi16(bit_met_m1_m3, bit_met_m3_m3); + xmm3 = _mm256_max_epi16(bit_met_m5_m3, bit_met_m7_m3); + xmm4 = _mm256_max_epi16(xmm0, xmm1); + xmm5 = _mm256_max_epi16(xmm2, xmm3); + logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm4); + logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm5); + xmm0 = _mm256_max_epi16(bit_met_p7_p3, bit_met_p5_p3); + xmm1 = _mm256_max_epi16(bit_met_p3_p3, bit_met_p1_p3); + xmm2 = _mm256_max_epi16(bit_met_m1_p3, bit_met_m3_p3); + xmm3 = _mm256_max_epi16(bit_met_m5_p3, bit_met_m7_p3); + xmm4 = _mm256_max_epi16(xmm0, xmm1); + xmm5 = _mm256_max_epi16(xmm2, xmm3); + logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm4); + logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm5); + xmm0 = _mm256_max_epi16(bit_met_p7_p5, bit_met_p5_p5); + xmm1 = _mm256_max_epi16(bit_met_p3_p5, bit_met_p1_p5); + xmm2 = _mm256_max_epi16(bit_met_m1_p5, bit_met_m3_p5); + xmm3 = _mm256_max_epi16(bit_met_m5_p5, 
bit_met_m7_p5); + xmm4 = _mm256_max_epi16(xmm0, xmm1); + xmm5 = _mm256_max_epi16(xmm2, xmm3); + logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm4); + logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm5); + + y2i = _mm256_subs_epi16(logmax_num_re0, logmax_den_re0); + + // map to output stream, difficult to do in SIMD since we have 6 16bit LLRs + // RE 1 + j = 48*i; + stream0_out[j + 0] = ((short *)&y0r)[0]; + stream0_out[j + 1] = ((short *)&y1r)[0]; + stream0_out[j + 2] = ((short *)&y2r)[0]; + stream0_out[j + 3] = ((short *)&y0i)[0]; + stream0_out[j + 4] = ((short *)&y1i)[0]; + stream0_out[j + 5] = ((short *)&y2i)[0]; + // RE 2 + stream0_out[j + 6] = ((short *)&y0r)[1]; + stream0_out[j + 7] = ((short *)&y1r)[1]; + stream0_out[j + 8] = ((short *)&y2r)[1]; + stream0_out[j + 9] = ((short *)&y0i)[1]; + stream0_out[j + 10] = ((short *)&y1i)[1]; + stream0_out[j + 11] = ((short *)&y2i)[1]; + // RE 3 + stream0_out[j + 12] = ((short *)&y0r)[2]; + stream0_out[j + 13] = ((short *)&y1r)[2]; + stream0_out[j + 14] = ((short *)&y2r)[2]; + stream0_out[j + 15] = ((short *)&y0i)[2]; + stream0_out[j + 16] = ((short *)&y1i)[2]; + stream0_out[j + 17] = ((short *)&y2i)[2]; + // RE 4 + stream0_out[j + 18] = ((short *)&y0r)[3]; + stream0_out[j + 19] = ((short *)&y1r)[3]; + stream0_out[j + 20] = ((short *)&y2r)[3]; + stream0_out[j + 21] = ((short *)&y0i)[3]; + stream0_out[j + 22] = ((short *)&y1i)[3]; + stream0_out[j + 23] = ((short *)&y2i)[3]; + // RE 5 + stream0_out[j + 24] = ((short *)&y0r)[4]; + stream0_out[j + 25] = ((short *)&y1r)[4]; + stream0_out[j + 26] = ((short *)&y2r)[4]; + stream0_out[j + 27] = ((short *)&y0i)[4]; + stream0_out[j + 28] = ((short *)&y1i)[4]; + stream0_out[j + 29] = ((short *)&y2i)[4]; + // RE 6 + stream0_out[j + 30] = ((short *)&y0r)[5]; + stream0_out[j + 31] = ((short *)&y1r)[5]; + stream0_out[j + 32] = ((short *)&y2r)[5]; + stream0_out[j + 33] = ((short *)&y0i)[5]; + stream0_out[j + 34] = ((short *)&y1i)[5]; + stream0_out[j + 35] = ((short *)&y2i)[5]; 
+ // RE 7 + stream0_out[j + 36] = ((short *)&y0r)[6]; + stream0_out[j + 37] = ((short *)&y1r)[6]; + stream0_out[j + 38] = ((short *)&y2r)[6]; + stream0_out[j + 39] = ((short *)&y0i)[6]; + stream0_out[j + 40] = ((short *)&y1i)[6]; + stream0_out[j + 41] = ((short *)&y2i)[6]; + // RE 8 + stream0_out[j + 42] = ((short *)&y0r)[7]; + stream0_out[j + 43] = ((short *)&y1r)[7]; + stream0_out[j + 44] = ((short *)&y2r)[7]; + stream0_out[j + 45] = ((short *)&y0i)[7]; + stream0_out[j + 46] = ((short *)&y1i)[7]; + stream0_out[j + 47] = ((short *)&y2i)[7]; + + // RE 9 + stream0_out[j + 48] = ((short *)&y0r)[8]; + stream0_out[j + 49] = ((short *)&y1r)[8]; + stream0_out[j + 50] = ((short *)&y2r)[8]; + stream0_out[j + 51] = ((short *)&y0i)[8]; + stream0_out[j + 52] = ((short *)&y1i)[8]; + stream0_out[j + 53] = ((short *)&y2i)[8]; + // RE 10 + stream0_out[j + 54] = ((short *)&y0r)[9]; + stream0_out[j + 55] = ((short *)&y1r)[9]; + stream0_out[j + 56] = ((short *)&y2r)[9]; + stream0_out[j + 57] = ((short *)&y0i)[9]; + stream0_out[j + 58] = ((short *)&y1i)[9]; + stream0_out[j + 59] = ((short *)&y2i)[9]; + // RE 11 + stream0_out[j + 60] = ((short *)&y0r)[10]; + stream0_out[j + 61] = ((short *)&y1r)[10]; + stream0_out[j + 62] = ((short *)&y2r)[10]; + stream0_out[j + 63] = ((short *)&y0i)[10]; + stream0_out[j + 64] = ((short *)&y1i)[10]; + stream0_out[j + 65] = ((short *)&y2i)[10]; + // RE 12 + stream0_out[j + 66] = ((short *)&y0r)[11]; + stream0_out[j + 67] = ((short *)&y1r)[11]; + stream0_out[j + 68] = ((short *)&y2r)[11]; + stream0_out[j + 69] = ((short *)&y0i)[11]; + stream0_out[j + 70] = ((short *)&y1i)[11]; + stream0_out[j + 71] = ((short *)&y2i)[11]; + // RE 13 + stream0_out[j + 72] = ((short *)&y0r)[12]; + stream0_out[j + 73] = ((short *)&y1r)[12]; + stream0_out[j + 74] = ((short *)&y2r)[12]; + stream0_out[j + 75] = ((short *)&y0i)[12]; + stream0_out[j + 76] = ((short *)&y1i)[12]; + stream0_out[j + 77] = ((short *)&y2i)[12]; + // RE 14 + stream0_out[j + 78] = ((short *)&y0r)[13]; + 
stream0_out[j + 79] = ((short *)&y1r)[13]; + stream0_out[j + 80] = ((short *)&y2r)[13]; + stream0_out[j + 81] = ((short *)&y0i)[13]; + stream0_out[j + 82] = ((short *)&y1i)[13]; + stream0_out[j + 83] = ((short *)&y2i)[13]; + // RE 15 + stream0_out[j + 84] = ((short *)&y0r)[14]; + stream0_out[j + 85] = ((short *)&y1r)[14]; + stream0_out[j + 86] = ((short *)&y2r)[14]; + stream0_out[j + 87] = ((short *)&y0i)[14]; + stream0_out[j + 88] = ((short *)&y1i)[14]; + stream0_out[j + 89] = ((short *)&y2i)[14]; + // RE 16 + stream0_out[j + 90] = ((short *)&y0r)[15]; + stream0_out[j + 91] = ((short *)&y1r)[15]; + stream0_out[j + 92] = ((short *)&y2r)[15]; + stream0_out[j + 93] = ((short *)&y0i)[15]; + stream0_out[j + 94] = ((short *)&y1i)[15]; + stream0_out[j + 95] = ((short *)&y2i)[15]; + +#elif defined(__arm__) + +#endif + + } + +#if defined(__x86_64__) || defined(__i386__) + _mm_empty(); + _m_empty(); +#endif +} diff --git a/openair1/PHY/LTE_TRANSPORT/drs_modulation.c b/openair1/PHY/LTE_TRANSPORT/drs_modulation.c new file mode 100644 index 0000000000000000000000000000000000000000..a0bde219b57706e59b5d5746f8408bc1307f46be --- /dev/null +++ b/openair1/PHY/LTE_TRANSPORT/drs_modulation.c @@ -0,0 +1,173 @@ +/* + * Licensed to the OpenAirInterface (OAI) Software Alliance under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The OpenAirInterface Software Alliance licenses this file to You under + * the OAI Public License, Version 1.1 (the "License"); you may not use this file + * except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.openairinterface.org/?page_id=698 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + *------------------------------------------------------------------------------- + * For more information about the OpenAirInterface (OAI) Software Alliance: + * contact@openairinterface.org + */ + +/*! \file PHY/LTE_TRANSPORT/drs_modulation.c +* \brief Top-level routines for generating the Demodulation Reference Signals from 36-211, V8.6 2009-03 +* \author R. Knopp, F. Kaltenberger, A. Bhamri +* \date 2011 +* \version 0.1 +* \company Eurecom +* \email: knopp@eurecom.fr,florian.kaltenberger@eurecom.fr,ankit.bhamri@eurecom.fr +* \note +* \warning +*/ +#include "PHY/defs.h" +#include "PHY/extern.h" +#include "PHY/sse_intrin.h" +//#define DEBUG_DRS + +int generate_drs_pusch(PHY_VARS_UE *ue, + UE_rxtx_proc_t *proc, + uint8_t eNB_id, + short amp, + unsigned int subframe, + unsigned int first_rb, + unsigned int nb_rb, + uint8_t ant) +{ + + uint16_t k,l,Msc_RS,Msc_RS_idx,rb,drs_offset; + uint16_t * Msc_idx_ptr; + int subframe_offset,re_offset,symbol_offset; + + //uint32_t phase_shift; // phase shift for cyclic delay in DM RS + //uint8_t alpha_ind; + + int16_t alpha_re[12] = {32767, 28377, 16383, 0,-16384, -28378,-32768,-28378,-16384, -1, 16383, 28377}; + int16_t alpha_im[12] = {0, 16383, 28377, 32767, 28377, 16383, 0,-16384,-28378,-32768,-28378,-16384}; + + uint8_t cyclic_shift,cyclic_shift0,cyclic_shift1; + LTE_DL_FRAME_PARMS *frame_parms = &ue->frame_parms; + int32_t *txdataF = ue->common_vars.txdataF[ant]; + uint32_t u,v,alpha_ind; + uint32_t u0=frame_parms->pusch_config_common.ul_ReferenceSignalsPUSCH.grouphop[subframe<<1]; + uint32_t u1=frame_parms->pusch_config_common.ul_ReferenceSignalsPUSCH.grouphop[1+(subframe<<1)]; + uint32_t v0=frame_parms->pusch_config_common.ul_ReferenceSignalsPUSCH.seqhop[subframe<<1]; + uint32_t v1=frame_parms->pusch_config_common.ul_ReferenceSignalsPUSCH.seqhop[1+(subframe<<1)]; + int32_t ref_re,ref_im; + uint8_t harq_pid = 
subframe2harq_pid(frame_parms,proc->frame_tx,subframe); + + cyclic_shift0 = (frame_parms->pusch_config_common.ul_ReferenceSignalsPUSCH.cyclicShift + + ue->ulsch[eNB_id]->harq_processes[harq_pid]->n_DMRS2 + + frame_parms->pusch_config_common.ul_ReferenceSignalsPUSCH.nPRS[subframe<<1]+ + ((ue->ulsch[0]->cooperation_flag==2)?10:0)+ + ant*6) % 12; + // printf("PUSCH.cyclicShift %d, n_DMRS2 %d, nPRS %d\n",frame_parms->pusch_config_common.ul_ReferenceSignalsPUSCH.cyclicShift,ue->ulsch[eNB_id]->n_DMRS2,ue->lte_frame_parms.pusch_config_common.ul_ReferenceSignalsPUSCH.nPRS[subframe<<1]); + cyclic_shift1 = (frame_parms->pusch_config_common.ul_ReferenceSignalsPUSCH.cyclicShift + + ue->ulsch[eNB_id]->harq_processes[harq_pid]->n_DMRS2 + + frame_parms->pusch_config_common.ul_ReferenceSignalsPUSCH.nPRS[(subframe<<1)+1]+ + ((ue->ulsch[0]->cooperation_flag==2)?10:0)+ + ant*6) % 12; + + // cyclic_shift0 = 0; + // cyclic_shift1 = 0; + Msc_RS = 12*nb_rb; + + Msc_idx_ptr = (uint16_t*) bsearch(&Msc_RS, dftsizes, 33, sizeof(uint16_t), compareints); + + if (Msc_idx_ptr) + Msc_RS_idx = Msc_idx_ptr - dftsizes; + else { + LOG_I(PHY,"generate_drs_pusch: index for Msc_RS=%d not found\n",Msc_RS); + return(-1); + } + + for (l = (3 - frame_parms->Ncp),u=u0,v=v0,cyclic_shift=cyclic_shift0; + l<frame_parms->symbols_per_tti; + l += (7 - frame_parms->Ncp),u=u1,v=v1,cyclic_shift=cyclic_shift1) { + + drs_offset = 0; +#ifdef DEBUG_DRS + printf("drs_modulation: Msc_RS = %d, Msc_RS_idx = %d, u=%d,v=%d\n",Msc_RS, Msc_RS_idx,u,v); +#endif + + + re_offset = frame_parms->first_carrier_offset; + subframe_offset = subframe*frame_parms->symbols_per_tti*frame_parms->ofdm_symbol_size; + symbol_offset = subframe_offset + frame_parms->ofdm_symbol_size*l; + + +#ifdef DEBUG_DRS + printf("generate_drs_pusch: symbol_offset %d, subframe offset %d, cyclic shift %d\n",symbol_offset,subframe_offset,cyclic_shift); +#endif + alpha_ind = 0; + + for (rb=0; rb<frame_parms->N_RB_UL; rb++) { + + if ((rb >= first_rb) && 
(rb<(first_rb+nb_rb))) { + +#ifdef DEBUG_DRS + printf("generate_drs_pusch: doing RB %d, re_offset=%d, drs_offset=%d,cyclic shift %d\n",rb,re_offset,drs_offset,cyclic_shift); +#endif + + for (k=0; k<12; k++) { + ref_re = (int32_t) ul_ref_sigs[u][v][Msc_RS_idx][drs_offset<<1]; + ref_im = (int32_t) ul_ref_sigs[u][v][Msc_RS_idx][(drs_offset<<1)+1]; + + ((int16_t*) txdataF)[2*(symbol_offset + re_offset)] = (int16_t) (((ref_re*alpha_re[alpha_ind]) - + (ref_im*alpha_im[alpha_ind]))>>15); + ((int16_t*) txdataF)[2*(symbol_offset + re_offset)+1] = (int16_t) (((ref_re*alpha_im[alpha_ind]) + + (ref_im*alpha_re[alpha_ind]))>>15); + ((short*) txdataF)[2*(symbol_offset + re_offset)] = (short) ((((short*) txdataF)[2*(symbol_offset + re_offset)]*(int32_t)amp)>>15); + ((short*) txdataF)[2*(symbol_offset + re_offset)+1] = (short) ((((short*) txdataF)[2*(symbol_offset + re_offset)+1]*(int32_t)amp)>>15); + + + alpha_ind = (alpha_ind + cyclic_shift); + + if (alpha_ind > 11) + alpha_ind-=12; + +#ifdef DEBUG_DRS + printf("symbol_offset %d, alpha_ind %d , re_offset %d : (%d,%d)\n", + symbol_offset, + alpha_ind, + re_offset, + ((short*) txdataF)[2*(symbol_offset + re_offset)], + ((short*) txdataF)[2*(symbol_offset + re_offset)+1]); + +#endif // DEBUG_DRS + re_offset++; + drs_offset++; + + if (re_offset >= frame_parms->ofdm_symbol_size) + re_offset = 0; + } + + } else { + re_offset+=12; // go to next RB + + // check if we crossed the symbol boundary and skip DC + + if (re_offset >= frame_parms->ofdm_symbol_size) { + if (frame_parms->N_RB_DL&1) // odd number of RBs + re_offset=6; + else // even number of RBs (doesn't straddle DC) + re_offset=0; + } + + + } + } + } + + return(0); +} + diff --git a/openair1/PHY/LTE_TRANSPORT/ulsch_coding.c b/openair1/PHY/LTE_TRANSPORT/ulsch_coding.c new file mode 100644 index 0000000000000000000000000000000000000000..09420f48ab1a7029fdbbb2015fd7f28f47ffcd8c --- /dev/null +++ b/openair1/PHY/LTE_TRANSPORT/ulsch_coding.c @@ -0,0 +1,933 @@ +/* + * Licensed to the 
OpenAirInterface (OAI) Software Alliance under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The OpenAirInterface Software Alliance licenses this file to You under + * the OAI Public License, Version 1.1 (the "License"); you may not use this file + * except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.openairinterface.org/?page_id=698 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + *------------------------------------------------------------------------------- + * For more information about the OpenAirInterface (OAI) Software Alliance: + * contact@openairinterface.org + */ + +/*! \file PHY/LTE_TRANSPORT/ulsch_coding.c +* \brief Top-level routines for coding the ULSCH transport channel as described in 36.212 V8.6 2009-03 +* \author R. 
Knopp +* \date 2011 +* \version 0.1 +* \company Eurecom +* \email: knopp@eurecom.fr +* \note +* \warning +*/ + +#include "PHY/defs.h" +#include "PHY/extern.h" + +#include "PHY/CODING/defs.h" +#include "PHY/CODING/extern.h" +#include "PHY/CODING/lte_interleaver_inline.h" +#include "PHY/LTE_TRANSPORT/defs.h" +#include "defs.h" +#include "extern.h" +#include "SIMULATION/ETH_TRANSPORT/extern.h" +#include "UTIL/LOG/vcd_signal_dumper.h" + +//#define DEBUG_ULSCH_CODING +//#define DEBUG_ULSCH_FREE 1 + +/* +#define is_not_pilot(pilots,first_pilot,re) (pilots==0) || \ + ((pilots==1)&&(first_pilot==1)&&(((re>2)&&(re<6))||((re>8)&&(re<12)))) || \ + ((pilots==1)&&(first_pilot==0)&&(((re<3))||((re>5)&&(re<9)))) \ +*/ +#define is_not_pilot(pilots,first_pilot,re) (1) + + + + +void free_ue_ulsch(LTE_UE_ULSCH_t *ulsch) +{ + int i; + int r; + + if (ulsch) { +#ifdef DEBUG_ULSCH_FREE + printf("Freeing ulsch %p\n",ulsch); +#endif + + for (i=0; i<8; i++) { + if (ulsch->harq_processes[i]) { + if (ulsch->harq_processes[i]->b) { + free16(ulsch->harq_processes[i]->b,MAX_ULSCH_PAYLOAD_BYTES); + ulsch->harq_processes[i]->b = NULL; + } + for (r=0; r<MAX_NUM_ULSCH_SEGMENTS; r++) { + if (ulsch->harq_processes[i]->c[r]) { + free16(ulsch->harq_processes[i]->c[r],((r==0)?8:0) + 3+768); + ulsch->harq_processes[i]->c[r] = NULL; + } + } + + free16(ulsch->harq_processes[i],sizeof(LTE_UL_UE_HARQ_t)); + ulsch->harq_processes[i] = NULL; + } + } + free16(ulsch,sizeof(LTE_UE_ULSCH_t)); + ulsch = NULL; + } + +} + +LTE_UE_ULSCH_t *new_ue_ulsch(unsigned char N_RB_UL, uint8_t abstraction_flag) +{ + + LTE_UE_ULSCH_t *ulsch; + unsigned char exit_flag = 0,i,j,r; + unsigned char bw_scaling =1; + + switch (N_RB_UL) { + case 6: + bw_scaling =16; + break; + + case 25: + bw_scaling =4; + break; + + case 50: + bw_scaling =2; + break; + + default: + bw_scaling =1; + break; + } + + ulsch = (LTE_UE_ULSCH_t *)malloc16(sizeof(LTE_UE_ULSCH_t)); + + if (ulsch) { + memset(ulsch,0,sizeof(LTE_UE_ULSCH_t)); + + ulsch->Mlimit = 4; + 
+ for (i=0; i<8; i++) { + + ulsch->harq_processes[i] = (LTE_UL_UE_HARQ_t *)malloc16(sizeof(LTE_UL_UE_HARQ_t)); + + // printf("ulsch->harq_processes[%d] %p\n",i,ulsch->harq_processes[i]); + if (ulsch->harq_processes[i]) { + memset(ulsch->harq_processes[i], 0, sizeof(LTE_UL_UE_HARQ_t)); + ulsch->harq_processes[i]->b = (unsigned char*)malloc16(MAX_ULSCH_PAYLOAD_BYTES/bw_scaling); + + if (ulsch->harq_processes[i]->b) + memset(ulsch->harq_processes[i]->b,0,MAX_ULSCH_PAYLOAD_BYTES/bw_scaling); + else { + LOG_E(PHY,"Can't get b\n"); + exit_flag=1; + } + + if (abstraction_flag==0) { + for (r=0; r<MAX_NUM_ULSCH_SEGMENTS; r++) { + ulsch->harq_processes[i]->c[r] = (unsigned char*)malloc16(((r==0)?8:0) + 3+768); // account for filler in first segment and CRCs for multiple segment case + + if (ulsch->harq_processes[i]->c[r]) + memset(ulsch->harq_processes[i]->c[r],0,((r==0)?8:0) + 3+768); + else { + LOG_E(PHY,"Can't get c\n"); + exit_flag=2; + } + } + } + + ulsch->harq_processes[i]->subframe_scheduling_flag = 0; + ulsch->harq_processes[i]->first_tx = 1; + } else { + LOG_E(PHY,"Can't get harq_p %d\n",i); + exit_flag=3; + } + } + + if ((abstraction_flag == 0) && (exit_flag==0)) { + for (i=0; i<8; i++) + for (j=0; j<96; j++) + for (r=0; r<MAX_NUM_ULSCH_SEGMENTS; r++) + ulsch->harq_processes[i]->d[r][j] = LTE_NULL; + + return(ulsch); + } else if (abstraction_flag==1) + return(ulsch); + } + + LOG_E(PHY,"new_ue_ulsch exit flag, size of %d , %zu\n",exit_flag, sizeof(LTE_UE_ULSCH_t)); + free_ue_ulsch(ulsch); + return(NULL); + + +} + + +uint32_t ulsch_encoding(uint8_t *a, + PHY_VARS_UE *ue, + uint8_t harq_pid, + uint8_t eNB_id, + uint8_t subframe_rx, + uint8_t tmode, + uint8_t control_only_flag, + uint8_t Nbundled) +{ + + time_stats_t *seg_stats=&ue->ulsch_segmentation_stats; + time_stats_t *rm_stats=&ue->ulsch_rate_matching_stats; + time_stats_t *te_stats=&ue->ulsch_turbo_encoding_stats; + time_stats_t *i_stats=&ue->ulsch_interleaving_stats; + time_stats_t 
*m_stats=&ue->ulsch_multiplexing_stats; + + // uint16_t offset; + uint32_t crc=1; + uint16_t iind; + uint32_t A; + uint8_t Q_m=0; + uint32_t Kr=0,Kr_bytes,r,r_offset=0; + uint8_t y[6*14*1200],*yptr;; + uint8_t *columnset; + uint32_t sumKr=0; + uint32_t Qprime,L,G,Q_CQI=0,Q_RI=0,Q_ACK=0,H=0,Hprime=0,Hpp=0,Cmux=0,Rmux=0,Rmux_prime=0; + uint32_t Qprime_ACK=0,Qprime_CQI=0,Qprime_RI=0,len_ACK=0,len_RI=0; + // uint32_t E; + uint8_t ack_parity; + uint32_t i,q,j,iprime,j2; + uint16_t o_RCC; + uint8_t o_flip[8]; + uint32_t wACK_idx; + LTE_DL_FRAME_PARMS *frame_parms=&ue->frame_parms; + PHY_MEASUREMENTS *meas = &ue->measurements; + LTE_UE_ULSCH_t *ulsch=ue->ulsch[eNB_id]; + LTE_UE_DLSCH_t **dlsch = ue->dlsch[0][eNB_id]; + uint16_t rnti = 0xffff; + + if (!ulsch) { + LOG_E(PHY,"Null ulsch ptr %p\n",ulsch); + return(-1); + } + + if (harq_pid >= 8) { + LOG_E(PHY,"Illegal harq_pid %d\n",harq_pid); + return(-1); + } + + if (ulsch->harq_processes[harq_pid]->O_ACK > 2) { + LOG_E(PHY,"Illegal O_ACK %d\n",ulsch->harq_processes[harq_pid]->O_ACK); + return(-1); + } + + if (ulsch->O_RI > 1) { + LOG_E(PHY,"Illegal O_RI %d\n",ulsch->O_RI); + return(-1); + } + + VCD_SIGNAL_DUMPER_DUMP_FUNCTION_BY_NAME(VCD_SIGNAL_DUMPER_FUNCTIONS_UE_ULSCH_ENCODING, VCD_FUNCTION_IN); + + // fill CQI/PMI information + if (ulsch->O>0) { + VCD_SIGNAL_DUMPER_DUMP_FUNCTION_BY_NAME(VCD_SIGNAL_DUMPER_FUNCTIONS_UE_ULSCH_ENCODING_FILL_CQI, VCD_FUNCTION_IN); + rnti = ue->pdcch_vars[ue->current_thread_id[subframe_rx]][eNB_id]->crnti; + fill_CQI(ulsch,meas,0,harq_pid,ue->frame_parms.N_RB_DL,rnti, tmode,ue->sinr_eff); + + LOG_D(PHY,"ULSCH Encoding rnti %x \n", rnti); + print_CQI(ulsch->o,ulsch->uci_format,0,ue->frame_parms.N_RB_DL); + + // save PUSCH pmi for later (transmission modes 4,5,6) + if (dlsch[0]) { + //LOG_I(PHY,"XXX saving pmi for DL %x\n",pmi2hex_2Ar1(((wideband_cqi_rank1_2A_5MHz *)ulsch->o)->pmi)); + dlsch[0]->pmi_alloc = ((wideband_cqi_rank1_2A_5MHz *)ulsch->o)->pmi; + } + 
VCD_SIGNAL_DUMPER_DUMP_FUNCTION_BY_NAME(VCD_SIGNAL_DUMPER_FUNCTIONS_UE_ULSCH_ENCODING_FILL_CQI, VCD_FUNCTION_OUT); + } + + if (ulsch->O<=32) { + o_flip[0] = ulsch->o[3]; + o_flip[1] = ulsch->o[2]; + o_flip[2] = ulsch->o[1]; + o_flip[3] = ulsch->o[0]; + } else { + o_flip[0] = ulsch->o[7]; + o_flip[1] = ulsch->o[6]; + o_flip[2] = ulsch->o[5]; + o_flip[3] = ulsch->o[4]; + o_flip[4] = ulsch->o[3]; + o_flip[5] = ulsch->o[2]; + o_flip[6] = ulsch->o[1]; + o_flip[7] = ulsch->o[0]; + } + + if (control_only_flag == 0) { + A=ulsch->harq_processes[harq_pid]->TBS; + Q_m = get_Qm_ul(ulsch->harq_processes[harq_pid]->mcs); + + ulsch->harq_processes[harq_pid]->control_only = 0; + +#ifdef DEBUG_ULSCH_CODING + printf("[PHY][UE] ULSCH coding : A %d, Qm %d, mcs %d, harq_pid %d, round %d, RV %d\n", + ulsch->harq_processes[harq_pid]->TBS, + Q_m, + ulsch->harq_processes[harq_pid]->mcs, + harq_pid, + ulsch->harq_processes[harq_pid]->round, + ulsch->harq_processes[harq_pid]->rvidx); + + for (i=0; i<ulsch->harq_processes[harq_pid]->O_ACK; i++) + printf("ulsch_coding: o_ACK[%d] %d\n",i,ulsch->o_ACK[i]); + + for (i=0; i<ulsch->O_RI; i++) + printf("ulsch_coding: o_RI[%d] %d\n",i,ulsch->o_RI[i]); + + printf("ulsch_coding: O=%d\n",ulsch->O); + + for (i=0; i<1+((8+ulsch->O)/8); i++) { + // ulsch->o[i] = i; + printf("ulsch_coding: O[%d] %d\n",i,ulsch->o[i]); + } + + if ((tmode != 4)) + print_CQI(ulsch->o,wideband_cqi_rank1_2A,0,ue->frame_parms.N_RB_DL); + else + print_CQI(ulsch->o,HLC_subband_cqi_rank1_2A,0,ue->frame_parms.N_RB_DL); + +#endif + + if (ulsch->harq_processes[harq_pid]->round == 0) { // this is a new packet + + start_meas(seg_stats); + // Add 24-bit crc (polynomial A) to payload + crc = crc24a(a, + A)>>8; + + a[A>>3] = ((uint8_t*)&crc)[2]; + a[1+(A>>3)] = ((uint8_t*)&crc)[1]; + a[2+(A>>3)] = ((uint8_t*)&crc)[0]; + + ulsch->harq_processes[harq_pid]->B = A+24; + ulsch->harq_processes[harq_pid]->b = a; + lte_segmentation(ulsch->harq_processes[harq_pid]->b, + 
ulsch->harq_processes[harq_pid]->c, + ulsch->harq_processes[harq_pid]->B, + &ulsch->harq_processes[harq_pid]->C, + &ulsch->harq_processes[harq_pid]->Cplus, + &ulsch->harq_processes[harq_pid]->Cminus, + &ulsch->harq_processes[harq_pid]->Kplus, + &ulsch->harq_processes[harq_pid]->Kminus, + &ulsch->harq_processes[harq_pid]->F); + + stop_meas(seg_stats); + + for (r=0; r<ulsch->harq_processes[harq_pid]->C; r++) { + if (r<ulsch->harq_processes[harq_pid]->Cminus) + Kr = ulsch->harq_processes[harq_pid]->Kminus; + else + Kr = ulsch->harq_processes[harq_pid]->Kplus; + + Kr_bytes = Kr>>3; + + // get interleaver index for Turbo code (lookup in Table 5.1.3-3 36-212, V8.6 2009-03, p. 13-14) + if (Kr_bytes<=64) + iind = (Kr_bytes-5); + else if (Kr_bytes <=128) + iind = 59 + ((Kr_bytes-64)>>1); + else if (Kr_bytes <= 256) + iind = 91 + ((Kr_bytes-128)>>2); + else if (Kr_bytes <= 768) + iind = 123 + ((Kr_bytes-256)>>3); + else { + LOG_E(PHY,"ulsch_coding: Illegal codeword size %d!!!\n",Kr_bytes); + VCD_SIGNAL_DUMPER_DUMP_FUNCTION_BY_NAME(VCD_SIGNAL_DUMPER_FUNCTIONS_UE_ULSCH_ENCODING, VCD_FUNCTION_OUT); + return(-1); + } + + +#ifdef DEBUG_ULSCH_CODING + printf("Generating Code Segment %d (%d bits)\n",r,Kr); + // generate codewords + + printf("bits_per_codeword (Kr)= %d\n",Kr); + printf("N_RB = %d\n",ulsch->harq_processes[harq_pid]->nb_rb); + printf("Ncp %d\n",frame_parms->Ncp); + printf("Qm %d\n",Q_m); +#endif + + // offset=0; + + +#ifdef DEBUG_ULSCH_CODING + printf("Encoding ... iind %d f1 %d, f2 %d\n",iind,f1f2mat_old[iind*2],f1f2mat_old[(iind*2)+1]); +#endif + start_meas(te_stats); + encoder(ulsch->harq_processes[harq_pid]->c[r], + Kr>>3, + &ulsch->harq_processes[harq_pid]->d[r][96], + (r==0) ? 
ulsch->harq_processes[harq_pid]->F : 0, + f1f2mat_old[iind*2], // f1 (see 36212-820, page 14) + f1f2mat_old[(iind*2)+1] // f2 (see 36212-820, page 14) + ); + stop_meas(te_stats); +#ifdef DEBUG_ULSCH_CODING + + if (r==0) + write_output("enc_output0.m","enc0",&ulsch->harq_processes[harq_pid]->d[r][96],(3*8*Kr_bytes)+12,1,4); + +#endif + start_meas(i_stats); + ulsch->harq_processes[harq_pid]->RTC[r] = + sub_block_interleaving_turbo(4+(Kr_bytes*8), + &ulsch->harq_processes[harq_pid]->d[r][96], + ulsch->harq_processes[harq_pid]->w[r]); + stop_meas(i_stats); + } + + } + + if (ulsch->harq_processes[harq_pid]->C == 0) { + LOG_E(PHY,"null segment\n"); + VCD_SIGNAL_DUMPER_DUMP_FUNCTION_BY_NAME(VCD_SIGNAL_DUMPER_FUNCTIONS_UE_ULSCH_ENCODING, VCD_FUNCTION_OUT); + return(-1); + } + + sumKr = 0; + + for (r=0; r<ulsch->harq_processes[harq_pid]->C; r++) { + if (r<ulsch->harq_processes[harq_pid]->Cminus) + Kr = ulsch->harq_processes[harq_pid]->Kminus; + else + Kr = ulsch->harq_processes[harq_pid]->Kplus; + + sumKr += Kr; + } + } else { // This is a control-only PUSCH, set sumKr to O_CQI-MIN + ulsch->harq_processes[harq_pid]->control_only = 1; + sumKr = ulsch->O_CQI_MIN; + } + + ulsch->harq_processes[harq_pid]->sumKr = sumKr; + // Compute Q_ri (p. 23 36-212) + + Qprime = ulsch->O_RI*ulsch->harq_processes[harq_pid]->Msc_initial*ulsch->harq_processes[harq_pid]->Nsymb_initial * ulsch->beta_offset_ri_times8; + + if (Qprime > 0) { + if ((Qprime % (8*sumKr)) > 0) + Qprime = 1+(Qprime/(8*sumKr)); + else + Qprime = Qprime/(8*sumKr); + + if (Qprime > 4*ulsch->harq_processes[harq_pid]->nb_rb * 12) + Qprime = 4*ulsch->harq_processes[harq_pid]->nb_rb * 12; + } + + Q_RI = Q_m*Qprime; + Qprime_RI = Qprime; + + // Compute Q_ack (p. 
23 36-212) + Qprime = ulsch->harq_processes[harq_pid]->O_ACK*ulsch->harq_processes[harq_pid]->Msc_initial*ulsch->harq_processes[harq_pid]->Nsymb_initial * ulsch->beta_offset_harqack_times8; + + if (Qprime > 0) { + if ((Qprime % (8*sumKr)) > 0) + Qprime = 1+(Qprime/(8*sumKr)); + else + Qprime = Qprime/(8*sumKr); + + if (Qprime > 4*ulsch->harq_processes[harq_pid]->nb_rb * 12) + Qprime = 4*ulsch->harq_processes[harq_pid]->nb_rb * 12; + } + + Q_ACK = Qprime * Q_m; + Qprime_ACK = Qprime; + + LOG_D(PHY,"UE (%x/%d) O_ACK %d, Mcs_initial %d, Nsymb_initial %d, beta_offset_harqack*8 %d, sum Kr %d, Qprime_ACK %d, Q_ACK %d\n", + rnti, harq_pid, + ulsch->harq_processes[harq_pid]->O_ACK, + ulsch->harq_processes[harq_pid]->Msc_initial, + ulsch->harq_processes[harq_pid]->Nsymb_initial, + ulsch->beta_offset_harqack_times8, + sumKr, + Qprime_ACK, + Q_ACK); + + // Compute Q_cqi, assume O>11, p. 26 36-212 + if (control_only_flag == 0) { + + if (ulsch->O < 12) + L=0; + else + L=8; + + if (ulsch->O > 0) + Qprime = (ulsch->O + L) * ulsch->harq_processes[harq_pid]->Msc_initial*ulsch->harq_processes[harq_pid]->Nsymb_initial * ulsch->beta_offset_cqi_times8; + else + Qprime = 0; + + if (Qprime > 0) { + if ((Qprime % (8*sumKr)) > 0) + Qprime = 1+(Qprime/(8*sumKr)); + else + Qprime = Qprime/(8*sumKr); + } + + G = ulsch->harq_processes[harq_pid]->nb_rb * (12 * Q_m) * (ulsch->Nsymb_pusch); + + if (Qprime > (G - ulsch->O_RI)) + Qprime = G - ulsch->O_RI; + + Q_CQI = Q_m * Qprime; + Qprime_CQI = Qprime; + + + + G = G - Q_RI - Q_CQI; + ulsch->harq_processes[harq_pid]->G = G; + +/* + LOG_I(PHY,"ULSCH Encoding G %d, Q_RI %d (O_RI%d, Msc_initial %d, Nsymb_initial%d, beta_offset_ri_times8 %d), Q_CQI %d, Q_ACK %d \n",G,Q_RI,ulsch->O_RI,ulsch->harq_processes[harq_pid]->Msc_initial,ulsch->harq_processes[harq_pid]->Nsymb_initial,ulsch->beta_offset_ri_times8,Q_CQI,Q_ACK); + + LOG_I(PHY,"ULSCH Encoding (Nid_cell %d, rnti %x): harq_pid %d round %d, RV %d, mcs %d, O_RI %d, O_ACK %d, G %d\n", + 
frame_parms->Nid_cell,ulsch->rnti, + harq_pid, + ulsch->harq_processes[harq_pid]->round, + ulsch->harq_processes[harq_pid]->rvidx, + ulsch->harq_processes[harq_pid]->mcs, + ulsch->O_RI, + ulsch->harq_processes[harq_pid]->O_ACK, + G); +*/ + + if ((int)G < 0) { + LOG_E(PHY,"FATAL: ulsch_coding.c G < 0 (%d) : Q_RI %d, Q_CQI %d, O %d, betaCQI_times8 %d)\n",G,Q_RI,Q_CQI,ulsch->O,ulsch->beta_offset_cqi_times8); + VCD_SIGNAL_DUMPER_DUMP_FUNCTION_BY_NAME(VCD_SIGNAL_DUMPER_FUNCTIONS_UE_ULSCH_ENCODING, VCD_FUNCTION_OUT); + return(-1); + } + + + // Data and control multiplexing (5.2.2.7 36-212) + + H = G + Q_CQI; + Hprime = H/Q_m; + + + + // Fill in the "e"-sequence from 36-212, V8.6 2009-03, p. 16-17 (for each "e") and concatenate the + // outputs for each code segment, see Section 5.1.5 p.20 + + for (r=0; r<ulsch->harq_processes[harq_pid]->C; r++) { +#ifdef DEBUG_ULSCH_CODING + printf("Rate Matching, Code segment %d (coded bits (G) %d,unpunctured/repeated bits per code segment %d,mod_order %d, nb_rb %d)...\n", + r, + G, + Kr*3, + Q_m,ulsch->harq_processes[harq_pid]->nb_rb); +#endif + + start_meas(rm_stats); + r_offset += lte_rate_matching_turbo(ulsch->harq_processes[harq_pid]->RTC[r], + G, + ulsch->harq_processes[harq_pid]->w[r], + ulsch->e+r_offset, + ulsch->harq_processes[harq_pid]->C, // C + NSOFT, // Nsoft, + 0, // this means UL + 1, + ulsch->harq_processes[harq_pid]->rvidx, + get_Qm_ul(ulsch->harq_processes[harq_pid]->mcs), + 1, + r, + ulsch->harq_processes[harq_pid]->nb_rb); + //ulsch->harq_processes[harq_pid]->mcs); // r + stop_meas(rm_stats); +#ifdef DEBUG_ULSCH_CODING + + if (r==ulsch->harq_processes[harq_pid]->C-1) + write_output("enc_output.m","enc",ulsch->e,r_offset,1,4); + +#endif + } + } else { //control-only PUSCH + Q_CQI = (ulsch->harq_processes[harq_pid]->nb_rb * (12 * Q_m) * (ulsch->Nsymb_pusch)) - Q_RI; + H = Q_CQI; + Hprime = H/Q_m; + } + + + // Do CQI coding + if ((ulsch->O>1) && (ulsch->O < 12)) { + LOG_E(PHY,"short CQI sizes not supported yet\n"); + 
VCD_SIGNAL_DUMPER_DUMP_FUNCTION_BY_NAME(VCD_SIGNAL_DUMPER_FUNCTIONS_UE_ULSCH_ENCODING, VCD_FUNCTION_OUT); + return(-1); + } else { + // add 8-bit CRC + crc = crc8(o_flip, + ulsch->O)>>24; +#ifdef DEBUG_ULSCH_CODING + printf("crc(cqi) tx : %x\n",crc); +#endif + memset((void *)&ulsch->o_d[0],LTE_NULL,96); + + ccodelte_encode(ulsch->O, + 1, + o_flip, + &ulsch->o_d[96], + 0); + + + o_RCC = sub_block_interleaving_cc(8+ulsch->O, + &ulsch->o_d[96], + ulsch->o_w); + + lte_rate_matching_cc(o_RCC, + Q_CQI, + ulsch->o_w, + ulsch->q); + + } + + i=0; + + // Do RI coding + if (ulsch->O_RI == 1) { + switch (Q_m) { + case 2: + ulsch->q_RI[0] = ulsch->o_RI[0]; + ulsch->q_RI[1] = PUSCH_y;//ulsch->o_RI[0]; + len_RI=2; + break; + + case 4: + ulsch->q_RI[0] = ulsch->o_RI[0]; + ulsch->q_RI[1] = PUSCH_y;//1; + ulsch->q_RI[2] = PUSCH_x;//ulsch->o_RI[0]; + ulsch->q_RI[3] = PUSCH_x;//1; + len_RI=4; + break; + + case 6: + ulsch->q_RI[0] = ulsch->o_RI[0]; + ulsch->q_RI[1] = PUSCH_y;//1; + ulsch->q_RI[2] = PUSCH_x;//1; + ulsch->q_RI[3] = PUSCH_x;//ulsch->o_RI[0]; + ulsch->q_RI[4] = PUSCH_x;//1; + ulsch->q_RI[5] = PUSCH_x;//1; + len_RI=6; + break; + } + } else if (ulsch->O_RI>1) { + LOG_E(PHY,"RI cannot be more than 1 bit yet\n"); + VCD_SIGNAL_DUMPER_DUMP_FUNCTION_BY_NAME(VCD_SIGNAL_DUMPER_FUNCTIONS_UE_ULSCH_ENCODING, VCD_FUNCTION_OUT); + return(-1); + } + + // Do ACK coding, Section 5.2.2.6 36.213 (p.23-24 in v8.6) + wACK_idx = (ulsch->bundling==0) ? 4 : ((Nbundled-1)&3); +#ifdef DEBUG_ULSCH_CODING + printf("ulsch_coding.c: Bundling %d, Nbundled %d, wACK_idx %d\n", + ulsch->bundling,Nbundled,wACK_idx); +#endif + + // 1-bit ACK/NAK + if (ulsch->harq_processes[harq_pid]->O_ACK == 1) { + switch (Q_m) { + case 2: + ulsch->q_ACK[0] = (ulsch->o_ACK[0]+wACK[wACK_idx][0])&1; + ulsch->q_ACK[1] = (ulsch->bundling==0)? 
PUSCH_y : ((ulsch->o_ACK[0]+wACK[wACK_idx][1])&1);//ulsch->o_ACK[0]; + len_ACK = 2; + break; + + case 4: + ulsch->q_ACK[0] = (ulsch->o_ACK[0]+wACK[wACK_idx][0])&1; + ulsch->q_ACK[1] = (ulsch->bundling==0)? PUSCH_y : ((ulsch->o_ACK[0]+wACK[wACK_idx][1])&1); + ulsch->q_ACK[2] = PUSCH_x; + ulsch->q_ACK[3] = PUSCH_x; + len_ACK = 4; + break; + + case 6: + ulsch->q_ACK[0] = (ulsch->o_ACK[0]+wACK[wACK_idx][0])&1; + ulsch->q_ACK[1] = (ulsch->bundling==0)? PUSCH_y : ((ulsch->o_ACK[0]+wACK[wACK_idx][1])&1); + ulsch->q_ACK[2] = PUSCH_x; + ulsch->q_ACK[3] = PUSCH_x; + ulsch->q_ACK[4] = PUSCH_x; + ulsch->q_ACK[6] = PUSCH_x; + len_ACK = 6; + break; + } + } + + // two-bit ACK/NAK + if (ulsch->harq_processes[harq_pid]->O_ACK == 2) { + ack_parity = (ulsch->o_ACK[0]+ulsch->o_ACK[1])&1; + + switch (Q_m) { + case 2: + ulsch->q_ACK[0] = (ulsch->o_ACK[0]+wACK[wACK_idx][0])&1; + ulsch->q_ACK[1] = (ulsch->o_ACK[1]+wACK[wACK_idx][0])&1; + ulsch->q_ACK[2] = (ack_parity+wACK[wACK_idx][0])&1; + ulsch->q_ACK[3] = (ulsch->o_ACK[0]+wACK[wACK_idx][1])&1; + ulsch->q_ACK[4] = (ulsch->o_ACK[1]+wACK[wACK_idx][1])&1; + ulsch->q_ACK[5] = (ack_parity+wACK[wACK_idx][1])&1; + len_ACK = 6; + break; + + case 4: + ulsch->q_ACK[0] = (ulsch->o_ACK[0]+wACK[wACK_idx][0])&1; + ulsch->q_ACK[1] = (ulsch->o_ACK[1]+wACK[wACK_idx][0])&1; + ulsch->q_ACK[2] = PUSCH_x; + ulsch->q_ACK[3] = PUSCH_x;//1; + ulsch->q_ACK[4] = (ack_parity+wACK[wACK_idx][0])&1; + ulsch->q_ACK[5] = (ulsch->o_ACK[0]+wACK[wACK_idx][1])&1; + ulsch->q_ACK[6] = PUSCH_x; + ulsch->q_ACK[7] = PUSCH_x;//1; + ulsch->q_ACK[8] = (ulsch->o_ACK[1]+wACK[wACK_idx][1])&1; + ulsch->q_ACK[9] = (ack_parity+wACK[wACK_idx][1])&1; + ulsch->q_ACK[10] = PUSCH_x; + ulsch->q_ACK[11] = PUSCH_x;//1; + len_ACK = 12; + break; + + case 6: + ulsch->q_ACK[0] = (ulsch->o_ACK[0]+wACK[wACK_idx][0])&1; + ulsch->q_ACK[1] = (ulsch->o_ACK[1]+wACK[wACK_idx][0])&1; + ulsch->q_ACK[2] = PUSCH_x; + ulsch->q_ACK[3] = PUSCH_x; + ulsch->q_ACK[4] = PUSCH_x; + ulsch->q_ACK[5] = PUSCH_x; + + 
ulsch->q_ACK[6] = (ack_parity+wACK[wACK_idx][0])&1; + ulsch->q_ACK[7] = (ulsch->o_ACK[0]+wACK[wACK_idx][1])&1; + ulsch->q_ACK[8] = PUSCH_x; + ulsch->q_ACK[9] = PUSCH_x; + ulsch->q_ACK[10] = PUSCH_x; + ulsch->q_ACK[11] = PUSCH_x; + + ulsch->q_ACK[12] = (ulsch->o_ACK[1]+wACK[wACK_idx][1])&1; + ulsch->q_ACK[13] = (ack_parity+wACK[wACK_idx][1])&1; + ulsch->q_ACK[14] = PUSCH_x; + ulsch->q_ACK[15] = PUSCH_x; + ulsch->q_ACK[16] = PUSCH_x; + ulsch->q_ACK[17] = PUSCH_x; + len_ACK = 18; + + break; + } + } + + if (ulsch->harq_processes[harq_pid]->O_ACK > 2) { + LOG_E(PHY,"ACK cannot be more than 2 bits yet\n"); + VCD_SIGNAL_DUMPER_DUMP_FUNCTION_BY_NAME(VCD_SIGNAL_DUMPER_FUNCTIONS_UE_ULSCH_ENCODING, VCD_FUNCTION_OUT); + return(-1); + } + + + // channel multiplexing/interleaving + + start_meas(m_stats); + Hpp = Hprime + Q_RI; + + Cmux = ulsch->Nsymb_pusch; + Rmux = Hpp*Q_m/Cmux; + Rmux_prime = Rmux/Q_m; + + Qprime_RI = Q_RI / Q_m; + Qprime_ACK = Q_ACK / Q_m; + Qprime_CQI = Q_CQI / Q_m; + + // printf("Qprime_CQI = %d\n",Qprime_CQI); + // RI BITS + + memset(y,LTE_NULL,Q_m*Hpp); + + if (frame_parms->Ncp == 0) + columnset = cs_ri_normal; + else + columnset = cs_ri_extended; + + j=0; + + for (i=0; i<Qprime_RI; i++) { + r = Rmux_prime - 1 - (i>>2); + + for (q=0; q<Q_m; q++) { + y[q+(Q_m*((r*Cmux) + columnset[j]))] = ulsch->q_RI[(q+(Q_m*i))%len_RI]; + // printf("ri[%d] %d => y[%d]\n",q+(Q_m*i)%len_RI,ulsch->q_RI[(q+(Q_m*i))%len_RI],q+(Q_m*((r*Cmux) + columnset[j])),y[q+(Q_m*((r*Cmux) + columnset[j]))]); + } + + j=(j+3)&3; + + } + + + // CQI and Data bits + j=0; + /* + for (i=0,iprime=-Qprime_CQI;i<Hprime;i++,iprime++) { + + while (y[Q_m*j] != LTE_NULL) j++; + + if (i<Qprime_CQI) { + for (q=0;q<Q_m;q++) { + y[q+(Q_m*j)] = ulsch->q[q+(Q_m*i)]; + //printf("cqi[%d] %d => y[%d]\n",q+(Q_m*i),ulsch->q[q+(Q_m*i)],q+(Q_m*j)); + } + } + else { + for (q=0;q<Q_m;q++) { + y[q+(Q_m*j)] = ulsch->e[q+(Q_m*iprime)]; + // printf("e[%d] %d => y[%d]\n",q+(Q_m*iprime),ulsch->e[q+(Q_m*iprime)],q+(Q_m*j)); 
+ } + } + j++; + } + */ + + for (i=0; i<Qprime_CQI; i++) { + + while (y[Q_m*j] != LTE_NULL) j++; + + for (q=0; q<Q_m; q++) { + y[q+(Q_m*j)] = ulsch->q[q+(Q_m*i)]; + // printf("cqi[%d] %d => y[%d] (j %d)\n",q+(Q_m*i),ulsch->q[q+(Q_m*i)],q+(Q_m*j),j); + } + + j++; + } + + j2 = j*Q_m; + + switch (Q_m) { + + case 2: + + for (iprime=0; iprime<(Hprime-Qprime_CQI)<<1; iprime+=2) { + while (y[j2] != LTE_NULL) j2+=2; + + y[j2] = ulsch->e[iprime]; + y[1+j2] = ulsch->e[1+iprime]; + j2+=2; + } + + break; + + case 4: + for (iprime=0; iprime<(Hprime-Qprime_CQI)<<2; iprime+=4) { + while (y[j2] != LTE_NULL) j2+=4; + + y[j2] = ulsch->e[iprime]; + y[1+j2] = ulsch->e[1+iprime]; + y[2+j2] = ulsch->e[2+iprime]; + y[3+j2] = ulsch->e[3+iprime]; + j2+=4; + } + + break; + + case 6: + for (iprime=0; iprime<(Hprime-Qprime_CQI)*6; iprime+=6) { + while (y[j2] != LTE_NULL) j2+=6; + + y[j2] = ulsch->e[iprime]; + y[1+j2] = ulsch->e[1+iprime]; + y[2+j2] = ulsch->e[2+iprime]; + y[3+j2] = ulsch->e[3+iprime]; + y[4+j2] = ulsch->e[4+iprime]; + y[5+j2] = ulsch->e[5+iprime]; + j2+=6; + } + + break; + + } + + // HARQ-ACK Bits (Note these overwrite some bits) + + if (frame_parms->Ncp == 0) + columnset = cs_ack_normal; + else + columnset = cs_ack_extended; + + j=0; + + for (i=0; i<Qprime_ACK; i++) { + r = Rmux_prime - 1 - (i>>2); + + for (q=0; q<Q_m; q++) { + y[q+(Q_m*((r*Cmux) + columnset[j]))] = ulsch->q_ACK[(q+(Q_m*i))%len_ACK]; +#ifdef DEBUG_ULSCH_CODING + printf("ulsch_coding.c: ACK %d => y[%d]=%d (i %d, r*Cmux %d, columnset %d)\n",q+(Q_m*i), + q+(Q_m*((r*Cmux) + columnset[j])),ulsch->q_ACK[(q+(Q_m*i))%len_ACK], + i,r*Cmux,columnset[j]); +#endif + } + + j=(j+3)&3; + + } + + // write out buffer + j=0; + + switch (Q_m) { + case 2: + for (i=0; i<Cmux; i++) + for (r=0; r<Rmux_prime; r++) { + yptr=&y[((r*Cmux)+i)<<1]; + ulsch->h[j++] = *yptr++; + ulsch->h[j++] = *yptr++; + } + + break; + + case 4: + for (i=0; i<Cmux; i++) + for (r=0; r<Rmux_prime; r++) { + yptr = &y[((r*Cmux)+i)<<2]; + ulsch->h[j++] = 
*yptr++; + ulsch->h[j++] = *yptr++; + ulsch->h[j++] = *yptr++; + ulsch->h[j++] = *yptr++; + } + + break; + + case 6: + for (i=0; i<Cmux; i++) + for (r=0; r<Rmux_prime; r++) { + yptr = &y[((r*Cmux)+i)*6]; + ulsch->h[j++] = *yptr++; + ulsch->h[j++] = *yptr++; + ulsch->h[j++] = *yptr++; + ulsch->h[j++] = *yptr++; + ulsch->h[j++] = *yptr++; + ulsch->h[j++] = *yptr++; + } + + break; + + default: + break; + } + + stop_meas(m_stats); + + if (j!=(H+Q_RI)) { + LOG_E(PHY,"Error in output buffer length (j %d, H+Q_RI %d)\n",j,H+Q_RI); + VCD_SIGNAL_DUMPER_DUMP_FUNCTION_BY_NAME(VCD_SIGNAL_DUMPER_FUNCTIONS_UE_ULSCH_ENCODING, VCD_FUNCTION_OUT); + return(-1); + } + + VCD_SIGNAL_DUMPER_DUMP_FUNCTION_BY_NAME(VCD_SIGNAL_DUMPER_FUNCTIONS_UE_ULSCH_ENCODING, VCD_FUNCTION_OUT); + return(0); +} + diff --git a/openair1/PHY/LTE_TRANSPORT/ulsch_modulation.c b/openair1/PHY/LTE_TRANSPORT/ulsch_modulation.c new file mode 100644 index 0000000000000000000000000000000000000000..d1718f6e90f600927f5d93f3a18e93213bea86f8 --- /dev/null +++ b/openair1/PHY/LTE_TRANSPORT/ulsch_modulation.c @@ -0,0 +1,777 @@ +/* + * Licensed to the OpenAirInterface (OAI) Software Alliance under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The OpenAirInterface Software Alliance licenses this file to You under + * the OAI Public License, Version 1.1 (the "License"); you may not use this file + * except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.openairinterface.org/?page_id=698 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ *-------------------------------------------------------------------------------
+ * For more information about the OpenAirInterface (OAI) Software Alliance:
+ *      contact@openairinterface.org
+ */
+
+/*! \file PHY/LTE_TRANSPORT/ulsch_modulation.c
+* \brief Top-level routines for generating PUSCH physical channel from 36.211 V8.6 2009-03
+* \author R. Knopp, F. Kaltenberger, A. Bhamri
+* \date 2011
+* \version 0.1
+* \company Eurecom
+* \email: knopp@eurecom.fr,florian.kaltenberger@eurecom.fr,ankit.bhamri@eurecom.fr
+* \note
+* \warning
+*/
+#include "PHY/defs.h"
+#include "PHY/extern.h"
+#include "PHY/CODING/defs.h"
+#include "PHY/CODING/extern.h"
+#include "PHY/LTE_TRANSPORT/defs.h"
+#include "defs.h"
+#include "UTIL/LOG/vcd_signal_dumper.h"
+
+
+
+//#define DEBUG_ULSCH_MODULATION
+
+#ifndef OFDMA_ULSCH
+
+/* Case label used inside dft_lte(): runs the N-point DFT routine (third argument
+   fixed to 1) on each of the three interleaved symbol groups, in group order. */
+#define DFT_LTE_CASE(N) \
+  case N: \
+    dft##N((int16_t *)dft_in[0],(int16_t *)dft_out[0],1); \
+    dft##N((int16_t *)dft_in[1],(int16_t *)dft_out[1],1); \
+    dft##N((int16_t *)dft_in[2],(int16_t *)dft_out[2],1); \
+    break
+
+/*! \brief Transform precoding: Msc_PUSCH-point DFT of 12 consecutive PUSCH symbols.
+ *
+ * d is read as 12 back-to-back blocks of Msc_PUSCH 32-bit samples and z is written
+ * with the same layout. The blocks are interleaved four at a time (sample i of block
+ * 4*g+c goes to word 4*i+c of group g) before the DFT routine is called once per
+ * group, and de-interleaved afterwards.
+ *
+ * \param z          output buffer, 12*Msc_PUSCH 32-bit samples are written
+ * \param d          input buffer, 12*Msc_PUSCH 32-bit samples are read
+ * \param Msc_PUSCH  DFT size; must be one of the sizes listed in the switch below
+ * \param Nsymb      not used, 12 blocks are always processed
+ *
+ * If Msc_PUSCH is not a supported size a message is printed and z is left untouched
+ * (previously the uninitialised work buffers were copied to z, and a size above 1200
+ * overran them).
+ */
+void dft_lte(int32_t *z,int32_t *d, int32_t Msc_PUSCH, uint8_t Nsymb)
+{
+
+#if defined(__x86_64__) || defined(__i386__)
+  __m128i dft_in128[4][1200],dft_out128[4][1200];
+  __m128i norm128;
+#elif defined(__arm__)
+  int16x8_t dft_in128[4][1200],dft_out128[4][1200];
+  int16x8_t norm128;
+#endif
+  // only the first three rows of the work buffers are used (3 groups of 4 symbols)
+  uint32_t *dft_in[3]  = {(uint32_t*)dft_in128[0], (uint32_t*)dft_in128[1], (uint32_t*)dft_in128[2]};
+  uint32_t *dft_out[3] = {(uint32_t*)dft_out128[0],(uint32_t*)dft_out128[1],(uint32_t*)dft_out128[2]};
+  uint32_t *d32 = (uint32_t *)d;
+  uint32_t *z32 = (uint32_t *)z;
+  uint32_t nsc,i,grp,lane;
+
+  (void)Nsymb;
+
+  // each work buffer row holds 1200 vectors = 4*1200 32-bit words: reject anything larger
+  // before the interleaving loop writes past the end of the row
+  if ((Msc_PUSCH < 12) || (Msc_PUSCH > 1200)) {
+    printf("dft_lte: unsupported Msc_PUSCH %d\n",Msc_PUSCH);
+    return;
+  }
+
+  nsc = (uint32_t)Msc_PUSCH;
+
+  // interleave: word 4*i+lane of group grp <= sample i of symbol block 4*grp+lane
+  for (i=0; i<nsc; i++) {
+    for (grp=0; grp<3; grp++) {
+      for (lane=0; lane<4; lane++) {
+        dft_in[grp][(i<<2)+lane] = d32[(((grp<<2)+lane)*nsc)+i];
+      }
+    }
+  }
+
+  switch (Msc_PUSCH) {
+  case 12:
+    dft12((int16_t *)dft_in[0],(int16_t *)dft_out[0]);
+    dft12((int16_t *)dft_in[1],(int16_t *)dft_out[1]);
+    dft12((int16_t *)dft_in[2],(int16_t *)dft_out[2]);
+
+    // dft12 takes no third argument; its output is multiplied here by 9459/2^15 (~1/sqrt(12))
+#if defined(__x86_64__) || defined(__i386__)
+    norm128 = _mm_set1_epi16(9459);
+#elif defined(__arm__)
+    norm128 = vdupq_n_s16(9459);
+#endif
+
+    for (grp=0; grp<3; grp++) {
+      for (i=0; i<12; i++) {
+#if defined(__x86_64__) || defined(__i386__)
+        ((__m128i*)dft_out[grp])[i] = _mm_slli_epi16(_mm_mulhi_epi16(((__m128i*)dft_out[grp])[i],norm128),1);
+#elif defined(__arm__)
+        ((int16x8_t*)dft_out[grp])[i] = vqdmulhq_s16(((int16x8_t*)dft_out[grp])[i],norm128);
+#endif
+      }
+    }
+
+    break;
+
+  DFT_LTE_CASE(24);
+  DFT_LTE_CASE(36);
+  DFT_LTE_CASE(48);
+  DFT_LTE_CASE(60);
+  DFT_LTE_CASE(72);
+  DFT_LTE_CASE(96);
+  DFT_LTE_CASE(108);
+  DFT_LTE_CASE(120);
+  DFT_LTE_CASE(144);
+  DFT_LTE_CASE(180);
+  DFT_LTE_CASE(192);
+  DFT_LTE_CASE(216);
+  DFT_LTE_CASE(240);
+  DFT_LTE_CASE(288);
+  DFT_LTE_CASE(300);
+  DFT_LTE_CASE(324);
+  DFT_LTE_CASE(360);
+  DFT_LTE_CASE(384);
+  DFT_LTE_CASE(432);
+  DFT_LTE_CASE(480);
+  DFT_LTE_CASE(540);
+  DFT_LTE_CASE(576);
+  DFT_LTE_CASE(600);
+  DFT_LTE_CASE(648);
+  DFT_LTE_CASE(720);
+  DFT_LTE_CASE(864);
+  DFT_LTE_CASE(900);
+  DFT_LTE_CASE(960);
+  DFT_LTE_CASE(972);
+  DFT_LTE_CASE(1080);
+  DFT_LTE_CASE(1152);
+  DFT_LTE_CASE(1200);
+
+  default:
+    // no DFT was run, so dft_out holds uninitialised data: do not copy it to z
+    printf("dft_lte: unsupported Msc_PUSCH %d\n",Msc_PUSCH);
+    return;
+  }
+
+  // de-interleave: sample i of symbol block 4*grp+lane <= word 4*i+lane of group grp
+  for (i=0; i<nsc; i++) {
+    for (grp=0; grp<3; grp++) {
+      for (lane=0; lane<4; lane++) {
+        z32[(((grp<<2)+lane)*nsc)+i] = dft_out[grp][(i<<2)+lane];
+      }
+    }
+  }
+}
+
+#undef DFT_LTE_CASE
+
+#endif
+
+/*! \brief Map the Q_m scrambled bits starting at ulsch->b_tilde[k] to one constellation point.
+ *
+ * Even bit positions (k, k+2, k+4) select the real component, odd positions
+ * (k+1, k+3, k+5) the imaginary one, most significant bit first.
+ *
+ * \param ulsch          ULSCH descriptor whose b_tilde array is read
+ * \param k              index of the first bit of the symbol in b_tilde
+ * \param Q_m            bits per symbol: 2 (QPSK), 4 (qam16_table) or 6 (qam64_table)
+ * \param amp            amplitude multiplying the qam16_table/qam64_table entries (result >>15)
+ * \param gain_lin_QPSK  magnitude of each QPSK component
+ * \param re             output, real component
+ * \param im             output, imaginary component
+ * \return 1 if *re and *im were written, 0 if Q_m is not 2, 4 or 6 (outputs untouched)
+ */
+static inline int ulsch_mod_map_symbol(LTE_UE_ULSCH_t *ulsch,
+                                       int k,
+                                       uint8_t Q_m,
+                                       short amp,
+                                       short gain_lin_QPSK,
+                                       int16_t *re,
+                                       int16_t *im)
+{
+  uint8_t table_offset_re = 0;
+  uint8_t table_offset_im = 0;
+
+  switch (Q_m) {
+
+  case 2:
+    // TODO: this has to be updated!!!
+    *re = (ulsch->b_tilde[k]   == 1) ? (-gain_lin_QPSK) : gain_lin_QPSK;
+    *im = (ulsch->b_tilde[k+1] == 1) ? (-gain_lin_QPSK) : gain_lin_QPSK;
+    return 1;
+
+  case 4:
+    if (ulsch->b_tilde[k] == 1)
+      table_offset_re+=2;
+
+    if (ulsch->b_tilde[k+1] == 1)
+      table_offset_im+=2;
+
+    if (ulsch->b_tilde[k+2] == 1)
+      table_offset_re+=1;
+
+    if (ulsch->b_tilde[k+3] == 1)
+      table_offset_im+=1;
+
+    *re = (int16_t)(((int32_t)amp*qam16_table[table_offset_re])>>15);
+    *im = (int16_t)(((int32_t)amp*qam16_table[table_offset_im])>>15);
+    return 1;
+
+  case 6:
+    if (ulsch->b_tilde[k] == 1)
+      table_offset_re+=4;
+
+    if (ulsch->b_tilde[k+1] == 1)
+      table_offset_im+=4;
+
+    if (ulsch->b_tilde[k+2] == 1)
+      table_offset_re+=2;
+
+    if (ulsch->b_tilde[k+3] == 1)
+      table_offset_im+=2;
+
+    if (ulsch->b_tilde[k+4] == 1)
+      table_offset_re+=1;
+
+    if (ulsch->b_tilde[k+5] == 1)
+      table_offset_im+=1;
+
+    *re = (int16_t)(((int32_t)amp*qam64_table[table_offset_re])>>15);
+    *im = (int16_t)(((int32_t)amp*qam64_table[table_offset_im])>>15);
+    return 1;
+
+  default:
+    return 0;
+  }
+}
+
+/*! \brief Scramble, modulate, transform-precode and map one PUSCH transmission.
+ *
+ * Steps, in order: scramble ulsch->h into ulsch->b_tilde, map b_tilde to symbols in
+ * ulsch->d, transform-precode d into ulsch->z (plain copy when OFDMA_ULSCH is defined,
+ * dft_lte() otherwise) and copy z into txdataF[0], skipping the reference symbols.
+ *
+ * \param txdataF      frequency-domain TX buffers; only txdataF[0] is written
+ * \param amp          amplitude used for the constellation points
+ * \param frame        frame number, used to derive the HARQ process id
+ * \param subframe     subframe number (HARQ process id, scrambling init, symbol offset)
+ * \param frame_parms  frame parameters (asserted non-NULL)
+ * \param ulsch        ULSCH descriptor; the function prints a message and returns if it is
+ *                     NULL or if harq_pid, nb_rb or first_rb are out of range
+ */
+void ulsch_modulation(int32_t **txdataF,
+                      short amp,
+                      uint32_t frame,
+                      uint32_t subframe,
+                      LTE_DL_FRAME_PARMS *frame_parms,
+                      LTE_UE_ULSCH_t *ulsch)
+{
+
+  short gain_lin_QPSK;
+  int16_t re0,im0,re1,im1;
+
+  DevAssert(frame_parms);
+
+  int re_offset,re_offset0,i,Msymb,j,k,nsymb,Msc_PUSCH,l;
+  //  uint8_t harq_pid = (rag_flag == 1) ? 0 : subframe2harq_pid_tdd(frame_parms->tdd_config,subframe);
+  uint8_t harq_pid = subframe2harq_pid(frame_parms,frame,subframe);
+  uint8_t Q_m;
+  int32_t *txptr;
+  uint32_t symbol_offset;
+  uint16_t first_rb;
+  uint16_t nb_rb;
+  int G;
+
+  uint32_t x1, x2, s=0;
+  uint8_t c;
+
+  if (!ulsch) {
+    printf("ulsch_modulation.c: Null ulsch\n");
+    return;
+  }
+
+  // x1 is set in lte_gold_generic
+  x2 = (ulsch->rnti<<14) + (subframe<<9) + frame_parms->Nid_cell; //this is c_init in 36.211 Sec 6.3.1
+
+  if (harq_pid>=8) {
+    printf("ulsch_modulation.c: Illegal harq_pid %d\n",harq_pid);
+    return;
+  }
+
+  first_rb = ulsch->harq_processes[harq_pid]->first_rb;
+  nb_rb = ulsch->harq_processes[harq_pid]->nb_rb;
+
+  if (nb_rb == 0) {
+    printf("ulsch_modulation.c: Frame %d, Subframe %d Illegal nb_rb %d\n",frame,subframe,nb_rb);
+    return;
+  }
+
+  // RB indices run from 0 to N_RB_UL-1, so first_rb == N_RB_UL is already out of range
+  if (first_rb >= frame_parms->N_RB_UL) {
+    printf("ulsch_modulation.c: Frame %d, Subframe %d Illegal first_rb %d\n",frame,subframe,first_rb);
+    return;
+  }
+
+  VCD_SIGNAL_DUMPER_DUMP_FUNCTION_BY_NAME(VCD_SIGNAL_DUMPER_FUNCTIONS_UE_ULSCH_MODULATION, VCD_FUNCTION_IN);
+
+  Q_m = get_Qm_ul(ulsch->harq_processes[harq_pid]->mcs);
+
+  G = (int)nb_rb * (12 * Q_m) * (ulsch->Nsymb_pusch);
+
+
+  // Mapping
+  nsymb = (frame_parms->Ncp==0) ? 14:12;
+  Msc_PUSCH = nb_rb*12;
+
+#ifdef DEBUG_ULSCH_MODULATION
+  LOG_D(PHY,"ulsch_modulation.c: Doing modulation (rnti %x,x2 %x) for G=%d bits, harq_pid %d , nb_rb %d, Q_m %d, Nsymb_pusch %d (nsymb %d), subframe %d\n",
+        ulsch->rnti,x2,G,harq_pid,ulsch->harq_processes[harq_pid]->nb_rb,Q_m, ulsch->Nsymb_pusch,nsymb,subframe);
+#endif
+
+  // scrambling (Note the placeholding bits are handled in ulsch_coding.c directly!)
+  s = lte_gold_generic(&x1, &x2, 1);
+  k=0;
+
+  for (i=0; i<(1+(G>>5)); i++) {
+    // stop at G: only G bits of h are valid, the last 32-bit scrambling word is used partially
+    for (j=0; (j<32) && (k<G); j++,k++) {
+      c = (uint8_t)((s>>j)&1);
+
+      if (ulsch->h[k] == PUSCH_x) {
+        ulsch->b_tilde[k] = 1;
+      } else if (ulsch->h[k] == PUSCH_y) {
+        // placeholder y repeats the previous scrambled bit
+        ulsch->b_tilde[k] = ulsch->b_tilde[k-1];
+      } else {
+        ulsch->b_tilde[k] = (ulsch->h[k]+c)&1;
+      }
+
+    }
+
+    s = lte_gold_generic(&x1, &x2, 0);
+  }
+
+
+  gain_lin_QPSK = (short)((amp*ONE_OVER_SQRT2_Q15)>>15);
+
+
+  // Modulation
+
+  Msymb = G/Q_m;
+
+  if(ulsch->cooperation_flag == 2) {
+    // For Distributed Alamouti Scheme in Collabrative Communication:
+    // each pair of symbols (x0,x1) is sent as (-x1*, x0*)
+    for (i=0,j=0; i<Msymb; i+=2,j+=2*Q_m) {
+      if (ulsch_mod_map_symbol(ulsch,j,Q_m,amp,gain_lin_QPSK,&re0,&im0) &&
+          ulsch_mod_map_symbol(ulsch,j+Q_m,Q_m,amp,gain_lin_QPSK,&re1,&im1)) {
+        //UE1, -x1*
+        ((int16_t*)&ulsch->d[i])[0] = (int16_t)(-re1);
+        ((int16_t*)&ulsch->d[i])[1] = im1;
+        //UE1, x0*
+        ((int16_t*)&ulsch->d[i+1])[0] = re0;
+        ((int16_t*)&ulsch->d[i+1])[1] = (int16_t)(-im0);
+      }
+    }
+  } else {
+    // normal symbols; d[i] is left untouched when Q_m is not 2, 4 or 6
+    for (i=0,j=0; i<Msymb; i++,j+=Q_m) {
+      ulsch_mod_map_symbol(ulsch,j,Q_m,amp,gain_lin_QPSK,
+                           &((int16_t*)&ulsch->d[i])[0],
+                           &((int16_t*)&ulsch->d[i])[1]);
+    }
+  }
+
+
+  // Transform Precoding
+
+#ifdef OFDMA_ULSCH
+
+  for (i=0; i<Msymb; i++) {
+    ulsch->z[i] = ulsch->d[i];
+  }
+
+#else
+  dft_lte(ulsch->z,ulsch->d,Msc_PUSCH,ulsch->Nsymb_pusch);
+#endif
+
+  DevAssert(txdataF);
+
+  // Resource mapping (identical for OFDMA_ULSCH and SC-FDMA, only the debug traces differ)
+  re_offset0 = frame_parms->first_carrier_offset + (first_rb*12);
+
+  // valid RE indices are 0..ofdm_symbol_size-1: wrap on equality as well, otherwise the
+  // per-RE wrap test below (re_offset==ofdm_symbol_size) is never hit
+  if (re_offset0>=frame_parms->ofdm_symbol_size) {
+    re_offset0 -= frame_parms->ofdm_symbol_size;
+  }
+
+  for (j=0,l=0; l<(nsymb-ulsch->srs_active); l++) {
+    re_offset = re_offset0;
+    symbol_offset = (uint32_t)frame_parms->ofdm_symbol_size*(l+(subframe*nsymb));
+#ifdef DEBUG_ULSCH_MODULATION
+#ifdef OFDMA_ULSCH
+    printf("symbol %d (subframe %d): symbol_offset %d\n",l,subframe,symbol_offset);
+#else
+    printf("ulsch_mod (SC-FDMA) symbol %d (subframe %d): symbol_offset %d\n",l,subframe,symbol_offset);
+#endif
+#endif
+    txptr = &txdataF[0][symbol_offset];
+
+    // Skip reference symbols
+    if (((frame_parms->Ncp == 0) && ((l==3) || (l==10)))||
+        ((frame_parms->Ncp == 1) && ((l==2) || (l==8)))) {
+      continue;
+    }
+
+    for (i=0; i<Msc_PUSCH; i++,j++) {
+#ifdef DEBUG_ULSCH_MODULATION
+#ifdef OFDMA_ULSCH
+      printf("re_offset %d (%p): %d,%d\n", re_offset,&ulsch->z[j],((int16_t*)&ulsch->z[j])[0],((int16_t*)&ulsch->z[j])[1]);
+#else
+      printf("re_offset %d (%p): %d,%d => %p\n", re_offset,&ulsch->z[j],((int16_t*)&ulsch->z[j])[0],((int16_t*)&ulsch->z[j])[1],&txptr[re_offset]);
+#endif
+#endif //DEBUG_ULSCH_MODULATION
+      txptr[re_offset++] = ulsch->z[j];
+
+      if (re_offset==frame_parms->ofdm_symbol_size)
+        re_offset = 0;
+    }
+  }
+
+  VCD_SIGNAL_DUMPER_DUMP_FUNCTION_BY_NAME(VCD_SIGNAL_DUMPER_FUNCTIONS_UE_ULSCH_MODULATION, VCD_FUNCTION_OUT);
+
+}
+
diff --git a/openair2/LAYER2/NR_MAC_gNB/config.c b/openair2/LAYER2/NR_MAC_gNB/config.c
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/openair2/NR_PHY_INTERFACE/IF_Module.h b/openair2/NR_PHY_INTERFACE/IF_Module.h
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/openair2/RRC/LITE/rrc_UE.c b/openair2/RRC/LITE/rrc_UE.c
index 0e3a70765d825b16229fea008bd1aa48d7225a72..f578e76dcdb5c1faff58fbb2bbbe2399320ad433 100644
--- a/openair2/RRC/LITE/rrc_UE.c
+++ b/openair2/RRC/LITE/rrc_UE.c
@@ -1815,8 +1815,8 @@ rrc_ue_process_rrcConnectionReconfiguration(
 rrc_ue_process_radioResourceConfigDedicated(ctxt_pP,eNB_index,
rrcConnectionReconfiguration_r8->radioResourceConfigDedicated); } - - void *non_criticical_ext_iterator = rrcConnectionReconfiguration_r8; + +/* void *non_criticical_ext_iterator = rrcConnectionReconfiguration_r8; RCCConnectionReconfiguration_v1510_IEs_t *rrc_connection_reconfiguration_v1510_IEs = (RRCConnectionReconfiguration_v1510_IEs_t *)0; // fetch EN-DC for NR_RRC here // r8 @@ -1869,7 +1869,7 @@ rrc_ue_process_rrcConnectionReconfiguration( break; } } - } + }*/ #if defined(ENABLE_ITTI) diff --git a/openair2/RRC/NR/proto_NR.h b/openair2/RRC/NR/proto_NR.h new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391