diff --git a/openair1/PHY/TOOLS/cmult_mm.c b/openair1/PHY/TOOLS/cmult_mm.c deleted file mode 100644 index 328958af29605645183c7421dca863829eed3ab1..0000000000000000000000000000000000000000 --- a/openair1/PHY/TOOLS/cmult_mm.c +++ /dev/null @@ -1,200 +0,0 @@ -/******************************************************************************* - OpenAirInterface - Copyright(c) 1999 - 2014 Eurecom - - OpenAirInterface is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - - OpenAirInterface is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with OpenAirInterface.The full GNU General Public License is - included in this distribution in the file called "COPYING". If not, - see <http://www.gnu.org/licenses/>. - - Contact Information - OpenAirInterface Admin: openair_admin@eurecom.fr - OpenAirInterface Tech : openair_tech@eurecom.fr - OpenAirInterface Dev : openair4g-devel@lists.eurecom.fr - - Address : Eurecom, Campus SophiaTech, 450 Route des Chappes, CS 50193 - 06904 Biot Sophia Antipolis cedex, FRANCE - - *******************************************************************************/ -#include "defs.h" - -int mult_cpx_matrix_h(short *x1[2][2], - short *x2[2][2], - short *y[2][2], - unsigned int N, - unsigned short output_shift, - short hermitian) -{ - - if (hermitian) { - // this computes x1^H*x2 and stores it in y - mult_cpx_vector_h(x2[0][0],x1[0][0],y[0][0],N,output_shift,1); - mult_cpx_vector_h(x2[0][1],x1[0][1],y[0][0],N,output_shift,1); - mult_cpx_vector_h(x2[0][0],x1[1][0],y[1][0],N,output_shift,1); - mult_cpx_vector_h(x2[0][1],x1[1][1],y[1][0],N,output_shift,1); - mult_cpx_vector_h(x2[1][0],x1[0][0],y[0][1],N,output_shift,1); - mult_cpx_vector_h(x2[1][1],x1[0][1],y[0][1],N,output_shift,1); - mult_cpx_vector_h(x2[1][0],x1[1][0],y[1][1],N,output_shift,1); - mult_cpx_vector_h(x2[1][1],x1[1][1],y[1][1],N,output_shift,1); - } else { - // this computes x1*x2^H and stores it in y - mult_cpx_vector_h(x1[0][0],x2[0][0],y[0][0],N,output_shift,1); - mult_cpx_vector_h(x1[0][1],x2[0][1],y[0][0],N,output_shift,1); - mult_cpx_vector_h(x1[0][0],x2[1][0],y[0][1],N,output_shift,1); - mult_cpx_vector_h(x1[0][1],x2[1][1],y[0][1],N,output_shift,1); - mult_cpx_vector_h(x1[1][0],x2[0][0],y[1][0],N,output_shift,1); - mult_cpx_vector_h(x1[1][1],x2[0][1],y[1][0],N,output_shift,1); - mult_cpx_vector_h(x1[1][0],x2[1][0],y[1][1],N,output_shift,1); - mult_cpx_vector_h(x1[1][1],x2[1][1],y[1][1],N,output_shift,1); - } -} - -int mult_cpx_matrix_vector(int *x1[2][2], - int *x2[2], - int *y[2], - unsigned int N, - unsigned short output_shift) -{ - - Zero_Buffer(y[0],N*8); - Zero_Buffer(y[1],N*8); - - // this computes x1*x2 and stores it in y (32 bit) - mult_cpx_vector_add32((short*)x2[0],(short*)x1[0][0],(short*)y[0],N); - mult_cpx_vector_add32((short*)x2[1],(short*)x1[0][1],(short*)y[0],N); - mult_cpx_vector_add32((short*)x2[0],(short*)x1[1][0],(short*)y[1],N); - mult_cpx_vector_add32((short*)x2[1],(short*)x1[1][1],(short*)y[1],N); - - // shift and pack - shift_and_pack((short*)y[0],N,output_shift); - shift_and_pack((short*)y[1],N,output_shift); - -} - -#ifdef MAIN_MM -#include <stdio.h> -#include <stdlib.h> -main () -{ - short x1_00[256] __attribute__((aligned(16))); - short x1_10[256] __attribute__((aligned(16))); - short x1_01[256] __attribute__((aligned(16))); - short x1_11[256] __attribute__((aligned(16))); - short x2_0[256] __attribute__((aligned(16))); - short x2_1[256] __attribute__((aligned(16))); - short y_0[256] __attribute__((aligned(16))); - short y_1[256] __attribute__((aligned(16))); - - int *x1[2][2]; - int *x2[2]; - int *y[2]; - int i,m,n; - - x1[0][0] = (int*)x1_00; - x1[0][1] = (int*)x1_01; - x1[1][0] = (int*)x1_10; - x1[1][1] = (int*)x1_11; - x2[0] = (int*)x2_0; - x2[1] = (int*)x2_1; - y[0] = (int*)y_0; - y[1] = (int*)y_1; - - for(m=0; m<2; m++) { - for(n=0; n<2; n++) { - for(i=0; i<256; i+=4) { - ((short*)x1[m][n])[i] = ((short) rand())/4; - ((short*)x1[m][n])[i+1] = ((short) rand())/4; - ((short*)x1[m][n])[i+2] = -((short*)x1[m][n])[i+1]; - ((short*)x1[m][n])[i+3] = ((short*)x1[m][n])[i]; - } - } - - for(i=0; i<256; i+=4) { - ((short*)x2[m])[i] = ((short) rand())/4; - ((short*)x2[m])[i+1] = ((short) rand())/4; - ((short*)x2[m])[i+2] = ((short*)x2[m])[i]; - ((short*)x2[m])[i+3] = ((short*)x2[m])[i+1]; - } - - Zero_Buffer(y[m],512); - } - - /* - input[0] = 100; - input[1] = 200; - input[2] = -200; - input[3] = 100; - input[4] = 1000; - input[5] = 2000; - input[6] = -2000; - input[7] = 1000; - input[8] = 100; - input[9] = 200; - input[10] = -200; - input[11] = 100; - input[12] = 1000; - input[13] = 2000; - input[14] = -2000; - input[15] = 1000; - - input2[0] = 2; - input2[1] = 1; - input2[2] = 2; - input2[3] = 1; - input2[4] = 20; - input2[5] = 10; - input2[6] = 20; - input2[7] = 10; - input2[8] = 2; - input2[9] = 1; - input2[10] = 2; - input2[11] = 1; - input2[12] = 2000; - input2[13] = 1000; - input2[14] = 2000; - input2[15] = 1000; - - x1[0][0] = (int*)input; - x1[0][1] = (int*)input; - x1[1][0] = (int*)input; - x1[1][1] = (int*)input; - - x2[0] = (int*)input2; - x2[1] = (int*)input2; - - y[0] = (int*)output; - y[1] = (int*)output2; - */ - - mult_cpx_matrix_vector(x1,x2,y,64,15); - - //mult_cpx_vector_add32(x2[0],x1[0][0],y[0],64); - - for (i=0; i<128; i+=2) - printf("i=%d, x1 = [%d+1i*%d %d+1i*%d; %d+1i*%d %d+1i*%d]; x2 = [%d+1i*%d; %d+1i*%d]; y = [%d+1i*%d; %d+1i*%d]; y_m= round(x1*x2./pow2(15)); y-y_m \n", - i, - ((short*)x1[0][0])[2*i], ((short*)x1[0][0])[2*i+2], - ((short*)x1[0][1])[2*i], ((short*)x1[0][1])[2*i+2], - ((short*)x1[1][0])[2*i], ((short*)x1[1][0])[2*i+2], - ((short*)x1[1][1])[2*i], ((short*)x1[1][1])[2*i+2], - ((short*)x2[0])[2*i], ((short*)x2[0])[2*i+1], - ((short*)x2[1])[2*i], ((short*)x2[1])[2*i+1], - ((short*)y[0])[2*i], ((short*)y[0])[2*i+1], - ((short*)y[1])[2*i], ((short*)y[1])[2*i+1]); - - //((int*)y[0])[i], ((int*)y[0])[i+1], - //((int*)y[1])[i], ((int*)y[1])[i+1]); - -} - -#endif diff --git a/openair1/PHY/TOOLS/cmult_vv.c b/openair1/PHY/TOOLS/cmult_vv.c index d7b72755087cb3e3b343e79140a9935d208e9b56..f970d332248f47fd2109dc71515a8ce99a11ce27 100755 --- a/openair1/PHY/TOOLS/cmult_vv.c +++ b/openair1/PHY/TOOLS/cmult_vv.c @@ -50,7 +50,7 @@ int mult_cpx_conj_vector(int16_t *x1, uint32_t N, int output_shift) { - // Multiply elementwise two complex vectors of N elements with repeated formatted output + // Multiply elementwise the complex conjugate of x1 with x2. // x1 - input 1 in the format |Re0 Im0 Re1 Im1|,......,|Re(N-2) Im(N-2) Re(N-1) Im(N-1)| // We assume x1 with a dinamic of 15 bit maximum // @@ -90,7 +90,7 @@ int mult_cpx_conj_vector(int16_t *x1, tmp_im = _mm_shufflelo_epi16(*x1_128,_MM_SHUFFLE(2,3,0,1)); tmp_im = _mm_shufflehi_epi16(tmp_im,_MM_SHUFFLE(2,3,0,1)); tmp_im = _mm_sign_epi16(tmp_im,*(__m128i*)&conjug[0]); - tmp_im = _mm_madd_epi16(tmp_im,*x1_128); + tmp_im = _mm_madd_epi16(tmp_im,*x2_128); tmp_re = _mm_srai_epi32(tmp_re,output_shift); tmp_im = _mm_srai_epi32(tmp_im,output_shift); tmpy0 = _mm_unpacklo_epi32(tmp_re,tmp_im); @@ -130,3 +130,4 @@ int mult_cpx_conj_vector(int16_t *x1, return(0); } + diff --git a/openair1/PHY/TOOLS/cmult_vvh.c b/openair1/PHY/TOOLS/cmult_vvh.c deleted file mode 100644 index 64cf163d09c84c09c2afb1afd3aecd467c861a07..0000000000000000000000000000000000000000 --- a/openair1/PHY/TOOLS/cmult_vvh.c +++ /dev/null @@ -1,366 +0,0 @@ -/******************************************************************************* - OpenAirInterface - Copyright(c) 1999 - 2014 Eurecom - - OpenAirInterface is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - - OpenAirInterface is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with OpenAirInterface.The full GNU General Public License is - included in this distribution in the file called "COPYING". If not, - see <http://www.gnu.org/licenses/>. - - Contact Information - OpenAirInterface Admin: openair_admin@eurecom.fr - OpenAirInterface Tech : openair_tech@eurecom.fr - OpenAirInterface Dev : openair4g-devel@lists.eurecom.fr - - Address : Eurecom, Campus SophiaTech, 450 Route des Chappes, CS 50193 - 06904 Biot Sophia Antipolis cedex, FRANCE - - *******************************************************************************/ -#include "defs.h" - -static __m128i shift __attribute__ ((aligned(16))); - - -int mult_cpx_vector_h(short *x1, - short *x2, - short *y, - unsigned int N, - unsigned short output_shift, - short sign) -{ - // Multiply elementwise the complex vector x1 with the complex conjugate of the complex vecotr x2 of N elements and adds it to the vector y. - // x1 - input 1 in the format |Re0 Im0 Re0 Im0|,......,|Re(N-1) Im(N-1) Re(N-1) Im(N-1)| - // We assume x1 with a dinamic of 15 bit maximum - // - // x2 - input 2 in the format |Re0 Im0 Re0 Im0|,......,|Re(N-1) Im(N-1) Re(N-1) Im(N-1)| - // We assume x2 with a dinamic of 14 bit maximum - // - // y - output in the format |Re0 Im0 Re0 Im0|,......,|Re(N-1) Im(N-1) Re(N-1) Im(N-1)| - // - // N - the size f the vectors (this function does N cpx mpy. WARNING: N>=4; - // - // log2_amp - increase the output amplitude by a factor 2^log2_amp (default is 0) - // WARNING: log2_amp>0 can cause overflow!! - // sign - +1..add, -1..substract - - unsigned int i; // loop counter - - register __m128i m0,m1,m2; - - short *temps; - int *tempd; - - __m128i *x1_128; - __m128i *x2_128; - __m128i *y_128; - __m128i mask; - - __m128i temp; - - shift = _mm_cvtsi32_si128(output_shift); - x1_128 = (__m128i *)&x1[0]; - x2_128 = (__m128i *)&x2[0]; - y_128 = (__m128i *)&y[0]; - - if (sign == -1) - mask = (__m128i) _mm_set_epi16 (-1,1,-1,-1,-1,1,-1,-1); - else - mask = (__m128i) _mm_set_epi16 (1,-1,1,1,1,-1,1,1); - - // we compute 2*4 cpx multiply for each loop - for(i=0; i<(N>>3); i++) { - - // printf("i=%d\n",i); - - // unroll 1 - // temps = (short *)x1_128; - // printf("x1 : %d,%d,%d,%d,%d,%d,%d,%d\n",temps[0],temps[1],temps[2],temps[3],temps[4],temps[5],temps[6],temps[7]); - m1 = x1_128[0]; - m2 = x2_128[0]; - - // temps = (short *)&x2_128[0]; - // printf("x2 : %x,%x,%x,%x,%x,%x,%x,%x\n",temps[0],temps[1],temps[2],temps[3],temps[4],temps[5],temps[6],temps[7]); - - // bring x2 in conjugate form - // the first two instructions might be replaced with a single one in SSE3 - m2 = _mm_shufflelo_epi16(m2,_MM_SHUFFLE(0,1,3,2)); - m2 = _mm_shufflehi_epi16(m2,_MM_SHUFFLE(0,1,3,2)); - m2 = _mm_mullo_epi16(m2, mask); - - // temp = m2; - // temps = (short *)&temp; - // printf("x2 conj : %x,%x,%x,%x,%x,%x,%x,%x\n",temps[0],temps[1],temps[2],temps[3],temps[4],temps[5],temps[6],temps[7]); - - m0 = _mm_madd_epi16(m1,m2); //pmaddwd_r2r(mm1,mm0); // 1- compute x1[0]*x2[0] - - // temp = m0; - - // tempd = &temp; - // printf("m0 : %d,%d,%d,%d\n",tempd[0],tempd[1],tempd[2],tempd[3]); - - m0 = _mm_sra_epi32(m0,shift); // 1- shift right by shift in order to compensate for the input amplitude - - // temp = m0; - - // tempd = (int *)&temp; - // printf("m0 : %d,%d,%d,%d\n",tempd[0],tempd[1],tempd[2],tempd[3]); - - m0 = _mm_packs_epi32(m0,m0); // 1- pack in a 128 bit register [re im re im] - m0 = _mm_unpacklo_epi32(m0,m0); // 1- pack in a 128 bit register [re im re im] - - y_128[0] = _mm_add_epi16(m0,y_128[0]); - - // temps = (short *)&y_128[0]; - // printf("y0 : %d,%d,%d,%d,%d,%d,%d,%d\n",temps[0],temps[1],temps[2],temps[3],temps[4],temps[5],temps[6],temps[7]); - - // unroll 2 - m1 = x1_128[1]; - m2 = x2_128[1]; - - m2 = _mm_shufflelo_epi16(m2,_MM_SHUFFLE(0,1,3,2)); - m2 = _mm_shufflehi_epi16(m2,_MM_SHUFFLE(0,1,3,2)); - m2 = _mm_mullo_epi16(m2, mask); - - m0 = _mm_madd_epi16(m1,m2); //pmaddwd_r2r(mm1,mm0); // 1- compute x1[0]*x2[0] - - m0 = _mm_sra_epi32(m0,shift); // 1- shift right by shift in order to compensate for the input amplitude - - m0 = _mm_packs_epi32(m0,m0); // 1- pack in a 128 bit register [re im re im] - m0 = _mm_unpacklo_epi32(m0,m0); // 1- pack in a 128 bit register [re im re im] - - y_128[1] = _mm_add_epi16(m0,y_128[1]); - - // unroll 3 - m1 = x1_128[2]; - m2 = x2_128[2]; - - m2 = _mm_shufflelo_epi16(m2,_MM_SHUFFLE(0,1,3,2)); - m2 = _mm_shufflehi_epi16(m2,_MM_SHUFFLE(0,1,3,2)); - m2 = _mm_mullo_epi16(m2, mask); - - m0 = _mm_madd_epi16(m1,m2); //pmaddwd_r2r(mm1,mm0); // 1- compute x1[0]*x2[0] - - m0 = _mm_sra_epi32(m0,shift); // 1- shift right by shift in order to compensate for the input amplitude - - m0 = _mm_packs_epi32(m0,m0); // 1- pack in a 128 bit register [re im re im] - m0 = _mm_unpacklo_epi32(m0,m0); // 1- pack in a 128 bit register [re im re im] - - y_128[2] = _mm_add_epi16(m0,y_128[2]); - - - // unroll 4 - m1 = x1_128[3]; - m2 = x2_128[3]; - - m2 = _mm_shufflelo_epi16(m2,_MM_SHUFFLE(0,1,3,2)); - m2 = _mm_shufflehi_epi16(m2,_MM_SHUFFLE(0,1,3,2)); - m2 = _mm_mullo_epi16(m2, mask); - - m0 = _mm_madd_epi16(m1,m2); //pmaddwd_r2r(mm1,mm0); // 1- compute x1[0]*x2[0] - - m0 = _mm_sra_epi32(m0,shift); // 1- shift right by shift in order to compensate for the input amplitude - - m0 = _mm_packs_epi32(m0,m0); // 1- pack in a 128 bit register [re im re im] - m0 = _mm_unpacklo_epi32(m0,m0); // 1- pack in a 128 bit register [re im re im] - - y_128[3] = _mm_add_epi16(m0,y_128[3]); - - x1_128+=4; - x2_128+=4; - y_128 +=4; - // printf("x1_128 = %p, x2_128 =%p, y_128=%p\n",x1_128,x2_128,y_128); - - } - - - _mm_empty(); - _m_empty(); - - return(0); -} - - -int mult_cpx_vector_h_add32(short *x1, - short *x2, - short *y, - unsigned int N, - short sign) -{ - // Multiply elementwise the complex vector x1 with the complex conjugate of the complex vecotr x2 of N elements and adds it to the vector y. - // x1 - input 1 in 16bit format |Re0 Im0 Re0 Im0|,......,|Re(N-1) Im(N-1) Re(N-1) Im(N-1)| - // We assume x1 with a dinamic of 15 bit maximum - // - // x2 - input 2 in 16bit format |Re0 Im0 Re0 Im0|,......,|Re(N-1) Im(N-1) Re(N-1) Im(N-1)| - // We assume x2 with a dinamic of 14 bit maximum - // - // y - output in 32bit format |Re0 Im0|,......,|Re(N-1) Im(N-1)| - // - // N - the size f the vectors (this function does N cpx mpy. WARNING: N>=4; - // - // sign - +1..add, -1..substract - - unsigned int i; // loop counter - - register __m128i m0,m1,m2; - - short *temps; - int *tempd; - - __m128i *x1_128; - __m128i *x2_128; - __m128i *y_128; - __m128i mask; - - __m128i temp; - - x1_128 = (__m128i *)&x1[0]; - x2_128 = (__m128i *)&x2[0]; - y_128 = (__m128i *)&y[0]; - - if (sign == -1) - mask = (__m128i) _mm_set_epi16 (-1,1,-1,-1,-1,1,-1,-1); - else - mask = (__m128i) _mm_set_epi16 (1,-1,1,1,1,-1,1,1); - - // we compute 2*4 cpx multiply for each loop - for(i=0; i<(N>>3); i++) { - - m1 = x1_128[0]; - m2 = x2_128[0]; - - // bring x2 in conjugate form - // the first two instructions might be replaced with a single one in SSE3 - m2 = _mm_shufflelo_epi16(m2,_MM_SHUFFLE(0,1,3,2)); - m2 = _mm_shufflehi_epi16(m2,_MM_SHUFFLE(0,1,3,2)); - m2 = _mm_mullo_epi16(m2, mask); - - m0 = _mm_madd_epi16(m1,m2); // 1- compute x1[0]*x2[0], result is 32bit - - y_128[0] = _mm_add_epi32(m0,y_128[0]); - - // unroll 2 - m1 = x1_128[1]; - m2 = x2_128[1]; - - m2 = _mm_shufflelo_epi16(m2,_MM_SHUFFLE(0,1,3,2)); - m2 = _mm_shufflehi_epi16(m2,_MM_SHUFFLE(0,1,3,2)); - m2 = _mm_mullo_epi16(m2, mask); - - m0 = _mm_madd_epi16(m1,m2); - - y_128[1] = _mm_add_epi32(m0,y_128[1]); - - // unroll 3 - m1 = x1_128[2]; - m2 = x2_128[2]; - - m2 = _mm_shufflelo_epi16(m2,_MM_SHUFFLE(0,1,3,2)); - m2 = _mm_shufflehi_epi16(m2,_MM_SHUFFLE(0,1,3,2)); - m2 = _mm_mullo_epi16(m2, mask); - - m0 = _mm_madd_epi16(m1,m2); - - y_128[2] = _mm_add_epi32(m0,y_128[2]); - - - // unroll 4 - m1 = x1_128[3]; - m2 = x2_128[3]; - - m2 = _mm_shufflelo_epi16(m2,_MM_SHUFFLE(0,1,3,2)); - m2 = _mm_shufflehi_epi16(m2,_MM_SHUFFLE(0,1,3,2)); - m2 = _mm_mullo_epi16(m2, mask); - - m0 = _mm_madd_epi16(m1,m2); - - y_128[3] = _mm_add_epi32(m0,y_128[3]); - - x1_128+=4; - x2_128+=4; - y_128 +=4; - - } - - - _mm_empty(); - _m_empty(); - - return(0); -} - -#ifdef MAIN -#define L 16 - -main () -{ - - short input[256] __attribute__((aligned(16))); - short input2[256] __attribute__((aligned(16))); - short output[256] __attribute__((aligned(16))); - - int i; - - Zero_Buffer(output,256*2); - - for (i=0; i<16; i+=2) - printf("output[%d] = %d + %d i\n",i,output[i],output[i+1]); - - input[0] = 100; - input[1] = 200; - input[2] = 100; - input[3] = 200; - input[4] = 1234; - input[5] = -1234; - input[6] = 1234; - input[7] = -1234; - input[8] = 100; - input[9] = 200; - input[10] = 100; - input[11] = 200; - input[12] = 1000; - input[13] = 2000; - input[14] = 1000; - input[15] = 2000; - - input2[0] = 1; - input2[1] = 2; - input2[2] = 1; - input2[3] = 2; - input2[4] = 10; - input2[5] = 20; - input2[6] = 10; - input2[7] = 20; - input2[8] = 1; - input2[9] = 2; - input2[10] = 1; - input2[11] = 2; - input2[12] = 1000; - input2[13] = 2000; - input2[14] = 1000; - input2[15] = 2000; - - - mult_cpx_vector_h(input2,input2,output,8,0,1); - - for (i=0; i<16; i+=2) - printf("output[%d] = %d + %d i\n",i,output[i],output[i+1]); - - Zero_Buffer(output,256*2); - mult_cpx_vector_h(input2,input2,output,8,0,-1); - - for (i=0; i<16; i+=2) - printf("output[%d] = %d + %d i\n",i,output[i],output[i+1]); - -} - -#endif //MAIN diff --git a/openair1/PHY/TOOLS/defs.h b/openair1/PHY/TOOLS/defs.h index 6960fcda4c8793ff4920a0cc99edc60c5dddff42..963c1384b7f14321210168527146eb7e9f41a126 100644 --- a/openair1/PHY/TOOLS/defs.h +++ b/openair1/PHY/TOOLS/defs.h @@ -64,6 +64,8 @@ struct complex32 { int32_t i; }; +//cmult_sv.h + /*!\fn void multadd_real_vector_complex_scalar(int16_t *x,int16_t *alpha,int16_t *y,uint32_t N) This function performs componentwise multiplication and accumulation of a complex scalar and a real vector. @param x Vector input (Q1.15) @@ -95,192 +97,14 @@ void multadd_complex_vector_real_scalar(int16_t *x, uint8_t zero_flag, uint32_t N); - -/*!\fn int32_t mult_cpx_vector(int16_t *x1,int16_t *x2,int16_t *y,uint32_t N,int32_t output_shift) -This function performs optimized componentwise multiplication of two Q1.15 vectors in repeated format. - -@param x1 Input 1 in the format |Re0 Im0 Re0 Im0|,......,|Re(N-1) Im(N-1) Re(N-1) Im(N-1)| -@param x2 Input 2 in the format |Re0 -Im0 Im0 Re0|,......,|Re(N-1) -Im(N-1) Im(N-1) Re(N-1)| -@param y Output in the format |Re0 Im0 Re0 Im0|,......,|Re(N-1) Im(N-1) Re(N-1) Im(N-1)| -@param N Length of Vector WARNING: N must be a multiple of 8 (4x loop unrolling and two complex multiplies per cycle) -@param output_shift Number of bits to shift output down to Q1.15 (should be 15 for Q1.15 inputs) WARNING: log2_amp>0 can cause overflow!! - -The function implemented is : \f$\mathbf{y} = \mathbf{x_1}\odot\mathbf{x_2}\f$ -*/ -int32_t mult_cpx_vector(int16_t *x1, - int16_t *x2, - int16_t *y, - uint32_t N, - int32_t output_shift); - -/*!\fn int32_t mult_cpx_vector_norep(int16_t *x1,int16_t *x2,int16_t *y,uint32_t N,int32_t output_shift) -This function performs optimized componentwise multiplication of two Q1.15 vectors with normal formatted output. - -@param x1 Input 1 in the format |Re0 Im0 Re0 Im0|,......,|Re(N-1) Im(N-1) Re(N-1) Im(N-1)| -@param x2 Input 2 in the format |Re0 -Im0 Im0 Re0|,......,|Re(N-1) -Im(N-1) Im(N-1) Re(N-1)| -@param y Output in the format |Re0 Im0 Re1 Im1|,......,|Re(N-2) Im(N-2) Re(N-1) Im(N-1)| -@param N Length of Vector WARNING: N must be a multiple of 8 (4x loop unrolling and two complex multiplies per cycle) -@param output_shift Number of bits to shift output down to Q1.15 (should be 15 for Q1.15 inputs) WARNING: log2_amp>0 can cause overflow!! - -The function implemented is : \f$\mathbf{y} = \mathbf{x_1}\odot\mathbf{x_2}\f$ -*/ -int32_t mult_cpx_vector_norep(int16_t *x1, - int16_t *x2, - int16_t *y, - uint32_t N, - int32_t output_shift); - - -/*!\fn int32_t mult_cpx_vector_norep2(int16_t *x1,int16_t *x2,int16_t *y,uint32_t N,int32_t output_shift) -This function performs optimized componentwise multiplication of two Q1.15 vectors with normal formatted output. - -@param x1 Input 1 in the format |Re0 Im0 Re0 Im0|,......,|Re(N-1) Im(N-1) Re(N-1) Im(N-1)| -@param x2 Input 2 in the format |Re0 -Im0 Im0 Re0|,......,|Re(N-1) -Im(N-1) Im(N-1) Re(N-1)| -@param y Output in the format |Re0 Im0 Re1 Im1|,......,|Re(N-2) Im(N-2) Re(N-1) Im(N-1)| -@param N Length of Vector WARNING: N must be a multiple of 8 (no unrolling and two complex multiplies per cycle) -@param output_shift Number of bits to shift output down to Q1.15 (should be 15 for Q1.15 inputs) WARNING: log2_amp>0 can cause overflow!! - -The function implemented is : \f$\mathbf{y} = \mathbf{x_1}\odot\mathbf{x_2}\f$ -*/ -int32_t mult_cpx_vector_norep2(int16_t *x1, - int16_t *x2, - int16_t *y, - uint32_t N, - int32_t output_shift); - -int32_t mult_cpx_vector_norep_conj(int16_t *x1, - int16_t *x2, - int16_t *y, - uint32_t N, - int32_t output_shift); - -int32_t mult_cpx_vector_norep_conj2(int16_t *x1, - int16_t *x2, - int16_t *y, - uint32_t N, - int32_t output_shift); - -/*!\fn int32_t mult_cpx_vector2(int16_t *x1,int16_t *x2,int16_t *y,uint32_t N,int32_t output_shift) -This function performs optimized componentwise multiplication of two Q1.15 vectors. - -@param x1 Input 1 in the format |Re0 Im0 Re0 Im0|,......,|Re(N-1) Im(N-1) Re(N-1) Im(N-1)| -@param x2 Input 2 in the format |Re0 -Im0 Im0 Re0|,......,|Re(N-1) -Im(N-1) Im(N-1) Re(N-1)| -@param y Output in the format |Re0 Im0 Re0 Im0|,......,|Re(N-1) Im(N-1) Re(N-1) Im(N-1)| -@param N Length of Vector WARNING: N must be a multiple of 2 (2 complex multiplies per cycle) -@param output_shift Number of bits to shift output down to Q1.15 (should be 15 for Q1.15 inputs) WARNING: log2_amp>0 can cause overflow!! - -The function implemented is : \f$\mathbf{y} = \mathbf{x_1}\odot\mathbf{x_2}\f$ -*/ -int32_t mult_cpx_vector2(int16_t *x1, - int16_t *x2, - int16_t *y, - uint32_t N, - int32_t output_shift); - -/*!\fn int32_t mult_cpx_vector_add(int16_t *x1,int16_t *x2,int16_t *y,uint32_t N,int32_t output_shift) -This function performs optimized componentwise multiplication of two Q1.15 vectors. The output IS ADDED TO y. WARNING: make sure that output_shift is set to the right value, so that the result of the multiplication has the comma at the same place as y. - -@param x1 Input 1 in the format |Re0 Im0 Re0 Im0|,......,|Re(N-1) Im(N-1) Re(N-1) Im(N-1)| -@param x2 Input 2 in the format |Re0 -Im0 Im0 Re0|,......,|Re(N-1) -Im(N-1) Im(N-1) Re(N-1)| -@param y Output in the format |Re0 Im0 Re0 Im0|,......,|Re(N-1) Im(N-1) Re(N-1) Im(N-1)| -@param N Length of Vector WARNING: N>=4 -@param output_shift Number of bits to shift output down to Q1.15 (should be 15 for Q1.15 inputs) WARNING: log2_amp>0 can cause overflow!! - -The function implemented is : \f$\mathbf{y} += \mathbf{x_1}\odot\mathbf{x_2}\f$ -*/ - -int32_t mult_cpx_vector_add(int16_t *x1, - int16_t *x2, - int16_t *y, - uint32_t N, - int32_t output_shift); - - -int32_t mult_cpx_vector_h_add32(int16_t *x1, - int16_t *x2, - int16_t *y, - uint32_t N, - int16_t sign); - -int32_t mult_cpx_vector_add32(int16_t *x1, - int16_t *x2, - int16_t *y, - uint32_t N); - -int32_t mult_vector32(int16_t *x1, - int16_t *x2, +int rotate_cpx_vector(int16_t *x, + int16_t *alpha, int16_t *y, - uint32_t N); - -int32_t mult_vector32_scalar(int16_t *x1, - int32_t x2, - int16_t *y, - uint32_t N); + uint32_t N, + uint16_t output_shift); -int32_t mult_cpx_vector32_conj(int16_t *x, - int16_t *y, - uint32_t N); -int32_t mult_cpx_vector32_real(int16_t *x1, - int16_t *x2, - int16_t *y, - uint32_t N); -int32_t shift_and_pack(int16_t *y, - uint32_t N, - int32_t output_shift); - -/*!\fn int32_t mult_cpx_vector_h(int16_t *x1,int16_t *x2,int16_t *y,uint32_t N,int32_t output_shift,int16_t sign) -This function performs optimized componentwise multiplication of the vector x1 with the conjugate of the vector x2. The output IS ADDED TO y. WARNING: make sure that output_shift is set to the right value, so that the result of the multiplication has the comma at the same place as y. - -@param x1 Input 1 in the format |Re0 Im0 Re0 Im0|,......,|Re(N-1) Im(N-1) Re(N-1) Im(N-1)| -@param x2 Input 2 in the format |Re0 Im0 Re0 Im0|,......,|Re(N-1) Im(N-1) Re(N-1) Im(N-1)| -@param y Output in the format |Re0 Im0 Re0 Im0|,......,|Re(N-1) Im(N-1) Re(N-1) Im(N-1)| -@param N Length of Vector (complex samples) WARNING: N>=4 -@param output_shift Number of bits to shift output down to Q1.15 (should be 15 for Q1.15 inputs) WARNING: log2_amp>0 can cause overflow!! -@param sign +1..add, -1..substract - -The function implemented is : \f$\mathbf{y} = \mathbf{y} + \mathbf{x_1}\odot\mathbf{x_2}^*\f$ -*/ -int32_t mult_cpx_vector_h(int16_t *x1, - int16_t *x2, - int16_t *y, - uint32_t N, - int32_t output_shift, - int16_t sign); - -/*!\fn int32_t mult_cpx_matrix_h(int16_t *x1[2][2],int16_t *x2[2][2],int16_t *y[2][2],uint32_t N,uint16_t output_shift,int16_t hermitian) -This function performs optimized componentwise matrix multiplication of the 2x2 matrices x1 with the 2x2 matrices x2. The output IS ADDED TO y (i.e. make sure y is initilized correctly). WARNING: make sure that output_shift is set to the right value, so that the result of the multiplication has the comma at the same place as y. - -@param x1 Input 1 in the format |Re0 Im0 Re0 Im0|,......,|Re(N-1) Im(N-1) Re(N-1) Im(N-1)| -@param x2 Input 2 in the format |Re0 Im0 Re0 Im0|,......,|Re(N-1) Im(N-1) Re(N-1) Im(N-1)| -@param y Output in the format |Re0 Im0 Re0 Im0|,......,|Re(N-1) Im(N-1) Re(N-1) Im(N-1)| -@param N Length of Vector (complex samples) WARNING: N>=4 -@param output_shift Number of bits to shift output down to Q1.15 (should be 15 for Q1.15 inputs) WARNING: log2_amp>0 can cause overflow!! -@param hermitian if !=0 the hermitian transpose is returned (i.e. A^H*B instead of A*B^H) -*/ -int32_t mult_cpx_matrix_h(int16_t *x1[2][2], - int16_t *x2[2][2], - int16_t *y[2][2], - uint32_t N, - uint16_t output_shift, - int16_t hermitian); - - -/*!\fn int32_t mult_cpx_matrix_vector(int32_t *x1[2][2],int32_t *x2[2],int32_t *y[2],uint32_t N,uint16_t output_shift) -This function performs optimized componentwise matrix-vector multiplication of the 2x2 matrices x1 with the 2x1 vectors x2. The output IS ADDED TO y (i.e. make sure y is initilized correctly). WARNING: make sure that output_shift is set to the right value, so that the result of the multiplication has the comma at the same place as y. - -@param x1 Input 1 in the format |Re0 Im0 Re0 Im0|,......,|Re(N-1) Im(N-1) Re(N-1) Im(N-1)| -@param x2 Input 2 in the format |Re0 -Im0 Im0 Re0|,......,|Re(N-1) -Im(N-1) Im(N-1) Re(N-1)| -@param y Output in the format |Re0 Im0 Re0 Im0|,......,|Re(N-1) Im(N-1) Re(N-1) Im(N-1)| WARNING: y must be different for x2 -@param N Length of Vector (complex samples) WARNING: N>=4 -@param output_shift Number of bits to shift output down to Q1.15 (should be 15 for Q1.15 inputs) WARNING: log2_amp>0 can cause overflow!! -*/ -int32_t mult_cpx_matrix_vector(int32_t *x1[2][2], - int32_t *x2[2], - int32_t *y[2], - uint32_t N, - uint16_t output_shift); /*!\fn void init_fft(uint16_t size,uint8_t logsize,uint16_t *rev) \brief Initialize the FFT engine for a given size @@ -289,6 +113,25 @@ int32_t mult_cpx_matrix_vector(int32_t *x1[2][2], @param rev Pointer to bit-reversal permutation array */ +//cmult_vv.c +/*! + Multiply elementwise the complex conjugate of x1 with x2. + @param x1 - input 1 in the format |Re0 Im0 Re1 Im1|,......,|Re(N-2) Im(N-2) Re(N-1) Im(N-1)| + We assume x1 with a dinamic of 15 bit maximum + @param x2 - input 2 in the format |Re0 Im0 Re1 Im1|,......,|Re(N-2) Im(N-2) Re(N-1) Im(N-1)| + We assume x2 with a dinamic of 14 bit maximum + @param y - output in the format |Re0 Im0 Re1 Im1|,......,|Re(N-2) Im(N-2) Re(N-1) Im(N-1)| + @param N - the size f the vectors (this function does N cpx mpy. WARNING: N>=4; + @param output_shift - shift to be applied to generate output +*/ + +int mult_cpx_conj_vector(int16_t *x1, + int16_t *x2, + int16_t *y, + uint32_t N, + int output_shift); + +// lte_dfts.c void init_fft(uint16_t size, uint8_t logsize, uint16_t *rev); @@ -355,6 +198,7 @@ int32_t rotate_cpx_vector(int16_t *x, uint16_t output_shift); +//cadd_sv.c /*!\fn int32_t add_cpx_vector(int16_t *x,int16_t *alpha,int16_t *y,uint32_t N) This function performs componentwise addition of a vector with a complex scalar. diff --git a/openair1/PHY/TOOLS/time_meas.h b/openair1/PHY/TOOLS/time_meas.h index a3df8970535976cdbe1b235dd6ad56e4a588d38c..3e621ac281bb41dde0164b9ad848f87becfd9f0f 100644 --- a/openair1/PHY/TOOLS/time_meas.h +++ b/openair1/PHY/TOOLS/time_meas.h @@ -26,6 +26,9 @@ Address : Eurecom, Campus SophiaTech, 450 Route des Chappes, CS 50193 - 06904 Biot Sophia Antipolis cedex, FRANCE *******************************************************************************/ +#ifndef __TIME_MEAS_DEFS__H__ +#define __TIME_MEAS_DEFS__H__ + #include <unistd.h> #include <math.h> #include <stdint.h> @@ -148,3 +151,4 @@ static inline void copy_meas(time_stats_t *dst_ts,time_stats_t *src_ts) dst_ts->max=src_ts->max; } } +#endif