Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
O
OAI-RAN-5G-sheduler_MaxTBS
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Package Registry
Model registry
Operate
Environments
Terraform modules
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
MAQ5G-PFC
OAI-RAN-5G-sheduler_MaxTBS
Commits
a5e6690d
Commit
a5e6690d
authored
8 years ago
by
Florian Kaltenberger
Browse files
Options
Downloads
Patches
Plain Diff
adding multadd_cpx_vector
parent
ab57b0dd
No related branches found
No related tags found
No related merge requests found
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
openair1/PHY/TOOLS/cmult_vv.c
+79
-0
79 additions, 0 deletions
openair1/PHY/TOOLS/cmult_vv.c
openair1/PHY/TOOLS/defs.h
+19
-0
19 additions, 0 deletions
openair1/PHY/TOOLS/defs.h
with
98 additions
and
0 deletions
openair1/PHY/TOOLS/cmult_vv.c
+
79
−
0
View file @
a5e6690d
...
...
@@ -27,6 +27,7 @@
#if defined(__x86_64__) || defined(__i386__)
int16_t
conjug
[
8
]
__attribute__
((
aligned
(
16
)))
=
{
-
1
,
1
,
-
1
,
1
,
-
1
,
1
,
-
1
,
1
}
;
int16_t
conjug2
[
8
]
__attribute__
((
aligned
(
16
)))
=
{
1
,
-
1
,
1
,
-
1
,
1
,
-
1
,
1
,
-
1
}
;
#define simd_q15_t __m128i
#define simdshort_q15_t __m64
#elif defined(__arm__)
...
...
@@ -134,3 +135,81 @@ int mult_cpx_conj_vector(int16_t *x1,
return
(
0
);
}
int
multadd_cpx_vector
(
int16_t
*
x1
,
int16_t
*
x2
,
int16_t
*
y
,
uint8_t
zero_flag
,
uint32_t
N
,
int
output_shift
)
{
// Multiply elementwise the complex conjugate of x1 with x2.
// x1 - input 1 in the format |Re0 Im0 Re1 Im1|,......,|Re(N-2) Im(N-2) Re(N-1) Im(N-1)|
// We assume x1 with a dinamic of 15 bit maximum
//
// x2 - input 2 in the format |Re0 Im0 Re1 Im1|,......,|Re(N-2) Im(N-2) Re(N-1) Im(N-1)|
// We assume x2 with a dinamic of 14 bit maximum
///
// y - output in the format |Re0 Im0 Re1 Im1|,......,|Re(N-2) Im(N-2) Re(N-1) Im(N-1)|
//
// zero_flag - Set output (y) to zero prior to disable accumulation
//
// N - the size f the vectors (this function does N cpx mpy. WARNING: N>=4;
//
// output_shift - shift to be applied to generate output
uint32_t
i
;
// loop counter
simd_q15_t
*
x1_128
;
simd_q15_t
*
x2_128
;
simd_q15_t
*
y_128
;
#if defined(__x86_64__) || defined(__i386__)
simd_q15_t
tmp_re
,
tmp_im
;
simd_q15_t
tmpy0
,
tmpy1
;
#elif defined(__arm__)
int32x4_t
tmp_re
,
tmp_im
;
int32x4_t
tmp_re1
,
tmp_im1
;
int16x4x2_t
tmpy
;
int32x4_t
shift
=
vdupq_n_s32
(
-
output_shift
);
#endif
x1_128
=
(
simd_q15_t
*
)
&
x1
[
0
];
x2_128
=
(
simd_q15_t
*
)
&
x2
[
0
];
y_128
=
(
simd_q15_t
*
)
&
y
[
0
];
// we compute 4 cpx multiply for each loop
for
(
i
=
0
;
i
<
(
N
>>
2
);
i
++
)
{
#if defined(__x86_64__) || defined(__i386__)
tmp_re
=
_mm_sign_epi16
(
*
x1_128
,
*
(
__m128i
*
)
&
conjug2
[
0
]);
tmp_re
=
_mm_madd_epi16
(
tmp_re
,
*
x2_128
);
tmp_im
=
_mm_shufflelo_epi16
(
*
x1_128
,
_MM_SHUFFLE
(
2
,
3
,
0
,
1
));
tmp_im
=
_mm_shufflehi_epi16
(
tmp_im
,
_MM_SHUFFLE
(
2
,
3
,
0
,
1
));
tmp_im
=
_mm_madd_epi16
(
tmp_im
,
*
x2_128
);
tmp_re
=
_mm_srai_epi32
(
tmp_re
,
output_shift
);
tmp_im
=
_mm_srai_epi32
(
tmp_im
,
output_shift
);
tmpy0
=
_mm_unpacklo_epi32
(
tmp_re
,
tmp_im
);
//print_ints("unpack lo:",&tmpy0[i]);
tmpy1
=
_mm_unpackhi_epi32
(
tmp_re
,
tmp_im
);
//print_ints("unpack hi:",&tmpy1[i]);
if
(
zero_flag
==
1
)
*
y_128
=
_mm_packs_epi32
(
tmpy0
,
tmpy1
);
else
*
y_128
=
_mm_adds_epi16
(
*
y_128
,
_mm_packs_epi32
(
tmpy0
,
tmpy1
));
//print_shorts("*y_128:",&y_128[i]);
#elif defined(__arm__)
msg
(
"mult_cpx_vector not implemented for __arm__"
);
#endif
x1_128
++
;
x2_128
++
;
y_128
++
;
}
_mm_empty
();
_m_empty
();
return
(
0
);
}
This diff is collapsed.
Click to expand it.
openair1/PHY/TOOLS/defs.h
+
19
−
0
View file @
a5e6690d
...
...
@@ -126,6 +126,25 @@ int mult_cpx_conj_vector(int16_t *x1,
int
output_shift
,
int
madd
);
/*!
Element-wise multiplication and accumulation of two complex vectors x1 and x2.
@param x1 - input 1 in the format |Re0 Im0 Re1 Im1|,......,|Re(N-2) Im(N-2) Re(N-1) Im(N-1)|
We assume x1 with a dynamic range of 15 bits maximum
@param x2 - input 2 in the format |Re0 Im0 Re1 Im1|,......,|Re(N-2) Im(N-2) Re(N-1) Im(N-1)|
We assume x2 with a dynamic range of 14 bits maximum
@param y - output in the format |Re0 Im0 Re1 Im1|,......,|Re(N-2) Im(N-2) Re(N-1) Im(N-1)|
@param zero_flag - when equal to 1, the output (y) is overwritten instead of accumulated into; any other value accumulates
@param N - the size of the vectors (this function does N complex multiplications, 4 per loop iteration). WARNING: N>=4
@param output_shift - right shift applied to the products to generate the output
*/
int
multadd_cpx_vector
(
int16_t
*
x1
,
int16_t
*
x2
,
int16_t
*
y
,
uint8_t
zero_flag
,
uint32_t
N
,
int
output_shift
);
// lte_dfts.c
void
init_fft
(
uint16_t
size
,
uint8_t
logsize
,
...
...
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment