libosmocore 1.10.0.47-38eb4
Osmocom core library
conv_acc_neon_impl.h File Reference

Accelerated Viterbi decoder implementation: straight port of the SSE code to NEON, based on Tom Tsou's work. More...

Go to the source code of this file.

Macros

#define __always_inline   inline __attribute__((always_inline))
 
#define NEON_BUTTERFLY(M0, M1, M2, M3, M4)
 
#define NEON_DEINTERLEAVE_K5(M0, M1, M2, M3)
 
#define NEON_DEINTERLEAVE_K7(M0, M1, M2, M3, M4, M5, M6, M7, M8, M9, M10, M11, M12, M13, M14, M15)
 
#define NEON_BRANCH_METRIC_N2(M0, M1, M2, M3, M4, M6, M7)
 
#define NEON_BRANCH_METRIC_N4(M0, M1, M2, M3, M4, M5)
 
#define NEON_NORMALIZE_K5(M0, M1, M2, M3)
 
#define NEON_NORMALIZE_K7(M0, M1, M2, M3, M4, M5, M6, M7, M8, M9, M10, M11)
 

Functions

__always_inline void _neon_metrics_k5_n2 (const int16_t *val, const int16_t *outa, int16_t *sumsa, int16_t *paths, int norm)
 
__always_inline void _neon_metrics_k5_n4 (const int16_t *val, const int16_t *outa, int16_t *sumsa, int16_t *paths, int norm)
 
static __always_inline void _neon_metrics_k7_n2 (const int16_t *val, const int16_t *outa, int16_t *sumsa, int16_t *paths, int norm)
 
static __always_inline void _neon_metrics_k7_n4 (const int16_t *val, const int16_t *outa, int16_t *sumsa, int16_t *paths, int norm)
 

Detailed Description

Accelerated Viterbi decoder implementation: straight port of the SSE code to NEON, based on Tom Tsou's work.

Macro Definition Documentation

◆ __always_inline

#define __always_inline   inline __attribute__((always_inline))

◆ NEON_BRANCH_METRIC_N2

#define NEON_BRANCH_METRIC_N2(M0, M1, M2, M3, M4, M6, M7)
Value:
{ \
M0 = vmulq_s16(M4, M0); \
M1 = vmulq_s16(M4, M1); \
M2 = vmulq_s16(M4, M2); \
M3 = vmulq_s16(M4, M3); \
M6 = vcombine_s16(vpadd_s16(vget_low_s16(M0), vget_high_s16(M0)), vpadd_s16(vget_low_s16(M1), vget_high_s16(M1))); \
M7 = vcombine_s16(vpadd_s16(vget_low_s16(M2), vget_high_s16(M2)), vpadd_s16(vget_low_s16(M3), vget_high_s16(M3))); \
}

Referenced by _neon_metrics_k7_n2().
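
For orientation, this macro appears to compute eight rate-1/2 branch metrics at once: M4 presumably holds the received soft symbol pair replicated across lanes and M0..M3 the expected encoder outputs, and each adjacent lane pair is summed into M6/M7 by the vpadd_s16 calls. A per-branch scalar sketch of the same arithmetic (the array layout is an assumption for illustration, not part of this header):

#include <stdint.h>

/* Hedged scalar sketch of one lane pair of NEON_BRANCH_METRIC_N2:
 * a rate-1/2 branch metric as the dot product of the received soft
 * symbol pair with the expected encoder output pair. */
static inline int16_t branch_metric_n2(const int16_t *val, /* 2 received soft bits */
				       const int16_t *out) /* 2 expected outputs   */
{
	return (int16_t)(val[0] * out[0] + val[1] * out[1]);
}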

◆ NEON_BRANCH_METRIC_N4

#define NEON_BRANCH_METRIC_N4(M0, M1, M2, M3, M4, M5)
Value:
{ \
M0 = vmulq_s16(M4, M0); \
M1 = vmulq_s16(M4, M1); \
M2 = vmulq_s16(M4, M2); \
M3 = vmulq_s16(M4, M3); \
int16x4_t t1 = vpadd_s16(vpadd_s16(vget_low_s16(M0), vget_high_s16(M0)), vpadd_s16(vget_low_s16(M1), vget_high_s16(M1))); \
int16x4_t t2 = vpadd_s16(vpadd_s16(vget_low_s16(M2), vget_high_s16(M2)), vpadd_s16(vget_low_s16(M3), vget_high_s16(M3))); \
M5 = vcombine_s16(t1, t2); \
}

Referenced by _neon_metrics_k5_n4(), and _neon_metrics_k7_n4().
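
The N=4 variant follows the same pattern with four soft values per branch: after the lane-wise multiply, two rounds of pairwise additions reduce each group of four adjacent lanes to a single sum, so M5 ends up holding eight metrics, presumably rate-1/4 branch metrics. Scalar sketch of one such group (layout illustrative only):

#include <stdint.h>

/* Hedged scalar sketch of one lane group of NEON_BRANCH_METRIC_N4:
 * a rate-1/4 branch metric over four received soft bits. */
static inline int16_t branch_metric_n4(const int16_t *val, const int16_t *out)
{
	return (int16_t)(val[0] * out[0] + val[1] * out[1] +
			 val[2] * out[2] + val[3] * out[3]);
}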

◆ NEON_BUTTERFLY

#define NEON_BUTTERFLY(M0, M1, M2, M3, M4)
Value:
{ \
M3 = vqaddq_s16(M0, M2); \
M4 = vqsubq_s16(M1, M2); \
M0 = vqsubq_s16(M0, M2); \
M1 = vqaddq_s16(M1, M2); \
M2 = vmaxq_s16(M3, M4); \
M3 = vreinterpretq_s16_u16(vcgtq_s16(M3, M4)); \
M4 = vmaxq_s16(M0, M1); \
M1 = vreinterpretq_s16_u16(vcgtq_s16(M0, M1)); \
}

Referenced by _neon_metrics_k5_n2(), _neon_metrics_k5_n4(), _neon_metrics_k7_n2(), and _neon_metrics_k7_n4().
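
Read as an Add-Compare-Select step: on entry M0 and M1 hold path metrics and M2 the branch metrics; on exit M2 and M4 carry the surviving (maximum) candidate metrics, while M3 and M1 hold the comparison masks (all-ones where the first candidate won), presumably stored later as path decisions for traceback. A per-lane scalar sketch, with the saturating add/subtract simplified to plain arithmetic (an assumption for brevity):

#include <stdint.h>

/* Hedged per-lane sketch of NEON_BUTTERFLY (saturation omitted).
 * m0, m1: two source-state path metrics; bm: branch metric. */
static inline void butterfly_lane(int16_t m0, int16_t m1, int16_t bm,
				  int16_t *surv0, int16_t *dec0,
				  int16_t *surv1, int16_t *dec1)
{
	int16_t a = m0 + bm, b = m1 - bm;	/* candidates for one successor state */
	int16_t c = m0 - bm, d = m1 + bm;	/* candidates for the other successor */

	*surv0 = (a > b) ? a : b;
	*dec0  = (a > b) ? -1 : 0;	/* vcgtq_s16 yields an all-ones mask */
	*surv1 = (c > d) ? c : d;
	*dec1  = (c > d) ? -1 : 0;
}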

◆ NEON_DEINTERLEAVE_K5

#define NEON_DEINTERLEAVE_K5(M0, M1, M2, M3)
Value:
{ \
int16x8x2_t tmp; \
tmp = vuzpq_s16(M0, M1); \
M2 = tmp.val[0]; \
M3 = tmp.val[1]; \
}

Referenced by _neon_metrics_k5_n2(), and _neon_metrics_k5_n4().
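
vuzpq_s16 splits two interleaved vectors into their even-indexed and odd-indexed elements, so across the 16 states of a K=5 code M2 receives lanes 0, 2, 4, ... and M3 lanes 1, 3, 5, ... of the concatenated M0:M1. A minimal standalone example of the same unzip (values are illustrative):

#include <arm_neon.h>

/* Illustrative use of the unzip underlying NEON_DEINTERLEAVE_K5. */
static void deinterleave_k5_example(void)
{
	const int16_t in[16] = { 0, 1, 2, 3, 4, 5, 6, 7,
				 8, 9, 10, 11, 12, 13, 14, 15 };
	int16_t even[8], odd[8];

	int16x8x2_t tmp = vuzpq_s16(vld1q_s16(&in[0]), vld1q_s16(&in[8]));
	vst1q_s16(even, tmp.val[0]);	/* 0, 2, 4, ..., 14 */
	vst1q_s16(odd,  tmp.val[1]);	/* 1, 3, 5, ..., 15 */
}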

◆ NEON_DEINTERLEAVE_K7

#define NEON_DEINTERLEAVE_K7(M0, M1, M2, M3, M4, M5, M6, M7, M8, M9, M10, M11, M12, M13, M14, M15)
Value:
{ \
int16x8x2_t tmp; \
tmp = vuzpq_s16(M0, M1); \
M8 = tmp.val[0]; M9 = tmp.val[1]; \
tmp = vuzpq_s16(M2, M3); \
M10 = tmp.val[0]; M11 = tmp.val[1]; \
tmp = vuzpq_s16(M4, M5); \
M12 = tmp.val[0]; M13 = tmp.val[1]; \
tmp = vuzpq_s16(M6, M7); \
M14 = tmp.val[0]; M15 = tmp.val[1]; \
}

Referenced by _neon_metrics_k7_n2(), and _neon_metrics_k7_n4().
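
This is the same unzip applied to four input pairs in turn, covering the 64 states of a K=7 code (eight registers of eight 16-bit values) rather than the 16 states handled by NEON_DEINTERLEAVE_K5.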

◆ NEON_NORMALIZE_K5

#define NEON_NORMALIZE_K5(M0, M1, M2, M3)
Value:
{ \
M2 = vminq_s16(M0, M1); \
int16x4_t t = vpmin_s16(vget_low_s16(M2), vget_high_s16(M2)); \
t = vpmin_s16(t, t); \
t = vpmin_s16(t, t); \
M2 = vdupq_lane_s16(t, 0); \
M0 = vqsubq_s16(M0, M2); \
M1 = vqsubq_s16(M1, M2); \
}

Referenced by _neon_metrics_k5_n2(), and _neon_metrics_k5_n4().
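
Normalization keeps the accumulated path metrics from saturating: the macro finds the minimum across all 16 metrics (the vpmin_s16 cascade reduces the vector to its smallest lane, which vdupq_lane_s16 then broadcasts) and subtracts it from every metric with saturating subtraction. A scalar sketch of the same idea (names are illustrative):

#include <stdint.h>

/* Hedged scalar sketch of NEON_NORMALIZE_K5: rescale 16 path metrics
 * by their common minimum so they stay within int16_t range. */
static void normalize_k5_sketch(int16_t sums[16])
{
	int16_t min = sums[0];
	int i;

	for (i = 1; i < 16; i++)
		if (sums[i] < min)
			min = sums[i];
	for (i = 0; i < 16; i++)
		sums[i] -= min;	/* the NEON version uses saturating subtract */
}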

◆ NEON_NORMALIZE_K7

#define NEON_NORMALIZE_K7(M0, M1, M2, M3, M4, M5, M6, M7, M8, M9, M10, M11)
Value:
{ \
M8 = vminq_s16(M0, M1); \
M9 = vminq_s16(M2, M3); \
M10 = vminq_s16(M4, M5); \
M11 = vminq_s16(M6, M7); \
M8 = vminq_s16(M8, M9); \
M10 = vminq_s16(M10, M11); \
M8 = vminq_s16(M8, M10); \
int16x4_t t = vpmin_s16(vget_low_s16(M8), vget_high_s16(M8)); \
t = vpmin_s16(t, t); \
t = vpmin_s16(t, t); \
M8 = vdupq_lane_s16(t, 0); \
M0 = vqsubq_s16(M0, M8); \
M1 = vqsubq_s16(M1, M8); \
M2 = vqsubq_s16(M2, M8); \
M3 = vqsubq_s16(M3, M8); \
M4 = vqsubq_s16(M4, M8); \
M5 = vqsubq_s16(M5, M8); \
M6 = vqsubq_s16(M6, M8); \
M7 = vqsubq_s16(M7, M8); \
}

Referenced by _neon_metrics_k7_n2(), and _neon_metrics_k7_n4().
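
The same normalization applied to the 64 metrics of a K=7 trellis: the eight metric vectors are first reduced pairwise with vminq_s16, then the horizontal minimum is found, broadcast, and subtracted from all eight vectors.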

Function Documentation

◆ _neon_metrics_k5_n2()

__always_inline void _neon_metrics_k5_n2 (const int16_t *val, const int16_t *outa, int16_t *sumsa, int16_t *paths, int norm)

◆ _neon_metrics_k5_n4()

__always_inline void _neon_metrics_k5_n4 (const int16_t *val, const int16_t *outa, int16_t *sumsa, int16_t *paths, int norm)

◆ _neon_metrics_k7_n2()

static __always_inline void _neon_metrics_k7_n2 (const int16_t *val, const int16_t *outa, int16_t *sumsa, int16_t *paths, int norm)

◆ _neon_metrics_k7_n4()

static __always_inline void _neon_metrics_k7_n4 (const int16_t *val, const int16_t *outa, int16_t *sumsa, int16_t *paths, int norm)
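
None of the four per-step functions carries a description here. By analogy with the corresponding SSE implementation, each one appears to process a single trellis step: val points at the N received soft symbols for the step, outa at the precomputed expected-output table, sumsa at the running path metrics (16 for K=5, 64 for K=7), paths at the decision words written for later traceback, and norm selects whether the normalization macro runs. A possible driver loop, purely as a hedged sketch with the declarations of this header assumed in scope; the stride of paths per step and the normalization policy below are assumptions, not documented behaviour:

/* Hedged sketch of driving _neon_metrics_k5_n2() over a burst of
 * received soft symbols. */
static void decode_metrics_k5_n2_sketch(const int16_t *val, const int16_t *out,
					int16_t *sums, int16_t *paths, int len)
{
	int i;

	for (i = 0; i < len; i++)
		_neon_metrics_k5_n2(&val[2 * i],	/* 2 soft bits per step (N = 2)   */
				    out,		/* expected-output table          */
				    sums,		/* 16 running path metrics (K=5)  */
				    &paths[16 * i],	/* assumed: 16 decisions per step */
				    1);			/* assumed: normalize every step  */
}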