Merge remote-tracking branch 'vudentz/master'

This commit is contained in:
Colin Guthrie 2011-03-18 00:10:27 +00:00
commit b8db02755a
19 changed files with 2018 additions and 450 deletions

View file

@ -1820,7 +1820,7 @@ module_bluetooth_discover_la_LDFLAGS = $(MODULE_LDFLAGS)
module_bluetooth_discover_la_LIBADD = $(MODULE_LIBADD) $(DBUS_LIBS) libbluetooth-util.la
module_bluetooth_discover_la_CFLAGS = $(AM_CFLAGS) $(DBUS_CFLAGS)
libbluetooth_sbc_la_SOURCES = modules/bluetooth/sbc.c modules/bluetooth/sbc.h modules/bluetooth/sbc_tables.h modules/bluetooth/sbc_math.h modules/bluetooth/sbc_primitives.h modules/bluetooth/sbc_primitives.c modules/bluetooth/sbc_primitives_mmx.h modules/bluetooth/sbc_primitives_neon.h modules/bluetooth/sbc_primitives_mmx.c modules/bluetooth/sbc_primitives_neon.c
libbluetooth_sbc_la_SOURCES = modules/bluetooth/sbc/sbc.c modules/bluetooth/sbc/sbc.h modules/bluetooth/sbc/sbc_tables.h modules/bluetooth/sbc/sbc_math.h modules/bluetooth/sbc/sbc_primitives.h modules/bluetooth/sbc/sbc_primitives.c modules/bluetooth/sbc/sbc_primitives_mmx.h modules/bluetooth/sbc/sbc_primitives_neon.h modules/bluetooth/sbc/sbc_primitives_mmx.c modules/bluetooth/sbc/sbc_primitives_neon.c modules/bluetooth/sbc/sbc_primitives_armv6.c modules/bluetooth/sbc/sbc_primitives_iwmmxt.c
libbluetooth_sbc_la_LDFLAGS = -avoid-version
libbluetooth_sbc_la_LIBADD = $(MODULE_LIBADD)
libbluetooth_sbc_la_CFLAGS = $(AM_CFLAGS)

View file

@ -714,6 +714,47 @@ static void list_adapters(pa_bluetooth_discovery *y) {
send_and_add_to_pending(y, NULL, m, list_adapters_reply);
}
/*
 * Parse a single MediaTransport property from a D-Bus iterator and apply it
 * to the transport.
 *
 * The iterator must be positioned on the property name (a string) which is
 * followed by the property value wrapped in a variant. Currently only the
 * boolean "NREC" property is applied (stored in t->nrec); any other key or
 * value type is accepted and silently ignored.
 *
 * Returns 0 when the name/variant pair is well-formed, -1 (after logging)
 * when the name is not a string, the value is missing, or the value is not
 * a variant.
 */
int pa_bluetooth_transport_parse_property(pa_bluetooth_transport *t, DBusMessageIter *i)
{
    const char *key;
    DBusMessageIter variant_i;

    if (dbus_message_iter_get_arg_type(i) != DBUS_TYPE_STRING) {
        pa_log("Property name not a string.");
        return -1;
    }

    dbus_message_iter_get_basic(i, &key);

    if (!dbus_message_iter_next(i)) {
        pa_log("Property value missing");
        return -1;
    }

    if (dbus_message_iter_get_arg_type(i) != DBUS_TYPE_VARIANT) {
        pa_log("Property value not a variant.");
        return -1;
    }

    dbus_message_iter_recurse(i, &variant_i);

    switch (dbus_message_iter_get_arg_type(&variant_i)) {

        case DBUS_TYPE_BOOLEAN: {
            /* libdbus stores a DBUS_TYPE_BOOLEAN as a dbus_bool_t; read it
             * into a value of exactly that type (not into a pointer) and
             * normalise it before storing it in the pa_bool_t field. */
            dbus_bool_t value;

            dbus_message_iter_get_basic(&variant_i, &value);

            if (pa_streq(key, "NREC"))
                t->nrec = !!value;

            break;
        }

        default:
            /* Other value types carry no property we currently track */
            break;
    }

    return 0;
}
static DBusHandlerResult filter_cb(DBusConnection *bus, DBusMessage *m, void *userdata) {
DBusError err;
pa_bluetooth_discovery *y;
@ -861,6 +902,28 @@ static DBusHandlerResult filter_cb(DBusConnection *bus, DBusMessage *m, void *us
}
}
return DBUS_HANDLER_RESULT_NOT_YET_HANDLED;
} else if (dbus_message_is_signal(m, "org.bluez.MediaTransport", "PropertyChanged")) {
pa_bluetooth_device *d;
pa_bluetooth_transport *t;
void *state = NULL;
DBusMessageIter arg_i;
while ((d = pa_hashmap_iterate(y->devices, &state, NULL)))
if ((t = pa_hashmap_get(d->transports, dbus_message_get_path(m))))
break;
if (!t)
goto fail;
if (!dbus_message_iter_init(m, &arg_i)) {
pa_log("Failed to parse PropertyChanged: %s", err.message);
goto fail;
}
if (pa_bluetooth_transport_parse_property(t, &arg_i) < 0)
goto fail;
return DBUS_HANDLER_RESULT_NOT_YET_HANDLED;
}
@ -934,10 +997,11 @@ const pa_bluetooth_transport* pa_bluetooth_device_get_transport(const pa_bluetoo
return NULL;
}
int pa_bluetooth_transport_acquire(const pa_bluetooth_transport *t, const char *accesstype) {
int pa_bluetooth_transport_acquire(const pa_bluetooth_transport *t, const char *accesstype, size_t *imtu, size_t *omtu) {
DBusMessage *m, *r;
DBusError err;
int ret;
uint16_t i, o;
pa_assert(t);
pa_assert(t->y);
@ -955,7 +1019,7 @@ int pa_bluetooth_transport_acquire(const pa_bluetooth_transport *t, const char *
}
#ifdef DBUS_TYPE_UNIX_FD
if (!dbus_message_get_args(r, &err, DBUS_TYPE_UNIX_FD, &ret, DBUS_TYPE_INVALID)) {
if (!dbus_message_get_args(r, &err, DBUS_TYPE_UNIX_FD, &ret, DBUS_TYPE_UINT16, &i, DBUS_TYPE_UINT16, &o, DBUS_TYPE_INVALID)) {
pa_log("Failed to parse org.bluez.MediaTransport.Acquire(): %s", err.message);
ret = -1;
dbus_error_free(&err);
@ -963,6 +1027,12 @@ int pa_bluetooth_transport_acquire(const pa_bluetooth_transport *t, const char *
}
#endif
if (imtu)
*imtu = i;
if (omtu)
*omtu = o;
fail:
dbus_message_unref(r);
return ret;
@ -1028,6 +1098,7 @@ static DBusMessage *endpoint_set_configuration(DBusConnection *conn, DBusMessage
const char *path, *dev_path = NULL, *uuid = NULL;
uint8_t *config = NULL;
int size = 0;
pa_bool_t nrec;
enum profile p;
DBusMessageIter args, props;
DBusMessage *r;
@ -1063,6 +1134,10 @@ static DBusMessage *endpoint_set_configuration(DBusConnection *conn, DBusMessage
if (var != DBUS_TYPE_OBJECT_PATH)
goto fail;
dbus_message_iter_get_basic(&value, &dev_path);
} else if (strcasecmp(key, "NREC") == 0) {
if (var != DBUS_TYPE_BOOLEAN)
goto fail;
dbus_message_iter_get_basic(&value, &nrec);
} else if (strcasecmp(key, "Configuration") == 0) {
DBusMessageIter array;
if (var != DBUS_TYPE_ARRAY)
@ -1086,6 +1161,8 @@ static DBusMessage *endpoint_set_configuration(DBusConnection *conn, DBusMessage
p = PROFILE_A2DP_SOURCE;
t = transport_new(y, path, p, config, size);
if (nrec)
t->nrec = nrec;
pa_hashmap_put(d->transports, t->path, t);
pa_log_debug("Transport %s profile %d available", t->path, t->profile);
@ -1395,6 +1472,7 @@ pa_bluetooth_discovery* pa_bluetooth_discovery_get(pa_core *c) {
"type='signal',sender='org.bluez',interface='org.bluez.AudioSink',member='PropertyChanged'",
"type='signal',sender='org.bluez',interface='org.bluez.AudioSource',member='PropertyChanged'",
"type='signal',sender='org.bluez',interface='org.bluez.HandsfreeGateway',member='PropertyChanged'",
"type='signal',sender='org.bluez',interface='org.bluez.MediaTransport',member='PropertyChanged'",
NULL) < 0) {
pa_log("Failed to add D-Bus matches: %s", err.message);
goto fail;
@ -1462,6 +1540,7 @@ void pa_bluetooth_discovery_unref(pa_bluetooth_discovery *y) {
"type='signal',sender='org.bluez',interface='org.bluez.AudioSink',member='PropertyChanged'",
"type='signal',sender='org.bluez',interface='org.bluez.AudioSource',member='PropertyChanged'",
"type='signal',sender='org.bluez',interface='org.bluez.HandsfreeGateway',member='PropertyChanged'",
"type='signal',sender='org.bluez',interface='org.bluez.MediaTransport',member='PropertyChanged'",
NULL);
if (y->filter_added)

View file

@ -70,6 +70,7 @@ struct pa_bluetooth_transport {
uint8_t codec;
uint8_t *config;
int config_size;
pa_bool_t nrec;
};
/* This enum is shared among Audio, Headset, AudioSink, and AudioSource, although not all values are acceptable in all profiles */
@ -126,8 +127,9 @@ const pa_bluetooth_device* pa_bluetooth_discovery_get_by_address(pa_bluetooth_di
const pa_bluetooth_transport* pa_bluetooth_discovery_get_transport(pa_bluetooth_discovery *y, const char *path);
const pa_bluetooth_transport* pa_bluetooth_device_get_transport(const pa_bluetooth_device *d, enum profile profile);
int pa_bluetooth_transport_acquire(const pa_bluetooth_transport *t, const char *accesstype);
int pa_bluetooth_transport_acquire(const pa_bluetooth_transport *t, const char *accesstype, size_t *imtu, size_t *omtu);
void pa_bluetooth_transport_release(const pa_bluetooth_transport *t, const char *accesstype);
int pa_bluetooth_transport_parse_property(pa_bluetooth_transport *t, DBusMessageIter *i);
pa_hook* pa_bluetooth_discovery_hook(pa_bluetooth_discovery *d);

View file

@ -51,13 +51,16 @@
#include "module-bluetooth-device-symdef.h"
#include "ipc.h"
#include "sbc.h"
#include "sbc/sbc.h"
#include "rtp.h"
#include "bluetooth-util.h"
#define MAX_BITPOOL 64
#define MIN_BITPOOL 2U
#define BITPOOL_DEC_LIMIT 32
#define BITPOOL_DEC_STEP 5
PA_MODULE_AUTHOR("Joao Paulo Rechi Vita");
PA_MODULE_DESCRIPTION("Bluetooth audio sink and source");
PA_MODULE_VERSION(PACKAGE_VERSION);
@ -117,6 +120,8 @@ struct a2dp_info {
size_t buffer_size; /* Size of the buffer */
uint16_t seq_num; /* Cumulative packet sequence */
uint8_t min_bitpool;
uint8_t max_bitpool;
};
struct hsp_info {
@ -574,7 +579,7 @@ static int setup_a2dp(struct userdata *u) {
}
/* Run from main thread */
static void setup_sbc(struct a2dp_info *a2dp) {
static void setup_sbc(struct a2dp_info *a2dp, enum profile p) {
sbc_capabilities_t *active_capabilities;
pa_assert(a2dp);
@ -660,7 +665,11 @@ static void setup_sbc(struct a2dp_info *a2dp) {
pa_assert_not_reached();
}
a2dp->sbc.bitpool = active_capabilities->max_bitpool;
a2dp->min_bitpool = active_capabilities->min_bitpool;
a2dp->max_bitpool = active_capabilities->max_bitpool;
/* Set minimum bitpool for source to get the maximum possible block_size */
a2dp->sbc.bitpool = p == PROFILE_A2DP ? a2dp->max_bitpool : a2dp->min_bitpool;
a2dp->codesize = sbc_get_codesize(&a2dp->sbc);
a2dp->frame_length = sbc_get_frame_length(&a2dp->sbc);
}
@ -728,7 +737,7 @@ static int set_conf(struct userdata *u) {
/* setup SBC encoder now we agree on parameters */
if (u->profile == PROFILE_A2DP || u->profile == PROFILE_A2DP_SOURCE) {
setup_sbc(&u->a2dp);
setup_sbc(&u->a2dp, u->profile);
u->block_size =
((u->link_mtu - sizeof(struct rtp_header) - sizeof(struct rtp_payload))
@ -743,6 +752,39 @@ static int set_conf(struct userdata *u) {
return 0;
}
/*
 * Switch the SBC encoder to a different bitpool and resize the sink's block
 * accordingly.
 *
 * from IO thread
 *
 * The requested value is first clamped to [a2dp->min_bitpool,
 * a2dp->max_bitpool]. If the clamped value is the one already in use, nothing
 * is changed. Otherwise codesize and frame_length are refreshed from the
 * encoder, u->block_size is recomputed from the link MTU, and the sink's
 * max request and fixed latency are updated to match.
 */
static void a2dp_set_bitpool(struct userdata *u, uint8_t bitpool)
{
    struct a2dp_info *a2dp;

    pa_assert(u);

    a2dp = &u->a2dp;

    /* Clamp before comparing: an out-of-range request that ends up at the
     * current bitpool is not a change and must not reconfigure the sink. */
    if (bitpool > a2dp->max_bitpool)
        bitpool = a2dp->max_bitpool;
    else if (bitpool < a2dp->min_bitpool)
        bitpool = a2dp->min_bitpool;

    if (a2dp->sbc.bitpool == bitpool)
        return;

    a2dp->sbc.bitpool = bitpool;

    /* Both sizes depend on the bitpool, so refresh them from the codec */
    a2dp->codesize = sbc_get_codesize(&a2dp->sbc);
    a2dp->frame_length = sbc_get_frame_length(&a2dp->sbc);

    pa_log_debug("Bitpool has changed to %u", a2dp->sbc.bitpool);

    /* Number of whole SBC frames that fit in one RTP packet, expressed in
     * bytes of PCM input */
    u->block_size =
        (u->link_mtu - sizeof(struct rtp_header) - sizeof(struct rtp_payload))
        / a2dp->frame_length * a2dp->codesize;

    pa_sink_set_max_request_within_thread(u->sink, u->block_size);
    pa_sink_set_fixed_latency_within_thread(u->sink,
            FIXED_LATENCY_PLAYBACK_A2DP + pa_bytes_to_usec(u->block_size, &u->sample_spec));
}
/* from IO thread, except in SCO over PCM */
static int setup_stream(struct userdata *u) {
@ -758,6 +800,9 @@ static int setup_stream(struct userdata *u) {
pa_log_debug("Stream properly set up, we're ready to roll!");
if (u->profile == PROFILE_A2DP)
a2dp_set_bitpool(u, u->a2dp.max_bitpool);
u->rtpoll_item = pa_rtpoll_item_new(u->rtpoll, PA_RTPOLL_NEVER, 1);
pollfd = pa_rtpoll_item_get_pollfd(u->rtpoll_item, NULL);
pollfd->fd = u->stream_fd;
@ -910,7 +955,8 @@ static int bt_transport_acquire(struct userdata *u, pa_bool_t start) {
return -1;
}
u->stream_fd = pa_bluetooth_transport_acquire(t, accesstype);
/* FIXME: Handle in/out MTU properly when unix socket is no longer supported */
u->stream_fd = pa_bluetooth_transport_acquire(t, accesstype, NULL, &u->link_mtu);
if (u->stream_fd < 0)
return -1;
@ -1441,7 +1487,7 @@ static int a2dp_process_push(struct userdata *u) {
d = pa_memblock_acquire(memchunk.memblock);
to_write = memchunk.length = pa_memblock_get_length(memchunk.memblock);
while (PA_LIKELY(to_decode > 0 && to_write > 0)) {
while (PA_LIKELY(to_decode > 0)) {
size_t written;
ssize_t decoded;
@ -1460,10 +1506,12 @@ static int a2dp_process_push(struct userdata *u) {
/* pa_log_debug("SBC: decoded: %lu; written: %lu", (unsigned long) decoded, (unsigned long) written); */
/* pa_log_debug("SBC: frame_length: %lu; codesize: %lu", (unsigned long) a2dp->frame_length, (unsigned long) a2dp->codesize); */
/* Reset frame length, it can be changed due to bitpool change */
a2dp->frame_length = sbc_get_frame_length(&a2dp->sbc);
pa_assert_fp((size_t) decoded <= to_decode);
pa_assert_fp((size_t) decoded == a2dp->frame_length);
pa_assert_fp((size_t) written <= to_write);
pa_assert_fp((size_t) written == a2dp->codesize);
p = (const uint8_t*) p + decoded;
@ -1475,6 +1523,8 @@ static int a2dp_process_push(struct userdata *u) {
frame_count++;
}
memchunk.length -= to_write;
pa_memblock_release(memchunk.memblock);
pa_source_post(u->source, &memchunk);
@ -1488,6 +1538,27 @@ static int a2dp_process_push(struct userdata *u) {
return ret;
}
/*
 * Step the SBC bitpool down by BITPOOL_DEC_STEP, stopping at
 * BITPOOL_DEC_LIMIT. Does nothing when the current bitpool is already at or
 * below that limit. The new value is applied through a2dp_set_bitpool().
 */
static void a2dp_reduce_bitpool(struct userdata *u)
{
    struct a2dp_info *info;
    uint8_t lowered;

    pa_assert(u);

    info = &u->a2dp;

    /* Only reduce while we are still above the floor */
    if (info->sbc.bitpool > BITPOOL_DEC_LIMIT) {
        lowered = info->sbc.bitpool - BITPOOL_DEC_STEP;

        /* A full step may overshoot the floor; pin it there if so */
        a2dp_set_bitpool(u, lowered < BITPOOL_DEC_LIMIT ? BITPOOL_DEC_LIMIT : lowered);
    }
}
static void thread_func(void *userdata) {
struct userdata *u = userdata;
unsigned do_write = 0;
@ -1579,6 +1650,9 @@ static void thread_func(void *userdata) {
pa_sink_render_full(u->sink, skip_bytes, &tmp);
pa_memblock_unref(tmp.memblock);
u->write_index += skip_bytes;
if (u->profile == PROFILE_A2DP)
a2dp_reduce_bitpool(u);
}
}
@ -1677,7 +1751,7 @@ static DBusHandlerResult filter_cb(DBusConnection *bus, DBusMessage *m, void *us
dbus_message_get_path(m),
dbus_message_get_member(m));
if (!dbus_message_has_path(m, u->path))
if (!dbus_message_has_path(m, u->path) && !dbus_message_has_path(m, u->transport))
goto fail;
if (dbus_message_is_signal(m, "org.bluez.Headset", "SpeakerGainChanged") ||
@ -1703,6 +1777,28 @@ static DBusHandlerResult filter_cb(DBusConnection *bus, DBusMessage *m, void *us
pa_source_volume_changed(u->source, &v);
}
}
} else if (dbus_message_is_signal(m, "org.bluez.MediaTransport", "PropertyChanged")) {
DBusMessageIter arg_i;
pa_bluetooth_transport *t;
pa_bool_t nrec;
t = (pa_bluetooth_transport *) pa_bluetooth_discovery_get_transport(u->discovery, u->transport);
pa_assert(t);
if (!dbus_message_iter_init(m, &arg_i)) {
pa_log("Failed to parse PropertyChanged: %s", err.message);
goto fail;
}
nrec = t->nrec;
if (pa_bluetooth_transport_parse_property(t, &arg_i) < 0)
goto fail;
if (nrec != t->nrec) {
pa_log_debug("dbus: property 'NREC' changed to value '%s'", t->nrec ? "True" : "False");
pa_proplist_sets(u->source->proplist, "bluetooth.nrec", t->nrec ? "1" : "0");
}
}
fail:
@ -1944,6 +2040,7 @@ static int add_source(struct userdata *u) {
pa_proplist_sets(data.proplist, "bluetooth.protocol", u->profile == PROFILE_A2DP_SOURCE ? "a2dp_source" : "hsp");
if ((u->profile == PROFILE_HSP) || (u->profile == PROFILE_HFGW))
pa_proplist_sets(data.proplist, PA_PROP_DEVICE_INTENDED_ROLES, "phone");
data.card = u->card;
data.name = get_name("source", u->modargs, u->address, &b);
data.namereg_fail = b;
@ -1970,8 +2067,15 @@ static int add_source(struct userdata *u) {
pa_bytes_to_usec(u->block_size, &u->sample_spec));
}
if (u->profile == PROFILE_HSP || u->profile == PROFILE_HFGW)
pa_proplist_sets(u->source->proplist, "bluetooth.nrec", (u->hsp.pcm_capabilities.flags & BT_PCM_FLAG_NREC) ? "1" : "0");
if ((u->profile == PROFILE_HSP) || (u->profile == PROFILE_HFGW)) {
if (u->transport) {
const pa_bluetooth_transport *t;
t = pa_bluetooth_discovery_get_transport(u->discovery, u->transport);
pa_assert(t);
pa_proplist_sets(u->source->proplist, "bluetooth.nrec", t->nrec ? "1" : "0");
} else
pa_proplist_sets(u->source->proplist, "bluetooth.nrec", (u->hsp.pcm_capabilities.flags & BT_PCM_FLAG_NREC) ? "1" : "0");
}
if (u->profile == PROFILE_HSP) {
u->source->set_volume = source_set_volume_cb;
@ -2094,7 +2198,11 @@ static int bt_transport_config_a2dp(struct userdata *u) {
pa_assert_not_reached();
}
a2dp->sbc.bitpool = config->max_bitpool;
a2dp->min_bitpool = config->min_bitpool;
a2dp->max_bitpool = config->max_bitpool;
/* Set minimum bitpool for source to get the maximum possible block_size */
a2dp->sbc.bitpool = u->profile == PROFILE_A2DP ? a2dp->max_bitpool : a2dp->min_bitpool;
a2dp->codesize = sbc_get_codesize(&a2dp->sbc);
a2dp->frame_length = sbc_get_frame_length(&a2dp->sbc);
@ -2118,99 +2226,12 @@ static int bt_transport_config(struct userdata *u) {
return bt_transport_config_a2dp(u);
}
static int parse_transport_property(struct userdata *u, DBusMessageIter *i) {
const char *key;
DBusMessageIter variant_i;
pa_assert(u);
pa_assert(i);
if (dbus_message_iter_get_arg_type(i) != DBUS_TYPE_STRING) {
pa_log("Property name not a string.");
return -1;
}
dbus_message_iter_get_basic(i, &key);
if (!dbus_message_iter_next(i)) {
pa_log("Property value missing");
return -1;
}
if (dbus_message_iter_get_arg_type(i) != DBUS_TYPE_VARIANT) {
pa_log("Property value not a variant.");
return -1;
}
dbus_message_iter_recurse(i, &variant_i);
switch (dbus_message_iter_get_arg_type(&variant_i)) {
case DBUS_TYPE_UINT16: {
uint16_t value;
dbus_message_iter_get_basic(&variant_i, &value);
if (pa_streq(key, "OMTU"))
u->link_mtu = value;
break;
}
}
return 0;
}
/* Run from main thread */
static int bt_transport_open(struct userdata *u) {
DBusMessage *m, *r;
DBusMessageIter arg_i, element_i;
DBusError err;
if (bt_transport_acquire(u, FALSE) < 0)
return -1;
dbus_error_init(&err);
pa_assert_se(m = dbus_message_new_method_call("org.bluez", u->transport, "org.bluez.MediaTransport", "GetProperties"));
r = dbus_connection_send_with_reply_and_block(pa_dbus_connection_get(u->connection), m, -1, &err);
if (dbus_error_is_set(&err) || !r) {
pa_log("Failed to get transport properties: %s", err.message);
goto fail;
}
if (!dbus_message_iter_init(r, &arg_i)) {
pa_log("GetProperties reply has no arguments.");
goto fail;
}
if (dbus_message_iter_get_arg_type(&arg_i) != DBUS_TYPE_ARRAY) {
pa_log("GetProperties argument is not an array.");
goto fail;
}
dbus_message_iter_recurse(&arg_i, &element_i);
while (dbus_message_iter_get_arg_type(&element_i) != DBUS_TYPE_INVALID) {
if (dbus_message_iter_get_arg_type(&element_i) == DBUS_TYPE_DICT_ENTRY) {
DBusMessageIter dict_i;
dbus_message_iter_recurse(&element_i, &dict_i);
parse_transport_property(u, &dict_i);
}
if (!dbus_message_iter_next(&element_i))
break;
}
return bt_transport_config(u);
fail:
dbus_message_unref(r);
return -1;
}
/* Run from main thread */
@ -2690,7 +2711,7 @@ int pa__init(pa_module* m) {
struct userdata *u;
const char *address, *path;
DBusError err;
char *mike, *speaker;
char *mike, *speaker, *transport;
const pa_bluetooth_device *device;
pa_assert(m);
@ -2769,15 +2790,18 @@ int pa__init(pa_module* m) {
speaker = pa_sprintf_malloc("type='signal',sender='org.bluez',interface='org.bluez.Headset',member='SpeakerGainChanged',path='%s'", u->path);
mike = pa_sprintf_malloc("type='signal',sender='org.bluez',interface='org.bluez.Headset',member='MicrophoneGainChanged',path='%s'", u->path);
transport = pa_sprintf_malloc("type='signal',sender='org.bluez',interface='org.bluez.MediaTransport',member='PropertyChanged'");
if (pa_dbus_add_matches(
pa_dbus_connection_get(u->connection), &err,
speaker,
mike,
transport,
NULL) < 0) {
pa_xfree(speaker);
pa_xfree(mike);
pa_xfree(transport);
pa_log("Failed to add D-Bus matches: %s", err.message);
goto fail;
@ -2785,6 +2809,7 @@ int pa__init(pa_module* m) {
pa_xfree(speaker);
pa_xfree(mike);
pa_xfree(transport);
/* Connect to the BT service */
init_bt(u);

View file

@ -77,7 +77,7 @@ struct sbc_frame {
uint8_t joint;
/* only the lower 4 bits of every element are to be used */
uint32_t scale_factor[2][8];
uint32_t SBC_ALIGNED scale_factor[2][8];
/* raw integer subband samples in the frame */
int32_t SBC_ALIGNED sb_sample_f[16][2][8];
@ -159,7 +159,8 @@ static uint8_t sbc_crc8(const uint8_t *data, size_t len)
* Takes a pointer to the frame in question, a pointer to the bits array and
* the sampling frequency (as 2 bit integer)
*/
static void sbc_calculate_bits(const struct sbc_frame *frame, int (*bits)[8])
static SBC_ALWAYS_INLINE void sbc_calculate_bits_internal(
const struct sbc_frame *frame, int (*bits)[8], int subbands)
{
uint8_t sf = frame->frequency;
@ -170,17 +171,17 @@ static void sbc_calculate_bits(const struct sbc_frame *frame, int (*bits)[8])
for (ch = 0; ch < frame->channels; ch++) {
max_bitneed = 0;
if (frame->allocation == SNR) {
for (sb = 0; sb < frame->subbands; sb++) {
for (sb = 0; sb < subbands; sb++) {
bitneed[ch][sb] = frame->scale_factor[ch][sb];
if (bitneed[ch][sb] > max_bitneed)
max_bitneed = bitneed[ch][sb];
}
} else {
for (sb = 0; sb < frame->subbands; sb++) {
for (sb = 0; sb < subbands; sb++) {
if (frame->scale_factor[ch][sb] == 0)
bitneed[ch][sb] = -5;
else {
if (frame->subbands == 4)
if (subbands == 4)
loudness = frame->scale_factor[ch][sb] - sbc_offset4[sf][sb];
else
loudness = frame->scale_factor[ch][sb] - sbc_offset8[sf][sb];
@ -201,7 +202,7 @@ static void sbc_calculate_bits(const struct sbc_frame *frame, int (*bits)[8])
bitslice--;
bitcount += slicecount;
slicecount = 0;
for (sb = 0; sb < frame->subbands; sb++) {
for (sb = 0; sb < subbands; sb++) {
if ((bitneed[ch][sb] > bitslice + 1) && (bitneed[ch][sb] < bitslice + 16))
slicecount++;
else if (bitneed[ch][sb] == bitslice + 1)
@ -214,7 +215,7 @@ static void sbc_calculate_bits(const struct sbc_frame *frame, int (*bits)[8])
bitslice--;
}
for (sb = 0; sb < frame->subbands; sb++) {
for (sb = 0; sb < subbands; sb++) {
if (bitneed[ch][sb] < bitslice + 2)
bits[ch][sb] = 0;
else {
@ -224,7 +225,8 @@ static void sbc_calculate_bits(const struct sbc_frame *frame, int (*bits)[8])
}
}
for (sb = 0; bitcount < frame->bitpool && sb < frame->subbands; sb++) {
for (sb = 0; bitcount < frame->bitpool &&
sb < subbands; sb++) {
if ((bits[ch][sb] >= 2) && (bits[ch][sb] < 16)) {
bits[ch][sb]++;
bitcount++;
@ -234,7 +236,8 @@ static void sbc_calculate_bits(const struct sbc_frame *frame, int (*bits)[8])
}
}
for (sb = 0; bitcount < frame->bitpool && sb < frame->subbands; sb++) {
for (sb = 0; bitcount < frame->bitpool &&
sb < subbands; sb++) {
if (bits[ch][sb] < 16) {
bits[ch][sb]++;
bitcount++;
@ -250,7 +253,7 @@ static void sbc_calculate_bits(const struct sbc_frame *frame, int (*bits)[8])
max_bitneed = 0;
if (frame->allocation == SNR) {
for (ch = 0; ch < 2; ch++) {
for (sb = 0; sb < frame->subbands; sb++) {
for (sb = 0; sb < subbands; sb++) {
bitneed[ch][sb] = frame->scale_factor[ch][sb];
if (bitneed[ch][sb] > max_bitneed)
max_bitneed = bitneed[ch][sb];
@ -258,11 +261,11 @@ static void sbc_calculate_bits(const struct sbc_frame *frame, int (*bits)[8])
}
} else {
for (ch = 0; ch < 2; ch++) {
for (sb = 0; sb < frame->subbands; sb++) {
for (sb = 0; sb < subbands; sb++) {
if (frame->scale_factor[ch][sb] == 0)
bitneed[ch][sb] = -5;
else {
if (frame->subbands == 4)
if (subbands == 4)
loudness = frame->scale_factor[ch][sb] - sbc_offset4[sf][sb];
else
loudness = frame->scale_factor[ch][sb] - sbc_offset8[sf][sb];
@ -285,7 +288,7 @@ static void sbc_calculate_bits(const struct sbc_frame *frame, int (*bits)[8])
bitcount += slicecount;
slicecount = 0;
for (ch = 0; ch < 2; ch++) {
for (sb = 0; sb < frame->subbands; sb++) {
for (sb = 0; sb < subbands; sb++) {
if ((bitneed[ch][sb] > bitslice + 1) && (bitneed[ch][sb] < bitslice + 16))
slicecount++;
else if (bitneed[ch][sb] == bitslice + 1)
@ -300,7 +303,7 @@ static void sbc_calculate_bits(const struct sbc_frame *frame, int (*bits)[8])
}
for (ch = 0; ch < 2; ch++) {
for (sb = 0; sb < frame->subbands; sb++) {
for (sb = 0; sb < subbands; sb++) {
if (bitneed[ch][sb] < bitslice + 2) {
bits[ch][sb] = 0;
} else {
@ -324,7 +327,8 @@ static void sbc_calculate_bits(const struct sbc_frame *frame, int (*bits)[8])
if (ch == 1) {
ch = 0;
sb++;
if (sb >= frame->subbands) break;
if (sb >= subbands)
break;
} else
ch = 1;
}
@ -339,7 +343,8 @@ static void sbc_calculate_bits(const struct sbc_frame *frame, int (*bits)[8])
if (ch == 1) {
ch = 0;
sb++;
if (sb >= frame->subbands) break;
if (sb >= subbands)
break;
} else
ch = 1;
}
@ -348,6 +353,14 @@ static void sbc_calculate_bits(const struct sbc_frame *frame, int (*bits)[8])
}
/*
 * Dispatch to sbc_calculate_bits_internal() with a literal subband count:
 * 4 when the frame has four subbands, 8 in every other case. The count is
 * passed as a constant at each call site (presumably so the always-inline
 * body can be specialised per subband count).
 */
static void sbc_calculate_bits(const struct sbc_frame *frame, int (*bits)[8])
{
	switch (frame->subbands) {
	case 4:
		sbc_calculate_bits_internal(frame, bits, 4);
		break;
	default:
		sbc_calculate_bits_internal(frame, bits, 8);
		break;
	}
}
/*
* Unpacks a SBC frame at the beginning of the stream in data,
* which has at most len bytes into frame.
@ -534,6 +547,16 @@ static void sbc_decoder_init(struct sbc_decoder_state *state,
state->offset[ch][i] = (10 * i + 10);
}
/*
 * Saturate a 32-bit sample to the signed 16-bit range: values below -0x8000
 * become -0x8000, values above 0x7FFF become 0x7FFF, everything else is
 * returned unchanged.
 */
static SBC_ALWAYS_INLINE int16_t sbc_clip16(int32_t s)
{
	if (s < -0x8000)
		return -0x8000;

	if (s > 0x7FFF)
		return 0x7FFF;

	return (int16_t) s;
}
static inline void sbc_synthesize_four(struct sbc_decoder_state *state,
struct sbc_frame *frame, int ch, int blk)
{
@ -562,7 +585,7 @@ static inline void sbc_synthesize_four(struct sbc_decoder_state *state,
k = (i + 4) & 0xf;
/* Store in output, Q0 */
frame->pcm_sample[ch][blk * 4 + i] = SCALE4_STAGED1(
frame->pcm_sample[ch][blk * 4 + i] = sbc_clip16(SCALE4_STAGED1(
MULA(v[offset[i] + 0], sbc_proto_4_40m0[idx + 0],
MULA(v[offset[k] + 1], sbc_proto_4_40m1[idx + 0],
MULA(v[offset[i] + 2], sbc_proto_4_40m0[idx + 1],
@ -572,7 +595,7 @@ static inline void sbc_synthesize_four(struct sbc_decoder_state *state,
MULA(v[offset[i] + 6], sbc_proto_4_40m0[idx + 3],
MULA(v[offset[k] + 7], sbc_proto_4_40m1[idx + 3],
MULA(v[offset[i] + 8], sbc_proto_4_40m0[idx + 4],
MUL( v[offset[k] + 9], sbc_proto_4_40m1[idx + 4])))))))))));
MUL( v[offset[k] + 9], sbc_proto_4_40m1[idx + 4]))))))))))));
}
}
@ -607,8 +630,8 @@ static inline void sbc_synthesize_eight(struct sbc_decoder_state *state,
for (idx = 0, i = 0; i < 8; i++, idx += 5) {
k = (i + 8) & 0xf;
/* Store in output */
frame->pcm_sample[ch][blk * 8 + i] = SCALE8_STAGED1( // Q0
/* Store in output, Q0 */
frame->pcm_sample[ch][blk * 8 + i] = sbc_clip16(SCALE8_STAGED1(
MULA(state->V[ch][offset[i] + 0], sbc_proto_8_80m0[idx + 0],
MULA(state->V[ch][offset[k] + 1], sbc_proto_8_80m1[idx + 0],
MULA(state->V[ch][offset[i] + 2], sbc_proto_8_80m0[idx + 1],
@ -618,7 +641,7 @@ static inline void sbc_synthesize_eight(struct sbc_decoder_state *state,
MULA(state->V[ch][offset[i] + 6], sbc_proto_8_80m0[idx + 3],
MULA(state->V[ch][offset[k] + 7], sbc_proto_8_80m1[idx + 3],
MULA(state->V[ch][offset[i] + 8], sbc_proto_8_80m0[idx + 4],
MUL( state->V[ch][offset[k] + 9], sbc_proto_8_80m1[idx + 4])))))))))));
MUL( state->V[ch][offset[k] + 9], sbc_proto_8_80m1[idx + 4]))))))))))));
}
}
@ -732,9 +755,9 @@ static int sbc_analyze_audio(struct sbc_encoder_state *state,
* -99 not implemented
*/
static SBC_ALWAYS_INLINE int sbc_pack_frame_internal(
static SBC_ALWAYS_INLINE ssize_t sbc_pack_frame_internal(
uint8_t *data, struct sbc_frame *frame, size_t len,
int frame_subbands, int frame_channels)
int frame_subbands, int frame_channels, int joint)
{
/* Bitstream writer starts from the fourth byte */
uint8_t *data_ptr = data + 4;
@ -791,63 +814,6 @@ static SBC_ALWAYS_INLINE int sbc_pack_frame_internal(
crc_pos = 16;
if (frame->mode == JOINT_STEREO) {
/* like frame->sb_sample but joint stereo */
int32_t sb_sample_j[16][2];
/* scalefactor and scale_factor in joint case */
uint32_t scalefactor_j[2];
uint8_t scale_factor_j[2];
uint8_t joint = 0;
frame->joint = 0;
for (sb = 0; sb < frame_subbands - 1; sb++) {
scale_factor_j[0] = 0;
scalefactor_j[0] = 2 << SCALE_OUT_BITS;
scale_factor_j[1] = 0;
scalefactor_j[1] = 2 << SCALE_OUT_BITS;
for (blk = 0; blk < frame->blocks; blk++) {
uint32_t tmp;
/* Calculate joint stereo signal */
sb_sample_j[blk][0] =
ASR(frame->sb_sample_f[blk][0][sb], 1) +
ASR(frame->sb_sample_f[blk][1][sb], 1);
sb_sample_j[blk][1] =
ASR(frame->sb_sample_f[blk][0][sb], 1) -
ASR(frame->sb_sample_f[blk][1][sb], 1);
/* calculate scale_factor_j and scalefactor_j for joint case */
tmp = fabs(sb_sample_j[blk][0]);
while (scalefactor_j[0] < tmp) {
scale_factor_j[0]++;
scalefactor_j[0] *= 2;
}
tmp = fabs(sb_sample_j[blk][1]);
while (scalefactor_j[1] < tmp) {
scale_factor_j[1]++;
scalefactor_j[1] *= 2;
}
}
/* decide whether to join this subband */
if ((frame->scale_factor[0][sb] +
frame->scale_factor[1][sb]) >
(scale_factor_j[0] +
scale_factor_j[1])) {
/* use joint stereo for this subband */
joint |= 1 << (frame_subbands - 1 - sb);
frame->joint |= 1 << sb;
frame->scale_factor[0][sb] = scale_factor_j[0];
frame->scale_factor[1][sb] = scale_factor_j[1];
for (blk = 0; blk < frame->blocks; blk++) {
frame->sb_sample_f[blk][0][sb] =
sb_sample_j[blk][0];
frame->sb_sample_f[blk][1][sb] =
sb_sample_j[blk][1];
}
}
}
PUT_BITS(data_ptr, bits_cache, bits_count,
joint, frame_subbands);
crc_header[crc_pos >> 3] = joint;
@ -905,18 +871,23 @@ static SBC_ALWAYS_INLINE int sbc_pack_frame_internal(
return data_ptr - data;
}
static int sbc_pack_frame(uint8_t *data, struct sbc_frame *frame, size_t len)
static ssize_t sbc_pack_frame(uint8_t *data, struct sbc_frame *frame, size_t len,
int joint)
{
if (frame->subbands == 4) {
if (frame->channels == 1)
return sbc_pack_frame_internal(data, frame, len, 4, 1);
return sbc_pack_frame_internal(
data, frame, len, 4, 1, joint);
else
return sbc_pack_frame_internal(data, frame, len, 4, 2);
return sbc_pack_frame_internal(
data, frame, len, 4, 2, joint);
} else {
if (frame->channels == 1)
return sbc_pack_frame_internal(data, frame, len, 8, 1);
return sbc_pack_frame_internal(
data, frame, len, 8, 1, joint);
else
return sbc_pack_frame_internal(data, frame, len, 8, 2);
return sbc_pack_frame_internal(
data, frame, len, 8, 2, joint);
}
}
@ -924,7 +895,7 @@ static void sbc_encoder_init(struct sbc_encoder_state *state,
const struct sbc_frame *frame)
{
memset(&state->X, 0, sizeof(state->X));
state->position = SBC_X_BUFFER_SIZE - frame->subbands * 9;
state->position = (SBC_X_BUFFER_SIZE - frame->subbands * 9) & ~7;
sbc_init_primitives(state);
}
@ -1046,10 +1017,11 @@ ssize_t sbc_decode(sbc_t *sbc, const void *input, size_t input_len,
}
ssize_t sbc_encode(sbc_t *sbc, const void *input, size_t input_len,
void *output, size_t output_len, size_t *written)
void *output, size_t output_len, ssize_t *written)
{
struct sbc_priv *priv;
int framelen, samples;
int samples;
ssize_t framelen;
int (*sbc_enc_process_input)(int position,
const uint8_t *pcm, int16_t X[2][SBC_X_BUFFER_SIZE],
int nsamples, int nchannels);
@ -1114,11 +1086,18 @@ ssize_t sbc_encode(sbc_t *sbc, const void *input, size_t input_len,
samples = sbc_analyze_audio(&priv->enc_state, &priv->frame);
priv->enc_state.sbc_calc_scalefactors(
priv->frame.sb_sample_f, priv->frame.scale_factor,
priv->frame.blocks, priv->frame.channels, priv->frame.subbands);
framelen = sbc_pack_frame(output, &priv->frame, output_len);
if (priv->frame.mode == JOINT_STEREO) {
int j = priv->enc_state.sbc_calc_scalefactors_j(
priv->frame.sb_sample_f, priv->frame.scale_factor,
priv->frame.blocks, priv->frame.subbands);
framelen = sbc_pack_frame(output, &priv->frame, output_len, j);
} else {
priv->enc_state.sbc_calc_scalefactors(
priv->frame.sb_sample_f, priv->frame.scale_factor,
priv->frame.blocks, priv->frame.channels,
priv->frame.subbands);
framelen = sbc_pack_frame(output, &priv->frame, output_len, 0);
}
if (written)
*written = framelen;
@ -1131,8 +1110,7 @@ void sbc_finish(sbc_t *sbc)
if (!sbc)
return;
if (sbc->priv_alloc_base)
free(sbc->priv_alloc_base);
free(sbc->priv_alloc_base);
memset(sbc, 0, sizeof(sbc_t));
}

View file

@ -90,7 +90,7 @@ ssize_t sbc_decode(sbc_t *sbc, const void *input, size_t input_len,
/* Encodes ONE input block into ONE output block */
ssize_t sbc_encode(sbc_t *sbc, const void *input, size_t input_len,
void *output, size_t output_len, size_t *written);
void *output, size_t output_len, ssize_t *written);
/* Returns the output block size in bytes */
size_t sbc_get_frame_length(sbc_t *sbc);

View file

@ -32,7 +32,9 @@
#include "sbc_primitives.h"
#include "sbc_primitives_mmx.h"
#include "sbc_primitives_iwmmxt.h"
#include "sbc_primitives_neon.h"
#include "sbc_primitives_armv6.h"
/*
* A reference C code of analysis filter with SIMD-friendly tables
@ -231,12 +233,12 @@ static SBC_ALWAYS_INLINE int sbc_encoder_process_input_s4_internal(
/* handle X buffer wraparound */
if (position < nsamples) {
if (nchannels > 0)
memcpy(&X[0][SBC_X_BUFFER_SIZE - 36], &X[0][position],
memcpy(&X[0][SBC_X_BUFFER_SIZE - 40], &X[0][position],
36 * sizeof(int16_t));
if (nchannels > 1)
memcpy(&X[1][SBC_X_BUFFER_SIZE - 36], &X[1][position],
memcpy(&X[1][SBC_X_BUFFER_SIZE - 40], &X[1][position],
36 * sizeof(int16_t));
position = SBC_X_BUFFER_SIZE - 36;
position = SBC_X_BUFFER_SIZE - 40;
}
#define PCM(i) (big_endian ? \
@ -439,6 +441,80 @@ static void sbc_calc_scalefactors(
}
}
/*
 * Compute the scale factors of both channels for every subband and decide,
 * per subband, whether it is coded as joint stereo.
 *
 * sb_sample_f  - subband samples indexed [block][channel][subband]; for each
 *                subband that ends up joint, both channels are overwritten
 *                in place with the halved sum / halved difference
 * scale_factor - output, indexed [channel][subband]; written for every
 *                subband in 0..subbands-1
 * blocks       - number of blocks to scan
 * subbands     - number of subbands to process
 *
 * Returns a bitmask in which bit (subbands - 1 - sb) is set for every
 * subband sb that was switched to joint stereo.
 */
static int sbc_calc_scalefactors_j(
int32_t sb_sample_f[16][2][8],
uint32_t scale_factor[2][8],
int blocks, int subbands)
{
int blk, joint = 0;
int32_t tmp0, tmp1;
uint32_t x, y;
/* last subband does not use joint stereo */
int sb = subbands - 1;
/* x / y collect, by OR-ing (|sample| - 1) over all blocks, the highest bit
 * needed by channel 0 / channel 1; the seed 1 << SCALE_OUT_BITS sets the
 * floor for the result */
x = 1 << SCALE_OUT_BITS;
y = 1 << SCALE_OUT_BITS;
for (blk = 0; blk < blocks; blk++) {
tmp0 = fabs(sb_sample_f[blk][0][sb]);
tmp1 = fabs(sb_sample_f[blk][1][sb]);
if (tmp0 != 0)
x |= tmp0 - 1;
if (tmp1 != 0)
y |= tmp1 - 1;
}
/* scale factor is derived from the position of the top set bit
 * (sbc_clz presumably counts leading zeros - confirm against its definition) */
scale_factor[0][sb] = (31 - SCALE_OUT_BITS) - sbc_clz(x);
scale_factor[1][sb] = (31 - SCALE_OUT_BITS) - sbc_clz(y);
/* the rest of subbands can use joint stereo */
while (--sb >= 0) {
/* candidate joint representation of this subband: [blk][0] is the halved
 * sum, [blk][1] the halved difference of the two channels */
int32_t sb_sample_j[16][2];
x = 1 << SCALE_OUT_BITS;
y = 1 << SCALE_OUT_BITS;
for (blk = 0; blk < blocks; blk++) {
tmp0 = sb_sample_f[blk][0][sb];
tmp1 = sb_sample_f[blk][1][sb];
sb_sample_j[blk][0] = ASR(tmp0, 1) + ASR(tmp1, 1);
sb_sample_j[blk][1] = ASR(tmp0, 1) - ASR(tmp1, 1);
tmp0 = fabs(tmp0);
tmp1 = fabs(tmp1);
if (tmp0 != 0)
x |= tmp0 - 1;
if (tmp1 != 0)
y |= tmp1 - 1;
}
/* scale factors of the unmodified (non-joint) channels */
scale_factor[0][sb] = (31 - SCALE_OUT_BITS) -
sbc_clz(x);
scale_factor[1][sb] = (31 - SCALE_OUT_BITS) -
sbc_clz(y);
/* same computation again, this time on the joint candidate samples */
x = 1 << SCALE_OUT_BITS;
y = 1 << SCALE_OUT_BITS;
for (blk = 0; blk < blocks; blk++) {
tmp0 = fabs(sb_sample_j[blk][0]);
tmp1 = fabs(sb_sample_j[blk][1]);
if (tmp0 != 0)
x |= tmp0 - 1;
if (tmp1 != 0)
y |= tmp1 - 1;
}
/* from here on x and y hold the joint scale factors, not the bit masks */
x = (31 - SCALE_OUT_BITS) - sbc_clz(x);
y = (31 - SCALE_OUT_BITS) - sbc_clz(y);
/* decide whether to use joint stereo for this subband */
/* joint is chosen only when its two scale factors sum to strictly less
 * than the non-joint ones */
if ((scale_factor[0][sb] + scale_factor[1][sb]) > x + y) {
joint |= 1 << (subbands - 1 - sb);
scale_factor[0][sb] = x;
scale_factor[1][sb] = y;
/* replace the subband samples with their joint representation */
for (blk = 0; blk < blocks; blk++) {
sb_sample_f[blk][0][sb] = sb_sample_j[blk][0];
sb_sample_f[blk][1][sb] = sb_sample_j[blk][1];
}
}
}
/* bitmask with the information about subbands using joint stereo */
return joint;
}
/*
* Detect CPU features and setup function pointers
*/
@ -456,6 +532,7 @@ void sbc_init_primitives(struct sbc_encoder_state *state)
/* Default implementation for scale factors calculation */
state->sbc_calc_scalefactors = sbc_calc_scalefactors;
state->sbc_calc_scalefactors_j = sbc_calc_scalefactors_j;
state->implementation_info = "Generic C";
/* X86/AMD64 optimizations */
@ -464,6 +541,12 @@ void sbc_init_primitives(struct sbc_encoder_state *state)
#endif
/* ARM optimizations */
#ifdef SBC_BUILD_WITH_ARMV6_SUPPORT
sbc_init_primitives_armv6(state);
#endif
#ifdef SBC_BUILD_WITH_IWMMXT_SUPPORT
sbc_init_primitives_iwmmxt(state);
#endif
#ifdef SBC_BUILD_WITH_NEON_SUPPORT
sbc_init_primitives_neon(state);
#endif

View file

@ -62,6 +62,10 @@ struct sbc_encoder_state {
void (*sbc_calc_scalefactors)(int32_t sb_sample_f[16][2][8],
uint32_t scale_factor[2][8],
int blocks, int channels, int subbands);
/* Scale factors calculation with joint stereo support */
int (*sbc_calc_scalefactors_j)(int32_t sb_sample_f[16][2][8],
uint32_t scale_factor[2][8],
int blocks, int subbands);
const char *implementation_info;
};

View file

@ -0,0 +1,299 @@
/*
*
* Bluetooth low-complexity, subband codec (SBC) library
*
* Copyright (C) 2008-2010 Nokia Corporation
* Copyright (C) 2004-2010 Marcel Holtmann <marcel@holtmann.org>
* Copyright (C) 2004-2005 Henryk Ploetz <henryk@ploetzli.ch>
* Copyright (C) 2005-2006 Brad Midgley <bmidgley@xmission.com>
*
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*
*/
#include <stdint.h>
#include <limits.h>
#include "sbc.h"
#include "sbc_math.h"
#include "sbc_tables.h"
#include "sbc_primitives_armv6.h"
/*
* ARMv6 optimizations. The instructions are scheduled for ARM11 pipeline.
*/
#ifdef SBC_BUILD_WITH_ARMV6_SUPPORT
/*
 * 4-subband analysis kernel for one block, written entirely in ARMv6
 * assembly.
 *
 * The function is declared naked with an empty parameter list: it saves and
 * restores the registers it uses itself (push/pop below) and returns by
 * popping the saved lr into pc. Its arguments are taken directly from
 * registers: r0 = in, r1 = out, r2 = consts. C code must call it through
 * the sbc_analyze_four() macro, which supplies the real signature.
 *
 * Outline of the code:
 *  - r14 is loaded with 0x8000 and used as the initial value of every t1
 *    accumulator (it is added before the "asr #16" done by pkhtb).
 *  - smlad accumulates products of 16-bit pairs loaded from 'in' and
 *    'consts' (byte offsets 0..72) into t1[0..3].
 *  - pkhtb packs two accumulators into one register (r3 and r12).
 *  - the packed values are multiplied with the table at consts + 80..104
 *    and the four 32-bit results are stored to 'out' with stmia.
 */
static void __attribute__((naked)) sbc_analyze_four_armv6()
{
/* r0 = in, r1 = out, r2 = consts */
asm volatile (
"push {r1, r4-r7, lr}\n"
"push {r8-r11}\n"
"ldrd r4, r5, [r0, #0]\n"
"ldrd r6, r7, [r2, #0]\n"
"ldrd r8, r9, [r0, #16]\n"
"ldrd r10, r11, [r2, #16]\n"
"mov r14, #0x8000\n"
"smlad r3, r4, r6, r14\n"
"smlad r12, r5, r7, r14\n"
"ldrd r4, r5, [r0, #32]\n"
"ldrd r6, r7, [r2, #32]\n"
"smlad r3, r8, r10, r3\n"
"smlad r12, r9, r11, r12\n"
"ldrd r8, r9, [r0, #48]\n"
"ldrd r10, r11, [r2, #48]\n"
"smlad r3, r4, r6, r3\n"
"smlad r12, r5, r7, r12\n"
"ldrd r4, r5, [r0, #64]\n"
"ldrd r6, r7, [r2, #64]\n"
"smlad r3, r8, r10, r3\n"
"smlad r12, r9, r11, r12\n"
"ldrd r8, r9, [r0, #8]\n"
"ldrd r10, r11, [r2, #8]\n"
"smlad r3, r4, r6, r3\n" /* t1[0] is done */
"smlad r12, r5, r7, r12\n" /* t1[1] is done */
"ldrd r4, r5, [r0, #24]\n"
"ldrd r6, r7, [r2, #24]\n"
"pkhtb r3, r12, r3, asr #16\n" /* combine t1[0] and t1[1] */
"smlad r12, r8, r10, r14\n"
"smlad r14, r9, r11, r14\n"
"ldrd r8, r9, [r0, #40]\n"
"ldrd r10, r11, [r2, #40]\n"
"smlad r12, r4, r6, r12\n"
"smlad r14, r5, r7, r14\n"
"ldrd r4, r5, [r0, #56]\n"
"ldrd r6, r7, [r2, #56]\n"
"smlad r12, r8, r10, r12\n"
"smlad r14, r9, r11, r14\n"
"ldrd r8, r9, [r0, #72]\n"
"ldrd r10, r11, [r2, #72]\n"
"smlad r12, r4, r6, r12\n"
"smlad r14, r5, r7, r14\n"
"ldrd r4, r5, [r2, #80]\n" /* start loading cos table */
"smlad r12, r8, r10, r12\n" /* t1[2] is done */
"smlad r14, r9, r11, r14\n" /* t1[3] is done */
"ldrd r6, r7, [r2, #88]\n"
"ldrd r8, r9, [r2, #96]\n"
"ldrd r10, r11, [r2, #104]\n" /* cos table fully loaded */
"pkhtb r12, r14, r12, asr #16\n" /* combine t1[2] and t1[3] */
"smuad r4, r3, r4\n"
"smuad r5, r3, r5\n"
"smlad r4, r12, r8, r4\n"
"smlad r5, r12, r9, r5\n"
"smuad r6, r3, r6\n"
"smuad r7, r3, r7\n"
"smlad r6, r12, r10, r6\n"
"smlad r7, r12, r11, r7\n"
"pop {r8-r11}\n"
"stmia r1, {r4, r5, r6, r7}\n"
"pop {r1, r4-r7, pc}\n"
);
}
/*
 * Typed call wrapper for the naked routine above: the function address is
 * cast to a pointer to a function taking (int16_t *in, int32_t *out,
 * const FIXED_T *consts) so that the call site passes three arguments.
 * The assembly expects them in r0, r1 and r2 respectively.
 */
#define sbc_analyze_four(in, out, consts) \
((void (*)(int16_t *, int32_t *, const FIXED_T*)) \
sbc_analyze_four_armv6)((in), (out), (consts))
/*
 * 8-subband analysis kernel for one block, written entirely in ARMv6
 * assembly.
 *
 * Like the 4-subband variant this is a naked function with an empty
 * parameter list: it saves/restores the registers it uses and returns by
 * popping the saved lr into pc. Arguments are read from r0 = in, r1 = out,
 * r2 = consts; C code must call it through the sbc_analyze_eight() macro.
 *
 * Outline of the code:
 *  - r14 = 0x8000 seeds every t1 accumulator (added before the "asr #16"
 *    performed by pkhtb).
 *  - t1[6:7] and t1[4:5] are computed first, packed with pkhtb and pushed
 *    on the stack; t1[0:1] ends up in r3 and t1[2:3] in r12.
 *  - "pop {r0, r14}" then brings the two saved pairs back, which is why
 *    r0 ('in') and r14 are no longer needed as pointer/constant after it.
 *  - the last consts load in the first stage uses pre-indexed writeback
 *    ("[r2, #136]!"), so every later consts offset is written relative to
 *    the advanced pointer as (160 - 136 + n).
 *  - eight 32-bit results are stored to 'out' through three stmia
 *    instructions with writeback on r1.
 */
static void __attribute__((naked)) sbc_analyze_eight_armv6()
{
/* r0 = in, r1 = out, r2 = consts */
asm volatile (
"push {r1, r4-r7, lr}\n"
"push {r8-r11}\n"
"ldrd r4, r5, [r0, #24]\n"
"ldrd r6, r7, [r2, #24]\n"
"ldrd r8, r9, [r0, #56]\n"
"ldrd r10, r11, [r2, #56]\n"
"mov r14, #0x8000\n"
"smlad r3, r4, r6, r14\n"
"smlad r12, r5, r7, r14\n"
"ldrd r4, r5, [r0, #88]\n"
"ldrd r6, r7, [r2, #88]\n"
"smlad r3, r8, r10, r3\n"
"smlad r12, r9, r11, r12\n"
"ldrd r8, r9, [r0, #120]\n"
"ldrd r10, r11, [r2, #120]\n"
"smlad r3, r4, r6, r3\n"
"smlad r12, r5, r7, r12\n"
"ldrd r4, r5, [r0, #152]\n"
"ldrd r6, r7, [r2, #152]\n"
"smlad r3, r8, r10, r3\n"
"smlad r12, r9, r11, r12\n"
"ldrd r8, r9, [r0, #16]\n"
"ldrd r10, r11, [r2, #16]\n"
"smlad r3, r4, r6, r3\n" /* t1[6] is done */
"smlad r12, r5, r7, r12\n" /* t1[7] is done */
"ldrd r4, r5, [r0, #48]\n"
"ldrd r6, r7, [r2, #48]\n"
"pkhtb r3, r12, r3, asr #16\n" /* combine t1[6] and t1[7] */
"str r3, [sp, #-4]!\n" /* save to stack */
"smlad r3, r8, r10, r14\n"
"smlad r12, r9, r11, r14\n"
"ldrd r8, r9, [r0, #80]\n"
"ldrd r10, r11, [r2, #80]\n"
"smlad r3, r4, r6, r3\n"
"smlad r12, r5, r7, r12\n"
"ldrd r4, r5, [r0, #112]\n"
"ldrd r6, r7, [r2, #112]\n"
"smlad r3, r8, r10, r3\n"
"smlad r12, r9, r11, r12\n"
"ldrd r8, r9, [r0, #144]\n"
"ldrd r10, r11, [r2, #144]\n"
"smlad r3, r4, r6, r3\n"
"smlad r12, r5, r7, r12\n"
"ldrd r4, r5, [r0, #0]\n"
"ldrd r6, r7, [r2, #0]\n"
"smlad r3, r8, r10, r3\n" /* t1[4] is done */
"smlad r12, r9, r11, r12\n" /* t1[5] is done */
"ldrd r8, r9, [r0, #32]\n"
"ldrd r10, r11, [r2, #32]\n"
"pkhtb r3, r12, r3, asr #16\n" /* combine t1[4] and t1[5] */
"str r3, [sp, #-4]!\n" /* save to stack */
"smlad r3, r4, r6, r14\n"
"smlad r12, r5, r7, r14\n"
"ldrd r4, r5, [r0, #64]\n"
"ldrd r6, r7, [r2, #64]\n"
"smlad r3, r8, r10, r3\n"
"smlad r12, r9, r11, r12\n"
"ldrd r8, r9, [r0, #96]\n"
"ldrd r10, r11, [r2, #96]\n"
"smlad r3, r4, r6, r3\n"
"smlad r12, r5, r7, r12\n"
"ldrd r4, r5, [r0, #128]\n"
"ldrd r6, r7, [r2, #128]\n"
"smlad r3, r8, r10, r3\n"
"smlad r12, r9, r11, r12\n"
"ldrd r8, r9, [r0, #8]\n"
"ldrd r10, r11, [r2, #8]\n"
"smlad r3, r4, r6, r3\n" /* t1[0] is done */
"smlad r12, r5, r7, r12\n" /* t1[1] is done */
"ldrd r4, r5, [r0, #40]\n"
"ldrd r6, r7, [r2, #40]\n"
"pkhtb r3, r12, r3, asr #16\n" /* combine t1[0] and t1[1] */
"smlad r12, r8, r10, r14\n"
"smlad r14, r9, r11, r14\n"
"ldrd r8, r9, [r0, #72]\n"
"ldrd r10, r11, [r2, #72]\n"
"smlad r12, r4, r6, r12\n"
"smlad r14, r5, r7, r14\n"
"ldrd r4, r5, [r0, #104]\n"
"ldrd r6, r7, [r2, #104]\n"
"smlad r12, r8, r10, r12\n"
"smlad r14, r9, r11, r14\n"
"ldrd r8, r9, [r0, #136]\n"
/* pre-indexed load with writeback: r2 is advanced by 136 bytes here */
"ldrd r10, r11, [r2, #136]!\n"
"smlad r12, r4, r6, r12\n"
"smlad r14, r5, r7, r14\n"
"ldrd r4, r5, [r2, #(160 - 136 + 0)]\n"
"smlad r12, r8, r10, r12\n" /* t1[2] is done */
"smlad r14, r9, r11, r14\n" /* t1[3] is done */
"ldrd r6, r7, [r2, #(160 - 136 + 8)]\n"
"smuad r4, r3, r4\n"
"smuad r5, r3, r5\n"
"pkhtb r12, r14, r12, asr #16\n" /* combine t1[2] and t1[3] */
/* r3 = t2[0:1] */
/* r12 = t2[2:3] */
"pop {r0, r14}\n" /* t2[4:5], t2[6:7] */
"ldrd r8, r9, [r2, #(160 - 136 + 32)]\n"
"smuad r6, r3, r6\n"
"smuad r7, r3, r7\n"
"ldrd r10, r11, [r2, #(160 - 136 + 40)]\n"
"smlad r4, r12, r8, r4\n"
"smlad r5, r12, r9, r5\n"
"ldrd r8, r9, [r2, #(160 - 136 + 64)]\n"
"smlad r6, r12, r10, r6\n"
"smlad r7, r12, r11, r7\n"
"ldrd r10, r11, [r2, #(160 - 136 + 72)]\n"
"smlad r4, r0, r8, r4\n"
"smlad r5, r0, r9, r5\n"
"ldrd r8, r9, [r2, #(160 - 136 + 96)]\n"
"smlad r6, r0, r10, r6\n"
"smlad r7, r0, r11, r7\n"
"ldrd r10, r11, [r2, #(160 - 136 + 104)]\n"
"smlad r4, r14, r8, r4\n"
"smlad r5, r14, r9, r5\n"
"ldrd r8, r9, [r2, #(160 - 136 + 16 + 0)]\n"
"smlad r6, r14, r10, r6\n"
"smlad r7, r14, r11, r7\n"
"ldrd r10, r11, [r2, #(160 - 136 + 16 + 8)]\n"
"stmia r1!, {r4, r5}\n"
"smuad r4, r3, r8\n"
"smuad r5, r3, r9\n"
"ldrd r8, r9, [r2, #(160 - 136 + 16 + 32)]\n"
"stmia r1!, {r6, r7}\n"
"smuad r6, r3, r10\n"
"smuad r7, r3, r11\n"
"ldrd r10, r11, [r2, #(160 - 136 + 16 + 40)]\n"
"smlad r4, r12, r8, r4\n"
"smlad r5, r12, r9, r5\n"
"ldrd r8, r9, [r2, #(160 - 136 + 16 + 64)]\n"
"smlad r6, r12, r10, r6\n"
"smlad r7, r12, r11, r7\n"
"ldrd r10, r11, [r2, #(160 - 136 + 16 + 72)]\n"
"smlad r4, r0, r8, r4\n"
"smlad r5, r0, r9, r5\n"
"ldrd r8, r9, [r2, #(160 - 136 + 16 + 96)]\n"
"smlad r6, r0, r10, r6\n"
"smlad r7, r0, r11, r7\n"
"ldrd r10, r11, [r2, #(160 - 136 + 16 + 104)]\n"
"smlad r4, r14, r8, r4\n"
"smlad r5, r14, r9, r5\n"
"smlad r6, r14, r10, r6\n"
"smlad r7, r14, r11, r7\n"
"pop {r8-r11}\n"
"stmia r1!, {r4, r5, r6, r7}\n"
"pop {r1, r4-r7, pc}\n"
);
}
/*
 * Typed call wrapper for the naked routine above: the function address is
 * cast to a pointer to a function taking (int16_t *in, int32_t *out,
 * const FIXED_T *consts) so that the call site passes three arguments.
 * The assembly expects them in r0, r1 and r2 respectively.
 */
#define sbc_analyze_eight(in, out, consts) \
((void (*)(int16_t *, int32_t *, const FIXED_T*)) \
sbc_analyze_eight_armv6)((in), (out), (consts))
/*
 * Analyze four blocks with the 4-subband ARMv6 kernel.
 *
 * The kernel is invoked at input offsets 12, 8, 4 and 0 of 'x', alternating
 * between the "odd" and "even" constant tables. Each call writes to its own
 * output row; consecutive rows are 'out_stride' int32_t elements apart.
 */
static void sbc_analyze_4b_4s_armv6(int16_t *x, int32_t *out, int out_stride)
{
	int32_t *const row0 = out;
	int32_t *const row1 = row0 + out_stride;
	int32_t *const row2 = row1 + out_stride;
	int32_t *const row3 = row2 + out_stride;

	sbc_analyze_four(&x[12], row0, analysis_consts_fixed4_simd_odd);
	sbc_analyze_four(&x[8], row1, analysis_consts_fixed4_simd_even);
	sbc_analyze_four(&x[4], row2, analysis_consts_fixed4_simd_odd);
	sbc_analyze_four(&x[0], row3, analysis_consts_fixed4_simd_even);
}
/*
 * Analyze four blocks with the 8-subband ARMv6 kernel.
 *
 * The kernel is invoked at input offsets 24, 16, 8 and 0 of 'x', alternating
 * between the "odd" and "even" constant tables. Each call writes to its own
 * output row; consecutive rows are 'out_stride' int32_t elements apart.
 */
static void sbc_analyze_4b_8s_armv6(int16_t *x, int32_t *out, int out_stride)
{
	int32_t *const row0 = out;
	int32_t *const row1 = row0 + out_stride;
	int32_t *const row2 = row1 + out_stride;
	int32_t *const row3 = row2 + out_stride;

	sbc_analyze_eight(&x[24], row0, analysis_consts_fixed8_simd_odd);
	sbc_analyze_eight(&x[16], row1, analysis_consts_fixed8_simd_even);
	sbc_analyze_eight(&x[8], row2, analysis_consts_fixed8_simd_odd);
	sbc_analyze_eight(&x[0], row3, analysis_consts_fixed8_simd_even);
}
/*
 * Install the ARMv6 implementations of the two analysis primitives in
 * 'state' and label the active implementation accordingly. Other members
 * of 'state' are left untouched.
 */
void sbc_init_primitives_armv6(struct sbc_encoder_state *state)
{
	state->implementation_info = "ARMv6 SIMD";

	state->sbc_analyze_4b_8s = sbc_analyze_4b_8s_armv6;
	state->sbc_analyze_4b_4s = sbc_analyze_4b_4s_armv6;
}
#endif

View file

@ -0,0 +1,52 @@
/*
*
* Bluetooth low-complexity, subband codec (SBC) library
*
* Copyright (C) 2008-2010 Nokia Corporation
* Copyright (C) 2004-2010 Marcel Holtmann <marcel@holtmann.org>
* Copyright (C) 2004-2005 Henryk Ploetz <henryk@ploetzli.ch>
* Copyright (C) 2005-2006 Brad Midgley <bmidgley@xmission.com>
*
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*
*/
/* Include guard for the ARMv6 primitives header */
#ifndef __SBC_PRIMITIVES_ARMV6_H
#define __SBC_PRIMITIVES_ARMV6_H
#include "sbc_primitives.h"
/*
 * SBC_HAVE_ARMV6 is defined when the compiler advertises any of the ARMv6
 * or ARMv7 architecture macros listed below.
 */
#if defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || \
defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) || \
defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__) || \
defined(__ARM_ARCH_6M__) || defined(__ARM_ARCH_7__) || \
defined(__ARM_ARCH_7A__) || defined(__ARM_ARCH_7R__) || \
defined(__ARM_ARCH_7M__)
#define SBC_HAVE_ARMV6 1
#endif
/*
 * The ARMv6 primitives are only built when all of the following hold:
 * SBC_HIGH_PRECISION is not defined, SCALE_OUT_BITS is 15, the compiler
 * defines __GNUC__ and __ARM_EABI__, an ARMv6+ target was detected above,
 * the code is not compiled as Thumb, and NEON is not enabled.
 * Code that calls sbc_init_primitives_armv6() must be guarded by
 * SBC_BUILD_WITH_ARMV6_SUPPORT, since the prototype only exists then.
 */
#if !defined(SBC_HIGH_PRECISION) && (SCALE_OUT_BITS == 15) && \
defined(__GNUC__) && defined(SBC_HAVE_ARMV6) && \
defined(__ARM_EABI__) && !defined(__thumb__) && \
!defined(__ARM_NEON__)
#define SBC_BUILD_WITH_ARMV6_SUPPORT
void sbc_init_primitives_armv6(struct sbc_encoder_state *encoder_state);
#endif
#endif

View file

@ -0,0 +1,304 @@
/*
*
* Bluetooth low-complexity, subband codec (SBC) library
*
* Copyright (C) 2010 Keith Mok <ek9852@gmail.com>
* Copyright (C) 2008-2010 Nokia Corporation
* Copyright (C) 2004-2010 Marcel Holtmann <marcel@holtmann.org>
* Copyright (C) 2004-2005 Henryk Ploetz <henryk@ploetzli.ch>
* Copyright (C) 2005-2006 Brad Midgley <bmidgley@xmission.com>
*
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*
*/
#include <stdint.h>
#include <limits.h>
#include "sbc.h"
#include "sbc_math.h"
#include "sbc_tables.h"
#include "sbc_primitives_iwmmxt.h"
/*
* IWMMXT optimizations
*/
#ifdef SBC_BUILD_WITH_IWMMXT_SUPPORT
/*
 * 4-subband analysis kernel for one block using iWMMXt inline assembly.
 *
 * Asm operands (all inputs):
 *   %0 = in      input samples, read at byte offsets 0..72
 *   %1 = consts  constant table, read at byte offsets 0..104
 *   %2 = 1 << (SBC_PROTO_FIXED4_SCALE - 1), broadcast into wr4 by tbcstw
 *        and added to the first products
 *   %3 = out     destination, two 64-bit stores (four int32_t values)
 *   %4 = SBC_PROTO_FIXED4_SCALE, moved to wcgr0 and used as the shift count
 *        of the wsrawg instructions
 *
 * The first stage accumulates wmadds products of 'in' and 'consts' into
 * wr0/wr1, shifts them right by wcgr0 and packs them with wpackwss; the
 * second stage multiplies the packed values with consts + 80..104 and
 * stores the sums to 'out'.
 *
 * Because 'out' is written through an input operand, the "memory" clobber
 * is what tells the compiler that memory is modified.
 */
static inline void sbc_analyze_four_iwmmxt(const int16_t *in, int32_t *out,
const FIXED_T *consts)
{
asm volatile (
"wldrd wr0, [%0]\n"
"tbcstw wr4, %2\n"
"wldrd wr2, [%1]\n"
"wldrd wr1, [%0, #8]\n"
"wldrd wr3, [%1, #8]\n"
"wmadds wr0, wr2, wr0\n"
" wldrd wr6, [%0, #16]\n"
"wmadds wr1, wr3, wr1\n"
" wldrd wr7, [%0, #24]\n"
"waddwss wr0, wr0, wr4\n"
" wldrd wr8, [%1, #16]\n"
"waddwss wr1, wr1, wr4\n"
" wldrd wr9, [%1, #24]\n"
" wmadds wr6, wr8, wr6\n"
" wldrd wr2, [%0, #32]\n"
" wmadds wr7, wr9, wr7\n"
" wldrd wr3, [%0, #40]\n"
" waddwss wr0, wr6, wr0\n"
" wldrd wr4, [%1, #32]\n"
" waddwss wr1, wr7, wr1\n"
" wldrd wr5, [%1, #40]\n"
" wmadds wr2, wr4, wr2\n"
"wldrd wr6, [%0, #48]\n"
" wmadds wr3, wr5, wr3\n"
"wldrd wr7, [%0, #56]\n"
" waddwss wr0, wr2, wr0\n"
"wldrd wr8, [%1, #48]\n"
" waddwss wr1, wr3, wr1\n"
"wldrd wr9, [%1, #56]\n"
"wmadds wr6, wr8, wr6\n"
" wldrd wr2, [%0, #64]\n"
"wmadds wr7, wr9, wr7\n"
" wldrd wr3, [%0, #72]\n"
"waddwss wr0, wr6, wr0\n"
" wldrd wr4, [%1, #64]\n"
"waddwss wr1, wr7, wr1\n"
" wldrd wr5, [%1, #72]\n"
" wmadds wr2, wr4, wr2\n"
"tmcr wcgr0, %4\n"
" wmadds wr3, wr5, wr3\n"
" waddwss wr0, wr2, wr0\n"
" waddwss wr1, wr3, wr1\n"
"\n"
"wsrawg wr0, wr0, wcgr0\n"
" wldrd wr4, [%1, #80]\n"
"wsrawg wr1, wr1, wcgr0\n"
" wldrd wr5, [%1, #88]\n"
"wpackwss wr0, wr0, wr0\n"
" wldrd wr6, [%1, #96]\n"
"wpackwss wr1, wr1, wr1\n"
"wmadds wr2, wr5, wr0\n"
" wldrd wr7, [%1, #104]\n"
"wmadds wr0, wr4, wr0\n"
"\n"
" wmadds wr3, wr7, wr1\n"
" wmadds wr1, wr6, wr1\n"
" waddwss wr2, wr3, wr2\n"
" waddwss wr0, wr1, wr0\n"
"\n"
"wstrd wr0, [%3]\n"
"wstrd wr2, [%3, #8]\n"
:
: "r" (in), "r" (consts),
"r" (1 << (SBC_PROTO_FIXED4_SCALE - 1)), "r" (out),
"r" (SBC_PROTO_FIXED4_SCALE)
: "wr0", "wr1", "wr2", "wr3", "wr4", "wr5", "wr6", "wr7",
"wr8", "wr9", "wcgr0", "memory");
}
/*
 * 8-subband analysis kernel for one block using iWMMXt inline assembly.
 *
 * Asm operands (all inputs):
 *   %0 = in      input samples, read at byte offsets 0..152
 *   %1 = consts  constant table, read at byte offsets 0..280
 *   %2 = 1 << (SBC_PROTO_FIXED8_SCALE - 1), broadcast into wr15 by tbcstw
 *        and added to the first products (wr15 is reused for data later)
 *   %3 = out     destination, four 64-bit stores (eight int32_t values)
 *   %4 = SBC_PROTO_FIXED8_SCALE, moved to wcgr0 and used as the shift count
 *        of the wsrawg instructions
 *
 * The first stage accumulates wmadds products of 'in' and 'consts' into
 * wr0-wr3, shifts them right by wcgr0 and packs them with wpackwss. The
 * second stage multiplies the packed values with the table starting at
 * consts + 160 and writes the results in two halves: out[0..3] first, then
 * out[4..7].
 *
 * Because 'out' is written through an input operand, the "memory" clobber
 * is what tells the compiler that memory is modified.
 */
static inline void sbc_analyze_eight_iwmmxt(const int16_t *in, int32_t *out,
const FIXED_T *consts)
{
asm volatile (
"wldrd wr0, [%0]\n"
"tbcstw wr15, %2\n"
"wldrd wr1, [%0, #8]\n"
"wldrd wr2, [%0, #16]\n"
"wldrd wr3, [%0, #24]\n"
"wldrd wr4, [%1]\n"
"wldrd wr5, [%1, #8]\n"
"wldrd wr6, [%1, #16]\n"
"wldrd wr7, [%1, #24]\n"
"wmadds wr0, wr0, wr4\n"
" wldrd wr8, [%1, #32]\n"
"wmadds wr1, wr1, wr5\n"
" wldrd wr9, [%1, #40]\n"
"wmadds wr2, wr2, wr6\n"
" wldrd wr10, [%1, #48]\n"
"wmadds wr3, wr3, wr7\n"
" wldrd wr11, [%1, #56]\n"
"waddwss wr0, wr0, wr15\n"
" wldrd wr4, [%0, #32]\n"
"waddwss wr1, wr1, wr15\n"
" wldrd wr5, [%0, #40]\n"
"waddwss wr2, wr2, wr15\n"
" wldrd wr6, [%0, #48]\n"
"waddwss wr3, wr3, wr15\n"
" wldrd wr7, [%0, #56]\n"
" wmadds wr4, wr4, wr8\n"
" wldrd wr12, [%0, #64]\n"
" wmadds wr5, wr5, wr9\n"
" wldrd wr13, [%0, #72]\n"
" wmadds wr6, wr6, wr10\n"
" wldrd wr14, [%0, #80]\n"
" wmadds wr7, wr7, wr11\n"
" wldrd wr15, [%0, #88]\n"
" waddwss wr0, wr4, wr0\n"
" wldrd wr8, [%1, #64]\n"
" waddwss wr1, wr5, wr1\n"
" wldrd wr9, [%1, #72]\n"
" waddwss wr2, wr6, wr2\n"
" wldrd wr10, [%1, #80]\n"
" waddwss wr3, wr7, wr3\n"
" wldrd wr11, [%1, #88]\n"
" wmadds wr12, wr12, wr8\n"
"wldrd wr4, [%0, #96]\n"
" wmadds wr13, wr13, wr9\n"
"wldrd wr5, [%0, #104]\n"
" wmadds wr14, wr14, wr10\n"
"wldrd wr6, [%0, #112]\n"
" wmadds wr15, wr15, wr11\n"
"wldrd wr7, [%0, #120]\n"
" waddwss wr0, wr12, wr0\n"
"wldrd wr8, [%1, #96]\n"
" waddwss wr1, wr13, wr1\n"
"wldrd wr9, [%1, #104]\n"
" waddwss wr2, wr14, wr2\n"
"wldrd wr10, [%1, #112]\n"
" waddwss wr3, wr15, wr3\n"
"wldrd wr11, [%1, #120]\n"
"wmadds wr4, wr4, wr8\n"
" wldrd wr12, [%0, #128]\n"
"wmadds wr5, wr5, wr9\n"
" wldrd wr13, [%0, #136]\n"
"wmadds wr6, wr6, wr10\n"
" wldrd wr14, [%0, #144]\n"
"wmadds wr7, wr7, wr11\n"
" wldrd wr15, [%0, #152]\n"
"waddwss wr0, wr4, wr0\n"
" wldrd wr8, [%1, #128]\n"
"waddwss wr1, wr5, wr1\n"
" wldrd wr9, [%1, #136]\n"
"waddwss wr2, wr6, wr2\n"
" wldrd wr10, [%1, #144]\n"
" waddwss wr3, wr7, wr3\n"
" wldrd wr11, [%1, #152]\n"
" wmadds wr12, wr12, wr8\n"
"tmcr wcgr0, %4\n"
" wmadds wr13, wr13, wr9\n"
" wmadds wr14, wr14, wr10\n"
" wmadds wr15, wr15, wr11\n"
" waddwss wr0, wr12, wr0\n"
" waddwss wr1, wr13, wr1\n"
" waddwss wr2, wr14, wr2\n"
" waddwss wr3, wr15, wr3\n"
"\n"
"wsrawg wr0, wr0, wcgr0\n"
"wsrawg wr1, wr1, wcgr0\n"
"wsrawg wr2, wr2, wcgr0\n"
"wsrawg wr3, wr3, wcgr0\n"
"\n"
"wpackwss wr0, wr0, wr0\n"
"wpackwss wr1, wr1, wr1\n"
" wldrd wr4, [%1, #160]\n"
"wpackwss wr2, wr2, wr2\n"
" wldrd wr5, [%1, #168]\n"
"wpackwss wr3, wr3, wr3\n"
" wldrd wr6, [%1, #192]\n"
" wmadds wr4, wr4, wr0\n"
" wldrd wr7, [%1, #200]\n"
" wmadds wr5, wr5, wr0\n"
" wldrd wr8, [%1, #224]\n"
" wmadds wr6, wr6, wr1\n"
" wldrd wr9, [%1, #232]\n"
" wmadds wr7, wr7, wr1\n"
" waddwss wr4, wr6, wr4\n"
" waddwss wr5, wr7, wr5\n"
" wmadds wr8, wr8, wr2\n"
"wldrd wr6, [%1, #256]\n"
" wmadds wr9, wr9, wr2\n"
"wldrd wr7, [%1, #264]\n"
"waddwss wr4, wr8, wr4\n"
" waddwss wr5, wr9, wr5\n"
"wmadds wr6, wr6, wr3\n"
"wmadds wr7, wr7, wr3\n"
"waddwss wr4, wr6, wr4\n"
"waddwss wr5, wr7, wr5\n"
"\n"
"wstrd wr4, [%3]\n"
"wstrd wr5, [%3, #8]\n"
"\n"
"wldrd wr6, [%1, #176]\n"
"wldrd wr5, [%1, #184]\n"
"wmadds wr5, wr5, wr0\n"
"wldrd wr8, [%1, #208]\n"
"wmadds wr0, wr6, wr0\n"
"wldrd wr9, [%1, #216]\n"
"wmadds wr9, wr9, wr1\n"
"wldrd wr6, [%1, #240]\n"
"wmadds wr1, wr8, wr1\n"
"wldrd wr7, [%1, #248]\n"
"waddwss wr0, wr1, wr0\n"
"waddwss wr5, wr9, wr5\n"
"wmadds wr7, wr7, wr2\n"
"wldrd wr8, [%1, #272]\n"
"wmadds wr2, wr6, wr2\n"
"wldrd wr9, [%1, #280]\n"
"waddwss wr0, wr2, wr0\n"
"waddwss wr5, wr7, wr5\n"
"wmadds wr9, wr9, wr3\n"
"wmadds wr3, wr8, wr3\n"
"waddwss wr0, wr3, wr0\n"
"waddwss wr5, wr9, wr5\n"
"\n"
"wstrd wr0, [%3, #16]\n"
"wstrd wr5, [%3, #24]\n"
:
: "r" (in), "r" (consts),
"r" (1 << (SBC_PROTO_FIXED8_SCALE - 1)), "r" (out),
"r" (SBC_PROTO_FIXED8_SCALE)
: "wr0", "wr1", "wr2", "wr3", "wr4", "wr5", "wr6", "wr7",
"wr8", "wr9", "wr10", "wr11", "wr12", "wr13", "wr14", "wr15",
"wcgr0", "memory");
}
/*
 * Analyze four blocks with the 4-subband iWMMXt kernel.
 *
 * The kernel is invoked at input offsets 12, 8, 4 and 0 of 'x', alternating
 * between the "odd" and "even" constant tables. Each call writes to its own
 * output row; consecutive rows are 'out_stride' int32_t elements apart.
 */
static inline void sbc_analyze_4b_4s_iwmmxt(int16_t *x, int32_t *out,
						int out_stride)
{
	int32_t *const row0 = out;
	int32_t *const row1 = row0 + out_stride;
	int32_t *const row2 = row1 + out_stride;
	int32_t *const row3 = row2 + out_stride;

	sbc_analyze_four_iwmmxt(&x[12], row0, analysis_consts_fixed4_simd_odd);
	sbc_analyze_four_iwmmxt(&x[8], row1, analysis_consts_fixed4_simd_even);
	sbc_analyze_four_iwmmxt(&x[4], row2, analysis_consts_fixed4_simd_odd);
	sbc_analyze_four_iwmmxt(&x[0], row3, analysis_consts_fixed4_simd_even);
}
/*
 * Analyze four blocks with the 8-subband iWMMXt kernel.
 *
 * The kernel is invoked at input offsets 24, 16, 8 and 0 of 'x', alternating
 * between the "odd" and "even" constant tables. Each call writes to its own
 * output row; consecutive rows are 'out_stride' int32_t elements apart.
 */
static inline void sbc_analyze_4b_8s_iwmmxt(int16_t *x, int32_t *out,
						int out_stride)
{
	int32_t *const row0 = out;
	int32_t *const row1 = row0 + out_stride;
	int32_t *const row2 = row1 + out_stride;
	int32_t *const row3 = row2 + out_stride;

	sbc_analyze_eight_iwmmxt(&x[24], row0, analysis_consts_fixed8_simd_odd);
	sbc_analyze_eight_iwmmxt(&x[16], row1, analysis_consts_fixed8_simd_even);
	sbc_analyze_eight_iwmmxt(&x[8], row2, analysis_consts_fixed8_simd_odd);
	sbc_analyze_eight_iwmmxt(&x[0], row3, analysis_consts_fixed8_simd_even);
}
/*
 * Install the iWMMXt implementations of the two analysis primitives in
 * 'state' and label the active implementation accordingly. Other members
 * of 'state' are left untouched.
 */
void sbc_init_primitives_iwmmxt(struct sbc_encoder_state *state)
{
	state->implementation_info = "IWMMXT";

	state->sbc_analyze_4b_8s = sbc_analyze_4b_8s_iwmmxt;
	state->sbc_analyze_4b_4s = sbc_analyze_4b_4s_iwmmxt;
}
#endif

View file

@ -0,0 +1,42 @@
/*
*
* Bluetooth low-complexity, subband codec (SBC) library
*
* Copyright (C) 2010 Keith Mok <ek9852@gmail.com>
* Copyright (C) 2008-2010 Nokia Corporation
* Copyright (C) 2004-2010 Marcel Holtmann <marcel@holtmann.org>
* Copyright (C) 2004-2005 Henryk Ploetz <henryk@ploetzli.ch>
* Copyright (C) 2005-2006 Brad Midgley <bmidgley@xmission.com>
*
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*
*/
/* Include guard for the iWMMXt primitives header */
#ifndef __SBC_PRIMITIVES_IWMMXT_H
#define __SBC_PRIMITIVES_IWMMXT_H
#include "sbc_primitives.h"
/*
 * The iWMMXt primitives are only built when the compiler defines __GNUC__
 * and __IWMMXT__, SBC_HIGH_PRECISION is not defined and SCALE_OUT_BITS
 * is 15. Code that calls sbc_init_primitives_iwmmxt() must be guarded by
 * SBC_BUILD_WITH_IWMMXT_SUPPORT, since the prototype only exists then.
 */
#if defined(__GNUC__) && defined(__IWMMXT__) && \
!defined(SBC_HIGH_PRECISION) && (SCALE_OUT_BITS == 15)
#define SBC_BUILD_WITH_IWMMXT_SUPPORT
void sbc_init_primitives_iwmmxt(struct sbc_encoder_state *encoder_state);
#endif
#endif

View file

@ -100,7 +100,7 @@ static inline void sbc_analyze_four_mmx(const int16_t *in, int32_t *out,
:
: "r" (in), "r" (consts), "r" (&round_c), "r" (out),
"i" (SBC_PROTO_FIXED4_SCALE)
: "memory");
: "cc", "memory");
}
static inline void sbc_analyze_eight_mmx(const int16_t *in, int32_t *out,
@ -242,7 +242,7 @@ static inline void sbc_analyze_eight_mmx(const int16_t *in, int32_t *out,
:
: "r" (in), "r" (consts), "r" (&round_c), "r" (out),
"i" (SBC_PROTO_FIXED8_SCALE)
: "memory");
: "cc", "memory");
}
static inline void sbc_analyze_4b_4s_mmx(int16_t *x, int32_t *out,
@ -275,6 +275,59 @@ static inline void sbc_analyze_4b_8s_mmx(int16_t *x, int32_t *out,
asm volatile ("emms\n");
}
/*
 * MMX implementation of the scale factors calculation (no joint stereo).
 *
 * For every channel the subbands are handled two at a time (sb += 2): each
 * 64-bit movq covers the int32_t samples of subbands sb and sb + 1 of one
 * block, and the two resulting scale factors are written to
 * scale_factor[ch][sb] and scale_factor[ch][sb + 1].
 *
 * Asm operands:
 *   %0 = blk  byte offset of the current block inside sb_sample_f; starts
 *             at the last block and is decreased by one block stride per
 *             iteration until it becomes negative ("jns 1b"). After the
 *             loop the same register is reused as scratch (%k0).
 *   %1 = address of sb_sample_f[0][ch][sb]
 *   %2 = byte distance between two consecutive blocks (immediate)
 *   %3 = address of scale_factor[ch][sb]
 *   %4 = address of 'consts' (two copies of 1 << SCALE_OUT_BITS), the
 *        initial value of the mm0 accumulator
 *   %5 = SCALE_OUT_BITS (immediate)
 *
 * Inside the loop each sample x is turned into x - 1 when positive, left
 * unchanged when zero, and bit-inverted when negative (the pcmpgtd / paddd
 * / pcmpgtd / pxor sequence), then OR-ed into mm0. Afterwards bsrl yields
 * the index of the highest set bit of each 32-bit half, from which
 * SCALE_OUT_BITS is subtracted to form the scale factor.
 *
 * The trailing emms leaves MMX state once all channels are processed.
 */
static void sbc_calc_scalefactors_mmx(
int32_t sb_sample_f[16][2][8],
uint32_t scale_factor[2][8],
int blocks, int channels, int subbands)
{
static const SBC_ALIGNED int32_t consts[2] = {
1 << SCALE_OUT_BITS,
1 << SCALE_OUT_BITS,
};
int ch, sb;
intptr_t blk;
for (ch = 0; ch < channels; ch++) {
for (sb = 0; sb < subbands; sb += 2) {
/* byte offset of the last block to scan */
blk = (blocks - 1) * (((char *) &sb_sample_f[1][0][0] -
(char *) &sb_sample_f[0][0][0]));
asm volatile (
"movq (%4), %%mm0\n"
"1:\n"
"movq (%1, %0), %%mm1\n"
"pxor %%mm2, %%mm2\n"
"pcmpgtd %%mm2, %%mm1\n"
"paddd (%1, %0), %%mm1\n"
"pcmpgtd %%mm1, %%mm2\n"
"pxor %%mm2, %%mm1\n"
"por %%mm1, %%mm0\n"
"sub %2, %0\n"
"jns 1b\n"
"movd %%mm0, %k0\n"
"psrlq $32, %%mm0\n"
"bsrl %k0, %k0\n"
"subl %5, %k0\n"
"movl %k0, (%3)\n"
"movd %%mm0, %k0\n"
"bsrl %k0, %k0\n"
"subl %5, %k0\n"
"movl %k0, 4(%3)\n"
: "+r" (blk)
: "r" (&sb_sample_f[0][ch][sb]),
"i" ((char *) &sb_sample_f[1][0][0] -
(char *) &sb_sample_f[0][0][0]),
"r" (&scale_factor[ch][sb]),
"r" (&consts),
"i" (SCALE_OUT_BITS)
: "cc", "memory");
}
}
asm volatile ("emms\n");
}
static int check_mmx_support(void)
{
#ifdef __amd64__
@ -313,6 +366,7 @@ void sbc_init_primitives_mmx(struct sbc_encoder_state *state)
if (check_mmx_support()) {
state->sbc_analyze_4b_4s = sbc_analyze_4b_4s_mmx;
state->sbc_analyze_4b_8s = sbc_analyze_4b_8s_mmx;
state->sbc_calc_scalefactors = sbc_calc_scalefactors_mmx;
state->implementation_info = "MMX";
}
}

View file

@ -0,0 +1,892 @@
/*
*
* Bluetooth low-complexity, subband codec (SBC) library
*
* Copyright (C) 2004-2009 Marcel Holtmann <marcel@holtmann.org>
* Copyright (C) 2004-2005 Henryk Ploetz <henryk@ploetzli.ch>
* Copyright (C) 2005-2006 Brad Midgley <bmidgley@xmission.com>
*
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*
*/
#include <stdint.h>
#include <limits.h>
#include "sbc.h"
#include "sbc_math.h"
#include "sbc_tables.h"
#include "sbc_primitives_neon.h"
/*
* ARM NEON optimizations
*/
#ifdef SBC_BUILD_WITH_NEON_SUPPORT
/*
 * 4-subband analysis kernel for one block using ARM NEON inline assembly.
 *
 * Asm operands:
 *   %0 = in      read with post-increment, so the local copy of the pointer
 *                is modified ("+r"); loads carry a 64-bit alignment
 *                qualifier
 *   %1 = consts  read with post-increment ("+r"); loads carry a 128-bit
 *                alignment qualifier
 *   %2 = out     destination of one 128-bit aligned store of four int32_t
 *   %3 = SBC_PROTO_FIXED4_SCALE, immediate shift count of vrshrn
 *
 * The first stage accumulates vmull/vmlal products of 'in' and 'consts'
 * into q0/q1, adds them pairwise and narrows them with a rounding right
 * shift (vrshrn). The second stage multiplies the narrowed values with the
 * next 32 bytes of 'consts' and stores the pairwise sums to 'out'.
 */
static inline void _sbc_analyze_four_neon(const int16_t *in, int32_t *out,
const FIXED_T *consts)
{
/* TODO: merge even and odd cases (or even merge all four calls to this
* function) in order to have only aligned reads from 'in' array
* and reduce number of load instructions */
asm volatile (
"vld1.16 {d4, d5}, [%0, :64]!\n"
"vld1.16 {d8, d9}, [%1, :128]!\n"
"vmull.s16 q0, d4, d8\n"
"vld1.16 {d6, d7}, [%0, :64]!\n"
"vmull.s16 q1, d5, d9\n"
"vld1.16 {d10, d11}, [%1, :128]!\n"
"vmlal.s16 q0, d6, d10\n"
"vld1.16 {d4, d5}, [%0, :64]!\n"
"vmlal.s16 q1, d7, d11\n"
"vld1.16 {d8, d9}, [%1, :128]!\n"
"vmlal.s16 q0, d4, d8\n"
"vld1.16 {d6, d7}, [%0, :64]!\n"
"vmlal.s16 q1, d5, d9\n"
"vld1.16 {d10, d11}, [%1, :128]!\n"
"vmlal.s16 q0, d6, d10\n"
"vld1.16 {d4, d5}, [%0, :64]!\n"
"vmlal.s16 q1, d7, d11\n"
"vld1.16 {d8, d9}, [%1, :128]!\n"
"vmlal.s16 q0, d4, d8\n"
"vmlal.s16 q1, d5, d9\n"
"vpadd.s32 d0, d0, d1\n"
"vpadd.s32 d1, d2, d3\n"
"vrshrn.s32 d0, q0, %3\n"
"vld1.16 {d2, d3, d4, d5}, [%1, :128]!\n"
"vdup.i32 d1, d0[1]\n" /* TODO: can be eliminated */
"vdup.i32 d0, d0[0]\n" /* TODO: can be eliminated */
"vmull.s16 q3, d2, d0\n"
"vmull.s16 q4, d3, d0\n"
"vmlal.s16 q3, d4, d1\n"
"vmlal.s16 q4, d5, d1\n"
"vpadd.s32 d0, d6, d7\n" /* TODO: can be eliminated */
"vpadd.s32 d1, d8, d9\n" /* TODO: can be eliminated */
"vst1.32 {d0, d1}, [%2, :128]\n"
: "+r" (in), "+r" (consts)
: "r" (out),
"i" (SBC_PROTO_FIXED4_SCALE)
: "memory",
"d0", "d1", "d2", "d3", "d4", "d5",
"d6", "d7", "d8", "d9", "d10", "d11");
}
/*
 * 8-subband analysis kernel for one block using ARM NEON inline assembly.
 *
 * Asm operands:
 *   %0 = in      read with post-increment, so the local copy of the pointer
 *                is modified ("+r"); loads carry a 64-bit alignment
 *                qualifier
 *   %1 = consts  read with post-increment ("+r"); loads carry a 128-bit
 *                alignment qualifier
 *   %2 = out     destination of one 128-bit aligned store of eight int32_t
 *   %3 = SBC_PROTO_FIXED8_SCALE, immediate shift count of vrshr
 *
 * The first stage accumulates vmull/vmlal products of 'in' and 'consts'
 * into q6-q9, adds them pairwise, applies a rounding right shift (vrshr)
 * and narrows the result to 16 bits (vmovn). The second stage multiplies
 * the narrowed values with the following part of 'consts', again
 * accumulating into q6-q9, and stores the pairwise sums to 'out'.
 */
static inline void _sbc_analyze_eight_neon(const int16_t *in, int32_t *out,
const FIXED_T *consts)
{
/* TODO: merge even and odd cases (or even merge all four calls to this
* function) in order to have only aligned reads from 'in' array
* and reduce number of load instructions */
asm volatile (
"vld1.16 {d4, d5}, [%0, :64]!\n"
"vld1.16 {d8, d9}, [%1, :128]!\n"
"vmull.s16 q6, d4, d8\n"
"vld1.16 {d6, d7}, [%0, :64]!\n"
"vmull.s16 q7, d5, d9\n"
"vld1.16 {d10, d11}, [%1, :128]!\n"
"vmull.s16 q8, d6, d10\n"
"vld1.16 {d4, d5}, [%0, :64]!\n"
"vmull.s16 q9, d7, d11\n"
"vld1.16 {d8, d9}, [%1, :128]!\n"
"vmlal.s16 q6, d4, d8\n"
"vld1.16 {d6, d7}, [%0, :64]!\n"
"vmlal.s16 q7, d5, d9\n"
"vld1.16 {d10, d11}, [%1, :128]!\n"
"vmlal.s16 q8, d6, d10\n"
"vld1.16 {d4, d5}, [%0, :64]!\n"
"vmlal.s16 q9, d7, d11\n"
"vld1.16 {d8, d9}, [%1, :128]!\n"
"vmlal.s16 q6, d4, d8\n"
"vld1.16 {d6, d7}, [%0, :64]!\n"
"vmlal.s16 q7, d5, d9\n"
"vld1.16 {d10, d11}, [%1, :128]!\n"
"vmlal.s16 q8, d6, d10\n"
"vld1.16 {d4, d5}, [%0, :64]!\n"
"vmlal.s16 q9, d7, d11\n"
"vld1.16 {d8, d9}, [%1, :128]!\n"
"vmlal.s16 q6, d4, d8\n"
"vld1.16 {d6, d7}, [%0, :64]!\n"
"vmlal.s16 q7, d5, d9\n"
"vld1.16 {d10, d11}, [%1, :128]!\n"
"vmlal.s16 q8, d6, d10\n"
"vld1.16 {d4, d5}, [%0, :64]!\n"
"vmlal.s16 q9, d7, d11\n"
"vld1.16 {d8, d9}, [%1, :128]!\n"
"vmlal.s16 q6, d4, d8\n"
"vld1.16 {d6, d7}, [%0, :64]!\n"
"vmlal.s16 q7, d5, d9\n"
"vld1.16 {d10, d11}, [%1, :128]!\n"
"vmlal.s16 q8, d6, d10\n"
"vmlal.s16 q9, d7, d11\n"
"vpadd.s32 d0, d12, d13\n"
"vpadd.s32 d1, d14, d15\n"
"vpadd.s32 d2, d16, d17\n"
"vpadd.s32 d3, d18, d19\n"
"vrshr.s32 q0, q0, %3\n"
"vrshr.s32 q1, q1, %3\n"
"vmovn.s32 d0, q0\n"
"vmovn.s32 d1, q1\n"
"vdup.i32 d3, d1[1]\n" /* TODO: can be eliminated */
"vdup.i32 d2, d1[0]\n" /* TODO: can be eliminated */
"vdup.i32 d1, d0[1]\n" /* TODO: can be eliminated */
"vdup.i32 d0, d0[0]\n" /* TODO: can be eliminated */
"vld1.16 {d4, d5}, [%1, :128]!\n"
"vmull.s16 q6, d4, d0\n"
"vld1.16 {d6, d7}, [%1, :128]!\n"
"vmull.s16 q7, d5, d0\n"
"vmull.s16 q8, d6, d0\n"
"vmull.s16 q9, d7, d0\n"
"vld1.16 {d4, d5}, [%1, :128]!\n"
"vmlal.s16 q6, d4, d1\n"
"vld1.16 {d6, d7}, [%1, :128]!\n"
"vmlal.s16 q7, d5, d1\n"
"vmlal.s16 q8, d6, d1\n"
"vmlal.s16 q9, d7, d1\n"
"vld1.16 {d4, d5}, [%1, :128]!\n"
"vmlal.s16 q6, d4, d2\n"
"vld1.16 {d6, d7}, [%1, :128]!\n"
"vmlal.s16 q7, d5, d2\n"
"vmlal.s16 q8, d6, d2\n"
"vmlal.s16 q9, d7, d2\n"
"vld1.16 {d4, d5}, [%1, :128]!\n"
"vmlal.s16 q6, d4, d3\n"
"vld1.16 {d6, d7}, [%1, :128]!\n"
"vmlal.s16 q7, d5, d3\n"
"vmlal.s16 q8, d6, d3\n"
"vmlal.s16 q9, d7, d3\n"
"vpadd.s32 d0, d12, d13\n" /* TODO: can be eliminated */
"vpadd.s32 d1, d14, d15\n" /* TODO: can be eliminated */
"vpadd.s32 d2, d16, d17\n" /* TODO: can be eliminated */
"vpadd.s32 d3, d18, d19\n" /* TODO: can be eliminated */
"vst1.32 {d0, d1, d2, d3}, [%2, :128]\n"
: "+r" (in), "+r" (consts)
: "r" (out),
"i" (SBC_PROTO_FIXED8_SCALE)
: "memory",
"d0", "d1", "d2", "d3", "d4", "d5",
"d6", "d7", "d8", "d9", "d10", "d11",
"d12", "d13", "d14", "d15", "d16", "d17",
"d18", "d19");
}
/*
 * Analyze four blocks with the 4-subband NEON kernel.
 *
 * The kernel is invoked at input offsets 12, 8, 4 and 0 of 'x', alternating
 * between the "odd" and "even" constant tables. Each call writes to its own
 * output row; consecutive rows are 'out_stride' int32_t elements apart.
 */
static inline void sbc_analyze_4b_4s_neon(int16_t *x,
						int32_t *out, int out_stride)
{
	int32_t *const row0 = out;
	int32_t *const row1 = row0 + out_stride;
	int32_t *const row2 = row1 + out_stride;
	int32_t *const row3 = row2 + out_stride;

	_sbc_analyze_four_neon(&x[12], row0, analysis_consts_fixed4_simd_odd);
	_sbc_analyze_four_neon(&x[8], row1, analysis_consts_fixed4_simd_even);
	_sbc_analyze_four_neon(&x[4], row2, analysis_consts_fixed4_simd_odd);
	_sbc_analyze_four_neon(&x[0], row3, analysis_consts_fixed4_simd_even);
}
/*
 * Run the 8-subband analysis on four input blocks.
 *
 * The blocks start at x + 24, x + 16, x + 8 and x + 0 and are processed in
 * that order, alternating between the "odd" and "even" constant tables.
 * The first result goes to 'out'; each following result is written
 * out_stride int32_t elements after the previous one.
 */
static inline void sbc_analyze_4b_8s_neon(int16_t *x,
						int32_t *out, int out_stride)
{
	int32_t *row = out;

	_sbc_analyze_eight_neon(&x[24], row, analysis_consts_fixed8_simd_odd);
	row += out_stride;

	_sbc_analyze_eight_neon(&x[16], row, analysis_consts_fixed8_simd_even);
	row += out_stride;

	_sbc_analyze_eight_neon(&x[8], row, analysis_consts_fixed8_simd_odd);
	row += out_stride;

	_sbc_analyze_eight_neon(&x[0], row, analysis_consts_fixed8_simd_even);
}
/*
 * Compute the scale factors of every channel, four subbands at a time.
 *
 * For each channel and each group of four subbands the assembly walks
 * 'blocks' rows of sb_sample_f, keeps a per-lane running maximum of the
 * absolute sample values (seeded with (1 << SCALE_OUT_BITS) + 1) and
 * stores, per lane,
 *     (31 - SCALE_OUT_BITS) - clz(max - 1)
 * to scale_factor[ch][sb .. sb + 3].
 *
 * The loop consumes four blocks per pass and checks its counter only
 * after the first pass, so 'blocks' is presumably a positive multiple of
 * 4 -- TODO confirm against callers.  Every vector load and the final
 * store use a :128 alignment qualifier, so the addressed rows have to be
 * 16-byte aligned.
 */
static void sbc_calc_scalefactors_neon(
int32_t sb_sample_f[16][2][8],
uint32_t scale_factor[2][8],
int blocks, int channels, int subbands)
{
int ch, sb;
for (ch = 0; ch < channels; ch++) {
for (sb = 0; sb < subbands; sb += 4) {
/* blk counts the blocks still to scan; in points at the current row */
int blk = blocks;
int32_t *in = &sb_sample_f[0][ch][sb];
asm volatile (
/* q0 and q1 hold the running maxima, q14 = 1, q15 = 31 - SCALE_OUT_BITS;
 * q1 is seeded with (1 << SCALE_OUT_BITS) + 1 */
"vmov.s32 q0, #0\n"
"vmov.s32 q1, %[c1]\n"
"vmov.s32 q14, #1\n"
"vmov.s32 q15, %[c2]\n"
"vadd.s32 q1, q1, q14\n"
/* one pass: load four consecutive blocks (%[inc] bytes apart) and fold
 * their absolute values into q0 / q1 */
"1:\n"
"vld1.32 {d16, d17}, [%[in], :128], %[inc]\n"
"vabs.s32 q8, q8\n"
"vld1.32 {d18, d19}, [%[in], :128], %[inc]\n"
"vabs.s32 q9, q9\n"
"vld1.32 {d20, d21}, [%[in], :128], %[inc]\n"
"vabs.s32 q10, q10\n"
"vld1.32 {d22, d23}, [%[in], :128], %[inc]\n"
"vabs.s32 q11, q11\n"
"vmax.s32 q0, q0, q8\n"
"vmax.s32 q1, q1, q9\n"
"vmax.s32 q0, q0, q10\n"
"vmax.s32 q1, q1, q11\n"
"subs %[blk], %[blk], #4\n"
"bgt 1b\n"
/* merge both maxima, then turn (max - 1) into a scale factor through
 * its count of leading zeros */
"vmax.s32 q0, q0, q1\n"
"vsub.s32 q0, q0, q14\n"
"vclz.s32 q0, q0\n"
"vsub.s32 q0, q15, q0\n"
"vst1.32 {d0, d1}, [%[out], :128]\n"
:
[blk] "+r" (blk),
[in] "+r" (in)
:
/* byte distance between two consecutive blocks of sb_sample_f */
[inc] "r" ((char *) &sb_sample_f[1][0][0] -
(char *) &sb_sample_f[0][0][0]),
[out] "r" (&scale_factor[ch][sb]),
[c1] "i" (1 << SCALE_OUT_BITS),
[c2] "i" (31 - SCALE_OUT_BITS)
: "d0", "d1", "d2", "d3", "d16", "d17", "d18", "d19",
"d20", "d21", "d22", "d23", "d24", "d25", "d26",
"d27", "d28", "d29", "d30", "d31", "cc", "memory");
}
}
}
/*
 * Compute scale factors for a stereo frame and pick, per subband, between
 * plain and joint stereo coding.
 *
 * For every group of four subbands the assembly derives four candidate
 * scale factor vectors from 'blocks' rows of sb_sample_f: channel 0 and
 * channel 1 as they are, and the half-sum / half-difference of the two
 * channels (channel 1 with its lowest bit cleared first).  Joint stereo is
 * selected for a subband when the two plain scale factors add up to more
 * than the two joint ones; the highest subband is always kept plain.  The
 * chosen scale factors are stored to scale_factor[0] / scale_factor[1],
 * and the samples of the selected subbands are rewritten in place in
 * sb_sample_f with their half-sum (channel 0) and half-difference
 * (channel 1).
 *
 * 'subbands' equal to 4 takes the 4-subband path; any other value is
 * handled as 8 subbands.  The sample update handles two blocks per loop
 * iteration and always touches at least four, so 'blocks' is presumably
 * an even number >= 4 -- TODO confirm against callers.  All vector
 * accesses to sb_sample_f and scale_factor use a :128 alignment
 * qualifier, i.e. both arrays have to be 16-byte aligned.
 *
 * Returns the joint stereo mask: the joint_bits_mask[] weights summed
 * over the selected subbands, i.e. bit (subbands - 1 - sb) is set when
 * subband 'sb' uses joint stereo.
 */
int sbc_calc_scalefactors_j_neon(
	int32_t sb_sample_f[16][2][8],
	uint32_t scale_factor[2][8],
	int blocks, int subbands)
{
	/* per-lane bit weights of the returned mask; only ever read */
	static const SBC_ALIGNED int32_t joint_bits_mask[8] = {
		8, 4, 2, 1, 128, 64, 32, 16
	};
	int joint, i;
	int32_t *in0, *in1;
	int32_t *in = &sb_sample_f[0][0][0];
	uint32_t *out0, *out1;
	uint32_t *out = &scale_factor[0][0];
	const int32_t *consts = joint_bits_mask;
	/*
	 * Byte distance between two consecutive blocks.  The assembly
	 * temporarily negates and doubles this value (and restores it
	 * afterwards), so it is passed as a read-write operand instead of
	 * an input-only one.
	 */
	int inc = (int) ((char *) &sb_sample_f[1][0][0] -
				(char *) &sb_sample_f[0][0][0]);

	i = subbands;
	asm volatile (
		/*
		 * constants: q13 = (31 - SCALE_OUT_BITS), q14 = 1
		 * input: q0 = ((1 << SCALE_OUT_BITS) + 1)
		 * %[in0] - samples for channel 0
		 * %[in1] - samples for channel 1
		 * output: q0, q1 - scale factors without joint stereo
		 * q2, q3 - scale factors with joint stereo
		 * q15 - joint stereo selection mask
		 */
		".macro calc_scalefactors\n"
		"vmov.s32 q1, q0\n"
		"vmov.s32 q2, q0\n"
		"vmov.s32 q3, q0\n"
		"mov %[i], %[blocks]\n"
		/* one block per pass: running maxima of |ch0|, |ch1|,
		 * |half-sum| and |half-difference| in q0..q3 */
		"1:\n"
		"vld1.32 {d18, d19}, [%[in1], :128], %[inc]\n"
		"vbic.s32 q11, q9, q14\n"
		"vld1.32 {d16, d17}, [%[in0], :128], %[inc]\n"
		"vhadd.s32 q10, q8, q11\n"
		"vhsub.s32 q11, q8, q11\n"
		"vabs.s32 q8, q8\n"
		"vabs.s32 q9, q9\n"
		"vabs.s32 q10, q10\n"
		"vabs.s32 q11, q11\n"
		"vmax.s32 q0, q0, q8\n"
		"vmax.s32 q1, q1, q9\n"
		"vmax.s32 q2, q2, q10\n"
		"vmax.s32 q3, q3, q11\n"
		"subs %[i], %[i], #1\n"
		"bgt 1b\n"
		/* scale factor = (31 - SCALE_OUT_BITS) - clz(max - 1) */
		"vsub.s32 q0, q0, q14\n"
		"vsub.s32 q1, q1, q14\n"
		"vsub.s32 q2, q2, q14\n"
		"vsub.s32 q3, q3, q14\n"
		"vclz.s32 q0, q0\n"
		"vclz.s32 q1, q1\n"
		"vclz.s32 q2, q2\n"
		"vclz.s32 q3, q3\n"
		"vsub.s32 q0, q13, q0\n"
		"vsub.s32 q1, q13, q1\n"
		"vsub.s32 q2, q13, q2\n"
		"vsub.s32 q3, q13, q3\n"
		".endm\n"
		/*
		 * constants: q14 = 1
		 * input: q15 - joint stereo selection mask
		 * %[in0] - value set by calc_scalefactors macro
		 * %[in1] - value set by calc_scalefactors macro
		 */
		".macro update_joint_stereo_samples\n"
		/* step back to the last two blocks; %[out0]/%[out1] are
		 * reused here as sample pointers for the last block */
		"sub %[out1], %[in1], %[inc]\n"
		"sub %[out0], %[in0], %[inc]\n"
		"sub %[in1], %[in1], %[inc], asl #1\n"
		"sub %[in0], %[in0], %[inc], asl #1\n"
		"vld1.32 {d18, d19}, [%[in1], :128]\n"
		"vbic.s32 q11, q9, q14\n"
		"vld1.32 {d16, d17}, [%[in0], :128]\n"
		"vld1.32 {d2, d3}, [%[out1], :128]\n"
		"vbic.s32 q3, q1, q14\n"
		"vld1.32 {d0, d1}, [%[out0], :128]\n"
		"vhsub.s32 q10, q8, q11\n"
		"vhadd.s32 q11, q8, q11\n"
		"vhsub.s32 q2, q0, q3\n"
		"vhadd.s32 q3, q0, q3\n"
		"vbif.s32 q10, q9, q15\n"
		"vbif.s32 d22, d16, d30\n"
		/* %[inc] = -2 * %[inc]: walk backwards, two blocks a step */
		"sub %[inc], %[zero], %[inc], asl #1\n"
		"sub %[i], %[blocks], #2\n"
		"2:\n"
		"vbif.s32 d23, d17, d31\n"
		"vst1.32 {d20, d21}, [%[in1], :128], %[inc]\n"
		"vbif.s32 d4, d2, d30\n"
		"vld1.32 {d18, d19}, [%[in1], :128]\n"
		"vbif.s32 d5, d3, d31\n"
		"vst1.32 {d22, d23}, [%[in0], :128], %[inc]\n"
		"vbif.s32 d6, d0, d30\n"
		"vld1.32 {d16, d17}, [%[in0], :128]\n"
		"vbif.s32 d7, d1, d31\n"
		"vst1.32 {d4, d5}, [%[out1], :128], %[inc]\n"
		"vbic.s32 q11, q9, q14\n"
		"vld1.32 {d2, d3}, [%[out1], :128]\n"
		"vst1.32 {d6, d7}, [%[out0], :128], %[inc]\n"
		"vbic.s32 q3, q1, q14\n"
		"vld1.32 {d0, d1}, [%[out0], :128]\n"
		"vhsub.s32 q10, q8, q11\n"
		"vhadd.s32 q11, q8, q11\n"
		"vhsub.s32 q2, q0, q3\n"
		"vhadd.s32 q3, q0, q3\n"
		"vbif.s32 q10, q9, q15\n"
		"vbif.s32 d22, d16, d30\n"
		"subs %[i], %[i], #2\n"
		"bgt 2b\n"
		/* restore %[inc] to its original positive value */
		"sub %[inc], %[zero], %[inc], asr #1\n"
		"vbif.s32 d23, d17, d31\n"
		"vst1.32 {d20, d21}, [%[in1], :128]\n"
		"vbif.s32 q2, q1, q15\n"
		"vst1.32 {d22, d23}, [%[in0], :128]\n"
		"vbif.s32 q3, q0, q15\n"
		"vst1.32 {d4, d5}, [%[out1], :128]\n"
		"vst1.32 {d6, d7}, [%[out0], :128]\n"
		".endm\n"
		"vmov.s32 q14, #1\n"
		"vmov.s32 q13, %[c2]\n"
		/* %[i] still holds 'subbands' here */
		"cmp %[i], #4\n"
		"bne 8f\n"
		"4:\n" /* 4 subbands */
		"add %[in0], %[in], #0\n"
		"add %[in1], %[in], #32\n"
		"add %[out0], %[out], #0\n"
		"add %[out1], %[out], #32\n"
		"vmov.s32 q0, %[c1]\n"
		"vadd.s32 q0, q0, q14\n"
		"calc_scalefactors\n"
		/* check whether to use joint stereo for subbands 0, 1, 2 */
		"vadd.s32 q15, q0, q1\n"
		"vadd.s32 q9, q2, q3\n"
		"vmov.s32 d31[1], %[zero]\n" /* last subband -> no joint */
		"vld1.32 {d16, d17}, [%[consts], :128]!\n"
		"vcgt.s32 q15, q15, q9\n"
		/* calculate and save to memory 'joint' variable */
		/* update and save scale factors to memory */
		" vand.s32 q8, q8, q15\n"
		"vbit.s32 q0, q2, q15\n"
		" vpadd.s32 d16, d16, d17\n"
		"vbit.s32 q1, q3, q15\n"
		" vpadd.s32 d16, d16, d16\n"
		"vst1.32 {d0, d1}, [%[out0], :128]\n"
		"vst1.32 {d2, d3}, [%[out1], :128]\n"
		" vst1.32 {d16[0]}, [%[joint]]\n"
		"update_joint_stereo_samples\n"
		"b 9f\n"
		"8:\n" /* 8 subbands */
		/* first pass: upper four subbands (byte offset 16) */
		"add %[in0], %[in], #16\n\n"
		"add %[in1], %[in], #48\n"
		"add %[out0], %[out], #16\n\n"
		"add %[out1], %[out], #48\n"
		"vmov.s32 q0, %[c1]\n"
		"vadd.s32 q0, q0, q14\n"
		"calc_scalefactors\n"
		/* check whether to use joint stereo for subbands 4, 5, 6 */
		"vadd.s32 q15, q0, q1\n"
		"vadd.s32 q9, q2, q3\n"
		"vmov.s32 d31[1], %[zero]\n" /* last subband -> no joint */
		"vld1.32 {d16, d17}, [%[consts], :128]!\n"
		"vcgt.s32 q15, q15, q9\n"
		/* calculate part of 'joint' variable and save it to d24 */
		/* update and save scale factors to memory */
		" vand.s32 q8, q8, q15\n"
		"vbit.s32 q0, q2, q15\n"
		" vpadd.s32 d16, d16, d17\n"
		"vbit.s32 q1, q3, q15\n"
		"vst1.32 {d0, d1}, [%[out0], :128]\n"
		"vst1.32 {d2, d3}, [%[out1], :128]\n"
		" vpadd.s32 d24, d16, d16\n"
		"update_joint_stereo_samples\n"
		/* second pass: lower four subbands (byte offset 0) */
		"add %[in0], %[in], #0\n"
		"add %[in1], %[in], #32\n"
		"add %[out0], %[out], #0\n\n"
		"add %[out1], %[out], #32\n"
		"vmov.s32 q0, %[c1]\n"
		"vadd.s32 q0, q0, q14\n"
		"calc_scalefactors\n"
		/* check whether to use joint stereo for subbands 0, 1, 2, 3 */
		"vadd.s32 q15, q0, q1\n"
		"vadd.s32 q9, q2, q3\n"
		"vld1.32 {d16, d17}, [%[consts], :128]!\n"
		"vcgt.s32 q15, q15, q9\n"
		/* combine last part of 'joint' with d24 and save to memory */
		/* update and save scale factors to memory */
		" vand.s32 q8, q8, q15\n"
		"vbit.s32 q0, q2, q15\n"
		" vpadd.s32 d16, d16, d17\n"
		"vbit.s32 q1, q3, q15\n"
		" vpadd.s32 d16, d16, d16\n"
		"vst1.32 {d0, d1}, [%[out0], :128]\n"
		" vadd.s32 d16, d16, d24\n"
		"vst1.32 {d2, d3}, [%[out1], :128]\n"
		" vst1.32 {d16[0]}, [%[joint]]\n"
		"update_joint_stereo_samples\n"
		"9:\n"
		".purgem calc_scalefactors\n"
		".purgem update_joint_stereo_samples\n"
		:
		[i] "+&r" (i),
		[in] "+&r" (in),
		[in0] "=&r" (in0),
		[in1] "=&r" (in1),
		[out] "+&r" (out),
		[out0] "=&r" (out0),
		[out1] "=&r" (out1),
		[consts] "+&r" (consts),
		[inc] "+&r" (inc)
		:
		[blocks] "r" (blocks),
		[joint] "r" (&joint),
		[c1] "i" (1 << SCALE_OUT_BITS),
		[c2] "i" (31 - SCALE_OUT_BITS),
		[zero] "r" (0)
		: "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7",
		"d16", "d17", "d18", "d19", "d20", "d21", "d22",
		"d23", "d24", "d25", "d26", "d27", "d28", "d29",
		"d30", "d31", "cc", "memory");
	return joint;
}
/*
 * Initializer for one 8-byte permutation row that selects the 16-bit
 * samples a, b, c and d (in that order) and swaps the two bytes of each:
 * for sample index s it emits byte indices 2s + 1, 2s.
 * Arguments are parenthesized so that expressions expand correctly.
 */
#define PERM_BE(a, b, c, d) { \
		((a) * 2) + 1, ((a) * 2) + 0, \
		((b) * 2) + 1, ((b) * 2) + 0, \
		((c) * 2) + 1, ((c) * 2) + 0, \
		((d) * 2) + 1, ((d) * 2) + 0 \
	}
/*
 * Initializer for one 8-byte permutation row that selects the 16-bit
 * samples a, b, c and d (in that order) and keeps their byte order:
 * for sample index s it emits byte indices 2s, 2s + 1.
 * Arguments are parenthesized so that expressions expand correctly.
 */
#define PERM_LE(a, b, c, d) { \
		((a) * 2) + 0, ((a) * 2) + 1, \
		((b) * 2) + 0, ((b) * 2) + 1, \
		((c) * 2) + 0, ((c) * 2) + 1, \
		((d) * 2) + 0, ((d) * 2) + 1 \
	}
/*
 * Copy new 16-bit PCM samples into the X buffers of the 4-subband
 * encoder, permuting them on the way, and return the new buffer position.
 *
 * position   - current index into X[ch][]; data is written below it
 * pcm        - source samples; for more than one channel the 16-bit
 *              samples are taken alternately for X[0] and X[1]
 * X          - per-channel sample buffers
 * nsamples   - samples per channel to consume; the copy loops handle
 *              eight per iteration and test the counter only after the
 *              first pass, so this is presumably a positive multiple of
 *              8 -- TODO confirm against callers
 * nchannels  - more than one selects the two-channel paths
 * big_endian - non-zero swaps the two bytes of every sample
 *
 * When position < nsamples the 36 values starting at X[ch][position] are
 * first moved to X[ch][SBC_X_BUFFER_SIZE - 40] and position restarts
 * there.  Each loop pass then lowers position by 8 and stores the eight
 * loaded samples in the order 7, 3, 6, 4, 0, 2, 1, 5.  The X accesses use
 * :128 alignment qualifiers, so position has to keep X[ch] + position
 * 16-byte aligned.
 */
static SBC_ALWAYS_INLINE int sbc_enc_process_input_4s_neon_internal(
	int position,
	const uint8_t *pcm, int16_t X[2][SBC_X_BUFFER_SIZE],
	int nsamples, int nchannels, int big_endian)
{
	/* vtbl byte indices; the tables are only read, hence const */
	static const SBC_ALIGNED uint8_t perm_be[2][8] = {
		PERM_BE(7, 3, 6, 4),
		PERM_BE(0, 2, 1, 5)
	};
	static const SBC_ALIGNED uint8_t perm_le[2][8] = {
		PERM_LE(7, 3, 6, 4),
		PERM_LE(0, 2, 1, 5)
	};

	/* handle X buffer wraparound */
	if (position < nsamples) {
		int16_t *dst = &X[0][SBC_X_BUFFER_SIZE - 40];
		int16_t *src = &X[0][position];
		/* move 16 + 16 + 4 = 36 samples of channel 0 */
		asm volatile (
			"vld1.16 {d0, d1, d2, d3}, [%[src], :128]!\n"
			"vst1.16 {d0, d1, d2, d3}, [%[dst], :128]!\n"
			"vld1.16 {d0, d1, d2, d3}, [%[src], :128]!\n"
			"vst1.16 {d0, d1, d2, d3}, [%[dst], :128]!\n"
			"vld1.16 {d0}, [%[src], :64]!\n"
			"vst1.16 {d0}, [%[dst], :64]!\n"
			:
			[dst] "+r" (dst),
			[src] "+r" (src)
			: : "memory", "d0", "d1", "d2", "d3");
		if (nchannels > 1) {
			/* same move for channel 1 */
			dst = &X[1][SBC_X_BUFFER_SIZE - 40];
			src = &X[1][position];
			asm volatile (
				"vld1.16 {d0, d1, d2, d3}, [%[src], :128]!\n"
				"vst1.16 {d0, d1, d2, d3}, [%[dst], :128]!\n"
				"vld1.16 {d0, d1, d2, d3}, [%[src], :128]!\n"
				"vst1.16 {d0, d1, d2, d3}, [%[dst], :128]!\n"
				"vld1.16 {d0}, [%[src], :64]!\n"
				"vst1.16 {d0}, [%[dst], :64]!\n"
				:
				[dst] "+r" (dst),
				[src] "+r" (src)
				: : "memory", "d0", "d1", "d2", "d3");
		}
		position = SBC_X_BUFFER_SIZE - 40;
	}

	if ((nchannels > 1) && ((uintptr_t)pcm & 1)) {
		/* poor 'pcm' alignment */
		int16_t *x = &X[0][position];
		int16_t *y = &X[1][position];
		/* byte loads, then vuzp.16 splits the two channels */
		asm volatile (
			"vld1.8 {d0, d1}, [%[perm], :128]\n"
			"1:\n"
			"sub %[x], %[x], #16\n"
			"sub %[y], %[y], #16\n"
			"sub %[position], %[position], #8\n"
			"vld1.8 {d4, d5}, [%[pcm]]!\n"
			"vuzp.16 d4, d5\n"
			"vld1.8 {d20, d21}, [%[pcm]]!\n"
			"vuzp.16 d20, d21\n"
			"vswp d5, d20\n"
			"vtbl.8 d16, {d4, d5}, d0\n"
			"vtbl.8 d17, {d4, d5}, d1\n"
			"vtbl.8 d18, {d20, d21}, d0\n"
			"vtbl.8 d19, {d20, d21}, d1\n"
			"vst1.16 {d16, d17}, [%[x], :128]\n"
			"vst1.16 {d18, d19}, [%[y], :128]\n"
			"subs %[nsamples], %[nsamples], #8\n"
			"bgt 1b\n"
			:
			[x] "+r" (x),
			[y] "+r" (y),
			[pcm] "+r" (pcm),
			[nsamples] "+r" (nsamples),
			[position] "+r" (position)
			:
			[perm] "r" (big_endian ? perm_be : perm_le)
			: "cc", "memory", "d0", "d1", "d2", "d3", "d4",
			"d5", "d6", "d7", "d16", "d17", "d18", "d19",
			"d20", "d21", "d22", "d23");
	} else if (nchannels > 1) {
		/* proper 'pcm' alignment */
		int16_t *x = &X[0][position];
		int16_t *y = &X[1][position];
		/* vld2.16 splits the two channels while loading */
		asm volatile (
			"vld1.8 {d0, d1}, [%[perm], :128]\n"
			"1:\n"
			"sub %[x], %[x], #16\n"
			"sub %[y], %[y], #16\n"
			"sub %[position], %[position], #8\n"
			"vld2.16 {d4, d5}, [%[pcm]]!\n"
			"vld2.16 {d20, d21}, [%[pcm]]!\n"
			"vswp d5, d20\n"
			"vtbl.8 d16, {d4, d5}, d0\n"
			"vtbl.8 d17, {d4, d5}, d1\n"
			"vtbl.8 d18, {d20, d21}, d0\n"
			"vtbl.8 d19, {d20, d21}, d1\n"
			"vst1.16 {d16, d17}, [%[x], :128]\n"
			"vst1.16 {d18, d19}, [%[y], :128]\n"
			"subs %[nsamples], %[nsamples], #8\n"
			"bgt 1b\n"
			:
			[x] "+r" (x),
			[y] "+r" (y),
			[pcm] "+r" (pcm),
			[nsamples] "+r" (nsamples),
			[position] "+r" (position)
			:
			[perm] "r" (big_endian ? perm_be : perm_le)
			: "cc", "memory", "d0", "d1", "d2", "d3", "d4",
			"d5", "d6", "d7", "d16", "d17", "d18", "d19",
			"d20", "d21", "d22", "d23");
	} else {
		/* single channel: eight samples per pass go to X[0] only */
		int16_t *x = &X[0][position];
		asm volatile (
			"vld1.8 {d0, d1}, [%[perm], :128]\n"
			"1:\n"
			"sub %[x], %[x], #16\n"
			"sub %[position], %[position], #8\n"
			"vld1.8 {d4, d5}, [%[pcm]]!\n"
			"vtbl.8 d16, {d4, d5}, d0\n"
			"vtbl.8 d17, {d4, d5}, d1\n"
			"vst1.16 {d16, d17}, [%[x], :128]\n"
			"subs %[nsamples], %[nsamples], #8\n"
			"bgt 1b\n"
			:
			[x] "+r" (x),
			[pcm] "+r" (pcm),
			[nsamples] "+r" (nsamples),
			[position] "+r" (position)
			:
			[perm] "r" (big_endian ? perm_be : perm_le)
			: "cc", "memory", "d0", "d1", "d2", "d3", "d4",
			"d5", "d6", "d7", "d16", "d17", "d18", "d19");
	}
	return position;
}
/*
 * Copy new 16-bit PCM samples into the X buffers of the 8-subband
 * encoder, permuting them on the way, and return the new buffer position.
 *
 * position   - current index into X[ch][]; data is written below it
 * pcm        - source samples; for more than one channel the 16-bit
 *              samples are taken alternately for X[0] and X[1]
 * X          - per-channel sample buffers
 * nsamples   - samples per channel to consume; the copy loops handle
 *              sixteen per iteration and test the counter only after the
 *              first pass, so this is presumably a positive multiple of
 *              16 -- TODO confirm against callers
 * nchannels  - more than one selects the two-channel paths
 * big_endian - non-zero swaps the two bytes of every sample
 *
 * When position < nsamples the 72 values starting at X[ch][position] are
 * first moved to X[ch][SBC_X_BUFFER_SIZE - 72] and position restarts
 * there.  Each loop pass then lowers position by 16 and stores the
 * sixteen loaded samples in the order 15, 7, 14, 8, 13, 9, 12, 10, 11, 3,
 * 6, 0, 5, 1, 4, 2.  The X accesses use :128 alignment qualifiers, so
 * position has to keep X[ch] + position 16-byte aligned.
 */
static SBC_ALWAYS_INLINE int sbc_enc_process_input_8s_neon_internal(
	int position,
	const uint8_t *pcm, int16_t X[2][SBC_X_BUFFER_SIZE],
	int nsamples, int nchannels, int big_endian)
{
	/* vtbl byte indices; the tables are only read, hence const */
	static const SBC_ALIGNED uint8_t perm_be[4][8] = {
		PERM_BE(15, 7, 14, 8),
		PERM_BE(13, 9, 12, 10),
		PERM_BE(11, 3, 6, 0),
		PERM_BE(5, 1, 4, 2)
	};
	static const SBC_ALIGNED uint8_t perm_le[4][8] = {
		PERM_LE(15, 7, 14, 8),
		PERM_LE(13, 9, 12, 10),
		PERM_LE(11, 3, 6, 0),
		PERM_LE(5, 1, 4, 2)
	};

	/* handle X buffer wraparound */
	if (position < nsamples) {
		int16_t *dst = &X[0][SBC_X_BUFFER_SIZE - 72];
		int16_t *src = &X[0][position];
		/* move 4 * 16 + 8 = 72 samples of channel 0 */
		asm volatile (
			"vld1.16 {d0, d1, d2, d3}, [%[src], :128]!\n"
			"vst1.16 {d0, d1, d2, d3}, [%[dst], :128]!\n"
			"vld1.16 {d0, d1, d2, d3}, [%[src], :128]!\n"
			"vst1.16 {d0, d1, d2, d3}, [%[dst], :128]!\n"
			"vld1.16 {d0, d1, d2, d3}, [%[src], :128]!\n"
			"vst1.16 {d0, d1, d2, d3}, [%[dst], :128]!\n"
			"vld1.16 {d0, d1, d2, d3}, [%[src], :128]!\n"
			"vst1.16 {d0, d1, d2, d3}, [%[dst], :128]!\n"
			"vld1.16 {d0, d1}, [%[src], :128]!\n"
			"vst1.16 {d0, d1}, [%[dst], :128]!\n"
			:
			[dst] "+r" (dst),
			[src] "+r" (src)
			: : "memory", "d0", "d1", "d2", "d3");
		if (nchannels > 1) {
			/* same move for channel 1 */
			dst = &X[1][SBC_X_BUFFER_SIZE - 72];
			src = &X[1][position];
			asm volatile (
				"vld1.16 {d0, d1, d2, d3}, [%[src], :128]!\n"
				"vst1.16 {d0, d1, d2, d3}, [%[dst], :128]!\n"
				"vld1.16 {d0, d1, d2, d3}, [%[src], :128]!\n"
				"vst1.16 {d0, d1, d2, d3}, [%[dst], :128]!\n"
				"vld1.16 {d0, d1, d2, d3}, [%[src], :128]!\n"
				"vst1.16 {d0, d1, d2, d3}, [%[dst], :128]!\n"
				"vld1.16 {d0, d1, d2, d3}, [%[src], :128]!\n"
				"vst1.16 {d0, d1, d2, d3}, [%[dst], :128]!\n"
				"vld1.16 {d0, d1}, [%[src], :128]!\n"
				"vst1.16 {d0, d1}, [%[dst], :128]!\n"
				:
				[dst] "+r" (dst),
				[src] "+r" (src)
				: : "memory", "d0", "d1", "d2", "d3");
		}
		position = SBC_X_BUFFER_SIZE - 72;
	}

	if ((nchannels > 1) && ((uintptr_t)pcm & 1)) {
		/* poor 'pcm' alignment */
		int16_t *x = &X[0][position];
		int16_t *y = &X[1][position];
		/* byte loads, then vuzp.16 splits the two channels */
		asm volatile (
			"vld1.8 {d0, d1, d2, d3}, [%[perm], :128]\n"
			"1:\n"
			"sub %[x], %[x], #32\n"
			"sub %[y], %[y], #32\n"
			"sub %[position], %[position], #16\n"
			"vld1.8 {d4, d5, d6, d7}, [%[pcm]]!\n"
			"vuzp.16 q2, q3\n"
			"vld1.8 {d20, d21, d22, d23}, [%[pcm]]!\n"
			"vuzp.16 q10, q11\n"
			"vswp q3, q10\n"
			"vtbl.8 d16, {d4, d5, d6, d7}, d0\n"
			"vtbl.8 d17, {d4, d5, d6, d7}, d1\n"
			"vtbl.8 d18, {d4, d5, d6, d7}, d2\n"
			"vtbl.8 d19, {d4, d5, d6, d7}, d3\n"
			"vst1.16 {d16, d17, d18, d19}, [%[x], :128]\n"
			"vtbl.8 d16, {d20, d21, d22, d23}, d0\n"
			"vtbl.8 d17, {d20, d21, d22, d23}, d1\n"
			"vtbl.8 d18, {d20, d21, d22, d23}, d2\n"
			"vtbl.8 d19, {d20, d21, d22, d23}, d3\n"
			"vst1.16 {d16, d17, d18, d19}, [%[y], :128]\n"
			"subs %[nsamples], %[nsamples], #16\n"
			"bgt 1b\n"
			:
			[x] "+r" (x),
			[y] "+r" (y),
			[pcm] "+r" (pcm),
			[nsamples] "+r" (nsamples),
			[position] "+r" (position)
			:
			[perm] "r" (big_endian ? perm_be : perm_le)
			: "cc", "memory", "d0", "d1", "d2", "d3", "d4",
			"d5", "d6", "d7", "d16", "d17", "d18", "d19",
			"d20", "d21", "d22", "d23");
	} else if (nchannels > 1) {
		/* proper 'pcm' alignment */
		int16_t *x = &X[0][position];
		int16_t *y = &X[1][position];
		/* vld2.16 splits the two channels while loading */
		asm volatile (
			"vld1.8 {d0, d1, d2, d3}, [%[perm], :128]\n"
			"1:\n"
			"sub %[x], %[x], #32\n"
			"sub %[y], %[y], #32\n"
			"sub %[position], %[position], #16\n"
			"vld2.16 {d4, d5, d6, d7}, [%[pcm]]!\n"
			"vld2.16 {d20, d21, d22, d23}, [%[pcm]]!\n"
			"vswp q3, q10\n"
			"vtbl.8 d16, {d4, d5, d6, d7}, d0\n"
			"vtbl.8 d17, {d4, d5, d6, d7}, d1\n"
			"vtbl.8 d18, {d4, d5, d6, d7}, d2\n"
			"vtbl.8 d19, {d4, d5, d6, d7}, d3\n"
			"vst1.16 {d16, d17, d18, d19}, [%[x], :128]\n"
			"vtbl.8 d16, {d20, d21, d22, d23}, d0\n"
			"vtbl.8 d17, {d20, d21, d22, d23}, d1\n"
			"vtbl.8 d18, {d20, d21, d22, d23}, d2\n"
			"vtbl.8 d19, {d20, d21, d22, d23}, d3\n"
			"vst1.16 {d16, d17, d18, d19}, [%[y], :128]\n"
			"subs %[nsamples], %[nsamples], #16\n"
			"bgt 1b\n"
			:
			[x] "+r" (x),
			[y] "+r" (y),
			[pcm] "+r" (pcm),
			[nsamples] "+r" (nsamples),
			[position] "+r" (position)
			:
			[perm] "r" (big_endian ? perm_be : perm_le)
			: "cc", "memory", "d0", "d1", "d2", "d3", "d4",
			"d5", "d6", "d7", "d16", "d17", "d18", "d19",
			"d20", "d21", "d22", "d23");
	} else {
		/* single channel: sixteen samples per pass go to X[0] only */
		int16_t *x = &X[0][position];
		asm volatile (
			"vld1.8 {d0, d1, d2, d3}, [%[perm], :128]\n"
			"1:\n"
			"sub %[x], %[x], #32\n"
			"sub %[position], %[position], #16\n"
			"vld1.8 {d4, d5, d6, d7}, [%[pcm]]!\n"
			"vtbl.8 d16, {d4, d5, d6, d7}, d0\n"
			"vtbl.8 d17, {d4, d5, d6, d7}, d1\n"
			"vtbl.8 d18, {d4, d5, d6, d7}, d2\n"
			"vtbl.8 d19, {d4, d5, d6, d7}, d3\n"
			"vst1.16 {d16, d17, d18, d19}, [%[x], :128]\n"
			"subs %[nsamples], %[nsamples], #16\n"
			"bgt 1b\n"
			:
			[x] "+r" (x),
			[pcm] "+r" (pcm),
			[nsamples] "+r" (nsamples),
			[position] "+r" (position)
			:
			[perm] "r" (big_endian ? perm_be : perm_le)
			: "cc", "memory", "d0", "d1", "d2", "d3", "d4",
			"d5", "d6", "d7", "d16", "d17", "d18", "d19");
	}
	return position;
}
#undef PERM_BE
#undef PERM_LE
/*
 * 4-subband input stage with the big_endian flag set; forwards every
 * argument to the shared NEON implementation and returns its result.
 */
static int sbc_enc_process_input_4s_be_neon(int position, const uint8_t *pcm,
					int16_t X[2][SBC_X_BUFFER_SIZE],
					int nsamples, int nchannels)
{
	const int big_endian = 1;

	return sbc_enc_process_input_4s_neon_internal(position, pcm, X,
					nsamples, nchannels, big_endian);
}
/*
 * 4-subband input stage with the big_endian flag cleared; forwards every
 * argument to the shared NEON implementation and returns its result.
 */
static int sbc_enc_process_input_4s_le_neon(int position, const uint8_t *pcm,
					int16_t X[2][SBC_X_BUFFER_SIZE],
					int nsamples, int nchannels)
{
	const int big_endian = 0;

	return sbc_enc_process_input_4s_neon_internal(position, pcm, X,
					nsamples, nchannels, big_endian);
}
/*
 * 8-subband input stage with the big_endian flag set; forwards every
 * argument to the shared NEON implementation and returns its result.
 */
static int sbc_enc_process_input_8s_be_neon(int position, const uint8_t *pcm,
					int16_t X[2][SBC_X_BUFFER_SIZE],
					int nsamples, int nchannels)
{
	const int big_endian = 1;

	return sbc_enc_process_input_8s_neon_internal(position, pcm, X,
					nsamples, nchannels, big_endian);
}
/*
 * 8-subband input stage with the big_endian flag cleared; forwards every
 * argument to the shared NEON implementation and returns its result.
 */
static int sbc_enc_process_input_8s_le_neon(int position, const uint8_t *pcm,
					int16_t X[2][SBC_X_BUFFER_SIZE],
					int nsamples, int nchannels)
{
	const int big_endian = 0;

	return sbc_enc_process_input_8s_neon_internal(position, pcm, X,
					nsamples, nchannels, big_endian);
}
/*
 * Point every primitive hook of the encoder state at its NEON
 * implementation from this file and label the state "NEON".
 */
void sbc_init_primitives_neon(struct sbc_encoder_state *state)
{
	/* name of the installed implementation */
	state->implementation_info = "NEON";

	/* PCM input stage: 4 and 8 subbands, both byte orders */
	state->sbc_enc_process_input_4s_be = sbc_enc_process_input_4s_be_neon;
	state->sbc_enc_process_input_4s_le = sbc_enc_process_input_4s_le_neon;
	state->sbc_enc_process_input_8s_be = sbc_enc_process_input_8s_be_neon;
	state->sbc_enc_process_input_8s_le = sbc_enc_process_input_8s_le_neon;

	/* analysis of four blocks at a time */
	state->sbc_analyze_4b_4s = sbc_analyze_4b_4s_neon;
	state->sbc_analyze_4b_8s = sbc_analyze_4b_8s_neon;

	/* scale factors, without and with the joint stereo decision */
	state->sbc_calc_scalefactors = sbc_calc_scalefactors_neon;
	state->sbc_calc_scalefactors_j = sbc_calc_scalefactors_j_neon;
}
#endif

View file

@ -1,246 +0,0 @@
/*
*
* Bluetooth low-complexity, subband codec (SBC) library
*
* Copyright (C) 2004-2009 Marcel Holtmann <marcel@holtmann.org>
* Copyright (C) 2004-2005 Henryk Ploetz <henryk@ploetzli.ch>
* Copyright (C) 2005-2006 Brad Midgley <bmidgley@xmission.com>
*
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*
*/
#include <stdint.h>
#include <limits.h>
#include "sbc.h"
#include "sbc_math.h"
#include "sbc_tables.h"
#include "sbc_primitives_neon.h"
/*
* ARM NEON optimizations
*/
#ifdef SBC_BUILD_WITH_NEON_SUPPORT
/*
 * Analyze one block for the 4-subband case.
 *
 * Reads 40 int16_t values from 'in' and 40 + 16 values from 'consts' and
 * writes four int32_t results to 'out':
 *  1. the 40 input/constant products are accumulated in eight 32-bit
 *     lanes and added pairwise down to four sums;
 *  2. the sums are rounded, shifted right by SBC_PROTO_FIXED4_SCALE and
 *     narrowed to 16 bits;
 *  3. the four narrowed values are multiplied with the remaining 16
 *     constants and added pairwise to give the four outputs.
 *
 * 'in' is read with a :64 alignment qualifier, 'consts' and 'out' with
 * :128, so they have to be 8-byte and 16-byte aligned respectively.
 */
static inline void _sbc_analyze_four_neon(const int16_t *in, int32_t *out,
const FIXED_T *consts)
{
/* TODO: merge even and odd cases (or even merge all four calls to this
* function) in order to have only aligned reads from 'in' array
* and reduce number of load instructions */
asm volatile (
/* step 1: five loads of eight samples / eight constants each,
 * accumulated in q0 and q1 */
"vld1.16 {d4, d5}, [%0, :64]!\n"
"vld1.16 {d8, d9}, [%1, :128]!\n"
"vmull.s16 q0, d4, d8\n"
"vld1.16 {d6, d7}, [%0, :64]!\n"
"vmull.s16 q1, d5, d9\n"
"vld1.16 {d10, d11}, [%1, :128]!\n"
"vmlal.s16 q0, d6, d10\n"
"vld1.16 {d4, d5}, [%0, :64]!\n"
"vmlal.s16 q1, d7, d11\n"
"vld1.16 {d8, d9}, [%1, :128]!\n"
"vmlal.s16 q0, d4, d8\n"
"vld1.16 {d6, d7}, [%0, :64]!\n"
"vmlal.s16 q1, d5, d9\n"
"vld1.16 {d10, d11}, [%1, :128]!\n"
"vmlal.s16 q0, d6, d10\n"
"vld1.16 {d4, d5}, [%0, :64]!\n"
"vmlal.s16 q1, d7, d11\n"
"vld1.16 {d8, d9}, [%1, :128]!\n"
"vmlal.s16 q0, d4, d8\n"
"vmlal.s16 q1, d5, d9\n"
/* step 2: pairwise add to four sums, then round, scale and narrow */
"vpadd.s32 d0, d0, d1\n"
"vpadd.s32 d1, d2, d3\n"
"vrshrn.s32 d0, q0, %3\n"
/* step 3: remaining 16 constants; each 32-bit lane duplicated below
 * carries a pair of the narrowed 16-bit values */
"vld1.16 {d2, d3, d4, d5}, [%1, :128]!\n"
"vdup.i32 d1, d0[1]\n" /* TODO: can be eliminated */
"vdup.i32 d0, d0[0]\n" /* TODO: can be eliminated */
"vmull.s16 q3, d2, d0\n"
"vmull.s16 q4, d3, d0\n"
"vmlal.s16 q3, d4, d1\n"
"vmlal.s16 q4, d5, d1\n"
"vpadd.s32 d0, d6, d7\n" /* TODO: can be eliminated */
"vpadd.s32 d1, d8, d9\n" /* TODO: can be eliminated */
"vst1.32 {d0, d1}, [%2, :128]\n"
/* %0 = in and %1 = consts are advanced by the post-incrementing loads */
: "+r" (in), "+r" (consts)
: "r" (out),
"i" (SBC_PROTO_FIXED4_SCALE)
: "memory",
"d0", "d1", "d2", "d3", "d4", "d5",
"d6", "d7", "d8", "d9", "d10", "d11");
}
/*
 * Analyze one block for the 8-subband case.
 *
 * Reads 80 int16_t values from 'in' and 80 + 64 values from 'consts' and
 * writes eight int32_t results to 'out':
 *  1. the 80 input/constant products are accumulated in sixteen 32-bit
 *     lanes and added pairwise down to eight sums;
 *  2. the sums are rounded, shifted right by SBC_PROTO_FIXED8_SCALE and
 *     narrowed to 16 bits;
 *  3. the eight narrowed values are multiplied with the remaining 64
 *     constants and added pairwise to give the eight outputs.
 *
 * 'in' is read with a :64 alignment qualifier, 'consts' and 'out' with
 * :128, so they have to be 8-byte and 16-byte aligned respectively.
 */
static inline void _sbc_analyze_eight_neon(const int16_t *in, int32_t *out,
const FIXED_T *consts)
{
/* TODO: merge even and odd cases (or even merge all four calls to this
* function) in order to have only aligned reads from 'in' array
* and reduce number of load instructions */
asm volatile (
/* step 1: ten loads of eight samples / eight constants each,
 * accumulated in q6..q9 */
"vld1.16 {d4, d5}, [%0, :64]!\n"
"vld1.16 {d8, d9}, [%1, :128]!\n"
"vmull.s16 q6, d4, d8\n"
"vld1.16 {d6, d7}, [%0, :64]!\n"
"vmull.s16 q7, d5, d9\n"
"vld1.16 {d10, d11}, [%1, :128]!\n"
"vmull.s16 q8, d6, d10\n"
"vld1.16 {d4, d5}, [%0, :64]!\n"
"vmull.s16 q9, d7, d11\n"
"vld1.16 {d8, d9}, [%1, :128]!\n"
"vmlal.s16 q6, d4, d8\n"
"vld1.16 {d6, d7}, [%0, :64]!\n"
"vmlal.s16 q7, d5, d9\n"
"vld1.16 {d10, d11}, [%1, :128]!\n"
"vmlal.s16 q8, d6, d10\n"
"vld1.16 {d4, d5}, [%0, :64]!\n"
"vmlal.s16 q9, d7, d11\n"
"vld1.16 {d8, d9}, [%1, :128]!\n"
"vmlal.s16 q6, d4, d8\n"
"vld1.16 {d6, d7}, [%0, :64]!\n"
"vmlal.s16 q7, d5, d9\n"
"vld1.16 {d10, d11}, [%1, :128]!\n"
"vmlal.s16 q8, d6, d10\n"
"vld1.16 {d4, d5}, [%0, :64]!\n"
"vmlal.s16 q9, d7, d11\n"
"vld1.16 {d8, d9}, [%1, :128]!\n"
"vmlal.s16 q6, d4, d8\n"
"vld1.16 {d6, d7}, [%0, :64]!\n"
"vmlal.s16 q7, d5, d9\n"
"vld1.16 {d10, d11}, [%1, :128]!\n"
"vmlal.s16 q8, d6, d10\n"
"vld1.16 {d4, d5}, [%0, :64]!\n"
"vmlal.s16 q9, d7, d11\n"
"vld1.16 {d8, d9}, [%1, :128]!\n"
"vmlal.s16 q6, d4, d8\n"
"vld1.16 {d6, d7}, [%0, :64]!\n"
"vmlal.s16 q7, d5, d9\n"
"vld1.16 {d10, d11}, [%1, :128]!\n"
"vmlal.s16 q8, d6, d10\n"
"vmlal.s16 q9, d7, d11\n"
/* step 2: pairwise add to eight sums, then round, scale and narrow */
"vpadd.s32 d0, d12, d13\n"
"vpadd.s32 d1, d14, d15\n"
"vpadd.s32 d2, d16, d17\n"
"vpadd.s32 d3, d18, d19\n"
"vrshr.s32 q0, q0, %3\n"
"vrshr.s32 q1, q1, %3\n"
"vmovn.s32 d0, q0\n"
"vmovn.s32 d1, q1\n"
/* each duplicated 32-bit lane carries a pair of narrowed 16-bit values */
"vdup.i32 d3, d1[1]\n" /* TODO: can be eliminated */
"vdup.i32 d2, d1[0]\n" /* TODO: can be eliminated */
"vdup.i32 d1, d0[1]\n" /* TODO: can be eliminated */
"vdup.i32 d0, d0[0]\n" /* TODO: can be eliminated */
/* step 3: eight loads of eight constants each, multiplied with the
 * duplicated pairs and accumulated in q6..q9 */
"vld1.16 {d4, d5}, [%1, :128]!\n"
"vmull.s16 q6, d4, d0\n"
"vld1.16 {d6, d7}, [%1, :128]!\n"
"vmull.s16 q7, d5, d0\n"
"vmull.s16 q8, d6, d0\n"
"vmull.s16 q9, d7, d0\n"
"vld1.16 {d4, d5}, [%1, :128]!\n"
"vmlal.s16 q6, d4, d1\n"
"vld1.16 {d6, d7}, [%1, :128]!\n"
"vmlal.s16 q7, d5, d1\n"
"vmlal.s16 q8, d6, d1\n"
"vmlal.s16 q9, d7, d1\n"
"vld1.16 {d4, d5}, [%1, :128]!\n"
"vmlal.s16 q6, d4, d2\n"
"vld1.16 {d6, d7}, [%1, :128]!\n"
"vmlal.s16 q7, d5, d2\n"
"vmlal.s16 q8, d6, d2\n"
"vmlal.s16 q9, d7, d2\n"
"vld1.16 {d4, d5}, [%1, :128]!\n"
"vmlal.s16 q6, d4, d3\n"
"vld1.16 {d6, d7}, [%1, :128]!\n"
"vmlal.s16 q7, d5, d3\n"
"vmlal.s16 q8, d6, d3\n"
"vmlal.s16 q9, d7, d3\n"
"vpadd.s32 d0, d12, d13\n" /* TODO: can be eliminated */
"vpadd.s32 d1, d14, d15\n" /* TODO: can be eliminated */
"vpadd.s32 d2, d16, d17\n" /* TODO: can be eliminated */
"vpadd.s32 d3, d18, d19\n" /* TODO: can be eliminated */
"vst1.32 {d0, d1, d2, d3}, [%2, :128]\n"
/* %0 = in and %1 = consts are advanced by the post-incrementing loads */
: "+r" (in), "+r" (consts)
: "r" (out),
"i" (SBC_PROTO_FIXED8_SCALE)
: "memory",
"d0", "d1", "d2", "d3", "d4", "d5",
"d6", "d7", "d8", "d9", "d10", "d11",
"d12", "d13", "d14", "d15", "d16", "d17",
"d18", "d19");
}
/*
 * Run the 4-subband analysis on four input blocks.
 *
 * The blocks start at x + 12, x + 8, x + 4 and x + 0 and are processed in
 * that order, alternating between the "odd" and "even" constant tables.
 * The first result goes to 'out'; each following result is written
 * out_stride int32_t elements after the previous one.
 */
static inline void sbc_analyze_4b_4s_neon(int16_t *x,
						int32_t *out, int out_stride)
{
	int32_t *row = out;

	_sbc_analyze_four_neon(&x[12], row, analysis_consts_fixed4_simd_odd);
	row += out_stride;

	_sbc_analyze_four_neon(&x[8], row, analysis_consts_fixed4_simd_even);
	row += out_stride;

	_sbc_analyze_four_neon(&x[4], row, analysis_consts_fixed4_simd_odd);
	row += out_stride;

	_sbc_analyze_four_neon(&x[0], row, analysis_consts_fixed4_simd_even);
}
/*
 * Run the 8-subband analysis on four input blocks.
 *
 * The blocks start at x + 24, x + 16, x + 8 and x + 0 and are processed in
 * that order, alternating between the "odd" and "even" constant tables.
 * The first result goes to 'out'; each following result is written
 * out_stride int32_t elements after the previous one.
 */
static inline void sbc_analyze_4b_8s_neon(int16_t *x,
						int32_t *out, int out_stride)
{
	int32_t *row = out;

	_sbc_analyze_eight_neon(&x[24], row, analysis_consts_fixed8_simd_odd);
	row += out_stride;

	_sbc_analyze_eight_neon(&x[16], row, analysis_consts_fixed8_simd_even);
	row += out_stride;

	_sbc_analyze_eight_neon(&x[8], row, analysis_consts_fixed8_simd_odd);
	row += out_stride;

	_sbc_analyze_eight_neon(&x[0], row, analysis_consts_fixed8_simd_even);
}
/*
 * Point the two analysis hooks of the encoder state at their NEON
 * implementations from this file and label the state "NEON".
 */
void sbc_init_primitives_neon(struct sbc_encoder_state *state)
{
	/* name of the installed implementation */
	state->implementation_info = "NEON";

	/* analysis of four blocks at a time, 4 and 8 subbands */
	state->sbc_analyze_4b_8s = sbc_analyze_4b_8s_neon;
	state->sbc_analyze_4b_4s = sbc_analyze_4b_4s_neon;
}
#endif