diff --git a/libs/spandsp/m4/ax_check_arm_neon.m4 b/libs/spandsp/m4/ax_check_arm_neon.m4
index 88d18fda25..80e0fc0605 100644
--- a/libs/spandsp/m4/ax_check_arm_neon.m4
+++ b/libs/spandsp/m4/ax_check_arm_neon.m4
@@ -1,7 +1,7 @@
 # @synopsis AX_CHECK_ARM_NEON
 #
 # Does the machine support the ARM NEON instruction set?
-# @version 1.0    Dec 31 2012
+# @version 1.01   Feb 11 2013
 # @author Steve Underwood
 #
 # Permission to use, copy, modify, distribute, and sell this file for any 
diff --git a/libs/spandsp/src/spandsp/saturated.h b/libs/spandsp/src/spandsp/saturated.h
index d8872f9c3b..41b8ba3db0 100644
--- a/libs/spandsp/src/spandsp/saturated.h
+++ b/libs/spandsp/src/spandsp/saturated.h
@@ -363,24 +363,80 @@ static __inline__ int32_t saturated_sub32(int32_t a, int32_t b)
 
 static __inline__ int16_t saturated_mul16(int16_t a, int16_t b)
 {
-    int32_t product;
+    int32_t z;
 
-    product = (int32_t) a*b;
-    if (product == 0x40000000)
+#if defined(__GNUC__)  &&  (defined(__ARM_ARCH_6__)  ||  defined(__ARM_ARCH_7A__)) /* ARM path, using GCC inline assembly */
+    __asm__ __volatile__(
+        " smulbb %[z],%[a],%[b];\n"     /* z = a*b, signed 16 bit x 16 bit => 32 bit */
+        " qadd %[z],%[z],%[z];\n"       /* z = z + z, saturated to 32 bits */
+        : [z] "=r" (z)
+        : [a] "r" (a), [b] "r" (b)
+    );
+    return (int16_t) (z >> 16);         /* The product was doubled above, so this matches the >> 15 of the portable path */
+#else
+    z = (int32_t) a*b;                  /* Portable path: widen before multiplying, so the product cannot overflow */
+    if (z == 0x40000000)                /* Only -32768*-32768 gives this product; its scaled result does not fit in 16 bits */
         return INT16_MAX;
     /*endif*/
-    return (int16_t) (product >> 15);
+    return (int16_t) (z >> 15);
+#endif
 }
 /*- End of function --------------------------------------------------------*/
 
 static __inline__ int32_t saturated_mul16_32(int16_t a, int16_t b)
 {
+    int32_t z;                          /* Result: twice the 16 bit x 16 bit product, saturated to 32 bits */
+
+#if defined(__GNUC__)  &&  (defined(__ARM_ARCH_6__)  ||  defined(__ARM_ARCH_7A__)) /* ARM path, using GCC inline assembly */
+    __asm__ __volatile__(
+        " smulbb %[z],%[a],%[b];\n"     /* z = a*b, signed 16 bit x 16 bit => 32 bit */
+        " qadd %[z],%[z],%[z];\n"       /* z = z + z, saturated to 32 bits */
+        : [z] "=r" (z)
+        : [a] "r" (a), [b] "r" (b)
+    );
+    return z;
+#else
+    z = (int32_t) a*b;                  /* Portable path: widen before multiplying, so the product cannot overflow */
+    if (z == 0x40000000)                /* Only -32768*-32768 gives this product; doubling it would overflow */
+        return INT32_MAX;
+    return z*2;                         /* Multiply rather than shift: left shifting a negative value is undefined in ISO C */
+#endif
+}
+/*- End of function --------------------------------------------------------*/
+
+static __inline__ int32_t saturated_mac16_32(int32_t z, int16_t a, int16_t b)
+{
+#if defined(__GNUC__)  &&  (defined(__ARM_ARCH_6__)  ||  defined(__ARM_ARCH_7A__))
     int32_t product;
 
-    product = (int32_t) a*b;
-    if (product == 0x40000000)
-        return INT32_MAX;
-    return product << 1;
+    __asm__ __volatile__(               /* z = sat32(z + sat32(2*a*b)), the same result as the #else path */
+        " smulbb %[p],%[a],%[b];\n"     /* product = a*b, signed 16 bit x 16 bit => 32 bit */
+        " qdadd %[z],%[z],%[p];\n"      /* z = z + 2*product, saturating at both steps */
+        : [z] "+r" (z), [p] "=r" (product)  /* z is read and written; product is written here, so it must be an output */
+        : [a] "r" (a), [b] "r" (b)
+    );
+    return z;
+#else
+    return saturated_add32(z, saturated_mul16_32(a, b));    /* Portable path, built from the saturating helpers */
+#endif
+}
+/*- End of function --------------------------------------------------------*/
+
+static __inline__ int32_t saturated_msu16_32(int32_t z, int16_t a, int16_t b)   /* Saturating multiply-subtract: z - 2*a*b */
+{
+#if defined(__GNUC__)  &&  (defined(__ARM_ARCH_6__)  ||  defined(__ARM_ARCH_7A__)) /* ARM path, using GCC inline assembly */
+    int32_t product;                    /* Scratch for the 16 bit x 16 bit product; only written by the asm below */
+
+    __asm__ __volatile__(               /* z = sat32(z - sat32(2*a*b)), the same result as the #else path */
+        " smulbb %[p],%[a],%[b];\n"     /* product = a*b, signed 16 bit x 16 bit => 32 bit */
+        " qdsub %[z],%[z],%[p];\n"      /* z = z - 2*product, saturating at both steps */
+        : [z] "+r" (z), [p] "=r" (product)  /* z is read and written; product is written here, so it must be an output */
+        : [a] "r" (a), [b] "r" (b)
+    );
+    return z;
+#else
+    return saturated_sub32(z, saturated_mul16_32(a, b));    /* Portable path, built from the saturating helpers */
+#endif
+}
 /*- End of function --------------------------------------------------------*/
 
diff --git a/libs/spandsp/src/v22bis_rx.c b/libs/spandsp/src/v22bis_rx.c
index c00c62b47c..099eb8bff0 100644
--- a/libs/spandsp/src/v22bis_rx.c
+++ b/libs/spandsp/src/v22bis_rx.c
@@ -73,8 +73,8 @@
 #include "spandsp/private/v22bis.h"
 
 #if defined(SPANDSP_USE_FIXED_POINT)
+#define FP_SCALE(x)                     FP_Q_6_10(x)    /* Function-like, like the floating point definition in the #else branch */
 #define FP_SHIFT_FACTOR                 10
-#define FP_SCALE                        FP_Q_6_10
 #else
 #define FP_SCALE(x)                     (x)
 #endif
@@ -306,9 +306,8 @@ static __inline__ int descramble(v22bis_state_t *s, int bit)
 {
     int out_bit;
 
-    bit &= 1;
-
     /* Descramble the bit */
+    bit &= 1;
     out_bit = (bit ^ (s->rx.scramble_reg >> 13) ^ (s->rx.scramble_reg >> 16)) & 1;
     s->rx.scramble_reg = (s->rx.scramble_reg << 1) | bit;
 
@@ -856,9 +855,9 @@ SPAN_DECLARE_NONSTD(int) v22bis_rx(v22bis_state_t *s, const int16_t amp[], int l
             {
                 /* Only AGC during the initial symbol acquisition, and then lock the gain. */
 #if defined(SPANDSP_USE_FIXED_POINT)
-                s->rx.agc_scaling = saturate16(((int32_t) (1024.0f*1024.0f*0.18f*3.60f))/fixed_sqrt32(power));
+                s->rx.agc_scaling = saturate16(((int32_t) (FP_SCALE(0.18f)*FP_SCALE(3.60f)))/fixed_sqrt32(power));
 #else
-                s->rx.agc_scaling = 0.18f*3.60f/sqrtf(power);
+                s->rx.agc_scaling = FP_SCALE(0.18f)*FP_SCALE(3.60f)/sqrtf(power);  /* Floating point build: keep the real square root, not the integer fixed_sqrt32() */
 #endif
             }
             /* Pulse shape while still at the carrier frequency, using a quadrature
@@ -868,7 +867,6 @@ SPAN_DECLARE_NONSTD(int) v22bis_rx(v22bis_state_t *s, const int16_t amp[], int l
             step = -s->rx.eq_put_step;
             if (step > PULSESHAPER_COEFF_SETS - 1)
                 step = PULSESHAPER_COEFF_SETS - 1;
-            s->rx.eq_put_step += PULSESHAPER_COEFF_SETS*40/(3*2);
             if (s->calling_party)
             {
 #if defined(SPANDSP_USE_FIXED_POINT)
@@ -905,6 +903,7 @@ SPAN_DECLARE_NONSTD(int) v22bis_rx(v22bis_state_t *s, const int16_t amp[], int l
             zz.re = sample.re*z.re - sample.im*z.im;
             zz.im = -sample.re*z.im - sample.im*z.re;
 #endif
+            s->rx.eq_put_step += PULSESHAPER_COEFF_SETS*40/(3*2);
             process_half_baud(s, &zz);
         }
 #if defined(SPANDSP_USE_FIXED_POINT)
diff --git a/libs/spandsp/src/v22bis_tx.c b/libs/spandsp/src/v22bis_tx.c
index 0c43b95a07..bb18b77b26 100644
--- a/libs/spandsp/src/v22bis_tx.c
+++ b/libs/spandsp/src/v22bis_tx.c
@@ -63,7 +63,7 @@
 #include "spandsp/private/v22bis.h"
 
 #if defined(SPANDSP_USE_FIXED_POINT)
-#define FP_SCALE    FP_Q_6_10
+#define FP_SCALE(x) FP_Q_6_10(x)    /* Function-like, like the floating point definition in the #else branch */
 #else
 #define FP_SCALE(x) (x)
 #endif
diff --git a/libs/spandsp/src/v29tx.c b/libs/spandsp/src/v29tx.c
index 868b9ac341..3eac97e1bc 100644
--- a/libs/spandsp/src/v29tx.c
+++ b/libs/spandsp/src/v29tx.c
@@ -66,9 +66,8 @@
 
 #define FP_CONSTELLATION_SCALE(x)       FP_SCALE(x)
 
-#include "v29tx_constellation_maps.h"
-
 #include "v29tx_rrc.h"
+#include "v29tx_constellation_maps.h"
 
 /*! The nominal frequency of the carrier, in Hertz */
 #define CARRIER_NOMINAL_FREQ        1700.0f
diff --git a/libs/spandsp/tests/saturated_tests.c b/libs/spandsp/tests/saturated_tests.c
index 793cff9dd9..a3ece0e436 100644
--- a/libs/spandsp/tests/saturated_tests.c
+++ b/libs/spandsp/tests/saturated_tests.c
@@ -197,17 +197,17 @@ int main(int argc, char *argv[])
         exit(2);
     }
     printf("Testing 16 bit add\n");
-    if (saturated_add16(10000, 10000) != 20000
-        ||
-        saturated_add16(10000, -10000) != 0
-        ||
-        saturated_add16(-10000, 10000) != 0
-        ||
-        saturated_add16(-10000, -10000) != -20000
-        ||
-        saturated_add16(-30000, -30000) != INT16_MIN
-        ||
-        saturated_add16(30000, 30000) != INT16_MAX)
+    if (saturated_add16(10000, 10000) != 20000      /* A mismatch in any of the six cases must fail the test */
+        ||
+        saturated_add16(10000, -10000) != 0
+        ||
+        saturated_add16(-10000, 10000) != 0
+        ||
+        saturated_add16(-10000, -10000) != -20000
+        ||
+        saturated_add16(-30000, -30000) != INT16_MIN
+        ||
+        saturated_add16(30000, 30000) != INT16_MAX)
     {
         printf("Test failed.\n");
         exit(2);
@@ -292,6 +292,38 @@ int main(int argc, char *argv[])
         printf("Test failed.\n");
         exit(2);
     }
+    printf("Testing 32 + 16 x 16 => 32 bit MAC\n");    /* Expected values are 123 plus twice the 16 x 16 product */
+    if (saturated_mac16_32(123, 100, 100) != 123 + 20000
+        ||
+        saturated_mac16_32(123, -100, 100) != 123 - 20000
+        ||
+        saturated_mac16_32(123, 32767, -32768) != 123 - 2147418112     /* 2*32767*32768 = 2147418112 */
+        ||
+        saturated_mac16_32(123, -32768, 32767) != 123 - 2147418112
+        ||
+        saturated_mac16_32(123, 32767, 32767) != 123 + 2147352578      /* 2*32767*32767 = 2147352578 */
+        ||
+        saturated_mac16_32(123, -32768, -32768) != INT32_MAX)          /* The doubled product saturates, and so does the sum */
+    {
+        printf("Test failed.\n");
+        exit(2);
+    }
+    printf("Testing 32 - 16 x 16 => 32 bit MSU\n");    /* Expected values are 123 minus twice the 16 x 16 product */
+    if (saturated_msu16_32(123, 100, 100) != 123 - 20000
+        ||
+        saturated_msu16_32(123, -100, 100) != 123 + 20000
+        ||
+        saturated_msu16_32(123, 32767, -32768) != 123 + 2147418112     /* 2*32767*32768 = 2147418112 */
+        ||
+        saturated_msu16_32(123, -32768, 32767) != 123 + 2147418112
+        ||
+        saturated_msu16_32(123, 32767, 32767) != 123 - 2147352578      /* 2*32767*32767 = 2147352578 */
+        ||
+        saturated_msu16_32(123, -32768, -32768) != 123 - INT32_MAX)    /* The doubled product saturates to INT32_MAX before the subtraction */
+    {
+        printf("Test failed.\n");
+        exit(2);
+    }
     printf("Testing 16 bit absolute\n");
     if (saturated_abs16(10000) != 10000
         ||