res_pjsip_pubsub: Address SEGV when attempting to terminate a subscription

Occasionally under load we'll attempt to send a final NOTIFY on a
subscription that's already been terminated and a SEGV will occur
down in pjproject's evsub_destroy function.  This is a result of a
race condition between all the paths that can generate a notify
and/or destroy the underlying pjproject evsub object:

 * The client can send a SUBSCRIBE with Expires: 0.
 * The client can send a SUBSCRIBE/refresh.
 * The subscription timer can expire.
 * An extension state can change.
 * An MWI event can be generated.
 * The pjproject transaction timer (timer_b) can expire.

Normally when our pubsub_on_evsub_state is called with a terminate,
we push a task to the serializer and return at which point the dialog
is unlocked.  This is usually not a problem because the task runs
immediately and locks the dialog again.  When the system is heavily
loaded though, there may be a delay between the unlock and relock
during which another event may occur such as the subscription timer
or timer_b expiring, an extension state change, etc.  These may also
cause a terminate to be processed and if so, we could cause pjproject
to try to destroy the evsub structure twice.  There's no way for us to
tell that the evsub was already destroyed and the evsub's group lock
can't tolerate this and SEGVs.

The remedy is twofold.

 * A patch has been submitted to Teluu and added to the bundled
   pjproject which adds add/decrement operations on evsub's group lock.

 * In res_pjsip_pubsub:
   * configure.ac and pjproject-bundled's configure.m4 were updated
     to check for the new evsub group lock APIs.
   * We now add a reference to the evsub group lock when we create
     the subscription and remove the reference when we clean up the
     subscription.  This prevents evsub from being destroyed before
     we're done with it.
   * A state has been added to the subscription tree structure so
     termination progress can be tracked through the asyncronous tasks.
   * The pubsub_on_evsub_state callback has been split so it's not doing
     double duty.  It now only handles the final cleanup of the
     subscription tree.  pubsub_on_rx_refresh now handles both client
     refreshes and client terminates.  It was always being called for
     both anyway.
   * The serialized_on_server_timeout task was removed since
     serialized_pubsub_on_rx_refresh was almost identical.
   * Missing state checks and ao2_cleanups were added.
   * Some debug levels were adjusted to make seeing only off-nominal
     things at level 1 and nominal or progress things at level 2+.

ASTERISK-26099 #close
Reported-by: Ross Beer.

Change-Id: I779d11802cf672a51392e62a74a1216596075ba1
This commit is contained in:
George Joseph
2016-06-12 10:19:27 -06:00
parent 4d52b4c3e5
commit 6a568bcc66
6 changed files with 398 additions and 133 deletions

134
configure vendored
View File

@@ -915,6 +915,10 @@ PBX_POPT
POPT_DIR
POPT_INCLUDE
POPT_LIB
PBX_PJSIP_EVSUB_GRP_LOCK
PJSIP_EVSUB_GRP_LOCK_DIR
PJSIP_EVSUB_GRP_LOCK_INCLUDE
PJSIP_EVSUB_GRP_LOCK_LIB
PBX_PJSIP_TLS_TRANSPORT_PROTO
PJSIP_TLS_TRANSPORT_PROTO_DIR
PJSIP_TLS_TRANSPORT_PROTO_INCLUDE
@@ -10563,6 +10567,18 @@ PBX_PJSIP_TLS_TRANSPORT_PROTO=0
PJSIP_EVSUB_GRP_LOCK_DESCRIP="PJSIP EVSUB Group Lock support"
PJSIP_EVSUB_GRP_LOCK_OPTION=pjsip
PJSIP_EVSUB_GRP_LOCK_DIR=${PJPROJECT_DIR}
PBX_PJSIP_EVSUB_GRP_LOCK=0
POPT_DESCRIP="popt"
POPT_OPTION="popt"
@@ -13612,7 +13628,7 @@ else
We can't simply define LARGE_OFF_T to be 9223372036854775807,
since some C++ compilers masquerading as C compilers
incorrectly reject 9223372036854775807. */
#define LARGE_OFF_T ((((off_t) 1 << 31) << 31) - 1 + (((off_t) 1 << 31) << 31))
#define LARGE_OFF_T (((off_t) 1 << 62) - 1 + ((off_t) 1 << 62))
int off_t_is_large[(LARGE_OFF_T % 2147483629 == 721
&& LARGE_OFF_T % 2147483647 == 1)
? 1 : -1];
@@ -13658,7 +13674,7 @@ else
We can't simply define LARGE_OFF_T to be 9223372036854775807,
since some C++ compilers masquerading as C compilers
incorrectly reject 9223372036854775807. */
#define LARGE_OFF_T ((((off_t) 1 << 31) << 31) - 1 + (((off_t) 1 << 31) << 31))
#define LARGE_OFF_T (((off_t) 1 << 62) - 1 + ((off_t) 1 << 62))
int off_t_is_large[(LARGE_OFF_T % 2147483629 == 721
&& LARGE_OFF_T % 2147483647 == 1)
? 1 : -1];
@@ -13682,7 +13698,7 @@ rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
We can't simply define LARGE_OFF_T to be 9223372036854775807,
since some C++ compilers masquerading as C compilers
incorrectly reject 9223372036854775807. */
#define LARGE_OFF_T ((((off_t) 1 << 31) << 31) - 1 + (((off_t) 1 << 31) << 31))
#define LARGE_OFF_T (((off_t) 1 << 62) - 1 + ((off_t) 1 << 62))
int off_t_is_large[(LARGE_OFF_T % 2147483629 == 721
&& LARGE_OFF_T % 2147483647 == 1)
? 1 : -1];
@@ -13727,7 +13743,7 @@ else
We can't simply define LARGE_OFF_T to be 9223372036854775807,
since some C++ compilers masquerading as C compilers
incorrectly reject 9223372036854775807. */
#define LARGE_OFF_T ((((off_t) 1 << 31) << 31) - 1 + (((off_t) 1 << 31) << 31))
#define LARGE_OFF_T (((off_t) 1 << 62) - 1 + ((off_t) 1 << 62))
int off_t_is_large[(LARGE_OFF_T % 2147483629 == 721
&& LARGE_OFF_T % 2147483647 == 1)
? 1 : -1];
@@ -13751,7 +13767,7 @@ rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
We can't simply define LARGE_OFF_T to be 9223372036854775807,
since some C++ compilers masquerading as C compilers
incorrectly reject 9223372036854775807. */
#define LARGE_OFF_T ((((off_t) 1 << 31) << 31) - 1 + (((off_t) 1 << 31) << 31))
#define LARGE_OFF_T (((off_t) 1 << 62) - 1 + ((off_t) 1 << 62))
int off_t_is_large[(LARGE_OFF_T % 2147483629 == 721
&& LARGE_OFF_T % 2147483647 == 1)
? 1 : -1];
@@ -24463,6 +24479,9 @@ rm -f conftest*
$as_echo "#define HAVE_PJSIP_TLS_TRANSPORT_PROTO 1" >>confdefs.h
$as_echo "#define HAVE_PJSIP_EVSUB_GRP_LOCK 1" >>confdefs.h
else
if test "x${PBX_PJPROJECT}" != "x1" -a "${USE_PJPROJECT}" != "no"; then
@@ -25178,6 +25197,111 @@ rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
LIBS="${saved_libs}"
CPPFLAGS="${saved_cppflags}"
if test "x${PBX_PJSIP_EVSUB_GRP_LOCK}" != "x1" -a "${USE_PJSIP_EVSUB_GRP_LOCK}" != "no"; then
pbxlibdir=""
# if --with-PJSIP_EVSUB_GRP_LOCK=DIR has been specified, use it.
if test "x${PJSIP_EVSUB_GRP_LOCK_DIR}" != "x"; then
if test -d ${PJSIP_EVSUB_GRP_LOCK_DIR}/lib; then
pbxlibdir="-L${PJSIP_EVSUB_GRP_LOCK_DIR}/lib"
else
pbxlibdir="-L${PJSIP_EVSUB_GRP_LOCK_DIR}"
fi
fi
pbxfuncname="pjsip_evsub_add_lock"
if test "x${pbxfuncname}" = "x" ; then # empty lib, assume only headers
AST_PJSIP_EVSUB_GRP_LOCK_FOUND=yes
else
ast_ext_lib_check_save_CFLAGS="${CFLAGS}"
CFLAGS="${CFLAGS} $PJPROJECT_CFLAGS"
as_ac_Lib=`$as_echo "ac_cv_lib_pjsip_${pbxfuncname}" | $as_tr_sh`
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for ${pbxfuncname} in -lpjsip" >&5
$as_echo_n "checking for ${pbxfuncname} in -lpjsip... " >&6; }
if eval \${$as_ac_Lib+:} false; then :
$as_echo_n "(cached) " >&6
else
ac_check_lib_save_LIBS=$LIBS
LIBS="-lpjsip ${pbxlibdir} $PJPROJECT_LIB $LIBS"
cat confdefs.h - <<_ACEOF >conftest.$ac_ext
/* end confdefs.h. */
/* Override any GCC internal prototype to avoid an error.
Use char because int might match the return type of a GCC
builtin and then its argument prototype would still apply. */
#ifdef __cplusplus
extern "C"
#endif
char ${pbxfuncname} ();
int
main ()
{
return ${pbxfuncname} ();
;
return 0;
}
_ACEOF
if ac_fn_c_try_link "$LINENO"; then :
eval "$as_ac_Lib=yes"
else
eval "$as_ac_Lib=no"
fi
rm -f core conftest.err conftest.$ac_objext \
conftest$ac_exeext conftest.$ac_ext
LIBS=$ac_check_lib_save_LIBS
fi
eval ac_res=\$$as_ac_Lib
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5
$as_echo "$ac_res" >&6; }
if eval test \"x\$"$as_ac_Lib"\" = x"yes"; then :
AST_PJSIP_EVSUB_GRP_LOCK_FOUND=yes
else
AST_PJSIP_EVSUB_GRP_LOCK_FOUND=no
fi
CFLAGS="${ast_ext_lib_check_save_CFLAGS}"
fi
# now check for the header.
if test "${AST_PJSIP_EVSUB_GRP_LOCK_FOUND}" = "yes"; then
PJSIP_EVSUB_GRP_LOCK_LIB="${pbxlibdir} -lpjsip $PJPROJECT_LIB"
# if --with-PJSIP_EVSUB_GRP_LOCK=DIR has been specified, use it.
if test "x${PJSIP_EVSUB_GRP_LOCK_DIR}" != "x"; then
PJSIP_EVSUB_GRP_LOCK_INCLUDE="-I${PJSIP_EVSUB_GRP_LOCK_DIR}/include"
fi
PJSIP_EVSUB_GRP_LOCK_INCLUDE="${PJSIP_EVSUB_GRP_LOCK_INCLUDE} $PJPROJECT_CFLAGS"
if test "xpjsip.h" = "x" ; then # no header, assume found
PJSIP_EVSUB_GRP_LOCK_HEADER_FOUND="1"
else # check for the header
ast_ext_lib_check_saved_CPPFLAGS="${CPPFLAGS}"
CPPFLAGS="${CPPFLAGS} ${PJSIP_EVSUB_GRP_LOCK_INCLUDE}"
ac_fn_c_check_header_mongrel "$LINENO" "pjsip.h" "ac_cv_header_pjsip_h" "$ac_includes_default"
if test "x$ac_cv_header_pjsip_h" = xyes; then :
PJSIP_EVSUB_GRP_LOCK_HEADER_FOUND=1
else
PJSIP_EVSUB_GRP_LOCK_HEADER_FOUND=0
fi
CPPFLAGS="${ast_ext_lib_check_saved_CPPFLAGS}"
fi
if test "x${PJSIP_EVSUB_GRP_LOCK_HEADER_FOUND}" = "x0" ; then
PJSIP_EVSUB_GRP_LOCK_LIB=""
PJSIP_EVSUB_GRP_LOCK_INCLUDE=""
else
if test "x${pbxfuncname}" = "x" ; then # only checking headers -> no library
PJSIP_EVSUB_GRP_LOCK_LIB=""
fi
PBX_PJSIP_EVSUB_GRP_LOCK=1
cat >>confdefs.h <<_ACEOF
#define HAVE_PJSIP_EVSUB_GRP_LOCK 1
_ACEOF
fi
fi
fi
fi
fi