Implement Configurable TCP Keepalive Settings in PJSIP Transports

This commit introduces configurable TCP keepalive settings for both TCP and TLS transports. The changes allow for finer control over TCP connection keepalives, enhancing stability and reliability in environments prone to connection timeouts or where intermediate devices may prematurely close idle connections. This has proven necessary and has already been tested in production in several specialized environments where access to the underlying transport is unreliable in ways invisible to the operating system directly, so these keepalive and timeout mechanisms are necessary.

Fixes #657
This commit is contained in:
Joshua Elson
2024-03-18 15:14:36 -04:00
parent f3de77f91f
commit 3d40d34271
6 changed files with 184 additions and 10 deletions

View File

@@ -828,17 +828,55 @@ static int transport_apply(const struct ast_sorcery *sorcery, void *obj)
} else if (transport->type == AST_TRANSPORT_TCP) {
pjsip_tcp_transport_cfg cfg;
static int option = 1;
int sockopt_count = 0;
pjsip_tcp_transport_cfg_default(&cfg, temp_state->state->host.addr.sa_family);
cfg.bind_addr = temp_state->state->host;
cfg.async_cnt = transport->async_operations;
set_qos(transport, &cfg.qos_params);
/* sockopt_params.options is copied to each newly connected socket */
cfg.sockopt_params.options[0].level = pj_SOL_TCP();
cfg.sockopt_params.options[0].optname = pj_TCP_NODELAY();
cfg.sockopt_params.options[0].optval = &option;
cfg.sockopt_params.options[0].optlen = sizeof(option);
cfg.sockopt_params.cnt = 1;
cfg.sockopt_params.options[sockopt_count].level = pj_SOL_TCP();
cfg.sockopt_params.options[sockopt_count].optname = pj_TCP_NODELAY();
cfg.sockopt_params.options[sockopt_count].optval = &option;
cfg.sockopt_params.options[sockopt_count].optlen = sizeof(option);
sockopt_count++;
if (transport->tcp_keepalive_enable) {
#if defined(PJ_MAX_SOCKOPT_PARAMS) && PJ_MAX_SOCKOPT_PARAMS >= 5
ast_log(LOG_DEBUG, "TCP Keepalive enabled for transport '%s'. Idle Time: %d, Interval: %d, Count: %d\n",
ast_sorcery_object_get_id(obj), transport->tcp_keepalive_idle_time, transport->tcp_keepalive_interval_time, transport->tcp_keepalive_probe_count);
cfg.sockopt_params.options[sockopt_count].level = pj_SOL_SOCKET();
cfg.sockopt_params.options[sockopt_count].optname = SO_KEEPALIVE;
cfg.sockopt_params.options[sockopt_count].optval = &option;
cfg.sockopt_params.options[sockopt_count].optlen = sizeof(option);
sockopt_count++;
cfg.sockopt_params.options[sockopt_count].level = pj_SOL_TCP();
cfg.sockopt_params.options[sockopt_count].optname = TCP_KEEPIDLE;
cfg.sockopt_params.options[sockopt_count].optval = &transport->tcp_keepalive_idle_time;
cfg.sockopt_params.options[sockopt_count].optlen = sizeof(transport->tcp_keepalive_idle_time);
sockopt_count++;
cfg.sockopt_params.options[sockopt_count].level = pj_SOL_TCP();
cfg.sockopt_params.options[sockopt_count].optname = TCP_KEEPINTVL;
cfg.sockopt_params.options[sockopt_count].optval = &transport->tcp_keepalive_interval_time;
cfg.sockopt_params.options[sockopt_count].optlen = sizeof(transport->tcp_keepalive_interval_time);
sockopt_count++;
cfg.sockopt_params.options[sockopt_count].level = pj_SOL_TCP();
cfg.sockopt_params.options[sockopt_count].optname = TCP_KEEPCNT;
cfg.sockopt_params.options[sockopt_count].optval = &transport->tcp_keepalive_probe_count;
cfg.sockopt_params.options[sockopt_count].optlen = sizeof(transport->tcp_keepalive_probe_count);
sockopt_count++;
#else
ast_log(LOG_WARNING, "TCP keepalive settings for '%s' not set due to PJSIP built without support for setting all options. Consider using bundled PJSIP.\n",
ast_sorcery_object_get_id(obj));
#endif
}
cfg.sockopt_params.cnt = sockopt_count;
for (i = 0; i < BIND_TRIES && res != PJ_SUCCESS; i++) {
if (perm_state && perm_state->state && perm_state->state->factory
@@ -853,6 +891,7 @@ static int transport_apply(const struct ast_sorcery *sorcery, void *obj)
} else if (transport->type == AST_TRANSPORT_TLS) {
#if defined(PJ_HAS_SSL_SOCK) && PJ_HAS_SSL_SOCK != 0
static int option = 1;
int sockopt_count = 0;
if (transport->async_operations > 1 && ast_compare_versions(pj_get_version(), "2.5.0") < 0) {
ast_log(LOG_ERROR, "Transport: %s: When protocol=tls and pjproject version < 2.5.0, async_operations can't be > 1\n",
@@ -864,11 +903,47 @@ static int transport_apply(const struct ast_sorcery *sorcery, void *obj)
set_qos(transport, &temp_state->state->tls.qos_params);
/* sockopt_params.options is copied to each newly connected socket */
temp_state->state->tls.sockopt_params.options[0].level = pj_SOL_TCP();
temp_state->state->tls.sockopt_params.options[0].optname = pj_TCP_NODELAY();
temp_state->state->tls.sockopt_params.options[0].optval = &option;
temp_state->state->tls.sockopt_params.options[0].optlen = sizeof(option);
temp_state->state->tls.sockopt_params.cnt = 1;
temp_state->state->tls.sockopt_params.options[sockopt_count].level = pj_SOL_TCP();
temp_state->state->tls.sockopt_params.options[sockopt_count].optname = pj_TCP_NODELAY();
temp_state->state->tls.sockopt_params.options[sockopt_count].optval = &option;
temp_state->state->tls.sockopt_params.options[sockopt_count].optlen = sizeof(option);
sockopt_count++;
if (transport->tcp_keepalive_enable) {
#if defined(PJ_MAX_SOCKOPT_PARAMS) && PJ_MAX_SOCKOPT_PARAMS >= 5
ast_log(LOG_DEBUG, "TCP Keepalive enabled for transport '%s'. Idle Time: %d, Interval: %d, Count: %d\n",
ast_sorcery_object_get_id(obj), transport->tcp_keepalive_idle_time, transport->tcp_keepalive_interval_time, transport->tcp_keepalive_probe_count);
temp_state->state->tls.sockopt_params.options[sockopt_count].level = pj_SOL_SOCKET();
temp_state->state->tls.sockopt_params.options[sockopt_count].optname = SO_KEEPALIVE;
temp_state->state->tls.sockopt_params.options[sockopt_count].optval = &option;
temp_state->state->tls.sockopt_params.options[sockopt_count].optlen = sizeof(option);
sockopt_count++;
temp_state->state->tls.sockopt_params.options[sockopt_count].level = pj_SOL_TCP();
temp_state->state->tls.sockopt_params.options[sockopt_count].optname = TCP_KEEPIDLE;
temp_state->state->tls.sockopt_params.options[sockopt_count].optval = &transport->tcp_keepalive_idle_time;
temp_state->state->tls.sockopt_params.options[sockopt_count].optlen = sizeof(transport->tcp_keepalive_idle_time);
sockopt_count++;
temp_state->state->tls.sockopt_params.options[sockopt_count].level = pj_SOL_TCP();
temp_state->state->tls.sockopt_params.options[sockopt_count].optname = TCP_KEEPINTVL;
temp_state->state->tls.sockopt_params.options[sockopt_count].optval = &transport->tcp_keepalive_interval_time;
temp_state->state->tls.sockopt_params.options[sockopt_count].optlen = sizeof(transport->tcp_keepalive_interval_time);
sockopt_count++;
temp_state->state->tls.sockopt_params.options[sockopt_count].level = pj_SOL_TCP();
temp_state->state->tls.sockopt_params.options[sockopt_count].optname = TCP_KEEPCNT;
temp_state->state->tls.sockopt_params.options[sockopt_count].optval = &transport->tcp_keepalive_probe_count;
temp_state->state->tls.sockopt_params.options[sockopt_count].optlen = sizeof(transport->tcp_keepalive_probe_count);
sockopt_count++;
#else
ast_log(LOG_WARNING, "TCP keepalive settings for '%s' not set due to PJSIP built without support for setting all options. Consider using bundled PJSIP.\n",
ast_sorcery_object_get_id(obj));
#endif
}
temp_state->state->tls.sockopt_params.cnt = sockopt_count;
for (i = 0; i < BIND_TRIES && res != PJ_SUCCESS; i++) {
if (perm_state && perm_state->state && perm_state->state->factory
@@ -1760,6 +1835,10 @@ int ast_sip_initialize_sorcery_transport(void)
ast_sorcery_object_field_register_custom(sorcery, "transport", "require_client_cert", "", transport_tls_bool_handler, require_client_cert_to_str, NULL, 0, 0);
ast_sorcery_object_field_register_custom(sorcery, "transport", "allow_wildcard_certs", "", transport_tls_bool_handler, allow_wildcard_certs_to_str, NULL, 0, 0);
ast_sorcery_object_field_register_custom(sorcery, "transport", "method", "", transport_tls_method_handler, tls_method_to_str, NULL, 0, 0);
ast_sorcery_object_field_register(sorcery, "transport", "tcp_keepalive_enable", "no", OPT_BOOL_T, 0, FLDSET(struct ast_sip_transport, tcp_keepalive_enable));
ast_sorcery_object_field_register(sorcery, "transport", "tcp_keepalive_idle_time", "30", OPT_INT_T, 0, FLDSET(struct ast_sip_transport, tcp_keepalive_idle_time));
ast_sorcery_object_field_register(sorcery, "transport", "tcp_keepalive_interval_time", "1", OPT_INT_T, 0, FLDSET(struct ast_sip_transport, tcp_keepalive_interval_time));
ast_sorcery_object_field_register(sorcery, "transport", "tcp_keepalive_probe_count", "5", OPT_INT_T, 0, FLDSET(struct ast_sip_transport, tcp_keepalive_probe_count));
#if defined(PJ_HAS_SSL_SOCK) && PJ_HAS_SSL_SOCK != 0
ast_sorcery_object_field_register_custom(sorcery, "transport", "cipher", "", transport_tls_cipher_handler, transport_tls_cipher_to_str, NULL, 0, 0);
#endif