aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorChristian Grothoff <christian@grothoff.org>2021-12-02 08:02:20 +0100
committerChristian Grothoff <christian@grothoff.org>2021-12-02 08:02:20 +0100
commit5a7dd001250da73ce2236f245b6bba9a1663477e (patch)
treee3e45fcc971fb084fb274443c87984f38e398dd6
parente8fc5d8d5c5eb07c9b6be41fd9bab0552a6ac672 (diff)
improve suicide logic
-rw-r--r--debian/taler-exchange.taler-exchange-aggregator.service3
-rw-r--r--debian/taler-exchange.taler-exchange-closer.service3
-rw-r--r--debian/taler-exchange.taler-exchange-httpd.service6
-rw-r--r--debian/taler-exchange.taler-exchange-httpd@.service26
-rw-r--r--debian/taler-exchange.taler-exchange-httpd@.socket14
-rw-r--r--debian/taler-exchange.taler-exchange-secmod-eddsa.service3
-rw-r--r--debian/taler-exchange.taler-exchange-secmod-rsa.service3
-rw-r--r--debian/taler-exchange.taler-exchange-transfer.service3
-rw-r--r--debian/taler-exchange.taler-exchange-wirewatch.service3
-rw-r--r--debian/taler-exchange.taler-exchange-wirewatch@.service16
-rw-r--r--src/exchange/exchange.conf3
-rw-r--r--src/exchange/taler-exchange-httpd.c148
-rw-r--r--src/exchange/taler-exchange-httpd.h7
-rw-r--r--src/exchange/taler-exchange-httpd_keys.c83
14 files changed, 258 insertions, 63 deletions
diff --git a/debian/taler-exchange.taler-exchange-aggregator.service b/debian/taler-exchange.taler-exchange-aggregator.service
index d0fcbf60c..683c1a81e 100644
--- a/debian/taler-exchange.taler-exchange-aggregator.service
+++ b/debian/taler-exchange.taler-exchange-aggregator.service
@@ -5,7 +5,8 @@ PartOf=taler-exchange.target
[Service]
User=taler-exchange-aggregator
Type=simple
-Restart=on-failure
+Restart=always
+RestartSec=100ms
ExecStart=/usr/bin/taler-exchange-aggregator -c /etc/taler/taler.conf
StandardOutput=journal
StandardError=journal
diff --git a/debian/taler-exchange.taler-exchange-closer.service b/debian/taler-exchange.taler-exchange-closer.service
index b2a820d35..01c52b2d4 100644
--- a/debian/taler-exchange.taler-exchange-closer.service
+++ b/debian/taler-exchange.taler-exchange-closer.service
@@ -5,7 +5,8 @@ PartOf=taler-exchange.target
[Service]
User=taler-exchange-closer
Type=simple
-Restart=on-failure
+Restart=always
+RestartSec=100ms
ExecStart=/usr/bin/taler-exchange-closer -c /etc/taler/taler.conf
StandardOutput=journal
StandardError=journal
diff --git a/debian/taler-exchange.taler-exchange-httpd.service b/debian/taler-exchange.taler-exchange-httpd.service
index de65e17b6..7db2b1194 100644
--- a/debian/taler-exchange.taler-exchange-httpd.service
+++ b/debian/taler-exchange.taler-exchange-httpd.service
@@ -8,7 +8,11 @@ PartOf=taler-exchange.target
[Service]
User=taler-exchange-httpd
Type=simple
-Restart=on-failure
+# Depending on the configuration, the service suicides and then
+# needs to be restarted.
+Restart=always
+# Do not dally on restarts.
+RestartSec=1ms
ExecStart=/usr/bin/taler-exchange-httpd -c /etc/taler/taler.conf
StandardOutput=journal
StandardError=journal
diff --git a/debian/taler-exchange.taler-exchange-httpd@.service b/debian/taler-exchange.taler-exchange-httpd@.service
new file mode 100644
index 000000000..4235f72e9
--- /dev/null
+++ b/debian/taler-exchange.taler-exchange-httpd@.service
@@ -0,0 +1,26 @@
+% This is a systemd service template.
+[Unit]
+Description=GNU Taler payment system exchange REST API at %I
+AssertPathExists=/run/taler/exchange-httpd
+Requires=taler-exchange-httpd@%i.socket taler-exchange-secmod-rsa.service taler-exchange-secmod-eddsa.service
+After=postgres.service network.target taler-exchange-secmod-rsa.service taler-exchange-secmod-eddsa.service
+PartOf=taler-exchange.target
+
+[Service]
+User=taler-exchange-httpd
+Type=simple
+# Depending on the configuration, the service suicides and then
+# needs to be restarted.
+Restart=always
+# Do not dally on restarts.
+RestartSec=1ms
+EnvironmentFile=/etc/environment
+ExecStart=/usr/bin/taler-exchange-httpd -c /etc/taler/taler.conf
+StandardOutput=journal
+StandardError=journal
+PrivateTmp=no
+PrivateDevices=yes
+ProtectSystem=full
+
+[Install]
+WantedBy=multi-user.target
diff --git a/debian/taler-exchange.taler-exchange-httpd@.socket b/debian/taler-exchange.taler-exchange-httpd@.socket
new file mode 100644
index 000000000..e1d6b6bd4
--- /dev/null
+++ b/debian/taler-exchange.taler-exchange-httpd@.socket
@@ -0,0 +1,14 @@
+[Unit]
+Description=Taler Exchange Socket at %I
+PartOf=taler-exchange-httpd@%i.service
+
+[Socket]
+ListenStream=80
+Accept=no
+Service=taler-exchange-httpd@%i.service
+SocketUser=taler-exchange-httpd
+SocketGroup=www-data
+SocketMode=0660
+
+[Install]
+WantedBy=sockets.target
diff --git a/debian/taler-exchange.taler-exchange-secmod-eddsa.service b/debian/taler-exchange.taler-exchange-secmod-eddsa.service
index 7406e3c76..e4898581c 100644
--- a/debian/taler-exchange.taler-exchange-secmod-eddsa.service
+++ b/debian/taler-exchange.taler-exchange-secmod-eddsa.service
@@ -6,7 +6,8 @@ PartOf=taler-exchange.target
[Service]
User=taler-exchange-secmod-eddsa
Type=simple
-Restart=on-failure
+Restart=always
+RestartSec=100ms
ExecStart=/usr/bin/taler-exchange-secmod-eddsa -c /etc/taler/taler.conf
StandardOutput=journal
StandardError=journal
diff --git a/debian/taler-exchange.taler-exchange-secmod-rsa.service b/debian/taler-exchange.taler-exchange-secmod-rsa.service
index b4fedd1cc..6c5a3d613 100644
--- a/debian/taler-exchange.taler-exchange-secmod-rsa.service
+++ b/debian/taler-exchange.taler-exchange-secmod-rsa.service
@@ -6,7 +6,8 @@ PartOf=taler-exchange.target
[Service]
User=taler-exchange-secmod-rsa
Type=simple
-Restart=on-failure
+Restart=always
+RestartSec=100ms
ExecStart=/usr/bin/taler-exchange-secmod-rsa -c /etc/taler/taler.conf
StandardOutput=journal
StandardError=journal
diff --git a/debian/taler-exchange.taler-exchange-transfer.service b/debian/taler-exchange.taler-exchange-transfer.service
index 274b64624..b2615b7c9 100644
--- a/debian/taler-exchange.taler-exchange-transfer.service
+++ b/debian/taler-exchange.taler-exchange-transfer.service
@@ -6,7 +6,8 @@ PartOf=taler-exchange.target
[Service]
User=taler-exchange-wire
Type=simple
-Restart=on-failure
+Restart=always
+RestartSec=100ms
ExecStart=/usr/bin/taler-exchange-transfer -c /etc/taler/taler.conf
StandardOutput=journal
StandardError=journal
diff --git a/debian/taler-exchange.taler-exchange-wirewatch.service b/debian/taler-exchange.taler-exchange-wirewatch.service
index 2705ca2b5..54704cb80 100644
--- a/debian/taler-exchange.taler-exchange-wirewatch.service
+++ b/debian/taler-exchange.taler-exchange-wirewatch.service
@@ -6,7 +6,8 @@ PartOf=taler-exchange.target
[Service]
User=taler-exchange-wire
Type=simple
-Restart=on-failure
+Restart=always
+RestartSec=100ms
ExecStart=/usr/bin/taler-exchange-wirewatch -c /etc/taler/taler.conf
StandardOutput=journal
StandardError=journal
diff --git a/debian/taler-exchange.taler-exchange-wirewatch@.service b/debian/taler-exchange.taler-exchange-wirewatch@.service
new file mode 100644
index 000000000..54704cb80
--- /dev/null
+++ b/debian/taler-exchange.taler-exchange-wirewatch@.service
@@ -0,0 +1,16 @@
+[Unit]
+Description=GNU Taler payment system exchange wirewatch service
+After=network.target
+PartOf=taler-exchange.target
+
+[Service]
+User=taler-exchange-wire
+Type=simple
+Restart=always
+RestartSec=100ms
+ExecStart=/usr/bin/taler-exchange-wirewatch -c /etc/taler/taler.conf
+StandardOutput=journal
+StandardError=journal
+PrivateTmp=yes
+PrivateDevices=yes
+ProtectSystem=full
diff --git a/src/exchange/exchange.conf b/src/exchange/exchange.conf
index 4d353acec..92de5e31c 100644
--- a/src/exchange/exchange.conf
+++ b/src/exchange/exchange.conf
@@ -40,6 +40,9 @@ PORT = 8081
# transfers to enable tracking.
BASE_URL = http://localhost:8081/
+# Maximum number of requests this process should handle before
+# committing suicide.
+# MAX_REQUESTS =
# How long should the aggregator sleep if it has nothing to do?
AGGREGATOR_IDLE_SLEEP_INTERVAL = 60 s
diff --git a/src/exchange/taler-exchange-httpd.c b/src/exchange/taler-exchange-httpd.c
index 64304e9c2..327bcd1e5 100644
--- a/src/exchange/taler-exchange-httpd.c
+++ b/src/exchange/taler-exchange-httpd.c
@@ -77,6 +77,11 @@ static int allow_address_reuse;
const struct GNUNET_CONFIGURATION_Handle *TEH_cfg;
/**
+ * Handle to the HTTP server.
+ */
+static struct MHD_Daemon *mhd;
+
+/**
* Our KYC configuration.
*/
struct TEH_KycOptions TEH_kyc_config;
@@ -123,6 +128,12 @@ static unsigned int connection_timeout = 30;
static int connection_close;
/**
+ * True if we should commit suicide once all active
+ * connections are finished.
+ */
+bool TEH_suicide;
+
+/**
* Value to return from main()
*/
static int global_ret;
@@ -138,6 +149,11 @@ static uint16_t serve_port;
static unsigned long long req_count;
/**
+ * Counter for the number of open connections.
+ */
+static unsigned long long active_connections;
+
+/**
* Limit for the number of requests this HTTP may process before restarting.
* (This was added as one way of dealing with unavoidable memory fragmentation
* happening slowly over time.)
@@ -263,6 +279,45 @@ handle_post_coins (struct TEH_RequestContext *rc,
/**
+ * Increments our request counter and checks if this
+ * process should commit suicide.
+ */
+static void
+check_suicide (void)
+{
+ int fd;
+ pid_t chld;
+ unsigned long long cnt;
+
+ cnt = req_count++;
+ if (req_max != cnt)
+ return;
+ GNUNET_log (GNUNET_ERROR_TYPE_INFO,
+ "Restarting exchange service after %llu requests\n",
+ cnt);
+ /* Stop accepting new connections */
+ fd = MHD_quiesce_daemon (mhd);
+ GNUNET_break (0 == close (fd));
+ /* Continue handling existing connections in child,
+ so that this process can die and be replaced by
+ systemd with a fresh one */
+ chld = fork ();
+ if (-1 == chld)
+ {
+ GNUNET_log_strerror (GNUNET_ERROR_TYPE_ERROR,
+ "fork");
+ _exit (1);
+ }
+ if (0 != chld)
+ {
+ /* We are the parent, instant-suicide! */
+ _exit (0);
+ }
+ TEH_suicide = true;
+}
+
+
+/**
* Function called whenever MHD is done with a request. If the
* request was a POST, we may have stored a `struct Buffer *` in the
* @a con_cls that might still need to be cleaned up. Call the
@@ -290,6 +345,7 @@ handle_mhd_completion_callback (void *cls,
return;
GNUNET_async_scope_enter (&rc->async_scope_id,
&old_scope);
+ check_suicide ();
TEH_check_invariants ();
if (NULL != rc->rh_cleaner)
rc->rh_cleaner (rc);
@@ -1642,8 +1698,19 @@ connection_done (void *cls,
(void) cls;
(void) connection;
(void) socket_context;
- unsigned long long cnt;
+ switch (toe)
+ {
+ case MHD_CONNECTION_NOTIFY_STARTED:
+ active_connections++;
+ break;
+ case MHD_CONNECTION_NOTIFY_CLOSED:
+ active_connections--;
+ if (TEH_suicide &&
+ (0 == active_connections) )
+ GNUNET_SCHEDULER_shutdown ();
+ break;
+ }
#if HAVE_DEVELOPER
/* We only act if the connection is closed. */
if (MHD_CONNECTION_NOTIFY_CLOSED != toe)
@@ -1651,15 +1718,6 @@ connection_done (void *cls,
if (NULL != input_filename)
GNUNET_SCHEDULER_shutdown ();
#endif
- cnt = req_count++;
- if (req_max == cnt)
- {
- GNUNET_log (GNUNET_ERROR_TYPE_INFO,
- "Restarting exchange service after %llu requests\n",
- cnt);
- (void) kill (getpid (),
- SIGTERM);
- }
}
@@ -1780,46 +1838,42 @@ run (void *cls,
GNUNET_SCHEDULER_shutdown ();
return;
}
- {
- struct MHD_Daemon *mhd;
-
- mhd = MHD_start_daemon (MHD_USE_SUSPEND_RESUME
- | MHD_USE_PIPE_FOR_SHUTDOWN
- | MHD_USE_DEBUG | MHD_USE_DUAL_STACK
- | MHD_USE_TCP_FASTOPEN,
- (-1 == fh) ? serve_port : 0,
- NULL, NULL,
- &handle_mhd_request, NULL,
- MHD_OPTION_LISTEN_BACKLOG_SIZE,
- (unsigned int) 1024,
- MHD_OPTION_LISTEN_SOCKET,
- fh,
- MHD_OPTION_EXTERNAL_LOGGER,
- &TALER_MHD_handle_logs,
- NULL,
- MHD_OPTION_NOTIFY_COMPLETED,
- &handle_mhd_completion_callback,
- NULL,
- MHD_OPTION_NOTIFY_CONNECTION,
- &connection_done,
- NULL,
- MHD_OPTION_CONNECTION_TIMEOUT,
- connection_timeout,
- (0 == allow_address_reuse)
+ mhd = MHD_start_daemon (MHD_USE_SUSPEND_RESUME
+ | MHD_USE_PIPE_FOR_SHUTDOWN
+ | MHD_USE_DEBUG | MHD_USE_DUAL_STACK
+ | MHD_USE_TCP_FASTOPEN,
+ (-1 == fh) ? serve_port : 0,
+ NULL, NULL,
+ &handle_mhd_request, NULL,
+ MHD_OPTION_LISTEN_BACKLOG_SIZE,
+ (unsigned int) 1024,
+ MHD_OPTION_LISTEN_SOCKET,
+ fh,
+ MHD_OPTION_EXTERNAL_LOGGER,
+ &TALER_MHD_handle_logs,
+ NULL,
+ MHD_OPTION_NOTIFY_COMPLETED,
+ &handle_mhd_completion_callback,
+ NULL,
+ MHD_OPTION_NOTIFY_CONNECTION,
+ &connection_done,
+ NULL,
+ MHD_OPTION_CONNECTION_TIMEOUT,
+ connection_timeout,
+ (0 == allow_address_reuse)
? MHD_OPTION_END
: MHD_OPTION_LISTENING_ADDRESS_REUSE,
- (unsigned int) allow_address_reuse,
- MHD_OPTION_END);
- if (NULL == mhd)
- {
- GNUNET_log (GNUNET_ERROR_TYPE_ERROR,
- "Failed to launch HTTP service. Is the port in use?\n");
- GNUNET_SCHEDULER_shutdown ();
- return;
- }
- global_ret = EXIT_SUCCESS;
- TALER_MHD_daemon_start (mhd);
+ (unsigned int) allow_address_reuse,
+ MHD_OPTION_END);
+ if (NULL == mhd)
+ {
+ GNUNET_log (GNUNET_ERROR_TYPE_ERROR,
+ "Failed to launch HTTP service. Is the port in use?\n");
+ GNUNET_SCHEDULER_shutdown ();
+ return;
}
+ global_ret = EXIT_SUCCESS;
+ TALER_MHD_daemon_start (mhd);
atexit (&write_stats);
#if HAVE_DEVELOPER
diff --git a/src/exchange/taler-exchange-httpd.h b/src/exchange/taler-exchange-httpd.h
index d52ce1a0f..4f59a5821 100644
--- a/src/exchange/taler-exchange-httpd.h
+++ b/src/exchange/taler-exchange-httpd.h
@@ -158,6 +158,13 @@ extern int TEH_allow_keys_timetravel;
extern char *TEH_revocation_directory;
/**
+ * True if we should commit suicide once all active
+ * connections are finished. Also forces /keys requests
+ * to terminate if they are long-polling.
+ */
+extern bool TEH_suicide;
+
+/**
* Master public key (according to the
* configuration in the exchange directory).
*/
diff --git a/src/exchange/taler-exchange-httpd_keys.c b/src/exchange/taler-exchange-httpd_keys.c
index 6ac39aa80..bff057a50 100644
--- a/src/exchange/taler-exchange-httpd_keys.c
+++ b/src/exchange/taler-exchange-httpd_keys.c
@@ -37,6 +37,12 @@
/**
+ * When do we forcefully timeout a /keys request?
+ */
+#define KEYS_TIMEOUT GNUNET_TIME_UNIT_MINUTES
+
+
+/**
* Taler protocol version in the format CURRENT:REVISION:AGE
* as used by GNU libtool. See
* https://www.gnu.org/software/libtool/manual/html_node/Libtool-versioning.html
@@ -355,6 +361,11 @@ struct SuspendedKeysRequests
* The suspended connection.
*/
struct MHD_Connection *connection;
+
+ /**
+ * When does this request timeout?
+ */
+ struct GNUNET_TIME_Absolute timeout;
};
@@ -399,6 +410,11 @@ static unsigned int skr_size;
static struct MHD_Connection *skr_connection;
/**
+ * Task to force timeouts on /keys requests.
+ */
+static struct GNUNET_SCHEDULER_Task *keys_tt;
+
+/**
* For how long should a signing key be legally retained?
* Configuration value.
*/
@@ -419,6 +435,40 @@ static struct TALER_SecurityModulePublicKeyP esign_sm_pub;
*/
static bool terminating;
+
+/**
+ * Function called to forcefully resume suspended keys requests.
+ *
+ * @param cls unused, NULL
+ */
+static void
+keys_timeout_cb (void *cls)
+{
+ struct SuspendedKeysRequests *skr;
+
+ (void) cls;
+ keys_tt = NULL;
+ while (NULL != (skr = skr_head))
+ {
+ if (GNUNET_TIME_absolute_is_future (skr->timeout))
+ break;
+ GNUNET_log (GNUNET_ERROR_TYPE_INFO,
+ "Resuming /keys request due to timeout\n");
+ GNUNET_CONTAINER_DLL_remove (skr_head,
+ skr_tail,
+ skr);
+ MHD_resume_connection (skr->connection);
+ TALER_MHD_daemon_trigger ();
+ GNUNET_free (skr);
+ }
+ if (NULL == skr)
+ return;
+ keys_tt = GNUNET_SCHEDULER_add_at (skr->timeout,
+ &keys_timeout_cb,
+ NULL);
+}
+
+
/**
* Suspend /keys request while we (hopefully) are waiting to be
* provisioned with key material.
@@ -445,6 +495,13 @@ suspend_request (struct MHD_Connection *connection)
GNUNET_CONTAINER_DLL_insert (skr_head,
skr_tail,
skr);
+ skr->timeout = GNUNET_TIME_relative_to_absolute (KEYS_TIMEOUT);
+ if (NULL == keys_tt)
+ {
+ keys_tt = GNUNET_SCHEDULER_add_at (skr->timeout,
+ &keys_timeout_cb,
+ NULL);
+ }
skr_size++;
if (skr_size > SKR_LIMIT)
{
@@ -477,9 +534,8 @@ check_dk (void *cls,
{
struct TEH_DenominationKey *dk = value;
-
+ (void) cls;
(void) hc;
- (void) value;
GNUNET_assert (TALER_DENOMINATION_INVALID != dk->denom_pub.cipher);
if (TALER_DENOMINATION_RSA == dk->denom_pub.cipher)
GNUNET_assert (GNUNET_CRYPTO_rsa_public_key_check (
@@ -1073,6 +1129,11 @@ TEH_keys_init ()
void
TEH_keys_finished ()
{
+ if (NULL != keys_tt)
+ {
+ GNUNET_SCHEDULER_cancel (keys_tt);
+ keys_tt = NULL;
+ }
if (NULL != key_state)
destroy_key_state (key_state,
true);
@@ -2282,13 +2343,17 @@ TEH_keys_get_handler (struct TEH_RequestContext *rc,
ksh = TEH_keys_get_state ();
if (NULL == ksh)
{
- if ( (SKR_LIMIT == skr_size) &&
- (rc->connection == skr_connection) )
+ if ( ( (SKR_LIMIT == skr_size) &&
+ (rc->connection == skr_connection) ) ||
+ TEH_suicide)
{
- return TALER_MHD_reply_with_error (rc->connection,
- MHD_HTTP_INTERNAL_SERVER_ERROR,
- TALER_EC_EXCHANGE_GENERIC_KEYS_MISSING,
- "too many connections suspended on /keys");
+ return TALER_MHD_reply_with_error (
+ rc->connection,
+ MHD_HTTP_SERVICE_UNAVAILABLE,
+ TALER_EC_EXCHANGE_GENERIC_KEYS_MISSING,
+ TEH_suicide
+ ? "server terminating"
+ : "too many connections suspended waiting on /keys");
}
return suspend_request (rc->connection);
}
@@ -2688,7 +2753,7 @@ TEH_keys_management_get_keys_handler (const struct TEH_RequestHandler *rh,
if (NULL == ksh)
{
return TALER_MHD_reply_with_error (connection,
- MHD_HTTP_INTERNAL_SERVER_ERROR,
+ MHD_HTTP_SERVICE_UNAVAILABLE,
TALER_EC_EXCHANGE_GENERIC_KEYS_MISSING,
"no key state");
}