From e4a66ef2cce7ac948e603b330964c9089b20da40 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Fri, 3 May 2013 21:19:28 +0200 Subject: [PATCH] usbi_handle_disconnect: Fix race condition leading to double completion It took me quite a while to debug this, here is a step by step for the race which I believe is happening in some cases: 1) app calls libusb_submit_transfer 2) libusb_submit_transfer locks itransfer->lock 3) libusb_submit_transfer adds the transfer to the flying list 4) *thread switch* 5) other thread notices POLL_ERR on device fd, calls usbi_handle_disconnect 6) usbi_handle_disconnect find the transfer which is in progress of being submitted in the flying list 7) usbi_handle_disconnect calls usbi_backend->clear_transfer_priv on the transfer, this blocks waiting on itransfer->lock 8) *thread switch* 9) libusb_submit_transfer actually tries to submit the transfer now, calls usbi_backend->submit_transfer, which fails with -ENODEV 10) libusb_submit_transfer *removes* the transfer from the flying list, unlocks itransfer->lock and returns an error to its caller 11) the caller frees the transfer, meaning the to_cancel pointer in usbi_handle_disconnect now points to free-ed memory, for extra mayhem 12) *thread switch* 13) usbi_handle_disconnect calls usbi_handle_transfer_completion 14) usbi_handle_transfer_completion tries to remove the transfer from the flying list *for the 2nd time* But the first call done from libusb_submit_transfer has already done this. libusb's list_del looks like this: static inline void list_del(struct list_head *entry) { entry->next->prev = entry->prev; entry->prev->next = entry->next; entry->next = entry->prev = NULL; } So the first call sets it next and prev to NULL, and then the 2nd call tries to deref next -> BOOM For an example backtrace caused by this, see: https://bugs.freedesktop.org/show_bug.cgi?id=55619#c7 This patch fixes this by letting libusb_submit keep the flying transfers list locked during submission, so the submission flow changes from: 1) lock flying transfers add to flying transfers unlock 2) submit 3) on submission error: lock flying transfers remove from flying transfers unlock to: 1) lock flying transfers 2) add to flying transfers 3) submit 4) on submission error: remove from flying transfers 5) unlock This means that the os backends submit handler now gets called with the flying transfers lock held! I've looked at all the backends and this should not be a problem. Only the windows and win-ce backends care about the flying transfers list at all, and then only in their handle_events handler. Signed-off-by: Hans de Goede --- libusb/io.c | 40 +++++++++++++++++++++++++--------------- libusb/libusbi.h | 2 ++ 2 files changed, 27 insertions(+), 15 deletions(-) diff --git a/libusb/io.c b/libusb/io.c index 56b9ebd..e04e572 100644 --- a/libusb/io.c +++ b/libusb/io.c @@ -1151,8 +1151,10 @@ static int calculate_timeout(struct usbi_transfer *transfer) } /* add a transfer to the (timeout-sorted) active transfers list. - * returns 1 if the transfer has a timeout and it is the timeout next to - * expire */ + * Callers of this function must hold the flying_transfers_lock. + * This function *always* adds the transfer to the flying_transfers list, + * it will return non 0 if it fails to update the timer, but even then the + * transfer is added to the flying_transfers list. */ static int add_to_flying_list(struct usbi_transfer *transfer) { struct usbi_transfer *cur; @@ -1161,8 +1163,6 @@ static int add_to_flying_list(struct usbi_transfer *transfer) int r = 0; int first = 1; - usbi_mutex_lock(&ctx->flying_transfers_lock); - /* if we have no other flying transfers, start the list with this one */ if (list_empty(&ctx->flying_transfers)) { list_add(&transfer->list, &ctx->flying_transfers); @@ -1212,7 +1212,6 @@ out: UNUSED(first); #endif - usbi_mutex_unlock(&ctx->flying_transfers_lock); return r; } @@ -1374,16 +1373,16 @@ int API_EXPORTED libusb_submit_transfer(struct libusb_transfer *transfer) goto out; } + usbi_mutex_lock(&ctx->flying_transfers_lock); r = add_to_flying_list(itransfer); - if (r) - goto out; - r = usbi_backend->submit_transfer(itransfer); - if (r) { - usbi_mutex_lock(&ctx->flying_transfers_lock); + if (r == LIBUSB_SUCCESS) { + r = usbi_backend->submit_transfer(itransfer); + } + if (r != LIBUSB_SUCCESS) { list_del(&itransfer->list); arm_timerfd_for_next_timeout(ctx); - usbi_mutex_unlock(&ctx->flying_transfers_lock); } + usbi_mutex_unlock(&ctx->flying_transfers_lock); out: updated_fds = (itransfer->flags & USBI_TRANSFER_UPDATED_FDS); @@ -2407,8 +2406,9 @@ out: #endif } -/* Backends call this from handle_events to report disconnection of a device. - * The transfers get cancelled appropriately. +/* Backends may call this from handle_events to report disconnection of a + * device. This function ensures transfers get cancelled appropriately. + * Callers of this function must hold the events_lock. */ void usbi_handle_disconnect(struct libusb_device_handle *handle) { @@ -2423,12 +2423,22 @@ void usbi_handle_disconnect(struct libusb_device_handle *handle) * * this is a bit tricky because: * 1. we can't do transfer completion while holding flying_transfers_lock + * because the completion handler may try to re-submit the transfer * 2. the transfers list can change underneath us - if we were to build a - * list of transfers to complete (while holding look), the situation + * list of transfers to complete (while holding lock), the situation * might be different by the time we come to free them * * so we resort to a loop-based approach as below - * FIXME: is this still potentially racy? + * + * This is safe because transfers are only removed from the + * flying_transfer list by usbi_handle_transfer_completion and + * libusb_close, both of which hold the events_lock while doing so, + * so usbi_handle_disconnect cannot be running at the same time. + * + * Note that libusb_submit_transfer also removes the transfer from + * the flying_transfer list on submission failure, but it keeps the + * flying_transfer list locked between addition and removal, so + * usbi_handle_disconnect never sees such transfers. */ while (1) { diff --git a/libusb/libusbi.h b/libusb/libusbi.h index 5a4569d..5a98494 100644 --- a/libusb/libusbi.h +++ b/libusb/libusbi.h @@ -831,6 +831,8 @@ struct usbi_os_backend { * * This function must not block. * + * This function gets called with the flying_transfers_lock locked! + * * Return: * - 0 on success * - LIBUSB_ERROR_NO_DEVICE if the device has been disconnected -- 1.8.2.1