From f982cbc85e9ffff24a6a348c3637b54a7e8cbc34 Mon Sep 17 00:00:00 2001 From: Vadim Troshchinskiy Date: Wed, 20 May 2015 15:47:45 +0200 Subject: Fix negotiation in stage 10 error Problem fixed by adding a select() call to implement a timeout, and retrying writes if needed. --- nxcomp/Loop.cpp | 38 +++++++++++++++++++++++++++++++++++++- 1 file changed, 37 insertions(+), 1 deletion(-) diff --git a/nxcomp/Loop.cpp b/nxcomp/Loop.cpp index f25aed353..0ab0dd988 100644 --- a/nxcomp/Loop.cpp +++ b/nxcomp/Loop.cpp @@ -8100,16 +8100,52 @@ int ReadRemoteData(int fd, char *buffer, int size, char stop) int WriteLocalData(int fd, const char *buffer, int size) { int position = 0; + int ret = 0; + fd_set writeSet; + struct timeval selectTs = {30, 0}; while (position < size) { + + // A write to a non-blocking socket may fail with EAGAIN. The problem is + // that cache data is done in several writes, and there's no easy way + // to handle failure without rewriting a significant amount of code. + // + // Bailing out of the outer loop would result in restarting the sending + // of the entire cache list, which would confuse the other side. + + FD_ZERO(&writeSet); + FD_SET(fd, &writeSet); + + ret = select(fd+1, NULL, &writeSet, NULL, &selectTs); + + #ifdef DEBUG + *logofs << "Loop: WriteLocalData: select() returned with a code of " << ret << " and remaining timeout of " + << selectTs.tv_sec << " sec, " << selectTs.tv_usec << "usec\n" << logofs_flush; + #endif + + if ( ret < 0 ) + { + *logofs << "Loop: Error in select() when writing data to FD#" << fd << ": " << strerror(EGET()) << "\n" << logofs_flush; + + if ( EGET() == EINTR ) + continue; + + return -1; + } + else if ( ret == 0 ) + { + *logofs << "Loop: Timeout expired in select() when writing data to FD#" << fd << ": " << strerror(EGET()) << "\n" << logofs_flush; + return -1; + } + int result = write(fd, buffer + position, size - position); getNewTimestamp(); if (result <= 0) { - if (result < 0 && EGET() == EINTR) + if (result < 0 && (EGET() == EINTR || EGET() == EAGAIN || EGET() == EWOULDBLOCK)) { continue; } -- cgit v1.2.3