[Pacemaker] [PATCH] pingd calls "goto retry" if it gets EAGAIN or EINTR

Junko IKEDA tsukishima.ha at gmail.com
Fri Apr 13 06:59:38 CEST 2012


>> > @@ -898,6 +901,9 @@ ping_read(ping_node *node, int *lenp)
>>  >       } else if(rc > 0) {
>>  >           crm_free(packet);
>>  >           return TRUE;
>>  > +     } else {
>>  > +         crm_info("Retrying...");
>>  > +         goto retry;
>>  >       }
>>  >
>>  >      } else {
>
> Does that else statement ever get hit?

Which else statement?

I tried to reproduce this with a modified ping_read().
I added a crm_info() call to each retry branch to confirm which ones are hit.

/*
 * Read one message from node->fd and hand it to the echo/error handlers.
 *
 * Returns TRUE when dump_v4_echo()/dump_v6_echo() reports a positive result
 * for the received packet.  Returns FALSE when more than ping_timeout seconds
 * (whole-second granularity) have passed since entry, when recvmsg() returns
 * 0 bytes, or when a read failure is not retried.
 *
 * node: supplies the socket (fd), the address family (type) and, for IPv6,
 *       the address passed to process_icmp6_error().
 * lenp: not used by this function.
 *
 * The numbered crm_info() lines ("1:" .. "4:") are diagnostic traces that
 * identify which branch requested a retry.
 */
static int
ping_read(ping_node *node, int *lenp)
{
    int bytes;
    char fromaddr[128];
    struct msghdr m;
    u_char buf[1024];
    struct iovec iov[2];
    int saved_errno = 0;

    struct timeval recv_start_time;
    struct timeval recv_time;
    int packlen;
    u_char *packet;

    gettimeofday(&recv_start_time, NULL);
    packlen = DEFDATALEN + IP6LEN + ICMP6ECHOLEN + EXTRA;

    crm_malloc0(packet, packlen);

  retry:
    /* recvmsg() rewrites the length fields, so rebuild the header each pass */
    memset(&m, 0, sizeof(m));
    m.msg_name = &fromaddr;
    m.msg_namelen = sizeof(fromaddr);
    memset(&iov, 0, sizeof(iov));
    iov[0].iov_base = (caddr_t)packet;
    iov[0].iov_len = packlen;
    m.msg_iov = iov;
    m.msg_iovlen = 1;
    m.msg_control = (caddr_t)buf;
    m.msg_controllen = sizeof(buf);

    bytes = recvmsg(node->fd, &m, 0);
    /* errno is only meaningful on failure; capture it before any logging */
    saved_errno = (bytes < 0) ? errno : 0;
    crm_debug_2("Got %d bytes", bytes);

    if(bytes < 0) {
        int rc = 0;

        /* the debug call above may have changed errno; restore it for crm_perror */
        errno = saved_errno;
        crm_perror(LOG_DEBUG, "Read failed");

        if (saved_errno == EAGAIN || saved_errno == EINTR) {
            crm_info("1: bytes=%d, errno=%d, rc=%d", bytes, saved_errno, rc);
            goto check_retry;
        }

        if(node->type == AF_INET6) {
            rc = process_icmp6_error(node, (struct sockaddr_in6*)&(node->addr));
        } else {
            rc = process_icmp4_error(node, (struct sockaddr_in*)&fromaddr);
        }

        if(rc < 0) {
            crm_info("2: bytes=%d, errno=%d, rc=%d", bytes, saved_errno, rc);
            goto check_retry;
        }

    } else if (bytes > 0) {
        int rc = 0;
        if(node->type == AF_INET6) {
            rc = dump_v6_echo(node, packet, bytes, &m);
        } else {
            rc = dump_v4_echo(node, packet, bytes, &m);
        }

        /* the deadline is enforced even when the packet was accepted */
        gettimeofday(&recv_time, NULL);
        if ((recv_start_time.tv_sec + ping_timeout) < recv_time.tv_sec) {
                crm_warn("failed to receive for timeout.");
                crm_free(packet);
                return FALSE;
        }

        if(rc > 0) {
            crm_free(packet);
            return TRUE;
        }

        if(rc < 0) {
            crm_info("3: bytes=%d, errno=%d, rc=%d", bytes, saved_errno, rc);
        } else {
            crm_info("4: bytes=%d, errno=%d, rc=%d", bytes, saved_errno, rc);
        }
        crm_info("Retrying...");
        goto retry;

    } else {
        crm_err("Unexpected reply");
    }

    crm_free(packet);
    return FALSE;

  check_retry:
    /*
     * Retries after a failed read must respect the same deadline as retries
     * after a successful one; otherwise a persistent error keeps this
     * function looping without ever returning.
     */
    gettimeofday(&recv_time, NULL);
    if ((recv_start_time.tv_sec + ping_timeout) < recv_time.tv_sec) {
        crm_warn("failed to receive for timeout.");
        crm_free(packet);
        return FALSE;
    }
    crm_info("Retrying...");
    goto retry;
}


excerpts from ha-log:

Apr 12 09:31:44 bl460g6a pingd: [11081]: info: ping_read: 2: bytes=-1,
errno=113, rc=-1
Apr 12 09:31:44 bl460g6a pingd: [11081]: info: ping_read: Retrying...
Apr 12 09:31:44 bl460g6a pingd: [11081]: info: ping_read: 2: bytes=-1,
errno=113, rc=-1
Apr 12 09:31:44 bl460g6a pingd: [11081]: info: ping_read: Retrying...
Apr 12 09:31:44 bl460g6a pingd: [11081]: info: process_icmp4_error: No
error message: -1: Resource temporarily unavailable (11)
Apr 12 09:31:44 bl460g6a pingd: [11081]: info: ping_read: 4:
bytes=112, errno=11, rc=0
Apr 12 09:31:44 bl460g6a pingd: [11081]: info: ping_read: Retrying...
Apr 12 09:31:44 bl460g6a pingd: [11081]: info: process_icmp4_error: No
error message: -1: Resource temporarily unavailable (11)
Apr 12 09:31:44 bl460g6a pingd: [11081]: info: ping_read: 4:
bytes=112, errno=11, rc=0
Apr 12 09:31:44 bl460g6a pingd: [11081]: info: ping_read: Retrying...
Apr 12 09:31:45 bl460g6a pingd: [11081]: info: ping_read: 3: bytes=56,
errno=11, rc=-1
Apr 12 09:31:45 bl460g6a pingd: [11081]: info: ping_read: Retrying...


# grep "ping_read: 1" /var/log/ha-log | wc -l
0

# grep "ping_read: 2" /var/log/ha-log | wc -l
5644

# grep "ping_read: 3" /var/log/ha-log | wc -l
25035

# grep "ping_read: 4" /var/log/ha-log | wc -l
6228

# grep "Unexpected reply" /var/log/ha-log | wc -l
0


The first if statement
        if (saved_errno == EAGAIN || saved_errno == EINTR) {

and the last else
        crm_err("Unexpected reply");

didn't get hit.


Thanks,
Junko
-------------- next part --------------
A non-text attachment was scrubbed...
Name: log.tar.gz
Type: application/x-gzip
Size: 219297 bytes
Desc: not available
URL: <http://oss.clusterlabs.org/pipermail/pacemaker/attachments/20120413/18b5d9b6/attachment-0001.gz>


More information about the Pacemaker mailing list