glibc源码解读——memset

#include <string.h>
#include <memcopy.h>

#undef memset

/* Fill the first LEN bytes of the object at DSTPP with the byte value
   (unsigned char) C and return DSTPP unchanged.

   Short requests (LEN < 8) are written one byte at a time.  Longer ones
   are written in three phases: single bytes until the destination is a
   multiple of OPSIZ, then whole `op_t' words (eight per iteration, then
   one per iteration), then the leftover bytes one at a time.  */
void *
inhibit_loop_to_libcall
memset(void *dstpp, int c, size_t len)
{
    /* The destination is tracked as an integer so that the alignment
       test and the pointer bumps are plain arithmetic.  */
    long int cursor = (long int)dstpp;

    if (len >= 8)
    {
        /* Word whose every byte equals the fill byte.  */
        op_t pattern = (unsigned char)c;
        size_t blocks;
        size_t words;

        pattern |= pattern << 8;
        pattern |= pattern << 16;
        if (OPSIZ > 4)
        {
            /* Shift by 32 in two steps of 16 so that no warning is
               issued when `op_t' is only 32 bits wide.  */
            pattern |= (pattern << 16) << 16;
        }

        /* Head: byte stores until CURSOR is a multiple of OPSIZ.  LEN is
           known to be non-zero here, so it is not tested in this loop.  */
        for (; cursor % OPSIZ != 0; cursor += 1, len -= 1)
        {
            *(byte *)cursor = c;
        }

        /* Body, part 1: eight `op_t' stores per iteration for as long as
           at least eight words remain.  */
        for (blocks = len / (OPSIZ * 8); blocks > 0; blocks -= 1)
        {
            op_t *w = (op_t *)cursor;

            w[0] = pattern;
            w[1] = pattern;
            w[2] = pattern;
            w[3] = pattern;
            w[4] = pattern;
            w[5] = pattern;
            w[6] = pattern;
            w[7] = pattern;
            cursor += 8 * OPSIZ;
        }
        len %= OPSIZ * 8;

        /* Body, part 2: one `op_t' store per iteration until fewer than
           OPSIZ bytes remain.  */
        for (words = len / OPSIZ; words > 0; words -= 1)
        {
            *(op_t *)cursor = pattern;
            cursor += OPSIZ;
        }
        len %= OPSIZ;
    }

    /* Tail (or the whole request when LEN < 8): one byte at a time.  */
    for (; len > 0; len -= 1, cursor += 1)
    {
        *(byte *)cursor = c;
    }

    return dstpp;
}
libc_hidden_builtin_def(memset)


其中部分定义

/* Word type used by memset for its wide stores: once the destination is
   aligned, the fill pattern is written one op_t at a time.  */
#define op_t unsigned long int
/* Size of one op_t in bytes; also the alignment memset establishes
   before it starts storing whole words.  */
#define OPSIZ	(sizeof(op_t))

/* Single-byte type used for the byte-at-a-time stores.  */
typedef unsigned char byte;


考虑到效率的因素:

1、len < 8 的时候,每次写入 1 个字节(memset 只写不读)。

2、len >= 8 的时候,先按每次 1 个字节写入,直到 dstp 对齐到 OPSIZ 的倍数;然后按字写入:32 位机器每次写 4 个字节,64 位机器每次写 8 个字节,每轮循环连续写 8 次。不足 8 个字的部分接着按每次 4 或 8 字节写入,最后不足一个字的部分按每次 1 字节写入。


疑问:

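假如在 32 位机器下,传入的 dstp 地址每次都是 4 的倍数吗?什么情况下会得到不是 4 的倍数的地址?(例如对 char buf[16] 调用 memset(buf + 1, 0, 10),传入的地址就不一定是 4 的倍数;代码中 while (dstp % OPSIZ != 0) 的循环正是用来处理这种未对齐的起始地址。)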




阅读更多

更多精彩内容