#include <string.h>
#include <memcopy.h>
#undef memset
/* Fill the first LEN bytes of the object at DSTPP with the byte value
   (unsigned char) C and return DSTPP.

   Requests of 8 bytes or more are handled in three phases: single-byte
   stores until the write position is a multiple of OPSIZ, then whole
   `op_t' words carrying C replicated into every byte (eight words per
   iteration, then one word per iteration), and finally the trailing
   bytes one at a time.  Requests shorter than 8 bytes use only the final
   byte loop.

   The write position is kept as a `byte *' instead of being round-tripped
   through `long int': a signed long is not guaranteed to be wide enough
   to hold a pointer, and a truncated address must never be converted back
   into a pointer and dereferenced.  */
void *
inhibit_loop_to_libcall
memset(void *dstpp, int c, size_t len)
{
  byte *dstp = (byte *)dstpp;

  if (len >= 8)
    {
      size_t xlen;
      op_t cccc;
      op_t *wordp;

      /* Replicate the fill byte into every byte of an `op_t'.  */
      cccc = (unsigned char)c;
      cccc |= cccc << 8;
      cccc |= cccc << 16;
      if (OPSIZ > 4)
        /* Do the shift in two steps to avoid warning if long has 32 bits.  */
        cccc |= (cccc << 16) << 16;

      /* There are at least some bytes to set.
         No need to test for LEN == 0 in this alignment loop: LEN is at
         least 8 here and fewer than OPSIZ bytes are written.  Only the low
         bits of the address take part in the test, so the conversion to
         an integer is used for the comparison alone.  */
      while ((size_t)dstp % OPSIZ != 0)
        {
          dstp[0] = c;
          dstp += 1;
          len -= 1;
        }

      /* Write 8 `op_t' per iteration until less than 8 `op_t' remain.  */
      xlen = len / (OPSIZ * 8);
      while (xlen > 0)
        {
          wordp = (op_t *)dstp;
          wordp[0] = cccc;
          wordp[1] = cccc;
          wordp[2] = cccc;
          wordp[3] = cccc;
          wordp[4] = cccc;
          wordp[5] = cccc;
          wordp[6] = cccc;
          wordp[7] = cccc;
          dstp += 8 * OPSIZ;
          xlen -= 1;
        }
      len %= OPSIZ * 8;

      /* Write 1 `op_t' per iteration until less than OPSIZ bytes remain.  */
      xlen = len / OPSIZ;
      while (xlen > 0)
        {
          wordp = (op_t *)dstp;
          wordp[0] = cccc;
          dstp += OPSIZ;
          xlen -= 1;
        }
      len %= OPSIZ;
    }

  /* Write the last few bytes.  */
  while (len > 0)
    {
      dstp[0] = c;
      dstp += 1;
      len -= 1;
    }

  return dstpp;
}
libc_hidden_builtin_def(memset)
其中用到的部分定义(op_t、OPSIZ、byte)如下:
/* Word type used by memset for its multi-byte stores: once the destination
   is aligned, each store writes one `op_t'.  */
#define op_t unsigned long int
/* Size of one `op_t' in bytes; memset aligns the destination to a multiple
   of this value before storing whole words.  */
#define OPSIZ (sizeof(op_t))
/* Type used by memset for its single-byte stores.  */
typedef unsigned char byte;
1、len < 8 的时候,按每次一个字节来写入(memset 只写不读)。
2、len >= 8 的时候,先逐字节写入,直到 dstp 对齐到 OPSIZ 的倍数(32 位机器为 4,64 位机器为 8);然后每次写入一个 op_t(32 位机器 4 个字节,64 位机器 8 个字节),每轮循环连续写 8 次;剩下不足 8 个 op_t 的部分接着按每次 4 或 8 字节写入;最后不足 OPSIZ 字节的部分按每次 1 字节写入。
疑问:
假如在 32 位机器下,每次取得的 dstp 地址都是 4 的倍数吗?什么情况下会取得不是 4 的倍数的地址?(例如 buf 按 4 字节对齐时,memset(buf + 1, 0, 16) 传入的地址就不是 4 的倍数;上面的对齐循环正是用来处理这种情况的。)