STM32F439芯片,以下用M4称呼。M4的HASH模块,可以计算SHA1、SHA224、SHA256、MD5这些校验值,也可以计算基于它们的HMAC消息认证码(带密钥的校验值),都是硬件计算。在此以SHA1及其HMAC_SHA1为例,讨论其用法。
介绍一下HMAC的概念:
HMAC(message) = Hash[((key | pad) XOR 0x5C) | Hash(((key | pad) XOR 0x36) | message)]
其中,(key | pad)表示在key的后面缀上若干数量的零,使得其总长度为64bytes,即512bits。假如key本身就已经达到512bits,就不必后缀零了。假如key超过了512bits,那么计算这个key的HASH值来代替原本的key。XOR 0x5C表示这64个byte全部要异或0x5C。XOR 0x36同理,剩下的 | 符号只是简单的连接前面的bit串和后面的bit串。
首先要说明的是,这些校验算法,对原始数据的尺寸,都是以bit为单位的。只不过,平时在电脑上常用的CRC32啦、MD5啦、SHA1啦,因为都是用来校验文件的,而文件的尺寸是以byte为单位的,所以常见的HASH函数都是以byte为单位,就连M4的固件库也不例外。固件库里面的函数如下:
/*
 * Abridged excerpt of the firmware-library SHA-1 routine; lines containing
 * only "……" mark code that was left out of the quotation.
 *
 * Input  : message buffer.
 * Ilen   : message length in BYTES.
 * Output : receives the 20-byte digest.
 * Returns ERROR when the busy status is still set after the (elided) wait,
 * otherwise the initial value of status.
 */
ErrorStatus HASH_SHA1(uint8_t *Input, uint32_t Ilen, uint8_t Output[20])
{
……
/* Number of valid bits in last word of the Input data */
/* Byte granularity only: the result is always 0, 8, 16 or 24. */
nbvalidbitsdata = 8 * (Ilen % 4);
/* HASH peripheral initialization */
HASH_DeInit();
……
/* Configure the number of valid bits in last word of the data */
HASH_SetLastWordValidBitsNbr(nbvalidbitsdata);
/* Write the Input block in the IN FIFO */
/* One 32-bit word per iteration; when Ilen is not a multiple of 4 the last
   load still fetches a whole word starting inside the buffer. */
for(i=0; i<Ilen; i+=4)
{
HASH_DataIn(*(uint32_t*)inputaddr);
inputaddr+=4;
}
/* Start the HASH processor */
HASH_StartDigest();
……
if (busystatus != RESET)
{
status = ERROR;
}
else
{
/* Read the message digest */
/* Each digest word is byte-reversed with __REV before being stored. */
HASH_GetDigest(&SHA1_MessageDigest);
*(uint32_t*)(outputaddr) = __REV(SHA1_MessageDigest.Data[0]);
outputaddr+=4;
*(uint32_t*)(outputaddr) = __REV(SHA1_MessageDigest.Data[1]);
……
}
return status;
}
函数已经包装的很好了,只可惜直接拿来用还是用不了,明明固件库就是为了方便众人使用的,可是这个不能直接用,让人很无语啊……
好吧,其实只要在调用这个函数之前,使能某个总线的时钟即可:
RCC_AHB2PeriphClockCmd (RCC_AHB2Periph_HASH, ENABLE);
算完之后,用同样的函数把时钟禁用,当然也可以不禁用。
接下来讨论byte和bit的区别。上文也说了,大多数HASH函数都是以byte为单位,固件库函数的Ilen参数就是指字节数,然后是 nbvalidbitsdata = 8 * (Ilen % 4); 这句,计算的是最后一个不足4个字节(即不足32位)的字里面有效的位数。明明HASH_STR寄存器的NBLW位域是可以精确到位的,但这里却只精确到字节。
下面改造这个函数,使其可以精确到位。注意,SHA1算法的字节顺序,是高位在前低位在后,也就是说,我们在原始数据的结尾,添上一些位,那么这些位要放置在字节的高位。改造后的函数如下:
ErrorStatus HASH_SHA1_bit(uint8_t *Input, uint32_t bit_Ilen, uint8_t Output[20])
{
HASH_InitTypeDef SHA1_HASH_InitStructure;
HASH_MsgDigest SHA1_MessageDigest;
__IO uint16_t nbvalidbitsdata = 0;
uint32_t i = 0;
__IO uint32_t counter = 0;
uint32_t busystatus = 0;
ErrorStatus status = SUCCESS;
uint32_t inputaddr = (uint32_t)Input;
uint32_t outputaddr = (uint32_t)Output;
/* Number of valid bits in last word of the Input data */
nbvalidbitsdata = bit_Ilen % 32; //8 * (Ilen % 4);
/* HASH peripheral initialization */
HASH_DeInit();
/* HASH Configuration */
SHA1_HASH_InitStructure.HASH_AlgoSelection = HASH_AlgoSelection_SHA1;
SHA1_HASH_InitStructure.HASH_AlgoMode = HASH_AlgoMode_HASH;
SHA1_HASH_InitStructure.HASH_DataType = HASH_DataType_8b;
HASH_Init(&SHA1_HASH_InitStructure);
/* Configure the number of valid bits in last word of the data */
HASH_SetLastWordValidBitsNbr(nbvalidbitsdata);
/* Write the Input block in the IN FIFO */
for(i=0; i<bit_Ilen; i+=4 * 8) // for(i=0; i<Ilen; i+=4)
{
HASH_DataIn(*(uint32_t*)inputaddr);
inputaddr+=4;
}
/* Start the HASH processor */
HASH_StartDigest();
/* wait until the Busy flag is RESET */
do
{
busystatus = HASH_GetFlagStatus(HASH_FLAG_BUSY);
counter++;
}while ((counter != SHA1BUSY_TIMEOUT) && (busystatus != RESET));
if (busystatus != RESET)
{
status = ERROR;
}
else
{
/* Read the message digest */
HASH_GetDigest(&SHA1_MessageDigest);
*(uint32_t*)(outputaddr) = __REV(SHA1_MessageDigest.Data[0]);
outputaddr+=4;
*(uint32_t*)(outputaddr) = __REV(SHA1_MessageDigest.Data[1]);
outputaddr+=4;
*(uint32_t*)(outputaddr) = __REV(SHA1_MessageDigest.Data[2]);
outputaddr+=4;
*(uint32_t*)(outputaddr) = __REV(SHA1_MessageDigest.Data[3]);
outputaddr+=4;
*(uint32_t*)(outputaddr) = __REV(SHA1_MessageDigest.Data[4]);
}
return status;
}
ErrorStatus HMAC_SHA1_bit(uint8_t *Key, uint32_t bit_Keylen, uint8_t *Input,
uint32_t bit_Ilen, uint8_t Output[20])
{
HASH_InitTypeDef SHA1_HASH_InitStructure;
HASH_MsgDigest SHA1_MessageDigest;
__IO uint16_t nbvalidbitsdata = 0;
__IO uint16_t nbvalidbitskey = 0;
uint32_t i = 0;
__IO uint32_t counter = 0;
uint32_t busystatus = 0;
ErrorStatus status = SUCCESS;
uint32_t keyaddr = (uint32_t)Key;
uint32_t inputaddr = (uint32_t)Input;
uint32_t outputaddr = (uint32_t)Output;
/* Number of valid bits in last word of the Input data */
nbvalidbitsdata = bit_Ilen % 32; // 8 * (Ilen % 4);
/* Number of valid bits in last word of the Key */
nbvalidbitskey = bit_Keylen % 32; // 8 * (Keylen % 4);
/* HASH peripheral initialization */
HASH_DeInit();
/* HASH Configuration */
SHA1_HASH_InitStructure.HASH_AlgoSelection = HASH_AlgoSelection_SHA1;
SHA1_HASH_InitStructure.HASH_AlgoMode = HASH_AlgoMode_HMAC;
SHA1_HASH_InitStructure.HASH_DataType = HASH_DataType_8b;
if(bit_Keylen > 64 * 8) // if(Keylen > 64)
{
/* HMAC long Key */
SHA1_HASH_InitStructure.HASH_HMACKeyType = HASH_HMACKeyType_LongKey;
}
else
{
/* HMAC short Key */
SHA1_HASH_InitStructure.HASH_HMACKeyType = HASH_HMACKeyType_ShortKey;
}
HASH_Init(&SHA1_HASH_InitStructure);
/* Configure the number of valid bits in last word of the Key */
HASH_SetLastWordValidBitsNbr(nbvalidbitskey);
/* Write the Key */
for(i=0; i<bit_Keylen; i+=4 * 8) // for(i=0; i<Keylen; i+=4)
{
HASH_DataIn(*(uint32_t*)keyaddr);
keyaddr+=4;
}
/* Start the HASH processor */
HASH_StartDigest();
/* wait until the Busy flag is RESET */
do
{
busystatus = HASH_GetFlagStatus(HASH_FLAG_BUSY);
counter++;
}while ((counter != SHA1BUSY_TIMEOUT) && (busystatus != RESET));
if (busystatus != RESET)
{
status = ERROR;
}
else
{
/* Configure the number of valid bits in last word of the Input data */
HASH_SetLastWordValidBitsNbr(nbvalidbitsdata);
/* Write the Input block in the IN FIFO */
for(i=0; i<bit_Ilen; i+=4 * 8) // for(i=0; i<Ilen; i+=4)
{
HASH_DataIn(*(uint32_t*)inputaddr);
inputaddr+=4;
}
/* Start the HASH processor */
HASH_StartDigest();
/* wait until the Busy flag is RESET */
counter =0;
do
{
busystatus = HASH_GetFlagStatus(HASH_FLAG_BUSY);
counter++;
}while ((counter != SHA1BUSY_TIMEOUT) && (busystatus != RESET));
if (busystatus != RESET)
{
status = ERROR;
}
else
{
/* Configure the number of valid bits in last word of the Key */
HASH_SetLastWordValidBitsNbr(nbvalidbitskey);
/* Write the Key */
keyaddr = (uint32_t)Key;
for(i=0; i<bit_Keylen; i+=4 * 8) // for(i=0; i<Keylen; i+=4)
{
HASH_DataIn(*(uint32_t*)keyaddr);
keyaddr+=4;
}
/* Start the HASH processor */
HASH_StartDigest();
/* wait until the Busy flag is RESET */
counter =0;
do
{
busystatus = HASH_GetFlagStatus(HASH_FLAG_BUSY);
counter++;
}while ((counter != SHA1BUSY_TIMEOUT) && (busystatus != RESET));
if (busystatus != RESET)
{
status = ERROR;
}
else
{
/* Read the message digest */
HASH_GetDigest(&SHA1_MessageDigest);
*(uint32_t*)(outputaddr) = __REV(SHA1_MessageDigest.Data[0]);
outputaddr+=4;
*(uint32_t*)(outputaddr) = __REV(SHA1_MessageDigest.Data[1]);
outputaddr+=4;
*(uint32_t*)(outputaddr) = __REV(SHA1_MessageDigest.Data[2]);
outputaddr+=4;
*(uint32_t*)(outputaddr) = __REV(SHA1_MessageDigest.Data[3]);
outputaddr+=4;
*(uint32_t*)(outputaddr) = __REV(SHA1_MessageDigest.Data[4]);
}
}
}
return status;
}
函数还是那么长,这里只说和固件库函数的区别。首先在函数名后面缀上bit,表示这俩函数可以对原始数据精确到bit;同时给参数中的长度Ilen和Keylen也缀上bit,表示这俩参数也是以bit为单位。然后编译一下,所有有错误的行,都是要修改的地方,改起来只要注意把针对字节的计算和判断改成针对位的就行了。
接下来生成一组key和一组message,并选取各种不同的长度来进行测试。为了便于在IAR开发环境下查看HASH结果,用如下的函数专门把结果转换成十六进制字符串:
/*
 * Render a 20-byte digest as 40 upper-case hexadecimal characters followed
 * by a terminating NUL.
 *
 * ref_hash : destination, at least 41 chars; volatile so the text stays
 *            observable (e.g. in a debugger watch window).
 * Output   : the 20 digest bytes, emitted in order, high nibble first.
 */
void out_to_hash( volatile char ref_hash[41], uint8_t Output[20] )
{
    static const char digits[] = "0123456789ABCDEF";
    volatile char *dst = ref_hash;
    const uint8_t *src = Output;
    const uint8_t *const end = Output + 20;

    while (src != end)
    {
        const unsigned int byte = *src++;

        *dst++ = digits[byte >> 4];     /* high nibble */
        *dst++ = digits[byte & 0x0FU];  /* low nibble  */
    }
    *dst = '\0';
}
测试代码如下:
/*
 * Exercise HASH_SHA1_bit and HMAC_SHA1_bit with a range of bit lengths and
 * keep each result as a hex string in a volatile local so it can be
 * inspected in the debugger.
 *
 * Key and message are filled with simple arithmetic byte patterns. The
 * return status of the hash calls is not checked here; a failed call leaves
 * the previous digest in `output`.
 */
void hash_test( void )
{
    uint8_t key[256] = "";
    uint8_t message[256] = "";
    /* Stand-in for the empty key: a zero-length key is not accepted by the
       hardware, and zero padding makes one zero byte equivalent. Sized as a
       full 32-bit word so that a word-wide read of the key stays inside the
       object. Replaces the former "" literal, which was a 1-byte char array
       of the wrong pointer type. */
    uint8_t zero_key[4] = {0};

    for (size_t i = 0; i < 256; i++)
    {
        key[i] = (uint8_t)(8 + i * 13);
        message[i] = (uint8_t)(3 + i * 5);
    }

    volatile char sha1_1b            [41] = "";
    volatile char sha1_5b            [41] = "";
    volatile char sha1_8b            [41] = "";
    volatile char sha1_13b           [41] = "";
    volatile char sha1_21b           [41] = "";
    volatile char sha1_34b           [41] = "";
    volatile char sha1_377b          [41] = "";
    volatile char sha1_610b          [41] = "";
    volatile char hmac_0b_sha1_0b    [41] = "";
    volatile char hmac_8b_sha1_8b    [41] = "";
    volatile char hmac_8b_sha1_13b   [41] = "";
    volatile char hmac_8b_sha1_610b  [41] = "";
    volatile char hmac_13b_sha1_8b   [41] = "";
    volatile char hmac_13b_sha1_13b  [41] = "";
    volatile char hmac_13b_sha1_610b [41] = "";
    volatile char hmac_512b_sha1_8b  [41] = "";
    volatile char hmac_512b_sha1_13b [41] = "";
    volatile char hmac_512b_sha1_610b[41] = "";
    volatile char hmac_610b_sha1_8b  [41] = "";
    volatile char hmac_610b_sha1_13b [41] = "";
    volatile char hmac_610b_sha1_610b[41] = "";

    /* The HASH peripheral needs its AHB2 clock before any of the calls below. */
    RCC_AHB2PeriphClockCmd (RCC_AHB2Periph_HASH, ENABLE);

    uint8_t output[20] = "";

    /* Plain SHA-1 over the first N bits of the message. */
    HASH_SHA1_bit (message, 1  , output); out_to_hash (sha1_1b  , output);
    HASH_SHA1_bit (message, 5  , output); out_to_hash (sha1_5b  , output);
    HASH_SHA1_bit (message, 8  , output); out_to_hash (sha1_8b  , output);
    HASH_SHA1_bit (message, 13 , output); out_to_hash (sha1_13b , output);
    HASH_SHA1_bit (message, 21 , output); out_to_hash (sha1_21b , output);
    HASH_SHA1_bit (message, 34 , output); out_to_hash (sha1_34b , output);
    HASH_SHA1_bit (message, 377, output); out_to_hash (sha1_377b, output);
    HASH_SHA1_bit (message, 610, output); out_to_hash (sha1_610b, output);

    /* HMAC-SHA1: "empty" key (8 zero bits) with an empty message ... */
    HMAC_SHA1_bit (zero_key, 8, message, 0  , output); out_to_hash (hmac_0b_sha1_0b    , output);

    /* ... then key lengths of 8 / 13 / 512 / 610 bits against message
       lengths of 8 / 13 / 610 bits (610 > 512 takes the long-key branch). */
    HMAC_SHA1_bit (key, 8  , message, 8  , output); out_to_hash (hmac_8b_sha1_8b    , output);
    HMAC_SHA1_bit (key, 8  , message, 13 , output); out_to_hash (hmac_8b_sha1_13b   , output);
    HMAC_SHA1_bit (key, 8  , message, 610, output); out_to_hash (hmac_8b_sha1_610b  , output);
    HMAC_SHA1_bit (key, 13 , message, 8  , output); out_to_hash (hmac_13b_sha1_8b   , output);
    HMAC_SHA1_bit (key, 13 , message, 13 , output); out_to_hash (hmac_13b_sha1_13b  , output);
    HMAC_SHA1_bit (key, 13 , message, 610, output); out_to_hash (hmac_13b_sha1_610b , output);
    HMAC_SHA1_bit (key, 512, message, 8  , output); out_to_hash (hmac_512b_sha1_8b  , output);
    HMAC_SHA1_bit (key, 512, message, 13 , output); out_to_hash (hmac_512b_sha1_13b , output);
    HMAC_SHA1_bit (key, 512, message, 610, output); out_to_hash (hmac_512b_sha1_610b, output);
    HMAC_SHA1_bit (key, 610, message, 8  , output); out_to_hash (hmac_610b_sha1_8b  , output);
    HMAC_SHA1_bit (key, 610, message, 13 , output); out_to_hash (hmac_610b_sha1_13b , output);
    HMAC_SHA1_bit (key, 610, message, 610, output); out_to_hash (hmac_610b_sha1_610b, output);
    return;
}
测试时要注意,key的长度可以是零,但硬件不认,算出来的HMAC值也只是简单的160个bit的零。考虑到(key | pad)的特点,只要在512bits长度之内,key后面缀多少个零都是等效的,所以这个key用8bits的零来代替了。测试结果如下:
虽然不知道算得对不对,但看起来挺像那么回事的。