算法核心思想:分治法,第一次统计每2位的1的个数,第二次统计每4位1的个数,第三次统计每8位1的个数,依次相加即可得到结果。
F = 0x55555555 = 01010101010101010101010101010101
T = 0x33333333 = 00110011001100110011001100110011
O = 0x0f0f0f0f = 00001111000011110000111100001111
现在将数字分成16组,每组2位,例如:9 = 1001 = 00,00,..... 00,10,01
第一步是统计每2位中1的个数。由于每2位数字最多可包含2个1,所以需要分别统计两次。如下:
i = (i & 0x55555555) + ((i >> 1) & 0x55555555);
i&F”仅保留奇数位置的1,(i >> 1)&F仅保留偶数位置的1。二者相加得到每2位具有1的个数。
同样,再统计每4位上的1的格式,将0011抽象成一个大的1即可得到相似的代码。
i = (i & 0x33333333) + ((i >> 2) & 0x33333333);
i = (i & 0x0F0F0F0F) + (i >> 4)) & 0x0F0F0F0F);
而0x01010101 = (1 << 24) + (1 << 16) + (1 << 8) + 1
,所以i * 0x01010101 = (i << 24) + (i << 16) + (i << 8) + i
所以i * 0x01010101
最高位就是原始输入的1出现次数的最终结果。移位则将是最高位的值移到最低位。
res = i * (0x01010101) > >24
redis源码如下:
size_t redisPopcount(void *s, long count) {
size_t bits = 0;
unsigned char *p = s;
uint32_t *p4;
// 通过查表来计算,对于 1 字节所能表示的值来说
// 这些值的二进制表示所带有的 1 的数量
// 比如整数 3 的二进制表示 0011 ,带有两个 1
// 正好是查表 bitsinbyte[3] == 2
static const unsigned char bitsinbyte[256] = {0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,1,2,2,3,2,3,3,4,2,3,3,4,3,4,4,5,1,2,2,3,2,3,3,4,2,3,3,4,3,4,4,5,2,3,3,4,3,4,4,5,3,4,4,5,4,5,5,6,1,2,2,3,2,3,3,4,2,3,3,4,3,4,4,5,2,3,3,4,3,4,4,5,3,4,4,5,4,5,5,6,2,3,3,4,3,4,4,5,3,4,4,5,4,5,5,6,3,4,4,5,4,5,5,6,4,5,5,6,5,6,6,7,1,2,2,3,2,3,3,4,2,3,3,4,3,4,4,5,2,3,3,4,3,4,4,5,3,4,4,5,4,5,5,6,2,3,3,4,3,4,4,5,3,4,4,5,4,5,5,6,3,4,4,5,4,5,5,6,4,5,5,6,5,6,6,7,2,3,3,4,3,4,4,5,3,4,4,5,4,5,5,6,3,4,4,5,4,5,5,6,4,5,5,6,5,6,6,7,3,4,4,5,4,5,5,6,4,5,5,6,5,6,6,7,4,5,5,6,5,6,6,7,5,6,6,7,6,7,7,8};
/* Count initial bytes not aligned to 32 bit. */
while((unsigned long)p & 3 && count) {
bits += bitsinbyte[*p++];
count--;
}
/* Count bits 16 bytes at a time */
// 每次统计 16 字节
// 关于这里所使用的优化算法,可以参考:
// http://yesteapea.wordpress.com/2013/03/03/counting-the-number-of-set-bits-in-an-integer/
p4 = (uint32_t*)p;
while(count>=16) {
uint32_t aux1, aux2, aux3, aux4;
aux1 = *p4++;
aux2 = *p4++;
aux3 = *p4++;
aux4 = *p4++;
count -= 16;
aux1 = aux1 - ((aux1 >> 1) & 0x55555555);
aux1 = (aux1 & 0x33333333) + ((aux1 >> 2) & 0x33333333);
aux2 = aux2 - ((aux2 >> 1) & 0x55555555);
aux2 = (aux2 & 0x33333333) + ((aux2 >> 2) & 0x33333333);
aux3 = aux3 - ((aux3 >> 1) & 0x55555555);
aux3 = (aux3 & 0x33333333) + ((aux3 >> 2) & 0x33333333);
aux4 = aux4 - ((aux4 >> 1) & 0x55555555);
aux4 = (aux4 & 0x33333333) + ((aux4 >> 2) & 0x33333333);
bits += ((((aux1 + (aux1 >> 4)) & 0x0F0F0F0F) * 0x01010101) >> 24) +
((((aux2 + (aux2 >> 4)) & 0x0F0F0F0F) * 0x01010101) >> 24) +
((((aux3 + (aux3 >> 4)) & 0x0F0F0F0F) * 0x01010101) >> 24) +
((((aux4 + (aux4 >> 4)) & 0x0F0F0F0F) * 0x01010101) >> 24);
}
/* Count the remaining bytes. */
// 不足 16 字节的,剩下的每个字节通过查表来完成
p = (unsigned char*)p4;
while(count--) bits += bitsinbyte[*p++];
return bits;
}