// bfloat16 was introduced by Google for use on TPUs; Intel hardware now supports it as well.
// See: https://software.intel.com/sites/default/files/managed/40/8b/bf16-hardware-numerics-definition-white-paper.pdf
#include "tensorflow/core/framework/bfloat16.h"
namespace tensorflow {
void FloatToBFloat16(const float* src, bfloat16* dst, int64 size) {
const uint16_t* p = reinterpret_cast<const uint16_t*>(src);
uint16_t* q = reinterpret_cast<uint16_t*>(dst);
#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
for (; size != 0; p += 2, q++, size--) {
*q = p[0];
}
#else
for (; size != 0; p += 2, q++, size--) {
*q = p[1];
}
#endif
}
void BFloat16ToFloat(const bfloat16* src, float* dst, int64 size) {
const uint16_t* p = reinterpret_cast<const uint16_t*>(src);
uint16_t* q = reinterpret_cast<uint16_t*>(dst);
#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
for (; size != 0; p++, q += 2, size--) {
q[0] = *p;
q[1] = 0;
}
#else
for (; size != 0; p++, q += 2, size--) {
q[0] = 0;
q[1] = *p;
}
#endif
}
} // end namespace tensorflow