通过 Roaring Bitmap 构建好的 bitmap 数据结构，目前还不支持直接写入 StarRocks 的 BITMAP 字段。需要先将其转换为以逗号分隔的字符串（string），写入中间表，再利用 bitmap_from_string 函数导入到目标表中。
建表语句:
目标表
-- Target table: aggregate-key table keyed on col1.
-- col2 is a BITMAP column declared with the BITMAP_UNION aggregate function.
CREATE TABLE `test` (
`col1` varchar(65533) NULL COMMENT "",
`col2` bitmap BITMAP_UNION NULL COMMENT ""
) ENGINE=OLAP
AGGREGATE KEY(`col1`)
COMMENT "OLAP"
DISTRIBUTED BY HASH(`col1`) BUCKETS 1
PROPERTIES (
"replication_num" = "1",
"in_memory" = "false",
"storage_format" = "DEFAULT"
);
中间表
-- Staging (intermediate) table: duplicate-key table keyed on col1.
-- col2 holds the bitmap serialized as a plain VARCHAR string.
-- NOTE(review): col2 is capped at varchar(65533), so the serialized bitmap
-- string presumably has to fit in that length -- confirm for large bitmaps.
CREATE TABLE `test1` (
`col1` varchar(65533) NULL COMMENT "",
`col2` varchar(65533) NULL COMMENT ""
) ENGINE=OLAP
DUPLICATE KEY(`col1`)
COMMENT "OLAP"
DISTRIBUTED BY HASH(`col1`) BUCKETS 1
PROPERTIES (
"replication_num" = "1",
"in_memory" = "false",
"storage_format" = "DEFAULT"
);
使用 Python 生成 bitmap，并通过 Stream Load 写入 StarRocks 中间表
from random import randrange
from requests import Session
from roaringbitmap import RoaringBitmap
class LoadSession(Session):
    """Session subclass that leaves authentication untouched on redirects."""

    def rebuild_auth(self, prepared_request, response):
        """Intentionally do nothing.

        With an empty override here, requests will always preserve the
        Authorization header when a request is redirected.
        """
        return None
def bitmap_generate():
    """Return a new RoaringBitmap filled from two random draws.

    Each draw is ``randrange(99999999)``; the results are added to the
    bitmap one after the other and the bitmap is returned.
    """
    generated = RoaringBitmap()
    for _ in range(2):
        generated.add(randrange(99999999))
    return generated
def main():
    """
    Stream Load demo built on the third-party ``requests`` library.

    Generates three random bitmaps, serializes each one as a comma-separated
    list of its integer members, and sends them as three ``key|members`` rows
    in a single HTTP PUT to the ``_stream_load`` endpoint of table
    ``bitmap_test.test1``. The decoded JSON response body is printed.
    """
    username, password = 'root', ''
    headers = {
        "Content-Type": "text/html; charset=UTF-8",
        "connection": "keep-alive",
        "max_filter_ratio": "0.2",
        "columns": "col1, col2",
        "column_separator": '|',
        "Expect": "100-continue",
    }

    # Serialize each bitmap by iterating its members rather than slicing
    # str(bitmap): the slice silently depends on the library's exact string
    # format, while an explicit join always yields "n1, n2, ...".
    rows = []
    for key in ('k1', 'k2', 'k3'):
        members = ', '.join(str(value) for value in bitmap_generate())
        # '|' between the two fields matches the "column_separator" header.
        rows.append(key + '|' + members)
    payload = '\n'.join(rows)

    database = 'bitmap_test'
    tablename = 'test1'
    api = 'http://localhost:18140/api/%s/%s/_stream_load' % (database, tablename)

    # Use the session as a context manager so it is closed even if the
    # request or the JSON decoding raises.
    with LoadSession() as session:
        session.auth = (username, password)
        response = session.put(url=api, headers=headers, data=payload)
        print(response.json())
# Run the demo only when this file is executed directly as a script.
if __name__ == "__main__":
    main()
使用 bitmap_from_string 函数写入目标表
-- Convert each staged string in test1.col2 with bitmap_from_string and insert the rows into test.
insert into test select col1, bitmap_from_string(col2) from test1;
查看结果
-- Render the bitmaps stored in test.col2 as strings to inspect the loaded data.
select bitmap_to_string(col2) from test;