同时使用 impala 和 pyhive 建立连接时会报这个错误,原因是 import 的顺序不对,导致两个库发生冲突。
按照以下顺序导入就不会报错了:
from pyhive import hive
from impala.dbapi import connect as impalaConn
from impala.util import as_pandas
这里顺便附上两个简单的查询方法(其中 config 是自行加载的配置对象,pd 需要通过 import pandas as pd 导入):
def selectFromImpala(sql):
    """Execute ``sql`` over an Impala connection and return ``as_pandas(cursor)``.

    Connection settings are read from the module-level ``config`` mapping.
    NOTE(review): they come from the ``"hive"`` section (``ipaddress``,
    ``port``, ``username``) -- confirm that reusing this section for Impala
    is intended.

    Args:
        sql: Query text, passed unchanged to ``cursor.execute``.

    Returns:
        Whatever ``impala.util.as_pandas`` builds from the executed cursor
        (a pandas DataFrame according to the impyla documentation).

    Raises:
        Any exception raised while connecting, executing or fetching is
        propagated; the cursor and connection are closed before it escapes.
    """
    hive_cfg = config["hive"]
    ipaddr = hive_cfg["ipaddress"]
    port = hive_cfg["port"]
    username = hive_cfg["username"]

    conn = impalaConn(host=ipaddr, port=int(port))
    try:
        cursor = conn.cursor(user=username)
        try:
            cursor.execute(sql)
            return as_pandas(cursor)
        finally:
            # Release the cursor even when execute/fetch fails.
            cursor.close()
    finally:
        # Always close the connection so a failed query does not leak it.
        conn.close()
def selectFromHive(sql):
    """Execute ``sql`` over a PyHive connection and return the result as a DataFrame.

    Connection settings (``hostname``, ``port``, ``username``) are read from
    the ``"hive"`` section of the module-level ``config`` mapping.

    Args:
        sql: Query text, passed unchanged to ``pd.read_sql``.

    Returns:
        The DataFrame produced by ``pd.read_sql``, with every column renamed
        to the part after its last ``.`` (e.g. ``"t.col"`` becomes ``"col"``).

    Raises:
        Any exception raised while connecting or reading is propagated; the
        connection is closed before it escapes.
    """
    hive_cfg = config["hive"]
    hostname = hive_cfg["hostname"]
    port = hive_cfg["port"]
    username = hive_cfg["username"]

    conn = hive.connect(host=hostname, port=port, username=username)
    try:
        df = pd.read_sql(sql, conn)
    finally:
        # Close the connection even when the query fails.
        conn.close()

    # Keep only the text after the last dot of each column name --
    # presumably to drop a "table." prefix added by Hive; verify against
    # real query output.
    short_names = {column: column.split('.')[-1] for column in df.columns}
    df.rename(columns=short_names, inplace=True)
    return df