参考
https://yq.aliyun.com/articles/73537
https://www.timescale.com
https://github.com/digoal/blog/blob/master/201801/20180129_01.md
官方说明文档
https://docs.timescale.com/v1.2/getting-started
安装
必须组件安装
sudo apt install cmake
下载软件包 && 编译安装
wget https://github.com/timescale/timescaledb/releases/download/1.2.1/timescaledb-1.2.1.tar.gz
tar zxvf timescaledb-1.2.1.tar.gz
cd timescaledb
# Find eth path of pg_config
whereis pg_config
# Bootstrap the build system
./bootstrap -DPG_CONFIG=/opt/pg11/bin/pg_config
# To build the extension
cd build && make
# To install
make install
配置postgresql.conf(在数据库启动时自动加载timescale lib库)
vi $PGDATA/postgresql.conf
shared_preload_libraries = 'timescaledb'
pg_ctl restart -m fast
创建扩展
testdb=# create database timescaledb;
CREATE DATABASE
testdb=# \c timescaledb
You are now connected to database "timescaledb" as user "postgres".
timescaledb=# create extension timescaledb ;
WARNING:
WELCOME TO
_____ _ _ ____________
|_ _(_) | | | _ \ ___ \
| | _ _ __ ___ ___ ___ ___ __ _| | ___| | | | |_/ /
| | | | _ ` _ \ / _ \/ __|/ __/ _` | |/ _ \ | | | ___ \
| | | | | | | | | __/\__ \ (_| (_| | | __/ |/ /| |_/ /
|_| |_|_| |_| |_|\___||___/\___\__,_|_|\___|___/ \____/
Running version 1.2.1
For more information on TimescaleDB, please visit the following links:
1. Getting started: https://docs.timescale.com/getting-started
2. API reference documentation: https://docs.timescale.com/api
3. How TimescaleDB is designed: https://docs.timescale.com/introduction/architecture
Note: TimescaleDB collects anonymous reports to better understand and assist our users.
For more information and how to disable, please see our docs https://docs.timescaledb.com/using-time
scaledb/telemetry.
CREATE EXTENSION
timescaledb=#
timescaledb的相关参数
name | setting | description |
---|---|---|
timescaledb.constraint_aware_append | on | Enable constraint-aware append scans |
timescaledb.disable_optimizations | off | Disable all timescale query opt imizations |
timescaledb.optimize_non_hypertables | off | Apply timescale query optimization to plain tables |
timescaledb.restoring | off | Install timescale in restoring mode |
timescaledb.disable_load | off | Disable the loading of the actual extension |
timescaledb.enable_ordered_append | on | Enable ordered append scans |
timescaledb.last_tuned | last tune run | |
timescaledb.last_tuned_version | version of timescaledb-tune | |
timescaledb.license_key | CommunityLicense | TimescaleDB license key |
timescaledb.max_background_workers | 8 | Maximum background worker processes allocated to TimescaleDB |
timescaledb.max_cached_chunks_per_hypertable | 100 | Maximum cached chunks |
timescaledb.max_open_chunks_per_insert | 167 | Maximum open chunks per insert |
timescaledb.telemetry_level | basic | Telemetry settings level |
报错
CMake Error at CMakeLists.txt:294 (message)
CMake Error at CMakeLists.txt:294 (message):
PostgreSQL was built without OpenSSL support, which TimescaleDB needs for
full compatibility. Please rebuild PostgreSQL using `--with-openssl` or if
you want to continue without OpenSSL, re-run bootstrap with
`-DUSE_OPENSSL=0`
解决方案:
./bootstrap -DPG_CONFIG=/opt/pg11/bin/pg_config -DUSE_OPENSSL=0
使用例子测试
下载样本数据
wget https://timescaledata.blob.core.windows.net/datasets/nyc_data.tar.gz
tar zxvf nyc_data.tar.gz
导入结构
psql -d timescaledb -f nyc_data.sql
--内容如下:
DROP TABLE IF EXISTS "rides";
CREATE TABLE "rides"(
vendor_id TEXT,
pickup_datetime TIMESTAMP WITHOUT TIME ZONE NOT NULL,
dropoff_datetime TIMESTAMP WITHOUT TIME ZONE NOT NULL,
passenger_count NUMERIC,
trip_distance NUMERIC,
pickup_longitude NUMERIC,
pickup_latitude NUMERIC,
rate_code INTEGER,
dropoff_longitude NUMERIC,
dropoff_latitude NUMERIC,
payment_type INTEGER,
fare_amount NUMERIC,
extra NUMERIC,
mta_tax NUMERIC,
tip_amount NUMERIC,
tolls_amount NUMERIC,
improvement_surcharge NUMERIC,
total_amount NUMERIC
);
SELECT create_hypertable('rides', 'pickup_datetime', 'payment_type', 2, create_default_indexes=>FALS
E);
CREATE INDEX ON rides (vendor_id, pickup_datetime desc);
CREATE INDEX ON rides (pickup_datetime desc, vendor_id);
CREATE INDEX ON rides (rate_code, pickup_datetime DESC);
CREATE INDEX ON rides (passenger_count, pickup_datetime desc);
CREATE TABLE IF NOT EXISTS "payment_types"(
payment_type INTEGER,
description TEXT
);
INSERT INTO payment_types(payment_type, description) VALUES
(1, 'credit card'),
(2, 'cash'),
(3, 'no charge'),
(4, 'dispute'),
(5, 'unknown'),
(6, 'voided trip');
CREATE TABLE IF NOT EXISTS "rates"(
rate_code INTEGER,
description TEXT
);
INSERT INTO rates(rate_code, description) VALUES
(1, 'standard rate'),
(2, 'JFK'),
(3, 'Newark'),
(4, 'Nassau or Westchester'),
(5, 'negotiated fare'),
(6, 'group ride');
导入数据
psql -d timescaledb -c "\COPY rides FROM nyc_data_rides.csv CSV"
每天同车超过2人的交易,平均计费多少?
SELECT date_trunc('day', pickup_datetime) as day, avg(fare_amount)
FROM rides
WHERE passenger_count > 1 AND pickup_datetime < '2016-01-08'
GROUP BY day ORDER BY day;
某些查询的性能甚至超过20倍
explain SELECT date_trunc('day', pickup_datetime) as day, COUNT(*) FROM rides
GROUP BY day ORDER BY day
LIMIT 5;
用到timescaleDB内置的一些加速算法。每5分钟间隔为一个BUCKET,输出每个间隔产生了多少笔订单
SELECT time_bucket('5 minute', pickup_datetime) as five_min, count(*)
FROM rides
WHERE pickup_datetime < '2016-01-01 02:00'
GROUP BY five_min ORDER BY five_min;
每个城市的打车交易量
SELECT rates.description, COUNT(vendor_id) as num_trips FROM rides
JOIN rates on rides.rate_code = rates.rate_code
WHERE pickup_datetime < '2016-01-08'
GROUP BY rates.description ORDER BY rates.description;