Overview
TiDB is a distributed NewSQL database. Here we combine TiDB Operator with Kubernetes to deploy a TiDB cluster and then scale it out online.
Storage uses local volumes; for how to manage local volumes, see my other article:
https://www.jianshu.com/p/bfa204cef8c0
Kubernetes cluster information (version 1.13):
node1 10.16.16.119 master node
node2 10.16.16.120 master node
node3 10.16.16.68
node4 10.16.16.68
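As a quick sanity check of the machine list before starting (output will vary with your environment):
kubectl get nodes -o wide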
Deploy the CRD resource
git clone https://github.com/pingcap/tidb-operator.git
cd tidb-operator/
kubectl apply -f manifests/crd.yaml
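To confirm the custom resource definition was registered (filtering by the pingcap.com API group keeps this version-agnostic):
kubectl get crd | grep pingcap.com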
Install TiDB Operator
View the configurable values:
helm inspect charts/tidb-operator
apiVersion: v1
description: tidb-operator Helm chart for Kubernetes
home: https://github.com/pingcap/tidb-operator
keywords:
- operator
- newsql
- htap
- database
- mysql
- raft
name: tidb-operator
sources:
- https://github.com/pingcap/tidb-operator
version: 0.1.0

---
# Default values for tidb-operator

# clusterScoped is whether tidb-operator should manage kubernetes cluster wide tidb clusters
# Also see rbac.create and controllerManager.serviceAccount
clusterScoped: true

# Also see clusterScoped and controllerManager.serviceAccount
rbac:
  create: true

# operatorImage is TiDB Operator image
operatorImage: pingcap/tidb-operator:latest
imagePullPolicy: IfNotPresent

defaultStorageClassName: local-storage

controllerManager:
  # With rbac.create=false, the user is responsible for creating this account
  # With rbac.create=true, this service account will be created
  # Also see rbac.create and clusterScoped
  serviceAccount: tidb-controller-manager
  logLevel: 2
  replicas: 1
  resources:
    limits:
      cpu: 250m
      memory: 150Mi
    requests:
      cpu: 80m
      memory: 50Mi
  # autoFailover is whether tidb-operator should auto failover when failure occurs
  autoFailover: false
  # pd failover period default(5m)
  pdFailoverPeriod: 5m
  # tidb failover period default(5m)
  tidbFailoverPeriod: 5m

scheduler:
  # With rbac.create=false, the user is responsible for creating this account
  # With rbac.create=true, this service account will be created
  # Also see rbac.create and clusterScoped
  serviceAccount: tidb-scheduler
  logLevel: 2
  replicas: 1
  schedulerName: tidb-scheduler
  resources:
    limits:
      cpu: 250m
      memory: 150Mi
    requests:
      cpu: 80m
      memory: 50Mi
  kubeSchedulerImageName: gcr.io/google-containers/hyperkube
  # This will default to matching your kubernetes version
  # kubeSchedulerImageTag:
Now specify our own values to override the scheduler image, because gcr.io is not reachable from mainland China.
There are two kinds of local storage: HDD and SSD.
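To see which storage classes the cluster actually offers before picking one (the two class names used below come from my local-volume setup; yours may differ):
kubectl get storageclass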
vi ./w11_tidb-operator.config
# using local HDD here
defaultStorageClassName: local-storage
#defaultStorageClassName: ssd-local-storage
scheduler:
  kubeSchedulerImageName: googlecontainer/hyperkube
  kubeSchedulerImageTag: v1.13.4
helm install charts/tidb-operator -f ./w11_tidb-operator.config --name=tidb-operator --namespace=tidb-admin
kubectl get pods --namespace tidb-admin -l app.kubernetes.io/instance=tidb-operator
If all pods are Ready, the installation succeeded.
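For reference, a healthy install looks roughly like this (illustrative output; the name suffixes, READY counts, and ages will differ by chart version):
NAME                                       READY   STATUS    RESTARTS   AGE
tidb-controller-manager-5c9b6cc4bc-s8qtt   1/1     Running   0          1m
tidb-scheduler-56c87ddbc5-tqpfd            2/2     Running   0          1m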
Create the TiDB cluster
View the configuration values:
helm inspect charts/tidb-cluster
There are too many values to list here; they mainly fall into:
PD settings
TiDB settings
TiKV settings
monitor settings
Write the custom values (here saved as ./w11.config):
clusterName: w11tidb
pd:
  resources:
    limits:
      cpu: 4000m
      memory: 4Gi
    requests:
      cpu: 1000m
      memory: 1Gi
      storage: 1Gi
tikv:
  replicas: 3
  resources:
    limits:
      cpu: 16000m
      memory: 10Gi
      storage: 100Gi
    requests:
      cpu: 1000m
      memory: 2Gi
      storage: 5Gi
tidb:
  separateSlowLog: True
  replicas: 3
  resources:
    limits:
      cpu: 16000m
      memory: 6Gi
    requests:
      cpu: 1000m
      memory: 1Gi
monitor:
  persistent: true
helm install charts/tidb-cluster -f ./w11.config --name=tidb-cluster --namespace=tidb
kubectl get pods --namespace tidb -l app.kubernetes.io/instance=tidb-cluster -o wide
Check whether all pods are up and Running.
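Roughly what to expect once everything settles (illustrative; READY counts depend on the chart version and sidecars, and there is one pd/tikv/tidb pod per replica):
NAME                                  READY   STATUS    RESTARTS   AGE
w11tidb-discovery-xxx                 1/1     Running   0          5m
w11tidb-monitor-xxx                   2/2     Running   0          5m
w11tidb-pd-0                          1/1     Running   0          5m
w11tidb-pd-1                          1/1     Running   0          5m
w11tidb-pd-2                          1/1     Running   0          5m
w11tidb-tidb-0                        2/2     Running   0          4m
w11tidb-tikv-0                        1/1     Running   0          4m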
View the service ports:
kubectl get svc --namespace tidb
The monitoring service is w11tidb-grafana, and the TiDB service is w11tidb-tidb.
Connect to TiDB
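TiDB speaks the MySQL protocol, so any MySQL client works. A minimal sketch, assuming the w11tidb-tidb service is exposed as a NodePort (31234 below is a placeholder; substitute the port shown by kubectl get svc, and any node IP):
# look up the NodePort that maps to TiDB's port 4000
kubectl -n tidb get svc w11tidb-tidb
# a freshly created TiDB root user has an empty password by default
mysql -h 10.16.16.119 -P 31234 -u root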
Monitoring
http://10.16.16.119:30976
The username and password are both admin.
Scale out TiDB online
Edit the values:
Scale TiKV and TiDB from 3 replicas to 6 and raise their resources.
clusterName: w11tidb
pd:
  resources:
    limits:
      cpu: 4000m
      memory: 4Gi
    requests:
      cpu: 1000m
      memory: 1Gi
      storage: 1Gi
tikv:
  replicas: 6
  resources:
    limits:
      cpu: 16000m
      memory: 20Gi
      storage: 100Gi
    requests:
      cpu: 1000m
      memory: 2Gi
      storage: 5Gi
tidb:
  separateSlowLog: True
  replicas: 6
  resources:
    limits:
      cpu: 16000m
      memory: 16Gi
    requests:
      cpu: 1000m
      memory: 1Gi
monitor:
  persistent: true
Scale out:
helm upgrade -f w11.config tidb-cluster ./charts/tidb-cluster/
Observe the pods:
kubectl get pods --namespace tidb -l app.kubernetes.io/instance=tidb-cluster -o wide
Both components scale out with new pods, while the existing pods are restarted as needed to apply the new resource settings.
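To follow the rollout until all six replicas of each component are Running, the same listing can be watched:
kubectl get pods --namespace tidb -l app.kubernetes.io/instance=tidb-cluster -o wide -w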
Collecting the slow log
Enable recording the slow log separately:
tidb:
  separateSlowLog: True
Update the cluster:
helm upgrade -f w11.config tidb-cluster ./charts/tidb-cluster/
kubectl logs -n ${namespace} ${tidbPodName} -c slowlog
Or use stern:
stern -n ${namespace} tidb -c slowlog
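With the names used in this article, and assuming the usual StatefulSet pod naming, the first TiDB pod's slow log would be:
kubectl logs -n tidb w11tidb-tidb-0 -c slowlog -f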
Backup
Create a secret holding the root account credentials:
kubectl --namespace tidb create secret generic backup-secret --from-literal=user=root --from-literal=password=<password>
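To double-check what landed in the secret (Kubernetes stores the values base64-encoded):
kubectl -n tidb get secret backup-secret -o jsonpath='{.data.user}' | base64 -d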
Configure the backup values:
helm inspect ./charts/tidb-backup/
vi ./w11_backup.config
clusterName: w11tidb
storage:
  className: local-storage
  size: 10Gi
Deploy the backup:
helm install --name w11backup ./charts/tidb-backup/ -f ./w11_backup.config --namespace tidb
View the job:
kubectl get jobs --namespace tidb -l app.kubernetes.io/component=backup
View the backup volume:
kubectl -n tidb get pv
kubectl -n tidb get pv local-pv-e85e3bd9 -o yaml
We find the backup landed on node1.
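How we can tell: a local PV is pinned to a single node through its nodeAffinity, which can be read directly:
kubectl get pv local-pv-e85e3bd9 -o jsonpath='{.spec.nodeAffinity}'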
Create a scheduled backup CronJob
Create the root account secret (same as above):
kubectl --namespace tidb create secret generic backup-secret --from-literal=user=root --from-literal=password=<password>
helm inspect ./charts/tidb-cluster/
Configure the following values to match your own setup:
binlog:
  pump:
    create: false
    replicas: 1
    image: pingcap/tidb-binlog:v2.1.0
    imagePullPolicy: IfNotPresent
    logLevel: info
    # storageClassName is a StorageClass provides a way for administrators to describe the "classes" of storage they offer.
    # different classes might map to quality-of-service levels, or to backup policies,
    # or to arbitrary policies determined by the cluster administrators.
    # refer to https://kubernetes.io/docs/concepts/storage/storage-classes
    storageClassName: local-storage
    storage: 10Gi
    # a integer value to control expiry date of the binlog data, indicates for how long (in days) the binlog data would be stored.
    # must bigger than 0
    gc: 7
    # number of seconds between heartbeat ticks (in 2 seconds)
    heartbeatInterval: 2

  drainer:
    create: false
    image: pingcap/tidb-binlog:v2.1.0
    imagePullPolicy: IfNotPresent
    logLevel: info
    # storageClassName is a StorageClass provides a way for administrators to describe the "classes" of storage they offer.
    # different classes might map to quality-of-service levels, or to backup policies,
    # or to arbitrary policies determined by the cluster administrators.
    # refer to https://kubernetes.io/docs/concepts/storage/storage-classes
    storageClassName: local-storage
    storage: 10Gi
    # parallel worker count (default 1)
    workerCount: 1
    # the interval time (in seconds) of detect pumps' status (default 10)
    detectInterval: 10
    # disbale detect causality
    disableDetect: false
    # disable dispatching sqls that in one same binlog; if set true, work-count and txn-batch would be useless
    disableDispatch: false
    # # disable sync these schema
    ignoreSchemas: "INFORMATION_SCHEMA,PERFORMANCE_SCHEMA,mysql,test"
    # if drainer donesn't have checkpoint, use initial commitTS to initial checkpoint
    initialCommitTs: 0
    # enable safe mode to make syncer reentrant
    safeMode: false
    # number of binlog events in a transaction batch (default 1)
    txnBatch: 1
    # downstream storage, equal to --dest-db-type
    # valid values are "mysql", "pb", "kafka"
    destDBType: pb
    mysql: {}
      # host: "127.0.0.1"
      # user: "root"
      # password: ""
      # port: 3306
      # # Time and size limits for flash batch write
      # timeLimit: "30s"
      # sizeLimit: "100000"
    kafka: {}
      # only need config one of zookeeper-addrs and kafka-addrs, will get kafka address if zookeeper-addrs is configed.
      # zookeeperAddrs: "127.0.0.1:2181"
      # kafkaAddrs: "127.0.0.1:9092"
      # kafkaVersion: "0.8.2.0"

scheduledBackup:
  create: false
  binlogImage: pingcap/tidb-binlog:v2.1.0
  binlogImagePullPolicy: IfNotPresent
  # https://github.com/tennix/tidb-cloud-backup
  mydumperImage: pingcap/tidb-cloud-backup:latest
  mydumperImagePullPolicy: IfNotPresent
  # storageClassName is a StorageClass provides a way for administrators to describe the "classes" of storage they offer.
  # different classes might map to quality-of-service levels, or to backup policies,
  # or to arbitrary policies determined by the cluster administrators.
  # refer to https://kubernetes.io/docs/concepts/storage/storage-classes
  storageClassName: local-storage
  storage: 100Gi
  # https://kubernetes.io/docs/tasks/job/automated-tasks-with-cron-jobs/#schedule
  schedule: "0 0 * * *"
  # https://kubernetes.io/docs/tasks/job/automated-tasks-with-cron-jobs/#suspend
  suspend: false
  # https://kubernetes.io/docs/tasks/job/automated-tasks-with-cron-jobs/#jobs-history-limits
  successfulJobsHistoryLimit: 3
  failedJobsHistoryLimit: 1
  # https://kubernetes.io/docs/tasks/job/automated-tasks-with-cron-jobs/#starting-deadline
  startingDeadlineSeconds: 3600
  # https://github.com/maxbube/mydumper/blob/master/docs/mydumper_usage.rst#options
  options: "--chunk-filesize=100"
  # secretName is the name of the secret which stores user and password used for backup
  # Note: you must give the user enough privilege to do the backup
  # you can create the secret by:
  # kubectl create secret generic backup-secret --from-literal=user=root --from-literal=password=<password>
  secretName: backup-secret
  # backup to gcp
  gcp: {}
    # bucket: ""
    # secretName is the name of the secret which stores the gcp service account credentials json file
    # The service account must have read/write permission to the above bucket.
    # Read the following document to create the service account and download the credentials file as credentials.json:
    # https://cloud.google.com/docs/authentication/production#obtaining_and_providing_service_account_credentials_manually
    # And then create the secret by: kubectl create secret generic gcp-backup-secret --from-file=./credentials.json
    # secretName: gcp-backup-secret
  # backup to ceph object storage
  ceph: {}
    # endpoint: ""
    # bucket: ""
    # secretName is the name of the secret which stores ceph object store access key and secret key
    # You can create the secret by:
    # kubectl create secret generic ceph-backup-secret --from-literal=access_key=<access-key> --from-literal=secret_key=<secret-key>
    # secretName: ceph-backup-secret
My configuration is as follows. Append it to the end of the cluster values file, which here is w11.config:
vi ./w11.config

###
# (the TiDB settings above are unchanged and omitted here)
###
binlog:
  pump:
    create: true
    replicas: 1
    image: pingcap/tidb-binlog:v2.1.0
    imagePullPolicy: IfNotPresent
    logLevel: info
    storageClassName: local-storage
    storage: 10Gi
    gc: 7
    heartbeatInterval: 2
  drainer:
    create: true
    image: pingcap/tidb-binlog:v2.1.0
    imagePullPolicy: IfNotPresent
    logLevel: info
    storageClassName: local-storage
    storage: 10Gi
    # parallel worker count (default 1)
    workerCount: 10
    # the interval time (in seconds) of detect pumps' status (default 10)
    detectInterval: 10
    # disable detect causality
    disableDetect: false
    # disable dispatching sqls that in one same binlog; if set true, work-count and txn-batch would be useless
    disableDispatch: false
    # disable sync these schemas
    ignoreSchemas: "INFORMATION_SCHEMA,PERFORMANCE_SCHEMA,mysql,test"
    # if drainer doesn't have checkpoint, use initial commitTS to initial checkpoint
    initialCommitTs: 0
    # enable safe mode to make syncer reentrant
    safeMode: false
    # number of binlog events in a transaction batch (default 1)
    txnBatch: 10
    # downstream storage, equal to --dest-db-type
    # valid values are "mysql", "pb", "kafka"
    destDBType: pb
    mysql: {}
    kafka: {}

scheduledBackup:
  create: true
  binlogImage: pingcap/tidb-binlog:v2.1.0
  binlogImagePullPolicy: IfNotPresent
  mydumperImage: pingcap/tidb-cloud-backup:latest
  mydumperImagePullPolicy: IfNotPresent
  storageClassName: local-storage
  storage: 20Gi
  schedule: "0 0 * * *"
Create the backup tasks:
helm upgrade tidb-cluster ./charts/tidb-cluster/ -f ./w11.config
kubectl get pods --namespace tidb -l app.kubernetes.io/instance=tidb-cluster -o wide
View the pump (incremental backup) logs:
kubectl --namespace tidb logs w11tidb-pump-0
View the drainer (incremental backup) logs:
kubectl --namespace tidb logs w11tidb-drainer-0
View the full-backup CronJob:
kubectl get cronjob --namespace tidb -l app.kubernetes.io/instance=tidb-cluster
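To check the schedule, last run time, and recent events without waiting for midnight, describe the CronJob (the name below is assumed from the clusterName; substitute whatever the previous command listed):
kubectl -n tidb describe cronjob w11tidb-scheduled-backup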