使用Docker部署ELK时,Elasticsearch的数据目录使用NFS存储(挂载远程地址),在一次启动时出现了locking(文件锁)错误。
错误详情如下:
prod_elasticsearch.1.xmwr7padq9d0@docker3 | {"type": "server", "timestamp": "2019-07-13T17:21:21,366+0000", "level": "WARN", "component": "o.e.b.ElasticsearchUncaughtExceptionHandler", "cluster.name": "docker-cluster", "node.name": "2f81a7e8e374", "message": "uncaught exception in thread [main]" ,
prod_elasticsearch.1.xmwr7padq9d0@docker3 | "stacktrace": ["org.elasticsearch.bootstrap.StartupException: java.lang.IllegalStateException: failed to obtain node locks, tried [[/usr/share/elasticsearch/data]] with lock id [0]; maybe these locations are not writable or multiple nodes were started without increasing [node.max_local_storage_nodes] (was [1])?",
prod_elasticsearch.1.xmwr7padq9d0@docker3 | "at org.elasticsearch.bootstrap.Elasticsearch.init(Elasticsearch.java:163) ~[elasticsearch-7.2.0.jar:7.2.0]",
prod_elasticsearch.1.xmwr7padq9d0@docker3 | "at org.elasticsearch.bootstrap.Elasticsearch.execute(Elasticsearch.java:150) ~[elasticsearch-7.2.0.jar:7.2.0]",
prod_elasticsearch.1.xmwr7padq9d0@docker3 | "at org.elasticsearch.cli.EnvironmentAwareCommand.execute(EnvironmentAwareCommand.java:86) ~[elasticsearch-7.2.0.jar:7.2.0]",
prod_elasticsearch.1.xmwr7padq9d0@docker3 | "at org.elasticsearch.cli.Command.mainWithoutErrorHandling(Command.java:124) ~[elasticsearch-cli-7.2.0.jar:7.2.0]",
prod_elasticsearch.1.xmwr7padq9d0@docker3 | "at org.elasticsearch.cli.Command.main(Command.java:90) ~[elasticsearch-cli-7.2.0.jar:7.2.0]",
prod_elasticsearch.1.xmwr7padq9d0@docker3 | "at org.elasticsearch.bootstrap.Elasticsearch.main(Elasticsearch.java:115) ~[elasticsearch-7.2.0.jar:7.2.0]",
prod_elasticsearch.1.xmwr7padq9d0@docker3 | "at org.elasticsearch.bootstrap.Elasticsearch.main(Elasticsearch.java:92) ~[elasticsearch-7.2.0.jar:7.2.0]",
prod_elasticsearch.1.xmwr7padq9d0@docker3 | "Caused by: java.lang.IllegalStateException: failed to obtain node locks, tried [[/usr/share/elasticsearch/data]] with lock id [0]; maybe these locations are not writable or multiple nodes were started without increasing [node.max_local_storage_nodes] (was [1])?",
prod_elasticsearch.1.xmwr7padq9d0@docker3 | "at org.elasticsearch.env.NodeEnvironment.<init>(NodeEnvironment.java:298) ~[elasticsearch-7.2.0.jar:7.2.0]",
prod_elasticsearch.1.xmwr7padq9d0@docker3 | "at org.elasticsearch.node.Node.<init>(Node.java:271) ~[elasticsearch-7.2.0.jar:7.2.0]",
prod_elasticsearch.1.xmwr7padq9d0@docker3 | "at org.elasticsearch.node.Node.<init>(Node.java:251) ~[elasticsearch-7.2.0.jar:7.2.0]",
prod_elasticsearch.1.xmwr7padq9d0@docker3 | "at org.elasticsearch.bootstrap.Bootstrap$5.<init>(Bootstrap.java:221) ~[elasticsearch-7.2.0.jar:7.2.0]",
prod_elasticsearch.1.xmwr7padq9d0@docker3 | "at org.elasticsearch.bootstrap.Bootstrap.setup(Bootstrap.java:221) ~[elasticsearch-7.2.0.jar:7.2.0]",
prod_elasticsearch.1.xmwr7padq9d0@docker3 | "at org.elasticsearch.bootstrap.Bootstrap.init(Bootstrap.java:349) ~[elasticsearch-7.2.0.jar:7.2.0]",
prod_elasticsearch.1.xmwr7padq9d0@docker3 | "at org.elasticsearch.bootstrap.Elasticsearch.init(Elasticsearch.java:159) ~[elasticsearch-7.2.0.jar:7.2.0]",
prod_elasticsearch.1.xmwr7padq9d0@docker3 | "... 6 more",
prod_elasticsearch.1.xmwr7padq9d0@docker3 | "Caused by: java.io.IOException: failed to obtain lock on /usr/share/elasticsearch/data/nodes/0",
prod_elasticsearch.1.xmwr7padq9d0@docker3 | "at org.elasticsearch.env.NodeEnvironment$NodeLock.<init>(NodeEnvironment.java:220) ~[elasticsearch-7.2.0.jar:7.2.0]",
prod_elasticsearch.1.xmwr7padq9d0@docker3 | "at org.elasticsearch.env.NodeEnvironment.<init>(NodeEnvironment.java:268) ~[elasticsearch-7.2.0.jar:7.2.0]",
prod_elasticsearch.1.xmwr7padq9d0@docker3 | "at org.elasticsearch.node.Node.<init>(Node.java:271) ~[elasticsearch-7.2.0.jar:7.2.0]",
prod_elasticsearch.1.xmwr7padq9d0@docker3 | "at org.elasticsearch.node.Node.<init>(Node.java:251) ~[elasticsearch-7.2.0.jar:7.2.0]",
prod_elasticsearch.1.xmwr7padq9d0@docker3 | "at org.elasticsearch.bootstrap.Bootstrap$5.<init>(Bootstrap.java:221) ~[elasticsearch-7.2.0.jar:7.2.0]",
prod_elasticsearch.1.xmwr7padq9d0@docker3 | "at org.elasticsearch.bootstrap.Bootstrap.setup(Bootstrap.java:221) ~[elasticsearch-7.2.0.jar:7.2.0]",
prod_elasticsearch.1.xmwr7padq9d0@docker3 | "at org.elasticsearch.bootstrap.Bootstrap.init(Bootstrap.java:349) ~[elasticsearch-7.2.0.jar:7.2.0]",
prod_elasticsearch.1.xmwr7padq9d0@docker3 | "at org.elasticsearch.bootstrap.Elasticsearch.init(Elasticsearch.java:159) ~[elasticsearch-7.2.0.jar:7.2.0]",
prod_elasticsearch.1.xmwr7padq9d0@docker3 | "... 6 more",
prod_elasticsearch.1.xmwr7padq9d0@docker3 | "Caused by: java.io.IOException: No locks available",
prod_elasticsearch.1.xmwr7padq9d0@docker3 | "at sun.nio.ch.FileDispatcherImpl.lock0(Native Method) ~[?:?]",
prod_elasticsearch.1.xmwr7padq9d0@docker3 | "at sun.nio.ch.FileDispatcherImpl.lock(FileDispatcherImpl.java:96) ~[?:?]",
prod_elasticsearch.1.xmwr7padq9d0@docker3 | "at sun.nio.ch.FileChannelImpl.tryLock(FileChannelImpl.java:1161) ~[?:?]",
prod_elasticsearch.1.xmwr7padq9d0@docker3 | "at java.nio.channels.FileChannel.tryLock(FileChannel.java:1165) ~[?:?]",
prod_elasticsearch.1.xmwr7padq9d0@docker3 | "at org.apache.lucene.store.NativeFSLockFactory.obtainFSLock(NativeFSLockFactory.java:126) ~[lucene-core-8.0.0.jar:8.0.0 2ae4746365c1ee72a0047ced7610b2096e438979 - jimczi - 2019-03-08 11:58:55]",
prod_elasticsearch.1.xmwr7padq9d0@docker3 | "at org.apache.lucene.store.FSLockFactory.obtainLock(FSLockFactory.java:41) ~[lucene-core-8.0.0.jar:8.0.0 2ae4746365c1ee72a0047ced7610b2096e438979 - jimczi - 2019-03-08 11:58:55]",
prod_elasticsearch.1.xmwr7padq9d0@docker3 | "at org.apache.lucene.store.BaseDirectory.obtainLock(BaseDirectory.java:45) ~[lucene-core-8.0.0.jar:8.0.0 2ae4746365c1ee72a0047ced7610b2096e438979 - jimczi - 2019-03-08 11:58:55]",
prod_elasticsearch.1.xmwr7padq9d0@docker3 | "at org.elasticsearch.env.NodeEnvironment$NodeLock.<init>(NodeEnvironment.java:213) ~[elasticsearch-7.2.0.jar:7.2.0]",
prod_elasticsearch.1.xmwr7padq9d0@docker3 | "at org.elasticsearch.env.NodeEnvironment.<init>(NodeEnvironment.java:268) ~[elasticsearch-7.2.0.jar:7.2.0]",
prod_elasticsearch.1.xmwr7padq9d0@docker3 | "at org.elasticsearch.node.Node.<init>(Node.java:271) ~[elasticsearch-7.2.0.jar:7.2.0]",
prod_elasticsearch.1.xmwr7padq9d0@docker3 | "at org.elasticsearch.node.Node.<init>(Node.java:251) ~[elasticsearch-7.2.0.jar:7.2.0]",
prod_elasticsearch.1.xmwr7padq9d0@docker3 | "at org.elasticsearch.bootstrap.Bootstrap$5.<init>(Bootstrap.java:221) ~[elasticsearch-7.2.0.jar:7.2.0]",
prod_elasticsearch.1.xmwr7padq9d0@docker3 | "at org.elasticsearch.bootstrap.Bootstrap.setup(Bootstrap.java:221) ~[elasticsearch-7.2.0.jar:7.2.0]",
prod_elasticsearch.1.xmwr7padq9d0@docker3 | "at org.elasticsearch.bootstrap.Bootstrap.init(Bootstrap.java:349) ~[elasticsearch-7.2.0.jar:7.2.0]",
prod_elasticsearch.1.xmwr7padq9d0@docker3 | "at org.elasticsearch.bootstrap.Elasticsearch.init(Elasticsearch.java:159) ~[elasticsearch-7.2.0.jar:7.2.0]",
prod_elasticsearch.1.xmwr7padq9d0@docker3 | "... 6 more"] }
通过查询,查到如下解决方法(一一排查):
- 检查Elasticsearch运行进程,如果还有运行的则kill,检查后并没有正在运行的es实例
ps -ef | grep java
- 检查NFS目录的权限(如果权限没问题,ES会在数据目录里创建node.lock文件)
chown -R 1000:1000 /docker/volumes/elasticsearch/data
chmod -R 777 /docker/volumes/elasticsearch/data
- 重启docker和nfs服务,问题依旧(NFS服务名因发行版而异,例如nfs-server或nfs-kernel-server)
systemctl restart nfs-server
systemctl restart docker
- 删除node.lock文件(问题依旧)
rm /docker/volumes/elasticsearch/data/nodes/0/node.lock
- 验证NFS服务器上的RPC status服务(rpc.statd,NFSv3文件锁依赖它)是否可用,没什么问题
rpcinfo -u $NFSSERVER status
一开始使用的是NFSv3协议,docker-compose.yml中nfs的选项为 `o: addr=${NFSSERVER},vers=3,rw`,改用NFSv4后问题解决。NFSv3的文件锁依赖独立的NLM协议(lockd/statd),该链路不通时就会报"No locks available";NFSv4把文件锁内置在协议本身,不再依赖这些辅助服务。
更改vers=3 为 vers=4
docker-compose.yml 配置如下
# Two-node Elasticsearch cluster (es01, es02) plus Kibana (kib01) for Docker
# swarm. es01 keeps its data on an NFS-backed named volume; es02 uses a plain
# local volume.
version: "3.7"

services:
  es01:
    image: elasticsearch:7.12.0
    # Ignored by `docker stack deploy`; only honoured by docker-compose.
    container_name: es01
    environment:
      - node.name=es01
      - node.master=true
      - node.data=true
      # NOTE(review): raises the limit named in the "failed to obtain node
      # locks" error; probably unnecessary once NFS locking works - confirm.
      - node.max_local_storage_nodes=2
      - cluster.name=es-docker-cluster
      - discovery.seed_hosts=es02
      - cluster.initial_master_nodes=es01
      - bootstrap.memory_lock=false
      - "ES_JAVA_OPTS=-Xms1g -Xmx1g"
      - reindex.remote.whitelist=192.168.50.20:9200
      - http.cors.enabled=true
      - "http.cors.allow-origin=*"
    volumes:
      # Long syntax so that `nocopy` can be set on the NFS-backed volume.
      - type: volume
        source: es_data01
        target: /usr/share/elasticsearch/data
        volume:
          nocopy: true
    ports:
      # Quoted so the HOST:CONTAINER pairs are always read as strings.
      - "9200:9200"
      - "9300:9300"
    networks:
      - elastic
    deploy:
      placement:
        constraints:
          - "node.role==manager"

  es02:
    image: elasticsearch:7.12.0
    # Ignored by `docker stack deploy`; only honoured by docker-compose.
    container_name: es02
    environment:
      - node.name=es02
      - node.data=true
      - node.master=false
      - cluster.name=es-docker-cluster
      - discovery.seed_hosts=es01
      - cluster.initial_master_nodes=es01
      - bootstrap.memory_lock=false
      - "ES_JAVA_OPTS=-Xms1g -Xmx1g"
      - reindex.remote.whitelist=192.168.50.20:9200
      - http.cors.enabled=true
      - "http.cors.allow-origin=*"
    volumes:
      - es_data02:/usr/share/elasticsearch/data
    networks:
      - elastic
    # Ignored by `docker stack deploy`; only honoured by docker-compose.
    depends_on:
      - es01
    deploy:
      placement:
        constraints:
          - "node.role==worker"
      resources:
        limits:
          cpus: "0.50"

  kib01:
    image: kibana:7.12.0
    # Ignored by `docker stack deploy`; only honoured by docker-compose.
    container_name: kib01
    ports:
      - "5601:5601"
    environment:
      ELASTICSEARCH_URL: http://es01:9200
      ELASTICSEARCH_HOSTS: http://es01:9200
    networks:
      - elastic
    # Ignored by `docker stack deploy`; only honoured by docker-compose.
    depends_on:
      - es01
      - es02
    deploy:
      placement:
        constraints:
          - "node.role==manager"

volumes:
  es_data01:
    # NFS export mounted through the local volume driver. vers=4 is used
    # because vers=3 failed with "No locks available" when Elasticsearch tried
    # to take its node lock.
    driver: local
    driver_opts:
      type: nfs
      # "nfsaddress" appears to be a placeholder for the NFS server address.
      o: "addr=nfsaddress,vers=4,rw"
      device: ":/volume1/runtime/elasticsearch/data"
  es_data02:
    driver: local

networks:
  elastic:
    driver: overlay
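修改NFS选项后,需要先删除已有的docker volume再重新部署,否则仍然会启动失败:volume的挂载选项在创建时就已确定,修改docker-compose.yml不会更新已存在的volume(实际卷名可能带有项目/stack名前缀,可先用docker volume ls确认)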
docker volume rm es_data01