记一次Elasticsearch locking错误

使用Docker部署ELK时,ES的数据目录使用NFS存储(挂载远程地址),在一次启动时出现了locking错误。

错误详情如下:

prod_elasticsearch.1.xmwr7padq9d0@docker3    | {"type": "server", "timestamp": "2019-07-13T17:21:21,366+0000", "level": "WARN", "component": "o.e.b.ElasticsearchUncaughtExceptionHandler", "cluster.name": "docker-cluster", "node.name": "2f81a7e8e374",  "message": "uncaught exception in thread [main]" ,
prod_elasticsearch.1.xmwr7padq9d0@docker3    | "stacktrace": ["org.elasticsearch.bootstrap.StartupException: java.lang.IllegalStateException: failed to obtain node locks, tried [[/usr/share/elasticsearch/data]] with lock id [0]; maybe these locations are not writable or multiple nodes were started without increasing [node.max_local_storage_nodes] (was [1])?",
prod_elasticsearch.1.xmwr7padq9d0@docker3    | "at org.elasticsearch.bootstrap.Elasticsearch.init(Elasticsearch.java:163) ~[elasticsearch-7.2.0.jar:7.2.0]",
prod_elasticsearch.1.xmwr7padq9d0@docker3    | "at org.elasticsearch.bootstrap.Elasticsearch.execute(Elasticsearch.java:150) ~[elasticsearch-7.2.0.jar:7.2.0]",
prod_elasticsearch.1.xmwr7padq9d0@docker3    | "at org.elasticsearch.cli.EnvironmentAwareCommand.execute(EnvironmentAwareCommand.java:86) ~[elasticsearch-7.2.0.jar:7.2.0]",
prod_elasticsearch.1.xmwr7padq9d0@docker3    | "at org.elasticsearch.cli.Command.mainWithoutErrorHandling(Command.java:124) ~[elasticsearch-cli-7.2.0.jar:7.2.0]",
prod_elasticsearch.1.xmwr7padq9d0@docker3    | "at org.elasticsearch.cli.Command.main(Command.java:90) ~[elasticsearch-cli-7.2.0.jar:7.2.0]",
prod_elasticsearch.1.xmwr7padq9d0@docker3    | "at org.elasticsearch.bootstrap.Elasticsearch.main(Elasticsearch.java:115) ~[elasticsearch-7.2.0.jar:7.2.0]",
prod_elasticsearch.1.xmwr7padq9d0@docker3    | "at org.elasticsearch.bootstrap.Elasticsearch.main(Elasticsearch.java:92) ~[elasticsearch-7.2.0.jar:7.2.0]",
prod_elasticsearch.1.xmwr7padq9d0@docker3    | "Caused by: java.lang.IllegalStateException: failed to obtain node locks, tried [[/usr/share/elasticsearch/data]] with lock id [0]; maybe these locations are not writable or multiple nodes were started without increasing [node.max_local_storage_nodes] (was [1])?",
prod_elasticsearch.1.xmwr7padq9d0@docker3    | "at org.elasticsearch.env.NodeEnvironment.<init>(NodeEnvironment.java:298) ~[elasticsearch-7.2.0.jar:7.2.0]",
prod_elasticsearch.1.xmwr7padq9d0@docker3    | "at org.elasticsearch.node.Node.<init>(Node.java:271) ~[elasticsearch-7.2.0.jar:7.2.0]",
prod_elasticsearch.1.xmwr7padq9d0@docker3    | "at org.elasticsearch.node.Node.<init>(Node.java:251) ~[elasticsearch-7.2.0.jar:7.2.0]",
prod_elasticsearch.1.xmwr7padq9d0@docker3    | "at org.elasticsearch.bootstrap.Bootstrap$5.<init>(Bootstrap.java:221) ~[elasticsearch-7.2.0.jar:7.2.0]",
prod_elasticsearch.1.xmwr7padq9d0@docker3    | "at org.elasticsearch.bootstrap.Bootstrap.setup(Bootstrap.java:221) ~[elasticsearch-7.2.0.jar:7.2.0]",
prod_elasticsearch.1.xmwr7padq9d0@docker3    | "at org.elasticsearch.bootstrap.Bootstrap.init(Bootstrap.java:349) ~[elasticsearch-7.2.0.jar:7.2.0]",
prod_elasticsearch.1.xmwr7padq9d0@docker3    | "at org.elasticsearch.bootstrap.Elasticsearch.init(Elasticsearch.java:159) ~[elasticsearch-7.2.0.jar:7.2.0]",
prod_elasticsearch.1.xmwr7padq9d0@docker3    | "... 6 more",
prod_elasticsearch.1.xmwr7padq9d0@docker3    | "Caused by: java.io.IOException: failed to obtain lock on /usr/share/elasticsearch/data/nodes/0",
prod_elasticsearch.1.xmwr7padq9d0@docker3    | "at org.elasticsearch.env.NodeEnvironment$NodeLock.<init>(NodeEnvironment.java:220) ~[elasticsearch-7.2.0.jar:7.2.0]",
prod_elasticsearch.1.xmwr7padq9d0@docker3    | "at org.elasticsearch.env.NodeEnvironment.<init>(NodeEnvironment.java:268) ~[elasticsearch-7.2.0.jar:7.2.0]",
prod_elasticsearch.1.xmwr7padq9d0@docker3    | "at org.elasticsearch.node.Node.<init>(Node.java:271) ~[elasticsearch-7.2.0.jar:7.2.0]",
prod_elasticsearch.1.xmwr7padq9d0@docker3    | "at org.elasticsearch.node.Node.<init>(Node.java:251) ~[elasticsearch-7.2.0.jar:7.2.0]",
prod_elasticsearch.1.xmwr7padq9d0@docker3    | "at org.elasticsearch.bootstrap.Bootstrap$5.<init>(Bootstrap.java:221) ~[elasticsearch-7.2.0.jar:7.2.0]",
prod_elasticsearch.1.xmwr7padq9d0@docker3    | "at org.elasticsearch.bootstrap.Bootstrap.setup(Bootstrap.java:221) ~[elasticsearch-7.2.0.jar:7.2.0]",
prod_elasticsearch.1.xmwr7padq9d0@docker3    | "at org.elasticsearch.bootstrap.Bootstrap.init(Bootstrap.java:349) ~[elasticsearch-7.2.0.jar:7.2.0]",
prod_elasticsearch.1.xmwr7padq9d0@docker3    | "at org.elasticsearch.bootstrap.Elasticsearch.init(Elasticsearch.java:159) ~[elasticsearch-7.2.0.jar:7.2.0]",
prod_elasticsearch.1.xmwr7padq9d0@docker3    | "... 6 more",
prod_elasticsearch.1.xmwr7padq9d0@docker3    | "Caused by: java.io.IOException: No locks available",
prod_elasticsearch.1.xmwr7padq9d0@docker3    | "at sun.nio.ch.FileDispatcherImpl.lock0(Native Method) ~[?:?]",
prod_elasticsearch.1.xmwr7padq9d0@docker3    | "at sun.nio.ch.FileDispatcherImpl.lock(FileDispatcherImpl.java:96) ~[?:?]",
prod_elasticsearch.1.xmwr7padq9d0@docker3    | "at sun.nio.ch.FileChannelImpl.tryLock(FileChannelImpl.java:1161) ~[?:?]",
prod_elasticsearch.1.xmwr7padq9d0@docker3    | "at java.nio.channels.FileChannel.tryLock(FileChannel.java:1165) ~[?:?]",
prod_elasticsearch.1.xmwr7padq9d0@docker3    | "at org.apache.lucene.store.NativeFSLockFactory.obtainFSLock(NativeFSLockFactory.java:126) ~[lucene-core-8.0.0.jar:8.0.0 2ae4746365c1ee72a0047ced7610b2096e438979 - jimczi - 2019-03-08 11:58:55]",
prod_elasticsearch.1.xmwr7padq9d0@docker3    | "at org.apache.lucene.store.FSLockFactory.obtainLock(FSLockFactory.java:41) ~[lucene-core-8.0.0.jar:8.0.0 2ae4746365c1ee72a0047ced7610b2096e438979 - jimczi - 2019-03-08 11:58:55]",
prod_elasticsearch.1.xmwr7padq9d0@docker3    | "at org.apache.lucene.store.BaseDirectory.obtainLock(BaseDirectory.java:45) ~[lucene-core-8.0.0.jar:8.0.0 2ae4746365c1ee72a0047ced7610b2096e438979 - jimczi - 2019-03-08 11:58:55]",
prod_elasticsearch.1.xmwr7padq9d0@docker3    | "at org.elasticsearch.env.NodeEnvironment$NodeLock.<init>(NodeEnvironment.java:213) ~[elasticsearch-7.2.0.jar:7.2.0]",
prod_elasticsearch.1.xmwr7padq9d0@docker3    | "at org.elasticsearch.env.NodeEnvironment.<init>(NodeEnvironment.java:268) ~[elasticsearch-7.2.0.jar:7.2.0]",
prod_elasticsearch.1.xmwr7padq9d0@docker3    | "at org.elasticsearch.node.Node.<init>(Node.java:271) ~[elasticsearch-7.2.0.jar:7.2.0]",
prod_elasticsearch.1.xmwr7padq9d0@docker3    | "at org.elasticsearch.node.Node.<init>(Node.java:251) ~[elasticsearch-7.2.0.jar:7.2.0]",
prod_elasticsearch.1.xmwr7padq9d0@docker3    | "at org.elasticsearch.bootstrap.Bootstrap$5.<init>(Bootstrap.java:221) ~[elasticsearch-7.2.0.jar:7.2.0]",
prod_elasticsearch.1.xmwr7padq9d0@docker3    | "at org.elasticsearch.bootstrap.Bootstrap.setup(Bootstrap.java:221) ~[elasticsearch-7.2.0.jar:7.2.0]",
prod_elasticsearch.1.xmwr7padq9d0@docker3    | "at org.elasticsearch.bootstrap.Bootstrap.init(Bootstrap.java:349) ~[elasticsearch-7.2.0.jar:7.2.0]",
prod_elasticsearch.1.xmwr7padq9d0@docker3    | "at org.elasticsearch.bootstrap.Elasticsearch.init(Elasticsearch.java:159) ~[elasticsearch-7.2.0.jar:7.2.0]",
prod_elasticsearch.1.xmwr7padq9d0@docker3    | "... 6 more"] }

通过查询,查到如下解决方法(一一排查):

  • 检查Elasticsearch运行进程,如果还有运行的则kill,检查后并没有正在运行的es实例
ps -ef | grep java
  • 检查NFS目录的权限;如果权限没问题,ES会在数据目录(nodes/0)下创建node.lock文件
chown -R 1000:1000 /docker/volumes/elasticsearch/data
chmod -R 777 /docker/volumes/elasticsearch/data
  • 重启docker和nfs服务(nfs服务名因发行版而异),问题依旧
systemctl restart docker
systemctl restart nfs-server
  • 删除node.lock文件(问题依旧)
rm /docker/volumes/elasticsearch/data/nodes/0/node.lock
  • 验证NFS服务器上的RPC服务(status,即rpc.statd)是否正常,没有发现问题
rpcinfo -u $NFSSERVER status

一开始使用的是NFSv3协议,docker-compose.yml中nfs卷的选项为 o: addr=${NFSSERVER},vers=3,rw;改用NFSv4后问题解决。推测原因:NFSv3的文件锁依赖独立的NLM服务(lockd/statd),该服务不可用时加锁就会返回"No locks available";而NFSv4把文件锁内置在协议本身中。

更改vers=3 为 vers=4

docker-compose.yml 配置如下

# Compose file format 3.7 (declared by the `version` key below).
version: "3.7"
# Service definitions: two Elasticsearch nodes (es01, es02) and one Kibana
# instance (kib01).
services:
  # es01: master-eligible Elasticsearch data node. Its data directory is the
  # named volume es_data01, which the top-level `volumes:` section backs with
  # an NFS export.
  es01:
    image: elasticsearch:7.12.0
    # NOTE(review): container_name is honoured by docker-compose but ignored
    # by `docker stack deploy` (swarm mode) - confirm which one is used.
    container_name: es01
    environment:
      - node.name=es01
      - node.master=true
      - node.data=true
      # NOTE(review): presumably left over from troubleshooting the node-lock
      # error (the exception message suggests raising it); a single node per
      # data path should not need it - confirm before removing.
      - node.max_local_storage_nodes=2
      - cluster.name=es-docker-cluster
      - discovery.seed_hosts=es02
      # es01 is the only master-eligible node used to bootstrap the cluster.
      - cluster.initial_master_nodes=es01
      - bootstrap.memory_lock=false
      # Fixed 1 GiB JVM heap (min == max).
      - "ES_JAVA_OPTS=-Xms1g -Xmx1g"
      - reindex.remote.whitelist=192.168.50.20:9200
      - http.cors.enabled=true
      # NOTE(review): `*` accepts cross-origin requests from any origin;
      # tighten this outside of test environments.
      - http.cors.allow-origin=*
    volumes:
      # Long syntax so that `nocopy` can be set: the image's existing
      # content is not copied into the (NFS-backed) volume on creation.
      - type: volume
        source: es_data01
        target: /usr/share/elasticsearch/data
        volume:
          nocopy: true
    ports:
      # Quoted so host:container pairs are always parsed as strings and never
      # hit YAML 1.1 implicit number typing.
      - "9200:9200"
      - "9300:9300"
    networks:
      - elastic
    deploy:
      placement:
        constraints:
          # Schedule this service on a swarm manager node.
          - "node.role==manager"
  
  # es02: data-only Elasticsearch node (not master-eligible) that joins the
  # cluster through es01 and stores its data on the local volume es_data02.
  es02:
    container_name: es02
    image: elasticsearch:7.12.0
    depends_on:
      - es01
    networks:
      - elastic
    volumes:
      # Long-form equivalent of `es_data02:/usr/share/elasticsearch/data`.
      - type: volume
        source: es_data02
        target: /usr/share/elasticsearch/data
    environment:
      # Node identity and roles.
      - node.name=es02
      - node.data=true
      - node.master=false
      # Cluster membership and discovery.
      - cluster.name=es-docker-cluster
      - discovery.seed_hosts=es01
      - cluster.initial_master_nodes=es01
      # JVM / bootstrap settings.
      - bootstrap.memory_lock=false
      - 'ES_JAVA_OPTS=-Xms1g -Xmx1g'
      # Remote reindex source and CORS.
      - reindex.remote.whitelist=192.168.50.20:9200
      - http.cors.enabled=true
      - http.cors.allow-origin=*
    deploy:
      resources:
        limits:
          # Cap the service at half of one CPU.
          cpus: '0.50'
      placement:
        constraints:
          - 'node.role==worker'
          
  # kib01: Kibana UI, published on port 5601 and pointed at es01.
  kib01:
    image: kibana:7.12.0
    # NOTE(review): container_name is honoured by docker-compose but ignored
    # by `docker stack deploy` (swarm mode) - confirm which one is used.
    container_name: kib01
    ports:
      # Quoted so the host:container pair is always parsed as a string.
      - "5601:5601"
    environment:
      # NOTE(review): ELASTICSEARCH_URL looks like the pre-7.x setting name;
      # ELASTICSEARCH_HOSTS is presumably the one Kibana 7.x reads - confirm
      # before dropping either.
      ELASTICSEARCH_URL: http://es01:9200
      ELASTICSEARCH_HOSTS: http://es01:9200
    networks:
      - elastic
    depends_on:
      - es01
      - es02
    deploy:
      placement:
        constraints:
          # Schedule this service on a swarm manager node.
          - "node.role==manager"


# Named volumes for the two Elasticsearch data directories.
volumes:
  # es01 data: an NFS export mounted through the `local` volume driver.
  es_data01:
    driver: local
    driver_opts:
      # Remote export path; the server address is supplied via `addr=` in `o`.
      device: ':/volume1/runtime/elasticsearch/data'
      # Mount options. `vers=4` selects NFSv4 - the article reports that
      # `vers=3` produced the "No locks available" node-lock failure.
      # `nfsaddress` is a placeholder for the NFS server address.
      o: 'addr=nfsaddress,vers=4,rw'
      type: nfs
  # es02 data: plain local volume with no driver options.
  es_data02:
    driver: local

# Shared network that all three services (es01, es02, kib01) attach to.
networks:
  elastic:
    # NOTE(review): the overlay driver is normally used with swarm mode to
    # span several Docker hosts - confirm the deployment target.
    driver: overlay

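需要先删除已有的docker volume,否则仍然会启动失败:卷的挂载选项在创建时就已确定,修改docker-compose.yml不会更新已存在的卷。(如果通过docker stack部署,卷名会带上stack名前缀,可先用 docker volume ls 确认实际名称。)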

docker volume rm es_data01
参考文章
最后编辑于
©著作权归作者所有,转载或内容合作请联系作者
平台声明:文章内容(如有图片或视频亦包括在内)由作者上传并发布,文章内容仅代表作者本人观点,简书系信息发布平台,仅提供信息存储服务。

推荐阅读更多精彩内容