iceberg系列(2):存储详解-partition

iceberg分区演化
可以通过添加、删除、重命名或重新排序分区规范字段来改进表分区。
更改分区规格会生成一个由唯一规格 ID 标识的新规格,该 ID 将添加到表的分区规格列表中,并且可以设置为表的默认分区规格。
在变更分区规范时,更改不应导致分区字段 ID (field id)更改,因为分区字段 ID 用作清单文件(manifest)中的分区元组字段 ID(partition tuple field ID)。
在 v2 中,必须为每个分区字段显式跟踪分区字段 ID。新 ID 是根据表元数据中最后分配的分区 ID 分配的。
在v1中,分区字段id不被跟踪,而是从1000开始顺序分配。当从多个规格中读取基于清单文件(manifest)的元数据表时,这种分配机制会导致问题,因为具有相同ID的分区字段可能包含不同的数据类型。为了与旧版本兼容,对于v1表中的分区演化,建议遵循以下规则:

  • 不要重新排序分区字段
  • 不要删除分区字段;而是用void变换替换字段
  • 仅在前一个分区规范的末尾添加分区字段

下面来看几个实例:

CREATE TABLE local.db.sample ( 
    id bigint, 
    data string, 
    category string) 
USING iceberg 
PARTITIONED BY (category);

insert into local.db.sample values(1,'a','1');

查看 metadata 文件(metadata.json):

{
  "format-version" : 1,
  "table-uuid" : "94ad30ed-4a31-438d-b81b-36d791471d2c",
  "location" : "/Users/liliwei/plat/spark-3.1.2-bin-hadoop3.2/warehouse/db/sample",
  "last-updated-ms" : 1642174094175,
  "last-column-id" : 3,
  "schema" : {
    "type" : "struct",
    "schema-id" : 0,
    "fields" : [ {
      "id" : 1,
      "name" : "id",
      "required" : false,
      "type" : "long"
    }, {
      "id" : 2,
      "name" : "data",
      "required" : false,
      "type" : "string"
    }, {
      "id" : 3,
      "name" : "category",
      "required" : false,
      "type" : "string"
    } ]
  },
  "current-schema-id" : 0,
  "schemas" : [ {
    "type" : "struct",
    "schema-id" : 0,
    "fields" : [ {
      "id" : 1,
      "name" : "id",
      "required" : false,
      "type" : "long"
    }, {
      "id" : 2,
      "name" : "data",
      "required" : false,
      "type" : "string"
    }, {
      "id" : 3,
      "name" : "category",
      "required" : false,
      "type" : "string"
    } ]
  } ],
  "partition-spec" : [ {
    "name" : "category",
    "transform" : "identity",
    "source-id" : 3,
    "field-id" : 1000
  } ],
  "default-spec-id" : 0,
  "partition-specs" : [ {
    "spec-id" : 0,
    "fields" : [ {
      "name" : "category",
      "transform" : "identity",
      "source-id" : 3,
      "field-id" : 1000
    } ]
  } ],
  "last-partition-id" : 1000,
  "default-sort-order-id" : 0,
  "sort-orders" : [ {
    "order-id" : 0,
    "fields" : [ ]
  } ],
  "properties" : {
    "owner" : "liliwei"
  },
  "current-snapshot-id" : 3476183237498309505,
  "snapshots" : [ {
    "snapshot-id" : 3476183237498309505,
    "timestamp-ms" : 1642174094175,
    "summary" : {
      "operation" : "append",
      "spark.app.id" : "local-1642173017469",
      "added-data-files" : "1",
      "added-records" : "1",
      "added-files-size" : "874",
      "changed-partition-count" : "1",
      "total-records" : "1",
      "total-files-size" : "874",
      "total-data-files" : "1",
      "total-delete-files" : "0",
      "total-position-deletes" : "0",
      "total-equality-deletes" : "0"
    },
    "manifest-list" : "/Users/liliwei/plat/spark-3.1.2-bin-hadoop3.2/warehouse/db/sample/metadata/snap-3476183237498309505-1-002e475b-e5b9-485e-a59d-35730a6c9f4e.avro",
    "schema-id" : 0
  } ],
  "snapshot-log" : [ {
    "timestamp-ms" : 1642174094175,
    "snapshot-id" : 3476183237498309505
  } ],
  "metadata-log" : [ {
    "timestamp-ms" : 1642173226793,
    "metadata-file" : "/Users/liliwei/plat/spark-3.1.2-bin-hadoop3.2/warehouse/db/sample/metadata/v1.metadata.json"
  } ]
}

查看snap文件:
java -jar ~/plat/tools/avro-tools-1.10.2.jar tojson snap-3476183237498309505-1-002e475b-e5b9-485e-a59d-35730a6c9f4e.avro

{
    "manifest_path": "/Users/liliwei/plat/spark-3.1.2-bin-hadoop3.2/warehouse/db/sample/metadata/002e475b-e5b9-485e-a59d-35730a6c9f4e-m0.avro",
    "manifest_length": 6095,
    "partition_spec_id": 0,
    "added_snapshot_id": {
        "long": 3476183237498309505
    },
    "added_data_files_count": {
        "int": 1
    },
    "existing_data_files_count": {
        "int": 0
    },
    "deleted_data_files_count": {
        "int": 0
    },
    "partitions": {
        "array": [{
            "contains_null": false,
            "contains_nan": {
                "boolean": false
            },
            "lower_bound": {
                "bytes": "1"
            },
            "upper_bound": {
                "bytes": "1"
            }
        }]
    },
    "added_rows_count": {
        "long": 1
    },
    "existing_rows_count": {
        "long": 0
    },
    "deleted_rows_count": {
        "long": 0
    }
}

ALTER TABLE local.db.sample ADD PARTITION FIELD data;

查看目录结构:

(base) ➜ metadata tree -l
.
├── 002e475b-e5b9-485e-a59d-35730a6c9f4e-m0.avro
├── snap-3476183237498309505-1-002e475b-e5b9-485e-a59d-35730a6c9f4e.avro
├── v1.metadata.json
├── v2.metadata.json
├── v3.metadata.json
└── version-hint.text

0 directories, 6 files

查看v3.metadata.json文件:

{
  "format-version" : 1,
  "table-uuid" : "94ad30ed-4a31-438d-b81b-36d791471d2c",
  "location" : "/Users/liliwei/plat/spark-3.1.2-bin-hadoop3.2/warehouse/db/sample",
  "last-updated-ms" : 1642175874398,
  "last-column-id" : 3,
  "schema" : {
    "type" : "struct",
    "schema-id" : 0,
    "fields" : [ {
      "id" : 1,
      "name" : "id",
      "required" : false,
      "type" : "long"
    }, {
      "id" : 2,
      "name" : "data",
      "required" : false,
      "type" : "string"
    }, {
      "id" : 3,
      "name" : "category",
      "required" : false,
      "type" : "string"
    } ]
  },
  "current-schema-id" : 0,
  "schemas" : [ {
    "type" : "struct",
    "schema-id" : 0,
    "fields" : [ {
      "id" : 1,
      "name" : "id",
      "required" : false,
      "type" : "long"
    }, {
      "id" : 2,
      "name" : "data",
      "required" : false,
      "type" : "string"
    }, {
      "id" : 3,
      "name" : "category",
      "required" : false,
      "type" : "string"
    } ]
  } ],
  "partition-spec" : [ {
    "name" : "category",
    "transform" : "identity",
    "source-id" : 3,
    "field-id" : 1000
  }, {
    "name" : "data",
    "transform" : "identity",
    "source-id" : 2,
    "field-id" : 1001
  } ],
  "default-spec-id" : 1,
  "partition-specs" : [ {
    "spec-id" : 0,
    "fields" : [ {
      "name" : "category",
      "transform" : "identity",
      "source-id" : 3,
      "field-id" : 1000
    } ]
  }, {
    "spec-id" : 1,
    "fields" : [ {
      "name" : "category",
      "transform" : "identity",
      "source-id" : 3,
      "field-id" : 1000
    }, {
      "name" : "data",
      "transform" : "identity",
      "source-id" : 2,
      "field-id" : 1001
    } ]
  } ],
  "last-partition-id" : 1001,
  "default-sort-order-id" : 0,
  "sort-orders" : [ {
    "order-id" : 0,
    "fields" : [ ]
  } ],
  "properties" : {
    "owner" : "liliwei"
  },
  "current-snapshot-id" : 3476183237498309505,
  "snapshots" : [ {
    "snapshot-id" : 3476183237498309505,
    "timestamp-ms" : 1642174094175,
    "summary" : {
      "operation" : "append",
      "spark.app.id" : "local-1642173017469",
      "added-data-files" : "1",
      "added-records" : "1",
      "added-files-size" : "874",
      "changed-partition-count" : "1",
      "total-records" : "1",
      "total-files-size" : "874",
      "total-data-files" : "1",
      "total-delete-files" : "0",
      "total-position-deletes" : "0",
      "total-equality-deletes" : "0"
    },
    "manifest-list" : "/Users/liliwei/plat/spark-3.1.2-bin-hadoop3.2/warehouse/db/sample/metadata/snap-3476183237498309505-1-002e475b-e5b9-485e-a59d-35730a6c9f4e.avro",
    "schema-id" : 0
  } ],
  "snapshot-log" : [ {
    "timestamp-ms" : 1642174094175,
    "snapshot-id" : 3476183237498309505
  } ],
  "metadata-log" : [ {
    "timestamp-ms" : 1642173226793,
    "metadata-file" : "/Users/liliwei/plat/spark-3.1.2-bin-hadoop3.2/warehouse/db/sample/metadata/v1.metadata.json"
  }, {
    "timestamp-ms" : 1642174094175,
    "metadata-file" : "/Users/liliwei/plat/spark-3.1.2-bin-hadoop3.2/warehouse/db/sample/metadata/v2.metadata.json"
  } ]
}

插入数据

insert into local.db.sample values(2,'b','2');

查看目录结构:

(base) ➜ metadata tree -l
.
├── 002e475b-e5b9-485e-a59d-35730a6c9f4e-m0.avro
├── ed1a1f56-56fc-4313-bf60-10df0c4e88ca-m0.avro
├── snap-2641901311316255446-1-ed1a1f56-56fc-4313-bf60-10df0c4e88ca.avro
├── snap-3476183237498309505-1-002e475b-e5b9-485e-a59d-35730a6c9f4e.avro
├── v1.metadata.json
├── v2.metadata.json
├── v3.metadata.json
├── v4.metadata.json
└── version-hint.text

0 directories, 9 files
java -jar ~/plat/tools/avro-tools-1.10.2.jar tojson snap-2641901311316255446-1-ed1a1f56-56fc-4313-bf60-10df0c4e88ca.avro
{
    "manifest_path": "/Users/liliwei/plat/spark-3.1.2-bin-hadoop3.2/warehouse/db/sample/metadata/ed1a1f56-56fc-4313-bf60-10df0c4e88ca-m0.avro",
    "manifest_length": 6301,
    "partition_spec_id": 1,
    "added_snapshot_id": {
        "long": 2641901311316255446
    },
    "added_data_files_count": {
        "int": 1
    },
    "existing_data_files_count": {
        "int": 0
    },
    "deleted_data_files_count": {
        "int": 0
    },
    "partitions": {
        "array": [{
            "contains_null": false,
            "contains_nan": {
                "boolean": false
            },
            "lower_bound": {
                "bytes": "2"
            },
            "upper_bound": {
                "bytes": "2"
            }
        }, {
            "contains_null": false,
            "contains_nan": {
                "boolean": false
            },
            "lower_bound": {
                "bytes": "b"
            },
            "upper_bound": {
                "bytes": "b"
            }
        }]
    },
    "added_rows_count": {
        "long": 1
    },
    "existing_rows_count": {
        "long": 0
    },
    "deleted_rows_count": {
        "long": 0
    }
} {
    "manifest_path": "/Users/liliwei/plat/spark-3.1.2-bin-hadoop3.2/warehouse/db/sample/metadata/002e475b-e5b9-485e-a59d-35730a6c9f4e-m0.avro",
    "manifest_length": 6095,
    "partition_spec_id": 0,
    "added_snapshot_id": {
        "long": 3476183237498309505
    },
    "added_data_files_count": {
        "int": 1
    },
    "existing_data_files_count": {
        "int": 0
    },
    "deleted_data_files_count": {
        "int": 0
    },
    "partitions": {
        "array": [{
            "contains_null": false,
            "contains_nan": {
                "boolean": false
            },
            "lower_bound": {
                "bytes": "1"
            },
            "upper_bound": {
                "bytes": "1"
            }
        }]
    },
    "added_rows_count": {
        "long": 1
    },
    "existing_rows_count": {
        "long": 0
    },
    "deleted_rows_count": {
        "long": 0
    }
}

下面查看 V2 格式(format-version=2)表的存储结构。
首先创建一张表:

CREATE TABLE local.db.sampleV2 ( 
    id bigint, 
    data string, 
    category string) 
USING iceberg 
PARTITIONED BY (category)
TBLPROPERTIES ('format-version'='2'); 

查看表结构文件:

{
  "format-version" : 2,
  "table-uuid" : "5c786a06-aeec-4559-9b3d-79687d82a809",
  "location" : "/Users/liliwei/plat/spark-3.1.2-bin-hadoop3.2/warehouse/db/sampleV2",
  "last-sequence-number" : 0,
  "last-updated-ms" : 1642173468635,
  "last-column-id" : 3,
  "current-schema-id" : 0,
  "schemas" : [ {
    "type" : "struct",
    "schema-id" : 0,
    "fields" : [ {
      "id" : 1,
      "name" : "id",
      "required" : false,
      "type" : "long"
    }, {
      "id" : 2,
      "name" : "data",
      "required" : false,
      "type" : "string"
    }, {
      "id" : 3,
      "name" : "category",
      "required" : false,
      "type" : "string"
    } ]
  } ],
  "default-spec-id" : 0,
  "partition-specs" : [ {
    "spec-id" : 0,
    "fields" : [ {
      "name" : "category",
      "transform" : "identity",
      "source-id" : 3,
      "field-id" : 1000
    } ]
  } ],
  "last-partition-id" : 1000,
  "default-sort-order-id" : 0,
  "sort-orders" : [ {
    "order-id" : 0,
    "fields" : [ ]
  } ],
  "properties" : {
    "owner" : "liliwei"
  },
  "current-snapshot-id" : -1,
  "snapshots" : [ ],
  "snapshot-log" : [ ],
  "metadata-log" : [ ]
}

插入数据:

insert into local.db.sampleV2 values(1,'a','1');

查看manifest list文件

(base) ➜ metadata tree -l
.
├── 2c7688ff-595a-4d9e-ba36-7fd68e70500f-m0.avro
├── snap-1504400791559924261-1-2c7688ff-595a-4d9e-ba36-7fd68e70500f.avro
├── v1.metadata.json
├── v2.metadata.json
└── version-hint.text

0 directories, 5 files
java -jar ~/plat/tools/avro-tools-1.10.2.jar tojson snap-1504400791559924261-1-2c7688ff-595a-4d9e-ba36-7fd68e70500f.avro
{
    "manifest_path": "/Users/liliwei/plat/spark-3.1.2-bin-hadoop3.2/warehouse/db/sampleV2/metadata/2c7688ff-595a-4d9e-ba36-7fd68e70500f-m0.avro",
    "manifest_length": 6833,
    "partition_spec_id": 0,
    "content": 0,
    "sequence_number": 1,
    "min_sequence_number": 1,
    "added_snapshot_id": 1504400791559924261,
    "added_data_files_count": 1,
    "existing_data_files_count": 0,
    "deleted_data_files_count": 0,
    "added_rows_count": 1,
    "existing_rows_count": 0,
    "deleted_rows_count": 0,
    "partitions": {
        "array": [{
            "contains_null": false,
            "contains_nan": {
                "boolean": false
            },
            "lower_bound": {
                "bytes": "1"
            },
            "upper_bound": {
                "bytes": "1"
            }
        }]
    }
}

进行变更

ALTER TABLE local.db.sampleV2 ADD PARTITION FIELD data;

查看目录结构 :

(base) ➜ metadata tree -l
.
├── 2c7688ff-595a-4d9e-ba36-7fd68e70500f-m0.avro
├── snap-1504400791559924261-1-2c7688ff-595a-4d9e-ba36-7fd68e70500f.avro
├── v1.metadata.json
├── v2.metadata.json
├── v3.metadata.json
└── version-hint.text

0 directories, 6 files

查看v3内容:

{
  "format-version" : 2,
  "table-uuid" : "5c786a06-aeec-4559-9b3d-79687d82a809",
  "location" : "/Users/liliwei/plat/spark-3.1.2-bin-hadoop3.2/warehouse/db/sampleV2",
  "last-sequence-number" : 1,
  "last-updated-ms" : 1642176605638,
  "last-column-id" : 3,
  "current-schema-id" : 0,
  "schemas" : [ {
    "type" : "struct",
    "schema-id" : 0,
    "fields" : [ {
      "id" : 1,
      "name" : "id",
      "required" : false,
      "type" : "long"
    }, {
      "id" : 2,
      "name" : "data",
      "required" : false,
      "type" : "string"
    }, {
      "id" : 3,
      "name" : "category",
      "required" : false,
      "type" : "string"
    } ]
  } ],
  "default-spec-id" : 1,
  "partition-specs" : [ {
    "spec-id" : 0,
    "fields" : [ {
      "name" : "category",
      "transform" : "identity",
      "source-id" : 3,
      "field-id" : 1000
    } ]
  }, {
    "spec-id" : 1,
    "fields" : [ {
      "name" : "category",
      "transform" : "identity",
      "source-id" : 3,
      "field-id" : 1000
    }, {
      "name" : "data",
      "transform" : "identity",
      "source-id" : 2,
      "field-id" : 1001
    } ]
  } ],
  "last-partition-id" : 1001,
  "default-sort-order-id" : 0,
  "sort-orders" : [ {
    "order-id" : 0,
    "fields" : [ ]
  } ],
  "properties" : {
    "owner" : "liliwei"
  },
  "current-snapshot-id" : 1504400791559924261,
  "snapshots" : [ {
    "sequence-number" : 1,
    "snapshot-id" : 1504400791559924261,
    "timestamp-ms" : 1642176476606,
    "summary" : {
      "operation" : "append",
      "spark.app.id" : "local-1642173017469",
      "added-data-files" : "1",
      "added-records" : "1",
      "added-files-size" : "874",
      "changed-partition-count" : "1",
      "total-records" : "1",
      "total-files-size" : "874",
      "total-data-files" : "1",
      "total-delete-files" : "0",
      "total-position-deletes" : "0",
      "total-equality-deletes" : "0"
    },
    "manifest-list" : "/Users/liliwei/plat/spark-3.1.2-bin-hadoop3.2/warehouse/db/sampleV2/metadata/snap-1504400791559924261-1-2c7688ff-595a-4d9e-ba36-7fd68e70500f.avro",
    "schema-id" : 0
  } ],
  "snapshot-log" : [ {
    "timestamp-ms" : 1642176476606,
    "snapshot-id" : 1504400791559924261
  } ],
  "metadata-log" : [ {
    "timestamp-ms" : 1642173468635,
    "metadata-file" : "/Users/liliwei/plat/spark-3.1.2-bin-hadoop3.2/warehouse/db/sampleV2/metadata/v1.metadata.json"
  }, {
    "timestamp-ms" : 1642176476606,
    "metadata-file" : "/Users/liliwei/plat/spark-3.1.2-bin-hadoop3.2/warehouse/db/sampleV2/metadata/v2.metadata.json"
  } ]
}

插入数据:

insert into local.db.sampleV2 values(2,'b','2');

查看目录结构:

(base) ➜ metadata tree -l
.
├── 2c7688ff-595a-4d9e-ba36-7fd68e70500f-m0.avro
├── 3f59d998-6448-4d83-9dcb-5ceb5c5d1f7d-m0.avro
├── snap-1504400791559924261-1-2c7688ff-595a-4d9e-ba36-7fd68e70500f.avro
├── snap-506027699712535420-1-3f59d998-6448-4d83-9dcb-5ceb5c5d1f7d.avro
├── v1.metadata.json
├── v2.metadata.json
├── v3.metadata.json
├── v4.metadata.json
└── version-hint.text

0 directories, 9 files

查看v4文件:

{
  "format-version" : 2,
  "table-uuid" : "5c786a06-aeec-4559-9b3d-79687d82a809",
  "location" : "/Users/liliwei/plat/spark-3.1.2-bin-hadoop3.2/warehouse/db/sampleV2",
  "last-sequence-number" : 2,
  "last-updated-ms" : 1642176734997,
  "last-column-id" : 3,
  "current-schema-id" : 0,
  "schemas" : [ {
    "type" : "struct",
    "schema-id" : 0,
    "fields" : [ {
      "id" : 1,
      "name" : "id",
      "required" : false,
      "type" : "long"
    }, {
      "id" : 2,
      "name" : "data",
      "required" : false,
      "type" : "string"
    }, {
      "id" : 3,
      "name" : "category",
      "required" : false,
      "type" : "string"
    } ]
  } ],
  "default-spec-id" : 1,
  "partition-specs" : [ {
    "spec-id" : 0,
    "fields" : [ {
      "name" : "category",
      "transform" : "identity",
      "source-id" : 3,
      "field-id" : 1000
    } ]
  }, {
    "spec-id" : 1,
    "fields" : [ {
      "name" : "category",
      "transform" : "identity",
      "source-id" : 3,
      "field-id" : 1000
    }, {
      "name" : "data",
      "transform" : "identity",
      "source-id" : 2,
      "field-id" : 1001
    } ]
  } ],
  "last-partition-id" : 1001,
  "default-sort-order-id" : 0,
  "sort-orders" : [ {
    "order-id" : 0,
    "fields" : [ ]
  } ],
  "properties" : {
    "owner" : "liliwei"
  },
  "current-snapshot-id" : 506027699712535420,
  "snapshots" : [ {
    "sequence-number" : 1,
    "snapshot-id" : 1504400791559924261,
    "timestamp-ms" : 1642176476606,
    "summary" : {
      "operation" : "append",
      "spark.app.id" : "local-1642173017469",
      "added-data-files" : "1",
      "added-records" : "1",
      "added-files-size" : "874",
      "changed-partition-count" : "1",
      "total-records" : "1",
      "total-files-size" : "874",
      "total-data-files" : "1",
      "total-delete-files" : "0",
      "total-position-deletes" : "0",
      "total-equality-deletes" : "0"
    },
    "manifest-list" : "/Users/liliwei/plat/spark-3.1.2-bin-hadoop3.2/warehouse/db/sampleV2/metadata/snap-1504400791559924261-1-2c7688ff-595a-4d9e-ba36-7fd68e70500f.avro",
    "schema-id" : 0
  }, {
    "sequence-number" : 2,
    "snapshot-id" : 506027699712535420,
    "parent-snapshot-id" : 1504400791559924261,
    "timestamp-ms" : 1642176734997,
    "summary" : {
      "operation" : "append",
      "spark.app.id" : "local-1642173017469",
      "added-data-files" : "1",
      "added-records" : "1",
      "added-files-size" : "874",
      "changed-partition-count" : "1",
      "total-records" : "2",
      "total-files-size" : "1748",
      "total-data-files" : "2",
      "total-delete-files" : "0",
      "total-position-deletes" : "0",
      "total-equality-deletes" : "0"
    },
    "manifest-list" : "/Users/liliwei/plat/spark-3.1.2-bin-hadoop3.2/warehouse/db/sampleV2/metadata/snap-506027699712535420-1-3f59d998-6448-4d83-9dcb-5ceb5c5d1f7d.avro",
    "schema-id" : 0
  } ],
  "snapshot-log" : [ {
    "timestamp-ms" : 1642176476606,
    "snapshot-id" : 1504400791559924261
  }, {
    "timestamp-ms" : 1642176734997,
    "snapshot-id" : 506027699712535420
  } ],
  "metadata-log" : [ {
    "timestamp-ms" : 1642173468635,
    "metadata-file" : "/Users/liliwei/plat/spark-3.1.2-bin-hadoop3.2/warehouse/db/sampleV2/metadata/v1.metadata.json"
  }, {
    "timestamp-ms" : 1642176476606,
    "metadata-file" : "/Users/liliwei/plat/spark-3.1.2-bin-hadoop3.2/warehouse/db/sampleV2/metadata/v2.metadata.json"
  }, {
    "timestamp-ms" : 1642176605638,
    "metadata-file" : "/Users/liliwei/plat/spark-3.1.2-bin-hadoop3.2/warehouse/db/sampleV2/metadata/v3.metadata.json"
  } ]
}

查看manifest list文件:

{
    "manifest_path": "/Users/liliwei/plat/spark-3.1.2-bin-hadoop3.2/warehouse/db/sampleV2/metadata/3f59d998-6448-4d83-9dcb-5ceb5c5d1f7d-m0.avro",
    "manifest_length": 7036,
    "partition_spec_id": 1,
    "content": 0,
    "sequence_number": 2,
    "min_sequence_number": 2,
    "added_snapshot_id": 506027699712535420,
    "added_data_files_count": 1,
    "existing_data_files_count": 0,
    "deleted_data_files_count": 0,
    "added_rows_count": 1,
    "existing_rows_count": 0,
    "deleted_rows_count": 0,
    "partitions": {
        "array": [{
            "contains_null": false,
            "contains_nan": {
                "boolean": false
            },
            "lower_bound": {
                "bytes": "2"
            },
            "upper_bound": {
                "bytes": "2"
            }
        }, {
            "contains_null": false,
            "contains_nan": {
                "boolean": false
            },
            "lower_bound": {
                "bytes": "b"
            },
            "upper_bound": {
                "bytes": "b"
            }
        }]
    }
} {
    "manifest_path": "/Users/liliwei/plat/spark-3.1.2-bin-hadoop3.2/warehouse/db/sampleV2/metadata/2c7688ff-595a-4d9e-ba36-7fd68e70500f-m0.avro",
    "manifest_length": 6833,
    "partition_spec_id": 0,
    "content": 0,
    "sequence_number": 1,
    "min_sequence_number": 1,
    "added_snapshot_id": 1504400791559924261,
    "added_data_files_count": 1,
    "existing_data_files_count": 0,
    "deleted_data_files_count": 0,
    "added_rows_count": 1,
    "existing_rows_count": 0,
    "deleted_rows_count": 0,
    "partitions": {
        "array": [{
            "contains_null": false,
            "contains_nan": {
                "boolean": false
            },
            "lower_bound": {
                "bytes": "1"
            },
            "upper_bound": {
                "bytes": "1"
            }
        }]
    }
}
©著作权归作者所有,转载或内容合作请联系作者
平台声明:文章内容(如有图片或视频亦包括在内)由作者上传并发布,文章内容仅代表作者本人观点,简书系信息发布平台,仅提供信息存储服务。

推荐阅读更多精彩内容