Apache Hudi 表目录结构
记录一部分表目录结构和文件格式,便于对 Hudi 的设计理念和表的组织格式有直观的理解。
数据由官网示例运行后产生。
一、COW 类型表目录结构
表名字为 stock_ticks_cow
Permission | Owner | Group | Size | Last Modified | Replication | Block Size | Name |
---|---|---|---|---|---|---|---|
drwxr-xr-x | root | supergroup | 0 B | Oct 15 16:20 | 0 | 0 B | .hoodie |
drwxr-xr-x | root | supergroup | 0 B | Oct 15 15:45 | 0 | 0 B | 2018 |
1.1 /tablename/.hoodie
Permission | Owner | Group | Size | Last Modified | Replication | Block Size | Name |
---|---|---|---|---|---|---|---|
-rw-r--r-- | root | supergroup | 968 B | Oct 15 15:45 | 3 | 128 MB | 20201015074528.rollback |
-rw-r--r-- | root | supergroup | 0 B | Oct 15 15:45 | 3 | 128 MB | 20201015074528.rollback.inflight |
-rw-r--r-- | root | supergroup | 2.2 KB | Oct 15 15:45 | 3 | 128 MB | 20201015074529.commit |
-rw-r--r-- | root | supergroup | 0 B | Oct 15 15:45 | 3 | 128 MB | 20201015074529.commit.requested |
-rw-r--r-- | root | supergroup | 350 B | Oct 15 15:45 | 3 | 128 MB | 20201015074529.inflight |
-rw-r--r-- | root | supergroup | 2.21 KB | Oct 15 16:20 | 3 | 128 MB | 20201015082021.commit |
-rw-r--r-- | root | supergroup | 0 B | Oct 15 16:20 | 3 | 128 MB | 20201015082021.commit.requested |
-rw-r--r-- | root | supergroup | 1.01 KB | Oct 15 16:20 | 3 | 128 MB | 20201015082021.inflight |
-rw-r--r-- | root | supergroup | 213 B | Oct 15 15:36 | 3 | 128 MB | hoodie.properties |
drwxr-xr-x | root | supergroup | 0 B | Oct 15 15:36 | 0 | 0 B | .aux |
drwxr-xr-x | root | supergroup | 0 B | Oct 15 16:20 | 0 | 0 B | .temp |
drwxr-xr-x | root | supergroup | 0 B | Oct 15 15:36 | 0 | 0 B | archived |
1.1.1 *.rollback 文件格式
(Avro 容器文件,二进制文件头以魔数 `Obj` + 0x01 开头,无法直接以文本显示;文件头元数据 avro.schema 的内容如下)
{"type":"record","name":"HoodieRollbackMetadata","namespace":"org.apache.hudi.avro.model","fields":[{"name":"startRollbackTime","type":{"type":"string","avro.java.string":"String"}},{"name":"timeTakenInMillis","type":"long"},{"name":"totalFilesDeleted","type":"int"},{"name":"commitsRollback","type":{"type":"array","items":{"type":"string","avro.java.string":"String"}}},{"name":"partitionMetadata","type":{"type":"map","values":{"type":"record","name":"HoodieRollbackPartitionMetadata","fields":[{"name":"partitionPath","type":{"type":"string","avro.java.string":"String"}},{"name":"successDeleteFiles","type":{"type":"array","items":{"type":"string","avro.java.string":"String"}}},{"name":"failedDeleteFiles","type":{"type":"array","items":{"type":"string","avro.java.string":"String"}}}]},"avro.java.string":"String"}},{"name":"version","type":["int","null"],"default":1}]}
1.1.2 *.rollback.inflight 文件格式
暂无复现场景
1.1.3 *.commit 文件格式
{
"partitionToWriteStats" : {
"2018/08/31" : [ {
"fileId" : "8c8db0e3-964d-4a2c-b0fe-f4306d08b5c8-0",
"path" : "2018/08/31/8c8db0e3-964d-4a2c-b0fe-f4306d08b5c8-0_0-22-22_20201015074529.parquet",
"prevCommit" : "null",
"numWrites" : 197,
"numDeletes" : 0,
"numUpdateWrites" : 0,
"numInserts" : 197,
"totalWriteBytes" : 443701,
"totalWriteErrors" : 0,
"tempPath" : null,
"partitionPath" : "2018/08/31",
"totalLogRecords" : 0,
"totalLogFilesCompacted" : 0,
"totalLogSizeCompacted" : 0,
"totalUpdatedRecordsCompacted" : 0,
"totalLogBlocks" : 0,
"totalCorruptLogBlock" : 0,
"totalRollbackBlocks" : 0,
"fileSizeInBytes" : 443701
} ]
},
"compacted" : false,
"extraMetadata" : {
"ROLLING_STAT" : "{\n \"partitionToRollingStats\" : {\n \"2018/08/31\" : {\n \"8c8db0e3-964d-4a2c-b0fe-f4306d08b5c8-0\" : {\n \"fileId\" : \"8c8db0e3-964d-4a2c-b0fe-f4306d08b5c8-0\",\n \"inserts\" : 197,\n \"upserts\" : 0,\n \"deletes\" : 0,\n \"totalInputWriteBytesToDisk\" : 0,\n \"totalInputWriteBytesOnDisk\" : 443701\n }\n }\n },\n \"actionType\" : \"commit\"\n}",
"schema" : "{\"type\":\"record\",\"name\":\"stock_ticks\",\"fields\":[{\"name\":\"volume\",\"type\":\"long\"},{\"name\":\"ts\",\"type\":\"string\"},{\"name\":\"symbol\",\"type\":\"string\"},{\"name\":\"year\",\"type\":\"int\"},{\"name\":\"month\",\"type\":\"string\"},{\"name\":\"high\",\"type\":\"double\"},{\"name\":\"low\",\"type\":\"double\"},{\"name\":\"key\",\"type\":\"string\"},{\"name\":\"date\",\"type\":\"string\"},{\"name\":\"close\",\"type\":\"double\"},{\"name\":\"open\",\"type\":\"double\"},{\"name\":\"day\",\"type\":\"string\"}]}",
"deltastreamer.checkpoint.key" : "stock_ticks,0:3482"
},
"fileIdAndRelativePaths" : {
"8c8db0e3-964d-4a2c-b0fe-f4306d08b5c8-0" : "2018/08/31/8c8db0e3-964d-4a2c-b0fe-f4306d08b5c8-0_0-22-22_20201015074529.parquet"
},
"totalRecordsDeleted" : 0,
"totalLogRecordsCompacted" : 0,
"totalScanTime" : 0,
"totalCreateTime" : 793,
"totalUpsertTime" : 0,
"totalCompactedRecordsUpdated" : 0,
"totalLogFilesCompacted" : 0,
"totalLogFilesSize" : 0
}
1.1.4 *.commit.requested 文件格式
暂无复现场景
1.1.5 *.inflight 文件格式
{
"partitionToWriteStats" : {
"2018/08/31" : [ {
"fileId" : "8c8db0e3-964d-4a2c-b0fe-f4306d08b5c8-0",
"path" : null,
"prevCommit" : "20201015074529",
"numWrites" : 0,
"numDeletes" : 0,
"numUpdateWrites" : 99,
"numInserts" : 0,
"totalWriteBytes" : 0,
"totalWriteErrors" : 0,
"tempPath" : null,
"partitionPath" : null,
"totalLogRecords" : 0,
"totalLogFilesCompacted" : 0,
"totalLogSizeCompacted" : 0,
"totalUpdatedRecordsCompacted" : 0,
"totalLogBlocks" : 0,
"totalCorruptLogBlock" : 0,
"totalRollbackBlocks" : 0,
"fileSizeInBytes" : 0
} ]
},
"compacted" : false,
"extraMetadata" : { },
"totalScanTime" : 0,
"totalCreateTime" : 0,
"totalUpsertTime" : 0,
"totalCompactedRecordsUpdated" : 0,
"totalLogFilesCompacted" : 0,
"totalLogFilesSize" : 0,
"fileIdAndRelativePaths" : {
"8c8db0e3-964d-4a2c-b0fe-f4306d08b5c8-0" : null
},
"totalRecordsDeleted" : 0,
"totalLogRecordsCompacted" : 0
}
1.1.6 hoodie.properties 文件格式
#Properties saved on Thu Oct 15 07:36:26 UTC 2020
#Thu Oct 15 07:36:26 UTC 2020
hoodie.table.name=stock_ticks_cow
hoodie.archivelog.folder=archived
hoodie.table.type=COPY_ON_WRITE
hoodie.timeline.layout.version=1
1.2 /${tablename}/分区n/分区n-n/分区n-n-n/...
Permission | Owner | Group | Size | Last Modified | Replication | Block Size | Name |
---|---|---|---|---|---|---|---|
-rw-r--r-- | root | supergroup | 93 B | Oct 15 15:45 | 3 | 128 MB | .hoodie_partition_metadata |
-rw-r--r-- | root | supergroup | 433.3 KB | Oct 15 15:45 | 3 | 128 MB | 8c8db0e3-964d-4a2c-b0fe-f4306d08b5c8-0_0-22-22_20201015074529.parquet |
-rw-r--r-- | root | supergroup | 433.01 KB | Oct 15 16:20 | 3 | 128 MB | 8c8db0e3-964d-4a2c-b0fe-f4306d08b5c8-0_0-22-25_20201015082021.parquet |
1.2.1 .hoodie_partition_metadata 文件格式
#partition metadata
#Thu Oct 15 07:45:31 UTC 2020
commitTime=20201015074529
partitionDepth=3
1.2.2 *.parquet 文件格式
5041 5231 1504 1524 154c 4c15 0215 0400
001f 8b08 0000 0000 0000 00e3 6360 6030
3230 3230 3430 3435 3037 3135 b204 0078
5454 0112 0000 0015 0015 1415 382c 158a
0315 0415 0615 081c 180e 3230 3230 3130
3135 3037 3435 3239 180e 3230 3230 3130
3135 3037 3435 3239 1600 280e 3230 3230
3130 3135 3037 3435 3239 180e 3230 3230
......
二、MOR 类型表目录结构
表名字为 stock_ticks_mor
Permission | Owner | Group | Size | Last Modified | Replication | Block Size | Name |
---|---|---|---|---|---|---|---|
drwxr-xr-x | root | supergroup | 0 B | Oct 15 16:20 | 0 | 0 B | .hoodie |
drwxr-xr-x | root | supergroup | 0 B | Oct 15 15:45 | 0 | 0 B | 2018 |
2.1 /tablename/.hoodie
Permission | Owner | Group | Size | Last Modified | Replication | Block Size | Name |
---|---|---|---|---|---|---|---|
-rw-r--r-- | root | supergroup | 968 B | Oct 15 15:45 | 3 | 128 MB | 20201015074553.rollback |
-rw-r--r-- | root | supergroup | 0 B | Oct 15 15:45 | 3 | 128 MB | 20201015074553.rollback.inflight |
-rw-r--r-- | root | supergroup | 2.21 KB | Oct 15 15:45 | 3 | 128 MB | 20201015074554.deltacommit |
-rw-r--r-- | root | supergroup | 350 B | Oct 15 15:45 | 3 | 128 MB | 20201015074554.deltacommit.inflight |
-rw-r--r-- | root | supergroup | 0 B | Oct 15 15:45 | 3 | 128 MB | 20201015074554.deltacommit.requested |
-rw-r--r-- | root | supergroup | 2.26 KB | Oct 15 16:20 | 3 | 128 MB | 20201015082051.deltacommit |
-rw-r--r-- | root | supergroup | 1.01 KB | Oct 15 16:20 | 3 | 128 MB | 20201015082051.deltacommit.inflight |
-rw-r--r-- | root | supergroup | 0 B | Oct 15 16:20 | 3 | 128 MB | 20201015082051.deltacommit.requested |
-rw-r--r-- | root | supergroup | 305 B | Oct 15 15:37 | 3 | 128 MB | hoodie.properties |
drwxr-xr-x | root | supergroup | 0 B | Oct 15 15:37 | 0 | 0 B | .aux |
drwxr-xr-x | root | supergroup | 0 B | Oct 15 15:45 | 0 | 0 B | .temp |
drwxr-xr-x | root | supergroup | 0 B | Oct 15 15:37 | 0 | 0 B | archived |
2.1.1 *.rollback 文件格式
(Avro 容器文件,二进制文件头以魔数 `Obj` + 0x01 开头,无法直接以文本显示;文件头元数据 avro.schema 的内容如下)
{"type":"record","name":"HoodieRollbackMetadata","namespace":"org.apache.hudi.avro.model","fields":[{"name":"startRollbackTime","type":{"type":"string","avro.java.string":"String"}},{"name":"timeTakenInMillis","type":"long"},{"name":"totalFilesDeleted","type":"int"},{"name":"commitsRollback","type":{"type":"array","items":{"type":"string","avro.java.string":"String"}}},{"name":"partitionMetadata","type":{"type":"map","values":{"type":"record","name":"HoodieRollbackPartitionMetadata","fields":[{"name":"partitionPath","type":{"type":"string","avro.java.string":"String"}},{"name":"successDeleteFiles","type":{"type":"array","items":{"type":"string","avro.java.string":"String"}}},{"name":"failedDeleteFiles","type":{"type":"array","items":{"type":"string","avro.java.string":"String"}}}]},"avro.java.string":"String"}},{"name":"version","type":["int","null"],"default":1}]}
2.1.2 *.rollback.inflight 文件格式
暂无复现场景
2.1.3 *.deltacommit 文件格式
{
"partitionToWriteStats" : {
"2018/08/31" : [ {
"fileId" : "c7922a25-5d97-4add-8580-127fd14aa494-0",
"path" : "2018/08/31/c7922a25-5d97-4add-8580-127fd14aa494-0_0-22-22_20201015074554.parquet",
"prevCommit" : "null",
"numWrites" : 197,
"numDeletes" : 0,
"numUpdateWrites" : 0,
"numInserts" : 197,
"totalWriteBytes" : 443699,
"totalWriteErrors" : 0,
"tempPath" : null,
"partitionPath" : "2018/08/31",
"totalLogRecords" : 0,
"totalLogFilesCompacted" : 0,
"totalLogSizeCompacted" : 0,
"totalUpdatedRecordsCompacted" : 0,
"totalLogBlocks" : 0,
"totalCorruptLogBlock" : 0,
"totalRollbackBlocks" : 0,
"fileSizeInBytes" : 443699
} ]
},
"compacted" : false,
"extraMetadata" : {
"ROLLING_STAT" : "{\n \"partitionToRollingStats\" : {\n \"2018/08/31\" : {\n \"c7922a25-5d97-4add-8580-127fd14aa494-0\" : {\n \"fileId\" : \"c7922a25-5d97-4add-8580-127fd14aa494-0\",\n \"inserts\" : 197,\n \"upserts\" : 0,\n \"deletes\" : 0,\n \"totalInputWriteBytesToDisk\" : 0,\n \"totalInputWriteBytesOnDisk\" : 443699\n }\n }\n },\n \"actionType\" : \"deltacommit\"\n}",
"schema" : "{\"type\":\"record\",\"name\":\"stock_ticks\",\"fields\":[{\"name\":\"volume\",\"type\":\"long\"},{\"name\":\"ts\",\"type\":\"string\"},{\"name\":\"symbol\",\"type\":\"string\"},{\"name\":\"year\",\"type\":\"int\"},{\"name\":\"month\",\"type\":\"string\"},{\"name\":\"high\",\"type\":\"double\"},{\"name\":\"low\",\"type\":\"double\"},{\"name\":\"key\",\"type\":\"string\"},{\"name\":\"date\",\"type\":\"string\"},{\"name\":\"close\",\"type\":\"double\"},{\"name\":\"open\",\"type\":\"double\"},{\"name\":\"day\",\"type\":\"string\"}]}",
"deltastreamer.checkpoint.key" : "stock_ticks,0:3482"
},
"fileIdAndRelativePaths" : {
"c7922a25-5d97-4add-8580-127fd14aa494-0" : "2018/08/31/c7922a25-5d97-4add-8580-127fd14aa494-0_0-22-22_20201015074554.parquet"
},
"totalRecordsDeleted" : 0,
"totalLogRecordsCompacted" : 0,
"totalScanTime" : 0,
"totalCreateTime" : 1280,
"totalUpsertTime" : 0,
"totalCompactedRecordsUpdated" : 0,
"totalLogFilesCompacted" : 0,
"totalLogFilesSize" : 0
}
2.1.4 *.deltacommit.inflight 文件格式
{
"partitionToWriteStats" : { },
"compacted" : false,
"extraMetadata" : { },
"fileIdAndRelativePaths" : { },
"totalRecordsDeleted" : 0,
"totalLogRecordsCompacted" : 0,
"totalScanTime" : 0,
"totalCreateTime" : 0,
"totalUpsertTime" : 0,
"totalCompactedRecordsUpdated" : 0,
"totalLogFilesCompacted" : 0,
"totalLogFilesSize" : 0
}
2.1.5 *.deltacommit.requested 文件格式
暂无复现场景
2.1.6 hoodie.properties 文件格式
#Properties saved on Thu Oct 15 07:37:05 UTC 2020
#Thu Oct 15 07:37:05 UTC 2020
hoodie.compaction.payload.class=org.apache.hudi.common.model.OverwriteWithLatestAvroPayload
hoodie.table.name=stock_ticks_mor
hoodie.archivelog.folder=archived
hoodie.table.type=MERGE_ON_READ
hoodie.timeline.layout.version=1
2.2 /${tablename}/分区n/分区n-n/分区n-n-n/...
| Permission | Owner | Group | Size | Last Modified | Replication | Block Size | Name |
| :--------- | :---- | :--------- | :------- | :------------ | :---------- | :--------- | :----------------------------------------------------------- |
| -rw-r--r-- | root | supergroup | 21.04 KB | Oct 15 16:20 | 3 | 512 MB | .c7922a25-5d97-4add-8580-127fd14aa494-0_20201015074554.log.1_0-22-25 |
| -rw-r--r-- | root | supergroup | 93 B | Oct 15 15:45 | 3 | 128 MB | .hoodie_partition_metadata |
| -rw-r--r-- | root | supergroup | 433.3 KB | Oct 15 15:45 | 3 | 128 MB | c7922a25-5d97-4add-8580-127fd14aa494-0_0-22-22_20201015074554.parquet |
2.2.1 *.log.* 文件格式
2348 5544 4923 0000 0000 0000 541e 0000
0001 0000 0003 0000 0002 0000 0000 0000
000e 3230 3230 3130 3135 3038 3230 3531
......
2.2.2 .hoodie_partition_metadata 文件格式
#partition metadata
#Thu Oct 15 07:45:56 UTC 2020
commitTime=20201015074554
partitionDepth=3
2.2.3 *.parquet 文件格式
5041 5231 1504 1524 154c 4c15 0215 0400
001f 8b08 0000 0000 0000 00e3 6360 6030
3230 3230 3430 3435 3037 3135 3501 0002
bea4 3012 0000 0015 0015 1415 382c 158a
......