基于osmpbfreader.h头文件写的存储node的ID、Lon/Lat为dense+Varint形式的文件读写练习
writer.cc
/*
// 这个程序将定义node结构体,
// 将读入的osmpbf文件中的node 存储为结构体,然后保存为二进制文件
To build this file :
g++ -o writer writer.cc -losmpbf -lprotobuf -lz
To run it:
./writer path_to_your_data.osm.pbf path_to_your_write.nd
*/
#include "osmpbfreader.h"
#include <ctime>
#include <algorithm> //for iterator
#include <map>
using namespace CanalTP;
typedef unsigned char byte;
//////////////////////////////////////////////////////////////////
int o5_uvar64buf(byte* bufp,uint64_t v) {
    // Encodes the unsigned 64-bit value v as a base-128 varint starting at
    // bufp: seven payload bits per byte, least-significant group first, with
    // bit 0x80 set on every byte except the last one.
    // bufp is received by value, so the caller's pointer is NOT advanced; the
    // caller adds the returned length itself. Up to 10 bytes may be written.
    // Returns: the number of bytes written.
    byte* out = bufp;
    while (v > 0x7f) {
        // More groups follow: emit the low seven bits with the continuation bit.
        *out++ = static_cast<byte>((v & 0x7f) | 0x80);
        v >>= 7;
    }
    *out++ = static_cast<byte>(v);  // last group, continuation bit clear
    return static_cast<int>(out - bufp);
} // end o5_uvar64buf()
int o5_svar64buf(byte* bufp,int64_t v) {
    // Encodes the signed 64-bit value v as a signed varint starting at bufp.
    // The value is first mapped to an unsigned one with the sign in bit 0
    // (0 -> 0, -1 -> 1, 1 -> 2, -2 -> 3, ...) and then written as a base-128
    // varint: seven payload bits per byte, least-significant group first,
    // bit 0x80 set on every byte except the last.
    // bufp is received by value, so the caller's pointer is NOT advanced; the
    // caller adds the returned length itself. Up to 10 bytes may be written.
    // Returns: the number of bytes written.

    // The mapping is done entirely in unsigned arithmetic. Negating v directly
    // would overflow (undefined behaviour) for INT64_MIN.
    const uint64_t sign_fill = v < 0 ? ~static_cast<uint64_t>(0) : static_cast<uint64_t>(0);
    uint64_t u = (static_cast<uint64_t>(v) << 1) ^ sign_fill;

    byte* out = bufp;
    while (u > 0x7f) {
        *out++ = static_cast<byte>((u & 0x7f) | 0x80);
        u >>= 7;
    }
    *out++ = static_cast<byte>(u);  // last group, continuation bit clear
    return static_cast<int>(out - bufp);
} // end o5_svar64()
int o5_svar32buf(byte* bufp,int32_t v) {
    // Encodes the signed 32-bit value v as a signed varint starting at bufp.
    // The value is first mapped to an unsigned one with the sign in bit 0
    // (0 -> 0, -1 -> 1, 1 -> 2, -2 -> 3, ...) and then written as a base-128
    // varint: seven payload bits per byte, least-significant group first,
    // bit 0x80 set on every byte except the last.
    // bufp is received by value, so the caller's pointer is NOT advanced; the
    // caller adds the returned length itself. Up to 5 bytes may be written.
    // Returns: the number of bytes written.

    // The mapping is done entirely in unsigned arithmetic. Negating v directly
    // would overflow (undefined behaviour) for INT32_MIN.
    const uint32_t sign_fill = v < 0 ? ~static_cast<uint32_t>(0) : static_cast<uint32_t>(0);
    uint32_t u = (static_cast<uint32_t>(v) << 1) ^ sign_fill;

    byte* out = bufp;
    while (u > 0x7f) {
        *out++ = static_cast<byte>((u & 0x7f) | 0x80);
        u >>= 7;
    }
    *out++ = static_cast<byte>(u);  // last group, continuation bit clear
    return static_cast<int>(out - bufp);
} // end o5_svar32()
////////////////////////////////////////////
struct Node {
    // Plain record for one node: its ID plus longitude and latitude held as
    // 32-bit integers (per the original note: the decimal point is shifted
    // right by seven digits and the result is truncated to an integer).
    int64_t ID;
    int32_t Lon;
    int32_t Lat;

    // Default construction leaves all three fields uninitialized.
    Node() {}

    // Stores the given values unchanged.
    Node(int64_t id, int32_t lon, int32_t lat) {
        ID = id;
        Lon = lon;
        Lat = lat;
    }
};
// 需要定义一个visitor,它应该有三个方法,每当读文件时就会调用
struct Writer {
    // Visitor handed to read_osm_pbf(). For every node it delta-encodes the
    // ID, longitude and latitude as signed varints and writes them to a binary
    // file in groups of kNodesPerGroup nodes. Each group is stored as
    //     [int: payload length in bytes][payload]
    // Deltas are taken against the previously seen node and keep running
    // across group boundaries; the very first node is encoded relative to
    // 0/0/0. Ways and relations are ignored.
    enum {
        kNodesPerGroup = 10000,  // nodes per group (about 140 KB per the original note)
        kMaxBytesPerNode = 20    // 10 bytes (64-bit varint) + 2 * 5 bytes (32-bit varints)
    };

    uint64_t n_count = 0;   // total number of nodes received so far
    int group_count = 0;    // nodes encoded into buf since the last flush
    std::ofstream fout;
    byte buf[kNodesPerGroup * kMaxBytesPerNode];  // 200000 bytes: worst case for one group
    byte* bufp = buf;       // start of the staging buffer
    int t = 0;              // number of payload bytes currently staged in buf

    Writer(const std::string & filename)
        : fout(filename.c_str(), std::ios::binary )
    {
        // Open the output file; report through the header's fatal()/msg() helpers.
        if(!fout.is_open())
            fatal() << "Unable to open the file " << filename;
        msg() << "writing nodes to the file" << filename;
    }

    ~Writer() {
        // The last group is usually only partly filled; write it out before closing.
        flush_group();
        fout.close();
        msg() << "Finished, "<< n_count <<" nodes total.";
    }

    int flag = 0;           // unused; kept so the struct's public members are unchanged
    int64_t temp_id = 0;    // ID of the previously encoded node
    int32_t temp_lon = 0;   // fixed-point longitude of the previously encoded node
    int32_t temp_lat = 0;   // fixed-point latitude of the previously encoded node

    // Difference current - previous computed with wrap-around. Two longitudes
    // can be more than INT32_MAX fixed-point units apart, and a plain signed
    // subtraction would overflow in that case.
    static int32_t wrapping_delta(int32_t current, int32_t previous) {
        return static_cast<int32_t>(static_cast<uint32_t>(current) - static_cast<uint32_t>(previous));
    }

    // Writes the staged group as [int length][payload] and empties the buffer.
    // Does nothing when no bytes are staged.
    void flush_group() {
        if (t == 0)
            return;
        fout.write(reinterpret_cast<const char*>(&t), sizeof(int));  // size header
        fout.write(reinterpret_cast<const char*>(buf), t);           // payload
        t = 0;
        group_count = 0;
    }

    // Called for every node that is read. lon/lat are converted to integers by
    // multiplying with 1E7 and truncating; tags and Ninfo are not used.
    void node_callback(uint64_t node_id, double lon, double lat, const Tags & tags,const Info & Ninfo){
        const int64_t id = static_cast<int64_t>(node_id);
        const int32_t lon_fixed = static_cast<int32_t>(lon * 1E7);
        const int32_t lat_fixed = static_cast<int32_t>(lat * 1E7);

        t += o5_svar64buf(bufp + t, id - temp_id);
        t += o5_svar32buf(bufp + t, wrapping_delta(lon_fixed, temp_lon));
        t += o5_svar32buf(bufp + t, wrapping_delta(lat_fixed, temp_lat));

        temp_id = id;
        temp_lon = lon_fixed;
        temp_lat = lat_fixed;

        ++n_count;
        if (++group_count == kNodesPerGroup)
            flush_group();  // group is full: write it and start a new one
    }

    // This method is called every time a Way is read
    // refs is a vector that contains the reference to the nodes that compose the way
    // Ways are not stored by this writer.
    void way_callback(uint64_t way_id, const Tags &tags, const std::vector<uint64_t> &refs, const Info & Winfo){
    }

    // This method is called every time a Relation is read
    // refs is a vector of pair corresponding of the relation type (Node, Way, Relation) and the reference to the object
    // Relations are not stored by this writer.
    void relation_callback(uint64_t relation_id, const Tags &tags, const References &refs, const Info &Rinfo){
    }
};
int main(int argc, char** argv) {
    // Entry point of the writer tool.
    // argv[1]: the .osm.pbf file to read, argv[2]: the output file to write.
    // Returns 1 after printing a usage line when the argument count is wrong,
    // 0 otherwise.
    const bool args_ok = (argc == 3);
    if (!args_ok) {
        std::cout << "Usage: " << argv[0] << " file_to_read.osm.pbf file_to_write.nd" << std::endl;
        return 1;
    }

    // Time the whole run with clock().
    const clock_t began = clock();

    Writer writer(argv[2]);
    read_osm_pbf(argv[1], writer);

    const clock_t ended = clock();
    const double elapsed_seconds = static_cast<double>(ended - began) / CLOCKS_PER_SEC;
    // msg is a helper from the header that prints a message.
    // writer is destroyed after this line, when main returns.
    msg() << "Total time:" << elapsed_seconds << " seconds.";
    return 0;
}
reader.cc
#include <fstream>
#include <iostream>
#include <iomanip>
#include <stdint.h> // 定义了几种扩展的整数类型和宏
#include <netinet/in.h> // 提供了network-byte-order的转换函数
typedef unsigned char byte;
int32_t pbf_sint32(byte** pp) {
    // Decodes one signed varint and returns its value.
    // pp: address of the read pointer; on return *pp points to the byte right
    //     after the decoded value.
    // Layout: bit 0 of the first byte is the sign, bits 1..6 are the lowest
    // six magnitude bits; every following byte carries seven more bits. Bit
    // 0x80 marks "another byte follows". Negative values are stored as
    // -1 - magnitude.
    byte* cursor = *pp;
    const int32_t first = *cursor;
    const bool negative = (first & 1) != 0;
    int32_t magnitude = (first & 0x7e) >> 1;

    if (first & 0x80) {
        // Multi-byte value: the first byte supplied six bits, so the next
        // group is weighted by 2^6.
        int32_t weight = 0x40;
        for (++cursor; *cursor & 0x80; ++cursor) {
            magnitude += (*cursor & 0x7f) * weight;
            weight <<= 7;
        }
        magnitude += *cursor * weight;  // final byte, continuation bit clear
    }

    *pp = cursor + 1;
    return negative ? -1 - magnitude : magnitude;
} // pbf_sint32()
int64_t pbf_sint64(byte** pp) {
    // Decodes one signed 64-bit varint and returns its value.
    // The return type is int64_t so that values outside the 32-bit range
    // are returned without truncation.
    // pp: address of the read pointer; on return *pp points to the byte right
    //     after the decoded value.
    // Layout: bit 0 of the first byte is the sign, bits 1..6 are the lowest
    // six magnitude bits; every following byte carries seven more bits. Bit
    // 0x80 marks "another byte follows". Negative values are stored as
    // -1 - magnitude.
    byte* cursor = *pp;
    const int64_t first = *cursor;
    const bool negative = (first & 1) != 0;
    int64_t magnitude = (first & 0x7e) >> 1;

    if (first & 0x80) {
        // Multi-byte value: the first byte supplied six bits, so the next
        // group is weighted by 2^6.
        int64_t weight = 0x40;
        for (++cursor; *cursor & 0x80; ++cursor) {
            magnitude += (*cursor & 0x7f) * weight;
            weight <<= 7;
        }
        magnitude += *cursor * weight;  // final byte, continuation bit clear
    }

    *pp = cursor + 1;
    return negative ? -1 - magnitude : magnitude;
} // pbf_sint64()
uint64_t pbf_uint64(byte** pp) {
    // Decodes one unsigned varint and returns its value.
    // pp: address of the read pointer; on return *pp points to the byte right
    //     after the decoded value.
    // Each byte carries seven payload bits, least-significant group first;
    // bit 0x80 marks "another byte follows".
    byte* cursor = *pp;
    uint64_t value = *cursor & 0x7f;
    uint64_t weight = 0x80;  // place value of the next seven-bit group

    while (*cursor & 0x80) {
        // The current byte announced a successor: step onto it and add its bits.
        ++cursor;
        value += (*cursor & 0x7f) * weight;
        weight <<= 7;
    }

    *pp = cursor + 1;
    return value;
} // pbf_uint64()
struct Node {
    // Plain record for one node: a 64-bit ID plus longitude and latitude held
    // as 32-bit integers.
    int64_t ID;
    int32_t Lon;
    int32_t Lat;

    // Default construction leaves all three fields uninitialized.
    Node() {}

    // Stores the given values unchanged.
    Node(int64_t id, int32_t lon, int32_t lat) {
        ID = id;
        Lon = lon;
        Lat = lat;
    }
};
int main(int argc, char** argv) {
    // Entry point of the reader tool: reads the file named by argv[1], which
    // is a sequence of blocks
    //     [int: payload length in bytes][payload]
    // where each payload is a run of (ID, lon, lat) signed varints holding the
    // difference to the previous node. Prints one line per node and the total
    // node count at the end.
    // Returns 0 on success, 1 on a usage error, an unopenable file or a
    // corrupt block.
    if(argc != 2) {
        std::cout << "Usage: " << argv[0] << " file_to_read.nd" << std::endl;
        return 1;
    }
    // The file holds raw bytes, so it must be opened in binary mode.
    std::ifstream fin(argv[1], std::ios::in | std::ios::binary);
    if (!fin.is_open()) {
        std::cout<<"error!\n";
        return 1;
    }

    const int kMaxBlockBytes = 200000;  // largest payload this reader accepts (200 KB)
    // Three zero bytes are placed after each payload. Every decoder call stops
    // at the first byte without the 0x80 bit, so the three calls made per node
    // can never read past the buffer even if the payload is cut short.
    const int kGuardBytes = 3;
    byte buf[kMaxBlockBytes + kGuardBytes];

    uint64_t cnt = 0;       // number of nodes decoded
    int t = 0;              // payload length of the current block
    int64_t temp_id = 0;    // previous node, the base for the next delta
    int32_t temp_lon = 0;
    int32_t temp_lat = 0;

    while ( fin.read(reinterpret_cast<char*>(&t), sizeof(int)) ) { // not the end of file
        if (t < 0 || t > kMaxBlockBytes) {
            std::cerr << "corrupt file: invalid block size " << t << std::endl;
            return 1;
        }
        fin.read(reinterpret_cast<char*>(buf), t);
        if (fin.gcount() != t) {
            std::cerr << "corrupt file: truncated block" << std::endl;
            return 1;
        }
        for (int k = 0; k < kGuardBytes; ++k)
            buf[t + k] = 0x00;

        byte* bufp = buf;              // current read position
        byte* const bufe = buf + t;    // end of the payload
        // Stop at the end of the payload rather than at a zero byte: 0x00 is
        // also the valid encoding of a zero delta.
        while (bufp < bufe) {
            const int64_t ID = pbf_sint64(&bufp) + temp_id;
            // Coordinates are added with wrap-around so a delta that exceeds
            // the int32_t range still restores the original value.
            const int32_t LON = static_cast<int32_t>(
                static_cast<uint32_t>(pbf_sint32(&bufp)) + static_cast<uint32_t>(temp_lon));
            const int32_t LAT = static_cast<int32_t>(
                static_cast<uint32_t>(pbf_sint32(&bufp)) + static_cast<uint32_t>(temp_lat));
            if (bufp > bufe) {
                // The last node ran into the guard bytes: the payload was incomplete.
                std::cerr << "corrupt file: incomplete node at end of block" << std::endl;
                return 1;
            }
            cnt++;
            // Values are printed longitude first, then latitude.
            printf("ID: %10lld, lon/lat: %10.7f/%-10.7f\n",
                   static_cast<long long>(ID), LON * (1E-7), LAT * (1E-7));
            temp_id = ID;
            temp_lon = LON;
            temp_lat = LAT;
        }
    }
    fin.close();
    std::cout<<"total number::"<<cnt<<std::endl;
    return 0;
}