2018-08-13 multi-threads (2)

Protocol buffer: a serialized data format (like JSON or XML)

# Note the nesting: Example -> Features -> {name: Feature} -> typed value list.
# `index`, `img_raw` and `writer` are expected to be defined by surrounding code.
label_feature = tf.train.Feature(
    int64_list=tf.train.Int64List(value=[index]))
image_feature = tf.train.Feature(
    bytes_list=tf.train.BytesList(value=[img_raw]))
feature_map = {"label": label_feature, 'img_raw': image_feature}
example = tf.train.Example(features=tf.train.Features(feature=feature_map))
# Serialize the Example to a string and append it to the record file.
serialized = example.SerializeToString()
writer.write(serialized)

read

# Walk over every serialized record in the TFRecord file and decode it back
# into a tf.train.Example without building a graph or a session.
# (Snippet taken from the article "Efficient data reading in TensorFlow".)
for serialized_example in tf.python_io.tf_record_iterator("train.tfrecords"):
    example = tf.train.Example()
    # Parse the serialized bytes back into the protocol buffer message.
    example.ParseFromString(serialized_example)
    # Pull out the individual features stored in the Example.  The key must
    # match the one used when writing: the writer snippet above stores the
    # image bytes under 'img_raw', not 'image'.
    image = example.features.feature['img_raw'].bytes_list.value
    label = example.features.feature['label'].int64_list.value
    # Any preprocessing could go here.
    # Same output as the Python 2 statement `print image, label`, but also
    # valid on Python 3 (the original referenced the undefined name `labe`).
    print("%s %s" % (image, label))

Note: after string_input_producer() is called, the filename queue is still empty until start_queue_runners() is called.
cast(data, dtype) is used to convert data to another type.

import tensorflow as tf
# Read CSV lines from several files and batch them with tf.train.batch_join().
filenames = ['A.csv', 'B.csv', 'C.csv']
# num_epochs: number of passes made over the file list before the queue
# is closed and OutOfRangeError is raised to the consumer.
filename_queue = tf.train.string_input_producer(
    filenames, shuffle=False, num_epochs=3)
reader = tf.TextLineReader()
key, value = reader.read(filename_queue)
record_defaults = [['null'], ['null']]
# Build two decode ops.  NOTE: both are fed by the same `value` tensor, i.e.
# they share the single `reader` created above; for truly independent
# readers, each list entry would need its own reader.read() call.
example_list = [tf.decode_csv(value, record_defaults=record_defaults)
                for _ in range(2)]
# tf.train.batch_join() takes a list of tensor lists so that several
# readers/decoders can feed one batch queue in parallel.
example_batch, label_batch = tf.train.batch_join(
    example_list, batch_size=1)
# Initialize local variables (needed because num_epochs is set; per the TF
# docs the epoch counter is kept in a local variable).
init_local_op = tf.initialize_local_variables()
with tf.Session() as sess:
    sess.run(init_local_op)
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(coord=coord)
    try:
        while not coord.should_stop():
            e_val, l_val = sess.run([example_batch, label_batch])
            # Same output as the Python 2 statement `print e_val, l_val`.
            print("%s %s" % (e_val, l_val))
    except tf.errors.OutOfRangeError:
        # Raised once the filename queue has served all epochs.
        print('Epochs Complete!')
    finally:
        # Always ask the queue-runner threads to stop, even on error.
        coord.request_stop()
    # Wait for the threads to finish.  The original repeated the
    # request_stop()/join() pair a second time; once is sufficient.
    coord.join(threads)

multi-reader, multi-threads

import tensorflow as tf
# Create the filename queue: a FIFO queue plus the QueueRunner that fills it.
filenames = ['A.csv']
filename_queue = tf.train.string_input_producer(filenames, shuffle=False)
# Define the reader.
reader = tf.TextLineReader()
key, value = reader.read(filename_queue)
# Define the decoder: five integer columns, each defaulting to 1.
record_defaults = [[1], [1], [1], [1], [1]]
col1, col2, col3, col4, col5 = tf.decode_csv(
    value, record_defaults=record_defaults)
# The first three columns form the features, the last two the label.
# NOTE(review): tf.pack was renamed tf.stack in TensorFlow 1.0 -- switch if
# this is run on a 1.x release.
features = tf.pack([col1, col2, col3])
label = tf.pack([col4, col5])
example_batch, label_batch = tf.train.shuffle_batch(
    [features, label], batch_size=2, capacity=200,
    min_after_dequeue=100, num_threads=2)
# Run the graph.
with tf.Session() as sess:
    # The coordinator manages the queue-runner threads.
    coord = tf.train.Coordinator()
    # Start the QueueRunners; from here on the filename queue gets filled.
    threads = tf.train.start_queue_runners(coord=coord)
    try:
        for i in range(10):
            e_val, l_val = sess.run([example_batch, label_batch])
            # Same output as the Python 2 statement `print e_val, l_val`.
            print("%s %s" % (e_val, l_val))
    finally:
        # Request shutdown even if sess.run() raised, so the background
        # threads are not left running.
        coord.request_stop()
    coord.join(threads)

one-hot encoding

# Illustrative pseudo-code (the "...do something..." lines are placeholders,
# not valid Python).  Assumes `tf` and `threading` are imported elsewhere.
# MyLoop is the body run by each worker thread: it keeps working until the
# shared coordinator reports that a stop was requested, and may itself
# request the stop for all threads.
def MyLoop(coord):
  while not coord.should_stop():
    ...do something...
    if ...some condition...:
      # Signals every thread polling should_stop() on this coordinator.
      coord.request_stop()
 
# Main thread: create a coordinator.
coord = tf.train.Coordinator()
 
# Create 10 threads that run 'MyLoop()'
threads = [threading.Thread(target=MyLoop, args=(coord,)) for i in xrange(10)]
 
# Start the threads and wait for all of them to stop.
for t in threads:
  t.start()
# Blocks the main thread until the listed threads have finished.
coord.join(threads)
# Build a QueueRunner for `queue` from a list holding four references to
# `enqueue_op` (`queue`, `enqueue_op` and `train_op` come from elsewhere).
qr = tf.train.QueueRunner(queue, [enqueue_op] * 4)
# Open the session that will execute the graph.
sess = tf.Session()
# The coordinator is shared by the training loop and the enqueue threads.
coord = tf.train.Coordinator()
# Create the queue runner's threads and start them immediately.
enqueue_threads = qr.create_threads(sess, coord=coord, start=True)
# Train for at most 1000000 steps, leaving early once a stop is requested.
for step in xrange(1000000):
    if not coord.should_stop():
        sess.run(train_op)
    else:
        break
# Training is over: tell the enqueue threads to stop ...
coord.request_stop()
# ... and block until they have actually exited.
coord.join(enqueue_threads)

image.png
©著作权归作者所有,转载或内容合作请联系作者
【社区内容提示】社区部分内容疑似由AI辅助生成,浏览时请结合常识与多方信息审慎甄别。
平台声明:文章内容(如有图片或视频亦包括在内)由作者上传并发布,文章内容仅代表作者本人观点,简书系信息发布平台,仅提供信息存储服务。

推荐阅读更多精彩内容

  • 电影《神探》可以说是银河映像第二个十年的扛鼎之作。也再一次印证了杜琪峰和韦家辉的创作组合必出精品。如果说进入200...
    重庆森林cqsenlin阅读 3,508评论 0 0
  • 昨天,黄老师让我们讨论一下“我们选择了学习音乐,但是否是真的热爱音乐”这个话题。 看过陈晓慧同学的...
    柿子大宝阅读 4,937评论 14 12
  • 00后疯狂肆虐 曾经的90后开始变老的时候 我们迷茫尴尬 受不了过去的哥哥早已变成了叔叔 身体里模糊存活着一个带着...
    若向风阅读 1,445评论 0 0

友情链接更多精彩内容