实验环境介绍
- gcc:4.8.5
- glibc:glibc-2.17-222.el7.x86_64
- os:Centos7.4
- kernel:3.10.0-693.21.1.el7.x86_64
本章描述的函数被称为不带缓冲的I/O(将与第5章的标准I/O函数对照)。不带缓冲的I/O函数不是ISO C的组成部分,而是POSIX.1和Single UNIX Specification的组成部分。本章还将进一步讨论多个进程间如何共享文件。
文件描述符
当打开或者创建一个新文件时,内核向进程返回一个文件描述符,通过这个文件描述符对文件进行读写。
open和openat函数
- open函数的几个参数(后面使用代码详细测试):
[图片上传失败...(image-46bae1-1530629898456)] - 由open和openat返回的一定是最小的未使用的描述符数值。如:可以先关闭标准输出,然后使用标准输出(描述符为1)来打开另外一个文件,代码如下:
#include <stdio.h>
#include <stdlib.h>
#include <fcntl.h>
#include <unistd.h>
#define FILE_PATH "/root/open.tmp"
/*
 * Closes standard output and then opens FILE_PATH, so the descriptor number
 * handed back by open() can be observed.  The report is written to stderr
 * because stdout has just been closed.
 */
int
main(int argc, char *argv[])
{
    int fd;

    close(STDOUT_FILENO);

    fd = open(FILE_PATH, O_RDWR | O_CREAT | O_TRUNC, 0666);
    if (fd == -1) {
        fprintf(stderr, "open file error!!\n");
        exit(EXIT_SUCCESS);
    }

    fprintf(stderr, "fd of opening %s is: %d\n", FILE_PATH, fd);
    close(fd);
    exit(EXIT_SUCCESS);
}
result:
[root@localhost part_3]# ./3_3
fd of opening /root/open.tmp is: 1
- openat解决的问题:
- 让线程可以使用相对路径名打开目录中的文件,而不再只能打开当前工作目录中的文件。因为同一进程的所有线程共享相同的当前工作目录,因此很难让同一进程的多个不同线程在同一时间工作在不同的目录中
- 避免TOCTTOU错误:如果有两个基于文件的函数调用,其中第二个调用依赖于第一个调用的结果,两个调用不是原子操作,可能两个操作之间文件改变了。导致结果错误。比如一个进程打开一个目录中的文件,如果这个目录发生变化(卸载之类的),此时如果继续读写就会和预期不一致,他可能会创建一个新的文件来进行操作。
#include <stdio.h>
#include <stdlib.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/types.h>
#include <dirent.h>
#define FILE_PATH "/root/open.tmp"
#define FILE_DIR "/root"
#define FILENAME "open.tmp"
#define CUR_FILE_PATH "/root/source/part_3/open.tmp"
#define CUR_DIR "/root/source/part_3"
/*
 * Print the outcome of one open()/openat() attempt and release the
 * descriptor.  `api` is the name of the call being demonstrated and `path`
 * is the file name shown in the message.
 */
static void
report_and_close(const char *api, const char *path, int fd)
{
    if (fd < 0) {
        printf("%s open %s error\n", api, path);
        return;
    }
    printf("%s open %s fd is: %d\n", api, path, fd);
    close(fd);
}

/*
 * Open `path` with openat(), resolving it against the directory `dir_path`.
 * Returns the new descriptor, or -1 if the directory or the file cannot be
 * opened.  The directory stream is closed before returning, so nothing but
 * the returned descriptor stays open.
 */
static int
openat_in_dir(const char *dir_path, const char *path)
{
    DIR *dir = opendir(dir_path);
    int fd;

    if (dir == NULL) {
        printf("opendir %s error\n", dir_path);
        return -1;
    }
    fd = openat(dirfd(dir), path, O_RDWR | O_CREAT | O_TRUNC, 0666);
    /* closedir() also closes the descriptor returned by dirfd(). */
    closedir(dir);
    return fd;
}

/*
 * Compares open() and openat() in three situations.  Every descriptor is
 * closed before the next step, so descriptor numbers are reused from one
 * step to the next.
 */
int
main(int argc, char *argv[])
{
    const int flags = O_RDWR | O_CREAT | O_TRUNC;

    /* Case 1: a file in another directory, named by an absolute path. */
    /* 1.1 open() */
    report_and_close("open", FILE_PATH, open(FILE_PATH, flags, 0666));
    /* 1.2 openat(): the path is absolute, so the directory fd is ignored. */
    report_and_close("openat", FILE_PATH, openat_in_dir(FILE_DIR, FILE_PATH));

    /* Case 2: the same file, named relative to its directory. */
    /* 2.1 open() has no directory argument, so it still gets the full path. */
    report_and_close("open", FILE_PATH, open(FILE_PATH, flags, 0666));
    /* 2.2 openat() resolves FILENAME against FILE_DIR. */
    report_and_close("openat", FILE_PATH, openat_in_dir(FILE_DIR, FILENAME));

    /* Case 3: a relative path resolved against the current working directory. */
    /* 3.1 open() with the absolute path of the file. */
    report_and_close("open", CUR_FILE_PATH, open(CUR_FILE_PATH, flags, 0666));
    /*
     * 3.2 openat() with AT_FDCWD resolves FILENAME against the current
     * working directory.  The message prints CUR_FILE_PATH, which presumably
     * assumes the program is started from CUR_DIR -- TODO confirm.
     */
    report_and_close("openat", CUR_FILE_PATH,
                     openat(AT_FDCWD, FILENAME, flags, 0666));

    exit(EXIT_SUCCESS);
}
result:
[root@localhost part_3]# ./3_4
open open /root/open.tmp fd is: 3
openat open /root/open.tmp fd is: 4
open open /root/open.tmp fd is: 4
openat open /root/open.tmp fd is: 5
open open /root/source/part_3/open.tmp fd is: 5
openat open /root/source/part_3/open.tmp fd is: 6
creat函数
吐槽一哈,当时写这个函数的人,把这个单词写错了
- 这个creat创建函数有局限性,所以还不如用open和openat函数
close函数
- 关闭一个文件描述符会释放在该文件上的记录锁。进程终止时会自动关闭所有的文件
lseek函数
- 如果文件描述符指向的是一个管道、fifo或者网络套接字,则不能设置偏移(是否能设置偏移在于文件描述符指向的文件是否能够被设置偏移),还得注意某些文件是支持负的偏移量的
- 测试标准输入能否设置偏移量
#include <sys/types.h>
#include <unistd.h>
#include <stdio.h>
#include <stdlib.h>
/*
 * Reports whether the file behind standard input accepts lseek().  The
 * offset is not moved: the call only asks for the current position.
 */
int
main(int argc, char *argv[])
{
    off_t pos = lseek(STDIN_FILENO, 0, SEEK_CUR);
    const char *verdict = (pos == (off_t)-1)
                              ? "cannot set STDIN_FILENO seek\n"
                              : "set STDIN_FILENO seek OK\n";

    fputs(verdict, stdout);
    exit(EXIT_SUCCESS);
}
result:
// 这个标准输入指向的是bash
[root@localhost part_3]# ./3_6
cannot set STDIN_FILENO seek
// 这个标准输入指向的是/etc/passwd
[root@localhost part_3]# ./3_6 < /etc/passwd
set STDIN_FILENO seek OK
// 这个标准输入指向的是./open.tmp .一个普通的文本文件
[root@localhost part_3]# ./3_6 < ./open.tmp
set STDIN_FILENO seek OK
// bash的管道作为标准输入
[root@localhost part_3]# cat /etc/passwd | ./3_6
cannot set STDIN_FILENO seek
// 管道文件作为标准输入,运行程序后,往管道中输入一些数据
[root@localhost part_3]# ./3_6 < ./fifo.tmp
[root@localhost part_3]# echo sdf > fifo.tmp
cannot set STDIN_FILENO seek
- 设置偏移量大于文件长度,这样会创建一个空洞文件(第4章我们再进一步说明空洞文件)
#include <fcntl.h>
#include <sys/types.h>
#include <unistd.h>
#include <stdio.h>
#include <stdlib.h>
/* Payloads written before and after the hole (the trailing NUL is not written). */
char buf1[] = "abcdefghij";
char buf2[] = "ABCDEFGHIJ";

/*
 * Creates "file.hole" containing buf1 at offset 0 and buf2 at offset 20,
 * leaving bytes 10..19 unwritten.  Stops at the first failing step instead
 * of carrying on with an invalid descriptor, and exits with EXIT_FAILURE in
 * that case.
 */
int
main(int argc, char *argv[])
{
    const size_t len1 = sizeof buf1 - 1;
    const size_t len2 = sizeof buf2 - 1;
    int status = EXIT_FAILURE;
    int fd;

    if ((fd = creat("file.hole", 0666)) < 0) {
        printf("create file.hole error\n");
        exit(EXIT_FAILURE);
    }

    if (write(fd, buf1, len1) != (ssize_t)len1) {
        printf("write buf1 error\n");
        goto out;
    }
    /* now the offset of the file = 10 */

    if (lseek(fd, 20, SEEK_SET) == -1) {
        printf("lseek the file.hole error\n");
        goto out;
    }
    /* now the offset of the file = 20 */

    if (write(fd, buf2, len2) != (ssize_t)len2) {
        printf("write buf2 error\n");
        goto out;
    }
    /* now the offset of the file = 30 */

    status = EXIT_SUCCESS;
out:
    close(fd);
    exit(status);
}
result:
[root@localhost part_3]# od -c file.hole
0000000 a b c d e f g h i j \0 \0 \0 \0 \0 \0
0000020 \0 \0 \0 \0 A B C D E F G H I J
0000036
read函数
- 有多种情况可使实际读到的字节数少于要求读的字节数
- 读普通文件,在读到要求字节数之前已经到达了文件尾端。如:在到达文件尾端之前有30个字节,要求读100字节,则read返回30,下次再调用read时,它将返回0(文件尾端)
- 测试代码如下:
#include <stdio.h>
#include <stdlib.h>
#include <fcntl.h>
#include <unistd.h>
#include <limits.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <string.h>
#include <errno.h>
#define FILE_PATH "/root/source/part_3/open.tmp"
/*
 * Reads FILE_PATH twice with a 100-byte request to show a short read
 * followed by end of file.  The first read() returns however many bytes the
 * file holds (up to 100); the second returns 0 once the end is reached.
 */
int
main(int argc, char *argv[])
{
    /* No O_CREAT, so no mode argument is needed. */
    int fd = open(FILE_PATH, O_RDWR);
    if (fd == -1) {
        fprintf(stderr, "open file error!!\n");
        /* Nothing below can work without a valid descriptor. */
        exit(EXIT_FAILURE);
    }
    fprintf(stderr, "fd of opening %s is: %d\n", FILE_PATH, fd);

    off_t off = lseek(fd, 0, SEEK_SET);
    if (off == (off_t)-1) {
        perror("lseek error");
    } else {
        /* off_t has no printf length modifier of its own; widen it explicitly. */
        printf("set lseek = %lld\n", (long long)off);
    }

    char buffer[100] = {0};
    size_t read_size = sizeof buffer;
    /* Clamp the request so it never exceeds SSIZE_MAX. */
    if (read_size > SSIZE_MAX) {
        read_size = SSIZE_MAX;
    }

    /*
     * errno is cleared before each read() so that, when read() returns 0 at
     * end of file, perror() reports "Success" instead of a stale error left
     * behind by an earlier call.
     */
    errno = 0;
    ssize_t size = read(fd, buffer, read_size);
    if (size <= 0) {
        perror("first reading error");
    } else {
        printf("first reading successfully, size = %zd\n", size);
    }

    char buffer2[100] = {0};
    errno = 0;
    size = read(fd, buffer2, read_size);
    if (size <= 0) {
        perror("second reading error");
    } else {
        printf("second reading successfully, size = %zd\n", size);
    }

    close(fd);
    exit(EXIT_SUCCESS);
}
result:
[root@localhost part_3]# ./3_8
fd of opening /root/source/part_3/open.tmp is: 3
set lseek = 0
first reading successfully, size = 10
second reading error: Success
函数write
- write出错的常见原因是磁盘已经写满,或者超过了一个给定进程的文件长度限制
I/O的效率
- 以下代码从标准输入读,写到标准输出,但是这个BUFFSIZE如何选取,需要做实验来验证,apue给出了一个结果,如下图:
[图片上传失败...(image-a59ad6-1530629898456)]
#include <stdio.h>
#include <stdlib.h>
#include <sys/times.h>
#include <unistd.h>
#include <sys/types.h>
#define BUFFSIZE 4096
/*
 * Copies standard input to standard output in BUFFSIZE-byte chunks.
 * Diagnostics go to stderr so they never end up inside the copied data, and
 * the program exits with EXIT_FAILURE on the first read or write error.
 */
int
main(int argc, char *argvp[])
{
    ssize_t n;              /* read() returns ssize_t, not int */
    char buf[BUFFSIZE];

    while ((n = read(STDIN_FILENO, buf, BUFFSIZE)) > 0) {
        /* A short write is treated as an error, as in the original loop. */
        if (write(STDOUT_FILENO, buf, (size_t)n) != n) {
            fprintf(stderr, "write error\n");
            exit(EXIT_FAILURE);
        }
    }
    if (n < 0) {
        fprintf(stderr, "read error\n");
        exit(EXIT_FAILURE);
    }
    exit(EXIT_SUCCESS);
}
- 我将用以下代码做一个实验,运行之前先准备一个516581760 bytes大小的文件(注:这里暂不考虑gcc优化,文件中的数据不能保证随机)
比如使用:
dd if=/dev/zero of=/tmp/test bs=516581760 count=1
#include <stdio.h>
#include <stdlib.h>
#include <sys/times.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
/* Total number of bytes copied per benchmark run (size of the input file). */
#define BYTES (1 * 516581760L)
// #define BYTES (1 * 2)
/*
 * Number of whole buffers of `buffer_size` bytes that fit into BYTES.
 * The argument is parenthesized so expressions such as `a + b` divide as a unit.
 */
#define LOOP_COUNT(buffer_size) (BYTES / (buffer_size))
/* Input file read by the benchmark and output file it writes. */
#define FILE_PATH "/tmp/test"
#define FILE_PATH2 "/tmp/test2"
/* Runs func(arg) and prints the real, user and system time it took. */
void cal_func_time(void (*func)(int), int arg);
/* Copies FILE_PATH to FILE_PATH2 using a buffer of `size` bytes. */
void test_buffer(int size);
/*
 * Times test_buffer() once for every power-of-two buffer size from 2^19
 * down to 2^0, largest first.
 */
int
main(int argc, char *argv[])
{
    int shift = 19;

    while (shift >= 0) {
        cal_func_time(test_buffer, 1 << shift);
        shift--;
    }
    return 0;
}
/*
 * Calls func(arg) and prints the elapsed real time plus the user and system
 * CPU time of the call, in seconds.
 *
 * func: function to measure; it is always called exactly once.
 * arg:  value passed through to func.
 *
 * If the clock tick rate or either times() sample is unavailable, a
 * diagnostic is written to stderr and no timings are printed.
 */
void
cal_func_time(void (*func)(int), int arg)
{
    /* sysconf() returns long; keep the full width and check for failure. */
    long ticks_per_sec = sysconf(_SC_CLK_TCK);
    struct tms begin_tms, end_tms;
    clock_t begin, end;

    begin = times(&begin_tms);
    func(arg);
    end = times(&end_tms);

    if (ticks_per_sec <= 0 || begin == (clock_t)-1 || end == (clock_t)-1) {
        fprintf(stderr, "cal_func_time: cannot measure time\n");
        return;
    }

    printf("real time: %lf\n", (end - begin) / (double)ticks_per_sec);
    printf("user time: %lf\n",
           (end_tms.tms_utime - begin_tms.tms_utime) / (double)ticks_per_sec);
    printf("sys time: %lf\n",
           (end_tms.tms_stime - begin_tms.tms_stime) / (double)ticks_per_sec);
    /* Child times (tms_cutime / tms_cstime) are deliberately not reported. */
}
/*
 * Copies FILE_PATH to FILE_PATH2 with read()/write() calls of `size` bytes,
 * performing LOOP_COUNT(size) iterations, and prints the parameters used.
 *
 * size: buffer size in bytes; must be positive.
 *
 * Both descriptors are closed on every path, including the one where the
 * destination file cannot be opened.
 */
void
test_buffer(int size)
{
    /* A non-positive size would mean a zero-length VLA and a division by zero. */
    if (size <= 0) {
        printf("invalid buffer size %d\n", size);
        return;
    }

    ssize_t n;
    char buf[size];

    printf("++++++++++++++++++++++++\n");
    printf("data_sum = %ld, buf_size = %d, loop = %ld\n", BYTES, size, LOOP_COUNT(size));

    int open_fd = open(FILE_PATH, O_RDONLY);
    if (open_fd < 0) {
        printf("open %s error\n", FILE_PATH);
        return;
    }

    int open_fd2 = open(FILE_PATH2, O_RDWR | O_CREAT | O_TRUNC, 0666);
    if (open_fd2 < 0) {
        printf("open open %s error\n", FILE_PATH2);
        /* Do not leak the source descriptor when the destination fails. */
        close(open_fd);
        return;
    }

    for (long i = 0; i < LOOP_COUNT(size); i++) {
        n = read(open_fd, buf, (size_t)size);
        if (n <= 0) {
            printf("read error\n");
            break;
        }
        if (write(open_fd2, buf, (size_t)n) != n) {
            printf("write error\n");
            break;
        }
    }

    close(open_fd);
    close(open_fd2);
}
centos7上xfs文件系统测试result:
++++++++++++++++++++++++
data_sum = 516581760, buf_size = 524288, loop = 985
real time: 15.610000
user time: 0.000000
sys time: 1.070000
++++++++++++++++++++++++
data_sum = 516581760, buf_size = 262144, loop = 1970
real time: 12.570000
user time: 0.000000
sys time: 0.910000
++++++++++++++++++++++++
data_sum = 516581760, buf_size = 131072, loop = 3941
real time: 11.530000
user time: 0.010000
sys time: 0.740000
++++++++++++++++++++++++
data_sum = 516581760, buf_size = 65536, loop = 7882
real time: 9.700000
user time: 0.010000
sys time: 0.790000
++++++++++++++++++++++++
data_sum = 516581760, buf_size = 32768, loop = 15764
real time: 8.320000
user time: 0.000000
sys time: 0.700000
++++++++++++++++++++++++
data_sum = 516581760, buf_size = 16384, loop = 31529
real time: 10.860000
user time: 0.020000
sys time: 0.800000
++++++++++++++++++++++++
data_sum = 516581760, buf_size = 8192, loop = 63059
real time: 13.040000
user time: 0.020000
sys time: 0.930000
++++++++++++++++++++++++
data_sum = 516581760, buf_size = 4096, loop = 126118
real time: 10.390000
user time: 0.050000
sys time: 1.050000
++++++++++++++++++++++++
data_sum = 516581760, buf_size = 2048, loop = 252237
real time: 16.830000
user time: 0.100000
sys time: 1.490000
++++++++++++++++++++++++
data_sum = 516581760, buf_size = 1024, loop = 504474
real time: 34.600000
user time: 0.320000
sys time: 5.280000
++++++++++++++++++++++++
data_sum = 516581760, buf_size = 512, loop = 1008948
real time: 24.540000
user time: 0.410000
sys time: 3.280000
++++++++++++++++++++++++
data_sum = 516581760, buf_size = 256, loop = 2017897
real time: 30.740000
user time: 0.650000
sys time: 5.560000
++++++++++++++++++++++++
data_sum = 516581760, buf_size = 128, loop = 4035795
real time: 27.330000
user time: 1.290000
sys time: 9.530000
++++++++++++++++++++++++
data_sum = 516581760, buf_size = 64, loop = 8071590
real time: 29.070000
user time: 2.230000
sys time: 16.970000
++++++++++++++++++++++++
data_sum = 516581760, buf_size = 32, loop = 16143180
real time: 43.730000
user time: 4.260000
sys time: 31.630000
++++++++++++++++++++++++
data_sum = 516581760, buf_size = 16, loop = 32286360
real time: 72.980000
user time: 8.350000
sys time: 62.530000
++++++++++++++++++++++++
data_sum = 516581760, buf_size = 8, loop = 64572720
real time: 141.430000
user time: 16.640000
sys time: 124.310000
++++++++++++++++++++++++
data_sum = 516581760, buf_size = 4, loop = 129145440
real time: 280.300000
user time: 32.960000
sys time: 245.860000
++++++++++++++++++++++++
data_sum = 516581760, buf_size = 2, loop = 258290880
real time: 553.320000
user time: 65.580000
sys time: 485.670000
++++++++++++++++++++++++
data_sum = 516581760, buf_size = 1, loop = 516581760
real time: 1108.320000
user time: 130.450000
sys time: 976.340000