/*
 * Measures three per-page (PAGESIZE bytes) read latencies on the file system
 * holding the current directory and prints a random_page_cost estimate for
 * several assumed cache hit ratios:
 *
 *   - latency of a page served from the file system cache,
 *   - latency of a page read sequentially from the device,
 *   - latency of a page read at a random offset from the device.
 *
 * Side effects: creates (and leaves behind) two 1 GB files in the current
 * directory and writes to /proc/sys/vm/drop_caches, so the program exits
 * with an error unless it is allowed to open that file for writing.
 */
#ifndef _GNU_SOURCE
#define _GNU_SOURCE             /* needed for O_DIRECT in <fcntl.h> */
#endif

#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/stat.h>
#include <sys/time.h>
#include <sys/types.h>
#include <unistd.h>

/* Size of one I/O request; every latency reported is per PAGESIZE bytes. */
#define PAGESIZE 8192

/*
 * Alignment of buffers used with O_DIRECT.  O_DIRECT requires the buffer to
 * be aligned to the logical block size of the device; 4096 satisfies both
 * 512-byte and 4096-byte block devices.
 */
#define IO_ALIGNMENT 4096

const char *filename = "./direct_io.data";
const int one_gb_bytes = 1024 * 1024 * 1024;
const char *dummy_file = "a_1GB_dummy_file";

static void prepare_test_file(const char *filename, int file_size);
static void read_file_seq(int fd, int blocks);
static double get_fs_cache_latency_us(const char *filename);
static double get_seq_page_latency(const char *filename);
static double get_random_page_latency(const char *filename);
static double cal_real_lat(double cache_hit_ratio, double per_io_lat, double per_cache_lat);
static void cal_random_page_cost(double seq_read_lat, double random_page_lat,
                                 double fs_cache_lat, double cache_hit_ratio);
static void invalidate_device_cache(void);
static void auto_gather(void);

static void die(const char *msg);
static void die_path(const char *op, const char *path);
static void die_eof(const char *msg);
static unsigned char *alloc_io_buffer(void);
static double get_time_interval(const struct timeval *start_tm, const struct timeval *end_tm);
static void drop_fs_cache(void);

int main(int argc, char *argv[])
{
    /* The program takes no command line options. */
    (void) argc;
    (void) argv;

    auto_gather();
    return 0;
}

/* Print msg followed by the description of errno, then exit with status 1. */
static void die(const char *msg)
{
    perror(msg);
    exit(1);
}

/*
 * Like die(), but builds the message "<op> <path> failed" so that the
 * report names the file that was actually being accessed.
 */
static void die_path(const char *op, const char *path)
{
    char msg[256];
    int saved_errno = errno;    /* snprintf may clobber errno */

    snprintf(msg, sizeof msg, "%s %s failed", op, path);
    errno = saved_errno;
    die(msg);
}

/*
 * Report an unexpected end of file and exit with status 1.  End of file is
 * not an errno condition, so perror() is deliberately not used here.
 */
static void die_eof(const char *msg)
{
    fprintf(stderr, "%s\n", msg);
    exit(1);
}

/*
 * Allocate one PAGESIZE buffer aligned for O_DIRECT I/O.  Exits on failure.
 * The caller owns the buffer and releases it with free().
 */
static unsigned char *alloc_io_buffer(void)
{
    void *mem = NULL;
    int rc = posix_memalign(&mem, IO_ALIGNMENT, PAGESIZE);

    if (rc != 0) {
        /* posix_memalign returns the error code instead of setting errno */
        errno = rc;
        die("posix_memalign failed");
    }
    return mem;
}

/*
 * Run the whole measurement: create the test file, take the three latency
 * measurements, and print the resulting random_page_cost for a fixed list
 * of cache hit ratios.
 */
static void auto_gather(void)
{
    static const double cache_hit_ratio[] = { 1, 0.9, 0.5, 0.1, 0 };
    const size_t ratio_count = sizeof cache_hit_ratio / sizeof cache_hit_ratio[0];
    double fs_cache_lat, seq_read_lat, random_page_lat;
    size_t i;

    prepare_test_file(filename, one_gb_bytes);

    /* Test file system cache buffer latency */
    fs_cache_lat = get_fs_cache_latency_us(filename);

    /* see comments in invalidate_device_cache */
    invalidate_device_cache();
    seq_read_lat = get_seq_page_latency(filename);

    invalidate_device_cache();
    random_page_lat = get_random_page_latency(filename);

    printf("fs_cache_lat = %fus, seq_read_lat = %fus, random_page_lat = %fus\n\n",
           fs_cache_lat, seq_read_lat, random_page_lat);

    for (i = 0; i < ratio_count; i++) {
        cal_random_page_cost(seq_read_lat, random_page_lat,
                             fs_cache_lat, cache_hit_ratio[i]);
    }
}

/*
 * Print the random_page_cost for one cache hit ratio.
 *
 * The cache hit ratio cannot be measured here, so the caller has to supply
 * it.  The cost is the effective random page latency divided by the
 * effective sequential page latency, both blended with the file system
 * cache latency according to cache_hit_ratio.
 */
static void cal_random_page_cost(double seq_read_lat, double random_page_lat,
                                 double fs_cache_lat, double cache_hit_ratio)
{
    double real_seqscan_lat, real_random_lat;

    /* The file system cache affects sequential reads as well */
    real_seqscan_lat = cal_real_lat(cache_hit_ratio, seq_read_lat, fs_cache_lat);
    real_random_lat = cal_real_lat(cache_hit_ratio, random_page_lat, fs_cache_lat);

    printf("cache hit ratio: %f random_page_cost %f\n",
           cache_hit_ratio, real_random_lat / real_seqscan_lat);
}

/*
 * Expected latency of one page access: cache_hit_ratio of the accesses cost
 * per_cache_lat, the remaining (1 - cache_hit_ratio) cost per_io_lat.
 */
static double cal_real_lat(double cache_hit_ratio, double per_io_lat, double per_cache_lat)
{
    double cache_lat = cache_hit_ratio * per_cache_lat;
    double disk_lat = (1 - cache_hit_ratio) * per_io_lat;

    return cache_lat + disk_lat;
}

/*
 * Write pages to `filename` (created with mode 0644 if missing) using
 * O_DIRECT until at least file_size bytes have been written.  At least one
 * page is always written.  Exits on any I/O error.
 */
static void prepare_test_file(const char *filename, int file_size)
{
    unsigned char *buf = alloc_io_buffer();
    long current_size = 0;
    ssize_t ret;
    int fd;

    /* Give the pages defined content instead of writing uninitialized memory. */
    memset(buf, 0xA5, PAGESIZE);

    /*
     * XXX: Whether the test file is written with O_DIRECT appears to have
     * a big impact on get_seq_page_latency.  There seems to be some cache
     * involved in this step that invalidate_device_cache and drop_fs_cache
     * do not take care of (the original note is ambiguous on this point);
     * what that cache is remains unknown.
     */
    fd = open(filename, O_WRONLY | O_DIRECT | O_CREAT, 0644);
    if (fd < 0)
        die_path("open", filename);

    do {
        ret = write(fd, buf, PAGESIZE);
        /* A failed write must stop the loop; otherwise -1 would be summed. */
        if (ret < 0)
            die_path("write", filename);
        current_size += ret;
    } while (current_size < file_size);

    if (close(fd) < 0)
        die_path("close", filename);
    free(buf);
}

/* Elapsed time from *start_tm to *end_tm in microseconds. */
static double get_time_interval(const struct timeval *start_tm, const struct timeval *end_tm)
{
    /* Computed in double so the seconds term cannot overflow an integer type. */
    double seconds = (double) (end_tm->tv_sec - start_tm->tv_sec);
    double useconds = (double) (end_tm->tv_usec - start_tm->tv_usec);

    return seconds * 1000000.0 + useconds;
}

/*
 * Rewind fd and read `blocks` pages sequentially from its start.
 * Exits on a read error or if the file ends before that many pages.
 */
static void read_file_seq(int fd, int blocks)
{
    char buf[PAGESIZE];
    long total_size = (long) blocks * PAGESIZE;
    long current_size = 0;
    ssize_t ret;

    if (lseek(fd, 0, SEEK_SET) < 0)
        die("lseek failed");

    do {
        ret = read(fd, buf, PAGESIZE);
        if (ret < 0)
            die("read file seq failed.");
        /* ret == 0 never advances current_size, so it has to end the run. */
        if (ret == 0)
            die_eof("eof");
        current_size += ret;
    } while (current_size < total_size);
}

/*
 * Return the average latency, in microseconds, of reading one page that is
 * already in the file system cache.
 */
static double get_fs_cache_latency_us(const char *filename)
{
    const int warmup_blocks = 16;
    const int loops = 10000;
    struct timeval start_tm, end_tm;
    int fd;
    int i;

    fd = open(filename, O_RDONLY);
    if (fd < 0)
        die("open ./direct_io.data failed");

    /* read twice so that the blocks will be in the file system cache */
    read_file_seq(fd, warmup_blocks);
    read_file_seq(fd, warmup_blocks);

    gettimeofday(&start_tm, NULL);
    for (i = 0; i < loops; i++)
        read_file_seq(fd, warmup_blocks);
    gettimeofday(&end_tm, NULL);

    close(fd);

    return get_time_interval(&start_tm, &end_tm) / loops / warmup_blocks;
}

/*
 * Flush dirty data with sync() and then write "3" to
 * /proc/sys/vm/drop_caches.  Exits if that file cannot be opened or written.
 */
static void drop_fs_cache(void)
{
    const char *data = "3";
    ssize_t ret;
    int fd;

    sync();

    fd = open("/proc/sys/vm/drop_caches", O_WRONLY);
    if (fd < 0)
        die("Open drop_cache for write failed");

    ret = write(fd, data, sizeof(char));
    if (ret < 0)
        die("drop fs cache failed");

    close(fd);
}

/*
 * Return the average latency, in microseconds, of one page when the first
 * 1 GB of `filename` is read sequentially right after the file system
 * cache has been dropped.
 */
static double get_seq_page_latency(const char *filename)
{
    struct timeval start_tm, end_tm;
    char buf[PAGESIZE];
    long current_size = 0;
    long total_size = one_gb_bytes;
    ssize_t ret;
    int fd;

    fd = open(filename, O_RDONLY);
    if (fd < 0)
        die("open ./direct_io.data failed");

    drop_fs_cache();

    gettimeofday(&start_tm, NULL);
    /*
     * The file must be read only once; a second pass would hit the file
     * system cache.
     */
    do {
        ret = read(fd, buf, PAGESIZE);
        if (ret < 0)
            die("read file seq failed.");
        if (ret == 0)
            die_eof("eof");
        current_size += ret;
    } while (current_size < total_size);
    gettimeofday(&end_tm, NULL);

    close(fd);

    return get_time_interval(&start_tm, &end_tm) / (total_size / PAGESIZE);
}

/*
 * O_DIRECT only bypasses the file system cache; a device-level cache (for
 * example in a RAID controller) still influences the measurements.  That
 * cache is invalidated here by *writing* and then *reading* a dummy file
 * that should be larger than the device cache.  Per testing, only reading
 * a dummy file does not work.
 */
static void invalidate_device_cache(void)
{
    unsigned char *buf;
    long current_size = 0;
    ssize_t ret;
    int fd;

    prepare_test_file(dummy_file, one_gb_bytes);

    buf = alloc_io_buffer();

    /*
     * Use O_DIRECT so that the reads are really issued to the device and
     * take up the device cache.
     */
    fd = open(dummy_file, O_RDONLY | O_DIRECT);
    if (fd < 0)
        die("Open dummy file failed.");

    do {
        ret = read(fd, buf, PAGESIZE);
        if (ret < 0) {
            perror("READ dummy file failed.");
            exit(2);
        }
        if (ret == 0)
            die_eof("unexpected eof");
        current_size += ret;
    } while (current_size < one_gb_bytes);

    /* This function runs more than once; do not leak the fd and the buffer. */
    close(fd);
    free(buf);
}

/*
 * Return the average latency, in microseconds, of one page read with
 * O_DIRECT from a random page-aligned offset within the first 1 GB of
 * `filename`.  1 GB worth of pages is read in total.
 */
static double get_random_page_latency(const char *filename)
{
    struct timeval start_tm, end_tm;
    unsigned char *buf = alloc_io_buffer();
    long current_size = 0;
    long total_size = one_gb_bytes;
    int blocks = one_gb_bytes / PAGESIZE;
    off_t offset;
    ssize_t ret;
    int fd;

    fd = open(filename, O_RDONLY | O_DIRECT);
    if (fd < 0)
        die("open ./direct_io.data failed");

    gettimeofday(&start_tm, NULL);
    do {
        /* Widen before multiplying so the byte offset is computed as off_t. */
        offset = (off_t) (rand() % blocks) * PAGESIZE;
        ret = pread(fd, buf, PAGESIZE, offset);
        if (ret < 0)
            die("read file seq failed.");
        /* A zero-length read would never advance current_size. */
        if (ret == 0)
            die_eof("unexpected eof");
        current_size += ret;
    } while (current_size < total_size);
    gettimeofday(&end_tm, NULL);

    close(fd);
    free(buf);

    return get_time_interval(&start_tm, &end_tm) / blocks;
}