在 PHP 解决圣经背诵问题 这一篇中我们使用 PHP 解决了圣经背诵问题,但是程序运行速度不尽人意,一般要3秒或更多的时间才能得出结果。
我们可以使用 time 命令来看一下运行时间 (关于 time 命令的说明见这一篇 Linux 下使用 time 命令查看程序的执行时间)。
结果如下:
首先我们用下面的 shell 命令来初步统计一下单词的数量,这样便于验证结果的正确性。
tr -cs "[:alnum:]" "\n" < bible.txt | sort | uniq -c
我们在圣经中查找 apples ,这个单词出现了 3 次。
time php find-word-in-bible.php apples apples 17125,10 17560,13 17636,42 [3] real 3.85s user 3.82s sys 0.01s
消耗的时间比较多。
为了验证是算法导致,还是 PHP 本身本身导致,决定使用 C 实现同样的代码。
代码如下:
#include <stdio.h> #include <stdlib.h> #include <time.h> #define is_digit_or_letter(c) (((c) >='a' && (c) <= 'z') || ((c) >= '0' && (c) <= '9') || ((c) >= 'A' && (c) <= 'Z')) static void str_copy(char * dest, char * src, unsigned int len){ if(NULL == dest || NULL == src || len < 1){ return; } while(len--){ dest[len] = src[len]; if('\n' == src[len]){ return; } } } unsigned int str_len(char * str){ unsigned int len = 0; if(NULL != str){ while('\0' != *str++){ len++; } } return len; } void find_word_in_buffer( char * word, int word_len, char * start, char * end, int * line, int * words, int * found ){ int j; char c; while(start < end){ c = *start++; if('\r' == c){ if(start < end && '\n' == *start){ start++; } (*line)++; (*words) = 1; } else if('\n' == c){ (*line)++; (*words) = 1; } else if(is_digit_or_letter(c)){ j = 1; while(start < end && j < word_len && (*start == word[j])){ start++; j++; } if(j == word_len && !is_digit_or_letter(*start)){ (*found)++; printf("%d,%d ", *line, *words); } while(start < end && is_digit_or_letter(*start)){ start++; } (*words)++; } } } void find_word_in_bible_all( char * word, int word_len, FILE * bible_file, int * found ){ int line = 1, words = 1; long file_size = 0; char * buffer; fseek (bible_file , 0 , SEEK_END); file_size = ftell(bible_file); rewind (bible_file); buffer = (char *)malloc(file_size); if(NULL == buffer){ printf("Alloc failed"); return; } fread(buffer, file_size, 1, bible_file); find_word_in_buffer( word, word_len, buffer, buffer + file_size, &line, &words, found ); } void find_word_in_bible( char * word, int word_len, FILE * bible_file, int * found, long buffer_size ){ long file_size = 0, read_size = 0; int line = 1 ,words = 1, last_len = 0; char swap[256], * start, *end, *p, *buffer = (char *)malloc(buffer_size); if(NULL == buffer){ printf("Alloc failed"); return; } fseek (bible_file , 0 , SEEK_END); file_size = ftell(bible_file); rewind (bible_file); while(!feof(bible_file)){ end = buffer + buffer_size; if((read_size + buffer_size) > file_size){ end += (read_size - file_size); } fread(buffer, buffer_size, 1, bible_file); start = buffer; while(start <= end && is_digit_or_letter(*start)){ start++; } while(start <= end && !is_digit_or_letter(*start)){ start++; } p = end = buffer + buffer_size; while(p >= start && !is_digit_or_letter(*p)){ p--; } while(p >= start && is_digit_or_letter(*p)){ p--; } p++; str_copy(swap + last_len, buffer, start - buffer); find_word_in_buffer( word, word_len, swap, (char *)((int)swap + last_len + start - buffer), &line, &words, found ); last_len = end - p; if(last_len > 0){ str_copy(swap, p, last_len); } find_word_in_buffer( word, word_len, start, p, &line, &words, found ); } find_word_in_buffer( word, word_len, swap, swap + last_len, &line, &words, found ); } static unsigned int is_valid_word(char * word, int word_len){ char * end = word + word_len; while(word < end){ if(!is_digit_or_letter(*word)){ return 0; } word++; } return 1; } int main(int argc, char ** argv){ char * word = NULL; int word_len, found = 0; long buffer_size = 0; time_t ticks; char * bible_file = "bible.txt"; FILE * file; if(argc < 2){ printf("Parameter 1 (word) should not be empty.\n"); return 1; } if(argc > 2){ buffer_size = atol(argv[2]); } if(buffer_size <= 1024){ buffer_size = 1024; } word = argv[1]; word_len = str_len(word); if(word_len < 1){ printf("\nSearch word should not be empty."); return 2; } if(0 == is_valid_word(word, word_len)){ printf("\nSearch word can just contain char in a-z and 0-9.\n"); return 3; } file = fopen(bible_file, "r"); if(NULL == file){ printf("Can't open bible file at : %s\n", bible_file); return 5; } printf("Find [%s] in bible\n", word); find_word_in_bible(word, word_len, file, &found, buffer_size); fclose(file); ticks = clock(); printf( "\nAll %d found in %d'ms.\n", found, (ticks * 1000) / CLOCKS_PER_SEC ); return 0; }
测试一下:
time ./a.out apples Find [apples] in bible 17140,10 17576,13 17652,42 All 3 found in 50'ms. real 0.05 user 0.05 sys 0.00
不错,竟然能之 50毫秒里面搞定,速度确实快了几个数量级啊。
One Trackback
[…] 薹翮 我悄悄地离开,在昨天;今天我又静静地到来 SEO 查询工具 « 使用 C 语言解决圣经背诵问题 […]