我正在尝试编写一个 C++ 程序，从如下格式的文本文件中读取日志信息：
local - - [24/Oct/1994:13:41:41 -0600] "GET index.html HTTP/1.0" 200 150
local - - [24/Oct/1994:13:41:41 -0600] "GET 1.gif HTTP/1.0" 200 1210
local - - [24/Oct/1994:13:43:13 -0600] "GET index.html HTTP/1.0" 200 3185
local - - [24/Oct/1994:13:43:14 -0600] "GET 2.gif HTTP/1.0" 200 2555
local - - [24/Oct/1994:13:43:15 -0600] "GET 3.gif HTTP/1.0" 200 36403
local - - [24/Oct/1994:13:43:17 -0600] "GET 4.gif HTTP/1.0" 200 441
local - - [24/Oct/1994:13:46:45 -0600] "GET index.html HTTP/1.0" 200 3185
然后，我想提取每一行中 GET 之后的文件名，把它保存起来，并统计每个文件名在日志文件中出现的次数。读完文件后，打印出出现次数最多的前 10 个文件名。
我的问题是：下面的代码会对日志文件中每一行的所有内容都进行计数，而这并不是我想要的——我只想统计 GET 和 HTTP 之间的文件名。
#include <iostream>
#include <fstream>
#include <string>
#include <algorithm>
#include <time.h>
#include <math.h>
// Upper bound on the number of distinct words the tables below can hold.
const long MAX = 1000000;
// words[i] is the i-th distinct word recorded by insert(); only the first
// `count` slots are in use.
std::string words[MAX];
// instances[i] is the occurrence count for words[i]; findTop() resets the
// slot it reports to 0.
long instances[MAX];
// Number of distinct words stored so far.
long count = 0;
// Records one occurrence of `input` in the global word table.
//
// If `input` is already stored in `words`, its counter in `instances` is
// incremented. Otherwise it is appended with a count of 1, provided the table
// still has room (fewer than MAX entries). When the table is full, the word is
// dropped and a diagnostic is written to std::cerr.
//
// The lookup is a linear scan over the `count` words stored so far.
void insert(const std::string& input) {
    // Check first; only add when the word is not present yet.
    for (long i = 0; i < count; i++) {
        if (input == words[i]) {
            instances[i]++;
            return;
        }
    }
    if (count < MAX) {
        words[count] = input;
        instances[count] = 1;
        count++;
    } else {
        std::cerr << "Too many unique words in the file\n";
    }
}
// Extracts the most frequent remaining word from the global table.
//
// Scans the first `count` entries of `instances` for the largest counter
// (the earliest entry wins ties), copies the matching word into `word`,
// resets that counter to 0 so the next call reports the runner-up, and
// returns the counter's value from before the reset.
long findTop(std::string &word) {
    long best = 0;
    for (long idx = 1; idx < count; ++idx) {
        if (instances[idx] > instances[best])
            best = idx;
    }
    const long bestCount = instances[best];
    instances[best] = 0;  // consume the entry so it is not reported again
    word = words[best];
    return bestCount;
}
// Counts the prime numbers in the range [2, n] by trial division.
//
// Starts from n - 1 (the number of integers in [2, n]) and subtracts one for
// every value that has a divisor in [2, floor(sqrt(value))]. When n < 2 the
// loop never runs and the result is simply n - 1.
long frequency_of_primes(long n) {
    long prime_count = n - 1;
    for (long candidate = 2; candidate <= n; ++candidate) {
        // Try divisors from floor(sqrt(candidate)) down to 2.
        long divisor = sqrt(candidate);
        while (divisor > 1) {
            if (candidate % divisor == 0) {
                --prime_count;  // composite: found a proper divisor
                break;
            }
            --divisor;
        }
    }
    return prime_count;
}
int main()
{
std::cout << "Please wait for the result!" << std::endl;
std::string word;
std::ifstream data("Text.txt");
while (data >> word)
insert(word);
long topCount = 0;
for (long i = 0; i<10; i++)
//cout << words[i] << " " << instances[i] << endl;
std::cout << " File Name: " << word << " Visitors #" << findTop(word) << std::endl;
clock_t t;
long f;
t = clock();
printf("Calculating...\n");
f = frequency_of_primes(99999);
printf("The number of primes lower than 100,000 is: %d\n", f);
t = clock() - t;
printf("It took me %d clicks (%f seconds).\n", t, ((float)t) / CLOCKS_PER_SEC);
return 0;
}
函数 get_file_name() 在日志行中找到第一个引号和最后一个引号，再从两者之间的请求字符串中进一步解析出文件名。这基本上就是 @AI.G. 的建议。不过，您也可以了解一下 C++ 标准库提供的正则表达式支持。
我也没有对输入或输出文件做更多处理；这段代码只是作为使用 unordered_map 的一个示例，正如 @PaulMcKenzie 所建议的那样。
#include <iostream>
#include <fstream>
#include <unordered_map>
// Extracts the requested file name from one access-log line.
//
// The request is the text between the first and the last double quote on the
// line, e.g. `GET index.html HTTP/1.0`; the file name is its second
// space-separated field.
//
// @param s  One line of the log.
// @return   The file name, or an empty string when the line has no quoted
//           request or the request has no second field.
std::string get_file_name(const std::string& s) {
    const std::size_t first = s.find('"');
    const std::size_t last = s.rfind('"');
    // No quotes at all, or a single unmatched quote: nothing to parse.
    if (first == std::string::npos || first == last) {
        return std::string();
    }

    // Text strictly between the quotes. The length is last - first - 1; the
    // reversed subtraction would wrap around in unsigned arithmetic.
    const std::string request = s.substr(first + 1, last - first - 1);

    // Skip the method ("GET"); the file name starts after the first space.
    const std::size_t method_end = request.find(' ');
    if (method_end == std::string::npos) {
        return std::string();
    }
    const std::size_t file_begin = method_end + 1;

    // The name runs up to the next space, or to the end of the request when
    // no protocol field follows.
    const std::size_t file_end = request.find(' ', file_begin);
    if (file_end == std::string::npos) {
        return request.substr(file_begin);
    }
    return request.substr(file_begin, file_end - file_begin);
}
// Entry point.
//
// Reads "header_log.txt" line by line, counts how many times each requested
// file name occurs, and appends one "<file name>, <count>" line per distinct
// name to "output.txt". The output order is the unordered_map's iteration
// order, i.e. unspecified.
//
// Returns 1 if either file cannot be opened, 0 otherwise.
int main() {
    std::ifstream f_s("header_log.txt");
    if (!f_s) {
        std::cerr << "Could not open header_log.txt\n";
        return 1;
    }

    std::unordered_map<std::string, int> file_access_counts;
    std::string content;
    while (std::getline(f_s, content)) {
        const std::string file_name = get_file_name(content);
        // Nothing to count when no file name could be extracted.
        if (file_name.empty()) {
            continue;
        }
        // operator[] value-initializes a missing entry to 0, so one lookup
        // handles both the first and the repeated occurrence.
        ++file_access_counts[file_name];
    }
    f_s.close();

    std::ofstream ofs("output.txt", std::ofstream::out | std::ofstream::app);
    if (!ofs) {
        std::cerr << "Could not open output.txt\n";
        return 1;
    }
    for (const auto& n : file_access_counts)
        ofs << n.first << ", " << n.second << '\n';
    ofs.close();
    return 0;
}