Up

D Server Log Processing

 1 #!/home/jcgregorio/src/dmd/bin/dmd -run
 2 import std.date;
 3 import std.string;
 4 import std.stdio;
 5 import std.mmfile;
 6 import std.regexp;
 7 import std.uri;
 8 import array;
 9 
10 alias invariant(char)[] istr;
11 
12 int main() {
13     // Tallies today's log hits per path and referrer, then prints them as an HTML response.
14     // map[path][referrer] counts the number of hits to that path from a given referrer.
15     int[istr][istr] map;
16 
17     auto path_regex = new RegExp(r"GET /news/([a-zA-Z0-9\-\_/]+)");
18     auto path_regex_prj = new RegExp(r"GET /projects/([a-zA-Z0-9\-\_]+)/ ");
19     auto now = UTCtoLocalTime(getUTCtime());
20     auto filename = format("/home/jcgregorio/log/bitworking.org/%04d%02d%02d.log", YearFromTime(now),
21             MonthFromTime(now)+1, DateFromTime(now));
22     auto file = new MmFile(filename);
23     auto data = cast(invariant(char)[]) file[];
24     auto lines = std.string.split(data, "\n");
25     // Chop up each line into path and referrer, and increment the count at [path][referrer].
26     foreach (s; lines) {
27         // On a good log line this puts the request line at index 1 and the referrer at index 3.
28         auto parts = std.string.split(s, "\"" );
29         if (parts.length > 4) {
30             invariant(char)[] path;
31             if (path_regex.test(parts[1])) {
32                 path = path_regex.match(1);
33             } else if (path_regex_prj.test(parts[1])) {
34                 path = path_regex_prj.match(1);
35             }
36             if (path) {
37                 auto referrer = parts[3];
38                 map[path][referrer] += 1;
39             }
40         }
41     }
42     // We're done collecting the data; the rest is sorting it the way we want and printing it out.
43 
44     // The referrer is copied from the log line as-is, so escape it before writing it as HTML text.
45     istr html_escape(istr s) {
46         s = std.string.replace(s, "&", "&amp;");
47         s = std.string.replace(s, "<", "&lt;");
48         return std.string.replace(s, ">", "&gt;");
49     }
50 
51     write("Status: 200 OK\r\n");
52     write("Content-type: text/html\r\n");
53     write("\r\n");
54     write("<html> <head> </head> <body> ");
55     // writeln, not writefln: "<p>" is plain text here, not a format string.
56     writeln("<p>", lines.length, " log entries processed.</p>");
57     write("<dl>");
58 
59     struct uri_count_t {
60        int count;
61        invariant(char)[] uri;
62     }
63     uri_count_t[] per_path;
64     // Now sum up the hits per path
65     foreach (path, referrers; map) {
66         int total = 0;
67         foreach (referrer, hits; referrers) {
68             total += hits;
69         }
70         per_path ~= uri_count_t(total, path);
71     }
72     auto per_path_sorted = per_path.sort(delegate bool(uri_count_t a, uri_count_t b) { return a.count < b.count; });
73     foreach (uri_count; per_path_sorted) {
74         writeln("<dt>", uri_count.count, " ", uri_count.uri, "</dt>");
75         writeln("<dd><ul>");
76         auto referrers = map[uri_count.uri];
77         foreach (referrer; referrers.keys.sort) {
78             int hits = referrers[referrer];
79             // "-" is skipped rather than linked; the href is URI-encoded, the link text HTML-escaped.
80             if ("-" != referrer) {
81                 writeln("<li>", hits, " <a href=\"", encode(cast(invariant(char)[])referrer), "\">", html_escape(referrer), "</a></li>");
82             }
83         }
84         writeln("</ul></dd>");
85     }
86     write("</dl></body></html>");
87