-
Notifications
You must be signed in to change notification settings - Fork 1
/
main.nim
317 lines (251 loc) · 11 KB
/
main.nim
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
import os, tables, sets, strformat, strutils, terminal
type
  Line = object
    ## Cursor over one log line while it is being split into fields.
    index: int  ## Offset in `str` of the next character to be read.
    str: string ## Text of the line being parsed.
    l: int      ## Length bound the parsers compare `index` against.
# Faster stand-in for the built-in substr.
func substr(s: string, first: int, last: int): string =
  ## Returns the characters of `s` from `first` to `last`, both inclusive.
  ##
  ## Out-of-range bounds are clamped to the string (`first` to 0, `last` to
  ## `s.high`) so the raw memory copy below can never read outside `s`.
  ## An empty string is returned when the clamped range holds no character.
  let lo = max(first, 0)
  let hi = min(last, s.high)
  let n = hi - lo + 1
  if n < 1:
    return ""
  result = newString(n)
  # Copy the bytes in one go instead of appending character by character.
  copyMem(result[0].addr, cast[cstring](cast[uint](s.cstring) + lo.uint), n)
# Reads one field made of characters accepted by `cond`, ignoring the
# spaces around it.
proc parse_item_trim_space(this: var Line, cond: proc): string =
  ## Skips leading spaces, then accepts characters for as long as
  ## `cond(current, previous)` holds and returns the accepted run.
  ##
  ## `previous` starts out as the character right before the field, or
  ## '\0' when the field begins at offset 0. The first rejected character
  ## is consumed as the delimiter, together with any spaces following it.
  ## Raises `ValueError` when not a single character is accepted.
  while this.index < this.l and this.str[this.index] == ' ':
    inc this.index
  let start = this.index
  var stop = start # one past the last accepted character
  var prev = if start > 0: this.str[start - 1] else: '\0'
  while stop < this.l and cond(this.str[stop], prev):
    prev = this.str[stop]
    inc stop
  if stop == start:
    # Nothing matched: either the line is exhausted or the very first
    # character was rejected.
    raise newException(ValueError, "匹配失败:"&this.str)
  # Step over the rejecting character (if there is one), then over the
  # spaces that trail it.
  var next = if stop < this.l: stop + 1 else: stop
  while next < this.l and this.str[next] == ' ':
    inc next
  this.index = next
  return this.str.substr(start, stop - 1)
proc parse_item_wrap_string(this: var Line, left: char = '"', right: char = '"'): string =
  ## Reads a field enclosed between `left` and the first following `right`.
  ##
  ## Leading spaces are skipped. The delimiters are not part of the result,
  ## and the cursor ends up just past the closing delimiter.
  ## Raises `ValueError` when the opening delimiter is not the next
  ## character or when no closing delimiter follows it.
  while this.index < this.l and this.str[this.index] == ' ':
    inc this.index
  let opened = this.index < this.l and this.str[this.index] == left
  if not opened:
    raise newException(ValueError, "匹配失败:"&this.str)
  inc this.index
  let bodyStart = this.index
  let closeAt = this.str.find(right, bodyStart)
  if closeAt < bodyStart:
    raise newException(ValueError, "匹配失败:"&this.str)
  this.index = closeAt + 1
  result = this.str.substr(bodyStart, closeAt - 1)
# Digits only.
proc digital(x: char, y: char): bool =
  ## True when the current character `x` is an ASCII digit; the previous
  ## character `y` is not consulted.
  x in {'0'..'9'}
# Digits and the '.' character.
proc digital_dot(x: char, y: char): bool =
  ## True when the current character `x` is an ASCII digit or '.'; the
  ## previous character `y` is not consulted.
  x in {'0'..'9', '.'}
# Digits, the lowercase letters a-f, '.' and ':' (IPv4 or IPv6 text).
proc digital_dot_colon(x: char, y: char): bool =
  ## True when the current character `x` is an ASCII digit, ':', '.', or a
  ## lowercase letter from 'a' to 'f'; the previous character `y` is not
  ## consulted.
  x in {'0'..'9', ':', '.', 'a'..'f'}
# Digits, '.' and '-'.
proc digital_dot_minus(x: char, y: char): bool =
  ## True when the current character `x` is an ASCII digit, '.' or '-';
  ## the previous character `y` is not consulted.
  x in {'0'..'9', '.', '-'}
# Anything but a space.
proc not_space(x: char, y: char): bool =
  ## True when the current character `x` is not a space; the previous
  ## character `y` is not consulted.
  x notin {' '}
# Stops at a space that directly follows a digit or a '-'.
proc digital_or_none_end(x: char, y: char): bool =
  ## False only when the current character `x` is a space and the previous
  ## character `y` is an ASCII digit or '-'; true in every other case.
  x != ' ' or y notin {'0'..'9', '-'}
# Address field: digits, lowercase a-f, '.' and ':' (IPv4 or IPv6 text).
proc parse_remote_addr(this: var Line): string =
  ## Reads the next field using the `digital_dot_colon` character class.
  result = parse_item_trim_space(this, digital_dot_colon)
# Drops any '-' at the cursor, then reads a run of non-space characters.
proc parse_remote_user(this: var Line): string =
  ## '-' characters sitting directly at the cursor are skipped first; the
  ## field itself is then read with the `not_space` character class.
  while this.index < this.l and this.str[this.index] == '-':
    inc this.index
  result = parse_item_trim_space(this, not_space)
# Bracketed field: the text between '[' and the first following ']'.
proc parse_time_local(this: var Line): string =
  ## Reads the next field as a '[' ... ']' wrapped string.
  result = parse_item_wrap_string(this, '[', ']')
# Quoted field: reads up to the closing double quote.
proc parse_request_line(this: var Line): string =
  ## Reads the next field as a double-quoted string.
  result = parse_item_wrap_string(this)
# Digits only.
proc parse_status_code(this: var Line): string =
  ## Reads the next field using the `digital` character class.
  result = parse_item_trim_space(this, digital)
# Digits only.
proc parse_body_bytes_sent(this: var Line): string =
  ## Reads the next field using the `digital` character class.
  result = parse_item_trim_space(this, digital)
# Quoted field: reads up to the closing double quote.
proc parse_http_referer(this: var Line): string =
  ## Reads the next field as a double-quoted string.
  result = parse_item_wrap_string(this)
# Quoted field: reads up to the closing double quote.
proc parse_http_user_agent(this: var Line): string =
  ## Reads the next field as a double-quoted string.
  result = parse_item_wrap_string(this)
# Quoted field: reads up to the closing double quote.
proc parse_http_x_forwarded_for(this: var Line): string =
  ## Reads the next field as a double-quoted string.
  result = parse_item_wrap_string(this)
# Run of non-space characters.
proc parse_host(this: var Line): string =
  ## Reads the next field using the `not_space` character class.
  result = parse_item_trim_space(this, not_space)
# Digits only.
proc parse_request_length(this: var Line): string =
  ## Reads the next field using the `digital` character class.
  result = parse_item_trim_space(this, digital)
# Digits only.
proc parse_bytes_sent(this: var Line): string =
  ## Reads the next field using the `digital` character class.
  result = parse_item_trim_space(this, digital)
# Run of non-space characters.
proc parse_upstream_addr(this: var Line): string =
  ## Reads the next field using the `not_space` character class.
  result = parse_item_trim_space(this, not_space)
# Ends at a space that directly follows a digit or a '-'.
proc parse_upstream_status(this: var Line): string =
  ## Reads the next field using the `digital_or_none_end` rule.
  result = parse_item_trim_space(this, digital_or_none_end)
# Digits and '.'.
proc parse_request_time(this: var Line): string =
  ## Reads the next field using the `digital_dot` character class.
  result = parse_item_trim_space(this, digital_dot)
# Digits, '.' and '-'.
proc parse_upstream_response_time(this: var Line): string =
  ## Reads the next field using the `digital_dot_minus` character class.
  result = parse_item_trim_space(this, digital_dot_minus)
# Digits, '.' and '-'.
proc parse_upstream_connect_time(this: var Line): string =
  ## Reads the next field using the `digital_dot_minus` character class.
  result = parse_item_trim_space(this, digital_dot_minus)
# Digits, '.' and '-'.
proc parse_upstream_header_time(this: var Line): string =
  ## Reads the next field using the `digital_dot_minus` character class.
  result = parse_item_trim_space(this, digital_dot_minus)
proc process(filename: File|string) =
  ## Reads a log line by line, tallies every parsed field and prints a
  ## report to stdout.
  ##
  ## `filename` is either an open `File` or a path; both are consumed
  ## through `lines`. A line that fails to parse is written to stderr and
  ## left out of every counter.
  var remote_addr_data = newCountTable[string](8192)
  var remote_user_data = newCountTable[string](64)
  var time_local_data = newCountTable[string](16384)
  var request_line_data = newCountTable[string](16384)
  var status_data = newCountTable[string](64)
  var http_referer_data = newCountTable[string](8192)
  var http_user_agent_data = newCountTable[string](8192)
  var http_x_forwarded_for_data = newCountTable[string](2048)
  var http_sent_data = newCountTable[string](16384)
  # Status code -> (request line -> hits), filled for non-200 responses only.
  var http_bad_code_data = newOrderedTable[string, ref CountTable[string]]()
  var total_bytes_sent: uint64 = 0
  var total_lines: uint = 0

  proc parse_line(line: string) =
    # Every field is parsed before any counter is touched, so a line that
    # raises half-way leaves the totals unchanged.
    var l = Line(str: line, l: line.len)
    let remote_addr = l.parse_remote_addr()
    let remote_user = l.parse_remote_user()
    let time_local = l.parse_time_local()
    let request_line = l.parse_request_line()
    let status_code = l.parse_status_code()
    let body_bytes_sent = l.parse_body_bytes_sent()
    let http_referer = l.parse_http_referer()
    let http_user_agent = l.parse_http_user_agent()
    let http_x_forwarded_for = l.parse_http_x_forwarded_for()
    let bytes_sent_num = parseUint(body_bytes_sent)
    total_bytes_sent+=bytes_sent_num
    remote_addr_data.inc(remote_addr)
    remote_user_data.inc(remote_user)
    time_local_data.inc(time_local)
    request_line_data.inc(request_line)
    status_data.inc(status_code)
    http_referer_data.inc(http_referer)
    http_user_agent_data.inc(http_user_agent)
    http_x_forwarded_for_data.inc(http_x_forwarded_for)
    http_sent_data.inc(request_line, bytes_sent_num.int)
    if status_code != "200":
      # Create the per-status table only the first time a status is seen,
      # instead of allocating a throwaway default for every such line.
      if status_code notin http_bad_code_data:
        http_bad_code_data[status_code] = newCountTable[string]()
      http_bad_code_data[status_code].inc(request_line)

  for line in filename.lines:
    try:
      parse_line(line)
      total_lines+=1
    except CatchableError:
      # Best effort: report the offending line and keep going.
      stderr.writeLine(line)

  # Parsing is done: sort the tallies and print the statistics.
  let str_sent = formatSize(total_bytes_sent.int64, prefix = bpColloquial, includeSpace = true)
  let ip_count = remote_addr_data.len
  echo &"\n共计\e[1;34m{total_lines}\e[00m次访问\n发送总流量\e[1;32m{str_sent}\e[00m\n独立IP数\e[1;31m{ip_count}\e[00m"
  if total_lines < 1:
    return
  let limit = 100
  # Width of the text column. Clamped so that neither it nor the narrower
  # traffic column (t_width - 6) can turn negative on a small terminal,
  # which would make `alignLeft` fail its `Natural` range check.
  let t_width = max(terminalWidth() - 16, 16)
  let lines = total_lines.float
  # Divisor for traffic shares; falls back to 1 so that a log without any
  # sent bytes prints 0.00% instead of dividing zero by zero.
  let total_bytes = if total_bytes_sent > 0'u64: total_bytes_sent.float else: 1.0

  proc print_stat_long(name: string, data: ref CountTable[string]) =
    # Prints the `limit` most frequent keys with their hit count and their
    # share of all parsed lines, followed by a summary row.
    data.sort()
    echo &"\n\e[1;34m{name}\e[00m"
    var i = 0
    var n = 0
    for u, num in data:
      # Pad short keys and cut long ones so the columns line up.
      let stru = if u.len < t_width: u.alignLeft(t_width) else: u.substr(0, t_width-1)
      echo fmt"{stru} {num:>6.6} {num.float*100/lines:.2f}%"
      i+=1
      n+=num
      if i >= limit:
        break
    let part1 = (fmt"{n}/{total_lines}").alignLeft(t_width)
    echo &"前{limit}项占比\n{part1} {data.len:6.6} {n.float*100/lines:.2f}%\n"

  proc print_sent_long(name: string, data: ref CountTable[string]) =
    # Same layout as print_stat_long, but the counts are byte totals and
    # the share is taken against the total bytes sent.
    data.sort()
    echo &"\n\e[1;34m{name}\e[00m"
    var i = 0
    var n = 0
    let max_width = t_width - 6
    for u, num in data:
      let stru = if u.len < max_width: u.alignLeft(max_width) else: u.substr(0, max_width-1)
      echo fmt"{stru} {formatSize(num,prefix = bpColloquial, includeSpace = true):>12.12} {num.float*100/total_bytes:.2f}%"
      i+=1
      n+=num
      if i >= limit:
        break
    let part1 = (fmt"{formatSize(n,prefix = bpColloquial, includeSpace = true)}/{formatSize(total_bytes_sent.int64,prefix = bpColloquial, includeSpace = true)}").alignLeft(max_width)
    echo &"前{limit}项占比\n{part1} {data.len:12.12} {n.float*100/total_bytes:.2f}%\n"

  proc print_code_long(code: string, data: ref CountTable[string]) =
    # Per-status breakdown: the header shows the status' share of all
    # lines, the rows show each request's share within that status.
    data.sort()
    var count = 0
    for n in data.values:
      count+=n
    echo &"\n\e[1;34m状态码{code},共{count}次,占比{(count*100).float/lines:.2f}%\e[00m"
    var i = 0
    var n = 0
    for u, num in data:
      let stru = if u.len < t_width: u.alignLeft(t_width) else: u.substr(0, t_width-1)
      echo fmt"{stru} {num:>6.6} {num.float*100/count.float:.2f}%"
      i+=1
      n+=num
      if i >= limit:
        break
    let part1 = (fmt"{n}/{count}").alignLeft(t_width)
    echo &"前{limit}项占比\n{part1} {data.len:6.6} {n.float*100/count.float:.2f}%\n"

  # Client address statistics
  print_stat_long("来访IP统计", remote_addr_data)
  # User statistics
  print_stat_long("用户统计", remote_user_data)
  # Proxy (X-Forwarded-For) statistics
  print_stat_long("代理IP统计", http_x_forwarded_for_data)
  # HTTP request statistics
  print_stat_long("HTTP请求统计", request_line_data)
  # User-Agent statistics
  print_stat_long("User-Agent统计", http_user_agent_data)
  # HTTP referer statistics
  print_stat_long("HTTP REFERER 统计", http_referer_data)
  # Request time statistics
  print_stat_long("请求时间统计", time_local_data)
  # HTTP response status statistics
  print_stat_long("HTTP响应状态统计", status_data)
  # Traffic share per request
  print_sent_long("HTTP流量占比统计", http_sent_data)
  # Non-200 status codes, in ascending order of the code text
  http_bad_code_data.sort(proc (x, y: (string, ref CountTable[string])): int = cmp(x[0], y[0]))
  for code, items in http_bad_code_data:
    print_code_long(code, items)
# Entry point: analyze the file named by the first command-line argument,
# or standard input when no argument is given.
try:
  if paramCount() < 1:
    process(stdin)
  else:
    process(paramStr(1))
except CatchableError as err:
  # Print the failure reason and exit with a non-zero status.
  echo err.msg
  quit(1)