-
Notifications
You must be signed in to change notification settings - Fork 0
/
weblog01.rb
62 lines (54 loc) · 2.01 KB
/
weblog01.rb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
require 'mysql2'
# Patterns used to pick one value per field out of a log record. Each pattern
# is tested against the whitespace-separated words of the record; the hash
# order is the order in which the columns are reported.
WEBLOG_FIELD_PATTERNS = {
  url: %r{/},                               # any word that contains a slash
  ip: /\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}/, # any dotted quad, not only 127.0.0.1-shaped ones
  date: /\d{4}-\d{2}-\d{2}/,                # \d{4} is shorthand for \d\d\d\d
  clock: /\A\d{2}:\d{2}:\d{2}\z/,           # anchored so the whole word must be HH:MM:SS
  controller: /[A-Z].*Controller/
}.freeze

# Splits a log into records (runs of non-blank lines separated by blank lines)
# and extracts, for every record, the first word matching each pattern in
# WEBLOG_FIELD_PATTERNS.
#
# Exactly one entry is appended per record and per field (nil when the record
# has no matching word), so all returned arrays have the same length and
# index i always refers to the i-th record.
#
# @param log_lines [Enumerable<String>] lines of the log (a File, StringIO or Array)
# @return [Hash{Symbol => Array<String, nil>}] one array per field name
def extract_weblog_fields(log_lines)
  fields = WEBLOG_FIELD_PATTERNS.keys.each_with_object({}) { |name, acc| acc[name] = [] }

  # A nil key makes chunk drop the line, so blank lines act purely as separators.
  records = log_lines.chunk { |line| /\A\s*\z/ !~ line || nil }

  records.each do |_, record_lines|
    words = record_lines.flat_map(&:split)
    WEBLOG_FIELD_PATTERNS.each do |name, pattern|
      # Only the first match of the record is kept.
      fields[name] << words.find { |word| word =~ pattern }
    end
  end

  fields
end

log_path = "development.log" # location of the log file

log_fields =
  if File.exist?(log_path)
    # Block form closes the file handle as soon as parsing is done.
    File.open(log_path) { |file| extract_weblog_fields(file) }
  else
    warn "#{log_path} not found; no log records were read"
    extract_weblog_fields([])
  end

url, ip, date, clock, controller = log_fields.values_at(:url, :ip, :date, :clock, :controller)
count = url.size # number of log records; every record adds one entry to each array
# Dump a summary line followed by every extracted column, each in inspect form.
summary = "共有#{count} 条日志记录,提取信息如下所示:> "
[summary, url, ip, date, clock, controller].each { |item| p item }
# Connection settings for the MySQL database `weblog` on the local machine.
db_settings = {
  host: '127.0.0.1',   # server address
  username: 'root',    # account name
  password: '123123',  # account password
  database: 'weblog',  # schema to use
  encoding: 'utf8'     # connection character set
}
client = Mysql2::Client.new(db_settings)
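=begin
url.zip(ip, date, clock, controller) do |a, b, c, d, e|
# zip takes the elements of the receiver and of the argument arrays one position at a time and runs the block once per position.
client.query("INSERT INTO Weblog(Url, Ip, Date, Clock, Controller) VALUES ('#{a}', '#{b}', '#{c}', '#{d}', '#{e}')")
end
# The target table needs five columns.
=end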
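=begin
Approach overview:
The log file is first split at blank lines into records. Each record is an array with one element per line; every element is then matched against the patterns.
=end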