# Rebuilds the apache_requests table from the Apache access log located at
# +apache_file+.
#
# Steps, in order:
# 1. Computes the "invalid date" threshold from the earliest page_views row.
# 2. Loads the distinct URIs recorded in page_views at or before that threshold.
# 3. Ensures the apache_requests table exists, then truncates it.
# 4. Streams the log line by line and creates one ApacheRequest per line that
#    matches the log regex, targets one of those URIs, and is dated strictly
#    after the threshold. Parsing stops at the first line dated on or after
#    the hard-coded cutoff (2010-02-27 00:00:00), or at end of file.
# 5. Logs a summary and terminates the process via exit!(0).
#
# Does not return normally: the process is ended by exit!(0).
def run
  apache_regex = /(.*) - - \[(.*)\] \"(.*) (.*) .*\" .* .* \"(.*)\" \".*\"/

  # Parsed once up front; this value does not change while the log is read.
  cutoff = DateTime.parse('2010-02-27 00:00:00')

  # Earliest created_at plus 1/60000.0. If the adapter returns a Date/DateTime
  # this adds a fraction of a day (about 1.44 seconds) -- TODO confirm the
  # column type. NOTE(review): an empty page_views table makes `first` nil and
  # this line raise; the intended behaviour for that case is not visible here.
  invalid_date = repository.adapter.query('select distinct created_at from page_views order by created_at asc limit 1').first + 1/60000.0

  logger.info "Invalid date: #{invalid_date}"
  unique_uris = repository.adapter.query('select distinct uri from page_views where created_at <= ?', invalid_date)
  logger.info "#{unique_uris.size} unique URIs found!"

  # Index the URIs by value so the per-line membership test is a hash lookup
  # instead of a linear scan of the query result.
  tracked_uris = {}
  unique_uris.each { |known_uri| tracked_uris[known_uri] = true }

  Harbor::Contrib::Stats::ApacheRequest.create_table!

  repository.adapter.execute('truncate table apache_requests')

  i = 0 # log lines read so far
  j = 0 # requests imported so far; also passed as the first argument to create

  # foreach stops cleanly at end of file (readline would raise EOFError there)
  # and closes the file when the block finishes or breaks out.
  ::File.foreach(apache_file) do |line|
    i += 1
    logger.info "Apache lines parsed: #{i}, Requests imported: #{j}" if i % 1000 == 0

    match = apache_regex.match(line)
    next unless match

    ip_address, timestamp, request_type, uri, referrer = match.captures

    # Replace the first ":" (between date and time in the log timestamp) with
    # a space before handing the string to DateTime.parse.
    date = DateTime.parse(timestamp.sub(":", " "))
    break if date >= cutoff
    next unless tracked_uris.key?(uri) && date > invalid_date

    j += 1
    Harbor::Contrib::Stats::ApacheRequest.create(j, ip_address, request_type, uri, referrer, date)
  end

  logger.info "Parsing complete!"
  logger.info "Apache lines parsed: #{i}, Requests imported: #{j}"

  # exit! ends the process immediately, without running at_exit handlers.
  exit!(0)
end