Harbor Documentation

Harbor::Contrib::Stats::ApacheImporter

Parent

Methods

Attributes

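  • logger [RW] Object that receives progress and summary messages through #info while run executes.
  • apache_file [RW] Apache access log that run opens and reads line by line; set by new.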

Public Class Methods

new(apache_file)

      # File lib/harbor/contrib/stats/reconciliation/apache_importer.rb, line 8
 8:         def initialize(apache_file)
 9:           self.apache_file = apache_file
10:         end

Public Instance Methods

run()

      # File lib/harbor/contrib/stats/reconciliation/apache_importer.rb, line 12
12:         def run
13:           
14:           apache_regex = /(.*) - - \[(.*)\] \"(.*) (.*) .*\" .* .* \"(.*)\" \".*\"/
15:           
16:           invalid_date = repository.adapter.query('select distinct created_at from page_views order by created_at asc limit 1').first + 1/60000.0
17: 
18:           # Unique URIs from page_views, these are the only ones we want to look at in the Apache logs
19:           logger.info "Invalid date: #{invalid_date.to_s}"
20:           unique_uris = repository.adapter.query('select distinct uri from page_views where created_at <= ?', invalid_date)
21:           logger.info "#{unique_uris.size} unique URIs found!"
22:           
23:           Harbor::Contrib::Stats::ApacheRequest.create_table!
24: 
25:           repository.adapter.execute('truncate table apache_requests')
26: 
27:           i, j = 0, 0
28:           f = ::File.new(self.apache_file)
29:           while (line = f.readline)
30:             i+=1
31:             logger.info "Apache lines parsed: #{i}, Requests imported: #{j}" if i%1000 == 0
32:             if line =~ apache_regex
33:               ip_address = $1
34:               request_type = $3
35:               uri = $4
36:               referrer = $5
37:               date = DateTime.parse($2.sub(":"," "))
38:               break if date >= DateTime.parse('2010-02-27 00:00:00')
39:               next unless unique_uris.include?(uri) && date > invalid_date
40:               j += 1
41:               Harbor::Contrib::Stats::ApacheRequest.create(j, ip_address, request_type, uri, referrer, date)
42:             end
43:           end
44:           logger.info "Parsing complete!"
45:           logger.info "Apache lines parsed: #{i}, Requests imported: #{j}"
46:           exit!(0)
47:           
48:         end