pull/216/head
midoks 3 years ago
parent 80c0bbed71
commit c501efdfac
  1. 409
      plugins/webstats/lua/webstats_common.lua
  2. 641
      plugins/webstats/lua/webstats_log.lua
  3. 7
      plugins/webstats/t/bench/bench.sh
  4. 106
      plugins/webstats/t/bench/test_match_spider.lua
  5. 35
      plugins/webstats/t/bench/test_ngx_find.lua
  6. 118
      plugins/webstats/t/bench/test_time.lua

@ -10,14 +10,24 @@ local sites = require "webstats_sites"
-- Module-level state shared by the helpers in this file.
local debug_mode = true
local total_key = "log_kv_total"
local unset_server_name = "unset"
-- Upper bound for per-site log ids; get_last_id wraps back to 1 once reached.
local max_log_id = 99999999999999
local cache = ngx.shared.mw_total
-- ngx.today() is "yyyy-mm-dd"; dropping the dashes gives "yyyymmdd".
local today = ngx.re.gsub(ngx.today(),'-','')
local request_header = ngx.req.get_headers()
-- HTTP verb of the request. This used to be assigned ngx.req.get_headers(),
-- which made every `method == 'GET'` comparison in this module false.
local method = ngx.req.get_method()

-- Per-day column names ("day7", "flow7", "spider_flow7", ...).
local day = os.date("%d")
local number_day = tonumber(day)
local day_column = "day"..number_day
local flow_column = "flow"..number_day
local spider_column = "spider_flow"..number_day

-- Effective site configuration; assigned by _M.setInputSn and read by
-- helpers such as _M.get_client_ip, so setInputSn has to run first.
local auto_config = nil

local log_dir = "{$SERVER_APP}/logs"
function _M.new(self)
@ -71,12 +81,132 @@ function _M.setParams( self, params )
self.params = params
end
-- Select the configuration for `input_sn` and remember it in the
-- module-level `auto_config`.
--
-- When the site has no entry of its own the global configuration is used
-- as-is; otherwise every key missing from the site entry is filled in from
-- the global configuration (the site table is updated in place).
--
-- @param input_sn  site name used as key into `config`
-- @return the effective configuration table
function _M.setInputSn(self, input_sn)
    local global_config = config["global"]
    -- The original indexed config[site]; `site` is not defined in this
    -- function, so configured sites always ended up with a nil table.
    local site_config = config[input_sn]

    if site_config == nil then
        auto_config = global_config
    else
        auto_config = site_config
        for k, v in pairs(global_config) do
            if auto_config[k] == nil then
                auto_config[k] = v
            end
        end
    end
    return auto_config
end
-- Return the request's Host header with every "_" replaced through
-- ngx.re.gsub, or "unknown" when the header is absent.
function _M.get_domain(self)
    local host = ngx.req.get_headers()['host']
    if host == nil then
        return "unknown"
    end
    -- Keep only the first result of ngx.re.gsub (the rewritten string).
    local rewritten = ngx.re.gsub(host, "_", ".")
    return rewritten
end
-- Split `str` on any character contained in `reps`.
-- Empty segments are dropped because the pattern matches runs of one or
-- more non-separator characters.
-- @return array of the non-empty pieces, in order
function _M.split(self, str, reps)
    local pieces = {}
    for piece in string.gmatch(str, '[^'..reps..']+') do
        table.insert(pieces, piece)
    end
    return pieces
end
-- Count the leading sequence part of `arr` (what ipairs would visit).
-- A nil/false argument counts as zero elements.
function _M.arrlen(self, arr)
    if not arr then
        return 0
    end
    local last_index = 0
    for index in ipairs(arr) do
        last_index = index
    end
    return last_index
end
-- Check whether `client_ip` is a dotted-quad IPv4 address.
--
-- Exactly four decimal groups are required and each must be in 0..255.
-- The previous implementation only required "at least four" groups and
-- relied on tonumber(), so inputs such as "1.2.3.4.5", "0x10.1.1.1" or
-- "1..2.3.4" were accepted. Surrounding whitespace is still tolerated, as
-- it was before.
--
-- @param client_ip  candidate address (non-strings yield false)
-- @return boolean
function _M.is_ipaddr(self, client_ip)
    if type(client_ip) ~= "string" then
        return false
    end
    local a, b, c, d = string.match(client_ip, "^%s*(%d+)%.(%d+)%.(%d+)%.(%d+)%s*$")
    if not a then
        return false
    end
    for _, octet in ipairs({a, b, c, d}) do
        -- Cap the length so absurdly long digit runs are rejected early.
        if #octet > 3 or tonumber(octet) > 255 then
            return false
        end
    end
    return true
end
-- Map a server name to the configured site name.
--
-- Lookup order: shared-dict cache, exact match on a site's name or one of
-- its domains, then wildcard domains (entries containing "*"). The result
-- is cached for 86400 seconds; names that match nothing resolve to
-- `unset_server_name`.
--
-- Fixes over the previous version:
--   * a wildcard match is now actually returned (it used to be cached and
--     then immediately overwritten with the "unset" fallback);
--   * the wildcard domain is converted to an anchored Lua pattern with its
--     magic characters escaped, so "." and "-" are matched literally.
--
-- @param input_sn  server name to resolve
-- @return site name, or `unset_server_name`
function _M.get_sn(self, input_sn)
    local dst_name = cache:get(input_sn)
    if dst_name then return dst_name end

    local wildcard_name = nil
    for _, site in ipairs(sites) do
        local site_name = site["name"]
        if input_sn == site_name then
            cache:set(input_sn, site_name, 86400)
            return site_name
        end

        for _, dst_domain in ipairs(site['domains']) do
            if input_sn == dst_domain then
                cache:set(input_sn, site_name, 86400)
                return site_name
            elseif string.find(dst_domain, "*", 1, true) then
                -- Escape every pattern-magic character except "*", then
                -- turn "*" into ".*" and anchor both ends.
                local escaped = string.gsub(dst_domain, "[%^%$%(%)%%%.%[%]%+%-%?]", "%%%0")
                local pattern = "^" .. (string.gsub(escaped, "%*", ".*")) .. "$"
                if string.find(input_sn, pattern) then
                    -- Keep scanning: an exact match elsewhere still wins.
                    wildcard_name = site_name
                end
            end
        end
    end

    if wildcard_name then
        cache:set(input_sn, wildcard_name, 86400)
        return wildcard_name
    end

    cache:set(input_sn, unset_server_name, 86400)
    return unset_server_name
end
-- Build the hourly bucket key ("YYYYMMDDHH") from nginx's cached time.
function _M.get_store_key(self)
    local now = ngx.time()
    return os.date("%Y%m%d%H", now)
end
-- Response body size taken from $body_bytes_sent, as a number.
-- A missing variable is treated as 0.
function _M.get_length(self)
    local sent = ngx.var.body_bytes_sent
    if sent == nil then
        return tonumber(0)
    end
    return tonumber(sent)
end
-- Allocate the next log id for `input_sn` from the shared dict.
--
-- The counter lives under "<input_sn>_last_id" and wraps back to 1 once it
-- reaches `max_log_id`. A second counter keyed by `cache_count_id_key` is
-- bumped as well (NOTE(review): that key is not defined in the visible
-- part of this file - confirm it exists at module level).
--
-- @param input_sn  site name
-- @return new id, or nil plus an error string when the shared dict
--         refuses the increment (previously this raised on nil >= number)
function _M.get_last_id(self, input_sn)
    local last_insert_id_key = input_sn .. "_last_id"
    local new_id, err = cache:incr(last_insert_id_key, 1, 0)
    cache:incr(cache_count_id_key, 1, 0)

    if not new_id then
        ngx.log(ngx.ERR, "webstats: cannot allocate log id for ", input_sn, ": ", err)
        return nil, err
    end

    if new_id >= max_log_id then
        cache:set(last_insert_id_key, 1)
        new_id = cache:get(last_insert_id_key)
    end
    return new_id
end
-- Serialise the request headers (and, for non-GET requests, the body) to
-- JSON.
--
-- For non-GET requests the raw body is used when available, otherwise the
-- parsed POST arguments; either one is stored under the "payload" key.
--
-- Fixes over the previous version:
--   * POST-argument tables were passed to table.concat as the separator,
--     which always raises; they are now stored under "payload" like string
--     bodies;
--   * the shared `request_header` table is no longer modified - a shallow
--     copy is encoded instead.
--
-- @return JSON string, or "" when no headers are available
function _M.get_http_origin(self)
    local headers = request_header
    if not headers then return "" end

    local snapshot = {}
    for name, value in pairs(headers) do
        snapshot[name] = value
    end

    if method ~= 'GET' then
        local data = ngx.req.get_body_data()
        if not data then
            data = ngx.req.get_post_args(1000000)
        end
        local kind = type(data)
        if kind == "string" or kind == "table" then
            snapshot["payload"] = data
        end
    end
    return json.encode(snapshot)
end
-- 后台任务
function _M.cron(self)
local timer_every_get_data = function (premature)
@ -296,6 +426,49 @@ function _M.store_logs_line(self, db, stmt, input_sn, info)
return true
end
-- Unique-IP counter contribution for this request.
-- Returns 1 the first time "<input_sn>_<ip>" is seen (and records it in the
-- shared dict with the expiry supplied by self:get_end_time()), else 0.
function _M.statistics_ipc(self, input_sn, ip)
    local token = input_sn..'_'..ip
    if cache:get(token) then
        return 0
    end
    cache:set(token, 1, self:get_end_time())
    return 1
end
-- Compute the page-view / unique-visitor contribution of this request.
--
-- A page view is counted for non-spider GET requests answered with status
-- 200, more than 512 body bytes and a content type containing "text/html".
-- A unique visitor is counted additionally when the user agent contains
-- "mozilla" and the md5 of ip .. user-agent .. date has not been seen yet.
--
-- Fixes: the response-header table no longer leaks into a global
-- (`out_header`), and repeated Content-Type / User-Agent headers, which
-- ngx_lua reports as tables, no longer crash string.find / string.lower.
--
-- @param ip           client address used in the visitor token
-- @param is_spider    truthy when the request came from a known crawler
-- @param body_length  number of body bytes sent
-- @return pvc, uvc    each 0 or 1
function _M.statistics_request(self, ip, is_spider, body_length)
    local pvc, uvc = 0, 0

    if is_spider or method ~= 'GET' or ngx.status ~= 200 or not (body_length > 512) then
        return pvc, uvc
    end

    local content_type = ngx.resp.get_headers()['content-type']
    if type(content_type) == "table" then
        content_type = content_type[1]
    end
    if type(content_type) ~= "string"
        or not string.find(content_type, 'text/html', 1, true) then
        return pvc, uvc
    end
    pvc = 1

    local raw_ua = request_header['user-agent']
    if type(raw_ua) == "table" then
        raw_ua = raw_ua[1]
    end
    if type(raw_ua) == "string" and string.find(string.lower(raw_ua), 'mozilla', 1, true) then
        local date_str = os.date("%Y-%m-%d")
        local uv_token = ngx.md5(ip .. raw_ua .. date_str)
        if not cache:get(uv_token) then
            uvc = 1
            cache:set(uv_token, 1, self:get_end_time())
        end
    end
    return pvc, uvc
end
--------------------- db start ---------------------------
function _M.statistics_uri(self, db, uri, uri_md5, body_length)
-- count the number of URI requests and traffic
local open_statistics_uri = config['global']["statistics_uri"]
@ -324,6 +497,8 @@ function _M.statistics_ip(self, db, ip, body_length)
end
function _M.update_stat(self,db, stat_table, key, columns)
-- 根据指定表名,更新统计数据
if not columns then return end
@ -333,10 +508,9 @@ function _M.update_stat(self,db, stat_table, key, columns)
stmt:finalize()
local update_sql = "UPDATE ".. stat_table .. " SET " .. columns
update_sql = update_sql .. " WHERE time=" .. key
status, errorString = db:exec(update_sql)
return db:exec(update_sql)
end
--------------------- db end ---------------------------
-- debug func
function _M.D(self,msg)
@ -427,5 +601,234 @@ function _M.rpop(self)
end
-- Build the SQL fragment "<field>=<field>+<value>" used in UPDATE ... SET.
function _M.get_update_field(self, field, value)
    local increment = tostring(value)
    return field .. "=" .. field .. "+" .. increment
end
-- Elapsed request time in whole milliseconds; 0 is reported as 1.
function _M.get_request_time(self)
    local elapsed_ms = math.floor((ngx.now() - ngx.req.start_time()) * 1000)
    if elapsed_ms == 0 then
        return 1
    end
    return elapsed_ms
end
-- Seconds remaining until the next local midnight.
--
-- Used as the expiry for per-day dedupe tokens. The previous version
-- converted the midnight date table with ngx.time(n_date); ngx.time takes
-- no arguments and simply returns the current time, so the result was ~0,
-- and an expiry of 0 in ngx.shared.DICT:set means "never expire".
-- os.time() is the function that accepts a date table.
--
-- @return positive number of seconds
function _M.get_end_time(self)
    local s_time = ngx.time()
    -- Take "now + 1 day" and truncate it to 00:00:00.
    local n_date = os.date("*t", s_time + 86400)
    n_date.hour = 0
    n_date.min = 0
    n_date.sec = 0
    local d_time = os.time(n_date)
    return d_time - s_time
end
-- Numeric id stored for each recognised crawler family.
local spider_index_map = {
    ["baidu"] = 1,
    ["bing"] = 2,
    ["qh360"] = 3,
    ["google"] = 4,
    ["bytes"] = 5,
    ["sogou"] = 6,
    ["youdao"] = 7,
    ["soso"] = 8,
    ["dnspod"] = 9,
    ["yandex"] = 10,
    ["yisou"] = 11,
    ["other"] = 12,
    ["mpcrawler"] = 13,
    ["yahoo"] = 14,
    ["duckduckgo"] = 15
}

-- Ordered {needle, family} rules applied to the lower-cased regex match.
local spider_rules = {
    {"baidu", "baidu"}, {"bytes", "bytes"}, {"360", "qh360"},
    {"sogou", "sogou"}, {"soso", "soso"}, {"google", "google"},
    {"bingbot", "bing"}, {"youdao", "youdao"}, {"dnspod", "dnspod"},
    {"yandex", "yandex"}, {"yisou", "yisou"}, {"mpcrawler", "mpcrawler"},
}
local spider_rules_extra = {
    {"yahoo", "yahoo"}, {"slurp", "yahoo"}, {"duckduckgo", "duckduckgo"},
}

-- Return the family of the first rule whose needle occurs in `matched`.
local function classify_spider(matched, rules)
    local lowered = string.lower(matched)
    for _, rule in ipairs(rules) do
        if string.find(lowered, rule[1], 1, true) then
            return rule[2]
        end
    end
    return ""
end

-- Detect search-engine crawlers from a User-Agent string.
--
-- Fixes over the previous version:
--   * the Yahoo/Slurp/DuckDuckGo stage tested an undefined variable (`res`)
--     and therefore never matched; it now tests its own match result;
--   * a missing or non-string user agent returns "not a spider" instead of
--     raising inside ngx.re.match.
--
-- @param ua  User-Agent header value
-- @return is_spider (boolean), spider_name (string, "" if none),
--         spider_index (number, 0 if none)
function _M.match_spider(self, ua)
    if type(ua) ~= "string" then
        return false, "", 0
    end

    local hit = ngx.re.match(ua, "(Baiduspider|Bytespider|360Spider|Sogou web spider|Sosospider|Googlebot|bingbot|AdsBot-Google|Google-Adwords|YoudaoBot|Yandex|DNSPod-Monitor|YisouSpider|mpcrawler)", "ijo")
    if hit then
        local name = classify_spider(hit[0], spider_rules)
        return true, name, spider_index_map[name]
    end

    -- Second stage: crawlers not covered by the main expression.
    hit = ngx.re.match(ua, "(Yahoo|Slurp|DuckDuckGo)", "ijo")
    if hit then
        local name = classify_spider(hit[0], spider_rules_extra)
        return true, name, spider_index_map[name]
    end

    return false, "", 0
end
-- Classify a User-Agent and return the matching client_stat increments as
-- a comma-separated list of "<column>=<column>+1" fragments.
--
-- Mobile agents get "mobile" plus the device column; everything else gets
-- one browser column (or "machine"/"other") and, when recognised, an OS
-- column. "weixin" is appended whenever the agent mentions MicroMessenger.
--
-- @param ua  User-Agent header value (nil yields "")
-- @return string of SET fragments
function _M.match_client(self, ua)
    if not ua then
        return ""
    end

    -- Lower-cased keyword -> statistics column.
    local clients_map = {
        ["android"] = "android",
        ["iphone"] = "iphone",
        ["ipod"] = "iphone",
        ["ipad"] = "iphone",
        ["firefox"] = "firefox",
        ["msie"] = "msie",
        ["trident"] = "msie",
        ["360se"] = "qh360",
        ["360ee"] = "qh360",
        ["360browser"] = "qh360",
        ["qihoo"] = "qh360",
        ["the world"] = "theworld",
        ["theworld"] = "theworld",
        ["tencenttraveler"] = "tt",
        ["maxthon"] = "maxthon",
        ["opera"] = "opera",
        ["qqbrowser"] = "qq",
        ["ucweb"] = "uc",
        ["ubrowser"] = "uc",
        ["safari"] = "safari",
        ["chrome"] = "chrome",
        ["metasr"] = "metasr",
        ["2345explorer"] = "pc2345",
        ["edge"] = "edeg",
        ["edg"] = "edeg",
        ["windows"] = "windows",
        ["linux"] = "linux",
        ["macintosh"] = "mac",
        ["mobile"] = "mobile"
    }

    local fragments = {}
    local function bump(column)
        fragments[#fragments + 1] = self:get_update_field(column, 1)
    end

    local mobile_hit = ngx.re.match(ua, "(Mobile|Android|iPhone|iPod|iPad)", "ijo")
    if mobile_hit then
        -- Mobile branch
        bump("mobile")
        local device = string.lower(mobile_hit[0])
        if device ~= "mobile" then
            bump(clients_map[device])
        end
    else
        -- Desktop branch. Which keyword wins depends on where it appears
        -- in the UA; the plain string.find checks are only consulted when
        -- the combined expression finds nothing.
        local browser = nil
        local pc_hit = ngx.re.match(ua, "(360SE|360EE|360browser|Qihoo|TheWorld|TencentTraveler|Maxthon|Opera|QQBrowser|UCWEB|UBrowser|MetaSr|2345Explorer|Edg[e]*)", "ijo")
        if pc_hit then
            browser = string.lower(pc_hit[0])
        elseif ngx.re.find(ua, "[Ff]irefox") then
            browser = "firefox"
        elseif string.find(ua, "MSIE") or string.find(ua, "Trident") then
            browser = "msie"
        elseif string.find(ua, "[Cc]hrome") then
            browser = "chrome"
        elseif string.find(ua, "[Ss]afari") then
            browser = "safari"
        end

        if browser then
            bump(clients_map[browser])
        else
            -- Neither mobile nor a known browser: tool/bot, else "other".
            local machine_hit = ngx.re.match(ua, "(ApacheBench|[Cc]url|HeadlessChrome|[a-zA-Z]+[Bb]ot|[Ww]get|[Ss]pider|[Cc]rawler|[Ss]crapy|zgrab|[Pp]ython|java)", "ijo")
            if machine_hit then
                bump("machine")
            else
                bump("other")
            end
        end

        local os_hit = ngx.re.match(ua, "(Windows|Linux|Macintosh)", "ijo")
        if os_hit then
            bump(clients_map[string.lower(os_hit[0])])
        end
    end

    if ngx.re.find(ua, "MicroMessenger") then
        bump("weixin")
    end

    return table.concat(fragments, ",")
end
-- Determine the client address for the current request.
--
-- When the site configuration enables `cdn`, the first non-empty header
-- listed in `cdn_headers` supplies a comma-separated address list and its
-- first entry is used. That value is client-controlled, so it is accepted
-- only if the whole string looks like an IPv6 or IPv4 address; otherwise
-- $remote_addr is used ("unknown" if that is missing too).
--
-- Fixes over the previous version:
--   * `not ngx.re.match(...) == nil` parsed as `(not x) == nil`, which is
--     always false, so the IPv4 shape check never took part;
--   * the IPv6 check validated only the prefix up to the first ":", so
--     arbitrary trailing text from a header was returned as the address;
--   * a header sent more than once (reported as a table) or consisting of
--     separators only no longer raises.
--
-- Requires _M.setInputSn to have populated `auto_config`.
--
-- @return address string, or "unknown"
function _M.get_client_ip(self)
    local client_ip = "unknown"

    if auto_config['cdn'] == true then
        for _, header_name in ipairs(auto_config['cdn_headers']) do
            local ip_list = request_header[header_name]
            if type(ip_list) == "table" then
                ip_list = ip_list[1]
            end
            if ip_list ~= nil and ip_list ~= "" then
                local first = self:split(ip_list, ',')[1]
                if first then
                    -- Trim surrounding blanks; keep only gsub's first result.
                    client_ip = string.gsub(first, "^%s*(.-)%s*$", "%1")
                end
                break
            end
        end
    end

    -- IPv6: hex digits, ":" and "." only, with at least one ":".
    if client_ip ~= "unknown"
        and ngx.re.match(client_ip, "^[a-fA-F0-9:.]*:[a-fA-F0-9:.]*$", "jo") then
        return client_ip
    end

    -- IPv4: exact dotted-quad shape plus range check.
    if not ngx.re.match(client_ip, "^\\d{1,3}(?:\\.\\d{1,3}){3}$", "jo")
        or not self:is_ipaddr(client_ip) then
        client_ip = ngx.var.remote_addr
        if client_ip == nil then
            client_ip = "unknown"
        end
    end
    return client_ip
end
return _M

@ -9,11 +9,8 @@ log_by_lua_block {
end
-- Version string of this logging script.
local ver = '0.2.0'
-- Upper bound for per-site log ids.
local max_log_id = 99999999999999
local debug_mode = true
-- Site name used when a server name matches no configured site.
local unset_server_name = "unset"
-- Shared helper module; the instance `C` is obtained via getInstance().
local __C = require "webstats_common"
local C = __C:getInstance()
@ -46,527 +43,27 @@ log_by_lua_block {
local config = require "webstats_config"
local sites = require "webstats_sites"

-- Resolve the configured site name for this request; string.gsub's second
-- result (the replacement count) is dropped by the single assignment.
local server_name = string.gsub(C:get_sn(ngx.var.server_name),'_','.')
C:setConfData(config, sites)

-- Effective per-site configuration.
local auto_config = C:setInputSn(server_name)

-- These two were swapped: the headers table belongs in `request_header`
-- and the HTTP verb in `method`. The bare `local server_name`,
-- `local request_header`, ... re-declarations that used to sit here were
-- dropped as well, because they shadowed the initialised locals above
-- (notably resetting `server_name` to nil before setInputSn saw it).
local request_header = ngx.req.get_headers()
local method = ngx.req.get_method()

local excluded
local today

-- Per-day column names ("day7", "flow7", "spider_flow7", ...).
local day = os.date("%d")
local number_day = tonumber(day)
local day_column = "day"..number_day
local flow_column = "flow"..number_day
local spider_column = "spider_flow"..number_day
--- default common var end ---
-- Thin wrapper around json.encode: returns whatever the encoder returns
-- for `msg`.
local function to_json(msg)
return json.encode(msg)
end
-- Refresh the per-request and per-day upvalues used by the helpers below.
local function init_var()
    -- Request data
    method = ngx.req.get_method()
    request_header = ngx.req.get_headers()

    -- Date data: `today` is ngx.today() without dashes ("yyyymmdd").
    today = ngx.re.gsub(ngx.today(),'-','')
    day = os.date("%d")
    number_day = tonumber(day)

    -- Column names derived from the day of month.
    spider_column = "spider_flow"..number_day
    flow_column = "flow"..number_day
    day_column = "day"..number_day
end
-- Pick the configuration for `site` and store it in the `auto_config`
-- upvalue. Sites without their own entry use the global configuration;
-- otherwise missing keys are copied into the site entry from the global one.
local function get_auto_config(site)
    local defaults = config["global"]
    local site_config = config[site]

    if site_config == nil then
        auto_config = defaults
        return auto_config
    end

    auto_config = site_config
    for key, value in pairs(defaults) do
        if site_config[key] == nil then
            site_config[key] = value
        end
    end
    return auto_config
end
-- Hourly bucket key ("YYYYMMDDHH") for the current system time.
local function get_store_key()
    local now = os.time()
    local bucket = os.date("%Y%m%d%H", now)
    return bucket
end
-- Response body size from $body_bytes_sent as a number (0 when unset).
local function get_length()
    local sent = ngx.var.body_bytes_sent
    if sent == nil then
        return tonumber(0)
    end
    return tonumber(sent)
end
-- Host header with "_" replaced by ".", or "unknown" when it is absent.
local function get_domain()
    local host = request_header['host']
    if host == nil then
        return "unknown"
    end
    -- Single assignment keeps only the rewritten string.
    local rewritten = string.gsub(host, "_", ".")
    return rewritten
end
-- Write `body` to `filename` using the given io.open `mode`.
-- @return true on success, nil when the file cannot be opened
local function write_file_bylog(filename,body,mode)
    local handle = io.open(filename, mode)
    if handle == nil then
        return nil
    end

    handle:write(body)
    handle:flush()
    handle:close()
    return true
end
-- Read the whole of `filename`.
-- @return file contents, or nil when the file is missing or empty
local function read_file_body_bylog(filename)
    local fp = io.open(filename,'rb')
    if not fp then
        return nil
    end
    -- Kept local: this used to be written to the global `fbody`.
    local fbody = fp:read("*a")
    fp:close()
    if fbody == '' then
        return nil
    end
    return fbody
end
-- Read the site's update_day.log marker; nil when it is missing or empty.
local function load_update_day(input_server_name)
    local marker_path = "{$SERVER_APP}/logs/"..input_server_name.."/update_day.log"
    return read_file_body_bylog(marker_path)
end
-- Overwrite the site's update_day.log marker with the current `today` value.
local function write_update_day(input_server_name)
    local marker_path = "{$SERVER_APP}/logs/"..input_server_name.."/update_day.log"
    local stamp = today
    write_file_bylog(marker_path, stamp, "w")
end
-- Count the leading sequence part of `arr` (what ipairs would visit).
-- A nil/false argument counts as zero.
local function arrlen_bylog(arr)
    if not arr then return 0 end
    -- Kept local: the counter used to be the global `count`.
    local count = 0
    for _ in ipairs(arr) do
        count = count + 1
    end
    return count
end
-- Split `str` on any character contained in `reps`; empty segments are
-- dropped.
local function split_bylog(str,reps )
    local pieces = {}
    for piece in string.gmatch(str, '[^'..reps..']+') do
        table.insert(pieces, piece)
    end
    return pieces
end
-- Check whether `client_ip` is a dotted-quad IPv4 address.
--
-- Exactly four decimal groups in 0..255 are required. The earlier version
-- accepted any string with at least four tonumber()-convertible groups
-- ("1.2.3.4.5", "0x10.1.1.1", "1..2.3.4"). Surrounding whitespace is still
-- tolerated.
--
-- @return boolean (false for non-string input)
local function is_ipaddr_bylog(client_ip)
    if type(client_ip) ~= "string" then
        return false
    end
    local a, b, c, d = string.match(client_ip, "^%s*(%d+)%.(%d+)%.(%d+)%.(%d+)%s*$")
    if not a then
        return false
    end
    for _, octet in ipairs({a, b, c, d}) do
        if #octet > 3 or tonumber(octet) > 255 then
            return false
        end
    end
    return true
end
-- Determine the client address for the current request.
--
-- With `cdn` enabled in `auto_config`, the first non-empty header named in
-- `cdn_headers` provides a comma-separated list whose first entry is used.
-- Because that value is client-controlled, it is accepted only when the
-- whole string looks like an IPv6 or IPv4 address; otherwise $remote_addr
-- is used ("unknown" if unavailable).
--
-- Fixes: `not ngx.re.match(...) == nil` was always false (operator
-- precedence); the IPv6 test checked only the prefix up to the first ":";
-- repeated headers (tables) and separator-only values raised.
--
-- @return address string, or "unknown"
local function get_client_ip_bylog()
    local client_ip = "unknown"

    if auto_config['cdn'] == true then
        for _, header_name in ipairs(auto_config['cdn_headers']) do
            local ip_list = request_header[header_name]
            if type(ip_list) == "table" then
                ip_list = ip_list[1]
            end
            if ip_list ~= nil and ip_list ~= "" then
                local first = split_bylog(ip_list, ',')[1]
                if first then
                    -- Trim blanks; keep only gsub's first result.
                    client_ip = string.gsub(first, "^%s*(.-)%s*$", "%1")
                end
                break
            end
        end
    end

    -- IPv6: hex digits, ":" and "." only, with at least one ":".
    if client_ip ~= "unknown"
        and ngx.re.match(client_ip, "^[a-fA-F0-9:.]*:[a-fA-F0-9:.]*$", "jo") then
        return client_ip
    end

    -- IPv4: exact dotted-quad shape plus range check.
    if not ngx.re.match(client_ip, "^\\d{1,3}(?:\\.\\d{1,3}){3}$", "jo")
        or not is_ipaddr_bylog(client_ip) then
        client_ip = ngx.var.remote_addr
        if client_ip == nil then
            client_ip = "unknown"
        end
    end
    return client_ip
end
-- Allocate the next log id for a site from the shared dict.
--
-- The counter is stored under "<name>_last_id" and wraps back to 1 once it
-- reaches `max_log_id`. `new_id` and `err` are now locals (they used to
-- leak as globals), and a failed increment returns nil, err instead of
-- raising on a nil comparison.
--
-- @return new id, or nil plus an error string
local function get_last_id(input_server_name)
    local last_insert_id_key = input_server_name .. "_last_id"
    local new_id, err = cache:incr(last_insert_id_key, 1, 0)
    cache:incr(cache_count_id_key, 1, 0)

    if not new_id then
        return nil, err
    end

    if new_id >= max_log_id then
        cache:set(last_insert_id_key, 1)
        new_id = cache:get(last_insert_id_key)
    end
    return new_id
end
-- Elapsed request time in whole milliseconds; 0 is reported as 1.
local function get_request_time()
    local elapsed_ms = math.floor((ngx.now() - ngx.req.start_time()) * 1000)
    if elapsed_ms == 0 then
        return 1
    end
    return elapsed_ms
end
-- Seconds remaining until the next local midnight (used as cache expiry).
local function get_end_time()
    local s_time = os.time()
    -- "now + 1 day", truncated to 00:00:00.
    local n_date = os.date("*t",s_time + 86400)
    n_date.hour = 0
    n_date.min = 0
    n_date.sec = 0
    -- Kept local: this used to be written to the global `d_time`.
    local d_time = os.time(n_date)
    return d_time - s_time
end
-- Serialise the request headers (and, for non-GET requests, the body) to
-- JSON.
--
-- The raw body is preferred; otherwise the parsed POST arguments are used.
-- Either is stored under "payload". Previously a table of POST arguments
-- was handed to table.concat as the separator, which always raises, and
-- the shared `request_header` table was modified in place; a shallow copy
-- is encoded now.
--
-- @return JSON string, or "" when no headers are available
local function get_http_original()
    local headers = request_header
    if not headers then return "" end

    local snapshot = {}
    for name, value in pairs(headers) do
        snapshot[name] = value
    end

    if method ~='GET' then
        local data = ngx.req.get_body_data()
        if not data then
            data = ngx.req.get_post_args(1000000)
        end
        local kind = type(data)
        if kind == "string" or kind == "table" then
            snapshot["payload"] = data
        end
    end
    return json.encode(snapshot)
end
-- Report whether a migration marker file exists, either globally or for
-- the given site.
--
-- The marker files are only probed for existence; the handles are closed
-- again right away (they used to be left open on every call).
--
-- @return boolean
local function is_migrating(input_server_name)
    local marker_paths = {
        "{$SERVER_APP}/migrating",
        "{$SERVER_APP}/logs/"..input_server_name.."/migrating",
    }
    for _, path in ipairs(marker_paths) do
        local file = io.open(path, "rb")
        if file then
            file:close()
            return true
        end
    end
    return false
end
-- True when the "<name>_working" flag in the shared dict is exactly `true`.
local function is_working(name)
    local flag = cache:get(name.."_working")
    if flag == true then
        return true
    end
    return false
end
-- Set the "<name>_working" flag to true with a 60 second expiry.
local function lock_working(name)
    cache:set(name.."_working", true, 60)
end
-- Reset the "<name>_working" flag to false (no expiry argument is passed).
local function unlock_working(name)
    cache:set(name.."_working", false)
end
-- Map a server name to the configured site name.
--
-- Lookup order: shared-dict cache, exact match on a site's name or one of
-- its domains, then wildcard domains (entries containing "*"); the last
-- wildcard match found wins. Results are cached for 86400 seconds and
-- unmatched names resolve to `unset_server_name`.
--
-- Fixes: the wildcard pattern is kept local (it leaked as the global
-- `new_domain`), its magic characters are escaped so "." and "-" match
-- literally, and it is anchored at both ends.
--
-- @return site name, or `unset_server_name`
local function get_server_name(c_name)
    local my_name = cache:get(c_name)
    if my_name then return my_name end

    local determined_name = nil
    for _, v in ipairs(sites) do
        if c_name == v["name"] then
            cache:set(c_name, v['name'], 86400)
            return v["name"]
        end

        for _, d_name in ipairs(v['domains']) do
            if c_name == d_name then
                cache:set(c_name, v['name'], 86400)
                return v['name']
            elseif string.find(d_name, "*", 1, true) then
                -- Escape everything magic except "*", then expand "*".
                local escaped = string.gsub(d_name, "[%^%$%(%)%%%.%[%]%+%-%?]", "%%%0")
                local pattern = "^" .. (string.gsub(escaped, "%*", ".*")) .. "$"
                if string.find(c_name, pattern) then
                    determined_name = v['name']
                end
            end
        end
    end

    if determined_name then
        cache:set(c_name, determined_name, 86400)
        return determined_name
    end

    cache:set(c_name, unset_server_name, 86400)
    return unset_server_name
end
--------------------- db start ---------------------------
-- Apply a set of counter increments to one row of a statistics table.
--
-- The row for `key` is created first if it does not exist, then
-- "UPDATE <stat_table> SET <columns> WHERE time=<key>" is executed.
-- Nothing happens when `columns` is nil/false.
--
-- `stat_table`, `columns` and `key` are spliced into SQL text, so they must
-- come from trusted, internally generated values.
--
-- @return the result code of db:exec for the UPDATE (previously the result
--         was written to globals and dropped)
local function update_stat(db, stat_table, key, columns)
    if not columns then return end

    local stmt = db:prepare(string.format("INSERT INTO %s(time) SELECT :time WHERE NOT EXISTS(SELECT time FROM %s WHERE time=:time);", stat_table, stat_table))
    stmt:bind_names{time=key}
    stmt:step()
    stmt:finalize()

    local update_sql = "UPDATE ".. stat_table .. " SET " .. columns
    update_sql = update_sql .. " WHERE time=" .. key
    return db:exec(update_sql)
end
-- Build the SQL fragment "<field>=<field>+<value>".
local function get_update_field(field, value)
    local increment = tostring(value)
    return field .. "=" .. field .. "+" .. increment
end
--------------------- db end ---------------------------
-- Classify the request's User-Agent and return the client_stat increments
-- as a comma-separated list of "<column>=<column>+1" fragments.
--
-- Mobile agents get "mobile" plus the device column; other agents get one
-- browser column (or "machine"/"other") and, when recognised, an OS column.
-- "weixin" is appended when the agent mentions MicroMessenger.
local function match_client()
    -- A missing header is treated as an empty user agent.
    local ua = request_header['user-agent'] or ''

    -- Lower-cased keyword -> statistics column.
    local clients_map = {
        ["android"] = "android",
        ["iphone"] = "iphone",
        ["ipod"] = "iphone",
        ["ipad"] = "iphone",
        ["firefox"] = "firefox",
        ["msie"] = "msie",
        ["trident"] = "msie",
        ["360se"] = "qh360",
        ["360ee"] = "qh360",
        ["360browser"] = "qh360",
        ["qihoo"] = "qh360",
        ["the world"] = "theworld",
        ["theworld"] = "theworld",
        ["tencenttraveler"] = "tt",
        ["maxthon"] = "maxthon",
        ["opera"] = "opera",
        ["qqbrowser"] = "qq",
        ["ucweb"] = "uc",
        ["ubrowser"] = "uc",
        ["safari"] = "safari",
        ["chrome"] = "chrome",
        ["metasr"] = "metasr",
        ["2345explorer"] = "pc2345",
        ["edge"] = "edeg",
        ["edg"] = "edeg",
        ["windows"] = "windows",
        ["linux"] = "linux",
        ["macintosh"] = "mac",
        ["mobile"] = "mobile"
    }

    local fragments = {}
    local function bump(column)
        fragments[#fragments + 1] = get_update_field(column, 1)
    end

    local mobile_hit = ngx.re.match(ua, "(Mobile|Android|iPhone|iPod|iPad)", "ijo")
    if mobile_hit then
        -- Mobile branch
        bump("mobile")
        local device = string.lower(mobile_hit[0])
        if device ~= "mobile" then
            bump(clients_map[device])
        end
    else
        -- Desktop branch. Which keyword wins depends on its position in
        -- the UA; the plain string.find checks run only when the combined
        -- expression finds nothing.
        local browser = nil
        local pc_hit = ngx.re.match(ua, "(360SE|360EE|360browser|Qihoo|TheWorld|TencentTraveler|Maxthon|Opera|QQBrowser|UCWEB|UBrowser|MetaSr|2345Explorer|Edg[e]*)", "ijo")
        if pc_hit then
            browser = string.lower(pc_hit[0])
        elseif string.find(ua, "[Ff]irefox") then
            browser = "firefox"
        elseif string.find(ua, "MSIE") or string.find(ua, "Trident") then
            browser = "msie"
        elseif string.find(ua, "[Cc]hrome") then
            browser = "chrome"
        elseif string.find(ua, "[Ss]afari") then
            browser = "safari"
        end

        if browser then
            bump(clients_map[browser])
        else
            -- Neither mobile nor a known browser: tool/bot, else "other".
            local machine_hit = ngx.re.match(ua, "(ApacheBench|[Cc]url|HeadlessChrome|[a-zA-Z]+[Bb]ot|[Ww]get|[Ss]pider|[Cc]rawler|[Ss]crapy|zgrab|[Pp]ython|java)", "ijo")
            if machine_hit then
                bump("machine")
            else
                bump("other")
            end
        end

        local os_hit = ngx.re.match(ua, "(Windows|Linux|Macintosh)", "ijo")
        if os_hit then
            bump(clients_map[string.lower(os_hit[0])])
        end
    end

    if string.find(ua, "MicroMessenger") then
        bump("weixin")
    end

    return table.concat(fragments, ",")
end
-- Detect search-engine crawlers from the request's User-Agent.
--
-- Two expressions are tried in turn: the main crawler list, then
-- Yahoo/Slurp/DuckDuckGo. The lower-cased match is mapped to a family name
-- through ordered substring rules.
--
-- Change: the intermediate values `check_res` and `spider_match` used to be
-- written to globals; the former was never read here and is gone, the
-- latter is now local.
--
-- @param client_ip  unused
-- @return is_spider (boolean), spider_name (string or nil),
--         spider_index (number; 0 when no crawler matched)
local function match_spider(client_ip)
    -- A missing header is treated as an empty user agent.
    local ua = request_header['user-agent'] or ''

    local spider_table = {
        ["baidu"] = 1,
        ["bing"] = 2,
        ["qh360"] = 3,
        ["google"] = 4,
        ["bytes"] = 5,
        ["sogou"] = 6,
        ["youdao"] = 7,
        ["soso"] = 8,
        ["dnspod"] = 9,
        ["yandex"] = 10,
        ["yisou"] = 11,
        ["other"] = 12,
        ["mpcrawler"] = 13,
        ["yahoo"] = 14,
        ["duckduckgo"] = 15
    }

    -- Ordered {needle, family} rules; the first needle found wins.
    local primary_rules = {
        {"baidu", "baidu"}, {"bytes", "bytes"}, {"360", "qh360"},
        {"sogou", "sogou"}, {"soso", "soso"}, {"google", "google"},
        {"bingbot", "bing"}, {"youdao", "youdao"}, {"dnspod", "dnspod"},
        {"yandex", "yandex"}, {"yisou", "yisou"}, {"mpcrawler", "mpcrawler"},
    }
    local extra_rules = {
        {"yahoo", "yahoo"}, {"slurp", "yahoo"}, {"duckduckgo", "duckduckgo"},
    }

    local function classify(matched, rules)
        local lowered = string.lower(matched)
        for _, rule in ipairs(rules) do
            if string.find(lowered, rule[1], 1, true) then
                return rule[2]
            end
        end
        return nil
    end

    local res = ngx.re.match(ua, "(Baiduspider|Bytespider|360Spider|Sogou web spider|Sosospider|Googlebot|bingbot|AdsBot-Google|Google-Adwords|YoudaoBot|Yandex|DNSPod-Monitor|YisouSpider|mpcrawler)", "ijo")
    if res then
        local spider_name = classify(res[0], primary_rules)
        return true, spider_name, spider_table[spider_name]
    end

    local other_res = ngx.re.match(ua, "(Yahoo|Slurp|DuckDuckGo)", "ijo")
    if other_res then
        local spider_name = classify(other_res[0], extra_rules)
        return true, spider_name, spider_table[spider_name]
    end

    return false, nil, 0
end
-- Unique-IP counter contribution for this request.
-- Returns 1 the first time "<site>_<ip>" is seen (recording it in the
-- shared dict with the expiry from get_end_time()), otherwise 0.
local function statistics_ipc(input_server_name,ip)
    local token = input_server_name..'_'..ip
    if cache:get(token) then
        return 0
    end
    cache:set(token, 1, get_end_time())
    return 1
end
-- Compute the page-view / unique-visitor contribution of this request.
--
-- A page view is counted for non-spider GET requests answered with status
-- 200, more than 512 body bytes and a content type containing "text/html".
-- A unique visitor is counted additionally when the user agent contains
-- "mozilla" and the md5 of ip .. user-agent .. date has not been seen yet.
--
-- Fixes: the response headers no longer leak into the global `out_header`,
-- and repeated Content-Type / User-Agent headers (reported as tables) no
-- longer crash string.find / string.lower.
--
-- @return pvc, uvc  each 0 or 1
local function statistics_request(ip, is_spider,body_length)
    local pvc, uvc = 0, 0

    if is_spider or method ~= 'GET' or ngx.status ~= 200 or not (body_length > 512) then
        return pvc, uvc
    end

    local content_type = ngx.resp.get_headers()['content-type']
    if type(content_type) == "table" then
        content_type = content_type[1]
    end
    if type(content_type) ~= "string"
        or not string.find(content_type, 'text/html', 1, true) then
        return pvc, uvc
    end
    pvc = 1

    local raw_ua = request_header['user-agent']
    if type(raw_ua) == "table" then
        raw_ua = raw_ua[1]
    end
    if type(raw_ua) == "string" and string.find(string.lower(raw_ua), 'mozilla', 1, true) then
        local date_str = os.date("%Y-%m-%d")
        local uv_token = ngx.md5(ip .. raw_ua .. date_str)
        if not cache:get(uv_token) then
            uvc = 1
            cache:set(uv_token, 1, get_end_time())
        end
    end
    return pvc, uvc
end
--------------------- exclude_func start --------------------------
local function load_global_exclude_ip()
local load_key = "global_exclude_ip_load"
@ -679,7 +176,7 @@ log_by_lua_block {
return true
end
else
if cache:get("global_exclude_ip_"..ip) then
if cache:get("global_exclude_ip_"..ip) then
-- D("*Excluded global ip:"..ip)
return true
end
@ -687,41 +184,14 @@ log_by_lua_block {
return false
end
--------------------- exclude_func end ---------------------------
-- Count one request and its traffic against a URI in `uri_stat`.
--
-- Does nothing (returns true) unless config.global.statistics_uri is set.
-- The row is created on first sight of `uri_md5`, then today's request and
-- flow columns are incremented.
--
-- The URI comes straight from the client, so it is bound as a statement
-- parameter instead of being concatenated into the SQL text (the previous
-- form allowed SQL injection through the request URI). Only the internally
-- generated column names are still spliced in.
--
-- @return true (database errors are deliberately not propagated, matching
--         the earlier best-effort behaviour)
local function statistics_uri(db, uri, uri_md5, body_length)
    if not config['global']["statistics_uri"] then return true end

    local insert_stmt = db:prepare("INSERT INTO uri_stat(uri_md5,uri) SELECT :uri_md5,:uri WHERE NOT EXISTS (SELECT uri_md5 FROM uri_stat WHERE uri_md5=:uri_md5);")
    if insert_stmt then
        insert_stmt:bind_names{uri_md5=uri_md5, uri=uri}
        insert_stmt:step()
        insert_stmt:finalize()
    end

    local update_stmt = db:prepare("UPDATE uri_stat SET "..day_column.."="..day_column.."+1,"..flow_column.."="..flow_column.."+:length WHERE uri_md5=:uri_md5")
    if update_stmt then
        update_stmt:bind_names{length=body_length, uri_md5=uri_md5}
        update_stmt:step()
        update_stmt:finalize()
    end
    return true
end
-- Count one request and its traffic against a client IP in `ip_stat`.
--
-- Does nothing (returns true) unless config.global.statistics_ip is set.
-- The row is created on first sight of `ip`, then today's request and flow
-- columns are incremented.
--
-- `ip` may originate from a client-supplied forwarding header, so it is
-- bound as a statement parameter rather than concatenated into the SQL
-- text. Only the internally generated column names are still spliced in.
--
-- @return true (database errors are deliberately not propagated, matching
--         the earlier best-effort behaviour)
local function statistics_ip(db, ip, body_length)
    if not config['global']["statistics_ip"] then return true end

    local insert_stmt = db:prepare("INSERT INTO ip_stat(ip) SELECT :ip WHERE NOT EXISTS (SELECT ip FROM ip_stat WHERE ip=:ip);")
    if insert_stmt then
        insert_stmt:bind_names{ip=ip}
        insert_stmt:step()
        insert_stmt:finalize()
    end

    local update_stmt = db:prepare("UPDATE ip_stat SET "..day_column.."="..day_column.."+1,"..flow_column.."="..flow_column.."+:length WHERE ip=:ip")
    if update_stmt then
        update_stmt:bind_names{length=body_length, ip=ip}
        update_stmt:step()
        update_stmt:finalize()
    end
    return true
end
local function cache_logs()
local function cache_logs(server_name)
-- make new id
local new_id = get_last_id(server_name)
local new_id = C:get_last_id(server_name)
local excluded = false
local ip = get_client_ip_bylog()
local ip = C:get_client_ip()
excluded = filter_status() or exclude_extension() or exclude_url() or exclude_ip(server_name, ip)
local ip_list = request_header["x-forwarded-for"]
@ -737,17 +207,17 @@ log_by_lua_block {
end
-- local request_time = ngx.var.request_time
local request_time = get_request_time()
local request_time = C:get_request_time()
local client_port = ngx.var.remote_port
local real_server_name = server_name
local uri = ngx.var.uri
local status_code = ngx.status
local protocol = ngx.var.server_protocol
local request_uri = ngx.var.request_uri
local time_key = get_store_key()
local time_key = C:get_store_key()
local method = ngx.req.get_method()
local body_length = get_length()
local domain = get_domain()
local body_length = C:get_length()
local domain = C:get_domain()
local referer = ngx.var.http_referer
local kv = {
@ -774,15 +244,17 @@ log_by_lua_block {
client_port=client_port
}
-- C:D(json.encode(kv))
local request_stat_fields = "req=req+1,length=length+"..body_length
local spider_stat_fields = "x"
local client_stat_fields = "x"
if not excluded then
if status_code == 500 or (method=="POST" and config["record_post_args"] == true) or (status_code==403 and config["record_get_403_args"]==true) then
if status_code == 500 or (method=="POST" and config["record_post_args"] == true) or (status_code==403 and config["record_get_403_args"] == true) then
local data = ""
local ok, err = pcall(function() data=get_http_original() end)
local ok, err = pcall(function() data = C:get_http_origin() end)
if ok and not err then
kv["request_headers"] = data
end
@ -805,15 +277,16 @@ log_by_lua_block {
local pvc = 0
local uvc = 0
is_spider, request_spider, spider_index = match_spider(ip)
local is_spider, request_spider, spider_index = C:match_spider(kv['user_agent'])
if not is_spider then
client_stat_fields = match_client()
client_stat_fields = C:match_client(kv['user_agent'])
if not client_stat_fields or #client_stat_fields == 0 then
client_stat_fields = request_stat_fields..",other=other+1"
end
pvc, uvc = statistics_request(ip, is_spider,body_length)
ipc = statistics_ipc(server_name,ip)
pvc, uvc = C:statistics_request(ip, is_spider,body_length)
ipc = C:statistics_ipc(server_name,ip)
else
kv["is_spider"] = spider_index
local field = "spider"
@ -846,13 +319,13 @@ log_by_lua_block {
-- C:D("ddd")
cache_set(server_name, new_id, "stat_fields", stat_fields)
-- cache_set(server_name, new_id, "log_kv", json.encode(kv))
cache_set(server_name, new_id, "log_kv", json.encode(kv))
for i,v in pairs(kv) do
cache_set(server_name, new_id, tostring(i), tostring(v))
-- C:D("kv:"..tostring(i)..":"..tostring(v))
end
-- for i,v in pairs(kv) do
-- cache_set(server_name, new_id, tostring(i), tostring(v))
-- C:D("kv:"..tostring(i)..":"..tostring(v))
-- end
end
@ -889,7 +362,7 @@ log_by_lua_block {
-- D("Log stat fields is nil.")
-- D("Logdata:"..logvalue)
else
stat_fields = split_bylog(stat_fields, ";")
stat_fields = C:split(stat_fields, ";")
request_stat_fields = stat_fields[1]
client_stat_fields = stat_fields[2]
spider_stat_fields = stat_fields[3]
@ -933,27 +406,27 @@ log_by_lua_block {
end
stmt:reset()
-- D("store_logs_line ok")
update_stat( db, "client_stat", time_key, client_stat_fields)
update_stat( db, "spider_stat", time_key, spider_stat_fields)
C:update_stat( db, "client_stat", time_key, client_stat_fields)
C:update_stat( db, "spider_stat", time_key, spider_stat_fields)
-- D("stat ok")
-- only count non spider requests
local ok, err = pcall(function() statistics_uri(db, request_uri, ngx.md5(request_uri), body_length) end)
local ok, err = pcall(function() statistics_ip(db, ip, body_length) end)
local ok, err = pcall(function() C:statistics_uri(db, request_uri, ngx.md5(request_uri), body_length) end)
local ok, err = pcall(function() C:statistics_ip(db, ip, body_length) end)
end
update_stat( db, "request_stat", time_key, request_stat_fields)
C:update_stat( db, "request_stat", time_key, request_stat_fields)
return true
end
local function store_logs(input_server_name)
if is_migrating(input_server_name) == true then
local function store_logs(input_sn)
if C:is_migrating(input_sn) == true then
-- D("migrating...")
return
end
local last_insert_id_key = input_server_name.."_last_id"
local store_start_id_key = input_server_name.."_store_start"
local last_insert_id_key = input_sn.."_last_id"
local store_start_id_key = input_sn.."_store_start"
local last_id = cache:get(last_insert_id_key)
local store_start = cache:get(store_start_id_key)
if store_start == nil then
@ -965,15 +438,15 @@ log_by_lua_block {
end
local worker_id = ngx.worker.id()
if is_working(input_server_name) then
if C:is_working(input_sn) then
-- D("other workers are being stored, please store later.")
-- cache:delete(flush_data_key)
return true
end
lock_working(input_server_name)
C:lock_working(input_sn)
local log_dir = "{$SERVER_APP}/logs"
local db_path = log_dir .. '/' .. input_server_name .. "/logs.db"
local db_path = log_dir .. '/' .. input_sn .. "/logs.db"
local db, err = sqlite3.open(db_path)
if tostring(err) ~= 'nil' then
@ -1006,7 +479,7 @@ log_by_lua_block {
status, errorString = db:exec([[BEGIN TRANSACTION]])
update_day = load_update_day(input_server_name)
update_day = C:load_update_day(input_sn)
if not update_day or update_day ~= today then
local update_sql = "UPDATE uri_stat SET "..day_column.."=0,"..flow_column.."=0"
@ -1014,15 +487,15 @@ log_by_lua_block {
update_sql = "UPDATE ip_stat SET "..day_column.."=0,"..flow_column.."=0"
status, errorString = db:exec(update_sql)
write_update_day(input_server_name)
C:write_update_day(input_sn)
end
if store_end >= store_start then
for i=store_start, store_end, 1 do
-- D("store_start:"..store_start..":store_end:".. store_end)
if store_logs_line(db, stmt2, input_server_name, i) then
cache_clear(input_server_name, i, "log_kv")
cache_clear(input_server_name, i, "stat_fields")
if store_logs_line(db, stmt2, input_sn, i) then
cache_clear(input_sn, i, "log_kv")
cache_clear(input_sn, i, "stat_fields")
end
end
end
@ -1045,24 +518,18 @@ log_by_lua_block {
end
cache:set(store_start_id_key, store_end+1)
unlock_working(input_server_name)
C:unlock_working(input_sn)
end
-- Entry point of this block. For the current request it calls, in order:
-- init_var, get_server_name, get_auto_config, load_global_exclude_ip,
-- load_exclude_ip, cache_logs and store_logs. Those helpers are defined
-- elsewhere in the file; their behaviour is not visible from here.
-- NOTE(review): this listing contains both `cache_logs()` and
-- `cache_logs(server_name)`, and both a commented-out and a live
-- `store_logs(server_name)`; presumably old and new revisions of the same
-- lines shown together -- confirm which calls are meant to remain.
local function run_app()
-- D("------------ debug start ------------")
init_var()
-- Server name nginx selected for this request.
local c_name = ngx.var.server_name
-- Pass it through get_server_name() and replace every '_' with '.'.
-- NOTE(review): `server_name` is assigned without `local` here; presumably it
-- is declared earlier in the file -- otherwise this creates a global.
server_name = string.gsub(get_server_name(c_name),'_','.')
get_auto_config(server_name)
-- D("server_name:"..server_name)
load_global_exclude_ip()
load_exclude_ip(server_name)
cache_logs()
-- store_logs(server_name)
cache_logs(server_name)
store_logs(server_name)
-- D("------------ debug end -------------")
end

@ -21,4 +21,9 @@ fi
# test
# $RUN_CMD simple.lua
$RUN_CMD test_today.lua
# $RUN_CMD test_today.lua
# $RUN_CMD test_time.lua
# $RUN_CMD test_ngx_find.lua
$RUN_CMD test_match_spider.lua

@ -0,0 +1,106 @@
-- Warm-up routine: performs one small regex search through ngx.re.find.
-- The result is discarded; only the call itself matters.
local function target()
    local subject, pattern = "hello, world.", [[\w+\.]]
    ngx.re.find(subject, pattern, "jo")
end

-- Run the warm-up routine 100 times before any measurement starts.
for _ = 1, 100 do
    target()
end

-- Everything above is warm-up. Collect garbage so the timed section below
-- starts from a freshly collected heap.
collectgarbage()
-- Spider name -> numeric index returned as the third result of match_spider.
-- Built once instead of on every call.
local SPIDER_INDEX = {
    ["baidu"] = 1, -- check
    ["bing"] = 2, -- check
    ["qh360"] = 3, -- check
    ["google"] = 4,
    ["bytes"] = 5, -- check
    ["sogou"] = 6, -- check
    ["youdao"] = 7,
    ["soso"] = 8,
    ["dnspod"] = 9,
    ["yandex"] = 10,
    ["yisou"] = 11,
    ["other"] = 12,
    ["mpcrawler"] = 13,
    ["yahoo"] = 14, -- check
    ["duckduckgo"] = 15
}

-- Ordered { needle, spider_name } pairs for the first regex pass.
-- Order matters: the first needle found in the matched text wins.
local PRIMARY_SPIDERS = {
    { "baidu", "baidu" },
    { "bytes", "bytes" },
    { "360", "qh360" },
    { "sogou", "sogou" },
    { "soso", "soso" },
    { "google", "google" },
    { "bingbot", "bing" },
    { "youdao", "youdao" },
    { "dnspod", "dnspod" },
    { "yandex", "yandex" },
    { "yisou", "yisou" },
    { "mpcrawler", "mpcrawler" },
}

-- Ordered { needle, spider_name } pairs for the second regex pass.
local SECONDARY_SPIDERS = {
    { "yahoo", "yahoo" },
    { "slurp", "yahoo" },
    { "duckduckgo", "duckduckgo" },
}

-- Map the text captured by a regex pass to a spider name.
-- Returns "" when none of the candidate needles occurs in the text.
local function resolve_spider_name(matched, candidates)
    local lowered = string.lower(matched)
    for i = 1, #candidates do
        local entry = candidates[i]
        -- Plain (non-pattern) search, so needles are taken literally.
        if string.find(lowered, entry[1], 1, true) then
            return entry[2]
        end
    end
    return ""
end

--- Detect whether a User-Agent string belongs to a known spider.
-- @param ua User-Agent string of the request.
-- @return is_spider   true when a known spider was recognised
-- @return spider_name short spider name ("" when not a spider)
-- @return spider_idx  numeric index from SPIDER_INDEX (0 when not a spider)
local function match_spider(ua)
    -- A missing, non-string or empty User-Agent cannot match anything, and
    -- handing it to ngx.re.match could raise; treat it as "not a spider".
    if type(ua) ~= "string" or ua == "" then
        return false, "", 0
    end

    -- First pass: the main search-engine spiders.
    local found = ngx.re.match(ua, "(Baiduspider|Bytespider|360Spider|Sogou web spider|Sosospider|Googlebot|bingbot|AdsBot-Google|Google-Adwords|YoudaoBot|Yandex|DNSPod-Monitor|YisouSpider|mpcrawler)", "ijo")
    if found then
        local spider_name = resolve_spider_name(found[0], PRIMARY_SPIDERS)
        return true, spider_name, SPIDER_INDEX[spider_name]
    end

    -- Second pass. Candidates noted for this pass:
    -- Curl|Yahoo|HeadlessChrome|anything containing "bot"|Wget|Spider|Crawler|Scrapy|zgrab|python|java|Adsbot|DuckDuckGo
    -- Only Yahoo, Slurp and DuckDuckGo are matched at the moment.
    found = ngx.re.match(ua, "(Yahoo|Slurp|DuckDuckGo)", "ijo")
    -- The original tested the undefined name `res` here, which is always
    -- nil, so this branch could never run.
    if found then
        local spider_name = resolve_spider_name(found[0], SECONDARY_SPIDERS)
        return true, spider_name, SPIDER_INDEX[spider_name]
    end

    return false, "", 0
end
-- One-off manual check, kept for reference:
-- local is_spider, request_spider, spider_index = match_spider("Mozilla/5.0 (compatible; bingbot/2.0; +http://www.bing.com/bingbot.htm)")
-- ngx.say(is_spider,request_spider, spider_index)

-- Timed run: call match_spider repeatedly with a bingbot User-Agent and
-- print the elapsed time divided by the number of calls.
ngx.update_time()
local started_at = ngx.now()
local rounds = 1e6
for _ = 1, rounds do
    match_spider("Mozilla/5.0 (compatible; bingbot/2.0; +http://www.bing.com/bingbot.htm)")
end
ngx.update_time()
local elapsed = ngx.now() - started_at
ngx.say("match_spider elapsed: ", elapsed / rounds)

@ -0,0 +1,35 @@
-- Warm-up routine: performs one small regex search through ngx.re.find.
-- The result is discarded; only the call itself matters.
local function target()
    local subject, pattern = "hello, world.", [[\w+\.]]
    ngx.re.find(subject, pattern, "jo")
end

-- Run the warm-up routine 100 times before any measurement starts.
for _ = 1, 100 do
    target()
end

-- Everything above is warm-up. Collect garbage so the timed sections below
-- start from a freshly collected heap.
collectgarbage()
-- Subject string shared by both measurements; it does not contain "360",
-- so both searches scan it without finding a match.
local spider_match = "aa 220"

-- Measurement 1: search through ngx.re.find.
do
    ngx.update_time()
    local started_at = ngx.now()
    local rounds = 1e7
    for _ = 1, rounds do
        ngx.re.find(spider_match, "360", "ijo")
    end
    ngx.update_time()
    ngx.say("ngx.re.find elapsed: ", (ngx.now() - started_at) / rounds)
end

-- Measurement 2: plain-text search through string.find.
do
    ngx.update_time()
    local started_at = ngx.now()
    local rounds = 1e7
    for _ = 1, rounds do
        string.find(spider_match, "360", 1, true)
    end
    ngx.update_time()
    ngx.say("string.find elapsed: ", (ngx.now() - started_at) / rounds)
end

@ -0,0 +1,118 @@
-- Warm-up routine: performs one small regex search through ngx.re.find.
-- The result is discarded; only the call itself matters.
local function target()
    local subject, pattern = "hello, world.", [[\w+\.]]
    ngx.re.find(subject, pattern, "jo")
end

-- Run the warm-up routine 100 times before any measurement starts.
for _ = 1, 100 do
    target()
end

-- Everything above is warm-up. Collect garbage so the timed sections below
-- start from a freshly collected heap.
collectgarbage()
--- Build a key from the current local date and hour (YYYYMMDDHH).
-- The timestamp comes from os.time().
-- @return string of ten digits
local function get_store_key()
    local now = os.time()
    local key = os.date("%Y%m%d%H", now)
    return key
end
--- Build a key from the current local date and hour (YYYYMMDDHH).
-- Same as get_store_key, but the timestamp comes from ngx.time().
-- @return string of ten digits
local function get_store_key2()
    local now = ngx.time()
    local key = os.date("%Y%m%d%H", now)
    return key
end
--- Number of seconds from now until the next local midnight.
-- The current time comes from os.time().
-- @return positive number of seconds
local function get_end_time()
    local now = os.time()
    local midnight = os.date("*t", now)
    -- Advance the calendar day rather than adding 86400 seconds: on a
    -- daylight-saving transition the day is not 24 hours long, and
    -- "now + 86400" can land on the wrong date (even giving a negative
    -- result). os.time() normalises the out-of-range day, including at
    -- month and year boundaries.
    midnight.day = midnight.day + 1
    midnight.hour, midnight.min, midnight.sec = 0, 0, 0
    -- Let the C library decide whether DST applies at the target time.
    midnight.isdst = nil
    return os.time(midnight) - now
end
--- Number of seconds from now until the next local midnight.
-- Same as get_end_time, but the current time comes from ngx.time().
-- @return positive number of seconds
local function get_end_time2()
    local now = ngx.time()
    local midnight = os.date("*t", now)
    -- Advance the calendar day rather than adding 86400 seconds, so the
    -- result stays correct on daylight-saving transition days.
    midnight.day = midnight.day + 1
    midnight.hour, midnight.min, midnight.sec = 0, 0, 0
    -- Let the C library decide whether DST applies at the target time.
    midnight.isdst = nil
    -- The date table must be converted with os.time(): ngx.time() takes no
    -- arguments and just returns the current timestamp, so the original
    -- `ngx.time(n_date)` made this function always return 0.
    return os.time(midnight) - now
end
--- Build an increment expression of the form "<field>=<field>+<value>".
-- @param field column name
-- @param value amount to add; concatenated as-is
-- @return the assembled string
local function get_update_field(field, value)
    local increment = field .. "+" .. value
    return field .. "=" .. increment
end
--- Build an increment expression of the form "<field>=<field>+<value>".
-- Same as get_update_field, but the value goes through tostring() first.
-- @param field column name
-- @param value amount to add; converted with tostring()
-- @return the assembled string
local function get_update_field2(field, value)
    local amount = tostring(value)
    local increment = field .. "+" .. amount
    return field .. "=" .. increment
end
-- Each section below times one helper: refresh the nginx time cache, run the
-- helper a fixed number of times, refresh again, and print the elapsed time
-- divided by the number of calls.

-- get_store_key (os.time based)
do
    ngx.update_time()
    local started_at = ngx.now()
    local rounds = 1e3
    for _ = 1, rounds do
        get_store_key()
    end
    ngx.update_time()
    ngx.say("get_store_key elapsed: ", (ngx.now() - started_at) / rounds)
end

-- get_store_key2 (ngx.time based)
do
    ngx.update_time()
    local started_at = ngx.now()
    local rounds = 1e3
    for _ = 1, rounds do
        get_store_key2()
    end
    ngx.update_time()
    ngx.say("get_store_key2 elapsed: ", (ngx.now() - started_at) / rounds)
end

-- get_end_time (os.time based)
do
    ngx.update_time()
    local started_at = ngx.now()
    local rounds = 1e5
    for _ = 1, rounds do
        get_end_time()
    end
    ngx.update_time()
    ngx.say("get_end_time elapsed: ", (ngx.now() - started_at) / rounds)
end

-- get_end_time2 (ngx.time based)
do
    ngx.update_time()
    local started_at = ngx.now()
    local rounds = 1e5
    for _ = 1, rounds do
        get_end_time2()
    end
    ngx.update_time()
    ngx.say("get_end_time2 elapsed: ", (ngx.now() - started_at) / rounds)
end

-- get_update_field (string value, no tostring)
-- NOTE(review): 1e9 iterations; expect this section to run for a long time.
do
    ngx.update_time()
    local started_at = ngx.now()
    local rounds = 1e9
    for _ = 1, rounds do
        get_update_field("ss","1")
    end
    ngx.update_time()
    ngx.say("get_update_field elapsed: ", (ngx.now() - started_at) / rounds)
end

-- get_update_field2 (numeric value through tostring)
-- NOTE(review): 1e9 iterations; expect this section to run for a long time.
do
    ngx.update_time()
    local started_at = ngx.now()
    local rounds = 1e9
    for _ = 1, rounds do
        get_update_field2("ss",1)
    end
    ngx.update_time()
    ngx.say("get_update_field2 elapsed: ", (ngx.now() - started_at) / rounds)
end
Loading…
Cancel
Save