diff --git a/plugins/webstats/lua/webstats_common.lua b/plugins/webstats/lua/webstats_common.lua index 47d81eede..40899c78c 100644 --- a/plugins/webstats/lua/webstats_common.lua +++ b/plugins/webstats/lua/webstats_common.lua @@ -10,14 +10,24 @@ local sites = require "webstats_sites" local debug_mode = true local total_key = "log_kv_total" + +local unset_server_name = "unset" +local max_log_id = 99999999999999 local cache = ngx.shared.mw_total + local today = ngx.re.gsub(ngx.today(),'-','') +local request_header = ngx.req.get_headers() +local method = ngx.req.get_headers() + local day = os.date("%d") local number_day = tonumber(day) local day_column = "day"..number_day local flow_column = "flow"..number_day local spider_column = "spider_flow"..number_day +-- _M.setInputSn | need +local auto_config = nil + local log_dir = "{$SERVER_APP}/logs" function _M.new(self) @@ -71,12 +81,132 @@ function _M.setParams( self, params ) self.params = params end +function _M.setInputSn(self, input_sn) + local global_config = config["global"] + if config[input_sn] == nil then + auto_config = global_config + else + auto_config = config[site] + for k, v in pairs(global_config) do + if auto_config[k] == nil then + auto_config[k] = v + end + end + end + return auto_config +end + +function _M.get_domain(self) + local domain = ngx.req.get_headers()['host'] + if domain ~= nil then + domain = ngx.re.gsub(domain, "_", ".") + else + domain = "unknown" + end + return domain +end + function _M.split(self, str, reps) local arr = {} string.gsub(str,'[^'..reps..']+',function(w) table.insert(arr,w) end) return arr end +function _M.arrlen(self, arr) + if not arr then return 0 end + local count = 0 + for _,v in ipairs(arr) do + count = count + 1 + end + return count +end + +function _M.is_ipaddr(self, client_ip) + local cipn = self:split(client_ip,'.') + if self:arrlen(cipn) < 4 then return false end + for _,v in ipairs({1,2,3,4}) + do + local ipv = tonumber(cipn[v]) + if ipv == nil then return false end + if ipv > 255 or ipv < 0 then return false end + end + return true +end + + +function _M.get_sn(self, input_sn) + local dst_name = cache:get(input_sn) + if dst_name then return dst_name end + + -- self:D(json.encode(sites)) + for _,v in ipairs(sites) + do + if input_sn == v["name"] then + cache:set(input_sn, v['name'], 86400) + return v["name"] + end + + for _,dst_domain in ipairs(v['domains']) + do + if input_sn == dst_domain then + cache:set(input_sn, v['name'], 86400) + return v['name'] + elseif string.find(dst_domain, "*") then + local new_domain = string.gsub(dst_domain, '*', '.*') + if string.find(input_sn, new_domain) then + dst_domain = v['name'] + cache:set(input_sn, dst_domain, 86400) + end + end + end + end + + cache:set(input_sn, unset_server_name, 86400) + return unset_server_name +end + + +function _M.get_store_key(self) + return os.date("%Y%m%d%H", ngx.time()) +end + +function _M.get_length(self) + local clen = ngx.var.body_bytes_sent + if clen == nil then clen = 0 end + return tonumber(clen) +end + +function _M.get_last_id(self, input_sn) + local last_insert_id_key = input_sn .. "_last_id" + local new_id, err = cache:incr(last_insert_id_key, 1, 0) + cache:incr(cache_count_id_key, 1, 0) + if new_id >= max_log_id then + cache:set(last_insert_id_key, 1) + new_id = cache:get(last_insert_id_key) + end + return new_id +end + +function _M.get_http_origin(self) + local data = "" + local headers = request_header + if not headers then return data end + if method ~='GET' then + data = ngx.req.get_body_data() + if not data then + data = ngx.req.get_post_args(1000000) + end + if "string" == type(data) then + headers["payload"] = data + end + + if "table" == type(data) then + headers = table.concat(headers, data) + end + end + return json.encode(headers) +end + -- 后台任务 function _M.cron(self) local timer_every_get_data = function (premature) @@ -296,6 +426,49 @@ function _M.store_logs_line(self, db, stmt, input_sn, info) return true end +function _M.statistics_ipc(self, input_sn, ip) + -- 判断IP是否重复的时间限定范围是请求的当前时间+24小时 + local ipc = 0 + local ip_token = input_sn..'_'..ip + if not cache:get(ip_token) then + ipc = 1 + cache:set(ip_token,1, self:get_end_time()) + end + return ipc +end + +function _M.statistics_request(self, ip, is_spider, body_length) + -- 计算pv uv + local pvc = 0 + local uvc = 0 + + if not is_spider and method == 'GET' and ngx.status == 200 and body_length > 512 then + local ua = '' + if request_header['user-agent'] then + ua = string.lower(request_header['user-agent']) + end + + out_header = ngx.resp.get_headers() + if out_header['content-type'] then + if string.find(out_header['content-type'],'text/html', 1, true) then + pvc = 1 + if request_header['user-agent'] then + if string.find(ua,'mozilla') then + local today = os.date("%Y-%m-%d") + local uv_token = ngx.md5(ip .. request_header['user-agent'] .. today) + if not cache:get(uv_token) then + uvc = 1 + cache:set(uv_token,1, self:get_end_time()) + end + end + end + end + end + end + return pvc, uvc +end + +--------------------- db start --------------------------- function _M.statistics_uri(self, db, uri, uri_md5, body_length) -- count the number of URI requests and traffic local open_statistics_uri = config['global']["statistics_uri"] @@ -324,6 +497,8 @@ function _M.statistics_ip(self, db, ip, body_length) end + + function _M.update_stat(self,db, stat_table, key, columns) -- 根据指定表名,更新统计数据 if not columns then return end @@ -333,10 +508,9 @@ function _M.update_stat(self,db, stat_table, key, columns) stmt:finalize() local update_sql = "UPDATE ".. stat_table .. " SET " .. columns update_sql = update_sql .. " WHERE time=" .. key - status, errorString = db:exec(update_sql) + return db:exec(update_sql) end - - +--------------------- db end --------------------------- -- debug func function _M.D(self,msg) @@ -427,5 +601,234 @@ function _M.rpop(self) end +function _M.get_update_field(self, field, value) + return field.."="..field.."+"..tostring(value) +end + +function _M.get_request_time(self) + local request_time = math.floor((ngx.now() - ngx.req.start_time()) * 1000) + if request_time == 0 then request_time = 1 end + return request_time +end + + +function _M.get_end_time(self) + local s_time = ngx.time() + local n_date = os.date("*t",s_time + 86400) + n_date.hour = 0 + n_date.min = 0 + n_date.sec = 0 + local d_time = ngx.time(n_date) + return d_time - s_time +end + + +function _M.match_spider(self, ua) + -- 匹配蜘蛛请求 + local is_spider = false + local spider_name = "" + local spider_match = "" + + local spider_table = { + ["baidu"] = 1, -- check + ["bing"] = 2, -- check + ["qh360"] = 3, -- check + ["google"] = 4, + ["bytes"] = 5, -- check + ["sogou"] = 6, -- check + ["youdao"] = 7, + ["soso"] = 8, + ["dnspod"] = 9, + ["yandex"] = 10, + ["yisou"] = 11, + ["other"] = 12, + ["mpcrawler"] = 13, + ["yahoo"] = 14, -- check + ["duckduckgo"] = 15 + } + + local find_spider, _ = ngx.re.match(ua, "(Baiduspider|Bytespider|360Spider|Sogou web spider|Sosospider|Googlebot|bingbot|AdsBot-Google|Google-Adwords|YoudaoBot|Yandex|DNSPod-Monitor|YisouSpider|mpcrawler)", "ijo") + if find_spider then + is_spider = true + spider_match = string.lower(find_spider[0]) + if string.find(spider_match, "baidu", 1, true) then + spider_name = "baidu" + elseif string.find(spider_match, "bytes", 1, true) then + spider_name = "bytes" + elseif string.find(spider_match, "360", 1, true) then + spider_name = "qh360" + elseif string.find(spider_match, "sogou", 1, true) then + spider_name = "sogou" + elseif string.find(spider_match, "soso", 1, true) then + spider_name = "soso" + elseif string.find(spider_match, "google", 1, true) then + spider_name = "google" + elseif string.find(spider_match, "bingbot", 1, true) then + spider_name = "bing" + elseif string.find(spider_match, "youdao", 1, true) then + spider_name = "youdao" + elseif string.find(spider_match, "dnspod", 1, true) then + spider_name = "dnspod" + elseif string.find(spider_match, "yandex", 1, true) then + spider_name = "yandex" + elseif string.find(spider_match, "yisou", 1, true) then + spider_name = "yisou" + elseif string.find(spider_match, "mpcrawler", 1, true) then + spider_name = "mpcrawler" + end + end + + if is_spider then + return is_spider, spider_name, spider_table[spider_name] + end + + -- Curl|Yahoo|HeadlessChrome|包含bot|Wget|Spider|Crawler|Scrapy|zgrab|python|java|Adsbot|DuckDuckGo + find_spider, _ = ngx.re.match(ua, "(Yahoo|Slurp|DuckDuckGo)", "ijo") + if res then + spider_match = string.lower(find_spider[0]) + if string.find(spider_match, "yahoo", 1, true) then + spider_name = "yahoo" + elseif string.find(spider_match, "slurp", 1, true) then + spider_name = "yahoo" + elseif string.find(spider_match, "duckduckgo", 1, true) then + spider_name = "duckduckgo" + end + return true, spider_name, spider_table[spider_name] + end + return false, "", 0 +end + + +function _M.match_client(self, ua) + local client_stat_fields = "" + + if not ua then + return client_stat_fields + end + + local clients_map = { + ["android"] = "android", + ["iphone"] = "iphone", + ["ipod"] = "iphone", + ["ipad"] = "iphone", + ["firefox"] = "firefox", + ["msie"] = "msie", + ["trident"] = "msie", + ["360se"] = "qh360", + ["360ee"] = "qh360", + ["360browser"] = "qh360", + ["qihoo"] = "qh360", + ["the world"] = "theworld", + ["theworld"] = "theworld", + ["tencenttraveler"] = "tt", + ["maxthon"] = "maxthon", + ["opera"] = "opera", + ["qqbrowser"] = "qq", + ["ucweb"] = "uc", + ["ubrowser"] = "uc", + ["safari"] = "safari", + ["chrome"] = "chrome", + ["metasr"] = "metasr", + ["2345explorer"] = "pc2345", + ["edge"] = "edeg", + ["edg"] = "edeg", + ["windows"] = "windows", + ["linux"] = "linux", + ["macintosh"] = "mac", + ["mobile"] = "mobile" + } + local mobile_regx = "(Mobile|Android|iPhone|iPod|iPad)" + local mobile_res = ngx.re.match(ua, mobile_regx, "ijo") + --mobile + if mobile_res then + client_stat_fields = client_stat_fields..","..self:get_update_field("mobile", 1) + mobile_res = string.lower(mobile_res[0]) + if mobile_res ~= "mobile" then + client_stat_fields = client_stat_fields..","..self:get_update_field(clients_map[mobile_res], 1) + end + else + --pc + -- 匹配结果的顺序,与ua中关键词的顺序有关 + -- lua的正则不支持|语法 + -- 短字符串string.find效率要比ngx正则高 + local pc_regx1 = "(360SE|360EE|360browser|Qihoo|TheWorld|TencentTraveler|Maxthon|Opera|QQBrowser|UCWEB|UBrowser|MetaSr|2345Explorer|Edg[e]*)" + local pc_res = ngx.re.match(ua, pc_regx1, "ijo") + local cls_pc = nil + if not pc_res then + if ngx.re.find(ua, "[Ff]irefox") then + cls_pc = "firefox" + elseif string.find(ua, "MSIE") or string.find(ua, "Trident") then + cls_pc = "msie" + elseif string.find(ua, "[Cc]hrome") then + cls_pc = "chrome" + elseif string.find(ua, "[Ss]afari") then + cls_pc = "safari" + end + else + cls_pc = string.lower(pc_res[0]) + end + -- D("UA:"..ua) + -- D("PC cls:"..tostring(cls_pc)) + if cls_pc then + client_stat_fields = client_stat_fields..","..self:get_update_field(clients_map[cls_pc], 1) + else + -- machine and other + local machine_res, err = ngx.re.match(ua, "(ApacheBench|[Cc]url|HeadlessChrome|[a-zA-Z]+[Bb]ot|[Ww]get|[Ss]pider|[Cc]rawler|[Ss]crapy|zgrab|[Pp]ython|java)", "ijo") + if machine_res then + client_stat_fields = client_stat_fields..","..self:get_update_field("machine", 1) + else + -- 移动端+PC端+机器以外 归类到 其他 + client_stat_fields = client_stat_fields..","..self:get_update_field("other", 1) + end + end + + local os_regx = "(Windows|Linux|Macintosh)" + local os_res = ngx.re.match(ua, os_regx, "ijo") + if os_res then + os_res = string.lower(os_res[0]) + client_stat_fields = client_stat_fields..","..self:get_update_field(clients_map[os_res], 1) + end + end + + local other_regx = "MicroMessenger" + local other_res = ngx.re.find(ua, other_regx) + if other_res then + client_stat_fields = client_stat_fields..","..self:get_update_field("weixin", 1) + end + if client_stat_fields then + client_stat_fields = string.sub(client_stat_fields, 2) + end + return client_stat_fields +end + +function _M.get_client_ip(self) + local client_ip = "unknown" + local cdn = auto_config['cdn'] + if cdn == true then + for _,v in ipairs(auto_config['cdn_headers']) do + if request_header[v] ~= nil and request_header[v] ~= "" then + local ip_list = request_header[v] + client_ip = self:split(ip_list,',')[1] + break; + end + end + end + + -- ipv6 + if type(client_ip) == 'table' then client_ip = "" end + if client_ip ~= "unknown" and ngx.re.match(client_ip,"^([a-fA-F0-9]*):") then + return client_ip + end + + -- ipv4 + if not ngx.re.match(client_ip,"\\d+\\.\\d+\\.\\d+\\.\\d+") == nil or not self:is_ipaddr(client_ip) then + client_ip = ngx.var.remote_addr + if client_ip == nil then + client_ip = "unknown" + end + end + + return client_ip +end return _M \ No newline at end of file diff --git a/plugins/webstats/lua/webstats_log.lua b/plugins/webstats/lua/webstats_log.lua index 5c25ccd91..109bcc59f 100644 --- a/plugins/webstats/lua/webstats_log.lua +++ b/plugins/webstats/lua/webstats_log.lua @@ -9,11 +9,8 @@ log_by_lua_block { end local ver = '0.2.0' - local max_log_id = 99999999999999 local debug_mode = true - local unset_server_name = "unset" - local __C = require "webstats_common" local C = __C:getInstance() @@ -46,527 +43,27 @@ log_by_lua_block { local config = require "webstats_config" local sites = require "webstats_sites" + local server_name = string.gsub(C:get_sn(ngx.var.server_name),'_','.') + C:setConfData(config, sites) - local server_name - local request_header - local method - - local auto_config + local auto_config = C:setInputSn(server_name) + + local request_header = ngx.req.get_method() + local method = ngx.req.get_headers() local excluded - local day - local today - local number_day - local day_column - local flow_column - local spider_column + local day = os.date("%d") + local number_day = tonumber(day) + local day_column = "day"..number_day + local flow_column = "flow"..number_day + local spider_column = "spider_flow"..number_day --- default common var end --- - - local function to_json(msg) - return json.encode(msg) - end - local function init_var() - - request_header = ngx.req.get_headers() - method = ngx.req.get_method() - - day = os.date("%d") - -- today = os.date("%Y%m%d") - today = ngx.re.gsub(ngx.today(),'-','') - - number_day = tonumber(day) - day_column = "day"..number_day - flow_column = "flow"..number_day - spider_column = "spider_flow"..number_day - end - - local function get_auto_config(site) - local config_data = config - local global_config = config_data["global"] - if config_data[site] == nil then - auto_config = global_config - else - auto_config = config_data[site] - for k, v in pairs(global_config) do - if auto_config[k] == nil then - auto_config[k] = v - end - end - end - return auto_config - end - - - local function get_store_key() - return os.date("%Y%m%d%H", os.time()) - end - - local function get_length() - local clen = ngx.var.body_bytes_sent - if clen == nil then clen = 0 end - return tonumber(clen) - end - - local function get_domain() - local domain = request_header['host'] - if domain ~= nil then - domain = string.gsub(domain, "_", ".") - else - domain = "unknown" - end - return domain - end - - local function write_file_bylog(filename,body,mode) - local fp = io.open(filename,mode) - if fp == nil then - return nil - end - fp:write(body) - fp:flush() - fp:close() - return true - end - - local function read_file_body_bylog(filename) - local fp = io.open(filename,'rb') - if not fp then - return nil - end - fbody = fp:read("*a") - fp:close() - if fbody == '' then - return nil - end - return fbody - end - - local function load_update_day(input_server_name) - local _file = "{$SERVER_APP}/logs/"..input_server_name.."/update_day.log" - return read_file_body_bylog(_file) - end - - local function write_update_day(input_server_name) - local update_day = today - local _file = "{$SERVER_APP}/logs/"..input_server_name.."/update_day.log" - write_file_bylog(_file, update_day, "w") - end - - local function arrlen_bylog(arr) - if not arr then return 0 end - count = 0 - for _,v in ipairs(arr) do - count = count + 1 - end - return count - end - - local function split_bylog(str,reps ) - local resultStrList = {} - string.gsub(str,'[^'..reps..']+',function(w) table.insert(resultStrList,w) end) - return resultStrList - end - - local function is_ipaddr_bylog(client_ip) - local cipn = split_bylog(client_ip,'.') - if arrlen_bylog(cipn) < 4 then return false end - for _,v in ipairs({1,2,3,4}) - do - local ipv = tonumber(cipn[v]) - if ipv == nil then return false end - if ipv > 255 or ipv < 0 then return false end - end return true end - local function get_client_ip_bylog() - local client_ip = "unknown" - local cdn = auto_config['cdn'] - if cdn == true then - for _,v in ipairs(auto_config['cdn_headers']) do - if request_header[v] ~= nil and request_header[v] ~= "" then - local ip_list = request_header[v] - client_ip = split_bylog(ip_list,',')[1] - break; - end - end - end - - -- ipv6 - if type(client_ip) == 'table' then client_ip = "" end - if client_ip ~= "unknown" and ngx.re.match(client_ip,"^([a-fA-F0-9]*):") then - return client_ip - end - - -- ipv4 - if not ngx.re.match(client_ip,"\\d+\\.\\d+\\.\\d+\\.\\d+") == nil or not is_ipaddr_bylog(client_ip) then - client_ip = ngx.var.remote_addr - if client_ip == nil then - client_ip = "unknown" - end - end - - return client_ip - end - - local function get_last_id(input_server_name) - local last_insert_id_key = input_server_name .. "_last_id" - new_id, err = cache:incr(last_insert_id_key, 1, 0) - cache:incr(cache_count_id_key, 1, 0) - if new_id >= max_log_id then - cache:set(last_insert_id_key, 1) - new_id = cache:get(last_insert_id_key) - end - return new_id - end - - local function get_request_time() - local request_time = math.floor((ngx.now() - ngx.req.start_time()) * 1000) - if request_time == 0 then request_time = 1 end - return request_time - end - - local function get_end_time() - local s_time = os.time() - local n_date = os.date("*t",s_time + 86400) - n_date.hour = 0 - n_date.min = 0 - n_date.sec = 0 - d_time = os.time(n_date) - return d_time - s_time - end - - local function get_http_original() - local data = "" - local headers = request_header - if not headers then return data end - if method ~='GET' then - data = ngx.req.get_body_data() - if not data then - data = ngx.req.get_post_args(1000000) - end - if "string" == type(data) then - headers["payload"] = data - end - - if "table" == type(data) then - headers = table.concat(headers, data) - end - end - return json.encode(headers) - end - - local function is_migrating(input_server_name) - local file = io.open("{$SERVER_APP}/migrating", "rb") - if file then return true end - local file = io.open("{$SERVER_APP}/logs/"..input_server_name.."/migrating", "rb") - if file then return true end - return false - end - - - local function is_working(name) - local work_status = cache:get(name.."_working") - if work_status ~= nil and work_status == true then - return true - end - return false - end - - local function lock_working(name) - local working_key = name.."_working" - cache:set(working_key, true, 60) - end - - local function unlock_working(name) - local working_key = name.."_working" - cache:set(working_key, false) - end - - - local function get_server_name(c_name) - local my_name = cache:get(c_name) - if my_name then return my_name end - local determined_name = nil - for _,v in ipairs(sites) - do - if c_name == v["name"] then - cache:set(c_name, v['name'],86400) - return v["name"] - end - for _,d_name in ipairs(v['domains']) - do - if c_name == d_name then - cache:set(c_name, v['name'], 86400) - return v['name'] - elseif string.find(d_name, "*") then - new_domain = string.gsub(d_name, '*', '.*') - if string.find(c_name, new_domain) then - determined_name = v['name'] - end - end - end - end - - if determined_name then - cache:set(c_name, determined_name,86400) - return determined_name - end - cache:set(c_name, unset_server_name, 86400) - return unset_server_name - end - - --------------------- db start --------------------------- - local function update_stat(db, stat_table, key, columns) - -- 根据指定表名,更新统计数据 - if not columns then return end - local stmt = db:prepare(string.format("INSERT INTO %s(time) SELECT :time WHERE NOT EXISTS(SELECT time FROM %s WHERE time=:time);", stat_table, stat_table)) - stmt:bind_names{time=key} - local res, err = stmt:step() - stmt:finalize() - local update_sql = "UPDATE ".. stat_table .. " SET " .. columns - update_sql = update_sql .. " WHERE time=" .. key - status, errorString = db:exec(update_sql) - end - - local function get_update_field(field, value) - return field.."="..field.."+"..tostring(value) - end - --------------------- db end --------------------------- - - - local function match_client() - -- 匹配客户端 - local ua = '' - if request_header['user-agent'] then - ua = request_header['user-agent'] - end - if not ua then - return false, nil - end - local client_stat_fields = "" - local clients_map = { - ["android"] = "android", - ["iphone"] = "iphone", - ["ipod"] = "iphone", - ["ipad"] = "iphone", - ["firefox"] = "firefox", - ["msie"] = "msie", - ["trident"] = "msie", - ["360se"] = "qh360", - ["360ee"] = "qh360", - ["360browser"] = "qh360", - ["qihoo"] = "qh360", - ["the world"] = "theworld", - ["theworld"] = "theworld", - ["tencenttraveler"] = "tt", - ["maxthon"] = "maxthon", - ["opera"] = "opera", - ["qqbrowser"] = "qq", - ["ucweb"] = "uc", - ["ubrowser"] = "uc", - ["safari"] = "safari", - ["chrome"] = "chrome", - ["metasr"] = "metasr", - ["2345explorer"] = "pc2345", - ["edge"] = "edeg", - ["edg"] = "edeg", - ["windows"] = "windows", - ["linux"] = "linux", - ["macintosh"] = "mac", - ["mobile"] = "mobile" - } - local mobile_regx = "(Mobile|Android|iPhone|iPod|iPad)" - local mobile_res = ngx.re.match(ua, mobile_regx, "ijo") - --mobile - if mobile_res then - client_stat_fields = client_stat_fields..","..get_update_field("mobile", 1) - mobile_res = string.lower(mobile_res[0]) - if mobile_res ~= "mobile" then - client_stat_fields = client_stat_fields..","..get_update_field(clients_map[mobile_res], 1) - end - else - --pc - -- 匹配结果的顺序,与ua中关键词的顺序有关 - -- lua的正则不支持|语法 - -- 短字符串string.find效率要比ngx正则高 - local pc_regx1 = "(360SE|360EE|360browser|Qihoo|TheWorld|TencentTraveler|Maxthon|Opera|QQBrowser|UCWEB|UBrowser|MetaSr|2345Explorer|Edg[e]*)" - local pc_res = ngx.re.match(ua, pc_regx1, "ijo") - local cls_pc = nil - if not pc_res then - if string.find(ua, "[Ff]irefox") then - cls_pc = "firefox" - elseif string.find(ua, "MSIE") or string.find(ua, "Trident") then - cls_pc = "msie" - elseif string.find(ua, "[Cc]hrome") then - cls_pc = "chrome" - elseif string.find(ua, "[Ss]afari") then - cls_pc = "safari" - end - else - cls_pc = string.lower(pc_res[0]) - end - -- D("UA:"..ua) - -- D("PC cls:"..tostring(cls_pc)) - if cls_pc then - client_stat_fields = client_stat_fields..","..get_update_field(clients_map[cls_pc], 1) - else - -- machine and other - local machine_res, err = ngx.re.match(ua, "(ApacheBench|[Cc]url|HeadlessChrome|[a-zA-Z]+[Bb]ot|[Ww]get|[Ss]pider|[Cc]rawler|[Ss]crapy|zgrab|[Pp]ython|java)", "ijo") - if machine_res then - client_stat_fields = client_stat_fields..","..get_update_field("machine", 1) - else - -- 移动端+PC端+机器以外 归类到 其他 - client_stat_fields = client_stat_fields..","..get_update_field("other", 1) - end - end - - local os_regx = "(Windows|Linux|Macintosh)" - local os_res = ngx.re.match(ua, os_regx, "ijo") - if os_res then - os_res = string.lower(os_res[0]) - client_stat_fields = client_stat_fields..","..get_update_field(clients_map[os_res], 1) - end - end - - local other_regx = "MicroMessenger" - local other_res = string.find(ua, other_regx) - if other_res then - client_stat_fields = client_stat_fields..","..get_update_field("weixin", 1) - end - if client_stat_fields then - client_stat_fields = string.sub(client_stat_fields, 2) - end - return client_stat_fields - end - - local function match_spider(client_ip) - -- 匹配蜘蛛请求 - local ua = '' - if request_header['user-agent'] then - ua = request_header['user-agent'] - end - if not ua then - return false, nil, 0 - end - local is_spider = false - local spider_name = nil - - local spider_table = { - ["baidu"] = 1, -- check - ["bing"] = 2, -- check - ["qh360"] = 3, -- check - ["google"] = 4, - ["bytes"] = 5, -- check - ["sogou"] = 6, -- check - ["youdao"] = 7, - ["soso"] = 8, - ["dnspod"] = 9, - ["yandex"] = 10, - ["yisou"] = 11, - ["other"] = 12, - ["mpcrawler"] = 13, - ["yahoo"] = 14, -- check - ["duckduckgo"] = 15 - } - - local res,err = ngx.re.match(ua, "(Baiduspider|Bytespider|360Spider|Sogou web spider|Sosospider|Googlebot|bingbot|AdsBot-Google|Google-Adwords|YoudaoBot|Yandex|DNSPod-Monitor|YisouSpider|mpcrawler)", "ijo") - check_res = true - if res then - is_spider = true - spider_match = string.lower(res[0]) - if string.find(spider_match, "baidu", 1, true) then - spider_name = "baidu" - elseif string.find(spider_match, "bytes", 1, true) then - spider_name = "bytes" - elseif string.find(spider_match, "360", 1, true) then - spider_name = "qh360" - elseif string.find(spider_match, "sogou", 1, true) then - spider_name = "sogou" - elseif string.find(spider_match, "soso", 1, true) then - spider_name = "soso" - elseif string.find(spider_match, "google", 1, true) then - spider_name = "google" - elseif string.find(spider_match, "bingbot", 1, true) then - spider_name = "bing" - elseif string.find(spider_match, "youdao", 1, true) then - spider_name = "youdao" - elseif string.find(spider_match, "dnspod", 1, true) then - spider_name = "dnspod" - elseif string.find(spider_match, "yandex", 1, true) then - spider_name = "yandex" - elseif string.find(spider_match, "yisou", 1, true) then - spider_name = "yisou" - elseif string.find(spider_match, "mpcrawler", 1, true) then - spider_name = "mpcrawler" - end - end - - if is_spider then - return is_spider, spider_name, spider_table[spider_name] - end - - -- Curl|Yahoo|HeadlessChrome|包含bot|Wget|Spider|Crawler|Scrapy|zgrab|python|java|Adsbot|DuckDuckGo - local other_res, err = ngx.re.match(ua, "(Yahoo|Slurp|DuckDuckGo)", "ijo") - if other_res then - other_res = string.lower(other_res[0]) - if string.find(other_res, "yahoo", 1, true) then - spider_name = "yahoo" - elseif string.find(other_res, "slurp", 1, true) then - spider_name = "yahoo" - elseif string.find(other_res, "duckduckgo", 1, true) then - spider_name = "duckduckgo" - end - return true, spider_name, spider_table[spider_name] - end - return false, nil, 0 - end - - local function statistics_ipc(input_server_name,ip) - -- 判断IP是否重复的时间限定范围是请求的当前时间+24小时 - local ipc = 0 - local ip_token = input_server_name..'_'..ip - if not cache:get(ip_token) then - ipc = 1 - cache:set(ip_token,1, get_end_time()) - end - return ipc - end - - local function statistics_request(ip, is_spider,body_length) - -- 计算pv uv - local pvc = 0 - local uvc = 0 - - if not is_spider and method == 'GET' and ngx.status == 200 and body_length > 512 then - local ua = '' - if request_header['user-agent'] then - ua = string.lower(request_header['user-agent']) - end - - out_header = ngx.resp.get_headers() - if out_header['content-type'] then - if string.find(out_header['content-type'],'text/html', 1, true) then - pvc = 1 - if request_header['user-agent'] then - if string.find(ua,'mozilla') then - local today = os.date("%Y-%m-%d") - local uv_token = ngx.md5(ip .. request_header['user-agent'] .. today) - if not cache:get(uv_token) then - uvc = 1 - cache:set(uv_token,1, get_end_time()) - end - end - end - end - end - end - return pvc, uvc - end - --------------------- exclude_func start -------------------------- local function load_global_exclude_ip() local load_key = "global_exclude_ip_load" @@ -679,7 +176,7 @@ log_by_lua_block { return true end else - if cache:get("global_exclude_ip_"..ip) then + if cache:get("global_exclude_ip_"..ip) then -- D("*Excluded global ip:"..ip) return true end @@ -687,41 +184,14 @@ log_by_lua_block { return false end --------------------- exclude_func end --------------------------- - - local function statistics_uri(db, uri, uri_md5, body_length) - -- count the number of URI requests and traffic - local open_statistics_uri = config['global']["statistics_uri"] - if not open_statistics_uri then return true end - - local stat_sql = nil - stat_sql = "INSERT INTO uri_stat(uri_md5,uri) SELECT \""..uri_md5.."\",\""..uri.."\" WHERE NOT EXISTS (SELECT uri_md5 FROM uri_stat WHERE uri_md5=\""..uri_md5.."\");" - local res, err = db:exec(stat_sql) - - stat_sql = "UPDATE uri_stat SET "..day_column.."="..day_column.."+1,"..flow_column.."="..flow_column.."+"..body_length.." WHERE uri_md5=\""..uri_md5.."\"" - local res, err = db:exec(stat_sql) - return true - end - - local function statistics_ip(db, ip, body_length) - local open_statistics_ip = config['global']["statistics_ip"] - if not open_statistics_ip then return true end - - local stat_sql = nil - stat_sql = "INSERT INTO ip_stat(ip) SELECT \""..ip.."\" WHERE NOT EXISTS (SELECT ip FROM ip_stat WHERE ip=\""..ip.."\");" - local res, err = db:exec(stat_sql) - - stat_sql = "UPDATE ip_stat SET "..day_column.."="..day_column.."+1,"..flow_column.."="..flow_column.."+"..body_length.." WHERE ip=\""..ip.."\"" - local res, err = db:exec(stat_sql) - return true - end - local function cache_logs() + local function cache_logs(server_name) -- make new id - local new_id = get_last_id(server_name) + local new_id = C:get_last_id(server_name) local excluded = false - local ip = get_client_ip_bylog() + local ip = C:get_client_ip() excluded = filter_status() or exclude_extension() or exclude_url() or exclude_ip(server_name, ip) local ip_list = request_header["x-forwarded-for"] @@ -737,17 +207,17 @@ log_by_lua_block { end -- local request_time = ngx.var.request_time - local request_time = get_request_time() + local request_time = C:get_request_time() local client_port = ngx.var.remote_port local real_server_name = server_name local uri = ngx.var.uri local status_code = ngx.status local protocol = ngx.var.server_protocol local request_uri = ngx.var.request_uri - local time_key = get_store_key() + local time_key = C:get_store_key() local method = ngx.req.get_method() - local body_length = get_length() - local domain = get_domain() + local body_length = C:get_length() + local domain = C:get_domain() local referer = ngx.var.http_referer local kv = { @@ -774,15 +244,17 @@ log_by_lua_block { client_port=client_port } + -- C:D(json.encode(kv)) + local request_stat_fields = "req=req+1,length=length+"..body_length local spider_stat_fields = "x" local client_stat_fields = "x" if not excluded then - if status_code == 500 or (method=="POST" and config["record_post_args"] == true) or (status_code==403 and config["record_get_403_args"]==true) then + if status_code == 500 or (method=="POST" and config["record_post_args"] == true) or (status_code==403 and config["record_get_403_args"] == true) then local data = "" - local ok, err = pcall(function() data=get_http_original() end) + local ok, err = pcall(function() data = C:get_http_origin() end) if ok and not err then kv["request_headers"] = data end @@ -805,15 +277,16 @@ log_by_lua_block { local pvc = 0 local uvc = 0 - is_spider, request_spider, spider_index = match_spider(ip) + local is_spider, request_spider, spider_index = C:match_spider(kv['user_agent']) if not is_spider then - client_stat_fields = match_client() + + client_stat_fields = C:match_client(kv['user_agent']) if not client_stat_fields or #client_stat_fields == 0 then client_stat_fields = request_stat_fields..",other=other+1" end - pvc, uvc = statistics_request(ip, is_spider,body_length) - ipc = statistics_ipc(server_name,ip) + pvc, uvc = C:statistics_request(ip, is_spider,body_length) + ipc = C:statistics_ipc(server_name,ip) else kv["is_spider"] = spider_index local field = "spider" @@ -846,13 +319,13 @@ log_by_lua_block { -- C:D("ddd") cache_set(server_name, new_id, "stat_fields", stat_fields) - -- cache_set(server_name, new_id, "log_kv", json.encode(kv)) + cache_set(server_name, new_id, "log_kv", json.encode(kv)) - for i,v in pairs(kv) do - cache_set(server_name, new_id, tostring(i), tostring(v)) - -- C:D("kv:"..tostring(i)..":"..tostring(v)) - end + -- for i,v in pairs(kv) do + -- cache_set(server_name, new_id, tostring(i), tostring(v)) + -- C:D("kv:"..tostring(i)..":"..tostring(v)) + -- end end @@ -889,7 +362,7 @@ log_by_lua_block { -- D("Log stat fields is nil.") -- D("Logdata:"..logvalue) else - stat_fields = split_bylog(stat_fields, ";") + stat_fields = C:split(stat_fields, ";") request_stat_fields = stat_fields[1] client_stat_fields = stat_fields[2] spider_stat_fields = stat_fields[3] @@ -933,27 +406,27 @@ log_by_lua_block { end stmt:reset() -- D("store_logs_line ok") - update_stat( db, "client_stat", time_key, client_stat_fields) - update_stat( db, "spider_stat", time_key, spider_stat_fields) + C:update_stat( db, "client_stat", time_key, client_stat_fields) + C:update_stat( db, "spider_stat", time_key, spider_stat_fields) -- D("stat ok") -- only count non spider requests - local ok, err = pcall(function() statistics_uri(db, request_uri, ngx.md5(request_uri), body_length) end) - local ok, err = pcall(function() statistics_ip(db, ip, body_length) end) + local ok, err = pcall(function() C:statistics_uri(db, request_uri, ngx.md5(request_uri), body_length) end) + local ok, err = pcall(function() C:statistics_ip(db, ip, body_length) end) end - update_stat( db, "request_stat", time_key, request_stat_fields) + C:update_stat( db, "request_stat", time_key, request_stat_fields) return true end - local function store_logs(input_server_name) - if is_migrating(input_server_name) == true then + local function store_logs(input_sn) + if C:is_migrating(input_sn) == true then -- D("migrating...") return end - local last_insert_id_key = input_server_name.."_last_id" - local store_start_id_key = input_server_name.."_store_start" + local last_insert_id_key = input_sn.."_last_id" + local store_start_id_key = input_sn.."_store_start" local last_id = cache:get(last_insert_id_key) local store_start = cache:get(store_start_id_key) if store_start == nil then @@ -965,15 +438,15 @@ log_by_lua_block { end local worker_id = ngx.worker.id() - if is_working(input_server_name) then + if C:is_working(input_sn) then -- D("other workers are being stored, please store later.") -- cache:delete(flush_data_key) return true end - lock_working(input_server_name) + C:lock_working(input_sn) local log_dir = "{$SERVER_APP}/logs" - local db_path = log_dir .. '/' .. input_server_name .. "/logs.db" + local db_path = log_dir .. '/' .. input_sn .. "/logs.db" local db, err = sqlite3.open(db_path) if tostring(err) ~= 'nil' then @@ -1006,7 +479,7 @@ log_by_lua_block { status, errorString = db:exec([[BEGIN TRANSACTION]]) - update_day = load_update_day(input_server_name) + update_day = C:load_update_day(input_sn) if not update_day or update_day ~= today then local update_sql = "UPDATE uri_stat SET "..day_column.."=0,"..flow_column.."=0" @@ -1014,15 +487,15 @@ log_by_lua_block { update_sql = "UPDATE ip_stat SET "..day_column.."=0,"..flow_column.."=0" status, errorString = db:exec(update_sql) - write_update_day(input_server_name) + C:write_update_day(input_sn) end if store_end >= store_start then for i=store_start, store_end, 1 do -- D("store_start:"..store_start..":store_end:".. store_end) - if store_logs_line(db, stmt2, input_server_name, i) then - cache_clear(input_server_name, i, "log_kv") - cache_clear(input_server_name, i, "stat_fields") + if store_logs_line(db, stmt2, input_sn, i) then + cache_clear(input_sn, i, "log_kv") + cache_clear(input_sn, i, "stat_fields") end end end @@ -1045,24 +518,18 @@ log_by_lua_block { end cache:set(store_start_id_key, store_end+1) - unlock_working(input_server_name) + C:unlock_working(input_sn) end local function run_app() -- D("------------ debug start ------------") init_var() - local c_name = ngx.var.server_name - server_name = string.gsub(get_server_name(c_name),'_','.') - get_auto_config(server_name) - - -- D("server_name:"..server_name) - load_global_exclude_ip() load_exclude_ip(server_name) - cache_logs() - -- store_logs(server_name) + cache_logs(server_name) + store_logs(server_name) -- D("------------ debug end -------------") end diff --git a/plugins/webstats/t/bench/bench.sh b/plugins/webstats/t/bench/bench.sh index 7d6c3d0b6..1882c65ac 100755 --- a/plugins/webstats/t/bench/bench.sh +++ b/plugins/webstats/t/bench/bench.sh @@ -21,4 +21,9 @@ fi # test # $RUN_CMD simple.lua -$RUN_CMD test_today.lua +# $RUN_CMD test_today.lua +# $RUN_CMD test_time.lua + +# $RUN_CMD test_ngx_find.lua + +$RUN_CMD test_match_spider.lua \ No newline at end of file diff --git a/plugins/webstats/t/bench/test_match_spider.lua b/plugins/webstats/t/bench/test_match_spider.lua new file mode 100644 index 000000000..833b75f56 --- /dev/null +++ b/plugins/webstats/t/bench/test_match_spider.lua @@ -0,0 +1,106 @@ + +local function target() + ngx.re.find("hello, world.", [[\w+\.]], "jo") +end +for i = 1, 100 do + target() +end +-- 以上为预热操作 +collectgarbage() + +local function match_spider(ua) + -- 匹配蜘蛛请求 + local is_spider = false + local spider_name = "" + local spider_match = "" + + local spider_table = { + ["baidu"] = 1, -- check + ["bing"] = 2, -- check + ["qh360"] = 3, -- check + ["google"] = 4, + ["bytes"] = 5, -- check + ["sogou"] = 6, -- check + ["youdao"] = 7, + ["soso"] = 8, + ["dnspod"] = 9, + ["yandex"] = 10, + ["yisou"] = 11, + ["other"] = 12, + ["mpcrawler"] = 13, + ["yahoo"] = 14, -- check + ["duckduckgo"] = 15 + } + + local find_spider, _ = ngx.re.match(ua, "(Baiduspider|Bytespider|360Spider|Sogou web spider|Sosospider|Googlebot|bingbot|AdsBot-Google|Google-Adwords|YoudaoBot|Yandex|DNSPod-Monitor|YisouSpider|mpcrawler)", "ijo") + if find_spider then + is_spider = true + spider_match = string.lower(find_spider[0]) + if string.find(spider_match, "baidu", 1, true) then + spider_name = "baidu" + elseif string.find(spider_match, "bytes", 1, true) then + spider_name = "bytes" + elseif string.find(spider_match, "360", 1, true) then + spider_name = "qh360" + elseif string.find(spider_match, "sogou", 1, true) then + spider_name = "sogou" + elseif string.find(spider_match, "soso", 1, true) then + spider_name = "soso" + elseif string.find(spider_match, "google", 1, true) then + spider_name = "google" + elseif string.find(spider_match, "bingbot", 1, true) then + spider_name = "bing" + elseif string.find(spider_match, "youdao", 1, true) then + spider_name = "youdao" + elseif string.find(spider_match, "dnspod", 1, true) then + spider_name = "dnspod" + elseif string.find(spider_match, "yandex", 1, true) then + spider_name = "yandex" + elseif string.find(spider_match, "yisou", 1, true) then + spider_name = "yisou" + elseif string.find(spider_match, "mpcrawler", 1, true) then + spider_name = "mpcrawler" + end + end + + if is_spider then + return is_spider, spider_name, spider_table[spider_name] + end + + -- Curl|Yahoo|HeadlessChrome|包含bot|Wget|Spider|Crawler|Scrapy|zgrab|python|java|Adsbot|DuckDuckGo + find_spider, _ = ngx.re.match(ua, "(Yahoo|Slurp|DuckDuckGo)", "ijo") + if res then + spider_match = string.lower(find_spider[0]) + if string.find(spider_match, "yahoo", 1, true) then + spider_name = "yahoo" + elseif string.find(spider_match, "slurp", 1, true) then + spider_name = "yahoo" + elseif string.find(spider_match, "duckduckgo", 1, true) then + spider_name = "duckduckgo" + end + return true, spider_name, spider_table[spider_name] + end + return false, "", 0 +end + + + +-- local is_spider, request_spider, spider_index = match_spider("Mozilla/5.0 (compatible; bingbot/2.0; +http://www.bing.com/bingbot.htm)") + +-- ngx.say(is_spider,request_spider, spider_index) + +ngx.update_time() +local begin = ngx.now() +local N = 1e6 +for i = 1, N do + match_spider("Mozilla/5.0 (compatible; bingbot/2.0; +http://www.bing.com/bingbot.htm)") +end +ngx.update_time() + +ngx.say("match_spider elapsed: ", (ngx.now() - begin) / N) + + + + + + diff --git a/plugins/webstats/t/bench/test_ngx_find.lua b/plugins/webstats/t/bench/test_ngx_find.lua new file mode 100644 index 000000000..67a045d6e --- /dev/null +++ b/plugins/webstats/t/bench/test_ngx_find.lua @@ -0,0 +1,35 @@ + +local function target() + ngx.re.find("hello, world.", [[\w+\.]], "jo") +end +for i = 1, 100 do + target() +end +-- 以上为预热操作 +collectgarbage() + +local spider_match = "aa 220" + + +ngx.update_time() +local begin = ngx.now() +local N = 1e7 +for i = 1, N do + ngx.re.find(spider_match, "360", "ijo") +end +ngx.update_time() + +ngx.say("ngx.re.find elapsed: ", (ngx.now() - begin) / N) + + + +ngx.update_time() +local begin = ngx.now() +local N = 1e7 +for i = 1, N do + string.find(spider_match, "360", 1, true) +end +ngx.update_time() + +ngx.say("string.find elapsed: ", (ngx.now() - begin) / N) + diff --git a/plugins/webstats/t/bench/test_time.lua b/plugins/webstats/t/bench/test_time.lua new file mode 100644 index 000000000..492b79bed --- /dev/null +++ b/plugins/webstats/t/bench/test_time.lua @@ -0,0 +1,118 @@ + +local function target() + ngx.re.find("hello, world.", [[\w+\.]], "jo") +end +for i = 1, 100 do + target() +end +-- 以上为预热操作 +collectgarbage() + + +local function get_store_key() + return os.date("%Y%m%d%H", os.time()) +end + +local function get_store_key2() + return os.date("%Y%m%d%H", ngx.time()) +end + + +local function get_end_time() + local s_time = os.time() + local n_date = os.date("*t",s_time + 86400) + n_date.hour = 0 + n_date.min = 0 + n_date.sec = 0 + local d_time = os.time(n_date) + return d_time - s_time +end + + + + +local function get_end_time2() + local s_time = ngx.time() + local n_date = os.date("*t",s_time + 86400) + n_date.hour = 0 + n_date.min = 0 + n_date.sec = 0 + local d_time = ngx.time(n_date) + return d_time - s_time +end + +local function get_update_field(field, value) + return field.."="..field.."+"..value +end + +local function get_update_field2(field, value) + return field.."="..field.."+"..tostring(value) +end + + + +ngx.update_time() +local begin = ngx.now() +local N = 1e3 +for i = 1, N do + get_store_key() +end +ngx.update_time() + +ngx.say("get_store_key elapsed: ", (ngx.now() - begin) / N) + + +ngx.update_time() +local begin = ngx.now() +local N = 1e3 +for i = 1, N do + get_store_key2() +end +ngx.update_time() + +ngx.say("get_store_key2 elapsed: ", (ngx.now() - begin) / N) + + +ngx.update_time() +local begin = ngx.now() +local N = 1e5 +for i = 1, N do + get_end_time() +end +ngx.update_time() + +ngx.say("get_end_time elapsed: ", (ngx.now() - begin) / N) + + +ngx.update_time() +local begin = ngx.now() +local N = 1e5 +for i = 1, N do + get_end_time2() +end +ngx.update_time() + +ngx.say("get_end_time2 elapsed: ", (ngx.now() - begin) / N) + + +ngx.update_time() +local begin = ngx.now() +local N = 1e9 +for i = 1, N do + get_update_field("ss","1") +end +ngx.update_time() + +ngx.say("get_update_field elapsed: ", (ngx.now() - begin) / N) + + +ngx.update_time() +local begin = ngx.now() +local N = 1e9 +for i = 1, N do + get_update_field2("ss",1) +end +ngx.update_time() + +ngx.say("get_update_field2 elapsed: ", (ngx.now() - begin) / N) +