Simple Linux Panel
mdserver-web/plugins/simdht/workers/metadata.py

# coding: utf-8
import traceback
import pygeoip
import threading
import socket
import sys
import hashlib
import datetime
import time
import json
import metautils
from bencode import bencode, bdecode
geoip = pygeoip.GeoIP('GeoIP.dat')
# timezone setup -- note that pytz.timezone() returns a tzinfo object which
# is discarded here, so this call has no effect on the datetime.now() calls below
import pytz
pytz.timezone('Asia/Shanghai')
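
# Helpers for decoding torrent strings. Metadata fetched from the DHT is
# frequently mislabelled, so decode() tries the declared encoding first and
# then falls back to utf8, gbk and big5 before giving up and decoding with
# errors ignored. decode_utf8() prefers the '<key>.utf-8' variant of a field
# when the torrent provides one.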
def decode(encoding, s):
    if type(s) is list:
        s = ';'.join(s)
    u = s
    for x in (encoding, 'utf8', 'gbk', 'big5'):
        try:
            u = s.decode(x)
            return u
        except:
            pass
    return s.decode(encoding, 'ignore')
def decode_utf8(encoding, d, i):
    if i + '.utf-8' in d:
        return d[i + '.utf-8'].decode('utf8')
    return decode(encoding, d[i])
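
# parse_metadata() bdecodes a raw metadata blob and normalises it into a flat
# dict: name, create_time, announce/comment/publisher fields when present, a
# 'files' list (path/length/filehash) for multi-file torrents, the total
# 'length', and an md5 of the piece hashes as 'data_hash'. Returns None if
# the blob cannot be decoded or carries no name.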
def parse_metadata(data):
    info = {}
    encoding = 'utf8'
    try:
        torrent = bdecode(data)
        if not torrent.get('name'):
            return None
    except:
        return None
    try:
        info['create_time'] = datetime.datetime.fromtimestamp(
            float(torrent['creation date']))
    except:
        info['create_time'] = datetime.datetime.now()
    if torrent.get('encoding'):
        encoding = torrent['encoding']
    if torrent.get('announce'):
        info['announce'] = decode_utf8(encoding, torrent, 'announce')
    if torrent.get('comment'):
        info['comment'] = decode_utf8(encoding, torrent, 'comment')[:200]
    if torrent.get('publisher-url'):
        info['publisher-url'] = decode_utf8(encoding, torrent, 'publisher-url')
    if torrent.get('publisher'):
        info['publisher'] = decode_utf8(encoding, torrent, 'publisher')
    if torrent.get('created by'):
        info['creator'] = decode_utf8(encoding, torrent, 'created by')[:15]
    if 'info' in torrent:
        detail = torrent['info']
    else:
        detail = torrent
    info['name'] = decode_utf8(encoding, detail, 'name')
    if 'files' in detail:
        info['files'] = []
        for x in detail['files']:
            if 'path.utf-8' in x:
                v = {'path': decode(encoding, '/'.join(x['path.utf-8'])),
                     'length': x['length']}
            else:
                v = {'path': decode(encoding, '/'.join(x['path'])),
                     'length': x['length']}
            if 'filehash' in x:
                v['filehash'] = x['filehash'].encode('hex')
            info['files'].append(v)
        info['length'] = sum([x['length'] for x in info['files']])
    else:
        info['length'] = detail['length']
    info['data_hash'] = hashlib.md5(detail['pieces']).hexdigest()
    if 'profiles' in detail:
        info['profiles'] = detail['profiles']
    return info
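
# save_metadata() enriches the parsed dict (info_hash, source_ip, a category
# derived from the largest file's extension, etc.), drops torrents whose name
# matches a blacklist entry, then writes the file list to search_filelist and
# the summary row to search_hash. Note that the variable named 'utcnow'
# actually holds local time (datetime.now()).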
def save_metadata(dbcurr, binhash, address, start_time, data, blacklist):
    utcnow = datetime.datetime.now()
    name = threading.currentThread().getName()
    try:
        info = parse_metadata(data)
        if not info:
            return
    except:
        traceback.print_exc()
        return
    info_hash = binhash.encode('hex')
    info['info_hash'] = info_hash
    # need to build tags
    info['tagged'] = False
    info['classified'] = False
    info['requests'] = 1
    info['last_seen'] = utcnow
    info['source_ip'] = address[0]
    for item in blacklist:
        if str(item) in info['name']:
            return
    if info.get('files'):
        files = [z for z in info['files'] if not z['path'].startswith('_')]
        if not files:
            files = info['files']
    else:
        files = [{'path': info['name'], 'length': info['length']}]
    files.sort(key=lambda z: z['length'], reverse=True)
    bigfname = files[0]['path']
    info['extension'] = metautils.get_extension(bigfname).lower()
    info['category'] = metautils.get_category(info['extension'])
    if 'files' in info:
        try:
            dbcurr.execute('INSERT INTO search_filelist VALUES(%s, %s)',
                           (info['info_hash'], json.dumps(info['files'])))
        except:
            print name, 'insert error', sys.exc_info()[1]
        del info['files']
    try:
        try:
            print '\n', 'Saved', utcnow, info['info_hash'], info['name'], (time.time() - start_time), 's', address[0], geoip.country_name_by_addr(address[0]),
        except:
            print '\n', 'Saved', utcnow, info['info_hash'], sys.exc_info()[1]
        try:
            ret = dbcurr.execute('INSERT INTO search_hash(info_hash,category,data_hash,name,extension,classified,source_ip,tagged,' +
                                 'length,create_time,last_seen,requests,comment,creator) VALUES(%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)',
                                 (info['info_hash'], info['category'], info['data_hash'], info['name'], info['extension'], info['classified'],
                                  info['source_ip'], info['tagged'], info['length'], info['create_time'], info['last_seen'], info['requests'],
                                  info.get('comment', ''), info.get('creator', '')))
        except:
            print 'insert search_hash err: ', info['info_hash']
        dbcurr.connection.commit()
    except:
        print name, 'save error', info
        traceback.print_exc()
        return
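
# A minimal usage sketch (hypothetical caller; the connection parameters and
# the 'ssbc' database name are assumptions, not part of this file). The real
# invocation lives in the simdht worker loop that fetches metadata over the
# BitTorrent wire protocol:
#
#   import MySQLdb
#   conn = MySQLdb.connect(host='127.0.0.1', user='root', passwd='',
#                          db='ssbc', charset='utf8')
#   save_metadata(conn.cursor(), binhash, (ip, port), time.time(),
#                 metadata, blacklist=[])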