mirror of https://github.com/midoks/mdserver-web
# coding: utf-8

import traceback
import pygeoip
import threading
import socket
import sys
import hashlib
import datetime
import time
import json

import metautils
from bencode import bencode, bdecode

geoip = pygeoip.GeoIP('GeoIP.dat')
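
# Quick sanity check for the GeoIP lookup used below in save_metadata()
# ('8.8.8.8' is just an illustrative address):
#
#   print geoip.country_name_by_addr('8.8.8.8')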

def decode(encoding, s):
    # Join multi-part values before decoding; try the declared encoding first,
    # then common fallbacks, and as a last resort ignore undecodable bytes.
    if type(s) is list:
        s = ';'.join(s)
    u = s
    for x in (encoding, 'utf8', 'gbk', 'big5'):
        try:
            u = s.decode(x)
            return u
        except:
            pass
    return s.decode(encoding, 'ignore')
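
# decode() example (a sketch): GBK-encoded bytes fail the utf8 attempts and
# succeed on the 'gbk' fallback:
#
#   decode('utf8', '\xd6\xd0\xce\xc4')  # -> u'\u4e2d\u6587' (u'中文')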

def decode_utf8(encoding, d, i):
    # Prefer the parallel '<key>.utf-8' field when the client supplied one;
    # otherwise fall back to best-effort decoding of the original field.
    if i + '.utf-8' in d:
        return d[i + '.utf-8'].decode('utf8')
    return decode(encoding, d[i])
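
# decode_utf8() example (a sketch): clients following the '.utf-8' key
# convention ship a pre-encoded UTF-8 copy next to the original field:
#
#   d = {'name': '\xd6\xd0\xce\xc4', 'name.utf-8': '\xe4\xb8\xad\xe6\x96\x87'}
#   decode_utf8('gbk', d, 'name')  # -> u'\u4e2d\u6587', taken from 'name.utf-8'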

def parse_metadata(data):
    # Decode a bencoded metadata blob into a flat dict of searchable fields.
    info = {}
    encoding = 'utf8'
    try:
        torrent = bdecode(data)
        if not torrent.get('name'):
            return None
    except:
        return None
    try:
        info['create_time'] = datetime.datetime.fromtimestamp(float(torrent['creation date']))
    except:
        info['create_time'] = datetime.datetime.utcnow()

    if torrent.get('encoding'):
        encoding = torrent['encoding']
    if torrent.get('announce'):
        info['announce'] = decode_utf8(encoding, torrent, 'announce')
    if torrent.get('comment'):
        info['comment'] = decode_utf8(encoding, torrent, 'comment')[:200]
    if torrent.get('publisher-url'):
        info['publisher-url'] = decode_utf8(encoding, torrent, 'publisher-url')
    if torrent.get('publisher'):
        info['publisher'] = decode_utf8(encoding, torrent, 'publisher')
    if torrent.get('created by'):
        info['creator'] = decode_utf8(encoding, torrent, 'created by')[:15]

    # A full .torrent wraps the file details under 'info'; metadata fetched
    # from peers is the bare info dictionary itself.
    if 'info' in torrent:
        detail = torrent['info']
    else:
        detail = torrent
    info['name'] = decode_utf8(encoding, detail, 'name')
    if 'files' in detail:
        # Multi-file torrent: record each file's path and size.
        info['files'] = []
        for x in detail['files']:
            if 'path.utf-8' in x:
                v = {'path': decode(encoding, '/'.join(x['path.utf-8'])), 'length': x['length']}
            else:
                v = {'path': decode(encoding, '/'.join(x['path'])), 'length': x['length']}
            if 'filehash' in x:
                v['filehash'] = x['filehash'].encode('hex')
            info['files'].append(v)
        info['length'] = sum([x['length'] for x in info['files']])
    else:
        info['length'] = detail['length']
    info['data_hash'] = hashlib.md5(detail['pieces']).hexdigest()
    if 'profiles' in detail:
        info['profiles'] = detail['profiles']
    return info
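
# parse_metadata() usage sketch: 'data' is expected to be the bare bencoded
# info-dictionary (as fetched from peers via the BEP 9 metadata exchange);
# note the top-level 'name' guard returns None for full .torrent wrappers:
#
#   meta = parse_metadata(data)
#   if meta:
#       print meta['name'], meta['length'], meta['data_hash']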

def save_metadata(dbcurr, binhash, address, start_time, data, blacklist):
    utcnow = datetime.datetime.utcnow()
    name = threading.currentThread().getName()
    try:
        info = parse_metadata(data)
        if not info:
            return
    except:
        traceback.print_exc()
        return
    info_hash = binhash.encode('hex')
    info['info_hash'] = info_hash
    # need to build tags
    info['tagged'] = False
    info['classified'] = False
    info['requests'] = 1
    info['last_seen'] = utcnow
    info['source_ip'] = address[0]

    # Drop torrents whose name contains any blacklisted keyword.
    for item in blacklist:
        if str(item) in info['name']:
            return
    if info.get('files'):
        # Prefer real content files (paths not starting with '_') when picking
        # the representative file, unless that would leave nothing.
        files = [z for z in info['files'] if not z['path'].startswith('_')]
        if not files:
            files = info['files']
    else:
        files = [{'path': info['name'], 'length': info['length']}]
    # Classify the torrent by its largest file.
    files.sort(key=lambda z: z['length'], reverse=True)
    bigfname = files[0]['path']
    info['extension'] = metautils.get_extension(bigfname).lower()
    info['category'] = metautils.get_category(info['extension'])

    if 'files' in info:
        try:
            dbcurr.execute('INSERT INTO search_filelist VALUES(%s, %s)',
                           (info['info_hash'], json.dumps(info['files'])))
        except:
            print name, 'insert error', sys.exc_info()[1]
        del info['files']

    try:
        try:
            print '\n', 'Saved', info['info_hash'], info['name'], (time.time() - start_time), 's', address[0], geoip.country_name_by_addr(address[0]),
        except:
            print '\n', 'Saved', info['info_hash'], sys.exc_info()[1]
        try:
            # The column list must stay in sync with the 14 placeholders below.
            ret = dbcurr.execute('INSERT INTO search_hash(info_hash,category,data_hash,name,extension,classified,source_ip,tagged,' +
                                 'length,create_time,last_seen,requests,comment,creator) VALUES(%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)',
                                 (info['info_hash'], info['category'], info['data_hash'], info['name'], info['extension'], info['classified'],
                                  info['source_ip'], info['tagged'], info['length'], info['create_time'], info['last_seen'], info['requests'],
                                  info.get('comment', ''), info.get('creator', '')))
        except:
            print 'insert search_hash err:', info['info_hash']
        dbcurr.connection.commit()
    except:
        print name, 'save error', info
        traceback.print_exc()
        return
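
# Caller sketch for save_metadata(), assuming a MySQLdb connection whose
# database already contains the search_hash and search_filelist tables used
# above (host/user/passwd/db values are placeholders):
#
#   import MySQLdb
#   conn = MySQLdb.connect(host='127.0.0.1', user='root', passwd='', db='ssbc', charset='utf8')
#   # binhash, address and data come from the DHT metadata-exchange session.
#   save_metadata(conn.cursor(), binhash, address, time.time(), data, [])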