BitTorrent协议(四)之bitfield消息

bitfield消息

BEP3的peer message章节说明了几种消息类型。 其中bitfield消息理解起来是容易的,但是实际过程中却略有不同。

‘bitfield’ is only ever sent as the first message. Its payload is a bitfield with each index that downloader has sent set to one and the rest set to zero. Downloaders which don’t have anything yet may skip the ‘bitfield’ message. The first byte of the bitfield corresponds to indices 0 - 7 from high bit to low bit, respectively. The next one 8-15, etc. Spare bits at the end are set to zero.

这段话的意思很简单,举例说明一下:bitfield是以字节为单位的,如果一个文件有10个分片,就需要2个字节(16位),其中末尾的6位是补齐位,恒为0。如果还没有下载任何数据,那么bitfield的值应该是(为了便于阅读,每个字节的二进制码之间以一个空格隔开)

0b00000000 00000000

如果下载了piece 0,则值应该是

0b10000000 00000000

如果全部下载完成,则应该是

0b11111111 11000000

一共是10个1,表示10个piece,最后的6个0是补齐位。

但是实际上呢,对于一个下载完成的文件,获取到的bitfield值真的是0b11111111 11000000么?

我们可以自己制作一个种子来验证一下。

本地验证

用uTorrent软件选择本地的一个文件制作一个种子文件。如下图所示

file-bt

该文件有110个片。

读取bitfield消息的代码如下:

# -*- coding: utf-8 -*-
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

__author__ = 'ym'
"""
Date : '2019/1/22'
Description :

"""
from ym.bt.peer_protocol import pack_handshake, unpack_handshake, sendall, recv, pack_extend
from ym.bt.parse_torrent import BDecode
from ym.bt.udp_announce import ip_me
import socket
import struct
import logging
from bitstring import BitArray, Bits

log = logging.getLogger()


def download():
    """Handshake with the local peer and print the first bitfield it sends.

    Opens a TCP connection to ``ip_me()`` on port 40959, performs the
    BitTorrent handshake for a fixed info-hash, sends ``interested`` and then
    reads length-prefixed peer messages until a ``bitfield`` message (id 5)
    arrives.  The bitfield is printed as a string of '0'/'1' characters and
    the loop stops.

    The project helpers ``sendall``/``recv``/``unpack_handshake`` are treated
    as returning None on failure (that is how this code has always checked
    them); in that case the function simply returns.

    Returns:
        None.  Any exception is logged through ``log.exception`` instead of
        being propagated.
    """
    sock = None
    try:
        info_hash = 'c99c3b7a5ba31a8966e6c9a40bc4f83887a107e5'
        ip, port = ip_me(), 40959
        sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        sock.settimeout(1)
        sock.connect((ip, port))

        handshake = pack_handshake(info_hash)
        rt = sendall(sock, handshake)
        if rt is None:
            return
        print("sent={}, data={}".format(rt, handshake))
        # A handshake is 1 + 19 + 8 + 20 + 20 = 68 bytes.
        data = recv(sock, 68)

        tup = unpack_handshake(data)
        if tup is None:
            return
        print(tup)

        # interested: length prefix 1, message id 2, no payload
        data = struct.pack(">i B", 1, 2)
        rt = sendall(sock, data)
        if rt is None:
            return

        bitfield = None
        while True:
            # Every peer message starts with a 4-byte big-endian length.
            data = recv(sock, 4)
            if data is None:
                return
            # The length can never be negative, so decode it as unsigned.
            num, = struct.unpack(">I", data)
            if num == 0:
                # Keep-alive: no id byte and no payload follow.  Reading an
                # id here would swallow the next message's length prefix.
                continue

            tmp = recv(sock, 1)
            if tmp is None:
                return
            msg_id, = struct.unpack(">B", tmp)
            print("id={}".format(msg_id))

            payload = recv(sock, num - 1)
            print("recv={}".format(payload))
            if msg_id == 5:
                # bitfield
                if payload is None:
                    # The read failed; do not mistake that for an empty bitfield.
                    return
                bitfield = BitArray(payload)
                print(bitfield.bin)
                break
            # Every other message type is irrelevant to this experiment.
    except Exception as e:
        log.exception(e)
    finally:
        # Release the connection on every exit path (early return or error).
        if sock is not None:
            sock.close()


if __name__ == '__main__':
    download()

我们看到日志如下

/usr/local/bin/python3.7 /Users/ym/charm/pytest/ym/bt/bt_download.py
sent=ok, data=b'\x13BitTorrent protocol\x00\x00\x00\x00\x00\x10\x00\x01\xc9\x9c;z[\xa3\x1a\x89f\xe6\xc9\xa4\x0b\xc4\xf88\x87\xa1\x07\xe5ym111111111111111111'
Handshake(fixed_num=19, bt_head=b'BitTorrent protocol', reserved=b'\x00\x00\x00\x00\x00\x10\x00\x05', info_hash=b'\xc9\x9c;z[\xa3\x1a\x89f\xe6\xc9\xa4\x0b\xc4\xf88\x87\xa1\x07\xe5', peer_id=b'-UM1870-\x14\xab\x9c\xa8$G\x8euX\x91\xad\x9e')
id=20
recv=b'\x00d1:ei0e4:ipv616:\xfe\x80\x00\x00\x00\x00\x00\x00\x04:^:D[\x0c\x9212:complete_agoi1e1:md11:upload_onlyi3e12:ut_holepunchi4e11:ut_metadatai2e6:ut_pexi1e12:ut_recommendi5e10:ut_commenti6ee13:metadata_sizei2270e1:pi40959e4:reqqi255e1:v19:\xc2\xb5Torrent Mac 1.8.76:yourip4:\xc0\xa8+Re'
id=5
recv=b'\xff\xff\xffg~\xaf\xbf=\x7f\xff\xeb&\xdd\x98'
1111111111111111111111110110011101111110101011111011111100111101011111111111111111101011001001101101110110011000

这是怎么回事,为什么中间会有这么多0呢,不是已经下载完成了么?

111111111111111111111111011001110111111010101111101111110011110...

按理说应该是110个1才对(bitfield共14个字节、112位,只有末尾2位是补齐的0)。而且如果再运行几遍,会发现每次值为0的分片index都不相同。

have消息

原来这是uTorrent软件实现的一个策略(通常被称为lazy bitfield):即使已经下载完成,也不会返回110个1的bitfield,而是故意留下一些0,再用have消息(也就是id为4的peer消息)来补全。have消息的值是一个piece的index值,表示该piece已经下载完成。

因此,我们需要结合bitfield消息和have消息来判断是否下载完成。把代码修改一下,每接收到一个have消息,就把bitfield中相应index的位置为1。

from ym.bt.peer_protocol import pack_handshake, unpack_handshake, sendall, recv, pack_extend
from ym.bt.parse_torrent import BDecode
from ym.bt.udp_announce import ip_me
import socket
import struct
import logging
from bitstring import BitArray, Bits

log = logging.getLogger()


def download():
    """Handshake with the local peer and track its pieces via bitfield + have.

    Opens a TCP connection to ``ip_me()`` on port 40959, performs the
    BitTorrent handshake for a fixed info-hash, sends ``interested`` and then
    keeps reading length-prefixed peer messages:

    * ``bitfield`` (id 5) replaces the tracked bit array and prints it;
    * ``have`` (id 4) sets the bit of the announced piece index and prints
      the updated bit array;
    * every other message is printed and ignored.

    The loop has no completion condition of its own; it ends when a helper
    reports failure by returning None (for example after the socket timeout)
    or when an exception is raised.

    Returns:
        None.  Any exception is logged through ``log.exception`` instead of
        being propagated.
    """
    sock = None
    try:
        info_hash = 'c99c3b7a5ba31a8966e6c9a40bc4f83887a107e5'
        ip, port = ip_me(), 40959
        sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        sock.settimeout(1)
        sock.connect((ip, port))

        handshake = pack_handshake(info_hash)
        rt = sendall(sock, handshake)
        if rt is None:
            return
        print("sent={}, data={}".format(rt, handshake))
        # A handshake is 1 + 19 + 8 + 20 + 20 = 68 bytes.
        data = recv(sock, 68)

        tup = unpack_handshake(data)
        if tup is None:
            return
        print(tup)

        # interested: length prefix 1, message id 2, no payload
        data = struct.pack(">i B", 1, 2)
        rt = sendall(sock, data)
        if rt is None:
            return

        bitfield = None
        while True:
            # Every peer message starts with a 4-byte big-endian length.
            data = recv(sock, 4)
            if data is None:
                return
            # The length can never be negative, so decode it as unsigned.
            num, = struct.unpack(">I", data)
            if num == 0:
                # Keep-alive: no id byte and no payload follow.  Reading an
                # id here would swallow the next message's length prefix.
                continue

            tmp = recv(sock, 1)
            if tmp is None:
                return
            msg_id, = struct.unpack(">B", tmp)
            print("id={}".format(msg_id))

            payload = recv(sock, num - 1)
            print("recv={}".format(payload))
            if msg_id == 5:
                # bitfield
                if payload is None:
                    # The read failed; do not mistake that for an empty bitfield.
                    return
                bitfield = BitArray(payload)
                print(bitfield.bin)
            elif msg_id == 4:
                # have: the payload is the 4-byte big-endian piece index
                if payload is None:
                    return
                index, = struct.unpack(">I", payload)
                print("index={}".format(index))
                if bitfield is None:
                    # A peer may skip the bitfield message entirely; without
                    # it the number of pieces is unknown here, so the index
                    # cannot be recorded.
                    log.warning("have(%d) received before any bitfield; ignored", index)
                    continue
                if index >= len(bitfield):
                    log.warning("have(%d) is outside the %d-bit bitfield; ignored",
                                index, len(bitfield))
                    continue
                # Test the bit directly instead of rendering the whole
                # '0'/'1' string just to look at one character.
                if not bitfield[index]:
                    bitfield.set(True, index)
                print(bitfield.bin)
            # Every other message type is irrelevant to this experiment.
    except Exception as e:
        log.exception(e)
    finally:
        # Release the connection on every exit path (early return or error).
        if sock is not None:
            sock.close()


if __name__ == '__main__':
    download()

我们再看一下日志

/usr/local/bin/python3.7 /Users/ym/charm/pytest/ym/bt/bt_download.py
sent=ok, data=b'\x13BitTorrent protocol\x00\x00\x00\x00\x00\x10\x00\x01\xc9\x9c;z[\xa3\x1a\x89f\xe6\xc9\xa4\x0b\xc4\xf88\x87\xa1\x07\xe5ym111111111111111111'
Handshake(fixed_num=19, bt_head=b'BitTorrent protocol', reserved=b'\x00\x00\x00\x00\x00\x10\x00\x05', info_hash=b'\xc9\x9c;z[\xa3\x1a\x89f\xe6\xc9\xa4\x0b\xc4\xf88\x87\xa1\x07\xe5', peer_id=b'-UM1870-\x14\xabY\x08\r\x82=\xd1J^\x930')
id=20
recv=b'\x00d1:ei0e4:ipv616:\xfe\x80\x00\x00\x00\x00\x00\x00\x04:^:D[\x0c\x9212:complete_agoi1e1:md11:upload_onlyi3e12:ut_holepunchi4e11:ut_metadatai2e6:ut_pexi1e12:ut_recommendi5e10:ut_commenti6ee13:metadata_sizei2270e1:pi40959e4:reqqi255e1:v19:\xc2\xb5Torrent Mac 1.8.76:yourip4:\xc0\xa8+Re'
id=5
recv=b'\xfb\xdf\xbf\xee\xdf\xdf\x9d\xef\xb9\xfa\xfbv^\xf8'
1111101111011111101111111110111011011111110111111001110111101111101110011111101011111011011101100101111011111000
id=4
recv=b'\x00\x00\x002'
index=50
1111101111011111101111111110111011011111110111111011110111101111101110011111101011111011011101100101111011111000

...
...
...

id=4
recv=b'\x00\x00\x00b'
index=98
1111111111011111111111111111111111111111111111111111111111111111111111111111111111111111011111111111111111111100
id=4
recv=b'\x00\x00\x00X'
index=88
1111111111011111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111100
id=4
recv=b'\x00\x00\x00\n'
index=10
1111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111100
already recv:b''
timed out
Traceback (most recent call last):
  File "/Users/ym/charm/pytest/ym/bt/peer_protocol.py", line 40, in recv
    tmp = sock.recv(n - len(data))
socket.timeout: timed out

Process finished with exit code 0

可以看到,用have消息补全之后,确实是110个1(末尾的2个0是补齐位)。