BT客户端开始一个下载首先要处理的就是torrent文件.
而torrent文件使用bencoding编码.
所以实现bencoding编码的解析器,就是第一步工作.
Bencoding is done as follows:
Strings are length-prefixed base ten followed by a colon and the string. For example '4:spam' corresponds to 'spam'.
Integers are represented by an 'i' followed by the number in base 10 followed by an 'e'. For example 'i3e' corresponds to 3 and 'i-3e' corresponds to -3. Integers have no size limitation. 'i-0e' is invalid. All encodings with a leading zero, such as 'i03e', are invalid, other than 'i0e', which of course corresponds to 0.
Lists are encoded as an 'l' followed by their elements (also bencoded) followed by an 'e'. For example 'l4:spam4:eggse' corresponds to ['spam', 'eggs'].
Dictionaries are encoded as a 'd' followed by a list of alternating keys and their corresponding values followed by an 'e'. For example, 'd3:cow3:moo4:spam4:eggse' corresponds to {'cow': 'moo', 'spam': 'eggs'} and 'd4:spaml1:a1:bee' corresponds to {'spam': ['a', 'b']}. Keys must be strings and appear in sorted order (sorted as raw strings, not alphanumerics).
下面是实现的bencoding解码器的VC++源代码:
-
-
-
-
- #if !defined(AFX_BENCODE_H__4D0BB462_2AE0_45B3_8BE8_19D51B2DBB2E__INCLUDED_)
- #define AFX_BENCODE_H__4D0BB462_2AE0_45B3_8BE8_19D51B2DBB2E__INCLUDED_
-
- #if _MSC_VER > 1000
- #pragma once
- #endif // _MSC_VER > 1000
- #pragma warning( disable : 4786 )
- #pragma warning( disable : 4355 )
- #include <list>
- #include <map>
- #include <string>
- #include <vector>
- using namespace std;
/// Error state recorded on a decoded node (stored in CBEncodeObjectBase::m_error).
enum BEncodeParserErrorCode
{
    enm_BEncodeErr_noerr     = 0, ///< node decoded without error
    enm_BEncodeErr_errString = 1, ///< assigned by CBEncode::readstring on failure
    enm_BEncodeErr_errInt    = 2, ///< assigned by CBEncode::readint on failure
    enm_BEncodeErr_errList   = 3, ///< assigned by CBEncode::readlist on failure
    enm_BEncodeErr_errDict   = 4, ///< assigned by CBEncode::readdict on failure
    enm_BEncodeErr_End       = 5, ///< not referenced by the parser code in this file
    enm_BEncodeErr_unknown   = 6  ///< not referenced by the parser code in this file
};
/// Run-time tag identifying the concrete class of a decoded node
/// (stored in CBEncodeObjectBase::m_type).
enum BEncodeObjectType
{
    enum_BEncodeType_Objectbase = 0, ///< default tag of a plain CBEncodeObjectBase
    enum_BEncodeType_String     = 1, ///< CBEncodeString
    enum_BEncodeType_Int        = 2, ///< CBEncodeInt
    enum_BEncodeType_List       = 3, ///< CBEncodeList
    enum_BEncodeType_Dict       = 4  ///< CBEncodeDict
};
- class CBEncodeObjectBase
- {
- public:
- CBEncodeObjectBase(BEncodeObjectType type
- = enum_BEncodeType_Objectbase){m_type = type;clear();}
- virtual ~CBEncodeObjectBase(){};
- void clear(){szPos = NULL;m_error = enm_BEncodeErr_noerr;}
- public:
- BEncodeObjectType m_type;
- char * szPos;
- int ilen;
- BEncodeParserErrorCode m_error;
- };
- class CBEncodeInt : public CBEncodeObjectBase
- {
- public:
- CBEncodeInt() : CBEncodeObjectBase(enum_BEncodeType_Int) {}
- virtual ~CBEncodeInt(){}
- public:
- int m_iValue;
- };
- class CBEncodeString : public CBEncodeObjectBase
- {
- public:
- CBEncodeString() : CBEncodeObjectBase(enum_BEncodeType_String) {m_szData = NULL;}
- virtual ~CBEncodeString(){}
- public:
- bool getstring(string & strValue)
- {
- if(m_error == enm_BEncodeErr_noerr && m_szData)
- {
- strValue.assign(m_szData,m_ilen);
- return true;
- }
- return false;
- }
- char * m_szData;
- int m_ilen;
- };
- class CBEncodeList : public CBEncodeObjectBase
- {
- public:
- CBEncodeList() : CBEncodeObjectBase(enum_BEncodeType_List) {}
- virtual ~CBEncodeList(){clear();}
- void clear()
- {
- list<CBEncodeObjectBase *>::iterator it;
- for(it = m_listObj.begin();it!=m_listObj.end();++it)
- delete (*it);
- m_listObj.clear();
- }
- public:
- list<CBEncodeObjectBase*> m_listObj;
- };
- class CBEncodeDict : public CBEncodeObjectBase
- {
- public:
- CBEncodeDict() : CBEncodeObjectBase(enum_BEncodeType_Dict) {}
- virtual ~CBEncodeDict(){clear();}
- CBEncodeObjectBase* getvalue(const char * szName)
- {
- map<string,CBEncodeObjectBase*>::iterator it = m_mapObj.find(szName);
- if(it != m_mapObj.end())
- return it->second;
- return NULL;
- }
- void clear()
- {
- list<CBEncodeObjectBase *>::iterator it;
- for(it = m_listObj.begin();it!=m_listObj.end();++it)
- delete (*it);
- m_listObj.clear();
- m_mapObj.clear();
- }
- public:
- map<string,CBEncodeObjectBase*> m_mapObj;
- list<CBEncodeObjectBase*> m_listObj;
-
- };
- class CBEncode
- {
- public:
- bool readint(char *szCurPos,int & iendpos,list<CBEncodeObjectBase*> & listObj);
- bool readstring(char *szCurPos,int & iendpos,list<CBEncodeObjectBase*> & listObj);
- bool readlist(char *szCurPos,int & iendpos,list<CBEncodeObjectBase*> & listObj);
- bool readdict(char *szCurPos,int & iendpos,list<CBEncodeObjectBase*> & listObj);
- bool parse(const char * szData);
- CBEncode();
- virtual ~CBEncode();
- void clear()
- {
- list<CBEncodeObjectBase *>::iterator it;
- for(it = m_listObj.begin();it!=m_listObj.end();++it)
- delete (*it);
- m_listObj.clear();
- }
- public:
- list<CBEncodeObjectBase*> m_listObj;
- CBEncodeObjectBase* m_plastObj;
- char * m_szTxt;
- };
-
- #endif // !defined(AFX_BENCODE_H__4D0BB462_2AE0_45B3_8BE8_19D51B2DBB2E__INCLUDED_)
BEncode.cpp
-
-
-
-
- #include "stdafx.h"
- #include "BEncode.h"
-
-
-
-
-
- CBEncode::CBEncode()
- {
- m_plastObj = NULL;
- m_szTxt = NULL;
- }
-
- CBEncode::~CBEncode()
- {
- clear();
- }
-
- bool CBEncode::parse(const char *szData)
- {
- if(szData == NULL||*szData==NULL)
- return false;
- clear();
- m_szTxt = (char*)szData;
- char * szCurPos = (char*)szData;
- int iendpos;
- while(*szCurPos)
- {
- if(*szCurPos== 'i')
- {
- if(!readint(szCurPos,iendpos,m_listObj))
- break;
- szCurPos+=iendpos;
- }
- else if(*szCurPos== 'l')
- {
- if(!readlist(szCurPos,iendpos,m_listObj))
- break;
- szCurPos+=iendpos;
- }
- else if(*szCurPos== 'd')
- {
- if(!readdict(szCurPos,iendpos,m_listObj))
- break;
- szCurPos+=iendpos;
- }
- else
- {
- if(!readstring(szCurPos,iendpos,m_listObj))
- break;
- szCurPos+=iendpos;
- }
- }
- if(*szCurPos==0&&m_plastObj->m_error == enm_BEncodeErr_noerr)
- return true;
- return false;
- }
-
- bool CBEncode::readstring(char *szCurPos,int & iendpos
- ,list<CBEncodeObjectBase*> & listObj)
- {
- char * szTmp = szCurPos;
- CBEncodeString * pNewString = new CBEncodeString;
- pNewString->szPos = szCurPos;
-
- char szLen[20];
- int i = 0;
- while(*szTmp>='0'&&*szTmp<='9')
- szLen[i++]=*(szTmp++);
- szLen[i]=0;
- if(*szTmp==':')
- {
- int ilen = atoi(szLen);
- if(ilen>0)
- {
- pNewString->m_szData = ++szTmp;
- pNewString->m_ilen = ilen;
- szTmp+=ilen;
- }
- else
- pNewString->m_error = enm_BEncodeErr_errString;
- }
- else
- pNewString->m_error = enm_BEncodeErr_errString;
- listObj.push_back(pNewString);
- iendpos = szTmp-szCurPos;
- m_plastObj = pNewString;
- m_plastObj->ilen = iendpos;
- return pNewString->m_error == enm_BEncodeErr_noerr?true:false;
- }
-
- bool CBEncode::readint(char *szCurPos,int & iendpos
- ,list<CBEncodeObjectBase*> & listObj)
- {
- char * szTmp = szCurPos;
- CBEncodeInt * pNewInt= new CBEncodeInt;
- pNewInt->szPos = szCurPos;
-
- if(*szTmp == 'i')
- {
- szTmp++;
- char szLen[20];
- int i = 0;
- while(*szTmp>='0'&&*szTmp<='9')
- szLen[i++]=*(szTmp++);
- szLen[i]=0;
- if(*szTmp=='e')
- {
- pNewInt->m_iValue = atoi(szLen);
- ++szTmp;
- }
- else
- pNewInt->m_error = enm_BEncodeErr_errInt;
- }
- else
- pNewInt->m_error = enm_BEncodeErr_errInt;
- listObj.push_back(pNewInt);
- iendpos = szTmp-szCurPos;
- m_plastObj = pNewInt;
- m_plastObj->ilen = iendpos;
- return pNewInt->m_error == enm_BEncodeErr_noerr?true:false;
- }
-
- bool CBEncode::readlist(char *szCurPos,int & iendpos
- ,list<CBEncodeObjectBase*> & listObj)
- {
- char * szTmp = szCurPos;
- CBEncodeList * pNewList= new CBEncodeList;
- pNewList->szPos = szCurPos;
- if(*szTmp == 'l')
- {
- szTmp++;
- int ilistendpos;
- while(*szTmp!='e')
- {
- if(*szTmp== 'i')
- {
- if(!readint(szTmp,ilistendpos,pNewList->m_listObj))
- break;
- szTmp+=ilistendpos;
- }
- else if(*szTmp== 'l')
- {
- if(!readlist(szTmp,ilistendpos,pNewList->m_listObj))
- break;
- szTmp+=ilistendpos;
- }
- else if(*szTmp== 'd')
- {
- if(!readdict(szTmp,ilistendpos,pNewList->m_listObj))
- break;
- szTmp+=ilistendpos;
- }
- else
- {
- if(!readstring(szTmp,ilistendpos,pNewList->m_listObj))
- break;
- szTmp+=ilistendpos;
- }
- }
- if(*szTmp!='e'||m_plastObj->m_error != enm_BEncodeErr_noerr)
- pNewList->m_error = enm_BEncodeErr_errList;
- else
- szTmp++;
- }
- else
- pNewList->m_error = enm_BEncodeErr_errList;
- listObj.push_back(pNewList);
- iendpos = szTmp-szCurPos;
- m_plastObj = pNewList;
- m_plastObj->ilen = iendpos;
- return pNewList->m_error == enm_BEncodeErr_noerr?true:false;
- }
-
- bool CBEncode::readdict(char *szCurPos,int & iendpos
- ,list<CBEncodeObjectBase*> & listObj)
- {
- char * szTmp = szCurPos;
- CBEncodeDict * pNewDict= new CBEncodeDict;
- pNewDict->szPos = szCurPos;
- if(*szTmp == 'd')
- {
- szTmp++;
- int ilistendpos;
- string strname;
- while(*szTmp!='e')
- {
- if(!readstring(szTmp,ilistendpos,pNewDict->m_listObj))
- break;
- if(m_plastObj->m_type !=enum_BEncodeType_String)
- break;
- strname.assign(((CBEncodeString *)m_plastObj)->m_szData
- ,((CBEncodeString *)m_plastObj)->m_ilen);
- szTmp+=ilistendpos;
- if(*szTmp== 'i')
- {
- if(!readint(szTmp,ilistendpos,pNewDict->m_listObj))
- break;
- szTmp+=ilistendpos;
- }
- else if(*szTmp== 'l')
- {
- if(!readlist(szTmp,ilistendpos,pNewDict->m_listObj))
- break;
- szTmp+=ilistendpos;
- }
- else if(*szTmp== 'd')
- {
- if(!readdict(szTmp,ilistendpos,pNewDict->m_listObj))
- break;
- szTmp+=ilistendpos;
- }
- else
- {
- if(!readstring(szTmp,ilistendpos,pNewDict->m_listObj))
- break;
- szTmp+=ilistendpos;
- }
- pNewDict->m_mapObj.insert(pair<string
- ,CBEncodeObjectBase*>(strname,m_plastObj));
- }
- if(*szTmp!='e'||m_plastObj->m_error != enm_BEncodeErr_noerr)
- pNewDict->m_error = enm_BEncodeErr_errDict;
- else
- szTmp++;
- }
- else
- pNewDict->m_error = enm_BEncodeErr_errDict;
- listObj.push_back(pNewDict);
- iendpos = szTmp-szCurPos;
- m_plastObj = pNewDict;
- m_plastObj->ilen = iendpos;
- return pNewDict->m_error == enm_BEncodeErr_noerr?true:false;
- }
(lightest) |