发新帖回复

javaee

普通会员

225648 帖子	344 回复	358 积分

[其它] Redis源码剖析之字典

楼主

发表于 2019-11-07 13:15:47 | 查看: 64 | 回复: 1

以下涉及到的源码均为redis5.0-rc3版本的代码【点击查看官方源码】

文章目录Redis字典数据结构哈希表字典关键点哈希冲突空间扩容与释放操作函数创建字典添加元素删除元素rehash操作

Redis字典

字典是一个key-value形式的数据结构，使用过java中map结构的都应该了解，因为其也是字典；而了解哈希表的也应该不会陌生，因为字典的底层就是哈希表。而redis本身就是一个key-value形式的nosql数据库，足以见得字典结构在redis中的地位。

数据结构

字典的定义是在redis源码中的dict.h头文件中，其底层是由哈希表构成。

哈希表

首先来看一下哈希表的结构，在dict.h头文件中是这样定义的：

//哈希表的节点entrytypedefstructdictEntry{void*key;//键union{//值void*val;uint64_tu64;int64_ts64;doubled;}v;structdictEntry*next;//next指针}dictEntry;/*Thisisourhashtablestructure.Everydictionaryhastwoofthisaswe*implementincrementalrehashing,fortheoldtothenewtable.*/typedefstructdictht{//哈希表dictEntry**table;unsignedlongsize;unsignedlongsizemask;unsignedlongused;}dictht;

哈希表由如下内容构成：

table：哈希表节点数组（指针型）。而节点dictEntry又由key，value及next指针构成；其中value也明确定义了可以是指针、无符号和有符号64位整型、double类型；next指针则是用于解决哈希键的冲突问题。size：哈希节点数组能容纳的节点个数。sizemask：值为size-1，是用于计算索引的掩码。used：节点数组已有节点的个数。

通常，图更能有助于理解，如下所示：

字典

在dict.h头文件中定义如下：

//特定函数typedefstructdictType{//计算哈希值uint64_t(*hashFunction)(constvoid*key);//复制键void*(*keyDup)(void*privdata,constvoid*key);//复制值void*(*valDup)(void*privdata,constvoid*obj);//比较键int(*keyCompare)(void*privdata,constvoid*key1,constvoid*key2);//销毁键void(*keyDestructor)(void*privdata,void*key);//销毁值void(*valDestructor)(void*privdata,void*obj);}dictType;typedefstructdict{dictType*type;void*privdata;//哈希表dicththt[2];//用于标记是否正在进行rehash操作longrehashidx;/*rehashingnotinprogressifrehashidx==-1*///正在run的迭代器数量unsignedlongiterators;/*numberofiteratorscurrentlyrunning*/}dict;/*Ifsafeissetto1thisisasafeiterator,thatmeans,youcancall*dictAdd,dictFind,andotherfunctionsagainstthedictionaryevenwhile*iterating.Otherwiseitisanonsafeiterator,andonlydictNext()*shouldbecalledwhileiterating.*/typedefstructdictIterator{dict*d;longindex;inttable,safe;dictEntry*entry,*nextEntry;/*unsafeiteratorfingerprintformisusedetection.*/longlongfingerprint;}dictIterator;

其中，明确标定了需要两个hash表，这是为什么呢？从哈希表结构的定义的代码注释（Everydictionaryhastwoofthisasweimplementincrementalrehashing,fortheoldtothenewtable.）可知，一个用于真正存值，另一个则用于在rehash的时候使用。字典的示意图如下：

在dict.h头文件中还有这样一行代码，其定义了字典中哈希表hash节点的初始个数为4。

/*Thisistheinitialsizeofeveryhashtable*/#defineDICT_HT_INITIAL_SIZE4

从字典结构的源码中还能看见一个dictIterator结构，这便是用于遍历字典的迭代器。

关键点哈希冲突

哈希表插入元素是通过利用key的hash值来计算索引位置，所以会存在冲突情况，而解决冲突则是利用hash节点结构中的next指针。当要插入一个新值的时候，比如entry(key,value)，其插入过程如下图所示：哈希冲突的解决过程代码，再后面的操作函数-添加元素中可以详见。

空间扩容与释放

空间的扩容和释放过程其实就是rehash过程。在rehash过程正在进行时，会将rehashidx值设置为-1，重新计算key的hash值，然后利用字典中专门为rehash操作而设的一张hash表进行周转迁移数据，具体详见操作函数-rehash操作。而rehash的过程中，删除、更改和查询元素等操作也是要同时遍历两张hash表的，因为可能你想要的那个元素已被迁移到另一张表中。对于删除元素操作可见操作函数-删除元素中的相应代码。

操作函数

为了理解一些点，下面将列出部分函数，想知道更多操作的内部细节可浏览源码。此处先给出头文件中宏定义的两个码，后续函数中都会用到。

#defineDICT_OK0#defineDICT_ERR1创建字典/*Createanewhashtable*/dict*dictCreate(dictType*type,void*privDataPtr){//创建一个字典指针，分配固定大小内存dict*d=zmalloc(sizeof(*d));//初始化_dictInit(d,type,privDataPtr);returnd;}/*Initializethehashtable*/int_dictInit(dict*d,dictType*type,void*privDataPtr){_dictReset(&d->ht[0]);_dictReset(&d->ht[1]);d->type=type;d->privdata=privDataPtr;d->rehashidx=-1;d->iterators=0;//DICT_OK为宏定义的码returnDICT_OK;}添加元素

字典的添加元素过程是先获取key的hash值，然后再通过掩码计算索引（hash&d->ht[table].sizemask），最后锁定索引的位置完成元素的添加。

/*Addanelementtothetargethashtable*/intdictAdd(dict*d,void*key,void*val){dictEntry*entry=dictAddRaw(d,key,NULL);if(!entry)returnDICT_ERR;dictSetVal(d,entry,val);returnDICT_OK;}/*Lowleveladdorfind:*Thisfunctionaddstheentrybutinsteadofsettingavaluereturnsthe*dictEntrystructuretotheuser,thatwillmakesuretofillthevalue*fieldashewishes.**ThisfunctionisalsodirectlyexposedtotheuserAPItobecalled*mainlyinordertostorenon-pointersinsidethehashvalue,example:**entry=dictAddRaw(dict,mykey,NULL);*if(entry!=NULL)dictSetSignedIntegerVal(entry,1000);**Returnvalues:**IfkeyalreadyexistsNULLisreturned,and"*existing"ispopulated*withtheexistingentryifexistingisnotNULL.**Ifkeywasadded,thehashentryisreturnedtobemanipulatedbythecaller.*/dictEntry*dictAddRaw(dict*d,void*key,dictEntry**existing){longindex;dictEntry*entry;dictht*ht;if(dictIsRehashing(d))_dictRehashStep(d);/*Gettheindexofthenewelement,or-1if*theelementalreadyexists.*/if((index=_dictKeyIndex(d,key,dictHashKey(d,key),existing))==-1)计算索引值，如果元素已存在则returnreturnNULL;/*Allocatethememoryandstorethenewentry.*Inserttheelementintop,withtheassumptionthatinadatabase*systemitismorelikelythatrecentlyaddedentriesareaccessed*morefrequently.*/ht=dictIsRehashing(d)?&d->ht[1]:&d->ht[0];entry=zmalloc(sizeof(*entry));//元素的加入总是加在索引处的第一个位置，这一步在解决hash冲突时将新加入的entry->next指向索引上原有的第一个entryentry->next=ht->table[index];ht->table[index]=entry;ht->used++;/*Setthehashentryfields.*/dictSetKey(d,entry,key);returnentry;}/*Returnstheindexofafreeslotthatcanbepopulatedwith*ahashentryforthegiven'key'.*Ifthekeyalreadyexists,-1isreturned*andtheoptionaloutputparametermaybefilled.**Notethatifweareintheprocessofrehashingthehashtable,the*indexisalwaysreturnedinthecontextofthesecond(new)hashtable.*/staticlong_dictKeyIndex(dict*d,constvoid*key,uint64_thash,dictEntry**existing){unsignedlongidx,table;dictEntry*he;if(existing)*existing=NULL;/*Expandthehashtableifneeded*/if(_dictExpandIfNeeded(d)==DICT_ERR)return-1;for(table=0;table<=1;table++){//用hash值与掩码进行与操作得出索引值idx=hash&d->ht[table].sizemask;/*Searchifthisslotdoesnotalreadycontainthegivenkey*/he=d->ht[table].table[idx];while(he){if(key==he->key||dictCompareKeys(d,key,he->key)){if(existing)*existing=he;//已存在返回-1return-1;}he=he->next;}if(!dictIsRehashing(d))break;}//不存在，返回索引位置returnidx;}删除元素/*Removeanelement,returningDICT_OKonsuccessorDICT_ERRifthe*elementwasnotfound.*/intdictDelete(dict*ht,constvoid*key){returndictGenericDelete(ht,key,0)?DICT_OK:DICT_ERR;}/*Searchandremoveanelement.Thisisanhelperfunctionfor*dictDelete()anddictUnlink(),pleasecheckthetopcomment*ofthosefunctions.*/staticdictEntry*dictGenericDelete(dict*d,constvoid*key,intnofree){uint64_th,idx;dictEntry*he,*prevHe;inttable;if(d->ht[0].used==0&&d->ht[1].used==0)returnNULL;if(dictIsRehashing(d))_dictRehashStep(d);h=dictHashKey(d,key);//遍历两个hashtable并删除给定值，为了避免rehash过程中，用于rehash哈希表存储了要删除的值for(table=0;table<=1;table++){idx=h&d->ht[table].sizemask;he=d->ht[table].table[idx];prevHe=NULL;while(he){if(key==he->key||dictCompareKeys(d,key,he->key)){/*Unlinktheelementfromthelist*/if(prevHe)prevHe->next=he->next;elsed->ht[table].table[idx]=he->next;if(!nofree){dictFreeKey(d,he);dictFreeVal(d,he);zfree(he);}d->ht[table].used--;returnhe;}prevHe=he;he=he->next;}if(!dictIsRehashing(d))break;}returnNULL;/*notfound*/}rehash操作/*PerformsNstepsofincrementalrehashing.Returns1iftherearestill*keystomovefromtheoldtothenewhashtable,otherwise0isreturned.**Notethatarehashingstepconsistsinmovingabucket(thatmayhavemore*thanonekeyasweusechaining)fromtheoldtothenewhashtable,however*sincepartofthehashtablemaybecomposedofemptyspaces,itisnot*guaranteedthatthisfunctionwillrehashevenasinglebucket,sinceit*willvisitatmaxN*10emptybucketsintotal,otherwisetheamountof*workitdoeswouldbeunboundandthefunctionmayblockforalongtime.*/intdictRehash(dict*d,intn){intempty_visits=n*10;/*Maxnumberofemptybucketstovisit.*/if(!dictIsRehashing(d))return0;while(n--&&d->ht[0].used!=0){dictEntry*de,*nextde;/*Notethatrehashidxcan'toverflowaswearesuretherearemore*elementsbecauseht[0].used!=0*/assert(d->ht[0].size>(unsignedlong)d->rehashidx);while(d->ht[0].table[d->rehashidx]==NULL){d->rehashidx++;if(--empty_visits==0)return1;}de=d->ht[0].table[d->rehashidx];/*MoveallthekeysinthisbucketfromtheoldtothenewhashHT*///迁移数据while(de){uint64_th;nextde=de->next;/*Gettheindexinthenewhashtable*///计算新索引h=dictHashKey(d,de->key)&d->ht[1].sizemask;de->next=d->ht[1].table[h];d->ht[1].table[h]=de;d->ht[0].used--;d->ht[1].used++;de=nextde;}d->ht[0].table[d->rehashidx]=NULL;d->rehashidx++;}/*Checkifwealreadyrehashedthewholetable...*/if(d->ht[0].used==0){//如果整个rehash操作已完成，则将h[1]和h[0]互换位置zfree(d->ht[0].table);d->ht[0]=d->ht[1];_dictReset(&d->ht[1]);d->rehashidx=-1;return0;}/*Moretorehash...*/return1;

[其它] Redis源码剖析之字典

相关帖子