24 template<
class keyType>
27 typedef uint64_t valueType;
30 vector<valueType>
mem;
33 #define LMASK ((L==64)?(~0ULL):((1ULL<<L)-1))
39 uint32_t trycount = 0;
43 #define FILLCNTLEN (sizeof(uint32_t)*8)
47 bool autoclear =
false;
54 inline valueType
get(uint32_t loc) {
55 uint64_t st = ((uint64_t) loc) * L;
56 uint32_t mx = st & 0x3F;
59 if ( (mx + L) > 64 ) {
63 return ((mem[st>>6]>>mx) | (mem[(st>>6)+1]<<(64-mx)));
66 return (mem[st>>6]>>mx);
71 valueType
inline set(uint32_t loc, valueType &value) {
73 uint64_t st = ((uint64_t) loc) * L;
74 uint32_t mx = st & 0x3F;
76 if ( (mx + L) > 64 ) {
77 mem[st>>6] &= ((UINT64_MAX) >> (64-mx));
78 mem[st>>6] |= (value << mx);
79 mem[(st>>6)+1] &= (UINT64_MAX << ((mx+L)-64));
80 mem[(st>>6)+1] |= (value >> (64-mx));
84 mem[st>>6] &= (~(LMASK << mx));
85 mem[st>>6] |= (value << mx);
90 inline valueType getrand(valueType &v) {
91 valueType va = rand();
100 uint32_t s2 = rand();
105 Ha.setMaskSeed(ma-1,s1);
106 Hb.setMaskSeed(mb-1,s2);
108 if (trycount>1) printf(
"NewHash for the %d time\n", trycount);
111 vector<int32_t> *first = NULL, *nxt1 = NULL, *nxt2 = NULL;
112 bool testHash(uint32_t keycount);
123 void fillvalue(
void *values, uint32_t keycount,
size_t valuesize);
124 void fillvalueBFS(
void *values,
size_t valuesize,
int root,
bool usepublicFilled);
125 bool trybuild(
void *values, uint32_t keycount,
size_t valuesize) {
127 disj.setLength(ma+mb);
128 if (succ = testHash(keycount)) {
129 fillvalue(values, keycount,valuesize);
131 if (autoclear || (!succ))
150 Othello(uint8_t _L, keyType *_keys, uint32_t keycount,
bool _autoclear =
true,
void *_values = NULL,
size_t _valuesize = 0, int32_t _allowed_conflicts = -1 ) {
151 mykeycount = keycount;
152 allowed_conflicts = _allowed_conflicts;
154 autoclear = _autoclear;
158 while ((1UL<<hl2) < keycount * 1) hl2++;
159 while ((1UL<<hl1) < keycount* 1.333334) hl1++;
163 if (allowed_conflicts<0) {
164 allowed_conflicts = (ma<20)?0:ma-20;
166 uint64_t bitcnt = (ma+mb);
168 while (bitcnt % (
sizeof(valueType)*8)) bitcnt++;
169 bitcnt /= (
sizeof(valueType)*8);
170 mem.resize(bitcnt,0ULL);
171 while ( ((uint64_t)mem.size())*
sizeof(mem[0])*8ULL<(ma+mb)*((uint64_t)L) )
175 cout <<
"Building" <<endl;
177 while ( (!build) && (hl1<=31&&hl2<=31)) {
180 build = trybuild( _values, keycount, _valuesize);
183 if (hl1 == hl2) hl1++;
188 uint64_t bitcnt = (ma+mb);
190 while (bitcnt % (
sizeof(valueType)*8)) bitcnt++;
191 bitcnt /= (
sizeof(valueType)*8);
192 mem.resize(bitcnt,0ULL);
193 while ( ((uint64_t)mem.size())*
sizeof(mem[0])*8ULL<(ma+mb)*((uint64_t)L) )
196 cout <<
"Extend Othello Length to" <<
human(keycount) <<
" Keys, ma/mb = " <<
human(ma) <<
"/"<<
human(mb) <<
" keyT"<<
sizeof(keyType)*8<<
"b valueT" <<
sizeof(valueType)*8<<
"b"<<
" L="<<(int) L<<endl;
200 printf(
"%08x %08x\n", Ha.s, Hb.s);
202 cout <<
"Succ " <<
human(keycount) <<
" Keys, ma/mb = " <<
human(ma) <<
"/"<<
human(mb) <<
" keyT"<<
sizeof(keyType)*8<<
"b valueT" <<
sizeof(valueType)*8<<
"b"<<
" L="<<(int) L <<
" After "<<trycount <<
"tries"<< endl;
205 cout <<
"Build Fail!" << endl;
208 template<
typename VT>
209 Othello(uint8_t _L, vector<keyType> &keys, vector<VT> &values,
bool _autoclear =
true, int32_t allowed_conflicts = -1) :
210 Othello(_L, & (keys[0]),keys.size(), _autoclear, &(values[0]), sizeof(VT), allowed_conflicts)
219 nxt1 = nxt2 = first= NULL;
239 memcpy(v,&L,
sizeof(uint32_t));
240 memcpy(v+4,&s1,
sizeof(uint32_t));
241 memcpy(v+8,&s2,
sizeof(uint32_t));
242 int hl1 = 0, hl2 = 0;
243 if (ma == 0 || mb ==0) {
247 while ((1<<hl1)!= ma) hl1++;
248 while ((1<<hl2)!= mb) hl2++;
250 memcpy(v+0x10,&hl1,
sizeof(uint32_t));
251 memcpy(v+0x14,&hl2,
sizeof(uint32_t));
260 memcpy(&(L),v,
sizeof(uint32_t));
261 memcpy(&(s1),v+4,
sizeof(uint32_t));
262 memcpy(&(s2),v+8,
sizeof(uint32_t));
263 memcpy(&hl1, v+0x10,
sizeof(uint32_t));
264 memcpy(&hl2, v+0x14,
sizeof(uint32_t));
265 if (hl1 > 0 && hl2 >0) {
269 uint64_t bitcnt = (ma+mb);
271 while (bitcnt % (
sizeof(valueType)*8)) bitcnt++;
272 bitcnt /= (
sizeof(valueType)*8);
273 mem.resize(bitcnt,0ULL);
274 while ( ((uint64_t)mem.size())*
sizeof(mem[0])*8ULL<(ma+mb)*((uint64_t)L) )
276 Ha.setMaskSeed(ma-1,s1);
277 Hb.setMaskSeed(mb-1,s2);
285 void addkeys(
int newkeys,
void *values, uint32_t valuesize) {
286 nxt1->resize(mykeycount+newkeys);
287 nxt2->resize(mykeycount+newkeys);
289 for (
int i = mykeycount; i< mykeycount+newkeys; i++){
291 get_hash(keys[i],ha,hb);
292 if (testConnected(ha,hb)) {
293 mykeycount += newkeys;
294 trybuild(values, mykeycount, L);
297 (*nxt1)[i] = (*first)[ha];
299 (*nxt2)[i] = (*first)[hb];
301 fillvalueBFS(values, valuesize, ha,
false);
304 mykeycount += newkeys;
312 return query(k,ha,hb);
317 void printValueTSize() {
318 std::cout <<
sizeof(valueType) << std::endl;
322 vector<uint32_t> getCnt();
323 vector<double> getRatio();
331 void setAlienPreference(
double ideal = 1.0);
333 void inline get_hash_1(
const keyType &v, uint32_t &ret1) {
344 inline valueType
query(
const keyType &k, uint32_t &ha, uint32_t &hb) {
346 valueType aa =
get(ha);
348 valueType bb =
get(hb);
350 return LMASK & (aa^bb);
352 void inline get_hash_2(
const keyType &v, uint32_t &ret1) {
356 void inline get_hash(
const keyType &v, uint32_t &ret1, uint32_t &ret2) {
365 fread(&(mem[0]),
sizeof(mem[0]), mem.size(), pF);
371 fwrite(&(mem[0]),
sizeof(mem[0]), mem.size(), pF);
374 void padd (vector<int32_t> &A, valueType &t) {
375 const valueType one = 1;
376 for (
int i = 0; i <L; i++)
381 void pdiff(vector<int32_t> &A, valueType &t) {
382 const valueType one = 1;
383 for (
int i = 0; i <L; i++)
389 bool testConnected(int32_t ha, int32_t hb);
397 void removeOneKey(uint32_t kid);
401 void updatevalue(uint32_t kid,
void * values, uint32_t valuesize) {
403 get_hash(keys[kid],ha,hb);
404 fillvalueBFS(values, valuesize, ha,
false);
421 template<
class keyType>
424 if (nxt1 == NULL) nxt1 =
new vector<int32_t> (keycount);
425 if (nxt2 == NULL) nxt2 =
new vector<int32_t> (keycount);
426 if (first == NULL) first =
new vector<int32_t> (ma+mb, -1);
429 for (
int i = 0; i < keycount; i++) {
430 if (i>1048576)
if ((i & (i-1)) == 0) printf(
"Tesing keys # %d\n",i);
431 get_hash(keys[i], ha, hb);
433 if (disj.sameset(ha,hb)) {
434 removedKeys.push_back(keys[i]);
435 if (removedKeys.size()> allowed_conflicts)
439 (*nxt1)[i] = (*first)[ha];
441 (*nxt2)[i] = (*first)[hb];
448 template<
class keyType>
451 int t = (*first)[ha0];
458 bool isAtoB = (kid>0);
459 if (kid <0) kid = -kid;
462 get_hash(keys[kid], ha, hb);
463 if (hb == hb0)
return true;
464 int t = (*first)[hb];
466 int t = (*first)[hb];
468 if (t!=kid) q.push_back(-t);
473 int t = (*first)[ha];
475 if (t!=kid) q.push_back(t);
483 template<
class keyType>
486 get_hash(keys[kid],ha,hb);
488 keys[kid] = keys[mykeycount];
490 get_hash(keys[mykeycount], hal, hbl);
493 if ((*first).at(ha) == kid) {
494 (*first).at(ha) = (*nxt1)[kid];
496 int t = (*first)[ha];
497 while ((*nxt1)[t] != kid) t = (*nxt1)[t];
498 (*nxt1)[t] = (*nxt1)[(*nxt1)[t]];
500 (*nxt1)[kid] = (*nxt1)[mykeycount];
502 if ((*first)[hal] == mykeycount) {
505 int t = (*first)[hal];
506 while ((*nxt1)[t] != mykeycount) t = (*nxt1)[t];
510 if ((*first)[hb] == kid) {
511 (*first)[hb] = (*nxt2)[kid];
513 int t = (*first)[hb];
514 while ((*nxt2)[t] !=kid) t = (*nxt2)[t];
515 (*nxt2)[t] = (*nxt2)[(*nxt2)[t]];
517 (*nxt2)[kid] = (*nxt2)[mykeycount];
518 if ((*first)[hbl] == mykeycount) {
522 int t = (*first)[hbl];
523 while ((*nxt2)[t] != kid) t= (*nxt2)[t];
528 template<
class keyType>
531 vector<int32_t> *nxt;
533 while (!Q.empty()) Q.pop_front();
535 std::set<uint32_t> s;
542 uint32_t nodeid = (*Q.begin());
544 if (nodeid < ma) nxt = nxt1;
546 int32_t kid = first->at(nodeid);
549 get_hash(keys[kid],ha,hb);
550 if (usepublicFilled) {
551 if (filled[ha] && filled[hb]) {
557 if (s.find(ha)!=s.end() && s.find(hb)!=s.end()) {
563 bool isfa = (usepublicFilled)?filled[ha]:(s.find(ha)!=s.end());
564 int helse = isfa ? hb : ha;
565 int hthis = isfa ? ha : hb;
567 valueType valueKid = 0;
568 if (values != NULL) {
569 uint8_t * loc = (uint8_t *) values;
570 loc += (kid * valuesize);
571 memcpy(&valueKid, loc, valuesize);
576 valueKid = (hthis == ha)?1:0;
577 fillcount[helse/FILLCNTLEN] |= (1<<(helse % FILLCNTLEN));
580 valueType newvalue = valueKid ^
get(hthis);
582 set(helse, newvalue);
587 filled[helse] =
true;
595 template<
class keyType>
597 filled.resize(ma+mb);
598 fill(filled.begin(), filled.end(),
false);
599 if (values == NULL) {
600 fillcount.resize((ma+mb)/32);
601 fill(fillcount.begin(),fillcount.end(),0);
603 for (
int i = 0; i< ma+mb; i++)
604 if (disj.isroot(i)) {
608 fillvalueBFS(values, valuesize, i,
true);
612 template<
class keyType>
614 vector<uint32_t> cnt;
616 for (
int i = 0; i < ma; i++) {
617 valueType gv =
get(i);
619 vv = (uint8_t*) ((
void *) &gv);
620 for (
int p = 0; p < L; p++) {
622 tv = ((*vv) >> (p & 7));
623 if (tv & 1) cnt[p]++;
624 if ((p & 7) == 7) vv++;
627 for (
int i = ma; i < ma+mb; i++) {
628 valueType gv =
get(i);
630 vv = (uint8_t*) ((
void *) &gv);
631 for (
int p = 0; p < L; p++) {
633 tv = ((*vv) >> (p & 7));
634 if (tv & 1) cnt[L+p]++;
635 if ((p & 7) == 7) vv++;
642 template<
class keyType>
644 vector<uint32_t> cnt = getCnt();
647 for (
int i = 0; i < L; i++) {
648 double p1 = 1.0 * cnt[i] / ma;
649 double p2 = 1.0 * cnt[i+L] / mb;
650 ret[i] = p1*(1-p2)+p2*(1-p1);
656 template<
class keyType>
658 if (filled.size() <=1)
return;
659 printf(
"Random flip\n");
661 vector<list<uint32_t> > VL(ma+mb, list<uint32_t>());
662 for (
int i = 0; i< ma+mb; i++)
669 VL[disj.getfa(i)].push_back(i);
671 for (
int i = 0; i < ma+mb; i++) {
673 for (
auto j = VL[i].begin(); j!=VL[i].end(); j++) {
674 valueType newvalue =
get(*j)^vv;
680 double getrate(uint32_t ma, uint32_t mb, uint32_t da, uint32_t db) {
681 double pa = da*1.0/ma;
682 double pb = db*1.0/mb;
683 double ret = pa*(1-pb)+pb*(1-pa);
688 template<
class keyType>
690 int da[] = {1,1,0,-1,-1,-1,0,1};
691 int db[] = {0,1,1,1,0,-1,-1,-1};
692 vector< array<int32_t,8> > sa (L, array<int32_t,8>());
693 vector< array<int32_t,8> > sb (L, array<int32_t,8>());
694 for (
int i = 0; i < 8; i++)
695 for (
int j =0; j <L; j++)
696 sa[j][i] = sb[j][i] =0;
697 vector<int32_t> na(L,0),nb(L,0);
701 if (filled.size() <=1)
return;
702 printf(
"Adjust bitmap goal: return 1 with rate %.3lf\n",ideal);
704 vector<list<uint32_t> > VL(ma+mb, list<uint32_t>());
705 for (
int i = 0; i< ma+mb; i++) {
712 VL[disj.getfa(i)].push_back(i);
713 valueType cur =
get(i);
714 if (i<ma) padd(na, cur);
718 for (
int i = 0; i < ma+mb; i++) {
719 vector<int32_t> diffa(L,0),diffb(L,0);
721 for (
auto j = VL[i].begin(); j!=VL[i].end(); j++) {
722 valueType cur =
get(*j);
723 if (*j < ma) pdiff(diffa,cur);
724 else pdiff(diffb, cur);
726 for (
int bitID = 0; bitID<L; bitID++)
727 for (
int j = 0; j <8; j++)
728 if (da[j]*diffa[bitID] + db[j] *diffb[bitID] > 0)
730 sa[bitID][j]+=diffa[bitID];
731 sb[bitID][j]+=diffb[bitID];
735 vector<int32_t> direction(L,0);
736 for (
int bitID = 0; bitID <L; bitID++) {
737 double ratemin = 1.0;
738 for (
int dir = 0; dir <8*4; dir++) {
739 int setA = ((dir & 8) !=0);
740 int setB = ((dir & 16)!=0);
741 double rate = getrate(ma,mb,na[bitID]+setA*emptyA+sa[bitID][dir & 0x7], nb[bitID]+setB*emptyB+sb[bitID][dir & 0x7]);
742 if ((rate-ideal)*(rate-ideal) < ratemin) {
743 ratemin = (rate-ideal)*(rate-ideal);
744 direction[bitID] = dir;
748 valueType veA=0, veB=0;
750 for (
int bitID = 0; bitID<L; bitID++) {
751 veA |= ((direction[bitID] & 8) ? (1<<bitID) : 0);
752 veB |= ((direction[bitID] & 16) ? (1<<bitID) : 0);
754 for (
int i = 0; i < ma; i++)
if (!filled[i])
756 for (
int i = ma; i <ma+mb; i++)
if (!filled[i])
759 for (
int i = 0; i < ma+mb; i++) {
760 vector<int32_t> diffa(L,0),diffb(L,0);
761 for (
auto j = VL[i].begin(); j!=VL[i].end(); j++) {
762 valueType cur =
get(*j);
763 if (*j < ma) pdiff(diffa,cur);
764 else pdiff(diffb, cur);
767 for (
int bitID = 0; bitID<L; bitID++)
768 if (da[direction[bitID] &7 ] * diffa[bitID] + db[direction[bitID] &7] * diffb[bitID] > 0)
771 for (
auto j = VL[i].begin(); j!=VL[i].end(); j++) {
772 valueType newvalue =
get(*j)^vv;
vector< double > getRatio()
returns vector, length = L. position x: the probability that query return 1 on the x-th lowest bit...
Definition: othello.h:643
vector< valueType > mem
actual memory space for arrayA and arrayB.
Definition: othello.h:30
vector< uint32_t > fillcount
Enabled only when the values of the query is not pre-defined. This supports othelloIndex query...
Definition: othello.h:41
uint32_t mb
length of arrayB
Definition: othello.h:35
A hash function that hashes keyType to uint32_t. When SSE4.2 support is found, use sse4...
Definition: hash.h:10
void addkeys(int newkeys, void *values, uint32_t valuesize)
after putting some keys into keys, call this function to add keys into Othello. values shall be store...
Definition: othello.h:285
void writeDataToBinaryFile(FILE *pF)
write array to binary file.
Definition: othello.h:370
void loadDataFromBinaryFile(FILE *pF)
load the array from file.
Definition: othello.h:364
void randomflip()
adjust the array so that for random alien query, returns 0 or 1 with equal probability on each bit...
Definition: othello.h:657
#define MAX_REHASH
Maximum number of rehash tries before report an error. If this limit is reached, Othello build fails...
Definition: othello.h:28
Describes the data structure l-Othello. It classifies keys of keyType into 2^L classes.
Definition: othello.h:25
uint32_t allowed_conflicts
The number of keys that can be skipped during construction.
Definition: othello.h:44
uint64_t queryInt(const keyType &k)
returns a 64-bit integer query value for a key.
Definition: othello.h:310
Othello(uint8_t _L, vector< keyType > &keys, vector< VT > &values, bool _autoclear=true, int32_t allowed_conflicts=-1)
Construct othello with vectors.
Definition: othello.h:209
void finishBuild()
release memory space used during construction and forbid future modification of arrayA and arrayB...
Definition: othello.h:215
DisjointSet disj
Disjoint Set data structure that helps to test the acyclicity.
Definition: othello.h:40
Othello(uint8_t _L, keyType *_keys, uint32_t keycount, bool _autoclear=true, void *_values=NULL, size_t _valuesize=0, int32_t _allowed_conflicts=-1)
Construct l-Othello.
Definition: othello.h:150
void exportInfo(unsigned char *v)
export the information of the Othello, not including the array, to a memory space.
Definition: othello.h:235
void removeOneKey(uint32_t kid)
remove one key with the particular index from the keylist.
Definition: othello.h:484
void setAlienPreference(double ideal=1.0)
adjust the array so that for random alien query, return 1 with probability that is close to the ideal...
Definition: othello.h:689
valueType query(const keyType &k, uint32_t &ha, uint32_t &hb)
compute the hash value of key and return query value.
Definition: othello.h:344
std::string human(uint64_t word)
convert a 64-bit Integer to human-readable format in K/M/G. e.g, 102400 is converted to "100K"...
Definition: io_helper.h:122
Othello(unsigned char *v)
load the infomation of the Othello from memory.
Definition: othello.h:257
vector< uint32_t > getCnt()
returns vector, length = 2L. position x: the number of 1s on the x-th lowest bit, for array A...
Definition: othello.h:613
uint32_t L
the length of return value.
Definition: othello.h:31
vector< keyType > removedKeys
The list of removed keys.
Definition: othello.h:45
void updatevalue(uint32_t kid, void *values, uint32_t valuesize)
update the value for one particular key.
Definition: othello.h:401
Disjoint Set data structure. Helps to test the acyclicity of the graph during construction.
Definition: disjointset.h:10
uint32_t ma
length of arrayA.
Definition: othello.h:34