Othello
 All Classes Files Functions Variables Macros
io_helper.h
Go to the documentation of this file.
1 
5 #pragma once
#include <errno.h>

#include <algorithm>
#include <cstdint>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <iomanip>
#include <iostream>
#include <queue>
#include <sstream>
#include <string>
#include <vector>
/**
 * Interface for converting a key from its raw (text) format to a keyType and
 * for splitting a key into a group id plus the remaining key bits.
 */
template <typename keyType, typename valueType>
class IOHelper {
public:
    /// Virtual so that a concrete helper can be destroyed through an IOHelper pointer.
    virtual ~IOHelper() = default;

    /// Convert an input-style line to a key/value pair.
    virtual bool convert(char *s, keyType *T, valueType *V) = 0;

    /// Convert an input-style line to a key only.
    virtual bool convert(char *s, keyType *T) = 0;

    /// Split a key into two parts, grp and keyInGroup.
    virtual void splitgrp(const keyType &key, uint32_t &grp, keyType &keyInGroup) = 0;

    /// Combine grp and keyInGroup back into the original key.
    virtual void combgrp(keyType &key, uint32_t &grp, keyType &keyInGroup) = 0;
};
41 
// Redeclaration only: lets this interface name IOHelper without depending on
// where its definition sits in the header.
template <typename keyType, typename valueType>
class IOHelper;

/**
 * Interface for readers that produce a stream of key/value pairs.
 */
template <typename keyType, typename valueType>
class FileReader {
public:
    /// Helper used by concrete readers to parse raw input; readers that need
    /// it assign it in their constructors. Null until assigned.
    IOHelper<keyType, valueType> *helper = nullptr;

    /// Whether the underlying data is reported as sorted.
    virtual bool getFileIsSorted() = 0;

    /// Fetch the next pair into *T / *V; false once no more data is available.
    virtual bool getNext(keyType *T, valueType *V) = 0;

    /// Release the underlying resources.
    virtual void finish() = 0;

    /// Restart reading from the beginning, where supported.
    virtual void reset() = 0;

    virtual ~FileReader() = default;
};
53 
60 template <typename keyType, typename valueType>
61 class ConstantLengthKmerHelper : public IOHelper<keyType,valueType> {
62 public:
63  uint8_t kmerlength;
64  uint8_t splitbit;
65  ConstantLengthKmerHelper(uint8_t _kmerlength, uint8_t _splitbit): kmerlength(_kmerlength),splitbit(_splitbit) {};
67  inline bool convert(char *s, keyType *k, valueType *v) {
68  char *s0;
69  s0 = s;
70  switch (*s) {
71  case 'A':
72  case 'T':
73  case 'G':
74  case 'C':
75  keyType ret = 0;
76  while (*s == 'A' || *s == 'C' || *s =='T' || *s =='G') {
77  ret <<=2;
78  switch (*s) {
79  case 'T':
80  ret++;
81  case 'G':
82  ret++;
83  case 'C':
84  ret++;
85  }
86  s++;
87  }
88  *k = ret;
89  valueType tv;
90  sscanf(s,"%d",&tv);
91  *v = tv;
92  return true;
93 
94  }
95  return false;
96 
97  }
98  inline bool convert( char *s, keyType *k) {
99  valueType v;
100  return convert(s,k,&v);
101  }
102  void splitgrp(const keyType &key, uint32_t &grp, keyType &keyInGroup) {
103  int mvcnt = 2 * kmerlength - splitbit;
104  keyType high = (key >> mvcnt);
105  grp = high;
106  keyType lowmask = 1;
107  lowmask <<= mvcnt;
108  keyInGroup = (key & (lowmask-1));
109  }
110 
111  void combgrp(keyType &key, uint32_t &grp, keyType &keyInGroup) {
112  key = grp;
113  key <<= (2*kmerlength - splitbit);
114  key |= (keyInGroup);
115  }
116 
117 };
118 
119 
120 
/**
 * Convert a 64-bit integer to a human-readable string in K/M/G units
 * (powers of 1024), e.g. 102400 becomes "100K".
 *
 * Values up to 1024 are printed verbatim. Between 1x and 10x of a unit the
 * quotient is printed as a floating-point number, above that as an integer.
 *
 * Declared inline because this function is defined in a header.
 */
inline std::string human(uint64_t word) {
    const uint64_t kKilo = 1024ULL;
    const uint64_t kMega = 1048576ULL;
    const uint64_t kGiga = 1073741824ULL;

    std::ostringstream out;
    if (word <= kKilo) {
        out << word;
    } else if (word <= 10 * kKilo) {
        out << std::setprecision(2) << word * 1.0 / kKilo << "K";
    } else if (word <= kMega) {
        out << word / kKilo << "K";
    } else if (word <= 10 * kMega) {
        out << word * 1.0 / kMega << "M";
    } else if (word <= kGiga) {
        out << word / kMega << "M";
    } else {
        out << word * 1.0 / kGiga << "G";
    }
    return out.str();
}
134 
/**
 * Split a C-style string on a delimiter character.
 *
 * Empty tokens (produced by consecutive delimiters) are dropped. A null
 * `str` yields an empty result.
 *
 * Declared inline because this function is defined in a header.
 */
inline std::vector<std::string> split(const char *str, char deli) {
    std::vector<std::string> tokens;
    if (str == NULL) return tokens;

    std::istringstream in(str);
    std::string token;
    while (std::getline(in, token, deli)) {
        if (!token.empty()) tokens.push_back(token);
    }
    return tokens;
}
146 #include <cstring>
147 
148 template <typename keyType, typename valueType>
149 class KmerFileReader : public FileReader<keyType,valueType> {
150  FILE *f;
151  bool fIsSorted;
152 public:
153  KmerFileReader(const char *fname, IOHelper<keyType,valueType> *_helper, bool b) {
154  fIsSorted = b;
156  char buf[1024];
157  strcpy(buf,fname);
158  if (buf[strlen(buf)-1]=='\n')
159  buf[strlen(buf)-1] = '\0';
160  f=fopen(buf,"r");
161  printf("OpenFile %s %x\n",fname,f);
162  }
163  void finish() {
164  fclose(f);
165  }
166  void reset() {
167  rewind(f);
168  }
169  bool getFileIsSorted() {
170  return fIsSorted;
171  }
172  ~KmerFileReader() {
173  finish();
174  }
175  bool getNext(keyType *T, valueType *V) {
176  char buf[1024];
177  if (fgets(buf,sizeof(buf),f)==NULL) return false;
178  return FileReader<keyType,valueType>::helper->convert(buf,T,V);
179  }
180 };
181 
/**
 * Packed key/value record.
 *
 * The ordering is deliberately inverted: `x < y` holds when x has the larger
 * key.
 */
template <typename keyType, typename valueType>
struct KVpair {
    keyType k;
    valueType v;

    friend bool operator<(const KVpair &lhs, const KVpair &rhs) {
        const bool lhsHasLargerKey = lhs.k > rhs.k;
        return lhsHasLargerKey;
    }
} __attribute__((packed));
190 
191 
192 
193 template <typename keyType, typename valueType>
194 class compressFileReader : public FileReader <keyType, valueType> {
195  FILE *f;
196  bool fIsSorted;
197  static const int buflen = 1024;
198  int curr = 0;
199  int max = 0;
200  unsigned char buf[1024*64];
201  uint32_t kl, vl;
202 public:
203  compressFileReader( const char * fname, IOHelper <keyType, valueType> * _helper, uint32_t klength, uint32_t valuelength, bool _fIsSorted = true) {
204  kl = klength;
205  vl = valuelength;
206  FileReader<keyType,valueType> :: helper = _helper;
207  fIsSorted = _fIsSorted;
208  char buf[1024];
209  strcpy(buf,fname);
210  if (buf[strlen(buf)-1]=='\n')
211  buf[strlen(buf)-1] = '\0';
212  f=fopen(buf,"rb");
213  printf("OpenFile to binary read Kmers %s %x\n",fname,f);
214  }
215  void finish() {
216  fclose(f);
217  }
218  void reset() {
219  printf("Do not support reset()\n");
220  }
221  bool getFileIsSorted() {
222  return fIsSorted;
223  }
225  finish();
226  }
227  bool getNext(keyType *k, valueType *v) {
228  if (curr == max) {
229  max = fread(buf,kl+vl,buflen,f);
230  if (max == 0) return false;
231  curr = 0;
232  }
233  *k =0;
234  *v =0;
235  memcpy( (void *) k, buf + curr*(kl+vl), kl);
236  memcpy( (void *) v, buf + curr*(kl+vl)+kl, vl);
237  curr++;
238  return true;
239  }
240 
241 };
242 
243 
244 template <typename keyType, typename valueType>
245 class MultivalueFileReaderWriter : public FileReader <keyType, valueType> {
246  FILE *f;
247  static const int buflen = 8192;
248  int curr = 0;
249  int max = 0;
250  unsigned char buf[buflen * 2];
251  uint32_t kl,vl;
252  bool isRead;
253  bool isclosed = false;
254 public:
255  static const valueType EMPTYVALUE = ~0;
256  bool valid(uint32_t value) {
257  if (vl == 1) return value!=0xFF;
258  if (vl == 2) return value!=0xFFFF;
259  if (vl == 4) return value!=0xFFFFFFFFUL;
260  }
261  MultivalueFileReaderWriter( const char * fname, uint32_t klength, uint32_t valuelength, bool _isRead) {
262  kl = klength;
263  vl = valuelength;
264  char buf[1024];
265  strcpy(buf,fname);
266  if (buf[strlen(buf)-1]=='\n')
267  buf[strlen(buf)-1] = '\0';
268  if (isRead = _isRead)
269  f=fopen(buf,"rb");
270  else f = fopen(buf,"wb");
271  memset(buf,0,sizeof(buf));
272  printf("OpenFile to binary read/write Multivalue Kmers file %s %x\n",fname,f);
273  curr = 0;
274  max = 0;
275  }
276  void finish() {
277  if (!isclosed) {
278  if (!isRead) {
279  fwrite(buf,sizeof(buf[0]), curr, f);
280  }
281  fclose(f);
282  }
283  isclosed = true;
284  }
285  void reset() {
286  rewind(f);
287  }
288  bool getFileIsSorted() {
289  return false;
290  }
292  finish();
293  }
294  void getmore() {
295  memmove(buf, buf+curr, max - curr);
296  max -= curr;
297  curr = 0;
298  if (max < buflen) {
299  max += fread(buf+max,1,buflen, f);
300  }
301  }
302  bool get(void * mem, uint32_t l) {
303  if (curr + l >= max) getmore();
304  if (curr + l > max) return false;
305  memcpy(mem,buf+curr,l);
306  curr+=l;
307  return true;
308  }
309  bool getNext(keyType *k, valueType *v) {
310  if (!get(k,kl)) return false;
311  while (true) {
312  get(v,vl);
313  if (!valid(*v)) return true;
314  v++;
315  }
316  /*
317  if (curr + kl+vl >= max) {
318  getmore();
319  if (max < kl) return false;
320  }
321  *k = 0;
322  memcpy( (void *) k, buf + curr, kl);
323  curr+=kl;
324  memcpy( (void *) v, buf + curr, vl);
325  curr += vl;
326  while (valid(*v)) {
327  v++;
328  if (curr+ vl + vl >= max) getmore();
329  memcpy( (void *) v, buf + curr, vl);
330  curr += vl;
331  }
332  return true;
333  */
334  }
335  void add(void * mem, uint32_t l) {
336  memcpy(buf+curr, mem, l);
337  curr += l;
338  if (curr >= buflen) {
339  fwrite( buf, 1, buflen, f);
340  if (curr > buflen) {
341  memcpy(buf,buf+buflen,curr-buflen);
342  }
343  curr -= buflen;
344  }
345  }
346  void write(keyType *k ,valueType *v) {
347  add((void *) k, kl);
348  while (valid(*v)) {
349  add((void *) v, vl);
350  v++;
351  }
352  }
353  void write(keyType *k, std::vector<valueType> &vv) {
354  add ( (void *) k, kl);
355  void *p; uint8_t a8; uint16_t a16; uint32_t a32;
356  if (vl == 1) p = &a8;
357  if (vl == 2) p = &a16;
358  if (vl == 4) p = &a32;
359  for (auto v: vv) {
360  a8 = a16 = a32 = v;
361  add(p,vl);
362  }
363  a8 =0xFF; a16 = 0xFFFF; a32 = 0xFFFFFFFF;//~0ULL;
364  add (p,vl);
365  }
366 };
367 
368 using namespace std;
369 
/**
 * Interface for readers that produce a stream of keys.
 */
template <typename keyType>
class KmerReader {
public:
    /// Virtual so that concrete readers can be deleted through a KmerReader pointer.
    virtual ~KmerReader() = default;

    /// Release the underlying resources.
    virtual void finish() = 0;

    /// Fetch the next key into *k; false once no more keys are available.
    virtual bool getNext(keyType *k) = 0;
};
376 
377 template <typename KVpair>
378 class BinaryKmerReader: public KmerReader<KVpair> {
379  FILE * f;
380  static const int buflen = 16;
381  KVpair buff[1024];
382  int curr = 0;
383  int max = 0;
384  bool isclosed =false;
385 public:
386  BinaryKmerReader(const char * fname) {
387  char buf[1024];
388  strcpy(buf,fname);
389  if (buf[strlen(buf)-1]=='\n')
390  buf[strlen(buf)-1] = '\0';
391  f=fopen(buf,"rb");
392  printf("OpenFile to read Kmers %s %x\n",fname,f);
393  if (f==0) {
394  printf("errno %d %s\n",errno,strerror(errno));
395  }
396  curr = 0;
397  }
398  void finish() {
399  if (!isclosed) {
400  fclose(f);
401  }
402  isclosed = true;
403  }
404  ~BinaryKmerReader() { finish(); }
405  bool getNext(KVpair *ret) {
406  if (curr == max) {
407  max = fread(buff,sizeof(buff[0]),buflen,f);
408  memset(ret,0xFFFFFFFFUL,sizeof(KVpair));
409  if (max == 0) return false;
410  curr = 0;
411  }
412  memcpy(ret, &buff[curr], sizeof(buff[curr]));
413  curr++;
414  return true;
415  }
416 };
417 
/**
 * Writes KVpair records to a binary file as a plain array, buffering
 * `buflen` records between writes.
 */
template <typename KVpair>
class BinaryKmerWriter {
    FILE *f;
    int curr = 0;  // records pending in buf

public:
    KVpair buf[1024];

    /// Open fname for binary writing; one trailing '\n' is stripped.
    BinaryKmerWriter(const char *fname) : f(NULL) {
        std::string path(fname != NULL ? fname : "");
        if (!path.empty() && path[path.size() - 1] == '\n') path.erase(path.size() - 1);

        f = fopen(path.c_str(), "wb");
        printf("OpenFile to write Kmers %s %p\n", path.c_str(), static_cast<void *>(f));
        curr = 0;
        // Clears the member record buffer (a local array of the same name
        // used to shadow it).
        memset(static_cast<void *>(buf), 0, sizeof(buf));
    }

    /// Flushes and closes if finish() was not called explicitly.
    ~BinaryKmerWriter() { finish(); }

    static const int buflen = 16;

    /// Buffer one record, writing the batch out once `buflen` are pending.
    void write(KVpair *p) {
        memcpy(static_cast<void *>(&buf[curr]), p, sizeof(buf[curr]));
        curr++;
        if (curr == buflen) {
            if (f != NULL) fwrite(buf, sizeof(buf[0]), buflen, f);
            curr = 0;
        }
    }

    /// Write any pending records and close the file. Idempotent.
    void finish() {
        if (f != NULL) {
            if (curr > 0) fwrite(buf, sizeof(buf[0]), curr, f);
            fclose(f);
            f = NULL;
        }
        curr = 0;
    }
};
449 
451 template <typename keyType>
452 class SortedKmerTxtReader : public KmerReader<keyType> {
453  BinaryKmerReader<keyType> * binaryReader = NULL;
454  uint32_t pointer;
455  vector<keyType> * vK;
456  public:
457  SortedKmerTxtReader(const char * fname, uint32_t kmerlength, const char *tmpfilename) {
458  ConstantLengthKmerHelper<keyType, uint64_t> helper(kmerlength, 0);
460  reader = new KmerFileReader<keyType,uint64_t>(fname, &helper, false);
461  keyType k; uint64_t v;
462  vK = new vector<keyType>();
463  while (reader->getNext(&k, &v)) {
464  vK->push_back(k);
465  }
466  sort(vK->begin(),vK->end());
467  delete reader;
468  if (tmpfilename != NULL) {
469  string binaryfilename (tmpfilename);
470  BinaryKmerWriter<keyType> writer(binaryfilename.c_str());
471  for (uint64_t k:*vK)
472  writer.write(&k);
473  writer.finish();
474  binaryReader = new BinaryKmerReader<keyType> (binaryfilename.c_str());
475  }
476  else pointer = 0;
477  }
479  finish();
480  if (binaryReader)
481  delete binaryReader;
482  }
483  bool getNext(keyType *k) {
484  if (binaryReader!= NULL)
485  return binaryReader->getNext(k);
486  else {
487  if (pointer == vK->size()) {
488  delete vK;
489  return false;
490  }
491  *k = (*vK)[pointer++];
492  return true;
493  }
494  }
495  void finish() {
496  if (binaryReader)
497  binaryReader->finish();
498  }
499 };
500 
501 
502 template <typename keyType, typename valueType>
503 class taxoTreeBuilder: public FileReader <keyType, valueType> {
504  vector< FILE *> fV;
505  vector<compressFileReader <keyType, valueType> *> readerV;
506  vector< vector< int > > NCBI; //texonomyToNCBIID;
507  vector< int > stID;
508  struct KIDpair {
509  keyType k;
510  uint32_t id;
511  bool finished;
512  bool friend operator <( const KIDpair &a, const KIDpair &b) {
513  if (a.finished != b.finished) return (((int) a.finished) > ((int) b.finished));
514  return a.k>b.k;
515  }
516  };
517 public:
518  void finish() {
519  for (auto f: fV) fclose(f);
520  }
521  void reset() {
522  printf(" Do not support reset() \n");
523  }
524  int levelcount;
525  vector<vector<int> > NCBI_local;
526  vector<int> localshift;
527  vector<vector<string> > NCBI_ID;
528  vector<KmerReader<uint64_t> *> readers;
529  vector<MultivalueFileReaderWriter<uint64_t, uint16_t> *> grpreaders; //must be 64-bit kmers, and 16-bit grpids.
530  priority_queue<KIDpair> PQ;
531  bool combineMode = false; //used when there are >=800 files;
532  uint32_t combineCount; // split the file into combineCount groups,
533  bool getFileIsSorted() {
534  return true;
535  }
536  void groupFile(string fname, vector<string> lf, string prefix, string suffix, int32_t idshift, bool useBinaryKmerFile,uint32_t KmerLength, const char * tmpfolder) {
537  vector<KmerReader<keyType> *> readers;
538  priority_queue<KIDpair> PQN;
539  for (string s: lf) {
540  string fname = prefix + s + suffix;
541  if (useBinaryKmerFile)
542  readers.push_back(new BinaryKmerReader<keyType>(fname.c_str()));
543  else {
544  string tmpfname(tmpfolder); tmpfname = tmpfname + s + ".bintmp";
545  readers.push_back(new SortedKmerTxtReader<keyType>(fname.c_str(),KmerLength,NULL));
546  }
547  keyType key;
548  readers[readers.size()-1]->getNext(&key);
549  KIDpair kid = {key, idshift+readers.size()-1, false};
550  PQN.push(kid);
551  }
552 
554  // Loop key for these files;
555  while (true) {
556  keyType key = PQN.top().k;
557  uint32_t id = PQN.top().id;
558  vector<uint16_t> ret;
559  if (PQN.top().finished) {
560  for (auto r: readers) {
561  r->finish();
562  delete r;
563  }
564  writer->finish();
565  delete writer;
566  return;
567  }
568  while (PQN.top().k == key && !PQN.top().finished) {
569  int tid = PQN.top().id;
570  ret.push_back(tid);
571  keyType nextk;
572  bool finish = !readers[tid-idshift]->getNext(&nextk);
573  PQN.pop();
574  KIDpair kid = {nextk, tid, finish};
575  PQN.push(kid);
576  }
577  writer->write(&key, ret);
578  }
579  }
580  vector< vector<uint16_t> > grpTmpValue;
581 
582  taxoTreeBuilder(const char * NCBIfname, const char * fnameprefix, const char * fnamesuffix, const char * tmpFileDirectory, uint32_t KmerLength, uint32_t splitbit, bool useBinaryKmerFile = true ) {
584  FILE * fNCBI;
585  string prefix ( fnameprefix);
586  string suffix (fnamesuffix);
587  fNCBI = fopen(NCBIfname, "r");
588  //Assuming the file is tab-splited,
589  //Species_index Species_ID Species_name Genus_index Genus_ID Genus_name Family_index Family_ID Family_name Order_index Order_ID Order_name Class_index Class_ID Class_name Phylum_index Phylum_ID Phylum_name
590  char buf[4096];
591  fgets(buf, 4096, fNCBI); // skip the first line
592  vector<string> vv = split(buf, '\t');
593  levelcount = vv.size()/3;
594  NCBI_local.clear();
595  NCBI_ID.clear();
596  NCBI_local.resize(levelcount);
597  NCBI_ID.resize(levelcount);
598  readers.clear();
599  vector<string> fnames;
600  while (true) {
601  if (fgets(buf, 4096, fNCBI) == NULL) break; // read a Species
602  vector<string> vv = split(buf, '\t');
603  if (vv.size()<2) break;
604  for (int i = 0 ; i*3 < vv.size(); i++) {
605  int localID = atoi(vv[i*3].c_str());
606  NCBI_local[i].push_back(localID);
607  NCBI_ID[i].push_back(vv[i*3+1]);
608  }
609  fnames.push_back(vv[1]);
610  }
611  localshift.clear();
612  localshift.push_back(1);
613  for (int i = 0; i < levelcount; i++)
614  localshift.push_back(localshift[i] + *max_element(NCBI_local[i].begin(), NCBI_local[i].end())+1);
615 
616  int nn = 50;
617  combineMode = (fnames.size()>nn);
618  if (combineMode) {
619  int curr = 0;
620  int combineCount = 0;
621  vector<string> * fnamesInThisgrp ;
622  vector<string> grpfnames;
623  while (curr < fnames.size()) {
624  if (curr + nn < fnames.size())
625  fnamesInThisgrp = new vector<string> (fnames.begin()+curr, fnames.begin()+curr+nn);
626  else
627  fnamesInThisgrp = new vector<string> (fnames.begin()+curr, fnames.end());
628  stringstream ss;
629  string tmpFolder(tmpFileDirectory);
630 
631  ss<<tmpFolder<<"TMP"<<grpfnames.size();
632  string fnamegrp;
633  ss>> fnamegrp;
634  grpfnames.push_back(fnamegrp);
635  printf("merge kmer files %d %d to grp %s\n", curr, curr+fnamesInThisgrp->size()-1, fnamegrp.c_str());
636  groupFile(fnamegrp, *fnamesInThisgrp, prefix, suffix, curr, useBinaryKmerFile,KmerLength,tmpFileDirectory);
637  curr += fnamesInThisgrp->size();
638  delete fnamesInThisgrp;
639  }
640  combineCount = grpfnames.size();
641  for (string v: grpfnames) {
642  grpreaders.push_back( new MultivalueFileReaderWriter<uint64_t, uint16_t>(v.c_str(), 8,2, true));
643  keyType key;
644  uint16_t valuebuf[1024];
645  grpreaders[grpreaders.size()-1]->getNext(&key, valuebuf);
646  vector<uint16_t> Vvaluebuf;
647  for (int i = 0 ; grpreaders[0]->valid(valuebuf[i]); i++)
648  Vvaluebuf.push_back(valuebuf[i]);
649  grpTmpValue.push_back(Vvaluebuf);
650  KIDpair kid = {key, grpreaders.size()-1, false};
651  PQ.push(kid);
652  }
653  }
654  else
655  for (int i = 0 ; i < NCBI_ID.size(); i++) {
656  string fname = prefix + fnames[i] + suffix;
657  if (useBinaryKmerFile)
658  readers.push_back(new BinaryKmerReader<keyType>(fname.c_str()));
659  else {
660  string tmpfname(tmpFileDirectory); tmpfname = tmpfname + fnames[i] + ".bintmp";
661  readers.push_back(new SortedKmerTxtReader<keyType>(fname.c_str(),KmerLength,tmpfname.c_str()));
662  }
663  keyType key;
664  readers[readers.size()-1]->getNext(&key);
665  KIDpair kid = {key, readers.size()-1, false};
666  PQ.push(kid);
667  }
668  fclose(fNCBI);
669  string IDLfname(tmpFileDirectory); IDLfname+= "IDList.txt";
670  FILE * IDLf; IDLf = fopen(IDLfname.c_str(),"w");
671  for (int t : localshift) {
672  fprintf(IDLf,"%d\n",t);
673  }
674  fclose(IDLf);
675  }
676  ~taxoTreeBuilder() {
677  if (combineMode) {
678  for (int i = 0 ; i < grpreaders.size(); i++)
679  delete grpreaders[i];
680  }
681  else
682  for (int i = 0 ; i < readers.size(); i++)
683  delete readers[i];
685  }
686  bool getNext( keyType *k, valueType *v) {
687  int anslevel = 0;
688  keyType key = PQ.top().k;
689  vector<int> ret;
690  if (PQ.top().finished) {
691  finish();
692  return false;
693  }
694  // printf("Find key %llx:", key);
695  while (PQ.top().k == key && !PQ.top().finished) {
696  int tid;
697  tid = PQ.top().id;
698  keyType nextk;
699  bool finish;
700  if (combineMode) {
701  ret.insert(ret.end(),grpTmpValue[tid].begin(),grpTmpValue[tid].end());
702  int ll = grpTmpValue[tid].size();
703  // printf(" %d keys: (from %d)\t", ll, tid);
704  // for (int i: grpTmpValue[tid])
705  // printf("%x\t",i);
706  uint16_t valuebuf[1024];
707  finish = !grpreaders[tid]->getNext(&nextk, valuebuf);
708  grpTmpValue[tid].clear();
709  for (int i = 0; grpreaders[tid]->valid(valuebuf[i]); i++)
710  grpTmpValue[tid].push_back(valuebuf[i]);
711  // printf("Next Has ::%d::", grpTmpValue[tid].size());
712  }
713  else {
714  ret.push_back(tid);
715  // printf(" %x\t",PQ.top().id);
716  finish = !readers[tid]->getNext(&nextk);
717  }
718  PQ.pop();
719  KIDpair kid = {nextk, tid, finish};
720  PQ.push(kid);
721  }
722  *k = key;
723 
724  for (int i = 0; i< levelcount; i++) {
725  bool flag = true;
726  for (int j = 0; j < ret.size() && flag; j++)
727  flag = (NCBI_local[i][ret[j]]==NCBI_local[i][ret[0]]);
728  if (flag) {
729  *v = localshift[i] + NCBI_local[i][ret[0]];
730  return true;
731  }
732  }
733  *v = localshift[levelcount];
734  return true;
735  }
736 };
Definition: io_helper.h:371
virtual void splitgrp(const keyType &key, uint32_t &grp, keyType &keyInGroup)=0
Split a keyType value into two parts, groupID and keyInGroup, using the highest splitbit bits...
virtual void combgrp(keyType &key, uint32_t &grp, keyType &keyInGroup)=0
Combine groupID/keyInGroup back into the original key.
std::vector< std::string > split(const char *str, char deli)
Split a C-style string on a delimiter character.
Definition: io_helper.h:136
Definition: io_helper.h:149
Read k-mers from an unsorted text file and sort them.
Definition: io_helper.h:452
Interface for converting a key from its raw format to a keyType. Split key into groups...
Definition: io_helper.h:19
Definition: io_helper.h:503
void combgrp(keyType &key, uint32_t &grp, keyType &keyInGroup)
Combine groupID/keyInGroup back into the original key.
Definition: io_helper.h:111
bool convert(char *s, keyType *k, valueType *v)
Convert an input-style line to a key/value pair.
Definition: io_helper.h:67
Definition: io_helper.h:183
void splitgrp(const keyType &key, uint32_t &grp, keyType &keyInGroup)
Split a keyType value into two parts, groupID and keyInGroup, using the highest splitbit bits...
Definition: io_helper.h:102
Definition: io_helper.h:378
bool convert(char *s, keyType *k)
skip the value.
Definition: io_helper.h:98
Definition: io_helper.h:43
std::string human(uint64_t word)
convert a 64-bit Integer to human-readable format in K/M/G. e.g, 102400 is converted to "100K"...
Definition: io_helper.h:122
uint8_t splitbit
group the keys according to the highest bits.
Definition: io_helper.h:64
Definition: io_helper.h:194
IOHelper for Constant-Length Kmers.
Definition: io_helper.h:61
uint8_t kmerlength
Assume all kmers are of the same length.
Definition: io_helper.h:63
Definition: io_helper.h:419
Definition: io_helper.h:245
virtual bool convert(char *s, keyType *T, valueType *V)=0
Convert an input-style line to a key/value pair.