Sample SNPs
Fast ordered sampling of rows from large text or binary files. Special cases for DNA variant files (.bed, VCF, HapMap, etc).
varfiles.hpp
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2017 Anthony J. Greenberg
3  *
4  * Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
5  *
6  * 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
7  *
8  * 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
9  *
10  * 3. Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission.
11  *
12  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
13  * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS
14  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
15  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
16  * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
17  * THE POSSIBILITY OF SUCH DAMAGE.
18  */
19 
21 
37 #ifndef varfiles_hpp
38 #define varfiles_hpp
39 
40 #include <fstream>
41 #include <string>
42 #include <vector>
43 #include <unordered_map>
44 #include <cstdint>
45 #include <limits>
46 
47 #include "populations.hpp"
48 
49 using std::fstream;
50 using std::string;
51 using std::vector;
52 using std::unordered_map;
53 using std::numeric_limits;
54 
55 namespace sampFiles {
// Forward declarations, one file family per line: base class, input class, output class
class VarFile;
class GbinFile; class GbinFileI; class GbinFileO;
class BedFile;  class BedFileI;  class BedFileO;
class GtxtFile; class GtxtFileI; class GtxtFileO;
class TpedFile; class TpedFileI; class TpedFileO;
class VcfFile;  class VcfFileI;  class VcfFileO;
class HmpFile;  class HmpFileI;  class HmpFileO;

/** \brief Buffer size (10 * 1024 * 1024) */
static constexpr size_t BUF_SIZE = 10485760;
/** \brief Machine epsilon for `double` */
constexpr double EPS = numeric_limits<double>::epsilon();
/** \brief pi */
constexpr double PI = 3.14159265358979323846264338328;

87 
/** \brief Base variant file class
 *
 * Abstract base of all file classes in this header. Holds the file stream;
 * derived classes provide `open()` and `close()`.
 */
class VarFile {
protected:
    /** \brief Variant file stream */
    fstream _varFile;

    /** \brief Default constructor (protected)
     *
     * Sets the stream to throw an exception when `badbit` is set.
     */
    VarFile() { _varFile.exceptions(fstream::badbit); }

public:
    /** \brief Copy constructor
     *
     * Defaulted. Note that `std::fstream` is not copyable, so the compiler defines this as deleted.
     */
    VarFile(const VarFile &in) = default;
    /** \brief Copy assignment (defaulted; see the note on the copy constructor) */
    VarFile &operator=(const VarFile &in) = default;
    /** \brief Move constructor */
    VarFile(VarFile &&in) = default;
    /** \brief Move assignment */
    VarFile &operator=(VarFile &&in) = default;
    /** \brief Destructor
     *
     * Virtual because this class is a polymorphic base: destroying a derived
     * object through a `VarFile` pointer must run the derived destructor.
     * Closes the stream if it is still open.
     */
    virtual ~VarFile() { if (_varFile.is_open()) _varFile.close(); }

    /** \brief Open stream */
    virtual void open() = 0;
    /** \brief Close stream */
    virtual void close() = 0;
};
117 
123  class GbinFile : public VarFile {
124  protected:
126  string _fileName;
128  size_t _nCols;
130  size_t _elemSize;
131 
132  public:
134  GbinFile() : VarFile(), _nCols{0}, _elemSize{sizeof(char)} {};
144  GbinFile(const string &fileName, const size_t &nCols, const size_t &elemSize) : VarFile(), _fileName{fileName}, _nCols{nCols}, _elemSize{elemSize} {};
145 
147  GbinFile(const GbinFile &in) = default;
149  GbinFile &operator=(const GbinFile &in) = default;
151  GbinFile(GbinFile &&in) = default;
153  GbinFile &operator=(GbinFile &&in) = default;
156 
158  virtual void open() {};
160  virtual void close();
161 
162  };
163 
169  class GbinFileI : GbinFile {
170  protected:
177  virtual uint64_t _numLines();
178 
179  public:
181  GbinFileI() : GbinFile() {};
189  GbinFileI(const string &fileName, const size_t &nCols, const size_t &elemSize) : GbinFile(fileName, nCols, elemSize) {};
191  GbinFileI(const GbinFileI &in) = default;
193  GbinFileI &operator=(const GbinFileI &in) = default;
195  GbinFileI(GbinFileI &&in) = default;
197  GbinFileI &operator=(GbinFileI &&in) = default;
200 
202  void open();
203 
212  void sample(GbinFileO &out, const uint64_t &n);
214  uint64_t nlines() { return _numLines(); };
215 
216  };
217 
223  class GbinFileO : public GbinFile {
224  friend class GbinFileI;
225  protected:
226 
227  public:
229  GbinFileO() : GbinFile() {};
237  GbinFileO(const string &fileName, const size_t &nCols, const size_t &elemSize) : GbinFile(fileName, nCols, elemSize) {};
239  GbinFileO(const GbinFileO &in) = default;
241  GbinFileO &operator=(const GbinFileO &in) = default;
243  GbinFileO(GbinFileO &&in) = default;
245  GbinFileO &operator=(GbinFileO &&in) = default;
248 
250  void open();
251 
252  };
257  class BedFile : public GbinFile {
258  protected:
259 
261  fstream _famFile;
263  fstream _bimFile;
265  string _fileStub;
271  static const vector<char> _masks;
277  static const unordered_map<char, string> _tests;
278 
279 
280  public:
282  BedFile();
287  BedFile(const string &stubName);
289  BedFile(const BedFile &in) = default;
291  BedFile &operator=(const BedFile &in) = default;
293  BedFile(BedFile &&in) = default;
295  BedFile &operator=(BedFile &&in) = default;
297  ~BedFile();
298 
300  virtual void open() {};
302  void close();
303 
304  };
305 
311  class BedFileI : public BedFile {
312  protected:
313 
320  uint64_t _numLines();
327  uint64_t _famLines();
336  uint64_t _famLines(fstream &fam);
337 
353  void _ld(const char *snp1, const char *snp2, const size_t &N, const unsigned short &pad, double &rSq, double &Dprime, double &dcnt1, double &dcnt2);
369  void _ld(const char *snp1, const char *snp2, const PopIndex &popID, vector<double> &rSq, vector<double> &Dprime, vector<double> &dcnt1, vector<double> &dcnt2);
370  public:
372  BedFileI() : BedFile() {};
377  BedFileI(const string &stubName) : BedFile(stubName) {};
379  BedFileI(const BedFileI &in) = default;
381  BedFileI &operator=(const BedFileI &in) = default;
383  BedFileI(BedFileI &&in) = default;
385  BedFileI &operator=(BedFileI &&in) = default;
388 
390  void open();
391 
400  void sample(BedFileO &out, const uint64_t &n);
401 
409  void sampleLD(const uint64_t &n);
418  void sampleLD(const PopIndex &popID, const uint64_t &n);
420  uint64_t nsnp() { return _numLines(); };
422  uint64_t nindiv() { return _famLines(); };
423 
424  };
425 
431  class BedFileO : public BedFile {
432  friend class BedFileI;
433  protected:
434 
435  public:
437  BedFileO() : BedFile() {};
442  BedFileO(const string &stubName) : BedFile(stubName) {};
444  BedFileO(const BedFileO &in) = default;
446  BedFileO &operator=(const BedFileO &in) = default;
448  BedFileO(BedFileO &&in) = default;
450  BedFileO &operator=(BedFileO &&in) = default;
453 
455  void open();
456 
457  };
463  class GtxtFile : public VarFile {
464  protected:
466  string _fileName;
468  bool _head;
469 
470  public:
472  GtxtFile() : VarFile(), _head{false} {};
478  GtxtFile(const string &fileName) : VarFile(), _fileName{fileName}, _head{false} {};
485  GtxtFile(const string &fileName, const bool &head) : VarFile(), _fileName{fileName}, _head{head} {};
486 
488  GtxtFile(const GtxtFile &in) = default;
490  GtxtFile &operator=(const GtxtFile &in) = default;
492  GtxtFile(GtxtFile &&in) = default;
494  GtxtFile &operator=(GtxtFile &&in) = default;
497 
499  virtual void open() {};
501  virtual void close();
502 
503  };
504 
510  class GtxtFileI : GtxtFile {
511  protected:
518  virtual uint64_t _numLines();
519 
520  public:
522  GtxtFileI() : GtxtFile() {};
527  GtxtFileI(const string &fileName) : GtxtFile(fileName) {};
533  GtxtFileI(const string &fileName, const bool &head) : GtxtFile(fileName, head) {};
535  GtxtFileI(const GtxtFileI &in) = default;
537  GtxtFileI &operator=(const GtxtFileI &in) = default;
539  GtxtFileI(GtxtFileI &&in) = default;
541  GtxtFileI &operator=(GtxtFileI &&in) = default;
544 
546  void open();
547 
557  void sample(GtxtFileO &out, const uint64_t &n, const bool &headSkip);
568  void sample(const uint64_t &n, const bool &headSkip, const char &delim, vector<string> &out);
570  uint64_t nlines() { return _numLines(); };
571 
572  };
573 
579  class GtxtFileO : public GtxtFile {
580  friend class GtxtFileI;
581  protected:
582 
583  public:
585  GtxtFileO() : GtxtFile() {};
590  GtxtFileO(const string &fileName) : GtxtFile(fileName) {};
596  GtxtFileO(const string &fileName, const bool &head) : GtxtFile(fileName, head) {};
598  GtxtFileO(const GtxtFileO &in) = default;
600  GtxtFileO &operator=(const GtxtFileO &in) = default;
602  GtxtFileO(GtxtFileO &&in) = default;
604  GtxtFileO &operator=(GtxtFileO &&in) = default;
607 
609  void open();
610 
611  };
612 
613 
618  class TpedFile : public GtxtFile {
619  protected:
621  fstream _tfamFile;
623  string _fileStub;
624 
625  public:
627  TpedFile() : GtxtFile() {_tfamFile.exceptions(fstream::badbit); };
632  TpedFile(const string &stubName) : GtxtFile(stubName + ".tped"), _fileStub{stubName} {_tfamFile.exceptions(fstream::badbit); }; // no headers in .tped
634  TpedFile(const TpedFile &in) = default;
636  TpedFile &operator=(const TpedFile &in) = default;
638  TpedFile(TpedFile &&in) = default;
640  TpedFile &operator=(TpedFile &&in) = default;
642  ~TpedFile();
643 
645  virtual void open() {};
647  void close();
648 
649  };
650 
656  class TpedFileI : public TpedFile {
657  protected:
664  uint64_t _famLines();
673  uint64_t _famLines(fstream &fam);
681  void _famCopy(fstream &fam);
688  uint64_t _numLines();
689 
690  public:
692  TpedFileI() : TpedFile() {};
697  TpedFileI(const string &stubName) : TpedFile(stubName) {};
699  TpedFileI(const TpedFileI &in) = default;
701  TpedFileI &operator=(const TpedFileI &in) = default;
703  TpedFileI(TpedFileI &&in) = default;
705  TpedFileI &operator=(TpedFileI &&in) = default;
708 
710  void open();
711 
720  void sample(TpedFileO &out, const uint64_t &n);
722  uint64_t nsnp() { return _numLines(); };
724  uint64_t nindiv() { return _famLines(); };
725 
726  };
727 
733  class TpedFileO : TpedFile {
734  friend class TpedFileI;
735  protected:
736 
737  public:
739  TpedFileO() : TpedFile() {};
744  TpedFileO(const string &stubName) : TpedFile(stubName) {};
746  TpedFileO(const TpedFileO &in) = default;
748  TpedFileO &operator=(const TpedFileO &in) = default;
750  TpedFileO(TpedFileO &&in) = default;
752  TpedFileO &operator=(TpedFileO &&in) = default;
755 
757  void open();
758  };
759 
765  class VcfFile : public GtxtFile {
766  protected:
767 
768  public:
770  VcfFile() : GtxtFile() {};
776  VcfFile(const string &fileName) : GtxtFile(fileName) {};
777 
779  VcfFile(const VcfFile &in) = default;
781  VcfFile &operator=(const VcfFile &in) = default;
783  VcfFile(VcfFile &&in) = default;
785  VcfFile &operator=(VcfFile &&in) = default;
787  ~VcfFile(){};
788 
790  void open() {};
792  void close();
793  };
794 
800  class VcfFileI : public VcfFile {
801  protected:
808  uint64_t _numLines();
809 
810  public:
812  VcfFileI() : VcfFile() {};
817  VcfFileI(const string &fileName) : VcfFile(fileName) {};
819  VcfFileI(const VcfFileI &in) = default;
821  VcfFileI &operator=(const VcfFileI &in) = default;
823  VcfFileI(VcfFileI &&in) = default;
825  VcfFileI &operator=(VcfFileI &&in) = default;
828 
830  void open();
831 
840  void sample(VcfFileO &out, const uint64_t &n);
842  uint64_t nsnp() { return _numLines(); };
843 
844  };
845 
851  class VcfFileO : public VcfFile {
852  friend class VcfFileI;
853  protected:
854 
855  public:
857  VcfFileO() : VcfFile() {};
862  VcfFileO(const string &fileName) : VcfFile(fileName) {};
864  VcfFileO(const VcfFileO &in) = default;
866  VcfFileO &operator=(const VcfFileO &in) = default;
868  VcfFileO(VcfFileO &&in) = default;
870  VcfFileO &operator=(VcfFileO &&in) = default;
873 
875  void open();
876 
877  };
878 
884  class HmpFile : public GtxtFile {
885  protected:
886 
887  public:
889  HmpFile() : GtxtFile() {};
895  HmpFile(const string &fileName) : GtxtFile(fileName) {};
896 
898  HmpFile(const HmpFile &in) = default;
900  HmpFile &operator=(const HmpFile &in) = default;
902  HmpFile(HmpFile &&in) = default;
904  HmpFile &operator=(HmpFile &&in) = default;
906  ~HmpFile(){};
907 
909  virtual void open() {};
911  virtual void close();
912 
913  };
914 
920  class HmpFileI : HmpFile {
921  protected:
928  uint64_t _numLines();
929 
930  public:
932  HmpFileI() : HmpFile() {};
937  HmpFileI(const string &fileName);
939  HmpFileI(const HmpFileI &in) = default;
941  HmpFileI &operator=(const HmpFileI &in) = default;
943  HmpFileI(HmpFileI &&in) = default;
945  HmpFileI &operator=(HmpFileI &&in) = default;
948 
950  void open();
951 
960  void sample(HmpFileO &out, const uint64_t &n);
962  uint64_t nsnp() { return _numLines(); };
963 
964  };
965 
971  class HmpFileO : public HmpFile {
972  friend class HmpFileI;
973  protected:
974 
975  public:
977  HmpFileO() : HmpFile() {};
982  HmpFileO(const string &fileName) : HmpFile(fileName) {};
984  HmpFileO(const HmpFileO &in) = default;
986  HmpFileO &operator=(const HmpFileO &in) = default;
988  HmpFileO(HmpFileO &&in) = default;
990  HmpFileO &operator=(HmpFileO &&in) = default;
993 
995  void open();
996 
997  };
998 
999 
1000 }
1001 
1002 #endif /* varfiles_hpp */
1003 
1004 
1005 
1006 
sampFiles::GbinFile::operator=
GbinFile & operator=(const GbinFile &in)=default
Copy assignment.
sampFiles::GtxtFile::operator=
GtxtFile & operator=(GtxtFile &&in)=default
Move assignment.
sampFiles::BedFile::BedFile
BedFile(BedFile &&in)=default
Move constructor.
sampFiles::VarFile::open
virtual void open()=0
Open stream.
sampFiles::HmpFileI::operator=
HmpFileI & operator=(const HmpFileI &in)=default
Copy assignment.
sampFiles::TpedFileO::TpedFileO
TpedFileO(const string &stubName)
File name constructor.
Definition: varfiles.hpp:744
sampFiles::VarFile::_varFile
fstream _varFile
Variant file stream.
Definition: varfiles.hpp:95
sampFiles::VcfFileI::~VcfFileI
~VcfFileI()
Destructor.
Definition: varfiles.hpp:827
sampFiles::HmpFile::HmpFile
HmpFile(const string &fileName)
Constructor with file name.
Definition: varfiles.hpp:895
sampFiles::HmpFileO::open
void open()
Open stream to write.
Definition: varfiles.cpp:2979
sampFiles::GbinFile::GbinFile
GbinFile()
Default constructor.
Definition: varfiles.hpp:134
sampFiles::TpedFileI::sample
void sample(TpedFileO &out, const uint64_t &n)
Sample SNPs and save to TPED file.
Definition: varfiles.cpp:2353
sampFiles::GbinFileO
Generic binary file output class.
Definition: varfiles.hpp:223
sampFiles::VcfFileI::VcfFileI
VcfFileI()
Default constructor.
Definition: varfiles.hpp:812
sampFiles::VcfFileO::~VcfFileO
~VcfFileO()
Destructor.
Definition: varfiles.hpp:872
sampFiles::VarFile::VarFile
VarFile()
Default constructor (protected)
Definition: varfiles.hpp:98
sampFiles::VcfFileI::operator=
VcfFileI & operator=(VcfFileI &&in)=default
Move assignment.
sampFiles::VcfFileI::VcfFileI
VcfFileI(VcfFileI &&in)=default
Move constructor.
sampFiles::BedFileO::open
void open()
Open stream to write.
Definition: varfiles.cpp:1748
sampFiles::TpedFile::operator=
TpedFile & operator=(TpedFile &&in)=default
Move assignment.
sampFiles::GtxtFileO::GtxtFileO
GtxtFileO(const GtxtFileO &in)=default
Copy constructor.
sampFiles::HmpFileI::sample
void sample(HmpFileO &out, const uint64_t &n)
Sample SNPs and save to HMP file.
Definition: varfiles.cpp:2872
sampFiles::BedFileI::operator=
BedFileI & operator=(BedFileI &&in)=default
Move assignment.
sampFiles::GtxtFileI::GtxtFileI
GtxtFileI(const string &fileName, const bool &head)
File name constructor with header specification.
Definition: varfiles.hpp:533
sampFiles::GbinFileO::~GbinFileO
~GbinFileO()
Destructor.
Definition: varfiles.hpp:247
sampFiles::HmpFile::~HmpFile
~HmpFile()
Destructor.
Definition: varfiles.hpp:906
sampFiles::TpedFileO::TpedFileO
TpedFileO(const TpedFileO &in)=default
Copy constructor.
sampFiles::GbinFile::close
virtual void close()
Close stream.
Definition: varfiles.cpp:90
sampFiles::GtxtFile::open
virtual void open()
Open stream (does nothing)
Definition: varfiles.hpp:499
sampFiles::VarFile
Base variant file class.
Definition: varfiles.hpp:92
sampFiles::GbinFileI::_numLines
virtual uint64_t _numLines()
Get number of rows in the binary file.
Definition: varfiles.cpp:64
sampFiles::BedFileO::BedFileO
BedFileO(BedFileO &&in)=default
Move constructor.
sampFiles::TpedFileO::operator=
TpedFileO & operator=(const TpedFileO &in)=default
Copy assignment.
sampFiles::TpedFileO::~TpedFileO
~TpedFileO()
Destructor.
Definition: varfiles.hpp:754
sampFiles::TpedFile::TpedFile
TpedFile(TpedFile &&in)=default
Move constructor.
sampFiles::HmpFileO::~HmpFileO
~HmpFileO()
Destructor.
Definition: varfiles.hpp:992
sampFiles::BedFileI::_ld
void _ld(const char *snp1, const char *snp2, const size_t &N, const unsigned short &pad, double &rSq, double &Dprime, double &dcnt1, double &dcnt2)
Between-SNP linkage disequilibrium (LD)
Definition: varfiles.cpp:435
sampFiles::BedFile::~BedFile
~BedFile()
Destructor.
Definition: varfiles.cpp:288
sampFiles::GtxtFile::GtxtFile
GtxtFile(const GtxtFile &in)=default
Copy constructor.
sampFiles::GbinFile
Generic binary file base class.
Definition: varfiles.hpp:123
sampFiles::GbinFileO::operator=
GbinFileO & operator=(const GbinFileO &in)=default
Copy assignment.
sampFiles::HmpFile::HmpFile
HmpFile()
Default constructor.
Definition: varfiles.hpp:889
sampFiles::TpedFileI::_numLines
uint64_t _numLines()
Get number of rows in the text file.
Definition: varfiles.cpp:2274
sampFiles::GtxtFileO::open
void open()
Open stream to write.
Definition: varfiles.cpp:2106
sampFiles::TpedFileI::TpedFileI
TpedFileI(TpedFileI &&in)=default
Move constructor.
sampFiles::BedFileO::operator=
BedFileO & operator=(BedFileO &&in)=default
Move assignment.
sampFiles::VcfFileO
VCF file output class.
Definition: varfiles.hpp:851
sampFiles::BedFile::open
virtual void open()
Open stream (does nothing)
Definition: varfiles.hpp:300
sampFiles::GbinFileI::open
void open()
Open stream to read.
Definition: varfiles.cpp:97
sampFiles::BedFileI::BedFileI
BedFileI(const BedFileI &in)=default
Copy constructor.
sampFiles::HmpFileO::HmpFileO
HmpFileO()
Default constructor.
Definition: varfiles.hpp:977
sampFiles::GbinFileI::operator=
GbinFileI & operator=(GbinFileI &&in)=default
Move assignment.
sampFiles::HmpFileO::operator=
HmpFileO & operator=(const HmpFileO &in)=default
Copy assignment.
sampFiles::GbinFileO::GbinFileO
GbinFileO(const string &fileName, const size_t &nCols, const size_t &elemSize)
File name constructor.
Definition: varfiles.hpp:237
sampFiles::BedFile
BED file base class.
Definition: varfiles.hpp:257
sampFiles::HmpFile
Hapmap (HMP) file base class.
Definition: varfiles.hpp:884
sampFiles::GtxtFileI::nlines
uint64_t nlines()
Number of rows in the object.
Definition: varfiles.hpp:570
sampFiles::TpedFileO::TpedFileO
TpedFileO(TpedFileO &&in)=default
Move constructor.
sampFiles::TpedFileI::operator=
TpedFileI & operator=(const TpedFileI &in)=default
Copy assignment.
sampFiles::GbinFile::~GbinFile
~GbinFile()
Destructor.
Definition: varfiles.hpp:155
sampFiles::BedFile::_masks
static const vector< char > _masks
Genotype bit masks.
Definition: varfiles.hpp:271
sampFiles::BedFile::_tests
static const unordered_map< char, string > _tests
Genotype bit tests.
Definition: varfiles.hpp:277
sampFiles::VarFile::VarFile
VarFile(const VarFile &in)=default
Copy constructor.
sampFiles::TpedFileI::nindiv
uint64_t nindiv()
Number of individuals in the object.
Definition: varfiles.hpp:724
sampFiles::BedFile::BedFile
BedFile()
Default constructor.
Definition: varfiles.cpp:269
sampFiles::TpedFileI::TpedFileI
TpedFileI(const TpedFileI &in)=default
Copy constructor.
sampFiles::TpedFileI::~TpedFileI
~TpedFileI()
Destructor.
Definition: varfiles.hpp:707
sampFiles::GbinFile::operator=
GbinFile & operator=(GbinFile &&in)=default
Move assignment.
sampFiles::BedFileO::operator=
BedFileO & operator=(const BedFileO &in)=default
Copy assignment.
sampFiles::HmpFileO
HMP file output class.
Definition: varfiles.hpp:971
sampFiles::BedFile::_bimFile
fstream _bimFile
Corresponding .bim file stream.
Definition: varfiles.hpp:263
sampFiles::TpedFile::open
virtual void open()
Open stream (does nothing)
Definition: varfiles.hpp:645
sampFiles::GbinFileI::GbinFileI
GbinFileI(const GbinFileI &in)=default
Copy constructor.
sampFiles::BedFile::_fileStub
string _fileStub
File name stub (minus the extension)
Definition: varfiles.hpp:265
sampFiles::BedFileI::operator=
BedFileI & operator=(const BedFileI &in)=default
Copy assignment.
sampFiles::TpedFile
TPED file base class.
Definition: varfiles.hpp:618
sampFiles::TpedFile::TpedFile
TpedFile(const string &stubName)
File name constructor.
Definition: varfiles.hpp:632
sampFiles::TpedFileI
TPED file input class.
Definition: varfiles.hpp:656
populations.hpp
Connect lines with populations.
sampFiles::GtxtFileI::GtxtFileI
GtxtFileI(GtxtFileI &&in)=default
Move constructor.
sampFiles::GbinFileO::operator=
GbinFileO & operator=(GbinFileO &&in)=default
Move assignment.
sampFiles::HmpFile::open
virtual void open()
Open stream (does nothing)
Definition: varfiles.hpp:909
sampFiles::TpedFile::TpedFile
TpedFile()
Default constructor.
Definition: varfiles.hpp:627
sampFiles::GtxtFile::close
virtual void close()
Close stream.
Definition: varfiles.cpp:1803
sampFiles::GtxtFileO::GtxtFileO
GtxtFileO(const string &fileName, const bool &head)
File name constructor with header specification.
Definition: varfiles.hpp:596
sampFiles::BedFileO::BedFileO
BedFileO(const string &stubName)
File name constructor.
Definition: varfiles.hpp:442
sampFiles::TpedFileI::_famCopy
void _famCopy(fstream &fam)
Copy the .tfam file.
Definition: varfiles.cpp:2234
sampFiles::GbinFileI::nlines
uint64_t nlines()
Number of rows in the object.
Definition: varfiles.hpp:214
sampFiles::GtxtFileI::GtxtFileI
GtxtFileI(const string &fileName)
File name constructor.
Definition: varfiles.hpp:527
sampFiles::VcfFileO::VcfFileO
VcfFileO(VcfFileO &&in)=default
Move constructor.
sampFiles::VcfFileI::_numLines
uint64_t _numLines()
Get number of SNPs in the VCF file.
Definition: varfiles.cpp:2560
sampFiles::VarFile::close
virtual void close()=0
Close stream.
sampFiles::VarFile::~VarFile
~VarFile()
Destructor.
Definition: varfiles.hpp:110
sampFiles::HmpFileO::HmpFileO
HmpFileO(HmpFileO &&in)=default
Move constructor.
sampFiles::HmpFile::operator=
HmpFile & operator=(HmpFile &&in)=default
Move assignment.
sampFiles::VarFile::operator=
VarFile & operator=(const VarFile &in)=default
Copy assignment.
sampFiles::GtxtFile
Generic text file base class.
Definition: varfiles.hpp:463
sampFiles::HmpFile::HmpFile
HmpFile(HmpFile &&in)=default
Move constructor.
sampFiles::GtxtFileI::~GtxtFileI
~GtxtFileI()
Destructor.
Definition: varfiles.hpp:543
sampFiles::HmpFileO::operator=
HmpFileO & operator=(HmpFileO &&in)=default
Move assignment.
sampFiles::HmpFileI::operator=
HmpFileI & operator=(HmpFileI &&in)=default
Move assignment.
sampFiles::GbinFile::GbinFile
GbinFile(GbinFile &&in)=default
Move constructor.
sampFiles::GbinFileO::GbinFileO
GbinFileO()
Default constructor.
Definition: varfiles.hpp:229
sampFiles::VcfFileO::operator=
VcfFileO & operator=(VcfFileO &&in)=default
Move assignment.
sampFiles::VcfFile::VcfFile
VcfFile(const string &fileName)
Constructor with file name.
Definition: varfiles.hpp:776
sampFiles::TpedFileI::_famLines
uint64_t _famLines()
Get number of lines in the _tfamFile
Definition: varfiles.cpp:2138
sampFiles::GtxtFileI::sample
void sample(GtxtFileO &out, const uint64_t &n, const bool &headSkip)
Sample rows and save to a text file.
Definition: varfiles.cpp:1876
sampFiles::BedFileI::nindiv
uint64_t nindiv()
Number of individuals in the object.
Definition: varfiles.hpp:422
sampFiles::BedFileI::BedFileI
BedFileI(const string &stubName)
File name constructor.
Definition: varfiles.hpp:377
sampFiles::GtxtFileO::GtxtFileO
GtxtFileO(GtxtFileO &&in)=default
Move constructor.
sampFiles::GtxtFileI::open
void open()
Open stream to read.
Definition: varfiles.cpp:1809
sampFiles::TpedFile::_fileStub
string _fileStub
File name stub (minus the extension)
Definition: varfiles.hpp:623
sampFiles::TpedFile::TpedFile
TpedFile(const TpedFile &in)=default
Copy constructor.
sampFiles::TpedFileI::open
void open()
Open stream to read.
Definition: varfiles.cpp:2320
sampFiles::VcfFileI
VCF file input class.
Definition: varfiles.hpp:800
sampFiles::TpedFileI::nsnp
uint64_t nsnp()
Number of SNPs in the object.
Definition: varfiles.hpp:722
sampFiles::VcfFile::operator=
VcfFile & operator=(const VcfFile &in)=default
Copy assignment.
sampFiles::VcfFile
VCF file base class.
Definition: varfiles.hpp:765
sampFiles::GbinFileI::GbinFileI
GbinFileI(const string &fileName, const size_t &nCols, const size_t &elemSize)
File name constructor.
Definition: varfiles.hpp:189
sampFiles::HmpFileO::HmpFileO
HmpFileO(const string &fileName)
File name constructor.
Definition: varfiles.hpp:982
sampFiles::GbinFile::open
virtual void open()
Open stream (does nothing)
Definition: varfiles.hpp:158
sampFiles::BedFileI::sample
void sample(BedFileO &out, const uint64_t &n)
Sample SNPs and save to BED file.
Definition: varfiles.cpp:1026
sampFiles::GtxtFile::_fileName
string _fileName
File name.
Definition: varfiles.hpp:466
sampFiles::HmpFileI::nsnp
uint64_t nsnp()
Number of SNPs in the object.
Definition: varfiles.hpp:962
sampFiles::BedFileI::open
void open()
Open stream to read.
Definition: varfiles.cpp:967
sampFiles::GtxtFileO
Generic text file output class.
Definition: varfiles.hpp:579
sampFiles::BedFileI::BedFileI
BedFileI(BedFileI &&in)=default
Move constructor.
sampFiles::TpedFile::close
void close()
Close stream.
Definition: varfiles.cpp:2129
sampFiles::GtxtFileO::GtxtFileO
GtxtFileO()
Default constructor.
Definition: varfiles.hpp:585
sampFiles::GtxtFileO::GtxtFileO
GtxtFileO(const string &fileName)
File name constructor.
Definition: varfiles.hpp:590
sampFiles::HmpFileI
HMP file input class.
Definition: varfiles.hpp:920
sampFiles::HmpFileI::HmpFileI
HmpFileI(const HmpFileI &in)=default
Copy constructor.
sampFiles::BedFile::operator=
BedFile & operator=(const BedFile &in)=default
Copy assignment.
sampFiles::GbinFile::GbinFile
GbinFile(const string &fileName, const size_t &nCols, const size_t &elemSize)
Constructor with file name.
Definition: varfiles.hpp:144
sampFiles::GbinFile::_fileName
string _fileName
File name.
Definition: varfiles.hpp:126
sampFiles::TpedFileO::open
void open()
Open stream to write.
Definition: varfiles.cpp:2520
sampFiles::VcfFileI::operator=
VcfFileI & operator=(const VcfFileI &in)=default
Copy assignment.
sampFiles::GtxtFile::GtxtFile
GtxtFile(GtxtFile &&in)=default
Move constructor.
sampFiles::BedFile::BedFile
BedFile(const BedFile &in)=default
Copy constructor.
sampFiles::HmpFile::close
virtual void close()
Close stream.
Definition: varfiles.cpp:2771
sampFiles::BedFileO
BED file output class.
Definition: varfiles.hpp:431
sampFiles::BedFile::_famFile
fstream _famFile
Corresponding .fam file stream.
Definition: varfiles.hpp:261
sampFiles::VcfFile::VcfFile
VcfFile(const VcfFile &in)=default
Copy constructor.
sampFiles::GbinFileI::GbinFileI
GbinFileI()
Default constructor.
Definition: varfiles.hpp:181
sampFiles::HmpFile::operator=
HmpFile & operator=(const HmpFile &in)=default
Copy assignment.
sampFiles::GbinFile::_elemSize
size_t _elemSize
Size of each element in bytes.
Definition: varfiles.hpp:130
sampFiles::BedFileO::~BedFileO
~BedFileO()
Destructor.
Definition: varfiles.hpp:452
sampFiles::GtxtFile::~GtxtFile
~GtxtFile()
Destructor.
Definition: varfiles.hpp:496
sampFiles::GbinFileI
Binary file input class.
Definition: varfiles.hpp:169
sampFiles::VcfFile::operator=
VcfFile & operator=(VcfFile &&in)=default
Move assignment.
sampFiles::BedFileI::~BedFileI
~BedFileI()
Destructor.
Definition: varfiles.hpp:387
sampFiles::BedFileI::_numLines
uint64_t _numLines()
Get number of lines in the _bimFile
Definition: varfiles.cpp:309
sampFiles::GtxtFile::GtxtFile
GtxtFile(const string &fileName, const bool &head)
Constructor with file name and header indicator.
Definition: varfiles.hpp:485
sampFiles::BedFile::operator=
BedFile & operator=(BedFile &&in)=default
Move assignment.
sampFiles::VarFile::operator=
VarFile & operator=(VarFile &&in)=default
Move assignment.
sampFiles::GbinFileO::GbinFileO
GbinFileO(const GbinFileO &in)=default
Copy constructor.
sampFiles::VcfFile::VcfFile
VcfFile(VcfFile &&in)=default
Move constructor.
sampFiles::VcfFileO::open
void open()
Open stream to write.
Definition: varfiles.cpp:2749
sampFiles::HmpFileI::HmpFileI
HmpFileI()
Default constructor.
Definition: varfiles.hpp:932
sampFiles::GtxtFileO::operator=
GtxtFileO & operator=(GtxtFileO &&in)=default
Move assignment.
sampFiles::HmpFile::HmpFile
HmpFile(const HmpFile &in)=default
Copy constructor.
sampFiles::BedFileI::BedFileI
BedFileI()
Default constructor.
Definition: varfiles.hpp:372
sampFiles::BedFileO::BedFileO
BedFileO(const BedFileO &in)=default
Copy constructor.
sampFiles::HmpFileI::_numLines
uint64_t _numLines()
Get number of SNPs in the HMP file.
Definition: varfiles.cpp:2822
sampFiles::TpedFile::_tfamFile
fstream _tfamFile
Corresponding .tfam file stream.
Definition: varfiles.hpp:621
sampFiles::TpedFile::operator=
TpedFile & operator=(const TpedFile &in)=default
Copy assignment.
sampFiles::HmpFileI::~HmpFileI
~HmpFileI()
Destructor.
Definition: varfiles.hpp:947
sampFiles::BedFileI::nsnp
uint64_t nsnp()
Number of SNPs in the object.
Definition: varfiles.hpp:420
sampFiles::VcfFile::~VcfFile
~VcfFile()
Destructor.
Definition: varfiles.hpp:787
sampFiles::PopIndex
Population index.
Definition: populations.hpp:44
sampFiles::VarFile::VarFile
VarFile(VarFile &&in)=default
Move constructor.
sampFiles::TpedFileO::operator=
TpedFileO & operator=(TpedFileO &&in)=default
Move assignment.
sampFiles::GtxtFileO::~GtxtFileO
~GtxtFileO()
Destructor.
Definition: varfiles.hpp:606
sampFiles::GtxtFileI::operator=
GtxtFileI & operator=(GtxtFileI &&in)=default
Move assignment.
sampFiles::GbinFileO::GbinFileO
GbinFileO(GbinFileO &&in)=default
Move constructor.
sampFiles::VcfFileO::VcfFileO
VcfFileO()
Default constructor.
Definition: varfiles.hpp:857
sampFiles::BUF_SIZE
static const size_t BUF_SIZE
Buffer size.
Definition: varfiles.hpp:82
sampFiles::GbinFile::_nCols
size_t _nCols
Number of elements in a row.
Definition: varfiles.hpp:128
sampFiles::VcfFileI::VcfFileI
VcfFileI(const string &fileName)
File name constructor.
Definition: varfiles.hpp:817
sampFiles::VcfFileO::operator=
VcfFileO & operator=(const VcfFileO &in)=default
Copy assignment.
sampFiles::BedFileI::_famLines
uint64_t _famLines()
Get number of lines in the _famFile
Definition: varfiles.cpp:340
sampFiles::GtxtFileO::operator=
GtxtFileO & operator=(const GtxtFileO &in)=default
Copy assignment.
sampFiles::VcfFile::close
void close()
Close stream.
Definition: varfiles.cpp:2554
sampFiles::GbinFileI::~GbinFileI
~GbinFileI()
Destructor.
Definition: varfiles.hpp:199
sampFiles::TpedFileO::TpedFileO
TpedFileO()
Default constructor.
Definition: varfiles.hpp:739
sampFiles::BedFileO::BedFileO
BedFileO()
Default constructor.
Definition: varfiles.hpp:437
sampFiles::GbinFileI::sample
void sample(GbinFileO &out, const uint64_t &n)
Sample rows and save to a binary file.
Definition: varfiles.cpp:115
sampFiles::BedFileI::sampleLD
void sampleLD(const uint64_t &n)
Linkage disequilibrium among sampled sites.
Definition: varfiles.cpp:1282
sampFiles::EPS
const double EPS
Machine epsilon.
Definition: varfiles.hpp:84
sampFiles::GtxtFile::GtxtFile
GtxtFile()
Default constructor.
Definition: varfiles.hpp:472
sampFiles::GtxtFileI::operator=
GtxtFileI & operator=(const GtxtFileI &in)=default
Copy assignment.
sampFiles::PI
const double PI
pi
Definition: varfiles.hpp:86
sampFiles::VcfFileO::VcfFileO
VcfFileO(const string &fileName)
File name constructor.
Definition: varfiles.hpp:862
sampFiles::GtxtFileI::GtxtFileI
GtxtFileI(const GtxtFileI &in)=default
Copy constructor.
sampFiles::HmpFileI::HmpFileI
HmpFileI(HmpFileI &&in)=default
Move constructor.
sampFiles::GtxtFile::_head
bool _head
Is there a header?
Definition: varfiles.hpp:468
sampFiles::VcfFile::VcfFile
VcfFile()
Default constructor.
Definition: varfiles.hpp:770
sampFiles::GbinFileO::open
void open()
Open stream to write.
Definition: varfiles.cpp:249
sampFiles::VcfFile::open
void open()
Open stream (does nothing)
Definition: varfiles.hpp:790
sampFiles::TpedFileI::TpedFileI
TpedFileI(const string &stubName)
File name constructor.
Definition: varfiles.hpp:697
sampFiles::TpedFileI::operator=
TpedFileI & operator=(TpedFileI &&in)=default
Move assignment.
sampFiles::VcfFileI::nsnp
uint64_t nsnp()
Number of SNPs in the object.
Definition: varfiles.hpp:842
sampFiles::BedFile::close
void close()
Close stream.
Definition: varfiles.cpp:297
sampFiles::VcfFileI::sample
void sample(VcfFileO &out, const uint64_t &n)
Sample SNPs and save to VCF file.
Definition: varfiles.cpp:2634
sampFiles::HmpFileI::open
void open()
Open stream to read.
Definition: varfiles.cpp:2803
sampFiles::GbinFileI::operator=
GbinFileI & operator=(const GbinFileI &in)=default
Copy assignment.
sampFiles::GbinFile::GbinFile
GbinFile(const GbinFile &in)=default
Copy constructor.
sampFiles::TpedFile::~TpedFile
~TpedFile()
Destructor.
Definition: varfiles.cpp:2123
sampFiles::HmpFileO::HmpFileO
HmpFileO(const HmpFileO &in)=default
Copy constructor.
sampFiles::VcfFileI::open
void open()
Open stream to read.
Definition: varfiles.cpp:2614
sampFiles::GtxtFileI::_numLines
virtual uint64_t _numLines()
Get number of rows in the text file.
Definition: varfiles.cpp:1829
sampFiles::BedFileI
BED file input class.
Definition: varfiles.hpp:311
sampFiles::GtxtFileI
Text file input class.
Definition: varfiles.hpp:510
sampFiles::GtxtFile::GtxtFile
GtxtFile(const string &fileName)
Constructor with file name.
Definition: varfiles.hpp:478
sampFiles::VcfFileI::VcfFileI
VcfFileI(const VcfFileI &in)=default
Copy constructor.
sampFiles::VcfFileO::VcfFileO
VcfFileO(const VcfFileO &in)=default
Copy constructor.
sampFiles::GtxtFile::operator=
GtxtFile & operator=(const GtxtFile &in)=default
Copy assignment.
sampFiles::GbinFileI::GbinFileI
GbinFileI(GbinFileI &&in)=default
Move constructor.
sampFiles::TpedFileI::TpedFileI
TpedFileI()
Default constructor.
Definition: varfiles.hpp:692
sampFiles::GtxtFileI::GtxtFileI
GtxtFileI()
Default constructor.
Definition: varfiles.hpp:522
sampFiles::TpedFileO
TPED file output class.
Definition: varfiles.hpp:733