Sample SNPs
Fast ordered sampling of rows from large text or binary files. Special cases for DNA variant files (.bed, VCF, HapMap, etc.).
|
Go to the documentation of this file.
43 #include <unordered_map>
52 using std::unordered_map;
53 using std::numeric_limits;
84 const double EPS = numeric_limits<double>::epsilon();
86 const double PI = 3.14159265358979323846264338328;
160 virtual void close();
189 GbinFileI(
const string &fileName,
const size_t &nCols,
const size_t &elemSize) :
GbinFile(fileName, nCols, elemSize) {};
237 GbinFileO(
const string &fileName,
const size_t &nCols,
const size_t &elemSize) :
GbinFile(fileName, nCols, elemSize) {};
277 static const unordered_map<char, string>
_tests;
287 BedFile(
const string &stubName);
353 void _ld(
const char *snp1,
const char *snp2,
const size_t &N,
const unsigned short &pad,
double &rSq,
double &Dprime,
double &dcnt1,
double &dcnt2);
369 void _ld(
const char *snp1,
const char *snp2,
const PopIndex &popID, vector<double> &rSq, vector<double> &Dprime, vector<double> &dcnt1, vector<double> &dcnt2);
501 virtual void close();
568 void sample(
const uint64_t &n,
const bool &headSkip,
const char &delim, vector<string> &out);
911 virtual void close();
GbinFile & operator=(const GbinFile &in)=default
Copy assignment.
GtxtFile & operator=(GtxtFile &&in)=default
Move assignment.
BedFile(BedFile &&in)=default
Move constructor.
virtual void open()=0
Open stream.
HmpFileI & operator=(const HmpFileI &in)=default
Copy assignment.
TpedFileO(const string &stubName)
File name constructor.
Definition: varfiles.hpp:744
fstream _varFile
Variant file stream.
Definition: varfiles.hpp:95
~VcfFileI()
Destructor.
Definition: varfiles.hpp:827
HmpFile(const string &fileName)
Constructor with file name.
Definition: varfiles.hpp:895
void open()
Open stream to write.
Definition: varfiles.cpp:2979
GbinFile()
Default constructor.
Definition: varfiles.hpp:134
void sample(TpedFileO &out, const uint64_t &n)
Sample SNPs and save to TPED file.
Definition: varfiles.cpp:2353
Generic binary file output class.
Definition: varfiles.hpp:223
VcfFileI()
Default constructor.
Definition: varfiles.hpp:812
~VcfFileO()
Destructor.
Definition: varfiles.hpp:872
VarFile()
Default constructor (protected)
Definition: varfiles.hpp:98
VcfFileI & operator=(VcfFileI &&in)=default
Move assignment.
VcfFileI(VcfFileI &&in)=default
Move constructor.
void open()
Open stream to write.
Definition: varfiles.cpp:1748
TpedFile & operator=(TpedFile &&in)=default
Move assignment.
GtxtFileO(const GtxtFileO &in)=default
Copy constructor.
void sample(HmpFileO &out, const uint64_t &n)
Sample SNPs and save to HMP file.
Definition: varfiles.cpp:2872
BedFileI & operator=(BedFileI &&in)=default
Move assignment.
GtxtFileI(const string &fileName, const bool &head)
File name constructor with header specification.
Definition: varfiles.hpp:533
~GbinFileO()
Destructor.
Definition: varfiles.hpp:247
~HmpFile()
Destructor.
Definition: varfiles.hpp:906
TpedFileO(const TpedFileO &in)=default
Copy constructor.
virtual void close()
Close stream.
Definition: varfiles.cpp:90
virtual void open()
Open stream (does nothing)
Definition: varfiles.hpp:499
Base variant file class.
Definition: varfiles.hpp:92
virtual uint64_t _numLines()
Get number of rows in the binary file.
Definition: varfiles.cpp:64
BedFileO(BedFileO &&in)=default
Move constructor.
TpedFileO & operator=(const TpedFileO &in)=default
Copy assignment.
~TpedFileO()
Destructor.
Definition: varfiles.hpp:754
TpedFile(TpedFile &&in)=default
Move constructor.
~HmpFileO()
Destructor.
Definition: varfiles.hpp:992
void _ld(const char *snp1, const char *snp2, const size_t &N, const unsigned short &pad, double &rSq, double &Dprime, double &dcnt1, double &dcnt2)
Between-SNP linkage disequilibrium (LD)
Definition: varfiles.cpp:435
~BedFile()
Destructor.
Definition: varfiles.cpp:288
GtxtFile(const GtxtFile &in)=default
Copy constructor.
Generic binary file base class.
Definition: varfiles.hpp:123
GbinFileO & operator=(const GbinFileO &in)=default
Copy assignment.
HmpFile()
Default constructor.
Definition: varfiles.hpp:889
uint64_t _numLines()
Get number of rows in the text file.
Definition: varfiles.cpp:2274
void open()
Open stream to write.
Definition: varfiles.cpp:2106
TpedFileI(TpedFileI &&in)=default
Move constructor.
BedFileO & operator=(BedFileO &&in)=default
Move assignment.
VCF file output class.
Definition: varfiles.hpp:851
virtual void open()
Open stream (does nothing)
Definition: varfiles.hpp:300
void open()
Open stream to read.
Definition: varfiles.cpp:97
BedFileI(const BedFileI &in)=default
Copy constructor.
HmpFileO()
Default constructor.
Definition: varfiles.hpp:977
GbinFileI & operator=(GbinFileI &&in)=default
Move assignment.
HmpFileO & operator=(const HmpFileO &in)=default
Copy assignment.
GbinFileO(const string &fileName, const size_t &nCols, const size_t &elemSize)
File name constructor.
Definition: varfiles.hpp:237
BED file base class.
Definition: varfiles.hpp:257
Hapmap (HMP) file base class.
Definition: varfiles.hpp:884
uint64_t nlines()
Number of rows in the object.
Definition: varfiles.hpp:570
TpedFileO(TpedFileO &&in)=default
Move constructor.
TpedFileI & operator=(const TpedFileI &in)=default
Copy assignment.
~GbinFile()
Destructor.
Definition: varfiles.hpp:155
static const vector< char > _masks
Genotype bit masks.
Definition: varfiles.hpp:271
static const unordered_map< char, string > _tests
Genotype bit tests.
Definition: varfiles.hpp:277
VarFile(const VarFile &in)=default
Copy constructor.
uint64_t nindiv()
Number of individuals in the object.
Definition: varfiles.hpp:724
BedFile()
Default constructor.
Definition: varfiles.cpp:269
TpedFileI(const TpedFileI &in)=default
Copy constructor.
~TpedFileI()
Destructor.
Definition: varfiles.hpp:707
GbinFile & operator=(GbinFile &&in)=default
Move assignment.
BedFileO & operator=(const BedFileO &in)=default
Copy assignment.
HMP file output class.
Definition: varfiles.hpp:971
fstream _bimFile
Corresponding .bim file stream.
Definition: varfiles.hpp:263
virtual void open()
Open stream (does nothing)
Definition: varfiles.hpp:645
GbinFileI(const GbinFileI &in)=default
Copy constructor.
string _fileStub
File name stub (minus the extension)
Definition: varfiles.hpp:265
BedFileI & operator=(const BedFileI &in)=default
Copy assignment.
TPED file base class.
Definition: varfiles.hpp:618
TpedFile(const string &stubName)
File name constructor.
Definition: varfiles.hpp:632
TPED file input class.
Definition: varfiles.hpp:656
Connect lines with populations.
GtxtFileI(GtxtFileI &&in)=default
Move constructor.
GbinFileO & operator=(GbinFileO &&in)=default
Move assignment.
virtual void open()
Open stream (does nothing)
Definition: varfiles.hpp:909
TpedFile()
Default constructor.
Definition: varfiles.hpp:627
virtual void close()
Close stream.
Definition: varfiles.cpp:1803
GtxtFileO(const string &fileName, const bool &head)
File name constructor with header specification.
Definition: varfiles.hpp:596
BedFileO(const string &stubName)
File name constructor.
Definition: varfiles.hpp:442
void _famCopy(fstream &fam)
Copy the .tfam file.
Definition: varfiles.cpp:2234
uint64_t nlines()
Number of rows in the object.
Definition: varfiles.hpp:214
GtxtFileI(const string &fileName)
File name constructor.
Definition: varfiles.hpp:527
VcfFileO(VcfFileO &&in)=default
Move constructor.
uint64_t _numLines()
Get number of SNPs in the VCF file.
Definition: varfiles.cpp:2560
virtual void close()=0
Close stream.
~VarFile()
Destructor.
Definition: varfiles.hpp:110
HmpFileO(HmpFileO &&in)=default
Move constructor.
HmpFile & operator=(HmpFile &&in)=default
Move assignment.
VarFile & operator=(const VarFile &in)=default
Copy assignment.
Generic text file base class.
Definition: varfiles.hpp:463
HmpFile(HmpFile &&in)=default
Move constructor.
~GtxtFileI()
Destructor.
Definition: varfiles.hpp:543
HmpFileO & operator=(HmpFileO &&in)=default
Move assignment.
HmpFileI & operator=(HmpFileI &&in)=default
Move assignment.
GbinFile(GbinFile &&in)=default
Move constructor.
GbinFileO()
Default constructor.
Definition: varfiles.hpp:229
VcfFileO & operator=(VcfFileO &&in)=default
Move assignment.
VcfFile(const string &fileName)
Constructor with file name.
Definition: varfiles.hpp:776
uint64_t _famLines()
Get number of lines in the _tfamFile
Definition: varfiles.cpp:2138
void sample(GtxtFileO &out, const uint64_t &n, const bool &headSkip)
Sample rows and save to a text file.
Definition: varfiles.cpp:1876
uint64_t nindiv()
Number of individuals in the object.
Definition: varfiles.hpp:422
BedFileI(const string &stubName)
File name constructor.
Definition: varfiles.hpp:377
GtxtFileO(GtxtFileO &&in)=default
Move constructor.
void open()
Open stream to read.
Definition: varfiles.cpp:1809
string _fileStub
File name stub (minus the extension)
Definition: varfiles.hpp:623
TpedFile(const TpedFile &in)=default
Copy constructor.
void open()
Open stream to read.
Definition: varfiles.cpp:2320
VCF file input class.
Definition: varfiles.hpp:800
uint64_t nsnp()
Number of SNPs in the object.
Definition: varfiles.hpp:722
VcfFile & operator=(const VcfFile &in)=default
Copy assignment.
VCF file base class.
Definition: varfiles.hpp:765
GbinFileI(const string &fileName, const size_t &nCols, const size_t &elemSize)
File name constructor.
Definition: varfiles.hpp:189
HmpFileO(const string &fileName)
File name constructor.
Definition: varfiles.hpp:982
virtual void open()
Open stream (does nothing)
Definition: varfiles.hpp:158
void sample(BedFileO &out, const uint64_t &n)
Sample SNPs and save to BED file.
Definition: varfiles.cpp:1026
string _fileName
File name.
Definition: varfiles.hpp:466
uint64_t nsnp()
Number of SNPs in the object.
Definition: varfiles.hpp:962
void open()
Open stream to read.
Definition: varfiles.cpp:967
Generic text file output class.
Definition: varfiles.hpp:579
BedFileI(BedFileI &&in)=default
Move constructor.
void close()
Close stream.
Definition: varfiles.cpp:2129
GtxtFileO()
Default constructor.
Definition: varfiles.hpp:585
GtxtFileO(const string &fileName)
File name constructor.
Definition: varfiles.hpp:590
HMP file input class.
Definition: varfiles.hpp:920
HmpFileI(const HmpFileI &in)=default
Copy constructor.
BedFile & operator=(const BedFile &in)=default
Copy assignment.
GbinFile(const string &fileName, const size_t &nCols, const size_t &elemSize)
Constructor with file name.
Definition: varfiles.hpp:144
string _fileName
File name.
Definition: varfiles.hpp:126
void open()
Open stream to write.
Definition: varfiles.cpp:2520
VcfFileI & operator=(const VcfFileI &in)=default
Copy assignment.
GtxtFile(GtxtFile &&in)=default
Move constructor.
BedFile(const BedFile &in)=default
Copy constructor.
virtual void close()
Close stream.
Definition: varfiles.cpp:2771
BED file output class.
Definition: varfiles.hpp:431
fstream _famFile
Corresponding .fam file stream.
Definition: varfiles.hpp:261
VcfFile(const VcfFile &in)=default
Copy constructor.
GbinFileI()
Default constructor.
Definition: varfiles.hpp:181
HmpFile & operator=(const HmpFile &in)=default
Copy assignment.
size_t _elemSize
Size of each element in bytes.
Definition: varfiles.hpp:130
~BedFileO()
Destructor.
Definition: varfiles.hpp:452
~GtxtFile()
Destructor.
Definition: varfiles.hpp:496
Binary file input class.
Definition: varfiles.hpp:169
VcfFile & operator=(VcfFile &&in)=default
Move assignment.
~BedFileI()
Destructor.
Definition: varfiles.hpp:387
uint64_t _numLines()
Get number of lines in the _bimFile
Definition: varfiles.cpp:309
GtxtFile(const string &fileName, const bool &head)
Constructor with file name and header indicator.
Definition: varfiles.hpp:485
BedFile & operator=(BedFile &&in)=default
Move assignment.
VarFile & operator=(VarFile &&in)=default
Move assignment.
GbinFileO(const GbinFileO &in)=default
Copy constructor.
VcfFile(VcfFile &&in)=default
Move constructor.
void open()
Open stream to write.
Definition: varfiles.cpp:2749
HmpFileI()
Default constructor.
Definition: varfiles.hpp:932
GtxtFileO & operator=(GtxtFileO &&in)=default
Move assignment.
HmpFile(const HmpFile &in)=default
Copy constructor.
BedFileI()
Default constructor.
Definition: varfiles.hpp:372
BedFileO(const BedFileO &in)=default
Copy constructor.
uint64_t _numLines()
Get number of SNPs in the HMP file.
Definition: varfiles.cpp:2822
fstream _tfamFile
Corresponding .tfam file stream.
Definition: varfiles.hpp:621
TpedFile & operator=(const TpedFile &in)=default
Copy assignment.
~HmpFileI()
Destructor.
Definition: varfiles.hpp:947
uint64_t nsnp()
Number of SNPs in the object.
Definition: varfiles.hpp:420
~VcfFile()
Destructor.
Definition: varfiles.hpp:787
Population index.
Definition: populations.hpp:44
VarFile(VarFile &&in)=default
Move constructor.
TpedFileO & operator=(TpedFileO &&in)=default
Move assignment.
~GtxtFileO()
Destructor.
Definition: varfiles.hpp:606
GtxtFileI & operator=(GtxtFileI &&in)=default
Move assignment.
GbinFileO(GbinFileO &&in)=default
Move constructor.
VcfFileO()
Default constructor.
Definition: varfiles.hpp:857
static const size_t BUF_SIZE
Buffer size.
Definition: varfiles.hpp:82
size_t _nCols
Number of elements in a row.
Definition: varfiles.hpp:128
VcfFileI(const string &fileName)
File name constructor.
Definition: varfiles.hpp:817
VcfFileO & operator=(const VcfFileO &in)=default
Copy assignment.
uint64_t _famLines()
Get number of lines in the _famFile
Definition: varfiles.cpp:340
GtxtFileO & operator=(const GtxtFileO &in)=default
Copy assignment.
void close()
Close stream.
Definition: varfiles.cpp:2554
~GbinFileI()
Destructor.
Definition: varfiles.hpp:199
TpedFileO()
Default constructor.
Definition: varfiles.hpp:739
BedFileO()
Default constructor.
Definition: varfiles.hpp:437
void sample(GbinFileO &out, const uint64_t &n)
Sample rows and save to a binary file.
Definition: varfiles.cpp:115
void sampleLD(const uint64_t &n)
Linkage disequilibrium among sampled sites.
Definition: varfiles.cpp:1282
const double EPS
Machine epsilon.
Definition: varfiles.hpp:84
GtxtFile()
Default constructor.
Definition: varfiles.hpp:472
GtxtFileI & operator=(const GtxtFileI &in)=default
Copy assignment.
const double PI
The mathematical constant pi.
Definition: varfiles.hpp:86
VcfFileO(const string &fileName)
File name constructor.
Definition: varfiles.hpp:862
GtxtFileI(const GtxtFileI &in)=default
Copy constructor.
HmpFileI(HmpFileI &&in)=default
Move constructor.
bool _head
Is there a header?
Definition: varfiles.hpp:468
VcfFile()
Default constructor.
Definition: varfiles.hpp:770
void open()
Open stream to write.
Definition: varfiles.cpp:249
void open()
Open stream (does nothing)
Definition: varfiles.hpp:790
TpedFileI(const string &stubName)
File name constructor.
Definition: varfiles.hpp:697
TpedFileI & operator=(TpedFileI &&in)=default
Move assignment.
uint64_t nsnp()
Number of SNPs in the object.
Definition: varfiles.hpp:842
void close()
Close stream.
Definition: varfiles.cpp:297
void sample(VcfFileO &out, const uint64_t &n)
Sample SNPs and save to VCF file.
Definition: varfiles.cpp:2634
void open()
Open stream to read.
Definition: varfiles.cpp:2803
GbinFileI & operator=(const GbinFileI &in)=default
Copy assignment.
GbinFile(const GbinFile &in)=default
Copy constructor.
~TpedFile()
Destructor.
Definition: varfiles.cpp:2123
HmpFileO(const HmpFileO &in)=default
Copy constructor.
void open()
Open stream to read.
Definition: varfiles.cpp:2614
virtual uint64_t _numLines()
Get number of rows in the text file.
Definition: varfiles.cpp:1829
BED file input class.
Definition: varfiles.hpp:311
Text file input class.
Definition: varfiles.hpp:510
GtxtFile(const string &fileName)
Constructor with file name.
Definition: varfiles.hpp:478
VcfFileI(const VcfFileI &in)=default
Copy constructor.
VcfFileO(const VcfFileO &in)=default
Copy constructor.
GtxtFile & operator=(const GtxtFile &in)=default
Copy assignment.
GbinFileI(GbinFileI &&in)=default
Move constructor.
TpedFileI()
Default constructor.
Definition: varfiles.hpp:692
GtxtFileI()
Default constructor.
Definition: varfiles.hpp:522
TPED file output class.
Definition: varfiles.hpp:733