isoSeqQC
Quality assessment and re-mapping of poorly mapped isoSeq read segments
Loading...
Searching...
No Matches
isoseqAlgn.hpp
Go to the documentation of this file.
1/*
2 * Copyright (c) 2024-2025 Anthony J. Greenberg and Rebekah Rogers
3 *
4 * Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
5 *
6 * 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
7 *
8 * 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
9 *
10 * 3. Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission.
11 *
12 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
13 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS
14 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
15 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
16 * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
17 * THE POSSIBILITY OF SUCH DAMAGE.
18 */
19
21
29
30#pragma once
31
32#include <memory>
33#include <utility>
34#include <vector>
35#include <array>
36#include <set>
37#include <unordered_map>
38#include <string>
39#include <filesystem>
40
41#include "sam.h"
42#include "bgzf.h"
43
44namespace isaSpace {
45 struct BamAndGffFiles;
46 struct StatsAndFastqFiles;
48 struct MappedReadInterval;
51 struct ReadExonCoverage;
52 struct ReadPortion;
53 struct BAMsecondary;
54 struct BAMheaderDeleter;
55 struct BAMrecordDeleter;
56 struct BGZFhandleDeleter;
57 class BAMsafeReader;
58 class ExonGroup;
60 class BAMrecord;
61 class BAMtoGenome;
62 class BAMfile;
63
67 std::string bamFileName;
69 std::string gffFileName;
70 };
71
74 std::string statsFileName;
76 std::string fastqFileName;
77 };
78
81 std::string tokenName;
83 std::vector<std::string> attributeList;
84 };
85
92 hts_pos_t readStart{-1};
94 hts_pos_t readEnd{-1};
96 hts_pos_t referenceStart{-1};
98 hts_pos_t referenceEnd{-1};
99 };
100
103 hts_pos_t mapStart{-1};
105 std::vector<float> matchStatus;
106 };
107
120 std::vector< std::pair<float, hts_pos_t> >::difference_type windowSize{0};
121 };
122
133 std::string chromosomeName;
135 std::string readName;
141 hts_pos_t alignmentStart{0};
147 hts_pos_t alignmentEnd{0};
153 hts_pos_t bestAlignmentStart{0};
159 hts_pos_t bestAlignmentEnd{0};
181 uint16_t nExons{0};
187 std::string geneName;
192 char strand{'+'};
197 hts_pos_t firstExonLength{-1};
203 hts_pos_t firstExonStart{-1};
209 hts_pos_t lastExonEnd{-1};
215 std::vector<float> exonCoverageScores;
221 std::vector<float> bestExonCoverageScores;
222 };
223
224 struct ReadPortion {
226 size_t start{0};
228 size_t end{0};
230 std::string originalName;
231 };
232
238 hts_pos_t mapStart{-1};
240 hts_pos_t mapEnd{-1};
244 std::vector<uint32_t> cigar;
245 };
246
252 void operator()(sam_hdr_t *header) const { sam_hdr_destroy(header); }
253 };
254
260 void operator()(bam1_t *bamRecord) const { bam_destroy1(bamRecord); }
261 };
262
268 void operator()(BGZF *bgzfHandle) const { bgzf_close(bgzfHandle); }
269 };
270
279 public:
281 BAMsafeReader() = default;
286 BAMsafeReader(const std::string &bamFileName);
291 BAMsafeReader(const BAMsafeReader &toCopy) = delete;
297 BAMsafeReader& operator=(const BAMsafeReader &toCopy) = delete;
302 BAMsafeReader(BAMsafeReader &&toMove) noexcept {
303 *this = std::move(toMove);
304 };
305
313
320 [[nodiscard]] std::unique_ptr<sam_hdr_t, BAMheaderDeleter> getHeaderCopy() const;
328 [[nodiscard]] std::pair<std::unique_ptr<bam1_t, BAMrecordDeleter>, int32_t> getNextRecord();
329 private:
331 static const uint16_t nRetries_;
333 BGZF *fileHandle_{nullptr};
335 std::unique_ptr<sam_hdr_t, BAMheaderDeleter> headerUPointer_{nullptr};
337 std::string fileName_;
339 std::filesystem::perms initialPermissions_{std::filesystem::perms::none};
340 };
341
345 class ExonGroup {
346 public:
348 ExonGroup() = default;
359 ExonGroup(std::string geneName, const char strand, std::set< std::pair<hts_pos_t, hts_pos_t> > &exonSet);
368 ExonGroup(std::string geneName, const char strand, const std::vector<std::string> &exonLinesFomGFF);
373 ExonGroup(const ExonGroup &toCopy) = default;
379 ExonGroup& operator=(const ExonGroup &toCopy) = default;
384 ExonGroup(ExonGroup &&toMove) noexcept = default;
390 ExonGroup& operator=(ExonGroup &&toMove) noexcept = default;
392 ~ExonGroup() = default;
393
398 [[nodiscard]] bool empty() const noexcept { return exonRanges_.empty(); };
403 [[nodiscard]] std::string geneName() const { return geneName_; };
408 [[nodiscard]] size_t nExons() const noexcept { return exonRanges_.size(); };
413 [[nodiscard]] char strand() const noexcept { return isNegativeStrand_ ? '-' : '+'; };
419 [[nodiscard]] std::pair<hts_pos_t, hts_pos_t> at(const size_t &idx) const {return exonRanges_.at(idx);};
428 [[nodiscard]] std::pair<hts_pos_t, hts_pos_t> geneSpan() const noexcept;
437 [[nodiscard]] std::pair<hts_pos_t, hts_pos_t> firstExonSpan() const noexcept;
442 [[nodiscard]] hts_pos_t firstExonLength() const noexcept;
451 [[nodiscard]] uint32_t firstExonAfter(const hts_pos_t &position) const noexcept;
460 [[nodiscard]] uint32_t firstOverlappingExon(const hts_pos_t &position) const noexcept;
469 [[nodiscard]] uint32_t lastExonBefore(const hts_pos_t &position) const noexcept;
478 [[nodiscard]] uint32_t lastOverlappingExon(const hts_pos_t &position) const noexcept;
487 [[nodiscard]] std::pair<hts_pos_t, hts_pos_t> getFirstIntronSpan() const;
497 [[nodiscard]] std::vector<float> getExonCoverageQuality(const BAMrecord &alignment) const;
507 [[nodiscard]] std::vector<float> getBestExonCoverageQuality(const BAMrecord &alignment) const;
508 private:
510 static const size_t strandIDidx_;
512 static const char gffDelimiter_;
514 static const size_t spanStart_;
516 static const size_t spanEnd_;
518 std::string geneName_;
520 bool isNegativeStrand_{false};
525 std::vector< std::pair<hts_pos_t, hts_pos_t> > exonRanges_;
526 // TODO: to implement tracking transcripts, additional vector of iterators pointing to the next exon
527 // Keep the number the same and equal to the number of transcripts listed in the GFF
528 };
529
537 public:
545 ReadMatchWindowBIC(const std::vector< std::pair<float, hts_pos_t> >::const_iterator &windowBegin, const BinomialWindowParameters &windowParameters) :
546 leftProbability_{windowParameters.currentProbability},
547 rightProbability_{windowParameters.alternativeProbability},
548 kSuccesses_{calculateKsuccesses_(windowBegin, windowParameters)},
549 nTrials_{static_cast<float>(windowParameters.windowSize)} {};
550
554 ReadMatchWindowBIC(const ReadMatchWindowBIC &toCopy) = default;
565 ReadMatchWindowBIC(ReadMatchWindowBIC &&toMove) noexcept = default;
571 ReadMatchWindowBIC& operator=(ReadMatchWindowBIC &&toMove) noexcept = default;
574
582 [[nodiscard]] float getBICdifference() const noexcept;
583 private:
585 float leftProbability_{0.0F};
591 float rightProbability_{0.0F};
593 float kSuccesses_{0.0F};
595 float nTrials_{0.0F};
596
602 [[nodiscard]] static float calculateKsuccesses_(const std::vector< std::pair<float, hts_pos_t> >::const_iterator &windowBegin, const BinomialWindowParameters &windowParameters);
603 };
604
609 class BAMrecord {
610 public:
612 BAMrecord() = default;
620 BAMrecord(const bam1_t *alignmentRecord, const sam_hdr_t *samHeader);
625 BAMrecord(const BAMrecord &toCopy) = default;
631 BAMrecord& operator=(const BAMrecord &toCopy) = default;
636 BAMrecord(BAMrecord &&toMove) noexcept = default;
642 BAMrecord& operator=(BAMrecord &&toMove) noexcept = default;
644 ~BAMrecord() = default;
645
655 void addSecondaryAlignment(const bam1_t *alignmentRecord, const sam_hdr_t *samHeader, const hts_pos_t localWindow = 500'000);
660 [[nodiscard]] std::string getReadName() const {return readName_; };
667 [[nodiscard]] hts_pos_t getMapStart() const noexcept { return mapStart_; };
674 [[nodiscard]] hts_pos_t getMapEnd() const noexcept { return mapEnd_; };
681 [[nodiscard]] hts_pos_t getmRNAstart() const noexcept {
682 return isRev_ ? mapEnd_ : mapStart_;
683 };
684
688 [[nodiscard]] bool isRevComp() const noexcept { return isRev_; };
693 [[nodiscard]] bool isMapped() const noexcept { return isMapped_; };
698 [[nodiscard]] bool hasSecondaryAlignments() const noexcept { return secondaryAlignmentCount_ > 0; };
703 [[nodiscard]] bool hasLocalSecondaryAlignments() const noexcept { return !localSecondaryAlignments_.empty(); };
708 [[nodiscard]] uint16_t secondaryAlignmentCount() const noexcept { return secondaryAlignmentCount_; };
713 [[nodiscard]] uint16_t localSecondaryAlignmentCount() const noexcept { return static_cast<uint16_t>( localSecondaryAlignments_.size() ); };
718 [[nodiscard]] uint16_t localReversedSecondaryAlignmentCount() const noexcept;
723 [[nodiscard]] hts_pos_t getReadLength() const noexcept { return static_cast<hts_pos_t>( sequenceAndQuality_.size() ); };
730 [[nodiscard]] std::vector<uint32_t> getCIGARvector() const { return cigar_; };
737 [[nodiscard]] std::string getCIGARstring() const;
742 [[nodiscard]] uint32_t getFirstCIGAR() const noexcept;
749 [[nodiscard]] std::string getReferenceName() const {return referenceName_; };
765 [[nodiscard]] std::vector< std::pair<float, hts_pos_t> > getReadCentricMatchStatus() const;
774 [[nodiscard]] std::vector<MappedReadInterval> getPoorlyMappedRegions(const BinomialWindowParameters &windowParameters) const;
783 [[nodiscard]] std::string getSequenceAndQuality(const MappedReadInterval &segmentBoundaries) const;
784 private:
786 static const uint16_t sequenceMask_;
788 static const uint16_t qualityShift_;
790 static const uint16_t suppSecondaryAlgn_;
795 static const std::array<hts_pos_t, 10> queryConsumption_;
800 static const std::array<hts_pos_t, 10> referenceConsumption_;
802 static const std::array<char, 16> seqNT16str_;
804 static const std::array<char, 16> complSeqNT16str_;
809 static const std::array<float, 10> sequenceMatch_;
811 bool isMapped_{false};
813 bool isRev_{false};
815 uint16_t secondaryAlignmentCount_{0};
821 hts_pos_t mapStart_{0};
827 hts_pos_t mapEnd_{0};
829 std::string readName_;
831 std::string referenceName_;
833 std::vector<uint32_t> cigar_;
839 std::vector<uint16_t> sequenceAndQuality_;
841 std::vector<BAMsecondary> localSecondaryAlignments_;
842 };
843
850 public:
852 BAMtoGenome() = default;
861 BAMtoGenome(const BamAndGffFiles &bamGFFfilePairNames);
866 BAMtoGenome(const BAMtoGenome &toCopy) = default;
872 BAMtoGenome& operator=(const BAMtoGenome &toCopy) = default;
877 BAMtoGenome(BAMtoGenome &&toMove) noexcept = default;
883 BAMtoGenome& operator=(BAMtoGenome &&toMove) noexcept = default;
885 ~BAMtoGenome() = default;
886
894 [[nodiscard]] size_t nChromosomes() const noexcept;
899 [[nodiscard]] size_t nExonSets() const noexcept;
908 void saveReadCoverageStats(const std::string &outFileName, const size_t &nThreads) const;
919 void saveUnmappedRegions(const std::string &outFileName, const BinomialWindowParameters &windowParameters, const size_t &nThreads) const;
931 void saveUnmappedRegions(const StatsAndFastqFiles &outFilePair, const BinomialWindowParameters &windowParameters, const size_t &nThreads) const;
932 private:
934 static const uint16_t suppSecondaryAlgn_;
935
940 std::vector< std::pair<BAMrecord, ExonGroup> > readsAndExons_;
941
952 [[nodiscard]] std::vector<ExonGroup>::const_iterator findOverlappingGene_(const std::vector<ExonGroup> &chromosomeExonGroups,
953 const std::vector<ExonGroup>::const_iterator &exonGroupSearchStart, BAMrecord &alignedRead);
960 void processPrimaryAlignment_(const std::string &referenceName, const BAMrecord &alignmentRecord, std::unordered_map<std::string, std::vector<ExonGroup>::const_iterator> &latestExonGroupIts);
967 void processSecondaryAlignment_(const std::string &referenceName, const BAMrecord &alignmentRecord, const std::unordered_map<std::string, std::vector<ExonGroup>::const_iterator> &latestExonGroupIts);
968 };
969
975 class BAMfile {
976 public:
978 BAMfile() = default;
983 BAMfile(const std::string &BAMfileName);
988 BAMfile(const BAMfile &toCopy) = delete;
994 BAMfile& operator=(const BAMfile &toCopy) = delete;
999 BAMfile(BAMfile &&toMove) noexcept = default;
1005 BAMfile& operator=(BAMfile &&toMove) noexcept = default;
1007 ~BAMfile() = default;
1008
1013 [[nodiscard]] size_t getPrimaryAlignmentCount() const noexcept;
1022 void addRemaps(const std::string &remapBAMfileName, const float &remapIdentityCutoff);
1028 [[nodiscard]] std::vector<std::string> saveRemappedBAM(const std::string &outputBAMfileName) const;
1037 [[nodiscard]] std::vector<std::string> saveSortedRemappedBAM(const std::string &outputBAMfileName) const;
1038 private:
1040 static const uint16_t secondaryOrUnmappedAlgn_;
1042 std::unordered_map<
1043 std::string,
1044 std::unordered_map<
1045 std::string,
1046 std::vector< std::unique_ptr<bam1_t, BAMrecordDeleter> >
1047 >
1048 > bamRecords_;
1050 std::unique_ptr<sam_hdr_t, BAMheaderDeleter> bamFileHeader_;
1051 };
1052}
Records from a BAM file.
Definition isoseqAlgn.hpp:975
BAMfile(BAMfile &&toMove) noexcept=default
Move constructor.
BAMfile & operator=(const BAMfile &toCopy)=delete
Copy assignment operator.
void addRemaps(const std::string &remapBAMfileName, const float &remapIdentityCutoff)
Add re-mapped read regions.
size_t getPrimaryAlignmentCount() const noexcept
Get the number of primary alignments.
BAMfile & operator=(BAMfile &&toMove) noexcept=default
Move assignment operator.
BAMfile(const std::string &BAMfileName)
Constructor with BAM file name.
BAMfile()=default
Default constructor.
~BAMfile()=default
Destructor.
std::vector< std::string > saveRemappedBAM(const std::string &outputBAMfileName) const
Save the reads with re-alignments to a BAM file.
BAMfile(const BAMfile &toCopy)=delete
Copy constructor.
std::vector< std::string > saveSortedRemappedBAM(const std::string &outputBAMfileName) const
Sort and save the reads with re-alignments to a BAM file.
Summary of a BAM record set.
Definition isoseqAlgn.hpp:609
std::vector< std::pair< float, hts_pos_t > > getReadCentricMatchStatus() const
Reference match status along the read.
bool isMapped() const noexcept
Is the read mapped?
Definition isoseqAlgn.hpp:693
void addSecondaryAlignment(const bam1_t *alignmentRecord, const sam_hdr_t *samHeader, const hts_pos_t localWindow=500 '000)
Add a secondary alignment record.
bool hasLocalSecondaryAlignments() const noexcept
Are there any local secondary alignments?
Definition isoseqAlgn.hpp:703
bool isRevComp() const noexcept
Is the read reverse-complemented?
Definition isoseqAlgn.hpp:688
BAMrecord(const bam1_t *alignmentRecord, const sam_hdr_t *samHeader)
Constructor with data.
hts_pos_t getmRNAstart() const noexcept
mRNA start position
Definition isoseqAlgn.hpp:681
BAMrecord(const BAMrecord &toCopy)=default
Copy constructor.
hts_pos_t getMapEnd() const noexcept
Map end position.
Definition isoseqAlgn.hpp:674
uint32_t getFirstCIGAR() const noexcept
Get the first cigar element with strand reversal.
std::string getReferenceName() const
Get reference name.
Definition isoseqAlgn.hpp:749
std::vector< uint32_t > getCIGARvector() const
CIGAR vector.
Definition isoseqAlgn.hpp:730
BAMrecord()=default
Default constructor.
hts_pos_t getMapStart() const noexcept
Map start position.
Definition isoseqAlgn.hpp:667
uint16_t localSecondaryAlignmentCount() const noexcept
Count of secondary alignments overlapping the primary.
Definition isoseqAlgn.hpp:713
uint16_t localReversedSecondaryAlignmentCount() const noexcept
Count of reversed secondary alignments overlapping the primary.
BAMrecord & operator=(const BAMrecord &toCopy)=default
Copy assignment operator.
std::string getReadName() const
Output read name.
Definition isoseqAlgn.hpp:660
bool hasSecondaryAlignments() const noexcept
Are there any secondary alignments?
Definition isoseqAlgn.hpp:698
std::string getSequenceAndQuality(const MappedReadInterval &segmentBoundaries) const
Get a segment of the sequence and the ASCII quality score.
uint16_t secondaryAlignmentCount() const noexcept
Count of all secondary alignments regardless of position.
Definition isoseqAlgn.hpp:708
~BAMrecord()=default
Destructor.
hts_pos_t getReadLength() const noexcept
Read length.
Definition isoseqAlgn.hpp:723
BAMrecord(BAMrecord &&toMove) noexcept=default
Move constructor.
std::vector< MappedReadInterval > getPoorlyMappedRegions(const BinomialWindowParameters &windowParameters) const
Identify unmapped portions of the read.
BAMrecord & operator=(BAMrecord &&toMove) noexcept=default
Move assignment operator.
std::string getCIGARstring() const
CIGAR string.
MappedReadMatchStatus getBestReferenceMatchStatus() const
Best reference-centric match status.
BAM file safe reader.
Definition isoseqAlgn.hpp:278
BAMsafeReader(BAMsafeReader &&toMove) noexcept
Move constructor.
Definition isoseqAlgn.hpp:302
std::unique_ptr< sam_hdr_t, BAMheaderDeleter > getHeaderCopy() const
Get BAM header.
BAMsafeReader(const BAMsafeReader &toCopy)=delete
Copy constructor.
BAMsafeReader()=default
Default constructor.
std::pair< std::unique_ptr< bam1_t, BAMrecordDeleter >, int32_t > getNextRecord()
Get the next BAM record.
BAMsafeReader & operator=(BAMsafeReader &&toMove) noexcept
Move assignment operator.
BAMsafeReader & operator=(const BAMsafeReader &toCopy)=delete
Copy assignment operator.
BAMsafeReader(const std::string &bamFileName)
Constructor with file name.
~BAMsafeReader()
Destructor.
Relate BAM alignments to exons.
Definition isoseqAlgn.hpp:849
size_t nChromosomes() const noexcept
Number of chromosomes/scaffolds/linkage groups.
BAMtoGenome & operator=(BAMtoGenome &&toMove) noexcept=default
Move assignment operator.
BAMtoGenome()=default
Default constructor.
~BAMtoGenome()=default
Destructor.
BAMtoGenome(BAMtoGenome &&toMove) noexcept=default
Move constructor.
void saveUnmappedRegions(const std::string &outFileName, const BinomialWindowParameters &windowParameters, const size_t &nThreads) const
Save unmapped read statistics to file.
BAMtoGenome & operator=(const BAMtoGenome &toCopy)=default
Copy assignment operator.
void saveReadCoverageStats(const std::string &outFileName, const size_t &nThreads) const
Save read coverage to file.
BAMtoGenome(const BamAndGffFiles &bamGFFfilePairNames)
Constructor intersecting iso-Seq alignments and exons.
BAMtoGenome(const BAMtoGenome &toCopy)=default
Copy constructor.
size_t nExonSets() const noexcept
Number of exon sets (genes with exons).
Group of exons from the same gene.
Definition isoseqAlgn.hpp:345
ExonGroup(const ExonGroup &toCopy)=default
Copy constructor.
char strand() const noexcept
Strand ID.
Definition isoseqAlgn.hpp:413
uint32_t firstExonAfter(const hts_pos_t &position) const noexcept
Index of the first exon after a given position.
uint32_t lastExonBefore(const hts_pos_t &position) const noexcept
Index of the last exon before a given position.
ExonGroup(std::string geneName, const char strand, std::set< std::pair< hts_pos_t, hts_pos_t > > &exonSet)
Constructor with lines from a GFF file.
std::pair< hts_pos_t, hts_pos_t > firstExonSpan() const noexcept
First exon span.
std::string geneName() const
Report the gene name.
Definition isoseqAlgn.hpp:403
ExonGroup & operator=(ExonGroup &&toMove) noexcept=default
Move assignment operator.
ExonGroup(ExonGroup &&toMove) noexcept=default
Move constructor.
hts_pos_t firstExonLength() const noexcept
First exon length.
size_t nExons() const noexcept
Number of exons in the gene.
Definition isoseqAlgn.hpp:408
uint32_t lastOverlappingExon(const hts_pos_t &position) const noexcept
Index of the last exon overlapping a given position.
std::pair< hts_pos_t, hts_pos_t > at(const size_t &idx) const
Range covered by a given exon.
Definition isoseqAlgn.hpp:419
uint32_t firstOverlappingExon(const hts_pos_t &position) const noexcept
Index of the first exon overlapping a given position.
ExonGroup()=default
Default constructor.
ExonGroup & operator=(const ExonGroup &toCopy)=default
Copy assignment operator.
ExonGroup(std::string geneName, const char strand, const std::vector< std::string > &exonLinesFomGFF)
Constructor with exon lines from a GFF file.
bool empty() const noexcept
Is the object empty?
Definition isoseqAlgn.hpp:398
std::vector< float > getBestExonCoverageQuality(const BAMrecord &alignment) const
Get best read coverage quality per exon.
std::vector< float > getExonCoverageQuality(const BAMrecord &alignment) const
Get read coverage quality per exon.
std::pair< hts_pos_t, hts_pos_t > getFirstIntronSpan() const
First intron span.
~ExonGroup()=default
Destructor.
std::pair< hts_pos_t, hts_pos_t > geneSpan() const noexcept
Gene span.
Binomial BIC for a read window.
Definition isoseqAlgn.hpp:536
float getBICdifference() const noexcept
Get BIC difference.
ReadMatchWindowBIC(ReadMatchWindowBIC &&toMove) noexcept=default
Move constructor.
ReadMatchWindowBIC()=default
Default constructor.
ReadMatchWindowBIC & operator=(ReadMatchWindowBIC &&toMove) noexcept=default
Move assignment operator.
~ReadMatchWindowBIC()=default
Destructor.
ReadMatchWindowBIC & operator=(const ReadMatchWindowBIC &toCopy)=default
Copy assignment operator.
ReadMatchWindowBIC(const std::vector< std::pair< float, hts_pos_t > >::const_iterator &windowBegin, const BinomialWindowParameters &windowParameters)
Constructor with a read match vector window.
Definition isoseqAlgn.hpp:545
ReadMatchWindowBIC(const ReadMatchWindowBIC &toCopy)=default
Copy constructor.
BAM header pointer deleter.
Definition isoseqAlgn.hpp:247
void operator()(sam_hdr_t *header) const
Functor operator.
Definition isoseqAlgn.hpp:252
BAM record pointer deleter.
Definition isoseqAlgn.hpp:255
void operator()(bam1_t *bamRecord) const
Functor operator.
Definition isoseqAlgn.hpp:260
BAM secondary alignment.
Definition isoseqAlgn.hpp:236
hts_pos_t mapStart
Base-1 map start position on the reference.
Definition isoseqAlgn.hpp:238
std::vector< uint32_t > cigar
CIGAR string.
Definition isoseqAlgn.hpp:244
hts_pos_t mapEnd
Base-1 map end position on the reference.
Definition isoseqAlgn.hpp:240
bool sameStrandAsPrimary
Is it the same strand as the primary?
Definition isoseqAlgn.hpp:242
BGZF handle pointer deleter.
Definition isoseqAlgn.hpp:263
void operator()(BGZF *bgzfHandle) const
Functor operator.
Definition isoseqAlgn.hpp:268
BAM and GFF file name pair.
Definition isoseqAlgn.hpp:65
std::string gffFileName
GFF file name.
Definition isoseqAlgn.hpp:69
std::string bamFileName
BAM file name.
Definition isoseqAlgn.hpp:67
Binomial window parameters.
Definition isoseqAlgn.hpp:108
float bicDifferenceThreshold
Minimum difference in BIC between windows with current and alternative probabilities.
Definition isoseqAlgn.hpp:118
float alternativeProbability
Probability of success to consider as an alternative.
Definition isoseqAlgn.hpp:116
std::vector< std::pair< float, hts_pos_t > >::difference_type windowSize
Window size.
Definition isoseqAlgn.hpp:120
float currentProbability
Probability of success up to this window.
Definition isoseqAlgn.hpp:110
Delineates an interval in a mapped read.
Definition isoseqAlgn.hpp:90
hts_pos_t referenceStart
Base-1 start position on the reference.
Definition isoseqAlgn.hpp:96
hts_pos_t readStart
Base-0 start position on the read.
Definition isoseqAlgn.hpp:92
hts_pos_t referenceEnd
Base-1 end position on the reference.
Definition isoseqAlgn.hpp:98
hts_pos_t readEnd
Base-0 end position on the read.
Definition isoseqAlgn.hpp:94
Read match and map start.
Definition isoseqAlgn.hpp:101
hts_pos_t mapStart
Map start.
Definition isoseqAlgn.hpp:103
std::vector< float > matchStatus
Match status vector.
Definition isoseqAlgn.hpp:105
Exons covered by a read.
Definition isoseqAlgn.hpp:127
std::vector< float > bestExonCoverageScores
Best exon coverage scores.
Definition isoseqAlgn.hpp:221
std::string geneName
Gene name.
Definition isoseqAlgn.hpp:187
hts_pos_t alignmentEnd
Alignment end.
Definition isoseqAlgn.hpp:147
hts_pos_t bestAlignmentEnd
Best alignment end.
Definition isoseqAlgn.hpp:159
std::string readName
Read name.
Definition isoseqAlgn.hpp:135
uint16_t nSecondaryAlignments
Number of secondary alignments.
Definition isoseqAlgn.hpp:167
hts_pos_t alignmentStart
Alignment start.
Definition isoseqAlgn.hpp:141
std::vector< float > exonCoverageScores
Exon coverage scores.
Definition isoseqAlgn.hpp:215
uint32_t firstSoftClipLength
Length of the soft clip at read start.
Definition isoseqAlgn.hpp:165
char strand
Strand ID.
Definition isoseqAlgn.hpp:192
uint16_t nGoodSecondaryAlignments
Number of good secondary alignments.
Definition isoseqAlgn.hpp:173
hts_pos_t lastExonEnd
Last exon end.
Definition isoseqAlgn.hpp:209
uint16_t nLocalReversedAlignments
Number of locally mapping reverse-complemented reads.
Definition isoseqAlgn.hpp:179
hts_pos_t firstExonStart
First exon start.
Definition isoseqAlgn.hpp:203
std::string chromosomeName
Chromosome name.
Definition isoseqAlgn.hpp:133
hts_pos_t bestAlignmentStart
Best alignment start.
Definition isoseqAlgn.hpp:153
uint16_t nExons
Number of exons.
Definition isoseqAlgn.hpp:181
hts_pos_t firstExonLength
First exon length.
Definition isoseqAlgn.hpp:197
Read portion for re-mapping.
Definition isoseqAlgn.hpp:224
size_t end
Base-0 one past the read end.
Definition isoseqAlgn.hpp:228
std::string originalName
Original read name.
Definition isoseqAlgn.hpp:230
size_t start
Base-0 read start.
Definition isoseqAlgn.hpp:226
Mismatch statistics and FASTQ file name pair.
Definition isoseqAlgn.hpp:72
std::string statsFileName
Statistics file name.
Definition isoseqAlgn.hpp:74
std::string fastqFileName
FASTQ file name.
Definition isoseqAlgn.hpp:76
Token name and GFF attribute list pair.
Definition isoseqAlgn.hpp:79
std::string tokenName
GFF token name.
Definition isoseqAlgn.hpp:81
std::vector< std::string > attributeList
Attribute list.
Definition isoseqAlgn.hpp:83