Skip to main content

fastx_tools/
lib.rs

1//! FASTA and FASTQ record storage and manipulation.
2//!
3//! Types to store in memory and manipulate FASTA and FASTQ records.
4//!
5
6
7/// Types and functions for reading, storing, and manipulating FASTA and FASTQ records.
8pub mod fastx {
9    use std::collections::HashMap;
10    use std::fs::File;
11    use std::io::{BufRead, BufReader, Write};
12
13    /// Holds a FASTA/FASTQ record name and sequence range for per-record subsetting.
14    #[derive(Debug)]
15    pub struct NameWithRange {
16        /// Record header name, including the leading `>` or `@`.
17        pub name:  String,
18        /// Base-0 start position, inclusive.
19        pub start: usize,
20        /// Base-0 end position, exclusive.
21        pub end:   usize,
22    }
23
24    fn clamp_slice(s: &str, start: usize, end: usize) -> &str {
25        let real_start = start.min(end).min( s.len() );
26        let real_end   = end.min( s.len() );
27        &s[real_start..real_end]
28    }
29
30    /// Common interface for sequence record types stored in `FastxRecords`.
31    pub trait FastxRecord: Clone {
32        /// Returns the base-0 index of this record in the source file.
33        fn get_index(&self) -> u32;
34        /// Returns the sequence string.
35        fn get_sequence(&self) -> &str;
36        /// Returns a new record containing the subsequence between `start` (inclusive) and `end` (exclusive).
37        fn subsequence(&self, start: usize, end: usize) -> Self;
38        /// Returns the record formatted for writing to a file.
39        fn format_output(&self, name: &str) -> String;
40    }
41
42    /// Holds the sequence and its base-0 index in the original FASTA file.
43    #[derive(Clone, Debug)]
44    pub struct IndexedSequence {
45        original_index_: u32,
46        sequence_:       String,
47    }
48
49    impl IndexedSequence {
50        /// Creates a new `IndexedSequence` with the given index and sequence.
51        ///
52        /// # Arguments
53        ///
54        /// * `original_index` - The position of this sequence in the original FASTA file
55        /// * `sequence` - The nucleotide or amino acid sequence string
56        pub fn new(original_index: u32, sequence: &str) -> IndexedSequence {
57            IndexedSequence {
58                original_index_: original_index,
59                sequence_:       sequence.to_string(),
60            }
61        }
62
63    }
64
65    impl FastxRecord for IndexedSequence {
66        fn get_index(&self) -> u32 { self.original_index_ }
67
68        fn get_sequence(&self) -> &str { &self.sequence_ }
69
70        fn subsequence(&self, start: usize, end: usize) -> IndexedSequence {
71            IndexedSequence::new( self.original_index_, clamp_slice(&self.sequence_, start, end) )
72        }
73
74        fn format_output(&self, name: &str) -> String {
75            format!("{name}\n{}\n", self.sequence_)
76        }
77    }
78
79    /// Holds the sequence, its quality scores, and its base-0 index in the original FASTQ file.
80    #[derive(Clone, Debug)]
81    pub struct IndexedSequenceWithQuality {
82        original_index_: u32,
83        quality_scores_: String,
84        sequence_:       String,
85    }
86
87    impl IndexedSequenceWithQuality {
88        /// Creates a new `IndexedSequenceWithQuality` with the given index, quality scores, and sequence.
89        ///
90        /// # Arguments
91        ///
92        /// * `original_index` - The position of this sequence in the original FASTQ file
93        /// * `quality_scores` - The quality scores for this sequence
94        /// * `sequence` - The nucleotide or amino acid sequence string
95        ///
96        /// # Errors
97        ///
98        /// Returns an error if `quality_scores` and `sequence` differ in length.
99        pub fn new(original_index: u32, quality_scores: &str, sequence: &str) -> Result<IndexedSequenceWithQuality, String> {
100            if quality_scores.len() != sequence.len() {
101                return Err( "Quality scores must be the same length as sequence".to_string() )
102            }
103            Ok(
104                IndexedSequenceWithQuality {
105                    original_index_: original_index,
106                    quality_scores_: quality_scores.to_string(),
107                    sequence_:       sequence.to_string(),
108                }
109            )
110        }
111
112        /// Returns the quality scores string.
113        pub fn get_quality_scores(&self) -> &str { &self.quality_scores_ }
114    }
115
116    impl FastxRecord for IndexedSequenceWithQuality {
117        fn get_index(&self) -> u32 { self.original_index_ }
118
119        fn get_sequence(&self) -> &str { &self.sequence_ }
120
121        fn subsequence(&self, start: usize, end: usize) -> IndexedSequenceWithQuality {
122            IndexedSequenceWithQuality::new(
123                self.original_index_,
124                clamp_slice(&self.quality_scores_, start, end),
125                clamp_slice(&self.sequence_, start, end),
126            ).unwrap()
127        }
128
129        fn format_output(&self, name: &str) -> String {
130            format!("{name}\n{}\n+\n{}\n", self.sequence_, self.quality_scores_)
131        }
132    }
133
134    /// Generic collection of sequence records, indexed by name.
135    #[derive(Debug)]
136    pub struct FastxRecords<T> {
137        records_:             HashMap<String, T>,
138        max_sequence_length_: usize,
139    }
140
141    impl<T: FastxRecord> FastxRecords<T> {
142        /// Return record count.
143        pub fn num_records(&self) -> usize { self.records_.len() }
144        /// Return length of the longest sequence
145        pub fn get_max_length(&self) -> usize { self.max_sequence_length_ }
146
147        /// Return a subset of records from a list of names.
148        ///
149        /// # Arguments
150        ///
151        /// * `names` - A list of record names, not necessarily all present in the current object.
152        ///
153        /// # Returns
154        ///
155        /// A tuple containing a `FastxRecords` object with any records present in the input list
156        /// and a list of names not found. An empty list returns an empty object. Record indexes
157        /// still refer to the original file. Duplicate record names are removed in the
158        /// `FastxRecords` object but not in the list of absent records.
159        ///
160        pub fn records_by_name(&self, names: Vec<String>) -> (FastxRecords<T>, String) {
161            let mut subset: HashMap<String, T> = HashMap::new();
162            let mut absent_names               = String::new();
163            let mut current_max_length: usize  = 0;
164            for name in names {
165                if let Some(record) = self.records_.get(&name) {
166                    subset.insert( name, record.clone() );
167                    current_max_length = current_max_length.max( record.get_sequence().len() );
168                } else {
169                    if !absent_names.is_empty() { absent_names.push('\n'); }
170                    absent_names.push_str(&name);
171                }
172            }
173            (
174                FastxRecords {
175                    records_:             subset,
176                    max_sequence_length_: current_max_length
177                },
178                absent_names
179            )
180        }
181
182        /// Return subsequences of all records.
183        ///
184        /// # Arguments
185        ///
186        /// * `start` - Base-0 start position (inclusive)
187        /// * `end` - Base-0 end position (exclusive)
188        ///
189        /// # Returns
190        ///
191        /// A `FastxRecords` object with all records, but with the subsequence between `start` and `end`.
192        ///
193        pub fn subsequences(&self, start: usize, end: usize) -> FastxRecords<T> {
194            let mut subset: HashMap<String, T> = HashMap::new();
195            // still must track because the actual lengths may not be (end - start) long
196            let mut current_max_length: usize = 0;
197            for (name, record) in &self.records_ {
198                let local_subsequence = record.subsequence(start, end);
199                current_max_length    = current_max_length.max( local_subsequence.get_sequence().len() );
200                subset.insert(name.clone(), local_subsequence);
201            }
202            FastxRecords { records_: subset, max_sequence_length_: current_max_length }
203        }
204
205        /// Return subsequences of named records.
206        ///
207        /// # Arguments
208        ///
209        /// * `names_ranges` - Record names and ranges.
210        ///
211        /// # Returns
212        ///
213        /// A tuple containing a `FastxRecords` object with subsets of any records present in the input list
214        /// and a list of names not found. An empty list returns an empty object. Record indexes
215        /// still refer to the original file. Duplicate record names are removed in the
216        /// `FastxRecords` object but not in the list of absent records.
217        ///
218        pub fn subsequences_by_name(&self, names_ranges: Vec<NameWithRange>) -> (FastxRecords<T>, String) {
219            let mut subset: HashMap<String, T> = HashMap::new();
220            let mut absent_names               = String::new();
221            let mut current_max_length: usize  = 0;
222            for name_range in names_ranges {
223                if let Some(record) = self.records_.get(&name_range.name) {
224                    let local_subsequence = record.subsequence(name_range.start, name_range.end);
225                    current_max_length    = current_max_length.max( local_subsequence.get_sequence().len() );
226                    subset.insert(name_range.name, local_subsequence);
227                } else {
228                    if !absent_names.is_empty() { absent_names.push('\n'); }
229                    absent_names.push_str(&name_range.name);
230                }
231            }
232            (FastxRecords { records_: subset, max_sequence_length_: current_max_length }, absent_names)
233        }
234
235        /// Merge in a `FastxRecords` object.
236        ///
237        /// Any records also present in the current object are overwritten by new values.
238        ///
239        /// # Arguments
240        ///
241        /// * `from` - an object to merge
242        ///
243        pub fn merge(&mut self, from: FastxRecords<T>) {
244            self.max_sequence_length_ = self.max_sequence_length_.max(from.max_sequence_length_);
245            self.records_.extend(from.records_);
246        }
247
248        /// Save the records to a file.
249        ///
250        /// # Arguments
251        ///
252        /// * `output_path` - Path to a file
253        ///
254        /// # Errors
255        ///
256        /// Returns an error if the file cannot be created or any record cannot be written.
257        ///
258        pub fn save_records(&self, output_path: &str) -> Result<(), String> {
259            let mut file = File::create(output_path)
260                .map_err( |error| error.to_string() )?;
261            for (name, record) in &self.records_ {
262                file.write_all( record.format_output(name).as_bytes() )
263                    .map_err( |error| error.to_string() )?;
264            }
265            Ok( () )
266        }
267
268        /// Save the records in the order they appeared in the original file.
269        ///
270        /// # Arguments
271        ///
272        /// * `output_path` - Path to a file
273        ///
274        /// # Errors
275        ///
276        /// Returns an error if the file cannot be created or any record cannot be written.
277        ///
278        pub fn save_sorted_records(&self, output_path: &str) -> Result<(), String> {
279            let mut file = File::create(output_path)
280                .map_err( |error| error.to_string() )?;
281            let mut sorted: Vec<(&String, &T)> = self.records_.iter().collect();
282            sorted.sort_by_key( |(_, record)| record.get_index() );
283            for (name, record) in sorted {
284                file.write_all( record.format_output(name).as_bytes() )
285                    .map_err( |error| error.to_string() )?;
286            }
287            Ok( () )
288        }
289    }
290
291    /// Reads a FASTA file and returns a collection of records.
292    ///
293    /// # Arguments
294    ///
295    /// * `fasta_path` - Path to a FASTA file
296    ///
297    /// # Errors
298    ///
299    /// Returns an error if the file cannot be opened, any line cannot be read, or the file
300    /// contains no valid header+sequence pairs.
301    ///
302    pub fn read_fasta(fasta_path: &str) -> Result<FastxRecords<IndexedSequence>, String> {
303        let mut local_records: HashMap<String, IndexedSequence> = HashMap::new();
304        let mut current_header                                  = String::new();
305        let mut current_sequence                                = String::new();
306        let mut current_max_length: usize                       = 0;
307        let file = File::open(fasta_path)
308            .map_err( |error| error.to_string() )?;
309        let mut record_idx: u32 = 0;
310        for line in BufReader::new(file).lines() {
311            let line = line.map_err( |error| error.to_string() )?;
312            if line.starts_with('>') {
313                if !current_header.is_empty() && !current_sequence.is_empty() {
314                    local_records.insert( current_header.clone(), IndexedSequence::new(record_idx, &current_sequence) );
315                    current_max_length = current_max_length.max( current_sequence.len() );
316                    record_idx += 1;
317                }
318                current_header = line;
319                current_sequence.clear();
320                continue;
321            }
322            current_sequence.push_str(&line);
323        }
324        if !current_header.is_empty() && !current_sequence.is_empty() {
325            local_records.insert(
326                current_header.clone(),
327                IndexedSequence::new(record_idx, &current_sequence)
328            );
329            current_max_length = current_max_length.max( current_sequence.len() );
330        }
331        if local_records.is_empty() {
332            return Err( format!("No valid FASTA records in {fasta_path} file") )
333        }
334        Ok( FastxRecords { records_: local_records, max_sequence_length_: current_max_length } )
335    }
336
337    /// Reads a FASTQ file and returns a collection of records.
338    ///
339    /// # Arguments
340    ///
341    /// * `fastq_path` - Path to a FASTQ file
342    ///
343    /// # Errors
344    ///
345    /// Returns an error if the file cannot be opened, any record is malformed (missing lines,
346    /// non-`@` header, invalid `+` separator, or quality/sequence length mismatch), or the file
347    /// contains no valid records.
348    ///
349    pub fn read_fastq(fastq_path: &str) -> Result<FastxRecords<IndexedSequenceWithQuality>, String> {
350        let mut local_records: HashMap<String, IndexedSequenceWithQuality> = HashMap::new();
351        let file = File::open(fastq_path)
352            .map_err( |error| error.to_string() )?;
353        let mut record_idx: u32           = 0;
354        let mut current_max_length: usize = 0;
355        let mut lines                     = BufReader::new(file).lines();
356        loop {
357            let header = match lines.next() {
358                None    => break,
359                Some(l) => l.map_err( |error| error.to_string() )?,
360            };
361            if header.is_empty() { continue; }
362            if !header.starts_with('@') {
363                return Err( format!("Expected '@' header line, got: {header}") );
364            }
365            let sequence = lines.next()
366                .ok_or_else( || format!("Missing sequence line after header: {header}") )?
367                .map_err( |error| error.to_string() )?;
368            let plus = lines.next()
369                .ok_or_else( || format!("Missing '+' line after sequence in record: {header}") )?
370                .map_err( |error| error.to_string() )?;
371            if !plus.starts_with('+') {
372                return Err( format!("Expected '+' separator line, got: {plus} in record: {header}") );
373            }
374            let quality = lines.next()
375                .ok_or_else( || format!("Missing quality line after '+' in record: {header}") )?
376                .map_err( |error| error.to_string() )?;
377            current_max_length = current_max_length.max( sequence.len() );
378            local_records.insert(
379                header.clone(),
380                IndexedSequenceWithQuality::new(record_idx, &quality, &sequence)
381                    .map_err( |error| format!("{error} in record: {header}") )?
382            );
383            record_idx += 1;
384        }
385        if local_records.is_empty() {
386            return Err( format!("No valid FASTQ records in {fastq_path} file") );
387        }
388        Ok( FastxRecords { records_: local_records, max_sequence_length_: current_max_length } )
389    }
390}
391
392#[cfg(test)]
393mod tests {
394    use super::fastx::*;
395    use std::io::Write;
396    use tempfile::NamedTempFile;
397
398    fn make_standard_fasta() -> NamedTempFile {
399        let mut tmp = NamedTempFile::new().unwrap();
400        writeln!(tmp, ">seq1").unwrap();
401        writeln!(tmp, "ACGTACGT").unwrap();
402        writeln!(tmp, ">seq2").unwrap();
403        writeln!(tmp, "TTGGCCAA").unwrap();
404        writeln!(tmp, ">seq3").unwrap();
405        writeln!(tmp, "GCGCGCGC").unwrap();
406        tmp
407    }
408
409    fn make_standard_fastq() -> NamedTempFile {
410        let mut tmp = NamedTempFile::new().unwrap();
411        writeln!(tmp, "@record1").unwrap();
412        writeln!(tmp, "ACGTACGTACGT").unwrap();
413        writeln!(tmp, "+").unwrap();
414        writeln!(tmp, "IIIIIIIIIIII").unwrap();
415        writeln!(tmp, "@record2").unwrap();
416        writeln!(tmp, "TTGGCCAATTGG").unwrap();
417        writeln!(tmp, "+").unwrap();
418        writeln!(tmp, "HHHHHHHHHHHH").unwrap();
419        writeln!(tmp, "@record3").unwrap();
420        writeln!(tmp, "GCGCGCGCGCGC").unwrap();
421        writeln!(tmp, "+").unwrap();
422        writeln!(tmp, "????????????").unwrap();
423        tmp
424    }
425
426    // FastxRecords tests
427
428    #[test]
429    fn test_fasta_records_new_loads_all_records() {
430        let fasta   = make_standard_fasta();
431        let records = read_fasta( fasta.path().to_str().unwrap() ).unwrap();
432        assert_eq!(records.num_records(), 3);
433    }
434
435    #[test]
436    fn test_fasta_records_by_name_returns_present_records() {
437        let fasta   = make_standard_fasta();
438        let records = read_fasta( fasta.path().to_str().unwrap() ).unwrap();
439        let names   = vec![">seq1".to_string(), ">seq2".to_string()];
440        let (subset, absent) = records.records_by_name(names);
441        assert_eq!(subset.num_records(), 2);
442        assert!( absent.is_empty() );
443    }
444
445    #[test]
446    fn test_fasta_records_by_name_single_record() {
447        let fasta            = make_standard_fasta();
448        let records          = read_fasta( fasta.path().to_str().unwrap() ).unwrap();
449        let names            = vec![">seq1".to_string()];
450        let (subset, absent) = records.records_by_name(names);
451        assert_eq!(subset.num_records(), 1);
452        assert!( absent.is_empty() );
453    }
454
455    #[test]
456    fn test_fasta_records_by_name_absent_names_reported() {
457        let fasta            = make_standard_fasta();
458        let records          = read_fasta( fasta.path().to_str().unwrap() ).unwrap();
459        let names            = vec!["not_a_real_record".to_string()];
460        let (subset, absent) = records.records_by_name(names);
461        assert_eq!(subset.num_records(), 0);
462        assert!( absent.contains("not_a_real_record") );
463    }
464
465    #[test]
466    fn test_fasta_records_by_name_empty_input_returns_empty() {
467        let fasta            = make_standard_fasta();
468        let records          = read_fasta( fasta.path().to_str().unwrap() ).unwrap();
469        let (subset, absent) = records.records_by_name(vec![]);
470        assert_eq!(subset.num_records(), 0);
471        assert!( absent.is_empty() );
472    }
473
474    #[test]
475    fn test_fasta_records_by_name_duplicate_present_name_deduplicated() {
476        let fasta            = make_standard_fasta();
477        let records          = read_fasta( fasta.path().to_str().unwrap() ).unwrap();
478        let names            = vec![">seq1".to_string(), ">seq1".to_string()];
479        let (subset, absent) = records.records_by_name(names);
480        assert_eq!(subset.num_records(), 1);
481        assert!( absent.is_empty() );
482    }
483
484    #[test]
485    fn test_fasta_records_by_name_duplicate_absent_name_repeated_in_output() {
486        let fasta            = make_standard_fasta();
487        let records          = read_fasta( fasta.path().to_str().unwrap() ).unwrap();
488        let names            = vec!["missing".to_string(), "missing".to_string()];
489        let (subset, absent) = records.records_by_name(names);
490        assert_eq!(subset.num_records(), 0);
491        assert_eq!(absent, "missing\nmissing");
492    }
493
494    #[test]
495    fn test_fasta_records_new_empty_file_returns_error() {
496        let tmp    = NamedTempFile::new().unwrap();
497        let result = read_fasta( tmp.path().to_str().unwrap() );
498        assert!( result.unwrap_err().contains("No valid FASTA records in") );
499    }
500
501    #[test]
502    fn test_fasta_records_new_no_headers_returns_error() {
503        let mut tmp = NamedTempFile::new().unwrap();
504        writeln!(tmp, "ACGTACGT").unwrap();
505        writeln!(tmp, "TTGGCCAA").unwrap();
506        let result = read_fasta( tmp.path().to_str().unwrap() );
507        assert!( result.unwrap_err().contains("No valid FASTA records in") );
508    }
509
510    #[test]
511    fn test_fasta_records_new_header_without_sequence_returns_error() {
512        let mut tmp = NamedTempFile::new().unwrap();
513        writeln!(tmp, ">solo_header").unwrap();
514        let result = read_fasta( tmp.path().to_str().unwrap() );
515        assert!( result.unwrap_err().contains("No valid FASTA records in") );
516    }
517
518    #[test]
519    fn test_fasta_records_new_nonexistent_file_returns_error() {
520        let tmp  = NamedTempFile::new().unwrap();
521        let path = tmp.path().to_str().unwrap().to_string();
522        drop(tmp);
523        let result = read_fasta(&path);
524        assert!( result.unwrap_err().contains("No such file or directory") );
525    }
526
527    // multi-line FASTA sequence tests
528    #[test]
529    fn test_fasta_multiline_sequence_is_concatenated() {
530        let mut tmp = NamedTempFile::new().unwrap();
531        writeln!(tmp, ">record1").unwrap();
532        writeln!(tmp, "ACGT").unwrap();
533        writeln!(tmp, "GCGC").unwrap();
534        writeln!(tmp, "TTTT").unwrap();
535        let records = read_fasta( tmp.path().to_str().unwrap() ).unwrap();
536        assert_eq!(records.num_records(), 1);
537        let out = NamedTempFile::new().unwrap();
538        records.save_sorted_records( out.path().to_str().unwrap() ).unwrap();
539        let content          = std::fs::read_to_string( out.path() ).unwrap();
540        let lines: Vec<&str> = content.lines().collect();
541        assert_eq!(lines[1], "ACGTGCGCTTTT");
542    }
543
544    #[test]
545    fn test_fasta_multiline_sequence_multiple_records() {
546        let mut tmp = NamedTempFile::new().unwrap();
547        writeln!(tmp, ">record1").unwrap();
548        writeln!(tmp, "ACGT").unwrap();
549        writeln!(tmp, "GCGC").unwrap();
550        writeln!(tmp, ">record2").unwrap();
551        writeln!(tmp, "TTTT").unwrap();
552        writeln!(tmp, "AAAA").unwrap();
553        let records = read_fasta( tmp.path().to_str().unwrap() ).unwrap();
554        assert_eq!(records.num_records(), 2);
555    }
556
557    // FastxRecords::subsequences tests
558    #[test]
559    fn test_fasta_records_subsequences_preserves_record_count() {
560        let fasta   = make_standard_fasta();
561        let records = read_fasta( fasta.path().to_str().unwrap() ).unwrap();
562        let sub     = records.subsequences(0, 10);
563        assert_eq!( sub.num_records(), records.num_records() );
564    }
565
566    #[test]
567    fn test_fasta_records_subsequences_within_bounds() {
568        let fasta       = make_standard_fasta();
569        let records     = read_fasta( fasta.path().to_str().unwrap() ).unwrap();
570        let (first, _)  = records.records_by_name(vec![">seq1".to_string()]);
571        let sub         = first.subsequences(2, 5);
572        let (result, _) = sub.records_by_name(vec![">seq1".to_string()]);
573        assert_eq!(result.num_records(), 1);
574    }
575
576    #[test]
577    fn test_fasta_records_subsequences_full_sequence_preserves_length() {
578        let fasta   = make_standard_fasta();
579        let records = read_fasta( fasta.path().to_str().unwrap() ).unwrap();
580        let sub     = records.subsequences(0, usize::MAX);
581        assert_eq!(sub.num_records(), 3);
582    }
583
584    #[test]
585    fn test_fasta_records_subsequences_start_equals_end_returns_empty_sequences() {
586        let fasta   = make_standard_fasta();
587        let records = read_fasta( fasta.path().to_str().unwrap() ).unwrap();
588        let sub     = records.subsequences(5, 5);
589        assert_eq!(sub.num_records(), 3);
590    }
591
592    #[test]
593    fn test_fasta_records_subsequences_start_beyond_end_returns_empty_sequences() {
594        let fasta   = make_standard_fasta();
595        let records = read_fasta( fasta.path().to_str().unwrap() ).unwrap();
596        let sub     = records.subsequences(10, 2);
597        assert_eq!(sub.num_records(), 3);
598    }
599
600    #[test]
601    fn test_fasta_records_subsequences_start_beyond_sequence_returns_empty_sequences() {
602        let fasta   = make_standard_fasta();
603        let records = read_fasta( fasta.path().to_str().unwrap() ).unwrap();
604        let sub     = records.subsequences(usize::MAX - 1, usize::MAX);
605        assert_eq!(sub.num_records(), 3);
606    }
607
608    // FastxRecords::subsequences_by_name tests
609    #[test]
610    fn test_subsequences_by_name_returns_present_records() {
611        let fasta        = make_standard_fasta();
612        let records      = read_fasta( fasta.path().to_str().unwrap() ).unwrap();
613        let names_ranges = vec![
614            NameWithRange { name: ">seq1".to_string(), start: 0, end: 5 },
615            NameWithRange { name: ">seq2".to_string(), start: 0, end: 5 },
616        ];
617        let (subset, absent) = records.subsequences_by_name(names_ranges);
618        assert_eq!(subset.num_records(), 2);
619        assert!( absent.is_empty() );
620    }
621
622    #[test]
623    fn test_subsequences_by_name_single_record() {
624        let fasta        = make_standard_fasta();
625        let records      = read_fasta( fasta.path().to_str().unwrap() ).unwrap();
626        let names_ranges = vec![
627            NameWithRange { name: ">seq1".to_string(), start: 2, end: 7 },
628        ];
629        let (subset, absent) = records.subsequences_by_name(names_ranges);
630        assert_eq!(subset.num_records(), 1);
631        assert!( absent.is_empty() );
632    }
633
634    #[test]
635    fn test_subsequences_by_name_absent_names_reported() {
636        let fasta        = make_standard_fasta();
637        let records      = read_fasta( fasta.path().to_str().unwrap() ).unwrap();
638        let names_ranges = vec![
639            NameWithRange { name: "not_a_real_record".to_string(), start: 0, end: 5 },
640        ];
641        let (subset, absent) = records.subsequences_by_name(names_ranges);
642        assert_eq!(subset.num_records(), 0);
643        assert_eq!(absent, "not_a_real_record");
644    }
645
646    #[test]
647    fn test_subsequences_by_name_multiple_absent_names_separated_by_newline() {
648        let fasta        = make_standard_fasta();
649        let records      = read_fasta( fasta.path().to_str().unwrap() ).unwrap();
650        let names_ranges = vec![
651            NameWithRange { name: "missing_one".to_string(), start: 0, end: 5 },
652            NameWithRange { name: "missing_two".to_string(), start: 0, end: 5 },
653        ];
654        let (_, absent) = records.subsequences_by_name(names_ranges);
655        assert_eq!(absent, "missing_one\nmissing_two");
656    }
657
658    #[test]
659    fn test_subsequences_by_name_mixed_present_and_absent() {
660        let fasta        = make_standard_fasta();
661        let records      = read_fasta( fasta.path().to_str().unwrap() ).unwrap();
662        let names_ranges = vec![
663            NameWithRange { name: ">seq1".to_string(), start: 0, end: 5 },
664            NameWithRange { name: "missing_record".to_string(), start: 0, end: 5 },
665        ];
666        let (subset, absent) = records.subsequences_by_name(names_ranges);
667        assert_eq!(subset.num_records(), 1);
668        assert_eq!(absent, "missing_record");
669    }
670
671    #[test]
672    fn test_subsequences_by_name_empty_input_returns_empty() {
673        let fasta            = make_standard_fasta();
674        let records          = read_fasta( fasta.path().to_str().unwrap() ).unwrap();
675        let (subset, absent) = records.subsequences_by_name(vec![]);
676        assert_eq!(subset.num_records(), 0);
677        assert!( absent.is_empty() );
678    }
679
680    #[test]
681    fn test_subsequences_by_name_start_equals_end() {
682        let fasta        = make_standard_fasta();
683        let records      = read_fasta( fasta.path().to_str().unwrap() ).unwrap();
684        let names_ranges = vec![
685            NameWithRange { name: ">seq1".to_string(), start: 5, end: 5 },
686        ];
687        let (subset, absent) = records.subsequences_by_name(names_ranges);
688        assert_eq!(subset.num_records(), 1);
689        assert!( absent.is_empty() );
690    }
691
692    #[test]
693    fn test_subsequences_by_name_start_beyond_end() {
694        let fasta        = make_standard_fasta();
695        let records      = read_fasta( fasta.path().to_str().unwrap() ).unwrap();
696        let names_ranges = vec![
697            NameWithRange { name: ">seq1".to_string(), start: 10, end: 2 },
698        ];
699        let (subset, absent) = records.subsequences_by_name(names_ranges);
700        assert_eq!(subset.num_records(), 1);
701        assert!( absent.is_empty() );
702    }
703
704    #[test]
705    fn test_subsequences_by_name_start_beyond_sequence() {
706        let fasta        = make_standard_fasta();
707        let records      = read_fasta( fasta.path().to_str().unwrap() ).unwrap();
708        let names_ranges = vec![
709            NameWithRange { name: ">seq1".to_string(), start: usize::MAX - 1, end: usize::MAX },
710        ];
711        let (subset, absent) = records.subsequences_by_name(names_ranges);
712        assert_eq!(subset.num_records(), 1);
713        assert!( absent.is_empty() );
714    }
715
716    // FastxRecords::save_records and save_sorted_records tests
717    #[test]
718    fn test_fasta_save_records_roundtrip() {
719        let fasta   = make_standard_fasta();
720        let records = read_fasta( fasta.path().to_str().unwrap() ).unwrap();
721        let tmp     = NamedTempFile::new().unwrap();
722        records.save_records( tmp.path().to_str().unwrap() ).unwrap();
723        let saved = read_fasta( tmp.path().to_str().unwrap() ).unwrap();
724        assert_eq!( saved.num_records(), records.num_records() );
725    }
726
727    #[test]
728    fn test_fasta_save_sorted_records_roundtrip() {
729        let fasta   = make_standard_fasta();
730        let records = read_fasta( fasta.path().to_str().unwrap() ).unwrap();
731        let tmp     = NamedTempFile::new().unwrap();
732        records.save_sorted_records( tmp.path().to_str().unwrap() ).unwrap();
733        let saved = read_fasta( tmp.path().to_str().unwrap() ).unwrap();
734        assert_eq!( saved.num_records(), records.num_records() );
735    }
736
737    #[test]
738    fn test_fasta_save_sorted_records_order() {
739        let mut input_tmp = NamedTempFile::new().unwrap();
740        writeln!(input_tmp, ">first").unwrap();
741        writeln!(input_tmp, "AAAA").unwrap();
742        writeln!(input_tmp, ">second").unwrap();
743        writeln!(input_tmp, "CCCC").unwrap();
744        writeln!(input_tmp, ">third").unwrap();
745        writeln!(input_tmp, "GGGG").unwrap();
746        let records    = read_fasta( input_tmp.path().to_str().unwrap() ).unwrap();
747        let output_tmp = NamedTempFile::new().unwrap();
748        records.save_sorted_records( output_tmp.path().to_str().unwrap() ).unwrap();
749        let content = std::fs::read_to_string( output_tmp.path() ).unwrap();
750        let headers: Vec<&str> = content.lines().filter( |line| line.starts_with('>') ).collect();
751        assert_eq!(headers, vec![">first", ">second", ">third"]);
752    }
753
754    // FastqRecords tests
755
756    #[test]
757    fn test_fastq_records_new_loads_all_records() {
758        let fastq   = make_standard_fastq();
759        let records = read_fastq( fastq.path().to_str().unwrap() ).unwrap();
760        assert_eq!(records.num_records(), 3);
761    }
762
763    #[test]
764    fn test_fastq_records_by_name_returns_present_records() {
765        let fastq            = make_standard_fastq();
766        let records          = read_fastq( fastq.path().to_str().unwrap() ).unwrap();
767        let names            = vec!["@record1".to_string(), "@record2".to_string()];
768        let (subset, absent) = records.records_by_name(names);
769        assert_eq!(subset.num_records(), 2);
770        assert!( absent.is_empty() );
771    }
772
773    #[test]
774    fn test_fastq_records_by_name_single_record() {
775        let fastq            = make_standard_fastq();
776        let records          = read_fastq( fastq.path().to_str().unwrap() ).unwrap();
777        let names            = vec!["@record3".to_string()];
778        let (subset, absent) = records.records_by_name(names);
779        assert_eq!(subset.num_records(), 1);
780        assert!( absent.is_empty() );
781    }
782
783    #[test]
784    fn test_fastq_records_by_name_absent_names_reported() {
785        let fastq            = make_standard_fastq();
786        let records          = read_fastq( fastq.path().to_str().unwrap() ).unwrap();
787        let names            = vec!["not_a_real_record".to_string()];
788        let (subset, absent) = records.records_by_name(names);
789        assert_eq!(subset.num_records(), 0);
790        assert!( absent.contains("not_a_real_record") );
791    }
792
793    #[test]
794    fn test_fastq_records_by_name_empty_input_returns_empty() {
795        let fastq            = make_standard_fastq();
796        let records          = read_fastq( fastq.path().to_str().unwrap() ).unwrap();
797        let (subset, absent) = records.records_by_name(vec![]);
798        assert_eq!(subset.num_records(), 0);
799        assert!( absent.is_empty() );
800    }
801
802    #[test]
803    fn test_fastq_records_by_name_duplicate_present_name_deduplicated() {
804        let fastq            = make_standard_fastq();
805        let records          = read_fastq( fastq.path().to_str().unwrap() ).unwrap();
806        let names            = vec!["@record1".to_string(), "@record1".to_string()];
807        let (subset, absent) = records.records_by_name(names);
808        assert_eq!(subset.num_records(), 1);
809        assert!( absent.is_empty() );
810    }
811
812    #[test]
813    fn test_fastq_records_by_name_duplicate_absent_name_repeated_in_output() {
814        let fastq            = make_standard_fastq();
815        let records          = read_fastq( fastq.path().to_str().unwrap() ).unwrap();
816        let names            = vec!["missing".to_string(), "missing".to_string()];
817        let (subset, absent) = records.records_by_name(names);
818        assert_eq!(subset.num_records(), 0);
819        assert_eq!(absent, "missing\nmissing");
820    }
821
822    #[test]
823    fn test_fastq_records_new_empty_file_returns_error() {
824        let tmp    = NamedTempFile::new().unwrap();
825        let result = read_fastq( tmp.path().to_str().unwrap() );
826        assert!( result.unwrap_err().contains("No valid FASTQ records in") );
827    }
828
829    #[test]
830    fn test_fastq_records_new_no_at_header_returns_error() {
831        let mut tmp = NamedTempFile::new().unwrap();
832        writeln!(tmp, "record1").unwrap();
833        writeln!(tmp, "ACGTACGT").unwrap();
834        writeln!(tmp, "+").unwrap();
835        writeln!(tmp, "IIIIIIII").unwrap();
836        let result = read_fastq( tmp.path().to_str().unwrap() );
837        assert!( result.unwrap_err().contains("Expected '@' header line") );
838    }
839
840    #[test]
841    fn test_fastq_records_new_missing_sequence_line_returns_error() {
842        let mut tmp = NamedTempFile::new().unwrap();
843        writeln!(tmp, "@record1").unwrap();
844        let result = read_fastq( tmp.path().to_str().unwrap() );
845        assert!( result.unwrap_err().contains("Missing sequence line after header") );
846    }
847
848    #[test]
849    fn test_fastq_records_new_truncated_record_returns_error() {
850        let mut tmp = NamedTempFile::new().unwrap();
851        writeln!(tmp, "@record1").unwrap();
852        writeln!(tmp, "ACGTACGT").unwrap();
853        let result = read_fastq( tmp.path().to_str().unwrap() );
854        assert!( result.unwrap_err().contains("Missing '+' line") );
855    }
856
857    #[test]
858    fn test_fastq_records_new_blank_lines_between_records_are_skipped() {
859        let mut tmp = NamedTempFile::new().unwrap();
860        writeln!(tmp, "@record1").unwrap();
861        writeln!(tmp, "ACGTACGT").unwrap();
862        writeln!(tmp, "+").unwrap();
863        writeln!(tmp, "IIIIIIII").unwrap();
864        writeln!(tmp, "").unwrap();
865        writeln!(tmp, "@record2").unwrap();
866        writeln!(tmp, "GCGCGCGC").unwrap();
867        writeln!(tmp, "+").unwrap();
868        writeln!(tmp, "HHHHHHHH").unwrap();
869        let records = read_fastq( tmp.path().to_str().unwrap() ).unwrap();
870        assert_eq!(records.num_records(), 2);
871    }
872
873    #[test]
874    fn test_fastq_records_new_invalid_separator_returns_error() {
875        let mut tmp = NamedTempFile::new().unwrap();
876        writeln!(tmp, "@record1").unwrap();
877        writeln!(tmp, "ACGTACGT").unwrap();
878        writeln!(tmp, "GCGCGCGC").unwrap();
879        writeln!(tmp, "IIIIIIII").unwrap();
880        let result = read_fastq( tmp.path().to_str().unwrap() );
881        assert!( result.unwrap_err().contains("Expected '+' separator line") );
882    }
883
884    #[test]
885    fn test_fastq_records_new_missing_quality_line_returns_error() {
886        let mut tmp = NamedTempFile::new().unwrap();
887        writeln!(tmp, "@record1").unwrap();
888        writeln!(tmp, "ACGTACGT").unwrap();
889        writeln!(tmp, "+").unwrap();
890        let result = read_fastq( tmp.path().to_str().unwrap() );
891        assert!( result.unwrap_err().contains("Missing quality line after '+'") );
892    }
893
894    #[test]
895    fn test_fastq_records_new_mismatched_quality_length_returns_error() {
896        let mut tmp = NamedTempFile::new().unwrap();
897        writeln!(tmp, "@record1").unwrap();
898        writeln!(tmp, "ACGTACGT").unwrap();
899        writeln!(tmp, "+").unwrap();
900        writeln!(tmp, "IIII").unwrap();
901        let result = read_fastq( tmp.path().to_str().unwrap() );
902        assert!( result.unwrap_err().contains("Quality scores must be the same length") );
903    }
904
905    #[test]
906    fn test_fastq_records_new_nonexistent_file_returns_error() {
907        let tmp  = NamedTempFile::new().unwrap();
908        let path = tmp.path().to_str().unwrap().to_string();
909        drop(tmp);
910        let result = read_fastq(&path);
911        assert!( result.unwrap_err().contains("No such file or directory") );
912    }
913
914    // FastqRecords::subsequences tests
915    #[test]
916    fn test_fastq_records_subsequences_preserves_record_count() {
917        let fastq   = make_standard_fastq();
918        let records = read_fastq( fastq.path().to_str().unwrap() ).unwrap();
919        let sub     = records.subsequences(0, 5);
920        assert_eq!( sub.num_records(), records.num_records() );
921    }
922
923    #[test]
924    fn test_fastq_records_subsequences_within_bounds() {
925        let fastq       = make_standard_fastq();
926        let records     = read_fastq( fastq.path().to_str().unwrap() ).unwrap();
927        let (first, _)  = records.records_by_name(vec!["@record1".to_string()]);
928        let sub         = first.subsequences(2, 5);
929        let (result, _) = sub.records_by_name(vec!["@record1".to_string()]);
930        assert_eq!(result.num_records(), 1);
931    }
932
933    #[test]
934    fn test_fastq_records_subsequences_full_sequence_preserves_count() {
935        let fastq   = make_standard_fastq();
936        let records = read_fastq( fastq.path().to_str().unwrap() ).unwrap();
937        let sub     = records.subsequences(0, usize::MAX);
938        assert_eq!(sub.num_records(), 3);
939    }
940
941    #[test]
942    fn test_fastq_records_subsequences_start_equals_end_returns_empty_sequences() {
943        let fastq   = make_standard_fastq();
944        let records = read_fastq( fastq.path().to_str().unwrap() ).unwrap();
945        let sub     = records.subsequences(5, 5);
946        assert_eq!(sub.num_records(), 3);
947    }
948
949    #[test]
950    fn test_fastq_records_subsequences_start_beyond_end_returns_empty_sequences() {
951        let fastq   = make_standard_fastq();
952        let records = read_fastq( fastq.path().to_str().unwrap() ).unwrap();
953        let sub     = records.subsequences(10, 2);
954        assert_eq!(sub.num_records(), 3);
955    }
956
957    #[test]
958    fn test_fastq_records_subsequences_start_beyond_sequence_returns_empty_sequences() {
959        let fastq   = make_standard_fastq();
960        let records = read_fastq( fastq.path().to_str().unwrap() ).unwrap();
961        let sub     = records.subsequences(usize::MAX - 1, usize::MAX);
962        assert_eq!(sub.num_records(), 3);
963    }
964
965    // FastqRecords::subsequences_by_name tests
966    #[test]
967    fn test_fastq_subsequences_by_name_returns_present_records() {
968        let fastq        = make_standard_fastq();
969        let records      = read_fastq( fastq.path().to_str().unwrap() ).unwrap();
970        let names_ranges = vec![
971            NameWithRange { name: "@record1".to_string(), start: 0, end: 5 },
972            NameWithRange { name: "@record2".to_string(), start: 0, end: 5 },
973        ];
974        let (subset, absent) = records.subsequences_by_name(names_ranges);
975        assert_eq!(subset.num_records(), 2);
976        assert!( absent.is_empty() );
977    }
978
979    #[test]
980    fn test_fastq_subsequences_by_name_single_record() {
981        let fastq        = make_standard_fastq();
982        let records      = read_fastq( fastq.path().to_str().unwrap() ).unwrap();
983        let names_ranges = vec![
984            NameWithRange { name: "@record3".to_string(), start: 2, end: 7 },
985        ];
986        let (subset, absent) = records.subsequences_by_name(names_ranges);
987        assert_eq!(subset.num_records(), 1);
988        assert!( absent.is_empty() );
989    }
990
991    #[test]
992    fn test_fastq_subsequences_by_name_absent_names_reported() {
993        let fastq        = make_standard_fastq();
994        let records      = read_fastq( fastq.path().to_str().unwrap() ).unwrap();
995        let names_ranges = vec![
996            NameWithRange { name: "not_a_real_record".to_string(), start: 0, end: 5 },
997        ];
998        let (subset, absent) = records.subsequences_by_name(names_ranges);
999        assert_eq!(subset.num_records(), 0);
1000        assert_eq!(absent, "not_a_real_record");
1001    }
1002
1003    #[test]
1004    fn test_fastq_subsequences_by_name_multiple_absent_names_separated_by_newline() {
1005        let fastq        = make_standard_fastq();
1006        let records      = read_fastq( fastq.path().to_str().unwrap() ).unwrap();
1007        let names_ranges = vec![
1008            NameWithRange { name: "missing_one".to_string(), start: 0, end: 5 },
1009            NameWithRange { name: "missing_two".to_string(), start: 0, end: 5 },
1010        ];
1011        let (_, absent) = records.subsequences_by_name(names_ranges);
1012        assert_eq!(absent, "missing_one\nmissing_two");
1013    }
1014
1015    #[test]
1016    fn test_fastq_subsequences_by_name_mixed_present_and_absent() {
1017        let fastq        = make_standard_fastq();
1018        let records      = read_fastq( fastq.path().to_str().unwrap() ).unwrap();
1019        let names_ranges = vec![
1020            NameWithRange { name: "@record1".to_string(), start: 0, end: 5 },
1021            NameWithRange { name: "missing_record".to_string(), start: 0, end: 5 },
1022        ];
1023        let (subset, absent) = records.subsequences_by_name(names_ranges);
1024        assert_eq!(subset.num_records(), 1);
1025        assert_eq!(absent, "missing_record");
1026    }
1027
1028    #[test]
1029    fn test_fastq_subsequences_by_name_empty_input_returns_empty() {
1030        let fastq            = make_standard_fastq();
1031        let records          = read_fastq( fastq.path().to_str().unwrap() ).unwrap();
1032        let (subset, absent) = records.subsequences_by_name(vec![]);
1033        assert_eq!(subset.num_records(), 0);
1034        assert!( absent.is_empty() );
1035    }
1036
1037    #[test]
1038    fn test_fastq_subsequences_by_name_start_equals_end() {
1039        let fastq        = make_standard_fastq();
1040        let records      = read_fastq( fastq.path().to_str().unwrap() ).unwrap();
1041        let names_ranges = vec![
1042            NameWithRange { name: "@record1".to_string(), start: 5, end: 5 },
1043        ];
1044        let (subset, absent) = records.subsequences_by_name(names_ranges);
1045        assert_eq!(subset.num_records(), 1);
1046        assert!( absent.is_empty() );
1047    }
1048
1049    #[test]
1050    fn test_fastq_subsequences_by_name_start_beyond_end() {
1051        let fastq        = make_standard_fastq();
1052        let records      = read_fastq( fastq.path().to_str().unwrap() ).unwrap();
1053        let names_ranges = vec![
1054            NameWithRange { name: "@record1".to_string(), start: 10, end: 2 },
1055        ];
1056        let (subset, absent) = records.subsequences_by_name(names_ranges);
1057        assert_eq!(subset.num_records(), 1);
1058        assert!( absent.is_empty() );
1059    }
1060
1061    #[test]
1062    fn test_fastq_subsequences_by_name_start_beyond_sequence() {
1063        let fastq        = make_standard_fastq();
1064        let records      = read_fastq( fastq.path().to_str().unwrap() ).unwrap();
1065        let names_ranges = vec![
1066            NameWithRange { name: "@record1".to_string(), start: usize::MAX - 1, end: usize::MAX },
1067        ];
1068        let (subset, absent) = records.subsequences_by_name(names_ranges);
1069        assert_eq!(subset.num_records(), 1);
1070        assert!( absent.is_empty() );
1071    }
1072
1073    // FastqRecords::save_records and save_sorted_records tests
1074    #[test]
1075    fn test_fastq_save_records_roundtrip() {
1076        let fastq   = make_standard_fastq();
1077        let records = read_fastq( fastq.path().to_str().unwrap() ).unwrap();
1078        let tmp     = NamedTempFile::new().unwrap();
1079        records.save_records( tmp.path().to_str().unwrap() ).unwrap();
1080        let saved = read_fastq( tmp.path().to_str().unwrap() ).unwrap();
1081        assert_eq!( saved.num_records(), records.num_records() );
1082    }
1083
1084    #[test]
1085    fn test_fastq_save_sorted_records_roundtrip() {
1086        let fastq   = make_standard_fastq();
1087        let records = read_fastq( fastq.path().to_str().unwrap() ).unwrap();
1088        let tmp     = NamedTempFile::new().unwrap();
1089        records.save_sorted_records( tmp.path().to_str().unwrap() ).unwrap();
1090        let saved = read_fastq( tmp.path().to_str().unwrap() ).unwrap();
1091        assert_eq!( saved.num_records(), records.num_records() );
1092    }
1093
1094    #[test]
1095    fn test_fastq_save_sorted_records_order() {
1096        let fastq   = make_standard_fastq();
1097        let records = read_fastq( fastq.path().to_str().unwrap() ).unwrap();
1098        let tmp     = NamedTempFile::new().unwrap();
1099        records.save_sorted_records( tmp.path().to_str().unwrap() ).unwrap();
1100        let content = std::fs::read_to_string( tmp.path() ).unwrap();
1101        let headers: Vec<&str> = content.lines().filter( |line| line.starts_with('@') ).collect();
1102        assert_eq!(headers, vec!["@record1", "@record2", "@record3"]);
1103    }
1104
1105    // FastxRecords::merge tests
1106    #[test]
1107    fn test_merge_disjoint_sets_combines_all_records() {
1108        let mut tmp1 = NamedTempFile::new().unwrap();
1109        writeln!(tmp1, ">r1").unwrap();
1110        writeln!(tmp1, "AAAA").unwrap();
1111        let mut tmp2 = NamedTempFile::new().unwrap();
1112        writeln!(tmp2, ">r2").unwrap();
1113        writeln!(tmp2, "CCCC").unwrap();
1114        let mut a = read_fasta( tmp1.path().to_str().unwrap() ).unwrap();
1115        let b     = read_fasta( tmp2.path().to_str().unwrap() ).unwrap();
1116        a.merge(b);
1117        assert_eq!(a.num_records(), 2);
1118    }
1119
1120    #[test]
1121    fn test_merge_duplicate_key_overwrites_existing_record() {
1122        let mut tmp1 = NamedTempFile::new().unwrap();
1123        writeln!(tmp1, ">r1").unwrap();
1124        writeln!(tmp1, "AAAA").unwrap();
1125        let mut tmp2 = NamedTempFile::new().unwrap();
1126        writeln!(tmp2, ">r1").unwrap();
1127        writeln!(tmp2, "CCCC").unwrap();
1128        let mut a = read_fasta( tmp1.path().to_str().unwrap() ).unwrap();
1129        let b     = read_fasta( tmp2.path().to_str().unwrap() ).unwrap();
1130        a.merge(b);
1131        assert_eq!(a.num_records(), 1);
1132        let out = NamedTempFile::new().unwrap();
1133        a.save_records( out.path().to_str().unwrap() ).unwrap();
1134        let content = std::fs::read_to_string( out.path() ).unwrap();
1135        assert!(  content.contains("CCCC") );
1136        assert!( !content.contains("AAAA") );
1137    }
1138
1139    #[test]
1140    fn test_merge_into_empty_collection() {
1141        let mut tmp1 = NamedTempFile::new().unwrap();
1142        writeln!(tmp1, ">r1").unwrap();
1143        writeln!(tmp1, "AAAA").unwrap();
1144        let mut tmp2 = NamedTempFile::new().unwrap();
1145        writeln!(tmp2, ">r2").unwrap();
1146        writeln!(tmp2, "CCCC").unwrap();
1147        let a              = read_fasta( tmp1.path().to_str().unwrap() ).unwrap();
1148        let b              = read_fasta( tmp2.path().to_str().unwrap() ).unwrap();
1149        let (mut empty, _) = a.records_by_name(vec![]);
1150        empty.merge(b);
1151        assert_eq!(empty.num_records(), 1);
1152    }
1153
1154    #[test]
1155    fn test_merge_empty_collection_into_existing() {
1156        let mut tmp = NamedTempFile::new().unwrap();
1157        writeln!(tmp, ">r1").unwrap();
1158        writeln!(tmp, "AAAA").unwrap();
1159        let mut a      = read_fasta( tmp.path().to_str().unwrap() ).unwrap();
1160        let (empty, _) = a.records_by_name(vec![]);
1161        a.merge(empty);
1162        assert_eq!(a.num_records(), 1);
1163    }
1164
1165    #[test]
1166    fn test_merge_updates_max_length_when_incoming_is_longer() {
1167        let mut tmp1 = NamedTempFile::new().unwrap();
1168        writeln!(tmp1, ">r1").unwrap();
1169        writeln!(tmp1, "AAAA").unwrap();
1170        let mut tmp2 = NamedTempFile::new().unwrap();
1171        writeln!(tmp2, ">r2").unwrap();
1172        writeln!(tmp2, "CCCCCCCC").unwrap();
1173        let mut a = read_fasta( tmp1.path().to_str().unwrap() ).unwrap();
1174        let b     = read_fasta( tmp2.path().to_str().unwrap() ).unwrap();
1175        a.merge(b);
1176        assert_eq!(a.get_max_length(), 8);
1177    }
1178
1179    #[test]
1180    fn test_merge_max_length_unchanged_when_incoming_is_shorter() {
1181        let mut tmp1 = NamedTempFile::new().unwrap();
1182        writeln!(tmp1, ">r1").unwrap();
1183        writeln!(tmp1, "CCCCCCCC").unwrap();
1184        let mut tmp2 = NamedTempFile::new().unwrap();
1185        writeln!(tmp2, ">r2").unwrap();
1186        writeln!(tmp2, "AAAA").unwrap();
1187        let mut a = read_fasta( tmp1.path().to_str().unwrap() ).unwrap();
1188        let b     = read_fasta( tmp2.path().to_str().unwrap() ).unwrap();
1189        a.merge(b);
1190        assert_eq!(a.get_max_length(), 8);
1191    }
1192
1193    // get_max_length tests
1194    #[test]
1195    fn test_fasta_get_max_length_after_read() {
1196        let mut tmp = NamedTempFile::new().unwrap();
1197        writeln!(tmp, ">r1").unwrap();
1198        writeln!(tmp, "AAAA").unwrap();
1199        writeln!(tmp, ">r2").unwrap();
1200        writeln!(tmp, "CCCCCCCC").unwrap();
1201        writeln!(tmp, ">r3").unwrap();
1202        writeln!(tmp, "GGG").unwrap();
1203        let records = read_fasta( tmp.path().to_str().unwrap() ).unwrap();
1204        assert_eq!(records.get_max_length(), 8);
1205    }
1206
1207    #[test]
1208    fn test_fastq_get_max_length_after_read() {
1209        let fastq   = make_standard_fastq();
1210        let records = read_fastq( fastq.path().to_str().unwrap() ).unwrap();
1211        assert_eq!(records.get_max_length(), 12);
1212    }
1213
1214    #[test]
1215    fn test_fasta_get_max_length_after_records_by_name() {
1216        let mut tmp = NamedTempFile::new().unwrap();
1217        writeln!(tmp, ">r1").unwrap();
1218        writeln!(tmp, "AAAA").unwrap();
1219        writeln!(tmp, ">r2").unwrap();
1220        writeln!(tmp, "CCCCCCCC").unwrap();
1221        writeln!(tmp, ">r3").unwrap();
1222        writeln!(tmp, "GGG").unwrap();
1223        let records     = read_fasta( tmp.path().to_str().unwrap() ).unwrap();
1224        let (subset, _) = records.records_by_name(vec![">r1".to_string(), ">r3".to_string()]);
1225        assert_eq!(subset.get_max_length(), 4);
1226    }
1227
1228    #[test]
1229    fn test_fasta_get_max_length_after_subsequences() {
1230        let mut tmp = NamedTempFile::new().unwrap();
1231        writeln!(tmp, ">r1").unwrap();
1232        writeln!(tmp, "AAAA").unwrap();
1233        writeln!(tmp, ">r2").unwrap();
1234        writeln!(tmp, "CCCCCCCC").unwrap();
1235        writeln!(tmp, ">r3").unwrap();
1236        writeln!(tmp, "GGG").unwrap();
1237        let records = read_fasta( tmp.path().to_str().unwrap() ).unwrap();
1238        let sub     = records.subsequences(0, 5);
1239        // r1=AAAA(4), r2=CCCCC(5), r3=GGG(3) — clamped at sequence length
1240        assert_eq!(sub.get_max_length(), 5);
1241    }
1242
1243    #[test]
1244    fn test_fasta_get_max_length_after_subsequences_by_name() {
1245        let mut tmp = NamedTempFile::new().unwrap();
1246        writeln!(tmp, ">r1").unwrap();
1247        writeln!(tmp, "AAAA").unwrap();
1248        writeln!(tmp, ">r2").unwrap();
1249        writeln!(tmp, "CCCCCCCC").unwrap();
1250        writeln!(tmp, ">r3").unwrap();
1251        writeln!(tmp, "GGG").unwrap();
1252        let records      = read_fasta( tmp.path().to_str().unwrap() ).unwrap();
1253        let names_ranges = vec![
1254            NameWithRange { name: ">r2".to_string(), start: 0, end: 5 },
1255            NameWithRange { name: ">r1".to_string(), start: 0, end: 3 },
1256        ];
1257        let (subset, _) = records.subsequences_by_name(names_ranges);
1258        assert_eq!(subset.get_max_length(), 5);
1259    }
1260
1261    #[test]
1262    fn test_fasta_get_max_length_empty_subset_is_zero() {
1263        let fasta       = make_standard_fasta();
1264        let records     = read_fasta( fasta.path().to_str().unwrap() ).unwrap();
1265        let (subset, _) = records.records_by_name(vec![]);
1266        assert_eq!(subset.get_max_length(), 0);
1267    }
1268
1269    // IndexedSequence tests
1270    #[test]
1271    fn test_new_and_get_index() {
1272        let seq = IndexedSequence::new( 3, "ACGT" );
1273        assert_eq!(seq.get_index(), 3);
1274    }
1275
1276    #[test]
1277    fn test_slice_within_bounds() {
1278        let seq = IndexedSequence::new( 0, "ACGTACGT" );
1279        assert_eq!(seq.subsequence(2, 5).get_sequence(), "GTA");
1280    }
1281
1282    #[test]
1283    fn test_slice_full_sequence() {
1284        let seq = IndexedSequence::new( 0, "ACGT" );
1285        assert_eq!(seq.subsequence(0, 4).get_sequence(), "ACGT");
1286    }
1287
1288    #[test]
1289    fn test_slice_equal_start_and_end() {
1290        let seq = IndexedSequence::new( 0, "ACGT" );
1291        assert_eq!(seq.subsequence(2, 2).get_sequence(), "");
1292    }
1293
1294    #[test]
1295    fn test_slice_end_beyond_sequence() {
1296        let seq = IndexedSequence::new( 0, "ACGT" );
1297        assert_eq!(seq.subsequence(1, 10).get_sequence(), "CGT");
1298    }
1299
1300    #[test]
1301    fn test_slice_start_beyond_sequence() {
1302        let seq = IndexedSequence::new( 0, "ACGT" );
1303        assert_eq!(seq.subsequence(10, 20).get_sequence(), "");
1304    }
1305
1306    #[test]
1307    fn test_slice_start_greater_than_end() {
1308        let seq = IndexedSequence::new( 0, "ACGT" );
1309        assert_eq!(seq.subsequence(3, 1).get_sequence(), "");
1310    }
1311
1312    #[test]
1313    fn test_empty_sequence_zero_indices() {
1314        let seq = IndexedSequence::new( 0, "" );
1315        assert_eq!(seq.subsequence(0, 0).get_sequence(), "");
1316    }
1317
1318    #[test]
1319    fn test_empty_sequence_nonzero_indices() {
1320        let seq = IndexedSequence::new( 0, "" );
1321        assert_eq!(seq.subsequence(2, 5).get_sequence(), "");
1322    }
1323
1324    #[test]
1325    fn test_empty_sequence_start_greater_than_end() {
1326        let seq = IndexedSequence::new( 0, "" );
1327        assert_eq!(seq.subsequence(5, 2).get_sequence(), "");
1328    }
1329
1330    #[test]
1331    fn test_clone_indexed_sequence() {
1332        let original = IndexedSequence::new( 7, "ACGT" );
1333        let cloned   = original.clone();
1334        assert_eq!( cloned.get_index(), original.get_index() );
1335        assert_eq!( cloned.get_sequence(), original.get_sequence() );
1336    }
1337
1338    #[test]
1339    fn test_clone_indexed_sequence_is_independent() {
1340        let original = IndexedSequence::new( 2, "ACGT" );
1341        let cloned   = original.clone();
1342        assert_eq!(cloned.get_index(), 2);
1343        assert_eq!(cloned.get_sequence(), "ACGT");
1344    }
1345
1346    // IndexedSequenceWithQuality tests
1347    #[test]
1348    fn test_quality_new_and_get_index() {
1349        let seq = IndexedSequenceWithQuality::new( 5, "IIII", "ACGT" ).unwrap();
1350        assert_eq!(seq.get_index(), 5);
1351    }
1352
1353    #[test]
1354    fn test_quality_new_mismatched_lengths_returns_error() {
1355        let result = IndexedSequenceWithQuality::new( 0, "II", "ACGT" );
1356        assert!( result.is_err() );
1357    }
1358
1359    #[test]
1360    fn test_quality_subsequence_within_bounds() {
1361        let seq = IndexedSequenceWithQuality::new( 0, "IIHH????", "ACGTACGT" ).unwrap();
1362        let sub = seq.subsequence(2, 5);
1363        assert_eq!(sub.get_sequence(), "GTA");
1364        assert_eq!(sub.get_quality_scores(), "HH?");
1365    }
1366
1367    #[test]
1368    fn test_quality_subsequence_full_sequence() {
1369        let seq = IndexedSequenceWithQuality::new( 0, "IIII", "ACGT" ).unwrap();
1370        let sub = seq.subsequence(0, 4);
1371        assert_eq!(sub.get_sequence(), "ACGT");
1372        assert_eq!(sub.get_quality_scores(), "IIII");
1373    }
1374
1375    #[test]
1376    fn test_quality_subsequence_equal_start_and_end() {
1377        let seq = IndexedSequenceWithQuality::new( 0, "IIII", "ACGT" ).unwrap();
1378        let sub = seq.subsequence(2, 2);
1379        assert_eq!(sub.get_sequence(), "");
1380        assert_eq!(sub.get_quality_scores(), "");
1381    }
1382
1383    #[test]
1384    fn test_quality_subsequence_end_beyond_sequence() {
1385        let seq = IndexedSequenceWithQuality::new( 0, "IIII", "ACGT" ).unwrap();
1386        let sub = seq.subsequence(1, 10);
1387        assert_eq!(sub.get_sequence(), "CGT");
1388        assert_eq!(sub.get_quality_scores(), "III");
1389    }
1390
1391    #[test]
1392    fn test_quality_subsequence_start_beyond_sequence() {
1393        let seq = IndexedSequenceWithQuality::new( 0, "IIII", "ACGT" ).unwrap();
1394        let sub = seq.subsequence(10, 20);
1395        assert_eq!(sub.get_sequence(), "");
1396        assert_eq!(sub.get_quality_scores(), "");
1397    }
1398
1399    #[test]
1400    fn test_quality_subsequence_start_greater_than_end() {
1401        let seq = IndexedSequenceWithQuality::new( 0, "IIII", "ACGT" ).unwrap();
1402        let sub = seq.subsequence(3, 1);
1403        assert_eq!(sub.get_sequence(), "");
1404        assert_eq!(sub.get_quality_scores(), "");
1405    }
1406
1407    #[test]
1408    fn test_quality_empty_sequence_zero_indices() {
1409        let seq = IndexedSequenceWithQuality::new( 0, "", "" ).unwrap();
1410        let sub = seq.subsequence(0, 0);
1411        assert_eq!(sub.get_sequence(), "");
1412        assert_eq!(sub.get_quality_scores(), "");
1413    }
1414
1415    #[test]
1416    fn test_quality_empty_sequence_nonzero_indices() {
1417        let seq = IndexedSequenceWithQuality::new( 0, "", "" ).unwrap();
1418        let sub = seq.subsequence(2, 5);
1419        assert_eq!(sub.get_sequence(), "");
1420        assert_eq!(sub.get_quality_scores(), "");
1421    }
1422
1423    #[test]
1424    fn test_quality_empty_sequence_start_greater_than_end() {
1425        let seq = IndexedSequenceWithQuality::new( 0, "", "" ).unwrap();
1426        let sub = seq.subsequence(5, 2);
1427        assert_eq!(sub.get_sequence(), "");
1428        assert_eq!(sub.get_quality_scores(), "");
1429    }
1430
1431    #[test]
1432    fn test_clone_indexed_sequence_with_quality() {
1433        let original = IndexedSequenceWithQuality::new( 3, "IIII", "ACGT" ).unwrap();
1434        let cloned   = original.clone();
1435        assert_eq!( cloned.get_index(), original.get_index() );
1436        assert_eq!( cloned.get_sequence(), original.get_sequence() );
1437        assert_eq!( cloned.get_quality_scores(), original.get_quality_scores() );
1438    }
1439
1440    #[test]
1441    fn test_clone_indexed_sequence_with_quality_is_independent() {
1442        let original = IndexedSequenceWithQuality::new( 1, "IIII", "ACGT" ).unwrap();
1443        let cloned   = original.clone();
1444        assert_eq!(cloned.get_index(), 1);
1445        assert_eq!(cloned.get_sequence(), "ACGT");
1446        assert_eq!(cloned.get_quality_scores(), "IIII");
1447    }
1448
1449}