HiveBrain v1.2.0
Get Started
← Back to all entries
patternrustCritical

Read large files line by line in Rust

Submitted by: @import:stackoverflow-api··
0
Viewed 0 times
linefilesrustlargeread

Problem

My Rust program is intented to read a very large (up to several GB), simple text file line by line. The problem is, that this file is too large to be read at once, or to transfer all lines into a Vec.

What would be an idiomatic way to handle this in Rust?

Solution

You want to use the buffered reader, BufRead, and specifically the function BufReader.lines():

use std::fs::File;
use std::io::{self, prelude::*, BufReader};

fn main() -> io::Result {
    let file = File::open("foo.txt")?;
    let reader = BufReader::new(file);

    for line in reader.lines() {
        println!("{}", line?);
    }

    Ok(())
}


Note that you are not returned the linefeed, as said in the documentation.

If you do not want to allocate a string for each line, here is an example to reuse the same buffer:

fn main() -> std::io::Result {
    let mut reader = my_reader::BufReader::open("Cargo.toml")?;
    let mut buffer = String::new();

    while let Some(line) = reader.read_line(&mut buffer) {
        println!("{}", line?.trim());
    }

    Ok(())
}

mod my_reader {
    use std::{
        fs::File,
        io::{self, prelude::*},
    };

    pub struct BufReader {
        reader: io::BufReader,
    }

    impl BufReader {
        pub fn open(path: impl AsRef) -> io::Result {
            let file = File::open(path)?;
            let reader = io::BufReader::new(file);

            Ok(Self { reader })
        }

        pub fn read_line(
            &mut self,
            buffer: &'buf mut String,
        ) -> Option> {
            buffer.clear();

            self.reader
                .read_line(buffer)
                .map(|u| if u == 0 { None } else { Some(buffer) })
                .transpose()
        }
    }
}


Playground

Or if you prefer a standard iterator, you can use this Rc trick I shamelessly took from Reddit:

fn main() -> std::io::Result {
    for line in my_reader::BufReader::open("Cargo.toml")? {
        println!("{}", line?.trim());
    }

    Ok(())
}

mod my_reader {
    use std::{
        fs::File,
        io::{self, prelude::*},
        rc::Rc,
    };

    pub struct BufReader {
        reader: io::BufReader,
        buf: Rc,
    }
    
    fn new_buf() -> Rc {
        Rc::new(String::with_capacity(1024)) // Tweakable capacity
    }

    impl BufReader {
        pub fn open(path: impl AsRef) -> io::Result {
            let file = File::open(path)?;
            let reader = io::BufReader::new(file);
            let buf = new_buf();

            Ok(Self { reader, buf })
        }
    }

    impl Iterator for BufReader {
        type Item = io::Result>;

        fn next(&mut self) -> Option {
            let buf = match Rc::get_mut(&mut self.buf) {
                Some(buf) => {
                    buf.clear();
                    buf
                }
                None => {
                    self.buf = new_buf();
                    Rc::make_mut(&mut self.buf)
                }
            };

            self.reader
                .read_line(buf)
                .map(|u| if u == 0 { None } else { Some(Rc::clone(&self.buf)) })
                .transpose()
        }
    }
}


Playground

Code Snippets

use std::fs::File;
use std::io::{self, prelude::*, BufReader};

fn main() -> io::Result<()> {
    let file = File::open("foo.txt")?;
    let reader = BufReader::new(file);

    for line in reader.lines() {
        println!("{}", line?);
    }

    Ok(())
}
fn main() -> std::io::Result<()> {
    let mut reader = my_reader::BufReader::open("Cargo.toml")?;
    let mut buffer = String::new();

    while let Some(line) = reader.read_line(&mut buffer) {
        println!("{}", line?.trim());
    }

    Ok(())
}

mod my_reader {
    use std::{
        fs::File,
        io::{self, prelude::*},
    };

    pub struct BufReader {
        reader: io::BufReader<File>,
    }

    impl BufReader {
        pub fn open(path: impl AsRef<std::path::Path>) -> io::Result<Self> {
            let file = File::open(path)?;
            let reader = io::BufReader::new(file);

            Ok(Self { reader })
        }

        pub fn read_line<'buf>(
            &mut self,
            buffer: &'buf mut String,
        ) -> Option<io::Result<&'buf mut String>> {
            buffer.clear();

            self.reader
                .read_line(buffer)
                .map(|u| if u == 0 { None } else { Some(buffer) })
                .transpose()
        }
    }
}
fn main() -> std::io::Result<()> {
    for line in my_reader::BufReader::open("Cargo.toml")? {
        println!("{}", line?.trim());
    }

    Ok(())
}

mod my_reader {
    use std::{
        fs::File,
        io::{self, prelude::*},
        rc::Rc,
    };

    pub struct BufReader {
        reader: io::BufReader<File>,
        buf: Rc<String>,
    }
    
    fn new_buf() -> Rc<String> {
        Rc::new(String::with_capacity(1024)) // Tweakable capacity
    }

    impl BufReader {
        pub fn open(path: impl AsRef<std::path::Path>) -> io::Result<Self> {
            let file = File::open(path)?;
            let reader = io::BufReader::new(file);
            let buf = new_buf();

            Ok(Self { reader, buf })
        }
    }

    impl Iterator for BufReader {
        type Item = io::Result<Rc<String>>;

        fn next(&mut self) -> Option<Self::Item> {
            let buf = match Rc::get_mut(&mut self.buf) {
                Some(buf) => {
                    buf.clear();
                    buf
                }
                None => {
                    self.buf = new_buf();
                    Rc::make_mut(&mut self.buf)
                }
            };

            self.reader
                .read_line(buf)
                .map(|u| if u == 0 { None } else { Some(Rc::clone(&self.buf)) })
                .transpose()
        }
    }
}

Context

Stack Overflow Q#45882329, score: 174

Revisions (0)

No revisions yet.