"Fossies" - the Fresh Open Source Software Archive

Member "ripgrep-11.0.1/grep-searcher/src/line_buffer.rs" (16 Apr 2019, 34481 Bytes) of package /linux/privat/ripgrep-11.0.1.tar.gz:


As a special service, "Fossies" has tried to format the requested source file as HTML using (guessed) Rust source code syntax highlighting (style: standard), with prefixed line numbers and a code-folding option. Alternatively, you can view or download the uninterpreted source code file here. See also the latest Fossies "Diffs" side-by-side code-changes report for "line_buffer.rs": 0.10.0_vs_11.0.0.

    1 use std::cmp;
    2 use std::io;
    3 
    4 use bstr::{BStr, BString};
    5 
    6 /// The default buffer capacity that we use for the line buffer.
    7 pub(crate) const DEFAULT_BUFFER_CAPACITY: usize = 8 * (1<<10); // 8 KB
    8 
    9 /// The behavior of a searcher in the face of long lines and big contexts.
   10 ///
   11 /// When searching data incrementally using a fixed size buffer, this controls
   12 /// the amount of *additional* memory to allocate beyond the size of the buffer
   13 /// to accommodate lines (which may include the lines in a context window, when
   14 /// enabled) that do not fit in the buffer.
   15 ///
   16 /// The default is to eagerly allocate without a limit.
   17 #[derive(Clone, Copy, Debug)]
   18 pub enum BufferAllocation {
   19     /// Attempt to expand the size of the buffer until either at least the next
   20     /// line fits into memory or until all available memory is exhausted.
   21     ///
   22     /// This is the default.
   23     Eager,
   24     /// Limit the amount of additional memory allocated to the given size. If
   25     /// a line is found that requires more memory than is allowed here, then
   26     /// stop reading and return an error.
   27     Error(usize),
   28 }
   29 
   30 impl Default for BufferAllocation {
   31     fn default() -> BufferAllocation {
   32         BufferAllocation::Eager
   33     }
   34 }
   35 
   36 /// Create a new error to be used when a configured allocation limit has been
   37 /// reached.
   38 pub fn alloc_error(limit: usize) -> io::Error {
   39     let msg = format!("configured allocation limit ({}) exceeded", limit);
   40     io::Error::new(io::ErrorKind::Other, msg)
   41 }
   42 
   43 /// The behavior of binary detection in the line buffer.
   44 ///
   45 /// Binary detection is the process of _heuristically_ identifying whether a
   46 /// given chunk of data is binary or not, and then taking an action based on
   47 /// the result of that heuristic. The motivation behind detecting binary data
   48 /// is that binary data often indicates data that is undesirable to search
   49 /// using textual patterns. Of course, there are many cases in which this isn't
   50 /// true, which is why binary detection is disabled by default.
   51 #[derive(Clone, Copy, Debug)]
   52 pub enum BinaryDetection {
   53     /// No binary detection is performed. Data reported by the line buffer may
   54     /// contain arbitrary bytes.
   55     None,
   56     /// The given byte is searched in all contents read by the line buffer. If
   57     /// it occurs, then the data is considered binary and the line buffer acts
   58     /// as if it reached EOF. The line buffer guarantees that this byte will
   59     /// never be observable by callers.
   60     Quit(u8),
   61     /// The given byte is searched in all contents read by the line buffer. If
   62     /// it occurs, then it is replaced by the line terminator. The line buffer
   63     /// guarantees that this byte will never be observable by callers.
   64     Convert(u8),
   65 }
   66 
   67 impl Default for BinaryDetection {
   68     fn default() -> BinaryDetection {
   69         BinaryDetection::None
   70     }
   71 }
   72 
   73 impl BinaryDetection {
   74     /// Returns true if and only if the detection heuristic demands that
   75     /// the line buffer stop read data once binary data is observed.
   76     fn is_quit(&self) -> bool {
   77         match *self {
   78             BinaryDetection::Quit(_) => true,
   79             _ => false,
   80         }
   81     }
   82 }
   83 
   84 /// The configuration of a buffer. This contains options that are fixed once
   85 /// a buffer has been constructed.
   86 #[derive(Clone, Copy, Debug)]
   87 struct Config {
   88     /// The number of bytes to attempt to read at a time.
   89     capacity: usize,
   90     /// The line terminator.
   91     lineterm: u8,
   92     /// The behavior for handling long lines.
   93     buffer_alloc: BufferAllocation,
   94     /// When set, the presence of the given byte indicates binary content.
   95     binary: BinaryDetection,
   96 }
   97 
   98 impl Default for Config {
   99     fn default() -> Config {
  100         Config {
  101             capacity: DEFAULT_BUFFER_CAPACITY,
  102             lineterm: b'\n',
  103             buffer_alloc: BufferAllocation::default(),
  104             binary: BinaryDetection::default(),
  105         }
  106     }
  107 }
  108 
  109 /// A builder for constructing line buffers.
  110 #[derive(Clone, Debug, Default)]
  111 pub struct LineBufferBuilder {
  112     config: Config,
  113 }
  114 
  115 impl LineBufferBuilder {
  116     /// Create a new builder for a buffer.
  117     pub fn new() -> LineBufferBuilder {
  118         LineBufferBuilder { config: Config::default() }
  119     }
  120 
  121     /// Create a new line buffer from this builder's configuration.
  122     pub fn build(&self) -> LineBuffer {
  123         LineBuffer {
  124             config: self.config,
  125             buf: BString::from(vec![0; self.config.capacity]),
  126             pos: 0,
  127             last_lineterm: 0,
  128             end: 0,
  129             absolute_byte_offset: 0,
  130             binary_byte_offset: None,
  131         }
  132     }
  133 
  134     /// Set the default capacity to use for a buffer.
  135     ///
  136     /// In general, the capacity of a buffer corresponds to the amount of data
  137     /// to hold in memory, and the size of the reads to make to the underlying
  138     /// reader.
  139     ///
  140     /// This is set to a reasonable default and probably shouldn't be changed
  141     /// unless there's a specific reason to do so.
  142     pub fn capacity(&mut self, capacity: usize) -> &mut LineBufferBuilder {
  143         self.config.capacity = capacity;
  144         self
  145     }
  146 
  147     /// Set the line terminator for the buffer.
  148     ///
  149     /// Every buffer has a line terminator, and this line terminator is used
  150     /// to determine how to roll the buffer forward. For example, when a read
  151     /// to the buffer's underlying reader occurs, the end of the data that is
  152     /// read is likely to correspond to an incomplete line. As a line buffer,
  153     /// callers should not access this data since it is incomplete. The line
  154     /// terminator is how the line buffer determines the part of the read that
  155     /// is incomplete.
  156     ///
  157     /// By default, this is set to `b'\n'`.
  158     pub fn line_terminator(&mut self, lineterm: u8) -> &mut LineBufferBuilder {
  159         self.config.lineterm = lineterm;
  160         self
  161     }
  162 
  163     /// Set the maximum amount of additional memory to allocate for long lines.
  164     ///
  165     /// In order to enable line oriented search, a fundamental requirement is
  166     /// that, at a minimum, each line must be able to fit into memory. This
  167     /// setting controls how big that line is allowed to be. By default, this
  168     /// is set to `BufferAllocation::Eager`, which means a line buffer will
  169     /// attempt to allocate as much memory as possible to fit a line, and will
  170     /// only be limited by available memory.
  171     ///
  172     /// Note that this setting only applies to the amount of *additional*
  173     /// memory to allocate, beyond the capacity of the buffer. That means that
  174     /// a value of `0` is sensible, and in particular, will guarantee that a
  175     /// line buffer will never allocate additional memory beyond its initial
  176     /// capacity.
  177     pub fn buffer_alloc(
  178         &mut self,
  179         behavior: BufferAllocation,
  180     ) -> &mut LineBufferBuilder {
  181         self.config.buffer_alloc = behavior;
  182         self
  183     }
  184 
  185     /// Whether to enable binary detection or not. Depending on the setting,
  186     /// this can either cause the line buffer to report EOF early or it can
  187     /// cause the line buffer to clean the data.
  188     ///
  189     /// By default, this is disabled. In general, binary detection should be
  190     /// viewed as an imperfect heuristic.
  191     pub fn binary_detection(
  192         &mut self,
  193         detection: BinaryDetection,
  194     ) -> &mut LineBufferBuilder {
  195         self.config.binary = detection;
  196         self
  197     }
  198 }
  199 
  200 /// A line buffer reader efficiently reads a line oriented buffer from an
  201 /// arbitrary reader.
  202 #[derive(Debug)]
  203 pub struct LineBufferReader<'b, R> {
  204     rdr: R,
  205     line_buffer: &'b mut LineBuffer,
  206 }
  207 
  208 impl<'b, R: io::Read> LineBufferReader<'b, R> {
  209     /// Create a new buffered reader that reads from `rdr` and uses the given
  210     /// `line_buffer` as an intermediate buffer.
  211     ///
  212     /// This does not change the binary detection behavior of the given line
  213     /// buffer.
  214     pub fn new(
  215         rdr: R,
  216         line_buffer: &'b mut LineBuffer,
  217     ) -> LineBufferReader<'b, R> {
  218         line_buffer.clear();
  219         LineBufferReader { rdr, line_buffer }
  220     }
  221 
  222     /// The absolute byte offset which corresponds to the starting offsets
  223     /// of the data returned by `buffer` relative to the beginning of the
  224     /// underlying reader's contents. As such, this offset does not generally
  225     /// correspond to an offset in memory. It is typically used for reporting
  226     /// purposes. It can also be used for counting the number of bytes that
  227     /// have been searched.
  228     pub fn absolute_byte_offset(&self) -> u64 {
  229         self.line_buffer.absolute_byte_offset()
  230     }
  231 
  232     /// If binary data was detected, then this returns the absolute byte offset
  233     /// at which binary data was initially found.
  234     pub fn binary_byte_offset(&self) -> Option<u64> {
  235         self.line_buffer.binary_byte_offset()
  236     }
  237 
  238     /// Fill the contents of this buffer by discarding the part of the buffer
  239     /// that has been consumed. The free space created by discarding the
  240     /// consumed part of the buffer is then filled with new data from the
  241     /// reader.
  242     ///
  243     /// If EOF is reached, then `false` is returned. Otherwise, `true` is
  244     /// returned. (Note that if this line buffer's binary detection is set to
  245     /// `Quit`, then the presence of binary data will cause this buffer to
  246     /// behave as if it had seen EOF at the first occurrence of binary data.)
  247     ///
  248     /// This forwards any errors returned by the underlying reader, and will
  249     /// also return an error if the buffer must be expanded past its allocation
  250     /// limit, as governed by the buffer allocation strategy.
  251     pub fn fill(&mut self) -> Result<bool, io::Error> {
  252         self.line_buffer.fill(&mut self.rdr)
  253     }
  254 
  255     /// Return the contents of this buffer.
  256     pub fn buffer(&self) -> &[u8] {
  257         self.line_buffer.buffer().as_bytes()
  258     }
  259 
  260     /// Return the underlying buffer as a byte string. Used for tests only.
  261     #[cfg(test)]
  262     fn bstr(&self) -> &BStr {
  263         self.line_buffer.buffer()
  264     }
  265 
  266     /// Consume the number of bytes provided. This must be less than or equal
  267     /// to the number of bytes returned by `buffer`.
  268     pub fn consume(&mut self, amt: usize) {
  269         self.line_buffer.consume(amt);
  270     }
  271 
  272     /// Consumes the remainder of the buffer. Subsequent calls to `buffer` are
  273     /// guaranteed to return an empty slice until the buffer is refilled.
  274     ///
  275     /// This is a convenience function for `consume(buffer.len())`.
  276     #[cfg(test)]
  277     fn consume_all(&mut self) {
  278         self.line_buffer.consume_all();
  279     }
  280 }
  281 
  282 /// A line buffer manages a (typically fixed) buffer for holding lines.
  283 ///
  284 /// Callers should create line buffers sparingly and reuse them when possible.
  285 /// Line buffers cannot be used directly, but instead must be used via the
  286 /// LineBufferReader.
  287 #[derive(Clone, Debug)]
  288 pub struct LineBuffer {
  289     /// The configuration of this buffer.
  290     config: Config,
  291     /// The primary buffer with which to hold data.
  292     buf: BString,
  293     /// The current position of this buffer. This is always a valid sliceable
  294     /// index into `buf`, and its maximum value is the length of `buf`.
  295     pos: usize,
  296     /// The end position of searchable content in this buffer. This is either
  297     /// set to just after the final line terminator in the buffer, or to just
  298     /// after the end of the last byte emitted by the reader when the reader
  299     /// has been exhausted.
  300     last_lineterm: usize,
  301     /// The end position of the buffer. This is always greater than or equal to
  302     /// last_lineterm. The bytes between last_lineterm and end, if any, always
  303     /// correspond to a partial line.
  304     end: usize,
  305     /// The absolute byte offset corresponding to `pos`. This is most typically
  306     /// not a valid index into addressable memory, but rather, an offset that
  307     /// is relative to all data that passes through a line buffer (since
  308     /// construction or since the last time `clear` was called).
  309     ///
  310     /// When the line buffer reaches EOF, this is set to the position just
  311     /// after the last byte read from the underlying reader. That is, it
  312     /// becomes the total count of bytes that have been read.
  313     absolute_byte_offset: u64,
  314     /// If binary data was found, this records the absolute byte offset at
  315     /// which it was first detected.
  316     binary_byte_offset: Option<u64>,
  317 }
  318 
  319 impl LineBuffer {
  320     /// Set the binary detection method used on this line buffer.
  321     ///
  322     /// This permits dynamically changing the binary detection strategy on
  323     /// an existing line buffer without needing to create a new one.
  324     pub fn set_binary_detection(&mut self, binary: BinaryDetection) {
  325         self.config.binary = binary;
  326     }
  327 
  328     /// Reset this buffer, such that it can be used with a new reader.
  329     fn clear(&mut self) {
  330         self.pos = 0;
  331         self.last_lineterm = 0;
  332         self.end = 0;
  333         self.absolute_byte_offset = 0;
  334         self.binary_byte_offset = None;
  335     }
  336 
  337     /// The absolute byte offset which corresponds to the starting offsets
  338     /// of the data returned by `buffer` relative to the beginning of the
  339     /// reader's contents. As such, this offset does not generally correspond
  340     /// to an offset in memory. It is typically used for reporting purposes,
  341     /// particularly in error messages.
  342     ///
  343     /// This is reset to `0` when `clear` is called.
  344     fn absolute_byte_offset(&self) -> u64 {
  345         self.absolute_byte_offset
  346     }
  347 
  348     /// If binary data was detected, then this returns the absolute byte offset
  349     /// at which binary data was initially found.
  350     fn binary_byte_offset(&self) -> Option<u64> {
  351         self.binary_byte_offset
  352     }
  353 
  354     /// Return the contents of this buffer.
  355     fn buffer(&self) -> &BStr {
  356         &self.buf[self.pos..self.last_lineterm]
  357     }
  358 
  359     /// Return the contents of the free space beyond the end of the buffer as
  360     /// a mutable slice.
  361     fn free_buffer(&mut self) -> &mut BStr {
  362         &mut self.buf[self.end..]
  363     }
  364 
  365     /// Consume the number of bytes provided. This must be less than or equal
  366     /// to the number of bytes returned by `buffer`.
  367     fn consume(&mut self, amt: usize) {
  368         assert!(amt <= self.buffer().len());
  369         self.pos += amt;
  370         self.absolute_byte_offset += amt as u64;
  371     }
  372 
  373     /// Consumes the remainder of the buffer. Subsequent calls to `buffer` are
  374     /// guaranteed to return an empty slice until the buffer is refilled.
  375     ///
  376     /// This is a convenience function for `consume(buffer.len())`.
  377     #[cfg(test)]
  378     fn consume_all(&mut self) {
  379         let amt = self.buffer().len();
  380         self.consume(amt);
  381     }
  382 
  383     /// Fill the contents of this buffer by discarding the part of the buffer
  384     /// that has been consumed. The free space created by discarding the
  385     /// consumed part of the buffer is then filled with new data from the given
  386     /// reader.
  387     ///
  388     /// Callers should provide the same reader to this line buffer in
  389     /// subsequent calls to fill. A different reader can only be used
  390     /// immediately following a call to `clear`.
  391     ///
  392     /// If EOF is reached, then `false` is returned. Otherwise, `true` is
  393     /// returned. (Note that if this line buffer's binary detection is set to
  394     /// `Quit`, then the presence of binary data will cause this buffer to
  395     /// behave as if it had seen EOF.)
  396     ///
  397     /// This forwards any errors returned by `rdr`, and will also return an
  398     /// error if the buffer must be expanded past its allocation limit, as
  399     /// governed by the buffer allocation strategy.
  400     fn fill<R: io::Read>(&mut self, mut rdr: R) -> Result<bool, io::Error> {
  401         // If the binary detection heuristic tells us to quit once binary data
  402         // has been observed, then we no longer read new data and reach EOF
  403         // once the current buffer has been consumed.
  404         if self.config.binary.is_quit() && self.binary_byte_offset.is_some() {
  405             return Ok(!self.buffer().is_empty());
  406         }
  407 
  408         self.roll();
  409         assert_eq!(self.pos, 0);
  410         loop {
  411             self.ensure_capacity()?;
  412             let readlen = rdr.read(self.free_buffer().as_bytes_mut())?;
  413             if readlen == 0 {
  414                 // We're only done reading for good once the caller has
  415                 // consumed everything.
  416                 self.last_lineterm = self.end;
  417                 return Ok(!self.buffer().is_empty());
  418             }
  419 
  420             // Get a mutable view into the bytes we've just read. These are
  421             // the bytes that we do binary detection on, and also the bytes we
  422             // search to find the last line terminator. We need a mutable slice
  423             // in the case of binary conversion.
  424             let oldend = self.end;
  425             self.end += readlen;
  426             let newbytes = &mut self.buf[oldend..self.end];
  427 
  428             // Binary detection.
  429             match self.config.binary {
  430                 BinaryDetection::None => {} // nothing to do
  431                 BinaryDetection::Quit(byte) => {
  432                     if let Some(i) = newbytes.find_byte(byte) {
  433                         self.end = oldend + i;
  434                         self.last_lineterm = self.end;
  435                         self.binary_byte_offset =
  436                             Some(self.absolute_byte_offset + self.end as u64);
  437                         // If the first byte in our buffer is a binary byte,
  438                         // then our buffer is empty and we should report as
  439                         // such to the caller.
  440                         return Ok(self.pos < self.end);
  441                     }
  442                 }
  443                 BinaryDetection::Convert(byte) => {
  444                     if let Some(i) = replace_bytes(
  445                         newbytes,
  446                         byte,
  447                         self.config.lineterm,
  448                     ) {
  449                         // Record only the first binary offset.
  450                         if self.binary_byte_offset.is_none() {
  451                             self.binary_byte_offset =
  452                                 Some(self.absolute_byte_offset
  453                                      + (oldend + i) as u64);
  454                         }
  455                     }
  456                 }
  457             }
  458 
  459             // Update our `last_lineterm` positions if we read one.
  460             if let Some(i) = newbytes.rfind_byte(self.config.lineterm) {
  461                 self.last_lineterm = oldend + i + 1;
  462                 return Ok(true);
  463             }
  464             // At this point, if we couldn't find a line terminator, then we
  465             // don't have a complete line. Therefore, we try to read more!
  466         }
  467     }
  468 
  469     /// Roll the unconsumed parts of the buffer to the front.
  470     ///
  471     /// This operation is idempotent.
  472     ///
  473     /// After rolling, `last_lineterm` and `end` point to the same location,
  474     /// and `pos` is always set to `0`.
  475     fn roll(&mut self) {
  476         if self.pos == self.end {
  477             self.pos = 0;
  478             self.last_lineterm = 0;
  479             self.end = 0;
  480             return;
  481         }
  482 
  483         let roll_len = self.end - self.pos;
  484         self.buf.copy_within(self.pos.., 0);
  485         self.pos = 0;
  486         self.last_lineterm = roll_len;
  487         self.end = roll_len;
  488     }
  489 
  490     /// Ensures that the internal buffer has a non-zero amount of free space
  491     /// in which to read more data. If there is no free space, then more is
  492     /// allocated. If the allocation must exceed the configured limit, then
  493     /// this returns an error.
  494     fn ensure_capacity(&mut self) -> Result<(), io::Error> {
  495         if !self.free_buffer().is_empty() {
  496             return Ok(());
  497         }
  498         // `len` is used for computing the next allocation size. The capacity
  499         // is permitted to start at `0`, so we make sure it's at least `1`.
  500         let len = cmp::max(1, self.buf.len());
  501         let additional = match self.config.buffer_alloc {
  502             BufferAllocation::Eager => len * 2,
  503             BufferAllocation::Error(limit) => {
  504                 let used = self.buf.len() - self.config.capacity;
  505                 let n = cmp::min(len * 2, limit - used);
  506                 if n == 0 {
  507                     return Err(alloc_error(self.config.capacity + limit));
  508                 }
  509                 n
  510             }
  511         };
  512         assert!(additional > 0);
  513         let newlen = self.buf.len() + additional;
  514         self.buf.resize(newlen, 0);
  515         assert!(!self.free_buffer().is_empty());
  516         Ok(())
  517     }
  518 }
  519 
  520 /// Replaces `src` with `replacement` in bytes, and return the offset of the
  521 /// first replacement, if one exists.
  522 fn replace_bytes(bytes: &mut BStr, src: u8, replacement: u8) -> Option<usize> {
  523     if src == replacement {
  524         return None;
  525     }
  526     let mut first_pos = None;
  527     let mut pos = 0;
  528     while let Some(i) = bytes[pos..].find_byte(src).map(|i| pos + i) {
  529         if first_pos.is_none() {
  530             first_pos = Some(i);
  531         }
  532         bytes[i] = replacement;
  533         pos = i + 1;
  534         while bytes.get(pos) == Some(&src) {
  535             bytes[pos] = replacement;
  536             pos += 1;
  537         }
  538     }
  539     first_pos
  540 }
  541 
  542 #[cfg(test)]
  543 mod tests {
  544     use std::str;
  545     use bstr::BString;
  546     use super::*;
  547 
  548     const SHERLOCK: &'static str = "\
  549 For the Doctor Watsons of this world, as opposed to the Sherlock
  550 Holmeses, success in the province of detective work must always
  551 be, to a very large extent, the result of luck. Sherlock Holmes
  552 can extract a clew from a wisp of straw or a flake of cigar ash;
  553 but Doctor Watson has to have it taken out for him and dusted,
  554 and exhibited clearly, with a label attached.\
  555 ";
  556 
  557     fn s(slice: &str) -> String {
  558         slice.to_string()
  559     }
  560 
  561     fn replace_str(
  562         slice: &str,
  563         src: u8,
  564         replacement: u8,
  565     ) -> (String, Option<usize>) {
  566         let mut dst = BString::from(slice);
  567         let result = replace_bytes(&mut dst, src, replacement);
  568         (dst.into_string().unwrap(), result)
  569     }
  570 
  571     #[test]
  572     fn replace() {
  573         assert_eq!(replace_str("abc", b'b', b'z'), (s("azc"), Some(1)));
  574         assert_eq!(replace_str("abb", b'b', b'z'), (s("azz"), Some(1)));
  575         assert_eq!(replace_str("aba", b'a', b'z'), (s("zbz"), Some(0)));
  576         assert_eq!(replace_str("bbb", b'b', b'z'), (s("zzz"), Some(0)));
  577         assert_eq!(replace_str("bac", b'b', b'z'), (s("zac"), Some(0)));
  578     }
  579 
  580     #[test]
  581     fn buffer_basics1() {
  582         let bytes = "homer\nlisa\nmaggie";
  583         let mut linebuf = LineBufferBuilder::new().build();
  584         let mut rdr = LineBufferReader::new(bytes.as_bytes(), &mut linebuf);
  585 
  586         assert!(rdr.buffer().is_empty());
  587 
  588         assert!(rdr.fill().unwrap());
  589         assert_eq!(rdr.bstr(), "homer\nlisa\n");
  590         assert_eq!(rdr.absolute_byte_offset(), 0);
  591         rdr.consume(5);
  592         assert_eq!(rdr.absolute_byte_offset(), 5);
  593         rdr.consume_all();
  594         assert_eq!(rdr.absolute_byte_offset(), 11);
  595 
  596         assert!(rdr.fill().unwrap());
  597         assert_eq!(rdr.bstr(), "maggie");
  598         rdr.consume_all();
  599 
  600         assert!(!rdr.fill().unwrap());
  601         assert_eq!(rdr.absolute_byte_offset(), bytes.len() as u64);
  602         assert_eq!(rdr.binary_byte_offset(), None);
  603     }
  604 
  605     #[test]
  606     fn buffer_basics2() {
  607         let bytes = "homer\nlisa\nmaggie\n";
  608         let mut linebuf = LineBufferBuilder::new().build();
  609         let mut rdr = LineBufferReader::new(bytes.as_bytes(), &mut linebuf);
  610 
  611         assert!(rdr.fill().unwrap());
  612         assert_eq!(rdr.bstr(), "homer\nlisa\nmaggie\n");
  613         rdr.consume_all();
  614 
  615         assert!(!rdr.fill().unwrap());
  616         assert_eq!(rdr.absolute_byte_offset(), bytes.len() as u64);
  617         assert_eq!(rdr.binary_byte_offset(), None);
  618     }
  619 
  620     #[test]
  621     fn buffer_basics3() {
  622         let bytes = "\n";
  623         let mut linebuf = LineBufferBuilder::new().build();
  624         let mut rdr = LineBufferReader::new(bytes.as_bytes(), &mut linebuf);
  625 
  626         assert!(rdr.fill().unwrap());
  627         assert_eq!(rdr.bstr(), "\n");
  628         rdr.consume_all();
  629 
  630         assert!(!rdr.fill().unwrap());
  631         assert_eq!(rdr.absolute_byte_offset(), bytes.len() as u64);
  632         assert_eq!(rdr.binary_byte_offset(), None);
  633     }
  634 
  635     #[test]
  636     fn buffer_basics4() {
  637         let bytes = "\n\n";
  638         let mut linebuf = LineBufferBuilder::new().build();
  639         let mut rdr = LineBufferReader::new(bytes.as_bytes(), &mut linebuf);
  640 
  641         assert!(rdr.fill().unwrap());
  642         assert_eq!(rdr.bstr(), "\n\n");
  643         rdr.consume_all();
  644 
  645         assert!(!rdr.fill().unwrap());
  646         assert_eq!(rdr.absolute_byte_offset(), bytes.len() as u64);
  647         assert_eq!(rdr.binary_byte_offset(), None);
  648     }
  649 
  650     #[test]
  651     fn buffer_empty() {
  652         let bytes = "";
  653         let mut linebuf = LineBufferBuilder::new().build();
  654         let mut rdr = LineBufferReader::new(bytes.as_bytes(), &mut linebuf);
  655 
  656         assert!(!rdr.fill().unwrap());
  657         assert_eq!(rdr.absolute_byte_offset(), bytes.len() as u64);
  658         assert_eq!(rdr.binary_byte_offset(), None);
  659     }
  660 
  661     #[test]
  662     fn buffer_zero_capacity() {
  663         let bytes = "homer\nlisa\nmaggie";
  664         let mut linebuf = LineBufferBuilder::new().capacity(0).build();
  665         let mut rdr = LineBufferReader::new(bytes.as_bytes(), &mut linebuf);
  666 
  667         while rdr.fill().unwrap() {
  668             rdr.consume_all();
  669         }
  670         assert_eq!(rdr.absolute_byte_offset(), bytes.len() as u64);
  671         assert_eq!(rdr.binary_byte_offset(), None);
  672     }
  673 
  674     #[test]
  675     fn buffer_small_capacity() {
  676         let bytes = "homer\nlisa\nmaggie";
  677         let mut linebuf = LineBufferBuilder::new().capacity(1).build();
  678         let mut rdr = LineBufferReader::new(bytes.as_bytes(), &mut linebuf);
  679 
  680         let mut got = BString::new();
  681         while rdr.fill().unwrap() {
  682             got.push(rdr.buffer());
  683             rdr.consume_all();
  684         }
  685         assert_eq!(bytes, got);
  686         assert_eq!(rdr.absolute_byte_offset(), bytes.len() as u64);
  687         assert_eq!(rdr.binary_byte_offset(), None);
  688     }
  689 
  690     #[test]
  691     fn buffer_limited_capacity1() {
  692         let bytes = "homer\nlisa\nmaggie";
  693         let mut linebuf = LineBufferBuilder::new()
  694             .capacity(1)
  695             .buffer_alloc(BufferAllocation::Error(5))
  696             .build();
  697         let mut rdr = LineBufferReader::new(bytes.as_bytes(), &mut linebuf);
  698 
  699         assert!(rdr.fill().unwrap());
  700         assert_eq!(rdr.bstr(), "homer\n");
  701         rdr.consume_all();
  702 
  703         assert!(rdr.fill().unwrap());
  704         assert_eq!(rdr.bstr(), "lisa\n");
  705         rdr.consume_all();
  706 
  707         // This returns an error because while we have just enough room to
  708         // store maggie in the buffer, we *don't* have enough room to read one
  709         // more byte, so we don't know whether we're at EOF or not, and
  710         // therefore must give up.
  711         assert!(rdr.fill().is_err());
  712 
  713         // We can mush on though!
  714         assert_eq!(rdr.bstr(), "m");
  715         rdr.consume_all();
  716 
  717         assert!(rdr.fill().unwrap());
  718         assert_eq!(rdr.bstr(), "aggie");
  719         rdr.consume_all();
  720 
  721         assert!(!rdr.fill().unwrap());
  722     }
  723 
  724     #[test]
  725     fn buffer_limited_capacity2() {
  726         let bytes = "homer\nlisa\nmaggie";
  727         let mut linebuf = LineBufferBuilder::new()
  728             .capacity(1)
  729             .buffer_alloc(BufferAllocation::Error(6))
  730             .build();
  731         let mut rdr = LineBufferReader::new(bytes.as_bytes(), &mut linebuf);
  732 
  733         assert!(rdr.fill().unwrap());
  734         assert_eq!(rdr.bstr(), "homer\n");
  735         rdr.consume_all();
  736 
  737         assert!(rdr.fill().unwrap());
  738         assert_eq!(rdr.bstr(), "lisa\n");
  739         rdr.consume_all();
  740 
  741         // We have just enough space.
  742         assert!(rdr.fill().unwrap());
  743         assert_eq!(rdr.bstr(), "maggie");
  744         rdr.consume_all();
  745 
  746         assert!(!rdr.fill().unwrap());
  747     }
  748 
  749     #[test]
  750     fn buffer_limited_capacity3() {
  751         let bytes = "homer\nlisa\nmaggie";
  752         let mut linebuf = LineBufferBuilder::new()
  753             .capacity(1)
  754             .buffer_alloc(BufferAllocation::Error(0))
  755             .build();
  756         let mut rdr = LineBufferReader::new(bytes.as_bytes(), &mut linebuf);
  757 
  758         assert!(rdr.fill().is_err());
  759         assert_eq!(rdr.bstr(), "");
  760     }
  761 
  762     #[test]
  763     fn buffer_binary_none() {
  764         let bytes = "homer\nli\x00sa\nmaggie\n";
  765         let mut linebuf = LineBufferBuilder::new().build();
  766         let mut rdr = LineBufferReader::new(bytes.as_bytes(), &mut linebuf);
  767 
  768         assert!(rdr.buffer().is_empty());
  769 
  770         assert!(rdr.fill().unwrap());
  771         assert_eq!(rdr.bstr(), "homer\nli\x00sa\nmaggie\n");
  772         rdr.consume_all();
  773 
  774         assert!(!rdr.fill().unwrap());
  775         assert_eq!(rdr.absolute_byte_offset(), bytes.len() as u64);
  776         assert_eq!(rdr.binary_byte_offset(), None);
  777     }
  778 
  779     #[test]
  780     fn buffer_binary_quit1() {
  781         let bytes = "homer\nli\x00sa\nmaggie\n";
  782         let mut linebuf = LineBufferBuilder::new()
  783             .binary_detection(BinaryDetection::Quit(b'\x00'))
  784             .build();
  785         let mut rdr = LineBufferReader::new(bytes.as_bytes(), &mut linebuf);
  786 
  787         assert!(rdr.buffer().is_empty());
  788 
  789         assert!(rdr.fill().unwrap());
  790         assert_eq!(rdr.bstr(), "homer\nli");
  791         rdr.consume_all();
  792 
  793         assert!(!rdr.fill().unwrap());
  794         assert_eq!(rdr.absolute_byte_offset(), 8);
  795         assert_eq!(rdr.binary_byte_offset(), Some(8));
  796     }
  797 
  798     #[test]
  799     fn buffer_binary_quit2() {
  800         let bytes = "\x00homer\nlisa\nmaggie\n";
  801         let mut linebuf = LineBufferBuilder::new()
  802             .binary_detection(BinaryDetection::Quit(b'\x00'))
  803             .build();
  804         let mut rdr = LineBufferReader::new(bytes.as_bytes(), &mut linebuf);
  805 
  806         assert!(!rdr.fill().unwrap());
  807         assert_eq!(rdr.bstr(), "");
  808         assert_eq!(rdr.absolute_byte_offset(), 0);
  809         assert_eq!(rdr.binary_byte_offset(), Some(0));
  810     }
  811 
  812     #[test]
  813     fn buffer_binary_quit3() {
  814         let bytes = "homer\nlisa\nmaggie\n\x00";
  815         let mut linebuf = LineBufferBuilder::new()
  816             .binary_detection(BinaryDetection::Quit(b'\x00'))
  817             .build();
  818         let mut rdr = LineBufferReader::new(bytes.as_bytes(), &mut linebuf);
  819 
  820         assert!(rdr.buffer().is_empty());
  821 
  822         assert!(rdr.fill().unwrap());
  823         assert_eq!(rdr.bstr(), "homer\nlisa\nmaggie\n");
  824         rdr.consume_all();
  825 
  826         assert!(!rdr.fill().unwrap());
  827         assert_eq!(rdr.absolute_byte_offset(), bytes.len() as u64 - 1);
  828         assert_eq!(rdr.binary_byte_offset(), Some(bytes.len() as u64 - 1));
  829     }
  830 
  831     #[test]
  832     fn buffer_binary_quit4() {
  833         let bytes = "homer\nlisa\nmaggie\x00\n";
  834         let mut linebuf = LineBufferBuilder::new()
  835             .binary_detection(BinaryDetection::Quit(b'\x00'))
  836             .build();
  837         let mut rdr = LineBufferReader::new(bytes.as_bytes(), &mut linebuf);
  838 
  839         assert!(rdr.buffer().is_empty());
  840 
  841         assert!(rdr.fill().unwrap());
  842         assert_eq!(rdr.bstr(), "homer\nlisa\nmaggie");
  843         rdr.consume_all();
  844 
  845         assert!(!rdr.fill().unwrap());
  846         assert_eq!(rdr.absolute_byte_offset(), bytes.len() as u64 - 2);
  847         assert_eq!(rdr.binary_byte_offset(), Some(bytes.len() as u64 - 2));
  848     }
  849 
  850     #[test]
  851     fn buffer_binary_quit5() {
  852         let mut linebuf = LineBufferBuilder::new()
  853             .binary_detection(BinaryDetection::Quit(b'u'))
  854             .build();
  855         let mut rdr = LineBufferReader::new(SHERLOCK.as_bytes(), &mut linebuf);
  856 
  857         assert!(rdr.buffer().is_empty());
  858 
  859         assert!(rdr.fill().unwrap());
  860         assert_eq!(rdr.bstr(), "\
  861 For the Doctor Watsons of this world, as opposed to the Sherlock
  862 Holmeses, s\
  863 ");
  864         rdr.consume_all();
  865 
  866         assert!(!rdr.fill().unwrap());
  867         assert_eq!(rdr.absolute_byte_offset(), 76);
  868         assert_eq!(rdr.binary_byte_offset(), Some(76));
  869         assert_eq!(SHERLOCK.as_bytes()[76], b'u');
  870     }
  871 
  872     #[test]
  873     fn buffer_binary_convert1() {
  874         let bytes = "homer\nli\x00sa\nmaggie\n";
  875         let mut linebuf = LineBufferBuilder::new()
  876             .binary_detection(BinaryDetection::Convert(b'\x00'))
  877             .build();
  878         let mut rdr = LineBufferReader::new(bytes.as_bytes(), &mut linebuf);
  879 
  880         assert!(rdr.buffer().is_empty());
  881 
  882         assert!(rdr.fill().unwrap());
  883         assert_eq!(rdr.bstr(), "homer\nli\nsa\nmaggie\n");
  884         rdr.consume_all();
  885 
  886         assert!(!rdr.fill().unwrap());
  887         assert_eq!(rdr.absolute_byte_offset(), bytes.len() as u64);
  888         assert_eq!(rdr.binary_byte_offset(), Some(8));
  889     }
  890 
  891     #[test]
  892     fn buffer_binary_convert2() {
  893         let bytes = "\x00homer\nlisa\nmaggie\n";
  894         let mut linebuf = LineBufferBuilder::new()
  895             .binary_detection(BinaryDetection::Convert(b'\x00'))
  896             .build();
  897         let mut rdr = LineBufferReader::new(bytes.as_bytes(), &mut linebuf);
  898 
  899         assert!(rdr.buffer().is_empty());
  900 
  901         assert!(rdr.fill().unwrap());
  902         assert_eq!(rdr.bstr(), "\nhomer\nlisa\nmaggie\n");
  903         rdr.consume_all();
  904 
  905         assert!(!rdr.fill().unwrap());
  906         assert_eq!(rdr.absolute_byte_offset(), bytes.len() as u64);
  907         assert_eq!(rdr.binary_byte_offset(), Some(0));
  908     }
  909 
  910     #[test]
  911     fn buffer_binary_convert3() {
  912         let bytes = "homer\nlisa\nmaggie\n\x00";
  913         let mut linebuf = LineBufferBuilder::new()
  914             .binary_detection(BinaryDetection::Convert(b'\x00'))
  915             .build();
  916         let mut rdr = LineBufferReader::new(bytes.as_bytes(), &mut linebuf);
  917 
  918         assert!(rdr.buffer().is_empty());
  919 
  920         assert!(rdr.fill().unwrap());
  921         assert_eq!(rdr.bstr(), "homer\nlisa\nmaggie\n\n");
  922         rdr.consume_all();
  923 
  924         assert!(!rdr.fill().unwrap());
  925         assert_eq!(rdr.absolute_byte_offset(), bytes.len() as u64);
  926         assert_eq!(rdr.binary_byte_offset(), Some(bytes.len() as u64 - 1));
  927     }
  928 
  929     #[test]
  930     fn buffer_binary_convert4() {
  931         let bytes = "homer\nlisa\nmaggie\x00\n";
  932         let mut linebuf = LineBufferBuilder::new()
  933             .binary_detection(BinaryDetection::Convert(b'\x00'))
  934             .build();
  935         let mut rdr = LineBufferReader::new(bytes.as_bytes(), &mut linebuf);
  936 
  937         assert!(rdr.buffer().is_empty());
  938 
  939         assert!(rdr.fill().unwrap());
  940         assert_eq!(rdr.bstr(), "homer\nlisa\nmaggie\n\n");
  941         rdr.consume_all();
  942 
  943         assert!(!rdr.fill().unwrap());
  944         assert_eq!(rdr.absolute_byte_offset(), bytes.len() as u64);
  945         assert_eq!(rdr.binary_byte_offset(), Some(bytes.len() as u64 - 2));
  946     }
  947 }