"Fossies" - the Fresh Open Source Software Archive

Member "AdGuardHome-0.104.3/internal/querylog/qlog_file.go" (19 Nov 2020, 9818 Bytes) of package /linux/misc/dns/AdGuardHome-0.104.3.tar.gz:


As a special service "Fossies" has tried to format the requested source page into HTML format using (guessed) Go source code syntax highlighting (style: standard) with prefixed line numbers and code folding option. Alternatively you can here view or download the uninterpreted source code file. See also the latest Fossies "Diffs" side-by-side code changes report for "qlog_file.go": 0.104.1_vs_0.104.3.

    1 package querylog
    2 
    3 import (
    4     "fmt"
    5     "io"
    6     "os"
    7     "sync"
    8     "time"
    9 
   10     "github.com/AdguardTeam/AdGuardHome/internal/agherr"
   11     "github.com/AdguardTeam/golibs/log"
   12 )
   13 
   14 // Timestamp not found errors.
   15 const (
   16     ErrTSNotFound agherr.Error = "ts not found"
   17     ErrTSTooLate  agherr.Error = "ts too late"
   18     ErrTSTooEarly agherr.Error = "ts too early"
   19 )
   20 
   21 // TODO: Find a way to grow buffer instead of relying on this value when reading strings
   22 const maxEntrySize = 16 * 1024
   23 
   24 // buffer should be enough for at least this number of entries
   25 const bufferSize = 100 * maxEntrySize
   26 
   27 // QLogFile represents a single query log file
   28 // It allows reading from the file in the reverse order
   29 //
   30 // Please note that this is a stateful object.
   31 // Internally, it contains a pointer to a specific position in the file,
   32 // and it reads lines in reverse order starting from that position.
   33 type QLogFile struct {
   34     file     *os.File // the query log file
   35     position int64    // current position in the file
   36 
   37     buffer      []byte // buffer that we've read from the file
   38     bufferStart int64  // start of the buffer (in the file)
   39     bufferLen   int    // buffer len
   40 
   41     lock sync.Mutex // We use mutex to make it thread-safe
   42 }
   43 
   44 // NewQLogFile initializes a new instance of the QLogFile
   45 func NewQLogFile(path string) (*QLogFile, error) {
   46     f, err := os.OpenFile(path, os.O_RDONLY, 0o644)
   47     if err != nil {
   48         return nil, err
   49     }
   50 
   51     return &QLogFile{
   52         file: f,
   53     }, nil
   54 }
   55 
   56 // Seek performs binary search in the query log file looking for a record
   57 // with the specified timestamp. Once the record is found, it sets
   58 // "position" so that the next ReadNext call returned that record.
   59 //
   60 // The algorithm is rather simple:
   61 // 1. It starts with the position in the middle of a file
   62 // 2. Shifts back to the beginning of the line
   63 // 3. Checks the log record timestamp
   64 // 4. If it is lower than the timestamp we are looking for,
   65 // it shifts seek position to 3/4 of the file. Otherwise, to 1/4 of the file.
   66 // 5. It performs the search again, every time the search scope is narrowed twice.
   67 //
   68 // Returns:
   69 // * It returns the position of the the line with the timestamp we were looking for
   70 // so that when we call "ReadNext" this line was returned.
   71 // * Depth of the search (how many times we compared timestamps).
   72 // * If we could not find it, it returns one of the errors described above.
   73 func (q *QLogFile) Seek(timestamp int64) (int64, int, error) {
   74     q.lock.Lock()
   75     defer q.lock.Unlock()
   76 
   77     // Empty the buffer
   78     q.buffer = nil
   79 
   80     // First of all, check the file size
   81     fileInfo, err := q.file.Stat()
   82     if err != nil {
   83         return 0, 0, err
   84     }
   85 
   86     // Define the search scope
   87     start := int64(0)          // start of the search interval (position in the file)
   88     end := fileInfo.Size()     // end of the search interval (position in the file)
   89     probe := (end - start) / 2 // probe -- approximate index of the line we'll try to check
   90     var line string
   91     var lineIdx int64 // index of the probe line in the file
   92     var lineEndIdx int64
   93     var lastProbeLineIdx int64 // index of the last probe line
   94     lastProbeLineIdx = -1
   95 
   96     // Count seek depth in order to detect mistakes
   97     // If depth is too large, we should stop the search
   98     depth := 0
   99 
  100     for {
  101         // Get the line at the specified position
  102         line, lineIdx, lineEndIdx, err = q.readProbeLine(probe)
  103         if err != nil {
  104             return 0, depth, err
  105         }
  106 
  107         if lineIdx == lastProbeLineIdx {
  108             if lineIdx == 0 {
  109                 return 0, depth, ErrTSTooEarly
  110             }
  111 
  112             // If we're testing the same line twice then most likely
  113             // the scope is too narrow and we won't find anything
  114             // anymore in any other file.
  115             return 0, depth, fmt.Errorf("looking up timestamp %d in %q: %w", timestamp, q.file.Name(), ErrTSNotFound)
  116         } else if lineIdx == fileInfo.Size() {
  117             return 0, depth, ErrTSTooLate
  118         }
  119 
  120         // Save the last found idx
  121         lastProbeLineIdx = lineIdx
  122 
  123         // Get the timestamp from the query log record
  124         ts := readQLogTimestamp(line)
  125         if ts == 0 {
  126             return 0, depth, fmt.Errorf("looking up timestamp %d in %q: record %q has empty timestamp", timestamp, q.file.Name(), line)
  127         }
  128 
  129         if ts == timestamp {
  130             // Hurray, returning the result
  131             break
  132         }
  133 
  134         // Narrow the scope and repeat the search
  135         if ts > timestamp {
  136             // If the timestamp we're looking for is OLDER than what we found
  137             // Then the line is somewhere on the LEFT side from the current probe position
  138             end = lineIdx
  139         } else {
  140             // If the timestamp we're looking for is NEWER than what we found
  141             // Then the line is somewhere on the RIGHT side from the current probe position
  142             start = lineEndIdx
  143         }
  144         probe = start + (end-start)/2
  145 
  146         depth++
  147         if depth >= 100 {
  148             return 0, depth, fmt.Errorf("looking up timestamp %d in %q: depth %d too high: %w", timestamp, q.file.Name(), depth, ErrTSNotFound)
  149         }
  150     }
  151 
  152     q.position = lineIdx + int64(len(line))
  153     return q.position, depth, nil
  154 }
  155 
  156 // SeekStart changes the current position to the end of the file
  157 // Please note that we're reading query log in the reverse order
  158 // and that's why log start is actually the end of file
  159 //
  160 // Returns nil if we were able to change the current position.
  161 // Returns error in any other case.
  162 func (q *QLogFile) SeekStart() (int64, error) {
  163     q.lock.Lock()
  164     defer q.lock.Unlock()
  165 
  166     // Empty the buffer
  167     q.buffer = nil
  168 
  169     // First of all, check the file size
  170     fileInfo, err := q.file.Stat()
  171     if err != nil {
  172         return 0, err
  173     }
  174 
  175     // Place the position to the very end of file
  176     q.position = fileInfo.Size() - 1
  177     if q.position < 0 {
  178         q.position = 0
  179     }
  180     return q.position, nil
  181 }
  182 
  183 // ReadNext reads the next line (in the reverse order) from the file
  184 // and shifts the current position left to the next (actually prev) line.
  185 // returns io.EOF if there's nothing to read more
  186 func (q *QLogFile) ReadNext() (string, error) {
  187     q.lock.Lock()
  188     defer q.lock.Unlock()
  189 
  190     if q.position == 0 {
  191         return "", io.EOF
  192     }
  193 
  194     line, lineIdx, err := q.readNextLine(q.position)
  195     if err != nil {
  196         return "", err
  197     }
  198 
  199     // Shift position
  200     if lineIdx == 0 {
  201         q.position = 0
  202     } else {
  203         // there's usually a line break before the line
  204         // so we should shift one more char left from the line
  205         // line\nline
  206         q.position = lineIdx - 1
  207     }
  208     return line, err
  209 }
  210 
  211 // Close frees the underlying resources
  212 func (q *QLogFile) Close() error {
  213     return q.file.Close()
  214 }
  215 
  216 // readNextLine reads the next line from the specified position
  217 // this line actually have to END on that position.
  218 //
  219 // the algorithm is:
  220 // 1. check if we have the buffer initialized
  221 // 2. if it is, scan it and look for the line there
  222 // 3. if we cannot find the line there, read the prev chunk into the buffer
  223 // 4. read the line from the buffer
  224 func (q *QLogFile) readNextLine(position int64) (string, int64, error) {
  225     relativePos := position - q.bufferStart
  226     if q.buffer == nil || (relativePos < maxEntrySize && q.bufferStart != 0) {
  227         // Time to re-init the buffer
  228         err := q.initBuffer(position)
  229         if err != nil {
  230             return "", 0, err
  231         }
  232         relativePos = position - q.bufferStart
  233     }
  234 
  235     // Look for the end of the prev line
  236     // This is where we'll read from
  237     startLine := int64(0)
  238     for i := relativePos - 1; i >= 0; i-- {
  239         if q.buffer[i] == '\n' {
  240             startLine = i + 1
  241             break
  242         }
  243     }
  244 
  245     line := string(q.buffer[startLine:relativePos])
  246     lineIdx := q.bufferStart + startLine
  247     return line, lineIdx, nil
  248 }
  249 
  250 // initBuffer initializes the QLogFile buffer.
  251 // the goal is to read a chunk of file that includes the line with the specified position.
  252 func (q *QLogFile) initBuffer(position int64) error {
  253     q.bufferStart = int64(0)
  254     if (position - bufferSize) > 0 {
  255         q.bufferStart = position - bufferSize
  256     }
  257 
  258     // Seek to this position
  259     _, err := q.file.Seek(q.bufferStart, io.SeekStart)
  260     if err != nil {
  261         return err
  262     }
  263 
  264     if q.buffer == nil {
  265         q.buffer = make([]byte, bufferSize)
  266     }
  267     q.bufferLen, err = q.file.Read(q.buffer)
  268     if err != nil {
  269         return err
  270     }
  271 
  272     return nil
  273 }
  274 
  275 // readProbeLine reads a line that includes the specified position
  276 // this method is supposed to be used when we use binary search in the Seek method
  277 // in the case of consecutive reads, use readNext (it uses a better buffer)
  278 func (q *QLogFile) readProbeLine(position int64) (string, int64, int64, error) {
  279     // First of all, we should read a buffer that will include the query log line
  280     // In order to do this, we'll define the boundaries
  281     seekPosition := int64(0)
  282     relativePos := position // position relative to the buffer we're going to read
  283     if (position - maxEntrySize) > 0 {
  284         seekPosition = position - maxEntrySize
  285         relativePos = maxEntrySize
  286     }
  287 
  288     // Seek to this position
  289     _, err := q.file.Seek(seekPosition, io.SeekStart)
  290     if err != nil {
  291         return "", 0, 0, err
  292     }
  293 
  294     // The buffer size is 2*maxEntrySize
  295     buffer := make([]byte, maxEntrySize*2)
  296     bufferLen, err := q.file.Read(buffer)
  297     if err != nil {
  298         return "", 0, 0, err
  299     }
  300 
  301     // Now start looking for the new line character starting
  302     // from the relativePos and going left
  303     startLine := int64(0)
  304     for i := relativePos - 1; i >= 0; i-- {
  305         if buffer[i] == '\n' {
  306             startLine = i + 1
  307             break
  308         }
  309     }
  310     // Looking for the end of line now
  311     endLine := int64(bufferLen)
  312     lineEndIdx := endLine + seekPosition
  313     for i := relativePos; i < int64(bufferLen); i++ {
  314         if buffer[i] == '\n' {
  315             endLine = i
  316             lineEndIdx = endLine + seekPosition + 1
  317             break
  318         }
  319     }
  320 
  321     // Finally we can return the string we were looking for
  322     lineIdx := startLine + seekPosition
  323     return string(buffer[startLine:endLine]), lineIdx, lineEndIdx, nil
  324 }
  325 
  326 // readQLogTimestamp reads the timestamp field from the query log line
  327 func readQLogTimestamp(str string) int64 {
  328     val := readJSONValue(str, "T")
  329     if len(val) == 0 {
  330         val = readJSONValue(str, "Time")
  331     }
  332 
  333     if len(val) == 0 {
  334         log.Error("Couldn't find timestamp: %s", str)
  335         return 0
  336     }
  337     tm, err := time.Parse(time.RFC3339Nano, val)
  338     if err != nil {
  339         log.Error("Couldn't parse timestamp: %s", val)
  340         return 0
  341     }
  342     return tm.UnixNano()
  343 }