"Fossies" - the Fresh Open Source Software Archive 
Member "sift-0.9.0/sift.go" (22 Oct 2016, 19248 Bytes) of package /linux/privat/sift-0.9.0.tar.gz:
As a special service "Fossies" has tried to format the requested source page into HTML format using (guessed) Go source code syntax highlighting (style:
standard) with prefixed line numbers and
code folding option.
Alternatively, you can view or download the uninterpreted source code file here.
See also the latest
Fossies "Diffs" side-by-side code changes report for "sift.go":
0.8.0_vs_0.9.0.
1 // sift
2 // Copyright (C) 2014-2016 Sven Taute
3 //
4 // This program is free software: you can redistribute it and/or modify
5 // it under the terms of the GNU General Public License as published by
6 // the Free Software Foundation, version 3 of the License.
7 //
8 // This program is distributed in the hope that it will be useful,
9 // but WITHOUT ANY WARRANTY; without even the implied warranty of
10 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 // GNU General Public License for more details.
12 //
13 // You should have received a copy of the GNU General Public License
14 // along with this program. If not, see <http://www.gnu.org/licenses/>.
15
16 package main
17
18 import (
19 "bufio"
20 "compress/gzip"
21 "errors"
22 "fmt"
23 "io"
24 "log"
25 "net"
26 "os"
27 "path/filepath"
28 "regexp"
29 "runtime"
30 "strings"
31 "sync"
32 "time"
33
34 "github.com/svent/go-flags"
35 "github.com/svent/go-nbreader"
36 "github.com/svent/sift/gitignore"
37 "golang.org/x/crypto/ssh/terminal"
38 )
39
// Tunables and identification constants of the sift command.
const (
	// InputMultilineWindow is the size of the sliding window for multiline matching
	InputMultilineWindow = 32 * 1024
	// MultilinePipeTimeout is the timeout for reading and matching input
	// from STDIN/network in multiline mode
	MultilinePipeTimeout = 1000 * time.Millisecond
	// MultilinePipeChunkTimeout is the timeout to consider last input from STDIN/network
	// as a complete chunk for multiline matching
	MultilinePipeChunkTimeout = 150 * time.Millisecond
	// MaxDirRecursionRoutines is the maximum number of parallel routines used
	// to recurse into directories
	MaxDirRecursionRoutines = 3
	// SiftConfigFile is the file name of the sift configuration file.
	// NOTE(review): where this file is searched for is decided outside this file.
	SiftConfigFile = ".sift.conf"
	// SiftVersion is the version string of this release.
	SiftVersion = "0.9.0"
)
55
// ConditionType identifies the kind of a Condition (see Condition.conditionType).
type ConditionType int

// The supported condition kinds.
// NOTE(review): the exact semantics of each kind are implemented outside this
// file; the names suggest conditions on text preceding/following/surrounding a
// match and on whole-file, line and range matches - confirm against the matcher.
const (
	ConditionPreceded ConditionType = iota
	ConditionFollowed
	ConditionSurrounded
	ConditionFileMatches
	ConditionLineMatches
	ConditionRangeMatches
)
66
// Condition describes one additional condition attached to a search.
// Conditions are stored in global.conditions and referenced from matches
// through Match.conditionID.
// NOTE(review): the fields are evaluated outside this file; the per-field
// notes below are derived from the names and should be confirmed.
type Condition struct {
	// regex is the compiled pattern of the condition
	regex *regexp.Regexp
	// conditionType selects the kind of the condition
	conditionType ConditionType
	// within presumably limits the distance to the match - TODO confirm the unit
	within int64
	// lineRangeStart and lineRangeEnd presumably bound a line range - TODO confirm
	lineRangeStart int64
	lineRangeEnd   int64
	// negated presumably inverts the outcome of the condition - TODO confirm
	negated bool
}
75
// FileType describes how files of a named type are recognized.
// Entries are looked up in global.fileTypesMap by processDirectory.
type FileType struct {
	// Patterns are glob patterns matched against the file name via filepath.Match
	Patterns []string
	// ShebangRegex, if not nil, is matched against the first line of the file
	// (see checkShebang)
	ShebangRegex *regexp.Regexp
}
80
// Match holds a single match (or condition match) together with its
// position information and optional context.
type Match struct {
	// offset of the start of the match
	start int64
	// offset of the end of the match
	end int64
	// offset of the beginning of the first line of the match
	lineStart int64
	// offset of the end of the last line of the match
	lineEnd int64
	// the match
	match string
	// the match including the non-matched text on the first and last line
	line string
	// the line number of the beginning of the match
	lineno int64
	// the index to global.conditions (if this match belongs to a condition)
	conditionID int
	// the context before the match
	contextBefore *string
	// the context after the match
	contextAfter *string
}
103
104 type Matches []Match
105
106 func (e Matches) Len() int { return len(e) }
107 func (e Matches) Swap(i, j int) { e[i], e[j] = e[j], e[i] }
108 func (e Matches) Less(i, j int) bool { return e[i].start < e[j].start }
109
// Result carries the outcome of processing one target. Results are sent
// on global.resultsChan.
type Result struct {
	// matches that belong to conditions (see Match.conditionID)
	conditionMatches Matches
	// the matches found in the target
	matches Matches
	// if too many matches are found or input is read only from STDIN,
	// matches are streamed through a channel
	matchChan chan Matches
	// NOTE(review): presumably set when matchChan is used instead of matches - confirm
	streaming bool
	// NOTE(review): presumably marks targets detected as binary - confirm
	isBinary bool
	// the name of the processed target (e.g. the file path)
	target string
}
120
// Package-level settings and helpers shared by the functions in this file.
var (
	// InputBlockSize is the size in bytes of the buffers allocated for
	// processing input; it is also reported as the limit in the
	// "very long lines" error messages.
	InputBlockSize int = 256 * 1024
	// options holds the parsed command line/config options
	options Options
	// errorLogger writes messages prefixed with "Error: " to STDERR
	errorLogger = log.New(os.Stderr, "Error: ", 0)
	// errLineTooLong is the sentinel error checked by processFileTargets to
	// count targets that contain very long lines
	errLineTooLong = errors.New("line too long")
)
// global bundles the state shared between the goroutines of a search run.
// The channels, the gitignore cache and the counters are (re)initialized
// by executeSearch.
var global = struct {
	// conditions referenced by Match.conditionID
	conditions []Condition
	// paths of files to search ("-" stands for STDIN); read by processFileTargets
	filesChan chan string
	// directories waiting to be traversed by processDirectory
	directoryChan chan string
	// file type definitions, keyed by the type names used in the
	// include/exclude type options
	fileTypesMap map[string]FileType
	// optional regexes matched against the full path of each file
	includeFilepathRegex *regexp.Regexp
	excludeFilepathRegex *regexp.Regexp
	// recognizes network targets of the form tcp://HOST:PORT
	netTcpRegex *regexp.Regexp
	// destination for output, STDOUT by default
	outputFile io.Writer
	// the search patterns and their compiled form (filled in main)
	matchPatterns []string
	matchRegexes  []*regexp.Regexp
	// cache shared by the gitignore checkers created in processDirectory
	gitignoreCache *gitignore.GitIgnoreCache
	// results of processed targets; closed by executeSearch when all targets are done
	resultsChan chan *Result
	// executeSearch waits on this channel after closing resultsChan
	resultsDoneChan chan struct{}
	// tracks the running target processors (file workers and network targets)
	targetsWaitGroup sync.WaitGroup
	// tracks directories that are enqueued or being traversed
	recurseWaitGroup sync.WaitGroup
	// NOTE(review): streaming settings are consumed outside this file - confirm usage
	streamingAllowed   bool
	streamingThreshold int
	// NOTE(review): presumably terminal escape sequences for highlighting - confirm
	termHighlightFilename string
	termHighlightLineno   string
	termHighlightMatch    string
	termHighlightReset    string
	// counters reported by executeSearch
	totalLineLengthErrors int64
	totalMatchCount       int64
	totalResultCount      int64
	totalTargetCount      int64
}{
	outputFile:         os.Stdout,
	netTcpRegex:        regexp.MustCompile(`^(tcp[46]?)://(.*:\d+)$`),
	streamingThreshold: 1 << 16,
}
158
159 // processDirectories reads global.directoryChan and processes
160 // directories via processDirectory.
161 func processDirectories() {
162 n := options.Cores
163 if n > MaxDirRecursionRoutines {
164 n = MaxDirRecursionRoutines
165 }
166 for i := 0; i < n; i++ {
167 go func() {
168 for dirname := range global.directoryChan {
169 processDirectory(dirname)
170 }
171 }()
172 }
173 }
174
175 // enqueueDirectory enqueues directories on global.directoryChan.
176 // If the channel blocks, the directory is processed directly.
177 func enqueueDirectory(dirname string) {
178 global.recurseWaitGroup.Add(1)
179 select {
180 case global.directoryChan <- dirname:
181 default:
182 processDirectory(dirname)
183 }
184 }
185
186 // processDirectory recurses into a directory and sends all files
187 // fulfilling the selected options on global.filesChan
188 func processDirectory(dirname string) {
189 defer global.recurseWaitGroup.Done()
190 var gic *gitignore.Checker
191 if options.Git {
192 gic = gitignore.NewCheckerWithCache(global.gitignoreCache)
193 err := gic.LoadBasePath(dirname)
194 if err != nil {
195 errorLogger.Printf("cannot load gitignore files for path '%s': %s", dirname, err)
196 }
197 }
198 dir, err := os.Open(dirname)
199 if err != nil {
200 errorLogger.Printf("cannot open directory '%s': %s\n", dirname, err)
201 return
202 }
203 defer dir.Close()
204 for {
205 entries, err := dir.Readdir(256)
206 if err == io.EOF {
207 return
208 }
209 if err != nil {
210 errorLogger.Printf("cannot read directory '%s': %s\n", dirname, err)
211 return
212 }
213
214 nextEntry:
215 for _, fi := range entries {
216 fullpath := filepath.Join(dirname, fi.Name())
217
218 // check directory include/exclude options
219 if fi.IsDir() {
220 if !options.Recursive {
221 continue nextEntry
222 }
223 for _, dirPattern := range options.ExcludeDirs {
224 matched, err := filepath.Match(dirPattern, fi.Name())
225 if err != nil {
226 errorLogger.Fatalf("cannot match malformed pattern '%s' against directory name: %s\n", dirPattern, err)
227 }
228 if matched {
229 continue nextEntry
230 }
231 }
232 if len(options.IncludeDirs) > 0 {
233 for _, dirPattern := range options.IncludeDirs {
234 matched, err := filepath.Match(dirPattern, fi.Name())
235 if err != nil {
236 errorLogger.Fatalf("cannot match malformed pattern '%s' against directory name: %s\n", dirPattern, err)
237 }
238 if matched {
239 goto includeDirMatchFound
240 }
241 }
242 continue nextEntry
243 includeDirMatchFound:
244 }
245 if options.Git {
246 if fi.Name() == gitignore.GitFoldername || gic.Check(fullpath, fi) {
247 continue nextEntry
248 }
249 }
250 enqueueDirectory(fullpath)
251 continue nextEntry
252 }
253
254 // check whether this is a regular file
255 if fi.Mode()&os.ModeType != 0 {
256 if options.FollowSymlinks && fi.Mode()&os.ModeType == os.ModeSymlink {
257 realPath, err := filepath.EvalSymlinks(fullpath)
258 if err != nil {
259 errorLogger.Printf("cannot follow symlink '%s': %s\n", fullpath, err)
260 } else {
261 realFi, err := os.Stat(realPath)
262 if err != nil {
263 errorLogger.Printf("cannot follow symlink '%s': %s\n", fullpath, err)
264 }
265 if realFi.IsDir() {
266 enqueueDirectory(realPath)
267 continue nextEntry
268 } else {
269 if realFi.Mode()&os.ModeType != 0 {
270 continue nextEntry
271 }
272 }
273 }
274 } else {
275 continue nextEntry
276 }
277 }
278
279 // check file path options
280 if global.excludeFilepathRegex != nil {
281 if global.excludeFilepathRegex.MatchString(fullpath) {
282 continue nextEntry
283 }
284 }
285 if global.includeFilepathRegex != nil {
286 if !global.includeFilepathRegex.MatchString(fullpath) {
287 continue nextEntry
288 }
289 }
290
291 // check file extension options
292 if len(options.ExcludeExtensions) > 0 {
293 for _, e := range strings.Split(options.ExcludeExtensions, ",") {
294 if filepath.Ext(fi.Name()) == "."+e {
295 continue nextEntry
296 }
297 }
298 }
299 if len(options.IncludeExtensions) > 0 {
300 for _, e := range strings.Split(options.IncludeExtensions, ",") {
301 if filepath.Ext(fi.Name()) == "."+e {
302 goto includeExtensionFound
303 }
304 }
305 continue nextEntry
306 includeExtensionFound:
307 }
308
309 // check file include/exclude options
310 for _, filePattern := range options.ExcludeFiles {
311 matched, err := filepath.Match(filePattern, fi.Name())
312 if err != nil {
313 errorLogger.Fatalf("cannot match malformed pattern '%s' against file name: %s\n", filePattern, err)
314 }
315 if matched {
316 continue nextEntry
317 }
318 }
319 if len(options.IncludeFiles) > 0 {
320 for _, filePattern := range options.IncludeFiles {
321 matched, err := filepath.Match(filePattern, fi.Name())
322 if err != nil {
323 errorLogger.Fatalf("cannot match malformed pattern '%s' against file name: %s\n", filePattern, err)
324 }
325 if matched {
326 goto includeFileMatchFound
327 }
328 }
329 continue nextEntry
330 includeFileMatchFound:
331 }
332
333 // check file type options
334 if len(options.ExcludeTypes) > 0 {
335 for _, t := range strings.Split(options.ExcludeTypes, ",") {
336 for _, filePattern := range global.fileTypesMap[t].Patterns {
337 if matched, _ := filepath.Match(filePattern, fi.Name()); matched {
338 continue nextEntry
339 }
340 }
341 sr := global.fileTypesMap[t].ShebangRegex
342 if sr != nil {
343 if m, err := checkShebang(global.fileTypesMap[t].ShebangRegex, fullpath); m && err == nil {
344 continue nextEntry
345 }
346 }
347 }
348 }
349 if len(options.IncludeTypes) > 0 {
350 for _, t := range strings.Split(options.IncludeTypes, ",") {
351 for _, filePattern := range global.fileTypesMap[t].Patterns {
352 if matched, _ := filepath.Match(filePattern, fi.Name()); matched {
353 goto includeTypeFound
354 }
355 }
356 sr := global.fileTypesMap[t].ShebangRegex
357 if sr != nil {
358 if m, err := checkShebang(global.fileTypesMap[t].ShebangRegex, fullpath); err != nil || m {
359 goto includeTypeFound
360 }
361 }
362 }
363 continue nextEntry
364 includeTypeFound:
365 }
366
367 if options.Git {
368 if fi.Name() == gitignore.GitIgnoreFilename || gic.Check(fullpath, fi) {
369 continue
370 }
371 }
372
373 global.filesChan <- fullpath
374 }
375 }
376 }
377
// checkShebang checks whether the first line of file matches the given regex.
//
// The first line is everything up to and including the first '\n'; if the
// file contains no newline, its whole content is used. An error is returned
// (together with false) if the file cannot be opened or read.
func checkShebang(regex *regexp.Regexp, filepath string) (bool, error) {
	f, err := os.Open(filepath)
	if err != nil {
		return false, err
	}
	// only register the Close once the file is known to be open
	defer f.Close()

	b, err := bufio.NewReader(f).ReadBytes('\n')
	// io.EOF only means that the first line is not newline-terminated
	if err != nil && err != io.EOF {
		return false, err
	}
	return regex.Match(b), nil
}
388
389 // processFileTargets reads filesChan, builds an io.Reader for the target and calls processReader
390 func processFileTargets() {
391 defer global.targetsWaitGroup.Done()
392 dataBuffer := make([]byte, InputBlockSize)
393 testBuffer := make([]byte, InputBlockSize)
394 matchRegexes := make([]*regexp.Regexp, len(global.matchPatterns))
395 for i := range global.matchPatterns {
396 matchRegexes[i] = regexp.MustCompile(global.matchPatterns[i])
397 }
398
399 for filepath := range global.filesChan {
400 var err error
401 var infile *os.File
402 var reader io.Reader
403
404 if options.TargetsOnly {
405 global.resultsChan <- &Result{target: filepath}
406 continue
407 }
408
409 if filepath == "-" {
410 infile = os.Stdin
411 } else {
412 infile, err = os.Open(filepath)
413 if err != nil {
414 errorLogger.Printf("cannot open file '%s': %s\n", filepath, err)
415 continue
416 }
417 }
418
419 if options.Zip && strings.HasSuffix(filepath, ".gz") {
420 rawReader := infile
421 reader, err = gzip.NewReader(rawReader)
422 if err != nil {
423 errorLogger.Printf("error decompressing file '%s', opening as normal file\n", infile.Name())
424 infile.Seek(0, 0)
425 reader = infile
426 }
427 } else if infile == os.Stdin && options.Multiline {
428 reader = nbreader.NewNBReader(infile, InputBlockSize,
429 nbreader.ChunkTimeout(MultilinePipeChunkTimeout), nbreader.Timeout(MultilinePipeTimeout))
430 } else {
431 reader = infile
432 }
433
434 if options.InvertMatch {
435 err = processReaderInvertMatch(reader, matchRegexes, filepath)
436 } else {
437 err = processReader(reader, matchRegexes, dataBuffer, testBuffer, filepath)
438 }
439 if err != nil {
440 if err == errLineTooLong {
441 global.totalLineLengthErrors += 1
442 if options.ErrShowLineLength {
443 errmsg := fmt.Sprintf("file contains very long lines (>= %d bytes). See options --blocksize and --err-skip-line-length.", InputBlockSize)
444 errorLogger.Printf("cannot process data from file '%s': %s\n", filepath, errmsg)
445 }
446 } else {
447 errorLogger.Printf("cannot process data from file '%s': %s\n", filepath, err)
448 }
449 }
450 infile.Close()
451 }
452 }
453
454 // processNetworkTarget starts a listening TCP socket and calls processReader
455 func processNetworkTarget(target string) {
456 matchRegexes := make([]*regexp.Regexp, len(global.matchPatterns))
457 for i := range global.matchPatterns {
458 matchRegexes[i] = regexp.MustCompile(global.matchPatterns[i])
459 }
460 defer global.targetsWaitGroup.Done()
461
462 var reader io.Reader
463 netParams := global.netTcpRegex.FindStringSubmatch(target)
464 proto := netParams[1]
465 addr := netParams[2]
466
467 listener, err := net.Listen(proto, addr)
468 if err != nil {
469 errorLogger.Fatalf("could not listen on '%s'\n", target)
470 }
471
472 conn, err := listener.Accept()
473 if err != nil {
474 errorLogger.Fatalf("could not accept connections on '%s'\n", target)
475 }
476
477 if options.Multiline {
478 reader = nbreader.NewNBReader(conn, InputBlockSize, nbreader.ChunkTimeout(MultilinePipeChunkTimeout),
479 nbreader.Timeout(MultilinePipeTimeout))
480 } else {
481 reader = conn
482 }
483
484 dataBuffer := make([]byte, InputBlockSize)
485 testBuffer := make([]byte, InputBlockSize)
486 err = processReader(reader, matchRegexes, dataBuffer, testBuffer, target)
487 if err != nil {
488 errorLogger.Printf("error processing data from '%s'\n", target)
489 return
490 }
491 }
492
493 func executeSearch(targets []string) (ret int, err error) {
494 defer func() {
495 if r := recover(); r != nil {
496 ret = 2
497 err = errors.New(r.(string))
498 }
499 }()
500 tstart := time.Now()
501 global.filesChan = make(chan string, 256)
502 global.directoryChan = make(chan string, 128)
503 global.resultsChan = make(chan *Result, 128)
504 global.resultsDoneChan = make(chan struct{})
505 global.gitignoreCache = gitignore.NewGitIgnoreCache()
506 global.totalTargetCount = 0
507 global.totalLineLengthErrors = 0
508 global.totalMatchCount = 0
509 global.totalResultCount = 0
510
511 go resultHandler()
512
513 for i := 0; i < options.Cores; i++ {
514 global.targetsWaitGroup.Add(1)
515 go processFileTargets()
516 }
517
518 go processDirectories()
519
520 for _, target := range targets {
521 switch {
522 case target == "-":
523 global.filesChan <- "-"
524 case global.netTcpRegex.MatchString(target):
525 global.targetsWaitGroup.Add(1)
526 go processNetworkTarget(target)
527 default:
528 fileinfo, err := os.Stat(target)
529 if err != nil {
530 if os.IsNotExist(err) {
531 errorLogger.Fatalf("no such file or directory: %s\n", target)
532 } else {
533 errorLogger.Fatalf("cannot open file or directory: %s\n", target)
534 }
535 }
536 if fileinfo.IsDir() {
537 global.recurseWaitGroup.Add(1)
538 global.directoryChan <- target
539 } else {
540 global.filesChan <- target
541 }
542 }
543 }
544
545 global.recurseWaitGroup.Wait()
546 close(global.directoryChan)
547
548 close(global.filesChan)
549 global.targetsWaitGroup.Wait()
550
551 close(global.resultsChan)
552 <-global.resultsDoneChan
553
554 var retVal int
555 if global.totalResultCount > 0 {
556 retVal = 0
557 } else {
558 retVal = 1
559 }
560
561 if !options.ErrSkipLineLength && !options.ErrShowLineLength && global.totalLineLengthErrors > 0 {
562 errorLogger.Printf("%d files skipped due to very long lines (>= %d bytes). See options --blocksize, --err-show-line-length and --err-skip-line-length.", global.totalLineLengthErrors, InputBlockSize)
563 }
564
565 if options.Stats {
566 tend := time.Now()
567 fmt.Fprintln(os.Stderr, global.totalTargetCount, "files processed")
568 fmt.Fprintln(os.Stderr, global.totalResultCount, "files match")
569 fmt.Fprintln(os.Stderr, global.totalMatchCount, "matches found")
570 fmt.Fprintf(os.Stderr, "in %v\n", tend.Sub(tstart))
571 }
572
573 return retVal, nil
574 }
575
576 func main() {
577 var targets []string
578 var args []string
579 var err error
580
581 parser := flags.NewNamedParser("sift", flags.HelpFlag|flags.PassDoubleDash)
582 parser.AddGroup("Options", "Options", &options)
583 parser.Name = "sift"
584 parser.Usage = "[OPTIONS] PATTERN [FILE|PATH|tcp://HOST:PORT]...\n" +
585 " sift [OPTIONS] [-e PATTERN | -f FILE] [FILE|PATH|tcp://HOST:PORT]...\n" +
586 " sift [OPTIONS] --targets [FILE|PATH]..."
587
588 // temporarily parse options to see if the --no-conf/--conf options were used and
589 // then discard the result
590 options.LoadDefaults()
591 args, err = parser.Parse()
592 if err != nil {
593 if e, ok := err.(*flags.Error); ok && e.Type == flags.ErrHelp {
594 fmt.Println(e.Error())
595 os.Exit(0)
596 } else {
597 errorLogger.Println(err)
598 os.Exit(2)
599 }
600 }
601 noConf := options.NoConfig
602 configFile := options.ConfigFile
603 options = Options{}
604
605 // perform full option parsing respecting the --no-conf/--conf options
606 options.LoadDefaults()
607 options.LoadConfigs(noConf, configFile)
608 args, err = parser.Parse()
609 if err != nil {
610 errorLogger.Println(err)
611 os.Exit(2)
612 }
613
614 for _, pattern := range options.Patterns {
615 global.matchPatterns = append(global.matchPatterns, pattern)
616 }
617
618 if options.PatternFile != "" {
619 f, err := os.Open(options.PatternFile)
620 if err != nil {
621 errorLogger.Fatalln("Cannot open pattern file:\n", err)
622 }
623 scanner := bufio.NewScanner(f)
624 for scanner.Scan() {
625 pattern := scanner.Text()
626 global.matchPatterns = append(global.matchPatterns, pattern)
627
628 }
629 }
630 if len(global.matchPatterns) == 0 {
631 if len(args) == 0 && !(options.PrintConfig || options.WriteConfig ||
632 options.TargetsOnly || options.ListTypes) {
633 errorLogger.Fatalln("No pattern given. Try 'sift --help' for more information.")
634 }
635 if len(args) > 0 && !options.TargetsOnly {
636 global.matchPatterns = append(global.matchPatterns, args[0])
637 args = args[1:len(args)]
638 }
639 }
640
641 if len(args) == 0 {
642 // check whether there is input on STDIN
643 if !terminal.IsTerminal(int(os.Stdin.Fd())) {
644 targets = []string{"-"}
645 } else {
646 targets = []string{"."}
647 }
648 } else {
649 targets = args[0:len(args)]
650 }
651
652 // expand arguments containing patterns on Windows
653 if runtime.GOOS == "windows" {
654 targetsExpanded := []string{}
655 for _, t := range targets {
656 if t == "-" {
657 targetsExpanded = append(targetsExpanded, t)
658 continue
659 }
660 expanded, err := filepath.Glob(t)
661 if err == filepath.ErrBadPattern {
662 errorLogger.Fatalf("cannot parse argument '%s': %s\n", t, err)
663 }
664 if expanded != nil {
665 for _, e := range expanded {
666 targetsExpanded = append(targetsExpanded, e)
667 }
668 }
669 }
670 targets = targetsExpanded
671 }
672
673 if err := options.Apply(global.matchPatterns, targets); err != nil {
674 errorLogger.Fatalf("cannot process options: %s\n", err)
675 }
676
677 global.matchRegexes = make([]*regexp.Regexp, len(global.matchPatterns))
678 for i := range global.matchPatterns {
679 global.matchRegexes[i], err = regexp.Compile(global.matchPatterns[i])
680 if err != nil {
681 errorLogger.Fatalf("cannot parse pattern: %s\n", err)
682 }
683 }
684
685 retVal, err := executeSearch(targets)
686 if err != nil {
687 errorLogger.Println(err)
688 }
689 os.Exit(retVal)
690 }