"Fossies" - the Fresh Open Source Software Archive

Member "cloc-1.86/Unix/cloc" (19 May 2020, 568962 Bytes) of package /linux/privat/cloc-1.86.tar.gz:


As a special service "Fossies" has tried to format the requested source page into HTML format using (guessed) Perl source code syntax highlighting (style: standard) with prefixed line numbers and code folding option. Alternatively, you can view or download the uninterpreted source code file here. See also the latest Fossies "Diffs" side-by-side code changes report for "cloc": 1.84_vs_1.86.

    1 #!/usr/bin/env perl
    2 # cloc -- Count Lines of Code                  {{{1
    3 # Copyright (C) 2006-2020 Al Danial <al.danial@gmail.com>
    4 # First release August 2006
    5 #
    6 # Includes code from:
    7 #   - SLOCCount v2.26
    8 #     http://www.dwheeler.com/sloccount/
    9 #     by David Wheeler.
   10 #   - Regexp::Common v2013031301
   11 #     http://search.cpan.org/~abigail/Regexp-Common-2013031301/lib/Regexp/Common.pm
   12 #     by Damian Conway and Abigail.
   13 #   - Win32::Autoglob
   14 #     http://search.cpan.org/~sburke/Win32-Autoglob-1.01/Autoglob.pm
   15 #     by Sean M. Burke.
   16 #   - Algorithm::Diff
   17 #     http://search.cpan.org/~tyemq/Algorithm-Diff-1.1902/lib/Algorithm/Diff.pm
   18 #     by Tye McQueen.
   19 #
   20 # This program is free software; you can redistribute it and/or modify
   21 # it under the terms of the GNU General Public License as published by
   22 # the Free Software Foundation; either version 2 of the License, or
   23 # (at your option) any later version.
   24 #
   25 # This program is distributed in the hope that it will be useful,
   26 # but WITHOUT ANY WARRANTY; without even the implied warranty of
   27 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   28 # GNU General Public License for more details:
   29 # <http://www.gnu.org/licenses/gpl.txt>.
   30 #
   31 # 1}}}
   32 my $VERSION = "1.86";  # cloc release number; odd number == beta; even number == stable
   33 my $URL     = "github.com/AlDanial/cloc";  # scheme left out on purpose: 'https://' pushes header too wide
   34 require 5.006;  # run-time check: die unless this Perl is at least version 5.6
   35 # use modules                                  {{{1
   36 use warnings;
   37 use strict;
   38 
   39 use Getopt::Long;
   40 use File::Basename;
   41 use File::Temp qw { tempfile tempdir };
   42 use File::Find;
   43 use File::Path;
   44 use File::Spec;
   45 use IO::File;
   46 use List::Util qw( min max );
   47 use Cwd;
   48 use POSIX qw { strftime ceil};
   49 
   50 # Digest::MD5 is loaded unconditionally here; the load-only-if-installed logic below is commented out.
   51 my $HAVE_Digest_MD5 = 1;  # hard-coded to 1 because the module is always loaded on the next line
   52 use Digest::MD5;
   53 ##eval "use Digest::MD5;";
   54 ##if (defined $Digest::MD5::VERSION) {
   55 ##    $HAVE_Digest_MD5 = 1;
   56 ##} else {
   57 ##    warn "Digest::MD5 not installed; will skip file uniqueness checks.\n";
   58 ##}
   59 
   60 # Time::HiRes became standard with Perl 5.8
   61 my $HAVE_Time_HiRes = 1;  # hard-coded to 1 because the module is always loaded on the next line
   62 use Time::HiRes;
   63 ##eval "use Time::HiRes;";
   64 ##$HAVE_Time_HiRes = 1 if defined $Time::HiRes::VERSION;
   65 
   66 my $HAVE_Rexexp_Common = 1;  # hard-coded to 1; NOTE(review): Rexexp is a misspelling of Regexp -- rename only together with every other use in the file
   67 use Regexp::Common;
   68 ### Regexp::Common isn't in the standard distribution.  It will
   69 ### be installed in a temp directory if necessary.
   70 ##BEGIN {
   71 ##    if (eval "use Regexp::Common;") {
   72 ##        $HAVE_Rexexp_Common = 1;
   73 ##    } else {
   74 ##        $HAVE_Rexexp_Common = 0;
   75 ##    }
   76 ##}
   77 
   78 my $HAVE_Algorith_Diff = 1;  # hard-coded to 1; NOTE(review): Algorith is a misspelling of Algorithm -- rename only together with every other use in the file
   79 use Algorithm::Diff qw ( sdiff );
   80 ### Algorithm::Diff isn't in the standard distribution.  It will
   81 ### be installed in a temp directory if necessary.
   82 ##eval "use Algorithm::Diff qw ( sdiff ) ";
   83 ##if (defined $Algorithm::Diff::VERSION) {
   84 ##    $HAVE_Algorith_Diff = 1;
   85 ##} else {
   86 ##    Install_Algorithm_Diff();
   87 ##}
   88 # print "2 HAVE_Algorith_Diff = $HAVE_Algorith_Diff\n";
   89 # test_alg_diff($ARGV[$#ARGV - 1], $ARGV[$#ARGV]); die;
   90 # die "Hre=$HAVE_Rexexp_Common  Had=$HAVE_Algorith_Diff";
   91 
   92 # Uncomment next two lines when building Windows executable with perl2exe
   93 # or if running on a system that already has Regexp::Common.
   94 #use Regexp::Common;
   95 #$HAVE_Rexexp_Common = 1;
   96 
   97 #perl2exe_include "Regexp/Common/whitespace.pm"
   98 #perl2exe_include "Regexp/Common/URI.pm"
   99 #perl2exe_include "Regexp/Common/URI/fax.pm"
  100 #perl2exe_include "Regexp/Common/URI/file.pm"
  101 #perl2exe_include "Regexp/Common/URI/ftp.pm"
  102 #perl2exe_include "Regexp/Common/URI/gopher.pm"
  103 #perl2exe_include "Regexp/Common/URI/http.pm"
  104 #perl2exe_include "Regexp/Common/URI/pop.pm"
  105 #perl2exe_include "Regexp/Common/URI/prospero.pm"
  106 #perl2exe_include "Regexp/Common/URI/news.pm"
  107 #perl2exe_include "Regexp/Common/URI/tel.pm"
  108 #perl2exe_include "Regexp/Common/URI/telnet.pm"
  109 #perl2exe_include "Regexp/Common/URI/tv.pm"
  110 #perl2exe_include "Regexp/Common/URI/wais.pm"
  111 #perl2exe_include "Regexp/Common/CC.pm"
  112 #perl2exe_include "Regexp/Common/SEN.pm"
  113 #perl2exe_include "Regexp/Common/number.pm"
  114 #perl2exe_include "Regexp/Common/delimited.pm"
  115 #perl2exe_include "Regexp/Common/profanity.pm"
  116 #perl2exe_include "Regexp/Common/net.pm"
  117 #perl2exe_include "Regexp/Common/zip.pm"
  118 #perl2exe_include "Regexp/Common/comment.pm"
  119 #perl2exe_include "Regexp/Common/balanced.pm"
  120 #perl2exe_include "Regexp/Common/lingua.pm"
  121 #perl2exe_include "Regexp/Common/list.pm"
  122 #perl2exe_include "File/Glob.pm"
  123 
  124 use Text::Tabs qw { expand };
  125 use Cwd qw { cwd };
  126 use File::Glob;
  127 # 1}}}
  128 # Usage information, options processing.       {{{1
  129 my $ON_WINDOWS = 0;  # 1 = behave as on native Windows, 0 = behave as on Unix
  130    $ON_WINDOWS = 1 if ($^O =~ /^MSWin/) or ($^O eq "Windows_NT");  # first guess comes from Perl's OS name
  131 if ($ON_WINDOWS and $ENV{'SHELL'}) {  # Windows with SHELL set: inspect SHELL to refine the guess
  132     if ($ENV{'SHELL'} =~ m{^/}) {  # SHELL begins with a forward slash
  133         $ON_WINDOWS = 0;  # make Cygwin look like Unix
  134     } else {
  135         $ON_WINDOWS = 1;  # MKS defines $SHELL but still acts like Windows
  136     }
  137 }
  138 my $config_file = '';  # default location of the options file (shown in the --config help text); stays empty if neither branch applies
  139 if ( $ENV{'HOME'} ) {  # HOME is tried first, regardless of platform
  140     $config_file = File::Spec->catfile( $ENV{'HOME'}, '.config', 'cloc', 'options.txt');
  141 } elsif ( $ENV{'APPDATA'} and $ON_WINDOWS ) {  # Windows fallback, used only when HOME is not set to a true value
  142     $config_file = File::Spec->catfile( $ENV{'APPDATA'}, 'cloc');  # NOTE(review): no 'options.txt' component here, unlike the HOME branch -- confirm intended
  143 }
  144 
  145 my $NN     = chr(27) . "[0m";  # normal; placed after bold headings in the usage text
  146    $NN     = "" if $ON_WINDOWS or !(-t STDOUT); # -t STDOUT:  is it a terminal?  Escape is dropped on Windows or when output is not a terminal.
  147 my $BB     = chr(27) . "[1m";  # bold; placed before headings in the usage text
  148    $BB     = "" if $ON_WINDOWS or !(-t STDOUT);  # same suppression rule as for $NN
  149 my $script = basename $0;  # name this program was invoked as, without its directory; interpolated into the usage text
  150 
  151 #  Intended for v1.88:
  152 #  --git-diff-simindex       Git diff strategy #3:  use git's similarity index
  153 #                            (git diff -M --name-status) to identify file pairs
  154 #                            to compare.  This is especially useful to compare
  155 #                            files that were renamed between the commits.
  156 
  157 my $brief_usage  = "
  158                        cloc -- Count Lines of Code
  159 
  160 Usage:
  161     $script [options] <file(s)/dir(s)/git hash(es)>
  162         Count physical lines of source code and comments in the given files
  163         (may be archives such as compressed tarballs or zip files) and/or
  164         recursively below the given directories or git commit hashes.
  165         Example:    cloc src/ include/ main.c
  166 
  167     $script [options] --diff <set1>  <set2>
  168         Compute differences of physical lines of source code and comments
  169         between any pairwise combination of directory names, archive
  170         files or git commit hashes.
  171         Example:    cloc --diff Python-3.5.tar.xz python-3.6/
  172 
  173 $script --help  shows full documentation on the options.
  174 https://$URL has numerous examples and more information.
  175 ";  # end of $brief_usage, the short help text; $script and $URL are interpolated into it
  176 my $usage  = "
  177 Usage: $script [options] <file(s)/dir(s)/git hash(es)> | <set 1> <set 2> | <report files>
  178 
  179  Count, or compute differences of, physical lines of source code in the
  180  given files (may be archives such as compressed tarballs or zip files,
  181  or git commit hashes or branch names) and/or recursively below the
  182  given directories.
  183 
  184  ${BB}Input Options${NN}
  185    --extract-with=<cmd>      This option is only needed if cloc is unable
  186                              to figure out how to extract the contents of
  187                              the input file(s) by itself.
  188                              Use <cmd> to extract binary archive files (e.g.:
  189                              .tar.gz, .zip, .Z).  Use the literal '>FILE<' as
  190                              a stand-in for the actual file(s) to be
  191                              extracted.  For example, to count lines of code
  192                              in the input files
  193                                 gcc-4.2.tar.gz  perl-5.8.8.tar.gz
  194                              on Unix use
  195                                --extract-with='gzip -dc >FILE< | tar xf -'
  196                              or, if you have GNU tar,
  197                                --extract-with='tar zxf >FILE<'
  198                              and on Windows use, for example:
  199                                --extract-with=\"\\\"c:\\Program Files\\WinZip\\WinZip32.exe\\\" -e -o >FILE< .\"
  200                              (if WinZip is installed there).
  201    --list-file=<file>        Take the list of file and/or directory names to
  202                              process from <file>, which has one file/directory
  203                              name per line.  Only exact matches are counted;
  204                              relative path names will be resolved starting from
  205                              the directory where cloc is invoked.  Set <file>
  206                              to - to read file names from a STDIN pipe.
  207                              See also --exclude-list-file.
  208    --diff-list-file=<file>   Take the pairs of file names to be diff'ed from
  209                              <file>, whose format matches the output of
  210                              --diff-alignment.  (Run with that option to
  211                              see a sample.)  The language identifier at the
  212                              end of each line is ignored.  This enables --diff
  213                              mode and by-passes file pair alignment logic.
  214    --vcs=<VCS>               Invoke a system call to <VCS> to obtain a list of
  215                              files to work on.  If <VCS> is 'git', then will
  216                              invoke 'git ls-files' to get a file list and
  217                              'git submodule status' to get a list of submodules
  218                              whose contents will be ignored.  See also --git
  219                              which accepts git commit hashes and branch names.
  220                              If <VCS> is 'svn' then will invoke 'svn list -R'.
  221                              The primary benefit is that cloc will then skip
  222                              files explicitly excluded by the versioning tool
  223                              in question, ie, those in .gitignore or have the
  224                              svn:ignore property.
  225                              Alternatively <VCS> may be any system command
  226                              that generates a list of files.
  227                              Note:  cloc must be in a directory which can read
  228                              the files as they are returned by <VCS>.  cloc will
  229                              not download files from remote repositories.
  230                              'svn list -R' may refer to a remote repository
  231                              to obtain file names (and therefore may require
  232                              authentication to the remote repository), but
  233                              the files themselves must be local.
  234                              Setting <VCS> to 'auto' selects between 'git'
  235                              and 'svn' (or neither) depending on the presence
  236                              of a .git or .svn subdirectory below the directory
  237                              where cloc is invoked.
  238    --unicode                 Check binary files to see if they contain Unicode
  239                              expanded ASCII text.  This causes performance to
  240                              drop noticeably.
  241 
  242  ${BB}Processing Options${NN}
  243    --autoconf                Count .in files (as processed by GNU autoconf) of
  244                              recognized languages.  See also --no-autogen.
  245    --by-file                 Report results for every source file encountered.
  246    --by-file-by-lang         Report results for every source file encountered
  247                              in addition to reporting by language.
  248    --config <file>           Read command line switches from <file> instead of
  249                              the default location of $config_file.
  250                              The file should contain one switch, along with
  251                              arguments (if any), per line.  Blank lines and lines
  252                              beginning with '#' are skipped.  Options given on
  253                              the command line take priority over entries read from
  254                              the file.
  255    --count-and-diff <set1> <set2>
  256                              First perform direct code counts of source file(s)
  257                              of <set1> and <set2> separately, then perform a diff
  258                              of these.  Inputs may be pairs of files, directories,
  259                              or archives.  If --out or --report-file is given,
  260                              three output files will be created, one for each
  261                              of the two counts and one for the diff.  See also
  262                              --diff, --diff-alignment, --diff-timeout,
  263                              --ignore-case, --ignore-whitespace.
  264    --diff <set1> <set2>      Compute differences in code and comments between
  265                              source file(s) of <set1> and <set2>.  The inputs
  266                              may be any mix of files, directories, archives,
  267                              or git commit hashes.  Use --diff-alignment to
  268                              generate a list showing which file pairs were
  269                              compared.  When comparing git branches, only files
  270                              which have changed in either commit are compared.
  271                              See also --git, --count-and-diff, --diff-alignment,
  272                              --diff-list-file, --diff-timeout, --ignore-case,
  273                              --ignore-whitespace.
  274    --diff-timeout <N>        Ignore files which take more than <N> seconds
  275                              to process.  Default is 10 seconds.  Setting <N>
  276                              to 0 allows unlimited time.  (Large files with many
  277                              repeated lines can cause Algorithm::Diff::sdiff()
  278                              to take hours.) See also --timeout.
  279    --docstring-as-code       cloc considers docstrings to be comments, but this is
  280                              not always correct as docstrings represent regular
  281                              strings when they appear on the right hand side of an
  282                              assignment or as function arguments.  This switch
  283                              forces docstrings to be counted as code.
  284    --follow-links            [Unix only] Follow symbolic links to directories
  285                              (sym links to files are always followed).
  286                              See also --stat.
  287    --force-lang=<lang>[,<ext>]
  288                              Process all files that have a <ext> extension
  289                              with the counter for language <lang>.  For
  290                              example, to count all .f files with the
  291                              Fortran 90 counter (which expects files to
  292                              end with .f90) instead of the default Fortran 77
  293                              counter, use
  294                                --force-lang=\"Fortran 90\",f
  295                              If <ext> is omitted, every file will be counted
  296                              with the <lang> counter.  This option can be
  297                              specified multiple times (but that is only
  298                              useful when <ext> is given each time).
  299                              See also --script-lang, --lang-no-ext.
  300    --force-lang-def=<file>   Load language processing filters from <file>,
  301                              then use these filters instead of the built-in
  302                              filters.  Note:  languages which map to the same
  303                              file extension (for example:
  304                              MATLAB/Mathematica/Objective C/MUMPS/Mercury;
  305                              Pascal/PHP; Lisp/OpenCL; Lisp/Julia; Perl/Prolog)
  306                              will be ignored as these require additional
  307                              processing that is not expressed in language
  308                              definition files.  Use --read-lang-def to define
  309                              new language filters without replacing built-in
  310                              filters (see also --write-lang-def,
  311                              --write-lang-def-incl-dup).
  312    --git                     Forces the inputs to be interpreted as git targets
  313                              (commit hashes, branch names, et cetera) if these
  314                              are not first identified as file or directory
  315                              names.  This option overrides the --vcs=git logic
  316                              if this is given; in other words, --git gets its
  317                              list of files to work on directly from git using
  318                              the hash or branch name rather than from
  319                              'git ls-files'.  This option can be used with
  320                              --diff to perform line count diffs between git
  321                              commits, or between a git commit and a file,
  322                              directory, or archive.  Use -v/--verbose to see
  323                              the git system commands cloc issues.
  324    --git-diff-rel            Same as --git --diff, or just --diff if the inputs
  325                              are recognized as git targets.  Only files which
  326                              have changed in either commit are compared.
  327    --git-diff-all            Git diff strategy #2:  compare all files in the
  328                              repository between the two commits.
  329    --ignore-whitespace       Ignore horizontal white space when comparing files
  330                              with --diff.  See also --ignore-case.
  331    --ignore-case             Ignore changes in case within file contents;
  332                              consider upper- and lowercase letters equivalent
  333                              when comparing files with --diff.  See also
  334                              --ignore-whitespace.
  335    --ignore-case-ext         Ignore case of file name extensions.  This will
  336                              cause problems counting some languages
  337                              (specifically, .c and .C are associated with C and
  338                              C++; this switch would count .C files as C rather
  339                              than C++ on *nix operating systems).  File name
  340                              case insensitivity is always true on Windows.
  341    --lang-no-ext=<lang>      Count files without extensions using the <lang>
  342                              counter.  This option overrides internal logic
  343                              for files without extensions (where such files
  344                              are checked against known scripting languages
  345                              by examining the first line for #!).  See also
  346                              --force-lang, --script-lang.
  347    --max-file-size=<MB>      Skip files larger than <MB> megabytes when
  348                              traversing directories.  By default, <MB>=100.
  349                              cloc's memory requirement is roughly twenty times
  350                              larger than the largest file so running with
  351                              files larger than 100 MB on a computer with less
  352                              than 2 GB of memory will cause problems.
  353                              Note:  this check does not apply to files
  354                              explicitly passed as command line arguments.
  355    --no-autogen[=list]       Ignore files generated by code-production systems
  356                              such as GNU autoconf.  To see a list of these files
  357                              (then exit), run with --no-autogen list
  358                              See also --autoconf.
  359    --original-dir            [Only effective in combination with
  360                              --strip-comments]  Write the stripped files
  361                              to the same directory as the original files.
  362    --read-binary-files       Process binary files in addition to text files.
  363                              This is usually a bad idea and should only be
  364                              attempted with text files that have embedded
  365                              binary data.
  366    --read-lang-def=<file>    Load new language processing filters from <file>
  367                              and merge them with those already known to cloc.
  368                              If <file> defines a language cloc already knows
  369                              about, cloc's definition will take precedence.
  370                              Use --force-lang-def to over-ride cloc's
  371                              definitions (see also --write-lang-def,
  372                              --write-lang-def-incl-dup).
  373    --script-lang=<lang>,<s>  Process all files that invoke <s> as a #!
  374                              scripting language with the counter for language
  375                              <lang>.  For example, files that begin with
  376                                 #!/usr/local/bin/perl5.8.8
  377                              will be counted with the Perl counter by using
  378                                 --script-lang=Perl,perl5.8.8
  379                              The language name is case insensitive but the
  380                              name of the script language executable, <s>,
  381                              must have the right case.  This option can be
  382                              specified multiple times.  See also --force-lang,
  383                              --lang-no-ext.
  384    --sdir=<dir>              Use <dir> as the scratch directory instead of
  385                              letting File::Temp choose the location.  Files
  386                              written to this location are not removed at
  387                              the end of the run (as they are with File::Temp).
  388    --skip-uniqueness         Skip the file uniqueness check.  This will give
  389                              a performance boost at the expense of counting
  390                              files with identical contents multiple times
  391                              (if such duplicates exist).
  392    --stat                    Some file systems (AFS, CD-ROM, FAT, HPFS, SMB)
  393                              do not have directory 'nlink' counts that match
  394                              the number of its subdirectories.  Consequently
  395                              cloc may undercount or completely skip the
  396                              contents of such file systems.  This switch forces
  397                              File::Find to stat directories to obtain the
  398                              correct count.  File search speed will decrease.
  399                              See also --follow-links.
  400    --stdin-name=<file>       Give a file name to use to determine the language
  401                              for standard input.  (Use - as the input name to
  402                              receive source code via STDIN.)
  403    --strip-comments=<ext>    For each file processed, write to the current
  404                              directory a version of the file which has blank
  405                              and commented lines removed (in-line comments
  406                              persist).  The name of each stripped file is the
  407                              original file name with .<ext> appended to it.
  408                              It is written to the current directory unless
  409                              --original-dir is on.
  410    --strip-str-comments      Replace comment markers embedded in strings with
  411                              'xx'.  This attempts to work around a limitation
  412                              in Regexp::Common::Comment where comment markers
  413                              embedded in strings are seen as actual comment
  414                              markers and not strings, often resulting in a
  415                              'Complex regular subexpression recursion limit'
  416                              warning and incorrect counts.  There are two
  417                              disadvantages to using this switch:  1/code count
  418                              performance drops, and 2/code generated with
  419                              --strip-comments will contain different strings
  420                              wherever embedded comments are found.
  421    --sum-reports             Input arguments are report files previously
  422                              created with the --report-file option in plain
  423                              format (eg. not JSON, YAML, XML, or SQL).
  424                              Makes a cumulative set of results containing the
  425                              sum of data from the individual report files.
  426    --timeout <N>             Ignore files which take more than <N> seconds
  427                              to process at any of the language's filter stages.
  428                              The default maximum number of seconds spent on a
  429                              filter stage is the number of lines in the file
  430                              divided by one thousand.  Setting <N> to 0 allows
  431                              unlimited time.  See also --diff-timeout.
  432    --processes=NUM           [Available only on systems with a recent version
  433                              of the Parallel::ForkManager module.  Not
  434                              available on Windows.] Sets the maximum number of
  435                              cores that cloc uses.  The default value of 0
  436                              disables multiprocessing.
  437    --unix                    Override the operating system autodetection
  438                              logic and run in UNIX mode.  See also
  439                              --windows, --show-os.
  440    --use-sloccount           If SLOCCount is installed, use its compiled
  441                              executables c_count, java_count, pascal_count,
  442                              php_count, and xml_count instead of cloc's
  443                              counters.  SLOCCount's compiled counters are
  444                              substantially faster than cloc's and may give
  445                              a performance improvement when counting projects
  446                              with large files.  However, these cloc-specific
  447                              features will not be available: --diff,
  448                              --count-and-diff, --strip-comments, --unicode.
  449    --windows                 Override the operating system autodetection
  450                              logic and run in Microsoft Windows mode.
  451                              See also --unix, --show-os.
  452 
  453  ${BB}Filter Options${NN}
  454    --exclude-content=<regex> Exclude files containing text that matches the given
  455                              regular expression.
  456    --exclude-dir=<D1>[,D2,]  Exclude the given comma separated directories
  457                              D1, D2, D3, et cetera, from being scanned.  For
  458                              example  --exclude-dir=.cache,test  will skip
  459                              all files and subdirectories that have /.cache/
  460                              or /test/ as their parent directory.
  461                              Directories named .bzr, .cvs, .hg, .git, .svn,
  462                              and .snapshot are always excluded.
  463                              This option only works with individual directory
  464                              names so including file path separators is not
  465                              allowed.  Use --fullpath and --not-match-d=<regex>
  466                              to supply a regex matching multiple subdirectories.
  467    --exclude-ext=<ext1>[,<ext2>[...]]
  468                              Do not count files having the given file name
  469                              extensions.
  470    --exclude-lang=<L1>[,L2[...]]
  471                              Exclude the given comma separated languages
  472                              L1, L2, L3, et cetera, from being counted.
  473    --exclude-list-file=<file>  Ignore files and/or directories whose names
  474                              appear in <file>.  <file> should have one file
  475                              name per line.  Only exact matches are ignored;
  476                              relative path names will be resolved starting from
  477                              the directory where cloc is invoked.
  478                              See also --list-file.
  479    --fullpath                Modifies the behavior of --match-f, --not-match-f,
  480                              and --not-match-d to include the file's path
  481                              in the regex, not just the file's basename.
  482                              (This does not expand each file to include its
  483                              absolute path, instead it uses as much of
  484                              the path as is passed in to cloc.)
  485                              Note:  --match-d always looks at the full
  486                              path and therefore is unaffected by --fullpath.
  487    --include-ext=<ext1>[,ext2[...]]
  488                              Count only languages having the given comma
  489                              separated file extensions.  Use --show-ext to
  490                              see the recognized extensions.
  491    --include-lang=<L1>[,L2[...]]
  492                              Count only the given comma separated languages
  493                              L1, L2, L3, et cetera.  Use --show-lang to see
  494                              the list of recognized languages.
  495    --match-d=<regex>         Only count files in directories matching the Perl
  496                              regex.  For example
  497                                --match-d='/(src|include)/'
  498                              only counts files in directories containing
  499                              /src/ or /include/.  Unlike --not-match-d,
  500                              --match-f, and --not-match-f, --match-d always
  501                              compares the fully qualified path against the
  502                              regex.
  503    --not-match-d=<regex>     Count all files except those in directories
  504                              matching the Perl regex.  Only the trailing
  505                              directory name is compared, for example, when
  506                              counting in /usr/local/lib, only 'lib' is
  507                              compared to the regex.
  508                              Add --fullpath to compare parent directories to
  509                              the regex.
  510                              Do not include file path separators at the
  511                              beginning or end of the regex.
  512    --match-f=<regex>         Only count files whose basenames match the Perl
  513                              regex.  For example
  514                                --match-f='^[Ww]idget'
  515                              only counts files that start with Widget or widget.
  516                              Add --fullpath to include parent directories
  517                              in the regex instead of just the basename.
  518    --not-match-f=<regex>     Count all files except those whose basenames
  519                              match the Perl regex.  Add --fullpath to include
  520                              parent directories in the regex instead of just
  521                              the basename.
  522    --skip-archive=<regex>    Ignore files that end with the given Perl regular
  523                              expression.  For example, if given
  524                                --skip-archive='(zip|tar(\.(gz|Z|bz2|xz|7z))?)'
  525                              the code will skip files that end with .zip,
  526                              .tar, .tar.gz, .tar.Z, .tar.bz2, .tar.xz, and
  527                              .tar.7z.
  528    --skip-win-hidden         On Windows, ignore hidden files.
  529 
  530  ${BB}Debug Options${NN}
  531    --categorized=<file>      Save names of categorized files to <file>.
  532    --counted=<file>          Save names of processed source files to <file>.
  533    --diff-alignment=<file>   Write to <file> a list of files and file pairs
  534                              showing which files were added, removed, and/or
  535                              compared during a run with --diff.  This switch
  536                              forces the --diff mode on.
  537    --explain=<lang>          Print the filters used to remove comments for
  538                              language <lang> and exit.  In some cases the
  539                              filters refer to Perl subroutines rather than
  540                              regular expressions.  An examination of the
  541                              source code may be needed for further explanation.
  542    --help                    Print this usage information and exit.
  543    --found=<file>            Save names of every file found to <file>.
  544    --ignored=<file>          Save names of ignored files and the reason they
  545                              were ignored to <file>.
  546    --print-filter-stages     Print processed source code before and after
  547                              each filter is applied.
  548    --show-ext[=<ext>]        Print information about all known (or just the
  549                              given) file extensions and exit.
  550    --show-lang[=<lang>]      Print information about all known (or just the
  551                              given) languages and exit.
  552    --show-os                 Print the value of the operating system mode
  553                              and exit.  See also --unix, --windows.
  554    -v[=<n>]                  Verbose switch (optional numeric value).
  555    -verbose[=<n>]            Long form of -v.
  556    --version                 Print the version of this program and exit.
  557    --write-lang-def=<file>   Writes to <file> the language processing filters
  558                              then exits.  Useful as a first step to creating
  559                              custom language definitions. Note: languages which
  560                              map to the same file extension will be excluded.
  561                              (See also --force-lang-def, --read-lang-def).
  562    --write-lang-def-incl-dup=<file>
  563                              Same as --write-lang-def, but includes duplicated
  564                              extensions.  This generates a problematic language
  565                              definition file because cloc will refuse to use
  566                              it until duplicates are removed.
  567 
  568  ${BB}Output Options${NN}
  569    --3                       Print third-generation language output.
  570                              (This option can cause report summation to fail
  571                              if some reports were produced with this option
  572                              while others were produced without it.)
  573    --by-percent  X           Instead of comment and blank line counts, show
  574                              these values as percentages based on the value
  575                              of X in the denominator:
  576                                 X = 'c'   -> # lines of code
  577                                 X = 'cm'  -> # lines of code + comments
  578                                 X = 'cb'  -> # lines of code + blanks
  579                                 X = 'cmb' -> # lines of code + comments + blanks
  580                              For example, if using method 'c' and your code
  581                              has twice as many lines of comments as lines
  582                              of code, the value in the comment column will
  583                              be 200%.  The code column remains a line count.
  584    --csv                     Write the results as comma separated values.
  585    --csv-delimiter=<C>       Use the character <C> as the delimiter for comma
  586                              separated files instead of ,.  This switch forces
                                   --csv to be on.
  587    --file-encoding=<E>       Write output files using the <E> encoding instead of
  588                              the default ASCII (<E> = 'UTF-7').  Examples: 'UTF-16',
  589                              'euc-kr', 'iso-8859-16'.  Known encodings can be
  590                              printed with
  591                                perl -MEncode -e 'print join(\"\\n\", Encode->encodings(\":all\")), \"\\n\"'
  592    --hide-rate               Do not show line and file processing rates in the
  593                              output header. This makes output deterministic.
  594    --json                    Write the results as JavaScript Object Notation
  595                              (JSON) formatted output.
  596    --md                      Write the results as Markdown-formatted text.
  597    --out=<file>              Synonym for --report-file=<file>.
  598    --progress-rate=<n>       Show progress update after every <n> files are
  599                              processed (default <n>=100).  Set <n> to 0 to
  600                              suppress progress output (useful when redirecting
  601                              output to STDOUT).
  602    --quiet                   Suppress all information messages except for
  603                              the final report.
  604    --report-file=<file>      Write the results to <file> instead of STDOUT.
  605    --sql=<file>              Write results as SQL create and insert statements
  606                              which can be read by a database program such as
  607                              SQLite.  If <file> is -, output is sent to STDOUT.
  608    --sql-append              Append SQL insert statements to the file specified
  609                              by --sql and do not generate table creation
  610                              statements.  Only valid with the --sql option.
  611    --sql-project=<name>      Use <name> as the project identifier for the
  612                              current run.  Only valid with the --sql option.
  613    --sql-style=<style>       Write SQL statements in the given style instead
  614                              of the default SQLite format.  Styles include
  615                              'Oracle' and 'Named_Columns'.
  616    --sum-one                 For plain text reports, show the SUM: output line
  617                              even if only one input file is processed.
  618    --xml                     Write the results in XML.
  619    --xsl=<file>              Reference <file> as an XSL stylesheet within
  620                              the XML output.  If <file> is 1 (numeric one),
  621                              writes a default stylesheet, cloc.xsl (or
  622                              cloc-diff.xsl if --diff is also given).
  623                              This switch forces --xml on.
  624    --yaml                    Write the results in YAML.
  625 
  626 ";
  627 #  Help information for options not yet implemented:
  628 #  --inline                  Process comments that appear at the end
  629 #                            of lines containing code.
  630 #  --html                    Create HTML files of each input file showing
  631 #                            comment and code lines in different colors.
  632 
  633 $| = 1;  # flush STDOUT
  634 my $start_time = get_time();
      # Lexical storage for the command line switches.  GetOptions() below
      # binds each switch to one of these variables, and most of them are
      # then also passed by reference to load_from_config_file().
      # Scalars start out undef and the three arrays start out empty.
      # Notes:
      #   - $opt_git_diff_simindex has no active GetOptions() entry (its
      #     line is commented out below), so nothing in the option parsing
      #     assigns it.
      #   - $opt_html and $opt_inline are parsed, but the help text lists
      #     --html and --inline under "options not yet implemented".
  635 my (
  636     $opt_categorized          ,
  637     $opt_found                ,
  638     @opt_force_lang           ,
  639     $opt_lang_no_ext          ,
  640     @opt_script_lang          ,
  641     $opt_count_diff           ,
  642     $opt_diff                 ,
  643     $opt_diff_alignment       ,
  644     $opt_diff_list_file       ,
  645     $opt_diff_timeout         ,
  646     $opt_timeout              ,
  647     $opt_html                 ,
  648     $opt_ignored              ,
  649     $opt_counted              ,
  650     $opt_show_ext             ,
  651     $opt_show_lang            ,
  652     $opt_progress_rate        ,
  653     $opt_print_filter_stages  ,
  654     $opt_v                    ,
  655     $opt_vcs                  ,
  656     $opt_version              ,
  657     $opt_exclude_content      ,
  658     $opt_exclude_lang         ,
  659     $opt_exclude_list_file    ,
  660     $opt_exclude_dir          ,
  661     $opt_explain              ,
  662     $opt_include_ext          ,
  663     $opt_include_lang         ,
  664     $opt_force_lang_def       ,
  665     $opt_read_lang_def        ,
  666     $opt_write_lang_def       ,
  667     $opt_write_lang_def_incl_dup,
  668     $opt_strip_comments       ,
  669     $opt_original_dir         ,
  670     $opt_quiet                ,
  671     $opt_report_file          ,
  672     $opt_sdir                 ,
  673     $opt_sum_reports          ,
  674     $opt_hide_rate            ,
  675     $opt_processes            ,
  676     $opt_unicode              ,
  677     $opt_no3                  ,   # accept it but don't use it
  678     $opt_3                    ,
  679     $opt_extract_with         ,
  680     $opt_by_file              ,
  681     $opt_by_file_by_lang      ,
  682     $opt_by_percent           ,
  683     $opt_xml                  ,
  684     $opt_xsl                  ,
  685     $opt_yaml                 ,
  686     $opt_csv                  ,
  687     $opt_csv_delimiter        ,
  688     $opt_fullpath             ,
  689     $opt_json                 ,
  690     $opt_md                   ,
  691     $opt_match_f              ,
  692     $opt_not_match_f          ,
  693     $opt_match_d              ,
  694     $opt_not_match_d          ,
  695     $opt_skip_uniqueness      ,
  696     $opt_list_file            ,
  697     $opt_help                 ,
  698     $opt_skip_win_hidden      ,
  699     $opt_read_binary_files    ,
  700     $opt_sql                  ,
  701     $opt_sql_append           ,
  702     $opt_sql_project          ,
  703     $opt_sql_style            ,
  704     $opt_inline               ,
  705     $opt_exclude_ext          ,
  706     $opt_ignore_whitespace    ,
  707     $opt_ignore_case          ,
  708     $opt_ignore_case_ext      ,
  709     $opt_follow_links         ,
  710     $opt_autoconf             ,
  711     $opt_sum_one              ,
  712     $opt_stdin_name           ,
  713     $opt_force_on_windows     ,
  714     $opt_force_on_unix        ,   # actually forces !$ON_WINDOWS
  715     $opt_show_os              ,
  716     $opt_skip_archive         ,
  717     $opt_max_file_size        ,   # in MB
  718     $opt_use_sloccount        ,
  719     $opt_no_autogen           ,
  720     $opt_force_git            ,
  721     $opt_git_diff_rel         ,
  722     $opt_git_diff_all         ,
  723     $opt_git_diff_simindex    ,
  724     $opt_config_file          ,
  725     $opt_strip_str_comments   ,
  726     $opt_file_encoding        ,
  727     $opt_docstring_as_code    ,
  728     $opt_stat                 ,
  729    );
  730 
  731 my $getopt_success = GetOptions(             # {{{1
      # Parse the command line.  Each entry maps a switch, together with its
      # underscore and hyphen spellings, to the variable that receives it:
      #   "name=s"  requires a string value     "name=i"  requires an integer
      #   "name:s"  takes an optional value     "name"    is a boolean flag
      # An array reference as the target lets the switch be repeated.
      # $getopt_success is tested further below; the program dies there if
      # parsing failed.
  732    "by_file|by-file"                         => \$opt_by_file             ,
  733    "by_file_by_lang|by-file-by-lang"         => \$opt_by_file_by_lang     ,
  734    "categorized=s"                           => \$opt_categorized         ,
  735    "counted=s"                               => \$opt_counted             ,
  736    "include_ext|include-ext=s"               => \$opt_include_ext         ,
  737    "include_lang|include-lang=s"             => \$opt_include_lang        ,
  738    "exclude_content|exclude-content=s"       => \$opt_exclude_content     ,
  739    "exclude_lang|exclude-lang=s"             => \$opt_exclude_lang        ,
  740    "exclude_dir|exclude-dir=s"               => \$opt_exclude_dir         ,
  741    "exclude_list_file|exclude-list-file=s"   => \$opt_exclude_list_file   ,
  742    "explain=s"                               => \$opt_explain             ,
  743    "extract_with|extract-with=s"             => \$opt_extract_with        ,
  744    "found=s"                                 => \$opt_found               ,
  745    "count_and_diff|count-and-diff"           => \$opt_count_diff          ,
  746    "diff"                                    => \$opt_diff                ,
  747    "diff-alignment|diff_alignment=s"         => \$opt_diff_alignment      ,
  748    "diff-timeout|diff_timeout=i"             => \$opt_diff_timeout        ,
  749    "diff-list-file|diff_list_file=s"         => \$opt_diff_list_file      ,
  750    "timeout=i"                               => \$opt_timeout             ,
  751    "html"                                    => \$opt_html                ,
  752    "ignored=s"                               => \$opt_ignored             ,
  753    "quiet"                                   => \$opt_quiet               ,
  754    "force_lang_def|force-lang-def=s"         => \$opt_force_lang_def      ,
  755    "read_lang_def|read-lang-def=s"           => \$opt_read_lang_def       ,
  756    "show_ext|show-ext:s"                     => \$opt_show_ext            ,
  757    "show_lang|show-lang:s"                   => \$opt_show_lang           ,
  758    "progress_rate|progress-rate=i"           => \$opt_progress_rate       ,
  759    "print_filter_stages|print-filter-stages" => \$opt_print_filter_stages ,
  760    "report_file|report-file=s"               => \$opt_report_file         ,
  761    "out=s"                                   => \$opt_report_file         ,
  762    "script_lang|script-lang=s"               => \@opt_script_lang         ,
  763    "sdir=s"                                  => \$opt_sdir                ,
  764    "skip_uniqueness|skip-uniqueness"         => \$opt_skip_uniqueness     ,
  765    "strip_comments|strip-comments=s"         => \$opt_strip_comments      ,
  766    "original_dir|original-dir"               => \$opt_original_dir        ,
  767    "sum_reports|sum-reports"                 => \$opt_sum_reports         ,
  768    "hide_rate|hide-rate"                     => \$opt_hide_rate           ,
  769    "processes=n"                             => \$opt_processes           ,
  770    "unicode"                                 => \$opt_unicode             ,
  771    "no3"                                     => \$opt_no3                 ,  # ignored
  772    "3"                                       => \$opt_3                   ,
  773    "v|verbose:i"                             => \$opt_v                   ,
  774    "vcs=s"                                   => \$opt_vcs                 ,
  775    "version"                                 => \$opt_version             ,
  776    "write_lang_def|write-lang-def=s"         => \$opt_write_lang_def      ,
  777    "write_lang_def_incl_dup|write-lang-def-incl-dup=s" => \$opt_write_lang_def_incl_dup,
  778    "xml"                                     => \$opt_xml                 ,
  779    "xsl=s"                                   => \$opt_xsl                 ,
  780    "force_lang|force-lang=s"                 => \@opt_force_lang          ,
  781    "lang_no_ext|lang-no-ext=s"               => \$opt_lang_no_ext         ,
  782    "yaml"                                    => \$opt_yaml                ,
  783    "csv"                                     => \$opt_csv                 ,
  784    "csv_delimiter|csv_delimeter|csv-delimiter=s" => \$opt_csv_delimiter   ,  # misspelled alias kept for compatibility
  785    "json"                                    => \$opt_json                ,
  786    "md"                                      => \$opt_md                  ,
  787    "fullpath"                                => \$opt_fullpath            ,
  788    "match_f|match-f=s"                       => \$opt_match_f             ,
  789    "not_match_f|not-match-f=s"               => \$opt_not_match_f         ,
  790    "match_d|match-d=s"                       => \$opt_match_d             ,
  791    "not_match_d|not-match-d=s"               => \$opt_not_match_d         ,
  792    "list_file|list-file=s"                   => \$opt_list_file           ,
  793    "help"                                    => \$opt_help                ,
  794    "skip_win_hidden|skip-win-hidden"         => \$opt_skip_win_hidden     ,
  795    "read_binary_files|read-binary-files"     => \$opt_read_binary_files   ,
  796    "sql=s"                                   => \$opt_sql                 ,
  797    "sql_project|sql-project=s"               => \$opt_sql_project         ,
  798    "sql_append|sql-append"                   => \$opt_sql_append          ,
  799    "sql_style|sql-style=s"                   => \$opt_sql_style           ,
  800    "inline"                                  => \$opt_inline              ,
  801    "exclude_ext|exclude-ext=s"               => \$opt_exclude_ext         ,
  802    "ignore_whitespace|ignore-whitespace"     => \$opt_ignore_whitespace   ,
  803    "ignore_case|ignore-case"                 => \$opt_ignore_case         ,
  804    "ignore_case_ext|ignore-case-ext"         => \$opt_ignore_case_ext     ,
  805    "follow_links|follow-links"               => \$opt_follow_links        ,
  806    "autoconf"                                => \$opt_autoconf            ,
  807    "sum_one|sum-one"                         => \$opt_sum_one             ,
  808    "by_percent|by-percent=s"                 => \$opt_by_percent          ,
  809    "stdin_name|stdin-name=s"                 => \$opt_stdin_name          ,
  810    "windows"                                 => \$opt_force_on_windows    ,
  811    "unix"                                    => \$opt_force_on_unix       ,
  812    "show_os|show-os"                         => \$opt_show_os             ,
  813    "skip_archive|skip-archive=s"             => \$opt_skip_archive        ,
  814    "max_file_size|max-file-size=i"           => \$opt_max_file_size       ,
  815    "use_sloccount|use-sloccount"             => \$opt_use_sloccount       ,
  816    "no_autogen|no-autogen"                   => \$opt_no_autogen          ,
  817    "git"                                     => \$opt_force_git           ,
  818    "git_diff_rel|git-diff-rel"               => \$opt_git_diff_rel        ,
  819    "git_diff_all|git-diff-all"               => \$opt_git_diff_all        ,
  820 #  "git_diff_simindex|git-diff-simindex"     => \$opt_git_diff_simindex   ,
  821    "config=s"                                => \$opt_config_file         ,
  822    "strip_str_comments|strip-str-comments"   => \$opt_strip_str_comments  ,
  823    "file_encoding|file-encoding=s"           => \$opt_file_encoding       ,
  824    "docstring_as_code|docstring-as-code"     => \$opt_docstring_as_code   ,
  825    "stat"                                    => \$opt_stat                ,
  826   );
  827 # 1}}}
      # An explicit --config=<file> replaces whatever $config_file held
      # before this point.
      #
      # load_from_config_file() is defined outside this section.  It gets
      # the file name followed by a reference to each option variable, in a
      # fixed positional order.
      # NOTE(review): presumably the order has to match the parameter list
      # inside load_from_config_file() exactly -- confirm before inserting,
      # removing, or reordering an argument.
      # Option variables bound by GetOptions() above but NOT passed here:
      #   $opt_diff_list_file, $opt_no3, $opt_git_diff_rel,
      #   $opt_git_diff_all, $opt_config_file
  828 $config_file = $opt_config_file if defined $opt_config_file;
  829 load_from_config_file($config_file,          # {{{2
  830                                                 \$opt_by_file             ,
  831                                                 \$opt_by_file_by_lang     ,
  832                                                 \$opt_categorized         ,
  833                                                 \$opt_counted             ,
  834                                                 \$opt_include_ext         ,
  835                                                 \$opt_include_lang        ,
  836                                                 \$opt_exclude_content     ,
  837                                                 \$opt_exclude_lang        ,
  838                                                 \$opt_exclude_dir         ,
  839                                                 \$opt_exclude_list_file   ,
  840                                                 \$opt_explain             ,
  841                                                 \$opt_extract_with        ,
  842                                                 \$opt_found               ,
  843                                                 \$opt_count_diff          ,
  844                                                 \$opt_diff                ,
  845                                                 \$opt_diff_alignment      ,
  846                                                 \$opt_diff_timeout        ,
  847                                                 \$opt_timeout             ,
  848                                                 \$opt_html                ,
  849                                                 \$opt_ignored             ,
  850                                                 \$opt_quiet               ,
  851                                                 \$opt_force_lang_def      ,
  852                                                 \$opt_read_lang_def       ,
  853                                                 \$opt_show_ext            ,
  854                                                 \$opt_show_lang           ,
  855                                                 \$opt_progress_rate       ,
  856                                                 \$opt_print_filter_stages ,
  857                                                 \$opt_report_file         ,
  858                                                 \@opt_script_lang         ,
  859                                                 \$opt_sdir                ,
  860                                                 \$opt_skip_uniqueness     ,
  861                                                 \$opt_strip_comments      ,
  862                                                 \$opt_original_dir        ,
  863                                                 \$opt_sum_reports         ,
  864                                                 \$opt_hide_rate           ,
  865                                                 \$opt_processes           ,
  866                                                 \$opt_unicode             ,
  867                                                 \$opt_3                   ,
  868                                                 \$opt_v                   ,
  869                                                 \$opt_vcs                 ,
  870                                                 \$opt_version             ,
  871                                                 \$opt_write_lang_def      ,
  872                                                 \$opt_write_lang_def_incl_dup,
  873                                                 \$opt_xml                 ,
  874                                                 \$opt_xsl                 ,
  875                                                 \@opt_force_lang          ,
  876                                                 \$opt_lang_no_ext         ,
  877                                                 \$opt_yaml                ,
  878                                                 \$opt_csv                 ,
  879                                                 \$opt_csv_delimiter       ,
  880                                                 \$opt_json                ,
  881                                                 \$opt_md                  ,
  882                                                 \$opt_fullpath            ,
  883                                                 \$opt_match_f             ,
  884                                                 \$opt_not_match_f         ,
  885                                                 \$opt_match_d             ,
  886                                                 \$opt_not_match_d         ,
  887                                                 \$opt_list_file           ,
  888                                                 \$opt_help                ,
  889                                                 \$opt_skip_win_hidden     ,
  890                                                 \$opt_read_binary_files   ,
  891                                                 \$opt_sql                 ,
  892                                                 \$opt_sql_project         ,
  893                                                 \$opt_sql_append          ,
  894                                                 \$opt_sql_style           ,
  895                                                 \$opt_inline              ,
  896                                                 \$opt_exclude_ext         ,
  897                                                 \$opt_ignore_whitespace   ,
  898                                                 \$opt_ignore_case         ,
  899                                                 \$opt_ignore_case_ext     ,
  900                                                 \$opt_follow_links        ,
  901                                                 \$opt_autoconf            ,
  902                                                 \$opt_sum_one             ,
  903                                                 \$opt_by_percent          ,
  904                                                 \$opt_stdin_name          ,
  905                                                 \$opt_force_on_windows    ,
  906                                                 \$opt_force_on_unix       ,
  907                                                 \$opt_show_os             ,
  908                                                 \$opt_skip_archive        ,
  909                                                 \$opt_max_file_size       ,
  910                                                 \$opt_use_sloccount       ,
  911                                                 \$opt_no_autogen          ,
  912                                                 \$opt_force_git           ,
  913                                                 \$opt_strip_str_comments  ,
  914                                                 \$opt_file_encoding       ,
  915                                                 \$opt_docstring_as_code   ,
  916                                                 \$opt_stat                ,
  917 );  # 2}}} Not pretty.  Not at all.
  918 my $HAVE_Parallel_ForkManager = undef;
      # Ends up 1 when Parallel::ForkManager was requested via --processes=N
      # (N > 1) and could be loaded, otherwise 0.
  919 if ($opt_processes and $opt_processes > 1) {
  920     # Parallel::ForkManager isn't in the standard distribution.
  921     # Use it only if installed, and only if --processes=N is given.
  922     # The module load is slow so only use it if called for.
          # The load must happen at run time:  a `use` statement is executed
          # at compile time regardless of the branch it is written in, which
          # would always pay the load cost and would abort the whole program
          # when the module is missing.  require + import inside eval does
          # the same work as `use`, but only when this branch is reached, and
          # lets a missing module be detected instead of being fatal.
  923     if (eval { require Parallel::ForkManager; Parallel::ForkManager->import(); 1 }) {
  924         $HAVE_Parallel_ForkManager = 1;
          } else {
              warn "Parallel::ForkManager could not be loaded; continuing without it\n";
              $HAVE_Parallel_ForkManager = 0;
          }
  925 } else {
  926     $HAVE_Parallel_ForkManager = 0;
  927 }
  928 if ($opt_version) {
          # --version: print the bare version string and stop.
          # print rather than printf: $VERSION is data, not a format string,
          # so a '%' in it must not be interpreted.
  929     print "$VERSION\n";
  930     exit;
  931 }
      # --by-file-by-lang implies --by-file.
  932 $opt_by_file  = 1 if defined  $opt_by_file_by_lang;
      # NOTE(review): $opt_diff is tested here before the statements further
      # below force it on for --diff-alignment, --diff-list-file and the
      # --git-diff-* switches; confirm the stylesheet name is meant to
      # depend only on an explicit --diff (or a config file setting).
  933 my $CLOC_XSL = "cloc.xsl"; # created with --xsl
  934    $CLOC_XSL = "cloc-diff.xsl" if $opt_diff;
      # GetOptions() has already reported any problem; exit without adding
      # a second message (the "\n" suppresses Perl's "at ... line N" suffix).
  935 die "\n" unless $getopt_success;
  936 print $usage and exit if $opt_help;
  937 my %Exclude_Language = ();
      # Turn each comma separated option value into a lookup hash whose keys
      # are the listed names (values are all 1).  A hash stays empty when
      # its option is unset or empty.
  938    %Exclude_Language = map { $_ => 1 } split(/,/, $opt_exclude_lang)
  939         if $opt_exclude_lang;
  940 my %Exclude_Dir      = ();
  941    %Exclude_Dir      = map { $_ => 1 } split(/,/, $opt_exclude_dir )
  942         if $opt_exclude_dir ;
      # Validation runs before the built-in directories are added below, so
      # exclude_dir_validates() only ever sees the user-supplied names.
  943 die unless exclude_dir_validates(\%Exclude_Dir);
  944 my %Include_Ext = ();
  945    %Include_Ext = map { $_ => 1 } split(/,/, $opt_include_ext)
  946         if $opt_include_ext;
  947 my %Include_Language = ();
  948    %Include_Language = map { $_ => 1 } split(/,/, $opt_include_lang)
  949         if $opt_include_lang;
  950 # Forcibly exclude .svn, .cvs, .hg, .git, .bzr directories.  The contents of these
  951 # directories often conflict with files of interest.
      # (.snapshot and .config are excluded as well, see the last two
      # assignments.)  From here on $opt_exclude_dir no longer holds the
      # user's string; it is overwritten with 1 and the directory names
      # live only in %Exclude_Dir.
  952 $opt_exclude_dir       = 1;
  953 $Exclude_Dir{".svn"}   = 1;
  954 $Exclude_Dir{".cvs"}   = 1;
  955 $Exclude_Dir{".hg"}    = 1;
  956 $Exclude_Dir{".git"}   = 1;
  957 $Exclude_Dir{".bzr"}   = 1;
  958 $Exclude_Dir{".snapshot"} = 1;  # NetApp backups
  959 $Exclude_Dir{".config"} = 1;
  960 $opt_count_diff        = defined $opt_count_diff ? 1 : 0;
      # The line above turns --count-and-diff into a plain 0/1 flag.
      # Any of the following switches implies --diff ...
  961 $opt_diff              = 1  if $opt_diff_alignment    or
  962                                $opt_diff_list_file    or
  963                                $opt_git_diff_rel      or
  964                                $opt_git_diff_all      or
  965                                $opt_git_diff_simindex;
      # ... and the --git-diff-* switches additionally imply --git.
  966 $opt_force_git         = 1  if $opt_git_diff_rel      or
  967                                $opt_git_diff_all      or
  968                                $opt_git_diff_simindex;
      # --diff-list-file switches --diff-alignment off.
  969 $opt_diff_alignment    = 0  if $opt_diff_list_file;
      # Replace unset (or otherwise false) switches with explicit defaults.
  970 $opt_exclude_ext       = "" unless $opt_exclude_ext;
  971 $opt_ignore_whitespace = 0  unless $opt_ignore_whitespace;
  972 $opt_ignore_case       = 0  unless $opt_ignore_case;
  973 $opt_ignore_case_ext   = 0  unless $opt_ignore_case_ext;
  974 $opt_lang_no_ext       = 0  unless $opt_lang_no_ext;
  975 $opt_follow_links      = 0  unless $opt_follow_links;
  976 if (defined $opt_diff_timeout) {
  977     # if defined but with a value of <= 0, set to 2^31 seconds = 68 years
  978     $opt_diff_timeout = 2**31 unless $opt_diff_timeout > 0;
  979 } else {
  980     $opt_diff_timeout  =10; # seconds
  981 }
  982 if (defined $opt_timeout) {
  983     # if defined but with a value of <= 0, set to 2^31 seconds = 68 years
  984     $opt_timeout = 2**31 unless $opt_timeout > 0;
  985     # else is computed dynamically, ref $max_duration_sec
  986 }
      # Giving a delimiter implies --csv.
  987 $opt_csv               = 1  if $opt_csv_delimiter;
      # --unix is applied last, so it wins when both --windows and --unix
      # are given.
  988 $ON_WINDOWS            = 1  if $opt_force_on_windows;
  989 $ON_WINDOWS            = 0  if $opt_force_on_unix;
      # Default size limit is 100 (MB, per the variable's declaration).  An
      # explicit value of 0 is false and therefore also becomes 100.
  990 $opt_max_file_size     = 100 unless $opt_max_file_size;
  991 my $HAVE_SLOCCOUNT_c_count = 0;
  992 if (!$ON_WINDOWS and $opt_use_sloccount) {
  993     # Only bother doing this kludgey test if the user explicitly wants
  994     # to use SLOCCount.  Debian based systems will hang if just doing
  995     #  external_utility_exists("c_count")
  996     # if c_count is in $PATH; c_count expects to have input.
  997     $HAVE_SLOCCOUNT_c_count = external_utility_exists("c_count /bin/sh");
  998 }
  999 if ($opt_use_sloccount) {
 1000     if (!$HAVE_SLOCCOUNT_c_count) {
 1001         warn "c_count could not be found; ignoring --use-sloccount\n";
 1002         $opt_use_sloccount = 0;
 1003     } else {
 1004         warn "Using c_count, php_count, xml_count, pascal_count from SLOCCount\n";
 1005         warn "--diff is disabled with --use-sloccount\n" if $opt_diff;
 1006         warn "--count-and-diff is disabled with --use-sloccount\n" if $opt_count_diff;
 1007         warn "--unicode is disabled with --use-sloccount\n" if $opt_unicode;
 1008         warn "--strip-comments is disabled with --use-sloccount\n" if $opt_strip_comments;
 1009         $opt_diff           = 0;
 1010         $opt_count_diff     = undef;
 1011         $opt_unicode        = 0;
 1012         $opt_strip_comments = 0;
 1013     }
 1014 }
 1015 $opt_vcs = 0 if $opt_force_git;
 1016 
 1017 my @COUNT_DIFF_ARGV        = ();     # empty until populated for --count-and-diff
 1018 my $COUNT_DIFF_report_file = undef;  # copy of $opt_report_file, prefix for per-pass names
 1019 if ($opt_count_diff and !$opt_diff_list_file) {
 1020     die "--count-and-diff requires two arguments; got ", scalar @ARGV, "\n"
 1021         if scalar @ARGV != 2;
 1022     # prefix with a dummy term so that $opt_count_diff is the
 1023     # index into @COUNT_DIFF_ARGV to work on at each pass
 1024     @COUNT_DIFF_ARGV = (undef, $ARGV[0],
 1025                                $ARGV[1],
 1026                               [$ARGV[0], $ARGV[1]]);  # 3rd pass: diff them
 1027     $COUNT_DIFF_report_file = $opt_report_file if $opt_report_file;
 1028 }
 1029 
 1030 # Options defaults:
 1031 $opt_quiet         =   1 if ($opt_md or $opt_json) and !defined $opt_report_file;
 1032 $opt_progress_rate = 100 unless defined $opt_progress_rate;
 1033 $opt_progress_rate =   0 if     defined $opt_quiet;
 1034 if (!defined $opt_v) {
 1035     $opt_v  = 0;
 1036 } elsif (!$opt_v) {
 1037     $opt_v  = 1;
 1038 }
 1039 if (defined $opt_xsl) {
 1040     $opt_xsl = $CLOC_XSL if $opt_xsl eq "1";
 1041     $opt_xml = 1;
 1042 }
 1043 my $skip_generate_report = 0;
 1044 $opt_sql_style = 0 unless defined $opt_sql_style;
 1045 $opt_sql = 0 unless $opt_sql_style or defined $opt_sql;
 1046 if ($opt_sql eq "-" || $opt_sql eq "1") { # stream SQL output to STDOUT
 1047     $opt_quiet            = 1;
 1048     $skip_generate_report = 1;
 1049     $opt_by_file          = 1;
 1050     $opt_sum_reports      = 0;
 1051     $opt_progress_rate    = 0;
 1052 } elsif ($opt_sql)  { # write SQL output to a file
 1053     $opt_by_file          = 1;
 1054     $skip_generate_report = 1;
 1055     $opt_sum_reports      = 0;
 1056 }
 1057 if ($opt_sql_style) {
 1058     $opt_sql_style = lc $opt_sql_style;
 1059     if (!grep { lc $_ eq $opt_sql_style } qw ( Oracle Named_Columns )) {
 1060         die "'$opt_sql_style' is not a recognized SQL style.\n";
 1061     }
 1062 }
 1063 $opt_by_percent = '' unless defined $opt_by_percent;
 1064 if ($opt_by_percent and $opt_by_percent !~ m/^(c|cm|cb|cmb)$/i) {
 1065     die "--by-percent must be either 'c', 'cm', 'cb', or 'cmb'\n";
 1066 }
 1067 $opt_by_percent = lc $opt_by_percent;
 1068 
 1069 if (defined $opt_vcs) {
 1070     if ($opt_vcs eq "auto") {
 1071         if      (-d ".git") {
 1072             $opt_vcs = "git";
 1073         } elsif (-d ".svn") {
 1074             $opt_vcs = "svn";
 1075         } else {
 1076             warn "--vcs auto:  unable to determine versioning system\n";
 1077         }
 1078     }
 1079     if      ($opt_vcs eq "git") {
 1080         $opt_vcs = "git ls-files";
 1081         my @submodules = invoke_generator('git submodule status');
 1082         foreach my $SM (@submodules) {
 1083             $SM =~ s/^\s+//;        # may have leading space
 1084             $SM =~ s/\(\S+\)\s*$//; # may end with something like (heads/master)
 1085             my ($checksum, $dir) = split(' ', $SM, 2);
 1086             $dir =~ s/\s+$//;
 1087             $Exclude_Dir{$dir} = 1;
 1088         }
 1089     } elsif ($opt_vcs eq "svn") {
 1090         $opt_vcs = "svn list -R";
 1091     }
 1092 }
 1093 
 1094 my $list_no_autogen = 0;
 1095 if (defined $opt_no_autogen and scalar @ARGV == 1 and $ARGV[0] eq "list") {
 1096     $list_no_autogen = 1;
 1097 }
 1098 
 1099 die $brief_usage unless defined $opt_version         or
 1100                         defined $opt_show_lang       or
 1101                         defined $opt_show_ext        or
 1102                         defined $opt_show_os         or
 1103                         defined $opt_write_lang_def  or
 1104                         defined $opt_write_lang_def_incl_dup  or
 1105                         defined $opt_list_file       or
 1106                         defined $opt_diff_list_file  or
 1107                         defined $opt_vcs             or
 1108                         defined $opt_xsl             or
 1109                         defined $opt_explain         or
 1110                         $list_no_autogen             or
 1111                         scalar @ARGV >= 1;
 1112 if (!$opt_diff_list_file) {
 1113     die "--diff requires two arguments; got ", scalar @ARGV, "\n"
 1114         if $opt_diff and !$opt_sum_reports and scalar @ARGV != 2;
 1115     die "--diff arguments are identical; nothing done", "\n"
 1116         if $opt_diff and !$opt_sum_reports and scalar @ARGV == 2
 1117                                            and $ARGV[0] eq $ARGV[1];
 1118 }
 1119 trick_pp_packer_encode() if $ON_WINDOWS and $opt_file_encoding;
 1120 $File::Find::dont_use_nlink = 1 if $opt_stat or top_level_SMB_dir(\@ARGV);
 1121 my @git_similarity = (); # only populated with --git-diff-simindex
 1122 replace_git_hash_with_tarfile(\@ARGV, \@git_similarity);
 1123 # 1}}}
 1124 # Step 1:  Initialize global constants.        {{{1
 1125 #
 1126 my $nFiles_Found = 0;  # updated in make_file_list
 1127 my (%Language_by_Extension, %Language_by_Script,
 1128     %Filters_by_Language, %Not_Code_Extension, %Not_Code_Filename,
 1129     %Language_by_File, %Scale_Factor, %Known_Binary_Archives,
 1130     %EOL_Continuation_re,
 1131    );
 1132 my $ALREADY_SHOWED_HEADER = 0;
 1133 my $ALREADY_SHOWED_XML_SECTION = 0;
 1134 my %Error_Codes = ( 'Unable to read'                => -1,
 1135                     'Neither file nor directory'    => -2,
 1136                     'Diff error (quoted comments?)' => -3,
 1137                     'Diff error, exceeded timeout'  => -4,
 1138                     'Line count, exceeded timeout'  => -5,
 1139                   );
 1140 my %Extension_Collision = (
 1141     'ADSO/IDSM'                                     => [ 'adso' ] ,
 1142     'C#/Smalltalk'                                  => [ 'cs'   ] ,
 1143     'D/dtrace'                                      => [ 'd'    ] ,
 1144     'F#/Forth'                                      => [ 'fs'   ] ,
 1145     'Fortran 77/Forth'                              => [ 'f', 'for' ] ,
 1146     'IDL/Qt Project/Prolog/ProGuard'                => [ 'pro'  ] ,
 1147     'Lisp/Julia'                                    => [ 'jl'   ] ,
 1148     'Lisp/OpenCL'                                   => [ 'cl'   ] ,
 1149     'MATLAB/Mathematica/Objective C/MUMPS/Mercury'  => [ 'm'    ] ,
 1150     'Pascal/Puppet'                                 => [ 'pp'   ] ,
 1151     'Perl/Prolog'                                   => [ 'pl', 'PL'  ] ,
 1152     'PHP/Pascal'                                    => [ 'inc'  ] ,
 1153     'Raku/Prolog'                                   => [ 'p6', 'P6'  ] ,
 1154     'Qt/Glade'                                      => [ 'ui'   ] ,
 1155     'TypeScript/Qt Linguist'                        => [ 'ts'   ] ,
 1156     'Verilog-SystemVerilog/Coq'                     => [ 'v'    ] ,
 1157     'Visual Basic/TeX/Apex Class'                   => [ 'cls'  ] ,
 1158     'Scheme/SaltStack'                              => [ 'sls'  ] ,
 1159 );
 1160 my @Autogen_to_ignore = no_autogen_files($list_no_autogen);
 1161 if ($opt_force_lang_def) {
 1162     # replace cloc's definitions
 1163     read_lang_def(
 1164         $opt_force_lang_def    , #        Sample values:
 1165         \%Language_by_Extension, # Language_by_Extension{f}    = 'Fortran 77'
 1166         \%Language_by_Script   , # Language_by_Script{sh}      = 'Bourne Shell'
 1167         \%Language_by_File     , # Language_by_File{makefile}  = 'make'
 1168         \%Filters_by_Language  , # Filters_by_Language{Bourne Shell}[0] =
 1169                                  #      [ 'remove_matches' , '^\s*#'  ]
 1170         \%Not_Code_Extension   , # Not_Code_Extension{jpg}     = 1
 1171         \%Not_Code_Filename    , # Not_Code_Filename{README}   = 1
 1172         \%Scale_Factor         , # Scale_Factor{Perl}          = 4.0
 1173         \%EOL_Continuation_re  , # EOL_Continuation_re{C++}    = '\\$'
 1174         );
 1175 } else {
 1176     set_constants(               #
 1177         \%Language_by_Extension, # Language_by_Extension{f}    = 'Fortran 77'
 1178         \%Language_by_Script   , # Language_by_Script{sh}      = 'Bourne Shell'
 1179         \%Language_by_File     , # Language_by_File{makefile}  = 'make'
 1180         \%Filters_by_Language  , # Filters_by_Language{Bourne Shell}[0] =
 1181                                  #      [ 'remove_matches' , '^\s*#'  ]
 1182         \%Not_Code_Extension   , # Not_Code_Extension{jpg}     = 1
 1183         \%Not_Code_Filename    , # Not_Code_Filename{README}   = 1
 1184         \%Scale_Factor         , # Scale_Factor{Perl}          = 4.0
 1185         \%Known_Binary_Archives, # Known_Binary_Archives{.tar} = 1
 1186         \%EOL_Continuation_re  , # EOL_Continuation_re{C++}    = '\\$'
 1187         );
 1188         if ($opt_no_autogen) {
 1189             foreach my $F (@Autogen_to_ignore) { $Not_Code_Filename{ $F } = 1; }
 1190         }
 1191 }
 1192 if ($opt_read_lang_def) {
 1193     # augment cloc's definitions (keep cloc's where there are overlaps)
 1194     merge_lang_def(
 1195         $opt_read_lang_def     , #        Sample values:
 1196         \%Language_by_Extension, # Language_by_Extension{f}    = 'Fortran 77'
 1197         \%Language_by_Script   , # Language_by_Script{sh}      = 'Bourne Shell'
 1198         \%Language_by_File     , # Language_by_File{makefile}  = 'make'
 1199         \%Filters_by_Language  , # Filters_by_Language{Bourne Shell}[0] =
 1200                                  #      [ 'remove_matches' , '^\s*#'  ]
 1201         \%Not_Code_Extension   , # Not_Code_Extension{jpg}     = 1
 1202         \%Not_Code_Filename    , # Not_Code_Filename{README}   = 1
 1203         \%Scale_Factor         , # Scale_Factor{Perl}          = 4.0
 1204         \%EOL_Continuation_re  , # EOL_Continuation_re{C++}    = '\\$'
 1205         );
 1206 }
 1207 if ($opt_lang_no_ext and !defined $Filters_by_Language{$opt_lang_no_ext}) {
 1208     die_unknown_lang($opt_lang_no_ext, "--lang-no-ext")
 1209 }
 1210 check_scale_existence(\%Filters_by_Language, \%Language_by_Extension,
 1211                       \%Scale_Factor);
 1212 
 1213 my $nCounted = 0;
 1214 
 1215 # Process command line provided extension-to-language mapping overrides.
 1216 # Make a hash of known languages in lower case for easier matching.
 1217 my %Recognized_Language_lc = (); # key = language name in lc, value = true name
 1218 foreach my $language (keys %Filters_by_Language) {
 1219     my $lang_lc = lc $language;
 1220     $Recognized_Language_lc{$lang_lc} = $language;
 1221 }
 1222 my %Forced_Extension = (); # file name extensions which user wants to count
 1223 my $All_One_Language = 0;  # set to !0 if --force-lang's <ext> is missing
 1224 foreach my $pair (@opt_force_lang) {
 1225     my ($lang, $extension) = split(',', $pair);
 1226     my $lang_lc = lc $lang;
 1227     # validate first so an unknown language is reported with or without <ext>
 1228     die_unknown_lang($lang, "--force-lang")
 1229         unless $Recognized_Language_lc{$lang_lc};
 1230 
 1231     if (defined $extension) {
 1232         $Forced_Extension{$extension} = $lang;
 1233         $Language_by_Extension{$extension} = $Recognized_Language_lc{$lang_lc};
 1234     } else {
 1235         # the scary case--count everything as this language
 1236         $All_One_Language = $Recognized_Language_lc{$lang_lc};
 1237     }
 1238 }
 1239 
 1240 foreach my $pair (@opt_script_lang) {
 1241     my ($lang, $script_name) = split(',', $pair);
 1242     my $lang_lc = lc $lang;
 1243     if (!defined $script_name) {
 1244         die "The --script-lang option requires a comma separated pair of ".
 1245             "strings.\n";
 1246     }
 1247 
 1248     die_unknown_lang($lang, "--script-lang")
 1249         unless $Recognized_Language_lc{$lang_lc};
 1250 
 1251     $Language_by_Script{$script_name} = $Recognized_Language_lc{$lang_lc};
 1252 }
 1253 
 1254 # If user provided file extensions to ignore, add these to the exclusion
 1255 # list.  Loop over the names only so a literal "1" is never added as a key.
 1256 foreach my $ext (split(/,/, $opt_exclude_ext ) ) {
 1257     $ext = lc $ext if $ON_WINDOWS or $opt_ignore_case_ext;
 1258     $Not_Code_Extension{$ext} = 1;
 1259 }
 1260 
 1261 # If SQL or --by-file output is requested, keep track of directory names
 1262 # generated by File::Temp::tempdir and used to temporarily hold the results
 1263 # of compressed archives.  Contents of the SQL table 't' will be much
 1264 # cleaner if these meaningless directory names are stripped from the front
 1265 # of files pulled from the archives.
 1266 my %TEMP_DIR = ();
 1267 my $TEMP_OFF =  0;  # Needed for --sdir; keep track of the number of
 1268                     # scratch directories made in this run to avoid
 1269                     # file overwrites by multiple extractions to same
 1270                     # sdir.
 1271 # Also track locations where temporary installations, if necessary, of
 1272 # Algorithm::Diff and/or Regexp::Common are done.  Make sure these
 1273 # directories are not counted as inputs (ref bug #80 2012-11-23).
 1274 my %TEMP_INST = ();
 1275 
 1276 # invert %Language_by_Script hash to get an easy-to-look-up list of known
 1277 # scripting languages
 1278 my %Script_Language = map { $_ => 1 } values %Language_by_Script ;
 1279 # 1}}}
 1280 # Step 2:  Early exits for display, summation. {{{1
 1281 #
 1282 print_extension_info(   $opt_show_ext     ) if defined $opt_show_ext ;
 1283 print_language_info(    $opt_show_lang, '') if defined $opt_show_lang;
 1284 print_language_filters( $opt_explain      ) if defined $opt_explain  ;
 1285 exit if (defined $opt_show_ext)  or
 1286         (defined $opt_show_lang) or
 1287         (defined $opt_explain)   or
 1288         $list_no_autogen;
 1289 
 1290 Top_of_Processing_Loop:
 1291 # Sorry, coding purists.  Using a goto to implement --count-and-diff
 1292 # which has to do three passes over the main code, starting with
 1293 # a clean slate each time.
 1294 if ($opt_count_diff) {
 1295     @ARGV = ( $COUNT_DIFF_ARGV[ $opt_count_diff ] );
 1296     if ($opt_count_diff == 3) {
 1297         $opt_diff = 1;
 1298         @ARGV = @{$COUNT_DIFF_ARGV[ $opt_count_diff ]}; # last arg is list of list
 1299     }
 1300     if ($opt_report_file) {
 1301         # Instead of just one output file, will have three.
 1302         # Keep their names unique otherwise results are clobbered.
 1303         # Replace file path separators with underscores otherwise
 1304         # may end up with illegal file names.
 1305         my ($fn_0, $fn_1) = (undef, undef);
 1306         if ($ON_WINDOWS) {
 1307             ($fn_0 = $ARGV[0]) =~ s{\\}{_}g;
 1308              $fn_0 =~ s{::}{_}g;
 1309             ($fn_1 = $ARGV[1]) =~ s{\\}{_}g if defined $ARGV[1];
 1310              $fn_1 =~ s{::}{_}g             if defined $ARGV[1];
 1311         } else {
 1312             ($fn_0 = $ARGV[0]) =~ s{/}{_}g;
 1313             ($fn_1 = $ARGV[1]) =~ s{/}{_}g  if defined $ARGV[1];
 1314         }
 1315 
 1316         if ($opt_count_diff == 3) {
 1317             $opt_report_file = $COUNT_DIFF_report_file . ".diff.$fn_0.$fn_1";
 1318         } else {
 1319             $opt_report_file = $COUNT_DIFF_report_file . "." .  $fn_0;
 1320         }
 1321     } else {
 1322         # STDOUT; print a header showing what it's working on
 1323         if ($opt_count_diff == 3) {
 1324             print "\ndiff $ARGV[0] $ARGV[1]::\n";
 1325         } else {
 1326             print "\n" if $opt_count_diff > 1;
 1327             print "$ARGV[0]::\n";
 1328         }
 1329     }
 1330     $ALREADY_SHOWED_HEADER      = 0;
 1331     $ALREADY_SHOWED_XML_SECTION = 0;
 1332 }
 1333 
 1334 #print "Before glob have [", join(",", @ARGV), "]\n";
 1335 @ARGV = windows_glob(@ARGV) if $ON_WINDOWS;
 1336 #print "after  glob have [", join(",", @ARGV), "]\n";
 1337 
 1338 # filter out archive files if requested to do so
 1339 if (defined $opt_skip_archive) {
 1340     my @non_archive = ();
 1341     foreach my $candidate (@ARGV) {
 1342         if ($candidate !~ m/${opt_skip_archive}$/) {
 1343             push @non_archive, $candidate;
 1344 
 1345         }
 1346     }
 1347     @ARGV = @non_archive;
 1348 }
 1349 
 1350 if ($opt_sum_reports and $opt_diff) {
 1351     my @results = ();
 1352     if ($opt_list_file) { # read inputs from the list file
 1353         my @list = read_list_file($opt_list_file);
 1354         @results = combine_diffs(\@list);
 1355     } elsif ($opt_vcs) { # read inputs from the VCS generator
 1356         my @list = invoke_generator($opt_vcs, \@ARGV);
 1357         @results = combine_diffs(\@list);
 1358     } else { # get inputs from the command line
 1359         @results = combine_diffs(\@ARGV);
 1360     }
 1361     if ($opt_report_file) {
 1362         write_file($opt_report_file, {}, @results);
 1363     } else {
 1364         print "\n", join("\n", @results), "\n";
 1365     }
 1366     exit;
 1367 }
 1368 if ($opt_sum_reports) {
 1369     my %Results = ();
 1370     foreach my $type( "by language", "by report file" ) {
 1371         my $found_lang = undef;
 1372         if ($opt_list_file or $opt_vcs) {
 1373             # read inputs from the list file
 1374             my @list;
 1375             if ($opt_vcs) {
 1376                 @list = invoke_generator($opt_vcs, \@ARGV);
 1377             } else {
 1378                 @list = read_list_file($opt_list_file);
 1379             }
 1380             $found_lang = combine_results(\@list,
 1381                                            $type,
 1382                                           \%{$Results{ $type }},
 1383                                           \%Filters_by_Language );
 1384         } else { # get inputs from the command line
 1385             $found_lang = combine_results(\@ARGV,
 1386                                            $type,
 1387                                           \%{$Results{ $type }},
 1388                                           \%Filters_by_Language );
 1389         }
 1390         next unless %Results;
 1391         my $end_time = get_time();
 1392         my @results  = generate_report($VERSION, $end_time - $start_time,
 1393                                        $type,
 1394                                       \%{$Results{ $type }}, \%Scale_Factor);
 1395         if ($opt_report_file) {
 1396             my $ext  = ".lang";
 1397                $ext  = ".file" unless $type eq "by language";
 1398             next if !$found_lang and  $ext  eq ".lang";
 1399             write_file($opt_report_file . $ext, {}, @results);
 1400         } else {
 1401             print "\n", join("\n", @results), "\n";
 1402         }
 1403     }
 1404     exit;
 1405 }
 1406 if ($opt_write_lang_def or $opt_write_lang_def_incl_dup) {
 1407     # the incl_dup file name takes precedence when both options are set
 1408     my $file = $opt_write_lang_def_incl_dup || $opt_write_lang_def;
 1409     write_lang_def($file                 ,
 1410                   \%Language_by_Extension,
 1411                   \%Language_by_Script   ,
 1412                   \%Language_by_File     ,
 1413                   \%Filters_by_Language  ,
 1414                   \%Not_Code_Extension   ,
 1415                   \%Not_Code_Filename    ,
 1416                   \%Scale_Factor         ,
 1417                   \%EOL_Continuation_re  ,
 1418                   );
 1419     exit;
 1420 }
 1421 if ($opt_show_os) {
 1422     if ($ON_WINDOWS) {
 1423         print "Windows\n";
 1424     } else {
 1425         print "UNIX\n";
 1426     }
 1427     exit;
 1428 }
 1429 
 1430 my $max_processes = get_max_processes();
 1431 
 1432 # 1}}}
 1433 # Step 3:  Create a list of files to consider. {{{1
 1434 #  a) If inputs are binary archives, first cd to a temp
 1435 #     directory, expand the archive with the user-given
 1436 #     extraction tool, then add the temp directory to
 1437 #     the list of dirs to process.
 1438 #  b) Create a list of every file that might contain source
 1439 #     code.  Ignore binary files, zero-sized files, and
 1440 #     any file in a directory the user says to exclude.
 1441 #  c) Determine the language for each file in the list.
 1442 #
 1443 my @binary_archive = ();
 1444 my $cwd            = cwd();
 1445 if ($opt_extract_with) {
 1446 #print "cwd main = [$cwd]\n";
 1447     my @extract_location = ();
 1448     foreach my $bin_file (@ARGV) {
 1449         my $extract_dir = undef;
 1450         if ($opt_sdir) {
 1451             ++$TEMP_OFF;
 1452             $extract_dir = "$opt_sdir/$TEMP_OFF";
 1453             File::Path::rmtree($extract_dir) if     is_dir($extract_dir);
 1454             File::Path::mkpath($extract_dir) unless is_dir($extract_dir);
 1455         } else {
 1456             $extract_dir = tempdir( CLEANUP => 1 );  # 1 = delete on exit
 1457         }
 1458         $TEMP_DIR{ $extract_dir } = 1 if $opt_sql or $opt_by_file;
 1459         print "mkdir $extract_dir\n"  if $opt_v;
 1460         print "cd    $extract_dir\n"  if $opt_v;
 1461         chdir $extract_dir;
 1462         my $bin_file_full_path = "";
 1463         if (File::Spec->file_name_is_absolute( $bin_file )) {
 1464             $bin_file_full_path = $bin_file;
 1465 #print "bin_file_full_path (was ful) = [$bin_file_full_path]\n";
 1466         } else {
 1467             $bin_file_full_path = File::Spec->catfile( $cwd, $bin_file );
 1468 #print "bin_file_full_path (was rel) = [$bin_file_full_path]\n";
 1469         }
 1470         my     $extract_cmd = uncompress_archive_cmd($bin_file_full_path);
 1471         print  $extract_cmd, "\n" if $opt_v;
 1472         system $extract_cmd;
 1473         push @extract_location, $extract_dir;
 1474         chdir $cwd;
 1475     }
 1476     # It is possible that the binary archive itself contains additional
 1477     # files compressed the same way (true for Java .ear files).  Go
 1478     # through all the files that were extracted, see if they are binary
 1479     # archives and try to extract them.  Lather, rinse, repeat.
 1480     my $binary_archives_exist = 1;
 1481     my $count_binary_archives = 0;
 1482     my $previous_count        = 0;
 1483     my $n_pass                = 0;
 1484     while ($binary_archives_exist) {
 1485         @binary_archive = ();
 1486         foreach my $dir (@extract_location) {
 1487             find(\&archive_files, $dir);  # populates global @binary_archive
 1488         }
 1489         foreach my $archive (@binary_archive) {
 1490             my $extract_dir = undef;
 1491             if ($opt_sdir) {
 1492                 ++$TEMP_OFF;
 1493                 $extract_dir = "$opt_sdir/$TEMP_OFF";
 1494                 File::Path::rmtree($extract_dir) if     is_dir($extract_dir);
 1495                 File::Path::mkpath($extract_dir) unless is_dir($extract_dir);
 1496             } else {
 1497                 $extract_dir = tempdir( CLEANUP => 1 );  # 1 = delete on exit
 1498             }
 1499             $TEMP_DIR{ $extract_dir } = 1 if $opt_sql or $opt_by_file;
 1500             print "mkdir $extract_dir\n"  if $opt_v;
 1501             print "cd    $extract_dir\n"  if $opt_v;
 1502             chdir  $extract_dir;
 1503 
 1504             my     $extract_cmd = uncompress_archive_cmd($archive);
 1505             print  $extract_cmd, "\n" if $opt_v;
 1506             system $extract_cmd;
 1507             push @extract_location, $extract_dir;
 1508             unlink $archive;  # otherwise will be extracting it forever
 1509         }
 1510         $count_binary_archives = scalar @binary_archive;
 1511         if ($count_binary_archives == $previous_count) {
 1512             $binary_archives_exist = 0;
 1513         }
 1514         $previous_count = $count_binary_archives;
 1515     }
 1516     chdir $cwd;
 1517 
 1518     @ARGV = @extract_location;
 1519 } else {
 1520     # see if any of the inputs need to be auto-uncompressed &/or expanded
 1521     my @updated_ARGS = ();
 1522     replace_git_hash_with_tarfile(\@ARGV, \@git_similarity) if $opt_force_git;
 1523     foreach my $Arg (@ARGV) {
 1524         if (is_dir($Arg)) {
 1525             push @updated_ARGS, $Arg;
 1526             next;
 1527         }
 1528         my $full_path = "";
 1529         if (File::Spec->file_name_is_absolute( $Arg )) {
 1530             $full_path = $Arg;
 1531         } else {
 1532             $full_path = File::Spec->catfile( $cwd, $Arg );
 1533         }
 1534 #print "full_path = [$full_path]\n";
 1535         my $extract_cmd = uncompress_archive_cmd($full_path);
 1536         if ($extract_cmd) {
 1537             my $extract_dir = undef;
 1538             if ($opt_sdir) {
 1539                 ++$TEMP_OFF;
 1540                 $extract_dir = "$opt_sdir/$TEMP_OFF";
 1541                 File::Path::rmtree($extract_dir) if     is_dir($extract_dir);
 1542                 File::Path::mkpath($extract_dir) unless is_dir($extract_dir);
 1543             } else {
 1544                 $extract_dir = tempdir( CLEANUP => 1 ); # 1 = delete on exit
 1545             }
 1546             $TEMP_DIR{ $extract_dir } = 1 if $opt_sql or $opt_by_file;
 1547             print "mkdir $extract_dir\n"  if $opt_v;
 1548             print "cd    $extract_dir\n"  if $opt_v;
 1549             chdir  $extract_dir;
 1550             print  $extract_cmd, "\n" if $opt_v;
 1551             system $extract_cmd;
 1552             push @updated_ARGS, $extract_dir;
 1553             chdir $cwd;
 1554         } else {
 1555             # this is a conventional, uncompressed, unarchived file
 1556             # or a directory; keep as-is
 1557             push @updated_ARGS, $Arg;
 1558         }
 1559     }
 1560     @ARGV = @updated_ARGS;
 1561 
 1562     # make sure we're not counting any directory containing
 1563     # temporary installations of Regexp::Common, Algorithm::Diff
 1564     foreach my $d (sort keys %TEMP_INST) {
 1565         foreach my $a (@ARGV) {
 1566             next unless is_dir($a);
 1567             if ($opt_v > 2) {
 1568                 printf "Comparing %s (location of %s) to input [%s]\n",
 1569                         $d, $TEMP_INST{$d}, $a;
 1570             }
 1571             if ($a eq $d) {
 1572                 die "File::Temp::tempdir chose directory ",
 1573                     $d, " to install ", $TEMP_INST{$d}, " but this ",
 1574                     "matches one of your input directories.  Rerun ",
 1575                     "with --sdir and supply a different temporary ",
 1576                     "directory for ", $TEMP_INST{$d}, "\n";
 1577             }
 1578         }
 1579     }
 1580 }
 1581 # 1}}}
 1582 my @Errors    = ();
 1583 my @file_list = ();  # global variable updated in files()
 1584 my %Ignored   = ();  # files that are not counted (language not recognized or
 1585                      # problems reading the file)
 1586 my @Lines_Out = ();
 1587 if ($opt_diff) {
 1588 # Step 4:  Separate code from non-code files.  {{{1
 1589 my @fh            = ();
 1590 my @files_for_set = ();
 1591 my @files_added_tot = ();
 1592 my @files_removed_tot = ();
 1593 my @file_pairs_tot = ();
 1594 # make file lists for each separate argument
 1595 if ($opt_diff_list_file) {
 1596     @files_for_set = ( [], [] );  # one (possibly empty) file list per side
 1597     file_pairs_from_file($opt_diff_list_file, # in
 1598                         \@files_added_tot   , # out
 1599                         \@files_removed_tot , # out
 1600                         \@file_pairs_tot    , # out
 1601                        );
 1602     foreach my $F (@files_added_tot) {      # added files go only in set 1
 1603         push @{$files_for_set[1]}, $F;
 1604     }
 1605     foreach my $F (@files_removed_tot) {    # removed files go only in set 0
 1606         push @{$files_for_set[0]}, $F;
 1607     }
 1608     foreach my $pair (@file_pairs_tot) {    # paired files go in both sets
 1609         push @{$files_for_set[0]}, $pair->[0];
 1610         push @{$files_for_set[1]}, $pair->[1];
 1611     }
 1612     @ARGV = (1, 2); # place holders
 1613 }
 1614 for (my $i = 0; $i < scalar @ARGV; $i++) {
 1615     if ($opt_diff_list_file) {
 1616         push @fh, make_file_list($files_for_set[$i],
 1617                                 \%Error_Codes, \@Errors, \%Ignored);
 1618         @{$files_for_set[$i]} = @file_list;
 1619     } else {
 1620         push @fh, make_file_list([ $ARGV[$i] ],
 1621                                 \%Error_Codes, \@Errors, \%Ignored);
 1622         @{$files_for_set[$i]} = @file_list;
 1623     }
 1624     if ($opt_exclude_list_file) {
 1625         # note: process_exclude_list_file() references global @file_list
 1626         process_exclude_list_file($opt_exclude_list_file,
 1627                                  \%Exclude_Dir,
 1628                                  \%Ignored);
 1629     }
 1630     if ($opt_no_autogen) {
 1631         exclude_autogenerated_files(\@{$files_for_set[$i]},  # in/out
 1632                                     \%Error_Codes, \@Errors, \%Ignored);
 1633     }
 1634     @file_list = ();
 1635 }
 1636 # 1}}}
 1637 # Step 5:  Remove duplicate files.             {{{1
 1638 #
 1639 my %Language           = ();
 1640 my %unique_source_file = ();
 1641 my $n_set = 0;
 1642 foreach my $FH (@fh) {  # loop over each of the file sets
 1643     ++$n_set;
 1644     remove_duplicate_files($FH,
 1645                                \%{$Language{$FH}}               ,
 1646                                \%{$unique_source_file{$FH}}     ,
 1647                           \%Error_Codes                         ,
 1648                                \@Errors                         ,
 1649                                \%Ignored                        );
 1650     if ($opt_exclude_content) {
 1651         exclude_by_regex($opt_exclude_content,              # in
 1652                         \%{$unique_source_file{$FH}},       # in/out
 1653                         \%Ignored);                         # out
 1654     }
 1655 
 1656     if ($opt_include_lang) {
 1657         # remove files associated with languages not
 1658         # specified by --include-lang
 1659         my @delete_file = ();
 1660         foreach my $file (keys %{$unique_source_file{$FH}}) {
 1661             my $keep_file = 0;
 1662             foreach my $keep_lang (keys %Include_Language) {
 1663                 if ($Language{$FH}{$file} eq $keep_lang) {
 1664                     $keep_file = 1;
 1665                     last;
 1666                 }
 1667             }
 1668             next if $keep_file;
 1669             push @delete_file, $file;
 1670         }
 1671         foreach my $file (@delete_file) {
 1672             delete $Language{$FH}{$file};
 1673         }
 1674     }
 1675 
 1676     printf "%2d: %8d unique file%s.                          \r",
 1677         $n_set,
 1678         plural_form(scalar keys %{$unique_source_file{$FH}})  # this set only
 1679         unless $opt_quiet;
 1680 }
 1681 # 1}}}
 1682 # Step 6:  Count code, comments, blank lines.  {{{1
 1683 # Diff mode: declare the result containers, check that exactly two file sets exist and align their files into added / removed / paired lists.
 1684 my %Results_by_Language = ();
 1685 my %Results_by_File     = ();
 1686 my %Delta_by_Language   = ();  # {language}{count kind}{status} sums filled from count_filesets() results
 1687 my %Delta_by_File       = ();
 1688 
 1689 my %alignment = ();  # {label}{text} = 1; handed to write_alignment_data() further down
 1690 
 1691 my $fset_a = $fh[0];  # handle of the first file set; count_filesets() looks up removed files under it
 1692 my $fset_b = $fh[1];  # handle of the second file set; count_filesets() looks up added files under it
 1693 
 1694 my $n_filepairs_compared = 0;
 1695 my $tot_counted = 0;  # running total printed as progress when forked workers finish
 1696 
 1697 if ( scalar @fh != 2 ) {  # a diff needs exactly two file set handles
 1698     print "Error: incorrect length fh array when preparing diff at step 6.\n";
 1699     exit 1;
 1700 }
 1701 if (!$opt_diff_list_file) {  # NOTE(review): presumably the three lists are already populated when $opt_diff_list_file is set -- confirm
 1702     align_by_pairs(\%{$unique_source_file{$fset_a}}      , # in
 1703                    \%{$unique_source_file{$fset_b}}      , # in
 1704                    \@files_added_tot                     , # out
 1705                    \@files_removed_tot                   , # out
 1706                    \@file_pairs_tot                      , # out
 1707                   );
 1708 }
 1709 
 1710 #use Data::Dumper;
 1711 #print "added : ", Dumper(\@files_added_tot);
 1712 #print "removed : ", Dumper(\@files_removed_tot);
 1713 #print "pairs : ", Dumper(\@file_pairs_tot);
 1714 
 1715 if ( $max_processes == 0) {
 1716     # Multiprocessing is disabled: count everything in this process and copy the single result part into the step-wide variables.
 1717     my $part = count_filesets ( $fset_a, $fset_b, \@files_added_tot,
 1718                                \@files_removed_tot, \@file_pairs_tot,
 1719                                0, \%Language, \%Ignored);
 1720     %Results_by_File = %{$part->{'results_by_file'}};
 1721     %Results_by_Language= %{$part->{'results_by_language'}};
 1722     %Delta_by_File = %{$part->{'delta_by_file'}};
 1723     %Delta_by_Language= %{$part->{'delta_by_language'}};
 1724     %Ignored = ( %Ignored, %{$part->{'ignored'}});  # merge; entries from $part win on duplicate keys
 1725     %alignment = %{$part->{'alignment'}};
 1726     $n_filepairs_compared = $part->{'n_filepairs_compared'};
 1727     push ( @Errors, @{$part->{'errors'}});
 1728 } else {
 1729     # Multiprocessing is enabled
 1730     # Do not create more processes than the amount of data to be processed
 1731     my $num_processes = min(max(scalar @files_added_tot,
 1732                                 scalar @files_removed_tot,
 1733                                 scalar @file_pairs_tot),
 1734                             $max_processes);
 1735     # ... but use at least one process.
 1736        $num_processes = 1
 1737             if $num_processes == 0;
 1738     # Start processes for counting
 1739     my $pm = Parallel::ForkManager->new($num_processes);
 1740     # When processes finish, they will use the embedded subroutine for
 1741     # merging the data into global variables.
 1742     $pm->run_on_finish ( sub {
 1743         my ($pid, $exit_code, $ident, $exit_signal, $core_dump, $part) = @_;  # $part is the hash ref the worker passed to finish()
 1744         my $part_ignored = $part->{'ignored'};
 1745         my $part_result_by_file = $part->{'results_by_file'};
 1746         my $part_result_by_language = $part->{'results_by_language'};
 1747         my $part_delta_by_file = $part->{'delta_by_file'};
 1748         my $part_delta_by_language = $part->{'delta_by_language'};
 1749         my $part_alignment = $part->{'alignment'};
 1750         my $part_errors = $part->{'errors'};
 1751            $tot_counted += scalar keys %$part_result_by_file;
 1752            $n_filepairs_compared += $part->{'n_filepairs_compared'};
 1753         # Since files are processed by multiple processes, we can't measure
 1754         # the number of processed files exactly. We approximate this by showing
 1755         # the number of files counted by finished processes.
 1756         printf "Counting:  %d\r", $tot_counted
 1757                  if $opt_progress_rate;
 1758 
 1759         foreach my $this_language ( keys %$part_result_by_language ) {  # add this part's per-language counts to the totals
 1760             my $counts = $part_result_by_language->{$this_language};
 1761             foreach my $inner_key ( keys %$counts ) {
 1762                 $Results_by_Language{$this_language}{$inner_key} +=
 1763                     $counts->{$inner_key};
 1764             }
 1765         }
 1766 
 1767         foreach my $this_language ( keys %$part_delta_by_language ) {  # same for the deltas, which carry one more level (status)
 1768             my $counts = $part_delta_by_language->{$this_language};
 1769             foreach my $inner_key ( keys %$counts ) {
 1770                 my $statuses = $counts->{$inner_key};
 1771                 foreach my $inner_status ( keys %$statuses ) {
 1772                     $Delta_by_Language{$this_language}{$inner_key}{$inner_status} +=
 1773                           $counts->{$inner_key}->{$inner_status};
 1774                 }
 1775             }
 1776         }
 1777 
 1778         foreach my $label ( keys %$part_alignment ) {  # union of the alignment entries of all parts
 1779             my $inner = $part_alignment->{$label};
 1780             foreach my $key ( keys %$inner ) {
 1781                 $alignment{$label}{$key} = 1;
 1782             }
 1783         }
 1784 
 1785         %Results_by_File = ( %Results_by_File, %$part_result_by_file );  # per-file hashes are merged by key, not summed
 1786         %Delta_by_File = ( %Delta_by_File, %$part_delta_by_file );
 1787         %Ignored = (%Ignored, %$part_ignored );
 1788         push ( @Errors, @$part_errors );
 1789     } );
 1790 
 1791     my $num_filepairs_per_part = ceil ( ( scalar @file_pairs_tot ) / $num_processes );  # slice sizes, rounded up
 1792     my $num_filesremoved_per_part = ceil ( ( scalar @files_removed_tot ) / $num_processes );
 1793     my $num_filesadded_per_part = ceil ( ( scalar @files_added_tot ) / $num_processes );
 1794 
 1795     while ( 1 ) {  # hand out one slice of each list per worker until all three lists are used up
 1796         my @files_added_part = splice @files_added_tot, 0, $num_filesadded_per_part;  # splice removes the slice from the *_tot list
 1797         my @files_removed_part = splice @files_removed_tot, 0, $num_filesremoved_per_part;
 1798         my @filepairs_part = splice @file_pairs_tot, 0, $num_filepairs_per_part;
 1799         if ( scalar @files_added_part == 0 and scalar @files_removed_part == 0 and
 1800              scalar @filepairs_part == 0 ) {
 1801             last;
 1802         }
 1803 
 1804         $pm->start() and next;  # parent: start() returned the child pid, go on to the next slice; the child falls through
 1805         my $count_result = count_filesets ( $fset_a, $fset_b,
 1806             \@files_added_part, \@files_removed_part,
 1807             \@filepairs_part, 1, \%Language, \%Ignored );
 1808         $pm->finish(0 , $count_result);  # child: exit with status 0 and pass the result hash to the parent's run_on_finish callback
 1809     }
 1810     # Wait for processes to finish
 1811     $pm->wait_all_children();
 1812 }
 1813 
 1814 # Write alignment data, if needed (only when $opt_diff_alignment is set)
 1815 if ($opt_diff_alignment) {
 1816     write_alignment_data ( $opt_diff_alignment, $n_filepairs_compared, \%alignment ) ;
 1817 }
 1818 
 1819 my @ignored_reasons = map { "$_: $Ignored{$_}" } sort keys %Ignored;  # one "file: reason" string per ignored file, sorted by file name
 1820 write_file($opt_ignored, {"file_type" => "ignored",
 1821                           "separator" => ": ",
 1822                           "columns"   => ["file", "reason"],
 1823                          }, @ignored_reasons   ) if $opt_ignored;
 1824 write_file($opt_counted, {}, sort keys %Results_by_File) if $opt_counted;  # sorted names of the counted files
 1825 # 1}}}
 1826 # Step 7:  Assemble results.                   {{{1
 1827 #
 1828 my $end_time = get_time();
 1829 printf "%8d file%s ignored.                           \n",
 1830     plural_form(scalar keys %Ignored) unless $opt_quiet;
 1831 print_errors(\%Error_Codes, \@Errors) if @Errors;
 1832 if (!%Delta_by_Language) {
 1833     print "Nothing to count.\n";
 1834     exit;
 1835 }
 1836 
 1837 if ($opt_by_file) {
 1838     @Lines_Out = diff_report($VERSION, get_time() - $start_time,
 1839                             "by file",
 1840                             \%Delta_by_File, \%Scale_Factor);
 1841 } else {
 1842     @Lines_Out = diff_report($VERSION, get_time() - $start_time,
 1843                             "by language",
 1844                             \%Delta_by_Language, \%Scale_Factor);
 1845 }
 1846 
 1847 # 1}}}
 1848 } else {
 1849 # Step 4:  Separate code from non-code files.  {{{1
 1850 my $fh = 0;
 1851 if ($opt_list_file or $opt_vcs) {
 1852     my @list;
 1853     if ($opt_vcs) {
 1854         @list = invoke_generator($opt_vcs, \@ARGV);
 1855     } else {
 1856         @list = read_list_file($opt_list_file);
 1857     }
 1858     $fh = make_file_list(\@list, \%Error_Codes, \@Errors, \%Ignored);
 1859 } else {
 1860     $fh = make_file_list(\@ARGV, \%Error_Codes, \@Errors, \%Ignored);
 1861     #     make_file_list populates global variable @file_list via call to
 1862     #     File::Find's find() which in turn calls files()
 1863 }
 1864 if ($opt_exclude_list_file) {
 1865     # note: process_exclude_list_file() references global @file_list
 1866     process_exclude_list_file($opt_exclude_list_file,
 1867                              \%Exclude_Dir,
 1868                              \%Ignored);
 1869 }
 1870 if ($opt_skip_win_hidden and $ON_WINDOWS) {
 1871     my @file_list_minus_hidded = ();
 1872     # eval code to run on Unix without 'missing Win32::File module' error.
 1873     my $win32_file_invocation = '
 1874         use Win32::File;
 1875         foreach my $F (@file_list) {
 1876             my $attr = undef;
 1877             Win32::File::GetAttributes($F, $attr);
 1878             if ($attr & HIDDEN) {
 1879                 $Ignored{$F} = "Windows hidden file";
 1880                 print "Ignoring $F since it is a Windows hidden file\n"
 1881                     if $opt_v > 1;
 1882             } else {
 1883                 push @file_list_minus_hidded, $F;
 1884             }
 1885         }';
 1886     eval $win32_file_invocation;
 1887     @file_list = @file_list_minus_hidded;
 1888 }
 1889 if ($opt_no_autogen) {
 1890     exclude_autogenerated_files(\@file_list,  # in/out
 1891                                 \%Error_Codes, \@Errors, \%Ignored);
 1892 }
 1893 #printf "%8d file%s excluded.                     \n",
 1894 #   plural_form(scalar keys %Ignored)
 1895 #   unless $opt_quiet;
 1896 # die print ": ", join("\n: ", @file_list), "\n";
 1897 # 1}}}
 1898 # Step 5:  Remove duplicate files.             {{{1
 1899 #
 1900 my %Language           = ();
 1901 my %unique_source_file = ();
 1902 remove_duplicate_files($fh                          ,   # in
 1903                            \%Language               ,   # out
 1904                            \%unique_source_file     ,   # out
 1905                       \%Error_Codes                 ,   # in
 1906                            \@Errors                 ,   # out
 1907                            \%Ignored                );  # out
 1908 if ($opt_exclude_content) {
 1909     exclude_by_regex($opt_exclude_content,              # in
 1910                     \%unique_source_file ,              # in/out
 1911                     \%Ignored);                         # out
 1912 }
 1913 printf "%8d unique file%s.                              \n",
 1914     plural_form(scalar keys %unique_source_file)
 1915     unless $opt_quiet;
 1916 # 1}}}
 1917 # Step 6:  Count code, comments, blank lines.  {{{1
 1918 #
 1919 my %Results_by_Language = ();
 1920 my %Results_by_File     = ();
 1921 my @results_parts  = ();
 1922 my @sorted_files = sort keys %unique_source_file;
 1923 
 1924 if ( $max_processes == 0) {
 1925     # Multiprocessing is disabled
 1926     my $part = count_files ( \@sorted_files , 0, \%Language);
 1927     %Results_by_File = %{$part->{'results_by_file'}};
 1928     %Results_by_Language= %{$part->{'results_by_language'}};
 1929     %Ignored = ( %Ignored, %{$part->{'ignored'}});
 1930     push ( @Errors, @{$part->{'errors'}});
 1931 } else {
 1932     # Do not create more processes than the number of files to be processed
 1933     my $num_files = scalar @sorted_files;
 1934     my $num_processes = $num_files >= $max_processes ? $max_processes : $num_files;
 1935     # Use at least one process.
 1936        $num_processes = 1
 1937             if $num_processes == 0;
 1938     # Start processes for counting
 1939     my $pm = Parallel::ForkManager->new($num_processes);
 1940     # When processes finish, they will use the embedded subroutine for
 1941     # merging the data into global variables.
 1942     $pm->run_on_finish ( sub {
 1943         my ($pid, $exit_code, $ident, $exit_signal, $core_dump, $part) = @_;
 1944         my $part_ignored = $part->{'ignored'};
 1945         my $part_result_by_file = $part->{'results_by_file'};
 1946         my $part_result_by_language = $part->{'results_by_language'};
 1947         my $part_errors = $part->{'errors'};
 1948         my $nCounted+= scalar keys %$part_result_by_file;
 1949         # Since files are processed by multiple processes, we can't measure
 1950         # the number of processed files exactly. We approximate this by showing
 1951         # the number of files counted by finished processes.
 1952         printf "Counting:  %d\r", $nCounted
 1953                  if $opt_progress_rate;
 1954 
 1955         foreach my $this_language ( keys %$part_result_by_language ) {
 1956             my $counts = $part_result_by_language->{$this_language};
 1957             foreach my $inner_key ( keys %$counts ) {
 1958                 $Results_by_Language{$this_language}{$inner_key} +=
 1959                     $counts->{$inner_key};
 1960             }
 1961         }
 1962         %Results_by_File = ( %Results_by_File, %$part_result_by_file );
 1963         %Ignored = (%Ignored, %$part_ignored);
 1964         push ( @Errors, @$part_errors);
 1965     } );
 1966     my $num_files_per_part = ceil ( ( scalar @sorted_files ) / $num_processes );
 1967     while ( my @part = splice @sorted_files, 0 , $num_files_per_part ) {
 1968         $pm->start() and next;
 1969         my $count_result = count_files ( \@part, 1, \%Language );
 1970         $pm->finish(0 , $count_result);
 1971     }
 1972     # Wait for processes to finish
 1973     $pm->wait_all_children();
 1974 }
 1975 
 1976 my @ignored_reasons = map { "$_: $Ignored{$_}" } sort keys %Ignored;
 1977 write_file($opt_ignored, {"file_type" => "ignored",
 1978                           "separator" => ": ",
 1979                           "columns"   => ["file", "reason"],
 1980                          }, @ignored_reasons   ) if $opt_ignored;
 1981 write_file($opt_counted, {}, sort keys %Results_by_File) if $opt_counted;
 1982 # 1}}}
 1983 # Step 7:  Assemble results.                   {{{1
 1984 #
 1985 my $end_time = get_time();
 1986 printf "%8d file%s ignored.\n", plural_form(scalar keys %Ignored)
 1987     unless $opt_quiet;
 1988 print_errors(\%Error_Codes, \@Errors) if @Errors;
 1989 exit unless %Results_by_Language;
 1990 
 1991 generate_sql($end_time - $start_time,
 1992             \%Results_by_File, \%Scale_Factor) if $opt_sql;
 1993 
 1994 exit if $skip_generate_report;
 1995 if      ($opt_by_file_by_lang) {
 1996     push @Lines_Out, generate_report( $VERSION, $end_time - $start_time,
 1997                                       "by file",
 1998                                       \%Results_by_File,    \%Scale_Factor);
 1999     push @Lines_Out, generate_report( $VERSION, $end_time - $start_time,
 2000                                       "by language",
 2001                                       \%Results_by_Language, \%Scale_Factor);
 2002 } elsif ($opt_by_file) {
 2003     push @Lines_Out, generate_report( $VERSION, $end_time - $start_time,
 2004                                       "by file",
 2005                                       \%Results_by_File,    \%Scale_Factor);
 2006 } else {
 2007     push @Lines_Out, generate_report( $VERSION, $end_time - $start_time,
 2008                                       "by language",
 2009                                       \%Results_by_Language, \%Scale_Factor);
 2010 }
 2011 # 1}}}
 2012 }
 2013 if ($opt_report_file) { write_file($opt_report_file, {}, @Lines_Out); }
 2014 else                  { print "\n", join("\n", @Lines_Out), "\n"; }
 2015 if ($opt_count_diff) {
 2016     ++$opt_count_diff;
 2017     exit if $opt_count_diff > 3;
 2018     goto Top_of_Processing_Loop;
 2019 }
 2020 sub exclude_by_regex {                       # {{{1
 2021     my ($regex,
 2022         $rh_unique_source_file, # in/out
 2023         $rh_ignored           , # out
 2024        ) = @_;
 2025     my @exclude = ();
 2026     foreach my $file (keys %{$rh_unique_source_file}) {
 2027         my $line_num = 0;
 2028         foreach my $line (read_file($file)) {
 2029             ++$line_num;
 2030             if ($line =~ /$regex/) {
 2031                 $rh_ignored->{$file} = "line $line_num match for --exclude-content=$regex";
 2032                 push @exclude, $file;
 2033                 last;
 2034             }
 2035         }
 2036     }
 2037     foreach my $file (@exclude) {
 2038         delete $rh_unique_source_file->{$file};
 2039     }
 2040 } # 1}}}
 2041 sub get_max_processes {                      # {{{1
 2042     # If user has specified valid number of processes, use that.
 2043     if (defined $opt_processes) {
 2044 #######eval "use Parallel::ForkManager 0.7.6;";
 2045 #######if ( defined $Parallel::ForkManager::VERSION ) {
 2046 ####### $HAVE_Parallel_ForkManager = 1;
 2047 #######}
 2048         if ( $opt_processes !~ /^\d+$/ ) {
 2049             print "Error: processes option argument must be numeric.\n";
 2050             exit 1;
 2051         }
 2052         elsif ( $opt_processes >0 and ! $HAVE_Parallel_ForkManager ) {
 2053             print "Error: cannot use multiple processes, because " .
 2054                   "Parallel::ForkManager is not installed, or the version is too old.\n";
 2055             exit 1;
 2056         }
 2057     elsif ( $opt_processes >0 and $ON_WINDOWS ) {
 2058             print "Error: cannot use multiple processes on Windows systems.\n";
 2059             exit 1;
 2060         }
 2061         else {
 2062             return $opt_processes;
 2063         }
 2064     }
 2065 
 2066     # Disable multiprocessing on Windows - does not work reliably
 2067     if ($ON_WINDOWS) {
 2068         return 0;
 2069     }
 2070 
 2071     # Disable multiprocessing if Parallel::ForkManager is not available
 2072     if ( ! $HAVE_Parallel_ForkManager ) {
 2073         return 0;
 2074     }
 2075 
 2076     # Set to number of cores on Linux
 2077     if ( $^O =~ /linux/i and -x '/usr/bin/nproc' ) {
 2078         my $numavcores_linux = `/usr/bin/nproc`;
 2079         chomp $numavcores_linux;
 2080         if ( $numavcores_linux =~ /^\d+$/ ) {
 2081             return $numavcores_linux;
 2082         }
 2083     }
 2084 
 2085     # Set to number of cores on MacOS
 2086     if ( $^O =~ /darwin/i and -x '/usr/sbin/sysctl') {
 2087        my $numavcores_macos = `/usr/sbin/sysctl -n hw.physicalcpu`;
 2088        chomp $numavcores_macos;
 2089        if ($numavcores_macos =~ /^\d+$/ ) {
 2090            return $numavcores_macos;
 2091        }
 2092     }
 2093 
 2094     # Disable multiprocessing in other cases
 2095     return 0;
 2096 } # 1}}}
 2097 sub exclude_autogenerated_files {            # {{{1
 2098     my ($ra_file_list, # in
 2099         $rh_Err      , # in   hash of error codes
 2100         $raa_errors  , # out
 2101         $rh_Ignored  , # out
 2102        ) = @_;
 2103     print "-> exclude_autogenerated_files()\n" if $opt_v > 2;
 2104     my @file_list_minus_autogen = ();
 2105     foreach my $file (@{$ra_file_list}) {
 2106         if ($file !~ /\.go$/) {
 2107             # at the moment, only know Go autogenerated files
 2108             push @file_list_minus_autogen, $file;
 2109             next;
 2110         }
 2111         my $first_line = first_line($file, $rh_Err, $raa_errors);
 2112         if ($first_line =~ m{^//\s+Code\s+generated\s+.*?\s+DO\s+NOT\s+EDIT\.$}) {
 2113             $rh_Ignored->{$file} = 'Go autogenerated file';
 2114         } else {
 2115             # Go, but not autogenerated
 2116             push @file_list_minus_autogen, $file;
 2117         }
 2118     }
 2119     @{$ra_file_list} = @file_list_minus_autogen;
 2120     print "<- exclude_autogenerated_files()\n" if $opt_v > 2;
 2121 } # 1}}}
 2122 sub file_extension {                         # {{{1
 2123     my ($fname, ) = @_;
 2124     $fname =~ m/\.(\w+)$/;
 2125     if ($1) {
 2126         return $1;
 2127     } else {
 2128         return "";
 2129     }
 2130 } # 1}}}
 2131 sub count_files {                            # {{{1
 2132     my ($filelist, $counter_type, $language_hash) = @_;
 2133     print "-> count_files()\n" if $opt_v > 2;
 2134     my @p_errors = ();
 2135     my %p_ignored = ();
 2136     my %p_rbl = ();
 2137     my %p_rbf = ();
 2138     my %Language = %{$language_hash};
 2139 
 2140     foreach my $file (@$filelist) {
 2141         if ( ! $counter_type ) {
 2142             # Multithreading disabled
 2143             $nCounted++;
 2144 
 2145             printf "Counting:  %d\r", $nCounted
 2146                  unless (!$opt_progress_rate or ($nCounted % $opt_progress_rate));
 2147         }
 2148 
 2149         next if $Ignored{$file};
 2150         if ($opt_include_ext and not $Include_Ext{ file_extension($file) }) {
 2151             $p_ignored{$file} = "not in --include-ext=$opt_include_ext";
 2152             next;
 2153         }
 2154         if ($opt_include_lang and not $Include_Language{$Language{$file}}) {
 2155             $p_ignored{$file} = "not in --include-lang=$opt_include_lang";
 2156             next;
 2157         }
 2158         if ($Exclude_Language{$Language{$file}}) {
 2159             $p_ignored{$file} = "--exclude-lang=$Language{$file}";
 2160             next;
 2161         }
 2162 
 2163         my $Filters_by_Language_Language_file = ! @{$Filters_by_Language{$Language{$file}} };
 2164         if ($Filters_by_Language_Language_file) {
 2165             if ($Language{$file} eq "(unknown)") {
 2166                 $p_ignored{$file} = "language unknown (#1)";
 2167             } else {
 2168                 $p_ignored{$file} = "missing Filters_by_Language{$Language{$file}}";
 2169             }
 2170             next;
 2171         }
 2172 
 2173         my ($all_line_count, $blank_count, $comment_count, $code_count);
 2174         if ($opt_use_sloccount and $Language{$file} =~ /^(C|C\+\+|XML|PHP|Pascal|Java)$/) {
 2175             chomp ($blank_count     = `grep -cv \"[^[:space:]]\" '$file'`);
 2176             chomp ($all_line_count  = `cat '$file' | wc -l`);
 2177             if      ($Language{$file} =~ /^(C|C\+\+)$/) {
 2178                 $code_count = `cat '$file' | c_count      | head -n 1`;
 2179             } elsif ($Language{$file} eq "XML") {
 2180                 $code_count = `cat '$file' | xml_count    | head -n 1`;
 2181             } elsif ($Language{$file} eq "PHP") {
 2182                 $code_count = `cat '$file' | php_count    | head -n 1`;
 2183             } elsif ($Language{$file} eq "Pascal") {
 2184                 $code_count = `cat '$file' | pascal_count | head -n 1`;
 2185             } elsif ($Language{$file} eq "Java") {
 2186                 $code_count = `cat '$file' | java_count   | head -n 1`;
 2187             } else {
 2188                 die "SLOCCount match failure: file=[$file] lang=[$Language{$file}]";
 2189             }
 2190             $code_count = substr($code_count, 0, -2);
 2191             $comment_count = $all_line_count - $code_count - $blank_count;
 2192         } else {
 2193             ($all_line_count,
 2194              $blank_count   ,
 2195              $comment_count ,) = call_counter($file, $Language{$file}, \@Errors);
 2196             $code_count = $all_line_count - $blank_count - $comment_count;
 2197         }
 2198 
 2199         if ($opt_by_file) {
 2200             $p_rbf{$file}{'code'   } = $code_count     ;
 2201             $p_rbf{$file}{'blank'  } = $blank_count    ;
 2202             $p_rbf{$file}{'comment'} = $comment_count  ;
 2203             $p_rbf{$file}{'lang'   } = $Language{$file};
 2204             $p_rbf{$file}{'nFiles' } = 1;
 2205         } else {
 2206             $p_rbf{$file} = 1;  # just keep track of counted files
 2207         }
 2208 
 2209         $p_rbl{$Language{$file}}{'nFiles'}++;
 2210         $p_rbl{$Language{$file}}{'code'}    += $code_count   ;
 2211         $p_rbl{$Language{$file}}{'blank'}   += $blank_count  ;
 2212         $p_rbl{$Language{$file}}{'comment'} += $comment_count;
 2213 
 2214     }
 2215     print "<- count_files()\n" if $opt_v > 2;
 2216     return {
 2217         "ignored" => \%p_ignored,
 2218         "errors"  => \@p_errors,
 2219         "results_by_file" => \%p_rbf,
 2220         "results_by_language" => \%p_rbl,
 2221     }
 2222 } # 1}}}
 2223 sub count_filesets {                         # {{{1
 2224     my ($fset_a,
 2225         $fset_b,
 2226         $files_added,
 2227         $files_removed,
 2228         $file_pairs,
 2229         $counter_type,
 2230         $language_hash,
 2231         $rh_Ignored) = @_;
 2232     print "-> count_filesets()\n" if $opt_v > 2;
 2233     my @p_errors = ();
 2234     my %p_alignment = ();
 2235     my %p_ignored = ();
 2236     my %p_rbl = ();
 2237     my %p_rbf = ();
 2238     my %p_dbl = ();
 2239     my %p_dbf = ();
 2240     my %Language = %$language_hash;
 2241 
 2242     my $nCounted = 0;
 2243 
 2244     my %already_counted = (); # already_counted{ filename } = 1
 2245 
 2246     if (!@$file_pairs) {
 2247         # Special case where all files were either added or deleted.
 2248         # In this case, one of these arrays will be empty:
 2249         #   @files_added, @files_removed
 2250         # so loop over both to cover both cases.
 2251         my $status = @$files_added ? 'added' : 'removed';
 2252         my $fset = @$files_added ? $fset_b : $fset_a;
 2253         foreach my $file (@$files_added, @$files_removed) {
 2254             next unless defined $Language{$fset}{$file};
 2255             my $Lang = $Language{$fset}{$file};
 2256             next if $Lang eq '(unknown)';
 2257             my ($all_line_count,
 2258                 $blank_count   ,
 2259                 $comment_count ,
 2260                 ) = call_counter($file, $Lang, \@p_errors);
 2261             $already_counted{$file} = 1;
 2262             my $code_count = $all_line_count-$blank_count-$comment_count;
 2263             if ($opt_by_file) {
 2264                 $p_dbf{$file}{'code'   }{$status} += $code_count   ;
 2265                 $p_dbf{$file}{'blank'  }{$status} += $blank_count  ;
 2266                 $p_dbf{$file}{'comment'}{$status} += $comment_count;
 2267                 $p_dbf{$file}{'lang'   }{$status}  = $Lang         ;
 2268                 $p_dbf{$file}{'nFiles' }{$status} += 1             ;
 2269             }
 2270             $p_dbl{$Lang}{'code'   }{$status} += $code_count   ;
 2271             $p_dbl{$Lang}{'blank'  }{$status} += $blank_count  ;
 2272             $p_dbl{$Lang}{'comment'}{$status} += $comment_count;
 2273             $p_dbl{$Lang}{'nFiles' }{$status} += 1             ;
 2274         }
 2275     }
 2276 
 2277     #use Data::Dumper::Simple;
 2278     #use Data::Dumper;
 2279     #print Dumper(\@files_added, \@files_removed, \@file_pairs);
 2280     #print "after align_by_pairs:\n";
 2281     #print "added:\n";
 2282 
 2283     foreach my $f (@$files_added) {
 2284         next if $already_counted{$f};
 2285         #printf "%10s -> %s\n", $f, $Language{$fh[$F+1]}{$f};
 2286         # Don't proceed unless the file (both L and R versions)
 2287         # is in a known language.
 2288         next if $opt_include_ext
 2289             and not $Include_Ext{ file_extension($f) };
 2290         next if $opt_include_lang
 2291             and not $Include_Language{$Language{$fset_b}{$f}};
 2292         my $this_lang = $Language{$fset_b}{$f};
 2293         if ($this_lang eq "(unknown)") {
 2294             $p_ignored{$f} = "uknown language";
 2295             next;
 2296         }
 2297         if ($Exclude_Language{$this_lang}) {
 2298             $p_ignored{$f} = "--exclude-lang=$this_lang";
 2299             next;
 2300         }
 2301         $p_alignment{"added"}{sprintf "  + %s ; %s\n", $f, $this_lang} = 1;
 2302         ++$p_dbl{ $this_lang }{'nFiles'}{'added'};
 2303         # Additionally, add contents of file $f to
 2304         # Delta_by_File{$f}{comment/blank/code}{'added'}
 2305         # Delta_by_Language{$lang}{comment/blank/code}{'added'}
 2306         # via the $p_dbl and $p_dbf variables.
 2307         my ($all_line_count,
 2308             $blank_count   ,
 2309             $comment_count ,
 2310            ) = call_counter($f, $this_lang, \@p_errors);
 2311         $p_dbl{ $this_lang }{'comment'}{'added'} += $comment_count;
 2312         $p_dbl{ $this_lang }{'blank'}{'added'}   += $blank_count;
 2313         $p_dbl{ $this_lang }{'code'}{'added'}    +=
 2314            $all_line_count - $blank_count - $comment_count;
 2315         $p_dbf{ $f }{'comment'}{'added'} = $comment_count;
 2316         $p_dbf{ $f }{'blank'}{'added'}   = $blank_count;
 2317         $p_dbf{ $f }{'code'}{'added'}    =
 2318            $all_line_count - $blank_count - $comment_count;
 2319     }
 2320 
 2321     #print "removed:\n";
 2322     foreach my $f (@$files_removed) {
 2323         next if $already_counted{$f};
 2324         # Don't proceed unless the file (both L and R versions)
 2325         # is in a known language.
 2326         next if $opt_include_ext
 2327             and not $Include_Ext{ file_extension($f) };
 2328         next if $opt_include_lang
 2329             and (not defined $Language{$fset_a}{$f}
 2330              or  not defined $Include_Language{$Language{$fset_a}{$f}});
 2331         my $this_lang = $Language{$fset_a}{$f};
 2332         if ($this_lang eq "(unknown)") {
 2333             $p_ignored{$f} = "uknown language";
 2334             next;
 2335         }
 2336         if ($Exclude_Language{$this_lang}) {
 2337             $p_ignored{$f} = "--exclude-lang=$this_lang";
 2338             next;
 2339         }
 2340         ++$p_dbl{ $this_lang }{'nFiles'}{'removed'};
 2341         $p_alignment{"removed"}{sprintf "  - %s ; %s\n", $f, $this_lang} = 1;
 2342         #printf "%10s -> %s\n", $f, $Language{$fh[$F  ]}{$f};
 2343         # Additionally, add contents of file $f to
 2344         #        Delta_by_File{$f}{comment/blank/code}{'removed'}
 2345         #        Delta_by_Language{$lang}{comment/blank/code}{'removed'}
 2346         # via the $p_dbl and $p_dbf variables.
 2347         my ($all_line_count,
 2348             $blank_count   ,
 2349             $comment_count ,
 2350            ) = call_counter($f, $this_lang, \@p_errors);
 2351         $p_dbl{ $this_lang}{'comment'}{'removed'} += $comment_count;
 2352         $p_dbl{ $this_lang}{'blank'}{'removed'}   += $blank_count;
 2353         $p_dbl{ $this_lang}{'code'}{'removed'}    +=
 2354              $all_line_count - $blank_count - $comment_count;
 2355         $p_dbf{ $f }{'comment'}{'removed'} = $comment_count;
 2356         $p_dbf{ $f }{'blank'}{'removed'}   = $blank_count;
 2357         $p_dbf{ $f }{'code'}{'removed'}    =
 2358             $all_line_count - $blank_count - $comment_count;
 2359     }
 2360 
 2361     my $n_file_pairs_compared = 0;
 2362     # Don't know ahead of time how many file pairs will be compared
 2363     # since duplicates are weeded out below.  The answer is
 2364     # scalar @file_pairs only if there are no duplicates.
 2365 
 2366     foreach my $pair (@$file_pairs) {
 2367         my $file_L = $pair->[0];
 2368         my $file_R = $pair->[1];
 2369         my $Lang_L = $Language{$fset_a}{$file_L};
 2370         my $Lang_R = $Language{$fset_b}{$file_R};
 2371         if (!defined($Lang_L) or !defined($Lang_R)) {
 2372             print " -> count_filesets skipping $file_L, $file_R ",
 2373                   "because language cannot be inferred\n" if $opt_v;
 2374             next;
 2375         }
 2376         #print "main step 6 file_L=$file_L    file_R=$file_R\n";
 2377         ++$nCounted;
 2378         printf "Counting:  %d\r", $nCounted
 2379              unless ($counter_type or !$opt_progress_rate or ($nCounted % $opt_progress_rate));
 2380         next if $p_ignored{$file_L} or $p_ignored{$file_R};
 2381 
 2382         # filter out non-included extensions
 2383         if ($opt_include_ext  and not $Include_Ext{ file_extension($file_L) }
 2384                               and not $Include_Ext{ file_extension($file_R) }) {
 2385             $p_ignored{$file_L} = "not in --include-lang=$opt_include_ext";
 2386             $p_ignored{$file_R} = "not in --include-lang=$opt_include_ext";
 2387             next;
 2388         }
 2389         # filter out non-included languages
 2390         if ($opt_include_lang and not $Include_Language{$Lang_L}
 2391                               and not $Include_Language{$Lang_R}) {
 2392             $p_ignored{$file_L} = "not in --include-lang=$opt_include_lang";
 2393             $p_ignored{$file_R} = "not in --include-lang=$opt_include_lang";
 2394             next;
 2395         }
 2396         # filter out excluded or unrecognized languages
 2397         if ($Exclude_Language{$Lang_L} or $Exclude_Language{$Lang_R}) {
 2398             $p_ignored{$file_L} = "--exclude-lang=$Lang_L";
 2399             $p_ignored{$file_R} = "--exclude-lang=$Lang_R";
 2400             next;
 2401         }
 2402 
 2403         my $not_Filters_by_Language_Lang_LR = 0;
 2404         #print "file_LR = [$file_L] [$file_R]\n";
 2405         #print "Lang_LR = [$Lang_L] [$Lang_R]\n";
 2406         if (($Lang_L eq "(unknown)") or
 2407             ($Lang_R eq "(unknown)") or
 2408             !(@{$Filters_by_Language{$Lang_L} }) or
 2409             !(@{$Filters_by_Language{$Lang_R} })) {
 2410             $not_Filters_by_Language_Lang_LR = 1;
 2411         }
 2412         if ($not_Filters_by_Language_Lang_LR) {
 2413             if (($Lang_L eq "(unknown)") or ($Lang_R eq "(unknown)")) {
 2414                 $p_ignored{$fset_a}{$file_L} = "language unknown (#1)";
 2415                 $p_ignored{$fset_b}{$file_R} = "language unknown (#1)";
 2416             } else {
 2417                 $p_ignored{$fset_a}{$file_L} = "missing Filters_by_Language{$Lang_L}";
 2418                 $p_ignored{$fset_b}{$file_R} = "missing Filters_by_Language{$Lang_R}";
 2419             }
 2420             next;
 2421         }
 2422 
 2423         # filter out explicitly excluded files
 2424         if ($opt_exclude_list_file and
 2425             ($rh_Ignored->{$file_L} or $rh_Ignored->{$file_R})) {
 2426             my $msg_2;
 2427             if ($rh_Ignored->{$file_L}) {
 2428                 $msg_2 = "$file_L (paired to $file_R)";
 2429             } else {
 2430                 $msg_2 = "$file_R (paired to $file_L)";
 2431             }
 2432             my $msg_1 = "in --exclude-list-file=$opt_exclude_list_file";
 2433             $p_ignored{$file_L} = "$msg_1, $msg_2";
 2434             $p_ignored{$file_R} = "$msg_1, $msg_2";
 2435             next;
 2436         }
 2437 
 2438         #print "DIFF($file_L, $file_R)\n";
 2439         # step 0: compare the two files' contents
 2440         chomp ( my @lines_L = read_file($file_L) );
 2441         chomp ( my @lines_R = read_file($file_R) );
 2442         my $language_file_L = "";
 2443         if (defined $Language{$fset_a}{$file_L}) {
 2444             $language_file_L = $Language{$fset_a}{$file_L};
 2445         } else {
 2446             # files $file_L and $file_R do not contain known language
 2447             next;
 2448         }
 2449 
 2450         my $contents_are_same = 1;
 2451         if (scalar @lines_L == scalar @lines_R) {
 2452             # same size, must compare line-by-line
 2453             for (my $i = 0; $i < scalar @lines_L; $i++) {
 2454                if ($lines_L[$i] ne $lines_R[$i]) {
 2455                    $contents_are_same = 0;
 2456                    last;
 2457                }
 2458             }
 2459             if ($contents_are_same) {
 2460                 ++$p_dbl{$language_file_L}{'nFiles'}{'same'};
 2461             } else {
 2462                 ++$p_dbl{$language_file_L}{'nFiles'}{'modified'};
 2463             }
 2464         } else {
 2465             $contents_are_same = 0;
 2466             # different sizes, contents have changed
 2467             ++$p_dbl{$language_file_L}{'nFiles'}{'modified'};
 2468         }
 2469 
 2470         if ($opt_diff_alignment) {
 2471             my $str =  "$file_L | $file_R ; $language_file_L";
 2472             if ($contents_are_same) {
 2473                 $p_alignment{"pairs"}{"  == $str"} = 1;
 2474             } else {
 2475                 $p_alignment{"pairs"}{"  != $str"} = 1;
 2476             }
 2477             ++$n_file_pairs_compared;
 2478         }
 2479 
 2480         my ($all_line_count_L, $blank_count_L   , $comment_count_L ,
 2481             $all_line_count_R, $blank_count_R   , $comment_count_R , )  = (0,0,0,0,0,0,);
 2482         if (!$contents_are_same) {
 2483             # step 1: identify comments in both files
 2484             #print "Diff blank removal L language= $Lang_L";
 2485             #print " scalar(lines_L)=", scalar @lines_L, "\n";
 2486             my @original_minus_blanks_L
 2487                     = rm_blanks(  \@lines_L, $Lang_L, \%EOL_Continuation_re);
 2488             #print "1: scalar(original_minus_blanks_L)=", scalar @original_minus_blanks_L, "\n";
 2489             @lines_L    = @original_minus_blanks_L;
 2490             #print "2: scalar(lines_L)=", scalar @lines_L, "\n";
 2491             @lines_L    = add_newlines(\@lines_L); # compensate for rm_comments()
 2492             @lines_L    = rm_comments( \@lines_L, $Lang_L, $file_L,
 2493                                        \%EOL_Continuation_re);
 2494             #print "3: scalar(lines_L)=", scalar @lines_L, "\n";
 2495 
 2496             #print "Diff blank removal R language= $Lang_R\n";
 2497             my @original_minus_blanks_R
 2498                     = rm_blanks(  \@lines_R, $Lang_R, \%EOL_Continuation_re);
 2499             @lines_R    = @original_minus_blanks_R;
 2500             @lines_R    = add_newlines(\@lines_R); # taken away by rm_comments()
 2501             @lines_R    = rm_comments( \@lines_R, $Lang_R, $file_R,
 2502                                        \%EOL_Continuation_re);
 2503 
 2504             my (@diff_LL, @diff_LR, );
 2505                    array_diff( $file_L                  ,   # in
 2506                        \@original_minus_blanks_L ,   # in
 2507                        \@lines_L                 ,   # in
 2508                        "comment"                 ,   # in
 2509                        \@diff_LL, \@diff_LR      ,   # out
 2510                        \@p_errors);                    # in/out
 2511 
 2512             my (@diff_RL, @diff_RR, );
 2513                     array_diff( $file_R                  ,   # in
 2514                        \@original_minus_blanks_R ,   # in
 2515                        \@lines_R                 ,   # in
 2516                        "comment"                 ,   # in
 2517                        \@diff_RL, \@diff_RR      ,   # out
 2518                        \@p_errors);                    # in/out
 2519             # each line of each file is now classified as
 2520             # code or comment
 2521             #use Data::Dumper;
 2522             #print Dumper("diff_LL", \@diff_LL, "diff_LR", \@diff_LR, );
 2523             #print Dumper("diff_RL", \@diff_RL, "diff_RR", \@diff_RR, );
 2524             #die;
 2525 
 2526             # step 2: separate code from comments for L and R files
 2527             my @code_L = ();
 2528             my @code_R = ();
 2529             my @comm_L = ();
 2530             my @comm_R = ();
 2531             foreach my $line_info (@diff_LL) {
 2532                 if      ($line_info->{'type'} eq "code"   ) {
 2533                     push @code_L, $line_info->{char};
 2534                 } elsif ($line_info->{'type'} eq "comment") {
 2535                     push @comm_L, $line_info->{char};
 2536                 } else {
 2537                     die "Diff unexpected line type ",
 2538                         $line_info->{'type'}, "for $file_L line ",
 2539                         $line_info->{'lnum'};
 2540                 }
 2541             }
 2542 
 2543             foreach my $line_info (@diff_RL) {
 2544                 if      ($line_info->{type} eq "code"   ) {
 2545                     push @code_R, $line_info->{'char'};
 2546                 } elsif ($line_info->{type} eq "comment") {
 2547                     push @comm_R, $line_info->{'char'};
 2548                 } else {
 2549                     die "Diff unexpected line type ",
 2550                         $line_info->{'type'}, "for $file_R line ",
 2551                         $line_info->{'lnum'};
 2552                 }
 2553             }
 2554 
 2555             if ($opt_ignore_whitespace) {
 2556                 # strip all whitespace from each line of source code
 2557                 # and comments then use these stripped arrays in diffs
 2558                 foreach (@code_L) { s/\s+//g }
 2559                 foreach (@code_R) { s/\s+//g }
 2560                 foreach (@comm_L) { s/\s+//g }
 2561                 foreach (@comm_R) { s/\s+//g }
 2562             }
 2563             if ($opt_ignore_case) {
 2564                 # change all text to lowercase in diffs
 2565                 foreach (@code_L) { $_ = lc }
 2566                 foreach (@code_R) { $_ = lc }
 2567                 foreach (@comm_L) { $_ = lc }
 2568                 foreach (@comm_R) { $_ = lc }
 2569             }
 2570             # step 3: compute code diffs
 2571             array_diff("$file_L v. $file_R"   ,   # in
 2572                        \@code_L               ,   # in
 2573                        \@code_R               ,   # in
 2574                        "revision"             ,   # in
 2575                        \@diff_LL, \@diff_LR   ,   # out
 2576                        \@p_errors);                 # in/out
 2577             #print Dumper("diff_LL", \@diff_LL, "diff_LR", \@diff_LR, );
 2578             #print Dumper("diff_LR", \@diff_LR);
 2579             foreach my $line_info (@diff_LR) {
 2580                 my $status = $line_info->{'desc'}; # same|added|removed|modified
 2581                 ++$p_dbl{$Lang_L}{'code'}{$status};
 2582                 if ($opt_by_file) {
 2583                     ++$p_dbf{$file_L}{'code'}{$status};
 2584                 }
 2585             }
 2586             #use Data::Dumper;
 2587             #print Dumper("code diffs:", \@diff_LL, \@diff_LR);
 2588 
 2589             # step 4: compute comment diffs
 2590             array_diff("$file_L v. $file_R"   ,   # in
 2591                        \@comm_L               ,   # in
 2592                        \@comm_R               ,   # in
 2593                        "revision"             ,   # in
 2594                        \@diff_LL, \@diff_LR   ,   # out
 2595                        \@Errors);                 # in/out
 2596             #print Dumper("comment diff_LR", \@diff_LR);
 2597             foreach my $line_info (@diff_LR) {
 2598                 my $status = $line_info->{'desc'}; # same|added|removed|modified
 2599                 ++$p_dbl{$Lang_L}{'comment'}{$status};
 2600                 if ($opt_by_file) {
 2601                     ++$p_dbf{$file_L}{'comment'}{$status};
 2602                 }
 2603             }
 2604             #print Dumper("comment diffs:", \@diff_LL, \@diff_LR);
 2605 
 2606             # step 5: compute difference in blank lines (kind of pointless)
 2607             next if $Lang_L eq '(unknown)' or
 2608                     $Lang_R eq '(unknown)';
 2609             ($all_line_count_L,
 2610              $blank_count_L   ,
 2611              $comment_count_L ,
 2612             ) = call_counter($file_L, $Lang_L, \@Errors);
 2613 
 2614             ($all_line_count_R,
 2615              $blank_count_R   ,
 2616              $comment_count_R ,
 2617             ) = call_counter($file_R, $Lang_R, \@Errors);
 2618         } else {
 2619             # L and R file contents are identical, no need to diff
 2620             ($all_line_count_L,
 2621              $blank_count_L   ,
 2622              $comment_count_L ,
 2623             ) = call_counter($file_L, $Lang_L, \@Errors);
 2624             $all_line_count_R = $all_line_count_L;
 2625             $blank_count_R    = $blank_count_L   ;
 2626             $comment_count_R  = $comment_count_L ;
 2627             my $code_lines_R  = $all_line_count_R - ($blank_count_R + $comment_count_R);
 2628             $p_dbl{$Lang_L}{'blank'}{'same'}   += $blank_count_R;
 2629             $p_dbl{$Lang_L}{'comment'}{'same'} += $comment_count_R;
 2630             $p_dbl{$Lang_L}{'code'}{'same'}    += $code_lines_R;
 2631             if ($opt_by_file) {
 2632                 $p_dbf{$file_L}{'blank'}{'same'}   += $blank_count_R;
 2633                 $p_dbf{$file_L}{'comment'}{'same'} += $comment_count_R;
 2634                 $p_dbf{$file_L}{'code'}{'same'}    += $code_lines_R;
 2635             }
 2636         }
 2637 
 2638         if ($blank_count_L <  $blank_count_R) {
 2639             my $D = $blank_count_R - $blank_count_L;
 2640             $p_dbl{$Lang_L}{'blank'}{'added'}   += $D;
 2641         } else {
 2642             my $D = $blank_count_L - $blank_count_R;
 2643             $p_dbl{$Lang_L}{'blank'}{'removed'} += $D;
 2644         }
 2645         if ($opt_by_file) {
 2646             if ($blank_count_L <  $blank_count_R) {
 2647                 my $D = $blank_count_R - $blank_count_L;
 2648                 $p_dbf{$file_L}{'blank'}{'added'}   += $D;
 2649             } else {
 2650                 my $D = $blank_count_L - $blank_count_R;
 2651                 $p_dbf{$file_L}{'blank'}{'removed'} += $D;
 2652             }
 2653         }
 2654 
 2655         my $code_count_L = $all_line_count_L-$blank_count_L-$comment_count_L;
 2656         if ($opt_by_file) {
 2657             $p_rbf{$file_L}{'code'   } = $code_count_L    ;
 2658             $p_rbf{$file_L}{'blank'  } = $blank_count_L   ;
 2659             $p_rbf{$file_L}{'comment'} = $comment_count_L ;
 2660             $p_rbf{$file_L}{'lang'   } = $Lang_L          ;
 2661             $p_rbf{$file_L}{'nFiles' } = 1                ;
 2662         } else {
 2663             $p_rbf{$file_L} = 1;  # just keep track of counted files
 2664         }
 2665 
 2666         $p_rbl{$Lang_L}{'nFiles'}++;
 2667         $p_rbl{$Lang_L}{'code'}    += $code_count_L   ;
 2668         $p_rbl{$Lang_L}{'blank'}   += $blank_count_L  ;
 2669         $p_rbl{$Lang_L}{'comment'} += $comment_count_L;
 2670     }
 2671 
 2672     print "<- count_filesets()\n" if $opt_v > 2;
 2673     return {
 2674         "ignored" => \%p_ignored,
 2675         "errors"  => \@p_errors,
 2676         "results_by_file" => \%p_rbf,
 2677         "results_by_language" => \%p_rbl,
 2678         "delta_by_file" => \%p_dbf,
 2679         "delta_by_language" => \%p_dbl,
 2680         "alignment" => \%p_alignment,
 2681         "n_filepairs_compared" => $n_file_pairs_compared
 2682     }
 2683 } # 1}}}
 2684 sub write_alignment_data {                   # {{{1
 2685     my ($filename, $n_filepairs_compared, $data ) = @_;
 2686     my @output = ();
 2687     if ( $data->{'added'} ) {
 2688         my %added_lines = %{$data->{'added'}};
 2689         push (@output, "Files added: " . (scalar keys %added_lines) . "\n");
 2690         foreach my $line ( sort keys %added_lines ) {
 2691             push (@output, $line);
 2692         }
 2693         push (@output, "\n" );
 2694     }
 2695     if ( $data->{'removed'} ) {
 2696         my %removed_lines = %{$data->{'removed'}};
 2697         push (@output, "Files removed: " . (scalar keys %removed_lines) . "\n");
 2698         foreach my $line ( sort keys %removed_lines ) {
 2699             push (@output, $line);
 2700         }
 2701         push (@output, "\n");
 2702     }
 2703     if ( $data->{'pairs'} ) {
 2704         my %pairs = %{$data->{'pairs'}};
 2705         push (@output, "File pairs compared: " . $n_filepairs_compared . "\n");
 2706         foreach my $pair ( sort keys %pairs ) {
 2707             push (@output, $pair);
 2708         }
 2709     }
 2710     write_file($filename, {}, @output);
 2711 } # 1}}}
 2712 sub exclude_dir_validates {                  # {{{1
 2713     my ($rh_Exclude_Dir) = @_;
 2714     my $is_OK = 1;
 2715     foreach my $dir (keys %{$rh_Exclude_Dir}) {
 2716         if (($ON_WINDOWS and $dir =~ m{\\}) or ($dir =~ m{/})) {
 2717             $is_OK = 0;
 2718             warn "--exclude-dir '$dir' :  cannot specify directory paths\n";
 2719         }
 2720     }
 2721     if (!$is_OK) {
 2722         warn "Use '--fullpath --not-match-d=REGEX' instead\n";
 2723     }
 2724     return $is_OK;
 2725 } # 1}}}
 2726 sub process_exclude_list_file {              # {{{1
 2727     my ($list_file      , # in
 2728         $rh_exclude_dir , # out
 2729         $rh_ignored     , # out
 2730        ) = @_;
 2731     # note: references global @file_list
 2732     print "-> process_exclude_list_file($list_file)\n" if $opt_v > 2;
 2733     # reject a specific set of files and/or directories
 2734     my @reject_list   = read_list_file($list_file);
 2735     my @file_reject_list = ();
 2736     foreach my $F_or_D (@reject_list) {
 2737         if (is_dir($F_or_D)) {
 2738             $rh_exclude_dir->{$F_or_D} = 1;
 2739         } elsif (is_file($F_or_D)) {
 2740             push @file_reject_list, $F_or_D;
 2741         }
 2742     }
 2743 
 2744     # Normalize file names for better comparison.
 2745     my %normalized_input   = normalize_file_names(@file_list);
 2746     my %normalized_reject  = normalize_file_names(@file_reject_list);
 2747     my %normalized_exclude = normalize_file_names(keys %{$rh_exclude_dir});
 2748     foreach my $F (keys %normalized_input) {
 2749         if ($normalized_reject{$F} or is_excluded($F, \%normalized_exclude)) {
 2750             my $orig_F = $normalized_input{$F};
 2751             $rh_ignored->{$orig_F} = "listed in exclusion file $opt_exclude_list_file";
 2752             print "Ignoring $orig_F because it appears in $opt_exclude_list_file\n"
 2753                 if $opt_v > 1;
 2754         }
 2755     }
 2756 
 2757     print "<- process_exclude_list_file\n" if $opt_v > 2;
 2758 } # 1}}}
 2759 sub combine_results {                        # {{{1
 2760     # returns 1 if the inputs are categorized by language
 2761     #         0 if no identifiable language was found
 2762     my ($ra_report_files, # in
 2763         $report_type    , # in  "by language" or "by report file"
 2764         $rhh_count      , # out count{TYPE}{nFiles|code|blank|comment|scaled}
 2765         $rhaa_Filters_by_Language , # in
 2766        ) = @_;
 2767 
 2768     print "-> combine_results(report_type=$report_type)\n" if $opt_v > 2;
 2769     my $found_language = 0;
 2770 
 2771     foreach my $file (@{$ra_report_files}) {
 2772         my $n_results_found = 0;
 2773         my $IN = new IO::File $file, "r";
 2774         if (!defined $IN) {
 2775             warn "Unable to read $file; ignoring.\n";
 2776             next;
 2777         }
 2778         while (<$IN>) {
 2779             next if /^(http|Language|SUM|-----)/;
 2780             if (!$opt_by_file  and
 2781                 m{^(.*?)\s+         # language
 2782                    (\d+)\s+         # files
 2783                    (\d+)\s+         # blank
 2784                    (\d+)\s+         # comments
 2785                    (\d+)\s+         # code
 2786                    (                #    next four entries missing with -no3
 2787                    x\s+             # x
 2788                    \d+\.\d+\s+      # scale
 2789                    =\s+             # =
 2790                    (\d+\.\d+)\s*    # scaled code
 2791                    )?
 2792                    $}x) {
 2793                 if ($report_type eq "by language") {
 2794                     if (!defined $rhaa_Filters_by_Language->{$1}) {
 2795                         warn "Unrecognized language '$1' in $file ignored\n";
 2796                         next;
 2797                     }
 2798                     # above test necessary to avoid trying to sum reports
 2799                     # of reports (which have no language breakdown).
 2800                     $found_language = 1;
 2801                     $rhh_count->{$1   }{'nFiles' } += $2;
 2802                     $rhh_count->{$1   }{'blank'  } += $3;
 2803                     $rhh_count->{$1   }{'comment'} += $4;
 2804                     $rhh_count->{$1   }{'code'   } += $5;
 2805                     $rhh_count->{$1   }{'scaled' } += $7 if $opt_3;
 2806                 } else {
 2807                     $rhh_count->{$file}{'nFiles' } += $2;
 2808                     $rhh_count->{$file}{'blank'  } += $3;
 2809                     $rhh_count->{$file}{'comment'} += $4;
 2810                     $rhh_count->{$file}{'code'   } += $5;
 2811                     $rhh_count->{$file}{'scaled' } += $7 if $opt_3;
 2812                 }
 2813                 ++$n_results_found;
 2814             } elsif ($opt_by_file  and
 2815                 m{^(.*?)\s+         # language
 2816                    (\d+)\s+         # blank
 2817                    (\d+)\s+         # comments
 2818                    (\d+)\s+         # code
 2819                    (                #    next four entries missing with -no3
 2820                    x\s+             # x
 2821                    \d+\.\d+\s+      # scale
 2822                    =\s+             # =
 2823                    (\d+\.\d+)\s*    # scaled code
 2824                    )?
 2825                    $}x) {
 2826                 if ($report_type eq "by language") {
 2827                     next unless %{$rhaa_Filters_by_Language->{$1}};
 2828                     # above test necessary to avoid trying to sum reports
 2829                     # of reports (which have no language breakdown).
 2830                     $found_language = 1;
 2831                     $rhh_count->{$1   }{'nFiles' } +=  1;
 2832                     $rhh_count->{$1   }{'blank'  } += $2;
 2833                     $rhh_count->{$1   }{'comment'} += $3;
 2834                     $rhh_count->{$1   }{'code'   } += $4;
 2835                     $rhh_count->{$1   }{'scaled' } += $6 if $opt_3;
 2836                 } else {
 2837                     $rhh_count->{$file}{'nFiles' } +=  1;
 2838                     $rhh_count->{$file}{'blank'  } += $2;
 2839                     $rhh_count->{$file}{'comment'} += $3;
 2840                     $rhh_count->{$file}{'code'   } += $4;
 2841                     $rhh_count->{$file}{'scaled' } += $6 if $opt_3;
 2842                 }
 2843                 ++$n_results_found;
 2844             }
 2845         }
 2846         warn "No counts found in $file--is the file format correct?\n"
 2847             unless $n_results_found;
 2848     }
 2849     print "<- combine_results\n" if $opt_v > 2;
 2850     return $found_language;
 2851 } # 1}}}
 2852 sub compute_denominator {                    # {{{1
 2853     my ($method, $nCode, $nComment, $nBlank, ) = @_;
 2854     print "-> compute_denominator\n" if $opt_v > 2;
 2855     my %den        = ( "c" => $nCode );
 2856        $den{"cm"}  = $den{"c"}  + $nComment;
 2857        $den{"cmb"} = $den{"cm"} + $nBlank;
 2858        $den{"cb"}  = $den{"c"}  + $nBlank;
 2859 
 2860     print "<- compute_denominator\n" if $opt_v > 2;
 2861     return $den{ $method };
 2862 } # 1}}}
 2863 sub yaml_to_json_separators {                # {{{1
 2864     # YAML and JSON are closely related.  Their differences can be captured
 2865     # by trailing commas ($C), braces ($open_B, $close_B), and
 2866     # quotes around text ($Q).
 2867     print "-> yaml_to_json_separators()\n" if $opt_v > 2;
 2868     my ($Q, $open_B, $close_B, $start, $C);
 2869     if ($opt_json) {
 2870        $C       = ',';
 2871        $Q       = '"';
 2872        $open_B  = '{';
 2873        $close_B = '}';
 2874        $start   = '{';
 2875     } else {
 2876        $C       = '';
 2877        $Q       = '' ;
 2878        $open_B  = '' ;
 2879        $close_B = '';
 2880        $start   = "---\n# $URL\n";
 2881     }
 2882     print "<- yaml_to_json_separators()\n" if $opt_v > 2;
 2883     return ($Q, $open_B, $close_B, $start, $C);
 2884 } # 1}}}
 2885 sub diff_report     {                        # {{{1
 2886     # returns an array of lines containing the results
 2887     print "-> diff_report\n" if $opt_v > 2;
 2888 
 2889     if ($opt_xml) {
 2890         print "<- diff_report\n" if $opt_v > 2;
 2891         return diff_xml_report(@_)
 2892     } elsif ($opt_yaml) {
 2893         print "<- diff_report\n" if $opt_v > 2;
 2894         return diff_yaml_report(@_)
 2895     } elsif ($opt_json) {
 2896         print "<- diff_report\n" if $opt_v > 2;
 2897         return diff_json_report(@_)
 2898     } elsif ($opt_csv or $opt_md) {
 2899         print "<- diff_report\n" if $opt_v > 2;
 2900         return diff_csv_report(@_)
 2901     }
 2902 
 2903     my ($version    , # in
 2904         $elapsed_sec, # in
 2905         $report_type, # in  "by language" | "by report file" | "by file"
 2906         $rhhh_count , # in  count{TYPE}{nFiles|code|blank|comment}{a|m|r|s}
 2907         $rh_scale   , # in
 2908        ) = @_;
 2909 
 2910 #use Data::Dumper;
 2911 #print "diff_report: ", Dumper($rhhh_count), "\n";
 2912     my @results       = ();
 2913 
 2914     my $languages     = ();
 2915     my %sum           = (); # sum{nFiles|blank|comment|code}{same|modified|added|removed}
 2916     my $max_len       = 0;
 2917     foreach my $language (keys %{$rhhh_count}) {
 2918         foreach my $V (qw(nFiles blank comment code)) {
 2919             foreach my $S (qw(added same modified removed)) {
 2920                 $rhhh_count->{$language}{$V}{$S} = 0 unless
 2921                     defined $rhhh_count->{$language}{$V}{$S};
 2922                 $sum{$V}{$S}  += $rhhh_count->{$language}{$V}{$S};
 2923             }
 2924         }
 2925         $max_len      = length($language) if length($language) > $max_len;
 2926     }
 2927     my $column_1_offset = 0;
 2928        $column_1_offset = $max_len - 17 if $max_len > 17;
 2929     $elapsed_sec = 0.5 unless $elapsed_sec;
 2930 
 2931     my $spacing_0 = 23;
 2932     my $spacing_1 = 13;
 2933     my $spacing_2 =  9;
 2934     my $spacing_3 = 17;
 2935     if (!$opt_3) {
 2936         $spacing_1 = 19;
 2937         $spacing_2 = 14;
 2938         $spacing_3 = 27;
 2939     }
 2940     $spacing_0 += $column_1_offset;
 2941     $spacing_1 += $column_1_offset;
 2942     $spacing_3 += $column_1_offset;
 2943     my %Format = (
 2944         '1' => { 'xml' => 'name="%s" ',
 2945                  'txt' => "\%-${spacing_0}s ",
 2946                },
 2947         '2' => { 'xml' => 'name="%s" ',
 2948                  'txt' => "\%-${spacing_3}s ",
 2949                },
 2950         '3' => { 'xml' => 'files_count="%d" ',
 2951                  'txt' => '%6d ',
 2952                },
 2953         '4' => { 'xml' => 'blank="%d" comment="%d" code="%d" ',
 2954                  'txt' => "\%${spacing_2}d \%${spacing_2}d \%${spacing_2}d",
 2955                },
 2956         '5' => { 'xml' => 'blank="%.2f" comment="%.2f" code="%d" ',
 2957                  'txt' => "\%3.2f \%3.2f \%${spacing_2}d",
 2958                },
 2959         '6' => { 'xml' => 'factor="%.2f" scaled="%.2f" ',
 2960                  'txt' => ' x %6.2f = %14.2f',
 2961                },
 2962     );
 2963     my $Style = "txt";
 2964        $Style = "xml" if $opt_xml ;
 2965        $Style = "xml" if $opt_yaml;  # not a typo; just set to anything but txt
 2966        $Style = "xml" if $opt_json;  # not a typo; just set to anything but txt
 2967        $Style = "xml" if $opt_csv ;  # not a typo; just set to anything but txt
 2968 
 2969     my $hyphen_line = sprintf "%s", '-' x (79 + $column_1_offset);
 2970        $hyphen_line = sprintf "%s", '-' x (68 + $column_1_offset)
 2971             if (!$opt_3) and (68 + $column_1_offset) > 79;
 2972     my $data_line  = "";
 2973     my $first_column;
 2974     my $BY_LANGUAGE = 0;
 2975     my $BY_FILE     = 0;
 2976     if      ($report_type eq "by language") {
 2977         $first_column = "Language";
 2978         $BY_LANGUAGE  = 1;
 2979     } elsif ($report_type eq "by file")     {
 2980         $first_column = "File";
 2981         $BY_FILE      = 1;
 2982     } else {
 2983         $first_column = "Report File";
 2984     }
 2985 
 2986     # column headers
 2987     if (!$opt_3 and $BY_FILE) {
 2988         my $spacing_n = $spacing_1 - 11;
 2989         $data_line  = sprintf "%-${spacing_n}s" , $first_column;
 2990     } else {
 2991         $data_line  = sprintf "%-${spacing_1}s ", $first_column;
 2992     }
 2993     if ($BY_FILE) {
 2994         $data_line .= sprintf "%${spacing_2}s"   , ""     ;
 2995     } else {
 2996         $data_line .= sprintf "%${spacing_2}s "  , "files";
 2997     }
 2998     my $PCT_symbol = "";
 2999        $PCT_symbol = " \%" if $opt_by_percent;
 3000     $data_line .= sprintf "%${spacing_2}s %${spacing_2}s %${spacing_2}s",
 3001         "blank${PCT_symbol}"         ,
 3002         "comment${PCT_symbol}"       ,
 3003         "code";
 3004 
 3005     if ($Style eq "txt") {
 3006         push @results, $data_line;
 3007         push @results, $hyphen_line;
 3008     }
 3009 
 3010     # sort diff output in descending order of cumulative entries
 3011     foreach my $lang_or_file (sort {
 3012                                 ($rhhh_count->{$b}{'code'}{'added'}    +
 3013                                  $rhhh_count->{$b}{'code'}{'same'}     +
 3014                                  $rhhh_count->{$b}{'code'}{'modified'} +
 3015                                  $rhhh_count->{$b}{'code'}{'removed'}  )  <=>
 3016                                 ($rhhh_count->{$a}{'code'}{'added'}    +
 3017                                  $rhhh_count->{$a}{'code'}{'same'}     +
 3018                                  $rhhh_count->{$a}{'code'}{'modified'} +
 3019                                  $rhhh_count->{$a}{'code'}{'removed'})
 3020                               or $a cmp $b }
 3021                                     keys %{$rhhh_count}) {
 3022 
 3023         if ($BY_FILE) {
 3024             push @results, rm_leading_tempdir($lang_or_file, \%TEMP_DIR);
 3025         } else {
 3026             push @results, $lang_or_file;
 3027         }
 3028         foreach my $S (qw(same modified added removed)) {
 3029             my $indent = $spacing_1 - 2;
 3030             my $line .= sprintf " %-${indent}s", $S;
 3031             if ($BY_FILE) {
 3032                 $line .= sprintf "   ";
 3033             } else {
 3034                 $line .= sprintf "  %${spacing_2}s", $rhhh_count->{$lang_or_file}{'nFiles'}{$S};
 3035             }
 3036             if ($opt_by_percent) {
 3037                 my $DEN = compute_denominator($opt_by_percent  ,
 3038                     $rhhh_count->{$lang_or_file}{'code'}{$S}   ,
 3039                     $rhhh_count->{$lang_or_file}{'comment'}{$S},
 3040                     $rhhh_count->{$lang_or_file}{'blank'}{$S}  );
 3041                 if ($rhhh_count->{$lang_or_file}{'code'}{$S} > 0) {
 3042                     $line .= sprintf " %14.2f %14.2f %${spacing_2}s",
 3043                         $rhhh_count->{$lang_or_file}{'blank'}{$S}   / $DEN * 100,
 3044                         $rhhh_count->{$lang_or_file}{'comment'}{$S} / $DEN * 100,
 3045                         $rhhh_count->{$lang_or_file}{'code'}{$S}    ;
 3046                 } else {
 3047                     $line .= sprintf " %14.2f %14.2f %${spacing_2}s",
 3048                         0.0, 0.0, $rhhh_count->{$lang_or_file}{'code'}{$S}    ;
 3049                 }
 3050             } else {
 3051                 $line .= sprintf " %${spacing_2}s %${spacing_2}s %${spacing_2}s",
 3052                     $rhhh_count->{$lang_or_file}{'blank'}{$S}   ,
 3053                     $rhhh_count->{$lang_or_file}{'comment'}{$S} ,
 3054                     $rhhh_count->{$lang_or_file}{'code'}{$S}    ;
 3055             }
 3056             push @results, $line;
 3057         }
 3058     }
 3059     push @results, $hyphen_line;
 3060     push @results, "SUM:";
 3061     my $sum_files    = 0;
 3062     my $sum_lines    = 0;
 3063     foreach my $S (qw(same modified added removed)) {
 3064         my $indent = $spacing_1 - 2;
 3065         my $line .= sprintf " %-${indent}s", $S;
 3066             if ($BY_FILE) {
 3067                 $line .= sprintf "   ";
 3068                 $sum_files += 1;
 3069             } else {
 3070                 $line .= sprintf "  %${spacing_2}s", $sum{'nFiles'}{$S};
 3071                 $sum_files += $sum{'nFiles'}{$S};
 3072             }
 3073         if ($opt_by_percent) {
 3074             my $DEN = compute_denominator($opt_by_percent,
 3075                 $sum{'code'}{$S}, $sum{'comment'}{$S}, $sum{'blank'}{$S});
 3076             if ($sum{'code'}{$S} > 0) {
 3077                 $line .= sprintf " %14.2f %14.2f %${spacing_2}s",
 3078                     $sum{'blank'}{$S}   / $DEN * 100,
 3079                     $sum{'comment'}{$S} / $DEN * 100,
 3080                     $sum{'code'}{$S}    ;
 3081             } else {
 3082                 $line .= sprintf " %14.2f %14.2f %${spacing_2}s",
 3083                     0.0, 0.0, $sum{'code'}{$S}    ;
 3084             }
 3085         } else {
 3086             $line .= sprintf " %${spacing_2}s %${spacing_2}s %${spacing_2}s",
 3087                 $sum{'blank'}{$S}   ,
 3088                 $sum{'comment'}{$S} ,
 3089                 $sum{'code'}{$S}    ;
 3090         }
 3091         $sum_lines += $sum{'blank'}{$S} + $sum{'comment'}{$S} + $sum{'code'}{$S};
 3092         push @results, $line;
 3093     }
 3094 
 3095     my $header_line  = sprintf "%s v %s", $URL, $version;
 3096        $header_line .= sprintf("  T=%.2f s (%.1f files/s, %.1f lines/s)",
 3097                         $elapsed_sec           ,
 3098                         $sum_files/$elapsed_sec,
 3099                         $sum_lines/$elapsed_sec) unless $opt_sum_reports or $opt_hide_rate;
 3100     if ($Style eq "txt") {
 3101         unshift @results, output_header($header_line, $hyphen_line, $BY_FILE);
 3102     }
 3103 
 3104     push @results, $hyphen_line;
 3105     write_xsl_file() if $opt_xsl and $opt_xsl eq $CLOC_XSL;
 3106     print "<- diff_report\n" if $opt_v > 2;
 3107 
 3108     return @results;
 3109 } # 1}}}
 3110 sub xml_yaml_or_json_header {                # {{{1
 3111     my ($URL, $version, $elapsed_sec, $sum_files, $sum_lines, $by_file) = @_;
 3112     print "-> xml_yaml_or_json_header\n" if $opt_v > 2;
 3113     my $header      = "";
 3114     my $file_rate   = $sum_files/$elapsed_sec;
 3115     my $line_rate   = $sum_lines/$elapsed_sec;
 3116     my $type        = "";
 3117        $type        = "diff_" if $opt_diff;
 3118     my $report_file = "";
 3119     if ($opt_report_file) {
 3120         if ($opt_sum_reports) {
 3121             if ($by_file) {
 3122                 $report_file = "  <report_file>$opt_report_file.file</report_file>"
 3123             } else {
 3124                 $report_file = "  <report_file>$opt_report_file.lang</report_file>"
 3125             }
 3126         } else {
 3127             $report_file = "  <report_file>$opt_report_file</report_file>"
 3128         }
 3129     }
 3130     if ($opt_xml) {
 3131         $header = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>";
 3132         $header .= "\n<?xml-stylesheet type=\"text/xsl\" href=\"" . $opt_xsl . "\"?>" if $opt_xsl;
 3133         $header .= "<${type}results>
 3134 <header>
 3135   <cloc_url>$URL</cloc_url>
 3136   <cloc_version>$version</cloc_version>
 3137   <elapsed_seconds>$elapsed_sec</elapsed_seconds>
 3138   <n_files>$sum_files</n_files>
 3139   <n_lines>$sum_lines</n_lines>
 3140   <files_per_second>$file_rate</files_per_second>
 3141   <lines_per_second>$line_rate</lines_per_second>";
 3142         $header .= "\n$report_file"
 3143             if $opt_report_file;
 3144         $header .= "\n</header>";
 3145     } elsif ($opt_yaml or $opt_json) {
 3146         my ($Q, $open_B, $close_B, $start, $C) = yaml_to_json_separators();
 3147         $header = "${start}${Q}header${Q} : $open_B
 3148   ${Q}cloc_url${Q}           : ${Q}$URL${Q}${C}
 3149   ${Q}cloc_version${Q}       : ${Q}$version${Q}${C}
 3150   ${Q}elapsed_seconds${Q}    : $elapsed_sec${C}
 3151   ${Q}n_files${Q}            : $sum_files${C}
 3152   ${Q}n_lines${Q}            : $sum_lines${C}
 3153   ${Q}files_per_second${Q}   : $file_rate${C}
 3154   ${Q}lines_per_second${Q}   : $line_rate";
 3155         if ($opt_report_file) {
 3156             if ($opt_sum_reports) {
 3157                 if ($by_file) {
 3158                     $header .= "$C\n  ${Q}report_file${Q}        : ${Q}$opt_report_file.file${Q}"
 3159                 } else {
 3160                     $header .= "$C\n  ${Q}report_file${Q}        : ${Q}$opt_report_file.lang${Q}"
 3161                 }
 3162             } else {
 3163                 $header .= "$C\n  ${Q}report_file${Q}        : ${Q}$opt_report_file${Q}";
 3164             }
 3165         }
 3166         $header .= "${close_B}${C}";
 3167     }
 3168     print "<- xml_yaml_or_json_header\n" if $opt_v > 2;
 3169     return $header;
 3170 } # 1}}}
 3171 sub diff_yaml_report {                       # {{{1
 3172     # returns an array of lines containing the results
 3173     my ($version    , # in
 3174         $elapsed_sec, # in
 3175         $report_type, # in  "by language" | "by report file" | "by file"
 3176         $rhhh_count , # in  count{TYPE}{nFiles|code|blank|comment}{a|m|r|s}
 3177         $rh_scale   , # in
 3178        ) = @_;
 3179     print "-> diff_yaml_report\n" if $opt_v > 2;
 3180     $elapsed_sec = 0.5 unless $elapsed_sec;
 3181     my @results       = ();
 3182     my %sum           = ();
 3183     my ($sum_lines, $sum_files, $BY_FILE, $BY_LANGUAGE) =
 3184         diff_header_sum($report_type, $rhhh_count, \%sum);
 3185 
 3186     if (!$ALREADY_SHOWED_HEADER) {
 3187         push @results,
 3188               xml_yaml_or_json_header($URL, $version, $elapsed_sec,
 3189                                  $sum_files, $sum_lines, $BY_FILE);
 3190         $ALREADY_SHOWED_HEADER = 1;
 3191     }
 3192     foreach my $S (qw(added same modified removed)) {
 3193         push @results, "$S :";
 3194         foreach my $F_or_L (keys %{$rhhh_count}) {
 3195             # force quoted language or filename in case these
 3196             # have embedded funny characters, issue #312
 3197             push @results, "  '" . rm_leading_tempdir($F_or_L, \%TEMP_DIR) . "' :";
 3198             foreach my $k (keys %{$rhhh_count->{$F_or_L}}) {
 3199                 next if $k eq "lang"; # present only in those cases
 3200                                       # where code exists for action $S
 3201                 $rhhh_count->{$F_or_L}{$k}{$S} = 0 unless
 3202                     defined $rhhh_count->{$F_or_L}{$k}{$S};
 3203                 push @results,
 3204                     "    $k : $rhhh_count->{$F_or_L}{$k}{$S}";
 3205             }
 3206         }
 3207     }
 3208 
 3209     push @results, "SUM :";
 3210     foreach my $S (qw(added same modified removed)) {
 3211         push @results, "  $S :";
 3212         foreach my $topic (keys %sum) {
 3213             push @results, "    $topic : $sum{$topic}{$S}";
 3214         }
 3215     }
 3216 
 3217     print "<- diff_yaml_report\n" if $opt_v > 2;
 3218 
 3219     return @results;
 3220 } # 1}}}
 3221 sub diff_json_report {                       # {{{1
 3222     # returns an array of lines containing the results
 3223     my ($version    , # in
 3224         $elapsed_sec, # in
 3225         $report_type, # in  "by language" | "by report file" | "by file"
 3226         $rhhh_count , # in  count{TYPE}{nFiles|code|blank|comment}{a|m|r|s}
 3227         $rh_scale   , # in
 3228        ) = @_;
 3229     print "-> diff_json_report\n" if $opt_v > 2;
 3230     $elapsed_sec = 0.5 unless $elapsed_sec;
 3231     my @results       = ();
 3232     my %sum           = ();
 3233     my ($sum_lines, $sum_files, $BY_FILE, $BY_LANGUAGE) =
 3234         diff_header_sum($report_type, $rhhh_count, \%sum);
 3235 
 3236     if (!$ALREADY_SHOWED_HEADER) {
 3237         push @results,
 3238               xml_yaml_or_json_header($URL, $version, $elapsed_sec,
 3239                                  $sum_files, $sum_lines, $BY_FILE);
 3240         $ALREADY_SHOWED_HEADER = 1;
 3241     }
 3242     foreach my $S (qw(added same modified removed)) {
 3243         push @results, " \"$S\" : {";
 3244         foreach my $F_or_L (keys %{$rhhh_count}) {
 3245             push @results, "  \"" . rm_leading_tempdir($F_or_L, \%TEMP_DIR) . "\" : {";
 3246             foreach my $k (keys %{$rhhh_count->{$F_or_L}}) {
 3247                 next if $k eq "lang"; # present only in those cases
 3248                                       # where code exists for action $S
 3249                 $rhhh_count->{$F_or_L}{$k}{$S} = 0 unless
 3250                     defined $rhhh_count->{$F_or_L}{$k}{$S};
 3251                 push @results,
 3252                     "    \"$k\" : $rhhh_count->{$F_or_L}{$k}{$S},";
 3253             }
 3254             $results[-1] =~ s/,\s*$//;
 3255             push @results, "  },"
 3256         }
 3257         $results[-1] =~ s/,\s*$//;
 3258         push @results, "  },"
 3259     }
 3260 
 3261     push @results, "  \"SUM\" : {";
 3262     foreach my $S (qw(added same modified removed)) {
 3263         push @results, "  \"$S\" : {";
 3264         foreach my $topic (keys %sum) {
 3265             push @results, "    \"$topic\" : $sum{$topic}{$S},";
 3266         }
 3267         $results[-1] =~ s/,\s*$//;
 3268         push @results, "},";
 3269     }
 3270 
 3271     $results[-1] =~ s/,\s*$//;
 3272     push @results, "} }";
 3273     print "<- diff_json_report\n" if $opt_v > 2;
 3274     return @results;
 3275 } # 1}}}
 3276 sub diff_header_sum {                        # {{{1
 3277     my ($report_type, # in  "by language" | "by report file" | "by file"
 3278         $rhhh_count , # in  count{TYPE}{nFiles|code|blank|comment}{a|m|r|s}
 3279         $rhh_sum    , # out sum{nFiles|blank|comment|code}{same|modified|added|removed}
 3280        ) = @_;
 3281 
 3282     my $sum_files = 0;
 3283     my $sum_lines = 0;
 3284     foreach my $language (keys %{$rhhh_count}) {
 3285         foreach my $V (qw(nFiles blank comment code)) {
 3286             foreach my $S (qw(added same modified removed)) {
 3287                 $rhhh_count->{$language}{$V}{$S} = 0 unless
 3288                     defined $rhhh_count->{$language}{$V}{$S};
 3289                 $rhh_sum->{$V}{$S}  += $rhhh_count->{$language}{$V}{$S};
 3290                 if ($V eq "nFiles") {
 3291                     $sum_files += $rhhh_count->{$language}{$V}{$S};
 3292                 } else {
 3293                     $sum_lines += $rhhh_count->{$language}{$V}{$S};
 3294                 }
 3295             }
 3296         }
 3297     }
 3298 
 3299     my $BY_LANGUAGE = 0;
 3300     my $BY_FILE     = 0;
 3301     if      ($report_type eq "by language") {
 3302         $BY_LANGUAGE  = 1;
 3303     } elsif ($report_type eq "by file")     {
 3304         $BY_FILE      = 1;
 3305     }
 3306     return $sum_lines, $sum_files, $BY_FILE, $BY_LANGUAGE;
 3307 } # 1}}}
 3308 sub diff_xml_report {                        # {{{1
 3309     # returns an array of lines containing the results
 3310     my ($version    , # in
 3311         $elapsed_sec, # in
 3312         $report_type, # in  "by language" | "by report file" | "by file"
 3313         $rhhh_count , # in  count{TYPE}{nFiles|code|blank|comment}{a|m|r|s}
 3314         $rh_scale   , # in
 3315        ) = @_;
 3316     print "-> diff_xml_report\n" if $opt_v > 2;
 3317     my ($Q, $open_B, $close_B, $start, $C) = yaml_to_json_separators();
 3318 
 3319 #print "diff_report: ", Dumper($rhhh_count), "\n";
 3320     $elapsed_sec = 0.5 unless $elapsed_sec;
 3321     my @results       = ();
 3322     my %sum           = ();
 3323     my $languages     = ();
 3324 
 3325     my ($sum_lines, $sum_files, $BY_FILE, $BY_LANGUAGE) =
 3326         diff_header_sum($report_type, $rhhh_count, \%sum);
 3327 
 3328     my $data_line   = "";
 3329 
 3330     if (!$ALREADY_SHOWED_HEADER) {
 3331         push @results,
 3332               xml_yaml_or_json_header($URL, $version, $elapsed_sec,
 3333                                  $sum_files, $sum_lines, $BY_FILE);
 3334         $ALREADY_SHOWED_HEADER = 1;
 3335     }
 3336 
 3337     foreach my $S (qw(same modified added removed)) {
 3338         push @results, "  <$S>";
 3339         foreach my $lang_or_file (sort {
 3340                                      $rhhh_count->{$b}{'code'} <=>
 3341                                      $rhhh_count->{$a}{'code'}
 3342                                    }
 3343                               keys %{$rhhh_count}) {
 3344             my $L = "";
 3345 
 3346             if ($BY_FILE) {
 3347                 $L .= sprintf "    <file name=\"%s\" files_count=\"1\" ",
 3348                     xml_metachars(
 3349                         rm_leading_tempdir($lang_or_file, \%TEMP_DIR));
 3350             } else {
 3351                 $L .= sprintf "    <language name=\"%s\" files_count=\"%d\" ",
 3352                         $lang_or_file ,
 3353                         $rhhh_count->{$lang_or_file}{'nFiles'}{$S};
 3354             }
 3355             if ($opt_by_percent) {
 3356               my $DEN = compute_denominator($opt_by_percent            ,
 3357                             $rhhh_count->{$lang_or_file}{'code'}{$S}   ,
 3358                             $rhhh_count->{$lang_or_file}{'comment'}{$S},
 3359                             $rhhh_count->{$lang_or_file}{'blank'}{$S}  );
 3360               foreach my $T (qw(blank comment)) {
 3361                   if ($rhhh_count->{$lang_or_file}{'code'}{$S} > 0) {
 3362                     $L .= sprintf "%s=\"%.2f\" ",
 3363                             $T, $rhhh_count->{$lang_or_file}{$T}{$S} / $DEN * 100;
 3364                   } else {
 3365                     $L .= sprintf "%s=\"0.0\" ", $T;
 3366                   }
 3367               }
 3368               foreach my $T (qw(code)) {
 3369                   $L .= sprintf "%s=\"%d\" ",
 3370                           $T, $rhhh_count->{$lang_or_file}{$T}{$S};
 3371               }
 3372             } else {
 3373               foreach my $T (qw(blank comment code)) {
 3374                   $L .= sprintf "%s=\"%d\" ",
 3375                           $T, $rhhh_count->{$lang_or_file}{$T}{$S};
 3376               }
 3377             }
 3378             push @results, $L . "/>";
 3379         }
 3380 
 3381 
 3382         my $L = sprintf "    <total sum_files=\"%d\" ", $sum{'nFiles'}{$S};
 3383         if ($opt_by_percent) {
 3384           my $DEN = compute_denominator($opt_by_percent,
 3385                         $sum{'code'}{$S}   ,
 3386                         $sum{'comment'}{$S},
 3387                         $sum{'blank'}{$S}  );
 3388           foreach my $V (qw(blank comment)) {
 3389               if ($sum{'code'}{$S} > 0) {
 3390                   $L .= sprintf "%s=\"%.2f\" ", $V, $sum{$V}{$S} / $DEN * 100;
 3391               } else {
 3392                   $L .= sprintf "%s=\"0.0\" ", $V;
 3393               }
 3394           }
 3395           foreach my $V (qw(code)) {
 3396               $L .= sprintf "%s=\"%d\" ", $V, $sum{$V}{$S};
 3397           }
 3398         } else {
 3399           foreach my $V (qw(blank comment code)) {
 3400               $L .= sprintf "%s=\"%d\" ", $V, $sum{$V}{$S};
 3401           }
 3402         }
 3403         push @results, $L . "/>";
 3404         push @results, "  </$S>";
 3405     }
 3406 
 3407     push @results, "</diff_results>";
 3408     write_xsl_file() if $opt_xsl and $opt_xsl eq $CLOC_XSL;
 3409     print "<- diff_xml_report\n" if $opt_v > 2;
 3410     return @results;
 3411 } # 1}}}
 3412 sub diff_csv_report {                        # {{{1
 3413     # returns an array of lines containing the results
 3414     my ($version    , # in
 3415         $elapsed_sec, # in
 3416         $report_type, # in  "by language" | "by report file" | "by file"
 3417         $rhhh_count , # in  count{TYPE}{nFiles|code|blank|comment}{a|m|r|s}
 3418         $rh_scale   , # in  unused
 3419        ) = @_;
 3420     print "-> diff_csv_report\n" if $opt_v > 2;
 3421 
 3422     my @results       = ();
 3423     my $languages     = ();
 3424 
 3425     my $data_line   = "";
 3426     my $BY_LANGUAGE = 0;
 3427     my $BY_FILE     = 0;
 3428     if      ($report_type eq "by language") {
 3429         $BY_LANGUAGE  = 1;
 3430     } elsif ($report_type eq "by file")     {
 3431         $BY_FILE      = 1;
 3432     }
 3433     my $DELIM = ",";
 3434        $DELIM = $opt_csv_delimiter if defined $opt_csv_delimiter;
 3435        $DELIM = "|" if defined $opt_md;
 3436 
 3437     $elapsed_sec = 0.5 unless $elapsed_sec;
 3438 
 3439     my $line = "Language${DELIM} ";
 3440        $line = "File${DELIM} " if $BY_FILE;
 3441     foreach my $item (qw(files blank comment code)) {
 3442         next if $BY_FILE and $item eq 'files';
 3443         foreach my $symbol ( '==', '!=', '+', '-', ) {
 3444             $line .= "$symbol $item${DELIM} ";
 3445         }
 3446     }
 3447 
 3448     my $T_elapsed_sec = "T=$elapsed_sec s";
 3449        $T_elapsed_sec = "" if $opt_hide_rate;
 3450 
 3451     if ($opt_md) {
 3452         push @results, "cloc|$URL v $version $T_elapsed_sec";
 3453         push @results, "--- | ---";
 3454         push @results, "";
 3455         push @results, $line;
 3456         my @col_header  = ();
 3457         push @col_header, ":-------";
 3458         foreach (1..16) {
 3459             push @col_header, "-------:";
 3460         }
 3461         push @results, join("|", @col_header) . "|";
 3462     } else {
 3463         $line .= "\"$URL v $version $T_elapsed_sec\"";
 3464         push @results, $line;
 3465     }
 3466 
 3467     foreach my $lang_or_file (keys %{$rhhh_count}) {
 3468         $rhhh_count->{$lang_or_file}{'code'}{'added'} = 0 unless
 3469             defined $rhhh_count->{$lang_or_file}{'code'};
 3470     }
 3471     foreach my $lang_or_file (sort {
 3472                                  $rhhh_count->{$b}{'code'} <=>
 3473                                  $rhhh_count->{$a}{'code'}
 3474                                }
 3475                           keys %{$rhhh_count}) {
 3476         if ($BY_FILE) {
 3477             $line = rm_leading_tempdir($lang_or_file, \%TEMP_DIR) . "$DELIM ";
 3478         } else {
 3479             $line = $lang_or_file . "${DELIM} ";
 3480         }
 3481         if ($opt_by_percent) {
 3482           foreach my $item (qw(nFiles)) {
 3483               next if $BY_FILE and $item eq 'nFiles';
 3484               foreach my $symbol (qw(same modified added removed)) {
 3485                   if (defined $rhhh_count->{$lang_or_file}{$item}{$symbol}) {
 3486                       $line .= "$rhhh_count->{$lang_or_file}{$item}{$symbol}${DELIM} ";
 3487                   } else {
 3488                       $line .= "0${DELIM} ";
 3489                   }
 3490               }
 3491           }
 3492           foreach my $item (qw(blank comment)) {
 3493               foreach my $symbol (qw(same modified added removed)) {
 3494                   if (defined $rhhh_count->{$lang_or_file}{$item}{$symbol} and
 3495                       defined $rhhh_count->{$lang_or_file}{'code'}{$symbol} and
 3496                       $rhhh_count->{$lang_or_file}{'code'}{$symbol} > 0) {
 3497                       $line .= sprintf("%.2f", $rhhh_count->{$lang_or_file}{$item}{$symbol} / $rhhh_count->{$lang_or_file}{'code'}{$symbol} * 100).${DELIM};
 3498                   } else {
 3499                       $line .= "0.00${DELIM} ";
 3500                   }
 3501               }
 3502           }
 3503           foreach my $item (qw(code)) {
 3504               foreach my $symbol (qw(same modified added removed)) {
 3505                   if (defined $rhhh_count->{$lang_or_file}{$item}{$symbol}) {
 3506                       $line .= "$rhhh_count->{$lang_or_file}{$item}{$symbol}${DELIM} ";
 3507                   } else {
 3508                       $line .= "0${DELIM} ";
 3509                   }
 3510               }
 3511           }
 3512         } else {
 3513           foreach my $item (qw(nFiles blank comment code)) {
 3514               next if $BY_FILE and $item eq 'nFiles';
 3515               foreach my $symbol (qw(same modified added removed)) {
 3516                   if (defined $rhhh_count->{$lang_or_file}{$item}{$symbol}) {
 3517                       $line .= "$rhhh_count->{$lang_or_file}{$item}{$symbol}${DELIM} ";
 3518                   } else {
 3519                       $line .= "0${DELIM} ";
 3520                   }
 3521               }
 3522           }
 3523         }
 3524         push @results, $line;
 3525     }
 3526 
 3527     print "<- diff_csv_report\n" if $opt_v > 2;
 3528     return @results;
 3529 } # 1}}}
 3530 sub rm_leading_tempdir {                     # {{{1
 3531     my ($in_file, $rh_temp_dirs, ) = @_;
 3532     my $clean_filename = $in_file;
 3533     foreach my $temp_d (keys %{$rh_temp_dirs}) {
 3534         if ($ON_WINDOWS) {
 3535         # \ -> / necessary to allow the next if test's
 3536         # m{} to work in the presence of spaces in file names
 3537             $temp_d         =~ s{\\}{/}g;
 3538             $clean_filename =~ s{\\}{/}g;
 3539         }
 3540         if ($clean_filename =~ m{^$temp_d/}) {
 3541             $clean_filename =~ s{^$temp_d/}{};
 3542             last;
 3543         }
 3544     }
 3545     if ($ON_WINDOWS and $opt_by_file) { # then go back from / to \
 3546         if ($opt_json) {
 3547             $clean_filename =~ s{/}{\\\\}g;
 3548         } else {
 3549             $clean_filename =~ s{/}{\\}g;
 3550         }
 3551     }
 3552     return $clean_filename;
 3553 } # 1}}}
 3554 sub generate_sql    {                        # {{{1
 3555     my ($elapsed_sec, # in
 3556         $rhh_count  , # in  count{TYPE}{lang|code|blank|comment|scaled}
 3557         $rh_scale   , # in
 3558        ) = @_;
 3559     print "-> generate_sql\n" if $opt_v > 2;
 3560 
 3561 #print "generate_sql A [$opt_sql_project]\n";
 3562     $opt_sql_project = cwd() unless defined $opt_sql_project;
 3563     $opt_sql_project = '' unless defined $opt_sql_project; # have seen cwd() fail
 3564 #print "generate_sql B [$opt_sql_project]\n";
 3565     $opt_sql_project =~ s{/}{\\}g if $ON_WINDOWS;
 3566 #print "generate_sql C [$opt_sql_project]\n";
 3567 
 3568     my $schema = undef;
 3569     if ($opt_sql_style eq "oracle") {
 3570         $schema = "
 3571 CREATE TABLE metadata
 3572 (
 3573   timestamp   TIMESTAMP,
 3574   project     VARCHAR2(500 CHAR),
 3575   elapsed_s   NUMBER(10, 6)
 3576 )
 3577 /
 3578 
 3579 CREATE TABLE t
 3580 (
 3581   project        VARCHAR2(500 CHAR),
 3582   language       VARCHAR2(500 CHAR),
 3583   file_fullname  VARCHAR2(500 CHAR),
 3584   file_dirname   VARCHAR2(500 CHAR),
 3585   file_basename  VARCHAR2(500 CHAR),
 3586   nblank         INTEGER,
 3587   ncomment       INTEGER,
 3588   ncode          INTEGER,
 3589   nscaled        NUMBER(10, 6)
 3590 )
 3591 /
 3592 
 3593 ";
 3594     } else {
 3595         $schema = "
 3596 create table metadata (          -- $URL v $VERSION
 3597                 timestamp varchar(500),
 3598                 Project   varchar(500),
 3599                 elapsed_s real);
 3600 create table t        (
 3601                 Project       varchar(500)   ,
 3602                 Language      varchar(500)   ,
 3603                 File          varchar(500)   ,
 3604                 File_dirname  varchar(500)   ,
 3605                 File_basename varchar(500)   ,
 3606                 nBlank        integer        ,
 3607                 nComment      integer        ,
 3608                 nCode         integer        ,
 3609                 nScaled       real           );
 3610 ";
 3611     }
 3612     $opt_sql = "-" if $opt_sql eq "1";
 3613 
 3614     my $open_mode = ">";
 3615        $open_mode = ">>" if $opt_sql_append;
 3616 
 3617     my $fh = new IO::File; # $opt_sql, "w";
 3618     if (!$fh->open("${open_mode}${opt_sql}")) {
 3619         die "Unable to write to $opt_sql  $!\n";
 3620     }
 3621     print $fh $schema unless defined $opt_sql_append;
 3622 
 3623     my $insert_into_t = "insert into t ";
 3624     if ($opt_sql_style eq "oracle") {
 3625         printf $fh "insert into metadata values(TO_TIMESTAMP('%s','yyyy-mm-dd hh24:mi:ss'), '%s', %f);\n",
 3626                     strftime("%Y-%m-%d %H:%M:%S", localtime(time())),
 3627                     $opt_sql_project, $elapsed_sec;
 3628     } elsif ($opt_sql_style eq "named_columns") {
 3629         print $fh "begin transaction;\n";
 3630         $insert_into_t .= "( Project, Language, File, File_dirname, File_basename, nBlank, nComment, nCode, nScaled )";
 3631     } else {
 3632         print $fh "begin transaction;\n";
 3633         printf $fh "insert into metadata values('%s', '%s', %f);\n",
 3634                     strftime("%Y-%m-%d %H:%M:%S", localtime(time())),
 3635                     $opt_sql_project, $elapsed_sec;
 3636     }
 3637 
 3638     my $nIns = 0;
 3639     foreach my $file (keys %{$rhh_count}) {
 3640         my $language = $rhh_count->{$file}{'lang'};
 3641         my $clean_filename = $file;
 3642         # If necessary (that is, if the input contained an
 3643         # archive file [.tar.gz, etc]), strip the temporary
 3644         # directory name which was used to expand the archive
 3645         # from the file name.
 3646 
 3647         $clean_filename = rm_leading_tempdir($clean_filename, \%TEMP_DIR);
 3648         $clean_filename =~ s/\'/''/g;  # double embedded single quotes
 3649                                        # to escape them
 3650 
 3651         printf $fh "$insert_into_t values('%s', '%s', '%s', '%s', '%s', " .
 3652                    "%d, %d, %d, %f);\n",
 3653                     $opt_sql_project           ,
 3654                     $language                  ,
 3655                     $clean_filename            ,
 3656                     dirname( $clean_filename)  ,
 3657                     basename($clean_filename)  ,
 3658                     $rhh_count->{$file}{'blank'},
 3659                     $rhh_count->{$file}{'comment'},
 3660                     $rhh_count->{$file}{'code'}   ,
 3661                     $rhh_count->{$file}{'code'}*$rh_scale->{$language};
 3662 
 3663         ++$nIns;
 3664         if (!($nIns % 10_000) and ($opt_sql_style ne "oracle")) {
 3665             print $fh "commit;\n";
 3666             print $fh "begin transaction;\n";
 3667         }
 3668     }
 3669     if ($opt_sql_style ne "oracle") {
 3670         print $fh "commit;\n";
 3671     }
 3672 
 3673     $fh->close unless $opt_sql eq "-"; # don't try to close STDOUT
 3674     print "<- generate_sql\n" if $opt_v > 2;
 3675 
 3676     # sample query:
 3677     #
 3678     #   select project, language,
 3679     #          sum(nCode)     as Code,
 3680     #          sum(nComment)  as Comments,
 3681     #          sum(nBlank)    as Blank,
 3682     #          sum(nCode)+sum(nComment)+sum(nBlank) as All_Lines,
 3683     #          100.0*sum(nComment)/(sum(nCode)+sum(nComment)) as Comment_Pct
 3684     #          from t group by Project, Language order by Project, Code desc;
 3685     #
 3686 } # 1}}}
 3687 sub output_header   {                        # {{{1
 3688     my ($header_line,
 3689         $hyphen_line,
 3690         $BY_FILE    ,)    = @_;
 3691     print "-> output_header\n" if $opt_v > 2;
 3692     my @R = ();
 3693     if      ($opt_xml) {
 3694         if (!$ALREADY_SHOWED_XML_SECTION) {
 3695             push @R, "<?xml version=\"1.0\" encoding=\"UTF-8\"?>";
 3696             push @R, '<?xml-stylesheet type="text/xsl" href="' .
 3697                             $opt_xsl . '"?>' if $opt_xsl;
 3698             push @R, "<results>";
 3699             push @R, "<header>$header_line</header>";
 3700             $ALREADY_SHOWED_XML_SECTION = 1;
 3701         }
 3702         if ($BY_FILE) {
 3703             push @R, "<files>";
 3704         } else {
 3705             push @R, "<languages>";
 3706         }
 3707     } elsif ($opt_yaml) {
 3708         push @R, "---\n# $header_line";
 3709     } elsif ($opt_csv or $opt_md) {
 3710         # append the header to the end of the column headers
 3711         # to keep the output a bit cleaner from a spreadsheet
 3712         # perspective
 3713     } else {
 3714         if ($ALREADY_SHOWED_HEADER) {
 3715             push @R, "";
 3716         } else {
 3717             push @R, $header_line;
 3718             $ALREADY_SHOWED_HEADER = 1;
 3719         }
 3720         push @R, $hyphen_line;
 3721     }
 3722     print "<- output_header\n" if $opt_v > 2;
 3723     return @R;
 3724 } # 1}}}
 3725 sub generate_report {                        # {{{1
 3726     # returns an array of lines containing the results
 3727     my ($version    , # in
 3728         $elapsed_sec, # in
 3729         $report_type, # in  "by language" | "by report file" | "by file"
 3730         $rhh_count  , # in  count{TYPE}{nFiles|code|blank|comment|scaled}
 3731         $rh_scale   , # in
 3732        ) = @_;
 3733 
 3734     print "-> generate_report\n" if $opt_v > 2;
 3735     my $DELIM = ",";
 3736        $DELIM = $opt_csv_delimiter if defined $opt_csv_delimiter;
 3737        $DELIM = "|" if defined $opt_md;
 3738 
 3739     my @results       = ();
 3740 
 3741     my $languages     = ();
 3742 
 3743     my $sum_files     = 0;
 3744     my $sum_code      = 0;
 3745     my $sum_blank     = 0;
 3746     my $sum_comment   = 0;
 3747     my $max_len       = 0;
 3748     foreach my $language (keys %{$rhh_count}) {
 3749         $sum_files   += $rhh_count->{$language}{'nFiles'} ;
 3750         $sum_blank   += $rhh_count->{$language}{'blank'}  ;
 3751         $sum_comment += $rhh_count->{$language}{'comment'};
 3752         $sum_code    += $rhh_count->{$language}{'code'}   ;
 3753         $max_len      = length($language) if length($language) > $max_len;
 3754     }
 3755     my $column_1_offset = 0;
 3756        $column_1_offset = $max_len - 17 if $max_len > 17;
 3757     my $sum_lines = $sum_blank + $sum_comment + $sum_code;
 3758     $elapsed_sec = 0.5 unless $elapsed_sec;
 3759 
 3760     my $spacing_0 = 23;
 3761     my $spacing_1 = 13;
 3762     my $spacing_2 =  9;
 3763     my $spacing_3 = 17;
 3764     if (!$opt_3) {
 3765         $spacing_1 = 19;
 3766         $spacing_2 = 14;
 3767         $spacing_3 = 27;
 3768     }
 3769     $spacing_0 += $column_1_offset;
 3770     $spacing_1 += $column_1_offset;
 3771     $spacing_3 += $column_1_offset;
 3772     my %Format = (
 3773         '1' => { 'xml' => 'name="%s" ',
 3774                  'txt' => "\%-${spacing_0}s ",
 3775                },
 3776         '2' => { 'xml' => 'name="%s" ',
 3777                  'txt' => "\%-${spacing_3}s ",
 3778                },
 3779         '3' => { 'xml' => 'files_count="%d" ',
 3780                  'txt' => '%6d ',
 3781                },
 3782         '4' => { 'xml' => 'blank="%d" comment="%d" code="%d" ',
 3783                  'txt' => "\%${spacing_2}d \%${spacing_2}d \%${spacing_2}d",
 3784                },
 3785         '5' => { 'xml' => 'blank="%3.2f" comment="%3.2f" code="%d" ',
 3786                  'txt' => "\%14.2f \%14.2f \%${spacing_2}d",
 3787                },
 3788         '6' => { 'xml' => 'factor="%.2f" scaled="%.2f" ',
 3789                  'txt' => ' x %6.2f = %14.2f',
 3790                },
 3791     );
 3792     my $Style = "txt";
 3793        $Style = "xml" if $opt_xml ;
 3794        $Style = "xml" if $opt_yaml;  # not a typo; just set to anything but txt
 3795        $Style = "xml" if $opt_json;  # not a typo; just set to anything but txt
 3796        $Style = "xml" if $opt_csv ;  # not a typo; just set to anything but txt
 3797 
 3798     my $hyphen_line = sprintf "%s", '-' x (79 + $column_1_offset);
 3799        $hyphen_line = sprintf "%s", '-' x (68 + $column_1_offset)
 3800             if (!$opt_sum_reports) and (!$opt_3) and (68 + $column_1_offset) > 79;
 3801     my $data_line  = "";
 3802     my $first_column;
 3803     my $BY_LANGUAGE = 0;
 3804     my $BY_FILE     = 0;
 3805     if      ($report_type eq "by language") {
 3806         $first_column = "Language";
 3807         $BY_LANGUAGE  = 1;
 3808     } elsif ($report_type eq "by file")     {
 3809         $first_column = "File";
 3810         $BY_FILE      = 1;
 3811     } elsif ($report_type eq "by report file")     {
 3812         $first_column = "File";
 3813     } else {
 3814         $first_column = "Report File";
 3815     }
 3816 
 3817     my $header_line  = sprintf "%s v %s", $URL, $version;
 3818        $header_line .= sprintf("  T=%.2f s (%.1f files/s, %.1f lines/s)",
 3819                         $elapsed_sec           ,
 3820                         $sum_files/$elapsed_sec,
 3821                         $sum_lines/$elapsed_sec) unless $opt_sum_reports or $opt_hide_rate;
 3822     if ($opt_xml or $opt_yaml or $opt_json) {
 3823         if (!$ALREADY_SHOWED_HEADER) {
 3824             if ($opt_by_file_by_lang and $opt_json) {
 3825                 push @results, '{ "by_file" : ';
 3826             }
 3827             push @results, xml_yaml_or_json_header($URL, $version, $elapsed_sec,
 3828                                                    $sum_files, $sum_lines, $BY_FILE);
 3829 #           $ALREADY_SHOWED_HEADER = 1 unless $opt_sum_reports;
 3830             # --sum-reports yields two xml or yaml files, one by
 3831             # language and one by report file, each of which needs a header
 3832         }
 3833         if ($opt_xml) {
 3834             if ($BY_FILE or ($report_type eq "by report file")) {
 3835                 push @results, "<files>";
 3836             } else {
 3837                 push @results, "<languages>";
 3838             }
 3839         }
 3840     } else {
 3841         push @results, output_header($header_line, $hyphen_line, $BY_FILE);
 3842     }
 3843 
 3844     if ($Style eq "txt") {
 3845         # column headers
 3846         if (!$opt_3 and $BY_FILE) {
 3847             my $spacing_n = $spacing_1 - 11;
 3848             $data_line  = sprintf "%-${spacing_n}s ", $first_column;
 3849         } else {
 3850             $data_line  = sprintf "%-${spacing_1}s ", $first_column;
 3851         }
 3852         if ($BY_FILE) {
 3853             $data_line .= sprintf "%${spacing_2}s "  , " "    ;
 3854         } else {
 3855             $data_line .= sprintf "%${spacing_2}s "  , "files";
 3856         }
 3857         my $PCT_symbol = "";
 3858            $PCT_symbol = " \%" if $opt_by_percent;
 3859         $data_line .= sprintf "%${spacing_2}s %${spacing_2}s %${spacing_2}s",
 3860             "blank${PCT_symbol}"   ,
 3861             "comment${PCT_symbol}" ,
 3862             "code";
 3863         $data_line .= sprintf " %8s   %14s",
 3864             "scale"         ,
 3865             "3rd gen. equiv"
 3866               if $opt_3;
 3867         if ($opt_md) {
 3868             my @col_header  = ();
 3869             if ($data_line =~ m{\s%}) {
 3870                 $data_line =~ s{\s%}{_%}g;
 3871                 foreach my $w ( split(' ', $data_line) ) {
 3872                     $w =~ s{_%}{ %};
 3873                     push @col_header, $w;
 3874                 }
 3875             } else {
 3876                 push @col_header, split(' ', $data_line);
 3877             }
 3878             my @col_hyphens    = ( '-------:') x scalar(@col_header);
 3879                $col_hyphens[0] =   ':-------'; # first column left justified
 3880             push @results, join("|", @col_header );
 3881             push @results, join("|", @col_hyphens);
 3882         } else {
 3883             push @results, $data_line;
 3884             push @results, $hyphen_line;
 3885         }
 3886     }
 3887 
 3888     if ($opt_csv)  {
 3889         my $header2;
 3890         if ($BY_FILE) {
 3891             $header2 = "language${DELIM}filename";
 3892         } else {
 3893             $header2 = "files${DELIM}language";
 3894         }
 3895         $header2 .= "${DELIM}blank${DELIM}comment${DELIM}code";
 3896         $header2 .= "${DELIM}scale${DELIM}3rd gen. equiv" if $opt_3;
 3897         $header2 .= ${DELIM} . '"' . $header_line . '"';
 3898         push @results, $header2;
 3899     }
 3900 
 3901     my $sum_scaled = 0;
 3902     foreach my $lang_or_file (sort {
 3903                                  $rhh_count->{$b}{'code'} <=>
 3904                                  $rhh_count->{$a}{'code'}
 3905                               or $a cmp $b
 3906                                         }
 3907                                    keys %{$rhh_count}) {
 3908         next if $lang_or_file eq "by report file";
 3909         my ($factor, $scaled);
 3910         if ($BY_LANGUAGE or $BY_FILE) {
 3911             $factor = 1;
 3912             if ($BY_LANGUAGE) {
 3913                 if (defined $rh_scale->{$lang_or_file}) {
 3914                     $factor = $rh_scale->{$lang_or_file};
 3915                 } else {
 3916                     warn "No scale factor for $lang_or_file; using 1.00";
 3917                 }
 3918             } else { # by individual code file
 3919                 if ($report_type ne "by report file") {
 3920                     next unless defined $rhh_count->{$lang_or_file}{'lang'};
 3921                     next unless defined $rh_scale->{$rhh_count->{$lang_or_file}{'lang'}};
 3922                     $factor = $rh_scale->{$rhh_count->{$lang_or_file}{'lang'}};
 3923                 }
 3924             }
 3925             $scaled = $factor*$rhh_count->{$lang_or_file}{'code'};
 3926         } else {
 3927             if (!defined $rhh_count->{$lang_or_file}{'scaled'}) {
 3928                 $opt_3 = 0;
 3929                 # If we're summing together files previously generated
 3930                 # with --no3, then $rhh_count->{$lang_or_file}{'scaled'}
 3931                 # will be undefined.  That should only happen when
 3932                 # summing together by file, however.
 3933             } elsif ($BY_LANGUAGE) {
 3934                 warn "Missing scaled language info for $lang_or_file\n";
 3935             }
 3936             if ($opt_3) {
 3937                 $scaled =         $rhh_count->{$lang_or_file}{'scaled'};
 3938                 $factor = $scaled/$rhh_count->{$lang_or_file}{'code'};
 3939             }
 3940         }
 3941 
 3942         if ($BY_FILE) {
 3943             my $clean_filename = rm_leading_tempdir($lang_or_file, \%TEMP_DIR);
 3944                $clean_filename = xml_metachars($clean_filename) if $opt_xml;
 3945             $data_line  = sprintf $Format{'1'}{$Style}, $clean_filename;
 3946         } else {
 3947             $data_line  = sprintf $Format{'2'}{$Style}, $lang_or_file;
 3948         }
 3949         $data_line .= sprintf $Format{3}{$Style}  ,
 3950                         $rhh_count->{$lang_or_file}{'nFiles'} unless $BY_FILE;
 3951         if ($opt_by_percent) {
 3952           my $DEN = compute_denominator($opt_by_percent       ,
 3953                         $rhh_count->{$lang_or_file}{'code'}   ,
 3954                         $rhh_count->{$lang_or_file}{'comment'},
 3955                         $rhh_count->{$lang_or_file}{'blank'}  );
 3956           $data_line .= sprintf $Format{5}{$Style}  ,
 3957               $rhh_count->{$lang_or_file}{'blank'}   / $DEN * 100,
 3958               $rhh_count->{$lang_or_file}{'comment'} / $DEN * 100,
 3959               $rhh_count->{$lang_or_file}{'code'}   ;
 3960         } else {
 3961           $data_line .= sprintf $Format{4}{$Style}  ,
 3962               $rhh_count->{$lang_or_file}{'blank'}  ,
 3963               $rhh_count->{$lang_or_file}{'comment'},
 3964               $rhh_count->{$lang_or_file}{'code'}   ;
 3965         }
 3966         $data_line .= sprintf $Format{6}{$Style}  ,
 3967             $factor                               ,
 3968             $scaled if $opt_3;
 3969         $sum_scaled  += $scaled if $opt_3;
 3970 
 3971         if ($opt_xml) {
 3972             if (defined $rhh_count->{$lang_or_file}{'lang'}) {
 3973                 my $lang = $rhh_count->{$lang_or_file}{'lang'};
 3974                 if (!defined $languages->{$lang}) {
 3975                     $languages->{$lang} = $lang;
 3976                 }
 3977                 $data_line.=' language="' . $lang . '" ';
 3978             }
 3979             if ($BY_FILE or ($report_type eq "by report file")) {
 3980                 push @results, "  <file " . $data_line . "/>";
 3981             } else {
 3982                 push @results, "  <language " . $data_line . "/>";
 3983             }
 3984         } elsif ($opt_yaml or $opt_json) {
 3985             my ($Q, $open_B, $close_B, $start, $C) = yaml_to_json_separators();
 3986             if ($opt_yaml) {
 3987                 # YAML: force quoted language or filename in case these
 3988                 #       have embedded funny characters, issue #312
 3989                 push @results,"'" . rm_leading_tempdir($lang_or_file, \%TEMP_DIR). "' :$open_B";
 3990             } else {
 3991                 push @results,"${Q}" . rm_leading_tempdir($lang_or_file, \%TEMP_DIR). "${Q} :$open_B";
 3992             }
 3993             push @results,"  ${Q}nFiles${Q}: " . $rhh_count->{$lang_or_file}{'nFiles'} . $C
 3994                 unless $BY_FILE;
 3995             if ($opt_by_percent) {
 3996               my $DEN = compute_denominator($opt_by_percent       ,
 3997                             $rhh_count->{$lang_or_file}{'code'}   ,
 3998                             $rhh_count->{$lang_or_file}{'comment'},
 3999                             $rhh_count->{$lang_or_file}{'blank'}  );
 4000               push @results,"  ${Q}blank_pct${Q}: "   .
 4001                 sprintf("%3.2f", $rhh_count->{$lang_or_file}{'blank'} / $DEN * 100) . $C;
 4002               push @results,"  ${Q}comment_pct${Q}: " .
 4003                 sprintf("%3.2f", $rhh_count->{$lang_or_file}{'comment'} / $DEN * 100) . $C;
 4004               push @results,"  ${Q}code${Q}: "    . $rhh_count->{$lang_or_file}{'code'}  . $C;
 4005             } else {
 4006               push @results,"  ${Q}blank${Q}: "   . $rhh_count->{$lang_or_file}{'blank'}   . $C;
 4007               push @results,"  ${Q}comment${Q}: " . $rhh_count->{$lang_or_file}{'comment'} . $C;
 4008               push @results,"  ${Q}code${Q}: "    . $rhh_count->{$lang_or_file}{'code'}    . $C;
 4009             }
 4010             push @results,"  ${Q}language${Q}: "  . $Q . $rhh_count->{$lang_or_file}{'lang'} . $Q . $C
 4011                 if $BY_FILE;
 4012             if ($opt_3) {
 4013                 push @results, "  ${Q}scaled${Q}: " . $scaled . $C;
 4014                 push @results, "  ${Q}factor${Q}: " . $factor . $C;
 4015             }
 4016             if ($opt_json) { # replace the trailing comma with }, on the last line
 4017                 $results[-1] =~ s/,\s*$/},/;
 4018             }
 4019         } elsif ($opt_csv or $opt_md) {
 4020             my $extra_3 = "";
 4021                $extra_3 = "${DELIM}$factor${DELIM}$scaled" if $opt_3;
 4022             my $first_column = undef;
 4023             my $clean_name   = $lang_or_file;
 4024             my $str;
 4025             if ($opt_csv) {
 4026                 if ($BY_FILE) {
 4027                     $first_column = $rhh_count->{$lang_or_file}{'lang'};
 4028                     $clean_name   = rm_leading_tempdir($lang_or_file, \%TEMP_DIR);
 4029                 } else {
 4030                     $first_column = $rhh_count->{$lang_or_file}{'nFiles'};
 4031                 }
 4032                 $str = $first_column   . ${DELIM} .
 4033                        $clean_name     . ${DELIM};
 4034             } else {
 4035                 if ($BY_FILE) {
 4036                     $first_column = $rhh_count->{$lang_or_file}{'lang'};
 4037                     $clean_name   = rm_leading_tempdir($lang_or_file, \%TEMP_DIR);
 4038                     $str = $clean_name . ${DELIM};
 4039                 } else {
 4040                     $first_column = $rhh_count->{$lang_or_file}{'nFiles'};
 4041                     $str = $clean_name     . ${DELIM} .
 4042                            $first_column   . ${DELIM};
 4043                 }
 4044             }
 4045             if ($opt_by_percent) {
 4046               my $DEN = compute_denominator($opt_by_percent               ,
 4047                             $rhh_count->{$lang_or_file}{'code'}   ,
 4048                             $rhh_count->{$lang_or_file}{'comment'},
 4049                             $rhh_count->{$lang_or_file}{'blank'}  );
 4050               $str .= sprintf("%3.2f", $rhh_count->{$lang_or_file}{'blank'}   / $DEN * 100) . ${DELIM} .
 4051                       sprintf("%3.2f", $rhh_count->{$lang_or_file}{'comment'} / $DEN * 100) . ${DELIM} .
 4052                       $rhh_count->{$lang_or_file}{'code'};
 4053             } else {
 4054               $str .= $rhh_count->{$lang_or_file}{'blank'}  . ${DELIM} .
 4055                       $rhh_count->{$lang_or_file}{'comment'}. ${DELIM} .
 4056                       $rhh_count->{$lang_or_file}{'code'};
 4057             }
 4058             $str .= $extra_3;
 4059             push @results, $str;
 4060 
 4061         } else {
 4062             push @results, $data_line;
 4063         }
 4064     }
 4065 
 4066     my $avg_scale = 1;  # weighted average of scale factors
 4067        $avg_scale = sprintf("%.2f", $sum_scaled / $sum_code)
 4068             if $sum_code and $opt_3;
 4069 
 4070     if ($opt_xml) {
 4071         $data_line = "";
 4072         if (!$BY_FILE) {
 4073             $data_line .= sprintf "sum_files=\"%d\" ", $sum_files;
 4074         }
 4075         if ($opt_by_percent) {
 4076           my $DEN = compute_denominator($opt_by_percent    ,
 4077                         $sum_code, $sum_comment, $sum_blank);
 4078           $data_line .= sprintf $Format{'5'}{$Style},
 4079               $sum_blank   / $DEN * 100,
 4080               $sum_comment / $DEN * 100,
 4081               $sum_code    ;
 4082         } else {
 4083           $data_line .= sprintf $Format{'4'}{$Style},
 4084               $sum_blank   ,
 4085               $sum_comment ,
 4086               $sum_code    ;
 4087         }
 4088         $data_line .= sprintf $Format{'6'}{$Style},
 4089             $avg_scale   ,
 4090             $sum_scaled  if $opt_3;
 4091         push @results, "  <total " . $data_line . "/>";
 4092 
 4093         if ($BY_FILE or ($report_type eq "by report file")) {
 4094             push @results, "</files>";
 4095         } else {
 4096             foreach my $language (keys %{$languages}) {
 4097                 push @results, '  <language name="' . $language . '"/>';
 4098             }
 4099             push @results, "</languages>";
 4100         }
 4101 
 4102         if (!$opt_by_file_by_lang or $ALREADY_SHOWED_XML_SECTION) {
 4103             push @results, "</results>";
 4104         } else {
 4105             $ALREADY_SHOWED_XML_SECTION = 1;
 4106         }
 4107     } elsif ($opt_yaml or $opt_json) {
 4108         my ($Q, $open_B, $close_B, $start, $C) = yaml_to_json_separators();
 4109         push @results, "${Q}SUM${Q}: ${open_B}";
 4110         if ($opt_by_percent) {
 4111           my $DEN = compute_denominator($opt_by_percent    ,
 4112                         $sum_code, $sum_comment, $sum_blank);
 4113           push @results, "  ${Q}blank${Q}: "  . sprintf("%.2f", $sum_blank   / $DEN * 100) . $C;
 4114           push @results, "  ${Q}comment${Q}: ". sprintf("%.2f", $sum_comment / $DEN * 100) . $C;
 4115           push @results, "  ${Q}code${Q}: "   . $sum_code    . $C;
 4116         } else {
 4117           push @results, "  ${Q}blank${Q}: "  . $sum_blank   . $C;
 4118           push @results, "  ${Q}comment${Q}: ". $sum_comment . $C;
 4119           push @results, "  ${Q}code${Q}: "   . $sum_code    . $C;
 4120         }
 4121         push @results, "  ${Q}nFiles${Q}: " . $sum_files   . $C;
 4122         if ($opt_3) {
 4123             push @results, "  ${Q}scaled${Q}: " . $sum_scaled . $C;
 4124             push @results, "  ${Q}factor${Q}: " . $avg_scale  . $C;
 4125         }
 4126         if ($opt_json) {
 4127             $results[-1] =~ s/,\s*$/} }/;
 4128             if ($opt_by_file_by_lang) {
 4129                 if ($ALREADY_SHOWED_HEADER) {
 4130                     $results[-1] .= ' }';
 4131                 } else {
 4132                     $results[-1] .= ', "by_lang" : {';
 4133                 }
 4134             }
 4135         }
 4136     } elsif ($opt_csv) {
 4137         my @entries = ();
 4138         if ($opt_by_file) {
 4139             push @entries, "SUM";
 4140             push @entries, "";
 4141         } else {
 4142             push @entries, $sum_files;
 4143             push @entries, "SUM";
 4144         }
 4145         if ($opt_by_percent) {
 4146             my $DEN = compute_denominator($opt_by_percent    ,
 4147                           $sum_code, $sum_comment, $sum_blank);
 4148             push @entries, sprintf("%.2f", $sum_blank   / $DEN * 100);
 4149             push @entries, sprintf("%.2f", $sum_comment / $DEN * 100);
 4150         } else {
 4151             push @entries, $sum_blank;
 4152             push @entries, $sum_comment;
 4153         }
 4154         push @entries, $sum_code;
 4155         if ($opt_3) {
 4156             push @entries, $sum_scaled;
 4157             push @entries, $avg_scale ;
 4158         }
 4159         push @results, join(",", @entries);
 4160     } else {
 4161 
 4162         if ($BY_FILE) {
 4163             $data_line  = sprintf "%-${spacing_0}s ", "SUM:"  ;
 4164         } else {
 4165             $data_line  = sprintf "%-${spacing_1}s ", "SUM:"  ;
 4166             $data_line .= sprintf "%${spacing_2}d ", $sum_files;
 4167         }
 4168         if ($opt_by_percent) {
 4169           my $DEN = compute_denominator($opt_by_percent    ,
 4170                         $sum_code, $sum_comment, $sum_blank);
 4171           $data_line .= sprintf $Format{'5'}{$Style},
 4172               $sum_blank   / $DEN * 100,
 4173               $sum_comment / $DEN * 100,
 4174               $sum_code    ;
 4175         } else {
 4176           $data_line .= sprintf $Format{'4'}{$Style},
 4177               $sum_blank   ,
 4178               $sum_comment ,
 4179               $sum_code    ;
 4180         }
 4181         $data_line .= sprintf $Format{'6'}{$Style},
 4182             $avg_scale   ,
 4183             $sum_scaled if $opt_3;
 4184         if ($opt_md) {
 4185             my @words = split(' ', $data_line);
 4186             my $n_cols = scalar(@words);
 4187 #           my $n_cols = scalar(split(' ', $data_line));  # deprecated
 4188             $data_line =~ s/\s+/\|/g;
 4189             my @col_hyphens