"Fossies" - the Fresh Open Source Software Archive

Member "cloc-1.86/cloc" (19 May 2020, 615278 Bytes) of package /linux/privat/cloc-1.86.tar.gz:


As a special service, "Fossies" has tried to format the requested source page into HTML using (guessed) Perl source code syntax highlighting (style: standard) with prefixed line numbers and a code folding option. Alternatively, you can view or download the uninterpreted source code file here. See also the latest Fossies "Diffs" side-by-side code changes report for "cloc": 1.84_vs_1.86.

    1 #!/usr/bin/env perl
    2 # cloc -- Count Lines of Code                  {{{1
    3 # Copyright (C) 2006-2020 Al Danial <al.danial@gmail.com>
    4 # First release August 2006
    5 #
    6 # Includes code from:
    7 #   - SLOCCount v2.26
    8 #     http://www.dwheeler.com/sloccount/
    9 #     by David Wheeler.
   10 #   - Regexp::Common v2013031301
   11 #     http://search.cpan.org/~abigail/Regexp-Common-2013031301/lib/Regexp/Common.pm
   12 #     by Damian Conway and Abigail.
   13 #   - Win32::Autoglob
   14 #     http://search.cpan.org/~sburke/Win32-Autoglob-1.01/Autoglob.pm
   15 #     by Sean M. Burke.
   16 #   - Algorithm::Diff
   17 #     http://search.cpan.org/~tyemq/Algorithm-Diff-1.1902/lib/Algorithm/Diff.pm
   18 #     by Tye McQueen.
   19 #
   20 # This program is free software; you can redistribute it and/or modify
   21 # it under the terms of the GNU General Public License as published by
   22 # the Free Software Foundation; either version 2 of the License, or
   23 # (at your option) any later version.
   24 #
   25 # This program is distributed in the hope that it will be useful,
   26 # but WITHOUT ANY WARRANTY; without even the implied warranty of
   27 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   28 # GNU General Public License for more details:
   29 # <http://www.gnu.org/licenses/gpl.txt>.
   30 #
   31 # 1}}}
   32 my $VERSION = "1.86";  # release number; odd number == beta; even number == stable
   33 my $URL     = "github.com/AlDanial/cloc";  # scheme left off on purpose: 'https://' pushes header too wide
   34 require 5.006;  # stop immediately on Perl interpreters older than 5.6.0
   35 # use modules                                  {{{1
   36 use warnings;
   37 use strict;
   38 
   39 use Getopt::Long;
   40 use File::Basename;
   41 use File::Temp qw { tempfile tempdir };
   42 use File::Find;
   43 use File::Path;
   44 use File::Spec;
   45 use IO::File;
   46 use List::Util qw( min max );
   47 use Cwd;
   48 use POSIX qw { strftime ceil};
   49 # Parallel::ForkManager isn't in the standard distribution.
   50 # Use it only if installed, and only if --processes=N is given.
   51 # The module load happens in get_max_processes().
   52 my $HAVE_Parallel_ForkManager = 0;  # starts at 0 (not loaded); presumably updated by get_max_processes() -- TODO confirm
   53 
   54 # Digest::MD5 isn't in the standard distribution. Use it only if installed.
   55 my $HAVE_Digest_MD5 = 0;  # becomes 1 only when the module loads below
   56 eval "use Digest::MD5;";  # string eval: a missing module leaves an error in $@ instead of aborting the script
   57 if (defined $Digest::MD5::VERSION) {  # the package's $VERSION is defined only if the load succeeded
   58     $HAVE_Digest_MD5 = 1;
   59 } else {
   60     warn "Digest::MD5 not installed; will skip file uniqueness checks.\n";  # non-fatal: execution continues with the flag at 0
   61 }
   62 
   63 # Time::HiRes became standard with Perl 5.8
   64 my $HAVE_Time_HiRes = 0;  # becomes 1 only when the module loads below
   65 eval "use Time::HiRes;";  # string eval keeps a missing module from being a fatal compile error
   66 $HAVE_Time_HiRes = 1 if defined $Time::HiRes::VERSION;  # no warning is issued here when the module is absent
   67 
   68 my $HAVE_Rexexp_Common;  # 1 if an installed Regexp::Common was loaded, else 0 (assigned in BEGIN below)
   69 # Regexp::Common isn't in the standard distribution.  It will
   70 # be installed in a temp directory if necessary.
   71 BEGIN {  # runs at compile time, so a successful import is in place before the rest of the file is parsed
   72     if (eval "use Regexp::Common; 1") {  # trailing 1 is required: a successful 'use' yields no true value by itself
   73         $HAVE_Rexexp_Common = 1;
   74     } else {
   75         $HAVE_Rexexp_Common = 0;  # module could not be loaded (error text is in $@)
   76     }
   77 }
   78 
   79 my $HAVE_Algorith_Diff = 0;  # set to 1 here only when an installed Algorithm::Diff loads
   80 # Algorithm::Diff isn't in the standard distribution.  It will
   81 # be installed in a temp directory if necessary.
   82 eval "use Algorithm::Diff qw ( sdiff ) ";  # string eval: a load failure is non-fatal; imports sdiff() on success
   83 if (defined $Algorithm::Diff::VERSION) {  # the package's $VERSION is defined only if the load succeeded
   84     $HAVE_Algorith_Diff = 1;
   85 } else {
   86     Install_Algorithm_Diff();  # defined outside this view; presumably supplies the bundled copy -- TODO confirm
   87 }
   88 # print "2 HAVE_Algorith_Diff = $HAVE_Algorith_Diff\n";
   89 # test_alg_diff($ARGV[$#ARGV - 1], $ARGV[$#ARGV]); die;
   90 # die "Hre=$HAVE_Rexexp_Common  Had=$HAVE_Algorith_Diff";
   91 
   92 # Uncomment next two lines when building Windows executable with perl2exe
   93 # or if running on a system that already has Regexp::Common.
   94 #use Regexp::Common;
   95 #$HAVE_Rexexp_Common = 1;
   96 
   97 #perl2exe_include "Regexp/Common/whitespace.pm"
   98 #perl2exe_include "Regexp/Common/URI.pm"
   99 #perl2exe_include "Regexp/Common/URI/fax.pm"
  100 #perl2exe_include "Regexp/Common/URI/file.pm"
  101 #perl2exe_include "Regexp/Common/URI/ftp.pm"
  102 #perl2exe_include "Regexp/Common/URI/gopher.pm"
  103 #perl2exe_include "Regexp/Common/URI/http.pm"
  104 #perl2exe_include "Regexp/Common/URI/pop.pm"
  105 #perl2exe_include "Regexp/Common/URI/prospero.pm"
  106 #perl2exe_include "Regexp/Common/URI/news.pm"
  107 #perl2exe_include "Regexp/Common/URI/tel.pm"
  108 #perl2exe_include "Regexp/Common/URI/telnet.pm"
  109 #perl2exe_include "Regexp/Common/URI/tv.pm"
  110 #perl2exe_include "Regexp/Common/URI/wais.pm"
  111 #perl2exe_include "Regexp/Common/CC.pm"
  112 #perl2exe_include "Regexp/Common/SEN.pm"
  113 #perl2exe_include "Regexp/Common/number.pm"
  114 #perl2exe_include "Regexp/Common/delimited.pm"
  115 #perl2exe_include "Regexp/Common/profanity.pm"
  116 #perl2exe_include "Regexp/Common/net.pm"
  117 #perl2exe_include "Regexp/Common/zip.pm"
  118 #perl2exe_include "Regexp/Common/comment.pm"
  119 #perl2exe_include "Regexp/Common/balanced.pm"
  120 #perl2exe_include "Regexp/Common/lingua.pm"
  121 #perl2exe_include "Regexp/Common/list.pm"
  122 #perl2exe_include "File/Glob.pm"
  123 
  124 use Text::Tabs qw { expand };
  125 use Cwd qw { cwd };
  126 use File::Glob;
  127 # 1}}}
  128 # Usage information, options processing.       {{{1
  129 # Native-Windows flag: 1 when $^O names an MSWin*/Windows_NT build, otherwise 0.
  130 my $ON_WINDOWS = (($^O =~ /^MSWin/) or ($^O eq "Windows_NT")) ? 1 : 0;
  131 # A Windows build that also has $SHELL set needs a second look (e.g. Cygwin, MKS).
  132 if ($ON_WINDOWS and $ENV{'SHELL'}) {
  133     # An absolute Unix-style $SHELL means Cygwin: make it look like Unix.
  134     # MKS defines $SHELL but still acts like Windows, so the flag stays 1.
  135     my $unix_style_shell = ($ENV{'SHELL'} =~ m{^/});
  136     $ON_WINDOWS = $unix_style_shell ? 0 : 1;
  137 }
  138 # Default options file: $HOME/.config/cloc/options.txt, else (Windows only)
  139 # the path 'cloc' under %APPDATA%; left as '' when neither variable applies.
  140 my $config_file =
  141       $ENV{'HOME'}                      ? File::Spec->catfile($ENV{'HOME'}, '.config', 'cloc', 'options.txt')
  142     : ($ENV{'APPDATA'} and $ON_WINDOWS) ? File::Spec->catfile($ENV{'APPDATA'}, 'cloc')
  143     :                                     '';
  144 
  145 # Terminal text attributes; both collapse to "" on Windows or when STDOUT is not a terminal.
  146 my $NN     = ($ON_WINDOWS or !(-t STDOUT)) ? "" : chr(27) . "[0m";  # normal
  147 my $BB     = ($ON_WINDOWS or !(-t STDOUT)) ? "" : chr(27) . "[1m";  # bold
  148 # Name this program was invoked as, with any leading directory part removed.
  149 my $script = basename($0);
  150 
  151 #  Intended for v1.88:
  152 #  --git-diff-simindex       Git diff strategy #3:  use git's similarity index
  153 #                            (git diff -M --name-status) to identify file pairs
  154 #                            to compare.  This is especially useful to compare
  155 #                            files that were renamed between the commits.
  156 
  157 my $brief_usage  = "
  158                        cloc -- Count Lines of Code
  159 
  160 Usage:
  161     $script [options] <file(s)/dir(s)/git hash(es)>
  162         Count physical lines of source code and comments in the given files
  163         (may be archives such as compressed tarballs or zip files) and/or
  164         recursively below the given directories or git commit hashes.
  165         Example:    cloc src/ include/ main.c
  166 
  167     $script [options] --diff <set1>  <set2>
  168         Compute differences of physical lines of source code and comments
  169         between any pairwise combination of directory names, archive
  170         files or git commit hashes.
  171         Example:    cloc --diff Python-3.5.tar.xz python-3.6/
  172 
  173 $script --help  shows full documentation on the options.
  174 https://$URL has numerous examples and more information.
  175 ";  # short help text; $script and $URL are interpolated when this string is built
  176 my $usage  = "
  177 Usage: $script [options] <file(s)/dir(s)/git hash(es)> | <set 1> <set 2> | <report files>
  178 
  179  Count, or compute differences of, physical lines of source code in the
  180  given files (may be archives such as compressed tarballs or zip files,
  181  or git commit hashes or branch names) and/or recursively below the
  182  given directories.
  183 
  184  ${BB}Input Options${NN}
  185    --extract-with=<cmd>      This option is only needed if cloc is unable
  186                              to figure out how to extract the contents of
  187                              the input file(s) by itself.
  188                              Use <cmd> to extract binary archive files (e.g.:
  189                              .tar.gz, .zip, .Z).  Use the literal '>FILE<' as
  190                              a stand-in for the actual file(s) to be
  191                              extracted.  For example, to count lines of code
  192                              in the input files
  193                                 gcc-4.2.tar.gz  perl-5.8.8.tar.gz
  194                              on Unix use
  195                                --extract-with='gzip -dc >FILE< | tar xf -'
  196                              or, if you have GNU tar,
  197                                --extract-with='tar zxf >FILE<'
  198                              and on Windows use, for example:
  199                                --extract-with=\"\\\"c:\\Program Files\\WinZip\\WinZip32.exe\\\" -e -o >FILE< .\"
  200                              (if WinZip is installed there).
  201    --list-file=<file>        Take the list of file and/or directory names to
  202                              process from <file>, which has one file/directory
  203                              name per line.  Only exact matches are counted;
  204                              relative path names will be resolved starting from
  205                              the directory where cloc is invoked.  Set <file>
  206                              to - to read file names from a STDIN pipe.
  207                              See also --exclude-list-file.
  208    --diff-list-file=<file>   Take the pairs of file names to be diff'ed from
  209                              <file>, whose format matches the output of
  210                              --diff-alignment.  (Run with that option to
  211                              see a sample.)  The language identifier at the
  212                              end of each line is ignored.  This enables --diff
  213                              mode and by-passes file pair alignment logic.
  214    --vcs=<VCS>               Invoke a system call to <VCS> to obtain a list of
  215                              files to work on.  If <VCS> is 'git', then will
  216                              invoke 'git ls-files' to get a file list and
  217                              'git submodule status' to get a list of submodules
  218                              whose contents will be ignored.  See also --git
  219                              which accepts git commit hashes and branch names.
  220                              If <VCS> is 'svn' then will invoke 'svn list -R'.
  221                              The primary benefit is that cloc will then skip
  222                              files explicitly excluded by the versioning tool
  223                              in question, ie, those in .gitignore or have the
  224                              svn:ignore property.
  225                              Alternatively <VCS> may be any system command
  226                              that generates a list of files.
  227                              Note:  cloc must be in a directory which can read
  228                              the files as they are returned by <VCS>.  cloc will
  229                              not download files from remote repositories.
  230                              'svn list -R' may refer to a remote repository
  231                              to obtain file names (and therefore may require
  232                              authentication to the remote repository), but
  233                              the files themselves must be local.
  234                              Setting <VCS> to 'auto' selects between 'git'
  235                              and 'svn' (or neither) depending on the presence
  236                              of a .git or .svn subdirectory below the directory
  237                              where cloc is invoked.
  238    --unicode                 Check binary files to see if they contain Unicode
  239                              expanded ASCII text.  This causes performance to
  240                              drop noticeably.
  241 
  242  ${BB}Processing Options${NN}
  243    --autoconf                Count .in files (as processed by GNU autoconf) of
  244                              recognized languages.  See also --no-autogen.
  245    --by-file                 Report results for every source file encountered.
  246    --by-file-by-lang         Report results for every source file encountered
  247                              in addition to reporting by language.
  248    --config <file>           Read command line switches from <file> instead of
  249                              the default location of $config_file.
  250                              The file should contain one switch, along with
  251                              arguments (if any), per line.  Blank lines and lines
  252                              beginning with '#' are skipped.  Options given on
  253                              the command line take priority over entries read from
  254                              the file.
  255    --count-and-diff <set1> <set2>
  256                              First perform direct code counts of source file(s)
  257                              of <set1> and <set2> separately, then perform a diff
  258                              of these.  Inputs may be pairs of files, directories,
  259                              or archives.  If --out or --report-file is given,
  260                              three output files will be created, one for each
  261                              of the two counts and one for the diff.  See also
  262                              --diff, --diff-alignment, --diff-timeout,
  263                              --ignore-case, --ignore-whitespace.
  264    --diff <set1> <set2>      Compute differences in code and comments between
  265                              source file(s) of <set1> and <set2>.  The inputs
  266                              may be any mix of files, directories, archives,
  267                              or git commit hashes.  Use --diff-alignment to
  268                              generate a list showing which file pairs where
  269                              compared.  When comparing git branches, only files
  270                              which have changed in either commit are compared.
  271                              See also --git, --count-and-diff, --diff-alignment,
  272                              --diff-list-file, --diff-timeout, --ignore-case,
  273                              --ignore-whitespace.
  274    --diff-timeout <N>        Ignore files which take more than <N> seconds
  275                              to process.  Default is 10 seconds.  Setting <N>
  276                              to 0 allows unlimited time.  (Large files with many
  277                              repeated lines can cause Algorithm::Diff::sdiff()
  278                              to take hours.) See also --timeout.
  279    --docstring-as-code       cloc considers docstrings to be comments, but this is
  280                              not always correct as docstrings represent regular
  281                              strings when they appear on the right hand side of an
  282                              assignment or as function arguments.  This switch
  283                              forces docstrings to be counted as code.
  284    --follow-links            [Unix only] Follow symbolic links to directories
  285                              (sym links to files are always followed).
  286                              See also --stat.
  287    --force-lang=<lang>[,<ext>]
  288                              Process all files that have a <ext> extension
  289                              with the counter for language <lang>.  For
  290                              example, to count all .f files with the
  291                              Fortran 90 counter (which expects files to
  292                              end with .f90) instead of the default Fortran 77
  293                              counter, use
  294                                --force-lang=\"Fortran 90\",f
  295                              If <ext> is omitted, every file will be counted
  296                              with the <lang> counter.  This option can be
  297                              specified multiple times (but that is only
  298                              useful when <ext> is given each time).
  299                              See also --script-lang, --lang-no-ext.
  300    --force-lang-def=<file>   Load language processing filters from <file>,
  301                              then use these filters instead of the built-in
  302                              filters.  Note:  languages which map to the same
  303                              file extension (for example:
  304                              MATLAB/Mathematica/Objective C/MUMPS/Mercury;
  305                              Pascal/PHP; Lisp/OpenCL; Lisp/Julia; Perl/Prolog)
  306                              will be ignored as these require additional
  307                              processing that is not expressed in language
  308                              definition files.  Use --read-lang-def to define
  309                              new language filters without replacing built-in
  310                              filters (see also --write-lang-def,
  311                              --write-lang-def-incl-dup).
  312    --git                     Forces the inputs to be interpreted as git targets
  313                              (commit hashes, branch names, et cetera) if these
  314                              are not first identified as file or directory
  315                              names.  This option overrides the --vcs=git logic
  316                              if this is given; in other words, --git gets its
  317                              list of files to work on directly from git using
  318                              the hash or branch name rather than from
  319                              'git ls-files'.  This option can be used with
  320                              --diff to perform line count diffs between git
  321                              commits, or between a git commit and a file,
  322                              directory, or archive.  Use -v/--verbose to see
  323                              the git system commands cloc issues.
  324    --git-diff-rel            Same as --git --diff, or just --diff if the inputs
  325                              are recognized as git targets.  Only files which
  326                              have changed in either commit are compared.
  327    --git-diff-all            Git diff strategy #2:  compare all files in the
  328                              repository between the two commits.
  329    --ignore-whitespace       Ignore horizontal white space when comparing files
  330                              with --diff.  See also --ignore-case.
  331    --ignore-case             Ignore changes in case within file contents;
  332                              consider upper- and lowercase letters equivalent
  333                              when comparing files with --diff.  See also
  334                              --ignore-whitespace.
  335    --ignore-case-ext         Ignore case of file name extensions.  This will
  336                              cause problems counting some languages
  337                              (specifically, .c and .C are associated with C and
  338                              C++; this switch would count .C files as C rather
  339                              than C++ on *nix operating systems).  File name
  340                              case insensitivity is always true on Windows.
  341    --lang-no-ext=<lang>      Count files without extensions using the <lang>
  342                              counter.  This option overrides internal logic
  343                              for files without extensions (where such files
  344                              are checked against known scripting languages
  345                              by examining the first line for #!).  See also
  346                              --force-lang, --script-lang.
  347    --max-file-size=<MB>      Skip files larger than <MB> megabytes when
  348                              traversing directories.  By default, <MB>=100.
  349                              cloc's memory requirement is roughly twenty times
  350                              larger than the largest file so running with
  351                              files larger than 100 MB on a computer with less
  352                              than 2 GB of memory will cause problems.
  353                              Note:  this check does not apply to files
  354                              explicitly passed as command line arguments.
  355    --no-autogen[=list]       Ignore files generated by code-production systems
  356                              such as GNU autoconf.  To see a list of these files
  357                              (then exit), run with --no-autogen list
  358                              See also --autoconf.
  359    --original-dir            [Only effective in combination with
  360                              --strip-comments]  Write the stripped files
  361                              to the same directory as the original files.
  362    --read-binary-files       Process binary files in addition to text files.
  363                              This is usually a bad idea and should only be
  364                              attempted with text files that have embedded
  365                              binary data.
  366    --read-lang-def=<file>    Load new language processing filters from <file>
  367                              and merge them with those already known to cloc.
  368                              If <file> defines a language cloc already knows
  369                              about, cloc's definition will take precedence.
  370                              Use --force-lang-def to over-ride cloc's
  371                              definitions (see also --write-lang-def,
  372                              --write-lang-def-incl-dup).
  373    --script-lang=<lang>,<s>  Process all files that invoke <s> as a #!
  374                              scripting language with the counter for language
  375                              <lang>.  For example, files that begin with
  376                                 #!/usr/local/bin/perl5.8.8
  377                              will be counted with the Perl counter by using
  378                                 --script-lang=Perl,perl5.8.8
  379                              The language name is case insensitive but the
  380                              name of the script language executable, <s>,
  381                              must have the right case.  This option can be
  382                              specified multiple times.  See also --force-lang,
  383                              --lang-no-ext.
  384    --sdir=<dir>              Use <dir> as the scratch directory instead of
  385                              letting File::Temp chose the location.  Files
  386                              written to this location are not removed at
  387                              the end of the run (as they are with File::Temp).
  388    --skip-uniqueness         Skip the file uniqueness check.  This will give
  389                              a performance boost at the expense of counting
  390                              files with identical contents multiple times
  391                              (if such duplicates exist).
  392    --stat                    Some file systems (AFS, CD-ROM, FAT, HPFS, SMB)
  393                              do not have directory 'nlink' counts that match
  394                              the number of its subdirectories.  Consequently
  395                              cloc may undercount or completely skip the
  396                              contents of such file systems.  This switch forces
  397                              File::Find to stat directories to obtain the
  398                              correct count.  File search spead will decrease.
  399                              See also --follow-links.
  400    --stdin-name=<file>       Give a file name to use to determine the language
  401                              for standard input.  (Use - as the input name to
  402                              receive source code via STDIN.)
  403    --strip-comments=<ext>    For each file processed, write to the current
  404                              directory a version of the file which has blank
  405                              and commented lines removed (in-line comments
  406                              persist).  The name of each stripped file is the
  407                              original file name with .<ext> appended to it.
  408                              It is written to the current directory unless
  409                              --original-dir is on.
  410    --strip-str-comments      Replace comment markers embedded in strings with
  411                              'xx'.  This attempts to work around a limitation
  412                              in Regexp::Common::Comment where comment markers
  413                              embedded in strings are seen as actual comment
  414                              markers and not strings, often resulting in a
  415                              'Complex regular subexpression recursion limit'
  416                              warning and incorrect counts.  There are two
  417                              disadvantages to using this switch:  1/code count
  418                              performance drops, and 2/code generated with
  419                              --strip-comments will contain different strings
  420                              where ever embedded comments are found.
  421    --sum-reports             Input arguments are report files previously
  422                              created with the --report-file option in plain
  423                              format (eg. not JSON, YAML, XML, or SQL).
  424                              Makes a cumulative set of results containing the
  425                              sum of data from the individual report files.
  426    --timeout <N>             Ignore files which take more than <N> seconds
  427                              to process at any of the language's filter stages.
  428                              The default maximum number of seconds spent on a
  429                              filter stage is the number of lines in the file
  430                              divided by one thousand.  Setting <N> to 0 allows
  431                              unlimited time.  See also --diff-timeout.
  432    --processes=NUM           [Available only on systems with a recent version
  433                              of the Parallel::ForkManager module.  Not
  434                              available on Windows.] Sets the maximum number of
  435                              cores that cloc uses.  The default value of 0
  436                              disables multiprocessing.
  437    --unix                    Override the operating system autodetection
  438                              logic and run in UNIX mode.  See also
  439                              --windows, --show-os.
  440    --use-sloccount           If SLOCCount is installed, use its compiled
  441                              executables c_count, java_count, pascal_count,
  442                              php_count, and xml_count instead of cloc's
  443                              counters.  SLOCCount's compiled counters are
  444                              substantially faster than cloc's and may give
  445                              a performance improvement when counting projects
  446                              with large files.  However, these cloc-specific
  447                              features will not be available: --diff,
  448                              --count-and-diff, --strip-comments, --unicode.
  449    --windows                 Override the operating system autodetection
  450                              logic and run in Microsoft Windows mode.
  451                              See also --unix, --show-os.
  452 
  453  ${BB}Filter Options${NN}
  454    --exclude-content=<regex> Exclude files containing text that matches the given
  455                              regular expression.
  456    --exclude-dir=<D1>[,D2,]  Exclude the given comma separated directories
  457                              D1, D2, D3, et cetera, from being scanned.  For
  458                              example  --exclude-dir=.cache,test  will skip
  459                              all files and subdirectories that have /.cache/
  460                              or /test/ as their parent directory.
  461                              Directories named .bzr, .cvs, .hg, .git, .svn,
  462                              and .snapshot are always excluded.
  463                              This option only works with individual directory
  464                              names so including file path separators is not
  465                              allowed.  Use --fullpath and --not-match-d=<regex>
  466                              to supply a regex matching multiple subdirectories.
  467    --exclude-ext=<ext1>[,<ext2>[...]]
  468                              Do not count files having the given file name
  469                              extensions.
  470    --exclude-lang=<L1>[,L2[...]]
  471                              Exclude the given comma separated languages
  472                              L1, L2, L3, et cetera, from being counted.
  473    --exclude-list-file=<file>  Ignore files and/or directories whose names
  474                              appear in <file>.  <file> should have one file
  475                              name per line.  Only exact matches are ignored;
  476                              relative path names will be resolved starting from
  477                              the directory where cloc is invoked.
  478                              See also --list-file.
  479    --fullpath                Modifies the behavior of --match-f, --not-match-f,
  480                              and --not-match-d to include the file's path
  481                              in the regex, not just the file's basename.
  482                              (This does not expand each file to include its
  483                              absolute path, instead it uses as much of
  484                              the path as is passed in to cloc.)
  485                              Note:  --match-d always looks at the full
  486                              path and therefore is unaffected by --fullpath.
  487    --include-ext=<ext1>[,ext2[...]]
  488                              Count only languages having the given comma
  489                              separated file extensions.  Use --show-ext to
  490                              see the recognized extensions.
  491    --include-lang=<L1>[,L2[...]]
  492                              Count only the given comma separated languages
  493                              L1, L2, L3, et cetera.  Use --show-lang to see
  494                              the list of recognized languages.
  495    --match-d=<regex>         Only count files in directories matching the Perl
  496                              regex.  For example
  497                                --match-d='/(src|include)/'
  498                              only counts files in directories containing
  499                              /src/ or /include/.  Unlike --not-match-d,
  500                              --match-f, and --not-match-f, --match-d always
  501                              compares the fully qualified path against the
  502                              regex.
  503    --not-match-d=<regex>     Count all files except those in directories
  504                              matching the Perl regex.  Only the trailing
  505                              directory name is compared, for example, when
  506                              counting in /usr/local/lib, only 'lib' is
  507                              compared to the regex.
  508                              Add --fullpath to compare parent directories to
  509                              the regex.
  510                              Do not include file path separators at the
  511                              beginning or end of the regex.
  512    --match-f=<regex>         Only count files whose basenames match the Perl
  513                              regex.  For example
  514                                --match-f='^[Ww]idget'
  515                              only counts files that start with Widget or widget.
  516                              Add --fullpath to include parent directories
  517                              in the regex instead of just the basename.
  518    --not-match-f=<regex>     Count all files except those whose basenames
  519                              match the Perl regex.  Add --fullpath to include
  520                              parent directories in the regex instead of just
  521                              the basename.
  522    --skip-archive=<regex>    Ignore files that end with the given Perl regular
  523                              expression.  For example, if given
  524                                --skip-archive='(zip|tar(\.(gz|Z|bz2|xz|7z))?)'
  525                              the code will skip files that end with .zip,
  526                              .tar, .tar.gz, .tar.Z, .tar.bz2, .tar.xz, and
  527                              .tar.7z.
  528    --skip-win-hidden         On Windows, ignore hidden files.
  529 
  530  ${BB}Debug Options${NN}
  531    --categorized=<file>      Save names of categorized files to <file>.
  532    --counted=<file>          Save names of processed source files to <file>.
  533    --diff-alignment=<file>   Write to <file> a list of files and file pairs
  534                              showing which files were added, removed, and/or
  535                              compared during a run with --diff.  This switch
  536                              forces the --diff mode on.
  537    --explain=<lang>          Print the filters used to remove comments for
  538                              language <lang> and exit.  In some cases the
  539                              filters refer to Perl subroutines rather than
  540                              regular expressions.  An examination of the
  541                              source code may be needed for further explanation.
  542    --help                    Print this usage information and exit.
  543    --found=<file>            Save names of every file found to <file>.
  544    --ignored=<file>          Save names of ignored files and the reason they
  545                              were ignored to <file>.
  546    --print-filter-stages     Print processed source code before and after
  547                              each filter is applied.
  548    --show-ext[=<ext>]        Print information about all known (or just the
  549                              given) file extensions and exit.
  550    --show-lang[=<lang>]      Print information about all known (or just the
  551                              given) languages and exit.
  552    --show-os                 Print the value of the operating system mode
  553                              and exit.  See also --unix, --windows.
  554    -v[=<n>]                  Verbose switch (optional numeric value).
  555    -verbose[=<n>]            Long form of -v.
  556    --version                 Print the version of this program and exit.
  557    --write-lang-def=<file>   Writes to <file> the language processing filters
  558                              then exits.  Useful as a first step to creating
  559                              custom language definitions. Note: languages which
  560                              map to the same file extension will be excluded.
  561                              (See also --force-lang-def, --read-lang-def).
  562    --write-lang-def-incl-dup=<file>
  563                              Same as --write-lang-def, but includes duplicated
  564                              extensions.  This generates a problematic language
  565                              definition file because cloc will refuse to use
  566                              it until duplicates are removed.
  567 
  568  ${BB}Output Options${NN}
  569    --3                       Print third-generation language output.
  570                              (This option can cause report summation to fail
  571                              if some reports were produced with this option
  572                              while others were produced without it.)
  573    --by-percent  X           Instead of comment and blank line counts, show
  574                              these values as percentages based on the value
  575                              of X in the denominator:
  576                                 X = 'c'   -> # lines of code
  577                                 X = 'cm'  -> # lines of code + comments
  578                                 X = 'cb'  -> # lines of code + blanks
  579                                 X = 'cmb' -> # lines of code + comments + blanks
  580                              For example, if using method 'c' and your code
  581                              has twice as many lines of comments as lines
  582                              of code, the value in the comment column will
  583                              be 200%.  The code column remains a line count.
  584    --csv                     Write the results as comma separated values.
  585    --csv-delimiter=<C>       Use the character <C> as the delimiter for comma
  586                              separated files instead of ,.  Forces --csv on.
  587    --file-encoding=<E>       Write output files using the <E> encoding instead of
  588                              the default ASCII (<E> = 'UTF-7').  Examples: 'UTF-16',
  589                              'euc-kr', 'iso-8859-16'.  Known encodings can be
  590                              printed with
  591                                perl -MEncode -e 'print join(\"\\n\", Encode->encodings(\":all\")), \"\\n\"'
  592    --hide-rate               Do not show line and file processing rates in the
  593                              output header. This makes output deterministic.
  594    --json                    Write the results as JavaScript Object Notation
  595                              (JSON) formatted output.
  596    --md                      Write the results as Markdown-formatted text.
  597    --out=<file>              Synonym for --report-file=<file>.
  598    --progress-rate=<n>       Show progress update after every <n> files are
  599                              processed (default <n>=100).  Set <n> to 0 to
  600                              suppress progress output (useful when redirecting
  601                              output to STDOUT).
  602    --quiet                   Suppress all information messages except for
  603                              the final report.
  604    --report-file=<file>      Write the results to <file> instead of STDOUT.
  605    --sql=<file>              Write results as SQL create and insert statements
  606                              which can be read by a database program such as
  607                              SQLite.  If <file> is -, output is sent to STDOUT.
  608    --sql-append              Append SQL insert statements to the file specified
  609                              by --sql and do not generate table creation
  610                              statements.  Only valid with the --sql option.
  611    --sql-project=<name>      Use <name> as the project identifier for the
  612                              current run.  Only valid with the --sql option.
  613    --sql-style=<style>       Write SQL statements in the given style instead
  614                              of the default SQLite format.  Styles include
  615                              'Oracle' and 'Named_Columns'.
  616    --sum-one                 For plain text reports, show the SUM: output line
  617                              even if only one input file is processed.
  618    --xml                     Write the results in XML.
  619    --xsl=<file>              Reference <file> as an XSL stylesheet within
  620                              the XML output.  If <file> is 1 (numeric one),
  621                              writes a default stylesheet, cloc.xsl (or
  622                              cloc-diff.xsl if --diff is also given).
  623                              This switch forces --xml on.
  624    --yaml                    Write the results in YAML.
  625 
  626 ";
  627 #  Help information for options not yet implemented:
  628 #  --inline                  Process comments that appear at the end
  629 #                            of lines containing code.
  630 #  --html                    Create HTML files of each input file showing
  631 #                            comment and code lines in different colors.
  632 
  633 $| = 1;  # unbuffer STDOUT: flush after every write
  634 my $start_time = get_time();  # captured before any option parsing
  635 my (  # storage for option values; most are bound to switches in GetOptions() below
  636     $opt_categorized          ,
  637     $opt_found                ,
  638     @opt_force_lang           ,
  639     $opt_lang_no_ext          ,
  640     @opt_script_lang          ,
  641     $opt_count_diff           ,
  642     $opt_diff                 ,
  643     $opt_diff_alignment       ,
  644     $opt_diff_list_file       ,
  645     $opt_diff_timeout         ,
  646     $opt_timeout              ,
  647     $opt_html                 ,
  648     $opt_ignored              ,
  649     $opt_counted              ,
  650     $opt_show_ext             ,
  651     $opt_show_lang            ,
  652     $opt_progress_rate        ,
  653     $opt_print_filter_stages  ,
  654     $opt_v                    ,
  655     $opt_vcs                  ,
  656     $opt_version              ,
  657     $opt_exclude_content      ,
  658     $opt_exclude_lang         ,
  659     $opt_exclude_list_file    ,
  660     $opt_exclude_dir          ,
  661     $opt_explain              ,
  662     $opt_include_ext          ,
  663     $opt_include_lang         ,
  664     $opt_force_lang_def       ,
  665     $opt_read_lang_def        ,
  666     $opt_write_lang_def       ,
  667     $opt_write_lang_def_incl_dup,
  668     $opt_strip_comments       ,
  669     $opt_original_dir         ,
  670     $opt_quiet                ,
  671     $opt_report_file          ,
  672     $opt_sdir                 ,
  673     $opt_sum_reports          ,
  674     $opt_hide_rate            ,
  675     $opt_processes            ,
  676     $opt_unicode              ,
  677     $opt_no3                  ,   # accept it but don't use it
  678     $opt_3                    ,
  679     $opt_extract_with         ,
  680     $opt_by_file              ,
  681     $opt_by_file_by_lang      ,
  682     $opt_by_percent           ,
  683     $opt_xml                  ,
  684     $opt_xsl                  ,
  685     $opt_yaml                 ,
  686     $opt_csv                  ,
  687     $opt_csv_delimiter        ,
  688     $opt_fullpath             ,
  689     $opt_json                 ,
  690     $opt_md                   ,
  691     $opt_match_f              ,
  692     $opt_not_match_f          ,
  693     $opt_match_d              ,
  694     $opt_not_match_d          ,
  695     $opt_skip_uniqueness      ,
  696     $opt_list_file            ,
  697     $opt_help                 ,
  698     $opt_skip_win_hidden      ,
  699     $opt_read_binary_files    ,
  700     $opt_sql                  ,
  701     $opt_sql_append           ,
  702     $opt_sql_project          ,
  703     $opt_sql_style            ,
  704     $opt_inline               ,
  705     $opt_exclude_ext          ,
  706     $opt_ignore_whitespace    ,
  707     $opt_ignore_case          ,
  708     $opt_ignore_case_ext      ,
  709     $opt_follow_links         ,
  710     $opt_autoconf             ,
  711     $opt_sum_one              ,
  712     $opt_stdin_name           ,
  713     $opt_force_on_windows     ,
  714     $opt_force_on_unix        ,   # actually forces !$ON_WINDOWS
  715     $opt_show_os              ,
  716     $opt_skip_archive         ,
  717     $opt_max_file_size        ,   # in MB
  718     $opt_use_sloccount        ,
  719     $opt_no_autogen           ,
  720     $opt_force_git            ,
  721     $opt_git_diff_rel         ,
  722     $opt_git_diff_all         ,
  723     $opt_git_diff_simindex    ,   # its GetOptions() entry is commented out
  724     $opt_config_file          ,
  725     $opt_strip_str_comments   ,
  726     $opt_file_encoding        ,
  727     $opt_docstring_as_code    ,
  728     $opt_stat                 ,
  729    );
  730 
  731 my $getopt_success = GetOptions(             # {{{1  bind each switch to its $opt_* variable; false on a parse error
  732    "by_file|by-file"                         => \$opt_by_file             ,
  733    "by_file_by_lang|by-file-by-lang"         => \$opt_by_file_by_lang     ,
  734    "categorized=s"                           => \$opt_categorized         ,
  735    "counted=s"                               => \$opt_counted             ,
  736    "include_ext|include-ext=s"               => \$opt_include_ext         ,
  737    "include_lang|include-lang=s"             => \$opt_include_lang        ,
  738    "exclude_content|exclude-content=s"       => \$opt_exclude_content     ,
  739    "exclude_lang|exclude-lang=s"             => \$opt_exclude_lang        ,
  740    "exclude_dir|exclude-dir=s"               => \$opt_exclude_dir         ,
  741    "exclude_list_file|exclude-list-file=s"   => \$opt_exclude_list_file   ,
  742    "explain=s"                               => \$opt_explain             ,
  743    "extract_with|extract-with=s"             => \$opt_extract_with        ,
  744    "found=s"                                 => \$opt_found               ,
  745    "count_and_diff|count-and-diff"           => \$opt_count_diff          ,
  746    "diff"                                    => \$opt_diff                ,
  747    "diff-alignment|diff_alignment=s"         => \$opt_diff_alignment      ,
  748    "diff-timeout|diff_timeout=i"             => \$opt_diff_timeout        ,
  749    "diff-list-file|diff_list_file=s"         => \$opt_diff_list_file      ,
  750    "timeout=i"                               => \$opt_timeout             ,
  751    "html"                                    => \$opt_html                ,
  752    "ignored=s"                               => \$opt_ignored             ,
  753    "quiet"                                   => \$opt_quiet               ,
  754    "force_lang_def|force-lang-def=s"         => \$opt_force_lang_def      ,
  755    "read_lang_def|read-lang-def=s"           => \$opt_read_lang_def       ,
  756    "show_ext|show-ext:s"                     => \$opt_show_ext            ,  # ":s" = value is optional
  757    "show_lang|show-lang:s"                   => \$opt_show_lang           ,  # ":s" = value is optional
  758    "progress_rate|progress-rate=i"           => \$opt_progress_rate       ,
  759    "print_filter_stages|print-filter-stages" => \$opt_print_filter_stages ,
  760    "report_file|report-file=s"               => \$opt_report_file         ,
  761    "out=s"                                   => \$opt_report_file         ,  # synonym for --report-file
  762    "script_lang|script-lang=s"               => \@opt_script_lang         ,
  763    "sdir=s"                                  => \$opt_sdir                ,
  764    "skip_uniqueness|skip-uniqueness"         => \$opt_skip_uniqueness     ,
  765    "strip_comments|strip-comments=s"         => \$opt_strip_comments      ,
  766    "original_dir|original-dir"               => \$opt_original_dir        ,
  767    "sum_reports|sum-reports"                 => \$opt_sum_reports         ,
  768    "hide_rate|hide-rate"                     => \$opt_hide_rate           ,
  769    "processes=n"                             => \$opt_processes           ,
  770    "unicode"                                 => \$opt_unicode             ,
  771    "no3"                                     => \$opt_no3                 ,  # ignored
  772    "3"                                       => \$opt_3                   ,
  773    "v|verbose:i"                             => \$opt_v                   ,  # ":i" = optional integer value
  774    "vcs=s"                                   => \$opt_vcs                 ,
  775    "version"                                 => \$opt_version             ,
  776    "write_lang_def|write-lang-def=s"         => \$opt_write_lang_def      ,
  777    "write_lang_def_incl_dup|write-lang-def-incl-dup=s" => \$opt_write_lang_def_incl_dup,
  778    "xml"                                     => \$opt_xml                 ,
  779    "xsl=s"                                   => \$opt_xsl                 ,
  780    "force_lang|force-lang=s"                 => \@opt_force_lang          ,
  781    "lang_no_ext|lang-no-ext=s"               => \$opt_lang_no_ext         ,
  782    "yaml"                                    => \$opt_yaml                ,
  783    "csv"                                     => \$opt_csv                 ,
  784    "csv_delimiter|csv_delimeter|csv-delimiter=s" => \$opt_csv_delimiter   ,  # misspelled alias kept for compatibility
  785    "json"                                    => \$opt_json                ,
  786    "md"                                      => \$opt_md                  ,
  787    "fullpath"                                => \$opt_fullpath            ,
  788    "match_f|match-f=s"                       => \$opt_match_f             ,
  789    "not_match_f|not-match-f=s"               => \$opt_not_match_f         ,
  790    "match_d|match-d=s"                       => \$opt_match_d             ,
  791    "not_match_d|not-match-d=s"               => \$opt_not_match_d         ,
  792    "list_file|list-file=s"                   => \$opt_list_file           ,
  793    "help"                                    => \$opt_help                ,
  794    "skip_win_hidden|skip-win-hidden"         => \$opt_skip_win_hidden     ,
  795    "read_binary_files|read-binary-files"     => \$opt_read_binary_files   ,
  796    "sql=s"                                   => \$opt_sql                 ,
  797    "sql_project|sql-project=s"               => \$opt_sql_project         ,
  798    "sql_append|sql-append"                   => \$opt_sql_append          ,
  799    "sql_style|sql-style=s"                   => \$opt_sql_style           ,
  800    "inline"                                  => \$opt_inline              ,
  801    "exclude_ext|exclude-ext=s"               => \$opt_exclude_ext         ,
  802    "ignore_whitespace|ignore-whitespace"     => \$opt_ignore_whitespace   ,
  803    "ignore_case|ignore-case"                 => \$opt_ignore_case         ,
  804    "ignore_case_ext|ignore-case-ext"         => \$opt_ignore_case_ext     ,
  805    "follow_links|follow-links"               => \$opt_follow_links        ,
  806    "autoconf"                                => \$opt_autoconf            ,
  807    "sum_one|sum-one"                         => \$opt_sum_one             ,
  808    "by_percent|by-percent=s"                 => \$opt_by_percent          ,
  809    "stdin_name|stdin-name=s"                 => \$opt_stdin_name          ,
  810    "windows"                                 => \$opt_force_on_windows    ,
  811    "unix"                                    => \$opt_force_on_unix       ,
  812    "show_os|show-os"                         => \$opt_show_os             ,
  813    "skip_archive|skip-archive=s"             => \$opt_skip_archive        ,
  814    "max_file_size|max-file-size=i"           => \$opt_max_file_size       ,
  815    "use_sloccount|use-sloccount"             => \$opt_use_sloccount       ,
  816    "no_autogen|no-autogen"                   => \$opt_no_autogen          ,
  817    "git"                                     => \$opt_force_git           ,
  818    "git_diff_rel|git-diff-rel"               => \$opt_git_diff_rel        ,
  819    "git_diff_all|git-diff-all"               => \$opt_git_diff_all        ,
  820 #  "git_diff_simindex|git-diff-simindex"     => \$opt_git_diff_simindex   ,
  821    "config=s"                                => \$opt_config_file         ,
  822    "strip_str_comments|strip-str-comments"   => \$opt_strip_str_comments  ,
  823    "file_encoding|file-encoding=s"           => \$opt_file_encoding       ,
  824    "docstring_as_code|docstring-as-code"     => \$opt_docstring_as_code   ,
  825    "stat"                                    => \$opt_stat                ,
  826   );
  827 # 1}}}
  828 $config_file = $opt_config_file if defined $opt_config_file;  # --config=<file> takes precedence over the existing $config_file value
  829 load_from_config_file($config_file,          # {{{2  options go in by reference, presumably so the loader can assign them
  830                                                 \$opt_by_file             ,
  831                                                 \$opt_by_file_by_lang     ,
  832                                                 \$opt_categorized         ,
  833                                                 \$opt_counted             ,
  834                                                 \$opt_include_ext         ,
  835                                                 \$opt_include_lang        ,
  836                                                 \$opt_exclude_content     ,
  837                                                 \$opt_exclude_lang        ,
  838                                                 \$opt_exclude_dir         ,
  839                                                 \$opt_exclude_list_file   ,
  840                                                 \$opt_explain             ,
  841                                                 \$opt_extract_with        ,
  842                                                 \$opt_found               ,
  843                                                 \$opt_count_diff          ,
  844                                                 \$opt_diff                ,
  845                                                 \$opt_diff_alignment      ,
  846                                                 \$opt_diff_timeout        ,
  847                                                 \$opt_timeout             ,
  848                                                 \$opt_html                ,
  849                                                 \$opt_ignored             ,
  850                                                 \$opt_quiet               ,
  851                                                 \$opt_force_lang_def      ,
  852                                                 \$opt_read_lang_def       ,
  853                                                 \$opt_show_ext            ,
  854                                                 \$opt_show_lang           ,
  855                                                 \$opt_progress_rate       ,
  856                                                 \$opt_print_filter_stages ,
  857                                                 \$opt_report_file         ,
  858                                                 \@opt_script_lang         ,
  859                                                 \$opt_sdir                ,
  860                                                 \$opt_skip_uniqueness     ,
  861                                                 \$opt_strip_comments      ,
  862                                                 \$opt_original_dir        ,
  863                                                 \$opt_sum_reports         ,
  864                                                 \$opt_hide_rate           ,
  865                                                 \$opt_processes           ,
  866                                                 \$opt_unicode             ,
  867                                                 \$opt_3                   ,
  868                                                 \$opt_v                   ,
  869                                                 \$opt_vcs                 ,
  870                                                 \$opt_version             ,
  871                                                 \$opt_write_lang_def      ,
  872                                                 \$opt_write_lang_def_incl_dup,
  873                                                 \$opt_xml                 ,
  874                                                 \$opt_xsl                 ,
  875                                                 \@opt_force_lang          ,
  876                                                 \$opt_lang_no_ext         ,
  877                                                 \$opt_yaml                ,
  878                                                 \$opt_csv                 ,
  879                                                 \$opt_csv_delimiter       ,
  880                                                 \$opt_json                ,
  881                                                 \$opt_md                  ,
  882                                                 \$opt_fullpath            ,
  883                                                 \$opt_match_f             ,
  884                                                 \$opt_not_match_f         ,
  885                                                 \$opt_match_d             ,
  886                                                 \$opt_not_match_d         ,
  887                                                 \$opt_list_file           ,
  888                                                 \$opt_help                ,
  889                                                 \$opt_skip_win_hidden     ,
  890                                                 \$opt_read_binary_files   ,
  891                                                 \$opt_sql                 ,
  892                                                 \$opt_sql_project         ,
  893                                                 \$opt_sql_append          ,
  894                                                 \$opt_sql_style           ,
  895                                                 \$opt_inline              ,
  896                                                 \$opt_exclude_ext         ,
  897                                                 \$opt_ignore_whitespace   ,
  898                                                 \$opt_ignore_case         ,
  899                                                 \$opt_ignore_case_ext     ,
  900                                                 \$opt_follow_links        ,
  901                                                 \$opt_autoconf            ,
  902                                                 \$opt_sum_one             ,
  903                                                 \$opt_by_percent          ,
  904                                                 \$opt_stdin_name          ,
  905                                                 \$opt_force_on_windows    ,
  906                                                 \$opt_force_on_unix       ,
  907                                                 \$opt_show_os             ,
  908                                                 \$opt_skip_archive        ,
  909                                                 \$opt_max_file_size       ,
  910                                                 \$opt_use_sloccount       ,
  911                                                 \$opt_no_autogen          ,
  912                                                 \$opt_force_git           ,
  913                                                 \$opt_strip_str_comments  ,
  914                                                 \$opt_file_encoding       ,
  915                                                 \$opt_docstring_as_code   ,
  916                                                 \$opt_stat                ,
  917 );  # 2}}} Not pretty.  Not at all.  NOTE(review): $opt_diff_list_file, $opt_no3, $opt_git_diff_rel and $opt_git_diff_all are bound in GetOptions() but not passed here -- confirm this is intended.
  918 if ($opt_version) {  # --version: report the version and stop before any other processing
  919     print "$VERSION\n";  # print, not printf: the version string is data, not a format
  920     exit;
  921 }
  922 $opt_by_file  = 1 if defined  $opt_by_file_by_lang;  # --by-file-by-lang implies --by-file
  923 my $CLOC_XSL = "cloc.xsl"; # created with --xsl
  924    $CLOC_XSL = "cloc-diff.xsl" if $opt_diff;  # NOTE(review): tested before later code forces $opt_diff on for --diff-alignment etc.
  925 die "\n" unless $getopt_success;  # Getopt::Long has already warned about the offending switch
  926 if ($opt_help) { print $usage; exit; }  # exit even if the print itself fails
  927 my %Exclude_Language = ();  # set (name => 1) of the languages given to --exclude-lang
  928    %Exclude_Language = map { $_ => 1 } split(/,/, $opt_exclude_lang)
  929         if $opt_exclude_lang;
  930 my %Exclude_Dir      = ();  # set (name => 1) of the directories given to --exclude-dir
  931    %Exclude_Dir      = map { $_ => 1 } split(/,/, $opt_exclude_dir )
  932         if $opt_exclude_dir ;
  933 die unless exclude_dir_validates(\%Exclude_Dir);  # checks only the user-supplied names; forced entries are added below
  934 my %Include_Ext = ();  # set (ext => 1) of the extensions given to --include-ext
  935    %Include_Ext = map { $_ => 1 } split(/,/, $opt_include_ext)
  936         if $opt_include_ext;
  937 my %Include_Language = ();  # set (name => 1) of the languages given to --include-lang
  938    %Include_Language = map { $_ => 1 } split(/,/, $opt_include_lang)
  939         if $opt_include_lang;
  940 # Forcibly exclude version-control and similar directories (.svn, .cvs, .hg, .git,
  941 # .bzr, .snapshot, .config); their contents often conflict with files of interest.
  942 $opt_exclude_dir       = 1;  # from here on just a true flag; the names live in %Exclude_Dir
  943 $Exclude_Dir{".svn"}   = 1;
  944 $Exclude_Dir{".cvs"}   = 1;
  945 $Exclude_Dir{".hg"}    = 1;
  946 $Exclude_Dir{".git"}   = 1;
  947 $Exclude_Dir{".bzr"}   = 1;
  948 $Exclude_Dir{".snapshot"} = 1;  # NetApp backups
  949 $Exclude_Dir{".config"} = 1;
  950 $opt_count_diff        = defined $opt_count_diff ? 1 : 0;  # normalize to a 0/1 flag
  951 $opt_diff              = 1  if $opt_diff_alignment    or  # any of these switches implies --diff
  952                                $opt_diff_list_file    or
  953                                $opt_git_diff_rel      or
  954                                $opt_git_diff_all      or
  955                                $opt_git_diff_simindex;
  956 $opt_force_git         = 1  if $opt_git_diff_rel      or  # the git diff switches imply --git
  957                                $opt_git_diff_all      or
  958                                $opt_git_diff_simindex;
  959 $opt_diff_alignment    = 0  if $opt_diff_list_file;  # --diff-list-file overrides --diff-alignment
  960 $opt_exclude_ext       = "" unless $opt_exclude_ext;
  961 $opt_ignore_whitespace = 0  unless $opt_ignore_whitespace;
  962 $opt_ignore_case       = 0  unless $opt_ignore_case;
  963 $opt_ignore_case_ext   = 0  unless $opt_ignore_case_ext;
  964 $opt_lang_no_ext       = 0  unless $opt_lang_no_ext;
  965 $opt_follow_links      = 0  unless $opt_follow_links;
  966 if (defined $opt_diff_timeout) {
  967     # if defined but with a value of <= 0, set to 2^31 seconds = 68 years
  968     $opt_diff_timeout = 2**31 unless $opt_diff_timeout > 0;
  969 } else {
  970     $opt_diff_timeout  =10; # seconds
  971 }
  972 if (defined $opt_timeout) {
  973     # if defined but with a value of <= 0, set to 2^31 seconds = 68 years
  974     $opt_timeout = 2**31 unless $opt_timeout > 0;
  975     # else is computed dynamically, ref $max_duration_sec
  976 }
  977 $opt_csv               = 1  if $opt_csv_delimiter;  # --csv-delimiter implies --csv (but not for a false value such as "0")
  978 $ON_WINDOWS            = 1  if $opt_force_on_windows;
  979 $ON_WINDOWS            = 0  if $opt_force_on_unix;  # --unix wins when both --windows and --unix are given
  980 $opt_max_file_size     = 100 unless $opt_max_file_size;  # in MB; unset or 0 falls back to 100
  981 my $HAVE_SLOCCOUNT_c_count = 0;
  982 if ($opt_use_sloccount) {
  983     # Only bother doing this kludgey test if the user explicitly wants
  984     # to use SLOCCount.  Debian based systems will hang if just doing
  985     #  external_utility_exists("c_count")
  986     # if c_count is in $PATH; c_count expects to have input.
  987     $HAVE_SLOCCOUNT_c_count = external_utility_exists("c_count /bin/sh")
  988         unless $ON_WINDOWS;
  989     if ($HAVE_SLOCCOUNT_c_count) {
  990         warn "Using c_count, php_count, xml_count, pascal_count from SLOCCount\n";
  991         warn "--diff is disabled with --use-sloccount\n" if $opt_diff;
  992         warn "--count-and-diff is disabled with --use-sloccount\n" if $opt_count_diff;
  993         warn "--unicode is disabled with --use-sloccount\n" if $opt_unicode;
  994         warn "--strip-comments is disabled with --use-sloccount\n" if $opt_strip_comments;
  995         $opt_diff           = 0;
  996         $opt_count_diff     = undef;
  997         $opt_unicode        = 0;
  998         $opt_strip_comments = 0;
  999     } else {
 1000         warn "c_count could not be found; ignoring --use-sloccount\n";
 1001         $opt_use_sloccount = 0;
 1002     }
 1003 }
 1004 # $opt_vcs is cleared whenever git mode has been forced
 1005 $opt_vcs = 0 if $opt_force_git;
 1006 
 1007 my @COUNT_DIFF_ARGV        = ();     # empty unless set up for --count-and-diff below
 1008 my $COUNT_DIFF_report_file = undef;  # original report file name, kept for all passes
 1009 if ($opt_count_diff and !$opt_diff_list_file) {
 1010     die "--count-and-diff requires two arguments; got ", scalar @ARGV, "\n"
 1011         if scalar @ARGV != 2;
 1012     # prefix with a dummy term so that $opt_count_diff is the
 1013     # index into @COUNT_DIFF_ARGV to work on at each pass
 1014     @COUNT_DIFF_ARGV = (undef, $ARGV[0],
 1015                                $ARGV[1],
 1016                               [$ARGV[0], $ARGV[1]]);  # 3rd pass: diff them
 1017     $COUNT_DIFF_report_file = $opt_report_file if $opt_report_file;
 1018 }
 1019 
 1020 # Options defaults:
 1021 $opt_quiet         =   1 if !defined $opt_report_file and ($opt_md or $opt_json);
 1022 $opt_progress_rate = 100 unless defined $opt_progress_rate;
 1023 $opt_progress_rate =   0 if     defined $opt_quiet;
 1024 # $opt_v:  undefined -> 0;  defined but false -> 1;  otherwise kept as is
 1025 $opt_v = !defined $opt_v ? 0
 1026        : !$opt_v         ? 1
 1027        :                   $opt_v
 1028        ;
 1029 if (defined $opt_xsl) {
 1030     $opt_xml = 1;
 1031     $opt_xsl = $CLOC_XSL if $opt_xsl eq "1";
 1032 }
 1033 my $skip_generate_report = 0;
 1034 $opt_sql_style = 0 if !defined $opt_sql_style;
 1035 $opt_sql = 0 if !$opt_sql_style and !defined $opt_sql;
 1036 if ($opt_sql eq "-" or $opt_sql eq "1") { # stream SQL output to STDOUT
 1037     $opt_by_file          = 1;
 1038     $opt_sum_reports      = 0;
 1039     $skip_generate_report = 1;
 1040     $opt_progress_rate    = 0;
 1041     $opt_quiet            = 1;
 1042 } elsif ($opt_sql)  { # write SQL output to a file
 1043     $opt_by_file          = 1;
 1044     $opt_sum_reports      = 0;
 1045     $skip_generate_report = 1;
 1046 }
 1047 if ($opt_sql_style) {
 1048     # the style is matched without regard to case
 1049     $opt_sql_style = lc $opt_sql_style;
 1050     die "'$opt_sql_style' is not a recognized SQL style.\n"
 1051         unless $opt_sql_style eq "oracle" or $opt_sql_style eq "named_columns";
 1052 }
 1053 # $opt_by_percent:  validated, then stored in lower case ('' when not given)
 1054 $opt_by_percent = '' unless defined $opt_by_percent;
 1055 die "--by-percent must be either 'c', 'cm', 'cb', or 'cmb'\n"
 1056     if $opt_by_percent and $opt_by_percent !~ m/^(c|cm|cb|cmb)$/i;
 1057 $opt_by_percent = lc $opt_by_percent;
 1058 
 1059 if (defined $opt_vcs) {
 1060     if ($opt_vcs eq "auto") {
 1061         # guess the versioning system from the metadata directory,
 1062         # if any, found in the current working directory
 1063         if    (-d ".git") { $opt_vcs = "git"; }
 1064         elsif (-d ".svn") { $opt_vcs = "svn"; }
 1065         else {
 1066             warn "--vcs auto:  unable to determine versioning system\n";
 1067         }
 1068     }
 1069     if ($opt_vcs eq "svn") {
 1070         $opt_vcs = "svn list -R";
 1071     } elsif ($opt_vcs eq "git") {
 1072         $opt_vcs = "git ls-files";
 1073         my @submodule_status = invoke_generator('git submodule status');
 1074         foreach my $status (@submodule_status) {
 1075             $status =~ s/^\s+//;        # may have leading space
 1076             $status =~ s/\(\S+\)\s*$//; # may end with something like (heads/master)
 1077             my (undef, $submodule_dir) = split(' ', $status, 2);
 1078             $submodule_dir =~ s/\s+$//;
 1079             $Exclude_Dir{$submodule_dir} = 1;
 1080         }
 1081     }
 1082 }
 1083 
 1084 # true only when $opt_no_autogen is given and the sole argument is "list"
 1085 my $list_no_autogen = 0;
 1086    $list_no_autogen = 1 if defined $opt_no_autogen and
 1087                            scalar @ARGV == 1 and $ARGV[0] eq "list";
 1088 
 1089 # Print the brief usage and quit unless there is something to do:
 1090 # an argument, or one of the switches that can work without one.
 1091 die $brief_usage unless $list_no_autogen
 1092                      or scalar @ARGV >= 1
 1093                      or grep { defined $_ } ( $opt_version                ,
 1094                                               $opt_show_lang              ,
 1095                                               $opt_show_ext               ,
 1096                                               $opt_show_os                ,
 1097                                               $opt_write_lang_def         ,
 1098                                               $opt_write_lang_def_incl_dup,
 1099                                               $opt_list_file              ,
 1100                                               $opt_diff_list_file         ,
 1101                                               $opt_vcs, $opt_xsl, $opt_explain );
 1102 if ($opt_diff and !$opt_sum_reports and !$opt_diff_list_file) {
 1103     # a diff without a list file needs exactly two, distinct, arguments
 1104     die "--diff requires two arguments; got ", scalar @ARGV, "\n"
 1105         if scalar @ARGV != 2;
 1106     die "--diff arguments are identical; nothing done", "\n"
 1107         if $ARGV[0] eq $ARGV[1];
 1108 }
 1109 trick_pp_packer_encode() if $opt_file_encoding and $ON_WINDOWS;
 1110 $File::Find::dont_use_nlink = 1 if $opt_stat or top_level_SMB_dir(\@ARGV);
 1111 my @git_similarity = (); # only populated with --git-diff-simindex
 1112 replace_git_hash_with_tarfile(\@ARGV, \@git_similarity);
 1113 # 1}}}
 1114 # Step 1:  Initialize global constants.        {{{1
 1115 # (the language lookup hashes declared here are populated further below)
 1116 my $nFiles_Found = 0;  # updated in make_file_list
 1117 my (%Language_by_Extension, %Language_by_Script,
 1118     %Filters_by_Language, %Not_Code_Extension, %Not_Code_Filename,
 1119     %Language_by_File, %Scale_Factor, %Known_Binary_Archives,
 1120     %EOL_Continuation_re,
 1121    );
 1122 my $ALREADY_SHOWED_HEADER = 0;       # both flags are reset to 0 at the start
 1123 my $ALREADY_SHOWED_XML_SECTION = 0;  # of every --count-and-diff pass
 1124 my %Error_Codes = ( 'Unable to read'                => -1,  # message => code
 1125                     'Neither file nor directory'    => -2,
 1126                     'Diff error (quoted comments?)' => -3,
 1127                     'Diff error, exceeded timeout'  => -4,
 1128                     'Line count, exceeded timeout'  => -5,
 1129                   );
 1130 my %Extension_Collision = (  # '/'-joined language names => [ extension(s) ], presumably the ones they share
 1131     'ADSO/IDSM'                                     => [ 'adso' ] ,
 1132     'C#/Smalltalk'                                  => [ 'cs'   ] ,
 1133     'D/dtrace'                                      => [ 'd'    ] ,
 1134     'F#/Forth'                                      => [ 'fs'   ] ,
 1135     'Fortran 77/Forth'                              => [ 'f', 'for' ] ,
 1136     'IDL/Qt Project/Prolog/ProGuard'                => [ 'pro'  ] ,
 1137     'Lisp/Julia'                                    => [ 'jl'   ] ,
 1138     'Lisp/OpenCL'                                   => [ 'cl'   ] ,
 1139     'MATLAB/Mathematica/Objective C/MUMPS/Mercury'  => [ 'm'    ] ,
 1140     'Pascal/Puppet'                                 => [ 'pp'   ] ,
 1141     'Perl/Prolog'                                   => [ 'pl', 'PL'  ] ,
 1142     'PHP/Pascal'                                    => [ 'inc'  ] ,
 1143     'Raku/Prolog'                                   => [ 'p6', 'P6'  ] ,
 1144     'Qt/Glade'                                      => [ 'ui'   ] ,
 1145     'TypeScript/Qt Linguist'                        => [ 'ts'   ] ,
 1146     'Verilog-SystemVerilog/Coq'                     => [ 'v'    ] ,
 1147     'Visual Basic/TeX/Apex Class'                   => [ 'cls'  ] ,
 1148     'Scheme/SaltStack'                              => [ 'sls'  ] ,
 1149 );
 1150 my @Autogen_to_ignore = no_autogen_files($list_no_autogen);
 1151 if ($opt_force_lang_def) {
 1152     # replace cloc's definitions with those read from $opt_force_lang_def
 1153     read_lang_def(
 1154         $opt_force_lang_def    , #        Sample values:
 1155         \%Language_by_Extension, # Language_by_Extension{f}    = 'Fortran 77'
 1156         \%Language_by_Script   , # Language_by_Script{sh}      = 'Bourne Shell'
 1157         \%Language_by_File     , # Language_by_File{makefile}  = 'make'
 1158         \%Filters_by_Language  , # Filters_by_Language{Bourne Shell}[0] =
 1159                                  #      [ 'remove_matches' , '^\s*#'  ]
 1160         \%Not_Code_Extension   , # Not_Code_Extension{jpg}     = 1
 1161         \%Not_Code_Filename    , # Not_Code_Filename{README}   = 1
 1162         \%Scale_Factor         , # Scale_Factor{Perl}          = 4.0
 1163         \%EOL_Continuation_re  , # EOL_Continuation_re{C++}    = '\\$'
 1164         );
 1165 } else {
 1166     set_constants(               # cloc's built-in definitions.  Sample values:
 1167         \%Language_by_Extension, # Language_by_Extension{f}    = 'Fortran 77'
 1168         \%Language_by_Script   , # Language_by_Script{sh}      = 'Bourne Shell'
 1169         \%Language_by_File     , # Language_by_File{makefile}  = 'make'
 1170         \%Filters_by_Language  , # Filters_by_Language{Bourne Shell}[0] =
 1171                                  #      [ 'remove_matches' , '^\s*#'  ]
 1172         \%Not_Code_Extension   , # Not_Code_Extension{jpg}     = 1
 1173         \%Not_Code_Filename    , # Not_Code_Filename{README}   = 1
 1174         \%Scale_Factor         , # Scale_Factor{Perl}          = 4.0
 1175         \%Known_Binary_Archives, # Known_Binary_Archives{.tar} = 1
 1176         \%EOL_Continuation_re  , # EOL_Continuation_re{C++}    = '\\$'
 1177         );
 1178         if ($opt_no_autogen) {  # NOTE(review): only done on this branch, not with $opt_force_lang_def -- intended?
 1179             foreach my $F (@Autogen_to_ignore) { $Not_Code_Filename{ $F } = 1; }
 1180         }
 1181 }
 1182 if ($opt_read_lang_def) {
 1183     # augment cloc's definitions (keep cloc's where there are overlaps)
 1184     merge_lang_def(
 1185         $opt_read_lang_def     , #        Sample values:
 1186         \%Language_by_Extension, # Language_by_Extension{f}    = 'Fortran 77'
 1187         \%Language_by_Script   , # Language_by_Script{sh}      = 'Bourne Shell'
 1188         \%Language_by_File     , # Language_by_File{makefile}  = 'make'
 1189         \%Filters_by_Language  , # Filters_by_Language{Bourne Shell}[0] =
 1190                                  #      [ 'remove_matches' , '^\s*#'  ]
 1191         \%Not_Code_Extension   , # Not_Code_Extension{jpg}     = 1
 1192         \%Not_Code_Filename    , # Not_Code_Filename{README}   = 1
 1193         \%Scale_Factor         , # Scale_Factor{Perl}          = 4.0
 1194         \%EOL_Continuation_re  , # EOL_Continuation_re{C++}    = '\\$'
 1195         );
 1196 }
 1197 if ($opt_lang_no_ext and !defined $Filters_by_Language{$opt_lang_no_ext}) {  # must name a language that has filters
 1198     die_unknown_lang($opt_lang_no_ext, "--lang-no-ext")
 1199 }
 1200 check_scale_existence(\%Filters_by_Language, \%Language_by_Extension,
 1201                       \%Scale_Factor);
 1202 
 1203 my $nCounted = 0;
 1204 
 1205 # Process command line provided extension-to-language mapping overrides.
 1206 # Make a hash of known languages in lower case for easier matching.
 1207 my %Recognized_Language_lc =     # key = language name in lc, value = true name
 1208     map { ( lc($_) => $_ ) }
 1209     keys %Filters_by_Language;
 1210 # (when two names differ only by case, whichever comes last in the
 1211 #  hash's key order wins)
 1212 my %Forced_Extension = (); # file name extensions which user wants to count
 1213 my $All_One_Language = 0;  # set to !0 if --force-lang's <ext> is missing
 1214 foreach my $pair (@opt_force_lang) {
 1215     # each entry is "<lang>" or "<lang>,<ext>"; <lang> is matched in lower case
 1216     my ($lang, $extension) = split(',', $pair);
 1217     my $lang_lc = lc $lang;
 1218     # an unknown language is fatal whether or not <ext> was given
 1219     die_unknown_lang($lang, "--force-lang")
 1220         unless $Recognized_Language_lc{$lang_lc};
 1221     if (defined $extension) {
 1222         $Forced_Extension{$extension}      = $lang;
 1223         $Language_by_Extension{$extension} = $Recognized_Language_lc{$lang_lc};
 1224     } else {
 1225         # the scary case--count everything as this language
 1226         $All_One_Language = $Recognized_Language_lc{$lang_lc};
 1227     }
 1228 }
 1229 
 1230 foreach my $lang_and_script (@opt_script_lang) {
 1231     my ($language, $interpreter) = split(',', $lang_and_script);
 1232     my $true_name = $Recognized_Language_lc{ lc $language };
 1233     # both halves of the pair are mandatory
 1234     die "The --script-lang option requires a comma separated pair of ".
 1235         "strings.\n"
 1236         unless defined $interpreter;
 1237 
 1238     # the language must be present in %Recognized_Language_lc
 1239     die_unknown_lang($language, "--script-lang") unless $true_name;
 1240 
 1241     $Language_by_Script{$interpreter} = $true_name;
 1242 }
 1243 
 1244 # If user provided file extensions to ignore, add these (and only
 1245 # these:  no key => 1 pairs, which would also exclude ".1") to the list.
 1246 foreach my $ext (split(/,/, $opt_exclude_ext)) {
 1247     $ext = lc $ext if $ON_WINDOWS or $opt_ignore_case_ext;
 1248     $Not_Code_Extension{$ext} = 1;
 1249 }
 1250 
 1251 # If SQL or --by-file output is requested, keep track of directory names
 1252 # generated by File::Temp::tempdir and used to temporarily hold the results
 1253 # of compressed archives.  Contents of the SQL table 't' will be much
 1254 # cleaner if these meaningless directory names are stripped from the front
 1255 # of files pulled from the archives.
 1256 my %TEMP_DIR = ();  # key = extraction directory, value = 1
 1257 my $TEMP_OFF =  0;  # Needed for --sdir; keep track of the number of
 1258                     # scratch directories made in this run to avoid
 1259                     # file overwrites by multiple extractions to same
 1260                     # sdir.
 1261 # Also track locations where temporary installations, if necessary, of
 1262 # Algorithm::Diff and/or Regexp::Common are done.  Make sure these
 1263 # directories are not counted as inputs (ref bug #80 2012-11-23).
 1264 my %TEMP_INST = (); # key = directory, value = name of what was installed there
 1265 
 1266 # collect the values of %Language_by_Script as the keys of a set to get an
 1267 # easy-to-look-up list of known scripting languages
 1268 my %Script_Language = map { $_ => 1 } values %Language_by_Script ;
 1269 # 1}}}
 1270 # Step 2:  Early exits for display, summation. {{{1
 1271 #
 1272 # Purely informational requests are answered here, after which cloc exits.
 1273 if (defined $opt_show_ext ) { print_extension_info(   $opt_show_ext     ); }
 1274 if (defined $opt_show_lang) { print_language_info(    $opt_show_lang, ''); }
 1275 if (defined $opt_explain  ) { print_language_filters( $opt_explain      ); }
 1276 exit if $list_no_autogen or grep { defined $_ } ( $opt_show_ext ,
 1277                                                   $opt_show_lang,
 1278                                                   $opt_explain  );
 1279 
 1280 Top_of_Processing_Loop:
 1281 # Sorry, coding purists.  Using a goto to implement --count-and-diff
 1282 # which has to do three passes over the main code, starting with
 1283 # a clean slate each time.
 1284 if ($opt_count_diff) {
 1285     # passes 1 and 2 count a single input; pass 3 diffs the two inputs
 1286     if ($opt_count_diff == 3) {
 1287         $opt_diff = 1;
 1288         @ARGV = @{$COUNT_DIFF_ARGV[ $opt_count_diff ]}; # last arg is list of list
 1289     } else {
 1290         @ARGV = ( $COUNT_DIFF_ARGV[ $opt_count_diff ] );
 1291     }
 1292     if ($opt_report_file) {
 1293         # Instead of just one output file, will have three.
 1294         # Keep their names unique otherwise results are clobbered.
 1295         # Replace file path separators with underscores otherwise
 1296         # may end up with illegal file names.
 1297         # On Windows both kinds of slash and the colon (as in a drive
 1298         # letter prefix) are replaced; elsewhere only the forward slash.
 1299         my $separator_re = $ON_WINDOWS ? qr{[\\/:]} : qr{/};
 1300         my ($fn_0, $fn_1) = @ARGV;   # $fn_1 is undefined on passes 1 and 2
 1301         foreach my $fn ($fn_0, $fn_1) {
 1302             $fn =~ s{$separator_re}{_}g if defined $fn;
 1303         }
 1304 
 1305         if ($opt_count_diff == 3) {
 1306             $opt_report_file = $COUNT_DIFF_report_file . ".diff.$fn_0.$fn_1";
 1307         } else {
 1308             $opt_report_file = $COUNT_DIFF_report_file . "." .  $fn_0;
 1309         }
 1310     } else {
 1311         # STDOUT; print a header showing what it's working on
 1312         if ($opt_count_diff == 3) {
 1313             print "\ndiff $ARGV[0] $ARGV[1]::\n";
 1314         } else {
 1315             print "\n" if $opt_count_diff > 1;
 1316             print "$ARGV[0]::\n";
 1317         }
 1318     }
 1319     # each pass starts with both "already showed" flags cleared
 1320     $ALREADY_SHOWED_HEADER      = 0;
 1321     $ALREADY_SHOWED_XML_SECTION = 0;
 1322 }
 1323 
 1324 #print "Before glob have [", join(",", @ARGV), "]\n";
 1325 if ($ON_WINDOWS) { @ARGV = windows_glob(@ARGV); }
 1326 #print "after  glob have [", join(",", @ARGV), "]\n";
 1327 
 1328 # filter out archive files if requested to do so
 1329 if (defined $opt_skip_archive) {
 1330     # $opt_skip_archive is treated as a regular expression; an input
 1331     # is dropped when the expression matches at the end of its name
 1332     my @keep = ();
 1333     foreach my $input (@ARGV) {
 1334         next if $input =~ m/${opt_skip_archive}$/;
 1335         push @keep, $input;
 1336     }
 1337     @ARGV = @keep;
 1338 }
 1339 
 1340 if ($opt_sum_reports and $opt_diff) {
 1341     # The inputs handed to combine_diffs() come, in order of
 1342     # preference, from the list file, from the VCS generator, or
 1343     # from the command line.
 1344     my $ra_inputs = \@ARGV;
 1345     if      ($opt_list_file) {
 1346         $ra_inputs = [ read_list_file($opt_list_file) ];
 1347     } elsif ($opt_vcs) {
 1348         $ra_inputs = [ invoke_generator($opt_vcs, \@ARGV) ];
 1349     }
 1350     my @diff_report = combine_diffs($ra_inputs);
 1351     if (!$opt_report_file) {
 1352         print "\n", join("\n", @diff_report), "\n";
 1353     } else {
 1354         write_file($opt_report_file, {}, @diff_report);
 1355     }
 1356     exit;
 1357 }
 1358 if ($opt_sum_reports) {
 1359     my %Results = ();
 1360     # Two passes are made over the same inputs, one for each kind of
 1361     # summary.
 1362     foreach my $type( "by language", "by report file" ) {
 1363         # The inputs are gathered anew on each pass:  from the VCS
 1364         # generator, else from the list file, else from the command line.
 1365         my $ra_inputs = \@ARGV;
 1366         if      ($opt_vcs) {
 1367             $ra_inputs = [ invoke_generator($opt_vcs, \@ARGV) ];
 1368         } elsif ($opt_list_file) {
 1369             $ra_inputs = [ read_list_file($opt_list_file) ];
 1370         }
 1371         my $found_lang = combine_results($ra_inputs,
 1372                                          $type,
 1373                                         \%{$Results{ $type }},
 1374                                         \%Filters_by_Language );
 1375         next unless %Results;
 1376 
 1377         my $end_time = get_time();
 1378         my @results  = generate_report($VERSION, $end_time - $start_time,
 1379                                        $type,
 1380                                       \%{$Results{ $type }}, \%Scale_Factor);
 1381         if (!$opt_report_file) {
 1382             print "\n", join("\n", @results), "\n";
 1383             next;
 1384         }
 1385 
 1386         # Each summary goes to its own file, named by appending .lang
 1387         # or .file to the report file name.  The .lang file is not
 1388         # written when combine_results() returned a false value.
 1389         my $ext = ($type eq "by language") ? ".lang" : ".file";
 1390         next if $ext eq ".lang" and !$found_lang;
 1391         write_file($opt_report_file . $ext, {}, @results);
 1392     }
 1393     # summation is all that was asked for; nothing is counted
 1394     exit;
 1395 }
 1396 if ($opt_write_lang_def or $opt_write_lang_def_incl_dup) {
 1397     # the "incl dup" file name, when given, takes precedence over the plain one
 1398     my $file = $opt_write_lang_def_incl_dup || $opt_write_lang_def;
 1399     write_lang_def($file                 ,
 1400                   \%Language_by_Extension,
 1401                   \%Language_by_Script   ,
 1402                   \%Language_by_File     ,
 1403                   \%Filters_by_Language  ,
 1404                   \%Not_Code_Extension   ,
 1405                   \%Not_Code_Filename    ,
 1406                   \%Scale_Factor         ,
 1407                   \%EOL_Continuation_re  ,
 1408                   );
 1409     exit;
 1410 }
 1411 if ($opt_show_os) {
 1412     # prints Windows or UNIX, according to $ON_WINDOWS
 1413     my $os_name = "UNIX\n";
 1414        $os_name = "Windows\n" if $ON_WINDOWS;
 1415     print $os_name;
 1416     exit;
 1417 }
 1418 
 1419 # a value of 0 means multiprocessing is disabled
 1420 my $max_processes = get_max_processes();
 1421 
 1422 # 1}}}
 1423 # Step 3:  Create a list of files to consider. {{{1
 1424 #  a) If inputs are binary archives, first cd to a temp
 1425 #     directory, expand the archive with the user-given
 1426 #     extraction tool, then add the temp directory to
 1427 #     the list of dirs to process.
 1428 #  b) Create a list of every file that might contain source
 1429 #     code.  Ignore binary files, zero-sized files, and
 1430 #     any file in a directory the user says to exclude.
 1431 #  c) Determine the language for each file in the list.
 1432 #
 1433 my @binary_archive = ();       # refilled on every pass of the nested archive search below
 1434 my $cwd            = cwd();    # starting directory; the code below chdir's back to it
 1435 if ($opt_extract_with) {
 1436 #print "cwd main = [$cwd]\n";
 1437     my @extract_location = ();
 1438     foreach my $bin_file (@ARGV) {
 1439         my $extract_dir = undef;
 1440         if ($opt_sdir) {
 1441             ++$TEMP_OFF;
 1442             $extract_dir = "$opt_sdir/$TEMP_OFF";
 1443             File::Path::rmtree($extract_dir) if     is_dir($extract_dir);
 1444             File::Path::mkpath($extract_dir) unless is_dir($extract_dir);
 1445         } else {
 1446             $extract_dir = tempdir( CLEANUP => 1 );  # 1 = delete on exit
 1447         }
 1448         $TEMP_DIR{ $extract_dir } = 1 if $opt_sql or $opt_by_file;
 1449         print "mkdir $extract_dir\n"  if $opt_v;
 1450         print "cd    $extract_dir\n"  if $opt_v;
 1451         chdir $extract_dir;  # NOTE(review): the result of chdir is not checked
 1452         my $bin_file_full_path = "";
 1453         if (File::Spec->file_name_is_absolute( $bin_file )) {
 1454             $bin_file_full_path = $bin_file;
 1455 #print "bin_file_full_path (was ful) = [$bin_file_full_path]\n";
 1456         } else {
 1457             $bin_file_full_path = File::Spec->catfile( $cwd, $bin_file );
 1458 #print "bin_file_full_path (was rel) = [$bin_file_full_path]\n";
 1459         }
 1460         my     $extract_cmd = uncompress_archive_cmd($bin_file_full_path);
 1461         print  $extract_cmd, "\n" if $opt_v;
 1462         system $extract_cmd;  # NOTE(review): the exit status is ignored
 1463         push @extract_location, $extract_dir;
 1464         chdir $cwd;
 1465     }
 1466     # It is possible that the binary archive itself contains additional
 1467     # files compressed the same way (true for Java .ear files).  Go
 1468     # through all the files that were extracted, see if they are binary
 1469     # archives and try to extract them.  Lather, rinse, repeat.
 1470     my $binary_archives_exist = 1;
 1471     my $count_binary_archives = 0;
 1472     my $previous_count        = 0;
 1473     my $n_pass                = 0;  # NOTE(review): not used anywhere in this loop
 1474     while ($binary_archives_exist) {
 1475         @binary_archive = ();
 1476         foreach my $dir (@extract_location) {
 1477             find(\&archive_files, $dir);  # populates global @binary_archive
 1478         }
 1479         foreach my $archive (@binary_archive) {
 1480             my $extract_dir = undef;
 1481             if ($opt_sdir) {
 1482                 ++$TEMP_OFF;
 1483                 $extract_dir = "$opt_sdir/$TEMP_OFF";
 1484                 File::Path::rmtree($extract_dir) if     is_dir($extract_dir);
 1485                 File::Path::mkpath($extract_dir) unless is_dir($extract_dir);
 1486             } else {
 1487                 $extract_dir = tempdir( CLEANUP => 1 );  # 1 = delete on exit
 1488             }
 1489             $TEMP_DIR{ $extract_dir } = 1 if $opt_sql or $opt_by_file;
 1490             print "mkdir $extract_dir\n"  if $opt_v;
 1491             print "cd    $extract_dir\n"  if $opt_v;
 1492             chdir  $extract_dir;  # NOTE(review): no chdir back to $cwd inside this loop; a relative $opt_sdir may then resolve elsewhere -- confirm
 1493 
 1494             my     $extract_cmd = uncompress_archive_cmd($archive);
 1495             print  $extract_cmd, "\n" if $opt_v;
 1496             system $extract_cmd;
 1497             push @extract_location, $extract_dir;
 1498             unlink $archive;  # otherwise will be extracting it forever
 1499         }
 1500         $count_binary_archives = scalar @binary_archive;
 1501         if ($count_binary_archives == $previous_count) {  # as many found as on the previous pass:  stop
 1502             $binary_archives_exist = 0;
 1503         }
 1504         $previous_count = $count_binary_archives;
 1505     }
 1506     chdir $cwd;
 1507 
 1508     @ARGV = @extract_location;  # from here on the inputs are the extraction directories
 1509 } else {
 1510     # see if any of the inputs need to be auto-uncompressed &/or expanded
 1511     my @updated_ARGS = ();
 1512     replace_git_hash_with_tarfile(\@ARGV, \@git_similarity) if $opt_force_git;
 1513     foreach my $Arg (@ARGV) {
 1514         if (is_dir($Arg)) {
 1515             push @updated_ARGS, $Arg;
 1516             next;
 1517         }
 1518         my $full_path = "";
 1519         if (File::Spec->file_name_is_absolute( $Arg )) {
 1520             $full_path = $Arg;
 1521         } else {
 1522             $full_path = File::Spec->catfile( $cwd, $Arg );
 1523         }
 1524 #print "full_path = [$full_path]\n";
 1525         my $extract_cmd = uncompress_archive_cmd($full_path);
 1526         if ($extract_cmd) {  # a true value is taken as the command which expands this input
 1527             my $extract_dir = undef;
 1528             if ($opt_sdir) {
 1529                 ++$TEMP_OFF;
 1530                 $extract_dir = "$opt_sdir/$TEMP_OFF";
 1531                 File::Path::rmtree($extract_dir) if     is_dir($extract_dir);
 1532                 File::Path::mkpath($extract_dir) unless is_dir($extract_dir);
 1533             } else {
 1534                 $extract_dir = tempdir( CLEANUP => 1 ); # 1 = delete on exit
 1535             }
 1536             $TEMP_DIR{ $extract_dir } = 1 if $opt_sql or $opt_by_file;
 1537             print "mkdir $extract_dir\n"  if $opt_v;
 1538             print "cd    $extract_dir\n"  if $opt_v;
 1539             chdir  $extract_dir;
 1540             print  $extract_cmd, "\n" if $opt_v;
 1541             system $extract_cmd;
 1542             push @updated_ARGS, $extract_dir;  # the extraction directory replaces the input
 1543             chdir $cwd;
 1544         } else {
 1545             # this is a conventional, uncompressed, unarchived file
 1546             # or a directory; keep as-is
 1547             push @updated_ARGS, $Arg;
 1548         }
 1549     }
 1550     @ARGV = @updated_ARGS;
 1551 
 1552     # make sure we're not counting any directory containing
 1553     # temporary installations of Regexp::Common, Algorithm::Diff
 1554     foreach my $d (sort keys %TEMP_INST) {
 1555         foreach my $a (@ARGV) {
 1556             next unless is_dir($a);
 1557             if ($opt_v > 2) {
 1558                 printf "Comparing %s (location of %s) to input [%s]\n",
 1559                         $d, $TEMP_INST{$d}, $a;
 1560             }
 1561             if ($a eq $d) {  # plain string comparison of the two directory names
 1562                 die "File::Temp::tempdir chose directory ",
 1563                     $d, " to install ", $TEMP_INST{$d}, " but this ",
 1564                     "matches one of your input directories.  Rerun ",
 1565                     "with --sdir and supply a different temporary ",
 1566                     "directory for ", $TEMP_INST{$d}, "\n";
 1567             }
 1568         }
 1569     }
 1570 }
 1571 # 1}}}
 1572 my @Errors    = ();
 1573 my @file_list = ();  # global variable updated in files()
 1574 my %Ignored   = ();  # files that are not counted (language not recognized or
 1575                      # problems reading the file)
 1576 my @Lines_Out = ();
 1577 if ($opt_diff) {
 1578 # Step 4:  Separate code from non-code files.  {{{1
 1579 my @fh            = ();
 1580 my @files_for_set = ();
 1581 my @files_added_tot = ();
 1582 my @files_removed_tot = ();
 1583 my @file_pairs_tot = ();
 1584 # make file lists for each separate argument
 1585 if ($opt_diff_list_file) {
 1586     @files_for_set = ( (), () );
 1587     file_pairs_from_file($opt_diff_list_file, # in
 1588                         \@files_added_tot   , # out
 1589                         \@files_removed_tot , # out
 1590                         \@file_pairs_tot    , # out
 1591                        );
 1592     foreach my $F (@files_added_tot) {
 1593         push @{$files_for_set[1]}, $F;
 1594     }
 1595     foreach my $F (@files_removed_tot) {
 1596         push @{$files_for_set[0]}, $F;
 1597     }
 1598     foreach my $pair (@file_pairs_tot) {
 1599         push @{$files_for_set[0]}, $pair->[0];
 1600         push @{$files_for_set[1]}, $pair->[1];
 1601     }
 1602     @ARGV = (1, 2); # place holders
 1603 }
 1604 for (my $i = 0; $i < scalar @ARGV; $i++) {
 1605     if ($opt_diff_list_file) {
 1606         push @fh, make_file_list($files_for_set[$i],
 1607                                 \%Error_Codes, \@Errors, \%Ignored);
 1608         @{$files_for_set[$i]} = @file_list;
 1609     } else {
 1610         push @fh, make_file_list([ $ARGV[$i] ],
 1611                                 \%Error_Codes, \@Errors, \%Ignored);
 1612         @{$files_for_set[$i]} = @file_list;
 1613     }
 1614     if ($opt_exclude_list_file) {
 1615         # note: process_exclude_list_file() references global @file_list
 1616         process_exclude_list_file($opt_exclude_list_file,
 1617                                  \%Exclude_Dir,
 1618                                  \%Ignored);
 1619     }
 1620     if ($opt_no_autogen) {
 1621         exclude_autogenerated_files(\@{$files_for_set[$i]},  # in/out
 1622                                     \%Error_Codes, \@Errors, \%Ignored);
 1623     }
 1624     @file_list = ();
 1625 }
 1626 # 1}}}
 1627 # Step 5:  Remove duplicate files.             {{{1
 1628 #
 1629 my %Language           = ();
 1630 my %unique_source_file = ();
 1631 my $n_set = 0;
 1632 foreach my $FH (@fh) {  # loop over each of the file sets
 1633     ++$n_set;
 1634     remove_duplicate_files($FH,
 1635                                \%{$Language{$FH}}               ,
 1636                                \%{$unique_source_file{$FH}}     ,
 1637                           \%Error_Codes                         ,
 1638                                \@Errors                         ,
 1639                                \%Ignored                        );
 1640     if ($opt_exclude_content) {
 1641         exclude_by_regex($opt_exclude_content,              # in
 1642                         \%{$unique_source_file{$FH}},       # in/out
 1643                         \%Ignored);                         # out
 1644     }
 1645 
 1646     if ($opt_include_lang) {
 1647         # remove files associated with languages not
 1648         # specified by --include-lang
 1649         my @delete_file = ();
 1650         foreach my $file (keys %{$unique_source_file{$FH}}) {
 1651             my $keep_file = 0;
 1652             foreach my $keep_lang (keys %Include_Language) {
 1653                 if ($Language{$FH}{$file} eq $keep_lang) {
 1654                     $keep_file = 1;
 1655                     last;
 1656                 }
 1657             }
 1658             next if $keep_file;
 1659             push @delete_file, $file;
 1660         }
 1661         foreach my $file (@delete_file) {
 1662             delete $Language{$FH}{$file};
 1663         }
 1664     }
 1665     # progress line:  count the files of this set, not the number of sets
 1666     printf "%2d: %8d unique file%s.                          \r",
 1667         $n_set,
 1668         plural_form(scalar keys %{$unique_source_file{$FH}})
 1669         unless $opt_quiet;
 1670 }
 1671 # 1}}}
 1672 # Step 6:  Count code, comments, blank lines.  {{{1
 1673 #
 1674 my %Results_by_Language = ();
 1675 my %Results_by_File     = ();
 1676 my %Delta_by_Language   = ();
 1677 my %Delta_by_File       = ();
 1678 
 1679 my %alignment = ();
 1680 
 1681 my $fset_a = $fh[0];
 1682 my $fset_b = $fh[1];
 1683 
 1684 my $n_filepairs_compared = 0;
 1685 my $tot_counted = 0;
 1686 
 1687 if ( scalar @fh != 2 ) {
 1688     print "Error: incorrect length fh array when preparing diff at step 6.\n";
 1689     exit 1;
 1690 }
 1691 if (!$opt_diff_list_file) {
 1692     align_by_pairs(\%{$unique_source_file{$fset_a}}      , # in
 1693                    \%{$unique_source_file{$fset_b}}      , # in
 1694                    \@files_added_tot                     , # out
 1695                    \@files_removed_tot                   , # out
 1696                    \@file_pairs_tot                      , # out
 1697                   );
 1698 }
 1699 
 1700 #use Data::Dumper;
 1701 #print "added : ", Dumper(\@files_added_tot);
 1702 #print "removed : ", Dumper(\@files_removed_tot);
 1703 #print "pairs : ", Dumper(\@file_pairs_tot);
 1704 
 1705 if ( $max_processes == 0) {
 1706     # Multiprocessing is disabled
 1707     my $part = count_filesets ( $fset_a, $fset_b, \@files_added_tot,
 1708                                \@files_removed_tot, \@file_pairs_tot,
 1709                                0, \%Language, \%Ignored);
 1710     %Results_by_File = %{$part->{'results_by_file'}};
 1711     %Results_by_Language= %{$part->{'results_by_language'}};
 1712     %Delta_by_File = %{$part->{'delta_by_file'}};
 1713     %Delta_by_Language= %{$part->{'delta_by_language'}};
 1714     %Ignored = ( %Ignored, %{$part->{'ignored'}});
 1715     %alignment = %{$part->{'alignment'}};
 1716     $n_filepairs_compared = $part->{'n_filepairs_compared'};
 1717     push ( @Errors, @{$part->{'errors'}});
 1718 } else {
 1719     # Multiprocessing is enabled
 1720     # Do not create more processes than the amount of data to be processed
 1721     my $num_processes = min(max(scalar @files_added_tot,
 1722                                 scalar @files_removed_tot,
 1723                                 scalar @file_pairs_tot),
 1724                             $max_processes);
 1725     # ... but use at least one process.
 1726        $num_processes = 1
 1727             if $num_processes == 0;
 1728     # Start processes for counting
 1729     my $pm = Parallel::ForkManager->new($num_processes);
 1730     # When processes finish, they will use the embedded subroutine for
 1731     # merging the data into global variables.
 1732     $pm->run_on_finish ( sub {
 1733         my ($pid, $exit_code, $ident, $exit_signal, $core_dump, $part) = @_;
 1734         my $part_ignored = $part->{'ignored'};
 1735         my $part_result_by_file = $part->{'results_by_file'};
 1736         my $part_result_by_language = $part->{'results_by_language'};
 1737         my $part_delta_by_file = $part->{'delta_by_file'};
 1738         my $part_delta_by_language = $part->{'delta_by_language'};
 1739         my $part_alignment = $part->{'alignment'};
 1740         my $part_errors = $part->{'errors'};
 1741            $tot_counted += scalar keys %$part_result_by_file;
 1742            $n_filepairs_compared += $part->{'n_filepairs_compared'};
 1743         # Since files are processed by multiple processes, we can't measure
 1744         # the number of processed files exactly. We approximate this by showing
 1745         # the number of files counted by finished processes.
 1746         printf "Counting:  %d\r", $tot_counted
 1747                  if $opt_progress_rate;
 1748 
 1749         foreach my $this_language ( keys %$part_result_by_language ) {
 1750             my $counts = $part_result_by_language->{$this_language};
 1751             foreach my $inner_key ( keys %$counts ) {
 1752                 $Results_by_Language{$this_language}{$inner_key} +=
 1753                     $counts->{$inner_key};
 1754             }
 1755         }
 1756 
 1757         foreach my $this_language ( keys %$part_delta_by_language ) {
 1758             my $counts = $part_delta_by_language->{$this_language};
 1759             foreach my $inner_key ( keys %$counts ) {
 1760                 my $statuses = $counts->{$inner_key};
 1761                 foreach my $inner_status ( keys %$statuses ) {
 1762                     $Delta_by_Language{$this_language}{$inner_key}{$inner_status} +=
 1763                           $counts->{$inner_key}->{$inner_status};
 1764                 }
 1765             }
 1766         }
 1767 
 1768         foreach my $label ( keys %$part_alignment ) {
 1769             my $inner = $part_alignment->{$label};
 1770             foreach my $key ( keys %$inner ) {
 1771                 $alignment{$label}{$key} = 1;
 1772             }
 1773         }
 1774 
 1775         %Results_by_File = ( %Results_by_File, %$part_result_by_file );
 1776         %Delta_by_File = ( %Delta_by_File, %$part_delta_by_file );
 1777         %Ignored = (%Ignored, %$part_ignored );
 1778         push ( @Errors, @$part_errors );
 1779     } );
 1780 
 1781     my $num_filepairs_per_part = ceil ( ( scalar @file_pairs_tot ) / $num_processes );
 1782     my $num_filesremoved_per_part = ceil ( ( scalar @files_removed_tot ) / $num_processes );
 1783     my $num_filesadded_per_part = ceil ( ( scalar @files_added_tot ) / $num_processes );
 1784 
 1785     while ( 1 ) {
 1786         my @files_added_part = splice @files_added_tot, 0, $num_filesadded_per_part;
 1787         my @files_removed_part = splice @files_removed_tot, 0, $num_filesremoved_per_part;
 1788         my @filepairs_part = splice @file_pairs_tot, 0, $num_filepairs_per_part;
 1789         if ( scalar @files_added_part == 0 and scalar @files_removed_part == 0 and
 1790              scalar @filepairs_part == 0 ) {
 1791             last;
 1792         }
 1793 
 1794         $pm->start() and next;
 1795         my $count_result = count_filesets ( $fset_a, $fset_b,
 1796             \@files_added_part, \@files_removed_part,
 1797             \@filepairs_part, 1, \%Language, \%Ignored );
 1798         $pm->finish(0 , $count_result);
 1799     }
 1800     # Wait for processes to finish
 1801     $pm->wait_all_children();
 1802 }
 1803 
 1804 # Write alignment data, if needed
 1805 if ($opt_diff_alignment) {
 1806     write_alignment_data ( $opt_diff_alignment, $n_filepairs_compared, \%alignment ) ;
 1807 }
 1808 
 1809 my @ignored_reasons = map { "$_: $Ignored{$_}" } sort keys %Ignored;
 1810 write_file($opt_ignored, {"file_type" => "ignored",
 1811                           "separator" => ": ",
 1812                           "columns"   => ["file", "reason"],
 1813                          }, @ignored_reasons   ) if $opt_ignored;
 1814 write_file($opt_counted, {}, sort keys %Results_by_File) if $opt_counted;
 1815 # 1}}}
 1816 # Step 7:  Assemble results.                   {{{1
 1817 #
 1818 my $end_time = get_time();
 1819 printf "%8d file%s ignored.                           \n",
 1820     plural_form(scalar keys %Ignored) unless $opt_quiet;
 1821 print_errors(\%Error_Codes, \@Errors) if @Errors;
 1822 if (!%Delta_by_Language) {
 1823     print "Nothing to count.\n";
 1824     exit;
 1825 }
 1826 
 1827 if ($opt_by_file) {
 1828     @Lines_Out = diff_report($VERSION, get_time() - $start_time,
 1829                             "by file",
 1830                             \%Delta_by_File, \%Scale_Factor);
 1831 } else {
 1832     @Lines_Out = diff_report($VERSION, get_time() - $start_time,
 1833                             "by language",
 1834                             \%Delta_by_Language, \%Scale_Factor);
 1835 }
 1836 
 1837 # 1}}}
 1838 } else {
 1839 # Step 4:  Separate code from non-code files.  {{{1
 1840 my $fh = 0;
 1841 if ($opt_list_file or $opt_vcs) {
 1842     my @list;
 1843     if ($opt_vcs) {
 1844         @list = invoke_generator($opt_vcs, \@ARGV);
 1845     } else {
 1846         @list = read_list_file($opt_list_file);
 1847     }
 1848     $fh = make_file_list(\@list, \%Error_Codes, \@Errors, \%Ignored);
 1849 } else {
 1850     $fh = make_file_list(\@ARGV, \%Error_Codes, \@Errors, \%Ignored);
 1851     #     make_file_list populates global variable @file_list via call to
 1852     #     File::Find's find() which in turn calls files()
 1853 }
 1854 if ($opt_exclude_list_file) {
 1855     # note: process_exclude_list_file() references global @file_list
 1856     process_exclude_list_file($opt_exclude_list_file,
 1857                              \%Exclude_Dir,
 1858                              \%Ignored);
 1859 }
 1860 if ($opt_skip_win_hidden and $ON_WINDOWS) {
 1861     my @file_list_minus_hidded = ();
 1862     # eval code to run on Unix without 'missing Win32::File module' error.
 1863     my $win32_file_invocation = '
 1864         use Win32::File;
 1865         foreach my $F (@file_list) {
 1866             my $attr = undef;
 1867             Win32::File::GetAttributes($F, $attr);
 1868             if ($attr & HIDDEN) {
 1869                 $Ignored{$F} = "Windows hidden file";
 1870                 print "Ignoring $F since it is a Windows hidden file\n"
 1871                     if $opt_v > 1;
 1872             } else {
 1873                 push @file_list_minus_hidded, $F;
 1874             }
 1875         }';
 1876     eval $win32_file_invocation;
 1877     @file_list = @file_list_minus_hidded;
 1878 }
 1879 if ($opt_no_autogen) {
 1880     exclude_autogenerated_files(\@file_list,  # in/out
 1881                                 \%Error_Codes, \@Errors, \%Ignored);
 1882 }
 1883 #printf "%8d file%s excluded.                     \n",
 1884 #   plural_form(scalar keys %Ignored)
 1885 #   unless $opt_quiet;
 1886 # die print ": ", join("\n: ", @file_list), "\n";
 1887 # 1}}}
 1888 # Step 5:  Remove duplicate files.             {{{1
 1889 #
 1890 my %Language           = ();
 1891 my %unique_source_file = ();
 1892 remove_duplicate_files($fh                          ,   # in
 1893                            \%Language               ,   # out
 1894                            \%unique_source_file     ,   # out
 1895                       \%Error_Codes                 ,   # in
 1896                            \@Errors                 ,   # out
 1897                            \%Ignored                );  # out
 1898 if ($opt_exclude_content) {
 1899     exclude_by_regex($opt_exclude_content,              # in
 1900                     \%unique_source_file ,              # in/out
 1901                     \%Ignored);                         # out
 1902 }
 1903 printf "%8d unique file%s.                              \n",
 1904     plural_form(scalar keys %unique_source_file)
 1905     unless $opt_quiet;
 1906 # 1}}}
 1907 # Step 6:  Count code, comments, blank lines.  {{{1
 1908 #
 1909 my %Results_by_Language = ();
 1910 my %Results_by_File     = ();
 1911 my @results_parts  = ();
 1912 my @sorted_files = sort keys %unique_source_file;
 1913 
 1914 if ( $max_processes == 0) {
 1915     # Multiprocessing is disabled
 1916     my $part = count_files ( \@sorted_files , 0, \%Language);
 1917     %Results_by_File = %{$part->{'results_by_file'}};
 1918     %Results_by_Language= %{$part->{'results_by_language'}};
 1919     %Ignored = ( %Ignored, %{$part->{'ignored'}});
 1920     push ( @Errors, @{$part->{'errors'}});
 1921 } else {
 1922     # Do not create more processes than the number of files to be processed
 1923     my $num_files = scalar @sorted_files;
 1924     my $num_processes = $num_files >= $max_processes ? $max_processes : $num_files;
 1925     # Use at least one process.
 1926        $num_processes = 1
 1927             if $num_processes == 0;
 1928     # Start processes for counting
 1929     my $pm = Parallel::ForkManager->new($num_processes);
 1930     # When processes finish, they will use the embedded subroutine for
 1931     # merging the data into global variables.
 1932     $pm->run_on_finish ( sub {
 1933         my ($pid, $exit_code, $ident, $exit_signal, $core_dump, $part) = @_;
 1934         my $part_ignored = $part->{'ignored'};
 1935         my $part_result_by_file = $part->{'results_by_file'};
 1936         my $part_result_by_language = $part->{'results_by_language'};
 1937         my $part_errors = $part->{'errors'};
 1938         my $nCounted+= scalar keys %$part_result_by_file;
 1939         # Since files are processed by multiple processes, we can't measure
 1940         # the number of processed files exactly. We approximate this by showing
 1941         # the number of files counted by finished processes.
 1942         printf "Counting:  %d\r", $nCounted
 1943                  if $opt_progress_rate;
 1944 
 1945         foreach my $this_language ( keys %$part_result_by_language ) {
 1946             my $counts = $part_result_by_language->{$this_language};
 1947             foreach my $inner_key ( keys %$counts ) {
 1948                 $Results_by_Language{$this_language}{$inner_key} +=
 1949                     $counts->{$inner_key};
 1950             }
 1951         }
 1952         %Results_by_File = ( %Results_by_File, %$part_result_by_file );
 1953         %Ignored = (%Ignored, %$part_ignored);
 1954         push ( @Errors, @$part_errors);
 1955     } );
 1956     my $num_files_per_part = ceil ( ( scalar @sorted_files ) / $num_processes );
 1957     while ( my @part = splice @sorted_files, 0 , $num_files_per_part ) {
 1958         $pm->start() and next;
 1959         my $count_result = count_files ( \@part, 1, \%Language );
 1960         $pm->finish(0 , $count_result);
 1961     }
 1962     # Wait for processes to finish
 1963     $pm->wait_all_children();
 1964 }
 1965 
 1966 my @ignored_reasons = map { "$_: $Ignored{$_}" } sort keys %Ignored;
 1967 write_file($opt_ignored, {"file_type" => "ignored",
 1968                           "separator" => ": ",
 1969                           "columns"   => ["file", "reason"],
 1970                          }, @ignored_reasons   ) if $opt_ignored;
 1971 write_file($opt_counted, {}, sort keys %Results_by_File) if $opt_counted;
 1972 # 1}}}
 1973 # Step 7:  Assemble results.                   {{{1
 1974 #
 1975 my $end_time = get_time();
 1976 printf "%8d file%s ignored.\n", plural_form(scalar keys %Ignored)
 1977     unless $opt_quiet;
 1978 print_errors(\%Error_Codes, \@Errors) if @Errors;
 1979 exit unless %Results_by_Language;
 1980 
 1981 generate_sql($end_time - $start_time,
 1982             \%Results_by_File, \%Scale_Factor) if $opt_sql;
 1983 
 1984 exit if $skip_generate_report;
 1985 if      ($opt_by_file_by_lang) {
 1986     push @Lines_Out, generate_report( $VERSION, $end_time - $start_time,
 1987                                       "by file",
 1988                                       \%Results_by_File,    \%Scale_Factor);
 1989     push @Lines_Out, generate_report( $VERSION, $end_time - $start_time,
 1990                                       "by language",
 1991                                       \%Results_by_Language, \%Scale_Factor);
 1992 } elsif ($opt_by_file) {
 1993     push @Lines_Out, generate_report( $VERSION, $end_time - $start_time,
 1994                                       "by file",
 1995                                       \%Results_by_File,    \%Scale_Factor);
 1996 } else {
 1997     push @Lines_Out, generate_report( $VERSION, $end_time - $start_time,
 1998                                       "by language",
 1999                                       \%Results_by_Language, \%Scale_Factor);
 2000 }
 2001 # 1}}}
 2002 }
 2003 if ($opt_report_file) { write_file($opt_report_file, {}, @Lines_Out); }
 2004 else                  { print "\n", join("\n", @Lines_Out), "\n"; }
 2005 if ($opt_count_diff) {
 2006     ++$opt_count_diff;
 2007     exit if $opt_count_diff > 3;
 2008     goto Top_of_Processing_Loop;
 2009 }
 2010 sub exclude_by_regex {                       # {{{1
 2011     my ($regex,
 2012         $rh_unique_source_file, # in/out
 2013         $rh_ignored           , # out
 2014        ) = @_;
 2015     my @exclude = ();
 2016     foreach my $file (keys %{$rh_unique_source_file}) {
 2017         my $line_num = 0;
 2018         foreach my $line (read_file($file)) {
 2019             ++$line_num;
 2020             if ($line =~ /$regex/) {
 2021                 $rh_ignored->{$file} = "line $line_num match for --exclude-content=$regex";
 2022                 push @exclude, $file;
 2023                 last;
 2024             }
 2025         }
 2026     }
 2027     foreach my $file (@exclude) {
 2028         delete $rh_unique_source_file->{$file};
 2029     }
 2030 } # 1}}}
 2031 sub get_max_processes {                      # {{{1
 2032     # If user has specified valid number of processes, use that.
 2033     if (defined $opt_processes) {
 2034         eval "use Parallel::ForkManager 0.7.6;";
 2035         if ( defined $Parallel::ForkManager::VERSION ) {
 2036             $HAVE_Parallel_ForkManager = 1;
 2037         }
 2038         if ( $opt_processes !~ /^\d+$/ ) {
 2039             print "Error: processes option argument must be numeric.\n";
 2040             exit 1;
 2041         }
 2042         elsif ( $opt_processes >0 and ! $HAVE_Parallel_ForkManager ) {
 2043             print "Error: cannot use multiple processes, because " .
 2044                   "Parallel::ForkManager is not installed, or the version is too old.\n";
 2045             exit 1;
 2046         }
 2047     elsif ( $opt_processes >0 and $ON_WINDOWS ) {
 2048             print "Error: cannot use multiple processes on Windows systems.\n";
 2049             exit 1;
 2050         }
 2051         else {
 2052             return $opt_processes;
 2053         }
 2054     }
 2055 
 2056     # Disable multiprocessing on Windows - does not work reliably
 2057     if ($ON_WINDOWS) {
 2058         return 0;
 2059     }
 2060 
 2061     # Disable multiprocessing if Parallel::ForkManager is not available
 2062     if ( ! $HAVE_Parallel_ForkManager ) {
 2063         return 0;
 2064     }
 2065 
 2066     # Set to number of cores on Linux
 2067     if ( $^O =~ /linux/i and -x '/usr/bin/nproc' ) {
 2068         my $numavcores_linux = `/usr/bin/nproc`;
 2069         chomp $numavcores_linux;
 2070         if ( $numavcores_linux =~ /^\d+$/ ) {
 2071             return $numavcores_linux;
 2072         }
 2073     }
 2074 
 2075     # Set to number of cores on MacOS
 2076     if ( $^O =~ /darwin/i and -x '/usr/sbin/sysctl') {
 2077        my $numavcores_macos = `/usr/sbin/sysctl -n hw.physicalcpu`;
 2078        chomp $numavcores_macos;
 2079        if ($numavcores_macos =~ /^\d+$/ ) {
 2080            return $numavcores_macos;
 2081        }
 2082     }
 2083 
 2084     # Disable multiprocessing in other cases
 2085     return 0;
 2086 } # 1}}}
 2087 sub exclude_autogenerated_files {            # {{{1
 2088     my ($ra_file_list, # in
 2089         $rh_Err      , # in   hash of error codes
 2090         $raa_errors  , # out
 2091         $rh_Ignored  , # out
 2092        ) = @_;
 2093     print "-> exclude_autogenerated_files()\n" if $opt_v > 2;
 2094     my @file_list_minus_autogen = ();
 2095     foreach my $file (@{$ra_file_list}) {
 2096         if ($file !~ /\.go$/) {
 2097             # at the moment, only know Go autogenerated files
 2098             push @file_list_minus_autogen, $file;
 2099             next;
 2100         }
 2101         my $first_line = first_line($file, $rh_Err, $raa_errors);
 2102         if ($first_line =~ m{^//\s+Code\s+generated\s+.*?\s+DO\s+NOT\s+EDIT\.$}) {
 2103             $rh_Ignored->{$file} = 'Go autogenerated file';
 2104         } else {
 2105             # Go, but not autogenerated
 2106             push @file_list_minus_autogen, $file;
 2107         }
 2108     }
 2109     @{$ra_file_list} = @file_list_minus_autogen;
 2110     print "<- exclude_autogenerated_files()\n" if $opt_v > 2;
 2111 } # 1}}}
 2112 sub file_extension {                         # {{{1
 2113     my ($fname, ) = @_;
 2114     $fname =~ m/\.(\w+)$/;
 2115     if ($1) {
 2116         return $1;
 2117     } else {
 2118         return "";
 2119     }
 2120 } # 1}}}
 2121 sub count_files {                            # {{{1
 2122     my ($filelist, $counter_type, $language_hash) = @_;
 2123     print "-> count_files()\n" if $opt_v > 2;
 2124     my @p_errors = ();
 2125     my %p_ignored = ();
 2126     my %p_rbl = ();
 2127     my %p_rbf = ();
 2128     my %Language = %{$language_hash};
 2129 
 2130     foreach my $file (@$filelist) {
 2131         if ( ! $counter_type ) {
 2132             # Multithreading disabled
 2133             $nCounted++;
 2134 
 2135             printf "Counting:  %d\r", $nCounted
 2136                  unless (!$opt_progress_rate or ($nCounted % $opt_progress_rate));
 2137         }
 2138 
 2139         next if $Ignored{$file};
 2140         if ($opt_include_ext and not $Include_Ext{ file_extension($file) }) {
 2141             $p_ignored{$file} = "not in --include-ext=$opt_include_ext";
 2142             next;
 2143         }
 2144         if ($opt_include_lang and not $Include_Language{$Language{$file}}) {
 2145             $p_ignored{$file} = "not in --include-lang=$opt_include_lang";
 2146             next;
 2147         }
 2148         if ($Exclude_Language{$Language{$file}}) {
 2149             $p_ignored{$file} = "--exclude-lang=$Language{$file}";
 2150             next;
 2151         }
 2152 
 2153         my $Filters_by_Language_Language_file = ! @{$Filters_by_Language{$Language{$file}} };
 2154         if ($Filters_by_Language_Language_file) {
 2155             if ($Language{$file} eq "(unknown)") {
 2156                 $p_ignored{$file} = "language unknown (#1)";
 2157             } else {
 2158                 $p_ignored{$file} = "missing Filters_by_Language{$Language{$file}}";
 2159             }
 2160             next;
 2161         }
 2162 
 2163         my ($all_line_count, $blank_count, $comment_count, $code_count);
 2164         if ($opt_use_sloccount and $Language{$file} =~ /^(C|C\+\+|XML|PHP|Pascal|Java)$/) {
 2165             chomp ($blank_count     = `grep -cv \"[^[:space:]]\" '$file'`);
 2166             chomp ($all_line_count  = `cat '$file' | wc -l`);
 2167             if      ($Language{$file} =~ /^(C|C\+\+)$/) {
 2168                 $code_count = `cat '$file' | c_count      | head -n 1`;
 2169             } elsif ($Language{$file} eq "XML") {
 2170                 $code_count = `cat '$file' | xml_count    | head -n 1`;
 2171             } elsif ($Language{$file} eq "PHP") {
 2172                 $code_count = `cat '$file' | php_count    | head -n 1`;
 2173             } elsif ($Language{$file} eq "Pascal") {
 2174                 $code_count = `cat '$file' | pascal_count | head -n 1`;
 2175             } elsif ($Language{$file} eq "Java") {
 2176                 $code_count = `cat '$file' | java_count   | head -n 1`;
 2177             } else {
 2178                 die "SLOCCount match failure: file=[$file] lang=[$Language{$file}]";
 2179             }
 2180             $code_count = substr($code_count, 0, -2);
 2181             $comment_count = $all_line_count - $code_count - $blank_count;
 2182         } else {
 2183             ($all_line_count,
 2184              $blank_count   ,
 2185              $comment_count ,) = call_counter($file, $Language{$file}, \@Errors);
 2186             $code_count = $all_line_count - $blank_count - $comment_count;
 2187         }
 2188 
 2189         if ($opt_by_file) {
 2190             $p_rbf{$file}{'code'   } = $code_count     ;
 2191             $p_rbf{$file}{'blank'  } = $blank_count    ;
 2192             $p_rbf{$file}{'comment'} = $comment_count  ;
 2193             $p_rbf{$file}{'lang'   } = $Language{$file};
 2194             $p_rbf{$file}{'nFiles' } = 1;
 2195         } else {
 2196             $p_rbf{$file} = 1;  # just keep track of counted files
 2197         }
 2198 
 2199         $p_rbl{$Language{$file}}{'nFiles'}++;
 2200         $p_rbl{$Language{$file}}{'code'}    += $code_count   ;
 2201         $p_rbl{$Language{$file}}{'blank'}   += $blank_count  ;
 2202         $p_rbl{$Language{$file}}{'comment'} += $comment_count;
 2203 
 2204     }
 2205     print "<- count_files()\n" if $opt_v > 2;
 2206     return {
 2207         "ignored" => \%p_ignored,
 2208         "errors"  => \@p_errors,
 2209         "results_by_file" => \%p_rbf,
 2210         "results_by_language" => \%p_rbl,
 2211     }
 2212 } # 1}}}
 2213 sub count_filesets {                         # {{{1
 2214     my ($fset_a,
 2215         $fset_b,
 2216         $files_added,
 2217         $files_removed,
 2218         $file_pairs,
 2219         $counter_type,
 2220         $language_hash,
 2221         $rh_Ignored) = @_;
 2222     print "-> count_filesets()\n" if $opt_v > 2;
 2223     my @p_errors = ();
 2224     my %p_alignment = ();
 2225     my %p_ignored = ();
 2226     my %p_rbl = ();
 2227     my %p_rbf = ();
 2228     my %p_dbl = ();
 2229     my %p_dbf = ();
 2230     my %Language = %$language_hash;
 2231 
 2232     my $nCounted = 0;
 2233 
 2234     my %already_counted = (); # already_counted{ filename } = 1
 2235 
 2236     if (!@$file_pairs) {
 2237         # Special case where all files were either added or deleted.
 2238         # In this case, one of these arrays will be empty:
 2239         #   @files_added, @files_removed
 2240         # so loop over both to cover both cases.
 2241         my $status = @$files_added ? 'added' : 'removed';
 2242         my $fset = @$files_added ? $fset_b : $fset_a;
 2243         foreach my $file (@$files_added, @$files_removed) {
 2244             next unless defined $Language{$fset}{$file};
 2245             my $Lang = $Language{$fset}{$file};
 2246             next if $Lang eq '(unknown)';
 2247             my ($all_line_count,
 2248                 $blank_count   ,
 2249                 $comment_count ,
 2250                 ) = call_counter($file, $Lang, \@p_errors);
 2251             $already_counted{$file} = 1;
 2252             my $code_count = $all_line_count-$blank_count-$comment_count;
 2253             if ($opt_by_file) {
 2254                 $p_dbf{$file}{'code'   }{$status} += $code_count   ;
 2255                 $p_dbf{$file}{'blank'  }{$status} += $blank_count  ;
 2256                 $p_dbf{$file}{'comment'}{$status} += $comment_count;
 2257                 $p_dbf{$file}{'lang'   }{$status}  = $Lang         ;
 2258                 $p_dbf{$file}{'nFiles' }{$status} += 1             ;
 2259             }
 2260             $p_dbl{$Lang}{'code'   }{$status} += $code_count   ;
 2261             $p_dbl{$Lang}{'blank'  }{$status} += $blank_count  ;
 2262             $p_dbl{$Lang}{'comment'}{$status} += $comment_count;
 2263             $p_dbl{$Lang}{'nFiles' }{$status} += 1             ;
 2264         }
 2265     }
 2266 
 2267     #use Data::Dumper::Simple;
 2268     #use Data::Dumper;
 2269     #print Dumper(\@files_added, \@files_removed, \@file_pairs);
 2270     #print "after align_by_pairs:\n";
 2271     #print "added:\n";
 2272 
 2273     foreach my $f (@$files_added) {
 2274         next if $already_counted{$f};
 2275         #printf "%10s -> %s\n", $f, $Language{$fh[$F+1]}{$f};
 2276         # Don't proceed unless the file (both L and R versions)
 2277         # is in a known language.
 2278         next if $opt_include_ext
 2279             and not $Include_Ext{ file_extension($f) };
 2280         next if $opt_include_lang
 2281             and not $Include_Language{$Language{$fset_b}{$f}};
 2282         my $this_lang = $Language{$fset_b}{$f};
 2283         if ($this_lang eq "(unknown)") {
 2284             $p_ignored{$f} = "uknown language";
 2285             next;
 2286         }
 2287         if ($Exclude_Language{$this_lang}) {
 2288             $p_ignored{$f} = "--exclude-lang=$this_lang";
 2289             next;
 2290         }
 2291         $p_alignment{"added"}{sprintf "  + %s ; %s\n", $f, $this_lang} = 1;
 2292         ++$p_dbl{ $this_lang }{'nFiles'}{'added'};
 2293         # Additionally, add contents of file $f to
 2294         # Delta_by_File{$f}{comment/blank/code}{'added'}
 2295         # Delta_by_Language{$lang}{comment/blank/code}{'added'}
 2296         # via the $p_dbl and $p_dbf variables.
 2297         my ($all_line_count,
 2298             $blank_count   ,
 2299             $comment_count ,
 2300            ) = call_counter($f, $this_lang, \@p_errors);
 2301         $p_dbl{ $this_lang }{'comment'}{'added'} += $comment_count;
 2302         $p_dbl{ $this_lang }{'blank'}{'added'}   += $blank_count;
 2303         $p_dbl{ $this_lang }{'code'}{'added'}    +=
 2304            $all_line_count - $blank_count - $comment_count;
 2305         $p_dbf{ $f }{'comment'}{'added'} = $comment_count;
 2306         $p_dbf{ $f }{'blank'}{'added'}   = $blank_count;
 2307         $p_dbf{ $f }{'code'}{'added'}    =
 2308            $all_line_count - $blank_count - $comment_count;
 2309     }
 2310 
 2311     #print "removed:\n";
 2312     foreach my $f (@$files_removed) {
 2313         next if $already_counted{$f};
 2314         # Don't proceed unless the file (both L and R versions)
 2315         # is in a known language.
 2316         next if $opt_include_ext
 2317             and not $Include_Ext{ file_extension($f) };
 2318         next if $opt_include_lang
 2319             and (not defined $Language{$fset_a}{$f}
 2320              or  not defined $Include_Language{$Language{$fset_a}{$f}});
 2321         my $this_lang = $Language{$fset_a}{$f};
 2322         if ($this_lang eq "(unknown)") {
 2323             $p_ignored{$f} = "uknown language";
 2324             next;
 2325         }
 2326         if ($Exclude_Language{$this_lang}) {
 2327             $p_ignored{$f} = "--exclude-lang=$this_lang";
 2328             next;
 2329         }
 2330         ++$p_dbl{ $this_lang }{'nFiles'}{'removed'};
 2331         $p_alignment{"removed"}{sprintf "  - %s ; %s\n", $f, $this_lang} = 1;
 2332         #printf "%10s -> %s\n", $f, $Language{$fh[$F  ]}{$f};
 2333         # Additionally, add contents of file $f to
 2334         #        Delta_by_File{$f}{comment/blank/code}{'removed'}
 2335         #        Delta_by_Language{$lang}{comment/blank/code}{'removed'}
 2336         # via the $p_dbl and $p_dbf variables.
 2337         my ($all_line_count,
 2338             $blank_count   ,
 2339             $comment_count ,
 2340            ) = call_counter($f, $this_lang, \@p_errors);
 2341         $p_dbl{ $this_lang}{'comment'}{'removed'} += $comment_count;
 2342         $p_dbl{ $this_lang}{'blank'}{'removed'}   += $blank_count;
 2343         $p_dbl{ $this_lang}{'code'}{'removed'}    +=
 2344              $all_line_count - $blank_count - $comment_count;
 2345         $p_dbf{ $f }{'comment'}{'removed'} = $comment_count;
 2346         $p_dbf{ $f }{'blank'}{'removed'}   = $blank_count;
 2347         $p_dbf{ $f }{'code'}{'removed'}    =
 2348             $all_line_count - $blank_count - $comment_count;
 2349     }
 2350 
 2351     my $n_file_pairs_compared = 0;
 2352     # Don't know ahead of time how many file pairs will be compared
 2353     # since duplicates are weeded out below.  The answer is
 2354     # scalar @file_pairs only if there are no duplicates.
 2355 
 2356     foreach my $pair (@$file_pairs) {
 2357         my $file_L = $pair->[0];
 2358         my $file_R = $pair->[1];
 2359         my $Lang_L = $Language{$fset_a}{$file_L};
 2360         my $Lang_R = $Language{$fset_b}{$file_R};
 2361         if (!defined($Lang_L) or !defined($Lang_R)) {
 2362             print " -> count_filesets skipping $file_L, $file_R ",
 2363                   "because language cannot be inferred\n" if $opt_v;
 2364             next;
 2365         }
 2366         #print "main step 6 file_L=$file_L    file_R=$file_R\n";
 2367         ++$nCounted;
 2368         printf "Counting:  %d\r", $nCounted
 2369              unless ($counter_type or !$opt_progress_rate or ($nCounted % $opt_progress_rate));
 2370         next if $p_ignored{$file_L} or $p_ignored{$file_R};
 2371 
 2372         # filter out non-included extensions
 2373         if ($opt_include_ext  and not $Include_Ext{ file_extension($file_L) }
 2374                               and not $Include_Ext{ file_extension($file_R) }) {
 2375             $p_ignored{$file_L} = "not in --include-lang=$opt_include_ext";
 2376             $p_ignored{$file_R} = "not in --include-lang=$opt_include_ext";
 2377             next;
 2378         }
 2379         # filter out non-included languages
 2380         if ($opt_include_lang and not $Include_Language{$Lang_L}
 2381                               and not $Include_Language{$Lang_R}) {
 2382             $p_ignored{$file_L} = "not in --include-lang=$opt_include_lang";
 2383             $p_ignored{$file_R} = "not in --include-lang=$opt_include_lang";
 2384             next;
 2385         }
 2386         # filter out excluded or unrecognized languages
 2387         if ($Exclude_Language{$Lang_L} or $Exclude_Language{$Lang_R}) {
 2388             $p_ignored{$file_L} = "--exclude-lang=$Lang_L";
 2389             $p_ignored{$file_R} = "--exclude-lang=$Lang_R";
 2390             next;
 2391         }
 2392 
 2393         my $not_Filters_by_Language_Lang_LR = 0;
 2394         #print "file_LR = [$file_L] [$file_R]\n";
 2395         #print "Lang_LR = [$Lang_L] [$Lang_R]\n";
 2396         if (($Lang_L eq "(unknown)") or
 2397             ($Lang_R eq "(unknown)") or
 2398             !(@{$Filters_by_Language{$Lang_L} }) or
 2399             !(@{$Filters_by_Language{$Lang_R} })) {
 2400             $not_Filters_by_Language_Lang_LR = 1;
 2401         }
 2402         if ($not_Filters_by_Language_Lang_LR) {
 2403             if (($Lang_L eq "(unknown)") or ($Lang_R eq "(unknown)")) {
 2404                 $p_ignored{$fset_a}{$file_L} = "language unknown (#1)";
 2405                 $p_ignored{$fset_b}{$file_R} = "language unknown (#1)";
 2406             } else {
 2407                 $p_ignored{$fset_a}{$file_L} = "missing Filters_by_Language{$Lang_L}";
 2408                 $p_ignored{$fset_b}{$file_R} = "missing Filters_by_Language{$Lang_R}";
 2409             }
 2410             next;
 2411         }
 2412 
 2413         # filter out explicitly excluded files
 2414         if ($opt_exclude_list_file and
 2415             ($rh_Ignored->{$file_L} or $rh_Ignored->{$file_R})) {
 2416             my $msg_2;
 2417             if ($rh_Ignored->{$file_L}) {
 2418                 $msg_2 = "$file_L (paired to $file_R)";
 2419             } else {
 2420                 $msg_2 = "$file_R (paired to $file_L)";
 2421             }
 2422             my $msg_1 = "in --exclude-list-file=$opt_exclude_list_file";
 2423             $p_ignored{$file_L} = "$msg_1, $msg_2";
 2424             $p_ignored{$file_R} = "$msg_1, $msg_2";
 2425             next;
 2426         }
 2427 
 2428         #print "DIFF($file_L, $file_R)\n";
 2429         # step 0: compare the two files' contents
 2430         chomp ( my @lines_L = read_file($file_L) );
 2431         chomp ( my @lines_R = read_file($file_R) );
 2432         my $language_file_L = "";
 2433         if (defined $Language{$fset_a}{$file_L}) {
 2434             $language_file_L = $Language{$fset_a}{$file_L};
 2435         } else {
 2436             # files $file_L and $file_R do not contain known language
 2437             next;
 2438         }
 2439 
 2440         my $contents_are_same = 1;
 2441         if (scalar @lines_L == scalar @lines_R) {
 2442             # same size, must compare line-by-line
 2443             for (my $i = 0; $i < scalar @lines_L; $i++) {
 2444                if ($lines_L[$i] ne $lines_R[$i]) {
 2445                    $contents_are_same = 0;
 2446                    last;
 2447                }
 2448             }
 2449             if ($contents_are_same) {
 2450                 ++$p_dbl{$language_file_L}{'nFiles'}{'same'};
 2451             } else {
 2452                 ++$p_dbl{$language_file_L}{'nFiles'}{'modified'};
 2453             }
 2454         } else {
 2455             $contents_are_same = 0;
 2456             # different sizes, contents have changed
 2457             ++$p_dbl{$language_file_L}{'nFiles'}{'modified'};
 2458         }
 2459 
 2460         if ($opt_diff_alignment) {
 2461             my $str =  "$file_L | $file_R ; $language_file_L";
 2462             if ($contents_are_same) {
 2463                 $p_alignment{"pairs"}{"  == $str"} = 1;
 2464             } else {
 2465                 $p_alignment{"pairs"}{"  != $str"} = 1;
 2466             }
 2467             ++$n_file_pairs_compared;
 2468         }
 2469 
 2470         my ($all_line_count_L, $blank_count_L   , $comment_count_L ,
 2471             $all_line_count_R, $blank_count_R   , $comment_count_R , )  = (0,0,0,0,0,0,);
 2472         if (!$contents_are_same) {
 2473             # step 1: identify comments in both files
 2474             #print "Diff blank removal L language= $Lang_L";
 2475             #print " scalar(lines_L)=", scalar @lines_L, "\n";
 2476             my @original_minus_blanks_L
 2477                     = rm_blanks(  \@lines_L, $Lang_L, \%EOL_Continuation_re);
 2478             #print "1: scalar(original_minus_blanks_L)=", scalar @original_minus_blanks_L, "\n";
 2479             @lines_L    = @original_minus_blanks_L;
 2480             #print "2: scalar(lines_L)=", scalar @lines_L, "\n";
 2481             @lines_L    = add_newlines(\@lines_L); # compensate for rm_comments()
 2482             @lines_L    = rm_comments( \@lines_L, $Lang_L, $file_L,
 2483                                        \%EOL_Continuation_re);
 2484             #print "3: scalar(lines_L)=", scalar @lines_L, "\n";
 2485 
 2486             #print "Diff blank removal R language= $Lang_R\n";
 2487             my @original_minus_blanks_R
 2488                     = rm_blanks(  \@lines_R, $Lang_R, \%EOL_Continuation_re);
 2489             @lines_R    = @original_minus_blanks_R;
 2490             @lines_R    = add_newlines(\@lines_R); # taken away by rm_comments()
 2491             @lines_R    = rm_comments( \@lines_R, $Lang_R, $file_R,
 2492                                        \%EOL_Continuation_re);
 2493 
 2494             my (@diff_LL, @diff_LR, );
 2495                    array_diff( $file_L                  ,   # in
 2496                        \@original_minus_blanks_L ,   # in
 2497                        \@lines_L                 ,   # in
 2498                        "comment"                 ,   # in
 2499                        \@diff_LL, \@diff_LR      ,   # out
 2500                        \@p_errors);                    # in/out
 2501 
 2502             my (@diff_RL, @diff_RR, );
 2503                     array_diff( $file_R                  ,   # in
 2504                        \@original_minus_blanks_R ,   # in
 2505                        \@lines_R                 ,   # in
 2506                        "comment"                 ,   # in
 2507                        \@diff_RL, \@diff_RR      ,   # out
 2508                        \@p_errors);                    # in/out
 2509             # each line of each file is now classified as
 2510             # code or comment
 2511             #use Data::Dumper;
 2512             #print Dumper("diff_LL", \@diff_LL, "diff_LR", \@diff_LR, );
 2513             #print Dumper("diff_RL", \@diff_RL, "diff_RR", \@diff_RR, );
 2514             #die;
 2515 
 2516             # step 2: separate code from comments for L and R files
 2517             my @code_L = ();
 2518             my @code_R = ();
 2519             my @comm_L = ();
 2520             my @comm_R = ();
 2521             foreach my $line_info (@diff_LL) {
 2522                 if      ($line_info->{'type'} eq "code"   ) {
 2523                     push @code_L, $line_info->{char};
 2524                 } elsif ($line_info->{'type'} eq "comment") {
 2525                     push @comm_L, $line_info->{char};
 2526                 } else {
 2527                     die "Diff unexpected line type ",
 2528                         $line_info->{'type'}, "for $file_L line ",
 2529                         $line_info->{'lnum'};
 2530                 }
 2531             }
 2532 
 2533             foreach my $line_info (@diff_RL) {
 2534                 if      ($line_info->{type} eq "code"   ) {
 2535                     push @code_R, $line_info->{'char'};
 2536                 } elsif ($line_info->{type} eq "comment") {
 2537                     push @comm_R, $line_info->{'char'};
 2538                 } else {
 2539                     die "Diff unexpected line type ",
 2540                         $line_info->{'type'}, "for $file_R line ",
 2541                         $line_info->{'lnum'};
 2542                 }
 2543             }
 2544 
 2545             if ($opt_ignore_whitespace) {
 2546                 # strip all whitespace from each line of source code
 2547                 # and comments then use these stripped arrays in diffs
 2548                 foreach (@code_L) { s/\s+//g }
 2549                 foreach (@code_R) { s/\s+//g }
 2550                 foreach (@comm_L) { s/\s+//g }
 2551                 foreach (@comm_R) { s/\s+//g }
 2552             }
 2553             if ($opt_ignore_case) {
 2554                 # change all text to lowercase in diffs
 2555                 foreach (@code_L) { $_ = lc }
 2556                 foreach (@code_R) { $_ = lc }
 2557                 foreach (@comm_L) { $_ = lc }
 2558                 foreach (@comm_R) { $_ = lc }
 2559             }
 2560             # step 3: compute code diffs
 2561             array_diff("$file_L v. $file_R"   ,   # in
 2562                        \@code_L               ,   # in
 2563                        \@code_R               ,   # in
 2564                        "revision"             ,   # in
 2565                        \@diff_LL, \@diff_LR   ,   # out
 2566                        \@p_errors);                 # in/out
 2567             #print Dumper("diff_LL", \@diff_LL, "diff_LR", \@diff_LR, );
 2568             #print Dumper("diff_LR", \@diff_LR);
 2569             foreach my $line_info (@diff_LR) {
 2570                 my $status = $line_info->{'desc'}; # same|added|removed|modified
 2571                 ++$p_dbl{$Lang_L}{'code'}{$status};
 2572                 if ($opt_by_file) {
 2573                     ++$p_dbf{$file_L}{'code'}{$status};
 2574                 }
 2575             }
 2576             #use Data::Dumper;
 2577             #print Dumper("code diffs:", \@diff_LL, \@diff_LR);
 2578 
 2579             # step 4: compute comment diffs
 2580             array_diff("$file_L v. $file_R"   ,   # in
 2581                        \@comm_L               ,   # in
 2582                        \@comm_R               ,   # in
 2583                        "revision"             ,   # in
 2584                        \@diff_LL, \@diff_LR   ,   # out
 2585                        \@Errors);                 # in/out
 2586             #print Dumper("comment diff_LR", \@diff_LR);
 2587             foreach my $line_info (@diff_LR) {
 2588                 my $status = $line_info->{'desc'}; # same|added|removed|modified
 2589                 ++$p_dbl{$Lang_L}{'comment'}{$status};
 2590                 if ($opt_by_file) {
 2591                     ++$p_dbf{$file_L}{'comment'}{$status};
 2592                 }
 2593             }
 2594             #print Dumper("comment diffs:", \@diff_LL, \@diff_LR);
 2595 
 2596             # step 5: compute difference in blank lines (kind of pointless)
 2597             next if $Lang_L eq '(unknown)' or
 2598                     $Lang_R eq '(unknown)';
 2599             ($all_line_count_L,
 2600              $blank_count_L   ,
 2601              $comment_count_L ,
 2602             ) = call_counter($file_L, $Lang_L, \@Errors);
 2603 
 2604             ($all_line_count_R,
 2605              $blank_count_R   ,
 2606              $comment_count_R ,
 2607             ) = call_counter($file_R, $Lang_R, \@Errors);
 2608         } else {
 2609             # L and R file contents are identical, no need to diff
 2610             ($all_line_count_L,
 2611              $blank_count_L   ,
 2612              $comment_count_L ,
 2613             ) = call_counter($file_L, $Lang_L, \@Errors);
 2614             $all_line_count_R = $all_line_count_L;
 2615             $blank_count_R    = $blank_count_L   ;
 2616             $comment_count_R  = $comment_count_L ;
 2617             my $code_lines_R  = $all_line_count_R - ($blank_count_R + $comment_count_R);
 2618             $p_dbl{$Lang_L}{'blank'}{'same'}   += $blank_count_R;
 2619             $p_dbl{$Lang_L}{'comment'}{'same'} += $comment_count_R;
 2620             $p_dbl{$Lang_L}{'code'}{'same'}    += $code_lines_R;
 2621             if ($opt_by_file) {
 2622                 $p_dbf{$file_L}{'blank'}{'same'}   += $blank_count_R;
 2623                 $p_dbf{$file_L}{'comment'}{'same'} += $comment_count_R;
 2624                 $p_dbf{$file_L}{'code'}{'same'}    += $code_lines_R;
 2625             }
 2626         }
 2627 
 2628         if ($blank_count_L <  $blank_count_R) {
 2629             my $D = $blank_count_R - $blank_count_L;
 2630             $p_dbl{$Lang_L}{'blank'}{'added'}   += $D;
 2631         } else {
 2632             my $D = $blank_count_L - $blank_count_R;
 2633             $p_dbl{$Lang_L}{'blank'}{'removed'} += $D;
 2634         }
 2635         if ($opt_by_file) {
 2636             if ($blank_count_L <  $blank_count_R) {
 2637                 my $D = $blank_count_R - $blank_count_L;
 2638                 $p_dbf{$file_L}{'blank'}{'added'}   += $D;
 2639             } else {
 2640                 my $D = $blank_count_L - $blank_count_R;
 2641                 $p_dbf{$file_L}{'blank'}{'removed'} += $D;
 2642             }
 2643         }
 2644 
 2645         my $code_count_L = $all_line_count_L-$blank_count_L-$comment_count_L;
 2646         if ($opt_by_file) {
 2647             $p_rbf{$file_L}{'code'   } = $code_count_L    ;
 2648             $p_rbf{$file_L}{'blank'  } = $blank_count_L   ;
 2649             $p_rbf{$file_L}{'comment'} = $comment_count_L ;
 2650             $p_rbf{$file_L}{'lang'   } = $Lang_L          ;
 2651             $p_rbf{$file_L}{'nFiles' } = 1                ;
 2652         } else {
 2653             $p_rbf{$file_L} = 1;  # just keep track of counted files
 2654         }
 2655 
 2656         $p_rbl{$Lang_L}{'nFiles'}++;
 2657         $p_rbl{$Lang_L}{'code'}    += $code_count_L   ;
 2658         $p_rbl{$Lang_L}{'blank'}   += $blank_count_L  ;
 2659         $p_rbl{$Lang_L}{'comment'} += $comment_count_L;
 2660     }
 2661 
 2662     print "<- count_filesets()\n" if $opt_v > 2;
 2663     return {
 2664         "ignored" => \%p_ignored,
 2665         "errors"  => \@p_errors,
 2666         "results_by_file" => \%p_rbf,
 2667         "results_by_language" => \%p_rbl,
 2668         "delta_by_file" => \%p_dbf,
 2669         "delta_by_language" => \%p_dbl,
 2670         "alignment" => \%p_alignment,
 2671         "n_filepairs_compared" => $n_file_pairs_compared
 2672     }
 2673 } # 1}}}
 sub write_alignment_data {                   # {{{1
     # Assemble the file alignment report and pass it to write_file().
     #   $filename             : in  handed unchanged to write_file()
     #   $n_filepairs_compared : in  number shown on the "File pairs compared" line
     #   $data                 : in  hash ref; each of the optional keys 'added',
     #                               'removed', 'pairs' holds a hash ref whose
     #                               keys are the report lines for that section
     my ($filename, $n_filepairs_compared, $data ) = @_;
     my @report = ();

     # The two one-sided sections share a layout:  a header carrying the
     # number of entries, the entries in sorted order, then a separator.
     my @one_sided = ( [ 'added'  , "Files added: "   ],
                       [ 'removed', "Files removed: " ], );
     foreach my $section (@one_sided) {
         my ($key, $header) = @{$section};
         next unless $data->{$key};
         my @entries = sort keys %{$data->{$key}};
         push @report, $header . scalar(@entries) . "\n";
         push @report, @entries;
         push @report, "\n";
     }

     # The pairs section prints the count supplied by the caller (not the
     # number of keys) and has no trailing separator.
     if ( $data->{'pairs'} ) {
         my @entries = sort keys %{$data->{'pairs'}};
         push @report, "File pairs compared: " . $n_filepairs_compared . "\n";
         push @report, @entries;
     }

     write_file($filename, {}, @report);
 } # 1}}}
 sub exclude_dir_validates {                  # {{{1
     # Checks that no --exclude-dir entry is a directory path.
     #   $rh_Exclude_Dir : in  hash ref whose keys are the directory names
     # Returns 1 if every key is free of path separators, otherwise 0.
     # Each offending entry is reported with warn, followed by one hint
     # about the alternative switches.
     my ($rh_Exclude_Dir) = @_;

     # A forward slash always counts as a path separator; a backslash
     # counts only when running on Windows.
     my @path_like = grep { ($ON_WINDOWS and $_ =~ m{\\}) or ($_ =~ m{/}) }
                          keys %{$rh_Exclude_Dir};

     foreach my $dir (@path_like) {
         warn "--exclude-dir '$dir' :  cannot specify directory paths\n";
     }
     return 1 unless @path_like;

     warn "Use '--fullpath --not-match-d=REGEX' instead\n";
     return 0;
 } # 1}}}
 sub process_exclude_list_file {              # {{{1
     # Reads the names listed in $list_file and marks the matching input
     # files as ignored.
     #   - listed entries that are directories are added to %{$rh_exclude_dir}
     #   - listed entries that are files are collected for rejection
     #   - listed entries that are neither are silently skipped
     # Every file in the global @file_list that matches a rejected file, or
     # that is_excluded() reports for the excluded directories, gets an entry
     # in %{$rh_ignored} keyed by its original (un-normalized) name.
     # No return value is intended; results are delivered via the two hash refs.
     my ($list_file      , # in
         $rh_exclude_dir , # out
         $rh_ignored     , # out
        ) = @_;
     # note: references global @file_list
     print "-> process_exclude_list_file($list_file)\n" if $opt_v > 2;

     # reject a specific set of files and/or directories
     my @file_reject_list = ();
     foreach my $F_or_D (read_list_file($list_file)) {
         if (is_dir($F_or_D)) {
             $rh_exclude_dir->{$F_or_D} = 1;
         } elsif (is_file($F_or_D)) {
             push @file_reject_list, $F_or_D;
         }
     }

     # Normalize file names for better comparison.  For the input files the
     # hash value is the original spelling, used below as the ignore key.
     my %normalized_input   = normalize_file_names(@file_list);
     my %normalized_reject  = normalize_file_names(@file_reject_list);
     my %normalized_exclude = normalize_file_names(keys %{$rh_exclude_dir});
     foreach my $F (keys %normalized_input) {
         next unless $normalized_reject{$F}
                  or is_excluded($F, \%normalized_exclude);
         my $orig_F = $normalized_input{$F};
         # Name the list file this sub was actually given rather than the
         # global option variable, so the messages always match the file
         # that was read.
         $rh_ignored->{$orig_F} = "listed in exclusion file $list_file";
         print "Ignoring $orig_F because it appears in $list_file\n"
             if $opt_v > 1;
     }

     print "<- process_exclude_list_file\n" if $opt_v > 2;
 } # 1}}}
 sub combine_results {                        # {{{1
     # Sums the counts found in one or more previously written text reports.
     # returns 1 if the inputs are categorized by language
     #         0 if no identifiable language was found
     # Totals are accumulated in %{$rhh_count}, keyed by language name when
     # $report_type is "by language" and by report file name otherwise.
     # Files that cannot be opened, and files that yield no counts, are
     # reported with warn and otherwise skipped.
     my ($ra_report_files, # in
         $report_type    , # in  "by language" or "by report file"
         $rhh_count      , # out count{TYPE}{nFiles|code|blank|comment|scaled}
         $rhaa_Filters_by_Language , # in
        ) = @_;

     print "-> combine_results(report_type=$report_type)\n" if $opt_v > 2;
     my $found_language = 0;
     my $by_language    = ($report_type eq "by language");

     # Row layout of a summary (per language) report.
     my $summary_row_re =
         qr{^(.*?)\s+         # language
            (\d+)\s+         # files
            (\d+)\s+         # blank
            (\d+)\s+         # comments
            (\d+)\s+         # code
            (                #    next four entries missing with -no3
            x\s+             # x
            \d+\.\d+\s+      # scale
            =\s+             # =
            (\d+\.\d+)\s*    # scaled code
            )?
            $}x;
     # Row layout of a --by-file report:  same, minus the files column.
     my $by_file_row_re =
         qr{^(.*?)\s+         # file name
            (\d+)\s+         # blank
            (\d+)\s+         # comments
            (\d+)\s+         # code
            (                #    next four entries missing with -no3
            x\s+             # x
            \d+\.\d+\s+      # scale
            =\s+             # =
            (\d+\.\d+)\s*    # scaled code
            )?
            $}x;

     foreach my $file (@{$ra_report_files}) {
         my $n_results_found = 0;
         my $IN = IO::File->new($file, "r");
         if (!defined $IN) {
             warn "Unable to read $file; ignoring.\n";
             next;
         }
         # A named lexical is used for the input line so the caller's $_
         # is left untouched.
         while (my $line = <$IN>) {
             next if $line =~ /^(?:http|Language|SUM|-----)/;

             my ($name, $n_files, $n_blank, $n_comment, $n_code, $n_scaled);
             if (!$opt_by_file) {
                 next unless $line =~ $summary_row_re;
                 ($name, $n_files, $n_blank, $n_comment, $n_code, $n_scaled)
                     = ($1, $2, $3, $4, $5, $7);
                 if ($by_language and
                     !defined $rhaa_Filters_by_Language->{$name}) {
                     warn "Unrecognized language '$name' in $file ignored\n";
                     next;
                 }
             } else {
                 next unless $line =~ $by_file_row_re;
                 ($name, $n_blank, $n_comment, $n_code, $n_scaled)
                     = ($1, $2, $3, $4, $6);
                 $n_files = 1;
                 # The filter table holds array references, so test for
                 # existence with defined rather than dereferencing the
                 # entry as a hash (which dies for every known language).
                 next if $by_language and
                         !defined $rhaa_Filters_by_Language->{$name};
             }
             # the language tests above are necessary to avoid trying to
             # sum reports of reports (which have no language breakdown).

             my $key = $file;
             if ($by_language) {
                 $found_language = 1;
                 $key            = $name;
             }
             $rhh_count->{$key}{'nFiles' } += $n_files;
             $rhh_count->{$key}{'blank'  } += $n_blank;
             $rhh_count->{$key}{'comment'} += $n_comment;
             $rhh_count->{$key}{'code'   } += $n_code;
             if ($opt_3) {
                 # the scaled column is optional in the row; count a
                 # missing one as zero instead of adding undef
                 $rhh_count->{$key}{'scaled'} +=
                     defined $n_scaled ? $n_scaled : 0;
             }
             ++$n_results_found;
         }
         $IN->close;
         warn "No counts found in $file--is the file format correct?\n"
             unless $n_results_found;
     }
     print "<- combine_results\n" if $opt_v > 2;
     return $found_language;
 } # 1}}}
 sub compute_denominator {                    # {{{1
     # Returns the line total to divide by when expressing counts as
     # percentages.  $method selects which counts make up the total:
     #   "c"   code
     #   "cm"  code + comment
     #   "cmb" code + comment + blank
     #   "cb"  code + blank
     # Any other $method yields undef.
     my ($method, $nCode, $nComment, $nBlank, ) = @_;
     print "-> compute_denominator\n" if $opt_v > 2;

     my %total_for = (
         "c"   => $nCode                      ,
         "cm"  => $nCode + $nComment          ,
         "cmb" => $nCode + $nComment + $nBlank,
         "cb"  => $nCode + $nBlank            ,
     );

     print "<- compute_denominator\n" if $opt_v > 2;
     return $total_for{ $method };
 } # 1}}}
 sub yaml_to_json_separators {                # {{{1
     # YAML and JSON output differ only in a handful of separators:  the
     # quote placed around text ($Q), the opening and closing braces
     # ($open_B, $close_B), the text that starts the document ($start),
     # and the trailing comma ($C).
     # Returns ($Q, $open_B, $close_B, $start, $C) holding the JSON values
     # when $opt_json is set and the YAML values otherwise.
     print "-> yaml_to_json_separators()\n" if $opt_v > 2;

     my ($Q, $open_B, $close_B, $start, $C) =
         $opt_json ? ('"', '{', '}', '{'               , ',')
                   : ('' , '' , '' , "---\n# $URL\n"   , '' );

     print "<- yaml_to_json_separators()\n" if $opt_v > 2;
     return ($Q, $open_B, $close_B, $start, $C);
 } # 1}}}
 2875 sub diff_report     {                        # {{{1
 2876     # returns an array of lines containing the results
 2877     print "-> diff_report\n" if $opt_v > 2;
 2878 
 2879     if ($opt_xml) {
 2880         print "<- diff_report\n" if $opt_v > 2;
 2881         return diff_xml_report(@_)
 2882     } elsif ($opt_yaml) {
 2883         print "<- diff_report\n" if $opt_v > 2;
 2884         return diff_yaml_report(@_)
 2885     } elsif ($opt_json) {
 2886         print "<- diff_report\n" if $opt_v > 2;
 2887         return diff_json_report(@_)
 2888     } elsif ($opt_csv or $opt_md) {
 2889         print "<- diff_report\n" if $opt_v > 2;
 2890         return diff_csv_report(@_)
 2891     }
 2892 
 2893     my ($version    , # in
 2894         $elapsed_sec, # in
 2895         $report_type, # in  "by language" | "by report file" | "by file"
 2896         $rhhh_count , # in  count{TYPE}{nFiles|code|blank|comment}{a|m|r|s}
 2897         $rh_scale   , # in
 2898        ) = @_;
 2899 
 2900 #use Data::Dumper;
 2901 #print "diff_report: ", Dumper($rhhh_count), "\n";
 2902     my @results       = ();
 2903 
 2904     my $languages     = ();
 2905     my %sum           = (); # sum{nFiles|blank|comment|code}{same|modified|added|removed}
 2906     my $max_len       = 0;
 2907     foreach my $language (keys %{$rhhh_count}) {
 2908         foreach my $V (qw(nFiles blank comment code)) {
 2909             foreach my $S (qw(added same modified removed)) {
 2910                 $rhhh_count->{$language}{$V}{$S} = 0 unless
 2911                     defined $rhhh_count->{$language}{$V}{$S};
 2912                 $sum{$V}{$S}  += $rhhh_count->{$language}{$V}{$S};
 2913             }
 2914         }
 2915         $max_len      = length($language) if length($language) > $max_len;
 2916     }
 2917     my $column_1_offset = 0;
 2918        $column_1_offset = $max_len - 17 if $max_len > 17;
 2919     $elapsed_sec = 0.5 unless $elapsed_sec;
 2920 
 2921     my $spacing_0 = 23;
 2922     my $spacing_1 = 13;
 2923     my $spacing_2 =  9;
 2924     my $spacing_3 = 17;
 2925     if (!$opt_3) {
 2926         $spacing_1 = 19;
 2927         $spacing_2 = 14;
 2928         $spacing_3 = 27;
 2929     }
 2930     $spacing_0 += $column_1_offset;
 2931     $spacing_1 += $column_1_offset;
 2932     $spacing_3 += $column_1_offset;
 2933     my %Format = (
 2934         '1' => { 'xml' => 'name="%s" ',
 2935                  'txt' => "\%-${spacing_0}s ",
 2936                },
 2937         '2' => { 'xml' => 'name="%s" ',
 2938                  'txt' => "\%-${spacing_3}s ",
 2939                },
 2940         '3' => { 'xml' => 'files_count="%d" ',
 2941                  'txt' => '%6d ',
 2942                },
 2943         '4' => { 'xml' => 'blank="%d" comment="%d" code="%d" ',
 2944                  'txt' => "\%${spacing_2}d \%${spacing_2}d \%${spacing_2}d",
 2945                },
 2946         '5' => { 'xml' => 'blank="%.2f" comment="%.2f" code="%d" ',
 2947                  'txt' => "\%3.2f \%3.2f \%${spacing_2}d",
 2948                },
 2949         '6' => { 'xml' => 'factor="%.2f" scaled="%.2f" ',
 2950                  'txt' => ' x %6.2f = %14.2f',
 2951                },
 2952     );
 2953     my $Style = "txt";
 2954        $Style = "xml" if $opt_xml ;
 2955        $Style = "xml" if $opt_yaml;  # not a typo; just set to anything but txt
 2956        $Style = "xml" if $opt_json;  # not a typo; just set to anything but txt
 2957        $Style = "xml" if $opt_csv ;  # not a typo; just set to anything but txt
 2958 
 2959     my $hyphen_line = sprintf "%s", '-' x (79 + $column_1_offset);
 2960        $hyphen_line = sprintf "%s", '-' x (68 + $column_1_offset)
 2961             if (!$opt_3) and (68 + $column_1_offset) > 79;
 2962     my $data_line  = "";
 2963     my $first_column;
 2964     my $BY_LANGUAGE = 0;
 2965     my $BY_FILE     = 0;
 2966     if      ($report_type eq "by language") {
 2967         $first_column = "Language";
 2968         $BY_LANGUAGE  = 1;
 2969     } elsif ($report_type eq "by file")     {
 2970         $first_column = "File";
 2971         $BY_FILE      = 1;
 2972     } else {
 2973         $first_column = "Report File";
 2974     }
 2975 
 2976     # column headers
 2977     if (!$opt_3 and $BY_FILE) {
 2978         my $spacing_n = $spacing_1 - 11;
 2979         $data_line  = sprintf "%-${spacing_n}s" , $first_column;
 2980     } else {
 2981         $data_line  = sprintf "%-${spacing_1}s ", $first_column;
 2982     }
 2983     if ($BY_FILE) {
 2984         $data_line .= sprintf "%${spacing_2}s"   , ""     ;
 2985     } else {
 2986         $data_line .= sprintf "%${spacing_2}s "  , "files";
 2987     }
 2988     my $PCT_symbol = "";
 2989        $PCT_symbol = " \%" if $opt_by_percent;
 2990     $data_line .= sprintf "%${spacing_2}s %${spacing_2}s %${spacing_2}s",
 2991         "blank${PCT_symbol}"         ,
 2992         "comment${PCT_symbol}"       ,
 2993         "code";
 2994 
 2995     if ($Style eq "txt") {
 2996         push @results, $data_line;
 2997         push @results, $hyphen_line;
 2998     }
 2999 
 3000     # sort diff output in descending order of cumulative entries
 3001     foreach my $lang_or_file (sort {
 3002                                 ($rhhh_count->{$b}{'code'}{'added'}    +
 3003                                  $rhhh_count->{$b}{'code'}{'same'}     +
 3004                                  $rhhh_count->{$b}{'code'}{'modified'} +
 3005                                  $rhhh_count->{$b}{'code'}{'removed'}  )  <=>
 3006                                 ($rhhh_count->{$a}{'code'}{'added'}    +
 3007                                  $rhhh_count->{$a}{'code'}{'same'}     +
 3008                                  $rhhh_count->{$a}{'code'}{'modified'} +
 3009                                  $rhhh_count->{$a}{'code'}{'removed'})
 3010                               or $a cmp $b }
 3011                                     keys %{$rhhh_count}) {
 3012 
 3013         if ($BY_FILE) {
 3014             push @results, rm_leading_tempdir($lang_or_file, \%TEMP_DIR);
 3015         } else {
 3016             push @results, $lang_or_file;
 3017         }
 3018         foreach my $S (qw(same modified added removed)) {
 3019             my $indent = $spacing_1 - 2;
 3020             my $line .= sprintf " %-${indent}s", $S;
 3021             if ($BY_FILE) {
 3022                 $line .= sprintf "   ";
 3023             } else {
 3024                 $line .= sprintf "  %${spacing_2}s", $rhhh_count->{$lang_or_file}{'nFiles'}{$S};
 3025             }
 3026             if ($opt_by_percent) {
 3027                 my $DEN = compute_denominator($opt_by_percent  ,
 3028                     $rhhh_count->{$lang_or_file}{'code'}{$S}   ,
 3029                     $rhhh_count->{$lang_or_file}{'comment'}{$S},
 3030                     $rhhh_count->{$lang_or_file}{'blank'}{$S}  );
 3031                 if ($rhhh_count->{$lang_or_file}{'code'}{$S} > 0) {
 3032                     $line .= sprintf " %14.2f %14.2f %${spacing_2}s",
 3033                         $rhhh_count->{$lang_or_file}{'blank'}{$S}   / $DEN * 100,
 3034                         $rhhh_count->{$lang_or_file}{'comment'}{$S} / $DEN * 100,
 3035                         $rhhh_count->{$lang_or_file}{'code'}{$S}    ;
 3036                 } else {
 3037                     $line .= sprintf " %14.2f %14.2f %${spacing_2}s",
 3038                         0.0, 0.0, $rhhh_count->{$lang_or_file}{'code'}{$S}    ;
 3039                 }
 3040             } else {
 3041                 $line .= sprintf " %${spacing_2}s %${spacing_2}s %${spacing_2}s",
 3042                     $rhhh_count->{$lang_or_file}{'blank'}{$S}   ,
 3043                     $rhhh_count->{$lang_or_file}{'comment'}{$S} ,
 3044                     $rhhh_count->{$lang_or_file}{'code'}{$S}    ;
 3045             }
 3046             push @results, $line;
 3047         }
 3048     }
 3049     push @results, $hyphen_line;
 3050     push @results, "SUM:";
 3051     my $sum_files    = 0;
 3052     my $sum_lines    = 0;
 3053     foreach my $S (qw(same modified added removed)) {
 3054         my $indent = $spacing_1 - 2;
 3055         my $line .= sprintf " %-${indent}s", $S;
 3056             if ($BY_FILE) {
 3057                 $line .= sprintf "   ";
 3058                 $sum_files += 1;
 3059             } else {
 3060                 $line .= sprintf "  %${spacing_2}s", $sum{'nFiles'}{$S};
 3061                 $sum_files += $sum{'nFiles'}{$S};
 3062             }
 3063         if ($opt_by_percent) {
 3064             my $DEN = compute_denominator($opt_by_percent,
 3065                 $sum{'code'}{$S}, $sum{'comment'}{$S}, $sum{'blank'}{$S});
 3066             if ($sum{'code'}{$S} > 0) {
 3067                 $line .= sprintf " %14.2f %14.2f %${spacing_2}s",
 3068                     $sum{'blank'}{$S}   / $DEN * 100,
 3069                     $sum{'comment'}{$S} / $DEN * 100,
 3070                     $sum{'code'}{$S}    ;
 3071             } else {
 3072                 $line .= sprintf " %14.2f %14.2f %${spacing_2}s",
 3073                     0.0, 0.0, $sum{'code'}{$S}    ;
 3074             }
 3075         } else {
 3076             $line .= sprintf " %${spacing_2}s %${spacing_2}s %${spacing_2}s",
 3077                 $sum{'blank'}{$S}   ,
 3078                 $sum{'comment'}{$S} ,
 3079                 $sum{'code'}{$S}    ;
 3080         }
 3081         $sum_lines += $sum{'blank'}{$S} + $sum{'comment'}{$S} + $sum{'code'}{$S};
 3082         push @results, $line;
 3083     }
 3084 
 3085     my $header_line  = sprintf "%s v %s", $URL, $version;
 3086        $header_line .= sprintf("  T=%.2f s (%.1f files/s, %.1f lines/s)",
 3087                         $elapsed_sec           ,
 3088                         $sum_files/$elapsed_sec,
 3089                         $sum_lines/$elapsed_sec) unless $opt_sum_reports or $opt_hide_rate;
 3090     if ($Style eq "txt") {
 3091         unshift @results, output_header($header_line, $hyphen_line, $BY_FILE);
 3092     }
 3093 
 3094     push @results, $hyphen_line;
 3095     write_xsl_file() if $opt_xsl and $opt_xsl eq $CLOC_XSL;
 3096     print "<- diff_report\n" if $opt_v > 2;
 3097 
 3098     return @results;
 3099 } # 1}}}
 3100 sub xml_yaml_or_json_header {                # {{{1
 3101     my ($URL, $version, $elapsed_sec, $sum_files, $sum_lines, $by_file) = @_;
 3102     print "-> xml_yaml_or_json_header\n" if $opt_v > 2;
 3103     my $header      = "";
 3104     my $file_rate   = $sum_files/$elapsed_sec;
 3105     my $line_rate   = $sum_lines/$elapsed_sec;
 3106     my $type        = "";
 3107        $type        = "diff_" if $opt_diff;
 3108     my $report_file = "";
 3109     if ($opt_report_file) {
 3110         if ($opt_sum_reports) {
 3111             if ($by_file) {
 3112                 $report_file = "  <report_file>$opt_report_file.file</report_file>"
 3113             } else {
 3114                 $report_file = "  <report_file>$opt_report_file.lang</report_file>"
 3115             }
 3116         } else {
 3117             $report_file = "  <report_file>$opt_report_file</report_file>"
 3118         }
 3119     }
 3120     if ($opt_xml) {
 3121         $header = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>";
 3122         $header .= "\n<?xml-stylesheet type=\"text/xsl\" href=\"" . $opt_xsl . "\"?>" if $opt_xsl;
 3123         $header .= "<${type}results>
 3124 <header>
 3125   <cloc_url>$URL</cloc_url>
 3126   <cloc_version>$version</cloc_version>
 3127   <elapsed_seconds>$elapsed_sec</elapsed_seconds>
 3128   <n_files>$sum_files</n_files>
 3129   <n_lines>$sum_lines</n_lines>
 3130   <files_per_second>$file_rate</files_per_second>
 3131   <lines_per_second>$line_rate</lines_per_second>";
 3132         $header .= "\n$report_file"
 3133             if $opt_report_file;
 3134         $header .= "\n</header>";
 3135     } elsif ($opt_yaml or $opt_json) {
 3136         my ($Q, $open_B, $close_B, $start, $C) = yaml_to_json_separators();
 3137         $header = "${start}${Q}header${Q} : $open_B
 3138   ${Q}cloc_url${Q}           : ${Q}$URL${Q}${C}
 3139   ${Q}cloc_version${Q}       : ${Q}$version${Q}${C}
 3140   ${Q}elapsed_seconds${Q}    : $elapsed_sec${C}
 3141   ${Q}n_files${Q}            : $sum_files${C}
 3142   ${Q}n_lines${Q}            : $sum_lines${C}
 3143   ${Q}files_per_second${Q}   : $file_rate${C}
 3144   ${Q}lines_per_second${Q}   : $line_rate";
 3145         if ($opt_report_file) {
 3146             if ($opt_sum_reports) {
 3147                 if ($by_file) {
 3148                     $header .= "$C\n  ${Q}report_file${Q}        : ${Q}$opt_report_file.file${Q}"
 3149                 } else {
 3150                     $header .= "$C\n  ${Q}report_file${Q}        : ${Q}$opt_report_file.lang${Q}"
 3151                 }
 3152             } else {
 3153                 $header .= "$C\n  ${Q}report_file${Q}        : ${Q}$opt_report_file${Q}";
 3154             }
 3155         }
 3156         $header .= "${close_B}${C}";
 3157     }
 3158     print "<- xml_yaml_or_json_header\n" if $opt_v > 2;
 3159     return $header;
 3160 } # 1}}}
 3161 sub diff_yaml_report {                       # {{{1
 3162     # returns an array of lines containing the results
 3163     my ($version    , # in
 3164         $elapsed_sec, # in
 3165         $report_type, # in  "by language" | "by report file" | "by file"
 3166         $rhhh_count , # in  count{TYPE}{nFiles|code|blank|comment}{a|m|r|s}
 3167         $rh_scale   , # in
 3168        ) = @_;
 3169     print "-> diff_yaml_report\n" if $opt_v > 2;
 3170     $elapsed_sec = 0.5 unless $elapsed_sec;
 3171     my @results       = ();
 3172     my %sum           = ();
 3173     my ($sum_lines, $sum_files, $BY_FILE, $BY_LANGUAGE) =
 3174         diff_header_sum($report_type, $rhhh_count, \%sum);
 3175 
 3176     if (!$ALREADY_SHOWED_HEADER) {
 3177         push @results,
 3178               xml_yaml_or_json_header($URL, $version, $elapsed_sec,
 3179                                  $sum_files, $sum_lines, $BY_FILE);
 3180         $ALREADY_SHOWED_HEADER = 1;
 3181     }
 3182     foreach my $S (qw(added same modified removed)) {
 3183         push @results, "$S :";
 3184         foreach my $F_or_L (keys %{$rhhh_count}) {
 3185             # force quoted language or filename in case these
 3186             # have embedded funny characters, issue #312
 3187             push @results, "  '" . rm_leading_tempdir($F_or_L, \%TEMP_DIR) . "' :";
 3188             foreach my $k (keys %{$rhhh_count->{$F_or_L}}) {
 3189                 next if $k eq "lang"; # present only in those cases
 3190                                       # where code exists for action $S
 3191                 $rhhh_count->{$F_or_L}{$k}{$S} = 0 unless
 3192                     defined $rhhh_count->{$F_or_L}{$k}{$S};
 3193                 push @results,
 3194                     "    $k : $rhhh_count->{$F_or_L}{$k}{$S}";
 3195             }
 3196         }
 3197     }
 3198 
 3199     push @results, "SUM :";
 3200     foreach my $S (qw(added same modified removed)) {
 3201         push @results, "  $S :";
 3202         foreach my $topic (keys %sum) {
 3203             push @results, "    $topic : $sum{$topic}{$S}";
 3204         }
 3205     }
 3206 
 3207     print "<- diff_yaml_report\n" if $opt_v > 2;
 3208 
 3209     return @results;
 3210 } # 1}}}
 3211 sub diff_json_report {                       # {{{1
 3212     # returns an array of lines containing the results
 3213     my ($version    , # in
 3214         $elapsed_sec, # in
 3215         $report_type, # in  "by language" | "by report file" | "by file"
 3216         $rhhh_count , # in  count{TYPE}{nFiles|code|blank|comment}{a|m|r|s}
 3217         $rh_scale   , # in
 3218        ) = @_;
 3219     print "-> diff_json_report\n" if $opt_v > 2;
 3220     $elapsed_sec = 0.5 unless $elapsed_sec;
 3221     my @results       = ();
 3222     my %sum           = ();
 3223     my ($sum_lines, $sum_files, $BY_FILE, $BY_LANGUAGE) =
 3224         diff_header_sum($report_type, $rhhh_count, \%sum);
 3225 
 3226     if (!$ALREADY_SHOWED_HEADER) {
 3227         push @results,
 3228               xml_yaml_or_json_header($URL, $version, $elapsed_sec,
 3229                                  $sum_files, $sum_lines, $BY_FILE);
 3230         $ALREADY_SHOWED_HEADER = 1;
 3231     }
 3232     foreach my $S (qw(added same modified removed)) {
 3233         push @results, " \"$S\" : {";
 3234         foreach my $F_or_L (keys %{$rhhh_count}) {
 3235             push @results, "  \"" . rm_leading_tempdir($F_or_L, \%TEMP_DIR) . "\" : {";
 3236             foreach my $k (keys %{$rhhh_count->{$F_or_L}}) {
 3237                 next if $k eq "lang"; # present only in those cases
 3238                                       # where code exists for action $S
 3239                 $rhhh_count->{$F_or_L}{$k}{$S} = 0 unless
 3240                     defined $rhhh_count->{$F_or_L}{$k}{$S};
 3241                 push @results,
 3242                     "    \"$k\" : $rhhh_count->{$F_or_L}{$k}{$S},";
 3243             }
 3244             $results[-1] =~ s/,\s*$//;
 3245             push @results, "  },"
 3246         }
 3247         $results[-1] =~ s/,\s*$//;
 3248         push @results, "  },"
 3249     }
 3250 
 3251     push @results, "  \"SUM\" : {";
 3252     foreach my $S (qw(added same modified removed)) {
 3253         push @results, "  \"$S\" : {";
 3254         foreach my $topic (keys %sum) {
 3255             push @results, "    \"$topic\" : $sum{$topic}{$S},";
 3256         }
 3257         $results[-1] =~ s/,\s*$//;
 3258         push @results, "},";
 3259     }
 3260 
 3261     $results[-1] =~ s/,\s*$//;
 3262     push @results, "} }";
 3263     print "<- diff_json_report\n" if $opt_v > 2;
 3264     return @results;
 3265 } # 1}}}
 3266 sub diff_header_sum {                        # {{{1
 3267     my ($report_type, # in  "by language" | "by report file" | "by file"
 3268         $rhhh_count , # in  count{TYPE}{nFiles|code|blank|comment}{a|m|r|s}
 3269         $rhh_sum    , # out sum{nFiles|blank|comment|code}{same|modified|added|removed}
 3270        ) = @_;
 3271 
 3272     my $sum_files = 0;
 3273     my $sum_lines = 0;
 3274     foreach my $language (keys %{$rhhh_count}) {
 3275         foreach my $V (qw(nFiles blank comment code)) {
 3276             foreach my $S (qw(added same modified removed)) {
 3277                 $rhhh_count->{$language}{$V}{$S} = 0 unless
 3278                     defined $rhhh_count->{$language}{$V}{$S};
 3279                 $rhh_sum->{$V}{$S}  += $rhhh_count->{$language}{$V}{$S};
 3280                 if ($V eq "nFiles") {
 3281                     $sum_files += $rhhh_count->{$language}{$V}{$S};
 3282                 } else {
 3283                     $sum_lines += $rhhh_count->{$language}{$V}{$S};
 3284                 }
 3285             }
 3286         }
 3287     }
 3288 
 3289     my $BY_LANGUAGE = 0;
 3290     my $BY_FILE     = 0;
 3291     if      ($report_type eq "by language") {
 3292         $BY_LANGUAGE  = 1;
 3293     } elsif ($report_type eq "by file")     {
 3294         $BY_FILE      = 1;
 3295     }
 3296     return $sum_lines, $sum_files, $BY_FILE, $BY_LANGUAGE;
 3297 } # 1}}}
 3298 sub diff_xml_report {                        # {{{1
 3299     # returns an array of lines containing the results
 3300     my ($version    , # in
 3301         $elapsed_sec, # in
 3302         $report_type, # in  "by language" | "by report file" | "by file"
 3303         $rhhh_count , # in  count{TYPE}{nFiles|code|blank|comment}{a|m|r|s}
 3304         $rh_scale   , # in
 3305        ) = @_;
 3306     print "-> diff_xml_report\n" if $opt_v > 2;
 3307     my ($Q, $open_B, $close_B, $start, $C) = yaml_to_json_separators();
 3308 
 3309 #print "diff_report: ", Dumper($rhhh_count), "\n";
 3310     $elapsed_sec = 0.5 unless $elapsed_sec;
 3311     my @results       = ();
 3312     my %sum           = ();
 3313     my $languages     = ();
 3314 
 3315     my ($sum_lines, $sum_files, $BY_FILE, $BY_LANGUAGE) =
 3316         diff_header_sum($report_type, $rhhh_count, \%sum);
 3317 
 3318     my $data_line   = "";
 3319 
 3320     if (!$ALREADY_SHOWED_HEADER) {
 3321         push @results,
 3322               xml_yaml_or_json_header($URL, $version, $elapsed_sec,
 3323                                  $sum_files, $sum_lines, $BY_FILE);
 3324         $ALREADY_SHOWED_HEADER = 1;
 3325     }
 3326 
 3327     foreach my $S (qw(same modified added removed)) {
 3328         push @results, "  <$S>";
 3329         foreach my $lang_or_file (sort {
 3330                                      $rhhh_count->{$b}{'code'} <=>
 3331                                      $rhhh_count->{$a}{'code'}
 3332                                    }
 3333                               keys %{$rhhh_count}) {
 3334             my $L = "";
 3335 
 3336             if ($BY_FILE) {
 3337                 $L .= sprintf "    <file name=\"%s\" files_count=\"1\" ",
 3338                     xml_metachars(
 3339                         rm_leading_tempdir($lang_or_file, \%TEMP_DIR));
 3340             } else {
 3341                 $L .= sprintf "    <language name=\"%s\" files_count=\"%d\" ",
 3342                         $lang_or_file ,
 3343                         $rhhh_count->{$lang_or_file}{'nFiles'}{$S};
 3344             }
 3345             if ($opt_by_percent) {
 3346               my $DEN = compute_denominator($opt_by_percent            ,
 3347                             $rhhh_count->{$lang_or_file}{'code'}{$S}   ,
 3348                             $rhhh_count->{$lang_or_file}{'comment'}{$S},
 3349                             $rhhh_count->{$lang_or_file}{'blank'}{$S}  );
 3350               foreach my $T (qw(blank comment)) {
 3351                   if ($rhhh_count->{$lang_or_file}{'code'}{$S} > 0) {
 3352                     $L .= sprintf "%s=\"%.2f\" ",
 3353                             $T, $rhhh_count->{$lang_or_file}{$T}{$S} / $DEN * 100;
 3354                   } else {
 3355                     $L .= sprintf "%s=\"0.0\" ", $T;
 3356                   }
 3357               }
 3358               foreach my $T (qw(code)) {
 3359                   $L .= sprintf "%s=\"%d\" ",
 3360                           $T, $rhhh_count->{$lang_or_file}{$T}{$S};
 3361               }
 3362             } else {
 3363               foreach my $T (qw(blank comment code)) {
 3364                   $L .= sprintf "%s=\"%d\" ",
 3365                           $T, $rhhh_count->{$lang_or_file}{$T}{$S};
 3366               }
 3367             }
 3368             push @results, $L . "/>";
 3369         }
 3370 
 3371 
 3372         my $L = sprintf "    <total sum_files=\"%d\" ", $sum{'nFiles'}{$S};
 3373         if ($opt_by_percent) {
 3374           my $DEN = compute_denominator($opt_by_percent,
 3375                         $sum{'code'}{$S}   ,
 3376                         $sum{'comment'}{$S},
 3377                         $sum{'blank'}{$S}  );
 3378           foreach my $V (qw(blank comment)) {
 3379               if ($sum{'code'}{$S} > 0) {
 3380                   $L .= sprintf "%s=\"%.2f\" ", $V, $sum{$V}{$S} / $DEN * 100;
 3381               } else {
 3382                   $L .= sprintf "%s=\"0.0\" ", $V;
 3383               }
 3384           }
 3385           foreach my $V (qw(code)) {
 3386               $L .= sprintf "%s=\"%d\" ", $V, $sum{$V}{$S};
 3387           }
 3388         } else {
 3389           foreach my $V (qw(blank comment code)) {
 3390               $L .= sprintf "%s=\"%d\" ", $V, $sum{$V}{$S};
 3391           }
 3392         }
 3393         push @results, $L . "/>";
 3394         push @results, "  </$S>";
 3395     }
 3396 
 3397     push @results, "</diff_results>";
 3398     write_xsl_file() if $opt_xsl and $opt_xsl eq $CLOC_XSL;
 3399     print "<- diff_xml_report\n" if $opt_v > 2;
 3400     return @results;
 3401 } # 1}}}
 3402 sub diff_csv_report {                        # {{{1
 3403     # returns an array of lines containing the results
 3404     my ($version    , # in
 3405         $elapsed_sec, # in
 3406         $report_type, # in  "by language" | "by report file" | "by file"
 3407         $rhhh_count , # in  count{TYPE}{nFiles|code|blank|comment}{a|m|r|s}
 3408         $rh_scale   , # in  unused
 3409        ) = @_;
 3410     print "-> diff_csv_report\n" if $opt_v > 2;
 3411 
 3412     my @results       = ();
 3413     my $languages     = ();
 3414 
 3415     my $data_line   = "";
 3416     my $BY_LANGUAGE = 0;
 3417     my $BY_FILE     = 0;
 3418     if      ($report_type eq "by language") {
 3419         $BY_LANGUAGE  = 1;
 3420     } elsif ($report_type eq "by file")     {
 3421         $BY_FILE      = 1;
 3422     }
 3423     my $DELIM = ",";
 3424        $DELIM = $opt_csv_delimiter if defined $opt_csv_delimiter;
 3425        $DELIM = "|" if defined $opt_md;
 3426 
 3427     $elapsed_sec = 0.5 unless $elapsed_sec;
 3428 
 3429     my $line = "Language${DELIM} ";
 3430        $line = "File${DELIM} " if $BY_FILE;
 3431     foreach my $item (qw(files blank comment code)) {
 3432         next if $BY_FILE and $item eq 'files';
 3433         foreach my $symbol ( '==', '!=', '+', '-', ) {
 3434             $line .= "$symbol $item${DELIM} ";
 3435         }
 3436     }
 3437 
 3438     my $T_elapsed_sec = "T=$elapsed_sec s";
 3439        $T_elapsed_sec = "" if $opt_hide_rate;
 3440 
 3441     if ($opt_md) {
 3442         push @results, "cloc|$URL v $version $T_elapsed_sec";
 3443         push @results, "--- | ---";
 3444         push @results, "";
 3445         push @results, $line;
 3446         my @col_header  = ();
 3447         push @col_header, ":-------";
 3448         foreach (1..16) {
 3449             push @col_header, "-------:";
 3450         }
 3451         push @results, join("|", @col_header) . "|";
 3452     } else {
 3453         $line .= "\"$URL v $version $T_elapsed_sec\"";
 3454         push @results, $line;
 3455     }
 3456 
 3457     foreach my $lang_or_file (keys %{$rhhh_count}) {
 3458         $rhhh_count->{$lang_or_file}{'code'}{'added'} = 0 unless
 3459             defined $rhhh_count->{$lang_or_file}{'code'};
 3460     }
 3461     foreach my $lang_or_file (sort {
 3462                                  $rhhh_count->{$b}{'code'} <=>
 3463                                  $rhhh_count->{$a}{'code'}
 3464                                }
 3465                           keys %{$rhhh_count}) {
 3466         if ($BY_FILE) {
 3467             $line = rm_leading_tempdir($lang_or_file, \%TEMP_DIR) . "$DELIM ";
 3468         } else {
 3469             $line = $lang_or_file . "${DELIM} ";
 3470         }
 3471         if ($opt_by_percent) {
 3472           foreach my $item (qw(nFiles)) {
 3473               next if $BY_FILE and $item eq 'nFiles';
 3474               foreach my $symbol (qw(same modified added removed)) {
 3475                   if (defined $rhhh_count->{$lang_or_file}{$item}{$symbol}) {
 3476                       $line .= "$rhhh_count->{$lang_or_file}{$item}{$symbol}${DELIM} ";
 3477                   } else {
 3478                       $line .= "0${DELIM} ";
 3479                   }
 3480               }
 3481           }
 3482           foreach my $item (qw(blank comment)) {
 3483               foreach my $symbol (qw(same modified added removed)) {
 3484                   if (defined $rhhh_count->{$lang_or_file}{$item}{$symbol} and
 3485                       defined $rhhh_count->{$lang_or_file}{'code'}{$symbol} and
 3486                       $rhhh_count->{$lang_or_file}{'code'}{$symbol} > 0) {
 3487                       $line .= sprintf("%.2f", $rhhh_count->{$lang_or_file}{$item}{$symbol} / $rhhh_count->{$lang_or_file}{'code'}{$symbol} * 100).${DELIM};
 3488                   } else {
 3489                       $line .= "0.00${DELIM} ";
 3490                   }
 3491               }
 3492           }
 3493           foreach my $item (qw(code)) {
 3494               foreach my $symbol (qw(same modified added removed)) {
 3495                   if (defined $rhhh_count->{$lang_or_file}{$item}{$symbol}) {
 3496                       $line .= "$rhhh_count->{$lang_or_file}{$item}{$symbol}${DELIM} ";
 3497                   } else {
 3498                       $line .= "0${DELIM} ";
 3499                   }
 3500               }
 3501           }
 3502         } else {
 3503           foreach my $item (qw(nFiles blank comment code)) {
 3504               next if $BY_FILE and $item eq 'nFiles';
 3505               foreach my $symbol (qw(same modified added removed)) {
 3506                   if (defined $rhhh_count->{$lang_or_file}{$item}{$symbol}) {
 3507                       $line .= "$rhhh_count->{$lang_or_file}{$item}{$symbol}${DELIM} ";
 3508                   } else {
 3509                       $line .= "0${DELIM} ";
 3510                   }
 3511               }
 3512           }
 3513         }
 3514         push @results, $line;
 3515     }
 3516 
 3517     print "<- diff_csv_report\n" if $opt_v > 2;
 3518     return @results;
 3519 } # 1}}}
 3520 sub rm_leading_tempdir {                     # {{{1
 3521     my ($in_file, $rh_temp_dirs, ) = @_;
 3522     my $clean_filename = $in_file;
 3523     foreach my $temp_d (keys %{$rh_temp_dirs}) {
 3524         if ($ON_WINDOWS) {
 3525         # \ -> / necessary to allow the next if test's
 3526         # m{} to work in the presence of spaces in file names
 3527             $temp_d         =~ s{\\}{/}g;
 3528             $clean_filename =~ s{\\}{/}g;
 3529         }
 3530         if ($clean_filename =~ m{^$temp_d/}) {
 3531             $clean_filename =~ s{^$temp_d/}{};
 3532             last;
 3533         }
 3534     }
 3535     if ($ON_WINDOWS and $opt_by_file) { # then go back from / to \
 3536         if ($opt_json) {
 3537             $clean_filename =~ s{/}{\\\\}g;
 3538         } else {
 3539             $clean_filename =~ s{/}{\\}g;
 3540         }
 3541     }
 3542     return $clean_filename;
 3543 } # 1}}}
 3544 sub generate_sql    {                        # {{{1
 3545     my ($elapsed_sec, # in
 3546         $rhh_count  , # in  count{TYPE}{lang|code|blank|comment|scaled}
 3547         $rh_scale   , # in
 3548        ) = @_;
 3549     print "-> generate_sql\n" if $opt_v > 2;
 3550 
 3551 #print "generate_sql A [$opt_sql_project]\n";
 3552     $opt_sql_project = cwd() unless defined $opt_sql_project;
 3553     $opt_sql_project = '' unless defined $opt_sql_project; # have seen cwd() fail
 3554 #print "generate_sql B [$opt_sql_project]\n";
 3555     $opt_sql_project =~ s{/}{\\}g if $ON_WINDOWS;
 3556 #print "generate_sql C [$opt_sql_project]\n";
 3557 
 3558     my $schema = undef;
 3559     if ($opt_sql_style eq "oracle") {
 3560         $schema = "
 3561 CREATE TABLE metadata
 3562 (
 3563   timestamp   TIMESTAMP,
 3564   project     VARCHAR2(500 CHAR),
 3565   elapsed_s   NUMBER(10, 6)
 3566 )
 3567 /
 3568 
 3569 CREATE TABLE t
 3570 (
 3571   project        VARCHAR2(500 CHAR),
 3572   language       VARCHAR2(500 CHAR),
 3573   file_fullname  VARCHAR2(500 CHAR),
 3574   file_dirname   VARCHAR2(500 CHAR),
 3575   file_basename  VARCHAR2(500 CHAR),
 3576   nblank         INTEGER,
 3577   ncomment       INTEGER,
 3578   ncode          INTEGER,
 3579   nscaled        NUMBER(10, 6)
 3580 )
 3581 /
 3582 
 3583 ";
 3584     } else {
 3585         $schema = "
 3586 create table metadata (          -- $URL v $VERSION
 3587                 timestamp varchar(500),
 3588                 Project   varchar(500),
 3589                 elapsed_s real);
 3590 create table t        (
 3591                 Project       varchar(500)   ,
 3592                 Language      varchar(500)   ,
 3593                 File          varchar(500)   ,
 3594                 File_dirname  varchar(500)   ,
 3595                 File_basename varchar(500)   ,
 3596                 nBlank        integer        ,
 3597                 nComment      integer        ,
 3598                 nCode         integer        ,
 3599                 nScaled       real           );
 3600 ";
 3601     }
 3602     $opt_sql = "-" if $opt_sql eq "1";
 3603 
 3604     my $open_mode = ">";
 3605        $open_mode = ">>" if $opt_sql_append;
 3606 
 3607     my $fh = new IO::File; # $opt_sql, "w";
 3608     if (!$fh->open("${open_mode}${opt_sql}")) {
 3609         die "Unable to write to $opt_sql  $!\n";
 3610     }
 3611     print $fh $schema unless defined $opt_sql_append;
 3612 
 3613     my $insert_into_t = "insert into t ";
 3614     if ($opt_sql_style eq "oracle") {
 3615         printf $fh "insert into metadata values(TO_TIMESTAMP('%s','yyyy-mm-dd hh24:mi:ss'), '%s', %f);\n",
 3616                     strftime("%Y-%m-%d %H:%M:%S", localtime(time())),
 3617                     $opt_sql_project, $elapsed_sec;
 3618     } elsif ($opt_sql_style eq "named_columns") {
 3619         print $fh "begin transaction;\n";
 3620         $insert_into_t .= "( Project, Language, File, File_dirname, File_basename, nBlank, nComment, nCode, nScaled )";
 3621     } else {
 3622         print $fh "begin transaction;\n";
 3623         printf $fh "insert into metadata values('%s', '%s', %f);\n",
 3624                     strftime("%Y-%m-%d %H:%M:%S", localtime(time())),
 3625                     $opt_sql_project, $elapsed_sec;
 3626     }
 3627 
 3628     my $nIns = 0;
 3629     foreach my $file (keys %{$rhh_count}) {
 3630         my $language = $rhh_count->{$file}{'lang'};
 3631         my $clean_filename = $file;
 3632         # If necessary (that is, if the input contained an
 3633         # archive file [.tar.gz, etc]), strip the temporary
 3634         # directory name which was used to expand the archive
 3635         # from the file name.
 3636 
 3637         $clean_filename = rm_leading_tempdir($clean_filename, \%TEMP_DIR);
 3638         $clean_filename =~ s/\'/''/g;  # double embedded single quotes
 3639                                        # to escape them
 3640 
 3641         printf $fh "$insert_into_t values('%s', '%s', '%s', '%s', '%s', " .
 3642                    "%d, %d, %d, %f);\n",
 3643                     $opt_sql_project           ,
 3644                     $language                  ,
 3645                     $clean_filename            ,
 3646                     dirname( $clean_filename)  ,
 3647                     basename($clean_filename)  ,
 3648                     $rhh_count->{$file}{'blank'},
 3649                     $rhh_count->{$file}{'comment'},
 3650                     $rhh_count->{$file}{'code'}   ,
 3651                     $rhh_count->{$file}{'code'}*$rh_scale->{$language};
 3652 
 3653         ++$nIns;
 3654         if (!($nIns % 10_000) and ($opt_sql_style ne "oracle")) {
 3655             print $fh "commit;\n";
 3656             print $fh "begin transaction;\n";
 3657         }
 3658     }
 3659     if ($opt_sql_style ne "oracle") {
 3660         print $fh "commit;\n";
 3661     }
 3662 
 3663     $fh->close unless $opt_sql eq "-"; # don't try to close STDOUT
 3664     print "<- generate_sql\n" if $opt_v > 2;
 3665 
 3666     # sample query:
 3667     #
 3668     #   select project, language,
 3669     #          sum(nCode)     as Code,
 3670     #          sum(nComment)  as Comments,
 3671     #          sum(nBlank)    as Blank,
 3672     #          sum(nCode)+sum(nComment)+sum(nBlank) as All_Lines,
 3673     #          100.0*sum(nComment)/(sum(nCode)+sum(nComment)) as Comment_Pct
 3674     #          from t group by Project, Language order by Project, Code desc;
 3675     #
 3676 } # 1}}}
 3677 sub output_header   {                        # {{{1
 3678     my ($header_line,
 3679         $hyphen_line,
 3680         $BY_FILE    ,)    = @_;
 3681     print "-> output_header\n" if $opt_v > 2;
 3682     my @R = ();
 3683     if      ($opt_xml) {
 3684         if (!$ALREADY_SHOWED_XML_SECTION) {
 3685             push @R, "<?xml version=\"1.0\" encoding=\"UTF-8\"?>";
 3686             push @R, '<?xml-stylesheet type="text/xsl" href="' .
 3687                             $opt_xsl . '"?>' if $opt_xsl;
 3688             push @R, "<results>";
 3689             push @R, "<header>$header_line</header>";
 3690             $ALREADY_SHOWED_XML_SECTION = 1;
 3691         }
 3692         if ($BY_FILE) {
 3693             push @R, "<files>";
 3694         } else {
 3695             push @R, "<languages>";
 3696         }
 3697     } elsif ($opt_yaml) {
 3698         push @R, "---\n# $header_line";
 3699     } elsif ($opt_csv or $opt_md) {
 3700         # append the header to the end of the column headers
 3701         # to keep the output a bit cleaner from a spreadsheet
 3702         # perspective
 3703     } else {
 3704         if ($ALREADY_SHOWED_HEADER) {
 3705             push @R, "";
 3706         } else {
 3707             push @R, $header_line;
 3708             $ALREADY_SHOWED_HEADER = 1;
 3709         }
 3710         push @R, $hyphen_line;
 3711     }
 3712     print "<- output_header\n" if $opt_v > 2;
 3713     return @R;
 3714 } # 1}}}
 3715 sub generate_report {                        # {{{1
 3716     # returns an array of lines containing the results
 3717     my ($version    , # in
 3718         $elapsed_sec, # in
 3719         $report_type, # in  "by language" | "by report file" | "by file"
 3720         $rhh_count  , # in  count{TYPE}{nFiles|code|blank|comment|scaled}
 3721         $rh_scale   , # in
 3722        ) = @_;
 3723 
 3724     print "-> generate_report\n" if $opt_v > 2;
 3725     my $DELIM = ",";
 3726        $DELIM = $opt_csv_delimiter if defined $opt_csv_delimiter;
 3727        $DELIM = "|" if defined $opt_md;
 3728 
 3729     my @results       = ();
 3730 
 3731     my $languages     = ();
 3732 
 3733     my $sum_files     = 0;
 3734     my $sum_code      = 0;
 3735     my $sum_blank     = 0;
 3736     my $sum_comment   = 0;
 3737     my $max_len       = 0;
 3738     foreach my $language (keys %{$rhh_count}) {
 3739         $sum_files   += $rhh_count->{$language}{'nFiles'} ;
 3740         $sum_blank   += $rhh_count->{$language}{'blank'}  ;
 3741         $sum_comment += $rhh_count->{$language}{'comment'};
 3742         $sum_code    += $rhh_count->{$language}{'code'}   ;
 3743         $max_len      = length($language) if length($language) > $max_len;
 3744     }
 3745     my $column_1_offset = 0;
 3746        $column_1_offset = $max_len - 17 if $max_len > 17;
 3747     my $sum_lines = $sum_blank + $sum_comment + $sum_code;
 3748     $elapsed_sec = 0.5 unless $elapsed_sec;
 3749 
 3750     my $spacing_0 = 23;
 3751     my $spacing_1 = 13;
 3752     my $spacing_2 =  9;
 3753     my $spacing_3 = 17;
 3754     if (!$opt_3) {
 3755         $spacing_1 = 19;
 3756         $spacing_2 = 14;
 3757         $spacing_3 = 27;
 3758     }
 3759     $spacing_0 += $column_1_offset;
 3760     $spacing_1 += $column_1_offset;
 3761     $spacing_3 += $column_1_offset;
 3762     my %Format = (
 3763         '1' => { 'xml' => 'name="%s" ',
 3764                  'txt' => "\%-${spacing_0}s ",
 3765                },
 3766         '2' => { 'xml' => 'name="%s" ',
 3767                  'txt' => "\%-${spacing_3}s ",
 3768                },
 3769         '3' => { 'xml' => 'files_count="%d" ',
 3770                  'txt' => '%6d ',
 3771                },
 3772         '4' => { 'xml' => 'blank="%d" comment="%d" code="%d" ',
 3773                  'txt' => "\%${spacing_2}d \%${spacing_2}d \%${spacing_2}d",
 3774                },
 3775         '5' => { 'xml' => 'blank="%3.2f" comment="%3.2f" code="%d" ',
 3776                  'txt' => "\%14.2f \%14.2f \%${spacing_2}d",
 3777                },
 3778         '6' => { 'xml' => 'factor="%.2f" scaled="%.2f" ',
 3779                  'txt' => ' x %6.2f = %14.2f',
 3780                },
 3781     );
 3782     my $Style = "txt";
 3783        $Style = "xml" if $opt_xml ;
 3784        $Style = "xml" if $opt_yaml;  # not a typo; just set to anything but txt
 3785        $Style = "xml" if $opt_json;  # not a typo; just set to anything but txt
 3786        $Style = "xml" if $opt_csv ;  # not a typo; just set to anything but txt
 3787 
 3788     my $hyphen_line = sprintf "%s", '-' x (79 + $column_1_offset);
 3789        $hyphen_line = sprintf "%s", '-' x (68 + $column_1_offset)
 3790             if (!$opt_sum_reports) and (!$opt_3) and (68 + $column_1_offset) > 79;
 3791     my $data_line  = "";
 3792     my $first_column;
 3793     my $BY_LANGUAGE = 0;
 3794     my $BY_FILE     = 0;
 3795     if      ($report_type eq "by language") {
 3796         $first_column = "Language";
 3797         $BY_LANGUAGE  = 1;
 3798     } elsif ($report_type eq "by file")     {
 3799         $first_column = "File";
 3800         $BY_FILE      = 1;
 3801     } elsif ($report_type eq "by report file")     {
 3802         $first_column = "File";
 3803     } else {
 3804         $first_column = "Report File";
 3805     }
 3806 
 3807     my $header_line  = sprintf "%s v %s", $URL, $version;
 3808        $header_line .= sprintf("  T=%.2f s (%.1f files/s, %.1f lines/s)",
 3809                         $elapsed_sec           ,
 3810                         $sum_files/$elapsed_sec,
 3811                         $sum_lines/$elapsed_sec) unless $opt_sum_reports or $opt_hide_rate;
 3812     if ($opt_xml or $opt_yaml or $opt_json) {
 3813         if (!$ALREADY_SHOWED_HEADER) {
 3814             if ($opt_by_file_by_lang and $opt_json) {
 3815                 push @results, '{ "by_file" : ';
 3816             }
 3817             push @results, xml_yaml_or_json_header($URL, $version, $elapsed_sec,
 3818                                                    $sum_files, $sum_lines, $BY_FILE);
 3819 #           $ALREADY_SHOWED_HEADER = 1 unless $opt_sum_reports;
 3820             # --sum-reports yields two xml or yaml files, one by
 3821             # language and one by report file, each of which needs a header
 3822         }
 3823         if ($opt_xml) {
 3824             if ($BY_FILE or ($report_type eq "by report file")) {
 3825                 push @results, "<files>";
 3826             } else {
 3827                 push @results, "<languages>";
 3828             }
 3829         }
 3830     } else {
 3831         push @results, output_header($header_line, $hyphen_line, $BY_FILE);
 3832     }
 3833 
 3834     if ($Style eq "txt") {
 3835         # column headers
 3836         if (!$opt_3 and $BY_FILE) {
 3837             my $spacing_n = $spacing_1 - 11;
 3838             $data_line  = sprintf "%-${spacing_n}s ", $first_column;
 3839         } else {
 3840             $data_line  = sprintf "%-${spacing_1}s ", $first_column;
 3841         }
 3842         if ($BY_FILE) {
 3843             $data_line .= sprintf "%${spacing_2}s "  , " "    ;
 3844         } else {
 3845             $data_line .= sprintf "%${spacing_2}s "  , "files";
 3846         }
 3847         my $PCT_symbol = "";
 3848            $PCT_symbol = " \%" if $opt_by_percent;
 3849         $data_line .= sprintf "%${spacing_2}s %${spacing_2}s %${spacing_2}s",
 3850             "blank${PCT_symbol}"   ,
 3851             "comment${PCT_symbol}" ,
 3852             "code";
 3853         $data_line .= sprintf " %8s   %14s",
 3854             "scale"         ,
 3855             "3rd gen. equiv"
 3856               if $opt_3;
 3857         if ($opt_md) {
 3858             my @col_header  = ();
 3859             if ($data_line =~ m{\s%}) {
 3860                 $data_line =~ s{\s%}{_%}g;
 3861                 foreach my $w ( split(' ', $data_line) ) {
 3862                     $w =~ s{_%}{ %};
 3863                     push @col_header, $w;
 3864                 }
 3865             } else {
 3866                 push @col_header, split(' ', $data_line);
 3867             }
 3868             my @col_hyphens    = ( '-------:') x scalar(@col_header);
 3869                $col_hyphens[0] =   ':-------'; # first column left justified
 3870             push @results, join("|", @col_header );
 3871             push @results, join("|", @col_hyphens);
 3872         } else {
 3873             push @results, $data_line;
 3874             push @results, $hyphen_line;
 3875         }
 3876     }
 3877 
 3878     if ($opt_csv)  {
 3879         my $header2;
 3880         if ($BY_FILE) {
 3881             $header2 = "language${DELIM}filename";
 3882         } else {
 3883             $header2 = "files${DELIM}language";
 3884         }
 3885         $header2 .= "${DELIM}blank${DELIM}comment${DELIM}code";
 3886         $header2 .= "${DELIM}scale${DELIM}3rd gen. equiv" if $opt_3;
 3887         $header2 .= ${DELIM} . '"' . $header_line . '"';
 3888         push @results, $header2;
 3889     }
 3890 
 3891     my $sum_scaled = 0;
 3892     foreach my $lang_or_file (sort {
 3893                                  $rhh_count->{$b}{'code'} <=>
 3894                                  $rhh_count->{$a}{'code'}
 3895                               or $a cmp $b
 3896                                         }
 3897                                    keys %{$rhh_count}) {
 3898         next if $lang_or_file eq "by report file";
 3899         my ($factor, $scaled);
 3900         if ($BY_LANGUAGE or $BY_FILE) {
 3901             $factor = 1;
 3902             if ($BY_LANGUAGE) {
 3903                 if (defined $rh_scale->{$lang_or_file}) {
 3904                     $factor = $rh_scale->{$lang_or_file};
 3905                 } else {
 3906                     warn "No scale factor for $lang_or_file; using 1.00";
 3907                 }
 3908             } else { # by individual code file
 3909                 if ($report_type ne "by report file") {
 3910                     next unless defined $rhh_count->{$lang_or_file}{'lang'};
 3911                     next unless defined $rh_scale->{$rhh_count->{$lang_or_file}{'lang'}};
 3912                     $factor = $rh_scale->{$rhh_count->{$lang_or_file}{'lang'}};
 3913                 }
 3914             }
 3915             $scaled = $factor*$rhh_count->{$lang_or_file}{'code'};
 3916         } else {
 3917             if (!defined $rhh_count->{$lang_or_file}{'scaled'}) {
 3918                 $opt_3 = 0;
 3919                 # If we're summing together files previously generated
 3920                 # with --no3 then $rhh_count->{$lang_or_file}{'scaled'}
 3921                 # will be undefined.  That should only happen when
 3922                 # summing together by file, however.
 3923             } elsif ($BY_LANGUAGE) {
 3924                 warn "Missing scaled language info for $lang_or_file\n";
 3925             }
 3926             if ($opt_3) {
 3927                 $scaled =         $rhh_count->{$lang_or_file}{'scaled'};
 3928                 $factor = $scaled/$rhh_count->{$lang_or_file}{'code'};
 3929             }
 3930         }
 3931 
 3932         if ($BY_FILE) {
 3933             my $clean_filename = rm_leading_tempdir($lang_or_file, \%TEMP_DIR);
 3934                $clean_filename = xml_metachars($clean_filename) if $opt_xml;
 3935             $data_line  = sprintf $Format{'1'}{$Style}, $clean_filename;
 3936         } else {
 3937             $data_line  = sprintf $Format{'2'}{$Style}, $lang_or_file;
 3938         }
 3939         $data_line .= sprintf $Format{3}{$Style}  ,
 3940                         $rhh_count->{$lang_or_file}{'nFiles'} unless $BY_FILE;
 3941         if ($opt_by_percent) {
 3942           my $DEN = compute_denominator($opt_by_percent       ,
 3943                         $rhh_count->{$lang_or_file}{'code'}   ,
 3944                         $rhh_count->{$lang_or_file}{'comment'},
 3945                         $rhh_count->{$lang_or_file}{'blank'}  );
 3946           $data_line .= sprintf $Format{5}{$Style}  ,
 3947               $rhh_count->{$lang_or_file}{'blank'}   / $DEN * 100,
 3948               $rhh_count->{$lang_or_file}{'comment'} / $DEN * 100,
 3949               $rhh_count->{$lang_or_file}{'code'}   ;
 3950         } else {
 3951           $data_line .= sprintf $Format{4}{$Style}  ,
 3952               $rhh_count->{$lang_or_file}{'blank'}  ,
 3953               $rhh_count->{$lang_or_file}{'comment'},
 3954               $rhh_count->{$lang_or_file}{'code'}   ;
 3955         }
 3956         $data_line .= sprintf $Format{6}{$Style}  ,
 3957             $factor                               ,
 3958             $scaled if $opt_3;
 3959         $sum_scaled  += $scaled if $opt_3;
 3960 
 3961         if ($opt_xml) {
 3962             if (defined $rhh_count->{$lang_or_file}{'lang'}) {
 3963                 my $lang = $rhh_count->{$lang_or_file}{'lang'};
 3964                 if (!defined $languages->{$lang}) {
 3965                     $languages->{$lang} = $lang;
 3966                 }
 3967                 $data_line.=' language="' . $lang . '" ';
 3968             }
 3969             if ($BY_FILE or ($report_type eq "by report file")) {
 3970                 push @results, "  <file " . $data_line . "/>";
 3971             } else {
 3972                 push @results, "  <language " . $data_line . "/>";
 3973             }
 3974         } elsif ($opt_yaml or $opt_json) {
 3975             my ($Q, $open_B, $close_B, $start, $C) = yaml_to_json_separators();
 3976             if ($opt_yaml) {
 3977                 # YAML: force quoted language or filename in case these
 3978                 #       have embedded funny characters, issue #312
 3979                 push @results,"'" . rm_leading_tempdir($lang_or_file, \%TEMP_DIR). "' :$open_B";
 3980             } else {
 3981                 push @results,"${Q}" . rm_leading_tempdir($lang_or_file, \%TEMP_DIR). "${Q} :$open_B";
 3982             }
 3983             push @results,"  ${Q}nFiles${Q}: " . $rhh_count->{$lang_or_file}{'nFiles'} . $C
 3984                 unless $BY_FILE;
 3985             if ($opt_by_percent) {
 3986               my $DEN = compute_denominator($opt_by_percent       ,
 3987                             $rhh_count->{$lang_or_file}{'code'}   ,
 3988                             $rhh_count->{$lang_or_file}{'comment'},
 3989                             $rhh_count->{$lang_or_file}{'blank'}  );
 3990               push @results,"  ${Q}blank_pct${Q}: "   .
 3991                 sprintf("%3.2f", $rhh_count->{$lang_or_file}{'blank'} / $DEN * 100) . $C;
 3992               push @results,"  ${Q}comment_pct${Q}: " .
 3993                 sprintf("%3.2f", $rhh_count->{$lang_or_file}{'comment'} / $DEN * 100) . $C;
 3994               push @results,"  ${Q}code${Q}: "    . $rhh_count->{$lang_or_file}{'code'}  . $C;
 3995             } else {
 3996               push @results,"  ${Q}blank${Q}: "   . $rhh_count->{$lang_or_file}{'blank'}   . $C;
 3997               push @results,"  ${Q}comment${Q}: " . $rhh_count->{$lang_or_file}{'comment'} . $C;
 3998               push @results,"  ${Q}code${Q}: "    . $rhh_count->{$lang_or_file}{'code'}    . $C;
 3999             }
 4000             push @results,"  ${Q}language${Q}: "  . $Q . $rhh_count->{$lang_or_file}{'lang'} . $Q . $C
 4001                 if $BY_FILE;
 4002             if ($opt_3) {
 4003                 push @results, "  ${Q}scaled${Q}: " . $scaled . $C;
 4004                 push @results, "  ${Q}factor${Q}: " . $factor . $C;
 4005             }
 4006             if ($opt_json) { # replace the trailing comma with }, on the last line
 4007                 $results[-1] =~ s/,\s*$/},/;
 4008             }
 4009         } elsif ($opt_csv or $opt_md) {
 4010             my $extra_3 = "";
 4011                $extra_3 = "${DELIM}$factor${DELIM}$scaled" if $opt_3;
 4012             my $first_column = undef;
 4013             my $clean_name   = $lang_or_file;
 4014             my $str;
 4015             if ($opt_csv) {
 4016                 if ($BY_FILE) {
 4017                     $first_column = $rhh_count->{$lang_or_file}{'lang'};
 4018                     $clean_name   = rm_leading_tempdir($lang_or_file, \%TEMP_DIR);
 4019                 } else {
 4020                     $first_column = $rhh_count->{$lang_or_file}{'nFiles'};
 4021                 }
 4022                 $str = $first_column   . ${DELIM} .
 4023                        $clean_name     . ${DELIM};
 4024             } else {
 4025                 if ($BY_FILE) {
 4026                     $first_column = $rhh_count->{$lang_or_file}{'lang'};
 4027                     $clean_name   = rm_leading_tempdir($lang_or_file, \%TEMP_DIR);
 4028                     $str = $clean_name . ${DELIM};
 4029                 } else {
 4030                     $first_column = $rhh_count->{$lang_or_file}{'nFiles'};
 4031                     $str = $clean_name     . ${DELIM} .
 4032                            $first_column   . ${DELIM};
 4033                 }
 4034             }
 4035             if ($opt_by_percent) {
 4036               my $DEN = compute_denominator($opt_by_percent               ,
 4037                             $rhh_count->{$lang_or_file}{'code'}   ,
 4038                             $rhh_count->{$lang_or_file}{'comment'},
 4039                             $rhh_count->{$lang_or_file}{'blank'}  );
 4040               $str .= sprintf("%3.2f", $rhh_count->{$lang_or_file}{'blank'}   / $DEN * 100) . ${DELIM} .
 4041                       sprintf("%3.2f", $rhh_count->{$lang_or_file}{'comment'} / $DEN * 100) . ${DELIM} .
 4042                       $rhh_count->{$lang_or_file}{'code'};
 4043             } else {
 4044               $str .= $rhh_count->{$lang_or_file}{'blank'}  . ${DELIM} .
 4045                       $rhh_count->{$lang_or_file}{'comment'}. ${DELIM} .
 4046                       $rhh_count->{$lang_or_file}{'code'};
 4047             }
 4048             $str .= $extra_3;
 4049             push @results, $str;
 4050 
 4051         } else {
 4052             push @results, $data_line;
 4053         }
 4054     }
 4055 
 4056     my $avg_scale = 1;  # weighted average of scale factors
 4057        $avg_scale = sprintf("%.2f", $sum_scaled / $sum_code)
 4058             if $sum_code and $opt_3;
 4059 
 4060     if ($opt_xml) {
 4061         $data_line = "";
 4062         if (!$BY_FILE) {
 4063             $data_line .= sprintf "sum_files=\"%d\" ", $sum_files;
 4064         }
 4065         if ($opt_by_percent) {
 4066           my $DEN = compute_denominator($opt_by_percent    ,
 4067                         $sum_code, $sum_comment, $sum_blank);
 4068           $data_line .= sprintf $Format{'5'}{$Style},
 4069               $sum_blank   / $DEN * 100,
 4070               $sum_comment / $DEN * 100,
 4071               $sum_code    ;
 4072         } else {
 4073           $data_line .= sprintf $Format{'4'}{$Style},
 4074               $sum_blank   ,
 4075               $sum_comment ,
 4076               $sum_code    ;
 4077         }
 4078         $data_line .= sprintf $Format{'6'}{$Style},
 4079             $avg_scale   ,
 4080             $sum_scaled  if $opt_3;
 4081         push @results, "  <total " . $data_line . "/>";
 4082 
 4083         if ($BY_FILE or ($report_type eq "by report file")) {
 4084             push @results, "</files>";
 4085         } else {
 4086             foreach my $language (keys %{$languages}) {
 4087                 push @results, '  <language name="' . $language . '"/>';
 4088             }
 4089             push @results, "</languages>";
 4090         }
 4091 
 4092         if (!$opt_by_file_by_lang or $ALREADY_SHOWED_XML_SECTION) {
 4093             push @results, "</results>";
 4094         } else {
 4095             $ALREADY_SHOWED_XML_SECTION = 1;
 4096         }
 4097     } elsif ($opt_yaml or $opt_json) {
 4098         my ($Q, $open_B, $close_B, $start, $C) = yaml_to_json_separators();
 4099         push @results, "${Q}SUM${Q}: ${open_B}";
 4100         if ($opt_by_percent) {
 4101           my $DEN = compute_denominator($opt_by_percent    ,
 4102                         $sum_code, $sum_comment, $sum_blank);
 4103           push @results, "  ${Q}blank${Q}: "  . sprintf("%.2f", $sum_blank   / $DEN * 100) . $C;
 4104           push @results, "  ${Q}comment${Q}: ". sprintf("%.2f", $sum_comment / $DEN * 100) . $C;
 4105           push @results, "  ${Q}code${Q}: "   . $sum_code    . $C;
 4106         } else {
 4107           push @results, "  ${Q}blank${Q}: "  . $sum_blank   . $C;
 4108           push @results, "  ${Q}comment${Q}: ". $sum_comment . $C;
 4109           push @results, "  ${Q}code${Q}: "   . $sum_code    . $C;
 4110         }
 4111         push @results, "  ${Q}nFiles${Q}: " . $sum_files   . $C;
 4112         if ($opt_3) {
 4113             push @results, "  ${Q}scaled${Q}: " . $sum_scaled . $C;
 4114             push @results, "  ${Q}factor${Q}: " . $avg_scale  . $C;
 4115         }
 4116         if ($opt_json) {
 4117             $results[-1] =~ s/,\s*$/} }/;
 4118             if ($opt_by_file_by_lang) {
 4119                 if ($ALREADY_SHOWED_HEADER) {
 4120                     $results[-1] .= ' }';
 4121                 } else {
 4122                     $results[-1] .= ', "by_lang" : {';
 4123                 }
 4124             }
 4125         }
 4126     } elsif ($opt_csv) {
 4127         my @entries = ();
 4128         if ($opt_by_file) {
 4129             push @entries, "SUM";
 4130             push @entries, "";
 4131         } else {
 4132             push @entries, $sum_files;
 4133             push @entries, "SUM";
 4134         }
 4135         if ($opt_by_percent) {
 4136             my $DEN = compute_denominator($opt_by_percent    ,
 4137                           $sum_code, $sum_comment, $sum_blank);
 4138             push @entries, sprintf("%.2f", $sum_blank   / $DEN * 100);
 4139             push @entries, sprintf("%.2f", $sum_comment / $DEN * 100);
 4140         } else {
 4141             push @entries, $sum_blank;
 4142             push @entries, $sum_comment;
 4143         }
 4144         push @entries, $sum_code;
 4145         if ($opt_3) {
 4146             push @entries, $sum_scaled;
 4147             push @entries, $avg_scale ;
 4148         }
 4149         push @results, join(",", @entries);
 4150     } else {
 4151 
 4152         if ($BY_FILE) {
 4153             $data_line  = sprintf "%-${spacing_0}s ", "SUM:"  ;
 4154         } else {
 4155             $data_line  = sprintf "%-${spacing_1}s ", "SUM:"  ;
 4156             $data_line .= sprintf "%${spacing_2}d ", $sum_files;
 4157         }
 4158         if ($opt_by_percent) {
 4159           my $DEN = compute_denominator($opt_by_percent    ,
 4160                         $sum_code, $sum_comment, $sum_blank);
 4161           $data_line .= sprintf $Format{'5'}{$Style},
 4162               $sum_blank   / $DEN * 100,
 4163               $sum_comment / $DEN * 100,
 4164               $sum_code    ;
 4165         } else {
 4166           $data_line .= sprintf $Format{'4'}{$Style},
 4167               $sum_blank   ,
 4168               $sum_comment ,
 4169               $sum_code    ;
 4170         }
 4171         $data_line .= sprintf $Format{'6'}{$Style},
 4172             $avg_scale   ,
 4173             $sum_scaled if $opt_3;
 4174         if ($opt_md) {
 4175             my @words = split(' ', $data_line);
 4176             my $n_cols = scalar(@words);
 4177 #           my $n_cols = scalar(split(' ', $data_line));  # deprecated
 4178             $data_line =~ s/\s+/\|/g;
 4179             my