#! /usr/bin/env perl
# texi-elements-by-size -- dump list of elements based on words or line counts.
# Also serve as an example of using the Texinfo::Parser module,
# including the usual per-format options.
#
# Copyright 2012-2026 Free Software Foundation, Inc.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License,
# or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <https://www.gnu.org/licenses/>.
#
# Original author: Patrice Dumas <pertusus@free.fr>

use strict;

use warnings;

use Encode qw(decode encode);
use Config; # to determine the path separator
# for fileparse
use File::Basename;
use File::Spec;
use Getopt::Long qw(GetOptions);
# for dclone
use Storable;

Getopt::Long::Configure("gnu_getopt");

BEGIN {
  my $mydir = $0;
  $mydir =~ s,/[^/]+$,,;
  my $t2a_srcdir = `cd $mydir/../tta && pwd`;
  chomp($t2a_srcdir);
  if (defined($ENV{'top_builddir'})) {
    unshift(@INC, join('/', ($ENV{'top_builddir'}, 'tta', 'perl')));
  } else {
    # only works for in-source builds
    unshift(@INC, join('/', ($t2a_srcdir, 'perl')));
  }

  $ENV{t2a_srcdir} = "$t2a_srcdir";
  require Texinfo::ModulePath;
  Texinfo::ModulePath::init(undef, undef, undef);
}

use Texinfo::Parser;
use Texinfo::Convert::TextContent;

my $my_version = "0.1 (TP $Texinfo::Parser::VERSION)";

my ($real_command_name, $command_directory, $command_suffix)
  = fileparse($0, '.pl');

my $curdir = File::Spec->curdir();

# determine the path separators
my $path_separator = $Config{'path_sep'};
$path_separator = ':' if (!defined($path_separator));
my $quoted_path_separator = quotemeta($path_separator);

# the encoding used to decode command line arguments, and also for
# file names encoding, Perl is expecting sequences of bytes, not unicode
# code points.
my $locale_encoding;

eval 'require I18N::Langinfo';
if (!$@) {
  $locale_encoding = I18N::Langinfo::langinfo(I18N::Langinfo::CODESET());
  $locale_encoding = undef if ($locale_encoding eq '');
}

if (!defined($locale_encoding) and $^O eq 'MSWin32') {
  eval 'require Win32::API';
  if (!$@) {
    Win32::API::More->Import("kernel32", "int GetACP()");
    my $CP = GetACP();
    if (defined($CP)) {
      $locale_encoding = 'cp'.$CP;
    }
  }
}

my $force = 0;
my $use_sections = 0;
my $count_words = 0;
my $no_warn = 0;

# placeholder for future i18n.
sub __($) {
  return $_[0];
}

my $format = 'info';  # make our counts from the Info output
# this is the format associated with the output format, which is replaced
# when the output format changes.  It may also be removed if there is the
# corresponding --no-ifformat.
#my $default_expanded_format = [ $format ];

# directories specified on the command line.
my @include_dirs = ();
my @prepend_dirs = ();

my $parser_options = {
                      'EXPANDED_FORMATS' => [ $format ],
                      'values' => {'txicommandconditionals' => 1},
                     };

sub _encode_message($)
{
  my $text = shift;
  my $encoding = $locale_encoding;
  if (defined($encoding)) {
    return encode($encoding, $text);
  } else {
    return $text;
  }
}

sub _decode_input($)
{
  my $text = shift;

  my $encoding = $locale_encoding;
  if (defined($encoding)) {
    return decode($encoding, $text);
  } else {
    return $text;
  }
}

sub set_expansion($$) {
  my $region = shift;
  my $set = shift;
  $set = 1 if (!defined($set));
  if ($set) {
    push @{$parser_options->{'EXPANDED_FORMATS'}}, $region
      unless (grep {$_ eq $region} @{$parser_options->{'expanded_formats'}});
  } else {
    @{$parser_options->{'EXPANDED_FORMATS'}} =
      grep {$_ ne $region} @{$parser_options->{'EXPANDED_FORMATS'}};
  }
}

my $result_options = Getopt::Long::GetOptions (
 'help|h' => sub { print _encode_message(help()); exit 0; },
 'version|V' => sub {print _encode_message(
                              "$real_command_name $my_version\n\n");
                     print _encode_message(sprintf __(
"Copyright (C) %s Free Software Foundation, Inc.
License GPLv3+: GNU GPL version 3 or later <https://gnu.org/licenses/gpl.html>
This is free software: you are free to change and redistribute it.
There is NO WARRANTY, to the extent permitted by law."), "2025")."\n";
      exit 0;},
 'force' => \$force,
 'ifhtml!' => sub { set_expansion('html', $_[1]); },
 'ifinfo!' => sub { set_expansion('info', $_[1]); },
 'ifxml!' => sub { set_expansion('xml', $_[1]); },
 'ifdocbook!' => sub { set_expansion('docbook', $_[1]); },
 'iftex!' => sub { set_expansion('tex', $_[1]); },
 'ifplaintext!' => sub { set_expansion('plaintext', $_[1]); },
 'use-sections!' => \$use_sections,
 'count-words!' => \$count_words,
 'no-warn' => \$no_warn,
 'D=s' => sub {
    my $var = $_[1];
    my @field = split (/\s+/, $var, 2);
    if (@field == 1) {
      $parser_options->{'values'}->{_decode_input($var)} = 1;
    } else {
      $parser_options->{'values'}->{_decode_input($field[0])}
           = _decode_input($field[1]);
    }
 },
 'U=s' => sub {
    delete $parser_options->{'values'}->{_decode_input($_[1])};
 },

  'I=s' => sub {
                push @include_dirs, split(/$quoted_path_separator/, $_[1]); },
  'P=s' => sub { unshift @prepend_dirs, split(/$quoted_path_separator/, $_[1]); },
 'number-sections!' => sub { set_from_cmdline('NUMBER_SECTIONS', $_[1]); },
);

exit 1 if (!$result_options);

my @input_files = @ARGV;
# use STDIN if not a tty, like makeinfo does
@input_files = ('-') if (!scalar(@input_files) and !-t STDIN);

die _encode_message(
    sprintf(__("%s: missing file argument.")."\n", $real_command_name)
   .sprintf(__("Try `%s --help' for more information.")."\n", $real_command_name))
     unless (scalar(@input_files) >= 1);

if (scalar(@input_files) > 1) {
  my $encoded_files = join('|', @input_files[1..scalar(@input_files) -1]);
  my $files = _decode_input($encoded_files);
  warn _encode_message(
     sprintf(__("%s: superfluous file arguments: %s")."\n",
       $real_command_name, $files));
}

my $input_file_name = shift @input_files;

sub help() {
  my $help =
    sprintf(__("Usage: %s [OPTION]... TEXINFO-FILE...\n"), $real_command_name)
   ."\n".
    __("Write to standard output a list of Texinfo elements (nodes or sections)
sorted by the number of lines (or words) they contain,
after translation to Info format.\n")
."\n";

  $help .= __("General Options:
  --count-words    count words instead of lines.
  --force          keep going even if Texinfo file parsing fails.
  --help           display this help and exit.
  --no-warn        suppress warnings (but not errors).
  --use-sections   use sections as elements instead of nodes.
  --version        display version information and exit.\n")
."\n";
  $help .= __("Input file options:
  -D VAR                        define the variable VAR, as with \@set.
  -I DIR                        append DIR to the \@include search path.
  -P DIR                        prepend DIR to the \@include search path.
  -U VAR                        undefine the variable VAR, as with \@clear.\n")
."\n";
  $help .= __("Conditional processing in input:
  --ifdocbook       process \@ifdocbook and \@docbook.
  --ifhtml          process \@ifhtml and \@html.
  --ifinfo          process \@ifinfo.
  --ifplaintext     process \@ifplaintext.
  --iftex           process \@iftex and \@tex.
  --ifxml           process \@ifxml and \@xml.
  --no-ifdocbook    do not process \@ifdocbook and \@docbook text.
  --no-ifhtml       do not process \@ifhtml and \@html text.
  --no-ifinfo       do not process \@ifinfo text.
  --no-ifplaintext  do not process \@ifplaintext text.
  --no-iftex        do not process \@iftex and \@tex text.
  --no-ifxml        do not process \@ifxml and \@xml text.

  Also, for the --no-ifFORMAT options, do process \@ifnotFORMAT text.\n");
  return $help;

}

sub _exit($) {
  my $error_count = shift;
  exit(1) if ($error_count and !$force);
}

sub handle_errors(@) {
  my $errors = shift;
  my $additional_error_count = shift;
  my $error_count = shift;

  $error_count += $additional_error_count if ($additional_error_count);
  foreach my $error_message (@$errors) {
    if ($error_message->{'type'} eq 'error' or !$no_warn) {
      my $s = '';
      if (defined($error_message->{'file_name'})) {
        $s .= $error_message->{'file_name'} . ':';
      }
      if (defined($error_message->{'line_nr'})) {
        $s .= $error_message->{'line_nr'} . ':';
      }
      $s .= ' ' if ($s ne '');

      $s .= _encode_message($error_message->{'error_line'});
      warn $s;
    }
  }

  _exit($error_count);
  return $error_count;
}


my ($input_filename, $input_directory, $suffix)
  = fileparse($input_file_name);
if (!defined($input_directory) or $input_directory eq '') {
  $input_directory = $curdir;
}

my $parser_file_options = Storable::dclone($parser_options);
$parser_file_options->{'INCLUDE_DIRECTORIES'} = [@include_dirs];
my @prepended_include_directories = ('.');
push @prepended_include_directories, $input_directory
    if ($input_directory ne $curdir);
unshift @{$parser_file_options->{'INCLUDE_DIRECTORIES'}},
   @prepended_include_directories;
unshift @{$parser_file_options->{'INCLUDE_DIRECTORIES'}}, @prepend_dirs;

my $error_count = 0;
my $parser = Texinfo::Parser::parser($parser_file_options);
my $document = $parser->parse_texi_file($input_file_name);

$error_count
  = handle_errors($document->parser_errors(), $error_count);

my $converter_options = {};
my $converter = Texinfo::Convert::TextContent->converter($converter_options);

my ($sorted_name_counts_array, $formatted_result)
  = $converter->sort_element_counts($document, $use_sections,
                                    $count_words);

print STDOUT _encode_message($formatted_result);

exit(0);
