#!/usr/bin/perl -w
#
# dislines: distribute in several files the lines of a tagged text.
# Version 1.6 (c) 12-7 to 9-8-2005 Daniel Clemente Laboreo
# http://www.danielclemente.com/dislines/
my $VERSION="1.6";
#
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
#
#
# Super-quick syntax guide:
# (full syntax at): http://www.danielclemente.com/dislines/syntax.en.html
#
# Simple: @tag
# Multiple: @tag1,tag2,tag3
# Blocks: open with @{tag or @tag{ and close them with @}tag @tag} or @}
# Comments: @----- or in a block. Any number of -
# Repeat last tag used: @""""""" Any number of "
# You can use a lot of @ if you want: @@@@@@@@tag
# Common lines can also have it: @@ This is common
#
#
#
#
# The idea for the program is:
#
# 1. Create a temporary file for the common lines
# 2. For every line of the file:
# 3. | If it's common (no tag), add it to the global file created in (1)
# 4. | If it has a tag or more attached, do for each tag:
# 5. | | If it's the first time we see this tag:
# 6. | | | Duplicate the global file to make a temporary file for this tag
# 7. | | Add the line to the temporary file of the tag
# 8. | Also add the line to the global file
# 9. Now turn the temporary files into the final ones and delete the global
#
#
# use v5.0; # or 5.005_03 or later
use strict;
# use warnings; # Not on <v5.6.0. I'm using -w
use Getopt::Long;
use File::Copy;
use File::Basename;
# and File::Temp if available
#
# Customizable settings:
#
# Change to 1 to better understand the code:
my $DEBUG=0;
# How to separate the "TAG" from the "file" in "file.TAG.ext"
my $SEP_CHAR='.'; # file.TAG.ext
# my $SEP_CHAR='-'; # file-TAG.ext (eg. index-en.html)
# If you don't like ats (@), you can change the used symbol here.
# Of course, you must put the one you use in your files.
my $AT='@';
# my $AT='\s*@'; # Use this if you accept spaces before the @
# Other global variables:
# It's better to use our(...) here, but 'our' is only for >=5.6.0
use vars (
'$input_file',
'%file', # For each tag, it has its associated file handler
# tag "" means common text
'%lines', # For stats: for each tag, how many lines has the file
# tag "" means common text
# tag "-" means comment text
# Options from the command line
qw( $just_list $only_tags $out_base $just_stats
$show_help $show_version $quiet )
);
# No options on the command line ===> open graphical user interface
if (not @ARGV) {
# Load the Tk module, or complain if you don't have it
eval "require Tk;" or do {
my $error="\n To open the graphical interface, ".
"you need to install the perl-Tk package.\n".
"You can still use the command line options; ".
"try: perl $0 -h\n";
system(qq!Xdialog --title "Error: no perl-Tk" --msgbox "$error" 0 0!)==0
or print STDERR $error;
die "\n";
};
# and Tk::LabFrame, which in fact isn't really important (but nice to have)
eval "require Tk::LabFrame" or die " You don't have the Tk widget called 'LabFrame'. ".
"It's strange, since normally it comes with perl-Tk.\n".
"Tell me about your operating system and I will try to improve this.\n".
"For the moment, you can still the use the command line options; ".
"try: perl $0 -h\n";
# Tk found
print "I'm opening the GUI. If you want to use the command line, see -h\n";
# The main window, and the 'Advanced options' window.
my ($MainW, $AdvW);
$MainW = MainWindow -> new(-title=>'dislines');
my $action_sel=''; # '' or '--list' or '--stats'
# Variables for the important widgets
my ($w_frame, $w_text);
# Widgets: title: 'dislines', and the version
$MainW->Label (
-text => 'dislines',
-anchor => 'c',
-fg => 'orange',
-font => 'utopia -26 bold',
)->pack();
$MainW->Label (
-text => "v$VERSION",
)->pack();
# Widgets: the file chooser for the input file
$MainW->Label (
-text => 'File to process:',
-width=>60,
-anchor=>'w',
)->pack();
$w_frame = $MainW->Frame ()->pack();
$w_frame->Entry (
-width => 50,
-textvariable => \$input_file,
)->pack(
-side=>'left',
);
$w_frame->Button (
-text => 'Browse',
-command => sub {
my $selected=$MainW->getOpenFile(
-filetypes=>[
['All files', '*' ],
['Text files', '.txt' ],
['Web pages', ['.html','.xhtml'] ],
['txt2tags files', '.t2t' ],
]
);
$input_file = $selected || $input_file;
# A new file has been selected. Replace settings.
$out_base=$input_file if $selected;
# Furthermore, we keep the last used value for these settings:
# - action to do (process or list or stats)
# - separator to use at the file names
# - only tags that we have to process
#
# I think that this can be useful to someone who's trying to
# do a species of "batch" processing with several files.
},
-underline => 0,
)->pack(
-side=>'left',
);
# Widgets: the information window (also used for the output)
$w_text = $MainW->Text (
-relief=>'solid',
-wrap=>'word',
-height=>7,
-width=>60,
)->pack();
$w_text->tagConfigure('link', -foreground => 'blue', -underline => 1);
$w_text->tagConfigure('code', -foreground => '#1c1');
$w_text->insert('end',
"\n For every tag you used in your file, a file named ", 'normal',
"file.{TAG}.ext", 'code',
" will be created in the same directory, where ", 'normal',
"{TAG}", 'code',
" is the real tag, and assuming that ", 'normal',
"file.ext", 'code',
" is the name of the input file.\n More info at ", 'normal',
"http://www.danielclemente.com/dislines/",'link',
"\n", 'normal');
$w_text->configure(-state=>'disabled');
# Widgets: the 3 buttons: PROCESS, Advanced options, Exit
$w_frame = $MainW->Frame ()->pack(-pady=>5,);
$w_frame->Button (
-text=>'PROCESS',
-command => sub {
return if not $input_file;
# The application may have been started from any directory
# (since the user may have clicked some fancy icon in a file
# browser). We don't want to create the files there; it should
# be better in the same directory where the input file is.
my $out_topass=$out_base;
$out_topass ||= $input_file;
my $only_tags_topass = '';
$only_tags_topass="--tags=$only_tags" if $only_tags;
my $ret_info;
# Call the program
$ret_info=&dislines(
$input_file,
'--out' => $out_topass,
$action_sel, # '' or '--list' or '--stats'
$only_tags_topass, # '' or '--tags=es,it'...
) || $@;
# We're going to write the results into the Label
$w_text->configure(-state=>'normal'); # Be able to change it
$w_text->delete('0.0','end'); # Delete everything
$w_text->insert('end', $ret_info); # Write the text
# Now we'll write a message indicating whether there were errors
$w_text->tagConfigure('ok', -foreground => 'white', -background => '#1c1');
$w_text->tagConfigure('error', -foreground => 'white', -background => 'red');
if ($@) {
$w_text->insert('end','There were errors','error');
} else {
$w_text->insert('end','Operation completed with no problems','ok');
}
$w_text->configure(-state=>'disabled');
# The Label is blocked again (not editable)
if ($DEBUG) {
print "Finished. I put into the GUI this information:\n$ret_info";
}
},
)->pack(
-side=>'left',
);
$w_frame->Button (
-text=>'Advanced options',
-command => \&advanced_options,
)->pack(
-side=>'left',
);
$w_frame->Button (
-text=>'Exit',
-command => sub {
$MainW->destroy;
# exit() does a Segmentation Fault on my v5.8.6 if I use
# "require Tk" instead of "use Tk". I don't know why.
#exit 0;
},
)->pack(
-side=>'left',
);
# Show the window with the not-so-simple settings
sub advanced_options () {
if ($AdvW){
if ($DEBUG) {print "That window already was opened\n";}
# This sometimes failed; review: # $AdvW->focus;
return;
}
# Backup the current settings to be able to restore them if you click Cancel
my @advanced_options = (
$action_sel, $out_base, $SEP_CHAR, $only_tags,
);
my ($w_frame_output, $w_frame_tags); # and also $w_frame
# Create the window
$AdvW=$MainW->Toplevel(-title=>'dislines: Advanced options');
# Title
$AdvW->Label(-text=>'Advanced options', )->pack(-pady=>6,);
# Frame 1: 'Action'. 3 options.
$w_frame=$AdvW->LabFrame(-label=>'Action',)->pack();
$w_frame->Radiobutton(
-text=>'Process the file',
-variable=>\$action_sel,
-value=>'',
-width=>57, -anchor=>'w',
)->pack();
$w_frame->Radiobutton(
-text=>'Just list the used tags',
-variable=>\$action_sel,
-value=>'--list',
-width=>57, -anchor=>'w',
)->pack();
$w_frame->Radiobutton(
-text=>'Just show some statistics',
-variable=>\$action_sel,
-value=>'--stats',
-width=>57, -anchor=>'w',
)->pack();
# Frame 2: 'Output'. 2 text boxes.
$w_frame_output=$AdvW->LabFrame(-label=>'Output',)->pack();
$w_frame_output->Label(
-text=>
" Instead of the same name, use this as the output file base name.\n".
" For instance, using new.name.txt as the base name will generate\n".
" files called new.name.TAG.txt" ,
-width=>60,
-anchor=>'w',
-justify=>'left',
)->pack();
$w_frame = $w_frame_output->Frame ()->pack();
$w_frame->Entry (
-width => 40,
-textvariable => \$out_base,
)->pack(
-side=>'left',
);
$w_frame->Button (
-text => 'Browse',
-command => sub { $out_base= $AdvW->getSaveFile || $out_base; },
)->pack(
-side=>'left',
);
$w_frame_output->Label(-height=>1)->pack();
$w_frame = $w_frame_output->Frame()->pack();
$w_frame->Label(
-text=>'Separator to use in the file name.'.
"\nExamples: . for file.TAG.ext, - for file-TAG.ext, etc.",
-width=>50,
-anchor=>'w',
-justify=>'left',
)->pack(-side=>'left',);
$w_frame->Entry(
-width=>3,
-justify=>'center',
-textvariable=>\$SEP_CHAR
)->pack(-side=>'left',);
# Frame 3: 'Tags'. 1 text box.
$w_frame_tags=$AdvW->LabFrame(-label=>'Tags',)->pack();
$w_frame_tags->Label(
-text=>
' Just process these tags (leave blank to process all of them).'.
"\n Use a comma-separated list, for instance en,es,it or just en",
-width=>60,
-justify=>'left',
-anchor=>'w',
)->pack();
$w_frame_tags->Entry(
-width=>40,
-textvariable=>\$only_tags,
)->pack();
# Two buttons: Ok, Cancel. After several hours of reading about this, I
# decided to put the 'Ok' at the left since most of the options you have to
# click or text boxes you have to fill are justified to the left.
$w_frame = $AdvW->Frame ()->pack(-pady=>5,);
$w_frame->Button(-text=>'Ok',-command=>sub{
# A bit of preprocessing
$only_tags =~ s/\s+//g if $only_tags;
# Save the new settings
@advanced_options=
($action_sel, $out_base, $SEP_CHAR, $only_tags);
# Close the window
$AdvW->destroy; $AdvW="";
},)->pack(-side=>'left');
$w_frame->Button(-text=>'Cancel',-command=>sub{
# Cancel. Restore the settings we saved
($action_sel, $out_base, $SEP_CHAR, $only_tags)
= @advanced_options;
$AdvW->destroy; $AdvW="";
},)->pack(-side=>'left');
# Advanced options window is now ready and showing
return; # Yes, this was a sub
}
# Start the GUI and wait. The buttons will call the subs defined above.
Tk::MainLoop();
# The user closed the window. Nothing else to do.
} else {
# The user supplied some arguments. Don't open GUI. Just process them
# and print some information, or the errors (if any).
print ( &dislines(@ARGV) || $@ );
}
exit 0;
# And here finished the program
# Do everything. The parameters are the same as the command line options.
# Example of how to call it:
# print (&dislines qw( test.txt -t en,it -q ) || $@);
# or simply:
# dislines (file.txt);
#
# Returns:
# a scalar with some information you would normally want to view on the
# screen (ready to be printed).
# If it fails, it return undef (false) and sets $@ to the error message
# Beware: if you use --quiet, it may return "" (so, false) and this is not
# a sign of problem. You'll need to check $@
#
sub dislines (@) {
# We trap errors
eval q[
# Initializations
$just_list=$only_tags=$out_base=$just_stats=$quiet=$show_help=$show_version="";
# Simulate the command line
@ARGV=@_;
if ($DEBUG) {print "Parsing these options: @ARGV\n";}
GetOptions (
"list" => \$just_list,
"tags=s" => \$only_tags,
"out=s" => \$out_base,
"stats" => \$just_stats,
"quiet" => \$quiet,
"help" => \$show_help,
"version" => \$show_version,
)
or die "Strange parameters passed. Stopping";
if ($show_help) { return &show_help(); }
if ($show_version) { return &show_version(); }
if ( not @ARGV ) {
die "I need the name of the file to process. See help (-h)\n";
}
# Get just the first file.
# 'local' because the GUI doesn't want the value to be changing, and we indeed
# change it to remove the path and print only the basename. So, these changes will
# be local.
local $input_file = shift @ARGV;
&open_file();
# Listing tags doesn't need a full text processing
return &do_list() if $just_list;
# Create a temporary directory for our files
&init_temp_files();
# Do the hard job on all the document
&text_process();
# Get some information and delete the temporary files
return &do_stats() if $just_stats;
# Change the temporary files to the final ones.
# We get some information about which files were really written.
return &promote_files();
] or return undef; # and setting $@
# The result of this eval is what we return
}
# Prepare the filehandle INPUT for reading
sub open_file () {
open (INPUT, $input_file) or &system_error("Can't open $input_file");
warn "Be careful: $input_file doesn't seem a text file. Warning"
if not $quiet
and not -T $input_file
and not $input_file eq "-";
# Once opened, we just want its name, to shorten a little the printings
$^O =~ s/^dos$/msdos/i; # dosemu said just "dos"
fileparse_set_fstype($^O); # and this requires "msdos"
$input_file = basename ($input_file);
# If not specified, use the same name as the base for the output files
$out_base ||= $input_file;
# Don't print messages if we're using STDOUT
$quiet=1 if $out_base eq "-";
}
# This block has tools to access each temporary file
{
my $hasTemp; # Says if we have File::Temp
my $temp_dir; # Where to put all our files
my $counter; # For our own file name generator
# For each tag, has the name of the temporary file associated
my %names = ();
sub init_temp_files () {
# Detect if we have File::Temp (implemented in 5.6.1) to create temporary
# files safely. Otherwise, use our own temporary file creation.
#
$hasTemp=1;
eval 'use File::Temp qw(tempfile tempdir);';
$hasTemp=0 if $@;
$counter="000"; # In case we don't have it, we'll generate our own names
# We create a temporary directory, which will be destroyed after exit
eval '$temp_dir = tempdir( CLEANUP => 1 );';
die "Could not create temporary directory: $@" if ($hasTemp and $@);
if ($DEBUG) {print "Our temp directory is $temp_dir\n";}
}
# Return the filehandle of a newly created temporary file,
# associating it to the specified tag
sub create_temp_file ($) {
my $tag = shift;
my ($fh, $filename);
if ($hasTemp) { # We have File::Temp :-)
eval '($fh, $filename) =
tempfile( "disl_XXXX", DIR => "' . (quotemeta $temp_dir) . '");';
die "Could not create temporary file: $@" if $@;
} else {
# Try to create a good temp file
# For instance: tempfile.000, tempfile.001, .002, etc.
$filename = "tempfile.$counter";
$counter++; # from "000" to "001"
# 'autovivify' on open() is just for >=5.6.0
# So is the 3 parameters version...
open ( HANDLE, "+>" . $filename )
or &system_error("Can't create temporary file");
$fh=*HANDLE;
}
$names{$tag}=$filename;
if ($DEBUG) {print ("Created temp file for tag '$tag': $filename\n");}
return $fh;
}
# Returns the name of the temporary file for the given tag
sub temp_file_name ($) {
return $names{shift};
}
# Renames the temporary file associated to the specified tag
# to the final output file (whose name is specified)
sub temp_to_final ($$) {
my $old=shift;
$old=$names{$old};
my $new=shift;
if ($DEBUG) { print "Renaming $old to $new\n";}
move( $old, $new )
or &system_error("Error renaming temporary file");
}
# Remove the temporary file for the given tag
# In fact, File::Temp already removes the files on exit, but we have
# already been called to delete some files, so let's delete them ourselves.
sub delete_temp ($) {
my $tag = shift;
if ($DEBUG) {print "Deleting temporary file for tag '$tag': $names{$tag}\n";}
# Don't stop if there are problems; we want most files removed
unlink $names{$tag} or warn("Could not delete temp. file $names{$tag}: $!.");
delete $names{$tag}; # We need it no more in the hash
}
} # End of the 'temporary files' block
# Do the real text processing of INPUT file, and leave opened a bunch of temporary
# files, which can later be renamed to the final output files.
sub text_process() {
# Here starts the text processing
print "Processing $input_file\n" if not $quiet;
my $last_tag=""; # Last tag seen ( for @"" )
my $in_block=""; # Are we inside a block? If so, which tags is it for?
# Both variables contain a list of tags, like "en,eo,it"
# Open buffer for common lines (it's identified as if it had tag "")
$file{""} = &create_temp_file("");
# Reset the line counter
$. = 0;
# Start reading, line by line and in just one pass
while ( <INPUT> ) {
my ($command, $rest_of_line);
chomp; # No newlines ( \n ) at the end
if ($DEBUG) {print "$.: $_\n";}
($command, $rest_of_line)= /^${AT}+(.*?)(?: (.*))?$/o
or ($command, $rest_of_line)=("",$_); # Common line (no tag)
# Handle @en\n and others
$rest_of_line="" if not defined $rest_of_line;
# Skip comments: @---
if ($command =~ /^\-+$/) {
$lines{"-"}++ if $just_stats;
next;
}
# Handle @{ and @}
if ($command =~ /^\{(.*)$/ or $command =~ /^(.*)\{$/) {
# Opening a block
&bad_syntax("You can't nest blocks") if $in_block;
&bad_syntax("Block with no tags") if not $1;
$in_block=$1;
if ($DEBUG) {print "Entering block $in_block\n";}
next;
} elsif ($command =~ /^\}(.*)$/ or $command =~ /^(.*)\}$/) {
# Closing a block
&bad_syntax("You closed block $1, but no block was opened")
if (not $in_block);
&bad_syntax("You closed block $1, but I was expecting $in_block")
if ($1 and $1 ne $in_block);
if ($DEBUG) {print "Going out of block $in_block\n";}
$in_block="";
next;
}
# Handle blocks, and @""
if ($in_block) {
&bad_syntax("Use of tag $command not allowed inside a block")
if $command ne "";
# Translate the block @{tag to a simple @tag for each line
$command=$in_block;
} else { # Things to do when you're not into a block
$command=$last_tag if $command =~ /"+/;
}
if ($command eq "") {
# Not tagged. It may be a common line, or rest inside a block
if (not $in_block) { # Common line
# Add it to the global buffer ($file{""})
# and to each known file
foreach my $tag ( keys %file ) {
print { $file{$tag} } "$rest_of_line\n";
$lines{$tag}++ if $just_stats;
}
if($DEBUG) {print "Adding common line: $rest_of_line\n";}
next; # No tags to process
} else {
$command=$in_block;
}
}
# Handle things like @,,,,
&bad_syntax("Don't do that with the commas") if $command =~ /^,+$/;
# Iterate over the t1, t2, t3 of a @t1,t2,t3
foreach my $tag ( split /,/, $command ) {
&bad_syntax("Don't include null tag names")
if $tag eq ""; # as in the case of a @a,,b
&bad_syntax("Illegal tag name $tag (remove the '$&')")
if $tag =~ /[\/\\\@\{\}]/;
# now, tag is correct (it doesn't match /[ ,]/ nor /[\@{}]/ )
if ($tag =~ /^\-*$/) { # it's a comment
$lines{"-"}++ if $just_stats;
next;
}
next if ($only_tags and $only_tags !~ /\b$tag\b/); # Not selected
if (! $file{$tag} ) { # New tag found
# Create temporary file for this tag
local *HANDLE;
*HANDLE=&create_temp_file($tag);
$file{$tag}=*HANDLE;
# Copy all the lines from the global file to this one
seek ($file{""}, 0, 0)
and copy( $file{""} , $file{$tag} )
or &system_error("Problem duplicating file");
$lines{$tag}=$lines{""} if $just_stats;
if ($DEBUG) {print "New tag found: $tag\n";}
}
# Write in each tag's buffer
print { $file{$tag} } "$rest_of_line\n";
$lines{$tag}++ if $just_stats;
}
# Make @"" work
$last_tag=$command if not $in_block;
if ($DEBUG) {print ">>>>> Command: $command\tLine: $rest_of_line\n";}
}
# No more lines to process
&bad_syntax("You forgot to close block $in_block")
if $in_block;
}
# Final step: make the temporary files be the final ones
# Returns some text about which files did wrote.
sub promote_files () {
close INPUT;
close $file{""};
delete $file{""};
&delete_temp("");
my $ret_info="";
foreach my $tag (keys %file) {
# Are we writing files, or to the standard output?
if ($out_base ne "-") {
my $out_name=&out_file_name($tag);
close $file{$tag};
&temp_to_final( $tag, $out_name );
delete $file{$tag};
$ret_info .= "Wrote $out_name\n" if not $quiet;
} else {
# Write all files -concatenated- to STDOUT
# Hope that the user had selected just one with -t
my $handler = $file{$tag};
seek $handler, 0, 0;
print while ( <$handler> );
close $file{$tag};
# And delete it. We 'moved' it to STDOUT
&delete_temp($tag);
delete $file{$tag};
}
}
return $ret_info;
}
# Complain. These errors are trapped by the 'eval' at &dislines
sub bad_syntax ($) {
my $message = shift;
$message = "Error at line $.: $message\n";
&remove_all_temp();
die $message;
}
sub system_error ($) {
my $message = shift;
$message = "$message: $!\n";
&remove_all_temp();
die $message;
}
# Called in case of error, or statistics.
# (Otherwise, the temp files are converted to final ones).
sub remove_all_temp () {
foreach my $tag (keys %file) {
&delete_temp($tag); # Delete its temporary file on the disk
}
%file=(); # Also empty the list of known tags
}
# Decide the output file for each tag
sub out_file_name ($) {
# From a.html -> a.TAG.html
# a.b.txt -> a.b.TAG.txt
# abc -> abc.TAG
#
my $tag=shift;
my ($name, $dot_ext) = $out_base =~ /^(.*?)(\.[^\.]*)?$/;
$dot_ext="" if not $dot_ext;
# file . TAG .ext
return "${name}${SEP_CHAR}${tag}${dot_ext}";
}
# Return a list with the tags used in a document
sub do_list () {
# We're not doing a full parse of the file,
# neither check whether it is valid.
# We just search for lines beginning with @ to extract the tags.
my %seen_tags=();
while (<INPUT>) {
if ($DEBUG) {print "$. $_\n";}
# Take the command from the @command Rest_of_line
next unless /^${AT}+(?!\@)([^ ]+)[ \n]/o;
if ($DEBUG) {print "The command seems $1\n";}
foreach my $tag (split /[,\{\}]/, $1) {
next if $tag =~ /[\\\/\@]/; # skip the erroneous tags
next if not $tag;
# skip @"" and @--
next if $tag =~ /^\"+$/ or $tag =~ /^\-+$/;
if ($DEBUG) {print "$tag\n";}
$seen_tags{$tag}=1;
}
}
close INPUT;
my $ret_info="";
$ret_info.="List of tags used in $input_file:\n" if not $quiet;
$ret_info.="$_\n" for (keys %seen_tags);
return $ret_info;
}
# Return some statistics about the document and its tags
sub do_stats () {
# We have the line count of each file. We can extract some information.
my $total_input_lines = $.;
my $common_lines = $lines{""} || 0;
my $ret_info= "$input_file ($total_input_lines lines):";
$ret_info = "$ret_info\n".'-'x(length $ret_info)."\n";
# Get the maximum number of digits for some values
my ($longest_tag,$longest_lines)=(0,0);
foreach my $tag (keys %file) {
next if $tag eq "";
my $c;
$c=&length_in_chars($tag);
$longest_tag=$c if $c > $longest_tag;
$c=length $lines{$tag};
$longest_lines=$c if $c > $longest_lines;
}
$longest_tag= &length_in_chars(
&out_file_name( 'x' x $longest_tag )
); # Length of the full name, not just the largest tag
foreach my $tag (keys %file) {
close $file{$tag}; # Profit the occasion to close temp files
next if $tag eq ""; # Just real tags; we don't want the global buffer
my $num=$lines{$tag};
my $name=&out_file_name($tag);
my $lin="%${longest_lines}u"; # Format string for the line counts
# Of course, this can be done with "format"s, but wouldn't simplify
# the code since the records aren't fixed-width.
$ret_info .=
sprintf(
' 'x($longest_tag-&length_in_chars($name)) .
"%s: $lin lines = " .
"$lin proper + $lin common\n",
$name, $num,
$num-$common_lines, $common_lines);
}
my $num_tags= scalar(keys %file) -1; # -1 because of the "" (not real)
$lines{'-'} ||=0;
$ret_info .= "Number of tags: $num_tags. Number of comments: $lines{'-'}.\n\n";
&remove_all_temp(); # Since we won't create the output files
%lines=(); # Fresh hash for the next run
return $ret_info;
}
sub length_in_chars ($) {
# This is a little hack to get the length of a string
# which we don't know if it's Unicode (UTF-8) or not.
my $a = shift;
# Perl can't know if $a is Unicode or not
my $l = length $a; # Take its length as it if weren't
# This sets the "utf8" bit for $b
my $b;
eval q{
$b = pack "U0C*", unpack "C*", $a;
# We're hiding this from <5.6.0 versions,
# which will get the length as if it weren't UTF-8
};
# Try to take its length as if it were Unicode
eval q{
# If length detects some error (like malformed UTF-8 characters),
# die and don't touch $l. Otherwise, it's UTF-8, so get the real $l
use warnings FATAL => 'all';
$l=length $b;
};
return $l;
}
sub show_version () {
return <<"EOF";
dislines $VERSION < http://www.danielclemente.com/dislines/ >
Follows version 1 of the specification.
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, version 2.
July-August 2005. Daniel Clemente Laboreo.
EOF
}
sub show_help () {
return <<'EOF';
Usage: dislines [OPTIONS] file.ext
Options:
(nothing) Process the file and create a file.TAG.ext for each tag
-l, --list Just show a list of the tags used in the file
-t, --tags=LIST Only include these tags. Ex: en,it,fr
-o, --out=FILE Use this base name for the output files (file.TAG.ext)
-s, --stats Just print some information about the files to be created
-q, --quiet Write only the necessary information to the screen
-h, --help Show this help (see the manual for the syntax)
-v, --version Print version information
You may put a dash (-) instead of file names to use STDIN or STDOUT.
EOF
}
# The end, really
syntax highlighted by Code2HTML, v. 0.9.1