#!/usr/bin/perl
#    Last change: PFB 2001-10-27 17:20:44
# srep.cgi
############################################################################
# POD Documentation

=head1 PROGRAM NAME AND AUTHOR

Search and Replace - Version 1.4
Build Date: October 27, 2001

srep.cgi
by Peter F. Brown
peterbrown@worldcommunity.com

=head1 WHAT IT IS

Search and Replace (ok, it's not an original name...)

It's quite fast. It processed 13,402,165 replaces, (13.4 million)
(a 67 meg text file with 163,441 lines) in 1 minute, 10 seconds
on a Pentium 166 with 64 megs of RAM. (This was the test in v1.0)

On one client's system it processed 13,029 files,
with 7,487 replaces, in 11 seconds.

As an alternate method, I recommend using VEDIT, the fastest
huge file text editor in the world, at 'www.vedit.com'.

HELP: type "srep.cgi" for a help screen,
      or read the explanation in the subroutine &help_header
      as well as the comments sprinkled throughout the code.

=head1 COPYRIGHT

Copyright 2001 Peter F. Brown
SR complies with the GNU GENERAL PUBLIC LICENSE
and is released as "Open Source Software".
NO WARRANTY IS OFFERED FOR THE USE OF THIS SOFTWARE

! Just remember. Back up your file first!
  (when you mess with huge data sets, please
  save yourself grief and back the file up :-)

=head1 BUG REPORTS AND SUPPORT

Send bug reports to peterbrown@worldcommunity.com.
Visit the author's web site at 'worldcommunity.com'
to view information about support, customer quotes,
a resume link, and fees for custom Perl/MySQL programming.

=head1 OBTAINING THE LATEST VERSION

==> Get the most recent version of this program at:
    http://worldcommunity.com

=head1 REQUIREMENTS

Perl 5

=head1 CHANGE_LOG

- v1.4 - October 27, 2001
        . moved 'values in text file' to external include file,
          to allow multiple users to use that feature.
          The thought behind specifying variables in an include file
          was that complicated regexes might be easier in a file,
          rather than on the command line.
        . created a variable for the default include file name.
        . modified the parameter for a user specified include file name
        . removed the -h parameter for HELP
        . changed -u (for 'values in text file') to -d
        . Now, -d pulls in the default include file name, or
               <filename> (one parameter) pulls in a user specified file
          
        . added input parameters to deal with file extensions
          -include, -exclude, -ignore [default= -include]
          => include means 'include files with these extensions'
          => exclude means 'exclude files with these extensions'
          => ignore  means 'ignore the extension and process all files'
          -FILENAME [default= -none (use internal extension array if no file)]

        . modified the help and prompt screens

- v1.3 - May 9, 2001
        . added output of actual replaces to 'sr.replaces.log'
        . changed the formal name to 'Search and Replace'
        . changed the file name to 'srep.cgi' (for stability)

- v1.2 - May 6, 2001
        . Initial Public Release.
        . Changed to line method of parsing.
        . Added prompts, recursive directories, logs, essentially a complete rewrite.

- v1.0 - May 20, 1998
        . Initial release. Used 'chunk' method of parsing text, instead of lines
        . Only operated on one file at a time

=cut

##############################################################################
# setup area

# Name of the include file that is loaded when the user passes -d
# ("use the DEFAULT include file"). The leading './' makes the script
# look in the current directory. When the user passes a file name
# instead of -d, that file is looked for instead. If neither file is
# found, the values hard-coded in the ARG handling below are used.

$default_include_file_name = "./srep_include.pl";

# The approved file extensions the program looks for.

@file_extensions = ( 'htm', 'html', 'shtml', 'txt', 'cgi', 'pl', 'js', 'doc' );

# Extension parameters:
# (-include, -exclude, -ignore [default= -include])
# (-FILENAME [default= -none (use internal extension array if no file)])

# When these parameters are not given, the script INCLUDES (processes)
# the files whose extensions appear in the default array above.

# It is best to give the log files below an extension that is
# NOT in the array above.

( $log_file, $replaces_log ) = ( "./sr.log", "./sr.replaces.log" );

# With the include file parameter and the extension file parameter
# the script can be stored in a root folder such as
# /usr/local/bin, allowing it to be used by all users.
# Then, each user can use their own setup variables.

# end of setup area
##############################################################################

use File::Find;

# Clear the screen before showing anything.
# Only done when STDOUT is a terminal, so that redirected output does not
# collect terminal escape sequences. DOS/Windows consoles have no 'clear'
# command, so 'cls' is run there instead.

$clear = '';

if ( -t STDOUT )
      {
      if ( $^O eq 'MSWin32' or $^O eq 'dos' )
            {
            system('cls');
            }
      else
            {
            $clear = `clear 2>/dev/null`;
            $clear = '' unless defined $clear;
            }
      }

print $clear;

# version label and the banner shown on the prompt and help screens

$version = 'v1.4';

$sr_header = qq~Search and Replace $version - replaces characters in a text file.
       Copyright 2001 Peter Falkenberg Brown; peterbrown\@worldcommunity.com
       Open Source Software. [http://worldcommunity.com]~;

# Work out where the settings come from. The argument count decides:
#   1 argument   - "-d" (use $default_include_file_name) or an include file name
#   3 arguments  - input file, search string, replace string (options defaulted)
#   10 arguments - the three above followed by all seven options
# Any other argument count shows the help screen, which exits.

if ($#ARGV == 0)
      {
      # the script will try to open '$include_file_name'
      # if it can't find it, it will use the values below.
      ###################################################################
      # you can use regular expressions, if you're brave.
      # NOTE: this version doesn't support $1 parenthesizing
      # (perhaps in the next version)

      # note for DOS users: be careful of the 8.3 and \ conventions
      # when you name your files. Otherwise, sr should work under DOS.
      # NOTE: Using single quotes may have a different effect.

      # Your include file should contain the following variable = value pairs.
      # Note again that if your file exists the values below will NOT BE USED.
      # (note that your file should contain a '1;' on the last line)
      # this is because your file is being read as a Perl file, not just a text file.
      # NOTE: because it is read as Perl, the include file is EXECUTED;
      # only point the script at include files you trust.
      # sample syntax:
      #...............................................................

      # $input_file     = "YOUR INPUT FILE | or CURDIR";
      # $search_string  = "YOUR SEARCH STRING (CAN BE REGEX)";
      # $replace_string = "YOUR REPLACE STRING (CAN BE REGEX)";
      # $prompt_replace = "yes|no";
      # $save_backups   = "yes|no";
      # $outfile        = "outfile.sr | (OR ANY UNIQUE NAME)";
      # $double_check   = "yes|no";
      # $case_sensitive = "yes|no";
      # $ext_usage      = "include|exclude|ignore";
      # $ext_file       = "none | (OR YOUR PATH/FILENAME.EXT)";
      # 1;

      if ( $ARGV[0] eq "-d" )
            {
            $include_file_name = $default_include_file_name;
            }
      else
            {
            $include_file_name = $ARGV[0];
            }

      if ( -e $include_file_name )
            {
            $include_file_type = "external file ($include_file_name)";

            # 'require' searches @INC for any name that is not absolute and
            # does not start with './' or '../'. Since Perl 5.26 the current
            # directory is no longer in @INC, so a bare 'myvalues.pl' that
            # -e has just found here would fail to load. Hand 'require' an
            # absolute path so that exactly the file tested above is loaded.
            require File::Spec;
            my $include_path = File::Spec->rel2abs($include_file_name);
            require $include_path;
            }
      else
            {
            $include_file_type = "srep.cgi ($include_file_name not found)";
            
            $input_file     = "PUT_YOUR_INPUT_FILE_NAME_HERE";
            $search_string  = "PUT_YOUR_SEARCH_STRING_HERE";
            $replace_string = "PUT_YOUR_REPLACE_STRING_HERE";
            $prompt_replace = "yes";
            $save_backups   = "yes";
            $outfile        = "outfile.sr";
            $double_check   = "no";
            $case_sensitive = "no";
            $ext_usage      = "include";
            $ext_file       = "none";
            }

      }
      ################ end of s/r editing ###############################

elsif ($#ARGV == 2)
      {
      # short form: only the three required values, options take defaults
      $input_file     = $ARGV[0];
      $search_string  = $ARGV[1];
      $replace_string = $ARGV[2];
      $prompt_replace = 'yes';
      $save_backups   = 'yes';
      $outfile        = 'outfile.sr';
      $double_check   = 'no';
      $case_sensitive = 'no';
      $ext_usage      = 'include';
      $ext_file       = 'none';
      }

elsif ($#ARGV == 9)
      {
      # long form: every value given on the command line, in this order
      $input_file     = $ARGV[0];
      $search_string  = $ARGV[1];
      $replace_string = $ARGV[2];
      $prompt_replace = $ARGV[3];
      $save_backups   = $ARGV[4];
      $outfile        = $ARGV[5];
      $double_check   = $ARGV[6];
      $case_sensitive = $ARGV[7];
      $ext_usage      = $ARGV[8];
      $ext_file       = $ARGV[9];
      }
else
      {
      &help_header;
      }

# Refuse a search string that cannot work before any file is touched.
# An empty pattern is special in Perl (it re-uses the last pattern that
# matched), so the result of searching for it would be unpredictable.

unless ( defined $search_string and length $search_string )
      {
      print "\nSearch String must not be empty.\n";
      print "Exiting ... \n\n";
      exit;
      }

# The search string is used as a regular expression; make sure it compiles
# now rather than failing halfway through a file.

unless ( eval { qr/$search_string/; 1 } )
      {
      print "\nSearch String is not a valid regular expression:\n$@\n";
      print "Exiting ... \n\n";
      exit;
      }

# convert input vars
#
# Every option is lower-cased. A blank (or missing) value falls back to its
# default, which is what the error messages below tell the user will happen.

foreach my $option (
      [ \$prompt_replace, 'yes'     ],
      [ \$save_backups,   'yes'     ],
      [ \$double_check,   'no'      ],
      [ \$case_sensitive, 'no'      ],
      [ \$ext_usage,      'include' ],
      )
      {
      my ( $value_ref, $default ) = @$option;

      $$value_ref = $default unless defined $$value_ref and $$value_ref =~ /\S/;
      $$value_ref = lc($$value_ref);
      }

# the extension file name is a path, so it is defaulted but NOT lower-cased
$ext_file = 'none' unless defined $ext_file and $ext_file =~ /\S/;

# the four yes/no options share one check; the last field is the default
# as it is shown in the message

foreach my $check (
      [ 'Prompt Replace', $prompt_replace, 'YES' ],
      [ 'Save Backups',   $save_backups,   'YES' ],
      [ 'Double Check',   $double_check,   'NO'  ],
      [ 'Case Sensitive', $case_sensitive, 'NO'  ],
      )
      {
      my ( $label, $value, $default_shown ) = @$check;

      next if $value eq 'yes' or $value eq 'no';

      print "\n$label must equal either 'yes' or 'no' or BLANK.\n";
      print "If you leave it blank, it will default to '$default_shown'.\n";
      print "Exiting ... \n\n";
      exit;
      }

if ( $ext_usage ne 'include' and $ext_usage ne 'exclude' and $ext_usage ne 'ignore' )
      {
      print "\nExt Usage must equal either 'include' or 'exclude' or 'ignore'.\n";
      print "If you leave it blank, it will default to 'include'.\n";
      print "Exiting ... \n\n";
      exit;
      }

if ( $ext_file ne 'none' and $ext_file =~ /\w+/ )
      {
      # check to see if file exists. If so, create array.
      # (three argument open: the user supplied name is only ever a file
      # name, never an open mode or a command)

      unless ( open(EXT_FILE, '<', $ext_file ))
            {
            print "Error with extensions file ($ext_file): $!\n";
            exit;
            }

      @ext_array = ();

      while ( defined( my $ext_line = <EXT_FILE> ) )
            {
            # one extension per line: drop the line ending (LF or CRLF),
            # surrounding blanks and an optional leading dot; skip blank
            # lines, which would otherwise match every name ending in '.'
            $ext_line =~ s/^\s+//;
            $ext_line =~ s/\s+$//;
            $ext_line =~ s/^\.//;

            next unless length $ext_line;

            push @ext_array, $ext_line;
            }

      close (EXT_FILE) or die "Error closing extensions file $ext_file: $!\n";

      if ( scalar( @ext_array ) < 1 )
            {
            print "Extension File $ext_file had no extensions listed.\n";
            exit;
            }
      }

# check user input
#.............................
# Show the settings that are about to be used and wait for the user to
# confirm them. "y" continues, "n" exits, anything else asks again.

# unbuffered output, so the prompt appears before the read below
$| = 1;

# check for Unix or DOS, for console input

if (-e "/dev/tty")
     {$console = "/dev/tty";}
else {$console = "con";}

unless ( open(USER_PROMPT, '<', $console))
    {
    print "Can't open console: $!\n";
    exit;
    }

#..............................
$process = "false";
while ($process eq "false")
        {

        print qq~
$sr_header
~;

        if ($#ARGV == 0)
              {
              print "\nUsing values in $include_file_type\.\n";
              }

        print qq~
        You have specified the following:
             Input File:     $input_file
          Search String:     $search_string
         Replace String:     $replace_string
         Prompt Replace:     $prompt_replace (prompts at each replace)
           Save Backups:     $save_backups
              Temp File:     $outfile
           Double Check:     $double_check (double checks each replace)
         Case Sensitive:     $case_sensitive
        Extension Usage:     $ext_usage
         Extension File:     $ext_file
Default File Extensions:     [ @file_extensions ]
   Your File Extensions:     [ @ext_array ]

                             If Your File Extensions are specified,
                             they will overwrite the defaults.

      Type 'srep.cgi' by itself for a Help Screen.

      Do you wish to continue (enter only "y" or "n")? ~;

        $continue = <USER_PROMPT>;

        # end of input on the console: nobody can answer, so stop here
        # instead of asking again forever
        unless ( defined $continue )
             {
             close(USER_PROMPT);
             print "\n";
             exit;
             }

        # strip only the line ending (LF or CRLF); unlike chop this leaves
        # an answer without a trailing newline intact
        $continue =~ s/\r?\n\z//;
        $continue = lc($continue);

        if ($continue eq "y")
             {
             $process = "true";
             }
        elsif ($continue eq "n")
             {
             close(USER_PROMPT);
             print "\n";
             exit;
             }

        }

        close(USER_PROMPT);

#........................................................................
#........................................................................
#... process ...

# Main driver: opens both logs, writes the parameter header, runs
# process_file over the single input file or (for CURDIR) over the whole
# directory tree, then reports the totals.

# keep a copy of the built-in extensions for the log header, because the
# working list is about to be replaced by the user's list (if any)
my @default_file_extensions = @file_extensions;

if ( scalar( @ext_array ) >= 1 )
     {
     # overwrite default array
     @file_extensions = @ext_array;
     }

$files_processed      = 0;
$grand_total_replaces = 0;
$start                = time;

# open log file; use append mode
unless ( open (LOG, '>>', $log_file))
    {
    print "Error opening log file ($log_file\): $!\n";
    exit;
    }

# open replaces_log file; use append mode
unless ( open (REPLACES_LOG, '>>', $replaces_log))
    {
    print "Error opening replaces log file ($replaces_log\): $!\n";
    exit;
    }

print LOG "\nSearch and Replace LOG File:\n\n";
print REPLACES_LOG "\nSearch and Replace REPLACES LOG File:\n\n";

$log_header = qq~
SEARCHING USING THE FOLLOWING PARAMETERS:

             Input File: $input_file
          Search String: $search_string
         Replace String: $replace_string
         Prompt Replace: $prompt_replace (prompts at each replace)
           Save Backups: $save_backups
              Temp File: $outfile
           Double Check: $double_check (double checks each replace)
         Case Sensitive: $case_sensitive
        Extension Usage: $ext_usage
         Extension File: $ext_file
Default File Extensions: @default_file_extensions
   Your File Extensions: (if specified, will overwrite defaults)
                         @ext_array

===========================================================================

~;

print LOG $log_header;
print REPLACES_LOG $log_header;

if ( $input_file eq 'CURDIR' )
        {
        # we go into dir mode
        # (File::Find calls process_file once for every entry below '.')
        @DIRLIST = qw[.];
        find(\&process_file, @DIRLIST);
        }
else
        {
        &process_file('single_file');
        }

$end = time;
$seconds = $end - $start;
$minutes = $seconds / 60;

# the same summary line goes to both logs and to the screen
my $summary = "Processed $files_processed Files and $grand_total_replaces Replaces in $minutes minutes ($seconds seconds.)\n";

print LOG $summary;
print REPLACES_LOG $summary;
print $summary;

# buffered write errors (a full disk, for example) only show up at close
close(LOG)          or print "ALERT! error closing log file ($log_file\): $!\n";
close(REPLACES_LOG) or print "ALERT! error closing replaces log file ($replaces_log\): $!\n";

exit;

###########################################################################
# process_file
#
# Runs the search and replace over one file. The result is written to the
# temp file named by $outfile, which is then renamed over the input file.
#
# Called in two ways:
#   &process_file('single_file') - works on the file named in $input_file
#   by File::Find, no arguments  - works on $_, the entry File::Find is
#                                  currently visiting
#
# Settings are read from the globals $search_string, $replace_string,
# $case_sensitive, $prompt_replace, $double_check, $save_backups, $outfile,
# $ext_usage, @file_extensions, $log_file and $replaces_log.
# Messages go to the LOG / REPLACES_LOG handles and to the screen.
# $files_processed is incremented for every call (skipped entries included)
# and $grand_total_replaces grows by the number of substitutions made.
# Returns nothing. Exits the program if $ext_usage holds an unknown value.
sub process_file
{

my ($file_mode) = @_;

if ( defined $file_mode and $file_mode eq 'single_file' )
        {
        # there is no File::Find path in this mode, so the name the user
        # gave is also the name shown in every message
        $input_file_long = $input_file;
        }
else
        {
        $input_file      = $_;
        $input_file_long = $File::Find::name;
        }

$files_processed++;

# never rewrite our own log files while they are open for appending
# (possible with 'ignore', or when their extension is in the list)
foreach my $own_log ( $log_file, $replaces_log )
        {
        next unless defined $own_log and $input_file_long eq $own_log;

        _sr_report("Skipping $input_file_long (log file in use.)\n");
        return;
        }

#...... modified below in v1.4

if ( $ext_usage eq 'include' or $ext_usage eq 'exclude' )
        {
        # does the name end in one of the listed extensions?
        # \Q..\E: an extension is literal text, not a regular expression
        my $ext_listed = 0;

        foreach my $file_extension ( @file_extensions )
            {
            if ( $input_file =~ /\.\Q$file_extension\E\z/ )
                  {
                  $ext_listed = 1;
                  last;
                  }
            }

        # include - process only listed extensions
        # exclude - process everything except listed extensions
        my $good_ext = ( $ext_usage eq 'include' ) ? $ext_listed : !$ext_listed;

        unless ( $good_ext )
            {
            _sr_report("File $input_file_long does not end in an approved extension. Skipping.\n");
            return;
            }
        }
elsif ( $ext_usage ne 'ignore' )
        {
        # ignore means 'process every file'; anything else is a setup error
        print "Error! ext_usage not set correctly ($ext_usage).\n";
        exit;
        }

#...... modified above in v1.4

unless ( -e $input_file )
        {
        _sr_report("Skipping $input_file_long (file not found.)\n");
        return;
        }

if ( -d $input_file )
        {
        _sr_report("Skipping $input_file_long (directory.)\n");
        return;
        }

unless ( -T $input_file )
        {
        _sr_report("Skipping $input_file_long (NOT a TEXT File.)\n");
        return;
        }

my $backup_file = $input_file . '.BAK';

# get file info, so that owner and permissions can be put back on the
# rewritten file

my @info        = stat($input_file);
my $file_uid    = $info[4];
my $file_gid    = $info[5];
my $permissions = $info[2] & 07777;

# compile the search pattern once per file instead of once per line;
# done before any file is opened so a bad pattern leaves no temp file

my $search_re = ( $case_sensitive eq 'yes' )
              ? qr/$search_string/
              : qr/$search_string/i;

# open input file
my $in_fh;
unless ( open($in_fh, '<', $input_file ))
    {
    _sr_report("Error with input file ($input_file_long): $!\n");
    return;
    }

# open temp output file
my $out_fh;
unless ( open ($out_fh, '>', $outfile))
    {
    _sr_report("Error with output file ($outfile): $!\n");
    close($in_fh);
    return;
    }

my $lines       = 0;
my $replaces    = 0;
my $write_error = '';

_sr_report("\nProcessing $input_file_long\n");

# file looping here
#............................
# (the line is read into its own variable, so $_ - which File::Find
# expects to still hold the file name - is never disturbed)

while ( defined( my $line = <$in_fh> ) )
    {
    if ( $line =~ $search_re )
        {
        # do the replace on a copy; s///g returns the number of
        # occurrences replaced on this line
        my $line_check = $line;
        my $hits       = ( $line_check =~ s/$search_re/$replace_string/g );
        my $accepted   = 'yes';

        if ( $prompt_replace eq 'yes' )
                {
                $accepted = 'no';

                _sr_report("\nPRIOR TO REPLACE:\n... [${line}]\n\n");

                if ( &user_prompt('Do you wish to replace these occurrences?') eq 'y' )
                        {
                        _sr_report("\nAFTER REPLACE:\n... [${line_check}]\n\n");

                        if ( $double_check ne 'yes' )
                                {
                                $accepted = 'yes';
                                }
                        elsif ( &user_prompt('Was the replace done correctly?') eq 'y' )
                                {
                                $accepted = 'yes';
                                }
                        else
                                {
                                _sr_report("\nError in Replace. Line Replace Not Saved. Skipping.\n");
                                }
                        }
                } # prompt replace equals yes

        if ( $accepted eq 'yes' )
                {
                $line      = $line_check;
                $replaces += $hits;
                }

        } # line contains search string

    unless ( print {$out_fh} $line )
        {
        # remember the first write failure; checked after the loop
        $write_error = "$!" unless length $write_error;
        }

    $lines++;
    }

# end of file looping
#............................

close($in_fh);

unless ( close($out_fh) )
        {
        $write_error = "$!" unless length $write_error;
        }

if ( length $write_error )
        {
        # an incomplete temp file must never replace the original
        _sr_report("ALERT! can't write temp file $outfile: $write_error\n");
        _sr_report("Leaving $input_file_long unchanged.\n");
        unlink($outfile);
        return;
        }

# rename files
# restore permissions and ownership

if ( $save_backups eq 'yes' )
        {
        unless ( rename($input_file, $backup_file) )
                {
                # without the backup the user asked for, the original must
                # not be overwritten
                _sr_report("ALERT! can't rename $input_file_long to $backup_file: $!\n");
                _sr_report("Leaving $input_file_long unchanged.\n");
                unlink($outfile);
                return;
                }

        chown ($file_uid, $file_gid, $backup_file) == 1 or _sr_report("ALERT! can't chown $backup_file: $!\n");
        chmod ($permissions, $backup_file) == 1 or _sr_report("ALERT! can't chmod $backup_file: $!\n");
        _sr_report("Wrote backup to: $backup_file.\n");
        }

unless ( rename($outfile, $input_file) )
        {
        # nothing more can be done for this file; the result is still in
        # the temp file and (if requested) the original is in the backup
        _sr_report("ALERT! can't rename $outfile to $input_file_long: $!\n");
        return;
        }

chown ($file_uid, $file_gid, $input_file) == 1 or _sr_report("ALERT! can't chown $input_file_long: $!\n");
chmod ($permissions, $input_file) == 1 or _sr_report("ALERT! can't chmod $input_file_long: $!\n");

# finish

if ($replaces > 0)
    {
    my $done = "Processed $replaces replaces of $search_string with $replace_string in $input_file_long.\n";

    print LOG $done;
    print REPLACES_LOG $done;
    print $done;
    }
else
    {
    _sr_report("No replaces done.\n");
    }

$grand_total_replaces = $grand_total_replaces + $replaces;

_sr_report("Lines processed: $lines\n\n");

return;

}

# _sr_report
# private helper for process_file: writes one message to both the LOG
# handle and the screen.
sub _sr_report
{

my ($text) = @_;

print LOG $text;
print $text;

return;

}
###########################################################################
# help_header
#
# Prints the banner ($sr_header) followed by the usage screen, then exits
# the program. Takes no arguments and never returns. Called when the
# script is started with an argument count it does not recognise.
sub help_header
{
print qq~
$sr_header
        
Syntax: srep.cgi  -d ... or ... <include_file_name>
        (use setup vars in -d file (for 'DEFAULT' - '$default_include_file_name')
         or USER SPECIFIED FILE (either one must exist))

   ..or.. srep.cgi  <in_filename|CURDIR> <search_string> <replace_string>
          OPTIONAL: <prompt_replace,  yes|no DEFAULT: yes>
          OPTIONAL: <save_backups,    yes|no DEFAULT: yes>
          OPTIONAL: <temp_filename,   FILE.. DEFAULT: outfile.sr>
          OPTIONAL: <double_check,    yes|no DEFAULT: no>
          OPTIONAL: <case_sensitive,  yes|no DEFAULT: no>
          OPTIONAL: <extension_usage, include|exclude|ignore DEFAULT: include>
          OPTIONAL: <extension_file,  path/filename.ext|none DEFAULT: none>
Default Extensions: [ @file_extensions ]

NOTE: If you use ANY of the OPTIONAL parameters, you have to specify ALL of them.
For a fast UNPROMPTED replace of a directory tree, type:
"srep.cgi CURDIR 'SEARCHSTR' 'REPLACESTR' no no o.sr no no include none"

NOTE: 'case_sensitive' only applies to search string (not replace string).
NOTE: If <in_filename> equals 'CURDIR', then all the TEXT files in the
      current directory and all of its subdirectories will be processed.
NOTE: If save_backups is set to 'yes', then the input file will be copied
      to input_file.BAK. In either case, the input file will be overwritten
      with the temp file, for 'in place' editing.
NOTE: If you specify a file containing 'file extensions', those extensions will
      replace the defaults. In that optional file, TYPE ONE EXTENSION PER LINE.

Note: You can use regular expressions in your s/r values. Using single or double
quotes may change your s/r values. To replace a Perl \$variable_name, place a \\
in front of the search \$ sign (not the replace string). BACK UP YOUR FILE FIRST!!!

~;

# the help screen always ends the run
exit;

}

###########################################################################
# user_prompt
#
# Asks a yes/no question on the console and keeps asking until the answer
# is "y" or "n" (upper or lower case).
#
# syntax: $response = &user_prompt($message);
#
#   $message - the question; the '("(y) yes", "(n) no")?' hint and a
#              newline are appended before it is printed
#
# Returns 'y' or 'n'. If the console reaches end of input before a valid
# answer is given, 'n' is returned, so nothing is replaced unattended.
# Reads the console named in the global $console; exits the program if
# that console cannot be opened.
sub user_prompt
{

my ($message) = @_;
my $answer;

$message = $message . '("(y) yes", "(n) no")?' . "\n";

my $console_fh;

unless ( open($console_fh, '<', $console))
    {
    # keep the reason before another print can change $!
    my $open_error = "$!";

    print LOG "Can't open console: $open_error\n";
    print "Can't open console: $open_error\n";
    exit;
    }

#..............................

while (1)
        {
        print $message;

        $answer = <$console_fh>;

        unless ( defined $answer )
             {
             # end of input: nobody is there to answer, treat as "no"
             $answer = 'n';
             last;
             }

        # strip only the line ending (LF or CRLF); unlike chop this leaves
        # an answer without a trailing newline intact
        $answer =~ s/\r?\n\z//;
        $answer = lc($answer);

        last if $answer eq 'y' or $answer eq 'n';
        }

close($console_fh);

return($answer);

}
###########################################################################

