MayaChemTools

   1 #!/usr/bin/perl -w
   2 #
   3 # File: MergeTextFiles.pl
   4 # Author: Manish Sud <msud@san.rr.com>
   5 #
   6 # Copyright (C) 2019 Manish Sud. All rights reserved.
   7 #
   8 # This file is part of MayaChemTools.
   9 #
  10 # MayaChemTools is free software; you can redistribute it and/or modify it under
  11 # the terms of the GNU Lesser General Public License as published by the Free
  12 # Software Foundation; either version 3 of the License, or (at your option) any
  13 # later version.
  14 #
  15 # MayaChemTools is distributed in the hope that it will be useful, but without
  16 # any warranty; without even the implied warranty of merchantability of fitness
  17 # for a particular purpose.  See the GNU Lesser General Public License for more
  18 # details.
  19 #
  20 # You should have received a copy of the GNU Lesser General Public License
  21 # along with MayaChemTools; if not, see <http://www.gnu.org/licenses/> or
  22 # write to the Free Software Foundation Inc., 59 Temple Place, Suite 330,
  23 # Boston, MA, 02111-1307, USA.
  24 #
  25 
  26 use strict;
  27 use FindBin; use lib "$FindBin::Bin/../lib";
  28 use Getopt::Long;
  29 use File::Basename;
  30 use Text::ParseWords;
  31 use Benchmark;
  32 use FileHandle;
  33 use FileUtil;
  34 use TextUtil;
  35 
  36 my($ScriptName, %Options, $StartTime, $EndTime, $TotalTime);
  37 
  38 # Autoflush STDOUT
  39 $| = 1;
  40 
  41 # Starting message...
  42 $ScriptName = basename $0;
  43 print "\n$ScriptName:Starting...\n\n";
  44 $StartTime = new Benchmark;
  45 
  46 # Get the options and setup script...
  47 SetupScriptUsage();
  48 if ($Options{help} || @ARGV < 1) {
  49   die GetUsageFromPod("$FindBin::Bin/$ScriptName");
  50 }
  51 
  52 my(@TextFilesList);
  53 @TextFilesList = ExpandFileNames(\@ARGV, "csv tsv");
  54 
  55 if (@TextFilesList == 1) {
  56   die "Error: Specify more than one text file.\n";
  57 }
  58 
  59 # Process options...
  60 print "Processing options...\n";
  61 my(%OptionsInfo);
  62 ProcessOptions();
  63 
  64 # Setup information about input files...
  65 my(%TextFilesInfo);
  66 print "Checking input text files...\n";
  67 RetrieveTextFilesInfo();
  68 RetrieveColumnsAndKeysInfo();
  69 
  70 # Merge files...
  71 print "\nGenerating new text file $OptionsInfo{NewTextFile}...\n";
  72 MergeTextFiles();
  73 
  74 print "\n$ScriptName:Done...\n\n";
  75 
  76 $EndTime = new Benchmark;
  77 $TotalTime = timediff ($EndTime, $StartTime);
  78 print "Total time: ", timestr($TotalTime), "\n";
  79 
  80 ###############################################################################
  81 
  82 # Merge all valid Text files...
  83 sub MergeTextFiles {
  84   my($Index);
  85 
  86   open NEWTEXTFILE, ">$OptionsInfo{NewTextFile}" or die "Error: Couldn't open $OptionsInfo{NewTextFile}: $! \n";
  87 
  88   WriteNewTextFileColumnLabels(\*NEWTEXTFILE);
  89 
  90   #Open up all the files and skip coumn label line...
  91   @{$TextFilesInfo{FileHandle}} = ();
  92   for $Index (0 .. $#TextFilesList) {
  93     $TextFilesInfo{FileHandle}[$Index] = new FileHandle;
  94 
  95     open $TextFilesInfo{FileHandle}[$Index], "$TextFilesList[$Index]" or die "Error: Couldn't open $TextFilesList[$Index]: $! \n";
  96     GetTextLine($TextFilesInfo{FileHandle}[$Index]);
  97   }
  98 
  99   # Merge files...
 100   if ($OptionsInfo{Keys}) {
 101     MergeColumnValuesUsingKeys(\*NEWTEXTFILE);
 102   }
 103   else {
 104     MergeColumnValues(\*NEWTEXTFILE);
 105   }
 106 
 107   # Close all opened files...
 108   close NEWTEXTFILE;
 109   for $Index (0 .. $#TextFilesList) {
 110     close $TextFilesInfo{FileHandle}[$Index];
 111   }
 112 
 113 }
 114 
 115 # Merge all the column values...
 116 sub MergeColumnValues {
 117   my($NewTextFileRef) = @_;
 118   my($Index, $Line, $InDelim, $Value, $ColNum, @LineWords, @File1LineWords, @ColValues);
 119 
 120   while ($Line = GetTextLine($TextFilesInfo{FileHandle}[0])) {
 121     $InDelim = $TextFilesInfo{InDelim}[0];
 122     @ColValues = ();
 123 
 124     #Collect column values from first file before the merge point...
 125     @File1LineWords = quotewords($InDelim, 0, $Line);
 126     for $ColNum (@{$TextFilesInfo{File1Part1ColNums}}) {
 127       $Value = ($ColNum < @File1LineWords) ? $File1LineWords[$ColNum] : "";
 128       push @ColValues, $Value;
 129     }
 130 
 131     #Collect column values from other text files...
 132     for $Index (1 .. $#TextFilesList) {
 133       $InDelim = $TextFilesInfo{InDelim}[$Index];
 134       if ($Line = GetTextLine($TextFilesInfo{FileHandle}[$Index])) {
 135         @LineWords = quotewords($InDelim, 0, $Line);
 136         for $ColNum (@{$TextFilesInfo{ColToMerge}[$Index]}) {
 137           $Value = ($ColNum < @LineWords) ? $LineWords[$ColNum] : "";
 138           push @ColValues, $Value;
 139         }
 140       }
 141     }
 142 
 143     #Collect column labels from first file after the merge point...
 144     for $ColNum (@{$TextFilesInfo{File1Part2ColNums}}) {
 145       $Value = ($ColNum < @File1LineWords) ? $File1LineWords[$ColNum] : "";
 146       push @ColValues, $Value;
 147     }
 148 
 149     # Write it out...
 150     $Line = JoinWords(\@ColValues, $OptionsInfo{OutDelim}, $OptionsInfo{OutQuote});
 151     print $NewTextFileRef "$Line\n";
 152   }
 153 
 154 }
 155 
 156 # Merge column values using keys...
 157 sub MergeColumnValuesUsingKeys {
 158   my($NewTextFileRef) = @_;
 159   my($Index, $InDelim, $Line, $Value, $ColNum, $KeyColNum, $KeyColValue, @LineWords, @ColValues, @File1LineWords, @TextFilesKeysToLinesMap);
 160 
 161   @TextFilesKeysToLinesMap = ();
 162 
 163   # Retrieve text lines from all the files except for the first file...
 164   for $Index (1 .. $#TextFilesList) {
 165     %{$TextFilesKeysToLinesMap[$Index]} = ();
 166 
 167     $InDelim = $TextFilesInfo{InDelim}[$Index];
 168     $KeyColNum = $TextFilesInfo{KeysToUse}[$Index];
 169 
 170     while ($Line = GetTextLine($TextFilesInfo{FileHandle}[$Index])) {
 171       @LineWords = quotewords($InDelim, 0, $Line);
 172       if ($KeyColNum < @LineWords) {
 173         $KeyColValue = $LineWords[$KeyColNum];
 174         if (length($KeyColValue)) {
 175           if (exists($TextFilesKeysToLinesMap[$Index]{$KeyColValue})) {
 176             warn "Warning: Ignoring line, $Line, in text file $TextFilesList[$Index]: Column key value, $KeyColValue, already exists\n";
 177           }
 178           else {
 179             @{$TextFilesKeysToLinesMap[$Index]{$KeyColValue}} = ();
 180             push @{$TextFilesKeysToLinesMap[$Index]{$KeyColValue}}, @LineWords;
 181           }
 182         }
 183       }
 184     }
 185   }
 186 
 187   while ($Line = GetTextLine($TextFilesInfo{FileHandle}[0])) {
 188     $InDelim = $TextFilesInfo{InDelim}[0];
 189 
 190     @ColValues = ();
 191     @File1LineWords = quotewords($InDelim, 0, $Line);
 192 
 193     $KeyColNum = $TextFilesInfo{KeysToUse}[0];
 194     $KeyColValue = $File1LineWords[$KeyColNum];
 195 
 196     #Collect column values from first file before the merge point...
 197     for $ColNum (@{$TextFilesInfo{File1Part1ColNums}}) {
 198       $Value = ($ColNum < @File1LineWords) ? $File1LineWords[$ColNum] : "";
 199       push @ColValues, $Value;
 200     }
 201 
 202     #Collect column values from other text files...
 203     for $Index (1 .. $#TextFilesList) {
 204       @LineWords = ();
 205       if (exists($TextFilesKeysToLinesMap[$Index]{$KeyColValue})) {
 206         push @LineWords, @{$TextFilesKeysToLinesMap[$Index]{$KeyColValue}};
 207       }
 208       for $ColNum (@{$TextFilesInfo{ColToMerge}[$Index]}) {
 209         $Value = ($ColNum < @LineWords) ? $LineWords[$ColNum] : "";
 210         push @ColValues, $Value;
 211       }
 212     }
 213 
 214     #Collect column labels from first file after the merge point...
 215     for $ColNum (@{$TextFilesInfo{File1Part2ColNums}}) {
 216       $Value = ($ColNum < @File1LineWords) ? $File1LineWords[$ColNum] : "";
 217       push @ColValues, $Value;
 218     }
 219 
 220     # Write it out...
 221     $Line = JoinWords(\@ColValues, $OptionsInfo{OutDelim}, $OptionsInfo{OutQuote});
 222     print $NewTextFileRef "$Line\n";
 223   }
 224 
 225 }
 226 
 227 # Write out column labels for new merged text file...
 228 sub WriteNewTextFileColumnLabels {
 229   my($NewTextFileRef) = @_;
 230   my($Index, $Line, $ColNum, @ColLabels);
 231 
 232   #Write out column labels for the merged text file...
 233   @ColLabels = ();
 234 
 235   #Collect column labels from first file before the merge point...
 236   for $ColNum (@{$TextFilesInfo{File1Part1ColNums}}) {
 237     push @ColLabels, $TextFilesInfo{ColToMergeNumToLabelMap}[0]{$ColNum};
 238   }
 239 
 240   #Collect column labels from other text files...
 241   for $Index (1 .. $#TextFilesList) {
 242     for $ColNum (@{$TextFilesInfo{ColToMerge}[$Index]}) {
 243       push @ColLabels, $TextFilesInfo{ColToMergeNumToLabelMap}[$Index]{$ColNum};
 244     }
 245   }
 246 
 247   #Collect column labels from first file after the merge point...
 248   for $ColNum (@{$TextFilesInfo{File1Part2ColNums}}) {
 249     push @ColLabels, $TextFilesInfo{ColToMergeNumToLabelMap}[0]{$ColNum};
 250   }
 251 
 252   #Write it out...
 253   $Line = JoinWords(\@ColLabels, $OptionsInfo{OutDelim}, $OptionsInfo{OutQuote});
 254   print NEWTEXTFILE "$Line\n";
 255 }
 256 
 257 # Retrieve text file columns and keys information for specified options...
 258 sub RetrieveColumnsAndKeysInfo {
 259   ProcessColumnsInfo();
 260 
 261   if ($OptionsInfo{Keys}) {
 262     ProcessKeysInfo();
 263   }
 264 
 265   ProcessStartColInfo();
 266 }
 267 
 268 # Process specified columns...
 269 sub ProcessColumnsInfo {
 270   my($Index, $SpecifiedColNum, $Values, $ColIndex, $ColNum, $ColLabel, @Words);
 271 
 272   @{$TextFilesInfo{ColSpecified}} = ();
 273   @{$TextFilesInfo{ColToMerge}} = ();
 274   @{$TextFilesInfo{ColToMergeNumToLabelMap}} = ();
 275 
 276   for $Index (0 .. $#TextFilesList) {
 277 
 278     @{$TextFilesInfo{ColSpecified}[$Index]} = ();
 279 
 280     $Values = "all";
 281     if ($OptionsInfo{Columns}) {
 282       $Values = $OptionsInfo{ColValues}[$Index];
 283     }
 284 
 285     if ($Values =~ /all/i) {
 286       if ($OptionsInfo{Mode} =~ /^colnum$/i) {
 287         for $ColNum (1 .. $TextFilesInfo{ColCount}[$Index]) {
 288           push @{$TextFilesInfo{ColSpecified}[$Index]}, $ColNum;
 289         }
 290       }
 291       else {
 292         push @{$TextFilesInfo{ColSpecified}[$Index]}, @{$TextFilesInfo{ColLabels}[$Index]};
 293       }
 294     }
 295     else {
 296       @Words = split ",", $Values;
 297       push @{$TextFilesInfo{ColSpecified}[$Index]}, @Words;
 298     }
 299 
 300     @{$TextFilesInfo{ColToMerge}[$Index]} = ();
 301     %{$TextFilesInfo{ColToMergeNumToLabelMap}[$Index]} = ();
 302 
 303     if ($OptionsInfo{Mode} =~ /^collabel$/i) {
 304       for $ColIndex (0 .. $#{$TextFilesInfo{ColSpecified}[$Index]}) {
 305         $ColLabel = $TextFilesInfo{ColSpecified}[$Index][$ColIndex];
 306         if (exists($TextFilesInfo{ColLabelToNumMap}[$Index]{$ColLabel})) {
 307           $ColNum = $TextFilesInfo{ColLabelToNumMap}[$Index]{$ColLabel};
 308           push @{$TextFilesInfo{ColToMerge}[$Index]}, $ColNum;
 309           $TextFilesInfo{ColToMergeNumToLabelMap}[$Index]{$ColNum} = $ColLabel;
 310         }
 311         else {
 312           warn "Warning: Ignoring value, $ColLabel, specified using \"-c --column\" option: column name doesn't exist in  $TextFilesList[$Index]  \n";
 313         }
 314       }
 315     }
 316     else {
 317       for $ColIndex (0 .. $#{$TextFilesInfo{ColSpecified}[$Index]}) {
 318         $SpecifiedColNum = $TextFilesInfo{ColSpecified}[$Index][$ColIndex];
 319         if ($SpecifiedColNum > 0 && $SpecifiedColNum <= $TextFilesInfo{ColCount}[$Index]) {
 320           $ColNum = $SpecifiedColNum - 1;
 321           push @{$TextFilesInfo{ColToMerge}[$Index]}, $ColNum;
 322           $TextFilesInfo{ColToMergeNumToLabelMap}[$Index]{$ColNum} = $TextFilesInfo{ColLabels}[$Index][$ColNum];
 323         }
 324         else {
 325           warn "Warning: Ignoring value, $SpecifiedColNum, specified using \"-c --column\" option: column number doesn't exist in  $TextFilesList[$Index]  \n";
 326         }
 327       }
 328     }
 329     my (@ColToMergeSorted) = sort { $a <=> $b } @{$TextFilesInfo{ColToMerge}[$Index]};
 330     @{$TextFilesInfo{ColToMerge}[$Index]} = ();
 331     push @{$TextFilesInfo{ColToMerge}[$Index]}, @ColToMergeSorted;
 332   }
 333 }
 334 
 335 # Process specified key column values...
 336 sub ProcessKeysInfo {
 337   my($Index, $Key, $ColLabel, $ColNum);
 338 
 339   @{$TextFilesInfo{KeysSpecified}} = ();
 340   @{$TextFilesInfo{KeysToUse}} = ();
 341 
 342   for $Index (0 .. $#TextFilesList) {
 343     $Key = $OptionsInfo{KeyValues}[$Index];
 344 
 345     $TextFilesInfo{KeysSpecified}[$Index] = $Key;
 346     $TextFilesInfo{KeysToUse}[$Index] = -1;
 347 
 348     if ($OptionsInfo{Mode} =~ /^collabel$/i) {
 349       $ColLabel = $Key;
 350       if (exists($TextFilesInfo{ColLabelToNumMap}[$Index]{$ColLabel})) {
 351         $TextFilesInfo{KeysToUse}[$Index] =  $TextFilesInfo{ColLabelToNumMap}[$Index]{$ColLabel};
 352       }
 353       else {
 354         warn "Warning: Ignoring value, $ColLabel, specified using \"-k --keys\" option: column name doesn't exist in  $TextFilesList[$Index]  \n";
 355       }
 356     }
 357     else {
 358       $ColNum = $Key;
 359       if ($ColNum > 0 && $ColNum <= $TextFilesInfo{ColCount}[$Index]) {
 360         $TextFilesInfo{KeysToUse}[$Index] = $ColNum - 1;
 361       }
 362       else {
 363         warn "Warning: Ignoring value, $ColNum, specified using \"-k --keys\" option: column number doesn't exist in  $TextFilesList[$Index]  \n";
 364       }
 365     }
 366   }
 367 
 368   # Modify columns to merge list to make sure the columns identified by key are taken off the list
 369   # except for the first text file...
 370   my(@ColToMergeFiltered);
 371 
 372   for $Index (1 .. $#TextFilesList) {
 373     @ColToMergeFiltered = ();
 374     for $ColNum (@{$TextFilesInfo{ColToMerge}[$Index]}) {
 375       if ($TextFilesInfo{KeysToUse}[$Index] != $ColNum) {
 376         push @ColToMergeFiltered, $ColNum;
 377       }
 378     }
 379     @{$TextFilesInfo{ColToMerge}[$Index]} = ();
 380     push @{$TextFilesInfo{ColToMerge}[$Index]}, @ColToMergeFiltered;
 381   }
 382 }
 383 
 384 # Process specified start column value...
 385 sub ProcessStartColInfo {
 386   my($Index, $ColIndex, $ColNum, $StartColNum, $Part1StartColNum, $Part1EndColNum, $Part2StartColNum, $Part2EndColNum, $BeforeStartColNum, $AfterStartColNum, $FirstColNum, $LastColNum, $FirstIndex, $LastIndex);
 387 
 388   @{$TextFilesInfo{File1Part1ColNums}} = ();
 389   @{$TextFilesInfo{File1Part2ColNums}} = ();
 390 
 391   $StartColNum = "last";
 392   if ($OptionsInfo{StartCol}) {
 393     if (length($OptionsInfo{StartCol})) {
 394       $StartColNum = $OptionsInfo{StartCol}
 395     }
 396   }
 397 
 398   if ($StartColNum !~ /^last$/i) {
 399     if ($OptionsInfo{Mode} =~ /^collabel$/i) {
 400       if (exists($TextFilesInfo{ColLabelToNumMap}[0]{$StartColNum})) {
 401         $StartColNum = $TextFilesInfo{ColLabelToNumMap}[0]{$StartColNum};
 402       }
 403       else {
 404         die "Error: Invalid value $StartColNum specified using \"-s --startcol\" option: column name doesn't exist in  $TextFilesList[0]  \n";
 405       }
 406     }
 407     else {
 408       if ($StartColNum > 0 && $StartColNum <= $TextFilesInfo{ColCount}[0]) {
 409         $StartColNum -= 1;
 410       }
 411       else {
 412         die "Error: Invalid value $StartColNum specified using \"-s --startcol\" option: column number doesn't exist in  $TextFilesList[0]  \n";
 413       }
 414     }
 415   }
 416   else {
 417     $StartColNum = $TextFilesInfo{ColCount}[0] - 1;
 418   }
 419 
 420   # Make sure StartColNum is present on the list of columns to merge for the first text file...
 421   if (!exists($TextFilesInfo{ColToMergeNumToLabelMap}[0]{$StartColNum})) {
 422     die "Error: Invalid value $StartColNum specified using \"-s --startcol\" option: doesn't exist in the specified lists of columns to merge for  $TextFilesList[0]  \n";
 423   }
 424 
 425   # Find out the column number before and after StartColNum in first text file...
 426   $BeforeStartColNum = $StartColNum;
 427   $AfterStartColNum = $StartColNum;
 428 
 429   $FirstIndex = 0; $LastIndex = $#{$TextFilesInfo{ColToMerge}[0]};
 430 
 431   $FirstColNum = $TextFilesInfo{ColToMerge}[0][$FirstIndex];
 432   $LastColNum = $TextFilesInfo{ColToMerge}[0][$LastIndex];
 433 
 434   for $Index (0 .. $LastIndex) {
 435     if ($TextFilesInfo{ColToMerge}[0][$Index] == $StartColNum) {
 436       $BeforeStartColNum = (($Index -1) >= $FirstIndex) ? $TextFilesInfo{ColToMerge}[0][$Index - 1] : ($FirstColNum - 1);
 437       $AfterStartColNum = (($Index + 1) <= $LastIndex) ? $TextFilesInfo{ColToMerge}[0][$Index + 1] : ($LastColNum + 1);
 438     }
 439   }
 440 
 441   if ($OptionsInfo{StartColMode} =~ /^after$/i) {
 442     $Part1StartColNum = $FirstColNum; $Part1EndColNum = $StartColNum;
 443     $Part2StartColNum = $AfterStartColNum; $Part2EndColNum = $LastColNum;
 444   }
 445   else {
 446     $Part1StartColNum = $FirstColNum; $Part1EndColNum = $BeforeStartColNum;
 447     $Part2StartColNum = $StartColNum; $Part2EndColNum = $LastColNum;
 448   }
 449 
 450   @{$TextFilesInfo{File1Part1ColNums}} = ();
 451   @{$TextFilesInfo{File1Part2ColNums}} = ();
 452 
 453   for $ColIndex (0 .. $#{$TextFilesInfo{ColToMerge}[0]}) {
 454     $ColNum = $TextFilesInfo{ColToMerge}[0][$ColIndex];
 455     if ($ColNum >= $Part1StartColNum && $ColNum <= $Part1EndColNum) {
 456       push @{$TextFilesInfo{File1Part1ColNums}}, $ColNum;
 457     }
 458   }
 459 
 460   for $ColIndex (0 .. $#{$TextFilesInfo{ColToMerge}[0]}) {
 461     $ColNum = $TextFilesInfo{ColToMerge}[0][$ColIndex];
 462     if ($ColNum >= $Part2StartColNum && $ColNum <= $Part2EndColNum) {
 463       push @{$TextFilesInfo{File1Part2ColNums}}, $ColNum;
 464     }
 465   }
 466 
 467 }
 468 
 469 # Retrieve information about input text files...
 470 sub RetrieveTextFilesInfo {
 471   my($Index, $TextFile, $FileDir, $FileName, $FileExt, $InDelim, $Line, $ColNum, $ColLabel, $FileNotOkayCount, @ColLabels,);
 472 
 473   %TextFilesInfo = ();
 474 
 475   @{$TextFilesInfo{FileOkay}} = ();
 476   @{$TextFilesInfo{ColCount}} = ();
 477   @{$TextFilesInfo{ColLabels}} = ();
 478   @{$TextFilesInfo{ColLabelToNumMap}} = ();
 479   @{$TextFilesInfo{InDelim}} = ();
 480 
 481   $FileNotOkayCount = 0;
 482 
 483   FILELIST: for $Index (0 .. $#TextFilesList) {
 484     $TextFile = $TextFilesList[$Index];
 485 
 486     $TextFilesInfo{FileOkay}[$Index] = 0;
 487     $TextFilesInfo{ColCount}[$Index] = 0;
 488     $TextFilesInfo{InDelim}[$Index] = "";
 489 
 490     @{$TextFilesInfo{ColLabels}[$Index]} = ();
 491     %{$TextFilesInfo{ColLabelToNumMap}[$Index]} = ();
 492 
 493     if (!(-e $TextFile)) {
 494       warn "Warning: Ignoring file $TextFile: It doesn't exist\n";
 495       $FileNotOkayCount++;
 496       next FILELIST;
 497     }
 498     if (!CheckFileType($TextFile, "csv tsv")) {
 499       warn "Warning: Ignoring file $TextFile: It's not a csv or tsv file\n";
 500       $FileNotOkayCount++;
 501       next FILELIST;
 502     }
 503     ($FileDir, $FileName, $FileExt) = ParseFileName($TextFile);
 504     if ($FileExt =~ /^tsv$/i) {
 505       $InDelim = "\t";
 506     }
 507     else {
 508       $InDelim = "\,";
 509       if ($OptionsInfo{InDelim} !~ /^(comma|semicolon)$/i) {
 510         warn "Warning: Ignoring file $TextFile: The value specified, $OptionsInfo{InDelim}, for option \"--indelim\" is not valid for csv files\n";
 511         $FileNotOkayCount++;
 512         next FILELIST;
 513       }
 514       if ($OptionsInfo{InDelim} =~ /^semicolon$/i) {
 515         $InDelim = "\;";
 516       }
 517     }
 518 
 519     if (!open TEXTFILE, "$TextFile") {
 520       warn "Warning: Ignoring file $TextFile: Couldn't open it: $! \n";
 521       $FileNotOkayCount++;
 522       next FILELIST;
 523     }
 524 
 525     $Line = GetTextLine(\*TEXTFILE);
 526     @ColLabels = quotewords($InDelim, 0, $Line);
 527     close TEXTFILE;
 528 
 529     $TextFilesInfo{FileOkay}[$Index] = 1;
 530     $TextFilesInfo{InDelim}[$Index] = $InDelim;
 531 
 532     $TextFilesInfo{ColCount}[$Index] = @ColLabels;
 533     push @{$TextFilesInfo{ColLabels}[$Index]}, @ColLabels;
 534     for $ColNum (0 .. $#ColLabels) {
 535       $ColLabel = $ColLabels[$ColNum];
 536       $TextFilesInfo{ColLabelToNumMap}[$Index]{$ColLabel} = $ColNum;
 537     }
 538   }
 539   # Make sure all specified files are valid for merging to work properly...
 540   if ($FileNotOkayCount) {
 541     die "Error: Problems with input text file(s)...\n";
 542   }
 543 }
 544 
 545 # Process option values...
 546 sub ProcessOptions {
 547   my($Index, $FileDir, $FileName, $FileExt, $NewTextFile, @ColValues, @KeyValues);
 548 
 549   %OptionsInfo = ();
 550 
 551   $OptionsInfo{Mode} = $Options{mode};
 552 
 553   $OptionsInfo{Columns} = $Options{columns};
 554   @{$OptionsInfo{ColValues}} = ();
 555 
 556   if ($Options{columns}) {
 557     @ColValues = split ";", $Options{columns};
 558     if (@ColValues != @TextFilesList) {
 559       die "Error: Invalid number of values specified by \"-c --columns\" option: it must be equal to number of input text files.\n";
 560     }
 561     for $Index (0 .. $#ColValues) {
 562       if (!length($ColValues[$Index])) {
 563         die "Error: Invalid value specified by \"-c --columns\" option: empty values are not allowed.\n";
 564       }
 565     }
 566     @{$OptionsInfo{ColValues}} = @ColValues;
 567   }
 568 
 569   $OptionsInfo{Keys} = $Options{keys};
 570   @{$OptionsInfo{KeyValues}} = ();
 571 
 572   if ($Options{keys}) {
 573     @KeyValues = split ";", $Options{keys};
 574     if (@KeyValues != @TextFilesList) {
 575       die "Error: Invalid number of values specified by \"-k --keys\" option: it must be equal to number of input text files.\n";
 576     }
 577     for $Index (0 .. $#KeyValues) {
 578       if (!length($KeyValues[$Index])) {
 579         die "Error: Invalid value specified by \"-k --keys\" option: empty values are not allowed.\n";
 580       }
 581     }
 582     @{$OptionsInfo{KeyValues}} = @KeyValues;
 583   }
 584 
 585   $OptionsInfo{InDelim} = $Options{indelim};
 586 
 587   $OptionsInfo{StartCol} = $Options{startcol} ? $Options{startcol} : undef;
 588   $OptionsInfo{StartColMode} = $Options{startcolmode};
 589 
 590   $OptionsInfo{OutFileRoot} = $Options{root} ? $Options{root} : undef;
 591   $OptionsInfo{Overwrite} = $Options{overwrite} ? $Options{overwrite} : undef;
 592 
 593   $OptionsInfo{OutDelim} = ($Options{outdelim} =~ /^tab$/i ) ? "\t" : (($Options{outdelim} =~ /^semicolon$/i) ? "\;" : "\,");
 594   $OptionsInfo{OutQuote} = ($Options{quote} =~ /^yes$/i) ? 1 : 0;
 595 
 596   if ($Options{root}) {
 597     $FileDir = ""; $FileName = ""; $FileExt = "";
 598     ($FileDir, $FileName, $FileExt) = ParseFileName($Options{root});
 599     if ($FileName && $FileExt) {
 600       $NewTextFile = $FileName;
 601     } else {
 602       $NewTextFile =  $Options{root};
 603     }
 604   } else {
 605     $FileDir = ""; $FileName = ""; $FileExt = "";
 606     ($FileDir, $FileName, $FileExt) = ParseFileName($TextFilesList[0]);
 607     $NewTextFile = $FileName . "1To" . @TextFilesList . "Merged";
 608   }
 609   if ($Options{outdelim} =~ /^tab$/i) {
 610     $NewTextFile .= ".tsv";
 611   } else {
 612     $NewTextFile .= ".csv";
 613   }
 614   if (!$Options{overwrite}) {
 615     if (-e $NewTextFile) {
 616       die "Error: The file $NewTextFile already exists.\n";
 617     }
 618   }
 619   if ($Options{root}) {
 620     for $Index (0 .. $#TextFilesList) {
 621       if (lc($NewTextFile) eq lc($TextFilesList[$Index])) {
 622         die "Error: Output filename, $NewTextFile, is similar to a input file name.\nSpecify a different name using \"-r --root\" option or use default name.\n";
 623       }
 624     }
 625   }
 626 
 627   $OptionsInfo{NewTextFile} = $NewTextFile;
 628 }
 629 
 630 # Setup script usage  and retrieve command line arguments specified using various options...
 631 sub SetupScriptUsage {
 632 
 633   # Retrieve all the options...
 634   %Options = ();
 635 
 636   $Options{mode} = "colnum";
 637   $Options{indelim} = "comma";
 638   $Options{outdelim} = "comma";
 639   $Options{quote} = "yes";
 640   $Options{startcolmode} = "after";
 641 
 642   if (!GetOptions(\%Options, "help|h", "indelim=s", "columns|c=s", "keys|k=s", "mode|m=s", "outdelim=s", "overwrite|o", "quote|q=s", "root|r=s", "startcol|s=s", "startcolmode=s", "workingdir|w=s")) {
 643     die "\nTo get a list of valid options and their values, use \"$ScriptName -h\" or\n\"perl -S $ScriptName -h\" command and try again...\n";
 644   }
 645   if ($Options{workingdir}) {
 646     if (! -d $Options{workingdir}) {
 647       die "Error: The value specified, $Options{workingdir}, for option \"-w --workingdir\" is not a directory name.\n";
 648     }
 649     chdir $Options{workingdir} or die "Error: Couldn't chdir $Options{workingdir}: $! \n";
 650   }
 651   if ($Options{mode} !~ /^(colnum|collabel)$/i) {
 652     die "Error: The value specified, $Options{mode}, for option \"-m --mode\" is not valid. Allowed values: colnum, or collabel\n";
 653   }
 654   if ($Options{indelim} !~ /^(comma|semicolon)$/i) {
 655     die "Error: The value specified, $Options{indelim}, for option \"--indelim\" is not valid. Allowed values: comma or semicolon\n";
 656   }
 657   if ($Options{outdelim} !~ /^(comma|semicolon|tab)$/i) {
 658     die "Error: The value specified, $Options{outdelim}, for option \"--outdelim\" is not valid. Allowed values: comma, tab, or semicolon\n";
 659   }
 660   if ($Options{quote} !~ /^(yes|no)$/i) {
 661     die "Error: The value specified, $Options{quote}, for option \"-q --quote\" is not valid. Allowed values: yes or no\n";
 662   }
 663   if ($Options{startcolmode} !~ /^(before|after)$/i) {
 664     die "Error: The value specified, $Options{quote}, for option \"--startcolmode\" is not valid. Allowed values: before or after\n";
 665   }
 666 }
 667