#!/usr/bin/perl -w
#
# File: MergeTextFiles.pl
# Author: Manish Sud <msud@san.rr.com>
#
# Copyright (C) 2024 Manish Sud. All rights reserved.
#
# This file is part of MayaChemTools.
#
# MayaChemTools is free software; you can redistribute it and/or modify it under
# the terms of the GNU Lesser General Public License as published by the Free
# Software Foundation; either version 3 of the License, or (at your option) any
# later version.
#
# MayaChemTools is distributed in the hope that it will be useful, but without
# any warranty; without even the implied warranty of merchantability of fitness
# for a particular purpose. See the GNU Lesser General Public License for more
# details.
#
# You should have received a copy of the GNU Lesser General Public License
# along with MayaChemTools; if not, see <http://www.gnu.org/licenses/> or
# write to the Free Software Foundation Inc., 59 Temple Place, Suite 330,
# Boston, MA, 02111-1307, USA.
#

use strict;
use FindBin; use lib "$FindBin::Bin/../lib";
use Getopt::Long;
use File::Basename;
use Text::ParseWords;
use Benchmark;
use FileHandle;
use FileUtil;
use TextUtil;

my($ScriptName, %Options, $StartTime, $EndTime, $TotalTime);

# Autoflush STDOUT
$| = 1;

# Starting message...
$ScriptName = basename $0;
print "\n$ScriptName:Starting...\n\n";
$StartTime = Benchmark->new;

# Get the options and setup script...
SetupScriptUsage();
if ($Options{help} || @ARGV < 1) {
  die GetUsageFromPod("$FindBin::Bin/$ScriptName");
}

my(@TextFilesList);
@TextFilesList = ExpandFileNames(\@ARGV, "csv tsv");

# Merging needs at least two files; an empty list is rejected here as well
# instead of failing later on an undefined first file name...
if (@TextFilesList < 2) {
  die "Error: Specify more than one text file.\n";
}

# Process options...
print "Processing options...\n";
my(%OptionsInfo);
ProcessOptions();

# Setup information about input files...
my(%TextFilesInfo);
print "Checking input text files...\n";
RetrieveTextFilesInfo();
RetrieveColumnsAndKeysInfo();

# Merge files...
print "\nGenerating new text file $OptionsInfo{NewTextFile}...\n";
MergeTextFiles();

print "\n$ScriptName:Done...\n\n";

$EndTime = Benchmark->new;
$TotalTime = timediff ($EndTime, $StartTime);
print "Total time: ", timestr($TotalTime), "\n";

###############################################################################

# Merge all valid text files...
#
# Opens the new text file, writes out the merged column labels, opens each
# input file and skips its column label line, and then merges the data lines
# either by key columns or line by line depending on whether keys were
# specified. All files are closed before returning.
#
sub MergeTextFiles {
  my($Index, $NewTextFileRef, $TextFileRef);

  # Three-argument open keeps unusual file names from being interpreted as open modes...
  open $NewTextFileRef, ">", $OptionsInfo{NewTextFile} or die "Error: Couldn't open $OptionsInfo{NewTextFile}: $! \n";

  WriteNewTextFileColumnLabels($NewTextFileRef);

  # Open up all the files and skip column label line...
  @{$TextFilesInfo{FileHandle}} = ();
  for $Index (0 .. $#TextFilesList) {
    $TextFileRef = FileHandle->new;

    open $TextFileRef, "<", $TextFilesList[$Index] or die "Error: Couldn't open $TextFilesList[$Index]: $! \n";
    $TextFilesInfo{FileHandle}[$Index] = $TextFileRef;
    GetTextLine($TextFileRef);
  }

  # Merge files...
  if ($OptionsInfo{Keys}) {
    MergeColumnValuesUsingKeys($NewTextFileRef);
  }
  else {
    MergeColumnValues($NewTextFileRef);
  }

  # Close all opened files; buffered write errors on the new file surface at close...
  close $NewTextFileRef or die "Error: Couldn't close $OptionsInfo{NewTextFile}: $! \n";
  for $Index (0 .. $#TextFilesList) {
    close $TextFilesInfo{FileHandle}[$Index];
  }

}

# Collect values for the specified zero-based column numbers from a list of
# line words...
#
# Arguments: a reference to the words of one line and a reference to the list
# of column numbers to pick. Returns one value per column number; a column
# which is missing from the line, or has no defined value, is returned as an
# empty string so that every merged row keeps the same number of columns.
#
sub CollectColumnValues {
  my($LineWordsRef, $ColNumsRef) = @_;
  my($ColNum, $Value, @Values);

  @Values = ();
  for $ColNum (@{$ColNumsRef}) {
    $Value = "";
    if ($ColNum < @{$LineWordsRef} && defined($LineWordsRef->[$ColNum])) {
      $Value = $LineWordsRef->[$ColNum];
    }
    push @Values, $Value;
  }
  return @Values;
}

# Merge all the column values line by line...
#
# For each data line in the first text file, the next data line from each of
# the other text files is read and the columns to merge are inserted at the
# merge point. The number of merged lines is driven by the first text file.
#
sub MergeColumnValues {
  my($NewTextFileRef) = @_;
  my($Index, $Line, $OtherLine, @LineWords, @File1LineWords, @ColValues);

  while ($Line = GetTextLine($TextFilesInfo{FileHandle}[0])) {
    @ColValues = ();

    # Collect column values from first file before the merge point...
    @File1LineWords = quotewords($TextFilesInfo{InDelim}[0], 0, $Line);
    push @ColValues, CollectColumnValues(\@File1LineWords, $TextFilesInfo{File1Part1ColNums});

    # Collect column values from other text files. A file which has run out of
    # lines contributes empty values; otherwise the first file columns after
    # the merge point would shift left and no longer match the column labels...
    for $Index (1 .. $#TextFilesList) {
      @LineWords = ();
      $OtherLine = GetTextLine($TextFilesInfo{FileHandle}[$Index]);
      if ($OtherLine) {
        @LineWords = quotewords($TextFilesInfo{InDelim}[$Index], 0, $OtherLine);
      }
      push @ColValues, CollectColumnValues(\@LineWords, $TextFilesInfo{ColToMerge}[$Index]);
    }

    # Collect column values from first file after the merge point...
    push @ColValues, CollectColumnValues(\@File1LineWords, $TextFilesInfo{File1Part2ColNums});

    # Write it out...
    $Line = JoinWords(\@ColValues, $OptionsInfo{OutDelim}, $OptionsInfo{OutQuote});
    print $NewTextFileRef "$Line\n";
  }

}

# Merge column values using keys...
#
# Data lines from all text files except the first one are loaded into per file
# maps indexed by their key column value; only the first line for a key value
# is kept. Each data line in the first text file is then written out together
# with the matching line, if any, from each of the other text files.
#
sub MergeColumnValuesUsingKeys {
  my($NewTextFileRef) = @_;
  my($Index, $InDelim, $Line, $KeyColNum, $KeyColValue, @LineWords, @ColValues, @File1LineWords, @TextFilesKeysToLinesMap);

  @TextFilesKeysToLinesMap = ();

  # Retrieve text lines from all the files except for the first file...
  FILE: for $Index (1 .. $#TextFilesList) {
    %{$TextFilesKeysToLinesMap[$Index]} = ();

    $InDelim = $TextFilesInfo{InDelim}[$Index];
    $KeyColNum = $TextFilesInfo{KeysToUse}[$Index];

    # A negative key column number marks a key which was ignored as invalid;
    # using it as an array index would silently pick the last column...
    next FILE if ($KeyColNum < 0);

    LINE: while ($Line = GetTextLine($TextFilesInfo{FileHandle}[$Index])) {
      @LineWords = quotewords($InDelim, 0, $Line);
      next LINE if ($KeyColNum >= @LineWords);

      $KeyColValue = $LineWords[$KeyColNum];
      next LINE if (!defined($KeyColValue) || !length($KeyColValue));

      if (exists($TextFilesKeysToLinesMap[$Index]{$KeyColValue})) {
        warn "Warning: Ignoring line, $Line, in text file $TextFilesList[$Index]: Column key value, $KeyColValue, already exists\n";
      }
      else {
        @{$TextFilesKeysToLinesMap[$Index]{$KeyColValue}} = @LineWords;
      }
    }
  }

  $InDelim = $TextFilesInfo{InDelim}[0];
  $KeyColNum = $TextFilesInfo{KeysToUse}[0];

  while ($Line = GetTextLine($TextFilesInfo{FileHandle}[0])) {
    @ColValues = ();
    @File1LineWords = quotewords($InDelim, 0, $Line);

    # Key value stays undefined for an ignored key or a line without the key column...
    $KeyColValue = undef;
    if ($KeyColNum >= 0 && $KeyColNum < @File1LineWords) {
      $KeyColValue = $File1LineWords[$KeyColNum];
    }

    # Collect column values from first file before the merge point...
    push @ColValues, CollectColumnValues(\@File1LineWords, $TextFilesInfo{File1Part1ColNums});

    # Collect column values from other text files; files without a matching
    # key value contribute empty values...
    for $Index (1 .. $#TextFilesList) {
      @LineWords = ();
      if (defined($KeyColValue) && exists($TextFilesKeysToLinesMap[$Index]{$KeyColValue})) {
        @LineWords = @{$TextFilesKeysToLinesMap[$Index]{$KeyColValue}};
      }
      push @ColValues, CollectColumnValues(\@LineWords, $TextFilesInfo{ColToMerge}[$Index]);
    }

    # Collect column values from first file after the merge point...
    push @ColValues, CollectColumnValues(\@File1LineWords, $TextFilesInfo{File1Part2ColNums});

    # Write it out...
    $Line = JoinWords(\@ColValues, $OptionsInfo{OutDelim}, $OptionsInfo{OutQuote});
    print $NewTextFileRef "$Line\n";
  }

}

# Write out column labels for new merged text file...
#
# Labels are written in the same order as the merged values: first file
# columns before the merge point, columns from the other text files, and first
# file columns after the merge point.
#
sub WriteNewTextFileColumnLabels {
  my($NewTextFileRef) = @_;
  my($Index, $Line, $ColNum, @ColLabels);

  @ColLabels = ();

  # Collect column labels from first file before the merge point...
  for $ColNum (@{$TextFilesInfo{File1Part1ColNums}}) {
    push @ColLabels, $TextFilesInfo{ColToMergeNumToLabelMap}[0]{$ColNum};
  }

  # Collect column labels from other text files...
  for $Index (1 .. $#TextFilesList) {
    for $ColNum (@{$TextFilesInfo{ColToMerge}[$Index]}) {
      push @ColLabels, $TextFilesInfo{ColToMergeNumToLabelMap}[$Index]{$ColNum};
    }
  }

  # Collect column labels from first file after the merge point...
  for $ColNum (@{$TextFilesInfo{File1Part2ColNums}}) {
    push @ColLabels, $TextFilesInfo{ColToMergeNumToLabelMap}[0]{$ColNum};
  }

  # Write it out to the file handle supplied by the caller...
  $Line = JoinWords(\@ColLabels, $OptionsInfo{OutDelim}, $OptionsInfo{OutQuote});
  print $NewTextFileRef "$Line\n";
}

# Retrieve text file columns and keys information for specified options...
#
# Key processing runs after column processing because it filters the lists of
# columns to merge; start column processing runs last because it splits the
# final list of columns to merge for the first text file.
#
sub RetrieveColumnsAndKeysInfo {
  ProcessColumnsInfo();

  if ($OptionsInfo{Keys}) {
    ProcessKeysInfo();
  }

  ProcessStartColInfo();
}

# Process specified columns...
#
# Sets up, for each text file, the specified columns, a sorted list of
# zero-based column numbers to merge, and a map from those column numbers to
# their column labels. Specified columns which don't exist in a text file are
# ignored with a warning.
#
sub ProcessColumnsInfo {
  my($Index, $SpecifiedColNum, $Values, $ColNum, $ColLabel, @ColToMergeSorted);

  @{$TextFilesInfo{ColSpecified}} = ();
  @{$TextFilesInfo{ColToMerge}} = ();
  @{$TextFilesInfo{ColToMergeNumToLabelMap}} = ();

  for $Index (0 .. $#TextFilesList) {

    @{$TextFilesInfo{ColSpecified}[$Index]} = ();

    $Values = "all";
    if ($OptionsInfo{Columns}) {
      $Values = $OptionsInfo{ColValues}[$Index];
    }

    # Match the whole value; a column list merely containing "all", such as
    # a column label "Overall", must not select all the columns...
    if ($Values =~ /^\s*all\s*$/i) {
      if ($OptionsInfo{Mode} =~ /^colnum$/i) {
        push @{$TextFilesInfo{ColSpecified}[$Index]}, (1 .. $TextFilesInfo{ColCount}[$Index]);
      }
      else {
        push @{$TextFilesInfo{ColSpecified}[$Index]}, @{$TextFilesInfo{ColLabels}[$Index]};
      }
    }
    else {
      push @{$TextFilesInfo{ColSpecified}[$Index]}, split(",", $Values);
    }

    @{$TextFilesInfo{ColToMerge}[$Index]} = ();
    %{$TextFilesInfo{ColToMergeNumToLabelMap}[$Index]} = ();

    if ($OptionsInfo{Mode} =~ /^collabel$/i) {
      for $ColLabel (@{$TextFilesInfo{ColSpecified}[$Index]}) {
        if (exists($TextFilesInfo{ColLabelToNumMap}[$Index]{$ColLabel})) {
          $ColNum = $TextFilesInfo{ColLabelToNumMap}[$Index]{$ColLabel};
          push @{$TextFilesInfo{ColToMerge}[$Index]}, $ColNum;
          $TextFilesInfo{ColToMergeNumToLabelMap}[$Index]{$ColNum} = $ColLabel;
        }
        else {
          warn "Warning: Ignoring value, $ColLabel, specified using \"-c --column\" option: column name doesn't exist in  $TextFilesList[$Index]  \n";
        }
      }
    }
    else {
      for $SpecifiedColNum (@{$TextFilesInfo{ColSpecified}[$Index]}) {
        if ($SpecifiedColNum > 0 && $SpecifiedColNum <= $TextFilesInfo{ColCount}[$Index]) {
          # Specified column numbers start from 1; internal column numbers start from 0...
          $ColNum = $SpecifiedColNum - 1;
          push @{$TextFilesInfo{ColToMerge}[$Index]}, $ColNum;
          $TextFilesInfo{ColToMergeNumToLabelMap}[$Index]{$ColNum} = $TextFilesInfo{ColLabels}[$Index][$ColNum];
        }
        else {
          warn "Warning: Ignoring value, $SpecifiedColNum, specified using \"-c --column\" option: column number doesn't exist in  $TextFilesList[$Index]  \n";
        }
      }
    }

    # Columns are merged in file order irrespective of the order of their specification...
    @ColToMergeSorted = sort { $a <=> $b } @{$TextFilesInfo{ColToMerge}[$Index]};
    @{$TextFilesInfo{ColToMerge}[$Index]} = @ColToMergeSorted;
  }
}

# Process specified key column values...
#
# Sets up, for each text file, the specified key and the zero-based key column
# number to use; the key column number is -1 for a key which doesn't exist in
# the text file and is ignored with a warning. Key columns are also taken off
# the lists of columns to merge for all text files except the first one.
#
sub ProcessKeysInfo {
  my($Index, $Key, $KeyColNum, @ColToMergeFiltered);

  @{$TextFilesInfo{KeysSpecified}} = ();
  @{$TextFilesInfo{KeysToUse}} = ();

  for $Index (0 .. $#TextFilesList) {
    $Key = $OptionsInfo{KeyValues}[$Index];

    $TextFilesInfo{KeysSpecified}[$Index] = $Key;
    $TextFilesInfo{KeysToUse}[$Index] = -1;

    if ($OptionsInfo{Mode} =~ /^collabel$/i) {
      if (exists($TextFilesInfo{ColLabelToNumMap}[$Index]{$Key})) {
        $TextFilesInfo{KeysToUse}[$Index] =  $TextFilesInfo{ColLabelToNumMap}[$Index]{$Key};
      }
      else {
        warn "Warning: Ignoring value, $Key, specified using \"-k --keys\" option: column name doesn't exist in  $TextFilesList[$Index]  \n";
      }
    }
    else {
      if ($Key > 0 && $Key <= $TextFilesInfo{ColCount}[$Index]) {
        $TextFilesInfo{KeysToUse}[$Index] = $Key - 1;
      }
      else {
        warn "Warning: Ignoring value, $Key, specified using \"-k --keys\" option: column number doesn't exist in  $TextFilesList[$Index]  \n";
      }
    }
  }

  # Modify columns to merge list to make sure the columns identified by key are taken off the list
  # except for the first text file...
  for $Index (1 .. $#TextFilesList) {
    $KeyColNum = $TextFilesInfo{KeysToUse}[$Index];
    @ColToMergeFiltered = grep { $_ != $KeyColNum } @{$TextFilesInfo{ColToMerge}[$Index]};
    @{$TextFilesInfo{ColToMerge}[$Index]} = @ColToMergeFiltered;
  }
}

# Process specified start column value...
# Sets up the two lists of first text file column numbers which are written
# before and after the columns merged from the other text files. The merge
# point is the column specified using "-s --startcol" option, or the last
# column by default, and it must be on the list of columns to merge for the
# first text file.
#
sub ProcessStartColInfo {
  my($Index, $ColNum, $SpecifiedStartCol, $StartColNum, $Part1StartColNum, $Part1EndColNum, $Part2StartColNum, $Part2EndColNum, $BeforeStartColNum, $AfterStartColNum, $FirstColNum, $LastColNum, $FirstIndex, $LastIndex);

  @{$TextFilesInfo{File1Part1ColNums}} = ();
  @{$TextFilesInfo{File1Part2ColNums}} = ();

  # Keep the value as specified for error messages; a test for a defined and
  # non-empty value lets a specified value of 0 reach the validation below...
  $SpecifiedStartCol = "last";
  if (defined($OptionsInfo{StartCol}) && length($OptionsInfo{StartCol})) {
    $SpecifiedStartCol = $OptionsInfo{StartCol};
  }

  if ($SpecifiedStartCol =~ /^last$/i) {
    $StartColNum = $TextFilesInfo{ColCount}[0] - 1;
  }
  elsif ($OptionsInfo{Mode} =~ /^collabel$/i) {
    if (!exists($TextFilesInfo{ColLabelToNumMap}[0]{$SpecifiedStartCol})) {
      die "Error: Invalid value $SpecifiedStartCol specified using \"-s --startcol\" option: column name doesn't exist in  $TextFilesList[0]  \n";
    }
    $StartColNum = $TextFilesInfo{ColLabelToNumMap}[0]{$SpecifiedStartCol};
  }
  else {
    if (!($SpecifiedStartCol > 0 && $SpecifiedStartCol <= $TextFilesInfo{ColCount}[0])) {
      die "Error: Invalid value $SpecifiedStartCol specified using \"-s --startcol\" option: column number doesn't exist in  $TextFilesList[0]  \n";
    }
    # Specified column numbers start from 1; internal column numbers start from 0...
    $StartColNum = $SpecifiedStartCol - 1;
  }

  # Make sure StartColNum is present on the list of columns to merge for the first text file;
  # the value is reported as specified instead of as an internal column number...
  if (!exists($TextFilesInfo{ColToMergeNumToLabelMap}[0]{$StartColNum})) {
    die "Error: Invalid value $SpecifiedStartCol specified using \"-s --startcol\" option: doesn't exist in the specified lists of columns to merge for  $TextFilesList[0]  \n";
  }

  # Find out the column number before and after StartColNum in first text file...
  $BeforeStartColNum = $StartColNum;
  $AfterStartColNum = $StartColNum;

  $FirstIndex = 0; $LastIndex = $#{$TextFilesInfo{ColToMerge}[0]};

  $FirstColNum = $TextFilesInfo{ColToMerge}[0][$FirstIndex];
  $LastColNum = $TextFilesInfo{ColToMerge}[0][$LastIndex];

  for $Index (0 .. $LastIndex) {
    if ($TextFilesInfo{ColToMerge}[0][$Index] == $StartColNum) {
      # Without a neighbor, use a column number just outside the list to get an empty range...
      $BeforeStartColNum = (($Index - 1) >= $FirstIndex) ? $TextFilesInfo{ColToMerge}[0][$Index - 1] : ($FirstColNum - 1);
      $AfterStartColNum = (($Index + 1) <= $LastIndex) ? $TextFilesInfo{ColToMerge}[0][$Index + 1] : ($LastColNum + 1);
    }
  }

  if ($OptionsInfo{StartColMode} =~ /^after$/i) {
    # Merged columns go after the start column...
    $Part1StartColNum = $FirstColNum; $Part1EndColNum = $StartColNum;
    $Part2StartColNum = $AfterStartColNum; $Part2EndColNum = $LastColNum;
  }
  else {
    # Merged columns go before the start column...
    $Part1StartColNum = $FirstColNum; $Part1EndColNum = $BeforeStartColNum;
    $Part2StartColNum = $StartColNum; $Part2EndColNum = $LastColNum;
  }

  for $ColNum (@{$TextFilesInfo{ColToMerge}[0]}) {
    if ($ColNum >= $Part1StartColNum && $ColNum <= $Part1EndColNum) {
      push @{$TextFilesInfo{File1Part1ColNums}}, $ColNum;
    }
  }

  for $ColNum (@{$TextFilesInfo{ColToMerge}[0]}) {
    if ($ColNum >= $Part2StartColNum && $ColNum <= $Part2EndColNum) {
      push @{$TextFilesInfo{File1Part2ColNums}}, $ColNum;
    }
  }

}

# Retrieve information about input text files...
#
# Sets up, for each text file, its status, input delimiter, column count,
# column labels, and a map from column labels to zero-based column numbers
# using the first line retrieved from the file. Files with problems are
# reported with a warning; the script dies after all files have been checked
# if any of them has a problem.
#
sub RetrieveTextFilesInfo {
  my($Index, $TextFile, $TextFileRef, $FileDir, $FileName, $FileExt, $InDelim, $Line, $ColNum, $FileNotOkayCount, @ColLabels);

  %TextFilesInfo = ();

  @{$TextFilesInfo{FileOkay}} = ();
  @{$TextFilesInfo{ColCount}} = ();
  @{$TextFilesInfo{ColLabels}} = ();
  @{$TextFilesInfo{ColLabelToNumMap}} = ();
  @{$TextFilesInfo{InDelim}} = ();

  $FileNotOkayCount = 0;

  FILELIST: for $Index (0 .. $#TextFilesList) {
    $TextFile = $TextFilesList[$Index];

    $TextFilesInfo{FileOkay}[$Index] = 0;
    $TextFilesInfo{ColCount}[$Index] = 0;
    $TextFilesInfo{InDelim}[$Index] = "";

    @{$TextFilesInfo{ColLabels}[$Index]} = ();
    %{$TextFilesInfo{ColLabelToNumMap}[$Index]} = ();

    if (!(-e $TextFile)) {
      warn "Warning: Ignoring file $TextFile: It doesn't exist\n";
      $FileNotOkayCount++;
      next FILELIST;
    }
    if (!CheckFileType($TextFile, "csv tsv")) {
      warn "Warning: Ignoring file $TextFile: It's not a csv or tsv file\n";
      $FileNotOkayCount++;
      next FILELIST;
    }

    # Input delimiter is tab for tsv files and the specified delimiter for csv files...
    ($FileDir, $FileName, $FileExt) = ParseFileName($TextFile);
    if ($FileExt =~ /^tsv$/i) {
      $InDelim = "\t";
    }
    else {
      $InDelim = "\,";
      if ($OptionsInfo{InDelim} !~ /^(comma|semicolon)$/i) {
        warn "Warning: Ignoring file $TextFile: The value specified, $OptionsInfo{InDelim}, for option \"--indelim\" is not valid for csv files\n";
        $FileNotOkayCount++;
        next FILELIST;
      }
      if ($OptionsInfo{InDelim} =~ /^semicolon$/i) {
        $InDelim = "\;";
      }
    }

    # Three-argument open keeps unusual file names from being interpreted as open modes...
    $TextFileRef = FileHandle->new;
    if (!open $TextFileRef, "<", $TextFile) {
      warn "Warning: Ignoring file $TextFile: Couldn't open it: $! \n";
      $FileNotOkayCount++;
      next FILELIST;
    }

    $Line = GetTextLine($TextFileRef);
    @ColLabels = quotewords($InDelim, 0, $Line);
    close $TextFileRef;

    $TextFilesInfo{FileOkay}[$Index] = 1;
    $TextFilesInfo{InDelim}[$Index] = $InDelim;

    $TextFilesInfo{ColCount}[$Index] = @ColLabels;
    push @{$TextFilesInfo{ColLabels}[$Index]}, @ColLabels;
    for $ColNum (0 .. $#ColLabels) {
      $TextFilesInfo{ColLabelToNumMap}[$Index]{$ColLabels[$ColNum]} = $ColNum;
    }
  }

  # Make sure all specified files are valid for merging to work properly...
  if ($FileNotOkayCount) {
    die "Error: Problems with input text file(s)...\n";
  }
}

# Process option values...
# Validates the option values which depend on the list of input text files
# and sets up OptionsInfo for the rest of the script: mode, per file column
# and key values, delimiters, quoting, start column information, and the name
# of the new text file. Dies on invalid values, on an already existing new
# text file unless overwrite is specified, and on a new text file name which
# matches an input file name.
#
sub ProcessOptions {
  my($Index, $FileDir, $FileName, $FileExt, $NewTextFile, $TextFile, @ColValues, @KeyValues);

  %OptionsInfo = ();

  $OptionsInfo{Mode} = $Options{mode};

  $OptionsInfo{Columns} = $Options{columns};
  @{$OptionsInfo{ColValues}} = ();

  if ($Options{columns}) {
    # Column values for different text files are delimited by semicolons...
    @ColValues = split ";", $Options{columns};
    if (@ColValues != @TextFilesList) {
      die "Error: Invalid number of values specified by \"-c --columns\" option: it must be equal to number of input text files.\n";
    }
    for $Index (0 .. $#ColValues) {
      if (!length($ColValues[$Index])) {
        die "Error: Invalid value specified by \"-c --columns\" option: empty values are not allowed.\n";
      }
    }
    @{$OptionsInfo{ColValues}} = @ColValues;
  }

  $OptionsInfo{Keys} = $Options{keys};
  @{$OptionsInfo{KeyValues}} = ();

  if ($Options{keys}) {
    # Key values for different text files are delimited by semicolons...
    @KeyValues = split ";", $Options{keys};
    if (@KeyValues != @TextFilesList) {
      die "Error: Invalid number of values specified by \"-k --keys\" option: it must be equal to number of input text files.\n";
    }
    for $Index (0 .. $#KeyValues) {
      if (!length($KeyValues[$Index])) {
        die "Error: Invalid value specified by \"-k --keys\" option: empty values are not allowed.\n";
      }
    }
    @{$OptionsInfo{KeyValues}} = @KeyValues;
  }

  $OptionsInfo{InDelim} = $Options{indelim};

  # Test for a defined and non-empty value instead of a true value to keep
  # a specified value of 0 from being dropped...
  $OptionsInfo{StartCol} = (defined($Options{startcol}) && length($Options{startcol})) ? $Options{startcol} : undef;
  $OptionsInfo{StartColMode} = $Options{startcolmode};

  $OptionsInfo{OutFileRoot} = $Options{root} ? $Options{root} : undef;
  $OptionsInfo{Overwrite} = $Options{overwrite} ? $Options{overwrite} : undef;

  $OptionsInfo{OutDelim} = ($Options{outdelim} =~ /^tab$/i ) ? "\t" : (($Options{outdelim} =~ /^semicolon$/i) ? "\;" : "\,");
  $OptionsInfo{OutQuote} = ($Options{quote} =~ /^yes$/i) ? 1 : 0;

  # Setup new text file name using the specified root or the first text file name...
  if ($Options{root}) {
    $FileDir = ""; $FileName = ""; $FileExt = "";
    ($FileDir, $FileName, $FileExt) = ParseFileName($Options{root});
    if ($FileName && $FileExt) {
      $NewTextFile = $FileName;
    }
    else {
      $NewTextFile = $Options{root};
    }
  }
  else {
    $FileDir = ""; $FileName = ""; $FileExt = "";
    ($FileDir, $FileName, $FileExt) = ParseFileName($TextFilesList[0]);
    $NewTextFile = $FileName . "1To" . scalar(@TextFilesList) . "Merged";
  }

  # File extension follows the output delimiter...
  if ($Options{outdelim} =~ /^tab$/i) {
    $NewTextFile .= ".tsv";
  }
  else {
    $NewTextFile .= ".csv";
  }

  if (!$Options{overwrite}) {
    if (-e $NewTextFile) {
      die "Error: The file $NewTextFile already exists.\n";
    }
  }
  if ($Options{root}) {
    for $TextFile (@TextFilesList) {
      if (lc($NewTextFile) eq lc($TextFile)) {
        die "Error: Output filename, $NewTextFile, is similar to a input file name.\nSpecify a different name using \"-r --root\" option or use default name.\n";
      }
    }
  }

  $OptionsInfo{NewTextFile} = $NewTextFile;
}

# Setup script usage and retrieve command line arguments specified using various options...
#
# Sets up default values for options, retrieves the options specified on the
# command line, changes to the working directory when one is specified, and
# dies on any option value which is not one of its allowed values.
#
sub SetupScriptUsage {

  # Retrieve all the options...
  %Options = ();

  $Options{mode} = "colnum";
  $Options{indelim} = "comma";
  $Options{outdelim} = "comma";
  $Options{quote} = "yes";
  $Options{startcolmode} = "after";

  if (!GetOptions(\%Options, "help|h", "indelim=s", "columns|c=s", "keys|k=s", "mode|m=s", "outdelim=s", "overwrite|o", "quote|q=s", "root|r=s", "startcol|s=s", "startcolmode=s", "workingdir|w=s")) {
    die "\nTo get a list of valid options and their values, use \"$ScriptName -h\" or\n\"perl -S $ScriptName -h\" command and try again...\n";
  }
  if ($Options{workingdir}) {
    if (! -d $Options{workingdir}) {
      die "Error: The value specified, $Options{workingdir}, for option \"-w --workingdir\" is not a directory name.\n";
    }
    chdir $Options{workingdir} or die "Error: Couldn't chdir $Options{workingdir}: $! \n";
  }
  if ($Options{mode} !~ /^(colnum|collabel)$/i) {
    die "Error: The value specified, $Options{mode}, for option \"-m --mode\" is not valid. Allowed values: colnum, or collabel\n";
  }
  if ($Options{indelim} !~ /^(comma|semicolon)$/i) {
    die "Error: The value specified, $Options{indelim}, for option \"--indelim\" is not valid. Allowed values: comma or semicolon\n";
  }
  if ($Options{outdelim} !~ /^(comma|semicolon|tab)$/i) {
    die "Error: The value specified, $Options{outdelim}, for option \"--outdelim\" is not valid. Allowed values: comma, tab, or semicolon\n";
  }
  if ($Options{quote} !~ /^(yes|no)$/i) {
    die "Error: The value specified, $Options{quote}, for option \"-q --quote\" is not valid. Allowed values: yes or no\n";
  }
  if ($Options{startcolmode} !~ /^(before|after)$/i) {
    # Report the value of the option which actually failed the validation...
    die "Error: The value specified, $Options{startcolmode}, for option \"--startcolmode\" is not valid. Allowed values: before or after\n";
  }
}
