package FileIO::FingerprintsTextFileIO;
#
# File: FingerprintsTextFileIO.pm
# Author: Manish Sud <msud@san.rr.com>
#
# Copyright (C) 2024 Manish Sud. All rights reserved.
#
# This file is part of MayaChemTools.
#
# MayaChemTools is free software; you can redistribute it and/or modify it under
# the terms of the GNU Lesser General Public License as published by the Free
# Software Foundation; either version 3 of the License, or (at your option) any
# later version.
#
# MayaChemTools is distributed in the hope that it will be useful, but without
# any warranty; without even the implied warranty of merchantability of fitness
# for a particular purpose. See the GNU Lesser General Public License for more
# details.
#
# You should have received a copy of the GNU Lesser General Public License
# along with MayaChemTools; if not, see <http://www.gnu.org/licenses/> or
# write to the Free Software Foundation Inc., 59 Temple Place, Suite 330,
# Boston, MA, 02111-1307, USA.
#

use strict;
use Carp;
use Exporter;
use Scalar::Util ();
use TextUtil ();
use FileUtil ();
use Fingerprints::FingerprintsStringUtil ();
use FileIO::FileIO;

use vars qw(@ISA @EXPORT @EXPORT_OK %EXPORT_TAGS);

@ISA = qw(FileIO::FileIO Exporter);
@EXPORT = qw();
@EXPORT_OK = qw(IsFingerprintsTextFile);

%EXPORT_TAGS = (all => [@EXPORT, @EXPORT_OK]);

# Setup class variables...
my($ClassName);
_InitializeClass();

# Class constructor...
#
# Takes a list of property names and values. The object returned by the parent
# class constructor is re-blessed into this class, loaded with default values
# and then configured from the specified names and values.
#
sub new {
  my($Class, %NamesAndValues) = @_;

  # Accept either a class name or an existing object as the invocant...
  my $TargetClass = ref($Class) || $Class;
  my $This = bless($Class->SUPER::new(), $TargetClass);

  # Defaults first, caller supplied properties afterwards...
  $This->_InitializeFingerprintsTextFileIO();
  $This->_InitializeFingerprintsTextFileIOProperties(%NamesAndValues);

  return $This;
}

# Initialize object data...
#
# Resets the state shared by reading and writing to its initial values and then
# runs the read specific and write specific initializers. Returns the object.
#
sub _InitializeFingerprintsTextFileIO {
  my($This) = @_;
  my($Key);

  # Properties which start out undefined:
  #
  # FingerprintsStringMode - Fingerprints string data format during read/write...
  #
  #   For file read:
  #
  #     AutoDetect - automatically detect format of fingerprints string
  #     FingerprintsBitVectorString - Bit vector fingerprints string format
  #     FingerprintsVectorString - Vector fingerprints string format
  #
  #     Default value: AutoDetect
  #
  #   For file write:
  #
  #     FingerprintsBitVectorString - Bit vector fingerprints string format
  #     FingerprintsVectorString - Vector fingerprints string format
  #
  #     Default value: undef
  #
  # FingerprintsObject...
  #
  #   For file read:
  #
  #     o Fingerprints bit-vector and vector object for current fingerprints string
  #
  #   For file write:
  #
  #     o Fingerprints bit-vector and vector object for current fingerprints string
  #     o Any supported fingerprints object: PathLengthFingerprints, ExtendedConnectivity, and so on.
  #
  # FingerprintsString - Fingerprints string for current line during read/write...
  #
  # DataLine - Text line data during read/write...
  #
  for $Key (qw(FingerprintsStringMode FingerprintsObject FingerprintsString DataLine)) {
    $This->{$Key} = undef;
  }

  # List valued properties emptied in place: words of the current text line and
  # text file column labels during read/write...
  for $Key (qw(DataLineWords DataColLabels)) {
    @{$This->{$Key}} = ();
  }

  # No data line has been read or written yet...
  $This->{FirstDataLineIO} = 1;

  # Current fingerprints string data line number during read/write...
  $This->{LineNum} = 0;

  # Text file delimiter during read/write...
  $This->{Delim} = '';

  # Parameters specific to reading and to writing...
  $This->_InitializeFingerprintsTextFileIORead();
  $This->_InitializeFingerprintsTextFileIOWrite();

  return $This;
}

# Initialize class ...
#
# Stores the name of this package in the file level class name variable, which
# is interpolated into warning and error messages.
#
sub _InitializeClass {
  $ClassName = __PACKAGE__;
}

# Initialize object data for reading fingerprints text file...
#
# Installs the default value of every read related property and clears the read
# status flags and counters. Returns the object.
#
sub _InitializeFingerprintsTextFileIORead {
  my($This) = @_;

  my %ReadDefaults = (
    # How a compound ID or fingerprints string data column is identified:
    #
    #   ColNum - A valid column number
    #   ColLabel - A valid column name
    #
    ColMode => 'ColNum',

    # Fingerprints column to use for retrieving fingerprints string data...
    #
    # AutoDetect means the first column containing the word Fingerprints in its
    # column label. Otherwise, a valid column number or column name must be
    # specified based on the value of ColMode.
    #
    FingerprintsCol => 'AutoDetect',

    # Compound ID column to use for retrieving compound IDs for fingerprints...
    #
    # AutoDetect means the first column containing the word CompoundID in its
    # column label; without such a column, compound IDs are generated
    # sequentially. Otherwise, a valid column number or column name must be
    # specified based on the value of ColMode.
    #
    CompoundIDCol => 'AutoDetect',

    # Prefix for sequentially generated compound IDs. The default value, Cmpd,
    # generates compound IDs which look like Cmpd<Number>.
    #
    CompoundIDPrefix => 'Cmpd',

    # Input delimiter for fingerprints CSV text file. Possible values: comma,
    # semicolon or tab. This option is ignored for TSV text file and tab is used
    # as the delimiter.
    #
    InDelim => 'comma',

    # Validate the fingerprints data in FingerprintsCol before generating
    # fingerprints objects. The default set here, 1, turns validation on.
    #
    ValidateData => 1,

    # Level of detail to print during validation of data for invalid or missing data...
    DetailLevel => 1,

    # Number of missing and invalid fingerprints string data lines...
    NumOfLinesWithMissingData => 0,
    NumOfLinesWithInvalidData => 0,

    # Compound ID for current fingerprints string...
    CompoundID => undef,

    # Status of data in fingerprints text file...
    ValidFileData => 0,
    ValidCompoundIDCol => 0,
    ValidFingerprintsCol => 0,
    ValidFingerprintsStringMode => 0,
  );

  # keys and values walk the hash in the same order, so each value lands on its key...
  @{$This}{keys %ReadDefaults} = values %ReadDefaults;

  return $This;
}

# Initialize object data for writing fingerprints text file...
#
# Installs the default value of every write related property. Returns the object.
#
sub _InitializeFingerprintsTextFileIOWrite {
  my($This) = @_;

  my %WriteDefaults = (
    # Fingerprints bit vector string format...
    #
    # Possible values: BinaryString or HexadecimalString [Default]
    #
    # Default BitStringFormat is set during first write using
    # Fingerprints::FingerprintsStringUtil::GetDefaultBitStringFormat.
    #
    BitStringFormat => undef,

    # Bits order in fingerprints bit vector string...
    #
    #   Ascending - First bit in each byte as the lowest bit [Default]
    #   Descending - First bit in each byte as the highest bit
    #
    # Default BitsOrder is set during first write using
    # Fingerprints::FingerprintsStringUtil::GetDefaultBitsOrder.
    #
    BitsOrder => undef,

    # Fingerprints vector string format...
    #
    # Possible values: IDsAndValuesString, IDsAndValuesPairsString, ValuesAndIDsString,
    # ValuesAndIDsPairsString, ValuesString
    #
    # Default VectorStringFormat is set during first write using
    # Fingerprints::FingerprintsStringUtil::GetDefaultVectorStringFormat. For
    # fingerprints vector object containing vector NumericalValues, it corresponds
    # to IDsAndValuesString; otherwise, it's set to ValuesString.
    #
    VectorStringFormat => undef,

    # Delimiter for output fingerprints CSV/TSV file. Possible values: comma, tab,
    # semicolon. This option is ignored for TSV text file and tab is used as the
    # delimiter.
    #
    OutDelim => 'comma',

    # Quotes around column values for output fingerprints CSV/TSV text file...
    OutQuote => 1,

    # Overwriting existing file...
    Overwrite => 0,
  );

  @{$This}{keys %WriteDefaults} = values %WriteDefaults;

  return $This;
}

# Initialize object values...
#
# Applies each specified property through its Set<PropertyName> method, makes
# sure a fingerprints text file name has been specified and then finishes the
# setup for the read, write or append mode of the object.
#
sub _InitializeFingerprintsTextFileIOProperties {
  my($This, %NamesAndValues) = @_;
  my($PropertyName, $Setter, $TextFile);

  # All other property names and values along with all Set/Get<PropertyName> methods
  # are implemented on-demand using ObjectProperty class.

  for $PropertyName (keys %NamesAndValues) {
    $Setter = "Set${PropertyName}";
    $This->$Setter($NamesAndValues{$PropertyName});
  }

  croak "Error: ${ClassName}->New: Object can't be instantiated without specifying file name..."
    unless exists $NamesAndValues{Name};

  # Make sure it's a fingerprints file...
  $TextFile = $NamesAndValues{Name};
  croak "Error: ${ClassName}->New: Object can't be instantiated: File, $TextFile, doesn't appear to be fingerprints format..."
    unless $This->IsFingerprintsTextFile($TextFile);

  # Mode specific setup; any other mode needs no further work here...
  if ($This->GetMode() =~ /^Read$/i) {
    $This->_InitializeFingerprintsTextFileIOReadProperties(%NamesAndValues);
  }
  elsif ($This->GetMode() =~ /^(Write|Append)$/i) {
    $This->_InitializeFingerprintsTextFileIOWriteProperties(%NamesAndValues);
  }

  return $This;
}

# Initialize object properties for reading fingerprints text file...
#
# Falls back to AutoDetect when no FingerprintsStringMode has been set and then
# collects and validates the column information of the text file.
#
sub _InitializeFingerprintsTextFileIOReadProperties {
  my($This, %NamesAndValues) = @_;

  # Set default value for FingerprintsStringMode...
  $This->{FingerprintsStringMode} ||= 'AutoDetect';

  $This->_PrepareForReadingFingerprintsTextFileData();

  return $This;
}

# Initialize object properties for writing fingerprints text file...
#
# Checks the properties required for write/append mode and then prepares the
# object for writing. Croaks when:
#
#   o FingerprintsStringMode was not specified or is not one of the two values
#     supported for writing
#   o DataColLabels was not specified
#   o OutDelim is semicolon while OutQuote is off
#
# Returns the object.
#
sub _InitializeFingerprintsTextFileIOWriteProperties {
  my($This, %NamesAndValues) = @_;

  # Check FingerprintsStringMode value...
  if (!exists $NamesAndValues{FingerprintsStringMode}) {
    croak "Error: ${ClassName}->New: Object can't be instantiated without specifying FingerprintsStringMode...";
  }

  # AutoDetect is accepted by SetFingerprintsStringMode but is only meaningful for reading...
  if ($This->{FingerprintsStringMode} !~ /^(FingerprintsBitVectorString|FingerprintsVectorString)$/i) {
    croak "Error: ${ClassName}->New: Object can't be instantiated: FingerprintsStringMode value, $This->{FingerprintsStringMode}, is not valid; Supported values for write/append: FingerprintsBitVectorString or FingerprintsVectorString...";
  }

  if (!exists $NamesAndValues{DataColLabels}) {
    croak "Error: ${ClassName}->New: Object can't be instantiated without specifying DataColLabels...";
  }

  # Fingerprints strings contain semicolons, so an unquoted semicolon delimited
  # file could not be split back into columns...
  if ($This->{OutDelim} =~ /semicolon/i && !$This->{OutQuote}) {
    croak "Error: ${ClassName}->New: Object can't be instantiated: The value specified, $This->{OutQuote}, using \"OutQuote\" is not allowed with semicolon value of \"OutDelim\": Fingerprints string use semicolon as delimiter for various data fields and must be quoted.\n";
  }

  $This->_PrepareForWritingFingerprintsTextFileData();

  return $This;
}

# Set FingerprintsStringMode...
#
# Accepted values, matched without regard to case:
#
#   AutoDetect - automatically detect format of fingerprints string
#   FingerprintsBitVectorString - Bit vector fingerprints string format
#   FingerprintsVectorString - Vector fingerprints string format
#
# Croaks for any other value. Returns the object.
#
sub SetFingerprintsStringMode {
  my($This, $Value) = @_;
  my $IsSupported = ($Value =~ /^(AutoDetect|FingerprintsBitVectorString|FingerprintsVectorString)$/i);

  croak "Error: ${ClassName}->SetFingerprintsStringMode: FingerprintsStringMode value, $Value, is not valid; Supported values: AutoDetect, FingerprintsBitVectorString or FingerprintsVectorString..."
    unless $IsSupported;

  $This->{FingerprintsStringMode} = $Value;

  return $This;
}

# Set ColMode...
#
# Accepted values, matched without regard to case: ColNum or ColLabel. Croaks
# for any other value. Returns the object.
#
sub SetColMode {
  my($This, $Value) = @_;
  my $IsSupported = ($Value =~ /^(ColNum|ColLabel)$/i);

  croak "Error: ${ClassName}->SetColMode: ColMode value, $Value, is not valid; Supported values: ColNum or ColLabel..."
    unless $IsSupported;

  $This->{ColMode} = $Value;

  return $This;
}

# Set InDelim...
#
# Accepted values, matched without regard to case: comma, semicolon or tab.
# Croaks for any other value. Returns the object.
#
sub SetInDelim {
  my($This, $Value) = @_;
  my $IsSupported = ($Value =~ /^(comma|semicolon|tab)$/i);

  croak "Error: ${ClassName}->SetInDelim: InDelim value, $Value, is not valid; Supported values: comma, semicolon, or tab..."
    unless $IsSupported;

  $This->{InDelim} = $Value;

  return $This;
}

# Set DetailLevel...
#
# Croaks unless TextUtil::IsPositiveInteger accepts the value. Returns the object.
#
sub SetDetailLevel {
  my($This, $Value) = @_;

  croak "Error: ${ClassName}->SetDetailLevel: DetailLevel value, $Value, is not valid; Supported values: > 0..."
    unless TextUtil::IsPositiveInteger($Value);

  $This->{DetailLevel} = $Value;

  return $This;
}

# Set BitStringFormat...
#
# Accepted values, matched without regard to case: BinaryString or
# HexadecimalString. Croaks for any other value. Returns the object.
#
sub SetBitStringFormat {
  my($This, $Value) = @_;

  if ($Value !~ /^(BinaryString|HexadecimalString)$/i) {
    croak "Error: ${ClassName}->SetBitStringFormat: BitStringFormat value, $Value, is not valid; Supported values: BinaryString or HexadecimalString...";
  }

  $This->{BitStringFormat} = $Value;

  return $This;
}

# Set BitsOrder...
#
# Accepted values, matched without regard to case:
#
#   Ascending - First bit in each byte as the lowest bit
#   Descending - First bit in each byte as the highest bit
#
# Croaks for any other value. Returns the object.
#
sub SetBitsOrder {
  my($This, $Value) = @_;

  if ($Value !~ /^(Ascending|Descending)$/i) {
    # The message names BitsOrder, the property which was actually rejected...
    croak "Error: ${ClassName}->SetBitsOrder: BitsOrder value, $Value, is not valid; Supported values: Ascending or Descending...";
  }

  $This->{BitsOrder} = $Value;

  return $This;
}

# Set VectorStringFormat...
#
# Accepted values, matched without regard to case: IDsAndValuesString,
# IDsAndValuesPairsString, ValuesAndIDsString, ValuesAndIDsPairsString or
# ValuesString. Croaks for any other value. Returns the object.
#
sub SetVectorStringFormat {
  my($This, $Value) = @_;

  if ($Value !~ /^(IDsAndValuesString|IDsAndValuesPairsString|ValuesAndIDsString|ValuesAndIDsPairsString|ValuesString)$/i) {
    # The message names VectorStringFormat, the property which was actually rejected...
    croak "Error: ${ClassName}->SetVectorStringFormat: VectorStringFormat value, $Value, is not valid; Supported values: IDsAndValuesString, IDsAndValuesPairsString, ValuesAndIDsString, ValuesAndIDsPairsString, or ValuesString...";
  }

  $This->{VectorStringFormat} = $Value;

  return $This;
}

# Set OutDelim...
#
# Accepted values, matched without regard to case: comma, tab or semicolon.
# Croaks for any other value. Returns the object.
#
sub SetOutDelim {
  my($This, $Value) = @_;

  if ($Value !~ /^(comma|tab|semicolon)$/i) {
    croak "Error: ${ClassName}->SetOutDelim: OutDelim value, $Value, is not valid; Supported values: comma, tab or semicolon...";
  }

  $This->{OutDelim} = $Value;

  return $This;
}

# Set DataColLabels...
#
# Set output data column labels using:
#    o List of column labels
#    o Reference to a list of column labels
#
# Warns and leaves the current labels untouched when called without any value.
# Returns the object.
#
sub SetDataColLabels {
  my($This, @Values) = @_;
  my($FirstValue, $TypeOfFirstValue);

  if (!@Values) {
    carp "Warning: ${ClassName}->SetDataColLabels: No data column labels specified...";
    return $This;
  }

  @{$This->{DataColLabels}} = ();

  $FirstValue = $Values[0];
  $TypeOfFirstValue = ref $FirstValue;

  if ($TypeOfFirstValue =~ /^ARRAY/) {
    # Initialize using array reference; any further values are ignored...
    push @{$This->{DataColLabels}}, @{$FirstValue};
  }
  else {
    # It's a list of values...
    push @{$This->{DataColLabels}}, @Values;
  }

  return $This;
}

# Get column labels or number of column labels in first text line...
#
# Returns the list of labels in list context and their count in scalar context.
#
sub GetDataColLabels {
  my($This) = @_;

  return wantarray ? @{$This->{DataColLabels}} : scalar @{$This->{DataColLabels}};
}

# Get words or number of words in current data line...
#
# Returns the list of words in list context and their count in scalar context.
#
sub GetDataLineWords {
  my($This) = @_;

  return wantarray ? @{$This->{DataLineWords}} : scalar @{$This->{DataLineWords}};
}

# Set DataLineWords...
#
# Set data line words using:
#    o List of line words
#    o Reference to a list of line words
#
# Warns and leaves the current words untouched when called without any value.
# Returns the object.
#
sub SetDataLineWords {
  my($This, @Values) = @_;
  my($FirstValue, $TypeOfFirstValue);

  if (!@Values) {
    carp "Warning: ${ClassName}->SetDataLineWords: No line words specified...";
    return $This;
  }

  @{$This->{DataLineWords}} = ();

  $FirstValue = $Values[0];
  $TypeOfFirstValue = ref $FirstValue;

  if ($TypeOfFirstValue =~ /^ARRAY/) {
    # Initialize using array reference; any further values are ignored...
    push @{$This->{DataLineWords}}, @{$FirstValue};
  }
  else {
    # It's a list of values...
    push @{$This->{DataLineWords}}, @Values;
  }

  return $This;
}

# Get fingerprints object for current data line using fingerprints, fingerprints bit-vector
# fingerprints vector object. Fingerprints object correspond to any of supported fingerprints
# objects such as PathLengthFingerprints, ExtendedConnectivity, and so on.
#
sub GetFingerprints {
  my($This) = @_;

  return $This->{FingerprintsObject};
}

# Set fingerprints object for current data line...
#
sub SetFingerprints {
  my($This, $FingerprintsObject) = @_;

  $This->{FingerprintsObject} = $FingerprintsObject;

  return $This;
}

# Get fingerprints string for current data line...
#
# Returns the string 'None' when no fingerprints string is available.
#
sub GetFingerprintsString {
  my($This) = @_;

  return $This->{FingerprintsString} ? $This->{FingerprintsString} : 'None';
}

# Set fingerprints string for current data line...
#
sub SetFingerprintsString {
  my($This, $FingerprintsString) = @_;

  $This->{FingerprintsString} = $FingerprintsString;

  return $This;
}

# Does fingerprints text file contain valid data?
#
# Returns 1 or 0.
#
sub IsFingerprintsFileDataValid {
  my($This) = @_;

  return $This->{ValidFileData} ? 1 : 0;
}

# Does current data line contains valid fingerprints object data?
#
# Returns 1 when a fingerprints object is defined for the current line; otherwise, 0.
#
sub IsFingerprintsDataValid {
  my($This) = @_;

  return defined $This->{FingerprintsObject} ? 1 : 0;
}

# Read next available fingerprints line, process it and generate appropriate fingerprints
# objects...
#
# Returns undef when no more data lines are available; otherwise, returns the
# object, even when the line turned out to be unusable. Use
# IsFingerprintsDataValid to find out whether the line produced a fingerprints
# object.
#
sub Read {
  my($This) = @_;

  # Read data line...
  if (!$This->_ReadDataLine()) {
    return undef;
  }

  # No need to process lines of a text file whose column or fingerprints string
  # mode information is not valid; the line is counted as missing data...
  if (!$This->{ValidFileData}) {
    if ($This->{ValidateData}) {
      $This->{NumOfLinesWithMissingData} += 1;
    }
    return $This;
  }

  # Perform data validation...
  if ($This->{ValidateData}) {
    if (!$This->_ValidateReadDataLine()) {
      return $This;
    }
  }

  # Setup fingerprints string after checking again to handle problematic data for
  # non-validated data lines...
  #
  if ($This->{FingerprintsColNum} <= $#{$This->{DataLineWords}}) {
    $This->{FingerprintsString} = $This->{DataLineWords}[$This->{FingerprintsColNum}];
  }

  # Generate fingerprints object...
  $This->_GenerateFingerprintsObject();

  # Setup fingerprints compound ID for fingerprints string...
  $This->_GenerateCompoundID();

  return $This;
}

# Read next available fingerprints line, process it and generate appropriate fingerprints
# objects...
#
# Same as Read.
#
sub Next {
  my($This) = @_;

  return $This->Read();
}

# Read fingerprints data line...
#
# Skips the column label line before the first read, stores the next text line
# along with its words and advances the line number. Returns 1 when a line was
# read and 0 when no more lines are available.
#
sub _ReadDataLine {
  my($This) = @_;

  if ($This->{FirstDataLineIO}) {
    $This->_ProcessFirstDataLineRead();
  }

  # Initialize data for current line...
  $This->_InitializeReadDataLine();

  # Get next data line...
  $This->{DataLine} = TextUtil::GetTextLine($This->{FileHandle});

  # Only a missing or empty line signals the end of data; a plain truth test
  # would also stop at a line consisting of the single character 0...
  if (!defined $This->{DataLine} || $This->{DataLine} eq '') {
    return 0;
  }

  # Get line words...
  $This->{LineNum} += 1;
  @{$This->{DataLineWords}} = TextUtil::SplitWords($This->{DataLine}, $This->{Delim});

  return 1;
}

# Initialize data line for reading...
#
# Clears compound ID, text line, line words, fingerprints object and fingerprints
# string left over from the previous line. Returns the object.
#
sub _InitializeReadDataLine {
  my($This) = @_;

  $This->{CompoundID} = undef;

  $This->{DataLine} = undef;
  @{$This->{DataLineWords}} = ();

  $This->{FingerprintsObject} = undef;
  $This->{FingerprintsString} = undef;

  return $This;
}

# Validate fingerprints string data line...
#
# A line has missing data when it doesn't reach the fingerprints column. It has
# invalid data when the fingerprints string values are not valid, or when its
# type or description differs from that of the first fingerprints string in the
# file. The matching counter is incremented and, for DetailLevel values of 2 or
# more, a warning is printed. Returns 1 for a valid line and 0 otherwise.
#
sub _ValidateReadDataLine {
  my($This) = @_;

  # Check for missing data...
  if ($This->{FingerprintsColNum} > $#{$This->{DataLineWords}}) {
    # Missing data...
    $This->{NumOfLinesWithMissingData} += 1;
    if ($This->{DetailLevel} >= 3) {
      carp "Warning: ${ClassName}->_ValidateReadDataLine: Data line number $This->{LineNum} contains no fingerprints data: $This->{DataLine}...";
    }
    elsif ($This->{DetailLevel} >= 2) {
      carp "Warning: ${ClassName}->_ValidateReadDataLine: Data line number $This->{LineNum} contains no fingerprints data...";
    }
    return 0;
  }

  # Check for invalid data...
  my($InvalidFingerprintsData, $FingerprintsString, $FingerprintsType, $FingerprintsDescription, $Normalize);

  $InvalidFingerprintsData = 0;
  $FingerprintsString = $This->{DataLineWords}[$This->{FingerprintsColNum}];

  # Types and descriptions are compared as plain strings, ignoring case. They
  # must not be interpolated into a regular expression: characters such as
  # parentheses, dots or plus signs would be treated as pattern syntax...
  $Normalize = sub { return defined $_[0] ? lc $_[0] : ''; };

  if (Fingerprints::FingerprintsStringUtil::AreFingerprintsStringValuesValid($FingerprintsString)) {
    ($FingerprintsType, $FingerprintsDescription) = Fingerprints::FingerprintsStringUtil::GetFingerprintsStringTypeAndDescription($FingerprintsString);
    if ($Normalize->($This->{FirstFingerprintsStringType}) ne $Normalize->($FingerprintsType) || $Normalize->($This->{FirstFingerprintsStringDescription}) ne $Normalize->($FingerprintsDescription)) {
      $InvalidFingerprintsData = 1;
    }
  }
  else {
    $InvalidFingerprintsData = 1;
  }

  if ($InvalidFingerprintsData) {
    $This->{NumOfLinesWithInvalidData} += 1;
    if ($This->{DetailLevel} >= 3) {
      carp "Warning: ${ClassName}->_ValidateReadDataLine: Data line number $This->{LineNum} contains invalid fingerprints data: $This->{DataLine}...";
    }
    elsif ($This->{DetailLevel} >= 2) {
      carp "Warning: ${ClassName}->_ValidateReadDataLine: Data line number $This->{LineNum} contains invalid fingerprints data...";
    }
    return 0;
  }

  return 1;
}

# Setup fingerprints compound ID for fingerprints string...
#
# The compound ID is taken from the compound ID column of the current line. It
# is built from CompoundIDPrefix and the data line number, not counting the
# column label line, when sequential IDs are in use or the line doesn't reach
# the compound ID column. The ID is also passed on to the fingerprints object
# of the current line, if there is one. Returns the object.
#
sub _GenerateCompoundID {
  my($This) = @_;
  my($NeedsSequentialID);

  $NeedsSequentialID = $This->{UseSequentialCompoundIDs} || ($This->{CompoundIDColNum} > $#{$This->{DataLineWords}});

  # LineNum includes the column label line, hence the subtraction...
  $This->{CompoundID} = $NeedsSequentialID
    ? $This->{CompoundIDPrefix} . ($This->{LineNum} - 1)
    : $This->{DataLineWords}[$This->{CompoundIDColNum}];

  # Set fingerprints ID...
  $This->{FingerprintsObject}->SetID($This->{CompoundID}) if $This->{FingerprintsObject};

  return $This;
}

# Process first read...
#
# Consumes the column label line, counts it as a line and marks the first read
# as done. Returns the object.
#
sub _ProcessFirstDataLineRead {
  my($This) = @_;

  $This->{FirstDataLineIO} = 0;

  # Skip column label line...
  TextUtil::GetTextLine($This->{FileHandle});
  $This->{LineNum} += 1;

  return $This;
}

# Get ready for reading fingerprints text file...
#
# Collects the column labels, validates compound ID and fingerprints columns
# and, for a valid fingerprints column, the fingerprints string mode. The file
# data is marked as valid only when all three checks succeed. Returns the object.
#
sub _PrepareForReadingFingerprintsTextFileData {
  my($This) = @_;
  my($AllChecksPassed);

  # Retrieve text file columns information....
  $This->_RetrieveTextFileColData();

  # Validate columns information...
  $This->_ValidateReadCompoundIDCol();
  $This->_ValidateReadFingerprintsCol();

  # Validate fingerprints string mode information...
  $This->_ValidateReadFingerprintsStringMode() if $This->{ValidFingerprintsCol};

  # Set status of text file data...
  $AllChecksPassed = $This->{ValidCompoundIDCol} && $This->{ValidFingerprintsCol} && $This->{ValidFingerprintsStringMode};
  $This->{ValidFileData} = $AllChecksPassed ? 1 : 0;

  return $This;
}

# Retrieve information about columns and fingerprints string...
#
# Works out the delimiter of the text file, reads its column label line and
# stores the column labels along with a map of column label to column number,
# counting from 0. The delimiter is tab for a tsv file extension; otherwise,
# it's semicolon or comma based on the value of InDelim. Croaks when the file
# doesn't exist or can't be opened. Returns the object.
#
sub _RetrieveTextFileColData {
  my($This) = @_;
  my($TextFile, $TextFileHandle, $FileDir, $FileName, $FileExt, $InDelim, $Line, $ColNum, @ColLabels);

  @{$This->{DataColLabels}} = ();
  %{$This->{DataColLabelToNumMap}} = ();

  $TextFile = $This->{Name};

  if (!(-e $TextFile)) {
    croak "Error: ${ClassName}->New: Object can't be instantiated: File, $TextFile, doesn't exist...";
  }

  ($FileDir, $FileName, $FileExt) = FileUtil::ParseFileName($TextFile);

  $InDelim = ($FileExt =~ /^tsv$/i) ? "\t" : ($This->{InDelim} =~ /semicolon/i ? "\;" : "\,");
  $This->{Delim} = $InDelim;

  # Three argument open with an explicit read mode: characters such as >, | or
  # surrounding spaces in the file name are never interpreted as an open mode...
  if (!open($TextFileHandle, '<', $TextFile)) {
    croak "Error: ${ClassName}->New: Object can't be instantiated: Couldn't open input text file $TextFile: $! ...";
  }

  # Get column label line...
  $Line = TextUtil::GetTextLine($TextFileHandle);

  close $TextFileHandle;

  @ColLabels = TextUtil::SplitWords($Line, $InDelim);

  # Set text file columns info. For labels occurring more than once, the map
  # keeps the number of the last such column...
  push @{$This->{DataColLabels}}, @ColLabels;

  for $ColNum (0 .. $#ColLabels) {
    $This->{DataColLabelToNumMap}{$ColLabels[$ColNum]} = $ColNum;
  }

  return $This;
}

# Validate compound ID column information...
#
# Resolves CompoundIDCol into a column number counting from 0:
#
#   o AutoDetect: first column containing the word CompoundID in its label;
#     without such a column, compound IDs are generated sequentially
#   o ColNum mode: column number, counting from 1, which must exist
#   o ColLabel mode: column label, which must exist
#
# Warns and returns 0 for a column which doesn't exist; otherwise, stores the
# result and returns 1.
#
sub _ValidateReadCompoundIDCol {
  my($This) = @_;
  my($CompoundIDCol, $CompoundIDColNum, $UseSequentialCompoundIDs, $ColNum);

  $This->{ValidCompoundIDCol} = 0;
  $This->{CompoundIDColNum} = undef;
  $This->{UseSequentialCompoundIDs} = 0;

  $CompoundIDCol = $This->{CompoundIDCol};

  $UseSequentialCompoundIDs = 0;
  $CompoundIDColNum = '';

  if ($CompoundIDCol =~ /^AutoDetect$/i) {
    # First column containing the word CompoundID in its label or sequential
    # generation. Labels are scanned by position so that the first matching
    # column is used even when the same label occurs more than once...
    $UseSequentialCompoundIDs = 1;
    COLNUM: for $ColNum (0 .. $#{$This->{DataColLabels}}) {
      if ($This->{DataColLabels}[$ColNum] =~ /CompoundID/i) {
        $CompoundIDColNum = $ColNum;
        $UseSequentialCompoundIDs = 0;
        last COLNUM;
      }
    }
  }
  else {
    if ($This->{ColMode} =~ /^ColNum$/i) {
      # Is it a valid column number? Values below 1 would otherwise turn into a
      # negative array index and silently select a column from the end...
      if ($CompoundIDCol < 1 || $CompoundIDCol > scalar @{$This->{DataColLabels}}) {
        carp "Warning: ${ClassName}->_ValidateReadCompoundIDCol: Column number, $CompoundIDCol, specified using CompoundIDCol doesn't exist...";
        return 0;
      }
      $CompoundIDColNum = $CompoundIDCol - 1;
    }
    elsif ($This->{ColMode} =~ /^ColLabel$/i) {
      # Does this column exist?
      if (!exists $This->{DataColLabelToNumMap}{$CompoundIDCol}) {
        carp "Warning: ${ClassName}->_ValidateReadCompoundIDCol: Column name, $CompoundIDCol, specified using CompoundIDCol doesn't exist...";
        return 0;
      }
      $CompoundIDColNum = $This->{DataColLabelToNumMap}{$CompoundIDCol};
    }
  }

  $This->{ValidCompoundIDCol} = 1;
  $This->{CompoundIDColNum} = $CompoundIDColNum;
  $This->{UseSequentialCompoundIDs} = $UseSequentialCompoundIDs;

  return 1;
}

# Validate fingerprints string column information...
#
# Resolves FingerprintsCol into a column number counting from 0:
#
#   o AutoDetect: first column containing the word Fingerprints in its label
#   o ColNum mode: column number, counting from 1, which must exist
#   o ColLabel mode: column label, which must exist
#
# Warns and returns 0 for a column which doesn't exist; otherwise, stores the
# result and returns 1.
#
sub _ValidateReadFingerprintsCol {
  my($This) = @_;
  my($FingerprintsColNum, $FingerprintsCol, $ColNum);

  $This->{ValidFingerprintsCol} = 0;
  $This->{FingerprintsColNum} = undef;

  $FingerprintsColNum = undef;
  $FingerprintsCol = $This->{FingerprintsCol};

  if ($FingerprintsCol =~ /^AutoDetect$/i) {
    # First column containing the word Fingerprints in its label. Labels are
    # scanned by position so that the first matching column is used even when
    # the same label occurs more than once...
    COLNUM: for $ColNum (0 .. $#{$This->{DataColLabels}}) {
      if ($This->{DataColLabels}[$ColNum] =~ /Fingerprints/i) {
        $FingerprintsColNum = $ColNum;
        last COLNUM;
      }
    }
    if (!defined $FingerprintsColNum) {
      carp "Warning: ${ClassName}->_ValidateReadFingerprintsCol: Column label containing \"Fingerprints\" string in its name doesn't exist...";
      return 0;
    }
  }
  else {
    if ($This->{ColMode} =~ /^ColNum$/i) {
      # Is it a valid column number? Values below 1 would otherwise turn into a
      # negative array index and silently select a column from the end...
      if ($FingerprintsCol < 1 || $FingerprintsCol > scalar @{$This->{DataColLabels}}) {
        carp "Warning: ${ClassName}->_ValidateReadFingerprintsCol: Column number, $FingerprintsCol, specified using FingerprintsCol doesn't exist...";
        return 0;
      }
      $FingerprintsColNum = $FingerprintsCol - 1;
    }
    elsif ($This->{ColMode} =~ /^ColLabel$/i) {
      # Does this column exist?
      if (!exists $This->{DataColLabelToNumMap}{$FingerprintsCol}) {
        carp "Warning: ${ClassName}->_ValidateReadFingerprintsCol: Column label, $FingerprintsCol, specified using FingerprintsCol doesn't exist...";
        return 0;
      }
      $FingerprintsColNum = $This->{DataColLabelToNumMap}{$FingerprintsCol};
    }
  }

  $This->{ValidFingerprintsCol} = 1;
  $This->{FingerprintsColNum} = $FingerprintsColNum;

  return 1;
}

# Validate fingerprints string mode information...
#
# Reads the first fingerprints data line of the text file and compares the type
# of its fingerprints string against FingerprintsStringMode. In AutoDetect mode,
# the type found in the file decides between bit vector and vector mode. Type
# and description of this first fingerprints string are stored for validation
# of subsequent lines.
#
# Croaks when the file can't be opened. Warns and returns 0 when the file
# contains no data line or the type is not acceptable; otherwise, returns 1.
#
sub _ValidateReadFingerprintsStringMode {
  my($This) = @_;
  my($FingerprintsBitVectorStringMode, $FingerprintsVectorStringMode, $FirstFingerprintsStringType, $FirstFingerprintsStringDescription, $TextFile, $TextFileHandle, $Line, $FingerprintsType, $FingerprintsDescription, @LineWords);

  $This->{ValidFingerprintsStringMode} = 0;

  $This->{FingerprintsBitVectorStringMode} = 0;
  $This->{FingerprintsVectorStringMode} = 0;

  $This->{FirstFingerprintsStringType} = '';
  $This->{FirstFingerprintsStringDescription} = '';

  $FingerprintsBitVectorStringMode = 0;
  $FingerprintsVectorStringMode = 0;

  $FirstFingerprintsStringType = '';
  $FirstFingerprintsStringDescription = '';

  $TextFile = $This->{Name};

  # Three argument open with an explicit read mode: characters such as >, | or
  # surrounding spaces in the file name are never interpreted as an open mode...
  if (!open($TextFileHandle, '<', $TextFile)) {
    croak "Error: ${ClassName}->New: Object can't be instantiated: Couldn't open input text file $TextFile: $! ...";
  }

  # Skip column label line...
  $Line = TextUtil::GetTextLine($TextFileHandle);

  # Get first fingerprints data line...
  $Line = TextUtil::GetTextLine($TextFileHandle);

  close $TextFileHandle;

  # Without a data line there is no fingerprints string to look at...
  if (!defined $Line || $Line eq '') {
    carp "Warning: ${ClassName}->_ValidateReadFingerprintsStringMode: File, $TextFile, doesn't contain any fingerprints data lines...";
    return 0;
  }

  # Get first fingerprints type and description...
  @LineWords = TextUtil::SplitWords($Line, $This->{Delim});

  ($FingerprintsType, $FingerprintsDescription) = Fingerprints::FingerprintsStringUtil::GetFingerprintsStringTypeAndDescription($LineWords[$This->{FingerprintsColNum}]);

  if ($This->{FingerprintsStringMode} =~ /^FingerprintsBitVectorString$/i) {
    if ($FingerprintsType !~ /^FingerprintsBitVector$/i) {
      carp "Warning: ${ClassName}->_ValidateReadFingerprintsStringMode: First fingerprint string data type, $FingerprintsType, doesn't match value, FingerprintsBitVectorString, specified using \"FingerprintsStringMode\"...";
      return 0;
    }
    $FingerprintsBitVectorStringMode = 1;
    $FirstFingerprintsStringType = 'FingerprintsBitVector';
    $FirstFingerprintsStringDescription = $FingerprintsDescription;
  }
  elsif ($This->{FingerprintsStringMode} =~ /^FingerprintsVectorString$/i) {
    if ($FingerprintsType !~ /^FingerprintsVector$/i) {
      carp "Warning: ${ClassName}->_ValidateReadFingerprintsStringMode: First fingerprint string data type, $FingerprintsType, doesn't match value, FingerprintsVectorString, specified using \"FingerprintsStringMode\"...";
      return 0;
    }
    $FingerprintsVectorStringMode = 1;
    $FirstFingerprintsStringType = 'FingerprintsVector';
    $FirstFingerprintsStringDescription = $FingerprintsDescription;
  }
  else {
    # AutoDetect mode...
    if ($FingerprintsType =~ /^FingerprintsBitVector$/i) {
      $FingerprintsBitVectorStringMode = 1;
    }
    elsif ($FingerprintsType =~ /^FingerprintsVector$/i) {
      $FingerprintsVectorStringMode = 1;
    }
    else {
      carp "Warning: ${ClassName}->_ValidateReadFingerprintsStringMode: First fingerprint string data type, $FingerprintsType, identified during, AutoDetect, value of \"FingerprintsStringMode\" is not valid; Supported fingerprints types: FingerprintsBitVector or FingerprintsVector...";
      return 0;
    }
    $FirstFingerprintsStringType = $FingerprintsType;
    $FirstFingerprintsStringDescription = $FingerprintsDescription;
  }

  $This->{ValidFingerprintsStringMode} = 1;

  $This->{FingerprintsBitVectorStringMode} = $FingerprintsBitVectorStringMode;
  $This->{FingerprintsVectorStringMode} = $FingerprintsVectorStringMode;

  $This->{FirstFingerprintsStringType} = $FirstFingerprintsStringType;
  $This->{FirstFingerprintsStringDescription} = $FirstFingerprintsStringDescription;

  return 1;
}

# Write fingerprints string generated from specified fingerprints, fingerprints-bit vector, or
# fingerprints vector object and other data to text file...
#
# The data column values, if any, are written first, followed by the fingerprints
# string as the last column. Returns the object.
#
sub WriteFingerprints {
  my($This, $FingerprintsObject, @DataColValues) = @_;

  # Initialize data for current line...
  $This->_InitializeWriteDataLine();

  # Set fingerprints object...
  $This->{FingerprintsObject} = $FingerprintsObject;

  # Generate fingerprints string...
  $This->_GenerateFingerprintsString();

  # Set data line words. The words were emptied during initialization of the
  # line and SetDataLineWords warns when called without any value, so it's only
  # called when data column values have been specified...
  if (@DataColValues) {
    $This->SetDataLineWords(@DataColValues);
  }
  push @{$This->{DataLineWords}}, $This->{FingerprintsString};

  # Write data line..
  $This->_WriteDataLine();

  return $This;
}

# Write fingerprints string and other data to text file...
#
# Note:
#   o The specified fingerprints string is written to the file as it is; values of
#     FingerprintsStringMode, BitStringFormat, BitsOrder, and VectorStringFormat
#     do not change the text that is written.
#   o The fingerprints string is appended after the data line words set up from
#     the specified data column values.
#
# Returns the object itself.
#
sub WriteFingerprintsString {
  my $This = shift;
  my($FingerprintsString, @DataColValues) = @_;

  # Start with clean data for the current line...
  $This->_InitializeWriteDataLine();

  # Keep the string and generate fingerprints object from it...
  $This->{FingerprintsString} = $FingerprintsString;
  $This->_GenerateFingerprintsObject();

  # Data column values first, fingerprints string last...
  $This->SetDataLineWords(@DataColValues);
  push @{$This->{DataLineWords}}, $FingerprintsString;

  $This->_WriteDataLine();

  return $This;
}

# Initialize data line for writing...
#
# Clears the current data line text, its words, and the current fingerprints
# object and string. Returns the object itself.
#
sub _InitializeWriteDataLine {
  my($This) = @_;

  for my $Key (qw(DataLine FingerprintsObject FingerprintsString)) {
    $This->{$Key} = undef;
  }

  # Empty the existing list of words in place...
  @{$This->{DataLineWords}} = ();

  return $This;
}

# Write fingerprints data line...
#
# On the first data line, _ProcessFirstDataLineWrite is invoked before the
# line itself is written. The joined line text is kept in DataLine and
# LineNum is advanced by one. Returns the object itself.
#
sub _WriteDataLine {
  my($This) = @_;

  $This->_ProcessFirstDataLineWrite() if $This->{FirstDataLineIO};

  # Join line words using current delimiter and quote settings...
  my $Line = TextUtil::JoinWords(\@{$This->{DataLineWords}}, $This->{Delim}, $This->{OutQuote});

  $This->{LineNum} = $This->{LineNum} + 1;
  print {$This->{FileHandle}} "$Line\n";

  $This->{DataLine} = $Line;

  return $This;
}

# Process first write...
#
# Turns off the first data line flag and, in write mode only, writes out the
# column labels line. Returns the object itself.
#
sub _ProcessFirstDataLineWrite {
  my($This) = @_;

  $This->{FirstDataLineIO} = 0;

  # Nothing more to do unless the file is in write mode...
  return $This unless $This->GetMode() =~ /^Write$/i;

  # Write out column label line...
  my $Line = TextUtil::JoinWords(\@{$This->{DataColLabels}}, $This->{Delim}, $This->{OutQuote});

  $This->{LineNum} = $This->{LineNum} + 1;
  print {$This->{FileHandle}} "$Line\n";

  return $This;
}

# Get ready for writing fingerprints text file...
#
# Checks for an already existing file, sets up the output delimiter, and
# sets up FingerprintsStringMode status flags along with default string
# format values for the selected mode. Croaks when the file exists and
# Overwrite is not set. Returns the object itself.
#
sub _PrepareForWritingFingerprintsTextFileData {
  my($This) = @_;

  # An existing file is replaced only on request...
  my $TextFile = $This->{Name};
  if (!$This->{Overwrite} && -e $TextFile) {
    croak "Error: ${ClassName}->New: Object can't be instantiated: File, $TextFile, already exist. Use overwrite option...";
  }

  # Set up delimiter for writing file: tab for tsv extension; otherwise,
  # semicolon or comma based on OutDelim...
  my(undef, undef, $FileExt) = FileUtil::ParseFileName($TextFile);

  my $OutDelim;
  if ($FileExt =~ /^tsv$/i) {
    $OutDelim = "\t";
  }
  elsif ($This->{OutDelim} =~ /semicolon/i) {
    $OutDelim = "\;";
  }
  else {
    $OutDelim = "\,";
  }
  $This->{Delim} = $OutDelim;

  # Setup FingerprintsStringMode status...
  my $Mode = $This->{FingerprintsStringMode};
  my $BitVectorMode = ($Mode =~ /^FingerprintsBitVectorString$/i) ? 1 : 0;
  my $VectorMode = (!$BitVectorMode && $Mode =~ /^FingerprintsVectorString$/i) ? 1 : 0;

  $This->{FingerprintsBitVectorStringMode} = $BitVectorMode;
  $This->{FingerprintsVectorStringMode} = $VectorMode;
  $This->{ValidFingerprintsStringMode} = ($BitVectorMode || $VectorMode) ? 1 : 0;

  # Fill in default string formats for the selected mode...
  if ($BitVectorMode) {
    $This->_SetDefaultBitStringFormat();
    $This->_SetDefaultBitsOrder();
  }
  elsif ($VectorMode) {
    $This->_SetDefaultVectorStringFormat();
  }

  return $This;
}

# Set default value for bit string format...
#
# An already set BitStringFormat value is left alone; otherwise, the default
# from FingerprintsStringUtil is used. Returns the object itself.
#
sub _SetDefaultBitStringFormat {
  my($This) = @_;

  $This->{BitStringFormat} ||= Fingerprints::FingerprintsStringUtil::GetDefaultBitStringFormat();

  return $This;
}

# Set default value for bits order...
#
# An already set BitsOrder value is left alone; otherwise, the default from
# FingerprintsStringUtil is used. Returns the object itself.
#
sub _SetDefaultBitsOrder {
  my($This) = @_;

  $This->{BitsOrder} ||= Fingerprints::FingerprintsStringUtil::GetDefaultBitsOrder();

  return $This;
}

# Set default value for vector string format...
#
# The default is looked up using the current fingerprints object, so nothing
# is set when no fingerprints object is available or a format is already
# set. Returns the object itself.
#
sub _SetDefaultVectorStringFormat {
  my($This) = @_;

  return $This if $This->{VectorStringFormat};
  return $This unless $This->{FingerprintsObject};

  $This->{VectorStringFormat} = Fingerprints::FingerprintsStringUtil::GetDefaultVectorStringFormat($This->{FingerprintsObject});

  return $This;
}

# Generate fingerprints object using current fingerprints string...
#
# The current fingerprints object is always cleared first. Returns the object
# itself after parsing, or when there is no fingerprints string to parse;
# returns undef when neither bit-vector nor vector string mode is set.
#
sub _GenerateFingerprintsObject {
  my($This) = @_;

  $This->{FingerprintsObject} = undef;

  # Nothing to parse...
  return $This unless $This->{FingerprintsString};

  if ($This->{FingerprintsBitVectorStringMode}) {
    $This->{FingerprintsObject} = Fingerprints::FingerprintsStringUtil::ParseFingerprintsBitVectorString($This->{FingerprintsString});
    return $This;
  }

  if ($This->{FingerprintsVectorStringMode}) {
    $This->{FingerprintsObject} = Fingerprints::FingerprintsStringUtil::ParseFingerprintsVectorString($This->{FingerprintsString});
    return $This;
  }

  # No fingerprints string mode available to pick a parser...
  return undef;
}

# Generate fingerprints string using current fingerprints object...
#
# The current fingerprints string is always reset to an empty string first and
# stays empty when there is no fingerprints object or when neither bit-vector
# nor vector string mode is set. Returns the object itself.
#
sub _GenerateFingerprintsString {
  my($This) = @_;

  $This->{FingerprintsString} = '';

  # Nothing to generate from...
  return $This unless $This->{FingerprintsObject};

  if ($This->{FingerprintsBitVectorStringMode}) {
    # Bit-vector strings use bit string format and bits order...
    $This->{FingerprintsString} = Fingerprints::FingerprintsStringUtil::GenerateFingerprintsString($This->{FingerprintsObject}, $This->{BitStringFormat}, $This->{BitsOrder});
    return $This;
  }

  if ($This->{FingerprintsVectorStringMode}) {
    # Vector strings use vector string format...
    $This->{FingerprintsString} = Fingerprints::FingerprintsStringUtil::GenerateFingerprintsString($This->{FingerprintsObject}, $This->{VectorStringFormat});
  }

  return $This;
}

# Is it a fingerprints file?
#
# May be invoked with a file name alone, or with a FingerprintsTextFileIO
# object followed by a file name. The decision is delegated to
# FileUtil::CheckFileType using "csv tsv" as supported file extensions, and
# its status is returned.
#
sub IsFingerprintsTextFile ($;$) {
  my($FirstParameter, $SecondParameter) = @_;

  # Two parameters with an object up front imply object plus file name...
  my $FileName = ((@_ == 2) && _IsFingerprintsTextFileIO($FirstParameter)) ? $SecondParameter : $FirstParameter;

  # Check file extension...
  my $Status = FileUtil::CheckFileType($FileName, "csv tsv");

  return $Status;
}

# Is it a FingerprintsTextFileIO object?
#
# Returns 1 for a blessed object which is of this class or derived from it;
# otherwise, returns 0.
#
sub _IsFingerprintsTextFileIO {
  my($Object) = @_;

  return 0 unless Scalar::Util::blessed($Object);

  return $Object->isa($ClassName) ? 1 : 0;
}