package FileIO::FingerprintsSDFileIO;
#
# File: FingerprintsSDFileIO.pm
# Author: Manish Sud <msud@san.rr.com>
#
# Copyright (C) 2024 Manish Sud. All rights reserved.
#
# This file is part of MayaChemTools.
#
# MayaChemTools is free software; you can redistribute it and/or modify it under
# the terms of the GNU Lesser General Public License as published by the Free
# Software Foundation; either version 3 of the License, or (at your option) any
# later version.
#
# MayaChemTools is distributed in the hope that it will be useful, but without
# any warranty; without even the implied warranty of merchantability of fitness
# for a particular purpose. See the GNU Lesser General Public License for more
# details.
#
# You should have received a copy of the GNU Lesser General Public License
# along with MayaChemTools; if not, see <http://www.gnu.org/licenses/> or
# write to the Free Software Foundation Inc., 59 Temple Place, Suite 330,
# Boston, MA, 02111-1307, USA.
#

use strict;
use Carp;
use Exporter;
use Scalar::Util ();
use TextUtil ();
use FileUtil ();
use SDFileUtil ();
use Fingerprints::FingerprintsStringUtil ();
use FileIO::FileIO;

use vars qw(@ISA @EXPORT @EXPORT_OK %EXPORT_TAGS);

@ISA = qw(FileIO::FileIO Exporter);
@EXPORT = qw();
@EXPORT_OK = qw(IsFingerprintsSDFile);

%EXPORT_TAGS = (all => [@EXPORT, @EXPORT_OK]);

# Setup class variables...
my($ClassName);
_InitializeClass();

# Class constructor...
#
# Takes a list of property names and values. A "Name" (file name) entry is
# required and must be accepted by IsFingerprintsSDFile; every supplied name is
# applied through the corresponding Set<Name> method. Croaks on invalid input.
# Returns the new object.
#
sub new {
  my($Class, %NamesAndValues) = @_;

  # Initialize object...
  my $This = $Class->SUPER::new();
  bless $This, ref($Class) || $Class;
  $This->_InitializeFingerprintsSDFileIO();

  $This->_InitializeFingerprintsSDFileIOProperties(%NamesAndValues);

  return $This;
}

# Initialize object data...
#
# Sets the properties shared by read and write to their initial values and then
# delegates to the read and write specific initializers. Returns the object.
#
sub _InitializeFingerprintsSDFileIO {
  my($This) = @_;

  # Fingerprints string data format during read/write...
  #
  # For file read:
  #
  #   AutoDetect  - automatically detect format of fingerprints string
  #   FingerprintsBitVectorString - Bit vector fingerprints string format
  #   FingerprintsVectorString - Vector fingerprints string format
  #
  # Default value: AutoDetect
  #
  # For file write:
  #
  #   FingerprintsBitVectorString - Bit vector fingerprints string format
  #   FingerprintsVectorString - Vector fingerprints string format
  #
  # Default value: undef
  #
  $This->{FingerprintsStringMode} = undef;

  # For file read:
  #
  #   o Fingerprints bit-vector and vector object for current fingerprints string
  #
  # For file write:
  #
  #   o Fingerprints bit-vector and vector object for current fingerprints string
  #   o Any supported fingerprints object: PathLengthFingerprints, ExtendedConnectivity, and so on.
  #
  $This->{FingerprintsObject} = undef;

  # Fingerprints SD file data field label during read/write
  #
  # For file read:
  #
  # Value of AutoDetect implies use first data field containing the word Fingerprints in its
  # data field label to retrieve fingerprints string data. Otherwise, a valid data field name
  # must be specified.
  #
  # For file write:
  #
  # Data field label to use for writing fingerprints string. Default: Fingerprints
  #
  $This->{FingerprintsFieldLabel} = undef;

  # Fingerprints string for current line during read/write...
  $This->{FingerprintsString} = undef;

  # First compound data string read/write...
  $This->{FirstCompoundDataIO} = 1;

  # Current fingerprints string data compound number during read/write...
  $This->{CompoundNum} = 0;

  # Compound data string during read/write...
  $This->{CompoundString} = undef;

  # Initialize parameters for read...
  $This->_InitializeFingerprintsSDFileIORead();

  # Initialize parameters for write...
  $This->_InitializeFingerprintsSDFileIOWrite();

  return $This;
}

# Initialize class ...
sub _InitializeClass {
  #Class name...
  $ClassName = __PACKAGE__;

}

# Initialize object data for reading fingerprints SD file...
#
# Returns the object.
#
sub _InitializeFingerprintsSDFileIORead {
  my($This) = @_;

  # Compound ID mode to use for retrieving compound IDs for fingerprints...
  #
  # Specify how to generate compound IDs: use a SD file datafield value; use molname line from
  # SD file; generate a sequential ID with specific prefix; use combination of both MolName and
  # LabelPrefix with usage of LabelPrefix values for empty molname lines.
  #
  # Possible values: DataField, MolName, LabelPrefix, or MolNameOrLabelPrefix. Default: LabelPrefix.
  #
  # For MolNameOrLabelPrefix value of CompoundIDMode, molname line in SD file takes precedence over
  # sequential compound IDs generated using LabelPrefix and only empty molname values are replaced
  # with sequential compound IDs.
  #
  $This->{CompoundIDMode} = 'LabelPrefix';

  #
  # Compound ID data field label name whose value is used as compound ID during DataField value of
  # CompoundIDMode
  #
  $This->{CompoundIDFieldLabel} = undef;

  # A prefix string used for generating compound IDs like LabelPrefixString<Number> during LabelPrefix
  # or MolNameOrLabelPrefix value of CompoundIDMode. Default value, Cmpd, generates compound IDs
  # which look like Cmpd<Number>.
  #
  $This->{CompoundIDPrefix} = 'Cmpd';

  # Validate fingerprints data of each compound before generating fingerprints objects.
  # Validation is on by default (1); Read skips the per compound validation step for a
  # false value.
  #
  $This->{ValidateData} = 1;

  # Level of detail to print during validation of data for invalid or missing data...
  $This->{DetailLevel} = 1;

  # Number of missing and invalid fingerprints string data compound strings...
  $This->{NumOfCmpdsWithMissingData} = 0;
  $This->{NumOfCmpdsWithInvalidData} = 0;

  # Compound ID for current fingerprints string...
  $This->{CompoundID} = undef;

  # Compound data field labels and values map for current compound data...
  %{$This->{DataFieldLabelsAndValues}} = ();

  # Status of data in fingerprints SD file...
  $This->{ValidFileData} = 0;

  $This->{ValidCompoundIDField} = 0;
  $This->{ValidFingerprintsField} = 0;

  $This->{ValidFingerprintsStringMode} = 0;

  return $This;
}

# Initialize object data for writing fingerprints SD file...
#
# Returns the object.
#
sub _InitializeFingerprintsSDFileIOWrite {
  my($This) = @_;

  # Fingerprints bit vector string format...
  #
  # Possible values: BinaryString or HexadecimalString [Default]
  #
  # Default BitStringFormat is set during first write using Fingerprints::FingerprintsStringUtil::GetDefaultBitStringFormat.
  #
  $This->{BitStringFormat} = undef;

  # Bits order in fingerprints bit vector string...
  #
  # Ascending - First bit in each byte as the lowest bit [Default]
  # Descending - First bit in each byte as the highest bit
  #
  # Default BitsOrder is set during first write using Fingerprints::FingerprintsStringUtil::GetDefaultBitsOrder.
  #
  $This->{BitsOrder} = undef;

  # Fingerprints vector string format...
  #
  # Possible values: IDsAndValuesString, IDsAndValuesPairsString, ValuesAndIDsString, ValuesAndIDsPairsString, ValuesString
  #
  # Default VectorStringFormat is set during first write using Fingerprints::FingerprintsStringUtil::GetDefaultVectorStringFormat.
  # For fingerprints vector object containing vector NumericalValues, it corresponds to IDsAndValuesString; otherwise,
  # it's set to ValuesString.
  #
  $This->{VectorStringFormat} = undef;

  # Overwriting existing file...
  $This->{Overwrite} = 0;

  return $This;
}

# Initialize object values...
#
# Applies each supplied name and value through its Set<Name> method, insists on
# a "Name" entry that IsFingerprintsSDFile accepts, and then finishes setup for
# the mode reported by GetMode: Read, or Write/Append. Croaks on failure.
# Returns the object.
#
sub _InitializeFingerprintsSDFileIOProperties {
  my($This, %NamesAndValues) = @_;

  # All other property names and values along with all Set/Get<PropertyName> methods
  # are implemented on-demand using ObjectProperty class.

  my($Name, $Value, $MethodName);
  while (($Name, $Value) = each  %NamesAndValues) {
    $MethodName = "Set${Name}";
    $This->$MethodName($Value);
  }

  if (!exists $NamesAndValues{Name}) {
    croak "Error: ${ClassName}->New: Object can't be instantiated without specifying file name...";
  }

  # Make sure it's a fingerprints file...
  $Name = $NamesAndValues{Name};
  if (!$This->IsFingerprintsSDFile($Name)) {
    croak "Error: ${ClassName}->New: Object can't be instantiated: File, $Name, doesn't appear to be fingerprints format...";
  }

  if ($This->GetMode() =~ /^Read$/i) {
    $This->_InitializeFingerprintsSDFileIOReadProperties(%NamesAndValues);
  }
  elsif ($This->GetMode() =~ /^(Write|Append)$/i) {
    $This->_InitializeFingerprintsSDFileIOWriteProperties(%NamesAndValues);
  }

  return $This;
}

# Initialize object properties for reading fingerprints SD file...
#
# Fills in AutoDetect defaults for FingerprintsStringMode and
# FingerprintsFieldLabel, croaks when CompoundIDMode is DataField without a
# CompoundIDFieldLabel, and then inspects the file. Returns the object.
#
sub _InitializeFingerprintsSDFileIOReadProperties {
  my($This, %NamesAndValues) = @_;

  # Set default value for FingerprintsStringMode...
  if (!$This->{FingerprintsStringMode}) {
    $This->{FingerprintsStringMode} = 'AutoDetect';
  }

  # Set default value for FingerprintsFieldLabel...
  if (!$This->{FingerprintsFieldLabel}) {
    $This->{FingerprintsFieldLabel} = 'AutoDetect';
  }

  # Check compound ID data field...
  if (($This->{CompoundIDMode} =~ /^DataField$/i) && (!defined($This->{CompoundIDFieldLabel}))) {
    croak "Error: ${ClassName}->New: Object can't be instantiated: Compound ID data field label must be specified using \"CompoundIDFieldLabel\" during \"DataField\" value of \"CompoundIDMode\"...";
  }

  $This->_PrepareForReadingFingerprintsSDFileData();

  return $This;
}

# Initialize object properties for writing fingerprints SD file...
#
# Requires an explicit FingerprintsStringMode of FingerprintsBitVectorString or
# FingerprintsVectorString (AutoDetect is rejected for write/append), defaults
# FingerprintsFieldLabel to Fingerprints, and prepares for writing. Croaks on
# invalid input. Returns the object.
#
sub _InitializeFingerprintsSDFileIOWriteProperties {
  my($This, %NamesAndValues) = @_;

  # Check FingerprintsStringMode value...
  if (!exists $NamesAndValues{FingerprintsStringMode}) {
    croak "Error: ${ClassName}->New: Object can't be instantiated without specifying FingerprintsStringMode...";
  }

  if ($This->{FingerprintsStringMode} !~ /^(FingerprintsBitVectorString|FingerprintsVectorString)$/i) {
    croak "Error: ${ClassName}->New: Object can't be instantiated: FingerprintsStringMode value, $This->{FingerprintsStringMode}, is not valid; Supported values for write/append: FingerprintsBitVectorString or FingerprintsVectorString...";
  }

  # Set default value for FingerprintsFieldLabel...
  if (!$This->{FingerprintsFieldLabel}) {
    $This->{FingerprintsFieldLabel} = 'Fingerprints';
  }

  $This->_PrepareForWritingFingerprintsSDFileData();

  return $This;
}

# Set FingerprintsStringMode...
#
# Accepts AutoDetect, FingerprintsBitVectorString or FingerprintsVectorString
# (case insensitive) and croaks on any other value. Returns the object.
#
sub SetFingerprintsStringMode {
  my($This, $Value) = @_;

  # AutoDetect - automatically detect format of fingerprints string
  # FingerprintsBitVectorString - Bit vector fingerprints string format
  # FingerprintsVectorString - Vector fingerprints string format

  if ($Value !~ /^(AutoDetect|FingerprintsBitVectorString|FingerprintsVectorString)$/i) {
    croak "Error: ${ClassName}->SetFingerprintsStringMode: FingerprintsStringMode value, $Value, is not valid; Supported values: AutoDetect, FingerprintsBitVectorString or FingerprintsVectorString...";
  }

  $This->{FingerprintsStringMode} = $Value;

  return $This;
}

# Set CompoundIDMode...
#
# Accepts DataField, MolName, LabelPrefix or MolNameOrLabelPrefix (case
# insensitive) and croaks on any other value. Returns the object.
#
sub SetCompoundIDMode {
  my($This, $Value) = @_;

  if ($Value !~ /^(DataField|MolName|LabelPrefix|MolNameOrLabelPrefix)$/i) {
    # Report this method, not SetFingerprintsStringMode, as the source of the error...
    croak "Error: ${ClassName}->SetCompoundIDMode: CompoundIDMode value, $Value, is not valid; Supported values: DataField, MolName, LabelPrefix, or MolNameOrLabelPrefix...";
  }

  $This->{CompoundIDMode} = $Value;

  return $This;
}

# Set DetailLevel...
#
# Accepts any value for which TextUtil::IsPositiveInteger is true and croaks
# otherwise. Returns the object.
#
sub SetDetailLevel {
  my($This, $Value) = @_;

  if (!TextUtil::IsPositiveInteger($Value)) {
    croak "Error: ${ClassName}->SetDetailLevel: DetailLevel value, $Value, is not valid; Supported values: > 0...";
  }

  $This->{DetailLevel} = $Value;

  return $This;
}

# Set BitStringFormat...
#
# Accepts BinaryString or HexadecimalString (case insensitive) and croaks on
# any other value. Returns the object.
#
sub SetBitStringFormat {
  my($This, $Value) = @_;

  if ($Value !~ /^(BinaryString|HexadecimalString)$/i) {
    croak "Error: ${ClassName}->SetBitStringFormat: BitStringFormat value, $Value, is not valid; Supported values: BinaryString or HexadecimalString...";
  }

  $This->{BitStringFormat} = $Value;

  return $This;
}

# Set BitsOrder...
#
# Accepts Ascending or Descending (case insensitive) and croaks on any other
# value. Returns the object.
#
sub SetBitsOrder {
  my($This, $Value) = @_;

  # Ascending - First bit in each byte as the lowest bit
  # Descending - First bit in each byte as the highest bit
  #
  if ($Value !~ /^(Ascending|Descending)$/i) {
    # Name the property actually being set in the error...
    croak "Error: ${ClassName}->SetBitsOrder: BitsOrder value, $Value, is not valid; Supported values: Ascending or Descending...";
  }

  $This->{BitsOrder} = $Value;

  return $This;
}

# Set VectorStringFormat...
#
# Accepts IDsAndValuesString, IDsAndValuesPairsString, ValuesAndIDsString,
# ValuesAndIDsPairsString or ValuesString (case insensitive) and croaks on any
# other value. Returns the object.
#
sub SetVectorStringFormat {
  my($This, $Value) = @_;

  # Possible values: IDsAndValuesString, IDsAndValuesPairsString, ValuesAndIDsString, ValuesAndIDsPairsString, ValuesString

  if ($Value !~ /^(IDsAndValuesString|IDsAndValuesPairsString|ValuesAndIDsString|ValuesAndIDsPairsString|ValuesString)$/i) {
    # Name the property actually being set in the error...
    croak "Error: ${ClassName}->SetVectorStringFormat: VectorStringFormat value, $Value, is not valid; Supported values: IDsAndValuesString, IDsAndValuesPairsString, ValuesAndIDsString, ValuesAndIDsPairsString, or ValuesString...";
  }

  $This->{VectorStringFormat} = $Value;

  return $This;
}

# Get compound string for current compound with optional removal of fingerprints data..
#
# With a true RemoveFingerprintsData argument, and fingerprints data present in
# the compound string, returns the result of removing the FingerprintsFieldLabel
# data field via SDFileUtil::RemoveCmpdDataHeaderLabelAndValue; the stored
# compound string itself is left untouched. Otherwise returns the stored
# compound string as it is.
#
sub GetCompoundString {
  my($This, $RemoveFingerprintsData) = @_;

  $RemoveFingerprintsData = defined $RemoveFingerprintsData ? $RemoveFingerprintsData : 0;

  if ($RemoveFingerprintsData && $This->_IsFingerprintsDataPresentInCompoundString()) {
    return SDFileUtil::RemoveCmpdDataHeaderLabelAndValue($This->{CompoundString}, $This->{FingerprintsFieldLabel});
  }

  return $This->{CompoundString};
}

# Set compound string for current compound..
#
# Stores the supplied string as the compound string of the current compound
# and hands back the object.
#
sub SetCompoundString {
  my $This = shift;

  ($This->{CompoundString}) = @_;

  return $This;
}

# Fetch the fingerprints object of the current compound: a fingerprints
# bit-vector or fingerprints vector object, or any of the supported fingerprints
# objects such as PathLengthFingerprints, ExtendedConnectivity, and so on.
#
sub GetFingerprints {
  my $This = shift;

  my $Fingerprints = $This->{FingerprintsObject};

  return $Fingerprints;
}

# Store the fingerprints object of the current compound and hand back the object...
#
sub SetFingerprints {
  my $This = shift;

  ($This->{FingerprintsObject}) = @_;

  return $This;
}

# Fetch the fingerprints string of the current compound; the literal string None
# stands in for a missing (false) value...
#
sub GetFingerprintsString {
  my $This = shift;

  if ($This->{FingerprintsString}) {
    return $This->{FingerprintsString};
  }

  return 'None';
}

# Store the fingerprints string of the current compound and hand back the object...
#
sub SetFingerprintsString {
  my $This = shift;

  ($This->{FingerprintsString}) = @_;

  return $This;
}

# Report, as 1 or 0, whether the fingerprints SD file was found to hold valid data...
#
sub IsFingerprintsFileDataValid {
  my $This = shift;

  return 1 if $This->{ValidFileData};

  return 0;
}

# Report, as 1 or 0, whether a fingerprints object is available for the current compound...
#
sub IsFingerprintsDataValid {
  my $This = shift;

  return 0 unless defined $This->{FingerprintsObject};

  return 1;
}

# Advance to the next compound data string, process it and build the matching
# fingerprints object.
#
# Returns undef once no more compound data is available and the object
# otherwise. The object is also returned for compounds skipped because of
# invalid file data or failed validation; IsFingerprintsDataValid tells the
# two situations apart.
#
sub Read {
  my $This = shift;

  # Out of compound data...
  $This->_ReadCompoundDataString() or return undef;

  # A file already known to be invalid isn't processed any further; the compound
  # only gets counted during validation...
  unless ($This->{ValidFileData}) {
    $This->{NumOfCmpdsWithMissingData} += 1 if $This->{ValidateData};
    return $This;
  }

  # Skip compounds failing validation...
  if ($This->{ValidateData} && !$This->_ValidateReadCompoundDataString()) {
    return $This;
  }

  # Look the fingerprints field up again: non-validated compound data may not contain it...
  my $FieldLabel = $This->{FingerprintsFieldLabel};
  $This->{FingerprintsString} = $This->{DataFieldLabelsAndValues}{$FieldLabel}
    if exists $This->{DataFieldLabelsAndValues}{$FieldLabel};

  # Fingerprints object first, compound ID next...
  $This->_GenerateFingerprintsObject();
  $This->_GenerateCompoundID();

  return $This;
}

# Synonym for Read...
#
sub Next {
  my $This = shift;

  return $This->Read();
}

# Pull the next compound data string off the file handle and map its data
# field labels to their values.
#
# Returns 0 when nothing could be read and 1 otherwise.
#
sub _ReadCompoundDataString {
  my $This = shift;

  $This->_ProcessFirstCompoundDataStringRead() if $This->{FirstCompoundDataIO};

  # Wipe out data of the previous compound...
  $This->_InitializeReadCompoundDataString();

  my $CmpdString = SDFileUtil::ReadCmpdString($This->{FileHandle});
  $This->{CompoundString} = $CmpdString;
  return 0 unless $CmpdString;

  $This->{CompoundNum} += 1;

  # Data field labels and values of the current compound...
  my @Lines = split "\n", $CmpdString;
  %{$This->{DataFieldLabelsAndValues}} = SDFileUtil::GetCmpdDataHeaderLabelsAndValues(\@Lines);

  return 1;
}

# Initialize compound data string for reading...
#
# Clears compound ID, compound string, data field map, fingerprints object and
# fingerprints string of the previously read compound. Returns the object.
#
sub _InitializeReadCompoundDataString {
  my($This) = @_;

  $This->{CompoundID} = undef;
  $This->{CompoundString} = undef;

  %{$This->{DataFieldLabelsAndValues}} = ();

  $This->{FingerprintsObject} = undef;
  $This->{FingerprintsString} = undef;

  return $This;
}

# Validate compound data string containing fingerprints data...
#
# The current compound is valid when it has a value for the fingerprints data
# field, that value passes AreFingerprintsStringValuesValid, and its type and
# description equal, ignoring case, the ones recorded for the first compound.
# Missing and invalid compounds are counted separately; warnings are issued for
# DetailLevel >= 2 and include the offending data for DetailLevel >= 3.
#
# Returns 1 for valid data and 0 otherwise.
#
sub _ValidateReadCompoundDataString {
  my($This) = @_;
  my($FingerprintsFieldLabel);

  $FingerprintsFieldLabel = $This->{FingerprintsFieldLabel};

  # Check for missing data...
  if (!exists $This->{DataFieldLabelsAndValues}{$FingerprintsFieldLabel}) {
    # Missing data...
    $This->{NumOfCmpdsWithMissingData} += 1;
    if ($This->{DetailLevel} >= 3) {
      carp "Warning: ${ClassName}->_ValidateReadCompoundDataString: Compound number $This->{CompoundNum} contains no fingerprints data: $This->{CompoundString}...";
    }
    elsif ($This->{DetailLevel} >= 2) {
      carp "Warning: ${ClassName}->_ValidateReadCompoundDataString: Compound number $This->{CompoundNum} contains no fingerprints data...";
    }
    return 0;
  }

  # Check for invalid data...
  my($FingerprintsString, $InvalidFingerprintsData, $FingerprintsType, $FingerprintsDescription);

  $FingerprintsString = $This->{DataFieldLabelsAndValues}{$FingerprintsFieldLabel};
  $InvalidFingerprintsData = 0;

  if (Fingerprints::FingerprintsStringUtil::AreFingerprintsStringValuesValid($FingerprintsString)) {
    ($FingerprintsType, $FingerprintsDescription) = Fingerprints::FingerprintsStringUtil::GetFingerprintsStringTypeAndDescription($FingerprintsString);
    if (defined($FingerprintsType) && defined($FingerprintsDescription)) {
      # Type and description come from file data: quote them to get a literal comparison
      # instead of having their characters interpreted as a regular expression...
      if ($This->{FirstFingerprintsStringType} !~ /^\Q$FingerprintsType\E$/i || $This->{FirstFingerprintsStringDescription} !~ /^\Q$FingerprintsDescription\E$/i) {
        $InvalidFingerprintsData = 1;
      }
    }
    else {
      $InvalidFingerprintsData = 1;
    }
  }
  else {
    $InvalidFingerprintsData = 1;
  }

  if ($InvalidFingerprintsData) {
    $This->{NumOfCmpdsWithInvalidData} += 1;
    if ($This->{DetailLevel} >= 3) {
      # Show the offending fingerprints data; this class has no DataLine property...
      carp "Warning: ${ClassName}->_ValidateReadCompoundDataString: Compound number $This->{CompoundNum} contains invalid fingerprints data: $FingerprintsString...";
    }
    elsif ($This->{DetailLevel} >= 2) {
      carp "Warning: ${ClassName}->_ValidateReadCompoundDataString: Compound number $This->{CompoundNum} contains invalid fingerprints data...";
    }
    return 0;
  }

  return 1;
}

# Setup fingerprints compound ID for fingerprints string...
#
# Depending on CompoundIDMode, the ID is: CompoundIDPrefix followed by the
# compound number (LabelPrefix); the value of the CompoundIDFieldLabel data
# field or an empty string for a missing field (DataField); the first line of
# the compound string (MolName); or that first line with a fallback to the
# prefixed compound number for an empty one (MolNameOrLabelPrefix).
# Returns the object.
#
sub _GenerateCompoundID {
  my($This) = @_;
  my($CompoundID, $MolName);

  $CompoundID = '';

  if ($This->{CompoundIDMode} =~ /^LabelPrefix$/i) {
    $CompoundID = "$This->{CompoundIDPrefix}$This->{CompoundNum}";
  }
  elsif ($This->{CompoundIDMode} =~ /^DataField$/i) {
    my($SpecifiedDataField);
    $SpecifiedDataField = $This->{CompoundIDFieldLabel};
    $CompoundID = exists $This->{DataFieldLabelsAndValues}{$SpecifiedDataField} ? $This->{DataFieldLabelsAndValues}{$SpecifiedDataField} : '';
  }
  elsif ($This->{CompoundIDMode} =~ /^MolNameOrLabelPrefix$/i) {
    ($MolName) = split "\n", $This->{CompoundString};
    $CompoundID = TextUtil::IsNotEmpty($MolName) ? $MolName : "$This->{CompoundIDPrefix}$This->{CompoundNum}";
  }
  elsif ($This->{CompoundIDMode} =~ /^MolName$/i) {
    ($MolName) = split "\n", $This->{CompoundString};
    $CompoundID = $MolName;
  }

  $This->{CompoundID} = $CompoundID;

  return $This;
}

# Process first compound data string read...
#
# Only clears the FirstCompoundDataIO flag. Returns the object.
#
sub _ProcessFirstCompoundDataStringRead {
  my($This) = @_;

  $This->{FirstCompoundDataIO} = 0;

  return $This;
}

# Get ready for reading fingerprints SD file...
#
# Retrieves the data fields of the first compound, validates compound ID field,
# fingerprints field and fingerprints string mode against them, and records the
# combined outcome in ValidFileData. Returns the object.
#
sub _PrepareForReadingFingerprintsSDFileData {
  my($This) = @_;

  # Retrieve SD file data fields information....
  $This->_RetrieveSDFileDataFields();

  # Validate compound and fingerprints field information...
  $This->_ValidateReadCompoundIDField();
  $This->_ValidateReadFingerprintsField();

  # Validate fingerprints string mode information...
  if ($This->{ValidFingerprintsField}) {
    $This->_ValidateReadFingerprintsStringMode();
  }

  # Set status of SD file data...
  $This->{ValidFileData} = ($This->{ValidCompoundIDField} && $This->{ValidFingerprintsField} && $This->{ValidFingerprintsStringMode}) ? 1 : 0;

  return $This;
}

# Retrieve information data fields and fingerprints string...
#
# Opens the SD file on a private file handle, reads its first compound string
# and stores the data field labels and values of that compound in
# FirstDataFieldLabelsAndValues. Croaks for a missing or unreadable file.
# Returns the object.
#
sub _RetrieveSDFileDataFields {
  my($This) = @_;
  my($SDFile, $SDFileHandle, $CmpdString, @CmpdLines);

  $SDFile = $This->{Name};

  if (!(-e $SDFile)) {
    croak "Error: ${ClassName}->New: Object can't be instantiated: File, $SDFile, doesn't exist...";
  }

  # Three argument open on a lexical handle: the file name is never interpreted as an
  # open mode and no package level file handle is clobbered...
  if (!open $SDFileHandle, '<', $SDFile) {
    croak "Error: ${ClassName}->New: Object can't be instantiated: Couldn't open input SD file $SDFile: $! ...";
  }
  $CmpdString = SDFileUtil::ReadCmpdString($SDFileHandle);
  close $SDFileHandle;

  # Set up data field labels and values for first compound string data; a file without any
  # compound data leads to an empty set of lines...
  @CmpdLines = defined($CmpdString) ? split("\n", $CmpdString) : ();

  %{$This->{FirstDataFieldLabelsAndValues}} = ();
  %{$This->{FirstDataFieldLabelsAndValues}} = SDFileUtil::GetCmpdDataHeaderLabelsAndValues(\@CmpdLines);

  return $This;
}

# Validate compound ID field information...
#
# During DataField value of CompoundIDMode, the data field named by
# CompoundIDFieldLabel must exist in the first compound. Sets
# ValidCompoundIDField and returns 1 on success; warns and returns 0 otherwise.
#
sub _ValidateReadCompoundIDField {
  my($This) = @_;
  my($SpecifiedDataField);

  $This->{ValidCompoundIDField} = 0;

  if ($This->{CompoundIDMode} =~ /^DataField$/i) {
    $SpecifiedDataField = $This->{CompoundIDFieldLabel};
    if (! exists $This->{FirstDataFieldLabelsAndValues}{$SpecifiedDataField}) {
      carp "Warning: ${ClassName}->_ValidateReadCompoundIDField: Compound ID data field, $SpecifiedDataField, specified using \"CompoundIDFieldLabel\" in \"DataField\" \"CompoundIDMode\" doesn't exist...";
      return 0;
    }
  }

  $This->{ValidCompoundIDField} = 1;

  return 1;
}

# Validate fingerprints string field information...
#
# An explicitly specified FingerprintsFieldLabel must exist in the first
# compound. For AutoDetect, a data field label containing Fingerprints is
# picked and stored as FingerprintsFieldLabel. Sets ValidFingerprintsField and
# returns 1 on success; warns and returns 0 otherwise.
#
sub _ValidateReadFingerprintsField {
  my($This) = @_;
  my($FingerprintsFieldLabel);

  $This->{ValidFingerprintsField} = 0;

  $FingerprintsFieldLabel = '';

  if ($This->{FingerprintsFieldLabel} !~ /^AutoDetect$/i) {
    $FingerprintsFieldLabel = $This->{FingerprintsFieldLabel};
    if (! exists $This->{FirstDataFieldLabelsAndValues}{$FingerprintsFieldLabel}) {
      carp "Warning: ${ClassName}->_ValidateReadFingerprintsField: Fingerprints data field value, $FingerprintsFieldLabel, specified using \"FingerprintsFieldLabel\" doesn't exist...";
      return 0;
    }
  }
  else {
    # Make sure default fingerprints field does exist...
    my($FingerprintsFieldFound, $DataFieldLabel);
    $FingerprintsFieldFound = 0;

    # The labels live in a hash, which keeps no file order: go through them in sorted order
    # to pick the same label on every run for multiple matching labels...
    DATAFIELDLABEL: for $DataFieldLabel (sort keys %{$This->{FirstDataFieldLabelsAndValues}}) {
      if ($DataFieldLabel =~ /Fingerprints/i) {
        $FingerprintsFieldFound = 1;
        $FingerprintsFieldLabel = $DataFieldLabel;
        last DATAFIELDLABEL;
      }
    }
    if (!$FingerprintsFieldFound) {
      carp "Warning: ${ClassName}->_ValidateReadFingerprintsField: Data field label containing \"Fingerprints\" string in its name doesn't exist...";
      return 0;
    }
  }

  $This->{ValidFingerprintsField} = 1;
  $This->{FingerprintsFieldLabel} = $FingerprintsFieldLabel;

  return 1;
}

# Validate fingerprints string mode information...
#
# Compares the type of the fingerprints string of the first compound against
# FingerprintsStringMode, or identifies the mode from it during AutoDetect.
# On success, sets FingerprintsBitVectorStringMode or
# FingerprintsVectorStringMode, records type and description of the first
# fingerprints string, sets ValidFingerprintsStringMode and returns 1. Warns
# and returns 0 otherwise, leaving all of these properties reset.
#
sub _ValidateReadFingerprintsStringMode {
  my($This) = @_;
  my($FingerprintsBitVectorStringMode, $FingerprintsVectorStringMode, $FirstFingerprintsStringType, $FirstFingerprintsStringDescription, $FingerprintsFieldLabel, $FingerprintsType, $FingerprintsDescription);

  $This->{ValidFingerprintsStringMode} = 0;

  $This->{FingerprintsBitVectorStringMode} = 0;
  $This->{FingerprintsVectorStringMode} = 0;

  $This->{FirstFingerprintsStringType} = '';
  $This->{FirstFingerprintsStringDescription} = '';

  $FingerprintsBitVectorStringMode = 0;
  $FingerprintsVectorStringMode = 0;

  $FirstFingerprintsStringType = '';
  $FirstFingerprintsStringDescription = '';

  $FingerprintsFieldLabel = $This->{FingerprintsFieldLabel};

  ($FingerprintsType, $FingerprintsDescription) = Fingerprints::FingerprintsStringUtil::GetFingerprintsStringTypeAndDescription($This->{FirstDataFieldLabelsAndValues}{$FingerprintsFieldLabel});

  # An unidentified type is handled as an empty one: it matches none of the supported types
  # and shows up as an empty value in warnings...
  if (!defined $FingerprintsType) {
    $FingerprintsType = '';
  }

  if ($This->{FingerprintsStringMode} =~ /^FingerprintsBitVectorString$/i) {
    if ($FingerprintsType !~ /^FingerprintsBitVector$/i) {
      carp "Warning: ${ClassName}->_ValidateReadFingerprintsStringMode: First fingerprint string data type, $FingerprintsType, doesn't match value, FingerprintsBitVectorString, specified using \"FingerprintsStringMode\"...";
      return 0;
    }
    $FingerprintsBitVectorStringMode = 1;
    $FirstFingerprintsStringType = 'FingerprintsBitVector';
    $FirstFingerprintsStringDescription = $FingerprintsDescription;
  }
  elsif ($This->{FingerprintsStringMode} =~ /^FingerprintsVectorString$/i) {
    if ($FingerprintsType !~ /^FingerprintsVector$/i) {
      carp "Warning: ${ClassName}->_ValidateReadFingerprintsStringMode: First fingerprint string data type, $FingerprintsType, doesn't match value, FingerprintsVectorString, specified using \"FingerprintsStringMode\"...";
      return 0;
    }
    $FingerprintsVectorStringMode = 1;
    $FirstFingerprintsStringType = 'FingerprintsVector';
    $FirstFingerprintsStringDescription = $FingerprintsDescription;
  }
  else {
    # AutoDetect mode...
    if ($FingerprintsType =~ /^FingerprintsBitVector$/i) {
      $FingerprintsBitVectorStringMode = 1;
    }
    elsif ($FingerprintsType =~ /^FingerprintsVector$/i) {
      $FingerprintsVectorStringMode = 1;
    }
    else {
      carp "Warning: ${ClassName}->_ValidateReadFingerprintsStringMode: First fingerprint string data type, $FingerprintsType, identified during, AutoDetect, value of \"FingerprintsStringMode\" is not valid; Supported fingerprints types: FingerprintsBitVector or FingerprintsVector...";
      return 0;
    }
    $FirstFingerprintsStringType = $FingerprintsType;
    $FirstFingerprintsStringDescription = $FingerprintsDescription;
  }

  $This->{ValidFingerprintsStringMode} = 1;

  $This->{FingerprintsBitVectorStringMode} = $FingerprintsBitVectorStringMode;
  $This->{FingerprintsVectorStringMode} = $FingerprintsVectorStringMode;

  $This->{FirstFingerprintsStringType} = $FirstFingerprintsStringType;
  $This->{FirstFingerprintsStringDescription} = $FirstFingerprintsStringDescription;

  return 1;
}

# Write fingerprints string generated from specified fingerprints,
# fingerprints-bit vector, or
# fingerprints vector object and other data to SD file...
#
# The fingerprints string is generated from the object, added to the supplied
# compound string as a data field and the outcome is written out.
# Returns the object.
#
sub WriteFingerprints {
  my $This = shift;
  my($FingerprintsObject, $CompoundString) = @_;

  # Start with a clean slate for this compound...
  $This->_InitializeWriteCompoundDataString();

  # Object first; its string form is derived from it...
  $This->{FingerprintsObject} = $FingerprintsObject;
  $This->_GenerateFingerprintsString();

  # Compound string along with its fingerprints data field...
  $This->{CompoundString} = $CompoundString;
  $This->_AddFingerprintsDataToCompoundString();

  $This->_WriteCompoundDataString();

  return $This;
}

# Write an already available fingerprints string and other data to SD file...
#
# Note:
#   o The fingerprints string goes into the file as it is: FingerprintsStringMode,
#     BitStringFormat, BitsOrder and VectorStringFormat values play no role in
#     formatting it.
#   o CompoundString contains no fingerprints data
#
# Returns the object.
#
sub WriteFingerprintsString {
  my $This = shift;
  my($FingerprintsString, $CompoundString) = @_;

  # Start with a clean slate for this compound...
  $This->_InitializeWriteCompoundDataString();

  # String first; its object form is derived from it...
  $This->{FingerprintsString} = $FingerprintsString;
  $This->_GenerateFingerprintsObject();

  # Compound string along with its fingerprints data field...
  $This->{CompoundString} = $CompoundString;
  $This->_AddFingerprintsDataToCompoundString();

  $This->_WriteCompoundDataString();

  return $This;
}

# Reset compound string, fingerprints object and fingerprints string before
# writing a compound. Returns the object.
#
sub _InitializeWriteCompoundDataString {
  my $This = shift;

  for my $Property (qw(CompoundString FingerprintsObject FingerprintsString)) {
    $This->{$Property} = undef;
  }

  return $This;
}

# Write compound data string...
#
# Increments the compound number and prints the compound string, followed by a
# new line, to the file handle stored in FileHandle. Croaks when the print
# fails instead of silently dropping the compound. Returns the object.
#
# NOTE(review): FileHandle is not set up anywhere in this part of the file;
# presumably the FileIO::FileIO parent class provides it - confirm.
#
sub _WriteCompoundDataString {
  my($This) = @_;
  my($FileHandle);

  if ($This->{FirstCompoundDataIO}) {
    $This->_ProcessFirstCompoundDataStringWrite();
  }

  $This->{CompoundNum} += 1;
  $FileHandle = $This->{FileHandle};

  # An unchecked print would lose data without any indication of it...
  print {$FileHandle} "$This->{CompoundString}\n"
    or croak "Error: ${ClassName}->_WriteCompoundDataString: Couldn't write compound data for compound number $This->{CompoundNum}: $! ...";

  return $This;
}

# Process first compound data string write...
#
# Only clears the FirstCompoundDataIO flag. Returns the object.
#
sub _ProcessFirstCompoundDataStringWrite {
  my($This) = @_;

  $This->{FirstCompoundDataIO} = 0;

  return $This;
}

# Get ready for writing fingerprints SD file...
#
# Croaks for an already existing file unless Overwrite is set, derives the
# bit-vector/vector mode flags and ValidFingerprintsStringMode from
# FingerprintsStringMode, and sets up default values for the string formats of
# the selected mode. Returns the object.
#
sub _PrepareForWritingFingerprintsSDFileData {
  my($This) = @_;
  my($SDFile);

  $SDFile = $This->{Name};
  if (!$This->{Overwrite}) {
    if (-e $SDFile) {
      croak "Error: ${ClassName}->New: Object can't be instantiated: File, $SDFile, already exist. Use overwrite option...";
    }
  }

  # Setup FingerprintsStringMode status...

  $This->{FingerprintsBitVectorStringMode} = 0;
  $This->{FingerprintsVectorStringMode} = 0;
  $This->{ValidFingerprintsStringMode} = 0;

  if ($This->{FingerprintsStringMode} =~ /^FingerprintsBitVectorString$/i) {
    $This->{FingerprintsBitVectorStringMode} = 1;
  }
  elsif ($This->{FingerprintsStringMode} =~ /^FingerprintsVectorString$/i) {
    $This->{FingerprintsVectorStringMode} = 1;
  }

  $This->{ValidFingerprintsStringMode} = ($This->{FingerprintsBitVectorStringMode} || $This->{FingerprintsVectorStringMode}) ? 1 : 0;

  if ($This->{FingerprintsBitVectorStringMode}) {
    $This->_SetDefaultBitStringFormat();
    $This->_SetDefaultBitsOrder();
  }
  elsif ($This->{FingerprintsVectorStringMode}) {
    $This->_SetDefaultVectorStringFormat();
  }

  return $This;
}

# Set default value for bit string format...
#
# BitStringFormat is filled in from Fingerprints::FingerprintsStringUtil only
# when it doesn't already hold a true value.
#
sub _SetDefaultBitStringFormat {
  my($This) = @_;

  if (!$This->{BitStringFormat}) {
    $This->{BitStringFormat} = Fingerprints::FingerprintsStringUtil::GetDefaultBitStringFormat();
  }

  return $This;
}

# Set default value for bits order...
#
# BitsOrder is filled in from Fingerprints::FingerprintsStringUtil only when
# it doesn't already hold a true value.
#
sub _SetDefaultBitsOrder {
  my($This) = @_;

  if (!$This->{BitsOrder}) {
    $This->{BitsOrder} = Fingerprints::FingerprintsStringUtil::GetDefaultBitsOrder();
  }

  return $This;
}

# Set default value for vector string format...
#
# The default is derived from the current fingerprints object; nothing is set
# when no fingerprints object is available or a format is already present.
#
sub _SetDefaultVectorStringFormat {
  my($This) = @_;

  if (!$This->{VectorStringFormat} && $This->{FingerprintsObject}) {
    $This->{VectorStringFormat} = Fingerprints::FingerprintsStringUtil::GetDefaultVectorStringFormat($This->{FingerprintsObject});
  }

  return $This;
}

# Add fingerprints data to compound string...
#
# Any existing data field using the fingerprints field label is removed with a
# warning. The record terminator, $$$$, at the end of the compound string is
# dropped, the fingerprints data field is appended, and the terminator is put
# back at the end.
#
# Returns the object itself.
#
sub _AddFingerprintsDataToCompoundString {
  my($This) = @_;
  my($CmpdString);

  # Check and remove existing fingerprints data...
  if ($This->_IsFingerprintsDataPresentInCompoundString()) {
    carp "Warning: ${ClassName}->_AddFingerprintsDataToCompoundString: The compound string already contains fingerprints data corresponding to fingerprints field label $This->{FingerprintsFieldLabel}; It has been replaced with new fingerprints data...";
    $This->{CompoundString} = SDFileUtil::RemoveCmpdDataHeaderLabelAndValue($This->{CompoundString}, $This->{FingerprintsFieldLabel});
  }

  # Treat a missing compound string as empty to avoid uninitialized value warnings...
  $CmpdString = defined($This->{CompoundString}) ? $This->{CompoundString} : '';

  # Drop record terminator along with any line ending following it; otherwise,
  # a trailing newline or carriage return would be left behind as a stray line...
  $CmpdString =~ s/\$\$\$\$\s*\z//;

  # Data header must start on its own line...
  if (length($CmpdString) && $CmpdString !~ /\n\z/) {
    $CmpdString .= "\n";
  }

  $This->{CompoundString} = "${CmpdString}> <$This->{FingerprintsFieldLabel}>\n$This->{FingerprintsString}\n\n\$\$\$\$";

  return $This;
}

# Is fingerprints data already present in compound string?
#
# Returns 1 when the compound string contains the fingerprints field label
# enclosed in angle brackets; otherwise, 0. An empty compound string or an
# empty field label always results in 0.
#
sub _IsFingerprintsDataPresentInCompoundString {
  my($This) = @_;
  my($FingerprintsFieldLabel);

  if (TextUtil::IsEmpty($This->{CompoundString}) || TextUtil::IsEmpty($This->{FingerprintsFieldLabel})) {
    return 0;
  }

  $FingerprintsFieldLabel = $This->{FingerprintsFieldLabel};

  # Match the label literally: it may contain characters, such as parentheses or
  # periods, which would otherwise be interpreted as regular expression syntax...
  return ($This->{CompoundString} =~ /<\Q$FingerprintsFieldLabel\E>/) ? 1 : 0;
}

# Generate fingerprints object using current fingerprints string...
#
# Returns the object itself after parsing the string according to the active
# fingerprints string mode, or when there is no fingerprints string to parse.
# Nothing is returned when neither bit vector nor vector string mode is active.
#
sub _GenerateFingerprintsObject {
  my($This) = @_;

  $This->{FingerprintsObject} = undef;

  if (!$This->{FingerprintsString}) {
    return $This;
  }

  if ($This->{FingerprintsBitVectorStringMode}) {
    $This->{FingerprintsObject} = Fingerprints::FingerprintsStringUtil::ParseFingerprintsBitVectorString($This->{FingerprintsString});
  }
  elsif ($This->{FingerprintsVectorStringMode}) {
    $This->{FingerprintsObject} = Fingerprints::FingerprintsStringUtil::ParseFingerprintsVectorString($This->{FingerprintsString});
  }
  else {
    # Bare return: undef in scalar context and an empty list in list context...
    return;
  }

  return $This;
}

# Generate fingerprints string using current fingerprints object...
#
# The fingerprints string is reset to an empty string and stays empty when no
# fingerprints object is available or no fingerprints string mode is active.
#
# Returns the object itself.
#
sub _GenerateFingerprintsString {
  my($This) = @_;

  $This->{FingerprintsString} = '';

  if (!$This->{FingerprintsObject}) {
    return $This;
  }

  if ($This->{FingerprintsBitVectorStringMode}) {
    $This->{FingerprintsString} = Fingerprints::FingerprintsStringUtil::GenerateFingerprintsString($This->{FingerprintsObject}, $This->{BitStringFormat}, $This->{BitsOrder});
  }
  elsif ($This->{FingerprintsVectorStringMode}) {
    $This->{FingerprintsString} = Fingerprints::FingerprintsStringUtil::GenerateFingerprintsString($This->{FingerprintsObject}, $This->{VectorStringFormat});
  }

  return $This;
}

# Is it a fingerprints file?
#
# May be called as a function with a file name, or as a method with the file
# name following a FingerprintsSDFileIO object. The status reported by
# FileUtil::CheckFileType for the file extensions sdf and sd is returned.
#
sub IsFingerprintsSDFile ($;$) {
  my(@Parameters) = @_;
  my($FileName, $Status);

  # For method style invocation, the file name follows the object...
  $FileName = $Parameters[0];
  if ((@Parameters == 2) && (_IsFingerprintsSDFileIO($Parameters[0]))) {
    $FileName = $Parameters[1];
  }

  # Check file extension...
  $Status = FileUtil::CheckFileType($FileName, "sdf sd");

  return $Status;
}

# Is it a FingerprintsSDFileIO object?
#
# Returns 1 for a blessed reference belonging to this class or a class derived
# from it; otherwise, 0.
#
sub _IsFingerprintsSDFileIO {
  my $Candidate = shift;

  # Anything which is not a blessed reference can't be an object of this class...
  return 0 unless Scalar::Util::blessed($Candidate);

  return $Candidate->isa($ClassName) ? 1 : 0;
}
