MayaChemTools

   1 package FileIO::FingerprintsSDFileIO;
   2 #
   3 # File: FingerprintsSDFileIO.pm
   4 # Author: Manish Sud <msud@san.rr.com>
   5 #
   6 # Copyright (C) 2024 Manish Sud. All rights reserved.
   7 #
   8 # This file is part of MayaChemTools.
   9 #
  10 # MayaChemTools is free software; you can redistribute it and/or modify it under
  11 # the terms of the GNU Lesser General Public License as published by the Free
  12 # Software Foundation; either version 3 of the License, or (at your option) any
  13 # later version.
  14 #
  15 # MayaChemTools is distributed in the hope that it will be useful, but without
  16 # any warranty; without even the implied warranty of merchantability or fitness
  17 # for a particular purpose.  See the GNU Lesser General Public License for more
  18 # details.
  19 #
  20 # You should have received a copy of the GNU Lesser General Public License
  21 # along with MayaChemTools; if not, see <http://www.gnu.org/licenses/> or
  22 # write to the Free Software Foundation Inc., 59 Temple Place, Suite 330,
  23 # Boston, MA, 02111-1307, USA.
  24 #
  25 
  26 use strict;
  27 use Carp;
  28 use Exporter;
  29 use Scalar::Util ();
  30 use TextUtil ();
  31 use FileUtil ();
  32 use SDFileUtil ();
  33 use Fingerprints::FingerprintsStringUtil ();
  34 use FileIO::FileIO;
  35 
  # Package-level symbols needed for inheritance and Exporter...
  use vars qw(@ISA @EXPORT @EXPORT_OK %EXPORT_TAGS);

  # Inherit from FileIO::FileIO; nothing is exported by default and
  # IsFingerprintsSDFile is exported on request...
  @ISA         = qw(FileIO::FileIO Exporter);
  @EXPORT      = ();
  @EXPORT_OK   = qw(IsFingerprintsSDFile);
  %EXPORT_TAGS = (all => [@EXPORT, @EXPORT_OK]);

  # Class name used in error and warning messages; assigned by _InitializeClass...
  my $ClassName;
  _InitializeClass();
  47 
  # Class constructor: create the base class object, install default object
  # data and then apply the specified property names and values.
  #
  # Parameters: class name (or an existing object) followed by a list of
  #             PropertyName => Value pairs.
  # Returns:    the initialized object.
  #
  sub new {
    my($Class, %NamesAndValues) = @_;

    my $This = $Class->SUPER::new();
    bless $This, (ref($Class) || $Class);

    # Defaults go in first so that explicitly specified properties override them...
    $This->_InitializeFingerprintsSDFileIO();
    $This->_InitializeFingerprintsSDFileIOProperties(%NamesAndValues);

    return $This;
  }
  61 
  # Install default values for object data shared by read and write modes and
  # then invoke the read and write specific initializers.
  #
  # Returns: the object.
  #
  sub _InitializeFingerprintsSDFileIO {
    my($This) = @_;
    my($Property);

    # Properties without an initial value:
    #
    #   FingerprintsStringMode
    #     Fingerprints string data format. For read: AutoDetect,
    #     FingerprintsBitVectorString or FingerprintsVectorString with AutoDetect
    #     as the default. For write: FingerprintsBitVectorString or
    #     FingerprintsVectorString with no default.
    #
    #   FingerprintsObject
    #     For read: fingerprints bit-vector or vector object for the current
    #     fingerprints string. For write: additionally any supported fingerprints
    #     object such as PathLengthFingerprints, ExtendedConnectivity and so on.
    #
    #   FingerprintsFieldLabel
    #     SD file data field label holding the fingerprints string. For read:
    #     AutoDetect implies using a data field containing the word Fingerprints
    #     in its label; otherwise a valid data field name must be specified.
    #     For write: label used for writing the fingerprints string with
    #     Fingerprints as the default.
    #
    #   FingerprintsString
    #     Fingerprints string for the current compound.
    #
    #   CompoundString
    #     Compound data string for the current compound.
    #
    for $Property (qw(FingerprintsStringMode FingerprintsObject FingerprintsFieldLabel FingerprintsString CompoundString)) {
      $This->{$Property} = undef;
    }

    # Flag indicating that no compound data has been read or written yet...
    $This->{FirstCompoundDataIO} = 1;

    # Number of the current compound during read/write...
    $This->{CompoundNum} = 0;

    # Mode specific parameters...
    $This->_InitializeFingerprintsSDFileIORead();
    $This->_InitializeFingerprintsSDFileIOWrite();

    return $This;
  }
 131 
 # Initialize class level data: record the package name for use in error
 # and warning messages.
 #
 sub _InitializeClass {
   $ClassName = __PACKAGE__;
 }
 138 
 # Install default values for object data used while reading a fingerprints
 # SD file.
 #
 # Returns: the object.
 #
 sub _InitializeFingerprintsSDFileIORead {
   my($This) = @_;

   my %ReadDefaults = (
     # How compound IDs are generated for fingerprints. Possible values:
     # DataField (use a SD file data field value), MolName (use molname line),
     # LabelPrefix (sequential ID with a prefix), or MolNameOrLabelPrefix
     # (molname line, with sequential IDs replacing empty molname lines).
     CompoundIDMode => 'LabelPrefix',

     # Data field label whose value is used as compound ID during DataField
     # value of CompoundIDMode...
     CompoundIDFieldLabel => undef,

     # Prefix for sequential compound IDs, e.g. Cmpd<Number>, used during
     # LabelPrefix or MolNameOrLabelPrefix value of CompoundIDMode...
     CompoundIDPrefix => 'Cmpd',

     # Validation of fingerprints data is turned on by default...
     ValidateData => 1,

     # Level of detail to print about missing or invalid data during validation...
     DetailLevel => 1,

     # Counts of compounds with missing and invalid fingerprints data...
     NumOfCmpdsWithMissingData => 0,
     NumOfCmpdsWithInvalidData => 0,

     # Compound ID for current fingerprints string...
     CompoundID => undef,

     # Status of data in fingerprints SD file...
     ValidFileData => 0,
     ValidCompoundIDField => 0,
     ValidFingerprintsField => 0,
     ValidFingerprintsStringMode => 0,
   );

   # keys and values walk an unmodified hash in the same order...
   @{$This}{keys %ReadDefaults} = values %ReadDefaults;

   # Data field labels and values map for current compound: emptied in place...
   %{$This->{DataFieldLabelsAndValues}} = ();

   return $This;
 }
 198 
 # Install default values for object data used while writing a fingerprints
 # SD file.
 #
 # Returns: the object.
 #
 sub _InitializeFingerprintsSDFileIOWrite {
   my($This) = @_;
   my($Property);

   # Formatting properties left undefined here; their defaults are set during
   # first write using Fingerprints::FingerprintsStringUtil:
   #
   #   BitStringFormat
   #     Fingerprints bit vector string format: BinaryString or
   #     HexadecimalString [Default]. Default is retrieved using
   #     GetDefaultBitStringFormat.
   #
   #   BitsOrder
   #     Bits order in fingerprints bit vector string: Ascending - first bit in
   #     each byte as the lowest bit [Default]; Descending - first bit in each
   #     byte as the highest bit. Default is retrieved using GetDefaultBitsOrder.
   #
   #   VectorStringFormat
   #     Fingerprints vector string format: IDsAndValuesString,
   #     IDsAndValuesPairsString, ValuesAndIDsString, ValuesAndIDsPairsString, or
   #     ValuesString. Default is retrieved using GetDefaultVectorStringFormat:
   #     IDsAndValuesString for vectors containing NumericalValues; otherwise,
   #     ValuesString.
   #
   for $Property (qw(BitStringFormat BitsOrder VectorStringFormat)) {
     $This->{$Property} = undef;
   }

   # Don't overwrite an existing file by default...
   $This->{Overwrite} = 0;

   return $This;
 }
 236 
 # Apply the specified property names and values to the object, make sure a
 # fingerprints SD file name was supplied, and finish the setup for the
 # current read or write/append mode.
 #
 # Property values are assigned by calling Set<PropertyName> for each
 # specified name. Croaks when Name is missing or the file doesn't appear
 # to be a fingerprints SD file.
 #
 # Returns: the object.
 #
 sub _InitializeFingerprintsSDFileIOProperties {
   my($This, %NamesAndValues) = @_;
   my($PropertyName, $MethodName);

   for $PropertyName (keys %NamesAndValues) {
     $MethodName = "Set${PropertyName}";
     $This->$MethodName($NamesAndValues{$PropertyName});
   }

   # File name is mandatory...
   exists $NamesAndValues{Name}
     or croak "Error: ${ClassName}->New: Object can't be instantiated without specifying file name...";

   # Make sure it's a fingerprints file...
   my $Name = $NamesAndValues{Name};
   $This->IsFingerprintsSDFile($Name)
     or croak "Error: ${ClassName}->New: Object can't be instantiated: File, $Name, doesn't appear to be fingerprints format...";

   # Mode specific setup; other mode values need nothing further here...
   if ($This->GetMode() =~ /^Read$/i) {
     $This->_InitializeFingerprintsSDFileIOReadProperties(%NamesAndValues);
   }
   elsif ($This->GetMode() =~ /^(Write|Append)$/i) {
     $This->_InitializeFingerprintsSDFileIOWriteProperties(%NamesAndValues);
   }

   return $This;
 }
 269 
 # Finish object setup for reading a fingerprints SD file: fill in read
 # defaults, check compound ID settings and inspect the SD file data.
 #
 # Croaks when CompoundIDMode is DataField and no CompoundIDFieldLabel has
 # been specified.
 #
 # Returns: the object.
 #
 sub _InitializeFingerprintsSDFileIOReadProperties {
   my($This, %NamesAndValues) = @_;

   # Both properties default to AutoDetect during read...
   $This->{FingerprintsStringMode} ||= 'AutoDetect';
   $This->{FingerprintsFieldLabel} ||= 'AutoDetect';

   # DataField mode needs a data field label to retrieve compound IDs...
   my $DataFieldMode = ($This->{CompoundIDMode} =~ /^DataField$/i);
   if ($DataFieldMode && !defined($This->{CompoundIDFieldLabel})) {
     croak "Error: ${ClassName}->: Object can't be instantiated: Compound ID data field lable must be specifed using \"CompoundIDFieldLabel\" during \"DataField\" value of \"CompoundIDMode\"...";
   }

   $This->_PrepareForReadingFingerprintsSDFileData();

   return $This;
 }
 294 
 # Finish object setup for writing a fingerprints SD file: check that a
 # supported FingerprintsStringMode was specified, fill in the default data
 # field label and get ready for writing.
 #
 # Croaks when FingerprintsStringMode is missing or is not one of
 # FingerprintsBitVectorString or FingerprintsVectorString.
 #
 # Returns: the object.
 #
 sub _InitializeFingerprintsSDFileIOWriteProperties {
   my($This, %NamesAndValues) = @_;

   # FingerprintsStringMode must be explicitly specified for write/append...
   exists $NamesAndValues{FingerprintsStringMode}
     or croak "Error: ${ClassName}->New: Object can't be instantiated without specifying FingerprintsStringMode...";

   # AutoDetect is only meaningful during read...
   unless ($This->{FingerprintsStringMode} =~ /^(FingerprintsBitVectorString|FingerprintsVectorString)$/i) {
     croak "Error: ${ClassName}->: Object can't be instantiated: FingerprintsStringMode value, $This->{FingerprintsStringMode}, is not valid; Supported values for write/append: FingerprintsBitVectorString or FingerprintsVectorString...";
   }

   # Default data field label for fingerprints string...
   $This->{FingerprintsFieldLabel} ||= 'Fingerprints';

   $This->_PrepareForWritingFingerprintsSDFileData();

   return $This;
 }
 318 
 # Set FingerprintsStringMode after validating the specified value.
 #
 # Supported values (case insensitive):
 #
 #   AutoDetect - automatically detect format of fingerprints string
 #   FingerprintsBitVectorString - Bit vector fingerprints string format
 #   FingerprintsVectorString - Vector fingerprints string format
 #
 # Croaks for any other value. Returns: the object.
 #
 sub SetFingerprintsStringMode {
   my($This, $Value) = @_;

   unless ($Value =~ /^(AutoDetect|FingerprintsBitVectorString|FingerprintsVectorString)$/i) {
     croak "Error: ${ClassName}->SetFingerprintsStringMode: FingerprintsStringMode value, $Value, is not valid; Supported values: AutoDetect, FingerprintsBitVectorString or FingerprintsVectorString...";
   }

   $This->{FingerprintsStringMode} = $Value;

   return $This;
 }
 336 
 # Set CompoundIDMode after validating the specified value.
 #
 # Supported values (case insensitive): DataField, MolName, LabelPrefix, or
 # MolNameOrLabelPrefix.
 #
 # Croaks for any other value. Returns: the object.
 #
 sub SetCompoundIDMode {
   my($This, $Value) = @_;

   if ($Value !~ /^(DataField|MolName|LabelPrefix|MolNameOrLabelPrefix)$/i) {
     # Report this method's own name so that the failure can be traced...
     croak "Error: ${ClassName}->SetCompoundIDMode: CompoundIDMode value, $Value, is not valid; Supported values: DataField, MolName, LabelPrefix, or MolNameOrLabelPrefix...";
   }

   $This->{CompoundIDMode} = $Value;

   return $This;
 }
 350 
 # Set DetailLevel, the level of detail printed during validation of data.
 #
 # The value is checked using TextUtil::IsPositiveInteger. Croaks when the
 # check fails. Returns: the object.
 #
 sub SetDetailLevel {
   my($This, $Value) = @_;

   TextUtil::IsPositiveInteger($Value)
     or croak "Error: ${ClassName}->SetDetailLevel: DetailLevel value, $Value, is not valid; Supported values: > 0...";

   $This->{DetailLevel} = $Value;

   return $This;
 }
 364 
 # Set BitStringFormat after validating the specified value.
 #
 # Supported values (case insensitive): BinaryString or HexadecimalString.
 #
 # Croaks for any other value. Returns: the object.
 #
 sub SetBitStringFormat {
   my($This, $Value) = @_;

   unless ($Value =~ /^(BinaryString|HexadecimalString)$/i) {
     croak "Error: ${ClassName}->SetBitStringFormat: BitStringFormat value, $Value, is not valid; Supported values: BinaryString or HexadecimalString...";
   }

   $This->{BitStringFormat} = $Value;

   return $This;
 }
 378 
 # Set BitsOrder after validating the specified value.
 #
 # Supported values (case insensitive):
 #
 #   Ascending - First bit in each byte as the lowest bit
 #   Descending - First bit in each byte as the highest bit
 #
 # Croaks for any other value. Returns: the object.
 #
 sub SetBitsOrder {
   my($This, $Value) = @_;

   if ($Value !~ /^(Ascending|Descending)$/i) {
     # Name the property actually being set in the error message...
     croak "Error: ${ClassName}->SetBitsOrder: BitsOrder value, $Value, is not valid; Supported values: Ascending or Descending...";
   }

   $This->{BitsOrder} = $Value;

   return $This;
 }
 395 
 # Set VectorStringFormat after validating the specified value.
 #
 # Supported values (case insensitive): IDsAndValuesString,
 # IDsAndValuesPairsString, ValuesAndIDsString, ValuesAndIDsPairsString, or
 # ValuesString.
 #
 # Croaks for any other value. Returns: the object.
 #
 sub SetVectorStringFormat {
   my($This, $Value) = @_;

   if ($Value !~ /^(IDsAndValuesString|IDsAndValuesPairsString|ValuesAndIDsString|ValuesAndIDsPairsString|ValuesString)$/i) {
     # Name the property actually being set in the error message...
     croak "Error: ${ClassName}->SetVectorStringFormat: VectorStringFormat value, $Value, is not valid; Supported values: IDsAndValuesString, IDsAndValuesPairsString, ValuesAndIDsString, ValuesAndIDsPairsString, or ValuesString...";
   }

   $This->{VectorStringFormat} = $Value;

   return $This;
 }
 411 
 # Get compound string for current compound with optional removal of
 # fingerprints data.
 #
 # Parameters: optional flag; a true value requests removal of the
 #             fingerprints data field. Default: 0.
 # Returns:    compound string as it is, or the result of
 #             SDFileUtil::RemoveCmpdDataHeaderLabelAndValue for the
 #             fingerprints field label when removal is requested and
 #             fingerprints data is present in the compound string.
 #
 sub GetCompoundString {
   my($This, $RemoveFingerprintsData) = @_;

   my $StripFingerprints = defined $RemoveFingerprintsData ? $RemoveFingerprintsData : 0;

   # Presence of fingerprints data is checked only when removal is requested...
   unless ($StripFingerprints && $This->_IsFingerprintsDataPresentInCompoundString()) {
     return $This->{CompoundString};
   }

   return SDFileUtil::RemoveCmpdDataHeaderLabelAndValue($This->{CompoundString}, $This->{FingerprintsFieldLabel});
 }
 425 
 # Set compound string for current compound.
 #
 # Returns: the object.
 #
 sub SetCompoundString {
   my $This = shift;

   ($This->{CompoundString}) = @_;

   return $This;
 }
 435 
 # Get fingerprints object for current compound. It may be a fingerprints,
 # fingerprints bit-vector, or fingerprints vector object; a fingerprints
 # object corresponds to any of the supported fingerprints objects such as
 # PathLengthFingerprints, ExtendedConnectivity, and so on.
 #
 sub GetFingerprints {
   my $This = shift;

   return $This->{FingerprintsObject};
 }
 445 
 # Set fingerprints object for current compound.
 #
 # Returns: the object.
 #
 sub SetFingerprints {
   my $This = shift;

   ($This->{FingerprintsObject}) = @_;

   return $This;
 }
 455 
 # Get fingerprints string for current compound.
 #
 # Returns: fingerprints string, or the string None when no true value is
 #          available.
 #
 sub GetFingerprintsString {
   my $This = shift;

   return $This->{FingerprintsString} || 'None';
 }
 463 
 # Set fingerprints string for current compound.
 #
 # Returns: the object.
 #
 sub SetFingerprintsString {
   my $This = shift;

   ($This->{FingerprintsString}) = @_;

   return $This;
 }
 473 
 # Does fingerprints SD file contain valid data?
 #
 # Returns: 1 when ValidFileData holds a true value; otherwise, 0.
 #
 sub IsFingerprintsFileDataValid {
   my $This = shift;

   if ($This->{ValidFileData}) {
     return 1;
   }

   return 0;
 }
 481 
 # Does current compound contain valid fingerprints object data?
 #
 # Returns: 1 when a fingerprints object is defined; otherwise, 0.
 #
 sub IsFingerprintsDataValid {
   my $This = shift;

   if (defined $This->{FingerprintsObject}) {
     return 1;
   }

   return 0;
 }
 489 
 # Read next available compound data string, process it and generate
 # appropriate fingerprints object along with its compound ID.
 #
 # Returns: undef when no compound data string could be read; otherwise, the
 #          object. The object is also returned, without generating a
 #          fingerprints object, when SD file data is not valid or validation
 #          of the compound data string fails.
 #
 sub Read {
   my($This) = @_;

   # Read compound data string...
   $This->_ReadCompoundDataString() or return undef;

   # No need to process SD file with invalid data; the compound is counted
   # as missing data during validation...
   unless ($This->{ValidFileData}) {
     $This->{NumOfCmpdsWithMissingData} += 1 if $This->{ValidateData};
     return $This;
   }

   # Perform data validation...
   if ($This->{ValidateData} && !$This->_ValidateReadCompoundDataString()) {
     return $This;
   }

   # Set up fingerprints string after checking for the field again; it may
   # be absent when the compound data string has not been validated...
   my $FieldLabel = $This->{FingerprintsFieldLabel};
   if (exists $This->{DataFieldLabelsAndValues}{$FieldLabel}) {
     $This->{FingerprintsString} = $This->{DataFieldLabelsAndValues}{$FieldLabel};
   }

   # Generate fingerprints object followed by compound ID...
   $This->_GenerateFingerprintsObject();
   $This->_GenerateCompoundID();

   return $This;
 }
 533 
 # Read next available compound data string by invoking Read and pass back
 # whatever it returns.
 #
 sub Next {
   my $This = shift;

   return $This->Read();
 }
 542 
 # Read next compound data string from the file handle and set up its data
 # field labels and values map.
 #
 # Returns: 0 when no compound string is available; otherwise, 1.
 #
 sub _ReadCompoundDataString {
   my($This) = @_;

   # One time processing before first read...
   $This->_ProcessFirstCompoundDataStringRead() if $This->{FirstCompoundDataIO};

   # Clear out data from previous compound...
   $This->_InitializeReadCompoundDataString();

   # Get next compound string...
   my $CompoundString = SDFileUtil::ReadCmpdString($This->{FileHandle});
   $This->{CompoundString} = $CompoundString;
   return 0 unless $CompoundString;

   $This->{CompoundNum} += 1;

   # Set up data field labels and values...
   my @CompoundLines = split "\n", $CompoundString;
   %{$This->{DataFieldLabelsAndValues}} = SDFileUtil::GetCmpdDataHeaderLabelsAndValues(\@CompoundLines);

   return 1;
 }
 570 
 # Reset data for current compound before reading next compound data string.
 #
 # Returns: the object.
 #
 sub _InitializeReadCompoundDataString {
   my($This) = @_;
   my($Property);

   for $Property (qw(CompoundID CompoundString FingerprintsObject FingerprintsString)) {
     $This->{$Property} = undef;
   }

   # Data field labels and values map is emptied in place...
   %{$This->{DataFieldLabelsAndValues}} = ();

   return $This;
 }
 586 
 # Validate fingerprints data in current compound data string.
 #
 # Fingerprints data is treated as missing when the fingerprints data field
 # doesn't exist for the compound. It is treated as invalid when its values
 # are not valid, its type or description can't be retrieved, or these don't
 # match, ignoring case, the type and description of the first fingerprints
 # string in the SD file.
 #
 # Counts of compounds with missing and invalid data are updated. A warning
 # is issued at DetailLevel >= 2; at DetailLevel >= 3 the warning includes
 # the compound string.
 #
 # Returns: 1 for valid data; otherwise, 0.
 #
 sub _ValidateReadCompoundDataString {
   my($This) = @_;
   my($FingerprintsFieldLabel, $FingerprintsString);

   $FingerprintsFieldLabel = $This->{FingerprintsFieldLabel};

   # Check for missing data...
   if (!exists $This->{DataFieldLabelsAndValues}{$FingerprintsFieldLabel}) {
     $This->{NumOfCmpdsWithMissingData} += 1;
     if ($This->{DetailLevel} >= 3) {
       carp "Warning: ${ClassName}->_ValidateReadCompoundDataString: Compound number $This->{CompoundNum} contains no fingerprints data: $This->{CompoundString}...";
     }
     elsif ($This->{DetailLevel} >= 2) {
       carp "Warning: ${ClassName}->_ValidateReadCompoundDataString: Compound number $This->{CompoundNum} contains no fingerprints data...";
     }
     return 0;
   }

   $FingerprintsString = $This->{DataFieldLabelsAndValues}{$FingerprintsFieldLabel};

   # Check for invalid data: assume it's invalid until all checks pass...
   my($InvalidFingerprintsData, $FingerprintsType, $FingerprintsDescription);

   $InvalidFingerprintsData = 1;

   if (Fingerprints::FingerprintsStringUtil::AreFingerprintsStringValuesValid($FingerprintsString)) {
     ($FingerprintsType, $FingerprintsDescription) = Fingerprints::FingerprintsStringUtil::GetFingerprintsStringTypeAndDescription($FingerprintsString);
     if (defined($FingerprintsType) && defined($FingerprintsDescription)) {
       # Type and description come from file data: \Q...\E makes sure these
       # are matched literally and not interpreted as regular expressions...
       if ($This->{FirstFingerprintsStringType} =~ /^\Q$FingerprintsType\E$/i && $This->{FirstFingerprintsStringDescription} =~ /^\Q$FingerprintsDescription\E$/i) {
         $InvalidFingerprintsData = 0;
       }
     }
   }

   if ($InvalidFingerprintsData) {
     $This->{NumOfCmpdsWithInvalidData} += 1;
     if ($This->{DetailLevel} >= 3) {
       # Report compound string as it's done for missing data above...
       carp "Warning: ${ClassName}->_ValidateReadCompoundDataString: Compound number $This->{CompoundNum} contains invalid fingerprints data: $This->{CompoundString}...";
     }
     elsif ($This->{DetailLevel} >= 2) {
       carp "Warning: ${ClassName}->_ValidateReadCompoundDataString: Compound number $This->{CompoundNum} contains invalid fingerprints data...";
     }
     return 0;
   }

   return 1;
 }
 641 
 # Set up compound ID for current fingerprints string according to
 # CompoundIDMode:
 #
 #   LabelPrefix - CompoundIDPrefix followed by compound number
 #   DataField - value of CompoundIDFieldLabel data field; empty string when
 #       the data field doesn't exist
 #   MolNameOrLabelPrefix - first line of compound string; CompoundIDPrefix
 #       followed by compound number when TextUtil::IsNotEmpty fails for it
 #   MolName - first line of compound string
 #
 # Compound ID is set to an empty string for any other mode.
 #
 # Returns: the object.
 #
 sub _GenerateCompoundID {
   my($This) = @_;
   my($Mode, $CompoundID);

   $Mode = $This->{CompoundIDMode};
   $CompoundID = '';

   if ($Mode =~ /^LabelPrefix$/i) {
     $CompoundID = $This->{CompoundIDPrefix} . $This->{CompoundNum};
   }
   elsif ($Mode =~ /^DataField$/i) {
     my $SpecifiedDataField = $This->{CompoundIDFieldLabel};
     if (exists $This->{DataFieldLabelsAndValues}{$SpecifiedDataField}) {
       $CompoundID = $This->{DataFieldLabelsAndValues}{$SpecifiedDataField};
     }
   }
   elsif ($Mode =~ /^MolNameOrLabelPrefix$/i) {
     my($MolName) = split "\n", $This->{CompoundString};
     if (TextUtil::IsNotEmpty($MolName)) {
       $CompoundID = $MolName;
     }
     else {
       $CompoundID = $This->{CompoundIDPrefix} . $This->{CompoundNum};
     }
   }
   elsif ($Mode =~ /^MolName$/i) {
     ($CompoundID) = split "\n", $This->{CompoundString};
   }

   $This->{CompoundID} = $CompoundID;

   return $This;
 }
 670 
 # Process first compound data string read: turn off the flag indicating
 # that first compound data is yet to be read.
 #
 # Returns: the object.
 #
 sub _ProcessFirstCompoundDataStringRead {
   my $This = shift;

   $This->{FirstCompoundDataIO} = 0;

   return $This;
 }
 681 
 # Get ready for reading fingerprints SD file: retrieve data fields for the
 # first compound, validate compound ID field, fingerprints field and
 # fingerprints string mode, and set overall status of SD file data.
 #
 # Returns: the object.
 #
 sub _PrepareForReadingFingerprintsSDFileData {
   my($This) = @_;
   my($StatusFlag, $ValidFileData);

   # Retrieve SD file data fields information...
   $This->_RetrieveSDFileDataFields();

   # Validate compound and fingerprints field information...
   $This->_ValidateReadCompoundIDField();
   $This->_ValidateReadFingerprintsField();

   # Fingerprints string mode can be validated only for a valid fingerprints field...
   $This->_ValidateReadFingerprintsStringMode() if $This->{ValidFingerprintsField};

   # SD file data is valid only when all individual checks have passed...
   $ValidFileData = 1;
   for $StatusFlag (qw(ValidCompoundIDField ValidFingerprintsField ValidFingerprintsStringMode)) {
     $ValidFileData = 0 unless $This->{$StatusFlag};
   }
   $This->{ValidFileData} = $ValidFileData;

   return $This;
 }
 704 
 # Retrieve data field labels and values for the first compound in SD file
 # and store them in FirstDataFieldLabelsAndValues.
 #
 # The file is opened for reading using its own lexical file handle, which
 # is closed after the first compound string has been read.
 #
 # Croaks when the file doesn't exist or can't be opened.
 #
 # Returns: the object.
 #
 sub _RetrieveSDFileDataFields {
   my($This) = @_;
   my($SDFile, $SDFileHandle, $CmpdString, @CmpdLines);

   $SDFile = $This->{Name};

   if (!(-e $SDFile)) {
     croak "Error: ${ClassName}->New: Object can't be instantiated: File, $SDFile, doesn't exist...";
   }

   # Three argument open with an explicit read mode keeps any special
   # characters in file name from being interpreted as a mode or a pipe; a
   # lexical file handle avoids clobbering a global SDFILE handle...
   if (!open($SDFileHandle, '<', $SDFile)) {
     croak "Error: ${ClassName}->New: Object can't be instantiated: Couldn't open input SD file $SDFile: $! ...";
   }
   $CmpdString = SDFileUtil::ReadCmpdString($SDFileHandle);
   close $SDFileHandle;

   # Set up data field labels and values for first compound string data; no
   # compound string implies no lines...
   @CmpdLines = defined($CmpdString) ? split("\n", $CmpdString) : ();

   %{$This->{FirstDataFieldLabelsAndValues}} = SDFileUtil::GetCmpdDataHeaderLabelsAndValues(\@CmpdLines);

   return $This;
 }
 731 
 # Validate compound ID field information: during DataField value of
 # CompoundIDMode, the data field specified using CompoundIDFieldLabel must
 # exist in the first compound of SD file.
 #
 # ValidCompoundIDField is set to reflect the result and a warning is
 # issued for a missing data field.
 #
 # Returns: 1 for valid compound ID field information; otherwise, 0.
 #
 sub _ValidateReadCompoundIDField {
   my($This) = @_;
   my($SpecifiedDataField);

   $This->{ValidCompoundIDField} = 0;

   if ($This->{CompoundIDMode} =~ /^DataField$/i) {
     $SpecifiedDataField = $This->{CompoundIDFieldLabel};
     if (! exists $This->{FirstDataFieldLabelsAndValues}{$SpecifiedDataField}) {
       # Refer to the property by its actual name, CompoundIDFieldLabel...
       carp "Warning: ${ClassName}->_ValidateReadCompoundIDField: Compound ID data field, $SpecifiedDataField, specified using \"CompoundIDFieldLabel\" in \"DataField\" \"CompoundIDMode\" doesn't exist...";
       return 0;
     }
   }

   $This->{ValidCompoundIDField} = 1;

   return 1;
 }
 752 
 # Validate fingerprints string field information using data fields of the
 # first compound in SD file.
 #
 # An explicitly specified FingerprintsFieldLabel must exist. During
 # AutoDetect, data field labels are examined in sorted order and the first
 # one containing the word Fingerprints, ignoring case, is selected; sorting
 # makes the selection reproducible when multiple labels match.
 #
 # On success, ValidFingerprintsField is set to 1 and FingerprintsFieldLabel
 # to the selected label; otherwise, a warning is issued.
 #
 # Returns: 1 for valid fingerprints field information; otherwise, 0.
 #
 sub _ValidateReadFingerprintsField {
   my($This) = @_;
   my($FingerprintsFieldLabel);

   $This->{ValidFingerprintsField} = 0;

   $FingerprintsFieldLabel = '';

   if ($This->{FingerprintsFieldLabel} !~ /^AutoDetect$/i) {
     $FingerprintsFieldLabel = $This->{FingerprintsFieldLabel};
     if (! exists $This->{FirstDataFieldLabelsAndValues}{$FingerprintsFieldLabel}) {
       carp "Warning: ${ClassName}->_ValidateReadFingerprintsField: Fingerprints data field value, $FingerprintsFieldLabel, specified using \"FingerprintsFieldLabel\" doesn't exist...";
       return 0;
     }
   }
   else {
     # Make sure default fingerprints field does exist...
     my($FingerprintsFieldFound, $DataFieldLabel);
     $FingerprintsFieldFound = 0;

     # Order of hash keys is not predictable: go over them in sorted order...
     DATAFIELDLABEL: for $DataFieldLabel (sort keys %{$This->{FirstDataFieldLabelsAndValues}}) {
       if ($DataFieldLabel =~ /Fingerprints/i) {
         $FingerprintsFieldFound = 1;
         $FingerprintsFieldLabel = $DataFieldLabel;
         last DATAFIELDLABEL;
       }
     }
     if (!$FingerprintsFieldFound) {
       carp "Warning: ${ClassName}->_ValidateReadFingerprintsField: Data field label containing \"Fingerprints\" string in its name doesn't exist...";
       return 0;
     }
   }

   $This->{ValidFingerprintsField} = 1;
   $This->{FingerprintsFieldLabel} = $FingerprintsFieldLabel;

   return 1;
 }
 793 
 # Validate fingerprints string mode information using fingerprints string
 # of the first compound in SD file.
 #
 # For FingerprintsBitVectorString and FingerprintsVectorString values of
 # FingerprintsStringMode, type of the first fingerprints string must be
 # FingerprintsBitVector and FingerprintsVector respectively. During
 # AutoDetect, either of these two types is accepted.
 #
 # On success, ValidFingerprintsStringMode is set to 1 along with
 # FingerprintsBitVectorStringMode, FingerprintsVectorStringMode,
 # FirstFingerprintsStringType and FirstFingerprintsStringDescription. On
 # failure, a warning is issued and these stay at 0 or empty string.
 #
 # Returns: 1 for valid fingerprints string mode; otherwise, 0.
 #
 sub _ValidateReadFingerprintsStringMode {
   my($This) = @_;
   my($FingerprintsBitVectorStringMode, $FingerprintsVectorStringMode, $FirstFingerprintsStringType, $FingerprintsFieldLabel, $FingerprintsType, $FingerprintsDescription);

   $This->{ValidFingerprintsStringMode} = 0;

   $This->{FingerprintsBitVectorStringMode} = 0;
   $This->{FingerprintsVectorStringMode} = 0;

   $This->{FirstFingerprintsStringType} = '';
   $This->{FirstFingerprintsStringDescription} = '';

   $FingerprintsBitVectorStringMode = 0;
   $FingerprintsVectorStringMode = 0;

   $FirstFingerprintsStringType = '';

   $FingerprintsFieldLabel = $This->{FingerprintsFieldLabel};

   ($FingerprintsType, $FingerprintsDescription) = Fingerprints::FingerprintsStringUtil::GetFingerprintsStringTypeAndDescription($This->{FirstDataFieldLabelsAndValues}{$FingerprintsFieldLabel});

   # Type and description might not be available for a malformed fingerprints
   # string: use empty strings to keep matches and messages below well defined...
   $FingerprintsType = '' if !defined $FingerprintsType;
   $FingerprintsDescription = '' if !defined $FingerprintsDescription;

   if ($This->{FingerprintsStringMode} =~ /^FingerprintsBitVectorString$/i) {
     if ($FingerprintsType !~ /^FingerprintsBitVector$/i) {
       carp "Warning: ${ClassName}->_ValidateReadFingerprintsStringMode: First fingerprint string data type, $FingerprintsType, doesn't match value, FingerprintsBitVectorString, specified using \"FingerprintsStringMode\"...";
       return 0;
     }
     $FingerprintsBitVectorStringMode = 1;
     $FirstFingerprintsStringType = 'FingerprintsBitVector';
   }
   elsif ($This->{FingerprintsStringMode} =~ /^FingerprintsVectorString$/i) {
     if ($FingerprintsType !~ /^FingerprintsVector$/i) {
       carp "Warning: ${ClassName}->_ValidateReadFingerprintsStringMode: First fingerprint string data type, $FingerprintsType, doesn't match value, FingerprintsVectorString, specified using \"FingerprintsStringMode\"...";
       return 0;
     }
     $FingerprintsVectorStringMode = 1;
     $FirstFingerprintsStringType = 'FingerprintsVector';
   }
   else {
     # AutoDetect mode...
     if ($FingerprintsType =~ /^FingerprintsBitVector$/i) {
       $FingerprintsBitVectorStringMode = 1;
     }
     elsif ($FingerprintsType =~ /^FingerprintsVector$/i) {
       $FingerprintsVectorStringMode = 1;
     }
     else {
       carp "Warning: ${ClassName}->_ValidateReadFingerprintsStringMode: First fingerprint string data type, $FingerprintsType, identified during, AutoDetect, value of \"FingerprintsStringMode\" is not valid; Supported fingerprints types: FingerprintsBitVector or FingerprintsVector...";
       return 0;
     }
     # Type is kept as it appears in the fingerprints string...
     $FirstFingerprintsStringType = $FingerprintsType;
   }

   $This->{ValidFingerprintsStringMode} = 1;

   $This->{FingerprintsBitVectorStringMode} = $FingerprintsBitVectorStringMode;
   $This->{FingerprintsVectorStringMode} = $FingerprintsVectorStringMode;

   $This->{FirstFingerprintsStringType} = $FirstFingerprintsStringType;
   $This->{FirstFingerprintsStringDescription} = $FingerprintsDescription;

   return 1;
 }
 862 
 # Write fingerprints string generated from specified fingerprints,
 # fingerprints bit-vector, or fingerprints vector object along with
 # compound data to SD file.
 #
 # Parameters: fingerprints object and compound string.
 # Returns:    the object.
 #
 sub WriteFingerprints {
   my $This = shift;
   my($FingerprintsObject, $CompoundString) = @_;

   # Start out with a clean slate for current compound...
   $This->_InitializeWriteCompoundDataString();

   # Fingerprints string is generated after setting fingerprints object...
   $This->{FingerprintsObject} = $FingerprintsObject;
   $This->_GenerateFingerprintsString();

   # Fingerprints data is added after setting compound string...
   $This->{CompoundString} = $CompoundString;
   $This->_AddFingerprintsDataToCompoundString();

   # Write it out...
   $This->_WriteCompoundDataString();

   return $This;
 }
 887 
 888 # Write fingerprints string and other data to SD file...
 889 #
 890 # Note:
 891 #   o FingerprintsStringMode, BitStringFormat, BitsOrder, VectorStringFormat values
 892 #     are ignored during writing of fingerprints and it's written to the file as it is.
 893 #   o CompoundString contains no fingerprints data
 894 #
 895 sub WriteFingerprintsString {
 896   my($This, $FingerprintsString, $CompoundString) = @_;
 897 
 898   # Initialize data for current compound...
 899   $This->_InitializeWriteCompoundDataString();
 900 
 901   # Set fingerprints string...
 902   $This->{FingerprintsString} = $FingerprintsString;
 903 
 904   # Generate fingerprints object...
 905   $This->_GenerateFingerprintsObject();
 906 
 907   # Set and update compound string...
 908   $This->{CompoundString} = $CompoundString;
 909   $This->_AddFingerprintsDataToCompoundString();
 910 
 911   # Write it out...
 912   $This->_WriteCompoundDataString();
 913 
 914   return $This;
 915 }
 916 
 917 # Initialize compound data string for writing...
 918 #
 919 sub _InitializeWriteCompoundDataString {
 920   my($This) = @_;
 921 
 922   $This->{CompoundString} = undef;
 923 
 924   $This->{FingerprintsObject} = undef;
 925   $This->{FingerprintsString} = undef;
 926 
 927   return $This;
 928 }
 929 
 930 # Write compound data string...
 931 #
 932 sub _WriteCompoundDataString {
 933   my($This) = @_;
 934   my($FileHandle);
 935 
 936   if ($This->{FirstCompoundDataIO}) {
 937     $This->_ProcessFirstCompoundDataStringWrite();
 938   }
 939 
 940   $This->{CompoundNum} += 1;
 941   $FileHandle = $This->{FileHandle};
 942 
 943   print $FileHandle "$This->{CompoundString}\n";
 944 
 945   return $This;
 946 }
 947 
 948 # Process first compound data string write...
 949 #
 950 sub _ProcessFirstCompoundDataStringWrite {
 951   my($This) = @_;
 952   my($Line, $FileHandle);
 953 
 954   $This->{FirstCompoundDataIO} = 0;
 955 
 956   return $This;
 957 }
 958 
 959 # Get ready for writing fingerprints SD file...
 960 #
 961 sub _PrepareForWritingFingerprintsSDFileData {
 962   my($This) = @_;
 963   my($SDFile);
 964 
 965   $SDFile = $This->{Name};
 966   if (!$This->{Overwrite}) {
 967     if (-e $SDFile) {
 968       croak "Error: ${ClassName}->New: Object can't be instantiated: File, $SDFile, already exist. Use overwrite option...";
 969     }
 970   }
 971 
 972   # Setup FingerprintsStringMode status...
 973 
 974   $This->{FingerprintsBitVectorStringMode} = 0;
 975   $This->{FingerprintsVectorStringMode} = 0;
 976   $This->{ValidFingerprintsStringMode} = 0;
 977 
 978   if ($This->{FingerprintsStringMode} =~ /^FingerprintsBitVectorString$/i) {
 979     $This->{FingerprintsBitVectorStringMode} = 1;
 980   }
 981   elsif ($This->{FingerprintsStringMode} =~ /^FingerprintsVectorString$/i) {
 982     $This->{FingerprintsVectorStringMode} = 1;
 983   }
 984 
 985   $This->{ValidFingerprintsStringMode} = ($This->{FingerprintsBitVectorStringMode} || $This->{FingerprintsVectorStringMode}) ? 1 : 0;
 986 
 987   if ($This->{FingerprintsBitVectorStringMode}) {
 988     $This->_SetDefaultBitStringFormat();
 989     $This->_SetDefaultBitsOrder();
 990   }
 991   elsif ($This->{FingerprintsVectorStringMode}) {
 992     $This->_SetDefaultVectorStringFormat();
 993   }
 994 
 995   return $This;
 996 }
 997 
 998 # Set default value for bit string format...
 999 #
sub _SetDefaultBitStringFormat {
  my($This) = @_;

  # Use default bit string format only when BitStringFormat doesn't already hold
  # a true value. Returns the object itself.
  $This->{BitStringFormat} ||= Fingerprints::FingerprintsStringUtil::GetDefaultBitStringFormat();

  return $This;
}
1009 
1010 # Set default value for bits order...
1011 #
sub _SetDefaultBitsOrder {
  my($This) = @_;

  # Use default bits order only when BitsOrder doesn't already hold a true value.
  # Returns the object itself.
  $This->{BitsOrder} ||= Fingerprints::FingerprintsStringUtil::GetDefaultBitsOrder();

  return $This;
}
1021 
1022 # Set default value for vector string format...
1023 #
sub _SetDefaultVectorStringFormat {
  my($This) = @_;

  # Use default vector string format, determined from the current fingerprints
  # object, only when VectorStringFormat doesn't already hold a true value and a
  # fingerprints object is available. Returns the object itself.

  # An explicitly specified format is left alone...
  return $This if $This->{VectorStringFormat};

  # No fingerprints object to determine the default from...
  return $This unless $This->{FingerprintsObject};

  $This->{VectorStringFormat} = Fingerprints::FingerprintsStringUtil::GetDefaultVectorStringFormat($This->{FingerprintsObject});

  return $This;
}
1033 
1034 # Add fingerprints data to compound string...
1035 #
sub _AddFingerprintsDataToCompoundString {
  my($This) = @_;

  # Appends a data field, labeled using FingerprintsFieldLabel and holding the
  # current fingerprints string, to the current compound string and terminates it
  # with $$$$. Returns the object itself.

  # Any data already present for the fingerprints field label is dropped, with a
  # warning, before new data is added...
  if ($This->_IsFingerprintsDataPresentInCompoundString()) {
    carp "Warning: ${ClassName}->_AddFingerprintsDataToCompoundString: The compound string already contains fingerprints data corresponding to fingerprints field label $This->{FingerprintsFieldLabel}; It has been replaced with new fingerprints data...";
    $This->{CompoundString} = SDFileUtil::RemoveCmpdDataHeaderLabelAndValue($This->{CompoundString}, $This->{FingerprintsFieldLabel});
  }

  # Work on a copy with $$$$ at the end taken off; it's put back after the new
  # data field...
  (my $CmpdString = $This->{CompoundString}) =~ s/\$\$\$\$$//;

  $This->{CompoundString} = "${CmpdString}>  <$This->{FingerprintsFieldLabel}>\n$This->{FingerprintsString}\n\n\$\$\$\$";

  return $This;
}
1054 
1055 # Is fingerprints data already present in compound string?
1056 #
sub _IsFingerprintsDataPresentInCompoundString {
  my($This) = @_;
  my($FingerprintsFieldLabel);

  # Checks whether the current compound string contains <FingerprintsFieldLabel>.
  #
  # Returns 1 when the label, enclosed in angle brackets, is found in the compound
  # string; 0 otherwise, including when either the compound string or the label is
  # empty as determined by TextUtil::IsEmpty.

  if (TextUtil::IsEmpty($This->{CompoundString}) || TextUtil::IsEmpty($This->{FingerprintsFieldLabel})) {
    return 0;
  }

  $FingerprintsFieldLabel = $This->{FingerprintsFieldLabel};

  # The label is data and not a pattern: \Q...\E makes any regex metacharacters in
  # it, such as parentheses, dots or plus signs, match literally...
  return ($This->{CompoundString} =~ /<\Q$FingerprintsFieldLabel\E>/) ? 1 : 0;
}
1069 
1070 # Generate fingerprints object using current fingerprints string...
1071 #
sub _GenerateFingerprintsObject {
  my($This) = @_;

  # Sets FingerprintsObject by parsing the current fingerprints string according to
  # the active string mode. Returns the object itself, except that undef is returned
  # when a fingerprints string is available but neither string mode is turned on.

  # Always start without a fingerprints object...
  $This->{FingerprintsObject} = undef;

  # Nothing to parse...
  my $FpString = $This->{FingerprintsString};
  return $This unless $FpString;

  if ($This->{FingerprintsBitVectorStringMode}) {
    $This->{FingerprintsObject} = Fingerprints::FingerprintsStringUtil::ParseFingerprintsBitVectorString($FpString);
    return $This;
  }

  if ($This->{FingerprintsVectorStringMode}) {
    $This->{FingerprintsObject} = Fingerprints::FingerprintsStringUtil::ParseFingerprintsVectorString($FpString);
    return $This;
  }

  # Neither string mode is active: no object can be generated...
  return undef;
}
1093 
1094 # Generate fingerprints string using current fingerprints object...
1095 #
sub _GenerateFingerprintsString {
  my($This) = @_;

  # Sets FingerprintsString from the current fingerprints object using format values
  # for the active string mode; it's left as an empty string when there is no
  # fingerprints object or neither string mode is turned on. Returns the object itself.

  # Always start with an empty fingerprints string...
  $This->{FingerprintsString} = '';

  # Nothing to generate from...
  my $FpObject = $This->{FingerprintsObject};
  return $This unless $FpObject;

  # Collect format values to pass along with the fingerprints object...
  my @FormatValues;
  if ($This->{FingerprintsBitVectorStringMode}) {
    @FormatValues = ($This->{BitStringFormat}, $This->{BitsOrder});
  }
  elsif ($This->{FingerprintsVectorStringMode}) {
    @FormatValues = ($This->{VectorStringFormat});
  }
  else {
    return $This;
  }

  $This->{FingerprintsString} = Fingerprints::FingerprintsStringUtil::GenerateFingerprintsString($FpObject, @FormatValues);

  return $This;
}
1114 
1115 # Is it a fingerprints file?
sub IsFingerprintsSDFile ($;$) {
  my($FirstParameter, $SecondParameter) = @_;

  # May be invoked with just a file name or with a FingerprintsSDFileIO object
  # followed by a file name. Returns the status of checking the file name against
  # sdf and sd file types using FileUtil::CheckFileType.

  # File name is the second parameter only for two parameters with an object up front...
  my $ObjectComesFirst = (@_ == 2) && _IsFingerprintsSDFileIO($FirstParameter);
  my $FileName = $ObjectComesFirst ? $SecondParameter : $FirstParameter;

  # Check file extension...
  my $Status = FileUtil::CheckFileType($FileName, "sdf sd");

  return $Status;
}
1132 
1133 # Is it a FingerprintsSDFileIO object?
sub _IsFingerprintsSDFileIO {
  my($Object) = @_;

  # Returns 1 for a blessed reference which is an instance of this class or a class
  # derived from it; 0 for everything else.

  # Plain values and unblessed references can't be objects of any class...
  return 0 unless Scalar::Util::blessed($Object);

  return $Object->isa($ClassName) ? 1 : 0;
}
1139