MayaChemTools

   1 package Fingerprints::PathLengthFingerprints;
   2 #
   3 # File: PathLengthFingerprints.pm
   4 # Author: Manish Sud <msud@san.rr.com>
   5 #
   6 # Copyright (C) 2025 Manish Sud. All rights reserved.
   7 #
   8 # This file is part of MayaChemTools.
   9 #
  10 # MayaChemTools is free software; you can redistribute it and/or modify it under
  11 # the terms of the GNU Lesser General Public License as published by the Free
  12 # Software Foundation; either version 3 of the License, or (at your option) any
  13 # later version.
  14 #
  15 # MayaChemTools is distributed in the hope that it will be useful, but without
  16 # any warranty; without even the implied warranty of merchantability of fitness
  17 # for a particular purpose.  See the GNU Lesser General Public License for more
  18 # details.
  19 #
  20 # You should have received a copy of the GNU Lesser General Public License
  21 # along with MayaChemTools; if not, see <http://www.gnu.org/licenses/> or
  22 # write to the Free Software Foundation Inc., 59 Temple Place, Suite 330,
  23 # Boston, MA, 02111-1307, USA.
  24 #
  25 
  26 use strict;
  27 use Carp;
  28 use Exporter;
  29 use TextUtil ();
  30 use MathUtil ();
  31 use Fingerprints::Fingerprints;
  32 use Molecule;
  33 use AtomTypes::AtomicInvariantsAtomTypes;
  34 use AtomTypes::DREIDINGAtomTypes;
  35 use AtomTypes::EStateAtomTypes;
  36 use AtomTypes::FunctionalClassAtomTypes;
  37 use AtomTypes::MMFF94AtomTypes;
  38 use AtomTypes::SLogPAtomTypes;
  39 use AtomTypes::SYBYLAtomTypes;
  40 use AtomTypes::TPSAAtomTypes;
  41 use AtomTypes::UFFAtomTypes;
  42 
# Package-level symbols used for inheritance and for the Exporter interface...
use vars qw(@ISA @EXPORT @EXPORT_OK %EXPORT_TAGS);

# Inherit from the generic fingerprints class; nothing is exported by default
# or on request...
@ISA = qw(Fingerprints::Fingerprints Exporter);
@EXPORT = qw();
@EXPORT_OK = qw();

%EXPORT_TAGS = (all  => [@EXPORT, @EXPORT_OK]);

# Setup class variables: $ClassName is a file-scoped lexical which is set by
# _InitializeClass and interpolated into error messages in this file. It must be
# declared before the subs which refer to it...
my($ClassName);
_InitializeClass();

# Overload Perl functions: stringification of an object is routed to the method
# named StringifyPathLengthFingerprints...
use overload '""' => 'StringifyPathLengthFingerprints';
  57 
  58 # Class constructor...
  59 sub new {
  60   my($Class, %NamesAndValues) = @_;
  61 
  62   # Initialize object...
  63   my $This = $Class->SUPER::new();
  64   bless $This, ref($Class) || $Class;
  65   $This->_InitializePathLengthFingerprints();
  66 
  67   $This->_InitializePathLengthFingerprintsProperties(%NamesAndValues);
  68 
  69   return $This;
  70 }
  71 
  72 # Initialize object data...
  73 #
  74 sub _InitializePathLengthFingerprints {
  75   my($This) = @_;
  76 
  77   # Type of fingerprint to generate...
  78   #
  79   # PathLengthBits - A bit vector indicating presence/absence of atom paths
  80   # PathLengthCount - A vector containing count of atom paths
  81   #
  82   $This->{Type} = '';
  83 
  84   # Type of vector: FingerprintsBitVector or FingerprintsVector
  85   $This->{VectorType} = '';
  86 
  87   # Set default mininum, maximum, and default size. Although any arbitrary size can
  88   # be specified, bit vector used to store bits work on a vector size which is
  89   # power of 2 and additonal bits are automatically added and cleared.
  90   #
  91   $This->{Size} = 1024;
  92 
  93   $This->{MinSize} = 32;
  94   $This->{MaxSize} = 2**32;
  95 
  96   # Minimum and maximum path lengths to use for fingerprints generation...
  97   $This->{MinLength} = 1;
  98   $This->{MaxLength} = 8;
  99 
 100   # Numner of bits to set for each atom path for FingerprintsBitVector...
 101   $This->{NumOfBitsToSetPerPath} = 1;
 102 
 103   # Atom identifier type to use for path atoms during fingerprints generation...
 104   #
 105   # Currently supported values are: AtomicInvariantsAtomTypes, DREIDINGAtomTypes,
 106   # EStateAtomTypes, FunctionalClassAtomTypes, MMFF94AtomTypes, SLogPAtomTypes,
 107   # SYBYLAtomTypes, TPSAAtomTypes, UFFAtomTypes
 108   #
 109   $This->{AtomIdentifierType} = '';
 110 
 111   # Atom types assigned to atoms...
 112   %{$This->{AssignedAtomTypes}} = ();
 113 
 114   # For molecules containing rings, atom paths starting from each atom can be traversed in four
 115   # different ways:
 116   #
 117   # . Atom paths without any rings and sharing of bonds in traversed paths.
 118   # . Atom paths containing rings and without any sharing of bonds in traversed paths
 119   # . All possible atom paths without any rings and sharing of bonds in traversed paths
 120   # . All possible atom paths containing rings and with sharing of bonds in traversed paths.
 121   #
 122   # Atom path traversal is terminated at the last ring atom. For molecules containing no rings,
 123   # first two and last two types described above are equivalent.
 124   #
 125   # AllowSharedBonds and AllowRings variables allow generation of differen types of paths
 126   # to be used for fingerprints generation.
 127   #
 128   # In addition to atom symbols, bond symbols are also used to generate a string
 129   # for atom paths. These atom paths strings are hased to a 32 bit integer key which
 130   # in turn is used as a seed for a random number generation in range of 1 to fingerprint
 131   # size for setting corresponding bit in bit vector.
 132   #
 133   # UseBondSymbols variable allow generation of atom path strings and consequently fingerprints.
 134   #
 135   # Combination of AllowSharedBonds, AllowRings, and UseBondSymbols allow generation of
 136   # 8 different types of path length fingerprints:
 137   #
 138   # AllowSharedBonds    AllowRings    UseBondSymbols    PathLengthFingerprintsType
 139   #
 140   # No                  No            Yes                AtomPathsNoCyclesWithBondSymbols
 141   # No                  Yes           Yes                AtomPathsWithCyclesWithBondSymbols
 142   #
 143   # Yes                 No            Yes                AllAtomPathsNoCyclesWithBondSymbols
 144   # Yes                 Yes           Yes                AllAtomPathsWithCyclesWithBondSymbols [ DEFAULT ]
 145   #
 146   # No                  No            No                 AtomPathsNoCyclesNoBondSymbols
 147   # No                  Yes           No                 AtomPathsWithCyclesNoBondSymbols
 148   #
 149   # Yes                 No            No                 AllAtomPathsNoCyclesNoBondSymbols
 150   # Yes                 Yes           No                 AllAtomPathsWithCyclesNoWithBondSymbols
 151   #
 152   #
 153 
 154   # By default, atom paths starting from atoms are allowed to share bonds already traversed...
 155   $This->{AllowSharedBonds} = 1;
 156 
 157   # By default rings are included in paths...
 158   $This->{AllowRings} = 1;
 159 
 160   # By default bond symbols are included in atom path strings...
 161   $This->{UseBondSymbols} = 1;
 162 
 163   # By default only structurally unique atom paths are used for generation
 164   # atom path strings...
 165   $This->{UseUniquePaths} = 1;
 166 
 167   # Random number generator to use during generation of fingerprints bit-vector
 168   # string: Perl CORE::rand or MayaChemTools MathUtil::random function.
 169   #
 170   # The random number generator implemented in MayaChemTools is a variant of
 171   # linear congruential generator (LCG) as described by Miller et al. [ Ref 120 ].
 172   # It is also referred to as Lehmer random number generator or Park-Miller
 173   # random number generator.
 174   #
 175   # Unlike Perl's core random number generator function rand, the random number
 176   # generator implemented in MayaChemTools, MathUtil::random,  generates consistent
 177   # random values across different platformsfor a specific random seed and leads
 178   # to generation of portable fingerprints bit-vector strings.
 179   #
 180   $This->{UsePerlCoreRandom} = 1;
 181 
 182   # Bond symbols to use during generation of atom path strings...
 183   %{$This->{BondOrderToSymbol}} = ();
 184   %{$This->{BondOrderToSymbol}} = ('1' => '', '1.5' => ':', '2' => '=', '3' => '#');
 185 
 186   # BondSymbols map to use for bonded atom IDs to use during atom path strings...
 187   %{$This->{BondSymbols}} = ();
 188 
 189   # Path atom IDs to remove duplicate paths...
 190   %{$This->{UniqueLinearAtomPathsIDs}} = ();
 191   %{$This->{UniqueCyclicAtomPathsIDs}} = ();
 192 
 193   # Reference to all the atom paths upto specified path length...
 194   $This->{AtomPathsRef} = '';
 195 
 196   # Atom paths strings created using specified atom types and bond symbols...
 197   %{$This->{AtomPathsStrings}} = ();
 198 }
 199 
 200 # Initialize class ...
 201 sub _InitializeClass {
 202   #Class name...
 203   $ClassName = __PACKAGE__;
 204 }
 205 
 206 # Initialize object properties....
 207 sub _InitializePathLengthFingerprintsProperties {
 208   my($This, %NamesAndValues) = @_;
 209 
 210   my($Name, $Value, $MethodName);
 211   while (($Name, $Value) = each  %NamesAndValues) {
 212     $MethodName = "Set${Name}";
 213     $This->$MethodName($Value);
 214   }
 215 
 216   # Make sure molecule object was specified...
 217   if (!exists $NamesAndValues{Molecule}) {
 218     croak "Error: ${ClassName}->New: Object can't be instantiated without specifying molecule...";
 219   }
 220 
 221   if (!exists $NamesAndValues{Type}) {
 222     croak "Error: ${ClassName}->New: Object can't be instantiated without specifying Type...";
 223   }
 224 
 225   if (!exists $NamesAndValues{AtomIdentifierType}) {
 226     croak "Error: ${ClassName}->New: Object can't be instantiated without specifying AtomIdentifierType...";
 227   }
 228 
 229   # Make sure it's power of 2...
 230   if (exists $NamesAndValues{Size}) {
 231     if (!TextUtil::IsNumberPowerOfNumber($NamesAndValues{Size}, 2)) {
 232       croak "Error: ${ClassName}->New: Specified size value, $NamesAndValues{Size}, must be power of 2...";
 233     }
 234   }
 235 
 236   if ($This->{Type} =~ /^PathLengthBits$/i) {
 237     $This->_InitializePathLengthBits();
 238   }
 239   elsif ($This->{Type} =~ /^PathLengthCount$/i) {
 240     $This->_InitializePathLengthCount();
 241   }
 242   else {
 243     croak "Error: ${ClassName}->_InitializePathLengthFingerprintsProperties: Unknown PathLength type: $This->{Type}; Supported PathLength type : PathLengthBits or PathLengthCount......";
 244   }
 245 
 246   return $This;
 247 }
 248 
 249 # Initialize PathLength bits...
 250 #
 251 sub _InitializePathLengthBits {
 252   my($This) = @_;
 253 
 254   # Vector type...
 255   $This->{VectorType} = 'FingerprintsBitVector';
 256 
 257   $This->_InitializeFingerprintsBitVector();
 258 
 259   return $This;
 260 }
 261 
 262 # Initialize PathLength key count...
 263 #
 264 sub _InitializePathLengthCount {
 265   my($This) = @_;
 266 
 267   # Vector type and type of values...
 268   $This->{VectorType} = 'FingerprintsVector';
 269   $This->{FingerprintsVectorType} = 'NumericalValues';
 270 
 271   $This->_InitializeFingerprintsVector();
 272 
 273   return $This;
 274 }
 275 
 276 # Set type...
 277 #
 278 sub SetType {
 279   my($This, $Type) = @_;
 280 
 281   if ($This->{Type}) {
 282     croak "Error: ${ClassName}->SetType: Can't change type:  It's already set...";
 283   }
 284 
 285   if ($Type =~ /^PathLengthBits$/i) {
 286     $This->{Type} = 'PathLengthBits';;
 287   }
 288   elsif ($Type =~ /^PathLengthCount$/i) {
 289     $This->{Type} = 'PathLengthCount';;
 290   }
 291   else {
 292     croak "Error: ${ClassName}->SetType: Unknown PathLength keys: $Type; Supported PathLength types: PathLengthBits or PathLengthCount...";
 293   }
 294   return $This;
 295 }
 296 
 297 # Disable vector type change...
 298 #
 299 sub SetVectorType {
 300   my($This, $Type) = @_;
 301 
 302   croak "Error: ${ClassName}->SetVectorType: Can't change vector type...";
 303 
 304   return $This;
 305 }
 306 
 307 # Disable vector type change...
 308 #
 309 sub SetFingerprintsVectorType {
 310   my($This, $Type) = @_;
 311 
 312   croak "Error: ${ClassName}->SetFingerprintsVectorType: Can't change fingerprints vector type...";
 313 
 314   return $This;
 315 }
 316 
 317 # Set atom identifier type to use for path length atom identifiers...
 318 #
 319 sub SetAtomIdentifierType {
 320   my($This, $IdentifierType) = @_;
 321 
 322   if ($IdentifierType !~ /^(AtomicInvariantsAtomTypes|DREIDINGAtomTypes|EStateAtomTypes|FunctionalClassAtomTypes|MMFF94AtomTypes|SLogPAtomTypes|SYBYLAtomTypes|TPSAAtomTypes|UFFAtomTypes)$/i) {
 323     croak "Error: ${ClassName}->SetAtomIdentifierType: Specified value, $IdentifierType, for AtomIdentifierType is not vaild. Supported types in current release of MayaChemTools: AtomicInvariantsAtomTypes, DREIDINGAtomTypes, EStateAtomTypes, FunctionalClassAtomTypes, MMFF94AtomTypes, SLogPAtomTypes, SYBYLAtomTypes, TPSAAtomTypes, and UFFAtomTypes.";
 324   }
 325 
 326   if ($This->{AtomIdentifierType}) {
 327     croak "Error: ${ClassName}->SetAtomIdentifierType: Can't change atom identifier type:  It's already set...";
 328   }
 329 
 330   $This->{AtomIdentifierType} = $IdentifierType;
 331 
 332   # Initialize atom identifier type information...
 333   $This->_InitializeAtomIdentifierTypeInformation();
 334 
 335   return $This;
 336 }
 337 
 338 # Set minimum path length...
 339 #
 340 sub SetMinLength {
 341   my($This, $Value) = @_;
 342 
 343   if (!TextUtil::IsPositiveInteger($Value)) {
 344     croak "Error: ${ClassName}->SetMinLength: MinLength value, $Value, is not valid:  It must be a positive integer...";
 345   }
 346   $This->{MinLength} = $Value;
 347 
 348   return $This;
 349 }
 350 
 351 # Set maximum path length...
 352 #
 353 sub SetMaxLength {
 354   my($This, $Value) = @_;
 355 
 356   if (!TextUtil::IsPositiveInteger($Value)) {
 357     croak "Error: ${ClassName}->SetMaxLength: MaxLength value, $Value, is not valid:  It must be a positive integer...";
 358   }
 359   $This->{MaxLength} = $Value;
 360 
 361   return $This;
 362 }
 363 
 364 # Set number of bits to set for each path...
 365 #
 366 sub SetNumOfBitsToSetPerPath {
 367   my($This, $Value) = @_;
 368 
 369   if (!TextUtil::IsPositiveInteger($Value)) {
 370     croak "Error: ${ClassName}->SetNumOfBitsToSetPerPath: NumOfBitsToSetPerPath value, $Value, is not valid:  It must be a positive integer...";
 371   }
 372   $This->{NumOfBitsToSetPerPath} = $Value;
 373 
 374   return $This;
 375 }
 376 
 377 # Generate fingerprints description...
 378 #
 379 sub GetDescription {
 380   my($This) = @_;
 381 
 382   # Is description explicity set?
 383   if (exists $This->{Description}) {
 384     return $This->{Description};
 385   }
 386 
 387   # Generate fingerprints description...
 388 
 389   return "$This->{Type}:$This->{AtomIdentifierType}:MinLength$This->{MinLength}:MaxLength$This->{MaxLength}";
 390 }
 391 
 392 # Generate path length fingerprints...
 393 #
 394 sub GenerateFingerprints {
 395   my($This) = @_;
 396 
 397   if ($This->{MinLength} > $This->{MaxLength}) {
 398     croak "Error: ${ClassName}->GenerateFingerprints: No fingerpritns generated: MinLength, $This->{MinLength}, must be <= MaxLength, $This->{MaxLength}...";
 399   }
 400 
 401   # Cache appropriate molecule data...
 402   $This->_SetupMoleculeDataCache();
 403 
 404   # Assign atom types to all atoms...
 405   if (!$This->_AssignAtomTypes()) {
 406     carp "Warning: ${ClassName}->GenerateFingerprints: $This->{AtomIdentifierType} fingerprints generation didn't succeed: Couldn't assign valid $This->{AtomIdentifierType} to all atoms...";
 407     return $This;
 408   }
 409 
 410   # Setup bond symbol map...
 411   if ($This->{UseBondSymbols}) {
 412     $This->_InitializeBondSymbols();
 413   }
 414 
 415   # Generate appropriate atom paths...
 416   $This->_GenerateAtomPathsUpToMaxLength();
 417 
 418   # Initialize atom path strings...
 419   $This->_InitializeAtomPathsStrings();
 420 
 421   # Generate appropriate atom path strings for unique atom paths...
 422   $This->_GenerateAtomPathsStrings();
 423 
 424   # Set final fingerprints...
 425   $This->_SetFinalFingerprints();
 426 
 427   # Clear cached molecule data...
 428   $This->_ClearMoleculeDataCache();
 429 
 430   return $This;
 431 }
 432 
 433 # Assign appropriate atom types to all atoms...
 434 #
 435 sub _AssignAtomTypes {
 436   my($This) = @_;
 437   my($SpecifiedAtomTypes, $Atom, $AtomID, $IgnoreHydrogens);
 438 
 439   %{$This->{AssignedAtomTypes}} = ();
 440   $IgnoreHydrogens = 0;
 441 
 442   $SpecifiedAtomTypes = undef;
 443 
 444   IDENTIFIERTYPE: {
 445     if ($This->{AtomIdentifierType} =~ /^AtomicInvariantsAtomTypes$/i) {
 446       $SpecifiedAtomTypes = new AtomTypes::AtomicInvariantsAtomTypes('Molecule' => $This->{Molecule}, 'IgnoreHydrogens' => $IgnoreHydrogens, 'AtomicInvariantsToUse' => $This->{AtomicInvariantsToUse});
 447       last IDENTIFIERTYPE;
 448     }
 449 
 450     if ($This->{AtomIdentifierType} =~ /^DREIDINGAtomTypes$/i) {
 451       $SpecifiedAtomTypes = new AtomTypes::DREIDINGAtomTypes('Molecule' => $This->{Molecule}, 'IgnoreHydrogens' => $IgnoreHydrogens);
 452       last IDENTIFIERTYPE;
 453     }
 454 
 455     if ($This->{AtomIdentifierType} =~ /^EStateAtomTypes$/i) {
 456       $SpecifiedAtomTypes = new AtomTypes::EStateAtomTypes('Molecule' => $This->{Molecule}, 'IgnoreHydrogens' => $IgnoreHydrogens);
 457       last IDENTIFIERTYPE;
 458     }
 459 
 460     if ($This->{AtomIdentifierType} =~ /^FunctionalClassAtomTypes$/i) {
 461       $SpecifiedAtomTypes = new AtomTypes::FunctionalClassAtomTypes('Molecule' => $This->{Molecule}, 'IgnoreHydrogens' => $IgnoreHydrogens, 'FunctionalClassesToUse' => $This->{FunctionalClassesToUse});
 462       last IDENTIFIERTYPE;
 463     }
 464 
 465     if ($This->{AtomIdentifierType} =~ /^MMFF94AtomTypes$/i) {
 466       $SpecifiedAtomTypes = new AtomTypes::MMFF94AtomTypes('Molecule' => $This->{Molecule}, 'IgnoreHydrogens' => $IgnoreHydrogens);
 467       last IDENTIFIERTYPE;
 468     }
 469 
 470     if ($This->{AtomIdentifierType} =~ /^SLogPAtomTypes$/i) {
 471       $SpecifiedAtomTypes = new AtomTypes::SLogPAtomTypes('Molecule' => $This->{Molecule}, 'IgnoreHydrogens' => $IgnoreHydrogens);
 472       last IDENTIFIERTYPE;
 473     }
 474     if ($This->{AtomIdentifierType} =~ /^SYBYLAtomTypes$/i) {
 475       $SpecifiedAtomTypes = new AtomTypes::SYBYLAtomTypes('Molecule' => $This->{Molecule}, 'IgnoreHydrogens' => $IgnoreHydrogens);
 476       last IDENTIFIERTYPE;
 477     }
 478 
 479     if ($This->{AtomIdentifierType} =~ /^TPSAAtomTypes$/i) {
 480       $SpecifiedAtomTypes = new AtomTypes::TPSAAtomTypes('Molecule' => $This->{Molecule}, 'IgnorePhosphorus' => 0, 'IgnoreSulfur' => 0);
 481       last IDENTIFIERTYPE;
 482     }
 483 
 484     if ($This->{AtomIdentifierType} =~ /^UFFAtomTypes$/i) {
 485       $SpecifiedAtomTypes = new AtomTypes::UFFAtomTypes('Molecule' => $This->{Molecule}, 'IgnoreHydrogens' => $IgnoreHydrogens);
 486       last IDENTIFIERTYPE;
 487     }
 488 
 489     croak "Error: ${ClassName}->_AssignAtomTypes: Unknown atom indentifier type $This->{AtomIdentifierType}...";
 490   }
 491 
 492   # Assign atom types...
 493   $SpecifiedAtomTypes->AssignAtomTypes();
 494 
 495   # Make sure atom types assignment is successful...
 496   if (!$SpecifiedAtomTypes->IsAtomTypesAssignmentSuccessful()) {
 497     return undef;
 498   }
 499 
 500   # Collect assigned atom types...
 501   ATOM: for $Atom (@{$This->{Atoms}}) {
 502     $AtomID = $Atom->GetID();
 503     $This->{AssignedAtomTypes}{$AtomID} = $SpecifiedAtomTypes->GetAtomType($Atom);
 504   }
 505 
 506   return $This;
 507 }
 508 
 509 # Setup bond symbol map for atoms to speed up generation of path length identifiers
 510 # during fingerprints generation...
 511 #
 512 sub _InitializeBondSymbols {
 513   my($This) = @_;
 514   my($Atom1, $Atom2, $AtomID1, $AtomID2, $Bond, $BondSymbol, $BondOrder);
 515 
 516   %{$This->{BondSymbols}} = ();
 517 
 518   if (!$This->{UseBondSymbols}) {
 519     return $This;
 520   }
 521 
 522   for $Bond ($This->{Molecule}->GetBonds()) {
 523     $BondOrder = $Bond->GetBondOrder();
 524     $BondSymbol = $Bond->IsAromatic() ? ':' : (exists($This->{BondOrderToSymbol}{$BondOrder}) ? $This->{BondOrderToSymbol}{$BondOrder} : $BondOrder);
 525     ($Atom1, $Atom2) = $Bond->GetAtoms();
 526     $AtomID1 = $Atom1->GetID(); $AtomID2 = $Atom2->GetID();
 527     if ($AtomID1 > $AtomID2) {
 528       ($AtomID1, $AtomID2) =  ($AtomID2, $AtomID1);
 529     }
 530 
 531     if (!exists $This->{BondSymbols}{$AtomID1}) {
 532       %{$This->{BondSymbols}{$AtomID1}} = ();
 533     }
 534     $This->{BondSymbols}{$AtomID1}{$AtomID2} = $BondSymbol;
 535   }
 536   return $This;
 537 }
 538 
 539 # Get appropriate atom paths with length up to MaxLength...
 540 #
 541 sub _GenerateAtomPathsUpToMaxLength {
 542   my($This) = @_;
 543   my($PathLength, $AllowRings, $Molecule, $AtomPathsRef);
 544 
 545   $PathLength = $This->{MaxLength};
 546   $AllowRings = $This->{AllowRings};
 547   $Molecule = $This->{Molecule};
 548 
 549   if ($This->{AllowSharedBonds}) {
 550     $AtomPathsRef =  $Molecule->GetAllAtomPathsWithLengthUpto($PathLength, $AllowRings);
 551   }
 552   else {
 553     $AtomPathsRef = $Molecule->GetAtomPathsWithLengthUpto($PathLength, $AllowRings);
 554   }
 555   $This->{AtomPathsRef} = $AtomPathsRef;
 556 
 557   return $This;
 558 }
 559 
 560 # Initialize atom paths strings at various pathlength levels...
 561 #
 562 sub _InitializeAtomPathsStrings {
 563   my($This) = @_;
 564   my($PathLength);
 565 
 566   %{$This->{AtomPathsStrings}} = ();
 567 
 568   for $PathLength ($This->{MinLength} .. $This->{MaxLength}) {
 569     %{$This->{AtomPathsStrings}{$PathLength}} = ();
 570   }
 571 
 572   return $This;
 573 }
 574 
 575 # Generate appropriate atom path strings for unique atom paths...
 576 #
 577 sub _GenerateAtomPathsStrings {
 578   my($This, $PathAtomsRef) = @_;
 579   my($PathLength, $MinPathLength, $UseUniquePaths);
 580 
 581   $MinPathLength = $This->{MinLength};
 582   $UseUniquePaths = $This->{UseUniquePaths};
 583 
 584   PATHATOMS: for $PathAtomsRef (@{$This->{AtomPathsRef}}) {
 585     $PathLength = scalar @{$PathAtomsRef};
 586     if ($PathLength < $MinPathLength) {
 587       next PATHATOMS;
 588     }
 589     if ($UseUniquePaths) {
 590       $This->_GenerateAtomPathStringUsingUniquePath($PathAtomsRef);
 591     }
 592     else {
 593       $This->_GenerateAtomPathString($PathAtomsRef);
 594     }
 595   }
 596   return $This;
 597 }
 598 
 599 # Generate atom path string using unique path...
 600 #
 601 sub _GenerateAtomPathStringUsingUniquePath {
 602   my($This, $PathAtomsRef) = @_;
 603 
 604   if ($This->{AllowRings} && $This->_DoesAtomPathContainsCycle($PathAtomsRef)) {
 605     $This->_GenerateAtomPathStringUsingUniquePathContainingCycle($PathAtomsRef);
 606   }
 607   else {
 608     $This->_GenerateAtomPathStringUsingUniqueLinearPath($PathAtomsRef);
 609   }
 610   return $This;
 611 }
 612 
 613 # Generate atom path string for specified path containing no cycle...
 614 #
 615 sub _GenerateAtomPathStringUsingUniqueLinearPath {
 616   my($This, $PathAtomsRef) = @_;
 617 
 618   # Is it a unique linear atom path?
 619   #
 620   if (!$This->_IsUniqueLinearAtomPath($PathAtomsRef)) {
 621     return $This;
 622   }
 623   $This->_GenerateAtomPathString($PathAtomsRef);
 624 
 625   return $This;
 626 }
 627 
 628 # Is it a structurally unique linear path?
 629 #
 630 # For a path to be structurally unique, all of its atom IDs must be diffferent from any
 631 # earlier path atom IDs. In order to generate atom path atom ID invariant of the atom
 632 # order in the molecule, atom IDs are sorted numerically before generating the path ID.
 633 #
 634 # Notes:
 635 #   . Atom path ID doesn't reflect the order of atoms in the atom path.
 636 #
 637 sub _IsUniqueLinearAtomPath {
 638   my($This, $PathAtomsRef) = @_;
 639   my($AtomPathID, $PathLength, @PathAtomIDs);
 640 
 641   @PathAtomIDs = ();
 642   @PathAtomIDs = map { $_->GetID(); } @{$PathAtomsRef};
 643 
 644   $AtomPathID = join '-', sort { $a <=> $b } @PathAtomIDs;
 645   if (exists $This->{UniqueLinearAtomPathsIDs}{$AtomPathID}) {
 646     return 0;
 647   }
 648 
 649   # It's a unique atom path...
 650   $This->{UniqueLinearAtomPathsIDs}{$AtomPathID} = 1;
 651 
 652   return 1;
 653 }
 654 
 655 # Generate atom path string for specified path containing a cycle...
 656 #
 657 sub _GenerateAtomPathStringUsingUniquePathContainingCycle {
 658   my($This, $PathAtomsRef) = @_;
 659 
 660   # Is it a unique atom path containing a cycle?
 661   #
 662   if (!$This->_IsUniqueAtomPathContainingCycle($PathAtomsRef)) {
 663     return $This;
 664   }
 665 
 666   my($CycleClosingPathAtomIndex);
 667   ($CycleClosingPathAtomIndex) = $This->_GetAtomPathCycleClosingAtomIndex($PathAtomsRef);
 668 
 669   if ($CycleClosingPathAtomIndex == 0) {
 670     $This->_GenerateUniqueAtomPathStringForPathCycle($PathAtomsRef);
 671   }
 672   else {
 673     $This->_GenerateUniqueAtomPathStringForPathContainingCycle($PathAtomsRef, $CycleClosingPathAtomIndex);
 674   }
 675   return $This;
 676 }
 677 
 678 # Generate a unique atom path string for a cyclic path by generating atom path
 679 # strings for all possible paths in the cycle and keeping the lexicographically smallest
 680 # one.
 681 #
 682 # Although all the paths enumerated during atom path string generation are also
 683 # present in the intial paths list, but structural uniqueness check would detect
 684 # 'em earlier and this method ends being invoked only once for the first cyclic path.
 685 #
 686 # For atom paths containg same atom types and bond symbols, atom path strings
 687 # would be same for the paths.
 688 #
 689 sub _GenerateUniqueAtomPathStringForPathCycle {
 690   my($This, $PathAtomsRef) = @_;
 691 
 692   if ($This->_AreAllPathAtomsSymbolsSame($PathAtomsRef) && $This->_AreAllPathBondSymbolsSame($PathAtomsRef)) {
 693     return $This->_GenerateAtomPathString($PathAtomsRef);
 694   }
 695 
 696   # Generate all possible atom path strings and select the lexicographically smallest one...
 697   my($Index, $PathLength, $FinalAtomPathString, $FirstAtomPathString, $LastIndex, $FirstPartIndex, $FirstPartStartIndex, $FirstPartEndIndex, $SecondPartIndex, $SecondPartStartIndex, $SecondPartEndIndex, $AtomPathSymbolsRef, $AtomPathString, $ReverseAtomPathString, @FirstPartPathAtoms, @SecondPartPathAtoms, @PathAtoms);
 698 
 699   $PathLength = scalar @{$PathAtomsRef};
 700   $LastIndex = $PathLength - 1;
 701 
 702   $FinalAtomPathString = '';
 703   $FirstAtomPathString = 1;
 704 
 705   @FirstPartPathAtoms = (); @SecondPartPathAtoms = (); @PathAtoms = ();
 706 
 707   for $Index (0 .. ($LastIndex - 1)) {
 708     @FirstPartPathAtoms = (); @SecondPartPathAtoms = (); @PathAtoms = ();
 709 
 710     $FirstPartStartIndex = 0; $FirstPartEndIndex = $Index - 1;
 711     $SecondPartStartIndex = $Index; $SecondPartEndIndex = $LastIndex - 1;
 712 
 713     # Get first part atoms...
 714     for $FirstPartIndex ($FirstPartStartIndex .. $FirstPartEndIndex) {
 715       push @FirstPartPathAtoms, $PathAtomsRef->[$FirstPartIndex];
 716     }
 717 
 718     # Get second part atoms...
 719     for $SecondPartIndex ($SecondPartStartIndex .. $SecondPartEndIndex) {
 720       push @SecondPartPathAtoms, $PathAtomsRef->[$SecondPartIndex];
 721     }
 722 
 723     # Get final list of path atoms...
 724     if (@SecondPartPathAtoms) {
 725       push @PathAtoms, @SecondPartPathAtoms;
 726     }
 727     if (@FirstPartPathAtoms) {
 728       push @PathAtoms, @FirstPartPathAtoms;
 729     }
 730 
 731     # Complete the cycle by adding first atom as the last atom...
 732     push @PathAtoms, $PathAtomsRef->[$SecondPartStartIndex];
 733 
 734     # Generate atom path string...
 735     $AtomPathSymbolsRef = $This->_GenerateAtomPathSymbols(\@PathAtoms);
 736 
 737     $AtomPathString = join '', @{$AtomPathSymbolsRef};
 738     $ReverseAtomPathString = join '', reverse @{$AtomPathSymbolsRef};
 739 
 740     if ($ReverseAtomPathString le $AtomPathString) {
 741       $AtomPathString = $ReverseAtomPathString;
 742     }
 743 
 744     # Update final atom path string...
 745 
 746     if ($FirstAtomPathString) {
 747       $FirstAtomPathString = 0;
 748       $FinalAtomPathString = $AtomPathString;
 749     }
 750     else {
 751       if ($AtomPathString le $FinalAtomPathString) {
 752         $FinalAtomPathString = $AtomPathString;
 753       }
 754     }
 755   }
 756 
 757   # Set final atom path string...
 758   #
 759   if (exists $This->{AtomPathsStrings}{$PathLength}{$FinalAtomPathString}) {
 760     $This->{AtomPathsStrings}{$PathLength}{$FinalAtomPathString} += 1;
 761   }
 762   else {
 763     $This->{AtomPathsStrings}{$PathLength}{$FinalAtomPathString} = 1;
 764   }
 765 
 766   return $This;
 767 }
 768 
 769 #
 770 # Generate a unique atom path string for paths containing a cycle closed by
 771 # the specified atom index and the last atom index.
 772 #
 773 # The following methodology is used to generate atom path string which is
 774 # independemt of initial atom ordering:
 775 #   . Generate atom paths string from first atom to the atom before the first cycle
 776 #     closing atom.
 777 #   . Generate atom path string from atoms from first cycle closing atom index to
 778 #     the last path atom in both forward and reverse order. And select the lexicographically
 779 #     smallest atom path string.
 780 #   . Combine atom path string generated in first step with second step to generate
 781 #     final atom path string.
 782 #
 783 sub _GenerateUniqueAtomPathStringForPathContainingCycle {
 784   my($This, $PathAtomsRef, $CycleClosingAtomIndex) = @_;
 785   my($Index, $PathLength, $LastIndex, $LinearPartStartIndex, $LinearPartEndIndex, $CyclicPartStartIndex, $CyclicPartEndIndex, $CyclicPartAtomPathSymbolsRef, $CyclicPartAtomPathString, $ReverseCyclicPartAtomPathString, $AtomPathString, $AtomPathSymbolsRef, @CyclicPartPathAtoms, @PathAtoms);
 786 
 787   $PathLength = scalar @{$PathAtomsRef};
 788   $LastIndex = $PathLength - 1;
 789 
 790   @PathAtoms = ();
 791 
 792   # Get path atoms corresponding to linear  part of the path...
 793   $LinearPartStartIndex = 0; $LinearPartEndIndex = $CycleClosingAtomIndex - 1;
 794 
 795   for $Index ($LinearPartStartIndex .. $LinearPartEndIndex) {
 796     push @PathAtoms, $PathAtomsRef->[$Index];
 797   }
 798 
 799   # Get atoms correcponding to cyclic part of the path...
 800   @CyclicPartPathAtoms = ();
 801   $CyclicPartStartIndex = $CycleClosingAtomIndex; $CyclicPartEndIndex = $LastIndex;
 802 
 803   for $Index ($CyclicPartStartIndex .. $CyclicPartEndIndex) {
 804     push @CyclicPartPathAtoms, $PathAtomsRef->[$Index];
 805   }
 806 
 807   # Setup a lexicographically smaller atom path string for cyclic part...
 808 
 809   $CyclicPartAtomPathSymbolsRef = $This->_GenerateAtomPathSymbols(\@CyclicPartPathAtoms);
 810   $CyclicPartAtomPathString = join '', @{$CyclicPartAtomPathSymbolsRef};
 811   $ReverseCyclicPartAtomPathString = join '', reverse @{$CyclicPartAtomPathSymbolsRef};
 812 
 813   # Setup atom path corresponding to linear part and lexigraphicall smaller cyclic part...
 814 
 815   if ($ReverseCyclicPartAtomPathString le $CyclicPartAtomPathString) {
 816     push @PathAtoms, reverse @CyclicPartPathAtoms;
 817   }
 818   else {
 819     push @PathAtoms, @CyclicPartPathAtoms;
 820   }
 821 
 822   # Setup final atom path string...
 823 
 824   $AtomPathSymbolsRef = $This->_GenerateAtomPathSymbols(\@PathAtoms);
 825   $AtomPathString = join '', @{$AtomPathSymbolsRef};
 826 
 827   if (exists $This->{AtomPathsStrings}{$PathLength}{$AtomPathString}) {
 828     $This->{AtomPathsStrings}{$PathLength}{$AtomPathString} += 1;
 829   }
 830   else {
 831     $This->{AtomPathsStrings}{$PathLength}{$AtomPathString} = 1;
 832   }
 833 
 834   return $This;
 835 }
 836 
 837 # Does atom path contain a cycle?
 838 #
 839 # For an atom path to contain cycle, it must satisfy the following conditions:
 840 #   . Pathlength >= 3
 841 #   . Last atom ID is equal to first atom ID or some other atom ID besides itself
 842 #
 843 sub _DoesAtomPathContainsCycle {
 844   my($This, $PathAtomsRef) = @_;
 845   my($PathLength);
 846 
 847   $PathLength = scalar @{$PathAtomsRef};
 848   if ($PathLength <= 2) {
 849     return 0;
 850   }
 851 
 852   my($AtomIndex, $LastAtomIndex, $Atom, $AtomID, $LastAtom, $LastAtomID);
 853 
 854   $LastAtomIndex = $PathLength - 1;
 855   $LastAtom = $PathAtomsRef->[$LastAtomIndex];
 856   $LastAtomID = $LastAtom->GetID();
 857 
 858   # Look for atomID similar to last atom ID...
 859   for $AtomIndex (0 .. ($LastAtomIndex - 1)) {
 860     $Atom =  $PathAtomsRef->[$AtomIndex];
 861     $AtomID = $Atom->GetID();
 862 
 863     if ($AtomID == $LastAtomID) {
 864       # It's a cycle...
 865       return 1;
 866     }
 867   }
 868   return 0;
 869 }
 870 
 871 # Get atom path cycle closing atom index...
 872 #
 873 sub _GetAtomPathCycleClosingAtomIndex {
 874   my($This, $PathAtomsRef) = @_;
 875   my($AtomIndex, $LastAtomIndex, $Atom, $AtomID, $LastAtom, $LastAtomID, $PathLength);
 876 
 877   $PathLength = scalar @{$PathAtomsRef};
 878 
 879   $LastAtomIndex = $PathLength - 1;
 880   $LastAtom = $PathAtomsRef->[$LastAtomIndex]; $LastAtomID = $LastAtom->GetID();
 881 
 882   # Look for atomID similar to last atom ID...
 883   for $AtomIndex (0 .. ($LastAtomIndex - 1)) {
 884     $Atom =  $PathAtomsRef->[$AtomIndex]; $AtomID = $Atom->GetID();
 885 
 886     if ($AtomID == $LastAtomID) {
 887       # It's a cycle closing atom...
 888       return $AtomIndex;
 889     }
 890   }
 891   return undef;
 892 }
 893 
 894 # Is it a structurally unique path containing a cycle?
 895 #
 896 # For atom paths containing cycles, last atom ID is either equal to first atom ID or
 897 # some other atom ID besides itself.
 898 #
 899 # In order to determine its structurally unqiue independent of initial atom ordering,
 900 # the following methodolgy is used:
 901 #
 902 #   . For paths with same first and atom IDs:
 903 #      . Remove the last atom ID from atom path
 904 #      . Sort atom IDs in the path
 905 #      . Add first atom ID from the sorted list to the end of list to complete the cycle
 906 #      . Generate a atom path ID
 907 #      . Use final path ID to track uniqueness of path containing cycle.
 908 #
 909 #   . For paths with last atom ID equal to some other atom ID besidies itself:
 910 #      . Sort atom IDs in atom path
 911 #      . Generate atom path ID and use it to track unqiueness of atom paths.
 912 #
 913 sub _IsUniqueAtomPathContainingCycle {
 914   my($This, $PathAtomsRef) = @_;
 915   my($PathLength, $AtomPathID, $FirstAtom, $LastAtom, $FirstAtomID, $LastAtomID, @PathAtomIDs, @SortedPathAtomIDs);
 916 
 917   @PathAtomIDs = ();
 918   @PathAtomIDs = map { $_->GetID(); } @{$PathAtomsRef};
 919 
 920   $PathLength = scalar @{$PathAtomsRef};
 921 
 922   $FirstAtom = $PathAtomsRef->[0]; $FirstAtomID = $FirstAtom->GetID();
 923   $LastAtom = $PathAtomsRef->[$PathLength - 1]; $LastAtomID = $LastAtom->GetID();
 924 
 925   if ($FirstAtomID == $LastAtomID) {
 926     pop @PathAtomIDs;
 927 
 928     @SortedPathAtomIDs = ();
 929     @SortedPathAtomIDs = sort { $a <=> $b } @PathAtomIDs;
 930 
 931     push @SortedPathAtomIDs, $SortedPathAtomIDs[0];
 932 
 933     $AtomPathID = join '-', @SortedPathAtomIDs;
 934   }
 935   else {
 936     $AtomPathID = join '-', sort { $a <=> $b } @PathAtomIDs;
 937   }
 938 
 939   if (exists $This->{UniqueCyclicAtomPathsIDs}{$AtomPathID}) {
 940     return 0;
 941   }
 942 
 943   # It's a unique atom path containing a cycle...
 944   $This->{UniqueCyclicAtomPathsIDs}{$AtomPathID} = 1;
 945 
 946   return 1;
 947 }
 948 
 949 # Generate atom path string for specified atom path...
 950 #
 951 sub _GenerateAtomPathString {
 952   my($This, $PathAtomsRef) = @_;
 953   my($PathLength, $AtomPathString, $ReverseAtomPathString, $AtomPathSymbolsRef);
 954 
 955   $PathLength = scalar @{$PathAtomsRef};
 956 
 957   # Generate path atom and bond symbols...
 958   #
 959   $AtomPathSymbolsRef = $This->_GenerateAtomPathSymbols($PathAtomsRef);
 960 
 961   # Check presence of path using path ID created by atom path symbols...
 962   $AtomPathString = join '', @{$AtomPathSymbolsRef};
 963   if (exists $This->{AtomPathsStrings}{$PathLength}{$AtomPathString}) {
 964     $This->{AtomPathsStrings}{$PathLength}{$AtomPathString} += 1;
 965     return $This;
 966   }
 967 
 968   # Check presence of reverse path using path ID created by atom path symbols...
 969   #
 970   $ReverseAtomPathString = join '', reverse @{$AtomPathSymbolsRef};
 971   if (exists $This->{AtomPathsStrings}{$PathLength}{$ReverseAtomPathString}) {
 972     $This->{AtomPathsStrings}{$PathLength}{$ReverseAtomPathString} += 1;
 973     return $This;
 974   }
 975 
 976   # Use lexicographically smaller atom path string as PathID...
 977   #
 978   if ($AtomPathString le $ReverseAtomPathString) {
 979     $This->{AtomPathsStrings}{$PathLength}{$AtomPathString} = 1;
 980   }
 981   else {
 982     $This->{AtomPathsStrings}{$PathLength}{$ReverseAtomPathString} = 1;
 983   }
 984   return $This;
 985 }
 986 
 987 #  Are atom types for all path atoms same?
 988 #
 989 sub _AreAllPathAtomsSymbolsSame {
 990   my($This, $PathAtomsRef) = @_;
 991   my($Index, $Atom, $AtomID, $AtomType, $FirstAtomType);
 992 
 993   $Atom = $PathAtomsRef->[0]; $AtomID = $Atom->GetID();
 994   $FirstAtomType = $This->{AssignedAtomTypes}{$AtomID};
 995 
 996   for $Index (1 .. $#{$PathAtomsRef}) {
 997     $Atom = $PathAtomsRef->[$Index]; $AtomID = $Atom->GetID();
 998     $AtomType = $This->{AssignedAtomTypes}{$AtomID};
 999 
1000     if ($AtomType ne $FirstAtomType) {
1001       return 0;
1002     }
1003   }
1004   return 1;
1005 }
1006 
# Check whether all bonds along an atom path have the same bond symbol.
#
# Bond symbols are looked up in BondSymbols using IDs of two consecutive path
# atoms with the smaller atom ID used as the first key. During no usage of bond
# symbols, all bond symbols are assumed to be the same.
#
# Returns 1 when all bond symbols are the same and 0 otherwise.
#
sub _AreAllPathBondSymbolsSame {
  my($This, $PathAtomsRef) = @_;

  return 1 unless $This->{UseBondSymbols};

  # Retrieve symbol for the bond between atoms at Position and Position + 1...
  my $GetBondSymbolAt = sub {
    my($Position) = @_;
    my $AtomID1 = $PathAtomsRef->[$Position]->GetID();
    my $AtomID2 = $PathAtomsRef->[$Position + 1]->GetID();

    ($AtomID1, $AtomID2) = ($AtomID2, $AtomID1) unless $AtomID1 < $AtomID2;

    return $This->{BondSymbols}{$AtomID1}{$AtomID2};
  };

  my $ReferenceBondSymbol = $GetBondSymbolAt->(0);

  for my $Position (1 .. $#{$PathAtomsRef} - 1) {
    return 0 if $GetBondSymbolAt->($Position) ne $ReferenceBondSymbol;
  }

  return 1;
}
1037 
# Generate atom path symbols for a specified atom path.
#
# During no usage of bond symbols, the symbols correspond to assigned atom
# types of path atoms in path order. Otherwise, the symbol of the bond between
# each pair of consecutive path atoms is placed in between their atom types.
# Bond symbols are looked up in BondSymbols with the smaller atom ID used as
# the first key.
#
# Returns a reference to an array containing path symbols.
#
sub _GenerateAtomPathSymbols {
  my($This, $PathAtomsRef) = @_;

  # Ignore bond information...
  if (!$This->{UseBondSymbols}) {
    return [ map { $This->{AssignedAtomTypes}{$_->GetID()} } @{$PathAtomsRef} ];
  }

  # Start out with atom type of first atom in path...
  my $PreviousAtomID = $PathAtomsRef->[0]->GetID();
  my @PathSymbols = ($This->{AssignedAtomTypes}{$PreviousAtomID});

  # Append bond symbol and atom type for each subsequent atom in path...
  for my $Position (1 .. $#{$PathAtomsRef}) {
    my $CurrentAtomID = $PathAtomsRef->[$Position]->GetID();

    my($BondAtomID1, $BondAtomID2) = ($CurrentAtomID > $PreviousAtomID) ? ($PreviousAtomID, $CurrentAtomID) : ($CurrentAtomID, $PreviousAtomID);

    push @PathSymbols, $This->{BondSymbols}{$BondAtomID1}{$BondAtomID2}, $This->{AssignedAtomTypes}{$CurrentAtomID};

    $PreviousAtomID = $CurrentAtomID;
  }

  return \@PathSymbols;
}
1080 
# Set final fingerprints...
#
# Marks fingerprints as generated and hands over to the method corresponding to
# fingerprints type: bit vector for PathLengthBits and vector for PathLengthCount.
# Any other type is left untouched.
#
# Returns the object itself.
#
sub _SetFinalFingerprints {
  my($This) = @_;

  # Mark successful generation of fingerprints...
  $This->{FingerprintsGenerated} = 1;

  my $FingerprintsType = $This->{Type};

  if ($FingerprintsType =~ /^PathLengthBits$/i) {
    $This->_SetFinalFingerprintsBitVector();
    return $This;
  }

  $This->_SetFinalFingerprintsVector() if $FingerprintsType =~ /^PathLengthCount$/i;

  return $This;
}
1098 
# Set final fingerprints bit vector...
#
# For each atom path string tracked in AtomPathsStrings, a hash code generated
# by TextUtil::HashCode is used to seed a random number generator and
# NumOfBitsToSetPerPath random bit positions, in the range from 0 to Size - 1,
# are set in FingerprintsBitVector. UsePerlCoreRandom selects Perl core
# srand/rand functions instead of MathUtil::srandom and MathUtil::random.
#
# Returns the object itself.
#
sub _SetFinalFingerprintsBitVector {
  my($This) = @_;

  my $BitVector = $This->{FingerprintsBitVector};
  my $NumOfBits = $This->{Size};
  my $BitsPerPath = $This->{NumOfBitsToSetPerPath};

  # Passed along as second argument to SetBit for each bit position...
  my $SkipBitPosCheck = 1;

  for my $PathStringsRef (values %{$This->{AtomPathsStrings}}) {
    for my $PathString (keys %{$PathStringsRef}) {
      # Seeding by path hash code makes bit positions reproducible for a path...
      my $RandomSeed = TextUtil::HashCode($PathString);

      if ($This->{UsePerlCoreRandom}) {
        CORE::srand($RandomSeed);
        for my $BitNum (1 .. $BitsPerPath) {
          $BitVector->SetBit(int(CORE::rand($NumOfBits)), $SkipBitPosCheck);
        }
      }
      else {
        MathUtil::srandom($RandomSeed);
        for my $BitNum (1 .. $BitsPerPath) {
          $BitVector->SetBit(int(MathUtil::random($NumOfBits)), $SkipBitPosCheck);
        }
      }
    }
  }

  return $This;
}
1132 
# Set final fingerprints vector...
#
# Atom path strings tracked in AtomPathsStrings are used as value IDs and their
# counts as values. Path lengths are processed in numerically increasing order
# and path strings for each path length in default sort order.
#
# Returns the object itself.
#
sub _SetFinalFingerprintsVector {
  my($This) = @_;

  my(@PathStringIDs, @PathCounts);

  for my $Length (sort { $a <=> $b } keys %{$This->{AtomPathsStrings}}) {
    my @SortedPathStrings = sort keys %{$This->{AtomPathsStrings}{$Length}};

    push @PathStringIDs, @SortedPathStrings;
    push @PathCounts, @{$This->{AtomPathsStrings}{$Length}}{@SortedPathStrings};
  }

  # Add path string IDs and counts to fingerprints vector...
  my $FingerprintsVector = $This->{FingerprintsVector};
  $FingerprintsVector->AddValueIDs(\@PathStringIDs);
  $FingerprintsVector->AddValues(\@PathCounts);

  return $This;
}
1157 
# Cache appropriate molecule data...
#
# Atoms retrieved from the molecule returned by GetMolecule are stored in Atoms.
#
# Returns the object itself.
#
sub _SetupMoleculeDataCache {
  my($This) = @_;

  my $Molecule = $This->GetMolecule();
  my @MoleculeAtoms = $Molecule->GetAtoms();

  # Fill the atoms list in place...
  @{$This->{Atoms}} = @MoleculeAtoms;

  return $This;
}
1168 
# Clear cached molecule data...
#
# Empties the atoms list in Atoms and resets AtomPathsRef to an empty string.
#
# Returns the object itself.
#
sub _ClearMoleculeDataCache {
  my($This) = @_;

  # Clear path atoms...
  $This->{AtomPathsRef} = '';

  # Clear atoms by truncating the list in place...
  $#{$This->{Atoms}} = -1;

  return $This;
}
1182 
# Set atomic invariants to use for atom identifiers...
#
# Atomic invariants may be specified either as a list of values or as a
# reference to an array containing the values. Each specified value is checked
# using AtomTypes::AtomicInvariantsAtomTypes::IsAtomicInvariantAvailable before
# any change is made to AtomicInvariantsToUse.
#
# Issues a warning and returns nothing when no values are specified; croaks on
# an unsupported atomic invariant; otherwise, returns the object itself.
#
sub SetAtomicInvariantsToUse {
  my($This, @Values) = @_;

  unless (@Values) {
    carp "Warning: ${ClassName}->SetAtomicInvariantsToUse: No values specified...";
    return;
  }

  # A leading array reference supplies the complete list of atomic invariants...
  my @RequestedAtomicInvariants = (ref($Values[0]) =~ /^ARRAY/) ? @{$Values[0]} : @Values;

  # Make sure specified AtomicInvariants are valid...
  my @ValidatedAtomicInvariants = ();
  for my $SpecifiedAtomicInvariant (@RequestedAtomicInvariants) {
    if (!AtomTypes::AtomicInvariantsAtomTypes::IsAtomicInvariantAvailable($SpecifiedAtomicInvariant)) {
      croak "Error: ${ClassName}->SetAtomicInvariantsToUse: Specified atomic invariant, $SpecifiedAtomicInvariant, is not supported...\n ";
    }
    push @ValidatedAtomicInvariants, $SpecifiedAtomicInvariant;
  }

  # Set atomic invariants to use...
  @{$This->{AtomicInvariantsToUse}} = @ValidatedAtomicInvariants;

  return $This;
}
1222 
# Set functional classes to use for atom identifiers...
#
# Functional classes may be specified either as a list of values or as a
# reference to an array containing the values. Each specified value is checked
# using AtomTypes::FunctionalClassAtomTypes::IsFunctionalClassAvailable before
# any change is made to FunctionalClassesToUse.
#
# Issues a warning and returns nothing when no values are specified or when
# AtomIdentifierType is not FunctionalClassAtomTypes; croaks on an unsupported
# functional class; otherwise, returns the object itself.
#
sub SetFunctionalClassesToUse {
  my($This, @Values) = @_;

  unless (@Values) {
    carp "Warning: ${ClassName}->SetFunctionalClassesToUse: No values specified...";
    return;
  }

  unless ($This->{AtomIdentifierType} =~ /^FunctionalClassAtomTypes$/i) {
    carp "Warning: ${ClassName}->SetFunctionalClassesToUse: FunctionalClassesToUse can't be set for InitialAtomIdentifierType of $This->{AtomIdentifierType}...";
    return;
  }

  # A leading array reference supplies the complete list of functional classes...
  my @RequestedFunctionalClasses = (ref($Values[0]) =~ /^ARRAY/) ? @{$Values[0]} : @Values;

  # Make sure specified FunctionalClasses are valid...
  my @ValidatedFunctionalClasses = ();
  for my $SpecifiedFunctionalClass (@RequestedFunctionalClasses) {
    if (!AtomTypes::FunctionalClassAtomTypes::IsFunctionalClassAvailable($SpecifiedFunctionalClass)) {
      croak "Error: ${ClassName}->SetFunctionalClassesToUse: Specified functional class, $SpecifiedFunctionalClass, is not supported...\n ";
    }
    push @ValidatedFunctionalClasses, $SpecifiedFunctionalClass;
  }

  # Set functional classes to use...
  @{$This->{FunctionalClassesToUse}} = @ValidatedFunctionalClasses;

  return $This;
}
1266 
# Initialize atom identifier type information...
#
# Current supported values:
#
# AtomicInvariantsAtomTypes, DREIDINGAtomTypes, EStateAtomTypes, FunctionalClassAtomTypes,
# MMFF94AtomTypes, SLogPAtomTypes, SYBYLAtomTypes, TPSAAtomTypes, UFFAtomTypes
#
# Only AtomicInvariantsAtomTypes and FunctionalClassAtomTypes involve further
# initialization. Croaks on any other unlisted value; otherwise, returns the
# object itself.
#
sub _InitializeAtomIdentifierTypeInformation {
  my($This) = @_;

  my $AtomIdentifierType = $This->{AtomIdentifierType};

  if ($AtomIdentifierType =~ /^AtomicInvariantsAtomTypes$/i) {
    $This->_InitializeAtomicInvariantsAtomTypesInformation();
    return $This;
  }

  if ($AtomIdentifierType =~ /^FunctionalClassAtomTypes$/i) {
    $This->_InitializeFunctionalClassAtomTypesInformation();
    return $This;
  }

  # Nothing to do for now for the rest of the supported types...
  if ($AtomIdentifierType =~ /^(DREIDINGAtomTypes|EStateAtomTypes|MMFF94AtomTypes|SLogPAtomTypes|SYBYLAtomTypes|TPSAAtomTypes|UFFAtomTypes)$/i) {
    return $This;
  }

  croak "Error: ${ClassName}->_InitializeAtomIdentifierTypeInformation: Unknown atom indentifier type $This->{AtomIdentifierType}...";
}
1292 
# Initialize atomic invariants atom types to use for generating atom identifiers...
#
# Let:
#   AS = Atom symbol corresponding to element symbol
#
#   X<n>   = Number of non-hydrogen atom neighbors or heavy atoms attached to atom
#   BO<n> = Sum of bond orders to non-hydrogen atom neighbors or heavy atoms attached to atom
#   LBO<n> = Largest bond order of non-hydrogen atom neighbors or heavy atoms attached to atom
#   SB<n> = Number of single bonds to non-hydrogen atom neighbors or heavy atoms attached to atom
#   DB<n> = Number of double bonds to non-hydrogen atom neighbors or heavy atoms attached to atom
#   TB<n> = Number of triple bonds to non-hydrogen atom neighbors or heavy atoms attached to atom
#   H<n>   = Number of implicit and explicit hydrogens for atom
#   Ar     = Aromatic annotation indicating whether atom is aromatic
#   RA     = Ring atom annotation indicating whether atom is in a ring
#   FC<+n/-n> = Formal charge assigned to atom
#   MN<n> = Mass number indicating isotope other than most abundant isotope
#   SM<n> = Spin multiplicity of atom. Possible values: 1 (singlet), 2 (doublet) or 3 (triplet)
#
# Then:
#
#   Atom type generated by AtomTypes::AtomicInvariantsAtomTypes class corresponds to:
#
#     AS.X<n>.BO<n>.LBO<n>.<SB><n>.<DB><n>.<TB><n>.H<n>.Ar.RA.FC<+n/-n>.MN<n>.SM<n>
#
# Except for AS which is a required atomic invariant in atom types, all other atomic invariants are
# optional. Default atomic invariants used for AtomID are: AS, X<n>, BO<n>, H<n>, FC<+n/-n>.
# AtomID specification doesn't include atomic invariants with zero or undefined values.
#
# Returns the object itself.
#
sub _InitializeAtomicInvariantsAtomTypesInformation {
  my($This) = @_;

  # Default atomic invariants to use for generating atom neighborhood atom IDs: AS, X, BO, H, FC
  #
  @{$This->{AtomicInvariantsToUse}} = qw(AS X BO H FC);

  return $This;
}
1331 
# Initialize functional class atom types, generated by AtomTypes::FunctionalClassAtomTypes
# class, to use for generating atom identifiers...
#
# Let:
#   HBD: HydrogenBondDonor
#   HBA: HydrogenBondAcceptor
#   PI : PositivelyIonizable
#   NI : NegativelyIonizable
#   Ar : Aromatic
#   Hal : Halogen
#   H : Hydrophobic
#   RA : RingAtom
#   CA : ChainAtom
#
# Then:
#
#   Functional class atom type specification for an atom corresponds to:
#
#     Ar.CA.H.HBA.HBD.Hal.NI.PI.RA
#
#   Default functional classes used are: HBD, HBA, PI, NI, Ar, Hal
#
#   FunctionalAtomTypes are assigned using the following definitions [ Ref 60-61, Ref 65-66 ]:
#
#     HydrogenBondDonor: NH, NH2, OH
#     HydrogenBondAcceptor: N[!H], O
#     PositivelyIonizable: +, NH2
#     NegativelyIonizable: -, C(=O)OH, S(=O)OH, P(=O)OH
#
# Returns the object itself.
#
sub _InitializeFunctionalClassAtomTypesInformation {
  my($This) = @_;

  # Default functional class atom types to use for generating atom identifiers
  # are: HBD, HBA, PI, NI, Ar, Hal
  #
  @{$This->{FunctionalClassesToUse}} = qw(HBD HBA PI NI Ar Hal);

  return $This;
}
1372 
# Return a string containing data for PathLengthFingerprints object...
#
# This method is registered as the handler for the overloaded '""' operator.
#
# The string contains, in order: fingerprints type and atom identifier type;
# minimum and maximum path lengths; fingerprints generation control options;
# atomic invariants or functional classes information for the corresponding
# atom identifier types; and, depending on fingerprints type, either size
# information along with fingerprints bit vector or fingerprints vector.
#
sub StringifyPathLengthFingerprints {
  my($This) = @_;
  my($PathLengthsFingerprintsString);

  # Type of fingerprint...
  $PathLengthsFingerprintsString = "Fingerprint type: $This->{Type}; AtomIdentifierType: $This->{AtomIdentifierType}";

  # Path length...
  $PathLengthsFingerprintsString .= "; MinPathLength: $This->{MinLength}; MaxPathLength: $This->{MaxLength}";

  # Fingerprint generation control...
  my($AllowSharedBonds, $AllowRings, $UseBondSymbols, $UseUniquePaths);

  $AllowSharedBonds = $This->{AllowSharedBonds} ? "Yes" : "No";
  $AllowRings = $This->{AllowRings} ? "Yes" : "No";
  $UseBondSymbols = $This->{UseBondSymbols} ? "Yes" : "No";

  # Report the UseUniquePaths setting itself instead of echoing UseBondSymbols...
  $UseUniquePaths = $This->{UseUniquePaths} ? "Yes" : "No";

  $PathLengthsFingerprintsString .= "; UseUniquePaths: $UseUniquePaths; AllowSharedBonds: $AllowSharedBonds; AllowRings: $AllowRings; UseBondSymbols: $UseBondSymbols";

  if ($This->{AtomIdentifierType} =~ /^AtomicInvariantsAtomTypes$/i) {
    my($AtomicInvariant, @AtomicInvariants, @AtomicInvariantsOrder, %AvailableAtomicInvariants);

    @AtomicInvariantsOrder = AtomTypes::AtomicInvariantsAtomTypes::GetAtomicInvariantsOrder();
    %AvailableAtomicInvariants = AtomTypes::AtomicInvariantsAtomTypes::GetAvailableAtomicInvariants();

    # Pair up each atomic invariant with its entry in available atomic invariants...
    for $AtomicInvariant (@AtomicInvariantsOrder) {
      push @AtomicInvariants, "$AtomicInvariant: $AvailableAtomicInvariants{$AtomicInvariant}";
    }

    $PathLengthsFingerprintsString .= "; AtomicInvariantsToUse: <" . TextUtil::JoinWords(\@{$This->{AtomicInvariantsToUse}}, ", ", 0) . ">";
    $PathLengthsFingerprintsString .= "; AtomicInvariantsOrder: <" . TextUtil::JoinWords(\@AtomicInvariantsOrder, ", ", 0) . ">";
    $PathLengthsFingerprintsString .= "; AvailableAtomicInvariants: <" . TextUtil::JoinWords(\@AtomicInvariants, ", ", 0) . ">";
  }
  elsif ($This->{AtomIdentifierType} =~ /^FunctionalClassAtomTypes$/i) {
    my($FunctionalClass, @FunctionalClasses, @FunctionalClassesOrder, %AvailableFunctionalClasses);

    @FunctionalClassesOrder = AtomTypes::FunctionalClassAtomTypes::GetFunctionalClassesOrder();
    %AvailableFunctionalClasses = AtomTypes::FunctionalClassAtomTypes::GetAvailableFunctionalClasses();

    # Pair up each functional class with its entry in available functional classes...
    for $FunctionalClass (@FunctionalClassesOrder) {
      push @FunctionalClasses, "$FunctionalClass: $AvailableFunctionalClasses{$FunctionalClass}";
    }

    $PathLengthsFingerprintsString .= "; FunctionalClassesToUse: <" . TextUtil::JoinWords(\@{$This->{FunctionalClassesToUse}}, ", ", 0) . ">";
    $PathLengthsFingerprintsString .= "; FunctionalClassesOrder: <" . TextUtil::JoinWords(\@FunctionalClassesOrder, ", ", 0) . ">";
    $PathLengthsFingerprintsString .= "; AvailableFunctionalClasses: <" . TextUtil::JoinWords(\@FunctionalClasses, ", ", 0) . ">";
  }

  if ($This->{Type} =~ /^PathLengthBits$/i) {
    # Size...
    $PathLengthsFingerprintsString .= "; Size: $This->{Size}; MinSize: $This->{MinSize}; MaxSize: $This->{MaxSize}";

    # NumOfBitsToSetPerPath...
    $PathLengthsFingerprintsString .= "; NumOfBitsToSetPerPath: $This->{NumOfBitsToSetPerPath}";

    # Fingerprint bit density and num of bits set...
    my($NumOfSetBits, $BitDensity);
    $NumOfSetBits = $This->{FingerprintsBitVector}->GetNumOfSetBits();
    $BitDensity = $This->{FingerprintsBitVector}->GetFingerprintsBitDensity();
    $PathLengthsFingerprintsString .= "; NumOfOnBits: $NumOfSetBits; BitDensity: $BitDensity";

    $PathLengthsFingerprintsString .= "; FingerprintsBitVector: < $This->{FingerprintsBitVector} >";
  }
  elsif ($This->{Type} =~ /^PathLengthCount$/i) {
    $PathLengthsFingerprintsString .= "; FingerprintsVector: < $This->{FingerprintsVector} >";
  }

  return $PathLengthsFingerprintsString;
}
1445